[ { "title": "Finite Population Regression Adjustment and Non-asymptotic Guarantees for Treatment Effect Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73081", "id": "009LK0vLcY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eaf5d2cdb582c058a078d4fdf52a20f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=009LK0vLcY", "openreview": "https://openreview.net/forum?id=009LK0vLcY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73081", "video": "https://nips.cc/virtual/2023/poster/73081", "author_site": "Mehrdad Ghadiri, David Arbour, Tung Mai, Cameron Musco, Anup B. Rao", "tldr": "", "abstract": "The design and analysis of randomized experiments is fundamental to many areas, from the physical and social sciences to industrial settings. \nRegression adjustment is a popular technique to reduce the variance of estimates obtained from experiments, by utilizing information contained in auxiliary covariates. \nWhile there is a large literature within the statistics community studying various approaches to regression adjustment and their asymptotic properties, little focus has been given to approaches in the finite population setting with non-asymptotic accuracy bounds. \nFurther, prior work typically assumes that an entire population is exposed to an experiment, whereas practitioners often seek to minimize the number of subjects exposed to an experiment, for ethical and pragmatic reasons.\nIn this work, we study the problems of estimating the sample mean, individual treatment effects, and average treatment effect with regression adjustment. \nWe propose approaches that use techniques from randomized numerical linear algebra to sample a subset of the population on which to perform an experiment. 
We give non-asymptotic accuracy bounds for our methods and demonstrate that they compare favorably with prior approaches.", "keywords": "regression adjustment; treatment effect estimation; average treatment effect", "primary_area": "", "supplementary_material": "/attachment/f038a28788bd4be0ce11f2f453688a5ed03891f6.zip", "author": "Mehrdad Ghadiri;David Arbour;Tung Mai;Cameron N Musco;Anup Rao", "authorids": "~Mehrdad_Ghadiri2;~David_Arbour1;~Tung_Mai1;~Cameron_N_Musco1;~Anup_Rao1", "gender": ";M;M;;M", "homepage": "http://darbour.github.io;;https://people.cs.umass.edu/~cmusco/;;https://www.cs.ubc.ca/~ghadirim/", "dblp": "87/7578;177/8902.html;149/2327;63/6846;172/1358", "google_scholar": "prj0heYAAAAJ;eUt8nlIAAAAJ;EeYGZCwAAAAJ;pkwXPU0AAAAJ;Z5OUClEAAAAJ", "orcid": ";;;;0000-0003-0934-1322", "linkedin": "david-arbour/;;;;mehrdad-ghadiri-2a955562/", "or_profile": "~David_Arbour1;~Tung_Mai1;~Cameron_N_Musco1;~Anup_Rao1;~Mehrdad_Ghadiri1", "aff": "Adobe Systems;Adobe;University of Massachusetts, Amherst;Adobe Systems;Georgia Institute of Technology", "aff_domain": "adobe.com;adobe.com;umass.edu;adobe.com;gatech.edu", "position": "Research Scientist;Research Scientist;Assistant Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nghadiri2023finite,\ntitle={Finite Population Regression Adjustment and Non-asymptotic Guarantees for Treatment Effect Estimation},\nauthor={Mehrdad Ghadiri and David Arbour and Tung Mai and Cameron N Musco and Anup Rao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=009LK0vLcY}\n}", "github": "", "project": "", "reviewers": "HGb9;zpw6;ZgSr;UdXn", "pdf_size": 699800, "rating": "4;4;5;7", "confidence": "3;3;3;5", "soundness": "2;3;2;3", "novelty": "3;2;2;3", "presentation": "1;2;2;3", "wc_summary": "340;66;112;69", "wc_strengths": "190;49;49;162", "wc_weaknesses": "600;147;165;794", "wc_questions": "343;80;128;37", "wc_limitations": "73;16;65;1", "wc_review": "1546;358;519;1063", "wc_reply_reviewers": "116;0;102;272", "wc_reply_authors": "0;0;104;520", "reply_reviewers": "1;0;1;2", "reply_authors": "1;1;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 146.75, 113.04727993189398 ], "wc_strengths_avg": [ 112.5, 64.26702109169211 ], "wc_weaknesses_avg": [ 426.5, 279.1330327997745 ], "wc_questions_avg": [ 147.0, 117.6499043773517 ], "wc_limitations_avg": [ 38.75, 30.84132779242813 ], "wc_review_avg": [ 871.5, 468.92456749460246 ], "wc_reply_reviewers_avg": [ 122.5, 97.23553877055446 ], "wc_reply_authors_avg": [ 156.0, 214.40149253211834 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5825953214593790595&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "adobe.com;adobe.com;umass.edu;adobe.com;gatech.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Adobe;University of Massachusetts Amherst;Georgia Institute of Technology", "aff_unique_dep": "Adobe Systems Incorporated;;", "aff_unique_url": "https://www.adobe.com;https://www.umass.edu;https://www.gatech.edu", "aff_unique_abbr": "Adobe;UMass Amherst;Georgia Tech", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Complexity Matters: Rethinking the Latent Space for Generative Modeling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73080", "id": "00EKYYu3fD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e8023f07625374c6fdf3aa08bb38e0e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=00EKYYu3fD", "openreview": "https://openreview.net/forum?id=00EKYYu3fD", "poster": "/media/PosterPDFs/NeurIPS%202023/73080.png?t=1702110456.1567054", "slides": "https://nips.cc/virtual/2023/poster/73080", "video": "https://nips.cc/virtual/2023/poster/73080", "author_site": "Tianyang Hu, Fei Chen, Haonan Wang, Jiawei Li, Wenjia Wang, Jiacheng Sun, Zhenguo Li", "tldr": "", "abstract": "In generative modeling, numerous successful approaches leverage a low-dimensional latent space, e.g., Stable Diffusion models the latent space induced by an encoder and generates images through a paired decoder. Although the selection of the latent space is empirically pivotal, determining the optimal choice and the process of identifying it remain unclear. In this study, we aim to shed light on this under-explored topic by rethinking the latent space from the perspective of model complexity. Our investigation starts with the classic generative adversarial networks (GANs). Inspired by the GAN training objective, we propose a novel \"distance\" between the latent and data distributions, whose minimization coincides with that of the generator complexity. The minimizer of this distance is characterized as the optimal data-dependent latent that most effectively capitalizes on the generator's capacity. Then, we consider parameterizing such a latent distribution by an encoder network and propose a two-stage training strategy called Decoupled Autoencoder (DAE), where the encoder is only updated in the first stage with an auxiliary decoder and then frozen in the second stage while the actual decoder is being trained. DAE can improve the latent distribution and as a result, improve the generative performance. 
Our theoretical analyses are corroborated by comprehensive experiments on various models such as VQGAN and Diffusion Transformer, where our modifications yield significant improvements in sample quality with decreased model complexity.", "keywords": "generative model;latent space;distance between distributions;generative adversarial network;vqgan", "primary_area": "", "supplementary_material": "", "author": "Tianyang Hu;Fei Chen;Haonan Wang;Jiawei Li;Wenjia Wang;Jiacheng Sun;Zhenguo Li", "authorids": "~Tianyang_Hu1;~Fei_Chen8;~Haonan_Wang1;~Jiawei_Li5;~Wenjia_Wang2;~Jiacheng_Sun1;~Zhenguo_Li1", "gender": "M;M;M;M;M;M;M", "homepage": "https://hu-tianyang.github.io/;https://feierustc.github.io/;http://charles-haonan-wang.me/;http://scholar.google.com/citations?user=WRPLXCEAAAAJ&hl=zh-TW;https://www.wenjia-w.com/;;http://www.ee.columbia.edu/~zgli/", "dblp": "170/2551;81/4345-13;;;;165/5350;23/6479", "google_scholar": "mlA_3r0AAAAJ;LhjKgcoAAAAJ;cLziVZMAAAAJ;;EKS1sO0AAAAJ;;XboZC1AAAAAJ", "orcid": ";;0009-0006-6963-8987;;;;", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131;", "or_profile": "~Tianyang_Hu1;~Fei_Chen8;~Haonan_Wang1;~Jiawei_Li5;~Wenjia_Wang2;~Jiacheng_Sun1;~Zhenguo_Li1", "aff": "Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;National University of Singapore;Huawei Technologies Ltd.;Hong Kong University of Science and Technology;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab", "aff_domain": "huawei.com;huawei.com;u.nus.edu;huawei.com;ust.hk;huawei.com;huawei.com", "position": "Researcher;Researcher;PhD student;Researcher;Assistant Professor;Senior Researcher;Principal Researcher", "bibtex": "@inproceedings{\nhu2023complexity,\ntitle={Complexity Matters: Rethinking the Latent Space for Generative Modeling},\nauthor={Tianyang Hu and Fei Chen and Haonan Wang and Jiawei Li and Wenjia Wang and Jiacheng Sun and Zhenguo Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=00EKYYu3fD}\n}", "github": "", "project": "", "reviewers": "14Q5;ssMC;JCyg;2zRJ", "pdf_size": 8521925, "rating": "6;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;4;2", "presentation": "3;3;3;2", "wc_summary": "80;116;80;117", "wc_strengths": "15;111;90;68", "wc_weaknesses": "183;56;136;334", "wc_questions": "107;280;41;94", "wc_limitations": "3;11;18;42", "wc_review": "388;574;365;655", "wc_reply_reviewers": "214;17;13;124", "wc_reply_authors": "671;13;0;276", "reply_reviewers": "2;1;1;2", "reply_authors": "3;2;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.25, 18.25342433627181 ], "wc_strengths_avg": [ 71.0, 35.72814016989969 ], "wc_weaknesses_avg": [ 177.25, 101.25061728206896 ], "wc_questions_avg": [ 130.5, 89.78446413494932 ], "wc_limitations_avg": [ 18.5, 14.568802284333465 ], "wc_review_avg": [ 495.5, 122.66723278854872 ], "wc_reply_reviewers_avg": [ 92.0, 83.3276664739869 ], "wc_reply_authors_avg": [ 240.0, 272.1148654520734 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7250171637843011920&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": 
"huawei.com;huawei.com;u.nus.edu;huawei.com;ust.hk;huawei.com;huawei.com", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "Huawei;National University of Singapore;Hong Kong University of Science and Technology", "aff_unique_dep": "Noah's Ark Lab;;", "aff_unique_url": "https://www.huawei.com;https://www.nus.edu.sg;https://www.ust.hk", "aff_unique_abbr": "Huawei;NUS;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "01GQK1gwe3", "title": "Can Neural Networks Improve Classical Optimization of Inverse Problems?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Finding the values of model parameters from data is an essential task in science.\nWhile iterative optimization algorithms like BFGS can find solutions to inverse problems with machine precision for simple problems, their reliance on local information limits their effectiveness for complex problems involving local minima, chaos, or zero-gradient regions.\n\nThis study explores the potential for overcoming these limitations by jointly optimizing multiple examples. To achieve this, we employ neural networks to reparameterize the solution space and leverage the training procedure as an alternative to classical optimization.\nThis approach is as versatile as traditional optimizers and does not require additional information about the inverse problems, meaning it can be added to existing general-purpose optimization libraries.\nWe evaluate the effectiveness of this approach by comparing it to traditional optimization on various inverse problems involving complex physical systems, such as the incompressible Navier-Stokes equations. Our findings reveal significant improvements in the accuracy of the obtained solutions.", "keywords": "Inverse problems;neural networks;iterative optimization;chaos;convergence", "primary_area": "", "supplementary_material": "/attachment/fe6ef25eb26850028380f6794e08cac5b8b93015.zip", "author": "Philipp Holl;Nils Thuerey", "authorids": "~Philipp_Holl1;~Nils_Thuerey1", "gender": "M;M", "homepage": ";https://ge.in.tum.de", "dblp": "256/9374;42/478", "google_scholar": "LilimmEAAAAJ;https://scholar.google.com.tw/citations?user=GEehwv8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Philipp_Holl1;~Nils_Thuerey1", "aff": "Technical University Munich;Technical University Munich", "aff_domain": "tum.de;tum.de", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nholl2023can,\ntitle={Can Neural Networks Improve Classical Optimization of Inverse Problems?},\nauthor={Philipp Holl and Nils Thuerey},\nyear={2023},\nurl={https://openreview.net/forum?id=01GQK1gwe3}\n}", "github": "", "project": "", "reviewers": "XkkZ;dBXc;nXp8;VvA5", "site": "https://openreview.net/forum?id=01GQK1gwe3", "pdf_size": 487078, "rating": "4;5;6;7", "confidence": "2;2;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "84;82;167;43", "wc_strengths": "77;86;123;31", "wc_weaknesses": "77;123;173;64", "wc_questions": "135;393;4;56", "wc_limitations": "52;15;9;5", "wc_review": "425;699;476;199", "wc_reply_reviewers": "15;41;0;51", "wc_reply_authors": "18;26;0;32", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], 
"wc_summary_avg": [ 94.0, 45.205088209182826 ], "wc_strengths_avg": [ 79.25, 32.7595405950694 ], "wc_weaknesses_avg": [ 109.25, 42.83908845902303 ], "wc_questions_avg": [ 147.0, 149.49080239265558 ], "wc_limitations_avg": [ 20.25, 18.673175948402566 ], "wc_review_avg": [ 449.75, 177.69549093885303 ], "wc_reply_reviewers_avg": [ 26.75, 20.27775875189366 ], "wc_reply_authors_avg": [ 19.0, 12.041594578792296 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CrCAFL1bQTUJ:scholar.google.com/&scioq=Can+Neural+Networks+Improve+Classical+Optimization+of+Inverse+Problems%3F&hl=en&as_sdt=0,14", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technical University of Munich", "aff_unique_dep": "", "aff_unique_url": "https://www.tum.de", "aff_unique_abbr": "TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Robustness Guarantees for Adversarially Trained Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73079", "id": "02Uc0G2Cym", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7cde9bd7774c9f5056cb6e5474fbadff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=02Uc0G2Cym", "openreview": "https://openreview.net/forum?id=02Uc0G2Cym", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73079", "video": "https://nips.cc/virtual/2023/poster/73079", "author_site": "Poorya Mianjy, Raman Arora", "tldr": "", "abstract": "We study robust adversarial training of two-layer neural networks as a bi-level optimization problem. In particular, for the inner loop that implements the adversarial attack during training using projected gradient descent (PGD), we propose maximizing a \\emph{lower bound} on the $0/1$-loss by reflecting a surrogate loss about the origin. This allows us to give a convergence guarantee for the inner-loop PGD attack. Furthermore, assuming the data is linearly separable, we provide precise iteration complexity results for end-to-end adversarial training, which holds for any width and initialization. 
We provide empirical evidence to support our theoretical results.", "keywords": "Adversarial training;neural networks;robustness;guarantees", "primary_area": "", "supplementary_material": "/attachment/dc93ba95d080e110f2caf11627c82da48d2cff1c.zip", "author": "Poorya Mianjy;Raman Arora", "authorids": "~Poorya_Mianjy1;~Raman_Arora1", "gender": "M;M", "homepage": "https://www.cs.jhu.edu/~r3831/;http://www.cs.jhu.edu/~raman/Home.html", "dblp": "182/8944;", "google_scholar": "PTG3GAsAAAAJ;Spe0xdkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Poorya_Mianjy1;~Raman_Arora1", "aff": "Citadel Securities;Johns Hopkins University", "aff_domain": "citadelsecurities.com;jhu.edu", "position": "Researcher;Associate Professor", "bibtex": "@inproceedings{\nmianjy2023robustness,\ntitle={Robustness Guarantees for Adversarially Trained Neural Networks},\nauthor={Poorya Mianjy and Raman Arora},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=02Uc0G2Cym}\n}", "github": "", "project": "", "reviewers": "8NN7;2oBp;avt7;Gmw4", "pdf_size": 425793, "rating": "6;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "26;138;82;62", "wc_strengths": "8;73;93;66", "wc_weaknesses": "281;65;236;169", "wc_questions": "10;62;90;21", "wc_limitations": "3;28;10;9", "wc_review": "328;366;511;327", "wc_reply_reviewers": "165;80;135;7", "wc_reply_authors": "456;0;121;0", "reply_reviewers": "3;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.0, 40.53393639902249 ], "wc_strengths_avg": [ 60.0, 31.614869919074472 ], "wc_weaknesses_avg": [ 187.75, 81.30613445491059 ], "wc_questions_avg": [ 45.75, 32.06536293261001 ], "wc_limitations_avg": [ 12.5, 9.340770846134703 ], "wc_review_avg": [ 383.0, 75.55461600723017 ], "wc_reply_reviewers_avg": [ 96.75, 60.11811291116846 ], "wc_reply_authors_avg": [ 144.25, 186.64454854080256 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9162151635322800457&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "citadelsecurities.com;jhu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Citadel Securities;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.citadel.com;https://www.jhu.edu", "aff_unique_abbr": "Citadel;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Exploiting hidden structures in non-convex games for convergence to Nash equilibrium", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73078", "id": "05P1U0jk8r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d37c9ad425fe5b65304d500c6edcba00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=05P1U0jk8r", "openreview": "https://openreview.net/forum?id=05P1U0jk8r", "poster": "/media/PosterPDFs/NeurIPS%202023/73078.png?t=1702393886.6532638", "slides": "https://nips.cc/virtual/2023/poster/73078", "video": "https://nips.cc/virtual/2023/poster/73078", 
"author_site": "Iosif Sakos, Emmanouil-Vasileios Vlatakis-Gkaragkounis, Panayotis Mertikopoulos, Georgios Piliouras", "tldr": "", "abstract": "A wide array of modern machine learning applications \u2013 from adversarial models to multi-agent reinforcement learning \u2013 can be formulated as non-cooperative games whose Nash equilibria represent the system\u2019s desired operational states. Despite having a highly non-convex loss landscape, many cases of interest possess a latent convex structure that could potentially be leveraged to yield convergence to an equilibrium. Driven by this observation, our paper proposes a flexible first-order method that successfully exploits such \u201chidden structures\u201d and achieves convergence under minimal assumptions for the transformation connecting the players\u2019 control variables to the game\u2019s latent, convex-structured layer. The proposed method \u2013 which we call preconditioned hidden gradient descent (PHGD) \u2013 hinges on a judiciously chosen gradient preconditioning scheme related to natural gradient methods. Importantly, we make no separability assumptions for the game\u2019s hidden structure, and we provide explicit convergence rate guarantees for both deterministic and stochastic environments.", "keywords": "Nash Equilibrium;Games;Gradient;Non-monotone VI;Natural Gradient;Precondition", "primary_area": "", "supplementary_material": "/attachment/f116fc7acb7c8ee614e8a0f2f2b67175ea870d5a.zip", "author": "Iosif Sakos;Emmanouil-Vasileios Vlatakis-Gkaragkounis;Panayotis Mertikopoulos;Georgios Piliouras", "authorids": "~Iosif_Sakos1;~Emmanouil-Vasileios_Vlatakis-Gkaragkounis1;~Panayotis_Mertikopoulos1;~Georgios_Piliouras1", "gender": "M;M;M;", "homepage": ";http://www.cs.columbia.edu/~emvlatakis/;http://polaris.imag.fr/panayotis.mertikopoulos/;", "dblp": "271/1082;251/8372;49/6721;62/1236", "google_scholar": "https://scholar.google.gr/citations?user=69xvSfQAAAAJ;MKutDKcAAAAJ;xsusqPYAAAAJ;", "orcid": "0000-0002-1871-9078;;0000-0003-2026-9616;", "linkedin": "joseph-sakos-3b3a6a200?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BP9xevRgnRfKhbYYoPyDf3Q%3D%3D;;;", "or_profile": "~Iosif_Sakos1;~Emmanouil-Vasileios_Vlatakis-Gkaragkounis1;~Panayotis_Mertikopoulos1;~Georgios_Piliouras1", "aff": "Singapore University of Technology and Design;University of California, Berkeley;French National Center for Scientific Research;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;berkeley.edu;imag.fr;sutd.edu.sg", "position": "PhD student;Postdoc;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nsakos2023exploiting,\ntitle={Exploiting hidden structures in non-convex games for convergence to Nash equilibrium},\nauthor={Iosif Sakos and Emmanouil-Vasileios Vlatakis-Gkaragkounis and Panayotis Mertikopoulos and Georgios Piliouras},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=05P1U0jk8r}\n}", "github": "", "project": "", "reviewers": "hHrj;cb9a;zCUr;rghq;dBPZ", "pdf_size": 5921377, "rating": "5;5;6;7;7", "confidence": "3;4;2;3;4", "soundness": "3;2;3;3;3", "novelty": "4;3;3;4;4", "presentation": "2;1;2;3;3", "wc_summary": "100;88;38;71;46", "wc_strengths": "118;32;31;101;96", "wc_weaknesses": "127;335;33;24;166", "wc_questions": "100;218;19;68;263", "wc_limitations": "10;22;1;10;16", "wc_review": "455;695;122;274;587", "wc_reply_reviewers": "31;127;0;35;114", "wc_reply_authors": "21;502;0;0;283", 
"reply_reviewers": "1;2;0;1;2", "reply_authors": "2;3;1;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 68.6, 23.72846391994223 ], "wc_strengths_avg": [ 75.6, 36.74016875301473 ], "wc_weaknesses_avg": [ 137.0, 112.89818421923357 ], "wc_questions_avg": [ 133.6, 92.1229613071573 ], "wc_limitations_avg": [ 11.8, 6.997142273814361 ], "wc_review_avg": [ 426.6, 207.1913125591901 ], "wc_reply_reviewers_avg": [ 61.4, 49.92233968876058 ], "wc_reply_authors_avg": [ 161.2, 201.29918032620003 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14506649512056312367&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "email": "sutd.edu.sg;berkeley.edu;imag.fr;sutd.edu.sg", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Singapore University of Technology and Design;University of California, Berkeley;French National Center for Scientific Research", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sutd.edu.sg;https://www.berkeley.edu;https://www.cnrs.fr", "aff_unique_abbr": "SUTD;UC Berkeley;CNRS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Singapore;United States;France" }, { "title": "Knowledge Diffusion for Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73077", "id": "08hStXdT1s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cdddf13f06182063c4dbde8cbd5a5c21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=08hStXdT1s", "openreview": "https://openreview.net/forum?id=08hStXdT1s", "poster": "/media/PosterPDFs/NeurIPS%202023/73077.png?t=1701147484.4867098", "slides": "https://nips.cc/virtual/2023/poster/73077", "video": "https://nips.cc/virtual/2023/poster/73077", "author_site": "Tao Huang, Yuan Zhang, Mingkai Zheng, Shan You, Fei Wang, Chen Qian, Chang Xu", "tldr": "", "abstract": "The representation gap between teacher and student is an emerging topic in knowledge distillation (KD). To reduce the gap and improve the performance, current methods often resort to complicated training schemes, loss functions, and feature alignments, which are task-specific and feature-specific. In this paper, we state that the essence of these methods is to discard the noisy information and distill the valuable information in the feature, and propose a novel KD method dubbed DiffKD, to explicitly denoise and match features using diffusion models. Our approach is based on the observation that student features typically contain more noises than teacher features due to the smaller capacity of student model. To address this, we propose to denoise student features using a diffusion model trained by teacher features. This allows us to perform better distillation between the refined clean feature and teacher feature. Additionally, we introduce a light-weight diffusion model with a linear autoencoder to reduce the computation cost and an adaptive noise matching module to improve the denoising performance. 
Extensive experiments demonstrate that DiffKD is effective across various types of features and achieves state-of-the-art performance consistently on image classification, object detection, and semantic segmentation tasks. Code is available at https://github.com/hunto/DiffKD.", "keywords": "knowledge distillation;diffusion models", "primary_area": "", "supplementary_material": "", "author": "Tao Huang;Yuan Zhang;Mingkai Zheng;Shan You;Fei Wang;Chen Qian;Chang Xu", "authorids": "~Tao_Huang5;~Yuan_Zhang20;~Mingkai_Zheng1;~Shan_You3;~Fei_Wang9;~Chen_Qian1;~Chang_Xu4", "gender": "M;M;;M;M;M;", "homepage": "https://taohuang.info;https://gumpest.github.io/;;https://shanyou92.github.io/;;;", "dblp": "34/808-20;;;179/2548;;;", "google_scholar": "jkcRdBgAAAAJ;dXj1WskAAAAJ;;https://scholar.google.com/citations?hl=en;ljt16JkAAAAJ;AerkT0YAAAAJ;", "orcid": ";;;0000-0003-1964-0430;;;", "linkedin": ";;;;;;", "or_profile": "~Tao_Huang5;~Yuan_Zhang20;~Mingkai_Zheng1;~Shan_You3;~Fei_Wang9;~Chen_Qian1;~Chang_Xu4", "aff": "The University of Sydney;Peking University;;SenseTime Research;University of Science and Technology of China;Tsinghua University;", "aff_domain": "sydney.edu.au;pku.edu.cn;;sensetime.com;mail.ustc.edu.cn;mails.tsinghua.edu.cn;", "position": "PhD student;MS student;;Researcher;PhD student;PhD student;", "bibtex": "@inproceedings{\nhuang2023knowledge,\ntitle={Knowledge Diffusion for Distillation},\nauthor={Tao Huang and Yuan Zhang and Mingkai Zheng and Shan You and Fei Wang and Chen Qian and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=08hStXdT1s}\n}", "github": "", "project": "", "reviewers": "AKTb;pbtu;NwaZ;78h4;tFex;hqhp", "pdf_size": 3392912, "rating": "5;5;5;6;6;7", "confidence": "4;4;4;4;3;4", "soundness": "2;3;3;4;2;3", "novelty": "2;3;3;3;2;3", "presentation": "3;3;3;3;3;3", "wc_summary": "59;61;102;66;73;89", "wc_strengths": "48;17;60;58;30;136", "wc_weaknesses": "282;157;187;43;114;78", "wc_questions": "3;5;5;58;16;22", "wc_limitations": "6;36;5;1;6;1", "wc_review": "398;276;359;226;239;326", "wc_reply_reviewers": "26;108;89;0;226;24", "wc_reply_authors": "405;398;210;141;827;59", "reply_reviewers": "1;1;2;0;2;1", "reply_authors": "3;5;3;3;4;2", "rating_avg": [ 5.666666666666667, 0.7453559924999299 ], "confidence_avg": [ 3.8333333333333335, 0.3726779962499649 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.0, 15.609825965290794 ], "wc_strengths_avg": [ 58.166666666666664, 37.97550380033014 ], "wc_weaknesses_avg": [ 143.5, 78.01442174367506 ], "wc_questions_avg": [ 18.166666666666668, 19.07368751855696 ], "wc_limitations_avg": [ 9.166666666666666, 12.184917817622829 ], "wc_review_avg": [ 304.0, 62.50066666311115 ], "wc_reply_reviewers_avg": [ 78.83333333333333, 75.96581833664109 ], "wc_reply_authors_avg": [ 340.0, 251.7339865810733 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.6871842709362768 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.20000000000000004, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4615443208731882220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sydney.edu.au;pku.edu.cn;;sensetime.com;mail.ustc.edu.cn;mails.tsinghua.edu.cn;", "author_num": 7, "aff_unique_index": "0;1;2;3;4", 
"aff_unique_norm": "University of Sydney;Peking University;SenseTime;University of Science and Technology of China;Tsinghua University", "aff_unique_dep": ";;SenseTime Research;;", "aff_unique_url": "https://www.sydney.edu.au;http://www.pku.edu.cn;https://www.sensetime.com;http://www.ustc.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "USYD;Peking U;SenseTime;USTC;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Australia;China" }, { "title": "Exposing flaws of generative model evaluation metrics and their unfair treatment of diffusion models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73076", "id": "08zf7kTOoh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0bc795afae289ed465a65a3b4b1f4eb7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=08zf7kTOoh", "openreview": "https://openreview.net/forum?id=08zf7kTOoh", "poster": "/media/PosterPDFs/NeurIPS%202023/73076.png?t=1701990842.3741822", "slides": "https://nips.cc/virtual/2023/poster/73076", "video": "https://nips.cc/virtual/2023/poster/73076", "author_site": "George Stein, Jesse Cresswell, Rasa Hosseinzadeh, Yi Sui, Brendan Ross, Valentin Villecroze, Zhaoyan Liu, Anthony Caterini, Eric Taylor, Gabriel Loaiza-Ganem", "tldr": "", "abstract": "We systematically study a wide variety of generative models spanning semantically-diverse image datasets to understand and improve the feature extractors and metrics used to evaluate them.\nUsing best practices in psychophysics, we measure human perception of image realism for generated samples by conducting the largest experiment evaluating generative models to date, and find that no existing metric strongly correlates with human evaluations.\nComparing to 17 modern metrics for evaluating the overall performance, fidelity, diversity, rarity, and memorization of generative models, we find that the state-of-the-art perceptual realism of diffusion models as judged by humans is not reflected in commonly reported metrics such as FID. This discrepancy is not explained by diversity in generated samples, though one cause is over-reliance on Inception-V3.\nWe address these flaws through a study of alternative self-supervised feature extractors, find that the semantic information encoded by individual networks strongly depends on their training procedure, and show that DINOv2-ViT-L/14 allows for much richer evaluation of generative models. Next, we investigate data memorization, and find that generative models do memorize training examples on simple, smaller datasets like CIFAR10, but not necessarily on more complex datasets like ImageNet. However, our experiments show that current metrics do not properly detect memorization: none in the literature is able to separate memorization from other phenomena such as underfitting or mode shrinkage. To facilitate further development of generative models and their evaluation we release all generated image datasets, human evaluation data, and a modular library to compute 17 common metrics for 9 different encoders at https://github.com/layer6ai-labs/dgm-eval.", "keywords": "generative models;generative model evaluation;self-supervised learning;representation learning;metrics", "primary_area": "", "supplementary_material": "", "author": "George Stein;Jesse C. Cresswell;Rasa Hosseinzadeh;Yi Sui;Brendan Leigh Ross;Valentin Villecroze;Zhaoyan Liu;Anthony L. 
Caterini;Eric Taylor;Gabriel Loaiza-Ganem", "authorids": "~George_Stein1;~Jesse_C._Cresswell1;~Rasa_Hosseinzadeh2;~Yi_Sui1;~Brendan_Leigh_Ross1;~Valentin_Villecroze1;~Zhaoyan_Liu1;~Anthony_L._Caterini1;~Eric_Taylor3;~Gabriel_Loaiza-Ganem1", "gender": ";;M;F;M;;M;M;M;M", "homepage": "https://georgestein.github.io/;;;https://www.linkedin.com/in/yi-sui-90513699/;;;;;;https://sites.google.com/view/gabriel-loaiza-ganem/about-me", "dblp": "220/5541;;266/1688;;295/0098;;;167/4383;;238/1617", "google_scholar": ";;;fLo2o54AAAAJ;https://scholar.google.ca/citations?user=TyY1aSYAAAAJ;;https://scholar.google.com/citations?hl=en;34sCXQEAAAAJ;https://scholar.google.ca/citations?user=OqtTvI0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-5193-516X;;;0009-0009-9207-7403;;;;;;", "linkedin": ";;rasa-hosseinzadeh-6204a6208/;;brendan-ross;;zhaoyan-liu-9309aa180/;;eric-taylor-815049156/;", "or_profile": "~George_Stein1;~Jesse_C._Cresswell1;~Rasa_Hosseinzadeh2;~Yi_Sui1;~Brendan_Leigh_Ross1;~Valentin_Villecroze1;~Zhaoyan_Liu1;~Anthony_L._Caterini1;~Eric_Taylor3;~Gabriel_Loaiza-Ganem1", "aff": "Layer6 AI;;Layer6;Layer6 AI;Layer 6 AI;;Department of Computer Science, University of Toronto;Layer6;Layer 6;Layer 6 AI", "aff_domain": "layer6.ai;;layer6.ai;layer6.ai;layer6.ai;;cs.toronto.edu;layer6.ai;layer6.ai;layer6.ai", "position": "Machine Learning Scientist;;Researcher;Machine Learning Scientist;Senior Machine Learning Scientist;;MS student;Researcher;Researcher;Machine Learning Research Scientist", "bibtex": "@inproceedings{\nstein2023exposing,\ntitle={Exposing flaws of generative model evaluation metrics and their unfair treatment of diffusion models},\nauthor={George Stein and Jesse C. Cresswell and Rasa Hosseinzadeh and Yi Sui and Brendan Leigh Ross and Valentin Villecroze and Zhaoyan Liu and Anthony L. 
Caterini and Eric Taylor and Gabriel Loaiza-Ganem},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=08zf7kTOoh}\n}", "github": "", "project": "", "reviewers": "xpCe;SaoU;uTCz;6HZb;uifn", "pdf_size": 42390635, "rating": "4;6;7;7;7", "confidence": "4;4;2;5;4", "soundness": "2;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "93;52;157;104;79", "wc_strengths": "59;47;32;85;103", "wc_weaknesses": "813;46;170;108;23", "wc_questions": "4;148;2;5;21", "wc_limitations": "11;18;49;72;18", "wc_review": "980;311;410;374;244", "wc_reply_reviewers": "886;26;21;19;9", "wc_reply_authors": "1995;0;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.0, 34.68140712254911 ], "wc_strengths_avg": [ 65.2, 25.662423891752706 ], "wc_weaknesses_avg": [ 232.0, 294.97728726123984 ], "wc_questions_avg": [ 36.0, 56.40921910468182 ], "wc_limitations_avg": [ 33.6, 23.277456905770443 ], "wc_review_avg": [ 463.8, 264.2274777535448 ], "wc_reply_reviewers_avg": [ 192.2, 346.9440300682518 ], "wc_reply_authors_avg": [ 399.0, 798.0 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.14002800840280094, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4484852000141982776&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "layer6.ai;;layer6.ai;layer6.ai;layer6.ai;;cs.toronto.edu;layer6.ai;layer6.ai;layer6.ai", "author_num": 10, "aff_unique_index": "0;0;0;1;2;0;3;1", "aff_unique_norm": "Layer6 AI;Layer 6 AI;University of Toronto;Layer 6", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://layer6.ai;https://layer6.ai;https://www.utoronto.ca;", "aff_unique_abbr": "Layer6;Layer 6 AI;U of T;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Canada;" }, { "title": "On the Powerfulness of Textual Outlier Exposure for Visual OoD Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73075", "id": "090ORrOAPL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a2374637af47ac9471b43c99b68acf27-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=090ORrOAPL", "openreview": "https://openreview.net/forum?id=090ORrOAPL", "poster": "/media/PosterPDFs/NeurIPS%202023/73075.png?t=1701062751.973874", "slides": "https://nips.cc/virtual/2023/poster/73075", "video": "https://nips.cc/virtual/2023/poster/73075", "author_site": "Sangha Park, Jisoo Mok, Dahuin Jung, Saehyung Lee, Sungroh Yoon", "tldr": "", "abstract": "Successful detection of Out-of-Distribution (OoD) data is becoming increasingly important to ensure safe deployment of neural networks. One of the main challenges in OoD detection is that neural networks output overconfident predictions on OoD data, making it difficult to determine OoD-ness of data solely based on their predictions. Outlier exposure addresses this issue by introducing an additional loss that encourages low-confidence predictions on OoD data during training. 
While outlier exposure has shown promising potential in improving OoD detection performance, all previous studies on outlier exposure have been limited to utilizing visual outliers. Drawing inspiration from the recent advancements in vision-language pre-training, this paper ventures into the uncharted territory of textual outlier exposure. First, we uncover the benefits of using textual outliers by replacing real or virtual outliers in the image domain with textual equivalents. Then, we propose various ways of generating preferable textual outliers. Our extensive experiments demonstrate that generated textual outliers achieve competitive performance on large-scale OoD and hard OoD benchmarks. Furthermore, we conduct empirical analyses of textual outliers to provide primary criteria for designing advantageous textual outliers: near-distribution, descriptiveness, and inclusion of visual semantics.", "keywords": "Out-of-distribution detection", "primary_area": "", "supplementary_material": "/attachment/d29c7abe5ca9c6b446a901d59782ab37b843c948.pdf", "author": "Sangha Park;Jisoo Mok;Dahuin Jung;Saehyung Lee;Sungroh Yoon", "authorids": "~Sangha_Park2;~Jisoo_Mok1;~Dahuin_Jung2;~Saehyung_Lee1;~Sungroh_Yoon1", "gender": ";F;F;M;", "homepage": ";;https://hai.ssu.ac.kr/;;http://ailab.snu.ac.kr", "dblp": "311/1675;294/8666;224/0158;260/0442;99/1474", "google_scholar": "https://scholar.google.com/citations?hl=ko;LZP5k2cAAAAJ;https://scholar.google.co.kr/citations?user=wleS-UQAAAAJ;nS24h74AAAAJ;Bphl_fIAAAAJ", "orcid": ";0000-0001-7002-0275;;;0000-0002-2367-197X", "linkedin": ";;;;", "or_profile": "~Sangha_Park2;~Jisoo_Mok1;~Dahuin_Jung2;~Saehyung_Lee1;~Sungroh_Yoon1", "aff": "Seoul National University;Amazon;Seoul National University;Qualcomm Inc, QualComm;Seoul National University", "aff_domain": "snu.ac.kr;amazon.com;snu.ac.kr;qti.qualcomm.com;snu.ac.kr", "position": "PhD student;Internship;PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\npark2023on,\ntitle={On the Powerfulness of Textual Outlier Exposure for Visual OoD Detection},\nauthor={Sangha Park and Jisoo Mok and Dahuin Jung and Saehyung Lee and Sungroh Yoon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=090ORrOAPL}\n}", "github": "", "project": "", "reviewers": "Lz76;yebH;T6K7;JSZe", "pdf_size": 6150617, "rating": "5;6;6;7", "confidence": "4;5;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "67;79;155;67", "wc_strengths": "104;30;124;79", "wc_weaknesses": "202;121;108;54", "wc_questions": "71;2;163;52", "wc_limitations": "34;1;4;28", "wc_review": "478;233;554;280", "wc_reply_reviewers": "28;0;64;20", "wc_reply_authors": "0;0;245;0", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.0, 36.701498607005135 ], "wc_strengths_avg": [ 84.25, 35.1452343853331 ], "wc_weaknesses_avg": [ 121.25, 52.95930041078715 ], "wc_questions_avg": [ 72.0, 58.27091899052219 ], "wc_limitations_avg": [ 16.75, 14.446020213193666 ], "wc_review_avg": [ 386.25, 133.5409581364459 ], "wc_reply_reviewers_avg": [ 28.0, 23.15167380558045 ], "wc_reply_authors_avg": [ 61.25, 106.08811196359373 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 
0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6570547292873333303&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;amazon.com;snu.ac.kr;qti.qualcomm.com;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Seoul National University;Amazon;Qualcomm Incorporated", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.snu.ac.kr;https://www.amazon.com;https://www.qualcomm.com", "aff_unique_abbr": "SNU;Amazon;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Online Ad Procurement in Non-stationary Autobidding Worlds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73074", "id": "09bZyE9tfp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/84bad835faaf48f24d990072bb5b80ee-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=09bZyE9tfp", "openreview": "https://openreview.net/forum?id=09bZyE9tfp", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73074", "video": "https://nips.cc/virtual/2023/poster/73074", "author_site": "Jason Cheuk Nam Liang, Haihao Lu, Baoyu Zhou", "tldr": "", "abstract": "Today's online advertisers procure digital ad impressions through interacting with autobidding platforms: advertisers convey high level procurement goals via setting levers such as budget, target return-on-investment, max cost per click, etc.. Then ads platforms subsequently procure impressions on advertisers' behalf, and report final procurement conversions (e.g. click) to advertisers. In practice, advertisers may receive minimal information on platforms' procurement details, and procurement outcomes are subject to non-stationary factors like seasonal patterns, occasional system corruptions, and market trends which make it difficult for advertisers to optimize lever decisions effectively. Motivated by this, we present an online learning framework that helps advertisers dynamically optimize ad platform lever decisions while subject to general long-term constraints in a realistic bandit feedback environment with non-stationary procurement outcomes. In particular, we introduce a primal-dual algorithm for online decision making with multi-dimension decision variables, bandit feedback and long-term uncertain constraints. We show that our algorithm achieves low regret in many worlds when procurement outcomes are generated through procedures that are stochastic, adversarial, adversarially corrupted, periodic, and ergodic, respectively, without having to know which procedure is the ground truth. 
Finally, we emphasize that our proposed algorithm and theoretical results extend beyond the applications of online advertising.", "keywords": "autobidding;online advertising;bandit online convex optimization;constrained optimization", "primary_area": "", "supplementary_material": "/attachment/3032f45441c74269f1798d51e0690ffe7db951f0.pdf", "author": "Jason Cheuk Nam Liang;Haihao Lu;Baoyu Zhou", "authorids": "~Jason_Cheuk_Nam_Liang1;~Haihao_Lu2;~Baoyu_Zhou2", "gender": "M;Not Specified;", "homepage": "http://www.mit.edu/~jcnliang/;https://faculty.chicagobooth.edu/haihao-lu;https://baoyuzhou18.github.io/", "dblp": "254/0873;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;Y5hkgwwAAAAJ", "orcid": ";;0000-0003-3385-5788", "linkedin": "jason-cheuk-nam-liang-307459113/;;", "or_profile": "~Jason_Cheuk_Nam_Liang1;~Haihao_Lu2;~Baoyu_Zhou2", "aff": "Massachusetts Institute of Technology;University of Chicago;University of Chicago", "aff_domain": "mit.edu;uchicago.edu;uchicago.edu", "position": "PhD student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nliang2023online,\ntitle={Online Ad Procurement in Non-stationary Autobidding Worlds},\nauthor={Jason Cheuk Nam Liang and Haihao Lu and Baoyu Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=09bZyE9tfp}\n}", "github": "", "project": "", "reviewers": "EvVQ;g73L;bowu;hupT", "pdf_size": 649404, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "3;3;2;4", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "105;50;116;207", "wc_strengths": "50;63;75;173", "wc_weaknesses": "120;36;89;160", "wc_questions": "394;12;257;20", "wc_limitations": "17;1;1;28", "wc_review": "686;162;538;588", "wc_reply_reviewers": "179;0;225;81", "wc_reply_authors": "80;0;299;0", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 119.5, 56.36710033343918 ], "wc_strengths_avg": [ 90.25, 48.586906672477106 ], "wc_weaknesses_avg": [ 101.25, 45.30659444275193 ], "wc_questions_avg": [ 170.75, 162.1779501041988 ], "wc_limitations_avg": [ 11.75, 11.431863365173676 ], "wc_review_avg": [ 493.5, 198.65736835063532 ], "wc_reply_reviewers_avg": [ 121.25, 87.20772614854718 ], "wc_reply_authors_avg": [ 94.75, 122.362933521553 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11964164174046572869&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "mit.edu;uchicago.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.uchicago.edu", "aff_unique_abbr": "MIT;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Task Arithmetic in the Tangent Space: Improved Editing of Pre-Trained Models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73073", "id": "0A9f2jZDGW", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/d28077e5ff52034cd35b4aa15320caea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0A9f2jZDGW", "openreview": "https://openreview.net/forum?id=0A9f2jZDGW", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73073", "video": "https://nips.cc/virtual/2023/poster/73073", "author_site": "Guillermo Ortiz-Jimenez, Alessandro Favero, Pascal Frossard", "tldr": "", "abstract": "Task arithmetic has recently emerged as a cost-effective and scalable approach to edit pre-trained models directly in weight space: By adding the fine-tuned weights of different tasks, the model's performance can be improved on these tasks, while negating them leads to task forgetting. Yet, our understanding of the effectiveness of task arithmetic and its underlying principles remains limited. We present a comprehensive study of task arithmetic in vision-language models and show that weight disentanglement is the crucial factor that makes it effective. This property arises during pre-training and manifests when distinct directions in weight space govern separate, localized regions in function space associated with the tasks. Notably, we show that fine-tuning models in their tangent space by linearizing them amplifies weight disentanglement. This leads to substantial performance improvements across multiple task arithmetic benchmarks and diverse models. Building on these findings, we provide theoretical and empirical analyses of the neural tangent kernel (NTK) of these models and establish a compelling link between task arithmetic and the spatial localization of the NTK eigenfunctions. Overall, our work uncovers novel insights into the fundamental mechanisms of task arithmetic and offers a more reliable and effective approach to edit pre-trained models through the NTK linearization.", "keywords": "model editing;transfer learning;neural tangent kernel;vision-language pre-training;deep learning science", "primary_area": "", "supplementary_material": "/attachment/ec080011525ad4f95e9330e9cb3f6a8247a1f132.pdf", "author": "Guillermo Ortiz-Jimenez;Alessandro Favero;Pascal Frossard", "authorids": "~Guillermo_Ortiz-Jimenez1;~Alessandro_Favero1;~Pascal_Frossard1", "gender": ";M;", "homepage": "http://gortizji.github.io;https://alesfav.github.io/;", "dblp": "222/2737;292/3039;", "google_scholar": "xAsJnG0AAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-7640-5501;", "linkedin": ";;", "or_profile": "~Guillermo_Ortiz-Jimenez1;~Alessandro_Favero1;~Pascal_Frossard1", "aff": "Swiss Federal Institute of Technology Lausanne;Amazon;", "aff_domain": "epfl.ch;amazon.com;", "position": "PhD student;Intern;", "bibtex": "@inproceedings{\nortiz-jimenez2023task,\ntitle={Task Arithmetic in the Tangent Space: Improved Editing of Pre-Trained Models},\nauthor={Guillermo Ortiz-Jimenez and Alessandro Favero and Pascal Frossard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0A9f2jZDGW}\n}", "github": "", "project": "", "reviewers": "eVrq;9cnX;DjkZ;sE5a;qb7d", "pdf_size": 5286394, "rating": "6;8;8;9;10", "confidence": "3;4;4;4;4", "soundness": "3;4;4;4;4", "novelty": "3;4;4;4;3", "presentation": "3;3;4;4;4", "wc_summary": "99;135;115;187;60", "wc_strengths": "38;67;125;256;182", "wc_weaknesses": "184;42;141;109;185", "wc_questions": "135;56;125;211;24", "wc_limitations": "9;7;70;62;6", "wc_review": "465;307;576;825;457", "wc_reply_reviewers": "0;42;49;23;118", 
"wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 8.2, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 119.2, 41.897016600230614 ], "wc_strengths_avg": [ 133.6, 78.70603534672547 ], "wc_weaknesses_avg": [ 132.2, 53.334416655664285 ], "wc_questions_avg": [ 110.2, 65.36482234352053 ], "wc_limitations_avg": [ 30.8, 28.86797533600166 ], "wc_review_avg": [ 526.0, 172.24633522951947 ], "wc_reply_reviewers_avg": [ 46.4, 39.63130076088848 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8291561975888501, "gs_citation": 124, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17387344167922799551&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "epfl.ch;amazon.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.epfl.ch;https://www.amazon.com", "aff_unique_abbr": "EPFL;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Stochastic Multi-armed Bandits: Optimal Trade-off among Optimality, Consistency, and Tail Risk", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73072", "id": "0BfQT652sC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ffa1f5ad26addef897dcb938e525db7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0BfQT652sC", "openreview": "https://openreview.net/forum?id=0BfQT652sC", "poster": "/media/PosterPDFs/NeurIPS%202023/73072.png?t=1697228593.9260921", "slides": "https://nips.cc/virtual/2023/poster/73072", "video": "https://nips.cc/virtual/2023/poster/73072", "author_site": "David Simchi-Levi, Zeyu Zheng, Feng Zhu", "tldr": "", "abstract": "We consider the stochastic multi-armed bandit problem and fully characterize the interplays among three desired properties for policy design: worst-case optimality, instance-dependent consistency, and light-tailed risk. We show how the order of expected regret exactly affects the decaying rate of the regret tail probability for both the worst-case and instance-dependent scenario. A novel policy is proposed to achieve the optimal regret tail risk for any regret threshold. Concretely, for any given $\\alpha\\in[1/2, 1)$ and $\\beta\\in[0, 1)$, our policy achieves a worst-case expected regret of $\\tilde O(T^\\alpha)$ and instance-dependent expected regret of $\\tilde O(T^\\beta)$, while enjoys a probability of incurring an $\\Omega(T^\\delta)$ regret that decays exponentially with a polynomial $T$ term. Such decaying rate is proved to be best achievable. We also generalize our analysis to the stochastic multi-armed bandit problem with non-stationary baseline rewards, where in each time period $t$, the decision maker pulls one of $K$ arms and collects a reward which is the sum of three terms: the mean of the pulled arm, an independent noise, and a non-stationary baseline reward as a function of $t$. 
Our results reveal insights on the trade-off between expected regret and tail risk for both worst-case and instance-dependent scenario, indicating that more sub-optimality and inconsistency leaves space for more light-tailed risk of incurring a large regret.", "keywords": "multi-armed bandit;worst-case optimality;instance-dependent consistency;light-tailed risk", "primary_area": "", "supplementary_material": "/attachment/6d14a597e31e19e0fff1fc1480b738c39346b31c.pdf", "author": "David Simchi-Levi;Zeyu Zheng;Feng Zhu", "authorids": "~David_Simchi-Levi2;~Zeyu_Zheng2;~Feng_Zhu7", "gender": "M;M;M", "homepage": "http://slevi1.mit.edu/;https://zheng.ieor.berkeley.edu/;https://sites.mit.edu/fengzhu/", "dblp": ";48/7883.html/;", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;;", "orcid": ";0000-0001-5653-152X;0000-0003-4979-4879", "linkedin": ";;feng-zhu-165a98184/", "or_profile": "~David_Simchi-Levi2;~Zeyu_Zheng2;~Feng_Zhu7", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;Massachusetts Institute of Technology", "aff_domain": "mit.edu;berkeley.edu;mit.edu", "position": "Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nsimchi-levi2023stochastic,\ntitle={Stochastic Multi-armed Bandits: Optimal Trade-off among Optimality, Consistency, and Tail Risk},\nauthor={David Simchi-Levi and Zeyu Zheng and Feng Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0BfQT652sC}\n}", "github": "", "project": "", "reviewers": "RNGE;URLi;2XJT;tbar", "pdf_size": 327799, "rating": "7;7;7;7", "confidence": "4;2;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "182;118;75;99", "wc_strengths": "99;181;77;42", "wc_weaknesses": "472;120;13;1", "wc_questions": "3;91;65;110", "wc_limitations": "1;13;1;1", "wc_review": "757;523;231;253", "wc_reply_reviewers": "196;25;35;13", "wc_reply_authors": "66;7;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 118.5, 39.702015062210634 ], "wc_strengths_avg": [ 99.75, 51.12423593561081 ], "wc_weaknesses_avg": [ 151.5, 190.75180208847308 ], "wc_questions_avg": [ 67.25, 40.38796231552169 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 441.0, 215.65249824660043 ], "wc_reply_reviewers_avg": [ 67.25, 74.74080210969106 ], "wc_reply_authors_avg": [ 18.25, 27.716195626384224 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9072000651563557202&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "mit.edu;berkeley.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu", "aff_unique_abbr": "MIT;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Gaussian Process Probes (GPP) for Uncertainty-Aware Probing", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/73071", "id": "0BwB03qA5T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c8b100b376a7b338c84801b699935098-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0BwB03qA5T", "openreview": "https://openreview.net/forum?id=0BwB03qA5T", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73071", "video": "https://nips.cc/virtual/2023/poster/73071", "author_site": "Zi Wang, Alexander Ku, Jason Baldridge, Tom Griffiths, Been Kim", "tldr": "", "abstract": "Understanding which concepts models can and cannot represent has been fundamental to many tasks: from effective and responsible use of models to detecting out of distribution data. We introduce Gaussian process probes (GPP), a unified and simple framework for probing and measuring uncertainty about concepts represented by models. As a Bayesian extension of linear probing methods, GPP asks what kind of distribution over classifiers (of concepts) is induced by the model. This distribution can be used to measure both what the model represents and how confident the probe is about what the model represents. GPP can be applied to any pre-trained model with vector representations of inputs (e.g., activations). It does not require access to training data, gradients, or the architecture. We validate GPP on datasets containing both synthetic and real images. Our experiments show it can (1) probe a model's representations of concepts even with a very small number of examples, (2) accurately measure both epistemic uncertainty (how confident the probe is) and aleatory uncertainty (how fuzzy the concepts are to the model), and (3) detect out of distribution data using those uncertainty measures as well as classic methods do. By using Gaussian processes to expand what probing can offer, GPP provides a data-efficient, versatile and uncertainty-aware tool for understanding and evaluating the capabilities of machine learning models.", "keywords": "Interpretability;probing;Bayesian;Gaussian process;transparency", "primary_area": "", "supplementary_material": "/attachment/812da05b424f227c20e6ddb5279556aade533d17.zip", "author": "Zi Wang;Alexander Ku;Jason Michael Baldridge;Thomas L. Griffiths;Been Kim", "authorids": "~Zi_Wang1;~Alexander_Ku1;~Jason_Michael_Baldridge1;~Thomas_L._Griffiths1;~Been_Kim1", "gender": "F;M;M;;", "homepage": "http://zi-wang.com/;https://alexyku.github.io/;https://research.google/people/jasonbaldridge/?&type=google;http://cocosci.princeton.edu/tom/;https://beenkim.github.io/", "dblp": "78/8711-4;215/4289.html;90/6617;34/4472;https://dblp.uni-trier.de/pers/k/Kim:Been.html", "google_scholar": "U0egIsIAAAAJ;Lh_ZqdcAAAAJ;TP_JZm8AAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;", "linkedin": ";;jason-baldridge-9b26295/;;", "or_profile": "~Zi_Wang1;~Alexander_Ku1;~Jason_Michael_Baldridge1;~Thomas_L._Griffiths1;~Been_Kim1", "aff": "Google DeepMind;Google;Google;Princeton University;Google DeepMind", "aff_domain": "google.com;google.com;google.com;princeton.edu;google.com", "position": "Research scientist;Researcher;Research Scientist;Professor;Research Scientist", "bibtex": "@inproceedings{\nwang2023gaussian,\ntitle={Gaussian Process Probes ({GPP}) for Uncertainty-Aware Probing},\nauthor={Zi Wang and Alexander Ku and Jason Michael Baldridge and Thomas L. 
Griffiths and Been Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0BwB03qA5T}\n}", "github": "", "project": "", "reviewers": "3D6e;VzJz;iFt6;5Gmc;zzXk", "pdf_size": 5566670, "rating": "5;6;6;6;7", "confidence": "2;2;3;3;3", "soundness": "3;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;2;2;4", "wc_summary": "61;117;200;33;78", "wc_strengths": "62;55;66;56;40", "wc_weaknesses": "206;21;231;51;180", "wc_questions": "61;102;687;17;95", "wc_limitations": "9;88;1;5;6", "wc_review": "399;383;1185;162;399", "wc_reply_reviewers": "34;40;0;49;29", "wc_reply_authors": "241;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 97.8, 57.894386601811405 ], "wc_strengths_avg": [ 55.8, 8.863407922464134 ], "wc_weaknesses_avg": [ 137.8, 85.1995305151384 ], "wc_questions_avg": [ 192.4, 249.1261527820795 ], "wc_limitations_avg": [ 21.8, 33.19879515886082 ], "wc_review_avg": [ 505.6, 351.39812179350076 ], "wc_reply_reviewers_avg": [ 30.4, 16.596385148579795 ], "wc_reply_authors_avg": [ 48.2, 96.39999999999999 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6520159360600644785&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;princeton.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Google;Princeton University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.princeton.edu", "aff_unique_abbr": "DeepMind;Princeton", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Partial Label Learning with Dissimilarity Propagation guided Candidate Label Shrinkage", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73070", "id": "0CbmvZPBGB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b97236d90d945be7c58268207a14f4f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0CbmvZPBGB", "openreview": "https://openreview.net/forum?id=0CbmvZPBGB", "poster": "/media/PosterPDFs/NeurIPS%202023/73070.png?t=1701934129.5486205", "slides": "https://nips.cc/virtual/2023/poster/73070", "video": "https://nips.cc/virtual/2023/poster/73070", "author_site": "Yuheng Jia, Fuchao Yang, Yongqiang Dong", "tldr": "", "abstract": "In partial label learning (PLL), each sample is associated with a group of candidate labels, among which only one label is correct. The key of PLL is to disambiguate the candidate label set to find the ground-truth label. To this end, we first construct a constrained regression model to capture the confidence of the candidate labels, and multiply the label confidence matrix by its transpose to build a second-order similarity matrix, whose elements indicate the pairwise similarity relationships of samples globally. 
Then we develop a semantic dissimilarity matrix by considering the complement of the intersection of the candidate label set, and further propagate the initial dissimilarity relationships to the whole data set by leveraging the local geometric structure of samples. The similarity and dissimilarity matrices form an adversarial relationship, which is further utilized to shrink the solution space of the label confidence matrix and promote the dissimilarity matrix. We finally extend the proposed model to a kernel version to exploit the non-linear structure of samples and solve the proposed model by the inexact augmented Lagrange multiplier method. By exploiting the adversarial prior, the proposed method can significantly outperform\nstate-of-the-art PLL algorithms when evaluated on 10 artificial and 7 real-world partial label data sets. We also prove the effectiveness of our method with some theoretical guarantees. The code is publicly available at https://github.com/Yangfc-ML/DPCLS.", "keywords": "partial label learning;dissimilarity propagation;candidate label shrinkage", "primary_area": "", "supplementary_material": "/attachment/3c10942bfc2db1272f510efd3c4425788bccd9d8.pdf", "author": "Yuheng Jia;Fuchao Yang;Yongqiang Dong", "authorids": "~Yuheng_Jia1;~Fuchao_Yang1;dongyq@seu.edu.cn", "gender": ";M;", "homepage": "https://jyh-learning.github.io/;https://yangfc-ml.github.io/;", "dblp": "160/7861;336/2241;", "google_scholar": "https://scholar.google.com.hk/citations?user=iEW0KRUAAAAJ;D7XloC8AAAAJ;", "orcid": "0000-0002-3907-6550;0000-0002-5209-7153;", "linkedin": ";;", "or_profile": "~Yuheng_Jia1;~Fuchao_Yang1;dongyq@seu.edu.cn", "aff": "Southeast University;Southeast University;", "aff_domain": "seu.edu.cn;seu.edu.cn;", "position": "Associate Professor;MS student;", "bibtex": "@inproceedings{\njia2023partial,\ntitle={Partial Label Learning with Dissimilarity Propagation guided Candidate Label Shrinkage},\nauthor={Yuheng Jia and Fuchao Yang and Yongqiang Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0CbmvZPBGB}\n}", "github": "", "project": "", "reviewers": "vgJ9;ysvn;6Rie;9Zpk;sPwu", "pdf_size": 811069, "rating": "5;5;6;6;7", "confidence": "4;4;4;4;3", "soundness": "3;3;2;3;3", "novelty": "3;2;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "113;139;53;79;125", "wc_strengths": "30;75;26;69;67", "wc_weaknesses": "47;226;36;37;156", "wc_questions": "43;3;147;135;6", "wc_limitations": "1;3;8;7;6", "wc_review": "234;446;270;327;360", "wc_reply_reviewers": "59;10;43;9;43", "wc_reply_authors": "20;20;20;20;20", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 101.8, 31.460451363577096 ], "wc_strengths_avg": [ 53.4, 20.943734146517425 ], "wc_weaknesses_avg": [ 100.4, 77.31131870560739 ], "wc_questions_avg": [ 66.8, 62.3166109476438 ], "wc_limitations_avg": [ 5.0, 2.6076809620810595 ], "wc_review_avg": [ 327.4, 73.6847338327282 ], "wc_reply_reviewers_avg": [ 32.8, 19.903768487399564 ], "wc_reply_authors_avg": [ 20.0, 0.0 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8017837257372731, "gs_citation": 10, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16070675925425987636&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "seu.edu.cn;seu.edu.cn;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Adaptive Uncertainty Estimation via High-Dimensional Testing on Latent Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73069", "id": "0DpKUzl1Se", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7da558c6bd476ba77f5ba712626bba1a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0DpKUzl1Se", "openreview": "https://openreview.net/forum?id=0DpKUzl1Se", "poster": "/media/PosterPDFs/NeurIPS%202023/73069.png?t=1702013976.0093153", "slides": "https://nips.cc/virtual/2023/poster/73069", "video": "https://nips.cc/virtual/2023/poster/73069", "author_site": "Tsai Hor Chan, Kin Wai Lau, Jiajun Shen, Guosheng Yin, Lequan Yu", "tldr": "", "abstract": "Uncertainty estimation aims to evaluate the confidence of a trained deep neural network. However, existing uncertainty estimation approaches rely on low-dimensional distributional assumptions and thus suffer from the high dimensionality of latent features. Existing approaches tend to focus on uncertainty on discrete classification probabilities, which leads to poor generalizability to uncertainty estimation for other tasks. Moreover, most of the literature requires seeing the out-of-distribution (OOD) data in the training for better estimation of uncertainty, which limits the uncertainty estimation performance in practice because the OOD data are typically unseen. To overcome these limitations, we propose a new framework using data-adaptive high-dimensional hypothesis testing for uncertainty estimation, which leverages the statistical properties of the feature representations. Our method directly operates on latent representations and thus does not require retraining the feature encoder under a modified objective. The test statistic relaxes the feature distribution assumptions to high dimensionality, and it is more discriminative to uncertainties in the latent representations. We demonstrate that encoding features with Bayesian neural networks can enhance testing performance and lead to more accurate uncertainty estimation. We further introduce a family-wise testing procedure to determine the optimal threshold of OOD detection, which minimizes the false discovery rate (FDR). Extensive experiments validate the satisfactory performance of our framework on uncertainty estimation and task-specific prediction over a variety of competitors. The experiments on the OOD detection task also show satisfactory performance of our method when the OOD data are unseen in the training. 
Codes are available at https://github.com/HKU-MedAI/bnn_uncertainty.", "keywords": "Bayesian deep learning;high-dimensional testing;uncertainty estimation;out-of-distribution detection", "primary_area": "", "supplementary_material": "/attachment/b7325be78c706406d7f486f75e9a75112810b15f.pdf", "author": "Tsai Hor Chan;Kin Wai Lau;Jiajun Shen;Guosheng Yin;Lequan Yu", "authorids": "~Tsai_Hor_Chan1;~Kin_Wai_Lau1;~Jiajun_Shen2;~Guosheng_Yin1;~Lequan_Yu1", "gender": "M;M;;M;M", "homepage": "https://howardchanth.github.io/;;;https://saasresearch.hku.hk/~gyin/;https://yulequan.github.io/", "dblp": "340/3009;232/0177;;185/3223;165/8092", "google_scholar": "SBy4bDcAAAAJ;inhIzDgAAAAJ;;HAE6EtkAAAAJ;https://scholar.google.com.hk/citations?user=llXf3wUAAAAJ", "orcid": "0000-0002-3545-397X;0000-0001-5364-5070;;0000-0003-3276-1392;0000-0002-9315-6527", "linkedin": ";;;guosheng-yin-91089790/?originalSubdomain=hk;", "or_profile": "~Tsai_Hor_Chan1;~Kin_Wai_Lau1;~Jiajun_Shen2;~Guosheng_Yin1;~Lequan_Yu1", "aff": "University of Hong Kong;City University of Hong Kong;;University of Hong Kong;The University of Hong Kong", "aff_domain": "hku.hk;cityu.edu.hk;;hku.hk;hku.hk", "position": "PhD student;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchan2023adaptive,\ntitle={Adaptive Uncertainty Estimation via High-Dimensional Testing on Latent Representations},\nauthor={Tsai Hor Chan and Kin Wai Lau and Jiajun Shen and Guosheng Yin and Lequan Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0DpKUzl1Se}\n}", "github": "", "project": "", "reviewers": "HkqB;VpVz;jk7P;vwXa", "pdf_size": 2237962, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "4;3;3;3", "novelty": "3;3;3;2", "presentation": "3;2;3;3", "wc_summary": "58;104;130;92", "wc_strengths": "264;45;133;29", "wc_weaknesses": "222;210;247;304", "wc_questions": "157;108;24;2", "wc_limitations": "87;1;1;2", "wc_review": "788;468;535;429", "wc_reply_reviewers": "87;9;213;0", "wc_reply_authors": "62;0;773;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.0, 25.88435821108957 ], "wc_strengths_avg": [ 117.75, 93.26139340584614 ], "wc_weaknesses_avg": [ 245.75, 36.18269614055868 ], "wc_questions_avg": [ 72.75, 62.69519519070022 ], "wc_limitations_avg": [ 22.75, 37.09700122651425 ], "wc_review_avg": [ 555.0, 139.7622982066337 ], "wc_reply_reviewers_avg": [ 77.25, 85.36502503953244 ], "wc_reply_authors_avg": [ 208.75, 326.75172149508256 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10087232578120664044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hku.hk;cityu.edu.hk;;hku.hk;hku.hk", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Hong Kong;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.cityu.edu.hk", "aff_unique_abbr": "HKU;CityU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": 
"AR-Diffusion: Auto-Regressive Diffusion Model for Text Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73068", "id": "0EG6qUQ4xE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d866abba506e5a56335e4644ebe18f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0EG6qUQ4xE", "openreview": "https://openreview.net/forum?id=0EG6qUQ4xE", "poster": "/media/PosterPDFs/NeurIPS%202023/73068.png?t=1697696990.3066034", "slides": "https://nips.cc/virtual/2023/poster/73068", "video": "https://nips.cc/virtual/2023/poster/73068", "author_site": "Tong Wu, Zhihao Fan, Xiao Liu, Hai-Tao Zheng, Yeyun Gong, yelong shen, Jian Jiao, Juntao Li, zhongyu wei, Jian Guo, Nan Duan, Weizhu Chen", "tldr": "", "abstract": "Diffusion models have gained significant attention in the realm of image generation due to their exceptional performance. Their success has been recently expanded to text generation via generating all tokens within a sequence concurrently. \nHowever, natural language exhibits a far more pronounced sequential dependency in comparison to images, and the majority of existing language models are trained with a left-to-right auto-regressive approach.\nTo account for the inherent sequential characteristic of natural language, we introduce Auto-Regressive Diffusion (AR-Diffusion). AR-Diffusion ensures that the generation of tokens on the right depends on the generated ones on the left, a mechanism achieved through employing a dynamic number of denoising steps that vary based on token position. This results in tokens on the left undergoing fewer denoising steps than those on the right, thereby enabling them to generate earlier and subsequently influence the generation of tokens on the right.\nIn a series of experiments on various text generation tasks, including text summarization, machine translation, and common sense generation, AR-Diffusion clearly demonstrated its superiority over existing diffusion language models and that it can be $100\\times\\sim600\\times$ faster when achieving comparable results. 
Our code is available at https://github.com/microsoft/ProphetNet/tree/master/AR-diffusion.", "keywords": "text generation;diffusion model;auto-regression;sequential dependency", "primary_area": "", "supplementary_material": "/attachment/791f6308218f647b6c44684236e0ac9c8312e71b.pdf", "author": "Tong Wu;Zhihao Fan;Xiao Liu;Hai-Tao Zheng;Yeyun Gong;yelong shen;Jian Jiao;Juntao Li;zhongyu wei;Jian Guo;Nan Duan;Weizhu Chen", "authorids": "~Tong_Wu9;~Zhihao_Fan1;~Xiao_Liu14;~Hai-Tao_Zheng2;~Yeyun_Gong2;~yelong_shen1;~Jian_Jiao2;~Juntao_Li2;~zhongyu_wei1;~Jian_Guo2;~Nan_Duan1;~Weizhu_Chen1", "gender": "M;M;M;M;M;;M;M;M;M;M;M", "homepage": "https://wutong4012.github.io/;;https://xiaoliunlc.github.io/;https://www.sigs.tsinghua.edu.cn/fg3/105069.jhtml;;;;https://lijuntaopku.github.io/;http://www.sdspeople.fudan.edu.cn/zywei/;https://idea.edu.cn/person/guojian/;https://nanduan.github.io/;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": ";220/0988;82/1364-29;20/134-2;06/10400.html;;29/265-7.html;;31/10489;96/2596-2;;79/2536", "google_scholar": "https://scholar.google.com.hk/citations?user=yn0GDR4AAAAJ;xfqnSacAAAAJ;https://scholar.google.com.sg/citations?user=cn1k7gYAAAAJ;https://scholar.google.com.hk/citations?user=7VPeORoAAAAJ;piUkwMYAAAAJ;;D6KwmF8AAAAJ;sZSygsYAAAAJ;AjLDxxgAAAAJ;;Qaa6OxIAAAAJ;LG_E-4EAAAAJ", "orcid": ";;0000-0002-8893-366X;0000-0001-5128-5649;;;0000-0003-4779-9588;0000-0002-6286-7529;;;;", "linkedin": ";;xiao-liu-71357b72/;;;;jian-jiao-82897810/;;;;;", "or_profile": "~Tong_Wu9;~Zhihao_Fan1;~Xiao_Liu14;~Hai-Tao_Zheng2;~Yeyun_Gong2;~yelong_shen1;~Jian_Jiao2;~Juntao_Li2;~zhongyu_wei1;~Jian_Guo2;~Nan_Duan1;~Weizhu_Chen1", "aff": "Tsinghua University;Fudan University;Microsoft Research Asia;Tsinghua University;Microsoft;;Microsoft;Soochow University, China;Fudan University;International Digital Economy Academy, International Digital Economy Academy;Microsoft Research Asia;Microsoft GenAI", "aff_domain": "mails.tsinghua.edu.cn;fudan.edu.cn;microsoft.com;tsinghua.edu.cn;microsoft.com;;microsoft.com;suda.edu.cn;fudan.edu.cn;idea.edu.cn;microsoft.com;microsoft.com", "position": "MS student;PhD student;Researcher;Associate Professor;Researcher;;Principal Researcher;Associate Professor;Associate Professor;Researcher;Principal Researcher;Vice President", "bibtex": "@inproceedings{\nwu2023ardiffusion,\ntitle={{AR}-Diffusion: Auto-Regressive Diffusion Model for Text Generation},\nauthor={Tong Wu and Zhihao Fan and Xiao Liu and Hai-Tao Zheng and Yeyun Gong and yelong shen and Jian Jiao and Juntao Li and zhongyu wei and Jian Guo and Nan Duan and Weizhu Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0EG6qUQ4xE}\n}", "github": "", "project": "", "reviewers": "63dm;TRPf;CQLg;i56m", "pdf_size": 1241001, "rating": "4;5;5;7", "confidence": "4;4;3;3", "soundness": "2;4;3;4", "novelty": "2;3;2;4", "presentation": "2;2;3;4", "wc_summary": "77;41;64;121", "wc_strengths": "42;32;59;53", "wc_weaknesses": "259;23;158;14", "wc_questions": "9;199;29;29", "wc_limitations": "1;1;148;28", "wc_review": "388;296;458;245", "wc_reply_reviewers": "13;14;196;28", "wc_reply_authors": "71;44;659;32", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.75, 29.13224158900238 ], 
"wc_strengths_avg": [ 46.5, 10.35615758860399 ], "wc_weaknesses_avg": [ 113.5, 101.53940121942811 ], "wc_questions_avg": [ 66.5, 76.93341276714558 ], "wc_limitations_avg": [ 44.5, 60.76388730158728 ], "wc_review_avg": [ 346.75, 82.168652781946 ], "wc_reply_reviewers_avg": [ 62.75, 77.16014191277775 ], "wc_reply_authors_avg": [ 201.5, 264.51512244104305 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3555010863554485749&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "mails.tsinghua.edu.cn;fudan.edu.cn;microsoft.com;tsinghua.edu.cn;microsoft.com;;microsoft.com;suda.edu.cn;fudan.edu.cn;idea.edu.cn;microsoft.com;microsoft.com", "author_num": 12, "aff_unique_index": "0;1;2;0;2;2;3;1;4;2;2", "aff_unique_norm": "Tsinghua University;Fudan University;Microsoft;Soochow University;International Digital Economy Academy", "aff_unique_dep": ";;Research;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.fudan.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.soochow.edu.cn;", "aff_unique_abbr": "THU;Fudan;MSR Asia;Soochow U;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;1;1;0;0;0;1", "aff_country_unique": "China;United States;" }, { "title": "Efficient Potential-based Exploration in Reinforcement Learning using Inverse Dynamic Bisimulation Metric", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73067", "id": "0FhKURbTyF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/79f7f00cbe3003cea4d0c2326b4c0b42-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0FhKURbTyF", "openreview": "https://openreview.net/forum?id=0FhKURbTyF", "poster": "/media/PosterPDFs/NeurIPS%202023/73067.png?t=1698222994.189243", "slides": "https://nips.cc/virtual/2023/poster/73067", "video": "https://nips.cc/virtual/2023/poster/73067", "author_site": "Yiming Wang, Ming Yang, Renzhi Dong, Binbin Sun, Furui Liu, Leong Hou U", "tldr": "", "abstract": "Reward shaping is an effective technique for integrating domain knowledge into reinforcement learning (RL). However, traditional approaches like potential-based reward shaping totally rely on manually designing shaping reward functions, which significantly restricts exploration efficiency and introduces human cognitive biases.\nWhile a number of RL methods have been proposed to boost exploration by designing an intrinsic reward signal as exploration bonus. Nevertheless, these methods heavily rely on the count-based episodic term in their exploration bonus which falls short in scalability. To address these limitations, we propose a general end-to-end potential-based exploration bonus for deep RL via potentials of state discrepancy, which motivates the agent to discover novel states and provides them with denser rewards without manual intervention. Specifically, we measure the novelty of adjacent states by calculating their distance using the bisimulation metric-based potential function, which enhances agent's exploration and ensures policy invariance. 
In addition, we offer a theoretical guarantee on our inverse dynamic bisimulation metric, bounding the value difference and ensuring that the agent explores states with higher TD error, thus significantly improving training efficiency. The proposed approach is named \\textbf{LIBERTY} (exp\\textbf{L}oration v\\textbf{I}a \\textbf{B}isimulation m\\textbf{E}t\\textbf{R}ic-based s\\textbf{T}ate discrepanc\\textbf{Y}) which is comprehensively evaluated on the MuJoCo and the Arcade Learning Environments. Extensive experiments have verified the superiority and scalability of our algorithm compared with other competitive methods.", "keywords": "Reinforcement learning;reward shaping;potential-based exploration;inverse dynamic bisimulation metric", "primary_area": "", "supplementary_material": "/attachment/7cfe3a034df5aa21fd3e811461edd1e822370667.zip", "author": "YIMING WANG;Ming Yang;Renzhi Dong;Binbin Sun;Furui Liu;Leong Hou U", "authorids": "~YIMING_WANG8;~Ming_Yang16;~Renzhi_Dong1;~Binbin_Sun1;~Furui_Liu1;~Leong_Hou_U2", "gender": "M;;;M;M;M", "homepage": ";;;https://github.com/zhushi-math;;https://www.fst.um.edu.mo/personal/ryanlhu/", "dblp": ";;;;116/7289;38/4996", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;RWwpD58AAAAJ;;https://scholar.google.com.hk/citations?user=DJY8NXMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7113-9726;;;;;0000-0002-5135-5165", "linkedin": ";;;;;", "or_profile": "~YIMING_WANG8;~Ming_Yang16;~Renzhi_Dong1;~Binbin_Sun1;~Furui_Liu1;~Leong_Hou_U2", "aff": "University of Macau;;Shanghai Normal University;University of Macau;Zhejiang Lab & UCAS & Zhejiang University;University of macau", "aff_domain": "um.edu.mo;;shnu.edu.cn;umac.mo;zhejianglab.com;um.edu.mo", "position": "PhD student;;MS student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023efficient,\ntitle={Efficient Potential-based Exploration in Reinforcement Learning using Inverse Dynamic Bisimulation Metric},\nauthor={YIMING WANG and Ming Yang and Renzhi Dong and Binbin Sun and Furui Liu and Leong Hou U},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0FhKURbTyF}\n}", "github": "", "project": "", "reviewers": "AtiU;dnFb;naKC;pk3V", "pdf_size": 3811003, "rating": "5;6;6;7", "confidence": "5;4;3;4", "soundness": "3;2;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "51;96;125;106", "wc_strengths": "67;76;58;63", "wc_weaknesses": "370;39;104;64", "wc_questions": "173;321;152;127", "wc_limitations": "29;24;35;44", "wc_review": "690;556;474;404", "wc_reply_reviewers": "891;0;25;10", "wc_reply_authors": "1652;0;21;19", "reply_reviewers": "3;0;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.5, 27.189152248645048 ], "wc_strengths_avg": [ 66.0, 6.59545297913646 ], "wc_weaknesses_avg": [ 144.25, 132.38273112456926 ], "wc_questions_avg": [ 193.25, 75.53269159774462 ], "wc_limitations_avg": [ 33.0, 7.44983221287567 ], "wc_review_avg": [ 531.0, 106.40018796975878 ], "wc_reply_reviewers_avg": [ 231.5, 380.8664464087116 ], "wc_reply_authors_avg": [ 423.0, 709.6108088241047 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], 
"corr_rating_confidence": -0.5, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4505174837210682078&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "um.edu.mo;;shnu.edu.cn;umac.mo;zhejianglab.com;um.edu.mo", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of Macau;Shanghai Normal University;Zhejiang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.um.edu.mo;http://www.shnu.edu.cn;http://www.zju.edu.cn", "aff_unique_abbr": "UM;SHNU;ZJU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Macau SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "RoboHive: A Unified Framework for Robot Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73725", "id": "0H5fRQcpQ7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8a84a4341c375b8441b36836bb343d4e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=0H5fRQcpQ7", "openreview": "https://openreview.net/forum?id=0H5fRQcpQ7", "poster": "/media/PosterPDFs/NeurIPS%202023/73725.png?t=1702502757.8337026", "slides": "https://nips.cc/virtual/2023/poster/73725", "video": "https://nips.cc/virtual/2023/poster/73725", "author_site": "Vikash Kumar, Rutav Shah, Gaoyue Zhou, Vincent Moens, Vittorio Caggiano, Abhishek Gupta, Aravind Rajeswaran", "tldr": "", "abstract": "We present RoboHive, a comprehensive software platform and ecosystem for research in the field of Robot Learning and Embodied Artificial Intelligence. Our platform encompasses a diverse range of pre-existing and novel environments, including dexterous manipulation with the Shadow Hand, whole-arm manipulation tasks with Franka and Fetch robots, quadruped locomotion, among others. Included environments are organized within and cover multiple domains such as hand manipulation, locomotion, multi-task, multi-agent, muscles, etc. In comparison to prior works, RoboHive offers a streamlined and unified task interface taking dependency on only a minimal set of well-maintained packages, features tasks with high physics fidelity and rich visual diversity, and supports common hardware drivers for real-world deployment. The unified interface of RoboHive offers a convenient and accessible abstraction for algorithmic research in imitation, reinforcement, multi-task, and hierarchical learning. Furthermore, RoboHive includes expert demonstrations and baseline results for most environments, providing a standard for benchmarking and comparisons. 
Details: https://sites.google.com/view/robohive", "keywords": "Robot Learning", "primary_area": "", "supplementary_material": "/attachment/ac776053fc263c9a219fef15c9429eee3c332e1d.zip", "author": "Vikash Kumar;Rutav Shah;Gaoyue Zhou;Vincent Moens;Vittorio Caggiano;Abhishek Gupta;Aravind Rajeswaran", "authorids": "~Vikash_Kumar2;~Rutav_Shah1;~Gaoyue_Zhou1;~Vincent_Moens3;~Vittorio_Caggiano1;~Abhishek_Gupta1;~Aravind_Rajeswaran1", "gender": "M;M;F;M;;M;M", "homepage": "http://vikashplus.github.io/;https://shahrutav.github.io;https://gaoyuezhou.github.io/;https://github.com/vmoens;;https://homes.cs.washington.edu/~abhgupta/;http://aravindr93.github.io/", "dblp": "82/7475;;;220/5625;;18/6404-4;164/5778", "google_scholar": "nu3W--sAAAAJ;;-1iyBukAAAAJ;8l-tvFoAAAAJ;lCt9zVkAAAAJ;1wLVDP4AAAAJ;_EJrRVAAAAAJ", "orcid": ";;;;0000-0002-2186-1550;;", "linkedin": ";rutav-shah-01a2941a7;gaoyue-zhou/;vincent-moens-9bb91972/;vittorio-caggiano-26b6a7b/;;", "or_profile": "~Vikash_Kumar2;~Rutav_Shah1;~Gaoyue_Zhou1;~Vincent_Moens3;~Vittorio_Caggiano1;~Abhishek_Gupta1;~Aravind_Rajeswaran1", "aff": "Meta Facebook;University of Texas at Austin;Carnegie Mellon University;Meta;;University of Washington;Meta Facebook", "aff_domain": "facebook.com;utexas.edu;cmu.edu;fb.com;;uw.edu;meta.com", "position": "Researcher;PhD student;MS student;Applied ML Scientist;;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nkumar2023robohive,\ntitle={RoboHive: A Unified Framework for Robot Learning},\nauthor={Vikash Kumar and Rutav Shah and Gaoyue Zhou and Vincent Moens and Vittorio Caggiano and Abhishek Gupta and Aravind Rajeswaran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=0H5fRQcpQ7}\n}", "github": "", "project": "", "reviewers": "DRrC;NtXK;iKLG;UW1F", "pdf_size": 4111953, "rating": "5;6;7;8", "confidence": "3;4;3;3", "wc_summary_and_contributions": "61;82;80;67", "wc_strengths": "26;69;52;47", "wc_improvement": "2;122;124;88", "wc_limitations": "87;6;1;29", "wc_correctness": "2;6;1;1", "wc_clarity": "2;26;1;28", "wc_relation_to_prior_work": "10;9;1;34", "wc_documentation": "7;4;1;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "198;325;262;305", "wc_reply_reviewers": "0;342;54;132", "wc_reply_authors": "754;2351;574;351", "reply_reviewers": "0;1;1;1", "reply_authors": "2;5;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 72.5, 8.789197915623474 ], "wc_strengths_avg": [ 48.5, 15.337861650177967 ], "wc_improvement_avg": [ 84.0, 49.457052075512955 ], "wc_limitations_avg": [ 30.75, 34.14948755105997 ], "wc_correctness_avg": [ 2.5, 2.0615528128088303 ], "wc_clarity_avg": [ 14.25, 12.774486291041217 ], "wc_relation_to_prior_work_avg": [ 13.5, 12.338962679253067 ], "wc_documentation_avg": [ 5.5, 3.3541019662496847 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 272.5, 48.66466890876789 ], "wc_reply_reviewers_avg": [ 132.0, 130.0076920801227 ], "wc_reply_authors_avg": [ 1007.5, 788.6965512793878 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14141354778639593428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": 
"facebook.com;utexas.edu;cmu.edu;fb.com;;uw.edu;meta.com", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Meta;University of Texas at Austin;Carnegie Mellon University;University of Washington", "aff_unique_dep": "Meta Platforms, Inc.;;;", "aff_unique_url": "https://meta.com;https://www.utexas.edu;https://www.cmu.edu;https://www.washington.edu", "aff_unique_abbr": "Meta;UT Austin;CMU;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Agent Meta-Reinforcement Learning: Sharper Convergence Rates with Task Similarity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73066", "id": "0Iw2dLh8uq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1b1a091088904cbc7f7faa2b45c8f36-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Iw2dLh8uq", "openreview": "https://openreview.net/forum?id=0Iw2dLh8uq", "poster": "/media/PosterPDFs/NeurIPS%202023/73066.png?t=1698851518.7243178", "slides": "https://nips.cc/virtual/2023/poster/73066", "video": "https://nips.cc/virtual/2023/poster/73066", "author_site": "Weichao Mao, Haoran Qiu, Chen Wang, Hubertus Franke, Zbigniew Kalbarczyk, Ravishankar Iyer, Tamer Basar", "tldr": "", "abstract": "Multi-agent reinforcement learning (MARL) has primarily focused on solving a single task in isolation, while in practice the environment is often evolving, leaving many related tasks to be solved. In this paper, we investigate the benefits of meta-learning in solving multiple MARL tasks collectively. We establish the first line of theoretical results for meta-learning in a wide range of fundamental MARL settings, including learning Nash equilibria in two-player zero-sum Markov games and Markov potential games, as well as learning coarse correlated equilibria in general-sum Markov games. Under natural notions of task similarity, we show that meta-learning achieves provable sharper convergence to various game-theoretical solution concepts than learning each task separately. As an important intermediate step, we develop multiple MARL algorithms with initialization-dependent convergence guarantees. Such algorithms integrate optimistic policy mirror descents with stage-based value updates, and their refined convergence guarantees (nearly) recover the best known results even when a good initialization is unknown. To our best knowledge, such results are also new and might be of independent interest. 
We further provide numerical simulations to corroborate our theoretical findings.", "keywords": "Reinforcement learning;game theory;multi-agent systems;meta-learning", "primary_area": "", "supplementary_material": "/attachment/4b80fdedc518143db9fcb784c78143a4220b4e55.pdf", "author": "Weichao Mao;Haoran Qiu;Chen Wang;Hubertus Franke;Zbigniew Kalbarczyk;Ravi Iyer;Tamer Basar", "authorids": "~Weichao_Mao1;~Haoran_Qiu1;~Chen_Wang17;~Hubertus_Franke1;~Zbigniew_Kalbarczyk1;~Ravi_Iyer1;~Tamer_Basar1", "gender": ";M;F;;M;M;M", "homepage": ";https://haoran-qiu.com/;https://research.ibm.com/people/chen-wang;https://researcher.watson.ibm.com/researcher/view.php?person=us-frankeh;https://depend.csl.illinois.edu/#sthash.YS6mIoZS.dpbs;https://www.ece.illinois.edu/directory/profile/rkiyer/;http://tamerbasar.csl.illinois.edu/", "dblp": ";217/3886;;17/3453;;i/RavishankarKIyer;b/TamerBasar", "google_scholar": ";9AZbRFMAAAAJ;JL6iWLgAAAAJ;iklPa0oAAAAJ;;;", "orcid": ";;0000-0003-0204-2362;;;;", "linkedin": ";;chenw615/;hubertus-franke/;;;", "or_profile": "~Weichao_Mao1;~Haoran_Qiu1;~Chen_Wang17;~Hubertus_Franke1;~Zbigniew_Kalbarczyk1;~Ravi_Iyer1;~Tamer_Basar1", "aff": ";University of Illinois, Urbana Champaign;International Business Machines;New York University;University of Illinois, Urbana Champaign;University of Illinois;University of Illinois, Urbana Champaign", "aff_domain": ";illinois.edu;ibm.com;cs.nyu.edu;illinois.edu;illinois.edu;illinois.edu", "position": ";PhD student;Researcher;Full Professor;Full Professor;Full Professor;Emeritus", "bibtex": "@inproceedings{\nmao2023multiagent,\ntitle={Multi-Agent Meta-Reinforcement Learning: Sharper Convergence Rates with Task Similarity},\nauthor={Weichao Mao and Haoran Qiu and Chen Wang and Hubertus Franke and Zbigniew Kalbarczyk and Ravi Iyer and Tamer Basar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0Iw2dLh8uq}\n}", "github": "", "project": "", "reviewers": "CSow;kvor;Mmgh;wdob;8vaQ", "pdf_size": 551890, "rating": "4;6;8;8;8", "confidence": "3;3;1;4;3", "soundness": "2;2;4;4;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "27;38;16;30;75", "wc_strengths": "84;38;31;66;73", "wc_weaknesses": "127;20;1;245;145", "wc_questions": "182;32;1;24;25", "wc_limitations": "27;1;7;1;2", "wc_review": "447;129;56;366;320", "wc_reply_reviewers": "184;27;0;26;38", "wc_reply_authors": "350;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.8, 1.6 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 37.2, 20.173249614278806 ], "wc_strengths_avg": [ 58.4, 20.460694025374604 ], "wc_weaknesses_avg": [ 107.6, 89.09680128938413 ], "wc_questions_avg": [ 52.8, 65.43821513458325 ], "wc_limitations_avg": [ 7.6, 9.951884243699782 ], "wc_review_avg": [ 263.6, 147.32087428467156 ], "wc_reply_reviewers_avg": [ 55.0, 65.69627082262737 ], "wc_reply_authors_avg": [ 70.0, 140.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.15309310892394867, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10251116130613001178&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
";illinois.edu;ibm.com;cs.nyu.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;International Business Machines Corporation;New York University;University of Illinois", "aff_unique_dep": ";;;", "aff_unique_url": "https://illinois.edu;https://www.ibm.com;https://www.nyu.edu;https://www.illinois.edu", "aff_unique_abbr": "UIUC;IBM;NYU;UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dynamic Non-monotone Submodular Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73065", "id": "0K1ZTfHZ0N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/387982dbf23d9975c7fc45813dd3dabc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0K1ZTfHZ0N", "openreview": "https://openreview.net/forum?id=0K1ZTfHZ0N", "poster": "/media/PosterPDFs/NeurIPS%202023/73065.png?t=1702419770.1895626", "slides": "https://nips.cc/virtual/2023/poster/73065", "video": "https://nips.cc/virtual/2023/poster/73065", "author_site": "Kiarash Banihashem, Leyla Biabani, Samira Goudarzi, MohammadTaghi Hajiaghayi, Peyman Jabbarzade, Morteza Monemizadeh", "tldr": "", "abstract": "Maximizing submodular functions has been increasingly used in many applications of machine learning, such as data summarization, recommendation systems, and feature selection. Moreover, there has been a growing interest in both submodular maximization and dynamic algorithms. \nIn 2020, Monemizadeh and \nLattanzi, Mitrovic, Norouzi-Fard, Tarnawski, and Zadimoghaddam initiated the development of dynamic algorithms for the monotone submodular maximization problem under the cardinality constraint $k$. \nIn 2022, Chen and Peng studied the complexity of this problem and raised an important open question: "\emph{Can we extend [fully dynamic] results (algorithm or hardness) to non-monotone submodular maximization?}". \nWe affirmatively answer their question by demonstrating a reduction from maximizing a non-monotone submodular function under the cardinality constraint $k$ to maximizing a monotone submodular function under the same constraint. \nThrough this reduction, we obtain the first dynamic algorithms to solve the non-monotone submodular maximization problem under the cardinality constraint $k$. Our algorithms maintain an $(8+\epsilon)$-approximation of the solution and use expected amortized $O(\epsilon^{-3}k^3\log^3(n)\log(k))$ or $O(\epsilon^{-1}k^2\log^3(k))$ oracle queries per update, respectively. 
\nFurthermore, we showcase the benefits of our dynamic algorithm for video summarization and max-cut problems on several real-world data sets.", "keywords": "Non-monotone submodular maximization;dynamic algorithm;oracle query;video summarization", "primary_area": "", "supplementary_material": "/attachment/311085928fb8e159373d4dac2c5f9c420bf98106.zip", "author": "Kiarash Banihashem;Leyla Biabani;Samira Goudarzi;MohammadTaghi Hajiaghayi;Peyman Jabbarzade;Morteza Monemizadeh", "authorids": "~Kiarash_Banihashem1;~Leyla_Biabani1;~Samira_Goudarzi1;~MohammadTaghi_Hajiaghayi1;~Peyman_Jabbarzade1;~Morteza_Monemizadeh1", "gender": "M;;F;M;M;M", "homepage": ";https://research.tue.nl/en/persons/leyla-biabani;;http://www.cs.umd.edu/~hajiagha/;https://research.tue.nl/en/persons/morteza-monemizadeh;", "dblp": "285/5061;;;334/4488;11/4322.html;308/2567", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?user=SQ1eGN4AAAAJ;wVH7Gp4AAAAJ;waMTjQcAAAAJ", "orcid": ";;;0000-0003-4842-0533;;", "linkedin": ";;;mohammad-hajiaghayi-2139a913a&ved=2ahUKEwjMyeH-5-_-AhV3K1kFHeeBDKwQjjh6BAgSEAE&usg=AOvVaw1NSVoT5FCGtOTi4eT8nr4b;;", "or_profile": "~Kiarash_Banihashem1;~Leyla_Biabani1;~Samira_Goudarzi1;~MohammadTaghi_Hajiaghayi1;~Morteza_Monemizadeh1;~Peyman_Jabbarzade_Ganje1", "aff": "University of Maryland, College Park;Eindhoven University of Technology;University of Maryland, College Park;University of Maryland, College Park;Eindhoven University of Technology;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;tue.nl;umd.edu;umd.edu;tue.nl;cs.umd.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbanihashem2023dynamic,\ntitle={Dynamic Non-monotone Submodular Maximization},\nauthor={Kiarash Banihashem and Leyla Biabani and Samira Goudarzi and MohammadTaghi Hajiaghayi and Peyman Jabbarzade and Morteza Monemizadeh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0K1ZTfHZ0N}\n}", "github": "", "project": "", "reviewers": "PoYA;Ma6Y;dvm4;4fuM", "pdf_size": 406542, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "2;3;4;4", "presentation": "3;2;4;3", "wc_summary": "85;24;185;185", "wc_strengths": "37;24;104;28", "wc_weaknesses": "203;33;156;145", "wc_questions": "3;95;212;1", "wc_limitations": "1;30;1;1", "wc_review": "329;206;658;360", "wc_reply_reviewers": "25;0;53;0", "wc_reply_authors": "27;0;1045;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 119.75, 68.72181240334105 ], "wc_strengths_avg": [ 48.25, 32.529794035622174 ], "wc_weaknesses_avg": [ 134.25, 62.383391219137806 ], "wc_questions_avg": [ 77.75, 86.3115722252816 ], "wc_limitations_avg": [ 8.25, 12.55736835487436 ], "wc_review_avg": [ 388.25, 166.04875037169055 ], "wc_reply_reviewers_avg": [ 19.5, 21.86892772862904 ], "wc_reply_authors_avg": [ 268.0, 448.73655968730696 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12215956188383441639&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umd.edu;tue.nl;umd.edu;umd.edu;tue.nl;cs.umd.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;1;2", "aff_unique_norm": "University of Maryland;Eindhoven University of Technology;University of Maryland, College Park", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www.tue.nl;https://www/umd.edu", "aff_unique_abbr": "UMD;TU/e;UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "United States;Netherlands" }, { "title": "Universal Prompt Tuning for Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73064", "id": "0LmWBhIYLi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4a1ee071ce0fe63b83bce507c9dc4d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0LmWBhIYLi", "openreview": "https://openreview.net/forum?id=0LmWBhIYLi", "poster": "/media/PosterPDFs/NeurIPS%202023/73064.png?t=1698238536.990879", "slides": "https://nips.cc/virtual/2023/poster/73064", "video": "https://nips.cc/virtual/2023/poster/73064", "author_site": "Taoran Fang, Yunchao Zhang, YANG YANG, Chunping Wang, Lei Chen", "tldr": "", "abstract": "In recent years, prompt tuning has sparked a research surge in adapting pre-trained models. Unlike the unified pre-training strategy employed in the language field, the graph field exhibits diverse pre-training strategies, posing challenges in designing appropriate prompt-based tuning methods for graph neural networks. While some pioneering work has devised specialized prompting functions for models that employ edge prediction as their pre-training tasks, these methods are limited to specific pre-trained GNN models and lack broader applicability. In this paper, we introduce a universal prompt-based tuning method called Graph Prompt Feature (GPF) for pre-trained GNN models under any pre-training strategy. GPF operates on the input graph's feature space and can theoretically achieve an equivalent effect to any form of prompting function. Consequently, we no longer need to illustrate the prompting function corresponding to each pre-training strategy explicitly. Instead, we employ GPF to obtain the prompted graph for the downstream task in an adaptive manner. We provide rigorous derivations to demonstrate the universality of GPF and make guarantee of its effectiveness. The experimental results under various pre-training strategies indicate that our method performs better than fine-tuning, with an average improvement of about 1.4% in full-shot scenarios and about 3.2% in few-shot scenarios. Moreover, our method significantly outperforms existing specialized prompt-based tuning methods when applied to models utilizing the pre-training strategy they specialize in. 
These numerous advantages position our method as a compelling alternative to fine-tuning for downstream adaptations.", "keywords": "graph neural networks;prompt tuning", "primary_area": "", "supplementary_material": "/attachment/30bfdf3b2c3b85ca8550b280513f9de10f629d44.zip", "author": "Taoran Fang;Yunchao Mercer Zhang;Yang Yang;Chunping Wang;Lei CHEN", "authorids": "~Taoran_Fang2;~Yunchao_Mercer_Zhang1;~Yang_Yang35;~Chunping_Wang1;~Lei_CHEN23", "gender": "M;M;M;F;M", "homepage": "https://www.baidu.com;https://yunchaozhang.netlify.app/;http://yangy.org;;https://www.linkedin.cn/incareer/in/ACoAAAPh8noB_KF0tgucaqFyKbDGOv9wkJkM0sY", "dblp": ";;;54/2715-1;09/3666a.html", "google_scholar": ";;;Rmy5RogAAAAJ;https://scholar.google.com.hk/citations?user=wDG2dMYAAAAJ", "orcid": ";;0000-0002-5058-4417;0000-0003-1854-8667;0000-0002-4912-3293", "linkedin": ";;;https://linkedin.com/in/chunping-wang-7b94a15/;", "or_profile": "~Taoran_Fang2;~Yunchao_Mercer_Zhang1;~Yang_Yang35;~Chunping_Wang1;~Lei_CHEN23", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Finvolution Group;Peking University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;xinye.com;pku.edu.cn", "position": "PhD student;Undergrad student;Associate Professor;Principal Scientist;PhD student", "bibtex": "@inproceedings{\nfang2023universal,\ntitle={Universal Prompt Tuning for Graph Neural Networks},\nauthor={Taoran Fang and Yunchao Mercer Zhang and Yang Yang and Chunping Wang and Lei CHEN},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0LmWBhIYLi}\n}", "github": "", "project": "", "reviewers": "KwHM;Cfab;GUF8;dbJu;aqDC", "pdf_size": 1824853, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;2;3;3", "wc_summary": "135;88;73;54;72", "wc_strengths": "122;33;17;67;72", "wc_weaknesses": "381;189;217;123;78", "wc_questions": "251;12;2;3;63", "wc_limitations": "1;7;7;8;11", "wc_review": "890;329;316;255;296", "wc_reply_reviewers": "22;18;47;27;12", "wc_reply_authors": "15;15;15;15;15", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 84.4, 27.499818181217126 ], "wc_strengths_avg": [ 62.2, 36.30647325202491 ], "wc_weaknesses_avg": [ 197.6, 103.8799306892337 ], "wc_questions_avg": [ 66.2, 95.09447933502764 ], "wc_limitations_avg": [ 6.8, 3.249615361854384 ], "wc_review_avg": [ 417.2, 237.72202253893096 ], "wc_reply_reviewers_avg": [ 25.2, 11.956588142108098 ], "wc_reply_authors_avg": [ 15.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2169274323586437571&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;xinye.com;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Zhejiang University;FinVolution Group;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.finvolutiongroup.com;http://www.pku.edu.cn", "aff_unique_abbr": "ZJU;;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "China" }, { "title": "Katakomba: Tools and Benchmarks for Data-Driven NetHack", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73724", "id": "0MGvE1Gkgv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5f596699d8d4637532f955c7f2860f4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=0MGvE1Gkgv", "openreview": "https://openreview.net/forum?id=0MGvE1Gkgv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73724", "video": "https://nips.cc/virtual/2023/poster/73724", "author_site": "Vladislav Kurenkov, Alexander Nikulin, Denis Tarasov, Sergey Kolesnikov", "tldr": "", "abstract": "NetHack is known as the frontier of reinforcement learning research where learning-based methods still need to catch up to rule-based solutions. One of the promising directions for a breakthrough is using pre-collected datasets similar to recent developments in robotics, recommender systems, and more under the umbrella of offline reinforcement learning (ORL). Recently, a large-scale NetHack dataset was released; while it was a necessary step forward, it has yet to gain wide adoption in the ORL community. In this work, we argue that there are three major obstacles for adoption: tool-wise, implementation-wise, and benchmark-wise. To address them, we develop an open-source library that provides workflow fundamentals familiar to the ORL community: pre-defined D4RL-style tasks, uncluttered baseline implementations, and reliable evaluation tools with accompanying configs and logs synced to the cloud.", "keywords": "Offline Reinforcement Learning;NetHack", "primary_area": "", "supplementary_material": "/attachment/56920fa3bd8b04df6a8ec28ab4820087663c808d.pdf", "author": "Vladislav Kurenkov;Alexander Nikulin;Denis Tarasov;Sergey Kolesnikov", "authorids": "~Vladislav_Kurenkov1;~Alexander_Nikulin1;~Denis_Tarasov1;~Sergey_Kolesnikov1", "gender": "M;M;;M", "homepage": "https://vkurenkov.me;https://howuhh.github.io/;https://dt6a.github.io/;https://scitator.com", "dblp": "251/9126;314/6349;255/7697;191/1945", "google_scholar": "w09vtVsAAAAJ;yACvnqUAAAAJ;LQcCkD8AAAAJ;iukbpVEAAAAJ", "orcid": "0000-0003-4078-1086;;0000-0001-9744-5265;", "linkedin": ";;tarasovdeal/;scitator/", "or_profile": "~Vladislav_Kurenkov1;~Alexander_Nikulin1;~Denis_Tarasov1;~Sergey_Kolesnikov1", "aff": "Tinkoff;Higher School of Economics, Higher School of Economics;Jacobs University Bremen;Tinkoff", "aff_domain": "tinkoff.ai;edu.hse.ru;jacobs-university.de;tinkoff.ru", "position": "Researcher;MS student;Undergrad student;Principal Researcher", "bibtex": "@inproceedings{\nkurenkov2023katakomba,\ntitle={Katakomba: Tools and Benchmarks for Data-Driven NetHack},\nauthor={Vladislav Kurenkov and Alexander Nikulin and Denis Tarasov and Sergey Kolesnikov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=0MGvE1Gkgv}\n}", "github": "", "project": "", "reviewers": "ZoWq;obG3;iC1Z;yKAo", "pdf_size": 715357, "rating": "4;6;7;7", "confidence": "3;3;4;3", "wc_summary_and_contributions": "63;34;107;83", "wc_strengths": "18;29;219;121", "wc_improvement": "251;51;187;26", "wc_limitations": "46;1;113;8", "wc_correctness": "19;1;55;9", "wc_clarity": "7;1;31;43", "wc_relation_to_prior_work": "19;1;48;6", "wc_documentation": "10;8;60;42", "wc_additional_feedback": "1;1;1;1", "wc_review": "434;127;821;339", "wc_reply_reviewers": 
"0;11;36;0", "wc_reply_authors": "835;218;688;153", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 71.75, 26.789690181112583 ], "wc_strengths_avg": [ 96.75, 81.12451848855561 ], "wc_improvement_avg": [ 128.75, 93.46222498956463 ], "wc_limitations_avg": [ 42.0, 44.42409256248236 ], "wc_correctness_avg": [ 21.0, 20.639767440550294 ], "wc_clarity_avg": [ 20.5, 17.168284713389397 ], "wc_relation_to_prior_work_avg": [ 18.5, 18.255136263528684 ], "wc_documentation_avg": [ 30.0, 21.95449840010015 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 430.25, 251.48894110874934 ], "wc_reply_reviewers_avg": [ 11.75, 14.703315952532613 ], "wc_reply_authors_avg": [ 473.5, 293.5528061524877 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11477810455054414491&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tinkoff.ai;edu.hse.ru;jacobs-university.de;tinkoff.ru", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Tinkoff Bank;Higher School of Economics;Jacobs University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tinkoff.ru;https://www.hse.ru;https://www.jacobs-university.de", "aff_unique_abbr": "Tinkoff;HSE;JUB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bremen", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Russian Federation;Germany" }, { "title": "ScaleLong: Towards More Stable Training of Diffusion Model via Scaling Network Long Skip Connection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73063", "id": "0N73P8pH2l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ded98d28f82342a39f371c013dfb3058-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0N73P8pH2l", "openreview": "https://openreview.net/forum?id=0N73P8pH2l", "poster": "/media/PosterPDFs/NeurIPS%202023/73063.png?t=1701252027.7398767", "slides": "https://nips.cc/virtual/2023/poster/73063", "video": "https://nips.cc/virtual/2023/poster/73063", "author_site": "Zhongzhan Huang, Pan Zhou, Pan Zhou, Shuicheng Yan, Liang Lin", "tldr": "", "abstract": "In diffusion models, UNet is the most popular network backbone, since its long skip connects (LSCs) to connect distant network blocks can aggregate long-distant information and alleviate vanishing gradient. Unfortunately, UNet often suffers from unstable training in diffusion models which can be alleviated by scaling its LSC coefficients smaller. However, theoretical understandings of the instability of UNet in diffusion models and also the performance improvement of LSC scaling remain absent yet. To solve this issue, we theoretically show that the coefficients of LSCs in UNet have big effects on the stableness of the forward and backward propagation and robustness of UNet. Specifically, the hidden feature and gradient of UNet at any layer can oscillate and their oscillation ranges are actually large which explains the instability of UNet training. Moreover, UNet is also provably sensitive to perturbed input, and predicts an output distant from the desired output, yielding oscillatory loss and thus oscillatory gradient. 
We also establish the theoretical benefits of LSC coefficient scaling for the stability of hidden features and gradients, as well as for robustness. Finally, inspired by our theory, we propose ScaleLong, an effective coefficient scaling framework that scales the coefficients of LSCs in UNet and further improves its training stability. Experimental results on CIFAR10, CelebA, ImageNet and COCO show that our method is superior in stabilizing training and yields about 1.5x training acceleration on different diffusion models with UNet or UViT backbones.", "keywords": "Diffusion Model;Stable Training;Network architectures", "primary_area": "", "supplementary_material": "/attachment/0bf11c2c9b306a5088702b92b1134930e2bc2e9c.zip", "author": "Zhongzhan Huang;Pan Zhou;Shuicheng YAN;Liang Lin", "authorids": "~Zhongzhan_Huang1;~Pan_Zhou3;~Shuicheng_YAN3;~Liang_Lin1", "gender": "M;;M;M", "homepage": "https://dedekinds.github.io/;;https://yanshuicheng.ai/;http://www.linliang.net", "dblp": "241/9753;;y/ShuichengYan;", "google_scholar": "R-b68CEAAAAJ;;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;https://scholar.google.com.hk/citations?user=Nav8m8gAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhongzhan_Huang1;~Pan_Zhou3;~Shuicheng_YAN3;~Liang_Lin1", "aff": "Sun Yat-Sen University;;sea Group;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;;sea.com;sysu.edu.cn", "position": "PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\nhuang2023scalelong,\ntitle={ScaleLong: Towards More Stable Training of Diffusion Model via Scaling Network Long Skip Connection},\nauthor={Zhongzhan Huang and Pan Zhou and Shuicheng YAN and Liang Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0N73P8pH2l}\n}", "github": "", "project": "", "reviewers": "Ttzr;Ehww;VLJa;7ePs;xZoY;KfUJ", "pdf_size": 20792407, "rating": "5;6;6;6;7;7", "confidence": "3;2;3;2;3;2", "soundness": "3;3;3;2;3;3", "novelty": "3;3;3;2;3;3", "presentation": "3;3;2;2;3;4", "wc_summary": "138;85;69;188;73;31", "wc_strengths": "58;45;44;101;89;82", "wc_weaknesses": "129;9;53;124;259;228", "wc_questions": "10;1;110;1;300;40", "wc_limitations": "9;1;10;1;1;1", "wc_review": "344;141;286;415;722;382", "wc_reply_reviewers": "0;14;67;42;62;15", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.166666666666667, 0.6871842709362768 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 97.33333333333333, 51.35064643107123 ], "wc_strengths_avg": [ 69.83333333333333, 22.025868629611157 ], "wc_weaknesses_avg": [ 133.66666666666666, 88.29055567965479 ], "wc_questions_avg": [ 77.0, 106.63645405457429 ], "wc_limitations_avg": [ 3.8333333333333335, 4.017323597731316 ], "wc_review_avg": [ 381.6666666666667, 175.79786372106145 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 25.335526220879817 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.24253562503633294, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=956831221007906396&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 8,
"email": "sysu.edu.cn;;sea.com;sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Sun Yat-sen University;Sea Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn/;", "aff_unique_abbr": "SYSU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "Gaussian Mixture Solvers for Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73062", "id": "0NuseeBuB4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51373b6499708b6fcc38f1e8f8f5b376-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0NuseeBuB4", "openreview": "https://openreview.net/forum?id=0NuseeBuB4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73062", "video": "https://nips.cc/virtual/2023/poster/73062", "author_site": "Hanzhong Guo, Cheng Lu, Fan Bao, Tianyu Pang, Shuicheng Yan, Chao Du, Chongxuan LI", "tldr": "", "abstract": "Recently, diffusion models have achieved great success in generative tasks. Sampling from diffusion models is equivalent to solving the reverse diffusion stochastic differential equations (SDEs) or the corresponding probability flow ordinary differential equations (ODEs). In comparison, SDE-based solvers can generate samples of higher quality and are suited for image translation tasks like stroke-based synthesis. During inference, however, existing SDE-based solvers are severely constrained by the efficiency-effectiveness dilemma. Our investigation suggests that this is because the Gaussian assumption in the reverse transition kernel is frequently violated (even in the case of simple mixture data) given a limited number of discretization steps. To overcome this limitation, we introduce a novel class of SDE-based solvers called \\emph{Gaussian Mixture Solvers (GMS)} for diffusion models. Our solver estimates the first three-order moments and optimizes the parameters of a Gaussian mixture transition kernel using generalized methods of moments in each step during sampling. Empirically, our solver outperforms numerous SDE-based solvers in terms of sample quality in image generation and stroke-based synthesis in various diffusion models, which validates the motivation and effectiveness of GMS. 
Our code is available at https://github.com/Guohanzhong/GMS.", "keywords": "Diffusion models;SDE-based solver;Gaussian mixture;Stroke-based synthesis", "primary_area": "", "supplementary_material": "/attachment/05ce51be4f5009839fece40338f5cc4ffc7cdcb9.zip", "author": "Hanzhong Allan Guo;Cheng Lu;Fan Bao;Tianyu Pang;Shuicheng YAN;Chao Du;Chongxuan Li", "authorids": "~Hanzhong_Allan_Guo1;~Cheng_Lu5;~Fan_Bao1;~Tianyu_Pang1;~Shuicheng_YAN3;~Chao_Du1;~Chongxuan_Li1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://luchengthu.github.io/;https://baofff.github.io/;https://p2333.github.io/;https://yanshuicheng.ai/;https://duchao0726.github.io/;http://ml.cs.tsinghua.edu.cn/~chongxuan", "dblp": ";91/1482-11;71/3877;202/2550;y/ShuichengYan;75/7523;161/9965", "google_scholar": ";vPE9VRoAAAAJ;;wYDbtFsAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;QOp7xW0AAAAJ;UKMcQn4AAAAJ", "orcid": ";;;0000-0003-0639-6176;;0000-0003-1244-6336;0000-0002-0912-9076", "linkedin": "hanzhong-guo-19965b1a5/;;;%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;;duchao/;", "or_profile": "~Hanzhong_Allan_Guo1;~Cheng_Lu5;~Fan_Bao1;~Tianyu_Pang1;~Shuicheng_YAN3;~Chao_Du1;~Chongxuan_Li1", "aff": "Renmin University of China;Tsinghua University;Tsinghua University;Sea AI Lab;sea Group;Sea AI Lab;Renmin University of China", "aff_domain": "ruc.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;sea.com;sea.com;sea.com;ruc.edu.cn", "position": "MS student;PhD student;PhD student;Research Scientist;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nguo2023gaussian,\ntitle={Gaussian Mixture Solvers for Diffusion Models},\nauthor={Hanzhong Allan Guo and Cheng Lu and Fan Bao and Tianyu Pang and Shuicheng YAN and Chao Du and Chongxuan Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0NuseeBuB4}\n}", "github": "", "project": "", "reviewers": "iBkM;iSEj;iDhp;naDW", "pdf_size": 11253581, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "3;2;2;3", "novelty": "3;2;3;2", "presentation": "3;1;3;2", "wc_summary": "104;109;107;71", "wc_strengths": "65;32;64;67", "wc_weaknesses": "156;524;25;35", "wc_questions": "5;52;244;92", "wc_limitations": "1;12;167;1", "wc_review": "331;729;607;266", "wc_reply_reviewers": "0;54;45;97", "wc_reply_authors": "298;45;37;211", "reply_reviewers": "0;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 97.75, 15.54630181104175 ], "wc_strengths_avg": [ 57.0, 14.474114826130128 ], "wc_weaknesses_avg": [ 185.0, 202.39935770649075 ], "wc_questions_avg": [ 98.25, 89.60573363351254 ], "wc_limitations_avg": [ 45.25, 70.43569762556484 ], "wc_review_avg": [ 483.25, 191.1051739226335 ], "wc_reply_reviewers_avg": [ 49.0, 34.44560929929967 ], "wc_reply_authors_avg": [ 147.75, 111.12914784159915 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11801868961707224123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ruc.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;sea.com;sea.com;sea.com;ruc.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;2;3;2;0", "aff_unique_norm": 
"Renmin University of China;Tsinghua University;Sea AI Lab;Sea Group", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.tsinghua.edu.cn;;", "aff_unique_abbr": "RUC;THU;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "SaVeNet: A Scalable Vector Network for Enhanced Molecular Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73061", "id": "0OImBCFsdf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/860c1c657deafe09f64c013c2888bd7b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0OImBCFsdf", "openreview": "https://openreview.net/forum?id=0OImBCFsdf", "poster": "/media/PosterPDFs/NeurIPS%202023/73061.png?t=1702422266.4313703", "slides": "https://nips.cc/virtual/2023/poster/73061", "video": "https://nips.cc/virtual/2023/poster/73061", "author_site": "Sarp Aykent, Tian Xia", "tldr": "", "abstract": "Geometric representation learning of molecules is challenging yet essential for applications in multiple domains. Despite the impressive breakthroughs made by geometric deep learning in various molecular representation learning tasks, effectively capturing complicated geometric features across spatial dimensions is still underexplored due to the significant difficulties in modeling efficient geometric representations and learning the inherent correlation in 3D structural modeling. These include computational inefficiency, underutilization of vectorial embeddings, and limited generalizability to integrate various geometric properties. To address the raised concerns, we introduce an efficient and effective framework, Scalable Vector Network (SaVeNet), designed to accommodate a range of geometric requirements without depending on costly embeddings. In addition, the proposed framework scales effectively with introduced direction noise. Theoretically, we analyze the desired properties (i.e., invariance and equivariant) and framework efficiency of the SaVeNet. Empirically, we conduct a comprehensive series of experiments to evaluate the efficiency and expressiveness of the proposed model. Our efficiency-focused experiments underscore the model's empirical superiority over existing methods. 
Experimental results on synthetic and real-world datasets demonstrate the expressiveness of our model, which achieves state-of-the-art performance across various tasks within molecular representation learning.", "keywords": "geometric deep learning;molecule property prediction;geometric representation learning", "primary_area": "", "supplementary_material": "", "author": "Sarp Aykent;Tian Xia", "authorids": "~Sarp_Aykent1;~Tian_Xia10", "gender": "M;", "homepage": "https://www.sarpaykent.com/;", "dblp": "282/3095;", "google_scholar": "CaNz-e4AAAAJ;", "orcid": "0000-0002-4293-3699;", "linkedin": "sarp-aykent/;", "or_profile": "~Sarp_Aykent1;~Tian_Xia10", "aff": "Auburn University;", "aff_domain": "auburn.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\naykent2023savenet,\ntitle={SaVeNet: A Scalable Vector Network for Enhanced Molecular Representation Learning},\nauthor={Sarp Aykent and Tian Xia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0OImBCFsdf}\n}", "github": "", "project": "", "reviewers": "FRcS;QLtV;jP4K;T8Du;6Z9X", "pdf_size": 473292, "rating": "5;5;6;6;7", "confidence": "4;4;3;4;2", "soundness": "4;2;3;4;4", "novelty": "3;2;3;4;3", "presentation": "2;2;2;2;3", "wc_summary": "52;44;83;36;61", "wc_strengths": "21;21;51;54;170", "wc_weaknesses": "129;403;117;29;102", "wc_questions": "69;34;233;31;1", "wc_limitations": "1;17;27;1;11", "wc_review": "272;519;511;151;345", "wc_reply_reviewers": "0;346;16;11;10", "wc_reply_authors": "74;2054;0;0;0", "reply_reviewers": "0;4;1;1;1", "reply_authors": "2;6;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 55.2, 16.191355718407276 ], "wc_strengths_avg": [ 63.4, 55.13837139415708 ], "wc_weaknesses_avg": [ 156.0, 128.2996492590685 ], "wc_questions_avg": [ 73.6, 82.56294568388412 ], "wc_limitations_avg": [ 11.4, 9.911609354691095 ], "wc_review_avg": [ 359.6, 141.23115803532872 ], "wc_reply_reviewers_avg": [ 76.6, 134.79999999999998 ], "wc_reply_authors_avg": [ 425.6, 814.7042653625916 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 2.2, 1.9390719429665317 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8685990362153793, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12279371369171594225&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "auburn.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Auburn University", "aff_unique_dep": "", "aff_unique_url": "https://www.auburn.edu", "aff_unique_abbr": "Auburn", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Improved Communication Efficiency in Federated Natural Policy Gradient via ADMM-based Gradient Updates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73060", "id": "0ORqsMY6OL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bc6a1f968f8b1dae3e880f3f723d7d46-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0ORqsMY6OL", "openreview": "https://openreview.net/forum?id=0ORqsMY6OL", "poster": "/media/PosterPDFs/NeurIPS%202023/73060.png?t=1699237572.4267704", "slides": "https://nips.cc/virtual/2023/poster/73060", "video": "https://nips.cc/virtual/2023/poster/73060", "author_site": 
"Guangchen Lan, Han Wang, James Anderson, Christopher Brinton, Vaneet Aggarwal", "tldr": "", "abstract": "Federated reinforcement learning (FedRL) enables agents to collaboratively train a global policy without sharing their individual data. However, high communication overhead remains a critical bottleneck, particularly for natural policy gradient (NPG) methods, which are second-order. To address this issue, we propose the FedNPG-ADMM framework, which leverages the alternating direction method of multipliers (ADMM) to approximate global NPG directions efficiently. We theoretically demonstrate that using ADMM-based gradient updates reduces communication complexity from $\\mathcal{O}({d^{2}})$ to $\\mathcal{O}({d})$ at each iteration, where $d$ is the number of model parameters. Furthermore, we show that achieving an $\\epsilon$-error stationary convergence requires $\\mathcal{O}(\\frac{1}{(1-\\gamma)^{2}{\\epsilon}})$ iterations for discount factor $\\gamma$, demonstrating that FedNPG-ADMM maintains the same convergence rate as standard FedNPG. Through evaluation of the proposed algorithms in MuJoCo environments, we demonstrate that FedNPG-ADMM maintains the reward performance of standard FedNPG, and that its convergence rate improves when the number of federated agents increases.", "keywords": "reinforcement learning;federated learning", "primary_area": "", "supplementary_material": "/attachment/a95b22a723f068713f469bf873277e0f7208a978.zip", "author": "Guangchen Lan;Han Wang;James Anderson;Christopher Brinton;Vaneet Aggarwal", "authorids": "~Guangchen_Lan1;~Han_Wang14;~James_Anderson6;~Christopher_Brinton1;~Vaneet_Aggarwal1", "gender": "M;;;;M", "homepage": ";https://sites.google.com/view/han-wang/home;http://www.columbia.edu/~ja3451/;https://www.cbrinton.net/;", "dblp": "359/6836;;;;91/6560", "google_scholar": "0OkYBPQAAAAJ;ALzWbZQAAAAJ;https://scholar.google.co.uk/citations?user=rIX6oiMAAAAJ;vWmHA5MAAAAJ;", "orcid": "0000-0001-7969-7303;;0000-0001-8210-6527;;", "linkedin": "guangchen-lan-97420017b/;;;;", "or_profile": "~Guangchen_Lan1;~Han_Wang14;~James_Anderson6;~Christopher_Brinton1;~Vaneet_Aggarwal1", "aff": "Purdue University;Columbia University;Columbia University;Purdue University;Purdue University", "aff_domain": "purdue.edu;columbia.edu;columbia.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlan2023improved,\ntitle={Improved Communication Efficiency in Federated Natural Policy Gradient via {ADMM}-based Gradient Updates},\nauthor={Guangchen Lan and Han Wang and James Anderson and Christopher Brinton and Vaneet Aggarwal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0ORqsMY6OL}\n}", "github": "", "project": "", "reviewers": "pYuC;ZDkh;q8Jc;GLbD", "pdf_size": 1120824, "rating": "4;6;6;7", "confidence": "4;4;4;2", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "71;34;58;43", "wc_strengths": "118;56;33;53", "wc_weaknesses": "153;222;48;61", "wc_questions": "74;2;337;4", "wc_limitations": "1;7;36;1", "wc_review": "417;321;512;162", "wc_reply_reviewers": "0;14;45;21", "wc_reply_authors": "41;0;14;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 
0.4330127018922193 ], "wc_summary_avg": [ 51.5, 14.150971698084906 ], "wc_strengths_avg": [ 65.0, 31.851216617265973 ], "wc_weaknesses_avg": [ 121.0, 70.98239218285053 ], "wc_questions_avg": [ 104.25, 137.47067869185778 ], "wc_limitations_avg": [ 11.25, 14.49784466739798 ], "wc_review_avg": [ 353.0, 129.30777238820565 ], "wc_reply_reviewers_avg": [ 20.0, 16.294170736800325 ], "wc_reply_authors_avg": [ 13.75, 16.7388022271607 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4044157112583319452&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "purdue.edu;columbia.edu;columbia.edu;purdue.edu;purdue.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Purdue University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.columbia.edu", "aff_unique_abbr": "Purdue;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Pruning vs Quantization: Which is Better?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73059", "id": "0OU1ZXXxs5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c48bc80aa5d3cbbdd712d1cc107b8319-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0OU1ZXXxs5", "openreview": "https://openreview.net/forum?id=0OU1ZXXxs5", "poster": "/media/PosterPDFs/NeurIPS%202023/73059.png?t=1701694992.735572", "slides": "https://nips.cc/virtual/2023/poster/73059", "video": "https://nips.cc/virtual/2023/poster/73059", "author_site": "Andrey Kuzmin, Markus Nagel, Mart van Baalen, Arash Behboodi, Tijmen Blankevoort", "tldr": "", "abstract": "Neural network pruning and quantization techniques are almost as old as neural networks themselves. However, to date, only ad-hoc comparisons between the two have been published. In this paper, we set out to answer the question of which is better: neural network quantization or pruning? By answering this question, we hope to inform design decisions made on neural network hardware going forward. \nWe provide an extensive comparison between the two techniques for compressing deep neural networks. \nFirst, we give an analytical comparison of expected quantization and pruning error for general data distributions.\nThen, we provide lower and upper bounds for the per-layer pruning and quantization error in trained networks and compare these to empirical error after optimization.\nFinally, we provide an extensive experimental comparison for training 8 large-scale models trained on 3 tasks and provide insights into the representations learned during fine-tuning with quantization and pruning in the loop.\nOur results show that in most cases quantization outperforms pruning. 
Only in some scenarios with a very high compression ratio might pruning be beneficial from an accuracy standpoint.", "keywords": "Neural network quantization;neural network pruning;magnitude pruning;post-training quantization;quantization-aware training", "primary_area": "", "supplementary_material": "/attachment/24785040f202c8eb2fa6abf17af2d6b65149738e.pdf", "author": "Andrey Kuzmin;Markus Nagel;Mart Van Baalen;Arash Behboodi;Tijmen Blankevoort", "authorids": "~Andrey_Kuzmin1;~Markus_Nagel1;~Mart_Van_Baalen1;~Arash_Behboodi1;~Tijmen_Blankevoort1", "gender": ";M;M;M;M", "homepage": "https://www.qualcomm.com/research/artificial-intelligence/ai-research;;;https://arashbehboodi.github.io/;", "dblp": ";38/1463;;97/7718;", "google_scholar": ";akNuBBEAAAAJ;a-Au4JUAAAAJ;;OGEyrG8AAAAJ", "orcid": ";;;;", "linkedin": ";;;;tijmen-blankevoort-a5633a24/", "or_profile": "~Andrey_Kuzmin1;~Markus_Nagel1;~Mart_Van_Baalen1;~Arash_Behboodi1;~Tijmen_Blankevoort1", "aff": "Qualcomm Inc, QualComm;Qualcomm AI Research;QualComm;QualComm;Qualcomm Inc, QualComm", "aff_domain": "qti.qualcomm.com;qualcomm.com;qualcomm.com;qualcomm.com;qti.qualcomm.com", "position": "Senior machine learning researcher;Researcher;Researcher;Machine Learning Researcher;Researcher", "bibtex": "@inproceedings{\nkuzmin2023pruning,\ntitle={Pruning vs Quantization: Which is Better?},\nauthor={Andrey Kuzmin and Markus Nagel and Mart Van Baalen and Arash Behboodi and Tijmen Blankevoort},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0OU1ZXXxs5}\n}", "github": "", "project": "", "reviewers": "B8sD;S4q6;jvrg;XswT", "pdf_size": 733916, "rating": "3;4;6;7", "confidence": "3;4;4;4", "soundness": "2;2;4;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "50;69;118;78", "wc_strengths": "13;36;55;63", "wc_weaknesses": "72;94;192;116", "wc_questions": "1;36;107;49", "wc_limitations": "1;15;0;23", "wc_review": "137;250;472;329", "wc_reply_reviewers": "217;0;25;40", "wc_reply_authors": "429;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 78.75, 24.81305100143874 ], "wc_strengths_avg": [ 41.75, 19.279198634798075 ], "wc_weaknesses_avg": [ 118.5, 45.196791921551245 ], "wc_questions_avg": [ 48.25, 38.192767639960316 ], "wc_limitations_avg": [ 9.75, 9.67923034130297 ], "wc_review_avg": [ 297.0, 121.92005577426546 ], "wc_reply_reviewers_avg": [ 70.5, 85.78024248042203 ], "wc_reply_authors_avg": [ 107.25, 185.7624491117621 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7302967433402215, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14787406802928442370&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "qti.qualcomm.com;qualcomm.com;qualcomm.com;qualcomm.com;qti.qualcomm.com", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Qualcomm Incorporated;Qualcomm", "aff_unique_dep": ";Qualcomm AI Research", "aff_unique_url": "https://www.qualcomm.com;https://www.qualcomm.com/research", "aff_unique_abbr": "Qualcomm;QAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Diffusion Policies For Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73058", "id": "0P6uJtndWu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d45e0bfb5a39477d56b55c0824200008-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0P6uJtndWu", "openreview": "https://openreview.net/forum?id=0P6uJtndWu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73058", "video": "https://nips.cc/virtual/2023/poster/73058", "author_site": "Bingyi Kang, Xiao Ma, Chao Du, Tianyu Pang, Shuicheng Yan", "tldr": "", "abstract": "Offline reinforcement learning (RL) aims to learn optimal policies from offline datasets, where the parameterization of policies is crucial but often overlooked. Recently, Diffsuion-QL significantly boosts the performance of offline RL by representing a policy with a diffusion model, whose success relies on a parametrized Markov Chain with hundreds of steps for sampling. However, Diffusion-QL suffers from two critical limitations. 1) It is computationally inefficient to forward and backward through the whole Markov chain during training. 2) It is incompatible with maximum likelihood-based RL algorithms (e.g., policy gradient methods) as the likelihood of diffusion models is intractable. Therefore, we propose efficient diffusion policy (EDP) to overcome these two challenges. EDP approximately constructs actions from corrupted ones at training to avoid running the sampling chain. We conduct extensive experiments on the D4RL benchmark. The results show that EDP can reduce the diffusion policy training time from 5 days to 5 hours on gym-locomotion tasks. Moreover, we show that EDP is compatible with various offline RL algorithms (TD3, CRR, and IQL) and achieves new state-of-the-art on D4RL by large margins over previous methods.", "keywords": "Offline Reinforcement Learning;Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/ae335861141ae15e1fd1567e910c492b975b74db.zip", "author": "Bingyi Kang;Xiao Ma;Chao Du;Tianyu Pang;Shuicheng YAN", "authorids": "~Bingyi_Kang1;~Xiao_Ma2;~Chao_Du1;~Tianyu_Pang1;~Shuicheng_YAN3", "gender": ";M;M;M;M", "homepage": "https://bingykang.github.io/;https://yusufma03.github.io/;https://duchao0726.github.io/;https://p2333.github.io/;https://yanshuicheng.ai/", "dblp": ";35/573-6;75/7523;202/2550;y/ShuichengYan", "google_scholar": "https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;hR4G6hoAAAAJ;QOp7xW0AAAAJ;wYDbtFsAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";;0000-0003-1244-6336;0000-0003-0639-6176;", "linkedin": ";;duchao/;%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;", "or_profile": "~Bingyi_Kang1;~Xiao_Ma2;~Chao_Du1;~Tianyu_Pang1;~Shuicheng_YAN3", "aff": "Sea AI Lab;SEA AI Lab;Sea AI Lab;Sea AI Lab;sea Group", "aff_domain": "sea.com;sea.com;sea.com;sea.com;sea.com", "position": "Researcher;Research Scientist;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nkang2023efficient,\ntitle={Efficient Diffusion Policies For Offline Reinforcement Learning},\nauthor={Bingyi Kang and Xiao Ma and Chao Du and Tianyu Pang and Shuicheng YAN},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0P6uJtndWu}\n}", "github": "", "project": "", "reviewers": "UWuP;TjdE;44Ux;tRDF;SXfY", "pdf_size": 503629, "rating": 
"5;5;6;6;7", "confidence": "4;4;4;5;3", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "66;59;59;77;108", "wc_strengths": "32;20;119;141;172", "wc_weaknesses": "65;347;175;166;216", "wc_questions": "388;2;52;58;319", "wc_limitations": "11;1;15;1;24", "wc_review": "562;429;420;443;839", "wc_reply_reviewers": "21;445;44;0;19", "wc_reply_authors": "60;1080;24;0;23", "reply_reviewers": "1;2;1;0;1", "reply_authors": "2;4;2;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.8, 18.323755073674175 ], "wc_strengths_avg": [ 96.8, 60.330423502574554 ], "wc_weaknesses_avg": [ 193.8, 91.30038335078336 ], "wc_questions_avg": [ 163.8, 157.62284098442078 ], "wc_limitations_avg": [ 10.4, 8.754427451295715 ], "wc_review_avg": [ 538.6, 158.74835432217873 ], "wc_reply_reviewers_avg": [ 105.8, 170.17332340881163 ], "wc_reply_authors_avg": [ 237.4, 421.7371693365431 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12584601413910732436&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "sea.com;sea.com;sea.com;sea.com;sea.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Sea AI Lab;Sea Group", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Unknown" }, { "id": "0QpwcDPiHT", "title": "Language Models Implement Simple Word2Vec-style Vector Arithmetic", "track": "main", "status": "Reject", "tldr": "", "abstract": "A primary criticism towards language models (LMs) is their inscrutability. This paper presents evidence that, despite their size and complexity, LMs sometimes exploit a computational mechanism familiar from traditional word embeddings: the use of simple vector arithmetic in order to encode abstract relations (e.g., Poland:Warsaw::China:Beijing). We investigate a range of language model sizes (from 124M parameters to 176B parameters) in an in-context learning setting, and find that for a variety of tasks (involving capital cities, upper-casing, and past-tensing), a key part of the mechanism reduces to a simple linear update applied by the feedforward networks. We further show that this mechanism is specific to tasks that require retrieval from pretraining memory, rather than retrieval from local context. 
Our results contribute to a growing body of work on the mechanistic interpretability of LLMs, and offer reason to be optimistic that, despite the massive and non-linear nature of the models, the strategies they ultimately use to solve tasks can sometimes reduce to familiar and even intuitive algorithms.", "keywords": "interpretability;nlp;neural networks;deep learning;explainability;representation learning", "primary_area": "", "supplementary_material": "/attachment/611676d4ed4f43fbe2bdf699e00d04e5ad075d42.zip", "author": "Jack Merullo;Carsten Eickhoff;Ellie Pavlick", "authorids": "~Jack_Merullo2;~Carsten_Eickhoff1;~Ellie_Pavlick1", "gender": "M;F;M", "homepage": "https://health-nlp.org;http://cs.brown.edu/people/epavlick/;https://jmerullo.github.io/", "dblp": "42/8700;141/4059;248/8361", "google_scholar": "QQi1_rAAAAAJ;sFyrSa8AAAAJ;7w0xLF4AAAAJ", "orcid": "0000-0001-9895-4061;;", "linkedin": ";;", "or_profile": "~Carsten_Eickhoff1;~Ellie_Pavlick1;~jack_merullo1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Brown University;Brown University", "aff_domain": "uni-tuebingen.de;brown.edu;brown.edu", "position": "Full Professor;Assistant Professor;PhD student", "bibtex": "@misc{\nmerullo2023language,\ntitle={Language Models Implement Simple Word2Vec-style Vector Arithmetic},\nauthor={Jack Merullo and Carsten Eickhoff and Ellie Pavlick},\nyear={2023},\nurl={https://openreview.net/forum?id=0QpwcDPiHT}\n}", "github": "", "project": "", "reviewers": "fqRy;JWmt;Jkvc;oNPw;1YPx", "site": "https://openreview.net/forum?id=0QpwcDPiHT", "pdf_size": 1575905, "rating": "4;5;6;6;8", "confidence": "4;4;3;3;4", "soundness": "2;2;3;3;3", "novelty": "3;3;3;2;4", "presentation": "2;4;3;4;4", "wc_summary": "52;140;123;83;62", "wc_strengths": "23;87;46;48;98", "wc_weaknesses": "95;149;22;311;149", "wc_questions": "84;109;72;131;222", "wc_limitations": "10;12;1;1;14", "wc_review": "264;497;264;574;545", "wc_reply_reviewers": "545;610;40;179;12", "wc_reply_authors": "657;806;0;0;0", "reply_reviewers": "3;3;1;1;1", "reply_authors": "3;3;1;1;1", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 92.0, 34.193566646373704 ], "wc_strengths_avg": [ 60.4, 27.86108397029807 ], "wc_weaknesses_avg": [ 145.2, 95.09658248328381 ], "wc_questions_avg": [ 123.6, 53.256361122404904 ], "wc_limitations_avg": [ 7.6, 5.535341001239219 ], "wc_review_avg": [ 428.8, 136.78801117057006 ], "wc_reply_reviewers_avg": [ 277.2, 252.47209746821528 ], "wc_reply_authors_avg": [ 292.6, 361.44465689784374 ], "reply_reviewers_avg": [ 1.8, 0.9797958971132713 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1230914909793327, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1877407992689383917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.brown.edu", "aff_unique_abbr": "Uni T\u00fcbingen;Brown", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "OFCOURSE: A Multi-Agent Reinforcement Learning Environment for Order Fulfillment", "status": 
"Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73723", "id": "0RSQEh9lRG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d0cfc5db3feeabf6762129ba91bd3a1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=0RSQEh9lRG", "openreview": "https://openreview.net/forum?id=0RSQEh9lRG", "poster": "/media/PosterPDFs/NeurIPS%202023/73723.png?t=1698992915.7880998", "slides": "https://nips.cc/virtual/2023/poster/73723", "video": "https://nips.cc/virtual/2023/poster/73723", "author_site": "Yiheng Zhu, Yang Zhan, Xuankun Huang, Yuwei Chen, yujie Chen, Jiangwen Wei, Wei Feng, Yinzhi Zhou, Haoyuan Hu, Jieping Ye", "tldr": "", "abstract": "The dramatic growth of global e-commerce has led to a surge in demand for efficient and cost-effective order fulfillment which can increase customers' service levels and sellers' competitiveness. However, managing order fulfillment is challenging due to a series of interdependent online sequential decision-making problems. To clear this hurdle, rather than solving the problems separately as attempted in some recent researches, this paper proposes a method based on multi-agent reinforcement learning to integratively solve the series of interconnected problems, encompassing order handling, packing and pickup, storage, order consolidation, and last-mile delivery. In particular, we model the integrated problem as a Markov game, wherein a team of agents learns a joint policy via interacting with a simulated environment. Since no simulated environment supporting the complete order fulfillment problem exists, we devise Order Fulfillment COoperative mUlti-agent Reinforcement learning Scalable Environment (OFCOURSE) in the OpenAI Gym style, which allows reproduction and re-utilization to build customized applications. By constructing the fulfillment system in OFCOURSE, we optimize a joint policy that solves the integrated problem, facilitating sequential order-wise operations across all fulfillment units and minimizing the total cost of fulfilling all orders within the promised time. With OFCOURSE, we also demonstrate that the joint policy learned by multi-agent reinforcement learning outperforms the combination of locally optimal policies. 
The source code of OFCOURSE is available at: https://github.com/GitYiheng/ofcourse.", "keywords": "order fulfillment;multi-agent reinforcement learning;e-commerce", "primary_area": "", "supplementary_material": "/attachment/d80873694acdaac5091dfcd327fbb40f1107f72d.pdf", "author": "Yiheng Zhu;Yang Zhan;Xuankun Huang;Yuwei Chen;yujie Chen;Jiangwen Wei;Wei Feng;Yinzhi Zhou;Haoyuan Hu;Jieping Ye", "authorids": "~Yiheng_Zhu1;~Yang_Zhan2;~Xuankun_Huang2;~Yuwei_Chen2;~yujie_Chen3;~Jiangwen_Wei2;~Wei_Feng9;~Yinzhi_Zhou1;~Haoyuan_Hu1;~Jieping_Ye4", "gender": "M;F;;F;;M;M;M;M;M", "homepage": ";;;;https://www.linkedin.com/in/wjiangwen;https://scholar.google.com/citations?user=JElwKO8AAAAJ&hl=en;;;http://yelabs.net/;https://github.com", "dblp": ";;;;;;;205/3156;03/5454;", "google_scholar": ";w7mGPG0AAAAJ;https://scholar.google.com.hk/citations?user=wggINsQAAAAJ;;;JElwKO8AAAAJ;https://scholar.google.com.sg/citations?user=QVWBKxoAAAAJ;;T9AzhwcAAAAJ;", "orcid": "0000-0002-8319-6234;0000-0002-3580-758X;;0000-0003-2104-6430;;;;;0000-0001-8662-5818;", "linkedin": ";;;;;;;;;", "or_profile": "~Yiheng_Zhu1;~Yang_Zhan2;~Yuwei_Chen2;~yujie_Chen3;~Jiangwen_Wei2;~Wei_Feng9;~Yinzhi_Zhou1;~Haoyuan_Hu1;~Jieping_Ye4;~Kun_Xian1", "aff": "Cainiao Network;Alibaba Group;;;Cainiao Network;Alibaba Group;;Cainiao Network;Alibaba DAMO Academy;Alibaba Group", "aff_domain": "cainiao.com;alibaba-inc.com;;;cainiao.com;alibaba-inc.com;;cainiao.com;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;Researcher;;;Researcher;Researcher;;Principal Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nzhu2023ofcourse,\ntitle={{OFCOURSE}: A Multi-Agent Reinforcement Learning Environment for Order Fulfillment},\nauthor={Yiheng Zhu and Yang Zhan and Xuankun Huang and Yuwei Chen and yujie Chen and Jiangwen Wei and Wei Feng and Yinzhi Zhou and Haoyuan Hu and Jieping Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=0RSQEh9lRG}\n}", "github": "", "project": "", "reviewers": "D3MQ;RuY6;h6Wq;HNVp", "pdf_size": 2762027, "rating": "4;6;6;10", "confidence": "4;4;3;4", "wc_summary_and_contributions": "103;110;70;64", "wc_strengths": "18;107;49;54", "wc_improvement": "165;335;88;41", "wc_limitations": "10;16;29;35", "wc_correctness": "50;21;26;11", "wc_clarity": "34;12;6;6", "wc_relation_to_prior_work": "5;39;5;10", "wc_documentation": "41;27;41;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "427;668;315;233", "wc_reply_reviewers": "0;54;0;0", "wc_reply_authors": "1241;1009;636;509", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 6.5, 2.179449471770337 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 86.75, 20.017180121085985 ], "wc_strengths_avg": [ 57.0, 31.992186546092782 ], "wc_improvement_avg": [ 157.25, 111.76398122830092 ], "wc_limitations_avg": [ 22.5, 9.962429422585638 ], "wc_correctness_avg": [ 27.0, 14.33527118683145 ], "wc_clarity_avg": [ 14.5, 11.521718621802913 ], "wc_relation_to_prior_work_avg": [ 14.75, 14.148763196831021 ], "wc_documentation_avg": [ 30.0, 12.36931687685298 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 410.75, 163.71068230265243 ], "wc_reply_reviewers_avg": [ 13.5, 23.382685902179844 ], "wc_reply_authors_avg": [ 848.75, 291.64736840918005 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": 
[ 10, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5078137740988250999&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cainiao.com;alibaba-inc.com;;;cainiao.com;alibaba-inc.com;;cainiao.com;alibaba-inc.com;alibaba-inc.com", "author_num": 10, "aff_unique_index": "0;1;0;1;0;1;1", "aff_unique_norm": "Cainiao Network;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.cainiao.com;https://www.alibaba.com", "aff_unique_abbr": "Cainiao;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Fast and Accurate Estimator for Large Scale Linear Model via Data Averaging", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73057", "id": "0Tq1RGJBid", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6de668dab370194fa304a08be5aacd85-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Tq1RGJBid", "openreview": "https://openreview.net/forum?id=0Tq1RGJBid", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73057", "video": "https://nips.cc/virtual/2023/poster/73057", "author_site": "Rui Wang, Yanyan Ouyang, Yu Panpan, Wangli Xu", "tldr": "", "abstract": "This work is concerned with the estimation problem of linear model when the\nsample size is extremely large and the data dimension can vary with the sample\nsize. In this setting, the least square estimator based on the full data is not feasible\nwith limited computational resources. Many existing methods for this problem are\nbased on the sketching technique which uses the sketched data to perform least\nsquare estimation. We derive fine-grained lower bounds of the conditional mean\nsquared error for sketching methods. For sampling methods, our lower bound\nprovides an attainable optimal convergence rate. Our result implies that when the\ndimension is large, there is hardly a sampling method can have a faster convergence\nrate than the uniform sampling method. To achieve a better statistical performance,\nwe propose a new sketching method based on data averaging. The proposed\nmethod reduces the original data to a few averaged observations. These averaged\nobservations still satisfy the linear model and are used to estimate the regression\ncoefficients. The asymptotic behavior of the proposed estimation procedure is\nstudied. 
Our theoretical results show that the proposed method can achieve a faster convergence rate than the optimal convergence rate for sampling methods. Theoretical and numerical results show that the proposed estimator has good statistical performance as well as low computational cost.", "keywords": "Big data;Data averaging;Order statistic;Sampling method;Sketching method.", "primary_area": "", "supplementary_material": "/attachment/2f63f95294fb4fd227f2d2e22c295f408bd3bf4c.zip", "author": "Rui Wang;Yanyan Ouyang;Panpan Yu;Wangli Xu", "authorids": "~Rui_Wang21;~Yanyan_Ouyang1;yupanpan@navinfo.com;~Wangli_Xu1", "gender": "M;;;", "homepage": ";;;http://stat.ruc.edu.cn/wxu", "dblp": ";;;70/8506", "google_scholar": ";;;", "orcid": "0000-0002-4418-0774;;;", "linkedin": ";;;", "or_profile": "~Rui_Wang21;~Yanyan_Ouyang1;yupanpan@navinfo.com;~Wangli_Xu1", "aff": "Inspur (Beijing) Electronic Information Industry Co., Ltd.;;;Renmin University of China", "aff_domain": "inspur.com;;;ruc.edu.cn", "position": "Software Developer;;;Full Professor", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Fast and Accurate Estimator for Large Scale Linear Model via Data Averaging},\nauthor={Rui Wang and Yanyan Ouyang and Panpan Yu and Wangli Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0Tq1RGJBid}\n}", "github": "", "project": "", "reviewers": "sefb;A4mU;DaqC;neFN;EUfX", "pdf_size": 416585, "rating": "4;6;6;6;7", "confidence": "3;1;2;3;2", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "2;3;2;2;2", "wc_summary": "78;17;71;91;97", "wc_strengths": "58;5;91;58;114", "wc_weaknesses": "212;5;61;117;141", "wc_questions": "257;89;95;131;671", "wc_limitations": "62;5;5;11;169", "wc_review": "667;121;323;408;1192", "wc_reply_reviewers": "122;9;14;0;109", "wc_reply_authors": "415;15;14;0;119", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;2;2;1;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 2.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 70.8, 28.428155057970258 ], "wc_strengths_avg": [ 65.2, 36.809781308777154 ], "wc_weaknesses_avg": [ 107.2, 70.42840336114399 ], "wc_questions_avg": [ 248.6, 219.72127798645263 ], "wc_limitations_avg": [ 50.4, 63.047918284428704 ], "wc_review_avg": [ 542.2, 369.1294623841343 ], "wc_reply_reviewers_avg": [ 50.8, 53.17668662111245 ], "wc_reply_authors_avg": [ 112.6, 157.10709722988327 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4909902530309828, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16767762102284181071&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "inspur.com;;;ruc.edu.cn", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Inspur;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.inspur.com;http://www.ruc.edu.cn", "aff_unique_abbr": ";RUC", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Improved Frequency Estimation Algorithms with and without Predictions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73056", "id": "0VcvYQ3uPh", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e49934cac6cb8604b0c67cfa0828718-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0VcvYQ3uPh", "openreview": "https://openreview.net/forum?id=0VcvYQ3uPh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73056", "video": "https://nips.cc/virtual/2023/poster/73056", "author_site": "Anders Aamand, Justin Chen, Huy Nguyen, Sandeep Silwal, Ali Vakilian", "tldr": "", "abstract": "Estimating frequencies of elements appearing in a data stream is a key task in large-scale data analysis. Popular sketching approaches to this problem (e.g., CountMin and CountSketch) come with worst-case guarantees that probabilistically bound the error of the estimated frequencies for any possible input. The work of Hsu et al.~(2019) introduced the idea of using machine learning to tailor sketching algorithms to the specific data distribution they are being run on. In particular, their learning-augmented frequency estimation algorithm uses a learned heavy-hitter oracle which predicts which elements will appear many times in the stream. We give a novel algorithm, which in some parameter regimes, already theoretically outperforms the learning based algorithm of Hsu et al. *without* the use of any predictions. Augmenting our algorithm with heavy-hitter predictions further reduces the error and improves upon the state of the art. Empirically, our algorithms achieve superior performance in all experiments compared to prior approaches.", "keywords": "learning-augmented algorithms;algorithms with predictions;data-driven algorithms;sublinear;streaming;frequency estimation;sketching", "primary_area": "", "supplementary_material": "/attachment/7433548be99855c66d61f01ff751d1fc044ddf47.zip", "author": "Anders Aamand;Justin Y. Chen;Huy Nguyen;Sandeep Silwal;Ali Vakilian", "authorids": "~Anders_Aamand1;~Justin_Y._Chen1;~Huy_Nguyen1;~Sandeep_Silwal1;~Ali_Vakilian1", "gender": "M;M;M;;M", "homepage": "https://www.andersaamand.com/;https://www.khoury.northeastern.edu/~hlnguyen/;https://sandeepsilwal.com;http://www.mit.edu/~vakilian/;https://people.csail.mit.edu/justc/", "dblp": "205/2416;62/3796;225/4637;116/4679;254/0805.html", "google_scholar": "WpIvLroAAAAJ;https://scholar.google.com.tw/citations?user=MDCu0WEAAAAJ;MnDnUvcAAAAJ;uXZaVaAAAAAJ;X_myU1YAAAAJ", "orcid": "0000-0002-0402-0514;;;0000-0001-5049-7594;", "linkedin": ";;;;", "or_profile": "~Anders_Aamand1;~Huy_Nguyen1;~Sandeep_Silwal1;~Ali_Vakilian1;~Justin_Y_Chen1", "aff": "Massachusetts Institute of Technology;Northeastern University;Massachusetts Institute of Technology;Toyota Technological Institute at Chicago;Massachusetts Institute of Technology", "aff_domain": "mit.edu;northeastern.edu;mit.edu;ttic.edu;mit.edu", "position": "Postdoc;Associate Professor;PhD student;Research Assistant Professor;PhD student", "bibtex": "@inproceedings{\naamand2023improved,\ntitle={Improved Frequency Estimation Algorithms with and without Predictions},\nauthor={Anders Aamand and Justin Y. 
Chen and Huy Nguyen and Sandeep Silwal and Ali Vakilian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0VcvYQ3uPh}\n}", "github": "", "project": "", "reviewers": "iUr9;UWwz;TyeE;5HgB;yxpU", "pdf_size": 885303, "rating": "6;7;7;7;8", "confidence": "3;3;3;4;3", "soundness": "3;3;3;3;4", "novelty": "3;4;3;3;4", "presentation": "2;4;3;3;4", "wc_summary": "161;215;121;87;136", "wc_strengths": "39;144;65;64;43", "wc_weaknesses": "211;26;51;101;127", "wc_questions": "3;1;61;154;168", "wc_limitations": "1;2;7;50;6", "wc_review": "415;388;305;456;480", "wc_reply_reviewers": "140;0;5;6;20", "wc_reply_authors": "58;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 144.0, 42.82989610073786 ], "wc_strengths_avg": [ 71.0, 38.005262793460595 ], "wc_weaknesses_avg": [ 103.2, 64.61702562018775 ], "wc_questions_avg": [ 77.4, 71.71778022220153 ], "wc_limitations_avg": [ 13.2, 18.54076589572286 ], "wc_review_avg": [ 408.8, 60.89794742025383 ], "wc_reply_reviewers_avg": [ 34.2, 53.31566373965534 ], "wc_reply_authors_avg": [ 11.6, 23.2 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15005212930684718028&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;northeastern.edu;mit.edu;ttic.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;Northeastern University;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.northeastern.edu;https://www.tti-chicago.org", "aff_unique_abbr": "MIT;NEU;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "No-Regret Online Reinforcement Learning with Adversarial Losses and Transitions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73055", "id": "0WLMVDdvDF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/79358587d84628728199059f648824e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0WLMVDdvDF", "openreview": "https://openreview.net/forum?id=0WLMVDdvDF", "poster": "/media/PosterPDFs/NeurIPS%202023/73055.png?t=1700380532.850445", "slides": "https://nips.cc/virtual/2023/poster/73055", "video": "https://nips.cc/virtual/2023/poster/73055", "author_site": "Tiancheng Jin, Junyan Liu, Chlo\u00e9 Rouyer, William Chang, Chen-Yu Wei, Haipeng Luo", "tldr": "", "abstract": "Existing online learning algorithms for adversarial Markov Decision Processes achieve $\\mathcal{O}(\\sqrt{T})$ regret after $T$ rounds of interactions even if the loss functions are chosen arbitrarily by an adversary, with the caveat that the transition function has to be fixed.\nThis is because it has been shown that adversarial transition functions make no-regret learning impossible.\nDespite such impossibility results, in this work, we develop algorithms that can handle both adversarial losses 
and adversarial transitions, with regret increasing smoothly in the degree of maliciousness of the adversary.\nMore concretely, we first propose an algorithm that enjoys $\widetilde{\mathcal{O}}(\sqrt{T} + C^{P})$ regret where $C^{P}$ measures how adversarial the transition functions are and can be at most $\mathcal{O}(T)$.\nWhile this algorithm itself requires knowledge of $C^{P}$, we further develop a black-box reduction approach that removes this requirement.\nMoreover, we also show that further refinements of the algorithm not only maintain the same regret bound, but also simultaneously adapt to easier environments (where losses are generated in a certain stochastically constrained manner as in [Jin et al. 2021]) and achieve $\widetilde{\mathcal{O}}(U + \sqrt{UC^{L}} + C^{P})$ regret, where $U$ is some standard gap-dependent coefficient and $C^{L}$ is the amount of corruption on losses.", "keywords": "reinforcement Learning;best of both worlds;MDP;robust RL;adversarial corruption", "primary_area": "", "supplementary_material": "/attachment/c805cd1d1983332da8b17620b43ae753ad63ba39.pdf", "author": "Tiancheng Jin;Junyan Liu;Chlo\u00e9 Rouyer;William Chang;Chen-Yu Wei;Haipeng Luo", "authorids": "~Tiancheng_Jin2;~Junyan_Liu1;~Chlo\u00e9_Rouyer1;chang314@g.ucla.edu;~Chen-Yu_Wei1;~Haipeng_Luo1", "gender": "M;;F;;M;M", "homepage": ";;https://sites.google.com/view/chloerouyer/;;https://bahh723.github.io/;https://haipeng-luo.net/", "dblp": "233/1230;;271/1588;;183/1729;62/2576", "google_scholar": ";;roa690wAAAAJ;;2L2cR-kAAAAJ;ct2hw4UAAAAJ", "orcid": ";;0000-0002-9882-7799;;;", "linkedin": "tiancheng-jin-gray;;;;;", "or_profile": "~Tiancheng_Jin2;~Junyan_Liu1;~Chlo\u00e9_Rouyer1;chang314@g.ucla.edu;~Chen-Yu_Wei1;~Haipeng_Luo1", "aff": "University of Southern California;;Copenhagen University;;Massachusetts Institute of Technology;University of Southern California", "aff_domain": "usc.edu;;ku.dk;;mit.edu;usc.edu", "position": "PhD student;;Postdoc;;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\njin2023noregret,\ntitle={No-Regret Online Reinforcement Learning with Adversarial Losses and Transitions},\nauthor={Tiancheng Jin and Junyan Liu and Chlo{\\'e} Rouyer and William Chang and Chen-Yu Wei and Haipeng Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0WLMVDdvDF}\n}", "github": "", "project": "", "reviewers": "fP6g;P2en;7UE7;tv4V;ME4k", "pdf_size": 682543, "rating": "5;7;7;7;7", "confidence": "1;3;4;3;3", "soundness": "3;4;3;3;3", "novelty": "3;4;3;3;3", "presentation": "2;2;4;4;3", "wc_summary": "137;93;102;60;55", "wc_strengths": "117;59;121;26;79", "wc_weaknesses": "84;115;191;137;1", "wc_questions": "1;145;33;2;1", "wc_limitations": "20;1;1;1;1", "wc_review": "359;413;448;226;137", "wc_reply_reviewers": "0;19;44;8;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 89.4, 29.950626036862733 ], "wc_strengths_avg": [ 80.4, 35.7972065949286 ], "wc_weaknesses_avg": [ 105.6, 62.88910875501416 ], "wc_questions_avg": [ 36.4, 55.66902190626309 ], "wc_limitations_avg": [ 4.8, 7.6000000000000005 ], "wc_review_avg": [ 316.6, 117.32109784689197 ], "wc_reply_reviewers_avg": [ 14.2, 16.448708155961672 ],
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9185586535436918, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7613328835885996848&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "usc.edu;;ku.dk;;mit.edu;usc.edu", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Southern California;University of Copenhagen;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.usc.edu;https://www.ku.dk;https://web.mit.edu", "aff_unique_abbr": "USC;UCPH;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Denmark" }, { "title": "BubbleML: A Multiphase Multiphysics Dataset and Benchmarks for Machine Learning", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73722", "id": "0Wmglu8zak", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/01726ae05d72ddba3ac784a5944fa1ef-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=0Wmglu8zak", "openreview": "https://openreview.net/forum?id=0Wmglu8zak", "poster": "/media/PosterPDFs/NeurIPS%202023/73722.png?t=1701819013.3780098", "slides": "https://nips.cc/virtual/2023/poster/73722", "video": "https://nips.cc/virtual/2023/poster/73722", "author_site": "Sheikh Md Shakeel Hassan, Arthur Feeney, Akash Dhruv, Jihoon Kim, Youngjoon Suh, Jaiyoung Ryu, Yoonjin Won, Aparna Chandramowlishwaran", "tldr": "", "abstract": "In the field of phase change phenomena, the lack of accessible and diverse datasets suitable for machine learning (ML) training poses a significant challenge. Existing experimental datasets are often restricted, with limited availability and sparse ground truth, impeding our understanding of this complex multiphysics phenomena. To bridge this gap, we present the BubbleML dataset which leverages physics-driven simulations to provide accurate ground truth information for various boiling scenarios, encompassing nucleate pool boiling, flow boiling, and sub-cooled boiling. This extensive dataset covers a wide range of parameters, including varying gravity conditions, flow rates, sub-cooling levels, and wall superheat, comprising 79 simulations. BubbleML is validated against experimental observations and trends, establishing it as an invaluable resource for ML research. Furthermore, we showcase its potential to facilitate the exploration of diverse downstream tasks by introducing two benchmarks: (a) optical flow analysis to capture bubble dynamics, and (b) neural PDE solvers for learning temperature and flow dynamics. 
The BubbleML dataset and its benchmarks aim to catalyze progress in ML-driven research on multiphysics phase change phenomena, providing robust baselines for the development and comparison of state-of-the-art techniques and models.", "keywords": "Multi-Physics Simulation;Scientific Machine Learning;Phase Change Physics;Operator Learning;Optical Flow", "primary_area": "", "supplementary_material": "/attachment/4a38dd476444fe959a039f8cd93b9d1181b75615.pdf", "author": "Sheikh Md Shakeel Hassan;Arthur Feeney;Akash Dhruv;Jihoon Kim;Youngjoon Suh;Jaiyoung Ryu;Yoonjin Won;Aparna Chandramowlishwaran", "authorids": "~Sheikh_Md_Shakeel_Hassan1;~Arthur_Feeney1;~Akash_Dhruv1;~Jihoon_Kim5;~Youngjoon_Suh1;~Jaiyoung_Ryu1;~Yoonjin_Won1;~Aparna_Chandramowlishwaran1", "gender": "M;M;M;M;M;M;F;F", "homepage": "https://sites.uci.edu/shakeel/;https://github.com/arthurfeeney;https://akashdhruv.github.io;;;https://cfdlab.korea.ac.kr/;http://won.eng.uci.edu;https://hpcforge.eng.uci.edu", "dblp": "353/2551;;;;;;;42/5990", "google_scholar": "tCLR39sAAAAJ;4WCVpTkAAAAJ;ZKSlt58AAAAJ;;7x8IFYIAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": "0009-0003-1819-9361;;0000-0003-4997-321X;0000-0002-1856-2728;;;;", "linkedin": "smdshakeelhassan/;;akashdhruv/;;;;;", "or_profile": "~Sheikh_Md_Shakeel_Hassan1;~Arthur_Feeney1;~Akash_Dhruv1;~Jihoon_Kim5;~Youngjoon_Suh1;~Jaiyoung_Ryu1;~Yoonjin_Won1;~Aparna_Chandramowlishwaran1", "aff": "University of California, Irvine;University of California, Irvine;Argonne National Laboratory;Korea University;University of California, Irvine;;University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;uci.edu;anl.gov;korea.ac.kr;uci.edu;;uci.edu;uci.edu", "position": "PhD student;PhD student;Postdoc;PhD student;Postdoc;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhassan2023bubbleml,\ntitle={Bubble{ML}: A Multiphase Multiphysics Dataset and Benchmarks for Machine Learning},\nauthor={Sheikh Md Shakeel Hassan and Arthur Feeney and Akash Dhruv and Jihoon Kim and Youngjoon Suh and Jaiyoung Ryu and Yoonjin Won and Aparna Chandramowlishwaran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=0Wmglu8zak}\n}", "github": "", "project": "", "reviewers": "5sS9;EMa8;obr7;uCQg;RFDf", "pdf_size": 8850330, "rating": "7;7;8;8;9", "confidence": "4;3;4;3;4", "wc_summary_and_contributions": "95;75;38;238;114", "wc_strengths": "83;114;33;18;45", "wc_improvement": "233;175;43;186;14", "wc_limitations": "8;42;1;11;8", "wc_correctness": "27;31;1;2;15", "wc_clarity": "6;67;1;12;17", "wc_relation_to_prior_work": "1;12;1;2;10", "wc_documentation": "12;6;12;29;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "466;523;131;499;231", "wc_reply_reviewers": "12;15;12;50;0", "wc_reply_authors": "377;173;112;365;109", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 112.0, 67.84393856491529 ], "wc_strengths_avg": [ 58.6, 35.08047890209026 ], "wc_improvement_avg": [ 130.2, 85.78438086271882 ], "wc_limitations_avg": [ 14.0, 14.38054240979804 ], "wc_correctness_avg": [ 15.2, 12.367699866992245 ], "wc_clarity_avg": [ 20.6, 23.820999139414784 ], "wc_relation_to_prior_work_avg": [ 5.2, 4.791659420284375 ], "wc_documentation_avg": [ 13.2, 8.280096617793781 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], 
"wc_review_avg": [ 370.0, 158.5610292600297 ], "wc_reply_reviewers_avg": [ 17.8, 16.904437287292353 ], "wc_reply_authors_avg": [ 227.2, 119.67355597624731 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10304370618562756566&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "uci.edu;uci.edu;anl.gov;korea.ac.kr;uci.edu;;uci.edu;uci.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;0;0;0", "aff_unique_norm": "University of California, Irvine;Argonne National Laboratory;Korea University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uci.edu;https://www.anl.gov;https://www.korea.ac.kr", "aff_unique_abbr": "UCI;ANL;KU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Score-based Generative Models with L\u00e9vy Processes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73054", "id": "0Wp3VHX0Gm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8011b23e1dc3f57e1b6211ccad498919-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Wp3VHX0Gm", "openreview": "https://openreview.net/forum?id=0Wp3VHX0Gm", "poster": "/media/PosterPDFs/NeurIPS%202023/73054.png?t=1697528205.516903", "slides": "https://nips.cc/virtual/2023/poster/73054", "video": "https://nips.cc/virtual/2023/poster/73054", "author_site": "EUN BI YOON, Keehun Park, Sungwoong Kim, Sungbin Lim", "tldr": "", "abstract": "Investigating the optimal stochastic process beyond Gaussian for noise injection in a score-based generative model remains an open question. Brownian motion is a light-tailed process with continuous paths, which leads to a slow convergence rate for the Number of Function Evaluation (NFE). Recent studies have shown that diffusion models suffer from mode-collapse issues on imbalanced data.\nIn order to overcome the limitations of Brownian motion, we introduce a novel score-based generative model referred to as L\u00e9vy-It\u014d Model (LIM). This model utilizes isotropic $\\alpha$-stable L\u00e9vy processes. We first derive an exact reverse-time stochastic differential equation driven by the L\u00e9vy process and develop the corresponding fractional denoising score matching. The proposed generative model takes advantage of the heavy-tailed properties of the L\u00e9vy process. Our experimental results show LIM allows for faster and more diverse sampling while maintaining high fidelity compared to existing diffusion models across various image datasets such as CIFAR10, CelebA, and imbalanced dataset CIFAR10LT. Comparing our results to those of DDPM with 3.21 Fr\u00e9chet Inception Distance (FID) and 0.6437 Recall on the CelebA dataset, we achieve 1.58 FID and 0.7006 Recall using the same architecture. 
LIM shows the best performance in NFE 500 with $2\\times$ faster total wall-clock time than the baseline.", "keywords": "Generative Model;Score-based Method;L\u00e9vy processes", "primary_area": "", "supplementary_material": "/attachment/9615dbcd06f5e1fbdb1375ee841615d8db86b731.zip", "author": "Eunbi Yoon;Keehun Park;Sungwoong Kim;Sungbin Lim", "authorids": "~Eunbi_Yoon1;~Keehun_Park1;~Sungwoong_Kim2;~Sungbin_Lim1", "gender": "F;M;M;M", "homepage": "https://www.notion.so/a40c00e4fc73410191966a7078c46ec3?v=dd6e536bca534182b442d1a4a87a1e4a;https://keeeehun.github.io/;;https://www.sungbin-lim.net", "dblp": ";;74/8063;206/6907", "google_scholar": ";;https://scholar.google.co.kr/citations?user=3DSA90AAAAAJ;https://scholar.google.com/citations?hl=ko", "orcid": ";;;0000-0003-2684-2022", "linkedin": ";;;sungbin-lim-43b739b5/", "or_profile": "~Eunbi_Yoon1;~Keehun_Park1;~Sungwoong_Kim2;~Sungbin_Lim1", "aff": "Ulsan National Institute of Science and Technology;;Kakao Brain;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;;kakaobrain.com;unist.ac.kr", "position": "MS student;;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nyoon2023scorebased,\ntitle={Score-based Generative Models with L\\'evy Processes},\nauthor={Eunbi Yoon and Keehun Park and Sungwoong Kim and Sungbin Lim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0Wp3VHX0Gm}\n}", "github": "", "project": "", "reviewers": "xP7H;Z4Ey;QCLF;9rZc", "pdf_size": 20463725, "rating": "5;7;7;7", "confidence": "3;4;4;4", "soundness": "2;4;3;4", "novelty": "2;3;3;4", "presentation": "3;4;3;4", "wc_summary": "111;159;258;111", "wc_strengths": "72;137;53;44", "wc_weaknesses": "157;30;219;83", "wc_questions": "4;112;240;158", "wc_limitations": "18;0;10;4", "wc_review": "362;438;780;400", "wc_reply_reviewers": "12;10;0;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 159.75, 60.014060852436906 ], "wc_strengths_avg": [ 76.5, 36.362755671153415 ], "wc_weaknesses_avg": [ 122.25, 71.7961524038719 ], "wc_questions_avg": [ 128.5, 85.25696452489967 ], "wc_limitations_avg": [ 8.0, 6.782329983125268 ], "wc_review_avg": [ 495.0, 166.7243233604503 ], "wc_reply_reviewers_avg": [ 8.5, 4.9749371855331 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4335799164305647001&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "unist.ac.kr;;kakaobrain.com;unist.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology;Kakao Brain", "aff_unique_dep": ";", "aff_unique_url": "https://www.unist.ac.kr;https://brain.kakao.com", "aff_unique_abbr": "UNIST;Kakao Brain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "On Occlusions in Video Action Detection: Benchmark Datasets And Training Recipes", "status": "Poster", "track": "Datasets & Benchmarks", "site": 
"https://nips.cc/virtual/2023/poster/73721", "id": "0cltUI2Sto", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3640c2d3e58f716c67066046318db0f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=0cltUI2Sto", "openreview": "https://openreview.net/forum?id=0cltUI2Sto", "poster": "/media/PosterPDFs/NeurIPS%202023/73721.png?t=1699494223.0217297", "slides": "https://nips.cc/virtual/2023/poster/73721", "video": "https://nips.cc/virtual/2023/poster/73721", "author_site": "Rajat Modi, Vibhav Vineet, Yogesh Rawat", "tldr": "", "abstract": "This paper explores the impact of occlusions in video action detection. We facilitate\nthis study by introducing five new benchmark datasets namely O-UCF and O-\nJHMDB consisting of synthetically controlled static/dynamic occlusions, OVIS-\nUCF and OVIS-JHMDB consisting of occlusions with realistic motions and Real-\nOUCF for occlusions in realistic-world scenarios. We formally confirm an intuitive\nexpectation: existing models suffer a lot as occlusion severity is increased and\nexhibit different behaviours when occluders are static vs when they are moving.\nWe discover several intriguing phenomenon emerging in neural nets: 1) transformers\ncan naturally outperform CNN models which might have even used occlusion as a\nform of data augmentation during training 2) incorporating symbolic-components\nlike capsules to such backbones allows them to bind to occluders never even seen\nduring training and 3) Islands of agreement (similar to the ones hypothesized in\nHinton et Al\u2019s GLOM) can emerge in realistic images/videos without instance-level\nsupervision, distillation or contrastive-based objectives(eg. video-textual training).\nSuch emergent properties allow us to derive simple yet effective training recipes\nwhich lead to robust occlusion models inductively satisfying the first two stages of\nthe binding mechanism (grouping/segregation). Models leveraging these recipes\noutperform existing video action-detectors under occlusion by 32.3% on O-UCF,\n32.7% on O-JHMDB & 2.6% on Real-OUCF in terms of the vMAP metric. 
The code for this work has been released at https://github.com/rajatmodi62/OccludedActionBenchmark.", "keywords": "mortal computation;occlusion;capsules;transformers;action detection", "primary_area": "", "supplementary_material": "/attachment/9b4e0a8ede8c50ae07e31f3dfe3e79b242946256.pdf", "author": "Rajat Modi;Vibhav Vineet;Yogesh S Rawat", "authorids": "~Rajat_Modi1;~Vibhav_Vineet5;~Yogesh_S_Rawat1", "gender": "male;;M", "homepage": "https://www.linkedin.com/in/rajat-modi-54377877/;;https://www.crcv.ucf.edu/person/rawat/", "dblp": ";;148/2258", "google_scholar": "https://scholar.google.ca/citations?user=Ypzln1UAAAAJ;;D_JvEcwAAAAJ", "orcid": ";;", "linkedin": "rajat-modi-54377877/;;", "or_profile": "~Rajat_Modi1;~Vibhav_Vineet5;~Yogesh_S_Rawat1", "aff": "University of Central Florida;;University of Central Florida", "aff_domain": "ucf.edu;;ucf.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nmodi2023on,\ntitle={On Occlusions in Video Action Detection: Benchmark Datasets And Training Recipes},\nauthor={Rajat Modi and Vibhav Vineet and Yogesh S Rawat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=0cltUI2Sto}\n}", "github": "", "project": "", "reviewers": "cTJi;yxN6;NCed;2htk", "pdf_size": 23222895, "rating": "3;6;6;7", "confidence": "4;3;4;4", "wc_summary_and_contributions": "62;56;73;58", "wc_strengths": "32;57;24;44", "wc_improvement": "178;69;148;31", "wc_limitations": "13;1;2;24", "wc_correctness": "58;3;18;13", "wc_clarity": "71;27;66;15", "wc_relation_to_prior_work": "1;8;1;23", "wc_documentation": "11;1;7;35", "wc_additional_feedback": "1;1;1;1", "wc_review": "427;223;340;244", "wc_reply_reviewers": "0;0;38;0", "wc_reply_authors": "1665;449;937;865", "reply_reviewers": "0;0;1;0", "reply_authors": "4;2;3;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 62.25, 6.5717197140474575 ], "wc_strengths_avg": [ 39.25, 12.47747971346778 ], "wc_improvement_avg": [ 106.5, 59.03600596246328 ], "wc_limitations_avg": [ 10.0, 9.354143466934854 ], "wc_correctness_avg": [ 23.0, 20.91650066335189 ], "wc_clarity_avg": [ 44.75, 24.190649019817553 ], "wc_relation_to_prior_work_avg": [ 8.25, 8.98262211161084 ], "wc_documentation_avg": [ 13.5, 12.913171570144957 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 308.5, 81.40178130729082 ], "wc_reply_reviewers_avg": [ 9.5, 16.454482671904334 ], "wc_reply_authors_avg": [ 979.0, 437.68024858336935 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12266948153889131316&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ucf.edu;;ucf.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Central Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ucf.edu", "aff_unique_abbr": "UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "SAMoSSA: Multivariate Singular Spectrum Analysis with Stochastic Autoregressive Noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73053", "id": "0e4eiXoUn5", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a8d295871250443f9747d239925b89d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0e4eiXoUn5", "openreview": "https://openreview.net/forum?id=0e4eiXoUn5", "poster": "/media/PosterPDFs/NeurIPS%202023/73053.png?t=1701838758.491403", "slides": "https://nips.cc/virtual/2023/poster/73053", "video": "https://nips.cc/virtual/2023/poster/73053", "author_site": "Abdullah Alomar, Munther Dahleh, Sean Mann, Devavrat Shah", "tldr": "", "abstract": "The well-established practice of time series analysis involves estimating deterministic, non-stationary trend and seasonality components followed by learning the residual stochastic, stationary components. Recently, it has been shown that one can learn the deterministic non-stationary components accurately using multivariate Singular Spectrum Analysis (mSSA) in the absence of a correlated stationary component; meanwhile, in the absence of deterministic non-stationary components, the Autoregressive (AR) stationary component can also be learnt readily, e.g. via Ordinary Least Squares (OLS). However, a theoretical underpinning of multi-stage learning algorithms involving both deterministic and stationary components has been absent in the literature despite its pervasiveness. We resolve this open question by establishing desirable theoretical guarantees for a natural two-stage algorithm, where mSSA is first applied to estimate the non-stationary components despite the presence of a correlated stationary AR component, which is subsequently learned from the residual time series. We provide a finite-sample forecasting consistency bound for the proposed algorithm, SAMoSSA, which is data-driven and thus requires minimal parameter tuning. To establish theoretical guarantees, we overcome three hurdles: (i) we characterize the spectra of Page matrices of stable AR processes, thus extending the analysis of mSSA; (ii) we extend the analysis of AR process identification in the presence of arbitrary bounded perturbations; (iii) we characterize the out-of-sample or forecasting error, as opposed to solely considering model identification. Through representative empirical studies, we validate the superior performance of SAMoSSA compared to existing baselines. Notably, SAMoSSA's ability to account for AR noise structure yields improvements ranging from 5% to 37% across various benchmark datasets.", "keywords": "Time series;System Identification;Singular Spectrum Analysis", "primary_area": "", "supplementary_material": "/attachment/5fccfe8bb6cf53663b909c4d09d44f6534c3c1e0.zip", "author": "Abdullah Omar Alomar;Munther A. Dahleh;Sean Mann;Devavrat Shah", "authorids": "~Abdullah_Omar_Alomar1;~Munther_A._Dahleh1;seanmann@mit.edu;~Devavrat_Shah1", "gender": "M;M;;M", "homepage": "https://abdullaho.me;https://dahleh.lids.mit.edu/about;;http://devavrat.mit.edu", "dblp": "202/0155;24/2542;;73/3881", "google_scholar": "https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Abdullah_Omar_Alomar1;~Munther_A._Dahleh1;seanmann@mit.edu;~Devavrat_Shah1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;;mit.edu", "position": "PhD student;Full Professor;;Professor", "bibtex": "@inproceedings{\nalomar2023samossa,\ntitle={{SAM}o{SSA}: Multivariate Singular Spectrum Analysis with Stochastic Autoregressive Noise},\nauthor={Abdullah Omar Alomar and Munther A. 
Dahleh and Sean Mann and Devavrat Shah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0e4eiXoUn5}\n}", "github": "", "project": "", "reviewers": "ki9S;s3mh;rVQz;xDu7;KKGZ;Xzdt", "pdf_size": 1068124, "rating": "6;6;6;7;7;7", "confidence": "3;4;3;2;2;2", "soundness": "4;3;3;4;3;3", "novelty": "3;3;3;3;4;3", "presentation": "3;3;3;3;2;2", "wc_summary": "53;86;180;59;81;138", "wc_strengths": "15;49;72;81;95;248", "wc_weaknesses": "12;183;169;71;62;261", "wc_questions": "83;172;84;22;114;27", "wc_limitations": "2;12;13;17;11;12", "wc_review": "165;502;518;250;363;686", "wc_reply_reviewers": "0;174;34;83;0;48", "wc_reply_authors": "0;360;0;0;0;33", "reply_reviewers": "0;1;1;1;0;1", "reply_authors": "1;2;1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 99.5, 45.2575960475145 ], "wc_strengths_avg": [ 93.33333333333333, 73.74882296612529 ], "wc_weaknesses_avg": [ 126.33333333333333, 85.08753662487175 ], "wc_questions_avg": [ 83.66666666666667, 51.21414735098974 ], "wc_limitations_avg": [ 11.166666666666666, 4.524623986832743 ], "wc_review_avg": [ 414.0, 175.13899242220924 ], "wc_reply_reviewers_avg": [ 56.5, 59.82126154916271 ], "wc_reply_authors_avg": [ 65.5, 132.25448952682098 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11759561769533458377&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "mit.edu;mit.edu;;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Finite-Particle Convergence Rate for Stein Variational Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73052", "id": "0eRDQQK2TW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54e5d7af6250ccab796ad7fe75663ba5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0eRDQQK2TW", "openreview": "https://openreview.net/forum?id=0eRDQQK2TW", "poster": "/media/PosterPDFs/NeurIPS%202023/73052.png?t=1702168641.9336262", "slides": "https://nips.cc/virtual/2023/poster/73052", "video": "https://nips.cc/virtual/2023/poster/73052", "author_site": "Jiaxin Shi, Lester Mackey", "tldr": "", "abstract": "We provide the first finite-particle convergence rate for Stein variational gradient descent (SVGD), a popular algorithm for approximating a probability distribution with a collection of particles. Specifically, whenever the target distribution is sub-Gaussian with a Lipschitz score, SVGD with $n$ particles and an appropriate step size sequence drives the kernel Stein discrepancy to zero at an order ${1/}{\\sqrt{\\log\\log n}}$ rate. 
We suspect that the dependence on $n$ can be improved, and we hope that our explicit, non-asymptotic proof strategy will serve as a template for future refinements.", "keywords": "Stein Variational Gradient Descent;SVGD;variational inference;sampling;optimization;Stein's method", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Shi;Lester Mackey", "authorids": "~Jiaxin_Shi1;~Lester_Mackey1", "gender": "M;M", "homepage": "http://jiaxins.io;https://stanford.edu/~lmackey", "dblp": "151/7509;05/2961", "google_scholar": "juZXbFoAAAAJ;erv7TP0AAAAJ", "orcid": ";0000-0002-1102-0387", "linkedin": ";lester-mackey-5902909", "or_profile": "~Jiaxin_Shi1;~Lester_Mackey1", "aff": "Stanford University;Microsoft Research New England", "aff_domain": "stanford.edu;microsoft.com", "position": "Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nshi2023a,\ntitle={A Finite-Particle Convergence Rate for Stein Variational Gradient Descent},\nauthor={Jiaxin Shi and Lester Mackey},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0eRDQQK2TW}\n}", "github": "", "project": "", "reviewers": "bq3E;g2wP;awG8;igw1", "pdf_size": 387761, "rating": "5;6;7;8", "confidence": "1;3;2;2", "soundness": "2;4;3;3", "novelty": "2;3;3;4", "presentation": "2;3;4;4", "wc_summary": "130;99;132;92", "wc_strengths": "36;191;180;26", "wc_weaknesses": "37;129;86;1", "wc_questions": "1;56;136;127", "wc_limitations": "1;14;49;1", "wc_review": "205;489;583;247", "wc_reply_reviewers": "0;46;17;93", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 113.25, 17.93564885918544 ], "wc_strengths_avg": [ 108.25, 77.42859613863601 ], "wc_weaknesses_avg": [ 63.25, 48.48904515455011 ], "wc_questions_avg": [ 80.0, 55.14072904849917 ], "wc_limitations_avg": [ 16.25, 19.638928178492836 ], "wc_review_avg": [ 381.0, 159.21683328090657 ], "wc_reply_reviewers_avg": [ 39.0, 35.249113464029136 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15195628859570849120&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "stanford.edu;microsoft.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-new-england", "aff_unique_abbr": "Stanford;MSR NE", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;New England", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Modeling Dynamics over Meshes with Gauge Equivariant Nonlinear Message Passing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73051", "id": "0eXniewIvr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/317470b3fde29f3bb8d6dee563afffc4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0eXniewIvr", "openreview": 
"https://openreview.net/forum?id=0eXniewIvr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73051", "video": "https://nips.cc/virtual/2023/poster/73051", "author_site": "Jung Yeon Park, Lawson Wong, Robin Walters", "tldr": "", "abstract": "Data over non-Euclidean manifolds, often discretized as surface meshes, naturally arise in computer graphics and biological and physical systems. In particular, solutions to partial differential equations (PDEs) over manifolds depend critically on the underlying geometry. While graph neural networks have been successfully applied to PDEs, they do not incorporate surface geometry and do not consider local gauge symmetries of the manifold. Alternatively, recent works on gauge equivariant convolutional and attentional architectures on meshes leverage the underlying geometry but underperform in modeling surface PDEs with complex nonlinear dynamics. To address these issues, we introduce a new gauge equivariant architecture using nonlinear message passing. Our novel architecture achieves higher performance than either convolutional or attentional networks on domains with highly complex and nonlinear dynamics. However, similar to the non-mesh case, design trade-offs favor convolutional, attentional, or message passing networks for different tasks; we investigate in which circumstances our message passing method provides the most benefit.", "keywords": "message passing;dynamics;mesh;symmetry;equivariance", "primary_area": "", "supplementary_material": "", "author": "Jung Yeon Park;Lawson L.S. Wong;Robin Walters", "authorids": "~Jung_Yeon_Park1;~Lawson_L.S._Wong2;~Robin_Walters1", "gender": "M;M;M", "homepage": ";http://www.robinwalters.com;https://www.ccs.neu.edu/home/lsw/", "dblp": "240/2704;258/3416;35/2573", "google_scholar": "LZSRm9sAAAAJ;fnprJmUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jung_Yeon_Park1;~Robin_Walters1;~Lawson_L._S._Wong1", "aff": "Northeastern University;Northeastern University ;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;northeastern.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npark2023modeling,\ntitle={Modeling Dynamics over Meshes with Gauge Equivariant Nonlinear Message Passing},\nauthor={Jung Yeon Park and Lawson L.S. 
Wong and Robin Walters},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0eXniewIvr}\n}", "github": "", "project": "", "reviewers": "P6sJ;ASko;CzKN;yPBJ;GgMe", "pdf_size": 17140564, "rating": "3;5;6;7;7", "confidence": "5;3;2;4;2", "soundness": "2;2;3;3;4", "novelty": "2;2;3;2;3", "presentation": "3;3;4;3;4", "wc_summary": "36;69;92;72;60", "wc_strengths": "22;47;64;33;61", "wc_weaknesses": "204;125;49;86;160", "wc_questions": "2;20;116;179;147", "wc_limitations": "2;8;69;9;13", "wc_review": "266;269;390;379;441", "wc_reply_reviewers": "96;38;157;211;129", "wc_reply_authors": "534;32;221;301;177", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 65.8, 18.203296404772406 ], "wc_strengths_avg": [ 45.4, 16.0822883943797 ], "wc_weaknesses_avg": [ 124.8, 54.33746405565869 ], "wc_questions_avg": [ 92.8, 69.92967896394205 ], "wc_limitations_avg": [ 20.2, 24.652788888886384 ], "wc_review_avg": [ 349.0, 69.76245408527427 ], "wc_reply_reviewers_avg": [ 126.2, 58.03240474079977 ], "wc_reply_authors_avg": [ 253.0, 165.4726563514347 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6416889479197478, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18265238166311994083&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "northeastern.edu;northeastern.edu;northeastern.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Rethinking Semi-Supervised Imbalanced Node Classification from Bias-Variance Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73050", "id": "0gvtoxhvMY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d1233f819202ade06023346df80a6d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0gvtoxhvMY", "openreview": "https://openreview.net/forum?id=0gvtoxhvMY", "poster": "/media/PosterPDFs/NeurIPS%202023/73050.png?t=1702115401.9719656", "slides": "https://nips.cc/virtual/2023/poster/73050", "video": "https://nips.cc/virtual/2023/poster/73050", "author_site": "Divin Yan, Gengchen Wei, Chen Yang, Shengzhong Zhang, zengfeng Huang", "tldr": "", "abstract": "This paper introduces a new approach to address the issue of class imbalance in graph neural networks (GNNs) for learning on graph-structured data. Our approach integrates imbalanced node classification and Bias-Variance Decomposition, establishing a theoretical framework that closely relates data imbalance to model variance. We also leverage graph augmentation technique to estimate the variance and design a regularization term to alleviate the impact of imbalance. 
Exhaustive tests are conducted on multiple benchmarks, including naturally imbalanced datasets and public-split class-imbalanced datasets, demonstrating that our approach outperforms state-of-the-art methods in various imbalanced scenarios. This work provides a novel theoretical perspective for addressing the problem of imbalanced node classification in GNNs.", "keywords": "Imbalanced Node Classification;Bias-Variance Decomposition;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/ae5d7621244810cf9aee1faba2e1a121dee5d177.zip", "author": "Divin Yan;Gengchen Wei;Chen Yang;Shengzhong Zhang;Zengfeng Huang", "authorids": "~Divin_Yan1;~Gengchen_Wei1;~Chen_Yang11;~Shengzhong_Zhang1;~Zengfeng_Huang1", "gender": "M;;M;M;M", "homepage": "https://divinyan.com/;https://wei-gongzi.github.io/;;https://szzhang17.github.io/;https://zengfenghuang.github.io/", "dblp": "359/6307.html;;;255/8703;97/9726", "google_scholar": "-Vv6hJsAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;bWD48lgAAAAJ;https://scholar.google.com.hk/citations?user=FwNBuXUAAAAJ", "orcid": "0009-0009-2880-3124;;;0000-0003-1783-6835;0000-0003-2671-7483", "linkedin": ";;;;", "or_profile": "~Divin_Yan1;~Gengchen_Wei1;~Chen_Yang11;~Shengzhong_Zhang1;~Zengfeng_Huang1", "aff": "ISTBI & School of Data Science, Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu", "position": "Applied Mathmatics Research Master Student;MS student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyan2023rethinking,\ntitle={Rethinking Semi-Supervised Imbalanced Node Classification from Bias-Variance Decomposition},\nauthor={Divin Yan and Gengchen Wei and Chen Yang and Shengzhong Zhang and Zengfeng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0gvtoxhvMY}\n}", "github": "", "project": "", "reviewers": "j7Kz;vtyV;bhDb;8yVE;zTQi", "pdf_size": 837093, "rating": "4;5;5;6;6", "confidence": "4;3;3;1;3", "soundness": "3;2;2;3;3", "novelty": "3;2;1;3;3", "presentation": "3;3;2;3;2", "wc_summary": "51;40;92;55;49", "wc_strengths": "35;28;13;25;72", "wc_weaknesses": "74;182;61;158;157", "wc_questions": "23;3;29;90;29", "wc_limitations": "1;1;16;1;79", "wc_review": "184;254;211;329;386", "wc_reply_reviewers": "0;21;45;56;14", "wc_reply_authors": "70;71;68;629;107", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 57.4, 17.984437717093076 ], "wc_strengths_avg": [ 34.6, 20.0059991002699 ], "wc_weaknesses_avg": [ 126.4, 49.09012120579863 ], "wc_questions_avg": [ 34.8, 29.205478938034897 ], "wc_limitations_avg": [ 19.6, 30.26284851100438 ], "wc_review_avg": [ 272.8, 74.87429465444066 ], "wc_reply_reviewers_avg": [ 27.2, 20.488045294756645 ], "wc_reply_authors_avg": [ 189.0, 220.47675614449702 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13983919002343018229&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": 
"fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "School of Data Science", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Described Object Detection: Liberating Object Detection with Flexible Expressions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73049", "id": "0hwq2vOHT4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f9fd24fd32eccc14cd3ecd3716a1cbf8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0hwq2vOHT4", "openreview": "https://openreview.net/forum?id=0hwq2vOHT4", "poster": "/media/PosterPDFs/NeurIPS%202023/73049.png?t=1699329882.1220973", "slides": "https://nips.cc/virtual/2023/poster/73049", "video": "https://nips.cc/virtual/2023/poster/73049", "author_site": "Chi Xie, Zhao Zhang, Yixuan Wu, Feng Zhu, Rui Zhao, Shuang Liang", "tldr": "", "abstract": "Detecting objects based on language information is a popular task that includes Open-Vocabulary object Detection (OVD) and Referring Expression Comprehension (REC). In this paper, we advance them to a more practical setting called *Described Object Detection* (DOD) by expanding category names to flexible language expressions for OVD and overcoming the limitation of REC only grounding the pre-existing object. We establish the research foundation for DOD by constructing a *Description Detection Dataset* ($D^3$). This dataset features flexible language expressions, whether short category names or long descriptions, and annotating all described objects on all images without omission. By evaluating previous SOTA methods on $D^3$, we find some troublemakers that fail current REC, OVD, and bi-functional methods. REC methods struggle with confidence scores, rejecting negative instances, and multi-target scenarios, while OVD methods face constraints with long and complex descriptions. Recent bi-functional methods also do not work well on DOD due to their separated training procedures and inference strategies for REC and OVD tasks. Building upon the aforementioned findings, we propose a baseline that largely improves REC methods by reconstructing the training data and introducing a binary classification sub-task, outperforming existing methods. 
Data and code are available at https://github.com/shikras/d-cube and related works are tracked in https://github.com/Charles-Xie/awesome-described-object-detection.", "keywords": "open-vocabulary object detection;referring expression comprehension;multi-modal detection", "primary_area": "", "supplementary_material": "/attachment/7f39cb9459994dafd8404fd61cf8097dda6afb31.pdf", "author": "Chi Xie;Zhao Zhang;Yixuan Wu;Feng Zhu;Rui Zhao;Shuang Liang", "authorids": "~Chi_Xie1;~Zhao_Zhang6;~Yixuan_Wu2;~Feng_Zhu1;~Rui_Zhao6;~Shuang_Liang5", "gender": "M;;M;M;F;F", "homepage": ";http://zhaozhang.net/;http://home.ustc.edu.cn/~zhufengx/;http://zhaorui.xyz/;https://liangshuang-cv.github.io/;https://blog.csdn.net/weixin_45538252?spm=1000.2115.3001.10640", "dblp": ";;71/2791-6;26/2578-1;20/1080-1.html;", "google_scholar": "1Yz2NrEAAAAJ;Wcj40PMAAAAJ;oO53gjEAAAAJ;1c9oQNMAAAAJ;DV3VBuQAAAAJ;zjAxJcwAAAAJ", "orcid": "0000-0002-5808-1742;0000-0002-1521-8163;;;0000-0003-0457-6093;", "linkedin": ";https://linkedin.com/in/zhao-zhang-7159461a1;;;;", "or_profile": "~Chi_Xie1;~Zhao_Zhang6;~Feng_Zhu1;~Rui_Zhao6;~Shuang_Liang5;~yixuan_Wu1", "aff": "Tongji University;Sensetime Research;SenseTime Group LTD;SenseTime Research;Tongji University;Zhejiang University", "aff_domain": "tongji.edu.cn;sensetime.com;sensetime.com;sensetime.com;tongji.edu.cn;zju.edu.cn", "position": "PhD student;Researcher;Researcher;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nxie2023described,\ntitle={Described Object Detection: Liberating Object Detection with Flexible Expressions},\nauthor={Chi Xie and Zhao Zhang and Yixuan Wu and Feng Zhu and Rui Zhao and Shuang Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0hwq2vOHT4}\n}", "github": "", "project": "", "reviewers": "YHXi;EwWL;SdJt;wkHv;Q5JG", "pdf_size": 2380553, "rating": "4;5;5;7;7", "confidence": "5;4;5;2;5", "soundness": "3;3;3;4;1", "novelty": "3;2;3;3;1", "presentation": "3;3;3;2;3", "wc_summary": "90;67;167;92;28", "wc_strengths": "21;72;120;188;16", "wc_weaknesses": "447;193;244;245;546", "wc_questions": "141;6;3;17;66", "wc_limitations": "14;9;32;11;1", "wc_review": "713;347;566;553;657", "wc_reply_reviewers": "1451;72;65;35;171", "wc_reply_authors": "2155;0;229;0;559", "reply_reviewers": "3;1;1;1;2", "reply_authors": "4;1;2;1;3", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 4.2, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 88.8, 45.38457887873369 ], "wc_strengths_avg": [ 83.4, 64.57120101097702 ], "wc_weaknesses_avg": [ 335.0, 136.82835963352042 ], "wc_questions_avg": [ 46.6, 52.3702205456498 ], "wc_limitations_avg": [ 13.4, 10.248902380255165 ], "wc_review_avg": [ 567.2, 124.91020774940694 ], "wc_reply_reviewers_avg": [ 358.8, 548.0125546007135 ], "wc_reply_authors_avg": [ 588.6, 809.5303823822799 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5144957554275265, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12900918181203680176&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 6, "email": "tongji.edu.cn;sensetime.com;sensetime.com;sensetime.com;tongji.edu.cn;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;0;3", "aff_unique_norm": "Tongji University;SenseTime;SenseTime 
Group;Zhejiang University", "aff_unique_dep": ";Research;;", "aff_unique_url": "https://www.tongji.edu.cn;https://www.sensetime.com/;https://www.sensetime.com;https://www.zju.edu.cn", "aff_unique_abbr": "Tongji;SenseTime;SenseTime;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Model Sparsity Can Simplify Machine Unlearning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73048", "id": "0jZH883i34", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a204aa68ab4e970e1ceccfb5b5cdc5e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0jZH883i34", "openreview": "https://openreview.net/forum?id=0jZH883i34", "poster": "/media/PosterPDFs/NeurIPS%202023/73048.png?t=1702344573.8024278", "slides": "https://nips.cc/virtual/2023/poster/73048", "video": "https://nips.cc/virtual/2023/poster/73048", "author_site": "jinghan jia, Jiancheng Liu, Parikshit Ram, Yuguang Yao, Gaowen Liu, Yang Liu, PRANAY SHARMA, Sijia Liu", "tldr": "", "abstract": "In response to recent data regulation requirements, machine unlearning (MU) has emerged as a critical process to remove the influence of specific examples from a given model. Although exact unlearning can be achieved through complete model retraining using the remaining dataset, the associated computational costs have driven the development of efficient, approximate unlearning techniques. Moving beyond data-centric MU approaches, our study introduces a novel model-based perspective: model sparsification via weight pruning, which is capable of reducing the gap between exact unlearning and approximate unlearning. We show in both theory and practice that model sparsity can boost the multi-criteria unlearning performance of an approximate unlearner, closing the approximation gap, while continuing to be efficient. This leads to a new MU paradigm, termed prune first, then unlearn, which infuses a sparse prior to the unlearning process. Building on this insight, we also develop a sparsity-aware unlearning method that utilizes sparsity regularization to enhance the training process of approximate unlearning. Extensive experiments show that our proposals consistently benefit MU in various unlearning scenarios. A notable highlight is the 77% unlearning efficacy gain of fine-tuning (one of the simplest approximate unlearning methods) when using our proposed sparsity-aware unlearning method. Furthermore, we showcase the practical impact of our proposed MU methods through two specific use cases: defending against backdoor attacks, and enhancing transfer learning through source class removal. These applications demonstrate the versatility and effectiveness of our approaches in addressing a variety of machine learning challenges beyond unlearning for data privacy. 
Codes are available at https://github.com/OPTML-Group/Unlearn-Sparse.", "keywords": "Machine unlearning;model pruning", "primary_area": "", "supplementary_material": "/attachment/3d18b32b8da080311c768e9cbbfc0628f75507a7.zip", "author": "Jinghan Jia;Jiancheng Liu;Parikshit Ram;Yuguang Yao;Gaowen Liu;Yang Liu;Pranay Sharma;Sijia Liu", "authorids": "~Jinghan_Jia1;~Jiancheng_Liu2;~Parikshit_Ram1;~Yuguang_Yao1;~Gaowen_Liu4;~Yang_Liu3;~Pranay_Sharma2;~Sijia_Liu1", "gender": "M;M;M;M;F;M;M;M", "homepage": "https://jinghanjia.netlify.app/;https://ljcc0930.github.io/;https://rithram.github.io/;https://www.cse.msu.edu/~yaoyugua/;;http://www.yliuu.com;https://lsjxjtu.github.io/;https://sites.google.com/view/pranay-sharma/home", "dblp": "286/5392;74/3002;99/8314;238/9467;136/1007;51/3710-18;128/6972-1;81/9976", "google_scholar": "bqP_zxYAAAAJ;ReWNzl4AAAAJ;JaXmmnkAAAAJ;-chIdAkAAAAJ;NIv_aeQAAAAJ;jKrIVCIAAAAJ;C7dO_UgAAAAJ;QR-VKssAAAAJ", "orcid": ";;0000-0002-9456-029X;;0009-0000-9194-1233;0000-0001-8420-6011;;", "linkedin": "jinghan-jia-5194451ba/;;parikshit-ram-4861325/;tonyyaomsu/;;;;", "or_profile": "~Jinghan_Jia1;~Jiancheng_Liu2;~Parikshit_Ram1;~Yuguang_Yao1;~Gaowen_Liu4;~Yang_Liu3;~Sijia_Liu1;~PRANAY_SHARMA1", "aff": "Michigan State University;Michigan State University;International Business Machines;Michigan State University;Cisco Systems;University of California, Santa Cruz;Michigan State University;Carnegie Mellon University", "aff_domain": "msu.edu;msu.edu;ibm.com;msu.edu;cisco.com;ucsc.edu;msu.edu;cmu.edu", "position": "PhD student;MS student;Principal Researcher;PhD student;Researcher;Assistant Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\njia2023model,\ntitle={Model Sparsity Can Simplify Machine Unlearning},\nauthor={Jinghan Jia and Jiancheng Liu and Parikshit Ram and Yuguang Yao and Gaowen Liu and Yang Liu and Pranay Sharma and Sijia Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0jZH883i34}\n}", "github": "", "project": "", "reviewers": "hHiz;r13y;MsfJ;8xJa", "pdf_size": 1897153, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;4;4;4", "wc_summary": "50;122;57;29", "wc_strengths": "118;113;115;125", "wc_weaknesses": "325;137;271;109", "wc_questions": "26;468;194;87", "wc_limitations": "77;86;34;25", "wc_review": "596;926;671;375", "wc_reply_reviewers": "107;718;58;23", "wc_reply_authors": "847;1739;47;41", "reply_reviewers": "2;2;1;1", "reply_authors": "4;5;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 64.5, 34.7598906787694 ], "wc_strengths_avg": [ 117.75, 4.548351349665063 ], "wc_weaknesses_avg": [ 210.5, 90.1041064547005 ], "wc_questions_avg": [ 193.75, 169.37292434152513 ], "wc_limitations_avg": [ 55.5, 26.386549603917523 ], "wc_review_avg": [ 642.0, 196.7879569485897 ], "wc_reply_reviewers_avg": [ 226.5, 285.33182437295704 ], "wc_reply_authors_avg": [ 668.5, 699.6161447536784 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5268307963908119462&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 8, "email": 
"msu.edu;msu.edu;ibm.com;msu.edu;cisco.com;ucsc.edu;msu.edu;cmu.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;2;3;0;4", "aff_unique_norm": "Michigan State University;International Business Machines Corporation;Cisco Systems;University of California, Santa Cruz;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.msu.edu;https://www.ibm.com;https://www.cisco.com;https://www.ucsc.edu;https://www.cmu.edu", "aff_unique_abbr": "MSU;IBM;Cisco;UCSC;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Graph Contrastive Learning with Stable and Scalable Spectral Encoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73047", "id": "0kz5RmHxmE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e9a6582caa59fda0302349702965171-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0kz5RmHxmE", "openreview": "https://openreview.net/forum?id=0kz5RmHxmE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73047", "video": "https://nips.cc/virtual/2023/poster/73047", "author_site": "Deyu Bo, Yuan Fang, Yang Liu, Chuan Shi", "tldr": "", "abstract": "Graph contrastive learning (GCL) aims to learn representations by capturing the agreements between different graph views. Traditional GCL methods generate views in the spatial domain, but it has been recently discovered that the spectral domain also plays a vital role in complementing spatial views. However, existing spectral-based graph views either ignore the eigenvectors that encode valuable positional information or suffer from high complexity when trying to address the instability of spectral features. To tackle these challenges, we first design an informative, stable, and scalable spectral encoder, termed EigenMLP, to learn effective representations from the spectral features. Theoretically, EigenMLP is invariant to the rotation and reflection transformations on eigenvectors and robust against perturbations. Then, we propose a spatial-spectral contrastive framework (Sp$^{2}$GCL) to capture the consistency between the spatial information encoded by graph neural networks and the spectral information learned by EigenMLP, thus effectively fusing these two graph views. 
Experiments on the node- and graph-level datasets show that our method not only learns effective graph representations but also achieves a 2--10x speedup over other spectral-based methods.", "keywords": "Graph Contrastive Learning;Spectral Embedding", "primary_area": "", "supplementary_material": "/attachment/00e5aa15c106dca9fbd7b57d3cc895044cad77f3.zip", "author": "Deyu Bo;Yuan Fang;Yang Liu;Chuan Shi", "authorids": "~Deyu_Bo1;~Yuan_Fang1;~Yang_Liu105;~Chuan_Shi1", "gender": "M;M;F;M", "homepage": "https://bdy9527.github.io/;http://www.yfang.site;https://liuyang-tian.github.io/;http://www.shichuan.org/", "dblp": "258/0824;22/981-1;51/3710-348.html;64/3041-1", "google_scholar": "m4rsQCAAAAAJ;XkBJjPUAAAAJ;https://scholar.google.com/citations?view_op=list_works;tUq_v90AAAAJ", "orcid": "0000-0003-2063-8223;0000-0002-4265-5289;0000-0002-6230-0282;0000-0002-3734-0266", "linkedin": ";;;", "or_profile": "~Deyu_Bo1;~Yuan_Fang1;~Yang_Liu105;~Chuan_Shi1", "aff": "Beijing University of Post and Telecommunication;Singapore Management University;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;smu.edu.sg;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;Assistant Professor;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nbo2023graph,\ntitle={Graph Contrastive Learning with Stable and Scalable Spectral Encoding},\nauthor={Deyu Bo and Yuan Fang and Yang Liu and Chuan Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0kz5RmHxmE}\n}", "github": "", "project": "", "reviewers": "QJUG;xYrm;9zn6;ieZM", "pdf_size": 427278, "rating": "4;5;5;6", "confidence": "3;5;4;4", "soundness": "2;4;3;2", "novelty": "2;3;2;2", "presentation": "2;3;2;2", "wc_summary": "47;47;88;123", "wc_strengths": "33;27;75;21", "wc_weaknesses": "126;109;66;173", "wc_questions": "38;33;48;122", "wc_limitations": "1;1;1;22", "wc_review": "245;217;278;461", "wc_reply_reviewers": "0;0;23;36", "wc_reply_authors": "0;0;15;23", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.25, 31.75984099456419 ], "wc_strengths_avg": [ 39.0, 21.213203435596427 ], "wc_weaknesses_avg": [ 118.5, 38.31775045589185 ], "wc_questions_avg": [ 60.25, 36.05811281806079 ], "wc_limitations_avg": [ 6.25, 9.093266739736606 ], "wc_review_avg": [ 300.25, 95.28739423449463 ], "wc_reply_reviewers_avg": [ 14.75, 15.449514555480375 ], "wc_reply_authors_avg": [ 9.5, 9.912113800799505 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16984245300650710948&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "bupt.edu.cn;smu.edu.sg;bupt.edu.cn;bupt.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Singapore Management University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.smu.edu.sg", "aff_unique_abbr": "BUPT;SMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;0;0", 
"aff_country_unique": "China;Singapore" }, { "title": "Convex-Concave Zero-Sum Markov Stackelberg Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73046", "id": "0rEJx5QAxt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d2f6f1dfbf9cd89a78c5a58ef0dec245-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0rEJx5QAxt", "openreview": "https://openreview.net/forum?id=0rEJx5QAxt", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73046", "video": "https://nips.cc/virtual/2023/poster/73046", "author_site": "Denizalp Goktas, Arjun Prakash, Amy Greenwald", "tldr": "", "abstract": "Zero-sum Markov Stackelberg games can be used to model myriad problems, in domains ranging from economics to human robot interaction. In this paper, we develop policy gradient methods that solve these games in continuous state and action settings using noisy gradient estimates computed from observed trajectories of play. When the games are convex-concave, we prove that our algorithms converge to Stackelberg equilibrium in polynomial time. We also show that reach-avoid problems are naturally modeled as convex-concave zero-sum Markov Stackelberg games, and that Stackelberg equilibrium policies are more effective than their Nash counterparts in these problems.", "keywords": "Stackelberg games;Equilibrium Computation;Policy Gradient", "primary_area": "", "supplementary_material": "", "author": "Denizalp Goktas;Arjun Prakash;Amy Greenwald", "authorids": "~Denizalp_Goktas1;arjun_prakash@brown.edu;~Amy_Greenwald1", "gender": "M;;", "homepage": "https://www.denizalpgoktas.com/about/;;", "dblp": "297/4657;;", "google_scholar": "sokzE0sAAAAJ;;", "orcid": ";;", "linkedin": "denizalp-goktas/;;", "or_profile": "~Denizalp_Goktas1;arjun_prakash@brown.edu;~Amy_Greenwald1", "aff": "Brown University;;", "aff_domain": "brown.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\ngoktas2023convexconcave,\ntitle={Convex-Concave Zero-Sum Stochastic Stackelberg Games},\nauthor={Denizalp Goktas and Arjun Prakash and Amy Greenwald},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0rEJx5QAxt}\n}", "github": "", "project": "", "reviewers": "sSzA;yZL6;5fZF;gXac", "pdf_size": 861064, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "36;43;52;130", "wc_strengths": "35;29;143;44", "wc_weaknesses": "316;169;72;38", "wc_questions": "225;2;170;37", "wc_limitations": "3;2;7;41", "wc_review": "615;245;444;290", "wc_reply_reviewers": "460;6;8;0", "wc_reply_authors": "963;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.25, 37.81120865563543 ], "wc_strengths_avg": [ 62.75, 46.63890543312525 ], "wc_weaknesses_avg": [ 148.75, 107.86420861434992 ], "wc_questions_avg": [ 108.5, 91.93611912627159 ], "wc_limitations_avg": [ 13.25, 16.13032857693854 ], "wc_review_avg": [ 398.5, 145.1525060066136 ], "wc_reply_reviewers_avg": [ 118.5, 197.18709389815552 ], "wc_reply_authors_avg": [ 240.75, 416.9912319222072 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6819050732928277126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "brown.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Brown University", "aff_unique_dep": "", "aff_unique_url": "https://www.brown.edu", "aff_unique_abbr": "Brown", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Transformer-based Planning for Symbolic Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73045", "id": "0rVXQEeFEL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8ffb4e3118280a66b192b6f06e0e2596-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0rVXQEeFEL", "openreview": "https://openreview.net/forum?id=0rVXQEeFEL", "poster": "/media/PosterPDFs/NeurIPS%202023/73045.png?t=1699219103.6478407", "slides": "https://nips.cc/virtual/2023/poster/73045", "video": "https://nips.cc/virtual/2023/poster/73045", "author_site": "Parshin Shojaee, Kazem Meidani, Amir Barati Farimani, Chandan Reddy", "tldr": "", "abstract": "Symbolic regression (SR) is a challenging task in machine learning that involves finding a mathematical expression for a function based on its values. Recent advancements in SR have demonstrated the effectiveness of pre-trained transformer models in generating equations as sequences, leveraging large-scale pre-training on synthetic datasets and offering notable advantages in terms of inference time over classical Genetic Programming (GP) methods. However, these models primarily rely on supervised pre-training objectives borrowed from text generation and overlook equation discovery goals like accuracy and complexity. To address this, we propose TPSR, a Transformer-based Planning strategy for Symbolic Regression that incorporates Monte Carlo Tree Search planning algorithm into the transformer decoding process. Unlike conventional decoding strategies, TPSR enables the integration of non-differentiable equation verification feedback, such as fitting accuracy and complexity, as external sources of knowledge into the transformer equation generation process. Extensive experiments on various datasets show that our approach outperforms state-of-the-art methods, enhancing the model's fitting-complexity trade-off, extrapolation abilities, and robustness to noise.", "keywords": "Symbolic Regression;Transformers;Planning;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/4025b899079abf2883447e3375cb57229609df8b.pdf", "author": "Parshin Shojaee;Kazem Meidani;Amir Barati Farimani;Chandan K. 
Reddy", "authorids": "~Parshin_Shojaee1;~Kazem_Meidani1;~Amir_Barati_Farimani2;~Chandan_K._Reddy1", "gender": "F;M;M;M", "homepage": "https://parshinsh.github.io/;https://mmeidani.github.io;https://sites.google.com/view/barati;https://creddy.net/", "dblp": "281/9859;277/0541;;42/1341", "google_scholar": "8k3qYv8AAAAJ;https://scholar.google.com/citations?hl=en;aH52nxkAAAAJ;LoXnMOIAAAAJ", "orcid": ";;0000-0002-2952-8576;", "linkedin": "parshinshojaee/;;amir-barati-farimani-a0b74169/;", "or_profile": "~Parshin_Shojaee1;~Kazem_Meidani1;~Amir_Barati_Farimani2;~Chandan_K._Reddy1", "aff": "Virginia Polytechnic Institute and State University;Carnegie Mellon University;Carnegie Mellon University;Amazon", "aff_domain": "vt.edu;cmu.edu;andrew.cmu.edu;amazon.com", "position": "PhD student;PhD student;Assistant Professor;Amazon Scholar", "bibtex": "@inproceedings{\nshojaee2023transformerbased,\ntitle={Transformer-based Planning for Symbolic Regression},\nauthor={Parshin Shojaee and Kazem Meidani and Amir Barati Farimani and Chandan K. Reddy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0rVXQEeFEL}\n}", "github": "", "project": "", "reviewers": "i1kS;ijKU;zZmW;QJmi;6B7y", "pdf_size": 9227216, "rating": "5;6;6;7;7", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "31;52;58;39;95", "wc_strengths": "45;48;76;63;102", "wc_weaknesses": "60;86;174;21;75", "wc_questions": "214;7;15;57;7", "wc_limitations": "1;1;20;37;7", "wc_review": "351;194;343;217;286", "wc_reply_reviewers": "17;31;0;0;0", "wc_reply_authors": "71;87;48;48;48", "reply_reviewers": "1;1;0;0;0", "reply_authors": "3;3;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 55.0, 22.135943621178654 ], "wc_strengths_avg": [ 66.8, 20.81730049742281 ], "wc_weaknesses_avg": [ 83.2, 50.45156092728945 ], "wc_questions_avg": [ 60.0, 79.20606037419107 ], "wc_limitations_avg": [ 13.2, 13.775340286178052 ], "wc_review_avg": [ 278.2, 63.86673625605116 ], "wc_reply_reviewers_avg": [ 9.6, 12.563439019631527 ], "wc_reply_authors_avg": [ 60.4, 16.007498243010993 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=722321252504795373&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "vt.edu;cmu.edu;andrew.cmu.edu;amazon.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Virginia Tech;Carnegie Mellon University;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.vt.edu;https://www.cmu.edu;https://www.amazon.com", "aff_unique_abbr": "VT;CMU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Collaborative Score Distillation for Consistent Visual Editing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73044", "id": "0tEjORCGFD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e7fd2c0a1a6f956c94024e955b34cc43-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=0tEjORCGFD", "openreview": "https://openreview.net/forum?id=0tEjORCGFD", "poster": "/media/PosterPDFs/NeurIPS%202023/73044.png?t=1702224888.4124508", "slides": "https://nips.cc/virtual/2023/poster/73044", "video": "https://nips.cc/virtual/2023/poster/73044", "author_site": "Subin Kim, Kyungmin Lee, June Suk Choi, Jongheon Jeong, Kihyuk Sohn, Jinwoo Shin", "tldr": "", "abstract": "Generative priors of large-scale text-to-image diffusion models enable a wide range of new generation and editing applications on diverse visual modalities. However, when adapting these priors to complex visual modalities, often represented as multiple images (e.g., video or 3D scene), achieving consistency across a set of images is challenging. In this paper, we address this challenge with a novel method, Collaborative Score Distillation (CSD). CSD is based on the Stein Variational Gradient Descent (SVGD). Specifically, we propose to consider multiple samples as \u201cparticles\u201d in the SVGD update and combine their score functions to distill generative priors over a set of images synchronously. Thus, CSD facilitates the seamless integration of information across 2D images, leading to a consistent visual synthesis across multiple samples. We show the effectiveness of CSD in a variety of editing tasks, encompassing the visual editing of panorama images, videos, and 3D scenes. Our results underline the competency of CSD as a versatile method for enhancing inter-sample consistency, thereby broadening the applicability of text-to-image diffusion models.", "keywords": "Score Distillation Sampling;Diffusion model;Editing", "primary_area": "", "supplementary_material": "/attachment/f0fe3899d81c102201a6ff094e80518fa1278c51.zip", "author": "Subin Kim;Kyungmin Lee;June Suk Choi;Jongheon Jeong;Kihyuk Sohn;Jinwoo Shin", "authorids": "~Subin_Kim2;~Kyungmin_Lee1;~June_Suk_Choi1;~Jongheon_Jeong1;~Kihyuk_Sohn1;~Jinwoo_Shin1", "gender": "F;M;M;M;M;M", "homepage": "https://subin-kim-cv.github.io/;https://kyungmnlee.github.io/;https://choi403.github.io/;https://jh-jeong.github.io;https://sites.google.com/site/kihyuksml/;https://sites.google.com/site/mijirim/", "dblp": "183/9520-1.html;57/5118;;241/5923;53/10771;31/7062", "google_scholar": "https://scholar.google.co.kr/citations?user=gdhIzYUAAAAJ;6dpime0AAAAJ;;mZB2qfcAAAAJ;VxpypngAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;0000-0002-4058-5774;;", "linkedin": ";;william-june-suk-choi-b03158350/;jongheonj/;;", "or_profile": "~Subin_Kim2;~Kyungmin_Lee1;~June_Suk_Choi1;~Jongheon_Jeong1;~Kihyuk_Sohn1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Google;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.ac.kr;google.com;kaist.ac.kr", "position": "PhD student;PhD student;Undergrad student;PhD student;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nkim2023collaborative,\ntitle={Collaborative Score Distillation for Consistent Visual Editing},\nauthor={Subin Kim and Kyungmin Lee and June Suk Choi and Jongheon Jeong and Kihyuk Sohn and Jinwoo Shin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0tEjORCGFD}\n}", "github": "", "project": "", "reviewers": "yGH2;j3eS;qUzm;v8pz", "pdf_size": 29687692, 
"rating": "4;5;6;8", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "84;58;85;109", "wc_strengths": "57;83;100;138", "wc_weaknesses": "288;175;133;66", "wc_questions": "96;67;162;79", "wc_limitations": "30;15;50;1", "wc_review": "555;398;530;393", "wc_reply_reviewers": "156;58;45;155", "wc_reply_authors": "340;59;59;372", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 18.041618552668716 ], "wc_strengths_avg": [ 94.5, 29.415132160165456 ], "wc_weaknesses_avg": [ 165.5, 80.70470866064755 ], "wc_questions_avg": [ 101.0, 36.694686263817545 ], "wc_limitations_avg": [ 24.0, 18.179658962697843 ], "wc_review_avg": [ 469.0, 74.05065833603372 ], "wc_reply_reviewers_avg": [ 103.5, 52.203927055347094 ], "wc_reply_authors_avg": [ 207.5, 148.93035284991439 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8783100656536799, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11453761217549152548&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.ac.kr;google.com;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.kaist.ac.kr;https://www.google.com", "aff_unique_abbr": "KAIST;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Two-Stage Predict+Optimize for MILPs with Unknown Parameters in Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73043", "id": "0tnhFpyWjb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e14be0332c04c76742710e417cedb2a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0tnhFpyWjb", "openreview": "https://openreview.net/forum?id=0tnhFpyWjb", "poster": "/media/PosterPDFs/NeurIPS%202023/73043.png?t=1702302943.1854053", "slides": "https://nips.cc/virtual/2023/poster/73043", "video": "https://nips.cc/virtual/2023/poster/73043", "author_site": "Xinyi Hu, Jasper Lee, Jimmy Lee", "tldr": "", "abstract": "Consider the setting of constrained optimization, with some parameters unknown at solving time and requiring prediction from relevant features. Predict+Optimize is a recent framework for end-to-end training supervised learning models for such predictions, incorporating information about the optimization problem in the training process in order to yield better predictions in terms of the quality of the predicted solution under the true parameters. Almost all prior works have focused on the special case where the unknowns appear only in the optimization objective and not the constraints. Hu et al. proposed the first adaptation of Predict+Optimize to handle unknowns appearing in constraints, but the framework has somewhat ad-hoc elements, and they provided a training algorithm only for covering and packing linear programs. 
In this work, we give a new simpler and more powerful framework called Two-Stage Predict+Optimize, which we believe should be the canonical framework for the Predict+Optimize setting. We also give a training algorithm usable for all mixed integer linear programs, vastly generalizing the applicability of the framework. Experimental results demonstrate the superior prediction performance of our training framework over all classical and state-of-the-art methods.", "keywords": "Constraint optimization;Predict+Optimize", "primary_area": "", "supplementary_material": "/attachment/9652c45d05552fb0a580d6151f5cda963cc35fac.zip", "author": "Xinyi HU;Jasper C.H. Lee;Jimmy H.M. Lee", "authorids": "~Xinyi_HU2;~Jasper_C.H._Lee1;~Jimmy_H.M._Lee1", "gender": "Not Specified;M;M", "homepage": "https://elizabethxyhu.github.io/;https://jasperchlee.github.io/;http://www.cse.cuhk.edu.hk/~jlee", "dblp": ";150/4950;l/JimmyHoManLee", "google_scholar": "hANa7zAAAAAJ;z0Y4snAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-9526-5850", "linkedin": ";;", "or_profile": "~Xinyi_HU2;~Jasper_C.H._Lee1;~Jimmy_H.M._Lee1", "aff": "Department of Computer Science and Engineering;University of Wisconsin - Madison;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;wisc.edu;cse.cuhk.edu.hk", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhu2023twostage,\ntitle={Two-Stage Predict+Optimize for {MILP}s with Unknown Parameters in Constraints},\nauthor={Xinyi HU and Jasper C.H. Lee and Jimmy H.M. Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0tnhFpyWjb}\n}", "github": "", "project": "", "reviewers": "FnYQ;Jj2a;Pw5Y;Xm8w", "pdf_size": 329949, "rating": "4;7;7;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "93;295;67;205", "wc_strengths": "104;113;83;46", "wc_weaknesses": "619;386;206;221", "wc_questions": "39;185;86;92", "wc_limitations": "7;20;28;4", "wc_review": "862;999;470;568", "wc_reply_reviewers": "189;375;89;433", "wc_reply_authors": "493;185;36;559", "reply_reviewers": "2;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 165.0, 91.22499657440389 ], "wc_strengths_avg": [ 86.5, 25.79244075305786 ], "wc_weaknesses_avg": [ 358.0, 166.41664580203508 ], "wc_questions_avg": [ 100.5, 52.92683629313205 ], "wc_limitations_avg": [ 14.75, 9.730750228014282 ], "wc_review_avg": [ 724.75, 214.19544229511513 ], "wc_reply_reviewers_avg": [ 271.5, 138.66055675641866 ], "wc_reply_authors_avg": [ 318.25, 215.5914829022705 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13759918065827483189&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cse.cuhk.edu.hk;wisc.edu;cse.cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, San Diego;University of Wisconsin-Madison;Chinese University of Hong Kong", "aff_unique_dep": "Department of Computer Science and Engineering;;", "aff_unique_url": 
"https://cse.ucsd.edu;https://www.wisc.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "UCSD CSE;UW-Madison;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Madison;Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "title": "The Adversarial Consistency of Surrogate Risks for Binary Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73042", "id": "0uARg5G04K", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81858558b55a8c63763cfe088090242a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0uARg5G04K", "openreview": "https://openreview.net/forum?id=0uARg5G04K", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73042", "video": "https://nips.cc/virtual/2023/poster/73042", "author_site": "Natalie Frank, Jonathan Niles-Weed", "tldr": "", "abstract": "We study the consistency of surrogate risks for robust binary classification.\n\tIt is common to learn robust classifiers by adversarial training, which seeks to minimize the expected $0$-$1$ loss when each example can be maliciously corrupted within a small ball.\n\tWe give a simple and complete characterization of the set of surrogate loss functions that are \\emph{consistent}, i.e., that can replace the $0$-$1$ loss without affecting the minimizing sequences of the original adversarial risk, for any data distribution.\n\tWe also prove a quantitative version of adversarial consistency for the $\\rho$-margin loss.\n\tOur results reveal that the class of adversarially consistent surrogates is substantially smaller than in the standard setting, where many common surrogates are known to be consistent.", "keywords": "Adversarial learning;surrogate risks;optimal transport", "primary_area": "", "supplementary_material": "/attachment/aeaaaaef0f71f2ca34a8def89e7add0fb1bdf57a.pdf", "author": "Natalie Frank;Jonathan Niles-Weed", "authorids": "~Natalie_Frank1;~Jonathan_Niles-Weed1", "gender": "F;M", "homepage": "https://natalie-frank.github.io/;http://jonathannilesweed.com", "dblp": "263/9872;160/8992", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": "0009-0007-5582-4487;", "linkedin": ";", "or_profile": "~Natalie_Frank1;~Jonathan_Weed1", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfrank2023the,\ntitle={The Adversarial Consistency of Surrogate Risks for Binary Classification},\nauthor={Natalie Frank and Jonathan Niles-Weed},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0uARg5G04K}\n}", "github": "", "project": "", "reviewers": "epdC;PJjd;6vgU;QDX6", "pdf_size": 322967, "rating": "6;6;7;8", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "4;4;3;3", "presentation": "2;2;3;3", "wc_summary": "72;60;31;97", "wc_strengths": "46;62;91;154", "wc_weaknesses": "101;108;40;25", "wc_questions": "8;86;42;141", "wc_limitations": "2;4;1;32", "wc_review": "229;320;205;449", "wc_reply_reviewers": "35;17;86;21", "wc_reply_authors": "0;0;64;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.0, 23.73815494093844 ], "wc_strengths_avg": [ 88.25, 41.245454294988676 ], 
"wc_weaknesses_avg": [ 68.5, 36.47259244967377 ], "wc_questions_avg": [ 69.25, 49.806500579743606 ], "wc_limitations_avg": [ 9.75, 12.891373084353738 ], "wc_review_avg": [ 300.75, 95.74020837662721 ], "wc_reply_reviewers_avg": [ 39.75, 27.52612395525385 ], "wc_reply_authors_avg": [ 16.0, 27.712812921102035 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8879046047474005772&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "nyu.edu;nyu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Lov\u00e1sz Principle for Unsupervised Graph Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73041", "id": "0vdEHDwamk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b61da4f02b271cb7b5e3d538e2b78fb9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0vdEHDwamk", "openreview": "https://openreview.net/forum?id=0vdEHDwamk", "poster": "/media/PosterPDFs/NeurIPS%202023/73041.png?t=1698336296.573895", "slides": "https://nips.cc/virtual/2023/poster/73041", "video": "https://nips.cc/virtual/2023/poster/73041", "author_site": "Ziheng Sun, Chris Ding, Jicong Fan", "tldr": "", "abstract": "This paper focuses on graph-level representation learning that aims to represent graphs as vectors that can be directly utilized in downstream tasks such as graph classification. \nWe propose a novel graph-level representation learning principle called Lov\u00e1sz principle, which is motivated by the Lov\u00e1sz number in graph theory. The Lov\u00e1sz number of a graph is a real number that is an upper bound for graph Shannon capacity and is strongly connected with various global characteristics of the graph. Specifically, we show that the handle vector for computing the Lov\u00e1sz number is potentially a suitable choice for graph representation, as it captures a graph's global properties, though a direct application of the handle vector is difficult and problematic. We propose to use neural networks to address the problems and hence provide the Lov\u00e1sz principle. Moreover, we propose an enhanced Lov\u00e1sz principle that is able to exploit the subgraph Lov\u00e1sz numbers directly and efficiently. The experiments demonstrate that our Lov\u00e1sz principles achieve competitive performance compared to the baselines in unsupervised and semi-supervised graph-level representation learning tasks. 
The code of our Lov\u00e1sz principles is publicly available on GitHub.", "keywords": "Lov\u00e1sz Number;graph-level representation learning;unsupervised learning;semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Ziheng Sun;Chris Ding;Jicong Fan", "authorids": "~Ziheng_Sun1;~Chris_Ding1;~Jicong_Fan2", "gender": "M;M;M", "homepage": ";http://ranger.uta.edu/~chqding/;https://jicongfan.github.io/", "dblp": ";https://dblp.uni-trier.de/pers/hd/d/Ding:Chris;139/1570", "google_scholar": "https://scholar.google.com/citations?hl=en;q7FfnjgAAAAJ;vdJsnhIAAAAJ", "orcid": ";;0000-0001-9665-0355", "linkedin": ";;", "or_profile": "~Ziheng_Sun1;~Chris_Ding1;~Jicong_Fan2", "aff": "Chinese University of HongKong;University of Texas at Arlington;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cse.uta.edu;cuhk.edu.cn", "position": "PhD student;Professor;Research Assistant Professor", "bibtex": "@inproceedings{\nsun2023lovsz,\ntitle={Lov\\'asz Principle for Unsupervised Graph Representation Learning},\nauthor={Ziheng Sun and Chris Ding and Jicong Fan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0vdEHDwamk}\n}", "github": "", "project": "", "reviewers": "JpeL;JWhm;1BFQ;2QoV;4DKt", "pdf_size": 1428644, "rating": "3;5;6;7;7", "confidence": "4;4;3;4;4", "soundness": "3;2;3;4;3", "novelty": "2;3;2;3;3", "presentation": "3;3;2;4;3", "wc_summary": "108;211;54;63;151", "wc_strengths": "74;168;24;97;48", "wc_weaknesses": "189;588;237;182;34", "wc_questions": "155;409;45;159;163", "wc_limitations": "1;37;1;45;2", "wc_review": "527;1413;361;546;398", "wc_reply_reviewers": "61;230;11;105;19", "wc_reply_authors": "20;1267;11;30;19", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 117.4, 58.23607129606186 ], "wc_strengths_avg": [ 82.2, 49.40607250126243 ], "wc_weaknesses_avg": [ 246.0, 184.02934548598492 ], "wc_questions_avg": [ 186.2, 119.85724842494925 ], "wc_limitations_avg": [ 17.2, 19.6 ], "wc_review_avg": [ 649.0, 388.62166692041245 ], "wc_reply_reviewers_avg": [ 85.2, 79.79072627818348 ], "wc_reply_authors_avg": [ 269.4, 498.8364862357203 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4899687870028405986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cuhk.edu.cn;cse.uta.edu;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;University of Texas at Arlington", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.uta.edu", "aff_unique_abbr": "CUHK;UTA", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Hong Kong SAR;Arlington;Shenzhen", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "On the Convergence of No-Regret Learning Dynamics in Time-Varying Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73040", "id": "0x2Ou3xHbH", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/34f1c2e7ab91b6fa481ad0286a08ad02-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0x2Ou3xHbH", "openreview": "https://openreview.net/forum?id=0x2Ou3xHbH", "poster": "/media/PosterPDFs/NeurIPS%202023/73040.png?t=1701645552.4161701", "slides": "https://nips.cc/virtual/2023/poster/73040", "video": "https://nips.cc/virtual/2023/poster/73040", "author_site": "Ioannis Anagnostides, Ioannis Panageas, Gabriele Farina, Tuomas Sandholm", "tldr": "", "abstract": "Most of the literature on learning in games has focused on the restrictive setting where the underlying repeated game does not change over time. Much less is known about the convergence of no-regret learning algorithms in dynamic multiagent settings. In this paper, we characterize the convergence of optimistic gradient descent (OGD) in time-varying games. Our framework yields sharp convergence bounds for the equilibrium gap of OGD in zero-sum games parameterized on natural variation measures of the sequence of games, subsuming known results for static games. Furthermore, we establish improved second-order variation bounds under strong convexity-concavity, as long as each game is repeated multiple times. Our results also apply to time-varying general-sum multi-player games via a bilinear formulation of correlated equilibria, which has novel implications for meta-learning and for obtaining refined variation-dependent regret bounds, addressing questions left open in prior papers. Finally, we leverage our framework to also provide new insights on dynamic regret guarantees in static games.", "keywords": "no-regret learning;optimistic gradient descent;time-varying games;dynamic regret", "primary_area": "", "supplementary_material": "", "author": "Ioannis Anagnostides;Ioannis Panageas;Gabriele Farina;Tuomas Sandholm", "authorids": "~Ioannis_Anagnostides1;~Ioannis_Panageas1;~Gabriele_Farina1;~Tuomas_Sandholm1", "gender": "M;M;M;M", "homepage": ";https://panageas.github.io;http://www.cs.cmu.edu/~gfarina/about/;http://www.cs.cmu.edu/~sandholm", "dblp": "273/7648;139/3829;;s/TuomasSandholm", "google_scholar": "QVwDo_sAAAAJ;5NiFWuwAAAAJ;sktDNcEAAAAJ;0DpK1EMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ioannis_Anagnostides1;~Ioannis_Panageas1;~Gabriele_Farina1;~Tuomas_Sandholm1", "aff": "Carnegie Mellon University;Donald Bren School of Information and Computer Sciences, University of California, Irvine;FAIR, Meta AI;Carnegie Mellon University", "aff_domain": "cmu.edu;ics.uci.edu;meta.com;cmu.edu", "position": "PhD student;Assistant Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nanagnostides2023on,\ntitle={On the Convergence of No-Regret Learning Dynamics in Time-Varying Games},\nauthor={Ioannis Anagnostides and Ioannis Panageas and Gabriele Farina and Tuomas Sandholm},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0x2Ou3xHbH}\n}", "github": "", "project": "", "reviewers": "3wR1;PH29;jL2o;PhTp", "pdf_size": 776238, "rating": "4;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "146;174;242;111", "wc_strengths": "50;90;163;86", "wc_weaknesses": "234;198;142;45", "wc_questions": "104;23;163;4", "wc_limitations": "3;1;1;1", "wc_review": "537;486;711;247", "wc_reply_reviewers": "294;10;17;13", "wc_reply_authors": "203;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 
6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 168.25, 48.07481149209012 ], "wc_strengths_avg": [ 97.25, 41.03276130118469 ], "wc_weaknesses_avg": [ 154.75, 71.34204580750401 ], "wc_questions_avg": [ 73.5, 63.87683461161801 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 495.25, 165.83481992633514 ], "wc_reply_reviewers_avg": [ 83.5, 121.55759951562058 ], "wc_reply_authors_avg": [ 50.75, 87.90157848412052 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9002603818544674398&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;ics.uci.edu;meta.com;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Carnegie Mellon University;University of California, Irvine;Meta", "aff_unique_dep": ";Donald Bren School of Information and Computer Sciences;Meta AI", "aff_unique_url": "https://www.cmu.edu;https://www.uci.edu;https://meta.ai", "aff_unique_abbr": "CMU;UCI;Meta AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fine-Grained Theoretical Analysis of Federated Zeroth-Order Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73039", "id": "0ycX03sMAT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aaa973f65b98c96e5f850d706464a3c4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0ycX03sMAT", "openreview": "https://openreview.net/forum?id=0ycX03sMAT", "poster": "/media/PosterPDFs/NeurIPS%202023/73039.png?t=1699537504.491326", "slides": "https://nips.cc/virtual/2023/poster/73039", "video": "https://nips.cc/virtual/2023/poster/73039", "author_site": "Jun Chen, Hong Chen, Bin Gu, Hao Deng", "tldr": "", "abstract": "The federated zeroth-order optimization (FedZO) algorithm enjoys the advantages of both zeroth-order optimization and federated learning, and has shown exceptional performance on black-box attack and softmax regression tasks. However, there is no generalization analysis for FedZO, and the convergence rate established in its analysis is slower than that of the corresponding first-order optimization setting. This paper aims to establish systematic theoretical assessments of FedZO by developing the analysis technique of on-average model stability. We establish the first generalization error bound of FedZO under the Lipschitz continuity and smoothness conditions. Then, refined generalization and optimization bounds are provided by replacing the bounded-gradient assumption with heavy-tailed gradient noise and utilizing the second-order Taylor expansion for gradient approximation. With the help of a new error decomposition strategy, our theoretical analysis is also extended to the asynchronous case. 
For FedZO, our fine-grained analysis fills the theoretical gap in generalization guarantees and sharpens the convergence characterization of the algorithm.", "keywords": "Federated zeroth-order optimization;stability analysis;theoretical guarantee;non-convex optimization;sub-Weibull distribution", "primary_area": "", "supplementary_material": "/attachment/bb473f362784664d451167582f90c6101e527055.pdf", "author": "Jun Chen;Hong Chen;Bin Gu;Hao Deng", "authorids": "~Jun_Chen12;~Hong_Chen1;~Bin_Gu1;~Hao_Deng5", "gender": "M;;M;M", "homepage": "https://www.researchgate.net/profile/Jun-Chen-256;https://chenhongml.github.io/;https://mbzuai.ac.ae/study/faculty/bin-gu/;https://www.researchgate.net/profile/Hao-Deng-17", "dblp": ";https://dblp.uni-trier.de/pers/hd/c/Chen_0004:Hong;29/1758-1;", "google_scholar": "GkZkdRYAAAAJ;;Vo8OgCgAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-9810-5171;;0000-0001-6049-1815;0000-0003-3841-958X", "linkedin": "%E5%90%9B-%E9%99%88-4a8823276/;;;", "or_profile": "~Jun_Chen12;~Hong_Chen1;~Bin_Gu1;~Hao_Deng5", "aff": "Huazhong Agricultural University;Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence;Huazhong Agricultural University", "aff_domain": "hzau.edu.cn;hzau.edu.cn;mbzuai.ac.ae;hzau.edu.cn", "position": "PhD student;Full Professor;Assistant Professor;Lecturer", "bibtex": "@inproceedings{\nchen2023finegrained,\ntitle={Fine-Grained Theoretical Analysis of Federated Zeroth-Order Optimization},\nauthor={Jun Chen and Hong Chen and Bin Gu and Hao Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0ycX03sMAT}\n}", "github": "", "project": "", "reviewers": "YCFd;xnUD;obsy;oYUF;QoKb;hpvH;E5dV", "pdf_size": 300421, "rating": "5;5;6;6;6;7;8", "confidence": "2;3;3;3;3;3;3", "soundness": "3;2;3;3;3;3;3", "novelty": "3;2;3;3;3;3;3", "presentation": "3;2;3;3;3;3;3", "wc_summary": "33;51;10;103;82;51;61", "wc_strengths": "21;44;20;90;59;25;81", "wc_weaknesses": "52;188;34;73;81;177;25", "wc_questions": "53;99;157;125;47;2;56", "wc_limitations": "9;8;1;1;1;7;11", "wc_review": "168;390;222;392;270;262;234", "wc_reply_reviewers": "9;53;53;0;16;6;17", "wc_reply_authors": "10;52;10;0;10;10;13", "reply_reviewers": "1;1;1;0;1;1;1", "reply_authors": "2;2;2;1;2;2;2", "rating_avg": [ 6.142857142857143, 0.989743318610787 ], "confidence_avg": [ 2.857142857142857, 0.34992710611188266 ], "soundness_avg": [ 2.857142857142857, 0.34992710611188266 ], "novelty_avg": [ 2.857142857142857, 0.34992710611188266 ], "presentation_avg": [ 2.857142857142857, 0.34992710611188266 ], "wc_summary_avg": [ 55.857142857142854, 28.296534726906412 ], "wc_strengths_avg": [ 48.57142857142857, 26.805421321906095 ], "wc_weaknesses_avg": [ 90.0, 61.35144660071187 ], "wc_questions_avg": [ 77.0, 48.88762624632127 ], "wc_limitations_avg": [ 5.428571428571429, 3.994894701174161 ], "wc_review_avg": [ 276.85714285714283, 78.39720944638012 ], "wc_reply_reviewers_avg": [ 22.0, 20.325915899237042 ], "wc_reply_authors_avg": [ 15.0, 15.57470476675011 ], "reply_reviewers_avg": [ 0.8571428571428571, 0.3499271061118826 ], "reply_authors_avg": [ 1.8571428571428572, 0.3499271061118826 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.47140452079103157, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14841458554763808415&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": 
"hzau.edu.cn;hzau.edu.cn;mbzuai.ac.ae;hzau.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.hzau.edu.cn/;https://mbzuai.ac.ae", "aff_unique_abbr": "HAU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Accelerating Monte Carlo Tree Search with Probability Tree State Abstraction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73038", "id": "0zeLTZAqaJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf89c9fcd0ef605571a03666f6a6a44d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0zeLTZAqaJ", "openreview": "https://openreview.net/forum?id=0zeLTZAqaJ", "poster": "/media/PosterPDFs/NeurIPS%202023/73038.png?t=1698906385.5580587", "slides": "https://nips.cc/virtual/2023/poster/73038", "video": "https://nips.cc/virtual/2023/poster/73038", "author_site": "Yangqing Fu, Ming Sun, Buqing Nie, Yue Gao", "tldr": "", "abstract": "Monte Carlo Tree Search (MCTS) algorithms such as AlphaGo and MuZero have achieved superhuman performance in many challenging tasks. However, the computational complexity of MCTS-based algorithms is influenced by the size of the search space. To address this issue, we propose a novel probability tree state abstraction (PTSA) algorithm to improve the search efficiency of MCTS. A general tree state abstraction with path transitivity is defined. In addition, the probability tree state abstraction is proposed for fewer mistakes during the aggregation step. Furthermore, the theoretical guarantees of the transitivity and aggregation error bound are justified. To evaluate the effectiveness of the PTSA algorithm, we integrate it with state-of-the-art MCTS-based algorithms, such as Sampled MuZero and Gumbel MuZero. 
Experimental results on different tasks demonstrate that our method can accelerate the training process of state-of-the-art algorithms with 10%-45% search space reduction.", "keywords": "reinforcement learning;monte carlo tree search;state abstraction", "primary_area": "", "supplementary_material": "/attachment/73db9d7ee0ad2109ea415ba26b5a972462066f90.pdf", "author": "Yangqing Fu;Ming Sun;Buqing Nie;Yue Gao", "authorids": "~Yangqing_Fu1;~Ming_Sun7;~Buqing_Nie1;~Yue_Gao8", "gender": "M;M;M;F", "homepage": ";;;https://gaoyue.sjtu.edu.cn/", "dblp": "310/4032;39/1471;291/4785.html;", "google_scholar": "9uLitNAAAAAJ;;SqBTdgIAAAAJ;jlweMD8AAAAJ", "orcid": ";0000-0002-3214-2332;;", "linkedin": ";;;", "or_profile": "~Yangqing_Fu1;~Ming_Sun7;~Buqing_Nie1;~Yue_Gao8", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfu2023accelerating,\ntitle={Accelerating Monte Carlo Tree Search with Probability Tree State Abstraction},\nauthor={Yangqing Fu and Ming Sun and Buqing Nie and Yue Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=0zeLTZAqaJ}\n}", "github": "", "project": "", "reviewers": "GJR1;Yoim;Bu9r;GnyW;CrLf", "pdf_size": 0, "rating": "4;7;7;7;7", "confidence": "5;4;4;2;3", "soundness": "3;3;2;3;3", "novelty": "2;4;2;3;3", "presentation": "3;3;3;2;1", "wc_summary": "57;134;41;112;180", "wc_strengths": "24;116;28;183;109", "wc_weaknesses": "120;47;195;205;287", "wc_questions": "45;231;314;209;24", "wc_limitations": "4;1;9;1;32", "wc_review": "250;529;587;710;632", "wc_reply_reviewers": "243;0;81;29;0", "wc_reply_authors": "695;0;79;0;0", "reply_reviewers": "2;0;2;1;0", "reply_authors": "3;1;2;1;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 104.8, 50.822829515878 ], "wc_strengths_avg": [ 92.0, 59.77624946414755 ], "wc_weaknesses_avg": [ 170.8, 81.44298619279624 ], "wc_questions_avg": [ 164.6, 112.04748993172493 ], "wc_limitations_avg": [ 9.4, 11.672189169131899 ], "wc_review_avg": [ 541.6, 157.34115799751825 ], "wc_reply_reviewers_avg": [ 70.6, 91.13638131942699 ], "wc_reply_authors_avg": [ 154.8, 271.8274452662939 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6864064729836441, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10325482672579563362&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Decoding the Enigma: Benchmarking Humans and AIs on the Many Facets of Working Memory", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73720", "id": "10R4Fg1aA0", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/ea8758dbe6cc5e6e1764c009acb4c31e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=10R4Fg1aA0", "openreview": "https://openreview.net/forum?id=10R4Fg1aA0", "poster": "/media/PosterPDFs/NeurIPS%202023/73720.png?t=1699545858.5788987", "slides": "https://nips.cc/virtual/2023/poster/73720", "video": "https://nips.cc/virtual/2023/poster/73720", "author_site": "Ankur Sikarwar, Mengmi Zhang", "tldr": "", "abstract": "Working memory (WM), a fundamental cognitive process facilitating the temporary storage, integration, manipulation, and retrieval of information, plays a vital role in reasoning and decision-making tasks. Robust benchmark datasets that capture the multifaceted nature of WM are crucial for the effective development and evaluation of AI WM models. Here, we introduce a comprehensive Working Memory (WorM) benchmark dataset for this purpose. WorM comprises 10 tasks and a total of 1 million trials, assessing 4 functionalities, 3 domains, and 11 behavioral and neural characteristics of WM. We jointly trained and tested state-of-the-art recurrent neural networks and transformers on all these tasks. We also include human behavioral benchmarks as an upper bound for comparison. Our results suggest that AI models replicate some characteristics of WM in the brain, most notably primacy and recency effects, and neural clusters and correlates specialized for different domains and functionalities of WM. In the experiments, we also reveal some limitations in existing models to approximate human behavior. This dataset serves as a valuable resource for communities in cognitive psychology, neuroscience, and AI, offering a standardized framework to compare and enhance WM models, investigate WM's neural underpinnings, and develop WM models with human-like capabilities. 
Our source code and data are available at: https://github.com/ZhangLab-DeepNeuroCogLab/WorM", "keywords": "working memory;neuroscience;cognitive science;RNNs;transformers;behavioral;neural;benchmark", "primary_area": "", "supplementary_material": "", "author": "Ankur Sikarwar;Mengmi Zhang", "authorids": "~Ankur_Sikarwar1;~Mengmi_Zhang1", "gender": "M;F", "homepage": ";https://a0091624.wixsite.com/deepneurocognition-1", "dblp": "311/4104;160/7116", "google_scholar": "eWDiT_wAAAAJ;https://scholar.google.com.sg/citations?user=G2sVOhcAAAAJ", "orcid": ";0000-0002-2694-7097", "linkedin": "sikarwar99/;", "or_profile": "~Ankur_Sikarwar1;~Mengmi_Zhang1", "aff": "I2R, A*STAR;A*STAR", "aff_domain": "astar.edu.sg;astar.edu.sg", "position": "Researcher;Principal Researcher", "bibtex": "@inproceedings{\nsikarwar2023decoding,\ntitle={Decoding the Enigma: Benchmarking Humans and {AI}s on the Many Facets of Working Memory},\nauthor={Ankur Sikarwar and Mengmi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=10R4Fg1aA0}\n}", "github": "", "project": "", "reviewers": "AjzL;1qDZ;9dCi;E3J7;Unmh", "pdf_size": 10954324, "rating": "4;6;7;8;8", "confidence": "3;3;5;4;2", "wc_summary_and_contributions": "154;89;124;41;112", "wc_strengths": "16;49;92;80;146", "wc_improvement": "83;66;190;165;133", "wc_limitations": "49;82;4;46;1", "wc_correctness": "29;8;6;1;1", "wc_clarity": "25;42;1;89;1", "wc_relation_to_prior_work": "61;8;7;78;1", "wc_documentation": "43;22;36;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "461;367;461;502;397", "wc_reply_reviewers": "0;106;234;83;27", "wc_reply_authors": "2362;1614;1494;1793;864", "reply_reviewers": "0;1;1;1;1", "reply_authors": "5;5;5;4;3", "rating_avg": [ 6.6, 1.4966629547095764 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "wc_summary_and_contributions_avg": [ 104.0, 37.836490323495916 ], "wc_strengths_avg": [ 76.6, 43.58715407089571 ], "wc_improvement_avg": [ 127.4, 47.12791105067145 ], "wc_limitations_avg": [ 36.4, 30.440762145517972 ], "wc_correctness_avg": [ 9.0, 10.373041983911952 ], "wc_clarity_avg": [ 31.6, 32.61656021103391 ], "wc_relation_to_prior_work_avg": [ 31.0, 31.981244503614928 ], "wc_documentation_avg": [ 20.6, 17.3735431043872 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 437.6, 48.7343821136577 ], "wc_reply_reviewers_avg": [ 90.0, 81.37567204023571 ], "wc_reply_authors_avg": [ 1625.4, 483.2794636646586 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 4.4, 0.8 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.1048284836721918, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3888650057758566198&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "astar.edu.sg;astar.edu.sg", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "A*STAR;Agency for Science, Technology and Research", "aff_unique_dep": "Institute for Infocomm Research;", "aff_unique_url": "https://www.a-star.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "A*STAR;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "14R8QBKzFH", "title": "Tight Bounds for Machine Unlearning via Differential Privacy", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider the formulation of \"machine unlearning\" of Sekhari, Acharya, 
Kamath, and Suresh (NeurIPS 2021), which formalizes the so-called \"right to be forgotten\" by requiring that a trained model, upon request, should be able to 'unlearn' a number of points from the training data, as if they had never been included in the first place. Sekhari et al. established some positive and negative results about the number of data points that can be successfully unlearnt by a trained model without impacting the model's accuracy (the \"deletion capacity\"), showing that machine unlearning could be achieved by using differentially private (DP) algorithms. However, their results left open a gap between upper and lower bounds on the deletion capacity of these algorithms: our work fully closes this gap, obtaining tight bounds on the deletion capacity achievable by DP-based machine unlearning algorithms.", "keywords": "machine unlearning;differential privacy;privacy", "primary_area": "", "supplementary_material": "/attachment/9fcebe79bfe3a0c9b068bc692c6d2a542a2f8cb5.pdf", "author": "Yiyang Huang;Clement Louis Canonne", "authorids": "~Yiyang_Huang2;~Clement_Louis_Canonne1", "gender": ";M", "homepage": ";https://ccanonne.github.io/", "dblp": ";28/9840L", "google_scholar": ";u_OXsBIAAAAJ", "orcid": ";0000-0001-7153-5211", "linkedin": ";", "or_profile": "~Yiyang_Huang2;~Clement_Louis_Canonne1", "aff": ";University of Sydney", "aff_domain": ";sydney.edu.au", "position": ";Lecturer", "bibtex": "@misc{\nhuang2023tight,\ntitle={Tight Bounds for Machine Unlearning via Differential Privacy},\nauthor={Yiyang Huang and Clement Louis Canonne},\nyear={2023},\nurl={https://openreview.net/forum?id=14R8QBKzFH}\n}", "github": "", "project": "", "reviewers": "Zwzy;Hbaj;QTS7;RqTt;tB6F", "site": "https://openreview.net/forum?id=14R8QBKzFH", "pdf_size": 277348, "rating": "4;4;5;5;7", "confidence": "3;3;3;4;3", "soundness": "3;3;2;3;3", "novelty": "2;2;2;2;3", "presentation": "2;3;3;3;4", "wc_summary": "87;351;73;29;104", "wc_strengths": "122;115;57;158;44", "wc_weaknesses": "242;141;84;113;24", "wc_questions": "8;131;48;18;238", "wc_limitations": "23;60;11;34;126", "wc_review": "482;798;273;352;536", "wc_reply_reviewers": "316;4;11;55;13", "wc_reply_authors": "88;0;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 128.8, 113.84972551569898 ], "wc_strengths_avg": [ 99.2, 42.555375688624814 ], "wc_weaknesses_avg": [ 120.8, 71.94831478221016 ], "wc_questions_avg": [ 88.6, 86.31013845429747 ], "wc_limitations_avg": [ 50.8, 40.93604768416218 ], "wc_review_avg": [ 488.2, 180.64373778240972 ], "wc_reply_reviewers_avg": [ 79.8, 119.45442645628498 ], "wc_reply_authors_avg": [ 17.6, 35.2 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12312347641109852726&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "title": "Towards Understanding the Dynamics of Gaussian-Stein Variational Gradient 
Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73037", "id": "14ZM7FfPx8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c0ae487420ebc8d0ed7c541b4e3f09d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=14ZM7FfPx8", "openreview": "https://openreview.net/forum?id=14ZM7FfPx8", "poster": "/media/PosterPDFs/NeurIPS%202023/73037.png?t=1699637720.9632268", "slides": "https://nips.cc/virtual/2023/poster/73037", "video": "https://nips.cc/virtual/2023/poster/73037", "author_site": "Tianle Liu, Promit Ghosal, Krishnakumar Balasubramanian, Natesh Pillai", "tldr": "", "abstract": "Stein Variational Gradient Descent (SVGD) is a nonparametric particle-based deterministic sampling algorithm. Despite its wide usage, understanding the theoretical properties of SVGD has remained a challenging problem. For sampling from a Gaussian target, the SVGD dynamics with a bilinear kernel will remain Gaussian as long as the initializer is Gaussian. Inspired by this fact, we undertake a detailed theoretical study of the Gaussian-SVGD, i.e., SVGD projected to the family of Gaussian distributions via the bilinear kernel, or equivalently Gaussian variational inference (GVI) with SVGD. We present a complete picture by considering both the mean-field PDE and discrete particle systems. When the target is strongly log-concave, the mean-field Gaussian-SVGD dynamics is proven to converge linearly to the Gaussian distribution closest to the target in KL divergence. In the finite-particle setting, there is both uniform in time convergence to the mean-field limit and linear convergence in time to the equilibrium if the target is Gaussian. In the general case, we propose a density-based and a particle-based implementation of the Gaussian-SVGD, and show that several recent algorithms for GVI, proposed from different perspectives, emerge as special cases of our unified framework. Interestingly, one of the new particle-based instance from this framework empirically outperforms existing approaches. Our results make concrete contributions towards obtaining a deeper understanding of both SVGD and GVI.", "keywords": "Stein variational gradient descent;Gaussian variational inference;Rates of Convergence", "primary_area": "", "supplementary_material": "", "author": "Tianle Liu;Promit Ghosal;Krishna Balasubramanian;Natesh S. Pillai", "authorids": "~Tianle_Liu1;~Promit_Ghosal1;~Krishna_Balasubramanian1;~Natesh_S._Pillai1", "gender": "Not Specified;M;M;M", "homepage": "https://liutianle.com;https://sites.google.com/view/promit-ghosal/home;https://nateshpillai.com/;https://sites.google.com/view/kriznakumar/", "dblp": ";;;22/6780-2.html", "google_scholar": "JhZdZhcAAAAJ;Gp90OAUAAAAJ;iKRpHLgAAAAJ;", "orcid": "0000-0002-6873-142X;;;", "linkedin": "tianle-liu-8211a51b7/;promit-ghosal-84822439/;natesh-pillai-3b663427/;", "or_profile": "~Tianle_Liu1;~Promit_Ghosal1;~Natesh_S._Pillai1;~Krishnakumar_Balasubramanian1", "aff": "Harvard University;Massachusetts Institute of Technology;Harvard University;University of California, Davis", "aff_domain": "harvard.edu;mit.edu;harvard.edu;ucdavis.edu", "position": "PhD student;Instructor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023towards,\ntitle={Towards Understanding the Dynamics of Gaussian-Stein Variational Gradient Descent},\nauthor={Tianle Liu and Promit Ghosal and Krishna Balasubramanian and Natesh S. 
Pillai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=14ZM7FfPx8}\n}", "github": "", "project": "", "reviewers": "1EPQ;EWuB;Bt7r;ZXYv;BMUk", "pdf_size": 847053, "rating": "3;5;6;6;7", "confidence": "4;4;3;2;4", "soundness": "3;4;3;3;4", "novelty": "2;2;3;3;4", "presentation": "1;3;3;3;4", "wc_summary": "93;32;53;92;136", "wc_strengths": "19;29;105;71;115", "wc_weaknesses": "493;60;23;118;258", "wc_questions": "165;10;247;25;127", "wc_limitations": "79;23;7;1;33", "wc_review": "849;154;435;307;669", "wc_reply_reviewers": "647;25;0;150;16", "wc_reply_authors": "510;74;0;280;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;3;1;2;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 81.2, 35.98555265658706 ], "wc_strengths_avg": [ 67.8, 38.75254830330516 ], "wc_weaknesses_avg": [ 190.4, 171.11703597245952 ], "wc_questions_avg": [ 114.8, 88.53564254016571 ], "wc_limitations_avg": [ 28.6, 27.63765547219952 ], "wc_review_avg": [ 482.8, 249.0272274270426 ], "wc_reply_reviewers_avg": [ 167.6, 245.57736052006098 ], "wc_reply_authors_avg": [ 172.8, 197.32045002989426 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.33174440134851857, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6263405686048203770&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "harvard.edu;mit.edu;harvard.edu;ucdavis.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology;University of California, Davis", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu;https://www.ucdavis.edu", "aff_unique_abbr": "Harvard;MIT;UC Davis", "aff_campus_unique_index": "1", "aff_campus_unique": ";Davis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Deciphering Spatio-Temporal Graph Forecasting: A Causal Lens and Treatment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73036", "id": "17Zkztjlgt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74fa3651b41560e1c7555e0958c70333-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=17Zkztjlgt", "openreview": "https://openreview.net/forum?id=17Zkztjlgt", "poster": "/media/PosterPDFs/NeurIPS%202023/73036.png?t=1699081236.652565", "slides": "https://nips.cc/virtual/2023/poster/73036", "video": "https://nips.cc/virtual/2023/poster/73036", "author_site": "Yutong Xia, Yuxuan Liang, Haomin Wen, Xu Liu, Kun Wang, Zhengyang Zhou, Roger Zimmermann", "tldr": "", "abstract": "Spatio-Temporal Graph (STG) forecasting is a fundamental task in many real-world applications. Spatio-Temporal Graph Neural Networks have emerged as the most popular method for STG forecasting, but they often struggle with temporal out-of-distribution (OoD) issues and dynamic spatial causation. In this paper, we propose a novel framework called CaST to tackle these two challenges via causal treatments. Concretely, leveraging a causal lens, we first build a structural causal model to decipher the data generation process of STGs. 
To handle the temporal OoD issue, we employ back-door adjustment via a novel disentanglement block to separate the temporal environments from the input data. Moreover, we utilize front-door adjustment and adopt edge-level convolution to model the ripple effect of causation. Experimental results on three real-world datasets demonstrate the effectiveness of CaST, which consistently outperforms existing methods with good interpretability. Our source code is available at https://github.com/yutong-xia/CaST.", "keywords": "Spatio-temporal forecasting", "primary_area": "", "supplementary_material": "", "author": "Yutong Xia;Yuxuan Liang;Haomin Wen;Xu Liu;Kun Wang;Zhengyang Zhou;Roger Zimmermann", "authorids": "~Yutong_Xia1;~Yuxuan_Liang1;~Haomin_Wen2;~Xu_Liu9;~Kun_Wang15;~Zhengyang_Zhou1;~Roger_Zimmermann1", "gender": "F;M;M;;M;M;M", "homepage": "https://yutong-xia.github.io/;https://yuxuanliang.com;https://wenhaomin.github.io/;;http://home.ustc.edu.cn/~wk520529/#home;http://home.ustc.edu.cn/~zzy0929/Home/;https://www.comp.nus.edu.sg/cs/bio/rogerz/", "dblp": "307/5917;183/0977;292/7181;93/3167-14;;246/8238;79/1490", "google_scholar": "V7b4y2oAAAAJ;n9cODgcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;JTzLTycAAAAJ;UnyqjWQAAAAJ;dPElQLUAAAAJ;https://scholar.google.com.tw/citations?user=IDREwXEAAAAJ", "orcid": "0000-0001-9026-0049;0000-0003-2817-7337;0000-0001-6130-126X;0000-0003-2708-0584;0000-0003-0602-169X;0000-0003-4728-7347;0000-0002-7410-2590", "linkedin": "yutong-xia/;yoshall/;;liuxu-187825160/;;;roger-zimmermann-76b56b6/", "or_profile": "~Yutong_Xia1;~Yuxuan_Liang1;~Haomin_Wen2;~Xu_Liu9;~Kun_Wang15;~Zhengyang_Zhou1;~Roger_Zimmermann1", "aff": "National University of Singapore;The Hong Kong University of Science and Technology (Guangzhou);Beijing Jiaotong University;National University of Singapore;University of Science and Technology of China;University of Science and Technology of China;National University of Singapore", "aff_domain": "u.nus.edu;hkust-gz.edu.cn;bjtu.edu.cn;nus.edu.sg;ustc.edu.cn;ustc.edu.cn;nus.edu.sg", "position": "PhD student;Assistant Professor;PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nxia2023deciphering,\ntitle={Deciphering Spatio-Temporal Graph Forecasting: A Causal Lens and Treatment},\nauthor={Yutong Xia and Yuxuan Liang and Haomin Wen and Xu Liu and Kun Wang and Zhengyang Zhou and Roger Zimmermann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=17Zkztjlgt}\n}", "github": "", "project": "", "reviewers": "u4Nz;ya77;kwLE;o65E", "pdf_size": 2741646, "rating": "5;6;6;6", "confidence": "3;5;3;2", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "34;61;104;73", "wc_strengths": "35;13;121;121", "wc_weaknesses": "44;27;399;317", "wc_questions": "188;15;123;179", "wc_limitations": "1;43;2;65", "wc_review": "302;159;749;755", "wc_reply_reviewers": "131;11;12;32", "wc_reply_authors": "232;10;19;22", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 25.129663746258124 ], "wc_strengths_avg": [ 72.5, 49.11975162803656 ], "wc_weaknesses_avg": [ 196.75, 163.9456846031636 ], "wc_questions_avg": [ 126.25, 68.888950492804 ], 
"wc_limitations_avg": [ 27.75, 27.380421837510102 ], "wc_review_avg": [ 491.25, 265.6147350957774 ], "wc_reply_reviewers_avg": [ 46.5, 49.5 ], "wc_reply_authors_avg": [ 70.75, 93.20240072015312 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2343017210107993916&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "u.nus.edu;hkust-gz.edu.cn;bjtu.edu.cn;nus.edu.sg;ustc.edu.cn;ustc.edu.cn;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;1;2;0;3;3;0", "aff_unique_norm": "National University of Singapore;Hong Kong University of Science and Technology;Beijing Jiao Tong University;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk;http://www.njtu.edu.cn/en;http://www.ustc.edu.cn", "aff_unique_abbr": "NUS;HKUST;BJTU;USTC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;1;1;0;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Instructing Goal-Conditioned Reinforcement Learning Agents with Temporal Logic Objectives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73035", "id": "19AgWnmyoV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7b35a69f434b5eb07ed1b1ef16ace52c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=19AgWnmyoV", "openreview": "https://openreview.net/forum?id=19AgWnmyoV", "poster": "/media/PosterPDFs/NeurIPS%202023/73035.png?t=1699948400.094892", "slides": "https://nips.cc/virtual/2023/poster/73035", "video": "https://nips.cc/virtual/2023/poster/73035", "author_site": "Wenjie Qiu, Wensen Mao, He Zhu", "tldr": "", "abstract": "Goal-conditioned reinforcement learning (RL) is a powerful approach for learning general-purpose skills by reaching diverse goals. However, it has limitations when it comes to task-conditioned policies, where goals are specified by temporally extended instructions written in the Linear Temporal Logic (LTL) formal language. Existing approaches for finding LTL-satisfying policies rely on sampling a large set of LTL instructions during training to adapt to unseen tasks at inference time. However, these approaches do not guarantee generalization to out-of-distribution LTL objectives, which may have increased complexity. In this paper, we propose a novel approach to address this challenge. We show that simple goal-conditioned RL agents can be instructed to follow arbitrary LTL specifications without additional training over the LTL task space. Unlike existing approaches that focus on LTL specifications expressible as regular expressions, our technique is unrestricted and generalizes to $\\omega$-regular expressions. 
Experiment results demonstrate the effectiveness of our approach in adapting goal-conditioned RL agents to satisfy complex temporal logic task specifications zero-shot.", "keywords": "Goal-Conditioned Reinforcement Learning;Linear Temporal Logic", "primary_area": "", "supplementary_material": "", "author": "Wenjie Qiu;Wensen Mao;He Zhu", "authorids": "~Wenjie_Qiu1;wm300@cs.rutgers.edu;~He_Zhu4", "gender": "M;;M", "homepage": "https://github.com/Roadsong;;https://herowanzhu.github.io", "dblp": "120/1151-2;;59/2802-1", "google_scholar": "sc4btRMAAAAJ;;3X9GC2gAAAAJ", "orcid": "0000-0002-2271-6443;;", "linkedin": "qiuwenjie/;;", "or_profile": "~Wenjie_Qiu1;wm300@cs.rutgers.edu;~He_Zhu4", "aff": "Rutgers University;;Rutgers University", "aff_domain": "rutgers.edu;;rutgers.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nqiu2023instructing,\ntitle={Instructing Goal-Conditioned Reinforcement Learning Agents with Temporal Logic Objectives},\nauthor={Wenjie Qiu and Wensen Mao and He Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=19AgWnmyoV}\n}", "github": "", "project": "", "reviewers": "szt3;VFCf;oQVB;Q7B2", "pdf_size": 10331336, "rating": "5;5;7;8", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "novelty": "2;1;3;4", "presentation": "2;2;2;3", "wc_summary": "142;159;129;179", "wc_strengths": "123;25;110;246", "wc_weaknesses": "196;178;180;563", "wc_questions": "96;23;16;160", "wc_limitations": "19;3;48;20", "wc_review": "576;388;483;1168", "wc_reply_reviewers": "38;125;36;304", "wc_reply_authors": "421;436;29;105", "reply_reviewers": "1;2;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 152.25, 18.7533330370897 ], "wc_strengths_avg": [ 126.0, 78.84478422825444 ], "wc_weaknesses_avg": [ 279.25, 163.97160577368265 ], "wc_questions_avg": [ 73.75, 58.831857866295536 ], "wc_limitations_avg": [ 22.5, 16.194134740701646 ], "wc_review_avg": [ 653.75, 304.2518488029284 ], "wc_reply_reviewers_avg": [ 125.75, 109.0054471116008 ], "wc_reply_authors_avg": [ 247.75, 182.81325854543482 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8469385075067877407&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "rutgers.edu;;rutgers.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Task-aware Distributed Source Coding under Dynamic Bandwidth", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73034", "id": "1A4ZqTmnye", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/016c63403370d81c24c1ca0123de6cfa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1A4ZqTmnye", "openreview": "https://openreview.net/forum?id=1A4ZqTmnye", "poster": "/media/PosterPDFs/NeurIPS%202023/73034.png?t=1698696734.64857", 
"slides": "https://nips.cc/virtual/2023/poster/73034", "video": "https://nips.cc/virtual/2023/poster/73034", "author_site": "Po-han Li, Sravan Kumar Ankireddy, Ruihan (Philip) Zhao, Hossein Nourkhiz Mahjoub, Ehsan Moradi Pari, Ufuk Topcu, Sandeep Chinchali, Hyeji Kim", "tldr": "", "abstract": "Efficient compression of correlated data is essential to minimize communication overload in multi-sensor networks. In such networks, each sensor independently compresses the data and transmits them to a central node. A decoder at the central node decompresses and passes the data to a pre-trained machine learning-based task model to generate the final output. Due to limited communication bandwidth, it is important for the compressor to learn only the features that are relevant to the task. Additionally, the final performance depends heavily on the total available bandwidth. In practice, it is common to encounter varying availability in bandwidth. Since higher bandwidth results in better performance, it is essential for the compressor to dynamically take advantage of the maximum available bandwidth at any instant. In this work, we propose a novel distributed compression framework composed of independent encoders and a joint decoder, which we call neural distributed principal component analysis (NDPCA). NDPCA flexibly compresses data from multiple sources to any available bandwidth with a single model, reducing compute and storage overhead. NDPCA achieves this by learning low-rank task representations and efficiently distributing bandwidth among sensors, thus providing a graceful trade-off between performance and bandwidth. Experiments show that NDPCA improves the success rate of multi-view robotic arm manipulation by 9% and the accuracy of object detection tasks on satellite imagery by 14% compared to an autoencoder with uniform bandwidth allocation.", "keywords": "Data Compression;Distributed Source Coding;Semantic Communication;Multi-sensor Networks;Bandwidth Allocation;Information Theory", "primary_area": "", "supplementary_material": "/attachment/a785ad0191965b4c766e7295911d16d14fcbc86f.pdf", "author": "Po-han Li;Sravan Kumar Ankireddy;Ruihan Zhao;Hossein Nourkhiz Mahjoub;Ehsan Moradi Pari;ufuk topcu;Sandeep P. 
Chinchali;Hyeji Kim", "authorids": "~Po-han_Li1;~Sravan_Kumar_Ankireddy1;~Ruihan_Zhao1;~Hossein_Nourkhiz_Mahjoub1;~Ehsan_Moradi_Pari1;~ufuk_topcu1;~Sandeep_P._Chinchali1;~Hyeji_Kim1", "gender": "M;M;M;;M;Unspecified;;", "homepage": "https://d31003.github.io/;https://sravan-ankireddy.github.io;https://philipzrh.com;;;https://autonomy.oden.utexas.edu/;;", "dblp": "311/3416;321/0836;236/4741-1;;;12/6659.html;;", "google_scholar": "x0WbtmoAAAAJ;j34sU94AAAAJ;;;2HQUXxsAAAAJ;jeNGFfQAAAAJ;;", "orcid": ";;;;0000-0002-4822-3196;0000-0003-0819-9985;;", "linkedin": "po-han-li-9760161bb/;;;;;;;", "or_profile": "~Po-han_Li1;~Sravan_Kumar_Ankireddy1;~Ruihan_Zhao1;~Hossein_Nourkhiz_Mahjoub1;~Ehsan_Moradi_Pari1;~ufuk_topcu1;~Sandeep_P._Chinchali1;~Hyeji_Kim1", "aff": "University of Texas, Austin;University of Texas at Austin;University of Texas at Austin;;Honda Research Institution US;University of Texas, Austin;;", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;;honda-ri.com;utexas.edu;;", "position": "PhD student;PhD student;PhD student;;Principal Researcher;Full Professor;;", "bibtex": "@inproceedings{\nli2023taskaware,\ntitle={Task-aware Distributed Source Coding under Dynamic Bandwidth},\nauthor={Po-han Li and Sravan Kumar Ankireddy and Ruihan Zhao and Hossein Nourkhiz Mahjoub and Ehsan Moradi Pari and ufuk topcu and Sandeep P. Chinchali and Hyeji Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1A4ZqTmnye}\n}", "github": "", "project": "", "reviewers": "uwj4;A8qh;84yk;dRdb;SHcb;V3iA", "pdf_size": 774641, "rating": "3;4;5;5;5;6", "confidence": "4;3;3;2;2;3", "soundness": "2;2;2;2;3;3", "novelty": "1;2;2;2;3;3", "presentation": "2;3;2;3;2;4", "wc_summary": "47;64;167;186;155;232", "wc_strengths": "60;28;100;71;36;69", "wc_weaknesses": "71;70;120;282;135;40", "wc_questions": "26;36;89;17;15;55", "wc_limitations": "3;1;6;2;7;7", "wc_review": "207;199;482;558;348;403", "wc_reply_reviewers": "0;0;64;0;10;47", "wc_reply_authors": "0;0;11;0;0;0", "reply_reviewers": "0;0;1;0;1;1", "reply_authors": "1;1;2;1;1;1", "rating_avg": [ 4.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 141.83333333333334, 65.75058090153189 ], "wc_strengths_avg": [ 60.666666666666664, 23.802427513922936 ], "wc_weaknesses_avg": [ 119.66666666666667, 79.34033582541032 ], "wc_questions_avg": [ 39.666666666666664, 25.791902260636423 ], "wc_limitations_avg": [ 4.333333333333333, 2.4267032964268394 ], "wc_review_avg": [ 366.1666666666667, 132.40268963364085 ], "wc_reply_reviewers_avg": [ 20.166666666666668, 25.70613848005094 ], "wc_reply_authors_avg": [ 1.8333333333333333, 4.099457958749615 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6002450479987811, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14515916256612920048&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "utexas.edu;utexas.edu;utexas.edu;;honda-ri.com;utexas.edu;;", "author_num": 8, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Texas at Austin;Honda Research Institute", "aff_unique_dep": ";Honda Research Institute", 
"aff_unique_url": "https://www.utexas.edu;https://honda-ri.com", "aff_unique_abbr": "UT Austin;HRI", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "De novo Drug Design using Reinforcement Learning with Multiple GPT Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73033", "id": "1B6YKnHYBb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1737656c4dc65027939e47e4587ce95e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1B6YKnHYBb", "openreview": "https://openreview.net/forum?id=1B6YKnHYBb", "poster": "/media/PosterPDFs/NeurIPS%202023/73033.png?t=1698842573.1901612", "slides": "https://nips.cc/virtual/2023/poster/73033", "video": "https://nips.cc/virtual/2023/poster/73033", "author_site": "Xiuyuan Hu, Guoqing Liu, Yang Zhao, Hao Zhang", "tldr": "", "abstract": "*De novo* drug design is a pivotal issue in pharmacology and a new area of focus in AI for science research. A central challenge in this field is to generate molecules with specific properties while also producing a wide range of diverse candidates. Although advanced technologies such as transformer models and reinforcement learning have been applied in drug design, their potential has not been fully realized. Therefore, we propose MolRL-MGPT, a reinforcement learning algorithm with multiple GPT agents for drug molecular generation. To promote molecular diversity, we encourage the agents to collaborate in searching for desirable molecules in diverse directions. Our algorithm has shown promising results on the GuacaMol benchmark and exhibits efficacy in designing inhibitors against SARS-CoV-2 protein targets. 
The codes are available at: https://github.com/HXYfighter/MolRL-MGPT.", "keywords": "De novo drug design;Molecular generation;Multi-agent reinforcement learning;GPT", "primary_area": "", "supplementary_material": "", "author": "Xiuyuan Hu;Guoqing Liu;Yang Zhao;Hao Zhang", "authorids": "~Xiuyuan_Hu1;~Guoqing_Liu3;~Yang_Zhao11;~Hao_Zhang37", "gender": "M;M;M;M", "homepage": "https://hxyfighter.github.io/;https://www.microsoft.com/en-us/research/people/guoqingliu/;;http://ee.tsinghua.edu.cn", "dblp": "180/4559;;50/2082-16;", "google_scholar": "8cRupWIAAAAJ;h-eHvyoAAAAJ;KF9ag1sAAAAJ;", "orcid": "0009-0003-5543-0972;;0000-0001-5883-2799;", "linkedin": "xiuyuan-hu-30b7a8201/;;;", "or_profile": "~Xiuyuan_Hu1;~Guoqing_Liu3;~Yang_Zhao11;~Hao_Zhang37", "aff": "Microsoft Research;Microsoft Research ;Tsinghua University;", "aff_domain": "microsoft.com;microsoft.com;tsinghua.edu.cn;", "position": "Intern;Researcher;PhD student;", "bibtex": "@inproceedings{\nhu2023de,\ntitle={De novo Drug Design using Reinforcement Learning with Multiple {GPT} Agents},\nauthor={Xiuyuan Hu and Guoqing Liu and Yang Zhao and Hao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1B6YKnHYBb}\n}", "github": "", "project": "", "reviewers": "xd9X;AzpA;4Wce", "pdf_size": 531672, "rating": "4;6;7", "confidence": "3;3;4", "soundness": "2;4;3", "novelty": "3;2;3", "presentation": "2;4;3", "wc_summary": "87;47;96", "wc_strengths": "46;25;101", "wc_weaknesses": "572;152;92", "wc_questions": "76;44;274", "wc_limitations": "34;5;7", "wc_review": "815;273;570", "wc_reply_reviewers": "1068;237;54", "wc_reply_authors": "2099;430;54", "reply_reviewers": "2;1;1", "reply_authors": "5;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 76.66666666666667, 21.296843793284385 ], "wc_strengths_avg": [ 57.333333333333336, 32.04510709747884 ], "wc_weaknesses_avg": [ 272.0, 213.5415650406262 ], "wc_questions_avg": [ 131.33333333333334, 101.72293524842642 ], "wc_limitations_avg": [ 15.333333333333334, 13.22455628325158 ], "wc_review_avg": [ 552.6666666666666, 221.6097671333303 ], "wc_reply_reviewers_avg": [ 453.0, 441.2414305116871 ], "wc_reply_authors_avg": [ 861.0, 888.754559294447 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2809142336059301244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "microsoft.com;microsoft.com;tsinghua.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Microsoft;Tsinghua University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.tsinghua.edu.cn", "aff_unique_abbr": "MSR;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "title": "PoET: A generative model of protein families as sequences-of-sequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73032", "id": "1CJ8D7P8RZ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4366126eba252699b280e8f93c0ab2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1CJ8D7P8RZ", "openreview": "https://openreview.net/forum?id=1CJ8D7P8RZ", "poster": "/media/PosterPDFs/NeurIPS%202023/73032.png?t=1701805052.1083062", "slides": "https://nips.cc/virtual/2023/poster/73032", "video": "https://nips.cc/virtual/2023/poster/73032", "author_site": "Timothy Truong Jr, Tristan Bepler", "tldr": "", "abstract": "Generative protein language models are a natural way to design new proteins with desired functions. However, current models are either difficult to direct to produce a protein from a specific family of interest, or must be trained on a large multiple sequence alignment (MSA) from the specific family of interest, making them unable to benefit from transfer learning across families. To address this, we propose **P**r**o**tein **E**volutionary **T**ransformer (PoET), an autoregressive generative model of whole protein families that learns to generate sets of related proteins as sequences-of-sequences across tens of millions of natural protein sequence clusters. PoET can be used as a retrieval-augmented language model to generate and score arbitrary modifications conditioned on any protein family of interest, and can extrapolate from short context lengths to generalize well even for small families. This is enabled by a unique Transformer layer; we model tokens sequentially within sequences while attending between sequences order invariantly, allowing PoET to scale to context lengths beyond those used during training. In extensive experiments on deep mutational scanning datasets, we show that PoET outperforms existing protein language models and evolutionary sequence models for variant function prediction across proteins of all MSA depths. 
We also demonstrate PoET's ability to controllably generate new protein sequences.", "keywords": "protein fitness prediction;transformer;retrieval;language model;MSA;generative model;protein engineering", "primary_area": "", "supplementary_material": "", "author": "Timothy Fei Truong Jr;Tristan Bepler", "authorids": "~Timothy_Fei_Truong_Jr1;~Tristan_Bepler1", "gender": ";M", "homepage": ";", "dblp": ";217/3335", "google_scholar": ";Roxjki8AAAAJ", "orcid": ";0000-0001-5595-9954", "linkedin": "timothy-truong-jr-6b3b25a7/;", "or_profile": "~Timothy_Fei_Truong_Jr1;~Tristan_Bepler1", "aff": ";New York Structural Biology Center", "aff_domain": ";nysbc.org", "position": ";Group Leader", "bibtex": "@inproceedings{\njr2023poet,\ntitle={Po{ET}: A generative model of protein families as sequences-of-sequences},\nauthor={Timothy Fei Truong Jr and Tristan Bepler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1CJ8D7P8RZ}\n}", "github": "", "project": "", "reviewers": "UQRE;Uh6C;jcwZ;n5MZ;sUvw", "pdf_size": 4723944, "rating": "5;5;7;7;8", "confidence": "3;3;4;5;3", "soundness": "3;3;2;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;3;4;4", "wc_summary": "96;151;130;151;96", "wc_strengths": "55;61;99;50;48", "wc_weaknesses": "100;47;84;227;159", "wc_questions": "137;99;406;354;359", "wc_limitations": "33;25;18;16;18", "wc_review": "421;383;737;798;680", "wc_reply_reviewers": "128;43;36;165;62", "wc_reply_authors": "116;0;0;218;0", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 124.8, 24.733782565551916 ], "wc_strengths_avg": [ 62.6, 18.74673304872078 ], "wc_weaknesses_avg": [ 123.4, 63.1461796152388 ], "wc_questions_avg": [ 271.0, 126.8053626626256 ], "wc_limitations_avg": [ 22.0, 6.29285308902091 ], "wc_review_avg": [ 603.8, 169.36989106685994 ], "wc_reply_reviewers_avg": [ 86.8, 50.84643546995207 ], "wc_reply_authors_avg": [ 66.8, 87.94179893543229 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.37499999999999994, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18201589041708158157&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";nysbc.org", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "New York Structural Biology Center", "aff_unique_dep": "", "aff_unique_url": "https://www.nysbc.org", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Should I Stop or Should I Go: Early Stopping with Heterogeneous Populations", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73031", "id": "1CpVHL10fh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3322a9a72a1707de14badd5e552ff466-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1CpVHL10fh", "openreview": "https://openreview.net/forum?id=1CpVHL10fh", "poster": "/media/PosterPDFs/NeurIPS%202023/73031.png?t=1701986498.0704446", "slides": "https://nips.cc/virtual/2023/poster/73031", "video": "https://nips.cc/virtual/2023/poster/73031", "author_site": "Hammaad Adam, Fan Yin, Huibin Hu, Neil Tenenholtz, Lorin Crawford, Lester Mackey, Allison 
Koenecke", "tldr": "", "abstract": "Randomized experiments often need to be stopped prematurely due to the treatment having an unintended harmful effect. Existing methods that determine when to stop an experiment early are typically applied to the data in aggregate and do not account for treatment effect heterogeneity. In this paper, we study the early stopping of experiments for harm on heterogeneous populations. We first establish that current methods often fail to stop experiments when the treatment harms a minority group of participants. We then use causal machine learning to develop CLASH, the first broadly-applicable method for heterogeneous early stopping. We demonstrate CLASH's performance on simulated and real data and show that it yields effective early stopping for both clinical trials and A/B tests.", "keywords": "Randomized experiments;heterogeneous effects;causal machine learning;fairness;sequential testing;clinical trials;A/B testing", "primary_area": "", "supplementary_material": "", "author": "Hammaad Adam;Fan Yin;Mary Hu;Neil Tenenholtz;Lorin Crawford;Lester Mackey;Allison Koenecke", "authorids": "~Hammaad_Adam1;~Fan_Yin2;maryhu@microsoft.com;~Neil_Tenenholtz1;~Lorin_Crawford1;~Lester_Mackey1;~Allison_Koenecke1", "gender": "M;M;;;M;M;", "homepage": "https://hammaadadam1.github.io./;https://fyin-stats.github.io/;;;https://lorincrawford.com/;https://stanford.edu/~lmackey;", "dblp": "319/9859;24/8079;;75/10171;;05/2961;", "google_scholar": "https://scholar.google.com/;QWJznZEAAAAJ;;SGl2QI8AAAAJ;ssZy1zYAAAAJ;erv7TP0AAAAJ;", "orcid": "0000-0001-6910-7074;0000-0002-8028-2217;;0000-0003-1250-3716;0000-0003-0178-8242;0000-0002-1102-0387;", "linkedin": ";fan-y-a7973689/;;neil-tenenholtz/;lorin-crawford;lester-mackey-5902909;", "or_profile": "~Hammaad_Adam1;~Fan_Yin2;maryhu@microsoft.com;~Neil_Tenenholtz1;~Lorin_Crawford1;~Lester_Mackey1;~Allison_Koenecke1", "aff": "Massachusetts Institute of Technology;Amazon;;American College of Radiology Data Science Institute;Microsoft;Microsoft Research New England;", "aff_domain": "mit.edu;amazon.com;;acr.org;microsoft.com;microsoft.com;", "position": "PhD student;Machine Learning Scientist;;Senior Scientist;Principal Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nadam2023should,\ntitle={Should I Stop or Should I Go: Early Stopping with Heterogeneous Populations},\nauthor={Hammaad Adam and Fan Yin and Mary Hu and Neil Tenenholtz and Lorin Crawford and Lester Mackey and Allison Koenecke},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1CpVHL10fh}\n}", "github": "", "project": "", "reviewers": "tX2R;SD94;R8bL;K2p1;af9S;wJ1C", "pdf_size": 626483, "rating": "6;6;7;7;7;7", "confidence": "4;3;3;3;4;4", "soundness": "3;3;3;3;3;4", "novelty": "3;3;3;3;4;3", "presentation": "4;3;4;4;3;4", "wc_summary": "105;103;104;33;102;108", "wc_strengths": "85;67;48;30;48;55", "wc_weaknesses": "154;154;47;151;23;59", "wc_questions": "70;165;41;207;365;2", "wc_limitations": "139;85;35;1;31;9", "wc_review": "553;574;275;422;569;233", "wc_reply_reviewers": "54;0;46;14;56;25", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;0;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 
92.5, 26.67551936389118 ], "wc_strengths_avg": [ 55.5, 17.153716798408443 ], "wc_weaknesses_avg": [ 98.0, 56.01785429664367 ], "wc_questions_avg": [ 141.66666666666666, 122.11151551848918 ], "wc_limitations_avg": [ 50.0, 47.982635748084256 ], "wc_review_avg": [ 437.6666666666667, 140.07815278939427 ], "wc_reply_reviewers_avg": [ 32.5, 21.021814066979726 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9521814941607485629&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "mit.edu;amazon.com;;acr.org;microsoft.com;microsoft.com;", "author_num": 7, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Massachusetts Institute of Technology;Amazon;American College of Radiology;Microsoft", "aff_unique_dep": ";Amazon.com, Inc.;Data Science Institute;Microsoft Corporation", "aff_unique_url": "https://web.mit.edu;https://www.amazon.com;https://www.acr.org;https://www.microsoft.com", "aff_unique_abbr": "MIT;Amazon;ACR;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";New England", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Cascading Contextual Assortment Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73030", "id": "1DTCoyAFiV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/49528141137087b8e94126d5f50b22da-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1DTCoyAFiV", "openreview": "https://openreview.net/forum?id=1DTCoyAFiV", "poster": "/media/PosterPDFs/NeurIPS%202023/73030.png?t=1699758380.4885643", "slides": "https://nips.cc/virtual/2023/poster/73030", "video": "https://nips.cc/virtual/2023/poster/73030", "author_site": "Hyun-jun Choi, Rajan Udwani, Min-hwan Oh", "tldr": "", "abstract": "We present a new combinatorial bandit model, the \textit{cascading contextual assortment bandit}. This model serves as a generalization of both existing cascading bandits and assortment bandits, broadening their applicability in practice. For this model, we propose our first UCB bandit algorithm, UCB-CCA. We prove that this algorithm achieves a $T$-step regret upper bound of $\tilde{\mathcal{O}}(\frac{1}{\kappa}d\sqrt{T})$, sharper than existing bounds for cascading contextual bandits by eliminating dependence on cascade length $K$. To improve the dependence on the problem-dependent constant $\kappa$, we introduce our second algorithm, UCB-CCA+, which leverages a new Bernstein-type concentration result. This algorithm achieves $\tilde{\mathcal{O}}(d\sqrt{T})$ without dependence on $\kappa$ in the leading term. 
We substantiate our theoretical claims with numerical experiments, demonstrating the practical efficacy of our proposed methods.", "keywords": "cascade bandit;assortment bandit;upper confidence bound;exploration and exploitation;combinatorial optimization", "primary_area": "", "supplementary_material": "/attachment/98ccaedc7884c338101c14e8fa537adcbf58b3f1.pdf", "author": "Hyunjun Choi;Rajan Udwani;Min-hwan Oh", "authorids": "~Hyunjun_Choi3;~Rajan_Udwani1;~Min-hwan_Oh1", "gender": "M;;", "homepage": "https://github.com/snunschj1;;https://minoh.io", "dblp": ";133/3845;172/0531", "google_scholar": ";;KzVALFwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hyunjun_Choi3;~Rajan_Udwani1;~Min-hwan_Oh1", "aff": "Seoul National University;University of California, Berkeley;Seoul National University", "aff_domain": "snu.ac.kr;berkeley.edu;snu.ac.kr", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchoi2023cascading,\ntitle={Cascading Contextual Assortment Bandits},\nauthor={Hyunjun Choi and Rajan Udwani and Min-hwan Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1DTCoyAFiV}\n}", "github": "", "project": "", "reviewers": "jQVq;tk8N;anEU;nP45;Q2NZ", "pdf_size": 1349987, "rating": "5;5;6;6;7", "confidence": "3;3;4;3;3", "soundness": "2;3;2;2;3", "novelty": "3;3;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "17;51;86;55;117", "wc_strengths": "24;37;113;124;105", "wc_weaknesses": "127;201;181;120;94", "wc_questions": "110;26;9;223;9", "wc_limitations": "6;10;2;18;17", "wc_review": "284;325;391;540;342", "wc_reply_reviewers": "407;0;57;161;207", "wc_reply_authors": "1428;0;20;57;275", "reply_reviewers": "2;0;1;1;2", "reply_authors": "4;1;2;2;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 65.2, 33.896312483808615 ], "wc_strengths_avg": [ 80.6, 41.552857904120145 ], "wc_weaknesses_avg": [ 144.6, 39.95297235500758 ], "wc_questions_avg": [ 75.4, 82.75409355433723 ], "wc_limitations_avg": [ 10.6, 6.1838499334961226 ], "wc_review_avg": [ 376.4, 88.70310028403742 ], "wc_reply_reviewers_avg": [ 166.4, 140.87100482356192 ], "wc_reply_authors_avg": [ 356.0, 544.9363265556811 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13363062095621217, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7763205103666425979&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "snu.ac.kr;berkeley.edu;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.berkeley.edu", "aff_unique_abbr": "SNU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "HeadSculpt: Crafting 3D Head Avatars with Text", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73029", "id": "1DmP6ySKYq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0fb98d483fa580e0354bcdd3a003a3f3-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=1DmP6ySKYq", "openreview": "https://openreview.net/forum?id=1DmP6ySKYq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73029", "video": "https://nips.cc/virtual/2023/poster/73029", "author_site": "Xiao Han, Yukang Cao, Kai Han, Xiatian Zhu, Jiankang Deng, Yi-Zhe Song, Tao Xiang, Kwan-Yee K. Wong", "tldr": "", "abstract": "Recently, text-guided 3D generative methods have made remarkable advancements in producing high-quality textures and geometry, capitalizing on the proliferation of large vision-language and image diffusion models. \nHowever, existing methods still struggle to create high-fidelity 3D head avatars in two aspects: \n(1) They rely mostly on a pre-trained text-to-image diffusion model whilst missing the necessary 3D awareness and head priors. \nThis makes them prone to inconsistency and geometric distortions in the generated avatars. \n(2) They fall short in fine-grained editing. This is primarily due to the inherited limitations from the pre-trained 2D image diffusion models, which become more pronounced when it comes to 3D head avatars. \nIn this work, we address these challenges by introducing a versatile coarse-to-fine pipeline dubbed HeadSculpt for crafting (i.e., generating and editing) 3D head avatars from textual prompts. \nSpecifically, we first equip the diffusion model with 3D awareness by leveraging landmark-based control and a learned textual embedding representing the back view appearance of heads, enabling 3D-consistent head avatar generations. \nWe further propose a novel identity-aware editing score distillation strategy to optimize a textured mesh with a high-resolution differentiable rendering technique. \nThis enables identity preservation while following the editing instruction.\nWe showcase HeadSculpt's superior fidelity and editing capabilities through comprehensive experiments and comparisons with existing methods.", "keywords": "3D generative model;head avatar;diffusion models;neural rendering", "primary_area": "", "supplementary_material": "/attachment/5391261f69537e1320c7a81e5bafc6a3acd3f160.zip", "author": "Xiao Han;Yukang Cao;Kai Han;Xiatian Zhu;Jiankang Deng;Yi-Zhe Song;Tao Xiang;Kwan-Yee K. 
Wong", "authorids": "~Xiao_Han6;~Yukang_Cao2;~Kai_Han1;~Xiatian_Zhu3;~Jiankang_Deng1;~Yi-Zhe_Song2;~Tao_Xiang1;~Kwan-Yee_K._Wong1", "gender": "M;M;;M;M;M;M;M", "homepage": "https://brandonhanx.github.io;http://www.kaihan.org/;https://x-up-lab.github.io;https://jiankangdeng.github.io/;http://personal.ee.surrey.ac.uk/Personal/Y.Song/;https://www.surrey.ac.uk/people/tao-xiang;https://i.cs.hku.hk/~kykwong/;https://yukangcao.github.io/", "dblp": ";51/4757-1.html;128/7935;156/7808;98/1684;22/4460-2.html;w/KwanYeeKennethWong;318/9161", "google_scholar": "sRelAa4AAAAJ;tG8S_vMAAAAJ;ZbA-z1cAAAAJ;Z_UoQFsAAAAJ;https://scholar.google.co.uk/citations?user=irZFP_AAAAAJ;MeS5d4gAAAAJ;https://scholar.google.com/citations?sortby=pubdate;1rIzYQgAAAAJ", "orcid": ";0000-0002-7995-9999;0000-0002-9284-2955;0000-0002-3709-6216;;0000-0002-2530-1059;0000-0001-8560-9007;0009-0001-0125-0015", "linkedin": "xiao-han-9652311a6/;kaihancs/;;jiankang-deng-b45b21b4/?originalSubdomain=uk;;;kenneth-wong-94a4621a9/;ykcao/", "or_profile": "~Xiao_Han6;~Kai_Han1;~Xiatian_Zhu3;~Jiankang_Deng1;~Yi-Zhe_Song2;~Tao_Xiang1;~Kwan-Yee_Kenneth_Wong2;~Yukang_CAO1", "aff": "University of Surrey;The University of Hong Kong;University of Surrey;;University of Surrey;University of Surrey;The University of Hong Kong;University of Hong Kong", "aff_domain": "surrey.ac.uk;hku.hk;surrey.ac.uk;;surrey.ac.uk;surrey.ac.uk;hku.hk;hku.hk", "position": "PhD student;Assistant Professor;Associate Professor;;Professor;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nhan2023headsculpt,\ntitle={HeadSculpt: Crafting 3D Head Avatars with Text},\nauthor={Xiao Han and Yukang Cao and Kai Han and Xiatian Zhu and Jiankang Deng and Yi-Zhe Song and Tao Xiang and Kwan-Yee K. Wong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1DmP6ySKYq}\n}", "github": "", "project": "", "reviewers": "Gcqm;thuE;ztub;ooKH", "pdf_size": 7677747, "rating": "6;6;6;7", "confidence": "5;5;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "54;99;268;63", "wc_strengths": "31;52;310;48", "wc_weaknesses": "120;217;295;95", "wc_questions": "41;71;24;128", "wc_limitations": "5;1;106;46", "wc_review": "251;440;1003;380", "wc_reply_reviewers": "22;195;115;36", "wc_reply_authors": "0;276;137;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 121.0, 86.52456298647223 ], "wc_strengths_avg": [ 110.25, 115.59492852197279 ], "wc_weaknesses_avg": [ 181.75, 79.69747486589522 ], "wc_questions_avg": [ 66.0, 39.55376088313221 ], "wc_limitations_avg": [ 39.5, 42.24038352098617 ], "wc_review_avg": [ 518.5, 287.94140028832254 ], "wc_reply_reviewers_avg": [ 92.0, 69.23510670173044 ], "wc_reply_authors_avg": [ 103.25, 114.34897244837839 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10995432071298720102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "surrey.ac.uk;hku.hk;surrey.ac.uk;;surrey.ac.uk;surrey.ac.uk;hku.hk;hku.hk", "author_num": 8, "aff_unique_index": "0;1;0;0;0;1;1", "aff_unique_norm": "University of 
Surrey;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.surrey.ac.uk;https://www.hku.hk", "aff_unique_abbr": "Surrey;HKU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;1;1", "aff_country_unique": "United Kingdom;China" }, { "title": "Large language models transition from integrating across position-yoked, exponential windows to structure-yoked, power-law windows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73028", "id": "1EYKYJeZtR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/020ad0ac6a1974e6748e4a5a48110a07-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1EYKYJeZtR", "openreview": "https://openreview.net/forum?id=1EYKYJeZtR", "poster": "/media/PosterPDFs/NeurIPS%202023/73028.png?t=1702103001.9174392", "slides": "https://nips.cc/virtual/2023/poster/73028", "video": "https://nips.cc/virtual/2023/poster/73028", "author_site": "David Skrill, Samuel Norman-Haignere", "tldr": "", "abstract": "Modern language models excel at integrating across long temporal scales needed to encode linguistic meaning and show non-trivial similarities to biological neural systems. Prior work suggests that human brain responses to language exhibit hierarchically organized \"integration windows\" that substantially constrain the overall influence of an input token (e.g., a word) on the neural response. However, little prior work has attempted to use integration windows to characterize computations in large language models (LLMs). We developed a simple word-swap procedure for estimating integration windows from black-box language models that does not depend on access to gradients or knowledge of the model architecture (e.g., attention weights). Using this method, we show that trained LLMs exhibit stereotyped integration windows that are well-fit by a convex combination of an exponential and a power-law function, with a partial transition from exponential to power-law dynamics across network layers. We then introduce a metric for quantifying the extent to which these integration windows vary with structural boundaries (e.g., sentence boundaries), and using this metric, we show that integration windows become increasingly yoked to structure at later network layers. None of these findings were observed in an untrained model, which as expected integrated uniformly across its input. These results suggest that LLMs learn to integrate information in natural language using a stereotyped pattern: integrating across position-yoked, exponential windows at early layers, followed by structure-yoked, power-law windows at later layers. 
The methods we describe in this paper provide a general-purpose toolkit for understanding temporal integration in language models, facilitating cross-disciplinary research at the intersection of biological and artificial intelligence.", "keywords": "language modeling;temporal integration;transformers;timescales;model interpretation", "primary_area": "", "supplementary_material": "/attachment/bc41652cbbcdfbf439982b1ff0143c271a7fe0ee.pdf", "author": "David Skrill;Samuel Victor Norman-Haignere", "authorids": "~David_Skrill1;~Samuel_Victor_Norman-Haignere1", "gender": "M;M", "homepage": ";https://www.urmc.rochester.edu/labs/computational-neuroscience-audition.aspx", "dblp": ";https://dblp.uni-trier.de/pers/hd/n/Norman=Haignere:Sam", "google_scholar": ";NaIiKJwAAAAJ", "orcid": "0000-0002-4970-010X;", "linkedin": ";", "or_profile": "~David_Skrill1;~Samuel_Victor_Norman-Haignere1", "aff": "University of Rochester;University of Rochester Medical Center", "aff_domain": "rochester.edu;urmc.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nskrill2023large,\ntitle={Large language models transition from integrating across position-yoked, exponential windows to structure-yoked, power-law windows},\nauthor={David Skrill and Samuel Victor Norman-Haignere},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1EYKYJeZtR}\n}", "github": "", "project": "", "reviewers": "NDTr;KfF7;AsxG;hKex;3ifa", "pdf_size": 6339021, "rating": "3;5;8;8;8", "confidence": "5;3;5;4;5", "soundness": "2;3;4;4;4", "novelty": "2;3;4;3;3", "presentation": "2;3;4;3;4", "wc_summary": "110;112;116;172;109", "wc_strengths": "52;43;50;78;86", "wc_weaknesses": "323;147;32;168;85", "wc_questions": "37;20;89;48;45", "wc_limitations": "46;54;5;29;8", "wc_review": "568;376;292;495;333", "wc_reply_reviewers": "13;23;24;67;39", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 2.0591260281974 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 123.8, 24.219000805152966 ], "wc_strengths_avg": [ 61.8, 16.951696080333672 ], "wc_weaknesses_avg": [ 151.0, 98.39308918821484 ], "wc_questions_avg": [ 47.8, 22.7806935803105 ], "wc_limitations_avg": [ 28.4, 19.64281038955475 ], "wc_review_avg": [ 412.8, 103.11042624293626 ], "wc_reply_reviewers_avg": [ 33.2, 18.829763673503713 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.1456928793535896, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14820617629120111579&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "rochester.edu;urmc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Rochester", "aff_unique_dep": "", "aff_unique_url": "https://www.rochester.edu", "aff_unique_abbr": "U of R", "aff_campus_unique_index": "1", "aff_campus_unique": ";Rochester", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "How a Student becomes a Teacher: learning and forgetting through Spectral methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73027", "id": "1FVmMlifl7", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd9ea5d671ee761a69dba811348d78ba-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1FVmMlifl7", "openreview": "https://openreview.net/forum?id=1FVmMlifl7", "poster": "/media/PosterPDFs/NeurIPS%202023/73027.png?t=1697300534.4729104", "slides": "https://nips.cc/virtual/2023/poster/73027", "video": "https://nips.cc/virtual/2023/poster/73027", "author_site": "Lorenzo Giambagli, Lorenzo Buffoni, Lorenzo Chicchi, Duccio Fanelli", "tldr": "", "abstract": "In theoretical Machine Learning, the teacher-student paradigm is often employed as an effective metaphor for real-life tuition. A student network is trained on data generated by a fixed teacher network until it matches the instructor\u2019s ability to cope with the assigned task. The above scheme proves particularly relevant when the student network is overparameterized (namely, when larger layer sizes are employed) as compared to the underlying teacher network. Under these operating conditions, it is tempting to speculate that the student ability to handle the given task could be eventually stored in a sub-portion of the whole network. This latter should be to some extent reminiscent of the frozen teacher structure, according to suitable metrics, while being approximately invariant across different architectures of the student candidate network. Unfortunately, state-of-the-art conventional learning techniques could not help in identifying the existence of such an invariant subnetwork, due to the inherent degree of non-convexity that characterizes the examined problem. In this work, we take a decisive leap forward by proposing a radically different optimization scheme which builds on a spectral representation of the linear transfer of information between layers. The gradient is hence calculated with respect to both eigenvalues and eigenvectors with negligible increase in terms of computational and complexity load, as compared to standard training algorithms. Working in this framework, we could isolate a stable student substructure, that mirrors the true complexity of the teacher in terms of computing neurons, path distribution and topological attributes. When pruning unimportant nodes of the trained student, as follows a ranking that reflects the optimized eigenvalues, no degradation in the recorded performance is seen above a threshold that corresponds to the effective teacher size. 
The observed behavior can be pictured as a genuine second-order phase transition that bears universality traits.", "keywords": "Network Slimming;Spectral Analysis;Node Pruning;Teacher-Student", "primary_area": "", "supplementary_material": "/attachment/d19b9727668ee9c2c5a15d3f0ff3827fcffd615e.pdf", "author": "Lorenzo Giambagli;Lorenzo Buffoni;Lorenzo Chicchi;Duccio Fanelli", "authorids": "~Lorenzo_Giambagli1;lorenzo.buffoni@unifi.it;lorenzo.chicchi@unifi.it;duccio.fanelli@unifi.it", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": "Sf4XVy0AAAAJ;;;", "orcid": "0000-0002-0045-6839;;;", "linkedin": ";;;", "or_profile": "~Lorenzo_Giambagli1;lorenzo.buffoni@unifi.it;lorenzo.chicchi@unifi.it;duccio.fanelli@unifi.it", "aff": "University of Namur;;;", "aff_domain": "unamur.be;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\ngiambagli2023how,\ntitle={How a Student becomes a Teacher: learning and forgetting through Spectral methods},\nauthor={Lorenzo Giambagli and Lorenzo Buffoni and Lorenzo Chicchi and Duccio Fanelli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1FVmMlifl7}\n}", "github": "", "project": "", "reviewers": "uE1J;Cz6u;EH9K;YyTW;dofn", "pdf_size": 1430651, "rating": "5;6;6;7;7", "confidence": "3;2;1;4;3", "soundness": "2;3;2;3;4", "novelty": "2;3;2;3;3", "presentation": "2;4;2;3;3", "wc_summary": "67;108;67;33;110", "wc_strengths": "1;13;29;36;180", "wc_weaknesses": "201;29;32;31;265", "wc_questions": "1;1;13;393;636", "wc_limitations": "73;30;13;23;25", "wc_review": "343;181;154;516;1216", "wc_reply_reviewers": "82;0;41;20;180", "wc_reply_authors": "108;0;0;0;0", "reply_reviewers": "2;0;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 77.0, 28.934408582170814 ], "wc_strengths_avg": [ 51.8, 65.25457838343605 ], "wc_weaknesses_avg": [ 111.6, 101.17232823257554 ], "wc_questions_avg": [ 208.8, 261.200612556709 ], "wc_limitations_avg": [ 32.8, 20.846102753272614 ], "wc_review_avg": [ 482.0, 389.20894131558697 ], "wc_reply_reviewers_avg": [ 64.6, 63.779620569583194 ], "wc_reply_authors_avg": [ 21.6, 43.2 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3668996928526713, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9440910178346603794&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 3, "email": "unamur.be;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Namur", "aff_unique_dep": "", "aff_unique_url": "https://www.unamur.be", "aff_unique_abbr": "UNamur", "aff_country_unique_index": "0", "aff_country_unique": "Belgium" }, { "title": "Adapting Neural Link Predictors for Data-Efficient Complex Query Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73026", "id": "1G7CBp8o7L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/55c518a17bd17dcb69aa14d69d085994-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1G7CBp8o7L", "openreview": "https://openreview.net/forum?id=1G7CBp8o7L", "poster": "/media/PosterPDFs/NeurIPS%202023/73026.png?t=1701963147.591222", 
"slides": "https://nips.cc/virtual/2023/poster/73026", "video": "https://nips.cc/virtual/2023/poster/73026", "author_site": "Erik Arakelyan, Pasquale Minervini, Daniel Daza, Michael Cochez, Isabelle Augenstein", "tldr": "", "abstract": "Answering complex queries on incomplete knowledge graphs is a challenging task where a model needs to answer complex logical queries in the presence of missing knowledge. Prior work in the literature has proposed to address this problem by designing architectures trained end-to-end for the complex query answering task with a reasoning process that is hard to interpret while requiring data and resource-intensive training. Other lines of research have proposed re-using simple neural link predictors to answer complex queries, reducing the amount of training data by orders of magnitude while providing interpretable answers. The neural link predictor used in such approaches is not explicitly optimised for the complex query answering task, implying that its scores are not calibrated to interact together. We propose to address these problems via CQD$^{\\mathcal{A}}$, a parameter-efficient score \\emph{adaptation} model optimised to re-calibrate neural link prediction scores for the complex query answering task. While the neural link predictor is frozen, the adaptation component -- which only increases the number of model parameters by $0.03\\%$ -- is trained on the downstream complex query answering task. Furthermore, the calibration component enables us to support reasoning over queries that include atomic negations, which was previously impossible with link predictors. In our experiments, CQD$^{\\mathcal{A}}$ produces significantly more accurate results than current state-of-the-art methods, improving from $34.4$ to $35.1$ Mean Reciprocal Rank values averaged across all datasets and query types while using $\\leq 30\\%$ of the available training query types. We further show that CQD$^{\\mathcal{A}}$ is data-efficient, achieving competitive results with only $1\\%$ of the complex training queries and robust in out-of-domain evaluations. 
Source code and datasets are available at https://github.com/EdinburghNLP/adaptive-cqd.", "keywords": "complex query answering;neural link prediction;knowledge graph embeddings;knowledge graphs;relational learning;adapters", "primary_area": "", "supplementary_material": "", "author": "Erik Arakelyan;Pasquale Minervini;Daniel Daza;Michael Cochez;Isabelle Augenstein", "authorids": "~Erik_Arakelyan1;~Pasquale_Minervini4;~Daniel_Daza1;~Michael_Cochez2;~Isabelle_Augenstein1", "gender": "M;M;M;F;M", "homepage": "https://github.com/osoblanco;https://dfdazac.github.io/;https://www.cochez.nl;http://isabelleaugenstein.github.io/;https://www.neuralnoise.com", "dblp": "175/1770;258/0746;83/11448;93/11424.html;58/10142", "google_scholar": "63BfrxMAAAAJ;VR4a4QIAAAAJ;https://scholar.google.fi/citations?user=JuZrOtoAAAAJ;https://scholar.google.co.uk/citations?user=DjJp0dcAAAAJ;https://scholar.google.it/citations?user=9sk6CSgAAAA", "orcid": ";0000-0002-5357-3705;0000-0001-5726-4638;0000-0003-1562-7909;0000-0002-8442-602X", "linkedin": "erik-arakelyan-a6a84470/;daniel-daza/;michaelcochez/;isabelle-augenstein-82436b7a/;pasquale-mauro-minervini-47a08324/", "or_profile": "~Erik_Arakelyan1;~Daniel_Daza1;~Michael_Cochez2;~Isabelle_Augenstein1;~Pasquale_Minervini1", "aff": "University of Copenhagen;Vrije Universiteit Amsterdam;VU Amsterdam;University of Copenhagen;University of Edinburgh, University of Edinburgh", "aff_domain": "diku.dk;vu.nl;vu.nl;ku.dk;ed.ac.uk", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\narakelyan2023adapting,\ntitle={Adapting Neural Link Predictors for Data-Efficient Complex Query Answering},\nauthor={Erik Arakelyan and Pasquale Minervini and Daniel Daza and Michael Cochez and Isabelle Augenstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1G7CBp8o7L}\n}", "github": "", "project": "", "reviewers": "oS1P;5Xze;pnJH;jfF5;jjs4", "pdf_size": 658263, "rating": "5;7;7;7;7", "confidence": "4;1;5;4;4", "soundness": "3;3;3;4;3", "novelty": "2;3;3;4;3", "presentation": "4;3;3;4;3", "wc_summary": "57;113;69;121;84", "wc_strengths": "24;199;85;127;111", "wc_weaknesses": "31;12;133;117;46", "wc_questions": "49;1;43;2;59", "wc_limitations": "1;5;1;6;37", "wc_review": "162;330;331;373;337", "wc_reply_reviewers": "0;4;18;65;4", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 1.3564659966250538 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 88.8, 24.69331893448104 ], "wc_strengths_avg": [ 109.2, 56.97157185825225 ], "wc_weaknesses_avg": [ 67.8, 48.19709534816387 ], "wc_questions_avg": [ 30.8, 24.465485893396846 ], "wc_limitations_avg": [ 10.0, 13.65283853270081 ], "wc_review_avg": [ 306.6, 74.00702669341608 ], "wc_reply_reviewers_avg": [ 18.2, 24.185946332529557 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1474419561548971, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16943831137526024730&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "diku.dk;vu.nl;vu.nl;ku.dk;ed.ac.uk", "author_num": 5, "aff_unique_index": 
"0;1;1;0;2", "aff_unique_norm": "University of Copenhagen;Vrije Universiteit Amsterdam;University of Edinburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ku.dk;https://www.vu.nl;https://www.ed.ac.uk", "aff_unique_abbr": "UCPH;VU Amsterdam;Edinburgh", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amsterdam", "aff_country_unique_index": "0;1;1;0;2", "aff_country_unique": "Denmark;Netherlands;United Kingdom" }, { "title": "On Computing Pairwise Statistics with Local Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73025", "id": "1GxKVprbwM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5642b9811a9ac5281be1cc84c275f251-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1GxKVprbwM", "openreview": "https://openreview.net/forum?id=1GxKVprbwM", "poster": "/media/PosterPDFs/NeurIPS%202023/73025.png?t=1702268890.2845984", "slides": "https://nips.cc/virtual/2023/poster/73025", "video": "https://nips.cc/virtual/2023/poster/73025", "author_site": "Badih Ghazi, Pritish Kamath, Ravi Kumar, Pasin Manurangsi, Adam Sealfon", "tldr": "", "abstract": "We study the problem of computing pairwise statistics, i.e., ones of the form $\\binom{n}{2}^{-1} \\sum_{i \\ne j} f(x_i, x_j)$, where $x_i$ denotes the input to the $i$th user, with differential privacy (DP) in the local model. This formulation captures important metrics such as Kendall's $\\tau$ coefficient, Area Under Curve, Gini's mean difference, Gini's entropy, etc. We give several novel and generic algorithms for the problem, leveraging techniques from DP algorithms for linear queries.", "keywords": "differential privacy;local differential privacy;pairwise statistics", "primary_area": "", "supplementary_material": "", "author": "Badih Ghazi;Pritish Kamath;Ravi Kumar;Pasin Manurangsi;Adam Sealfon", "authorids": "~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Adam_Sealfon1", "gender": ";M;M;M;", "homepage": "https://sites.google.com/view/badihghazi/home;https://pritishkamath.github.io/;https://sites.google.com/site/ravik53/;https://pasin30055.github.io/;https://asealfon.github.io/", "dblp": "125/2134;https://dblp.org/pers/k/Kamath:Pritish.html;k/RaviKumar.html;133/2059;150/6253", "google_scholar": "GBJLTN8AAAAJ;1JFARhUAAAAJ;J_XhIsgAAAAJ;35hM-PkAAAAJ;nrlhJMcAAAAJ", "orcid": ";;0000-0002-2203-2586;;", "linkedin": "badih-ghazi-608379132/;;ravi-kumar-a3a9631;;", "or_profile": "~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Adam_Sealfon1", "aff": "Google;Google Research;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nghazi2023on,\ntitle={On Computing Pairwise Statistics with Local Differential Privacy},\nauthor={Badih Ghazi and Pritish Kamath and Ravi Kumar and Pasin Manurangsi and Adam Sealfon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1GxKVprbwM}\n}", "github": "", "project": "", "reviewers": "xtaj;YTda;6AxR;q1oD", "pdf_size": 459925, "rating": "3;5;6;8", "confidence": "3;3;4;3", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "38;50;169;101", "wc_strengths": "9;28;64;64", "wc_weaknesses": "73;31;84;28", "wc_questions": "43;62;14;73", "wc_limitations": "19;42;1;32", "wc_review": "182;213;332;298", 
"wc_reply_reviewers": "0;11;7;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 89.5, 51.63574343417552 ], "wc_strengths_avg": [ 41.25, 23.72103496898902 ], "wc_weaknesses_avg": [ 54.0, 24.829418035870273 ], "wc_questions_avg": [ 48.0, 22.371857321197094 ], "wc_limitations_avg": [ 23.5, 15.337861650177967 ], "wc_review_avg": [ 256.25, 60.9605405159764 ], "wc_reply_reviewers_avg": [ 9.5, 7.22841614740048 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.16012815380508713, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10167900188127121558&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning and Collusion in Multi-unit Auctions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73024", "id": "1HKJ3lPz6m", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4661b55200c03a8c4bb9c2974b4fb12d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1HKJ3lPz6m", "openreview": "https://openreview.net/forum?id=1HKJ3lPz6m", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73024", "video": "https://nips.cc/virtual/2023/poster/73024", "author_site": "Simina Branzei, Mahsa Derakhshan, Negin Golrezaei, Yanjun Han", "tldr": "", "abstract": "In a carbon auction, licenses for CO2 emissions are allocated among multiple interested players. Inspired by this setting, we consider repeated multi-unit auctions with uniform pricing, which are widely used in practice. Our contribution is to analyze these auctions in both the offline and online settings, by designing efficient bidding algorithms with low regret and giving regret lower bounds. 
We also analyze the quality of the equilibria in two main variants of the auction, finding that one variant is susceptible to collusion among the bidders while the other is not.", "keywords": "multi-unit auctions;repeated auctions;online learning;collusion;games and learning;lower bounds;multiplicative weight updates;bandit learning", "primary_area": "", "supplementary_material": "/attachment/b5aab758aa6e1d4c8f11b690e9cb807ac60f6046.pdf", "author": "Simina Branzei;Mahsa Derakhshan;Negin Golrezaei;Yanjun Han", "authorids": "~Simina_Branzei1;mah.derakhshan@gmail.com;~Negin_Golrezaei1;~Yanjun_Han1", "gender": "F;;F;M", "homepage": "https://simina.info;;https://www.mit.edu/~golrezae/;https://yanjunhan2021.github.io", "dblp": "90/7113;;37/10099.html;35/7252", "google_scholar": "https://scholar.google.com.tw/citations?user=SJYC6DQAAAAJ;;k9uWzAIAAAAJ;hdTDzlQAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Simina_Branzei1;mah.derakhshan@gmail.com;~Negin_Golrezaei1;~Yanjun_Han1", "aff": "Purdue University;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "purdue.edu;;mit.edu;mit.edu", "position": "Assistant Professor;;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nbranzei2023learning,\ntitle={Learning and Collusion in Multi-unit Auctions},\nauthor={Simina Branzei and Mahsa Derakhshan and Negin Golrezaei and Yanjun Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1HKJ3lPz6m}\n}", "github": "", "project": "", "reviewers": "BfUK;5cGR;ZdSw;Ge2c;bLMv;KDVH", "pdf_size": 625926, "rating": "5;5;5;6;6;7", "confidence": "4;3;2;3;3;2", "soundness": "3;3;3;4;4;3", "novelty": "3;2;3;3;3;3", "presentation": "3;2;3;2;4;3", "wc_summary": "160;111;59;193;575;155", "wc_strengths": "19;26;39;149;43;35", "wc_weaknesses": "37;32;44;179;62;30", "wc_questions": "40;73;6;275;24;123", "wc_limitations": "4;0;6;1;12;1", "wc_review": "260;242;154;797;716;344", "wc_reply_reviewers": "10;38;0;10;12;57", "wc_reply_authors": "0;0;0;0;0;18", "reply_reviewers": "1;1;0;1;1;1", "reply_authors": "1;1;1;1;1;2", "rating_avg": [ 5.666666666666667, 0.7453559924999299 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 208.83333333333334, 169.13152344321333 ], "wc_strengths_avg": [ 51.833333333333336, 44.18301584193737 ], "wc_weaknesses_avg": [ 64.0, 52.50079364479487 ], "wc_questions_avg": [ 90.16666666666667, 90.8431187389679 ], "wc_limitations_avg": [ 4.0, 4.123105625617661 ], "wc_review_avg": [ 418.8333333333333, 246.15470519347966 ], "wc_reply_reviewers_avg": [ 21.166666666666668, 19.7856569823249 ], "wc_reply_authors_avg": [ 3.0, 6.708203932499369 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.43386091563731244, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5682738526552102227&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "purdue.edu;;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Purdue University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.purdue.edu;https://web.mit.edu", "aff_unique_abbr": "Purdue;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Banana: Banach Fixed-Point Network for Pointcloud Segmentation with Inter-Part Equivariance", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73023", "id": "1IOU2329Za", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b8c6f846c3575e1d1ad496abea28826-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1IOU2329Za", "openreview": "https://openreview.net/forum?id=1IOU2329Za", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73023", "video": "https://nips.cc/virtual/2023/poster/73023", "author_site": "Congyue Deng, Jiahui Lei, William B Shen, Kostas Daniilidis, Leonidas Guibas", "tldr": "", "abstract": "Equivariance has gained strong interest as a desirable network property that inherently ensures robust generalization. However, when dealing with complex systems such as articulated objects or multi-object scenes, effectively capturing inter-part transformations poses a challenge, as it becomes entangled with the overall structure and local transformations. The interdependence of part assignment and per-part group action necessitates a novel equivariance formulation that allows for their co-evolution. In this paper, we present Banana, a Banach fixed-point network for equivariant segmentation with inter-part equivariance by construction. Our key insight is to iteratively solve a fixed-point problem, where point-part assignment labels and per-part SE(3)-equivariance co-evolve simultaneously. We provide theoretical derivations of both per-step equivariance and global convergence, which induces an equivariant final convergent state. Our formulation naturally provides a strict definition of inter-part equivariance that generalizes to unseen inter-part configurations. 
Through experiments conducted on both articulated objects and multi-object scans, we demonstrate the efficacy of our approach in achieving strong generalization under inter-part transformations, even when confronted with substantial changes in pointcloud geometry and topology.", "keywords": "3D deep learning;equivariant network;pointcloud segmentation;multi-body system", "primary_area": "", "supplementary_material": "/attachment/b299aa60669ad44fccd7f456600bb5de2ea1639f.pdf", "author": "Congyue Deng;Jiahui Lei;Bokui Shen;Kostas Daniilidis;Leonidas Guibas", "authorids": "~Congyue_Deng1;~Jiahui_Lei1;~Bokui_Shen1;~Kostas_Daniilidis1;~Leonidas_Guibas1", "gender": "F;M;M;M;M", "homepage": "https://cs.stanford.edu/~congyue/;https://www.cis.upenn.edu/~leijh/;http://www.cis.upenn.edu/~kostas;http://geometry.stanford.edu/;https://cs.stanford.edu/~bshen88", "dblp": "267/5521;252/0106.html;d/KostasDaniilidis;g/LeonidasJGuibas;280/3036", "google_scholar": "XJZ8UBcAAAAJ;rX6SwdIAAAAJ;dGs2BcIAAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ;mOMChFIAAAAJ", "orcid": ";;0000-0003-0498-0758;;0000-0002-8183-3607", "linkedin": ";;;;", "or_profile": "~Congyue_Deng1;~Jiahui_Lei1;~Kostas_Daniilidis1;~Leonidas_Guibas1;~William_B._Shen1", "aff": "Stanford University;University of Pennsylvania;University of Pennsylvania;Stanford University;NVIDIA", "aff_domain": "stanford.edu;cis.upenn.edu;upenn.edu;stanford.edu;nvidia.com", "position": "PhD student;PhD student;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ndeng2023banana,\ntitle={Banana: Banach Fixed-Point Network for Pointcloud Segmentation with Inter-Part Equivariance},\nauthor={Congyue Deng and Jiahui Lei and Bokui Shen and Kostas Daniilidis and Leonidas Guibas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1IOU2329Za}\n}", "github": "", "project": "", "reviewers": "T7gc;iMm7;4UMu;DYXN;tamA", "pdf_size": 4732010, "rating": "6;6;7;7;8", "confidence": "4;3;4;1;4", "soundness": "3;3;4;3;4", "novelty": "3;3;3;2;4", "presentation": "4;3;4;3;4", "wc_summary": "118;50;169;43;87", "wc_strengths": "127;62;55;16;27", "wc_weaknesses": "215;194;142;16;199", "wc_questions": "78;4;307;15;12", "wc_limitations": "7;5;10;1;4", "wc_review": "545;315;683;91;329", "wc_reply_reviewers": "223;61;43;13;15", "wc_reply_authors": "20;88;13;9;4", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 93.4, 46.443944707571944 ], "wc_strengths_avg": [ 57.4, 38.75358047974406 ], "wc_weaknesses_avg": [ 153.2, 72.8433936606471 ], "wc_questions_avg": [ 83.2, 114.9841728239152 ], "wc_limitations_avg": [ 5.4, 3.0066592756745814 ], "wc_review_avg": [ 392.6, 204.24847612650626 ], "wc_reply_reviewers_avg": [ 71.0, 78.08713082192226 ], "wc_reply_authors_avg": [ 26.8, 31.044484212175277 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.04583492485141061, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12026352217899294886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;cis.upenn.edu;upenn.edu;stanford.edu;nvidia.com", "author_num": 5, "aff_unique_index": 
"0;1;1;0;2", "aff_unique_norm": "Stanford University;University of Pennsylvania;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.upenn.edu;https://www.nvidia.com", "aff_unique_abbr": "Stanford;UPenn;NVIDIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Blurred-Dilated Method for Adversarial Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73022", "id": "1JlAV2paGu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6fa3ed9624c184bd73e435123bd576a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1JlAV2paGu", "openreview": "https://openreview.net/forum?id=1JlAV2paGu", "poster": "/media/PosterPDFs/NeurIPS%202023/73022.png?t=1701848400.0266314", "slides": "https://nips.cc/virtual/2023/poster/73022", "video": "https://nips.cc/virtual/2023/poster/73022", "author_site": "Yang Deng, Weibin Wu, Jianping Zhang, Zibin Zheng", "tldr": "", "abstract": "Deep neural networks (DNNs) are vulnerable to adversarial attacks, which lead to incorrect predictions. In black-box settings, transfer attacks can be conveniently used to generate adversarial examples. However, such examples tend to overfit the specific architecture and feature representations of the source model, resulting in poor attack performance against other target models. To overcome this drawback, we propose a novel model modification-based transfer attack: Blurred-Dilated method (BD) in this paper. In summary, BD works by reducing downsampling while introducing BlurPool and dilated convolutions in the source model. Then BD employs the modified source model to generate adversarial samples. We think that BD can more comprehensively preserve the feature information than the original source model. It thus enables more thorough destruction of the image features, which can improve the transferability of the generated adversarial samples. Extensive experiments on the ImageNet dataset show that adversarial examples generated by BD achieve significantly higher transferability than the state-of-the-art baselines. 
Besides, BD can be conveniently combined with existing black-box attack techniques to further improve their performance.", "keywords": "Transferable adversarial example", "primary_area": "", "supplementary_material": "/attachment/d1b13abba9e65d3ea0c2c91dfa0139838946a706.pdf", "author": "Yang Deng;Weibin Wu;Jianping Zhang;Zibin Zheng", "authorids": "~Yang_Deng5;~Weibin_Wu1;~Jianping_Zhang3;~Zibin_Zheng1", "gender": "M;;;M", "homepage": "https://github.com/YangDeng2002;;;https://www.zibinzheng.com/", "dblp": ";07/10638-2;;z/ZibinZheng", "google_scholar": ";https://scholar.google.com.hk/citations?user=6mtEjCEAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;WPC6ED4AAAAJ", "orcid": ";;;0000-0002-7878-4330", "linkedin": ";;;", "or_profile": "~Yang_Deng5;~Weibin_Wu1;~Jianping_Zhang3;~Zibin_Zheng1", "aff": "SUN YAT-SEN UNIVERSITY;Sun Yat-sen University;The Chinese University of Hong Kong;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;cuhk.edu.hk;sysu.edu.cn", "position": "Undergrad student;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\ndeng2023blurreddilated,\ntitle={Blurred-Dilated Method for Adversarial Attacks},\nauthor={Yang Deng and Weibin Wu and Jianping Zhang and Zibin Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1JlAV2paGu}\n}", "github": "", "project": "", "reviewers": "MuF1;1sbs;hwet;eif6;VEyj", "pdf_size": 188226, "rating": "5;5;5;6;6", "confidence": "3;4;2;4;3", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "40;94;44;81;54", "wc_strengths": "29;96;41;83;76", "wc_weaknesses": "165;233;62;250;47", "wc_questions": "2;34;1;102;14", "wc_limitations": "2;41;1;32;10", "wc_review": "238;498;149;548;201", "wc_reply_reviewers": "108;81;0;171;151", "wc_reply_authors": "400;95;55;72;16", "reply_reviewers": "2;1;0;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 62.6, 21.237702323933256 ], "wc_strengths_avg": [ 65.0, 25.60468707092512 ], "wc_weaknesses_avg": [ 151.4, 84.21068815773921 ], "wc_questions_avg": [ 30.6, 37.62764940838054 ], "wc_limitations_avg": [ 17.2, 16.314410807626487 ], "wc_review_avg": [ 326.8, 163.43977484076512 ], "wc_reply_reviewers_avg": [ 102.2, 60.071290979968126 ], "wc_reply_authors_avg": [ 127.6, 138.62265327138994 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.32732683535398854, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4019955314901528580&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sysu.edu.cn;sysu.edu.cn;cuhk.edu.hk;sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Sun Yat-sen University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "SYSU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Re-Think and Re-Design Graph Neural Networks in Spaces of Continuous Graph Diffusion Functionals", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/73021", "id": "1M8nDkUU9b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9fd027eb16434174b8bb3d3b18110af-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1M8nDkUU9b", "openreview": "https://openreview.net/forum?id=1M8nDkUU9b", "poster": "/media/PosterPDFs/NeurIPS%202023/73021.png?t=1697320699.9114034", "slides": "https://nips.cc/virtual/2023/poster/73021", "video": "https://nips.cc/virtual/2023/poster/73021", "author_site": "Tingting Dan, Jiaqi Ding, Ziquan Wei, Shahar Kovalsky, Minjeong Kim, Won Hwa Kim, Guorong Wu", "tldr": "", "abstract": "Graphs are ubiquitous in various domains, such as social networks and biological systems. Despite the great successes of graph neural networks (GNNs) in modeling and analyzing complex graph data, the inductive bias of locality assumption, which involves exchanging information only within neighboring connected nodes, restricts GNNs in capturing long-range dependencies and global patterns in graphs. Inspired by the classic Brachistochrone problem, we seek how to devise a new inductive bias for cutting-edge graph application and present a general framework through the lens of variational analysis. The backbone of our framework is a two-way mapping between the discrete GNN model and continuous diffusion functional, which allows us to design application-specific objective function in the continuous domain and engineer discrete deep model with mathematical guarantees. First, we address over-smoothing in current GNNs. Specifically, our inference reveals that the existing layer-by-layer models of graph embedding learning are equivalent to a ${\\ell _2}$-norm integral functional of graph gradients, which is the underlying cause of the over-smoothing problem. Similar to edge-preserving filters in image denoising, we introduce the total variation (TV) to promote alignment of the graph diffusion pattern with the global information present in community topologies. On top of this, we devise a new selective mechanism for inductive bias that can be easily integrated into existing GNNs and effectively address the trade-off between model depth and over-smoothing. Second, we devise a novel generative adversarial network (GAN) to predict the spreading flows in the graph through a neural transport equation. To avoid the potential issue of vanishing flows, we tailor the objective function to minimize the transportation within each community while maximizing the inter-community flows. 
Our new GNN models achieve state-of-the-art (SOTA) performance on graph learning benchmarks such as Cora, Citeseer, and Pubmed.", "keywords": "graph neural networks (GNNs);total variation (TV);Euler\u2013Lagrange equation;calculus of variations;over-smoothing;min-max optimization", "primary_area": "", "supplementary_material": "/attachment/fcba5f4ef5a4e4a5e058763a3d85b48c7d050550.pdf", "author": "Tingting Dan;Jiaqi Ding;Ziquan Wei;Shahar Z Kovalsky;Minjeong Kim;Won Hwa Kim;Guorong Wu", "authorids": "~Tingting_Dan1;~Jiaqi_Ding1;~Ziquan_Wei1;~Shahar_Z_Kovalsky1;~Minjeong_Kim1;~Won_Hwa_Kim4;~Guorong_Wu1", "gender": "F;F;M;M;F;M;M", "homepage": "https://www.researchgate.net/profile/Tingting_Dan;;https://ziquanw.com/;https://shaharkov.github.io/;https://sites.google.com/view/minjeongkim;https://www.acmlab.org/;https://wwplato.github.io/", "dblp": "223/8556;253/0206;206/5669;47/6259;http://dblp.uni-trier.de/pers/hd/k/Kim:Minjeong;03/5225-1.html;12/10278", "google_scholar": "FMcmg0gAAAAJ;https://scholar.google.com/citations?hl=en;z1IYb2oAAAAJ;gz1zcR4AAAAJ;m26d_0cAAAAJ;XVsMB2kAAAAJ;aWPSHNwAAAAJ", "orcid": ";0009-0005-0131-4348;0000-0001-6553-4482;;;0000-0002-0550-6145;", "linkedin": ";;weiziquan142857/;;;;", "or_profile": "~Tingting_Dan1;~Jiaqi_Ding1;~Ziquan_Wei1;~Shahar_Z_Kovalsky1;~Minjeong_Kim1;~Guorong_Wu1;~Won_Hwa_Kim1", "aff": "South China University of Technology;Department of Computer Science, University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina, Greensboro;University of North Carolina, Chapel Hill;University of Texas, Arlington", "aff_domain": "scut.edu.cn;cs.unc.edu;unc.edu;unc.edu;uncg.edu;unc.edu;uta.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndan2023rethink,\ntitle={Re-Think and Re-Design Graph Neural Networks in Spaces of Continuous Graph Diffusion Functionals},\nauthor={Tingting Dan and Jiaqi Ding and Ziquan Wei and Shahar Z Kovalsky and Minjeong Kim and Won Hwa Kim and Guorong Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1M8nDkUU9b}\n}", "github": "", "project": "", "reviewers": "VF1D;HWVs;paGB;sJUp;CRUA", "pdf_size": 1741725, "rating": "3;4;6;7;8", "confidence": "4;3;2;3;3", "soundness": "3;3;3;3;4", "novelty": "2;2;3;4;4", "presentation": "2;2;1;3;3", "wc_summary": "231;96;77;81;53", "wc_strengths": "106;62;79;101;58", "wc_weaknesses": "582;350;436;60;51", "wc_questions": "76;5;57;48;58", "wc_limitations": "108;5;50;10;1", "wc_review": "1103;518;699;300;221", "wc_reply_reviewers": "372;128;72;50;0", "wc_reply_authors": "903;342;114;11;0", "reply_reviewers": "1;1;2;1;0", "reply_authors": "3;2;3;2;1", "rating_avg": [ 5.6, 1.8547236990991407 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 107.6, 63.22531138713356 ], "wc_strengths_avg": [ 81.2, 19.58979326077741 ], "wc_weaknesses_avg": [ 295.8, 209.77740583771168 ], "wc_questions_avg": [ 48.8, 23.709913538433664 ], "wc_limitations_avg": [ 34.8, 40.58275495823319 ], "wc_review_avg": [ 568.2, 315.6323177369516 ], "wc_reply_reviewers_avg": [ 124.4, 130.44171112033146 ], "wc_reply_authors_avg": [ 274.0, 337.6714379393081 ], "reply_reviewers_avg": [ 1.0, 
0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5114957546028551, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11900982858925170461&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "scut.edu.cn;cs.unc.edu;unc.edu;unc.edu;uncg.edu;unc.edu;uta.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;3;2;4", "aff_unique_norm": "South China University of Technology;University of North Carolina at Chapel Hill;University of North Carolina;University of North Carolina at Greensboro;University of Texas at Arlington", "aff_unique_dep": ";Department of Computer Science;;;", "aff_unique_url": "https://www.scut.edu.cn;https://www.unc.edu;https://www.unc.edu;https://www.uncg.edu;https://www.uta.edu", "aff_unique_abbr": "SCUT;UNC Chapel Hill;UNC;UNCG;UTA", "aff_campus_unique_index": "1;1;1;2;1;3", "aff_campus_unique": ";Chapel Hill;Greensboro;Arlington", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Budgeting Counterfactual for Offline RL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73020", "id": "1MUxtSBUox", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/121db870b0470dd63bb5bc59c724275a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1MUxtSBUox", "openreview": "https://openreview.net/forum?id=1MUxtSBUox", "poster": "/media/PosterPDFs/NeurIPS%202023/73020.png?t=1702367979.9467692", "slides": "https://nips.cc/virtual/2023/poster/73020", "video": "https://nips.cc/virtual/2023/poster/73020", "author_site": "Yao Liu, Pratik Chaudhari, Rasool Fakoor", "tldr": "", "abstract": "The main challenge of offline reinforcement learning, where data is limited, arises from a sequence of counterfactual reasoning dilemmas within the realm of potential actions: What if we were to choose a different course of action? These circumstances frequently give rise to extrapolation errors, which tend to accumulate exponentially with the problem horizon. Hence, it becomes crucial to acknowledge that not all decision steps are equally important to the final outcome, and to budget the number of counterfactual decisions a policy makes in order to control the extrapolation. Contrary to existing approaches that use regularization on either the policy or value function, we propose an approach to explicitly bound the number of out-of-distribution actions during training. Specifically, our method utilizes dynamic programming to decide where to extrapolate and where not to, with an upper bound on the number of decisions that differ from the behavior policy. It balances the potential for improvement from taking out-of-distribution actions against the risk of making errors due to extrapolation. Theoretically, we justify our method by the constrained optimality of the fixed-point solution to our $Q$ updating rules.
Empirically, we show that the overall performance of our method is better than the state-of-the-art offline RL methods on tasks in the widely-used D4RL benchmarks.", "keywords": "reinforcement learning;offline reinforcement learning;counterfactual reasoning", "primary_area": "", "supplementary_material": "", "author": "Yao Liu;Pratik Chaudhari;Rasool Fakoor", "authorids": "~Yao_Liu1;~Pratik_Chaudhari1;~Rasool_Fakoor1", "gender": "M;M;M", "homepage": "http://yao-liu.com/;https://pratikac.github.io/;http://rasoolfa.github.io", "dblp": "64/424-9.html;;123/2447", "google_scholar": "umAny5UAAAAJ;c_z5hWEAAAAJ;nVsOPtQAAAAJ", "orcid": ";;", "linkedin": ";pratik-chaudhari-59508765;rasool-fakoor-695b5845/", "or_profile": "~Yao_Liu1;~Pratik_Chaudhari1;~Rasool_Fakoor1", "aff": "Amazon;School of Engineering and Applied Science, University of Pennsylvania;Amazon Web Services", "aff_domain": "amazon.com;seas.upenn.edu;amazon.com", "position": "Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nliu2023budgeting,\ntitle={Budgeting Counterfactual for Offline {RL}},\nauthor={Yao Liu and Pratik Chaudhari and Rasool Fakoor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1MUxtSBUox}\n}", "github": "", "project": "", "reviewers": "4X4f;bnLz;FoC4;wQce", "pdf_size": 2465474, "rating": "4;5;7;7", "confidence": "2;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "17;92;67;73", "wc_strengths": "11;54;94;78", "wc_weaknesses": "84;358;353;51", "wc_questions": "32;130;68;42", "wc_limitations": "50;18;35;2", "wc_review": "194;652;617;246", "wc_reply_reviewers": "0;552;400;0", "wc_reply_authors": "56;1426;1387;0", "reply_reviewers": "0;3;2;0", "reply_authors": "2;4;3;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.25, 27.707174161216802 ], "wc_strengths_avg": [ 59.25, 31.283981524096323 ], "wc_weaknesses_avg": [ 211.5, 144.48269792608386 ], "wc_questions_avg": [ 68.0, 38.13135192987524 ], "wc_limitations_avg": [ 26.25, 18.005207580030838 ], "wc_review_avg": [ 427.25, 208.43149354164308 ], "wc_reply_reviewers_avg": [ 238.0, 243.99180314100718 ], "wc_reply_authors_avg": [ 717.25, 689.6721594351914 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5308844464380415037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "amazon.com;seas.upenn.edu;amazon.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Amazon;University of Pennsylvania", "aff_unique_dep": "Amazon.com, Inc.;School of Engineering and Applied Science", "aff_unique_url": "https://www.amazon.com;https://www.upenn.edu", "aff_unique_abbr": "Amazon;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1NY5i5fq5e", "title": "Optical Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "The rapidly increasing size of deep-learning models has caused renewed and growing interest in alternatives to digital computers to dramatically reduce the energy 
cost of running state-of-the-art neural networks. Optical matrix-vector multipliers are best suited to performing computations with very large operands, which leads us to hypothesize that large Transformer models might achieve asymptotic energy advantages with optics over running digitally. To test this idea, we performed small-scale optical experiments with a prototype accelerator to demonstrate that Transformer operations can run on optical hardware despite noise and errors. Using experiment-calibrated simulations of our hardware, we studied the behavior of running Transformers optically, identifying scaling laws for model performance with respect to optical energy usage and estimating total system power consumption. We found that the optical energy per multiply-accumulate (MAC) scales as $\\frac{1}{d}$ where $d$ is the Transformer width, an asymptotic advantage over digital systems. Should well-engineered, large-scale optical hardware be developed, it might achieve a $100 \\times$ energy-efficiency advantage for running some of the largest current Transformer models, and if both the models and the optical hardware are scaled to the quadrillion-parameter regime, optical computers could have a $>8,000\\times$ energy-efficiency advantage over state-of-the-art digital-electronic processors (300 fJ/MAC). We discussed how these results motivate and inform the construction of future optical accelerators and optics-amenable deep-learning approaches. With assumptions about future improvements to electronics and Transformer quantization techniques (5\u00d7 cheaper memory access, double the digital\u2013analog conversion efficiency, and 4-bit precision), we estimated that optical computers' advantage against these digital processors could grow to $>100,000\\times$.", "keywords": "Optics;Transformers;Accelerator;Energy Efficiency;Power Consumption;LLM;Large Language Models;Hardware;Optical Neural Networks;Scaling;Scaling Laws;Quantization", "primary_area": "", "supplementary_material": "/attachment/8ca94f774c99bd313d11f39ad595ed462f1bc110.zip", "author": "Maxwell Anderson;Shi-Yuan Ma;Tianyu Wang;Logan Wright;Peter McMahon", "authorids": "~Maxwell_Anderson1;~Shi-Yuan_Ma1;~Tianyu_Wang9;lgw32@cornell.edu;~Peter_McMahon1", "gender": "M;;M;;Not Specified", "homepage": ";;https://scholar.google.com/citations?user=lqIvJCgAAAAJ&hl=en;;https://mcmahon.aep.cornell.edu/", "dblp": "325/5057;291/4258;35/8397;;94/8121", "google_scholar": ";;lqIvJCgAAAAJ;;MR_WZxkAAAAJ", "orcid": ";0000-0001-8299-6742;0000-0002-6087-6376;;", "linkedin": "maxwell-anderson/;;;;", "or_profile": "~Maxwell_Anderson1;~Shi-Yuan_Ma1;~Tianyu_Wang9;lgw32@cornell.edu;~Peter_McMahon1", "aff": "Cornell University;Cornell University;;;Cornell University", "aff_domain": "cornell.edu;cornell.edu;;;cornell.edu", "position": "PhD student;PhD student;;;Assistant Professor", "bibtex": "@misc{\nanderson2023optical,\ntitle={Optical Transformers},\nauthor={Maxwell Anderson and Shi-Yuan Ma and Tianyu Wang and Logan Wright and Peter McMahon},\nyear={2023},\nurl={https://openreview.net/forum?id=1NY5i5fq5e}\n}", "github": "", "project": "", "reviewers": "5GP2;F5Gn;DVPz;32hz", "site": "https://openreview.net/forum?id=1NY5i5fq5e", "pdf_size": 16788491, "rating": "4;5;6;8", "confidence": "5;4;3;3", "soundness": "3;3;3;3", "novelty": "1;3;3;3", "presentation": "3;2;3;4", "wc_summary": "55;42;166;136", "wc_strengths": "31;13;15;256", "wc_weaknesses": "510;145;43;191", "wc_questions": "8;26;360;269", "wc_limitations": "1;5;41;134", "wc_review": "605;231;625;986", 
"wc_reply_reviewers": "297;8;0;231", "wc_reply_authors": "765;0;0;244", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.75, 52.53748661670065 ], "wc_strengths_avg": [ 78.75, 102.57283997238255 ], "wc_weaknesses_avg": [ 222.25, 174.55282151830144 ], "wc_questions_avg": [ 165.75, 152.32264276856543 ], "wc_limitations_avg": [ 45.25, 53.555461906326606 ], "wc_review_avg": [ 611.75, 267.04622727160927 ], "wc_reply_reviewers_avg": [ 134.0, 132.10791043688488 ], "wc_reply_authors_avg": [ 252.25, 312.34626218349405 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8664002254439633, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6376587038386436089&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Diverse Community Data for Benchmarking Data Privacy Algorithms", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73719", "id": "1ODvxEwsGk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a15032f8199511ced4d7a8e2bbb487a5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1ODvxEwsGk", "openreview": "https://openreview.net/forum?id=1ODvxEwsGk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73719", "video": "https://nips.cc/virtual/2023/poster/73719", "author_site": "Aniruddha Sen, Christine Task, Dhruv Kapur, Gary Howarth, Karan Bhagat", "tldr": "", "abstract": "The Collaborative Research Cycle (CRC) is a National Institute of Standards and Technology (NIST) benchmarking program intended to strengthen understanding of tabular data deidentification technologies. Deidentification algorithms are vulnerable to the same bias and privacy issues that impact other data analytics and machine learning applications, and it can even amplify those issues by contaminating downstream applications. This paper summarizes four CRC contributions: theoretical work on the relationship between diverse populations and challenges for equitable deidentification; public benchmark data focused on diverse populations and challenging features; a comprehensive open source suite of evaluation metrology for deidentified datasets; and an archive of more than 450 deidentified data samples from a broad range of techniques. 
The initial set of evaluation results demonstrates the value of the CRC tools for investigations in this field.", "keywords": "privacy;data deidentification;synthetic data;benchmarks;data evaluation", "primary_area": "", "supplementary_material": "/attachment/dd68624a126446b1e5a2defeaab7f2d6061cad19.pdf", "author": "Aniruddha Sen;Christine Task;Dhruv Kapur;Gary Stanley Howarth;Karan Bhagat", "authorids": "~Aniruddha_Sen1;~Christine_Task1;~Dhruv_Kapur1;~Gary_Stanley_Howarth1;~Karan_Bhagat1", "gender": ";F;M;M;M", "homepage": "https://aniruddhasen.github.io;https://Knexusresearch.com/privacy;;https://www.nist.gov/people/gary-howarth;", "dblp": "324/6704;65/2407;;;", "google_scholar": ";7dRs2SsAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0000-0002-3587-0546;", "linkedin": "senaniruddha/;;dhruvk19/;gshowarth/;karanbhagat1/", "or_profile": "~Aniruddha_Sen1;~Christine_Task1;~Dhruv_Kapur1;~Gary_Stanley_Howarth1;~Karan_Bhagat1", "aff": "Department of Computer Science, University of Illinois at Urbana-Champaign;Knexus Research Corporation;Knexus Research Corporation;National Institute of Standards and Technology;", "aff_domain": "cs.illinois.edu;knexusresearch.com;knexusresearch.com;nist.gov;", "position": "Intern;Principal Researcher;Intern;Principal Researcher;", "bibtex": "@inproceedings{\nsen2023diverse,\ntitle={Diverse Community Data for Benchmarking Data Privacy Algorithms},\nauthor={Aniruddha Sen and Christine Task and Dhruv Kapur and Gary Stanley Howarth and Karan Bhagat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=1ODvxEwsGk}\n}", "github": "", "project": "", "reviewers": "aPv5;efUU;wNKf;oxgo", "pdf_size": 2989680, "rating": "5;7;7;8", "confidence": "3;4;4;3", "wc_summary_and_contributions": "57;167;30;193", "wc_strengths": "62;28;82;186", "wc_improvement": "102;136;84;741", "wc_limitations": "12;46;52;83", "wc_correctness": "74;9;1;189", "wc_clarity": "49;94;8;5", "wc_relation_to_prior_work": "29;44;46;86", "wc_documentation": "4;2;1;33", "wc_additional_feedback": "1;1;1;1", "wc_review": "390;527;305;1517", "wc_reply_reviewers": "0;34;10;57", "wc_reply_authors": "785;1080;146;46", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 111.75, 69.52472581751042 ], "wc_strengths_avg": [ 89.5, 58.9639720507362 ], "wc_improvement_avg": [ 265.75, 275.0203401568691 ], "wc_limitations_avg": [ 48.25, 25.202926417382564 ], "wc_correctness_avg": [ 68.25, 75.2441858218959 ], "wc_clarity_avg": [ 39.0, 36.20082871979591 ], "wc_relation_to_prior_work_avg": [ 51.25, 21.111312133545844 ], "wc_documentation_avg": [ 10.0, 13.322912594474229 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 684.75, 486.9837651298039 ], "wc_reply_reviewers_avg": [ 25.25, 22.106277389013286 ], "wc_reply_authors_avg": [ 514.25, 432.5057080548186 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11357306977919118123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.illinois.edu;knexusresearch.com;knexusresearch.com;nist.gov;", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of
Illinois Urbana-Champaign;Knexus Research Corporation;National Institute of Standards and Technology", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://illinois.edu;;https://www.nist.gov", "aff_unique_abbr": "UIUC;;NIST", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gaussian Partial Information Decomposition: Bias Correction and Application to High-dimensional Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73019", "id": "1PnSOKQKvq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec0bff8bf4b11e36f874790046dfdb65-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1PnSOKQKvq", "openreview": "https://openreview.net/forum?id=1PnSOKQKvq", "poster": "/media/PosterPDFs/NeurIPS%202023/73019.png?t=1702306488.6764288", "slides": "https://nips.cc/virtual/2023/poster/73019", "video": "https://nips.cc/virtual/2023/poster/73019", "author_site": "Praveen Venkatesh, Corbett Bennett, Sam Gale, Tamina Ramirez, Greggory Heller, Severine Durand, Shawn Olsen, Stefan Mihalas", "tldr": "", "abstract": "Recent advances in neuroscientific experimental techniques have enabled us to simultaneously record the activity of thousands of neurons across multiple brain regions. This has led to a growing need for computational tools capable of analyzing how task-relevant information is represented and communicated between several brain regions. Partial information decompositions (PIDs) have emerged as one such tool, quantifying how much unique, redundant and synergistic information two or more brain regions carry about a task-relevant message. However, computing PIDs is computationally challenging in practice, and statistical issues such as the bias and variance of estimates remain largely unexplored. In this paper, we propose a new method for efficiently computing and estimating a PID definition on multivariate Gaussian distributions. We show empirically that our method satisfies an intuitive additivity property, and recovers the ground truth in a battery of canonical examples, even at high dimensionality. We also propose and evaluate, for the first time, a method to correct the bias in PID estimates at finite sample sizes. Finally, we demonstrate that our Gaussian PID effectively characterizes inter-areal interactions in the mouse brain, revealing higher redundancy between visual areas when a stimulus is behaviorally relevant.", "keywords": "partial information decomposition;estimation;bias;inter-area interaction;neuroscience", "primary_area": "", "supplementary_material": "", "author": "Praveen Venkatesh;Corbett Bennett;Sam Gale;Tamina K. 
Ramirez;Greggory Heller;Severine Durand;Shawn R Olsen;Stefan Mihalas", "authorids": "~Praveen_Venkatesh1;corbettb@alleninstitute.org;samg@alleninstitute.org;tkr2116@cumc.columbia.edu;greggh@mit.edu;severined@alleninstitute.org;~Shawn_R_Olsen1;~Stefan_Mihalas1", "gender": "M;;;;;;;", "homepage": "https://praveenv253.github.io;;;;;;https://scholar.google.com/citations?user=huDkgmYAAAAJ&hl=en;https://alleninstitute.org/person/stefan-mihalas/", "dblp": "178/5175;;;;;;;90/7228", "google_scholar": "gQckdIMAAAAJ;;;;;;huDkgmYAAAAJ;hwK-jm4AAAAJ", "orcid": "0000-0003-0752-1506;;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Praveen_Venkatesh1;corbettb@alleninstitute.org;samg@alleninstitute.org;tkr2116@cumc.columbia.edu;greggh@mit.edu;severined@alleninstitute.org;~Shawn_R_Olsen1;~Stefan_Mihalas1", "aff": "University of Washington;;;;;;Allen Institute;Allen Institute", "aff_domain": "uw.edu;;;;;;alleninstitute.org;alleninstitute.org", "position": "Postdoc;;;;;;Associate Investigator;Full Professor", "bibtex": "@inproceedings{\nvenkatesh2023gaussian,\ntitle={Gaussian Partial Information Decomposition: Bias Correction and Application to High-dimensional Data},\nauthor={Praveen Venkatesh and Corbett Bennett and Sam Gale and Tamina K. Ramirez and Greggory Heller and Severine Durand and Shawn R Olsen and Stefan Mihalas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1PnSOKQKvq}\n}", "github": "", "project": "", "reviewers": "DDWz;wULj;Tp5P;jMyH;YrjF", "pdf_size": 2056387, "rating": "6;6;6;7;8", "confidence": "3;3;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;4;4", "wc_summary": "156;99;78;153;38", "wc_strengths": "35;29;29;72;74", "wc_weaknesses": "150;96;362;53;24", "wc_questions": "33;59;21;166;26", "wc_limitations": "15;18;9;56;8", "wc_review": "389;301;499;500;170", "wc_reply_reviewers": "20;0;408;34;15", "wc_reply_authors": "128;129;1681;0;0", "reply_reviewers": "1;0;3;1;1", "reply_authors": "2;2;4;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 104.8, 45.07504853020127 ], "wc_strengths_avg": [ 47.8, 20.701690752206687 ], "wc_weaknesses_avg": [ 137.0, 120.24974012445931 ], "wc_questions_avg": [ 61.0, 54.10730080127819 ], "wc_limitations_avg": [ 21.2, 17.792133093027378 ], "wc_review_avg": [ 371.8, 125.41674529344158 ], "wc_reply_reviewers_avg": [ 95.4, 156.67750317132325 ], "wc_reply_authors_avg": [ 387.6, 649.2483654195828 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17336195640867935513&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uw.edu;;;;;;alleninstitute.org;alleninstitute.org", "author_num": 8, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Single-Stage Visual Query Localization in Egocentric Videos", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/73018", "id": "1SAzP7W43j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4bfe7af38d4e5cd85ae0da639a933652-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1SAzP7W43j", "openreview": "https://openreview.net/forum?id=1SAzP7W43j", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73018", "video": "https://nips.cc/virtual/2023/poster/73018", "author_site": "Hanwen Jiang, Santhosh Kumar Ramakrishnan, Kristen Grauman", "tldr": "", "abstract": "Visual Query Localization on long-form egocentric videos requires spatio-temporal search and localization of visually specified objects and is vital to build episodic memory systems. Prior work develops complex multi-stage pipelines that leverage well-established object detection and tracking methods to perform VQL. However, each stage is independently trained and the complexity of the pipeline results in slow inference speeds. We propose VQLoC, a novel single-stage VQL framework that is end-to-end trainable. Our key idea is to first build a holistic understanding of the query-video relationship and then perform spatio-temporal localization in a single shot manner. Specifically, we establish the query-video relationship by jointly considering query-to-frame correspondences between the query and each video frame and frame-to-frame correspondences between nearby video frames. Our experiments demonstrate that our approach outperforms prior VQL methods by $20$% accuracy while obtaining a $10\\times$ improvement in inference speed. VQLoC is also the top entry on the Ego4D VQ2D challenge leaderboard.", "keywords": "Visual Query Localization;Egocentric Video;Spatial-Temporal Correspondence;Episodic Memory", "primary_area": "", "supplementary_material": "", "author": "Hanwen Jiang;Santhosh Kumar Ramakrishnan;Kristen Grauman", "authorids": "~Hanwen_Jiang1;~Santhosh_Kumar_Ramakrishnan1;~Kristen_Grauman1", "gender": "M;M;F", "homepage": "https://hwjiang1510.github.io/;https://srama2512.github.io/;http://www.cs.utexas.edu/~grauman/", "dblp": "237/9854;199/1913;57/4553", "google_scholar": "https://scholar.google.com.sg/citations?user=HJHSuxUAAAAJ;zr9B1YgAAAAJ;Jp6Mz1sAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hanwen_Jiang1;~Santhosh_Kumar_Ramakrishnan1;~Kristen_Grauman1", "aff": "University of Texas at Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\njiang2023singlestage,\ntitle={Single-Stage Visual Query Localization in Egocentric Videos},\nauthor={Hanwen Jiang and Santhosh Kumar Ramakrishnan and Kristen Grauman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1SAzP7W43j}\n}", "github": "", "project": "", "reviewers": "bR3g;8x9A;4CTN;YaTy;59Vz", "pdf_size": 1459655, "rating": "4;6;7;7;7", "confidence": "5;3;5;4;5", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "66;42;58;86;110", "wc_strengths": "66;37;78;184;151", "wc_weaknesses": "340;119;131;314;132", "wc_questions": "1;8;96;6;127", "wc_limitations": "1;9;43;5;8", "wc_review": "474;215;406;595;528", "wc_reply_reviewers": "13;23;60;38;25", "wc_reply_authors": "52;15;16;16;20", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.4, 
0.7999999999999999 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 72.4, 23.54230235129946 ], "wc_strengths_avg": [ 103.2, 55.163031098734955 ], "wc_weaknesses_avg": [ 207.2, 98.2677973702474 ], "wc_questions_avg": [ 47.6, 53.13605179160379 ], "wc_limitations_avg": [ 13.2, 15.157836257197133 ], "wc_review_avg": [ 443.6, 130.10857004824854 ], "wc_reply_reviewers_avg": [ 31.8, 16.191355718407276 ], "wc_reply_authors_avg": [ 23.8, 14.204224723651764 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.08574929257125449, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16281076552944651333&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "utexas.edu;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CommonScenes: Generating Commonsense 3D Indoor Scenes with Scene Graph Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73017", "id": "1SF2tiopYJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5fba70900a84a8fb755c48ba99420c95-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1SF2tiopYJ", "openreview": "https://openreview.net/forum?id=1SF2tiopYJ", "poster": "/media/PosterPDFs/NeurIPS%202023/73017.png?t=1702220165.114142", "slides": "https://nips.cc/virtual/2023/poster/73017", "video": "https://nips.cc/virtual/2023/poster/73017", "author_site": "Guangyao Zhai, Evin P\u0131nar \u00d6rnek, Shun-Cheng Wu, Yan Di, Federico Tombari, Nassir Navab, Benjamin Busam", "tldr": "", "abstract": "Controllable scene synthesis aims to create interactive environments for numerous industrial use cases. Scene graphs provide a highly suitable interface to facilitate these applications by abstracting the scene context in a compact manner. Existing methods, reliant on retrieval from extensive databases or pre-trained shape embeddings, often overlook scene-object and object-object relationships, leading to inconsistent results due to their limited generation capacity. To address this issue, we present CommonScenes, a fully generative model that converts scene graphs into corresponding controllable 3D scenes, which are semantically realistic and conform to commonsense. Our pipeline consists of two branches, one predicting the overall scene layout via a variational auto-encoder and the other generating compatible shapes via latent diffusion, capturing global scene-object and local inter-object relationships in the scene graph while preserving shape diversity. The generated scenes can be manipulated by editing the input scene graph and sampling the noise in the diffusion model. Due to the lack of a scene graph dataset offering high-quality object-level meshes with relations, we also construct SG-FRONT, enriching the off-the-shelf indoor dataset 3D-FRONT with additional scene graph labels. 
Extensive experiments are conducted on SG-FRONT, where CommonScenes shows clear advantages over other methods regarding generation consistency, quality, and diversity. Codes and the dataset are available on the website.", "keywords": "Scene Graph;Scene Synthesis;Diffusion Model;Graph Convolution Network", "primary_area": "", "supplementary_material": "/attachment/2d2c400134356f04e2d5fec7ed265be857874cf6.pdf", "author": "Guangyao Zhai;Evin Pinar \u00d6rnek;Shun-Cheng Wu;Yan Di;Federico Tombari;Nassir Navab;Benjamin Busam", "authorids": "~Guangyao_Zhai1;~Evin_Pinar_\u00d6rnek1;~Shun-Cheng_Wu1;~Yan_Di2;~Federico_Tombari1;~Nassir_Navab1;~Benjamin_Busam2", "gender": "M;F;M;M;M;M;", "homepage": "https://ymxlzgy.com/;https://evinpinar.github.io/;http://campar.in.tum.de/Main/ShunChengWu;;https://federicotombari.github.io/;https://www.cs.cit.tum.de/camp/members/cv-nassir-navab/nassir-navab/;http://campar.in.tum.de/view/Main/BenjaminBusam", "dblp": "243/2753;217/1758;;274/9623;16/3539;n/NassirNavab.html;168/5446", "google_scholar": "X_djKCUAAAAJ;OCAKQzcAAAAJ;qEo9eiMAAAAJ;HSlGGvwAAAAJ;TFsE4BIAAAAJ;https://scholar.google.com.tw/citations?user=kzoVUPYAAAAJ;https://scholar.google.de/citations?user=u4rJZwUAAAAJ", "orcid": "0000-0002-6702-8302;0000-0003-1023-2852;0000-0002-6498-1579;0000-0003-0671-8323;0000-0001-5598-5212;;0000-0002-0620-5774", "linkedin": "guangyao-zhai-1525a6b7/;;shun-cheng-wu-58a14b119/;;fedet/;;benjaminbusam", "or_profile": "~Guangyao_Zhai1;~Evin_Pinar_\u00d6rnek1;~Shun-Cheng_Wu1;~Yan_Di2;~Federico_Tombari1;~Nassir_Navab1;~Benjamin_Busam2", "aff": "Technische Universit\u00e4t M\u00fcnchen;Meta;Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich (TUM);Technical University of Munich;Technical University Munich", "aff_domain": "tum.de;meta.com;tum.de;tum.de;in.tum.de;tum.de;tum.de", "position": "PhD student;Intern;PhD student;PhD student;Lecturer;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhai2023commonscenes,\ntitle={CommonScenes: Generating Commonsense 3D Indoor Scenes with Scene Graphs},\nauthor={Guangyao Zhai and Evin Pinar {\\\"O}rnek and Shun-Cheng Wu and Yan Di and Federico Tombari and Nassir Navab and Benjamin Busam},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1SF2tiopYJ}\n}", "github": "", "project": "", "reviewers": "Wi3K;NT6V;ANPx;zttF", "pdf_size": 6599394, "rating": "4;6;6;7", "confidence": "4;3;5;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "291;93;329;118", "wc_strengths": "83;56;226;35", "wc_weaknesses": "572;145;244;161", "wc_questions": "6;31;276;148", "wc_limitations": "63;27;48;32", "wc_review": "1015;352;1123;494", "wc_reply_reviewers": "596;34;88;21", "wc_reply_authors": "763;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 207.75, 103.50694421148756 ], "wc_strengths_avg": [ 100.0, 74.70943715488693 ], "wc_weaknesses_avg": [ 280.5, 172.44201924125105 ], "wc_questions_avg": [ 115.25, 107.17596512278301 ], "wc_limitations_avg": [ 42.5, 14.150971698084906 ], "wc_review_avg": [ 746.0, 329.10104831191285 ], "wc_reply_reviewers_avg": [ 184.75, 238.76073274305386 ], "wc_reply_authors_avg": [ 190.75, 330.38869154376334 ], 
"reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9742069538247938173&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tum.de;meta.com;tum.de;tum.de;in.tum.de;tum.de;tum.de", "author_num": 7, "aff_unique_index": "0;1;2;0;3;2;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Meta;Technical University of Munich;Technical University Munich", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.tum.de;https://meta.com;https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;Meta;TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "Germany;United States" }, { "title": "Adaptive Topological Feature via Persistent Homology: Filtration Learning for Point Clouds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73016", "id": "1TJaITmK2Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1d49235669869ab737c1da9d64b7c769-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1TJaITmK2Q", "openreview": "https://openreview.net/forum?id=1TJaITmK2Q", "poster": "/media/PosterPDFs/NeurIPS%202023/73016.png?t=1701666124.9437394", "slides": "https://nips.cc/virtual/2023/poster/73016", "video": "https://nips.cc/virtual/2023/poster/73016", "author_site": "Naoki Nishikawa, Yuichi Ike, Kenji Yamanishi", "tldr": "", "abstract": "Machine learning for point clouds has been attracting much attention, with many applications in various fields, such as shape recognition and material science. For enhancing the accuracy of such machine learning methods, it is often effective to incorporate global topological features, which are typically extracted by persistent homology. In the calculation of persistent homology for a point cloud, we choose a filtration for the point cloud, an increasing sequence of spaces. Since the performance of machine learning methods combined with persistent homology is highly affected by the choice of a filtration, we need to tune it depending on data and tasks. In this paper, we propose a framework that learns a filtration adaptively with the use of neural networks. In order to make the resulting persistent homology isometry-invariant, we develop a neural network architecture with such invariance. Additionally, we show a theoretical result on a finite-dimensional approximation of filtration functions, which justifies the proposed network architecture. 
Experimental results demonstrate the efficacy of our framework in several classification tasks.", "keywords": "point cloud;persistent homology;isometry-invariant networks;filtration learning", "primary_area": "", "supplementary_material": "/attachment/68619104dda8b6f8ed41cf8a272a3998427d7298.zip", "author": "Naoki Nishikawa;Yuichi Ike;Kenji Yamanishi", "authorids": "~Naoki_Nishikawa1;~Yuichi_Ike1;~Kenji_Yamanishi1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/n-nishikawa;https://sites.google.com/view/yuichi-ike;http://www.ibis.t.u-tokyo.ac.jp/yamanishi/index_e.html", "dblp": "24/2962.html;https://dblp.uni-trier.de/pid/230/3805;88/5479", "google_scholar": "https://scholar.google.co.jp/citations?user=JGwf2FAAAAAJ;https://scholar.google.com/citations?hl=ja;https://scholar.google.co.jp/citations?user=hSXFw2sAAAAJ", "orcid": ";0000-0002-8907-8319;", "linkedin": ";yuichi-ike-a74305169/;", "or_profile": "~Naoki_Nishikawa1;~Yuichi_Ike1;~Kenji_Yamanishi1", "aff": "Graduate School of Information Science and Technology, The University of Tokyo;The University of Tokyo;The University of Tokyo", "aff_domain": "g.ecc.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nnishikawa2023adaptive,\ntitle={Adaptive Topological Feature via Persistent Homology: Filtration Learning for Point Clouds},\nauthor={Naoki Nishikawa and Yuichi Ike and Kenji Yamanishi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1TJaITmK2Q}\n}", "github": "", "project": "", "reviewers": "k7w9;kFjr;xfZT;L8GX;q2Rp", "pdf_size": 5499471, "rating": "4;5;5;6;7", "confidence": "4;3;3;3;4", "soundness": "3;2;2;2;3", "novelty": "2;3;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "29;76;95;26;63", "wc_strengths": "30;142;65;36;24", "wc_weaknesses": "324;293;33;326;20", "wc_questions": "137;251;94;495;598", "wc_limitations": "220;30;16;36;17", "wc_review": "740;792;303;919;722", "wc_reply_reviewers": "1202;63;0;482;9", "wc_reply_authors": "1627;14;0;725;0", "reply_reviewers": "3;1;0;1;1", "reply_authors": "5;2;1;2;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 57.8, 26.7686383665662 ], "wc_strengths_avg": [ 59.4, 43.63301502303044 ], "wc_weaknesses_avg": [ 199.2, 141.55338215669735 ], "wc_questions_avg": [ 315.0, 198.5497418784522 ], "wc_limitations_avg": [ 63.8, 78.47139606251439 ], "wc_review_avg": [ 695.2, 207.84744405452764 ], "wc_reply_reviewers_avg": [ 351.2, 461.4045513429619 ], "wc_reply_authors_avg": [ 473.2, 640.8367654871246 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.08006407690254366, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5412423267676507979&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "g.ecc.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "Graduate School of Information Science and Technology", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "0", "aff_campus_unique": "Tokyo;",
"aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Robust Knowledge Transfer in Tiered Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73015", "id": "1WMdoiVMov", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a39ab46bf619ada0e90ceed846648a81-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1WMdoiVMov", "openreview": "https://openreview.net/forum?id=1WMdoiVMov", "poster": "/media/PosterPDFs/NeurIPS%202023/73015.png?t=1701879575.9163074", "slides": "https://nips.cc/virtual/2023/poster/73015", "video": "https://nips.cc/virtual/2023/poster/73015", "author_site": "Jiawei Huang, Niao He", "tldr": "", "abstract": "In this paper, we study the Tiered Reinforcement Learning setting, a parallel transfer learning framework, where the goal is to transfer knowledge from the low-tier (source) task to the high-tier (target) task to reduce the exploration risk of the latter while solving the two tasks in parallel. Unlike previous work, we do not assume the low-tier and high-tier tasks share the same dynamics or reward functions, and focus on robust knowledge transfer without prior knowledge on the task similarity. We identify a natural and necessary condition called the ``Optimal Value Dominance'' for our objective. Under this condition, we propose novel online learning algorithms such that, for the high-tier task, it can achieve constant regret on partial states depending on the task similarity and retain near-optimal regret when the two tasks are dissimilar, while for the low-tier task, it can keep near-optimal without making sacrifice. Moreover, we further study the setting with multiple low-tier tasks, and propose a novel transfer source selection mechanism, which can ensemble the information from all low-tier tasks and allow provable benefits on a much larger state-action space.", "keywords": "Reinforcement Learning Theory;Transfer RL;Tiered RL", "primary_area": "", "supplementary_material": "/attachment/9f3a644062432d1e45285cd5eacd114682fe2b10.pdf", "author": "Jiawei Huang;Niao He", "authorids": "~Jiawei_Huang3;~Niao_He3", "gender": ";", "homepage": "https://jiaweihhuang.github.io;http://people.inf.ethz.ch/niaohe", "dblp": "13/4208;https://dblp.uni-trier.de/pers/h/He:Niao.html", "google_scholar": "6IcfJiIAAAAJ;iNcA81MAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jiawei_Huang3;~Niao_He1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023robust,\ntitle={Robust Knowledge Transfer in Tiered Reinforcement Learning},\nauthor={Jiawei Huang and Niao He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1WMdoiVMov}\n}", "github": "", "project": "", "reviewers": "MBdo;BLAr;9F2D;G1Lf;5dMC", "pdf_size": 477013, "rating": "5;5;5;6;7", "confidence": "1;3;4;3;3", "soundness": "3;2;2;3;4", "novelty": "2;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "42;171;97;180;98", "wc_strengths": "70;15;149;174;54", "wc_weaknesses": "215;30;196;155;18", "wc_questions": "67;24;19;47;26", "wc_limitations": "5;24;19;46;6", "wc_review": "399;264;480;602;202", "wc_reply_reviewers": "10;67;7;176;0", "wc_reply_authors": "0;0;39;19;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;2;2;1", "rating_avg": [ 5.6, 0.8 ], 
"confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 117.6, 51.51543458032747 ], "wc_strengths_avg": [ 92.4, 59.7146548177246 ], "wc_weaknesses_avg": [ 122.8, 83.05516239223184 ], "wc_questions_avg": [ 36.6, 17.962182495454165 ], "wc_limitations_avg": [ 20.0, 14.926486525636232 ], "wc_review_avg": [ 389.4, 144.4598214037384 ], "wc_reply_reviewers_avg": [ 52.0, 66.47405508918499 ], "wc_reply_authors_avg": [ 11.6, 15.551205741035002 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.15309310892394865, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15610842243859857548&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "inf.ethz.ch;ethz.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Tanh Works Better with Asymmetry", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73014", "id": "1WpmOipyYI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/298281b9e89197195eb461e68ad20136-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1WpmOipyYI", "openreview": "https://openreview.net/forum?id=1WpmOipyYI", "poster": "/media/PosterPDFs/NeurIPS%202023/73014.png?t=1701928316.9772823", "slides": "https://nips.cc/virtual/2023/poster/73014", "video": "https://nips.cc/virtual/2023/poster/73014", "author_site": "Dongjin Kim, Woojeong Kim, Suhyun Kim", "tldr": "", "abstract": "Batch Normalization is commonly located in front of activation functions, as proposed by the original paper. Swapping the order, i.e., using Batch Normalization after activation functions, has also been attempted, but its performance is generally not much different from the conventional order when ReLU or a similar activation function is used. However, in the case of bounded activation functions like Tanh, we discovered that the swapped order achieves considerably better performance than the conventional order on various benchmarks and architectures. This paper reports this remarkable phenomenon and closely examines what contributes to this performance improvement. By looking at the output distributions of individual activation functions, not the whole layers, we found that many of them are asymmetrically saturated. The experiments designed to induce a different degree of asymmetric saturation support the hypothesis that asymmetric saturation helps improve performance. In addition, Batch Normalization after bounded activation functions relocates the asymmetrically saturated output of activation functions near zero, enabling the swapped model to have high sparsity, further improving performance. Extensive experiments with Tanh, LeCun Tanh, and Softsign show that the swapped models achieve improved performance with a high degree of asymmetric saturation. Finally, based on this investigation, we test a Tanh function shifted to be asymmetric. 
This shifted Tanh function that is manipulated to have consistent asymmetry shows even higher accuracy than the original Tanh used in the swapped order, confirming the asymmetry's importance. The code is available at https://github.com/hipros/tanh_works_better_with_asymmetry.", "keywords": "Batch Normalization;Activation Functions;Saturation;Sparsity", "primary_area": "", "supplementary_material": "/attachment/bea361f0264ec774be89134278dfb467147c4e37.zip", "author": "Dongjin Kim;Woojeong Kim;Suhyun Kim", "authorids": "~Dongjin_Kim1;~Woojeong_Kim1;~Suhyun_Kim1", "gender": "M;F;", "homepage": ";https://sites.google.com/view/woojeongkim/;https://kdst.tistory.com/", "dblp": ";243/0064;45/6898-1", "google_scholar": ";fGCEQQgAAAAJ;", "orcid": ";;", "linkedin": "%EB%8F%99%EC%A7%84-%EA%B9%80-ba872923a/;woojeong-kim-072ab4160/;", "or_profile": "~Dongjin_Kim1;~Woojeong_Kim1;~Suhyun_Kim1", "aff": "Korea University;Cornell University;Korea Institute of Science and Technology", "aff_domain": "korea.ac.kr;cornell.edu;kist.re.kr", "position": "MS student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nkim2023tanh,\ntitle={Tanh Works Better with Asymmetry},\nauthor={Dongjin Kim and Woojeong Kim and Suhyun Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1WpmOipyYI}\n}", "github": "", "project": "", "reviewers": "hcmY;s3KQ;AMuf;yn6T", "pdf_size": 681345, "rating": "4;6;6;7", "confidence": "4;3;4;3", "soundness": "2;3;3;2", "novelty": "1;2;2;3", "presentation": "2;4;3;3", "wc_summary": "83;86;72;97", "wc_strengths": "37;185;74;56", "wc_weaknesses": "166;309;52;69", "wc_questions": "37;2;2;34", "wc_limitations": "9;1;1;4", "wc_review": "332;583;201;260", "wc_reply_reviewers": "172;33;15;0", "wc_reply_authors": "215;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.5, 8.902246907382429 ], "wc_strengths_avg": [ 88.0, 57.510868538042445 ], "wc_weaknesses_avg": [ 149.0, 102.10044074341697 ], "wc_questions_avg": [ 18.75, 16.78354849249705 ], "wc_limitations_avg": [ 3.75, 3.2691742076555053 ], "wc_review_avg": [ 344.0, 145.57644040159795 ], "wc_reply_reviewers_avg": [ 55.0, 68.55289928223313 ], "wc_reply_authors_avg": [ 53.75, 93.09773090682715 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1747430527777967223&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "korea.ac.kr;cornell.edu;kist.re.kr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Korea University;Cornell University;Korea Institute of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.korea.ac.kr;https://www.cornell.edu;https://www.kist.re.kr", "aff_unique_abbr": "KU;Cornell;KIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Langevin Quasi-Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73013", "id": "1YEF6TA8Di", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee56aa4fe26a189782f507d843fd5272-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1YEF6TA8Di", "openreview": "https://openreview.net/forum?id=1YEF6TA8Di", "poster": "/media/PosterPDFs/NeurIPS%202023/73013.png?t=1698513615.930798", "slides": "https://nips.cc/virtual/2023/poster/73013", "video": "https://nips.cc/virtual/2023/poster/73013", "tldr": "", "abstract": "Langevin Monte Carlo (LMC) and its stochastic gradient versions are powerful algorithms for sampling from complex high-dimensional distributions. To sample from a distribution with density $\\pi(\\theta)\\propto \\exp(-U(\\theta)) $, LMC iteratively generates the next sample by taking a step in the gradient direction $\\nabla U$ with added Gaussian perturbations. Expectations w.r.t. the target distribution $\\pi$ are estimated by averaging over LMC samples. In ordinary Monte Carlo, it is well known that the estimation error can be substantially reduced by replacing independent random samples by quasi-random samples like low-discrepancy sequences. In this work, we show that the estimation error of LMC can also be reduced by using quasi-random samples. Specifically, we propose to use completely uniformly distributed (CUD) sequences with certain low-discrepancy property to generate the Gaussian perturbations. Under smoothness and convexity conditions, we prove that LMC with a low-discrepancy CUD sequence achieves smaller error than standard LMC. The theoretical analysis is supported by compelling numerical experiments, which demonstrate the effectiveness of our approach.", "keywords": "Completely uniformly distributed; log-concave sampling; low-discrepancy; MCMC;", "primary_area": "", "supplementary_material": "/attachment/e23a46a2c92432af1c84dc0aea26e5effdbb5293.zip", "author": "Sifan Liu", "authorids": "~Sifan_Liu1", "gender": "F", "homepage": "https://liusf15.github.io", "dblp": "222/1922", "google_scholar": "K-fIESYAAAAJ", "orcid": "0000-0002-1608-4216", "linkedin": "", "or_profile": "~Sifan_Liu1", "aff": "Stanford University", "aff_domain": "stanford.edu", "position": "PhD student", "bibtex": "@inproceedings{\nliu2023langevin,\ntitle={Langevin Quasi-Monte Carlo},\nauthor={Sifan Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1YEF6TA8Di}\n}", "github": "", "project": "", "reviewers": "x8C1;pgYw;fUNL;amBd", "pdf_size": 424597, "rating": "6;7;7;7", "confidence": "3;4;3;3", "soundness": "4;4;4;3", "novelty": "3;3;3;3", "presentation": "4;4;4;3", "wc_summary": "49;65;103;96", "wc_strengths": "111;46;97;156", "wc_weaknesses": "119;47;95;145", "wc_questions": "100;77;105;80", "wc_limitations": "8;4;16;63", "wc_review": "387;239;416;540", "wc_reply_reviewers": "40;38;14;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 22.128883839904805 ], "wc_strengths_avg": [ 102.5, 39.23327669211431 ], "wc_weaknesses_avg": [ 101.5, 36.093628246547894 ], "wc_questions_avg": [ 90.5, 12.175795661885921 ], "wc_limitations_avg": [ 22.75, 23.636571240347024 ], "wc_review_avg": [ 395.5, 107.08057713703265 ], "wc_reply_reviewers_avg": [ 27.0, 12.041594578792296 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12815662783782185762&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "stanford.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Convolutional State Space Models for Long-Range Spatiotemporal Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73012", "id": "1ZvEtnrHS1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff9783ec29688387d44779d67d06ef66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1ZvEtnrHS1", "openreview": "https://openreview.net/forum?id=1ZvEtnrHS1", "poster": "/media/PosterPDFs/NeurIPS%202023/73012.png?t=1701985581.6193774", "slides": "https://nips.cc/virtual/2023/poster/73012", "video": "https://nips.cc/virtual/2023/poster/73012", "author_site": "Jimmy Smith, Shalini De Mello, Jan Kautz, Scott Linderman, Wonmin Byeon", "tldr": "", "abstract": "Effectively modeling long spatiotemporal sequences is challenging due to the need to model complex spatial correlations and long-range temporal dependencies simultaneously. ConvLSTMs attempt to address this by updating tensor-valued states with recurrent neural networks, but their sequential computation makes them slow to train. In contrast, Transformers can process an entire spatiotemporal sequence, compressed into tokens, in parallel. However, the cost of attention scales quadratically in length, limiting their scalability to longer sequences. Here, we address the challenges of prior methods and introduce convolutional state space models (ConvSSM) that combine the tensor modeling ideas of ConvLSTM with the long sequence modeling approaches of state space methods such as S4 and S5. First, we demonstrate how parallel scans can be applied to convolutional recurrences to achieve subquadratic parallelization and fast autoregressive generation. We then establish an equivalence between the dynamics of ConvSSMs and SSMs, which motivates parameterization and initialization strategies for modeling long-range dependencies. \nThe result is ConvS5, an efficient ConvSSM variant for long-range spatiotemporal modeling. ConvS5 significantly outperforms Transformers and ConvLSTM on a long horizon Moving-MNIST experiment \nwhile training $3\\times$ faster than ConvLSTM and generating samples \n$400\\times$ faster than Transformers. In addition, ConvS5 matches or exceeds the performance of state-of-the-art methods on challenging DMLab, Minecraft and Habitat prediction benchmarks and \nenables new directions for modeling long spatiotemporal sequences.", "keywords": "spatiotemporal modeling;ConvLSTM;RNN;state spaces;SSM;S4;S5;long-range dependencies;video prediction", "primary_area": "", "supplementary_material": "/attachment/471ce13ae3003e9a8ce9693c01a596f83364461d.zip", "author": "Jimmy T.H. 
Smith;Shalini De Mello;Jan Kautz;Scott Linderman;Wonmin Byeon", "authorids": "~Jimmy_T.H._Smith1;~Shalini_De_Mello1;~Jan_Kautz1;~Scott_Linderman1;~Wonmin_Byeon1", "gender": "M;Not Specified;;;M", "homepage": "https://jimmysmith1919.github.io/;https://research.nvidia.com/person/shalini-de-mello;http://jankautz.com;https://wonmin-byeon.github.io/;https://web.stanford.edu/~swl1/", "dblp": "305/3641;206/7364;48/6214;40/10201;142/2484", "google_scholar": "GC9Vv1wAAAAJ;xQM4BlMAAAAJ;P9FclNEAAAAJ;0497CHoAAAAJ;6mD3I24AAAAJ", "orcid": "0000-0003-2016-2480;;;;", "linkedin": "jimmy-t-h-smith-1679b122/;shalini-de-mello-02b8251/;;;", "or_profile": "~Jimmy_T.H._Smith1;~Shalini_De_Mello1;~Jan_Kautz1;~Wonmin_Byeon1;~Scott_W_Linderman1", "aff": "NVIDIA;NVIDIA;NVIDIA;NVIDIA;Stanford University", "aff_domain": "nvidia.com;nvidia.com;nvidia.com;nvidia.com;stanford.edu", "position": "Intern;Principal Researcher;VP Research;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nsmith2023convolutional,\ntitle={Convolutional State Space Models for Long-Range Spatiotemporal Modeling},\nauthor={Jimmy T.H. Smith and Shalini De Mello and Jan Kautz and Scott Linderman and Wonmin Byeon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1ZvEtnrHS1}\n}", "github": "", "project": "", "reviewers": "P1WH;rhqo;R5qx;Kz49;xT6A", "pdf_size": 4490668, "rating": "4;5;5;6;7", "confidence": "2;3;4;4;4", "soundness": "2;3;2;3;3", "novelty": "2;2;2;2;2", "presentation": "3;3;3;4;3", "wc_summary": "75;92;49;158;85", "wc_strengths": "23;43;29;118;72", "wc_weaknesses": "128;126;28;194;134", "wc_questions": "4;107;126;26;132", "wc_limitations": "18;1;1;1;5", "wc_review": "248;369;233;497;428", "wc_reply_reviewers": "37;56;22;0;352", "wc_reply_authors": "0;351;33;0;643", "reply_reviewers": "1;1;1;0;3", "reply_authors": "1;2;2;1;5", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.8, 36.17402382926179 ], "wc_strengths_avg": [ 57.0, 34.876926470089074 ], "wc_weaknesses_avg": [ 122.0, 53.321665390345785 ], "wc_questions_avg": [ 79.0, 53.35916041318491 ], "wc_limitations_avg": [ 5.2, 6.584831053261731 ], "wc_review_avg": [ 355.0, 102.00196076546764 ], "wc_reply_reviewers_avg": [ 93.4, 130.59494630344622 ], "wc_reply_authors_avg": [ 205.4, 255.65335906261822 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7844645405527363, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13123592303208461342&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "nvidia.com;nvidia.com;nvidia.com;nvidia.com;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "NVIDIA;Stanford University", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.stanford.edu", "aff_unique_abbr": "NVIDIA;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unified Lower Bounds for Interactive High-dimensional Estimation under Information Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73011", "id": "1ZzG6td0el", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a07e87ecfa8a651d62257571669b0150-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1ZzG6td0el", "openreview": "https://openreview.net/forum?id=1ZzG6td0el", "poster": "/media/PosterPDFs/NeurIPS%202023/73011.png?t=1702243323.2257855", "slides": "https://nips.cc/virtual/2023/poster/73011", "video": "https://nips.cc/virtual/2023/poster/73011", "author_site": "Jayadev Acharya, Cl\u00e9ment L Canonne, Ziteng Sun, Himanshu Tyagi", "tldr": "", "abstract": "We consider distributed parameter estimation using interactive protocols subject to local information constraints such as bandwidth limitations, local differential privacy, and restricted measurements. We provide a unified framework enabling us to derive a variety of (tight) minimax lower bounds for different parametric families of distributions, both continuous and discrete, under any $\\ell_p$ loss. Our lower bound framework is versatile and yields \u201cplug-and-play\u201d bounds that are widely applicable to a large range of estimation problems, and, for the prototypical case of the Gaussian family, circumvents limitations of previous techniques. In particular, our approach recovers bounds obtained using data processing inequalities and Cram\u00e9r\u2013Rao bounds, two other alternative approaches for proving lower bounds in our setting of interest. Further, for the families considered, we complement our lower bounds with matching upper bounds.", "keywords": "statistical estimation; interactivity; local differential privacy; communication constraint", "primary_area": "", "supplementary_material": "/attachment/10b2069c16e21e25360e3d4b25639127e3cada4a.pdf", "author": "Jayadev Acharya;Clement Louis Canonne;Ziteng Sun;Himanshu Tyagi", "authorids": "~Jayadev_Acharya2;~Clement_Louis_Canonne1;~Ziteng_Sun1;~Himanshu_Tyagi1", "gender": "M;M;M;M", "homepage": "https://people.ece.cornell.edu/acharya/;https://ccanonne.github.io/;http://www.zitengsun.com/;", "dblp": "74/5865;28/9840L;194/8609;11/4803", "google_scholar": "70vJVxcAAAAJ;u_OXsBIAAAAJ;;", "orcid": ";0000-0001-7153-5211;;", "linkedin": ";;;", "or_profile": "~Jayadev_Acharya2;~Clement_Louis_Canonne1;~Ziteng_Sun1;~Himanshu_Tyagi1", "aff": "Cornell University;University of Sydney;Google;Indian Institute of Science", "aff_domain": "cornell.edu;sydney.edu.au;google.com;iisc.ac.in", "position": "Assistant Professor;Lecturer;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nacharya2023unified,\ntitle={Unified Lower Bounds for Interactive High-dimensional Estimation under Information Constraints},\nauthor={Jayadev Acharya and Clement Louis Canonne and Ziteng Sun and Himanshu Tyagi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1ZzG6td0el}\n}", "github": "", "project": "", "reviewers": "Zd1o;DETb;VD3C;7efN;D6mk", "pdf_size": 575682, "rating": "6;6;7;7;8", "confidence": "4;4;2;2;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "135;142;64;116;218", "wc_strengths": "99;27;47;26;113", "wc_weaknesses": "114;54;20;17;3", "wc_questions": "228;41;1;352;122", "wc_limitations": "14;2;1;1;22", "wc_review": "590;266;133;512;478", "wc_reply_reviewers": "16;16;0;0;8", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.2, 
0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 135.0, 49.678969393496885 ], "wc_strengths_avg": [ 62.4, 36.64751014734835 ], "wc_weaknesses_avg": [ 41.6, 39.892856503389176 ], "wc_questions_avg": [ 148.8, 127.86461590291506 ], "wc_limitations_avg": [ 8.0, 8.555699854482976 ], "wc_review_avg": [ 395.8, 169.637731651894 ], "wc_reply_reviewers_avg": [ 8.0, 7.155417527999327 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11667047825475879758&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cornell.edu;sydney.edu.au;google.com;iisc.ac.in", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Cornell University;University of Sydney;Google;Indian Institute of Science", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.cornell.edu;https://www.sydney.edu.au;https://www.google.com;https://www.iisc.ac.in", "aff_unique_abbr": "Cornell;USYD;Google;IISc", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "United States;Australia;India" }, { "title": "Incentivized Communication for Federated Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73010", "id": "1aQivXgZKj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa61d142c0081a8259a6372a3bb0af2b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1aQivXgZKj", "openreview": "https://openreview.net/forum?id=1aQivXgZKj", "poster": "/media/PosterPDFs/NeurIPS%202023/73010.png?t=1701723696.1220233", "slides": "https://nips.cc/virtual/2023/poster/73010", "video": "https://nips.cc/virtual/2023/poster/73010", "author_site": "Zhepei Wei, Chuanhao Li, Haifeng Xu, Hongning Wang", "tldr": "", "abstract": "Most existing works on federated bandits take it for granted that all clients are altruistic about sharing their data with the server for the collective good whenever needed. Despite their compelling theoretical guarantee on performance and communication efficiency, this assumption is overly idealistic and oftentimes violated in practice, especially when the algorithm is operated over self-interested clients, who are reluctant to share data without explicit benefits. Neglecting such self-interested behaviors can significantly degrade the learning efficiency and even the practical operability of federated bandit learning. In light of this, we aim to spark new insights into this under-explored research area by formally introducing an incentivized communication problem for federated bandits, where the server shall motivate clients to share data by providing incentives. Without loss of generality, we instantiate this bandit problem with the contextual linear setting and propose the first incentivized communication protocol, namely, Inc-FedUCB, that achieves near-optimal regret with provable communication and incentive cost guarantees.
Extensive empirical experiments on both synthetic and real-world datasets further validate the effectiveness of the proposed method across various environments.", "keywords": "contextual bandit;federated learning;incentive mechanism", "primary_area": "", "supplementary_material": "", "author": "Zhepei Wei;Chuanhao Li;Haifeng Xu;Hongning Wang", "authorids": "~Zhepei_Wei1;~Chuanhao_Li1;~Haifeng_Xu1;~Hongning_Wang1", "gender": "M;;M;M", "homepage": "https://weizhepei.com;https://cyrilli.github.io/;http://www.haifeng-xu.com/;http://www.cs.virginia.edu/~hw5x/", "dblp": "247/2560;195/9947;04/1895;05/6545", "google_scholar": "qiK3538AAAAJ;w2ShljkAAAAJ;nLgg388AAAAJ;qkdvKNoAAAAJ", "orcid": ";;;0000-0002-6524-9195", "linkedin": "weizhepei/;;;", "or_profile": "~Zhepei_Wei1;~Chuanhao_Li1;~Haifeng_Xu1;~Hongning_Wang1", "aff": "University of Virginia;University of Virginia;University of Chicago;University of Virginia", "aff_domain": "virginia.edu;virginia.edu;cs.uchicago.edu;virginia.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwei2023incentivized,\ntitle={Incentivized Communication for Federated Bandits},\nauthor={Zhepei Wei and Chuanhao Li and Haifeng Xu and Hongning Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1aQivXgZKj}\n}", "github": "", "project": "", "reviewers": "U6am;NbUe;rmwW;zWH8", "pdf_size": 1563303, "rating": "5;5;5;7", "confidence": "3;2;4;2", "soundness": "3;3;3;3", "novelty": "3;4;4;2", "presentation": "2;2;3;3", "wc_summary": "63;122;89;37", "wc_strengths": "44;71;55;28", "wc_weaknesses": "138;93;160;9", "wc_questions": "6;8;38;15", "wc_limitations": "1;9;31;14", "wc_review": "252;303;373;103", "wc_reply_reviewers": "9;15;12;12", "wc_reply_authors": "137;130;124;82", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.75, 31.47518864121389 ], "wc_strengths_avg": [ 49.5, 15.692354826475215 ], "wc_weaknesses_avg": [ 100.0, 57.82300580218915 ], "wc_questions_avg": [ 16.75, 12.71563997602952 ], "wc_limitations_avg": [ 13.75, 10.985786271359915 ], "wc_review_avg": [ 257.75, 99.13469372525444 ], "wc_reply_reviewers_avg": [ 12.0, 2.1213203435596424 ], "wc_reply_authors_avg": [ 118.25, 21.428660714099703 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17240518796929859841&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "virginia.edu;virginia.edu;cs.uchicago.edu;virginia.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Virginia;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.uchicago.edu", "aff_unique_abbr": "UVA;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "EPIC Fields: Marrying 3D Geometry and Video Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73718", "id": "1agtIRxlCY", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/543d4e171150cb931f1d401cacc3d7af-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1agtIRxlCY", "openreview": "https://openreview.net/forum?id=1agtIRxlCY", "poster": "/media/PosterPDFs/NeurIPS%202023/73718.png?t=1699548027.36458", "slides": "https://nips.cc/virtual/2023/poster/73718", "video": "https://nips.cc/virtual/2023/poster/73718", "author_site": "Vadim Tschernezki, Ahmad Darkhalil, Zhifan Zhu, David Fouhey, Iro Laina, Diane Larlus, Dima Damen, Andrea Vedaldi", "tldr": "", "abstract": "Neural rendering is fuelling a unification of learning, 3D geometry and video understanding that has been waiting for more than two decades. Progress, however, is still hampered by a lack of suitable datasets and benchmarks. To address this gap, we introduce EPIC Fields, an augmentation of EPIC-KITCHENS with 3D camera information. Like other datasets for neural rendering, EPIC Fields removes the complex and expensive step of reconstructing cameras using photogrammetry, and allows researchers to focus on modelling problems. We illustrate the challenge of photogrammetry in egocentric videos of dynamic actions and propose innovations to address them. Compared to other neural rendering datasets, EPIC Fields is better tailored to video understanding because it is paired with labelled action segments and the recent VISOR segment annotations. To further motivate the community, we also evaluate two benchmark tasks in neural rendering and segmenting dynamic objects, with strong baselines that showcase what is not possible today. We also highlight the advantage of geometry in semi-supervised video object segmentations on the VISOR annotations. EPIC Fields reconstructs 96\\% of videos in EPIC-KITCHENS, registering 19M frames in 99 hours recorded in 45 kitchens, and is available from: http://epic-kitchens.github.io/epic-fields", "keywords": "Egocentric; Video; 3D", "primary_area": "", "supplementary_material": "/attachment/27da336705eb19c9635b2ee1c1c9ba96e5a269d4.pdf", "author": "Vadim Tschernezki;Ahmad Darkhalil;Zhifan Zhu;David Fouhey;Iro Laina;Diane Larlus;Dima Damen;Andrea Vedaldi", "authorids": "~Vadim_Tschernezki1;~Ahmad_Darkhalil1;~Zhifan_Zhu2;~David_Fouhey2;~Iro_Laina1;~Diane_Larlus1;~Dima_Damen1;~Andrea_Vedaldi1", "gender": "Not Specified;M;M;;F;F;M;", "homepage": "https://github.com/dichotomies;https://github.com/AhmadDarKhalil;https://zhifanzhu.github.io/;;https://dlarlus.github.io/;http://dimadamen.github.io/;https://www.robots.ox.ac.uk/~vedaldi/;", "dblp": "243/2959;330/3609;72/888-1;29/8613;48/4033;95/3618;99/2825;182/2070", "google_scholar": "-Udk-5IAAAAJ;hWn-cAUAAAAJ;9_7rBUIAAAAJ;FLcpd34AAAAJ;https://scholar.google.fr/citations?user=nI2oJqkAAAAJ;https://scholar.google.co.uk/citations?user=OxL9Wn8AAAAJ;bRT7t28AAAAJ;n9nXAPcAAAAJ", "orcid": ";0009-0003-3640-9481;0000-0002-0508-128X;;;0000-0001-8804-6238;0000-0003-1374-2858;0000-0001-8857-7709", "linkedin": ";ahmad-darkhalil-88b9b7108;;;;dimadamen;;", "or_profile": "~Vadim_Tschernezki1;~Ahmad_Darkhalil1;~Zhifan_Zhu2;~David_Fouhey2;~Diane_Larlus1;~Dima_Damen1;~Andrea_Vedaldi1;~Iro_Laina2", "aff": "University of Oxford;University of Bristol;University of Bristol;University of Michigan;NAVER LABS Europe;University of Bristol;Meta;University of Oxford", "aff_domain": "oxford.ac.uk;bristol.ac.uk;bris.ac.uk;umich.edu;naverlabs.com;bristol.ac.uk;meta.com;ox.ac.uk", "position": "PhD student;PhD student;PhD student;Assistant Professor;Principal Researcher;Full 
Professor;Researcher;Postdoc", "bibtex": "@inproceedings{\ntschernezki2023epic,\ntitle={{EPIC} Fields: Marrying 3D Geometry and Video Understanding},\nauthor={Vadim Tschernezki and Ahmad Darkhalil and Zhifan Zhu and David Fouhey and Iro Laina and Diane Larlus and Dima Damen and Andrea Vedaldi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=1agtIRxlCY}\n}", "github": "", "project": "", "reviewers": "Pv5q;FW6P;cueE;ZiM7", "pdf_size": 17929269, "rating": "6;7;7;7", "confidence": "4;4;5;4", "wc_summary_and_contributions": "46;68;60;178", "wc_strengths": "55;77;33;157", "wc_improvement": "122;37;320;80", "wc_limitations": "1;27;12;18", "wc_correctness": "1;4;257;64", "wc_clarity": "1;5;3;39", "wc_relation_to_prior_work": "1;10;2;22", "wc_documentation": "1;7;26;36", "wc_additional_feedback": "1;1;1;1", "wc_review": "229;236;714;595", "wc_reply_reviewers": "0;0;14;0", "wc_reply_authors": "428;318;1363;757", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 88.0, 52.55473337388365 ], "wc_strengths_avg": [ 80.5, 46.82680856090878 ], "wc_improvement_avg": [ 139.75, 108.31983890312983 ], "wc_limitations_avg": [ 14.5, 9.447221813845593 ], "wc_correctness_avg": [ 81.5, 104.39468377269026 ], "wc_clarity_avg": [ 12.0, 15.652475842498529 ], "wc_relation_to_prior_work_avg": [ 8.75, 8.407585860400118 ], "wc_documentation_avg": [ 17.5, 14.115594213493104 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 443.5, 215.16795765169124 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 716.5, 406.7053601810529 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16929006648828329735&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "email": "oxford.ac.uk;bristol.ac.uk;bris.ac.uk;umich.edu;naverlabs.com;bristol.ac.uk;meta.com;ox.ac.uk", "author_num": 8, "aff_unique_index": "0;1;1;2;3;1;4;0", "aff_unique_norm": "University of Oxford;University of Bristol;University of Michigan;NAVER LABS;Meta", "aff_unique_dep": ";;;Europe;Meta Platforms, Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://www.bristol.ac.uk;https://www.umich.edu;https://www.naverlabs.eu;https://meta.com", "aff_unique_abbr": "Oxford;Bristol;UM;NAVER LABS;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2;0;1;0", "aff_country_unique": "United Kingdom;United States;France" }, { "title": "A Theoretical Analysis of Optimistic Proximal Policy Optimization in Linear Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73009", "id": "1bTG4sJ7tN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9721921b799b6ea98d37f9e77f1a7fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1bTG4sJ7tN", "openreview": "https://openreview.net/forum?id=1bTG4sJ7tN", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73009", "video": "https://nips.cc/virtual/2023/poster/73009", "author_site": "Han Zhong, Tong Zhang", "tldr": "", "abstract": "The proximal policy optimization (PPO) 
algorithm stands as one of the most successful methods in the field of reinforcement learning (RL). Despite its success, the theoretical understanding of PPO remains deficient. Specifically, it is unclear whether PPO or its optimistic variants can effectively solve linear Markov decision processes (MDPs), which are arguably the simplest models in RL with function approximation. \n To bridge this gap, we propose an optimistic variant of PPO for episodic adversarial linear MDPs with full-information feedback, and establish a $\\tilde{\\mathcal{O}}(d^{3/4}H^2K^{3/4})$ regret for it. Here $d$ is the ambient dimension of linear MDPs, $H$ is the length of each episode, and $K$ is the number of episodes. Compared with existing policy-based algorithms, we achieve the state-of-the-art regret bound in both stochastic linear MDPs and adversarial linear MDPs with full information. Additionally, our algorithm design features a novel multi-batched updating mechanism and the theoretical analysis utilizes a new covering number argument of value and policy classes, which might be of independent interest.", "keywords": "policy optimization;adversarial linear MDPs;RL theory", "primary_area": "", "supplementary_material": "", "author": "Han Zhong;Tong Zhang", "authorids": "~Han_Zhong1;~Tong_Zhang2", "gender": ";M", "homepage": "https://hanzhong-ml.github.io/;http://tongzhang-ml.org", "dblp": "137/8096.html;07/4227-1", "google_scholar": "Bk5q_pAAAAAJ;LurWtuYAAAAJ", "orcid": ";0000-0002-5511-2558", "linkedin": ";", "or_profile": "~Han_Zhong1;~Tong_Zhang2", "aff": "Peking University;Hong Kong University of Science and Technology", "aff_domain": "stu.pku.edu.cn;ust.hk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhong2023a,\ntitle={A Theoretical Analysis of Optimistic Proximal Policy Optimization in Linear Markov Decision Processes},\nauthor={Han Zhong and Tong Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1bTG4sJ7tN}\n}", "github": "", "project": "", "reviewers": "Eaqc;c9AM;r8ZK;t7wE", "pdf_size": 1131478, "rating": "6;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "134;81;45;51", "wc_strengths": "137;251;33;37", "wc_weaknesses": "33;169;195;30", "wc_questions": "161;138;172;118", "wc_limitations": "52;15;30;1", "wc_review": "517;654;475;237", "wc_reply_reviewers": "23;25;11;11", "wc_reply_authors": "22;18;18;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.75, 35.223394214640926 ], "wc_strengths_avg": [ 114.5, 89.14454554261859 ], "wc_weaknesses_avg": [ 106.75, 75.81680222747461 ], "wc_questions_avg": [ 147.25, 20.873128658636684 ], "wc_limitations_avg": [ 24.5, 18.9010581714358 ], "wc_review_avg": [ 470.75, 150.31363045312958 ], "wc_reply_reviewers_avg": [ 17.5, 6.5383484153110105 ], "wc_reply_authors_avg": [ 19.0, 1.7320508075688772 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17737089852577817940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stu.pku.edu.cn;ust.hk", "author_num": 2, "aff_unique_index": "0;1",
"aff_unique_norm": "Peking University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.ust.hk", "aff_unique_abbr": "Peking U;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "1cY5WLTN0k", "title": "Monte Carlo Neural PDE Solver", "track": "main", "status": "Reject", "tldr": "", "abstract": "Training neural PDE solver in an unsupervised manner is essential in scenarios with limited available or high-quality data. However, the performance and efficiency of existing methods are limited by the properties of numerical algorithms integrated during the training stage (like FDM and PSM), which require careful spatiotemporal discretization to obtain reasonable accuracy, especially in cases with high-frequency components and long periods. To overcome these limitations, we propose Monte Carlo Neural PDE Solver (MCNP Solver) for training unsupervised neural solvers via a Monte Carlo view, which regards macroscopic phenomena as ensembles of random particles. MCNP Solver naturally inherits the advantages of the Monte Carlo method (MCM), which is robust against spatial-temporal variations and can tolerate coarse time steps compared to other unsupervised methods. In practice, we develop one-step rollout and Fourier Interpolation techniques that help reduce computational costs or errors arising from time and space, respectively. Furthermore, we design a multi-scale framework to improve performance in long-time simulation tasks. In theory, we characterize the approximation error and robustness of the MCNP Solver on convection-diffusion equations. Numerical experiments on diffusion and Navier-Stokes equations demonstrate significant accuracy improvements compared to other unsupervised baselines in cases with highly variable fields and long-time simulation settings.", "keywords": "Neural PDE Solver;Feynman-Kac Formula;AI for PDE", "primary_area": "", "supplementary_material": "/attachment/98f29e868c68c431eef92d0b707903a048074bdb.pdf", "author": "Rui Zhang;Qi Meng;Rongchan Zhu;Yue Wang;Wenlei Shi;Shihua Zhang;Zhi-Ming Ma;Tie-Yan Liu", "authorids": "~Rui_Zhang22;~Qi_Meng1;~Rongchan_Zhu1;~Yue_Wang15;~Wenlei_Shi1;~Shihua_Zhang1;~Zhi-Ming_Ma1;~Tie-Yan_Liu1", "gender": ";F;F;M;M;M;;M", "homepage": "https://optray.github.io/;;https://math.bit.edu.cn/szdw/jgml/glyjrsxx/zrc1/index.htm;https://scholar.google.com/citations?hl=zh-CN&user=fGv5irIAAAAJ;;http://www.zhanglab-amss.org/homepage/index.html;http://homepage.amss.ac.cn/research/homePage/8eb59241e2e74d828fb84eec0efadba5/myHomePage.html;http://member.acm.org/~tieyanliu", "dblp": "60/2536-52;;247/9547;33/4822-17.html;138/8360;96/6073;;l/TieYanLiu", "google_scholar": "8V50qncAAAAJ;t-z3K34AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;Q7ICmxMAAAAJ;81AqAAYAAAAJ;;Nh832fgAAAAJ", "orcid": "0000-0002-7979-3052;;;;0000-0002-4036-3258;;;0000-0002-0476-8020", "linkedin": ";;;;;;;", "or_profile": "~Rui_Zhang22;~Qi_Meng1;~Rongchan_Zhu1;~Yue_Wang15;~Wenlei_Shi1;~Shihua_Zhang1;~Zhi-Ming_Ma1;~Tie-Yan_Liu1", "aff": "Microsoft;Microsoft;Beijing Institute of Technology;Microsoft Research Aisa;Microsoft Research;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Microsoft", "aff_domain": 
"microsoft.com;microsoft.com;bit.edu.cn;microsoft.com;microsoft.com;amss.ac.cn;amss.ac.cn;microsoft.com", "position": "Intern;associate researcher;Full Professor;Researcher;Researcher;Full Professor;Full Professor;Distinguished Scientist", "bibtex": "@misc{\nzhang2023monte,\ntitle={Monte Carlo Neural {PDE} Solver},\nauthor={Rui Zhang and Qi Meng and Rongchan Zhu and Yue Wang and Wenlei Shi and Shihua Zhang and Zhi-Ming Ma and Tie-Yan Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=1cY5WLTN0k}\n}", "github": "", "project": "", "reviewers": "MGTF;dyHf;urJf;R6Cs;xU1D", "site": "https://openreview.net/forum?id=1cY5WLTN0k", "pdf_size": 632766, "rating": "3;5;6;6;6", "confidence": "4;4;5;2;3", "soundness": "1;3;2;3;3", "novelty": "2;2;4;3;3", "presentation": "1;3;4;2;3", "wc_summary": "75;67;67;44;122", "wc_strengths": "10;117;93;37;71", "wc_weaknesses": "240;62;316;13;108", "wc_questions": "10;115;212;56;228", "wc_limitations": "1;25;5;15;9", "wc_review": "336;386;693;165;538", "wc_reply_reviewers": "0;5;272;20;28", "wc_reply_authors": "0;0;450;226;0", "reply_reviewers": "0;1;2;1;1", "reply_authors": "1;1;3;2;1", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 75.0, 25.682678987987217 ], "wc_strengths_avg": [ 65.6, 38.29151342007782 ], "wc_weaknesses_avg": [ 147.8, 113.05644607893882 ], "wc_questions_avg": [ 124.2, 85.15961484177815 ], "wc_limitations_avg": [ 11.0, 8.390470785361213 ], "wc_review_avg": [ 423.6, 179.79165720355323 ], "wc_reply_reviewers_avg": [ 65.0, 103.98846089831314 ], "wc_reply_authors_avg": [ 135.2, 180.10041643483228 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.269069117598525, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pfahhBJuEqIJ:scholar.google.com/&scioq=Monte+Carlo+Neural+PDE+Solver&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0;2;2;0", "aff_unique_norm": "Microsoft;Beijing Institute of Technology;Chinese Academy of Sciences", "aff_unique_dep": "Microsoft Corporation;;Academy of Mathematics and Systems Science", "aff_unique_url": "https://www.microsoft.com;http://www.bit.edu.cn/;http://www.cas.cn", "aff_unique_abbr": "Microsoft;BIT;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;1;1;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Learning Unseen Modality Interaction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73008", "id": "1g0A9kE8Id", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abb4847bbd60f38b1b7649d26c7a0067-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1g0A9kE8Id", "openreview": "https://openreview.net/forum?id=1g0A9kE8Id", "poster": "/media/PosterPDFs/NeurIPS%202023/73008.png?t=1701961223.4898567", "slides": "https://nips.cc/virtual/2023/poster/73008", "video": "https://nips.cc/virtual/2023/poster/73008", "author_site": "Yunhua Zhang, Hazel Doughty, Cees Snoek", "tldr": "", "abstract": "Multimodal learning assumes all modality combinations of interest are available during training to learn cross-modal correspondences. 
In this paper, we challenge this modality-complete assumption for multimodal learning and instead strive for generalization to unseen modality combinations during inference. We pose the problem of unseen modality interaction and introduce a first solution. It exploits a module that projects the multidimensional features of different modalities into a common space with rich information preserved. This allows the information to be accumulated with a simple summation operation across available modalities. To reduce overfitting to less discriminative modality combinations during training, we further improve the model learning with pseudo-supervision indicating the reliability of a modality\u2019s prediction. We demonstrate that our approach is effective for diverse tasks and modalities by evaluating it for multimodal video classification, robot state regression, and multimedia retrieval. Project website: https://xiaobai1217.github.io/Unseen-Modality-Interaction/.", "keywords": "Multimodal Learning", "primary_area": "", "supplementary_material": "/attachment/95f82b8a02821d6f9286c5808287a1681382e34f.pdf", "author": "Yunhua Zhang;Hazel Doughty;Cees G. M. Snoek", "authorids": "~Yunhua_Zhang1;~Hazel_Doughty1;~Cees_G._M._Snoek1", "gender": "F;F;M", "homepage": "https://xiaobai1217.github.io/;https://hazeldoughty.github.io/;http://www.ceessnoek.info", "dblp": ";198/0823;s/CeesSnoek", "google_scholar": "Yrw15pUAAAAJ;b3koBVwAAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": "0000-0002-0692-4963;;0000-0001-9092-1556", "linkedin": ";;cgmsnoek/", "or_profile": "~Yunhua_Zhang1;~Hazel_Doughty1;~Cees_Snoek1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2023learning,\ntitle={Learning Unseen Modality Interaction},\nauthor={Yunhua Zhang and Hazel Doughty and Cees G. M. 
Snoek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1g0A9kE8Id}\n}", "github": "", "project": "", "reviewers": "adMX;ctBz;d7nB;2qSh", "pdf_size": 585852, "rating": "5;5;6;6", "confidence": "4;4;3;2", "soundness": "2;2;4;2", "novelty": "2;3;3;2", "presentation": "2;2;2;3", "wc_summary": "19;81;142;68", "wc_strengths": "51;27;118;29", "wc_weaknesses": "189;185;79;307", "wc_questions": "2;90;369;447", "wc_limitations": "18;13;4;13", "wc_review": "279;396;712;864", "wc_reply_reviewers": "23;296;9;140", "wc_reply_authors": "0;492;0;230", "reply_reviewers": "1;2;1;2", "reply_authors": "1;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 43.83206588788623 ], "wc_strengths_avg": [ 56.25, 36.873940662749895 ], "wc_weaknesses_avg": [ 190.0, 80.67837380611982 ], "wc_questions_avg": [ 227.0, 185.7134890092801 ], "wc_limitations_avg": [ 12.0, 5.049752469181039 ], "wc_review_avg": [ 562.75, 235.23751295233504 ], "wc_reply_reviewers_avg": [ 117.0, 115.18463439191879 ], "wc_reply_authors_avg": [ 180.5, 202.88112282812318 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1660407138692266566&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uva.nl;uva.nl;uva.nl", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Exact Verification of ReLU Neural Control Barrier Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73007", "id": "1h2TAUEfc4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/120ed726cf129dbeb8375b6f8a0686f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1h2TAUEfc4", "openreview": "https://openreview.net/forum?id=1h2TAUEfc4", "poster": "/media/PosterPDFs/NeurIPS%202023/73007.png?t=1699567324.5536392", "slides": "https://nips.cc/virtual/2023/poster/73007", "video": "https://nips.cc/virtual/2023/poster/73007", "author_site": "Hongchao Zhang, Junlin Wu, Yevgeniy Vorobeychik, Andrew Clark", "tldr": "", "abstract": "Control Barrier Functions (CBFs) are a popular approach for safe control of nonlinear systems. In CBF-based control, the desired safety properties of the system are mapped to nonnegativity of a CBF, and the control input is chosen to ensure that the CBF remains nonnegative for all time. Recently, machine learning methods that represent CBFs as neural networks (neural control barrier functions, or NCBFs) have shown great promise due to the universal representability of neural networks. However, verifying that a learned CBF guarantees safety remains a challenging research problem. This paper presents novel exact conditions and algorithms for verifying safety of feedforward NCBFs with ReLU activation functions. 
The key challenge in doing so is that, due to the piecewise linearity of the ReLU function, the NCBF will be nondifferentiable at certain points, thus invalidating traditional safety verification methods that assume a smooth barrier function. We resolve this issue by leveraging a generalization of Nagumo's theorem for proving invariance of sets with nonsmooth boundaries to derive necessary and sufficient conditions for safety. Based on this condition, we propose an algorithm for safety verification of NCBFs that first decomposes the NCBF into piecewise linear segments and then solves a nonlinear program to verify safety of each segment as well as the intersections of the linear segments. We mitigate the complexity by only considering the boundary of the safe region and by pruning the segments with Interval Bound Propagation (IBP) and linear relaxation. We evaluate our approach through numerical studies with comparison to state-of-the-art SMT-based methods. Our code is available at https://github.com/HongchaoZhang-HZ/exactverif-reluncbf-nips23.", "keywords": "Safety;Neural Barrier Function;Verification", "primary_area": "", "supplementary_material": "/attachment/3a5b767a36f2a54bd0f8edee7953b7c46b22b401.pdf", "author": "Hongchao Zhang;Junlin Wu;Yevgeniy Vorobeychik;Andrew Clark", "authorids": "~Hongchao_Zhang2;~Junlin_Wu2;~Yevgeniy_Vorobeychik1;~Andrew_Clark1", "gender": "M;;M;M", "homepage": ";https://jlwu002.github.io/;http://vorobeychik.com;https://awclark587.wixsite.com/mysite", "dblp": ";188/8292-1;70/2217;", "google_scholar": "J9NlFfgAAAAJ;;https://scholar.google.com.tw/citations?user=ptI-HHkAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hongchao_Zhang2;~Junlin_Wu2;~Yevgeniy_Vorobeychik1;~Andrew_Clark1", "aff": "Washington University, Saint Louis;Washington University, St. Louis;Washington University, St. 
Louis;Washington University, Saint Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;wustl.edu", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023exact,\ntitle={Exact Verification of Re{LU} Neural Control Barrier Functions},\nauthor={Hongchao Zhang and Junlin Wu and Yevgeniy Vorobeychik and Andrew Clark},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1h2TAUEfc4}\n}", "github": "", "project": "", "reviewers": "hbj4;yuYJ;U7BK;wvvT;5o49", "pdf_size": 1223014, "rating": "6;6;6;6;6", "confidence": "3;4;3;2;3", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "2;2;3;3;3", "wc_summary": "123;132;70;121;110", "wc_strengths": "73;46;35;73;104", "wc_weaknesses": "221;130;291;143;106", "wc_questions": "67;126;79;1;296", "wc_limitations": "36;25;4;112;63", "wc_review": "520;459;479;450;679", "wc_reply_reviewers": "82;63;171;83;131", "wc_reply_authors": "16;67;25;93;204", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 111.2, 21.756838005555863 ], "wc_strengths_avg": [ 66.2, 24.094812719753605 ], "wc_weaknesses_avg": [ 178.2, 68.30929658545753 ], "wc_questions_avg": [ 113.8, 99.46939227722264 ], "wc_limitations_avg": [ 48.0, 37.22902093797257 ], "wc_review_avg": [ 517.4, 84.3198671725709 ], "wc_reply_reviewers_avg": [ 106.0, 39.50696141188284 ], "wc_reply_authors_avg": [ 81.0, 67.5721836261046 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8321204446180251278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "wustl.edu;wustl.edu;wustl.edu;wustl.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WUSTL", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Saint Louis;St. Louis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reliable learning in challenging environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73006", "id": "1h7Uh9zUXc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96189e90e599ccc43f00434ff3ed0312-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1h7Uh9zUXc", "openreview": "https://openreview.net/forum?id=1h7Uh9zUXc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73006", "video": "https://nips.cc/virtual/2023/poster/73006", "author_site": "Maria-Florina Balcan, Steve Hanneke, Rattana Pukdee, Dravyansh Sharma", "tldr": "", "abstract": "The problem of designing learners that provide guarantees that their predictions are provably correct is of increasing importance in machine learning. However, learning theoretic guarantees have only been considered in very specific settings. 
In this work, we consider the design and analysis of reliable learners in challenging test-time environments as encountered in modern machine learning problems: namely adversarial test-time attacks (in several variations) and natural distribution shifts. We provide a reliable learner with provably optimal guarantees in such settings. We discuss computationally feasible implementations of the learner and further show that our algorithm achieves strong positive performance guarantees on several natural examples, such as linear separators under log-concave distributions or smooth boundary classifiers under smooth probability distributions.", "keywords": "Reliable machine learning;adversarial robustness;distribution shift;theory", "primary_area": "", "supplementary_material": "/attachment/6da128040a6bae7855cb89b4515554834528ee78.pdf", "author": "Nina Balcan;Steve Hanneke;Rattana Pukdee;Dravyansh Sharma", "authorids": "~Nina_Balcan1;~Steve_Hanneke1;~Rattana_Pukdee1;~Dravyansh_Sharma1", "gender": "F;M;M;M", "homepage": "http://www.cs.cmu.edu/~ninamf/;http://www.stevehanneke.com;;http://www.cs.cmu.edu/~dravyans/", "dblp": "b/MariaFlorinaBalcan;40/154;;164/7289", "google_scholar": "https://scholar.google.com.tw/citations?user=LWlN_BUAAAAJ;fEhNO7YAAAAJ;KhnQ8zoAAAAJ;", "orcid": ";;;", "linkedin": ";;rattana-pukdee/;", "or_profile": "~Nina_Balcan1;~Steve_Hanneke1;~Rattana_Pukdee1;~Dravyansh_Sharma1", "aff": "Carnegie Mellon University;Purdue University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;purdue.edu;andrew.cmu.edu;cmu.edu", "position": "Full Professor;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nbalcan2023reliable,\ntitle={Reliable learning in challenging environments},\nauthor={Nina Balcan and Steve Hanneke and Rattana Pukdee and Dravyansh Sharma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1h7Uh9zUXc}\n}", "github": "", "project": "", "reviewers": "LDC7;nCeL;8X7Z;uQQ5", "pdf_size": 577312, "rating": "4;5;7;7", "confidence": "3;3;3;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "234;46;17;76", "wc_strengths": "54;52;68;72", "wc_weaknesses": "398;287;47;74", "wc_questions": "74;76;69;17", "wc_limitations": "1;29;4;5", "wc_review": "761;490;205;244", "wc_reply_reviewers": "0;83;26;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.25, 83.896886116232 ], "wc_strengths_avg": [ 61.5, 8.645808232895291 ], "wc_weaknesses_avg": [ 201.5, 146.67054919103563 ], "wc_questions_avg": [ 59.0, 24.38237068047322 ], "wc_limitations_avg": [ 9.75, 11.211043662389331 ], "wc_review_avg": [ 425.0, 222.6443352075233 ], "wc_reply_reviewers_avg": [ 32.75, 30.65432269680738 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5777085918672547068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cmu.edu;purdue.edu;andrew.cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;0;0",
"aff_unique_norm": "Carnegie Mellon University;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.purdue.edu", "aff_unique_abbr": "CMU;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Momentum Provably Improves Error Feedback!", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73005", "id": "1h92PmnKov", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0b1515be276f6ba82b4f2b25e50bef0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1h92PmnKov", "openreview": "https://openreview.net/forum?id=1h92PmnKov", "poster": "/media/PosterPDFs/NeurIPS%202023/73005.png?t=1699437142.3101797", "slides": "https://nips.cc/virtual/2023/poster/73005", "video": "https://nips.cc/virtual/2023/poster/73005", "author_site": "Ilyas Fatkhullin, Alexander Tyurin, Peter Richtarik", "tldr": "", "abstract": "Due to the high communication overhead when training machine learning models in a distributed environment, modern algorithms invariably rely on lossy communication compression. However, when untreated, the errors caused by compression propagate, and can lead to severely unstable behavior, including exponential divergence. Almost a decade ago, Seide et al. [2014] proposed an error feedback (EF) mechanism, which we refer to as EF14, as an immensely effective heuristic for mitigating this issue. However, despite steady algorithmic and theoretical advances in the EF field in the last decade, our understanding is far from complete. In this work we address one of the most pressing issues. In particular, in the canonical nonconvex setting, all known variants of EF rely on very large batch sizes to converge, which can be prohibitive in practice. We propose a surprisingly simple fix which removes this issue both theoretically, and in practice: the application of Polyak's momentum to the latest incarnation of EF due to Richt\u00e1rik et al. [2021] known as EF21. Our algorithm, for which we coin the name EF21-SGDM, improves the communication and sample complexities of previous error feedback algorithms under standard smoothness and bounded variance assumptions, and does not require any further strong assumptions such as bounded gradient dissimilarity. Moreover, we propose a double momentum version of our method that improves the complexities even further. 
Our proof seems to be novel even when compression is removed from the method, and as such, our proof technique is of independent interest in the study of nonconvex stochastic optimization enriched with Polyak's momentum.", "keywords": "Heavy-ball momentum;Polyak momentum;Error feedback;Federated Learning;Distributed Optimization;Stochastic optimization;Nonconvex optimization", "primary_area": "", "supplementary_material": "/attachment/a3ca915725d8a1aa536600891015e2860c92ca14.zip", "author": "Ilyas Fatkhullin;Alexander Tyurin;Peter Richt\u00e1rik", "authorids": "~Ilyas_Fatkhullin1;~Alexander_Tyurin1;~Peter_Richt\u00e1rik1", "gender": "Not Specified;M;M", "homepage": "https://ai.ethz.ch/people/ilyas-fatkhullin.html;https://k3nfalt.github.io/;https://richtarik.org", "dblp": "294/8711;203/8919;62/8001", "google_scholar": "UCOWHb4AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4380-5848", "linkedin": ";;richtarik/", "or_profile": "~Ilyas_Fatkhullin1;~Alexander_Tyurin1;~Peter_Richtarik1", "aff": "ETHZ - ETH Zurich;KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "ethz.ch;kaust.edu.sa;kaust.edu.sa", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nfatkhullin2023momentum,\ntitle={Momentum Provably Improves Error Feedback!},\nauthor={Ilyas Fatkhullin and Alexander Tyurin and Peter Richt{\\'a}rik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1h92PmnKov}\n}", "github": "", "project": "", "reviewers": "Pi6q;hi3u;naHZ;5FHY", "pdf_size": 4031494, "rating": "4;5;6;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "77;61;51;77", "wc_strengths": "40;60;29;67", "wc_weaknesses": "134;46;65;179", "wc_questions": "9;19;25;2", "wc_limitations": "20;22;1;1", "wc_review": "280;208;171;326", "wc_reply_reviewers": "174;0;11;53", "wc_reply_authors": "1001;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.5, 11.07925990308017 ], "wc_strengths_avg": [ 49.0, 15.215124054702938 ], "wc_weaknesses_avg": [ 106.0, 53.37134062397159 ], "wc_questions_avg": [ 13.75, 8.870597499605086 ], "wc_limitations_avg": [ 11.0, 10.024968827881711 ], "wc_review_avg": [ 246.25, 60.46641629863639 ], "wc_reply_reviewers_avg": [ 59.5, 69.00181157042184 ], "wc_reply_authors_avg": [ 250.25, 433.44571459411156 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15691613414977797483&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "ethz.ch;kaust.edu.sa;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "ETH Zurich;King Abdullah University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.kaust.edu.sa", "aff_unique_abbr": "ETHZ;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;Saudi Arabia" }, { "title": "Find What You Want: Learning Demand-conditioned Object Attribute
Space for Demand-driven Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73004", "id": "1hZwxBgQ3G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34e278fbbd7d6d7d788c98065988e1a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1hZwxBgQ3G", "openreview": "https://openreview.net/forum?id=1hZwxBgQ3G", "poster": "/media/PosterPDFs/NeurIPS%202023/73004.png?t=1701251527.6189756", "slides": "https://nips.cc/virtual/2023/poster/73004", "video": "https://nips.cc/virtual/2023/poster/73004", "author_site": "Hongcheng Wang, Andy Guan Hong Chen, Xiaoqi Li, Mingdong Wu, Hao Dong", "tldr": "", "abstract": "The task of Visual Object Navigation (VON) involves an agent's ability to locate a particular object within a given scene. To successfully accomplish the VON task, two essential conditions must be fulfilled: 1) the user knows the name of the desired object; and 2) the user-specified object is actually present within the scene. To meet these conditions, a simulator can incorporate predefined object names and positions into the metadata of the scene. However, in real-world scenarios, it is often challenging to ensure that these conditions are always met. Humans in an unfamiliar environment may not know which objects are present in the scene, or they may mistakenly specify an object that is not actually present. Nevertheless, despite these challenges, humans may still have a demand for an object, which could potentially be fulfilled by other objects present within the scene in an equivalent manner. Hence, this paper proposes Demand-driven Navigation (DDN), which leverages the user's demand as the task instruction and prompts the agent to find an object which matches the specified demand. DDN aims to relax the stringent conditions of VON by focusing on fulfilling the user's demand rather than relying solely on specified object names. This paper proposes a method of acquiring textual attribute features of objects by extracting common sense knowledge from a large language model (LLM). These textual attribute features are subsequently aligned with visual attribute features using Contrastive Language-Image Pre-training (CLIP). Incorporating the visual attribute features as prior knowledge enhances the navigation process. Experiments on AI2Thor with the ProcThor dataset demonstrate that the visual attribute features improve the agent's navigation performance and outperform the baseline methods commonly used in the VON and VLN tasks, as well as methods with LLMs.
The code and demonstrations can be viewed at https://sites.google.com/view/demand-driven-navigation.", "keywords": "Visual Navigation;Demand-Driven Navigation", "primary_area": "", "supplementary_material": "/attachment/6b530c243a4bcfd07f5c2dc3a2edd96115f0d9e9.zip", "author": "Hongcheng Wang;Andy Guan Hong Chen;Xiaoqi Li;Mingdong Wu;Hao Dong", "authorids": "~Hongcheng_Wang6;~Andy_Guan_Hong_Chen1;~Xiaoqi_Li3;~Mingdong_Wu1;~Hao_Dong3", "gender": "M;M;M;M;F", "homepage": ";https://github.com/cghAndy;https://aaronanima.github.io/;https://zsdonghao.github.io;https://clorislili.github.io/clorisLi/", "dblp": "76/1170;;315/5136;14/1525-3.html;357/1937", "google_scholar": ";;https://scholar.google.com/citations?hl=en;xLFL4sMAAAAJ;vkQ5_LIAAAAJ", "orcid": ";;;0000-0003-2261-9122;", "linkedin": "%E9%B8%BF%E9%93%96-%E7%8E%8B-4a66451b9/;;;;xiaoqi-li/", "or_profile": "~Hongcheng_Wang6;~Andy_Guan_Hong_Chen1;~Mingdong_Wu1;~Hao_Dong3;~Xiaoqi_Cloris_Li1", "aff": "Peking University;Peking University;Center on Frontiers of Computing Studies,Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nwang2023find,\ntitle={Find What You Want: Learning Demand-conditioned Object Attribute Space for Demand-driven Navigation},\nauthor={Hongcheng Wang and Andy Guan Hong Chen and Xiaoqi Li and Mingdong Wu and Hao Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1hZwxBgQ3G}\n}", "github": "", "project": "", "reviewers": "i2zg;7yMw;nwVS;FyRT;JLnR", "pdf_size": 2053300, "rating": "5;5;6;7;7", "confidence": "5;4;5;4;4", "soundness": "3;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "119;346;75;93;148", "wc_strengths": "54;97;91;25;108", "wc_weaknesses": "149;237;189;226;609", "wc_questions": "112;107;17;28;22", "wc_limitations": "6;97;7;9;5", "wc_review": "440;884;379;381;892", "wc_reply_reviewers": "0;180;23;54;495", "wc_reply_authors": "325;218;25;35;896", "reply_reviewers": "0;1;1;1;2", "reply_authors": "3;3;2;2;5", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 156.2, 98.04366374223271 ], "wc_strengths_avg": [ 75.0, 30.886890422961002 ], "wc_weaknesses_avg": [ 282.0, 166.38990353984823 ], "wc_questions_avg": [ 57.2, 42.87376820387963 ], "wc_limitations_avg": [ 24.8, 36.124230095602044 ], "wc_review_avg": [ 595.2, 240.08615120410423 ], "wc_reply_reviewers_avg": [ 150.4, 183.177072801156 ], "wc_reply_authors_avg": [ 299.8, 318.87765679018656 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4564354645876384, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4941089538177717572&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "",
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Efficient Image Compression Without Autoregressive Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73003", "id": "1ihGy9vAIg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/170dc3e41f2d03e327e04dbab0fccbfb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1ihGy9vAIg", "openreview": "https://openreview.net/forum?id=1ihGy9vAIg", "poster": "/media/PosterPDFs/NeurIPS%202023/73003.png?t=1701693501.2617736", "slides": "https://nips.cc/virtual/2023/poster/73003", "video": "https://nips.cc/virtual/2023/poster/73003", "author_site": "Muhammad Salman Ali, Yeongwoong Kim, Maryam Qamar, Sung-Chang Lim, Donghyun Kim, Chaoning Zhang, Sung-Ho Bae, Hui Yong Kim", "tldr": "", "abstract": "Recently, learned image compression (LIC) has garnered increasing interest with its rapidly improving performance surpassing conventional codecs. A key ingredient of LIC is a hyperprior-based entropy model, where the underlying joint probability of the latent image features is modeled as a product of Gaussian distributions from each latent element. Since latents from the actual images are not spatially independent, autoregressive (AR) context based entropy models were proposed to handle the discrepancy between the assumed distribution and the actual distribution. Though the AR-based models have proven effective, the computational complexity is significantly increased due to the inherent sequential nature of the algorithm. \nIn this paper, we present a novel alternative to the AR-based approach that can provide a significantly better trade-off between performance and complexity. To minimize the discrepancy, we introduce a correlation loss that forces the latents to be spatially decorrelated and better fitted to the independent probability model. Our correlation loss is proved to act as a general plug-in for the hyperprior (HP) based learned image compression methods. The performance gain from our correlation loss is \u2018free\u2019 in terms of computation complexity for both inference time and decoding time. 
To our knowledge, our method gives the best trade-off between complexity and performance: combined with the Checkerboard-CM, it attains **90%**, and combined with the ChARM-CM, **98%**, of the AR-based BD-Rate gains, while being around **50 times** and **30 times** faster than AR-based methods, respectively.", "keywords": "Image Compression;Correlation", "primary_area": "", "supplementary_material": "/attachment/2041796e0f1084552e28ff7d89cc978363b5d151.pdf", "author": "Muhammad Salman Ali;Yeongwoong Kim;Maryam Qamar;Sung-Chang Lim;Donghyun Kim;Chaoning Zhang;Sung-Ho Bae;Hui Yong Kim", "authorids": "~Muhammad_Salman_Ali1;~Yeongwoong_Kim1;~Maryam_Qamar2;~Sung-Chang_Lim1;~Donghyun_Kim10;~Chaoning_Zhang1;~Sung-Ho_Bae1;~Hui_Yong_Kim2", "gender": "M;M;;M;M;M;M;M", "homepage": ";https://github.com/herok97;;https://sites.google.com/view/sung-changlim/home;https://scholar.google.com/citations?user=2LCfJX0AAAAJ&hl=ko;;https://sites.google.com/a/khu.ac.kr/mlvc/;http://vmlab.khu.ac.kr", "dblp": "273/9219;350/5284;314/0016;92/1405;;;76/2068;59/8842", "google_scholar": "qbreZUIAAAAJ;;;https://scholar.google.co.kr/citations?user=m8nqi7wAAAAJ;2LCfJX0AAAAJ;https://scholar.google.co.kr/citations?user=lvhxhyQAAAAJ;https://scholar.google.co.kr/citations?user=EULut5oAAAAJ;qbiBc50AAAAJ", "orcid": "0000-0002-8548-3827;0000-0001-7378-7367;;0000-0002-0110-7489;0000-0002-1289-0667;;;0000-0001-7308-133X", "linkedin": "muhammad-salman-ali-046a74125/;;maryam-qamar-091464139?originalSubdomain=pk;;;;;", "or_profile": "~Muhammad_Salman_Ali1;~Yeongwoong_Kim1;~Maryam_Qamar2;~Sung-Chang_Lim1;~Donghyun_Kim10;~Chaoning_Zhang1;~Sung-Ho_Bae1;~Hui_Yong_Kim2", "aff": "Kyung Hee University;Kyung Hee University;Kyung Hee University;Electronics and Telecommunications Research Institute (ETRI);Electronics and Telecommunications Research Institute (ETRI);Kyung Hee University;Kyung Hee University;Kyung Hee University", "aff_domain": "khu.ac.kr;khu.ac.kr;khu.ac.kr;etri.re.kr;etri.re.kr;khu.ac.kr;khu.ac.kr;khu.ac.kr", "position": "PhD student;MS student;PhD student;Principal Researcher;Researcher;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nali2023towards,\ntitle={Towards Efficient Image Compression Without Autoregressive Models},\nauthor={Muhammad Salman Ali and Yeongwoong Kim and Maryam Qamar and Sung-Chang Lim and Donghyun Kim and Chaoning Zhang and Sung-Ho Bae and Hui Yong Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1ihGy9vAIg}\n}", "github": "", "project": "", "reviewers": "qpdX;vwAa;7bvo;xepj", "pdf_size": 1606068, "rating": "5;5;6;8", "confidence": "4;5;4;5", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "2;2;3;4", "wc_summary": "46;51;163;79", "wc_strengths": "76;49;133;74", "wc_weaknesses": "248;100;226;153", "wc_questions": "1;39;55;55", "wc_limitations": "4;1;2;1", "wc_review": "375;240;579;362", "wc_reply_reviewers": "0;11;0;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 84.75, 46.89549551929268 ], "wc_strengths_avg": [ 83.0, 30.76524012583032 ], "wc_weaknesses_avg": [ 181.75, 58.8573487340366 ], "wc_questions_avg": [ 37.5, 22.06241147291021 ], "wc_limitations_avg": [ 2.0,
1.224744871391589 ], "wc_review_avg": [ 389.0, 121.68196250882873 ], "wc_reply_reviewers_avg": [ 8.0, 8.74642784226795 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1185200562524744063&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "khu.ac.kr;khu.ac.kr;khu.ac.kr;etri.re.kr;etri.re.kr;khu.ac.kr;khu.ac.kr;khu.ac.kr", "author_num": 8, "aff_unique_index": "0;0;0;1;1;0;0;0", "aff_unique_norm": "Kyung Hee University;Electronics and Telecommunications Research Institute", "aff_unique_dep": ";", "aff_unique_url": "http://www.khu.ac.kr;https://www.etri.re.kr", "aff_unique_abbr": "KHU;ETRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Neural Sculpting: Uncovering hierarchically modular task structure in neural networks through pruning and network analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73002", "id": "1jhmWkZGy6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3b1675de6b49cc00084374213f8c38ae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1jhmWkZGy6", "openreview": "https://openreview.net/forum?id=1jhmWkZGy6", "poster": "/media/PosterPDFs/NeurIPS%202023/73002.png?t=1701706696.183266", "slides": "https://nips.cc/virtual/2023/poster/73002", "video": "https://nips.cc/virtual/2023/poster/73002", "author_site": "Shreyas Malakarjun Patil, Loizos Michael, Constantine Dovrolis", "tldr": "", "abstract": "Natural target functions and tasks typically exhibit hierarchical modularity -- they can be broken down into simpler sub-functions that are organized in a hierarchy. Such sub-functions have two important features: they have a distinct set of inputs (input-separability) and they are reused as inputs higher in the hierarchy (reusability). Previous studies have established that hierarchically modular neural networks, which are inherently sparse, offer benefits such as learning efficiency, generalization, multi-task learning, and transfer. However, identifying the underlying sub-functions and their hierarchical structure for a given task can be challenging. The high-level question in this work is: if we learn a task using a sufficiently deep neural network, how can we uncover the underlying hierarchy of sub-functions in that task? As a starting point, we examine the domain of Boolean functions, where it is easier to determine whether a task is hierarchically modular. We propose an approach based on iterative unit and edge pruning (during training), combined with network analysis for module detection and hierarchy inference. 
Finally, we demonstrate that this method can uncover the hierarchical modularity of a wide range of Boolean functions and two vision tasks based on the MNIST digits dataset.", "keywords": "Neural networks;Hierarchical modularity;Pruning;Sparsity", "primary_area": "", "supplementary_material": "", "author": "Shreyas Malakarjun Patil;Loizos Michael;Constantine Dovrolis", "authorids": "~Shreyas_Malakarjun_Patil1;~Loizos_Michael2;~Constantine_Dovrolis1", "gender": "M;M;", "homepage": ";http://www.cc.gatech.edu/~dovrolis/;http://cognition.ouc.ac.cy/loizos/", "dblp": "206/6500;d/ConstantinosDovrolis;21/6730", "google_scholar": "https://scholar.google.co.in/citations?user=iKVRiDsAAAAJ;https://scholar.google.com/citations?hl=en;8w5R3AQAAAAJ", "orcid": ";;", "linkedin": "shreyas-malakarjun-patil;;", "or_profile": "~Shreyas_Malakarjun_Patil1;~Constantine_Dovrolis1;~Loizos_Michael1", "aff": "Georgia Institute of Technology;College of Computing, Georgia Institute of Technology;Open University of Cyprus", "aff_domain": "gatech.edu;cc.gatech.edu;ouc.ac.cy", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\npatil2023neural,\ntitle={Neural Sculpting: Uncovering hierarchically modular task structure in neural networks through pruning and network analysis},\nauthor={Shreyas Malakarjun Patil and Loizos Michael and Constantine Dovrolis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1jhmWkZGy6}\n}", "github": "", "project": "", "reviewers": "znid;Czk6;fXf6", "pdf_size": 25561298, "rating": "6;7;8", "confidence": "4;3;5", "soundness": "2;4;4", "novelty": "2;3;4", "presentation": "3;3;4", "wc_summary": "49;99;65", "wc_strengths": "51;81;56", "wc_weaknesses": "142;87;27", "wc_questions": "113;91;88", "wc_limitations": "1;40;1", "wc_review": "356;398;237", "wc_reply_reviewers": "12;10;15", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.0, 20.848661028149188 ], "wc_strengths_avg": [ 62.666666666666664, 13.123346456686352 ], "wc_weaknesses_avg": [ 85.33333333333333, 46.96334267868457 ], "wc_questions_avg": [ 97.33333333333333, 11.14550233153366 ], "wc_limitations_avg": [ 14.0, 18.384776310850235 ], "wc_review_avg": [ 330.3333333333333, 68.18764958227814 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 2.0548046676563256 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4125503240176415267&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "gatech.edu;cc.gatech.edu;ouc.ac.cy", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Open University of Cyprus", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.ouc.ac.cy", "aff_unique_abbr": "Georgia Tech;OUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Cyprus" }, { "title": "Revealing the unseen: Benchmarking video action recognition under 
occlusion", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73717", "id": "1jrYSOG7DR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cef53466b62aebbcf8aa2210a89b33a1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1jrYSOG7DR", "openreview": "https://openreview.net/forum?id=1jrYSOG7DR", "poster": "/media/PosterPDFs/NeurIPS%202023/73717.png?t=1701376591.0304701", "slides": "https://nips.cc/virtual/2023/poster/73717", "video": "https://nips.cc/virtual/2023/poster/73717", "author_site": "Shresth Grover, Vibhav Vineet, Yogesh Rawat", "tldr": "", "abstract": "In this work, we study the effect of occlusion on video action recognition. To\nfacilitate this study, we propose three benchmark datasets and experiment with\nseven different video action recognition models. These datasets include two synthetic benchmarks, UCF-101-O and K-400-O, which enabled understanding the\n effects of fundamental properties of occlusion via controlled experiments. We also\n propose a real-world occlusion dataset, UCF-101-Y-OCC, which helps in further\n validating the findings of this study. We find several interesting insights such as 1)\n transformers are more robust than CNN counterparts, 2) pretraining make models\nrobust against occlusions, and 3) augmentation helps, but does not generalize\n well to real-world occlusions. In addition, we propose a simple transformer based\n compositional model, termed as CTx-Net, which generalizes well under this distribution shift. We observe that CTx-Net outperforms models which are trained\n using occlusions as augmentation, performing significantly better under natural\n occlusions. We believe this benchmark will open up interesting future research in\n robust video action recognition", "keywords": "video action recognition;occlusion;benchmark;compositional", "primary_area": "", "supplementary_material": "/attachment/62aa6ff8b77920a70bc53753dfc29d58ceaa1048.zip", "author": "Shresth Grover;Vibhav Vineet;Yogesh S Rawat", "authorids": "~Shresth_Grover1;~Vibhav_Vineet5;~Yogesh_S_Rawat1", "gender": "M;;M", "homepage": ";;https://www.crcv.ucf.edu/person/rawat/", "dblp": ";;148/2258", "google_scholar": ";;D_JvEcwAAAAJ", "orcid": ";;", "linkedin": "shrgo/;;", "or_profile": "~Shresth_Grover1;~Vibhav_Vineet5;~Yogesh_S_Rawat1", "aff": "Indian Institute of Technology, Kanpur, Dhirubhai Ambani Institute Of Information and Communication Technology;;University of Central Florida", "aff_domain": "iitk.ac.in;;ucf.edu", "position": "Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\ngrover2023revealing,\ntitle={Revealing the unseen: Benchmarking video action recognition under occlusion},\nauthor={Shresth Grover and Vibhav Vineet and Yogesh S Rawat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=1jrYSOG7DR}\n}", "github": "", "project": "", "reviewers": "AuqS;EmRM;pkpL;6zfo", "pdf_size": 15212574, "rating": "6;6;6;7", "confidence": "3;4;5;3", "wc_summary_and_contributions": "60;77;103;149", "wc_strengths": "37;126;158;55", "wc_improvement": "33;130;97;259", "wc_limitations": "46;3;166;1", "wc_correctness": "8;8;15;64", "wc_clarity": "6;24;10;5", "wc_relation_to_prior_work": "20;5;18;16", "wc_documentation": "25;4;25;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "236;378;593;561", "wc_reply_reviewers": "36;53;60;64", "wc_reply_authors": 
"363;566;825;1413", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 97.25, 33.573613150806395 ], "wc_strengths_avg": [ 94.0, 49.72423956180728 ], "wc_improvement_avg": [ 129.75, 82.36921451610425 ], "wc_limitations_avg": [ 54.0, 67.11557196359128 ], "wc_correctness_avg": [ 23.75, 23.41340428045439 ], "wc_clarity_avg": [ 11.25, 7.595228765481656 ], "wc_relation_to_prior_work_avg": [ 14.75, 5.80409338312195 ], "wc_documentation_avg": [ 16.25, 9.093266739736606 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 442.0, 144.47664171069314 ], "wc_reply_reviewers_avg": [ 53.25, 10.709224995301948 ], "wc_reply_authors_avg": [ 791.75, 394.2863014358982 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1256573851598498295&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "iitk.ac.in;;ucf.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Indian Institute of Technology, Kanpur;University of Central Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitk.ac.in;https://www.ucf.edu", "aff_unique_abbr": "IIT Kanpur;UCF", "aff_campus_unique_index": "0", "aff_campus_unique": "Kanpur;", "aff_country_unique_index": "0;1", "aff_country_unique": "India;United States" }, { "title": "Implicit Transfer Operator Learning: Multiple Time-Resolution Models for Molecular Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73001", "id": "1kZx7JiuA2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7274ed909a312d4d869cc328ad1c5f04-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1kZx7JiuA2", "openreview": "https://openreview.net/forum?id=1kZx7JiuA2", "poster": "/media/PosterPDFs/NeurIPS%202023/73001.png?t=1701623679.0304482", "slides": "https://nips.cc/virtual/2023/poster/73001", "video": "https://nips.cc/virtual/2023/poster/73001", "author_site": "Mathias Schreiner, Ole Winther, Simon Olsson", "tldr": "", "abstract": "Computing properties of molecular systems rely on estimating expectations of the (unnormalized) Boltzmann distribution. Molecular dynamics (MD) is a broadly adopted technique to approximate such quantities. However, stable simulations rely on very small integration time-steps ($10^{-15}\\,\\mathrm{s}$), whereas convergence of some moments, e.g. binding free energy or rates, might rely on sampling processes on time-scales as long as $10^{-1}\\, \\mathrm{s}$, and these simulations must be repeated for every molecular system independently. Here, we present Implicit Transfer Operator (ITO) Learning, a framework to learn surrogates of the simulation process with multiple time-resolutions. We implement ITO with denoising diffusion probabilistic models with a new SE(3) equivariant architecture and show the resulting models can generate self-consistent stochastic dynamics across multiple time-scales, even when the system is only partially observed. Finally, we present a coarse-grained CG-SE3-ITO model which can quantitatively model all-atom molecular dynamics using only coarse molecular representations. As such, ITO provides an important step towards multiple time- and space-resolution acceleration of MD. 
Code is available at \\href{https://github.com/olsson-group/ito}{https://github.com/olsson-group/ito}.", "keywords": "AI4Science;Molecular Dynamics;equivariant neural networks;stochastic dynamics", "primary_area": "", "supplementary_material": "/attachment/10eef823cf82ccb20f470315a6dee70827160f6c.pdf", "author": "Mathias Schreiner;Ole Winther;Simon Olsson", "authorids": "matschreiner@gmail.com;~Ole_Winther1;~Simon_Olsson1", "gender": ";M;Not Specified", "homepage": ";https://olewinther.github.io/;http://www.cse.chalmers.se/~simonols/", "dblp": ";36/1568;", "google_scholar": ";7VAwhzUAAAAJ;", "orcid": ";0000-0002-1966-3205;", "linkedin": ";owinther/;", "or_profile": "matschreiner@gmail.com;~Ole_Winther1;~Simon_Olsson1", "aff": ";Technical University of Denmark;Chalmers University of Technology and University of Gothenburg", "aff_domain": ";dtu.dk;chalmers.se", "position": ";Full Professor;Associate Professor", "bibtex": "@inproceedings{\nschreiner2023implicit,\ntitle={Implicit Transfer Operator Learning: Multiple Time-Resolution Models for Molecular Dynamics},\nauthor={Mathias Schreiner and Ole Winther and Simon Olsson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1kZx7JiuA2}\n}", "github": "", "project": "", "reviewers": "Ru8L;Dude;nRNz;r4Zz;BcfP", "pdf_size": 6361589, "rating": "4;5;5;6;7", "confidence": "2;2;4;4;2", "soundness": "2;3;2;4;3", "novelty": "2;2;3;2;3", "presentation": "3;2;2;1;3", "wc_summary": "102;50;76;160;52", "wc_strengths": "13;74;33;178;62", "wc_weaknesses": "134;224;386;643;26", "wc_questions": "219;171;108;142;41", "wc_limitations": "76;1;4;121;42", "wc_review": "544;520;607;1244;223", "wc_reply_reviewers": "632;18;88;410;0", "wc_reply_authors": "353;2;71;342;0", "reply_reviewers": "2;1;2;3;0", "reply_authors": "3;2;3;4;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 88.0, 40.65464303127012 ], "wc_strengths_avg": [ 72.0, 57.16992216192008 ], "wc_weaknesses_avg": [ 282.6, 215.37836474446547 ], "wc_questions_avg": [ 136.2, 59.93129399570812 ], "wc_limitations_avg": [ 48.8, 45.36694832143771 ], "wc_review_avg": [ 627.6, 335.4522916898914 ], "wc_reply_reviewers_avg": [ 229.6, 249.80440348400583 ], "wc_reply_authors_avg": [ 153.6, 160.40773048703107 ], "reply_reviewers_avg": [ 1.6, 1.019803902718557 ], "reply_authors_avg": [ 2.6, 1.019803902718557 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.08006407690254366, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=52154105619033207&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";dtu.dk;chalmers.se", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Technical University of Denmark;Chalmers University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tek.dk;https://www.chalmers.se", "aff_unique_abbr": "DTU;Chalmers", "aff_campus_unique_index": "1", "aff_campus_unique": ";Gothenburg", "aff_country_unique_index": "0;1", "aff_country_unique": "Denmark;Sweden" }, { "title": "Exponentially Convergent Algorithms for Supervised Matrix Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73000", "id": "1kgK0r8PGg", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2c80b3c9cf8102d38c4b21af25d9740-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1kgK0r8PGg", "openreview": "https://openreview.net/forum?id=1kgK0r8PGg", "poster": "/media/PosterPDFs/NeurIPS%202023/73000.png?t=1702337013.7321205", "slides": "https://nips.cc/virtual/2023/poster/73000", "video": "https://nips.cc/virtual/2023/poster/73000", "author_site": "Joowon Lee, Hanbaek Lyu, Weixin Yao", "tldr": "", "abstract": "Supervised matrix factorization (SMF) is a classical machine learning method that simultaneously seeks feature extraction and classification tasks, which are not necessarily a priori aligned objectives. Our goal is to use SMF to learn low-rank latent factors that offer interpretable, data-reconstructive, and class-discriminative features, addressing challenges posed by high-dimensional data. Training SMF model involves solving a nonconvex and possibly constrained optimization with at least three blocks of parameters. Known algorithms are either heuristic or provide weak convergence guarantees for special cases. In this paper, we provide a novel framework that `lifts' SMF as a low-rank matrix estimation problem in a combined factor space and propose an efficient algorithm that provably converges exponentially fast to a global minimizer of the objective with arbitrary initialization under mild assumptions. Our framework applies to a wide range of SMF-type problems for multi-class classification with auxiliary features. To showcase an application, we demonstrate that our algorithm successfully identified well-known cancer-associated gene groups for various cancers.", "keywords": "Supervised matrix factorization;multi-objective optimization;global convergence;linear convergence;statistical estimation", "primary_area": "", "supplementary_material": "/attachment/9488ae0dd7985b87b9911bd9a67cd1e3e0ae8c89.pdf", "author": "Joowon Lee;Hanbaek Lyu;Weixin Yao", "authorids": "~Joowon_Lee1;hlyu@math.wisc.edu;weixin.yao@ucr.edu", "gender": "F;;", "homepage": "https://ljw9510.github.io/joowonlee/;;", "dblp": ";;", "google_scholar": "uBtJWX8AAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Joowon_Lee1;hlyu@math.wisc.edu;weixin.yao@ucr.edu", "aff": "University of Wisconsin - Madison;;", "aff_domain": "wisc.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nlee2023exponentially,\ntitle={Exponentially Convergent Algorithms for Supervised Matrix Factorization},\nauthor={Joowon Lee and Hanbaek Lyu and Weixin Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1kgK0r8PGg}\n}", "github": "", "project": "", "reviewers": "pg2E;zTTY;ucPD;bLMw", "pdf_size": 1545552, "rating": "4;6;7;7", "confidence": "4;2;3;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "32;68;69;149", "wc_strengths": "30;53;47;45", "wc_weaknesses": "564;269;32;63", "wc_questions": "2;2;26;130", "wc_limitations": "2;1;7;7", "wc_review": "630;393;181;394", "wc_reply_reviewers": "877;18;0;35", "wc_reply_authors": "2095;0;0;16", "reply_reviewers": "4;1;0;1", "reply_authors": "4;1;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 42.80478945164898 ], "wc_strengths_avg": [ 43.75, 8.46684711093805 ], "wc_weaknesses_avg": [ 
232.0, 212.2227603250886 ], "wc_questions_avg": [ 40.0, 52.87721626560914 ], "wc_limitations_avg": [ 4.25, 2.7726341266023544 ], "wc_review_avg": [ 399.5, 158.85921440067617 ], "wc_reply_reviewers_avg": [ 232.5, 372.30800421156675 ], "wc_reply_authors_avg": [ 527.75, 904.8757856744759 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.24618298195866545, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5054451937126564124&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "wisc.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Laplacian Canonization: A Minimalist Approach to Sign and Basis Invariant Spectral Embedding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72999", "id": "1mAYtdoYw6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/257b3a7438b1f3709e91a86adf2fdc0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1mAYtdoYw6", "openreview": "https://openreview.net/forum?id=1mAYtdoYw6", "poster": "/media/PosterPDFs/NeurIPS%202023/72999.png?t=1702348271.4695568", "slides": "https://nips.cc/virtual/2023/poster/72999", "video": "https://nips.cc/virtual/2023/poster/72999", "author_site": "George Ma, Yifei Wang, Yisen Wang", "tldr": "", "abstract": "Spectral embedding is a powerful graph embedding technique that has received a lot of attention recently due to its effectiveness on Graph Transformers. However, from a theoretical perspective, the universal expressive power of spectral embedding comes at the price of losing two important invariance properties of graphs, sign and basis invariance, which also limits its effectiveness on graph data. To remedy this issue, many previous methods develop costly approaches to learn new invariants and suffer from high computational complexity. In this work, we explore a minimal approach that resolves the ambiguity issues by directly finding canonical directions for the eigenvectors, named Laplacian Canonization (LC). As a pure pre-processing method, LC is lightweight and can be applied to any existing GNNs. We provide a thorough investigation, from theory to algorithm, of this approach, and discover an efficient algorithm named Maximal Axis Projection (MAP) that works for both sign and basis invariance and successfully canonizes more than 90\% of all eigenvectors. Experiments on real-world benchmark datasets like ZINC, MOLTOX21, and MOLPCBA show that MAP consistently outperforms existing methods while bringing minimal computational overhead.
Code is available at https://github.com/PKU-ML/LaplacianCanonization.", "keywords": "Graph Neural Networks;Positional Encoding;Spectral Embedding;Laplacian Eigenvectors", "primary_area": "", "supplementary_material": "/attachment/1a3af39b7e69629218067f382c21a9cc126281b5.zip", "author": "George Ma;Yifei Wang;Yisen Wang", "authorids": "~George_Ma1;~Yifei_Wang1;~Yisen_Wang1", "gender": "M;M;M", "homepage": "https://github.com/GeorgeMLP;https://yifeiwang77.com;https://yisenwang.github.io/", "dblp": "86/8408;00/555-1;172/1346-1", "google_scholar": "kiYSRMkAAAAJ;-CLy6YsAAAAJ;uMWPDboAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~George_Ma1;~Yifei_Wang1;~Yisen_Wang1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nma2023laplacian,\ntitle={Laplacian Canonization: A Minimalist Approach to Sign and Basis Invariant Spectral Embedding},\nauthor={George Ma and Yifei Wang and Yisen Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1mAYtdoYw6}\n}", "github": "", "project": "", "reviewers": "NB7V;zwuu;pkjp;R8b6;ZUZt", "pdf_size": 642383, "rating": "5;6;6;7;7", "confidence": "2;4;3;3;2", "soundness": "2;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;4;3;4;4", "wc_summary": "68;46;80;106;62", "wc_strengths": "17;104;110;74;20", "wc_weaknesses": "109;242;84;51;119", "wc_questions": "272;4;2;214;79", "wc_limitations": "60;6;30;3;16", "wc_review": "526;402;306;448;296", "wc_reply_reviewers": "1192;33;17;20;33", "wc_reply_authors": "1556;0;0;0;0", "reply_reviewers": "4;1;1;1;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 72.4, 20.05592181875468 ], "wc_strengths_avg": [ 65.0, 39.88984833262719 ], "wc_weaknesses_avg": [ 121.0, 64.89684121742752 ], "wc_questions_avg": [ 114.2, 110.30213053246071 ], "wc_limitations_avg": [ 23.0, 20.765355763867856 ], "wc_review_avg": [ 395.6, 86.878305692503 ], "wc_reply_reviewers_avg": [ 259.0, 466.54603202685155 ], "wc_reply_authors_avg": [ 311.2, 622.3999999999999 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0714285714285715, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6748014451676080528&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Exploring the Optimal Choice for Generative Processes in Diffusion Models: Ordinary vs Stochastic Differential Equations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72998", "id": "1mJQq6zYaE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a27ee6f66d13557f15f070274c51721-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1mJQq6zYaE", "openreview": 
"https://openreview.net/forum?id=1mJQq6zYaE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72998", "video": "https://nips.cc/virtual/2023/poster/72998", "author_site": "Yu Cao, Jingrun Chen, Yixin Luo, Xiang ZHOU", "tldr": "", "abstract": "The diffusion model has shown remarkable success in computer vision, but it remains unclear whether the ODE-based probability flow or the SDE-based diffusion model is more superior and under what circumstances. Comparing the two is challenging due to dependencies on data distributions, score training, and other numerical issues. In this paper, we study the problem mathematically for two limiting scenarios: the zero diffusion (ODE) case and the large diffusion case. We first introduce a pulse-shape error to perturb the score function and analyze error accumulation of sampling quality, followed by a thorough analysis for generalization to arbitrary error. Our findings indicate that when the perturbation occurs at the end of the generative process, the ODE model outperforms the SDE model with a large diffusion coefficient. However, when the perturbation occurs earlier, the SDE model outperforms the ODE model, and we demonstrate that the error of sample generation due to such a pulse-shape perturbation is exponentially suppressed as the diffusion term's magnitude increases to infinity. Numerical validation of this phenomenon is provided using Gaussian, Gaussian mixture, and Swiss roll distribution, as well as realistic datasets like MNIST and CIFAR-10.", "keywords": "diffusion models; stochastic differential equations; score-based generative models; asymptotic analysis", "primary_area": "", "supplementary_material": "", "author": "Yu Cao;Jingrun Chen;Yixin Luo;Xiang ZHOU", "authorids": "~Yu_Cao7;~Jingrun_Chen2;~Yixin_Luo1;~Xiang_ZHOU10", "gender": ";M;M;M", "homepage": ";https://sz.ustc.edu.cn/rcdw_show/46.html;;", "dblp": ";166/4314.html;;", "google_scholar": ";;;Z0RHcg4AAAAJ", "orcid": "0000-0002-2630-2475;;;0000-0002-3835-3894", "linkedin": ";;;", "or_profile": "~Yu_Cao7;~Jingrun_Chen2;~Yixin_Luo1;~Xiang_ZHOU10", "aff": "Shanghai Jiaotong University;University of Science and Technology of China;;City University of Hong Kong", "aff_domain": "sjtu.edu.cn;ustc.edu.cn;;cityu.edu.hk", "position": "Associate Professor;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\ncao2023exploring,\ntitle={Exploring the Optimal Choice for Generative Processes in Diffusion Models: Ordinary vs Stochastic Differential Equations},\nauthor={Yu Cao and Jingrun Chen and Yixin Luo and Xiang ZHOU},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1mJQq6zYaE}\n}", "github": "", "project": "", "reviewers": "LAed;wiU7;Luhj;pYKm;GpQ2", "pdf_size": 3655329, "rating": "3;3;7;7;7", "confidence": "4;1;4;3;4", "soundness": "2;2;4;4;4", "novelty": "1;2;2;3;3", "presentation": "2;2;2;3;4", "wc_summary": "20;32;390;137;91", "wc_strengths": "18;29;208;132;97", "wc_weaknesses": "138;46;174;41;18", "wc_questions": "2;1;178;293;42", "wc_limitations": "1;24;1;276;20", "wc_review": "179;132;951;879;268", "wc_reply_reviewers": "360;68;0;7;137", "wc_reply_authors": "590;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.4, 1.9595917942265424 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.2, 0.9797958971132712 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 134.0, 134.7545917585 
], "wc_strengths_avg": [ 96.8, 69.87245523094205 ], "wc_weaknesses_avg": [ 83.4, 61.0953353374871 ], "wc_questions_avg": [ 103.2, 114.9354601504688 ], "wc_limitations_avg": [ 64.4, 106.22353788120597 ], "wc_review_avg": [ 481.8, 357.12037186360567 ], "wc_reply_reviewers_avg": [ 114.4, 132.36706539014907 ], "wc_reply_authors_avg": [ 118.0, 236.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4900980294098034, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4268663169327589650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;ustc.edu.cn;;cityu.edu.hk", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Shanghai Jiao Tong University;University of Science and Technology of China;City University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.ustc.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "SJTU;USTC;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Adjustable Robust Reinforcement Learning for Online 3D Bin Packing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72997", "id": "1mdTYi1jAW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a345ed605675c7c484e740a8ceaa6b45-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1mdTYi1jAW", "openreview": "https://openreview.net/forum?id=1mdTYi1jAW", "poster": "/media/PosterPDFs/NeurIPS%202023/72997.png?t=1698161592.3872242", "slides": "https://nips.cc/virtual/2023/poster/72997", "video": "https://nips.cc/virtual/2023/poster/72997", "author_site": "Yuxin Pan, Yize Chen, Fangzhen Lin", "tldr": "", "abstract": "Designing effective policies for the online 3D bin packing problem (3D-BPP) has been a long-standing challenge, primarily due to the unpredictable nature of incoming box sequences and stringent physical constraints. While current deep reinforcement learning (DRL) methods for online 3D-BPP have shown promising results in optimizing average performance over an underlying box sequence distribution, they often fail in real-world settings where some worst-case scenarios can materialize. Standard robust DRL algorithms tend to overly prioritize optimizing the worst-case performance at the expense of performance under normal problem instance distribution. To address these issues, we first introduce a permutation-based attacker to investigate the practical robustness of both DRL-based and heuristic methods proposed for solving online 3D-BPP. Then, we propose an adjustable robust reinforcement learning (AR2L) framework that allows efficient adjustment of robustness weights to achieve the desired balance of the policy's performance in average and worst-case environments. Specifically, we formulate the objective function as a weighted sum of expected and worst-case returns, and derive the lower performance bound by relating to the return under a mixture dynamics. To realize this lower bound, we adopt an iterative procedure that searches for the associated mixture dynamics and improves the corresponding policy. We integrate this procedure into two popular robust adversarial algorithms to develop the exact and approximate AR2L algorithms. 
Experiments demonstrate that AR2L is versatile in the sense that it improves policy robustness while maintaining an acceptable level of performance for the nominal case.", "keywords": "online 3D bin packing problem;combinatorial optimization problem;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/646174dcf91e092df8ccb5bca99c21a1c1815ac3.zip", "author": "Yuxin Pan;Yize Chen;Fangzhen Lin", "authorids": "~Yuxin_Pan1;~Yize_Chen1;~Fangzhen_Lin1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/yizechen;http://www.cs.ust.hk/~flin/", "dblp": "29/3085;198/0893;73/6980", "google_scholar": ";G1NiRmwAAAAJ;https://scholar.google.com.tw/citations?user=klFoxpYAAAAJ", "orcid": "0000-0002-4297-2954;0000-0003-4481-3858;0000-0002-3141-8675", "linkedin": "yuxin-pan-222705302/;;", "or_profile": "~Yuxin_Pan1;~Yize_Chen1;~Fangzhen_Lin1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;cse.ust.hk", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npan2023adjustable,\ntitle={Adjustable Robust Reinforcement Learning for Online 3D Bin Packing},\nauthor={Yuxin Pan and Yize Chen and Fangzhen Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1mdTYi1jAW}\n}", "github": "", "project": "", "reviewers": "Ua6d;fVdw;HsT1;Em3c;HCvp", "pdf_size": 5236639, "rating": "4;4;5;6;7", "confidence": "4;4;4;2;5", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;2;3;4", "wc_summary": "29;100;55;96;113", "wc_strengths": "27;61;48;161;204", "wc_weaknesses": "100;532;64;40;42", "wc_questions": "209;6;78;15;295", "wc_limitations": "11;1;29;12;106", "wc_review": "376;700;274;324;760", "wc_reply_reviewers": "28;423;0;16;131", "wc_reply_authors": "35;1519;0;0;281", "reply_reviewers": "1;3;0;1;1", "reply_authors": "2;4;1;1;3", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 78.6, 31.499841269441344 ], "wc_strengths_avg": [ 100.2, 69.41296708828978 ], "wc_weaknesses_avg": [ 155.6, 189.43452694796687 ], "wc_questions_avg": [ 120.6, 113.42768621460988 ], "wc_limitations_avg": [ 31.8, 38.17538473938409 ], "wc_review_avg": [ 486.8, 202.06771142367106 ], "wc_reply_reviewers_avg": [ 119.6, 158.4987066193286 ], "wc_reply_authors_avg": [ 367.0, 585.508667741136 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.035007002100700284, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14529444904111876619&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "ust.hk;ust.hk;cse.ust.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Energy Guided Diffusion for Generating Neurally Exciting Images", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72996", "id": "1moStpWGUj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67226725b09ca9363637f63f85ed4bba-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1moStpWGUj", "openreview": "https://openreview.net/forum?id=1moStpWGUj", "poster": "/media/PosterPDFs/NeurIPS%202023/72996.png?t=1701849741.5601156", "slides": "https://nips.cc/virtual/2023/poster/72996", "video": "https://nips.cc/virtual/2023/poster/72996", "author_site": "Pawel Pierzchlewicz, Konstantin Willeke, Arne Nix, Pavithra Elumalai, Kelli Restivo, Tori Shinn, Cate Nealley, Gabrielle Rodriguez, Saumil Patel, Katrin Franke, Andreas Tolias, Fabian Sinz", "tldr": "", "abstract": "In recent years, most exciting inputs (MEIs) synthesized from encoding models of neuronal activity have become an established method for studying tuning properties of biological and artificial visual systems.\n However, as we move up the visual hierarchy, the complexity of neuronal computations increases. \n Consequently, it becomes more challenging to model neuronal activity, requiring more complex models.\n In this study, we introduce a novel readout architecture inspired by the mechanism of visual attention. This new architecture, which we call attention readout, together with a data-driven convolutional core outperforms previous task-driven models in predicting the activity of neurons in macaque area V4.\n However, as our predictive network becomes deeper and more complex, synthesizing MEIs via straightforward gradient ascent (GA) can struggle to produce qualitatively good results and overfit to idiosyncrasies of a more complex model, potentially decreasing the MEI's model-to-brain transferability.\n To solve this problem, we propose a diffusion-based method for generating MEIs via Energy Guidance (EGG).\n We show that for models of macaque V4, EGG generates single neuron MEIs that generalize better across varying model architectures than the state-of-the-art GA, while at the same time reducing computational costs by a factor of 4.7x, facilitating experimentally challenging closed-loop experiments.\n Furthermore, EGG diffusion can be used to generate other neurally exciting images, like most exciting naturalistic images that are on par with a selection of highly activating natural images, or image reconstructions that generalize better across architectures.\n Finally, EGG is simple to implement, requires no retraining of the diffusion model, and can easily be generalized to provide other characterizations of the visual system, such as invariances.\n Thus, EGG provides a general and flexible framework to study the coding properties of the visual system in the context of natural images.", "keywords": "most exciting inputs;diffusion models;energy guidance;attention;macaque V4", "primary_area": "", "supplementary_material": "", "author": "Pawe\u0142 A. Pierzchlewicz;Konstantin Friedrich Willeke;Arne Nix;Pavithra Elumalai;Kelli Restivo;Tori Shinn;Cate Nealley;Gabrielle Rodriguez;Saumil Patel;Katrin Franke;Andreas S. Tolias;Fabian H. 
Sinz", "authorids": "~Pawe\u0142_A._Pierzchlewicz1;~Konstantin_Friedrich_Willeke1;~Arne_Nix1;~Pavithra_Elumalai1;~Kelli_Restivo1;toriw@bcm.edu;cate.nealley@bcm.edu;gabrielle.rodriguez@bcm.edu;spatel@bcm.edu;~Katrin_Franke1;~Andreas_S._Tolias1;~Fabian_H._Sinz1", "gender": ";M;M;F;F;;;;;F;;M", "homepage": ";https://sinzlab.org/team.html;;;;;;;;https://kfranke.com/;;https://sinzlab.org", "dblp": ";;229/3118;;;;;;;;32/3057;53/5834", "google_scholar": ";sc3jZTsAAAAJ;bYndKeEAAAAJ;PbyAGN0AAAAJ;;;;;;zaBjCS8AAAAJ;;https://scholar.google.com/citations?hl=de", "orcid": ";0000-0003-4445-6408;;;;;;;;0000-0002-4899-9410;;0000-0002-1348-9736", "linkedin": ";;;;kellirestivo/;;;;;;;", "or_profile": "~Pawe\u0142_A._Pierzchlewicz1;~Konstantin_Friedrich_Willeke1;~Arne_Nix1;~Pavithra_Elumalai1;~Kelli_Restivo1;toriw@bcm.edu;cate.nealley@bcm.edu;gabrielle.rodriguez@bcm.edu;spatel@bcm.edu;~Katrin_Franke1;~Andreas_S._Tolias1;~Fabian_H._Sinz1", "aff": ";University of Tuebingen;University of Tuebingen;Georg-August Universit\u00e4t G\u00f6ttingen;Baylor College of Medicine;;;;;;Baylor College of Medicine;Baylor College of Medicine", "aff_domain": ";uni-tuebingen.de;uni-tuebingen.de;uni-goettingen.de;bcm.edu;;;;;;bcm.edu;bcm.edu", "position": ";PhD student;PhD student;PhD student;PhD student;;;;;;Professor;Assistant Professor", "bibtex": "@inproceedings{\npierzchlewicz2023energy,\ntitle={Energy Guided Diffusion for Generating Neurally Exciting Images},\nauthor={Pawe{\\l} A. Pierzchlewicz and Konstantin Friedrich Willeke and Arne Nix and Pavithra Elumalai and Kelli Restivo and Tori Shinn and Cate Nealley and Gabrielle Rodriguez and Saumil Patel and Katrin Franke and Andreas S. Tolias and Fabian H. Sinz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1moStpWGUj}\n}", "github": "", "project": "", "reviewers": "H4Az;BnXF;YaDz;GN1s", "pdf_size": 27044914, "rating": "5;6;6;7", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "375;71;113;132", "wc_strengths": "104;231;126;64", "wc_weaknesses": "891;36;390;127", "wc_questions": "336;10;556;325", "wc_limitations": "9;1;119;87", "wc_review": "1715;349;1304;735", "wc_reply_reviewers": "1629;0;192;285", "wc_reply_authors": "2447;0;251;522", "reply_reviewers": "7;0;1;1", "reply_authors": "6;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 172.75, 118.83681037456365 ], "wc_strengths_avg": [ 131.25, 61.730766235322236 ], "wc_weaknesses_avg": [ 361.0, 332.4612759405221 ], "wc_questions_avg": [ 306.75, 194.53454063481888 ], "wc_limitations_avg": [ 54.0, 50.368641037852115 ], "wc_review_avg": [ 1025.75, 523.2147623108507 ], "wc_reply_reviewers_avg": [ 526.5, 644.7714711430709 ], "wc_reply_authors_avg": [ 805.0, 965.8149408660025 ], "reply_reviewers_avg": [ 2.25, 2.7726341266023544 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18099631398253469926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";uni-tuebingen.de;uni-tuebingen.de;uni-goettingen.de;bcm.edu;;;;;;bcm.edu;bcm.edu", "author_num": 12, "aff_unique_index": "0;0;1;2;2;2", "aff_unique_norm": "University of 
Tuebingen;Georg-August Universit\u00e4t G\u00f6ttingen;Baylor College of Medicine", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-goettingen.de;https://www.bcm.edu", "aff_unique_abbr": "Uni T\u00fcbingen;GAU;BCM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "Germany;United States" }, { "title": "What can Large Language Models do in chemistry? A comprehensive benchmark on eight tasks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73716", "id": "1ngbR3SZHW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bbb330189ce02be00cf7346167028ab1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1ngbR3SZHW", "openreview": "https://openreview.net/forum?id=1ngbR3SZHW", "poster": "/media/PosterPDFs/NeurIPS%202023/73716.png?t=1697211707.5975196", "slides": "https://nips.cc/virtual/2023/poster/73716", "video": "https://nips.cc/virtual/2023/poster/73716", "author_site": "Taicheng Guo, kehan Guo, Bozhao Nan, Zhenwen Liang, Zhichun Guo, Nitesh Chawla, Olaf Wiest, Xiangliang Zhang", "tldr": "", "abstract": "Large Language Models (LLMs) with strong abilities in natural language processing tasks have emerged and have been applied in various areas such as science, finance, and software engineering. However, the capability of LLMs to advance the field of chemistry remains unclear. In this paper, rather than pursuing state-of-the-art performance, we aim to evaluate the capabilities of LLMs in a wide range of tasks across the chemistry domain. We identify three key chemistry-related capabilities to explore in LLMs, namely understanding, reasoning, and explaining, and establish a benchmark containing eight chemistry tasks. Our analysis draws on widely recognized datasets, facilitating a broad exploration of the capacities of LLMs within the context of practical chemistry. Five LLMs (GPT-4, GPT-3.5, Davinci-003, Llama and Galactica) are evaluated for each chemistry task in zero-shot and few-shot in-context learning settings with carefully selected demonstration examples and specially crafted prompts. Our investigation found that GPT-4 outperformed the other models and that LLMs exhibit different levels of competitiveness across the eight chemistry tasks. In addition to the key findings from the comprehensive benchmark analysis, our work provides insights into the limitations of current LLMs and the impact of in-context learning settings on LLMs\u2019 performance across various chemistry tasks. 
The code and datasets used in this study are available at https://github.com/ChemFoundationModels/ChemLLMBench.", "keywords": "Large Language Models;AI for Chemistry", "primary_area": "", "supplementary_material": "", "author": "Taicheng Guo;Kehan Guo;Bozhao Nan;Zhenwen Liang;Zhichun Guo;Nitesh V Chawla;Olaf Wiest;Xiangliang Zhang", "authorids": "~Taicheng_Guo1;~Kehan_Guo1;~Bozhao_Nan1;~Zhenwen_Liang1;~Zhichun_Guo1;~Nitesh_V_Chawla1;~Olaf_Wiest1;~Xiangliang_Zhang1", "gender": "M;M;M;M;;;M;F", "homepage": "https://taichengguo.github.io/;https://kehanguo2.github.io/KehanGuo/;;https://zhenwen-nlp.github.io/;;;https://chemistry.nd.edu/people/olaf-wiest/;https://sites.nd.edu/xiangliang-zhang/", "dblp": "325/5109;;;226/6083;;;;74/1890-1", "google_scholar": "OA_UdcIAAAAJ;t8iRCLUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4rKhF2AAAAAJ;;;bfywzJwAAAAJ;BhRJe4wAAAAJ", "orcid": "0000-0001-7919-6912;;;;;;0000-0001-9316-7720;0000-0002-3574-5665", "linkedin": ";kehan98/;;;;;;", "or_profile": "~Taicheng_Guo1;~Kehan_Guo1;~Bozhao_Nan1;~Zhenwen_Liang1;~Zhichun_Guo1;~Nitesh_V_Chawla1;~Olaf_Wiest1;~Xiangliang_Zhang1", "aff": "University of Notre Dame;University of Notre Dame;University of Notre Dame;University of Notre Dame;;;University of Notre Dame;University of Notre Dame", "aff_domain": "nd.edu;nd.edu;nd.edu;nd.edu;;;nd.edu;nd.edu", "position": "PhD student;PhD student;PhD student;PhD student;;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nguo2023what,\ntitle={What can Large Language Models do in chemistry? A comprehensive benchmark on eight tasks},\nauthor={Taicheng Guo and Kehan Guo and Bozhao Nan and Zhenwen Liang and Zhichun Guo and Nitesh V Chawla and Olaf Wiest and Xiangliang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=1ngbR3SZHW}\n}", "github": "", "project": "", "reviewers": "K37q;eMtr;5TAm;nBsG;FiPp", "pdf_size": 3464737, "rating": "6;7;7;7;8", "confidence": "4;4;3;4;4", "wc_summary_and_contributions": "73;49;53;161;79", "wc_strengths": "56;62;42;57;31", "wc_improvement": "246;131;128;147;289", "wc_limitations": "24;1;47;23;4", "wc_correctness": "1;12;1;13;4", "wc_clarity": "1;1;1;43;186", "wc_relation_to_prior_work": "1;1;1;24;101", "wc_documentation": "1;8;1;12;60", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "404;266;275;481;755", "wc_reply_reviewers": "73;26;69;34;17", "wc_reply_authors": "1782;843;1628;1132;1147", "reply_reviewers": "1;1;2;1;1", "reply_authors": "5;3;6;3;3", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 83.0, 40.63496031744094 ], "wc_strengths_avg": [ 49.6, 11.42978564978364 ], "wc_improvement_avg": [ 188.2, 66.47525855534525 ], "wc_limitations_avg": [ 19.8, 16.557777628655362 ], "wc_correctness_avg": [ 6.2, 5.2687759489277965 ], "wc_clarity_avg": [ 46.4, 71.67035649416013 ], "wc_relation_to_prior_work_avg": [ 25.6, 38.73809494541517 ], "wc_documentation_avg": [ 16.4, 22.204504047602594 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 436.2, 178.67892992739797 ], "wc_reply_reviewers_avg": [ 43.8, 22.88580345978703 ], "wc_reply_authors_avg": [ 1306.4, 346.4636200238057 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 4.0, 1.2649110640673518 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 155, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12456636741484874039&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 8, "email": "nd.edu;nd.edu;nd.edu;nd.edu;;;nd.edu;nd.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Convex Optimization with Unbounded Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72995", "id": "1osmdAfD4P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/531230cfac80c65017ad0f85d3031edc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1osmdAfD4P", "openreview": "https://openreview.net/forum?id=1osmdAfD4P", "poster": "/media/PosterPDFs/NeurIPS%202023/72995.png?t=1701832840.4079645", "slides": "https://nips.cc/virtual/2023/poster/72995", "video": "https://nips.cc/virtual/2023/poster/72995", "author_site": "Raunak Kumar, Sarah Dean, Robert Kleinberg", "tldr": "", "abstract": "Online convex optimization (OCO) is a widely used framework in online\n learning. In each round, the learner chooses a decision in a convex set and an\n adversary chooses a convex loss function, and then the learner suffers the\n loss associated with their current decision. However, in many applications the\n learner's loss depends not only on the current decision but on the entire\n history of decisions until that point. The OCO framework and its existing\n generalizations do not capture this, and they can only be applied to many\n settings of interest after a long series of approximation arguments. They also\n leave open the question of whether the dependence on memory is tight because\n there are no non-trivial lower bounds. In this work we introduce a\n generalization of the OCO framework, ``Online Convex Optimization with\n Unbounded Memory'', that captures long-term dependence on past decisions. We\n introduce the notion of $p$-effective memory capacity, $H_p$, that quantifies\n the maximum influence of past decisions on present losses. We prove an\n $O(\\sqrt{H_p T})$ upper bound on the policy regret and a matching (worst-case)\n lower bound. As a special case, we prove the first non-trivial lower bound for\n OCO with finite memory~\\citep{anavaHM2015online}, which could be of\n independent interest, and also improve existing upper bounds. 
We demonstrate\n the broad applicability of our framework by using it to derive regret bounds,\n and to improve and simplify existing regret bound derivations, for a variety\n of online learning problems including online linear control and an online\n variant of performative prediction.", "keywords": "online learning;online convex optimization;online linear control", "primary_area": "", "supplementary_material": "/attachment/578fed770228f15c766ead35714b9d104b305d85.pdf", "author": "Raunak Kumar;Sarah Dean;Robert Kleinberg", "authorids": "~Raunak_Kumar1;~Sarah_Dean2;~Robert_Kleinberg1", "gender": ";F;M", "homepage": ";https://sdean.website/;http://www.cs.cornell.edu/~rdk/", "dblp": ";207/8292;k/RDKleinberg", "google_scholar": ";xhKqjpYAAAAJ;https://scholar.google.com.tw/citations?user=zkvW8FQAAAAJ", "orcid": ";;0000-0002-8306-3407", "linkedin": ";;", "or_profile": "~Raunak_Kumar1;~Sarah_Dean2;~Robert_Kleinberg1", "aff": ";Cornell University;Cornell University", "aff_domain": ";cornell.edu;cornell.edu", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkumar2023online,\ntitle={Online Convex Optimization with Unbounded Memory},\nauthor={Raunak Kumar and Sarah Dean and Robert Kleinberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1osmdAfD4P}\n}", "github": "", "project": "", "reviewers": "58kP;xqDp;i8qJ;cA1Y", "pdf_size": 875008, "rating": "3;4;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "113;51;59;84", "wc_strengths": "33;55;57;42", "wc_weaknesses": "167;141;21;312", "wc_questions": "5;37;1;38", "wc_limitations": "5;9;9;10", "wc_review": "323;293;147;486", "wc_reply_reviewers": "123;4;38;278", "wc_reply_authors": "144;0;0;593", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 24.21130934088448 ], "wc_strengths_avg": [ 46.75, 9.807522622966516 ], "wc_weaknesses_avg": [ 160.25, 103.48278842396932 ], "wc_questions_avg": [ 20.25, 17.311484627264065 ], "wc_limitations_avg": [ 8.25, 1.920286436967152 ], "wc_review_avg": [ 312.25, 120.39803777470794 ], "wc_reply_reviewers_avg": [ 110.75, 105.84274892499722 ], "wc_reply_authors_avg": [ 184.25, 243.20400387329153 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9291690394644530016&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";cornell.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Alternating Updates for Efficient Transformers", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72994", "id": "1p6teT6F73", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2059277ac6ce66e7e5543001afa8bb5-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=1p6teT6F73", "openreview": "https://openreview.net/forum?id=1p6teT6F73", "poster": "/media/PosterPDFs/NeurIPS%202023/72994.png?t=1699898309.7597518", "slides": "https://nips.cc/virtual/2023/poster/72994", "video": "https://nips.cc/virtual/2023/poster/72994", "author_site": "Cenk Baykal, Dylan Cutler, Nishanth Dikkala, Nikhil Ghosh, Rina Panigrahy, Xin Wang", "tldr": "", "abstract": "It has been well established that increasing scale in deep transformer networks leads to improved quality and performance. However, this increase in scale often comes with prohibitive increases in compute cost and inference latency. We introduce Alternating Updates (AltUp), a simple-to-implement method to increase a model's capacity without the computational burden. AltUp enables the widening of the learned representation, i.e., the token embedding, while only incurring a negligible increase in latency. AltUp achieves this by working on a subblock of the widened representation at each layer and using a predict-and-correct mechanism to update the inactivated blocks. We present extensions of AltUp, such as its applicability to the sequence dimension, and demonstrate how AltUp can be synergistically combined with existing approaches, such as Sparse Mixture-of-Experts models, to obtain efficient models with even \nhigher capacity. Our experiments on benchmark transformer models and language tasks demonstrate the consistent effectiveness of AltUp on a diverse set of scenarios. Notably, on SuperGLUE and SQuAD benchmarks, AltUp enables up to $87\\%$ speedup relative to the dense baselines at the same accuracy.", "keywords": "efficiency;efficient transformers", "primary_area": "", "supplementary_material": "/attachment/482743877b8911b0a43a988be5dd5626d7cbad26.pdf", "author": "Cenk Baykal;Dylan J Cutler;Nishanth Dikkala;Nikhil Ghosh;Rina Panigrahy;Xin Wang", "authorids": "~Cenk_Baykal1;~Dylan_J_Cutler1;~Nishanth_Dikkala1;~Nikhil_Ghosh1;~Rina_Panigrahy1;~Xin_Wang30", "gender": "M;M;M;M;;M", "homepage": "https://people.csail.mit.edu/baykal/;https://github.com/DCtheTall;http://people.csail.mit.edu/nishanthd/;;;", "dblp": "151/9349;;138/8092;251/8779;p/RinaPanigrahy;", "google_scholar": "lRxoOlwAAAAJ;;CMZoOTIAAAAJ;0Fv4bikAAAAJ;;7BjA8ccAAAAJ", "orcid": ";;;;;", "linkedin": ";;;nikhil-ghosh-03389199/;;", "or_profile": "~Cenk_Baykal1;~Dylan_J_Cutler1;~Nishanth_Dikkala1;~Nikhil_Ghosh1;~Rina_Panigrahy1;~Xin_Wang30", "aff": "Google;Google;Google;University of California, Berkeley;Google;Google", "aff_domain": "google.com;google.com;google.com;berkeley.edu;google.com;google.com", "position": "Research Scientist;Researcher;Google Research;PhD student;Research Scientist;Software Engineer", "bibtex": "@inproceedings{\nbaykal2023alternating,\ntitle={Alternating Updates for Efficient Transformers},\nauthor={Cenk Baykal and Dylan J Cutler and Nishanth Dikkala and Nikhil Ghosh and Rina Panigrahy and Xin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1p6teT6F73}\n}", "github": "", "project": "", "reviewers": "2iUa;RDoa;UGzF;raib", "pdf_size": 982726, "rating": "6;6;7;8", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "2;3;2;4", "presentation": "3;3;3;3", "wc_summary": "50;36;110;110", "wc_strengths": "49;31;9;53", "wc_weaknesses": "74;98;25;52", "wc_questions": "76;21;34;37", "wc_limitations": "66;1;57;9", "wc_review": "315;187;235;261", "wc_reply_reviewers": "17;23;11;11", "wc_reply_authors": 
"47;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.5, 33.86369737639409 ], "wc_strengths_avg": [ 35.5, 17.399712641305314 ], "wc_weaknesses_avg": [ 62.25, 26.966414296305693 ], "wc_questions_avg": [ 42.0, 20.530465167647808 ], "wc_limitations_avg": [ 33.25, 28.568995432111365 ], "wc_review_avg": [ 249.5, 46.203354856546945 ], "wc_reply_reviewers_avg": [ 15.5, 4.9749371855331 ], "wc_reply_authors_avg": [ 11.75, 20.351596988934308 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7444673157224969143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;berkeley.edu;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "Google;UC Berkeley", "aff_campus_unique_index": "0;0;0;1;0;0", "aff_campus_unique": "Mountain View;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Uncertainty-Aware Instance Reweighting for Off-Policy Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72993", "id": "1pWNhmbllE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e97ac22927560eb2de6b658498cbc575-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1pWNhmbllE", "openreview": "https://openreview.net/forum?id=1pWNhmbllE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72993", "video": "https://nips.cc/virtual/2023/poster/72993", "author_site": "Xiaoying Zhang, Junpu Chen, Hongning Wang, Hong Xie, Yang Liu, John C.S. Lui, Hang Li", "tldr": "", "abstract": "Off-policy learning, referring to the procedure of policy optimization with access only to logged feedback data, has shown importance in various important real-world applications, such as search engines and recommender systems. While the ground-truth logging policy is usually unknown, previous work simply takes its estimated value for the off-policy learning, ignoring the negative impact from both high bias and high variance resulted from such an estimator. And these impact is often magnified on samples with small and inaccurately estimated logging probabilities. The contribution of this work is to explicitly model the uncertainty in the estimated logging policy, and propose an Uncertainty-aware Inverse Propensity Score estimator (UIPS) for improved off-policy learning, with a theoretical convergence guarantee. Experiment results on the synthetic and real-world recommendation datasets demonstrate that UIPS significantly improves the quality of the discovered policy, when compared against an extensive list of state-of-the-art baselines.", "keywords": "off-policy learning;uncertainty", "primary_area": "", "supplementary_material": "", "author": "Xiaoying Zhang;Junpu Chen;Hongning Wang;Hong Xie;Yang Liu;John C.S. 
Lui;Hang Li", "authorids": "~Xiaoying_Zhang3;~Junpu_Chen1;~Hongning_Wang1;~Hong_Xie2;~Yang_Liu3;~John_C.S._Lui2;~Hang_Li4", "gender": "F;M;M;M;M;M;M", "homepage": "https://github.com/Xiaoyinggit;https://sites.google.com/view/junpu-chen;http://www.cs.virginia.edu/~hw5x/;https://hongxie.github.io/;http://www.yliuu.com;http://www.cse.cuhk.edu.hk/~cslui/Index.html;https://hangli-hl.github.io/", "dblp": "46/7725;72/3514;05/6545;39/3657-4;51/3710-18;l/JohnCSLui;https://dblp.org/pers/hd/l/Li_0001:Hang", "google_scholar": "lwKg4C4AAAAJ;;qkdvKNoAAAAJ;https://scholar.google.com/citations?view_op=list_works;jKrIVCIAAAAJ;https://scholar.google.com.tw/citations?user=7LVjQ7MAAAAJ;nTl5mSwAAAAJ", "orcid": ";;0000-0002-6524-9195;0000-0001-7935-7210;0000-0001-8420-6011;0000-0001-7466-0384;0000-0001-9628-3487", "linkedin": ";;;;;;hang-li-84aa6314/", "or_profile": "~Xiaoying_Zhang3;~Junpu_Chen1;~Hongning_Wang1;~Hong_Xie2;~Yang_Liu3;~John_C.S._Lui2;~Hang_Li4", "aff": "ByteDance AILab;ChongQing University;University of Virginia;Chongqing Institute of Green and Intelligent Technology, Chinese Academy of Sciences;University of California, Santa Cruz;The Chinese University of Hong Kong;ByteDance Technology", "aff_domain": "bytedance.com;cqu.edu.cn;virginia.edu;cigit.ac.cn;ucsc.edu;cse.cuhk.edu.hk;bytedance.com", "position": "Researcher;MS student;Associate Professor;Researcher;Assistant Professor;Full Professor;Head of Research", "bibtex": "@inproceedings{\nzhang2023uncertaintyaware,\ntitle={Uncertainty-Aware Instance Reweighting for Off-Policy Learning},\nauthor={Xiaoying Zhang and Junpu Chen and Hongning Wang and Hong Xie and Yang Liu and John C.S. Lui and Hang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1pWNhmbllE}\n}", "github": "", "project": "", "reviewers": "19wU;QiPS;ha2F;YnY8", "pdf_size": 1157608, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "3;2;3;3", "novelty": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "141;93;81;90", "wc_strengths": "74;57;56;63", "wc_weaknesses": "237;202;115;219", "wc_questions": "123;122;66;27", "wc_limitations": "19;38;14;18", "wc_review": "594;512;332;417", "wc_reply_reviewers": "292;17;26;152", "wc_reply_authors": "909;103;97;351", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.25, 23.370654676324325 ], "wc_strengths_avg": [ 62.5, 7.158910531638177 ], "wc_weaknesses_avg": [ 193.25, 46.84215515964226 ], "wc_questions_avg": [ 84.5, 40.425858061394315 ], "wc_limitations_avg": [ 22.25, 9.283722313813572 ], "wc_review_avg": [ 463.75, 98.53520944312241 ], "wc_reply_reviewers_avg": [ 121.75, 111.8489494809853 ], "wc_reply_authors_avg": [ 365.0, 330.3785707336358 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16986418224963202877&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "bytedance.com;cqu.edu.cn;virginia.edu;cigit.ac.cn;ucsc.edu;cse.cuhk.edu.hk;bytedance.com", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;0", "aff_unique_norm": "ByteDance;Chongqing University;University of 
Virginia;Chinese Academy of Sciences;University of California, Santa Cruz;Chinese University of Hong Kong", "aff_unique_dep": "AILab;;;Institute of Green and Intelligent Technology;;", "aff_unique_url": "https://ailab.bytedance.com/;https://www.cqu.edu.cn/;https://www.virginia.edu;http://www.cas.cn/;https://www.ucsc.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "ByteDance AILab;CQU;UVA;CAS;UCSC;CUHK", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Chongqing;Santa Cruz;Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "MVDoppler: Unleashing the Power of Multi-View Doppler for MicroMotion-based Gait Classification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73715", "id": "1plAfmP5ms", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b5727c1bab903e0ff21cec84a9a7f5a6-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1plAfmP5ms", "openreview": "https://openreview.net/forum?id=1plAfmP5ms", "poster": "/media/PosterPDFs/NeurIPS%202023/73715.png?t=1701798844.5727768", "slides": "https://nips.cc/virtual/2023/poster/73715", "video": "https://nips.cc/virtual/2023/poster/73715", "author_site": "Soheil Hor, Shubo Yang, Jaeho Choi, Amin Arbabian", "tldr": "", "abstract": "Modern perception systems rely heavily on high-resolution cameras, LiDARs, and advanced deep neural networks, enabling exceptional performance across various applications. However, these optical systems predominantly depend on geometric features and shapes of objects, which can be challenging to capture in long-range perception applications. To overcome this limitation, alternative approaches such as Doppler-based perception using high-resolution radars have been proposed. \nDoppler-based systems are capable of measuring micro-motions of targets remotely and with very high precision. When compared to geometric features, the resolution of micro-motion features exhibits significantly greater resilience to the influence of distance. However, the true potential of Doppler-based perception has yet to be fully realized due to several factors. These include the unintuitive nature of Doppler signals, the limited availability of public Doppler datasets, and the current datasets' inability to capture the specific co-factors that are unique to Doppler-based perception, such as the effect of the radar's observation angle and the target's motion trajectory.\nThis paper introduces a new large multi-view Doppler dataset together with baseline perception models for micro-motion-based gait analysis and classification. The dataset captures the impact of the subject's walking trajectory and radar's observation angle on the classification performance. Additionally, baseline multi-view data fusion techniques are provided to mitigate these effects. This work demonstrates that sub-second micro-motion snapshots can be sufficient for reliable detection of hand movement patterns and even changes in a pedestrian's walking behavior when distracted by their phone. 
Overall, this research not only showcases the potential of Doppler-based perception, but also offers valuable solutions to tackle its fundamental challenges.", "keywords": "micro-Doppler analysis;Gait analysis;multi-view sensor Fusion;Doppler-based Perception;mmWave Sensing", "primary_area": "", "supplementary_material": "/attachment/261092c9a00a2f5445ecf52c46b638912a4c71d1.pdf", "author": "Soheil Hor;Shubo Yang;Jae-Ho Choi;Amin Arbabian", "authorids": "~Soheil_Hor1;~Shubo_Yang1;~Jae-Ho_Choi1;~Amin_Arbabian1", "gender": "M;F;M;M", "homepage": "https://www.linkedin.com/mwlite/in/soheil-hor-80503239;;;https://arbabianlab.stanford.edu/", "dblp": ";;;00/10276", "google_scholar": "WhlJp00AAAAJ;;ywDewK4AAAAJ;qh1plBkAAAAJ", "orcid": ";;;", "linkedin": ";shubo-yang-3429a326a/;;amin-arbabian/", "or_profile": "~Soheil_Hor1;~Shubo_Yang1;~Jae-Ho_Choi1;~Amin_Arbabian1", "aff": "Stanford University;Stanford University;POSTECH;Stanford University", "aff_domain": "stanford.edu;stanford.edu;postech.ac.kr;stanford.edu", "position": "PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhor2023mvdoppler,\ntitle={{MVD}oppler: Unleashing the Power of Multi-View Doppler for MicroMotion-based Gait Classification},\nauthor={Soheil Hor and Shubo Yang and Jae-Ho Choi and Amin Arbabian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=1plAfmP5ms}\n}", "github": "", "project": "", "reviewers": "EN5b;reAu;MPVr;cCoP", "pdf_size": 11875499, "rating": "6;6;7;7", "confidence": "4;2;5;3", "wc_summary_and_contributions": "32;185;65;20", "wc_strengths": "35;114;27;9", "wc_improvement": "374;17;37;28", "wc_limitations": "42;322;78;27", "wc_correctness": "54;10;11;10", "wc_clarity": "5;23;6;8", "wc_relation_to_prior_work": "10;29;15;9", "wc_documentation": "24;41;27;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "577;742;267;117", "wc_reply_reviewers": "50;40;33;0", "wc_reply_authors": "1765;508;335;31", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 75.5, 65.3318452211477 ], "wc_strengths_avg": [ 46.25, 40.23291562887283 ], "wc_improvement_avg": [ 114.0, 150.2780755799062 ], "wc_limitations_avg": [ 117.25, 119.65653972934366 ], "wc_correctness_avg": [ 21.25, 18.91262805640718 ], "wc_clarity_avg": [ 10.5, 7.297259759663212 ], "wc_relation_to_prior_work_avg": [ 15.75, 7.980444849756184 ], "wc_documentation_avg": [ 24.25, 12.833062767710599 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 425.75, 246.6874287433391 ], "wc_reply_reviewers_avg": [ 30.75, 18.7533330370897 ], "wc_reply_authors_avg": [ 659.75, 660.5669439958376 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8656851808923293422&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;postech.ac.kr;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Pohang University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.postech.ac.kr", "aff_unique_abbr": "Stanford;POSTECH", "aff_campus_unique_index": "0;0;1;0", 
"aff_campus_unique": "Stanford;Pohang", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;South Korea" }, { "title": "Large Language Models are Visual Reasoning Coordinators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72992", "id": "1q0feiJ2i4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ddfe6bae7b869e819f842753009b94ad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1q0feiJ2i4", "openreview": "https://openreview.net/forum?id=1q0feiJ2i4", "poster": "/media/PosterPDFs/NeurIPS%202023/72992.png?t=1698202947.7407672", "slides": "https://nips.cc/virtual/2023/poster/72992", "video": "https://nips.cc/virtual/2023/poster/72992", "author_site": "Liangyu Chen, Bo Li, Sheng Shen, Jingkang Yang, Chunyuan Li, Kurt Keutzer, Trevor Darrell, Ziwei Liu", "tldr": "", "abstract": "Visual reasoning requires multimodal perception and commonsense cognition of the world. Recently, multiple vision-language models (VLMs) have been proposed with excellent commonsense reasoning ability in various domains. However, how to harness the collective power of these complementary VLMs is rarely explored. Existing methods like ensemble still struggle to aggregate these models with the desired higher-order communications. In this work, we propose Cola, a novel paradigm that coordinates multiple VLMs for visual reasoning. Our key insight is that a large language model (LLM) can efficiently coordinate multiple VLMs by facilitating natural language communication that leverages their distinct and complementary capabilities. Extensive experiments demonstrate that our instruction tuning variant, Cola-FT, achieves state-of-the-art performance on visual question answering (VQA), outside knowledge VQA, visual entailment, and visual spatial reasoning tasks. Moreover, we show that our in-context learning variant, Cola-Zero, exhibits competitive performance in zero and few-shot settings, without finetuning. 
Through systematic ablation studies and visualizations, we validate that a coordinator LLM indeed comprehends the instruction prompts as well as the separate functionalities of VLMs; it then coordinates them to enable impressive visual reasoning capabilities.", "keywords": "visual reasoning;large language models", "primary_area": "", "supplementary_material": "/attachment/244afe0fea5e7d3767573bb17775e870d17862df.pdf", "author": "Liangyu Chen;Bo Li;Sheng Shen;Jingkang Yang;Chunyuan Li;Kurt Keutzer;Trevor Darrell;Ziwei Liu", "authorids": "~Liangyu_Chen3;~Bo_Li23;~Sheng_Shen2;~Jingkang_Yang1;~Chunyuan_Li1;~Kurt_Keutzer1;~Trevor_Darrell2;~Ziwei_Liu1", "gender": ";M;M;M;;M;M;M", "homepage": "https://www.cliangyu.com/;https://www.brianboli.com/;https://sincerass.github.io;https://jingkang50.github.io/;http://chunyuan.li/;https://people.eecs.berkeley.edu/~keutzer/;https://liuziwei7.github.io/;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";50/3402-80;138/5764-1.html;175/5365.html;64/9590;k/KurtKeutzer.html;05/6300-2;d/TrevorDarrell", "google_scholar": "vi5Zt9oAAAAJ;1_zc1-IAAAAJ;https://scholar.google.com/citations?hl=en;S-YjbUYAAAAJ;Zd7WmXUAAAAJ;ID9QePIAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;;0000-0003-3868-8501;;", "linkedin": "chen-liangyu/;brianbo1121/;sheng-s-ab198a174/;;;kurtkeutzer/;;", "or_profile": "~Liangyu_Chen3;~Bo_Li23;~Sheng_Shen2;~Jingkang_Yang1;~Chunyuan_Li1;~Kurt_Keutzer1;~Ziwei_Liu1;~trevor_darrell1", "aff": "Nanyang Technological University;Nanyang Technological University;University of California, Berkeley;Nanyang Technological University;Microsoft Research;University of California, Berkeley;Nanyang Technological University;Electrical Engineering & Computer Science Department", "aff_domain": "ntu.edu.sg;ntu.edu.sg;berkeley.edu;ntu.edu.sg;microsoft.com;berkeley.edu;ntu.edu.sg;eecs.berkeley.edu", "position": "Researcher;PhD student;PhD student;PhD student;Principal Researcher;Full Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nchen2023large,\ntitle={Large Language Models are Visual Reasoning Coordinators},\nauthor={Liangyu Chen and Bo Li and Sheng Shen and Jingkang Yang and Chunyuan Li and Kurt Keutzer and Trevor Darrell and Ziwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1q0feiJ2i4}\n}", "github": "", "project": "", "reviewers": "3ict;4gyW;Yu6H;qcEV", "pdf_size": 5508711, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "2;4;3;3", "presentation": "3;4;3;3", "wc_summary": "51;49;50;65", "wc_strengths": "172;34;28;68", "wc_weaknesses": "85;40;66;126", "wc_questions": "5;20;39;79", "wc_limitations": "5;11;9;10", "wc_review": "318;154;192;348", "wc_reply_reviewers": "51;16;0;31", "wc_reply_authors": "162;45;84;23", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 53.75, 6.53356717268599 ], "wc_strengths_avg": [ 75.5, 57.76460854190912 ], "wc_weaknesses_avg": [ 79.25, 31.363792819109108 ], "wc_questions_avg": [ 35.75, 27.725214156071004 ], "wc_limitations_avg": [ 8.75, 2.277608394786075 ], "wc_review_avg": [ 253.0, 81.81075723888638 ], "wc_reply_reviewers_avg": [ 24.5, 18.82153022471871 ], 
"wc_reply_authors_avg": [ 78.5, 52.92683629313205 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11672938079933912718&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "ntu.edu.sg;ntu.edu.sg;berkeley.edu;ntu.edu.sg;microsoft.com;berkeley.edu;ntu.edu.sg;eecs.berkeley.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;2;1;0;3", "aff_unique_norm": "Nanyang Technological University;University of California, Berkeley;Microsoft;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";;Microsoft Research;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.ntu.edu.sg;https://www.berkeley.edu;https://www.microsoft.com/en-us/research;", "aff_unique_abbr": "NTU;UC Berkeley;MSR;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;0;1;1;0", "aff_country_unique": "Singapore;United States;" }, { "title": "Energy Discrepancies: A Score-Independent Loss for Energy-Based Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72991", "id": "1qFnxhdbxg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e176ef071f00f1b233461c5ad5e1b24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1qFnxhdbxg", "openreview": "https://openreview.net/forum?id=1qFnxhdbxg", "poster": "/media/PosterPDFs/NeurIPS%202023/72991.png?t=1702184503.2262392", "slides": "https://nips.cc/virtual/2023/poster/72991", "video": "https://nips.cc/virtual/2023/poster/72991", "author_site": "Tobias Schr\u00f6der, Zijing Ou, Jen Lim, Yingzhen Li, Sebastian Vollmer, Andrew Duncan", "tldr": "", "abstract": "Energy-based models are a simple yet powerful class of probabilistic models, but their widespread adoption has been limited by the computational burden of training them. We propose a novel loss function called Energy Discrepancy (ED) which does not rely on the computation of scores or expensive Markov chain Monte Carlo. We show that energy discrepancy approaches the explicit score matching and negative log-likelihood loss under different limits, effectively interpolating between both. Consequently, minimum energy discrepancy estimation overcomes the problem of nearsightedness encountered in score-based estimation methods, while also enjoying theoretical guarantees. Through numerical experiments, we demonstrate that ED learns low-dimensional data distributions faster and more accurately than explicit score matching or contrastive divergence. 
For high-dimensional image data, we describe how the manifold hypothesis puts limitations on our approach and demonstrate the effectiveness of energy discrepancy by training the energy-based model as a prior of a variational decoder model.", "keywords": "Energy-based models;statistical discrepancy;latent-variable model;density estimation", "primary_area": "", "supplementary_material": "/attachment/ec0ec5c916c2d621920ccc22f163995297975e38.zip", "author": "Tobias Schr\u00f6der;Zijing Ou;Jen Ning Lim;Yingzhen Li;Sebastian Josef Vollmer;Andrew Duncan", "authorids": "~Tobias_Schr\u00f6der2;~Zijing_Ou1;~Jen_Ning_Lim1;~Yingzhen_Li1;~Sebastian_Josef_Vollmer1;~Andrew_Duncan1", "gender": ";;F;M;M;M", "homepage": "https://j-zin.github.io/;;http://yingzhenli.net/home/en/;https://sebastian.vollmer.ms;;https://tobias-schroeder.github.io", "dblp": "246/3072;250/9539;117/9230;173/5140.html;189/0076;", "google_scholar": "zZg3Cm0AAAAJ;Uryp_N8AAAAJ;https://scholar.google.se/citations?hl=en;WoqSEpYAAAAJ;https://scholar.google.co.uk/citations?user=3ZzC72cAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;andrew-duncan-404690140/;tobias-schroeder-3295b3215/", "or_profile": "~Zijing_Ou1;~Jen_Ning_Lim1;~Yingzhen_Li1;~Sebastian_Josef_Vollmer1;~Andrew_Duncan1;~Tobias_Schroeder1", "aff": "Imperial College London;The University of Warwick;Imperial College London;University of Kaiserslautern;Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;warwick.ac.uk;imperial.ac.uk;rptu.de;imperial.ac.uk;ic.ac.uk", "position": "PhD student;PhD student;Lecturer;Full Professor;Senior Lecturer;PhD student", "bibtex": "@inproceedings{\nschr{\\\"o}der2023energy,\ntitle={Energy Discrepancies: A Score-Independent Loss for Energy-Based Models},\nauthor={Tobias Schr{\\\"o}der and Zijing Ou and Jen Ning Lim and Yingzhen Li and Sebastian Josef Vollmer and Andrew Duncan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1qFnxhdbxg}\n}", "github": "", "project": "", "reviewers": "BJCK;BMfu;37sQ;M9yD;NEwe", "pdf_size": 14419133, "rating": "6;6;7;7;7", "confidence": "4;2;3;3;2", "soundness": "3;3;4;4;3", "novelty": "3;3;4;4;3", "presentation": "3;3;3;3;3", "wc_summary": "103;117;64;36;128", "wc_strengths": "26;109;24;45;45", "wc_weaknesses": "68;27;5;31;96", "wc_questions": "141;83;22;146;17", "wc_limitations": "5;10;6;6;1", "wc_review": "343;346;121;264;287", "wc_reply_reviewers": "0;41;15;15;18", "wc_reply_authors": "0;30;15;0;26", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.6, 34.44764142869581 ], "wc_strengths_avg": [ 49.8, 30.92830418888174 ], "wc_weaknesses_avg": [ 45.4, 32.401234544381175 ], "wc_questions_avg": [ 81.8, 55.50279272252884 ], "wc_limitations_avg": [ 5.6, 2.870540018881465 ], "wc_review_avg": [ 272.2, 81.98390085864419 ], "wc_reply_reviewers_avg": [ 17.8, 13.196969349058898 ], "wc_reply_authors_avg": [ 14.2, 12.592060990957755 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.21821789023599236, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5050904435283732909&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 5, "email": "imperial.ac.uk;warwick.ac.uk;imperial.ac.uk;rptu.de;imperial.ac.uk;ic.ac.uk", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Imperial College London;University of Warwick;University of Kaiserslautern", "aff_unique_dep": ";;", "aff_unique_url": "https://www.imperial.ac.uk;https://warwick.ac.uk;https://www.uni-kl.de", "aff_unique_abbr": "ICL;Warwick;Uni KL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Is Your Code Generated by ChatGPT Really Correct? Rigorous Evaluation of Large Language Models for Code Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72990", "id": "1qvx610Cu7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43e9d647ccd3e4b7b5baab53f0368686-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1qvx610Cu7", "openreview": "https://openreview.net/forum?id=1qvx610Cu7", "poster": "/media/PosterPDFs/NeurIPS%202023/72990.png?t=1699930149.4471667", "slides": "https://nips.cc/virtual/2023/poster/72990", "video": "https://nips.cc/virtual/2023/poster/72990", "author_site": "Jiawei Liu, Chunqiu Steven Xia, Yuyao Wang, LINGMING ZHANG", "tldr": "", "abstract": "Program synthesis has been long studied with recent approaches focused on directly using the power of Large Language Models (LLMs) to generate code. Programming benchmarks, with curated synthesis problems and test-cases, are used to measure the performance of various LLMs on code synthesis. However, these test-cases can be limited in both quantity and quality for fully assessing the functional correctness of the generated code. Such limitation in the existing benchmarks begs the following question: In the era of LLMs, is the code generated really correct? To answer this, we propose EvalPlus \u2013 a code synthesis evaluation framework to rigorously benchmark the functional correctness of LLM-synthesized code. EvalPlus augments a given evaluation dataset with large amounts of test-cases newly produced by an automatic test input generator, powered by both LLM- and mutation-based strategies. While EvalPlus is general, we extend the test-cases of the popular HumanEval benchmark by 80x to build HumanEval+. Our extensive evaluation across 26 popular LLMs (e.g., GPT-4 and ChatGPT) demonstrates that HumanEval+ is able to catch significant amounts of previously undetected wrong code synthesized by LLMs, reducing the pass@k by up-to 19.3-28.9%. We also surprisingly found that test insufficiency can lead to mis-ranking. For example, both WizardCoder-CodeLlama and Phind-CodeLlama now outperform ChatGPT on HumanEval+, while none of them could on HumanEval. Our work not only indicates that prior popular code synthesis evaluation results do not accurately reflect the true performance of LLMs for code synthesis, but also opens up a new direction to improve such programming benchmarks through automated testing. 
We have open-sourced our tools, enhanced datasets as well as all LLM-generated code at https://github.com/evalplus/evalplus to facilitate and accelerate future LLM-for-code research.", "keywords": "LLM4Code;ChatGPT;Automated Test Generation", "primary_area": "", "supplementary_material": "/attachment/ed76e10c64021f99e1b08bac85918f318d2b90d0.zip", "author": "Jiawei Liu;Chunqiu Steven Xia;Yuyao Wang;LINGMING ZHANG", "authorids": "~Jiawei_Liu11;~Chunqiu_Steven_Xia1;yuyao6@outlook.com;~LINGMING_ZHANG2", "gender": "M;M;;M", "homepage": "https://jiawei-site.github.io/;https://steven-site.github.io/;;http://lingming.cs.illinois.edu/", "dblp": "12/8228-4;324/4827;;27/7057-1", "google_scholar": "Vw6el1AAAAAJ;-PCjRp8AAAAJ;;zzbWQE4AAAAJ", "orcid": "0000-0001-7122-8625;;;", "linkedin": "jiawei-liu-uiuc/;;;", "or_profile": "~Jiawei_Liu11;~Chunqiu_Steven_Xia1;yuyao6@outlook.com;~LINGMING_ZHANG2", "aff": "Google;University of Illinois, Urbana Champaign;;University of Illinois Urbana-Champaign", "aff_domain": "google.com;cs.illinois.edu;;cs.illinois.edu", "position": "Intern;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nliu2023is,\ntitle={Is Your Code Generated by Chat{GPT} Really Correct? Rigorous Evaluation of Large Language Models for Code Generation},\nauthor={Jiawei Liu and Chunqiu Steven Xia and Yuyao Wang and LINGMING ZHANG},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1qvx610Cu7}\n}", "github": "", "project": "", "reviewers": "yy3N;jsgr;VxVU;9v6g;SGiT", "pdf_size": 860939, "rating": "4;5;6;6;6", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "47;108;82;62;94", "wc_strengths": "175;40;83;96;83", "wc_weaknesses": "150;397;83;24;135", "wc_questions": "64;50;76;88;52", "wc_limitations": "62;8;94;29;11", "wc_review": "498;603;418;299;375", "wc_reply_reviewers": "0;10;0;15;20", "wc_reply_authors": "0;24;0;15;30", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 78.6, 21.85040045399626 ], "wc_strengths_avg": [ 95.4, 44.07538995856985 ], "wc_weaknesses_avg": [ 157.8, 127.51062700810469 ], "wc_questions_avg": [ 66.0, 14.422205101855956 ], "wc_limitations_avg": [ 40.8, 32.81097377402871 ], "wc_review_avg": [ 438.6, 104.41570763060508 ], "wc_reply_reviewers_avg": [ 9.0, 8.0 ], "wc_reply_authors_avg": [ 13.8, 12.237646832622685 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 943, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2867890409246450427&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;cs.illinois.edu;;cs.illinois.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;University of Illinois Urbana-Champaign", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://illinois.edu", "aff_unique_abbr": "Google;UIUC", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Mountain View;Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Decorate3D: Text-Driven 
High-Quality Texture Generation for Mesh Decoration in the Wild", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72989", "id": "1recIOnzOF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/73af055566f5514b9863315133b84eda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1recIOnzOF", "openreview": "https://openreview.net/forum?id=1recIOnzOF", "poster": "/media/PosterPDFs/NeurIPS%202023/72989.png?t=1702257478.1260078", "slides": "https://nips.cc/virtual/2023/poster/72989", "video": "https://nips.cc/virtual/2023/poster/72989", "author_site": "Yanhui Guo, Xinxin Zuo, Peng Dai, Juwei Lu, Xiaolin Wu, Li Cheng, Youliang Yan, Songcen Xu, Xiaofei Wu", "tldr": "", "abstract": "This paper presents Decorate3D, a versatile and user-friendly method for the creation and editing of 3D objects using images. Decorate3D models a real-world object of interest with a neural radiance field (NeRF) and decomposes the NeRF representation into an explicit mesh representation, a view-dependent texture, and a diffuse UV texture. Subsequently, users can either manually edit the UV or provide a prompt for the automatic generation of a new 3D-consistent texture. To achieve high-quality 3D texture generation, we propose a structure-aware score distillation sampling method to optimize a neural UV texture based on user-defined text and empower an image diffusion model with 3D-consistent generation capability. Furthermore, we introduce a few-view resampling training method and utilize a super-resolution model to obtain refined high-resolution UV textures (2048$\times$2048) for 3D texturing. Extensive experiments collectively validate the superior performance of Decorate3D in retexturing real-world 3D objects. Project page: https://decorate3d.github.io/Decorate3D/.", "keywords": "Texture Generation;Text-Driven;3D-Consistent Editing;Neural Radiance Field", "primary_area": "", "supplementary_material": "/attachment/69ba365dc83d2d0beb3559d5007cbe2974151a7e.pdf", "author": "Yanhui Guo;Xinxin Zuo;Peng Dai;Juwei Lu;Xiaolin Wu;Li Cheng;Youliang Yan;Songcen Xu;Xiaofei Wu", "authorids": "~Yanhui_Guo1;~Xinxin_Zuo1;~Peng_Dai2;~Juwei_Lu2;~Xiaolin_Wu2;~Li_Cheng1;~Youliang_Yan1;~Songcen_Xu1;~Xiaofei_Wu1", "gender": "M;F;M;M;;Not Specified;M;M;M", "homepage": ";https://sites.google.com/site/xinxinzuohome/;http://pdaicode.github.io/;http://www.dsp.utoronto.ca/juwei/;http://www.ece.mcmaster.ca/~xwu;https://www.ece.ualberta.ca/~lcheng5/;;http://www.xusongcen.com/;", "dblp": ";167/3181;08/3547-2.html;06/827;w/XiaolinWu;13/4938-1;135/5316.html;131/6572;", "google_scholar": "XwxwxfQAAAAJ;lv0UjhIAAAAJ;https://scholar.google.ca/citations?user=pOpgtRgAAAAJ;https://scholar.google.ca/citations?user=Asz24wcAAAAJ;ZuQnEIgAAAAJ;https://scholar.google.ca/citations?user=9IRFiEQAAAAJ;;_xVW9SgAAAAJ;CseafDAAAAAJ", "orcid": "0000-0002-9908-3795;0000-0002-7116-9634;;;;0000-0003-3261-3533;;;", "linkedin": ";xinxin-zuo-898419199/;peng-dai-6b426120/;https://linkedin.com/in/juwei-lu-35642621;;;;songcen-xu-2b313465/;", "or_profile": "~Yanhui_Guo1;~Xinxin_Zuo1;~Peng_Dai2;~Juwei_Lu2;~Xiaolin_Wu2;~Li_Cheng1;~Youliang_Yan1;~Songcen_Xu1;~Xiaofei_Wu1", "aff": "McMaster University;Huawei Technologies Ltd.;Huawei Technologies Canada;Huawei Technologies Ltd.;McMaster University;University of Alberta;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Technologies Ltd.", "aff_domain": "mcmaster.ca;huawei.com;huawei.com;huawei.com;mcmaster.ca;ualberta.ca;huawei.com;huawei.com;huawei.com", "position": 
"Ph.D.;Researcher;Researcher;Sr Principal Scientist;Full Professor;Full Professor;Principal Researcher;Principal Engineer;Researcher", "bibtex": "@inproceedings{\nguo2023decorated,\ntitle={Decorate3D: Text-Driven High-Quality Texture Generation for Mesh Decoration in the Wild},\nauthor={Yanhui Guo and Xinxin Zuo and Peng Dai and Juwei Lu and Xiaolin Wu and Li Cheng and Youliang Yan and Songcen Xu and Xiaofei Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1recIOnzOF}\n}", "github": "", "project": "", "reviewers": "7jco;U4jz;xCPF;MiDH", "pdf_size": 28811467, "rating": "6;6;7;8", "confidence": "3;4;3;4", "soundness": "3;4;4;3", "novelty": "3;2;4;3", "presentation": "3;4;2;4", "wc_summary": "126;130;165;70", "wc_strengths": "159;106;191;47", "wc_weaknesses": "431;221;661;114", "wc_questions": "84;84;87;16", "wc_limitations": "8;109;65;8", "wc_review": "808;650;1169;255", "wc_reply_reviewers": "32;565;71;36", "wc_reply_authors": "102;1090;102;69", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;3;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 122.75, 34.02480712656576 ], "wc_strengths_avg": [ 125.75, 54.66888969057265 ], "wc_weaknesses_avg": [ 356.75, 209.42585203360161 ], "wc_questions_avg": [ 67.75, 29.902968080108703 ], "wc_limitations_avg": [ 47.5, 42.45291509425472 ], "wc_review_avg": [ 720.5, 328.05068205995246 ], "wc_reply_reviewers_avg": [ 176.0, 225.10108840252195 ], "wc_reply_authors_avg": [ 340.75, 432.7894262802639 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16378955051264023248&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "mcmaster.ca;huawei.com;huawei.com;huawei.com;mcmaster.ca;ualberta.ca;huawei.com;huawei.com;huawei.com", "author_num": 9, "aff_unique_index": "0;1;1;1;0;2;1;1;1", "aff_unique_norm": "McMaster University;Huawei;University of Alberta", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.mcmaster.ca;https://www.huawei.com;https://www.ualberta.ca", "aff_unique_abbr": "McMaster;Huawei;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0;1;1;1", "aff_country_unique": "Canada;China" }, { "title": "Topological Obstructions and How to Avoid Them", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72988", "id": "1tviRBNxI9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c12ccfc7720f6b680edea17300bfc2b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1tviRBNxI9", "openreview": "https://openreview.net/forum?id=1tviRBNxI9", "poster": "/media/PosterPDFs/NeurIPS%202023/72988.png?t=1702408568.219549", "slides": "https://nips.cc/virtual/2023/poster/72988", "video": "https://nips.cc/virtual/2023/poster/72988", "author_site": "Babak Esmaeili, Robin Walters, Heiko Zimmermann, Jan-Willem van de Meent", "tldr": "", "abstract": "Incorporating geometric inductive biases into models can aid interpretability and generalization, but encoding to a specific geometric structure can be challenging due to the imposed topological 
constraints. In this paper, we theoretically and empirically characterize obstructions to training encoders with geometric latent spaces. We show that local optima can arise due to singularities (e.g. self-intersection) or due to an incorrect degree or winding number. We then discuss how normalizing flows can potentially circumvent these obstructions by defining multimodal variational distributions. Inspired by this observation, we propose a new flow-based model that maps data points to multimodal distributions over geometric spaces and empirically evaluate our model on 2 domains. We observe improved stability during training and a higher chance of converging to a homeomorphic encoder.", "keywords": "representation learning;variational autoencoders;homeomorphism;topological;equivariant;lie groups;normalizing flows", "primary_area": "", "supplementary_material": "", "author": "Babak Esmaeili;Robin Walters;Heiko Zimmermann;Jan-Willem van de Meent", "authorids": "~Babak_Esmaeili2;~Robin_Walters1;~Heiko_Zimmermann1;~Jan-Willem_van_de_Meent1", "gender": "M;;M;M", "homepage": "http://www.robinwalters.com;;https://jwvdm.github.io/;https://babak0032.github.io/", "dblp": "258/3416;96/10433;137/3263;34/3055-1.html", "google_scholar": "fnprJmUAAAAJ;6etmkQYAAAAJ;CX9Lu38AAAAJ;Sxgjz3QAAAAJ", "orcid": ";;0000-0001-9465-5398;", "linkedin": ";;;", "or_profile": "~Robin_Walters1;~Heiko_Zimmermann1;~Jan-Willem_van_de_Meent1;~Babak_Esmaeili1", "aff": "Northeastern University ;University of Amsterdam;Northeastern University;University of Amsterdam", "aff_domain": "northeastern.edu;uva.nl;northeastern.edu;uva.nl", "position": "Assistant Professor;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nesmaeili2023topological,\ntitle={Topological Obstructions and How to Avoid Them},\nauthor={Babak Esmaeili and Robin Walters and Heiko Zimmermann and Jan-Willem van de Meent},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1tviRBNxI9}\n}", "github": "", "project": "", "reviewers": "1s1k;mNP4;WSwE;qXsa", "pdf_size": 5330460, "rating": "5;5;6;6", "confidence": "3;4;3;1", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "28;133;87;53", "wc_strengths": "219;145;61;52", "wc_weaknesses": "226;239;376;101", "wc_questions": "85;4;196;23", "wc_limitations": "16;14;1;7", "wc_review": "574;535;721;236", "wc_reply_reviewers": "14;0;57;62", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 39.37242055043098 ], "wc_strengths_avg": [ 119.25, 68.06017558014378 ], "wc_weaknesses_avg": [ 235.5, 97.3819798525374 ], "wc_questions_avg": [ 77.0, 74.94998332221296 ], "wc_limitations_avg": [ 9.5, 5.937171043518958 ], "wc_review_avg": [ 516.5, 176.17391974977454 ], "wc_reply_reviewers_avg": [ 33.25, 26.771019778857884 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5697271251248778807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": 
"northeastern.edu;uva.nl;northeastern.edu;uva.nl", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Northeastern University;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.uva.nl", "aff_unique_abbr": "NEU;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Netherlands" }, { "title": "MM-Fi: Multi-Modal Non-Intrusive 4D Human Dataset for Versatile Wireless Sensing", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73714", "id": "1uAsASS1th", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3baf7a39d07e9f4f1e258a412df94521-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1uAsASS1th", "openreview": "https://openreview.net/forum?id=1uAsASS1th", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73714", "video": "https://nips.cc/virtual/2023/poster/73714", "author_site": "Jianfei Yang, He Huang, Yunjiao Zhou, Xinyan Chen, Yuecong Xu, Shenghai Yuan, Han Zou, Chris Xiaoxuan Lu, Lihua Xie", "tldr": "", "abstract": "4D human perception plays an essential role in a myriad of applications, such as home automation and metaverse avatar simulation. However, existing solutions which mainly rely on cameras and wearable devices are either privacy intrusive or inconvenient to use. To address these issues, wireless sensing has emerged as a promising alternative, leveraging LiDAR, mmWave radar, and WiFi signals for device-free human sensing. In this paper, we propose MM-Fi, the first multi-modal non-intrusive 4D human dataset with 27 daily or rehabilitation action categories, to bridge the gap between wireless sensing and high-level human perception tasks. MM-Fi consists of over 320k synchronized frames of five modalities from 40 human subjects. Various annotations are provided to support potential sensing tasks, e.g., human pose estimation and action recognition. Extensive experiments have been conducted to compare the sensing capacity of each or several modalities in terms of multiple tasks. 
We envision that MM-Fi can contribute to wireless sensing research with respect to action recognition, human pose estimation, multi-modal learning, cross-modal supervision, and interdisciplinary healthcare research.", "keywords": "wireless sensing;multi-modal dataset;human pose estimation;non-intrusive", "primary_area": "", "supplementary_material": "/attachment/b93e7400c7d1ca12ea4d7f6cb64776f579893295.pdf", "author": "Jianfei Yang;He Huang;Yunjiao Zhou;Xinyan Chen;Yuecong Xu;Shenghai Yuan;Han Zou;Chris Xiaoxuan Lu;Lihua Xie", "authorids": "~Jianfei_Yang4;~He_Huang8;~Yunjiao_Zhou1;~Xinyan_Chen2;~Yuecong_Xu1;~Shenghai_Yuan1;~Han_Zou2;~Chris_Xiaoxuan_Lu1;~Lihua_Xie2", "gender": ";M;;M;M;M;;;M", "homepage": ";;;;https://xuyu0010.github.io;https://www.linkedin.com/in/shenghai-yuan-0613/?originalSubdomain=sg;;;https://personal.ntu.edu.sg/elhxie/", "dblp": ";;;;242/7964;133/3411;;;40/2499", "google_scholar": ";;;XGQNPHAAAAAJ;cqeOXE4AAAAJ;XcV_sesAAAAJ;;;Fmrv3J8AAAAJ", "orcid": ";;;0000-0002-9174-6558;0000-0002-4292-7379;0009-0003-1887-6342;;;0000-0002-7137-4136", "linkedin": ";he-huang-a5b433269/;;;xuyu0014/;shenghai-yuan-0613/;;;https://www.linkedin.com/posts/ntueee_research-project-collaboration-activity-7054010243399188480-ZJaM", "or_profile": "~Jianfei_Yang4;~He_Huang8;~Yunjiao_Zhou1;~Xinyan_Chen2;~Yuecong_Xu1;~Shenghai_Yuan1;~Han_Zou2;~Chris_Xiaoxuan_Lu1;~Lihua_Xie2", "aff": ";Nanyang Technological University;;Nanyang Technological University;Institute for Infocomm Research, A*STAR;Nanyang Technological University;;;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;;ntu.edu.sg;i2r.a-star.edu.sg;ntu.edu;;;ntu.edu.sg", "position": ";PhD student;;Undergrad student;Researcher;Postdoc;;;Full Professor", "bibtex": "@inproceedings{\nyang2023mmfi,\ntitle={{MM}-Fi: Multi-Modal Non-Intrusive 4D Human Dataset for Versatile Wireless Sensing},\nauthor={Jianfei Yang and He Huang and Yunjiao Zhou and Xinyan Chen and Yuecong Xu and Shenghai Yuan and Han Zou and Chris Xiaoxuan Lu and Lihua Xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=1uAsASS1th}\n}", "github": "", "project": "", "reviewers": "j8DT;bm2n;boR4;2naR;RJAA", "pdf_size": 1621925, "rating": "6;6;6;7;7", "confidence": "4;4;3;5;4", "wc_summary_and_contributions": "52;46;24;65;77", "wc_strengths": "71;54;59;37;100", "wc_improvement": "51;173;136;78;131", "wc_limitations": "143;131;39;376;17", "wc_correctness": "9;142;1;129;17", "wc_clarity": "6;5;3;10;21", "wc_relation_to_prior_work": "12;19;1;45;20", "wc_documentation": "6;4;21;26;89", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "351;575;285;767;473", "wc_reply_reviewers": "29;217;76;22;154", "wc_reply_authors": "1881;2223;1866;1676;1329", "reply_reviewers": "1;1;1;1;1", "reply_authors": "5;6;5;4;4", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 52.8, 17.948816116947658 ], "wc_strengths_avg": [ 64.2, 20.970455407548975 ], "wc_improvement_avg": [ 113.8, 43.63209827638364 ], "wc_limitations_avg": [ 141.2, 127.36467328109471 ], "wc_correctness_avg": [ 59.6, 62.31404336102738 ], "wc_clarity_avg": [ 9.0, 6.418722614352485 ], "wc_relation_to_prior_work_avg": [ 19.4, 14.485855169785456 ], "wc_documentation_avg": [ 29.2, 31.070242998727895 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 490.2, 170.639268634157 ], "wc_reply_reviewers_avg": [ 99.6, 75.23988304084477 ], 
"wc_reply_authors_avg": [ 1795.0, 292.21156719062304 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.8, 0.7483314773547882 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1624253686676011645&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": ";ntu.edu.sg;;ntu.edu.sg;i2r.a-star.edu.sg;ntu.edu;;;ntu.edu.sg", "author_num": 9, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Nanyang Technological University;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "NTU;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Explaining V1 Properties with a Biologically Constrained Deep Learning Architecture", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72987", "id": "1uirUsR9E7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d1ef4aba0503226330661d74fdb236e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1uirUsR9E7", "openreview": "https://openreview.net/forum?id=1uirUsR9E7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72987", "video": "https://nips.cc/virtual/2023/poster/72987", "author_site": "Galen Pogoncheff, Jacob Granley, Michael Beyeler", "tldr": "", "abstract": "Convolutional neural networks (CNNs) have recently emerged as promising models of the ventral visual stream, despite their lack of biological specificity.\nWhile current state-of-the-art models of the primary visual cortex (V1) have surfaced from training with adversarial examples and extensively augmented data, these models are still unable to explain key neural properties observed in V1 that arise from biological circuitry.\nTo address this gap, we systematically incorporated neuroscience-derived architectural components into CNNs to identify a set of mechanisms and architectures that more comprehensively explain V1 activity.\nUpon enhancing task-driven CNNs with architectural components that simulate center-surround antagonism, local receptive fields, tuned normalization, and cortical magnification, we uncover models with latent representations that yield state-of-the-art explanation of V1 neural activity and tuning properties.\nMoreover, analyses of the learned parameters of these components and stimuli that maximally activate neurons of the evaluated networks provide support for their role in explaining neural properties of V1.\nOur results highlight an important advancement in the field of NeuroAI, as we systematically establish a set of architectural components that contribute to unprecedented explanation of V1.\nThe neuroscience insights that could be gleaned from increasingly accurate in-silico models of the brain have the potential to greatly advance the fields of both neuroscience and artificial intelligence.", "keywords": "NeuroAI;Neuroscience;Visual Stream;Convolutional Neural Networks;Biologically inspired deep learning", "primary_area": "", "supplementary_material": "/attachment/fe2bc2336ffa52f547234e9199bdad4caa7d6367.zip", "author": "Galen Pogoncheff;Jacob Granley;Michael Beyeler", "authorids": "~Galen_Pogoncheff1;~Jacob_Granley1;~Michael_Beyeler1", "gender": "M;M;M", "homepage": ";;", "dblp": "313/2041;260/5940;136/0857", "google_scholar": 
"mkmrwW4AAAAJ;0jACZrEAAAAJ;dK-0kG4AAAAJ", "orcid": "0000-0001-6248-0992;0000-0002-9024-2454;0000-0001-5233-844X", "linkedin": ";;", "or_profile": "~Galen_Pogoncheff1;~Jacob_Granley1;~Michael_Beyeler1", "aff": "University of California, Santa Barbara;University of California, Santa Barbara;University of California, Santa Barbara", "aff_domain": "ucsb.edu;cs.ucsb.edu;ucsb.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npogoncheff2023explaining,\ntitle={Explaining V1 Properties with a Biologically Constrained Deep Learning Architecture},\nauthor={Galen Pogoncheff and Jacob Granley and Michael Beyeler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1uirUsR9E7}\n}", "github": "", "project": "", "reviewers": "EW9X;bbGj;Y5Aa;yZMG", "pdf_size": 7417267, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "89;73;113;36", "wc_strengths": "32;89;121;105", "wc_weaknesses": "104;356;148;150", "wc_questions": "37;379;12;296", "wc_limitations": "14;8;10;6", "wc_review": "276;905;404;593", "wc_reply_reviewers": "122;241;66;162", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 27.99441908666797 ], "wc_strengths_avg": [ 86.75, 33.573613150806395 ], "wc_weaknesses_avg": [ 189.5, 97.87108868302222 ], "wc_questions_avg": [ 181.0, 159.47256817396527 ], "wc_limitations_avg": [ 9.5, 2.958039891549808 ], "wc_review_avg": [ 544.5, 236.71977103740196 ], "wc_reply_reviewers_avg": [ 147.75, 63.72744699107285 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=748863609542368139&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ucsb.edu;cs.ucsb.edu;ucsb.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Boosting with Tempered Exponential Measures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72986", "id": "1vvsIJtnnr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/82d3258eb58ceac31744a88005b7ddef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1vvsIJtnnr", "openreview": "https://openreview.net/forum?id=1vvsIJtnnr", "poster": "/media/PosterPDFs/NeurIPS%202023/72986.png?t=1700083881.0280643", "slides": "https://nips.cc/virtual/2023/poster/72986", "video": "https://nips.cc/virtual/2023/poster/72986", "author_site": "Richard Nock, Ehsan Amid, Manfred Warmuth", "tldr": "", "abstract": "One of the most popular ML algorithms, AdaBoost, can be\nderived from the dual of a relative entropy\nminimization problem subject to the fact that the positive weights\non the examples sum to one. 
Essentially, harder examples receive higher probabilities. We generalize this setup to the recently introduced *tempered exponential measures* (TEMs) where normalization is enforced on a specific power of the measure and not the measure itself.\nTEMs are indexed by a parameter $t$ and generalize exponential families ($t=1$). Our algorithm, $t$-AdaBoost, recovers AdaBoost as a special case ($t=1$). We show that $t$-AdaBoost retains AdaBoost's celebrated exponential convergence rate when $t\in [0,1)$ while allowing a slight improvement of the rate's hidden constant compared to $t=1$. $t$-AdaBoost partially computes on a generalization of classical arithmetic over the reals and brings notable properties like guaranteed bounded leveraging coefficients for $t\in [0,1)$. From the loss that $t$-AdaBoost minimizes (a generalization of the exponential loss), we show how to derive a new family of *tempered* losses for the induction of domain-partitioning classifiers like decision trees. Crucially, strict properness is ensured for all while their boosting rates span the full known spectrum. Experiments using $t$-AdaBoost+trees display that significant leverage can be achieved by tuning $t$.", "keywords": "Boosting;optimization;exponential families", "primary_area": "", "supplementary_material": "", "author": "Richard Nock;Ehsan Amid;Manfred K Warmuth", "authorids": "~Richard_Nock1;~Ehsan_Amid1;~Manfred_K_Warmuth1", "gender": ";M;M", "homepage": "http://users.cecs.anu.edu.au/~rnock/;https://sites.google.com/corp/view/eamid/;https://mwarmuth.bitbucket.io/", "dblp": "n/RichardNock;142/5754;w/ManfredKWarmuth.html", "google_scholar": "https://scholar.google.fr/citations?user=0J2s3YQAAAAJ;https://scholar.google.fi/citations?user=F6omR3gAAAAJ;LR6kjO4AAAAJ", "orcid": ";;", "linkedin": ";ehsan-amid-63aba754;", "or_profile": "~Richard_Nock1;~Ehsan_Amid1;~Manfred_K_Warmuth1", "aff": "Google Research;Google DeepMind;Google Research", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nnock2023boosting,\ntitle={Boosting with Tempered Exponential Measures},\nauthor={Richard Nock and Ehsan Amid and Manfred K Warmuth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1vvsIJtnnr}\n}", "github": "", "project": "", "reviewers": "J18J;4pmA;tiN4;KBAC;6L9a;XQ4B", "pdf_size": 6374163, "rating": "6;6;6;6;6;7", "confidence": "4;3;2;2;3;4", "soundness": "3;3;3;3;3;4", "novelty": "3;2;3;3;3;3", "presentation": "3;3;3;2;3;4", "wc_summary": "35;57;194;101;143;112", "wc_strengths": "44;93;21;105;113;71", "wc_weaknesses": "244;123;35;191;222;399", "wc_questions": "5;39;175;180;223;266", "wc_limitations": "2;29;10;15;73;15", "wc_review": "330;341;435;592;774;863", "wc_reply_reviewers": "8;52;10;440;252;39", "wc_reply_authors": "0;39;0;0;41;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;2;1;1;2;1", "rating_avg": [ 6.166666666666667, 0.37267799624996495 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 107.0, 52.614953514503206 ], "wc_strengths_avg": [ 74.5, 33.06433123473088 ], "wc_weaknesses_avg": [ 202.33333333333334, 111.96973797514319 ], "wc_questions_avg": [ 148.0, 94.54452213992445 ], "wc_limitations_avg": [ 24.0, 23.338094752285727 ], "wc_review_avg": [ 
555.8333333333334, 206.15966034982586 ], "wc_reply_reviewers_avg": [ 133.5, 160.49896157504156 ], "wc_reply_authors_avg": [ 13.333333333333334, 18.865017595774695 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5477225575051662, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11730081620744929270&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Unexpected Improvements to Expected Improvement for Bayesian Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72985", "id": "1vyAG6j9PE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/419f72cbd568ad62183f8132a3605a2a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1vyAG6j9PE", "openreview": "https://openreview.net/forum?id=1vyAG6j9PE", "poster": "/media/PosterPDFs/NeurIPS%202023/72985.png?t=1702406342.031816", "slides": "https://nips.cc/virtual/2023/poster/72985", "video": "https://nips.cc/virtual/2023/poster/72985", "author_site": "Sebastian Ament, Samuel Daulton, David Eriksson, Maximilian Balandat, Eytan Bakshy", "tldr": "", "abstract": "Expected Improvement (EI) is arguably the most popular acquisition function in Bayesian optimization and has found countless successful applications, but its performance is often exceeded by that of more recent methods. Notably, EI and its variants, including for the parallel and multi-objective settings, are challenging to optimize because their acquisition values vanish numerically in many regions. This difficulty generally increases as the number of observations, dimensionality of the search space, or the number of constraints grows, resulting in performance that is inconsistent across the literature and most often sub-optimal. Herein, we propose LogEI, a new family of acquisition functions whose members either have identical or approximately equal optima as their canonical counterparts, but are substantially easier to optimize numerically. We demonstrate that numerical pathologies manifest themselves in \u201cclassic\u201d analytic EI, Expected Hypervolume Improvement (EHVI), as well as their constrained, noisy, and parallel variants, and propose corresponding reformulations that remedy these pathologies. 
Our empirical results show that members of the LogEI family of acquisition functions substantially improve on the optimization performance of their canonical counterparts and surprisingly, are on par with or exceed the performance of recent state-of-the-art acquisition functions, highlighting the understated role of numerical optimization in the literature.", "keywords": "Bayesian Optimization;Gaussian Process;Multi-Objective Optimization", "primary_area": "", "supplementary_material": "/attachment/405d93626d8e98cfe955208d89d5d300ba70895c.zip", "author": "Sebastian Ament;Sam Daulton;David Eriksson;Maximilian Balandat;Eytan Bakshy", "authorids": "~Sebastian_Ament1;~Sam_Daulton1;~David_Eriksson2;~Maximilian_Balandat1;~Eytan_Bakshy1", "gender": "M;M;;M;M", "homepage": "https://sebastianament.github.io/;;https://research.facebook.com/people/balandat-max/;http://eytan.github.io;https://sdaulton.github.io/", "dblp": ";29/2816;41/9185;58/2226;202/1749", "google_scholar": "1vkpStcAAAAJ;SWQjkN4AAAAJ;N0iLicUAAAAJ;8y9rrq0AAAAJ;beXm1FwAAAAJ", "orcid": ";;0000-0002-8214-8935;;", "linkedin": ";davideriksson89/;maximilian-balandat-b5843946/;;samuel-daulton/", "or_profile": "~Sebastian_Ament1;~David_Eriksson2;~Maximilian_Balandat1;~Eytan_Bakshy1;~Samuel_Daulton1", "aff": "Meta;Meta;Meta;Meta;University of Oxford", "aff_domain": "meta.com;meta.com;meta.com;meta.com;ox.ac.uk", "position": "Researcher;Research scientist;Research Scientist Manager;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nament2023unexpected,\ntitle={Unexpected Improvements to Expected Improvement for Bayesian Optimization},\nauthor={Sebastian Ament and Sam Daulton and David Eriksson and Maximilian Balandat and Eytan Bakshy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1vyAG6j9PE}\n}", "github": "", "project": "", "reviewers": "u8yk;o9Cr;QK5L;UCkN", "pdf_size": 2504012, "rating": "7;7;7;8", "confidence": "5;4;3;5", "soundness": "4;4;3;4", "novelty": "3;4;3;4", "presentation": "4;4;3;4", "wc_summary": "68;74;85;146", "wc_strengths": "83;95;108;146", "wc_weaknesses": "521;14;6;183", "wc_questions": "42;43;25;90", "wc_limitations": "17;13;1;133", "wc_review": "731;239;225;698", "wc_reply_reviewers": "309;41;0;110", "wc_reply_authors": "596;0;0;20", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.25, 31.059418861272984 ], "wc_strengths_avg": [ 108.0, 23.65375234502974 ], "wc_weaknesses_avg": [ 181.0, 208.6372449971481 ], "wc_questions_avg": [ 50.0, 24.176434807473164 ], "wc_limitations_avg": [ 41.0, 53.44155686354955 ], "wc_review_avg": [ 473.25, 241.5826721849065 ], "wc_reply_reviewers_avg": [ 115.0, 118.70341191389572 ], "wc_reply_authors_avg": [ 154.0, 255.3194078012872 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17477126380898187572&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "meta.com;meta.com;meta.com;meta.com;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Meta;University of Oxford", "aff_unique_dep": 
"Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ox.ac.uk", "aff_unique_abbr": "Meta;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Rethinking Bias Mitigation: Fairer Architectures Make for Fairer Face Recognition", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72984", "id": "1vzF4zWQ1E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb3c42ddfa16d8421fdba13528107cc1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1vzF4zWQ1E", "openreview": "https://openreview.net/forum?id=1vzF4zWQ1E", "poster": "/media/PosterPDFs/NeurIPS%202023/72984.png?t=1701949747.6648953", "slides": "https://nips.cc/virtual/2023/poster/72984", "video": "https://nips.cc/virtual/2023/poster/72984", "author_site": "Samuel Dooley, Rhea Sukthanker, John Dickerson, Colin White, Frank Hutter, Micah Goldblum", "tldr": "", "abstract": "Face recognition systems are widely deployed in safety-critical applications, including law enforcement, yet they exhibit bias across a range of socio-demographic dimensions, such as gender and race. Conventional wisdom dictates that model biases arise from biased training data. As a consequence, previous works on bias mitigation largely focused on pre-processing the training data, adding penalties to prevent bias from effecting the model during training, or post-processing predictions to debias them, yet these approaches have shown limited success on hard problems such as face recognition. In our work, we discover that biases are actually inherent to neural network architectures themselves. Following this reframing, we conduct the first neural architecture search for fairness, jointly with a search for hyperparameters. Our search outputs a suite of models which Pareto-dominate all other high-performance architectures and existing bias mitigation methods in terms of accuracy and fairness, often by large margins, on the two most widely used datasets for face identification, CelebA and VGGFace2. Furthermore, these models generalize to other datasets and sensitive attributes. 
We release our code, models and raw data files at https://github.com/dooleys/FR-NAS.", "keywords": "Bias Mitigation;Fairness;Facial Recognition", "primary_area": "", "supplementary_material": "", "author": "Samuel Dooley;Rhea Sanjay Sukthanker;John P Dickerson;Colin White;Frank Hutter;Micah Goldblum", "authorids": "~Samuel_Dooley1;~Rhea_Sanjay_Sukthanker3;~John_P_Dickerson1;~Colin_White1;~Frank_Hutter1;~Micah_Goldblum1", "gender": ";F;M;M;M;", "homepage": ";https://rheasukthanker.github.io/;https://jpdickerson.com/;https://crwhite.ml/;http://ml.informatik.uni-freiburg.de/~hutter/;", "dblp": ";277/5077;75/8479;136/9162;89/5383;241/7231", "google_scholar": ";OsamqmMAAAAJ;https://scholar.google.com.tw/citations?user=QgDpfCQAAAAJ;LS6HY-gAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ;pGDKzuUAAAAJ", "orcid": ";;0000-0003-2231-680X;;0000-0002-2037-3694;", "linkedin": ";rhea-sukthanker-006502116/;john-dickerson-83a74a7/;;frank-hutter-9190b24b/;", "or_profile": "~Samuel_Dooley1;~Rhea_Sanjay_Sukthanker3;~John_P_Dickerson1;~Colin_White1;~Frank_Hutter1;~Micah_Goldblum1", "aff": ";University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;Optimized Markets, Inc;Abacus.AI;Albert-Ludwigs-Universit\u00e4t Freiburg;New York University", "aff_domain": ";cs.uni-freiburg.de;optimizedmarkets.com;abacus.ai;uni-freiburg.de;nyu.edu", "position": ";PhD student;Consultant;Head of Research;Full Professor;Postdoc", "bibtex": "@inproceedings{\ndooley2023rethinking,\ntitle={Rethinking Bias Mitigation: Fairer Architectures Make for Fairer Face Recognition},\nauthor={Samuel Dooley and Rhea Sanjay Sukthanker and John P Dickerson and Colin White and Frank Hutter and Micah Goldblum},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1vzF4zWQ1E}\n}", "github": "", "project": "", "reviewers": "mXr9;Kze1;jfj2;3GhH;Nw7v", "pdf_size": 1559645, "rating": "6;6;7;8;8", "confidence": "5;4;4;4;4", "soundness": "2;3;3;4;4", "novelty": "4;3;4;4;4", "presentation": "3;3;3;4;4", "wc_summary": "33;70;76;183;187", "wc_strengths": "28;77;86;93;53", "wc_weaknesses": "78;68;231;142;306", "wc_questions": "1;5;20;68;32", "wc_limitations": "1;1;2;21;30", "wc_review": "141;221;415;507;608", "wc_reply_reviewers": "12;18;32;33;22", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 109.8, 63.155047304233726 ], "wc_strengths_avg": [ 67.4, 23.888072337465825 ], "wc_weaknesses_avg": [ 165.0, 91.37176806869833 ], "wc_questions_avg": [ 25.2, 24.078205913231987 ], "wc_limitations_avg": [ 11.0, 12.181953866272849 ], "wc_review_avg": [ 378.4, 174.19942594624126 ], "wc_reply_reviewers_avg": [ 23.4, 8.089499366462674 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5590169943749476, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5599345963435592307&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": ";cs.uni-freiburg.de;optimizedmarkets.com;abacus.ai;uni-freiburg.de;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Freiburg;Optimized Markets, 
Inc;Abacus.AI;Albert-Ludwigs-Universit\u00e4t Freiburg;New York University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uni-freiburg.de;;https://www.abacus.ai;https://www.uni-freiburg.de;https://www.nyu.edu", "aff_unique_abbr": "UoF;;Abacus.AI;Albert-Ludwigs-Universit\u00e4t;NYU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Freiburg;", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Germany;United States" }, { "title": "Hierarchical VAEs provide a normative account of motion processing in the primate brain", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72983", "id": "1wOkHN9JK8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/909d6b6a7c6ac13ea51de4c4cace35db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1wOkHN9JK8", "openreview": "https://openreview.net/forum?id=1wOkHN9JK8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72983", "video": "https://nips.cc/virtual/2023/poster/72983", "author_site": "Hadi Vafaii, Jacob Yates, Daniel Butts", "tldr": "", "abstract": "The relationship between perception and inference, as postulated by Helmholtz in the 19th century, is paralleled in modern machine learning by generative models like Variational Autoencoders (VAEs) and their hierarchical variants. Here, we evaluate the role of hierarchical inference and its alignment with brain function in the domain of motion perception. We first introduce a novel synthetic data framework, Retinal Optic Flow Learning (ROFL), which enables control over motion statistics and their causes. We then present a new hierarchical VAE and test it against alternative models on two downstream tasks: (i) predicting ground truth causes of retinal optic flow (e.g., self-motion); and (ii) predicting the responses of neurons in the motion processing pathway of primates. We manipulate the model architectures (hierarchical versus non-hierarchical), loss functions, and the causal structure of the motion stimuli. We find that hierarchical latent structure in the model leads to several improvements. First, it improves the linear decodability of ground truth variables and does so in a sparse and disentangled manner. Second, our hierarchical VAE outperforms previous state-of-the-art models in predicting neuronal responses and exhibits sparse latent-to-neuron relationships. These results depend on the causal structure of the world, indicating that alignment between brains and artificial neural networks depends not only on architecture but also on matching ecologically relevant stimulus statistics. Taken together, our results suggest that hierarchical Bayesian inference underlies the brain's understanding of the world, and hierarchical VAEs can effectively model this understanding.", "keywords": "NeuroAI;VAE;Dorsal stream;Hierarchical Bayesian Inference", "primary_area": "", "supplementary_material": "", "author": "Hadi Vafaii;Jacob L. Yates;Daniel A. 
Butts", "authorids": "~Hadi_Vafaii1;~Jacob_L._Yates1;~Daniel_A._Butts1", "gender": "M;Not Specified;M", "homepage": ";https://jake.vision/;https://neurotheory.umd.edu", "dblp": ";91/11540;155/2721.html", "google_scholar": "caQ3wQIAAAAJ;UJm-TkYAAAAJ;Fn-1N4AAAAAJ", "orcid": "0000-0002-4153-5373;0000-0001-8322-5982;0000-0002-0158-5317", "linkedin": ";;", "or_profile": "~Hadi_Vafaii1;~Jacob_L._Yates1;~Daniel_A._Butts1", "aff": "University of Maryland, College Park;University of California, Berkeley;University of Maryland, College Park", "aff_domain": "umd.edu;berkeley.edu;umd.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nvafaii2023hierarchical,\ntitle={Hierarchical {VAE}s provide a normative account of motion processing in the primate brain},\nauthor={Hadi Vafaii and Jacob L. Yates and Daniel A. Butts},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1wOkHN9JK8}\n}", "github": "", "project": "", "reviewers": "nG5y;Y46x;ZQpT;Hbpj", "pdf_size": 4440460, "rating": "4;5;7;7", "confidence": "5;3;4;4", "soundness": "2;3;2;4", "novelty": "2;3;3;4", "presentation": "3;2;2;4", "wc_summary": "153;108;84;110", "wc_strengths": "52;32;101;71", "wc_weaknesses": "151;309;232;95", "wc_questions": "230;75;529;97", "wc_limitations": "5;4;12;8", "wc_review": "591;528;958;381", "wc_reply_reviewers": "323;147;85;0", "wc_reply_authors": "902;600;216;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 113.75, 24.863376681376163 ], "wc_strengths_avg": [ 64.0, 25.42636427018224 ], "wc_weaknesses_avg": [ 196.75, 81.06902922818307 ], "wc_questions_avg": [ 232.75, 181.02814007772383 ], "wc_limitations_avg": [ 7.25, 3.112474899497183 ], "wc_review_avg": [ 614.5, 212.45528941403177 ], "wc_reply_reviewers_avg": [ 138.75, 118.48707735445245 ], "wc_reply_authors_avg": [ 429.5, 347.26754815271755 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14240048483741597815&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "umd.edu;berkeley.edu;umd.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Maryland;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.berkeley.edu", "aff_unique_abbr": "UMD;UC Berkeley", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "College Park;Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Computing a human-like reaction time metric from stable recurrent vision models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72982", "id": "1xPsn2gCOe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e351740d4ec4200df6160f34cd181c3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1xPsn2gCOe", "openreview": "https://openreview.net/forum?id=1xPsn2gCOe", "poster": "/media/PosterPDFs/NeurIPS%202023/72982.png?t=1702047632.3692036", "slides": 
"https://nips.cc/virtual/2023/poster/72982", "video": "https://nips.cc/virtual/2023/poster/72982", "author_site": "Lore Goetschalckx, Lakshmi Narasimhan Govindarajan, Alekh Karkada Ashok, Aarit Ahuja, David Sheinberg, Thomas Serre", "tldr": "", "abstract": "The meteoric rise in the adoption of deep neural networks as computational models of vision has inspired efforts to ``align\u201d these models with humans. One dimension of interest for alignment includes behavioral choices, but moving beyond characterizing choice patterns to capturing temporal aspects of visual decision-making has been challenging. Here, we sketch a general-purpose methodology to construct computational accounts of reaction times from a stimulus-computable, task-optimized model. Specifically, we introduce a novel metric leveraging insights from subjective logic theory summarizing evidence accumulation in recurrent vision models. We demonstrate that our metric aligns with patterns of human reaction times for stimulus manipulations across four disparate visual decision-making tasks spanning perceptual grouping, mental simulation, and scene categorization. This work paves the way for exploring the temporal alignment of model and human visual strategies in the context of various other cognitive tasks toward generating testable hypotheses for neuroscience. Links to the code and data can be found on the project page: https://serre-lab.github.io/rnn_rts_site/.", "keywords": "alignment;RNNs;reaction times;equilibrium dynamics;perceptual grouping;decision making", "primary_area": "", "supplementary_material": "/attachment/3301fdd8ff2004e158a84d988dacacbdeb311f37.zip", "author": "Lore Goetschalckx;Lakshmi Narasimhan Govindarajan;Alekh Karkada Ashok;Aarit Ahuja;David Sheinberg;Thomas Serre", "authorids": "~Lore_Goetschalckx1;~Lakshmi_Narasimhan_Govindarajan3;~Alekh_Karkada_Ashok1;~Aarit_Ahuja1;~David_Sheinberg1;~Thomas_Serre1", "gender": "F;;M;M;M;M", "homepage": "https://loregoetschalckx.github.io/;;;;https://sheinberglab.org;https://serre-lab.clps.brown.edu/", "dblp": "249/8615;;230/2212;;;", "google_scholar": "9nZ0bZkAAAAJ;;;;;kZlPW4wAAAAJ", "orcid": "0000-0002-9638-7881;;;;;", "linkedin": "lore-goetschalckx/;;;aaritahuja/;;", "or_profile": "~Lore_Goetschalckx1;~Lakshmi_Narasimhan_Govindarajan3;~Alekh_Karkada_Ashok1;~Aarit_Ahuja1;~David_Sheinberg1;~Thomas_Serre1", "aff": "Brown University;;Brown University;Exponent Inc.;Brown University;Universit\u00e9 de Toulouse", "aff_domain": "brown.edu;;brown.edu;exponent.com;brown.edu;univ-toulouse.fr", "position": "Postdoc;;PhD student;Scientist;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngoetschalckx2023computing,\ntitle={Computing a human-like reaction time metric from stable recurrent vision models},\nauthor={Lore Goetschalckx and Lakshmi Narasimhan Govindarajan and Alekh Karkada Ashok and Aarit Ahuja and David Sheinberg and Thomas Serre},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1xPsn2gCOe}\n}", "github": "", "project": "", "reviewers": "4B3C;JuhY;fhSL;yVHQ", "pdf_size": 20795776, "rating": "6;7;8;8", "confidence": "4;5;4;3", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "77;384;128;37", "wc_strengths": "92;13;178;48", "wc_weaknesses": "302;62;151;144", "wc_questions": "252;18;8;38", "wc_limitations": "10;1;9;25", "wc_review": "733;478;474;292", "wc_reply_reviewers": "424;0;0;0", "wc_reply_authors": "399;0;0;0", "reply_reviewers": 
"2;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 156.5, 135.24884472704377 ], "wc_strengths_avg": [ 82.75, 61.70646238442129 ], "wc_weaknesses_avg": [ 164.75, 86.62382755339318 ], "wc_questions_avg": [ 79.0, 100.46392387319938 ], "wc_limitations_avg": [ 11.25, 8.671072598012312 ], "wc_review_avg": [ 494.25, 156.98785781072368 ], "wc_reply_reviewers_avg": [ 106.0, 183.597385602301 ], "wc_reply_authors_avg": [ 99.75, 172.77206805499551 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18076036905430246100&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "brown.edu;;brown.edu;exponent.com;brown.edu;univ-toulouse.fr", "author_num": 6, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Brown University;Exponent Inc.;Universit\u00e9 de Toulouse", "aff_unique_dep": ";;", "aff_unique_url": "https://www.brown.edu;https://www.exponent.com;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;France" }, { "title": "Battle of the Backbones: A Large-Scale Comparison of Pretrained Models across Computer Vision Tasks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73713", "id": "1yOnfDpkVe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d9571470bb750f0e2325a030016f63f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1yOnfDpkVe", "openreview": "https://openreview.net/forum?id=1yOnfDpkVe", "poster": "/media/PosterPDFs/NeurIPS%202023/73713.png?t=1701381490.3201063", "slides": "https://nips.cc/virtual/2023/poster/73713", "video": "https://nips.cc/virtual/2023/poster/73713", "author_site": "Micah Goldblum, Hossein Souri, Renkun Ni, Manli Shu, Viraj Prabhu, Gowthami Somepalli, Prithvijit Chattopadhyay, Mark Ibrahim, Adrien Bardes, Judy Hoffman, Rama Chellappa, Andrew Wilson, Tom Goldstein", "tldr": "", "abstract": "Neural network based computer vision systems are typically built on a backbone, a pretrained or randomly initialized feature extractor. Several years ago, the default option was an ImageNet-trained convolutional neural network. However, the recent past has seen the emergence of countless backbones pretrained using various algorithms and datasets. While this abundance of choice has led to performance increases for a range of systems, it is difficult for practitioners to make informed decisions about which backbone to choose. Battle of the Backbones (BoB) makes this choice easier by benchmarking a diverse suite of pretrained models, including vision-language models, those trained via self-supervised learning, and the Stable Diffusion backbone, across a diverse set of computer vision tasks ranging from classification to object detection to OOD generalization and more. 
Furthermore, BoB sheds light on promising directions for the research community to advance computer vision by illuminating strengths and weaknesses of existing approaches through a comprehensive analysis conducted on more than 1500 training runs. While vision transformers (ViTs) and self-supervised learning (SSL) are increasingly popular, we find that convolutional neural networks pretrained in a supervised fashion on large training sets still perform best on most tasks among the models we consider. Moreover, in apples-to-apples comparisons on the same architectures and similarly sized pretraining datasets, we find that SSL backbones are highly competitive, indicating that future works should perform SSL pretraining with advanced architectures and larger pretraining datasets. We release the raw results of our experiments along with code that allows researchers to put their own backbones through the gauntlet here: https://github.com/hsouri/Battle-of-the-Backbones.", "keywords": "self-supervised learning;object detection;segmentation;out-of-distribution;domain shift;image retrieval;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Micah Goldblum;Hossein Souri;Renkun Ni;Manli Shu;Viraj Uday Prabhu;Gowthami Somepalli;Prithvijit Chattopadhyay;Mark Ibrahim;Adrien Bardes;Judy Hoffman;Rama Chellappa;Andrew Gordon Wilson;Tom Goldstein", "authorids": "~Micah_Goldblum1;~Hossein_Souri1;~Renkun_Ni1;~Manli_Shu1;~Viraj_Uday_Prabhu1;~Gowthami_Somepalli1;~Prithvijit_Chattopadhyay1;~Mark_Ibrahim1;~Adrien_Bardes1;~Judy_Hoffman1;~Rama_Chellappa1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1", "gender": ";M;M;F;M;F;M;;M;F;;Not Specified;M", "homepage": ";https://hsouri.github.io/;https://www.cs.umd.edu/~rn9zm/;https://azshue.github.io/;http://virajprabhu.github.io;https://somepago.github.io/;https://prithv1.xyz/;https://markibrahim.me/;;https://www.cc.gatech.edu/~judy/;;https://cims.nyu.edu/~andrewgw;https://www.cs.umd.edu/~tomg/", "dblp": "241/7231;250/2286;183/7067;263/3503;199/1973;286/5012;179/2452;180/5660;292/3848.html;45/10336;;65/10453;25/8184", "google_scholar": "pGDKzuUAAAAJ;rurbhy0AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;T2ezBDsAAAAJ;https://scholar.google.co.in/citations?user=rIK7AMkAAAAJ;AqYyoCMAAAAJ;SvRU8F8AAAAJ;mqpjAt4AAAAJ;;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;KmSuVtgAAAAJ", "orcid": ";0000-0001-5264-798X;;;;;;;;;;;", "linkedin": ";hossein-souri-b7574795/;;manli-shu-a804a8164/;viraj-prabhu-0a2a9435/;;;;adrien-bardes-48a080129/;;;;", "or_profile": "~Micah_Goldblum1;~Hossein_Souri1;~Renkun_Ni1;~Manli_Shu1;~Viraj_Uday_Prabhu1;~Gowthami_Somepalli1;~Prithvijit_Chattopadhyay1;~Mark_Ibrahim1;~Adrien_Bardes1;~Judy_Hoffman1;~Rama_Chellappa1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1", "aff": "New York University;Johns Hopkins University;Department of Computer Science, University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park;Georgia Institute of Technology;University of Maryland, College Park;Georgia Institute of Technology;Facebook AI Research (FAIR) Meta;INRIA;Georgia Institute of Technology;;New York University;University of Maryland, College Park", "aff_domain": "nyu.edu;jhu.edu;cs.umd.edu;cs.umd.edu;gatech.edu;umd.edu;gatech.edu;ai.facebook.com;inria.fr;gatech.edu;;nyu.edu;umd.edu", "position": "Postdoc;PhD student;PhD student;PhD student;PhD student;PhD student;PhD;Researcher;PhD student;Assistant Professor;;Associate Professor;Full Professor", "bibtex": 
"@inproceedings{\ngoldblum2023battle,\ntitle={Battle of the Backbones: A Large-Scale Comparison of Pretrained Models across Computer Vision Tasks},\nauthor={Micah Goldblum and Hossein Souri and Renkun Ni and Manli Shu and Viraj Uday Prabhu and Gowthami Somepalli and Prithvijit Chattopadhyay and Mark Ibrahim and Adrien Bardes and Judy Hoffman and Rama Chellappa and Andrew Gordon Wilson and Tom Goldstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=1yOnfDpkVe}\n}", "github": "", "project": "", "reviewers": "eLJh;YjWA;fJFu;SBvp;eVj3", "pdf_size": 622114, "rating": "5;6;6;6;8", "confidence": "5;4;4;5;4", "wc_summary_and_contributions": "91;80;60;82;41", "wc_strengths": "113;126;75;26;72", "wc_improvement": "164;280;177;2;61", "wc_limitations": "1;17;38;281;43", "wc_correctness": "49;53;14;1;7", "wc_clarity": "31;26;12;1;8", "wc_relation_to_prior_work": "31;11;13;1;9", "wc_documentation": "17;8;18;1;13", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "498;602;408;396;255", "wc_reply_reviewers": "31;164;22;0;0", "wc_reply_authors": "680;1294;529;503;326", "reply_reviewers": "1;1;1;0;0", "reply_authors": "2;3;2;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "wc_summary_and_contributions_avg": [ 70.8, 18.015548839821673 ], "wc_strengths_avg": [ 82.4, 35.160204777560665 ], "wc_improvement_avg": [ 136.8, 96.72517769433148 ], "wc_limitations_avg": [ 76.0, 103.59922779634991 ], "wc_correctness_avg": [ 24.8, 21.82109071517737 ], "wc_clarity_avg": [ 15.6, 11.217842929904126 ], "wc_relation_to_prior_work_avg": [ 13.0, 9.879271228182775 ], "wc_documentation_avg": [ 11.4, 6.2801273872430325 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 431.8, 115.28816071045631 ], "wc_reply_reviewers_avg": [ 43.4, 61.51942782568772 ], "wc_reply_authors_avg": [ 666.4, 333.3218264680548 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.5833333333333333, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8293754657423982632&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "nyu.edu;jhu.edu;cs.umd.edu;cs.umd.edu;gatech.edu;umd.edu;gatech.edu;ai.facebook.com;inria.fr;gatech.edu;;nyu.edu;umd.edu", "author_num": 13, "aff_unique_index": "0;1;2;2;3;4;3;5;6;3;0;4", "aff_unique_norm": "New York University;Johns Hopkins University;University of Maryland, College Park;Georgia Institute of Technology;University of Maryland;Meta;INRIA", "aff_unique_dep": ";;Department of Computer Science;;;Facebook AI Research;", "aff_unique_url": "https://www.nyu.edu;https://www.jhu.edu;https://www/umd.edu;https://www.gatech.edu;https://www/umd.edu;https://www.meta.com;https://www.inria.fr", "aff_unique_abbr": "NYU;JHU;UMD;Georgia Tech;UMD;Meta AI;INRIA", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;France" }, { "title": "Comparing Causal Frameworks: Potential Outcomes, Structural Models, Graphs, and Abstractions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72981", "id": "1zKRwh5Rl2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd83f4e0dcaf1c64ea15bbb1695bb40f-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=1zKRwh5Rl2", "openreview": "https://openreview.net/forum?id=1zKRwh5Rl2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72981", "video": "https://nips.cc/virtual/2023/poster/72981", "author_site": "Duligur Ibeling, Thomas Icard", "tldr": "", "abstract": "The aim of this paper is to make clear and precise the relationship between the Rubin causal model (RCM) and structural causal model (SCM) frameworks for causal inference. Adopting a neutral logical perspective, and drawing on previous work, we show what is required for an RCM to be representable by an SCM. A key result then shows that every RCM---including those that violate algebraic principles implied by the SCM framework---emerges as an abstraction of some representable RCM. Finally, we illustrate the power of this ameliorative perspective by pinpointing an important role for SCM principles in classic applications of RCMs; conversely, we offer a characterization of the algebraic constraints implied by a graph, helping to substantiate further comparisons between the two frameworks.", "keywords": "potential outcomes framework;structural causal model;causal inference;logic;probability;graphical causal models;causal abstraction;causal machine learning", "primary_area": "", "supplementary_material": "/attachment/dd74b7fab4e82883e800362dbf6a3cfdc25f4364.pdf", "author": "Duligur Ibeling;Thomas Icard", "authorids": "~Duligur_Ibeling1;~Thomas_Icard1", "gender": ";", "homepage": ";https://web.stanford.edu/~icard/", "dblp": "220/3264;149/3822", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Duligur_Ibeling1;~Thomas_F_Icard1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nibeling2023comparing,\ntitle={Comparing Causal Frameworks: Potential Outcomes, Structural Models, Graphs, and Abstractions},\nauthor={Duligur Ibeling and Thomas Icard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1zKRwh5Rl2}\n}", "github": "", "project": "", "reviewers": "vtiC;Qo71;w4ki;Ef3C", "pdf_size": 454465, "rating": "4;6;6;8", "confidence": "3;3;3;4", "soundness": "3;3;4;4", "novelty": "4;3;2;4", "presentation": "2;3;2;4", "wc_summary": "102;112;76;68", "wc_strengths": "100;68;170;74", "wc_weaknesses": "174;307;207;124", "wc_questions": "86;168;112;382", "wc_limitations": "4;4;4;1", "wc_review": "466;659;569;649", "wc_reply_reviewers": "93;15;20;12", "wc_reply_authors": "436;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 89.5, 18.07622748252522 ], "wc_strengths_avg": [ 103.0, 40.50925820105819 ], "wc_weaknesses_avg": [ 203.0, 66.9215959164155 ], "wc_questions_avg": [ 187.0, 116.41735265844177 ], "wc_limitations_avg": [ 3.25, 1.299038105676658 ], "wc_review_avg": [ 585.75, 77.43828187660158 ], "wc_reply_reviewers_avg": [ 35.0, 33.60803475361212 ], "wc_reply_authors_avg": [ 109.0, 188.79353802500762 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 15, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15896203318832892819&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "stanford.edu;stanford.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DiffuseBot: Breeding Soft Robots With Physics-Augmented Generative Diffusion Models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72980", "id": "1zo4iioUEs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b1008098947ad59144c18a78337f937-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1zo4iioUEs", "openreview": "https://openreview.net/forum?id=1zo4iioUEs", "poster": "/media/PosterPDFs/NeurIPS%202023/72980.png?t=1702222328.5188706", "slides": "https://nips.cc/virtual/2023/poster/72980", "video": "https://nips.cc/virtual/2023/poster/72980", "author_site": "Tsun-Hsuan Johnson Wang, Juntian Zheng, Pingchuan Ma, Yilun Du, Byungchul Kim, Andrew Spielberg, Josh Tenenbaum, Chuang Gan, Daniela Rus", "tldr": "", "abstract": "Nature evolves creatures with a high complexity of morphological and behavioral intelligence, meanwhile computational methods lag in approaching that diversity and efficacy. Co-optimization of artificial creatures' morphology and control in silico shows promise for applications in physical soft robotics and virtual character creation; such approaches, however, require developing new learning algorithms that can reason about function atop pure structure. In this paper, we present DiffuseBot, a physics-augmented diffusion model that generates soft robot morphologies capable of excelling in a wide spectrum of tasks. \\name bridges the gap between virtually generated content and physical utility by (i) augmenting the diffusion process with a physical dynamical simulation which provides a certificate of performance, and (ii) introducing a co-design procedure that jointly optimizes physical design and control by leveraging information about physical sensitivities from differentiable simulation. We showcase a range of simulated and fabricated robots along with their capabilities. Check our website: https://diffusebot.github.io/", "keywords": "soft robot;diffusion model;co-design", "primary_area": "", "supplementary_material": "/attachment/ffc986d9683f1056512609ca36966932ba11a6bd.pdf", "author": "Tsun-Hsuan Wang;Juntian Zheng;Pingchuan Ma;Yilun Du;Byungchul Kim;Andrew Everett Spielberg;Joshua B. 
Tenenbaum;Chuang Gan;Daniela Rus", "authorids": "~Tsun-Hsuan_Wang2;~Juntian_Zheng1;~Pingchuan_Ma3;~Yilun_Du1;~Byungchul_Kim1;~Andrew_Everett_Spielberg1;~Joshua_B._Tenenbaum1;~Chuang_Gan1;~Daniela_Rus1", "gender": "M;M;M;;M;M;;M;F", "homepage": "https://zswang666.github.io/;https://github.com/Alif-01;https://people.csail.mit.edu/pcma;https://yilundu.github.io;https://bc-kim.github.io;http://www.andrewspielberg.com;;http://people.csail.mit.edu/ganchuang/;https://www.csail.mit.edu/person/daniela-rus", "dblp": "217/1809.html;;215/4446-2;204/4379;;;t/JoshuaBTenenbaum;139/6993;r/DanielaRus", "google_scholar": "xE3WSuYAAAAJ;;EtCZmkwAAAAJ;;KPQ53S0AAAAJ;8JeQMMUAAAAJ;;PTeSCbIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-4659-3310;;;;", "linkedin": ";;;;byungchul-kim/;;;;", "or_profile": "~Tsun-Hsuan_Wang2;~Juntian_Zheng1;~Pingchuan_Ma3;~Yilun_Du1;~Byungchul_Kim1;~Andrew_Everett_Spielberg1;~Joshua_B._Tenenbaum1;~Chuang_Gan1;~Daniela_Rus1", "aff": "Massachusetts Institute of Technology;Tsinghua University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;School of Engineering and Applied Sciences, Harvard University;Massachusetts Institute of Technology;MIT-IBM Watson AI Lab;Massachusetts Institute of Technology", "aff_domain": "mit.edu;tsinghua.edu.cn;mit.edu;mit.edu;mit.edu;seas.harvard.edu;mit.edu;ibm.com;mit.edu", "position": "PhD student;Undergrad student;PhD student;PhD student;Postdoc;Postdoc;Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023diffusebot,\ntitle={DiffuseBot: Breeding Soft Robots With Physics-Augmented Generative Diffusion Models},\nauthor={Tsun-Hsuan Wang and Juntian Zheng and Pingchuan Ma and Yilun Du and Byungchul Kim and Andrew Everett Spielberg and Joshua B. 
Tenenbaum and Chuang Gan and Daniela Rus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=1zo4iioUEs}\n}", "github": "", "project": "", "reviewers": "vV19;zr6A;5Fqn;syw2;7Vvn", "pdf_size": 8104476, "rating": "6;6;6;7;8", "confidence": "3;2;2;3;4", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;3", "presentation": "3;3;2;4;4", "wc_summary": "38;113;89;93;232", "wc_strengths": "40;80;80;68;98", "wc_weaknesses": "220;288;53;105;12", "wc_questions": "12;38;150;64;74", "wc_limitations": "67;2;25;6;7", "wc_review": "377;521;397;336;423", "wc_reply_reviewers": "210;54;29;25;47", "wc_reply_authors": "91;0;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 113.0, 64.43911855387222 ], "wc_strengths_avg": [ 73.2, 19.1666376811375 ], "wc_weaknesses_avg": [ 135.6, 103.32976337919293 ], "wc_questions_avg": [ 67.6, 46.499892472993956 ], "wc_limitations_avg": [ 21.4, 24.137936945812086 ], "wc_review_avg": [ 410.8, 62.00129030915405 ], "wc_reply_reviewers_avg": [ 73.0, 69.34839579975878 ], "wc_reply_authors_avg": [ 18.2, 36.39999999999999 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8685990362153793, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15716370057566050503&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;tsinghua.edu.cn;mit.edu;mit.edu;mit.edu;seas.harvard.edu;mit.edu;ibm.com;mit.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;0;2;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Tsinghua University;Harvard University", "aff_unique_dep": ";;School of Engineering and Applied Sciences", "aff_unique_url": "https://web.mit.edu;https://www.tsinghua.edu.cn;https://www.harvard.edu", "aff_unique_abbr": "MIT;THU;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "RenderMe-360: A Large Digital Asset Library and Benchmarks Towards High-fidelity Head Avatars", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73712", "id": "22RlsVAOTT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1909ac72220bf5016b6c93f08b66cf36-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=22RlsVAOTT", "openreview": "https://openreview.net/forum?id=22RlsVAOTT", "poster": "/media/PosterPDFs/NeurIPS%202023/73712.png?t=1697525783.841379", "slides": "https://nips.cc/virtual/2023/poster/73712", "video": "https://nips.cc/virtual/2023/poster/73712", "author_site": "Dongwei Pan, Long Zhuo, Jingtan Piao, Huiwen Luo, Wei Cheng, Yuxin WANG, Siming Fan, Shengqi Liu, Lei Yang, Bo Dai, Ziwei Liu, Chen Change Loy, Chen Qian, Wayne Wu, Dahua Lin, Kwan-Yee Lin", "tldr": "", "abstract": "Synthesizing high-fidelity head avatars is a central problem for computer vision and graphics. While head avatar synthesis algorithms have advanced rapidly, the best ones still face great obstacles in real-world scenarios. 
One of the vital causes is inadequate datasets -- 1) current public datasets can only support researchers in exploring high-fidelity head avatars in one or two task directions; 2) these datasets usually contain digital head assets with limited data volume, and narrow distribution over different attributes, such as expressions, ages, and accessories. In this paper, we present RenderMe-360, a comprehensive 4D human head dataset to drive advances in head avatar algorithms across different scenarios. It contains massive data assets, with 243+ million complete head frames and over 800k video sequences from 500 different identities captured by multi-view cameras at 30 FPS. It is a large-scale digital library for head avatars with three key attributes: 1) High Fidelity: all subjects are captured in 360 degrees via 60 synchronized, high-resolution 2K cameras. 2) High Diversity: The collected subjects vary across different ages, eras, ethnicities, and cultures, providing abundant materials with distinctive styles in appearance and geometry. Moreover, each subject is asked to perform various dynamic motions, such as expressions and head rotations, which further extend the richness of assets. 3) Rich Annotations: the dataset provides annotations with different granularities: cameras' parameters, background matting, scan, 2D/3D facial landmarks, FLAME fitting, and text description.\n\nBased on the dataset, we build a comprehensive benchmark for head avatar research, with 16 state-of-the-art methods performed on five main tasks: novel view synthesis, novel expression synthesis, hair rendering, hair editing, and talking head generation. Our experiments uncover the strengths and flaws of state-of-the-art methods. RenderMe-360 opens the door for future exploration in modern head avatars. 
All of the data, code, and models will be publicly available at https://renderme-360.github.io/.", "keywords": "Head Avatar;Metaverse;Multi-view Human Head Dataset;High-Fidelity;High Diversity;Rich Annotation;Benchmarks", "primary_area": "", "supplementary_material": "/attachment/2d301104de78a72c054e9a9378638f968103dfe4.zip", "author": "Dongwei Pan;Long Zhuo;Jingtan Piao;Huiwen Luo;Wei Cheng;Yuxin WANG;Siming Fan;Shengqi Liu;Lei Yang;Bo Dai;Ziwei Liu;Chen Change Loy;Chen Qian;Wayne Wu;Dahua Lin;Kwan-Yee Lin", "authorids": "~Dongwei_Pan2;~Long_Zhuo1;~Jingtan_Piao1;~Huiwen_Luo2;~Wei_Cheng7;~Yuxin_WANG5;~Siming_Fan1;~Shengqi_Liu1;~Lei_Yang7;~Bo_Dai2;~Ziwei_Liu1;~Chen_Change_Loy2;~Chen_Qian1;~Wayne_Wu1;~Dahua_Lin1;~Kwan-Yee_Lin2", "gender": "M;M;M;F;;M;M;M;M;M;M;M;M;;M;F", "homepage": "https://github.com/pandongwei;;https://westlypark.github.io/;;;https://w-ted.github.io/;https://simon3dv.github.io/;https://github.com/LSQsjtu;https://www.yanglei.me;http://daibo.info/;https://liuziwei7.github.io/;https://www.mmlab-ntu.com/person/ccloy/index.html;;;http://dahua.site;https://kwanyeelin.github.io/", "dblp": "187/3761;;;;;;;195/9149;50/2484-45;64/2903-2;05/6300-2;01/5855;;;53/6088;218/5452", "google_scholar": ";_DVnyb8AAAAJ;;;;https://scholar.google.com/citations?hl=en;;;jZH2IPYAAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ;AerkT0YAAAAJ;;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=beGt3cAAAAAJ", "orcid": ";;;;;0000-0002-9570-5577;;0009-0004-4596-5524;0000-0002-0571-5924;0000-0003-0777-9232;;0000-0001-5345-1591;;;;", "linkedin": ";;;huiwen-luo/;;yuxin-wang-1a6871193/;;shengqi-liu-3270aa209/;;;;;;;;", "or_profile": "~Dongwei_Pan2;~Long_Zhuo1;~Jingtan_Piao1;~Huiwen_Luo2;~Wei_Cheng7;~Yuxin_WANG5;~Siming_Fan1;~Shengqi_Liu1;~Lei_Yang7;~Bo_Dai2;~Ziwei_Liu1;~Chen_Change_Loy2;~Chen_Qian1;~Wayne_Wu1;~Dahua_Lin1;~Kwan-Yee_Lin2", "aff": "Sensetime;Shanghai AI Laboratory;;Shanghai Artificial Intelligence Laboratory;;Hong Kong University of Science and Technology;Sensetime Research;Shanghai Jiaotong University;Sensetime Ltd.;Shanghai AI Laboratory;Nanyang Technological University;Nanyang Technological University;Tsinghua University;;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "sensetime.com;pjlab.org.cn;;pjlab.org.cn;;ust.hk;sensetime.com;sjtu.edu.cn;sensetime.com;pjlab.org.cn;ntu.edu.sg;ntu.edu.sg;mails.tsinghua.edu.cn;;cuhk.edu.hk;cuhk.edu.hk", "position": "Researcher;Researcher;;Researcher;;PhD student;Researcher;Undergrad student;Researcher;Scientist;Assistant Professor;Full Professor;PhD student;;Associate Professor;Postdoc", "bibtex": "@inproceedings{\npan2023renderme,\ntitle={RenderMe-360: A Large Digital Asset Library and Benchmarks Towards High-fidelity Head Avatars},\nauthor={Dongwei Pan and Long Zhuo and Jingtan Piao and Huiwen Luo and Wei Cheng and Yuxin WANG and Siming Fan and Shengqi Liu and Lei Yang and Bo Dai and Ziwei Liu and Chen Change Loy and Chen Qian and Wayne Wu and Dahua Lin and Kwan-Yee Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=22RlsVAOTT}\n}", "github": "", "project": "", "reviewers": "q6aZ;CynN;8MTN", "pdf_size": 9787522, "rating": "7;7;9", "confidence": "5;4;3", "wc_summary_and_contributions": "59;74;22", "wc_strengths": "69;48;82", "wc_improvement": "31;1;15", 
"wc_limitations": "18;31;17", "wc_correctness": "22;1;18", "wc_clarity": "9;1;29", "wc_relation_to_prior_work": "21;1;9", "wc_documentation": "22;1;8", "wc_additional_feedback": "1;1;1", "wc_review": "252;159;201", "wc_reply_reviewers": "20;0;0", "wc_reply_authors": "650;136;552", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 51.666666666666664, 21.853044537445015 ], "wc_strengths_avg": [ 66.33333333333333, 14.007934259633796 ], "wc_improvement_avg": [ 15.666666666666666, 12.256517540566824 ], "wc_limitations_avg": [ 22.0, 6.377042156569663 ], "wc_correctness_avg": [ 13.666666666666666, 9.104333522498441 ], "wc_clarity_avg": [ 13.0, 11.775681155103795 ], "wc_relation_to_prior_work_avg": [ 10.333333333333334, 8.219218670625303 ], "wc_documentation_avg": [ 10.333333333333334, 8.73053390247253 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 204.0, 38.02630668366309 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 446.0, 222.82429550357983 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8025824024416535&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sensetime.com;pjlab.org.cn;;pjlab.org.cn;;ust.hk;sensetime.com;sjtu.edu.cn;sensetime.com;pjlab.org.cn;ntu.edu.sg;ntu.edu.sg;mails.tsinghua.edu.cn;;cuhk.edu.hk;cuhk.edu.hk", "author_num": 16, "aff_unique_index": "0;1;2;3;0;4;0;1;5;5;6;7;7", "aff_unique_norm": "SenseTime;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Hong Kong University of Science and Technology;Shanghai Jiao Tong University;Nanyang Technological University;Tsinghua University;Chinese University of Hong Kong", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.sensetime.com;https://www.shanghai-ai-lab.com;http://www.shailab.org/;https://www.ust.hk;https://www.sjtu.edu.cn;https://www.ntu.edu.sg;https://www.tsinghua.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "SenseTime;SAIL;Shanghai AI Lab;HKUST;SJTU;NTU;THU;CUHK", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "CAPro: Webly Supervised Learning with Cross-modality Aligned Prototypes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72979", "id": "25HiFHPcXg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a7e0d77325db843fd5baf1298163e89a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=25HiFHPcXg", "openreview": "https://openreview.net/forum?id=25HiFHPcXg", "poster": "/media/PosterPDFs/NeurIPS%202023/72979.png?t=1698123461.4986002", "slides": "https://nips.cc/virtual/2023/poster/72979", "video": "https://nips.cc/virtual/2023/poster/72979", "author_site": "Yulei Qin, Xingyu Chen, Yunhang Shen, Chaoyou Fu, Yun Gu, Ke Li, Xing Sun, Rongrong Ji", "tldr": "", "abstract": "Webly supervised learning has attracted increasing attention for its effectiveness in exploring publicly accessible data at scale without manual annotation. 
However, most existing methods of learning with web datasets are faced with challenges from label noise, and they have limited assumptions on clean samples under various noise. For instance, web images retrieved with queries of \u201ctiger cat\u201d (a cat species) and \u201cdrumstick\u201d (a musical instrument) are almost dominated by images of tigers and chickens, which exacerbates the challenge of fine-grained visual concept learning. In this case, exploiting both web images and their associated texts is a requisite solution to combat real-world noise. In this paper, we propose Cross-modality Aligned Prototypes (CAPro), a unified prototypical contrastive learning framework to learn visual representations with correct semantics. For one thing, we leverage textual prototypes, which stem from the distinct concept definition of classes, to select clean images by text matching and thus disambiguate the formation of visual prototypes. For another, to handle missing and mismatched noisy texts, we resort to the visual feature space to complete and enhance individual texts and thereafter improve text matching. Such semantically aligned visual prototypes are further polished up with high-quality samples, and engaged in both cluster regularization and noise removal. Besides, we propose collective bootstrapping to encourage smoother and wiser label reference from appearance-similar instances in a manner of dictionary look-up. Extensive experiments on WebVision1k and NUS-WIDE (Web) demonstrate that CAPro well handles realistic noise under both single-label and multi-label scenarios. CAPro achieves new state-of-the-art performance and exhibits robustness to open-set recognition. Codes are available at https://github.com/yuleiqin/capro.", "keywords": "webly supervised learning;representation learning;visual-semantic alignment;collective bootstrapping", "primary_area": "", "supplementary_material": "/attachment/381592a7108de16022158578d74dfdfc9a9deb17.pdf", "author": "Yulei Qin;Xingyu Chen;Yunhang Shen;Chaoyou Fu;Yun Gu;Ke Li;Xing Sun;Rongrong Ji", "authorids": "~Yulei_Qin1;~Xingyu_Chen3;~Yunhang_Shen1;~Chaoyou_Fu1;~Yun_Gu1;~Ke_Li4;~Xing_Sun1;~Rongrong_Ji5", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://yuleichin.github.io/;;https://shenyunhang.github.io/;https://scholar.google.com.hk/citations?user=4A1xYQwAAAAJ&hl=zh-CN;;http://keli.info;https://www.sunxing.org;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "226/3329;59/7651.html;146/1800;https://dblp.uni-trier.de/pid/221/6738;;;;86/5681", "google_scholar": "vBnuTjwAAAAJ;https://scholar.google.com/citations?hl=en;29teR74AAAAJ;https://scholar.google.com.hk/citations?user=4A1xYQwAAAAJ;https://scholar.google.com.hk/citations?user=0pX32mkAAAAJ;mfWsFM0AAAAJ;IUtix9IAAAAJ;", "orcid": "0000-0002-0996-3984;;0000-0002-3970-7519;;;0000-0001-7998-0731;0000-0001-8132-9083;", "linkedin": ";;;;;;sunxings/;", "or_profile": "~Yulei_Qin1;~Xingyu_Chen3;~Yunhang_Shen1;~Chaoyou_Fu1;~Yun_Gu1;~Ke_Li4;~Xing_Sun1;~Rongrong_Ji5", "aff": "Tencent;;Tencent;Institute of Automation, Chinese Academy of Sciences;Shanghai Jiaotong University;Tencent;Tencent YouTu Lab;Xiamen University", "aff_domain": "tencent.com;;tencent.com;ia.ac.cn;sjtu.edu.cn;tencent.com;tencent.com;xmu.edu.cn", "position": "Researcher;;Researcher;PhD student;Assistant Professor;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nqin2023capro,\ntitle={{CAP}ro: Webly Supervised Learning with Cross-modality Aligned Prototypes},\nauthor={Yulei Qin and Xingyu Chen and Yunhang 
Shen and Chaoyou Fu and Yun Gu and Ke Li and Xing Sun and Rongrong Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=25HiFHPcXg}\n}", "github": "", "project": "", "reviewers": "QoQL;SPGi;XBs5;HQEo", "pdf_size": 26229863, "rating": "5;5;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "106;97;90;232", "wc_strengths": "40;50;67;128", "wc_weaknesses": "193;257;138;197", "wc_questions": "1;21;7;61", "wc_limitations": "10;6;1;58", "wc_review": "350;431;303;676", "wc_reply_reviewers": "0;26;18;75", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 131.25, 58.443883341201754 ], "wc_strengths_avg": [ 71.25, 34.15680752061 ], "wc_weaknesses_avg": [ 196.25, 42.115169476092575 ], "wc_questions_avg": [ 22.5, 23.382685902179844 ], "wc_limitations_avg": [ 18.75, 22.884219453588535 ], "wc_review_avg": [ 440.0, 143.7410866801834 ], "wc_reply_reviewers_avg": [ 29.75, 27.770262872360426 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5799511879504804124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tencent.com;;tencent.com;ia.ac.cn;sjtu.edu.cn;tencent.com;tencent.com;xmu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;0;0;3", "aff_unique_norm": "Tencent;Chinese Academy of Sciences;Shanghai Jiao Tong University;Xiamen University", "aff_unique_dep": "Tencent Holdings Limited;Institute of Automation;;", "aff_unique_url": "https://www.tencent.com;http://www.ia.cas.cn;https://www.sjtu.edu.cn;https://www.xmu.edu.cn", "aff_unique_abbr": "Tencent;CAS;SJTU;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Beyond Normal: On the Evaluation of Mutual Information Estimators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72978", "id": "25vRtG56YH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/36b80eae70ff629d667f210e13497edf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=25vRtG56YH", "openreview": "https://openreview.net/forum?id=25vRtG56YH", "poster": "/media/PosterPDFs/NeurIPS%202023/72978.png?t=1700229635.6510572", "slides": "https://nips.cc/virtual/2023/poster/72978", "video": "https://nips.cc/virtual/2023/poster/72978", "author_site": "Pawe\u0142 Czy\u017c, Frederic Grabowski, Julia Vogt, Niko Beerenwinkel, Alexander Marx", "tldr": "", "abstract": "Mutual information is a general statistical dependency measure which has found applications in representation learning, causality, domain generalization and computational biology. However, mutual information estimators are typically evaluated on simple families of probability distributions, namely multivariate normal distribution and selected distributions with one-dimensional random variables. 
In this paper, we show how to construct a diverse family of distributions with known ground-truth mutual information and propose a language-independent benchmarking platform for mutual information estimators. We discuss the general applicability and limitations of classical and neural estimators in settings involving high dimensions, sparse interactions, long-tailed distributions, and high mutual information. Finally, we provide guidelines for practitioners on how to select an appropriate estimator adapted to the difficulty of the problem considered, and on the issues one needs to consider when applying an estimator to a new data set.", "keywords": "Mutual Information;Invariance;Benchmark;Geometric Machine Learning", "primary_area": "", "supplementary_material": "", "author": "Pawe\u0142 Czy\u017c;Frederic Grabowski;Julia E Vogt;Niko Beerenwinkel;Alexander Marx", "authorids": "~Pawe\u0142_Czy\u017c1;~Frederic_Grabowski1;~Julia_E_Vogt1;~Niko_Beerenwinkel1;~Alexander_Marx1", "gender": "M;M;F;M;", "homepage": "https://pawel-czyz.github.io/;https://pmbm.ippt.pan.pl/web/Frederic_Grabowski;http://mds.inf.ethz.ch;https://bsse.ethz.ch/cbg;http://a-marx.com", "dblp": ";;13/8412;58/2558;", "google_scholar": "7rzyLvYAAAAJ;phZuKuEAAAAJ;UoeV-8kAAAAJ;https://scholar.google.com/citations?hl=en;fJ9u_woAAAAJ", "orcid": "0000-0002-6262-0614;0000-0003-4070-9500;;0000-0002-0573-6119;", "linkedin": ";;julia-vogt-50b53895;;", "or_profile": "~Pawe\u0142_Czy\u017c1;~Frederic_Grabowski1;~Julia_E_Vogt1;~Niko_Beerenwinkel1;~Alexander_Marx1", "aff": "ETHZ - ETH Zurich;Institute of Fundamental Technological Research, Polish Academy of Sciences;Swiss Federal Institute of Technology;ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ippt.pan.pl;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nczy{\\.z}2023beyond,\ntitle={Beyond Normal: On the Evaluation of Mutual Information Estimators},\nauthor={Pawe{\\l} Czy{\\.z} and Frederic Grabowski and Julia E Vogt and Niko Beerenwinkel and Alexander Marx},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=25vRtG56YH}\n}", "github": "", "project": "", "reviewers": "FjRx;62uG;AkdF;vCqe", "pdf_size": 12810116, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "4;3;3;4", "novelty": "2;3;3;3", "presentation": "4;3;3;4", "wc_summary": "38;94;214;77", "wc_strengths": "88;29;220;92", "wc_weaknesses": "144;146;128;15", "wc_questions": "15;38;3;116", "wc_limitations": "73;9;3;9", "wc_review": "358;316;568;309", "wc_reply_reviewers": "187;161;0;29", "wc_reply_authors": "98;117;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 105.75, 65.71291729941686 ], "wc_strengths_avg": [ 107.25, 69.71145888589622 ], "wc_weaknesses_avg": [ 108.25, 54.28800512083678 ], "wc_questions_avg": [ 43.0, 43.982951242498494 ], "wc_limitations_avg": [ 23.5, 28.683619018526933 ], "wc_review_avg": [ 387.75, 105.74113438014555 ], "wc_reply_reviewers_avg": [ 94.25, 80.93013962671756 ], "wc_reply_authors_avg": [ 53.75, 54.16814100557633 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, 
"gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15024724386780996844&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ethz.ch;ippt.pan.pl;ethz.ch;ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "ETH Zurich;Institute of Fundamental Technological Research;Swiss Federal Institute of Technology", "aff_unique_dep": ";Polish Academy of Sciences;", "aff_unique_url": "https://www.ethz.ch;https://www.ippt.pan.pl;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;IPPT PAS;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Switzerland;Poland" }, { "title": "General Munchausen Reinforcement Learning with Tsallis Kullback-Leibler Divergence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72977", "id": "26qqUHi9XF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3e866c228f8f4ea18021ae63aea5453-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=26qqUHi9XF", "openreview": "https://openreview.net/forum?id=26qqUHi9XF", "poster": "/media/PosterPDFs/NeurIPS%202023/72977.png?t=1700107752.0813403", "slides": "https://nips.cc/virtual/2023/poster/72977", "video": "https://nips.cc/virtual/2023/poster/72977", "author_site": "Lingwei Zhu, Zheng Chen, Matthew Schlegel, Martha White", "tldr": "", "abstract": "Many policy optimization approaches in reinforcement learning incorporate a Kullback-Leilbler (KL) divergence to the previous policy, to prevent the policy from changing too quickly. This idea was initially proposed in a seminal paper on Conservative Policy Iteration, with approximations given by algorithms like TRPO and Munchausen Value Iteration (MVI). We continue this line of work by investigating a generalized KL divergence---called the Tsallis KL divergence. Tsallis KL defined by the $q$-logarithm is a strict generalization, as $q = 1$ corresponds to the standard KL divergence; $q > 1$ provides a range of new options. We characterize the types of policies learned under the Tsallis KL, and motivate when $q >1$ could be beneficial. To obtain a practical algorithm that incorporates Tsallis KL regularization, we extend MVI, which is one of the simplest approaches to incorporate KL regularization. 
We show that this generalized MVI($q$) obtains significant improvements over the standard MVI($q = 1$) across 35 Atari games.", "keywords": "reinforcement learning;entropy regularization;Tsallis KL divergence", "primary_area": "", "supplementary_material": "/attachment/009ab7f19c522822c2a170715dd9c075dc532fb0.zip", "author": "Lingwei Zhu;Zheng Chen;Matthew Kyle Schlegel;Martha White", "authorids": "~Lingwei_Zhu1;~Zheng_Chen9;~Matthew_Kyle_Schlegel1;~Martha_White1", "gender": "M;M;M;F", "homepage": "https://lingweizhu.github.io/;https://zhengchen3.github.io;http://mkschleg.github.io;http://marthawhite.ca", "dblp": "231/4574;33/2592-12;203/4463;60/7057", "google_scholar": "1_jwNFIAAAAJ;571LAh4AAAAJ;-iAxatcAAAAJ;t5zdD_IAAAAJ", "orcid": ";0000-0001-6776-7159;;0000-0002-5356-2950", "linkedin": ";;mkschleg/;", "or_profile": "~Lingwei_Zhu1;~Zheng_Chen9;~Matthew_Kyle_Schlegel1;~Martha_White1", "aff": "University of Alberta;ISIR, Osaka University, Japan;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;osaka-u.ac.jp;ualberta.ca;ualberta.ca", "position": "Postdoc;Assistant Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhu2023general,\ntitle={General Munchausen Reinforcement Learning with Tsallis Kullback-Leibler Divergence},\nauthor={Lingwei Zhu and Zheng Chen and Matthew Kyle Schlegel and Martha White},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=26qqUHi9XF}\n}", "github": "", "project": "", "reviewers": "3pbN;6PUw;FCv3;QmDX", "pdf_size": 8623473, "rating": "6;6;6;7", "confidence": "3;3;2;3", "soundness": "2;3;2;4", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "35;112;98;123", "wc_strengths": "21;29;79;75", "wc_weaknesses": "41;108;257;237", "wc_questions": "45;84;44;5", "wc_limitations": "16;10;1;7", "wc_review": "158;343;479;447", "wc_reply_reviewers": "70;0;0;62", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.0, 34.08078637590395 ], "wc_strengths_avg": [ 51.0, 26.19160170741759 ], "wc_weaknesses_avg": [ 160.75, 89.72283711519604 ], "wc_questions_avg": [ 44.5, 27.932955446926844 ], "wc_limitations_avg": [ 8.5, 5.408326913195984 ], "wc_review_avg": [ 356.75, 125.2804354238921 ], "wc_reply_reviewers_avg": [ 33.0, 33.12099032335839 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5788748112029169649&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ualberta.ca;osaka-u.ac.jp;ualberta.ca;ualberta.ca", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Alberta;Osaka University", "aff_unique_dep": ";Institute of Scientific and Industrial Research", "aff_unique_url": "https://www.ualberta.ca;https://www.isir.osaka-u.ac.jp", "aff_unique_abbr": "UAlberta;ISIR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;Japan" }, { "title": "Noether Embedding: Efficient Learning of Temporal Regularities", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72976", "id": "27CRbwewyb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96c6f409a374b5c81d2efa4bc5526f27-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=27CRbwewyb", "openreview": "https://openreview.net/forum?id=27CRbwewyb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72976", "video": "https://nips.cc/virtual/2023/poster/72976", "author_site": "Chi Gao, Zidong Zhou, Luping Shi", "tldr": "", "abstract": "Learning to detect and encode temporal regularities (TRs) in events is a prerequisite for human-like intelligence. These regularities should be formed from limited event samples and stored as easily retrievable representations. Existing event embeddings, however, cannot effectively decode TR validity with well-trained vectors, let alone satisfy the efficiency requirements. We develop Noether Embedding (NE) as the first efficient TR learner with event embeddings. Specifically, NE possesses the intrinsic time-translation symmetries of TRs indicated as conserved local energies in the embedding space. This structural bias reduces the calculation of each TR validity to embedding each event sample, enabling NE to achieve data-efficient TR formation insensitive to sample size and time-efficient TR retrieval in constant time complexity. To comprehensively evaluate the TR learning capability of embedding models, we define complementary tasks of TR detection and TR query, formulate their evaluation metrics, and assess embeddings on classic ICEWS14, ICEWS18, and GDELT datasets. Our experiments demonstrate that NE consistently achieves about double the F1 scores for detecting valid TRs compared to classic embeddings, and it provides over ten times higher confidence scores for querying TR intervals. 
Additionally, we showcase NE's potential applications in social event prediction, personal decision-making, and memory-constrained scenarios.", "keywords": "Schema Learning;Temporal Regularity;Event Embedding", "primary_area": "", "supplementary_material": "", "author": "Chi Gao;Zidong Zhou;Luping Shi", "authorids": "~Chi_Gao1;~Zidong_Zhou1;~Luping_Shi1", "gender": "M;M;M", "homepage": "https://github.com/KevinGao7;https://github.com/z123z123d;", "dblp": ";;84/7231.html", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chi_Gao1;~Zidong_Zhou1;~Luping_Shi1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ngao2023noether,\ntitle={Noether Embedding: Efficient Learning of Temporal Regularities},\nauthor={Chi Gao and Zidong Zhou and Luping Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=27CRbwewyb}\n}", "github": "", "project": "", "reviewers": "tTxw;zRYw;Uoii;7yn1;w2A9;bn9B", "pdf_size": 6449083, "rating": "3;4;4;5;6;6", "confidence": "3;3;3;2;3;4", "soundness": "2;2;2;2;3;3", "novelty": "2;2;1;2;3;3", "presentation": "3;2;1;2;3;3", "wc_summary": "18;107;53;444;42;96", "wc_strengths": "10;69;29;77;152;55", "wc_weaknesses": "204;15;148;491;47;26", "wc_questions": "4;173;62;139;43;113", "wc_limitations": "1;35;31;152;68;7", "wc_review": "237;399;323;1303;352;297", "wc_reply_reviewers": "0;1831;0;1123;0;0", "wc_reply_authors": "51;1233;51;664;51;51", "reply_reviewers": "0;6;0;2;0;0", "reply_authors": "2;10;2;5;2;2", "rating_avg": [ 4.666666666666667, 1.1055415967851332 ], "confidence_avg": [ 3.0, 0.5773502691896257 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 126.66666666666667, 145.1615957782529 ], "wc_strengths_avg": [ 65.33333333333333, 44.99876541516321 ], "wc_weaknesses_avg": [ 155.16666666666666, 164.94990822132104 ], "wc_questions_avg": [ 89.0, 58.02585630561603 ], "wc_limitations_avg": [ 49.0, 50.92150822589606 ], "wc_review_avg": [ 485.1666666666667, 369.0756095376062 ], "wc_reply_reviewers_avg": [ 492.3333333333333, 725.641938026064 ], "wc_reply_authors_avg": [ 350.1666666666667, 453.8518174715424 ], "reply_reviewers_avg": [ 1.3333333333333333, 2.211083193570267 ], "reply_authors_avg": [ 3.8333333333333335, 2.967415635794143 ], "replies_avg": [ 47, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2611164839335468, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vAopGvIH7EoJ:scholar.google.com/&scioq=Noether+Embedding:+Efficient+Learning+of+Temporal+Regularities&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Going beyond persistent homology using persistent homology", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72975", "id": "27TdrEvqLD", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c78f81a878a72566422f37279bca0fd0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=27TdrEvqLD", "openreview": "https://openreview.net/forum?id=27TdrEvqLD", "poster": "/media/PosterPDFs/NeurIPS%202023/72975.png?t=1702492983.0128593", "slides": "https://nips.cc/virtual/2023/poster/72975", "video": "https://nips.cc/virtual/2023/poster/72975", "author_site": "Johanna Immonen, Amauri Souza, Vikas Garg", "tldr": "", "abstract": "Representational limits of message-passing graph neural networks (MP-GNNs), e.g., in terms of the Weisfeiler-Leman (WL) test for isomorphism, are well understood. Augmenting these graph models with topological features via persistent homology (PH) has gained prominence, but identifying the class of attributed graphs that PH can recognize remains open. We introduce a novel concept of color-separating sets to provide a complete resolution to this important problem. Specifically, we establish the necessary and sufficient conditions for distinguishing graphs based on the persistence of their connected components, obtained from filter functions on vertex and edge colors. Our constructions expose the limits of vertex- and edge-level PH, proving that neither category subsumes the other. Leveraging these theoretical insights, we propose RePHINE for learning topological features on graphs. RePHINE efficiently combines vertex- and edge-level PH, achieving a scheme that is provably more powerful than both. Integrating RePHINE into MP-GNNs boosts their expressive power, resulting in gains over standard PH on several benchmarks for graph classification.", "keywords": "graph representation learning;topological deep learning;persistent homology;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Johanna Emilia Immonen;Amauri H Souza;Vikas Garg", "authorids": "~Johanna_Emilia_Immonen1;~Amauri_H_Souza1;~Vikas_Garg2", "gender": ";M;", "homepage": ";http://www.amauriholanda.org;", "dblp": ";131/3352;", "google_scholar": ";lP0LBI4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Johanna_Emilia_Immonen1;~Amauri_H_Souza1;~Vikas_Garg2", "aff": ";Federal Institute of Cear\u00e1;", "aff_domain": ";ifce.edu.br;", "position": ";Associate Professor;", "bibtex": "@inproceedings{\nimmonen2023going,\ntitle={Going beyond persistent homology using persistent homology},\nauthor={Johanna Emilia Immonen and Amauri H Souza and Vikas Garg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=27TdrEvqLD}\n}", "github": "", "project": "", "reviewers": "oULm;x2he;BFX1;rXNh;PDW5", "pdf_size": 676753, "rating": "5;6;7;7;7", "confidence": "3;3;5;3;4", "soundness": "2;3;4;4;4", "novelty": "2;2;3;4;3", "presentation": "3;2;4;4;3", "wc_summary": "128;98;77;71;92", "wc_strengths": "69;64;69;89;108", "wc_weaknesses": "108;75;57;389;107", "wc_questions": "6;309;115;435;50", "wc_limitations": "6;47;40;7;42", "wc_review": "317;593;358;991;399", "wc_reply_reviewers": "45;48;118;28;46", "wc_reply_authors": "271;0;393;24;129", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 93.2, 19.95394697797907 ], "wc_strengths_avg": [ 79.8, 16.50939126679115 ], "wc_weaknesses_avg": [ 147.2, 122.44900979591465 ], 
"wc_questions_avg": [ 183.0, 163.1085528106972 ], "wc_limitations_avg": [ 28.4, 18.0288657435791 ], "wc_review_avg": [ 531.6, 248.43155999188187 ], "wc_reply_reviewers_avg": [ 57.0, 31.330496325465386 ], "wc_reply_authors_avg": [ 163.4, 149.40495306381246 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5625, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6292601190197224698&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 13, "email": ";ifce.edu.br;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Federal Institute of Cear\u00e1", "aff_unique_dep": "", "aff_unique_url": "http://www.ifce.edu.br", "aff_unique_abbr": "IFCE", "aff_country_unique_index": "0", "aff_country_unique": "Brazil" }, { "title": "ProteinShake: Building datasets and benchmarks for deep learning on protein structures", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73711", "id": "27vPcG4vKV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6167294ed3d6fc61e11e1592ce5cb77-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=27vPcG4vKV", "openreview": "https://openreview.net/forum?id=27vPcG4vKV", "poster": "/media/PosterPDFs/NeurIPS%202023/73711.png?t=1697197010.3631215", "slides": "https://nips.cc/virtual/2023/poster/73711", "video": "https://nips.cc/virtual/2023/poster/73711", "author_site": "Tim Kucera, Carlos Oliver, Dexiong Chen, Karsten Borgwardt", "tldr": "", "abstract": "We present ProteinShake, a Python software package that simplifies dataset\ncreation and model evaluation for deep learning on protein structures. Users\ncan create custom datasets or load an extensive set of pre-processed datasets from\nbiological data repositories such as the Protein Data Bank (PDB) and AlphaFoldDB.\nEach dataset is associated with prediction tasks and evaluation functions covering\na broad array of biological challenges. A benchmark on these tasks shows that pre-\ntraining almost always improves performance, the optimal data modality (graphs,\nvoxel grids, or point clouds) is task-dependent, and models struggle to generalize\nto new structures. 
ProteinShake makes protein structure data easily accessible\nand comparison among models straightforward, providing challenging benchmark\nsettings with real-world implications.\n\nProteinShake is available at: https://proteinshake.ai", "keywords": "protein structure;benchmarking;datasets", "primary_area": "", "supplementary_material": "/attachment/eef9fc95a654527630de7e373ed3604537cbf8d3.pdf", "author": "Tim Kucera;Carlos Oliver;Dexiong Chen;Karsten Borgwardt", "authorids": "~Tim_Kucera1;~Carlos_Oliver2;~Dexiong_Chen1;~Karsten_Borgwardt2", "gender": "M;M;M;", "homepage": "https://timkucera.github.io;https://carlosoliver.co;https://dexiong.me;https://www.biochem.mpg.de/borgwardt", "dblp": "324/0232;;240/6347;11/3733.html", "google_scholar": "9Ea4hQIAAAAJ;https://scholar.google.ch/citations?user=UnTr7qIAAAAJ;goM0yAIAAAAJ;v3JsjMYAAAAJ", "orcid": "0000-0003-4358-7932;0000-0001-8742-8795;;0000-0001-7221-2393", "linkedin": "tim-kucera-5a5a7919a/;;;", "or_profile": "~Tim_Kucera1;~Carlos_Oliver2;~Dexiong_Chen1;~Karsten_Borgwardt2", "aff": "ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkucera2023proteinshake,\ntitle={ProteinShake: Building datasets and benchmarks for deep learning on protein structures},\nauthor={Tim Kucera and Carlos Oliver and Dexiong Chen and Karsten Borgwardt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=27vPcG4vKV}\n}", "github": "", "project": "", "reviewers": "8GvG;j1GE;XEHS;Da4b", "pdf_size": 1067449, "rating": "6;7;8;9", "confidence": "4;3;4;3", "wc_summary_and_contributions": "400;72;55;84", "wc_strengths": "77;60;199;60", "wc_improvement": "134;123;697;19", "wc_limitations": "247;45;12;1", "wc_correctness": "49;9;17;1", "wc_clarity": "23;36;14;1", "wc_relation_to_prior_work": "32;5;15;1", "wc_documentation": "14;29;65;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "977;380;1075;169", "wc_reply_reviewers": "13;0;71;14", "wc_reply_authors": "1461;453;1548;61", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;4;1", "rating_avg": [ 7.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 152.75, 143.12123357489622 ], "wc_strengths_avg": [ 99.0, 58.15066637623339 ], "wc_improvement_avg": [ 243.25, 265.78786183721786 ], "wc_limitations_avg": [ 76.25, 99.90339083334459 ], "wc_correctness_avg": [ 19.0, 18.2208671582886 ], "wc_clarity_avg": [ 18.5, 12.776932339180638 ], "wc_relation_to_prior_work_avg": [ 13.25, 11.96609794377432 ], "wc_documentation_avg": [ 27.25, 23.94133454926855 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 650.25, 384.64748471815074 ], "wc_reply_reviewers_avg": [ 24.5, 27.408940147331492 ], "wc_reply_authors_avg": [ 880.75, 639.7016394382619 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17606393305978953833&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Improving Graph Matching with Positional Reconstruction Encoder-Decoder Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72974", "id": "28RTu9MOT6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cd3ac24cdb789beeaa9f7145670fcae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=28RTu9MOT6", "openreview": "https://openreview.net/forum?id=28RTu9MOT6", "poster": "/media/PosterPDFs/NeurIPS%202023/72974.png?t=1701744606.1570632", "slides": "https://nips.cc/virtual/2023/poster/72974", "video": "https://nips.cc/virtual/2023/poster/72974", "author_site": "Yixiao Zhou, Ruiqi Jia, Hongxiang Lin, Hefeng Quan, Yumeng Zhao, Xiaoqing Lyu", "tldr": "", "abstract": "Deriving from image matching and understanding, semantic keypoint matching aims at establishing correspondence between keypoint sets in images. As graphs are powerful tools to represent points and their complex relationships, graph matching provides an effective way to find desired semantic keypoint correspondences. Recent deep graph matching methods have shown excellent performance, but there is still a lack of exploration and utilization of spatial information of keypoints as nodes in graphs. More specifically, existing methods are insufficient to capture the relative spatial relations through current graph construction approaches from the locations of semantic keypoints. To address these issues, we introduce a positional reconstruction encoder-decoder (PR-EnDec) to model intrinsic graph spatial structure, and present an end-to-end graph matching network PREGM based on PR-EnDec. Our PR-EnDec consists of a positional encoder that learns effective node spatial embedding with the affine transformation invariance, and a spatial relation decoder that further utilizes the high-order spatial information by reconstructing the locational structure of graphs contained in the node coordinates. 
Extensive experimental results on three public keypoint matching datasets demonstrate the effectiveness of our proposed PREGM.", "keywords": "Graph Matching;Positional Encoding", "primary_area": "", "supplementary_material": "", "author": "Yixiao Zhou;Ruiqi Jia;Hongxiang Lin;Hefeng Quan;Yumeng Zhao;Xiaoqing Lyu", "authorids": "~Yixiao_Zhou1;~Ruiqi_Jia1;~Hongxiang_Lin5;~Hefeng_Quan1;~Yumeng_Zhao1;~Xiaoqing_Lyu2", "gender": "M;F;M;M;M;M", "homepage": ";;https://www.researchgate.net/profile/Hongxiang-Lin-2?ev=hdr_xprf&_sg=ogZWGlHYddNSuVAnPBRXWdSZMjnfvYikmi5ZjZ77cTGad9Rwm-I74tcDBCxH5rjqibHSUIxyQfOPRxUlsuMRYajp;https://hefengquan.github.io/;;https://www.researchgate.net/profile/Xiaoqing_Lu6", "dblp": ";257/7745;;;324/8619;", "google_scholar": ";;;;https://scholar.google.com.hk/citations?hl=zh-CN;", "orcid": "0000-0002-6142-5956;0000-0002-6434-6313;0009-0000-4822-7165;;0009-0003-2658-135X;", "linkedin": ";;;;;", "or_profile": "~Yixiao_Zhou1;~Ruiqi_Jia1;~Hongxiang_Lin5;~Hefeng_Quan1;~Yumeng_Zhao1;~Xiaoqing_Lu1", "aff": "Peking University;Peking University;Peking University;Nanjing University of Science and Technology;Beijing University of Posts and Telecommunications;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;njust.edu.cn;bupt.edu.cn;pku.edu.cn", "position": "Undergrad student;MS student;MS student;Undergrad student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nzhou2023improving,\ntitle={Improving Graph Matching with Positional Reconstruction Encoder-Decoder Network},\nauthor={Yixiao Zhou and Ruiqi Jia and Hongxiang Lin and Hefeng Quan and Yumeng Zhao and Xiaoqing Lyu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=28RTu9MOT6}\n}", "github": "", "project": "", "reviewers": "1xTE;LuNk;TCc6;pkGJ;Pb59", "pdf_size": 923343, "rating": "6;6;6;6;7", "confidence": "4;4;5;4;1", "soundness": "4;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "4;3;2;3;4", "wc_summary": "95;47;82;107;68", "wc_strengths": "70;31;97;141;29", "wc_weaknesses": "98;115;388;63;13", "wc_questions": "60;23;9;37;16", "wc_limitations": "1;4;14;7;10", "wc_review": "324;220;590;355;136", "wc_reply_reviewers": "147;19;78;0;0", "wc_reply_authors": "213;0;0;0;0", "reply_reviewers": "2;1;1;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 1.3564659966250536 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 79.8, 20.932271735289508 ], "wc_strengths_avg": [ 73.6, 42.20710840604933 ], "wc_weaknesses_avg": [ 135.4, 131.03068342949294 ], "wc_questions_avg": [ 29.0, 18.05547008526779 ], "wc_limitations_avg": [ 7.2, 4.534313619501853 ], "wc_review_avg": [ 325.0, 153.53957144658182 ], "wc_reply_reviewers_avg": [ 48.8, 56.82745815184768 ], "wc_reply_authors_avg": [ 42.6, 85.20000000000002 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9583727150068315, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18297149117594583342&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;njust.edu.cn;bupt.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Peking University;Nanjing University of Science and 
Technology;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.nust.edu.cn/;http://www.bupt.edu.cn/", "aff_unique_abbr": "Peking U;NUST;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Sharpness-Aware Minimization Leads to Low-Rank Features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72973", "id": "29WbraPk8U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/92dd1adab39f362046f99dfe3c39d90f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=29WbraPk8U", "openreview": "https://openreview.net/forum?id=29WbraPk8U", "poster": "/media/PosterPDFs/NeurIPS%202023/72973.png?t=1701723972.1245942", "slides": "https://nips.cc/virtual/2023/poster/72973", "video": "https://nips.cc/virtual/2023/poster/72973", "author_site": "Maksym Andriushchenko, Dara Bahri, Hossein Mobahi, Nicolas Flammarion", "tldr": "", "abstract": "Sharpness-aware minimization (SAM) is a recently proposed method that minimizes the sharpness of the training loss of a neural network. While its generalization improvement is well-known and is the primary motivation, we uncover an additional intriguing effect of SAM: reduction of the feature rank which happens at different layers of a neural network. We show that this low-rank effect occurs very broadly: for different architectures such as fully-connected networks, convolutional networks, vision transformers and for different objectives such as regression, classification, language-image contrastive training. To better understand this phenomenon, we provide a mechanistic understanding of how low-rank features arise in a simple two-layer network. We observe that a significant number of activations gets entirely pruned by SAM which directly contributes to the rank reduction. 
We confirm this effect theoretically and check that it can also occur in deep networks, although the overall rank reduction mechanism can be more complex, especially for deep networks with pre-activation skip connections and self-attention layers.", "keywords": "sharpness-aware minimization;low-rank features;understanding feature learning", "primary_area": "", "supplementary_material": "", "author": "Maksym Andriushchenko;Dara Bahri;Hossein Mobahi;Nicolas Flammarion", "authorids": "~Maksym_Andriushchenko1;~Dara_Bahri1;~Hossein_Mobahi2;~Nicolas_Flammarion1", "gender": "M;M;M;M", "homepage": "https://www.andriushchenko.me/;http://www.dara.run;;http://people.csail.mit.edu/hmobahi/", "dblp": "200/8865;231/7656;164/7417;94/1490", "google_scholar": "ZNtuJYoAAAAJ;j5PpTOwAAAAJ;;GSHmKZkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Maksym_Andriushchenko1;~Dara_Bahri1;~Nicolas_Flammarion1;~Hossein_Mobahi1", "aff": "Swiss Federal Institute of Technology Lausanne;Google Research;Swiss Federal Institute of Technology Lausanne;Google", "aff_domain": "epfl.ch;google.com;epfl.ch;google.com", "position": "PhD Student;Research Scientist;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nandriushchenko2023sharpnessaware,\ntitle={Sharpness-Aware Minimization Leads to Low-Rank Features},\nauthor={Maksym Andriushchenko and Dara Bahri and Hossein Mobahi and Nicolas Flammarion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=29WbraPk8U}\n}", "github": "", "project": "", "reviewers": "jzeR;wq2P;2Roy;fXy2", "pdf_size": 1140860, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "114;94;47;86", "wc_strengths": "79;80;70;228", "wc_weaknesses": "95;546;314;219", "wc_questions": "79;4;446;14", "wc_limitations": "6;13;84;1", "wc_review": "373;737;961;548", "wc_reply_reviewers": "22;38;63;61", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.25, 24.324627438051337 ], "wc_strengths_avg": [ 114.25, 65.78896184011418 ], "wc_weaknesses_avg": [ 293.5, 165.17339374124393 ], "wc_questions_avg": [ 135.75, 181.42267636654466 ], "wc_limitations_avg": [ 26.0, 33.7564808592365 ], "wc_review_avg": [ 654.75, 218.70799596722566 ], "wc_reply_reviewers_avg": [ 46.0, 16.98528775146303 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2072139949421837298&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;google.com;epfl.ch;google.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.epfl.ch;https://research.google", "aff_unique_abbr": "EPFL;Google Research", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Lausanne;Mountain View", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Learning Functional 
Transduction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72972", "id": "2BFZ8cPIf6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9b8a3362a6d9a7f9f842bd2d919e1a0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2BFZ8cPIf6", "openreview": "https://openreview.net/forum?id=2BFZ8cPIf6", "poster": "/media/PosterPDFs/NeurIPS%202023/72972.png?t=1702677815.6621747", "slides": "https://nips.cc/virtual/2023/poster/72972", "video": "https://nips.cc/virtual/2023/poster/72972", "author_site": "Mathieu Chalvidal, Thomas Serre, Rufin VanRullen", "tldr": "", "abstract": "Research in statistical learning has polarized into two general approaches to perform regression analysis: Transductive methods construct estimates directly based on exemplar data using generic relational principles which might suffer from the curse of dimensionality. Conversely, inductive methods can potentially fit highly complex functions at the cost of compute-intensive solution searches. In this work, we leverage the theory of vector-valued Reproducing Kernel Banach Spaces (RKBS) to propose a hybrid approach: We show that transductive regression systems can be meta-learned with gradient descent to form efficient _in-context_ neural approximators of function defined over both finite and infinite-dimensional spaces (operator regression). Once trained, our _Transducer_ can almost instantaneously capture new functional relationships and produce original image estimates, given a few pairs of input and output examples. We demonstrate the benefit of our meta-learned transductive approach to model physical systems influenced by varying external factors with little data at a fraction of the usual deep learning training costs for partial differential equations and climate modeling applications.", "keywords": "Meta-learning;Neural Operators;Kernel methods;In-context learning", "primary_area": "", "supplementary_material": "/attachment/9af8d6800bb25b88e10dad582c2949ad3344de69.pdf", "author": "Mathieu Chalvidal;Thomas Serre;Rufin VanRullen", "authorids": "~Mathieu_Chalvidal1;~Thomas_Serre1;~Rufin_VanRullen1", "gender": "M;M;M", "homepage": ";https://serre-lab.clps.brown.edu/;https://rufinv.github.io", "dblp": "258/0419;;83/2121", "google_scholar": "LB9Moj8AAAAJ;kZlPW4wAAAAJ;1pwyaYgAAAAJ", "orcid": ";;0000-0002-3611-7716", "linkedin": ";;", "or_profile": "~Mathieu_Chalvidal1;~Thomas_Serre1;~Rufin_VanRullen1", "aff": "Brown University;Universit\u00e9 de Toulouse;CNRS", "aff_domain": "brown.edu;univ-toulouse.fr;cnrs.fr", "position": "PhD student;Full Professor;Research Director", "bibtex": "@inproceedings{\nchalvidal2023learning,\ntitle={Learning Functional Transduction},\nauthor={Mathieu Chalvidal and Thomas Serre and Rufin VanRullen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2BFZ8cPIf6}\n}", "github": "", "project": "", "reviewers": "w6wT;f2BS;Sczv", "pdf_size": 14700879, "rating": "5;7;8", "confidence": "2;3;4", "soundness": "3;4;4", "novelty": "2;3;4", "presentation": "3;4;4", "wc_summary": "57;163;157", "wc_strengths": "35;44;91", "wc_weaknesses": "56;49;355", "wc_questions": "81;48;143", "wc_limitations": "1;31;45", "wc_review": "230;335;791", "wc_reply_reviewers": "61;42;129", "wc_reply_authors": "223;0;255", "reply_reviewers": "1;1;2", "reply_authors": "2;1;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], 
"soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 125.66666666666667, 48.61641240934542 ], "wc_strengths_avg": [ 56.666666666666664, 24.553795814270526 ], "wc_weaknesses_avg": [ 153.33333333333334, 142.62849956754397 ], "wc_questions_avg": [ 90.66666666666667, 39.38132665222045 ], "wc_limitations_avg": [ 25.666666666666668, 18.354533197248273 ], "wc_review_avg": [ 452.0, 243.5118066952812 ], "wc_reply_reviewers_avg": [ 77.33333333333333, 37.3482113211448 ], "wc_reply_authors_avg": [ 159.33333333333334, 113.42055467252054 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15509109858700600463&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": "brown.edu;univ-toulouse.fr;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Brown University;Universit\u00e9 de Toulouse;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.brown.edu;https://www.univ-toulouse.fr;https://www.cnrs.fr", "aff_unique_abbr": "Brown;UT;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;France" }, { "title": "Solving Inverse Physics Problems with Score Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72971", "id": "2BpoGPSDCR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c2f2230abc7ccf669f403be881d3ffb7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2BpoGPSDCR", "openreview": "https://openreview.net/forum?id=2BpoGPSDCR", "poster": "/media/PosterPDFs/NeurIPS%202023/72971.png?t=1702066158.28047", "slides": "https://nips.cc/virtual/2023/poster/72971", "video": "https://nips.cc/virtual/2023/poster/72971", "author_site": "Benjamin Holzschuh, Simona Vegetti, Nils Thuerey", "tldr": "", "abstract": "We propose to solve inverse problems involving the temporal evolution of physics systems by leveraging recent advances from diffusion models. \nOur method moves the system's current state backward in time step by step by combining an approximate inverse physics simulator and a learned correction function. \nA central insight of our work is that training the learned correction with a single-step loss is equivalent to a score matching objective, while recursively predicting longer parts of the trajectory during training relates to maximum likelihood training of a corresponding probability flow.\nWe highlight the advantages of our algorithm compared to standard denoising score matching and implicit score matching, as well as fully learned baselines for a wide range of inverse physics problems. The resulting inverse solver has excellent accuracy and temporal stability and, in contrast to other learned inverse solvers, allows for sampling the posterior of the solutions. 
Code and experiments are available at https://github.com/tum-pbs/SMDP.", "keywords": "inverse problems;diffusion models;learned corrections;score matching", "primary_area": "", "supplementary_material": "", "author": "Benjamin Holzschuh;Simona Vegetti;Nils Thuerey", "authorids": "~Benjamin_Holzschuh1;~Simona_Vegetti1;~Nils_Thuerey1", "gender": "M;;M", "homepage": "https://ge.in.tum.de/about/;https://www.mpa-garching.mpg.de/444643/dark-matter-mprg;https://ge.in.tum.de", "dblp": "338/7038;;42/478", "google_scholar": ";;https://scholar.google.com.tw/citations?user=GEehwv8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Benjamin_Holzschuh1;~Simona_Vegetti1;~Nils_Thuerey1", "aff": "School of Computation, Information and Technology, Technische Universit\u00e4t M\u00fcnchen;Max-Planck Institute;Technical University Munich", "aff_domain": "in.tum.de;mpg.de;tum.de", "position": "PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nholzschuh2023solving,\ntitle={Solving Inverse Physics Problems with Score Matching},\nauthor={Benjamin Holzschuh and Simona Vegetti and Nils Thuerey},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2BpoGPSDCR}\n}", "github": "", "project": "", "reviewers": "mtJA;og5h;riEs;h6fX;SQFk;3QqF", "pdf_size": 6037677, "rating": "4;4;6;6;6;7", "confidence": "3;2;3;3;2;4", "soundness": "2;4;3;2;3;4", "novelty": "2;2;2;2;2;4", "presentation": "3;4;2;3;2;3", "wc_summary": "114;60;122;157;99;147", "wc_strengths": "107;25;63;58;41;320", "wc_weaknesses": "332;47;86;278;139;255", "wc_questions": "176;21;1065;329;39;189", "wc_limitations": "1;11;1;30;24;7", "wc_review": "730;164;1337;852;342;918", "wc_reply_reviewers": "0;258;561;35;0;39", "wc_reply_authors": "0;418;589;0;0;0", "reply_reviewers": "0;1;3;1;0;1", "reply_authors": "1;2;3;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 116.5, 31.899582024429932 ], "wc_strengths_avg": [ 102.33333333333333, 100.54462801274977 ], "wc_weaknesses_avg": [ 189.5, 104.87889841781012 ], "wc_questions_avg": [ 303.1666666666667, 355.82412166062534 ], "wc_limitations_avg": [ 12.333333333333334, 11.070481270277076 ], "wc_review_avg": [ 723.8333333333334, 384.9997474746646 ], "wc_reply_reviewers_avg": [ 148.83333333333334, 204.58202647240435 ], "wc_reply_authors_avg": [ 167.83333333333334, 242.43103257536058 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5423261445466405, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9605669587003951279&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "in.tum.de;mpg.de;tum.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;Technical University of Munich", "aff_unique_dep": "School of Computation, Information and Technology;;", "aff_unique_url": "https://www.tum.de;https://www.mpg.de;https://www.tum.de", "aff_unique_abbr": "TUM;MPG;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" 
}, { "title": "ISP: Multi-Layered Garment Draping with Implicit Sewing Patterns", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72970", "id": "2BrHBj1Puu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e976afe805026f7d378a583af5ea9a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2BrHBj1Puu", "openreview": "https://openreview.net/forum?id=2BrHBj1Puu", "poster": "/media/PosterPDFs/NeurIPS%202023/72970.png?t=1699993765.3819249", "slides": "https://nips.cc/virtual/2023/poster/72970", "video": "https://nips.cc/virtual/2023/poster/72970", "author_site": "Ren Li, Beno\u00eet Guillard, Benoit Guillard, Pascal Fua", "tldr": "", "abstract": "Many approaches to draping individual garments on human body models are realistic, fast, and yield outputs that are differentiable with respect to the body shape on which they are draped. However, they are either unable to handle multi-layered clothing, which is prevalent in everyday dress, or restricted to bodies in T-pose. In this paper, we introduce a parametric garment representation model that addresses these limitations. As in models used by clothing designers, each garment consists of individual 2D panels. Their 2D shape is defined by a Signed Distance Function and 3D shape by a 2D to 3D mapping. The 2D parameterization enables easy detection of potential collisions and the 3D parameterization handles complex shapes effectively. We show that this combination is faster and yields higher quality reconstructions than purely implicit surface representations, and makes the recovery of layered garments from images possible thanks to its differentiability. Furthermore, it supports rapid editing of garment shapes and texture by modifying individual 2D panels.", "keywords": "garment modeling;draping;deformation;human body modeling", "primary_area": "", "supplementary_material": "/attachment/73989c329a00fd9ad2db29309734e0325d96a61e.zip", "author": "Ren Li;Beno\u00eet Guillard;Pascal Fua", "authorids": "~Ren_Li1;~Beno\u00eet_Guillard1;~Pascal_Fua1", "gender": "M;M;M", "homepage": "https://liren2515.github.io/page/;https://people.epfl.ch/pascal.fua/bio?lang=en;", "dblp": ";f/PFua;266/9577", "google_scholar": "dZU-_FgAAAAJ;https://scholar.google.com/citations?view_op=list_works;CwnF8wEAAAAJ", "orcid": ";;", "linkedin": ";pascal-fua-epfl/?lipi=urn%3Ali%3Apage%3Ad_flagship3_search_srp_top%3BOz8ffqlCTcmui5v37AilTQ%3D%3D&licu=urn%3Ali%3Acontrol%3Ad_flagship3_search_srp_top-search_srp_result&lici=IhLn%2B0y4Rj23iI9XNMDNwA%3D%3D;", "or_profile": "~Ren_Li1;~Pascal_Fua1;~Benoit_Guillard1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2023isp,\ntitle={{ISP}: Multi-Layered Garment Draping with Implicit Sewing Patterns},\nauthor={Ren Li and Beno{\\^\\i}t Guillard and Pascal Fua},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2BrHBj1Puu}\n}", "github": "", "project": "", "reviewers": "LvBx;5F53;kV1W;cVr6", "pdf_size": 6847494, "rating": "3;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "92;136;80;140", "wc_strengths": "40;82;68;326", "wc_weaknesses": "304;209;148;75", "wc_questions": "22;6;33;181", "wc_limitations": "175;11;2;17", "wc_review": "633;444;331;739", 
"wc_reply_reviewers": "722;155;0;101", "wc_reply_authors": "853;191;0;10", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.0, 26.38181191654584 ], "wc_strengths_avg": [ 129.0, 114.73883387938018 ], "wc_weaknesses_avg": [ 184.0, 83.9672555226143 ], "wc_questions_avg": [ 60.5, 70.22997935354958 ], "wc_limitations_avg": [ 51.25, 71.64626647634893 ], "wc_review_avg": [ 536.75, 158.98486563192108 ], "wc_reply_reviewers_avg": [ 244.5, 281.2423332288367 ], "wc_reply_authors_avg": [ 263.5, 348.7337809848653 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15511622593965778628&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "epfl.ch;epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "DOSE: Diffusion Dropout with Adaptive Prior for Speech Enhancement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72969", "id": "2C2WZfCfo9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e966a12c2d6307adb8809aaa9acf057-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2C2WZfCfo9", "openreview": "https://openreview.net/forum?id=2C2WZfCfo9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72969", "video": "https://nips.cc/virtual/2023/poster/72969", "author_site": "Wenxin Tai, Yue Lei, Fan Zhou, Goce Trajcevski, Ting Zhong", "tldr": "", "abstract": "Speech enhancement (SE) aims to improve the intelligibility and quality of speech in the presence of non-stationary additive noise. Deterministic deep learning models have traditionally been used for SE, but recent studies have shown that generative approaches, such as denoising diffusion probabilistic models (DDPMs), can also be effective. However, incorporating condition information into DDPMs for SE remains a challenge. We propose a model-agnostic method called DOSE that employs two efficient condition-augmentation techniques to address this challenge, based on two key insights: (1) We force the model to prioritize the condition factor when generating samples by training it with dropout operation; (2) We inject the condition information into the sampling process by providing an informative adaptive prior. Experiments demonstrate that our approach yields substantial improvements in high-quality and stable speech generation, consistency with the condition factor, and inference efficiency. 
Codes are publicly available at https://github.com/ICDM-UESTC/DOSE.", "keywords": "speech enhancement;diffusion models;adaptive prior;dropout;generalization", "primary_area": "", "supplementary_material": "", "author": "Wenxin Tai;Yue Lei;Fan Zhou;Goce Trajcevski;Ting Zhong", "authorids": "~Wenxin_Tai1;leiyue828@gmail.com;~Fan_Zhou11;~Goce_Trajcevski2;~Ting_Zhong2", "gender": "M;;M;M;F", "homepage": "https://wxtai.github.io/;;https://sise.uestc.edu.cn/info/1035/9375.htm;;", "dblp": "284/4234;;63/3122-2;66/974;73/9481.html", "google_scholar": "YyxocAIAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;Mdr0XDkAAAAJ", "orcid": "0000-0001-7364-8324;;0000-0002-8038-8150;;0000-0002-8163-3146", "linkedin": ";;;;", "or_profile": "~Wenxin_Tai1;leiyue828@gmail.com;~Fan_Zhou11;~Goce_Trajcevski2;~Ting_Zhong2", "aff": "University of Electronic Science and Technology of China;;University of Electronic Science and Technology of China;Iowa State University;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;;uestc.edu.cn;iastate.edu;uestc.edu.cn", "position": "PhD student;;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ntai2023dose,\ntitle={{DOSE}: Diffusion Dropout with Adaptive Prior for Speech Enhancement},\nauthor={Wenxin Tai and Yue Lei and Fan Zhou and Goce Trajcevski and Ting Zhong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2C2WZfCfo9}\n}", "github": "", "project": "", "reviewers": "Zy9S;RTAK;boQC;CtVJ;k89b", "pdf_size": 3317716, "rating": "5;6;6;6;7", "confidence": "4;5;5;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;2;3;3", "wc_summary": "36;89;68;68;184", "wc_strengths": "24;34;123;77;178", "wc_weaknesses": "72;197;420;28;114", "wc_questions": "24;128;7;14;1", "wc_limitations": "1;8;9;10;19", "wc_review": "157;456;627;197;496", "wc_reply_reviewers": "0;0;812;9;19", "wc_reply_authors": "0;0;1354;0;0", "reply_reviewers": "0;0;3;1;1", "reply_authors": "1;1;4;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 89.0, 50.430149712250504 ], "wc_strengths_avg": [ 87.2, 57.366889404952055 ], "wc_weaknesses_avg": [ 166.2, 138.60793628071949 ], "wc_questions_avg": [ 34.8, 47.22456987628368 ], "wc_limitations_avg": [ 9.4, 5.748043145279966 ], "wc_review_avg": [ 386.6, 180.68823979440387 ], "wc_reply_reviewers_avg": [ 168.0, 322.0763884546646 ], "wc_reply_authors_avg": [ 270.8, 541.5999999999999 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2027353674081841850&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "uestc.edu.cn;;uestc.edu.cn;iastate.edu;uestc.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Electronic Science and Technology of China;Iowa State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.iastate.edu", "aff_unique_abbr": "UESTC;ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": 
"China;United States" }, { "title": "MultiVENT: Multilingual Videos of Events and Aligned Natural Text", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73710", "id": "2CJUQe6IoR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a054ff49751dbc991ec30ae479397c3d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=2CJUQe6IoR", "openreview": "https://openreview.net/forum?id=2CJUQe6IoR", "poster": "/media/PosterPDFs/NeurIPS%202023/73710.png?t=1701379470.3384252", "slides": "https://nips.cc/virtual/2023/poster/73710", "video": "https://nips.cc/virtual/2023/poster/73710", "author_site": "Kate Sanders, David Etter, Reno Kriz, Benjamin Van Durme", "tldr": "", "abstract": "Everyday news coverage has shifted from traditional broadcasts towards a wide range of presentation formats such as first-hand, unedited video footage. Datasets that reflect the diverse array of multimodal, multilingual news sources available online could be used to teach models to benefit from this shift, but existing news video datasets focus on traditional news broadcasts produced for English-speaking audiences. We address this limitation by constructing MultiVENT, a dataset of multilingual, event-centric videos grounded in text documents across five target languages. MultiVENT includes both news broadcast videos and non-professional event footage, which we use to analyze the state of online news videos and how they can be leveraged to build robust, factually accurate models. Finally, we provide a model for complex, multilingual video retrieval to serve as a baseline for information retrieval using MultiVENT.", "keywords": "video retrieval;information retrieval;multimodal;dataset;multilingual;video;ocr", "primary_area": "", "supplementary_material": "/attachment/1d43b8c4c4f48c4925b1caa44131b6f36235d9c7.zip", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nsanders2023multivent,\ntitle={Multi{VENT}: Multilingual Videos of Events and Aligned Natural Text},\nauthor={Kate Sanders and David Etter and Reno Kriz and Benjamin Van Durme},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=2CJUQe6IoR}\n}", "github": "", "project": "", "reviewers": "cNPz;D4Di;ZCm2;uXbS", "pdf_size": 4375209, "rating": "5;6;6;7", "confidence": "2;3;3;5", "wc_summary_and_contributions": "21;53;18;38", "wc_strengths": "36;56;16;25", "wc_improvement": "12;77;172;31", "wc_limitations": "24;50;29;93", "wc_correctness": "10;29;12;33", "wc_clarity": "8;61;14;11", "wc_relation_to_prior_work": "12;15;3;19", "wc_documentation": "9;18;20;12", "wc_additional_feedback": "1;1;1;1", "wc_review": "133;360;285;263", "wc_reply_reviewers": "0;0;10;0", "wc_reply_authors": "468;920;889;660", "reply_reviewers": "0;0;1;0", "reply_authors": "2;3;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "wc_summary_and_contributions_avg": [ 32.5, 14.080127840328723 ], "wc_strengths_avg": [ 33.25, 14.922717580923388 ], "wc_improvement_avg": [ 73.0, 61.8506265125908 ], "wc_limitations_avg": [ 49.0, 27.212129648375555 ], "wc_correctness_avg": [ 21.0, 10.124228365658293 ], "wc_clarity_avg": [ 23.5, 21.754309917807092 ], "wc_relation_to_prior_work_avg": [ 12.25, 
5.889609494694874 ], "wc_documentation_avg": [ 14.75, 4.437059837324712 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 260.25, 81.79662279091967 ], "wc_reply_reviewers_avg": [ 2.5, 4.330127018922194 ], "wc_reply_authors_avg": [ 734.25, 183.6115124386268 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.9733285267845752, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2040727562798209899&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "", "author_num": 1 }, { "title": "Characterizing the Optimal $0-1$ Loss for Multi-class Classification with a Test-time Attacker", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72968", "id": "2CRaOpEKWh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b867f0e56c4c085ef1cfdad691db5f6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2CRaOpEKWh", "openreview": "https://openreview.net/forum?id=2CRaOpEKWh", "poster": "/media/PosterPDFs/NeurIPS%202023/72968.png?t=1702250813.2197962", "slides": "https://nips.cc/virtual/2023/poster/72968", "video": "https://nips.cc/virtual/2023/poster/72968", "author_site": "Sihui Dai, Wenxin Ding, Arjun Nitin Bhagoji, Daniel Cullina, Heather Zheng, Ben Zhao, Prateek Mittal", "tldr": "", "abstract": "Finding classifiers robust to adversarial examples is critical for their safe\ndeployment. Determining the robustness of the best possible classifier under a\ngiven threat model for a fixed data distribution and comparing it to that\nachieved by state-of-the-art training methods is thus an important diagnostic\ntool. In this paper, we find achievable information-theoretic lower bounds on\nrobust loss in the presence of a test-time attacker for *multi-class\nclassifiers on any discrete dataset*. We provide a general framework for finding\nthe optimal $0-1$ loss that revolves around the construction of a conflict\nhypergraph from the data and adversarial constraints. The prohibitive cost of\nthis formulation in practice leads us to formulate other variants of the attacker-classifier\ngame that more efficiently determine the range of the optimal loss. Our\nevaluation shows, for the first time, an analysis of the gap to optimal\nrobustness for classifiers in the multi-class setting on benchmark datasets.", "keywords": "adversarial robustness;graph theory;fundamental bounds", "primary_area": "", "supplementary_material": "/attachment/f7a8e6da095d3f02fbf52fb53f0237fb69dfd4b5.zip", "author": "Sihui Dai;Wenxin Ding;Arjun Nitin Bhagoji;Daniel Cullina;Haitao Zheng;Ben Y. 
Zhao;Prateek Mittal", "authorids": "~Sihui_Dai1;~Wenxin_Ding1;~Arjun_Nitin_Bhagoji1;~Daniel_Cullina1;~Haitao_Zheng2;~Ben_Y._Zhao2;~Prateek_Mittal1", "gender": "F;F;;;F;;", "homepage": ";https://wenxind.github.io;;;http://people.cs.uchicago.edu/~htzheng/;;http://www.princeton.edu/~pmittal/", "dblp": "244/9642;254/8202;;04/7480;43/4261;;", "google_scholar": ";;;tmYOiO0AAAAJ;;;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ", "orcid": ";;;;;;0000-0002-4057-0118", "linkedin": ";;;;;;", "or_profile": "~Sihui_Dai1;~Wenxin_Ding1;~Arjun_Nitin_Bhagoji1;~Daniel_Cullina1;~Haitao_Zheng2;~Ben_Y._Zhao2;~Prateek_Mittal1", "aff": "Princeton University;University of Chicago;;Pennsylvania State University;UC Santa Barbara;;Princeton University", "aff_domain": "princeton.edu;uchicago.edu;;psu.edu;ucsb.edu;;princeton.edu", "position": "PhD student;PhD student;;Assistant Professor;Full Professor;;Full Professor", "bibtex": "@inproceedings{\ndai2023characterizing,\ntitle={Characterizing the Optimal \\$0-1\\$ Loss for Multi-class Classification with a Test-time Attacker},\nauthor={Sihui Dai and Wenxin Ding and Arjun Nitin Bhagoji and Daniel Cullina and Haitao Zheng and Ben Y. Zhao and Prateek Mittal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2CRaOpEKWh}\n}", "github": "", "project": "", "reviewers": "yCc4;baps;QPxv;Q9vC", "pdf_size": 993432, "rating": "6;6;7;8", "confidence": "2;3;3;4", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "1;3;3;4", "wc_summary": "123;110;91;19", "wc_strengths": "35;112;83;138", "wc_weaknesses": "368;222;61;98", "wc_questions": "97;23;40;474", "wc_limitations": "33;39;1;6", "wc_review": "656;506;276;735", "wc_reply_reviewers": "246;40;0;158", "wc_reply_authors": "568;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 85.75, 40.18317433951678 ], "wc_strengths_avg": [ 92.0, 38.22956970723055 ], "wc_weaknesses_avg": [ 187.25, 120.19021382791529 ], "wc_questions_avg": [ 158.5, 184.20437019788645 ], "wc_limitations_avg": [ 19.75, 16.48294573187693 ], "wc_review_avg": [ 543.25, 174.8504718323631 ], "wc_reply_reviewers_avg": [ 111.0, 97.205966895042 ], "wc_reply_authors_avg": [ 142.0, 245.95121467478057 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7110118085400271417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "princeton.edu;uchicago.edu;;psu.edu;ucsb.edu;;princeton.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Princeton University;University of Chicago;Pennsylvania State University;University of California, Santa Barbara", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.princeton.edu;https://www.uchicago.edu;https://www.psu.edu;https://www.ucsb.edu", "aff_unique_abbr": "Princeton;UChicago;PSU;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Discriminative Calibration: Check Bayesian Computation from Simulations and 
Flexible Classifier", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72967", "id": "2Cmdh5z6ph", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7103cd82de95a7b30983fcf74ba499ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2Cmdh5z6ph", "openreview": "https://openreview.net/forum?id=2Cmdh5z6ph", "poster": "/media/PosterPDFs/NeurIPS%202023/72967.png?t=1699567493.531458", "slides": "https://nips.cc/virtual/2023/poster/72967", "video": "https://nips.cc/virtual/2023/poster/72967", "author_site": "Yuling Yao, Justin Domke", "tldr": "", "abstract": "To check the accuracy of Bayesian computations, it is common to use rank-based simulation-based calibration (SBC). However, SBC has drawbacks: The test statistic is somewhat ad-hoc, interactions are difficult to examine, multiple testing is a challenge, and the resulting p-value is not a divergence metric. We propose to replace the marginal rank test with a flexible classification approach that learns test statistics from data. This measure typically has a higher statistical power than the SBC test and returns an interpretable divergence measure of miscalibration, computed from classification accuracy. This approach can be used with different data generating processes to address simulation-based inference or traditional inference methods like Markov chain Monte Carlo or variational inference. We illustrate an automated implementation using neural networks and statistically-inspired features, and validate the method with numerical and real data experiments.", "keywords": "simulation based calibration;simulation based inference;Bayesian computation;diagnostics;classifier two-sample test;likelihood-free", "primary_area": "", "supplementary_material": "", "author": "Yuling Yao;Justin Domke", "authorids": "~Yuling_Yao2;~Justin_Domke1", "gender": "M;Unspecified", "homepage": "https://www.yulingyao.com;https://people.cs.umass.edu/~domke/", "dblp": "87/6597;39/5186", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-0985-7233;", "linkedin": ";", "or_profile": "~Yuling_Yao2;~Justin_Domke1", "aff": "Flatiron Institute;University of Massachusetts at Amherst", "aff_domain": "flatironinstitute.org;umass.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nyao2023discriminative,\ntitle={Discriminative Calibration: Check Bayesian Computation from Simulations and Flexible Classifier},\nauthor={Yuling Yao and Justin Domke},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2Cmdh5z6ph}\n}", "github": "", "project": "", "reviewers": "TMBR;aBBC;gi2H;eYcd", "pdf_size": 1882415, "rating": "6;7;7;7", "confidence": "3;3;3;4", "soundness": "3;4;4;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "115;160;143;210", "wc_strengths": "55;91;56;107", "wc_weaknesses": "61;200;65;98", "wc_questions": "110;170;106;136", "wc_limitations": "1;42;19;8", "wc_review": "342;663;389;559", "wc_reply_reviewers": "26;110;29;44", "wc_reply_authors": "0;118;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 157.0, 34.56153931757091 ], "wc_strengths_avg": [ 77.25, 22.47637648732553 ], "wc_weaknesses_avg": [ 106.0, 
56.138222273242675 ], "wc_questions_avg": [ 130.5, 25.548972582082435 ], "wc_limitations_avg": [ 17.5, 15.532224567009067 ], "wc_review_avg": [ 488.25, 129.21179319241722 ], "wc_reply_reviewers_avg": [ 52.25, 34.03215391361528 ], "wc_reply_authors_avg": [ 29.5, 51.09549882328188 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4437909611989309728&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 6, "email": "flatironinstitute.org;umass.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Flatiron Institute;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://flatironinstitute.org;https://www.umass.edu", "aff_unique_abbr": "Flatiron;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Navigating Data Heterogeneity in Federated Learning: A Semi-Supervised Federated Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72966", "id": "2D7ou48q0E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/066e4dbfeccb5dc2851acd5eca584937-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2D7ou48q0E", "openreview": "https://openreview.net/forum?id=2D7ou48q0E", "poster": "/media/PosterPDFs/NeurIPS%202023/72966.png?t=1701912121.4126241", "slides": "https://nips.cc/virtual/2023/poster/72966", "video": "https://nips.cc/virtual/2023/poster/72966", "author_site": "Taehyeon Kim, Eric Lin, Junu Lee, Christian Lau, Vaikkunth Mugunthan", "tldr": "", "abstract": "Federated Learning (FL) has emerged as a potent framework for training models across distributed data sources while maintaining data privacy. Nevertheless, it faces challenges with limited high-quality labels and non-IID client data, particularly in applications like autonomous driving. To address these hurdles, we navigate the uncharted waters of Semi-Supervised Federated Object Detection (SSFOD). We present a pioneering SSFOD framework, designed for scenarios where labeled data reside only at the server while clients possess unlabeled data. Notably, our method represents the inaugural implementation of SSFOD for clients with 0% labeled non-IID data, a stark contrast to previous studies that maintain some subset of labels at each client. We propose FedSTO, a two-stage strategy encompassing Selective Training followed by Orthogonally enhanced full-parameter training, to effectively address data shift (e.g. weather conditions) between server and clients. Our contributions include selectively refining the backbone of the detector to avert overfitting, orthogonality regularization to boost representation divergence, and local EMA-driven pseudo label assignment to yield high-quality pseudo labels. Extensive validation on prominent autonomous driving datasets (BDD100K, Cityscapes, and SODA10M) attests to the efficacy of our approach, demonstrating state-of-the-art results. 
Remarkably, FedSTO, using just 20-30% of labels, performs nearly as well as fully-supervised centralized training methods.", "keywords": "Federated Learning;Semi-Supervised Learning;Object Detection", "primary_area": "", "supplementary_material": "/attachment/9313d56b1b9be92c5f34b69892edbbe0db381079.pdf", "author": "Taehyeon Kim;Eric Lin;Junu Lee;Christian Lau;Vaikkunth Mugunthan", "authorids": "~Taehyeon_Kim1;~Eric_Lin1;junu@dynamofl.com;christian@dynamofl.com;~Vaikkunth_Mugunthan1", "gender": "M;;;;M", "homepage": "https://taehyeon.oopy.io/;;;;", "dblp": ";77/5363;;;", "google_scholar": "https://scholar.google.co.kr/citations?user=wDEaSpwAAAAJ;https://scholar.google.com/citations?hl=en;;;qtMAF_oAAAAJ", "orcid": ";;;;", "linkedin": "taehyeon-k-6a1239207/;;;;", "or_profile": "~Taehyeon_Kim1;~Eric_Lin1;junu@dynamofl.com;christian@dynamofl.com;~Vaikkunth_Mugunthan1", "aff": "Dynamo AI;Dynamo AI;;;", "aff_domain": "dynamo.ai;dynamo.ai;;;", "position": "PhD Intern;Researcher;;;", "bibtex": "@inproceedings{\nkim2023navigating,\ntitle={Navigating Data Heterogeneity in Federated Learning: A Semi-Supervised Approach for Object Detection},\nauthor={Taehyeon Kim and Eric Lin and Junu Lee and Christian Lau and Vaikkunth Mugunthan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2D7ou48q0E}\n}", "github": "", "project": "", "reviewers": "1XGa;8JqK;qq85;AM8c;2CR4", "pdf_size": 3843074, "rating": "5;5;5;6;6", "confidence": "4;5;3;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;3;2;3", "presentation": "2;3;3;3;3", "wc_summary": "72;41;93;191;92", "wc_strengths": "71;40;54;101;54", "wc_weaknesses": "73;206;39;183;572", "wc_questions": "10;75;2;5;6", "wc_limitations": "6;21;13;5;29", "wc_review": "232;383;201;485;753", "wc_reply_reviewers": "34;39;26;0;52", "wc_reply_authors": "226;203;253;148;156", "reply_reviewers": "1;1;1;0;1", "reply_authors": "3;3;3;2;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 97.8, 50.26887705131277 ], "wc_strengths_avg": [ 64.0, 20.94755355644186 ], "wc_weaknesses_avg": [ 214.6, 189.57489285240277 ], "wc_questions_avg": [ 19.6, 27.817979797246238 ], "wc_limitations_avg": [ 14.8, 9.130169768410662 ], "wc_review_avg": [ 410.8, 199.68214742435038 ], "wc_reply_reviewers_avg": [ 30.2, 17.302023003105734 ], "wc_reply_authors_avg": [ 197.2, 40.236302016959755 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.8, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7495598408674675634&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "dynamo.ai;dynamo.ai;;;", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Dynamo AI", "aff_unique_dep": "", "aff_unique_url": "https://www.dynamo.ai", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning to Compress Prompts with Gist Tokens", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72965", "id": "2DtxPCL3T5", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d77c6dcc7f143aa2154e7f4d5e22d68-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2DtxPCL3T5", "openreview": "https://openreview.net/forum?id=2DtxPCL3T5", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72965", "video": "https://nips.cc/virtual/2023/poster/72965", "author_site": "Jesse Mu, Xiang Li, Noah Goodman", "tldr": "", "abstract": "Prompting is the primary way to utilize the multitask capabilities of language models (LMs), but prompts occupy valuable space in the input context window, and repeatedly encoding the same prompt is computationally inefficient. Finetuning and distillation methods allow for specialization of LMs without prompting, but require retraining the model for each task. To avoid this trade-off entirely, we present gisting, which trains an LM to compress prompts into smaller sets of \"gist\" tokens which can be cached and reused for compute efficiency. Gist models can be trained with no additional cost over standard instruction finetuning by simply modifying Transformer attention masks to encourage prompt compression. On decoder (LLaMA-7B) and encoder-decoder (FLAN-T5-XXL) LMs, gisting enables up to 26x compression of prompts, resulting in up to 40% FLOPs reductions, 4.2% wall time speedups, and storage savings, all with minimal loss in output quality.", "keywords": "language models;instruction finetuning;prompt compression;distillation;context distillation;prompting;soft prompting;efficiency", "primary_area": "", "supplementary_material": "/attachment/a3222889913e890e0678317f13f8a9717bdc1613.zip", "author": "Jesse Mu;Xiang Lisa Li;Noah Goodman", "authorids": "~Jesse_Mu1;~Xiang_Lisa_Li1;~Noah_Goodman1", "gender": ";F;", "homepage": "https://www.jesse.mu/;https://xiangli1999.github.io;https://cocolab.stanford.edu/", "dblp": "205/9022;40/1491-63;96/1216", "google_scholar": "djLcGEQAAAAJ;nzA4P0oAAAAJ;OUpIbcQAAAAJ", "orcid": "0000-0002-0812-2710;;", "linkedin": "jayelm;;", "or_profile": "~Jesse_Mu1;~Xiang_Lisa_Li1;~Noah_Goodman1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmu2023learning,\ntitle={Learning to Compress Prompts with Gist Tokens},\nauthor={Jesse Mu and Xiang Lisa Li and Noah Goodman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2DtxPCL3T5}\n}", "github": "", "project": "", "reviewers": "nmKf;tZ74;W7yi;3Sa1", "pdf_size": 1360447, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "2;2;2;4", "novelty": "3;2;2;3", "presentation": "3;3;4;3", "wc_summary": "95;55;195;113", "wc_strengths": "105;55;62;116", "wc_weaknesses": "149;59;367;169", "wc_questions": "18;20;117;17", "wc_limitations": "92;17;18;8", "wc_review": "459;206;759;423", "wc_reply_reviewers": "64;48;92;31", "wc_reply_authors": "79;66;121;50", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 114.5, 50.997548960709864 ], "wc_strengths_avg": [ 84.5, 26.405491853021786 ], "wc_weaknesses_avg": [ 186.0, 112.4144118874444 ], "wc_questions_avg": [ 43.0, 42.737571292716204 ], "wc_limitations_avg": [ 33.75, 33.855391003501936 
], "wc_review_avg": [ 461.75, 197.02458602925677 ], "wc_reply_reviewers_avg": [ 58.75, 22.465250944514285 ], "wc_reply_authors_avg": [ 79.0, 26.334388164527386 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 198, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9126473043769056301&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Any-to-Any Generation via Composable Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72964", "id": "2EDqbSCnmF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/33edf072fe44f19079d66713a1831550-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2EDqbSCnmF", "openreview": "https://openreview.net/forum?id=2EDqbSCnmF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72964", "video": "https://nips.cc/virtual/2023/poster/72964", "author_site": "Zineng Tang, Ziyi Yang, Chenguang Zhu, Michael Zeng, Mohit Bansal", "tldr": "", "abstract": "We present Composable Diffusion (CoDi), a novel generative model capable of generating any combination of output modalities, such as language, image, video, or audio, from any combination of input modalities. Unlike existing generative AI systems, CoDi can generate multiple modalities in parallel and its input is not limited to a subset of modalities like text or image. Despite the absence of training datasets for many combinations of modalities, we propose to align modalities in both the input and output space. This allows CoDi to freely condition on any input combination and generate any group of modalities, even if they are not present in the training data. CoDi employs a novel composable generation strategy which involves building a shared multimodal space by bridging alignment in the diffusion process, enabling the synchronized generation of intertwined modalities, such as temporally aligned video and audio. 
Highly customizable and flexible, CoDi achieves strong joint-modality generation quality, and outperforms or is on par with the unimodal state-of-the-art for single-modality synthesis.", "keywords": "Generative AI;Diffusion Model;Multimodal Generation;Audio-Video Generation", "primary_area": "", "supplementary_material": "/attachment/be6ff3127c8021636f141385d5adf5a1c2c7e286.zip", "author": "Zineng Tang;Ziyi Yang;Chenguang Zhu;Michael Zeng;Mohit Bansal", "authorids": "~Zineng_Tang1;~Ziyi_Yang1;~Chenguang_Zhu1;~Michael_Zeng1;~Mohit_Bansal2", "gender": "M;M;M;M;M", "homepage": "https://zinengtang.github.io/;;;https://www.microsoft.com/en-us/research/people/nzeng/;https://www.cs.unc.edu/~mbansal/", "dblp": "251/9569;;48/7536-1.html;232/1866-1.html;32/5243.html", "google_scholar": "bZy4vtwAAAAJ;JkyLIM0AAAAJ;1b2kKWoAAAAJ;;DN8QtscAAAAJ", "orcid": ";;;;", "linkedin": ";ziyi-yang;;michaelnanshanzeng/;", "or_profile": "~Zineng_Tang1;~Ziyi_Yang1;~Chenguang_Zhu1;~Michael_Zeng1;~Mohit_Bansal2", "aff": "University of California, Berkeley;Microsoft;Zoom;Microsoft;University of North Carolina at Chapel Hill", "aff_domain": "berkeley.edu;microsoft.com;zoom.us;microsoft.com;unc.edu", "position": "PhD student;Principal Researcher;Principal Researcher;Vice President Research Manager;Full Professor", "bibtex": "@inproceedings{\ntang2023anytoany,\ntitle={Any-to-Any Generation via Composable Diffusion},\nauthor={Zineng Tang and Ziyi Yang and Chenguang Zhu and Michael Zeng and Mohit Bansal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2EDqbSCnmF}\n}", "github": "", "project": "", "reviewers": "mZPw;dG2H;7M7p;hGLR", "pdf_size": 3399729, "rating": "5;6;6;7", "confidence": "5;4;4;3", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;2;3", "wc_summary": "81;166;162;81", "wc_strengths": "56;49;207;41", "wc_weaknesses": "110;37;298;17", "wc_questions": "147;4;204;2", "wc_limitations": "11;1;15;2", "wc_review": "405;257;886;143", "wc_reply_reviewers": "37;9;0;9", "wc_reply_authors": "169;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.5, 41.52408939398912 ], "wc_strengths_avg": [ 88.25, 68.7654528088051 ], "wc_weaknesses_avg": [ 115.5, 110.9064921454105 ], "wc_questions_avg": [ 89.25, 88.5758855445431 ], "wc_limitations_avg": [ 7.25, 5.931905258852336 ], "wc_review_avg": [ 422.75, 283.1292770096374 ], "wc_reply_reviewers_avg": [ 13.75, 13.91716565971678 ], "wc_reply_authors_avg": [ 42.25, 73.17914661978507 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 184, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11699928415124917361&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "berkeley.edu;microsoft.com;zoom.us;microsoft.com;unc.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "University of California, Berkeley;Microsoft;Zoom Video Communications Inc.;University of North Carolina", "aff_unique_dep": ";Microsoft Corporation;;", "aff_unique_url": "https://www.berkeley.edu;https://www.microsoft.com;https://zoom.us;https://www.unc.edu", 
"aff_unique_abbr": "UC Berkeley;Microsoft;Zoom;UNC", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Chapel Hill", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improved Bayesian Regret Bounds for Thompson Sampling in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72963", "id": "2EVTB1idyR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4a17cd29ced0443bcff689fbb0d32d5e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2EVTB1idyR", "openreview": "https://openreview.net/forum?id=2EVTB1idyR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72963", "video": "https://nips.cc/virtual/2023/poster/72963", "author_site": "Ahmadreza Moradipari, Mohammad Pedramfar, Modjtaba Shokrian Zini, Vaneet Aggarwal", "tldr": "", "abstract": "In this paper, we prove state-of-the-art Bayesian regret bounds for Thompson Sampling in reinforcement learning in a multitude of settings. We present a refined analysis of the information ratio, and show an upper bound of order $\\widetilde{O}(H\\sqrt{d_{l_1}T})$ in the time inhomogeneous reinforcement learning problem where $H$ is the episode length and $d_{l_1}$ is the Kolmogorov $l_1-$dimension of the space of environments. We then find concrete bounds of $d_{l_1}$ in a variety of settings, such as tabular, linear and finite mixtures, and discuss how our results improve the state-of-the-art.", "keywords": "Thompson Sampling;Reinforcement Learning;Bayesian Regret", "primary_area": "", "supplementary_material": "/attachment/1b3b61ca37ccd3144270305fe184e5dbcec2f975.pdf", "author": "Ahmadreza Moradipari;Mohammad Pedramfar;Modjtaba Shokrian Zini;Vaneet Aggarwal", "authorids": "~Ahmadreza_Moradipari1;~Mohammad_Pedramfar1;~Modjtaba_Shokrian_Zini1;~Vaneet_Aggarwal1", "gender": "M;;;M", "homepage": ";;;", "dblp": "230/4643;;;91/6560", "google_scholar": "https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ahmadreza_Moradipari1;~Mohammad_Pedramfar1;~Modjtaba_Shokrian_Zini1;~Vaneet_Aggarwal1", "aff": "Toyota Motor Corporation;;;Purdue University", "aff_domain": "toyota.com;;;purdue.edu", "position": "Research Scientist;;;Full Professor", "bibtex": "@inproceedings{\nmoradipari2023improved,\ntitle={Improved Bayesian Regret Bounds for Thompson Sampling in Reinforcement Learning},\nauthor={Ahmadreza Moradipari and Mohammad Pedramfar and Modjtaba Shokrian Zini and Vaneet Aggarwal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2EVTB1idyR}\n}", "github": "", "project": "", "reviewers": "4ohk;DrLb;pUAk;z7jd;bSE3", "pdf_size": 377200, "rating": "5;6;6;7;7", "confidence": "2;2;3;2;2", "soundness": "3;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;2;3;3", "wc_summary": "28;78;37;45;211", "wc_strengths": "47;52;35;56;86", "wc_weaknesses": "68;83;98;77;93", "wc_questions": "59;230;228;28;44", "wc_limitations": "1;108;24;6;47", "wc_review": "203;551;422;212;481", "wc_reply_reviewers": "10;25;67;16;37", "wc_reply_authors": "19;0;18;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 79.8, 67.73891053154014 ], 
"wc_strengths_avg": [ 55.2, 16.93989374228776 ], "wc_weaknesses_avg": [ 83.8, 10.796295661012623 ], "wc_questions_avg": [ 117.8, 91.32447645620532 ], "wc_limitations_avg": [ 37.2, 38.90192797278819 ], "wc_review_avg": [ 373.8, 141.821578047912 ], "wc_reply_reviewers_avg": [ 31.0, 20.169283576765935 ], "wc_reply_authors_avg": [ 7.4, 9.068627239003707 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18066739382380542517&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "toyota.com;;;purdue.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Toyota Motor Corporation;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.toyota-global.com;https://www.purdue.edu", "aff_unique_abbr": "Toyota;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Japan;United States" }, { "title": "ViSt3D: Video Stylization with 3D CNN", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72962", "id": "2EiqizElGO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8203a5156918d467328d5a90147ab307-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2EiqizElGO", "openreview": "https://openreview.net/forum?id=2EiqizElGO", "poster": "/media/PosterPDFs/NeurIPS%202023/72962.png?t=1702106937.2619047", "slides": "https://nips.cc/virtual/2023/poster/72962", "video": "https://nips.cc/virtual/2023/poster/72962", "author_site": "Ayush Pande, Gaurav Sharma", "tldr": "", "abstract": "Visual stylization has been a very popular research area in recent times. While image stylization has seen a rapid advancement in the recent past, video stylization, while being more challenging, is relatively less explored. The immediate method of stylizing videos by stylizing each frame independently has been tried with some success. To the best of our knowledge, we present the first approach to video stylization using 3D CNN directly, building upon insights from 2D image stylization. Stylizing video is highly challenging, as the appearance and video motion, which includes both camera and subject motions, are inherently entangled in the representations learnt by a 3D CNN. Hence, a naive extension of 2D CNN stylization methods to 3D CNN does not work. To perform stylization with 3D CNN, we propose to explicitly disentangle motion and appearance, stylize the appearance part, and then add back the motion component and decode the final stylized video. In addition, we propose a dataset, curated from existing datasets, to train video stylization networks. We also provide an independently collected test set to study the generalization of video stylization methods. We provide results on this test dataset comparing the proposed method with 2D stylization methods applied frame by frame. 
We show successful stylization with 3D CNN for the first time, and obtain better stylization in terms of texture cf.\\ the existing 2D methods.", "keywords": "Video style transfer", "primary_area": "", "supplementary_material": "/attachment/ddbc45f586b77083066d8466740f5c2b33f7e5a2.zip", "author": "Ayush Pande;Gaurav Sharma", "authorids": "~Ayush_Pande1;~Gaurav_Sharma1", "gender": "M;M", "homepage": ";http://www.grvsharma.com/research.html", "dblp": "369/7183;s/GauravSharma4", "google_scholar": "HYr7bkAAAAAJ;tmZ8MaAAAAAJ", "orcid": ";", "linkedin": "ayush-pande-a296a063;gaurav-sharma-b9b0b13", "or_profile": "~Ayush_Pande1;~Gaurav_Sharma1", "aff": "IIT Kanpur, Indian Institute of Technology, Kanpur;TensorTour Inc.", "aff_domain": "cse.iitk.ac.in;tensortour.com", "position": "PhD student;CEO", "bibtex": "@inproceedings{\npande2023vistd,\ntitle={ViSt3D: Video Stylization with 3D {CNN}},\nauthor={Ayush Pande and Gaurav Sharma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2EiqizElGO}\n}", "github": "", "project": "", "reviewers": "qZ5E;fJLx;Syan;9xQH", "pdf_size": 0, "rating": "4;5;5;6", "confidence": "4;3;4;3", "soundness": "3;2;2;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "29;80;58;52", "wc_strengths": "32;73;43;76", "wc_weaknesses": "171;45;205;54", "wc_questions": "59;132;48;12", "wc_limitations": "53;92;56;6", "wc_review": "344;422;410;200", "wc_reply_reviewers": "0;15;75;24", "wc_reply_authors": "0;15;53;24", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.75, 18.15729880791744 ], "wc_strengths_avg": [ 56.0, 18.934096228761486 ], "wc_weaknesses_avg": [ 118.75, 70.3575688892105 ], "wc_questions_avg": [ 62.75, 43.59687488800086 ], "wc_limitations_avg": [ 51.75, 30.548117781624452 ], "wc_review_avg": [ 344.0, 88.28363381737297 ], "wc_reply_reviewers_avg": [ 28.5, 28.182441342083905 ], "wc_reply_authors_avg": [ 23.0, 19.32614809008769 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12010508149132965729&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cse.iitk.ac.in;tensortour.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Indian Institute of Technology Kanpur;TensorTour Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitk.ac.in;", "aff_unique_abbr": "IITK;", "aff_campus_unique_index": "0", "aff_campus_unique": "Kanpur;", "aff_country_unique_index": "0;1", "aff_country_unique": "India;United States" }, { "title": "A General Theory of Correct, Incorrect, and Extrinsic Equivariance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72961", "id": "2FMJtNDLeE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7dc7793c89b93887e126a86f22ef63c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2FMJtNDLeE", "openreview": "https://openreview.net/forum?id=2FMJtNDLeE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72961", "video": "https://nips.cc/virtual/2023/poster/72961", "author_site": "Dian 
Wang, Xupeng Zhu, Jung Yeon Park, Mingxi Jia, Guanang Su, Robert Platt, Robin Walters", "tldr": "", "abstract": "Although equivariant machine learning has proven effective at many tasks, success depends heavily on the assumption that the ground truth function is symmetric over the entire domain matching the symmetry in an equivariant neural network. A missing piece in the equivariant learning literature is the analysis of equivariant networks when symmetry exists only partially in the domain. In this work, we present a general theory for such a situation. We propose pointwise definitions of correct, incorrect, and extrinsic equivariance, which allow us to quantify continuously the degree of each type of equivariance a function displays. We then study the impact of various degrees of incorrect or extrinsic symmetry on model error. We prove error lower bounds for invariant or equivariant networks in classification or regression settings with partially incorrect symmetry. We also analyze the potentially harmful effects of extrinsic equivariance. Experiments validate these results in three different environments.", "keywords": "Equivariance;Deep Learning;Error Bound;Symmetry", "primary_area": "", "supplementary_material": "", "author": "Dian Wang;Xupeng Zhu;Jung Yeon Park;Mingxi Jia;Guanang Su;Robert Platt;Robin Walters", "authorids": "~Dian_Wang1;~Xupeng_Zhu1;~Jung_Yeon_Park1;~Mingxi_Jia1;su.gu@northeastern.edu;~Robert_Platt1;~Robin_Walters1", "gender": "M;M;M;M;;;M", "homepage": "https://pointw.github.io/;https://zxp-s-works.github.io/;;https://saulbatman.github.io/;;http://www.ccs.neu.edu/home/rplatt/;http://www.robinwalters.com", "dblp": "191/1369-1;257/4426;240/2704;315/4688;;39/5434;258/3416", "google_scholar": "CckjtfQAAAAJ;mwxz-8MAAAAJ;LZSRm9sAAAAJ;1iNSPQIAAAAJ;;Z4Y5S2oAAAAJ;fnprJmUAAAAJ", "orcid": ";;;;;;", "linkedin": "dianwang1007;xupengzhu-skunk;;https://www.linkedin.com/mwlite/in/mingxi-jia-6997b9183;;;", "or_profile": "~Dian_Wang1;~Xupeng_Zhu1;~Jung_Yeon_Park1;~Mingxi_Jia1;su.gu@northeastern.edu;~Robert_Platt1;~Robin_Walters1", "aff": "Boston Dynamics AI Institute;Boston Dynamics AI Institute;Northeastern University;Northeastern University;;Northeastern University;Northeastern University ", "aff_domain": "theaiinstitute.com;theaiinstitute.com;northeastern.edu;northeastern.edu;;neu.edu;northeastern.edu", "position": "Intern;Intern;PhD student;MS student;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A General Theory of Correct, Incorrect, and Extrinsic Equivariance},\nauthor={Dian Wang and Xupeng Zhu and Jung Yeon Park and Mingxi Jia and Guanang Su and Robert Platt and Robin Walters},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2FMJtNDLeE}\n}", "github": "", "project": "", "reviewers": "c3pV;1dq7;yJf8;bWzk;racn;FMJP", "pdf_size": 5912066, "rating": "5;5;6;7;7;8", "confidence": "3;4;2;4;2;4", "soundness": "2;3;3;3;3;3", "novelty": "2;2;3;3;2;3", "presentation": "2;3;3;3;3;3", "wc_summary": "76;51;125;430;71;83", "wc_strengths": "67;51;86;117;44;91", "wc_weaknesses": "490;143;114;83;78;455", "wc_questions": "51;35;19;1;3;8", "wc_limitations": "9;34;12;1;1;23", "wc_review": "693;314;356;632;197;660", "wc_reply_reviewers": "588;0;29;0;0;132", "wc_reply_authors": "645;0;0;0;0;19", "reply_reviewers": "1;0;1;0;0;1", "reply_authors": "2;1;1;1;1;2", "rating_avg": [ 6.333333333333333, 1.1055415967851332 ], "confidence_avg": [ 3.1666666666666665, 
0.8975274678557507 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 139.33333333333334, 131.87704711417965 ], "wc_strengths_avg": [ 76.0, 24.953289696283868 ], "wc_weaknesses_avg": [ 227.16666666666666, 175.07847446851432 ], "wc_questions_avg": [ 19.5, 18.1636083052533 ], "wc_limitations_avg": [ 13.333333333333334, 11.86966254317657 ], "wc_review_avg": [ 475.3333333333333, 193.11625744325332 ], "wc_reply_reviewers_avg": [ 124.83333333333333, 212.33182888007056 ], "wc_reply_authors_avg": [ 110.66666666666667, 239.0618237100093 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.11197850219117082, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1701784616901517786&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "theaiinstitute.com;theaiinstitute.com;northeastern.edu;northeastern.edu;;neu.edu;northeastern.edu", "author_num": 7, "aff_unique_index": "0;0;1;1;1;1", "aff_unique_norm": "Boston Dynamics AI Institute;Northeastern University", "aff_unique_dep": "AI Institute;", "aff_unique_url": "https://www.bostondynamics.com/;https://www.northeastern.edu", "aff_unique_abbr": "BD AI;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Constructing Non-isotropic Gaussian Diffusion Model Using Isotropic Gaussian Diffusion Model for Image Editing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72960", "id": "2Ibp83esmb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f25602918e8a0d0c86e3c752ecfbbaa1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2Ibp83esmb", "openreview": "https://openreview.net/forum?id=2Ibp83esmb", "poster": "/media/PosterPDFs/NeurIPS%202023/72960.png?t=1699450849.0444105", "slides": "https://nips.cc/virtual/2023/poster/72960", "video": "https://nips.cc/virtual/2023/poster/72960", "author_site": "Xi Yu, Xiang Gu, Haozhi Liu, Jian Sun", "tldr": "", "abstract": "Score-based diffusion models (SBDMs) have achieved state-of-the-art results in image generation. In this paper, we propose a Non-isotropic Gaussian Diffusion Model (NGDM) for image editing, which requires editing the source image while preserving the image regions irrelevant to the editing task. We construct NGDM by adding independent Gaussian noises with different variances to different image pixels. Instead of specifically training the NGDM, we rectify the NGDM into an isotropic Gaussian diffusion model with different pixels having different total forward diffusion times. We propose to reverse the diffusion by designing a sampling method that starts denoising at different times for different pixels to generate images using the pre-trained isotropic Gaussian diffusion model.
Experimental results show that NGDM achieves state-of-the-art performance for image editing tasks, considering the trade-off between the fidelity to the source image and alignment with the desired editing target.", "keywords": "score-based diffusion model;non-isotropic Gaussian diffusion model;image editing", "primary_area": "", "supplementary_material": "/attachment/62aae03f5cf452ec765f0134904c9401dabd4abf.pdf", "author": "Xi Yu;Xiang Gu;Haozhi Liu;Jian Sun", "authorids": "~Xi_Yu3;~Xiang_Gu1;~Haozhi_Liu1;~Jian_Sun1", "gender": "F;M;;M", "homepage": "https://blog.csdn.net/weixin_56184300?spm=1000.2115.3001.5343;https://xjtu-xgu.github.io/xianggu/;http://www.xjtu.edu.cn/;https://gr.xjtu.edu.cn/en/web/jiansun/publications", "dblp": ";57/7710-5;;68/4942-9.html", "google_scholar": "https://scholar.google.com/citations?hl=en;51GDv0EAAAAJ;;SSgNWOMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xi_Yu3;~Xiang_Gu1;~Haozhi_Liu1;~Jian_Sun1", "aff": "Alibaba Group;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University", "aff_domain": "alibaba-inc.com;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn", "position": "Intern;PhD student;MS student;Professor", "bibtex": "@inproceedings{\nyu2023constructing,\ntitle={Constructing Non-isotropic Gaussian Diffusion Model Using Isotropic Gaussian Diffusion Model for Image Editing},\nauthor={Xi Yu and Xiang Gu and Haozhi Liu and Jian Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2Ibp83esmb}\n}", "github": "", "project": "", "reviewers": "LtuA;ev4D;rj2K;Cf2x;Jr5M;xHHG", "pdf_size": 48276734, "rating": "4;5;5;5;5;6", "confidence": "4;1;3;4;3;3", "soundness": "3;2;3;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "2;2;3;2;3;3", "wc_summary": "242;298;13;61;88;49", "wc_strengths": "40;255;13;37;148;39", "wc_weaknesses": "72;302;44;440;149;88", "wc_questions": "31;277;4;10;63;2", "wc_limitations": "36;245;1;23;77;2", "wc_review": "421;1377;75;571;525;180", "wc_reply_reviewers": "0;7;11;140;29;0", "wc_reply_authors": "435;17;17;1924;82;0", "reply_reviewers": "0;1;1;1;1;0", "reply_authors": "2;2;2;4;3;1", "rating_avg": [ 5.0, 0.5773502691896257 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 125.16666666666667, 105.9896483415034 ], "wc_strengths_avg": [ 88.66666666666667, 86.04004235754935 ], "wc_weaknesses_avg": [ 182.5, 142.61340516702253 ], "wc_questions_avg": [ 64.5, 97.31178414423061 ], "wc_limitations_avg": [ 64.0, 84.86067012069451 ], "wc_review_avg": [ 524.8333333333334, 420.3888742052477 ], "wc_reply_reviewers_avg": [ 31.166666666666668, 49.63673594783964 ], "wc_reply_authors_avg": [ 412.5, 692.5033694262193 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.28867513459481287, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12569877159881970118&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": "alibaba-inc.com;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Alibaba Group;Xi'an Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.xjtu.edu.cn", "aff_unique_abbr": "Alibaba;XJTU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Unifying Predictions of Deterministic and Stochastic Physics in Mesh-reduced Space with Sequential Flow Generative Model", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72959", "id": "2JtwuJtoa0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bea78e2bb0abccc14404b24b90d9299f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2JtwuJtoa0", "openreview": "https://openreview.net/forum?id=2JtwuJtoa0", "poster": "/media/PosterPDFs/NeurIPS%202023/72959.png?t=1701672242.9638832", "slides": "https://nips.cc/virtual/2023/poster/72959", "video": "https://nips.cc/virtual/2023/poster/72959", "author_site": "Luning Sun, Xu Han, Han Gao, Jian-Xun Wang, Liping Liu", "tldr": "", "abstract": "Accurate prediction of dynamical systems in unstructured meshes has recently shown successes in scientific simulations. Many dynamical systems have a nonnegligible level of stochasticity introduced by various factors (e.g. chaoticity), so there is a need for a unified framework that captures both deterministic and stochastic components in the rollouts of these systems. Inspired by regeneration learning, we propose a new model that combines generative and sequential networks to model dynamical systems. Specifically, we use an autoencoder to learn compact representations of full-space physical variables in a low-dimensional space. We then integrate a transformer with a conditional normalizing flow model to model the temporal sequence of latent representations. We evaluate the new model in both deterministic and stochastic systems. The model outperforms several competitive baseline models and makes more accurate predictions of deterministic systems. Its own prediction error is also reflected in its uncertainty estimations. When predicting stochastic systems, the proposed model generates high-quality rollout samples. 
The mean and variance of these samples well match the statistics of samples computed from expensive numerical simulations.", "keywords": "AI4Science;Fluid Dynamics;Generative Models;Graph Neural Network", "primary_area": "", "supplementary_material": "", "author": "Luning Sun;Xu Han;Han Gao;Jian-Xun Wang;Liping Liu", "authorids": "~Luning_Sun1;~Xu_Han9;~Han_Gao3;~Jian-Xun_Wang1;~Liping_Liu1", "gender": "M;M;M;;M", "homepage": ";https://gaohan1234.github.io/;http://sites.nd.edu/jianxun-wang/;https://www.eecs.tufts.edu/~liulp/;", "dblp": "284/2269;;163/4396;47/5615-1;", "google_scholar": "Bssgd2gAAAAJ;ozQz4CQAAAAJ;1cXHUD4AAAAJ;https://scholar.google.com/citations?hl=en;eFsFAJoAAAAJ", "orcid": "0000-0002-9568-1165;0000-0002-7733-8996;;0000-0002-3690-3928;", "linkedin": "https://www.linkedin.com/feed/;%E6%B6%B5-han-%E9%AB%98-gao-87038a143/;;;", "or_profile": "~Luning_Sun1;~Han_Gao3;~Jian-Xun_Wang1;~Liping_Liu1;~XU_HAN6", "aff": "University of Notre Dame;University of Notre Dame;University of Notre Dame;Tufts University;Tufts University", "aff_domain": "nd.edu;nd.edu;nd.edu;tufts.edu;tufts.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsun2023unifying,\ntitle={Unifying Predictions of Deterministic and Stochastic Physics in Mesh-reduced Space with Sequential Flow Generative Model},\nauthor={Luning Sun and Xu Han and Han Gao and Jian-Xun Wang and Liping Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2JtwuJtoa0}\n}", "github": "", "project": "", "reviewers": "9WF1;8v2N;YdeK;fgSk", "pdf_size": 8779232, "rating": "5;6;8;8", "confidence": "4;4;3;5", "soundness": "3;4;4;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "129;66;120;66", "wc_strengths": "85;46;30;189", "wc_weaknesses": "245;33;193;24", "wc_questions": "166;66;38;211", "wc_limitations": "14;27;8;33", "wc_review": "639;238;389;523", "wc_reply_reviewers": "35;0;373;151", "wc_reply_authors": "278;0;516;90", "reply_reviewers": "1;0;3;2", "reply_authors": "3;1;4;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.25, 29.422567868899545 ], "wc_strengths_avg": [ 87.5, 61.92132104533946 ], "wc_weaknesses_avg": [ 123.75, 97.0602261485105 ], "wc_questions_avg": [ 120.25, 70.77561373806658 ], "wc_limitations_avg": [ 20.5, 9.962429422585638 ], "wc_review_avg": [ 447.25, 149.7370612106435 ], "wc_reply_reviewers_avg": [ 139.75, 145.80359220540487 ], "wc_reply_authors_avg": [ 221.0, 197.6587969203496 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7071824759019170871&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "nd.edu;nd.edu;nd.edu;tufts.edu;tufts.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "University of Notre Dame;Tufts University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;https://www.tufts.edu", "aff_unique_abbr": "Notre Dame;Tufts", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": 
"2MRz5bSnan", "title": "Permutation Decision Trees using Structural Impurity", "track": "main", "status": "Reject", "tldr": "", "abstract": "Decision Tree is a well understood Machine Learning model that is based on minimizing impurities in the internal nodes. The most common impurity measures are \\emph{Shannon entropy} and \\emph{Gini impurity}. These impurity measures are insensitive to the order of training data and hence the final tree obtained is invariant to a permutation of the data. This leads to a serious limitation in modeling data instances that have order dependencies. In this work, we use~\\emph{Effort-To-Compress} (ETC) - a complexity measure, for the first time, as an impurity measure. Unlike Shannon entropy and Gini impurity, structural impurity based on ETC is able to capture order dependencies in the data, thus obtaining potentially different decision trees for different permutation of the same data instances (\\emph{Permutation Decision Trees}). We then introduce the notion of {\\it Permutation Bagging} achieved using permutation decision trees without the need for random feature selection and sub-sampling. We compare the performance of the proposed permutation bagged decision trees with Random Forest. Our model does not assume independent and identical distribution of data instances. Potential applications include scenarios where a temporal order is present in the data instances. ", "keywords": "Decision Tree;Effort-To-Compress;Structural Impurity;Permutation Bagging;Machine Learning", "primary_area": "", "supplementary_material": "", "author": "Harikrishnan N B;Nithin Nagaraj", "authorids": "~Harikrishnan_N_B1;~Nithin_Nagaraj1", "gender": "M;M", "homepage": "https://sites.google.com/site/harikrishnannb8/home;https://sites.google.com/site/nithinnagaraj2/", "dblp": ";", "google_scholar": "9fMmKMEAAAAJ;https://scholar.google.co.in/citations?hl=en", "orcid": "0000-0002-4575-3968;0000-0003-0097-4131", "linkedin": ";nithin-nagaraj-14b07934/?trk=profile-badge&originalSubdomain=in", "or_profile": "~Harikrishnan_N_B1;~Nithin_Nagaraj1", "aff": "BITS Pilani KK Birla Goa Campus;National Institute of Advanced Studies", "aff_domain": "goa.bits-pilani.ac.in;nias.res.in", "position": "Visiting Faculty;Associate Professor", "bibtex": "@misc{\nb2023permutation,\ntitle={Permutation Decision Trees using Structural Impurity},\nauthor={Harikrishnan N B and Nithin Nagaraj},\nyear={2023},\nurl={https://openreview.net/forum?id=2MRz5bSnan}\n}", "github": "", "project": "", "reviewers": "7dHM;Q9hV;maQx;iLmx", "site": "https://openreview.net/forum?id=2MRz5bSnan", "pdf_size": 212179, "rating": "2;2;3;4", "confidence": "4;5;4;5", "soundness": "1;1;2;2", "novelty": "1;1;2;4", "presentation": "2;1;3;2", "wc_summary": "52;72;77;122", "wc_strengths": "21;27;61;82", "wc_weaknesses": "146;307;134;402", "wc_questions": "32;28;31;97", "wc_limitations": "9;36;13;61", "wc_review": "260;470;316;764", "wc_reply_reviewers": "139;298;165;225", "wc_reply_authors": "16;0;109;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 2.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 1.5, 0.5 ], "novelty_avg": [ 2.0, 1.224744871391589 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.75, 25.586861863073402 ], "wc_strengths_avg": [ 47.75, 24.973736204260668 ], "wc_weaknesses_avg": [ 247.25, 112.46638386646919 ], "wc_questions_avg": [ 47.0, 28.905016865589268 ], "wc_limitations_avg": [ 29.75, 20.777090749188154 ], "wc_review_avg": [ 452.5, 
195.59332810707016 ], "wc_reply_reviewers_avg": [ 206.75, 61.22244278040529 ], "wc_reply_authors_avg": [ 31.25, 45.36174048689049 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zoPP1JqEhpoJ:scholar.google.com/&scioq=Permutation+Decision+Trees+using+Structural+Impurity&hl=en&as_sdt=0,6", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Birla Institute of Technology and Science, Pilani;National Institute of Advanced Studies", "aff_unique_dep": ";", "aff_unique_url": "https://www.bits-pilani.ac.in/goa/;https://www.nias.res.in", "aff_unique_abbr": "BITS Pilani;NIAS", "aff_campus_unique_index": "0", "aff_campus_unique": "Goa;", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "Gaussian Membership Inference Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72958", "id": "2NUFe4TZMS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9df36b21ff4ee211a8b71ee8b7e9f57-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2NUFe4TZMS", "openreview": "https://openreview.net/forum?id=2NUFe4TZMS", "poster": "/media/PosterPDFs/NeurIPS%202023/72958.png?t=1701176600.713507", "slides": "https://nips.cc/virtual/2023/poster/72958", "video": "https://nips.cc/virtual/2023/poster/72958", "author_site": "Tobias Leemann, Martin Pawelczyk, Gjergji Kasneci", "tldr": "", "abstract": "We propose a novel and practical privacy notion called $f$-Membership Inference Privacy ($f$-MIP), which explicitly considers the capabilities of realistic adversaries under the membership inference attack threat model. Consequently, $f$-MIP offers interpretable privacy guarantees and improved utility (e.g., better classification accuracy). In particular, we derive a parametric family of $f$-MIP guarantees that we refer to as $\\mu$-Gaussian Membership Inference Privacy ($\\mu$-GMIP) by theoretically analyzing likelihood ratio-based membership inference attacks on stochastic gradient descent (SGD). Our analysis highlights that models trained with standard SGD already offer an elementary level of MIP. Additionally, we show how $f$-MIP can be amplified by adding noise to gradient updates. Our analysis further yields an analytical membership inference attack that offers two distinct advantages over previous approaches. First, unlike existing state-of-the-art attacks that require training hundreds of shadow models, our attack does not require any shadow model. Second, our analytical attack enables straightforward auditing of our privacy notion $f$-MIP. Finally, we quantify how various hyperparameters (e.g., batch size, number of model parameters) and specific data characteristics determine an attacker's ability to accurately infer a point's membership in the training set. 
We demonstrate the effectiveness of our method on models trained on vision and tabular datasets.", "keywords": "Privacy;Membership Inference Attacks", "primary_area": "", "supplementary_material": "/attachment/0cd892894cbbedc20dba34ccf75af65e435a52f4.zip", "author": "Tobias Leemann;Martin Pawelczyk;Gjergji Kasneci", "authorids": "~Tobias_Leemann1;~Martin_Pawelczyk1;~Gjergji_Kasneci2", "gender": "M;M;M", "homepage": "https://uni-tuebingen.de/en/209071;https://sites.google.com/view/martinpawelczyk/;https://www.gov.sot.tum.de/rds/prof-dr-gjergji-kasneci/", "dblp": "303/4480;251/3229;69/3216", "google_scholar": "VsNjvo0AAAAJ;oYAf_hgAAAAJ;Zbc8GK4AAAAJ", "orcid": "0000-0001-9333-228X;;0000-0002-3123-7268", "linkedin": "tobias-leemann/;;", "or_profile": "~Tobias_Leemann1;~Martin_Pawelczyk1;~Gjergji_Kasneci2", "aff": "University of Tuebingen;University of Tuebingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nleemann2023gaussian,\ntitle={Gaussian Membership Inference Privacy},\nauthor={Tobias Leemann and Martin Pawelczyk and Gjergji Kasneci},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2NUFe4TZMS}\n}", "github": "", "project": "", "reviewers": "PQQ4;vdUQ;Uu8E;qYbt;DVNo", "pdf_size": 666665, "rating": "4;5;5;5;5", "confidence": "3;3;4;3;4", "soundness": "3;3;2;3;2", "novelty": "3;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "118;69;144;67;247", "wc_strengths": "121;52;45;23;79", "wc_weaknesses": "161;75;163;43;179", "wc_questions": "4;75;90;98;76", "wc_limitations": "4;46;17;95;20", "wc_review": "408;317;459;326;601", "wc_reply_reviewers": "0;65;167;30;227", "wc_reply_authors": "0;589;253;0;117", "reply_reviewers": "0;2;2;1;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 4.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 129.0, 65.89992412742218 ], "wc_strengths_avg": [ 64.0, 33.645207682521445 ], "wc_weaknesses_avg": [ 124.2, 54.54686058793851 ], "wc_questions_avg": [ 68.6, 33.44009569364298 ], "wc_limitations_avg": [ 36.4, 32.314702536152176 ], "wc_review_avg": [ 422.2, 103.77552698011223 ], "wc_reply_reviewers_avg": [ 97.8, 85.69574085098978 ], "wc_reply_authors_avg": [ 191.8, 219.40045578804072 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11653729188483637&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Tuebingen", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Segment Anything in 3D with NeRFs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72957", "id": "2NkGfA66Ne", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/525d24400247f884c3419b0b7b1c4829-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2NkGfA66Ne", "openreview": "https://openreview.net/forum?id=2NkGfA66Ne", "poster": "/media/PosterPDFs/NeurIPS%202023/72957.png?t=1698758323.7419257", "slides": "https://nips.cc/virtual/2023/poster/72957", "video": "https://nips.cc/virtual/2023/poster/72957", "author_site": "Jiazhong Cen, Zanwei Zhou, Jiemin Fang, chen yang, Wei Shen, Lingxi Xie, Dongsheng Jiang, XIAOPENG ZHANG, Qi Tian", "tldr": "", "abstract": "Recently, the Segment Anything Model (SAM) emerged as a powerful vision foundation model which is capable to segment anything in 2D images. This paper aims to generalize SAM to segment 3D objects. Rather than replicating the data acquisition and annotation procedure which is costly in 3D, we design an efficient solution, leveraging the Neural Radiance Field (NeRF) as a cheap and off-the-shelf prior that connects multi-view 2D images to the 3D space. We refer to the proposed solution as SA3D, for Segment Anything in 3D. It is only required to provide a manual segmentation prompt (e.g., rough points) for the target object in a single view, which is used to generate its 2D mask in this view with SAM. Next, SA3D alternately performs mask inverse rendering and cross-view self-prompting across various views to iteratively complete the 3D mask of the target object constructed with voxel grids. The former projects the 2D mask obtained by SAM in the current view onto 3D mask with guidance of the density distribution learned by the NeRF; The latter extracts reliable prompts automatically as the input to SAM from the NeRF-rendered 2D mask in another view. We show in experiments that SA3D adapts to various scenes and achieves 3D segmentation within minutes. 
Our research offers a generic and efficient methodology to lift a 2D vision foundation model to 3D, as long as the 2D model can steadily address promptable segmentation across multiple views.", "keywords": "Segmentation;NeRF;3D segmentation", "primary_area": "", "supplementary_material": "/attachment/56e3c007a88efc9fde5d3c8ede400a2a125fdc5a.zip", "author": "Jiazhong Cen;Zanwei Zhou;Jiemin Fang;chen yang;Wei Shen;Lingxi Xie;Dongsheng Jiang;XIAOPENG ZHANG;Qi Tian", "authorids": "~Jiazhong_Cen1;~Zanwei_Zhou1;~Jiemin_Fang1;~chen_yang3;~Wei_Shen2;~Lingxi_Xie1;~Dongsheng_Jiang2;~XIAOPENG_ZHANG7;~Qi_Tian3", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/Jumpat;;https://jaminfong.cn;https://chensjtu.github.io/;https://shenwei1231.github.io/;http://lingxixie.com/;https://sites.google.com/site/zxphistory/;https://www.qitian1987.com/index.html;https://sites.google.com/site/dongshengjiangbme/", "dblp": "307/6640;317/0397;233/1239;01/2478-23;71/3692-2;123/2869;;78/1467-1.html;85/8729", "google_scholar": ";;-JcFoOoAAAAJ;https://scholar.google.com/citations?hl=en;Ae2kRCEAAAAJ;EEMm7hwAAAAJ;Ud6aBAcAAAAJ;https://scholar.google.com/citations?hl=en;-eGIgsoAAAAJ", "orcid": ";0000-0003-2222-4016;;0000-0003-4496-7849;;;;0000-0002-7252-5047;", "linkedin": ";;;;;;;;", "or_profile": "~Jiazhong_Cen1;~Zanwei_Zhou1;~Jiemin_Fang1;~chen_yang3;~Wei_Shen2;~Lingxi_Xie1;~XIAOPENG_ZHANG7;~Qi_Tian3;~Dongsheng_Jiang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Huazhong University of Science and Technology;Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;hust.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com;huawei.com;huawei.com;huawei.com", "position": "MS student;PhD student;PhD student;PhD student;Associate Professor;Researcher;Principal Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\ncen2023segment,\ntitle={Segment Anything in 3D with Ne{RF}s},\nauthor={Jiazhong Cen and Zanwei Zhou and Jiemin Fang and chen yang and Wei Shen and Lingxi Xie and Dongsheng Jiang and XIAOPENG ZHANG and Qi Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2NkGfA66Ne}\n}", "github": "", "project": "", "reviewers": "jjUN;JhYG;b3JM;Xgwo", "pdf_size": 40750761, "rating": "5;5;7;7", "confidence": "4;5;5;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;4;3", "wc_summary": "115;126;164;128", "wc_strengths": "74;57;57;61", "wc_weaknesses": "226;449;82;248", "wc_questions": "70;41;15;91", "wc_limitations": "1;1;1;1", "wc_review": "486;674;319;529", "wc_reply_reviewers": "0;13;54;27", "wc_reply_authors": "67;36;27;21", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 133.25, 18.430613120566555 ], "wc_strengths_avg": [ 62.25, 6.977642868476432 ], "wc_weaknesses_avg": [ 251.25, 130.76577342714722 ], "wc_questions_avg": [ 54.25, 28.78693279944913 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 502.0, 126.54840970948628 ], "wc_reply_reviewers_avg": [ 23.5, 20.03122562401013 ], "wc_reply_authors_avg": [ 37.75, 17.711225254058512 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 
17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 165, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13456653540115363410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;hust.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com;huawei.com;huawei.com;huawei.com", "author_num": 9, "aff_unique_index": "0;0;1;0;0;2;2;2;2", "aff_unique_norm": "Shanghai Jiao Tong University;Huazhong University of Science and Technology;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.hust.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;HUST;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CoLLAT: On Adding Fine-grained Audio Understanding to Language Models using Token-Level Locked-Language Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72956", "id": "2NncD8AaFK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c7b5a35ea98b62512a869c19ea7b03cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2NncD8AaFK", "openreview": "https://openreview.net/forum?id=2NncD8AaFK", "poster": "/media/PosterPDFs/NeurIPS%202023/72956.png?t=1701512668.019242", "slides": "https://nips.cc/virtual/2023/poster/72956", "video": "https://nips.cc/virtual/2023/poster/72956", "author_site": "Dadallage A R Silva, Spencer Whitehead, Christopher Lengerich, Hugh Leather", "tldr": "", "abstract": "Humans can easily understand various audio concepts, but conventional audio classification models fail due to their inability to predict unseen classes during training. To address this challenge, recent literature has explored contrastive language-audio pretraining to learn an audio understanding model using natural language supervision from a pretrained language model. However, despite their reasonable zero-shot performance in audio understanding, these models typically fail to achieve optimal performance while preserving the text understanding capabilities of the pretrained language model. They also perform poorly when comprehending audio clips with multiple audio concepts. To bridge these gaps, we propose $CoLLAT$: $Co$ntrastive $L$ocked $L$anguage and $A$udio $T$uning. This is a framework to effectively learn an audio understanding model with a locked language model, which is learned using a novel pretraining objective for audio-to-text grounding to yield fine-grained audio understanding. Our extensive experiments, which include several downstream applications such as audio classification, cross-modal retrieval, and audio-guided image generation, demonstrate that $CoLLAT$ yields state-of-the-art performance for audio understanding. 
Additionally, it unlocks audio guidance to applications built on top of pretrained language models.", "keywords": "Audio Understanding;Contrastive Learning;Audio-Language Grounding", "primary_area": "", "supplementary_material": "", "author": "Amila Silva;Spencer Whitehead;Chris Lengerich;Hugh James Leather", "authorids": "~Amila_Silva1;~Spencer_Whitehead1;~Chris_Lengerich1;~Hugh_James_Leather1", "gender": "M;;;M", "homepage": ";;;https://homepages.inf.ed.ac.uk/hleather/", "dblp": ";;;", "google_scholar": "https://scholar.google.com.au/citations?hl=en;;;", "orcid": ";;;", "linkedin": "amila-silva-67360a101/;;;", "or_profile": "~Amila_Silva1;~Spencer_Whitehead1;~Chris_Lengerich1;~Hugh_James_Leather1", "aff": "University of Melbourne;;;Meta Facebook", "aff_domain": "unimelb.edu;;;fb.com", "position": "PhD student;;;Researcher", "bibtex": "@inproceedings{\nsilva2023collat,\ntitle={Co{LLAT}: On Adding Fine-grained Audio Understanding to Language Models using Token-Level Locked-Language Tuning},\nauthor={Amila Silva and Spencer Whitehead and Chris Lengerich and Hugh James Leather},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2NncD8AaFK}\n}", "github": "", "project": "", "reviewers": "GZR1;Ujf9;swwz;NoKh", "pdf_size": 6536695, "rating": "6;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "60;94;51;133", "wc_strengths": "47;136;61;90", "wc_weaknesses": "184;161;195;222", "wc_questions": "63;114;139;103", "wc_limitations": "32;49;1;1", "wc_review": "386;554;447;549", "wc_reply_reviewers": "50;84;94;0", "wc_reply_authors": "24;24;24;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 32.26840560052511 ], "wc_strengths_avg": [ 83.5, 34.04776057246644 ], "wc_weaknesses_avg": [ 190.5, 21.937410968480304 ], "wc_questions_avg": [ 104.75, 27.407799984675894 ], "wc_limitations_avg": [ 20.75, 20.64430914319973 ], "wc_review_avg": [ 484.0, 70.88370757797591 ], "wc_reply_reviewers_avg": [ 57.0, 36.72873534441391 ], "wc_reply_authors_avg": [ 18.0, 10.392304845413264 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12324417822273957293&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "unimelb.edu;;;fb.com", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Melbourne;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.unimelb.edu.au;https://meta.com", "aff_unique_abbr": "UniMelb;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;United States" }, { "title": "Group Robust Classification Without Any Group Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72955", "id": "2OcNWFHFpk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b0d9ceb3d11d013e55da201d2a2c07b2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2OcNWFHFpk", "openreview": 
"https://openreview.net/forum?id=2OcNWFHFpk", "poster": "/media/PosterPDFs/NeurIPS%202023/72955.png?t=1702165911.0703194", "slides": "https://nips.cc/virtual/2023/poster/72955", "video": "https://nips.cc/virtual/2023/poster/72955", "author_site": "Christos Tsirigotis, Joao Monteiro, Pau Rodriguez, David Vazquez, Aaron Courville", "tldr": "", "abstract": "Empirical risk minimization (ERM) is sensitive to spurious correlations present in training data, which poses a significant risk when deploying systems trained under this paradigm in high-stake applications. While the existing literature focuses on maximizing group-balanced or worst-group accuracy, estimating these quantities is hindered by costly bias annotations. This study contends that current bias-unsupervised approaches to group robustness continue to rely on group information to achieve optimal performance. Firstly, these methods implicitly assume that all group combinations are represented during training. To illustrate this, we introduce a systematic generalization task on the MPI3D dataset and discover that current algorithms fail to improve the ERM baseline when combinations of observed attribute values are missing. Secondly, bias labels are still crucial for effective model selection, restricting the practicality of these methods in real-world scenarios. To address these limitations, we propose a revised methodology for training and validating debiased models in an entirely bias-unsupervised manner. We achieve this by employing pretrained self-supervised models to reliably extract bias information, which enables the integration of a logit adjustment training loss with our validation criterion. Our empirical analysis on synthetic and real-world tasks provides evidence that our approach overcomes the identified challenges and consistently enhances robust accuracy, attaining performance which is competitive with or outperforms that of state-of-the-art methods, which, conversely, rely on bias labels for validation.", "keywords": "out-of-distribution generalization;robustness;fairness;spurious correlations;systematic generalization;model selection", "primary_area": "", "supplementary_material": "/attachment/d46b81a8828181bf4f77ba1470f7235d4f957b1b.zip", "author": "Christos Tsirigotis;Joao Monteiro;Pau Rodriguez;David Vazquez;Aaron Courville", "authorids": "~Christos_Tsirigotis1;~Joao_Monteiro1;~Pau_Rodriguez2;~David_Vazquez1;~Aaron_Courville3", "gender": ";M;M;;", "homepage": ";;http://www.david-vazquez.com;;https://prlz77.github.io", "dblp": "215/5173;215/5354-2;94/8653;56/1688;190/7735", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?hl=en;1jHvtfsAAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;https://scholar.google.es/citations?user=IwBx73wAAAAJ", "orcid": ";;0000-0002-2845-8158;;0000-0002-1689-8084", "linkedin": "tsirif/;joao-monteiro-47180256/;https://www.linkedin.com/company/david-vazquez/;;", "or_profile": "~Christos_Tsirigotis1;~Joao_Monteiro1;~David_Vazquez1;~Aaron_Courville3;~Pau_Rodriguez_Lopez1", "aff": "Mila, Quebec Artificial Intelligence Institute;ServiceNow Research;ServiceNow research;Universit\u00e9 de Montr\u00e9al;Apple", "aff_domain": "mila.quebec;servicenow.com;servicenow.com; ;apple.com", "position": "Researcher;Researcher;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ntsirigotis2023group,\ntitle={Group Robust Classification Without Any Group Information},\nauthor={Christos Tsirigotis and Joao Monteiro and Pau Rodriguez and 
David Vazquez and Aaron Courville},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2OcNWFHFpk}\n}", "github": "", "project": "", "reviewers": "Y39n;Tfdv;R4tq;knxG", "pdf_size": 786583, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "2;4;3;4", "presentation": "2;3;4;2", "wc_summary": "92;39;202;71", "wc_strengths": "44;44;122;48", "wc_weaknesses": "212;118;103;203", "wc_questions": "12;108;37;74", "wc_limitations": "9;7;2;27", "wc_review": "369;316;466;423", "wc_reply_reviewers": "355;23;0;33", "wc_reply_authors": "1058;65;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 101.0, 61.29029286926275 ], "wc_strengths_avg": [ 64.5, 33.237779709240506 ], "wc_weaknesses_avg": [ 159.0, 48.89273974732854 ], "wc_questions_avg": [ 57.75, 36.44430682562093 ], "wc_limitations_avg": [ 11.25, 9.443913383762052 ], "wc_review_avg": [ 393.5, 56.420297765963625 ], "wc_reply_reviewers_avg": [ 102.75, 146.1272989553971 ], "wc_reply_authors_avg": [ 280.75, 449.52940671328724 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6788323167218348197&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mila.quebec;servicenow.com;servicenow.com; ;apple.com", "author_num": 5, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Quebec Artificial Intelligence Institute;ServiceNow;Universit\u00e9 de Montr\u00e9al;Apple", "aff_unique_dep": "Artificial Intelligence;Research;;Apple Inc.", "aff_unique_url": "https://mila.quebec;https://www.servicenow.com;https://www.umontreal.ca;https://www.apple.com", "aff_unique_abbr": "Mila;ServiceNow;UdeM;Apple", "aff_campus_unique_index": "0", "aff_campus_unique": "Quebec;", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Canada;United States" }, { "title": "DoWG Unleashed: An Efficient Universal Parameter-Free Gradient Descent Method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72954", "id": "2RQhgx1WLA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15ce36d35622f126f38e90167de1a350-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2RQhgx1WLA", "openreview": "https://openreview.net/forum?id=2RQhgx1WLA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72954", "video": "https://nips.cc/virtual/2023/poster/72954", "author_site": "Ahmed Khaled, Konstantin Mishchenko, Chi Jin", "tldr": "", "abstract": "This paper proposes a new easy-to-implement parameter-free gradient-based optimizer: DoWG (Distance over Weighted Gradients). We prove that DoWG is efficient---matching the convergence rate of optimally tuned gradient descent in convex optimization up to a logarithmic factor without tuning any parameters, and universal---automatically adapting to both smooth and nonsmooth problems. 
While popular algorithms following the AdaGrad framework compute a running average of the squared gradients, DoWG maintains a new distance-based weighted version of the running average, which is crucial to achieve the desired properties. To complement our theory, we also show empirically that DoWG trains at the edge of stability, and validate its effectiveness on practical machine learning tasks.", "keywords": "normalized gradient descent;gradient descent;adagrad;adaptive optimization;parameter-free;smooth optimization;convex optimization;edge of stability", "primary_area": "", "supplementary_material": "", "author": "Ahmed Khaled;Konstantin Mishchenko;Chi Jin", "authorids": "~Ahmed_Khaled1;~Konstantin_Mishchenko1;~Chi_Jin1", "gender": "M;;M", "homepage": "https://www.akhaled.net;https://konstmish.com/;https://sites.google.com/view/cjin/home", "dblp": "154/3591-1;222/9853;126/1802-1", "google_scholar": "Bc3wOdsAAAAJ;Z8Y8nhQAAAAJ;GINhGvwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ahmed_Khaled1;~Konstantin_Mishchenko1;~Chi_Jin1", "aff": "Princeton University;Samsung;Princeton University", "aff_domain": "princeton.edu;samsung.com;princeton.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkhaled2023dowg,\ntitle={Do{WG} Unleashed: An Efficient Universal Parameter-Free Gradient Descent Method},\nauthor={Ahmed Khaled and Konstantin Mishchenko and Chi Jin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2RQhgx1WLA}\n}", "github": "", "project": "", "reviewers": "Uwk6;jMMU;oAov;WSp1;qfrF;jvtQ", "pdf_size": 773787, "rating": "5;6;6;6;7;8", "confidence": "3;1;4;4;3;2", "soundness": "3;4;2;3;4;4", "novelty": "2;2;2;2;3;4", "presentation": "3;3;3;2;4;4", "wc_summary": "142;148;86;101;147;79", "wc_strengths": "87;103;64;39;75;96", "wc_weaknesses": "249;88;141;144;70;38", "wc_questions": "80;22;826;158;119;1", "wc_limitations": "2;3;1;1;1;1", "wc_review": "560;364;1118;443;412;215", "wc_reply_reviewers": "148;5;150;221;65;11", "wc_reply_authors": "101;0;155;497;0;0", "reply_reviewers": "3;1;1;2;1;1", "reply_authors": "3;1;2;2;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.8333333333333335, 1.0671873729054748 ], "soundness_avg": [ 3.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 2.5, 0.7637626158259734 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 117.16666666666667, 29.288317731743412 ], "wc_strengths_avg": [ 77.33333333333333, 21.421692017412838 ], "wc_weaknesses_avg": [ 121.66666666666667, 68.17787193967132 ], "wc_questions_avg": [ 201.0, 284.5698508275253 ], "wc_limitations_avg": [ 1.5, 0.7637626158259734 ], "wc_review_avg": [ 518.6666666666666, 286.98064201072674 ], "wc_reply_reviewers_avg": [ 100.0, 79.17912182724265 ], "wc_reply_authors_avg": [ 125.5, 176.41499369384678 ], "reply_reviewers_avg": [ 1.5, 0.7637626158259734 ], "reply_authors_avg": [ 1.6666666666666667, 0.74535599249993 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.27607881518711636, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18252427645846739900&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "princeton.edu;samsung.com;princeton.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Princeton University;Samsung", "aff_unique_dep": ";Samsung", "aff_unique_url": 
"https://www.princeton.edu;https://www.samsung.com", "aff_unique_abbr": "Princeton;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;South Korea" }, { "title": "On the Connection between Pre-training Data Diversity and Fine-tuning Robustness", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72953", "id": "2SScUiWUbn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1786f5246c67eefde011599d31b2006-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2SScUiWUbn", "openreview": "https://openreview.net/forum?id=2SScUiWUbn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72953", "video": "https://nips.cc/virtual/2023/poster/72953", "author_site": "Vivek Ramanujan, Thao Nguyen, Sewoong Oh, Ali Farhadi, Ludwig Schmidt", "tldr": "", "abstract": "Pre-training has been widely adopted in deep learning to improve model performance, especially when the training data for a target task is limited. In our work, we seek to understand the implications of this training strategy on the generalization properties of downstream models. More specifically, we ask the following question: how do properties of the pre-training distribution affect the robustness of a fine-tuned model? The properties we explore include the label space, label semantics, image diversity, data domains, and data quantity of the pre-training distribution. We find that the primary factor influencing downstream effective robustness (Taori et al., 2020) is data quantity, while other factors have limited significance. For example, reducing the number of ImageNet pre-training classes by 4x while increasing the number of images per class by 4x (that is, keeping total data quantity fixed) does not impact the robustness of fine-tuned models. 
We demonstrate our findings on pre-training distributions drawn from various natural and synthetic data sources, primarily using the iWildCam-WILDS distribution shift as a test for robustness.", "keywords": "robustness;out-of-distribution shifts;finetuning;pretraining", "primary_area": "", "supplementary_material": "/attachment/a0b51c714ae9b2cde88fc0a053ad646c8d08e605.pdf", "author": "Vivek Ramanujan;Thao Nguyen;Sewoong Oh;Ali Farhadi;Ludwig Schmidt", "authorids": "~Vivek_Ramanujan1;~Thao_Nguyen3;~Sewoong_Oh1;~Ali_Farhadi3;~Ludwig_Schmidt1", "gender": "M;F;M;M;M", "homepage": "https://vkramanuj.github.io;https://thaonguyen19.github.io/;https://homes.cs.washington.edu/~sewoong/;https://homes.cs.washington.edu/~ali/;http://people.csail.mit.edu/ludwigs/", "dblp": "225/4845;77/2922;80/4366;37/5826;141/2720", "google_scholar": "yXFPyNMAAAAJ;DvJG-_8AAAAJ;55TAOdgAAAAJ;jeOFRDsAAAAJ;SWMKy70AAAAJ", "orcid": ";;;;", "linkedin": ";;;;ludwig-schmidt-87ba3612/", "or_profile": "~Vivek_Ramanujan1;~Thao_Nguyen3;~Sewoong_Oh1;~Ali_Farhadi3;~Ludwig_Schmidt1", "aff": "Apple;Meta;University of Washington;University of Washington;Allen Institute for Artificial Intelligence", "aff_domain": "apple.com;meta.com;uw.edu;cs.uw.edu;allenai.org", "position": "Intern;Visiting Researcher;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nramanujan2023on,\ntitle={On the Connection between Pre-training Data Diversity and Fine-tuning Robustness},\nauthor={Vivek Ramanujan and Thao Nguyen and Sewoong Oh and Ali Farhadi and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2SScUiWUbn}\n}", "github": "", "project": "", "reviewers": "t8Yd;ev5Y;6SSm;XBK9", "pdf_size": 28841591, "rating": "6;6;6;6", "confidence": "3;4;4;3", "soundness": "2;4;3;3", "novelty": "3;4;3;3", "presentation": "2;3;3;3", "wc_summary": "71;84;79;71", "wc_strengths": "47;28;45;57", "wc_weaknesses": "210;77;63;62", "wc_questions": "4;163;51;39", "wc_limitations": "8;24;1;1", "wc_review": "340;376;239;230", "wc_reply_reviewers": "28;13;0;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.25, 5.539629951540085 ], "wc_strengths_avg": [ 44.25, 10.425329730996522 ], "wc_weaknesses_avg": [ 103.0, 62.060454397305215 ], "wc_questions_avg": [ 64.25, 59.570861165506074 ], "wc_limitations_avg": [ 8.5, 9.394147114027968 ], "wc_review_avg": [ 296.25, 63.12834149571807 ], "wc_reply_reviewers_avg": [ 15.0, 10.173494974687902 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15595765488141710708&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "apple.com;meta.com;uw.edu;cs.uw.edu;allenai.org", "author_num": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Apple;Meta;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Apple Inc.;Meta Platforms, Inc.;;", "aff_unique_url": "https://www.apple.com;https://meta.com;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Apple;Meta;UW;AI2", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Revisiting Implicit Differentiation for Learning Problems in Optimal Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72952", "id": "2URr3mkagy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bcfcf7232cb74e1ef82d751880ff835b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2URr3mkagy", "openreview": "https://openreview.net/forum?id=2URr3mkagy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72952", "video": "https://nips.cc/virtual/2023/poster/72952", "author_site": "Ming Xu, Timothy L. Molloy, Stephen Gould", "tldr": "", "abstract": "This paper proposes a new method for differentiating through optimal trajectories arising from non-convex, constrained discrete-time optimal control (COC) problems using the implicit function theorem (IFT). Previous works solve a differential Karush-Kuhn-Tucker (KKT) system for the trajectory derivative, and achieve this efficiently by solving an auxiliary Linear Quadratic Regulator (LQR) problem. In contrast, we directly evaluate the matrix equations which arise from applying variable elimination on the Lagrange multiplier terms in the (differential) KKT system. By appropriately accounting for the structure of the terms within the resulting equations, we show that the trajectory derivatives scale linearly with the number of timesteps. Furthermore, our approach allows for easy parallelization, significantly improved scalability with model size, direct computation of vector-Jacobian products and improved numerical stability compared to prior works. As an additional contribution, we unify prior works, addressing claims that computing trajectory derivatives using IFT scales quadratically with the number of timesteps. 
We evaluate our method on both a synthetic benchmark and four challenging learning-from-demonstration benchmarks, including a 6-DoF maneuvering quadrotor and 6-DoF rocket-powered landing.", "keywords": "implicit differentiation;bi-level optimization;constrained learning and control;safe learning for control", "primary_area": "", "supplementary_material": "/attachment/32c42331d10255562cfbdce0619f45392836e258.zip", "author": "Ming Xu;Timothy L Molloy;Stephen Gould", "authorids": "~Ming_Xu5;~Timothy_L_Molloy1;~Stephen_Gould1", "gender": "M;;M", "homepage": ";;http://users.cecs.anu.edu.au/~sgould/", "dblp": "43/3362-15;;89/1569.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=GX6_KKYAAAAJ;YvdzeM8AAAAJ", "orcid": ";;0000-0001-8929-7899", "linkedin": "ming-xu-2a21a754/;;", "or_profile": "~Ming_Xu5;~Timothy_L_Molloy1;~Stephen_Gould1", "aff": "Australian National University;Australian National University;Australian National University", "aff_domain": "anu.edu.au;anu.edu.au;anu.edu.au", "position": "Postdoc;Lecturer;Full Professor", "bibtex": "@inproceedings{\nxu2023revisiting,\ntitle={Revisiting Implicit Differentiation for Learning Problems in Optimal Control},\nauthor={Ming Xu and Timothy L Molloy and Stephen Gould},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2URr3mkagy}\n}", "github": "", "project": "", "reviewers": "CbeP;ZYtx;tsv7;dzG6;fBmg;kJe4", "pdf_size": 1234095, "rating": "4;4;5;6;6;7", "confidence": "2;3;4;3;5;4", "soundness": "2;3;3;3;3;3", "novelty": "3;2;3;3;3;2", "presentation": "2;2;2;3;4;3", "wc_summary": "36;103;104;68;160;214", "wc_strengths": "28;159;52;64;132;246", "wc_weaknesses": "113;92;296;498;207;342", "wc_questions": "46;93;136;4;8;54", "wc_limitations": "1;22;9;4;6;74", "wc_review": "224;469;597;638;513;930", "wc_reply_reviewers": "0;135;134;42;0;49", "wc_reply_authors": "0;49;173;0;0;0", "reply_reviewers": "0;1;2;1;0;1", "reply_authors": "1;2;2;1;1;1", "rating_avg": [ 5.333333333333333, 1.1055415967851332 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999299 ], "wc_summary_avg": [ 114.16666666666667, 58.504748145390344 ], "wc_strengths_avg": [ 113.5, 74.7345301718021 ], "wc_weaknesses_avg": [ 258.0, 139.810586151407 ], "wc_questions_avg": [ 56.833333333333336, 46.3264383358886 ], "wc_limitations_avg": [ 19.333333333333332, 25.33552622087982 ], "wc_review_avg": [ 561.8333333333334, 211.2340066266688 ], "wc_reply_reviewers_avg": [ 60.0, 55.89573627150226 ], "wc_reply_authors_avg": [ 37.0, 63.39821238278989 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6298366572977735, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=820175887396031063&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "anu.edu.au;anu.edu.au;anu.edu.au", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Australian National University", "aff_unique_dep": "", "aff_unique_url": "https://www.anu.edu.au", "aff_unique_abbr": "ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia"
}, { "title": "Scaling laws for language encoding models in fMRI", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72951", "id": "2W4LxJbgec", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4533e4a352440a32558c1c227602c323-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2W4LxJbgec", "openreview": "https://openreview.net/forum?id=2W4LxJbgec", "poster": "/media/PosterPDFs/NeurIPS%202023/72951.png?t=1702109230.768798", "slides": "https://nips.cc/virtual/2023/poster/72951", "video": "https://nips.cc/virtual/2023/poster/72951", "author_site": "Richard Antonello, Aditya Vaidya, Alexander Huth", "tldr": "", "abstract": "Representations from transformer-based unidirectional language models are known to be effective at predicting brain responses to natural language. However, most studies comparing language models to brains have used GPT-2 or similarly sized language models. Here we tested whether larger open-source models such as those from the OPT and LLaMA families are better at predicting brain responses recorded using fMRI. Mirroring scaling results from other contexts, we found that brain prediction performance scales logarithmically with model size from 125M to 30B parameter models, with ~15% increased encoding performance as measured by correlation with a held-out test set across 3 subjects. Similar log-linear behavior was observed when scaling the size of the fMRI training set. We also characterized scaling for acoustic encoding models that use HuBERT, WavLM, and Whisper, and we found comparable improvements with model size. A noise ceiling analysis of these large, high-performance encoding models showed that performance is nearing the theoretical maximum for brain areas such as the precuneus and higher auditory cortex. 
These results suggest that increasing scale in both models and data will yield highly effective models of language processing in the brain, enabling better scientific understanding as well as applications such as decoding.", "keywords": "Encoding Models;Language Models;Neuroscience;Scaling Laws", "primary_area": "", "supplementary_material": "/attachment/9a9962308779bad3836bb6956b4221a08499f23c.pdf", "author": "Richard Antonello;Aditya Vaidya;Alexander Huth", "authorids": "~Richard_Antonello1;avaidya@utexas.edu;~Alexander_Huth1", "gender": "M;;", "homepage": "https://www.cs.utexas.edu/~huth/people.html;;https://www.cs.utexas.edu/~huth/", "dblp": ";;44/8860.html", "google_scholar": ";;JNXWWkIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Richard_Antonello1;avaidya@utexas.edu;~Alexander_Huth1", "aff": "University of Texas, Austin;;The University of Texas at Austin", "aff_domain": "utexas.edu;;utexas.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nantonello2023scaling,\ntitle={Scaling laws for language encoding models in f{MRI}},\nauthor={Richard Antonello and Aditya Vaidya and Alexander Huth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2W4LxJbgec}\n}", "github": "", "project": "", "reviewers": "dAos;NDv4;Tsw4;fnMd;jqqd", "pdf_size": 8205941, "rating": "5;6;7;7;8", "confidence": "5;4;5;5;3", "soundness": "3;3;3;3;4", "novelty": "3;3;4;2;4", "presentation": "3;4;4;3;3", "wc_summary": "344;107;178;54;66", "wc_strengths": "30;50;57;36;158", "wc_weaknesses": "42;33;33;229;28", "wc_questions": "21;95;274;153;52", "wc_limitations": "1;9;8;73;5", "wc_review": "438;294;550;545;309", "wc_reply_reviewers": "0;18;320;33;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 149.8, 106.33983261224365 ], "wc_strengths_avg": [ 66.2, 46.89733467906252 ], "wc_weaknesses_avg": [ 73.0, 78.13065979498701 ], "wc_questions_avg": [ 119.0, 89.27485648266257 ], "wc_limitations_avg": [ 19.2, 27.043668390216588 ], "wc_review_avg": [ 427.2, 110.2604190088175 ], "wc_reply_reviewers_avg": [ 76.4, 122.26953831596813 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5393193716300062, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=636349432506026986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "utexas.edu;;utexas.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Structured Federated Learning through Clustered Additive Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72950", "id": "2XT3UpOv48", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8668fdc7b2ddf55a0e235824c66f2eee-Abstract-Conference.html", "pdf":
"https://openreview.net/pdf?id=2XT3UpOv48", "openreview": "https://openreview.net/forum?id=2XT3UpOv48", "poster": "/media/PosterPDFs/NeurIPS%202023/72950.png?t=1701389685.8157904", "slides": "https://nips.cc/virtual/2023/poster/72950", "video": "https://nips.cc/virtual/2023/poster/72950", "author_site": "Jie Ma, Tianyi Zhou, Guodong Long, Jing Jiang, Chengqi Zhang", "tldr": "", "abstract": "Heterogeneous federated learning without assuming any structure is challenging due to the conflicts among non-identical data distributions of clients. In practice, clients often comprise near-homogeneous clusters so training a server-side model per cluster mitigates the conflicts. However, FL with client clustering often suffers from \u201cclustering collapse'', i.e., one cluster's model excels on increasing clients, and reduces to single-model FL. Moreover, cluster-wise models hinder knowledge sharing between clusters and each model depends on fewer clients. Furthermore, the static clustering assumption on data may not hold for dynamically changing models, which are sensitive to cluster imbalance/initialization or outliers. To address these challenges, we propose ''Clustered Additive Modeling (CAM)'', which applies a globally shared model $\\Theta_g$ on top of the cluster-wise models $\\Theta_{1:K}$, i.e., $y=h(x;\\Theta_g)+f(x;\\Theta_k)$ for clients of cluster-$k$. The global model captures the features shared by all clusters so $\\Theta_{1:K}$ are enforced to focus on the difference among clusters. To train CAM, we develop a novel Fed-CAM algorithm that alternates between client clustering and training global/cluster models to predict the residual of each other. We can easily modify any existing clustered FL methods by CAM and significantly improve their performance without \u2018\u2019clustering collapse'' in different non-IID settings. 
We also provide a convergence analysis of the Fed-CAM algorithm.", "keywords": "Federated Learning", "primary_area": "", "supplementary_material": "/attachment/78eb2896aafd2a8a479afc6e8c3681f3efbf43e6.zip", "author": "Jie Ma;Tianyi Zhou;Guodong Long;Jing Jiang;Chengqi Zhang", "authorids": "~Jie_Ma4;~Tianyi_Zhou1;~Guodong_Long2;~Jing_Jiang6;~Chengqi_Zhang1", "gender": "M;M;M;F;M", "homepage": "https://scholar.google.com.au/citations?user=tSmDoz0AAAAJ&hl=en;https://tianyizhou.github.io/;https://www.uts.edu.au/staff/guodong.long;https://www.uts.edu.au/staff/jing.jiang;https://research.polyu.edu.hk/en/persons/chengqi-zhang", "dblp": "62/5110;88/8205-1;34/10089;68/1974-2;71/964", "google_scholar": "https://scholar.google.com.au/citations?user=tSmDoz0AAAAJ;OKvgizMAAAAJ;https://scholar.google.com.au/citations?user=Pl8m7hMAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.au/citations?user=B6lBmqEAAAAJ", "orcid": ";0000-0001-5348-0632;0000-0003-3740-9515;;0000-0001-5715-7154", "linkedin": ";tianyizhou;;;chengqi-zhang-55aa8910/", "or_profile": "~Jie_Ma4;~Tianyi_Zhou1;~Guodong_Long2;~Jing_Jiang6;~Chengqi_Zhang1", "aff": "University of Technology Sydney;University of Maryland, College Park;University of Technology Sydney;University of Technology Sydney;University of Technology Sydney", "aff_domain": "uts.edu.au;umd.edu;uts.edu.au;uts.edu.au;uts.edu.au", "position": "PhD student;Assistant Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nma2023structured,\ntitle={Structured Federated Learning through Clustered Additive Modeling},\nauthor={Jie Ma and Tianyi Zhou and Guodong Long and Jing Jiang and Chengqi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2XT3UpOv48}\n}", "github": "", "project": "", "reviewers": "HLnM;dS6T;VZR1;WQSD", "pdf_size": 1918544, "rating": "3;6;8;8", "confidence": "5;4;5;5", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "98;44;119;113", "wc_strengths": "108;78;17;170", "wc_weaknesses": "501;122;43;95", "wc_questions": "186;171;127;93", "wc_limitations": "2;16;25;7", "wc_review": "895;431;331;478", "wc_reply_reviewers": "171;28;12;0", "wc_reply_authors": "460;36;12;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 29.58462438497403 ], "wc_strengths_avg": [ 93.25, 55.12429137866536 ], "wc_weaknesses_avg": [ 190.25, 181.64439848230938 ], "wc_questions_avg": [ 144.25, 36.68361350794112 ], "wc_limitations_avg": [ 12.5, 8.789197915623474 ], "wc_review_avg": [ 533.75, 215.21776762154187 ], "wc_reply_reviewers_avg": [ 52.75, 68.99048847486152 ], "wc_reply_authors_avg": [ 127.0, 192.69405802982095 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.07053456158585983, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9791174004911743062&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uts.edu.au;umd.edu;uts.edu.au;uts.edu.au;uts.edu.au", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Technology
Sydney;University of Maryland", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.umd.edu", "aff_unique_abbr": "UTS;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Australia;United States" }, { "title": "CLIP-OGD: An Experimental Design for Adaptive Neyman Allocation in Sequential Experiments", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72949", "id": "2Xqvk2KVAq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/661d4fda173120a2f119e0319e6bcf97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2Xqvk2KVAq", "openreview": "https://openreview.net/forum?id=2Xqvk2KVAq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72949", "video": "https://nips.cc/virtual/2023/poster/72949", "author_site": "Jessica Dai, Paula Gradu, Christopher Harshaw", "tldr": "", "abstract": "From clinical development of cancer therapies to investigations into partisan bias, adaptive sequential designs have become an increasingly popular method for causal inference, as they offer the possibility of improved precision over their non-adaptive counterparts. However, even in simple settings (e.g. two treatments) the extent to which adaptive designs can improve precision is not sufficiently well understood. In this work, we study the problem of Adaptive Neyman Allocation in a design-based potential outcomes framework, where the experimenter seeks to construct an adaptive design which is nearly as efficient as the optimal (but infeasible) non-adaptive Neyman design, which has access to all potential outcomes. Motivated by connections to online optimization, we propose Neyman Ratio and Neyman Regret as two (equivalent) performance measures of adaptive designs for this problem. We present Clip-OGD, an adaptive design which achieves $\widetilde{\mathcal{O}}(\sqrt{T})$ expected Neyman regret and thereby recovers the optimal Neyman variance in large samples. Finally, we construct a conservative variance estimator which facilitates the development of asymptotically valid confidence intervals.
To complement our theoretical results, we conduct simulations using data from a microeconomic experiment.", "keywords": "causal inference;randomized experiments;online optimization", "primary_area": "", "supplementary_material": "/attachment/08104d8c6267737453d95e549fc7d084ea982c53.zip", "author": "Jessica Dai;Paula Gradu;Christopher Harshaw", "authorids": "~Jessica_Dai1;~Paula_Gradu1;~Christopher_Harshaw1", "gender": ";F;M", "homepage": ";https://paula-gradu.github.io;http://www.chrisharshaw.com/", "dblp": ";270/0124.html;199/2237", "google_scholar": ";;5akIyZ0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jessica_Dai1;~Paula_Gradu1;~Christopher_Harshaw1", "aff": ";University of California, Berkeley;University of California, Berkeley", "aff_domain": ";berkeley.edu;berkeley.edu", "position": ";PhD student;Postdoc", "bibtex": "@inproceedings{\ndai2023clipogd,\ntitle={{CLIP}-{OGD}: An Experimental Design for Adaptive Neyman Allocation in Sequential Experiments},\nauthor={Jessica Dai and Paula Gradu and Christopher Harshaw},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2Xqvk2KVAq}\n}", "github": "", "project": "", "reviewers": "NsFr;xwuB;wYBx;etbf;2JiC", "pdf_size": 489582, "rating": "4;5;6;7;7", "confidence": "3;1;2;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "4;2;3;3;4", "wc_summary": "156;28;47;54;94", "wc_strengths": "70;29;25;86;36", "wc_weaknesses": "240;55;5;10;16", "wc_questions": "73;17;92;216;173", "wc_limitations": "55;4;2;10;18", "wc_review": "594;133;171;376;337", "wc_reply_reviewers": "0;0;37;0;0", "wc_reply_authors": "0;0;27;0;0", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 75.8, 45.50340646589001 ], "wc_strengths_avg": [ 49.2, 24.30966885829587 ], "wc_weaknesses_avg": [ 65.2, 89.16366973156724 ], "wc_questions_avg": [ 114.2, 71.35656942426534 ], "wc_limitations_avg": [ 17.8, 19.415457759218555 ], "wc_review_avg": [ 322.2, 164.70992684109845 ], "wc_reply_reviewers_avg": [ 7.4, 14.8 ], "wc_reply_authors_avg": [ 5.4, 10.8 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30012252399939043, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4238671547798810105&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Implicit Convolutional Kernels for Steerable CNNs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72948", "id": "2YtdxqvdjX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/389a55c90f839d58188060a42bb9138a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2YtdxqvdjX", "openreview": "https://openreview.net/forum?id=2YtdxqvdjX", "poster": "/media/PosterPDFs/NeurIPS%202023/72948.png?t=1702327450.7729275", 
"slides": "https://nips.cc/virtual/2023/poster/72948", "video": "https://nips.cc/virtual/2023/poster/72948", "author_site": "Maksim Zhdanov, Nico Hoffmann, Gabriele Cesa", "tldr": "", "abstract": "Steerable convolutional neural networks (CNNs) provide a general framework for building neural networks equivariant to translations and transformations of an origin-preserving group $G$, such as reflections and rotations. They rely on standard convolutions with $G$-steerable kernels obtained by analytically solving the group-specific equivariance constraint imposed onto the kernel space. As the solution is tailored to a particular group $G$, implementing a kernel basis does not generalize to other symmetry transformations, complicating the development of general group equivariant models. We propose using implicit neural representation via multi-layer perceptrons (MLPs) to parameterize $G$-steerable kernels. The resulting framework offers a simple and flexible way to implement Steerable CNNs and generalizes to any group $G$ for which a $G$-equivariant MLP can be built. We prove the effectiveness of our method on multiple tasks, including N-body simulations, point cloud classification and molecular property prediction.", "keywords": "equivariance; group convolutions; implicit kernels; physical simulations", "primary_area": "", "supplementary_material": "/attachment/fe1e934d4962a24e44a146916828d6c2d32037b5.pdf", "author": "Maksim Zhdanov;Nico Hoffmann;Gabriele Cesa", "authorids": "~Maksim_Zhdanov1;~Nico_Hoffmann1;~Gabriele_Cesa1", "gender": "M;;M", "homepage": "https://maxxxzdn.github.io/;;https://github.com/Gabri95", "dblp": "322/0190;;254/1536", "google_scholar": "Llnm6XgAAAAJ;;hTplhaMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Maksim_Zhdanov1;~Nico_Hoffmann1;~Gabriele_Cesa1", "aff": "University of Amsterdam;;Qualcomm Inc, QualComm", "aff_domain": "uva.nl;;qti.qualcomm.com", "position": "PhD student;;Researcher", "bibtex": "@inproceedings{\nzhdanov2023implicit,\ntitle={Implicit Convolutional Kernels for Steerable {CNN}s},\nauthor={Maksim Zhdanov and Nico Hoffmann and Gabriele Cesa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2YtdxqvdjX}\n}", "github": "", "project": "", "reviewers": "TwdU;fqKN;AqiD;GuaE", "pdf_size": 881799, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "217;28;71;110", "wc_strengths": "42;113;47;100", "wc_weaknesses": "324;326;122;143", "wc_questions": "360;62;94;473", "wc_limitations": "120;104;1;7", "wc_review": "1063;633;335;833", "wc_reply_reviewers": "173;0;7;229", "wc_reply_authors": "178;0;0;143", "reply_reviewers": "2;0;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.5, 70.08031107236896 ], "wc_strengths_avg": [ 75.5, 31.388692231439016 ], "wc_weaknesses_avg": [ 228.75, 96.53852857797243 ], "wc_questions_avg": [ 247.25, 174.26900900619134 ], "wc_limitations_avg": [ 58.0, 54.33691194758863 ], "wc_review_avg": [ 716.0, 267.46401627134816 ], "wc_reply_reviewers_avg": [ 102.25, 100.74565747465248 ], "wc_reply_authors_avg": [ 80.25, 81.19844518215851 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5191479260979826268&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uva.nl;;qti.qualcomm.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Amsterdam;Qualcomm Incorporated", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.qualcomm.com", "aff_unique_abbr": "UvA;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Netherlands;United States" }, { "id": "2ZtGWNn37W", "title": "Multi-Fidelity Active Learning with GFlowNets", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the last decades, the capacity to generate large amounts of data in science and engineering applications has been growing steadily. Meanwhile, the progress in machine learning has turned it into a suitable tool to process and utilise the available data. Nonetheless, many relevant scientific and engineering problems present challenges where current machine learning methods cannot yet efficiently leverage the available data and resources. For example, in scientific discovery, we are often faced with the problem of exploring very large, high-dimensional spaces, where querying a high fidelity, black-box objective function is very expensive. Progress in machine learning methods that can efficiently tackle such problems would help accelerate currently crucial areas such as drug and materials discovery. In this paper, we propose the use of GFlowNets for multi-fidelity active learning, where multiple approximations of the black-box function are available at lower fidelity and cost. GFlowNets are recently proposed methods for amortised probabilistic inference that have proven efficient for exploring large, high-dimensional spaces and can hence be practical in the multi-fidelity setting too. Here, we describe our algorithm for multi-fidelity active learning with GFlowNets and evaluate its performance in both well-studied synthetic tasks and practically relevant applications of molecular discovery. 
Our results show that multi-fidelity active learning with GFlowNets can efficiently leverage the availability of multiple oracles with different costs and fidelities to accelerate scientific discovery and engineering design.", "keywords": "gflownets;multi-fidelity;active learning;bayesian optimization;scientific discovery;biological sequence design;molecular modelling;material discovery", "primary_area": "", "supplementary_material": "/attachment/cbc18d26bfce16d3b32e80fa78ad37ee356d9658.zip", "author": "Alex Hern\u00e1ndez-Garc\u00eda;Nikita Saxena;Moksh Jain;Cheng-Hao Liu;Yoshua Bengio", "authorids": "~Alex_Hern\u00e1ndez-Garc\u00eda1;nikita.saxena@mila.quebec;~Moksh_Jain1;~Cheng-Hao_Liu1;~Yoshua_Bengio1", "gender": ";;M;M;M", "homepage": "https://alexhernandezgarcia.github.io;;https://mj10.github.io;https://pchliu.github.io/;http://yoshuabengio.org", "dblp": "213/8573;;249/9368;;56/953", "google_scholar": "f8vQCOAAAAAJ;;TD07G_wAAAAJ;iVJGx0cAAAAJ;kukA0LcAAAAJ", "orcid": ";;;0000-0001-7923-6806;", "linkedin": ";;;chenghao-peter-liu/;yoshuabengio/?originalSubdomain=ca", "or_profile": "~Alex_Hern\u00e1ndez-Garc\u00eda1;nikita.saxena@mila.quebec;~Moksh_Jain1;~Cheng-Hao_Liu1;~Yoshua_Bengio1", "aff": "Universit\u00e9 de Montr\u00e9al;;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;University of Montreal", "aff_domain": "umontreal.ca;;umontreal.ca;mila.umontreal.ca;umontreal.ca", "position": "Postdoc;;MS student;PhD student intern;Full Professor", "bibtex": "@misc{\nhern{\\'a}ndez-garc{\\'\\i}a2023multifidelity,\ntitle={Multi-Fidelity Active Learning with {GF}lowNets},\nauthor={Alex Hern{\\'a}ndez-Garc{\\'\\i}a and Nikita Saxena and Moksh Jain and Cheng-Hao Liu and Yoshua Bengio},\nyear={2023},\nurl={https://openreview.net/forum?id=2ZtGWNn37W}\n}", "github": "", "project": "", "reviewers": "KJZ2;3rWi;TG5X;qmsr;tpVK", "site": "https://openreview.net/forum?id=2ZtGWNn37W", "pdf_size": 421790, "rating": "4;5;5;6;7", "confidence": "4;4;4;4;3", "soundness": "3;3;2;4;4", "novelty": "2;2;2;2;3", "presentation": "3;3;3;4;2", "wc_summary": "194;117;21;42;192", "wc_strengths": "79;99;20;133;73", "wc_weaknesses": "501;462;146;152;207", "wc_questions": "58;7;22;90;234", "wc_limitations": "8;15;4;10;1", "wc_review": "840;700;213;427;707", "wc_reply_reviewers": "140;58;41;51;106", "wc_reply_authors": "273;31;154;67;196", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 113.2, 72.55728771115965 ], "wc_strengths_avg": [ 80.8, 36.923705122861115 ], "wc_weaknesses_avg": [ 293.6, 155.37644609142018 ], "wc_questions_avg": [ 82.2, 81.2019704194424 ], "wc_limitations_avg": [ 7.6, 4.841487374764082 ], "wc_review_avg": [ 577.4, 226.35953702020157 ], "wc_reply_reviewers_avg": [ 79.2, 37.732744400586604 ], "wc_reply_authors_avg": [ 144.2, 87.34162810481608 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7844645405527363, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14990110088713039119&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of 
Montreal", "aff_unique_dep": ";Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.umontreal.ca;https://www.mila.quebec", "aff_unique_abbr": "UdeM;MILA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Enhancing CLIP with CLIP: Exploring Pseudolabeling for Limited-Label Prompt Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72947", "id": "2b9aY2NgXE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf85879363044ca21f7868a3d1b4021c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2b9aY2NgXE", "openreview": "https://openreview.net/forum?id=2b9aY2NgXE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72947", "video": "https://nips.cc/virtual/2023/poster/72947", "author_site": "Cristina Menghini, Andrew Delworth, Stephen Bach", "tldr": "", "abstract": "Fine-tuning vision-language models (VLMs) like CLIP to downstream tasks is often necessary to optimize their performance. However, a major obstacle is the limited availability of labeled data. We study the use of pseudolabels, i.e., heuristic labels for unlabeled data, to enhance CLIP via prompt tuning. Conventional pseudolabeling trains a model on labeled data and then generates labels for unlabeled data. VLMs' zero-shot capabilities enable a ``second generation'' of pseudolabeling approaches that do not require task-specific training on labeled data. By using zero-shot pseudolabels as a source of supervision, we observe that learning paradigms such as semi-supervised, transductive zero-shot, and unsupervised learning can all be seen as optimizing the same loss function. This unified view enables the development of versatile training strategies that are applicable across learning paradigms. We investigate them on image classification tasks where CLIP exhibits limitations, by varying prompt modalities, e.g., textual or visual prompts, and learning paradigms. We find that\n(1) unexplored prompt tuning strategies that iteratively refine pseudolabels consistently improve CLIP accuracy, by 19.5 points in semi-supervised learning, by 28.4 points in transductive zero-shot learning, and by 15.2 points in unsupervised learning, and (2) unlike conventional semi-supervised pseudolabeling, which exacerbates model biases toward classes with higher-quality pseudolabels, prompt tuning leads to a more equitable distribution of per-class accuracy. 
The code to reproduce the experiments is at https://github.com/BatsResearch/menghini-neurips23-code.", "keywords": "vision-language models;prompt-tuning;pseudolabels;self-training", "primary_area": "", "supplementary_material": "/attachment/08252456872a41bedfbd75fcd4177774411fee07.zip", "author": "Cristina Menghini;Andrew Delworth;Stephen Bach", "authorids": "~Cristina_Menghini1;andrew_delworth@brown.edu;~Stephen_Bach1", "gender": "F;;M", "homepage": ";;http://stephenbach.net", "dblp": "228/2522;;90/1077", "google_scholar": "https://scholar.google.it/citations?user=TDk55OQAAAAJ;;hs6pGXoAAAAJ", "orcid": ";;0000-0003-3857-3560", "linkedin": ";;", "or_profile": "~Cristina_Menghini1;andrew_delworth@brown.edu;~Stephen_Bach1", "aff": "Brown University;;Snorkel AI", "aff_domain": "cs.brown.edu;;snorkel.ai", "position": "Postdoc;;Researcher", "bibtex": "@inproceedings{\nmenghini2023enhancing,\ntitle={Enhancing {CLIP} with {CLIP}: Exploring Pseudolabeling for Limited-Label Prompt Tuning},\nauthor={Cristina Menghini and Andrew Delworth and Stephen Bach},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2b9aY2NgXE}\n}", "github": "", "project": "", "reviewers": "qyTk;Qe43;sr7c;1u3c;kGE6", "pdf_size": 1052283, "rating": "4;5;6;6;7", "confidence": "4;4;4;3;4", "soundness": "2;3;2;3;3", "novelty": "2;3;2;2;3", "presentation": "3;2;2;3;3", "wc_summary": "67;74;76;55;145", "wc_strengths": "26;47;58;48;49", "wc_weaknesses": "156;30;200;78;204", "wc_questions": "142;181;101;47;589", "wc_limitations": "7;12;8;18;33", "wc_review": "398;344;443;246;1020", "wc_reply_reviewers": "81;48;21;81;900", "wc_reply_authors": "465;173;22;41;834", "reply_reviewers": "1;1;1;1;2", "reply_authors": "2;2;2;2;3", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 83.4, 31.664491153340837 ], "wc_strengths_avg": [ 45.6, 10.556514576317317 ], "wc_weaknesses_avg": [ 133.6, 68.83487488185041 ], "wc_questions_avg": [ 212.0, 193.66775673818293 ], "wc_limitations_avg": [ 15.6, 9.520504188329523 ], "wc_review_avg": [ 490.2, 272.9193287401975 ], "wc_reply_reviewers_avg": [ 226.2, 337.64916703584504 ], "wc_reply_authors_avg": [ 307.0, 307.4508090735817 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1961161351381841, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14351953237640024512&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.brown.edu;;snorkel.ai", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Brown University;Snorkel AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.snorkelai.com", "aff_unique_abbr": "Brown;Snorkel AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Accelerated Model Training via Bayesian Data Selection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72946", "id": "2bRG4Hj8qd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1af3e0bf5905e33789979f666c31192d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2bRG4Hj8qd", "openreview": 
"https://openreview.net/forum?id=2bRG4Hj8qd", "poster": "/media/PosterPDFs/NeurIPS%202023/72946.png?t=1700031620.8320801", "slides": "https://nips.cc/virtual/2023/poster/72946", "video": "https://nips.cc/virtual/2023/poster/72946", "author_site": "Zhijie Deng, Peng Cui, Jun Zhu", "tldr": "", "abstract": "Mislabeled, duplicated, or biased data in real-world scenarios can lead to prolonged training and even hinder model convergence. Traditional solutions prioritizing easy or hard samples lack the flexibility to handle such a variety simultaneously. Recent work has proposed a more reasonable data selection principle by examining the data's impact on the model's generalization loss. However, its practical adoption relies on less principled approximations and additional holdout data. This work solves these problems by leveraging a lightweight Bayesian treatment and incorporating off-the-shelf zero-shot predictors built on large-scale pre-trained models. The resulting algorithm is efficient and easy to implement. We perform extensive empirical studies on challenging benchmarks with considerable data noise and imbalance in the online batch selection scenario, and observe superior training efficiency over competitive baselines. Notably, on the challenging WebVision benchmark, our method can achieve similar predictive performance with significantly fewer training iterations than leading data selection methods.", "keywords": "data selection;training acceleration;probabilistic modeling;Bayesian methods", "primary_area": "", "supplementary_material": "/attachment/060532afceb19915173a1b496c6b8f795705cfe6.zip", "author": "Zhijie Deng;Peng Cui;Jun Zhu", "authorids": "~Zhijie_Deng1;~Peng_Cui6;~Jun_Zhu2", "gender": "M;M;M", "homepage": "https://thudzj.github.io/;https://scholar.google.com/citations?user=c_VTs5MAAAAJ&hl=zh-CN;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "209/4959;31/891-7;50/2644-1", "google_scholar": "J3dR0sUAAAAJ;c_VTs5MAAAAJ;axsP38wAAAAJ", "orcid": "0000-0002-0932-1631;;", "linkedin": ";;", "or_profile": "~Zhijie_Deng1;~Peng_Cui6;~Jun_Zhu2", "aff": "Shanghai Jiaotong University;Tsinghua University;Tsinghua University", "aff_domain": "sjtu.edu.cn;cs.tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "Assistant Professor;PhD student;Professor", "bibtex": "@inproceedings{\ndeng2023towards,\ntitle={Towards Accelerated Model Training via Bayesian Data Selection},\nauthor={Zhijie Deng and Peng Cui and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2bRG4Hj8qd}\n}", "github": "", "project": "", "reviewers": "UrxY;GG6S;sAWs;agRJ", "pdf_size": 583613, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "34;50;56;204", "wc_strengths": "19;22;54;183", "wc_weaknesses": "97;131;164;358", "wc_questions": "143;6;3;94", "wc_limitations": "18;1;1;128", "wc_review": "311;210;278;967", "wc_reply_reviewers": "24;0;145;182", "wc_reply_authors": "22;0;289;106", "reply_reviewers": "1;0;3;1", "reply_authors": "2;1;4;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.0, 68.60029154456998 ], "wc_strengths_avg": [ 69.5, 66.9496079152074 ], "wc_weaknesses_avg": [ 187.5, 101.2484567783628 ], "wc_questions_avg": [ 61.5, 59.583974355526166 ], "wc_limitations_avg": [ 37.0, 52.99528280894442 
], "wc_review_avg": [ 441.5, 305.57527714132897 ], "wc_reply_reviewers_avg": [ 87.75, 77.33813742261964 ], "wc_reply_authors_avg": [ 104.25, 113.76373543445204 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16738813061515736688&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;cs.tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Setting the Trap: Capturing and Defeating Backdoors in Pretrained Language Models through Honeypots", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72945", "id": "2cYxNWNzk3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e7938ede51225b490bb69f7b361a9259-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2cYxNWNzk3", "openreview": "https://openreview.net/forum?id=2cYxNWNzk3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72945", "video": "https://nips.cc/virtual/2023/poster/72945", "author_site": "Ruixiang (Ryan) Tang, Jiayi Yuan, Yiming Li, Zirui Liu, Rui Chen, Xia Hu", "tldr": "", "abstract": "In the field of natural language processing, the prevalent approach involves fine-tuning pretrained language models (PLMs) using local samples. Recent research has exposed the susceptibility of PLMs to backdoor attacks, wherein the adversaries can embed malicious prediction behaviors by manipulating a few training samples. In this study, our objective is to develop a backdoor-resistant tuning procedure that yields a backdoor-free model, no matter whether the fine-tuning dataset contains poisoned samples. To this end, we propose and integrate an \\emph{honeypot module} into the original PLM, specifically designed to absorb backdoor information exclusively. Our design is motivated by the observation that lower-layer representations in PLMs carry sufficient backdoor features while carrying minimal information about the original tasks. Consequently, we can impose penalties on the information acquired by the honeypot module to inhibit backdoor creation during the fine-tuning process of the stem network. Comprehensive experiments conducted on benchmark datasets substantiate the effectiveness and robustness of our defensive strategy. 
Notably, these results indicate a substantial reduction in the attack success rate ranging from 10\\% to 40\\% when compared to prior state-of-the-art methods.", "keywords": "Backdoor Defense;Honeypot", "primary_area": "", "supplementary_material": "/attachment/303bd29554ee90f339b107ab2230107d866c74be.pdf", "author": "Ruixiang Tang;Jiayi Yuan;Yiming Li;Zirui Liu;Rui Chen;Xia Hu", "authorids": "~Ruixiang_Tang1;~Jiayi_Yuan1;~Yiming_Li1;~Zirui_Liu1;~Rui_Chen4;~Xia_Hu4", "gender": "M;;M;M;;", "homepage": "https://www.ruixiangtang.net/;https://jy-yuan.github.io/;http://liyiming.tech;https://zirui-ray-liu.github.io/;;", "dblp": "239/1928;251/4029-1.html;l/YimingLi-4;196/8629-1.html;;", "google_scholar": "T575jsoAAAAJ;XMrlrV8AAAAJ;mSW7kU8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": ";;0000-0002-2258-265X;;;", "linkedin": "ruixiang-tang-91660717b/;;yiming-li-thu/;;;", "or_profile": "~Ruixiang_Tang1;~Jiayi_Yuan1;~Yiming_Li1;~Zirui_Liu1;~Rui_Chen4;~Xia_Hu4", "aff": "Rice University;Rice University;Tsinghua University;Rice University;;", "aff_domain": "rice.edu;rice.edu;mails.tsinghua.edu.cn;rice.edu;;", "position": "PhD student;PhD student;PhD student;PhD student;;", "bibtex": "@inproceedings{\ntang2023setting,\ntitle={Setting the Trap: Capturing and Defeating Backdoors in Pretrained Language Models through Honeypots},\nauthor={Ruixiang Tang and Jiayi Yuan and Yiming Li and Zirui Liu and Rui Chen and Xia Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2cYxNWNzk3}\n}", "github": "", "project": "", "reviewers": "5e6e;Fa4H;hYyY;kn4m", "pdf_size": 8486710, "rating": "3;5;6;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "50;75;102;84", "wc_strengths": "14;62;56;89", "wc_weaknesses": "257;144;443;99", "wc_questions": "2;31;119;176", "wc_limitations": "7;5;1;8", "wc_review": "330;317;721;456", "wc_reply_reviewers": "616;79;201;13", "wc_reply_authors": "1101;535;394;11", "reply_reviewers": "2;2;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 18.73999733191016 ], "wc_strengths_avg": [ 55.25, 26.864242032858474 ], "wc_weaknesses_avg": [ 235.75, 132.78059910996035 ], "wc_questions_avg": [ 82.0, 69.29285677470658 ], "wc_limitations_avg": [ 5.25, 2.680951323690902 ], "wc_review_avg": [ 456.0, 162.3437710538966 ], "wc_reply_reviewers_avg": [ 227.25, 234.3590994606354 ], "wc_reply_authors_avg": [ 510.25, 391.26805070181746 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6831300510639732, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16220040684330858922&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "rice.edu;rice.edu;mails.tsinghua.edu.cn;rice.edu;;", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Rice University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rice.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Rice;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": 
"Robust Second-Order Nonconvex Optimization and Its Application to Low Rank Matrix Sensing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72944", "id": "2ccH4zjKVs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa5f224975a67914067519faddeacba3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ccH4zjKVs", "openreview": "https://openreview.net/forum?id=2ccH4zjKVs", "poster": "/media/PosterPDFs/NeurIPS%202023/72944.png?t=1702273893.4111793", "slides": "https://nips.cc/virtual/2023/poster/72944", "video": "https://nips.cc/virtual/2023/poster/72944", "author_site": "Shuyao Li, Yu Cheng, Ilias Diakonikolas, Jelena Diakonikolas, Rong Ge, Stephen Wright", "tldr": "", "abstract": "Finding an approximate second-order stationary point (SOSP) \nis a well-studied and fundamental problem in stochastic nonconvex optimization with many applications in machine learning.\nHowever, this problem is poorly understood in the presence of outliers, limiting the use of existing nonconvex algorithms in adversarial settings.\n\nIn this paper, we study the problem of finding SOSPs in the strong contamination model, \nwhere a constant fraction of datapoints are arbitrarily corrupted.\nWe introduce a general framework for efficiently finding an approximate SOSP with \\emph{dimension-independent} accuracy guarantees, using $\\widetilde{O}({D^2}/{\\epsilon})$ samples where $D$ is the ambient dimension and $\\epsilon$ is the fraction of corrupted datapoints.\n\nAs a concrete application of our framework, we apply it to the problem of low rank matrix sensing, developing efficient and provably robust algorithms that can tolerate corruptions in both the sensing matrices and the measurements.\nIn addition, we establish a Statistical Query lower bound providing evidence that the quadratic dependence on $D$ in the sample complexity is necessary for computationally efficient algorithms.", "keywords": "low rank matrix sensing;non-convex optimization;high-dimensional robust statistics;second-order optimization;statistical query model", "primary_area": "", "supplementary_material": "/attachment/9d910fd0724e93040066592f3218749aff0e6bd6.pdf", "author": "Shuyao Li;Yu Cheng;Ilias Diakonikolas;Jelena Diakonikolas;Rong Ge;Stephen Wright", "authorids": "~Shuyao_Li1;~Yu_Cheng2;~Ilias_Diakonikolas1;~Jelena_Diakonikolas2;~Rong_Ge1;~Stephen_Wright1", "gender": "M;M;M;F;M;M", "homepage": "https://shuyaoli.github.io/;https://cs.brown.edu/people/ycheng79/;http://www.iliasdiakonikolas.org/;http://www.jelena-diakonikolas.com/;https://users.cs.duke.edu/~rongge/;https://wrightstephen.github.io/sw_proj/", "dblp": "183/5509;96/3060-2;d/IliasDiakonikolas;147/5178;89/6869-1.html;75/2677", "google_scholar": "D8A-8x8AAAAJ;lVoOIv4AAAAJ;Vb3FLmkAAAAJ;J8ixfu8AAAAJ;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ;VFQRIOwAAAAJ", "orcid": "0009-0000-0170-1018;0000-0002-0019-2570;;0000-0003-3439-0310;;", "linkedin": ";yu-cheng-40401632/;;;;", "or_profile": "~Shuyao_Li1;~Yu_Cheng2;~Ilias_Diakonikolas1;~Jelena_Diakonikolas2;~Rong_Ge1;~Stephen_Wright1", "aff": "Department of Computer Science, University of Wisconsin - Madison;Brown University;University of Wisconsin, Madison;University of Wisconsin, Madison;Google (visiting);University of Wisconsin, Madison", "aff_domain": "cs.wisc.edu;brown.edu;wisc.edu;wisc.edu;google.com;wisc.edu", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor;Researcher;Full Professor", "bibtex": 
"@inproceedings{\nli2023robust,\ntitle={Robust Second-Order Nonconvex Optimization and Its Application to Low Rank Matrix Sensing},\nauthor={Shuyao Li and Yu Cheng and Ilias Diakonikolas and Jelena Diakonikolas and Rong Ge and Stephen Wright},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2ccH4zjKVs}\n}", "github": "", "project": "", "reviewers": "emkc;d53A;jUUi;kWga", "pdf_size": 597776, "rating": "5;6;6;6", "confidence": "3;2;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "47;123;68;51", "wc_strengths": "19;29;66;80", "wc_weaknesses": "60;114;82;134", "wc_questions": "376;2;72;56", "wc_limitations": "7;20;101;9", "wc_review": "509;288;389;330", "wc_reply_reviewers": "25;17;37;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.25, 30.34283276162593 ], "wc_strengths_avg": [ 48.5, 25.243811122728676 ], "wc_weaknesses_avg": [ 97.5, 28.508770580296865 ], "wc_questions_avg": [ 126.5, 146.36512562765762 ], "wc_limitations_avg": [ 34.25, 38.854697270729055 ], "wc_review_avg": [ 379.0, 83.18954261203749 ], "wc_reply_reviewers_avg": [ 22.5, 9.733961166965893 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8441401510716049672&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cs.wisc.edu;brown.edu;wisc.edu;wisc.edu;google.com;wisc.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "University of Wisconsin-Madison;Brown University;University of Wisconsin;Google", "aff_unique_dep": "Department of Computer Science;;;Google", "aff_unique_url": "https://www.wisc.edu;https://www.brown.edu;https://www.wisc.edu;https://www.google.com", "aff_unique_abbr": "UW-Madison;Brown;UW;Google", "aff_campus_unique_index": "0;0;0;2;0", "aff_campus_unique": "Madison;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Online Clustering with Moving Costs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72943", "id": "2doqt9r0r0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ff08be7b0105049ff3e0ce3d70658c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2doqt9r0r0", "openreview": "https://openreview.net/forum?id=2doqt9r0r0", "poster": "/media/PosterPDFs/NeurIPS%202023/72943.png?t=1702080552.296954", "slides": "https://nips.cc/virtual/2023/poster/72943", "video": "https://nips.cc/virtual/2023/poster/72943", "author_site": "Dimitrios Christou, Stratis Skoulakis, Volkan Cevher", "tldr": "", "abstract": "In this work we consider an online learning problem, called Online $k$-Clustering with Moving Costs, at which a learner maintains a set of $k$ facilities over $T$ rounds so as to minimize the connection cost of an adversarially selected sequence of clients. 
The learner is informed of the positions of the clients at each round $t$ only after its facility selection and can use this information to update its decision in the next round. However, updating the facility positions comes with an additional moving cost based on the moving distance of the facilities. We present the first $\\mathcal{O}(\\log n)$-regret polynomial-time online learning algorithm guaranteeing that the overall cost (connection $+$ moving) is at most $\\mathcal{O}(\\log n)$ times the time-averaged connection cost of the best fixed solution. Our work improves on the recent result of Fotakis et al. (2021) establishing $\\mathcal{O}(k)$-regret guarantees only on the connection cost.", "keywords": "Online Learning;Regret Analysis;Clustering;k-Median", "primary_area": "", "supplementary_material": "/attachment/f29ff9bdfb09832fd844ece9e5387350cf26e944.zip", "author": "Dimitris Christou;EFSTRATIOS PANTELEIMON SKOULAKIS;Volkan Cevher", "authorids": "~Dimitris_Christou1;~EFSTRATIOS_PANTELEIMON_SKOULAKIS1;~Volkan_Cevher1", "gender": "M;M;M", "homepage": "http://lions.epfl.ch;;http://www.corelab.ntua.gr/~sskoul/", "dblp": "70/5301;244/9936;183/0979.html", "google_scholar": "https://scholar.google.ch/citations?user=hlWhzU8AAAAJ;t46iMM8AAAAJ;Juo2Tk8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Volkan_Cevher1;~Dimitrios_Christou1;~Stratis_Skoulakis2", "aff": "Amazon Development Center Germany;University of Texas at Austin;EPFL - EPF Lausanne", "aff_domain": "amazon.de;utexas.edu;epfl.ch", "position": "Amazon Scholar;PhD student;Postdoc", "bibtex": "@inproceedings{\nchristou2023efficient,\ntitle={Efficient Online Clustering with Moving Costs},\nauthor={Dimitris Christou and EFSTRATIOS PANTELEIMON SKOULAKIS and Volkan Cevher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2doqt9r0r0}\n}", "github": "", "project": "", "reviewers": "d1Rn;wiCY;HLbh;5XPD", "pdf_size": 676035, "rating": "5;6;7;7", "confidence": "4;4;3;3", "soundness": "3;4;4;4", "novelty": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "364;295;320;362", "wc_strengths": "36;32;52;99", "wc_weaknesses": "28;114;6;120", "wc_questions": "142;36;2;57", "wc_limitations": "4;1;2;3", "wc_review": "574;478;382;641", "wc_reply_reviewers": "0;70;0;109", "wc_reply_authors": "0;149;0;57", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 335.25, 29.13224158900238 ], "wc_strengths_avg": [ 54.75, 26.621185172715357 ], "wc_weaknesses_avg": [ 67.0, 50.6458290484024 ], "wc_questions_avg": [ 59.25, 51.64966117991482 ], "wc_limitations_avg": [ 2.5, 1.118033988749895 ], "wc_review_avg": [ 518.75, 97.926949814645 ], "wc_reply_reviewers_avg": [ 44.75, 46.826141203391934 ], "wc_reply_authors_avg": [ 51.5, 60.91182151274086 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2057145733279319125&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "amazon.de;utexas.edu;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Amazon;University of Texas at Austin;EPFL", "aff_unique_dep": "Development 
Center;;", "aff_unique_url": "https://www.amazon.de;https://www.utexas.edu;https://www.epfl.ch", "aff_unique_abbr": "Amazon;UT Austin;EPFL", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Lausanne", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Germany;United States;Switzerland" }, { "title": "Bilevel Coreset Selection in Continual Learning: A New Formulation and Algorithm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72942", "id": "2dtU9ZbgSN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a0251e494a7e75d59e06d37e646f46b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2dtU9ZbgSN", "openreview": "https://openreview.net/forum?id=2dtU9ZbgSN", "poster": "/media/PosterPDFs/NeurIPS%202023/72942.png?t=1701672891.9849696", "slides": "https://nips.cc/virtual/2023/poster/72942", "video": "https://nips.cc/virtual/2023/poster/72942", "author_site": "Jie Hao, Kaiyi Ji, Mingrui Liu", "tldr": "", "abstract": "Coreset is a small set that provides a data summary for a large dataset, such that training solely on the small set achieves competitive performance compared with a large dataset. In rehearsal-based continual learning, the coreset is typically used in the memory replay buffer to stand for representative samples in previous tasks, and the coreset selection procedure is typically formulated as a bilevel problem. However, the typical bilevel formulation for coreset selection explicitly performs optimization over discrete decision variables with greedy search, which is computationally expensive. Several works consider other formulations to address this issue, but they ignore the nested nature of bilevel optimization problems and may not solve the bilevel coreset selection problem accurately. To address these issues, we propose a new bilevel formulation, where the inner problem tries to find a model which minimizes the expected training error sampled from a given probability distribution, and the outer problem aims to learn the probability distribution with approximately $K$ (coreset size) nonzero entries such that learned model in the inner problem minimizes the training error over the whole data. To ensure the learned probability has approximately $K$ nonzero entries, we introduce a novel regularizer based on the smoothed top-$K$ loss in the upper problem. We design a new optimization algorithm that provably converges to the $\\epsilon$-stationary point with $O(1/\\epsilon^4)$ computational complexity. We conduct extensive experiments in various settings in continual learning, including balanced data, imbalanced data, and label noise, to show that our proposed formulation and new algorithm significantly outperform competitive baselines. From bilevel optimization point of view, our algorithm significantly improves the vanilla greedy coreset selection method in terms of running time on continual learning benchmark datasets. 
The code is available at https://github.com/MingruiLiu-ML-Lab/Bilevel-Coreset-Selection-via-Regularization.", "keywords": "Coreset Selection;Continual Learning;Bilevel Optimization", "primary_area": "", "supplementary_material": "/attachment/baa4030a0f895ab693351dd4e010f13b195335e8.zip", "author": "Jie Hao;Kaiyi Ji;Mingrui Liu", "authorids": "~Jie_Hao3;~Kaiyi_Ji1;~Mingrui_Liu2", "gender": "M;M;", "homepage": "https://jhao6.github.io/JieHao.github.io/;https://cse.buffalo.edu/~kaiyiji/;https://mingrliu.github.io", "dblp": ";205/3164;", "google_scholar": "S8ZTkikAAAAJ;E0A3lSIAAAAJ;KFoEnFQAAAAJ", "orcid": ";;", "linkedin": ";;mingrui-liu-447a2aab/", "or_profile": "~Jie_Hao3;~Kaiyi_Ji1;~Mingrui_Liu2", "aff": "George Mason University;State University of New York at Buffalo;George Mason University", "aff_domain": "gmu.edu;buffalo.edu;gmu.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhao2023bilevel,\ntitle={Bilevel Coreset Selection in Continual Learning: A New Formulation and Algorithm},\nauthor={Jie Hao and Kaiyi Ji and Mingrui Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2dtU9ZbgSN}\n}", "github": "", "project": "", "reviewers": "vGFs;b44u;AXy9;FPqc", "pdf_size": 664372, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "95;65;129;82", "wc_strengths": "54;23;185;145", "wc_weaknesses": "206;110;107;31", "wc_questions": "55;26;59;39", "wc_limitations": "1;5;1;25", "wc_review": "411;229;481;322", "wc_reply_reviewers": "17;27;4;5", "wc_reply_authors": "102;405;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.75, 23.47738273317535 ], "wc_strengths_avg": [ 101.75, 65.73193668225515 ], "wc_weaknesses_avg": [ 113.5, 62.082606259724635 ], "wc_questions_avg": [ 44.75, 13.160072188251856 ], "wc_limitations_avg": [ 8.0, 9.9498743710662 ], "wc_review_avg": [ 360.75, 94.6635489510086 ], "wc_reply_reviewers_avg": [ 13.25, 9.443913383762052 ], "wc_reply_authors_avg": [ 126.75, 165.95688446099487 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10813476148308684047&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "gmu.edu;buffalo.edu;gmu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "George Mason University;State University of New York at Buffalo", "aff_unique_dep": ";", "aff_unique_url": "https://www.gmu.edu;https://www.buffalo.edu", "aff_unique_abbr": "GMU;SUNY Buffalo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Probabilistic Exponential Integrators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72941", "id": "2dx5MNs2Ip", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f64034009f4a5fa417a57e1a987c5cd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2dx5MNs2Ip", "openreview": 
"https://openreview.net/forum?id=2dx5MNs2Ip", "poster": "/media/PosterPDFs/NeurIPS%202023/72941.png?t=1702262437.8082037", "slides": "https://nips.cc/virtual/2023/poster/72941", "video": "https://nips.cc/virtual/2023/poster/72941", "author_site": "Nathanael Bosch, Philipp Hennig, Filip Tronarp", "tldr": "", "abstract": "Probabilistic solvers provide a flexible and efficient framework for simulation, uncertainty quantification, and inference in dynamical systems. However, like standard solvers, they suffer performance penalties for certain stiff systems, where small steps are required not for reasons of numerical accuracy but for the sake of stability. This issue is greatly alleviated in semi-linear problems by the probabilistic exponential integrators developed in this paper. By including the fast, linear dynamics in the prior, we arrive at a class of probabilistic integrators with favorable properties. Namely, they are proven to be L-stable, and in a certain case reduce to a classic exponential integrator---with the added benefit of providing a probabilistic account of the numerical error. The method is also generalized to arbitrary non-linear systems by imposing piece-wise semi-linearity on the prior via Jacobians of the vector field at the previous estimates, resulting in probabilistic exponential Rosenbrock methods. We evaluate the proposed methods on multiple stiff differential equations and demonstrate their improved stability and efficiency over established probabilistic solvers. The present contribution thus expands the range of problems that can be effectively tackled within probabilistic numerics.", "keywords": "Probabilistic numerics;differential equations;exponential integrators;Kalman filters;Gaussian processes", "primary_area": "", "supplementary_material": "/attachment/112ee6efccb653dd53a9eabdbdf0ba1d17475809.pdf", "author": "Nathanael Bosch;Philipp Hennig;Filip Tronarp", "authorids": "~Nathanael_Bosch1;~Philipp_Hennig1;~Filip_Tronarp1", "gender": "M;M;M", "homepage": "https://nathanaelbosch.github.io;http://mml.inf.uni-tuebingen.de;https://filtron.github.io/", "dblp": "264/9948;08/9077;184/0638", "google_scholar": "2vejDygAAAAJ;https://scholar.google.de/citations?user=UeG5w08AAAAJ;q0rtB0EAAAAJ", "orcid": "0000-0003-0139-4622;0000-0001-7293-6092;", "linkedin": ";;filip-tronarp-93097065/", "or_profile": "~Nathanael_Bosch1;~Philipp_Hennig1;~Filip_Tronarp1", "aff": "University of Tuebingen;University of T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nbosch2023probabilistic,\ntitle={Probabilistic Exponential Integrators},\nauthor={Nathanael Bosch and Philipp Hennig and Filip Tronarp},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2dx5MNs2Ip}\n}", "github": "", "project": "", "reviewers": "goRD;vChc;V9iy;Lu6G", "pdf_size": 1106218, "rating": "5;6;7;8", "confidence": "4;3;1;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "47;79;43;75", "wc_strengths": "24;39;63;42", "wc_weaknesses": "26;84;65;210", "wc_questions": "93;50;2;68", "wc_limitations": "8;3;2;7", "wc_review": "198;255;175;402", "wc_reply_reviewers": "22;9;9;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 
3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.0, 16.1245154965971 ], "wc_strengths_avg": [ 42.0, 13.910427743243556 ], "wc_weaknesses_avg": [ 96.25, 68.92160401499663 ], "wc_questions_avg": [ 53.25, 33.29695932063467 ], "wc_limitations_avg": [ 5.0, 2.5495097567963922 ], "wc_review_avg": [ 257.5, 88.3643027472067 ], "wc_reply_reviewers_avg": [ 16.0, 7.035623639735144 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.18257418583505533, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=820657576086249059&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Tuebingen;University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Learning Invariant Representations with a Nonparametric Nadaraya-Watson Head", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72940", "id": "2ePf1sBgLU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f6931a9e339a012a9909306d7c758b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ePf1sBgLU", "openreview": "https://openreview.net/forum?id=2ePf1sBgLU", "poster": "/media/PosterPDFs/NeurIPS%202023/72940.png?t=1702001452.173476", "slides": "https://nips.cc/virtual/2023/poster/72940", "video": "https://nips.cc/virtual/2023/poster/72940", "author_site": "Alan Wang, Minh Nguyen, Mert Sabuncu", "tldr": "", "abstract": "Machine learning models will often fail when deployed in an environment with a data distribution that is different than the training distribution. When multiple environments are available during training, many methods exist that learn representations which are invariant across the different distributions, with the hope that these representations will be transportable to unseen domains. In this work, we present a nonparametric strategy for learning invariant representations based on the recently-proposed Nadaraya-Watson (NW) head. The NW head makes a prediction by comparing the learned representations of the query to the elements of a support set that consists of labeled data. We demonstrate that by manipulating the support set, one can encode different causal assumptions. In particular, restricting the support set to a single environment encourages the model to learn invariant features that do not depend on the environment. We present a causally-motivated setup for our modeling and training strategy and validate on three challenging real-world domain generalization tasks in computer vision.", "keywords": "Invariant representations;causality;domain generalization", "primary_area": "", "supplementary_material": "/attachment/ad6adb3d0273c524f532918ba063b05ba7c1c4ad.pdf", "author": "Alan Q. Wang;Minh Nguyen;Mert R. 
Sabuncu", "authorids": "~Alan_Q._Wang1;~Minh_Nguyen2;~Mert_R._Sabuncu1", "gender": "M;M;M", "homepage": ";http://sabuncu.engineering.cornell.edu;https://alanqrwang.github.io", "dblp": "83/2833-2;36/4898;271/4734", "google_scholar": "Fv-9At4AAAAJ;;P7nRvlIAAAAJ", "orcid": ";;0000-0003-0149-6055", "linkedin": ";;", "or_profile": "~Minh_Nguyen2;~Mert_R._Sabuncu1;~Alan_Wang2", "aff": "Cornell University;Cornell Tech;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning Invariant Representations with a Nonparametric Nadaraya-Watson Head},\nauthor={Alan Q. Wang and Minh Nguyen and Mert R. Sabuncu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2ePf1sBgLU}\n}", "github": "", "project": "", "reviewers": "f7MY;xs7Q;48ug;d2Ui", "pdf_size": 8671964, "rating": "5;5;5;5", "confidence": "3;3;4;3", "soundness": "3;2;3;2", "novelty": "2;2;2;2", "presentation": "3;3;3;4", "wc_summary": "93;112;63;129", "wc_strengths": "105;48;53;132", "wc_weaknesses": "266;146;147;528", "wc_questions": "67;233;201;2", "wc_limitations": "1;10;5;11", "wc_review": "532;549;469;802", "wc_reply_reviewers": "42;34;148;85", "wc_reply_authors": "11;11;99;32", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 24.498724456591614 ], "wc_strengths_avg": [ 84.5, 35.358874416474286 ], "wc_weaknesses_avg": [ 271.75, 155.7825006218606 ], "wc_questions_avg": [ 125.75, 94.7770409962244 ], "wc_limitations_avg": [ 6.75, 4.02336923485777 ], "wc_review_avg": [ 588.0, 127.09642009120478 ], "wc_reply_reviewers_avg": [ 77.25, 45.21822088494858 ], "wc_reply_authors_avg": [ 38.25, 36.10661296770994 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2552486482708475971&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "cornell.edu;cornell.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York City", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Foundation Model is Efficient Multimodal Multitask Model Selector", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72939", "id": "2ep5PXEZiw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/687b7b2bdcc2ced577c0a989b44e7078-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ep5PXEZiw", "openreview": "https://openreview.net/forum?id=2ep5PXEZiw", "poster": "/media/PosterPDFs/NeurIPS%202023/72939.png?t=1702302964.3871436", "slides": "https://nips.cc/virtual/2023/poster/72939", "video": "https://nips.cc/virtual/2023/poster/72939", "author_site": "Fanqing Meng, Wenqi Shao, zhanglin peng, Chonghe Jiang, Kaipeng Zhang, Yu Qiao, Ping Luo", "tldr": "", "abstract": "This paper investigates an under-explored but important problem: given a 
collection of pre-trained neural networks, predicting their performance on each multi-modal task without fine-tuning them, such as image recognition, referring, captioning, visual question answering, and text question answering. A brute-force approach is to fine-tune all models on all target datasets, bringing high computational costs. Although recent advanced approaches employ lightweight metrics to measure models\u2019 transferability, they often depend heavily on the prior knowledge of a single task, making them inapplicable in a multi-modal multi-task scenario. To tackle this issue, we propose an efficient multi-task model selector (EMMS), which employs large-scale foundation models to transform diverse label formats such as categories, texts, and bounding boxes of different downstream tasks into a unified noisy label embedding. EMMS can estimate a model\u2019s transferability through a simple weighted linear regression, which can be efficiently solved by an alternating minimization algorithm with a convergence guarantee. Extensive experiments on 5 downstream tasks with 24 datasets show that EMMS is fast, effective, and generic enough to assess the transferability of pre-trained models, making it the first model selection method in the multi-task scenario. For instance, compared with the state-of-the-art method LogME enhanced by our label embeddings, EMMS achieves 9.0%, 26.3%, 20.1%, 54.8%, 12.2% performance gains on image recognition, referring, captioning, visual question answering, and text question answering, while bringing 5.13\u00d7, 6.29\u00d7, 3.59\u00d7, 6.19\u00d7, and 5.66\u00d7 speedup in wall-clock time, respectively. The code is available at https://github.com/OpenGVLab/Multitask-Model-Selector.", "keywords": "transfer learning;model selection;foundation model", "primary_area": "", "supplementary_material": "/attachment/7fb6e7909a0f6bed7d49282f020c1517a4011102.pdf", "author": "Fanqing Meng;Wenqi Shao;zhanglin peng;Chonghe Jiang;Kaipeng Zhang;Yu Qiao;Ping Luo", "authorids": "~Fanqing_Meng1;~Wenqi_Shao2;~zhanglin_peng1;~Chonghe_Jiang1;~Kaipeng_Zhang1;~Yu_Qiao1;~Ping_Luo2", "gender": "M;M;;M;M;;", "homepage": "https://github.com/FanqingM;https://wqshao126.github.io/;;;http://kpzhang93.github.io/;;", "dblp": ";227/3122;;354/8890.html;179/2126;;", "google_scholar": "iUIC-JEAAAAJ;Bs9mrwwAAAAJ;;;4OqZBmYAAAAJ;;", "orcid": "0000-0002-0920-3539;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Fanqing_Meng1;~Wenqi_Shao2;~zhanglin_peng1;~Chonghe_Jiang1;~Kaipeng_Zhang1;~Yu_Qiao1;~Ping_Luo2", "aff": "Tongji University;Shanghai AI Laboratory;;Tongji University;Shanghai AI Laboratory;;", "aff_domain": "tongji.edu.cn;pjlab.org.cn;;tongji.edu.cn;pjlab.org.cn;;", "position": "Undergrad student;Researcher;;Undergrad student;Researcher;;", "bibtex": "@inproceedings{\nmeng2023foundation,\ntitle={Foundation Model is Efficient Multimodal Multitask Model Selector},\nauthor={Fanqing Meng and Wenqi Shao and zhanglin peng and Chonghe Jiang and Kaipeng Zhang and Yu Qiao and Ping Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2ep5PXEZiw}\n}", "github": "", "project": "", "reviewers": "z1Vu;vJKA;gNw5;kTYc;4U2K", "pdf_size": 1347464, "rating": "4;4;5;5;6", "confidence": "3;3;3;2;4", "soundness": "3;3;2;3;2", "novelty": "3;3;2;3;3", "presentation": "3;2;2;3;2", "wc_summary": "70;63;117;47;49", "wc_strengths": "56;110;138;67;47", "wc_weaknesses": "52;139;269;2;125", "wc_questions": "6;7;130;2;1", "wc_limitations": 
"1;1;107;2;1", "wc_review": "185;320;761;120;223", "wc_reply_reviewers": "0;0;22;74;0", "wc_reply_authors": "134;155;41;190;145", "reply_reviewers": "0;0;1;1;0", "reply_authors": "4;4;2;2;4", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 69.2, 25.396062686959958 ], "wc_strengths_avg": [ 83.6, 34.736724082734106 ], "wc_weaknesses_avg": [ 117.4, 90.66553920867621 ], "wc_questions_avg": [ 29.2, 50.45156092728945 ], "wc_limitations_avg": [ 22.4, 42.30177301248731 ], "wc_review_avg": [ 321.8, 228.95361975736483 ], "wc_reply_reviewers_avg": [ 19.2, 28.694250295137525 ], "wc_reply_authors_avg": [ 133.0, 49.682995078799344 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 3.2, 0.9797958971132712 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15761484087073343115&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tongji.edu.cn;pjlab.org.cn;;tongji.edu.cn;pjlab.org.cn;;", "author_num": 7, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Tongji University;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.tongji.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Tongji;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Effective Targeted Attacks for Adversarial Self-Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72938", "id": "2f0dlMZlNb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b28ae1166e1035c26b89d20f0286c9eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2f0dlMZlNb", "openreview": "https://openreview.net/forum?id=2f0dlMZlNb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72938", "video": "https://nips.cc/virtual/2023/poster/72938", "author_site": "Minseon Kim, Hyeonjeong Ha, Sooel Son, Sung Ju Hwang", "tldr": "", "abstract": "Recently, unsupervised adversarial training (AT) has been highlighted as a means of achieving robustness in models without any label information. Previous studies in unsupervised AT have mostly focused on implementing self-supervised learning (SSL) frameworks, which maximize the instance-wise classification loss to generate adversarial examples. However, we observe that simply maximizing the self-supervised training loss with an untargeted adversarial attack often results in generating ineffective adversaries that may not help improve the robustness of the trained model, especially for non-contrastive SSL frameworks without negative examples. To tackle this problem, we propose a novel positive mining for targeted adversarial attack to generate effective adversaries for adversarial SSL frameworks. Specifically, we introduce an algorithm that selects the most confusing yet similar target example for a given instance based on entropy and similarity, and subsequently perturbs the given instance towards the selected target. 
Our method demonstrates significant enhancements in robustness when applied to non-contrastive SSL frameworks, and smaller but consistent robustness improvements with contrastive SSL frameworks, on the benchmark datasets.", "keywords": "Adversarial self supervised learning;targeted attack;self supervised learning;contrastive learning;positive mining", "primary_area": "", "supplementary_material": "", "author": "Minseon Kim;Hyeonjeong Ha;Sooel Son;Sung Ju Hwang", "authorids": "~Minseon_Kim1;~Hyeonjeong_Ha1;~Sooel_Son1;~Sung_Ju_Hwang1", "gender": ";;;", "homepage": "https://kim-minseon.github.io/;https://hyeonjeongha.github.io/;;", "dblp": "247/5952;331/5333;;", "google_scholar": "ZwObZNwAAAAJ;https://scholar.google.com/citations?hl=ko;;", "orcid": ";;;", "linkedin": "minseon-kim-707a84174;hyeonjeong-ha-bb93b0285/;;", "or_profile": "~Minseon_Kim1;~Hyeonjeong_Ha1;~Sooel_Son1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;;", "aff_domain": "kaist.ac.kr;kaist.edu;;", "position": "PhD student;MS student;;", "bibtex": "@inproceedings{\nkim2023effective,\ntitle={Effective Targeted Attacks for Adversarial Self-Supervised Learning},\nauthor={Minseon Kim and Hyeonjeong Ha and Sooel Son and Sung Ju Hwang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2f0dlMZlNb}\n}", "github": "", "project": "", "reviewers": "aDD2;A38P;Uc5D;zysz", "pdf_size": 1613946, "rating": "4;6;7;7", "confidence": "5;3;5;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "60;74;45;63", "wc_strengths": "31;17;54;40", "wc_weaknesses": "95;79;505;271", "wc_questions": "2;13;115;32", "wc_limitations": "1;7;4;7", "wc_review": "189;190;723;413", "wc_reply_reviewers": "40;36;204;90", "wc_reply_authors": "99;160;196;62", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.5, 10.35615758860399 ], "wc_strengths_avg": [ 35.5, 13.46291201783626 ], "wc_weaknesses_avg": [ 237.5, 171.8334949886081 ], "wc_questions_avg": [ 40.5, 44.33114029663573 ], "wc_limitations_avg": [ 4.75, 2.48746859276655 ], "wc_review_avg": [ 378.75, 218.6965649021493 ], "wc_reply_reviewers_avg": [ 92.5, 67.79933627993714 ], "wc_reply_authors_avg": [ 129.25, 52.054658773254864 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.24618298195866545, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17708801808636982420&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.edu;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "TopP&R: Robust Support Estimation Approach for Evaluating Fidelity and Diversity in Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72937", "id": "2gUCMr6fDY", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/185969291540b3cd86e70c51e8af5d08-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2gUCMr6fDY", "openreview": "https://openreview.net/forum?id=2gUCMr6fDY", "poster": "/media/PosterPDFs/NeurIPS%202023/72937.png?t=1701863977.4500775", "slides": "https://nips.cc/virtual/2023/poster/72937", "video": "https://nips.cc/virtual/2023/poster/72937", "author_site": "Pum Jun Kim, Yoojin Jang, Jisu Kim, Jaejun Yoo", "tldr": "", "abstract": "We propose a robust and reliable evaluation metric for generative models called Topological Precision and Recall (TopP&R, pronounced \u201ctopper\u201d), which systematically estimates supports by retaining only topologically and statistically significant features with a certain level of confidence. Existing metrics, such as Inception Score (IS), Frechet Inception Distance (FID), and various Precision and Recall (P&R) variants, rely heavily on support estimates derived from sample features. However, the reliability of these estimates has been overlooked, even though the quality of the evaluation hinges entirely on their accuracy. In this paper, we demonstrate that current methods not only fail to accurately assess sample quality when support estimation is unreliable, but also yield inconsistent results. In contrast, TopP&R reliably evaluates the sample quality and ensures statistical consistency in its results. Our theoretical and experimental findings reveal that TopP&R provides a robust evaluation, accurately capturing the true trend of change in samples, even in the presence of outliers and non-independent and identically distributed (Non-IID) perturbations where other methods result in inaccurate support estimations. To our knowledge, TopP&R is the first evaluation metric specifically focused on the robust estimation of supports, offering statistical consistency under noise conditions.", "keywords": "GAN;Evaluation;Support Estimation", "primary_area": "", "supplementary_material": "/attachment/f35f89b0dae5f152f704436c879b244ced26694c.pdf", "author": "Pum Jun Kim;Yoojin Jang;Jisu Kim;Jaejun Yoo", "authorids": "~Pum_Jun_Kim1;~Yoojin_Jang1;~Jisu_Kim1;~Jaejun_Yoo1", "gender": "M;F;M;M", "homepage": ";;https://pages.saclay.inria.fr/jisu.kim/;", "dblp": "349/4625;96/2135-1;;141/8878-1", "google_scholar": "WGJgXskAAAAJ;-_a_pDYAAAAJ;;https://scholar.google.co.kr/citations?user=7NBlQw4AAAAJ", "orcid": "0000-0001-8220-0951;0000-0001-8150-3715;0000-0003-0573-4495;0000-0001-5252-9668", "linkedin": ";;;jaejunyoo/", "or_profile": "~Pum_Jun_Kim1;~Yoojin_Jang1;~Jisu_Kim1;~Jaejun_Yoo1", "aff": "Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;INRIA;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;unist.ac.kr;inria.fr;unist.ac.kr", "position": "PhD student;MS student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkim2023toppr,\ntitle={TopP\\&R: Robust Support Estimation Approach for Evaluating Fidelity and Diversity in Generative Models},\nauthor={Pum Jun Kim and Yoojin Jang and Jisu Kim and Jaejun Yoo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2gUCMr6fDY}\n}", "github": "", "project": "", "reviewers": "fxDp;PyMg;4EiX;UDSM;YKk3", "pdf_size": 2880965, "rating": "4;4;5;5;8", "confidence": "3;4;4;4;4", "soundness": "3;2;3;3;4", "novelty": "2;2;2;3;4", "presentation": "3;3;1;3;4", "wc_summary": 
"105;105;67;108;58", "wc_strengths": "166;67;49;261;48", "wc_weaknesses": "212;486;177;270;42", "wc_questions": "108;181;279;10;92", "wc_limitations": "116;17;9;1;19", "wc_review": "707;856;581;650;259", "wc_reply_reviewers": "0;136;38;46;0", "wc_reply_authors": "0;1056;0;158;0", "reply_reviewers": "0;2;1;1;0", "reply_authors": "1;3;1;2;1", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 88.6, 21.527656630483495 ], "wc_strengths_avg": [ 118.2, 83.68369016720045 ], "wc_weaknesses_avg": [ 237.4, 145.14764896476967 ], "wc_questions_avg": [ 134.0, 90.60905032059435 ], "wc_limitations_avg": [ 32.4, 42.28285704632552 ], "wc_review_avg": [ 610.6, 197.77016964143 ], "wc_reply_reviewers_avg": [ 44.0, 49.75138189035557 ], "wc_reply_authors_avg": [ 242.8, 411.1789877899891 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386313, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14001730375749969859&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "unist.ac.kr;unist.ac.kr;inria.fr;unist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.unist.ac.kr;https://www.inria.fr", "aff_unique_abbr": "UNIST;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;France" }, { "title": "Mode Connectivity in Auction Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72936", "id": "2gn9WFlqJ4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5e4907a40c0dcb8433a35c714ba9d79-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2gn9WFlqJ4", "openreview": "https://openreview.net/forum?id=2gn9WFlqJ4", "poster": "/media/PosterPDFs/NeurIPS%202023/72936.png?t=1699619244.9436107", "slides": "https://nips.cc/virtual/2023/poster/72936", "video": "https://nips.cc/virtual/2023/poster/72936", "author_site": "Christoph Hertrich, Yixin Tao, L\u00e1szl\u00f3 A. V\u00e9gh", "tldr": "", "abstract": "Optimal auction design is a fundamental problem in algorithmic game theory. This problem is notoriously difficult already in very simple settings. Recent work in differentiable economics showed that neural networks can efficiently learn known optimal auction mechanisms and discover interesting new ones. In an attempt to theoretically justify their empirical success, we focus on one of the first such networks, RochetNet, and a generalized version for affine maximizer auctions. We prove that they satisfy mode connectivity, i.e., locally optimal solutions are connected by a simple, piecewise linear path such that every solution on the path is almost as good as one of the two local optima. Mode connectivity has been recently investigated as an intriguing empirical and theoretically justifiable property of neural networks used for prediction problems. 
Our results give the first such analysis in the context of differentiable economics, where neural networks are used directly for solving non-convex optimization problems.", "keywords": "Differentiable Economics;Mechanism Design;Neural Network Theory;Mode Connectivity;RochetNet", "primary_area": "", "supplementary_material": "/attachment/ea1aa199327c8a6beafac8980b2ff3deaf4674c0.pdf", "author": "Christoph Hertrich;Yixin Tao;L\u00e1szl\u00f3 A. V\u00e9gh", "authorids": "~Christoph_Hertrich1;~Yixin_Tao1;~L\u00e1szl\u00f3_A._V\u00e9gh1", "gender": ";M;M", "homepage": "https://christophhertrich.gitlab.io;https://tomtao26.github.io/;https://personal.lse.ac.uk/vegh", "dblp": "234/8939;133/3849;12/2680", "google_scholar": "bbMbGU4AAAAJ;YQQ_K8YAAAAJ;2Q1BK6gAAAAJ", "orcid": "0000-0001-5646-8567;;0000-0003-1152-200X", "linkedin": ";;", "or_profile": "~Christoph_Hertrich1;~Yixin_Tao1;~L\u00e1szl\u00f3_A._V\u00e9gh1", "aff": "London School of Economics and Political Science;London School of Economics;London School of Economics and Political Science, University of London", "aff_domain": "lse.ac.uk;lse.ac.uk;lse.ac.uk", "position": "Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhertrich2023mode,\ntitle={Mode Connectivity in Auction Design},\nauthor={Christoph Hertrich and Yixin Tao and L{\\'a}szl{\\'o} A. V{\\'e}gh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2gn9WFlqJ4}\n}", "github": "", "project": "", "reviewers": "HRyy;aV1n;Cnd8;xpkv;hKdM", "pdf_size": 372702, "rating": "5;5;6;7;8", "confidence": "5;3;4;2;4", "soundness": "3;4;4;3;4", "novelty": "3;2;3;3;4", "presentation": "1;4;4;3;4", "wc_summary": "69;47;111;54;458", "wc_strengths": "38;33;89;46;48", "wc_weaknesses": "212;133;121;91;7", "wc_questions": "34;17;3;209;75", "wc_limitations": "10;5;1;1;41", "wc_review": "363;235;325;401;629", "wc_reply_reviewers": "42;10;23;260;8", "wc_reply_authors": "0;0;0;262;0", "reply_reviewers": "1;1;1;2;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 1.16619037896906 ], "wc_summary_avg": [ 147.8, 156.6823538245453 ], "wc_strengths_avg": [ 50.8, 19.85346317396539 ], "wc_weaknesses_avg": [ 112.8, 66.30957698553054 ], "wc_questions_avg": [ 67.6, 74.71438951099044 ], "wc_limitations_avg": [ 11.6, 15.067846561469889 ], "wc_review_avg": [ 390.6, 131.31580255247272 ], "wc_reply_reviewers_avg": [ 68.6, 96.46470857261737 ], "wc_reply_authors_avg": [ 52.4, 104.8 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.269069117598525, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16239977499417840986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "lse.ac.uk;lse.ac.uk;lse.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "London School of Economics and Political Science;London School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.lse.ac.uk;https://www.lse.ac.uk", "aff_unique_abbr": "LSE;LSE", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "What is the Inductive Bias of Flatness Regularization? 
A Study of Deep Matrix Factorization Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72935", "id": "2hQ7MBQApp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5927edd18c5dd83aa8936a4610c72029-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2hQ7MBQApp", "openreview": "https://openreview.net/forum?id=2hQ7MBQApp", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72935", "video": "https://nips.cc/virtual/2023/poster/72935", "author_site": "Khashayar Gatmiry, Zhiyuan Li, Tengyu Ma, Sashank Reddi, Stefanie Jegelka, Ching-Yao Chuang", "tldr": "", "abstract": "Recent works on over-parameterized neural networks have shown that the stochasticity in optimizers has the implicit regularization effect of minimizing the sharpness of the loss function (in particular, the trace of its Hessian) over the family of zero-loss solutions. More explicit forms of flatness regularization also empirically improve the generalization performance. However, it remains unclear why and when flatness regularization leads to better generalization. \nThis work takes the first step towards understanding the inductive bias of the minimum trace of the Hessian solutions in an important setting: learning deep linear networks from linear measurements, also known as \\emph{deep matrix factorization}. We show that with the standard Restricted Isometry Property (RIP) on the measurements, minimizing the trace of Hessian is approximately equivalent to minimizing the Schatten 1-norm of the corresponding end-to-end matrix parameters (i.e., the product of all layer matrices), which in turn leads to better generalization.", "keywords": "Sharpness minimization;Deep learning;Matrix factorization;Deep linear networks;Implicit bias;SGD;Trace of Hessian regularizer", "primary_area": "", "supplementary_material": "/attachment/0f0ed4619c0edc162df334fe30e3e932f1b85e2d.pdf", "author": "Khashayar Gatmiry;Zhiyuan Li;Tengyu Ma;Sashank J. Reddi;Stefanie Jegelka;Ching-Yao Chuang", "authorids": "~Khashayar_Gatmiry1;~Zhiyuan_Li2;~Tengyu_Ma1;~Sashank_J._Reddi1;~Stefanie_Jegelka3;~Ching-Yao_Chuang1", "gender": "M;M;M;M;F;M", "homepage": "http://ce.sharif.edu/~kgatmiry/;https://zhiyuanli.ttic.edu;http://ai.stanford.edu/~tengyuma/;;http://people.csail.mit.edu/stefje/;https://chingyaoc.github.io/", "dblp": ";l/ZhiyuanLi;54/9061;50/10452;38/7003;190/7522", "google_scholar": ";https://scholar.google.com/citations?hl=en;i38QlUwAAAAJ;70lgwYwAAAAJ;gTWUZlsAAAAJ;fpUICd0AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Khashayar_Gatmiry1;~Zhiyuan_Li2;~Tengyu_Ma1;~Sashank_J._Reddi1;~Stefanie_Jegelka3;~Ching-Yao_Chuang1", "aff": "Massachusetts Institute of Technology;Computer Science Department, Stanford University;Facebook AI Research;Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;cs.stanford.edu;fb.com;google.com;mit.edu;mit.edu", "position": "PhD student;Postdoc;Visiting Scientist;Research Scientist;Associate Professor;PhD student", "bibtex": "@inproceedings{\ngatmiry2023what,\ntitle={What is the Inductive Bias of Flatness Regularization? A Study of Deep Matrix Factorization Models},\nauthor={Khashayar Gatmiry and Zhiyuan Li and Tengyu Ma and Sashank J. 
Reddi and Stefanie Jegelka and Ching-Yao Chuang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2hQ7MBQApp}\n}", "github": "", "project": "", "reviewers": "sgkx;PGyv;JwkB;ecS4", "pdf_size": 1828121, "rating": "4;6;7;7", "confidence": "2;3;4;4", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "4;3;3;4", "wc_summary": "89;52;103;120", "wc_strengths": "143;52;37;172", "wc_weaknesses": "390;105;120;271", "wc_questions": "66;187;48;49", "wc_limitations": "12;42;13;9", "wc_review": "700;438;321;621", "wc_reply_reviewers": "207;29;8;26", "wc_reply_authors": "66;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 91.0, 25.0499500997507 ], "wc_strengths_avg": [ 101.0, 57.66714836022326 ], "wc_weaknesses_avg": [ 221.5, 116.9583259114117 ], "wc_questions_avg": [ 87.5, 57.88998186214952 ], "wc_limitations_avg": [ 19.0, 13.360389215887388 ], "wc_review_avg": [ 520.0, 149.1023138653455 ], "wc_reply_reviewers_avg": [ 67.5, 80.93979243857746 ], "wc_reply_authors_avg": [ 16.5, 28.578838324886476 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9847319278346618, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8139929201022589212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;cs.stanford.edu;fb.com;google.com;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Stanford University;Meta;Google", "aff_unique_dep": ";Computer Science Department;Facebook AI Research;Google", "aff_unique_url": "https://web.mit.edu;https://www.stanford.edu;https://research.facebook.com;https://www.google.com", "aff_unique_abbr": "MIT;Stanford;FAIR;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Estimating Propensity for Causality-based Recommendation without Exposure Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72934", "id": "2hhIDEHhkk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a237f11d6aad94f59a182d70405d3fdb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2hhIDEHhkk", "openreview": "https://openreview.net/forum?id=2hhIDEHhkk", "poster": "/media/PosterPDFs/NeurIPS%202023/72934.png?t=1701348288.7601101", "slides": "https://nips.cc/virtual/2023/poster/72934", "video": "https://nips.cc/virtual/2023/poster/72934", "author_site": "Zhongzhou Liu, Yuan Fang, Min Wu", "tldr": "", "abstract": "Causality-based recommendation systems focus on the causal effects of user-item interactions resulting from item exposure (i.e., which items are recommended or exposed to the user), as opposed to conventional correlation-based recommendation. They are gaining popularity due to their multi-sided benefits to users, sellers and platforms alike. However, existing causality-based recommendation methods require additional input in the form of exposure data and/or propensity scores (i.e., the probability of exposure) for training. 
Such data, crucial for modeling causality in recommendation, are often not available in real-world situations due to technical or privacy constraints. In this paper, we bridge the gap by proposing a new framework, called Propensity Estimation for Causality-based Recommendation (PropCare). It can estimate the propensity and exposure from a more practical setup, where only interaction data are available *without* any ground truth on exposure or propensity in training and inference. We demonstrate that, by relating the pairwise characteristics between propensity and item popularity, PropCare enables competitive causality-based recommendation given only the conventional interaction data. We further present a theoretical analysis on the bias of the causal effect under our model estimation. Finally, we empirically evaluate PropCare through both quantitative and qualitative experiments.", "keywords": "recommendation systems;causal effect;propensity score;propensity estimation", "primary_area": "", "supplementary_material": "", "author": "Zhongzhou Liu;Yuan Fang;Min Wu", "authorids": "~Zhongzhou_Liu1;~Yuan_Fang1;~Min_Wu2", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/zhongzhou-liu-101b29106/;http://www.yfang.site;https://sites.google.com/site/wumincf/", "dblp": "246/3207;22/981-1;16/0-8", "google_scholar": "UTteZS0AAAAJ;XkBJjPUAAAAJ;https://scholar.google.com.sg/citations?user=Hji1uWQAAAAJ", "orcid": "0000-0002-3345-1719;0000-0002-4265-5289;0000-0003-0977-3600", "linkedin": "zhongzhou-liu-101b29106/;;", "or_profile": "~Zhongzhou_Liu1;~Yuan_Fang1;~Min_Wu2", "aff": "Singapore Management University;Singapore Management University;Institute for Infocomm Research (I2R), A*STAR", "aff_domain": "smu.edu.sg;smu.edu.sg;i2r.a-star.edu.sg", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nliu2023estimating,\ntitle={Estimating Propensity for Causality-based Recommendation without Exposure Data},\nauthor={Zhongzhou Liu and Yuan Fang and Min Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2hhIDEHhkk}\n}", "github": "", "project": "", "reviewers": "Pdiw;KaLM;Vbya;5c9m;xH3J", "pdf_size": 556114, "rating": "3;4;6;7;8", "confidence": "5;4;4;5;4", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;4;3", "wc_summary": "41;42;88;162;100", "wc_strengths": "47;51;65;54;126", "wc_weaknesses": "233;134;210;1;110", "wc_questions": "70;39;56;130;2", "wc_limitations": "1;2;4;1;1", "wc_review": "392;268;423;348;339", "wc_reply_reviewers": "0;170;14;34;0", "wc_reply_authors": "0;381;14;16;0", "reply_reviewers": "0;2;1;1;0", "reply_authors": "1;3;2;2;1", "rating_avg": [ 5.6, 1.8547236990991407 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 86.6, 44.57622684795113 ], "wc_strengths_avg": [ 68.6, 29.31620712165883 ], "wc_weaknesses_avg": [ 137.6, 82.19148374375536 ], "wc_questions_avg": [ 59.4, 41.998095194901396 ], "wc_limitations_avg": [ 1.8, 1.1661903789690604 ], "wc_review_avg": [ 354.0, 52.653584873206874 ], "wc_reply_reviewers_avg": [ 43.6, 64.41614704404479 ], "wc_reply_authors_avg": [ 82.2, 149.55186391349324 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 
-0.26413527189768715, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9009253124329441424&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "email": "smu.edu.sg;smu.edu.sg;i2r.a-star.edu.sg", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Singapore Management University;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "SMU;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Neuro-symbolic Learning Yielding Logical Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72933", "id": "2ioRi2uwLR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4459c3c143db74ee52afebdf56836375-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ioRi2uwLR", "openreview": "https://openreview.net/forum?id=2ioRi2uwLR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72933", "video": "https://nips.cc/virtual/2023/poster/72933", "author_site": "Zenan Li, Yunpeng Huang, Zhaoyu Li, Yuan Yao, Jingwei Xu, Taolue Chen, Xiaoxing Ma, Jian Lu", "tldr": "", "abstract": "Neuro-symbolic systems combine the abilities of neural perception and logical reasoning. However, end-to-end learning of neuro-symbolic systems is still an unsolved challenge. This paper proposes a natural framework that fuses neural network training, symbol grounding, and logical constraint synthesis into a coherent and efficient end-to-end learning process. The capability of this framework comes from the improved interactions between the neural and the symbolic parts of the system in both the training and inference stages. Technically, to bridge the gap between the continuous neural network and the discrete logical constraint, we introduce a difference-of-convex programming technique to relax the logical constraints while maintaining their precision. We also employ cardinality constraints as the language for logical constraint learning and incorporate a trust region method to avoid the degeneracy of logical constraint in learning. 
Both theoretical analyses and empirical evaluations substantiate the effectiveness of the proposed framework.", "keywords": "Neuro-symbolic learning;logical constraint learning;symbol grounding;difference-of-convex relaxation", "primary_area": "", "supplementary_material": "/attachment/93324c4277df1ac0fd37ca5b04d93625ef345fa2.pdf", "author": "Zenan Li;Yunpeng Huang;Zhaoyu Li;Yuan Yao;Jingwei Xu;Taolue Chen;Xiaoxing Ma;Jian Lu", "authorids": "~Zenan_Li3;~Yunpeng_Huang3;~Zhaoyu_Li3;~Yuan_Yao7;~Jingwei_Xu3;~Taolue_Chen2;~Xiaoxing_Ma1;~Jian_Lu5", "gender": "M;M;M;M;M;;;M", "homepage": "https://lizn-zn.github.io/;https://cs.nju.edu.cn/ics/allpeople/index.html;https://www.zhaoyu-li.com/;;http://ics.nju.edu.cn/people/jingweixu/;;;https://cs.nju.edu.cn/ics/centers/index.html", "dblp": "242/2285;;;25/4120-1;148/9997-1;;;", "google_scholar": "eu4eqTcAAAAJ;whzKjcIAAAAJ;;;15maGTwAAAAJ;;;", "orcid": ";;;;;;;", "linkedin": ";;zhaoyu-li-9171892a5/;;;;;", "or_profile": "~Zenan_Li3;~Yunpeng_Huang3;~Zhaoyu_Li3;~Yuan_Yao7;~Jingwei_Xu3;~Taolue_Chen2;~Xiaoxing_Ma1;~Jian_Lu5", "aff": "Microsoft Research;Nanjing University;McGill University;Nanjing University;Nanjing University;;;Nanjing University", "aff_domain": "research.microsoft.com;nju.edu.cn;cs.mcgill.ca;nju.edu.cn;nju.edu.cn;;;nju.edu.cn", "position": "Intern;MS student;PhD student;Associate Professor;Assistant Professor;;;Full Professor", "bibtex": "@inproceedings{\nli2023neurosymbolic,\ntitle={Neuro-symbolic Learning Yielding Logical Constraints},\nauthor={Zenan Li and Yunpeng Huang and Zhaoyu Li and Yuan Yao and Jingwei Xu and Taolue Chen and Xiaoxing Ma and Jian Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2ioRi2uwLR}\n}", "github": "", "project": "", "reviewers": "X5tc;MqQr;MGh8;w2hx", "pdf_size": 3410010, "rating": "5;7;7;7", "confidence": "3;3;3;1", "soundness": "3;3;3;3", "novelty": "3;4;3;3", "presentation": "2;2;3;3", "wc_summary": "85;67;52;58", "wc_strengths": "72;57;235;38", "wc_weaknesses": "68;110;48;56", "wc_questions": "55;157;80;34", "wc_limitations": "23;25;6;16", "wc_review": "303;416;421;202", "wc_reply_reviewers": "0;26;6;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.5, 12.459935794377111 ], "wc_strengths_avg": [ 100.5, 78.58275892331599 ], "wc_weaknesses_avg": [ 70.5, 23.89037463080058 ], "wc_questions_avg": [ 81.5, 46.53224688320993 ], "wc_limitations_avg": [ 17.5, 7.433034373659253 ], "wc_review_avg": [ 335.5, 90.37283883999662 ], "wc_reply_reviewers_avg": [ 9.25, 9.934158243152764 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5050672087291605524&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "research.microsoft.com;nju.edu.cn;cs.mcgill.ca;nju.edu.cn;nju.edu.cn;;;nju.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Microsoft;Nanjing University;McGill University", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": 
"https://www.microsoft.com/en-us/research;https://www.nju.edu.cn;https://www.mcgill.ca", "aff_unique_abbr": "MSR;Nanjing U;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;1;1", "aff_country_unique": "United States;China;Canada" }, { "title": "A Combinatorial Algorithm for Approximating the Optimal Transport in the Parallel and MPC Settings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72932", "id": "2izFpGERjU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/448444518637da106d978ae7409d9789-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2izFpGERjU", "openreview": "https://openreview.net/forum?id=2izFpGERjU", "poster": "/media/PosterPDFs/NeurIPS%202023/72932.png?t=1702054272.0042584", "slides": "https://nips.cc/virtual/2023/poster/72932", "video": "https://nips.cc/virtual/2023/poster/72932", "author_site": "Nathaniel Lahn, Sharath Raghvendra, Kaiyi Zhang", "tldr": "", "abstract": "Optimal Transport is a popular distance metric for measuring similarity between distributions. Exact and approximate combinatorial algorithms for computing the optimal transport distance are hard to parallelize. This has motivated the development of numerical solvers (e.g. Sinkhorn method) that can exploit GPU parallelism and produce approximate solutions. \n\nWe introduce the first parallel combinatorial algorithm to find an additive $\\varepsilon$-approximation of the OT distance. The parallel complexity of our algorithm is $O(\\log(n)/ \\varepsilon^2)$ where $n$ is the total support size for the input distributions. In Massive Parallel Computation (MPC) frameworks such as Hadoop and MapReduce, our algorithm computes an $\\varepsilon$-approximate transport plan in $O(\\log (\\log (n/\\varepsilon))/\\varepsilon^2)$ rounds with $O(n/\\varepsilon)$ space per machine; all prior algorithms in the MPC framework take $\\Omega(\\log n)$ rounds. \nWe also provide a GPU-friendly matrix-based interpretation of our algorithm where each step of the algorithm is row or column manipulation of the matrix. 
Experiments suggest that our combinatorial algorithm is faster than the state-of-the-art approximate solvers in the GPU, especially for higher values of $n$.", "keywords": "Optimal Transport;Combinatorial Optimization", "primary_area": "", "supplementary_material": "/attachment/8921afa945712fed789baa22211b6cafab88b904.zip", "author": "Nathaniel Lahn;Sharath Raghvendra;Kaiyi Zhang", "authorids": "~Nathaniel_Lahn1;~Sharath_Raghvendra1;~Kaiyi_Zhang2", "gender": "M;M;M", "homepage": "http://people.cs.vt.edu/~sharathr/;https://kaiyiz.github.io/;", "dblp": "149/2582;254/0055-4;211/8127", "google_scholar": "https://scholar.google.com.tw/citations?user=kOfRa7MAAAAJ;n-Hg5SwAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sharath_Raghvendra1;~Kaiyi_Zhang2;~Nathaniel_Adam_Lahn1", "aff": "Virginia Tech;Virginia Polytechnic Institute and State University;Radford University", "aff_domain": "vt.edu;vt.edu;radford.edu", "position": "Associate Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlahn2023a,\ntitle={A Combinatorial Algorithm for Approximating the Optimal Transport in the Parallel and {MPC} Settings},\nauthor={Nathaniel Lahn and Sharath Raghvendra and Kaiyi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2izFpGERjU}\n}", "github": "", "project": "", "reviewers": "i1bb;dyx3;pKrQ;mqPb", "pdf_size": 1031136, "rating": "6;6;6;6", "confidence": "4;3;2;3", "soundness": "3;4;4;3", "novelty": "3;3;2;3", "presentation": "3;4;4;3", "wc_summary": "163;84;72;201", "wc_strengths": "105;87;23;69", "wc_weaknesses": "65;70;142;131", "wc_questions": "241;109;39;59", "wc_limitations": "1;5;1;8", "wc_review": "575;355;277;468", "wc_reply_reviewers": "24;15;34;35", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 130.0, 53.874854988203914 ], "wc_strengths_avg": [ 71.0, 30.495901363953813 ], "wc_weaknesses_avg": [ 102.0, 34.763486591537394 ], "wc_questions_avg": [ 112.0, 78.7210264160726 ], "wc_limitations_avg": [ 3.75, 2.947456530637899 ], "wc_review_avg": [ 418.75, 112.91230003856975 ], "wc_reply_reviewers_avg": [ 27.0, 8.154753215150045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6286579725494222726&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "vt.edu;vt.edu;radford.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Virginia Tech;Radford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://www.radford.edu", "aff_unique_abbr": "VT;RU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Memory-Efficient Fine-Tuning of Compressed Large Language Models via sub-4-bit Integer Quantization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72931", "id": "2jUKhUrBxP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7183f4fc87598f6c6e947b96714acbd6-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=2jUKhUrBxP", "openreview": "https://openreview.net/forum?id=2jUKhUrBxP", "poster": "/media/PosterPDFs/NeurIPS%202023/72931.png?t=1701751656.94606", "slides": "https://nips.cc/virtual/2023/poster/72931", "video": "https://nips.cc/virtual/2023/poster/72931", "author_site": "Jeonghoon Kim, Jung Hyun Lee, Sungdong Kim, Joonsuk Park, Kang Min Yoo, Se Jung Kwon, Dongsoo Lee", "tldr": "", "abstract": "Large language models (LLMs) face the challenges in fine-tuning and deployment due to their high memory demands and computational costs. While parameter-efficient fine-tuning (PEFT) methods aim to reduce the memory usage of the optimizer state during fine-tuning, the inherent size of pre-trained LLM weights continues to be a pressing concern. \nEven though quantization techniques are widely proposed to ease memory demands and accelerate LLM inference, most of these techniques are geared towards the deployment phase.\nTo bridge this gap, this paper presents Parameter-Efficient and Quantization-aware Adaptation (PEQA) \u2013 a simple yet effective method that combines the advantages of PEFT with quantized LLMs. \nBy updating solely the quantization scales, PEQA can be directly applied to quantized LLMs, ensuring seamless task transitions. Parallel to existing PEFT methods, PEQA significantly reduces the memory overhead associated with the optimizer state. Furthermore, it leverages the advantages of quantization to substantially reduce model sizes. Even after fine-tuning, the quantization structure of a PEQA-tuned LLM remains intact, allowing for accelerated inference on the deployment stage.\nWe employ PEQA-tuning for task-specific adaptation on LLMs with up to $65$ billion parameters. To assess the logical reasoning and language comprehension of PEQA-tuned LLMs, we fine-tune low-bit quantized LLMs using a instruction dataset. 
\nOur results show that even when LLMs are quantized to below 4-bit precision, their capabilities in language modeling, few-shot in-context learning, and comprehension can be resiliently restored to (or even improved over) their full-precision original performances with PEQA.", "keywords": "Large Language Models;Parameter-Efficient Fine-Tuning;Neural Network Quantization", "primary_area": "", "supplementary_material": "", "author": "Jeonghoon Kim;Jung Hyun Lee;Sungdong Kim;Joonsuk Park;Kang Min Yoo;Se Jung Kwon;Dongsoo Lee", "authorids": "~Jeonghoon_Kim1;~Jung_Hyun_Lee1;~Sungdong_Kim1;~Joonsuk_Park1;~Kang_Min_Yoo2;~Se_Jung_Kwon1;~Dongsoo_Lee1", "gender": "M;M;;M;M;M;M", "homepage": ";;;http://www.joonsuk.org;;;", "dblp": ";132/2899;118/1568;50/9717;163/5657;119/5676;11/9680", "google_scholar": "https://scholar.google.com/citations?hl=ko;;xKrSnDoAAAAJ;3SPMM3oAAAAJ;BqaWtH8AAAAJ;https://scholar.google.co.kr/citations?user=8eTxKOkAAAAJ;ALiieEkAAAAJ", "orcid": "0000-0002-6068-6476;;;0000-0002-1182-4836;;;", "linkedin": "jeonghoon-kim-804892175/;;;;;se-jung-kwon-305503175/;", "or_profile": "~Jeonghoon_Kim1;~Jung_Hyun_Lee1;~Sungdong_Kim1;~Joonsuk_Park1;~Kang_Min_Yoo2;~Se_Jung_Kwon1;~Dongsoo_Lee1", "aff": "NAVER;NAVER CLOVA;NAVER;University of Richmond;NAVER;NAVER Cloud;NAVER CLOVA", "aff_domain": "navercorp.com;navercorp.com;navercorp.com;richmond.edu;navercorp.com;navercorp.com;navercorp.com", "position": "Researcher;Researcher;Researcher;Assistant Professor;Researcher;AI Researcher;Executive Officer", "bibtex": "@inproceedings{\nkim2023memoryefficient,\ntitle={Memory-Efficient Fine-Tuning of Compressed Large Language Models via sub-4-bit Integer Quantization},\nauthor={Jeonghoon Kim and Jung Hyun Lee and Sungdong Kim and Joonsuk Park and Kang Min Yoo and Se Jung Kwon and Dongsoo Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2jUKhUrBxP}\n}", "github": "", "project": "", "reviewers": "9H2h;qn8N;rGqm;jX2S", "pdf_size": 651040, "rating": "5;5;5;5", "confidence": "3;4;5;5", "soundness": "4;2;2;3", "novelty": "2;3;2;2", "presentation": "4;3;2;3", "wc_summary": "86;72;52;105", "wc_strengths": "20;46;69;129", "wc_weaknesses": "28;71;169;354", "wc_questions": "60;2;86;127", "wc_limitations": "15;2;2;10", "wc_review": "209;193;378;725", "wc_reply_reviewers": "17;0;14;50", "wc_reply_authors": "22;228;28;74", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.75, 19.382659776202026 ], "wc_strengths_avg": [ 66.0, 40.29267923581156 ], "wc_weaknesses_avg": [ 155.5, 125.48007810007132 ], "wc_questions_avg": [ 68.75, 45.33969011804117 ], "wc_limitations_avg": [ 7.25, 5.539629951540085 ], "wc_review_avg": [ 376.25, 213.99926985856752 ], "wc_reply_reviewers_avg": [ 20.25, 18.335416548308903 ], "wc_reply_authors_avg": [ 88.0, 83.2946576918352 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 110, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16342210724917451412&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "navercorp.com;navercorp.com;navercorp.com;richmond.edu;navercorp.com;navercorp.com;navercorp.com", 
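A minimal sketch of the "update solely the quantization scales" idea, assuming a symmetric uniform quantizer with per-output-channel scales (our illustration of the mechanism, not the authors' code):

```python
import torch
import torch.nn as nn

class ScaleOnlyQuantLinear(nn.Module):
    """Linear layer with frozen integer-valued weights; only the per-channel
    quantization scales are trainable (a sketch of the PEQA idea)."""
    def __init__(self, weight: torch.Tensor, bits: int = 4):
        super().__init__()
        qmax = 2 ** (bits - 1) - 1
        scale = weight.abs().amax(dim=1, keepdim=True) / qmax   # per-row scale
        q = torch.clamp(torch.round(weight / scale), -qmax - 1, qmax)
        self.register_buffer("q_weight", q)   # frozen low-bit integers
        self.scale = nn.Parameter(scale)      # the only trainable tensor
    def forward(self, x):
        return x @ (self.scale * self.q_weight).t()

layer = ScaleOnlyQuantLinear(torch.randn(8, 16), bits=4)
layer(torch.randn(2, 16)).sum().backward()
print(layer.scale.grad.shape)   # torch.Size([8, 1]); q_weight gets no gradient
```

Because the integer weights never change, the quantized structure survives fine-tuning, which is the deployment property the abstract emphasizes.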
"author_num": 7, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "NAVER Corporation;University of Richmond", "aff_unique_dep": ";", "aff_unique_url": "https://www.naver.com;https://www.richmond.edu", "aff_unique_abbr": "NAVER;UR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Density of States Prediction of Crystalline Materials via Prompt-guided Multi-Modal Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72930", "id": "2lWh1G1W1I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c23fdcb9f8e28af705a87de1375a705c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2lWh1G1W1I", "openreview": "https://openreview.net/forum?id=2lWh1G1W1I", "poster": "/media/PosterPDFs/NeurIPS%202023/72930.png?t=1699444060.9427392", "slides": "https://nips.cc/virtual/2023/poster/72930", "video": "https://nips.cc/virtual/2023/poster/72930", "author_site": "Namkyeong Lee, Heewoong Noh, Sungwon Kim, Dongmin Hyun, Gyoung S. Na, Chanyoung Park", "tldr": "", "abstract": "The density of states (DOS) is a spectral property of crystalline materials, which provides fundamental insights into various characteristics of the materials.\nWhile previous works mainly focus on obtaining high-quality representations of crystalline materials for DOS prediction, we focus on predicting the DOS from the obtained representations by reflecting the nature of DOS: DOS determines the general distribution of states as a function of energy.\nThat is, DOS is not solely determined by the crystalline material but also by the energy levels, which has been neglected in previous works.\nIn this paper, we propose to integrate heterogeneous information obtained from the crystalline materials and the energies via a multi-modal transformer, thereby modeling the complex relationships between the atoms in the crystalline materials and various energy levels for DOS prediction.\nMoreover, we propose to utilize prompts to guide the model to learn the crystal structural system-specific interactions between crystalline materials and energies.\nExtensive experiments on two types of DOS, i.e., Phonon DOS and Electron DOS, with various real-world scenarios demonstrate the superiority of DOSTransformer.\nThe source code for DOSTransformer is available at https://github.com/HeewoongNoh/DOSTransformer.", "keywords": "ML4Materials;AI4Science;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/1be6f287cb1c6e5c556dd571f54dd9307fb463a9.zip", "author": "Namkyeong Lee;Heewoong Noh;Sungwon Kim;Dongmin Hyun;Gyoung S. 
Na;Chanyoung Park", "authorids": "~Namkyeong_Lee1;~Heewoong_Noh2;~Sungwon_Kim3;~Dongmin_Hyun1;~Gyoung_S._Na2;~Chanyoung_Park1", "gender": "M;M;M;;;M", "homepage": "https://namkyeong.github.io/;https://github.com/HeewoongNoh;https://sung-won-kim.github.io;https://dmhyun.github.io;;https://dsail.kaist.ac.kr/", "dblp": "308/0443;342/4417;59/5163-2;222/1225;;170/5430.html", "google_scholar": "88ZqjpwAAAAJ;VIAHsCMAAAAJ;https://scholar.google.co.kr/citations?hl=ko;UkmiMawAAAAJ;;lWk2LtQAAAAJ", "orcid": ";;0000-0001-8605-2618;0000-0001-7757-3227;;0000-0002-5957-5816", "linkedin": ";;sungwon-kim/;dmhyun/;;", "or_profile": "~Namkyeong_Lee1;~Heewoong_Noh2;~Sungwon_Kim3;~Dongmin_Hyun1;~Gyoung_S._Na2;~Chanyoung_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea University;Korea Advanced Institute of Science & Technology;Pohang University of Science and Technology;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;korea.ac.kr;kaist.ac.kr;postech.ac.kr;;kaist.ac.kr", "position": "MS student;Undergrad student;MS student;Postdoc;;Assistant Professor", "bibtex": "@inproceedings{\nlee2023density,\ntitle={Density of States Prediction of Crystalline Materials via Prompt-guided Multi-Modal Transformer},\nauthor={Namkyeong Lee and Heewoong Noh and Sungwon Kim and Dongmin Hyun and Gyoung S. Na and Chanyoung Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2lWh1G1W1I}\n}", "github": "", "project": "", "reviewers": "pieM;E5Yg;xyRJ;pHUW;9USD", "pdf_size": 1364757, "rating": "6;6;6;7;7", "confidence": "4;3;4;4;2", "soundness": "3;3;3;3;3", "novelty": "2;2;3;4;3", "presentation": "3;4;3;4;3", "wc_summary": "46;49;70;133;53", "wc_strengths": "36;53;45;140;84", "wc_weaknesses": "213;280;83;28;99", "wc_questions": "5;7;27;1;133", "wc_limitations": "8;29;5;1;25", "wc_review": "308;418;230;303;394", "wc_reply_reviewers": "48;36;0;31;20", "wc_reply_authors": "19;20;0;36;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 70.2, 32.480147782915026 ], "wc_strengths_avg": [ 71.6, 37.82380202993877 ], "wc_weaknesses_avg": [ 140.6, 92.08821857327897 ], "wc_questions_avg": [ 34.6, 50.014397926996985 ], "wc_limitations_avg": [ 13.6, 11.235657524150511 ], "wc_review_avg": [ 330.6, 67.89874814751741 ], "wc_reply_reviewers_avg": [ 27.0, 16.2234398325386 ], "wc_reply_authors_avg": [ 15.0, 13.65283853270081 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14598182067363600701&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.edu;korea.ac.kr;kaist.ac.kr;postech.ac.kr;;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University;Pohang University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr;https://www.postech.ac.kr", "aff_unique_abbr": "KAIST;KU;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "South Korea" }, { "title": "Double Auctions with Two-sided Bandit Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72929", "id": "2nTpPxJ5Bs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0bcfb525c8f8f07ae10a93d0b2a40e00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2nTpPxJ5Bs", "openreview": "https://openreview.net/forum?id=2nTpPxJ5Bs", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72929", "video": "https://nips.cc/virtual/2023/poster/72929", "author_site": "Soumya Basu, Abishek Sankararaman", "tldr": "", "abstract": "Double Auction enables decentralized transfer of goods between multiple buyers and sellers, thus underpinning functioning of many online marketplaces. Buyers and sellers compete in these markets through bidding, but do not often know their own valuation a-priori. As the allocation and pricing happens through bids, the profitability of participants, hence sustainability of such markets, depends crucially on learning respective valuations through repeated interactions. We initiate the study of Double Auction markets under bandit feedback on both buyers' and sellers' side. We show with confidence bound based bidding, and `Average Pricing' there is an efficient price discovery among the participants. In particular, the regret on combined valuation of the buyers and the sellers -- a.k.a. the social regret -- is $O(\\log(T)/\\Delta)$ in $T$ rounds, where $\\Delta$ is the minimum price gap. Moreover, the buyers and sellers exchanging goods attain $O(\\sqrt{T})$ regret, individually. The buyers and sellers who do not benefit from exchange in turn only experience $O(\\log{T}/ \\Delta)$ regret individually in $T$ rounds. We augment our upper bound by showing that $\\omega(\\sqrt{T})$ individual regret, and $\\omega(\\log{T})$ social regret is unattainable in certain Double Auction markets. 
Our paper is the first to provide decentralized learning algorithms in a two-sided market where \\emph{both sides have uncertain preference} that need to be learned.", "keywords": "Double Auction;Markets;Bandits;Regret", "primary_area": "", "supplementary_material": "/attachment/88b4378dc8d4953585c56f664dee35f272f035ce.pdf", "author": "Soumya Basu;Abishek Sankararaman", "authorids": "~Soumya_Basu2;~Abishek_Sankararaman1", "gender": "M;M", "homepage": "https://basusoumya.github.io/;http://abishek90.github.io/", "dblp": "153/0318-1;https://dblp.uni-trier.de/pers/hd/s/Sankararaman:Abishek", "google_scholar": "VNQp_doAAAAJ;3T9FHn0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Soumya_Basu2;~Abishek_Sankararaman1", "aff": "Google;Amazon", "aff_domain": "google.com;amazon.com", "position": "SWE;Researcher", "bibtex": "@inproceedings{\nbasu2023double,\ntitle={Double Auctions with Two-sided Bandit Feedback},\nauthor={Soumya Basu and Abishek Sankararaman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2nTpPxJ5Bs}\n}", "github": "", "project": "", "reviewers": "bD4t;51dr;BEVv;XUqU", "pdf_size": 949109, "rating": "5;5;7;8", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "102;153;262;111", "wc_strengths": "20;28;95;130", "wc_weaknesses": "91;107;186;39", "wc_questions": "26;25;1;77", "wc_limitations": "1;14;2;27", "wc_review": "240;327;546;384", "wc_reply_reviewers": "104;41;0;10", "wc_reply_authors": "82;35;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 157.0, 63.604245141342574 ], "wc_strengths_avg": [ 68.25, 46.03463370116026 ], "wc_weaknesses_avg": [ 105.75, 52.71325734575696 ], "wc_questions_avg": [ 32.25, 27.707174161216802 ], "wc_limitations_avg": [ 11.0, 10.559356040971437 ], "wc_review_avg": [ 374.25, 111.63416815652813 ], "wc_reply_reviewers_avg": [ 38.75, 40.59171713539598 ], "wc_reply_authors_avg": [ 29.25, 33.640563312762765 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9622504486493761, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1119412234927019752&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;amazon.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Amazon", "aff_unique_dep": "Google;Amazon.com, Inc.", "aff_unique_url": "https://www.google.com;https://www.amazon.com", "aff_unique_abbr": "Google;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2pVogxJyDA", "title": "PromptCoT: Align Prompt Distribution via Adapted Chain of Thought", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion-based generative models have exhibited remarkable capability in the production of high-fidelity visual content such as images and videos. However, their performance is significantly contingent upon the quality of textual inputs, commonly referred to as \"prompts\". 
The process of traditional prompt engineering, while effective, necessitates empirical expertise and poses challenges for inexperienced users. In this paper, we introduce PromptCoT, an innovative enhancer that autonomously refines prompts for users. The design of PromptCoT is based on the observation that prompts resembling textual information corresponding to high-quality images within the training set tend to yield superior generation performance. As such, we fine-tune a pre-trained Large Language Model (LLM) using a curated text dataset consisting solely of high-quality visual content descriptions. By doing so, the LLM becomes capable of capturing the distribution of high-quality training texts, enabling it to generate aligned continuations and revisions to boost the original texts. Nonetheless, one drawback of pre-trained LLMs is their tendency to generate extraneous or irrelevant information. To enhance the alignment between the original text prompts and the refined counterparts, we leverage the Chain-of-Thought (CoT) mechanism. CoT can extract and amalgamate crucial information from the aligned continuation and revision, enabling reasonable inferences based on the contextual cues to produce a more comprehensive and nuanced final output. Considering computational efficiency, instead of allocating a dedicated LLM for prompt enhancement to each individual model or dataset, we integrate adapters that facilitate dataset-specific adaptation, leveraging a shared pre-trained LLM as the foundation for this process. By fine-tuning these adapters independently, we can adapt PromptCoT to new datasets with minimal increase in training cost and memory usage. We assess the performance of PromptCoT on widely-used latent diffusion models for image and video generation to validate its effectiveness. The results demonstrate significant improvements in key performance metrics.
", "keywords": "text-to-image generative models;prompt engineering;Chain of Thought;parameter efficient adaptation;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/f1325f5da4e0eb6f12608b1c1d6fad98ecf7113b.pdf", "author": "Junyi Yao;Yijiang Liu;Zhen Dong;Mingfei Guo;Jiashi Feng;Kurt Keutzer;Li Du;Daquan Zhou;Shanghang Zhang", "authorids": "~Junyi_Yao1;~Yijiang_Liu2;~Zhen_Dong3;~Mingfei_Guo1;~Jiashi_Feng1;~Kurt_Keutzer1;~Li_Du5;~Daquan_Zhou1;~Shanghang_Zhang4", "gender": "M;M;M;F;M;M;M;M;F", "homepage": ";;https://dong-zhen.com/;https://www.linkedin.com/in/mingfeiguo/;https://people.eecs.berkeley.edu/~keutzer/;;https://sites.google.com/site/jshfeng/;https://iscl.nju.edu.cn/main.psp;https://www.shanghangzhang.com/", "dblp": ";;;;k/KurtKeutzer.html;244/9623;56/8278;;95/11531", "google_scholar": ";uOyz518AAAAJ;czxMUzcAAAAJ;;ID9QePIAAAAJ;DdCAbWwAAAAJ;https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ;;voqw10cAAAAJ", "orcid": "0009-0002-1437-2836;0000-0001-5914-1607;;;0000-0003-3868-8501;;0000-0001-6843-0064;0000-0003-2687-6978;", "linkedin": ";;zhen-dong/;;kurtkeutzer/;;;;", "or_profile": "~Junyi_Yao1;~Yijiang_Liu2;~Zhen_Dong3;~Mingfei_Guo1;~Kurt_Keutzer1;~Zhou_Daquan1;~Jiashi_Feng2;~LI_DU4;~Shanghang_Zhang1", "aff": "Peking University;Nanjing Universiy;University of California, Berkeley;Stanford University;University of California, Berkeley;Bytedance;ByteDance;Nanjing University;Peking University", "aff_domain": "stu.pku.edu.cn;nju.edu.cn;berkeley.edu;stanford.edu;berkeley.edu;bytedance.com;bytedance.com;nju.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Postdoc;MS student;Full Professor;Researcher;Research Lead;Associate Professor;Assistant Professor", "bibtex": "@misc{\nyao2023promptcot,\ntitle={PromptCoT: Align Prompt Distribution via Adapted Chain of Thought},\nauthor={Junyi Yao and Yijiang Liu and Zhen Dong and Mingfei Guo and Jiashi Feng and Kurt Keutzer and Li Du and Daquan Zhou and Shanghang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=2pVogxJyDA}\n}", "github": "", "project": "", "reviewers": "2oA6;MonK;u5y1;3t2z", "site": "https://openreview.net/forum?id=2pVogxJyDA", "pdf_size": 8779763, "rating": "4;5;5;7", "confidence": "5;3;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "47;106;80;128", "wc_strengths": "28;14;54;127", "wc_weaknesses": "159;131;95;60", "wc_questions": "5;20;37;6", "wc_limitations": "6;38;5;11", "wc_review": "245;309;271;332", "wc_reply_reviewers": "65;16;11;39", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.25, 30.202441954252638 ], "wc_strengths_avg": [ 55.75, 43.56819367382587 ], "wc_weaknesses_avg": [ 111.25, 37.28521825066872 ], "wc_questions_avg": [ 17.0, 12.98075498574717 ], "wc_limitations_avg": [ 15.0, 13.47219358530748 ], "wc_review_avg": [ 289.25, 33.573613150806395 ], "wc_reply_reviewers_avg": [ 32.75, 21.405314760591587 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2816106117694485180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;2;4;4;1;0", "aff_unique_norm": "Peking University;Nanjing University;University of California, Berkeley;Stanford University;ByteDance", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;http://www.nju.edu.cn;https://www.berkeley.edu;https://www.stanford.edu;https://www.bytedance.com", "aff_unique_abbr": "Peking U;Nanjing U;UC Berkeley;Stanford;Bytedance", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Berkeley;Stanford", "aff_country_unique_index": "0;0;1;1;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "What Do Deep Saliency Models Learn about Visual Attention?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72928", "id": "2rq4LwwjfE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e680f115a22d60cbc228a0c6dae5936-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2rq4LwwjfE", "openreview": "https://openreview.net/forum?id=2rq4LwwjfE", "poster": "/media/PosterPDFs/NeurIPS%202023/72928.png?t=1699828993.360555", "slides": "https://nips.cc/virtual/2023/poster/72928", "video": "https://nips.cc/virtual/2023/poster/72928", "author_site": "Shi Chen, Ming Jiang, Qi Zhao", "tldr": "", "abstract": "In recent years, deep saliency models have made significant progress in predicting human visual attention. However, the mechanisms behind their success remain largely unexplained due to the opaque nature of deep neural networks. In this paper, we present a novel analytic framework that sheds light on the implicit features learned by saliency models and provides principled interpretation and quantification of their contributions to saliency prediction. Our approach decomposes these implicit features into interpretable bases that are explicitly aligned with semantic attributes and reformulates saliency prediction as a weighted combination of probability maps connecting the bases and saliency. By applying our framework, we conduct extensive analyses from various perspectives, including the positive and negative weights of semantics, the impact of training data and architectural designs, the progressive influences of fine-tuning, and common error patterns of state-of-the-art deep saliency models. Additionally, we demonstrate the effectiveness of our framework by exploring visual attention characteristics in various application scenarios, such as the atypical attention of people with autism spectrum disorder, attention to emotion-eliciting stimuli, and attention evolution over time. 
Our code is publicly available at \\url{https://github.com/szzexpoi/saliency_analysis}.", "keywords": "Saliency prediction;human attention;low-level vision", "primary_area": "", "supplementary_material": "/attachment/6d92114c331fb95a23d7dd64c2d2179c67f07e48.pdf", "author": "Shi Chen;Ming Jiang;Qi Zhao", "authorids": "~Shi_Chen1;~Ming_Jiang1;~Qi_Zhao1", "gender": "M;F;M", "homepage": ";https://www-users.cs.umn.edu/~qzhao;", "dblp": "24/2311-1;05/490-1.html;", "google_scholar": "hwODS_4AAAAJ;;JbLTK4AAAAAJ", "orcid": ";;0000-0001-6439-5476", "linkedin": ";;jiangming", "or_profile": "~Shi_Chen1;~Qi_Zhao1;~Ming_Jiang5", "aff": "University of Minnesota, Twin Cities;University of Minnesota, Minneapolis;University of Minnesota, Minneapolis", "aff_domain": "umn.edu;cs.umn.edu;umn.edu", "position": "PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nchen2023what,\ntitle={What Do Deep Saliency Models Learn about Visual Attention?},\nauthor={Shi Chen and Ming Jiang and Qi Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2rq4LwwjfE}\n}", "github": "", "project": "", "reviewers": "UKcT;srbe;ZMfD;u4m5", "pdf_size": 4298924, "rating": "4;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "94;65;71;183", "wc_strengths": "33;111;73;236", "wc_weaknesses": "495;52;528;63", "wc_questions": "2;64;440;26", "wc_limitations": "30;14;5;300", "wc_review": "654;306;1117;808", "wc_reply_reviewers": "400;0;0;21", "wc_reply_authors": "985;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.25, 47.2989164780759 ], "wc_strengths_avg": [ 113.25, 76.04727148294013 ], "wc_weaknesses_avg": [ 284.5, 227.33290566919695 ], "wc_questions_avg": [ 133.0, 178.61970775925036 ], "wc_limitations_avg": [ 87.25, 123.15716584917014 ], "wc_review_avg": [ 721.25, 292.0182999402606 ], "wc_reply_reviewers_avg": [ 105.25, 170.38981043477924 ], "wc_reply_authors_avg": [ 246.25, 426.51751136383604 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10621976808396912672&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "umn.edu;cs.umn.edu;umn.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Twin Cities;Minneapolis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "MARBLE: Music Audio Representation Benchmark for Universal Evaluation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73709", "id": "2s7ZZUhEGS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7cbeec46f979618beafb4f46d8f39f36-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=2s7ZZUhEGS", "openreview": "https://openreview.net/forum?id=2s7ZZUhEGS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73709", 
"video": "https://nips.cc/virtual/2023/poster/73709", "author_site": "Ruibin Yuan, Yinghao Ma, Yizhi Li, Ge Zhang, Xingran Chen, Hanzhi Yin, zhuo le, Yiqi Liu, Jiawen Huang, Zeyue Tian, Binyue Deng, Ningzhi Wang, Chenghua Lin, Emmanouil Benetos, Anton Ragni, Norbert Gyenge, Roger Dannenberg, Wenhu Chen, Gus Xia, Wei Xue, Si Liu, Shi Wang, Ruibo Liu, Yike Guo, Jie Fu", "tldr": "", "abstract": "In the era of extensive intersection between art and Artificial Intelligence (AI), such as image generation and fiction co-creation, AI for music remains relatively nascent, particularly in music understanding. This is evident in the limited work on deep music representations, the scarcity of large-scale datasets, and the absence of a universal and community-driven benchmark. To address this issue, we introduce the Music Audio Representation Benchmark for universaL Evaluation, termed MARBLE. It aims to provide a benchmark for various Music Information Retrieval (MIR) tasks by defining a comprehensive taxonomy with four hierarchy levels, including acoustic, performance, score, and high-level description. We then establish a unified protocol based on 18 tasks on 12 public-available datasets, providing a fair and standard assessment of representations of all open-sourced pre-trained models developed on music recordings as baselines. Besides, MARBLE offers an easy-to-use, extendable, and reproducible suite for the community, with a clear statement on copyright issues on datasets. Results suggest recently proposed large-scale pre-trained musical language models perform the best in most tasks, with room for further improvement. The leaderboard and toolkit repository are published to promote future music AI research.", "keywords": "music;representation learning;self-supervised learning;evaluation", "primary_area": "", "supplementary_material": "/attachment/7bfce2d343d924ac17abdbf0f7fc1ecb7b50f840.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nyuan2023marble,\ntitle={{MARBLE}: Music Audio Representation Benchmark for Universal Evaluation},\nauthor={Ruibin Yuan and Yinghao Ma and Yizhi LI and Ge Zhang and Xingran Chen and Hanzhi Yin and Le Zhuo and Yiqi Liu and Jiawen Huang and Zeyue Tian and Binyue Deng and Ningzhi Wang and Chenghua Lin and Emmanouil Benetos and Anton Ragni and Norbert Gyenge and Roger Dannenberg and Wenhu Chen and Gus Xia and Wei Xue and Si Liu and Shi Wang and Ruibo Liu and Yike Guo and Jie Fu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=2s7ZZUhEGS}\n}", "github": "", "project": "", "reviewers": "Gtoe;NaXM;fqYR;Rutv;XGB3", "pdf_size": 687097, "rating": "5;5;5;6;6", "confidence": "4;3;3;2;5", "wc_summary_and_contributions": "170;55;113;77;49", "wc_strengths": "15;50;51;40;70", "wc_improvement": "79;123;144;38;77", "wc_limitations": "1;84;9;92;54", "wc_correctness": "1;7;1;4;2", "wc_clarity": "1;8;1;4;7", "wc_relation_to_prior_work": "25;9;1;4;36", "wc_documentation": "1;1;12;3;8", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "294;338;333;263;304", "wc_reply_reviewers": "0;26;0;152;29", "wc_reply_authors": "311;152;117;564;300", "reply_reviewers": "0;1;0;2;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 1.019803902718557 ], 
"wc_summary_and_contributions_avg": [ 92.8, 44.64258057057186 ], "wc_strengths_avg": [ 45.2, 17.948816116947658 ], "wc_improvement_avg": [ 92.2, 37.34916331057498 ], "wc_limitations_avg": [ 48.0, 37.41122826104484 ], "wc_correctness_avg": [ 3.0, 2.280350850198276 ], "wc_clarity_avg": [ 4.2, 2.9257477676655586 ], "wc_relation_to_prior_work_avg": [ 15.0, 13.371611720357423 ], "wc_documentation_avg": [ 5.0, 4.33589667773576 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 306.4, 27.383206532471686 ], "wc_reply_reviewers_avg": [ 41.4, 56.658979870802476 ], "wc_reply_authors_avg": [ 288.8, 157.8498020271169 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.08006407690254358, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9647000426883667434&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "", "author_num": 1 }, { "title": "Training on Foveated Images Improves Robustness to Adversarial Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72927", "id": "2tfG9QaFA7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0c256700465c158de71081b4cf5e8c3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2tfG9QaFA7", "openreview": "https://openreview.net/forum?id=2tfG9QaFA7", "poster": "/media/PosterPDFs/NeurIPS%202023/72927.png?t=1702043524.5536292", "slides": "https://nips.cc/virtual/2023/poster/72927", "video": "https://nips.cc/virtual/2023/poster/72927", "author_site": "Muhammad Shah, Aqsa Kashaf, Bhiksha Raj", "tldr": "", "abstract": "Deep neural networks (DNNs) have been shown to be vulnerable to adversarial attacks\n-- subtle, perceptually indistinguishable perturbations of inputs that change the response of the model. In the context of vision, we hypothesize that an important contributor to the robustness of human visual perception is constant exposure to low-fidelity visual stimuli in our peripheral vision. To investigate this hypothesis, we develop RBlur, an image transform that simulates the loss in fidelity of peripheral vision by blurring the image and reducing its color saturation based on the distance from a given fixation point. 
We show that compared to DNNs trained on the original images, DNNs trained on images transformed by RBlur are substantially more robust to adversarial attacks, as well as other, non-adversarial, corruptions, achieving up to 25% higher accuracy on perturbed data.", "keywords": "adversarial robustness;computer vision;biologically-inspired;retina;blurring", "primary_area": "", "supplementary_material": "/attachment/a90671eee220b8a743c215b4fa84b91479829bdd.zip", "author": "Muhammad A Shah;Aqsa Kashaf;Bhiksha Raj", "authorids": "~Muhammad_A_Shah1;~Aqsa_Kashaf1;~Bhiksha_Raj1", "gender": ";F;M", "homepage": ";https://www.andrew.cmu.edu/user/akashaf/;https://www.cs.cmu.edu/directory/bhikshar/", "dblp": "142/5481;;60/3996", "google_scholar": "74MwzTcAAAAJ;7XROKrEAAAAJ;", "orcid": ";;", "linkedin": ";aqsakashaf/;", "or_profile": "~Muhammad_A_Shah1;~Aqsa_Kashaf1;~Bhiksha_Raj1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "cmu.edu;cmu.edu;mbzuai.ac.ae", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nshah2023training,\ntitle={Training on Foveated Images Improves Robustness to Adversarial Attacks},\nauthor={Muhammad A Shah and Aqsa Kashaf and Bhiksha Raj},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2tfG9QaFA7}\n}", "github": "", "project": "", "reviewers": "6sqj;74Y4;atHR;ysro", "pdf_size": 10653132, "rating": "5;6;7;7", "confidence": "4;3;5;3", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "88;84;60;82", "wc_strengths": "96;29;110;46", "wc_weaknesses": "133;111;166;243", "wc_questions": "237;227;136;186", "wc_limitations": "14;58;3;24", "wc_review": "568;509;475;581", "wc_reply_reviewers": "33;126;136;33", "wc_reply_authors": "219;774;40;0", "reply_reviewers": "2;2;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.5, 10.897247358851684 ], "wc_strengths_avg": [ 70.25, 33.662850443775554 ], "wc_weaknesses_avg": [ 163.25, 50.03186484631569 ], "wc_questions_avg": [ 196.5, 39.81519810323691 ], "wc_limitations_avg": [ 24.75, 20.58367071248469 ], "wc_review_avg": [ 533.25, 43.210965043609015 ], "wc_reply_reviewers_avg": [ 82.0, 49.12738543826651 ], "wc_reply_authors_avg": [ 258.25, 308.9760306237362 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16506538968533346522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;cmu.edu;mbzuai.ac.ae", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "Compressed Video Prompt Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72926", "id": 
"2vADOf3K00", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/656678aa961a99a6a3d59bfbf88daf77-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2vADOf3K00", "openreview": "https://openreview.net/forum?id=2vADOf3K00", "poster": "/media/PosterPDFs/NeurIPS%202023/72926.png?t=1701803898.9060218", "slides": "https://nips.cc/virtual/2023/poster/72926", "video": "https://nips.cc/virtual/2023/poster/72926", "author_site": "Bing Li, Jiaxin Chen, Xiuguo Bao, Di Huang", "tldr": "", "abstract": "Compressed videos offer a compelling alternative to raw videos, showing the possibility to significantly reduce the on-line computational and storage cost. However, current approaches to compressed video processing generally follow the resource-consuming pre-training and fine-tuning paradigm, which does not fully take advantage of such properties, making them not favorable enough for widespread applications. Inspired by recent successes of prompt tuning techniques in computer vision, this paper presents the first attempt to build a prompt based representation learning framework, which enables effective and efficient adaptation of pre-trained raw video models to compressed video understanding tasks. To this end, we propose a novel prompt tuning approach, namely Compressed Video Prompt Tuning (CVPT), emphatically dealing with the challenging issue caused by the inconsistency between pre-training and downstream data modalities. Specifically, CVPT replaces the learnable prompts with compressed modalities (\\emph{e.g.} Motion Vectors and Residuals) by re-parameterizing them into conditional prompts followed by layer-wise refinement. The conditional prompts exhibit improved adaptability and generalizability to instances compared to conventional individual learnable ones, and the Residual prompts enhance the noisy motion cues in the Motion Vector prompts for further fusion with the visual cues from I-frames. Additionally, we design Selective Cross-modal Complementary Prompt (SCCP) blocks. After inserting them into the backbone, SCCP blocks leverage semantic relations across diverse levels and modalities to improve cross-modal interactions between prompts and input flows. 
Extensive evaluations on HMDB-51, UCF-101 and Something-Something v2 demonstrate that CVPT remarkably outperforms the state-of-the-art counterparts, delivering a much better balance between accuracy and efficiency.", "keywords": "Compressed video;Action Recognition;Prompt Tuning", "primary_area": "", "supplementary_material": "", "author": "Bing Li;Jiaxin Chen;Xiuguo Bao;Di Huang", "authorids": "~Bing_Li15;~Jiaxin_Chen4;~Xiuguo_Bao3;~Di_Huang4", "gender": "M;M;M;M", "homepage": ";https://dblp.org/rec/conf/ijcai/LiCZB022;http://irip.buaa.edu.cn/dihuang/index.html;https://dr-jiaxin-chen.github.io/page/", "dblp": ";;45/780-1;65/1392-2.html", "google_scholar": "https://scholar.google.com.hk/citations?user=q2DP1C0AAAAJ;;https://scholar.google.com/citations?hl=en;eNlGf7EAAAAJ", "orcid": ";;0000-0002-2412-9330;", "linkedin": ";;;", "or_profile": "~Bing_Li15;~Xiuguo_Bao3;~Di_Huang4;~Jiaxin_Chen2", "aff": "Beihang University;Coordination Center of China;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;cert.org.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2023compressed,\ntitle={Compressed Video Prompt Tuning},\nauthor={Bing Li and Jiaxin Chen and Xiuguo Bao and Di Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2vADOf3K00}\n}", "github": "", "project": "", "reviewers": "p8gQ;5c9t;sECu;XoEQ;Eouy", "pdf_size": 1243258, "rating": "4;4;6;6;8", "confidence": "5;4;3;4;4", "soundness": "2;4;3;3;4", "novelty": "3;2;3;2;3", "presentation": "2;3;4;3;3", "wc_summary": "44;144;62;94;56", "wc_strengths": "83;65;39;40;34", "wc_weaknesses": "268;135;66;223;48", "wc_questions": "25;11;25;32;66", "wc_limitations": "8;3;1;9;1", "wc_review": "428;358;193;398;205", "wc_reply_reviewers": "119;163;0;89;0", "wc_reply_authors": "282;704;0;133;0", "reply_reviewers": "1;1;0;1;0", "reply_authors": "2;2;1;2;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 80.0, 36.02221536774216 ], "wc_strengths_avg": [ 52.2, 18.79787222001469 ], "wc_weaknesses_avg": [ 148.0, 85.92787673392145 ], "wc_questions_avg": [ 31.8, 18.410866356584094 ], "wc_limitations_avg": [ 4.4, 3.4409301068170506 ], "wc_review_avg": [ 316.4, 98.46948765988375 ], "wc_reply_reviewers_avg": [ 74.2, 64.99661529649063 ], "wc_reply_authors_avg": [ 223.8, 261.68561290219986 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8665551239626338257&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;cert.org.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Beihang University;Coordination Center of China", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;", "aff_unique_abbr": "BUAA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Diffusion-Model of Joint Interactive Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72925", 
"id": "2yXExAl0FW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aeeddfbab4e99763ebac9221732c80dd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2yXExAl0FW", "openreview": "https://openreview.net/forum?id=2yXExAl0FW", "poster": "/media/PosterPDFs/NeurIPS%202023/72925.png?t=1702262511.414226", "slides": "https://nips.cc/virtual/2023/poster/72925", "video": "https://nips.cc/virtual/2023/poster/72925", "author_site": "Matthew Niedoba, Jonathan Lavington, Yunpeng Liu, Vasileios Lioutas, Justice Sefas, Xiaoxuan Liang, Dylan Green, Setareh Dabiri, Berend Zwartsenberg, Adam Scibior, Frank Wood", "tldr": "", "abstract": "Simulation of autonomous vehicle systems requires that simulated traffic participants exhibit diverse and realistic behaviors. The use of prerecorded real-world traffic scenarios in simulation ensures realism but the rarity of safety critical events makes large scale collection of driving scenarios expensive. In this paper, we present DJINN -- a diffusion based method of generating traffic scenarios. Our approach jointly diffuses the trajectories of all agents, conditioned on a flexible set of state observations from the past, present, or future. On popular trajectory forecasting datasets, we report state of the art performance on joint trajectory metrics. In addition, we demonstrate how DJINN flexibly enables direct test-time sampling from a variety of valuable conditional distributions including goal-based sampling, behavior-class sampling, and scenario editing.", "keywords": "Diffusion Models;Trajecotry Forecasting;Autonomous Vehicles;Motion Forecasting;Simulation", "primary_area": "", "supplementary_material": "", "author": "Matthew Niedoba;Jonathan Wilder Lavington;Yunpeng Liu;Vasileios Lioutas;Justice Sefas;Xiaoxuan Liang;Dylan Green;Setareh Dabiri;Berend Zwartsenberg;Adam Scibior;Frank Wood", "authorids": "~Matthew_Niedoba2;~Jonathan_Wilder_Lavington1;~Yunpeng_Liu1;~Vasileios_Lioutas1;~Justice_Sefas1;liang51@cs.ubc.ca;~Dylan_Green1;~Setareh_Dabiri1;~Berend_Zwartsenberg1;~Adam_Scibior1;~Frank_Wood2", "gender": "M;M;M;M;M;;M;;M;;M", "homepage": ";https://wilderlavington.github.io/;;http://www.vlioutas.com/;;;;;https://bzwartsenberg.github.io/;https://www.cs.ubc.ca/~ascibior/;http://www.robots.ox.ac.uk/~fwood/", "dblp": "243/2863;282/4019;02/8137-7.html;224/6571;321/4203;;;;;167/6446;44/4750", "google_scholar": "uSl2vYwAAAAJ;;;2jhOrwoAAAAJ;https://scholar.google.com/citations?hl%3Den=;;;;;https://scholar.google.co.uk/citations?user=Gpw8Z0cAAAAJ;d4yNzXIAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;larry-liu-323b51126/;vasileioslioutas/;;;dylangreen90/;;;;frank-wood-43529114?trk=hp-identity-name", "or_profile": "~Matthew_Niedoba2;~Jonathan_Wilder_Lavington1;~Yunpeng_Liu1;~Vasileios_Lioutas1;~Justice_Sefas1;liang51@cs.ubc.ca;~Dylan_Green1;~Setareh_Dabiri1;~Berend_Zwartsenberg1;~Adam_Scibior1;~Frank_Wood2", "aff": "Inverted AI;;University of British Columbia;University of British Columbia;Inverted AI;;University of British Columbia;;Inverted AI;Inverted AI;University of British Columbia", "aff_domain": "inverted.ai;;cs.ubc.ca;ubc.ca;inverted.ai;;cs.ubc.ca;;inverted.ai;inverted.ai;cs.ubc.ca", "position": "Researcher;;PhD student;PhD student;Researcher;;MS student;;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nniedoba2023a,\ntitle={A Diffusion-Model of Joint Interactive Navigation},\nauthor={Matthew Niedoba and Jonathan Wilder Lavington and Yunpeng Liu and Vasileios Lioutas and Justice Sefas and Xiaoxuan Liang and 
Dylan Green and Setareh Dabiri and Berend Zwartsenberg and Adam Scibior and Frank Wood},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2yXExAl0FW}\n}", "github": "", "project": "", "reviewers": "1Rcd;TiFk;wJNr;1Qe5", "pdf_size": 2260224, "rating": "6;6;7;8", "confidence": "4;3;4;3", "soundness": "3;4;3;4", "novelty": "3;2;3;3", "presentation": "4;4;3;3", "wc_summary": "73;46;90;171", "wc_strengths": "99;29;151;303", "wc_weaknesses": "152;68;515;360", "wc_questions": "2;25;188;88", "wc_limitations": "12;18;35;88", "wc_review": "338;186;979;1010", "wc_reply_reviewers": "88;0;72;0", "wc_reply_authors": "0;0;91;0", "reply_reviewers": "1;0;2;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.0, 46.59935621872903 ], "wc_strengths_avg": [ 145.5, 100.71122082469262 ], "wc_weaknesses_avg": [ 273.75, 175.21183607279502 ], "wc_questions_avg": [ 75.75, 72.0498959055459 ], "wc_limitations_avg": [ 38.25, 29.93639089803579 ], "wc_review_avg": [ 628.25, 370.3338865132382 ], "wc_reply_reviewers_avg": [ 40.0, 40.39801975344831 ], "wc_reply_authors_avg": [ 22.75, 39.40415587219196 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13369325745653127784&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "inverted.ai;;cs.ubc.ca;ubc.ca;inverted.ai;;cs.ubc.ca;;inverted.ai;inverted.ai;cs.ubc.ca", "author_num": 11, "aff_unique_index": "0;1;1;0;1;0;0;1", "aff_unique_norm": "Inverted AI;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.inverted.ai;https://www.ubc.ca", "aff_unique_abbr": "Inverted AI;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Mutual Information Regularized Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72924", "id": "2z8noau98f", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c6bd2021c10462c5164638d22f3d5d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2z8noau98f", "openreview": "https://openreview.net/forum?id=2z8noau98f", "poster": "/media/PosterPDFs/NeurIPS%202023/72924.png?t=1702094386.5551014", "slides": "https://nips.cc/virtual/2023/poster/72924", "video": "https://nips.cc/virtual/2023/poster/72924", "author_site": "Xiao Ma, Bingyi Kang, Zhongwen Xu, Min Lin, Shuicheng Yan", "tldr": "", "abstract": "The major challenge of offline RL is the distribution shift that appears when out-of-distribution actions are queried, which makes the policy improvement direction biased by extrapolation errors. Most existing methods address this problem by penalizing the policy or value for deviating from the behavior policy during policy improvement or evaluation. In this work, we propose a novel MISA framework to approach offline RL from the perspective of Mutual Information between States and Actions in the dataset by directly constraining the policy improvement direction. 
MISA constructs lower bounds of mutual information parameterized by the policy and Q-values. We show that optimizing this lower bound is equivalent to maximizing the likelihood of a one-step improved policy on the offline dataset. Hence, we constrain the policy improvement direction to lie in the data manifold. The resulting algorithm simultaneously augments the policy evaluation and improvement by adding mutual information regularizations. MISA is a general framework that unifies conservative Q-learning (CQL) and behavior regularization methods (e.g., TD3+BC) as special cases. We introduce 3 different variants of MISA, and empirically demonstrate that a tighter mutual information lower bound gives better offline RL performance. In addition, our extensive experiments show that MISA significantly outperforms a wide range of baselines on various tasks of the D4RL benchmark, e.g., achieving 742.9 total points on gym-locomotion tasks. Our code is attached and will be released upon publication.", "keywords": "Mutual Information;Offline Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/e6dc1b2376b1b11b002ec5785b91c4fdd6750735.zip", "author": "Xiao Ma;Bingyi Kang;Zhongwen Xu;Min Lin;Shuicheng YAN", "authorids": "~Xiao_Ma2;~Bingyi_Kang1;~Zhongwen_Xu1;~Min_Lin1;~Shuicheng_YAN3", "gender": "M;;M;M;M", "homepage": "https://yusufma03.github.io/;https://bingykang.github.io/;https://zhongwen.one/;https://linmin.me;https://yanshuicheng.ai/", "dblp": "35/573-6;;130/5077;;y/ShuichengYan", "google_scholar": "hR4G6hoAAAAJ;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;https://scholar.google.co.uk/citations?user=T4xuHn8AAAAJ;BGONmkIAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";;;;", "linkedin": ";;;min-lin-08a3a422/;", "or_profile": "~Xiao_Ma2;~Bingyi_Kang1;~Zhongwen_Xu1;~Min_Lin1;~Shuicheng_YAN3", "aff": "SEA AI Lab;Sea AI Lab;Sea AI Lab;Sea AI Lab;sea Group", "aff_domain": "sea.com;sea.com;sea.com;sea.com;sea.com", "position": "Research Scientist;Researcher;Principal Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nma2023mutual,\ntitle={Mutual Information Regularized Offline Reinforcement Learning},\nauthor={Xiao Ma and Bingyi Kang and Zhongwen Xu and Min Lin and Shuicheng YAN},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=2z8noau98f}\n}", "github": "", "project": "", "reviewers": "TqeM;XdYK;2yR3;Jrh8", "pdf_size": 771551, "rating": "4;6;6;7", "confidence": "4;4;4;4", "soundness": "3;2;2;4", "novelty": "3;2;2;3", "presentation": "3;2;2;3", "wc_summary": "59;112;81;101", "wc_strengths": "44;37;58;86", "wc_weaknesses": "110;243;100;114", "wc_questions": "103;387;67;203", "wc_limitations": "15;12;4;1", "wc_review": "331;791;310;505", "wc_reply_reviewers": "140;30;48;13", "wc_reply_authors": "733;38;21;0", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 88.25, 20.216020874544032 ], "wc_strengths_avg": [ 56.25, 18.766659265836314 ], "wc_weaknesses_avg": [ 141.75, 58.67868011467197 ], "wc_questions_avg": [ 190.0, 124.17326604386308 ], "wc_limitations_avg": [ 8.0, 5.70087712549569 ], "wc_review_avg": [ 484.25, 192.59721571196195 ], "wc_reply_reviewers_avg": [ 57.75, 49.07328703072579 ], "wc_reply_authors_avg": [ 198.0,
309.17551649508084 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14536105478928511397&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sea.com;sea.com;sea.com;sea.com;sea.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Sea AI Lab;Sea Group", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Unknown;" }, { "title": "Evolving Connectivity for Recurrent Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72923", "id": "30o4ARmfC3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/08f9de0232c0b485110237f6e6cf88f1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=30o4ARmfC3", "openreview": "https://openreview.net/forum?id=30o4ARmfC3", "poster": "/media/PosterPDFs/NeurIPS%202023/72923.png?t=1699616987.7587023", "slides": "https://nips.cc/virtual/2023/poster/72923", "video": "https://nips.cc/virtual/2023/poster/72923", "author_site": "Guan Wang, Yuhao Sun, Sijie Cheng, Sen Song", "tldr": "", "abstract": "Recurrent spiking neural networks (RSNNs) hold great potential for advancing artificial general intelligence, as they draw inspiration from the biological nervous system and show promise in modeling complex dynamics.\nHowever, the widely-used surrogate gradient-based training methods for RSNNs are inherently inaccurate and unfriendly to neuromorphic hardware.\nTo address these limitations, we propose the evolving connectivity (EC) framework, an inference-only method for training RSNNs.\nThe EC framework reformulates weight-tuning as a search into parameterized connection probability distributions, and employs Natural Evolution Strategies (NES) for optimizing these distributions.\nOur EC framework circumvents the need for gradients and features hardware-friendly characteristics, including sparse boolean connections and high scalability.\nWe evaluate EC on a series of standard robotic locomotion tasks, where it achieves comparable performance with deep neural networks and outperforms gradient-trained RSNNs, even solving the complex 17-DoF humanoid task.\nAdditionally, the EC framework demonstrates a two to three fold speedup in efficiency compared to directly evolving parameters.\nBy providing a performant and hardware-friendly alternative, the EC framework lays the groundwork for further energy-efficient applications of RSNNs and advances the development of neuromorphic devices.\nOur code is publicly available at https://github.com/imoneoi/EvolvingConnectivity.", "keywords": "neuromorphic computing;spiking neural networks;evolutionary algorithms;inference-only approach;hardware-friendly;robotic locomotion tasks", "primary_area": "", "supplementary_material": "", "author": "Guan Wang;Yuhao Sun;Sijie Cheng;Sen Song", "authorids": "~Guan_Wang3;~Yuhao_Sun1;~Sijie_Cheng1;~Sen_Song1", "gender": ";M;F;M", "homepage": ";;https://adacheng.github.io/;https://brain.tsinghua.edu.cn/en/info/1010/1012.htm", "dblp": ";;160/7320;33/3456", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;pruwctkAAAAJ;cYgtRP4AAAAJ", "orcid": ";;;0000-0001-5587-0730", "linkedin": ";;;", "or_profile": 
"~Guan_Wang3;~Yuhao_Sun1;~Sijie_Cheng1;~Sen_Song1", "aff": ";Tsinghua University;Fudan University;", "aff_domain": ";tsinghua.edu.cn;fdu.edu;", "position": ";PhD student;MS student;", "bibtex": "@inproceedings{\nwang2023evolving,\ntitle={Evolving Connectivity for Recurrent Spiking Neural Networks},\nauthor={Guan Wang and Yuhao Sun and Sijie Cheng and Sen Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=30o4ARmfC3}\n}", "github": "", "project": "", "reviewers": "mDuJ;SW36;Apb4;ho2W", "pdf_size": 1635794, "rating": "6;6;7;8", "confidence": "4;3;3;5", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "4;3;3;4", "wc_summary": "58;82;101;141", "wc_strengths": "1;25;89;174", "wc_weaknesses": "160;57;147;484", "wc_questions": "1;43;107;128", "wc_limitations": "2;3;35;14", "wc_review": "222;210;479;941", "wc_reply_reviewers": "70;123;132;155", "wc_reply_authors": "23;805;19;151", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.5, 30.36856927812043 ], "wc_strengths_avg": [ 72.25, 66.97527528872128 ], "wc_weaknesses_avg": [ 212.0, 161.97067635840753 ], "wc_questions_avg": [ 69.75, 50.55380796735297 ], "wc_limitations_avg": [ 13.5, 13.275918047351754 ], "wc_review_avg": [ 463.0, 296.1545204787528 ], "wc_reply_reviewers_avg": [ 120.0, 31.13679495388053 ], "wc_reply_authors_avg": [ 249.5, 325.0826817903408 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15708772956384049279&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";tsinghua.edu.cn;fdu.edu;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Tsinghua University;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.fudan.edu.cn", "aff_unique_abbr": "THU;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "31zVEkOGYU", "title": "Enemy is Inside: Alleviating VAE's Overestimation in Unsupervised OOD Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep generative models (DGMs) aim at characterizing the distribution of the training set by maximizing the marginal likelihood of inputs \nin an unsupervised manner, making them a promising option for unsupervised out-of-distribution (OOD) detection.\nHowever, recent works have reported that DGMs often assign higher likelihoods to OOD data than in-distribution (ID) data, $\\textit{i.e.}$, $\\textbf{\\textit{overestimation}}$, leading to their failures in OOD detection.\nAlthough several pioneer works have tried to analyze this phenomenon, and some VAE-based methods have also attempted to alleviate this issue by modifying their score functions for OOD detection, the root cause of the $\\textit{overestimation}$ in VAE has never been revealed to our best knowledge.\nTo fill this gap, this paper will provide a thorough theoretical analysis on the $\\textit{overestimation}$ issue of VAE, and reveal that this phenomenon arises from two Inside-Enemy aspects: 1) the improper design of 
prior distribution; 2) the gap of dataset entropies between ID and OOD datasets.\nBased on these findings, we propose a novel score function to $\\textbf{A}$lleviate $\\textbf{V}$AE's $\\textbf{O}$verestimation $\\textbf{I}$n unsupervised OOD $\\textbf{D}$etection, named $\\textbf{``AVOID''}$, which contains two novel techniques, specifically post-hoc prior and dataset entropy calibration.\nExperimental results verify our analysis, demonstrating that the proposed method is effective in alleviating $\\textit{overestimation}$ and improving unsupervised OOD detection performance.", "keywords": "Unsupervised Out-of-Distribution Detection;VAE", "primary_area": "", "supplementary_material": "/attachment/6f562f20a66b0d582ecd7fe6405540d77084abe1.zip", "author": "Yewen Li;Chaojie Wang;Xiaobo Xia;Hongxin Wei;Tongliang Liu;Bo An;Xinrun Wang", "authorids": "~Yewen_Li1;~Chaojie_Wang1;~Xiaobo_Xia1;~Hongxin_Wei1;~Tongliang_Liu1;~Bo_An2;~Xinrun_Wang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=W5796yEAAAAJ&hl=zh-CN;https://chaojiewang94.github.io/;https://xiaoboxia.github.io/;https://hongxin001.github.io/;https://tongliang-liu.github.io/;https://personal.ntu.edu.sg/boan/;https://rainwangphy.github.io/", "dblp": "55/2231;134/9314-1;242/8072;150/6350;150/6667;42/6178-1.html;199/6413", "google_scholar": "W5796yEAAAAJ;https://scholar.google.com/citations?hl=en;jRsugY0AAAAJ;cABH034AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;PEEpuNwAAAAJ;ROANfPUAAAAJ", "orcid": "0009-0008-0073-123X;;;;;0000-0002-7064-7438;", "linkedin": ";;;;;;", "or_profile": "~Yewen_Li1;~Chaojie_Wang1;~Xiaobo_Xia1;~Hongxin_Wei1;~Tongliang_Liu1;~Bo_An2;~Xinrun_Wang1", "aff": "Nanyang Technological University;Nanyang Technological University;The University of Sydney;Southern University of Science and Technology;University of Sydney;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu;sydney.edu.au;sustech.edu.cn;sydney.edu.au;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Researcher;PhD student;Assistant Professor;Lecturer;Full Professor;Postdoc", "bibtex": "@misc{\nli2023enemy,\ntitle={Enemy is Inside: Alleviating {VAE}'s Overestimation in Unsupervised {OOD} Detection},\nauthor={Yewen Li and Chaojie Wang and Xiaobo Xia and Hongxin Wei and Tongliang Liu and Bo An and Xinrun Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=31zVEkOGYU}\n}", "github": "", "project": "", "reviewers": "LEk9;GLsZ;ogin;Bzqi", "site": "https://openreview.net/forum?id=31zVEkOGYU", "pdf_size": 7067159, "rating": "4;5;5;6", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "71;191;41;89", "wc_strengths": "42;122;36;30", "wc_weaknesses": "425;200;195;146", "wc_questions": "160;113;15;146", "wc_limitations": "60;1;1;1", "wc_review": "758;627;288;412", "wc_reply_reviewers": "138;582;172;958", "wc_reply_authors": "949;1588;627;1664", "reply_reviewers": "1;3;2;2", "reply_authors": "4;5;4;4", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 56.36488268416781 ], "wc_strengths_avg": [ 57.5, 37.47999466382032 ], "wc_weaknesses_avg": [ 241.5, 108.02430282117075 ], "wc_questions_avg": [ 108.5, 56.61492736019362 ], "wc_limitations_avg": [ 15.75, 25.54774941164094 ], "wc_review_avg": [ 521.25, 182.73939777727188 ], 
"wc_reply_reviewers_avg": [ 462.5, 335.2204498535255 ], "wc_reply_authors_avg": [ 1207.0, 435.021263848102 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.25, 0.4330127018922193 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:joSZIEoF3DQJ:scholar.google.com/&scioq=Enemy+is+Inside:+Alleviating+VAE%27s+Overestimation+in+Unsupervised+OOD+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;1;0;0", "aff_unique_norm": "Nanyang Technological University;University of Sydney;Southern University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.sydney.edu.au;https://www.sustech.edu.cn", "aff_unique_abbr": "NTU;USYD;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;1;0;0", "aff_country_unique": "Singapore;Australia;China" }, { "title": "Idempotent Learned Image Compression with Right-Inverse", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72922", "id": "35dOU92OJM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a25d9d873e9ae6d242c62e36f89ee3a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=35dOU92OJM", "openreview": "https://openreview.net/forum?id=35dOU92OJM", "poster": "/media/PosterPDFs/NeurIPS%202023/72922.png?t=1699705534.8934684", "slides": "https://nips.cc/virtual/2023/poster/72922", "video": "https://nips.cc/virtual/2023/poster/72922", "author_site": "Yanghao Li, Tongda Xu, Yan Wang, Jingjing Liu, Ya-Qin Zhang", "tldr": "", "abstract": "We consider the problem of idempotent learned image compression (LIC).\nThe idempotence of codec refers to the stability of codec to re-compression.\nTo achieve idempotence, previous codecs adopt invertible transforms such as DCT and normalizing flow.\nIn this paper, we first identify that invertibility of transform is sufficient but not necessary for idempotence. Instead, it can be relaxed into right-invertibility. And such relaxation allows wider family of transforms.\nBased on this identification, we implement an idempotent codec using our proposed blocked convolution and null-space enhancement.\nEmpirical results show that we achieve state-of-the-art rate-distortion performance among idempotent codecs. Furthermore, our codec can be extended into near-idempotent codec by relaxing the right-invertibility. 
And this near-idempotent codec has significantly less quality decay after $50$ rounds of re-compression compared with other near-idempotent codecs.", "keywords": "learned image compression;idempotent compression;right-inverse", "primary_area": "", "supplementary_material": "/attachment/f6d8c334e7c96dabe75dbc1a177bf9321dae7f0e.zip", "author": "Yanghao Li;Tongda Xu;Yan Wang;Jingjing Liu;Ya-Qin Zhang", "authorids": "~Yanghao_Li2;~Tongda_Xu1;~Yan_Wang12;~Jingjing_Liu2;~Ya-Qin_Zhang1", "gender": ";Non-Binary;;;M", "homepage": ";https://tongdaxu.github.io/;http://researchgate.net/profile/Yan_Wang154?ev=hdr_xprf;https://air.tsinghua.edu.cn/en/info/1046/1194.htm#:~:text=Jingjing%20Liu%20is%20Professor%2C%20Principal,CVPR%2C%20ACL%2C%20etc.);https://air.tsinghua.edu.cn/en/info/1046/1188.htm", "dblp": ";227/8096;59/2227-80;30/3008-1;09/2187", "google_scholar": ";LO8GS7sAAAAJ;QOZnsYYAAAAJ;BzJ_GboAAAAJ;mDOMfxIAAAAJ", "orcid": ";;;;", "linkedin": ";;;jingjing-liu-65703431/;", "or_profile": "~Yanghao_Li2;~Tongda_Xu1;~Yan_Wang12;~Jingjing_Liu2;~Ya-Qin_Zhang1", "aff": ";Tsinghua University;Tsinghua University;Tsinghua University;AIR, Tsinghua University", "aff_domain": ";air.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": ";Researcher;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023idempotent,\ntitle={Idempotent Learned Image Compression with Right-Inverse},\nauthor={Yanghao Li and Tongda Xu and Yan Wang and Jingjing Liu and Ya-Qin Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=35dOU92OJM}\n}", "github": "", "project": "", "reviewers": "4J8x;H19p;fD4t;FGoX", "pdf_size": 14430002, "rating": "5;5;6;6", "confidence": "3;4;3;4", "soundness": "3;2;3;3", "novelty": "3;1;2;3", "presentation": "3;2;2;3", "wc_summary": "57;53;33;120", "wc_strengths": "62;11;15;89", "wc_weaknesses": "217;205;68;235", "wc_questions": "108;7;1;15", "wc_limitations": "39;13;14;8", "wc_review": "483;289;131;467", "wc_reply_reviewers": "44;137;12;19", "wc_reply_authors": "618;433;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.75, 32.61422235773835 ], "wc_strengths_avg": [ 44.25, 32.706077416896086 ], "wc_weaknesses_avg": [ 181.25, 66.25094338950956 ], "wc_questions_avg": [ 32.75, 43.728566178186085 ], "wc_limitations_avg": [ 18.5, 12.05197079319395 ], "wc_review_avg": [ 342.5, 143.90535083866757 ], "wc_reply_reviewers_avg": [ 53.0, 49.934957694985584 ], "wc_reply_authors_avg": [ 262.75, 270.7686974153401 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11899914904839255059&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";air.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Equivariant Spatio-Temporal 
Attentive Graph Networks to Simulate Physical Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72921", "id": "35nFSbEBks", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e2a75e0c7b579a6cf176dc0858cde55-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=35nFSbEBks", "openreview": "https://openreview.net/forum?id=35nFSbEBks", "poster": "/media/PosterPDFs/NeurIPS%202023/72921.png?t=1701438634.5432255", "slides": "https://nips.cc/virtual/2023/poster/72921", "video": "https://nips.cc/virtual/2023/poster/72921", "author_site": "Liming Wu, Zhichao Hou, Jirui Yuan, Yu Rong, Wenbing Huang", "tldr": "", "abstract": "Learning to represent and simulate the dynamics of physical systems is a crucial yet challenging task. Existing equivariant Graph Neural Network (GNN) based methods have encapsulated the symmetry of physics, \\emph{e.g.}, translations, rotations, etc, leading to better generalization ability. Nevertheless, their frame-to-frame formulation of the task overlooks the non-Markov property mainly incurred by unobserved dynamics in the environment. In this paper, we reformulate dynamics simulation as a spatio-temporal prediction task, by employing the trajectory in the past period to recover the Non-Markovian interactions. We propose Equivariant Spatio-Temporal Attentive Graph Networks (ESTAG), an equivariant version of spatio-temporal GNNs, to fulfil our purpose. At its core, we design a novel Equivariant Discrete Fourier Transform (EDFT) to extract periodic patterns from the history frames, and then construct an Equivariant Spatial Module (ESM) to accomplish spatial message passing, and an Equivariant Temporal Module (ETM) with the forward attention and equivariant pooling mechanisms to aggregate temporal message. We evaluate our model on three real datasets corresponding to the molecular-, protein- and macro-level. 
Experimental results verify the effectiveness of ESTAG compared to typical spatio-temporal GNNs and equivariant GNNs.", "keywords": "Equivariance;Spatio-Temporal GNNs;Physical Dynamics", "primary_area": "", "supplementary_material": "", "author": "Liming Wu;Zhichao Hou;Jirui Yuan;Yu Rong;Wenbing Huang", "authorids": "~Liming_Wu1;~Zhichao_Hou1;~Jirui_Yuan1;~Yu_Rong1;~Wenbing_Huang1", "gender": "M;M;F;M;M", "homepage": "https://github.com/ManlioWu;https://chris-hzc.github.io/;https://air.tsinghua.edu.cn/en/info/1012/1219.htm;https://royrong.me/;https://gsai.ruc.edu.cn/english/wenbing_huang", "dblp": ";188/4064;142/4033;24/10036-1;155/3181-1.html", "google_scholar": "d3dyT4wAAAAJ;rraC4ZMAAAAJ;;https://scholar.google.com.hk/citations?user=itezhEMAAAAJ;0yNkmO4AAAAJ", "orcid": ";0000-0002-3989-2654;;0000-0001-7387-302X;", "linkedin": ";zhichao-hou-b022931a4/;;;", "or_profile": "~Liming_Wu1;~Zhichao_Hou1;~Jirui_Yuan1;~Yu_Rong1;~Wenbing_Huang1", "aff": "Shandong University;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Tsinghua University;Tencent AI Lab;Renmin University of China", "aff_domain": "sdu.edu.cn;amss.ac.cn;tsinghua.edu.cn;tencent.com;ruc.edu.cn", "position": "Undergrad student;MS student;Instructor;Senior Researcher;Associate Professor", "bibtex": "@inproceedings{\nwu2023equivariant,\ntitle={Equivariant Spatio-Temporal Attentive Graph Networks to Simulate Physical Dynamics},\nauthor={Liming Wu and Zhichao Hou and Jirui Yuan and Yu Rong and Wenbing Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=35nFSbEBks}\n}", "github": "", "project": "", "reviewers": "iyxR;pTt6;Gpx3;TWyG;Hnbu", "pdf_size": 0, "rating": "5;6;6;7;7", "confidence": "4;3;4;5;3", "soundness": "3;3;2;4;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "49;74;71;94;88", "wc_strengths": "23;111;48;37;76", "wc_weaknesses": "151;71;146;195;145", "wc_questions": "4;4;63;6;3", "wc_limitations": "1;11;7;49;1", "wc_review": "228;271;335;381;313", "wc_reply_reviewers": "52;28;62;75;0", "wc_reply_authors": "24;18;29;48;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.2, 15.63841424185969 ], "wc_strengths_avg": [ 59.0, 31.285779517218362 ], "wc_weaknesses_avg": [ 141.6, 39.88784275941731 ], "wc_questions_avg": [ 16.0, 23.52020408074726 ], "wc_limitations_avg": [ 13.8, 18.00444389588304 ], "wc_review_avg": [ 305.6, 52.579844046934944 ], "wc_reply_reviewers_avg": [ 43.4, 26.605262637305422 ], "wc_reply_authors_avg": [ 23.8, 15.57433786714543 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.07142857142857145, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1850481598507615806&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "sdu.edu.cn;amss.ac.cn;tsinghua.edu.cn;tencent.com;ruc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Shandong University;Chinese Academy of Sciences;Tsinghua University;Tencent;Renmin University of China", "aff_unique_dep": ";Academy of Mathematics and Systems Science;;Tencent AI Lab;", "aff_unique_url": 
"http://www.sdu.edu.cn;http://www.cas.cn;https://www.tsinghua.edu.cn;https://ai.tencent.com;http://www.ruc.edu.cn", "aff_unique_abbr": "SDU;CAS;THU;Tencent AI Lab;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Representational Strengths and Limitations of Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72920", "id": "36DxONZ9bA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/73bf692447f174984f30499ec9b20e04-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=36DxONZ9bA", "openreview": "https://openreview.net/forum?id=36DxONZ9bA", "poster": "/media/PosterPDFs/NeurIPS%202023/72920.png?t=1701963017.3285682", "slides": "https://nips.cc/virtual/2023/poster/72920", "video": "https://nips.cc/virtual/2023/poster/72920", "author_site": "Clayton Sanford, Daniel Hsu, Matus Telgarsky, Matus Telgarsky", "tldr": "", "abstract": "Attention layers, as commonly used in transformers, form the backbone of modern deep learning, yet there is no mathematical description of their benefits and deficiencies as compared with other architectures. In this work we establish both positive and negative results on the representation power of attention layers, with a focus on intrinsic complexity parameters such as width, depth, and embedding dimension. On the positive side, we present a sparse averaging task, where recurrent networks and feedforward networks all have complexity scaling polynomially in the input size, whereas transformers scale merely logarithmically in the input size; furthermore, we use the same construction to show the necessity and role of a large embedding dimension in a transformer. On the negative side, we present a triple detection task, where attention layers in turn have complexity scaling linearly in the input size; as this scenario seems rare in practice, we also present natural variants that can be efficiently solved by attention layers. 
The proof techniques emphasize the value of communication complexity in the analysis of transformers and related models, and the role of sparse averaging as a prototypical attention task, which even finds use in the analysis of triple detection.", "keywords": "self-attention;approximation theory;communication complexity", "primary_area": "", "supplementary_material": "/attachment/81359e4305b3321fc55aacedf7e02cc01981f8e5.pdf", "author": "Clayton Sanford;Daniel Hsu;Matus Telgarsky", "authorids": "~Clayton_Sanford1;~Daniel_Hsu1;~Matus_Telgarsky1", "gender": "M;M;M", "homepage": "https://www.cs.columbia.edu/~djhsu/;https://cims.nyu.edu/~matus/;https://claytonsanford.com/", "dblp": "h/DanielHsu.html;05/9061;232/1797", "google_scholar": "Bp6tvy0AAAAJ;https://scholar.google.com/citations?hl=en;Qo18yHAAAAAJ", "orcid": "0000-0002-3495-7113;;", "linkedin": ";;claytonsanford/", "or_profile": "~Daniel_Hsu1;~Matus_Telgarsky1;~Clayton_Hendrick_Sanford1", "aff": "Columbia University;Department of Computer Science, University of Illinois, Urbana Champaign;Microsoft Research", "aff_domain": "columbia.edu;cs.illinois.edu;research.microsoft.com", "position": "Associate Professor;Assistant Professor;Intern", "bibtex": "@inproceedings{\nsanford2023representational,\ntitle={Representational Strengths and Limitations of Transformers},\nauthor={Clayton Sanford and Daniel Hsu and Matus Telgarsky},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=36DxONZ9bA}\n}", "github": "", "project": "", "reviewers": "SrVL;iCyX;CgC3;jpuU;bmhz", "pdf_size": 610212, "rating": "4;5;6;6;7", "confidence": "4;3;2;2;4", "soundness": "4;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;2;3;2;3", "wc_summary": "113;68;168;83;137", "wc_strengths": "24;33;266;55;100", "wc_weaknesses": "168;63;148;63;79", "wc_questions": "219;37;4;7;49", "wc_limitations": "9;1;2;14;14", "wc_review": "533;202;588;222;379", "wc_reply_reviewers": "566;0;0;0;45", "wc_reply_authors": "414;0;0;0;0", "reply_reviewers": "1;0;0;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 113.8, 36.1186932211009 ], "wc_strengths_avg": [ 95.6, 89.16187526067407 ], "wc_weaknesses_avg": [ 104.2, 44.76337788862677 ], "wc_questions_avg": [ 63.2, 79.78069942034854 ], "wc_limitations_avg": [ 8.0, 5.621387729022079 ], "wc_review_avg": [ 384.8, 156.97566690414155 ], "wc_reply_reviewers_avg": [ 122.2, 222.58337763633654 ], "wc_reply_authors_avg": [ 82.8, 165.6 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.21926450482675733, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9565464334639819750&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 11, "email": "columbia.edu;cs.illinois.edu;research.microsoft.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Columbia University;University of Illinois Urbana-Champaign;Microsoft", "aff_unique_dep": ";Department of Computer Science;Microsoft Research", "aff_unique_url": "https://www.columbia.edu;https://illinois.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Columbia;UIUC;MSR", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Explore to Generalize in Zero-Shot RL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72919", "id": "37cADkATD0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c793577b644268259b1416464a6cdb8c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=37cADkATD0", "openreview": "https://openreview.net/forum?id=37cADkATD0", "poster": "/media/PosterPDFs/NeurIPS%202023/72919.png?t=1701724557.0244186", "slides": "https://nips.cc/virtual/2023/poster/72919", "video": "https://nips.cc/virtual/2023/poster/72919", "author_site": "Ev Zisselman, Itai Lavie, Daniel Soudry, Aviv Tamar", "tldr": "", "abstract": "We study zero-shot generalization in reinforcement learning - optimizing a policy on a set of training tasks to perform well on a similar but unseen test task. \nTo mitigate overfitting, previous work explored different notions of invariance to the task. However, on problems such as the ProcGen Maze, an adequate solution that is invariant to the task visualization does not exist, and therefore invariance-based approaches fail. \nOur insight is that learning a policy that effectively $\\textit{explores}$ the domain is harder to memorize than a policy that maximizes reward for a specific task, and therefore we expect such learned behavior to generalize well; we indeed demonstrate this empirically on several domains that are difficult for invariance-based approaches. Our $\\textit{Explore to Generalize}$ algorithm (ExpGen) builds on this insight: we train an additional ensemble of agents that optimize reward. At test time, either the ensemble agrees on an action, and we generalize well, or we take exploratory actions, which generalize well and drive us to a novel part of the state space, where the ensemble may potentially agree again. We show that our approach is the state-of-the-art on tasks of the ProcGen challenge that have thus far eluded effective generalization, yielding a success rate of 83% on the Maze task and 74% on Heist with $200$ training levels. 
ExpGen can also be combined with an invariance-based approach to gain the best of both worlds, setting new state-of-the-art results on ProcGen.\nCode available at [https://github.com/EvZissel/expgen](https://github.com/EvZissel/expgen).", "keywords": "Reinforcement Learning;Generalization;State Space Maximum Entropy Exploration", "primary_area": "", "supplementary_material": "", "author": "Ev Zisselman;Itai Lavie;Daniel Soudry;Aviv Tamar", "authorids": "~Ev_Zisselman1;~Itai_Lavie1;~Daniel_Soudry1;~Aviv_Tamar2", "gender": ";M;M;M", "homepage": "https://evzissel.github.io/;;https://soudry.github.io/;https://avivt.github.io/avivt/", "dblp": "230/3516;;126/1779;49/10622", "google_scholar": "wfLDM2oAAAAJ;;https://scholar.google.co.il/citations?user=AEBWEm8AAAAJ;https://scholar.google.co.il/citations?user=kppa2vgAAAAJ", "orcid": ";;0000-0001-9368-6352;", "linkedin": ";https://www.linkedin.com/feed/;daniel-soudry-2aa3a88/;", "or_profile": "~Ev_Zisselman1;~Itai_Lavie1;~Daniel_Soudry1;~Aviv_Tamar2", "aff": "Technion, Technion;Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion;Technion, Technion", "aff_domain": "technion.ac.il;technion.ac.il;technion.ac.il;technion.ac.il", "position": "PhD student;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzisselman2023explore,\ntitle={Explore to Generalize in Zero-Shot {RL}},\nauthor={Ev Zisselman and Itai Lavie and Daniel Soudry and Aviv Tamar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=37cADkATD0}\n}", "github": "", "project": "", "reviewers": "VwcD;LrJL;W3Xj;gmPQ", "pdf_size": 8151991, "rating": "4;4;7;7", "confidence": "3;3;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "113;59;96;507", "wc_strengths": "35;115;34;44", "wc_weaknesses": "114;111;145;55", "wc_questions": "367;19;149;475", "wc_limitations": "50;9;29;1", "wc_review": "679;313;453;1082", "wc_reply_reviewers": "310;121;404;29", "wc_reply_authors": "488;377;540;0", "reply_reviewers": "2;1;2;1", "reply_authors": "3;3;3;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 193.75, 181.90571046561456 ], "wc_strengths_avg": [ 57.0, 33.712015662075146 ], "wc_weaknesses_avg": [ 106.25, 32.44514601600677 ], "wc_questions_avg": [ 252.5, 178.7812909674835 ], "wc_limitations_avg": [ 22.25, 18.9917745353087 ], "wc_review_avg": [ 631.75, 290.90838334431 ], "wc_reply_reviewers_avg": [ 216.0, 148.47053579751102 ], "wc_reply_authors_avg": [ 351.25, 211.16743948819382 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5406024534330369010&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "technion.ac.il;technion.ac.il;technion.ac.il;technion.ac.il", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "Does Continual Learning Meet Compositionality?
New Benchmarks and An Evaluation Framework", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73708", "id": "38bZuqQOhC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a42b45af2b72e6e5b5e3a6fe695809f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=38bZuqQOhC", "openreview": "https://openreview.net/forum?id=38bZuqQOhC", "poster": "/media/PosterPDFs/NeurIPS%202023/73708.png?t=1701742041.6189964", "slides": "https://nips.cc/virtual/2023/poster/73708", "video": "https://nips.cc/virtual/2023/poster/73708", "author_site": "Weiduo Liao, Ying Wei, Mingchen Jiang, Qingfu Zhang, Hisao Ishibuchi", "tldr": "", "abstract": "Compositionality facilitates the comprehension of novel objects using acquired concepts and the maintenance of a knowledge pool. This is particularly crucial for continual learners to prevent catastrophic forgetting and enable compositionally forward transfer of knowledge. However, the existing state-of-the-art benchmarks inadequately evaluate the capability of compositional generalization, leaving an intriguing question unanswered. To comprehensively assess this capability, we introduce two vision benchmarks, namely Compositional GQA (CGQA) and Compositional OBJects365 (COBJ), along with a novel evaluation framework called Compositional Few-Shot Testing (CFST). These benchmarks evaluate the systematicity, productivity, and substitutivity aspects of compositional generalization. Experimental results on five baselines and two modularity-based methods demonstrate that current continual learning techniques do exhibit somewhat favorable compositionality in their learned feature extractors. Nonetheless, further efforts are required in developing modularity-based approaches to enhance compositional generalization. We anticipate that our proposed benchmarks and evaluation protocol will foster research on continual learning and compositionality.", "keywords": "continual learning", "primary_area": "", "supplementary_material": "/attachment/2183b6de8523e06fd5f48e74a7f32b550ae0b537.pdf", "author": "Weiduo Liao;Ying Wei;Mingchen Jiang;Qingfu Zhang;Hisao Ishibuchi", "authorids": "~Weiduo_Liao1;~Ying_Wei1;~Mingchen_Jiang1;~Qingfu_Zhang1;~Hisao_Ishibuchi1", "gender": "M;F;M;M;M", "homepage": ";https://wei-ying.net/;http://jiangmingchen.top;https://www.cs.cityu.edu.hk/~qzhan7/index.html;", "dblp": ";14/4899-1;;98/1240.html;i/HisaoIshibuchi", "google_scholar": "tUNrOg8AAAAJ;5UpFdKsAAAAJ;;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;vx9EZN4AAAAJ", "orcid": ";;;;0000-0001-9186-6472", "linkedin": ";;;;", "or_profile": "~Weiduo_Liao1;~Ying_Wei1;~Mingchen_Jiang1;~Qingfu_Zhang1;~Hisao_Ishibuchi1", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;Southern University of Science and Technology", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;sustech.edu.cn", "position": "PhD student;Assistant Professor;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliao2023does,\ntitle={Does Continual Learning Meet Compositionality? 
New Benchmarks and An Evaluation Framework},\nauthor={Weiduo Liao and Ying Wei and Mingchen Jiang and Qingfu Zhang and Hisao Ishibuchi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=38bZuqQOhC}\n}", "github": "", "project": "", "reviewers": "xdij;fvLh;6pHG;9Qe4;RpQ2", "pdf_size": 3295687, "rating": "4;5;7;8;8", "confidence": "4;4;4;5;4", "wc_summary_and_contributions": "270;68;44;83;78", "wc_strengths": "90;42;52;78;52", "wc_improvement": "286;175;6;244;24", "wc_limitations": "38;511;6;9;42", "wc_correctness": "14;24;214;15;1", "wc_clarity": "9;45;55;15;6", "wc_relation_to_prior_work": "12;130;70;28;5", "wc_documentation": "1;6;24;9;11", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "721;1002;472;482;220", "wc_reply_reviewers": "0;152;213;36;0", "wc_reply_authors": "626;3703;1227;1379;303", "reply_reviewers": "0;2;1;1;0", "reply_authors": "2;6;3;2;1", "rating_avg": [ 6.4, 1.624807680927192 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 108.6, 81.80855700964295 ], "wc_strengths_avg": [ 62.8, 18.093092604637825 ], "wc_improvement_avg": [ 147.0, 113.59929577246507 ], "wc_limitations_avg": [ 121.2, 195.44758888254415 ], "wc_correctness_avg": [ 53.6, 80.5347130124644 ], "wc_clarity_avg": [ 26.0, 20.059910268991732 ], "wc_relation_to_prior_work_avg": [ 49.0, 46.3637789659126 ], "wc_documentation_avg": [ 10.2, 7.678541528180987 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 579.4, 264.1329968027471 ], "wc_reply_reviewers_avg": [ 80.2, 86.7280808043162 ], "wc_reply_authors_avg": [ 1447.6, 1193.761718267092 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.8, 1.7204650534085253 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4923659639173309, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3252686864188291891&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;sustech.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "City University of Hong Kong;Southern University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.sustech.edu.cn", "aff_unique_abbr": "CityU;SUSTech", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fairness Aware Counterfactuals for Subgroups", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72918", "id": "38dQv3OwN3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b60161e93f3e0e4207081a3b4ef5e8d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=38dQv3OwN3", "openreview": "https://openreview.net/forum?id=38dQv3OwN3", "poster": "/media/PosterPDFs/NeurIPS%202023/72918.png?t=1701803626.8437908", "slides": "https://nips.cc/virtual/2023/poster/72918", "video": "https://nips.cc/virtual/2023/poster/72918", "author_site": "Loukas Kavouras, Konstantinos Tsopelas, Giorgos Giannopoulos, Dimitris Sacharidis, Eleni Psaroudaki, Nikolaos Theologitis, Dimitrios Rontogiannis, Dimitris Fotakis, Ioannis Emiris", "tldr": "", "abstract": "In this work, we present Fairness Aware Counterfactuals for Subgroups (FACTS), a framework for auditing subgroup fairness through counterfactual 
explanations. We start with revisiting (and generalizing) existing notions and introducing new, more refined notions of subgroup fairness. We aim to (a) formulate different aspects of the difficulty of individuals in certain subgroups to achieve recourse, i.e. receive the desired outcome, either at the micro level, considering members of the subgroup individually, or at the macro level, considering the subgroup as a whole, and (b) introduce notions of subgroup fairness that are robust, if not totally oblivious, to the cost of achieving recourse. We accompany these notions with an efficient, model-agnostic, highly parameterizable, and explainable framework for evaluating subgroup fairness. We demonstrate the advantages, the wide applicability, and the efficiency of our approach through a thorough experimental evaluation on different benchmark datasets.", "keywords": "subgroup fairness;recourse;counterfactual explanations", "primary_area": "", "supplementary_material": "/attachment/9de804a3018ba42c89b263a73e43d01095a6462c.zip", "author": "Loukas Kavouras;Konstantinos Tsopelas;Giorgos Giannopoulos;Dimitris Sacharidis;Eleni Psaroudaki;Nikolaos Theologitis;Dimitrios Rontogiannis;Dimitris Fotakis;Ioannis Emiris", "authorids": "~Loukas_Kavouras1;~Konstantinos_Tsopelas1;~Giorgos_Giannopoulos1;~Dimitris_Sacharidis1;~Eleni_Psaroudaki1;~Nikolaos_Theologitis1;~Dimitrios_Rontogiannis1;~Dimitris_Fotakis1;~Ioannis_Emiris1", "gender": "M;M;M;M;F;M;M;M;M", "homepage": "https://www.linkedin.com/in/loukas-kavouras-phd-4a6508123/;;https://www.linkedin.com/in/giorgos-giannopoulos-595a3a67/?originalSubdomain=gr;https://dsachar.net/;https://pseleni.github.io/;https://www.linkedin.com/in/nikolas-theol95/;https://www.linkedin.com/in/dimitrios-rontogiannis/;http://www.softlab.ntua.gr/~fotakis/;https://cgi.di.uoa.gr/~emiris/", "dblp": "183/6212;350/3933;42/4419.html;19/4428;305/4390;350/3995;350/4384.html;95/4731;e/IZEmiris", "google_scholar": ";WRIDH3oAAAAJ;https://scholar.google.gr/citations?user=0SLe_RcAAAAJ;;z-nUpjkAAAAJ;cwIMqBoAAAAJ;CjmIMmgAAAAJ;zFDLf0UAAAAJ;https://scholar.google.gr/citations?user=ZK6y-cIAAAAJ", "orcid": ";;0000-0002-8252-9869;0000-0001-5022-1483;0000-0003-4668-9029;0009-0003-6021-4254;;0000-0001-6864-8960;0000-0002-2339-5303", "linkedin": ";konstantinos-a-tsopelas/;giorgos-giannopoulos-595a3a67/?originalSubdomain=gr;dsachar/;pseleni;nikolas-theol95/;dimitrios-rontogiannis/;;ioannis-emiris-75132a3/", "or_profile": "~Loukas_Kavouras1;~Konstantinos_Tsopelas1;~Giorgos_Giannopoulos1;~Dimitris_Sacharidis1;~Eleni_Psaroudaki1;~Nikolaos_Theologitis1;~Dimitrios_Rontogiannis1;~Dimitris_Fotakis1;~Ioannis_Emiris1", "aff": "IMIS - \"Athena\" Research Center;National Technical University of Athens;Athena Research Center;Universit\u00e9 Libre de Bruxelles;IMIS - \"Athena\" Research Center;IMIS - \"Athena\" Research Center;University of Athens;National Technical University of Athens;Athena Research Center, Greece", "aff_domain": "imis.athena-innovation.gr;ntua.gr;athenarc.gr;ulb.be;imis.athena-innovation.gr;imis.athena-innovation.gr;uoa.gr;ntua.gr;athenarc.gr", "position": "Researcher;MS student;Researcher;Assistant Professor;Researcher;Researcher;Undergrad student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nkavouras2023fairness,\ntitle={Fairness Aware Counterfactuals for Subgroups},\nauthor={Loukas Kavouras and Konstantinos Tsopelas and Giorgos Giannopoulos and Dimitris Sacharidis and Eleni Psaroudaki and Nikolaos Theologitis and Dimitrios Rontogiannis and Dimitris Fotakis and Ioannis 
Emiris},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=38dQv3OwN3}\n}", "github": "", "project": "", "reviewers": "m3rQ;EeTT;CDVq;ZFdX;fCWq", "pdf_size": 523642, "rating": "5;6;6;7;7", "confidence": "4;3;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;4;3", "presentation": "4;3;3;3;3", "wc_summary": "85;91;46;54;186", "wc_strengths": "82;76;84;185;114", "wc_weaknesses": "243;178;19;341;77", "wc_questions": "253;126;21;82;185", "wc_limitations": "43;30;13;88;4", "wc_review": "706;501;183;750;566", "wc_reply_reviewers": "484;37;13;34;15", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 92.4, 49.890279614369774 ], "wc_strengths_avg": [ 108.2, 40.59753687109601 ], "wc_weaknesses_avg": [ 171.6, 114.95320787172491 ], "wc_questions_avg": [ 133.4, 80.37064140592634 ], "wc_limitations_avg": [ 35.6, 29.4659125092029 ], "wc_review_avg": [ 541.2, 200.6463555612212 ], "wc_reply_reviewers_avg": [ 116.6, 183.9549944959364 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16222256698032685127&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 12, "email": "imis.athena-innovation.gr;ntua.gr;athenarc.gr;ulb.be;imis.athena-innovation.gr;imis.athena-innovation.gr;uoa.gr;ntua.gr;athenarc.gr", "author_num": 9, "aff_unique_index": "0;1;0;2;0;0;3;1;0", "aff_unique_norm": "Athena Research Center;National Technical University of Athens;Universit\u00e9 Libre de Bruxelles;University of Athens", "aff_unique_dep": "IMIS;;;", "aff_unique_url": "https://www.athenarc.gr;https://www.ntua.gr;https://www.ulb.ac.be;https://www.uoa.gr", "aff_unique_abbr": "ARC;NTUA;ULB;UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0", "aff_country_unique": "Greece;Belgium" }, { "title": "Im-Promptu: In-Context Composition from Image Prompts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72917", "id": "38o372YoYt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a71c1931d3fb8ba564f7458d0657d0b1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=38o372YoYt", "openreview": "https://openreview.net/forum?id=38o372YoYt", "poster": "/media/PosterPDFs/NeurIPS%202023/72917.png?t=1697405690.298754", "slides": "https://nips.cc/virtual/2023/poster/72917", "video": "https://nips.cc/virtual/2023/poster/72917", "author_site": "Bhishma Dedhia, Michael Chang, Jake Snell, Tom Griffiths, Niraj Jha", "tldr": "", "abstract": "Large language models are few-shot learners that can solve diverse tasks from a handful of demonstrations. This implicit understanding of tasks suggests that the attention mechanisms over word tokens may play a role in analogical reasoning. In this work, we investigate whether analogical reasoning can enable in-context composition over composable elements of visual stimuli.
First, we introduce a suite of three benchmarks to test the generalization properties of a visual in-context learner. We formalize the notion of an analogy-based in-context learner and use it to design a meta-learning framework called Im-Promptu. Whereas the requisite token granularity for language is well established, the appropriate compositional granularity for enabling in-context generalization in visual stimuli is usually unspecified. To this end, we use Im-Promptu to train multiple agents with different levels of compositionality, including vector representations, patch representations, and object slots. Our experiments reveal tradeoffs between extrapolation abilities and the degree of compositionality, with non-compositional representations extending learned composition rules to unseen domains but performing poorly on combinatorial tasks. Patch-based representations require patches to contain entire objects for robust extrapolation. At the same time, object-centric tokenizers coupled with a cross-attention module generate consistent and high-fidelity solutions, with these inductive biases being particularly crucial for compositional generalization. Lastly, we demonstrate a use case of Im-Promptu as an intuitive programming interface for image generation.", "keywords": "in-context learning;compositionality;generative models", "primary_area": "", "supplementary_material": "/attachment/8a1ed675a0b9342183d888720a5e24330a851b61.pdf", "author": "Bhishma Dedhia;Michael Chang;Jake Snell;Thomas L. Griffiths;Niraj Jha", "authorids": "~Bhishma_Dedhia1;~Michael_Chang1;~Jake_Snell1;~Thomas_L._Griffiths1;~Niraj_Jha1", "gender": ";M;M;;M", "homepage": "https://bhishmadedhia.com/;http://mbchang.github.io/;https://www.jakesnell.com;http://cocosci.princeton.edu/tom/;https://www.princeton.edu/~jha/", "dblp": "243/6678;192/1567;172/1406;34/4472;", "google_scholar": ";vgfGtykAAAAJ;MbXKAK8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?hl=en", "orcid": ";;;;", "linkedin": ";mbchang;;;", "or_profile": "~Bhishma_Dedhia1;~Michael_Chang1;~Jake_Snell1;~Thomas_L._Griffiths1;~Niraj_Jha1", "aff": "Princeton University;University of California, Berkeley;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;berkeley.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Postdoc;Professor;Full Professor", "bibtex": "@inproceedings{\ndedhia2023impromptu,\ntitle={Im-Promptu: In-Context Composition from Image Prompts},\nauthor={Bhishma Dedhia and Michael Chang and Jake Snell and Thomas L. 
Griffiths and Niraj Jha},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=38o372YoYt}\n}", "github": "", "project": "", "reviewers": "1AXW;4dpY;Lzbc;JrRD;FiDL", "pdf_size": 3730466, "rating": "4;4;7;8;8", "confidence": "4;4;4;3;4", "soundness": "3;2;3;4;4", "novelty": "2;1;3;3;4", "presentation": "2;2;3;4;4", "wc_summary": "80;82;92;37;91", "wc_strengths": "57;69;62;129;106", "wc_weaknesses": "305;200;84;75;12", "wc_questions": "62;68;79;1;26", "wc_limitations": "1;11;25;8;18", "wc_review": "505;430;342;250;253", "wc_reply_reviewers": "72;56;49;3;12", "wc_reply_authors": "121;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 1.8330302779823358 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 76.4, 20.264254242384546 ], "wc_strengths_avg": [ 84.6, 28.089855820206694 ], "wc_weaknesses_avg": [ 135.2, 104.35976236078731 ], "wc_questions_avg": [ 47.2, 29.143781497945664 ], "wc_limitations_avg": [ 12.6, 8.260750571225353 ], "wc_review_avg": [ 356.0, 99.71760125474339 ], "wc_reply_reviewers_avg": [ 38.4, 26.46204829562519 ], "wc_reply_authors_avg": [ 24.2, 48.39999999999999 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49099025303098287, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14673624754875096897&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "princeton.edu;berkeley.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Princeton University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.berkeley.edu", "aff_unique_abbr": "Princeton;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ChimpACT: A Longitudinal Dataset for Understanding Chimpanzee Behaviors", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73707", "id": "393EoKpJN3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57a95cd3898bf4912269848a01f53620-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=393EoKpJN3", "openreview": "https://openreview.net/forum?id=393EoKpJN3", "poster": "/media/PosterPDFs/NeurIPS%202023/73707.png?t=1698476277.215272", "slides": "https://nips.cc/virtual/2023/poster/73707", "video": "https://nips.cc/virtual/2023/poster/73707", "author_site": "Xiaoxuan Ma, Stephan Kaufhold, Jiajun Su, Wentao Zhu, Jack Terwilliger, Andres Meza, Yixin Zhu, Federico Rossano, Yizhou Wang", "tldr": "", "abstract": "Understanding the behavior of non-human primates is crucial for improving animal welfare, modeling social behavior, and gaining insights into distinctively human and phylogenetically shared behaviors. However, the lack of datasets on non-human primate behavior hinders in-depth exploration of primate social interactions, posing challenges to research on our closest living relatives. 
To address these limitations, we present ChimpACT, a comprehensive dataset for quantifying the longitudinal behavior and social relations of chimpanzees within a social group. Spanning from 2015 to 2018, ChimpACT features videos of a group of over 20 chimpanzees residing at the Leipzig Zoo, Germany, with a particular focus on documenting the developmental trajectory of one young male, Azibo. ChimpACT is both comprehensive and challenging, consisting of 163 videos with a cumulative 160,500 frames, each richly annotated with detection, identification, pose estimation, and fine-grained spatiotemporal behavior labels. We benchmark representative methods of three tracks on ChimpACT: (i) tracking and identification, (ii) pose estimation, and (iii) spatiotemporal action detection of the chimpanzees. Our experiments reveal that ChimpACT offers ample opportunities for both devising new methods and adapting existing ones to solve fundamental computer vision tasks applied to chimpanzee groups, such as detection, pose estimation, and behavior analysis, ultimately deepening our comprehension of communication and sociality in non-human primates.", "keywords": "Computer Vision;Non-human primates behavior", "primary_area": "", "supplementary_material": "/attachment/192cdb8af4183b0cc23e4fbb873b7bca0f9647fe.zip", "author": "Xiaoxuan Ma;Stephan Paul Kaufhold;Jiajun Su;Wentao Zhu;Jack Terwilliger;Andres Meza;Yixin Zhu;Federico Rossano;Yizhou Wang", "authorids": "~Xiaoxuan_Ma2;~Stephan_Paul_Kaufhold1;~Jiajun_Su1;~Wentao_Zhu3;~Jack_Terwilliger1;~Andres_Meza1;~Yixin_Zhu1;~Federico_Rossano1;~Yizhou_Wang1", "gender": "F;Not Specified;M;M;M;;M;M;M", "homepage": "https://shirleymaxx.github.io/;;;https://wentao.live;http://jackterwilliger.com/;;https://yzhu.io/;;https://cfcs.pku.edu.cn/wangyizhou/", "dblp": ";;210/2464;117/0354-4;;;91/1103-1.html;;71/3387-1", "google_scholar": "mjP_5SEAAAAJ;https://scholar.google.de/citations?user=ZLg4jF8AAAAJ;DoUvUz4AAAAJ;https://scholar.google.com/citations?hl=en;;;qG9l6JEAAAAJ;tkbxHjsAAAAJ;831z_VcAAAAJ", "orcid": "0000-0003-0571-2659;0000-0001-6316-4334;;;;0000-0002-4283-0833;0000-0001-7024-1545;0000-0002-6544-7685;", "linkedin": ";;;;;;;federico-rossano-04b3251b5/;", "or_profile": "~Xiaoxuan_Ma2;~Stephan_Paul_Kaufhold1;~Jiajun_Su1;~Wentao_Zhu3;~Jack_Terwilliger1;~Andres_Meza1;~Yixin_Zhu1;~Federico_Rossano1;~Yizhou_Wang1", "aff": "Peking University;University of California, San Diego;International Digital Economy Academy;Peking University;University of California, San Diego;University of California, San Diego;Peking University;University of California, San Diego;Peking University", "aff_domain": "pku.edu.cn;ucsd.edu;idea.edu.cn;pku.edu.cn;ucsd.edu;ucsd.edu;pku.edu.cn;ucsd.edu;pku.edu.cn", "position": "PhD student;PhD student;Researcher;PhD student;PhD student;Researcher;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nma2023chimpact,\ntitle={Chimp{ACT}: A Longitudinal Dataset for Understanding Chimpanzee Behaviors},\nauthor={Xiaoxuan Ma and Stephan Paul Kaufhold and Jiajun Su and Wentao Zhu and Jack Terwilliger and Andres Meza and Yixin Zhu and Federico Rossano and Yizhou Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=393EoKpJN3}\n}", "github": "", "project": "", "reviewers": "8sjD;H6yA;TrcR;zjvm;nT7y", "pdf_size": 37734496, "rating": "6;6;7;7;7", "confidence": "4;4;4;4;3", "wc_summary_and_contributions": "69;19;96;93;166", 
"wc_strengths": "44;22;33;75;42", "wc_improvement": "436;37;209;311;99", "wc_limitations": "18;37;8;43;18", "wc_correctness": "4;13;10;63;1", "wc_clarity": "6;8;9;5;1", "wc_relation_to_prior_work": "1;3;36;119;49", "wc_documentation": "16;5;157;44;39", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "595;145;559;754;416", "wc_reply_reviewers": "0;0;31;0;62", "wc_reply_authors": "738;252;798;716;748", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 88.6, 47.525151235950844 ], "wc_strengths_avg": [ 43.2, 17.70197729068705 ], "wc_improvement_avg": [ 218.4, 143.64901670390927 ], "wc_limitations_avg": [ 24.8, 13.075167302944921 ], "wc_correctness_avg": [ 18.2, 22.79824554653274 ], "wc_clarity_avg": [ 5.8, 2.7856776554368237 ], "wc_relation_to_prior_work_avg": [ 41.6, 42.93064173757481 ], "wc_documentation_avg": [ 52.2, 54.337464055658685 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 493.8, 204.8954855530009 ], "wc_reply_reviewers_avg": [ 18.6, 24.8 ], "wc_reply_authors_avg": [ 650.4, 201.0030845534466 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8071040176764276791&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "pku.edu.cn;ucsd.edu;idea.edu.cn;pku.edu.cn;ucsd.edu;ucsd.edu;pku.edu.cn;ucsd.edu;pku.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;0;1;1;0;1;0", "aff_unique_norm": "Peking University;University of California, San Diego;International Digital Economy Academy", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.ucsd.edu;", "aff_unique_abbr": "Peking U;UCSD;", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;0;1;1;0;1;0", "aff_country_unique": "China;United States;" }, { "title": "Time-uniform confidence bands for the CDF under nonstationarity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72916", "id": "39cFjnRpYm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/148bbc25b934211d80435b5cad5a7198-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=39cFjnRpYm", "openreview": "https://openreview.net/forum?id=39cFjnRpYm", "poster": "/media/PosterPDFs/NeurIPS%202023/72916.png?t=1701455310.0744882", "slides": "https://nips.cc/virtual/2023/poster/72916", "video": "https://nips.cc/virtual/2023/poster/72916", "author_site": "Paul Mineiro, Steven Howard", "tldr": "", "abstract": "Estimation of a complete univariate distribution from a sequence of observations is a useful primitive for both manual and automated decision making. This problem has received extensive attention in the i.i.d. setting, but the arbitrary data dependent setting remains largely unaddressed. We present computationally felicitous time-uniform and value-uniform bounds on the CDF of the running averaged conditional distribution of a sequence of real-valued random variables. Consistent with known impossibility results, our CDF bounds are always valid but sometimes trivial when the instance is too hard, and we give an instance-dependent convergence guarantee. 
The importance-weighted extension is appropriate for estimating complete counterfactual distributions of rewards given data from a randomized experiment, e.g., from an A/B test or a contextual bandit.", "keywords": "off-policy evaluation;anytime-valid", "primary_area": "", "supplementary_material": "/attachment/06f342a564759640f756e0132d1773d050a02a83.zip", "author": "Paul Mineiro;Steven R Howard", "authorids": "~Paul_Mineiro1;~Steven_R_Howard1", "gender": ";", "homepage": ";https://www.stevehoward.org", "dblp": "35/5613;", "google_scholar": ";Blvj9KwAAAAJ", "orcid": ";0000-0001-5264-9088", "linkedin": ";gostevehoward/", "or_profile": "~Paul_Mineiro1;~Steven_R_Howard1", "aff": ";LinkedIn", "aff_domain": ";linkedin.com", "position": ";Researcher", "bibtex": "@inproceedings{\nmineiro2023timeuniform,\ntitle={Time-uniform confidence bands for the {CDF} under nonstationarity},\nauthor={Paul Mineiro and Steven R Howard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=39cFjnRpYm}\n}", "github": "", "project": "", "reviewers": "gT4m;hk3u;jBxN;gyN9", "pdf_size": 839413, "rating": "4;4;5;6", "confidence": "3;2;2;3", "soundness": "3;2;3;2", "novelty": "3;2;3;3", "presentation": "1;2;2;4", "wc_summary": "105;121;110;71", "wc_strengths": "38;53;69;69", "wc_weaknesses": "93;177;180;83", "wc_questions": "256;5;20;52", "wc_limitations": "4;8;1;30", "wc_review": "496;364;380;305", "wc_reply_reviewers": "73;40;20;114", "wc_reply_authors": "0;56;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 101.75, 18.673175948402566 ], "wc_strengths_avg": [ 57.25, 12.891373084353738 ], "wc_weaknesses_avg": [ 133.25, 45.40030286242593 ], "wc_questions_avg": [ 83.25, 101.171574565191 ], "wc_limitations_avg": [ 10.75, 11.388041973930374 ], "wc_review_avg": [ 386.25, 69.24729236583912 ], "wc_reply_reviewers_avg": [ 61.75, 35.611620294504995 ], "wc_reply_authors_avg": [ 14.0, 24.24871130596428 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8htvGvq37XMJ:scholar.google.com/&scioq=Time-uniform+confidence+bands+for+the+CDF+under+nonstationarity&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": ";linkedin.com", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "LinkedIn Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.linkedin.com", "aff_unique_abbr": "LinkedIn", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Schema-learning and rebinding as mechanisms of in-context learning and emergence", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72915", "id": "3AreDQZ8eO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5bc3356e0fa1753fff7e8d6628e71b22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3AreDQZ8eO", "openreview": "https://openreview.net/forum?id=3AreDQZ8eO", "poster": "/media/PosterPDFs/NeurIPS%202023/72915.png?t=1702277601.6955233", "slides": "https://nips.cc/virtual/2023/poster/72915", "video": "https://nips.cc/virtual/2023/poster/72915", 
"author_site": "Sivaramakrishnan Swaminathan, Antoine Dedieu, Rajkumar Vasudeva Raju, Murray Shanahan, Miguel Lazaro-Gredilla, Dileep George", "tldr": "", "abstract": "In-context learning (ICL) is one of the most powerful and most unexpected capabilities to emerge in recent transformer-based large language models (LLMs). Yet the mechanisms that underlie it are poorly understood. In this paper, we demonstrate that comparable ICL capabilities can be acquired by an alternative sequence prediction learning method using clone-structured causal graphs (CSCGs). Moreover, a key property of CSCGs is that, unlike transformer-based LLMs, they are {\\em interpretable}, which considerably simplifies the task of explaining how ICL works. Specifically, we show that it uses a combination of (a) learning template (schema) circuits for pattern completion, (b) retrieving relevant templates in a context-sensitive manner, and (c) rebinding of novel tokens to appropriate slots in the templates. We go on to marshall evidence for the hypothesis that similar mechanisms underlie ICL in LLMs. For example, we find that, with CSCGs as with LLMs, different capabilities emerge at different levels of overparameterization, suggesting that overparameterization helps in learning more complex template (schema) circuits. By showing how ICL can be achieved with small models and datasets, we open up a path to novel architectures, and take a vital step towards a more general understanding of the mechanics behind this important capability.", "keywords": "mechanistic interpretability;in-context learning;emergence;large language models", "primary_area": "", "supplementary_material": "/attachment/80224f38022c5058d024b8ac4d096a23f1185fb0.pdf", "author": "Sivaramakrishnan Swaminathan;Antoine Dedieu;Rajkumar Vasudeva Raju;Murray Shanahan;Miguel Lazaro-Gredilla;Dileep George", "authorids": "~Sivaramakrishnan_Swaminathan1;~Antoine_Dedieu1;~Rajkumar_Vasudeva_Raju2;~Murray_Shanahan1;~Miguel_Lazaro-Gredilla1;~Dileep_George1", "gender": ";M;;M;M;", "homepage": "http://sivark.me;https://antoine-dedieu.github.io;;https://www.doc.ic.ac.uk/~mpsha/;;", "dblp": "342/7709.html;217/3589.html;;11/5268;77/4660;", "google_scholar": "oDVFD5oAAAAJ;Hgoc3FUAAAAJ;;https://scholar.google.co.uk/citations?user=00bnGpAAAAAJ;SFjDQk8AAAAJ;", "orcid": ";;;0000-0001-5984-2964;;", "linkedin": ";;;;miguel-lazaro-g/;", "or_profile": "~Sivaramakrishnan_Swaminathan1;~Antoine_Dedieu1;~Rajkumar_Vasudeva_Raju2;~Murray_Shanahan1;~Miguel_Lazaro-Gredilla1;~Dileep_George1", "aff": "Google DeepMind;Google DeepMind;;Imperial College London;Google Deepmind;Vicarious AI", "aff_domain": "deepmind.com;deepmind.com;;;google.com;vicarious.com", "position": "Research Engineer;Researcher;;Full Professor;Research Scientist;Co-founder", "bibtex": "@inproceedings{\nswaminathan2023schemalearning,\ntitle={Schema-learning and rebinding as mechanisms of in-context learning and emergence},\nauthor={Sivaramakrishnan Swaminathan and Antoine Dedieu and Rajkumar Vasudeva Raju and Murray Shanahan and Miguel Lazaro-Gredilla and Dileep George},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3AreDQZ8eO}\n}", "github": "", "project": "", "reviewers": "ZCs7;2Tdj;PPgs;kc5K", "pdf_size": 4829220, "rating": "6;7;7;8", "confidence": "1;3;3;4", "soundness": "2;4;2;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "77;114;55;128", "wc_strengths": "56;43;100;83", "wc_weaknesses": "118;170;108;57", 
"wc_questions": "119;3;110;27", "wc_limitations": "16;1;34;48", "wc_review": "386;331;407;343", "wc_reply_reviewers": "26;30;22;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.5, 29.004310024546353 ], "wc_strengths_avg": [ 70.5, 22.321514285549714 ], "wc_weaknesses_avg": [ 113.25, 40.10844674130376 ], "wc_questions_avg": [ 64.75, 50.56864146879961 ], "wc_limitations_avg": [ 24.75, 17.795715776557007 ], "wc_review_avg": [ 366.75, 30.95460385790779 ], "wc_reply_reviewers_avg": [ 23.5, 5.172040216394301 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9733285267845752, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16126942274965989611&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "deepmind.com;deepmind.com;;;google.com;vicarious.com", "author_num": 6, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Google;Imperial College London;DeepMind;Vicarious AI", "aff_unique_dep": "Google DeepMind;;DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.imperial.ac.uk;https://deepmind.com;https://www.vicarious.com", "aff_unique_abbr": "DeepMind;ICL;DeepMind;Vicarious AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "$\\mathbf{\\mathbb{E}^{FWI}}$: Multiparameter Benchmark Datasets for Elastic Full Waveform Inversion of Geophysical Properties", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73706", "id": "3BQaMV9jxK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4aa8d18aad014fb3d0076e0afd2e3b2e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=3BQaMV9jxK", "openreview": "https://openreview.net/forum?id=3BQaMV9jxK", "poster": "/media/PosterPDFs/NeurIPS%202023/73706.png?t=1701817658.0711956", "slides": "https://nips.cc/virtual/2023/poster/73706", "video": "https://nips.cc/virtual/2023/poster/73706", "author_site": "Shihang Feng, Hanchen Wang, Chengyuan Deng, Yinan Feng, Yanhua Liu, Min Zhu, Peng Jin, Yinpeng Chen, Youzuo Lin", "tldr": "", "abstract": "Elastic geophysical properties (such as P- and S-wave velocities) are of great importance to various subsurface applications like CO$_2$ sequestration and energy exploration (e.g., hydrogen and geothermal). Elastic full waveform inversion (FWI) is widely applied for characterizing reservoir properties. In this paper, we introduce $\\mathbf{\\mathbb{E}^{FWI}}$, a comprehensive benchmark dataset that is specifically designed for elastic FWI. $\\mathbf{\\mathbb{E}^{FWI}}$ encompasses 8 distinct datasets that cover diverse subsurface geologic structures (flat, curve, faults, etc). The benchmark results produced by three different deep learning methods are provided. In contrast to our previously presented dataset (pressure recordings) for acoustic FWI (referred to as OpenFWI), the seismic dataset in $\\mathbf{\\mathbb{E}^{FWI}}$ has both vertical and horizontal components. 
Moreover, the velocity maps in $\\mathbf{\\mathbb{E}^{FWI}}$ incorporate both P- and S-wave velocities. While the multicomponent data and the added S-wave velocity make the data more realistic, more challenges are introduced regarding the convergence and computational cost of the inversion. We conduct comprehensive numerical experiments to explore the relationship between P-wave and S-wave velocities in seismic data. The relation between P- and S-wave velocities provides crucial insights into the subsurface properties such as lithology, porosity, fluid content, etc. We anticipate that $\\mathbf{\\mathbb{E}^{FWI}}$ will facilitate future research on multiparameter inversions and stimulate endeavors in several critical research topics of carbon-zero and new energy exploration. All datasets, codes and relevant information can be accessed through our website at https://efwi-lanl.github.io/", "keywords": "elastic; full waveform inversion; seismic; open dataset; data-driven", "primary_area": "", "supplementary_material": "/attachment/4a5a1785ff2ea0d65b7980f52f2d060190970fbc.pdf", "author": "Shihang Feng;Hanchen Wang;Chengyuan Deng;Yinan Feng;Yanhua Liu;Min Zhu;Peng Jin;Yinpeng Chen;Youzuo Lin", "authorids": "~Shihang_Feng1;~Hanchen_Wang3;~Chengyuan_Deng1;~Yinan_Feng1;~Yanhua_Liu1;~Min_Zhu1;~Peng_Jin6;~Yinpeng_Chen1;~Youzuo_Lin1", "gender": "M;M;;M;F;M;;M;M", "homepage": ";;;;;https://scholar.google.com/citations?user=izpcqs0AAAAJ&hl=zh-CN;https://ist.psu.edu/directory/pqj5125;https://scholar.google.com/citations?user=V_VpLksAAAAJ&hl=en;https://sites.google.com/site/youzuolin044/", "dblp": ";;246/4646;154/0112;;;;45/6977;", "google_scholar": "m304bMcAAAAJ;laVu-TEAAAAJ;QPaVr9QAAAAJ;LySxJYUAAAAJ;https://scholar.google.com/citations?hl=en;izpcqs0AAAAJ;;;CMXuHYgAAAAJ", "orcid": ";0000-0001-8845-0820;;;;0000-0003-4550-5662;;;", "linkedin": ";wanghanchen/;;;https://www.linkedin.com/jobs/collections/similar-jobs/?currentJobId=2847511070&referenceJobId=2878529510;;;;", "or_profile": "~Shihang_Feng1;~Hanchen_Wang3;~Chengyuan_Deng1;~Yinan_Feng1;~Yanhua_Liu1;~Min_Zhu1;~Peng_Jin6;~Yinpeng_Chen1;~Youzuo_Lin1", "aff": "Los Alamos National Laboratory;Los Alamos National Laboratory;Rutgers University;Los Alamos National Laboratory;Colorado School of Mines;University of Pennsylvania;Pennsylvania State University;Microsoft;Los Alamos National Laboratory", "aff_domain": "lanl.gov;lanl.gov;rutgers.edu;lanl.gov;mines.edu;upenn.edu;psu.edu;microsoft.com;lanl.gov", "position": "Postdoc;Postdoc;PhD student;PostMaster;PhD student;PhD student;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nfeng2023mathbfmathbbefwi,\ntitle={\\${\\textbackslash}mathbf\\{{\\textbackslash}mathbb\\{E\\}{\\textasciicircum}\\{{FWI}\\}\\}\\$: Multiparameter Benchmark Datasets for Elastic Full Waveform Inversion of Geophysical Properties},\nauthor={Shihang Feng and Hanchen Wang and Chengyuan Deng and Yinan Feng and Yanhua Liu and Min Zhu and Peng Jin and Yinpeng Chen and Youzuo Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=3BQaMV9jxK}\n}", "github": "", "project": "", "reviewers": "6MVi;fegx;yFAK", "pdf_size": 2099443, "rating": "6;7;7", "confidence": "3;3;3", "wc_summary_and_contributions": "27;38;69", "wc_strengths": "22;25;136", "wc_improvement": "48;9;201", "wc_limitations": "128;4;7", "wc_correctness": "10;11;41", "wc_clarity": "17;5;24", "wc_relation_to_prior_work": "16;6;24", "wc_documentation": 
"22;4;14", "wc_additional_feedback": "1;1;1", "wc_review": "291;103;517", "wc_reply_reviewers": "12;0;32", "wc_reply_authors": "395;36;451", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 44.666666666666664, 17.78263822446552 ], "wc_strengths_avg": [ 61.0, 53.04714883949938 ], "wc_improvement_avg": [ 86.0, 82.86132994346639 ], "wc_limitations_avg": [ 46.333333333333336, 57.76004001229762 ], "wc_correctness_avg": [ 20.666666666666668, 14.38363267359428 ], "wc_clarity_avg": [ 15.333333333333334, 7.84573486395988 ], "wc_relation_to_prior_work_avg": [ 15.333333333333334, 7.363574011458175 ], "wc_documentation_avg": [ 13.333333333333334, 7.363574011458175 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 303.6666666666667, 169.25194894659919 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 13.199326582148888 ], "wc_reply_authors_avg": [ 294.0, 183.86045433063268 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "lanl.gov;lanl.gov;rutgers.edu;lanl.gov;mines.edu;upenn.edu;psu.edu;microsoft.com;lanl.gov", "author_num": 9, "aff_unique_index": "0;0;1;0;2;3;4;5;0", "aff_unique_norm": "Los Alamos National Laboratory;Rutgers University;Colorado School of Mines;University of Pennsylvania;Pennsylvania State University;Microsoft", "aff_unique_dep": ";;;;;Microsoft Corporation", "aff_unique_url": "https://www.lanl.gov;https://www.rutgers.edu;https://www.mines.edu;https://www.upenn.edu;https://www.psu.edu;https://www.microsoft.com", "aff_unique_abbr": "LANL;Rutgers;CSM;UPenn;PSU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Ego4D Goal-Step: Toward Hierarchical Understanding of Procedural Activities", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73705", "id": "3BxYAaovKr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a65606fa1a6849450550325832036e5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=3BxYAaovKr", "openreview": "https://openreview.net/forum?id=3BxYAaovKr", "poster": "/media/PosterPDFs/NeurIPS%202023/73705.png?t=1702192187.8106616", "slides": "https://nips.cc/virtual/2023/poster/73705", "video": "https://nips.cc/virtual/2023/poster/73705", "author_site": "Yale Song, Eugene Byrne, Tushar Nagarajan, Huiyu Wang, Miguel Martin, Lorenzo Torresani", "tldr": "", "abstract": "Human activities are goal-oriented and hierarchical, comprising primary goals at the top level, sequences of steps and substeps in the middle, and atomic actions at the lowest level. Recognizing human activities thus requires relating atomic actions and steps to their functional objectives (what the actions contribute to) and modeling their sequential and hierarchical dependencies towards achieving the goals. Current activity recognition research has primarily focused on only the lowest levels of this hierarchy, i.e., atomic or low-level actions, often in trimmed videos with annotations spanning only a few seconds. 
In this work, we introduce Ego4D Goal-Step, a new set of annotations on the recently released Ego4D with a novel hierarchical taxonomy of goal-oriented activity labels. It provides dense annotations for 48K procedural step segments (430 hours) and high-level goal annotations for 2,807 hours of Ego4D videos. Compared to existing procedural video datasets, it is substantially larger in size, contains hierarchical action labels (goals - steps - substeps), and provides goal-oriented auxiliary information including natural language summary description, step completion status, and step-to-goal relevance information. We take a data-driven approach to build our taxonomy, resulting in dense step annotations that do not suffer from the poor label-data alignment issues that arise when a taxonomy is defined a priori. Through comprehensive evaluations and analyses, we demonstrate how Ego4D Goal-Step supports exploring various questions in procedural activity understanding, including goal inference, step prediction, hierarchical relation learning, and long-term temporal modeling.", "keywords": "procedural activity recognition;egocentric perception;hierarchical activity labels", "primary_area": "", "supplementary_material": "", "author": "Yale Song;Gene Byrne;Tushar Nagarajan;Huiyu Wang;Miguel Martin;Lorenzo Torresani", "authorids": "~Yale_Song1;eebyrne@gmail.com;~Tushar_Nagarajan1;~Huiyu_Wang1;~Miguel_Martin1;~Lorenzo_Torresani1", "gender": "M;;;;M;M", "homepage": "https://people.csail.mit.edu/yalesong;;https://tushar-n.github.io/;http://csrhddlam.github.io/;https://miguel-martin.com/;https://ltorresa.github.io", "dblp": "31/9606.html;;207/8308;;;75/2854", "google_scholar": "dNHNpxoAAAAJ;;KAKqSwIAAAAJ;SnmuYloAAAAJ;;ss8KR5gAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yale_Song1;eebyrne@gmail.com;~Tushar_Nagarajan1;~Huiyu_Wang1;~Miguel_Martin1;~Lorenzo_Torresani1", "aff": "FAIR, Meta;;University of Texas, Austin;Meta Platforms;Meta Platforms, Inc.;Meta", "aff_domain": "meta.com;;utexas.edu;meta.com;meta.com;meta.com", "position": "Research Scientist;;PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nsong2023egod,\ntitle={Ego4D Goal-Step: Toward Hierarchical Understanding of Procedural Activities},\nauthor={Yale Song and Gene Byrne and Tushar Nagarajan and Huiyu Wang and Miguel Martin and Lorenzo Torresani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=3BxYAaovKr}\n}", "github": "", "project": "", "reviewers": "iAEY;hMCZ;1pxF;YdTM", "pdf_size": 2551465, "rating": "5;7;7;7", "confidence": "3;3;3;3", "wc_summary_and_contributions": "145;37;61;82", "wc_strengths": "93;35;39;120", "wc_improvement": "268;30;37;83", "wc_limitations": "43;1;1;44", "wc_correctness": "83;1;1;8", "wc_clarity": "12;1;1;1", "wc_relation_to_prior_work": "54;1;1;28", "wc_documentation": "50;1;17;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "749;108;159;374", "wc_reply_reviewers": "0;0;0;14", "wc_reply_authors": "827;175;74;205", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 81.25, 40.10221315588455 ], "wc_strengths_avg": [ 71.75, 36.06504540410285 ], "wc_improvement_avg": [ 104.5, 96.5673340213967 ], "wc_limitations_avg": [ 22.25, 21.25294097295713 ], "wc_correctness_avg": [ 23.25, 34.61484508126535 ], "wc_clarity_avg": [ 3.75, 4.763139720814412 ], 
"wc_relation_to_prior_work_avg": [ 21.0, 22.01136070305514 ], "wc_documentation_avg": [ 18.75, 18.925842121290138 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 347.5, 252.38710347400874 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 320.25, 296.56986950801326 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2713460163195220731&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "meta.com;;utexas.edu;meta.com;meta.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Meta;University of Texas at Austin", "aff_unique_dep": "Facebook AI Research (FAIR);", "aff_unique_url": "https://meta.com;https://www.utexas.edu", "aff_unique_abbr": "Meta;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Last-iterate Convergence in Time-varying Zero-sum Games: Extra Gradient Succeeds where Optimism Fails", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72914", "id": "3CJOaJugMG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/457ab261562014550e53351422f69834-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3CJOaJugMG", "openreview": "https://openreview.net/forum?id=3CJOaJugMG", "poster": "/media/PosterPDFs/NeurIPS%202023/72914.png?t=1702182908.331765", "slides": "https://nips.cc/virtual/2023/poster/72914", "video": "https://nips.cc/virtual/2023/poster/72914", "author_site": "Yi Feng, Hu Fu, Qun Hu, Ping Li, Ioannis Panageas, bo peng, Xiao Wang", "tldr": "", "abstract": "Last-iterate convergence has received extensive study in two player zero-sum games starting from bilinear, convex-concave up to settings that satisfy the MVI condition. Typical methods that exhibit last-iterate convergence for the aforementioned games include extra-gradient (EG) and optimistic gradient descent ascent (OGDA). However, all the established last-iterate convergence results hold for the restrictive setting where the underlying repeated game does not change over time.\nRecently, a line of research has focused on regret analysis of OGDA in time-varying games, i.e., games where payoffs evolve with time; the last-iterate behavior of OGDA and EG in time-varying environments remains unclear though. In this paper, we study the last-iterate behavior of various algorithms in two types of unconstrained, time-varying, bilinear zero-sum games: periodic and convergent perturbed games. These models expand upon the usual repeated game formulation and incorporate external environmental factors, such as the seasonal effects on species competition and vanishing external noise. In periodic games, we prove that EG will converge while OGDA and momentum method will diverge. This is quite surprising, as to the best of our knowledge, it is the first result that indicates EG and OGDA have qualitatively different last-iterate behaviors and do not exhibit similar behavior. 
In convergent perturbed games, we prove that all these algorithms converge as long as the game itself stabilizes at a rate faster than $1/t$.", "keywords": "zero sum game;time-varying game;optimistic gradient;extra gradient;momentum method", "primary_area": "", "supplementary_material": "/attachment/95275ffd5f33447682d4bdf9f86e4f4e3153c308.pdf", "author": "Yi Feng;Hu Fu;Qun Hu;Ping Li;Ioannis Panageas;bo peng;Xiao Wang", "authorids": "~Yi_Feng3;fuhu@mail.shufe.edu.cn;2019212804@163.sufe.edu.cn;~Ping_Li14;~Ioannis_Panageas1;~bo_peng20;~Xiao_Wang4", "gender": "M;;;;M;;", "homepage": "https://sites.google.com/view/yifeng95524/home;;;;https://panageas.github.io;;", "dblp": ";;;;139/3829;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;5NiFWuwAAAAJ;;", "orcid": ";;;0009-0007-4367-6958;;0000-0003-0910-9267;", "linkedin": ";;;;;;", "or_profile": "~Yi_Feng3;fuhu@mail.shufe.edu.cn;2019212804@163.sufe.edu.cn;~Ping_Li14;~Ioannis_Panageas1;~bo_peng20;~Xiao_Wang4", "aff": "Shanghai University of Finance and Economics;;;Shanghai University of Finance and Economics;Donald Bren School of Information and Computer Sciences, University of California, Irvine;Shanghai University of Finance and Economics;", "aff_domain": "shufe.edu;;;shufe.edu.cn;ics.uci.edu;sufe.edu;", "position": "PhD student;;;PhD student;Assistant Professor;PhD student;", "bibtex": "@inproceedings{\nfeng2023on,\ntitle={On the Last-iterate Convergence in Time-varying Zero-sum Games: Extra Gradient Succeeds where Optimism Fails},\nauthor={Yi Feng and Hu Fu and Qun Hu and Ping Li and Ioannis Panageas and bo peng and Xiao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3CJOaJugMG}\n}", "github": "", "project": "", "reviewers": "tqyN;abUR;PugT;qpgT;oHHw", "pdf_size": 7425709, "rating": "5;5;6;7;7", "confidence": "3;3;3;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "151;107;58;76;53", "wc_strengths": "81;81;22;50;36", "wc_weaknesses": "122;293;20;40;241", "wc_questions": "51;141;122;72;7", "wc_limitations": "6;7;2;6;10", "wc_review": "411;629;224;244;347", "wc_reply_reviewers": "0;11;16;0;4", "wc_reply_authors": "0;14;22;0;18", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.0, 36.31528603770043 ], "wc_strengths_avg": [ 54.0, 23.757104200638594 ], "wc_weaknesses_avg": [ 143.2, 107.96555006111903 ], "wc_questions_avg": [ 78.6, 48.39256141185337 ], "wc_limitations_avg": [ 6.2, 2.5612496949731396 ], "wc_review_avg": [ 371.0, 145.9301202630903 ], "wc_reply_reviewers_avg": [ 6.2, 6.337191807101945 ], "wc_reply_authors_avg": [ 10.8, 9.173875952943773 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12157452906822154267&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "shufe.edu;;;shufe.edu.cn;ics.uci.edu;sufe.edu;", "author_num": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Shanghai University of Finance and Economics;University of California, Irvine", "aff_unique_dep": ";Donald Bren School of Information and Computer Sciences", 
"aff_unique_url": "http://www.sufe.edu.cn;https://www.uci.edu", "aff_unique_abbr": "SUFE;UCI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "HotBEV: Hardware-oriented Transformer-based Multi-View 3D Detector for BEV Perception", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72913", "id": "3Cj67k38st", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/081b08068e4733ae3e7ad019fe8d172f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3Cj67k38st", "openreview": "https://openreview.net/forum?id=3Cj67k38st", "poster": "/media/PosterPDFs/NeurIPS%202023/72913.png?t=1701733968.7144318", "slides": "https://nips.cc/virtual/2023/poster/72913", "video": "https://nips.cc/virtual/2023/poster/72913", "author_site": "Peiyan Dong, Zhenglun Kong, Xin Meng, Pinrui Yu, Yifan Gong, Geng Yuan, Hao Tang, Yanzhi Wang", "tldr": "", "abstract": "The bird's-eye-view (BEV) perception plays a critical role in autonomous driving systems, involving the accurate and efficient detection and tracking of objects from a top-down perspective. To achieve real-time decision-making in self-driving scenarios, low-latency computation is essential. While recent approaches to BEV detection have focused on improving detection precision using Lift-Splat-Shoot (LSS)-based or transformer-based schemas, the substantial computational and memory burden of these approaches increases the risk of system crashes when multiple on-vehicle tasks run simultaneously. Unfortunately, there is a dearth of literature on efficient BEV detector paradigms, let alone achieving realistic speedups.\nUnlike existing works that focus on reducing computation costs, this paper focuses on developing an efficient model design that prioritizes actual on-device latency.\nTo achieve this goal, we propose a latency-aware design methodology that considers key hardware properties, such as memory access cost and degree of parallelism.\nGiven the prevalence of GPUs as the main computation platform for autonomous driving systems, we develop a theoretical latency prediction model and introduce efficient building operators.\nBy leveraging these operators and following an effective local-to-global visual modeling process, we propose a hardware-oriented backbone that is also optimized for strong feature capturing and fusing.\nUsing these insights, we present a new hardware-oriented framework for efficient yet accurate camera-view BEV detectors.\nExperiments show that HotBEV achieves a 2\\%$\\sim$23\\% NDS gain, and 2\\%$\\sim$7.8\\% mAP gain with a 1.1$\\times$$\\sim$3.4$\\times$ speedups compared to existing works on V100;\nOn multiple GPU devices such as GPU GTX 2080 and the low-end GTX 1080, HotBEV achieves 1.1$\\times$$\\sim$6.3$\\times$ faster than others.", "keywords": "Multi-view 3D detection;Hardware efficiency;Autonomous driving", "primary_area": "", "supplementary_material": "/attachment/a8695c3659e4f811c49cbd4fcef48a368e7a647b.pdf", "author": "Peiyan Dong;Zhenglun Kong;Xin Meng;Pinrui Yu;Yifan Gong;Geng Yuan;Hao Tang;Yanzhi Wang", "authorids": "~Peiyan_Dong1;~Zhenglun_Kong1;~Xin_Meng1;~Pinrui_Yu1;~Yifan_Gong2;~Geng_Yuan1;~Hao_Tang6;~Yanzhi_Wang3", "gender": "F;M;M;M;F;M;M;M", "homepage": 
"https://peiyanflying.github.io/Peggy_Peiyan.github.io/;https://sites.google.com/husky.neu.edu/zlk/home?authuser=1;https://www.linkedin.com/in/%E9%91%AB-%E5%AD%9F-b45849175/;;https://yifanfanfanfan.github.io/;;https://ha0tang.github.io/;https://web.northeastern.edu/yanzhiwang/", "dblp": "254/1329;211/6323;;;49/3073-4.html;205/3007;07/5751-5;", "google_scholar": "OGU3CVoAAAAJ;XYa4NVYAAAAJ;;https://scholar.google.com/citations?view_op=list_works;U_gevVgAAAAJ;tBIAgtgAAAAJ;9zJkeEMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-8120-4456;0000-0003-2228-0587;;0000-0002-3912-097X;0000-0001-9844-992X;0000-0002-2077-1246;", "linkedin": ";zhenglun-kong-35b527150/;%E9%91%AB-%E5%AD%9F-b45849175/;pinrui-yu-237535180;yifan-gong-3059b8132/;;hao-tang-887475138/;", "or_profile": "~Peiyan_Dong1;~Zhenglun_Kong1;~Xin_Meng1;~Pinrui_Yu1;~Yifan_Gong2;~Geng_Yuan1;~Hao_Tang6;~Yanzhi_Wang3", "aff": "Northeastern University;Northeastern University;NVIDIA;Northeastern University;Northeastern University;Northeastern University;ETH Zurich;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;nvidia.com;neu.edu;neu.edu;northeastern.edu;vision.ee.ethz.ch;northeastern.edu", "position": "PhD student;PhD student;Researcher;PhD student;PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ndong2023hotbev,\ntitle={Hot{BEV}: Hardware-oriented Transformer-based Multi-View 3D Detector for {BEV} Perception},\nauthor={Peiyan Dong and Zhenglun Kong and Xin Meng and Pinrui Yu and Yifan Gong and Geng Yuan and Hao Tang and Yanzhi Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3Cj67k38st}\n}", "github": "", "project": "", "reviewers": "BJ1v;H5E6;Z9GJ;Mv7q", "pdf_size": 4112747, "rating": "5;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;2;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "79;51;80;112", "wc_strengths": "30;49;56;99", "wc_weaknesses": "207;128;109;98", "wc_questions": "91;58;28;134", "wc_limitations": "37;118;5;40", "wc_review": "444;404;278;483", "wc_reply_reviewers": "208;851;50;64", "wc_reply_authors": "199;1042;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "3;4;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 21.592822881689184 ], "wc_strengths_avg": [ 58.5, 25.243811122728676 ], "wc_weaknesses_avg": [ 135.5, 42.652666974059194 ], "wc_questions_avg": [ 77.75, 39.38511774769754 ], "wc_limitations_avg": [ 50.0, 41.587257663856604 ], "wc_review_avg": [ 402.25, 76.98173484665047 ], "wc_reply_reviewers_avg": [ 293.25, 327.9019479966534 ], "wc_reply_authors_avg": [ 310.25, 430.2164426192937 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8380519675641254258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "northeastern.edu;northeastern.edu;nvidia.com;neu.edu;neu.edu;northeastern.edu;vision.ee.ethz.ch;northeastern.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;2;0", "aff_unique_norm": "Northeastern University;NVIDIA;ETH Zurich", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": 
"https://www.northeastern.edu;https://www.nvidia.com;https://www.ethz.ch", "aff_unique_abbr": "NEU;NVIDIA;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Posthoc privacy guarantees for collaborative inference with modified Propose-Test-Release", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72912", "id": "3DMDNwd7ND", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5433b79562b9fa85bd5da0c95a78c907-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3DMDNwd7ND", "openreview": "https://openreview.net/forum?id=3DMDNwd7ND", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72912", "video": "https://nips.cc/virtual/2023/poster/72912", "author_site": "Abhishek Singh, Praneeth Vepakomma, Vivek Sharma, Ramesh Raskar", "tldr": "", "abstract": "Cloud-based machine learning inference is an emerging paradigm where users query by sending their data through a service provider who runs an ML model on that data and returns back the answer. Due to increased concerns over data privacy, recent works have proposed Collaborative Inference (CI) to learn a privacy-preserving encoding of sensitive user data before it is shared with an untrusted service provider. Existing works so far evaluate the privacy of these encodings through empirical reconstruction attacks. In this work, we develop a new framework that provides formal privacy guarantees for an arbitrarily trained neural network by linking its local Lipschitz constant with its local sensitivity. To guarantee privacy using local sensitivity, we extend the Propose-Test-Release (PTR) framework to make it tractable for neural network queries. 
We verify the efficacy of our framework experimentally on real-world datasets and elucidate the role of Adversarial Representation Learning (ARL) in improving the privacy-utility trade-off.", "keywords": "privacy;deep learning;neural networks;adversarial learning;reconstruction guarantees;collaborative inference;MLaaS", "primary_area": "", "supplementary_material": "/attachment/6c7e3c3ac09bdf33699bbd2dc344aeb9a27c56cc.pdf", "author": "Abhishek Singh;Praneeth Vepakomma;Vivek Sharma;Ramesh Raskar", "authorids": "~Abhishek_Singh5;~Praneeth_Vepakomma2;~Vivek_Sharma1;~Ramesh_Raskar1", "gender": "M;;M;M", "homepage": "https://tremblerz.github.io/;https://praneeth.mit.edu/;https://vivoutlaw.github.io/;https://www.media.mit.edu/people/raskar/overview/", "dblp": "27/2328-5;131/6694;;r/RameshRaskar", "google_scholar": "https://scholar.google.co.in/citations?user=3QygpzAAAAAJ;T_mPgZIAAAAJ;fNbVXwQAAAAJ;", "orcid": "0000-0003-0217-9801;;;0000-0002-3254-3224", "linkedin": "tremblerz/;;vivoutlaw/;", "or_profile": "~Abhishek_Singh5;~Praneeth_Vepakomma2;~Vivek_Sharma1;~Ramesh_Raskar1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Sony Research;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;sony.com;mit.edu", "position": "PhD student;PhD student;Senior Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nsingh2023posthoc,\ntitle={Posthoc privacy guarantees for collaborative inference with modified Propose-Test-Release},\nauthor={Abhishek Singh and Praneeth Vepakomma and Vivek Sharma and Ramesh Raskar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3DMDNwd7ND}\n}", "github": "", "project": "", "reviewers": "UmFA;MzZs;uj9h;ybRi", "pdf_size": 450713, "rating": "3;6;6;7", "confidence": "5;4;4;2", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "65;56;99;65", "wc_strengths": "17;104;23;16", "wc_weaknesses": "23;129;69;197", "wc_questions": "226;35;46;22", "wc_limitations": "1;1;1;8", "wc_review": "332;325;238;308", "wc_reply_reviewers": "242;143;11;10", "wc_reply_authors": "394;205;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.25, 16.43738117827776 ], "wc_strengths_avg": [ 40.0, 37.04726710568541 ], "wc_weaknesses_avg": [ 104.5, 65.30505340323978 ], "wc_questions_avg": [ 82.25, 83.42773819300149 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 300.75, 37.265097611572145 ], "wc_reply_reviewers_avg": [ 101.5, 97.5 ], "wc_reply_authors_avg": [ 149.75, 163.98227800588697 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8411910241920598, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7987883901455796107&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;sony.com;mit.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Sony", "aff_unique_dep": ";Research", "aff_unique_url": "https://web.mit.edu;https://www.sony.com", "aff_unique_abbr": "MIT;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Japan" }, { "id": "3Da0eESvN1", "title": "An Efficient Tester-Learner for Halfspaces", "track": "main", "status": "Reject", "tldr": "", "abstract": "We give the first efficient algorithm for learning halfspaces in the testable learning model recently defined by Rubinfeld and Vasilyan [2022]. In this model, a learner certifies that the accuracy of its output hypothesis is near optimal whenever the training set passes an associated test, and training sets drawn from some target distribution must pass the test. This model is more challenging than distribution-specific agnostic or Massart noise models where the learner is allowed to fail arbitrarily if the distributional assumption does not hold. We consider the setting where the target distribution is the standard Gaussian in $d$ dimensions and the label noise is either Massart or adversarial (agnostic). For Massart noise, our tester-learner runs in polynomial time and outputs a hypothesis with (information-theoretically optimal) error $\\mathrm{opt} + \\epsilon$ (and extends to any fixed strongly log-concave target distribution). For adversarial noise, our tester-learner obtains error $O(\\mathrm{opt}) + \\epsilon$ in polynomial time. Prior work on testable learning ignores the labels in the training set and checks that the empirical moments of the covariates are close to the moments of the base distribution. Here we develop new tests of independent interest that make critical use of the labels and combine them with the moment-matching approach of Gollakota et al. [2022]. This enables us to implement a testable variant of the algorithm of Diakonikolas et al. [2020a, 2020b] for learning noisy halfspaces using nonconvex SGD.", "keywords": "testable learning;pac learning;agnostic learning;Massart label noise;adversarial label noise;distribution testing", "primary_area": "", "supplementary_material": "/attachment/eda6a1b825c315f744bb1b3a31e73340ed62754f.pdf", "author": "Aravind Gollakota;Adam Klivans;Konstantinos Stavropoulos;Arsen Vasilyan", "authorids": "~Aravind_Gollakota1;~Adam_Klivans1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "gender": "M;M;;", "homepage": "https://aravind-pg.github.io;http://www.cs.utexas.edu/~klivans;;", "dblp": "264/1576;k/AdamRKlivans;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aravind_Gollakota1;~Adam_Klivans1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "aff": "University of Texas, Austin;University of Texas, Austin;;", "aff_domain": "utexas.edu;cs.utexas.edu;;", "position": "PhD student;Professor;;", "bibtex": "@misc{\ngollakota2023an,\ntitle={An Efficient Tester-Learner for Halfspaces},\nauthor={Aravind Gollakota and Adam Klivans and Konstantinos Stavropoulos and Arsen Vasilyan},\nyear={2023},\nurl={https://openreview.net/forum?id=3Da0eESvN1}\n}", "github": "", "project": "", "reviewers": "oxSi;Vb7c;pNna;GfRi", "site": "https://openreview.net/forum?id=3Da0eESvN1", "pdf_size": 415864, "rating": "3;7;7;7", "confidence": "4;4;3;4", "soundness": "3;3;4;4", "novelty": "2;4;3;3", "presentation": "3;3;4;4", "wc_summary": "426;107;231;289", "wc_strengths": "67;91;82;156", "wc_weaknesses": "16;193;31;12", "wc_questions": "14;71;26;57", "wc_limitations": "24;9;57;3", "wc_review": "547;471;427;517", "wc_reply_reviewers": "11;14;17;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 3.75, 0.4330127018922193 
], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 263.25, 114.67862704096173 ], "wc_strengths_avg": [ 99.0, 34.007352146263905 ], "wc_weaknesses_avg": [ 63.0, 75.38899123877438 ], "wc_questions_avg": [ 42.0, 22.94558781116753 ], "wc_limitations_avg": [ 23.25, 20.932928605429293 ], "wc_review_avg": [ 490.5, 45.57137259288994 ], "wc_reply_reviewers_avg": [ 13.25, 2.48746859276655 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15030715879021468993&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GLIME: General, Stable and Local LIME Explanation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72911", "id": "3FJaFElIVN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/71ed042903ed67c7f6355e5dd0539eec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3FJaFElIVN", "openreview": "https://openreview.net/forum?id=3FJaFElIVN", "poster": "/media/PosterPDFs/NeurIPS%202023/72911.png?t=1702178535.7862027", "slides": "https://nips.cc/virtual/2023/poster/72911", "video": "https://nips.cc/virtual/2023/poster/72911", "author_site": "Zeren Tan, Yang Tian, Jian Li", "tldr": "", "abstract": "As black-box machine learning models become more complex and are applied in high-stakes settings, the need for providing explanations for their predictions becomes crucial. Although Local Interpretable Model-agnostic Explanations (LIME) \\cite{ribeiro2016should} is a widely adopted method for understanding model behavior, it suffers from instability with respect to random seeds \\cite{zafar2019dlime, shankaranarayana2019alime, bansal2020sam} and exhibits low local fidelity (i.e., how the explanation explains model's local behaviors) \\cite{rahnama2019study, laugel2018defining}. Our study demonstrates that this instability is caused by small sample weights, resulting in the dominance of regularization and slow convergence. Additionally, LIME's sampling approach is non-local and biased towards the reference, leading to diminished local fidelity and instability to references. To address these challenges, we propose \\textsc{Glime}, an enhanced framework that extends LIME and unifies several previous methods. Within the \\textsc{Glime} framework, we derive an equivalent formulation of LIME that achieves significantly faster convergence and improved stability. By employing a local and unbiased sampling distribution, \\textsc{Glime} generates explanations with higher local fidelity compared to LIME, while being independent of the reference choice. 
Moreover, \\textsc{Glime} offers users the flexibility to choose sampling distribution based on their specific scenarios.", "keywords": "Explanation;LIME;Stability;Local fidelity;Interpretability", "primary_area": "", "supplementary_material": "/attachment/85b1a8bf14bfe8b4eaff34ee5511db6d62da9814.zip", "author": "Zeren Tan;Yang Tian;Jian Li", "authorids": "~Zeren_Tan1;~Yang_Tian2;~Jian_Li2", "gender": "M;M;M", "homepage": ";;http://iiis.tsinghua.edu.cn/~jianli", "dblp": "220/5551;64/5869;33/5448-15", "google_scholar": "GSgL6zEAAAAJ;mRAghwIAAAAJ;zX7i1EkAAAAJ", "orcid": "0009-0000-2266-8739;0000-0003-1970-0413;", "linkedin": ";;", "or_profile": "~Zeren_Tan1;~Yang_Tian2;~Jian_Li2", "aff": "Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\ntan2023glime,\ntitle={{GLIME}: General, Stable and Local {LIME} Explanation},\nauthor={Zeren Tan and Yang Tian and Jian Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3FJaFElIVN}\n}", "github": "", "project": "", "reviewers": "jFSL;sX7G;DXPw;NTaD", "pdf_size": 2052383, "rating": "7;7;7;7", "confidence": "4;3;4;4", "soundness": "2;4;4;4", "novelty": "2;4;3;3", "presentation": "3;4;3;3", "wc_summary": "137;81;105;82", "wc_strengths": "99;50;81;68", "wc_weaknesses": "212;84;123;53", "wc_questions": "35;28;91;43", "wc_limitations": "50;2;11;11", "wc_review": "533;245;411;257", "wc_reply_reviewers": "76;44;17;37", "wc_reply_authors": "479;20;18;19", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 101.25, 22.76373211931646 ], "wc_strengths_avg": [ 74.5, 17.92344832893492 ], "wc_weaknesses_avg": [ 118.0, 59.669925423114115 ], "wc_questions_avg": [ 49.25, 24.681724007856502 ], "wc_limitations_avg": [ 18.5, 18.553975315279473 ], "wc_review_avg": [ 361.5, 118.696040372036 ], "wc_reply_reviewers_avg": [ 43.5, 21.219095173922945 ], "wc_reply_authors_avg": [ 134.0, 199.1870979757474 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18020420863163872893&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Neural Frailty Machine: Beyond proportional hazard assumption in neural survival regressions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72910", "id": "3Fc9gnR0fa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/11a7f429d75f9f8c6e9c630aeb6524b5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3Fc9gnR0fa", "openreview": "https://openreview.net/forum?id=3Fc9gnR0fa", "poster": "/media/PosterPDFs/NeurIPS%202023/72910.png?t=1699165594.8467023", "slides": 
"https://nips.cc/virtual/2023/poster/72910", "video": "https://nips.cc/virtual/2023/poster/72910", "author_site": "Ruofan Wu, Jiawei Qiao, Mingzhe Wu, Wen Yu, Ming Zheng, Tengfei LIU, Tianyi Zhang, Weiqiang Wang", "tldr": "", "abstract": "We present neural frailty machine (NFM), a powerful and flexible neural modeling framework for survival regressions. The NFM framework utilizes the classical idea of multiplicative frailty in survival analysis as a principled way of extending the proportional hazard assumption, at the same time being able to leverage the strong approximation power of neural architectures for handling nonlinear covariate dependence. Two concrete models are derived under the framework that extends neural proportional hazard models and nonparametric hazard regression models. Both models allow efficient training under the likelihood objective. Theoretically, for both proposed models, we establish statistical guarantees of neural function approximation with respect to nonparametric components via characterizing their rate of convergence. Empirically, we provide synthetic experiments that verify our theoretical statements. We also conduct experimental evaluations over $6$ benchmark datasets of different scales, showing that the proposed NFM models achieve predictive performance comparable to or sometimes surpassing state-of-the-art survival models. Our code is publicly availabel at https://github.com/Rorschach1989/nfm", "keywords": "Survival Analysis;Theory;Semiparametric statistics", "primary_area": "", "supplementary_material": "/attachment/ebc2dc5dd3548fe6e2c61ba7fc63da9530d8005d.zip", "author": "Ruofan Wu;Jiawei Qiao;Mingzhe Wu;Wen Yu;Ming Zheng;Tengfei LIU;Tianyi Zhang;Weiqiang Wang", "authorids": "~Ruofan_Wu1;~Jiawei_Qiao1;~Mingzhe_Wu1;~Wen_Yu1;~Ming_Zheng1;~Tengfei_LIU2;~Tianyi_Zhang5;~Weiqiang_Wang4", "gender": "M;M;M;M;F;;M;M", "homepage": "https://rorschach1989.github.io/;;;https://www.fdsm.fudan.edu.cn/AboutUs/preview.html?uid=012077;https://www.fdsm.fudan.edu.cn/AboutUs/preview.html?uid=011916;;;https://www.linkedin.com/in/weiqiang-wang-489b925/", "dblp": ";342/9195;;;;;;", "google_scholar": ";;;;;;;", "orcid": ";;;;;;;0000-0002-6159-619X", "linkedin": ";https://www.linkedin.cn/incareer/in/ACoAAC5mkMQBMjdfr90k5lIqIbS7BwXWIuR0brw;mingzhe-wu-4904a6148/;;;;tianyi-zhang-178a491a/;weiqiang-wang-489b925/", "or_profile": "~Ruofan_Wu1;~Jiawei_Qiao1;~Mingzhe_Wu1;~Wen_Yu1;~Ming_Zheng1;~Tengfei_LIU2;~Tianyi_Zhang5;~Weiqiang_Wang4", "aff": "Ant Group;Fudan University;;Fudan University;;;Alipay;Ant Group", "aff_domain": "antgroup.com;fudan.edu.cn;;fdu.edu;;;alipay.com;antgroup.com", "position": "Researcher;PhD student;;Full Professor;;;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nwu2023neural,\ntitle={Neural Frailty Machine: Beyond proportional hazard assumption in neural survival regressions},\nauthor={Ruofan Wu and Jiawei Qiao and Mingzhe Wu and Wen Yu and Ming Zheng and Tengfei LIU and Tianyi Zhang and Weiqiang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3Fc9gnR0fa}\n}", "github": "", "project": "", "reviewers": "PAZm;U6h7;nR17;82in", "pdf_size": 704451, "rating": "4;4;5;7", "confidence": "3;3;4;3", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "45;54;27;88", "wc_strengths": "33;39;8;94", "wc_weaknesses": "305;25;49;120", "wc_questions": "4;135;55;144", "wc_limitations": "7;8;1;87", "wc_review": "394;261;140;533", 
"wc_reply_reviewers": "143;52;7;106", "wc_reply_authors": "158;180;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.5, 22.1641602593015 ], "wc_strengths_avg": [ 43.5, 31.388692231439016 ], "wc_weaknesses_avg": [ 124.75, 109.77334603627604 ], "wc_questions_avg": [ 84.5, 57.967663399519566 ], "wc_limitations_avg": [ 25.75, 35.4638900855504 ], "wc_review_avg": [ 332.0, 146.7566012143917 ], "wc_reply_reviewers_avg": [ 77.0, 51.77354536826699 ], "wc_reply_authors_avg": [ 84.5, 84.85723304468512 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=420681692207483073&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "antgroup.com;fudan.edu.cn;;fdu.edu;;;alipay.com;antgroup.com", "author_num": 8, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Ant Group;Fudan University;Alipay", "aff_unique_dep": ";;", "aff_unique_url": "https://www.antgroup.com;https://www.fudan.edu.cn;https://www.alipay.com", "aff_unique_abbr": "Ant Group;Fudan;Alipay", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Addressing Negative Transfer in Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72909", "id": "3G2ec833mW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/56a7b9a07ae01ddea762dcc51280298b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3G2ec833mW", "openreview": "https://openreview.net/forum?id=3G2ec833mW", "poster": "/media/PosterPDFs/NeurIPS%202023/72909.png?t=1702384007.843983", "slides": "https://nips.cc/virtual/2023/poster/72909", "video": "https://nips.cc/virtual/2023/poster/72909", "author_site": "Hyojun Go, Kim, Yunsung Lee, Seunghyun Lee, Shinhyeok Oh, Hyeongdon Moon, Seungtaek Choi", "tldr": "", "abstract": "Diffusion-based generative models have achieved remarkable success in various domains. It trains a shared model on denoising tasks that encompass different noise levels simultaneously, representing a form of multi-task learning (MTL). However, analyzing and improving diffusion models from an MTL perspective remains under-explored. In particular, MTL can sometimes lead to the well-known phenomenon of $\\textit{negative transfer}$, which results in the performance degradation of certain tasks due to conflicts between tasks. In this paper, we first aim to analyze diffusion training from an MTL standpoint, presenting two key observations: $\\textbf{(O1)}$ the task affinity between denoising tasks diminishes as the gap between noise levels widens, and $\\textbf{(O2)}$ negative transfer can arise even in diffusion training. Building upon these observations, we aim to enhance diffusion training by mitigating negative transfer. To achieve this, we propose leveraging existing MTL methods, but the presence of a huge number of denoising tasks makes this computationally expensive to calculate the necessary per-task loss or gradient. To address this challenge, we propose clustering the denoising tasks into small task clusters and applying MTL methods to them. 
Specifically, based on $\\textbf{(O2)}$, we employ interval clustering to enforce temporal proximity among denoising tasks within clusters. We show that interval clustering can be solved using dynamic programming, utilizing signal-to-noise ratio, timestep, and task affinity for clustering objectives. Through this, our approach addresses the issue of negative transfer in diffusion models by allowing for efficient computation of MTL methods. We validate the efficacy of proposed clustering and its integration with MTL methods through various experiments, demonstrating 1) improved generation quality and 2) faster training convergence of diffusion models. Our project page is available at https://gohyojun15.github.io/ANT_diffusion/.", "keywords": "Diffusion Models;Multi-Task Learning", "primary_area": "", "supplementary_material": "/attachment/06cc42896b4fb000d46bd1f5c64929d2981bd17d.pdf", "author": "Hyojun Go;Jinyoung Kim;Yunsung Lee;Seunghyun Lee;Shinhyeok Oh;Hyeongdon Moon;Seungtaek Choi", "authorids": "~Hyojun_Go2;~Jinyoung_Kim1;~Yunsung_Lee1;~Seunghyun_Lee2;~Shinhyeok_Oh1;~Hyeongdon_Moon1;~Seungtaek_Choi1", "gender": "M;M;M;M;M;M;M", "homepage": "https://gohyojun15.github.io/;;;https://sites.google.com/view/seunghyun-lee/home;https://shinhyeokoh.github.io/;https://hist0613.github.io/;https://donimoon.com", "dblp": "283/5331;89/3500;227/9311;23/774;283/2923;218/7548;314/9698", "google_scholar": "xfiZvzsAAAAJ;https://scholar.google.co.kr/citations?user=7VupJX4AAAAJ;https://scholar.google.co.kr/citations?user=7iaKhrEAAAAJ;NOJNXdAAAAAJ;https://scholar.google.co.kr/citations?user=H6BKgo8AAAAJ;QbZ4a5sAAAAJ;xBqZRT0AAAAJ", "orcid": "0000-0002-5470-042X;;;;0000-0002-9360-6700;0000-0003-3570-0907;0000-0001-5759-2017", "linkedin": ";;;;https://linkedin.com/in/shinhyeok-oh-5082b51a8;seungtaek-choi-969425a4/;hyeong-don-mun-8477571b4/", "or_profile": "~Hyojun_Go2;~Jinyoung_Kim1;~Yunsung_Lee1;~Seunghyun_Lee2;~Shinhyeok_Oh1;~Seungtaek_Choi1;~Hyeongdon_Mun1", "aff": "Riiid AI research;Riiid;Riiid;Riiid;Riiid AI Research;Riiid;Korea University", "aff_domain": "riiid.co;riiid.co;riiid.co;riiid.co;riiid.co;riiid.co;korea.ac.kr", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Undergrad student", "bibtex": "@inproceedings{\ngo2023addressing,\ntitle={Addressing Negative Transfer in Diffusion Models},\nauthor={Hyojun Go and Jinyoung Kim and Yunsung Lee and Seunghyun Lee and Shinhyeok Oh and Hyeongdon Moon and Seungtaek Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3G2ec833mW}\n}", "github": "", "project": "", "reviewers": "7mPc;sPtA;8Ptq", "pdf_size": 22539040, "rating": "5;6;7", "confidence": "4;4;5", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "4;4;4", "wc_summary": "80;141;33", "wc_strengths": "105;89;38", "wc_weaknesses": "40;109;89", "wc_questions": "59;66;40", "wc_limitations": "1;4;2", "wc_review": "285;409;202", "wc_reply_reviewers": "15;0;9", "wc_reply_authors": "20;0;19", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 84.66666666666667, 44.21412544525662 ], "wc_strengths_avg": [ 77.33333333333333, 28.56960311628816 ], "wc_weaknesses_avg": [ 79.33333333333333, 28.986586936412884 ], "wc_questions_avg": [ 55.0, 
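For the "Addressing Negative Transfer in Diffusion Models" record above: as its abstract notes, interval clustering of timesteps into contiguous groups can be solved with a textbook dynamic program. A minimal sketch; the within-interval variance cost and the linear log-SNR schedule below are illustrative stand-ins for the paper's SNR/timestep/task-affinity objectives:

```python
import numpy as np

def interval_clustering(values, k):
    """Partition timesteps 0..T-1 into k contiguous intervals minimizing total
    within-interval variance of `values` (e.g., a per-timestep log-SNR).
    Returns a list of (start, end) index pairs. O(T^2 * k) dynamic program."""
    T = len(values)
    prefix = np.concatenate([[0.0], np.cumsum(values)])
    prefix2 = np.concatenate([[0.0], np.cumsum(np.square(values))])

    def cost(i, j):  # (length * variance) of values[i:j]
        n = j - i
        s, s2 = prefix[j] - prefix[i], prefix2[j] - prefix2[i]
        return s2 - s * s / n

    INF = float("inf")
    dp = np.full((k + 1, T + 1), INF)
    cut = np.zeros((k + 1, T + 1), dtype=int)
    dp[0, 0] = 0.0
    for c in range(1, k + 1):
        for j in range(c, T + 1):
            for i in range(c - 1, j):
                cand = dp[c - 1, i] + cost(i, j)
                if cand < dp[c, j]:
                    dp[c, j], cut[c, j] = cand, i
    intervals, j = [], T          # backtrack the optimal cut points
    for c in range(k, 0, -1):
        i = cut[c, j]
        intervals.append((i, j - 1))
        j = i
    return intervals[::-1]

log_snr = np.linspace(8, -8, 200)  # hypothetical per-timestep log-SNR schedule
print(interval_clustering(log_snr, k=4))  # -> four equal contiguous quarters
```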
10.98483803552272 ], "wc_limitations_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_review_avg": [ 298.6666666666667, 85.05815004388991 ], "wc_reply_reviewers_avg": [ 8.0, 6.164414002968976 ], "wc_reply_authors_avg": [ 13.0, 9.201449161228174 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1563426263429143261&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "riiid.co;riiid.co;riiid.co;riiid.co;riiid.co;riiid.co;korea.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Riiid;Korea University", "aff_unique_dep": "AI research;", "aff_unique_url": "https://www.riiid.com;https://www.korea.ac.kr", "aff_unique_abbr": "Riiid;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "The Pursuit of Human Labeling: A New Perspective on Unsupervised Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72908", "id": "3GpIeVYw8X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/be38c74290c251820e396680a82ce12d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3GpIeVYw8X", "openreview": "https://openreview.net/forum?id=3GpIeVYw8X", "poster": "/media/PosterPDFs/NeurIPS%202023/72908.png?t=1701850238.496618", "slides": "https://nips.cc/virtual/2023/poster/72908", "video": "https://nips.cc/virtual/2023/poster/72908", "author_site": "Artyom Gadetsky, Maria Brbic", "tldr": "", "abstract": "We present HUME, a simple model-agnostic framework for inferring human labeling of a given dataset without any external supervision. The key insight behind our approach is that classes defined by many human labelings are linearly separable regardless of the representation space used to represent a dataset. HUME utilizes this insight to guide the search over all possible labelings of a dataset to discover an underlying human labeling. We show that the proposed optimization objective is strikingly well-correlated with the ground truth labeling of the dataset. In effect, we only train linear classifiers on top of pretrained representations that remain fixed during training, making our framework compatible with any large pretrained and self-supervised model. Despite its simplicity, HUME outperforms a supervised linear classifier on top of self-supervised representations on the STL-10 dataset by a large margin and achieves comparable performance on the CIFAR-10 dataset. Compared to the existing unsupervised baselines, HUME achieves state-of-the-art performance on four benchmark image classification datasets including the large-scale ImageNet-1000 dataset. 
Altogether, our work provides a fundamentally new view to tackle unsupervised learning by searching for consistent labelings between different representation spaces.", "keywords": "unsupervised learning;deep learning;generalization;self-supervised learning;clustering", "primary_area": "", "supplementary_material": "/attachment/92ae38550b5a67baf4e429155c54fcad75293dd0.pdf", "author": "Artyom Gadetsky;Maria Brbic", "authorids": "~Artyom_Gadetsky1;~Maria_Brbic1", "gender": "M;F", "homepage": "https://agadetsky.github.io;https://brbiclab.epfl.ch/", "dblp": "222/2900;130/3233", "google_scholar": "J48uBYgAAAAJ;ltxmeroAAAAJ", "orcid": ";0000-0002-1120-1778", "linkedin": ";", "or_profile": "~Artyom_Gadetsky1;~Maria_Brbic1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngadetsky2023the,\ntitle={The Pursuit of Human Labeling: A New Perspective on Unsupervised Learning},\nauthor={Artyom Gadetsky and Maria Brbic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3GpIeVYw8X}\n}", "github": "", "project": "", "reviewers": "J4o2;BUAb;VWGE;BH9W;uZTE", "pdf_size": 1851646, "rating": "5;7;7;8;8", "confidence": "2;3;3;3;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;4", "presentation": "3;2;3;3;4", "wc_summary": "93;108;87;58;44", "wc_strengths": "57;72;77;91;31", "wc_weaknesses": "354;122;118;8;71", "wc_questions": "37;218;90;47;95", "wc_limitations": "45;6;7;20;5", "wc_review": "586;526;379;224;246", "wc_reply_reviewers": "153;18;39;0;56", "wc_reply_authors": "721;24;49;0;45", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 7.0, 1.0954451150103321 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 78.0, 23.50319127267614 ], "wc_strengths_avg": [ 65.6, 20.431348462595412 ], "wc_weaknesses_avg": [ 134.6, 117.16927925015158 ], "wc_questions_avg": [ 97.4, 64.48751817212381 ], "wc_limitations_avg": [ 16.6, 15.21315220458929 ], "wc_review_avg": [ 392.2, 145.1211907338139 ], "wc_reply_reviewers_avg": [ 53.2, 53.36440761406426 ], "wc_reply_authors_avg": [ 167.8, 277.1493460212382 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13195287412813478748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Post Hoc Explanations of Language Models Can Improve Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72907", "id": "3H37XciUEv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce65173b994cf7c925c71b482ee14a8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3H37XciUEv", "openreview": "https://openreview.net/forum?id=3H37XciUEv", "poster": "/media/PosterPDFs/NeurIPS%202023/72907.png?t=1697514532.1567976", "slides": 
"https://nips.cc/virtual/2023/poster/72907", "video": "https://nips.cc/virtual/2023/poster/72907", "author_site": "Satyapriya Krishna, Jiaqi Ma, Dylan Slack, Asma Ghandeharioun, Sameer Singh, Himabindu Lakkaraju", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable capabilities in performing complex tasks. Moreover, recent research has shown that incorporating human-annotated rationales (e.g., Chain-of-Thought prompting) during in-context learning can significantly enhance the performance of these models, particularly on tasks that require reasoning capabilities. However, incorporating such rationales poses challenges in terms of scalability as this requires a high degree of human involvement. In this work, we present a novel framework, Amplifying Model Performance by Leveraging In-Context Learning with Post Hoc Explanations (AMPLIFY), which addresses the aforementioned challenges by automating the process of rationale generation. To this end, we leverage post hoc explanation methods which output attribution scores (explanations) capturing the influence of each of the input features on model predictions. More specifically, we construct automated natural language rationales that embed insights from post hoc explanations to provide corrective signals to LLMs. Extensive experimentation with real-world datasets demonstrates that our framework, AMPLIFY, leads to prediction accuracy improvements of about 10-25% over a wide range of tasks, including those where prior approaches which rely on human-annotated rationales such as Chain-of-Thought prompting fall short. Our work makes one of the first attempts at highlighting the potential of post hoc explanations as valuable tools for enhancing the effectiveness of LLMs. Furthermore, we conduct additional empirical analyses and ablation studies to demonstrate the impact of each of the components of AMPLIFY, which, in turn, lead to critical insights for refining in context learning.", "keywords": "Machine Learning Explainability;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/6716b64bfb15d0c8eb8300e300ecca953a4b90df.zip", "author": "Satyapriya Krishna;Jiaqi Ma;Dylan Z Slack;Asma Ghandeharioun;Sameer Singh;Himabindu Lakkaraju", "authorids": "~Satyapriya_Krishna2;~Jiaqi_Ma1;~Dylan_Z_Slack1;~Asma_Ghandeharioun1;~Sameer_Singh1;~Himabindu_Lakkaraju1", "gender": "M;;M;;M;F", "homepage": "http://satyapriyakrishna.com/;https://jiaqima.github.io;https://dylanslacks.website;https://alum.mit.edu/www/asma_gh;http://sameersingh.org;http://web.stanford.edu/~himalv", "dblp": "251/9225;155/2199-1;https://dblp.org/pers/s/Slack:Dylan.html;124/3110;13/3568-1;68/9376", "google_scholar": "Q5bfPlkAAAAJ;Z9X2A1MAAAAJ;pyhz-gUAAAAJ;CkfQy2gAAAAJ;-hGZC54AAAAJ;", "orcid": ";0000-0001-8292-5901;;;0000-0003-0621-6323;", "linkedin": "satyapriya-krishna-50553084/;;;;sameersingh/;", "or_profile": "~Satyapriya_Krishna2;~Jiaqi_Ma1;~Dylan_Z_Slack1;~Asma_Ghandeharioun1;~Sameer_Singh1;~Hima_Lakkaraju1", "aff": "Harvard University;Harvard University;University of California, Irvine;Google;Allen Institute for Artificial Intelligence;Harvard University", "aff_domain": "harvard.edu;harvard.edu;uci.edu;google.com;allenai.org;harvard.edu", "position": "PhD student;Postdoc;PhD student;Research Scientist;Allen AI Fellow;Assistant Professor", "bibtex": "@inproceedings{\nkrishna2023post,\ntitle={Post Hoc Explanations of Language Models Can Improve Language Models},\nauthor={Satyapriya Krishna and Jiaqi Ma and Dylan Z Slack and Asma 
Ghandeharioun and Sameer Singh and Himabindu Lakkaraju},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3H37XciUEv}\n}", "github": "", "project": "", "reviewers": "LsYX;sqcU;4Npw;mmJT", "pdf_size": 901705, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "150;72;49;91", "wc_strengths": "55;36;24;82", "wc_weaknesses": "620;139;121;144", "wc_questions": "2;74;69;119", "wc_limitations": "7;9;14;7", "wc_review": "834;330;277;443", "wc_reply_reviewers": "24;9;31;127", "wc_reply_authors": "23;23;18;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 90.5, 37.43327396848959 ], "wc_strengths_avg": [ 49.25, 21.901769334919038 ], "wc_weaknesses_avg": [ 256.0, 210.3295033988337 ], "wc_questions_avg": [ 66.0, 41.7672120209142 ], "wc_limitations_avg": [ 9.25, 2.8613807855648994 ], "wc_review_avg": [ 471.0, 217.98509123332266 ], "wc_reply_reviewers_avg": [ 47.75, 46.44014965522829 ], "wc_reply_authors_avg": [ 20.5, 2.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3237891034015392761&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "harvard.edu;harvard.edu;uci.edu;google.com;allenai.org;harvard.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Harvard University;University of California, Irvine;Google;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.harvard.edu;https://www.uci.edu;https://www.google.com;https://allenai.org", "aff_unique_abbr": "Harvard;UCI;Google;AI2", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Irvine;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DFRD: Data-Free Robustness Distillation for Heterogeneous Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72906", "id": "3H9QH1v6U9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/39ca8893ea38905a9d2ffe786e85af0f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3H9QH1v6U9", "openreview": "https://openreview.net/forum?id=3H9QH1v6U9", "poster": "/media/PosterPDFs/NeurIPS%202023/72906.png?t=1697179760.6874776", "slides": "https://nips.cc/virtual/2023/poster/72906", "video": "https://nips.cc/virtual/2023/poster/72906", "author_site": "kangyang Luo, Shuai Wang, Yexuan Fu, Xiang Li, Yunshi Lan, Ming Gao", "tldr": "", "abstract": "Federated Learning (FL) is a privacy-constrained decentralized machine learning paradigm in which clients enable collaborative training without compromising private data. However, how to learn a robust global model in the data-heterogeneous and model-heterogeneous FL scenarios is challenging. 
To address it, we resort to data-free knowledge distillation to propose a new FL method (namely DFRD).\nDFRD equips the server with a conditional generator to approximate the training space of the local models uploaded by clients, and systematically investigates its training in terms of fidelity, transferability and diversity. To overcome the catastrophic forgetting of the global model caused by the distribution shifts of the generator across communication rounds, we maintain an exponential moving average copy of the generator on the server. Additionally, we propose dynamic weighting and label sampling to accurately extract knowledge from local models. Finally, our extensive experiments on various image classification tasks illustrate that DFRD achieves significant performance gains compared to SOTA baselines.", "keywords": "Federated Learning;Data Heterogeneity;Model Heterogeneity;Data-Free Distillation", "primary_area": "", "supplementary_material": "/attachment/13834395c825f154ce68c7bf09c3d8760b0d4f2d.pdf", "author": "Kangyang Luo;Shuai Wang;Yexuan Fu;Xiang Li;Yunshi Lan;Ming Gao", "authorids": "~Kangyang_Luo1;~Shuai_Wang20;~Yexuan_Fu1;~Xiang_Li24;~Yunshi_Lan1;~Ming_Gao1", "gender": "M;M;F;M;F;M", "homepage": ";https://github.com/ScottWong98;https://github.com/fuyexuan;https://lixiang3776.github.io;https://lanyunshi.github.io;http://dase.ecnu.edu.cn/mgao/", "dblp": ";42/1503;;40/1491-67.html;185/6830.html;71/4173-1", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;;JnxxNtsAAAAJ;Q0F92XIAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;0009-0003-0142-2483;0000-0002-0192-8498;0000-0002-5603-2680", "linkedin": ";;;;;", "or_profile": "~Kangyang_Luo1;~Shuai_Wang20;~Yexuan_Fu1;~Xiang_Li24;~Yunshi_Lan1;~Ming_Gao1", "aff": "East China Normal University;East China Normal University;East China Normal University;East China Normal University;East China Normal University;East China Normal University", "aff_domain": "ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn", "position": "PhD student;MS student;MS student;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nluo2023dfrd,\ntitle={{DFRD}: Data-Free Robustness Distillation for Heterogeneous Federated Learning},\nauthor={Kangyang Luo and Shuai Wang and Yexuan Fu and Xiang Li and Yunshi Lan and Ming Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3H9QH1v6U9}\n}", "github": "", "project": "", "reviewers": "i49s;XBHz;Yxnp;deo9;1PRp", "pdf_size": 875021, "rating": "5;5;5;6;6", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "2;2;3;3;4", "wc_summary": "111;37;64;77;186", "wc_strengths": "36;44;26;27;55", "wc_weaknesses": "50;106;258;141;413", "wc_questions": "348;52;111;37;153", "wc_limitations": "69;12;9;15;1", "wc_review": "614;251;467;337;907", "wc_reply_reviewers": "453;45;0;21;52", "wc_reply_authors": "1115;415;25;175;318", "reply_reviewers": "1;2;0;1;1", "reply_authors": "4;4;2;3;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 95.0, 51.35367562307493 ], "wc_strengths_avg": [ 66.2, 41.479633556722746 ], "wc_weaknesses_avg": [ 193.6, 129.1086364268479 ], "wc_questions_avg": [ 140.2, 111.91675477782583 ], "wc_limitations_avg": 
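For the "DFRD" record above: the exponential moving average copy of the generator mentioned in its abstract is a standard stabilization device. A minimal, framework-agnostic sketch; the class name and the toy scalar usage are illustrative, not the paper's implementation:

```python
class EMA:
    """Track an exponential moving average of parameters:
    shadow <- decay * shadow + (1 - decay) * param, once per update."""
    def __init__(self, params, decay=0.999):
        self.decay = decay
        self.shadow = {k: float(v) for k, v in params.items()}

    def update(self, params):
        for k, v in params.items():
            self.shadow[k] = self.decay * self.shadow[k] + (1 - self.decay) * float(v)

# usage: track a toy scalar "parameter" across communication rounds
ema = EMA({"w": 0.0}, decay=0.9)
for w in [1.0, 1.0, 1.0, 1.0]:
    ema.update({"w": w})
print(ema.shadow["w"])  # approaches 1.0 geometrically: 1 - 0.9**4 = 0.3439
```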
[ 20.2, 24.846730167166864 ], "wc_review_avg": [ 515.2, 231.1487832544225 ], "wc_reply_reviewers_avg": [ 114.2, 170.39413135433978 ], "wc_reply_authors_avg": [ 409.6, 376.55363495789015 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.2, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5749018234156091429&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "East China Normal University", "aff_unique_dep": "", "aff_unique_url": "http://www.ecnu.edu.cn", "aff_unique_abbr": "ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3HlULdiKFM", "title": "CoVR: Learning Composed Video Retrieval from Web Video Captions", "track": "main", "status": "Reject", "tldr": "", "abstract": "Composed Image Retrieval (CoIR) has recently gained popularity as a task that considers both text and image queries together, to search for relevant images in a database. Most CoIR approaches require manually annotated datasets, containing image-text-image triplets, where the text describes a modification from the query image to the target image. However, manual curation of CoIR triplets is expensive and prevents scalability. In this work, we instead propose a scalable automatic dataset creation methodology that generates triplets given video-caption pairs. To this end, we mine paired videos with a similar caption from a large database, and leverage a large language model to generate the corresponding modification text. We automatically construct our WebVid-CoVR dataset by applying this procedure to the large WebVid2M collection, resulting in 1.6M triplets. Moreover, we introduce a new benchmark for composed video retrieval (CoVR) and contribute a manually annotated evaluation set, along with baseline results. We further show that training a CoVR model on our dataset transfers well to CoIR, improving the state of the art in the zero-shot setup on both the CIRR and FashionIQ benchmarks. 
Our code, datasets, and models will be made publicly available.", "keywords": "composed image retrieval;deep learning;vision and language;computer vision", "primary_area": "", "supplementary_material": "/attachment/ea5dc1d5db87c19c2f461d0c34b811b3d9c57628.zip", "author": "Lucas Ventura;Antoine Yang;Cordelia Schmid;G\u00fcl Varol", "authorids": "~Lucas_Ventura1;~Antoine_Yang1;~Cordelia_Schmid1;~G\u00fcl_Varol1", "gender": "M;M;F;F", "homepage": "http://www.lucasventura.com/;https://antoyang.github.io/;https://cordeliaschmid.github.io/;http://imagine.enpc.fr/~varolg/", "dblp": "82/9935;248/7734;s/CordeliaSchmid;142/3066", "google_scholar": "gA_CYCQAAAAJ;https://scholar.google.fr/citations?hl=fr;IvqCXP4AAAAJ;https://scholar.google.fr/citations?user=ceSzF9YAAAAJ", "orcid": "0000-0001-5795-0064;0000-0002-7258-571X;;0000-0002-8438-6152", "linkedin": "lucasventurar/;antoine-y-49a28814b/;cordelia-schmid-47985a9;gulvarol/", "or_profile": "~Lucas_Ventura1;~Antoine_Yang1;~Cordelia_Schmid1;~Gul_Varol1", "aff": "ENPC, Ecole Nationale des Ponts et Chausees;INRIA;Inria;Max Planck Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "imagine.enpc.fr;inria.fr;inria.fr;tuebingen.mpg.de", "position": "PhD student;PhD student;Researcher;Guest Scientist", "bibtex": "@misc{\nventura2023covr,\ntitle={Co{VR}: Learning Composed Video Retrieval from Web Video Captions},\nauthor={Lucas Ventura and Antoine Yang and Cordelia Schmid and G{\\\"u}l Varol},\nyear={2023},\nurl={https://openreview.net/forum?id=3HlULdiKFM}\n}", "github": "", "project": "", "reviewers": "xJ3a;4ap2;xqD8;k4Sb", "site": "https://openreview.net/forum?id=3HlULdiKFM", "pdf_size": 34375870, "rating": "5;5;5;5", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "novelty": "4;2;3;3", "presentation": "3;3;3;3", "wc_summary": "104;108;31;71", "wc_strengths": "64;31;6;24", "wc_weaknesses": "216;112;9;84", "wc_questions": "101;10;9;4", "wc_limitations": "30;56;9;4", "wc_review": "515;317;64;187", "wc_reply_reviewers": "31;17;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.5, 30.955613384328213 ], "wc_strengths_avg": [ 31.25, 20.99255820523073 ], "wc_weaknesses_avg": [ 105.25, 74.20705828962633 ], "wc_questions_avg": [ 31.0, 40.47838929601819 ], "wc_limitations_avg": [ 24.75, 20.51066795596867 ], "wc_review_avg": [ 270.75, 167.00056137630196 ], "wc_reply_reviewers_avg": [ 12.0, 12.98075498574717 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3753839155534362809&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Ecole Nationale des Ponts et Chaussees;INRIA;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";;Intelligent Systems", "aff_unique_url": "https://www.enpc.fr;https://www.inria.fr;https://www.mpi-is.mpg.de", "aff_unique_abbr": "ENPC;INRIA;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "France;Germany" }, { "title": "CAMEL: Communicative Agents for \"Mind\" Exploration of Large Language Model Society", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72905", "id": "3IyL2XWDkG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a3621ee907def47c1b952ade25c67698-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3IyL2XWDkG", "openreview": "https://openreview.net/forum?id=3IyL2XWDkG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72905", "video": "https://nips.cc/virtual/2023/poster/72905", "author_site": "Guohao Li, Hasan Hammoud, Hani Itani, Dmitrii Khizbullin, Bernard Ghanem", "tldr": "", "abstract": "The rapid advancement of chat-based language models has led to remarkable progress in complex task-solving. However, their success heavily relies on human input to guide the conversation, which can be challenging and time-consuming. This paper explores the potential of building scalable techniques to facilitate autonomous cooperation among communicative agents, and provides insight into their \u201ccognitive\u201d processes. To address the challenges of achieving autonomous cooperation, we propose a novel communicative agent framework named role-playing . Our approach involves using inception prompting to guide chat agents toward task completion while maintaining consistency with human intentions. We showcase how role-playing can be used to generate conversational data for studying the behaviors and capabilities of a society of agents, providing a valuable resource for investigating conversational language models. In particular, we conduct comprehensive studies on instruction-following cooperation in multi-agent settings. Our contributions include introducing a novel communicative agent framework, offering a scalable approach for studying the cooperative behaviors and capabilities of multi-agent systems, and open-sourcing our library to support research on communicative agents and beyond: https://github.com/camel-ai/camel.", "keywords": "Communicative Agents;Large Language Models;AI Society;Role-Playing;Society of Mind", "primary_area": "", "supplementary_material": "/attachment/d5c91529ae3fea0f988e0c74e844457823143a5b.pdf", "author": "Guohao Li;Hasan Abed Al Kader Hammoud;Hani Itani;Dmitrii Khizbullin;Bernard Ghanem", "authorids": "~Guohao_Li1;~Hasan_Abed_Al_Kader_Hammoud1;~Hani_Itani1;~Dmitrii_Khizbullin2;~Bernard_Ghanem1", "gender": "M;M;M;M;M", "homepage": "https://ghli.org/;https://cemse.kaust.edu.sa/vcc/people/person/hasan-abed-al-kader-hammoud;;https://khizbullin.tech;https://ivul.kaust.edu.sa", "dblp": "211/7175-1;259/0615;208/4215;;37/2516", "google_scholar": "J9K-D0sAAAAJ;Plf1JSIAAAAJ;0GkfZ64AAAAJ;;rVsGTeEAAAAJ", "orcid": "0000-0003-0260-5129;;;;0000-0002-5534-587X", "linkedin": ";hasan-abed-al-kader-hammoud-56392a147/;;dmitrii-khizbullin;bernardghanem/", "or_profile": "~Guohao_Li1;~Hasan_Abed_Al_Kader_Hammoud1;~Hani_Itani1;~Dmitrii_Khizbullin2;~Bernard_Ghanem1", "aff": ";KAUST;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology", "aff_domain": ";kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": ";PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nli2023camel,\ntitle={{CAMEL}: Communicative Agents for ''Mind'' Exploration of Large Language Model Society},\nauthor={Guohao Li and Hasan Abed Al Kader Hammoud and Hani Itani and Dmitrii Khizbullin and Bernard Ghanem},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3IyL2XWDkG}\n}", "github": "", "project": "", "reviewers": "Z1bn;wYFm;6oPa", "pdf_size": 659557, "rating": "4;7;8", "confidence": "4;4;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "86;215;228", "wc_strengths": "76;95;160", "wc_weaknesses": "227;260;118", "wc_questions": "56;244;46", "wc_limitations": "10;48;1", "wc_review": "455;862;553", "wc_reply_reviewers": "144;69;68", "wc_reply_authors": "255;164;128", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 176.33333333333334, 64.09541498595841 ], "wc_strengths_avg": [ 110.33333333333333, 35.96603335865043 ], "wc_weaknesses_avg": [ 201.66666666666666, 60.67582348477485 ], "wc_questions_avg": [ 115.33333333333333, 91.07262059599594 ], "wc_limitations_avg": [ 19.666666666666668, 20.368821489936252 ], "wc_review_avg": [ 623.3333333333334, 173.44035157815944 ], "wc_reply_reviewers_avg": [ 93.66666666666667, 35.593382655893905 ], "wc_reply_authors_avg": [ 182.33333333333334, 53.443635937520405 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6933752452815364, "gs_citation": 471, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3976259482297250805&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "3Lx8vMNJZ1", "title": "Borda Regret Minimization for Generalized Linear Dueling Bandits", "track": "main", "status": "Reject", "tldr": "", "abstract": "Dueling bandits are widely used to model preferential feedback prevalent in many applications such as recommendation systems and ranking. In this paper, we study the Borda regret minimization problem for dueling bandits, which aims to identify the item with the highest Borda score while minimizing the cumulative regret. We propose a rich class of generalized linear dueling bandit models, which cover many existing models. We first prove a regret lower bound of order $\\Omega(d^{2/3} T^{2/3})$ for the Borda regret minimization problem, where $d$ is the dimension of contextual vectors and $T$ is the time horizon. To attain this lower bound, we propose an explore-then-commit type algorithm for the stochastic setting, which has a nearly matching regret upper bound $\\tilde{O}(d^{2/3} T^{2/3})$. We also propose an EXP3-type algorithm for the adversarial setting, where the underlying model parameter can change at each round. Our algorithm achieves an $\\tilde{O}(d^{2/3} T^{2/3})$ regret, which is also optimal. 
Empirical evaluations on both synthetic data and a simulated real-world environment are conducted to corroborate our theoretical analysis.", "keywords": "Borda score;dueling bandit;generalized linear model;linear bandit;regret minimization", "primary_area": "", "supplementary_material": "/attachment/7801490f75493b8dd6a9ea463a3863db0dfb2ce7.zip", "author": "Yue Wu;Tao Jin;Qiwei Di;Hao Lou;Farzad Farnoud;Quanquan Gu", "authorids": "~Yue_Wu12;~Tao_Jin3;~Qiwei_Di1;~Hao_Lou1;~Farzad_Farnoud1;~Quanquan_Gu1", "gender": "M;M;M;M;;M", "homepage": "https://yuewu.us/;https://tao-j.me;https://qiwei-di1234.github.io/;http://ips.lab.virginia.edu/;http://www.ece.virginia.edu/~ffh8x;http://web.cs.ucla.edu/~qgu/", "dblp": "41/5979-11;88/4850-2;354/3878;44/6250;88/7890.html;50/4597", "google_scholar": "kSQ1mLYAAAAJ;0kCyQGsAAAAJ;SewL0pkAAAAJ;;https://scholar.google.com/citations?hl=en;GU9HgNAAAAAJ", "orcid": ";;;;0000-0002-8684-4487;", "linkedin": ";;qiwei-di-00776a253/;;farzad-farnoud-b7993315/;", "or_profile": "~Yue_Wu12;~Tao_Jin3;~Qiwei_Di1;~Hao_Lou1;~Farzad_Farnoud1;~Quanquan_Gu1", "aff": "University of California, Los Angeles;;University of California, Los Angeles;University of Virginia;University of Virginia;University of California, Los Angeles", "aff_domain": "ucla.edu;;ucla.edu;virginia.edu;virginia.edu;cs.ucla.edu", "position": "PhD student;;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nwu2023borda,\ntitle={Borda Regret Minimization for Generalized Linear Dueling Bandits},\nauthor={Yue Wu and Tao Jin and Qiwei Di and Hao Lou and Farzad Farnoud and Quanquan Gu},\nyear={2023},\nurl={https://openreview.net/forum?id=3Lx8vMNJZ1}\n}", "github": "", "project": "", "reviewers": "ECQz;43D4;LMZ8;7FMh", "site": "https://openreview.net/forum?id=3Lx8vMNJZ1", "pdf_size": 801098, "rating": "6;6;7;7", "confidence": "3;4;3;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;4;4;4", "wc_summary": "128;242;68;223", "wc_strengths": "22;120;64;106", "wc_weaknesses": "36;183;171;64", "wc_questions": "42;12;37;50", "wc_limitations": "2;34;9;58", "wc_review": "230;591;349;501", "wc_reply_reviewers": "59;117;8;117", "wc_reply_authors": "0;26;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 165.25, 70.83563721743455 ], "wc_strengths_avg": [ 78.0, 38.34057902536163 ], "wc_weaknesses_avg": [ 113.5, 64.40690956721957 ], "wc_questions_avg": [ 35.25, 14.201672436723781 ], "wc_limitations_avg": [ 25.75, 22.094965489902897 ], "wc_review_avg": [ 417.75, 138.67475437151492 ], "wc_reply_reviewers_avg": [ 75.25, 45.477329517024195 ], "wc_reply_authors_avg": [ 6.5, 11.258330249197702 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14660110423878220029&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of California, Los Angeles;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.virginia.edu", "aff_unique_abbr": "UCLA;UVA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", 
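For the "Borda Regret Minimization" record above: the explore-then-commit idea can be illustrated by estimating Borda scores from uniformly random duels and then committing to the empirical winner. A minimal non-contextual sketch with a made-up preference matrix; the paper's algorithm additionally exploits the generalized linear structure and tunes the exploration length to obtain the $\tilde{O}(d^{2/3} T^{2/3})$ rate:

```python
import numpy as np

def etc_borda(pref, explore_rounds, rng):
    """pref[i, j] = P(item i beats item j); the Borda score of item i is the
    mean of pref[i, j] over j != i. Duel uniformly at random during the
    exploration phase, then commit to the item with the best estimate."""
    K = pref.shape[0]
    wins = np.zeros((K, K))
    plays = np.zeros((K, K))
    for _ in range(explore_rounds):
        i, j = rng.integers(K), rng.integers(K)
        wins[i, j] += rng.random() < pref[i, j]
        plays[i, j] += 1
    est = np.divide(wins, plays, out=np.full((K, K), 0.5), where=plays > 0)
    borda_est = (est.sum(axis=1) - np.diag(est)) / (K - 1)
    return int(np.argmax(borda_est))  # played for all remaining rounds

rng = np.random.default_rng(0)
pref = np.array([[0.5, 0.7, 0.8],
                 [0.3, 0.5, 0.6],
                 [0.2, 0.4, 0.5]])  # item 0 has the highest Borda score (0.75)
print(etc_borda(pref, explore_rounds=3000, rng=rng))  # -> 0
```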
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Undirected Probabilistic Model for Tensor Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72904", "id": "3NWWgB2SuF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51f9d542dea8bed1f66c8add6ec23c69-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3NWWgB2SuF", "openreview": "https://openreview.net/forum?id=3NWWgB2SuF", "poster": "/media/PosterPDFs/NeurIPS%202023/72904.png?t=1702301443.4706547", "slides": "https://nips.cc/virtual/2023/poster/72904", "video": "https://nips.cc/virtual/2023/poster/72904", "author_site": "Zerui Tao, Toshihisa Tanaka, Qibin Zhao", "tldr": "", "abstract": "Tensor decompositions (TDs) serve as a powerful tool for analyzing multiway data. Traditional TDs incorporate prior knowledge about the data into the model, such as a directed generative process from latent factors to observations. In practice, selecting proper structural or distributional assumptions beforehand is crucial for obtaining a promising TD representation. However, since such prior knowledge is typically unavailable in real-world applications, choosing an appropriate TD model can be challenging. This paper aims to address this issue by introducing a flexible TD framework that discards the structural and distributional assumptions, in order to learn as much information from the data. Specifically, we construct a TD model that captures the joint probability of the data and latent tensor factors through a deep energy-based model (EBM). Neural networks are then employed to parameterize the joint energy function of tensor factors and tensor entries. The flexibility of EBM and neural networks enables the learning of underlying structures and distributions. In addition, by designing the energy function, our model unifies the learning process of different types of tensors, such as static tensors and dynamic tensors with time stamps. The resulting model presents a doubly intractable nature due to the presence of latent tensor factors and the unnormalized probability function. To efficiently train the model, we derive a variational upper bound of the conditional noise-contrastive estimation objective that learns the unnormalized joint probability by distinguishing data from conditional noises. 
We show advantages of our model on both synthetic and several real-world datasets.", "keywords": "Tensor decomposition;tensor completion;probabilistic methods", "primary_area": "", "supplementary_material": "/attachment/795016998fbd3a7c7cc6da48ede3c7370f72efa8.pdf", "author": "Zerui Tao;Toshihisa Tanaka;Qibin Zhao", "authorids": "~Zerui_Tao1;tanakat@cc.tuat.ac.jp;~Qibin_Zhao1", "gender": ";;M", "homepage": ";;https://qibinzhao.github.io", "dblp": "296/4527;;13/1193", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.co.jp/citations?hl=en", "orcid": "0009-0003-9230-721X;;0000-0002-4442-3182", "linkedin": "zerui-tao-9a3093117;;", "or_profile": "~Zerui_Tao1;tanakat@cc.tuat.ac.jp;~Qibin_Zhao1", "aff": "Tokyo University of Agriculture and Technology;;RIKEN", "aff_domain": "tuat.ac.jp;;riken.jp", "position": "PhD student;;Team Leader", "bibtex": "@inproceedings{\ntao2023undirected,\ntitle={Undirected Probabilistic Model for Tensor Decomposition},\nauthor={Zerui Tao and Toshihisa Tanaka and Qibin Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3NWWgB2SuF}\n}", "github": "", "project": "", "reviewers": "7dHC;Duah;32MU;buha", "pdf_size": 541073, "rating": "6;6;6;7", "confidence": "2;4;3;3", "soundness": "3;4;3;3", "novelty": "3;4;2;3", "presentation": "2;4;3;3", "wc_summary": "78;98;80;72", "wc_strengths": "60;46;70;64", "wc_weaknesses": "170;44;86;136", "wc_questions": "166;21;19;2", "wc_limitations": "42;28;16;2", "wc_review": "516;237;271;276", "wc_reply_reviewers": "174;31;28;11", "wc_reply_authors": "518;48;47;13", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.0, 9.695359714832659 ], "wc_strengths_avg": [ 60.0, 8.831760866327848 ], "wc_weaknesses_avg": [ 109.0, 47.968739820845826 ], "wc_questions_avg": [ 52.0, 66.23065755373413 ], "wc_limitations_avg": [ 22.0, 14.7648230602334 ], "wc_review_avg": [ 325.0, 111.2901612902057 ], "wc_reply_reviewers_avg": [ 61.0, 65.68485365744526 ], "wc_reply_authors_avg": [ 156.5, 209.18711719415228 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3158460561832051419&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tuat.ac.jp;;riken.jp", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Tokyo University of Agriculture and Technology;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.tuat.ac.jp;https://www.riken.jp", "aff_unique_abbr": "TUAT;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "From Pixels to UI Actions: Learning to Follow Instructions via Graphical User Interfaces", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72903", "id": "3PjCt4kmRx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c52a8a4fadc9129c6e1d1745f2dfd0f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3PjCt4kmRx", "openreview": 
"https://openreview.net/forum?id=3PjCt4kmRx", "poster": "/media/PosterPDFs/NeurIPS%202023/72903.png?t=1701378944.6553218", "slides": "https://nips.cc/virtual/2023/poster/72903", "video": "https://nips.cc/virtual/2023/poster/72903", "author_site": "Peter Shaw, Mandar Joshi, James Cohan, Jonathan Berant, Panupong Pasupat, Hexiang Hu, Urvashi Khandelwal, Kenton Lee, Kristina N Toutanova", "tldr": "", "abstract": "Much of the previous work towards digital agents for graphical user interfaces (GUIs) has relied on text-based representations (derived from HTML or other structured data sources), which are not always readily available. These input representations have been often coupled with custom, task-specific action spaces. This paper focuses on creating agents that interact with the digital world using the same conceptual interface that humans commonly use \u2014 via pixel-based screenshots and a generic action space corresponding to keyboard and mouse actions. Building upon recent progress in pixel-based pretraining, we show, for the first time, that it is possible for such agents to outperform human crowdworkers on the MiniWob++ benchmark of GUI-based instruction following tasks.", "keywords": "instruction following;web tasks;user interface tasks;vision and language;representation learning;reinforcement learning;imitation learning;tree search;language grounding;web agents;computer control", "primary_area": "", "supplementary_material": "", "author": "Peter Shaw;Mandar Joshi;James Cohan;Jonathan Berant;Panupong Pasupat;Hexiang Hu;Urvashi Khandelwal;Kenton Lee;Kristina Toutanova", "authorids": "~Peter_Shaw1;~Mandar_Joshi1;jamesfcohan@google.com;~Jonathan_Berant1;~Panupong_Pasupat1;~Hexiang_Hu1;~Urvashi_Khandelwal1;~Kenton_Lee1;~Kristina_Toutanova1", "gender": "M;;;M;M;;F;M;F", "homepage": "http://www.ptshaw.com;https://homes.cs.washington.edu/~mandar90;;http://www.cs.tau.ac.il/~joberant/;https://ppasupat.github.io/;;;https://kentonl.com/;http://kristinatoutanova.com/", "dblp": "217/1471;85/1261;;31/8178;124/9178;;135/6699;121/7560;25/1520", "google_scholar": "SmGaQicAAAAJ;;;https://scholar.google.co.il/citations?user=xCYHonIAAAAJ;BqKXIA8AAAAJ;;2ITGSdgAAAAJ;qXwJkr8AAAAJ;9qY7NPEAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Peter_Shaw1;~Mandar_Joshi1;jamesfcohan@google.com;~Jonathan_Berant1;~Panupong_Pasupat1;~Hexiang_Hu1;~Urvashi_Khandelwal1;~Kenton_Lee1;~Kristina_Toutanova1", "aff": "Google DeepMind;Google DeepMind;;Tel Aviv University;Google;;Google;Google Research;Google", "aff_domain": "google.com;google.com;;tau.ac.il;google.com;;google.com;google.com;google.com", "position": "Research Scientist;Researcher;;Associate Professor;Employee;;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nshaw2023from,\ntitle={From Pixels to {UI} Actions: Learning to Follow Instructions via Graphical User Interfaces},\nauthor={Peter Shaw and Mandar Joshi and James Cohan and Jonathan Berant and Panupong Pasupat and Hexiang Hu and Urvashi Khandelwal and Kenton Lee and Kristina Toutanova},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3PjCt4kmRx}\n}", "github": "", "project": "", "reviewers": "X3Sy;XzpC;uTRS;sYmC;5HMX;CAUm", "pdf_size": 933611, "rating": "4;5;6;6;6;7", "confidence": "4;2;3;4;4;4", "soundness": "3;3;3;3;3;3", "novelty": "2;2;3;3;3;3", "presentation": "2;3;2;3;3;3", "wc_summary": "53;57;64;170;188;70", "wc_strengths": "54;121;58;110;175;96", 
"wc_weaknesses": "93;228;166;158;466;56", "wc_questions": "48;54;33;92;428;45", "wc_limitations": "11;8;21;17;67;49", "wc_review": "259;468;342;547;1324;316", "wc_reply_reviewers": "179;14;11;14;36;16", "wc_reply_authors": "1158;0;0;16;0;0", "reply_reviewers": "3;1;1;1;1;1", "reply_authors": "5;1;1;2;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.33333333333333, 56.12090123613087 ], "wc_strengths_avg": [ 102.33333333333333, 40.884661617231906 ], "wc_weaknesses_avg": [ 194.5, 133.20629364510773 ], "wc_questions_avg": [ 116.66666666666667, 140.42514811180448 ], "wc_limitations_avg": [ 28.833333333333332, 21.667307682825346 ], "wc_review_avg": [ 542.6666666666666, 362.44248585886777 ], "wc_reply_reviewers_avg": [ 45.0, 60.49242376804994 ], "wc_reply_authors_avg": [ 195.66666666666666, 430.40820417624735 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.74535599249993 ], "reply_authors_avg": [ 1.8333333333333333, 1.462494064565354 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.2314550249431379, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11322158500840800872&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "google.com;google.com;;tau.ac.il;google.com;;google.com;google.com;google.com", "author_num": 9, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Google;Tel Aviv University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.tau.ac.il", "aff_unique_abbr": "DeepMind;TAU", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;2;2;2;2", "aff_country_unique": "United Kingdom;Israel;United States" }, { "title": "DP-HyPO: An Adaptive Private Framework for Hyperparameter Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72902", "id": "3Py8A1j5N3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/82d7d58cba24731c0ca952dff1de46ae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3Py8A1j5N3", "openreview": "https://openreview.net/forum?id=3Py8A1j5N3", "poster": "/media/PosterPDFs/NeurIPS%202023/72902.png?t=1702501164.227099", "slides": "https://nips.cc/virtual/2023/poster/72902", "video": "https://nips.cc/virtual/2023/poster/72902", "author_site": "Hua Wang, Sheng Gao, Huanyu Zhang, Weijie Su, Milan Shen", "tldr": "", "abstract": "Hyperparameter optimization, also known as hyperparameter tuning, is a widely recognized technique for improving model performance. 
Regrettably, when training private ML models, practitioners often overlook the privacy risks associated with hyperparameter optimization, which could expose sensitive information about the underlying dataset.\nCurrently, the sole existing approach to privacy-preserving hyperparameter optimization is to uniformly and randomly select hyperparameters for a number of runs, subsequently reporting the best-performing hyperparameter.\nIn contrast, in non-private settings, practitioners commonly utilize \"adaptive\" hyperparameter optimization methods such as Gaussian Process-based optimization, which select the next candidate based on information gathered from previous outputs.\nThis substantial contrast between private and non-private hyperparameter optimization underscores a critical concern. In our paper, we introduce DP-HyPO, a pioneering framework for \"adaptive\" private hyperparameter optimization, aiming to bridge the gap between private and non-private hyperparameter optimization. To accomplish this, we provide a comprehensive differential privacy analysis of our framework. Furthermore, we empirically demonstrate the effectiveness of DP-HyPO on a diverse set of real-world datasets.", "keywords": "Differential Privacy;Hyperparameter Tuning;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Hua Wang;Sheng Gao;Huanyu Zhang;Weijie J Su;Milan Shen", "authorids": "~Hua_Wang7;~Sheng_Gao2;~Huanyu_Zhang2;~Weijie_J_Su1;~Milan_Shen1", "gender": "M;M;M;M;F", "homepage": "https://statistics.wharton.upenn.edu/profile/wanghua/;https://sggao.github.io/;https://huanyuzhang.github.io;http://stat.wharton.upenn.edu/~suw/;", "dblp": ";;163/7342;228/9127;", "google_scholar": ";cZrdt4EAAAAJ;;Uhf4nBkAAAAJ;", "orcid": ";;;;", "linkedin": ";sheng-gao-8001aa146/;;;milan-shen-860b5062/", "or_profile": "~Hua_Wang7;~Sheng_Gao2;~Huanyu_Zhang2;~Weijie_J_Su1;~Milan_Shen1", "aff": "The Wharton School, University of Pennsylvania;The Wharton School, University of Pennsylvania;Meta;University of Pennsylvania;Research, Facebook", "aff_domain": "wharton.upenn.edu;wharton.upenn.edu;fb.com;upenn.edu;research.facebook.com", "position": "PhD student;PhD student;Researcher;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nwang2023dphypo,\ntitle={{DP}-Hy{PO}: An Adaptive Private Framework for Hyperparameter Optimization},\nauthor={Hua Wang and Sheng Gao and Huanyu Zhang and Weijie J Su and Milan Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3Py8A1j5N3}\n}", "github": "", "project": "", "reviewers": "pfPa;sjks;6uG2;11th;o9LM", "pdf_size": 747265, "rating": "4;5;6;6;7", "confidence": "3;3;2;4;3", "soundness": "2;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;2;2;4;4", "wc_summary": "64;85;62;78;75", "wc_strengths": "44;42;26;27;55", "wc_weaknesses": "70;249;154;21;75", "wc_questions": "45;82;50;135;33", "wc_limitations": "48;14;9;15;1", "wc_review": "271;472;301;276;239", "wc_reply_reviewers": "0;45;0;8;18", "wc_reply_authors": "0;16;0;0;14", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 72.8, 8.657944328765346 ], "wc_strengths_avg": [ 38.8, 10.979981785048643 ], "wc_weaknesses_avg": [ 113.8,
79.91345318530541 ], "wc_questions_avg": [ 69.0, 36.764112936394916 ], "wc_limitations_avg": [ 17.4, 16.0822883943797 ], "wc_review_avg": [ 311.8, 82.49460588426372 ], "wc_reply_reviewers_avg": [ 14.2, 16.76186147180557 ], "wc_reply_authors_avg": [ 6.0, 7.37563556583431 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6305803736240545241&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "wharton.upenn.edu;wharton.upenn.edu;fb.com;upenn.edu;research.facebook.com", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "University of Pennsylvania;Meta", "aff_unique_dep": "The Wharton School;Meta Platforms, Inc.", "aff_unique_url": "https://www.wharton.upenn.edu;https://meta.com", "aff_unique_abbr": "UPenn Wharton;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3S9Oiu6gMf", "title": "Learning bounded-degree polytrees with samples", "track": "main", "status": "Reject", "tldr": "", "abstract": "We establish finite-sample guarantees for efficient proper learning of bounded-degree polytrees, a rich class of high-dimensional probability distributions and a subclass of Bayesian networks, a widely-studied type of graphical models. Very recently, Bhattacharyya et al. [2021] obtained finite-sample guarantees for recovering tree-structured Bayesian networks, i.e., 1-polytrees. We considerably extend their results by providing an efficient algorithm which learns d-polytrees in polynomial time and sample complexity when the in-degree d is constant, provided that the underlying undirected graph (skeleton) is known. 
We complement our algorithm with an information-theoretic lower bound, showing that the dependence of our sample complexity on both the dimension and the target accuracy parameters is nearly tight.", "keywords": "Bayesian networks;finite samples;polytrees;learning", "primary_area": "", "supplementary_material": "/attachment/3ee686bbfe8051b7b3212afb9b70bc477301c8d4.zip", "author": "Qiping Yang;Davin Choo;Arnab Bhattacharyya;Clement Louis Canonne", "authorids": "~Qiping_Yang1;~Davin_Choo1;~Arnab_Bhattacharyya1;~Clement_Louis_Canonne1", "gender": "M;;M;M", "homepage": "https://twitter.com/nerd_qp;http://davinchoo.com/;https://warwick.ac.uk/fac/sci/dcs/people/arnab_bhattacharyya/;https://ccanonne.github.io/", "dblp": "298/4926;230/4363.html;64/574.html;28/9840L", "google_scholar": ";cPtzhPsAAAAJ;eECXWqUAAAAJ;u_OXsBIAAAAJ", "orcid": "0009-0009-6841-9370;0000-0002-4545-7341;;0000-0001-7153-5211", "linkedin": ";;;", "or_profile": "~Qiping_Yang1;~Davin_Choo1;~Arnab_Bhattacharyya1;~Clement_Louis_Canonne1", "aff": "University of Sydney;National University of Singapore;National University of Singapore;University of Sydney", "aff_domain": "sydney.edu.au;u.nus.edu;nus.edu.sg;sydney.edu.au", "position": "PhD student;PhD student;Assistant Professor;Lecturer", "bibtex": "@misc{\nyang2023learning,\ntitle={Learning bounded-degree polytrees with samples},\nauthor={Qiping Yang and Davin Choo and Arnab Bhattacharyya and Clement Louis Canonne},\nyear={2023},\nurl={https://openreview.net/forum?id=3S9Oiu6gMf}\n}", "github": "", "project": "", "reviewers": "TyxN;tvTb;iYmi;ZTUL", "site": "https://openreview.net/forum?id=3S9Oiu6gMf", "pdf_size": 349827, "rating": "6;6;6;7", "confidence": "2;4;2;3", "soundness": "3;4;3;3", "novelty": "2;4;3;3", "presentation": "2;4;3;3", "wc_summary": "79;78;196;42", "wc_strengths": "26;131;80;54", "wc_weaknesses": "121;129;72;31", "wc_questions": "129;241;565;39", "wc_limitations": "2;8;2;10", "wc_review": "357;587;915;176", "wc_reply_reviewers": "45;50;283;6", "wc_reply_authors": "18;19;439;19", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.75, 58.09206055908157 ], "wc_strengths_avg": [ 72.75, 38.674119253061214 ], "wc_weaknesses_avg": [ 88.25, 39.60665979352462 ], "wc_questions_avg": [ 243.5, 198.93403429277757 ], "wc_limitations_avg": [ 5.5, 3.570714214271425 ], "wc_review_avg": [ 508.75, 276.0945263854392 ], "wc_reply_reviewers_avg": [ 96.0, 109.3000457456446 ], "wc_reply_authors_avg": [ 123.75, 182.0101302125791 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qzC1VUbRVakJ:scholar.google.com/&scioq=Learning+bounded-degree+polytrees+with+samples&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Sydney;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://www.nus.edu.sg", "aff_unique_abbr": "USYD;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Australia;Singapore" }, { "title": "Why Does
Sharpness-Aware Minimization Generalize Better Than SGD?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72901", "id": "3WAnGWLpSQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e4d3fe32495088805bbbb4f1de63e947-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3WAnGWLpSQ", "openreview": "https://openreview.net/forum?id=3WAnGWLpSQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72901", "video": "https://nips.cc/virtual/2023/poster/72901", "author_site": "Zixiang Chen, Junkai Zhang, Yiwen Kou, Xiangning Chen, Cho-Jui Hsieh, Quanquan Gu", "tldr": "", "abstract": "The challenge of overfitting, in which the model memorizes the training data and fails to generalize to test data, has become increasingly significant in the training of large neural networks. To tackle this challenge, Sharpness-Aware Minimization (SAM) has emerged as a promising training method, which can improve the generalization of neural networks even in the presence of label noise. However, a deep understanding of how SAM works, especially in the setting of nonlinear neural networks and classification tasks, remains largely missing. This paper fills this gap by demonstrating why SAM generalizes better than Stochastic Gradient Descent (SGD) for a certain data model and two-layer convolutional ReLU networks. The loss landscape of our studied problem is nonsmooth, thus current explanations for the success of SAM based on the Hessian information are insufficient. Our result explains the benefits of SAM, particularly its ability to prevent noise learning in the early stages, thereby facilitating more effective learning of features. Experiments on both synthetic and real data corroborate our theory.", "keywords": "Sharpness Aware Algorithm;Deep Learning Theory", "primary_area": "", "supplementary_material": "/attachment/58ac817bff187167670aec5b1209ad6800888d93.zip", "author": "Zixiang Chen;Junkai Zhang;Yiwen Kou;Xiangning Chen;Cho-Jui Hsieh;Quanquan Gu", "authorids": "~Zixiang_Chen1;~Junkai_Zhang2;~Yiwen_Kou1;~Xiangning_Chen1;~Cho-Jui_Hsieh1;~Quanquan_Gu1", "gender": "M;;F;M;M;M", "homepage": "https://sites.google.com/view/zxchen;;https://evankou.github.io/;;http://web.cs.ucla.edu/~chohsieh/index.html;http://web.cs.ucla.edu/~qgu/", "dblp": "137/3624;;323/9058;56/7393;14/2770;50/4597", "google_scholar": "6nrCHr0AAAAJ;;https://scholar.google.com/citations?hl=en;vNcBx1sAAAAJ;Wy89g4IAAAAJ;GU9HgNAAAAAJ", "orcid": ";;;;;", "linkedin": ";;yiwen-kou-5a444916b/;;;", "or_profile": "~Zixiang_Chen1;~Junkai_Zhang2;~Yiwen_Kou1;~Xiangning_Chen1;~Cho-Jui_Hsieh1;~Quanquan_Gu1", "aff": " University of California, Los Angeles;;University of California, Los Angeles;University of California, Los Angeles;Amazon;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;;ucla.edu;cs.ucla.edu;amazon.com;cs.ucla.edu", "position": "PhD student;;PhD student;PhD student;visiting scholar;Associate Professor", "bibtex": "@inproceedings{\nchen2023why,\ntitle={Why Does Sharpness-Aware Minimization Generalize Better Than {SGD}?},\nauthor={Zixiang Chen and Junkai Zhang and Yiwen Kou and Xiangning Chen and Cho-Jui Hsieh and Quanquan Gu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3WAnGWLpSQ}\n}", "github": "", "project": "", "reviewers": "TY4G;jpeN;EhwP;CEVG", "pdf_size": 917339, "rating": "5;6;7;8", "confidence": "3;4;4;4", "soundness": "3;4;3;4", "novelty": "4;3;4;4", 
"presentation": "1;3;3;4", "wc_summary": "94;92;247;80", "wc_strengths": "84;70;181;41", "wc_weaknesses": "834;174;169;13", "wc_questions": "79;2;20;508", "wc_limitations": "10;1;35;8", "wc_review": "1101;339;652;650", "wc_reply_reviewers": "53;21;96;163", "wc_reply_authors": "29;0;61;75", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 128.25, 68.76908825918808 ], "wc_strengths_avg": [ 94.0, 52.56900227320279 ], "wc_weaknesses_avg": [ 297.5, 316.4399627101482 ], "wc_questions_avg": [ 152.25, 207.3576318826968 ], "wc_limitations_avg": [ 13.5, 12.854960132182441 ], "wc_review_avg": [ 685.5, 271.6086338833874 ], "wc_reply_reviewers_avg": [ 83.25, 53.180706087828504 ], "wc_reply_authors_avg": [ 41.25, 29.07210862665452 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4327729152990714605&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.ucla.edu;;ucla.edu;cs.ucla.edu;amazon.com;cs.ucla.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning via Wasserstein-Based High Probability Generalisation Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72900", "id": "3Wrolscjbx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/af2bb2b2280d36f8842e440b4e275152-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3Wrolscjbx", "openreview": "https://openreview.net/forum?id=3Wrolscjbx", "poster": "/media/PosterPDFs/NeurIPS%202023/72900.png?t=1702073048.0397415", "slides": "https://nips.cc/virtual/2023/poster/72900", "video": "https://nips.cc/virtual/2023/poster/72900", "author_site": "Paul Viallard, Maxime Haddouche, Umut Simsekli, Benjamin Guedj", "tldr": "", "abstract": "Minimising upper bounds on the population risk or the generalisation gap has been widely used in structural risk minimisation (SRM) -- this is in particular at the core of PAC-Bayesian learning. Despite its successes and unfailing surge of interest in recent years, a limitation of the PAC-Bayesian framework is that most bounds involve a Kullback-Leibler (KL) divergence term (or its variations), which might exhibit erratic behavior and fail to capture the underlying geometric structure of the learning problem -- hence restricting its use in practical applications.\nAs a remedy, recent studies have attempted to replace the KL divergence in the PAC-Bayesian bounds with the Wasserstein distance. Even though these bounds alleviated the aforementioned issues to a certain extent, they either hold in expectation, are for bounded losses, or are nontrivial to minimize in an SRM framework. 
In this work, we contribute to this line of research and prove novel Wasserstein distance-based PAC-Bayesian generalisation bounds for both batch learning with independent and identically distributed (i.i.d.) data, and online learning with potentially non-i.i.d. data. Contrary to previous art, our bounds are stronger in the sense that (i) they hold with high probability, (ii) they apply to unbounded (potentially heavy-tailed) losses, and (iii) they lead to optimizable training objectives that can be used in SRM. As a result we derive novel Wasserstein-based PAC-Bayesian learning algorithms and we illustrate their empirical advantage on a variety of experiments.", "keywords": "Wasserstein;PAC-Bayes;Generalisation Bound;Algorithm", "primary_area": "", "supplementary_material": "/attachment/f61ba4ec41adc737d62a101909215cd39b3c7c0e.pdf", "author": "Paul Viallard;Maxime Haddouche;Umut Simsekli;Benjamin Guedj", "authorids": "~Paul_Viallard1;~Maxime_Haddouche1;~Umut_Simsekli1;~Benjamin_Guedj1", "gender": "M;;M;M", "homepage": "https://paulviallard.github.io;https://maximehaddouche.github.io/;https://www.di.ens.fr/~simsekli/;https://bguedj.github.io", "dblp": "285/5954;267/5693.html;https://dblp.org/pers/s/Simsekli:Umut.html;177/7258", "google_scholar": "k-5mpncAAAAJ;0U7gG1sAAAAJ;https://scholar.google.fr/citations?user=CuArAkgAAAAJ;https://scholar.google.fr/citations?user=q-JTC2sAAAAJ", "orcid": ";0000-0001-9292-5112;;0000-0003-1237-7430", "linkedin": ";;;benjaminguedj/", "or_profile": "~Paul_Viallard1;~Maxime_Haddouche1;~Umut_Simsekli1;~Benjamin_Guedj1", "aff": "Inria;INRIA;INRIA;University College London, University of London", "aff_domain": "inria.fr;inria.fr;inria.fr;ucl.ac.uk", "position": "Postdoc;PhD student;Research Faculty;Principal Researcher", "bibtex": "@inproceedings{\nviallard2023learning,\ntitle={Learning via Wasserstein-Based High Probability Generalisation Bounds},\nauthor={Paul Viallard and Maxime Haddouche and Umut Simsekli and Benjamin Guedj},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3Wrolscjbx}\n}", "github": "", "project": "", "reviewers": "gr5G;jPqk;mMbV;tL5n", "pdf_size": 475705, "rating": "6;7;8;8", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "novelty": "4;4;3;4", "presentation": "3;4;3;4", "wc_summary": "89;65;251;213", "wc_strengths": "89;40;66;76", "wc_weaknesses": "202;64;344;135", "wc_questions": "8;284;85;61", "wc_limitations": "54;7;10;28", "wc_review": "442;460;756;513", "wc_reply_reviewers": "9;10;128;29", "wc_reply_authors": "17;13;30;54", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 154.5, 79.11226200785818 ], "wc_strengths_avg": [ 67.75, 17.977416388346796 ], "wc_weaknesses_avg": [ 186.25, 103.32563815433225 ], "wc_questions_avg": [ 109.5, 104.52870419171951 ], "wc_limitations_avg": [ 24.75, 18.699933154960743 ], "wc_review_avg": [ 542.75, 125.85582028654852 ], "wc_reply_reviewers_avg": [ 44.0, 49.14773646873272 ], "wc_reply_authors_avg": [ 28.5, 16.00781059358212 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 18, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=14652567430035023920&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 20, "email": "inria.fr;inria.fr;inria.fr;ucl.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "INRIA;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.ucl.ac.uk", "aff_unique_abbr": "Inria;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "France;United Kingdom" }, { "title": "Birth of a Transformer: A Memory Viewpoint", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72899", "id": "3X2EbBLNsk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0561738a239a995c8cd2ef0e50cfa4fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3X2EbBLNsk", "openreview": "https://openreview.net/forum?id=3X2EbBLNsk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72899", "video": "https://nips.cc/virtual/2023/poster/72899", "author_site": "Alberto Bietti, Vivien Cabannes, Diane Bouchacourt, Herve Jegou, Leon Bottou", "tldr": "", "abstract": "Large language models based on transformers have achieved great empirical successes. However, as they are deployed more widely, there is a growing need to better understand their internal mechanisms in order to make them more reliable. These models appear to store vast amounts of knowledge from their training data, and to adapt quickly to new information provided in their context or prompt. We study how transformers balance these two types of knowledge by considering a synthetic setup where tokens are generated from either global or context-specific bigram distributions. By a careful empirical analysis of the training process on a simplified two-layer transformer, we illustrate the fast learning of global bigrams and the slower development of an \"induction head\" mechanism for the in-context bigrams. 
We highlight the role of weight matrices as associative memories, provide theoretical insights on how gradients enable their learning during training, and study the role of data-distributional properties.", "keywords": "transformers;language models;deep learning theory;interpretability", "primary_area": "", "supplementary_material": "", "author": "Alberto Bietti;Vivien Cabannes;Diane Bouchacourt;Herve Jegou;Leon Bottou", "authorids": "~Alberto_Bietti1;~Vivien_Cabannes1;~Diane_Bouchacourt3;~Herve_Jegou1;~Leon_Bottou1", "gender": "M;Not Specified;Unspecified;M;F", "homepage": "http://alberto.bietti.me;https://viviencabannes.github.io/;;http://leon.bottou.org;https://dianebouchacourt.github.io/", "dblp": "166/6461;;19/2115;30/1046;176/1498", "google_scholar": "iT7Tp70AAAAJ;;1lcY2z4AAAAJ;kbN88gsAAAAJ;", "orcid": ";;;0000-0002-9894-8128;", "linkedin": ";;;;", "or_profile": "~Alberto_Bietti1;~Vivien_Cabannes1;~Herve_Jegou1;~Leon_Bottou1;~Diane_Nicole_Bouchacourt1", "aff": "Meta;META;Meta;New York University;Meta AI Research", "aff_domain": "meta.com;meta.com;fb.com;nyu.edu;meta.com", "position": "Researcher;Postdoc;Researcher;Visiting faculty;Researcher", "bibtex": "@inproceedings{\nbietti2023birth,\ntitle={Birth of a Transformer: A Memory Viewpoint},\nauthor={Alberto Bietti and Vivien Cabannes and Diane Bouchacourt and Herve Jegou and Leon Bottou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3X2EbBLNsk}\n}", "github": "", "project": "", "reviewers": "k5yo;hu6p;PfRr;uvp3;hHJQ", "pdf_size": 825544, "rating": "5;6;7;7;8", "confidence": "4;4;3;3;3", "soundness": "2;4;3;4;3", "novelty": "2;3;3;2;3", "presentation": "3;3;2;4;3", "wc_summary": "42;135;214;45;56", "wc_strengths": "104;37;154;82;130", "wc_weaknesses": "268;53;255;190;80", "wc_questions": "511;1;1223;80;55", "wc_limitations": "15;7;20;1;22", "wc_review": "940;233;1866;398;343", "wc_reply_reviewers": "18;26;300;204;16", "wc_reply_authors": "13;10;763;357;9", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;3;3;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 98.4, 67.13151271943751 ], "wc_strengths_avg": [ 101.4, 40.28697059844535 ], "wc_weaknesses_avg": [ 169.2, 88.3343647738523 ], "wc_questions_avg": [ 374.0, 461.9298648063362 ], "wc_limitations_avg": [ 13.0, 7.92464510246358 ], "wc_review_avg": [ 756.0, 606.3362103651735 ], "wc_reply_reviewers_avg": [ 112.8, 117.68840214736541 ], "wc_reply_authors_avg": [ 230.4, 298.17686026920336 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8807048459279795, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13530023411742939535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "meta.com;meta.com;fb.com;nyu.edu;meta.com", "author_num": 5, "aff_unique_index": "0;0;2;0", "aff_unique_norm": "Meta;;New York University", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;;https://www.nyu.edu", "aff_unique_abbr": "Meta;;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "A General Framework for Equivariant Neural 
Networks on Reductive Lie Groups", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72898", "id": "3XStpETaO8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad1f2197941348b1c4373fd6c19ee0b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3XStpETaO8", "openreview": "https://openreview.net/forum?id=3XStpETaO8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72898", "video": "https://nips.cc/virtual/2023/poster/72898", "author_site": "Ilyes Batatia, Mario Geiger, Jose Munoz, Tess Smidt, Lior Silberman, Christoph Ortner", "tldr": "", "abstract": "Reductive Lie Groups, such as the orthogonal groups, the Lorentz group, or the unitary groups, play essential roles across scientific fields as diverse as high energy physics, quantum mechanics, quantum chromodynamics, molecular dynamics, computer vision, and imaging. In this paper, we present a general Equivariant Neural Network architecture capable of respecting the symmetries of the finite-dimensional representations of any reductive Lie Group. Our approach generalizes the successful ACE and MACE architectures for atomistic point clouds to any data equivariant to a reductive Lie group action. We also introduce the lie-nn software library, which provides all the necessary tools to develop and implement such general G-equivariant neural networks. It implements routines for the reduction of generic tensor products of representations into irreducible representations, making it easy to apply our architecture to a wide range of problems and groups. The generality and performance of our approach are demonstrated by applying it to the tasks of top quark decay tagging (Lorentz group) and shape recognition (orthogonal group).", "keywords": "equivariance;point clouds;machine learning;particle physics", "primary_area": "", "supplementary_material": "/attachment/69de6f815d39da9dc27c3cb076a85d8c6bc21d8e.zip", "author": "Ilyes Batatia;Mario Geiger;Jose M Munoz;Tess Smidt;Lior Silberman;Christoph Ortner", "authorids": "~Ilyes_Batatia1;~Mario_Geiger1;~Jose_M_Munoz1;~Tess_Smidt1;~Lior_Silberman1;~Christoph_Ortner1", "gender": "M;M;M;F;Not Specified;Not Specified", "homepage": ";;https://munozariasjm.github.io/;https://blondegeek.github.io/;https://www.math.ubc.ca/~lior/;https://personal.math.ubc.ca/~ortner/", "dblp": "282/4552;206/7093;;215/4978.html;56/8030;", "google_scholar": "fTVuWFMAAAAJ;;YA-9__4AAAAJ;;3KZth8UAAAAJ;", "orcid": ";0000-0001-5433-0900;;0000-0001-5581-5344;0000-0001-8756-3575;", "linkedin": ";;;;lior-silberman-248569169/;", "or_profile": "~Ilyes_Batatia1;~Mario_Geiger1;~Jose_M_Munoz1;~Tess_Smidt1;~Lior_Silberman1;~Christoph_Ortner1", "aff": "University of Cambridge;Massachusetts Institute of Technology;Universidad EIA;Massachusetts Institute of Technology;University of British Columbia;University of British Columbia", "aff_domain": "cam.ac.uk;mit.edu;eia.edu.co;mit.edu;ubc.ca;ubc.ca", "position": "MS student;Postdoc;Undergrad student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbatatia2023a,\ntitle={A General Framework for Equivariant Neural Networks on Reductive Lie Groups},\nauthor={Ilyes Batatia and Mario Geiger and Jose M Munoz and Tess Smidt and Lior Silberman and Christoph Ortner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3XStpETaO8}\n}", "github": "", "project": "", "reviewers": "KJSX;5uTH;jt5w;Mk5u", "pdf_size": 983523, 
"rating": "5;6;6;8", "confidence": "5;4;2;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "99;78;51;83", "wc_strengths": "56;90;41;46", "wc_weaknesses": "8;344;56;133", "wc_questions": "79;12;57;15", "wc_limitations": "10;82;7;14", "wc_review": "252;606;212;291", "wc_reply_reviewers": "26;78;13;11", "wc_reply_authors": "0;58;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.75, 17.282577932704367 ], "wc_strengths_avg": [ 58.25, 19.109879643786353 ], "wc_weaknesses_avg": [ 135.25, 128.50559326348406 ], "wc_questions_avg": [ 40.75, 28.358199872347328 ], "wc_limitations_avg": [ 28.25, 31.13177637077589 ], "wc_review_avg": [ 340.25, 155.95251681200915 ], "wc_reply_reviewers_avg": [ 32.0, 27.175356483402386 ], "wc_reply_authors_avg": [ 14.5, 25.11473670974872 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.15789473684210528, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5854966146763425530&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "cam.ac.uk;mit.edu;eia.edu.co;mit.edu;ubc.ca;ubc.ca", "author_num": 6, "aff_unique_index": "0;1;2;1;3;3", "aff_unique_norm": "University of Cambridge;Massachusetts Institute of Technology;Escuela Internacional de Astronomia;University of British Columbia", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cam.ac.uk;https://web.mit.edu;https://www.universidadeia.edu.co;https://www.ubc.ca", "aff_unique_abbr": "Cambridge;MIT;EIA;UBC", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;2;1;3;3", "aff_country_unique": "United Kingdom;United States;Colombia;Canada" }, { "title": "Learning Large Graph Property Prediction via Graph Segment Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72897", "id": "3YDukx2cpr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/48f8143cebe113f4596e1781771578cd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3YDukx2cpr", "openreview": "https://openreview.net/forum?id=3YDukx2cpr", "poster": "/media/PosterPDFs/NeurIPS%202023/72897.png?t=1702015788.0771518", "slides": "https://nips.cc/virtual/2023/poster/72897", "video": "https://nips.cc/virtual/2023/poster/72897", "author_site": "Kaidi Cao, Mangpo Phothilimthana, Sami Abu-El-Haija, Dustin Zelle, Yanqi Zhou, Charith Mendis, Jure Leskovec, Bryan Perozzi", "tldr": "", "abstract": "Learning to predict properties of large graphs is challenging because each prediction requires the knowledge of an entire graph, while the amount of memory available during training is bounded. Here we propose Graph Segment Training (GST), a general framework that utilizes a divide-and-conquer approach to allow learning large graph property prediction with a constant memory footprint. GST first divides a large graph into segments and then backpropagates through only a few segments sampled per training iteration. We refine the GST paradigm by introducing a historical embedding table to efficiently obtain embeddings for segments not sampled for backpropagation. 
To mitigate the staleness of historical embeddings, we design two novel techniques. First, we finetune the prediction head to fix the input distribution shift. Second, we introduce Stale Embedding Dropout to drop some stale embeddings during training to reduce bias. We evaluate our complete method GST-EFD (with all the techniques together) on two large graph property prediction benchmarks: MalNet and TpuGraphs. Our experiments show that GST-EFD is both memory-efficient and fast, while offering a slight boost on test accuracy over a typical full graph training regime.", "keywords": "Graph Neural Networks;Graph Property Prediction", "primary_area": "", "supplementary_material": "", "author": "Kaidi Cao;Phitchaya Mangpo Phothilimthana;Sami Abu-El-Haija;Dustin Zelle;Yanqi Zhou;Charith Mendis;Jure Leskovec;Bryan Perozzi", "authorids": "~Kaidi_Cao1;~Phitchaya_Mangpo_Phothilimthana1;~Sami_Abu-El-Haija1;~Dustin_Zelle1;~Yanqi_Zhou1;~Charith_Mendis1;~Jure_Leskovec1;~Bryan_Perozzi1", "gender": "M;F;M;M;F;M;;", "homepage": "https://ai.stanford.edu/~kaidicao/;https://mangpo.net/;http://www.haija.org;;https://zhouyanqi.github.io/;https://charithmendis.com;http://cs.stanford.edu/~jure/;http://www.perozzi.net/", "dblp": "203/8207;127/3128;127/6620;239/6070;;163/3175;l/JureLeskovec;91/10813", "google_scholar": "https://scholar.google.com.hk/citations?user=4Zw1PJ8AAAAJ;7Fxbm0AAAAAJ;t80qlTcAAAAJ;cv6419kAAAAJ;ZKEDQXYAAAAJ;utZ3JYUAAAAJ;Q_kKkIUAAAAJ;rZgbMs4AAAAJ", "orcid": ";;;;;0000-0002-8140-2321;0000-0002-5411-923X;", "linkedin": ";;samihaija/;dustin-zelle-58b36a83/;;charith-mendis-36650728/;leskovec/;", "or_profile": "~Kaidi_Cao1;~Phitchaya_Mangpo_Phothilimthana1;~Sami_Abu-El-Haija1;~Dustin_Zelle1;~Yanqi_Zhou1;~Charith_Mendis1;~Jure_Leskovec1;~Bryan_Perozzi1", "aff": "Stanford University;Google;Research, Google;Research, Google;Google Brain;University of Illinois, Urbana Champaign;Kumo.AI;Google", "aff_domain": "stanford.edu;google.com;research.google.com;research.google.com;google.com;illinois.edu;kumo.ai;google.com", "position": "PhD student;Researcher;Research Scientist;Researcher;Research Scientist;Assistant Professor;Chief Scientist;Researcher", "bibtex": "@inproceedings{\ncao2023learning,\ntitle={Learning Large Graph Property Prediction via Graph Segment Training},\nauthor={Kaidi Cao and Phitchaya Mangpo Phothilimthana and Sami Abu-El-Haija and Dustin Zelle and Yanqi Zhou and Charith Mendis and Jure Leskovec and Bryan Perozzi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3YDukx2cpr}\n}", "github": "", "project": "", "reviewers": "U5zV;vMFq;rf6m;Uc8u", "pdf_size": 530828, "rating": "5;5;6;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "73;69;85;146", "wc_strengths": "84;47;77;125", "wc_weaknesses": "154;21;69;5", "wc_questions": "185;45;15;75", "wc_limitations": "1;20;30;15", "wc_review": "497;202;276;366", "wc_reply_reviewers": "130;9;0;21", "wc_reply_authors": "64;0;0;17", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.25, 31.01914731258743 ], "wc_strengths_avg": [ 83.25, 27.82422505659412 ], "wc_weaknesses_avg": [ 62.25, 57.97143693233764 ], "wc_questions_avg": [ 80.0, 
64.22616289332565 ], "wc_limitations_avg": [ 16.5, 10.452272480183437 ], "wc_review_avg": [ 335.25, 109.9713030749386 ], "wc_reply_reviewers_avg": [ 40.0, 52.492856656882374 ], "wc_reply_authors_avg": [ 20.25, 26.1951808545007 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3006451252983209&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 13, "email": "stanford.edu;google.com;research.google.com;research.google.com;google.com;illinois.edu;kumo.ai;google.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;2;3;1", "aff_unique_norm": "Stanford University;Google;University of Illinois Urbana-Champaign;Kumo.AI", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.stanford.edu;https://www.google.com;https://illinois.edu;https://www.kumo.ai", "aff_unique_abbr": "Stanford;Google;UIUC;Kumo.AI", "aff_campus_unique_index": "0;1;1;1;1;2;1", "aff_campus_unique": "Stanford;Mountain View;Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ReTR: Modeling Rendering Via Transformer for Generalizable Neural Surface Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72896", "id": "3ZICE99e6n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c47ec10bc135be5c3663ba344d29a6a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3ZICE99e6n", "openreview": "https://openreview.net/forum?id=3ZICE99e6n", "poster": "/media/PosterPDFs/NeurIPS%202023/72896.png?t=1701395964.9893305", "slides": "https://nips.cc/virtual/2023/poster/72896", "video": "https://nips.cc/virtual/2023/poster/72896", "author_site": "Yixun Liang, Hao He, Yingcong Chen", "tldr": "", "abstract": "Generalizable neural surface reconstruction techniques have attracted great attention in recent years. However, they are limited by low-confidence depth distributions and inaccurate surface reasoning due to the oversimplified volume rendering process they employ. In this paper, we present Reconstruction TRansformer (ReTR), a novel framework that leverages the transformer architecture to redesign the rendering process, enabling complex render interaction modeling. It introduces a learnable $\\textit{meta-ray token}$ and utilizes the cross-attention mechanism to simulate the interaction of the rendering process with sampled points and render the observed color. Meanwhile, by operating within a high-dimensional feature space rather than the color space, ReTR mitigates sensitivity to projected colors in source views. Such improvements result in accurate surface assessment with high confidence. We demonstrate the effectiveness of our approach on various datasets, showcasing how our method outperforms the current state-of-the-art approaches in terms of reconstruction quality and generalization ability.
$\\textit{Our code is available at }$ https://github.com/YixunLiang/ReTR.", "keywords": "3D vision;3D reconstruction;Generalizable Neural Surface Reconstruction", "primary_area": "", "supplementary_material": "/attachment/3ff59e8d25014f7a4ab0e1f8f93b47294e956515.zip", "author": "Yixun Liang;Hao He;Ying-Cong Chen", "authorids": "~Yixun_Liang1;~Hao_He6;~Ying-Cong_Chen1", "gender": "M;M;M", "homepage": "https://yixunliang.github.io;;https://www.yingcong.me/", "dblp": "320/7091;;137/6578", "google_scholar": "https://scholar.google.com/citations?hl=en;lOEX3aUAAAAJ;https://scholar.google.com.hk/citations?user=n7j4bJUAAAAJ", "orcid": "0000-0003-4750-8875;0000-0001-8074-746X;", "linkedin": ";;", "or_profile": "~Yixun_Liang1;~Hao_He6;~Ying-Cong_Chen1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;ust.hk;hkust-gz.edu.cn", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliang2023retr,\ntitle={Re{TR}: Modeling Rendering Via Transformer for Generalizable Neural Surface Reconstruction},\nauthor={Yixun Liang and Hao He and Ying-Cong Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3ZICE99e6n}\n}", "github": "", "project": "", "reviewers": "TPgh;DVpn;1Qeo;RozF;feWL", "pdf_size": 9950271, "rating": "5;5;6;6;7", "confidence": "3;4;5;4;5", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;2;3;4", "wc_summary": "66;34;73;88;165", "wc_strengths": "24;38;46;133;52", "wc_weaknesses": "240;64;238;327;70", "wc_questions": "69;4;58;113;56", "wc_limitations": "24;4;70;55;43", "wc_review": "423;144;485;716;386", "wc_reply_reviewers": "0;16;46;79;22", "wc_reply_authors": "56;24;38;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 85.2, 43.622929750304486 ], "wc_strengths_avg": [ 58.6, 38.364566985696584 ], "wc_weaknesses_avg": [ 187.8, 103.75432521104842 ], "wc_questions_avg": [ 60.0, 34.77355316903925 ], "wc_limitations_avg": [ 39.2, 23.163764806265842 ], "wc_review_avg": [ 430.8, 183.62940940927737 ], "wc_reply_reviewers_avg": [ 32.6, 27.507089995126712 ], "wc_reply_authors_avg": [ 23.6, 21.77705214210592 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7857142857142858, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10172799179139933136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hkust.edu;ust.hk;hkust-gz.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Deep learning with kernels through RKHM and the Perron-Frobenius operator", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72895", "id": "3ZrGmenVM2", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ef237e007e26180ce4d16738efdf83f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3ZrGmenVM2", "openreview": "https://openreview.net/forum?id=3ZrGmenVM2", "poster": "/media/PosterPDFs/NeurIPS%202023/72895.png?t=1701522077.3531644", "slides": "https://nips.cc/virtual/2023/poster/72895", "video": "https://nips.cc/virtual/2023/poster/72895", "author_site": "Yuka Hashimoto, Masahiro Ikeda, Hachem Kadri", "tldr": "", "abstract": "Reproducing kernel Hilbert $C^*$-module (RKHM) is a generalization of reproducing kernel Hilbert space (RKHS) by means of $C^*$-algebra, and the Perron-Frobenius operator is a linear operator related to the composition of functions. Combining these two concepts, we present deep RKHM, a deep learning framework for kernel methods. We derive a new Rademacher generalization bound in this setting and provide a theoretical interpretation of benign overfitting by means of Perron-Frobenius operators. By virtue of $C^*$-algebra, the dependency of the bound on output dimension is milder than existing bounds. We show that $C^*$-algebra is a suitable tool for deep learning with kernels, enabling us to take advantage of the product structure of operators and to provide a clear connection with convolutional neural networks. Our theoretical analysis provides a new lens through which one can design and analyze deep kernel methods.", "keywords": "kernel method. generalization bound. C*-algebra. Perron-Frobenius operator and Koopman operator.", "primary_area": "", "supplementary_material": "/attachment/2e69b42ad6892315308a090f1c647620ab181170.zip", "author": "Yuka Hashimoto;Masahiro Ikeda;Hachem Kadri", "authorids": "~Yuka_Hashimoto2;~Masahiro_Ikeda1;~Hachem_Kadri3", "gender": ";M;M", "homepage": "https://www.rd.ntt/e/ns/qos/person/hashimoto/index.html;https://sites.google.com/view/masahiroikedaswebpage/home;https://hachem-kadri.pedaweb.univ-amu.fr", "dblp": "220/5306;43/5572;88/9175", "google_scholar": ";https://scholar.google.com.tr/citations?user=6ozp0qMAAAAJ;E2XUpSkAAAAJ", "orcid": "0000-0002-1424-4298;;", "linkedin": ";;hachem-kadri-8266aa1a9", "or_profile": "~Yuka_Hashimoto2;~Masahiro_Ikeda1;~hachem_kadri2", "aff": "NTT;RIKEN;Aix Marseille University", "aff_domain": "ntt.co.jp;riken.jp;univ-amu.fr", "position": "Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhashimoto2023deep,\ntitle={Deep learning with kernels through {RKHM} and the Perron-Frobenius operator},\nauthor={Yuka Hashimoto and Masahiro Ikeda and Hachem Kadri},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3ZrGmenVM2}\n}", "github": "", "project": "", "reviewers": "eQG4;HsSA;gE5P;Hurc", "pdf_size": 696254, "rating": "5;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;4;3", "novelty": "2;2;4;3", "presentation": "3;2;4;3", "wc_summary": "32;45;50;130", "wc_strengths": "27;74;21;83", "wc_weaknesses": "377;114;32;6", "wc_questions": "67;41;13;18", "wc_limitations": "3;60;9;10", "wc_review": "506;334;125;247", "wc_reply_reviewers": "0;0;2;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.25, 38.525154120392564 ], "wc_strengths_avg": [ 51.25, 27.517040175135115 ], 
"wc_weaknesses_avg": [ 132.25, 146.82025575512392 ], "wc_questions_avg": [ 34.75, 21.405314760591587 ], "wc_limitations_avg": [ 20.5, 22.96192500641007 ], "wc_review_avg": [ 303.0, 138.73535958795796 ], "wc_reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7614406837706518068&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "ntt.co.jp;riken.jp;univ-amu.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "NTT Corporation;RIKEN;Aix Marseille University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntt.co.jp;https://www.riken.jp;https://www.univ-amu.fr", "aff_unique_abbr": "NTT;RIKEN;AMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Japan;France" }, { "title": "Volume Feature Rendering for Fast Neural Radiance Field Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72894", "id": "3aVZhMfsyz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce182e31662883d4decc84a0255335b6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3aVZhMfsyz", "openreview": "https://openreview.net/forum?id=3aVZhMfsyz", "poster": "/media/PosterPDFs/NeurIPS%202023/72894.png?t=1701683625.9705112", "slides": "https://nips.cc/virtual/2023/poster/72894", "video": "https://nips.cc/virtual/2023/poster/72894", "author_site": "Kang Han, Wei Xiang, Lu Yu", "tldr": "", "abstract": "Neural radiance fields (NeRFs) are able to synthesize realistic novel views from multi-view images captured from distinct positions and perspectives. In NeRF's rendering pipeline, neural networks are used to represent a scene independently or transform queried learnable feature vector of a point to the expected color or density. With the aid of geometry guides either in the form of occupancy grids or proposal networks, the number of color neural network evaluations can be reduced from hundreds to dozens in the standard volume rendering framework. However, many evaluations of the color neural network are still a bottleneck for fast NeRF reconstruction. This paper revisits volume feature rendering (VFR) for the purpose of fast NeRF reconstruction. The VFR integrates the queried feature vectors of a ray into one feature vector, which is then transformed to the final pixel color by a color neural network. This fundamental change to the standard volume rendering framework requires only one single color neural network evaluation to render a pixel, which substantially lowers the high computational complexity of the rendering framework attributed to a large number of color neural network evaluations. Consequently, we can use a comparably larger color neural network to achieve a better rendering quality while maintaining the same training and rendering time costs. 
This approach achieves state-of-the-art rendering quality on both synthetic and real-world datasets while requiring less training time compared with existing methods.", "keywords": "neural rendering;volume rendering;view synthesis;3D reconstruction", "primary_area": "", "supplementary_material": "/attachment/0b221ce043baa9bb136be488245a8b6f0ed9e305.zip", "author": "Kang Han;Wei Xiang;Lu Yu", "authorids": "~Kang_Han1;~Wei_Xiang3;~Lu_Yu8", "gender": "M;M;F", "homepage": ";https://scholars.latrobe.edu.au/wxiang;", "dblp": "178/7281;37/1682-1;", "google_scholar": "https://scholar.google.com.au/citations?user=nIZmei8AAAAJ;https://scholar.google.com.au/citations?user=VxQUr90AAAAJ;", "orcid": ";0000-0002-0608-065X;0000-0002-3913-7683", "linkedin": ";;", "or_profile": "~Kang_Han1;~Wei_Xiang3;~Lu_Yu8", "aff": "James Cook University;La Trobe University;James Cook University", "aff_domain": "jcu.edu.au;latrobe.edu.au;jcu.edu.au", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nhan2023volume,\ntitle={Volume Feature Rendering for Fast Neural Radiance Field Reconstruction},\nauthor={Kang Han and Wei Xiang and Lu Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3aVZhMfsyz}\n}", "github": "", "project": "", "reviewers": "y3xd;WTsx;4LZz;q1tF", "pdf_size": 9114764, "rating": "4;5;5;9", "confidence": "5;5;4;5", "soundness": "3;2;3;3", "novelty": "1;2;3;4", "presentation": "3;3;3;3", "wc_summary": "127;44;101;101", "wc_strengths": "79;39;131;35", "wc_weaknesses": "2;132;688;15", "wc_questions": "553;10;102;84", "wc_limitations": "6;11;59;7", "wc_review": "767;236;1081;242", "wc_reply_reviewers": "122;9;21;11", "wc_reply_authors": "418;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.25, 30.35107082130711 ], "wc_strengths_avg": [ 71.0, 38.67815921162743 ], "wc_weaknesses_avg": [ 209.25, 281.0047819877804 ], "wc_questions_avg": [ 187.25, 213.96188328765476 ], "wc_limitations_avg": [ 20.75, 22.16275028059469 ], "wc_review_avg": [ 581.5, 360.04895500473265 ], "wc_reply_reviewers_avg": [ 40.75, 47.129475914760604 ], "wc_reply_authors_avg": [ 104.5, 180.99930939094767 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.22549380840084865, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15119503245352519489&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "jcu.edu.au;latrobe.edu.au;jcu.edu.au", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "James Cook University;La Trobe University", "aff_unique_dep": ";", "aff_unique_url": "https://www.jcu.edu.au;https://www.latrobe.edu.au", "aff_unique_abbr": "JCU;LTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "3b5e2AFs7f", "title": "On Formal Feature Attribution and Its Approximation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent years have witnessed the widespread use of artificial intelligence (AI) algorithms and machine learning (ML) models.
Despite their tremendous success, a number of vital problems like ML model brittleness, their fairness, and the lack of interpretability warrant active developments in explainable artificial intelligence (XAI) and formal ML model verification. The two major lines of work in XAI include feature selection methods, e.g. Anchors, and feature attribution techniques, e.g. LIME and SHAP. Despite their promise, most of the existing feature selection and attribution approaches are susceptible to a range of critical issues, including explanation unsoundness and out-of-distribution sampling. A recent formal approach to XAI (FXAI), although serving as an alternative to the above and free of these issues, suffers from a few other limitations. For instance, besides the scalability limitation, the formal approach is unable to tackle the feature attribution problem. Additionally, a formal explanation, despite being formally sound, is typically quite large, which hampers its applicability in practical settings. Motivated by the above, this paper proposes a way to apply the apparatus of formal XAI to the case of feature attribution based on formal explanation enumeration. Formal feature attribution (FFA) is argued to be advantageous over the existing methods, both formal and non-formal. Given the practical complexity of the problem, the paper then proposes an efficient technique for approximating exact FFA. Finally, it offers experimental evidence of the effectiveness of the proposed approximate FFA in comparison to the existing feature attribution algorithms not only in terms of feature importance but also in terms of their relative order.", "keywords": "Feature Attribution;Explainable AI;Formal Explanation", "primary_area": "", "supplementary_material": "/attachment/6d5647a452c88b67e59a3392d033b082d4a2da19.zip", "author": "Jinqiang Yu;Alexey Ignatiev;Peter J. Stuckey", "authorids": "~Jinqiang_Yu1;~Alexey_Ignatiev1;~Peter_J._Stuckey1", "gender": "M;M;M", "homepage": ";https://alexeyignatiev.github.io/;https://people.eng.unimelb.edu.au/pstuckey/", "dblp": "137/6194;26/9729;s/PeterJStuckey", "google_scholar": "2_o5eloAAAAJ;https://scholar.google.pt/citations?user=CkHZ6fMAAAAJ;https://scholar.google.com.au/citations?user=tvFekxwAAAAJ", "orcid": ";0000-0002-4535-2902;0000-0003-2186-0459", "linkedin": ";;peter-stuckey-564620109/", "or_profile": "~Jinqiang_Yu1;~Alexey_Ignatiev1;~Peter_J._Stuckey1", "aff": "Monash University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;monash.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nyu2023on,\ntitle={On Formal Feature Attribution and Its Approximation},\nauthor={Jinqiang Yu and Alexey Ignatiev and Peter J.
Stuckey},\nyear={2023},\nurl={https://openreview.net/forum?id=3b5e2AFs7f}\n}", "github": "", "project": "", "reviewers": "CksZ;aL7B;gWVL;FoWS", "site": "https://openreview.net/forum?id=3b5e2AFs7f", "pdf_size": 568174, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "4;2;3;4", "wc_summary": "68;50;136;78", "wc_strengths": "80;31;67;128", "wc_weaknesses": "308;85;73;237", "wc_questions": "28;375;459;38", "wc_limitations": "18;108;24;93", "wc_review": "502;649;759;574", "wc_reply_reviewers": "430;474;192;90", "wc_reply_authors": "226;729;430;318", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 83.0, 32.202484376209235 ], "wc_strengths_avg": [ 76.5, 34.73110997362451 ], "wc_weaknesses_avg": [ 175.75, 100.04342807001368 ], "wc_questions_avg": [ 225.0, 194.31546515910668 ], "wc_limitations_avg": [ 60.75, 40.158280590682665 ], "wc_review_avg": [ 621.0, 95.12885997424756 ], "wc_reply_reviewers_avg": [ 296.5, 160.38313502360526 ], "wc_reply_authors_avg": [ 425.75, 189.39954461402488 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12994428242656177989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "A Data-Free Approach to Mitigate Catastrophic Forgetting in Federated Class Incremental Learning for Vision Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72893", "id": "3b9sqxCW1x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d160ea01902c33e30660851dfbac5980-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3b9sqxCW1x", "openreview": "https://openreview.net/forum?id=3b9sqxCW1x", "poster": "/media/PosterPDFs/NeurIPS%202023/72893.png?t=1702218534.608979", "slides": "https://nips.cc/virtual/2023/poster/72893", "video": "https://nips.cc/virtual/2023/poster/72893", "author_site": "Sara Babakniya, Zalan Fabian, Chaoyang He, Mahdi Soltanolkotabi, Salman Avestimehr", "tldr": "", "abstract": "Deep learning models often suffer from forgetting previously learned information when trained on new data. This problem is exacerbated in federated learning (FL), where the data is distributed and can change independently for each user. Many solutions have been proposed to resolve this catastrophic forgetting in a centralized setting. However, they do not apply directly to FL because of its unique complexities, such as privacy concerns and resource limitations. To overcome these challenges, this paper presents a framework for \\textbf{federated class incremental learning} that utilizes a generative model to synthesize samples from past distributions. This data can later be exploited alongside the training data to mitigate catastrophic forgetting.
To preserve privacy, the generative model is trained on the server using data-free methods at the end of each task without requesting data from clients. Moreover, our solution does not require users to store old data or models, which gives them the freedom to join/leave the training at any time. Additionally, we introduce SuperImageNet, a new regrouping of the ImageNet dataset specifically tailored for federated continual learning. We demonstrate significant improvements compared to existing baselines through extensive experiments on multiple datasets.", "keywords": "federated learning;class incremental learning;generative models;data-free;continual learning", "primary_area": "", "supplementary_material": "", "author": "Sara Babakniya;Zalan Fabian;Chaoyang He;Mahdi Soltanolkotabi;Salman Avestimehr", "authorids": "~Sara_Babakniya1;~Zalan_Fabian1;~Chaoyang_He1;~Mahdi_Soltanolkotabi1;~Salman_Avestimehr1", "gender": "F;M;M;M;", "homepage": "https://sarababakn.github.io/;https://z-fabian.github.io/;http://chaoyanghe.com;http://www-bcf.usc.edu/~soltanol/;", "dblp": "292/9826;192/2874;222/6721-1.html;75/6691;", "google_scholar": "https://scholar.google.com/citations?hl=en;5EKjsXQAAAAJ;2z2camUAAAAJ;narJyMAAAAAJ;", "orcid": "0000-0002-7391-8766;;;;", "linkedin": "sara-babakniya;;;;", "or_profile": "~Sara_Babakniya1;~Zalan_Fabian1;~Chaoyang_He1;~Mahdi_Soltanolkotabi1;~Salman_Avestimehr1", "aff": "University of Southern California;University of Southern California;TensorOpera AI;University of Southern California;", "aff_domain": "usc.edu;usc.edu;tensoropera.ai;usc.edu;", "position": "PhD student;PhD student;Researcher;Associate Professor;", "bibtex": "@inproceedings{\nbabakniya2023a,\ntitle={A Data-Free Approach to Mitigate Catastrophic Forgetting in Federated Class Incremental Learning for Vision Tasks},\nauthor={Sara Babakniya and Zalan Fabian and Chaoyang He and Mahdi Soltanolkotabi and Salman Avestimehr},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3b9sqxCW1x}\n}", "github": "", "project": "", "reviewers": "zVmx;NkkA;p34V;4Enn", "pdf_size": 4625536, "rating": "5;5;6;6", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;2", "wc_summary": "59;37;41;112", "wc_strengths": "57;54;130;116", "wc_weaknesses": "234;30;115;249", "wc_questions": "3;148;159;231", "wc_limitations": "1;32;8;142", "wc_review": "354;301;453;850", "wc_reply_reviewers": "13;0;47;0", "wc_reply_authors": "52;371;16;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.25, 29.894606536965828 ], "wc_strengths_avg": [ 89.25, 34.127518222103404 ], "wc_weaknesses_avg": [ 157.0, 89.84152714641488 ], "wc_questions_avg": [ 135.25, 82.74169142578607 ], "wc_limitations_avg": [ 45.75, 56.74669593905887 ], "wc_review_avg": [ 489.5, 215.16563387307 ], "wc_reply_reviewers_avg": [ 15.0, 19.222382786741086 ], "wc_reply_authors_avg": [ 109.75, 152.00390619980791 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11892676141052444122&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 6, "email": "usc.edu;usc.edu;tensoropera.ai;usc.edu;", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Southern California;TensorOpera AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;", "aff_unique_abbr": "USC;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "SoTTA: Robust Test-Time Adaptation on Noisy Data Streams", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72892", "id": "3bdXag2rUd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2da53cd1abdae59150e35f4693834f32-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3bdXag2rUd", "openreview": "https://openreview.net/forum?id=3bdXag2rUd", "poster": "/media/PosterPDFs/NeurIPS%202023/72892.png?t=1697698472.1624212", "slides": "https://nips.cc/virtual/2023/poster/72892", "video": "https://nips.cc/virtual/2023/poster/72892", "author_site": "Taesik Gong, Yewon Kim, Taeckyung Lee, Sorn Chottananurak, Sung-Ju Lee", "tldr": "", "abstract": "Test-time adaptation (TTA) aims to address distributional shifts between training and testing data using only unlabeled test data streams for continual model adaptation. However, most TTA methods assume benign test streams, while test samples could be unexpectedly diverse in the wild. For instance, an unseen object or noise could appear in autonomous driving. This leads to a new threat to existing TTA algorithms; we found that prior TTA algorithms suffer from those noisy test samples as they blindly adapt to incoming samples. To address this problem, we present Screening-out Test-Time Adaptation (SoTTA), a novel TTA algorithm that is robust to noisy samples. The key enabler of SoTTA is two-fold: (i) input-wise robustness via high-confidence uniform-class sampling that effectively filters out the impact of noisy samples and (ii) parameter-wise robustness via entropy-sharpness minimization that improves the robustness of model parameters against large gradients from noisy samples. Our evaluation with standard TTA benchmarks with various noisy scenarios shows that our method outperforms state-of-the-art TTA methods under the presence of noisy samples and achieves comparable accuracy to those methods without noisy samples. 
The source code is available at https://github.com/taeckyung/SoTTA.", "keywords": "test-time adaptation;domain adaptation;deep learning;machine learning", "primary_area": "", "supplementary_material": "/attachment/247085d6723f7ad433a146c0736efd39282e0c16.zip", "author": "Taesik Gong;Yewon Kim;Taeckyung Lee;Sorn Chottananurak;Sung-Ju Lee", "authorids": "~Taesik_Gong1;~Yewon_Kim1;~Taeckyung_Lee1;~Sorn_Chottananurak1;~Sung-Ju_Lee1", "gender": "M;F;;M;M", "homepage": "https://taesikgong.com/;https://yewon-kim.com;https://taeckyung.github.io/;https://s6007541.github.io/sornshaman.github.io/;https://nmsl.kaist.ac.kr/sjlee", "dblp": "206/1779;;322/6749;;28/1552", "google_scholar": "bcmJw7AAAAAJ;;RKaOmicAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8967-3652;;;;0000-0002-5518-2126", "linkedin": "taesik-gong-70a507a6;;taeckyung/;;sungjulee/", "or_profile": "~Taesik_Gong1;~Yewon_Kim1;~Taeckyung_Lee1;~Sorn_Chottananurak1;~Sung-Ju_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\ngong2023sotta,\ntitle={So{TTA}: Robust Test-Time Adaptation on Noisy Data Streams},\nauthor={Taesik Gong and Yewon Kim and Taeckyung Lee and Sorn Chottananurak and Sung-Ju Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3bdXag2rUd}\n}", "github": "", "project": "", "reviewers": "gePD;HUhd;6Z6q;kiEt", "pdf_size": 1383303, "rating": "4;5;5;7", "confidence": "4;5;5;5", "soundness": "2;2;1;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "55;49;119;42", "wc_strengths": "42;20;71;57", "wc_weaknesses": "119;137;293;59", "wc_questions": "47;3;155;11", "wc_limitations": "29;3;7;4", "wc_review": "292;212;645;173", "wc_reply_reviewers": "48;133;29;0", "wc_reply_authors": "96;506;33;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.25, 30.80077109424373 ], "wc_strengths_avg": [ 47.5, 18.9010581714358 ], "wc_weaknesses_avg": [ 152.0, 86.37708029332781 ], "wc_questions_avg": [ 54.0, 60.6217782649107 ], "wc_limitations_avg": [ 10.75, 10.638961415476606 ], "wc_review_avg": [ 330.5, 186.575051922812 ], "wc_reply_reviewers_avg": [ 52.5, 49.52019789944301 ], "wc_reply_authors_avg": [ 158.75, 203.42980976248293 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7024797526382472115&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Mitigating Over-smoothing in Transformers via Regularized Nonlocal Functionals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72891", "id": "3fd776zKmo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fde1a69a5b6e554b2f1f727197d2651d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3fd776zKmo", "openreview": "https://openreview.net/forum?id=3fd776zKmo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72891", "video": "https://nips.cc/virtual/2023/poster/72891", "author_site": "Tam Nguyen, Tan Nguyen, Richard Baraniuk", "tldr": "", "abstract": "Transformers have achieved remarkable success in a wide range of natural language processing and computer vision applications. However, the representation capacity of a deep transformer model is degraded due to the over-smoothing issue in which the token representations become identical when the model's depth grows. In this work, we show that self-attention layers in transformers minimize a functional which promotes smoothness, thereby causing token uniformity. We then propose a novel regularizer that penalizes the norm of the difference between the smooth output tokens from self-attention and the input tokens to preserve the fidelity of the tokens. Minimizing the resulting regularized energy functional, we derive the Neural Transformer with a Regularized Nonlocal Functional (NeuTRENO), a novel class of transformer models that can mitigate the over-smoothing issue. We empirically demonstrate the advantages of NeuTRENO over the baseline transformers and state-of-the-art methods in reducing the over-smoothing of token representations on various practical tasks, including object classification, image segmentation, and language modeling.", "keywords": "transformers;self-attention;total variation;nonlocal functionals;over-smoothing", "primary_area": "", "supplementary_material": "/attachment/fdf4df61bb358fc6dd51b6f54bb8c91d2cd5b296.zip", "author": "Tam Minh Nguyen;Tan Minh Nguyen;Richard Baraniuk", "authorids": "~Tam_Minh_Nguyen1;~Tan_Minh_Nguyen1;~Richard_Baraniuk1", "gender": "F;M;", "homepage": ";https://tanmnguyen89.github.io/;http://richb.rice.edu/", "dblp": "251/1464;255/4725;32/2804", "google_scholar": ";OizOh88AAAAJ;https://scholar.google.com.tw/citations?user=N-BBA20AAAAJ", "orcid": ";;", "linkedin": "tam-nguyen-6a3935132/;;richard-baraniuk", "or_profile": "~Tam_Minh_Nguyen1;~Tan_Minh_Nguyen1;~Richard_Baraniuk1", "aff": "FPT Software;University of California, Los Angeles;William Marsh Rice University", "aff_domain": "fsoft.com.vn;ucla.edu;rice.edu", "position": "FPT AI Residency;Postdoc;C. 
Sidney Burrus Professor", "bibtex": "@inproceedings{\nnguyen2023mitigating,\ntitle={Mitigating Over-smoothing in Transformers via Regularized Nonlocal Functionals},\nauthor={Tam Minh Nguyen and Tan Minh Nguyen and Richard Baraniuk},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3fd776zKmo}\n}", "github": "", "project": "", "reviewers": "qvvE;YNWk;qcqa;h3ak;k2Mg", "pdf_size": 1504733, "rating": "4;5;6;6;7", "confidence": "5;4;3;4;3", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;2;3;3", "wc_summary": "80;55;47;65;140", "wc_strengths": "40;16;99;88;12", "wc_weaknesses": "312;121;100;54;71", "wc_questions": "107;55;64;4;65", "wc_limitations": "6;1;6;16;1", "wc_review": "545;248;316;227;289", "wc_reply_reviewers": "0;0;187;12;13", "wc_reply_authors": "73;73;770;9;10", "reply_reviewers": "0;0;2;1;1", "reply_authors": "2;2;4;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 77.4, 33.18192278937434 ], "wc_strengths_avg": [ 51.0, 36.16628264005025 ], "wc_weaknesses_avg": [ 131.6, 93.1119755992751 ], "wc_questions_avg": [ 59.0, 32.88160580020386 ], "wc_limitations_avg": [ 6.0, 5.477225575051661 ], "wc_review_avg": [ 325.0, 114.28910709249591 ], "wc_reply_reviewers_avg": [ 42.4, 72.5164808853822 ], "wc_reply_authors_avg": [ 187.0, 292.8801802785569 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8910421112136307, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8730476170278175559&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "fsoft.com.vn;ucla.edu;rice.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "FPT Corporation;University of California, Los Angeles;Rice University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fpt-software.com;https://www.ucla.edu;https://www.rice.edu", "aff_unique_abbr": "FPT;UCLA;Rice", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Vietnam;United States" }, { "title": "QuantSR: Accurate Low-bit Quantization for Efficient Image Super-Resolution", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72890", "id": "3gamyee9Yh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2169d573d75ff90c7b12dc3a5fc2898-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3gamyee9Yh", "openreview": "https://openreview.net/forum?id=3gamyee9Yh", "poster": "/media/PosterPDFs/NeurIPS%202023/72890.png?t=1701439753.4987035", "slides": "https://nips.cc/virtual/2023/poster/72890", "video": "https://nips.cc/virtual/2023/poster/72890", "author_site": "Haotong Qin, Yulun Zhang, Yifu Ding, Yifan liu, Xianglong Liu, Martin Danelljan, Fisher Yu", "tldr": "", "abstract": "Low-bit quantization in image super-resolution (SR) has attracted copious attention in recent research due to its ability to reduce parameters and operations significantly. However, many quantized SR models suffer from accuracy degradation compared to their full-precision counterparts, especially at ultra-low bit widths (2-4 bits), limiting their practical applications. 
To address this issue, we propose a novel quantized image SR network, called QuantSR, which achieves accurate and efficient SR processing under low-bit quantization. To overcome the representation homogeneity caused by quantization in the network, we introduce the Redistribution-driven Learnable Quantizer (RLQ). This is accomplished through an inference-agnostic efficient redistribution design, which introduces additional information in both forward and backward passes to improve the representation ability of quantized networks. Furthermore, to achieve flexible inference and break the upper limit of accuracy, we propose the Depth-dynamic Quantized Architecture (DQA). Our DQA allows for the trade-off between efficiency and accuracy during inference through weight sharing. Our comprehensive experiments show that QuantSR outperforms existing state-of-the-art quantized SR networks in terms of accuracy while also providing more competitive computational efficiency. In addition, we demonstrate the scheme's architectural generality by providing QuantSR-C and QuantSR-T for convolutional and Transformer versions, respectively. Our code and models are released at https://github.com/htqin/QuantSR .", "keywords": "Super Resolution;Model Quantization;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/fb945c444662d777c4f66f5757710d1757fba64f.zip", "author": "Haotong Qin;Yulun Zhang;Yifu Ding;Yifan liu;Xianglong Liu;Martin Danelljan;Fisher Yu", "authorids": "~Haotong_Qin1;~Yulun_Zhang1;~Yifu_Ding2;~Yifan_liu3;~Xianglong_Liu3;~Martin_Danelljan4;~Fisher_Yu2", "gender": "M;M;F;F;;M;M", "homepage": "https://htqin.github.io/;http://yulunzhang.com/;https://yifu-ding.github.io/;https://irfanicmll.github.io/;;https://martin-danelljan.github.io/;https://www.yf.io/", "dblp": "262/3626.html;166/2763-1.html;;23/4955-1;;151/8848;117/6314", "google_scholar": "mK6n-KgAAAAJ;ORmLjWoAAAAJ;RCEI1r0AAAAJ;ksQ4JnQAAAAJ;;NCSSpMkAAAAJ;-XCiamcAAAAJ", "orcid": ";0000-0002-2288-5079;0000-0002-3612-8757;;;;", "linkedin": ";yulun-zhang-1116b5b9/;yifu-ding-253614186/;;;;", "or_profile": "~Haotong_Qin1;~Yulun_Zhang1;~Yifu_Ding2;~Yifan_liu3;~Xianglong_Liu3;~Martin_Danelljan4;~Fisher_Yu2", "aff": "Beihang University;Swiss Federal Institute of Technology;Beihang University;University of Adelaide;;ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "buaa.edu.cn;ethz.ch;buaa.edu.cn;adelaide.edu.au;;vision.ee.ethz.ch;ethz.ch", "position": "PhD student;Postdoc;PhD student;Assistant Professor;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nqin2023quantsr,\ntitle={Quant{SR}: Accurate Low-bit Quantization for Efficient Image Super-Resolution},\nauthor={Haotong Qin and Yulun Zhang and Yifu Ding and Yifan liu and Xianglong Liu and Martin Danelljan and Fisher Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3gamyee9Yh}\n}", "github": "", "project": "", "reviewers": "E4FQ;j1Cc;G4U2;VLLV;otEX", "pdf_size": 1731463, "rating": "5;7;7;7;8", "confidence": "4;5;5;4;5", "soundness": "3;3;3;3;3", "novelty": "3;3;3;2;4", "presentation": "3;4;3;3;3", "wc_summary": "109;103;60;61;70", "wc_strengths": "40;212;38;50;220", "wc_weaknesses": "244;285;155;209;182", "wc_questions": "4;2;109;8;121", "wc_limitations": "10;44;8;2;5", "wc_review": "407;646;370;330;598", "wc_reply_reviewers": "886;29;27;19;64", "wc_reply_authors": "1199;0;0;0;0", "reply_reviewers": "4;1;1;1;1", "reply_authors": "5;1;1;1;1", 
"rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 80.6, 21.11492363235065 ], "wc_strengths_avg": [ 112.0, 85.05057319030837 ], "wc_weaknesses_avg": [ 215.0, 45.75150270756142 ], "wc_questions_avg": [ 48.8, 54.219553668395314 ], "wc_limitations_avg": [ 13.8, 15.341447128612087 ], "wc_review_avg": [ 470.2, 127.22326831205052 ], "wc_reply_reviewers_avg": [ 205.0, 340.8512872206881 ], "wc_reply_authors_avg": [ 239.8, 479.6000000000001 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 1.8, 1.6000000000000003 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6666666666666664, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5735132584359711096&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;ethz.ch;buaa.edu.cn;adelaide.edu.au;;vision.ee.ethz.ch;ethz.ch", "author_num": 7, "aff_unique_index": "0;1;0;2;3;1", "aff_unique_norm": "Beihang University;Swiss Federal Institute of Technology;University of Adelaide;ETH Zurich", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.ethz.ch;https://www.adelaide.edu.au;https://www.ethz.ch", "aff_unique_abbr": "BUAA;ETH Zurich;Adelaide;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;1;1", "aff_country_unique": "China;Switzerland;Australia" }, { "title": "Differentiable and Stable Long-Range Tracking of Multiple Posterior Modes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72889", "id": "3gxiOEf2D6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/85b2ff7574ef265f3a4800db9112ce14-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3gxiOEf2D6", "openreview": "https://openreview.net/forum?id=3gxiOEf2D6", "poster": "/media/PosterPDFs/NeurIPS%202023/72889.png?t=1702952385.6151671", "slides": "https://nips.cc/virtual/2023/poster/72889", "video": "https://nips.cc/virtual/2023/poster/72889", "author_site": "Ali Younis, Erik Sudderth", "tldr": "", "abstract": "Particle filters flexibly represent multiple posterior modes nonparametrically, via a collection of weighted samples, but have classically been applied to tracking problems with known dynamics and observation likelihoods. Such generative models may be inaccurate or unavailable for high-dimensional observations like images. We instead leverage training data to discriminatively learn particle-based representations of uncertainty in latent object states, conditioned on arbitrary observations via deep neural network encoders. While prior discriminative particle filters have used heuristic relaxations of discrete particle resampling, or biased learning by truncating gradients at resampling steps, we achieve unbiased and low-variance gradient estimates by representing posteriors as continuous mixture densities. Our theory and experiments expose dramatic failures of existing reparameterization-based estimators for mixture gradients, an issue we address via an importance-sampling gradient estimator. Unlike standard recurrent neural networks, our mixture density particle filter represents multimodal uncertainty in continuous latent states, improving accuracy and robustness. 
On a range of challenging tracking and robot localization problems, our approach achieves dramatic improvements in accuracy, while also showing much greater stability across multiple training runs.", "keywords": "particle;filter;mixture;belief propagation;nonparametric;deep learning;generative;discriminative;graphical model;multiple modes;multi-modal", "primary_area": "", "supplementary_material": "", "author": "Ali Younis;Erik B. Sudderth", "authorids": "~Ali_Younis1;~Erik_B._Sudderth2", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ali_Younis1;~Erik_B._Sudderth2", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nyounis2023differentiable,\ntitle={Differentiable and Stable Long-Range Tracking of Multiple Posterior Modes},\nauthor={Ali Younis and Erik B. Sudderth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3gxiOEf2D6}\n}", "github": "", "project": "", "reviewers": "FW5K;FRJy;fwhN;53pr;wjUs", "pdf_size": 18857175, "rating": "5;5;5;6;7", "confidence": "3;5;4;2;4", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;4", "presentation": "2;3;2;3;3", "wc_summary": "86;68;48;205;128", "wc_strengths": "118;34;31;126;183", "wc_weaknesses": "412;300;28;90;99", "wc_questions": "157;310;84;47;53", "wc_limitations": "42;7;21;24;5", "wc_review": "815;719;212;492;468", "wc_reply_reviewers": "75;51;18;29;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 107.0, 55.65608681896347 ], "wc_strengths_avg": [ 98.4, 58.297855878239645 ], "wc_weaknesses_avg": [ 185.8, 145.4790706596657 ], "wc_questions_avg": [ 130.2, 98.04366374223274 ], "wc_limitations_avg": [ 19.8, 13.377593206552516 ], "wc_review_avg": [ 541.2, 211.0596124321278 ], "wc_reply_reviewers_avg": [ 37.0, 23.194827009486403 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.196116135138184, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2196724086136787434&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";", "author_num": 2 }, { "title": "Learning Interpretable Low-dimensional Representation via Physical Symmetry", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72888", "id": "3iSj4l8ZGT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9850e6a5410331290dc1deefb7514448-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3iSj4l8ZGT", "openreview": "https://openreview.net/forum?id=3iSj4l8ZGT", "poster": "/media/PosterPDFs/NeurIPS%202023/72888.png?t=1699253027.195602", "slides": "https://nips.cc/virtual/2023/poster/72888", "video": "https://nips.cc/virtual/2023/poster/72888", "author_site": "Xuanjie Liu, Daniel Chin, Yichen Huang, Gus Xia", "tldr": "", "abstract": "We have recently seen great progress in learning interpretable music representations, ranging from basic factors, such as pitch and timbre, to high-level concepts, such as chord and texture. However, most methods rely heavily on music domain knowledge. 
It remains an open question what general computational principles *give rise to* interpretable representations, especially low-dim factors that agree with human perception. In this study, we take inspiration from modern physics and use *physical symmetry* as a self-consistency constraint for the latent space. Specifically, it requires the prior model that characterises the dynamics of the latent states to be *equivariant* with respect to certain group transformations. We show that physical symmetry leads the model to learn a *linear* pitch factor from unlabelled monophonic music audio in a self-supervised fashion. In addition, the same methodology can be applied to computer vision, learning a 3D Cartesian space from videos of a simple moving object without labels. Furthermore, physical symmetry naturally leads to *counterfactual representation augmentation*, a new technique which improves sample efficiency.", "keywords": "Physics Symmetry;Time series data;Self-supervised Learning;Representation Augmentation", "primary_area": "", "supplementary_material": "/attachment/cfe158bc86b7d1578a24f344242a751a0b8b72ab.zip", "author": "Xuanjie Liu;Daniel Chin;Yichen Huang;Gus Xia", "authorids": "~Xuanjie_Liu1;~Daniel_Chin2;~Yichen_Huang1;~Gus_Xia1", "gender": ";M;M;M", "homepage": ";https://inspiring-yonath-a67980.netlify.app/;https://www.yichenwilliamhuang.com/;http://www.musicxlab.com", "dblp": "340/9059;03/2238;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;Ctq4-fEAAAAJ;;", "orcid": ";0000-0002-3406-5318;;", "linkedin": ";;;", "or_profile": "~Xuanjie_Liu1;~Daniel_Chin2;~Yichen_Huang1;~Gus_Xia1", "aff": "New York University Shanghai;New York University;Mohamed bin Zayed University of Artificial Intelligence;New York University", "aff_domain": "cs.nyu.edu;nyu.edu;mbzuai.ac.ae;nyu.edu", "position": "Intern;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nliu2023learning,\ntitle={Learning Interpretable Low-dimensional Representation via Physical Symmetry},\nauthor={Xuanjie Liu and Daniel Chin and Yichen Huang and Gus Xia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3iSj4l8ZGT}\n}", "github": "", "project": "", "reviewers": "TYBC;mck6;dLV3;5wLb;wTJe", "pdf_size": 0, "rating": "4;5;7;7;7", "confidence": "3;4;4;4;5", "soundness": "3;2;3;4;4", "novelty": "3;2;3;3;4", "presentation": "2;3;4;4;4", "wc_summary": "127;75;72;89;498", "wc_strengths": "134;52;62;89;270", "wc_weaknesses": "483;156;62;116;172", "wc_questions": "163;53;62;155;82", "wc_limitations": "15;208;24;100;163", "wc_review": "922;544;282;549;1185", "wc_reply_reviewers": "203;211;81;230;0", "wc_reply_authors": "91;430;22;89;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 172.2, 164.0724230332447 ], "wc_strengths_avg": [ 121.4, 79.54269293907518 ], "wc_weaknesses_avg": [ 197.8, 147.57018669094379 ], "wc_questions_avg": [ 103.0, 46.74612283387789 ], "wc_limitations_avg": [ 102.0, 75.64919034596471 ], "wc_review_avg": [ 696.4, 318.2153987474522 ], "wc_reply_reviewers_avg": [ 145.0, 89.51647892985962 ], "wc_reply_authors_avg": [ 126.4, 156.01358915171463 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], 
"authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7499999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4711188146538435576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.nyu.edu;nyu.edu;mbzuai.ac.ae;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "New York University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "NYU;MBZUAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Shanghai;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "PERFOGRAPH: A Numerical Aware Program Graph Representation for Performance Optimization and Program Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72887", "id": "3jAsfo8x8k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b41907dd4df5c60f86216b73fe0c7465-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3jAsfo8x8k", "openreview": "https://openreview.net/forum?id=3jAsfo8x8k", "poster": "/media/PosterPDFs/NeurIPS%202023/72887.png?t=1701385721.9432535", "slides": "https://nips.cc/virtual/2023/poster/72887", "video": "https://nips.cc/virtual/2023/poster/72887", "author_site": "Ali TehraniJamsaz, Quazi Ishtiaque Mahmud, Le Chen, Nesreen K. Ahmed, Ali Jannesari", "tldr": "", "abstract": "The remarkable growth and significant success of machine learning have expanded its applications into programming languages and program analysis. However, a key challenge in adopting the latest machine learning methods is the representation of programming languages which has a direct impact on the ability of machine learning methods to reason about programs. The absence of numerical awareness, aggregate data structure information, and improper way of presenting variables in previous representation works have limited their performances. To overcome the limitations and challenges of current program representations, we propose a novel graph-based program representation called PERFOGRAPH. PERFOGRAPH can capture numerical information and the aggregate data structure by introducing new nodes and edges. Furthermore, we propose an adapted embedding method to incorporate numerical awareness.\nThese enhancements make PERFOGRAPH a highly flexible and scalable representation that can effectively capture programs' intricate dependencies and semantics. Consequently, it serves as a powerful tool for various applications such as program analysis, performance optimization, and parallelism discovery. Our experimental results demonstrate that PERFOGRAPH outperforms existing representations and sets new state-of-the-art results by reducing the error rate by 7.4% (AMD dataset) and 10% (NVIDIA dataset) in the well-known Device Mapping challenge. It also sets new state-of-the-art results in various performance optimization tasks like Parallelism Discovery and Numa and Prefetchers Configuration prediction.", "keywords": "program representation;graph representation;program analysis;graph neural networks;performance optimization", "primary_area": "", "supplementary_material": "/attachment/c9400ba7e7e3e11e129b71a0838660ef94faef90.pdf", "author": "Ali TehraniJamsaz;Quazi Ishtiaque Mahmud;Le Chen;Nesreen K. 
Ahmed;Ali Jannesari", "authorids": "~Ali_TehraniJamsaz1;~Quazi_Ishtiaque_Mahmud1;~Le_Chen2;~Nesreen_K._Ahmed2;~Ali_Jannesari1", "gender": "M;M;M;M;F", "homepage": "http://www.tehrani.xyz;;https://www.cs.iastate.edu/;https://www.cs.iastate.edu/swapp/;http://nesreenahmed.com", "dblp": ";;;74/1277;33/11518", "google_scholar": "XZ1al70AAAAJ;1wYOnLkAAAAJ;D7bxqR4AAAAJ;https://scholar.google.de/citations?user=YhWnhQEAAAAJ;AFV0nLcAAAAJ", "orcid": "0009-0001-3678-5730;;0000-0002-7188-6756;0000-0001-8672-5317;", "linkedin": "tehranixyz/;;;ali-jannesari-6ab8a56b/;nkahmed/", "or_profile": "~Ali_TehraniJamsaz1;~Quazi_Ishtiaque_Mahmud1;~Le_Chen2;~Ali_Jannesari1;~Nesreen_Ahmed1", "aff": "Iowa State University;Iowa State University;Iowa State University;Iowa State University;Intel AI Research", "aff_domain": "iastate.edu;iastate.edu;iastate.edu;isu.edu;intel.com", "position": "PhD student;PhD student;PhD student;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\ntehranijamsaz2023perfograph,\ntitle={{PERFOGRAPH}: A Numerical Aware Program Graph Representation for Performance Optimization and Program Analysis},\nauthor={Ali TehraniJamsaz and Quazi Ishtiaque Mahmud and Le Chen and Nesreen K. Ahmed and Ali Jannesari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3jAsfo8x8k}\n}", "github": "", "project": "", "reviewers": "inFa;f7Gk;ubPp;vHZH", "pdf_size": 411111, "rating": "6;6;7;7", "confidence": "3;4;5;4", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "4;4;4;2", "wc_summary": "127;124;65;103", "wc_strengths": "38;157;88;74", "wc_weaknesses": "48;117;68;449", "wc_questions": "159;105;182;132", "wc_limitations": "1;1;12;10", "wc_review": "373;504;415;768", "wc_reply_reviewers": "64;112;39;0", "wc_reply_authors": "0;89;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 104.75, 24.742423082632794 ], "wc_strengths_avg": [ 89.25, 43.15886351608439 ], "wc_weaknesses_avg": [ 170.5, 162.73982303050474 ], "wc_questions_avg": [ 144.5, 28.86607004772212 ], "wc_limitations_avg": [ 6.0, 5.049752469181039 ], "wc_review_avg": [ 515.0, 153.5366405780718 ], "wc_reply_reviewers_avg": [ 53.75, 40.634806508706305 ], "wc_reply_authors_avg": [ 22.25, 38.53813046840752 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=842462119498460380&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "iastate.edu;iastate.edu;iastate.edu;isu.edu;intel.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Iowa State University;Intel", "aff_unique_dep": ";Intel AI Research", "aff_unique_url": "https://www.iastate.edu;https://www.intel.com/research", "aff_unique_abbr": "ISU;Intel AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beyond probability partitions: Calibrating neural networks with semantic aware grouping", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72886", "id": "3kitbpEZZO", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b693a240cf1009bff9fa4422141c9392-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3kitbpEZZO", "openreview": "https://openreview.net/forum?id=3kitbpEZZO", "poster": "/media/PosterPDFs/NeurIPS%202023/72886.png?t=1701958422.5569813", "slides": "https://nips.cc/virtual/2023/poster/72886", "video": "https://nips.cc/virtual/2023/poster/72886", "author_site": "Jia-Qi Yang, De-Chuan Zhan, Le Gan", "tldr": "", "abstract": "Research has shown that deep networks tend to be overly optimistic about their predictions, leading to an underestimation of prediction errors. Due to the limited nature of data, existing studies have proposed various methods based on model prediction probabilities to bin the data and evaluate calibration error. We propose a more generalized definition of calibration error called Partitioned Calibration Error (PCE), revealing that the key difference among these calibration error metrics lies in how the data space is partitioned. We put forth an intuitive proposition that an accurate model should be calibrated across any partition, suggesting that the input space partitioning can extend beyond just the partitioning of prediction probabilities, and include partitions directly related to the input. Through semantic-related partitioning functions, we demonstrate that the relationship between model accuracy and calibration lies in the granularity of the partitioning function. This highlights the importance of partitioning criteria for training a calibrated and accurate model. To validate the aforementioned analysis, we propose a method that involves jointly learning a semantic aware grouping function based on deep model features and logits to partition the data space into subsets. Subsequently, a separate calibration function is learned for each subset. 
Experimental results demonstrate that our approach achieves significant performance improvements across multiple datasets and network architectures, thus highlighting the importance of the partitioning function for calibration.", "keywords": "Uncertainty calibration;Deep neural networks", "primary_area": "", "supplementary_material": "/attachment/1f29bf07e50d28e7384b01c2ddf7e164141696ef.pdf", "author": "Jia-Qi Yang;De-Chuan Zhan;Le Gan", "authorids": "~Jia-Qi_Yang1;~De-Chuan_Zhan1;~Le_Gan1", "gender": "M;M;M", "homepage": "http://www.lamda.nju.edu.cn/zhandc/;;https://lamda.thyrixyang.com/", "dblp": "74/498;199/0588.html;230/9976", "google_scholar": "mYJf4TcAAAAJ;cCD5SDoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-3533-2078;0000-0002-8260-6932;0000-0002-6331-0829", "linkedin": ";;", "or_profile": "~De-Chuan_Zhan1;~Le_Gan1;~Jiaqi_Yang1", "aff": "Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "Full Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nyang2023beyond,\ntitle={Beyond probability partitions: Calibrating neural networks with semantic aware grouping},\nauthor={Jia-Qi Yang and De-Chuan Zhan and Le Gan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3kitbpEZZO}\n}", "github": "", "project": "", "reviewers": "mMe6;TM5v;yhZK;KvC9;BrmR", "pdf_size": 685209, "rating": "5;5;5;6;7", "confidence": "4;5;5;3;4", "soundness": "1;1;3;2;3", "novelty": "2;2;3;2;3", "presentation": "2;2;2;3;3", "wc_summary": "75;124;67;50;90", "wc_strengths": "33;48;35;18;113", "wc_weaknesses": "251;579;230;46;152", "wc_questions": "9;49;54;45;4", "wc_limitations": "1;51;11;1;17", "wc_review": "369;851;397;160;376", "wc_reply_reviewers": "22;94;0;0;0", "wc_reply_authors": "32;517;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.0, 0.8944271909999159 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 81.2, 24.991198450654583 ], "wc_strengths_avg": [ 49.4, 33.19397535698308 ], "wc_weaknesses_avg": [ 251.6, 178.8011185647338 ], "wc_questions_avg": [ 32.2, 21.23581879749401 ], "wc_limitations_avg": [ 16.2, 18.443427013437606 ], "wc_review_avg": [ 430.6, 227.09698368758663 ], "wc_reply_reviewers_avg": [ 23.2, 36.41098735272088 ], "wc_reply_authors_avg": [ 109.8, 203.9768614328596 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5233204070776456224&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "RECESS Vaccine for Federated Learning: Proactive Defense Against Model Poisoning Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72885", "id": "3n8PNUdvSg", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b80fe066fdbceb3a2960117bac33917-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3n8PNUdvSg", "openreview": "https://openreview.net/forum?id=3n8PNUdvSg", "poster": "/media/PosterPDFs/NeurIPS%202023/72885.png?t=1701609459.975402", "slides": "https://nips.cc/virtual/2023/poster/72885", "video": "https://nips.cc/virtual/2023/poster/72885", "author_site": "Haonan Yan, Wenjing Zhang, Qian Chen, Xiaoguang Li, Wenhai Sun, HUI LI, Xiaodong Lin", "tldr": "", "abstract": "Model poisoning attacks greatly jeopardize the application of federated learning (FL). The effectiveness of existing defenses is susceptible to the latest model poisoning attacks, leading to a decrease in prediction accuracy. Besides, these defenses are intractable to distinguish benign outliers from malicious gradients, which further compromises the model generalization. In this work, we propose a novel defense including detection and aggregation, named RECESS, to serve as a \u201cvaccine\u201d for FL against model poisoning attacks. Different from the passive analysis in previous defenses, RECESS proactively queries each participating client with a delicately constructed aggregation gradient, accompanied by the detection of malicious clients according to their responses with higher accuracy. Further, RECESS adopts a newly proposed trust scoring based mechanism to robustly aggregate gradients. Rather than previous methods of scoring in each iteration, RECESS takes into account the correlation of clients\u2019 performance over multiple iterations to estimate the trust score, bringing in a significant increase in detection fault tolerance. Finally, we extensively evaluate RECESS on typical model architectures and four datasets under various settings including white/black-box, cross-silo/device FL, etc. 
Experimental results show the superiority of RECESS over five classic and two state-of-the-art defenses in terms of reducing the accuracy loss caused by the latest model poisoning attacks.", "keywords": "Federated Learning;Model Poisoning Attacks;Proactive Detection;Robust Aggregation;Benign Outlier Identification", "primary_area": "", "supplementary_material": "/attachment/62d9df27ae73d0bcbef79a5c59c72c571ba0b8dc.pdf", "author": "Haonan Yan;Wenjing Zhang;Qian Chen;Xiaoguang Li;Wenhai Sun;HUI LI;Xiaodong Lin", "authorids": "~Haonan_Yan1;~Wenjing_Zhang1;~Qian_Chen13;~Xiaoguang_Li3;~Wenhai_Sun1;~HUI_LI17;~Xiaodong_Lin3", "gender": ";F;M;M;;M;M", "homepage": ";;;;;https://web.xidian.edu.cn/lihui/en/index.html;https://socs.uoguelph.ca/~xlin08/", "dblp": ";27/3057;;46/1349;;l/HuiLi6;59/554.html", "google_scholar": ";6kulkakAAAAJ;https://scholar.google.com.sg/citations?user=h3twdkgAAA;https://scholar.google.com/citations?hl=zh-CN;;oEcRS84AAAAJ;https://scholar.google.ca/citations?user=om3xUIcAAAAJ", "orcid": ";0000-0002-3066-7186;0000-0002-6956-8185;;;0000-0001-8310-7169;0000-0001-8916-6645", "linkedin": ";;;;;;https://ca.linkedin.com/in/xiaodong-lin-634849", "or_profile": "~Haonan_Yan1;~Wenjing_Zhang1;~Qian_Chen13;~Xiaoguang_Li3;~Wenhai_Sun1;~HUI_LI17;~Xiaodong_Lin3", "aff": ";University of Guelph;Xidian University;Xidian University;;xidian university;University of Guelph", "aff_domain": ";uoguelph.ca;xidian.edu.cn;xidian.edu.cn;;xidian.edu.cn;uoguelph.ca", "position": ";PhD student;PhD student;Lecturer;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyan2023recess,\ntitle={{RECESS} Vaccine for Federated Learning: Proactive Defense Against Model Poisoning Attacks},\nauthor={Haonan Yan and Wenjing Zhang and Qian Chen and Xiaoguang Li and Wenhai Sun and HUI LI and Xiaodong Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3n8PNUdvSg}\n}", "github": "", "project": "", "reviewers": "nW5D;MKss;St5y;tdiM", "pdf_size": 1392753, "rating": "5;5;5;7", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;3;2;3", "wc_summary": "146;58;93;62", "wc_strengths": "81;32;40;15", "wc_weaknesses": "179;212;48;151", "wc_questions": "2;2;47;2", "wc_limitations": "2;7;137;2", "wc_review": "410;311;365;232", "wc_reply_reviewers": "0;35;0;0", "wc_reply_authors": "59;771;146;75", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.75, 35.187888541371734 ], "wc_strengths_avg": [ 42.0, 24.259018941416407 ], "wc_weaknesses_avg": [ 147.5, 61.36978083715144 ], "wc_questions_avg": [ 13.25, 19.48557158514987 ], "wc_limitations_avg": [ 37.0, 57.77110004145671 ], "wc_review_avg": [ 329.5, 66.31176366226433 ], "wc_reply_reviewers_avg": [ 8.75, 15.155444566227676 ], "wc_reply_authors_avg": [ 262.75, 295.25952567190785 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13980926818494669322&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": ";uoguelph.ca;xidian.edu.cn;xidian.edu.cn;;xidian.edu.cn;uoguelph.ca", 
"author_num": 7, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Guelph;Xidian University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uoguelph.ca;http://www.xidian.edu.cn/", "aff_unique_abbr": "U of G;Xidian", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Canada;China" }, { "id": "3o4jU8fWVj", "title": "EquiformerV2: Improved Equivariant Transformer for Scaling to Higher-Degree Representations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Equivariant Transformers such as Equiformer have demonstrated the efficacy of applying Transformers to the domain of 3D atomistic systems. However, they are still limited to small degrees of equivariant representations due to their computational complexity. In this paper, we investigate whether these architectures can scale well to higher degrees. Starting from Equiformer, we first replace $SO(3)$ convolutions with eSCN convolutions to efficiently incorporate higher-degree tensors. Then, to better leverage the power of higher degrees, we propose three architectural improvements \u2013 attention re-normalization, separable $S^2$ activation and separable layer normalization. Putting this all together, we propose EquiformerV2, which outperforms previous state-of-the-art methods on the large-scale OC20 dataset by up to 15% on forces, 5% on energies, offers better speed-accuracy trade-offs, and 2$\\times$ reduction in DFT calculations needed for computing adsorption energies.", "keywords": "equivariant neural networks;graph neural networks;computational physics;transformer networks", "primary_area": "", "supplementary_material": "/attachment/79614de346c21320defe8e32ea02311eddf7f2d6.zip", "author": "Yi-Lun Liao;Brandon M Wood;Abhishek Das;Tess Smidt", "authorids": "~Yi-Lun_Liao1;~Brandon_M_Wood1;~Abhishek_Das1;~Tess_Smidt1", "gender": "M;M;M;F", "homepage": ";https://www.bmwood.org;https://abhishekdas.com/;https://blondegeek.github.io/", "dblp": "225/6644.html;276/7546;40/5262;215/4978.html", "google_scholar": ";KbqboRgAAAAJ;t6exkOAAAAAJ;", "orcid": ";0000-0002-7251-337X;;0000-0001-5581-5344", "linkedin": "yilunliao/;;;", "or_profile": "~Yi-Lun_Liao1;~Brandon_M_Wood1;~Abhishek_Das1;~Tess_Smidt1", "aff": "Meta Facebook;FAIR at Meta;FAIR, Meta AI;Massachusetts Institute of Technology", "aff_domain": "meta.com;meta.com;meta.com;mit.edu", "position": "Intern;Researcher;Research Scientist;Assistant Professor", "bibtex": "@misc{\nliao2023equiformerv,\ntitle={EquiformerV2: Improved Equivariant Transformer for Scaling to Higher-Degree Representations},\nauthor={Yi-Lun Liao and Brandon M Wood and Abhishek Das and Tess Smidt},\nyear={2023},\nurl={https://openreview.net/forum?id=3o4jU8fWVj}\n}", "github": "", "project": "", "reviewers": "GdTP;6dKw;3XDb;9yvD", "site": "https://openreview.net/forum?id=3o4jU8fWVj", "pdf_size": 2582643, "rating": "5;5;5;6", "confidence": "5;4;3;4", "soundness": "4;3;3;2", "novelty": "3;3;3;3", "presentation": "4;2;3;3", "wc_summary": "104;70;27;102", "wc_strengths": "94;97;86;104", "wc_weaknesses": "69;303;155;418", "wc_questions": "111;109;45;309", "wc_limitations": "29;12;2;7", "wc_review": "407;591;315;940", "wc_reply_reviewers": "0;0;17;12", "wc_reply_authors": "0;0;22;20", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 
3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.75, 31.21197686786276 ], "wc_strengths_avg": [ 95.25, 6.456585785072479 ], "wc_weaknesses_avg": [ 236.25, 134.2225297779773 ], "wc_questions_avg": [ 143.5, 99.17030805639358 ], "wc_limitations_avg": [ 12.5, 10.161200716450788 ], "wc_review_avg": [ 563.25, 239.1405183150693 ], "wc_reply_reviewers_avg": [ 7.25, 7.46240577829965 ], "wc_reply_authors_avg": [ 10.5, 10.523782589924593 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 164, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7055967944586657720&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Meta;Massachusetts Institute of Technology", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://web.mit.edu", "aff_unique_abbr": "Meta;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DisDiff: Unsupervised Disentanglement of Diffusion Probabilistic Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72884", "id": "3ofe0lpwQP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da47bfaf3f3a8d5bbab0d60c5195dc18-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3ofe0lpwQP", "openreview": "https://openreview.net/forum?id=3ofe0lpwQP", "poster": "/media/PosterPDFs/NeurIPS%202023/72884.png?t=1702344461.5611892", "slides": "https://nips.cc/virtual/2023/poster/72884", "video": "https://nips.cc/virtual/2023/poster/72884", "author_site": "Tao Yang, Yuwang Wang, Yan Lu, Nanning Zheng", "tldr": "", "abstract": "Aiming to understand the underlying explainable factors behind observations and modeling the conditional generation process on these factors, we connect disentangled representation learning to diffusion probabilistic models (DPMs) to take advantage of the remarkable modeling ability of DPMs. We propose a new task, disentanglement of DPMs: given a pre-trained DPM, without any annotations of the factors, the task is to automatically discover the inherent factors behind the observations and disentangle the gradient fields of the DPM into sub-gradient fields, each conditioned on the representation of each discovered factor. With disentangled DPMs, those inherent factors can be automatically discovered, explicitly represented and clearly injected into the diffusion process via the sub-gradient fields. To tackle this task, we devise an unsupervised approach, named DisDiff, achieving for the first time disentangled representation learning in the framework of DPMs. 
Extensive experiments on synthetic and real-world datasets demonstrate the effectiveness of DisDiff.", "keywords": "Diffusion Probabilistic Model;Disentangled representation", "primary_area": "", "supplementary_material": "", "author": "Tao Yang;Yuwang Wang;Yan Lu;Nanning Zheng", "authorids": "~Tao_Yang9;~Yuwang_Wang3;~Yan_Lu7;~Nanning_Zheng1", "gender": "M;M;M;M", "homepage": "https://github.com/ThomasMrY;;https://www.microsoft.com/en-us/research/people/yanlu/;", "dblp": ";161/2633;15/4830-1;07/256-1", "google_scholar": "https://scholar.google.com.hk/citations?user=qT5psCEAAAAJ;;djk5l-4AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-5383-6424;", "linkedin": ";;;", "or_profile": "~Tao_Yang9;~Yuwang_Wang3;~Yan_Lu7;~Nanning_Zheng1", "aff": "Xi'an Jiaotong University;Tsinghua University;Microsoft Research Asia;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;tsinghua.edu.cn;microsoft.com;xjtu.edu.cn", "position": "PhD student;Researcher;Partner Research Manager;Full Professor", "bibtex": "@inproceedings{\nyang2023disdiff,\ntitle={DisDiff: Unsupervised Disentanglement of Diffusion Probabilistic Models},\nauthor={Tao Yang and Yuwang Wang and Yan Lu and Nanning Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3ofe0lpwQP}\n}", "github": "", "project": "", "reviewers": "kfVZ;ZKsn;6mFa", "pdf_size": 35522414, "rating": "5;5;5", "confidence": "3;2;5", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "2;2;2", "wc_summary": "125;47;123", "wc_strengths": "2;33;82", "wc_weaknesses": "2;144;332", "wc_questions": "2;87;90", "wc_limitations": "2;24;8", "wc_review": "133;335;635", "wc_reply_reviewers": "9;193;449", "wc_reply_authors": "23;391;1986", "reply_reviewers": "1;2;4", "reply_authors": "2;2;6", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 98.33333333333333, 36.307330144506935 ], "wc_strengths_avg": [ 39.0, 32.93427798915086 ], "wc_weaknesses_avg": [ 159.33333333333334, 135.15752126890888 ], "wc_questions_avg": [ 59.666666666666664, 40.79487985301859 ], "wc_limitations_avg": [ 11.333333333333334, 9.285592184789412 ], "wc_review_avg": [ 367.6666666666667, 206.23826565946052 ], "wc_reply_reviewers_avg": [ 217.0, 180.42911812306423 ], "wc_reply_authors_avg": [ 800.0, 851.9792642234121 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.247219128924647 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14773613573256232566&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "xjtu.edu.cn;tsinghua.edu.cn;microsoft.com;xjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Xi'an Jiao Tong University;Tsinghua University;Microsoft", "aff_unique_dep": ";;Research", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "XJTU;THU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "ReHLine: Regularized Composite ReLU-ReHU Loss Minimization with Linear 
Computation and Linear Convergence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72883", "id": "3pEBW2UPAD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e37e56599e3f49cc899f40ae4f5d1fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3pEBW2UPAD", "openreview": "https://openreview.net/forum?id=3pEBW2UPAD", "poster": "/media/PosterPDFs/NeurIPS%202023/72883.png?t=1698306855.1917112", "slides": "https://nips.cc/virtual/2023/poster/72883", "video": "https://nips.cc/virtual/2023/poster/72883", "author_site": "Ben Dai, Yixuan Qiu", "tldr": "", "abstract": "Empirical risk minimization (ERM) is a crucial framework that offers a general approach to handling a broad range of machine learning tasks. In this paper, we propose a novel algorithm, called ReHLine, for minimizing a set of regularized ERMs with convex piecewise linear-quadratic loss functions and optional linear constraints. The proposed algorithm can effectively handle diverse combinations of loss functions, regularization, and constraints, making it particularly well-suited for complex domain-specific problems. Examples of such problems include FairSVM, elastic net regularized quantile regression, Huber minimization, etc. In addition, ReHLine enjoys a provable linear convergence rate and exhibits a per-iteration computational complexity that scales linearly with the sample size. The algorithm is implemented with both Python and R interfaces, and its performance is benchmarked on various tasks and datasets. Our experimental results demonstrate that ReHLine significantly surpasses generic optimization solvers in terms of computational efficiency on large-scale datasets. Moreover, it also outperforms specialized solvers such as Liblinear in SVMs, hqreg in Huber minimization, and Lightning (SAGA, SAG, SDCA, SVRG) in smoothed SVMs, exhibiting exceptional flexibility and efficiency. 
The source code, project page, accompanying software, and the Python/R interface can be accessed through the link: https://github.com/softmin/ReHLine.", "keywords": "coordinate descent;linear convergence;primal-dual methods;empirical risk minimization;linear constraints;quantile regression", "primary_area": "", "supplementary_material": "", "author": "Ben Dai;Yixuan Qiu", "authorids": "~Ben_Dai1;~Yixuan_Qiu1", "gender": "M;", "homepage": "https://www.bendai.org/;https://statr.me", "dblp": "235/9801;209/7159", "google_scholar": "dl9PxlIAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Ben_Dai1;~Yixuan_Qiu1", "aff": "The Chinese University of Hong Kong;Shanghai University of Finance and Economics", "aff_domain": "cuhk.edu.hk;sufe.edu.cn", "position": "Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ndai2023rehline,\ntitle={Re{HL}ine: Regularized Composite Re{LU}-Re{HU} Loss Minimization with Linear Computation and Linear Convergence},\nauthor={Ben Dai and Yixuan Qiu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3pEBW2UPAD}\n}", "github": "", "project": "", "reviewers": "gb3q;7RWz;xA17;kbfD;2E2D;xSEV", "pdf_size": 499619, "rating": "5;5;6;6;7;7", "confidence": "5;3;2;3;3;3", "soundness": "3;3;3;3;4;3", "novelty": "2;2;3;2;4;3", "presentation": "3;3;3;3;3;3", "wc_summary": "58;30;85;104;65;91", "wc_strengths": "23;129;91;79;64;64", "wc_weaknesses": "119;2;47;367;98;28", "wc_questions": "57;2;8;71;116;65", "wc_limitations": "39;2;2;7;28;12", "wc_review": "296;165;233;628;371;260", "wc_reply_reviewers": "0;0;10;63;30;22", "wc_reply_authors": "0;0;0;26;0;0", "reply_reviewers": "0;0;1;1;1;1", "reply_authors": "1;1;1;2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999299 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.16666666666667, 24.368125811304314 ], "wc_strengths_avg": [ 75.0, 31.973947728319903 ], "wc_weaknesses_avg": [ 110.16666666666667, 121.5393169124936 ], "wc_questions_avg": [ 53.166666666666664, 38.89908596469702 ], "wc_limitations_avg": [ 15.0, 13.880441875771343 ], "wc_review_avg": [ 325.5, 148.91468474711732 ], "wc_reply_reviewers_avg": [ 20.833333333333332, 21.790033399597064 ], "wc_reply_authors_avg": [ 4.333333333333333, 9.68962790249909 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4548588261473421, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9266890670005913699&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cuhk.edu.hk;sufe.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai University of Finance and Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.sufe.edu.cn", "aff_unique_abbr": "CUHK;SUFE", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Projection Regret: Reducing Background Bias for Novelty Detection via Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72882", "id": "3qHlPqzjM1", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d27d607586984908900eaa8ce19c96c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3qHlPqzjM1", "openreview": "https://openreview.net/forum?id=3qHlPqzjM1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72882", "video": "https://nips.cc/virtual/2023/poster/72882", "author_site": "Sungik Choi, Hankook Lee, Honglak Lee, Moontae Lee", "tldr": "", "abstract": "Novelty detection is a fundamental task of machine learning which aims to detect abnormal (*i.e.* out-of-distribution (OOD)) samples. Since diffusion models have recently emerged as the de facto standard generative framework with surprising generation results, novelty detection via diffusion models has also gained much attention. Recent methods have mainly utilized the reconstruction property of in-distribution samples. However, they often suffer from detecting OOD samples that share similar background information to the in-distribution data. Based on our observation that diffusion models can *project* any sample to an in-distribution sample with similar background information, we propose *Projection Regret (PR)*, an efficient novelty detection method that mitigates the bias of non-semantic information. To be specific, PR computes the perceptual distance between the test image and its diffusion-based projection to detect abnormality. Since the perceptual distance often fails to capture semantic changes when the background information is dominant, we cancel out the background bias by comparing it against recursive projections. Extensive experiments demonstrate that PR outperforms the prior art of generative-model-based novelty detection methods by a significant margin.", "keywords": "Novelty detection;out-of-distribution detection;consistency models;diffusion models;score-based generative models", "primary_area": "", "supplementary_material": "/attachment/4c9cafbceaa2f2a67bbc702af30affb2e988ef4c.zip", "author": "Sungik Choi;Hankook Lee;Honglak Lee;Moontae Lee", "authorids": "~Sungik_Choi1;~Hankook_Lee1;~Honglak_Lee2;~Moontae_Lee1", "gender": "M;;M;M", "homepage": "https://hankook.github.io;https://moontae.people.uic.edu;http://web.eecs.umich.edu/~honglak;", "dblp": "223/4393;132/1761;58/2562;184/4055.html", "google_scholar": "CgqswXUAAAAJ;BMvYy9cAAAAJ;fmSHtE8AAAAJ;H0QB0PwAAAAJ", "orcid": ";0000-0001-5542-3463;;", "linkedin": ";moontae-lee-975248123/;;", "or_profile": "~Hankook_Lee1;~Moontae_Lee1;~Honglak_Lee1;~Choi_sungik1", "aff": "Korea Advanced Institute of Science & Technology;University of Illinois, Chicago;University of Michigan;LG AI Research", "aff_domain": "kaist.ac.kr;uic.edu;umich.edu;lgresearch.ai", "position": "Postdoc;Assistant Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nchoi2023projection,\ntitle={Projection Regret: Reducing Background Bias for Novelty Detection via Diffusion Models},\nauthor={Sungik Choi and Hankook Lee and Honglak Lee and Moontae Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3qHlPqzjM1}\n}", "github": "", "project": "", "reviewers": "SZQH;9KBe;ZGie;mygo", "pdf_size": 1404189, "rating": "5;5;5;5", "confidence": "4;2;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;2;3", "wc_summary": "92;262;187;48", "wc_strengths": "25;236;108;24", "wc_weaknesses": "138;294;298;70", "wc_questions": "2;214;106;4", "wc_limitations": "1;257;1;1", "wc_review": "258;1263;700;147", 
"wc_reply_reviewers": "15;7;101;65", "wc_reply_authors": "22;100;42;502", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 147.25, 83.14257333039434 ], "wc_strengths_avg": [ 98.25, 86.52853575555292 ], "wc_weaknesses_avg": [ 200.0, 98.97474425326898 ], "wc_questions_avg": [ 81.5, 87.29690716170877 ], "wc_limitations_avg": [ 65.0, 110.85125168440814 ], "wc_review_avg": [ 592.0, 439.1713788488498 ], "wc_reply_reviewers_avg": [ 47.0, 38.28837943815329 ], "wc_reply_authors_avg": [ 166.5, 195.80794161626847 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7414380484100760351&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;uic.edu;umich.edu;lgresearch.ai", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Illinois at Chicago;University of Michigan;LG", "aff_unique_dep": ";;;LG AI Research", "aff_unique_url": "https://www.kaist.ac.kr;https://www.uic.edu;https://www.umich.edu;https://www.lgaires.com", "aff_unique_abbr": "KAIST;UIC;UM;LG AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "3s6aE1LeiR", "title": "From One to Zero: Causal Zero-Shot Neural Architecture Search by Intrinsic One-Shot Interventional Information", "track": "main", "status": "Reject", "tldr": "", "abstract": "''Zero-shot'' neural architecture search (ZNAS) is key to achieving real-time neural architecture search. ZNAS comes from ''one-shot'' neural architecture search but searches in a weight-agnostic supernet and consequently largely reduce the search cost. \n However, the weight parameters are agnostic in the zero-shot NAS and none of the previous methods try to explain it. \n We question whether there exists a way to unify the one-shot and zero-shot experiences for interpreting the agnostic weight messages. To answer this question, we propose a causal definition for ''zero-shot NAS'' and facilitate it with interventional data from ''one-shot'' knowledge.\n The experiments on the standard NAS-bench-201 and CIFAR-10 benchmarks demonstrate a breakthrough of search cost which requires merely 8 GPU seconds on CIFAR-10 while maintaining competitive precision. 
", "keywords": "Neural Architecture Search", "primary_area": "", "supplementary_material": "/attachment/b244f4f95e3143a8fafa450bd1415ce98974472f.pdf", "author": "Qian Li;Chao Ma;Zhengqin Xu;Xiaokang Yang", "authorids": "~Qian_Li5;~Chao_Ma3;~Zhengqin_Xu1;~Xiaokang_Yang1", "gender": ";M;M;M", "homepage": ";https://vision.sjtu.edu.cn/;;https://icne.sjtu.edu.cn/info/1064/1078.htm", "dblp": ";79/1552-4;240/7110;06/3071-1.html", "google_scholar": ";syoPhv8AAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;yDEavdMAAAAJ", "orcid": ";;;0000-0003-4029-3322", "linkedin": "https://www.linkedin.com/feed/;;;", "or_profile": "~Qian_Li5;~Chao_Ma3;~Zhengqin_Xu1;~Xiaokang_Yang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Associate Professor;Postdoc;Full Professor", "bibtex": "@misc{\nli2023from,\ntitle={From One to Zero: Causal Zero-Shot Neural Architecture Search by Intrinsic One-Shot Interventional Information},\nauthor={Qian Li and Chao Ma and Zhengqin Xu and Xiaokang Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=3s6aE1LeiR}\n}", "github": "", "project": "", "reviewers": "CY83;KBjx;7wS7;C1bZ", "site": "https://openreview.net/forum?id=3s6aE1LeiR", "pdf_size": 341945, "rating": "2;3;3;3", "confidence": "4;4;3;4", "soundness": "2;3;2;2", "novelty": "2;2;3;2", "presentation": "3;2;3;2", "wc_summary": "68;35;81;59", "wc_strengths": "44;29;16;13", "wc_weaknesses": "85;37;221;132", "wc_questions": "153;74;77;4", "wc_limitations": "1;11;33;1", "wc_review": "351;186;428;209", "wc_reply_reviewers": "12;13;16;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 2.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.75, 16.798437427332342 ], "wc_strengths_avg": [ 25.5, 12.257650672131263 ], "wc_weaknesses_avg": [ 118.75, 67.9204497923858 ], "wc_questions_avg": [ 77.0, 52.7114788257738 ], "wc_limitations_avg": [ 11.5, 13.06713434537198 ], "wc_review_avg": [ 293.5, 100.11618250812403 ], "wc_reply_reviewers_avg": [ 10.25, 6.098155458825234 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nhor4Uz46X8J:scholar.google.com/&scioq=From+One+to+Zero:+Causal+Zero-Shot+Neural+Architecture+Search+by+Intrinsic+One-Shot+Interventional+Information&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "INSPECT: A Multimodal Dataset for Pulmonary Embolism Diagnosis and Prognosis", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73704", "id": "3sRR2u72oQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/39736af1b9d87a1fddad9f84a6bcf64c-Abstract-Datasets_and_Benchmarks.html", "pdf": 
"https://openreview.net/pdf?id=3sRR2u72oQ", "openreview": "https://openreview.net/forum?id=3sRR2u72oQ", "poster": "/media/PosterPDFs/NeurIPS%202023/73704.png?t=1701583873.6423988", "slides": "https://nips.cc/virtual/2023/poster/73704", "video": "https://nips.cc/virtual/2023/poster/73704", "author_site": "Shih-Cheng Huang, Zepeng Huo, Ethan Steinberg, Chia-Chun Chiang, Curtis Langlotz, Matthew Lungren, Serena Yeung, Nigam Shah, Jason Fries", "tldr": "", "abstract": "Synthesizing information from various data sources plays a crucial role in the practice of modern medicine. Current applications of artificial intelligence in medicine often focus on single-modality data due to a lack of publicly available, multimodal medical datasets. To address this limitation, we introduce INSPECT, which contains de-identified longitudinal records from a large cohort of pulmonary embolism (PE) patients, along with ground truth labels for multiple outcomes. INSPECT contains data from 19,402 patients, including CT images, sections of radiology reports, and structured electronic health record (EHR) data (including demographics, diagnoses, procedures, and vitals). Using our provided dataset, we develop and release a benchmark for evaluating several baseline modeling approaches on a variety of important PE related tasks. We evaluate image-only, EHR-only, and fused models. Trained models and the de-identified dataset are made available for non-commercial use under a data use agreement. To the best our knowledge, INSPECT is the largest multimodal dataset for enabling reproducible research on strategies for integrating 3D medical imaging and EHR data.", "keywords": "Multimodal Fusion;Medical Imaging;Electronic Health Records", "primary_area": "", "supplementary_material": "/attachment/c64087240a5f9abd3d1aec90833a559bd9a30636.zip", "author": "Shih-Cheng Huang;Zepeng Huo;Ethan Steinberg;Chia-Chun Chiang;Matthew P. 
Lungren;Curtis Langlotz;Serena Yeung;Nigam Shah;Jason Alan Fries", "authorids": "~Shih-Cheng_Huang1;~Zepeng_Huo1;~Ethan_Steinberg1;~Chia-Chun_Chiang1;~Matthew_P._Lungren1;~Curtis_Langlotz1;~Serena_Yeung1;~Nigam_Shah1;~Jason_Alan_Fries1", "gender": ";M;M;F;;M;F;M;M", "homepage": "https://www.linkedin.com/in/mschuang/;https://zepenghuo.github.io/;;https://www.mayo.edu/research/faculty/chiang-chia-chun-m-d/bio-20146997;;https://profiles.stanford.edu/curtis-langlotz;http://ai.stanford.edu/~syyeung/;https://shahlab.stanford.edu/nigam_shah;https://web.stanford.edu/~jfries/", "dblp": ";218/7183;241/9476;;;12/1751;147/5023;s/NHShah;182/2122", "google_scholar": ";;;;z1UtMSYAAAAJ;WQkBYwQAAAAJ;Tw2m5kUAAAAJ;n63DmP8AAAAJ;wywWmwoAAAAJ", "orcid": ";;0000-0001-7166-5032;0000-0001-7802-7172;;0000-0002-8972-8051;0000-0003-0529-0628;0000-0001-9385-7158;0000-0001-9316-5768", "linkedin": ";;;;;langlotz/;;;jason-fries/", "or_profile": "~Shih-Cheng_Huang1;~Zepeng_Huo1;~Ethan_Steinberg1;~Chia-Chun_Chiang1;~Matthew_P._Lungren1;~Curtis_Langlotz1;~Serena_Yeung1;~Nigam_Shah1;~Jason_Alan_Fries1", "aff": "Stanford University;Stanford University;Stanford University;Mayo Clinic;Microsoft;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;mayo.edu;microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;PhD student;Assistant Professor;Principal Researcher;Full Professor;Assistant Professor;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nhuang2023inspect,\ntitle={{INSPECT}: A Multimodal Dataset for Pulmonary Embolism Diagnosis and Prognosis},\nauthor={Shih-Cheng Huang and Zepeng Huo and Ethan Steinberg and Chia-Chun Chiang and Matthew P. Lungren and Curtis Langlotz and Serena Yeung and Nigam Shah and Jason Alan Fries},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=3sRR2u72oQ}\n}", "github": "", "project": "", "reviewers": "CGfA;PLZb;vbzo", "pdf_size": 2870721, "rating": "3;7;7", "confidence": "5;4;4", "wc_summary_and_contributions": "154;106;90", "wc_strengths": "39;70;48", "wc_improvement": "163;684;124", "wc_limitations": "28;13;45", "wc_correctness": "11;84;39", "wc_clarity": "5;10;37", "wc_relation_to_prior_work": "26;13;32", "wc_documentation": "38;242;1", "wc_additional_feedback": "1;1;1", "wc_review": "465;1223;417", "wc_reply_reviewers": "0;53;0", "wc_reply_authors": "503;1000;279", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 116.66666666666667, 27.19477073916152 ], "wc_strengths_avg": [ 52.333333333333336, 13.021349989749739 ], "wc_improvement_avg": [ 323.6666666666667, 255.2911192257881 ], "wc_limitations_avg": [ 28.666666666666668, 13.072447700751718 ], "wc_correctness_avg": [ 44.666666666666664, 30.07028803025043 ], "wc_clarity_avg": [ 17.333333333333332, 14.055445761538678 ], "wc_relation_to_prior_work_avg": [ 23.666666666666668, 7.93025150224688 ], "wc_documentation_avg": [ 93.66666666666667, 105.9695973171341 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 701.6666666666666, 369.15880352799695 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 24.984439601924677 ], "wc_reply_authors_avg": [ 594.0, 301.2983017985111 ], "reply_reviewers_avg": [ 0.3333333333333333, 
0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1628171309474707466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu;mayo.edu;microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 9, "aff_unique_index": "0;0;0;1;2;0;0;0;0", "aff_unique_norm": "Stanford University;Mayo Clinic;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.mayoclinic.org;https://www.microsoft.com", "aff_unique_abbr": "Stanford;Mayo Clinic;Microsoft", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Quantization Model of Neural Scaling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72881", "id": "3tbTw2ga8K", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5b6346a05a537d4cdb2f50323452a9fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3tbTw2ga8K", "openreview": "https://openreview.net/forum?id=3tbTw2ga8K", "poster": "/media/PosterPDFs/NeurIPS%202023/72881.png?t=1701573842.82847", "slides": "https://nips.cc/virtual/2023/poster/72881", "video": "https://nips.cc/virtual/2023/poster/72881", "author_site": "Eric Michaud, Ziming Liu, Uzay Girit, Max Tegmark", "tldr": "", "abstract": "We propose the Quantization Model of neural scaling laws, explaining both the observed power law dropoff of loss with model and data size, and also the sudden emergence of new capabilities with scale. We derive this model from what we call the Quantization Hypothesis, \nwhere network knowledge and skills are \"quantized\" into discrete chunks (quanta). We show that when quanta are learned in order of decreasing use frequency, then a power law in use frequencies explains observed power law scaling of loss. We validate this prediction on toy datasets, then study how scaling curves decompose for large language models. Using language model gradients, we automatically decompose model behavior into a diverse set of skills (quanta). 
We tentatively find that the frequency at which these quanta are used in the training distribution roughly follows a power law corresponding with the empirical scaling exponent for language models, a prediction of our theory.", "keywords": "scaling laws;emergence;language models;science of deep learning", "primary_area": "", "supplementary_material": "", "author": "Eric J Michaud;Ziming Liu;Uzay Girit;Max Tegmark", "authorids": "~Eric_J_Michaud1;~Ziming_Liu2;~Uzay_Girit1;~Max_Tegmark1", "gender": "M;M;M;", "homepage": "https://ericjmichaud.com;https://kindxiaoming.github.io/;https://uzpg.me;https://space.mit.edu/home/tegmark/", "dblp": "277/5275;;344/2052;25/6578", "google_scholar": "X52GetkAAAAJ;0b32RKAAAAAJ;qvZ_Q_IAAAAJ;eBXEZxgAAAAJ", "orcid": "0000-0001-7912-1953;;;", "linkedin": "ericjmichaud0101/;;uzay-girit-a208161a2/;", "or_profile": "~Eric_J_Michaud1;~Ziming_Liu2;~Uzay_Girit1;~Max_Tegmark1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nmichaud2023the,\ntitle={The Quantization Model of Neural Scaling},\nauthor={Eric J Michaud and Ziming Liu and Uzay Girit and Max Tegmark},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3tbTw2ga8K}\n}", "github": "", "project": "", "reviewers": "S4dM;2SwD;Kziq;7dsB;7ZcN", "pdf_size": 2014403, "rating": "5;5;7;8;8", "confidence": "4;3;4;4;4", "soundness": "2;3;4;3;4", "novelty": "3;3;3;4;4", "presentation": "2;3;4;4;4", "wc_summary": "85;26;108;131;158", "wc_strengths": "110;24;144;7;104", "wc_weaknesses": "127;67;19;54;100", "wc_questions": "113;69;76;3;45", "wc_limitations": "27;7;15;6;10", "wc_review": "462;193;362;201;417", "wc_reply_reviewers": "69;13;24;0;70", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 101.6, 44.893652112520314 ], "wc_strengths_avg": [ 77.8, 52.93921042101025 ], "wc_weaknesses_avg": [ 73.4, 37.30201066966766 ], "wc_questions_avg": [ 61.2, 36.367018024578265 ], "wc_limitations_avg": [ 13.0, 7.6681158050723255 ], "wc_review_avg": [ 327.0, 110.79891696221583 ], "wc_reply_reviewers_avg": [ 35.2, 29.019993108200424 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5897678246195884, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2558558693587958226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Estimating Noise Correlations Across Continuous Conditions With Wishart Processes", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72880", "id": "3ucmcMzCXD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a935ba2236c6ba0fb620f23354e789ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3ucmcMzCXD", "openreview": "https://openreview.net/forum?id=3ucmcMzCXD", "poster": "/media/PosterPDFs/NeurIPS%202023/72880.png?t=1701455940.245987", "slides": "https://nips.cc/virtual/2023/poster/72880", "video": "https://nips.cc/virtual/2023/poster/72880", "author_site": "Amin Nejatbakhsh, Isabel Garon, Alex Williams", "tldr": "", "abstract": "The signaling capacity of a neural population depends on the scale and orientation of its covariance across trials. Estimating this \"noise\" covariance is challenging and is thought to require a large number of stereotyped trials. New approaches are therefore needed to interrogate the structure of neural noise across rich, naturalistic behaviors and sensory experiences, with few trials per condition. Here, we exploit the fact that conditions are smoothly parameterized in many experiments and leverage Wishart process models to pool statistical power from trials in neighboring conditions. We demonstrate that these models perform favorably on experimental data from the mouse visual cortex and monkey motor cortex relative to standard covariance estimators. Moreover, they produce smooth estimates of covariance as a function of stimulus parameters, enabling estimates of noise correlations in entirely unseen conditions as well as continuous estimates of Fisher information—a commonly used measure of signal fidelity. Together, our results suggest that Wishart processes are broadly applicable tools for quantification and uncertainty estimation of noise correlations in trial-limited regimes, paving the way toward understanding the role of noise in complex neural computations and behavior.", "keywords": "Noise Correlations;Wishart Process;Variational Inference", "primary_area": "", "supplementary_material": "/attachment/26c62b0171e90c6dbb9f537180ea7836cfabd11e.pdf", "author": "Amin Nejatbakhsh;Isabel Garon;Alex H Williams", "authorids": "~Amin_Nejatbakhsh1;igaron@flatironinstitute.org;~Alex_H_Williams1", "gender": "M;;M", "homepage": "http://www.columbia.edu/~mn2822/;;http://alexhwilliams.info", "dblp": "242/9017;;126/4222", "google_scholar": "kLM31YoAAAAJ;;7_GzzXMAAAAJ", "orcid": "0000-0001-5155-4757;;0000-0001-5853-103X", "linkedin": "aminejat;;", "or_profile": "~Amin_Nejatbakhsh1;igaron@flatironinstitute.org;~Alex_H_Williams1", "aff": "Flatiron Institute;;Flatiron Institute", "aff_domain": "flatiron.org;;flatironinstitute.org", "position": "Research Fellow;;Researcher", "bibtex": "@inproceedings{\nnejatbakhsh2023estimating,\ntitle={Estimating Noise Correlations Across Continuous Conditions With Wishart Processes},\nauthor={Amin Nejatbakhsh and Isabel Garon and Alex H Williams},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3ucmcMzCXD}\n}", "github": "", "project": "", "reviewers": "yodS;ruJu;mr49;sHno", "pdf_size": 4098119, "rating": "5;7;7;7", "confidence": "4;4;3;2", "soundness": "2;2;4;3", "novelty": "2;3;3;3", "presentation": "3;4;4;3", "wc_summary": "147;94;189;47", "wc_strengths": "33;44;58;46", "wc_weaknesses": "80;415;209;8", "wc_questions": "154;63;425;152", "wc_limitations": "89;1;35;8", "wc_review": "503;617;916;261", "wc_reply_reviewers": "702;19;122;13", "wc_reply_authors": "783;0;0;0", "reply_reviewers": "2;1;1;1", 
"reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 119.25, 53.60212215948171 ], "wc_strengths_avg": [ 45.25, 8.870597499605086 ], "wc_weaknesses_avg": [ 178.0, 154.6237368582198 ], "wc_questions_avg": [ 198.5, 135.83537830771482 ], "wc_limitations_avg": [ 33.25, 34.60039739656179 ], "wc_review_avg": [ 574.25, 235.4903129642491 ], "wc_reply_reviewers_avg": [ 214.0, 285.0587658711796 ], "wc_reply_authors_avg": [ 195.75, 339.0489455816077 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2210934897808095135&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "flatiron.org;;flatironinstitute.org", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Flatiron Institute", "aff_unique_dep": "", "aff_unique_url": "https://flatironinstitute.org", "aff_unique_abbr": "Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3xRaWBD2YB", "title": "Polynomial Width is Sufficient for Set Representation with High-dimensional Features", "track": "main", "status": "Reject", "tldr": "", "abstract": "Set representation has become ubiquitous in deep learning for modeling the inductive bias of neural networks that are insensitive to the input order. DeepSets is the most widely used neural network architecture for set representation. It involves embedding each set element into a latent space with dimension $L$, followed by a sum pooling to obtain a whole-set embedding, and finally mapping the whole-set embedding to the output. In this work, we investigate the impact of the dimension $L$ on the expressive power of DeepSets. Previous analyses either oversimplified high-dimensional features to be one-dimensional features or were limited to analytic activations, thereby diverging from practical use and resulting in $L$ that grows exponentially with the set size $N$ and feature dimension $D$. To investigate the minimal value of $L$ that achieves sufficient expressive power, we present two set-element embedding layers: (a) linear + power activation (LP) and (b) logarithm + linear + exponential activations (LLE). We demonstrate that $L$ being $\\operatorname{poly}(N, D)$ is sufficient for set representation using both embedding layers. We also provide a lower bound of $L$ for the LP embedding layer. 
Furthermore, we extend our results to permutation-equivariant set functions and the complex field.", "keywords": "Set Representation; Permutation Invariance; Permutation Equivariance", "primary_area": "", "supplementary_material": "/attachment/d953a41e81914343dcc7b105176c158d1c0d5c7b.pdf", "author": "Peihao Wang;Shenghao Yang;Shu Li;Zhangyang Wang;Pan Li", "authorids": "~Peihao_Wang1;~Shenghao_Yang1;~Shu_Li6;~Zhangyang_Wang1;~Pan_Li2", "gender": "M;M;Not Specified;M;", "homepage": "https://peihaowang.github.io/;https://cs.uwaterloo.ca/~s286yang/;https://www.shuli.me/Shu-Li-087d71e8ce9340c5b4f7c08c7babe814;https://vita-group.github.io;", "dblp": "239/4075;41/4482-2;66/6852;119/4026;https://dblp.org/pers/hd/l/Li_0005:Pan", "google_scholar": "fqf2tBsAAAAJ;ocLDM-AAAAAJ;knBDWikAAAAJ;pxFyKAIAAAAJ;IroP0EwAAAAJ", "orcid": ";;;;", "linkedin": "peihao-wang-25a411162/;;;;pan-li-b951105a/", "or_profile": "~Peihao_Wang1;~Shenghao_Yang1;~Shu_Li6;~Zhangyang_Wang1;~Pan_Li2", "aff": "University of Texas, Austin;University of Waterloo;Purdue University;University of Texas, Austin;Purdue University", "aff_domain": "utexas.edu;uwaterloo.ca;purdue.edu;utexas.edu;purdue.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nwang2023polynomial,\ntitle={Polynomial Width is Sufficient for Set Representation with High-dimensional Features},\nauthor={Peihao Wang and Shenghao Yang and Shu Li and Zhangyang Wang and Pan Li},\nyear={2023},\nurl={https://openreview.net/forum?id=3xRaWBD2YB}\n}", "github": "", "project": "", "reviewers": "YiWd;S2JM;gdbo;zB8R", "site": "https://openreview.net/forum?id=3xRaWBD2YB", "pdf_size": 365535, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "3;4;3;1", "novelty": "2;3;3;2", "presentation": "2;4;3;2", "wc_summary": "73;45;135;133", "wc_strengths": "31;89;38;45", "wc_weaknesses": "355;378;183;545", "wc_questions": "58;1;127;5", "wc_limitations": "8;1;2;21", "wc_review": "525;514;485;749", "wc_reply_reviewers": "81;0;0;365", "wc_reply_authors": "437;0;0;621", "reply_reviewers": "1;0;0;3", "reply_authors": "3;1;1;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 96.5, 38.79110722833263 ], "wc_strengths_avg": [ 50.75, 22.63155982251334 ], "wc_weaknesses_avg": [ 365.25, 128.25048732850883 ], "wc_questions_avg": [ 47.75, 50.98713072923402 ], "wc_limitations_avg": [ 8.0, 7.968688725254614 ], "wc_review_avg": [ 568.25, 105.37403617590056 ], "wc_reply_reviewers_avg": [ 111.5, 150.04749248154732 ], "wc_reply_authors_avg": [ 264.5, 272.38254349352127 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13203073421312026361&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of Texas at Austin;University of Waterloo;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://uwaterloo.ca;https://www.purdue.edu", "aff_unique_abbr": "UT Austin;UW;Purdue", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Uncoupled and 
Convergent Learning in Two-Player Zero-Sum Markov Games with Bandit Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72879", "id": "3xSwxlB0fd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/722fcbc1a6667f2075d75ea79a1b3552-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3xSwxlB0fd", "openreview": "https://openreview.net/forum?id=3xSwxlB0fd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72879", "video": "https://nips.cc/virtual/2023/poster/72879", "author_site": "Yang Cai, Haipeng Luo, Chen-Yu Wei, Weiqiang Zheng", "tldr": "", "abstract": "We revisit the problem of learning in two-player zero-sum Markov games, focusing on developing an algorithm that is *uncoupled*, *convergent*, and *rational*, with non-asymptotic convergence rates to Nash equilibrium. We start from the case of a stateless matrix game with bandit feedback as a warm-up, showing an $\\tilde{\\mathcal{O}}(t^{-\\frac{1}{8}})$ last-iterate convergence rate. To the best of our knowledge, this is the first result that obtains a finite last-iterate convergence rate given access to only bandit feedback. We extend our result to the case of irreducible Markov games, providing a last-iterate convergence rate of $\\tilde{\\mathcal{O}}(t^{-\\frac{1}{9+\\varepsilon}})$ for any $\\varepsilon>0$. Finally, we study Markov games without any assumptions on the dynamics, and show a *path convergence* rate, a new notion of convergence we defined, of $\\tilde{\\mathcal{O}}(t^{-\\frac{1}{10}})$. Our algorithm removes the synchronization and prior knowledge requirement of Wei et al. (2021), which pursued the same goals as us for irreducible Markov games. Our algorithm is related to those of Chen et al. (2021) and Cen et al. (2021), and also builds on the entropy regularization technique. 
However, we remove their requirement of communications on the entropy values, making our algorithm entirely uncoupled.", "keywords": "two-player zero-sum Markov game;last-iterate convergence;path convergence;learning in games", "primary_area": "", "supplementary_material": "", "author": "Yang Cai;Haipeng Luo;Chen-Yu Wei;Weiqiang Zheng", "authorids": "~Yang_Cai1;~Haipeng_Luo1;~Chen-Yu_Wei1;~Weiqiang_Zheng1", "gender": ";M;M;M", "homepage": ";https://haipeng-luo.net/;https://bahh723.github.io/;https://weiqiang-zheng.com/", "dblp": ";62/2576;183/1729;277/5088", "google_scholar": ";ct2hw4UAAAAJ;2L2cR-kAAAAJ;YrfhnIwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yang_Cai1;~Haipeng_Luo1;~Chen-Yu_Wei1;~Weiqiang_Zheng1", "aff": ";University of Southern California;Massachusetts Institute of Technology;Yale University", "aff_domain": ";usc.edu;mit.edu;yale.edu", "position": ";Assistant Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\ncai2023uncoupled,\ntitle={Uncoupled and Convergent Learning in Two-Player Zero-Sum Markov Games with Bandit Feedback},\nauthor={Yang Cai and Haipeng Luo and Chen-Yu Wei and Weiqiang Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=3xSwxlB0fd}\n}", "github": "", "project": "", "reviewers": "UUTi;EDuK;Zo84;qUKM;dxZp", "pdf_size": 525678, "rating": "6;6;6;6;7", "confidence": "3;3;2;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;2;3;4", "wc_summary": "94;111;72;69;203", "wc_strengths": "27;58;49;62;39", "wc_weaknesses": "36;170;80;2;1", "wc_questions": "8;50;45;65;57", "wc_limitations": "1;8;6;10;1", "wc_review": "166;397;252;208;301", "wc_reply_reviewers": "44;30;0;52;0", "wc_reply_authors": "0;0;0;179;0", "reply_reviewers": "1;1;0;1;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 109.8, 49.05262480234875 ], "wc_strengths_avg": [ 47.0, 12.759310326189265 ], "wc_weaknesses_avg": [ 57.8, 63.082168637420835 ], "wc_questions_avg": [ 45.0, 19.687559523719543 ], "wc_limitations_avg": [ 5.2, 3.655133376499413 ], "wc_review_avg": [ 264.8, 79.9234633884193 ], "wc_reply_reviewers_avg": [ 25.2, 21.7476435505091 ], "wc_reply_authors_avg": [ 35.8, 71.6 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3554578550995349199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";usc.edu;mit.edu;yale.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Southern California;Massachusetts Institute of Technology;Yale University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.usc.edu;https://web.mit.edu;https://www.yale.edu", "aff_unique_abbr": "USC;MIT;Yale", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ClimateSet: A Large-Scale Climate Model Dataset for Machine Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73703", "id": "3z9YV29Ogn", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/44a6769fe6c695f8dfb347c649f7c9f0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=3z9YV29Ogn", "openreview": "https://openreview.net/forum?id=3z9YV29Ogn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73703", "video": "https://nips.cc/virtual/2023/poster/73703", "author_site": "Julia Kaltenborn, Charlotte Lange, Venkatesh Ramesh, Philippe Brouillard, Yaniv Gurwicz, Chandni Nagda, Jakob Runge, Peer Nowack, David Rolnick", "tldr": "", "abstract": "Climate models have been key for assessing the impact of climate change and simulating future climate scenarios. The machine learning (ML) community has taken an increased interest in supporting climate scientists\u2019 efforts on various tasks such as climate model emulation, downscaling, and prediction tasks. Many of those tasks have been addressed on datasets created with single climate models. However, both the climate science and ML communities have suggested that to address those tasks at scale, we need large, consistent, and ML-ready climate model datasets. Here, we introduce ClimateSet, a dataset containing the inputs and outputs of 36 climate models from the Input4MIPs and CMIP6 archives. In addition, we provide a modular dataset pipeline for retrieving and preprocessing additional climate models and scenarios. We showcase the potential of our dataset by using it as a benchmark for ML-based climate model emulation. We gain new insights about the performance and generalization capabilities of the different ML models by analyzing their performance across different climate models. Furthermore, the dataset can be used to train an ML emulator on several climate models instead of just one. Such a \u201csuper-emulator\u201d can quickly project new climate change scenarios, complementing existing scenarios already provided to policymakers. 
We believe ClimateSet will create the basis needed for the ML community to tackle climate-related tasks at scale.", "keywords": "large-scale dataset;climate models;climate change;emulator;climate-science;climate-projection", "primary_area": "", "supplementary_material": "", "author": "Julia Kaltenborn;Charlotte Emilie Elektra Lange;Venkatesh Ramesh;Philippe Brouillard;Yaniv Gurwicz;Chandni Nagda;Jakob Runge;Peer Nowack;David Rolnick", "authorids": "~Julia_Kaltenborn1;~Charlotte_Emilie_Elektra_Lange1;~Venkatesh_Ramesh1;~Philippe_Brouillard2;~Yaniv_Gurwicz1;~Chandni_Nagda1;~Jakob_Runge2;~Peer_Nowack1;~David_Rolnick1", "gender": "F;F;M;;;;M;;M", "homepage": "https://liellnima.github.io/;;;;;https://cnagda.github.io/;https://www.causalinferencelab.com;;http://www.davidrolnick.com/", "dblp": ";;;242/7928;83/4274;;120/7695;;37/10718", "google_scholar": "pUAkKYsAAAAJ;;;https://scholar.google.com/citations?hl=en;;;https://scholar.google.de/citations?user=wtXVvuUAAAAJ;v1Pf21sAAAAJ;P_luG3cAAAAJ", "orcid": "0000-0002-9292-9655;;;;;;0000-0002-0629-1772;0000-0003-4588-7832;", "linkedin": ";charlotte-lange-79b063227/;https://linkedin.com/in/venka7/;;;;;;", "or_profile": "~Julia_Kaltenborn1;~Charlotte_Emilie_Elektra_Lange1;~Venkatesh_Ramesh1;~Philippe_Brouillard2;~Yaniv_Gurwicz1;~Chandni_Nagda1;~Jakob_Runge2;~Peer_Nowack1;~David_Rolnick1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Universit\u00e4t Osnabr\u00fcck;Mila Quebec AI Institute, University of Montreal;University of Montreal;Intel;University of Illinois, Urbana Champaign;German Aerospace Center, Institute of Data Science;University of East Anglia;McGill University", "aff_domain": "mila.umontreal.ca;uni-osnabrueck.de;mila.umontreal.ca;umontreal.ca;intel.com;illinois.edu;dlr.de;uea.ac.uk;cs.mcgill.ca", "position": "PhD student;Undergrad student;PhD student;PhD student;Research Scientist;MS student;Principal Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nkaltenborn2023climateset,\ntitle={ClimateSet: A Large-Scale Climate Model Dataset for Machine Learning},\nauthor={Julia Kaltenborn and Charlotte Emilie Elektra Lange and Venkatesh Ramesh and Philippe Brouillard and Yaniv Gurwicz and Chandni Nagda and Jakob Runge and Peer Nowack and David Rolnick},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=3z9YV29Ogn}\n}", "github": "", "project": "", "reviewers": "NfhR;jcks;MSxX;fdkb;PvGq", "pdf_size": 2631422, "rating": "6;6;6;7;8", "confidence": "3;3;3;2;3", "wc_summary_and_contributions": "99;151;64;203;57", "wc_strengths": "130;55;119;13;29", "wc_improvement": "181;172;353;19;13", "wc_limitations": "21;38;65;9;11", "wc_correctness": "10;16;37;3;1", "wc_clarity": "52;58;66;20;118", "wc_relation_to_prior_work": "1;47;1;9;1", "wc_documentation": "1;60;14;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "496;598;720;278;232", "wc_reply_reviewers": "15;17;0;0;9", "wc_reply_authors": "847;930;876;366;222", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 114.8, 55.24635734598255 ], "wc_strengths_avg": [ 69.2, 47.22880476997063 ], "wc_improvement_avg": [ 147.6, 125.34368751556657 ], "wc_limitations_avg": [ 28.8, 20.807690885823924 ], "wc_correctness_avg": [ 13.4, 12.93986089569745 ], "wc_clarity_avg": [ 62.8, 
31.713719428663676 ], "wc_relation_to_prior_work_avg": [ 11.8, 17.870646322950943 ], "wc_documentation_avg": [ 15.4, 22.861321046693696 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 464.8, 185.9746219246056 ], "wc_reply_reviewers_avg": [ 8.2, 7.19444229944198 ], "wc_reply_authors_avg": [ 648.2, 293.97578131540024 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1043220409365133507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mila.umontreal.ca;uni-osnabrueck.de;mila.umontreal.ca;umontreal.ca;intel.com;illinois.edu;dlr.de;uea.ac.uk;cs.mcgill.ca", "author_num": 9, "aff_unique_index": "0;1;0;0;2;3;4;5;6", "aff_unique_norm": "University of Montreal;University of Osnabr\u00fcck;Intel;University of Illinois Urbana-Champaign;German Aerospace Center;University of East Anglia;McGill University", "aff_unique_dep": "Montreal Institute for Learning Algorithms;;Intel Corporation;;Institute of Data Science;;", "aff_unique_url": "https://www.mila.quebec;https://www.uni-osnabrueck.de;https://www.intel.com;https://illinois.edu;https://www.dlr.de;https://www.uea.ac.uk;https://www.mcgill.ca", "aff_unique_abbr": "MILA;UOS;Intel;UIUC;DLR;UEA;McGill", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Montreal;;Urbana-Champaign", "aff_country_unique_index": "0;1;0;0;2;2;1;3;0", "aff_country_unique": "Canada;Germany;United States;United Kingdom" }, { "title": "The Pick-to-Learn Algorithm: Empowering Compression for Tight Generalization Bounds and Improved Post-training Performance", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72878", "id": "40L3viVWQN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a4f287883609241031e6818bd01133e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=40L3viVWQN", "openreview": "https://openreview.net/forum?id=40L3viVWQN", "poster": "/media/PosterPDFs/NeurIPS%202023/72878.png?t=1702269051.2617846", "slides": "https://nips.cc/virtual/2023/poster/72878", "video": "https://nips.cc/virtual/2023/poster/72878", "author_site": "Dario Paccagnan, Marco Campi, Simone Garatti", "tldr": "", "abstract": "Generalization bounds are valuable both for theory and applications. On the one hand, they shed light on the mechanisms that underpin the learning processes; on the other, they certify how well a learned model performs against unseen inputs. In this work we build upon a recent breakthrough in compression theory to develop a new framework yielding tight generalization bounds of wide practical applicability. The core idea is to embed any given learning algorithm into a suitably-constructed meta-algorithm (here called Pick-to-Learn, P2L) in order to instill desirable compression properties. 
When applied to the MNIST classification dataset and to a synthetic regression problem, P2L not only attains generalization bounds that compare favorably with the state of the art (test-set and PAC-Bayes bounds), but it also learns models with better post-training performance.", "keywords": "Statistical learning theory;Compression theory;Generalization bounds", "primary_area": "", "supplementary_material": "/attachment/ab80b2575286ae7148bb5c8a1d7c671d1bc6840a.zip", "author": "Dario Paccagnan;Marco Campi;Simone Garatti", "authorids": "~Dario_Paccagnan1;~Marco_Campi1;~Simone_Garatti1", "gender": "Not Specified;M;M", "homepage": "https://www.doc.ic.ac.uk/~dp414/;https://marco-campi.unibs.it/;https://garatti.faculty.polimi.it/", "dblp": "175/9343;89/2249;95/650", "google_scholar": "https://scholar.google.ch/citations?user=Y7wBGG8AAAAJ;;rUj9gRgAAAAJ", "orcid": ";;0000-0002-5451-6892", "linkedin": ";;", "or_profile": "~Dario_Paccagnan1;~Marco_Campi1;~Simone_Garatti1", "aff": "Imperial College London;University of Brescia;Polytechnic Institute of Milan", "aff_domain": "imperial.ac.uk;unibs.it;polimi.it", "position": "Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\npaccagnan2023the,\ntitle={The Pick-to-Learn Algorithm: Empowering Compression for Tight Generalization Bounds and Improved Post-training Performance},\nauthor={Dario Paccagnan and Marco Campi and Simone Garatti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=40L3viVWQN}\n}", "github": "", "project": "", "reviewers": "q61g;pkHc;3uwv", "pdf_size": 565915, "rating": "6;6;7", "confidence": "4;3;3", "soundness": "4;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "648;128;271", "wc_strengths": "151;28;115", "wc_weaknesses": "743;19;327", "wc_questions": "34;198;341", "wc_limitations": "84;1;4", "wc_review": "1660;374;1058", "wc_reply_reviewers": "205;348;65", "wc_reply_authors": "435;286;286", "reply_reviewers": "2;2;1", "reply_authors": "3;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 349.0, 219.33687940395856 ], "wc_strengths_avg": [ 98.0, 51.633322573702344 ], "wc_weaknesses_avg": [ 363.0, 296.66591760205057 ], "wc_questions_avg": [ 191.0, 125.42992731667617 ], "wc_limitations_avg": [ 29.666666666666668, 38.43898484033567 ], "wc_review_avg": [ 1030.6666666666667, 525.3629433279647 ], "wc_reply_reviewers_avg": [ 206.0, 115.53643004120677 ], "wc_reply_authors_avg": [ 335.6666666666667, 70.23927359786371 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=627396318000285644&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "imperial.ac.uk;unibs.it;polimi.it", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Imperial College London;University of Brescia;Polytechnic Institute of Milan", "aff_unique_dep": ";;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.unibs.it;https://www.polimi.it/", "aff_unique_abbr": "ICL;UNIBS;Politecnico di Milano", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;Italy" }, { "id": "42zI5dyNwc", "title": "A database-based rather than a language model-based natural language processing method", "track": "main", "status": "Reject", "tldr": "", "abstract": " Language models applied to NLP tasks take natural language as the direct modeling object. But we believe that natural language is essentially a way of encoding information, therefore, the object of study for natural language should be the information encoded in language, and the organizational and compositional structure of the information described in language. Based on this understanding, we propose a database-based natural language processing method that changes the modeling object from natural language to the information encoded in natural language. On this basis, the sentence generation task is transformed into read operations implemented on the database and some sentence encoding rules to be followed; The sentence understanding task is transformed into sentence decoding rules and a series of Boolean operations implemented on the database. Our method is closer to the information processing mechanism of the human brain and has excellent interpretability and scalability.", "keywords": "sentence generation;sentence understanding;relative spatial relationship;Tree-graph hybrid model.", "primary_area": "", "supplementary_material": "", "author": "Limin Zhang", "authorids": "~Limin_Zhang1", "gender": "F", "homepage": "", "dblp": "92/3585", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Limin_Zhang1", "aff": "Renmin University of China", "aff_domain": "math.edu.cn", "position": "MS student", "bibtex": "@misc{\nzhang2023a,\ntitle={A database-based rather than a language model-based natural language processing method},\nauthor={Limin Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=42zI5dyNwc}\n}", "github": "", "project": "", "reviewers": "s4Sj;zcb5;QFi3;fkGC", "site": "https://openreview.net/forum?id=42zI5dyNwc", "pdf_size": 924119, "rating": "1;1;3;4", "confidence": "5;5;4;4", "soundness": "2;1;2;1", "novelty": "1;1;1;2", "presentation": "2;3;3;2", "wc_summary": "36;89;106;37", "wc_strengths": "33;14;24;17", "wc_weaknesses": "15;265;17;19", "wc_questions": "14;2;10;17", "wc_limitations": "45;2;6;14", "wc_review": "143;372;163;104", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;67;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;0;1", "rating_avg": [ 2.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 1.5, 0.5 ], "novelty_avg": [ 1.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.0, 31.088583113419627 ], "wc_strengths_avg": [ 22.0, 7.314369419163897 ], "wc_weaknesses_avg": [ 79.0, 107.39646176667088 ], "wc_questions_avg": [ 10.75, 5.629165124598851 ], "wc_limitations_avg": [ 16.75, 16.872685026396955 ], "wc_review_avg": [ 195.5, 104.08770340438875 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KcP-jskoL94J:scholar.google.com/&scioq=A+database-based+rather+than+a+language+model-based+natural+language+processing+method&hl=en&as_sdt=0,44", "gs_version_total": 
2, "aff_unique_index": "0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "A Unified Framework for U-Net Design and Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72877", "id": "43ruO2fMjq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58575be50c9b47902359920a4d5d1ab4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=43ruO2fMjq", "openreview": "https://openreview.net/forum?id=43ruO2fMjq", "poster": "/media/PosterPDFs/NeurIPS%202023/72877.png?t=1702394503.579915", "slides": "https://nips.cc/virtual/2023/poster/72877", "video": "https://nips.cc/virtual/2023/poster/72877", "author_site": "Christopher Williams, Fabian Falck, George Deligiannidis, Chris C Holmes, Arnaud Doucet, Saifuddin Syed", "tldr": "", "abstract": "U-Nets are a go-to neural architecture across numerous tasks for continuous signals on a square such as images and Partial Differential Equations (PDE), however their design and architecture is understudied. In this paper, we provide a framework for designing and analysing general U-Net architectures. We present theoretical results which characterise the role of the encoder and decoder in a U-Net, their high-resolution scaling limits and their conjugacy to ResNets via preconditioning. We propose Multi-ResNets, U-Nets with a simplified, wavelet-based encoder without learnable parameters. Further, we show how to design novel U-Net architectures which encode function constraints, natural bases, or the geometry of the data. In diffusion models, our framework enables us to identify that high-frequency information is dominated by noise exponentially faster, and show how U-Nets with average pooling exploit this. In our experiments, we demonstrate how Multi-ResNets achieve competitive and often superior performance compared to classical U-Nets in image segmentation, PDE surrogate modelling, and generative modelling with diffusion models. Our U-Net framework paves the way to study the theoretical properties of U-Nets and design natural, scalable neural architectures for a multitude of problems beyond the square.", "keywords": "U-Net;ResNet;Multi-ResNet;Generalised U-Net;Wavelets;Diffusion models;Generative modelling;PDE Modelling;Image Segmentation", "primary_area": "", "supplementary_material": "", "author": "Christopher Williams;Fabian Falck;George Deligiannidis;Christopher C. 
Holmes;Arnaud Doucet;Saifuddin Syed", "authorids": "~Christopher_Williams4;~Fabian_Falck1;~George_Deligiannidis2;~Christopher_C._Holmes1;~Arnaud_Doucet2;~Saifuddin_Syed1", "gender": ";;M;M;;M", "homepage": ";;https://www.stats.ox.ac.uk/~deligian;;https://www.stats.ox.ac.uk/~doucet/;", "dblp": ";;;08/6129;68/1628;", "google_scholar": ";;https://scholar.google.co.uk/citations?user=EF1FwN4AAAAJ;;W4SZGV8AAAAJ;", "orcid": ";;;;0000-0002-7662-419X; 0000-0002-8499-8255", "linkedin": "chris-w-387b53198/;;;;;", "or_profile": "~Christopher_Williams4;~Fabian_Falck1;~George_Deligiannidis2;~Christopher_C._Holmes1;~Arnaud_Doucet2;~Saifuddin_Syed1", "aff": "University of Oxford;;Oxford, University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;;stats.ox.ac.uk;ox.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "PhD student;;Associate Professor;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nwilliams2023a,\ntitle={A Unified Framework for U-Net Design and Analysis},\nauthor={Christopher Williams and Fabian Falck and George Deligiannidis and Christopher C. Holmes and Arnaud Doucet and Saifuddin Syed},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=43ruO2fMjq}\n}", "github": "", "project": "", "reviewers": "ndAy;7MgH;go4z;Q8QJ", "pdf_size": 12940997, "rating": "5;6;7;7", "confidence": "3;4;3;2", "soundness": "2;4;3;4", "novelty": "2;3;3;3", "presentation": "2;4;2;3", "wc_summary": "99;35;61;63", "wc_strengths": "64;65;74;222", "wc_weaknesses": "245;81;824;11", "wc_questions": "6;2;80;38", "wc_limitations": "8;2;1;82", "wc_review": "422;185;1040;416", "wc_reply_reviewers": "114;0;535;86", "wc_reply_authors": "101;0;1740;52", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;5;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 64.5, 22.776083947860748 ], "wc_strengths_avg": [ 106.25, 66.94167237229736 ], "wc_weaknesses_avg": [ 290.25, 319.6493195675536 ], "wc_questions_avg": [ 31.5, 31.284980421921315 ], "wc_limitations_avg": [ 23.25, 34.02480712656576 ], "wc_review_avg": [ 515.75, 317.40067343973925 ], "wc_reply_reviewers_avg": [ 183.75, 207.0994628191971 ], "wc_reply_authors_avg": [ 473.25, 732.2299416849874 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6869198474671818875&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ox.ac.uk;;stats.ox.ac.uk;ox.ac.uk;ox.ac.uk;oxford.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Accelerated On-Device Forward Neural Network Training with Module-Wise Descending Asynchronism", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72876", "id": "45RBLZBJid", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a42d8f43fae4d267e3084b10056153f7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=45RBLZBJid", "openreview": "https://openreview.net/forum?id=45RBLZBJid", "poster": "/media/PosterPDFs/NeurIPS%202023/72876.png?t=1702104733.6224148", "slides": "https://nips.cc/virtual/2023/poster/72876", "video": "https://nips.cc/virtual/2023/poster/72876", "author_site": "Xiaohan Zhao, Hualin Zhang, Zhouyuan Huo, Bin Gu", "tldr": "", "abstract": "On-device learning faces memory constraints when optimizing or fine-tuning on edge devices with limited resources. Current techniques for training deep models on edge devices rely heavily on backpropagation. However, its high memory usage calls for a reassessment of its dominance.\nIn this paper, we propose forward gradient descent (FGD) as a potential solution to overcome the memory capacity limitation in on-device learning. However, FGD's dependencies across layers hinder parallel computation and can lead to inefficient resource utilization.\nTo mitigate this limitation, we propose AsyncFGD, an asynchronous framework that decouples dependencies, utilizes module-wise stale parameters, and maximizes parallel computation. We demonstrate its convergence to critical points through rigorous theoretical analysis.\nEmpirical evaluations conducted on NVIDIA's AGX Orin, a popular embedded device, show that AsyncFGD reduces memory consumption and enhances hardware efficiency, offering a novel approach to on-device learning.", "keywords": "asynchronous algorithm;one-device learning;forward gradient descent;directional derivative;forward algorithms", "primary_area": "", "supplementary_material": "/attachment/614d0b25d42fcdd0a0a5c9abf4bbea9d033c5989.pdf", "author": "Xiaohan Zhao;Hualin Zhang;Zhouyuan Huo;Bin Gu", "authorids": "~Xiaohan_Zhao3;~Hualin_Zhang1;~Zhouyuan_Huo1;~Bin_Gu1", "gender": ";M;;M", "homepage": ";https://github.com/zhanghualin0;;https://mbzuai.ac.ae/study/faculty/bin-gu/", "dblp": ";303/7916;;29/1758-1", "google_scholar": ";;;Vo8OgCgAAAAJ", "orcid": ";;;0000-0001-6049-1815", "linkedin": ";;;", "or_profile": "~Xiaohan_Zhao3;~Hualin_Zhang1;~Zhouyuan_Huo1;~Bin_Gu1", "aff": ";NUIST;;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": ";nuist.edu.cn;;mbzuai.ac.ae", "position": ";MS student;;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023accelerated,\ntitle={Accelerated On-Device Forward Neural Network Training with Module-Wise Descending Asynchronism},\nauthor={Xiaohan Zhao and Hualin Zhang and Zhouyuan Huo and Bin Gu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=45RBLZBJid}\n}", "github": "", "project": "", "reviewers": "bhPv;yH2P;Kesv;empf", "pdf_size": 1109982, "rating": "5;5;5;7", "confidence": "4;5;3;3", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "1;3;3;3", "wc_summary": "85;33;92;41", "wc_strengths": "48;28;52;40", "wc_weaknesses": "695;99;142;63", "wc_questions": "33;34;29;17", "wc_limitations": "8;6;4;28", "wc_review": "869;200;319;189", "wc_reply_reviewers": "144;0;74;0", "wc_reply_authors": "663;71;244;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;4;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 62.75, 26.022826518270456 ], "wc_strengths_avg": [ 
42.0, 9.16515138991168 ], "wc_weaknesses_avg": [ 249.75, 258.5820711108951 ], "wc_questions_avg": [ 28.25, 6.7592529172978875 ], "wc_limitations_avg": [ 11.5, 9.630680142129112 ], "wc_review_avg": [ 394.25, 278.79685704828165 ], "wc_reply_reviewers_avg": [ 54.5, 59.85607738567572 ], "wc_reply_authors_avg": [ 244.5, 257.40289431162194 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18005932612432825228&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";nuist.edu.cn;;mbzuai.ac.ae", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University of Information Science & Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuist.edu.cn/;https://mbzuai.ac.ae", "aff_unique_abbr": "NUIST;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Unsupervised Protein-Ligand Binding Energy Prediction via Neural Euler's Rotation Equation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72875", "id": "46gYakmj4e", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a45a1b0697ee086bd8bf494cacc6567-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=46gYakmj4e", "openreview": "https://openreview.net/forum?id=46gYakmj4e", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72875", "video": "https://nips.cc/virtual/2023/poster/72875", "author_site": "Wengong Jin, Siranush Sarkizova, Xun Chen, Nir HaCohen, Caroline Uhler", "tldr": "", "abstract": "Protein-ligand binding prediction is a fundamental problem in AI-driven drug discovery. Previous work focused on supervised learning methods for small molecules where binding affinity data is abundant, but it is hard to apply the same strategy to other ligand classes like antibodies where labelled data is limited. In this paper, we explore unsupervised approaches and reformulate binding energy prediction as a generative modeling task. Specifically, we train an energy-based model on a set of unlabelled protein-ligand complexes using SE(3) denoising score matching (DSM) and interpret its log-likelihood as binding affinity. Our key contribution is a new equivariant rotation prediction network called Neural Euler's Rotation Equations (NERE) for SE(3) DSM. It predicts a rotation by modeling the force and torque between protein and ligand atoms, where the force is defined as the gradient of an energy function with respect to atom coordinates. 
Using two protein-ligand and antibody-antigen binding affinity prediction benchmarks, we show that NERE outperforms all unsupervised baselines (physics-based potentials and protein language models) in both cases and surpasses supervised baselines in the antibody case.", "keywords": "Energy-based Models;Denoising Score Matching;Equivariant Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Wengong Jin;Siranush Sarkizova;Xun Chen;Nir Hacohen;Caroline Uhler", "authorids": "~Wengong_Jin1;~Siranush_Sarkizova1;~Xun_Chen3;~Nir_Hacohen1;~Caroline_Uhler1", "gender": ";;M;M;F", "homepage": "http://people.csail.mit.edu/wengong;;;https://www.massgeneral.org/cancer-center/clinical-trials-and-research/center-for-cancer-research/investigators/hacohen-lab;https://www.carolineuhler.com/", "dblp": "173/6620;162/8972;;;66/10813", "google_scholar": "IE5D8_QAAAAJ;;PrmkiywAAAAJ;;https://scholar.google.com.tw/citations?user=dIJFcaoAAAAJ", "orcid": ";;0000-0001-6871-2758;;", "linkedin": ";;;;", "or_profile": "~Wengong_Jin1;~Siranush_Sarkizova1;~Xun_Chen3;~Nir_Hacohen1;~Caroline_Uhler1", "aff": "Broad Institute;Broad Institute;Broad Institute;Broad Institute;Electrical Engineering & Computer Science, Massachusetts Institute of Technology", "aff_domain": "broadinstitute.org;broadinstitute.org;broadinstitute.org;broadinstitute.org;eecs.mit.edu", "position": "Postdoc;Researcher;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njin2023unsupervised,\ntitle={Unsupervised Protein-Ligand Binding Energy Prediction via Neural Euler's Rotation Equation},\nauthor={Wengong Jin and Siranush Sarkizova and Xun Chen and Nir Hacohen and Caroline Uhler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=46gYakmj4e}\n}", "github": "", "project": "", "reviewers": "UbeA;3x52;j9Y6;X8hw;KYrm", "pdf_size": 8889336, "rating": "4;6;6;6;7", "confidence": "4;3;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "44;65;59;131;84", "wc_strengths": "43;29;59;175;92", "wc_weaknesses": "116;98;51;212;49", "wc_questions": "4;409;174;3;94", "wc_limitations": "5;43;31;1;33", "wc_review": "212;644;374;522;352", "wc_reply_reviewers": "11;174;102;27;180", "wc_reply_authors": "0;611;217;21;363", "reply_reviewers": "1;2;2;1;2", "reply_authors": "1;3;2;2;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 76.6, 30.070583632513685 ], "wc_strengths_avg": [ 79.6, 52.113721801460315 ], "wc_weaknesses_avg": [ 105.2, 59.44880150179649 ], "wc_questions_avg": [ 136.8, 150.23102209597056 ], "wc_limitations_avg": [ 22.6, 16.56019323558756 ], "wc_review_avg": [ 420.8, 148.71502950273722 ], "wc_reply_reviewers_avg": [ 98.8, 70.88413080513861 ], "wc_reply_authors_avg": [ 242.4, 227.52195498456848 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16862033349656271572&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "broadinstitute.org;broadinstitute.org;broadinstitute.org;broadinstitute.org;eecs.mit.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", 
"aff_unique_norm": "Broad Institute;Massachusetts Institute of Technology", "aff_unique_dep": ";Electrical Engineering & Computer Science", "aff_unique_url": "https://www.broadinstitute.org;https://web.mit.edu", "aff_unique_abbr": "Broad;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Zeroth-Order Methods for Nondifferentiable, Nonconvex, and Hierarchical Federated Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72874", "id": "46x3zvYCyQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0a70c9cd8179fe6f8f6135fafa2a8798-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=46x3zvYCyQ", "openreview": "https://openreview.net/forum?id=46x3zvYCyQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72874.png?t=1699387657.060764", "slides": "https://nips.cc/virtual/2023/poster/72874", "video": "https://nips.cc/virtual/2023/poster/72874", "author_site": "Yuyang Qiu, Uday Shanbhag, Farzad Yousefian", "tldr": "", "abstract": "Federated learning (FL) has emerged as an enabling framework for communication-efficient decentralized training. We study three broadly applicable problem classes in FL: (i) Nondifferentiable nonconvex federated optimization; (ii) Federated bilevel optimization; (iii) Federated minimax problems. Notably, in an implicit sense, both (ii) and (iii) are instances of (i). However, the hierarchical problems in (ii) and (iii) are often complicated by the absence of a closed-form expression for the implicit objective function. Unfortunately, research on these problems has been limited and afflicted by reliance on strong assumptions, including the need for differentiability and L-smoothness of the implicit function. We address this shortcoming by making the following contributions. In (i), by leveraging convolution-based smoothing and Clarke\u2019s subdifferential calculus, we devise a randomized smoothing-enabled zeroth-order FL method and derive communication and iteration complexity guarantees for computing an approximate Clarke stationary point. To contend with (ii) and (iii), we devise a unified randomized implicit zeroth-order FL framework, equipped with explicit communication and iteration complexities. Importantly, our method utilizes delays during local steps to skip making calls to the inexact lower-level FL oracle. This results in significant reduction in communication overhead when addressing hierarchical problems. 
We empirically validate the theory on nonsmooth and hierarchical ML problems.", "keywords": "Federated Learning;Nonsmooth Optimization;Nonconvex Optimization;Bilevel Optimization", "primary_area": "", "supplementary_material": "/attachment/43fb44a97648aae8832a0eb68c7e0add81be000a.pdf", "author": "Yuyang Qiu;Uday Shanbhag;Farzad Yousefian", "authorids": "~Yuyang_Qiu1;~Uday_Shanbhag1;~Farzad_Yousefian1", "gender": "M;M;M", "homepage": "https://yuyangqiu2023.github.io/YuyangQiu/;https://udaybag2.github.io/;https://ise.rutgers.edu/farzad-yousefian", "dblp": "71/2784;76/8131.html;", "google_scholar": "0-HIaPUAAAAJ;https://scholar.google.com/citations?hl=en;N9pIpQQAAAAJ", "orcid": "0009-0002-6470-9708;;0000-0003-2628-741X", "linkedin": ";;", "or_profile": "~Yuyang_Qiu1;~Uday_Shanbhag1;~Farzad_Yousefian1", "aff": "Rutgers University;Pennsylvania State University;Rutgers University, New Brunswick", "aff_domain": "rutgers.edu;psu.edu;rutgers.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nqiu2023zerothorder,\ntitle={Zeroth-Order Methods for Nondifferentiable, Nonconvex, and Hierarchical Federated Optimization},\nauthor={Yuyang Qiu and Uday Shanbhag and Farzad Yousefian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=46x3zvYCyQ}\n}", "github": "", "project": "", "reviewers": "LNaU;CtdY;Lumm;U9HM", "pdf_size": 515799, "rating": "5;5;5;7", "confidence": "4;4;3;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;2;3", "wc_summary": "85;69;81;50", "wc_strengths": "11;28;52;25", "wc_weaknesses": "191;320;129;70", "wc_questions": "1;6;60;173", "wc_limitations": "15;1;4;84", "wc_review": "303;424;326;402", "wc_reply_reviewers": "127;98;0;12", "wc_reply_authors": "265;502;0;23", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 71.25, 13.608361400256829 ], "wc_strengths_avg": [ 29.0, 14.747881203752625 ], "wc_weaknesses_avg": [ 177.5, 92.73214113779537 ], "wc_questions_avg": [ 60.0, 69.22066165531791 ], "wc_limitations_avg": [ 26.0, 33.88952640566109 ], "wc_review_avg": [ 363.75, 50.51917952619579 ], "wc_reply_reviewers_avg": [ 59.25, 54.39381858262941 ], "wc_reply_authors_avg": [ 197.5, 204.16476189587664 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4176843773533247803&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "rutgers.edu;psu.edu;rutgers.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Rutgers University;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.psu.edu", "aff_unique_abbr": "Rutgers;PSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";New Brunswick", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Lightweight Vision Transformer with Bidirectional Interaction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72873", "id": "492Hfmgejy", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3170de57bc1899315b97712043d8bb22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=492Hfmgejy", "openreview": "https://openreview.net/forum?id=492Hfmgejy", "poster": "/media/PosterPDFs/NeurIPS%202023/72873.png?t=1697464755.7946181", "slides": "https://nips.cc/virtual/2023/poster/72873", "video": "https://nips.cc/virtual/2023/poster/72873", "author_site": "Qihang Fan, Huaibo Huang, Xiaoqiang Zhou, Ran He", "tldr": "", "abstract": "Recent advancements in vision backbones have significantly improved their performance by simultaneously modeling images\u2019 local and global contexts. However, the bidirectional interaction between these two contexts has not been well explored and exploited, which is important in the human visual system. This paper proposes a **F**ully **A**daptive **S**elf-**A**ttention (FASA) mechanism for vision transformer to model the local and global information as well as the bidirectional interaction between them in context-aware ways. Specifically, FASA employs self-modulated convolutions to adaptively extract local representation while utilizing self-attention in down-sampled space to extract global representation. Subsequently, it conducts a bidirectional adaptation process between local and global representation to model their interaction. In addition, we introduce a fine-grained downsampling strategy to enhance the down-sampled self-attention mechanism for finer-grained global perception capability. Based on FASA, we develop a family of lightweight vision backbones, **F**ully **A**daptive **T**ransformer (FAT) family. Extensive experiments on multiple vision tasks demonstrate that FAT achieves impressive performance. Notably, FAT accomplishes a **77.6%** accuracy on ImageNet-1K using only **4.5M** parameters and **0.7G** FLOPs, which surpasses the most advanced ConvNets and Transformers with similar model size and computational costs. 
Moreover, our model exhibits faster speed on modern GPU compared to other models.", "keywords": "Vision Transformer;Lightweight Vision Backbone;Convolution Neural Network", "primary_area": "", "supplementary_material": "/attachment/c696014a7580f64bfb917f8dec219bbdf0c6a24a.zip", "author": "Qihang Fan;Huaibo Huang;Xiaoqiang Zhou;Ran He", "authorids": "~Qihang_Fan1;~Huaibo_Huang1;~Xiaoqiang_Zhou2;~Ran_He1", "gender": "M;M;M;M", "homepage": "https://github.com/qhfan;https://people.ucas.edu.cn/~huanghuaibo;https://xiaoqiangzhou.cn/;https://rhe-web.github.io/", "dblp": "344/3915;211/7251.html;13/1515;61/6198-1", "google_scholar": "9HGN_c0AAAAJ;XMvLciUAAAAJ;Z2BTkNIAAAAJ;ayrg9AUAAAAJ", "orcid": "0000-0002-6115-5503;0000-0001-5866-2283;;0000-0002-3807-991X", "linkedin": ";;;", "or_profile": "~Qihang_Fan1;~Huaibo_Huang1;~Xiaoqiang_Zhou2;~Ran_He1", "aff": "Tsinghua University;Institute of Automation, Chinese Academy of Sciences;University of Science and Technology of China;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "tsinghua.edu.cn;ia.ac.cn;ustc.edu;ia.ac.cn", "position": "Undergrad student;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nfan2023lightweight,\ntitle={Lightweight Vision Transformer with Bidirectional Interaction},\nauthor={Qihang Fan and Huaibo Huang and Xiaoqiang Zhou and Ran He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=492Hfmgejy}\n}", "github": "", "project": "", "reviewers": "DXVA;cvP4;DKrb;c9kv", "pdf_size": 1612339, "rating": "6;6;6;7", "confidence": "5;3;5;4", "soundness": "3;3;2;2", "novelty": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "58;69;55;37", "wc_strengths": "19;80;40;35", "wc_weaknesses": "120;211;114;285", "wc_questions": "3;98;67;2", "wc_limitations": "3;9;39;1", "wc_review": "203;467;315;360", "wc_reply_reviewers": "31;55;12;39", "wc_reply_authors": "53;306;37;51", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.75, 11.497282287566918 ], "wc_strengths_avg": [ 43.5, 22.45551157288562 ], "wc_weaknesses_avg": [ 182.5, 70.56380091803446 ], "wc_questions_avg": [ 42.5, 41.47589661478098 ], "wc_limitations_avg": [ 13.0, 15.297058540778355 ], "wc_review_avg": [ 336.25, 94.69259474742468 ], "wc_reply_reviewers_avg": [ 34.25, 15.481844205391036 ], "wc_reply_authors_avg": [ 111.75, 112.31957754550183 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2930684057731359219&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "tsinghua.edu.cn;ia.ac.cn;ustc.edu;ia.ac.cn", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Tsinghua University;Chinese Academy of Sciences;University of Science and Technology of China", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ia.cas.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "THU;CAS;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Direct Diffusion Bridge using Data Consistency 
for Inverse Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72872", "id": "497CevPdOg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/165b0e600b1721bd59526131eb061092-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=497CevPdOg", "openreview": "https://openreview.net/forum?id=497CevPdOg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72872", "video": "https://nips.cc/virtual/2023/poster/72872", "author_site": "Hyungjin Chung, Jeongsol Kim, Jong Chul Ye", "tldr": "", "abstract": "Diffusion model-based inverse problem solvers have shown impressive performance, but are limited in speed, mostly as they require reverse diffusion sampling starting from noise. Several recent works have tried to alleviate this problem by building a diffusion process, directly bridging the clean and the corrupted for specific inverse problems. In this paper, we first unify these existing works under the name Direct Diffusion Bridges (DDB), showing that while motivated by different theories, the resulting algorithms only differ in the choice of parameters. Then, we highlight a critical limitation of the current DDB framework, namely that it does not ensure data consistency. To address this problem, we propose a modified inference procedure that imposes data consistency without the need for fine-tuning. We term the resulting method data Consistent DDB (CDDB), which outperforms its inconsistent counterpart in terms of both perception and distortion metrics, thereby effectively pushing the Pareto-frontier toward the optimum. Our proposed method achieves state-of-the-art results on both evaluation criteria, showcasing its superiority over existing methods. Code is open-sourced [here](https://github.com/HJ-harry/CDDB).", "keywords": "Diffusion models;Inverse problems;Diffusion bridge", "primary_area": "", "supplementary_material": "/attachment/28afb6d169025ed3bf29885300c5870b4c23a983.pdf", "author": "Hyungjin Chung;Jeongsol Kim;Jong Chul Ye", "authorids": "~Hyungjin_Chung1;~Jeongsol_Kim1;~Jong_Chul_Ye1", "gender": "M;M;M", "homepage": "https://www.hj-chung.com/;https://bispl.weebly.com/;https://bispl.weebly.com/", "dblp": "262/0382;282/3103;15/5613", "google_scholar": "https://scholar.google.co.kr/citations?user=KdchEyoAAAAJ;ZaVNwcQAAAAJ;HNMjoNEAAAAJ", "orcid": "0000-0003-3202-0893;;", "linkedin": "hyungjin-chung-060b42148/;;", "or_profile": "~Hyungjin_Chung1;~Jeongsol_Kim1;~Jong_Chul_Ye1", "aff": "NVIDIA;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "nvidia.com;kaist.ac.kr;kaist.ac.kr", "position": "Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\nchung2023direct,\ntitle={Direct Diffusion Bridge using Data Consistency for Inverse Problems},\nauthor={Hyungjin Chung and Jeongsol Kim and Jong Chul Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=497CevPdOg}\n}", "github": "", "project": "", "reviewers": "Qo4y;7H1C;TLjQ;UzBK", "pdf_size": 1519814, "rating": "5;5;6;7", "confidence": "2;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "58;103;45;159", "wc_strengths": "32;62;77;113", "wc_weaknesses": "82;332;434;328", "wc_questions": "6;281;31;180", "wc_limitations": "8;6;18;1", "wc_review": "186;784;605;781", "wc_reply_reviewers": "43;156;0;467", "wc_reply_authors": "0;0;0;589", "reply_reviewers": "1;1;0;1", 
"reply_authors": "1;1;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 44.64512851364637 ], "wc_strengths_avg": [ 71.0, 29.16333314283537 ], "wc_weaknesses_avg": [ 294.0, 129.56079653969405 ], "wc_questions_avg": [ 124.5, 112.20182707959795 ], "wc_limitations_avg": [ 8.25, 6.179603547154137 ], "wc_review_avg": [ 589.0, 243.6975584613026 ], "wc_reply_reviewers_avg": [ 166.5, 182.6095561573928 ], "wc_reply_authors_avg": [ 147.25, 255.04448141451718 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7455737273404594469&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nvidia.com;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "NVIDIA;Korea Advanced Institute of Science and Technology", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.kaist.ac.kr", "aff_unique_abbr": "NVIDIA;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "On the Exploitability of Instruction Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72871", "id": "4AQ4Fnemox", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c2a8060fd22744b38177d9e428a052e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4AQ4Fnemox", "openreview": "https://openreview.net/forum?id=4AQ4Fnemox", "poster": "/media/PosterPDFs/NeurIPS%202023/72871.png?t=1701306042.5043871", "slides": "https://nips.cc/virtual/2023/poster/72871", "video": "https://nips.cc/virtual/2023/poster/72871", "author_site": "Manli Shu, Jiongxiao Wang, Chen Zhu, Chen Zhu, Jonas Geiping, Chaowei Xiao, Tom Goldstein", "tldr": "", "abstract": "Instruction tuning is an effective technique to align large language models (LLMs) with human intent. In this work, we investigate how an adversary can exploit instruction tuning by injecting specific instruction-following examples into the training data that intentionally changes the model's behavior. For example, an adversary can achieve content injection by injecting training examples that mention target content and eliciting such behavior from downstream models. To achieve this goal, we propose \\textit{AutoPoison}, an automated data poisoning pipeline. It naturally and coherently incorporates versatile attack goals into poisoned data with the help of an oracle LLM. We showcase two example attacks: content injection and over-refusal attacks, each aiming to induce a specific exploitable behavior. We quantify and benchmark the strength and the stealthiness of our data poisoning scheme. Our results show that AutoPoison allows an adversary to change a model's behavior by poisoning only a small fraction of data while maintaining a high level of stealthiness in the poisoned examples. 
We hope our work sheds light on how data quality affects the behavior of instruction-tuned models and raises awareness of the importance of data quality for responsible deployments of LLMs.", "keywords": "Trustworthy machine learning;Large language models;Supervised fine-tuning;instruction tuning", "primary_area": "", "supplementary_material": "/attachment/8a2e07e6d9f4917594387aae8f024c00552c2f74.zip", "author": "Manli Shu;Jiongxiao Wang;Chen Zhu;Jonas Geiping;Chaowei Xiao;Tom Goldstein", "authorids": "~Manli_Shu1;~Jiongxiao_Wang1;~Chen_Zhu2;~Jonas_Geiping1;~Chaowei_Xiao2;~Tom_Goldstein1", "gender": "F;;M;M;M;M", "homepage": "https://azshue.github.io/;https://jayfeather1024.github.io/jxwang.github.io/;http://www.cs.umd.edu/~chenzhu/;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/;https://xiaocw11.github.io/", "dblp": "263/3503;322/5991;59/10522-1.html;190/7229;25/8184;150/3317", "google_scholar": "https://scholar.google.com/citations?hl=en;sIGapHMAAAAJ;m-om5O8AAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ;Juoqtj8AAAAJ", "orcid": ";;;;;0000-0002-7043-4926", "linkedin": "manli-shu-a804a8164/;;;;;", "or_profile": "~Manli_Shu1;~Jiongxiao_Wang1;~Chen_Zhu2;~Jonas_Geiping1;~Tom_Goldstein1;~chaowei_xiao1", "aff": "Department of Computer Science, University of Maryland, College Park;Arizona State University;Google;University of Maryland, College Park;University of Maryland, College Park;Arizona State University", "aff_domain": "cs.umd.edu;asu.edu;google.com;umd.edu;umd.edu;asu.edu", "position": "PhD student;PhD student;Research Scientist;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nshu2023on,\ntitle={On the Exploitability of Instruction Tuning},\nauthor={Manli Shu and Jiongxiao Wang and Chen Zhu and Jonas Geiping and Chaowei Xiao and Tom Goldstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4AQ4Fnemox}\n}", "github": "", "project": "", "reviewers": "oPzU;9ytD;AQ2w;uhYd", "pdf_size": 815907, "rating": "6;6;7;7", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "52;66;55;79", "wc_strengths": "27;113;43;92", "wc_weaknesses": "75;364;168;108", "wc_questions": "4;58;10;237", "wc_limitations": "14;6;15;5", "wc_review": "172;607;291;521", "wc_reply_reviewers": "40;16;34;75", "wc_reply_authors": "0;0;0;81", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 10.606601717798213 ], "wc_strengths_avg": [ 68.75, 35.01696017646306 ], "wc_weaknesses_avg": [ 178.75, 112.02985093268668 ], "wc_questions_avg": [ 77.25, 94.57635803941702 ], "wc_limitations_avg": [ 10.0, 4.527692569068709 ], "wc_review_avg": [ 397.75, 174.1656897899239 ], "wc_reply_reviewers_avg": [ 41.25, 21.39363223017541 ], "wc_reply_authors_avg": [ 20.25, 35.074028853269766 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 105, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14521585596237389103&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cs.umd.edu;asu.edu;google.com;umd.edu;umd.edu;asu.edu", "author_num": 6, 
"aff_unique_index": "0;1;2;3;3;1", "aff_unique_norm": "University of Maryland, College Park;Arizona State University;Google;University of Maryland", "aff_unique_dep": "Department of Computer Science;;Google;", "aff_unique_url": "https://www/umd.edu;https://www.asu.edu;https://www.google.com;https://www/umd.edu", "aff_unique_abbr": "UMD;ASU;Google;UMD", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "College Park;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Block-Coordinate Methods and Restarting for Solving Extensive-Form Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72870", "id": "4AmJVaJ78I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2280faacd674566a5eace1bd1098f507-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4AmJVaJ78I", "openreview": "https://openreview.net/forum?id=4AmJVaJ78I", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72870", "video": "https://nips.cc/virtual/2023/poster/72870", "author_site": "Darshan Chakrabarti, Jelena Diakonikolas, Christian Kroer", "tldr": "", "abstract": "Coordinate descent methods are popular in machine learning and optimization for their simple sparse updates and excellent practical performance. \nIn the context of large-scale sequential game solving, these same properties would be attractive, but until now no such methods were known, because the strategy spaces do not satisfy the typical separable block structure exploited by such methods.\nWe present the first cyclic coordinate-descent-like method for the polytope of sequence-form strategies, which form the strategy spaces for the players in an extensive-form game (EFG). \nOur method exploits the recursive structure of the proximal update induced by what are known as dilated regularizers, in order to allow for a pseudo block-wise update.\nWe show that our method enjoys a O(1/T) convergence rate to a two-player zero-sum Nash equilibrium, while avoiding the worst-case polynomial scaling with the number of blocks common to cyclic methods. We empirically show that our algorithm usually performs better than other state-of-the-art first-order methods (i.e., mirror prox), and occasionally can even beat CFR$^+$, a state-of-the-art algorithm for numerical equilibrium computation in zero-sum EFGs. \nWe then introduce a restarting heuristic for EFG solving. 
We show empirically that restarting can lead to speedups, sometimes huge, both for our cyclic method, as well as for existing methods such as mirror prox and predictive CFR$^+$.", "keywords": "extensive-form games;first-order methods;coordinate descent", "primary_area": "", "supplementary_material": "/attachment/01d39da5a87292e8808d2e59529e8fe2ae0b6a0c.pdf", "author": "Darshan Chakrabarti;Jelena Diakonikolas;Christian Kroer", "authorids": "~Darshan_Chakrabarti1;~Jelena_Diakonikolas2;~Christian_Kroer1", "gender": "M;F;M", "homepage": "https://darshanchakrabarti.com/;http://www.jelena-diakonikolas.com/;http://www.columbia.edu/~ck2945/", "dblp": "270/2232;147/5178;64/10660", "google_scholar": "Q6w5s2sAAAAJ;J8ixfu8AAAAJ;https://scholar.google.ch/citations?user=ckHwjPAAAAAJ", "orcid": "0000-0002-3907-044X;0000-0003-3439-0310;0000-0002-9009-8683", "linkedin": ";;", "or_profile": "~Darshan_Chakrabarti1;~Jelena_Diakonikolas2;~Christian_Kroer1", "aff": "Columbia University;University of Wisconsin, Madison;Columbia University", "aff_domain": "columbia.edu;wisc.edu;columbia.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchakrabarti2023blockcoordinate,\ntitle={Block-Coordinate Methods and Restarting for Solving Extensive-Form Games},\nauthor={Darshan Chakrabarti and Jelena Diakonikolas and Christian Kroer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4AmJVaJ78I}\n}", "github": "", "project": "", "reviewers": "XCKi;u8q2;c8Uw;rJqj;udTT;Y4SP;Lrca", "pdf_size": 3877642, "rating": "5;5;5;6;6;7;7", "confidence": "1;1;3;3;3;3;1", "soundness": "2;3;2;3;3;3;3", "novelty": "2;2;3;3;2;3;3", "presentation": "2;3;2;3;4;3;3", "wc_summary": "21;62;78;60;149;76;197", "wc_strengths": "23;39;53;41;76;65;42", "wc_weaknesses": "175;91;78;237;209;107;57", "wc_questions": "1;71;94;1;88;7;1", "wc_limitations": "9;30;54;12;11;1;20", "wc_review": "229;293;357;351;533;256;317", "wc_reply_reviewers": "5;5;0;18;90;5;25", "wc_reply_authors": "0;0;0;0;243;0;0", "reply_reviewers": "1;1;0;1;1;1;1", "reply_authors": "1;1;1;1;2;1;1", "rating_avg": [ 5.857142857142857, 0.8329931278350429 ], "confidence_avg": [ 2.142857142857143, 0.989743318610787 ], "soundness_avg": [ 2.7142857142857144, 0.4517539514526256 ], "novelty_avg": [ 2.5714285714285716, 0.49487165930539345 ], "presentation_avg": [ 2.857142857142857, 0.6388765649999398 ], "wc_summary_avg": [ 91.85714285714286, 55.66591306671454 ], "wc_strengths_avg": [ 48.42857142857143, 16.421737709138917 ], "wc_weaknesses_avg": [ 136.28571428571428, 64.94110361531833 ], "wc_questions_avg": [ 37.57142857142857, 41.04303366278982 ], "wc_limitations_avg": [ 19.571428571428573, 16.39561912617551 ], "wc_review_avg": [ 333.7142857142857, 92.19345233291367 ], "wc_reply_reviewers_avg": [ 21.142857142857142, 29.264452211946598 ], "wc_reply_authors_avg": [ 34.714285714285715, 85.03228678518747 ], "reply_reviewers_avg": [ 0.8571428571428571, 0.3499271061118826 ], "reply_authors_avg": [ 1.1428571428571428, 0.3499271061118826 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.19802950859533489, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2061294773412976849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "columbia.edu;wisc.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Columbia University;University of Wisconsin", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.columbia.edu;https://www.wisc.edu", "aff_unique_abbr": "Columbia;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Removing Hidden Confounding in Recommendation: A Unified Multi-Task Learning Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72869", "id": "4IWJZjbRFj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab3f114401f0523ca1cc09de0621f400-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4IWJZjbRFj", "openreview": "https://openreview.net/forum?id=4IWJZjbRFj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72869", "video": "https://nips.cc/virtual/2023/poster/72869", "author_site": "Haoxuan Li, Kunhan Wu, Chunyuan Zheng, Yanghao Xiao, Hao Wang, Zhi Geng, Fuli Feng, Xiangnan He, Peng Wu", "tldr": "", "abstract": "In recommender systems, the collected data used for training is always subject to selection bias, which poses a great challenge for unbiased learning. Previous studies proposed various debiasing methods based on observed user and item features, but ignored the effect of hidden confounding. To address this problem, recent works suggest the use of sensitivity analysis for worst-case control of the unknown true propensity, but only valid when the true propensity is near to the nominal propensity within a finite bound. In this paper, we first perform theoretical analysis to reveal the possible failure of previous approaches, including propensity-based, multi-task learning, and bi-level optimization methods, in achieving unbiased learning when hidden confounding is present. Then, we propose a unified multi-task learning approach to remove hidden confounding, which uses a few unbiased ratings to calibrate the learned nominal propensities and nominal error imputations from biased data. 
We conduct extensive experiments on three publicly available benchmark datasets containing a fully exposed large-scale industrial dataset, validating the effectiveness of the proposed methods in removing hidden confounding.", "keywords": "Debiased recommender system;Multi-task learning;Causal inference", "primary_area": "", "supplementary_material": "", "author": "Haoxuan Li;Kunhan Wu;Chunyuan Zheng;Yanghao Xiao;Hao Wang;Zhi Geng;Fuli Feng;Xiangnan He;Peng Wu", "authorids": "~Haoxuan_Li6;~Kunhan_Wu1;~Chunyuan_Zheng1;~Yanghao_Xiao1;~Hao_Wang28;~Zhi_Geng1;~Fuli_Feng1;~Xiangnan_He1;~Peng_Wu5", "gender": "M;;M;;;M;M;M;M", "homepage": "https://haoxuanli-pku.github.io/;;;;;https://stxy.btbu.edu.cn/szdw/bssds/34339356074b408c8650309f05f24558.htm;https://fulifeng.github.io/;http://staff.ustc.edu.cn/~hexn;https://pengwu.site/", "dblp": "145/4965-1.html;;;322/6462;;;183/9198;59/1007;15/6146-12", "google_scholar": "gtDqiucAAAAJ;;https://scholar.google.com/citations?hl=en;hzfFzKUAAAAJ;;;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": "0000-0003-3620-3769;0000-0002-8456-350X;0000-0002-0306-7310;0000-0001-9929-4448;;;0000-0002-5828-9842;0000-0001-8472-7992;0000-0001-7154-8880", "linkedin": ";;;;;;;;", "or_profile": "~Haoxuan_Li6;~Kunhan_Wu1;~Chunyuan_Zheng1;~Yanghao_Xiao1;~Hao_Wang28;~Zhi_Geng1;~Fuli_Feng1;~Xiangnan_He1;~Peng_Wu5", "aff": "Peking University;Carnegie Mellon University;Department of Computer Science, University of Illinois at Urbana-Champaign;University of Chinese Academy of Sciences;;School of mathematical Science, Peking University, Peking University;University of Science and Technology of China;University of Science and Technology of China;Beijing Technology and Business University", "aff_domain": "pku.edu.cn;cmu.edu;cs.illinois.edu;ucas.ac.cn;;math.pku.edu.cn;ustc.edu.cn;ustc.edu.cn;btbu.edu.cn", "position": "PhD student;MS student;MS student;PhD student;;Full Professor;Full Professor;Professor;Associate Professor", "bibtex": "@inproceedings{\nli2023removing,\ntitle={Removing Hidden Confounding in Recommendation: A Unified Multi-Task Learning Approach},\nauthor={Haoxuan Li and Kunhan Wu and Chunyuan Zheng and Yanghao Xiao and Hao Wang and Zhi Geng and Fuli Feng and Xiangnan He and Peng Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4IWJZjbRFj}\n}", "github": "", "project": "", "reviewers": "KtZ4;csfC;J6YV;Uwjf;y77Q", "pdf_size": 2937388, "rating": "4;5;6;6;6", "confidence": "4;4;4;3;3", "soundness": "3;3;3;3;2", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;2", "wc_summary": "49;158;68;66;88", "wc_strengths": "57;59;116;70;81", "wc_weaknesses": "209;105;97;50;117", "wc_questions": "4;38;58;25;66", "wc_limitations": "8;57;1;12;1", "wc_review": "327;417;340;223;353", "wc_reply_reviewers": "57;30;0;0;21", "wc_reply_authors": "49;34;0;0;34", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 85.8, 38.15966456875637 ], "wc_strengths_avg": [ 76.6, 21.490463001061656 ], "wc_weaknesses_avg": [ 115.6, 51.93688477373282 ], "wc_questions_avg": [ 38.2, 22.4 ], "wc_limitations_avg": [ 15.8, 21.027600909281116 ], 
"wc_review_avg": [ 332.0, 62.66737588251163 ], "wc_reply_reviewers_avg": [ 21.6, 21.247117451550928 ], "wc_reply_authors_avg": [ 23.4, 19.875613198087752 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.6123724356957947, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9407503735822325447&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;cmu.edu;cs.illinois.edu;ucas.ac.cn;;math.pku.edu.cn;ustc.edu.cn;ustc.edu.cn;btbu.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;0;4;4;5", "aff_unique_norm": "Peking University;Carnegie Mellon University;University of Illinois Urbana-Champaign;University of Chinese Academy of Sciences;University of Science and Technology of China;Beijing Technology and Business University", "aff_unique_dep": ";;Department of Computer Science;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.cmu.edu;https://illinois.edu;http://www.ucas.ac.cn;http://www.ustc.edu.cn;http://www.btbu.edu.cn", "aff_unique_abbr": "Peking U;CMU;UIUC;UCAS;USTC;BTBU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Urbana-Champaign;Peking", "aff_country_unique_index": "0;1;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Learning to Receive Help: Intervention-Aware Concept Embedding Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72868", "id": "4ImZxqmT1K", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/770cabd044c4eacb6dc5924d9a686dce-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4ImZxqmT1K", "openreview": "https://openreview.net/forum?id=4ImZxqmT1K", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72868", "video": "https://nips.cc/virtual/2023/poster/72868", "author_site": "Mateo Espinosa Zarlenga, Katie Collins, Krishnamurthy Dvijotham, Adrian Weller, Zohreh Shams, Mateja Jamnik", "tldr": "", "abstract": "Concept Bottleneck Models (CBMs) tackle the opacity of neural architectures by constructing and explaining their predictions using a set of high-level concepts. A special property of these models is that they permit concept interventions, wherein users can correct mispredicted concepts and thus improve the model's performance. Recent work, however, has shown that intervention efficacy can be highly dependent on the order in which concepts are intervened on and on the model's architecture and training hyperparameters. We argue that this is rooted in a CBM's lack of train-time incentives for the model to be appropriately receptive to concept interventions. To address this, we propose Intervention-aware Concept Embedding models (IntCEMs), a novel CBM-based architecture and training paradigm that improves a model's receptiveness to test-time interventions. Our model learns a concept intervention policy in an end-to-end fashion from where it can sample meaningful intervention trajectories at train-time. This conditions IntCEMs to effectively select and receive concept interventions when deployed at test-time. 
Our experiments show that IntCEMs significantly outperform state-of-the-art concept-interpretable models when provided with test-time concept interventions, demonstrating the effectiveness of our approach.", "keywords": "Explainable Artificial Intelligence;Concept Bottleneck Models;Concept-based Explainability;Interpretability;XAI;Concept Interventions", "primary_area": "", "supplementary_material": "/attachment/82a103495f8d1097640679212edd9bcff42ba3f7.zip", "author": "Mateo Espinosa Zarlenga;Katherine M. Collins;Krishnamurthy Dj Dvijotham;Adrian Weller;Zohreh Shams;Mateja Jamnik", "authorids": "~Mateo_Espinosa_Zarlenga1;~Katherine_M._Collins1;~Krishnamurthy_Dj_Dvijotham1;~Adrian_Weller1;~Zohreh_Shams1;~Mateja_Jamnik1", "gender": "M;F;;M;;F", "homepage": "https://mateoespinosa.github.io/;https://collinskatie.github.io/;;http://mlg.eng.cam.ac.uk/adrian/;;http://www.cl.cam.ac.uk/~mj201", "dblp": "307/3045.html;284/4959.html;;73/8324;;41/1392", "google_scholar": "4ikoEiMAAAAJ;48ZphCEAAAAJ;;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;;d5QiyJkAAAAJ", "orcid": ";0000-0002-7032-716X;;;;0000-0003-2772-2532", "linkedin": "mateoespinosa/;katie-collins-474121175/;;;;", "or_profile": "~Mateo_Espinosa_Zarlenga1;~Katherine_M._Collins1;~Krishnamurthy_Dj_Dvijotham1;~Adrian_Weller1;~Zohreh_Shams1;~Mateja_Jamnik1", "aff": "University of Cambridge;Google;;University of Cambridge;;University of Cambridge", "aff_domain": "cam.ac.uk;google.com;;cam.ac.uk;;cam.ac.uk", "position": "PhD student;Intern;;Principal Researcher;;Professor in Artificial Intelligence", "bibtex": "@inproceedings{\nzarlenga2023learning,\ntitle={Learning to Receive Help: Intervention-Aware Concept Embedding Models},\nauthor={Mateo Espinosa Zarlenga and Katherine M. Collins and Krishnamurthy Dj Dvijotham and Adrian Weller and Zohreh Shams and Mateja Jamnik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4ImZxqmT1K}\n}", "github": "", "project": "", "reviewers": "4KHx;Zgwp;23Ef;oMUi", "pdf_size": 1600555, "rating": "5;7;9;9", "confidence": "4;4;5;4", "soundness": "2;3;4;4", "novelty": "2;2;4;4", "presentation": "3;3;4;4", "wc_summary": "160;171;82;201", "wc_strengths": "144;50;131;117", "wc_weaknesses": "419;377;69;101", "wc_questions": "370;179;53;135", "wc_limitations": "41;26;1;41", "wc_review": "1134;803;336;595", "wc_reply_reviewers": "307;230;42;16", "wc_reply_authors": "320;460;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 153.5, 43.92322847879013 ], "wc_strengths_avg": [ 110.5, 36.21118611699981 ], "wc_weaknesses_avg": [ 241.5, 157.609485755141 ], "wc_questions_avg": [ 184.25, 116.38594202050349 ], "wc_limitations_avg": [ 27.25, 16.345871038277526 ], "wc_review_avg": [ 717.0, 292.1172709717794 ], "wc_reply_reviewers_avg": [ 148.75, 123.14904587531322 ], "wc_reply_authors_avg": [ 195.0, 201.18399538730708 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2336598308180676909&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cam.ac.uk;google.com;;cam.ac.uk;;cam.ac.uk", 
"author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Cambridge;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cam.ac.uk;https://www.google.com", "aff_unique_abbr": "Cambridge;Google", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Cambridge;Mountain View", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Neural approximation of Wasserstein distance via a universal architecture for symmetric and factorwise group invariant functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72867", "id": "4JB42GBxGs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e5f58d98523298cba093f658cfdf2d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4JB42GBxGs", "openreview": "https://openreview.net/forum?id=4JB42GBxGs", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72867", "video": "https://nips.cc/virtual/2023/poster/72867", "author_site": "Samantha Chen, Yusu Wang", "tldr": "", "abstract": "Learning distance functions between complex objects, such as the Wasserstein distance to compare point sets, is a common goal in machine learning applications. However, functions on such complex objects (e.g., point sets and graphs) are often required to be invariant to a wide variety of group actions e.g. permutation or rigid transformation. \nTherefore, continuous and symmetric *product* functions (such as distance functions) on such complex objects must also be invariant to the *product* of such group actions. \nWe call these functions symmetric and factor-wise group invariant functions (or SGFI functions} in short).\nIn this paper, we first present a general neural network architecture for approximating SFGI functions. The main contribution of this paper combines this general NN with a sketching idea in order to develop a specific and efficient neural network which can approximate the $p$-th Wasserstein distance between point sets.\nVery importantly, the required model complexity is *independent* of the sizes of input point sets. \nOn the theoretical front, to the best of our knowledge, this is the first result showing that there exists a neural network with the capacity to approximate Wasserstein distance with bounded model complexity. Our work provides an interesting integration of sketching ideas for geometric problems with universal approximation of symmetric functions. \nOn the empirical front, we present a range of results showing that our newly proposed neural network architecture performs comparatively or better than other models (including a SOTA Siamese Autoencoder based approach). 
In particular, our NN generalizes significantly better and trains much faster than the SOTA Siamese AE.\nFinally, this line of investigation could be useful in exploring effective neural network design for solving a broad range of geometric optimization problems (e.g., $k$-means in a metric space).", "keywords": "neural networks;Wasserstein distance;universal approximation;optimal transport", "primary_area": "", "supplementary_material": "/attachment/17583b0d5b0472a150d817423b0c94bca0738f82.zip", "author": "Samantha Chen;Yusu Wang", "authorids": "~Samantha_Chen1;~Yusu_Wang1", "gender": ";", "homepage": "http://chens5.github.io;", "dblp": "211/1400;", "google_scholar": "W_tx4c0AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Samantha_Chen1;~Yusu_Wang1", "aff": "University of California, San Diego;", "aff_domain": "ucsd.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nchen2023neural,\ntitle={Neural approximation of Wasserstein distance via a universal architecture for symmetric and factorwise group invariant functions},\nauthor={Samantha Chen and Yusu Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4JB42GBxGs}\n}", "github": "", "project": "", "reviewers": "xMX9;1U84;vLSh;CxcX", "pdf_size": 359898, "rating": "5;5;6;6", "confidence": "4;4;5;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "144;69;78;80", "wc_strengths": "194;26;85;79", "wc_weaknesses": "116;76;75;49", "wc_questions": "3;179;429;43", "wc_limitations": "13;8;12;1", "wc_review": "470;358;679;252", "wc_reply_reviewers": "0;23;14;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.75, 29.877876430563134 ], "wc_strengths_avg": [ 96.0, 61.06144446375307 ], "wc_weaknesses_avg": [ 79.0, 23.947860029656095 ], "wc_questions_avg": [ 163.5, 166.59156641318913 ], "wc_limitations_avg": [ 8.5, 4.716990566028302 ], "wc_review_avg": [ 439.75, 158.18403048348463 ], "wc_reply_reviewers_avg": [ 9.25, 9.781998773256925 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6551200452570356098&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucsd.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Effectively Learning Initiation Sets in Hierarchical Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72866", "id": "4JCVw8oMlf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8da56eb93676e8f60ed2b696e44e7dc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4JCVw8oMlf", "openreview": "https://openreview.net/forum?id=4JCVw8oMlf", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72866", "video": 
"https://nips.cc/virtual/2023/poster/72866", "author_site": "Akhil Bagaria, Ben Abbatematteo, Ben Abbatematteo, Omer Gottesman, Matt Corsaro, Sreehari Rammohan, George Konidaris", "tldr": "", "abstract": "An agent learning an option in hierarchical reinforcement learning must solve three problems: identify the option's subgoal (termination condition), learn a policy, and learn where that policy will succeed (initiation set). The termination condition is typically identified first, but the option policy and initiation set must be learned simultaneously, which is challenging because the initiation set depends on the option policy, which changes as the agent learns. Consequently, data obtained from option execution becomes invalid over time, leading to an inaccurate initiation set that subsequently harms downstream task performance. We highlight three issues---data non-stationarity, temporal credit assignment, and pessimism---specific to learning initiation sets, and propose to address them using tools from off-policy value estimation and classification. We show that our method learns higher-quality initiation sets faster than existing methods (in MiniGrid and Montezuma's Revenge), can automatically discover promising grasps for robot manipulation (in Robosuite), and improves the performance of a state-of-the-art option discovery method in a challenging maze navigation task in MuJoCo.", "keywords": "hierarchical reinforcment learning", "primary_area": "", "supplementary_material": "/attachment/b5a53fbe6ab2564a169b30c67ddc33b12a96155b.pdf", "author": "Akhil Bagaria;Ben M Abbatematteo;Omer Gottesman;Matt Corsaro;Sreehari Rammohan;George Konidaris", "authorids": "~Akhil_Bagaria1;~Ben_M_Abbatematteo1;~Omer_Gottesman1;~Matt_Corsaro1;~Sreehari_Rammohan1;~George_Konidaris1", "gender": "M;M;M;M;M;M", "homepage": "https://abagaria.github.io;https://babbatem.github.io/;https://omergott.github.io/;https://mattcorsaro1.github.io/;https://sreeharirammohan.com/;http://cs.brown.edu/people/gdk/", "dblp": "155/9746;265/7692;;268/5826;277/6972;56/6762", "google_scholar": "obLF_-IAAAAJ;rz3VnGAAAAAJ;glNJx5zYUbsC;NZXTTVQAAAAJ;;9UERvVEAAAAJ", "orcid": ";;;;;", "linkedin": ";;;mattcorsaro;;", "or_profile": "~Akhil_Bagaria1;~Ben_M_Abbatematteo1;~Omer_Gottesman1;~Matt_Corsaro1;~Sreehari_Rammohan1;~George_Konidaris1", "aff": "Brown University;Brown University;Amazon;Brown University;Brown University;Brown University", "aff_domain": "brown.edu;brown.edu;amazon.com;brown.edu;brown.edu;brown.edu", "position": "PhD student;PhD student;Researcher;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nbagaria2023effectively,\ntitle={Effectively Learning Initiation Sets in Hierarchical Reinforcement Learning},\nauthor={Akhil Bagaria and Ben M Abbatematteo and Omer Gottesman and Matt Corsaro and Sreehari Rammohan and George Konidaris},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4JCVw8oMlf}\n}", "github": "", "project": "", "reviewers": "uZYk;zsHA;pGKy;y3Ji;RLJn", "pdf_size": 12239972, "rating": "3;3;5;7;7", "confidence": "5;3;2;4;4", "soundness": "2;3;3;4;3", "novelty": "2;2;3;4;3", "presentation": "2;3;2;3;3", "wc_summary": "195;141;112;96;47", "wc_strengths": "106;36;80;84;64", "wc_weaknesses": "391;153;82;86;280", "wc_questions": "219;47;135;7;29", "wc_limitations": "10;15;2;34;8", "wc_review": "921;392;411;307;428", "wc_reply_reviewers": "952;0;0;8;0", "wc_reply_authors": "1088;84;33;0;0", "reply_reviewers": 
"2;0;0;1;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 5.0, 1.7888543819998317 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 118.2, 49.02815517638819 ], "wc_strengths_avg": [ 74.0, 23.25510696599781 ], "wc_weaknesses_avg": [ 198.4, 119.99766664398103 ], "wc_questions_avg": [ 87.4, 78.85581779425029 ], "wc_limitations_avg": [ 13.8, 10.925200226998129 ], "wc_review_avg": [ 491.8, 218.59679778075434 ], "wc_reply_reviewers_avg": [ 192.0, 380.0126313690112 ], "wc_reply_authors_avg": [ 241.0, 424.61370679713104 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5624104172402289171&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "brown.edu;brown.edu;amazon.com;brown.edu;brown.edu;brown.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Brown University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.brown.edu;https://www.amazon.com", "aff_unique_abbr": "Brown;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Variance, Admissibility, and Stability of Empirical Risk Minimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72865", "id": "4KV2xLeqPN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7644353d580a9e027e0069d6480d971b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4KV2xLeqPN", "openreview": "https://openreview.net/forum?id=4KV2xLeqPN", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72865", "video": "https://nips.cc/virtual/2023/poster/72865", "author_site": "Gil Kur, Eli Putterman, Alexander Rakhlin", "tldr": "", "abstract": "It is well known that Empirical Risk Minimization (ERM) may attain minimax suboptimal rates in terms of the mean squared error (Birg\u00e9 and Massart, 1993). In this paper, we prove that, under relatively mild assumptions, the suboptimality of ERM must be due to its bias. Namely, the variance error term of ERM (in terms of the bias and variance decomposition) enjoys the minimax rate. In the fixed design setting, we provide an elementary proof of this result using the probabilistic method. Then, we extend our proof to the random design setting for various models. In addition, we provide a simple proof of Chatterjee\u2019s admissibility theorem (Chatterjee, 2014, Theorem 1.4), which states that in the fixed design setting, ERM cannot be ruled out as an optimal method, and then we extend this result to the random design setting. We also show that our estimates imply stability of ERM, complementing the main result of Caponnetto and Rakhlin (2006) for non-Donsker classes. 
Finally, we highlight the somewhat irregular nature of the loss landscape of ERM in the non-Donsker regime, by showing that functions can be close to ERM, in terms of $L_2$ distance, while still being far from almost-minimizers of the empirical loss.", "keywords": "empirical risk minimization;bias-variance decomposition;admissibility", "primary_area": "", "supplementary_material": "/attachment/9b0fd40e5940bdb2d2fff768d706d3559c046494.pdf", "author": "Gil Kur;Eli Putterman;Alexander Rakhlin", "authorids": "~Gil_Kur2;~Eli_Putterman1;~Alexander_Rakhlin1", "gender": "M;;M", "homepage": ";;http://www.mit.edu/~rakhlin/", "dblp": "236/4833;320/0730;59/407", "google_scholar": "yDkAhccAAAAJ;;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ", "orcid": ";0000-0002-3636-7547;", "linkedin": ";;", "or_profile": "~Gil_Kur2;~Eli_Putterman1;~Alexander_Rakhlin1", "aff": "Massachusetts Institute of Technology;Tel Aviv University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;tau.ac.il;mit.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkur2023on,\ntitle={On the Variance, Admissibility, and Stability of Empirical Risk Minimization},\nauthor={Gil Kur and Eli Putterman and Alexander Rakhlin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4KV2xLeqPN}\n}", "github": "", "project": "", "reviewers": "vswn;XB3R;hXZP;ZyPC;DCZr", "pdf_size": 350758, "rating": "6;6;7;7;7", "confidence": "2;3;3;3;3", "soundness": "3;4;3;3;3", "novelty": "2;3;4;3;3", "presentation": "2;2;3;3;3", "wc_summary": "27;191;126;67;91", "wc_strengths": "61;55;34;28;122", "wc_weaknesses": "73;464;15;72;29", "wc_questions": "27;79;22;21;13", "wc_limitations": "18;1;1;7;4", "wc_review": "206;790;198;195;259", "wc_reply_reviewers": "0;73;6;11;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 100.4, 55.59712222768369 ], "wc_strengths_avg": [ 60.0, 33.37663853655727 ], "wc_weaknesses_avg": [ 130.6, 168.2814309423354 ], "wc_questions_avg": [ 32.4, 23.728463919942225 ], "wc_limitations_avg": [ 6.2, 6.305553108173778 ], "wc_review_avg": [ 329.6, 231.37208128899218 ], "wc_reply_reviewers_avg": [ 18.0, 27.806474066303338 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6123724356957946, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13538077449398386778&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;tau.ac.il;mit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.tau.ac.il", "aff_unique_abbr": "MIT;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "When Does Confidence-Based Cascade Deferral Suffice?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72864", "id": "4KZhZJSPYU", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1f09e1ee5035a4c3fe38a5681cae5815-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4KZhZJSPYU", "openreview": "https://openreview.net/forum?id=4KZhZJSPYU", "poster": "/media/PosterPDFs/NeurIPS%202023/72864.png?t=1701704039.5301597", "slides": "https://nips.cc/virtual/2023/poster/72864", "video": "https://nips.cc/virtual/2023/poster/72864", "author_site": "Wittawat Jitkrittum, Neha Gupta, Aditya Menon, Harikrishna Narasimhan, Ankit Rawat, Sanjiv Kumar", "tldr": "", "abstract": "Cascades are a classical strategy to enable inference cost to vary adaptively across samples, wherein a sequence of classifiers are invoked in turn. A deferral rule determines whether to invoke the next classifier in the sequence, or to terminate prediction. One simple deferral rule employs the confidence of the current classifier, e.g., based on the maximum predicted softmax probability. Despite being oblivious to the structure of the cascade --- e.g., not modelling the errors of downstream models --- such confidence-based deferral often works remarkably well in practice. In this paper, we seek to better understand the conditions under which confidence-based deferral may fail, and when alternate deferral strategies can perform better. We first present a theoretical characterisation of the optimal deferral rule, which precisely characterises settings under which confidence-based deferral may suffer. We then study post-hoc deferral mechanisms, and demonstrate they can significantly improve upon confidence-based deferral in settings where (i) downstream models are specialists that only work well on a subset of inputs, (ii) samples are subject to label noise, and (iii) there is distribution shift between the train and test set.", "keywords": "cascades;deferral rules;adaptive computation;model confidence", "primary_area": "", "supplementary_material": "/attachment/4a5de554f8b1448689f6261db5392f3e7271d0a3.pdf", "author": "Wittawat Jitkrittum;Neha Gupta;Aditya Krishna Menon;Harikrishna Narasimhan;Ankit Singh Rawat;Sanjiv Kumar", "authorids": "~Wittawat_Jitkrittum1;~Neha_Gupta1;~Aditya_Krishna_Menon1;~Harikrishna_Narasimhan1;~Ankit_Singh_Rawat1;~Sanjiv_Kumar1", "gender": "M;F;M;M;;M", "homepage": "http://wittawat.com;;https://hari-research.github.io/;https://ankitsrawat.github.io/home/;http://www.sanjivk.com/;https://akmenon.github.io/", "dblp": "95/3398.html;09/6861-2;56/7573;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;;89/3514", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;;7X_oT4YAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-9400-9262;;;;;", "linkedin": "wittawat-jitkrittum/;;;;;", "or_profile": "~Wittawat_Jitkrittum1;~Neha_Gupta1;~Harikrishna_Narasimhan1;~Ankit_Singh_Rawat1;~Sanjiv_Kumar1;~Aditya_Menon1", "aff": "Google Research;Google;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Researcher;Research Scientist;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\njitkrittum2023when,\ntitle={When Does Confidence-Based Cascade Deferral Suffice?},\nauthor={Wittawat Jitkrittum and Neha Gupta and Aditya Krishna Menon and Harikrishna Narasimhan and Ankit Singh Rawat and Sanjiv Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4KZhZJSPYU}\n}", "github": "", "project": "", "reviewers": "V2z7;ahRx;6Hfw;kqwH;UXcD", "pdf_size": 526161, "rating": "6;7;7;7;7", "confidence": "3;4;3;4;4", "soundness": "3;2;4;3;4", "novelty": "3;2;3;3;3", "presentation": "4;2;3;3;4", "wc_summary": "254;90;53;84;100", "wc_strengths": "184;35;33;27;83", "wc_weaknesses": "92;506;55;42;192", "wc_questions": "25;149;135;161;5", "wc_limitations": "7;9;6;0;8", "wc_review": "562;789;282;314;388", "wc_reply_reviewers": "0;536;14;12;17", "wc_reply_authors": "0;660;0;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 116.2, 70.6665408803912 ], "wc_strengths_avg": [ 72.4, 59.29451913963044 ], "wc_weaknesses_avg": [ 177.4, 172.51040548326355 ], "wc_questions_avg": [ 95.0, 66.1392470474226 ], "wc_limitations_avg": [ 6.0, 3.1622776601683795 ], "wc_review_avg": [ 467.0, 187.92764565119205 ], "wc_reply_reviewers_avg": [ 115.8, 210.17935198301473 ], "wc_reply_authors_avg": [ 132.0, 264.0 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6123724356957948, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5446995736922050463&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Direction-oriented Multi-objective Learning: Simple and Provable Stochastic Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72863", "id": "4Ks8RPcXd9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0e5b96f97c1813bb75f6c28532c2ecc7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4Ks8RPcXd9", "openreview": "https://openreview.net/forum?id=4Ks8RPcXd9", "poster": "/media/PosterPDFs/NeurIPS%202023/72863.png?t=1701716971.751499", "slides": "https://nips.cc/virtual/2023/poster/72863", "video": "https://nips.cc/virtual/2023/poster/72863", "author_site": "Peiyao Xiao, Hao Ban, Hao Ban, Kaiyi Ji", "tldr": "", "abstract": "Multi-objective optimization (MOO) has become an influential framework in many machine learning problems with multiple objectives such as learning with multiple criteria and multi-task learning (MTL). In this paper, we propose a new direction-oriented multi-objective formulation by regularizing the common descent direction within a neighborhood of a direction that optimizes a linear combination of objectives such as the average loss in MTL or a weighted loss that places higher emphasis on some tasks than the others. This formulation includes GD and MGDA as special cases, enjoys the direction-oriented benefit as in CAGrad, and facilitates the design of stochastic algorithms. 
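One way to make the direction-oriented formulation concrete is a small penalized variant: choose simplex weights w so that the combined direction d = G^T w has small norm (as in MGDA) while being pulled toward a target direction g0 such as the average gradient. The projected-gradient solver below is a hedged caricature of that trade-off, not the SDMGrad algorithm or its stochastic analysis; `lam` is an illustrative knob for how strongly d is steered toward g0.

```python
import numpy as np

def project_simplex(v):
    # Euclidean projection onto the probability simplex (sort-based method).
    u = np.sort(v)[::-1]
    css = np.cumsum(u) - 1.0
    rho = np.nonzero(u - css / (np.arange(len(v)) + 1) > 0)[0][-1]
    return np.maximum(v - css[rho] / (rho + 1.0), 0.0)

def direction_oriented(G, g0, lam=0.5, steps=200, lr=0.1):
    # Caricature objective over simplex weights w, with d = G.T @ w:
    #   min_w  0.5 * ||d||^2 - lam * <d, g0>.
    # lam = 0 recovers MGDA's min-norm common direction; lam > 0 pulls the
    # combined direction toward the target direction g0.
    w = np.full(G.shape[0], 1.0 / G.shape[0])
    for _ in range(steps):
        d = G.T @ w
        grad = G @ d - lam * (G @ g0)   # gradient of the objective w.r.t. w
        w = project_simplex(w - lr * grad)
    return G.T @ w

rng = np.random.default_rng(0)
G = rng.normal(size=(3, 5))   # three (possibly conflicting) task gradients in R^5
g0 = G.mean(axis=0)           # target: the average-loss direction
d = direction_oriented(G, g0)
print("per-task alignments <g_i, d>:", G @ d)  # ideally all non-negative
```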
To solve this problem, we propose Stochastic Direction-oriented Multi-objective Gradient descent (SDMGrad) with simple SGD-type updates, and its variant SDMGrad-OS with an efficient objective sampling. We develop a comprehensive convergence analysis for the proposed methods with different loop sizes and regularization coefficients. We show that both SDMGrad and SDMGrad-OS achieve improved sample complexities to find an $\\epsilon$-accurate Pareto stationary point while achieving a small $\\epsilon$-level distance toward a conflict-avoidant (CA) direction. For a constant-level CA distance, their sample complexities match the best known $\\mathcal{O}(\\epsilon^{-2})$ without a bounded function value assumption. Extensive experiments show that our methods achieve competitive or improved performance compared to existing gradient manipulation approaches in a series of tasks on multi-task supervised learning and reinforcement learning. Code is available at https://github.com/ml-opt-lab/sdmgrad.", "keywords": "Multi-objective optimization;multi-task learning;stochastic algorithms;convergence and complexity;Pareto stationarity", "primary_area": "", "supplementary_material": "/attachment/8ac1ffe2ab8352fe1a3bcc273d6744a40f79018d.zip", "author": "Peiyao Xiao;Hao Ban;Kaiyi Ji", "authorids": "~Peiyao_Xiao1;~Hao_Ban1;~Kaiyi_Ji1", "gender": "M;;M", "homepage": "https://xiaopeiyao.github.io/index.html;;https://cse.buffalo.edu/~kaiyiji/", "dblp": ";;205/3164", "google_scholar": "_gf0LboAAAAJ;;E0A3lSIAAAAJ", "orcid": ";;", "linkedin": "xiao-peiyao-915430266;;", "or_profile": "~Peiyao_Xiao1;~Hao_Ban1;~Kaiyi_Ji1", "aff": "State University of New York at Buffalo;;State University of New York at Buffalo", "aff_domain": "buffalo.edu;;buffalo.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nxiao2023directionoriented,\ntitle={Direction-oriented Multi-objective Learning: Simple and Provable Stochastic Algorithms},\nauthor={Peiyao Xiao and Hao Ban and Kaiyi Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4Ks8RPcXd9}\n}", "github": "", "project": "", "reviewers": "81UB;tz6S;ZeY5;5qpL;ft1r;QQfF", "pdf_size": 939803, "rating": "2;4;5;6;6;7", "confidence": "4;4;3;4;3;3", "soundness": "2;2;3;4;3;3", "novelty": "2;2;3;3;2;2", "presentation": "2;3;3;4;3;3", "wc_summary": "133;47;51;80;129;71", "wc_strengths": "26;15;54;93;94;54", "wc_weaknesses": "91;201;28;136;144;461", "wc_questions": "732;1;191;77;54;387", "wc_limitations": "1;3;3;5;1;29", "wc_review": "983;267;327;391;422;1002", "wc_reply_reviewers": "2050;0;209;336;18;312", "wc_reply_authors": "2213;63;768;18;23;580", "reply_reviewers": "3;0;2;3;1;3", "reply_authors": "5;2;3;2;2;4", "rating_avg": [ 5.0, 1.632993161855452 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 85.16666666666667, 34.304599626807416 ], "wc_strengths_avg": [ 56.0, 30.0055550412475 ], "wc_weaknesses_avg": [ 176.83333333333334, 137.5977430370458 ], "wc_questions_avg": [ 240.33333333333334, 253.10055094544714 ], "wc_limitations_avg": [ 7.0, 9.93310961716756 ], "wc_review_avg": [ 565.3333333333334, 306.01343470871046 ], "wc_reply_reviewers_avg": [ 487.5, 710.7282063724032 ], "wc_reply_authors_avg": [ 610.8333333333334, 773.4889390863252 ], "reply_reviewers_avg": [ 2.0, 1.1547005383792515 ],
"reply_authors_avg": [ 3.0, 1.1547005383792515 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18263008337050310156&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "buffalo.edu;;buffalo.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "FIRAL: An Active Learning Algorithm for Multinomial Logistic Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72862", "id": "4L2OlXhiTM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cfcadfe84ee49908cde1fc2992c38d20-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4L2OlXhiTM", "openreview": "https://openreview.net/forum?id=4L2OlXhiTM", "poster": "/media/PosterPDFs/NeurIPS%202023/72862.png?t=1701375045.856007", "slides": "https://nips.cc/virtual/2023/poster/72862", "video": "https://nips.cc/virtual/2023/poster/72862", "author_site": "Youguang Chen, George Biros", "tldr": "", "abstract": "We investigate theory and algorithms for pool-based active learning for multiclass classification using multinomial logistic regression. Using finite sample analysis, we prove that the Fisher Information Ratio (FIR) lower and upper bounds the excess risk. Based on our theoretical analysis, we propose an active learning algorithm that employs regret minimization to minimize the FIR. To verify our derived excess risk bounds, we conduct experiments on synthetic datasets. 
Furthermore, we compare FIRAL with five other methods and find that our scheme outperforms them: it consistently produces the smallest classification error in the multiclass logistic regression setting, as demonstrated through experiments on MNIST, CIFAR-10, and 50-class ImageNet.", "keywords": "statistical learning;active learning;logistic regression;regret minimization", "primary_area": "", "supplementary_material": "/attachment/4de530a35a46e1556b4ab6a992735c97b0fd95cb.zip", "author": "Youguang Chen;George Biros", "authorids": "~Youguang_Chen1;~George_Biros3", "gender": "M;M", "homepage": ";https://www.oden.utexas.edu/~biros", "dblp": "07/9180;", "google_scholar": "3_dHz4cAAAAJ;O4_jWCsAAAAJ", "orcid": ";0000-0002-0033-3994", "linkedin": ";", "or_profile": "~Youguang_Chen1;~George_Biros3", "aff": "University of Texas, Austin;The University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nchen2023firal,\ntitle={{FIRAL}: An Active Learning Algorithm for Multinomial Logistic Regression},\nauthor={Youguang Chen and George Biros},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4L2OlXhiTM}\n}", "github": "", "project": "", "reviewers": "B8jR;zZMa;C5TF;PwwV;3UFt", "pdf_size": 4737376, "rating": "6;6;6;7;7", "confidence": "3;3;3;3;3", "soundness": "3;3;4;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "89;89;66;79;198", "wc_strengths": "54;13;127;21;113", "wc_weaknesses": "427;49;218;22;203", "wc_questions": "78;81;85;15;178", "wc_limitations": "46;38;1;1;4", "wc_review": "694;270;497;138;696", "wc_reply_reviewers": "240;23;0;75;75", "wc_reply_authors": "367;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.2, 47.65459054487825 ], "wc_strengths_avg": [ 65.6, 46.70588827974477 ], "wc_weaknesses_avg": [ 183.8, 144.93777975393442 ], "wc_questions_avg": [ 87.4, 52.12523381242524 ], "wc_limitations_avg": [ 18.0, 19.788885769542457 ], "wc_review_avg": [ 459.0, 224.32119828495925 ], "wc_reply_reviewers_avg": [ 82.6, 83.98237910419067 ], "wc_reply_authors_avg": [ 73.4, 146.79999999999998 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6227026298396197673&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Object-centric Learning with Cyclic Walks between Parts and Whole", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72861", "id": "4L3RfWnDzL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e0d38c676d5855bcfab7f6d29d20ad9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4L3RfWnDzL", "openreview":
"https://openreview.net/forum?id=4L3RfWnDzL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72861", "video": "https://nips.cc/virtual/2023/poster/72861", "author_site": "Ziyu Wang, Mike Zheng Shou, Mengmi Zhang", "tldr": "", "abstract": "Learning object-centric representations from complex natural environments enables both humans and machines with reasoning abilities from low-level perceptual features. To capture compositional entities of the scene, we proposed cyclic walks between perceptual features extracted from vision transformers and object entities. First, a slot-attention module interfaces with these perceptual features and produces a finite set of slot representations. These slots can bind to any object entities in the scene via inter-slot competitions for attention. Next, we establish entity-feature correspondence with cyclic walks along high transition probability based on the pairwise similarity between perceptual features (aka \"parts\") and slot-binded object representations (aka \"whole\"). The whole is greater than its parts and the parts constitute the whole. The part-whole interactions form cycle consistencies, as supervisory signals, to train the slot-attention module. Our rigorous experiments on \\textit{seven} image datasets in \\textit{three} \\textit{unsupervised} tasks demonstrate that the networks trained with our cyclic walks can disentangle foregrounds and backgrounds, discover objects, and segment semantic objects in complex scenes. In contrast to object-centric models attached with a decoder for the pixel-level or feature-level reconstructions, our cyclic walks provide strong learning signals, avoiding computation overheads and enhancing memory efficiency. Our source code and data are available at: \\href{https://github.com/ZhangLab-DeepNeuroCogLab/Parts-Whole-Object-Centric-Learning/}{link}.", "keywords": "object representation learning;slot attention;object-centric;contrastive random walks", "primary_area": "", "supplementary_material": "/attachment/b33491b57c863ee978d1bd1df93f97e230d120f2.pdf", "author": "Ziyu Wang;Mike Zheng Shou;Mengmi Zhang", "authorids": "~Ziyu_Wang6;~Mike_Zheng_Shou1;~Mengmi_Zhang1", "gender": "M;F;", "homepage": "https://github.com/ziyuwwang;https://a0091624.wixsite.com/deepneurocognition-1;http://www.columbia.edu/~zs2262/", "dblp": ";160/7116;284/0807", "google_scholar": "zgcP9-EAAAAJ;https://scholar.google.com.sg/citations?user=G2sVOhcAAAAJ;h1-3lSoAAAAJ", "orcid": ";0000-0002-2694-7097;", "linkedin": ";;", "or_profile": "~Ziyu_Wang6;~Mengmi_Zhang1;~Zheng_Shou1", "aff": "National University of Singapore;A*STAR;National University of Singapore", "aff_domain": "nus.edu.sg;astar.edu.sg;nus.edu.sg", "position": "PhD student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwang2023objectcentric,\ntitle={Object-centric Learning with Cyclic Walks between Parts and Whole},\nauthor={Ziyu Wang and Mike Zheng Shou and Mengmi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4L3RfWnDzL}\n}", "github": "", "project": "", "reviewers": "38HN;oB3o;8Kba;VTwm", "pdf_size": 13011117, "rating": "5;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;3;3", "wc_summary": "96;98;78;98", "wc_strengths": "32;98;206;103", "wc_weaknesses": "88;133;72;97", "wc_questions": "61;134;91;141", "wc_limitations": "1;14;13;31", "wc_review": "278;477;460;470", "wc_reply_reviewers": "256;17;61;192", 
"wc_reply_authors": "957;23;23;582", "reply_reviewers": "2;1;1;2", "reply_authors": "4;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.5, 8.411301920630361 ], "wc_strengths_avg": [ 109.75, 62.23493793682131 ], "wc_weaknesses_avg": [ 97.5, 22.36626924634504 ], "wc_questions_avg": [ 106.75, 32.62188682464581 ], "wc_limitations_avg": [ 14.75, 10.685855136581255 ], "wc_review_avg": [ 421.25, 82.92579514240427 ], "wc_reply_reviewers_avg": [ 131.5, 96.48963674923851 ], "wc_reply_authors_avg": [ 396.25, 396.0980781321717 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3164960571603600778&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "nus.edu.sg;astar.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NUS;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Generalization in the Face of Adaptivity: A Bayesian Perspective", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72860", "id": "4L9g1jUDtO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a3c01875a052f81d27a5211df096cd91-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4L9g1jUDtO", "openreview": "https://openreview.net/forum?id=4L9g1jUDtO", "poster": "/media/PosterPDFs/NeurIPS%202023/72860.png?t=1701604378.7494743", "slides": "https://nips.cc/virtual/2023/poster/72860", "video": "https://nips.cc/virtual/2023/poster/72860", "author_site": "Moshe Shenfeld, Katrina Ligett", "tldr": "", "abstract": "Repeated use of a data sample via adaptively chosen queries can rapidly lead to overfitting, wherein the empirical evaluation of queries on the sample significantly deviates from their mean with respect to the underlying data distribution. It turns out that simple noise addition algorithms suffice to prevent this issue, and differential privacy-based analysis of these algorithms shows that they can handle an asymptotically optimal number of queries. However, differential privacy's worst-case nature entails scaling such noise to the range of the queries even for highly-concentrated queries, or introducing more complex algorithms.\n\nIn this paper, we prove that straightforward noise-addition algorithms already provide variance-dependent guarantees that also extend to unbounded queries. This improvement stems from a novel characterization that illuminates the core problem of adaptive data analysis. We show that the harm of adaptivity results from the covariance between the new query and a Bayes factor-based measure of how much information about the data sample was encoded in the responses given to past queries. 
We then leverage this characterization to introduce a new data-dependent stability notion that can bound this covariance.", "keywords": "Differential Privacy;Adaptive Data Analysis", "primary_area": "", "supplementary_material": "/attachment/b1841e77cc2e743a5b189ac0d4d5c4d38a643fd2.pdf", "author": "Moshe Shenfeld;Katrina Ligett", "authorids": "~Moshe_Shenfeld1;~Katrina_Ligett1", "gender": "M;", "homepage": ";", "dblp": "242/8142;", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-7059-7557;", "linkedin": "moshe-shenfeld-0b304688/;", "or_profile": "~Moshe_Shenfeld1;~Katrina_Ligett1", "aff": "Hebrew University of Jerusalem;", "aff_domain": "huji.ac.il;", "position": "PhD student;", "bibtex": "@inproceedings{\nshenfeld2023generalization,\ntitle={Generalization in the Face of Adaptivity: A Bayesian Perspective},\nauthor={Moshe Shenfeld and Katrina Ligett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4L9g1jUDtO}\n}", "github": "", "project": "", "reviewers": "GhwK;KJvw;WLLe;wAxx", "pdf_size": 607708, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;4;3;3", "wc_summary": "48;128;341;151", "wc_strengths": "52;70;151;67", "wc_weaknesses": "71;104;232;89", "wc_questions": "16;1;122;2", "wc_limitations": "43;20;21;9", "wc_review": "230;323;867;318", "wc_reply_reviewers": "11;0;108;15", "wc_reply_authors": "0;0;356;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 167.0, 107.48720854129574 ], "wc_strengths_avg": [ 85.0, 38.71046370169182 ], "wc_weaknesses_avg": [ 124.0, 63.43894702783141 ], "wc_questions_avg": [ 35.25, 50.43498289877771 ], "wc_limitations_avg": [ 23.25, 12.336429791475327 ], "wc_review_avg": [ 434.5, 252.4287028053664 ], "wc_reply_reviewers_avg": [ 33.5, 43.36184959154764 ], "wc_reply_authors_avg": [ 89.0, 154.1525218736301 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=300519128185785364&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "huji.ac.il;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "High Precision Causal Model Evaluation with Conditional Randomization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72859", "id": "4PkBhz18in", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4dfcbcfa1f0425cd18aafa35a68019a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4PkBhz18in", "openreview": "https://openreview.net/forum?id=4PkBhz18in", "poster": "/media/PosterPDFs/NeurIPS%202023/72859.png?t=1702246727.3568537", "slides": "https://nips.cc/virtual/2023/poster/72859", "video": "https://nips.cc/virtual/2023/poster/72859", "author_site": "Chao Ma, Cheng Zhang", "tldr": "", "abstract": 
"The gold standard for causal model evaluation involves comparing model predictions with true effects estimated from randomized controlled trials (RCT). However, RCTs are not always feasible or ethical to perform. In contrast, conditionally randomized experiments based on inverse probability weighting (IPW) offer a more realistic approach but may suffer from high estimation variance. To tackle this challenge and enhance causal model evaluation in real-world conditional randomization settings, we introduce a novel low-variance estimator for causal error, dubbed as the pairs estimator. By applying the same IPW estimator to both the model and true experimental effects, our estimator effectively cancels out the variance due to IPW and achieves a smaller asymptotic variance. Empirical studies demonstrate the improved of our estimator, highlighting its potential on achieving near-RCT performance. Our method offers a simple yet powerful solution to evaluate causal inference models in conditional randomization settings without complicated modification of the IPW estimator itself, paving the way for more robust and reliable model assessments.", "keywords": "causality;causal inference;causal model evaluation", "primary_area": "", "supplementary_material": "/attachment/ff91efddfee7791663bcdc671c65bb413d375c41.pdf", "author": "Chao Ma;Cheng Zhang", "authorids": "~Chao_Ma2;~Cheng_Zhang1", "gender": "M;F", "homepage": ";http://cheng-zhang.org", "dblp": ";82/6384-5", "google_scholar": "https://scholar.google.co.uk/citations?user=UWP3kWEAAAAJ;r40iAwIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Chao_Ma2;~Cheng_Zhang1", "aff": "Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com", "position": "Researcher;Principal Researcher", "bibtex": "@inproceedings{\nma2023high,\ntitle={High Precision Causal Model Evaluation with Conditional Randomization},\nauthor={Chao Ma and Cheng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4PkBhz18in}\n}", "github": "", "project": "", "reviewers": "zDTG;yvJa;t8vT;PdWH;r45Q", "pdf_size": 2196114, "rating": "5;6;6;6;7", "confidence": "3;3;3;3;3", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "2;3;4;3;3", "wc_summary": "47;51;82;96;268", "wc_strengths": "36;47;65;21;72", "wc_weaknesses": "130;401;176;107;41", "wc_questions": "50;45;179;37;121", "wc_limitations": "30;44;10;82;23", "wc_review": "293;588;512;343;525", "wc_reply_reviewers": "19;13;0;0;21", "wc_reply_authors": "21;20;35;36;19", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 108.8, 81.71511488090805 ], "wc_strengths_avg": [ 48.2, 18.648324321504063 ], "wc_weaknesses_avg": [ 171.0, 122.96503568087962 ], "wc_questions_avg": [ 86.4, 55.22897790109826 ], "wc_limitations_avg": [ 37.8, 24.67711490429949 ], "wc_review_avg": [ 452.2, 113.65456436060983 ], "wc_reply_reviewers_avg": [ 10.6, 9.046546302318914 ], "wc_reply_authors_avg": [ 26.2, 7.626270385975047 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:iFqlXsTd_QsJ:scholar.google.com/&scioq=High+Precision+Causal+Model+Evaluation+with+Conditional+Randomization&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "microsoft.com;microsoft.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "CRoSS: Diffusion Model Makes Controllable, Robust and Secure Image Steganography", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72858", "id": "4R2Y5B12jm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff99390b6e942fb1dd7023f787fb0a27-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4R2Y5B12jm", "openreview": "https://openreview.net/forum?id=4R2Y5B12jm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72858", "video": "https://nips.cc/virtual/2023/poster/72858", "author_site": "Jiwen Yu, Xuanyu Zhang, Youmin Xu, Jian Zhang", "tldr": "", "abstract": "Current image steganography techniques are mainly focused on cover-based methods, which commonly have the risk of leaking secret images and poor robustness against degraded container images. Inspired by recent developments in diffusion models, we discovered that two properties of diffusion models, the ability to achieve translation between two images without training, and robustness to noisy data, can be used to improve security and natural robustness in image steganography tasks. For the choice of diffusion model, we selected Stable Diffusion, a type of conditional diffusion model, and fully utilized the latest tools from open-source communities, such as LoRAs and ControlNets, to improve the controllability and diversity of container images. In summary, we propose a novel image steganography framework, named Controllable, Robust and Secure Image Steganography (CRoSS), which has significant advantages in controllability, robustness, and security compared to cover-based image steganography methods. These benefits are obtained without additional training. To our knowledge, this is the first work to introduce diffusion models to the field of image steganography. 
In the experimental section, we conducted detailed experiments to demonstrate the advantages of our proposed CRoSS framework in controllability, robustness, and security.", "keywords": "Diffusion models;image steganography;Stable Diffusion;coverless steganography", "primary_area": "", "supplementary_material": "/attachment/5c9fd770098c1cab912484625bc92bc6304c0a3b.pdf", "author": "Jiwen Yu;Xuanyu Zhang;Youmin Xu;Jian Zhang", "authorids": "~Jiwen_Yu1;~Xuanyu_Zhang2;~Youmin_Xu1;~Jian_Zhang22", "gender": "M;M;M;M", "homepage": "https://yujiwen.github.io/;https://villa.jianzhang.tech/;https://zirconium2159.github.io;http://jianzhang.tech/", "dblp": "259/4277;323/9396;292/5863;07/314-18", "google_scholar": "https://scholar.google.com.hk/citations?user=uoRPLHIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;7brFI_4AAAAJ", "orcid": ";0000-0002-6713-4500;;0000-0001-5486-3125", "linkedin": ";;;", "or_profile": "~Jiwen_Yu1;~Xuanyu_Zhang2;~Youmin_Xu1;~Jian_Zhang22", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyu2023cross,\ntitle={{CR}o{SS}: Diffusion Model Makes Controllable, Robust and Secure Image Steganography},\nauthor={Jiwen Yu and Xuanyu Zhang and Youmin Xu and Jian Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4R2Y5B12jm}\n}", "github": "", "project": "", "reviewers": "g14p;1iwK;mtD2;Nci3", "pdf_size": 5018079, "rating": "3;4;6;7", "confidence": "5;4;3;4", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "1;2;3;3", "wc_summary": "46;24;88;51", "wc_strengths": "19;20;105;87", "wc_weaknesses": "108;319;163;100", "wc_questions": "23;67;35;12", "wc_limitations": "10;1;71;5", "wc_review": "206;431;462;255", "wc_reply_reviewers": "178;175;23;0", "wc_reply_authors": "745;940;43;71", "reply_reviewers": "2;1;1;0", "reply_authors": "4;5;2;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 52.25, 23.004075725836064 ], "wc_strengths_avg": [ 57.75, 38.77740966078059 ], "wc_weaknesses_avg": [ 172.5, 87.99005625637479 ], "wc_questions_avg": [ 34.25, 20.58367071248469 ], "wc_limitations_avg": [ 21.75, 28.612715704735194 ], "wc_review_avg": [ 338.5, 109.92838577910621 ], "wc_reply_reviewers_avg": [ 94.0, 82.90657392511163 ], "wc_reply_authors_avg": [ 449.75, 398.87803587061546 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.670820393249937, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7472944508336251635&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Binary Classification with Confidence Difference", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72857", "id": "4RoD1o7yq6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/12c118ef87fde56a10bd858842781b34-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4RoD1o7yq6", "openreview": "https://openreview.net/forum?id=4RoD1o7yq6", "poster": "/media/PosterPDFs/NeurIPS%202023/72857.png?t=1701428687.1543791", "slides": "https://nips.cc/virtual/2023/poster/72857", "video": "https://nips.cc/virtual/2023/poster/72857", "author_site": "Wei Wang, Lei Feng, Yuchen Jiang, Gang Niu, Min-Ling Zhang, Masashi Sugiyama", "tldr": "", "abstract": "Recently, learning with soft labels has been shown to achieve better performance than learning with hard labels in terms of model generalization, calibration, and robustness. However, collecting pointwise labeling confidence for all training examples can be challenging and time-consuming in real-world scenarios. This paper delves into a novel weakly supervised binary classification problem called confidence-difference (ConfDiff) classification. Instead of pointwise labeling confidence, we are given only unlabeled data pairs with confidence difference that specifies the difference in the probabilities of being positive. We propose a risk-consistent approach to tackle this problem and show that the estimation error bound achieves the optimal convergence rate. We also introduce a risk correction approach to mitigate overfitting problems, whose consistency and convergence rate are also proven. Extensive experiments on benchmark data sets and a real-world recommender system data set validate the effectiveness of our proposed approaches in exploiting the supervision information of the confidence difference.", "keywords": "Weakly supervised learning;binary classification;unbiased risk estimator", "primary_area": "", "supplementary_material": "/attachment/c219b52cee85934c16b87356fe1d6ee0d7032ad1.zip", "author": "Wei Wang;Lei Feng;Yuchen Jiang;Gang Niu;Min-Ling Zhang;Masashi Sugiyama", "authorids": "~Wei_Wang68;~Lei_Feng1;~Yuchen_Jiang2;~Gang_Niu1;~Min-Ling_Zhang2;~Masashi_Sugiyama1", "gender": "M;M;;M;M;M", "homepage": "https://wwangwitsel.github.io/;https://lfeng1995.github.io/;https://scholar.google.com/citations?view_op=list_works&hl=en&user=xCmUCT0AAAAJ;https://niug1984.github.io;http://palm.seu.edu.cn/zhangml/;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "35/7092-373.html;76/847-6;189/0003;26/3367-1;84/271.html;35/1228", "google_scholar": "a38jZkwAAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;uFHCIM0AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": "0000-0002-8860-0494;0000-0003-2839-5799;;;0000-0003-1880-5918;0000-0001-6658-6743", "linkedin": ";;;;;", "or_profile": "~Wei_Wang68;~Lei_Feng1;~Yuchen_Jiang2;~Gang_Niu1;~Min-Ling_Zhang2;~Masashi_Sugiyama1", "aff": "The University of Tokyo;Nanyang Technological University;Alibaba Group;RIKEN;Southeast University;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;ntu.edu.sg;alibaba-inc.com;riken.jp;seu.edu.cn;u-tokyo.ac.jp", "position": "PhD student;Visiting Professor;Researcher;Research Scientist (tenured);Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023binary,\ntitle={Binary Classification with Confidence Difference},\nauthor={Wei Wang and Lei Feng and Yuchen Jiang and Gang Niu and Min-Ling Zhang and Masashi Sugiyama},\nbooktitle={Thirty-seventh Conference on Neural Information 
Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4RoD1o7yq6}\n}", "github": "", "project": "", "reviewers": "XYQ6;DX8U;Qs9H;G7aZ", "pdf_size": 2877611, "rating": "6;6;6;9", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "4;3;3;4", "wc_summary": "112;102;123;100", "wc_strengths": "26;209;100;66", "wc_weaknesses": "201;271;52;23", "wc_questions": "55;13;39;197", "wc_limitations": "1;29;52;11", "wc_review": "395;624;366;397", "wc_reply_reviewers": "182;37;22;18", "wc_reply_authors": "236;0;0;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 109.25, 9.148087231765993 ], "wc_strengths_avg": [ 100.25, 68.03078347336594 ], "wc_weaknesses_avg": [ 136.75, 102.80169016120308 ], "wc_questions_avg": [ 76.0, 71.449282711585 ], "wc_limitations_avg": [ 23.25, 19.395553614166314 ], "wc_review_avg": [ 445.5, 103.78463277383604 ], "wc_reply_reviewers_avg": [ 64.75, 68.06384870105421 ], "wc_reply_authors_avg": [ 63.5, 99.86365705300402 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7732515859268268957&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "u-tokyo.ac.jp;ntu.edu.sg;alibaba-inc.com;riken.jp;seu.edu.cn;u-tokyo.ac.jp", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "University of Tokyo;Nanyang Technological University;Alibaba Group;RIKEN;Southeast University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ntu.edu.sg;https://www.alibaba.com;https://www.riken.jp;https://www.seu.edu.cn/", "aff_unique_abbr": "UTokyo;NTU;Alibaba;RIKEN;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;2;0", "aff_country_unique": "Japan;Singapore;China" }, { "title": "Cal-DETR: Calibrated Detection Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72856", "id": "4SkPTD6XNP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e271e30de7a2e462ca1f85cefa816380-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4SkPTD6XNP", "openreview": "https://openreview.net/forum?id=4SkPTD6XNP", "poster": "/media/PosterPDFs/NeurIPS%202023/72856.png?t=1701590133.8609548", "slides": "https://nips.cc/virtual/2023/poster/72856", "video": "https://nips.cc/virtual/2023/poster/72856", "author_site": "Muhammad Akhtar Munir, Salman Khan, Muhammad Haris Khan, Mohsen Ali, Fahad Shahbaz Khan", "tldr": "", "abstract": "Albeit revealing impressive predictive performance for several computer vision tasks, deep neural networks (DNNs) are prone to making overconfident predictions. This limits the adoption and wider utilization of DNNs in many safety-critical applications. There have been recent efforts toward calibrating DNNs, however, almost all of them focus on the classification task. Surprisingly, very little attention has been devoted to calibrating modern DNN-based object detectors, especially detection transformers, which have recently demonstrated promising detection performance and are influential in many decision-making systems. 
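For background, the overconfidence discussed above is commonly quantified with the expected calibration error (ECE): bin predictions by confidence and average the gap between per-bin confidence and accuracy. The helper below is the standard classification-style ECE, included only as context; Cal-DETR itself targets detection-specific calibration measures.

```python
import numpy as np

def expected_calibration_error(confidences, correct, n_bins=10):
    # confidences: (n,) top predicted probability; correct: (n,) 0/1 outcomes.
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(edges[:-1], edges[1:]):
        mask = (confidences > lo) & (confidences <= hi)
        if mask.any():
            gap = abs(confidences[mask].mean() - correct[mask].mean())
            ece += mask.mean() * gap  # bin weight = fraction of samples in bin
    return ece

rng = np.random.default_rng(0)
conf = rng.uniform(0.5, 1.0, size=5000)
# Simulate an overconfident model: actual accuracy trails stated confidence.
correct = (rng.uniform(size=5000) < (conf - 0.15)).astype(float)
print(expected_calibration_error(conf, correct))  # roughly the 0.15 gap
```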
In this work, we address the problem by proposing a mechanism for calibrated detection transformers (Cal-DETR), particularly for Deformable-DETR, UP-DETR, and DINO. We pursue the train-time calibration route and make the following contributions. First, we propose a simple yet effective approach for quantifying uncertainty in transformer-based object detectors. Second, we develop an uncertainty-guided logit modulation mechanism that leverages the uncertainty to modulate the class logits. Third, we develop a logit mixing approach that acts as a regularizer with detection-specific losses and is also complementary to the uncertainty-guided logit modulation technique to further improve the calibration performance. Lastly, we conduct extensive experiments across three in-domain and four out-domain scenarios. Results corroborate the effectiveness of Cal-DETR against the competing train-time methods in calibrating both in-domain and out-domain detections while maintaining or even improving the detection performance. Our codebase and pre-trained models can be accessed at \\url{https://github.com/akhtarvision/cal-detr}.", "keywords": "Model Calibration;Object Detection;Detection Transformers;Uncertainty", "primary_area": "", "supplementary_material": "/attachment/17ea186d4b9879aecf053de614a8397a18f06827.pdf", "author": "Muhammad Akhtar Munir;Salman Khan;Muhammad Haris Khan;Mohsen Ali;Fahad Khan", "authorids": "~Muhammad_Akhtar_Munir1;~Salman_Khan4;~Muhammad_Haris_Khan3;~Mohsen_Ali2;~Fahad_Khan1", "gender": "M;M;M;;M", "homepage": ";https://salman-h-khan.github.io/;https://m-haris-khan.com;https://mohsenali.github.io/;https://sites.google.com/view/fahadkhans/home", "dblp": "239/6076;32/11535-1;155/3076;02/10964;05/8618", "google_scholar": "https://scholar.google.com.pk/citations?user=sT-epZAAAAAJ;https://scholar.google.es/citations?user=M59O9lkAAAAJ;ZgERfFwAAAAJ;https://scholar.google.com.pk/citations?hl=en;zvaeYnUAAAAJ", "orcid": ";0000-0002-9502-1749;0000-0001-9746-276X;;", "linkedin": ";;muhammad-haris-khan-1516714b/;mohsen-ali-5666394/;", "or_profile": "~Muhammad_Akhtar_Munir1;~Salman_Khan4;~Muhammad_Haris_Khan3;~Mohsen_Ali2;~Fahad_Khan1", "aff": "Information Technology University Lahore, Pakistan;Australian National University;Mohamed Bin Zayed University of Artificial Intelligence;Information Technology University;Link\u00f6ping University", "aff_domain": "itu.edu.pk;anu.edu.au;mbzuai.ac.ae;iml.itu.edu.pk;liu.se", "position": "PhD student;Lecturer;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nmunir2023caldetr,\ntitle={Cal-{DETR}: Calibrated Detection Transformer},\nauthor={Muhammad Akhtar Munir and Salman Khan and Muhammad Haris Khan and Mohsen Ali and Fahad Khan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4SkPTD6XNP}\n}", "github": "", "project": "", "reviewers": "95V6;wX9Z;co8a;Gc9J", "pdf_size": 868960, "rating": "5;5;6;7", "confidence": "3;4;2;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "68;93;51;120", "wc_strengths": "21;57;31;77", "wc_weaknesses": "119;118;42;373", "wc_questions": "127;6;15;143", "wc_limitations": "4;1;9;2", "wc_review": "339;275;148;715", "wc_reply_reviewers": "29;22;26;285", "wc_reply_authors": "92;0;0;150", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 
0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 26.06722079547415 ], "wc_strengths_avg": [ 46.5, 21.97157254271983 ], "wc_weaknesses_avg": [ 163.0, 125.20183704722547 ], "wc_questions_avg": [ 72.75, 62.58743883560023 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 369.25, 211.1236308422153 ], "wc_reply_reviewers_avg": [ 90.5, 112.32208153341888 ], "wc_reply_authors_avg": [ 60.5, 63.88074827363875 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.40451991747794525, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15235270713366271727&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "itu.edu.pk;anu.edu.au;mbzuai.ac.ae;iml.itu.edu.pk;liu.se", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Information Technology University;Australian National University;Mohamed bin Zayed University of Artificial Intelligence;Link\u00f6ping University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.itu.edu.pk;https://www.anu.edu.au;https://www.mbzuai.ac.ae;https://www.liu.se", "aff_unique_abbr": ";ANU;MBZUAI;LiU", "aff_campus_unique_index": "0", "aff_campus_unique": "Lahore;", "aff_country_unique_index": "0;1;2;3;4", "aff_country_unique": "Pakistan;Australia;United Arab Emirates;Iran;Sweden" }, { "title": "Reference-Based POMDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72855", "id": "4Sn2vUs0zA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ffb2b550ff6a75c536b279348a93fb0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4Sn2vUs0zA", "openreview": "https://openreview.net/forum?id=4Sn2vUs0zA", "poster": "/media/PosterPDFs/NeurIPS%202023/72855.png?t=1701314051.4674454", "slides": "https://nips.cc/virtual/2023/poster/72855", "video": "https://nips.cc/virtual/2023/poster/72855", "author_site": "Edward Kim, Yohan Karunanayake, Hanna Kurniawati", "tldr": "", "abstract": "Making good decisions in partially observable and non-deterministic scenarios is a crucial capability for robots. A Partially Observable Markov Decision Process (POMDP) is a general framework for the above problem. Despite advances in POMDP solving, problems with long planning horizons and evolving environments remain difficult to solve even by the best approximate solvers today. To alleviate this difficulty, we propose a slightly modified POMDP problem, called a Reference-Based POMDP, where the objective is to balance between maximizing the expected total reward and being close to a given reference (stochastic) policy. The optimal policy of a Reference-Based POMDP can be computed via iterative expectations using the given reference policy, thereby avoiding exhaustive enumeration of actions at each belief node of the search tree. We demonstrate theoretically that the standard POMDP under stochastic policies is related to the Reference-Based POMDP. To demonstrate the feasibility of exploiting the formulation, we present a basic algorithm RefSolver. 
Results from experiments on long-horizon navigation problems indicate that this basic algorithm substantially outperforms POMCP.", "keywords": "POMDP;planning under uncertainty;long horizon", "primary_area": "", "supplementary_material": "/attachment/0c51531efecb8b00f7bd4895581675952753d7aa.zip", "author": "Edward Kim;Yohan Karunanayake;Hanna Kurniawati", "authorids": "edward.kim@anu.edu.au;~Yohan_Karunanayake1;~Hanna_Kurniawati1", "gender": ";;F", "homepage": ";;http://users.cecs.anu.edu.au/~hannakur/", "dblp": ";;https://dblp.uni-trier.de/pers/k/Kurniawati:Hanna.html", "google_scholar": ";tSF2gWkAAAAJ;https://scholar.google.com.au/citations?user=JkjFXbAAAAAJ", "orcid": ";0000-0002-9384-9921;", "linkedin": ";;", "or_profile": "edward.kim@anu.edu.au;~Yohan_Karunanayake1;~Hanna_Kurniawati1", "aff": ";Australian National University;Australian National University", "aff_domain": ";anu.edu.au;anu.edu.au", "position": ";PhD student;Associate Professor", "bibtex": "@inproceedings{\nkim2023referencebased,\ntitle={Reference-Based {POMDP}s},\nauthor={Edward Kim and Yohan Karunanayake and Hanna Kurniawati},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4Sn2vUs0zA}\n}", "github": "", "project": "", "reviewers": "Hi3x;RFcX;poha;Kh8M;NtUC", "pdf_size": 4071892, "rating": "5;5;5;6;7", "confidence": "3;2;3;5;3", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;2;3;2;3", "wc_summary": "54;20;80;25;282", "wc_strengths": "47;18;37;13;19", "wc_weaknesses": "105;2;26;141;63", "wc_questions": "47;216;399;20;79", "wc_limitations": "6;1;18;5;38", "wc_review": "259;257;560;204;481", "wc_reply_reviewers": "4;4;0;71;31", "wc_reply_authors": "0;0;0;382;0", "reply_reviewers": "1;1;0;2;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 92.2, 97.32502247623681 ], "wc_strengths_avg": [ 26.8, 12.967652061957864 ], "wc_weaknesses_avg": [ 67.4, 50.67780579306883 ], "wc_questions_avg": [ 152.2, 140.6291577163143 ], "wc_limitations_avg": [ 13.6, 13.455110553243328 ], "wc_review_avg": [ 352.2, 141.05516651296398 ], "wc_reply_reviewers_avg": [ 22.0, 26.884940022250372 ], "wc_reply_authors_avg": [ 76.4, 152.8 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3572172541558802, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4197485885299570291&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";anu.edu.au;anu.edu.au", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Australian National University", "aff_unique_dep": "", "aff_unique_url": "https://www.anu.edu.au", "aff_unique_abbr": "ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Reversible and irreversible bracket-based dynamics for deep graph neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72854", "id": "4SoTUaTK8N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7903af0a1cffb43dbb2f8160d110a5f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4SoTUaTK8N", "openreview": 
"https://openreview.net/forum?id=4SoTUaTK8N", "poster": "/media/PosterPDFs/NeurIPS%202023/72854.png?t=1701965088.537998", "slides": "https://nips.cc/virtual/2023/poster/72854", "video": "https://nips.cc/virtual/2023/poster/72854", "author_site": "Anthony Gruber, Kookjin Lee, Nathaniel Trask", "tldr": "", "abstract": "Recent works have shown that physics-inspired architectures allow the training of deep graph neural networks (GNNs) without oversmoothing. The role of these physics is unclear, however, with successful examples of both reversible (e.g., Hamiltonian) and irreversible (e.g., diffusion) phenomena producing comparable results despite diametrically opposed mechanisms, and further complications arising due to empirical departures from mathematical theory. This work presents a series of novel GNN architectures based upon structure-preserving bracket-based dynamical systems, which are provably guaranteed to either conserve energy or generate positive dissipation with increasing depth. It is shown that the theoretically principled framework employed here allows for inherently explainable constructions, which contextualize departures from theory in current architectures and better elucidate the roles of reversibility and irreversibility in network performance. Code is available at the Github repository \\url{https://github.com/natrask/BracketGraphs}.", "keywords": "graph neural networks;structure preserving machine learning;neural ordinary differential equations;hamiltonian dynamics;metriplectic dynamics", "primary_area": "", "supplementary_material": "/attachment/2060a3f3fa2523cc0b79e36d00df7c48c79e733b.pdf", "author": "Anthony Gruber;Kookjin Lee;Nathaniel Trask", "authorids": "adgrube@sandia.gov;~Kookjin_Lee1;~Nathaniel_Trask2", "gender": ";M;M", "homepage": ";https://scholar.google.com/citations?hl=en&user=KL89hVQAAAAJ&view_op=list_works;https://www.sandia.gov/ccr/staff/nathaniel-albert-trask/", "dblp": ";122/5103;188/8236", "google_scholar": ";https://scholar.google.com/citations?hl=en;6iLMZkwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "adgrube@sandia.gov;~Kookjin_Lee1;~Nathaniel_Trask2", "aff": ";Arizona State University;Sandia National Laboratories", "aff_domain": ";asu.edu;sandia.gov", "position": ";Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\ngruber2023reversible,\ntitle={Reversible and irreversible bracket-based dynamics for deep graph neural networks},\nauthor={Anthony Gruber and Kookjin Lee and Nathaniel Trask},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4SoTUaTK8N}\n}", "github": "", "project": "", "reviewers": "dWFZ;7QNL;y9dF;Ui2h", "pdf_size": 1323009, "rating": "5;6;6;6", "confidence": "2;2;3;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;2", "wc_summary": "149;75;40;95", "wc_strengths": "76;30;69;29", "wc_weaknesses": "35;19;246;160", "wc_questions": "121;36;13;248", "wc_limitations": "1;6;6;15", "wc_review": "382;166;374;547", "wc_reply_reviewers": "26;0;122;291", "wc_reply_authors": "0;0;34;420", "reply_reviewers": "1;0;1;3", "reply_authors": "1;1;2;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 89.75, 39.46754996196242 ], "wc_strengths_avg": [ 51.0, 21.644860821913362 ], "wc_weaknesses_avg": [ 115.0, 93.27647077371657 ], "wc_questions_avg": [ 104.5, 
92.0991313748398 ], "wc_limitations_avg": [ 7.0, 5.049752469181039 ], "wc_review_avg": [ 367.25, 135.16170870479553 ], "wc_reply_reviewers_avg": [ 109.75, 114.08412466246125 ], "wc_reply_authors_avg": [ 113.5, 177.50140844511628 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4076909951556054202&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";asu.edu;sandia.gov", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Arizona State University;Sandia National Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.sandia.gov", "aff_unique_abbr": "ASU;SNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "MuSe-GNN: Learning Unified Gene Representation From Multimodal Biological Graph Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72853", "id": "4UCktT9XZx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4db8a681ae1e58376dc6227978829063-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4UCktT9XZx", "openreview": "https://openreview.net/forum?id=4UCktT9XZx", "poster": "/media/PosterPDFs/NeurIPS%202023/72853.png?t=1699302453.3640988", "slides": "https://nips.cc/virtual/2023/poster/72853", "video": "https://nips.cc/virtual/2023/poster/72853", "author_site": "Tianyu Liu, Yuge Wang, Rex Ying, Hongyu Zhao", "tldr": "", "abstract": "Discovering genes with similar functions across diverse biomedical contexts poses a significant challenge in gene representation learning due to data heterogeneity. In this study, we resolve this problem by introducing a novel model called Multimodal Similarity Learning Graph Neural Network, which combines Multimodal Machine Learning and Deep Graph Neural Networks to learn gene representations from single-cell sequencing and spatial transcriptomic data. Leveraging 82 training datasets from 10 tissues, three sequencing techniques, and three species, we create informative graph structures for model training and gene representation generation, while incorporating regularization with weighted similarity learning and contrastive learning to learn cross-data gene-gene relationships. This novel design ensures that we can offer gene representations containing functional similarity across different contexts in a joint space. Comprehensive benchmarking analysis shows our model's capacity to effectively capture gene function similarity across multiple modalities, outperforming state-of-the-art methods in gene representation learning by up to $\\textbf{100.4}$%. Moreover, we employ bioinformatics tools in conjunction with gene representations to uncover pathway enrichment, regulation causal networks, and functions of disease-associated genes. 
Therefore, our model efficiently produces unified gene representations for the analysis of gene functions, tissue functions, diseases, and species evolution.", "keywords": "Multimodal Learning; Representation Learning; Graph Neural Network; Similarity Learning; Contrastive Learning; Computational Biology and Bioinformatics; Single-cell genomics", "primary_area": "", "supplementary_material": "/attachment/aadb0be0f95096047e719dc8d8fa48d17f007881.zip", "author": "Tianyu Liu;Yuge Wang;Zhitao Ying;Hongyu Zhao", "authorids": "~Tianyu_Liu4;~Yuge_Wang1;~Zhitao_Ying1;~Hongyu_Zhao1", "gender": "M;Not Specified;M;M", "homepage": "https://helloworldlty.github.io/;;https://www.cs.yale.edu/homes/ying-rex;https://ysph.yale.edu/profile/hongyu-zhao/", "dblp": "134/1099-5;326/9720;209/4936;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;6fqNXooAAAAJ;__z1kpoAAAAJ", "orcid": "0000-0002-9412-6573;0000-0001-7389-1519;;", "linkedin": ";yuge-wang-2871a5190;rex-ying-92770148/;", "or_profile": "~Tianyu_Liu4;~Yuge_Wang1;~Zhitao_Ying1;~Hongyu_Zhao1", "aff": "Yale University;Yale University;Yale University;Yale University", "aff_domain": "yale.edu;yale.edu;yale.edu;yale.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023musegnn,\ntitle={MuSe-{GNN}: Learning Unified Gene Representation From Multimodal Biological Graph Data},\nauthor={Tianyu Liu and Yuge Wang and Zhitao Ying and Hongyu Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4UCktT9XZx}\n}", "github": "", "project": "", "reviewers": "mFBG;anSh;cB5G;vvZM", "pdf_size": 26450720, "rating": "6;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "44;46;263;129", "wc_strengths": "51;49;52;85", "wc_weaknesses": "24;31;71;80", "wc_questions": "67;22;132;2", "wc_limitations": "1;1;58;16", "wc_review": "187;149;576;312", "wc_reply_reviewers": "0;17;33;18", "wc_reply_authors": "0;14;16;16", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 120.5, 89.13613184337763 ], "wc_strengths_avg": [ 59.25, 14.905955185763842 ], "wc_weaknesses_avg": [ 51.5, 24.336187047275914 ], "wc_questions_avg": [ 55.75, 49.92181386928964 ], "wc_limitations_avg": [ 19.0, 23.33452377915607 ], "wc_review_avg": [ 306.0, 167.14215506568056 ], "wc_reply_reviewers_avg": [ 17.0, 11.683321445547923 ], "wc_reply_authors_avg": [ 11.5, 6.689544080129826 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9967900312905700304&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "yale.edu;yale.edu;yale.edu;yale.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Training-free Diffusion Model 
Adaptation for Variable-Sized Text-to-Image Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72852", "id": "4ULTSBBY4U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0378e0c642b1d292fcb224e8d5a39b3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4ULTSBBY4U", "openreview": "https://openreview.net/forum?id=4ULTSBBY4U", "poster": "/media/PosterPDFs/NeurIPS%202023/72852.png?t=1702037122.1871297", "slides": "https://nips.cc/virtual/2023/poster/72852", "video": "https://nips.cc/virtual/2023/poster/72852", "author_site": "Zhiyu Jin, Xuli Shen, Bin Li, Xiangyang Xue", "tldr": "", "abstract": "Diffusion models (DMs) have recently gained attention with state-of-the-art performance in text-to-image synthesis. Abiding by the tradition in deep learning, DMs are trained and evaluated on images with fixed sizes. However, users demand images with specific sizes and various aspect ratios. This paper focuses on adapting text-to-image diffusion models to handle such variety while maintaining visual fidelity. First, we observe that, during the synthesis, lower resolution images suffer from incomplete object portrayal, while higher resolution images exhibit repetitively disordered presentation. Next, we establish a statistical relationship indicating that attention entropy changes with token quantity, suggesting that models aggregate spatial information in proportion to image resolution. The subsequent interpretation of our observations is that objects are incompletely depicted due to limited spatial information for low resolutions, while repetitively disorganized presentation arises from redundant spatial information for high resolutions. From this perspective, we propose a scaling factor to alleviate the change of attention entropy and mitigate the defective pattern observed. Extensive experimental results validate the efficacy of the proposed scaling factor, enabling models to achieve better visual effects, image quality, and text alignment. 
Notably, these improvements are achieved without additional training or fine-tuning techniques.", "keywords": "Text-to-Image Synthesis;Variable-Sized Image Synthesis;Entropy", "primary_area": "", "supplementary_material": "/attachment/a733d18c8e58dd463d86272f470f69b59d911b7b.pdf", "author": "Zhiyu Jin;Xuli Shen;Bin Li;Xiangyang Xue", "authorids": "~Zhiyu_Jin2;~Xuli_Shen1;~Bin_Li4;~Xiangyang_Xue2", "gender": "M;M;M;M", "homepage": "https://github.com/Dimlife;;https://aimpressionist.github.io/publications;http://homepage.fudan.edu.cn//xyxue", "dblp": ";277/5252;89/6764-15;84/3791", "google_scholar": ";OHUOaj4AAAAJ;8t97oL8AAAAJ;", "orcid": ";;0000-0002-9633-0033;0000-0002-4897-9209", "linkedin": ";%E6%97%AD%E7%AB%8B-%E6%B2%88-2074b2124/;;", "or_profile": "~Zhiyu_Jin2;~Xuli_Shen1;~Bin_Li4;~Xiangyang_Xue2", "aff": "Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\njin2023trainingfree,\ntitle={Training-free Diffusion Model Adaptation for Variable-Sized Text-to-Image Synthesis},\nauthor={Zhiyu Jin and Xuli Shen and Bin Li and Xiangyang Xue},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4ULTSBBY4U}\n}", "github": "", "project": "", "reviewers": "QLpB;wERV;p6J2;XLhR", "pdf_size": 15035869, "rating": "5;5;5;7", "confidence": "5;4;5;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "4;2;3;4", "wc_summary": "67;98;71;133", "wc_strengths": "64;117;53;88", "wc_weaknesses": "183;231;109;42", "wc_questions": "48;7;187;13", "wc_limitations": "1;11;15;21", "wc_review": "363;464;435;297", "wc_reply_reviewers": "234;122;25;26", "wc_reply_authors": "107;316;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 92.25, 26.37588861062315 ], "wc_strengths_avg": [ 80.5, 24.58149710656371 ], "wc_weaknesses_avg": [ 141.25, 71.91792196664194 ], "wc_questions_avg": [ 63.75, 72.86074045739585 ], "wc_limitations_avg": [ 12.0, 7.280109889280518 ], "wc_review_avg": [ 389.75, 64.95912176130463 ], "wc_reply_reviewers_avg": [ 101.75, 85.91965723860868 ], "wc_reply_authors_avg": [ 105.75, 129.008478403553 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14652706640097785711&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Adaptive Selective Sampling for Online Prediction with Experts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72851", "id": "4VAF3d5jNg", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/00b67df24009747e8bbed4c2c6f9c825-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4VAF3d5jNg", "openreview": "https://openreview.net/forum?id=4VAF3d5jNg", "poster": "/media/PosterPDFs/NeurIPS%202023/72851.png?t=1700235331.9448376", "slides": "https://nips.cc/virtual/2023/poster/72851", "video": "https://nips.cc/virtual/2023/poster/72851", "author_site": "Rui Castro, Fredrik Hellstr\u00f6m, Tim van Erven", "tldr": "", "abstract": "We consider online prediction of a binary sequence with expert advice. For this setting, we devise label-efficient forecasting algorithms, which use a selective sampling scheme that enables collecting much fewer labels than standard procedures. For the general case without a perfect expert, we prove best-of-both-worlds guarantees, demonstrating that the proposed forecasting algorithm always queries sufficiently many labels in the worst case to obtain optimal regret guarantees, while simultaneously querying much fewer labels in more benign settings. Specifically, for a scenario where one expert is strictly better than the others in expectation, we show that the label complexity of the label-efficient forecaster is roughly upper-bounded by the square root of the number of rounds. Finally, we present numerical experiments empirically showing that the normalized regret of the label-efficient forecaster can asymptotically match known minimax rates for pool-based active learning, suggesting it can optimally adapt to benign settings.", "keywords": "Online learning;prediction with experts;selective sampling;active learning", "primary_area": "", "supplementary_material": "/attachment/fdd93594d3855030f8f737c2f66634e801f89b73.zip", "author": "Rui M. Castro;Fredrik Hellstr\u00f6m;Tim van Erven", "authorids": "~Rui_M._Castro1;~Fredrik_Hellstr\u00f6m1;~Tim_van_Erven1", "gender": "M;;M", "homepage": "https://www.win.tue.nl/~rmcastro/;https://fredrikhellstrom.github.io/;http://www.timvanerven.nl", "dblp": "11/2943;167/6308;82/1868", "google_scholar": "Koez6qoAAAAJ;zTJcV04AAAAJ;https://scholar.google.nl/citations?user=kdxqEMQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Rui_M._Castro1;~Fredrik_Hellstr\u00f6m1;~Tim_van_Erven1", "aff": "Eindhoven University of Technology;Chalmers University;University of Amsterdam", "aff_domain": "tue.nl;chalmers.se;uva.nl", "position": "Associate Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\ncastro2023adaptive,\ntitle={Adaptive Selective Sampling for Online Prediction with Experts},\nauthor={Rui M. 
Castro and Fredrik Hellstr{\\\"o}m and Tim van Erven},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4VAF3d5jNg}\n}", "github": "", "project": "", "reviewers": "1FCH;naDi;G2NF;VgJf", "pdf_size": 600943, "rating": "6;7;8;8", "confidence": "3;4;4;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "75;84;134;213", "wc_strengths": "23;70;155;15", "wc_weaknesses": "200;33;124;3", "wc_questions": "3;91;311;74", "wc_limitations": "13;1;1;1", "wc_review": "314;279;725;306", "wc_reply_reviewers": "23;22;78;36", "wc_reply_authors": "8;5;8;61", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 126.5, 54.76540879058605 ], "wc_strengths_avg": [ 65.75, 55.647888549342106 ], "wc_weaknesses_avg": [ 90.0, 77.57899200170108 ], "wc_questions_avg": [ 119.75, 115.24620384203551 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 406.0, 184.63071250471845 ], "wc_reply_reviewers_avg": [ 39.75, 22.76373211931646 ], "wc_reply_authors_avg": [ 20.5, 23.41473894793619 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14206303247466029652&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "tue.nl;chalmers.se;uva.nl", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Eindhoven University of Technology;Chalmers University of Technology;University of Amsterdam", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tue.nl;https://www.chalmers.se;https://www.uva.nl", "aff_unique_abbr": "TU/e;Chalmers;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;Sweden" }, { "title": "Structured State Space Models for In-Context Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72850", "id": "4W9FVg1j6I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/92d3d2a9801211ca3693ccb2faa1316f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4W9FVg1j6I", "openreview": "https://openreview.net/forum?id=4W9FVg1j6I", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72850", "video": "https://nips.cc/virtual/2023/poster/72850", "author_site": "Chris Lu, Yannick Schroecker, Albert Gu, Emilio Parisotto, Jakob Foerster, Satinder Singh, Feryal Behbahani", "tldr": "", "abstract": "Structured state space sequence (S4) models have recently achieved state-of-the-art performance on long-range sequence modeling tasks. These models also have fast inference speeds and parallelisable training, making them potentially useful in many reinforcement learning settings. We propose a modification to a variant of S4 that enables us to initialise and reset the hidden state in parallel, allowing us to tackle reinforcement learning tasks. We show that our modified architecture runs asymptotically faster than Transformers in sequence length and performs better than RNNs on a simple memory-based task. 
We evaluate our modified architecture on a set of partially-observable environments and find that, in practice, our model outperforms RNNs while also running over five times faster. Then, by leveraging the model\u2019s ability to handle long-range sequences, we achieve strong performance on a challenging meta-learning task in which the agent is given a randomly-sampled continuous control environment, combined with a randomly-sampled linear projection of the environment's observations and actions. Furthermore, we show the resulting model can adapt to out-of-distribution held-out tasks. Overall, the results presented in this paper show that structured state space models are fast and performant for in-context reinforcement learning tasks. We provide code at https://github.com/luchris429/s5rl.", "keywords": "Reinforcement Learning;Meta-Learning;State Space Models", "primary_area": "", "supplementary_material": "/attachment/28dde0e402e34d8bf61f59cb22fbf7495a3c2aa8.zip", "author": "Chris Lu;Yannick Schroecker;Albert Gu;Emilio Parisotto;Jakob Nicolaus Foerster;Satinder Singh;Feryal Behbahani", "authorids": "~Chris_Lu1;~Yannick_Schroecker1;~Albert_Gu1;~Emilio_Parisotto1;~Jakob_Nicolaus_Foerster1;~Satinder_Singh2;~Feryal_Behbahani1", "gender": ";M;M;M;M;F;", "homepage": ";;;;https://www.jakobfoerster.com;https://feryal.github.io;", "dblp": "77/9579;180/1434;130/0612;https://dblp.uni-trier.de/pers/hd/p/Parisotto:Emilio;176/5095;;", "google_scholar": "4WLoIRsAAAAJ;dNqsv5MAAAAJ;DVCHv1kAAAAJ;;6z4lQzMAAAAJ;;", "orcid": ";;0000-0002-4946-6042;;;;", "linkedin": ";;;;;;", "or_profile": "~Chris_Lu1;~Yannick_Schroecker1;~Albert_Gu1;~Emilio_Parisotto1;~Jakob_Nicolaus_Foerster1;~Feryal_Behbahani1;~Satinder_Baveja2", "aff": "University of Oxford;Google DeepMind;Carnegie Mellon University;Department of Computer Science, University of Toronto;University of Oxford, University of Oxford;Google DeepMind;Google DeepMind", "aff_domain": "ox.ac.uk;google.com;cmu.edu;cs.toronto.edu;eng.ox.ac.uk;google.com;google.com", "position": "PhD student;Research Scientist;Assistant Professor;PhD student;Associate Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlu2023structured,\ntitle={Structured State Space Models for In-Context Reinforcement Learning},\nauthor={Chris Lu and Yannick Schroecker and Albert Gu and Emilio Parisotto and Jakob Nicolaus Foerster and Satinder Singh and Feryal Behbahani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4W9FVg1j6I}\n}", "github": "", "project": "", "reviewers": "R3ow;Zjtv;gUv9;R54Y", "pdf_size": 1399103, "rating": "5;5;7;7", "confidence": "3;3;3;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;1;3;3", "wc_summary": "131;82;125;60", "wc_strengths": "65;79;73;196", "wc_weaknesses": "178;243;220;330", "wc_questions": "104;426;98;98", "wc_limitations": "46;31;18;38", "wc_review": "524;861;534;722", "wc_reply_reviewers": "64;25;181;246", "wc_reply_authors": "653;62;688;394", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 99.5, 29.618406439239774 ], "wc_strengths_avg": [ 103.25, 53.77906191074738 ], "wc_weaknesses_avg": [ 242.75, 55.50394130149678 ], "wc_questions_avg": [ 181.5, 141.18339137448143 ], "wc_limitations_avg": [ 33.25, 
10.280442597476044 ], "wc_review_avg": [ 660.25, 140.19339321094986 ], "wc_reply_reviewers_avg": [ 129.0, 88.64818103040807 ], "wc_reply_authors_avg": [ 449.25, 250.7642069753975 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14984513224785183353&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;google.com;cmu.edu;cs.toronto.edu;eng.ox.ac.uk;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;2;3;0;1;1", "aff_unique_norm": "University of Oxford;Google;Carnegie Mellon University;University of Toronto", "aff_unique_dep": ";Google DeepMind;;Department of Computer Science", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com;https://www.cmu.edu;https://www.utoronto.ca", "aff_unique_abbr": "Oxford;DeepMind;CMU;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;1;2;0;0;0", "aff_country_unique": "United Kingdom;United States;Canada" }, { "title": "Learning Sample Difficulty from Pre-trained Models for Reliable Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72849", "id": "4WPhXYMK6N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/50251f54848a433f3e47ae3b7cbded53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4WPhXYMK6N", "openreview": "https://openreview.net/forum?id=4WPhXYMK6N", "poster": "/media/PosterPDFs/NeurIPS%202023/72849.png?t=1700031201.193857", "slides": "https://nips.cc/virtual/2023/poster/72849", "video": "https://nips.cc/virtual/2023/poster/72849", "author_site": "Peng Cui, Dan Zhang, Zhijie Deng, Yinpeng Dong, Jun Zhu", "tldr": "", "abstract": "Large-scale pre-trained models have achieved remarkable success in many applications, but how to leverage them to improve the prediction reliability of downstream models is undesirably under-explored. Moreover, modern neural networks have been found to be poorly calibrated and make overconfident predictions regardless of inherent sample difficulty and data uncertainty. To address this issue, we propose to utilize large-scale pre-trained models to guide downstream model training with sample difficulty-aware entropy regularization. Pre-trained models that have been exposed to large-scale datasets and do not overfit the downstream training classes enable us to measure each training sample\u2019s difficulty via feature-space Gaussian modeling and relative Mahalanobis distance computation. Importantly, by adaptively penalizing overconfident prediction based on the sample difficulty, we simultaneously improve accuracy and uncertainty calibration across challenging benchmarks (e.g., +0.55% ACC and \u22123.7% ECE on ImageNet1k using ResNet34), consistently surpassing competitive baselines for reliable prediction. 
The improved uncertainty estimate further improves selective classification (abstaining from erroneous predictions) and out-of-distribution detection.", "keywords": "uncertainty calibration;sample difficulty;reliable prediction", "primary_area": "", "supplementary_material": "/attachment/0c7cfdae12b0598ad84253c3cae99565cea8424d.zip", "author": "Peng Cui;Dan Zhang;Zhijie Deng;Yinpeng Dong;Jun Zhu", "authorids": "~Peng_Cui6;~Dan_Zhang1;~Zhijie_Deng1;~Yinpeng_Dong2;~Jun_Zhu2", "gender": "M;;M;M;M", "homepage": "https://scholar.google.com/citations?user=c_VTs5MAAAAJ&hl=zh-CN;;https://thudzj.github.io/;https://dongyp13.github.io;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "31/891-7;21/802-17;209/4959;183/0980;50/2644-1", "google_scholar": "c_VTs5MAAAAJ;https://scholar.google.de/citations?user=yazO-mMAAAAJ;J3dR0sUAAAAJ;6_4ad84AAAAJ;axsP38wAAAAJ", "orcid": ";0000-0003-0930-9162;0000-0002-0932-1631;;", "linkedin": ";;;;", "or_profile": "~Peng_Cui6;~Dan_Zhang1;~Zhijie_Deng1;~Yinpeng_Dong2;~Jun_Zhu2", "aff": "Tsinghua University;Robert Bosch GmbH, Bosch;Shanghai Jiaotong University;Tsinghua University;Tsinghua University", "aff_domain": "cs.tsinghua.edu.cn;de.bosch.com;sjtu.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;Research Scientist;Assistant Professor;Postdoc;Professor", "bibtex": "@inproceedings{\ncui2023learning,\ntitle={Learning Sample Difficulty from Pre-trained Models for Reliable Prediction},\nauthor={Peng Cui and Dan Zhang and Zhijie Deng and Yinpeng Dong and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4WPhXYMK6N}\n}", "github": "", "project": "", "reviewers": "Ye8r;Lxfs;YeMZ;FmxN;93FC", "pdf_size": 2588347, "rating": "5;6;6;6;7", "confidence": "3;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;4;3;3;3", "wc_summary": "56;75;55;154;85", "wc_strengths": "70;114;133;135;46", "wc_weaknesses": "98;113;145;48;179", "wc_questions": "142;32;114;20;139", "wc_limitations": "7;1;18;35;12", "wc_review": "373;335;465;392;461", "wc_reply_reviewers": "99;99;0;0;62", "wc_reply_authors": "30;111;0;0;36", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 85.0, 36.33730865102698 ], "wc_strengths_avg": [ 99.6, 35.56740080466943 ], "wc_weaknesses_avg": [ 116.6, 44.193212148473656 ], "wc_questions_avg": [ 89.4, 52.807575214167905 ], "wc_limitations_avg": [ 14.6, 11.637869220780924 ], "wc_review_avg": [ 405.2, 50.653331578485535 ], "wc_reply_reviewers_avg": [ 52.0, 44.55558326405345 ], "wc_reply_authors_avg": [ 35.4, 40.623146111545815 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7905694150420949, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1227374495836468420&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.tsinghua.edu.cn;de.bosch.com;sjtu.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Tsinghua University;Robert Bosch GmbH;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.tsinghua.edu.cn;https://www.bosch.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "THU;Bosch;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Germany" }, { "title": "Breaking the Communication-Privacy-Accuracy Tradeoff with $f$-Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72848", "id": "4ZaPpVDjGQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ead13878cd158f013becb6a559a60364-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4ZaPpVDjGQ", "openreview": "https://openreview.net/forum?id=4ZaPpVDjGQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72848.png?t=1699929912.835852", "slides": "https://nips.cc/virtual/2023/poster/72848", "video": "https://nips.cc/virtual/2023/poster/72848", "author_site": "Richeng Jin, Zhonggen Su, caijun zhong, Zhaoyang Zhang, Tony Quek, Huaiyu Dai", "tldr": "", "abstract": "We consider a federated data analytics problem in which a server coordinates the collaborative data analysis of multiple users with privacy concerns and limited communication capability. The commonly adopted compression schemes introduce information loss into local data while improving communication efficiency, and it remains an open problem whether such discrete-valued mechanisms provide any privacy protection. In this paper, we study the local differential privacy guarantees of discrete-valued mechanisms with finite output space through the lens of $f$-differential privacy (DP). More specifically, we advance the existing literature by deriving tight $f$-DP guarantees for a variety of discrete-valued mechanisms, including the binomial noise and the binomial mechanisms that are proposed for privacy preservation, and the sign-based methods that are proposed for data compression, in closed-form expressions. We further investigate the amplification in privacy by sparsification and propose a ternary stochastic compressor. 
By leveraging compression for privacy amplification, we improve the existing methods by removing the dependency of accuracy (in terms of mean square error) on communication cost in the popular use case of distributed mean estimation, therefore breaking the three-way tradeoff between privacy, communication, and accuracy.", "keywords": "Differential privacy;federated data analytics;discrete-valued mechanism;distributed mean estimation", "primary_area": "", "supplementary_material": "/attachment/db9207b72facc1e9ccfa908e03cb1afb61367e07.zip", "author": "Richeng Jin;Zhonggen Su;Caijun Zhong;Zhaoyang Zhang;Tony Quek;Huaiyu Dai", "authorids": "~Richeng_Jin1;suzhonggen@zju.edu.cn;caijunzhong@zju.edu.cn;~Zhaoyang_Zhang4;~Tony_Quek1;~Huaiyu_Dai2", "gender": "M;;;M;M;M", "homepage": ";;;http://person.zju.edu.cn/en/zhaoyangzhang;https://people.sutd.edu.sg/~tonyquek/;https://ece.ncsu.edu/people/hdai/", "dblp": "194/6950;;;;65/1128;09/5360.html", "google_scholar": "gZXBWgEAAAAJ;;;xdQSx54AAAAJ;https://scholar.google.com.tw/citations?user=0o1tkokAAAAJ;HOSH65oAAAAJ", "orcid": ";;;;0000-0002-4037-3149;", "linkedin": ";;;;;", "or_profile": "~Richeng_Jin1;suzhonggen@zju.edu.cn;caijunzhong@zju.edu.cn;~Zhaoyang_Zhang4;~Tony_Quek1;~Huaiyu_Dai2", "aff": "Zhejiang University;;;Zhejiang University;Singapore University of Technology and Design;North Carolina State University", "aff_domain": "zju.edu.cn;;;zju.edu.cn;sutd.edu.sg;ncsu.edu", "position": "Assistant Professor;;;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\njin2023breaking,\ntitle={Breaking the Communication-Privacy-Accuracy Tradeoff with \\$f\\$-Differential Privacy},\nauthor={Richeng Jin and Zhonggen Su and Caijun Zhong and Zhaoyang Zhang and Tony Quek and Huaiyu Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4ZaPpVDjGQ}\n}", "github": "", "project": "", "reviewers": "mVNo;Mzdj;191s;VYNX", "pdf_size": 539614, "rating": "3;4;6;7", "confidence": "3;2;4;2", "soundness": "3;3;3;3", "novelty": "1;2;3;3", "presentation": "3;2;3;3", "wc_summary": "55;64;145;216", "wc_strengths": "26;33;56;36", "wc_weaknesses": "283;123;6;24", "wc_questions": "149;30;53;48", "wc_limitations": "1;16;1;8", "wc_review": "514;266;261;332", "wc_reply_reviewers": "39;25;0;19", "wc_reply_authors": "475;803;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.0, 65.57819759645731 ], "wc_strengths_avg": [ 37.75, 11.143944544011335 ], "wc_weaknesses_avg": [ 109.0, 109.89312990355675 ], "wc_questions_avg": [ 70.0, 46.40581860068843 ], "wc_limitations_avg": [ 6.5, 6.18465843842649 ], "wc_review_avg": [ 343.25, 102.48749923771192 ], "wc_reply_reviewers_avg": [ 20.75, 14.00669482783144 ], "wc_reply_authors_avg": [ 319.5, 339.8944689164565 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18406942056644600866&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;;;zju.edu.cn;sutd.edu.sg;ncsu.edu", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Zhejiang 
University;Singapore University of Technology and Design;North Carolina State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.sutd.edu.sg;https://www.ncsu.edu", "aff_unique_abbr": "ZJU;SUTD;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "China;Singapore;United States" }, { "title": "What Makes Data Suitable for a Locally Connected Neural Network? A Necessary and Sufficient Condition Based on Quantum Entanglement.", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72847", "id": "4aIpgq1nuI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/808a79d149c9dd8338d789881c9dab4c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4aIpgq1nuI", "openreview": "https://openreview.net/forum?id=4aIpgq1nuI", "poster": "/media/PosterPDFs/NeurIPS%202023/72847.png?t=1702069320.0909808", "slides": "https://nips.cc/virtual/2023/poster/72847", "video": "https://nips.cc/virtual/2023/poster/72847", "author_site": "Yotam Alexander, Nimrod De La Vega, Noam Razin, Nadav Cohen", "tldr": "", "abstract": "The question of what makes a data distribution suitable for deep learning is a fundamental open problem. Focusing on locally connected neural networks (a prevalent family of architectures that includes convolutional and recurrent neural networks as well as local self-attention models), we address this problem by adopting theoretical tools from quantum physics. Our main theoretical result states that a certain locally connected neural network is capable of accurate prediction over a data distribution if and only if the data distribution admits low quantum entanglement under certain canonical partitions of features. As a practical application of this result, we derive a preprocessing method for enhancing the suitability of a data distribution to locally connected neural networks. Experiments with widespread models over various datasets demonstrate our findings. We hope that our use of quantum entanglement will encourage further adoption of tools from physics for formally reasoning about the relation between deep learning and real-world data.", "keywords": "Deep Learning;Locally Connected Neural Networks;Data Distributions;Quantum Entanglement;Tensor Networks", "primary_area": "", "supplementary_material": "/attachment/56f13b78aee1749116ae441c18f1e5427c65b955.zip", "author": "Yotam Alexander;Nimrod De La Vega;Noam Razin;Nadav Cohen", "authorids": "~Yotam_Alexander1;~Nimrod_De_La_Vega2;~Noam_Razin1;~Nadav_Cohen1", "gender": "M;M;M;M", "homepage": "https://www.cohennadav.com/group.html;https://en-exact-sciences.tau.ac.il/computer;https://noamrazin.github.io/;http://www.cohennadav.com", "dblp": "342/9141.html;342/8930;247/1241;119/7155", "google_scholar": ";;tDsd50oAAAAJ;AfLwLQ0AAAAJ", "orcid": ";;;", "linkedin": ";;;cohennadav/", "or_profile": "~Yotam_Alexander1;~Nimrod_De_La_Vega2;~Noam_Razin1;~Nadav_Cohen1", "aff": "Tel Aviv University, Tel Aviv University;School of Computer Science, Tel Aviv University;Tel Aviv University;School of Computer Science, Tel Aviv University", "aff_domain": "tauex.tau.ac.il;cs.tau.ac.il;tau.ac.il;cs.tau.ac.il", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nalexander2023what,\ntitle={What Makes Data Suitable for a Locally Connected Neural Network? 
A Necessary and Sufficient Condition Based on Quantum Entanglement.},\nauthor={Yotam Alexander and Nimrod De La Vega and Noam Razin and Nadav Cohen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4aIpgq1nuI}\n}", "github": "", "project": "", "reviewers": "AvgR;jhrZ;SJKu;tJGs;CJsr", "pdf_size": 1245529, "rating": "6;6;6;7;8", "confidence": "3;3;3;3;3", "soundness": "2;2;3;4;3", "novelty": "3;3;3;4;4", "presentation": "3;3;2;3;3", "wc_summary": "66;59;96;221;120", "wc_strengths": "35;55;47;117;48", "wc_weaknesses": "26;105;21;199;136", "wc_questions": "126;185;98;26;11", "wc_limitations": "0;1;2;1;1", "wc_review": "253;405;264;564;316", "wc_reply_reviewers": "15;20;15;0;28", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 112.4, 58.52384129566342 ], "wc_strengths_avg": [ 60.4, 29.021371435547287 ], "wc_weaknesses_avg": [ 97.4, 67.53547216093185 ], "wc_questions_avg": [ 89.2, 64.37204362143554 ], "wc_limitations_avg": [ 1.0, 0.6324555320336759 ], "wc_review_avg": [ 360.4, 115.10099912685381 ], "wc_reply_reviewers_avg": [ 15.6, 9.13454979733539 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10183941173895387755&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "tauex.tau.ac.il;cs.tau.ac.il;tau.ac.il;cs.tau.ac.il", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Tel Aviv;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "Likelihood Ratio Confidence Sets for Sequential Decision Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72846", "id": "4anryczeED", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5491280797f3192b895bce84eb83df8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4anryczeED", "openreview": "https://openreview.net/forum?id=4anryczeED", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72846", "video": "https://nips.cc/virtual/2023/poster/72846", "author_site": "Nicolas Emmenegger, Mojmir Mutny, Andreas Krause", "tldr": "", "abstract": "Certifiable, adaptive uncertainty estimates for unknown quantities are an essential ingredient of sequential decision-making algorithms. Standard approaches rely on problem-dependent concentration results and are limited to a specific combination of parameterization, noise family, and estimator. In this paper, we revisit the likelihood-based inference principle and propose to use \\emph{likelihood ratios} to construct \\emph{any-time valid} confidence sequences without requiring specialized treatment in each application scenario. Our method is especially suitable for problems with well-specified likelihoods, and the resulting sets always maintain the prescribed coverage in a model-agnostic manner. 
The size of the sets depends on a choice of estimator sequence in the likelihood ratio. We discuss how to provably choose the best sequence of estimators and shed light on connections to online convex optimization with algorithms such as Follow-the-Regularized-Leader. To counteract the initially large bias of the estimators, we propose a reweighting scheme that also opens up deployment in non-parametric settings such as RKHS function classes. We provide a \\emph{non-asymptotic} analysis of the size of the likelihood ratio confidence sets for generalized linear models, using insights from convex duality and online learning. We showcase the practical strength of our method on generalized linear bandit problems, survival analysis, and bandits with various additive noise distributions.", "keywords": "confidence sets;uncertainty quantification;bandits;active learning;testing", "primary_area": "", "supplementary_material": "/attachment/dbb45b5f45d0e5e7ea445ea279a87cf3c8070131.pdf", "author": "Nicolas Emmenegger;Mojmir Mutny;Andreas Krause", "authorids": "~Nicolas_Emmenegger1;~Mojmir_Mutny1;~Andreas_Krause1", "gender": "Not Specified;M;M", "homepage": ";;https://las.inf.ethz.ch/krausea", "dblp": ";173/5114;87/1831-1.html", "google_scholar": "_Ngo54EAAAAJ;;https://scholar.google.nl/citations?user=kdxqEMQAAAAJ", "orcid": ";;0000-0001-7260-9673", "linkedin": ";;krausea/", "or_profile": "~Nicolas_Emmenegger1;~Mojmir_Mutny1;~Andreas_Krause1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETH Zurich", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nemmenegger2023likelihood,\ntitle={Likelihood Ratio Confidence Sets for Sequential Decision Making},\nauthor={Nicolas Emmenegger and Mojmir Mutny and Andreas Krause},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4anryczeED}\n}", "github": "", "project": "", "reviewers": "XFVf;4g2j;DGax;CfDQ", "pdf_size": 1420770, "rating": "6;6;6;8", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;1;3;4", "wc_summary": "76;311;73;206", "wc_strengths": "13;32;81;54", "wc_weaknesses": "185;82;111;16", "wc_questions": "1;312;57;110", "wc_limitations": "1;1;5;4", "wc_review": "276;738;327;390", "wc_reply_reviewers": "39;67;139;61", "wc_reply_authors": "0;29;294;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 166.5, 99.21315436977095 ], "wc_strengths_avg": [ 45.0, 25.347583711273153 ], "wc_weaknesses_avg": [ 98.5, 60.656821545478294 ], "wc_questions_avg": [ 120.0, 117.36055555424062 ], "wc_limitations_avg": [ 2.75, 1.7853571071357126 ], "wc_review_avg": [ 432.75, 180.80289682413832 ], "wc_reply_reviewers_avg": [ 76.5, 37.559952076646745 ], "wc_reply_authors_avg": [ 80.75, 123.68786318794581 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15931853653833935088&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "inf.ethz.ch;ethz.ch;ethz.ch", "author_num": 3, 
"aff_unique_index": "0;1;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Benchmarking Robustness of Adaptation Methods on Pre-trained Vision-Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73702", "id": "4d8dO5sAeM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a2a544e43acb8b954dc5846ff0d77ad5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=4d8dO5sAeM", "openreview": "https://openreview.net/forum?id=4d8dO5sAeM", "poster": "/media/PosterPDFs/NeurIPS%202023/73702.png?t=1701941756.1838522", "slides": "https://nips.cc/virtual/2023/poster/73702", "video": "https://nips.cc/virtual/2023/poster/73702", "author_site": "Shuo Chen, Jindong Gu, Zhen Han, Yunpu Ma, Philip Torr, Volker Tresp", "tldr": "", "abstract": "Various adaptation methods, such as LoRA, prompts, and adapters, have been proposed to enhance the performance of pre-trained vision-language models in specific domains. As test samples in real-world applications usually differ from adaptation data, the robustness of these adaptation methods against distribution shifts is essential. In this study, we assess the robustness of 11 widely used adaptation methods across 4 vision-language datasets under multimodal corruptions. Concretely, we introduce 7 benchmark datasets, including 96 visual and 87 textual corruptions, to investigate the robustness of different adaptation methods, the impact of available adaptation examples, and the influence of trainable parameter size during adaptation. Our analysis reveals that: 1) Adaptation methods are more sensitive to text corruptions than visual corruptions. 2) Full fine-tuning does not consistently provide the highest robustness; instead, adapters can achieve better robustness with comparable clean performance. 3) Contrary to expectations, our findings indicate that increasing the amount of adaptation data and the number of parameters does not guarantee enhanced robustness; instead, it results in even lower robustness. We hope this study could benefit future research in the development of robust multimodal adaptation methods. 
The benchmark, code, and dataset used in this study can be accessed at https://adarobustness.github.io.", "keywords": "Robustness Benchmark;Multimodal Foundation Models;Model Adaptation", "primary_area": "", "supplementary_material": "/attachment/eecd8682014b770204d1558cc0b40388fa2ca194.pdf", "author": "Shuo Chen;Jindong Gu;Zhen Han;Yunpu Ma;Philip Torr;Volker Tresp", "authorids": "~Shuo_Chen12;~Jindong_Gu1;~Zhen_Han3;~Yunpu_Ma1;~Philip_Torr1;~Volker_Tresp1", "gender": "M;;M;M;;M", "homepage": "https://chenxshuo.github.io;;https://sites.google.com/view/zhenhan/home;https://dblp.org/pid/199/8143.html;http://www.robots.ox.ac.uk/~tvg/;https://www.dbs.ifi.lmu.de/~tresp/", "dblp": "00/6472-14;;;199/8143.html;;t/VolkerTresp", "google_scholar": "BKvdGiwAAAAJ;;HMdgrwoAAAAJ;fj5DzgcAAAAJ;;xIJHTUwAAAAJ", "orcid": "0000-0001-7305-3793;;;;;0000-0001-9428-3686", "linkedin": ";;zhen-han-08a769128/;yunpu-ma-05a9b41b0/?originalSubdomain=de;;volker-tresp-8110a118/", "or_profile": "~Shuo_Chen12;~Jindong_Gu1;~Zhen_Han3;~Yunpu_Ma1;~Philip_Torr1;~Volker_Tresp1", "aff": "University of Munich, Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;;Amazon;Institut f\u00fcr Informatik;University of Oxford;Siemens Corporate Research", "aff_domain": "campus.lmu.de;;amazon.com;lmu.de;ox.ac.uk;siemens.com", "position": "PhD student;;Researcher;Principal Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nchen2023benchmarking,\ntitle={Benchmarking Robustness of Adaptation Methods on Pre-trained Vision-Language Models},\nauthor={Shuo Chen and Jindong Gu and Zhen Han and Yunpu Ma and Philip Torr and Volker Tresp},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=4d8dO5sAeM}\n}", "github": "", "project": "", "reviewers": "gdTB;UNok;rME1;Ssg9;Npuc", "pdf_size": 3187126, "rating": "4;5;6;7;7", "confidence": "5;5;3;3;3", "wc_summary_and_contributions": "22;50;52;160;70", "wc_strengths": "39;37;33;164;115", "wc_improvement": "443;190;136;574;104", "wc_limitations": "53;18;12;36;16", "wc_correctness": "5;1;1;39;28", "wc_clarity": "8;1;24;82;12", "wc_relation_to_prior_work": "5;1;21;61;24", "wc_documentation": "8;1;22;83;14", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "584;300;302;1200;384", "wc_reply_reviewers": "487;75;18;126;14", "wc_reply_authors": "2833;1311;636;1253;354", "reply_reviewers": "1;1;1;1;1", "reply_authors": "6;4;1;3;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "wc_summary_and_contributions_avg": [ 70.8, 47.16948165922538 ], "wc_strengths_avg": [ 77.6, 52.898393170303386 ], "wc_improvement_avg": [ 289.4, 185.674553991655 ], "wc_limitations_avg": [ 27.0, 15.388307249337076 ], "wc_correctness_avg": [ 14.8, 15.727682601069999 ], "wc_clarity_avg": [ 25.4, 29.268413007882746 ], "wc_relation_to_prior_work_avg": [ 22.4, 21.237702323933256 ], "wc_documentation_avg": [ 25.6, 29.520162601178196 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 554.0, 339.15660099723846 ], "wc_reply_reviewers_avg": [ 144.0, 176.35759127409287 ], "wc_reply_authors_avg": [ 1277.4, 858.565920590842 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.8973665961010275 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9101820546182064, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=581273864485120180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, 
"email": "campus.lmu.de;;amazon.com;lmu.de;ox.ac.uk;siemens.com", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Amazon;Institut f\u00fcr Informatik;University of Oxford;Siemens AG", "aff_unique_dep": ";Amazon.com, Inc.;Department of Computer Science;;Corporate Research", "aff_unique_url": "https://www.lmu.de;https://www.amazon.com;;https://www.ox.ac.uk;https://www.siemens.com/research", "aff_unique_abbr": "LMU;Amazon;;Oxford;Siemens", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "Germany;United States;United Kingdom" }, { "title": "Knowledge-based in silico models and dataset for the comparative evaluation of mammography AI for a range of breast characteristics, lesion conspicuities and doses", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73701", "id": "4dsMX3RnF0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/75d0956c9594f47bfb86a07bef58d4b0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=4dsMX3RnF0", "openreview": "https://openreview.net/forum?id=4dsMX3RnF0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73701", "video": "https://nips.cc/virtual/2023/poster/73701", "author_site": "Elena Sizikova, Niloufar Saharkhiz, Diksha Sharma, Miguel Lago, Berkman Sahiner, Jana Delfino, Aldo Badano", "tldr": "", "abstract": "To generate evidence regarding the safety and efficacy of artificial intelligence (AI) enabled medical devices, AI models need to be evaluated on a diverse population of patient cases, some of which may not be readily available. We propose an evaluation approach for testing medical imaging AI models that relies on in silico imaging pipelines in which stochastic digital models of human anatomy (in object space) with and without pathology are imaged using a digital replica imaging acquisition system to generate realistic synthetic image datasets. Here, we release M-SYNTH, a dataset of cohorts with four breast fibroglandular density distributions imaged at different exposure levels using Monte Carlo x-ray simulations with the publicly available Virtual Imaging Clinical Trial for Regulatory Evaluation (VICTRE) toolkit. We utilize the synthetic dataset to analyze AI model performance and find that model performance decreases with increasing breast density and increases with higher mass density, as expected. 
As exposure levels decrease, AI model performance drops, with the highest performance achieved at exposure levels lower than the nominal recommended dose for the breast type.", "keywords": "synthetic data; medical imaging; AI testing; digital twins", "primary_area": "", "supplementary_material": "/attachment/7392144d66579edbfb7028e782f561e4aa5a6711.pdf", "author": "Elena Sizikova;Niloufar Saharkhiz;Diksha Sharma;Miguel Lago;Berkman Sahiner;Jana Gut Delfino;Aldo Badano", "authorids": "~Elena_Sizikova1;~Niloufar_Saharkhiz1;~Diksha_Sharma1;~Miguel_Lago1;~Berkman_Sahiner1;~Jana_Gut_Delfino1;~Aldo_Badano1", "gender": "F;F;F;;M;F;M", "homepage": "https://elenasizikova.github.io;;;;;;", "dblp": "123/6103;291/7003;;;29/6637;359/6527;62/6585", "google_scholar": "https://scholar.google.com/citations?hl=en;qCb6TTUAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;bBkESg0AAAAJ;N_NpArsAAAAJ", "orcid": ";0000-0002-1656-6282;;;0000-0003-2804-2264;0000-0001-9957-2866;0000-0003-3712-6670", "linkedin": ";niloufar-saharkhiz;;milaan;;janadelfino/;aldobadano/", "or_profile": "~Elena_Sizikova1;~Niloufar_Saharkhiz1;~Diksha_Sharma1;~Miguel_Lago1;~Berkman_Sahiner1;~Jana_Gut_Delfino1;~Aldo_Badano1", "aff": "Food and Drug Administration;Columbia University;FDA;US Food and Drug Administration;US Food and Drug Administration;US Food and Drug Administration;Food and Drug Administration", "aff_domain": "fda.hhs.gov;columbia.edu;fda.gov;fda.gov;fda.hhs.gov;fda.gov;fda.gov", "position": "Researcher;Postdoc;Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nsizikova2023knowledgebased,\ntitle={Knowledge-based in silico models and dataset for the comparative evaluation of mammography {AI} for a range of breast characteristics, lesion conspicuities and doses},\nauthor={Elena Sizikova and Niloufar Saharkhiz and Diksha Sharma and Miguel Lago and Berkman Sahiner and Jana Gut Delfino and Aldo Badano},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=4dsMX3RnF0}\n}", "github": "", "project": "", "reviewers": "6Vc7;ctVH;gzvQ", "pdf_size": 1414414, "rating": "4;8;8", "confidence": "3;3;3", "wc_summary_and_contributions": "143;80;73", "wc_strengths": "116;172;179", "wc_improvement": "57;185;34", "wc_limitations": "6;82;72", "wc_correctness": "14;36;24", "wc_clarity": "8;3;15", "wc_relation_to_prior_work": "1;1;41", "wc_documentation": "1;16;20", "wc_additional_feedback": "1;1;1", "wc_review": "347;576;459", "wc_reply_reviewers": "6;2;0", "wc_reply_authors": "200;769;930", "reply_reviewers": "1;1;0", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 98.66666666666667, 31.47838764754143 ], "wc_strengths_avg": [ 155.66666666666666, 28.193773938387334 ], "wc_improvement_avg": [ 92.0, 66.42790578263526 ], "wc_limitations_avg": [ 53.333333333333336, 33.717782977071444 ], "wc_correctness_avg": [ 24.666666666666668, 8.993825042154695 ], "wc_clarity_avg": [ 8.666666666666666, 4.921607686744467 ], "wc_relation_to_prior_work_avg": [ 14.333333333333334, 18.856180831641264 ], "wc_documentation_avg": [ 12.333333333333334, 8.178562764256865 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 460.6666666666667, 93.4962863195943 ], "wc_reply_reviewers_avg": [ 2.6666666666666665, 2.494438257849294 ], "wc_reply_authors_avg": [ 633.0,
313.1527848617455 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12492585538778803866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "fda.hhs.gov;columbia.edu;fda.gov;fda.gov;fda.hhs.gov;fda.gov;fda.gov", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Food and Drug Administration;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fda.gov;https://www.columbia.edu", "aff_unique_abbr": "FDA;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Importance Weighted Actor-Critic for Optimal Conservative Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72845", "id": "4e0NJbkkd8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bb93a3c1a424654aaea6f5b594e94d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4e0NJbkkd8", "openreview": "https://openreview.net/forum?id=4e0NJbkkd8", "poster": "/media/PosterPDFs/NeurIPS%202023/72845.png?t=1701931809.8829694", "slides": "https://nips.cc/virtual/2023/poster/72845", "video": "https://nips.cc/virtual/2023/poster/72845", "author_site": "Hanlin Zhu, Paria Rashidinejad, Jiantao Jiao", "tldr": "", "abstract": "We propose A-Crab (Actor-Critic Regularized by Average Bellman error), a new practical algorithm for offline reinforcement learning (RL) in complex environments with insufficient data coverage. Our algorithm combines the marginalized importance sampling framework with the actor-critic paradigm, where the critic returns evaluations of the actor (policy) that are pessimistic relative to the offline data and have a small average (importance-weighted) Bellman error. Compared to existing methods, our algorithm simultaneously offers a number of advantages:\n(1) It achieves the optimal statistical rate of $1/\\sqrt{N}$---where $N$ is the size of offline dataset---in converging to the best policy covered in the offline dataset, even when combined with general function approximators.\n(2) It relies on a weaker \\textit{average} notion of policy coverage (compared to the $\\ell_\\infty$ single-policy concentrability) that exploits the structure of policy visitations.\n(3) It outperforms the data-collection behavior policy over a wide range of specific hyperparameters.\n We provide both theoretical analysis and experimental results to validate the effectiveness of our proposed algorithm. 
The code is available at https://github.com/zhuhl98/ACrab.", "keywords": "offline RL;actor-critic;l_2 single-policy concentrability;average bellman error", "primary_area": "", "supplementary_material": "/attachment/0da91f6b85b4c8cda1053df0ca79299363fcd3c2.pdf", "author": "Hanlin Zhu;Paria Rashidinejad;Jiantao Jiao", "authorids": "~Hanlin_Zhu2;~Paria_Rashidinejad1;~Jiantao_Jiao1", "gender": "M;F;M", "homepage": "https://hanlinzhu.com/;;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en", "dblp": ";;43/8919", "google_scholar": "yDVn5LEAAAAJ;BgQkdsYAAAAJ;aO8KpGcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hanlin_Zhu2;~Paria_Rashidinejad1;~Jiantao_Jiao1", "aff": "Electrical Engineering & Computer Science Department, University of California Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023importance,\ntitle={Importance Weighted Actor-Critic for Optimal Conservative Offline Reinforcement Learning},\nauthor={Hanlin Zhu and Paria Rashidinejad and Jiantao Jiao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4e0NJbkkd8}\n}", "github": "", "project": "", "reviewers": "PQdU;22cc;htrq;W5Wf", "pdf_size": 629345, "rating": "5;6;6;6", "confidence": "2;3;3;3", "soundness": "3;2;4;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "45;88;85;67", "wc_strengths": "46;105;115;97", "wc_weaknesses": "42;148;40;69", "wc_questions": "57;27;38;38", "wc_limitations": "6;75;52;49", "wc_review": "196;443;330;320", "wc_reply_reviewers": "88;37;21;12", "wc_reply_authors": "365;45;62;19", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.25, 17.151894939043906 ], "wc_strengths_avg": [ 90.75, 26.61179249881526 ], "wc_weaknesses_avg": [ 74.75, 43.81423855323746 ], "wc_questions_avg": [ 40.0, 10.793516572461451 ], "wc_limitations_avg": [ 45.5, 24.924887161229034 ], "wc_review_avg": [ 322.25, 87.44248109471734 ], "wc_reply_reviewers_avg": [ 39.5, 29.3981291921782 ], "wc_reply_authors_avg": [ 122.75, 140.6989250136617 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11990223589606115651&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "eecs.berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Electrical Engineering & Computer Science Department", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Recovering from Out-of-sample States via Inverse Dynamics in Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72844", "id": "4gLWjSaw4o", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a0f7e9d9b42b26e5bfc9ba4c6e5287c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4gLWjSaw4o", "openreview": "https://openreview.net/forum?id=4gLWjSaw4o", "poster": "/media/PosterPDFs/NeurIPS%202023/72844.png?t=1697090374.5903537", "slides": "https://nips.cc/virtual/2023/poster/72844", "video": "https://nips.cc/virtual/2023/poster/72844", "author_site": "Ke Jiang, Jia-Yu Yao, Xiaoyang Tan", "tldr": "", "abstract": "In this paper we deal with the state distributional shift problem commonly encountered in offline reinforcement learning during test, where the agent tends to take unreliable actions at out-of-sample (unseen) states. Our idea is to encourage the agent to follow the so called state recovery principle when taking actions, i.e., besides long-term return, the immediate consequences of the current action should also be taken into account and those capable of recovering the state distribution of the behavior policy are preferred. For this purpose, an inverse dynamics model is learned and employed to guide the state recovery behavior of the new policy. Theoretically, we show that the proposed method helps aligning the transited state distribution of the new policy with the offline dataset at out-of-sample states, without the need of explicitly predicting the transited state distribution, which is usually difficult in high-dimensional and complicated environments. The effectiveness and feasibility of the proposed method is demonstrated with the state-of-the-art performance on the general offline RL benchmarks.", "keywords": "Offline reinforcement learning;state distributional shift;state recovery;inverse dynamics model", "primary_area": "", "supplementary_material": "/attachment/5e6dee6245491ded80a7c9089c8629c2de39e1fe.pdf", "author": "Ke Jiang;Jia-Yu Yao;Xiaoyang Tan", "authorids": "~Ke_Jiang2;~Jia-Yu_Yao1;~Xiaoyang_Tan2", "gender": "M;M;M", "homepage": "https://jack10843.github.io/;https://parnec.nuaa.edu.cn/2020/0623/c12783a205851/page.htm;http://parnec.nuaa.edu.cn/xtan", "dblp": ";;79/768", "google_scholar": "lJ3AaHoAAAAJ;;rHMtSOYAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ke_Jiang2;~Jia-Yu_Yao1;~Xiaoyang_Tan2", "aff": "Nanjing University of Aeronautics and Astronautics;Peking University;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;pku.edu.cn;nuaa.edu.cn", "position": "PhD student;RA;Full Professor", "bibtex": "@inproceedings{\njiang2023recovering,\ntitle={Recovering from Out-of-sample States via Inverse Dynamics in Offline Reinforcement Learning},\nauthor={Ke Jiang and Jia-Yu Yao and Xiaoyang Tan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4gLWjSaw4o}\n}", "github": "", "project": "", "reviewers": "kCan;tm1v;tZMd;cN6c", "pdf_size": 2841895, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "72;156;110;98", "wc_strengths": "45;78;70;92", "wc_weaknesses": "70;147;92;314", "wc_questions": "55;148;48;150", "wc_limitations": "18;35;26;35", "wc_review": "260;564;346;689", "wc_reply_reviewers": "239;6;0;254", "wc_reply_authors": "511;21;0;492", "reply_reviewers": "2;1;0;3", "reply_authors": "4;2;1;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 
0.4330127018922193 ], "wc_summary_avg": [ 109.0, 30.4138126514911 ], "wc_strengths_avg": [ 71.25, 17.07886120325357 ], "wc_weaknesses_avg": [ 155.75, 95.5729433469536 ], "wc_questions_avg": [ 100.25, 48.81790142970097 ], "wc_limitations_avg": [ 28.5, 7.088723439378913 ], "wc_review_avg": [ 464.75, 170.41328440001385 ], "wc_reply_reviewers_avg": [ 124.75, 121.88390993072055 ], "wc_reply_authors_avg": [ 256.0, 245.70409032004332 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3871483011109824866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nuaa.edu.cn;pku.edu.cn;nuaa.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuaa.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "NUAA;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Make the U in UDA Matter: Invariant Consistency Learning for Unsupervised Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72843", "id": "4hYIxI8ds0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5555cc3fb226ed067fa946e35355f938-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4hYIxI8ds0", "openreview": "https://openreview.net/forum?id=4hYIxI8ds0", "poster": "/media/PosterPDFs/NeurIPS%202023/72843.png?t=1701412955.0249796", "slides": "https://nips.cc/virtual/2023/poster/72843", "video": "https://nips.cc/virtual/2023/poster/72843", "author_site": "Zhongqi Yue, QIANRU SUN, Hanwang Zhang", "tldr": "", "abstract": "Domain Adaptation (DA) is always challenged by the spurious correlation between the domain-invariant features (e.g., class identity) and the domain-specific ones (e.g., environment) that does not generalize to the target domain. Unfortunately, even enriched with additional unsupervised target domains, existing Unsupervised DA (UDA) methods still suffer from it. This is because the source domain supervision only considers the target domain samples as auxiliary data (e.g., by pseudo-labeling), yet the inherent distribution in the target domain---where the valuable de-correlation clues hide---is disregarded. We propose to make the U in UDA matter by giving equal status to the two domains. Specifically, we learn an invariant classifier whose prediction is simultaneously consistent with the labels in the source domain and clusters in the target domain, hence the spurious correlation inconsistent in the target domain is removed. We dub our approach \"Invariant CONsistency learning\" (ICON). Extensive experiments show that ICON achieves the state-of-the-art performance on the classic UDA benchmarks: Office-Home and VisDA-2017, and outperforms all the conventional methods on the challenging WILDS 2.0 benchmark. 
Code is available at https://github.com/yue-zhongqi/ICON.", "keywords": "unsupervised domain adaptation;transfer learning", "primary_area": "", "supplementary_material": "/attachment/1ab0ded3c686c3bc86de04c80cb24926eef9cc00.pdf", "author": "Zhongqi Yue;Qianru Sun;Hanwang Zhang", "authorids": "~Zhongqi_Yue1;~Qianru_Sun2;~Hanwang_Zhang3", "gender": ";F;M", "homepage": "https://github.com/yue-zhongqi;https://qianrusun.com/;https://mreallab.github.io/index.html", "dblp": "275/3790;127/6132.html;79/8116.html", "google_scholar": "7Iyz9ZYAAAAJ;https://scholar.google.de/citations?user=fNfrGMIAAAAJ;YG0DFyYAAAAJ", "orcid": ";0000-0003-2689-317X;", "linkedin": ";;", "or_profile": "~Zhongqi_Yue1;~Qianru_Sun2;~Hanwang_Zhang3", "aff": "Nanyang Technological University;Singapore Management University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;smu.edu.sg;ntu.edu.sg", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nyue2023make,\ntitle={Make the U in {UDA} Matter: Invariant Consistency Learning for Unsupervised Domain Adaptation},\nauthor={Zhongqi Yue and Qianru Sun and Hanwang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4hYIxI8ds0}\n}", "github": "", "project": "", "reviewers": "gs8L;RV7L;cknk;CYwX;Q3G5", "pdf_size": 1966528, "rating": "5;5;6;7;7", "confidence": "4;4;3;3;3", "soundness": "3;2;3;3;4", "novelty": "3;2;3;3;3", "presentation": "2;3;4;3;3", "wc_summary": "31;136;90;90;113", "wc_strengths": "66;30;61;66;75", "wc_weaknesses": "56;231;139;253;53", "wc_questions": "34;4;155;37;93", "wc_limitations": "8;1;12;16;25", "wc_review": "195;402;457;462;359", "wc_reply_reviewers": "0;0;280;43;34", "wc_reply_authors": "0;0;243;153;0", "reply_reviewers": "0;0;2;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 92.0, 34.94567212116545 ], "wc_strengths_avg": [ 59.6, 15.473848907107762 ], "wc_weaknesses_avg": [ 146.4, 84.22731148505216 ], "wc_questions_avg": [ 64.6, 53.570887616316384 ], "wc_limitations_avg": [ 12.4, 8.014985963805552 ], "wc_review_avg": [ 375.0, 97.65039682459053 ], "wc_reply_reviewers_avg": [ 71.4, 105.74989361696777 ], "wc_reply_authors_avg": [ 79.2, 101.0888717911126 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9128709291752769, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14836031652844129830&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "ntu.edu.sg;smu.edu.sg;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;Singapore Management University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.smu.edu.sg", "aff_unique_abbr": "NTU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "4hiJ3KPDYe", "title": "Tackling Unconditional Generation for Highly Multimodal Distributions with Hat Diffusion EBM", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work introduces the Hat Diffusion Energy-Based Model (HDEBM), a hybrid of
EBMs and diffusion models that can perform high-quality unconditional generation for multimodal image distributions. Our method is motivated by the observation that a partial forward and reverse diffusion defines an MCMC process whose steady-state is the data distribution when the diffusion is perfectly trained. The components of HDEBM are a generator network that proposes initial model samples, a truncated diffusion model that adds noise to and removes noise from generator samples as an approximate MCMC step that pushes towards realistic images, and an energy network that further refines diffusion outputs with Langevin MCMC. All networks are incorporated into a single unnormalized density. MCMC with the energy network is crucial for driving multimodal generation, while the truncated diffusion can generate fine details needed for high-quality images. Experiments show HDEBM is effective for unconditional generation with sampling costs significantly lower than those of diffusion models. We achieve an FID score of 21.82 on unconditional ImageNet at 128x128 resolution, which to our knowledge is state-of-the-art among unconditional models that do not use separate retrieval data.", "keywords": "generative model;energy-based model;EBM;diffusion model;hybrid generative model;mcmc;langevin", "primary_area": "", "supplementary_material": "/attachment/a14e96df64aefc16da379920505721a4df528f4f.pdf", "author": "Mitch Hill;Ziwei Xuan;Yutao Han;Guo-Jun Qi", "authorids": "~Mitch_Hill1;~Ziwei_Xuan1;~Yutao_Han1;~Guo-Jun_Qi1", "gender": "M;;M;M", "homepage": ";;;http://maple-lab.net/gqi/", "dblp": "217/3317;120/1384.html;;41/943", "google_scholar": "ycEHnWoAAAAJ;;https://scholar.google.co.uk/citations?user=x4PaP8sAAAAJ;https://scholar.google.com.tw/citations?user=Nut-uvoAAAAJ", "orcid": ";0000-0002-0279-5260;;0000-0003-3508-1851", "linkedin": ";;yh675;", "or_profile": "~Mitch_Hill1;~Ziwei_Xuan1;~Yutao_Han1;~Guo-Jun_Qi1", "aff": "InnoPeak Technology;Innopeak Technology;Innopeak Technology (OPPO Research USA);Guangdong OPPO Mobile Telecommunications Corp.,Ltd.", "aff_domain": "innopeaktech.com;innopeaktech.com;innopeaktech.com;oppo.com", "position": "Researcher;Researcher;Researcher;Dean and Chief Scientist", "bibtex": "@misc{\nhill2023tackling,\ntitle={Tackling Unconditional Generation for Highly Multimodal Distributions with Hat Diffusion {EBM}},\nauthor={Mitch Hill and Ziwei Xuan and Yutao Han and Guo-Jun Qi},\nyear={2023},\nurl={https://openreview.net/forum?id=4hiJ3KPDYe}\n}", "github": "", "project": "", "reviewers": "DvWX;88Mj;JNTT;HLKM;iE1P", "site": "https://openreview.net/forum?id=4hiJ3KPDYe", "pdf_size": 6068297, "rating": "4;4;6;6;7", "confidence": "4;3;3;2;4", "soundness": "2;2;3;4;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "195;91;50;114;419", "wc_strengths": "89;76;60;72;98", "wc_weaknesses": "804;361;100;118;787", "wc_questions": "415;129;288;127;128", "wc_limitations": "28;19;10;9;94", "wc_review": "1531;676;508;440;1526", "wc_reply_reviewers": "56;852;0;23;103", "wc_reply_authors": "33;1454;0;0;74", "reply_reviewers": "1;3;0;1;1", "reply_authors": "2;6;1;1;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 173.8, 131.40076103280376 ], "wc_strengths_avg": [ 79.0, 13.2664991614216 ], "wc_weaknesses_avg": [ 434.0, 309.2733418838423 ], "wc_questions_avg": [ 217.4, 116.62692656500899 ],
"wc_limitations_avg": [ 32.0, 31.75531451584128 ], "wc_review_avg": [ 936.2, 489.6784250914063 ], "wc_reply_reviewers_avg": [ 206.8, 324.4524002068717 ], "wc_reply_authors_avg": [ 312.2, 571.5482131894037 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.08908708063747484, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yzjCn_Z7908J:scholar.google.com/&scioq=Tackling+Unconditional+Generation+for+Highly+Multimodal+Distributions+with+Hat+Diffusion+EBM&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "InnoPeak Technology;OPPO Research USA;OPPO Mobile Telecommunications Corp.,Ltd.", "aff_unique_dep": ";Innopeak Technology;", "aff_unique_url": ";https://www.oppo.com/en;https://www.oppo.com", "aff_unique_abbr": ";OPPO;OPPO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";United States;China" }, { "title": "AlpacaFarm: A Simulation Framework for Methods that Learn from Human Feedback", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72842", "id": "4hturzLcKX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5fc47800ee5b30b8777fdd30abcaaf3b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4hturzLcKX", "openreview": "https://openreview.net/forum?id=4hturzLcKX", "poster": "/media/PosterPDFs/NeurIPS%202023/72842.png?t=1701554545.2264411", "slides": "https://nips.cc/virtual/2023/poster/72842", "video": "https://nips.cc/virtual/2023/poster/72842", "author_site": "Yann Dubois, Chen Xuechen Li, Rohan Taori, Tianyi Zhang, Ishaan Gulrajani, Jimmy Ba, Carlos Guestrin, Percy Liang, Tatsunori Hashimoto", "tldr": "", "abstract": "Large language models (LLMs) such as ChatGPT have seen widespread adoption due to their ability to follow user instructions well.\nDeveloping these LLMs involves a complex yet poorly understood workflow requiring training with human feedback. Replicating and understanding this instruction-following process faces three major challenges: the high cost of data collection, the lack of trustworthy evaluation, and the absence of reference method implementations. We address these bottlenecks with AlpacaFarm, a simulator that enables research and development for learning from feedback at a low cost. First, we design LLM based simulator for human feedback that is 45x cheaper than crowdworkers and displays high agreement with humans. Second, we identify an evaluation dataset representative of real-world instructions and propose an automatic evaluation procedure. Third, we contribute reference implementations for several methods (PPO, best-of-n, expert iteration, among others) that learn from pairwise feedback. Finally, as an end-to-end validation of AlpacaFarm, we train and evaluate eleven models on 10k pairs of human feedback and show that rankings of models trained in AlpacaFarm match rankings of models trained on human data. 
As a demonstration of the research possible in AlpacaFarm, we find that methods that use a reward model can substantially improve over supervised fine-tuning and that our reference PPO implementation leads to a +10% win-rate improvement against Davinci003.", "keywords": "Instruction-Following;Reinforcement Learning from Human Feedback;Artificial General Intelligence;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/66367f91c2c42cc09b79823a7cb80f309229e32f.pdf", "author": "Yann Dubois;Xuechen Li;Rohan Taori;Tianyi Zhang;Ishaan Gulrajani;Jimmy Ba;Carlos Guestrin;Percy Liang;Tatsunori Hashimoto", "authorids": "~Yann_Dubois1;~Xuechen_Li1;~Rohan_Taori1;~Tianyi_Zhang2;~Ishaan_Gulrajani1;~Jimmy_Ba1;~Carlos_Guestrin1;~Percy_Liang1;~Tatsunori_Hashimoto1", "gender": "M;M;M;M;M;M;M;;M", "homepage": "http://yanndubs.github.io/;https://www.lxuechen.com/;http://rohantaori.com/;;https://ishaan.io;http://jimmylba.github.io;https://guestrin.stanford.edu;https://cs.stanford.edu/~pliang/;https://thashim.github.io", "dblp": "198/7527;;220/3868;17/322;164/5562;https://dblp.org/pers/b/Ba:Jimmy.html;38/769;04/1701;", "google_scholar": "bfM1kzAAAAAJ;GaYmpIgAAAAJ;juoUSMgAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.ca/citations?user=ymzxRhAAAAAJ;DpLFv4gAAAAJ;pouyVyUAAAAJ;5ygiTwsAAAAJ", "orcid": ";;;;;;;;", "linkedin": "duboisyann/;;rtaori/;;;;carlos-guestrin-5352a869/;;", "or_profile": "~Yann_Dubois1;~Xuechen_Li1;~Rohan_Taori1;~Tianyi_Zhang2;~Ishaan_Gulrajani1;~Jimmy_Ba1;~Carlos_Guestrin1;~Percy_Liang1;~Tatsunori_Hashimoto1", "aff": "Stanford University;Computer Science Department, Stanford University;Stanford University;Stanford University;Stanford University;Department of Computer Science, University of Toronto;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.toronto.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndubois2023alpacafarm,\ntitle={AlpacaFarm: A Simulation Framework for Methods that Learn from Human Feedback},\nauthor={Yann Dubois and Xuechen Li and Rohan Taori and Tianyi Zhang and Ishaan Gulrajani and Jimmy Ba and Carlos Guestrin and Percy Liang and Tatsunori Hashimoto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4hturzLcKX}\n}", "github": "", "project": "", "reviewers": "NPeD;JtTg;pSDY;goj1", "pdf_size": 1254154, "rating": "7;8;8;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "3;3;4;4", "wc_summary": "173;140;90;432", "wc_strengths": "77;183;76;5", "wc_weaknesses": "50;183;379;8", "wc_questions": "134;105;103;13", "wc_limitations": "1;86;118;3", "wc_review": "435;697;766;461", "wc_reply_reviewers": "16;130;83;228", "wc_reply_authors": "0;11;44;73", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;3;2", "rating_avg": [ 7.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 208.75, 132.23723945999478 ], "wc_strengths_avg": [ 85.25, 63.53886605849998 ], "wc_weaknesses_avg": [ 155.0, 144.5631349964437 ], "wc_questions_avg": [ 88.75, 45.42232380669223 ], "wc_limitations_avg": [ 52.0, 51.268898954434356 ], 
"wc_review_avg": [ 589.75, 144.12733085712784 ], "wc_reply_reviewers_avg": [ 114.25, 77.16338185953231 ], "wc_reply_authors_avg": [ 32.0, 28.679260799399973 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 523, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14031083920930555364&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.toronto.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1;0;0;0", "aff_unique_norm": "Stanford University;University of Toronto", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.stanford.edu;https://www.utoronto.ca", "aff_unique_abbr": "Stanford;U of T", "aff_campus_unique_index": "0;0;0;0;0;1;0;0;0", "aff_campus_unique": "Stanford;Toronto", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Embroid: Unsupervised Prediction Smoothing Can Improve Few-Shot Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72841", "id": "4iMpwAlza1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c7f35864fef057d6fa315afa0275b3ad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4iMpwAlza1", "openreview": "https://openreview.net/forum?id=4iMpwAlza1", "poster": "/media/PosterPDFs/NeurIPS%202023/72841.png?t=1702308393.1930475", "slides": "https://nips.cc/virtual/2023/poster/72841", "video": "https://nips.cc/virtual/2023/poster/72841", "author_site": "Neel Guha, Mayee Chen, Kush Bhatia, Azalia Mirhoseini, Frederic Sala, Christopher R\u00e9", "tldr": "", "abstract": "Recent work has shown that language models' (LMs) prompt-based learning capabilities make them well suited for automating data labeling in domains where manual annotation is expensive. The challenge is that while writing an initial prompt is cheap, improving a prompt is costly---practitioners often require significant labeled data in order to evaluate the impact of prompt modifications. Our work asks whether it is possible to improve prompt-based learning _without_ additional labeled data. We approach this problem by attempting to modify the predictions of a prompt, rather than the prompt itself. Our intuition is that accurate predictions should also be consistent: samples which are similar under some feature representation should receive the same prompt prediction. We propose Embroid, a method which computes multiple representations of a dataset under different embedding functions, and uses the consistency between the LM predictions for neighboring samples to identify mispredictions. Embroid then uses these neighborhoods to create additional predictions for each sample, and combines these predictions with a simple latent variable graphical model in order to generate a final corrected prediction. In addition to providing a theoretical analysis of Embroid, we conduct a rigorous empirical evaluation across six different LMs and up to 95 different tasks. 
We find that (1) Embroid substantially improves performance over original prompts (e.g., by an average of 7.3 points on GPT-JT), (2) also realizes improvements for more sophisticated prompting strategies (e.g., chain-of-thought), and (3) can be specialized to domains like law through the embedding functions.", "keywords": "language models;prompting;embeddings;weak supervision", "primary_area": "", "supplementary_material": "/attachment/73d9fc3a85eeeeba891aa1c1c622f9e2680a58d0.zip", "author": "Neel Guha;Mayee F Chen;Kush Bhatia;Azalia Mirhoseini;Frederic Sala;Christopher Re", "authorids": "~Neel_Guha1;~Mayee_F_Chen1;~Kush_Bhatia3;~Azalia_Mirhoseini3;~Frederic_Sala1;~Christopher_Re1", "gender": "M;;;;M;", "homepage": "http://neelguha.com;;;;https://pages.cs.wisc.edu/~fredsala/;", "dblp": "130/0311;;;;133/3602;", "google_scholar": "YI5N4HQAAAAJ;;;;9KhIkNkAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Neel_Guha1;~Mayee_F_Chen1;~Kush_Bhatia3;~Azalia_Mirhoseini3;~Frederic_Sala1;~Christopher_Re1", "aff": "Stanford Law;;;;University of Wisconsin, Madison;", "aff_domain": "law.stanford.edu;;;;wisc.edu;", "position": "JD;;;;Assistant Professor;", "bibtex": "@inproceedings{\nguha2023embroid,\ntitle={Embroid: Unsupervised Prediction Smoothing Can Improve Few-Shot Classification},\nauthor={Neel Guha and Mayee F Chen and Kush Bhatia and Azalia Mirhoseini and Frederic Sala and Christopher Re},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4iMpwAlza1}\n}", "github": "", "project": "", "reviewers": "bLRH;Bw2X;XCbh;YSHe;RD7v", "pdf_size": 1211526, "rating": "5;6;7;7;8", "confidence": "4;3;2;4;4", "soundness": "3;3;4;3;4", "novelty": "3;3;3;4;4", "presentation": "3;3;3;4;4", "wc_summary": "118;109;98;27;120", "wc_strengths": "110;36;56;16;130", "wc_weaknesses": "145;72;69;145;106", "wc_questions": "137;10;2;3;1", "wc_limitations": "1;21;22;28;1", "wc_review": "511;248;247;219;358", "wc_reply_reviewers": "23;20;9;31;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 94.4, 34.5867026471157 ], "wc_strengths_avg": [ 69.6, 43.51367601111173 ], "wc_weaknesses_avg": [ 107.4, 33.33826630165402 ], "wc_questions_avg": [ 30.6, 53.293902090201655 ], "wc_limitations_avg": [ 14.6, 11.359577456930342 ], "wc_review_avg": [ 316.6, 108.24158165880615 ], "wc_reply_reviewers_avg": [ 19.0, 7.874007874011811 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.04902903378454606, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6974951404194796046&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "law.stanford.edu;;;;wisc.edu;", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;University of Wisconsin", "aff_unique_dep": "Stanford Law School;", "aff_unique_url": "https://law.stanford.edu;https://www.wisc.edu", "aff_unique_abbr": "Stanford Law;UW", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Data-Dependent Bounds for Online 
Portfolio Selection Without Lipschitzness and Smoothness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72840", "id": "4iTAUsyisM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c6483c8a68083af3383f91ee0dc6db95-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4iTAUsyisM", "openreview": "https://openreview.net/forum?id=4iTAUsyisM", "poster": "/media/PosterPDFs/NeurIPS%202023/72840.png?t=1701400913.4614496", "slides": "https://nips.cc/virtual/2023/poster/72840", "video": "https://nips.cc/virtual/2023/poster/72840", "author_site": "Chung-En Tsai, Ying-Ting Lin, Yen-Huan Li", "tldr": "", "abstract": "This work introduces the first small-loss and gradual-variation regret bounds for online portfolio selection, marking the first instances of data-dependent bounds for online convex optimization with non-Lipschitz, non-smooth losses. \nThe algorithms we propose exhibit sublinear regret rates in the worst cases and achieve logarithmic regrets when the data is \"easy,\" with per-round time almost linear in the number of investment alternatives. \nThe regret bounds are derived using novel smoothness characterizations of the logarithmic loss, a local norm-based analysis of following the regularized leader (FTRL) with self-concordant regularizers, which are not necessarily barriers, and an implicit variant of optimistic FTRL with the log-barrier.", "keywords": "Online portfolio selection;small-loss bound;gradual-variation bound;second-order bound;optimistic FTRL with self-concordant regularizers", "primary_area": "", "supplementary_material": "", "author": "Chung-En Tsai;Ying-Ting Lin;Yen-Huan Li", "authorids": "~Chung-En_Tsai1;~Ying-Ting_Lin1;~Yen-Huan_Li1", "gender": "M;M;", "homepage": "https://people.inf.ethz.ch/chtsai;;https://sites.google.com/site/yenhuanli/", "dblp": "325/7910;;70/1370", "google_scholar": "j01SlvUAAAAJ;;Mqz_yhAAAAAJ", "orcid": ";;", "linkedin": ";ying-ting-lin-a71113158/;", "or_profile": "~Chung-En_Tsai1;~Ying-Ting_Lin1;~Yen-Huan_Li1", "aff": "National Taiwan University;Department of Computer Science and Information Engineering, National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;csie.ntu.edu.tw;ntu.edu.tw", "position": "Undergrad student;MS student;Assistant Professor", "bibtex": "@inproceedings{\ntsai2023datadependent,\ntitle={Data-Dependent Bounds for Online Portfolio Selection Without Lipschitzness and Smoothness},\nauthor={Chung-En Tsai and Ying-Ting Lin and Yen-Huan Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4iTAUsyisM}\n}", "github": "", "project": "", "reviewers": "q9Qg;DgZU;Xwzr;cmFV;mCNh", "pdf_size": 361845, "rating": "6;6;7;7;7", "confidence": "4;2;2;2;4", "soundness": "3;3;4;3;4", "novelty": "3;3;3;3;4", "presentation": "3;2;3;3;4", "wc_summary": "112;110;260;127;45", "wc_strengths": "57;91;32;99;220", "wc_weaknesses": "316;193;14;173;115", "wc_questions": "3;122;60;35;1", "wc_limitations": "1;26;1;5;1", "wc_review": "489;542;367;439;382", "wc_reply_reviewers": "0;0;17;50;99", "wc_reply_authors": "0;0;0;10;56", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 130.8, 70.5050352811769 ],
"wc_strengths_avg": [ 99.8, 64.73762429993859 ], "wc_weaknesses_avg": [ 162.2, 98.9048027145295 ], "wc_questions_avg": [ 44.2, 44.61120935370392 ], "wc_limitations_avg": [ 6.8, 9.724196624914574 ], "wc_review_avg": [ 443.8, 65.4626611130345 ], "wc_reply_reviewers_avg": [ 33.2, 37.62658634529579 ], "wc_reply_authors_avg": [ 13.2, 21.747643550509096 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16666666666666669, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12732952642910885779&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ntu.edu.tw;csie.ntu.edu.tw;ntu.edu.tw", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Train 'n Trade: Foundations of Parameter Markets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72839", "id": "4iV26fZPUD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a9c1af5f76da0bd37903b6f23e96c74-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4iV26fZPUD", "openreview": "https://openreview.net/forum?id=4iV26fZPUD", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72839", "video": "https://nips.cc/virtual/2023/poster/72839", "author_site": "Tzu-Heng Huang, Harit Vishwakarma, Frederic Sala", "tldr": "", "abstract": "Organizations typically train large models individually. This is costly and time-consuming, particularly for large-scale foundation models. Such vertical production is known to be suboptimal. Inspired by this economic insight, we ask whether it is possible to leverage others' expertise by trading the constituent parts in models, i.e., sets of weights, as if they were market commodities. While recent advances in aligning and interpolating models suggest that doing so may be possible, a number of fundamental questions must be answered to create viable parameter markets. In this work, we address these basic questions, propose a framework containing the infrastructure necessary for market operations to take place, study strategies for exchanging parameters, and offer means for agents to monetize parameters. Excitingly, compared to agents who train siloed models from scratch, we show that it is possible to mutually gain by using the market, even in competitive settings. 
This suggests that the notion of parameter markets may be a useful paradigm for improving large-scale model training in the future.", "keywords": "Parameter Market;Pricing;Efficient Model Training", "primary_area": "", "supplementary_material": "/attachment/caf3774d6728e1fa309cf6b3f3075a30aac79ded.pdf", "author": "Tzu-Heng Huang;Harit Vishwakarma;Frederic Sala", "authorids": "~Tzu-Heng_Huang1;~Harit_Vishwakarma1;~Frederic_Sala1", "gender": "M;M;M", "homepage": "https://zihengh1.github.io/;https://harit7.github.io;https://pages.cs.wisc.edu/~fredsala/", "dblp": "185/7539;207/7622;133/3602", "google_scholar": "yIZ8NCQAAAAJ;pJF_ZZUAAAAJ;9KhIkNkAAAAJ", "orcid": ";;", "linkedin": "zihengh1/;harit7;", "or_profile": "~Tzu-Heng_Huang1;~Harit_Vishwakarma1;~Frederic_Sala1", "aff": "University of Wisconsin - Madison;University of Wisconsin, Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;wisc.edu;wisc.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023train,\ntitle={Train 'n Trade: Foundations of Parameter Markets},\nauthor={Tzu-Heng Huang and Harit Vishwakarma and Frederic Sala},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4iV26fZPUD}\n}", "github": "", "project": "", "reviewers": "iN1r;Tv8J;Mu3U;91tn", "pdf_size": 1138925, "rating": "4;5;6;6", "confidence": "2;3;3;3", "soundness": "2;2;4;3", "novelty": "2;2;4;3", "presentation": "2;2;4;3", "wc_summary": "129;361;104;94", "wc_strengths": "43;40;82;40", "wc_weaknesses": "160;359;65;83", "wc_questions": "20;66;13;15", "wc_limitations": "6;40;28;5", "wc_review": "358;866;292;237", "wc_reply_reviewers": "39;21;4;0", "wc_reply_authors": "52;37;40;36", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 172.0, 109.86127616225838 ], "wc_strengths_avg": [ 51.25, 17.795715776557007 ], "wc_weaknesses_avg": [ 166.75, 116.58982588545194 ], "wc_questions_avg": [ 28.5, 21.80022935659164 ], "wc_limitations_avg": [ 19.75, 14.872373717735847 ], "wc_review_avg": [ 438.25, 250.64953121839267 ], "wc_reply_reviewers_avg": [ 16.0, 15.443445211480501 ], "wc_reply_authors_avg": [ 41.25, 6.378675411086537 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6827150848158084711&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "wisc.edu;wisc.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Operator Learning with Neural Fields: Tackling PDEs on General Geometries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72838", "id": "4jEjq5nhg1", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/df54302388bbc145aacaa1a54a4a5933-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4jEjq5nhg1", "openreview": "https://openreview.net/forum?id=4jEjq5nhg1", "poster": "/media/PosterPDFs/NeurIPS%202023/72838.png?t=1701963975.0949147", "slides": "https://nips.cc/virtual/2023/poster/72838", "video": "https://nips.cc/virtual/2023/poster/72838", "author_site": "Louis Serrano, Lise Le Boudec, Armand Kassa\u00ef Koupa\u00ef, Thomas X Wang, Yuan Yin, Jean-No\u00ebl Vittaut, Patrick Gallinari", "tldr": "", "abstract": "Machine learning approaches for solving partial differential equations require learning mappings between function spaces. While convolutional or graph neural networks are constrained to discretized functions, neural operators present a promising milestone toward mapping functions directly. Despite impressive results they still face challenges with respect to the domain geometry and typically rely on some form of discretization. In order to alleviate such limitations, we present CORAL, a new method that leverages coordinate-based networks for solving PDEs on general geometries. CORAL is designed to remove constraints on the input mesh, making it applicable to any spatial sampling and geometry. Its ability extends to diverse problem domains, including PDE solving, spatio-temporal forecasting, and inverse problems like geometric design. CORAL demonstrates robust performance across multiple resolutions and performs well in both convex and non-convex domains, surpassing or performing on par with state-of-the-art models.", "keywords": "PDEs;Physics;Operator Learning;Deep Learning;Spatiotemporal", "primary_area": "", "supplementary_material": "/attachment/12fa3906552b521958bf8932951f9e535a0d0e9e.pdf", "author": "Louis Serrano;Lise Le Boudec;Armand Kassa\u00ef Koupa\u00ef;Thomas X Wang;Yuan Yin;Jean-No\u00ebl Vittaut;patrick gallinari", "authorids": "~Louis_Serrano1;~Lise_Le_Boudec2;~Armand_Kassa\u00ef_Koupa\u00ef1;~Thomas_X_Wang1;~Yuan_Yin1;~Jean-No\u00ebl_Vittaut1;~patrick_gallinari1", "gender": "M;F;M;Not Specified;M;M;M", "homepage": "https://www.isir.upmc.fr/personnel/serrano/;https://2ailesb.github.io;https://www.isir.upmc.fr/personnel/kassai/;https://thomasxwang.github.io/;https://www.isir.upmc.fr/personnel/yin/;https://webia.lip6.fr/~vittaut/;", "dblp": "349/0965;349/2988;329/7749;;;12/3351;g/PatrickGallinari", "google_scholar": ";HMYBPrEAAAAJ;uG3VApUAAAAJ;;https://scholar.google.com/citations?hl=fr;https://scholar.google.fr/citations?hl=fr;rFaxB20AAAAJ", "orcid": ";;;;0000-0003-1515-0696;0000-0001-6654-4199;", "linkedin": "louis-serrano-a0596578/;lise-le-boudec/;;;yuan-yin-nn/;vittaut/;", "or_profile": "~Louis_Serrano1;~Lise_Le_Boudec2;~Armand_Kassa\u00ef_Koupa\u00ef1;~Thomas_X_Wang1;~Yuan_Yin1;~Jean-No\u00ebl_Vittaut1;~patrick_gallinari1", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universite;Sorbonne Universit\u00e9, CNRS, ISIR;Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universite", "aff_domain": "isir.upmc.fr;isir.upmc.fr;isir.upmc.fr;sorbonne-universite.fr;isir.upmc.fr;sorbonne-universite.fr;sorbonne-universite.fr", "position": "PhD student;PhD student;PhD student;PhD 
student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nserrano2023operator,\ntitle={Operator Learning with Neural Fields: Tackling {PDE}s on General Geometries},\nauthor={Louis Serrano and Lise Le Boudec and Armand Kassa{\\\"\\i} Koupa{\\\"\\i} and Thomas X Wang and Yuan Yin and Jean-No{\\\"e}l Vittaut and patrick gallinari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4jEjq5nhg1}\n}", "github": "", "project": "", "reviewers": "A6MB;K5kw;e6Go;5RAL", "pdf_size": 13739758, "rating": "5;6;6;7", "confidence": "4;5;3;5", "soundness": "3;4;3;4", "novelty": "2;4;3;3", "presentation": "3;4;3;4", "wc_summary": "55;86;164;269", "wc_strengths": "30;30;94;153", "wc_weaknesses": "116;125;445;721", "wc_questions": "101;1;3;186", "wc_limitations": "7;1;4;46", "wc_review": "309;243;710;1375", "wc_reply_reviewers": "0;0;48;390", "wc_reply_authors": "0;0;18;362", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 143.5, 82.62717470663026 ], "wc_strengths_avg": [ 76.75, 51.192650839744566 ], "wc_weaknesses_avg": [ 351.75, 251.01531327789544 ], "wc_questions_avg": [ 72.75, 76.87123974543405 ], "wc_limitations_avg": [ 14.5, 18.309833423600555 ], "wc_review_avg": [ 659.25, 450.2257072846907 ], "wc_reply_reviewers_avg": [ 109.5, 163.12801721347563 ], "wc_reply_authors_avg": [ 95.0, 154.32757368662283 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15177533336959197621&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "isir.upmc.fr;isir.upmc.fr;isir.upmc.fr;sorbonne-universite.fr;isir.upmc.fr;sorbonne-universite.fr;sorbonne-universite.fr", "author_num": 7, "aff_unique_index": "0;0;0;1;2;2;1", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;Sorbonne University;Sorbonne Universit\u00e9", "aff_unique_dep": "Facult\u00e9 des Sciences;;CNRS, ISIR", "aff_unique_url": "https://www.upmc.fr;https://www.sorbonne-universite.fr;https://www.sorbonne-universite.fr", "aff_unique_abbr": "UPMC;Sorbonne;Sorbonne U", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Paris;;Paris VI", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "France" }, { "title": "WCLD: Curated Large Dataset of Criminal Cases from Wisconsin Circuit Courts", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73700", "id": "4kV7qDi0EB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29c80c549ed67ddd7259559c1bb07c1b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=4kV7qDi0EB", "openreview": "https://openreview.net/forum?id=4kV7qDi0EB", "poster": "/media/PosterPDFs/NeurIPS%202023/73700.png?t=1699628306.1286807", "slides": "https://nips.cc/virtual/2023/poster/73700", "video": "https://nips.cc/virtual/2023/poster/73700", "author_site": "Elliott Ash, Naman Goel, Nianyun Li, Claudia Marangon, Peiyao Sun", "tldr": "", "abstract": "Machine learning based decision-support tools in criminal justice systems are subjects of intense discussions and academic research. 
There are important open questions about the utility and fairness of such tools. Academic researchers often rely on a few small datasets that are not sufficient to empirically study various real-world aspects of these questions. In this paper, we contribute WCLD, a curated large dataset of 1.5 million criminal cases from circuit courts in the U.S. state of Wisconsin. We used reliable public data from 1970 to 2020 to curate attributes like prior criminal counts and recidivism outcomes. The dataset contains a large number of samples from five racial groups, in addition to information like sex and age (at judgment and first offense). Other attributes in this dataset include neighborhood characteristics obtained from census data, detailed types of offense, charge severity, case decisions, sentence lengths, year of filing, etc. We also provide pseudo-identifiers for judge, county and zipcode. The dataset will not only enable researchers to more rigorously study algorithmic fairness in the context of criminal justice, but also relate algorithmic challenges with various systemic issues. We also discuss in detail the process of constructing the dataset and provide a datasheet. The WCLD dataset is available at https://clezdata.github.io/wcld/.", "keywords": "Algorithmic Fairness;Machine Learning;Dataset;Criminal Justice", "primary_area": "", "supplementary_material": "/attachment/e2483db50ead129df53a7bcba880250d31c1fe66.pdf", "author": "Elliott Ash;Naman Goel;Nianyun Li;Claudia Marangon;Peiyao Sun", "authorids": "~Elliott_Ash1;~Naman_Goel1;~Nianyun_Li1;~Claudia_Marangon1;~Peiyao_Sun1", "gender": ";M;F;F;F", "homepage": "https://elliottash.com;http://goelnaman.github.io;;https://lawecondata.ethz.ch/group/scientific-team/marangon.html;", "dblp": "271/7737;163/3862;;;", "google_scholar": "o5uDfHMAAAAJ;;;;", "orcid": "0000-0002-6817-7529;;;;", "linkedin": ";;https://linkedin.com/in/nianyun-li;;peiyao-sun/", "or_profile": "~Elliott_Ash1;~Naman_Goel1;~Nianyun_Li1;~Claudia_Marangon1;~Peiyao_Sun1", "aff": "Swiss Federal Institute of Technology;University of Oxford;;ETHZ - ETH Zurich;ETH-Zurich", "aff_domain": "ethz.ch;oxford.ac.uk;;ethz.ch;gess.ethz.ch", "position": "Assistant Professor;Researcher;;PhD student;Researcher", "bibtex": "@inproceedings{\nash2023wcld,\ntitle={{WCLD}: Curated Large Dataset of Criminal Cases from Wisconsin Circuit Courts},\nauthor={Elliott Ash and Naman Goel and Nianyun Li and Claudia Marangon and Peiyao Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=4kV7qDi0EB}\n}", "github": "", "project": "", "reviewers": "HBKu;qVAQ;byDS", "pdf_size": 363168, "rating": "5;7;8", "confidence": "5;4;4", "wc_summary_and_contributions": "49;111;39", "wc_strengths": "61;149;60", "wc_improvement": "344;317;33", "wc_limitations": "106;138;36", "wc_correctness": "10;55;21", "wc_clarity": "5;34;23", "wc_relation_to_prior_work": "40;35;9", "wc_documentation": "33;81;12", "wc_additional_feedback": "1;1;1", "wc_review": "649;921;234", "wc_reply_reviewers": "104;0;0", "wc_reply_authors": "1155;590;101", "reply_reviewers": "1;0;0", "reply_authors": "5;4;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 66.33333333333333, 31.846855766656496 ], "wc_strengths_avg": [ 90.0, 41.72129751897305 ], "wc_improvement_avg": [ 231.33333333333334, 140.67535518190653 ], "wc_limitations_avg": [ 
93.33333333333333, 42.59368758656876 ], "wc_correctness_avg": [ 28.666666666666668, 19.154343864744856 ], "wc_clarity_avg": [ 20.666666666666668, 11.953614051360738 ], "wc_relation_to_prior_work_avg": [ 28.0, 13.589211407093005 ], "wc_documentation_avg": [ 42.0, 28.879058156387302 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 601.3333333333334, 282.4846111836104 ], "wc_reply_reviewers_avg": [ 34.666666666666664, 49.026070162267295 ], "wc_reply_authors_avg": [ 615.3333333333334, 430.66640866865333 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9102957796600493034&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ethz.ch;oxford.ac.uk;;ethz.ch;gess.ethz.ch", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Swiss Federal Institute of Technology;University of Oxford;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.ox.ac.uk;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;Oxford;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "title": "Multi-Modal Inverse Constrained Reinforcement Learning from a Mixture of Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72837", "id": "4mPiqh4pLb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bdc48324d6158a7edef88d673855a3f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4mPiqh4pLb", "openreview": "https://openreview.net/forum?id=4mPiqh4pLb", "poster": "/media/PosterPDFs/NeurIPS%202023/72837.png?t=1697432333.8094409", "slides": "https://nips.cc/virtual/2023/poster/72837", "video": "https://nips.cc/virtual/2023/poster/72837", "author_site": "Guanren Qiao, Guiliang Liu, Pascal Poupart, Zhiqiang Xu", "tldr": "", "abstract": "Inverse Constrained Reinforcement Learning (ICRL) aims to recover the underlying constraints respected by expert agents in a data-driven manner. Existing ICRL algorithms typically assume that the demonstration data is generated by a single type of expert. However, in practice, demonstrations often comprise a mixture of trajectories collected from various expert agents respecting different constraints, making it challenging to explain expert behaviors with a unified constraint function. To tackle this issue, we propose a Multi-Modal Inverse Constrained Reinforcement Learning (MMICRL) algorithm for simultaneously estimating multiple constraints corresponding to different types of experts. MMICRL constructs a flow-based density estimator that enables unsupervised expert identification from demonstrations, so as to infer the agent-specific constraints. Following these constraints, MMICRL imitates expert policies with a novel multi-modal constrained policy optimization objective that minimizes the agent-conditioned policy entropy and maximizes the unconditioned one. To enhance robustness, we incorporate this objective into the contrastive learning framework. This approach enables imitation policies to capture the diversity of behaviors among expert agents. 
Extensive experiments in both discrete and continuous environments show that MMICRL outperforms other baselines in terms of constraint recovery and control performance.", "keywords": "Inverse Constrained Reinforcement Learning;Learning from Demonstrations;Multi-Modal Learning", "primary_area": "", "supplementary_material": "/attachment/85f001c4ebc43b04e65d33d80b276395612b3690.pdf", "author": "Guanren Qiao;Guiliang Liu;Pascal Poupart;zhiqiang xu", "authorids": "~Guanren_Qiao1;~Guiliang_Liu1;~Pascal_Poupart2;~zhiqiang_xu1", "gender": "M;M;M;M", "homepage": "http://guiliang.me/;https://cs.uwaterloo.ca/~ppoupart;https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;https://github.com/qiaoguanren", "dblp": "220/5411;26/2122;72/51-3.html;337/4221", "google_scholar": "CuMylvEAAAAJ;https://scholar.google.ca/citations?user=KhAJWroAAAAJ;;https://scholar.google.com.hk/citations?view_op=list_works", "orcid": ";;0000-0002-5693-8933;", "linkedin": ";;;", "or_profile": "~Guiliang_Liu1;~Pascal_Poupart2;~zhiqiang_xu1;~Guanren_Qiao2", "aff": "The Chinese University of Hong Kong, Shenzhen;University of Waterloo;Mohamed bin Zayed University of Artificial Intelligence;Beijing University of Posts and Telecommunications", "aff_domain": "cuhk.edu.hk;uwaterloo.ca;mbzuai.ac.ae;bupt.edu.cn", "position": "Assistant Professor;Full Professor;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nqiao2023multimodal,\ntitle={Multi-Modal Inverse Constrained Reinforcement Learning from a Mixture of Demonstrations},\nauthor={Guanren Qiao and Guiliang Liu and Pascal Poupart and zhiqiang xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4mPiqh4pLb}\n}", "github": "", "project": "", "reviewers": "ZL9H;ieEm;KNkP;Gwnr", "pdf_size": 5841576, "rating": "4;6;7;8", "confidence": "2;2;3;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "97;106;457;55", "wc_strengths": "192;148;68;87", "wc_weaknesses": "333;94;71;142", "wc_questions": "186;1;45;61", "wc_limitations": "5;1;45;7", "wc_review": "813;350;686;352", "wc_reply_reviewers": "0;0;129;37", "wc_reply_authors": "68;68;579;27", "reply_reviewers": "0;0;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 178.75, 161.79674749512117 ], "wc_strengths_avg": [ 123.75, 49.25634476897367 ], "wc_weaknesses_avg": [ 160.0, 103.11401456640121 ], "wc_questions_avg": [ 73.25, 68.70362071972626 ], "wc_limitations_avg": [ 14.5, 17.741194999210173 ], "wc_review_avg": [ 550.25, 204.2478580059042 ], "wc_reply_reviewers_avg": [ 41.5, 52.72807601268986 ], "wc_reply_authors_avg": [ 185.5, 227.8030947989952 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8664002254439633, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4961877351221634509&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cuhk.edu.hk;uwaterloo.ca;mbzuai.ac.ae;bupt.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Chinese University of Hong Kong;University of Waterloo;Mohamed bin Zayed University of Artificial Intelligence;Beijing University of Posts and 
Telecommunications", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.cn;https://uwaterloo.ca;https://mbzuai.ac.ae;http://www.bupt.edu.cn/", "aff_unique_abbr": "CUHK;UW;MBZUAI;BUPT", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Shenzhen;;Beijing", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "China;Canada;United Arab Emirates" }, { "title": "Online Pricing for Multi-User Multi-Item Markets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72836", "id": "4mXYJzoPhf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5eee634cb9729b8bcc2ec9f2a46a74ae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4mXYJzoPhf", "openreview": "https://openreview.net/forum?id=4mXYJzoPhf", "poster": "/media/PosterPDFs/NeurIPS%202023/72836.png?t=1702322008.9527218", "slides": "https://nips.cc/virtual/2023/poster/72836", "video": "https://nips.cc/virtual/2023/poster/72836", "author_site": "Yigit Efe Erginbas, Thomas Courtade, Kannan Ramchandran, Soham Phade", "tldr": "", "abstract": "Online pricing has been the focus of extensive research in recent years, particularly in the context of selling an item to sequentially arriving users. However, what if a provider wants to maximize revenue by selling multiple items to multiple users in each round? This presents a complex problem, as the provider must intelligently offer the items to those users who value them the most without exceeding their highest acceptable prices. In this study, we tackle this challenge by designing online algorithms that can efficiently offer and price items while learning user valuations from accept/reject feedback. We focus on three user valuation models (fixed valuations, random experiences, and random valuations) and provide algorithms with nearly-optimal revenue regret guarantees. 
In particular, for any market setting with $N$ users, $M$ items, and load $L$ (which roughly corresponds to the maximum number of simultaneous allocations possible), our algorithms achieve regret of order $O(NM\\log\\log(LT))$ under the fixed valuations model, $\\widetilde{O}(\\sqrt{NMLT})$ under the random experiences model, and $\\widetilde{O}(\\sqrt{NMLT})$ under the random valuations model in $T$ rounds.", "keywords": "revenue;price;offer;online", "primary_area": "", "supplementary_material": "/attachment/684fcdb209e862dc9e7d9168d9484aa73f1c9573.zip", "author": "Yigit Efe Erginbas;Thomas Courtade;Kannan Ramchandran;Soham Rajesh Phade", "authorids": "~Yigit_Efe_Erginbas1;~Thomas_Courtade1;~Kannan_Ramchandran1;~Soham_Rajesh_Phade1", "gender": "M;M;M;M", "homepage": "https://erginbas.github.io/;https://people.eecs.berkeley.edu/~courtade/;https://www.eecs.berkeley.edu/~kannanr/;", "dblp": ";23/7883.html;53/5765;206/9094.html", "google_scholar": ";https://scholar.google.com.tw/citations?user=xRmmtzIAAAAJ;https://scholar.google.com.tw/citations?user=DcV-5RAAAAAJ;", "orcid": "0000-0001-5010-9766;;0000-0002-4567-328X;", "linkedin": ";;;", "or_profile": "~Yigit_Efe_Erginbas1;~Thomas_Courtade1;~Kannan_Ramchandran1;~Soham_Rajesh_Phade1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;SalesForce.com", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;salesforce.com", "position": "PhD student;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nerginbas2023online,\ntitle={Online Pricing for Multi-User Multi-Item Markets},\nauthor={Yigit Efe Erginbas and Thomas Courtade and Kannan Ramchandran and Soham Rajesh Phade},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4mXYJzoPhf}\n}", "github": "", "project": "", "reviewers": "bZxp;PhX7;ABaL;Hkdu;2rbX", "pdf_size": 3480041, "rating": "5;5;5;6;7", "confidence": "3;4;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;3;4", "wc_summary": "191;52;100;97;172", "wc_strengths": "29;49;93;85;64", "wc_weaknesses": "207;55;157;205;52", "wc_questions": "92;56;8;155;49", "wc_limitations": "1;14;104;1;10", "wc_review": "520;226;462;543;347", "wc_reply_reviewers": "38;0;31;0;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 122.4, 51.51543458032747 ], "wc_strengths_avg": [ 64.0, 23.375200533899168 ], "wc_weaknesses_avg": [ 135.2, 69.07503166846904 ], "wc_questions_avg": [ 72.0, 49.33558553417604 ], "wc_limitations_avg": [ 26.0, 39.329378332234036 ], "wc_review_avg": [ 419.6, 118.20084602066095 ], "wc_reply_reviewers_avg": [ 16.4, 15.679285698015711 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1020620726159658, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4587862498720464004&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;berkeley.edu;salesforce.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, 
Berkeley;Salesforce", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.salesforce.com", "aff_unique_abbr": "UC Berkeley;Salesforce", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stable Vectorization of Multiparameter Persistent Homology using Signed Barcodes as Measures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72835", "id": "4mwORQjAim", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d75c474bc01735929a1fab5d0de3b189-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4mwORQjAim", "openreview": "https://openreview.net/forum?id=4mwORQjAim", "poster": "/media/PosterPDFs/NeurIPS%202023/72835.png?t=1699883488.4890378", "slides": "https://nips.cc/virtual/2023/poster/72835", "video": "https://nips.cc/virtual/2023/poster/72835", "author_site": "David Loiseaux, Luis Scoccola, Mathieu Carri\u00e8re, Magnus Bakke Botnan, Steve OUDOT", "tldr": "", "abstract": "Persistent homology (PH) provides topological descriptors for geometric data, such as weighted graphs, which are interpretable, stable to perturbations, and invariant under, e.g., relabeling. Most applications of PH focus on the one-parameter case---where the descriptors summarize the changes in topology of data as it is filtered by a single quantity of interest---and there is now a wide array of methods enabling the use of one-parameter PH descriptors in data science, which rely on the stable vectorization of these descriptors as elements of a Hilbert space. Although the multiparameter PH (MPH) of data that is filtered by several quantities of interest encodes much richer information than its one-parameter counterpart, the scarceness of stability results for MPH descriptors has so far limited the available options for the stable vectorization of MPH. In this paper, we aim to bring together the best of both worlds by showing how the interpretation of signed barcodes---a recent family of MPH descriptors---as signed Radon measures leads to natural extensions of vectorization strategies from one parameter to multiple parameters. The resulting feature vectors are easy to define and to compute, and provably stable. 
While, as a proof of concept, we focus on simple choices of signed barcodes and vectorizations, we already see notable performance improvements when comparing our feature vectors to state-of-the-art topology-based methods on various types of data.", "keywords": "topological data analysis;multiparameter persistent homology;kernel methods;optimal transport", "primary_area": "", "supplementary_material": "/attachment/ba0a6c3b7479cad870e16fdc649e598e377b12b7.zip", "author": "David Loiseaux;Luis Scoccola;Mathieu Carri\u00e8re;Magnus Bakke Botnan;Steve Oudot", "authorids": "~David_Loiseaux1;~Luis_Scoccola1;~Mathieu_Carri\u00e8re1;~Magnus_Bakke_Botnan1;~Steve_Oudot1", "gender": "M;;;;M", "homepage": "https://davidlapous.github.io/;;https://mathieucarriere.github.io/website/;;https://geometrica.saclay.inria.fr/team/Steve.Oudot/", "dblp": "322/2006;;167/1015;;28/6883", "google_scholar": "oAjKKKcAAAAJ;;;;", "orcid": "0009-0003-5559-3712;;;;", "linkedin": "david-loiseaux/;;;;", "or_profile": "~David_Loiseaux1;~Luis_Scoccola1;~Mathieu_Carri\u00e8re1;~Magnus_Bakke_Botnan1;~Steve_Oudot1", "aff": "INRIA;;INRIA;;\u00c9cole Polytechnique", "aff_domain": "inria.fr;;inria.fr;;polytechnique.fr", "position": "PhD student;;Researcher;;Full Professor", "bibtex": "@inproceedings{\nloiseaux2023stable,\ntitle={Stable Vectorization of Multiparameter Persistent Homology using Signed Barcodes as Measures},\nauthor={David Loiseaux and Luis Scoccola and Mathieu Carri{\\`e}re and Magnus Bakke Botnan and Steve Oudot},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4mwORQjAim}\n}", "github": "", "project": "", "reviewers": "VRYZ;cn3Z;61ca;csyn", "pdf_size": 821580, "rating": "4;6;6;8", "confidence": "4;3;3;5", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "2;4;2;3", "wc_summary": "27;42;95;162", "wc_strengths": "15;23;67;134", "wc_weaknesses": "172;34;112;95", "wc_questions": "305;238;60;53", "wc_limitations": "290;9;13;35", "wc_review": "809;346;347;479", "wc_reply_reviewers": "932;122;0;18", "wc_reply_authors": "604;326;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 81.5, 52.89848769104841 ], "wc_strengths_avg": [ 59.75, 47.21956691881026 ], "wc_weaknesses_avg": [ 103.25, 49.15981590689697 ], "wc_questions_avg": [ 164.0, 110.10676636792128 ], "wc_limitations_avg": [ 86.75, 117.7632688914502 ], "wc_review_avg": [ 495.25, 189.0481089564241 ], "wc_reply_reviewers_avg": [ 268.0, 386.1787151048074 ], "wc_reply_authors_avg": [ 232.5, 252.42177005955728 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12681029966475741019&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "inria.fr;;inria.fr;;polytechnique.fr", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "INRIA;Ecole Polytechnique", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.polytechnique.edu", "aff_unique_abbr": "INRIA;X", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": 
"France" }, { "title": "Energy-Based Models for Anomaly Detection: A Manifold Diffusion Recovery Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72834", "id": "4nSDDokpfK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b6d7202750e8e32cd5270eb7fc131f7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4nSDDokpfK", "openreview": "https://openreview.net/forum?id=4nSDDokpfK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72834", "video": "https://nips.cc/virtual/2023/poster/72834", "author_site": "Sangwoong Yoon, Young-Uk Jin, Yung-Kyun Noh, Frank Park", "tldr": "", "abstract": "We present a new method of training energy-based models (EBMs) for anomaly detection that leverages low-dimensional structures within data. The proposed algorithm, Manifold Projection-Diffusion Recovery (MPDR), first perturbs a data point along a low-dimensional manifold that approximates the training dataset. Then, EBM is trained to maximize the probability of recovering the original data. The training involves the generation of negative samples via MCMC, as in conventional EBM training, but from a different distribution concentrated near the manifold. The resulting near-manifold negative samples are highly informative, reflecting relevant modes of variation in data. An energy function of MPDR effectively learns accurate boundaries of the training data distribution and excels at detecting out-of-distribution samples. Experimental results show that MPDR exhibits strong performance across various anomaly detection tasks involving diverse data types, such as images, vectors, and acoustic signals.", "keywords": "Energy-based Models;Anomaly Detection;Generative Models;Out-of-Distribution Detection;Recovery Likelihood", "primary_area": "", "supplementary_material": "", "author": "Sangwoong Yoon;Young-Uk Jin;Yung-Kyun Noh;Frank C. Park", "authorids": "~Sangwoong_Yoon1;~Young-Uk_Jin1;~Yung-Kyun_Noh1;~Frank_C._Park1", "gender": "M;M;M;M", "homepage": "https://swyoon.github.io/;;http://aais.hanyang.ac.kr;http://robotics.snu.ac.kr", "dblp": "237/1318;;54/6443;p/FrankChongwooPark", "google_scholar": "https://scholar.google.co.kr/citations?user=cH2rjfIAAAAJ;;https://scholar.google.com/citations?hl=en;u-h3PJIAAAAJ", "orcid": "0000-0002-7251-3230;;;0000-0002-0293-6975", "linkedin": ";eric-young-uk-jin-34037818a/;;", "or_profile": "~Sangwoong_Yoon1;~Young-Uk_Jin1;~Yung-Kyun_Noh1;~Frank_C._Park1", "aff": "Seoul National University;Samsung;Korea Institute for Advanced Study;Seoul National University", "aff_domain": "snu.ac.kr;samsung.com;kias.re.kr;snu.ac.kr", "position": "PhD student;Researcher;Affiliate Professor;Full Professor", "bibtex": "@inproceedings{\nyoon2023energybased,\ntitle={Energy-Based Models for Anomaly Detection: A Manifold Diffusion Recovery Approach},\nauthor={Sangwoong Yoon and Young-Uk Jin and Yung-Kyun Noh and Frank C. 
Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4nSDDokpfK}\n}", "github": "", "project": "", "reviewers": "Tcts;Pi2D;gSqM;dsqA;ekma", "pdf_size": 885928, "rating": "5;5;5;6;7", "confidence": "1;3;5;4;4", "soundness": "2;2;2;3;3", "novelty": "2;3;2;3;2", "presentation": "2;3;3;3;3", "wc_summary": "220;22;58;81;164", "wc_strengths": "278;15;10;61;93", "wc_weaknesses": "256;14;95;49;41", "wc_questions": "188;29;323;29;71", "wc_limitations": "248;6;5;1;44", "wc_review": "1190;86;491;221;413", "wc_reply_reviewers": "7;10;29;25;24", "wc_reply_authors": "0;0;304;239;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 1.3564659966250536 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 109.0, 72.52585745787498 ], "wc_strengths_avg": [ 91.4, 98.19287143168795 ], "wc_weaknesses_avg": [ 91.0, 86.5262965808661 ], "wc_questions_avg": [ 128.0, 113.55703412823003 ], "wc_limitations_avg": [ 60.8, 94.88814467571804 ], "wc_review_avg": [ 480.2, 382.3680949033274 ], "wc_reply_reviewers_avg": [ 19.0, 8.78635305459552 ], "wc_reply_authors_avg": [ 108.6, 134.58618056843727 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.33174440134851857, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14162625238928482262&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;samsung.com;kias.re.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Seoul National University;Samsung;Korea Institute for Advanced Study", "aff_unique_dep": ";Samsung;", "aff_unique_url": "https://www.snu.ac.kr;https://www.samsung.com;http://www.kaist.edu", "aff_unique_abbr": "SNU;Samsung;KIAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Censored Sampling of Diffusion Models Using 3 Minutes of Human Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72833", "id": "4qG2RKuZaA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5755ccd0efeca8852ae0a1193f319f6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4qG2RKuZaA", "openreview": "https://openreview.net/forum?id=4qG2RKuZaA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72833", "video": "https://nips.cc/virtual/2023/poster/72833", "author_site": "TaeHo Yoon, Kibeom Myoung, Keon Lee, Jaewoong Cho, Albert No, Ernest Ryu", "tldr": "", "abstract": "Diffusion models have recently shown remarkable success in high-quality image generation. Sometimes, however, a pre-trained diffusion model exhibits partial misalignment in the sense that the model can generate good images, but it sometimes outputs undesirable images. If so, we simply need to prevent the generation of the bad images, and we call this task censoring. In this work, we present censored generation with a pre-trained diffusion model using a reward model trained on minimal human feedback. 
We show that censoring can be accomplished with extreme human feedback efficiency and that labels generated with a mere few minutes of human feedback are sufficient.", "keywords": "Generative models;Diffusion probabilistic models;Controlled generation;Human Feedback;RLHF", "primary_area": "", "supplementary_material": "/attachment/5d29fb87b4913ad84b5f4249db5499ad908787cb.zip", "author": "TaeHo Yoon;Kibeom Myoung;Keon Lee;Jaewoong Cho;Albert No;Ernest K. Ryu", "authorids": "~TaeHo_Yoon1;~Kibeom_Myoung1;~Keon_Lee1;~Jaewoong_Cho1;~Albert_No1;~Ernest_K._Ryu1", "gender": "M;M;M;;;M", "homepage": "https://tetrzim.github.io/;;https://sites.google.com/view/keonlee9420;https://sites.google.com/view/jaewoongcho;http://albert-no.github.io/;http://www.math.snu.ac.kr/~ernestryu/", "dblp": "285/5543;;130/7625;184/3848;https://dblp.uni-trier.de/pid/23/11268;165/5192", "google_scholar": "YHkh8eYAAAAJ;GGw-0nIAAAAJ;V9uj_6cAAAAJ;;Kzj3HC8AAAAJ;CNOqUZoAAAAJ", "orcid": ";;;;;0000-0001-6820-9095", "linkedin": ";%EA%B8%B0%EB%B2%94-%EB%AA%85-6321a4257/;keonlee9420/;;;", "or_profile": "~TaeHo_Yoon1;~Kibeom_Myoung1;~Keon_Lee1;~Jaewoong_Cho1;~Albert_No1;~Ernest_K._Ryu1", "aff": "Seoul National University;Seoul National University, Seoul National University;KRAFTON;KRAFTON;Hongik University;Seoul National University", "aff_domain": "snu.ac.kr;math.snu.ac.kr;krafton.com;krafton.com;hongik.ac.kr;snu.ac.kr", "position": "PhD student;Researcher;Researcher;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyoon2023censored,\ntitle={Censored Sampling of Diffusion Models Using 3 Minutes of Human Feedback},\nauthor={TaeHo Yoon and Kibeom Myoung and Keon Lee and Jaewoong Cho and Albert No and Ernest K. Ryu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4qG2RKuZaA}\n}", "github": "", "project": "", "reviewers": "6Ndg;GFro;iXGj;zmCo;ADeQ;W62Q", "pdf_size": 30983407, "rating": "6;6;6;6;6;8", "confidence": "4;3;4;3;2;4", "soundness": "3;4;3;2;3;3", "novelty": "3;4;2;3;3;3", "presentation": "4;4;3;3;2;3", "wc_summary": "31;50;37;58;105;67", "wc_strengths": "61;48;57;137;47;78", "wc_weaknesses": "93;31;112;146;347;61", "wc_questions": "20;32;47;98;74;357", "wc_limitations": "2;9;88;2;1;1", "wc_review": "207;170;341;441;574;564", "wc_reply_reviewers": "12;14;67;5;30;0", "wc_reply_authors": "0;0;138;0;0;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "1;1;2;1;1;1", "rating_avg": [ 6.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999299 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 58.0, 24.23496097239138 ], "wc_strengths_avg": [ 71.33333333333333, 31.10555505943521 ], "wc_weaknesses_avg": [ 131.66666666666666, 102.94281044454839 ], "wc_questions_avg": [ 104.66666666666667, 115.78092339510088 ], "wc_limitations_avg": [ 17.166666666666668, 31.79841016291363 ], "wc_review_avg": [ 382.8333333333333, 158.49018546550096 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 22.447469543110845 ], "wc_reply_authors_avg": [ 23.0, 51.42956348249516 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4000000000000001, "gs_citation": 7, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10375364995180173378&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "snu.ac.kr;math.snu.ac.kr;krafton.com;krafton.com;hongik.ac.kr;snu.ac.kr", "author_num": 6, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Seoul National University;KRAFTON Inc.;Hongik University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;https://www.krafton.com;https://www.hongik.ac.kr", "aff_unique_abbr": "SNU;KRAFTON;HU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seoul", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "HiBug: On Human-Interpretable Model Debug", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72832", "id": "4sDHLxKb1L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f53ecc0d36a5d5d3d3e94d42c4b23ca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4sDHLxKb1L", "openreview": "https://openreview.net/forum?id=4sDHLxKb1L", "poster": "/media/PosterPDFs/NeurIPS%202023/72832.png?t=1701944108.8961287", "slides": "https://nips.cc/virtual/2023/poster/72832", "video": "https://nips.cc/virtual/2023/poster/72832", "author_site": "Muxi Chen, YU LI, Qiang Xu", "tldr": "", "abstract": "Machine learning models can frequently produce systematic errors on critical subsets (or slices) of data that share common attributes. Discovering and explaining such model bugs is crucial for reliable model deployment. However, existing bug discovery and interpretation methods usually involve heavy human intervention and annotation, which can be cumbersome and have low bug coverage.\n\nIn this paper, we propose HiBug, an automated framework for interpretable model debugging. Our approach utilizes large pre-trained models, such as chatGPT, to suggest human-understandable attributes that are related to the targeted computer vision tasks. By leveraging pre-trained vision-language models, we can efficiently identify common visual attributes of underperforming data slices using human-understandable terms. This enables us to uncover rare cases in the training data, identify spurious correlations in the model, and use the interpretable debug results to select or generate new training data for model improvement. 
Experimental results demonstrate the efficacy of the HiBug framework.", "keywords": "model debugging;error slice discovery", "primary_area": "", "supplementary_material": "/attachment/0f567a1edbfec5d42a0178d837482784189592cd.zip", "author": "Muxi Chen;YU LI;Qiang Xu", "authorids": "~Muxi_Chen1;~YU_LI10;~Qiang_Xu1", "gender": "M;Not Specified;M", "homepage": "https://github.com/mixiancmx;http://liyu.one;https://github.com/cure-lab", "dblp": "316/2877;34/2997-7;43/1230-1", "google_scholar": ";M0zhrM8AAAAJ;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Muxi_Chen1;~YU_LI10;~Qiang_Xu1", "aff": "The Chinese University of Hong Kong;Harbin Institute of Technology (Shen Zhen);The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;hit.edu.cn;cuhk.edu.hk", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023hibug,\ntitle={HiBug: On Human-Interpretable Model Debug},\nauthor={Muxi Chen and YU LI and Qiang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4sDHLxKb1L}\n}", "github": "", "project": "", "reviewers": "GQqq;Zx32;48rC;DbeT", "pdf_size": 4592489, "rating": "3;5;6;7", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;4;3", "wc_summary": "80;280;712;113", "wc_strengths": "55;32;116;63", "wc_weaknesses": "171;342;168;105", "wc_questions": "72;5;85;61", "wc_limitations": "52;49;75;46", "wc_review": "430;708;1156;388", "wc_reply_reviewers": "0;136;213;62", "wc_reply_authors": "97;30;520;0", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 296.25, 251.72244139130703 ], "wc_strengths_avg": [ 66.5, 30.76117683054405 ], "wc_weaknesses_avg": [ 196.5, 88.04118354497513 ], "wc_questions_avg": [ 55.75, 30.50717128807586 ], "wc_limitations_avg": [ 55.5, 11.4564392373896 ], "wc_review_avg": [ 670.5, 306.0894477109592 ], "wc_reply_reviewers_avg": [ 102.75, 79.8103220141355 ], "wc_reply_authors_avg": [ 161.75, 209.79558503457596 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8451542547285166, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16948734005299081574&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cse.cuhk.edu.hk;hit.edu.cn;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Harbin Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.hit.edu.cn/", "aff_unique_abbr": "CUHK;HIT", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Unsupervised Behavior Extraction via Random Intent Priors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72831", "id": "4vGVQVz5KG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a1c8a68e52499c9396854e3f967e37c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4vGVQVz5KG", "openreview": "https://openreview.net/forum?id=4vGVQVz5KG", 
"poster": "", "slides": "https://nips.cc/virtual/2023/poster/72831", "video": "https://nips.cc/virtual/2023/poster/72831", "author_site": "Hao Hu, Yiqin Yang, Jianing Ye, Ziqing Mai, Chongjie Zhang", "tldr": "", "abstract": "Reward-free data is abundant and contains rich prior knowledge of human behaviors, but it is not well exploited by offline reinforcement learning (RL) algorithms. In this paper, we propose UBER, an unsupervised approach to extract useful behaviors from offline reward-free datasets via diversified rewards. UBER assigns different pseudo-rewards sampled from a given prior distribution to different agents to extract a diverse set of behaviors, and reuse them as candidate policies to facilitate the learning of new tasks. Perhaps surprisingly, we show that rewards generated from random neural networks are sufficient to extract diverse and useful behaviors, some even close to expert ones. We provide both empirical and theoretical evidences to justify the use of random priors for the reward function. Experiments on multiple benchmarks showcase UBER's ability to learn effective and diverse behavior sets that enhance sample efficiency for online RL, outperforming existing baselines. By reducing reliance on human supervision, UBER broadens the applicability of RL to real-world scenarios with abundant reward-free data.", "keywords": "offline RL;reward-free;behavior extraction", "primary_area": "", "supplementary_material": "/attachment/6cae51765362e0116778f9d68d83d1871676c7ad.zip", "author": "Hao Hu;Yiqin Yang;Jianing Ye;Ziqing Mai;Chongjie Zhang", "authorids": "~Hao_Hu3;~Yiqin_Yang1;~Jianing_Ye1;~Ziqing_Mai1;~Chongjie_Zhang1", "gender": "M;M;M;F;", "homepage": "https://mousehu.github.io;https://www.researchgate.net/profile/Yiqin-Yang-2;https://heavycrab.github.io/;https://github.com/ZiqingMai;", "dblp": "67/6924-6;180/7725;287/5070;;29/6693", "google_scholar": "https://scholar.google.com/citations?hl=en;aHTi5IEAAAAJ;Sc9duQQAAAAJ;;LjxqXycAAAAJ", "orcid": ";;;;", "linkedin": "hao-hu-tsinghua;;;;", "or_profile": "~Hao_Hu3;~Yiqin_Yang1;~Jianing_Ye1;~Ziqing_Mai1;~Chongjie_Zhang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;MS student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhu2023unsupervised,\ntitle={Unsupervised Behavior Extraction via Random Intent Priors},\nauthor={Hao Hu and Yiqin Yang and Jianing Ye and Ziqing Mai and Chongjie Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4vGVQVz5KG}\n}", "github": "", "project": "", "reviewers": "wm6d;MUZS;XkfL;VsN9;822o", "pdf_size": 2911643, "rating": "4;5;6;6;7", "confidence": "4;4;3;4;3", "soundness": "1;3;3;2;3", "novelty": "2;3;3;2;3", "presentation": "3;3;2;3;3", "wc_summary": "151;55;83;93;147", "wc_strengths": "76;55;52;52;107", "wc_weaknesses": "230;52;261;343;44", "wc_questions": "39;15;97;119;21", "wc_limitations": "11;1;23;52;8", "wc_review": "507;178;516;659;327", "wc_reply_reviewers": "0;9;55;34;0", "wc_reply_authors": "69;100;202;22;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "2;2;4;2;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 
], "wc_summary_avg": [ 105.8, 37.42940020892667 ], "wc_strengths_avg": [ 68.4, 21.284736315021615 ], "wc_weaknesses_avg": [ 186.0, 118.6001686339442 ], "wc_questions_avg": [ 58.2, 42.00190471871484 ], "wc_limitations_avg": [ 19.0, 17.966635745180564 ], "wc_review_avg": [ 437.4, 167.09590060800414 ], "wc_reply_reviewers_avg": [ 19.6, 21.63885394377438 ], "wc_reply_authors_avg": [ 78.6, 70.93546362715902 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7205766921228922, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17927396018525926626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Med-UniC: Unifying Cross-Lingual Medical Vision-Language Pre-Training by Diminishing Bias", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72830", "id": "4vpsQdRBlK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/af38fb8e90d586f209235c94119ba193-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4vpsQdRBlK", "openreview": "https://openreview.net/forum?id=4vpsQdRBlK", "poster": "/media/PosterPDFs/NeurIPS%202023/72830.png?t=1699464967.2473576", "slides": "https://nips.cc/virtual/2023/poster/72830", "video": "https://nips.cc/virtual/2023/poster/72830", "author_site": "Zhongwei Wan, Che Liu, Mi Zhang, Jie Fu, Benyou Wang, Sibo Cheng, Lei Ma, C\u00e9sar Quilodr\u00e1n-Casas, Rossella Arcucci", "tldr": "", "abstract": "The scarcity of data presents a critical obstacle to the efficacy of medical vision-language pre-training (VLP). A potential solution lies in the combination of datasets from various language communities.\nNevertheless, the main challenge stems from the complexity of integrating diverse syntax and semantics, language-specific medical terminology, and culture-specific implicit knowledge. Therefore, one crucial aspect to consider is the presence of community bias caused by different languages.\nThis paper presents a novel framework named Unifying Cross-Lingual Medical Vision-Language Pre-Training (\\textbf{Med-UniC}), designed to integrate multi-modal medical data from the two most prevalent languages, English and Spanish. \nSpecifically, we propose \\textbf{C}ross-lingual \\textbf{T}ext Alignment \\textbf{R}egularization (\\textbf{CTR}) to explicitly unify cross-lingual semantic representations of medical reports originating from diverse language communities. \n\\textbf{CTR} is optimized through latent language disentanglement, rendering our optimization objective to not depend on negative samples, thereby significantly mitigating the bias from determining positive-negative sample pairs within analogous medical reports. 
Furthermore, it ensures that the cross-lingual representation is not biased toward any specific language community.\n\\textbf{Med-UniC} reaches superior performance across 5 medical image tasks and 10 datasets encompassing over 30 diseases, offering a versatile framework for unifying multi-modal medical data within diverse linguistic communities.\nThe experimental outcomes highlight the presence of community bias in cross-lingual VLP. Reducing this bias enhances the performance not only in vision-language tasks but also in uni-modal visual tasks.", "keywords": "Medical Vision Language Pretraining;Cross-lingual;Language bias", "primary_area": "", "supplementary_material": "/attachment/0c3b1c87f9d50c36fb165419f962e240a9134005.pdf", "author": "Zhongwei Wan;Che Liu;Mi Zhang;Jie Fu;Benyou Wang;Sibo Cheng;Lei Ma;C\u00e9sar Quilodr\u00e1n-Casas;Rossella Arcucci", "authorids": "~Zhongwei_Wan1;~Che_Liu3;~Mi_Zhang1;~Jie_Fu2;~Benyou_Wang2;~Sibo_Cheng1;~Lei_Ma3;~C\u00e9sar_Quilodr\u00e1n-Casas1;~Rossella_Arcucci1", "gender": "M;M;M;M;Not Specified;F;M;M;M", "homepage": "https://people.engineering.osu.edu/people/wan.512;https://mi-zhang.github.io/;https://wabyking.github.io/old.html;;https://nbic.pku.edu.cn/rcdw/kyry/02c5f5ce8e254b1e82a48bebd0a24c33.htm;https://www.imperial.ac.uk/people/r.arcucci;;;https://bigaidream.github.io/", "dblp": "260/6958.html;84/2519-2.html;169/1793;195/5965;20/6534-8;130/5772;;263/5584;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=r3A90uAAAAAJ;Jk4vJU8AAAAJ;Jj1UrwQAAAAJ;;oxy2ZQoAAAAJ;HED_458AAAAJ;;66osleIAAAAJ", "orcid": ";;0000-0002-1501-9914;0000-0002-8707-2589;0000-0001-6024-3854;0000-0002-9471-0585;;;0000-0002-4494-843X", "linkedin": ";mizhang/;;sibo-cheng-23a52711b/;maleiwhat/;https://www.linkedin.com/public-profile/settings?trk=d_flagship3_profile_self_view_public_profile;;cquilodran;", "or_profile": "~Zhongwei_Wan1;~Mi_Zhang1;~Benyou_Wang2;~Sibo_Cheng1;~Lei_Ma3;~Rossella_Arcucci1;~che_liu2;~Cesar_Quilodran1;~Jie_Fu1", "aff": "Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;The Ohio State University;The Chinese University of Hong Kong, Shenzhen;Imperial College London;Beijing Academy of Artificial Intelligence;Imperial College London ;Imperial College London;Imperial College London;Beijing Academy of Artificial Intelligence", "aff_domain": "siat.ac.cn;osu.edu;cuhk.edu.cn;ic.ac.uk;baai.ac.cn;imperial.ac.uk;ic.ac.uk;ic.ac.uk;baai.ac.cn", "position": "MS student;Associate Professor;Assistant Professor;Postdoc;Principal Researcher;Senior Lecturer;PhD student;Postdoc;Researcher", "bibtex": "@inproceedings{\nwan2023medunic,\ntitle={Med-UniC: Unifying Cross-Lingual Medical Vision-Language Pre-Training by Diminishing Bias},\nauthor={Zhongwei Wan and Che Liu and Mi Zhang and Jie Fu and Benyou Wang and Sibo Cheng and Lei Ma and C{\\'e}sar Quilodr{\\'a}n-Casas and Rossella Arcucci},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4vpsQdRBlK}\n}", "github": "", "project": "", "reviewers": "uyBA;NLjC;YFc1;ZNkV", "pdf_size": 0, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "51;61;84;92", "wc_strengths": "42;37;118;51", "wc_weaknesses": "101;90;136;72", "wc_questions": "9;117;17;2", "wc_limitations": "7;4;8;1", "wc_review": "210;309;363;218", "wc_reply_reviewers": "0;17;27;0", "wc_reply_authors": 
"45;75;27;54", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 16.62828914831589 ], "wc_strengths_avg": [ 62.0, 32.71849629796577 ], "wc_weaknesses_avg": [ 99.75, 23.34925052330374 ], "wc_questions_avg": [ 36.25, 46.92214296044033 ], "wc_limitations_avg": [ 5.0, 2.7386127875258306 ], "wc_review_avg": [ 275.0, 63.98046576885792 ], "wc_reply_reviewers_avg": [ 11.0, 11.554220008291344 ], "wc_reply_authors_avg": [ 50.25, 17.282577932704367 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12760421466493330720&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "siat.ac.cn;osu.edu;cuhk.edu.cn;ic.ac.uk;baai.ac.cn;imperial.ac.uk;ic.ac.uk;ic.ac.uk;baai.ac.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;4;3;3;3;4", "aff_unique_norm": "Chinese Academy of Sciences;Ohio State University;Chinese University of Hong Kong;Imperial College London;Beijing Academy of Artificial Intelligence", "aff_unique_dep": "Shenzhen Institutes of Advanced Technology;;;;", "aff_unique_url": "http://www.cas.cn;https://www.osu.edu;https://www.cuhk.edu.cn;https://www.imperial.ac.uk;https://www.baaic.cn", "aff_unique_abbr": "CAS;OSU;CUHK;ICL;BAAI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;0;2;0;2;2;2;0", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "Attention as Implicit Structural Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72829", "id": "4xckZu4MPG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e8a74988bc611495c2d3a5edac8493f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4xckZu4MPG", "openreview": "https://openreview.net/forum?id=4xckZu4MPG", "poster": "/media/PosterPDFs/NeurIPS%202023/72829.png?t=1701719014.5267696", "slides": "https://nips.cc/virtual/2023/poster/72829", "video": "https://nips.cc/virtual/2023/poster/72829", "author_site": "Ryan Singh, Christopher L Buckley", "tldr": "", "abstract": "Attention mechanisms play a crucial role in cognitive systems by allowing them to flexibly allocate cognitive resources. Transformers, in particular, have become a dominant architecture in machine learning, with attention as their central innovation. However, the underlying intuition and formalism of attention in Transformers is based on ideas of keys and queries in database management systems. In this work, we pursue a structural inference perspective, building upon, and bringing together, previous theoretical descriptions of attention such as; Gaussian Mixture Models, alignment mechanisms and Hopfield Networks. Specifically, we demonstrate that attention can be viewed as inference over an implicitly defined set of possible adjacency structures in a graphical model, revealing the generality of such a mechanism. This perspective unifies different attentional architectures in machine learning and suggests potential modifications and generalizations of attention. 
Here we investigate two such modifications and demonstrate their behaviour on explanatory toy problems: (a) extending the value function to incorporate more nodes of a graphical model yielding a mechanism with a bias toward attending multiple tokens; (b) introducing a geometric prior (with conjugate hyper-prior) over the adjacency structures producing a mechanism which dynamically scales the context window depending on input. Moreover, by describing a link between structural inference and precision-regulation in Predictive Coding Networks, we discuss how this framework can bridge the gap between attentional mechanisms in machine learning and Bayesian conceptions of attention in Neuroscience. We hope that, by providing a new lens on attention architectures, our work can guide the development of new and improved attentional mechanisms.", "keywords": "Attention;Structural Inference;Variational Inference;Predictive Coding;Graphical Models", "primary_area": "", "supplementary_material": "", "author": "Ryan Singh;Christopher Buckley", "authorids": "~Ryan_Singh1;~Christopher_Buckley1", "gender": "M;M", "homepage": ";https://christopherlbuckley.com/", "dblp": ";37/3540.html", "google_scholar": "Ukqus4oAAAAJ;https://scholar.google.co.uk/citations?user=nWuZ0XcAAAAJ", "orcid": "0009-0007-9598-4645;0000-0002-8551-9121", "linkedin": ";", "or_profile": "~Ryan_Singh1;~Christopher_Buckley1", "aff": "University of Sussex;University of Sussex", "aff_domain": "sussex.ac.uk;sussex.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsingh2023attention,\ntitle={Attention as Implicit Structural Inference},\nauthor={Ryan Singh and Christopher Buckley},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4xckZu4MPG}\n}", "github": "", "project": "", "reviewers": "JZQo;tiWP;khyL;psYk;14Rb", "pdf_size": 509786, "rating": "5;5;6;7;8", "confidence": "1;3;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;4", "presentation": "1;1;3;3;2", "wc_summary": "99;65;38;130;132", "wc_strengths": "17;81;93;52;36", "wc_weaknesses": "41;98;189;42;63", "wc_questions": "36;197;3;19;272", "wc_limitations": "6;28;32;1;9", "wc_review": "199;469;355;244;512", "wc_reply_reviewers": "11;0;76;0;27", "wc_reply_authors": "0;0;147;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.0, 0.8944271909999159 ], "wc_summary_avg": [ 92.8, 36.70095366608339 ], "wc_strengths_avg": [ 55.8, 28.038544898050613 ], "wc_weaknesses_avg": [ 86.6, 55.210868495251916 ], "wc_questions_avg": [ 105.4, 108.54786962441962 ], "wc_limitations_avg": [ 15.2, 12.416118556135006 ], "wc_review_avg": [ 355.8, 121.90061525685586 ], "wc_reply_reviewers_avg": [ 22.8, 28.378865375486736 ], "wc_reply_authors_avg": [ 29.4, 58.8 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.560112033611204, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10018704682877589713&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sussex.ac.uk;sussex.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Sussex", "aff_unique_dep": "", "aff_unique_url": "https://www.sussex.ac.uk", "aff_unique_abbr": "Sussex", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "On Proper Learnability between Average- and Worst-case Robustness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72828", "id": "4yXnnCK3r9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a952768bb85041f95ed06a5b60cf4d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4yXnnCK3r9", "openreview": "https://openreview.net/forum?id=4yXnnCK3r9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72828", "video": "https://nips.cc/virtual/2023/poster/72828", "author_site": "Vinod Raman, UNIQUE SUBEDI, Ambuj Tewari", "tldr": "", "abstract": "Recently, Montasser at al. (2019) showed that finite VC dimension is not sufficient for proper adversarially robust PAC learning. In light of this hardness, there is a growing effort to study what type of relaxations to the adversarially robust PAC learning setup can enable proper learnability. In this work, we initiate the study of proper learning under relaxations of the worst-case robust loss. We give a family of robust loss relaxations under which VC classes are properly PAC learnable with sample complexity close to what one would require in the standard PAC learning setup. On the other hand, we show that for an existing and natural relaxation of the worst-case robust loss, finite VC dimension is not sufficient for proper learning. Lastly, we give new generalization guarantees for the adversarially robust empirical risk minimizer.", "keywords": "Adversarial Robustness;PAC Learning", "primary_area": "", "supplementary_material": "/attachment/63b6c3a3b544c51ecedd241dfd630a64b19977c0.pdf", "author": "Vinod Raman;UNIQUE SUBEDI;Ambuj Tewari", "authorids": "~Vinod_Raman1;~UNIQUE_SUBEDI1;~Ambuj_Tewari1", "gender": "M;M;M", "homepage": "https://vinodkraman.github.io;https://unique-subedi.github.io/;https://www.ambujtewari.com", "dblp": "126/5382;;24/567", "google_scholar": "Wn5QzOgAAAAJ;DO16ipsAAAAJ;ttbl4FsAAAAJ", "orcid": ";;0000-0001-6969-7844", "linkedin": ";;", "or_profile": "~Vinod_Raman1;~UNIQUE_SUBEDI1;~Ambuj_Tewari1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nraman2023on,\ntitle={On Proper Learnability between Average- and Worst-case Robustness},\nauthor={Vinod Raman and UNIQUE SUBEDI and Ambuj Tewari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4yXnnCK3r9}\n}", "github": "", "project": "", "reviewers": "8idw;dXD9;rPBv;yAnv", "pdf_size": 591185, "rating": "6;7;7;7", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "88;94;61;157", "wc_strengths": "32;238;44;26", "wc_weaknesses": "87;465;59;39", "wc_questions": "6;92;90;269", "wc_limitations": "18;1;1;4", "wc_review": "231;890;255;495", "wc_reply_reviewers": "20;112;13;0", "wc_reply_authors": "0;69;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 35.17811819867572 ], 
"wc_strengths_avg": [ 85.0, 88.57200460642177 ], "wc_weaknesses_avg": [ 162.5, 175.47863117770208 ], "wc_questions_avg": [ 114.25, 95.84981742288298 ], "wc_limitations_avg": [ 6.0, 7.035623639735144 ], "wc_review_avg": [ 467.75, 264.7407930410423 ], "wc_reply_reviewers_avg": [ 36.25, 44.31915500096995 ], "wc_reply_authors_avg": [ 17.25, 29.877876430563134 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4554204763020465508&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "umich.edu;umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Unified Detection Framework for Inference-Stage Backdoor Defenses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72827", "id": "4zWEyYGGfI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1868a3c73d0d2a44c42458575fa8514c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4zWEyYGGfI", "openreview": "https://openreview.net/forum?id=4zWEyYGGfI", "poster": "/media/PosterPDFs/NeurIPS%202023/72827.png?t=1701873334.846031", "slides": "https://nips.cc/virtual/2023/poster/72827", "video": "https://nips.cc/virtual/2023/poster/72827", "author_site": "Xun Xian, Ganghua Wang, Jayanth Srinivasa, Ashish Kundu, Xuan Bi, Mingyi Hong, Jie Ding", "tldr": "", "abstract": "Backdoor attacks involve inserting poisoned samples during training, resulting in a model containing a hidden backdoor that can trigger specific behaviors without impacting performance on normal samples. These attacks are challenging to detect, as the backdoored model appears normal until activated by the backdoor trigger, rendering them particularly stealthy. In this study, we devise a unified inference-stage detection framework to defend against backdoor attacks. We first rigorously formulate the inference-stage backdoor detection problem, encompassing various existing methods, and discuss several challenges and limitations. We then propose a framework with provable guarantees on the false positive rate or the probability of misclassifying a clean sample. Further, we derive the most powerful detection rule to maximize the detection power, namely the rate of accurately identifying a backdoor sample, given a false positive rate under classical learning scenarios. Based on the theoretically optimal detection rule, we suggest a practical and effective approach for real-world applications based on the latent representations of backdoored deep nets. We extensively evaluate our method on 14 different backdoor attacks using Computer Vision (CV) and Natural Language Processing (NLP) benchmark datasets. The experimental findings align with our theoretical results. 
We significantly surpass prior methods, achieving, e.g., up to a 300\\% improvement in detection power (as evaluated by AUCROC) over the state-of-the-art defense against advanced adaptive backdoor attacks.", "keywords": "Backdoor attacks;Backdoor Defense;Security for AI", "primary_area": "", "supplementary_material": "/attachment/1a7d81f753ea7157430becf18a72cf70c7cd01ad.zip", "author": "Xun Xian;Ganghua Wang;Jayanth Srinivasa;Ashish Kundu;Xuan Bi;Mingyi Hong;Jie Ding", "authorids": "~Xun_Xian1;~Ganghua_Wang1;~Jayanth_Srinivasa1;~Ashish_Kundu1;~Xuan_Bi1;~Mingyi_Hong1;~Jie_Ding2", "gender": "M;M;M;;;M;M", "homepage": "https://jeremyxianx.github.io/;https://gwang.umn.edu;;;;http://people.ece.umn.edu/~mhong/mingyi.html;http://jding.org", "dblp": "262/3278;200/9632;285/5006;;;57/8053;94/1825-2", "google_scholar": "https://scholar.google.com/citations?hl=en;;HtNfeKYAAAAJ;;F3eRk9MAAAAJ;qRnP-p0AAAAJ;ZyqvoqcAAAAJ", "orcid": ";0000-0002-0888-167X;;;;;", "linkedin": ";;;;;;", "or_profile": "~Xun_Xian1;~Ganghua_Wang1;~Jayanth_Srinivasa1;~Ashish_Kundu1;~Xuan_Bi1;~Mingyi_Hong1;~Jie_Ding2", "aff": "University of Minnesota, Minneapolis;University of Minnesota, Minneapolis;Cisco;;University of Minnesota - Twin Cities;University of Minnesota, Minneapolis;University of Minnesota, Minneapolis", "aff_domain": "umn.edu;umn.edu;cisco.com;;umn.edu;umn.edu;umn.edu", "position": "PhD student;PhD student;Researcher;;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nxian2023a,\ntitle={A Unified Detection Framework for Inference-Stage Backdoor Defenses},\nauthor={Xun Xian and Ganghua Wang and Jayanth Srinivasa and Ashish Kundu and Xuan Bi and Mingyi Hong and Jie Ding},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=4zWEyYGGfI}\n}", "github": "", "project": "", "reviewers": "ibgE;1Ekh;JEgp;vAAp", "pdf_size": 2846293, "rating": "4;5;5;6", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;2", "wc_summary": "83;72;35;119", "wc_strengths": "69;144;96;75", "wc_weaknesses": "272;481;149;56", "wc_questions": "2;144;5;158", "wc_limitations": "1;11;5;6", "wc_review": "427;852;290;414", "wc_reply_reviewers": "54;257;0;33", "wc_reply_authors": "357;520;82;101", "reply_reviewers": "2;2;0;1", "reply_authors": "4;4;2;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 29.953088321573787 ], "wc_strengths_avg": [ 96.0, 29.47032405658275 ], "wc_weaknesses_avg": [ 239.5, 159.0919545420195 ], "wc_questions_avg": [ 77.25, 73.92352467246134 ], "wc_limitations_avg": [ 5.75, 3.5619517121937516 ], "wc_review_avg": [ 495.75, 212.51867564992963 ], "wc_reply_reviewers_avg": [ 86.0, 100.58578428386389 ], "wc_reply_authors_avg": [ 265.0, 182.943980496763 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11836167941178901644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "umn.edu;umn.edu;cisco.com;;umn.edu;umn.edu;umn.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Minnesota;Cisco Systems",
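The false-positive-rate guarantee described in this abstract is the kind of property a quantile calibration on held-out clean data can deliver; the sketch below is our hedged illustration of that generic mechanism, not the paper's implementation (the score function, e.g., a distance in latent-representation space, is left abstract).

```python
import numpy as np

def calibrate_threshold(clean_scores, alpha=0.05):
    # Choose the rejection threshold as the (1 - alpha) empirical quantile of
    # anomaly scores computed on held-out clean samples, so that a clean
    # input is flagged with probability roughly alpha (the false positive
    # rate being controlled); detection power then depends on how well the
    # score separates backdoor samples from clean ones.
    return np.quantile(clean_scores, 1.0 - alpha)

def is_backdoor(score, tau):
    # Flag an inference-stage input when its score exceeds the threshold.
    return score > tau
```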
"aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.cisco.com", "aff_unique_abbr": "UMN;Cisco", "aff_campus_unique_index": "0;0;2;0;0", "aff_campus_unique": "Minneapolis;;Twin Cities", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "50I7q86igD", "title": "Deep Evidence Regression for Weibull targets", "track": "main", "status": "Reject", "tldr": "", "abstract": "Machine Learning has invariantly found its way into various Credit Risk applications. Due to the intrinsic nature of Credit Risk, quantifying the uncertainty of the predicted risk metrics is essential, and applying uncertainty-aware deep learning models to credit risk settings can be very helpful. In this work, we have explored the application of a scalable UQ-aware deep learning technique, Deep Evidence Regression and applied it to predicting Loss Given Default. We contribute to the literature by extending the Deep Evidence Regression methodology to learning target variables generated by a Weibull process and provide the relevant learning framework. We demonstrate the application of our approach to both simulated and real-world data.", "keywords": "Deep Learning;Probabilistic Methods;Computational Finance;Credit risk management", "primary_area": "", "supplementary_material": "/attachment/68893d050bdbd9e5a820fb22999465c25e2ac629.zip", "author": "Ashish Dhiman", "authorids": "~Ashish_Dhiman1", "gender": "M", "homepage": "https://ashish1610dhiman.github.io/ashish_portfolio/", "dblp": "", "google_scholar": "xQ348nQAAAAJ", "orcid": "0009-0008-5165-9100", "linkedin": "ashish1610dhiman/", "or_profile": "~Ashish_Dhiman1", "aff": "Georgia Institute of Technology", "aff_domain": "gatech.edu", "position": "MS student", "bibtex": "@misc{\ndhiman2023deep,\ntitle={Deep Evidence Regression for Weibull targets},\nauthor={Ashish Dhiman},\nyear={2023},\nurl={https://openreview.net/forum?id=50I7q86igD}\n}", "github": "", "project": "", "reviewers": "v9ki;aVLB;gHV7", "site": "https://openreview.net/forum?id=50I7q86igD", "pdf_size": 598434, "rating": "3;3;4", "confidence": "3;4;3", "soundness": "3;2;3", "novelty": "1;2;2", "presentation": "2;1;2", "wc_summary": "216;77;88", "wc_strengths": "86;22;44", "wc_weaknesses": "236;3;91", "wc_questions": "113;347;85", "wc_limitations": "88;66;55", "wc_review": "739;515;363", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.3333333333333335, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 127.0, 63.09252464964979 ], "wc_strengths_avg": [ 50.666666666666664, 26.5497436689865 ], "wc_weaknesses_avg": [ 110.0, 96.06594956938002 ], "wc_questions_avg": [ 181.66666666666666, 117.46583427627891 ], "wc_limitations_avg": [ 69.66666666666667, 13.719410418171117 ], "wc_review_avg": [ 539.0, 154.43661051274944 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_-EUVO2s9XAJ:scholar.google.com/&scioq=Deep+Evidence+Regression+for+Weibull+targets&hl=en&as_sdt=0,33", 
"gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Recovering Simultaneously Structured Data via Non-Convex Iteratively Reweighted Least Squares", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72826", "id": "50hs53Zb3w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e33a4d41305fb34316df6f3fa8a0e58c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=50hs53Zb3w", "openreview": "https://openreview.net/forum?id=50hs53Zb3w", "poster": "/media/PosterPDFs/NeurIPS%202023/72826.png?t=1702335923.698428", "slides": "https://nips.cc/virtual/2023/poster/72826", "video": "https://nips.cc/virtual/2023/poster/72826", "author_site": "Christian K\u00fcmmerle, Christian K\u00fcmmerle, Johannes Maly", "tldr": "", "abstract": "We propose a new algorithm for the problem of recovering data that adheres to multiple, heterogenous low-dimensional structures from linear observations. Focussing on data matrices that are simultaneously row-sparse and low-rank, we propose and analyze an iteratively reweighted least squares (IRLS) algorithm that is able to leverage both structures. In particular, it optimizes a combination of non-convex surrogates for row-sparsity and rank, a balancing of which is built into the algorithm. We prove locally quadratic convergence of the iterates to a simultaneously structured data matrix in a regime of minimal sample complexity (up to constants and a logarithmic factor), which is known to be impossible for a combination of convex surrogates. 
In experiments, we show that the IRLS method exhibits favorable empirical convergence, identifying simultaneously row-sparse and low-rank matrices from fewer measurements than state-of-the-art methods.", "keywords": "low-rank models;sparsity;iteratively reweighted least squares;non-convex optimization;quadratic convergence;simultaneously structured data", "primary_area": "", "supplementary_material": "/attachment/10ff04d02b33a594e5bfd95872990045e78efad4.zip", "author": "Christian K\u00fcmmerle;Johannes Maly", "authorids": "~Christian_K\u00fcmmerle1;~Johannes_Maly1", "gender": "M;M", "homepage": "http://ckuemmerle.com;https://johannes-maly.github.io/", "dblp": "198/0699;220/3056", "google_scholar": "https://scholar.google.de/citations?user=zElx1AYAAAAJ;e5tABOYAAAAJ", "orcid": "0000-0001-9267-5379;0000-0001-7134-2495", "linkedin": ";", "or_profile": "~Christian_K\u00fcmmerle1;~Johannes_Maly1", "aff": "University of North Carolina at Charlotte;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "charlotte.edu;lmu.de", "position": "Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nk{\\\"u}mmerle2023recovering,\ntitle={Recovering Simultaneously Structured Data via Non-Convex Iteratively Reweighted Least Squares},\nauthor={Christian K{\\\"u}mmerle and Johannes Maly},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=50hs53Zb3w}\n}", "github": "", "project": "", "reviewers": "W2en;bSFz;P5td;3C1j", "pdf_size": 737731, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;4;3", "wc_summary": "64;103;60;69", "wc_strengths": "18;108;54;73", "wc_weaknesses": "107;113;60;155", "wc_questions": "97;78;31;37", "wc_limitations": "20;15;53;1", "wc_review": "306;417;258;335", "wc_reply_reviewers": "0;0;12;12", "wc_reply_authors": "54;363;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.0, 17.04406054905931 ], "wc_strengths_avg": [ 63.25, 32.52210786526605 ], "wc_weaknesses_avg": [ 108.75, 33.677700337166726 ], "wc_questions_avg": [ 60.75, 27.66202270261522 ], "wc_limitations_avg": [ 22.25, 19.070592544543548 ], "wc_review_avg": [ 329.0, 57.77110004145671 ], "wc_reply_reviewers_avg": [ 6.0, 6.0 ], "wc_reply_authors_avg": [ 104.25, 151.0072432037616 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10599540831456397672&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "charlotte.edu;lmu.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of North Carolina at Charlotte;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uncc.edu;https://www.lmu.de", "aff_unique_abbr": "UNCC;LMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Charlotte;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "title": "Towards a fuller understanding of neurons with Clustered Compositional Explanations", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72825", "id": "51PLYhMFWz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/debd0ae2083160397a22a4a8831c7230-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=51PLYhMFWz", "openreview": "https://openreview.net/forum?id=51PLYhMFWz", "poster": "/media/PosterPDFs/NeurIPS%202023/72825.png?t=1699108032.6396224", "slides": "https://nips.cc/virtual/2023/poster/72825", "video": "https://nips.cc/virtual/2023/poster/72825", "author_site": "Biagio La Rosa, Leilani Gilpin, Roberto Capobianco", "tldr": "", "abstract": "Compositional Explanations is a method for identifying logical formulas of concepts that approximate the neurons' behavior. However, these explanations are linked to the small spectrum of neuron activations (i.e., the highest ones) used to check the alignment, thus lacking completeness. In this paper, we propose a generalization, called Clustered Compositional Explanations, that combines Compositional Explanations with clustering and a novel search heuristic to approximate a broader spectrum of the neuron behavior. We define and address the problems connected to the application of these methods to multiple ranges of activations, analyze the insights retrievable by using our algorithm, and propose desiderata qualities that can be used to study the explanations returned by different algorithms.", "keywords": "compositional explanations;network dissection;explainable artificial intelligence;interpretability", "primary_area": "", "supplementary_material": "", "author": "Biagio La Rosa;Leilani H. Gilpin;Roberto Capobianco", "authorids": "~Biagio_La_Rosa1;~Leilani_H._Gilpin1;~Roberto_Capobianco1", "gender": ";F;", "homepage": "https://biagiomattialarosa.github.io/;http://lgilpin.com;http://robertocapobianco.com", "dblp": "269/4633;215/8848;132/9032", "google_scholar": "https://scholar.google.it/citations?user=qzonkqcAAAAJ;UFT_ijYAAAAJ;6VJaD6EAAAAJ", "orcid": "0000-0002-4071-170X;0000-0002-9741-2014;0000-0002-2219-215X", "linkedin": "larosabiagio/;leilanigilpin/;rcapobianco/", "or_profile": "~Biagio_La_Rosa1;~Leilani_H._Gilpin1;~Roberto_Capobianco1", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;Sapienza University of Rome", "aff_domain": "ucsc.edu;ucsc.edu;uniroma1.it", "position": "PhD student;Assistant Professor;Research Collaborator", "bibtex": "@inproceedings{\nrosa2023towards,\ntitle={Towards a fuller understanding of neurons with Clustered Compositional Explanations},\nauthor={Biagio La Rosa and Leilani H. 
Gilpin and Roberto Capobianco},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=51PLYhMFWz}\n}", "github": "", "project": "", "reviewers": "4jti;2vbF;CMC6;rkdS;BxbC", "pdf_size": 1379819, "rating": "2;6;6;6;6", "confidence": "5;4;5;3;2", "soundness": "2;3;3;2;3", "novelty": "1;2;3;2;3", "presentation": "2;4;3;2;2", "wc_summary": "41;148;121;87;67", "wc_strengths": "3;71;181;50;50", "wc_weaknesses": "2;138;116;291;15", "wc_questions": "1;43;107;250;6", "wc_limitations": "37;16;31;13;6", "wc_review": "84;416;556;691;144", "wc_reply_reviewers": "0;66;11;19;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 1.6000000000000003 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 92.8, 38.01262948021355 ], "wc_strengths_avg": [ 71.0, 59.3397000329459 ], "wc_weaknesses_avg": [ 112.4, 104.15488466701886 ], "wc_questions_avg": [ 81.4, 92.41774721340053 ], "wc_limitations_avg": [ 20.6, 11.568923891183656 ], "wc_review_avg": [ 378.2, 233.36186492227048 ], "wc_reply_reviewers_avg": [ 19.2, 24.47365930955156 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5144957554275263, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=394268981634648726&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ucsc.edu;ucsc.edu;uniroma1.it", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Santa Cruz;Sapienza University of Rome", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsc.edu;https://www.uniroma1.it", "aff_unique_abbr": "UCSC;Sapienza", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Santa Cruz;Rome", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Italy" }, { "title": "Quantifying the Cost of Learning in Queueing Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72824", "id": "54hYifmQZU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1502957929fc4257dd1b6daf7d869c2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=54hYifmQZU", "openreview": "https://openreview.net/forum?id=54hYifmQZU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72824", "video": "https://nips.cc/virtual/2023/poster/72824", "author_site": "Daniel Freund, Thodoris Lykouris, Wentao Weng", "tldr": "", "abstract": "Queueing systems are widely applicable stochastic models with use cases in communication networks, healthcare, service systems, etc. \nAlthough their optimal control has been extensively studied, most existing approaches assume perfect knowledge of the system parameters. Of course, this assumption rarely holds in practice where there is parameter uncertainty, thus motivating a recent line of work on bandit learning for queueing systems. This nascent stream of research focuses on the asymptotic performance of the proposed algorithms. \n\nIn this paper, we argue that an asymptotic metric, which focuses on late-stage performance, is insufficient to capture the intrinsic statistical complexity of learning in queueing systems which typically occurs in the early stage. 
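A toy discrete-time queue (entirely our construction, not the paper's model) makes the time-averaged queue length concrete before the metric is defined below: an algorithm that must learn the service parameters inflates this average early on, and that inflation is what the CLQ metric introduced next quantifies.

```python
import numpy as np

def avg_queue_length(T, p_arrival, p_service, seed=0):
    # Discrete-time single-server queue: at each step, one job arrives with
    # probability p_arrival, and the head-of-line job departs with
    # probability p_service when the queue is non-empty. Returns the
    # time-averaged queue length over a horizon of T steps.
    rng = np.random.default_rng(seed)
    q, area = 0, 0
    for _ in range(T):
        q += rng.random() < p_arrival
        if q > 0 and rng.random() < p_service:
            q -= 1
        area += q
    return area / T

print(avg_queue_length(10_000, p_arrival=0.4, p_service=0.6))
```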
Instead, we propose the *Cost of Learning in Queueing (CLQ)*, a new metric that quantifies the maximum increase in time-averaged queue length caused by parameter uncertainty.\nWe characterize the CLQ of a single-queue multi-server system, and then extend these results to multi-queue multi-server systems and networks of queues. In establishing our results, we propose a unified analysis framework for CLQ that bridges Lyapunov and bandit analysis, provides guarantees for a wide range of algorithms, and could be of independent interest.", "keywords": "bandits;learning;queueing systems;optimal control", "primary_area": "", "supplementary_material": "", "author": "Daniel Freund;Thodoris Lykouris;Wentao Weng", "authorids": "dfreund@mit.edu;~Thodoris_Lykouris1;~Wentao_Weng2", "gender": ";M;", "homepage": ";https://mitmgmtfaculty.mit.edu/tlykouris/;", "dblp": ";150/7250;", "google_scholar": ";Xda5bmoAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "dfreund@mit.edu;~Thodoris_Lykouris1;~Wentao_Weng2", "aff": ";Massachusetts Institute of Technology;", "aff_domain": ";mit.edu;", "position": ";Assistant Professor;", "bibtex": "@inproceedings{\nfreund2023quantifying,\ntitle={Quantifying the Cost of Learning in Queueing Systems},\nauthor={Daniel Freund and Thodoris Lykouris and Wentao Weng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=54hYifmQZU}\n}", "github": "", "project": "", "reviewers": "ZtnW;vTq6;KN9r;aVzT", "pdf_size": 603797, "rating": "4;6;7;8", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;4;4", "wc_summary": "80;81;74;209", "wc_strengths": "28;81;73;65", "wc_weaknesses": "176;190;105;5", "wc_questions": "59;292;54;89", "wc_limitations": "16;15;2;12", "wc_review": "359;659;308;380", "wc_reply_reviewers": "102;0;0;13", "wc_reply_authors": "288;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 111.0, 56.64362276549762 ], "wc_strengths_avg": [ 61.75, 20.29008378494283 ], "wc_weaknesses_avg": [ 119.0, 73.28369532167439 ], "wc_questions_avg": [ 123.5, 98.20005091648375 ], "wc_limitations_avg": [ 11.25, 5.539629951540085 ], "wc_review_avg": [ 426.5, 136.76348196795809 ], "wc_reply_reviewers_avg": [ 28.75, 42.62261723545376 ], "wc_reply_authors_avg": [ 72.0, 124.70765814495917 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14108931635526380522&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";mit.edu;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "GeoPhy: Differentiable Phylogenetic Inference via Geometric Gradients of Tree Topologies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72823", "id": "54z8M7NTbJ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/732c5757aa5577de9b103332cf7ac0bf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=54z8M7NTbJ", "openreview": "https://openreview.net/forum?id=54z8M7NTbJ", "poster": "/media/PosterPDFs/NeurIPS%202023/72823.png?t=1701401079.358999", "slides": "https://nips.cc/virtual/2023/poster/72823", "video": "https://nips.cc/virtual/2023/poster/72823", "author_site": "Takahiro Mimori, Michiaki Hamada", "tldr": "", "abstract": "Phylogenetic inference, grounded in molecular evolution models, is essential for understanding the evolutionary relationships in biological data. Accounting for the uncertainty of phylogenetic tree variables, which include tree topologies and evolutionary distances on branches, is crucial for accurately inferring species relationships from molecular data and tasks requiring variable marginalization. Variational Bayesian methods are key to developing scalable, practical models; however, it remains challenging to conduct phylogenetic inference without restricting the combinatorially vast number of possible tree topologies. In this work, we introduce a novel, fully differentiable formulation of phylogenetic inference that leverages a unique representation of topological distributions in continuous geometric spaces. Through practical considerations on design spaces and control variates for gradient estimations, our approach, GeoPhy, enables variational inference without limiting the topological candidates. In experiments using real benchmark datasets, GeoPhy significantly outperformed other approximate Bayesian methods that considered whole topologies.", "keywords": "phylogenetic inference;variational inference;control variates;hyperbolic space", "primary_area": "", "supplementary_material": "/attachment/141267eb01cd724b7fb291a431ec3de786cdd045.zip", "author": "Takahiro Mimori;Michiaki Hamada", "authorids": "~Takahiro_Mimori1;mhamada@waseda.jp", "gender": "M;", "homepage": ";", "dblp": "136/5826;", "google_scholar": "gWtxMYoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Takahiro_Mimori1;mhamada@waseda.jp", "aff": "RIKEN;", "aff_domain": "riken.jp;", "position": "Visiting Researcher;", "bibtex": "@inproceedings{\nmimori2023geophy,\ntitle={GeoPhy: Differentiable Phylogenetic Inference via Geometric Gradients of Tree Topologies},\nauthor={Takahiro Mimori and Michiaki Hamada},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=54z8M7NTbJ}\n}", "github": "", "project": "", "reviewers": "xhDC;AJKq;Wg7V;MJWZ;Bru6", "pdf_size": 3061902, "rating": "3;5;7;7;8", "confidence": "2;5;4;4;4", "soundness": "4;3;3;3;3", "novelty": "2;3;4;4;4", "presentation": "3;3;4;3;4", "wc_summary": "155;118;18;48;120", "wc_strengths": "43;57;97;5;132", "wc_weaknesses": "20;127;29;31;190", "wc_questions": "129;125;19;179;106", "wc_limitations": "14;3;8;47;13", "wc_review": "361;430;171;310;561", "wc_reply_reviewers": "0;145;13;22;351", "wc_reply_authors": "0;602;0;0;1101", "reply_reviewers": "0;2;1;1;2", "reply_authors": "1;3;1;1;3", "rating_avg": [ 6.0, 1.7888543819998317 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.8, 50.67701648676646 ], "wc_strengths_avg": [ 66.8, 43.919927140194574 ], "wc_weaknesses_avg": [ 79.4, 67.68929014253288 ], "wc_questions_avg": [ 111.6, 52.21340823964665 ], 
"wc_limitations_avg": [ 17.0, 15.504837954651444 ], "wc_review_avg": [ 366.6, 129.07765104773173 ], "wc_reply_reviewers_avg": [ 106.2, 133.03142485894077 ], "wc_reply_authors_avg": [ 340.6, 445.9962331679495 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.570544330734548, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18258643596908041682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "riken.jp;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "RIKEN", "aff_unique_dep": "", "aff_unique_url": "https://www.riken.jp", "aff_unique_abbr": "RIKEN", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Language Models are Weak Learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72822", "id": "559NJBfN20", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f94298bac4668db4dc77ddb0a244301-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=559NJBfN20", "openreview": "https://openreview.net/forum?id=559NJBfN20", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72822", "video": "https://nips.cc/virtual/2023/poster/72822", "author_site": "Hariharan Manikandan, Yiding Jiang, J. Zico Kolter", "tldr": "", "abstract": "A central notion in practical and theoretical machine learning is that of a *weak learner*, classifiers that achieve better-than-random performance (on any given distribution over data), even by a small margin. Such weak learners form the practical basis for canonical machine learning methods such as boosting. In this work, we illustrate that prompt-based large language models can operate effectively as said weak learners. Specifically, we illustrate the use of a large language model (LLM) as a weak learner in a boosting algorithm applied to tabular data. We show that by providing (properly sampled according to the distribution of interest) text descriptions of tabular data samples, LLMs can produce a summary of the samples that serves as a template for classification, and achieves the aim of acting as a weak learner on this task. We incorporate these models into a boosting approach, which in many settings can leverage the knowledge within the LLM to outperform traditional tree-based boosting. The model outperforms both few-shot learning and occasionally even more involved fine-tuning procedures, particularly for some tasks involving small numbers of data points. 
The results illustrate the potential for prompt-based LLMs to function not just as few-shot learners themselves, but as components of larger machine learning models.", "keywords": "language model;prompting;tabular data;summarization;boosting;adaboost", "primary_area": "", "supplementary_material": "/attachment/4aa68b506741f022d24e5862efb1e6ad4c17478c.zip", "author": "Hariharan Manikandan;Yiding Jiang;J Zico Kolter", "authorids": "~Hariharan_Manikandan1;~Yiding_Jiang2;~J_Zico_Kolter1", "gender": "M;M;M", "homepage": ";https://yidingjiang.github.io/;http://www.zicokolter.com", "dblp": "290/7152;;67/2526", "google_scholar": "KKgKqr8AAAAJ;x9qzWg8AAAAJ;UXh1I6UAAAAJ", "orcid": "0000-0002-9382-568X;;", "linkedin": "hmanikan/;;", "or_profile": "~Hariharan_Manikandan1;~Yiding_Jiang2;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmanikandan2023language,\ntitle={Language Models are Weak Learners},\nauthor={Hariharan Manikandan and Yiding Jiang and J Zico Kolter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=559NJBfN20}\n}", "github": "", "project": "", "reviewers": "NJzp;LBci;gzhs;inEG;im25", "pdf_size": 818224, "rating": "4;4;6;6;7", "confidence": "4;2;4;4;4", "soundness": "3;2;2;3;3", "novelty": "2;2;2;2;3", "presentation": "3;2;3;4;3", "wc_summary": "97;73;193;120;111", "wc_strengths": "46;86;153;56;62", "wc_weaknesses": "163;427;170;40;110", "wc_questions": "97;3;5;1;221", "wc_limitations": "9;34;1;28;55", "wc_review": "412;623;522;245;559", "wc_reply_reviewers": "0;84;320;0;21", "wc_reply_authors": "0;0;769;0;10", "reply_reviewers": "0;1;2;0;1", "reply_authors": "1;1;3;1;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 118.8, 40.350464681339176 ], "wc_strengths_avg": [ 80.6, 38.52064381601118 ], "wc_weaknesses_avg": [ 182.0, 131.05571334360056 ], "wc_questions_avg": [ 65.4, 85.90599513421633 ], "wc_limitations_avg": [ 25.4, 19.085072700935672 ], "wc_review_avg": [ 472.2, 132.6640870770986 ], "wc_reply_reviewers_avg": [ 85.0, 121.46769117753082 ], "wc_reply_authors_avg": [ 155.8, 306.62446086377395 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5833333333333331, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5478794129779292698&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "cmu.edu;andrew.cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Reward-Directed Conditional Diffusion: Provable Distribution Estimation and Reward Improvement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72821", "id": "58HwnnEdtF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/be93b16564e96859da8401b917f307c6-Abstract-Conference.html", 
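The boosting loop this abstract describes can be sketched generically; `llm_weak_learner` below is a hypothetical stand-in for the paper's prompt-and-summarize step (sample records by weight, ask the LLM for a classification template), not its actual interface.

```python
import numpy as np

def boost_llm(texts, y, llm_weak_learner, rounds=10):
    # AdaBoost with a prompted LLM as the weak learner. Labels y are a numpy
    # array in {-1, +1}; llm_weak_learner(texts, weights) is a hypothetical
    # callable returning a classifier h(text) -> {-1, +1} built from a
    # weighted sample of text descriptions.
    n = len(y)
    w = np.full(n, 1.0 / n)
    ensemble = []
    for _ in range(rounds):
        h = llm_weak_learner(texts, w)
        pred = np.array([h(t) for t in texts])
        err = float(np.clip(w @ (pred != y), 1e-10, 1 - 1e-10))
        alpha = 0.5 * np.log((1.0 - err) / err)  # standard AdaBoost weight
        w *= np.exp(-alpha * y * pred)           # up-weight mistakes
        w /= w.sum()
        ensemble.append((alpha, h))
    return lambda t: int(np.sign(sum(a * h(t) for a, h in ensemble)))
```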
"pdf": "https://openreview.net/pdf?id=58HwnnEdtF", "openreview": "https://openreview.net/forum?id=58HwnnEdtF", "poster": "/media/PosterPDFs/NeurIPS%202023/72821.png?t=1702276197.9563897", "slides": "https://nips.cc/virtual/2023/poster/72821", "video": "https://nips.cc/virtual/2023/poster/72821", "author_site": "Hui Yuan, Kaixuan Huang, Chengzhuo Ni, Minshuo Chen, Mengdi Wang", "tldr": "", "abstract": "We explore the methodology and theory of reward-directed generation via conditional diffusion models. Directed generation aims to generate samples with desired properties as measured by a reward function, which has broad applications in generative AI, reinforcement learning, and computational biology. We consider the common learning scenario where the dataset consists of majorly unlabeled data and a small set of data with noisy reward labels. Our approach leverages a learned reward function on the smaller data set as a pseudolabeler to label the unlabelled data. After pseudo-labelling, a conditional diffusion model (CDM) is trained on the data and samples are generated by setting a target value $a$ as the condition in CDM. From a theoretical standpoint, we show that this directed generator can effectively learn and sample from the reward-conditioned data distribution: 1. our model is capable of recovering the data's latent subspace representation. 2. the model generates samples moving closer to the user-specified target. The improvement in rewards of samples is influenced by a interplay between the strength of the reward signal, the distribution shift, and the cost of off-support extrapolation. \nWe provide empirical results to validate our theory and highlight the relationship between the strength of extrapolation and the quality of generated samples.", "keywords": "Theory;Diffusion Model;Reward Optimization;Low-dimensional Data;Distribution estimation", "primary_area": "", "supplementary_material": "/attachment/29a9933b1873cedecae6f23f2004073e2ee72252.pdf", "author": "Hui Yuan;Kaixuan Huang;Chengzhuo Ni;Minshuo Chen;Mengdi Wang", "authorids": "~Hui_Yuan2;~Kaixuan_Huang1;~Chengzhuo_Ni1;~Minshuo_Chen1;~Mengdi_Wang1", "gender": "F;M;M;M;F", "homepage": ";https://hackyhuang.github.io/;;https://minshuochen.github.io;http://mwang.princeton.edu", "dblp": "21/780-2;;241/5404;217/1509;", "google_scholar": "https://scholar.google.com/citations?hl=en;EfxwV6oAAAAJ;;qU9WvTgAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Hui_Yuan2;~Kaixuan_Huang1;~Chengzhuo_Ni1;~Minshuo_Chen1;~Mengdi_Wang1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Graduate student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyuan2023rewarddirected,\ntitle={Reward-Directed Conditional Diffusion: Provable Distribution Estimation and Reward Improvement},\nauthor={Hui Yuan and Kaixuan Huang and Chengzhuo Ni and Minshuo Chen and Mengdi Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=58HwnnEdtF}\n}", "github": "", "project": "", "reviewers": "uwg2;mN9F;ZNG8;umuF", "pdf_size": 8468377, "rating": "6;6;6;6", "confidence": "2;3;2;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "70;112;127;103", "wc_strengths": "32;94;42;103", "wc_weaknesses": "28;96;153;186", "wc_questions": "572;66;264;124", 
"wc_limitations": "15;53;31;22", "wc_review": "717;421;617;538", "wc_reply_reviewers": "0;45;103;45", "wc_reply_authors": "130;25;127;112", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.0, 20.89258241577618 ], "wc_strengths_avg": [ 67.75, 31.115711465431737 ], "wc_weaknesses_avg": [ 115.75, 60.02655662288151 ], "wc_questions_avg": [ 256.5, 195.8590054095037 ], "wc_limitations_avg": [ 30.25, 14.306903927824496 ], "wc_review_avg": [ 573.25, 108.39828181295125 ], "wc_reply_reviewers_avg": [ 48.25, 36.56073713698891 ], "wc_reply_authors_avg": [ 98.5, 42.97964634568321 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7807449558496445198&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "princeton.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian Metric Learning for Uncertainty Quantification in Image Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72820", "id": "58XMiu8kot", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da7ce04b3683b173691ecbb801f2690f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=58XMiu8kot", "openreview": "https://openreview.net/forum?id=58XMiu8kot", "poster": "/media/PosterPDFs/NeurIPS%202023/72820.png?t=1699977136.884964", "slides": "https://nips.cc/virtual/2023/poster/72820", "video": "https://nips.cc/virtual/2023/poster/72820", "author_site": "Frederik Warburg, Marco Miani, Silas Brack, S\u00f8ren Hauberg", "tldr": "", "abstract": "We propose a Bayesian encoder for metric learning. Rather than relying on neural amortization as done in prior works, we learn a distribution over the network weights with the Laplace Approximation. We first prove that the contrastive loss is a negative log-likelihood on the spherical space. We propose three methods that ensure a positive definite covariance matrix. Lastly, we present a novel decomposition of the Generalized Gauss-Newton approximation. 
Empirically, we show that our Laplacian Metric Learner (LAM) yields well-calibrated uncertainties, reliably detects out-of-distribution examples, and has state-of-the-art predictive performance.", "keywords": "Laplace approximation;metric learning;uncertainty quantification;weight posterior;bayesian", "primary_area": "", "supplementary_material": "/attachment/bafd1ad59416ea8d2cd61e870f65fd429f21acb3.pdf", "author": "Frederik Rahb\u00e6k Warburg;Marco Miani;Silas Brack;S\u00f8ren Hauberg", "authorids": "~Frederik_Rahb\u00e6k_Warburg1;~Marco_Miani1;~Silas_Brack1;~S\u00f8ren_Hauberg1", "gender": "M;M;M;M", "homepage": "https://frederikwarburg.github.io/;https://www.linkedin.com/in/marco-miani/;;http://www2.compute.dtu.dk/~sohau/", "dblp": ";296/1592;;39/7226", "google_scholar": "0Ozzy4IAAAAJ;https://scholar.google.com/citations?hl=it;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;silasbrack/;", "or_profile": "~Frederik_Rahb\u00e6k_Warburg1;~Marco_Miani1;~Silas_Brack1;~S\u00f8ren_Hauberg1", "aff": "Technical University of Denmark;Technical University of Denmark;Technical University of Denmark;Technical University of Denmark", "aff_domain": "dtu.dk;dtu.dk;dtu.dk;dtu.dk", "position": "PhD student;PhD student;MS student;Professor", "bibtex": "@inproceedings{\nwarburg2023bayesian,\ntitle={Bayesian Metric Learning for Uncertainty Quantification in Image Retrieval},\nauthor={Frederik Rahb{\\ae}k Warburg and Marco Miani and Silas Brack and S{\\o}ren Hauberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=58XMiu8kot}\n}", "github": "", "project": "", "reviewers": "SAQp;Stu9;Z8Se", "pdf_size": 13768323, "rating": "5;6;7", "confidence": "4;4;5", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "4;2;3", "wc_summary": "65;69;65", "wc_strengths": "40;265;59", "wc_weaknesses": "70;185;163", "wc_questions": "25;121;147", "wc_limitations": "9;12;6", "wc_review": "209;652;440", "wc_reply_reviewers": "26;51;10", "wc_reply_authors": "0;32;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 66.33333333333333, 1.8856180831641267 ], "wc_strengths_avg": [ 121.33333333333333, 101.88337559298978 ], "wc_weaknesses_avg": [ 139.33333333333334, 49.84197249529981 ], "wc_questions_avg": [ 97.66666666666667, 52.46797965320267 ], "wc_limitations_avg": [ 9.0, 2.449489742783178 ], "wc_review_avg": [ 433.6666666666667, 180.90943099303092 ], "wc_reply_reviewers_avg": [ 29.0, 16.87206764645835 ], "wc_reply_authors_avg": [ 10.666666666666666, 15.084944665313014 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2262684557493776802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "dtu.dk;dtu.dk;dtu.dk;dtu.dk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technical University of Denmark", "aff_unique_dep": "", "aff_unique_url": "https://www.dtu.dk", "aff_unique_abbr": "DTU", "aff_campus_unique_index": "", "aff_campus_unique": "",
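LAM's weight posterior is a Laplace approximation; a minimal sketch for a tiny parameter vector follows. This is our construction with the exact Hessian, whereas the paper develops a Generalized Gauss-Newton decomposition and positive-definiteness fixes precisely because the exact Hessian neither scales nor is guaranteed positive definite.

```python
import torch

def laplace_precision(loss, theta, prior_precision=1.0):
    # Laplace approximation at a MAP estimate theta: q(w) = N(theta, H^-1)
    # with H the Hessian of the loss plus the prior precision. Computing the
    # exact Hessian row by row is viable only for very small models.
    (g,) = torch.autograd.grad(loss, theta, create_graph=True)
    rows = [torch.autograd.grad(g[i], theta, retain_graph=True)[0] for i in range(g.numel())]
    return torch.stack(rows) + prior_precision * torch.eye(g.numel())

theta = torch.randn(3, requires_grad=True)
loss = (theta ** 2).sum() + theta.prod()
print(laplace_precision(loss, theta))
```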
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Denmark" }, { "title": "Efficient Meta Neural Heuristic for Multi-Objective Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72819", "id": "593fc38lhN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b1efde53be364a73914f58805a001731-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=593fc38lhN", "openreview": "https://openreview.net/forum?id=593fc38lhN", "poster": "/media/PosterPDFs/NeurIPS%202023/72819.png?t=1699956184.8729396", "slides": "https://nips.cc/virtual/2023/poster/72819", "video": "https://nips.cc/virtual/2023/poster/72819", "author_site": "Jinbiao Chen, Jiahai Wang, Zizhen Zhang, Zhiguang Cao, Te Ye, Siyuan Chen", "tldr": "", "abstract": "Recently, neural heuristics based on deep reinforcement learning have exhibited promise in solving multi-objective combinatorial optimization problems (MOCOPs). However, they are still struggling to achieve high learning efficiency and solution quality. To tackle this issue, we propose an efficient meta neural heuristic (EMNH), in which a meta-model is first trained and then fine-tuned with a few steps to solve corresponding single-objective subproblems. Specifically, for the training process, a (partial) architecture-shared multi-task model is leveraged to achieve parallel learning for the meta-model, so as to speed up the training; meanwhile, a scaled symmetric sampling method with respect to the weight vectors is designed to stabilize the training. For the fine-tuning process, an efficient hierarchical method is proposed to systematically tackle all the subproblems. Experimental results on the multi-objective traveling salesman problem (MOTSP), multi-objective capacitated vehicle routing problem (MOCVRP), and multi-objective knapsack problem (MOKP) show that, EMNH is able to outperform the state-of-the-art neural heuristics in terms of solution quality and learning efficiency, and yield competitive solutions to the strong traditional heuristics while consuming much shorter time.", "keywords": "neural heuristic;meta learning;deep reinforcement learning;multi-objective combinatorial optimization", "primary_area": "", "supplementary_material": "/attachment/35018e32d7ee240ef91629d41a2fa6473db1dc56.pdf", "author": "Jinbiao Chen;Jiahai Wang;Zizhen Zhang;Zhiguang Cao;Te Ye;Siyuan Chen", "authorids": "~Jinbiao_Chen1;~Jiahai_Wang1;zhangzzh7@mail.sysu.edu.cn;~Zhiguang_Cao1;yete@mail2.sysu.edu.cn;~Siyuan_Chen2", "gender": "M;M;;M;;M", "homepage": ";;;https://zhiguangcaosg.github.io/;;https://hilbert9221.github.io/", "dblp": ";00/2989;;178/8621;;84/5999-5", "google_scholar": ";;;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-7417-0430;;;0000-0002-4499-759X;;0000-0001-9272-4804", "linkedin": ";;;;;", "or_profile": "~Jinbiao_Chen1;~Jiahai_Wang1;zhangzzh7@mail.sysu.edu.cn;~Zhiguang_Cao1;yete@mail2.sysu.edu.cn;~Siyuan_Chen2", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;;Institute for Infocomm Research, A*STAR;;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;;i2r.a-star.edu.sg;;sysu.edu.cn", "position": "PhD student;Full Professor;;Scientist ;;PhD student", "bibtex": "@inproceedings{\nchen2023efficient,\ntitle={Efficient Meta Neural Heuristic for Multi-Objective Combinatorial Optimization},\nauthor={Jinbiao Chen and Jiahai Wang and Zizhen Zhang and Zhiguang Cao and Te Ye and Siyuan 
Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=593fc38lhN}\n}", "github": "", "project": "", "reviewers": "TTgk;EY2k;qYcb;R5xR;d8Lb", "pdf_size": 684921, "rating": "4;6;6;6;8", "confidence": "2;1;3;2;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "108;85;52;181;124", "wc_strengths": "17;79;19;86;61", "wc_weaknesses": "87;60;19;62;363", "wc_questions": "84;59;49;6;8", "wc_limitations": "23;78;10;78;13", "wc_review": "319;361;149;413;569", "wc_reply_reviewers": "34;19;20;20;79", "wc_reply_authors": "798;10;10;10;405", "reply_reviewers": "1;1;1;1;2", "reply_authors": "4;2;2;2;4", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 2.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 110.0, 42.96510211788167 ], "wc_strengths_avg": [ 52.4, 29.254743205162473 ], "wc_weaknesses_avg": [ 118.2, 124.32763168338728 ], "wc_questions_avg": [ 41.2, 30.16885811561319 ], "wc_limitations_avg": [ 40.4, 31.00064515457703 ], "wc_review_avg": [ 362.2, 136.13875274880405 ], "wc_reply_reviewers_avg": [ 34.4, 22.983472322519066 ], "wc_reply_authors_avg": [ 246.6, 315.30023786860676 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6201736729460422, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7112426161313261943&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sysu.edu.cn;sysu.edu.cn;;i2r.a-star.edu.sg;;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Sun Yat-sen University;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "SYSU;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "59D5vAGhHQ", "title": "Analyzing and Improving Greedy 2-Coordinate Updates For Equality-Constrained Optimization via Steepest Descent in the 1-Norm", "track": "main", "status": "Reject", "tldr": "", "abstract": "We first consider minimizing a smooth function subject to a summation constraint over its variables. \nBy exploiting a connection between the greedy 2-coordinate update for this problem and equality-constrained steepest descent in the 1-norm, we give a convergence rate for greedy selection that is faster than random selection and independent of the problem dimension $n$ (under a proximal Polyak-Lojasiewicz assumption). We then consider minimizing with both a summation constraint and bound constraints, as they arise in the support vector machine dual problem. Existing greedy rules for this setting either only guarantee trivial progress or require $O(n^2)$ time to compute. 
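To make the setting concrete before the abstract continues: with a constraint $\sum_i x_i = \gamma$, any feasible move shifts mass between coordinates, and the greedy 2-coordinate rule moves it from the coordinate with the largest partial derivative to the one with the smallest. A minimal sketch (ours, with a fixed step size in place of a line search):

```python
import numpy as np

def greedy_two_coordinate_step(x, grad, lr=0.01):
    # Transfer mass from the coordinate with the largest gradient entry to
    # the one with the smallest; this is steepest descent in the 1-norm
    # restricted to directions that preserve sum(x).
    i, j = int(np.argmax(grad)), int(np.argmin(grad))
    x = x.copy()
    x[i] -= lr
    x[j] += lr
    return x

# toy quadratic; the summation constraint holds at x0 and is preserved
Q = np.diag([1.0, 2.0, 3.0])
x = np.array([3.0, 2.0, 1.0])
for _ in range(200):
    x = greedy_two_coordinate_step(x, Q @ x)
print(x, x.sum())
```

The bound-constrained variant discussed next restricts which coordinate pairs are feasible, which is where a more careful selection rule becomes necessary.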
We show that bound- and summation-constrained steepest descent in the L1-norm guarantees more progress per iteration than previous rules and can be computed in only $O(n \\log n)$ time.", "keywords": "Coordinate descent;SVM;LIBSVM;Steepest descent;convex optimization", "primary_area": "", "supplementary_material": "/attachment/c099e14fd9998936f1d891af39c3dafaa2bccf74.pdf", "author": "Amrutha Varshini Ramesh;Aaron Mishkin;Mark Schmidt;Yihan Zhou;Jonathan Wilder Lavington;Jennifer She", "authorids": "~Amrutha_Varshini_Ramesh1;~Aaron_Mishkin1;~Mark_Schmidt1;~Yihan_Zhou3;~Jonathan_Wilder_Lavington1;~Jennifer_She1", "gender": "F;M;;M;M;F", "homepage": ";https://www.cs.stanford.edu/~amishkin/;;https://joeyandbluewhale.github.io/;https://wilderlavington.github.io/;", "dblp": "321/6727;230/3809;35/2638;199/6805;282/4019;243/5760", "google_scholar": "dutIaCwAAAAJ;j7qgASIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;Gjf_sd0AAAAJ", "orcid": ";0000-0002-5072-2314;;;;", "linkedin": "amrutha-varshini-ramesh;;;;;", "or_profile": "~Amrutha_Varshini_Ramesh1;~Aaron_Mishkin1;~Mark_Schmidt1;~Yihan_Zhou3;~Jonathan_Wilder_Lavington1;~Jennifer_She1", "aff": "University of British Columbia;Flatiron Institute;University of British Columbia;University of Texas at Austin;;Stanford University", "aff_domain": "ubc.ca;flatironinstitute.org;ubc.ca;cs.utexas.edu;;stanford.edu", "position": "PhD student;Intern;Assistant Professor;PhD student;;MS student", "bibtex": "@misc{\nramesh2023analyzing,\ntitle={Analyzing and Improving Greedy 2-Coordinate Updates For Equality-Constrained Optimization via Steepest Descent in the 1-Norm},\nauthor={Amrutha Varshini Ramesh and Aaron Mishkin and Mark Schmidt and Yihan Zhou and Jonathan Wilder Lavington and Jennifer She},\nyear={2023},\nurl={https://openreview.net/forum?id=59D5vAGhHQ}\n}", "github": "", "project": "", "reviewers": "pHfj;TqZB;yjwM;Ffqu", "site": "https://openreview.net/forum?id=59D5vAGhHQ", "pdf_size": 475325, "rating": "5;5;6;7", "confidence": "4;3;3;3", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "238;77;245;165", "wc_strengths": "46;36;79;169", "wc_weaknesses": "249;186;281;72", "wc_questions": "55;22;2;432", "wc_limitations": "27;1;1;1", "wc_review": "615;322;608;839", "wc_reply_reviewers": "154;117;70;121", "wc_reply_authors": "99;28;0;70", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 181.25, 67.8541634684269 ], "wc_strengths_avg": [ 82.5, 52.41421562896844 ], "wc_weaknesses_avg": [ 197.0, 79.85298992523698 ], "wc_questions_avg": [ 127.75, 176.67537321313347 ], "wc_limitations_avg": [ 7.5, 11.258330249197702 ], "wc_review_avg": [ 596.0, 183.4598048619915 ], "wc_reply_reviewers_avg": [ 115.5, 29.937434759845406 ], "wc_reply_authors_avg": [ 49.25, 38.022197464112985 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5204071239761978102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of British Columbia;Flatiron Institute;University of Texas at
Austin;Stanford University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ubc.ca;https://flatironinstitute.org;https://www.utexas.edu;https://www.stanford.edu", "aff_unique_abbr": "UBC;Flatiron;UT Austin;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Stanford", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "trajdata: A Unified Interface to Multiple Human Trajectory Datasets", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73699", "id": "5ADv5OfQgU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57bb67dbe17bfb660c8c63d089ea05b9-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=5ADv5OfQgU", "openreview": "https://openreview.net/forum?id=5ADv5OfQgU", "poster": "/media/PosterPDFs/NeurIPS%202023/73699.png?t=1699604248.9586158", "slides": "https://nips.cc/virtual/2023/poster/73699", "video": "https://nips.cc/virtual/2023/poster/73699", "author_site": "Boris Ivanovic, Guanyu Song, Igor Gilitschenski, Marco Pavone", "tldr": "", "abstract": "The field of trajectory forecasting has grown significantly in recent years, partially owing to the release of numerous large-scale, real-world human trajectory datasets for autonomous vehicles (AVs) and pedestrian motion tracking. While such datasets have been a boon for the community, they each use custom and unique data formats and APIs, making it cumbersome for researchers to train and evaluate methods across multiple datasets. To remedy this, we present trajdata: a unified interface to multiple human trajectory datasets. At its core, trajdata provides a simple, uniform, and efficient representation and API for trajectory and map data. As a demonstration of its capabilities, in this work we conduct a comprehensive empirical evaluation of existing trajectory datasets, providing users with a rich understanding of the data underpinning much of current pedestrian and AV motion forecasting research, and proposing suggestions for future datasets from these insights. 
trajdata is permissively licensed (Apache 2.0) and can be accessed online at https://github.com/NVlabs/trajdata.", "keywords": "multi-dataset benchmarking;data standardization;autonomous vehicles;pedestrian motion", "primary_area": "", "supplementary_material": "/attachment/de163e8bb5db98b8cfb9414e6c7bdc437c9e4eaf.zip", "author": "Boris Ivanovic;Guanyu Song;Igor Gilitschenski;Marco Pavone", "authorids": "~Boris_Ivanovic1;~Guanyu_Song1;~Igor_Gilitschenski1;~Marco_Pavone1", "gender": ";M;M;M", "homepage": "http://www.borisivanovic.com/;;https://www.gilitschenski.org/igor;https://web.stanford.edu/~pavone/", "dblp": "203/8356;;129/1281;91/3382-1.html", "google_scholar": "ey9AQcEAAAAJ;;Nuw1Y4oAAAAJ;RhOpyXcAAAAJ", "orcid": "0000-0002-8698-202X;;;", "linkedin": "boris-ivanovic-a3103064;guanyu-song-a53a811ba/;igorgilitschenski/;", "or_profile": "~Boris_Ivanovic1;~Guanyu_Song1;~Igor_Gilitschenski1;~Marco_Pavone1", "aff": "NVIDIA;University of Toronto;University of Toronto;Stanford University", "aff_domain": "nvidia.com;utoronto.ca;toronto.edu;stanford.edu", "position": "Researcher;Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nivanovic2023trajdata,\ntitle={trajdata: A Unified Interface to Multiple Human Trajectory Datasets},\nauthor={Boris Ivanovic and Guanyu Song and Igor Gilitschenski and Marco Pavone},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=5ADv5OfQgU}\n}", "github": "", "project": "", "reviewers": "65pW;fsaD;8txH;LYJj", "pdf_size": 2057962, "rating": "6;7;7;7", "confidence": "3;5;4;4", "wc_summary_and_contributions": "102;65;141;37", "wc_strengths": "53;162;137;46", "wc_improvement": "63;88;90;78", "wc_limitations": "10;54;32;85", "wc_correctness": "13;32;23;19", "wc_clarity": "8;9;8;5", "wc_relation_to_prior_work": "10;9;1;41", "wc_documentation": "7;16;1;38", "wc_additional_feedback": "1;1;1;1", "wc_review": "267;436;434;350", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "269;217;47;380", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 86.25, 39.124001584705006 ], "wc_strengths_avg": [ 99.5, 50.835519078691426 ], "wc_improvement_avg": [ 79.75, 10.685855136581255 ], "wc_limitations_avg": [ 45.25, 27.725214156071004 ], "wc_correctness_avg": [ 21.75, 6.905613658466566 ], "wc_clarity_avg": [ 7.5, 1.5 ], "wc_relation_to_prior_work_avg": [ 15.25, 15.270478054075452 ], "wc_documentation_avg": [ 15.5, 14.044571905188139 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 371.75, 69.72938763534354 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 228.25, 120.06951111751893 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=450895103749966390&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "nvidia.com;utoronto.ca;toronto.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "NVIDIA;University of Toronto;Stanford University", "aff_unique_dep": "NVIDIA Corporation;;", "aff_unique_url": "https://www.nvidia.com;https://www.utoronto.ca;https://www.stanford.edu", "aff_unique_abbr": "NVIDIA;U of T;Stanford", "aff_campus_unique_index": 
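As a usage illustration for trajdata, the snippet below follows the pattern in the project README; the class and argument names (`UnifiedDataset`, `desired_data`, `data_dirs`, `get_collate_fn`) and the `agent_hist` field are taken from the release current at publication and may differ in later versions, and the dataset path is a placeholder.

```python
from torch.utils.data import DataLoader
from trajdata import AgentBatch, UnifiedDataset

# One interface over many trajectory datasets; here the nuScenes mini split.
dataset = UnifiedDataset(
    desired_data=["nusc_mini"],
    data_dirs={"nusc_mini": "~/datasets/nuScenes"},  # placeholder path
)

loader = DataLoader(
    dataset,
    batch_size=64,
    collate_fn=dataset.get_collate_fn(),  # pads variable-length trajectories
)

batch: AgentBatch = next(iter(loader))
print(batch.agent_hist.shape)  # (batch, history length, state dim); assumed field name
```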
"1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Common Ground in Cooperative Communication", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72818", "id": "5AMa9fiyJq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/10b7e27c8eb9571fbbd2ae6a9f8c3855-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5AMa9fiyJq", "openreview": "https://openreview.net/forum?id=5AMa9fiyJq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72818", "video": "https://nips.cc/virtual/2023/poster/72818", "author_site": "Xiaoran Hao, Yash Jhaveri, Patrick Shafto", "tldr": "", "abstract": "Cooperative communication plays a fundamental role in theories of human-human interaction--cognition, culture, development, language, etc.--as well as human-robot interaction. The core challenge in cooperative communication is the problem of common ground: having enough shared knowledge and understanding to successfully communicate. Prior models of cooperative communication, however, uniformly assume the strongest form of common ground, perfect and complete knowledge sharing, and, therefore, fail to capture the core challenge of cooperative communication. We propose a general theory of cooperative communication that is mathematically principled and explicitly defines a spectrum of common ground possibilities, going well beyond that of perfect and complete knowledge sharing, on spaces that permit arbitrary representations of data and hypotheses. Our framework is a strict generalization of prior models of cooperative communication. After considering a parametric form of common ground and viewing the data selection and hypothesis inference processes of communication as encoding and decoding, we establish a connection to variational autoencoding, a powerful model in modern machine learning. 
Finally, we carry out a series of empirical simulations to support and elaborate on our theoretical results.", "keywords": "Cooperative Communication;Common Ground;Bayesian Theory", "primary_area": "", "supplementary_material": "/attachment/e4d16a61755e5df41f31d8614daab79762ad4f0e.pdf", "author": "Xiaoran Hao;Yash Jhaveri;Patrick Shafto", "authorids": "~Xiaoran_Hao1;~Yash_Jhaveri1;~Patrick_Shafto2", "gender": "M;;", "homepage": ";https://sasn.rutgers.edu/about-us/faculty-staff/yash-jhaveri;http://www.shaftolab.com", "dblp": "187/8349;;03/5979", "google_scholar": "https://scholar.google.com/citations?hl=en;;HUi6F7wAAAAJ", "orcid": ";;", "linkedin": "xiaoranhao/;;", "or_profile": "~Xiaoran_Hao1;~Yash_Jhaveri1;~Patrick_Shafto1", "aff": "Rutgers University, Newark;Rutgers University;Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;rutgers.edu", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nhao2023common,\ntitle={Common Ground in Cooperative Communication},\nauthor={Xiaoran Hao and Yash Jhaveri and Patrick Shafto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5AMa9fiyJq}\n}", "github": "", "project": "", "reviewers": "bSj9;KfB9;Xkzf;UbJG", "pdf_size": 2265338, "rating": "5;5;6;7", "confidence": "3;3;3;2", "soundness": "3;2;3;3", "novelty": "3;3;2;3", "presentation": "3;1;2;2", "wc_summary": "160;122;329;98", "wc_strengths": "83;129;173;59", "wc_weaknesses": "94;151;150;84", "wc_questions": "84;91;150;103", "wc_limitations": "13;57;171;47", "wc_review": "434;550;973;391", "wc_reply_reviewers": "0;0;366;0", "wc_reply_authors": "0;0;112;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 177.25, 90.3586603486351 ], "wc_strengths_avg": [ 111.0, 43.749285708454714 ], "wc_weaknesses_avg": [ 119.75, 30.95460385790779 ], "wc_questions_avg": [ 107.0, 25.739075352467502 ], "wc_limitations_avg": [ 72.0, 59.43904440685432 ], "wc_review_avg": [ 587.0, 230.32042896799234 ], "wc_reply_reviewers_avg": [ 91.5, 158.48264889255228 ], "wc_reply_authors_avg": [ 28.0, 48.49742261192856 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14623275552003006596&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 8, "email": "rutgers.edu;rutgers.edu;rutgers.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "0", "aff_campus_unique": "Newark;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Spectral Theory of Neural Prediction and Alignment", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72817", "id": "5B1ZK60jWn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9308d1b7d4ae2d3e2e67ae94b1078bf7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5B1ZK60jWn", "openreview": "https://openreview.net/forum?id=5B1ZK60jWn", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72817.png?t=1702311953.2222102", "slides": "https://nips.cc/virtual/2023/poster/72817", "video": "https://nips.cc/virtual/2023/poster/72817", "author_site": "Abdulkadir Canatar, Abdulkadir Canatar, Jenelle Feather, Albert Wakhloo, SueYeon Chung", "tldr": "", "abstract": "The representations of neural networks are often compared to those of biological systems by performing regression between the neural network responses and those measured from biological systems. Many different state-of-the-art deep neural networks yield similar neural predictions, but it remains unclear how to differentiate among models that perform equally well at predicting neural responses. To gain insight into this, we use a recent theoretical framework that relates the generalization error from regression to the spectral properties of the model and the target. We apply this theory to the case of regression between model activations and neural responses and decompose the neural prediction error in terms of the model eigenspectra, alignment of model eigenvectors and neural responses, and the training set size. Using this decomposition, we introduce geometrical measures to interpret the neural prediction error. We test a large number of deep neural networks that predict visual cortical activity and show that there are multiple types of geometries that result in low neural prediction error as measured via regression. The work demonstrates that carefully decomposing representational metrics can provide interpretability of how models are capturing neural activity and points the way towards improved models of neural activity.", "keywords": "computational neuroscience;neural manifolds;neuro-AI;statistical physics;representational geometry", "primary_area": "", "supplementary_material": "", "author": "Abdulkadir Canatar;Jenelle Feather;Albert Wakhloo;SueYeon Chung", "authorids": "~Abdulkadir_Canatar1;~Jenelle_Feather1;awakhloo@flatironinstitute.org;~SueYeon_Chung1", "gender": "M;;;F", "homepage": ";https://jenellefeather.com;;https://sites.google.com/site/sueyeonchung/", "dblp": "258/0594;243/9963.html;;173/5418", "google_scholar": "_F4TER8AAAAJ;TtTfnKIAAAAJ;;h7yVv0QAAAAJ", "orcid": "0000-0002-0140-5718;0000-0001-9753-2393;;", "linkedin": ";;;", "or_profile": "~Abdulkadir_Canatar1;~Jenelle_Feather1;awakhloo@flatironinstitute.org;~SueYeon_Chung1", "aff": "Flatiron Institute;Flatiron Institute;;Flatiron Institute / Simons Foundation", "aff_domain": "flatironinstitute.org;flatironinstitute.org;;simonsfoundation.org", "position": "Postdoc;Postdoc;;Principal Investigator", "bibtex": "@inproceedings{\ncanatar2023a,\ntitle={A Spectral Theory of Neural Prediction and Alignment},\nauthor={Abdulkadir Canatar and Jenelle Feather and Albert Wakhloo and SueYeon Chung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5B1ZK60jWn}\n}", "github": "", "project": "", "reviewers": "EnLw;RZx6;t8VM;3d9A;Q3vo;PvxB", "pdf_size": 8033440, "rating": "4;5;7;7;7;9", "confidence": "4;4;3;3;3;4", "soundness": "2;2;3;3;3;4", "novelty": "2;2;3;3;2;4", "presentation": "2;2;2;2;2;4", "wc_summary": "24;66;71;131;446;259", "wc_strengths": "19;53;44;35;48;38", "wc_weaknesses": "144;1006;516;99;232;345", "wc_questions": "66;180;58;1;77;56", "wc_limitations": "36;19;3;12;55;88", "wc_review": "289;1324;692;278;858;786", "wc_reply_reviewers": "14;57;11;24;52;0", "wc_reply_authors": "0;21;11;0;42;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": 
"1;2;2;1;2;1", "rating_avg": [ 6.5, 1.6072751268321592 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 166.16666666666666, 145.7525490533718 ], "wc_strengths_avg": [ 39.5, 10.935416468216166 ], "wc_weaknesses_avg": [ 390.3333333333333, 307.5487314592961 ], "wc_questions_avg": [ 73.0, 53.559935275041795 ], "wc_limitations_avg": [ 35.5, 28.91798748184251 ], "wc_review_avg": [ 704.5, 357.9300443755269 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 21.15550887016324 ], "wc_reply_authors_avg": [ 12.333333333333334, 15.347819244295117 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3110855084191276, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10197767208019494450&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "flatironinstitute.org;flatironinstitute.org;;simonsfoundation.org", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Flatiron Institute", "aff_unique_dep": "", "aff_unique_url": "https://flatironinstitute.org", "aff_unique_abbr": "Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Normalization for Non-stationary Time Series Forecasting: A Temporal Slice Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72816", "id": "5BqDSw8r5j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e19dab94882bc95ed094c4399cfda02-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5BqDSw8r5j", "openreview": "https://openreview.net/forum?id=5BqDSw8r5j", "poster": "/media/PosterPDFs/NeurIPS%202023/72816.png?t=1701181349.9899201", "slides": "https://nips.cc/virtual/2023/poster/72816", "video": "https://nips.cc/virtual/2023/poster/72816", "author_site": "Zhiding Liu, Mingyue Cheng, Zhi Li, Zhi Li, Zhenya Huang, Qi Liu, Yanhu Xie, Enhong Chen", "tldr": "", "abstract": "Deep learning models have progressively advanced time series forecasting due to their powerful capacity in capturing sequence dependence. Nevertheless, it is still challenging to make accurate predictions due to the existence of non-stationarity in real-world data, denoting the data distribution rapidly changes over time. To mitigate such a dilemma, several efforts have been conducted by reducing the non-stationarity with normalization operation. However, these methods typically overlook the distribution discrepancy between the input series and the horizon series, and assume that all time points within the same instance share the same statistical properties, which is too ideal and may lead to suboptimal relative improvements. To this end, we propose a novel slice-level adaptive normalization, referred to \\textbf{SAN}, which is a novel scheme for empowering time series forecasting with more flexible normalization and denormalization. SAN includes two crucial designs. First, SAN tries to eliminate the non-stationarity of time series in units of a local temporal slice (i.e., sub-series) rather than a global instance. Second, SAN employs a slight network module to independently model the evolving trends of statistical properties of raw time series. 
Consequently, SAN can serve as a general model-agnostic plugin that better alleviates the impact of the non-stationary nature of time series data. We instantiate the proposed SAN on four widely used forecasting models and evaluate its effectiveness on benchmark datasets. We also report several insightful findings that help analyze and understand the proposed SAN. We make our code publicly available.", "keywords": "Time series forecasting;deep learning;normalization", "primary_area": "", "supplementary_material": "/attachment/8293c3fd4a5851e80ad88dd50ef7fd1b138ec69c.zip", "author": "Zhiding Liu;Mingyue Cheng;Zhi Li;Zhenya Huang;Qi Liu;Yanhu Xie;Enhong Chen", "authorids": "~Zhiding_Liu1;~Mingyue_Cheng1;~Zhi_Li5;~Zhenya_Huang2;~Qi_Liu3;~Yanhu_Xie1;~Enhong_Chen1", "gender": "M;M;;M;M;M;M", "homepage": "https://github.com/icantnamemyself;https://mingyue-cheng.github.io/;;http://staff.ustc.edu.cn/~huangzhy/;http://staff.ustc.edu.cn/~qiliuql/;http://webvpn.ustc.edu.cn/https/77726476706e69737468656265737421fbf952d2243e635930068cb8/kcms2/author/detail?v=3uoqIhG8C45UgIk_lOaz15_RusC7NDQKI5VsYxVE1VeJdzfw85yP7-560rMDthvO_wTg1_0eHvciTyQkXiDdZg_bD0f87eCgHDhpUGpTbDxVRAl_12XSZYwcQH4n8NjM&uniplatform=NZKPT;http://staff.ustc.edu.cn/~cheneh", "dblp": "319/0321;240/6202;;178/8690;95/2446-3;;07/258", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;74IhSx8AAAAJ;;dVZuU90AAAAJ;5EoHAFwAAAAJ;;Q9h02J0AAAAJ", "orcid": "0000-0003-0994-473X;0000-0001-9873-7681;;0000-0003-1661-0420;0000-0001-6956-5550;0000-0003-1704-8249;0000-0002-4835-4102", "linkedin": ";;;;;;", "or_profile": "~Zhiding_Liu1;~Mingyue_Cheng1;~Zhi_Li5;~Zhenya_Huang2;~Qi_Liu3;~Yanhu_Xie1;~Enhong_Chen1", "aff": "University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;The First Affiliated Hospital of University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;PhD student;;Associate Professor;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nliu2023adaptive,\ntitle={Adaptive Normalization for Non-stationary Time Series Forecasting: A Temporal Slice Perspective},\nauthor={Zhiding Liu and Mingyue Cheng and Zhi Li and Zhenya Huang and Qi Liu and Yanhu Xie and Enhong Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5BqDSw8r5j}\n}", "github": "", "project": "", "reviewers": "uWXn;3LTc;jRLw;roaz", "pdf_size": 677782, "rating": "5;6;6;6", "confidence": "2;3;4;4", "soundness": "2;3;3;2", "novelty": "2;3;3;2", "presentation": "2;2;3;3", "wc_summary": "168;59;86;85", "wc_strengths": "97;5;163;32", "wc_weaknesses": "475;176;146;97", "wc_questions": "8;30;2;62", "wc_limitations": "38;55;2;72", "wc_review": "786;325;399;348", "wc_reply_reviewers": "64;203;202;19", "wc_reply_authors": "0;200;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 99.5, 41.00304866714181 ], "wc_strengths_avg": [ 74.25, 61.18568051431642 ], "wc_weaknesses_avg": [ 223.5, 147.91636150203263 ], "wc_questions_avg": [ 25.5, 23.510635891017493 ],
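A minimal sketch of the slice-level normalize/denormalize scheme described in the SAN abstract, assuming a window length divisible by the slice length. In SAN the horizon statistics come from the learned statistics-prediction module; `mu_hat` and `sigma_hat` below are placeholders for its outputs.

```python
import numpy as np

def slice_normalize(x, slice_len):
    """Remove per-slice mean/std instead of per-instance statistics.
    x: (length,) input window with length divisible by slice_len."""
    slices = x.reshape(-1, slice_len)
    mu = slices.mean(axis=1, keepdims=True)
    sigma = slices.std(axis=1, keepdims=True) + 1e-8
    return ((slices - mu) / sigma).ravel(), mu.ravel(), sigma.ravel()

def slice_denormalize(y_norm, mu_hat, sigma_hat, slice_len):
    """Restore forecast slices using statistics predicted for the horizon."""
    slices = y_norm.reshape(-1, slice_len)
    return (slices * sigma_hat[:, None] + mu_hat[:, None]).ravel()

x = np.sin(np.linspace(0, 12, 96)) + np.linspace(0, 3, 96)  # trending series
x_norm, mu, sigma = slice_normalize(x, slice_len=12)
# A backbone forecaster would map x_norm to a normalized horizon forecast;
# here we just round-trip the input to show denormalization is exact.
assert np.allclose(slice_denormalize(x_norm, mu, sigma, 12), x)
```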
"wc_limitations_avg": [ 41.75, 25.907286619790966 ], "wc_review_avg": [ 464.5, 187.53999573424332 ], "wc_reply_reviewers_avg": [ 122.0, 82.05790638323647 ], "wc_reply_authors_avg": [ 50.0, 86.60254037844386 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9218949111426052630&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "mail.ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Physion++: Evaluating Physical Scene Understanding that Requires Online Inference of Different Physical Properties", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73698", "id": "5Exz7eaBXH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d3e8011c912e651ab2a76e7935a1e464-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=5Exz7eaBXH", "openreview": "https://openreview.net/forum?id=5Exz7eaBXH", "poster": "/media/PosterPDFs/NeurIPS%202023/73698.png?t=1701763496.4125628", "slides": "https://nips.cc/virtual/2023/poster/73698", "video": "https://nips.cc/virtual/2023/poster/73698", "author_site": "Hsiao-Yu Tung, Mingyu Ding, Zhenfang Chen, Daniel Bear, Chuang Gan, Josh Tenenbaum, Dan Yamins, Judith Fan, Kevin Smith", "tldr": "", "abstract": "General physical scene understanding requires more than simply localizing and recognizing objects -- it requires knowledge that objects can have different latent properties (e.g., mass or elasticity), and that those properties affect the outcome of physical events. While there has been great progress in physical and video prediction models in recent years, benchmarks to test their performance typically do not require an understanding that objects have individual physical properties, or at best test only those properties that are directly observable (e.g., size or color). This work proposes a novel dataset and benchmark, termed Physion++, that rigorously evaluates visual physical prediction in artificial systems under circumstances where those predictions rely on accurate estimates of the latent physical properties of objects in the scene. Specifically, we test scenarios where accurate prediction relies on estimates of properties such as mass, friction, elasticity, and deformability, and where the values of those properties can only be inferred by observing how objects move and interact with other objects or fluids. We evaluate the performance of a number of state-of-the-art prediction models that span a variety of levels of learning vs. built-in knowledge, and compare that performance to a set of human predictions. We find that models that have been trained using standard regimes and datasets do not spontaneously learn to make inferences about latent properties, but also that models that encode objectness and physical states tend to make better predictions. 
However, there is still a huge gap between all models and human performance, and all models' predictions correlate poorly with those made by humans, suggesting that no state-of-the-art model is learning to make physical predictions in a human-like way. These results show that current deep learning models that succeed in some settings nevertheless fail to achieve human-level physical prediction in other cases, especially those where latent property inference is required. Project page: https://dingmyu.github.io/physion_v2/", "keywords": "intuitive physics;visual dynamics learning;physical property inference", "primary_area": "", "supplementary_material": "/attachment/c7626bc687f48e0c3db966b5ccadc8667a9c7813.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\ntung2023physion,\ntitle={Physion++: Evaluating Physical Scene Understanding that Requires Online Inference of Different Physical Properties},\nauthor={Hsiao-Yu Tung and Mingyu Ding and Zhenfang Chen and Daniel Bear and Chuang Gan and Joshua B. Tenenbaum and Daniel LK Yamins and Judith E Fan and Kevin A. Smith},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=5Exz7eaBXH}\n}", "github": "", "project": "", "reviewers": "vxoX;WLS8;zV2y;MSm3", "pdf_size": 800925, "rating": "6;6;7;7", "confidence": "2;3;4;3", "wc_summary_and_contributions": "51;75;127;24", "wc_strengths": "35;21;166;29", "wc_improvement": "182;90;346;126", "wc_limitations": "12;107;25;1", "wc_correctness": "10;6;15;29", "wc_clarity": "1;12;6;1", "wc_relation_to_prior_work": "1;10;11;61", "wc_documentation": "1;9;21;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "294;331;718;273", "wc_reply_reviewers": "18;0;69;0", "wc_reply_authors": "981;1060;1366;1222", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;3;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 69.25, 37.91025586830033 ], "wc_strengths_avg": [ 62.75, 59.8179529907201 ], "wc_improvement_avg": [ 186.0, 98.02040603874276 ], "wc_limitations_avg": [ 36.25, 41.72154719087009 ], "wc_correctness_avg": [ 15.0, 8.689073598491383 ], "wc_clarity_avg": [ 5.0, 4.527692569068709 ], "wc_relation_to_prior_work_avg": [ 20.75, 23.562417108607512 ], "wc_documentation_avg": [ 8.0, 8.18535277187245 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 404.0, 182.47328571601926 ], "wc_reply_reviewers_avg": [ 21.75, 28.252212302756046 ], "wc_reply_authors_avg": [ 1157.25, 148.5687971951042 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3609127037978302385&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "", "author_num": 1 }, { "title": "Provable Guarantees for Neural Networks via Gradient Feature Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72815", "id": "5F04bU79eK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aebec8058f23a445353c83ede0e1ec48-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5F04bU79eK", "openreview": 
"https://openreview.net/forum?id=5F04bU79eK", "poster": "/media/PosterPDFs/NeurIPS%202023/72815.png?t=1697680939.2714145", "slides": "https://nips.cc/virtual/2023/poster/72815", "video": "https://nips.cc/virtual/2023/poster/72815", "author_site": "Zhenmei Shi, Junyi Wei, Yingyu Liang", "tldr": "", "abstract": "Neural networks have achieved remarkable empirical performance, while the current theoretical analysis is not adequate for understanding their success, e.g., the Neural Tangent Kernel approach fails to capture their key feature learning ability, while recent analyses on feature learning are typically problem-specific. This work proposes a unified analysis framework for two-layer networks trained by gradient descent. The framework is centered around the principle of feature learning from gradients, and its effectiveness is demonstrated by applications in several prototypical problems, such as mixtures of Gaussians and parity functions.\nThe framework also sheds light on interesting network learning phenomena such as feature learning beyond kernels and the lottery ticket hypothesis.", "keywords": "neural networks;gradient descent;feature learning;provable guarantees;theoretical analysis", "primary_area": "", "supplementary_material": "", "author": "Zhenmei Shi;Junyi Wei;Yingyu Liang", "authorids": "~Zhenmei_Shi1;~Junyi_Wei1;~Yingyu_Liang1", "gender": "M;F;", "homepage": "http://zhmeishi.github.io/;;", "dblp": "246/5216;166/6146;", "google_scholar": "0oeNnzMAAAAJ;Kb1GL40AAAAJ;", "orcid": ";;", "linkedin": "zhenmei-shi-56408a113/;Junyi-Jenny-Wei-04ba979b/;", "or_profile": "~Zhenmei_Shi1;~Junyi_Wei1;~Yingyu_Liang1", "aff": "University of Wisconsin - Madison;University of Wisconsin, Madison;", "aff_domain": "wisc.edu;wisc.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nshi2023provable,\ntitle={Provable Guarantees for Neural Networks via Gradient Feature Learning},\nauthor={Zhenmei Shi and Junyi Wei and Yingyu Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5F04bU79eK}\n}", "github": "", "project": "", "reviewers": "H9VP;zxbj;Sj27;3J3m", "pdf_size": 1163910, "rating": "5;6;7;7", "confidence": "3;3;3;3", "soundness": "3;4;4;4", "novelty": "3;3;4;3", "presentation": "2;2;4;3", "wc_summary": "78;40;140;94", "wc_strengths": "62;71;125;86", "wc_weaknesses": "158;63;134;68", "wc_questions": "132;68;16;109", "wc_limitations": "62;9;1;23", "wc_review": "492;251;416;380", "wc_reply_reviewers": "54;12;32;21", "wc_reply_authors": "40;15;21;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 88.0, 35.86084215408221 ], "wc_strengths_avg": [ 86.0, 24.093567606313517 ], "wc_weaknesses_avg": [ 105.75, 41.172654760168186 ], "wc_questions_avg": [ 81.25, 44.09861108923953 ], "wc_limitations_avg": [ 23.75, 23.44541533008106 ], "wc_review_avg": [ 384.75, 87.16471476463397 ], "wc_reply_reviewers_avg": [ 29.75, 15.690363284513205 ], "wc_reply_authors_avg": [ 23.75, 9.627434756984853 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15027933686957532275&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "wisc.edu;wisc.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Squeeze, Recover and Relabel: Dataset Condensation at ImageNet Scale From A New Perspective", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72814", "id": "5Fgdk3hZpb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e91fb65c6324a984ea9ef39a5b84af04-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5Fgdk3hZpb", "openreview": "https://openreview.net/forum?id=5Fgdk3hZpb", "poster": "/media/PosterPDFs/NeurIPS%202023/72814.png?t=1698446297.2232969", "slides": "https://nips.cc/virtual/2023/poster/72814", "video": "https://nips.cc/virtual/2023/poster/72814", "author_site": "Zeyuan Yin, Eric Xing, Zhiqiang Shen", "tldr": "", "abstract": "We present a new dataset condensation framework termed Squeeze, Recover and Relabel (SRe$^2$L) that decouples the bilevel optimization of model and synthetic data during training, to handle varying scales of datasets, model architectures and image resolutions for efficient dataset condensation. The proposed method demonstrates flexibility across diverse dataset scales and exhibits multiple advantages in terms of arbitrary resolutions of synthesized images, low training cost and memory consumption with high-resolution synthesis, and the ability to scale up to arbitrary evaluation network architectures. Extensive experiments are conducted on Tiny-ImageNet and full ImageNet-1K datasets. Under 50 IPC, our approach achieves the highest 42.5\\% and 60.8\\% validation accuracy on Tiny-ImageNet and ImageNet-1K, outperforming all previous state-of-the-art methods by margins of 14.5\\% and 32.9\\%, respectively. Our approach also surpasses MTT in terms of speed by approximately 52$\\times$ (ConvNet-4) and 16$\\times$ (ResNet-18) faster with less memory consumption of 11.6$\\times$ and 6.4$\\times$ during data synthesis. 
Our code and condensed datasets of 50, 200 IPC with 4K recovery budget are available at https://github.com/VILA-Lab/SRe2L.", "keywords": "Dataset Condensation and Distillation;ImageNet Scale", "primary_area": "", "supplementary_material": "/attachment/2dff5b9f7344118296971d608f637e43e828da91.zip", "author": "Zeyuan Yin;Eric Xing;Zhiqiang Shen", "authorids": "~Zeyuan_Yin1;~Eric_Xing1;~Zhiqiang_Shen1", "gender": "M;M;", "homepage": "https://zeyuanyin.github.io/;http://www.cs.cmu.edu/~epxing/;", "dblp": "302/4051-1.html;36/3855;", "google_scholar": "QyV0vm8AAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zeyuan_Yin1;~Eric_Xing1;~Zhiqiang_Shen1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;School of Computer Science, Carnegie Mellon University;", "aff_domain": "mbzuai.ac.ae;cs.cmu.edu;", "position": "MS student;Full Professor;", "bibtex": "@inproceedings{\nyin2023squeeze,\ntitle={Squeeze, Recover and Relabel: Dataset Condensation at ImageNet Scale From A New Perspective},\nauthor={Zeyuan Yin and Eric Xing and Zhiqiang Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5Fgdk3hZpb}\n}", "github": "", "project": "", "reviewers": "MBn1;7q3V;RJ8s;4huB;QGHe", "pdf_size": 3216589, "rating": "5;6;6;7;8", "confidence": "3;4;3;4;5", "soundness": "3;3;2;3;4", "novelty": "2;3;2;4;4", "presentation": "3;3;2;3;4", "wc_summary": "118;89;39;111;71", "wc_strengths": "52;96;30;128;48", "wc_weaknesses": "48;100;63;109;171", "wc_questions": "82;63;43;2;22", "wc_limitations": "8;9;1;1;32", "wc_review": "308;357;176;351;344", "wc_reply_reviewers": "0;49;19;118;313", "wc_reply_authors": "0;24;0;15;658", "reply_reviewers": "0;1;1;1;2", "reply_authors": "1;2;1;2;3", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 85.6, 28.6048946860498 ], "wc_strengths_avg": [ 70.8, 35.902089075707 ], "wc_weaknesses_avg": [ 98.2, 42.845769919561484 ], "wc_questions_avg": [ 42.4, 28.429562078934666 ], "wc_limitations_avg": [ 10.2, 11.408768557561329 ], "wc_review_avg": [ 307.2, 67.7743314242199 ], "wc_reply_reviewers_avg": [ 99.8, 113.89012248654403 ], "wc_reply_authors_avg": [ 139.4, 259.4622130484514 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8910421112136307, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11730713811655548339&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mbzuai.ac.ae;cs.cmu.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://mbzuai.ac.ae;https://www.cmu.edu", "aff_unique_abbr": "MBZUAI;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1", "aff_country_unique": "United Arab Emirates;United States" }, { "title": "The Waymo Open Sim Agents Challenge", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73697", "id": "5FnttJZQFn", "proceeding": 
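A minimal sketch of the "recover" idea in SRe$^2$L, in the spirit of BatchNorm-statistics inversion: synthesize images whose batch statistics match a pretrained teacher's running BN statistics, plus a classification term so each image commits to its class. This is a simplified reading of the abstract; the actual recipe (augmentation, multi-crop soft relabeling, the 4K recovery budget) is in the paper and repository.

```python
import torch
import torch.nn.functional as F
from torchvision.models import resnet18

# Pretrained teacher; its BatchNorm running statistics summarize the data.
model = resnet18(weights="DEFAULT").eval()
bn_losses = []

def match_bn_stats(module, inputs, output):
    # Penalize mismatch between the synthetic batch's statistics and the
    # teacher's running statistics at this BN layer.
    x = inputs[0]
    mean = x.mean(dim=(0, 2, 3))
    var = x.var(dim=(0, 2, 3), unbiased=False)
    bn_losses.append(F.mse_loss(mean, module.running_mean) +
                     F.mse_loss(var, module.running_var))

for m in model.modules():
    if isinstance(m, torch.nn.BatchNorm2d):
        m.register_forward_hook(match_bn_stats)

targets = torch.arange(10)                     # one synthetic image per class
images = torch.randn(10, 3, 224, 224, requires_grad=True)
optimizer = torch.optim.Adam([images], lr=0.1)

for _ in range(50):                            # toy budget for illustration
    bn_losses.clear()
    optimizer.zero_grad()
    loss = F.cross_entropy(model(images), targets) + sum(bn_losses)
    loss.backward()
    optimizer.step()
```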
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b96ce67b2f2d45e4ab315e13a6b5b9c5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=5FnttJZQFn", "openreview": "https://openreview.net/forum?id=5FnttJZQFn", "poster": "/media/PosterPDFs/NeurIPS%202023/73697.png?t=1702031039.1984158", "slides": "https://nips.cc/virtual/2023/poster/73697", "video": "https://nips.cc/virtual/2023/poster/73697", "author_site": "Nico Montali, John Lambert, Paul Mougin, Alex Kuefler, Nicholas Rhinehart, Michelle Li, Cole Gulino, Tristan Emrich, Zoey Yang, Shimon Whiteson, Brandyn White, Dragomir Anguelov", "tldr": "", "abstract": "Simulation with realistic, interactive agents represents a key task for autonomous vehicle software development. In this work, we introduce the Waymo Open Sim Agents Challenge (WOSAC). WOSAC is the first public challenge to tackle this task and propose corresponding metrics. The goal of the challenge is to stimulate the design of realistic simulators that can be used to evaluate and train a behavior model for autonomous driving. We outline our evaluation methodology, present results for a number of different baseline simulation agent methods, and analyze several submissions to the 2023 competition which ran from March 16, 2023 to May 23, 2023. The WOSAC evaluation server remains open for submissions and we discuss open problems for the task.", "keywords": "simulation;autonomous driving", "primary_area": "", "supplementary_material": "", "author": "Nico Montali;John Lambert;Paul Mougin;Alex Kuefler;Nicholas Rhinehart;Michelle Li;Cole Gulino;Tristan Emrich;Zoey Zeyu Yang;Shimon Whiteson;Brandyn White;Dragomir Anguelov", "authorids": "~Nico_Montali1;~John_Lambert1;~Paul_Mougin1;alexkuefler@waymo.com;~Nicholas_Rhinehart1;~Michelle_Li3;~Cole_Gulino1;wosac_author1@gmail.com;~Zoey_Zeyu_Yang1;~Shimon_Whiteson1;~Brandyn_White1;~Dragomir_Anguelov1", "gender": "M;M;;;M;;M;;;;M;M", "homepage": ";https://johnwlambert.github.io/;;;https://leaf.utias.utoronto.ca/;;;;;;;", "dblp": ";47/3516;;;153/2193;;;;;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;63/3079;a/DragomirAnguelov", "google_scholar": "xPWCLvEAAAAJ;6GhZedEAAAAJ;;;xUGZX_MAAAAJ;;;;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;;;;;", "linkedin": ";;https://uk.linkedin.com/in/paul-mougin-763775118;;;;colegulino/;;;;;dragomiranguelov/", "or_profile": "~Nico_Montali1;~John_Lambert1;~Paul_Mougin1;alexkuefler@waymo.com;~Nicholas_Rhinehart1;~Michelle_Li3;~Cole_Gulino1;wosac_author1@gmail.com;~Zoey_Zeyu_Yang1;~Shimon_Whiteson1;~Brandyn_White1;~Dragomir_Anguelov1", "aff": "Google;Waymo;;;Waymo Research;;;;;University of Oxford;University of Central Florida;Waymo", "aff_domain": "google.com;google.com;;;waymo.com;;;;;ox.ac.uk;ucf.edu;waymo.com", "position": "Researcher;Researcher;;;Researcher;;;;;Professor;Undergrad student;Researcher", "bibtex": "@inproceedings{\nmontali2023the,\ntitle={The Waymo Open Sim Agents Challenge},\nauthor={Nico Montali and John Lambert and Paul Mougin and Alex Kuefler and Nicholas Rhinehart and Michelle Li and Cole Gulino and Tristan Emrich and Zoey Zeyu Yang and Shimon Whiteson and Brandyn White and Dragomir Anguelov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=5FnttJZQFn}\n}", "github": "", "project": "", "reviewers": "xmgp;9P4x;TuS8;58Fn;ZL1T", "pdf_size": 1719585, "rating": "6;6;8;8;10", "confidence": "4;4;5;4;4", 
"wc_summary_and_contributions": "31;37;158;14;63", "wc_strengths": "39;22;48;40;75", "wc_improvement": "156;24;260;46;52", "wc_limitations": "18;3;2;30;57", "wc_correctness": "23;1;9;8;11", "wc_clarity": "14;1;3;14;25", "wc_relation_to_prior_work": "42;1;7;8;18", "wc_documentation": "7;1;1;16;12", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "331;91;489;177;314", "wc_reply_reviewers": "13;12;0;0;0", "wc_reply_authors": "680;225;560;102;340", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 7.6, 1.4966629547095764 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 60.6, 51.18046502328794 ], "wc_strengths_avg": [ 44.8, 17.31357848626332 ], "wc_improvement_avg": [ 107.6, 88.81801619041039 ], "wc_limitations_avg": [ 22.0, 20.32732151563506 ], "wc_correctness_avg": [ 10.4, 7.144228439796701 ], "wc_clarity_avg": [ 11.4, 8.685620300243386 ], "wc_relation_to_prior_work_avg": [ 15.2, 14.469277798148738 ], "wc_documentation_avg": [ 7.4, 5.953150426454887 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 280.4, 136.94319990419385 ], "wc_reply_reviewers_avg": [ 5.0, 6.131883886702357 ], "wc_reply_authors_avg": [ 381.4, 212.2730317303637 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.13363062095621217, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10051394647703391721&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "google.com;google.com;;;waymo.com;;;;;ox.ac.uk;ucf.edu;waymo.com", "author_num": 12, "aff_unique_index": "0;1;1;2;3;1", "aff_unique_norm": "Google;Waymo;University of Oxford;University of Central Florida", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.waymo.com;https://www.ox.ac.uk;https://www.ucf.edu", "aff_unique_abbr": "Google;Waymo;Oxford;UCF", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Diffusion Model for Graph Inverse Problems: Towards Effective Source Localization on Complex Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72813", "id": "5Fr8Nwi5KF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46ab9d9645b6975b947231ddb48da1ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5Fr8Nwi5KF", "openreview": "https://openreview.net/forum?id=5Fr8Nwi5KF", "poster": "/media/PosterPDFs/NeurIPS%202023/72813.png?t=1699854954.0283651", "slides": "https://nips.cc/virtual/2023/poster/72813", "video": "https://nips.cc/virtual/2023/poster/72813", "author_site": "Xin Yan, Hui Fang, Qiang He", "tldr": "", "abstract": "Information diffusion problems, such as the spread of epidemics or rumors, are widespread in society. The inverse problems of graph diffusion, which involve locating the sources and identifying the paths of diffusion based on currently observed diffusion graphs, are crucial to controlling the spread of information. The problem of localizing the source of diffusion is highly ill-posed, presenting a major obstacle in accurately assessing the uncertainty involved. Besides, while comprehending how information diffuses through a graph is crucial, there is a scarcity of research on reconstructing the paths of information propagation. 
To tackle these challenges, we propose a probabilistic model called DDMSL (Discrete Diffusion Model for Source Localization). Our approach is based on the natural diffusion process of information propagation over complex networks, which can be formulated using a message-passing function. First, we model the forward diffusion of information using Markov chains. Then, we design a reversible residual network to construct a denoising-diffusion model in discrete space for both source localization and reconstruction of information diffusion paths. We provide rigorous theoretical guarantees for DDMSL and demonstrate its effectiveness through extensive experiments on five real-world datasets.", "keywords": "Diffusion Model;Graph Inverse Problems;Source Localization;Information Diffusion", "primary_area": "", "supplementary_material": "/attachment/77e8af4196e27c9ff8bd6fa6b6226058c919a298.pdf", "author": "Xin Yan;Hui Fang;Qiang He", "authorids": "~Xin_Yan4;~Hui_Fang7;~Qiang_He5", "gender": "M;F;M", "homepage": "https://www.semanticscholar.org/author/Xin-Yan/2187708135;https://fangh.org/;http://faculty.neu.edu.cn/heqiang/zh_CN/index.htm", "dblp": ";03/2511-2;97/6589-2", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;784G0hYAAAAJ;", "orcid": ";0000-0001-9788-6634;", "linkedin": ";;", "or_profile": "~Xin_Yan4;~Hui_Fang7;~Qiang_He5", "aff": "Northeastern University;Shanghai University of Finance and Economics;Northeastern University", "aff_domain": "neu.edu.cn;shufe.edu.cn;cn.edu", "position": "MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nyan2023diffusion,\ntitle={Diffusion Model for Graph Inverse Problems: Towards Effective Source Localization on Complex Networks},\nauthor={Xin Yan and Hui Fang and Qiang He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5Fr8Nwi5KF}\n}", "github": "", "project": "", "reviewers": "W7Kc;wXsK;c2eK;NqvF", "pdf_size": 35533038, "rating": "5;5;6;6", "confidence": "3;5;4;4", "soundness": "2;3;3;2", "novelty": "2;2;3;3", "presentation": "2;3;3;1", "wc_summary": "32;165;77;98", "wc_strengths": "29;76;85;36", "wc_weaknesses": "109;260;14;350", "wc_questions": "94;153;152;49", "wc_limitations": "71;24;9;4", "wc_review": "335;678;337;537", "wc_reply_reviewers": "0;82;0;13", "wc_reply_authors": "0;657;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 93.0, 47.92181131802094 ], "wc_strengths_avg": [ 56.5, 24.336187047275914 ], "wc_weaknesses_avg": [ 183.25, 130.2447215821048 ], "wc_questions_avg": [ 112.0, 43.514365444069156 ], "wc_limitations_avg": [ 27.0, 26.448062310876388 ], "wc_review_avg": [ 471.75, 144.61565440850447 ], "wc_reply_reviewers_avg": [ 23.75, 34.046842731742395 ], "wc_reply_authors_avg": [ 164.25, 284.4893451431881 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12951955845824618843&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "neu.edu.cn;shufe.edu.cn;cn.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Northeastern University;Shanghai University of Finance and 
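For intuition about the forward process DDMSL inverts, here is a minimal sketch of SI-style information diffusion on a graph simulated as a Markov chain. The propagation model is illustrative; the paper formulates its forward diffusion with a message-passing function and learns the reverse with a reversible residual network.

```python
import numpy as np

def forward_diffusion(adj, sources, beta=0.3, steps=10, seed=0):
    """Simulate SI-style spread: at each step an uninfected node turns
    infected with prob 1 - (1 - beta)^(#infected neighbors). Returns the
    list of node-state vectors that source localization must invert."""
    rng = np.random.default_rng(seed)
    state = np.zeros(adj.shape[0], dtype=bool)
    state[list(sources)] = True
    trajectory = [state.copy()]
    for _ in range(steps):
        pressure = adj @ state                      # infected-neighbor counts
        p_infect = 1.0 - (1.0 - beta) ** pressure
        state |= rng.random(state.size) < p_infect
        trajectory.append(state.copy())
    return trajectory

# Ring graph with 20 nodes, diffusion seeded at node 0.
n = 20
adj = np.zeros((n, n)); idx = np.arange(n)
adj[idx, (idx + 1) % n] = adj[(idx + 1) % n, idx] = 1
print([int(t.sum()) for t in forward_diffusion(adj, sources=[0])])
```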
Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;http://www.sufe.edu.cn", "aff_unique_abbr": "NEU;SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Strong and Precise Modulation of Human Percepts via Robustified ANNs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72812", "id": "5GmTI4LNqX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d00904cebc0d5b69fada8ad33d0f1422-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5GmTI4LNqX", "openreview": "https://openreview.net/forum?id=5GmTI4LNqX", "poster": "/media/PosterPDFs/NeurIPS%202023/72812.png?t=1701382144.8138134", "slides": "https://nips.cc/virtual/2023/poster/72812", "video": "https://nips.cc/virtual/2023/poster/72812", "author_site": "Guy Gaziv, Michael Lee, James J DiCarlo", "tldr": "", "abstract": "The visual object category reports of artificial neural networks (ANNs) are notoriously sensitive to tiny, adversarial image perturbations. Because human category reports (aka human percepts) are thought to be insensitive to those same small-norm perturbations -- and locally stable in general -- this argues that ANNs are incomplete scientific models of human visual perception. Consistent with this, we show that when small-norm image perturbations are generated by standard ANN models, human object category percepts are indeed highly stable. However, in this very same \"human-presumed-stable\" regime, we find that robustified ANNs reliably discover low-norm image perturbations that strongly disrupt human percepts. These previously undetectable human perceptual disruptions are massive in amplitude, approaching the same level of sensitivity seen in robustified ANNs. Further, we show that robustified ANNs support precise perceptual state interventions: they guide the construction of low-norm image perturbations that strongly alter human category percepts toward specific prescribed percepts. In sum, these contemporary models of biological visual processing are now accurate enough to guide strong and precise interventions on human perception.", "keywords": "Vision;Object Recognition;Human;Primate;Ventral Stream;Adversarial Examples;Behavior Modulation;Behavioral Alignment", "primary_area": "", "supplementary_material": "/attachment/0614949a881ffd168a3f481ab83557e6e4bb1e6f.pdf", "author": "Guy Gaziv;Michael J. Lee;James J. DiCarlo", "authorids": "~Guy_Gaziv1;~Michael_J._Lee1;~James_J._DiCarlo1", "gender": ";M;", "homepage": "https://ggaziv.github.io/;http://dicarlolab.mit.edu;", "dblp": "244/2370;80/7658;18/1027", "google_scholar": "https://scholar.google.co.il/citations?user=M-8t0oQAAAAJ;;X_sNgwsAAAAJ", "orcid": "0000-0003-0890-6002;0000-0002-1592-5896;0000-0002-2576-6059", "linkedin": "guy-gaziv-56609434/;james-j-dicarlo/;", "or_profile": "~Guy_Gaziv1;~James_J._DiCarlo1;~Michael_J_Lee1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "Postdoc;Full Professor;Postdoc", "bibtex": "@inproceedings{\ngaziv2023strong,\ntitle={Strong and Precise Modulation of Human Percepts via Robustified {ANN}s},\nauthor={Guy Gaziv and Michael J. Lee and James J. 
DiCarlo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5GmTI4LNqX}\n}", "github": "", "project": "", "reviewers": "keVx;LMUo;Xb7f;ibQ6;StMT", "pdf_size": 3423582, "rating": "4;5;6;7;8", "confidence": "5;3;3;4;5", "soundness": "4;2;3;3;4", "novelty": "2;2;3;2;3", "presentation": "4;3;4;3;4", "wc_summary": "359;98;135;94;95", "wc_strengths": "11;62;46;39;74", "wc_weaknesses": "84;117;359;269;187", "wc_questions": "39;78;47;83;216", "wc_limitations": "1;1;1;57;10", "wc_review": "494;356;588;542;582", "wc_reply_reviewers": "163;0;57;24;645", "wc_reply_authors": "135;0;38;0;78", "reply_reviewers": "1;0;1;1;3", "reply_authors": "2;1;2;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 156.2, 102.546379750823 ], "wc_strengths_avg": [ 46.4, 21.50906785520935 ], "wc_weaknesses_avg": [ 203.2, 100.50353227623395 ], "wc_questions_avg": [ 92.6, 64.0081244843184 ], "wc_limitations_avg": [ 14.0, 21.78072542409917 ], "wc_review_avg": [ 512.4, 85.12954833663808 ], "wc_reply_reviewers_avg": [ 177.8, 240.14778783074394 ], "wc_reply_authors_avg": [ 50.2, 51.28898517225702 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.15811388300841897, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18380765407599387774&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PAC Learning Linear Thresholds from Label Proportions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72811", "id": "5Gw9YkJkFF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1d3cdc9e28b0c67b9df90fca4d1c1b3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5Gw9YkJkFF", "openreview": "https://openreview.net/forum?id=5Gw9YkJkFF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72811", "video": "https://nips.cc/virtual/2023/poster/72811", "author_site": "Anand Brahmbhatt, Rishi Saket, Aravindan Raghuveer", "tldr": "", "abstract": "Learning from label proportions (LLP) is a generalization of supervised learning in which the training data is available as sets or bags of feature-vectors (instances) along with the average instance-label of each bag. The goal is to train a good instance classifier. While most previous works on LLP have focused on training models on such training data, computational learnability of LLP was only\nrecently explored by Saket (2021, 2022) who showed worst case intractability of properly learning linear threshold functions (LTFs) from label proportions. 
However, their work did not rule out efficient algorithms for this problem for natural distributions.\n\nIn this work we show that it is indeed possible to efficiently learn LTFs using LTFs when given access to random bags of some label proportion in which feature-vectors are, conditioned on their labels, independently sampled from a Gaussian distribution $N(\\mu, \\Sigma)$. Our work shows that a certain matrix \u2013 formed using covariances of the differences of feature-vectors sampled from the bags with and without replacement \u2013 necessarily has its principal component, after a transformation, in the direction of the normal vector of the LTF. Our algorithm estimates the means and covariance matrices using subgaussian concentration bounds which we show can be applied to efficiently sample bags for approximating the normal direction. Using this in conjunction with novel generalization error bounds in the bag setting, we show that a low error hypothesis LTF can be identified. For some special cases of the $N(0, I)$ distribution we provide a simpler mean estimation based algorithm. We include an experimental evaluation of our learning algorithms along with a comparison with those of Saket (2021, 2022) and random LTFs, demonstrating the effectiveness of our techniques.", "keywords": "PAC learning;Learning from label proportions;Linear thresholds", "primary_area": "", "supplementary_material": "", "author": "Anand Paresh Brahmbhatt;Rishi Saket;Aravindan Raghuveer", "authorids": "~Anand_Paresh_Brahmbhatt1;~Rishi_Saket1;~Aravindan_Raghuveer1", "gender": "M;M;M", "homepage": ";;", "dblp": ";73/3493;20/1664", "google_scholar": ";Gl4bKJgAAAAJ;", "orcid": ";;", "linkedin": "anand-brahmbhatt;;", "or_profile": "~Anand_Paresh_Brahmbhatt1;~Rishi_Saket1;~Aravindan_Raghuveer1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbrahmbhatt2023pac,\ntitle={{PAC} Learning Linear Thresholds from Label Proportions},\nauthor={Anand Paresh Brahmbhatt and Rishi Saket and Aravindan Raghuveer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5Gw9YkJkFF}\n}", "github": "", "project": "", "reviewers": "UNzS;FWVX;VQSz;Xa3y", "pdf_size": 559237, "rating": "7;7;7;8", "confidence": "3;3;3;4", "soundness": "3;4;4;4", "novelty": "3;3;3;4", "presentation": "1;3;3;4", "wc_summary": "103;95;108;81", "wc_strengths": "76;21;150;73", "wc_weaknesses": "56;229;73;103", "wc_questions": "49;7;57;76", "wc_limitations": "113;12;2;1", "wc_review": "397;364;390;334", "wc_reply_reviewers": "12;15;0;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 96.75, 10.207227831296802 ], "wc_strengths_avg": [ 80.0, 45.951060923552134 ], "wc_weaknesses_avg": [ 115.25, 67.79518788232687 ], "wc_questions_avg": [ 47.25, 25.222757581200355 ], "wc_limitations_avg": [ 32.0, 46.96275119709236 ], "wc_review_avg": [ 371.25, 24.772716847370617 ], "wc_reply_reviewers_avg": [ 11.75, 7.361215932167728 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [
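Reading the Brahmbhatt et al. abstract operationally, one can sketch the bag statistic roughly as follows. This is our simplified rendering, not the paper's algorithm: the exact matrix, its transformation, and the subgaussian estimation details differ, and the construction of label-conditioned bags is omitted.

```python
import numpy as np

def estimate_normal_direction(bags, rng):
    """Sketch of the covariance-difference idea for LLP with Gaussian data.
    bags: list of (bag_size, d) arrays of feature-vectors. Compare the
    covariance of the difference of two vectors drawn without replacement
    against two drawn with replacement; a principal eigenvector of the
    difference matrix tracks the LTF's normal direction (up to the
    transformation detailed in the paper)."""
    d = bags[0].shape[1]
    cov_wo, cov_w = np.zeros((d, d)), np.zeros((d, d))
    for bag in bags:
        i, j = rng.choice(len(bag), size=2, replace=False)
        diff = bag[i] - bag[j]
        cov_wo += np.outer(diff, diff)
        i, j = rng.choice(len(bag), size=2, replace=True)
        diff = bag[i] - bag[j]
        cov_w += np.outer(diff, diff)
    gap = (cov_w - cov_wo) / len(bags)
    eigvals, eigvecs = np.linalg.eigh(gap)
    return eigvecs[:, np.argmax(np.abs(eigvals))]  # candidate normal direction
```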
3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16656702386414596532&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Computational Guarantees for Doubly Entropic Wasserstein Barycenters", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72810", "id": "5HahZRA0fy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28fa97d12d6e3877f1c10c605d2cffa0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5HahZRA0fy", "openreview": "https://openreview.net/forum?id=5HahZRA0fy", "poster": "/media/PosterPDFs/NeurIPS%202023/72810.png?t=1701775357.4412897", "slides": "https://nips.cc/virtual/2023/poster/72810", "video": "https://nips.cc/virtual/2023/poster/72810", "author_site": "Tomas Vaskevicius, L\u00e9na\u00efc Chizat", "tldr": "", "abstract": "We study the computation of doubly regularized Wasserstein barycenters, a recently introduced family of entropic barycenters governed by inner and outer regularization strengths. Previous research has demonstrated that various regularization parameter choices unify several notions of entropy-penalized barycenters while also revealing new ones, including a special case of debiased barycenters. In this paper, we propose and analyze an algorithm for computing doubly regularized Wasserstein barycenters. Our procedure builds on damped Sinkhorn iterations followed by exact maximization/minimization steps and guarantees convergence for any choice of regularization parameters. 
An inexact variant of our algorithm, implementable using approximate Monte Carlo sampling, offers the first non-asymptotic convergence guarantees for approximating Wasserstein barycenters between discrete point clouds in the free-support/grid-free setting.", "keywords": "Wasserstein barycenters;entropic penalization;optimal transport;Sinkhorn's algorithm", "primary_area": "", "supplementary_material": "/attachment/9c03040311575fb619666178b77b7044af3a44a7.pdf", "author": "Tomas Vaskevicius;L\u00e9na\u00efc Chizat", "authorids": "~Tomas_Vaskevicius1;~L\u00e9na\u00efc_Chizat1", "gender": "M;M", "homepage": "https://tomasvaskevicius.github.io/;https://lchizat.github.io/", "dblp": "248/9235;192/1488", "google_scholar": "dWY8_bgAAAAJ;https://scholar.google.fr/citations?user=jrJh9yIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Tomas_Vaskevicius1;~L\u00e9na\u00efc_Chizat1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nvaskevicius2023computational,\ntitle={Computational Guarantees for Doubly Entropic Wasserstein Barycenters},\nauthor={Tomas Vaskevicius and L{\\'e}na{\\\"\\i}c Chizat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5HahZRA0fy}\n}", "github": "", "project": "", "reviewers": "9mYE;uehn;nAci;bjAF;ofnZ", "pdf_size": 597732, "rating": "4;7;7;8;8", "confidence": "4;4;3;5;5", "soundness": "4;3;3;4;4", "novelty": "2;3;3;4;2", "presentation": "3;4;3;4;4", "wc_summary": "47;83;139;91;268", "wc_strengths": "44;71;22;65;89", "wc_weaknesses": "129;104;37;82;582", "wc_questions": "34;37;134;48;528", "wc_limitations": "4;41;24;1;45", "wc_review": "258;336;356;287;1512", "wc_reply_reviewers": "0;45;17;77;39", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 1.469693845669907 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 125.6, 77.00285708985089 ], "wc_strengths_avg": [ 58.2, 23.111901695879553 ], "wc_weaknesses_avg": [ 186.8, 199.9013756831103 ], "wc_questions_avg": [ 156.2, 189.51348236998865 ], "wc_limitations_avg": [ 23.0, 18.18790807102345 ], "wc_review_avg": [ 549.8, 482.3502461904627 ], "wc_reply_reviewers_avg": [ 35.6, 26.180909075125715 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.40006613209931935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1138874991575271607&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "GLEMOS: Benchmark for Instantaneous Graph Learning Model Selection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73696", "id": "5HisVXnx0n", "proceeding": 
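To make the damped-Sinkhorn structure behind the doubly regularized barycenter result above concrete, here is a minimal sketch of damped Sinkhorn iterations for entropic optimal transport between two discrete measures. It illustrates the damping idea only, not the authors' barycenter procedure (which interleaves such updates with exact maximization/minimization steps); the function name and defaults are hypothetical.

```python
import numpy as np
from scipy.special import logsumexp

def damped_sinkhorn(a, b, C, eps=0.1, eta=0.5, iters=500):
    """Damped Sinkhorn for entropic OT between discrete measures a, b
    (probability vectors) with cost matrix C and regularization eps.
    eta in (0, 1] is the damping factor; eta = 1 is plain Sinkhorn."""
    f, g = np.zeros(len(a)), np.zeros(len(b))
    for _ in range(iters):
        # Exact log-domain update for each dual potential, then damp.
        f_star = -eps * logsumexp((g[None, :] - C) / eps, b=b[None, :], axis=1)
        f = (1.0 - eta) * f + eta * f_star
        g_star = -eps * logsumexp((f[:, None] - C) / eps, b=a[:, None], axis=0)
        g = (1.0 - eta) * g + eta * g_star
    # Primal coupling recovered from the dual potentials.
    P = a[:, None] * b[None, :] * np.exp((f[:, None] + g[None, :] - C) / eps)
    return P, f, g
```

Per the abstract, it is the damped (eta < 1) variant that admits convergence guarantees for any choice of inner and outer regularization strengths.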
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/dcd18e50ebca0af89187c6e35dabb584-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=5HisVXnx0n", "openreview": "https://openreview.net/forum?id=5HisVXnx0n", "poster": "/media/PosterPDFs/NeurIPS%202023/73696.png?t=1701565776.4470434", "slides": "https://nips.cc/virtual/2023/poster/73696", "video": "https://nips.cc/virtual/2023/poster/73696", "author_site": "Namyong Park, Ryan Rossi, Xing Wang, Antoine Simoulin, Nesreen K. Ahmed, Christos Faloutsos", "tldr": "", "abstract": "The choice of a graph learning (GL) model (i.e., a GL algorithm and its hyperparameter settings) has a significant impact on the performance of downstream tasks. However, selecting the right GL model becomes increasingly difficult and time consuming as more and more GL models are developed. Accordingly, it is of great significance and practical value to equip users of GL with the ability to perform a near-instantaneous selection of an effective GL model without manual intervention. Despite the recent attempts to tackle this important problem, there has been no comprehensive benchmark environment to evaluate the performance of GL model selection methods. To bridge this gap, we present GLEMOS in this work, a comprehensive benchmark for instantaneous GL model selection that makes the following contributions. (i) GLEMOS provides extensive benchmark data for fundamental GL tasks, i.e., link prediction and node classification, including the performances of 366 models on 457 graphs on these tasks. (ii) GLEMOS designs multiple evaluation settings, and assesses how effectively representative model selection techniques perform in these different settings. (iii) GLEMOS is designed to be easily extended with new models, new graphs, and new performance records. (iv) Based on the experimental results, we discuss the limitations of existing approaches and highlight future research directions. To promote research on this significant problem, we make the benchmark data and code publicly available at https://namyongpark.github.io/glemos.", "keywords": "instantaneous graph learning model selection;automatic graph learning;meta-learning;node classification;link prediction", "primary_area": "", "supplementary_material": "/attachment/6dd9918b65dbd5682cb2d248a4c926cbf0155263.pdf", "author": "Namyong Park;Ryan A. Rossi;Xing Wang;Antoine Simoulin;Nesreen K. 
Ahmed;Christos Faloutsos", "authorids": "~Namyong_Park1;~Ryan_A._Rossi2;~Xing_Wang8;~Antoine_Simoulin1;~Nesreen_K._Ahmed2;~Christos_Faloutsos1", "gender": ";M;M;M;F;M", "homepage": "https://namyongpark.github.io/;https://scholar.google.com/citations?user=_nDiQQ0AAAAJ&hl=en;http://www.llf.cnrs.fr/fr/Gens/Simoulin;https://www.cs.cmu.edu/~christos/;http://nesreenahmed.com;http://ryanrossi.com", "dblp": "116/9404;;211/7662;f/CFaloutsos;33/11518;17/5085", "google_scholar": "YBTXGb8AAAAJ;;https://scholar.google.fr/citations?hl=en;nd8lQQIAAAAJ;AFV0nLcAAAAJ;_Dc6lbQAAAAJ", "orcid": ";;0000-0001-8433-7919;0000-0003-2996-9790;;0000-0001-9758-0635", "linkedin": ";;antoine-simoulin;christos-faloutsos-43a7aa2/;nkahmed/;", "or_profile": "~Namyong_Park1;~Xing_Wang8;~Antoine_Simoulin1;~Christos_Faloutsos1;~Nesreen_Ahmed1;~Ryan_Rossi1", "aff": "Meta AI;;Meta AI;Carnegie Mellon University;Intel AI Research;Adobe Research", "aff_domain": "meta.com;;meta.com;cmu.edu;intel.com;adobe.com", "position": "Researcher;;Postdoc;Full Professor;Principal Researcher;Senior Research Scientist", "bibtex": "@inproceedings{\npark2023glemos,\ntitle={{GLEMOS}: Benchmark for Instantaneous Graph Learning Model Selection},\nauthor={Namyong Park and Ryan A. Rossi and Xing Wang and Antoine Simoulin and Nesreen K. Ahmed and Christos Faloutsos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=5HisVXnx0n}\n}", "github": "", "project": "", "reviewers": "yCME;o5XN;Yn6z;GJxQ;qJk8", "pdf_size": 402673, "rating": "6;6;6;7;8", "confidence": "3;3;4;3;5", "wc_summary_and_contributions": "65;45;68;218;58", "wc_strengths": "258;32;69;26;82", "wc_improvement": "169;87;217;32;86", "wc_limitations": "4;1;18;27;1", "wc_correctness": "12;1;15;53;1", "wc_clarity": "7;1;5;28;1", "wc_relation_to_prior_work": "2;3;8;17;1", "wc_documentation": "4;1;7;21;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "522;172;408;423;232", "wc_reply_reviewers": "0;23;12;15;0", "wc_reply_authors": "1012;523;740;935;629", "reply_reviewers": "0;1;1;1;0", "reply_authors": "2;2;1;3;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 90.8, 64.09180914906366 ], "wc_strengths_avg": [ 93.4, 85.00729380470831 ], "wc_improvement_avg": [ 118.2, 66.00424228790146 ], "wc_limitations_avg": [ 10.2, 10.49571341072154 ], "wc_correctness_avg": [ 16.4, 19.15828802372488 ], "wc_clarity_avg": [ 8.4, 10.071742649611338 ], "wc_relation_to_prior_work_avg": [ 6.2, 5.912698199637793 ], "wc_documentation_avg": [ 6.8, 7.4404300950953095 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 351.4, 129.51849288808143 ], "wc_reply_reviewers_avg": [ 10.0, 8.921883209278185 ], "wc_reply_authors_avg": [ 767.8, 183.05999016715805 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6875000000000001, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6903344581935712677&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "meta.com;;meta.com;cmu.edu;intel.com;adobe.com", "author_num": 6, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Meta;Carnegie Mellon University;Intel;Adobe", "aff_unique_dep": "Meta AI;;Intel AI Research;Adobe Research", "aff_unique_url": 
"https://meta.com;https://www.cmu.edu;https://www.intel.com/research;https://research.adobe.com", "aff_unique_abbr": "Meta;CMU;Intel AI;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GeoTMI: Predicting Quantum Chemical Property with Easy-to-Obtain Geometry via Positional Denoising", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72809", "id": "5JcKKRX2iH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/903c5eb12f2389c4847574df90503d63-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5JcKKRX2iH", "openreview": "https://openreview.net/forum?id=5JcKKRX2iH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72809", "video": "https://nips.cc/virtual/2023/poster/72809", "author_site": "Hyeonsu Kim, Jeheon Woo, SEONGHWAN KIM, Seokhyun Moon, Jun Hyeong Kim, Woo Youn Kim", "tldr": "", "abstract": "As quantum chemical properties have a dependence on their geometries, graph neural networks (GNNs) using 3D geometric information have achieved high prediction accuracy in many tasks. However, they often require 3D geometries obtained from high-level quantum mechanical calculations, which are practically infeasible, limiting their applicability to real-world problems. To tackle this, we propose a new training framework, GeoTMI, that employs denoising process to predict properties accurately using easy-to-obtain geometries (corrupted versions of correct geometries, such as those obtained from low-level calculations). Our starting point was the idea that the correct geometry is the best description of the target property. Hence, to incorporate information of the correct, GeoTMI aims to maximize mutual information between three variables: the correct and the corrupted geometries and the property. GeoTMI also explicitly updates the corrupted input to approach the correct geometry as it passes through the GNN layers, contributing to more effective denoising. We investigated the performance of the proposed method using 3D GNNs for three prediction tasks: molecular properties, a chemical reaction property, and relaxed energy in a heterogeneous catalytic system. 
Our results showed consistent improvements in accuracy across various tasks, demonstrating the effectiveness and robustness of GeoTMI.", "keywords": "Mutual information;Easy-to-obtain geometry;Denoising;3D Graph neural network;OC20", "primary_area": "", "supplementary_material": "/attachment/57176ebfededc5393edfed7b4f24785217aca126.pdf", "author": "Hyeonsu Kim;Jeheon Woo;SEONGHWAN KIM;Seokhyun Moon;Jun Hyeong Kim;Woo Youn Kim", "authorids": "~Hyeonsu_Kim1;~Jeheon_Woo1;~SEONGHWAN_KIM5;~Seokhyun_Moon1;~Jun_Hyeong_Kim1;wooyoun@kaist.ac.kr", "gender": "M;M;;M;M;", "homepage": "http://wooyoun.kaist.ac.kr/our-family-future/;https://wooyoun.kaist.ac.kr;https://seonghann.github.io;https://mseok.github.io/;http://wooyoun.kaist.ac.kr/;", "dblp": ";;;241/9786;;", "google_scholar": ";;qrUHQ90AAAAJ;https://scholar.google.co.kr/citations?hl=ko;https://scholar.google.co.kr/citations?user=CnrALnEAAAAJ;", "orcid": ";;0009-0007-9131-8864;;;", "linkedin": ";;;;;", "or_profile": "~Hyeonsu_Kim1;~Jeheon_Woo1;~SEONGHWAN_KIM5;~Seokhyun_Moon1;~Jun_Hyeong_Kim1;wooyoun@kaist.ac.kr", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;kaist;Korea Advanced Institute of Science and Technology ;", "aff_domain": "kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nkim2023geotmi,\ntitle={Geo{TMI}: Predicting Quantum Chemical Property with Easy-to-Obtain Geometry via Positional Denoising},\nauthor={Hyeonsu Kim and Jeheon Woo and SEONGHWAN KIM and Seokhyun Moon and Jun Hyeong Kim and Woo Youn Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5JcKKRX2iH}\n}", "github": "", "project": "", "reviewers": "PXHR;rBMF;MDdx;WWJn", "pdf_size": 1024845, "rating": "5;6;6;6", "confidence": "2;4;4;4", "soundness": "3;2;4;3", "novelty": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "58;102;35;62", "wc_strengths": "66;83;57;66", "wc_weaknesses": "195;364;44;209", "wc_questions": "87;3;65;54", "wc_limitations": "24;4;1;1", "wc_review": "430;556;202;392", "wc_reply_reviewers": "23;124;11;0", "wc_reply_authors": "0;484;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;3;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.25, 24.107830678018296 ], "wc_strengths_avg": [ 68.0, 9.40744386111339 ], "wc_weaknesses_avg": [ 203.0, 113.24972406147398 ], "wc_questions_avg": [ 52.25, 30.81700017847292 ], "wc_limitations_avg": [ 7.5, 9.604686356149273 ], "wc_review_avg": [ 395.0, 126.8897158953396 ], "wc_reply_reviewers_avg": [ 39.5, 49.45957945636012 ], "wc_reply_authors_avg": [ 121.0, 209.57814771583415 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6679726208958871212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", 
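For intuition about the GeoTMI training setup described above, the sketch below combines a property-prediction term computed from the corrupted geometry with a positional-denoising term pulling the network's updated geometry toward the correct one. The paper maximizes a three-way mutual information rather than this weighted sum, and `lam` is a hypothetical weighting.

```python
import numpy as np

def geotmi_style_loss(y_pred, y_true, x_updated, x_correct, lam=0.1):
    """Toy surrogate for GeoTMI-style training: property-prediction
    error on the corrupted input plus a positional-denoising term
    that pulls the updated geometry toward the correct one.
    (Illustration only; the paper's objective is mutual-information
    based, not this simple sum.)"""
    prop_term = np.mean((y_pred - y_true) ** 2)
    denoise_term = np.mean((x_updated - x_correct) ** 2)
    return prop_term + lam * denoise_term
```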
"aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Fast Bellman Updates for Wasserstein Distributionally Robust MDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72808", "id": "5La4Y8BnQw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61779e9b0c26a31c5f36bd3e8c180dcf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5La4Y8BnQw", "openreview": "https://openreview.net/forum?id=5La4Y8BnQw", "poster": "/media/PosterPDFs/NeurIPS%202023/72808.png?t=1701444068.387442", "slides": "https://nips.cc/virtual/2023/poster/72808", "video": "https://nips.cc/virtual/2023/poster/72808", "author_site": "Zhuodong Yu, Ling Dai, Shaohang Xu, Siyang Gao, Chin Pang Ho", "tldr": "", "abstract": "Markov decision processes (MDPs) often suffer from the sensitivity issue under model ambiguity. In recent years, robust MDPs have emerged as an effective framework to overcome this challenge. Distributionally robust MDPs extend the robust MDP framework by incorporating distributional information of the uncertain model parameters to alleviate the conservative nature of robust MDPs. This paper proposes a computationally efficient solution framework for solving distributionally robust MDPs with Wasserstein ambiguity sets. By exploiting the specific problem structure, the proposed framework decomposes the optimization problems associated with distributionally robust Bellman updates into smaller subproblems, which can be solved efficiently. The overall complexity of the proposed algorithm is quasi-linear in both the numbers of states and actions when the distance metric of the Wasserstein distance is chosen to be $L_1$, $L_2$, or $L_{\\infty}$ norm, and so the computational cost of distributional robustness is substantially reduced. 
Our numerical experiments demonstrate that the proposed algorithms outperform other state-of-the-art solution methods.", "keywords": "Markov decision processes;distributionally robust optimization", "primary_area": "", "supplementary_material": "/attachment/d8e837f391be1c111b51fa573e27bf61eb9b872b.pdf", "author": "Zhuodong Yu;Ling Dai;Shaohang Xu;Siyang Gao;Chin Pang Ho", "authorids": "~Zhuodong_Yu1;~Ling_Dai2;~Shaohang_Xu2;~Siyang_Gao1;~Chin_Pang_Ho2", "gender": "M;;M;M;M", "homepage": ";;https://www.xushaohang.top;https://www.cityu.edu.hk/stfprofile/siyangao.htm;https://sites.google.com/view/clint-chin-pang-ho/home?authuser=0", "dblp": ";;152/7465;136/9876;143/4728", "google_scholar": ";;;NK6nQ9YAAAAJ;", "orcid": "0000-0003-0916-8080;0000-0001-5219-5856;0000-0002-6157-242X;0000-0002-3574-6393;", "linkedin": ";;;;", "or_profile": "~Zhuodong_Yu1;~Ling_Dai2;~Shaohang_Xu2;~Siyang_Gao1;~Chin_Pang_Ho2", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyu2023fast,\ntitle={Fast Bellman Updates for Wasserstein Distributionally Robust {MDP}s},\nauthor={Zhuodong Yu and Ling Dai and Shaohang Xu and Siyang Gao and Chin Pang Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5La4Y8BnQw}\n}", "github": "", "project": "", "reviewers": "SCaM;NtRj;JxmA;zZNP", "pdf_size": 484011, "rating": "5;5;6;7", "confidence": "4;1;2;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "112;46;101;18", "wc_strengths": "85;23;29;32", "wc_weaknesses": "232;39;31;87", "wc_questions": "123;115;65;52", "wc_limitations": "11;13;7;22", "wc_review": "563;236;233;211", "wc_reply_reviewers": "0;10;29;32", "wc_reply_authors": "0;15;14;21", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.25, 38.7387080321479 ], "wc_strengths_avg": [ 42.25, 24.893523254051445 ], "wc_weaknesses_avg": [ 97.25, 80.69192958406683 ], "wc_questions_avg": [ 88.75, 30.727634142575962 ], "wc_limitations_avg": [ 13.25, 5.494315243958978 ], "wc_review_avg": [ 310.75, 145.95611497981164 ], "wc_reply_reviewers_avg": [ 17.75, 13.273563952458284 ], "wc_reply_authors_avg": [ 12.5, 7.697402159170326 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4061811972299616, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16084044085326243850&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Global Optimality in Bivariate 
Gradient-based DAG Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72807", "id": "5MG5C5aS6m", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a02b6df276223b68c69ca572cb3c4a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5MG5C5aS6m", "openreview": "https://openreview.net/forum?id=5MG5C5aS6m", "poster": "/media/PosterPDFs/NeurIPS%202023/72807.png?t=1701459971.6515954", "slides": "https://nips.cc/virtual/2023/poster/72807", "video": "https://nips.cc/virtual/2023/poster/72807", "author_site": "Chang Deng, Kevin Bello, Pradeep Ravikumar, Bryon Aragam", "tldr": "", "abstract": "Recently, a new class of non-convex optimization problems motivated by the statistical problem of learning an acyclic directed graphical model from data has attracted significant interest. While existing work uses standard first-order optimization schemes to solve this problem, proving the global optimality of such approaches has proven elusive. The difficulty lies in the fact that unlike other non-convex problems in the literature, this problem is not \"benign\", and possesses multiple spurious solutions that standard approaches can easily get trapped in. In this paper, we prove that a simple path-following optimization scheme globally converges to the global minimum of the population loss in the bivariate setting.", "keywords": "global optimization;nonconvex optimization;graphical models;directed acyclic graphs;structure learning", "primary_area": "", "supplementary_material": "/attachment/36550b5b61964e3513ffa6ba842f23c30fa5f738.zip", "author": "Chang Deng;Kevin Bello;Pradeep Kumar Ravikumar;Bryon Aragam", "authorids": "~Chang_Deng1;~Kevin_Bello1;~Pradeep_Kumar_Ravikumar1;~Bryon_Aragam1", "gender": "M;M;M;", "homepage": "https://duntrain.github.io/;https://www.cs.cmu.edu/~kbello;http://www.cs.cmu.edu/~pradeepr/;http://bryonaragam.com/", "dblp": "16/1003;202/2531;94/3594;140/7564", "google_scholar": "51voxF8AAAAJ;pCS09UsAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;u-W3_9QAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chang_Deng1;~Kevin_Bello1;~Pradeep_Kumar_Ravikumar1;~Bryon_Aragam1", "aff": "University of Chicago;University of Chicago;Carnegie Mellon University;Booth School of Business", "aff_domain": "uchicago.edu;uchicago.edu;cmu.edu;chicagobooth.edu", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndeng2023global,\ntitle={Global Optimality in Bivariate Gradient-based {DAG} Learning},\nauthor={Chang Deng and Kevin Bello and Pradeep Kumar Ravikumar and Bryon Aragam},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5MG5C5aS6m}\n}", "github": "", "project": "", "reviewers": "xJpx;NK3q;Y11K;u4D3", "pdf_size": 4713973, "rating": "5;5;7;8", "confidence": "1;3;3;3", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;2;4;4", "wc_summary": "99;46;244;18", "wc_strengths": "56;57;222;35", "wc_weaknesses": "27;56;54;4", "wc_questions": "20;279;109;96", "wc_limitations": "3;5;1;1", "wc_review": "205;443;630;154", "wc_reply_reviewers": "33;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 
101.75, 87.12742105674883 ], "wc_strengths_avg": [ 92.5, 75.28113973632439 ], "wc_weaknesses_avg": [ 35.25, 21.370248009791556 ], "wc_questions_avg": [ 126.0, 94.6493528768158 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 358.0, 191.20277194643387 ], "wc_reply_reviewers_avg": [ 8.25, 14.289419162443238 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5278075051595611723&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "uchicago.edu;uchicago.edu;cmu.edu;chicagobooth.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Chicago;Carnegie Mellon University;University of Chicago Booth School of Business", "aff_unique_dep": ";;Booth School of Business", "aff_unique_url": "https://www.uchicago.edu;https://www.cmu.edu;https://www.chicagobooth.edu", "aff_unique_abbr": "UChicago;CMU;Booth", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Debias Coarsely, Sample Conditionally: Statistical Downscaling through Optimal Transport and Probabilistic Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72806", "id": "5NxJuc0T1P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/94d13c2401fe119e57ba325b6fe526e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5NxJuc0T1P", "openreview": "https://openreview.net/forum?id=5NxJuc0T1P", "poster": "/media/PosterPDFs/NeurIPS%202023/72806.png?t=1701402598.9031081", "slides": "https://nips.cc/virtual/2023/poster/72806", "video": "https://nips.cc/virtual/2023/poster/72806", "author_site": "Zhong Yi Wan, Ricardo Baptista, Anudhyan Boral, Yi-Fan Chen, John Anderson, Fei Sha, Leonardo Zepeda-N\u00fa\u00f1ez", "tldr": "", "abstract": "We introduce a two-stage probabilistic framework for statistical downscaling using unpaired data. Statistical downscaling seeks a probabilistic map to transform low-resolution data from a biased coarse-grained numerical scheme to high-resolution data that is consistent with a high-fidelity scheme. Our framework tackles the problem by\ncomposing two transformations: (i) a debiasing step via an optimal transport map, and (ii) an upsampling step achieved by a probabilistic diffusion model with a posteriori conditional sampling. This approach characterizes a conditional distribution without needing paired data, and faithfully recovers relevant physical statistics from biased samples. We demonstrate the utility of the proposed approach on one- and two-dimensional fluid flow problems, which are representative of the core difficulties present in numerical simulations of weather and climate. Our method produces realistic high-resolution outputs from low-resolution inputs, by upsampling resolutions of $8\\times$ and $16\\times$. Moreover, our procedure correctly matches the statistics of physical quantities, even when the low-frequency content of the inputs and outputs do not match, a crucial but difficult-to-satisfy assumption needed by current state-of-the-art alternatives. 
Code for this work is available at: https://github.com/google-research/swirl-dynamics/tree/main/swirl_dynamics/projects/probabilistic_diffusion.", "keywords": "optimal transport;probabilistic diffusion models;statistical downscaling", "primary_area": "", "supplementary_material": "/attachment/514edf61b984651176cb95b865507033668857b4.pdf", "author": "Zhong Yi Wan;Ricardo Baptista;Anudhyan Boral;Yi-Fan Chen;John Anderson;Fei Sha;Leonardo Zepeda-Nunez", "authorids": "~Zhong_Yi_Wan1;~Ricardo_Baptista1;~Anudhyan_Boral1;yifanchen@google.com;janders@google.com;~Fei_Sha3;~Leonardo_Zepeda-Nunez1", "gender": "M;M;;;;;M", "homepage": ";;;;;;https://www.math.wisc.edu/~lzepeda/", "dblp": "338/6288;136/6901;;;;;", "google_scholar": "T1FxBHsAAAAJ;;;;;;qbMVyzQAAAAJ", "orcid": ";;;;;;", "linkedin": "zhong1wan/;;;;;;", "or_profile": "~Zhong_Yi_Wan1;~Ricardo_Baptista1;~Anudhyan_Boral1;yifanchen@google.com;janders@google.com;~Fei_Sha3;~Leonardo_Zepeda-Nunez1", "aff": "Google;Deparment of Computing + Mathematical Sciences, California Institute of Technology;;;;;University of Wisconsin, Madison", "aff_domain": "google.com;cms.caltech.edu;;;;;wisc.edu", "position": "Researcher;Instructor;;;;;Assistant Professor", "bibtex": "@inproceedings{\nwan2023debias,\ntitle={Debias Coarsely, Sample Conditionally: Statistical Downscaling through Optimal Transport and Probabilistic Diffusion Models},\nauthor={Zhong Yi Wan and Ricardo Baptista and Anudhyan Boral and Yi-Fan Chen and John Anderson and Fei Sha and Leonardo Zepeda-Nunez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5NxJuc0T1P}\n}", "github": "", "project": "", "reviewers": "wBNS;zUt7;Z19Y;fyEE", "pdf_size": 1199332, "rating": "3;5;8;8", "confidence": "3;3;5;3", "soundness": "2;2;4;4", "novelty": "2;2;4;4", "presentation": "2;4;4;4", "wc_summary": "51;30;56;78", "wc_strengths": "48;53;212;83", "wc_weaknesses": "501;99;11;138", "wc_questions": "3;68;80;158", "wc_limitations": "3;17;17;67", "wc_review": "606;267;376;524", "wc_reply_reviewers": "311;111;118;0", "wc_reply_authors": "1441;512;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "4;2;1;1", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 53.75, 17.06421694658152 ], "wc_strengths_avg": [ 99.0, 66.59954954802622 ], "wc_weaknesses_avg": [ 187.25, 186.89351914927389 ], "wc_questions_avg": [ 77.25, 55.06076189084201 ], "wc_limitations_avg": [ 26.0, 24.351591323771842 ], "wc_review_avg": [ 443.25, 130.95299729292185 ], "wc_reply_reviewers_avg": [ 135.0, 111.87716478352498 ], "wc_reply_authors_avg": [ 488.25, 588.4455688506797 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5443310539518174, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16728784170402423424&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;cms.caltech.edu;;;;;wisc.edu", "author_num": 7, "aff_unique_index": "0;2;3", "aff_unique_norm": "Google;;California Institute of Technology;University of Wisconsin", "aff_unique_dep": "Google;;Mathematical Sciences;", "aff_unique_url": "https://www.google.com;;https://www.caltech.edu;https://www.wisc.edu", "aff_unique_abbr": "Google;;Caltech;UW", 
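The debiasing stage of the two-stage framework above is easiest to see in one dimension, where the optimal transport map between empirical distributions reduces to quantile matching. The sketch below is that 1-D special case only; the paper estimates such a map for high-dimensional flow fields, and the function name is hypothetical.

```python
import numpy as np

def ot_debias_1d(biased, reference):
    """1-D optimal transport (monotone) map: send each biased sample to
    the reference quantile at its own rank. In one dimension the OT map
    between empirical distributions is exactly this quantile matching."""
    ranks = np.argsort(np.argsort(biased))   # rank of each biased sample
    levels = (ranks + 0.5) / len(biased)     # quantile levels in (0, 1)
    return np.quantile(reference, levels)
```

In the paper, the output of the debiasing map is then upsampled by a probabilistic diffusion model with a posteriori conditional sampling.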
"aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Mountain View;;Pasadena;Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "SMACv2: An Improved Benchmark for Cooperative Multi-Agent Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73695", "id": "5OjLGiJW3u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/764c18ad230f9e7bf6a77ffc2312c55e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=5OjLGiJW3u", "openreview": "https://openreview.net/forum?id=5OjLGiJW3u", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73695", "video": "https://nips.cc/virtual/2023/poster/73695", "author_site": "Benjamin Ellis, Jonathan Cook, Skander Moalla, Mikayel Samvelyan, Mingfei Sun, Anuj Mahajan, Jakob Foerster, Shimon Whiteson", "tldr": "", "abstract": "The availability of challenging benchmarks has played a key role in the recent progress of machine learning. In cooperative multi-agent reinforcement learning, the StarCraft Multi-Agent Challenge (SMAC) \nhas become a popular testbed for centralised training with decentralised execution. However, after years of sustained improvement on SMAC, algorithms now achieve near-perfect performance. In this work, we conduct new analysis demonstrating that SMAC lacks the stochasticity and partial observability to require complex *closed-loop* policies. In particular, we show that an *open-loop* policy conditioned only on the timestep can achieve non-trivial win rates for many SMAC scenarios. To address this limitation, we introduce SMACv2, a new version of the benchmark where scenarios are procedurally generated and require agents to generalise to previously unseen settings (from the same distribution) during evaluation. We also introduce the extended partial observability challenge (EPO), which augments SMACv2 to ensure meaningful partial observability. We show that these changes ensure the benchmark\nrequires the use of *closed-loop* policies. We evaluate state-of-the-art algorithms on SMACv2 and show that it presents significant challenges not present in the original benchmark. Our analysis illustrates that SMACv2 addresses the discovered deficiencies of SMAC and can help benchmark the next generation of MARL methods. 
Videos of training are available on our [website](https://sites.google.com/view/smacv2).", "keywords": "Multi-Agent Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/e122427f903f6c350445d9d7077fcb359bff1ef9.pdf", "author": "Benjamin Ellis;Jonathan Cook;Skander Moalla;Mikayel Samvelyan;Mingfei Sun;Anuj Mahajan;Jakob Nicolaus Foerster;Shimon Whiteson", "authorids": "~Benjamin_Ellis1;~Jonathan_Cook3;~Skander_Moalla1;~Mikayel_Samvelyan1;~Mingfei_Sun1;~Anuj_Mahajan1;~Jakob_Nicolaus_Foerster1;~Shimon_Whiteson1", "gender": "M;M;M;M;M;M;M;", "homepage": "http://whirl.cs.ox.ac.uk/pages/people/ben.html;;https://skandermoalla.com/;https://www.samvelyan.com/;https://research.manchester.ac.uk/en/persons/mingfei-sun;https://anuj-mahajan.github.io/;https://www.jakobfoerster.com;", "dblp": ";;336/2569;170/0101;195/7934.html;99/3800;176/5095;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html", "google_scholar": ";7tcPHHYAAAAJ;YEP65IMAAAAJ;2Qs19WAAAAAJ;2Uzgp5kAAAAJ;https://scholar.google.co.in/citations?user=a3AbXGcAAAAJ;6z4lQzMAAAAJ;", "orcid": ";;0000-0002-8494-8071;0009-0001-6748-8755;;;;", "linkedin": ";jonathan-cook-78339618a/;skander-moalla/;samvelyan;;anuj-m-bb0a26175/;;", "or_profile": "~Benjamin_Ellis1;~Jonathan_Cook3;~Skander_Moalla1;~Mikayel_Samvelyan1;~Mingfei_Sun1;~Anuj_Mahajan1;~Jakob_Nicolaus_Foerster1;~Shimon_Whiteson1", "aff": "Department of Computer Science, University of Oxford;University of Oxford;EPFL - EPF Lausanne;Meta (FAIR);University of Manchester ;Amazon;University of Oxford, University of Oxford;University of Oxford", "aff_domain": "cs.ox.ac.uk;ox.ac.uk;epfl.ch;fb.com;manchester.ac.uk;amazon.com;eng.ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;PhD student;Research Assistant;Assistant Professor;Researcher;Associate Professor;Professor", "bibtex": "@inproceedings{\nellis2023smacv,\ntitle={{SMAC}v2: An Improved Benchmark for Cooperative Multi-Agent Reinforcement Learning},\nauthor={Benjamin Ellis and Jonathan Cook and Skander Moalla and Mikayel Samvelyan and Mingfei Sun and Anuj Mahajan and Jakob Nicolaus Foerster and Shimon Whiteson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=5OjLGiJW3u}\n}", "github": "", "project": "", "reviewers": "QzEt;9mMA;sKYb", "pdf_size": 24011794, "rating": "8;8;9", "confidence": "4;3;3", "wc_summary_and_contributions": "184;153;90", "wc_strengths": "110;0;26", "wc_improvement": "14;76;46", "wc_limitations": "184;0;1", "wc_correctness": "29;0;11", "wc_clarity": "6;82;17", "wc_relation_to_prior_work": "16;0;1", "wc_documentation": "26;0;4", "wc_additional_feedback": "1;1;1", "wc_review": "570;312;197", "wc_reply_reviewers": "0;12;0", "wc_reply_authors": "511;497;171", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 8.333333333333334, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 142.33333333333334, 39.109532796436675 ], "wc_strengths_avg": [ 45.333333333333336, 46.942044646090515 ], "wc_improvement_avg": [ 45.333333333333336, 25.315783394730033 ], "wc_limitations_avg": [ 61.666666666666664, 86.50369292052733 ], "wc_correctness_avg": [ 13.333333333333334, 11.953614051360738 ], "wc_clarity_avg": [ 35.0, 33.53605025441527 ], "wc_relation_to_prior_work_avg": [ 5.666666666666667, 7.318166133366716 ], "wc_documentation_avg": [ 10.0, 11.430952132988164 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], 
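The open-loop diagnostic described in the SMACv2 abstract can be summarized in a few lines: a policy that maps only the timestep to an action, evaluated by win rate. If such a policy scores well, the scenario demands little closed-loop control. The environment interface below (`reset`/`step` returning a win flag) is an assumption for illustration, not the SMAC API.

```python
class OpenLoopPolicy:
    """Ignores observations: the action depends only on the timestep,
    e.g. a per-step action table fit from winning replays."""
    def __init__(self, action_per_step):
        self.action_per_step = action_per_step

    def act(self, obs, t):
        return self.action_per_step[min(t, len(self.action_per_step) - 1)]

def win_rate(policy, make_env, episodes=100):
    """Fraction of episodes won. The env interface -- reset() -> obs,
    step(a) -> (obs, won, done) -- is assumed for illustration."""
    wins = 0
    for _ in range(episodes):
        env, t = make_env(), 0
        obs, won, done = env.reset(), False, False
        while not done:
            obs, won, done = env.step(policy.act(obs, t))
            t += 1
        wins += int(won)
    return wins / episodes
```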
"wc_review_avg": [ 359.6666666666667, 155.9622461438095 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 393.0, 157.08171970877663 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 125, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11871804540270851135&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 8, "email": "cs.ox.ac.uk;ox.ac.uk;epfl.ch;fb.com;manchester.ac.uk;amazon.com;eng.ox.ac.uk;ox.ac.uk", "author_num": 8, "aff_unique_index": "0;0;1;2;3;4;0;0", "aff_unique_norm": "University of Oxford;EPFL;Meta;University of Manchester;Amazon", "aff_unique_dep": "Department of Computer Science;;FAIR;;Amazon.com, Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://www.epfl.ch;https://meta.org;https://www.manchester.ac.uk;https://www.amazon.com", "aff_unique_abbr": "Oxford;EPFL;Meta;UoM;Amazon", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Oxford;;Lausanne", "aff_country_unique_index": "0;0;1;2;0;2;0;0", "aff_country_unique": "United Kingdom;Switzerland;United States" }, { "title": "Smoothed Analysis of Sequential Probability Assignment", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72805", "id": "5R9bZlpZKj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc30caeb45721bab13507c50199e6403-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5R9bZlpZKj", "openreview": "https://openreview.net/forum?id=5R9bZlpZKj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72805", "video": "https://nips.cc/virtual/2023/poster/72805", "author_site": "Alankrita Bhatt, Nika Haghtalab, Abhishek Shetty", "tldr": "", "abstract": "We initiate the study of smoothed analysis for the sequential probability assignment problem with contexts. We study information-theoretically optimal minmax rates as well as a framework for algorithmic reduction involving the maximum likelihood estimator oracle. Our approach establishes a general-purpose reduction from minimax rates for sequential probability assignment for smoothed adversaries to minimax rates for transductive learning. This leads to optimal (logarithmic) fast rates for parametric classes and classes with finite VC dimension. On the algorithmic front, we develop an algorithm that efficiently taps into the MLE oracle, for general classes of functions. 
We show that under general conditions this algorithmic approach yields sublinear regret.", "keywords": "Online learning;Log loss;Information theory;Smoothed Analysis;Beyond worst case analysis;Oracle Efficient Online Learning", "primary_area": "", "supplementary_material": "/attachment/71ab2609d668bc92610b4745d53c708d70351db1.pdf", "author": "Alankrita Bhatt;Nika Haghtalab;Abhishek Shetty", "authorids": "~Alankrita_Bhatt1;~Nika_Haghtalab2;~Abhishek_Shetty1", "gender": ";F;M", "homepage": ";https://people.eecs.berkeley.edu/~nika/;https://ashettyv.github.io/", "dblp": ";;223/4770", "google_scholar": ";;https://scholar.google.co.in/citations?user=M-y2aLUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alankrita_Bhatt1;~Nika_Haghtalab2;~Abhishek_Shetty1", "aff": ";University of California, Berkeley;University of California, Berkeley", "aff_domain": ";berkeley.edu;berkeley.edu", "position": ";Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbhatt2023smoothed,\ntitle={Smoothed Analysis of Sequential Probability Assignment},\nauthor={Alankrita Bhatt and Nika Haghtalab and Abhishek Shetty},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5R9bZlpZKj}\n}", "github": "", "project": "", "reviewers": "4UMy;9j8s;X6XT;oQa8;w3DK", "pdf_size": 460205, "rating": "6;6;6;7;7", "confidence": "4;2;4;3;3", "soundness": "3;3;3;4;4", "novelty": "3;3;3;3;3", "presentation": "2;3;3;4;3", "wc_summary": "227;161;309;106;258", "wc_strengths": "43;55;14;51;131", "wc_weaknesses": "329;42;170;4;260", "wc_questions": "152;80;1;94;166", "wc_limitations": "6;18;1;1;1", "wc_review": "757;356;495;256;816", "wc_reply_reviewers": "13;31;33;34;16", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 212.2, 71.53572534055972 ], "wc_strengths_avg": [ 58.8, 38.84533434017526 ], "wc_weaknesses_avg": [ 161.0, 124.02902886018256 ], "wc_questions_avg": [ 98.6, 58.8 ], "wc_limitations_avg": [ 5.4, 6.590902821313633 ], "wc_review_avg": [ 536.0, 218.9620971766575 ], "wc_reply_reviewers_avg": [ 25.4, 9.00222194794152 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2182178902359924, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11431205566283559152&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Inconsistency, Instability, and Generalization Gap of Deep Neural Network Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72804", "id": "5SIz31OGFV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e58b1bf9f218fcd19e4539e982752a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5SIz31OGFV", "openreview": 
"https://openreview.net/forum?id=5SIz31OGFV", "poster": "/media/PosterPDFs/NeurIPS%202023/72804.png?t=1701386294.142158", "slides": "https://nips.cc/virtual/2023/poster/72804", "video": "https://nips.cc/virtual/2023/poster/72804", "author_site": "Rie Johnson, Tong Zhang", "tldr": "", "abstract": "As deep neural networks are highly expressive, it is important to find solutions with small generalization gap (the difference between the performance on the training data and unseen data). Focusing on the stochastic nature of training, we first present a theoretical analysis in which the bound of generalization gap depends on what we call inconsistency and instability of model outputs, which can be estimated on unlabeled data. Our empirical study based on this analysis shows that instability and inconsistency are strongly predictive of generalization gap in various settings. In particular, our finding indicates that inconsistency is a more reliable indicator of generalization gap than the sharpness of the loss landscape. Furthermore, we show that algorithmic reduction of inconsistency leads to superior performance. The results also provide a theoretical basis for existing methods such as co-distillation and ensemble.", "keywords": "Deep neural network training;Generalization gap;Empirical study", "primary_area": "", "supplementary_material": "/attachment/0824552ca8a614a34547b0e4f8ed7600f369fa2f.pdf", "author": "Rie Johnson;Tong Zhang", "authorids": "~Rie_Johnson1;~Tong_Zhang2", "gender": "F;M", "homepage": "http://riejohnson.com;http://tongzhang-ml.org", "dblp": "66/1605;07/4227-1", "google_scholar": ";LurWtuYAAAAJ", "orcid": ";0000-0002-5511-2558", "linkedin": ";", "or_profile": "~Rie_Johnson1;~Tong_Zhang2", "aff": "RJ Research Consulting;Hong Kong University of Science and Technology", "aff_domain": "riejohnson.com;ust.hk", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\njohnson2023inconsistency,\ntitle={Inconsistency, Instability, and Generalization Gap of Deep Neural Network Training},\nauthor={Rie Johnson and Tong Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5SIz31OGFV}\n}", "github": "", "project": "", "reviewers": "k85B;TByp;3GRr;ARdH", "pdf_size": 1171692, "rating": "4;5;6;6", "confidence": "4;4;4;3", "soundness": "4;3;3;3", "novelty": "3;2;2;3", "presentation": "4;3;3;3", "wc_summary": "71;47;102;57", "wc_strengths": "83;59;139;49", "wc_weaknesses": "188;184;58;331", "wc_questions": "295;2;54;46", "wc_limitations": "1;2;3;6", "wc_review": "638;294;356;489", "wc_reply_reviewers": "0;0;5;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.25, 20.740961887048538 ], "wc_strengths_avg": [ 82.5, 34.88194375317981 ], "wc_weaknesses_avg": [ 190.25, 96.62394889467103 ], "wc_questions_avg": [ 99.25, 114.73747208301218 ], "wc_limitations_avg": [ 3.0, 1.8708286933869707 ], "wc_review_avg": [ 444.25, 132.1975321252254 ], "wc_reply_reviewers_avg": [ 4.5, 5.315072906367325 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 6, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=9743523268748964346&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "riejohnson.com;ust.hk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "RJ Research Consulting;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ust.hk", "aff_unique_abbr": ";HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "title": "Variational Inference with Gaussian Score Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72803", "id": "5TTV5IZnLL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f9453c4848b89d4d8c5d6041f5fb9ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5TTV5IZnLL", "openreview": "https://openreview.net/forum?id=5TTV5IZnLL", "poster": "/media/PosterPDFs/NeurIPS%202023/72803.png?t=1701114919.4650567", "slides": "https://nips.cc/virtual/2023/poster/72803", "video": "https://nips.cc/virtual/2023/poster/72803", "author_site": "Chirag Modi, Robert Gower, Charles Margossian, Yuling Yao, David Blei, Lawrence Saul", "tldr": "", "abstract": "Variational inference (VI) is a method to approximate the computationally intractable posterior distributions that arise in\n Bayesian statistics. Typically, VI fits a simple parametric distribution to be close to the target posterior, optimizing an appropriate objective such as the evidence lower bound (ELBO). In this work, we present a new approach to VI. Our method is based on the principle of score matching---namely, that if two distributions are equal then their score functions (i.e., gradients of the log density) are equal at every point on their support. With this principle, we develop score-matching VI, an iterative algorithm that seeks to match the scores between the variational approximation and the exact posterior. At each iteration, score-matching VI solves an inner optimization, one that minimally adjusts the current variational estimate to match the scores at a newly sampled value of the latent variables. We show that when the variational family is a Gaussian, this inner optimization enjoys a closed-form solution, which we call Gaussian score matching VI (GSM-VI). GSM-VI is a ``black box'' variational algorithm in that it only requires a differentiable joint distribution, and as such it can be applied to a wide class of models. We compare GSM-VI to black box variational inference (BBVI), which has similar requirements but instead optimizes the ELBO. We first study how GSM-VI behaves as a function of the problem dimensionality, the condition number of the target covariance matrix (when the target is Gaussian), and the degree of mismatch between the approximating and exact posterior distribution. We then study GSM-VI on a collection of real-world Bayesian inference problems from the posteriorDB database of datasets and models. We find that GSM-VI is faster than BBVI and equally or more accurate. Specifically, over a wide range of target posteriors, GSM-VI requires 10-100x fewer gradient evaluations than BBVI to obtain a comparable quality of approximation.", "keywords": "Variational Inference;score matching;KL projection;polyak stepsize", "primary_area": "", "supplementary_material": "/attachment/c270656b6f0e44351a6d161b4bb19bfb5536cd8a.zip", "author": "Chirag Modi;Robert M. 
Gower;Charles Margossian;Yuling Yao;David Blei;Lawrence K. Saul", "authorids": "cmodi@flatironinstitute.org;~Robert_M._Gower1;~Charles_Margossian1;~Yuling_Yao2;~David_Blei2;~Lawrence_K._Saul3", "gender": ";M;M;M;M;", "homepage": ";https://gowerrobert.github.io/;https://charlesm93.github.io./;https://www.yulingyao.com;http://www.cs.columbia.edu/~blei/;", "dblp": ";143/0056;;87/6597;86/1910;", "google_scholar": ";okKw87MAAAAJ;nPtLsvIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ;", "orcid": ";;0000-0002-3274-5619;0000-0002-0985-7233;;", "linkedin": ";;charles-margossian-3428935b/;;;", "or_profile": "cmodi@flatironinstitute.org;~Robert_M._Gower1;~Charles_Margossian1;~Yuling_Yao2;~David_Blei2;~Lawrence_K._Saul3", "aff": ";Flatiron Institute;Flatiron Institute;Flatiron Institute;Columbia University;", "aff_domain": ";simonsfoundation.org;flatironinstitute.org;flatironinstitute.org;columbia.edu;", "position": ";Researcher;Postdoc;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nmodi2023variational,\ntitle={Variational Inference with Gaussian Score Matching},\nauthor={Chirag Modi and Robert M. Gower and Charles Margossian and Yuling Yao and David Blei and Lawrence K. Saul},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5TTV5IZnLL}\n}", "github": "", "project": "", "reviewers": "CVEd;DLrt;h2vB;Ciug", "pdf_size": 5144138, "rating": "3;7;7;7", "confidence": "5;3;4;4", "soundness": "2;3;4;3", "novelty": "2;3;3;4", "presentation": "4;4;3;3", "wc_summary": "291;79;124;328", "wc_strengths": "87;66;102;54", "wc_weaknesses": "853;187;70;204", "wc_questions": "71;104;363;136", "wc_limitations": "32;4;22;31", "wc_review": "1334;440;681;753", "wc_reply_reviewers": "961;42;57;24", "wc_reply_authors": "1456;0;154;16", "reply_reviewers": "3;1;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 205.5, 106.02004527446685 ], "wc_strengths_avg": [ 77.25, 18.538810641462412 ], "wc_weaknesses_avg": [ 328.5, 307.1827631883013 ], "wc_questions_avg": [ 168.5, 114.62220552755038 ], "wc_limitations_avg": [ 22.25, 11.233320969330485 ], "wc_review_avg": [ 802.0, 328.2948369986954 ], "wc_reply_reviewers_avg": [ 271.0, 398.5429713343343 ], "wc_reply_authors_avg": [ 406.5, 608.8799142688154 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15228531311745276346&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";simonsfoundation.org;flatironinstitute.org;flatironinstitute.org;columbia.edu;", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Flatiron Institute;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://flatironinstitute.org;https://www.columbia.edu", "aff_unique_abbr": "Flatiron;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Uni3DETR: Unified 3D Detection Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72802", "id": 
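One way to see the score-matching principle behind GSM-VI: a Gaussian's score is linear in $z$, so fitting a linear map from sampled points to target scores $\nabla \log p(z)$ recovers a Gaussian whose score matches. The batch least-squares sketch below illustrates that identity only; GSM-VI itself makes closed-form minimal adjustments per sampled point rather than a regression.

```python
import numpy as np

def gaussian_from_scores(zs, grads):
    """Fit N(m, S) whose score -S^{-1}(z - m) matches target scores
    grads at the points zs, via the linear regression grad ~ A z + c
    with A = -S^{-1} and c = S^{-1} m. Needs n >= d + 1 samples."""
    Z = np.hstack([zs, np.ones((len(zs), 1))])        # design matrix [z, 1]
    coef, *_ = np.linalg.lstsq(Z, grads, rcond=None)  # shape (d + 1, d)
    A, c = coef[:-1].T, coef[-1]
    S = -np.linalg.inv(A)                             # covariance estimate
    m = S @ c                                         # mean estimate
    return m, S
```

For an exactly Gaussian target, where $\nabla \log p(z) = -\Sigma^{-1}(z - \mu)$, this regression recovers $\mu$ and $\Sigma$ from any $d + 1$ samples in general position.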
"5UOYGfobhC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d60bfd8458b67acbbaf18b892338d00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5UOYGfobhC", "openreview": "https://openreview.net/forum?id=5UOYGfobhC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72802", "video": "https://nips.cc/virtual/2023/poster/72802", "author_site": "Zhenyu Wang, Ya-Li Li, Xi Chen, Hengshuang Zhao, Shengjin Wang", "tldr": "", "abstract": "Existing point cloud based 3D detectors are designed for the particular scene, either indoor or outdoor ones. Because of the substantial differences in object distribution and point density within point clouds collected from various environments, coupled with the intricate nature of 3D metrics, there is still a lack of a unified network architecture that can accommodate diverse scenes. In this paper, we propose Uni3DETR, a unified 3D detector that addresses indoor and outdoor 3D detection within the same framework. Specifically, we employ the detection transformer with point-voxel interaction for object prediction, which leverages voxel features and points for cross-attention and behaves resistant to the discrepancies from data. We then propose the mixture of query points, which sufficiently exploits global information for dense small-range indoor scenes and local information for large-range sparse outdoor ones. Furthermore, our proposed decoupled IoU provides an easy-to-optimize training target for localization by disentangling the $xy$ and $z$ space. Extensive experiments validate that Uni3DETR exhibits excellent performance consistently on both indoor and outdoor 3D detection. In contrast to previous specialized detectors, which may perform well on some particular datasets but suffer a substantial degradation on different scenes, Uni3DETR demonstrates the strong generalization ability under heterogeneous conditions (Fig. 
1).", "keywords": "3d object detection;unified object detection;point clouds", "primary_area": "", "supplementary_material": "", "author": "Zhenyu Wang;Ya-Li Li;Xi Chen;Hengshuang Zhao;Shengjin Wang", "authorids": "~Zhenyu_Wang3;~Ya-Li_Li1;~Xi_Chen30;~Hengshuang_Zhao2;~Shengjin_Wang1", "gender": "M;F;M;M;M", "homepage": ";;;https://hszhao.github.io;http://www.ee.tsinghua.edu.cn/publish/eeen/8316/index.html", "dblp": "22/1486-5;05/1013-1.html;;185/7848;", "google_scholar": "x_-kOjoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;INISnXkAAAAJ;4uE10I0AAAAJ;", "orcid": ";;;0000-0001-8277-2706;", "linkedin": ";;;hengshuang-zhao-347b8391/?originalSubdomain=hk;", "or_profile": "~Zhenyu_Wang3;~Ya-Li_Li1;~Xi_Chen30;~Hengshuang_Zhao2;~Shengjin_Wang1", "aff": "Tsinghua University;Tsinghua University;the University of Hong Kong, University of Hong Kong;The University of Hong Kong;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cs.hku.hk;hku.hk;tsinghua.edu.cn", "position": "PhD student;Lecturer;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023unidetr,\ntitle={Uni3{DETR}: Unified 3D Detection Transformer},\nauthor={Zhenyu Wang and Ya-Li Li and Xi Chen and Hengshuang Zhao and Shengjin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5UOYGfobhC}\n}", "github": "", "project": "", "reviewers": "r5Yb;iCdw;Q3d3;bu7C;TGtP", "pdf_size": 2875211, "rating": "5;5;6;6;6", "confidence": "5;5;4;4;5", "soundness": "2;3;3;2;3", "novelty": "2;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "30;126;40;68;152", "wc_strengths": "9;126;56;40;78", "wc_weaknesses": "11;136;135;539;125", "wc_questions": "206;64;21;4;228", "wc_limitations": "10;46;1;8;46", "wc_review": "266;498;253;659;629", "wc_reply_reviewers": "6;28;15;433;0", "wc_reply_authors": "30;30;31;861;0", "reply_reviewers": "1;1;1;2;0", "reply_authors": "2;2;2;4;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.2, 47.9432998447124 ], "wc_strengths_avg": [ 61.8, 39.193877072828606 ], "wc_weaknesses_avg": [ 189.2, 181.11035309998155 ], "wc_questions_avg": [ 104.6, 94.09272022850651 ], "wc_limitations_avg": [ 22.2, 19.661129163911212 ], "wc_review_avg": [ 461.0, 173.25472576527315 ], "wc_reply_reviewers_avg": [ 96.4, 168.56405310741673 ], "wc_reply_authors_avg": [ 190.4, 335.50594629603813 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666665, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13584276950027334381&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;cs.hku.hk;hku.hk;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Tsinghua University;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.hku.hk", "aff_unique_abbr": "THU;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Dynamic Prompt Learning: Addressing Cross-Attention Leakage for Text-Based Image Editing", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/72801", "id": "5UXXhVI08r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5321b1dabcd2be188d796c21b733e8c7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5UXXhVI08r", "openreview": "https://openreview.net/forum?id=5UXXhVI08r", "poster": "/media/PosterPDFs/NeurIPS%202023/72801.png?t=1697200364.9094558", "slides": "https://nips.cc/virtual/2023/poster/72801", "video": "https://nips.cc/virtual/2023/poster/72801", "author_site": "kai wang, Fei Yang, Shiqi Yang, Shiqi Yang, Muhammad Atif Butt, Joost van de Weijer", "tldr": "", "abstract": "Large-scale text-to-image generative models have been a ground-breaking development in generative AI, with diffusion models showing their astounding ability to synthesize convincing images following an input text prompt. The goal of image editing research is to give users control over the generated images by modifying the text prompt. Current image editing techniques are susceptible to unintended modifications of regions outside the targeted area, such as on the background or on distractor objects which have some semantic or visual relationship with the targeted object. According to our experimental findings, inaccurate cross-attention maps are at the root of this problem. Based on this observation, we propose $\\textit{Dynamic Prompt Learning}$ ($DPL$) to force cross-attention maps to focus on correct $\\textit{noun}$ words in the text prompt. By updating the dynamic tokens for nouns in the textual input with the proposed leakage repairment losses, we achieve fine-grained image editing over particular objects while preventing undesired changes to other image regions. Our method $DPL$, based on the publicly available $\\textit{Stable Diffusion}$, is extensively evaluated on a wide range of images, and consistently obtains superior results both quantitatively (CLIP score, Structure-Dist) and qualitatively (on user-evaluation). 
We show improved prompt editing results for Word-Swap, Prompt Refinement, and Attention Re-weighting, especially for complex multi-object scenes.", "keywords": "Diffusion Models; Text-guided Image Edit; Textual Inversion; Localization", "primary_area": "", "supplementary_material": "/attachment/53af7dbdacbe7a8cdd21a13a39cf6989a8ed1f2c.pdf", "author": "Kai Wang;Fei Yang;Shiqi Yang;Muhammad Atif Butt;Joost van de Weijer", "authorids": "~Kai_Wang7;~Fei_Yang4;~Shiqi_Yang1;~Muhammad_Atif_Butt1;~Joost_van_de_Weijer5", "gender": "M;M;M;M;M", "homepage": "https://wangkai930418.github.io/;;https://www.shiqiyang.xyz/;https://www.researchgate.net/profile/Muhammad-Butt-49;http://lamp.cvc.uab.es/", "dblp": ";19/2504-4;;287/7406;67/3379", "google_scholar": "j14vd0wAAAAJ;S1gksNwAAAAJ;p27Iqt4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": "0000-0002-9605-8279;;;;0000-0002-9656-9706", "linkedin": "kai-wang-43129b1b7/;;aquila147/;aatiibutt/;", "or_profile": "~Kai_Wang7;~Fei_Yang4;~Shiqi_Yang1;~Muhammad_Atif_Butt1;~Joost_van_de_Weijer1", "aff": "Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;Computer Vision Center Barcelona, UAB;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es", "position": "Postdoc;Postdoc;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nwang2023dynamic,\ntitle={Dynamic Prompt Learning: Addressing Cross-Attention Leakage for Text-Based Image Editing},\nauthor={Kai Wang and Fei Yang and Shiqi Yang and Muhammad Atif Butt and Joost van de Weijer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5UXXhVI08r}\n}", "github": "", "project": "", "reviewers": "XeHW;zBnt;qcZn;4SLV;m2Wr;sXMN", "pdf_size": 31034475, "rating": "4;5;5;5;6;7", "confidence": "5;4;4;5;3;4", "soundness": "3;3;3;4;3;3", "novelty": "2;2;3;3;3;3", "presentation": "2;3;3;3;3;3", "wc_summary": "108;137;52;67;15;100", "wc_strengths": "81;120;34;66;17;33", "wc_weaknesses": "183;276;14;164;52;19", "wc_questions": "6;75;1;102;5;44", "wc_limitations": "6;29;1;80;1;6", "wc_review": "384;637;102;479;90;202", "wc_reply_reviewers": "19;35;16;30;14;0", "wc_reply_authors": "62;40;26;71;24;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "2;2;2;2;2;1", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.166666666666667, 0.6871842709362768 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 79.83333333333333, 40.02256308078676 ], "wc_strengths_avg": [ 58.5, 34.9225333178069 ], "wc_weaknesses_avg": [ 118.0, 96.84867922004237 ], "wc_questions_avg": [ 38.833333333333336, 38.68426323742282 ], "wc_limitations_avg": [ 20.5, 28.2651139510646 ], "wc_review_avg": [ 315.6666666666667, 201.70164985828836 ], "wc_reply_reviewers_avg": [ 19.0, 11.343133018115703 ], "wc_reply_authors_avg": [ 37.166666666666664, 23.975102826798377 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.8333333333333333, 0.3726779962499649 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6002450479987811, "gs_citation": 61, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=4682341582324740941&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Universitat Aut\u00f3noma de Barcelona;Universitat Aut\u00f2noma de Barcelona", "aff_unique_dep": "Computer Vision Center;Computer Vision Center", "aff_unique_url": "https://www.uab.cat;https://www.uab.cat", "aff_unique_abbr": "UAB;UAB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Barcelona", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Spain" }, { "title": "Evaluating the Robustness of Interpretability Methods through Explanation Invariance and Equivariance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72800", "id": "5UwnKSgY6u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e1f418450107c4a0ddc16d008d131573-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5UwnKSgY6u", "openreview": "https://openreview.net/forum?id=5UwnKSgY6u", "poster": "/media/PosterPDFs/NeurIPS%202023/72800.png?t=1699477815.8987515", "slides": "https://nips.cc/virtual/2023/poster/72800", "video": "https://nips.cc/virtual/2023/poster/72800", "author_site": "Jonathan Crabb\u00e9, Mihaela van der Schaar", "tldr": "", "abstract": "Interpretability methods are valuable only if their explanations faithfully describe the explained model. In this work, we consider neural networks whose predictions are invariant under a specific symmetry group. This includes popular architectures, ranging from convolutional to graph neural networks. Any explanation that faithfully explains this type of model needs to be in agreement with this invariance property. We formalize this intuition through the notion of explanation invariance and equivariance by leveraging the formalism from geometric deep learning. Through this rigorous formalism, we derive (1) two metrics to measure the robustness of any interpretability method with respect to the model symmetry group; (2) theoretical robustness guarantees for some popular interpretability methods and (3) a systematic approach to increase the invariance of any interpretability method with respect to a symmetry group. 
By empirically measuring our metrics for explanations of models associated with various modalities and symmetry groups, we derive a set of 5 guidelines to allow users and developers of interpretability methods to produce robust explanations.", "keywords": "interpretability;explainability;robustness;invariance;equivariance;geometric deep learning", "primary_area": "", "supplementary_material": "/attachment/941f61a4a0ff89dc48775b3f81a7e80b3629e92d.zip", "author": "Jonathan Crabb\u00e9;Mihaela van der Schaar", "authorids": "~Jonathan_Crabb\u00e91;~Mihaela_van_der_Schaar2", "gender": "M;F", "homepage": "https://jonathancrabbe.github.io/;https://www.vanderschaar-lab.com", "dblp": "278/8353.html;", "google_scholar": "Y_Nmd2sAAAAJ;DZ3S--MAAAAJ", "orcid": "0000-0002-0341-7712;", "linkedin": "jonathan-crabb%C3%A9-4ab5701a5/;", "or_profile": "~Jonathan_Crabb\u00e91;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;ucla.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncrabb{\\'e}2023evaluating,\ntitle={Evaluating the Robustness of Interpretability Methods through Explanation Invariance and Equivariance},\nauthor={Jonathan Crabb{\\'e} and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5UwnKSgY6u}\n}", "github": "", "project": "", "reviewers": "wVxe;u2n8;NZiw;LQbZ;7REw", "pdf_size": 1952083, "rating": "5;5;5;6;6", "confidence": "4;3;3;4;4", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "2;4;2;3;3", "wc_summary": "82;111;91;66;112", "wc_strengths": "78;57;85;81;96", "wc_weaknesses": "178;121;82;99;260", "wc_questions": "94;81;90;35;38", "wc_limitations": "67;72;2;5;25", "wc_review": "499;442;350;286;531", "wc_reply_reviewers": "87;0;0;12;0", "wc_reply_authors": "258;0;0;17;0", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 92.4, 17.53396703544295 ], "wc_strengths_avg": [ 79.4, 12.753038853543888 ], "wc_weaknesses_avg": [ 148.0, 64.69930447848725 ], "wc_questions_avg": [ 67.6, 25.757329054077015 ], "wc_limitations_avg": [ 34.2, 29.929249907072514 ], "wc_review_avg": [ 421.6, 91.5305413509611 ], "wc_reply_reviewers_avg": [ 19.8, 33.919905660246165 ], "wc_reply_authors_avg": [ 55.0, 101.71332262786424 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3426075016735188450&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cam.ac.uk;ucla.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Replicable Clustering", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72799", "id": "5VQFAvUHcd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7bc3fe234454107149fa9d44faacaa64-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5VQFAvUHcd", "openreview": "https://openreview.net/forum?id=5VQFAvUHcd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72799", "video": "https://nips.cc/virtual/2023/poster/72799", "author_site": "Hossein Esfandiari, Hossein Esfandiari, Amin Karbasi, Vahab Mirrokni, Grigoris Velegkas, Felix Zhou", "tldr": "", "abstract": "We design replicable algorithms in the context of statistical clustering under the recently introduced notion of replicability from Impagliazzo et al. [2022]. According to this definition, a clustering algorithm is replicable if, with high probability, its output induces the exact same partition of the sample space after two executions on different inputs drawn from the same distribution, when its internal randomness is shared across the executions. We propose such algorithms for the statistical $k$-medians, statistical $k$-means, and statistical $k$-centers problems by utilizing approximation routines for their combinatorial counterparts in a black-box manner. In particular, we demonstrate a replicable $O(1)$-approximation algorithm for statistical Euclidean $k$-medians ($k$-means) with $\\operatorname{poly}(d)$ sample complexity. We also describe an $O(1)$-approximation algorithm with an additional $O(1)$-additive error for statistical Euclidean $k$-centers, albeit with $\\exp(d)$ sample complexity. In addition, we provide experiments on synthetic distributions in 2D using the $k$-means++ implementation from sklearn as a black-box that validate our theoretical results.", "keywords": "Theory;Clustering Theory;Statistical Learning Theory;Reproducibility;Replicability", "primary_area": "", "supplementary_material": "", "author": "Hossein Esfandiari;Amin Karbasi;Vahab Mirrokni;Grigoris Velegkas;Felix Zhou", "authorids": "~Hossein_Esfandiari1;~Amin_Karbasi3;~Vahab_Mirrokni2;~Grigoris_Velegkas1;~Felix_Zhou1", "gender": ";;M;M;", "homepage": "https://sites.google.com/corp/view/hossein-esfandiari;;https://people.csail.mit.edu/mirrokni/Welcome.html;;", "dblp": "146/7746;;m/VahabSMirrokni;254/1885;", "google_scholar": "Rt8ppJsAAAAJ;;opbZfw0AAAAJ;Ty1kgP0AAAAJ;", "orcid": "0000-0001-8130-6631;;;;", "linkedin": "hossein-esfandiari-10bb0281;;;;", "or_profile": "~Hossein_Esfandiari1;~Amin_Karbasi3;~Vahab_Mirrokni2;~Grigoris_Velegkas1;~Felix_Zhou1", "aff": "Google;;Google Research;Yale University;", "aff_domain": "google.com;;google.com;yale.edu;", "position": "Researcher;;VP, Google Fellow;PhD student;", "bibtex": "@inproceedings{\nesfandiari2023replicable,\ntitle={Replicable Clustering},\nauthor={Hossein Esfandiari and Amin Karbasi and Vahab Mirrokni and Grigoris Velegkas and Felix Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5VQFAvUHcd}\n}", "github": "", "project": "", "reviewers": "nt3r;fyTj;Jm7K;t2BD;RdwG", "pdf_size": 837339, "rating": "6;6;6;7;7", "confidence": "4;3;2;2;4", "soundness": "3;4;3;4;4", "novelty": "3;3;3;4;3", "presentation": "3;3;3;3;2", "wc_summary": "200;502;138;87;88", "wc_strengths": "89;174;50;112;77", "wc_weaknesses": "17;88;133;82;114", "wc_questions": "110;89;8;18;2", "wc_limitations": "14;15;21;17;6", "wc_review": "430;868;350;316;287", "wc_reply_reviewers": "0;0;0;25;13", "wc_reply_authors": "0;0;0;0;0", 
"reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 203.0, 155.12317686277572 ], "wc_strengths_avg": [ 100.4, 41.87887295522648 ], "wc_weaknesses_avg": [ 86.8, 39.42283602177804 ], "wc_questions_avg": [ 45.4, 44.960427044235246 ], "wc_limitations_avg": [ 14.6, 4.923413450036469 ], "wc_review_avg": [ 450.2, 214.31229549421562 ], "wc_reply_reviewers_avg": [ 7.6, 10.05186549850325 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=412009509080389357&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "google.com;;google.com;yale.edu;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Google;Yale University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.yale.edu", "aff_unique_abbr": "Google;Yale", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Characteristic Circuits", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72798", "id": "5W7cXno10k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b61c278e483954fee502b49fe71cd14-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5W7cXno10k", "openreview": "https://openreview.net/forum?id=5W7cXno10k", "poster": "/media/PosterPDFs/NeurIPS%202023/72798.png?t=1701683361.6808758", "slides": "https://nips.cc/virtual/2023/poster/72798", "video": "https://nips.cc/virtual/2023/poster/72798", "author_site": "Zhongjie Yu, Martin Trapp, Kristian Kersting", "tldr": "", "abstract": "In many real-world scenarios it is crucial to be able to reliably and efficiently reason under uncertainty while capturing complex relationships in data.\n Probabilistic circuits (PCs), a prominent family of tractable probabilistic models, offer a remedy to this challenge by composing simple, tractable distributions into a high-dimensional probability distribution. \n However, learning PCs on heterogeneous data is challenging and densities of some parametric distributions are not available in closed form, limiting their potential use. \n We introduce characteristic circuits (CCs), a family of tractable probabilistic models providing a unified formalization of distributions over heterogeneous data in the spectral domain.\n The one-to-one relationship between characteristic functions and probability measures enables us to learn high-dimensional distributions on heterogeneous data domains and facilitates efficient probabilistic inference even when no closed-form density function is available. 
\n We show that the structure and parameters of CCs can be learned efficiently from the data and find that CCs outperform state-of-the-art density estimators for heterogeneous data domains on common benchmark data sets.", "keywords": "Characteristic Circuit;Characteristic Function;Probabilistic Circuit;Heterogeneous Data;Density Estimation", "primary_area": "", "supplementary_material": "/attachment/2446f7a8dc485c31615fd77e147c1ab4793ad044.zip", "author": "Zhongjie Yu;Martin Trapp;Kristian Kersting", "authorids": "~Zhongjie_Yu2;~Martin_Trapp2;~Kristian_Kersting1", "gender": ";M;M", "homepage": "https://ml-research.github.io/people/zyu/;https://trappmartin.github.io;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "158/9396;38/1893-1;40/3793", "google_scholar": "https://scholar.google.com/citations?hl=en;GwCrZP4AAAAJ;QY-earAAAAAJ", "orcid": ";0000-0003-1725-3381;0000-0002-2873-9152", "linkedin": ";;", "or_profile": "~Zhongjie_Yu2;~Martin_Trapp2;~Kristian_Kersting1", "aff": "TU Darmstadt;University of British Columbia;TU Darmstadt", "aff_domain": "tu-darmstadt.de;ubc.ca;tu-darmstadt.de", "position": "PhD student;Visitor;Full Professor", "bibtex": "@inproceedings{\nyu2023characteristic,\ntitle={Characteristic Circuits},\nauthor={Zhongjie Yu and Martin Trapp and Kristian Kersting},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5W7cXno10k}\n}", "github": "", "project": "", "reviewers": "twZC;HuCd;pe9b;1MK1;6Se4", "pdf_size": 458330, "rating": "5;6;7;7;10", "confidence": "4;4;3;3;1", "soundness": "3;3;4;4;4", "novelty": "2;3;3;4;4", "presentation": "4;3;4;3;4", "wc_summary": "65;127;144;73;8", "wc_strengths": "77;72;126;48;8", "wc_weaknesses": "144;63;72;115;8", "wc_questions": "80;114;38;44;8", "wc_limitations": "1;60;8;43;8", "wc_review": "367;436;388;323;40", "wc_reply_reviewers": "16;50;90;38;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;0", "rating_avg": [ 7.0, 1.6733200530681511 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 83.4, 48.384294972645826 ], "wc_strengths_avg": [ 66.2, 38.58704445795246 ], "wc_weaknesses_avg": [ 80.4, 46.59871242856395 ], "wc_questions_avg": [ 56.8, 36.630042315017874 ], "wc_limitations_avg": [ 24.0, 23.22929185317538 ], "wc_review_avg": [ 310.8, 140.19614830657795 ], "wc_reply_reviewers_avg": [ 38.8, 30.89595442772403 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 0.8, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9819805060619659, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "tu-darmstadt.de;ubc.ca;tu-darmstadt.de", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.ubc.ca", "aff_unique_abbr": "TU Darmstadt;UBC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Canada" }, { "title": "Tree of Thoughts: Deliberate Problem Solving with Large Language Models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72797", "id": "5Xc1ecxO1h", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/271db9922b8d1f4dd7aaef84ed5ac703-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5Xc1ecxO1h", "openreview": "https://openreview.net/forum?id=5Xc1ecxO1h", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72797", "video": "https://nips.cc/virtual/2023/poster/72797", "author_site": "Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Tom Griffiths, Yuan Cao, Karthik Narasimhan", "tldr": "", "abstract": "Language models are increasingly being deployed for general problem solving across a wide range of tasks, but are still confined to token-level, left-to-right decision-making processes during inference. This means they can fall short in tasks that require exploration, strategic lookahead, or where initial decisions play a pivotal role. To surmount these challenges, we introduce a new framework for language model inference, Tree of Thoughts (ToT), which generalizes over the popular Chain of Thought approach to prompting language models, and enables exploration over coherent units of text (thoughts) that serve as intermediate steps toward problem solving. ToT allows LMs to perform deliberate decision making by considering multiple different reasoning paths and self-evaluating choices to decide the next course of action, as well as looking ahead or backtracking when necessary to make global choices.\nOur experiments show that ToT significantly enhances language models\u2019 problem-solving abilities on three novel tasks requiring non-trivial planning or search: Game of 24, Creative Writing, and Mini Crosswords. For instance, in Game of 24, while GPT-4 with chain-of-thought prompting only solved 4\\% of tasks, our method achieved a success rate of 74\\%. Code repo with all prompts: https://github.com/princeton-nlp/tree-of-thought-llm.", "keywords": "large language model;general problem solving;heuristic search;reasoning;planning;decision making", "primary_area": "", "supplementary_material": "", "author": "Shunyu Yao;Dian Yu;Jeffrey Zhao;Izhak Shafran;Thomas L. Griffiths;Yuan Cao;Karthik R Narasimhan", "authorids": "~Shunyu_Yao1;~Dian_Yu2;~Jeffrey_Zhao1;~Izhak_Shafran1;~Thomas_L._Griffiths1;~Yuan_Cao2;~Karthik_R_Narasimhan1", "gender": "M;M;;M;;M;M", "homepage": "https://ysymyth.github.io;https://diandyu.github.io/;https://scholar.google.com/citations?user=A38Oib8AAAAJ&hl=en;;http://cocosci.princeton.edu/tom/;;http://www.karthiknarasimhan.com", "dblp": "156/1038;136/8648;257/6596;66/3591;34/4472;52/4472-7.html;147/0322", "google_scholar": "qJBXk9cAAAAJ;https://scholar.google.com/citations?hl=en;LIG-4BcAAAAJ;;https://scholar.google.com/citations?hl=en;Q82vvqcAAAAJ;euc0GX4AAAAJ", "orcid": ";;;;;0000-0002-1267-8930;", "linkedin": ";dianbyu;;;;;", "or_profile": "~Shunyu_Yao1;~Dian_Yu2;~Jeffrey_Zhao1;~Izhak_Shafran1;~Thomas_L._Griffiths1;~Yuan_Cao2;~Karthik_R_Narasimhan1", "aff": "Princeton University;Google;Google Brain;Google;Princeton University;Google DeepMind;Princeton University", "aff_domain": "princeton.edu;google.com;google.com;google.com;princeton.edu;google.com;princeton.edu", "position": "PhD student;researcher scientist;Software Engineer;Research Scientist;Professor;Research scientist;Assistant Professor", "bibtex": "@inproceedings{\nyao2023tree,\ntitle={Tree of Thoughts: Deliberate Problem Solving with Large Language Models},\nauthor={Shunyu Yao and Dian Yu and Jeffrey Zhao and Izhak Shafran and Thomas L. 
Griffiths and Yuan Cao and Karthik R Narasimhan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5Xc1ecxO1h}\n}", "github": "", "project": "", "reviewers": "fAU5;sEEP;3c3c;ysyd", "pdf_size": 716715, "rating": "5;6;8;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "168;100;72;100", "wc_strengths": "110;204;34;98", "wc_weaknesses": "320;424;269;34", "wc_questions": "99;327;3;5", "wc_limitations": "1;5;6;13", "wc_review": "698;1060;384;250", "wc_reply_reviewers": "138;21;16;114", "wc_reply_authors": "192;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 110.0, 35.38361202590826 ], "wc_strengths_avg": [ 111.5, 60.71861329114821 ], "wc_weaknesses_avg": [ 261.75, 142.8642275028987 ], "wc_questions_avg": [ 108.5, 131.9801121381551 ], "wc_limitations_avg": [ 6.25, 4.322904116447646 ], "wc_review_avg": [ 598.0, 312.3875797787102 ], "wc_reply_reviewers_avg": [ 72.25, 54.444352324185104 ], "wc_reply_authors_avg": [ 48.0, 83.13843876330611 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2737, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4359362721511170604&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "princeton.edu;google.com;google.com;google.com;princeton.edu;google.com;princeton.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;0;1;0", "aff_unique_norm": "Princeton University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.princeton.edu;https://www.google.com", "aff_unique_abbr": "Princeton;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Understanding Contrastive Learning via Distributionally Robust Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72796", "id": "5XshcizH9w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/48aaa5ea741ae8430bd58e25917d267d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5XshcizH9w", "openreview": "https://openreview.net/forum?id=5XshcizH9w", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72796", "video": "https://nips.cc/virtual/2023/poster/72796", "author_site": "Junkang Wu, Jiawei Chen, Jiancan Wu, Wentao Shi, Xiang Wang, Xiangnan He", "tldr": "", "abstract": "This study reveals the inherent tolerance of contrastive learning (CL) towards sampling bias, wherein negative samples may encompass similar semantics (\\eg labels). However, existing theories fall short in providing explanations for this phenomenon. 
We bridge this research gap by analyzing CL through the lens of distributionally robust optimization (DRO), yielding several key insights: (1) CL essentially conducts DRO over the negative sampling distribution, thus enabling robust performance across a variety of potential distributions and demonstrating robustness to sampling bias; (2) The design of the temperature $\\tau$ is not merely heuristic but acts as a Lagrange Coefficient, regulating the size of the potential distribution set; (3) A theoretical connection is established between DRO and mutual information, thus presenting fresh evidence for ``InfoNCE as an estimate of MI'' and a new estimation approach for $\\phi$-divergence-based generalized mutual information. We also identify CL's potential shortcomings, including over-conservatism and sensitivity to outliers, and introduce a novel Adjusted InfoNCE loss (ADNCE) to mitigate these issues. It refines potential distribution, improving performance and accelerating convergence. Extensive experiments on various domains (image, sentence, and graph) validate the effectiveness of the proposal.", "keywords": "contrastive learning;distributionally robust optimization;mutual information", "primary_area": "", "supplementary_material": "/attachment/25c02f860d21d393637576da15b74f1c9bbb5007.pdf", "author": "Junkang Wu;Jiawei Chen;Jiancan Wu;Wentao Shi;Xiang Wang;Xiangnan He", "authorids": "~Junkang_Wu1;~Jiawei_Chen6;~Jiancan_Wu1;~Wentao_Shi1;~Xiang_Wang6;~Xiangnan_He1", "gender": "M;M;M;M;M;M", "homepage": "https://junkangwu.github.io/;https://jiawei-chen.github.io/;https://wujcan.github.io/;https://swt-user.github.io/;https://github.com/xiangwang1223;http://staff.ustc.edu.cn/~hexn", "dblp": "300/3885;03/1390-7;257/4945;120/6916-2;31/2864-10;59/1007", "google_scholar": "deBwV5oAAAAJ;;z9zW1UgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ", "orcid": ";0000-0002-4752-2629;0000-0002-6941-5218;0000-0002-2616-6880;0000-0002-6148-6329;0000-0001-8472-7992", "linkedin": ";;;;;", "or_profile": "~Junkang_Wu1;~Jiawei_Chen6;~Jiancan_Wu1;~Wentao_Shi1;~Xiang_Wang6;~Xiangnan_He1", "aff": "University of Science and Technology of China;Zhejiang University;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;zju.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Researcher;Postdoc;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nwu2023understanding,\ntitle={Understanding Contrastive Learning via Distributionally Robust Optimization},\nauthor={Junkang Wu and Jiawei Chen and Jiancan Wu and Wentao Shi and Xiang Wang and Xiangnan He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5XshcizH9w}\n}", "github": "", "project": "", "reviewers": "C48x;rLbF;sD47;CyLz", "pdf_size": 876208, "rating": "5;6;6;6", "confidence": "3;3;2;4", "soundness": "2;3;3;2", "novelty": "2;3;3;4", "presentation": "3;3;3;2", "wc_summary": "142;101;69;238", "wc_strengths": "112;53;69;46", "wc_weaknesses": "200;54;68;441", "wc_questions": "236;14;194;12", "wc_limitations": "24;4;2;1", "wc_review": "714;226;402;738", "wc_reply_reviewers": "41;0;0;40", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", 
"rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 137.5, 63.53148825582476 ], "wc_strengths_avg": [ 70.0, 25.64176280991617 ], "wc_weaknesses_avg": [ 190.75, 155.30514318592284 ], "wc_questions_avg": [ 114.0, 102.08819716304133 ], "wc_limitations_avg": [ 7.75, 9.443913383762052 ], "wc_review_avg": [ 520.0, 215.36016344718908 ], "wc_reply_reviewers_avg": [ 20.25, 20.253086184579377 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13510316741167811298&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ustc.edu.cn;zju.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "USTC;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Self-Adaptive Motion Tracking against On-body Displacement of Flexible Sensors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72795", "id": "5ZMBiS1uMq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3da4165893c2465fd7e8df453c41ffa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5ZMBiS1uMq", "openreview": "https://openreview.net/forum?id=5ZMBiS1uMq", "poster": "/media/PosterPDFs/NeurIPS%202023/72795.png?t=1698977847.1583939", "slides": "https://nips.cc/virtual/2023/poster/72795", "video": "https://nips.cc/virtual/2023/poster/72795", "author_site": "Chengxu Zuo, Fang Jiawei, Shihui Guo, Yipeng Qin", "tldr": "", "abstract": "Flexible sensors are promising for ubiquitous sensing of human status due to their flexibility and easy integration as wearable systems. However, on-body displacement of sensors is inevitable since the device cannot be firmly worn at a fixed position across different sessions. This displacement issue causes complicated patterns and significant challenges to subsequent machine learning algorithms. Our work proposes a novel self-adaptive motion tracking network to address this challenge. 
Our network consists of three novel components: i) a light-weight learnable Affine Transformation layer whose parameters can be tuned to efficiently adapt to unknown displacements; ii) a Fourier-encoded LSTM network for better pattern identification; iii) a novel sequence discrepancy loss equipped with auxiliary regressors for unsupervised tuning of Affine Transformation parameters.", "keywords": "motion tracking;flexible sensor;on-body displacement;deep learning;domain adaptation", "primary_area": "", "supplementary_material": "/attachment/36bc30c3567967b6baaf5b0855c53feb1304f581.pdf", "author": "Chengxu Zuo;Jiawei Fang;Shihui Guo;Yipeng Qin", "authorids": "zuochengxu@stu.xmu.edu.cn;22920202204553@stu.xmu.edu.cn;~Shihui_Guo1;~Yipeng_Qin1", "gender": ";;M;", "homepage": ";;http://www.guoshihui.net;https://profiles.cardiff.ac.uk/staff/qiny16", "dblp": ";;;169/5516", "google_scholar": ";;https://scholar.google.jp/citations?user=RPAVxiAAAAAJ;ojgWPpgAAAAJ", "orcid": ";;;0000-0002-1551-9126", "linkedin": ";;;", "or_profile": "zuochengxu@stu.xmu.edu.cn;22920202204553@stu.xmu.edu.cn;~Shihui_Guo1;~Yipeng_Qin1", "aff": ";;Xiamen University;Cardiff University", "aff_domain": ";;xmu.edu.cn;cardiff.ac.uk", "position": ";;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzuo2023selfadaptive,\ntitle={Self-Adaptive Motion Tracking against On-body Displacement of Flexible Sensors},\nauthor={Chengxu Zuo and Jiawei Fang and Shihui Guo and Yipeng Qin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5ZMBiS1uMq}\n}", "github": "", "project": "", "reviewers": "Bo9S;fsrB;f2Et;ncj6", "pdf_size": 2127729, "rating": "4;4;6;6", "confidence": "3;4;3;5", "soundness": "3;3;3;4", "novelty": "2;2;2;2", "presentation": "4;2;3;4", "wc_summary": "58;87;65;183", "wc_strengths": "55;102;32;122", "wc_weaknesses": "557;197;59;219", "wc_questions": "1;37;6;39", "wc_limitations": "43;19;6;7", "wc_review": "714;442;168;570", "wc_reply_reviewers": "226;83;0;0", "wc_reply_authors": "715;0;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "1;0;0;0", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 98.25, 50.08679965819338 ], "wc_strengths_avg": [ 77.75, 35.90525727522364 ], "wc_weaknesses_avg": [ 258.0, 183.1966156892643 ], "wc_questions_avg": [ 20.75, 17.354754391808605 ], "wc_limitations_avg": [ 18.75, 14.905955185763842 ], "wc_review_avg": [ 473.5, 200.91976010338058 ], "wc_reply_reviewers_avg": [ 77.25, 92.32381870351767 ], "wc_reply_authors_avg": [ 178.75, 309.6040818529368 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17641073952894743837&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";;xmu.edu.cn;cardiff.ac.uk", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Xiamen University;Cardiff University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;https://www.cardiff.ac.uk", "aff_unique_abbr": "XMU;Cardiff", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Approximately 
Equivariant Graph Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72794", "id": "5aeyKAZr0L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cde6435e111671b04f4574006cf3c47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5aeyKAZr0L", "openreview": "https://openreview.net/forum?id=5aeyKAZr0L", "poster": "/media/PosterPDFs/NeurIPS%202023/72794.png?t=1701718233.4228437", "slides": "https://nips.cc/virtual/2023/poster/72794", "video": "https://nips.cc/virtual/2023/poster/72794", "author_site": "Ningyuan Huang, Ron Levie, Soledad Villar", "tldr": "", "abstract": "Graph neural networks (GNNs) are commonly described as being permutation equivariant with respect to node relabeling in the graph. This symmetry of GNNs is often compared to the translation equivariance of Euclidean convolution neural networks (CNNs). However, these two symmetries are fundamentally different: The translation equivariance of CNNs corresponds to symmetries of the fixed domain acting on the image signals (sometimes known as active symmetries), whereas in GNNs any permutation acts on both the graph signals and the graph domain (sometimes described as passive symmetries). In this work, we focus on the active symmetries of GNNs, by considering a learning setting where signals are supported on a fixed graph. In this case, the natural symmetries of GNNs are the automorphisms of the graph. Since real-world graphs tend to be asymmetric, we relax the notion of symmetries by formalizing approximate symmetries via graph coarsening. We present a bias-variance formula that quantifies the tradeoff between the loss in expressivity and the gain in the regularity of the learned estimator, depending on the chosen symmetry group. To illustrate our approach, we conduct extensive experiments on image inpainting, traffic flow prediction, and human pose estimation with different choices of symmetries. 
We show theoretically and empirically that the best generalization performance can be achieved by choosing a suitably larger group than the graph automorphism, but smaller than the permutation group.", "keywords": "graph neural networks;equivariant machine learning;symmetry;generalization;statistical learning", "primary_area": "", "supplementary_material": "/attachment/ecd39c4cab77024eb5ddc74712e43a5f38031a58.pdf", "author": "Ningyuan Teresa Huang;Ron Levie;Soledad Villar", "authorids": "~Ningyuan_Teresa_Huang1;~Ron_Levie1;~Soledad_Villar2", "gender": ";;", "homepage": "https://nhuang37.github.io/;;", "dblp": "277/6356;;", "google_scholar": "cUQa7_kAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ningyuan_Teresa_Huang1;~Ron_Levie1;~Soledad_Villar2", "aff": "Johns Hopkins University;;", "aff_domain": "jhu.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nhuang2023approximately,\ntitle={Approximately Equivariant Graph Networks},\nauthor={Ningyuan Teresa Huang and Ron Levie and Soledad Villar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5aeyKAZr0L}\n}", "github": "", "project": "", "reviewers": "RNWs;KAcz;van5;YT16", "pdf_size": 1193255, "rating": "5;6;7;8", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "3;4;3;4", "presentation": "1;2;3;3", "wc_summary": "59;105;232;74", "wc_strengths": "31;60;56;201", "wc_weaknesses": "65;93;236;73", "wc_questions": "32;59;205;105", "wc_limitations": "1;9;14;9", "wc_review": "188;326;743;462", "wc_reply_reviewers": "24;0;48;0", "wc_reply_authors": "35;0;109;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 117.5, 68.15607089614248 ], "wc_strengths_avg": [ 87.0, 66.74953183356419 ], "wc_weaknesses_avg": [ 116.75, 69.60019755719088 ], "wc_questions_avg": [ 100.25, 65.86871412134899 ], "wc_limitations_avg": [ 8.25, 4.656984002549289 ], "wc_review_avg": [ 429.75, 205.1662435684779 ], "wc_reply_reviewers_avg": [ 18.0, 19.8997487421324 ], "wc_reply_authors_avg": [ 36.0, 44.50280890011326 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6125087678291726734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "jhu.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "The Goldilocks of Pragmatic Understanding: Fine-Tuning Strategy Matters for Implicature Resolution by LLMs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72793", "id": "5bWW9Eop7l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4241fec6e94221526b0a9b24828bb774-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5bWW9Eop7l", "openreview": "https://openreview.net/forum?id=5bWW9Eop7l", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72793", "video": "https://nips.cc/virtual/2023/poster/72793", "author_site": "Laura Ruis, Akbir Khan, Stella Biderman, Sara 
Hooker, Tim Rockt\u00e4schel, Edward Grefenstette", "tldr": "", "abstract": "Despite widespread use of LLMs as conversational agents, evaluations of performance fail to capture a crucial aspect of communication: interpreting language in context---incorporating its pragmatics. Humans interpret language using beliefs and prior knowledge about the world. For example, we intuitively understand the response \"I wore gloves\" to the question \"Did you leave fingerprints?\" as meaning \"No\". To investigate whether LLMs have the ability to make this type of inference, known as an implicature, we design a simple task and evaluate four categories of widely used state-of-the-art models. We find that, despite only evaluating on utterances that require a binary inference (yes or no), models in three of these categories perform close to random. However, LLMs instruction-tuned at the example-level perform significantly better. These results suggest that certain fine-tuning strategies are far better at inducing pragmatic understanding in models. We present our findings as the starting point for further research into evaluating how LLMs interpret language in context and to drive the development of more pragmatic and useful models of human discourse.", "keywords": "large language models;pragmatics;natural language processing;communication;conversation;implicature;language model fine-tuning", "primary_area": "", "supplementary_material": "/attachment/84714cc99c2ae53153c15c045bc7d5cfcbe771d2.zip", "author": "Laura Eline Ruis;Akbir Khan;Stella Biderman;Sara Hooker;Tim Rockt\u00e4schel;Edward Grefenstette", "authorids": "~Laura_Eline_Ruis1;~Akbir_Khan1;~Stella_Biderman1;~Sara_Hooker2;~Tim_Rockt\u00e4schel1;~Edward_Grefenstette1", "gender": "F;M;F;M;;M", "homepage": ";https://akbir.dev;http://www.stellabiderman.com;http://egrefen.com/;https://www.sarahooker.me/;http://rockt.ai", "dblp": "256/5186;;239/5641;http://dblp.uni-trier.de/pers/hd/g/Grefenstette:Edward;210/2611;43/11537", "google_scholar": "k0vrm6kAAAAJ;https://scholar.google.com/citations?hl=en;bO7H0DAAAAAJ;https://scholar.google.co.uk/citations?user=ezllEwMAAAAJ;2xy6h3sAAAAJ;https://scholar.google.co.uk/citations?user=mWBY8aIAAAAJ", "orcid": ";;0000-0001-8228-1042;;;", "linkedin": ";;stellabiderman;;;rockt/", "or_profile": "~Laura_Eline_Ruis1;~Akbir_Khan1;~Stella_Biderman1;~Edward_Grefenstette1;~Sara_Hooker1;~Tim_Rocktaeschel1", "aff": "University College London, University of London;;Booz Allen Hamilton;Cohere;Cohere For AI;Department of Computer Science, University College London", "aff_domain": "ucl.ac.uk;;boozallen.com;cohere.com;cohere.com;cs.ucl.ac.uk", "position": "PhD student;;Industry researcher;Principal Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nruis2023the,\ntitle={The Goldilocks of Pragmatic Understanding: Fine-Tuning Strategy Matters for Implicature Resolution by {LLM}s},\nauthor={Laura Eline Ruis and Akbir Khan and Stella Biderman and Sara Hooker and Tim Rockt{\\\"a}schel and Edward Grefenstette},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5bWW9Eop7l}\n}", "github": "", "project": "", "reviewers": "VB9K;FPTW;SeJL;LHbR", "pdf_size": 8783347, "rating": "5;7;7;8", "confidence": "3;4;4;4", "soundness": "3;3;4;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "260;107;93;209", "wc_strengths": "49;108;37;49", "wc_weaknesses": "128;242;44;196", "wc_questions": "104;103;979;17", "wc_limitations": 
"42;58;1;9", "wc_review": "583;618;1154;480", "wc_reply_reviewers": "711;67;405;40", "wc_reply_authors": "904;54;82;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 167.25, 69.80105658226098 ], "wc_strengths_avg": [ 60.75, 27.716195626384224 ], "wc_weaknesses_avg": [ 152.5, 74.6240577829965 ], "wc_questions_avg": [ 300.75, 393.177043454981 ], "wc_limitations_avg": [ 27.5, 23.371991785040485 ], "wc_review_avg": [ 708.75, 262.02230343999344 ], "wc_reply_reviewers_avg": [ 305.75, 274.63737455051523 ], "wc_reply_authors_avg": [ 260.0, 372.9798922194064 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5044824091005640314&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucl.ac.uk;;boozallen.com;cohere.com;cohere.com;cs.ucl.ac.uk", "author_num": 6, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University College London;Booz Allen Hamilton;Cohere", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.boozallen.com;https://cohere.ai", "aff_unique_abbr": "UCL;BAH;", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Replicability in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72792", "id": "5cPz5hrjy6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec4d2e436794d1bf55ca83f5ebb31887-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5cPz5hrjy6", "openreview": "https://openreview.net/forum?id=5cPz5hrjy6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72792", "video": "https://nips.cc/virtual/2023/poster/72792", "author_site": "Amin Karbasi, Grigoris Velegkas, Lin Yang, Felix Zhou", "tldr": "", "abstract": "We initiate the mathematical study of replicability as an \n algorithmic property in the context of reinforcement learning (RL).\n We focus on the fundamental setting of discounted tabular MDPs with access to a generative model.\n Inspired by Impagliazzo et al. [2022], we say that an RL algorithm is replicable if,\n with high probability,\n it outputs the exact same policy\n after two executions on i.i.d. samples drawn from the generator\n when its internal randomness\n is the same.\n We first provide \n an efficient $\\rho$-replicable algorithm for $(\\varepsilon, \\delta)$-optimal policy estimation\n with sample and time complexity $\\widetilde O\\left(\\frac{N^3\\cdot\\log(1/\\delta)}{(1-\\gamma)^5\\cdot\\varepsilon^2\\cdot\\rho^2}\\right)$,\n where $N$ is the number of state-action pairs.\n Next,\n for the subclass of deterministic algorithms,\n we provide a lower bound of order $\\Omega\\left(\\frac{N^3}{(1-\\gamma)^3\\cdot\\varepsilon^2\\cdot\\rho^2}\\right)$.\n Then, we study a relaxed version of replicability proposed\n by Kalavasis et al. 
[2023] called TV indistinguishability.\n We design a computationally efficient TV indistinguishable algorithm for policy estimation\n whose sample complexity is $\\widetilde O\\left(\\frac{N^2\\cdot\\log(1/\\delta)}{(1-\\gamma)^5\\cdot\\varepsilon^2\\cdot\\rho^2}\\right)$.\n At the cost of $\\exp(N)$ running time,\n we transform these TV indistinguishable algorithms to $\\rho$-replicable ones without increasing their sample complexity.\n Finally,\n we introduce the notion of approximate-replicability\n where we only require that two outputted policies are close\n under an appropriate statistical divergence (e.g., Renyi)\n and show an improved sample complexity of $\\widetilde O\\left(\\frac{N\\cdot\\log(1/\\delta)}{(1-\\gamma)^5\\cdot\\varepsilon^2\\cdot\\rho^2}\\right)$.", "keywords": "Theory;Reinforcement Learning Theory;Statistical Learning Theory;Reproducibility;Replicability", "primary_area": "", "supplementary_material": "", "author": "Amin Karbasi;Grigoris Velegkas;Lin Yang;Felix Zhou", "authorids": "~Amin_Karbasi3;~Grigoris_Velegkas1;~Lin_Yang12;~Felix_Zhou1", "gender": ";M;;", "homepage": ";;;", "dblp": ";254/1885;;", "google_scholar": ";Ty1kgP0AAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Amin_Karbasi3;~Grigoris_Velegkas1;~Lin_Yang12;~Felix_Zhou1", "aff": ";Yale University;;", "aff_domain": ";yale.edu;;", "position": ";PhD student;;", "bibtex": "@inproceedings{\nkarbasi2023replicability,\ntitle={Replicability in Reinforcement Learning},\nauthor={Amin Karbasi and Grigoris Velegkas and Lin Yang and Felix Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5cPz5hrjy6}\n}", "github": "", "project": "", "reviewers": "i28M;1f6r;o1wo;GbLw", "pdf_size": 572244, "rating": "2;6;6;7", "confidence": "4;4;3;4", "soundness": "4;4;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "109;123;84;150", "wc_strengths": "37;88;129;47", "wc_weaknesses": "198;302;87;95", "wc_questions": "57;18;20;27", "wc_limitations": "26;1;6;22", "wc_review": "427;532;326;341", "wc_reply_reviewers": "243;17;5;11", "wc_reply_authors": "810;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 116.5, 23.8589605808803 ], "wc_strengths_avg": [ 75.25, 36.44430682562093 ], "wc_weaknesses_avg": [ 170.5, 87.63703555004585 ], "wc_questions_avg": [ 30.5, 15.660459763365825 ], "wc_limitations_avg": [ 13.75, 10.497023387608508 ], "wc_review_avg": [ 406.5, 82.06856889211606 ], "wc_reply_reviewers_avg": [ 69.0, 100.54849576199537 ], "wc_reply_authors_avg": [ 202.5, 350.74028853269766 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.22549380840084865, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6448104909246713391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";yale.edu;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Quantification of Uncertainty with Adversarial Models", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72791", "id": "5eu00pcLWa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3e0b96206965f5f05b0b4550c0e73ff0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5eu00pcLWa", "openreview": "https://openreview.net/forum?id=5eu00pcLWa", "poster": "/media/PosterPDFs/NeurIPS%202023/72791.png?t=1698234902.4702597", "slides": "https://nips.cc/virtual/2023/poster/72791", "video": "https://nips.cc/virtual/2023/poster/72791", "author_site": "Kajetan Schweighofer, Lukas Aichberger, Mykyta Ielanskyi, G\u00fcnter Klambauer, Sepp Hochreiter", "tldr": "", "abstract": "Quantifying uncertainty is important for actionable predictions in real-world applications. A crucial part of predictive uncertainty quantification is the estimation of epistemic uncertainty, which is defined as an integral of the product between a divergence function and the posterior. Current methods such as Deep Ensembles or MC dropout underperform at estimating the epistemic uncertainty, since they primarily consider the posterior when sampling models. We suggest Quantification of Uncertainty with Adversarial Models (QUAM) to better estimate the epistemic uncertainty. QUAM identifies regions where the whole product under the integral is large, not just the posterior. Consequently, QUAM has lower approximation error of the epistemic uncertainty compared to previous methods. Models for which the product is large correspond to adversarial models (not adversarial examples!). Adversarial models have both a high posterior as well as a high divergence between their predictions and that of a reference model. Our experiments show that QUAM excels in capturing epistemic uncertainty for deep learning models and outperforms previous methods on challenging tasks in the vision domain.", "keywords": "uncertainty;uncertainty quantification;predictive uncertainty;epistemic uncertainty;out of distribution;mc dropout;deep ensembles;sg-mcmc;adversarial model;adversarial model search;imagenet", "primary_area": "", "supplementary_material": "/attachment/04ef697f844af2a8a47adccab00c510eb64d1a99.pdf", "author": "Kajetan Schweighofer;Lukas Aichberger;Mykyta Ielanskyi;G\u00fcnter Klambauer;Sepp Hochreiter", "authorids": "~Kajetan_Schweighofer1;~Lukas_Aichberger1;~Mykyta_Ielanskyi1;~G\u00fcnter_Klambauer1;~Sepp_Hochreiter1", "gender": "M;M;Not Specified;M;M", "homepage": ";;https://www.jku.at/en/institute-for-machine-learning/about-us/team/mykyta-ielanskyi-msc/;http://www.bioinf.jku.at/people/klambauer/;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/", "dblp": "305/7871;;;119/4499;h/SeppHochreiter.html", "google_scholar": "9KMoqxEAAAAJ;W2mpUgIAAAAJ;;https://scholar.google.at/citations?user=rb2AvxIAAAAJ;https://scholar.google.at/citations?user=tvUH3WMAAAAJ", "orcid": ";;;0000-0003-2861-5552;0000-0001-7449-2528", "linkedin": "kajetan-schweighofer-a61113202/;lukas-aichberger/;;;https://linkedin.com/in/sepp-hochreiter-41514846", "or_profile": "~Kajetan_Schweighofer1;~Lukas_Aichberger1;~Mykyta_Ielanskyi1;~G\u00fcnter_Klambauer1;~Sepp_Hochreiter1", "aff": "Johannes Kepler Universit\u00e4t Linz;Johannes Kepler Universit\u00e4t Linz;Johannes Kepler Universit\u00e4t Linz;Johannes Kepler Universit\u00e4t Linz;Johannes Kepler University Linz", "aff_domain": "jku.at;jku.at;jku.at;jku.at;jku.at", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": 
"@inproceedings{\nschweighofer2023quantification,\ntitle={Quantification of Uncertainty with Adversarial Models},\nauthor={Kajetan Schweighofer and Lukas Aichberger and Mykyta Ielanskyi and G{\\\"u}nter Klambauer and Sepp Hochreiter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5eu00pcLWa}\n}", "github": "", "project": "", "reviewers": "EcJb;rwwy;T3Ph;DS26", "pdf_size": 3851942, "rating": "3;4;7;8", "confidence": "4;3;5;3", "soundness": "3;3;4;4", "novelty": "3;2;4;4", "presentation": "1;3;4;4", "wc_summary": "64;32;173;118", "wc_strengths": "98;12;185;68", "wc_weaknesses": "653;150;260;11", "wc_questions": "84;2;203;39", "wc_limitations": "9;1;35;13", "wc_review": "908;197;856;249", "wc_reply_reviewers": "282;119;127;49", "wc_reply_authors": "494;682;252;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 96.75, 53.69066492417467 ], "wc_strengths_avg": [ 90.75, 62.5594717049305 ], "wc_weaknesses_avg": [ 268.5, 238.8833397288308 ], "wc_questions_avg": [ 82.0, 75.65381682373996 ], "wc_limitations_avg": [ 14.5, 12.599603168354152 ], "wc_review_avg": [ 552.5, 330.52420486251833 ], "wc_reply_reviewers_avg": [ 144.25, 85.1216041907106 ], "wc_reply_authors_avg": [ 357.0, 256.3532718730151 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.07312724241271305, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3688214997346523599&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "jku.at;jku.at;jku.at;jku.at;jku.at", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Johannes Kepler University Linz;Johannes Kepler University", "aff_unique_dep": ";", "aff_unique_url": "https://www.jku.at;https://www.jku.at", "aff_unique_abbr": "JKU;JKU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Linz", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Austria" }, { "title": "A Cross-Moment Approach for Causal Effect Estimation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72790", "id": "5gz7npbQ6Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1f6100363156cced8633f4e89dd8ceb1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5gz7npbQ6Z", "openreview": "https://openreview.net/forum?id=5gz7npbQ6Z", "poster": "/media/PosterPDFs/NeurIPS%202023/72790.png?t=1699614606.6554997", "slides": "https://nips.cc/virtual/2023/poster/72790", "video": "https://nips.cc/virtual/2023/poster/72790", "author_site": "Yaroslav Kivva, Saber Salehkaleybar, Saber Salehkaleybar, Negar Kiyavash", "tldr": "", "abstract": "We consider the problem of estimating the causal effect of a treatment on an outcome in linear structural causal models (SCM) with latent confounders when we have access to a single proxy variable.\nSeveral methods (such as difference-in-difference (DiD) estimator or negative outcome control) have been proposed in this setting in the literature. 
However, these approaches require either restrictive assumptions on the data generating model or access to at least two proxy variables.\nWe propose a method to estimate the causal effect using cross moments between the treatment, the outcome, and the proxy variable. In particular, we show that the causal effect can be identified with simple arithmetic operations on the cross moments if the latent confounder in linear SCM is non-Gaussian.\nIn this setting, the DiD estimator provides an unbiased estimate only in the special case where the latent confounder has exactly the same direct causal effects on the outcomes in the pre-treatment and post-treatment phases. This translates to the common trend assumption in DiD, which we effectively relax.\nAdditionally, we provide an impossibility result that shows the causal effect cannot be identified if the observational distribution over the treatment, the outcome, and the proxy is jointly Gaussian.\n Our experiments on both synthetic and real-world datasets showcase the effectiveness\nof the proposed approach in estimating the causal effect.", "keywords": "Causal inference;Difference-in-Difference;Structural causal models;Potential outcome;Proxy learning", "primary_area": "", "supplementary_material": "/attachment/fe922c051cb9e062203566882607734bcb48bdc1.pdf", "author": "Yaroslav Kivva;Saber Salehkaleybar;Negar Kiyavash", "authorids": "~Yaroslav_Kivva1;~Saber_Salehkaleybar1;~Negar_Kiyavash1", "gender": "M;;F", "homepage": "https://people.epfl.ch/yaroslav.kivva/?lang=en;;https://people.epfl.ch/negar.kiyavash?lang=en", "dblp": "314/7107;;85/4976", "google_scholar": "QiJQSTIAAAAJ;;7tBDvOwAAAAJ", "orcid": ";;0000-0002-8545-7709", "linkedin": ";;", "or_profile": "~Yaroslav_Kivva1;~Saber_Salehkaleybar1;~Negar_Kiyavash1", "aff": "Swiss Federal Institute of Technology Lausanne (EPFL);;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;;epfl.ch", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nkivva2023a,\ntitle={A Cross-Moment Approach for Causal Effect Estimation},\nauthor={Yaroslav Kivva and Saber Salehkaleybar and Negar Kiyavash},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5gz7npbQ6Z}\n}", "github": "", "project": "", "reviewers": "p1qb;dXHi;XfVt;5Myy", "pdf_size": 447290, "rating": "5;5;6;6", "confidence": "3;4;3;4", "soundness": "3;3;4;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "117;130;85;68", "wc_strengths": "144;118;77;37", "wc_weaknesses": "39;236;124;35", "wc_questions": "2;115;50;166", "wc_limitations": "1;23;15;46", "wc_review": "303;622;351;352", "wc_reply_reviewers": "22;12;84;79", "wc_reply_authors": "0;0;76;124", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 24.68805379125702 ], "wc_strengths_avg": [ 94.0, 40.663251222694925 ], "wc_weaknesses_avg": [ 108.5, 81.74503042999005 ], "wc_questions_avg": [ 83.25, 62.375375750371234 ], "wc_limitations_avg": [ 21.25, 16.315253599009733 ], "wc_review_avg": [ 407.0, 125.70003977724112 ], "wc_reply_reviewers_avg": [ 49.25, 32.491345001399985 ], "wc_reply_authors_avg": [ 50.0, 52.80151512977634 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17,
0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10708916640342732676&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "epfl.ch;;epfl.ch", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Evolving Standardization for Continual Domain Generalization over Temporal Drift", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72789", "id": "5hVXbiEGXB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/459a911eb49cd2e0192055ee156d04e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5hVXbiEGXB", "openreview": "https://openreview.net/forum?id=5hVXbiEGXB", "poster": "/media/PosterPDFs/NeurIPS%202023/72789.png?t=1699178033.6299655", "slides": "https://nips.cc/virtual/2023/poster/72789", "video": "https://nips.cc/virtual/2023/poster/72789", "author_site": "Mixue Xie, Shuang Li, Longhui Yuan, Chi Liu, Zehui Dai", "tldr": "", "abstract": "The capability of generalizing to out-of-distribution data is crucial for the deployment of machine learning models in the real world. Existing domain generalization (DG) mainly embarks on offline and discrete scenarios, where multiple source domains are simultaneously accessible and the distribution shift among domains is abrupt and violent. Nevertheless, such setting may not be universally applicable to all real-world applications, as there are cases where the data distribution gradually changes over time due to various factors, e.g., the process of aging. Additionally, as the domain constantly evolves, new domains will continually emerge. Re-training and updating models with both new and previous domains using existing DG methods can be resource-intensive and inefficient. Therefore, in this paper, we present a problem formulation for Continual Domain Generalization over Temporal Drift (CDGTD). CDGTD addresses the challenge of gradually shifting data distributions over time, where domains arrive sequentially and models can only access the data of the current domain. The goal is to generalize to unseen domains that are not too far into the future. To this end, we propose an Evolving Standardization (EvoS) method, which characterizes the evolving pattern of feature distribution and mitigates the distribution shift by standardizing features with generated statistics of corresponding domain. Specifically, inspired by the powerful ability of transformers to model sequence relations, we design a multi-scale attention module (MSAM) to learn the evolving pattern under sliding time windows of different lengths. MSAM can generate statistics of current domain based on the statistics of previous domains and the learned evolving pattern. 
Experiments on multiple real-world datasets including images and texts validate the efficacy of our EvoS.", "keywords": "domain generalization;sequential learning;temporal drift;feature standardization", "primary_area": "", "supplementary_material": "/attachment/06378fb224f3ed341a844bbb0b14dadb72929490.zip", "author": "Mixue Xie;Shuang Li;Longhui Yuan;Chi Harold Liu;Zehui Dai", "authorids": "~Mixue_Xie2;~Shuang_Li6;~Longhui_Yuan1;~Chi_Harold_Liu1;~Zehui_Dai1", "gender": ";M;M;M;M", "homepage": ";https://shuangli.xyz;https://yuanlonghui.github.io/;;", "dblp": "289/0077;43/6294-8;307/5087;45/4723.html;248/2693.html", "google_scholar": ";VXCiAc4AAAAJ;https://scholar.google.cz/citations?user=fVnEIZEAAAAJ;3IgFTEkAAAAJ;", "orcid": ";0000-0001-6807-9905;;;0000-0002-0406-8219", "linkedin": ";;;;", "or_profile": "~Mixue_Xie2;~Shuang_Li6;~Longhui_Yuan1;~Chi_Harold_Liu1;~Zehui_Dai1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Alibaba Group", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;alibaba-inc.com", "position": "MS student;Associate Professor;MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\nxie2023evolving,\ntitle={Evolving Standardization for Continual Domain Generalization over Temporal Drift},\nauthor={Mixue Xie and Shuang Li and Longhui Yuan and Chi Harold Liu and Zehui Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5hVXbiEGXB}\n}", "github": "", "project": "", "reviewers": "Nwuj;YTjU;uaS8;P1pd", "pdf_size": 2556308, "rating": "5;6;6;8", "confidence": "3;3;4;5", "soundness": "3;3;3;3", "novelty": "2;4;3;3", "presentation": "3;4;3;3", "wc_summary": "129;94;126;109", "wc_strengths": "47;51;87;172", "wc_weaknesses": "311;40;145;154", "wc_questions": "36;97;6;29", "wc_limitations": "26;1;6;11", "wc_review": "549;283;370;475", "wc_reply_reviewers": "28;27;80;31", "wc_reply_authors": "95;0;916;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 114.5, 14.080127840328723 ], "wc_strengths_avg": [ 89.25, 50.25124376570196 ], "wc_weaknesses_avg": [ 162.5, 96.74321681647763 ], "wc_questions_avg": [ 42.0, 33.637776383108324 ], "wc_limitations_avg": [ 11.0, 9.354143466934854 ], "wc_review_avg": [ 419.25, 101.15921856163185 ], "wc_reply_reviewers_avg": [ 41.5, 22.276669409945463 ], "wc_reply_authors_avg": [ 252.75, 384.8865904393137 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6204860265115589134&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Beijing Institute of Technology;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;https://www.alibaba.com", "aff_unique_abbr": "BIT;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "LinGCN: Structural 
Linearized Graph Convolutional Network for Homomorphically Encrypted Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72788", "id": "5loV5tVzsY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/41bd71e7bf7f9fe68f1c936940fd06bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5loV5tVzsY", "openreview": "https://openreview.net/forum?id=5loV5tVzsY", "poster": "/media/PosterPDFs/NeurIPS%202023/72788.png?t=1697776124.1760132", "slides": "https://nips.cc/virtual/2023/poster/72788", "video": "https://nips.cc/virtual/2023/poster/72788", "author_site": "Hongwu Peng, Ran Ran, Yukui Luo, Jiahui Zhao, Shaoyi Huang, Kiran Thorat, Tong Geng, Chenghong Wang, Xiaolin Xu, Wujie Wen, Caiwen Ding", "tldr": "", "abstract": "The growth of Graph Convolution Network (GCN) model sizes has revolutionized numerous applications, surpassing human performance in areas such as personal healthcare and financial systems. The deployment of GCNs in the cloud raises privacy concerns due to potential adversarial attacks on client data. To address security concerns, Privacy-Preserving Machine Learning (PPML) using Homomorphic Encryption (HE) secures sensitive client data. However, it introduces substantial computational overhead in practical applications. To tackle those challenges, we present LinGCN, a framework designed to reduce multiplication depth and optimize the performance of HE based GCN inference. LinGCN is structured around three key elements: (1) A differentiable structural linearization algorithm, complemented by a parameterized discrete indicator function, co-trained with model weights to meet the optimization goal. This strategy promotes fine-grained node-level non-linear location selection, resulting in a model with minimized multiplication depth. (2) A compact node-wise polynomial replacement policy with a second-order trainable activation function, steered towards superior convergence by a two-level distillation approach from an all-ReLU based teacher model. (3) an enhanced HE solution that enables finer-grained operator fusion for node-wise activation functions, further reducing multiplication level consumption in HE-based inference. Our experiments on the NTU-XVIEW skeleton joint dataset reveal that LinGCN excels in latency, accuracy, and scalability for homomorphically encrypted inference, outperforming solutions such as CryptoGCN. Remarkably, LinGCN achieves a 14.2\u00d7 latency speedup relative to CryptoGCN, while preserving an inference accuracy of ~75\\% and notably reducing multiplication depth. 
Additionally, LinGCN proves scalable for larger models, delivering a substantial 85.78\\% accuracy with 6371s latency, a 10.47\\% accuracy improvement over CryptoGCN.", "keywords": "Privacy-Preserving Machine Learning;efficient private inference;machine learning as a service;homomorphic encryption;non-linear pruning;ST-GCN", "primary_area": "", "supplementary_material": "/attachment/7388805c9586c5c76b9d0a373d446aff6bd382b5.zip", "author": "Hongwu Peng;Ran Ran;Yukui Luo;Jiahui Zhao;Shaoyi Huang;Kiran Thorat;Tong Geng;Chenghong Wang;Xiaolin Xu;Wujie Wen;Caiwen Ding", "authorids": "~Hongwu_Peng1;~Ran_Ran2;~Yukui_Luo1;~Jiahui_Zhao1;~Shaoyi_Huang1;~Kiran_Thorat1;~Tong_Geng1;~Chenghong_Wang1;~Xiaolin_Xu3;~Wujie_Wen2;~Caiwen_Ding1", "gender": "M;M;M;F;F;M;M;M;;M;M", "homepage": "https://harveyp123.github.io/;;;;https://www.shaoyihuang.com/;;https://tonytgeng.com;https://www.lovingmage.com;;https://www.lehigh.edu/~wuw219/;https://caiwending.cse.uconn.edu/", "dblp": "292/5365;;221/0729;;292/5980.html;;188/5531;76/2435;;70/11466.html;175/2489", "google_scholar": "9P2qtQoAAAAJ;zjgo17YAAAAJ;sOWfQKEAAAAJ;;Ybk2L10AAAAJ;https://scholar.google.com/citations?view_op=list_works;1B_nk28AAAAJ;;;QKQrD1wAAAAJ;7hR0r_EAAAAJ", "orcid": ";;0000-0002-5852-4195;;;;0000-0002-3644-2922;0000-0001-7837-5791;;;0000-0003-0891-1231", "linkedin": "hongwu-peng-374893119/;ranran0523/;;;;;;;;;caiwen-ding-47144489/", "or_profile": "~Hongwu_Peng1;~Ran_Ran2;~Yukui_Luo1;~Jiahui_Zhao1;~Shaoyi_Huang1;~Kiran_Thorat1;~Tong_Geng1;~Chenghong_Wang1;~Xiaolin_Xu3;~Wujie_Wen2;~Caiwen_Ding1", "aff": "University of Connecticut;Lehigh University;Northeastern University;University of Connecticut;University of Connecticut;University of Connecticut;University of Rochester;Duke University;;North Carolina State University;University of Connecticut", "aff_domain": "uconn.edu;lehigh.edu;neu.edu;uconn.edu;uconn.edu;uconn.edu;rochester.edu;duke.edu;;ncsu.edu;uconn.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;PhD student;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\npeng2023lingcn,\ntitle={Lin{GCN}: Structural Linearized Graph Convolutional Network for Homomorphically Encrypted Inference},\nauthor={Hongwu Peng and Ran Ran and Yukui Luo and Jiahui Zhao and Shaoyi Huang and Kiran Thorat and Tong Geng and Chenghong Wang and Xiaolin Xu and Wujie Wen and Caiwen Ding},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5loV5tVzsY}\n}", "github": "", "project": "", "reviewers": "KG27;6Lak;Y6MP;iVPr", "pdf_size": 1180105, "rating": "5;5;6;7", "confidence": "5;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "62;50;42;65", "wc_strengths": "31;19;62;70", "wc_weaknesses": "96;53;62;257", "wc_questions": "53;31;5;116", "wc_limitations": "1;76;5;7", "wc_review": "243;229;176;515", "wc_reply_reviewers": "99;0;15;299", "wc_reply_authors": "379;0;0;414", "reply_reviewers": "1;0;1;2", "reply_authors": "3;1;1;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 54.75, 9.256754290786809 ], "wc_strengths_avg": [ 45.5, 21.12463017427761 ], "wc_weaknesses_avg": [ 117.0, 82.40449016892222 ], "wc_questions_avg": [ 51.25, 41.06321346412139 ], "wc_limitations_avg": [ 
22.25, 31.107675901616307 ], "wc_review_avg": [ 290.75, 131.86048498318212 ], "wc_reply_reviewers_avg": [ 103.25, 119.14775490960793 ], "wc_reply_authors_avg": [ 198.25, 198.63581625678688 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1300149667688344668&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "uconn.edu;lehigh.edu;neu.edu;uconn.edu;uconn.edu;uconn.edu;rochester.edu;duke.edu;;ncsu.edu;uconn.edu", "author_num": 11, "aff_unique_index": "0;1;2;0;0;0;3;4;5;0", "aff_unique_norm": "University of Connecticut;Lehigh University;Northeastern University;University of Rochester;Duke University;North Carolina State University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.uconn.edu;https://www.lehigh.edu;https://www.northeastern.edu;https://www.rochester.edu;https://www.duke.edu;https://www.ncsu.edu", "aff_unique_abbr": "UConn;Lehigh;NEU;U of R;Duke;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Cross-Scale MAE: A Tale of Multiscale Exploitation in Remote Sensing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72787", "id": "5oEVdOd6TV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3fadcbd0437f4717723ff3f6f7216800-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5oEVdOd6TV", "openreview": "https://openreview.net/forum?id=5oEVdOd6TV", "poster": "/media/PosterPDFs/NeurIPS%202023/72787.png?t=1699831040.6518404", "slides": "https://nips.cc/virtual/2023/poster/72787", "video": "https://nips.cc/virtual/2023/poster/72787", "author_site": "Maofeng Tang, Andrei Cozma, Konstantinos Georgiou, Hairong Qi", "tldr": "", "abstract": "Remote sensing images present unique challenges to image analysis due to the extensive geographic coverage, hardware limitations, and misaligned multi-scale images. This paper revisits the classical multi-scale representation learning problem but under the general framework of self-supervised learning for remote sensing image understanding. We present Cross-Scale MAE, a self-supervised model built upon the Masked Auto-Encoder (MAE). During pre-training, Cross-Scale MAE employs scale augmentation techniques and enforces cross-scale consistency constraints through both contrastive and generative losses to ensure consistent and meaningful representations well-suited for a wide range of downstream tasks. Further, our implementation leverages the xFormers library to accelerate network pre-training on a single GPU while maintaining the quality of learned representations.
Experimental evaluations demonstrate that Cross-Scale MAE exhibits superior performance compared to standard MAE and other state-of-the-art remote sensing MAE methods.", "keywords": "Remote Sensing;Self-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/0278958f7f1825de92813d67633323ada871f06a.pdf", "author": "Maofeng Tang;Andrei Liviu Cozma;Konstantinos Georgiou;Hairong Qi", "authorids": "~Maofeng_Tang1;~Andrei_Liviu_Cozma1;~Konstantinos_Georgiou1;~Hairong_Qi1", "gender": "M;M;M;F", "homepage": ";https://www.andreicozma.com;https://gkos.tech;http://www.eecs.utk.edu/people/faculty/hqi/", "dblp": ";;77/4787;00/6984-1.html", "google_scholar": ";;b___QQ8AAAAJ;https://scholar.google.com.tw/citations?user=GqnNG-kAAAAJ", "orcid": "0000-0002-1596-3534;0009-0001-3670-0813;0000-0002-9567-8679;", "linkedin": ";andreicozma1/;konstantinos-georgiou/;hairong-qi-6a67602/", "or_profile": "~Maofeng_Tang1;~Andrei_Liviu_Cozma1;~Konstantinos_Georgiou1;~Hairong_Qi1", "aff": "University of Tennessee, Knoxville;University of Tennessee, Knoxville;University of Tennessee, Knoxville;University of Tennessee, Knoxville", "aff_domain": "utk.edu;eecs.utk.edu;utk.edu;vols.utk.edu", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntang2023crossscale,\ntitle={Cross-Scale {MAE}: A Tale of Multiscale Exploitation in Remote Sensing},\nauthor={Maofeng Tang and Andrei Liviu Cozma and Konstantinos Georgiou and Hairong Qi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5oEVdOd6TV}\n}", "github": "", "project": "", "reviewers": "TztZ;UGSM;CVyA;LSpN;14Vd", "pdf_size": 1127780, "rating": "3;3;3;6;7", "confidence": "4;5;4;3;4", "soundness": "3;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;2;3;3;3", "wc_summary": "41;80;92;43;116", "wc_strengths": "27;38;61;49;213", "wc_weaknesses": "162;29;361;15;138", "wc_questions": "39;177;87;65;83", "wc_limitations": "1;2;18;1;52", "wc_review": "270;326;619;173;602", "wc_reply_reviewers": "223;51;0;15;125", "wc_reply_authors": "0;0;0;0;198", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 4.4, 1.7435595774162693 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 74.4, 28.890136725187023 ], "wc_strengths_avg": [ 77.6, 68.63701625216527 ], "wc_weaknesses_avg": [ 141.0, 124.31411826498227 ], "wc_questions_avg": [ 90.2, 46.589269150739 ], "wc_limitations_avg": [ 14.8, 19.69162258423617 ], "wc_review_avg": [ 398.0, 180.36074961032958 ], "wc_reply_reviewers_avg": [ 82.8, 82.3417269675588 ], "wc_reply_authors_avg": [ 39.6, 79.2 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5441071875825088, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17209489033772814479&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "utk.edu;eecs.utk.edu;utk.edu;vols.utk.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Tennessee", "aff_unique_dep": "", "aff_unique_url": "https://www.utk.edu", "aff_unique_abbr": "UT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Knoxville", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title":
"Compression with Bayesian Implicit Neural Representations", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72786", "id": "5otj6QKUMI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/060b2af0081a460f7f466f7f174d9052-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5otj6QKUMI", "openreview": "https://openreview.net/forum?id=5otj6QKUMI", "poster": "/media/PosterPDFs/NeurIPS%202023/72786.png?t=1702246728.6969635", "slides": "https://nips.cc/virtual/2023/poster/72786", "video": "https://nips.cc/virtual/2023/poster/72786", "author_site": "Zongyu Guo, Gergely Flamich, Jiajun He, Zhibo Chen, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "tldr": "", "abstract": "Many common types of data can be represented as functions that map coordinates to signal values, such as pixel locations to RGB values in the case of an image. Based on this view, data can be compressed by overfitting a compact neural network to its functional representation and then encoding the network weights. However, most current solutions for this are inefficient, as quantization to low-bit precision substantially degrades the reconstruction quality. To address this issue, we propose overfitting variational Bayesian neural networks to the data and compressing an approximate posterior weight sample using relative entropy coding instead of quantizing and entropy coding it. This strategy enables direct optimization of the rate-distortion performance by minimizing the $\\beta$-ELBO, and target different rate-distortion trade-offs for a given network architecture by adjusting $\\beta$. Moreover, we introduce an iterative algorithm for learning prior weight distributions and employ a progressive refinement process for the variational posterior that significantly enhances performance. 
Experiments show that our method achieves strong performance on image and audio compression while retaining simplicity.", "keywords": "Neural Compression;Implicit Neural Representation;Relative Entropy Coding;Bayesian Neural Network", "primary_area": "", "supplementary_material": "/attachment/99fd911c04b8fb388d79b97f0bb050d554daee2b.pdf", "author": "Zongyu Guo;Gergely Flamich;Jiajun He;Zhibo Chen;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~Zongyu_Guo1;~Gergely_Flamich1;~Jiajun_He3;~Zhibo_Chen1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": "M;M;M;M;", "homepage": ";https://gergely-flamich.github.io/;;https://faculty.ustc.edu.cn/chenzhibo;http://jmhl.org", "dblp": "247/4138;187/9709;205/5074-3;54/6561.html;40/6058", "google_scholar": "paus9RMAAAAJ;4Iw9TH8AAAAJ;;1ayDJfsAAAAJ;BEBccCQAAAAJ", "orcid": ";0009-0009-9831-7455;;;0000-0001-7610-949X", "linkedin": ";gergely-flamich-142773102;jiajun-he-76a59526b/;;", "or_profile": "~Zongyu_Guo1;~Gergely_Flamich1;~Jiajun_He3;~Zhibo_Chen1;~Jose_Miguel_Hernandez_Lobato1", "aff": "University of Science and Technology of China;University of Cambridge;University of Cambridge;University of Science and Technology of China;University of Cambridge", "aff_domain": "ustc.edu.cn;cam.ac.uk;cam.ac.uk;ustc.edu.cn;cam.ac.uk", "position": "PhD student;PhD student;Mphil Student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nguo2023compression,\ntitle={Compression with Bayesian Implicit Neural Representations},\nauthor={Zongyu Guo and Gergely Flamich and Jiajun He and Zhibo Chen and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5otj6QKUMI}\n}", "github": "", "project": "", "reviewers": "WLVL;5R2p;7dCG;rYFn;D8f2", "pdf_size": 4437364, "rating": "5;6;6;7;8", "confidence": "2;4;4;4;3", "soundness": "3;3;3;2;4", "novelty": "2;3;3;3;4", "presentation": "3;2;3;3;4", "wc_summary": "113;67;75;72;229", "wc_strengths": "51;47;95;41;138", "wc_weaknesses": "163;59;141;333;132", "wc_questions": "32;50;2;2;52", "wc_limitations": "7;6;9;4;2", "wc_review": "366;229;322;452;553", "wc_reply_reviewers": "76;5;11;115;71", "wc_reply_authors": "0;0;0;483;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 111.2, 61.12413598571354 ], "wc_strengths_avg": [ 74.4, 37.10309960097673 ], "wc_weaknesses_avg": [ 165.6, 90.6942115021681 ], "wc_questions_avg": [ 27.6, 22.03270296627266 ], "wc_limitations_avg": [ 5.6, 2.4166091947189146 ], "wc_review_avg": [ 384.4, 110.7945847052102 ], "wc_reply_reviewers_avg": [ 55.6, 41.78803656550521 ], "wc_reply_authors_avg": [ 96.6, 193.2 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2941742027072762, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1964917135141078284&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ustc.edu.cn;cam.ac.uk;cam.ac.uk;ustc.edu.cn;cam.ac.uk", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Science and Technology of China;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.cam.ac.uk", 
"aff_unique_abbr": "USTC;Cambridge", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Pairwise Causality Guided Transformers for Event Sequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72785", "id": "5q8xovQF7r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/91b047c5f5bd41ef56bfaf4ad0bd19e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5q8xovQF7r", "openreview": "https://openreview.net/forum?id=5q8xovQF7r", "poster": "/media/PosterPDFs/NeurIPS%202023/72785.png?t=1701491132.276253", "slides": "https://nips.cc/virtual/2023/poster/72785", "video": "https://nips.cc/virtual/2023/poster/72785", "author_site": "Xiao Shou, Debarun Bhattacharjya, Tian Gao, Dharmashankar Subramanian, Oktie Hassanzadeh, Kristin P Bennett", "tldr": "", "abstract": "Although pairwise causal relations have been extensively studied in observational longitudinal analyses across many disciplines, incorporating knowledge of causal pairs into deep learning models for temporal event sequences remains largely unexplored. In this paper, we propose a novel approach for enhancing the performance of transformer-based models in multivariate event sequences by injecting pairwise qualitative causal knowledge such as `event Z amplifies future occurrences of event Y'. We establish a new framework for causal inference in temporal event sequences using a transformer architecture, providing a theoretical justification for our approach, and show how to obtain unbiased estimates of the proposed measure. Experimental results demonstrate that our approach outperforms several state-of-the-art models in terms of prediction accuracy by effectively leveraging knowledge about causal pairs. \nWe also consider a unique application where we extract knowledge around sequences of societal events by generating them from a large language model, and demonstrate how a causal knowledge graph can help with event prediction in such sequences. 
\nOverall, our framework offers a practical means of improving the performance of transformer-based models in multivariate event sequences by explicitly exploiting pairwise causal information.", "keywords": "temporal event sequences;causal inference;transformer;causal knowledge graph", "primary_area": "", "supplementary_material": "/attachment/f8d005f02423f66e0cf3f7c702992a1ef6f2bfa9.pdf", "author": "Xiao Shou;Debarun Bhattacharjya;Tian Gao;Dharmashankar Subramanian;Oktie Hassanzadeh;Kristin Bennett", "authorids": "~Xiao_Shou2;~Debarun_Bhattacharjya1;~Tian_Gao1;~Dharmashankar_Subramanian1;~Oktie_Hassanzadeh1;~Kristin_Bennett1", "gender": "M;M;;M;M;F", "homepage": "https://www.ecs.baylor.edu/person/dr-xiao-shou;https://researcher.watson.ibm.com/researcher/view.php?person=us-debarunb;https://sites.google.com/view/tiangao/home;http://researcher.watson.ibm.com/researcher/view.php?person=us-dharmash;http://oktie.com;https://science.rpi.edu/mathematical-sciences/faculty/kristin-bennett", "dblp": ";98/5604;;;h/OktieHassanzadeh;24/4209.html", "google_scholar": "https://scholar.google.com/citations?hl=en;pwfVt-MAAAAJ;5rweipAAAAAJ;j54RzcEAAAAJ;w6-6MLkAAAAJ;GX4ZXSkAAAAJ", "orcid": ";;0000-0002-0337-6682;;0000-0001-5307-9857;0000-0002-8782-105X", "linkedin": ";;;;oktie/;kristin-bennett-b337637/", "or_profile": "~Xiao_Shou2;~Debarun_Bhattacharjya1;~Tian_Gao1;~Dharmashankar_Subramanian1;~Oktie_Hassanzadeh1;~Kristin_Bennett1", "aff": "Rensselaer Polytechnic Institute;International Business Machines;Rensselaer Polytechnic Institute;International Business Machines;International Business Machines;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;ibm.com;rpi.edu;ibm.com;ibm.com;rpi.edu", "position": "PhD student;Researcher;PhD student;Principal Researcher;Research Staff Member;Full Professor", "bibtex": "@inproceedings{\nshou2023pairwise,\ntitle={Pairwise Causality Guided Transformers for Event Sequences},\nauthor={Xiao Shou and Debarun Bhattacharjya and Tian Gao and Dharmashankar Subramanian and Oktie Hassanzadeh and Kristin Bennett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5q8xovQF7r}\n}", "github": "", "project": "", "reviewers": "vJUR;Bmuq;fbyg;vaYz;WQuf", "pdf_size": 695059, "rating": "5;5;5;6;7", "confidence": "3;4;2;2;3", "soundness": "3;3;1;3;3", "novelty": "2;3;3;3;3", "presentation": "1;2;4;2;3", "wc_summary": "59;68;43;80;45", "wc_strengths": "57;44;21;50;22", "wc_weaknesses": "194;340;22;67;3", "wc_questions": "17;31;114;4;159", "wc_limitations": "36;10;1;12;1", "wc_review": "363;493;201;213;230", "wc_reply_reviewers": "16;272;33;19;25", "wc_reply_authors": "0;571;0;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 59.0, 13.957077057894322 ], "wc_strengths_avg": [ 38.8, 14.715977711317723 ], "wc_weaknesses_avg": [ 125.2, 126.36676778330607 ], "wc_questions_avg": [ 65.0, 60.69266842049375 ], "wc_limitations_avg": [ 12.0, 12.821856339859686 ], "wc_review_avg": [ 300.0, 112.68362791461766 ], "wc_reply_reviewers_avg": [ 73.0, 99.66945369570358 ], "wc_reply_authors_avg": [ 114.2, 228.40000000000003 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], 
"corr_rating_confidence": -0.1336306209562122, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13959068318768150310&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "rpi.edu;ibm.com;rpi.edu;ibm.com;ibm.com;rpi.edu", "author_num": 6, "aff_unique_index": "0;1;0;1;1;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com", "aff_unique_abbr": "RPI;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Composing Parameter-Efficient Modules with Arithmetic Operation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72784", "id": "5r3e27I9Gy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/299a08ee712d4752c890938da99a77c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5r3e27I9Gy", "openreview": "https://openreview.net/forum?id=5r3e27I9Gy", "poster": "/media/PosterPDFs/NeurIPS%202023/72784.png?t=1702982170.112022", "slides": "https://nips.cc/virtual/2023/poster/72784", "video": "https://nips.cc/virtual/2023/poster/72784", "author_site": "Jinghan Zhang, shiqi chen, Junteng Liu, Junxian He", "tldr": "", "abstract": "As an efficient alternative to conventional full fine-tuning, parameter-efficient fine-tuning (PEFT) is becoming the prevailing method to adapt pretrained language models. In PEFT, a lightweight module is learned on each dataset while the underlying pretrained language model remains unchanged, resulting in multiple compact modules representing diverse skills when applied to various domains and tasks. In this paper, we propose to compose these parameter-efficient modules through linear arithmetic operations in the weight space, thereby integrating different module capabilities. Specifically, we first define an addition and negation operator for the module, and then further compose these two basic operators to perform flexible arithmetic. Our approach requires no additional training and enables highly flexible module composition. We apply different arithmetic operations to compose the parameter-efficient modules for (1) distribution generalization, (2) multi-tasking, (3) detoxifying, and (4) domain transfer. Additionally, we extend our approach to detoxify Alpaca-LoRA, the latest instruction-tuned large language model based on LLaMA. 
Empirical results demonstrate that our approach produces new and effective parameter-efficient modules that significantly outperform existing ones across all settings.", "keywords": "Parameter-efficient fine-tuning;module composition", "primary_area": "", "supplementary_material": "/attachment/f91f7c53c3197b15c6cf43f48b14ee6eb10e0ead.zip", "author": "Jinghan Zhang;Shiqi Chen;Junteng Liu;Junxian He", "authorids": "~Jinghan_Zhang1;~Shiqi_Chen3;~Junteng_Liu2;~Junxian_He1", "gender": "F;F;M;M", "homepage": "https://jinghan23.github.io/;;https://vicent0205.github.io/;https://jxhe.github.io", "dblp": ";;347/3273;188/6127.html", "google_scholar": "HqF5d38AAAAJ;4Tg7zOMAAAAJ;;BIFGeoUAAAAJ", "orcid": "0009-0002-1489-6162;;;", "linkedin": ";;;", "or_profile": "~Jinghan_Zhang1;~Shiqi_Chen3;~Junteng_Liu2;~Junxian_He1", "aff": "Southeast University;City University of Hong Kong;Shanghai Jiaotong University;Hong Kong University of Science and Technology", "aff_domain": "seu.edu.cn;cityu.edu.hk;sjtu.edu.cn;ust.hk", "position": "Undergrad student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023composing,\ntitle={Composing Parameter-Efficient Modules with Arithmetic Operation},\nauthor={Jinghan Zhang and Shiqi Chen and Junteng Liu and Junxian He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5r3e27I9Gy}\n}", "github": "", "project": "", "reviewers": "hxSN;ZZbE;PsaT;wusG;EMdb", "pdf_size": 1820350, "rating": "3;4;7;7;7", "confidence": "4;4;3;3;5", "soundness": "3;2;3;3;3", "novelty": "2;3;3;2;2", "presentation": "3;3;3;3;3", "wc_summary": "47;68;66;100;36", "wc_strengths": "21;89;46;20;23", "wc_weaknesses": "229;104;346;58;190", "wc_questions": "1;24;73;19;6", "wc_limitations": "1;9;9;157;1", "wc_review": "299;294;540;354;256", "wc_reply_reviewers": "0;106;162;11;12", "wc_reply_authors": "0;68;28;0;27", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 5.6, 1.7435595774162693 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.4, 21.850400453996258 ], "wc_strengths_avg": [ 39.8, 26.40757467091592 ], "wc_weaknesses_avg": [ 185.4, 100.5695779050504 ], "wc_questions_avg": [ 24.6, 25.601562452319197 ], "wc_limitations_avg": [ 35.4, 60.9051721941577 ], "wc_review_avg": [ 348.6, 100.67690897122338 ], "wc_reply_reviewers_avg": [ 58.2, 64.51170436440196 ], "wc_reply_authors_avg": [ 24.6, 24.944738924270187 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2145987688197381, "gs_citation": 131, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3914196277753196289&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "seu.edu.cn;cityu.edu.hk;sjtu.edu.cn;ust.hk", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Southeast University;City University of Hong Kong;Shanghai Jiao Tong University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.cityu.edu.hk;https://www.sjtu.edu.cn;https://www.ust.hk", "aff_unique_abbr": "SEU;CityU;SJTU;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": 
"China" }, { "title": "Module-wise Training of Neural Networks via the Minimizing Movement Scheme", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72783", "id": "5sV53leJCv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a6a1e4c756d700d9aedcc1896a7e6fb0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5sV53leJCv", "openreview": "https://openreview.net/forum?id=5sV53leJCv", "poster": "/media/PosterPDFs/NeurIPS%202023/72783.png?t=1695826907.4662569", "slides": "https://nips.cc/virtual/2023/poster/72783", "video": "https://nips.cc/virtual/2023/poster/72783", "author_site": "Skander Karkar, Ibrahim Ayed, Emmanuel de B\u00e9zenac, Patrick Gallinari", "tldr": "", "abstract": "Greedy layer-wise or module-wise training of neural networks is compelling in constrained and on-device settings where memory is limited, as it circumvents a number of problems of end-to-end back-propagation. However, it suffers from a stagnation problem, whereby early layers overfit and deeper layers stop increasing the test accuracy after a certain depth. We propose to solve this issue by introducing a simple module-wise regularization inspired by the minimizing movement scheme for gradient flows in distribution space. We call the method TRGL for Transport Regularized Greedy Learning and study it theoretically, proving that it leads to greedy modules that are regular and that progressively solve the task. Experimentally, we show improved accuracy of module-wise training of various architectures such as ResNets, Transformers and VGG, when our regularization is added, superior to that of other module-wise training methods and often to end-to-end training, with as much as 60% less memory usage.", "keywords": "Deep learning;greedy layerwise training;memory;optimal transport", "primary_area": "", "supplementary_material": "/attachment/9735552e944577cc0b34d9f41328092d2fc56912.pdf", "author": "Skander Karkar;Ibrahim Ayed;Emmanuel de Bezenac;patrick gallinari", "authorids": "~Skander_Karkar1;~Ibrahim_Ayed1;~Emmanuel_de_Bezenac2;~patrick_gallinari1", "gender": "M;;M;M", "homepage": "https://github.com/skander-karkar;;;", "dblp": "274/7334.html;236/6062;;g/PatrickGallinari", "google_scholar": "QfeddpUAAAAJ;;https://scholar.google.fr/citations?user=KvZw5gYAAAAJ;rFaxB20AAAAJ", "orcid": ";;;", "linkedin": "skander-karkar-8b11b2266/;;;", "or_profile": "~Skander_Karkar1;~Ibrahim_Ayed1;~Emmanuel_de_Bezenac2;~patrick_gallinari1", "aff": "Sorbonne University;LIP6;ETHZ - ETH Zurich;Sorbonne Universite", "aff_domain": "sorbonne-universite.fr;lip6.fr;ethz.ch;sorbonne-universite.fr", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkarkar2023modulewise,\ntitle={Module-wise Training of Neural Networks via the Minimizing Movement Scheme},\nauthor={Skander Karkar and Ibrahim Ayed and Emmanuel de Bezenac and patrick gallinari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5sV53leJCv}\n}", "github": "", "project": "", "reviewers": "N1H7;Ad5i;X5WU;U1e3", "pdf_size": 1022329, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "3;2;3;2", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "155;68;60;20", "wc_strengths": "22;75;83;36", "wc_weaknesses": "105;88;76;166", "wc_questions": "33;2;1;41", "wc_limitations": "4;26;1;3", "wc_review": "319;259;221;266", "wc_reply_reviewers": "0;0;0;22", "wc_reply_authors": "20;0;0;0", 
"reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 49.23603863025538 ], "wc_strengths_avg": [ 54.0, 25.64176280991617 ], "wc_weaknesses_avg": [ 108.75, 34.62206666275137 ], "wc_questions_avg": [ 19.25, 17.977416388346796 ], "wc_limitations_avg": [ 8.5, 10.161200716450788 ], "wc_review_avg": [ 266.25, 34.93833854092092 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 5.0, 8.660254037844387 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8066079693923042433&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "email": "sorbonne-universite.fr;lip6.fr;ethz.ch;sorbonne-universite.fr", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Sorbonne University;Laboratoire d'Informatique de Paris 6;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sorbonne.universite.fr;http://www.lip6.fr;https://www.ethz.ch", "aff_unique_abbr": "Sorbonne;LIP6;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "France;Switzerland" }, { "title": "StreamNet: Memory-Efficient Streaming Tiny Deep Learning Inference on the Microcontroller", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72782", "id": "5t5u8PQa2T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7526508f11bbe0a123af62b9dab1fbe1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5t5u8PQa2T", "openreview": "https://openreview.net/forum?id=5t5u8PQa2T", "poster": "/media/PosterPDFs/NeurIPS%202023/72782.png?t=1699504621.5232909", "slides": "https://nips.cc/virtual/2023/poster/72782", "video": "https://nips.cc/virtual/2023/poster/72782", "author_site": "Hong-Sheng Zheng, Yu-Yuan Liu, Chen-Fong Hsu, Tsung Tai Yeh", "tldr": "", "abstract": "With the emerging Tiny Machine Learning (TinyML) inference applications, there is a growing interest when deploying TinyML models on the low-power Microcontroller Unit (MCU). However, deploying TinyML models on MCUs reveals several challenges due to the MCU\u2019s resource constraints, such as small flash memory, tight SRAM memory budget, and slow CPU performance. Unlike typical layer-wise inference, patch-based inference reduces the peak usage of SRAM memory on MCUs by saving small patches rather than the entire tensor in the SRAM memory. However, the processing of patch-based inference tremendously increases the amount of MACs against the layer-wise method. Thus, this notoriously computational overhead makes patch-based inference undesirable on MCUs. This work designs StreamNet that employs the stream buffer to eliminate the redundant computation of patch-based inference. StreamNet uses 1D and 2D streaming processing and provides an parameter selection algorithm that automatically improve the performance of patch-based inference with minimal requirements on the MCU\u2019s SRAM memory space. 
Across 10 TinyML models, StreamNet-2D achieves a geometric mean speedup of 7.3X and saves 81\\% of MACs over the state-of-the-art patch-based inference.", "keywords": "TinyML models;edge AIs;Microcontroller", "primary_area": "", "supplementary_material": "/attachment/b0d7687284214671ae5dbeef38bd8ad34a098fa9.pdf", "author": "Hong Sheng Zheng;Yu-Yuan Liu;Chen-Fong Hsu;Tsung Tai Yeh", "authorids": "~Hong_Sheng_Zheng1;yyliu.cs11@nycu.edu.tw;fonghsu.cs08@nycu.edu.tw;~Tsung_Tai_Yeh1", "gender": "M;;;M", "homepage": "https://rniczh.github.io/;;;https://www.cs.nycu.edu.tw/~ttyeh", "dblp": "369/7074;;;02/8471", "google_scholar": "https://scholar.google.com/citations?hl=en;;;", "orcid": "0009-0004-6408-9996;;;", "linkedin": "hongsheng-zheng-616896117/;;;", "or_profile": "~Hong_Sheng_Zheng1;yyliu.cs11@nycu.edu.tw;fonghsu.cs08@nycu.edu.tw;~Tsung_Tai_Yeh1", "aff": "MediaTek Inc.;;;National Yang-Ming Chiao Tung University", "aff_domain": "mediatek.com;;;nycu.edu.tw", "position": "Compiler Engineer;;;Assistant Professor", "bibtex": "@inproceedings{\nzheng2023streamnet,\ntitle={StreamNet: Memory-Efficient Streaming Tiny Deep Learning Inference on the Microcontroller},\nauthor={Hong Sheng Zheng and Yu-Yuan Liu and Chen-Fong Hsu and Tsung Tai Yeh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5t5u8PQa2T}\n}", "github": "", "project": "", "reviewers": "TjRD;xxe7;XmQf;bp8j", "pdf_size": 4227524, "rating": "6;6;6;6", "confidence": "3;4;4;5", "soundness": "3;2;4;2", "novelty": "3;2;3;3", "presentation": "3;3;4;4", "wc_summary": "82;97;74;74", "wc_strengths": "42;105;86;38", "wc_weaknesses": "94;165;18;39", "wc_questions": "52;60;46;80", "wc_limitations": "18;1;2;1", "wc_review": "288;428;226;232", "wc_reply_reviewers": "51;21;7;0", "wc_reply_authors": "29;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.75, 9.390819985496474 ], "wc_strengths_avg": [ 67.75, 28.586491565073178 ], "wc_weaknesses_avg": [ 79.0, 56.88145567757562 ], "wc_questions_avg": [ 59.5, 12.835497652993435 ], "wc_limitations_avg": [ 5.5, 7.22841614740048 ], "wc_review_avg": [ 293.5, 81.33111335768126 ], "wc_reply_reviewers_avg": [ 19.75, 19.562400159489634 ], "wc_reply_authors_avg": [ 7.25, 12.55736835487436 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16774287689367020250&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mediatek.com;;;nycu.edu.tw", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "MediaTek Inc.;National Yang-Ming Chiao Tung University", "aff_unique_dep": ";", "aff_unique_url": "https://www.mediatek.com/;https://www.nycu.edu.tw", "aff_unique_abbr": "MediaTek;NYCU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "5uIL1E8h1E", "title": "Residual Scheduling: A New Reinforcement Learning Approach to Solving Job Shop Scheduling Problem", "track": "main", "status": "Reject", "tldr": "", "abstract": "The job-shop scheduling problem (JSP) is a mathematical optimization problem
widely used in industries like manufacturing, and flexible JSP (FJSP) is also a common variant. Since they are NP-hard, it is intractable to find the optimal solution for all cases within a reasonable time. Thus, it becomes important to develop efficient heuristics to solve JSP/FJSP. One class of methods for solving scheduling problems is construction heuristics, which build scheduling solutions incrementally. Recently, many construction-heuristic methods leverage deep reinforcement learning (DRL) with graph neural networks (GNN). In this paper, we propose a new approach, named residual scheduling, to solving JSP/FJSP. In this new approach, we remove irrelevant machines and jobs, such as those already finished, so that the states include only the remaining (or relevant) machines and jobs. Our experiments show that our approach reaches state-of-the-art (SOTA) performance among all known construction heuristics on most well-known open JSP and FJSP benchmarks. In addition, we observe that even though our model is trained on scheduling problems of smaller sizes, our method still performs well on scheduling problems of large sizes. Interestingly, in our experiments, our approach even reaches a zero gap for 49 of 50 JSP instances with more than 150 jobs on 20 machines.", "keywords": "reinforcement learning;job-shop scheduling problem;flexible job-shop scheduling problem;graph neural network", "primary_area": "", "supplementary_material": "/attachment/ab3be8ad2bfb96dba43aa0a8f5d3652bcd98fdf9.zip", "author": "Kuo-Hao Ho;Ruei Yu Jheng;Ji-Han Wu;Fan Chiang;Yen-Chi Chen;Yuan-Yu Wu;I-Chen Wu", "authorids": "~Kuo-Hao_Ho1;~Ruei_Yu_Jheng2;~Ji-Han_Wu1;lalwin6404@gmail.com;zxkyjimmy@gmail.com;warren03wu@gmail.com;~I-Chen_Wu3", "gender": "M;;Not Specified;;;;M", "homepage": ";;https://github.com/Jerry-Github-Cloud;;;;https://cgilab.nctu.edu.tw/~icwu/", "dblp": "160/0321;;;;;;06/983", "google_scholar": "https://scholar.google.com.tw/citations?user=LfH40wMAAAAJ;;;;;;", "orcid": ";;;;;;0000-0003-2535-0587", "linkedin": ";;;;;;", "or_profile": "~Kuo-Hao_Ho1;~Ruei_Yu_Jheng2;~Ji-Han_Wu1;lalwin6404@gmail.com;zxkyjimmy@gmail.com;warren03wu@gmail.com;~I-Chen_Wu3", "aff": "National Chiao Tung University;;;;;;National Yang Ming Chiao Tung University", "aff_domain": "nctu.edu.tw;;;;;;nycu.edu.tw", "position": "PhD student;;;;;;Full Professor", "bibtex": "@misc{\nho2023residual,\ntitle={Residual Scheduling: A New Reinforcement Learning Approach to Solving Job Shop Scheduling Problem},\nauthor={Kuo-Hao Ho and Ruei Yu Jheng and Ji-Han Wu and Fan Chiang and Yen-Chi Chen and Yuan-Yu Wu and I-Chen Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=5uIL1E8h1E}\n}", "github": "", "project": "", "reviewers": "xp5N;PVMB;e9mT;8Fub", "site": "https://openreview.net/forum?id=5uIL1E8h1E", "pdf_size": 789047, "rating": "5;5;5;6", "confidence": "4;4;5;5", "soundness": "2;3;3;2", "novelty": "2;3;2;2", "presentation": "2;3;2;2", "wc_summary": "60;74;75;125", "wc_strengths": "57;20;39;51", "wc_weaknesses": "268;90;319;135", "wc_questions": "79;47;107;345", "wc_limitations": "9;34;50;6", "wc_review": "473;265;590;662", "wc_reply_reviewers": "87;111;14;14", "wc_reply_authors": "440;186;41;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.5, 24.682990094394967 ], "wc_strengths_avg": [ 41.75,
14.13108276106258 ], "wc_weaknesses_avg": [ 203.0, 93.64026911537579 ], "wc_questions_avg": [ 144.5, 117.68920936092654 ], "wc_limitations_avg": [ 24.75, 18.18481509391833 ], "wc_review_avg": [ 497.5, 150.22732774032826 ], "wc_reply_reviewers_avg": [ 56.5, 43.33878170876519 ], "wc_reply_authors_avg": [ 166.75, 172.2314939260529 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3453821861063077785&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "National Chiao Tung University;National Yang Ming Chiao Tung University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nctu.edu.tw;https://www.nycu.edu.tw", "aff_unique_abbr": "NCTU;NYCU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Amortized Reparametrization: Efficient and Scalable Variational Inference for Latent SDEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72781", "id": "5yZiP9fZNv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f72d4fdfd5eb425cd81df9fe6272a533-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5yZiP9fZNv", "openreview": "https://openreview.net/forum?id=5yZiP9fZNv", "poster": "/media/PosterPDFs/NeurIPS%202023/72781.png?t=1701907205.0025733", "slides": "https://nips.cc/virtual/2023/poster/72781", "video": "https://nips.cc/virtual/2023/poster/72781", "author_site": "Kevin Course, Prasanth Nair", "tldr": "", "abstract": "We consider the problem of inferring latent stochastic differential equations (SDEs) with a time and memory cost that scales independently of the amount of data, the total length of the time series, and the stiffness of the approximate differential equations. This is in stark contrast to typical methods for inferring latent differential equations, which, despite their constant memory cost, have a time complexity that is heavily dependent on the stiffness of the approximate differential equation. We achieve this computational advancement by removing the need to solve differential equations when approximating gradients using a novel amortization strategy coupled with a recently derived reparametrization of expectations under linear SDEs. We show that, in practice, this allows us to achieve similar performance to methods based on adjoint sensitivities with more than an order of magnitude fewer evaluations of the model in training.", "keywords": "variational inference;differential equations;dynamical systems;neural ordinary differential equations;latent stochastic differential equations", "primary_area": "", "supplementary_material": "", "author": "Kevin Course;Prasanth B. Nair", "authorids": "~Kevin_Course1;~Prasanth_B._Nair1", "gender": ";M", "homepage": ";http://arrow.utias.utoronto.ca/~pbn/index.html", "dblp": ";n/PrasanthBNair", "google_scholar": ";6MXRhVIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kevin_Course1;~Prasanth_B._Nair1", "aff": ";Toronto University", "aff_domain": ";utoronto.ca", "position": ";Full Professor", "bibtex": "@inproceedings{\ncourse2023amortized,\ntitle={Amortized Reparametrization: Efficient and Scalable Variational Inference for Latent {SDE}s},\nauthor={Kevin Course and Prasanth B.
Nair},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5yZiP9fZNv}\n}", "github": "", "project": "", "reviewers": "8z7R;ayS4;6Z23;oJgT", "pdf_size": 929345, "rating": "3;5;6;7", "confidence": "4;2;3;2", "soundness": "2;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "137;137;178;159", "wc_strengths": "252;97;84;56", "wc_weaknesses": "824;408;107;88", "wc_questions": "90;222;154;42", "wc_limitations": "149;14;50;16", "wc_review": "1452;878;573;361", "wc_reply_reviewers": "415;64;0;0", "wc_reply_authors": "130;0;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 152.75, 17.122718826167766 ], "wc_strengths_avg": [ 122.25, 76.36221251378197 ], "wc_weaknesses_avg": [ 356.75, 298.1403822027469 ], "wc_questions_avg": [ 127.0, 67.72739475278819 ], "wc_limitations_avg": [ 57.25, 54.869732093386425 ], "wc_review_avg": [ 816.0, 410.61356528979894 ], "wc_reply_reviewers_avg": [ 119.75, 172.45343574426113 ], "wc_reply_authors_avg": [ 32.5, 56.29165124598851 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7644707871564383, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9847647698953573890&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";utoronto.ca", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Many-body Approximation for Non-negative Tensors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72780", "id": "5yedZXV7wt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ea94957d81b1c1caf87ef5319fa6b467-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5yedZXV7wt", "openreview": "https://openreview.net/forum?id=5yedZXV7wt", "poster": "/media/PosterPDFs/NeurIPS%202023/72780.png?t=1698249769.865172", "slides": "https://nips.cc/virtual/2023/poster/72780", "video": "https://nips.cc/virtual/2023/poster/72780", "author_site": "KAZU GHALAMKARI, Mahito Sugiyama, Yoshinobu Kawahara", "tldr": "", "abstract": "We present an alternative approach to decompose non-negative tensors, called many-body approximation. Traditional decomposition methods assume low-rankness in the representation, resulting in difficulties in global optimization and target rank selection. We avoid these problems by energy-based modeling of tensors, where a tensor and its mode correspond to a probability distribution and a random variable, respectively. Our model can be globally optimized in terms of KL divergence minimization by taking into account the interactions between variables (that is, modes), which can be tuned more intuitively than ranks. Furthermore, we visualize interactions between modes as tensor networks and reveal a nontrivial relationship between many-body approximation and low-rank approximation.
We demonstrate the effectiveness of our approach in tensor completion and approximation.", "keywords": "Tensor decomposition;Energy based model;Tensor networks", "primary_area": "", "supplementary_material": "/attachment/836e500de68287fba98085dea3ca85c4bbdda184.zip", "author": "Kazu Ghalamkari;Mahito Sugiyama;Yoshinobu Kawahara", "authorids": "~Kazu_Ghalamkari1;~Mahito_Sugiyama1;~Yoshinobu_Kawahara1", "gender": "M;M;M", "homepage": "https://gkazunii.github.io;https://mahito.nii.ac.jp/;https://mls.ist.osaka-u.ac.jp/en/~kawahara/", "dblp": "267/1396.html;05/8421;09/4700", "google_scholar": "Oob5hCQAAAAJ;qLlRvTkAAAAJ;B8sRETUAAAAJ", "orcid": "0000-0002-4779-2856;0000-0001-5907-9831;0000-0001-7789-4709", "linkedin": "kazu-ghalamkari-880372134/;;", "or_profile": "~Kazu_Ghalamkari1;~Mahito_Sugiyama1;~Yoshinobu_Kawahara1", "aff": "National Institute of Informatics;National Institute of Informatics;Kyushu University", "aff_domain": "nii.ac.jp;nii.ac.jp;imi.kyushu-u.ac.jp", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nghalamkari2023manybody,\ntitle={Many-body Approximation for Non-negative Tensors},\nauthor={Kazu Ghalamkari and Mahito Sugiyama and Yoshinobu Kawahara},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5yedZXV7wt}\n}", "github": "", "project": "", "reviewers": "59t8;wHyr;gp7Q;xQBv;oVis", "pdf_size": 3723734, "rating": "5;6;6;6;7", "confidence": "2;3;4;3;3", "soundness": "3;2;3;3;3", "novelty": "2;3;4;2;3", "presentation": "2;3;3;3;3", "wc_summary": "21;105;164;31;77", "wc_strengths": "17;38;71;63;74", "wc_weaknesses": "82;123;206;116;37", "wc_questions": "13;131;45;3;26", "wc_limitations": "15;34;1;38;4", "wc_review": "148;431;487;251;218", "wc_reply_reviewers": "43;211;350;19;21", "wc_reply_authors": "113;26;790;12;12", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 79.6, 52.0983684965278 ], "wc_strengths_avg": [ 52.6, 21.84124538573751 ], "wc_weaknesses_avg": [ 112.8, 55.66830336915254 ], "wc_questions_avg": [ 43.6, 45.90250537824706 ], "wc_limitations_avg": [ 18.4, 15.16047492659778 ], "wc_review_avg": [ 307.0, 129.7027370567021 ], "wc_reply_reviewers_avg": [ 128.8, 131.7002657552368 ], "wc_reply_authors_avg": [ 190.6, 302.0566834221683 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11065193394602665263&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "nii.ac.jp;nii.ac.jp;imi.kyushu-u.ac.jp", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "National Institute of Informatics;Kyushu University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nii.ac.jp/;https://www.kyushu-u.ac.jp", "aff_unique_abbr": "NII;Kyushu U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "No Representation Rules Them All in Category Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72779", "id": "5ytypAqAsR", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f52ab4322e967efd312c38a68d07f01-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5ytypAqAsR", "openreview": "https://openreview.net/forum?id=5ytypAqAsR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72779", "video": "https://nips.cc/virtual/2023/poster/72779", "author_site": "Sagar Vaze, Andrea Vedaldi, Andrew Zisserman", "tldr": "", "abstract": "In this paper we tackle the problem of Generalized Category Discovery (GCD). Specifically, given a dataset with labelled and unlabelled images, the task is to cluster all images in the unlabelled subset, whether or not they belong to the labelled categories. Our first contribution is to recognise that most existing GCD benchmarks only contain labels for a single clustering of the data, making it difficult to ascertain whether models are leveraging the available labels to solve the GCD task, or simply solving an unsupervised clustering problem. As such, we present a synthetic dataset, named 'Clevr-4', for category discovery. Clevr-4 contains four equally valid partitions of the data, i.e based on object 'shape', 'texture' or 'color' or 'count'. To solve the task, models are required to extrapolate the taxonomy specified by labelled set, rather than simply latch onto a single natural grouping of the data. We use this dataset to demonstrate the limitations of unsupervised clustering in the GCD setting, showing that even very strong unsupervised models fail on Clevr-4. We further use Clevr-4 to examine the weaknesses of existing GCD algorithms, and propose a new method which addresses these shortcomings, leveraging consistent findings from the representation learning literature to do so. Our simple solution, which is based on `Mean Teachers' and termed $\\mu$GCD, substantially outperforms implemented baselines on Clevr-4. 
Finally, when we transfer these findings to real data on the challenging Semantic Shift Benchmark suite, we find that $\\mu$GCD outperforms all prior work, setting a new state-of-the-art.", "keywords": "Category discovery;semi-supervised learning;self-supervised learning;classification", "primary_area": "", "supplementary_material": "", "author": "Sagar Vaze;Andrea Vedaldi;Andrew Zisserman", "authorids": "~Sagar_Vaze1;~Andrea_Vedaldi1;~Andrew_Zisserman1", "gender": "M;M;", "homepage": "https://sgvaze.github.io/;https://www.robots.ox.ac.uk/~vedaldi/;", "dblp": "226/4705;99/2825;", "google_scholar": "lvuOknUAAAAJ;bRT7t28AAAAJ;", "orcid": "0000-0003-2920-9345;0000-0003-1374-2858;", "linkedin": "sagar-vaze-2356ab171/;;", "or_profile": "~Sagar_Vaze1;~Andrea_Vedaldi1;~Andrew_Zisserman1", "aff": "University of Oxford;Meta;", "aff_domain": "ox.ac.uk;meta.com;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nvaze2023no,\ntitle={No Representation Rules Them All in Category Discovery},\nauthor={Sagar Vaze and Andrea Vedaldi and Andrew Zisserman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5ytypAqAsR}\n}", "github": "", "project": "", "reviewers": "dGiY;V8PZ;s1RT;aYTM", "pdf_size": 12522531, "rating": "5;5;6;7", "confidence": "5;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "77;80;107;102", "wc_strengths": "33;22;82;155", "wc_weaknesses": "144;42;93;31", "wc_questions": "115;66;25;72", "wc_limitations": "52;9;35;3", "wc_review": "421;219;342;363", "wc_reply_reviewers": "505;74;50;139", "wc_reply_authors": "141;186;0;0", "reply_reviewers": "3;1;1;2", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.5, 13.162446581088183 ], "wc_strengths_avg": [ 73.0, 52.454742397613586 ], "wc_weaknesses_avg": [ 77.5, 44.95831402532795 ], "wc_questions_avg": [ 69.5, 31.894356867634123 ], "wc_limitations_avg": [ 24.75, 19.803724397193573 ], "wc_review_avg": [ 336.25, 73.61852688012712 ], "wc_reply_reviewers_avg": [ 192.0, 183.62053262094628 ], "wc_reply_authors_avg": [ 81.75, 83.28377693164498 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5298211148253403060&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;meta.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://meta.com", "aff_unique_abbr": "Oxford;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "MosaicBERT: A Bidirectional Encoder Optimized for Fast Pretraining", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72778", "id": "5zipcfLC2Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/095a6917768712b7ccc61acbeecad1d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=5zipcfLC2Z", "openreview": "https://openreview.net/forum?id=5zipcfLC2Z", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72778", "video": "https://nips.cc/virtual/2023/poster/72778", "author_site": "Jacob Portes, Alexander Trott, Sam Havens, DANIEL KING, Abhinav Venigalla, Moin Nadeem, Nikhil Sardana, Daya Khudia, Jonathan Frankle", "tldr": "", "abstract": "Although BERT-style encoder models are heavily used in NLP research, many researchers do not pretrain their own BERTs from scratch due to the high cost of training. In the past half-decade since BERT first rose to prominence, many advances have been made with other transformer architectures and training configurations that have yet to be systematically incorporated into BERT. Here, we introduce MosaicBERT, a BERT-style encoder architecture and training recipe that is empirically optimized for fast pretraining. This efficient architecture incorporates FlashAttention, Attention with Linear Biases (ALiBi), Gated Linear Units (GLU), a module to dynamically remove padded tokens, and low precision LayerNorm into the classic transformer encoder block. The training recipe includes a 30% masking ratio for the Masked Language Modeling (MLM) objective, bfloat16 precision, and vocabulary size optimized for GPU throughput, in addition to best-practices from RoBERTa and other encoder models. When pretrained from scratch on the C4 dataset, this base model achieves a downstream average GLUE (dev) score of 79.6 in 1.13 hours on 8 A100 80 GB GPUs at a cost of roughly $20. We plot extensive accuracy vs. pretraining speed Pareto curves and show that MosaicBERT base and large are consistently Pareto optimal when compared to a competitive BERT base and large. This empirical speed up in pretraining enables researchers and engineers to pretrain custom BERT-style models at low cost instead of finetune on existing generic models. 
We open source our model weights and code.", "keywords": "BERT;Pretraining;Efficiency;FlashAttention;ALiBi", "primary_area": "", "supplementary_material": "/attachment/68d6bbdf0fe327c3e08f60019dbc9048976407ab.pdf", "author": "Jacob Portes;Alexander R Trott;Sam Havens;DANIEL KING;Abhinav Venigalla;Moin Nadeem;Nikhil Sardana;Daya Khudia;Jonathan Frankle", "authorids": "~Jacob_Portes1;~Alexander_R_Trott1;~Sam_Havens1;~DANIEL_KING4;~Abhinav_Venigalla1;~Moin_Nadeem1;~Nikhil_Sardana1;daya@mosaicml.com;~Jonathan_Frankle1", "gender": "M;M;M;;;;;;M", "homepage": "https://jacobfulano.github.io;;https://www.samuelhavens.com/;;;;;;http://www.jfrankle.com", "dblp": "322/0370;;;98/11100;;183/6394;;;169/9776", "google_scholar": "CzH4cSEAAAAJ;rB4bvV0AAAAJ;;;;;;;MlLJapIAAAAJ", "orcid": "0000-0003-3102-012X;;;;;;;;", "linkedin": "jacob-portes-82804062/;;samhavens;;;;;;jfrankle/", "or_profile": "~Jacob_Portes1;~Alexander_R_Trott1;~Sam_Havens1;~DANIEL_KING4;~Abhinav_Venigalla1;~Moin_Nadeem1;~Nikhil_Sardana1;daya@mosaicml.com;~Jonathan_Frankle1", "aff": "MosaicML;MosaicML;MosaicML;MosaicML;;;;;Massachusetts Institute of Technology", "aff_domain": "mosaicml.com;mosaicml.com;mosaicml.com;mosaicml.com;;;;;mit.edu", "position": "Researcher;Researcher;Researcher;Researcher;;;;;PhD student", "bibtex": "@inproceedings{\nportes2023mosaicbert,\ntitle={Mosaic{BERT}: A Bidirectional Encoder Optimized for Fast Pretraining},\nauthor={Jacob Portes and Alexander R Trott and Sam Havens and DANIEL KING and Abhinav Venigalla and Moin Nadeem and Nikhil Sardana and Daya Khudia and Jonathan Frankle},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=5zipcfLC2Z}\n}", "github": "", "project": "", "reviewers": "WcGd;xi9s;VemN;NjCS;Ss7L", "pdf_size": 8806747, "rating": "3;6;7;7;7", "confidence": "3;5;4;3;4", "soundness": "2;3;3;4;3", "novelty": "2;2;4;2;3", "presentation": "2;2;4;4;3", "wc_summary": "122;67;89;244;64", "wc_strengths": "33;79;25;192;82", "wc_weaknesses": "75;430;237;190;46", "wc_questions": "39;304;49;71;32", "wc_limitations": "9;17;24;55;12", "wc_review": "278;897;424;752;236", "wc_reply_reviewers": "0;109;0;0;0", "wc_reply_authors": "0;51;0;0;0", "reply_reviewers": "0;1;0;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 117.2, 66.7035231453332 ], "wc_strengths_avg": [ 82.2, 59.596644200827285 ], "wc_weaknesses_avg": [ 195.6, 136.83069830999185 ], "wc_questions_avg": [ 99.0, 103.34215016149025 ], "wc_limitations_avg": [ 23.4, 16.596385148579795 ], "wc_review_avg": [ 517.4, 262.43292476364314 ], "wc_reply_reviewers_avg": [ 21.8, 43.599999999999994 ], "wc_reply_authors_avg": [ 10.2, 20.4 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3450327796711771, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15903142390914851076&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mosaicml.com;mosaicml.com;mosaicml.com;mosaicml.com;;;;;mit.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "MosaicML;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.mosaicml.com;https://web.mit.edu", 
"aff_unique_abbr": "MosaicML;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Counterfactual Conservative Q Learning for Offline Multi-agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72777", "id": "62zmO4mv8X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3f2ff9579ba6deeb89caa2fe1f0b99c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=62zmO4mv8X", "openreview": "https://openreview.net/forum?id=62zmO4mv8X", "poster": "/media/PosterPDFs/NeurIPS%202023/72777.png?t=1701420838.5169265", "slides": "https://nips.cc/virtual/2023/poster/72777", "video": "https://nips.cc/virtual/2023/poster/72777", "author_site": "Jianzhun Shao, Yun Qu, Chen Chen, Hongchang Zhang, Xiangyang Ji", "tldr": "", "abstract": "Offline multi-agent reinforcement learning is challenging due to the coupling effect of both distribution shift issue common in offline setting and the high dimension issue common in multi-agent setting, making the action out-of-distribution (OOD) and value overestimation phenomenon excessively severe. To mitigate this problem, we propose a novel multi-agent offline RL algorithm, named CounterFactual Conservative Q-Learning (CFCQL) to conduct conservative value estimation. Rather than regarding all the agents as a high dimensional single one and directly applying single agent conservative methods to it, CFCQL calculates conservative regularization for each agent separately in a counterfactual way and then linearly combines them to realize an overall conservative value estimation. We prove that it still enjoys the underestimation property and the performance guarantee as those single agent conservative methods do, but the induced regularization and safe policy improvement bound are independent of the agent number, which is therefore theoretically superior to the direct treatment referred to above, especially when the agent number is large. 
We further conduct experiments on four environments, covering both discrete and continuous action settings, on both existing and self-constructed datasets, demonstrating that CFCQL outperforms existing methods on most datasets, even by a remarkable margin on some of them.", "keywords": "multi-agent reinforcement learning;offline reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/aaf97e3915df2c8baac380409ce66329d9b4d1b3.pdf", "author": "Jianzhun Shao;Yun Qu;Chen Chen;Hongchang Zhang;Xiangyang Ji", "authorids": "~Jianzhun_Shao1;~Yun_Qu2;~Chen_Chen3;~Hongchang_Zhang1;~Xiangyang_Ji1", "gender": "M;M;F;M;", "homepage": "https://github.com/qyz55;https://github.com/cloud-qu;;;", "dblp": "263/2309;80/10774-2;;https://dblp.uni-trier.de/pid/36/9348;", "google_scholar": ";l9Ky9goAAAAJ;l8_g4oAAAAAJ;;", "orcid": ";0009-0000-1803-8435;;;", "linkedin": ";;;;", "or_profile": "~Jianzhun_Shao1;~Yun_Qu2;~Chen_Chen3;~Hongchang_Zhang1;~Xiangyang_Ji1", "aff": "Tsinghua University;Tencent TiMi Studio;Qiyuan Lab;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;tencent.com;qiyuanlab.com;tsinghua.edu.cn;", "position": "PhD student;Intern;Researcher;PhD student;", "bibtex": "@inproceedings{\nshao2023counterfactual,\ntitle={Counterfactual Conservative Q Learning for Offline Multi-agent Reinforcement Learning},\nauthor={Jianzhun Shao and Yun Qu and Chen Chen and Hongchang Zhang and Xiangyang Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=62zmO4mv8X}\n}", "github": "", "project": "", "reviewers": "dhyd;rt2G;PGy4;EZbv", "pdf_size": 841769, "rating": "4;4;5;7", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;4", "presentation": "1;3;2;2", "wc_summary": "159;125;112;34", "wc_strengths": "17;18;57;28", "wc_weaknesses": "688;62;50;109", "wc_questions": "112;3;63;120", "wc_limitations": "7;25;20;16", "wc_review": "983;233;302;307", "wc_reply_reviewers": "651;176;0;25", "wc_reply_authors": "763;697;0;0", "reply_reviewers": "3;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.5, 45.77390086064329 ], "wc_strengths_avg": [ 30.0, 16.170961628796228 ], "wc_weaknesses_avg": [ 227.25, 266.9263709340087 ], "wc_questions_avg": [ 74.5, 46.69314724882014 ], "wc_limitations_avg": [ 17.0, 6.59545297913646 ], "wc_review_avg": [ 456.25, 305.5219918434678 ], "wc_reply_reviewers_avg": [ 213.0, 261.6897017461711 ], "wc_reply_authors_avg": [ 365.0, 365.7451298377054 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16959947408590518830&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;tencent.com;qiyuanlab.com;tsinghua.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Tsinghua University;Tencent;Qiyuan Lab", "aff_unique_dep": ";TiMi Studio;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://timi.qq.com;", "aff_unique_abbr": "THU;Tencent;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title":
"No-Regret Learning with Unbounded Losses: The Case of Logarithmic Pooling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72776", "id": "639RkUOmW8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/44ecfb60950e868a13172b935b7964a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=639RkUOmW8", "openreview": "https://openreview.net/forum?id=639RkUOmW8", "poster": "/media/PosterPDFs/NeurIPS%202023/72776.png?t=1699172483.0170417", "slides": "https://nips.cc/virtual/2023/poster/72776", "video": "https://nips.cc/virtual/2023/poster/72776", "author_site": "Eric Neyman, Tim Roughgarden", "tldr": "", "abstract": "For each of $T$ time steps, $m$ experts report probability distributions over $n$ outcomes; we wish to learn to aggregate these forecasts in a way that attains a no-regret guarantee. We focus on the fundamental and practical aggregation method known as *logarithmic pooling* -- a weighted average of log odds -- which is in a certain sense the optimal choice of pooling method if one is interested in minimizing log loss (as we take to be our loss function). We consider the problem of learning the best set of parameters (i.e. expert weights) in an online adversarial setting. We assume (by necessity) that the adversarial choices of outcomes and forecasts are consistent, in the sense that experts report calibrated forecasts. Imposing this constraint creates a (to our knowledge) novel semi-adversarial setting in which the adversary retains a large amount of flexibility. In this setting, we present an algorithm based on online mirror descent that learns expert weights in a way that attains $O(\\sqrt{T} \\log T)$ expected regret as compared with the best weights in hindsight.", "keywords": "Logarithmic pooling;online learning;no-regret learning;calibrated experts;online mirror descent;prediction with expert advice", "primary_area": "", "supplementary_material": "/attachment/2b4466ad0e9d89928a6b3a0aa6c145921af851aa.pdf", "author": "Eric Neyman;Tim Roughgarden", "authorids": "~Eric_Neyman1;~Tim_Roughgarden1", "gender": "M;", "homepage": "https://sites.google.com/view/ericneyman/;https://timroughgarden.org", "dblp": "230/7993;r/TimRoughgarden", "google_scholar": "2lhvq3wAAAAJ;0lcJYs8AAAAJ", "orcid": "0000-0002-6848-8802;", "linkedin": ";", "or_profile": "~Eric_Neyman1;~Tim_Roughgarden1", "aff": "Columbia University;a16z Crypto", "aff_domain": "columbia.edu;a16z.com", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nneyman2023noregret,\ntitle={No-Regret Learning with Unbounded Losses: The Case of Logarithmic Pooling},\nauthor={Eric Neyman and Tim Roughgarden},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=639RkUOmW8}\n}", "github": "", "project": "", "reviewers": "RmFD;XNSv;n3Hc;2zyV", "pdf_size": 497234, "rating": "5;6;6;7", "confidence": "3;3;3;4", "soundness": "3;4;4;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "72;194;235;135", "wc_strengths": "78;52;130;70", "wc_weaknesses": "121;59;141;140", "wc_questions": "29;115;99;127", "wc_limitations": "1;43;2;1", "wc_review": "301;463;607;473", "wc_reply_reviewers": "20;21;35;367", "wc_reply_authors": "0;0;0;206", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 
0.4330127018922193 ], "wc_summary_avg": [ 159.0, 61.53454314448105 ], "wc_strengths_avg": [ 82.5, 28.99568933479596 ], "wc_weaknesses_avg": [ 115.25, 33.439310698637314 ], "wc_questions_avg": [ 92.5, 37.9835490706174 ], "wc_limitations_avg": [ 11.75, 18.046814123273947 ], "wc_review_avg": [ 461.0, 108.47119433287347 ], "wc_reply_reviewers_avg": [ 110.75, 148.0648084454912 ], "wc_reply_authors_avg": [ 51.5, 89.20061658979718 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17908093362552828378&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "columbia.edu;a16z.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;Andreessen Horowitz", "aff_unique_dep": ";Crypto", "aff_unique_url": "https://www.columbia.edu;https://a16z.com/", "aff_unique_abbr": "Columbia;a16z", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Computational Complexity of Learning Neural Networks: Smoothness and Degeneracy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72775", "id": "65aDEXIhih", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0552f14388d95b19740dee809f5cad1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=65aDEXIhih", "openreview": "https://openreview.net/forum?id=65aDEXIhih", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72775", "video": "https://nips.cc/virtual/2023/poster/72775", "author_site": "Amit Daniely, Nati Srebro, Gal Vardi", "tldr": "", "abstract": "Understanding when neural networks can be learned efficiently is a fundamental question in learning theory. Existing hardness results suggest that assumptions on both the input distribution and the network's weights are necessary for obtaining efficient algorithms. Moreover, it was previously shown that depth-$2$ networks can be efficiently learned under the assumptions that the input distribution is Gaussian, and the weight matrix is non-degenerate. In this work, we study whether such assumptions may suffice for learning deeper networks and prove negative results. We show that learning depth-$3$ ReLU networks under the Gaussian input distribution is hard even in the smoothed-analysis framework, where random noise is added to the network's parameters. This implies that learning depth-$3$ ReLU networks under the Gaussian distribution is hard even if the weight matrices are non-degenerate. Moreover, we consider depth-$2$ networks, and show hardness of learning in the smoothed-analysis framework, where both the network parameters and the input distribution are smoothed.
Our hardness results are under a well-studied assumption on the existence of local pseudorandom generators.", "keywords": "Learning neural networks;Computational complexity;Hardness of learning;Smoothed analysis;Degenerate weights", "primary_area": "", "supplementary_material": "/attachment/66b2bddfaad2991abc2a90ba7f1d29dd17ba917d.pdf", "author": "Amit Daniely;Nathan Srebro;Gal Vardi", "authorids": "~Amit_Daniely2;~Nathan_Srebro1;~Gal_Vardi1", "gender": "M;M;M", "homepage": "https://www.cs.huji.ac.il/~amitd/;http://ttic.uchicago.edu/~nati/;https://sites.google.com/view/galvardi/home", "dblp": "19/7805;50/3633;https://dblp.uni-trier.de/pid/167/9638.html", "google_scholar": "https://scholar.google.com.tw/citations?user=jUtYwE0AAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ;https://scholar.google.co.il/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Amit_Daniely2;~Nathan_Srebro1;~Gal_Vardi1", "aff": "Google;University of Chicago;Toyota Technological Institute at Chicago", "aff_domain": "google.com;uchicago.edu;ttic.edu", "position": "Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\ndaniely2023computational,\ntitle={Computational Complexity of Learning Neural Networks: Smoothness and Degeneracy},\nauthor={Amit Daniely and Nathan Srebro and Gal Vardi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=65aDEXIhih}\n}", "github": "", "project": "", "reviewers": "fpWr;nyVW;fAAD;Hkzt", "pdf_size": 385895, "rating": "5;5;6;7", "confidence": "3;3;2;1", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "3;4;2;4", "wc_summary": "71;114;73;110", "wc_strengths": "37;34;87;58", "wc_weaknesses": "1186;208;106;6", "wc_questions": "10;18;248;92", "wc_limitations": "1;6;4;1", "wc_review": "1305;380;518;267", "wc_reply_reviewers": "1280;18;36;15", "wc_reply_authors": "1282;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 92.0, 20.062402647738878 ], "wc_strengths_avg": [ 54.0, 21.17781858454737 ], "wc_weaknesses_avg": [ 376.5, 472.7903869581106 ], "wc_questions_avg": [ 92.0, 95.57196241576293 ], "wc_limitations_avg": [ 3.0, 2.1213203435596424 ], "wc_review_avg": [ 617.5, 406.75944979803484 ], "wc_reply_reviewers_avg": [ 337.25, 544.3562137975464 ], "wc_reply_authors_avg": [ 320.5, 555.1222838258252 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6043125153213774302&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;uchicago.edu;ttic.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "Google;UChicago;TTI Chicago", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Competitive Algorithm for Agnostic Active Learning", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72774", "id": "66XhNDahk6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7385cb3fa76a0aeedb23d4163640db0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=66XhNDahk6", "openreview": "https://openreview.net/forum?id=66XhNDahk6", "poster": "/media/PosterPDFs/NeurIPS%202023/72774.png?t=1702078543.2968743", "slides": "https://nips.cc/virtual/2023/poster/72774", "video": "https://nips.cc/virtual/2023/poster/72774", "author_site": "Yihan Zhou, Eric Price", "tldr": "", "abstract": "For some hypothesis classes and input distributions, \\emph{active}\n agnostic learning needs exponentially fewer samples than passive\n learning; for other classes and distributions, it offers little to\n no improvement. The most popular algorithms for agnostic active\n learning express their performance in terms of a parameter called\n the disagreement coefficient, but it is known that these algorithms\n are inefficient on some inputs.\n\n We take a different approach to agnostic active learning, getting an\n algorithm that is \\emph{competitive} with the optimal algorithm for\n any binary hypothesis class $H$ and distribution $\\mathcal{D}_X$ over $X$.\n In particular, if any algorithm can use $m^*$ queries to get\n $O(\\eta)$ error, then our algorithm uses $O(m^* \\log H)$ queries to\n get $O(\\eta)$ error. Our algorithm lies in the vein of the\n splitting-based approach of Dasgupta [2004], which gets a similar\n result for the realizable ($\\eta = 0$) setting.\n\n We also show that it is NP-hard to do better than our algorithm's\n $O(\\log H)$ overhead in general.", "keywords": "active learning;binary classification;competitive ratio", "primary_area": "", "supplementary_material": "", "author": "Yihan Zhou;Eric Price", "authorids": "~Yihan_Zhou3;~Eric_Price1", "gender": "M;", "homepage": "https://joeyandbluewhale.github.io/;", "dblp": "199/6805;", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": ";", "or_profile": "~Yihan_Zhou3;~Eric_Price1", "aff": "University of Texas at Austin;", "aff_domain": "cs.utexas.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nzhou2023a,\ntitle={A Competitive Algorithm for Agnostic Active Learning},\nauthor={Yihan Zhou and Eric Price},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=66XhNDahk6}\n}", "github": "", "project": "", "reviewers": "wAoo;orr4;YT5S;LeWc", "pdf_size": 373195, "rating": "4;7;7;7", "confidence": "3;3;3;4", "soundness": "4;3;3;4", "novelty": "2;1;3;3", "presentation": "2;2;2;4", "wc_summary": "91;24;158;72", "wc_strengths": "17;34;93;50", "wc_weaknesses": "102;158;288;50", "wc_questions": "1;31;14;27", "wc_limitations": "2;2;1;2", "wc_review": "213;249;554;201", "wc_reply_reviewers": "0;0;119;31", "wc_reply_authors": "0;0;153;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 86.25, 48.0852108241193 ], "wc_strengths_avg": [ 48.5, 28.217902119044926 ], "wc_weaknesses_avg": [ 149.5, 88.61574352224326 ], "wc_questions_avg": [ 18.25, 11.776565713313877 ], "wc_limitations_avg": [ 1.75, 0.4330127018922193 ], "wc_review_avg": [ 304.25, 145.27108280728137 ], 
"wc_reply_reviewers_avg": [ 37.5, 48.72627627882106 ], "wc_reply_authors_avg": [ 38.25, 66.25094338950956 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=534182109511999224&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.utexas.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Revisit the Power of Vanilla Knowledge Distillation: from Small Scale to Large Scale", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72773", "id": "67MTWzhEOn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/204f828ba287fdecf41dd002e9a07d8c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=67MTWzhEOn", "openreview": "https://openreview.net/forum?id=67MTWzhEOn", "poster": "/media/PosterPDFs/NeurIPS%202023/72773.png?t=1697251221.0205212", "slides": "https://nips.cc/virtual/2023/poster/72773", "video": "https://nips.cc/virtual/2023/poster/72773", "author_site": "Zhiwei Hao, Jianyuan Guo, Kai Han, Han Hu, Chang Xu, Yunhe Wang", "tldr": "", "abstract": "The tremendous success of large models trained on extensive datasets demonstrates that scale is a key ingredient in achieving superior results. Therefore, the reflection on the rationality of designing knowledge distillation (KD) approaches for limited-capacity architectures solely based on small-scale datasets is now deemed imperative. In this paper, we identify the small data pitfall that presents in previous KD methods, which results in the underestimation of the power of vanilla KD framework on large-scale datasets such as ImageNet-1K. Specifically, we show that employing stronger data augmentation techniques and using larger datasets can directly decrease the gap between vanilla KD and other meticulously designed KD variants. This highlights the necessity of designing and evaluating KD approaches in the context of practical scenarios, casting off the limitations of small-scale datasets. Our investigation of the vanilla KD and its variants in more complex schemes, including stronger training strategies and different model capacities, demonstrates that vanilla KD is elegantly simple but astonishingly effective in large-scale scenarios. Without bells and whistles, we obtain state-of-the-art ResNet-50, ViT-S, and ConvNeXtV2-T models for ImageNet, which achieve 83.1%, 84.3%, and 85.0% top-1 accuracy, respectively. 
PyTorch code and checkpoints can be found at https://github.com/Hao840/vanillaKD.", "keywords": "knowledge distillation;small-data pitfall;vanilla kd", "primary_area": "", "supplementary_material": "/attachment/644a2aa1c250a5204ccbe85eb59079fa223e7895.pdf", "author": "Zhiwei Hao;Jianyuan Guo;Kai Han;Han Hu;Chang Xu;Yunhe Wang", "authorids": "~Zhiwei_Hao1;~Jianyuan_Guo1;~Kai_Han2;~Han_Hu6;~Chang_Xu4;~Yunhe_Wang1", "gender": ";M;M;;;M", "homepage": ";https://ggjy.github.io/;https://iamhankai.github.io;;;https://www.wangyunhe.site/", "dblp": "125/5604;190/0258;51/4757-2;;;63/8217-1", "google_scholar": "MwDSTNAAAAAJ;https://scholar.google.com/citations?hl=en;vThoBVcAAAAJ;;;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";;0000-0002-9761-2702;;;0000-0002-0142-509X", "linkedin": ";;;;;", "or_profile": "~Zhiwei_Hao1;~Jianyuan_Guo1;~Kai_Han2;~Han_Hu6;~Chang_Xu4;~Yunhe_Wang1", "aff": "Beijing Institute of Technology;University of Sydney;Institute of Software, Chinese Academy of Sciences;;;Huawei Noah's Ark Lab", "aff_domain": "bit.edu.cn;usyd.edu.au;ios.ac.cn;;;huawei.com", "position": "PhD student;PhD student;PhD student;;;Principal Researcher", "bibtex": "@inproceedings{\nhao2023revisit,\ntitle={Revisit the Power of Vanilla Knowledge Distillation: from Small Scale to Large Scale},\nauthor={Zhiwei Hao and Jianyuan Guo and Kai Han and Han Hu and Chang Xu and Yunhe Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=67MTWzhEOn}\n}", "github": "", "project": "", "reviewers": "aER7;bACv;oNYG;mQmQ", "pdf_size": 1650253, "rating": "4;6;6;8", "confidence": "5;5;3;5", "soundness": "2;3;2;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "44;45;51;119", "wc_strengths": "49;67;72;107", "wc_weaknesses": "228;60;81;270", "wc_questions": "3;91;45;5", "wc_limitations": "1;1;6;1", "wc_review": "325;264;255;502", "wc_reply_reviewers": "95;87;19;29", "wc_reply_authors": "472;1133;38;34", "reply_reviewers": "2;2;1;1", "reply_authors": "5;6;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 31.43544973433655 ], "wc_strengths_avg": [ 73.75, 21.016362672927016 ], "wc_weaknesses_avg": [ 159.75, 90.78098644540056 ], "wc_questions_avg": [ 36.0, 35.90264614203248 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 336.5, 99.2736118009212 ], "wc_reply_reviewers_avg": [ 57.5, 33.80458548777074 ], "wc_reply_authors_avg": [ 419.25, 448.8849379295322 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5225390204437106252&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "bit.edu.cn;usyd.edu.au;ios.ac.cn;;;huawei.com", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Beijing Institute of Technology;University of Sydney;Chinese Academy of Sciences;Huawei", "aff_unique_dep": ";;Institute of Software;Noah's Ark Lab", "aff_unique_url": "http://www.bit.edu.cn/;https://www.sydney.edu.au;http://www.ios.ac.cn;https://www.huawei.com", "aff_unique_abbr": "BIT;USYD;CAS;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Australia" }, { "title": "Counterfactual Memorization in Neural Language Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72772", "id": "67o9UQgTD0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7bc4f74e35bcfe8cfe43b0a860786d6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=67o9UQgTD0", "openreview": "https://openreview.net/forum?id=67o9UQgTD0", "poster": "/media/PosterPDFs/NeurIPS%202023/72772.png?t=1701468528.1026165", "slides": "https://nips.cc/virtual/2023/poster/72772", "video": "https://nips.cc/virtual/2023/poster/72772", "author_site": "Chiyuan Zhang, Daphne Ippolito, Katherine Lee, Matthew Jagielski, Florian Tramer, Nicholas Carlini", "tldr": "", "abstract": "Modern neural language models that are widely used in various NLP tasks risk memorizing sensitive information from their training data.\nUnderstanding this memorization is important in real world applications and also from a learning-theoretical perspective. An open question in previous studies of language model memorization is how to filter out ``common'' memorization. In fact, most memorization criteria strongly correlate with the number of occurrences in the training set, capturing memorized familiar phrases, public knowledge, templated texts, or other repeated data.\nWe formulate a notion of counterfactual memorization which characterizes how a model's predictions change if a particular document is omitted during training.\nWe identify and study counterfactually-memorized training examples in standard text datasets.\nWe estimate the influence of each memorized training example on the validation set and on generated texts, showing how this can provide direct evidence of the source of memorization at test time.", "keywords": "Memorization;Language Models", "primary_area": "", "supplementary_material": "/attachment/966034dfed18d9aab4923821093171b9f7450fa3.pdf", "author": "Chiyuan Zhang;Daphne Ippolito;Katherine Lee;Matthew Jagielski;Florian Tram\u00e8r;Nicholas Carlini", "authorids": "~Chiyuan_Zhang1;~Daphne_Ippolito1;~Katherine_Lee1;~Matthew_Jagielski1;~Florian_Tram\u00e8r1;~Nicholas_Carlini1", "gender": "M;F;F;M;;M", "homepage": "http://pluskid.org;http://www.daphnei.com;https://katelee168.github.io/;https://jagielski.github.io/;http://nicholas.carlini.com;http://floriantramer.com", "dblp": "21/8315;192/2031.html;115/5082.html;218/5156;145/1806;158/7224", "google_scholar": "l_G2vr0AAAAJ;;bjdB4K8AAAAJ;_8rw_GMAAAAJ;;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Chiyuan_Zhang1;~Daphne_Ippolito1;~Katherine_Lee1;~Matthew_Jagielski1;~Nicholas_Carlini1;~Florian_Tramer1", "aff": "Google;Carnegie Mellon University;Cornell University;Google;Google;ETHZ - ETH Zurich", "aff_domain": "google.com;cmu.edu;cornell.edu;google.com;google.com;ethz.ch", "position": "Research Scientist;Assistant Professor;PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023counterfactual,\ntitle={Counterfactual Memorization in Neural Language Models},\nauthor={Chiyuan Zhang and Daphne Ippolito and Katherine Lee and Matthew Jagielski and Florian Tram{\\`e}r and Nicholas Carlini},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=67o9UQgTD0}\n}", "github": "", "project": "", "reviewers": 
"dGf9;mGti;AGTS;PFxH;PCjJ;9fjM;DH4T", "pdf_size": 8582648, "rating": "6;6;6;6;7;8;8", "confidence": "3;4;5;4;3;4;3", "soundness": "3;4;3;2;3;3;4", "novelty": "3;3;3;2;3;4;3", "presentation": "2;4;3;2;3;2;4", "wc_summary": "85;201;101;50;104;27;134", "wc_strengths": "37;89;65;33;39;114;119", "wc_weaknesses": "317;111;237;175;72;196;45", "wc_questions": "155;5;32;59;55;111;36", "wc_limitations": "11;11;60;15;7;107;1", "wc_review": "605;417;495;332;277;555;335", "wc_reply_reviewers": "26;4;14;709;36;15;4", "wc_reply_authors": "0;0;0;939;0;0;0", "reply_reviewers": "1;1;1;3;1;1;1", "reply_authors": "1;1;1;4;1;1;1", "rating_avg": [ 6.714285714285714, 0.880630571852711 ], "confidence_avg": [ 3.7142857142857144, 0.6998542122237652 ], "soundness_avg": [ 3.142857142857143, 0.6388765649999398 ], "novelty_avg": [ 3.0, 0.5345224838248488 ], "presentation_avg": [ 2.857142857142857, 0.8329931278350429 ], "wc_summary_avg": [ 100.28571428571429, 52.681561658473164 ], "wc_strengths_avg": [ 70.85714285714286, 34.09410745664574 ], "wc_weaknesses_avg": [ 164.71428571428572, 88.89273517755508 ], "wc_questions_avg": [ 64.71428571428571, 47.63166499508579 ], "wc_limitations_avg": [ 30.285714285714285, 36.145397990869704 ], "wc_review_avg": [ 430.85714285714283, 115.06324791352 ], "wc_reply_reviewers_avg": [ 115.42857142857143, 242.55771457110816 ], "wc_reply_authors_avg": [ 134.14285714285714, 328.58155263905775 ], "reply_reviewers_avg": [ 1.2857142857142858, 0.6998542122237652 ], "reply_authors_avg": [ 1.4285714285714286, 1.0497813183356477 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.36424639819288707, "gs_citation": 172, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11355254777898331315&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "google.com;cmu.edu;cornell.edu;google.com;google.com;ethz.ch", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "Google;Carnegie Mellon University;Cornell University;ETH Zurich", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.cmu.edu;https://www.cornell.edu;https://www.ethz.ch", "aff_unique_abbr": "Google;CMU;Cornell;ETHZ", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Logarithmic-Regret Quantum Learning Algorithms for Zero-Sum Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72771", "id": "69dAz94zPv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/637df18481a6aa74238bd2cafff94cb9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=69dAz94zPv", "openreview": "https://openreview.net/forum?id=69dAz94zPv", "poster": "/media/PosterPDFs/NeurIPS%202023/72771.png?t=1701407594.9431574", "slides": "https://nips.cc/virtual/2023/poster/72771", "video": "https://nips.cc/virtual/2023/poster/72771", "author_site": "Minbo Gao, Zhengfeng Ji, Tongyang Li, Qisheng Wang", "tldr": "", "abstract": "We propose the first online quantum algorithm for zero-sum games with $\\widetilde O(1)$ regret under the game setting. Moreover, our quantum algorithm computes an $\\varepsilon$-approximate Nash equilibrium of an $m \\times n$ matrix zero-sum game in quantum time $\\widetilde O(\\sqrt{m+n}/\\varepsilon^{2.5})$. Our algorithm uses standard quantum inputs and generates classical outputs with succinct descriptions, facilitating end-to-end applications. 
Technically, our online quantum algorithm \"quantizes\" classical algorithms based on the optimistic multiplicative weight update method. At the heart of our algorithm is a fast quantum multi-sampling procedure for the Gibbs sampling problem, which may be of independent interest.", "keywords": "Online learning;quantum computing;zero-sum games;optimistic multiplicative weight update", "primary_area": "", "supplementary_material": "/attachment/38362c1ff2b832e53adbc068ea73f584960fc9fb.pdf", "author": "Minbo Gao;Zhengfeng Ji;Tongyang Li;Qisheng Wang", "authorids": "~Minbo_Gao1;~Zhengfeng_Ji1;~Tongyang_Li1;~Qisheng_Wang1", "gender": ";Not Specified;M;", "homepage": ";;https://www.tongyangli.com/;https://wangqs13.github.io/", "dblp": ";30/2575.html;142/1312;", "google_scholar": ";https://scholar.google.com.au/citations?user=2uXdu7AAAAAJ;ny0ZgiQAAAAJ;", "orcid": ";;0000-0002-0338-413X;0000-0001-5107-8279", "linkedin": ";;;", "or_profile": "~Minbo_Gao1;~Zhengfeng_Ji1;~Tongyang_Li1;~Qisheng_Wang1", "aff": ";Tsinghua University;Peking University;Nagoya University", "aff_domain": ";tsinghua.edu.cn;pku.edu.cn;nagoya-u.ac.jp", "position": ";Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2023logarithmicregret,\ntitle={Logarithmic-Regret Quantum Learning Algorithms for Zero-Sum Games},\nauthor={Minbo Gao and Zhengfeng Ji and Tongyang Li and Qisheng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=69dAz94zPv}\n}", "github": "", "project": "", "reviewers": "XSuW;UumW;42Uz;8qKg", "pdf_size": 358732, "rating": "6;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;4", "wc_summary": "89;81;148;77", "wc_strengths": "54;140;52;26", "wc_weaknesses": "100;245;146;33", "wc_questions": "6;556;34;33", "wc_limitations": "1;29;1;26", "wc_review": "250;1051;381;195", "wc_reply_reviewers": "14;34;24;48", "wc_reply_authors": "0;167;18;22", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 98.75, 28.760867511255636 ], "wc_strengths_avg": [ 68.0, 43.01162633521314 ], "wc_weaknesses_avg": [ 131.0, 77.11355263505891 ], "wc_questions_avg": [ 157.25, 230.49227210472804 ], "wc_limitations_avg": [ 14.25, 13.292385038058445 ], "wc_review_avg": [ 469.25, 342.60208332699904 ], "wc_reply_reviewers_avg": [ 30.0, 12.569805089976535 ], "wc_reply_authors_avg": [ 51.75, 67.05361660641431 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5517684952657501200&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";tsinghua.edu.cn;pku.edu.cn;nagoya-u.ac.jp", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;Peking University;Nagoya University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn;https://www.nagoya-u.ac.jp", "aff_unique_abbr": "THU;Peking U;Nagoya U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Japan" }, { "title": "Causal Fairness for Outcome Control", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72770", "id": "6AAbWSF6Qg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/948552777302d3abf92415b1d7e9de70-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6AAbWSF6Qg", "openreview": "https://openreview.net/forum?id=6AAbWSF6Qg", "poster": "/media/PosterPDFs/NeurIPS%202023/72770.png?t=1702485980.0973198", "slides": "https://nips.cc/virtual/2023/poster/72770", "video": "https://nips.cc/virtual/2023/poster/72770", "author_site": "Drago Plecko, Elias Bareinboim", "tldr": "", "abstract": "As society transitions towards an AI-based decision-making infrastructure, an ever-increasing number of decisions once under control of humans are now delegated to automated systems. Even though such developments make various parts of society more efficient, a large body of evidence suggests that a great deal of care needs to be taken to make such automated decision-making systems fair and equitable, namely, taking into account sensitive attributes such as gender, race, and religion. In this paper, we study a specific decision-making task called outcome control in which an automated system aims to optimize an outcome variable $Y$ while being fair and equitable. The interest in such a setting ranges from interventions related to criminal justice and welfare, all the way to clinical decision-making and public health. In this paper, we first analyze through causal lenses the notion of benefit, which captures how much a specific individual would benefit from a positive decision, counterfactually speaking, when contrasted with an alternative, negative one. We introduce the notion of benefit fairness, which can be seen as the minimal fairness requirement in decision-making, and develop an algorithm for satisfying it. We then note that the benefit itself may be influenced by the protected attribute, and propose causal tools which can be used to analyze this. Finally, if some of the variations of the protected attribute in the benefit are considered as discriminatory, the notion of benefit fairness may need to be strengthened, which leads us to articulating a notion of causal benefit fairness. 
Using this notion, we develop a new optimization procedure capable of maximizing $Y$ while ascertaining causal fairness in the decision process.", "keywords": "Fair Machine Learning;Causal Inference;Decision-Making", "primary_area": "", "supplementary_material": "/attachment/22b5538a9597d9c9cadf024e8c74534a9157a315.zip", "author": "Drago Plecko;Elias Bareinboim", "authorids": "~Drago_Plecko1;~Elias_Bareinboim2", "gender": "M;M", "homepage": "https://people.math.ethz.ch/~pleckod/;https://causalai.net", "dblp": "254/3058;85/9005", "google_scholar": ";r5U-D7YAAAAJ", "orcid": "0000-0002-5433-196X;", "linkedin": ";", "or_profile": "~Drago_Plecko1;~Elias_Bareinboim2", "aff": "Columbia University;Columbia University", "aff_domain": "cs.columbia.edu;columbia.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nplecko2023causal,\ntitle={Causal Fairness for Outcome Control},\nauthor={Drago Plecko and Elias Bareinboim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6AAbWSF6Qg}\n}", "github": "", "project": "", "reviewers": "pGG2;BFhw;H5h5;9Jxk", "pdf_size": 1628531, "rating": "6;7;7;7", "confidence": "4;4;4;3", "soundness": "3;4;3;3", "novelty": "2;4;3;3", "presentation": "2;3;3;3", "wc_summary": "177;49;73;76", "wc_strengths": "166;77;54;43", "wc_weaknesses": "635;77;166;34", "wc_questions": "18;96;124;140", "wc_limitations": "22;38;1;15", "wc_review": "1018;337;418;308", "wc_reply_reviewers": "42;87;72;6", "wc_reply_authors": "43;42;117;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.75, 49.190319169527655 ], "wc_strengths_avg": [ 85.0, 48.34769901453429 ], "wc_weaknesses_avg": [ 228.0, 239.75508336633865 ], "wc_questions_avg": [ 94.5, 46.89083066016212 ], "wc_limitations_avg": [ 19.0, 13.322912594474229 ], "wc_review_avg": [ 520.25, 290.18991626174744 ], "wc_reply_reviewers_avg": [ 51.75, 30.986892390170397 ], "wc_reply_authors_avg": [ 50.5, 42.13371571556442 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12873956028604995200&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "In-Context Learning Unlocked for Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72769", "id": "6BZS2EAkns", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b3750390ca8b931fb9ca988647940cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6BZS2EAkns", "openreview": "https://openreview.net/forum?id=6BZS2EAkns", "poster": "/media/PosterPDFs/NeurIPS%202023/72769.png?t=1701900733.7568343", "slides": "https://nips.cc/virtual/2023/poster/72769", "video": 
"https://nips.cc/virtual/2023/poster/72769", "author_site": "Zhendong Wang, Yifan Jiang, Yadong Lu, yelong shen, Pengcheng He, Weizhu Chen, Zhangyang \"Atlas\" Wang, Mingyuan Zhou", "tldr": "", "abstract": "We present Prompt Diffusion, a framework for enabling in-context learning in diffusion-based generative models. Given a pair of task-specific example images, such as depth from/to image and scribble from/to image, and a text guidance, our model automatically understands the underlying task and performs the same task on a new query image following the text guidance. To achieve this, we propose a vision-language prompt that can model a wide range of vision-language tasks and a diffusion model that takes it as input. The diffusion model is trained jointly on six different tasks using these prompts. The resulting Prompt Diffusion model becomes the first diffusion-based vision-language foundation model capable of in-context learning. It demonstrates high-quality in-context generation for the trained tasks and effectively generalizes to new, unseen vision tasks using their respective prompts. Our model also shows compelling text-guided image editing results. Our framework aims to facilitate research into in-context learning for computer vision. We share our code and pre-trained models at https://github.com/Zhendong-Wang/Prompt-Diffusion.", "keywords": "diffusion models;in-context learning", "primary_area": "", "supplementary_material": "/attachment/76b23d4a40035d51fe5a19db0bd4774df3abd2f7.zip", "author": "Zhendong Wang;Yifan Jiang;Yadong Lu;yelong shen;Pengcheng He;Weizhu Chen;Zhangyang Wang;Mingyuan Zhou", "authorids": "~Zhendong_Wang1;~Yifan_Jiang2;~Yadong_Lu1;~yelong_shen1;~Pengcheng_He2;~Weizhu_Chen1;~Zhangyang_Wang1;~Mingyuan_Zhou1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://zhendong-wang.github.io/;https://yifanjiang19.github.io/;https://adamlu123.github.io/;;;https://www.microsoft.com/en-us/research/people/wzchen/;https://vita-group.github.io;http://mingyuanzhou.github.io", "dblp": ";81/7246-1;64/7873;;116/8665;79/2536;119/4026;", "google_scholar": "lRiIjhcAAAAJ;PMeFEOIAAAAJ;Y69ahdAAAAAJ;;https://scholar.google.com/citations?hl=en;LG_E-4EAAAAJ;pxFyKAIAAAAJ;LXwCIisAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Zhendong_Wang1;~Yifan_Jiang2;~Yadong_Lu1;~yelong_shen1;~Pengcheng_He2;~Weizhu_Chen1;~Zhangyang_Wang1;~Mingyuan_Zhou1", "aff": "University of Texas at Austin;University of Texas, Austin;Microsoft;;Microsoft;Microsoft GenAI;University of Texas, Austin;Google", "aff_domain": "utexas.edu;utexas.edu;microsoft.com;;microsoft.com;microsoft.com;utexas.edu;google.com", "position": "PhD student;PhD student;Researcher;;Principal Researcher;Vice President;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nwang2023incontext,\ntitle={In-Context Learning Unlocked for Diffusion Models},\nauthor={Zhendong Wang and Yifan Jiang and Yadong Lu and yelong shen and Pengcheng He and Weizhu Chen and Zhangyang Wang and Mingyuan Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6BZS2EAkns}\n}", "github": "", "project": "", "reviewers": "Gb8x;vt4T;foSi;7sFz", "pdf_size": 14444081, "rating": "5;7;7;8", "confidence": "4;4;3;5", "soundness": "2;3;3;3", "novelty": "3;4;3;3", "presentation": "3;4;3;4", "wc_summary": "65;61;99;65", "wc_strengths": "55;185;34;142", "wc_weaknesses": "136;159;102;148", "wc_questions": "211;2;1;5", "wc_limitations": "52;9;53;1", "wc_review": "519;416;289;361", 
"wc_reply_reviewers": "313;0;17;22", "wc_reply_authors": "1528;0;31;15", "reply_reviewers": "2;0;1;1", "reply_authors": "6;1;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 72.5, 15.386682553429118 ], "wc_strengths_avg": [ 104.0, 61.85870997684966 ], "wc_weaknesses_avg": [ 136.25, 21.3819433167334 ], "wc_questions_avg": [ 54.75, 90.22298764727313 ], "wc_limitations_avg": [ 28.75, 23.920441049445557 ], "wc_review_avg": [ 396.25, 83.96837202185118 ], "wc_reply_reviewers_avg": [ 88.0, 130.15951751600804 ], "wc_reply_authors_avg": [ 393.5, 655.0956037098707 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9458761225543586202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "utexas.edu;utexas.edu;microsoft.com;;microsoft.com;microsoft.com;utexas.edu;google.com", "author_num": 8, "aff_unique_index": "0;0;1;1;1;0;2", "aff_unique_norm": "University of Texas at Austin;Microsoft;Google", "aff_unique_dep": ";Microsoft Corporation;Google", "aff_unique_url": "https://www.utexas.edu;https://www.microsoft.com;https://www.google.com", "aff_unique_abbr": "UT Austin;Microsoft;Google", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Austin;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DDF-HO: Hand-Held Object Reconstruction via Conditional Directed Distance Field", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72768", "id": "6EDHfVHicP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2876deb92cbd098219a10da25671577-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6EDHfVHicP", "openreview": "https://openreview.net/forum?id=6EDHfVHicP", "poster": "/media/PosterPDFs/NeurIPS%202023/72768.png?t=1699883239.2286425", "slides": "https://nips.cc/virtual/2023/poster/72768", "video": "https://nips.cc/virtual/2023/poster/72768", "author_site": "Chenyangguang Zhang, Yan Di, Ruida Zhang, Guangyao Zhai, Fabian Manhardt, Federico Tombari, Xiangyang Ji", "tldr": "", "abstract": "Reconstructing hand-held objects from a single RGB image is an important and challenging problem. Existing works utilizing Signed Distance Fields (SDF) reveal limitations in comprehensively capturing the complex hand-object interactions, since SDF is only reliable within the proximity of the target, and hence, infeasible to simultaneously encode local hand and object cues. To address this issue, we propose DDF-HO, a novel approach leveraging Directed Distance Field (DDF) as the shape representation. Unlike SDF, DDF maps a ray in 3D space, consisting of an origin and a direction, to corresponding DDF values, including a binary visibility signal determining whether the ray intersects the objects and a distance value measuring the distance from origin to target in the given direction. We randomly sample multiple rays and collect local to global geometric features for them by introducing a novel 2D ray-based feature aggregation scheme and a 3D intersection-aware hand pose embedding, combining 2D-3D features to model hand-object interactions. 
Extensive experiments on synthetic and real-world datasets demonstrate that DDF-HO consistently outperforms all baseline methods by a large margin, especially in terms of Chamfer Distance, where it achieves an improvement of about 80%. Code is available at https://github.com/ZhangCYG/DDFHO.", "keywords": "hand-held object reconstruction;directed distance field;human-object interaction", "primary_area": "", "supplementary_material": "/attachment/149e6612f562affb3b2703d5013f4b6ac5990da7.pdf", "author": "Chenyangguang Zhang;Yan Di;Ruida Zhang;Guangyao Zhai;Fabian Manhardt;Federico Tombari;Xiangyang Ji", "authorids": "~Chenyangguang_Zhang1;~Yan_Di2;~Ruida_Zhang1;~Guangyao_Zhai1;~Fabian_Manhardt1;~Federico_Tombari1;~Xiangyang_Ji1", "gender": "M;M;M;M;M;M;", "homepage": "https://zhangcyg.github.io/;;https://lolrudy.github.io/;https://ymxlzgy.com/;http://campar.in.tum.de/Main/FabianManhardt;https://federicotombari.github.io/;", "dblp": "336/2805;274/9623;317/0062;243/2753;173/9271;16/3539;", "google_scholar": "https://scholar.google.com.sg/citations?user=-sCslRcAAAAJ;HSlGGvwAAAAJ;J4u6VicAAAAJ;X_djKCUAAAAJ;https://scholar.google.de/citations?user=bERItx8AAAAJ;TFsE4BIAAAAJ;", "orcid": ";0000-0003-0671-8323;;0000-0002-6702-8302;0000-0002-4577-4590;0000-0001-5598-5212;", "linkedin": "chenyangguangzhang/;;;guangyao-zhai-1525a6b7/;;fedet/;", "or_profile": "~Chenyangguang_Zhang1;~Yan_Di2;~Ruida_Zhang1;~Guangyao_Zhai1;~Fabian_Manhardt1;~Federico_Tombari1;~Xiangyang_Ji1", "aff": "Tsinghua University;Technische Universit\u00e4t M\u00fcnchen;Tsinghua University;Technische Universit\u00e4t M\u00fcnchen;Google;Technical University Munich (TUM);", "aff_domain": "tsinghua.edu.cn;tum.de;mails.tsinghua.edu.cn;tum.de;google.com;in.tum.de;", "position": "MS student;PhD student;PhD student;PhD student;Researcher;Lecturer;", "bibtex": "@inproceedings{\nzhang2023ddfho,\ntitle={{DDF}-{HO}: Hand-Held Object Reconstruction via Conditional Directed Distance Field},\nauthor={Chenyangguang Zhang and Yan Di and Ruida Zhang and Guangyao Zhai and Fabian Manhardt and Federico Tombari and Xiangyang Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6EDHfVHicP}\n}", "github": "", "project": "", "reviewers": "9guR;1CC4;7sso;hGDA;dmTA;e2Ui", "pdf_size": 997072, "rating": "3;4;4;6;6;8", "confidence": "3;4;3;3;5;2", "soundness": "2;3;3;3;3;3", "novelty": "2;3;3;3;3;3", "presentation": "1;2;3;3;2;3", "wc_summary": "114;73;44;119;57;61", "wc_strengths": "35;48;56;194;130;56", "wc_weaknesses": "219;147;92;242;156;30", "wc_questions": "16;160;2;26;8;10", "wc_limitations": "1;19;8;11;47;44", "wc_review": "385;447;202;592;398;201", "wc_reply_reviewers": "0;0;0;166;140;61", "wc_reply_authors": "0;0;0;308;674;0", "reply_reviewers": "0;0;0;1;1;1", "reply_authors": "1;1;1;2;2;1", "rating_avg": [ 5.166666666666667, 1.6749792701868151 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 78.0, 28.54236617147686 ], "wc_strengths_avg": [ 86.5, 56.92612171343955 ], "wc_weaknesses_avg": [ 147.66666666666666, 71.88107462252417 ], "wc_questions_avg": [ 37.0, 55.50675634551167 ], "wc_limitations_avg": [ 21.666666666666668, 17.679240808234827 ], "wc_review_avg": [ 370.8333333333333, 137.23631281681813 ], "wc_reply_reviewers_avg": [ 61.166666666666664, 68.83414043110358 ],
"wc_reply_authors_avg": [ 163.66666666666666, 254.43379405172487 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.24625914065560206, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3642661841186764911&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tum.de;mails.tsinghua.edu.cn;tum.de;google.com;in.tum.de;", "author_num": 7, "aff_unique_index": "0;1;0;1;2;3", "aff_unique_norm": "Tsinghua University;Technische Universit\u00e4t M\u00fcnchen;Google;Technical University Munich", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.tum.de;https://www.google.com;https://www.tum.de", "aff_unique_abbr": "THU;TUM;Google;TUM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;2;1", "aff_country_unique": "China;Germany;United States" }, { "title": "LinkerNet: Fragment Poses and Linker Co-Design with 3D Equivariant Diffusion", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72767", "id": "6EaLIw3W7c", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4821075019a058700f6e6738eea1365-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6EaLIw3W7c", "openreview": "https://openreview.net/forum?id=6EaLIw3W7c", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72767", "video": "https://nips.cc/virtual/2023/poster/72767", "author_site": "Jiaqi Guan, Xingang Peng, PeiQi Jiang, Yunan Luo, Jian Peng, Jianzhu Ma", "tldr": "", "abstract": "Targeted protein degradation techniques, such as PROteolysis TArgeting Chimeras (PROTACs), have emerged as powerful tools for selectively removing disease-causing proteins. One challenging problem in this field is designing a linker to connect different molecular fragments to form a stable drug-candidate molecule. Existing models for linker design assume that the relative positions of the fragments are known, which may not be the case in real scenarios. In this work, we address a more general problem where the poses of the fragments are *unknown* in 3D space. We develop a 3D equivariant diffusion model that jointly learns the generative process of both fragment poses and the 3D structure of the linker. By viewing fragments as rigid bodies, we design a fragment pose prediction module inspired by the Newton-Euler equations in rigid body mechanics. 
Empirical studies on ZINC and PROTAC-DB datasets demonstrate that our model can generate chemically valid, synthetically-accessible, and low-energy molecules under both unconstrained and constrained generation settings.", "keywords": "Linker design;generative models", "primary_area": "", "supplementary_material": "", "author": "Jiaqi Guan;Xingang Peng;PeiQi Jiang;Yunan Luo;Jian Peng;Jianzhu Ma", "authorids": "~Jiaqi_Guan1;~Xingang_Peng1;~PeiQi_Jiang2;~Yunan_Luo1;~Jian_Peng1;~Jianzhu_Ma2", "gender": "M;;M;;M;M", "homepage": "http://jiaqi.web.illinois.edu/;https://github.com/pengxingang;https://github.com/jpq20;https://faculty.cc.gatech.edu/~yunan/;http://jianpeng.web.engr.illinois.edu/;https://majianzhu.com/", "dblp": "207/7593;223/2200;;225/8950;29/4181-1;24/9080.html", "google_scholar": "On-ONT4AAAAJ;6yMuAlgAAAAJ;;N8RBFoAAAAAJ;https://scholar.google.com.tw/citations?user=4wcAVXAAAAAJ;", "orcid": ";;;0000-0001-7728-6412;;", "linkedin": ";;;;;", "or_profile": "~Jiaqi_Guan1;~Xingang_Peng1;~PeiQi_Jiang2;~Yunan_Luo1;~Jian_Peng1;~Jianzhu_Ma2", "aff": "University of Illinois, Urbana Champaign;Peking University;Tsinghua University;Georgia Institute of Technology;University of Illinois, Urbana Champaign;Tsinghua University", "aff_domain": "illinois.edu;pku.edu.cn;mail.tsinghua.edu.cn;gatech.edu;illinois.edu;tsinghua.edu.cn", "position": "PhD student;PhD student;Undergrad student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nguan2023linkernet,\ntitle={LinkerNet: Fragment Poses and Linker Co-Design with 3D Equivariant Diffusion},\nauthor={Jiaqi Guan and Xingang Peng and PeiQi Jiang and Yunan Luo and Jian Peng and Jianzhu Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6EaLIw3W7c}\n}", "github": "", "project": "", "reviewers": "ZLo5;4AKh;aUNA;kzw2", "pdf_size": 4084931, "rating": "6;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "40;272;49;40", "wc_strengths": "75;246;34;31", "wc_weaknesses": "24;26;53;65", "wc_questions": "4;43;395;57", "wc_limitations": "7;1;8;18", "wc_review": "150;588;539;211", "wc_reply_reviewers": "16;18;15;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.25, 99.22795724996055 ], "wc_strengths_avg": [ 96.5, 88.0468625221819 ], "wc_weaknesses_avg": [ 42.0, 17.53567791675018 ], "wc_questions_avg": [ 124.75, 157.23290845112547 ], "wc_limitations_avg": [ 8.5, 6.103277807866851 ], "wc_review_avg": [ 372.0, 193.4877257088935 ], "wc_reply_reviewers_avg": [ 19.0, 4.743416490252569 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13102232806215668271&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "illinois.edu;pku.edu.cn;mail.tsinghua.edu.cn;gatech.edu;illinois.edu;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Peking University;Tsinghua University;Georgia Institute of Technology", 
"aff_unique_dep": ";;;", "aff_unique_url": "https://illinois.edu;http://www.pku.edu.cn;https://www.tsinghua.edu.cn;https://www.gatech.edu", "aff_unique_abbr": "UIUC;Peking U;THU;Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Revisiting Scalarization in Multi-Task Learning: A Theoretical Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72766", "id": "6EqUpqMnwl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97c8a8eb0e5231d107d0da51b79e09cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6EqUpqMnwl", "openreview": "https://openreview.net/forum?id=6EqUpqMnwl", "poster": "/media/PosterPDFs/NeurIPS%202023/72766.png?t=1701023295.3270516", "slides": "https://nips.cc/virtual/2023/poster/72766", "video": "https://nips.cc/virtual/2023/poster/72766", "author_site": "Yuzheng Hu, Ruicheng Xian, Qilong Wu, Qiuling Fan, Lang Yin, Han Zhao", "tldr": "", "abstract": "Linear scalarization, i.e., combining all loss functions by a weighted sum, has been the default choice in the literature of multi-task learning (MTL) since its inception. In recent years, there is a surge of interest in developing Specialized Multi-Task Optimizers (SMTOs) that treat MTL as a multi-objective optimization problem. However, it remains open whether there is a fundamental advantage of SMTOs over scalarization. In fact, heated debates exist in the community comparing these two types of algorithms, mostly from an empirical perspective. To approach the above question, in this paper, we revisit scalarization from a theoretical perspective. We focus on linear MTL models and study whether scalarization is capable of fully exploring the Pareto front. Our findings reveal that, in contrast to recent works that claimed empirical advantages of scalarization, scalarization is inherently incapable of full exploration, especially for those Pareto optimal solutions that strike the balanced trade-offs between multiple tasks. More concretely, when the model is under-parametrized, we reveal a multi-surface structure of the feasible region and identify necessary and sufficient conditions for full exploration. This leads to the conclusion that scalarization is in general incapable of tracing out the Pareto front. Our theoretical results partially answer the open questions in Xin et al. (2021), and provide a more intuitive explanation on why scalarization fails beyond non-convexity. We additionally perform experiments on a real-world dataset using both scalarization and state-of-the-art SMTOs. 
The experimental results not only corroborate our theoretical findings, but also unveil the potential of SMTOs in finding balanced solutions, which cannot be achieved by scalarization.", "keywords": "multi-task learning;scalarization;Pareto front", "primary_area": "", "supplementary_material": "/attachment/edccc102e963c45bad11af81b6a0e856eac35e7d.pdf", "author": "Yuzheng Hu;Ruicheng Xian;Qilong Wu;Qiuling Fan;Lang Yin;Han Zhao", "authorids": "~Yuzheng_Hu1;~Ruicheng_Xian1;~Qilong_Wu1;qiuling2@illinois.edu;~Lang_Yin1;~Han_Zhao1", "gender": "M;M;M;;M;M", "homepage": "https://mirnegg.github.io;https://rxian.github.io;https://www.qilongwu.com/;;;https://hanzhaoml.github.io/", "dblp": "231/2255.html;243/3086.html;233/1786-2;;324/7991.html;03/3520-2", "google_scholar": "cVVimVcAAAAJ;Nmk26z4AAAAJ;ad6T5ewAAAAJ;;;x942ipYAAAAJ", "orcid": ";;;;;0000-0002-8579-1600", "linkedin": "yuzheng-hu-a74b5823b/;;qilong-wu-461150257/;;lang-yin-813222a0/;", "or_profile": "~Yuzheng_Hu1;~Ruicheng_Xian1;~Qilong_Wu1;qiuling2@illinois.edu;~Lang_Yin1;~Han_Zhao1", "aff": "University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;Thomas M. Siebel Center for Computer Science, University of Illinois at Urbana-Champaign;;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;illinois.edu;cs.illinois.edu;;illinois.edu;illinois.edu", "position": "PhD student;PhD student;MS student;;MS student;Assistant Professor", "bibtex": "@inproceedings{\nhu2023revisiting,\ntitle={Revisiting Scalarization in Multi-Task Learning: A Theoretical Perspective},\nauthor={Yuzheng Hu and Ruicheng Xian and Qilong Wu and Qiuling Fan and Lang Yin and Han Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6EqUpqMnwl}\n}", "github": "", "project": "", "reviewers": "CFe6;c3Pi;nBcV;2hHW;J52d;aGcP", "pdf_size": 7387960, "rating": "5;5;5;6;6;7", "confidence": "2;3;4;3;2;4", "soundness": "3;3;2;3;3;3", "novelty": "3;2;2;3;3;3", "presentation": "3;3;2;3;3;2", "wc_summary": "51;66;180;87;66;80", "wc_strengths": "70;57;69;79;62;35", "wc_weaknesses": "87;299;533;126;54;213", "wc_questions": "41;251;130;4;4;10", "wc_limitations": "4;51;37;23;1;1", "wc_review": "253;724;949;319;187;339", "wc_reply_reviewers": "38;248;284;129;42;121", "wc_reply_authors": "10;249;908;520;15;500", "reply_reviewers": "2;1;2;2;1;1", "reply_authors": "2;2;4;3;2;2", "rating_avg": [ 5.666666666666667, 0.7453559924999299 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 88.33333333333333, 42.554540167752826 ], "wc_strengths_avg": [ 62.0, 13.880441875771343 ], "wc_weaknesses_avg": [ 218.66666666666666, 162.4445204438187 ], "wc_questions_avg": [ 73.33333333333333, 90.78117768690943 ], "wc_limitations_avg": [ 19.5, 19.302417810557653 ], "wc_review_avg": [ 461.8333333333333, 277.0971049209204 ], "wc_reply_reviewers_avg": [ 143.66666666666666, 93.81660596906902 ], "wc_reply_authors_avg": [ 367.0, 315.9789022492905 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.7637626158259734 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2738612787525831, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15478113180965535669&as_sdt=80000005&sciodt=0,23&hl=en", 
"gs_version_total": 7, "email": "uiuc.edu;illinois.edu;cs.illinois.edu;;illinois.edu;illinois.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Certified Minimax Unlearning with Generalization Rates and Deletion Capacity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72765", "id": "6H8Md75kAw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c69465280855cfe25d566e359da140c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6H8Md75kAw", "openreview": "https://openreview.net/forum?id=6H8Md75kAw", "poster": "/media/PosterPDFs/NeurIPS%202023/72765.png?t=1701876023.965834", "slides": "https://nips.cc/virtual/2023/poster/72765", "video": "https://nips.cc/virtual/2023/poster/72765", "author_site": "Jiaqi Liu, Jian Lou, Zhan Qin, Kui Ren", "tldr": "", "abstract": "We study the problem of $(\\epsilon,\\delta)$-certified machine unlearning for minimax models. Most of the existing works focus on unlearning from standard statistical learning models that have a single variable and their unlearning steps hinge on the direct Hessian-based conventional Newton update. We develop a new $(\\epsilon,\\delta)$-certified machine unlearning algorithm for minimax models. It proposes a minimax unlearning step consisting of a total Hessian-based complete Newton update and the Gaussian mechanism borrowed from differential privacy. To obtain the unlearning certification, our method injects calibrated Gaussian noises by carefully analyzing the ''sensitivity'' of the minimax unlearning step (i.e., the closeness between the minimax unlearning variables and the retraining-from-scratch variables). We derive the generalization rates in terms of population strong and weak primal-dual risk for three different cases of loss functions, i.e., (strongly-)convex-(strongly-)concave losses. We also provide the deletion capacity to guarantee that a desired population risk can be maintained as long as the number of deleted samples does not exceed the derived amount. With training samples $n$ and model dimension $d$, it yields the order $\\mathcal O(n/d^{1/4})$, which shows a strict gap over the baseline method of differentially private minimax learning that has $\\mathcal O(n/d^{1/2})$. 
In addition, our rates of generalization and deletion capacity match the state-of-the-art rates derived previously for standard statistical learning models.", "keywords": "machine unlearning;machine learning privacy;minimax learning;certified removal", "primary_area": "", "supplementary_material": "/attachment/e86ea53f00dd26136d7ad62643932c63422466a4.pdf", "author": "Jiaqi Liu;Jian Lou;Zhan Qin;Kui Ren", "authorids": "~Jiaqi_Liu3;~Jian_Lou2;~Zhan_Qin2;~Kui_Ren4", "gender": "F;;M;M", "homepage": "https://kakiiliu.github.io/;https://sites.google.com/view/jianlou;https://person.zju.edu.cn/en/zhanqin;", "dblp": "51/2773-3;05/4625-1;148/4477;20/6179-1.html", "google_scholar": ";;;https://scholar.google.com/citations?view_op=list_works", "orcid": ";0000-0002-4110-2068;;0000-0003-3441-6277", "linkedin": ";;;", "or_profile": "~Jiaqi_Liu3;~Jian_Lou2;~Zhan_Qin2;~Kui_Ren4", "aff": "Zhejiang University;www.hoiying.net;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;hoiying.net;zju.edu.cn;zju.edu.cn", "position": "PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023certified,\ntitle={Certified Minimax Unlearning with Generalization Rates and Deletion Capacity},\nauthor={Jiaqi Liu and Jian Lou and Zhan Qin and Kui Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6H8Md75kAw}\n}", "github": "", "project": "", "reviewers": "fuZ1;sP3P;R8EP;DtDD;kPH1", "pdf_size": 581083, "rating": "6;6;6;6;7", "confidence": "4;4;3;4;4", "soundness": "3;3;3;2;4", "novelty": "3;2;3;2;3", "presentation": "4;2;3;3;3", "wc_summary": "108;70;39;68;36", "wc_strengths": "83;21;104;53;118", "wc_weaknesses": "68;185;56;330;103", "wc_questions": "51;1;143;27;196", "wc_limitations": "112;1;1;31;10", "wc_review": "422;278;343;509;463", "wc_reply_reviewers": "13;98;9;42;13", "wc_reply_authors": "0;203;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 64.2, 26.064535292231856 ], "wc_strengths_avg": [ 75.8, 35.0736368231183 ], "wc_weaknesses_avg": [ 148.4, 101.37179094797527 ], "wc_questions_avg": [ 83.6, 73.82574076837969 ], "wc_limitations_avg": [ 31.0, 41.95712096891301 ], "wc_review_avg": [ 403.0, 82.97228452923301 ], "wc_reply_reviewers_avg": [ 35.0, 33.65115154047481 ], "wc_reply_authors_avg": [ 40.6, 81.2 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13334787897666543818&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;hoiying.net;zju.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Zhejiang University;Hoiying Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.hoiying.net", "aff_unique_abbr": "ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Hierarchical Randomized Smoothing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72764", "id": "6IhNHKyuJO", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c0efc0d84c263972af72bf70a2de533-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6IhNHKyuJO", "openreview": "https://openreview.net/forum?id=6IhNHKyuJO", "poster": "/media/PosterPDFs/NeurIPS%202023/72764.png?t=1701794071.8784325", "slides": "https://nips.cc/virtual/2023/poster/72764", "video": "https://nips.cc/virtual/2023/poster/72764", "author_site": "Yan Scholten, Jan Schuchardt, Aleksandar Bojchevski, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Real-world data is complex and often consists of objects that can be decomposed into multiple entities (e.g. images into pixels, graphs into interconnected nodes). Randomized smoothing is a powerful framework for making models provably robust against small changes to their inputs - by guaranteeing robustness of the majority vote when randomly adding noise before classification. Yet, certifying robustness on such complex data via randomized smoothing is challenging when adversaries do not arbitrarily perturb entire objects (e.g. images) but only a subset of their entities (e.g. pixels). As a solution, we introduce hierarchical randomized smoothing: We partially smooth objects by adding random noise only on a randomly selected subset of their entities. By adding noise in a more targeted manner than existing methods we obtain stronger robustness guarantees while maintaining high accuracy. We initialize hierarchical smoothing using different noising distributions, yielding novel robustness certificates for discrete and continuous domains. We experimentally demonstrate the importance of hierarchical smoothing in image and node classification, where it yields superior robustness-accuracy trade-offs. Overall, hierarchical smoothing is an important contribution towards models that are both - certifiably robust to perturbations and accurate.", "keywords": "Adversarial Robustness;Robustness Certification;Randomized Smoothing;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Yan Scholten;Jan Schuchardt;Aleksandar Bojchevski;Stephan G\u00fcnnemann", "authorids": "~Yan_Scholten1;~Jan_Schuchardt1;~Aleksandar_Bojchevski1;~Stephan_G\u00fcnnemann1", "gender": ";;M;M", "homepage": ";https://www.cs.cit.tum.de/daml/team/jan-schuchardt/;https://abojchevski.github.io/;http://www.daml.in.tum.de", "dblp": "240/9194;241/5487;203/8114;43/3011", "google_scholar": "8G2bJ7sAAAAJ;O-cixlwAAAAJ;https://scholar.google.de/citations?user=F1APiN4AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yan_Scholten1;~Jan_Schuchardt1;~Aleksandar_Bojchevski1;~Stephan_G\u00fcnnemann1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Department of Informatics, Technical University Munich;CISPA Helmholtz Center for Information Security;Technical University Munich", "aff_domain": "tum.de;in.tum.de;cispa.de;tum.de", "position": "PhD student;PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\nscholten2023hierarchical,\ntitle={Hierarchical Randomized Smoothing},\nauthor={Yan Scholten and Jan Schuchardt and Aleksandar Bojchevski and Stephan G{\\\"u}nnemann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6IhNHKyuJO}\n}", "github": "", "project": "", "reviewers": "u4ou;BgsW;FfTs;7yDS", "pdf_size": 827717, "rating": "4;6;6;7", "confidence": "2;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "62;87;129;210", "wc_strengths": 
"44;61;87;16", "wc_weaknesses": "159;360;3;73", "wc_questions": "59;1;161;61", "wc_limitations": "15;1;1;10", "wc_review": "339;510;381;370", "wc_reply_reviewers": "74;293;74;93", "wc_reply_authors": "122;0;76;257", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 122.0, 56.16493568054717 ], "wc_strengths_avg": [ 52.0, 25.816661286851172 ], "wc_weaknesses_avg": [ 148.75, 133.89618179768982 ], "wc_questions_avg": [ 70.5, 57.53911712913225 ], "wc_limitations_avg": [ 6.75, 6.015604707757983 ], "wc_review_avg": [ 400.0, 65.3490627323759 ], "wc_reply_reviewers_avg": [ 133.5, 92.41347304370721 ], "wc_reply_authors_avg": [ 113.75, 93.47827287664231 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18248522868449644988&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tum.de;in.tum.de;cispa.de;tum.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;CISPA Helmholtz Center for Information Security;Technical University of Munich", "aff_unique_dep": ";Department of Informatics;;", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.cispa.de/;https://www.tum.de", "aff_unique_abbr": "TUM;TUM;CISPA;TUM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Learning World Models with Identifiable Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72763", "id": "6JJq5TW9Mc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65496a4902252d301cdf219339bfbf9e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6JJq5TW9Mc", "openreview": "https://openreview.net/forum?id=6JJq5TW9Mc", "poster": "/media/PosterPDFs/NeurIPS%202023/72763.png?t=1701773700.9060037", "slides": "https://nips.cc/virtual/2023/poster/72763", "video": "https://nips.cc/virtual/2023/poster/72763", "author_site": "Yuren Liu, Biwei Huang, Zhengmao Zhu, Honglong Tian, Mingming Gong, Yang Yu, Kun Zhang", "tldr": "", "abstract": "Extracting a stable and compact representation of the environment is crucial for efficient reinforcement learning in high-dimensional, noisy, and non-stationary environments. Different categories of information coexist in such environments -- how to effectively extract and disentangle the information remains a challenging problem. In this paper, we propose IFactor, a general framework to model four distinct categories of latent state variables that capture various aspects of information within the RL system, based on their interactions with actions and rewards. Our analysis establishes block-wise identifiability of these latent variables, which not only provides a stable and compact representation but also discloses that all reward-relevant factors are significant for policy learning. We further present a practical approach to learning the world model with identifiable blocks, ensuring the removal of redundancies but retaining minimal and sufficient information for policy optimization. 
Experiments in synthetic worlds demonstrate that our method accurately identifies the ground-truth latent variables, substantiating our theoretical findings. Moreover, experiments in variants of the DeepMind Control Suite and RoboDesk showcase the superior performance of our approach over baselines.", "keywords": "Model-based Reinforcement Learning; Causal Representation Learning;", "primary_area": "", "supplementary_material": "/attachment/bf321124fea11860e6697582db481ce2fe57a55c.zip", "author": "Yu-Ren Liu;Biwei Huang;Zhengmao Zhu;Honglong Tian;Mingming Gong;Yang Yu;Kun Zhang", "authorids": "~Yu-Ren_Liu1;~Biwei_Huang1;~Zhengmao_Zhu1;~Honglong_Tian1;~Mingming_Gong1;~Yang_Yu5;~Kun_Zhang1", "gender": "M;F;M;M;M;M;M", "homepage": "http://lamda.nju.edu.cn/liuyr/;;https://lamda.nju.edu.cn/zhuzm;;https://mingming-gong.github.io/;http://www.andrew.cmu.edu/user/kunz1/;http://www.lamda.nju.edu.cn/yuy", "dblp": "213/7470;165/3288;;321/9889.html;98/8479;96/3115-1;46/2181-1", "google_scholar": "GAgJIbQAAAAJ;;;wBA05dAAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;RGoypN4AAAAJ;PG2lDSwAAAAJ", "orcid": ";;;0000-0001-7137-1296;0000-0001-7147-5589;;", "linkedin": "yu-ren-liu-400355152/;;;;;;", "or_profile": "~Yu-Ren_Liu1;~Biwei_Huang1;~Zhengmao_Zhu1;~Honglong_Tian1;~Mingming_Gong1;~Kun_Zhang1;~Yang_Yu2", "aff": "Nanjing University;University of California, San Diego;Nanjing University;Nanjing University;University of Melbourne;Carnegie Mellon University;Nanjing University", "aff_domain": "nju.edu.cn;ucsd.edu;nju.edu.cn;nju.edu.cn;unimelb.edu.au;cmu.edu;nju.edu.cn", "position": "PhD student;Assistant Professor;PhD student;MS student;Assistant Professor;Associate Professor;Professor", "bibtex": "@inproceedings{\nliu2023learning,\ntitle={Learning World Models with Identifiable Factorization},\nauthor={Yu-Ren Liu and Biwei Huang and Zhengmao Zhu and Honglong Tian and Mingming Gong and Yang Yu and Kun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6JJq5TW9Mc}\n}", "github": "", "project": "", "reviewers": "oFQM;qxcU;gMe8", "pdf_size": 6415371, "rating": "6;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "145;95;70", "wc_strengths": "79;142;88", "wc_weaknesses": "58;149;22", "wc_questions": "326;87;111", "wc_limitations": "24;6;123", "wc_review": "632;479;414", "wc_reply_reviewers": "118;18;24", "wc_reply_authors": "277;100;28", "reply_reviewers": "2;1;1", "reply_authors": "3;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.33333333333333, 31.18047822311618 ], "wc_strengths_avg": [ 103.0, 27.820855486487112 ], "wc_weaknesses_avg": [ 76.33333333333333, 53.443635937520405 ], "wc_questions_avg": [ 174.66666666666666, 107.45645112737014 ], "wc_limitations_avg": [ 51.0, 51.43928459844674 ], "wc_review_avg": [ 508.3333333333333, 91.38319806665166 ], "wc_reply_reviewers_avg": [ 53.333333333333336, 45.79179936286505 ], "wc_reply_authors_avg": [ 135.0, 104.62313319720452 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 16, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=7889141643311965496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "nju.edu.cn;ucsd.edu;nju.edu.cn;nju.edu.cn;unimelb.edu.au;cmu.edu;nju.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;2;3;0", "aff_unique_norm": "Nanjing University;University of California, San Diego;University of Melbourne;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nju.edu.cn;https://www.ucsd.edu;https://www.unimelb.edu.au;https://www.cmu.edu", "aff_unique_abbr": "Nanjing U;UCSD;UniMelb;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;0;0;2;1;0", "aff_country_unique": "China;United States;Australia" }, { "title": "Polyhedron Attention Module: Learning Adaptive-order Interactions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72762", "id": "6JrckqCxtl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1d83ad88759cef8192451543e5d59bf6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6JrckqCxtl", "openreview": "https://openreview.net/forum?id=6JrckqCxtl", "poster": "/media/PosterPDFs/NeurIPS%202023/72762.png?t=1701377029.2023757", "slides": "https://nips.cc/virtual/2023/poster/72762", "video": "https://nips.cc/virtual/2023/poster/72762", "author_site": "Tan Zhu, Fei Dou, Xinyu Wang, Jin Lu, Jinbo Bi", "tldr": "", "abstract": "Learning feature interactions can be the key for multivariate predictive modeling. ReLU-activated neural networks create piecewise linear prediction models, and other nonlinear activation functions lead to models with only high-order feature interactions. Recent methods incorporate candidate polynomial terms of fixed orders into deep learning, which is subject to the issue of combinatorial explosion, or learn the orders that are difficult to adapt to different regions of the feature space. We propose a Polyhedron Attention Module (PAM) to create piecewise polynomial models where the input space is split into polyhedrons which define the different pieces and on each piece the hyperplanes that define the polyhedron boundary multiply to form the interactive terms, resulting in interactions of adaptive order to each piece. PAM is interpretable to identify important interactions in predicting a target. Theoretic analysis shows that PAM has stronger expression capability than ReLU-activated networks. 
Extensive experimental results demonstrate that PAM achieves superior classification performance on massive click-through rate prediction datasets and can learn meaningful interaction effects in a medical problem.", "keywords": "Feature interaction modeling;model interpretation framework;adaptive-order interaction;piecewise polynomial", "primary_area": "", "supplementary_material": "/attachment/1f13b6e68ecd99a1c61384fb77f5464f221ae8ff.pdf", "author": "Tan Zhu;Fei Dou;Xinyu Wang;Jin Lu;Jinbo Bi", "authorids": "~Tan_Zhu1;~Fei_Dou1;~Xinyu_Wang8;~Jin_Lu1;~Jinbo_Bi1", "gender": ";;;M;F", "homepage": "http://tanzhu.info;;;https://jinlucs.github.io/;https://jinbo-bi.uconn.edu/", "dblp": "170/5347;;;33/863-1.html;26/3430", "google_scholar": "n4wTgx4AAAAJ;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-1356-0202;0000-0001-6996-4092", "linkedin": ";;;;", "or_profile": "~Tan_Zhu1;~Fei_Dou1;~Xinyu_Wang8;~Jin_Lu1;~Jinbo_Bi1", "aff": "University of Connecticut;;;University of Michigan;University of Connecticut", "aff_domain": "uconn.edu;;;umich.edu;uconn.edu", "position": "PhD student;;;Assistant Professor;Professor", "bibtex": "@inproceedings{\nzhu2023polyhedron,\ntitle={Polyhedron Attention Module: Learning Adaptive-order Interactions},\nauthor={Tan Zhu and Fei Dou and Xinyu Wang and Jin Lu and Jinbo Bi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6JrckqCxtl}\n}", "github": "", "project": "", "reviewers": "wPpx;8xSi;61Ki;fykg", "pdf_size": 987683, "rating": "6;6;6;8", "confidence": "4;3;3;3", "soundness": "2;3;3;4", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "73;96;25;196", "wc_strengths": "29;45;21;69", "wc_weaknesses": "37;63;52;6", "wc_questions": "218;1;29;7", "wc_limitations": "8;1;5;6", "wc_review": "365;206;132;284", "wc_reply_reviewers": "19;18;0;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 97.5, 62.37186865887537 ], "wc_strengths_avg": [ 41.0, 18.33030277982336 ], "wc_weaknesses_avg": [ 39.5, 21.43011899173684 ], "wc_questions_avg": [ 63.75, 89.6643044918099 ], "wc_limitations_avg": [ 5.0, 2.5495097567963922 ], "wc_review_avg": [ 246.75, 86.88893773087572 ], "wc_reply_reviewers_avg": [ 14.25, 8.257572282456872 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Kh8uRLlDKoYJ:scholar.google.com/&scioq=Polyhedron+Attention+Module:+Learning+Adaptive-order+Interactions&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "uconn.edu;;;umich.edu;uconn.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Connecticut;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.uconn.edu;https://www.umich.edu", "aff_unique_abbr": "UConn;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Decision-Aware Actor-Critic with Function Approximation and Theoretical
Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72761", "id": "6MQ5cheYDZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d18d208fa9c333483e5724ade7beff0f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6MQ5cheYDZ", "openreview": "https://openreview.net/forum?id=6MQ5cheYDZ", "poster": "/media/PosterPDFs/NeurIPS%202023/72761.png?t=1702054867.8609657", "slides": "https://nips.cc/virtual/2023/poster/72761", "video": "https://nips.cc/virtual/2023/poster/72761", "author_site": "Sharan Vaswani, Amirreza Kazemi, Reza Babanezhad Harikandeh, Nicolas Le Roux", "tldr": "", "abstract": "Actor-critic (AC) methods are widely used in reinforcement learning (RL), and benefit from the flexibility of using any policy gradient method as the actor and value-based method as the critic. The critic is usually trained by minimizing the TD error, an objective that is potentially decorrelated with the true goal of achieving a high reward with the actor. We address this mismatch by designing a joint objective for training the actor and critic in a decision-aware fashion. We use the proposed objective to design a generic, AC algorithm that can easily handle any function approximation. We explicitly characterize the conditions under which the resulting algorithm guarantees monotonic policy improvement, regardless of the choice of the policy and critic parameterization. Instantiating the generic algorithm results in an actor that involves maximizing a sequence of surrogate functions (similar to TRPO, PPO), and a critic that involves minimizing a closely connected objective. Using simple bandit examples, we provably establish the benefit of the proposed critic objective over the standard squared error. 
Finally, we empirically demonstrate the benefit of our decision-aware actor-critic framework on simple RL problems.", "keywords": "Decision-aware reinforcement learning;Actor-Critic algorithm;Off-policy updates;General function approximation;Theoretical guarantees", "primary_area": "", "supplementary_material": "/attachment/d03a67dbe7e13fe3ee44dbcb28392c87c40cca31.zip", "author": "Sharan Vaswani;Amirreza Kazemi;Reza Babanezhad Harikandeh;Nicolas Le Roux", "authorids": "~Sharan_Vaswani1;~Amirreza_Kazemi1;~Reza_Babanezhad_Harikandeh1;~Nicolas_Le_Roux2", "gender": "M;M;M;M", "homepage": "http://vaswanis.github.io;;http://babanezhad.ca;http://nicolas.le-roux.name", "dblp": "136/5916;325/2030;37/8904.html;http://dblp.uni-trier.de/pers/hd/r/Roux:Nicolas_Le", "google_scholar": "https://scholar.google.ca/citations?user=bDb2zWwAAAAJ;ni-AkxIAAAAJ;KLrwPsgAAAAJ;https://scholar.google.fr/citations?user=LmKtwk8AAAAJ", "orcid": ";;;", "linkedin": "sharan-vaswani-05b8ab35/;;;", "or_profile": "~Sharan_Vaswani1;~Amirreza_Kazemi1;~Reza_Babanezhad_Harikandeh1;~Nicolas_Le_Roux1", "aff": "Simon Fraser University;Simon Fraser University;Samsung;Microsoft", "aff_domain": "sfu.ca;sfu.ca;samsung.com;microsoft.com", "position": "Assistant Professor;MS student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nvaswani2023decisionaware,\ntitle={Decision-Aware Actor-Critic with Function Approximation and Theoretical Guarantees},\nauthor={Sharan Vaswani and Amirreza Kazemi and Reza Babanezhad Harikandeh and Nicolas Le Roux},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6MQ5cheYDZ}\n}", "github": "", "project": "", "reviewers": "uLAb;odRi;os4C;NBGk", "pdf_size": 2656327, "rating": "3;7;7;7", "confidence": "4;2;3;3", "soundness": "2;3;4;3", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "22;42;138;65", "wc_strengths": "29;15;46;47", "wc_weaknesses": "670;20;87;61", "wc_questions": "8;135;52;1", "wc_limitations": "74;26;3;1", "wc_review": "803;238;326;175", "wc_reply_reviewers": "224;238;114;17", "wc_reply_authors": "685;618;100;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 66.75, 43.859862060886606 ], "wc_strengths_avg": [ 34.25, 13.216939887886303 ], "wc_weaknesses_avg": [ 209.5, 266.94053644959956 ], "wc_questions_avg": [ 49.0, 53.36197147782304 ], "wc_limitations_avg": [ 26.0, 29.402380855978315 ], "wc_review_avg": [ 385.5, 246.93774519096914 ], "wc_reply_reviewers_avg": [ 148.25, 89.7116909884102 ], "wc_reply_authors_avg": [ 350.75, 303.74609050982036 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9530909278011937011&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "sfu.ca;sfu.ca;samsung.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Simon Fraser University;Samsung;Microsoft", "aff_unique_dep": ";Samsung;Microsoft Corporation", "aff_unique_url": "https://www.sfu.ca;https://www.samsung.com;https://www.microsoft.com", "aff_unique_abbr": "SFU;Samsung;Microsoft", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Canada;South Korea;United States" }, { "title": "TempME: Towards the Explainability of Temporal Graph Neural Networks via Motif Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72760", "id": "6OOgw4boZI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5c5bc3553815adb4d1a8a5b8701e41a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6OOgw4boZI", "openreview": "https://openreview.net/forum?id=6OOgw4boZI", "poster": "/media/PosterPDFs/NeurIPS%202023/72760.png?t=1699600487.218721", "slides": "https://nips.cc/virtual/2023/poster/72760", "video": "https://nips.cc/virtual/2023/poster/72760", "author_site": "Jialin Chen, Rex Ying", "tldr": "", "abstract": "Temporal graphs are widely used to model dynamic systems with time-varying interactions. In real-world scenarios, the underlying mechanisms of generating future interactions in dynamic systems are typically governed by a set of recurring substructures within the graph, known as temporal motifs. Despite the success and prevalence of current temporal graph neural networks (TGNN), it remains uncertain which temporal motifs are recognized as the significant indications that trigger a certain prediction from the model, which is a critical challenge for advancing the explainability and trustworthiness of current TGNNs. To address this challenge, we propose a novel approach, called **Temp**oral **M**otifs **E**xplainer (**TempME**), which uncovers the most pivotal temporal motifs guiding the prediction of TGNNs. Derived from the information bottleneck principle, TempME extracts the most interaction-related motifs while minimizing the amount of contained information to preserve the sparsity and succinctness of the explanation. Events in the explanations generated by TempME are verified to be more spatiotemporally correlated than those of existing approaches, providing more understandable insights. 
Extensive experiments validate the superiority of TempME, with up to an 8.21% increase in explanation accuracy across six real-world datasets and up to a 22.96% increase in the prediction Average Precision of current TGNNs.", "keywords": "Explainability;Temporal Graph Neural Network", "primary_area": "", "supplementary_material": "", "author": "Jialin Chen;Zhitao Ying", "authorids": "~Jialin_Chen2;~Zhitao_Ying1", "gender": "F;M", "homepage": "https://github.com/Cather-learner;https://www.cs.yale.edu/homes/ying-rex", "dblp": ";209/4936", "google_scholar": "rHyMKPYAAAAJ;6fqNXooAAAAJ", "orcid": "0009-0007-0909-4620;", "linkedin": ";rex-ying-92770148/", "or_profile": "~Jialin_Chen2;~Zhitao_Ying1", "aff": "Yale University;Yale University", "aff_domain": "yale.edu;yale.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023tempme,\ntitle={Temp{ME}: Towards the Explainability of Temporal Graph Neural Networks via Motif Discovery},\nauthor={Jialin Chen and Zhitao Ying},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6OOgw4boZI}\n}", "github": "", "project": "", "reviewers": "M5gq;U22B;HiZ7;uwpb;NnW1", "pdf_size": 1123186, "rating": "5;6;6;6;7", "confidence": "4;4;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "65;49;181;95;18", "wc_strengths": "45;43;68;85;15", "wc_weaknesses": "52;236;375;119;240", "wc_questions": "229;149;171;5;254", "wc_limitations": "32;26;10;32;29", "wc_review": "423;503;805;336;556", "wc_reply_reviewers": "215;23;12;100;8", "wc_reply_authors": "435;0;0;323;0", "reply_reviewers": "2;1;1;2;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 81.6, 55.5755341854669 ], "wc_strengths_avg": [ 51.2, 23.83610706470333 ], "wc_weaknesses_avg": [ 204.4, 111.25574142488108 ], "wc_questions_avg": [ 161.6, 87.00712614493136 ], "wc_limitations_avg": [ 25.8, 8.207313811473277 ], "wc_review_avg": [ 524.6, 158.72567530176082 ], "wc_reply_reviewers_avg": [ 71.6, 79.15705906613762 ], "wc_reply_authors_avg": [ 151.6, 189.01915246873793 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10811037302909226119&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "yale.edu;yale.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Probabilistic Weight Fixing: Large-scale training of neural network weight uncertainties for quantisation.", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72759", "id": "6Odmtoek02", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ba178fab60f9306a0b2d7ec8973715a6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6Odmtoek02", "openreview": "https://openreview.net/forum?id=6Odmtoek02", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72759.png?t=1696363758.8738964", "slides": "https://nips.cc/virtual/2023/poster/72759", "video": "https://nips.cc/virtual/2023/poster/72759", "author_site": "Chris Subia-Waud, Srinandan Dasmahapatra", "tldr": "", "abstract": "Weight-sharing quantization has emerged as a technique to reduce energy expenditure during inference in large neural networks by constraining their weights to a limited set of values. However, existing methods often assume weights are treated solely based on value, neglecting the unique role of weight position. This paper proposes a probabilistic framework based on Bayesian neural networks (BNNs) and a variational relaxation to identify which weights can be moved to which cluster center and to what degree based on their individual position-specific learned uncertainty distributions. We introduce a new initialization setting and a regularization term, enabling the training of BNNs with complex dataset-model combinations. Leveraging the flexibility of weight values from probability distributions, we enhance noise resilience and compressibility. Our iterative clustering procedure demonstrates superior compressibility and higher accuracy compared to state-of-the-art methods on both ResNet models and the more complex transformer-based architectures. In particular, our method outperforms the state-of-the-art quantization method top-1 accuracy by 1.6\\% on ImageNet using DeiT-Tiny, with its 5 million+ weights now represented by only 296 unique values. Code available at https://github.com/subiawaud/PWFN.", "keywords": "Quantization;compression;bayesian neural networks;accelerators", "primary_area": "", "supplementary_material": "/attachment/9032c7f2e000d36b4b1bb11aa20be9bde6ea96cc.pdf", "author": "Chris Subia-Waud;Srinandan Dasmahapatra", "authorids": "~Chris_Subia-Waud1;~Srinandan_Dasmahapatra1", "gender": "M;", "homepage": ";", "dblp": ";64/5025", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=4FVYygkAAAAJ", "orcid": ";", "linkedin": "https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwj8hp-59YLzAhV95-AKHboQADQQFnoECCgQAQ&url=https%3A%2F%2Fuk.linkedin.com%2Fin%2Fchris-subia-waud&usg=AOvVaw3fubacqImpPQuXMtcRciXH;", "or_profile": "~Chris_Subia-Waud1;~Srinandan_Dasmahapatra1", "aff": "University of Southampton;University of Southampton", "aff_domain": "soton.ac.uk;soton.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nsubia-waud2023probabilistic,\ntitle={Probabilistic Weight Fixing: Large-scale training of neural network weight uncertainties for quantisation.},\nauthor={Chris Subia-Waud and Srinandan Dasmahapatra},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6Odmtoek02}\n}", "github": "", "project": "", "reviewers": "jtUH;1QKt;TzYT;tpmz;NcTB", "pdf_size": 1404299, "rating": "5;5;5;6;7", "confidence": "4;2;4;3;3", "soundness": "3;3;2;2;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;2;3", "wc_summary": "101;93;126;61;46", "wc_strengths": "73;71;38;47;35", "wc_weaknesses": "205;85;66;40;87", "wc_questions": "119;2;2;31;4", "wc_limitations": "4;1;2;16;38", "wc_review": "502;252;234;195;210", "wc_reply_reviewers": "0;0;0;42;14", "wc_reply_authors": "0;0;0;23;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 
0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 85.4, 28.62586243242289 ], "wc_strengths_avg": [ 52.8, 16.17899873292535 ], "wc_weaknesses_avg": [ 96.6, 56.775346762481334 ], "wc_questions_avg": [ 31.6, 45.06262309275838 ], "wc_limitations_avg": [ 12.2, 13.977124167724918 ], "wc_review_avg": [ 278.6, 113.40123456118104 ], "wc_reply_reviewers_avg": [ 11.2, 16.32666530556684 ], "wc_reply_authors_avg": [ 4.6, 9.2 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2004459314343183, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11809357543424847243&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "soton.ac.uk;soton.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southampton", "aff_unique_dep": "", "aff_unique_url": "https://www.southampton.ac.uk", "aff_unique_abbr": "Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "ComSL: A Composite Speech-Language Model for End-to-End Speech-to-Text Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72758", "id": "6Qx7G1xrAk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6262f7a34e5d641cdb3d33dc9ad1a5a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6Qx7G1xrAk", "openreview": "https://openreview.net/forum?id=6Qx7G1xrAk", "poster": "/media/PosterPDFs/NeurIPS%202023/72758.png?t=1699499560.6536694", "slides": "https://nips.cc/virtual/2023/poster/72758", "video": "https://nips.cc/virtual/2023/poster/72758", "author_site": "Chenyang Le, Yao Qian, Long Zhou, Shujie LIU, Yanmin Qian, Michael Zeng, Xuedong Huang", "tldr": "", "abstract": "Joint speech-language training is challenging due to the large demand for training data and GPU consumption, as well as the modality gap between speech and language. We present ComSL, a speech-language model built atop a composite architecture of public pre-trained speech-only and language-only models and optimized data-efficiently for spoken language tasks. Particularly, we propose to incorporate cross-modality learning into transfer learning and conduct them simultaneously for downstream tasks in a multi-task learning manner. 
Our approach has demonstrated effectiveness in end-to-end speech-to-text translation tasks, achieving a new state-of-the-art average BLEU score of 31.5 on the multilingual speech to English text translation task for 21 languages, as measured on the public CoVoST2 evaluation set.", "keywords": "end-to-end speech to text translation;cross-modality learning;joint speech and language training", "primary_area": "", "supplementary_material": "/attachment/8593af75abd361f85897209921fbcbd2d5d5f21e.zip", "author": "Chenyang Le;Yao Qian;Long Zhou;Shujie LIU;Yanmin Qian;Michael Zeng;Xuedong Huang", "authorids": "~Chenyang_Le2;~Yao_Qian2;~Long_Zhou2;~Shujie_LIU1;~Yanmin_Qian1;~Michael_Zeng1;~Xuedong_Huang1", "gender": "F;;M;M;M;M;M", "homepage": "https://www.microsoft.com/en-us/research/people/yaoqian/;;https://www.microsoft.com/en-us/research/people/shujliu/;https://x-lance.sjtu.edu.cn/en/members/yanmin-qian;https://www.microsoft.com/en-us/research/people/nzeng/;;https://github.com/nethermanpro", "dblp": ";;;07/8638;232/1866-1.html;41/4753;301/7724", "google_scholar": "o7OfErXuEJIC;ZnwgSXIAAAAJ;6mNya-wAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;", "orcid": ";;0009-0008-2599-6752;;;;", "linkedin": ";;;;michaelnanshanzeng/;;", "or_profile": "~Yao_Qian2;~Long_Zhou2;~Shujie_LIU1;~Yanmin_Qian1;~Michael_Zeng1;~Xuedong_Huang1;~chenyang_le1", "aff": "Microsoft;Microsoft Research Asia;Microsoft;Shanghai Jiaotong University;Microsoft;;Shanghai Jiaotong University", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;;sjtu.edu.cn", "position": "Principal Researcher;Researcher;Researcher;Full Professor;Vice President Research Manager;;Undergrad student", "bibtex": "@inproceedings{\nle2023comsl,\ntitle={Com{SL}: A Composite Speech-Language Model for End-to-End Speech-to-Text Translation},\nauthor={Chenyang Le and Yao Qian and Long Zhou and Shujie LIU and Yanmin Qian and Michael Zeng and Xuedong Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6Qx7G1xrAk}\n}", "github": "", "project": "", "reviewers": "nRjc;7Di5;KhJu;tyJC", "pdf_size": 3846719, "rating": "6;6;6;6", "confidence": "5;4;5;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "58;60;74;131", "wc_strengths": "25;42;33;51", "wc_weaknesses": "84;83;259;112", "wc_questions": "100;2;32;163", "wc_limitations": "18;2;8;26", "wc_review": "285;189;406;483", "wc_reply_reviewers": "20;14;16;0", "wc_reply_authors": "20;39;17;93", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 29.65952629426168 ], "wc_strengths_avg": [ 37.75, 9.730750228014282 ], "wc_weaknesses_avg": [ 134.5, 72.81655031653175 ], "wc_questions_avg": [ 74.25, 62.3392933870765 ], "wc_limitations_avg": [ 13.5, 9.205976319760984 ], "wc_review_avg": [ 340.75, 112.50416658950903 ], "wc_reply_reviewers_avg": [ 12.5, 7.533259586659682 ], "wc_reply_authors_avg": [ 42.25, 30.49077729412617 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5691198822064456475&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
"microsoft.com;microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;1", "aff_unique_norm": "Microsoft;Shanghai Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Microsoft;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Robust Lipschitz Bandits to Adversarial Corruptions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72757", "id": "6RiqluMFNz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/238f3b98bbe998b4f2234443907fe663-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6RiqluMFNz", "openreview": "https://openreview.net/forum?id=6RiqluMFNz", "poster": "/media/PosterPDFs/NeurIPS%202023/72757.png?t=1701300672.637396", "slides": "https://nips.cc/virtual/2023/poster/72757", "video": "https://nips.cc/virtual/2023/poster/72757", "author_site": "Yue Kang, Cho-Jui Hsieh, Thomas Chun Man Lee", "tldr": "", "abstract": "Lipschitz bandit is a variant of stochastic bandits that deals with a continuous arm set defined on a metric space, where the reward function is subject to a Lipschitz constraint. In this paper, we introduce a new problem of Lipschitz bandits in the presence of adversarial corruptions where an adaptive adversary corrupts the stochastic rewards up to a total budget $C$. The budget is measured by the sum of corruption levels across the time horizon $T$. We consider both weak and strong adversaries, where the weak adversary is unaware of the current action before the attack, while the strong one can observe it. Our work presents the first line of robust Lipschitz bandit algorithms that can achieve sub-linear regret under both types of adversary, even when the total budget of corruption $C$ is unrevealed to the agent. We provide a lower bound under each type of adversary, and show that our algorithm is optimal under the strong case. 
Finally, we conduct experiments to illustrate the effectiveness of our algorithms against two classic kinds of attacks.", "keywords": "bandits", "primary_area": "", "supplementary_material": "/attachment/186d8445ee6bfe7e47e22499a51691cfff0e2c76.pdf", "author": "Yue Kang;Cho-Jui Hsieh;Thomas Chun Man Lee", "authorids": "~Yue_Kang1;~Cho-Jui_Hsieh1;~Thomas_Chun_Man_Lee1", "gender": "M;M;", "homepage": ";http://web.cs.ucla.edu/~chohsieh/index.html;", "dblp": "135/9726-2;14/2770;", "google_scholar": ";Wy89g4IAAAAJ;", "orcid": ";;", "linkedin": "yue-kang-b52063158/;;", "or_profile": "~Yue_Kang1;~Cho-Jui_Hsieh1;~Thomas_Chun_Man_Lee1", "aff": "University of California, Davis;Amazon;", "aff_domain": "ucdavis.edu;amazon.com;", "position": "PhD student;visiting scholar;", "bibtex": "@inproceedings{\nkang2023robust,\ntitle={Robust Lipschitz Bandits to Adversarial Corruptions},\nauthor={Yue Kang and Cho-Jui Hsieh and Thomas Chun Man Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6RiqluMFNz}\n}", "github": "", "project": "", "reviewers": "4cuU;Mz32;Vfr6;ecqm;DQs2", "pdf_size": 423743, "rating": "5;5;6;7;7", "confidence": "3;3;3;5;5", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "85;116;90;274;132", "wc_strengths": "39;25;144;145;86", "wc_weaknesses": "47;64;94;559;31", "wc_questions": "79;20;1;438;24", "wc_limitations": "5;10;1;5;1", "wc_review": "255;235;330;1421;274", "wc_reply_reviewers": "13;15;27;227;0", "wc_reply_authors": "621;33;11;1535;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;2;2;4;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 139.4, 69.45386958262297 ], "wc_strengths_avg": [ 87.8, 50.514948282661834 ], "wc_weaknesses_avg": [ 159.0, 201.08605123180473 ], "wc_questions_avg": [ 112.4, 164.8594552945023 ], "wc_limitations_avg": [ 4.4, 3.32264954516723 ], "wc_review_avg": [ 503.0, 460.09172998435866 ], "wc_reply_reviewers_avg": [ 56.4, 85.72887494887588 ], "wc_reply_authors_avg": [ 440.0, 595.8315198107599 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9128709291752769, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7967450423845167472&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucdavis.edu;amazon.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Davis;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucdavis.edu;https://www.amazon.com", "aff_unique_abbr": "UC Davis;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Davis;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Preference-grounded Token-level Guidance for Language Model Fine-tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72756", "id": "6SRE9GZ9s6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4d4a3b6a34332d80349137bcc98164a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6SRE9GZ9s6", "openreview": "https://openreview.net/forum?id=6SRE9GZ9s6", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72756.png?t=1699740784.5653872", "slides": "https://nips.cc/virtual/2023/poster/72756", "video": "https://nips.cc/virtual/2023/poster/72756", "author_site": "Shentao Yang, Shujian Zhang, Congying Xia, Yihao Feng, Caiming Xiong, Mingyuan Zhou", "tldr": "", "abstract": "Aligning language models (LMs) with preferences is an important problem in natural language generation. A key challenge is that preferences are typically provided at the *sequence level* while LM training and generation both occur at the *token level*. There is, therefore, a *granularity mismatch* between the preference and the LM training losses, which may complicate the learning problem. In this paper, we address this issue by developing an alternate training process, where we iterate between grounding the sequence-level preference into token-level training guidance, and improving the LM with the learned guidance. For guidance learning, we design a framework that extends the pairwise-preference learning in imitation learning to both variable-length LM generation and the utilization of the preference among multiple generations. For LM training, based on the amount of supervised data, we present two *minimalist* learning objectives that utilize the learned guidance. In experiments, our method performs competitively on two distinct representative LM tasks --- discrete-prompt generation and text summarization.", "keywords": "Preference Learning;Training Guidance Learning;Language Model Fine-tuning;Text Sequence Generation", "primary_area": "", "supplementary_material": "/attachment/8b2a4600eb145a6560bdde82e13fd2ef15ed6114.pdf", "author": "Shentao Yang;Shujian Zhang;Congying Xia;Yihao Feng;Caiming Xiong;Mingyuan Zhou", "authorids": "~Shentao_Yang1;~Shujian_Zhang1;~Congying_Xia1;~Yihao_Feng1;~Caiming_Xiong1;~Mingyuan_Zhou1", "gender": "M;;F;M;M;M", "homepage": ";https://www.utexas.edu/;;;http://cmxiong.com/;http://mingyuanzhou.github.io", "dblp": ";84/3190.html;210/2265;204/3696;80/7282;", "google_scholar": "https://scholar.google.com/citations?hl=en;7RmLVQkAAAAJ;gJDablUAAAAJ;uqnNle0AAAAJ;vaSdahkAAAAJ;LXwCIisAAAAJ", "orcid": "0009-0009-8058-3149;;;;;", "linkedin": "shentaoyang/;;;;caiming-xiong-150a1417;", "or_profile": "~Shentao_Yang1;~Shujian_Zhang1;~Congying_Xia1;~Yihao_Feng1;~Caiming_Xiong1;~Mingyuan_Zhou1", "aff": "University of Texas at Austin;University of Texas, Austin;SalesForce.com;Salesforce AI Research;Salesforce Research;Google", "aff_domain": "utexas.edu;utexas.edu;salesforce.com;salesforce.com;salesforce.com;google.com", "position": "PhD student;PhD student;Researcher;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nyang2023preferencegrounded,\ntitle={Preference-grounded Token-level Guidance for Language Model Fine-tuning},\nauthor={Shentao Yang and Shujian Zhang and Congying Xia and Yihao Feng and Caiming Xiong and Mingyuan Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6SRE9GZ9s6}\n}", "github": "", "project": "", "reviewers": "wRKD;XL7b;9EYo;gmdH;xDED", "pdf_size": 979416, "rating": "4;5;6;7;7", "confidence": "4;4;3;4;4", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;2", "presentation": "2;3;2;3;3", "wc_summary": "45;101;53;176;91", "wc_strengths": "50;52;56;92;57", "wc_weaknesses": "194;336;173;159;181", "wc_questions": "1;92;27;8;27", "wc_limitations": "1;6;1;17;12", "wc_review": "291;587;310;452;368", "wc_reply_reviewers": "183;0;13;212;276", "wc_reply_authors": 
"305;0;18;194;699", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 93.2, 46.60643732361443 ], "wc_strengths_avg": [ 61.4, 15.512575543732254 ], "wc_weaknesses_avg": [ 208.6, 64.70425024679601 ], "wc_questions_avg": [ 31.0, 32.19316697686017 ], "wc_limitations_avg": [ 7.4, 6.280127387243033 ], "wc_review_avg": [ 401.6, 108.31915804694937 ], "wc_reply_reviewers_avg": [ 136.8, 110.64067967976335 ], "wc_reply_authors_avg": [ 243.2, 254.50139488812238 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.08574929257125444, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4411302643555128174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "utexas.edu;utexas.edu;salesforce.com;salesforce.com;salesforce.com;google.com", "author_num": 6, "aff_unique_index": "0;0;1;1;1;2", "aff_unique_norm": "University of Texas at Austin;Salesforce;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.utexas.edu;https://www.salesforce.com;https://www.google.com", "aff_unique_abbr": "UT Austin;Salesforce;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Austin;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Model for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72755", "id": "6UCMa0Qgej", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0429ececfb199efc93182990169e73bb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6UCMa0Qgej", "openreview": "https://openreview.net/forum?id=6UCMa0Qgej", "poster": "/media/PosterPDFs/NeurIPS%202023/72755.png?t=1702237265.5568213", "slides": "https://nips.cc/virtual/2023/poster/72755", "video": "https://nips.cc/virtual/2023/poster/72755", "author_site": "Mohak Bhardwaj, Tengyang Xie, Byron Boots, Nan Jiang, Ching-An Cheng", "tldr": "", "abstract": "We propose a novel model-based offline Reinforcement Learning (RL) framework, called Adversarial Model for Offline Reinforcement Learning (ARMOR), which can robustly learn policies to improve upon an arbitrary reference policy regardless of data coverage. ARMOR is designed to optimize policies for the worst-case performance relative to the reference policy through adversarially training a Markov decision process model. In theory, we prove that ARMOR, with a well-tuned hyperparameter, can compete with the best policy within data coverage when the reference policy is supported by the data. At the same time, ARMOR is robust to hyperparameter choices: the policy learned by ARMOR, with any admissible hyperparameter, would never degrade the performance of the reference policy, even when the reference policy is not covered by the dataset. To validate these properties in practice, we design a scalable implementation of ARMOR, which by adversarial training, can optimize policies without using model ensembles in contrast to typical model-based methods. 
We show that ARMOR achieves competent performance with both state-of-the-art offline model-free and model-based RL algorithms and can robustly improve the reference policy over various hyperparameter choices.", "keywords": "model based;offline;reinforcement learning;adversarial training", "primary_area": "", "supplementary_material": "/attachment/fe50eceae716ed4ddf1f3548c458efecdaa58ee0.zip", "author": "Mohak Bhardwaj;Tengyang Xie;Byron Boots;Nan Jiang;Ching-An Cheng", "authorids": "~Mohak_Bhardwaj1;~Tengyang_Xie1;~Byron_Boots1;~Nan_Jiang2;~Ching-An_Cheng1", "gender": ";;;M;M", "homepage": ";https://tengyangxie.github.io/;;http://nanjiang.cs.illinois.edu;http://www.chinganc.com", "dblp": ";227/3335;;06/4489-8;123/6369", "google_scholar": ";rlmROVsAAAAJ;;nUlanA8AAAAJ;bMZFLZ_V4goC", "orcid": ";;;;", "linkedin": ";;;nan-jiang-28139937/;", "or_profile": "~Mohak_Bhardwaj1;~Tengyang_Xie1;~Byron_Boots1;~Nan_Jiang2;~Ching-An_Cheng1", "aff": ";Department of Computer Science, University of Illinois, Urbana Champaign;;University of Illinois, Urbana Champaign;Microsoft Research", "aff_domain": ";cs.illinois.edu;;illinois.edu;microsoft.com", "position": ";PhD student;;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nbhardwaj2023adversarial,\ntitle={Adversarial Model for Offline Reinforcement Learning},\nauthor={Mohak Bhardwaj and Tengyang Xie and Byron Boots and Nan Jiang and Ching-An Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6UCMa0Qgej}\n}", "github": "", "project": "", "reviewers": "pF8E;sNsB;V62y;suos", "pdf_size": 768965, "rating": "4;6;6;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "157;128;95;57", "wc_strengths": "54;42;56;46", "wc_weaknesses": "124;290;121;432", "wc_questions": "1;128;6;371", "wc_limitations": "1;6;6;1", "wc_review": "337;594;284;907", "wc_reply_reviewers": "22;379;0;559", "wc_reply_authors": "0;1082;0;1107", "reply_reviewers": "1;2;0;5", "reply_authors": "1;3;1;5", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.25, 37.298625980054545 ], "wc_strengths_avg": [ 49.5, 5.722761571129799 ], "wc_weaknesses_avg": [ 241.75, 129.39160521455787 ], "wc_questions_avg": [ 126.5, 150.04416016626573 ], "wc_limitations_avg": [ 3.5, 2.5 ], "wc_review_avg": [ 530.5, 246.97621342955276 ], "wc_reply_reviewers_avg": [ 240.0, 237.80559286946973 ], "wc_reply_authors_avg": [ 547.25, 547.3213749708666 ], "reply_reviewers_avg": [ 2.0, 1.8708286933869707 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4551817285915306480&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";cs.illinois.edu;;illinois.edu;microsoft.com", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Microsoft", "aff_unique_dep": "Department of Computer Science;Microsoft Research", "aff_unique_url": "https://illinois.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UIUC;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": 
"United States" }, { "title": "Into the LAION\u2019s Den: Investigating Hate in Multimodal Datasets", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73694", "id": "6URyQ9QhYv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42f225509e8263e2043c9d834ccd9a2b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=6URyQ9QhYv", "openreview": "https://openreview.net/forum?id=6URyQ9QhYv", "poster": "/media/PosterPDFs/NeurIPS%202023/73694.png?t=1702566042.7409775", "slides": "https://nips.cc/virtual/2023/poster/73694", "video": "https://nips.cc/virtual/2023/poster/73694", "author_site": "Abeba Birhane, vinay prabhu, Sanghyun Han, Vishnu Boddeti, Sasha Luccioni", "tldr": "", "abstract": "`Scale the model, scale the data, scale the compute' is the reigning sentiment in the world of generative AI today. While the impact of model scaling has been extensively studied, we are only beginning to scratch the surface of data scaling and its consequences. This is especially of critical importance in the context of vision-language datasets such as LAION. These datasets are continually growing in size and are built based on large-scale internet dumps such as the Common Crawl, which is known to have numerous drawbacks ranging from quality, legality, and content. The datasets then serve as the backbone for large generative models, contributing to the operationalization and perpetuation of harmful societal and historical biases and stereotypes. In this paper, we investigate the effect of scaling datasets on hateful content through a comparative audit of two datasets: LAION-400M and LAION-2B. Our results show that hate content increased by nearly **12%** with dataset scale, measured both qualitatively and quantitatively using a metric that we term as Hate Content Rate (HCR). We also found that filtering dataset contents based on Not Safe For Work (NSFW) values calculated based on images alone does not exclude all the harmful content in alt-text. Instead, we found that trace amounts of hateful, targeted, and aggressive text remain even when carrying out conservative filtering. 
We end with a reflection and a discussion of the significance of our results for dataset curation and usage in the AI community.\nCode and the meta-data assets curated in this paper are publicly available at https://github.com/vinayprabhu/hate_scaling.\n Content warning: This paper contains examples of hateful text that might be disturbing, distressing, and/or offensive.", "keywords": "multimodal datasets;dataset audit;hate speech detection;toxic content detection", "primary_area": "", "supplementary_material": "/attachment/93df1f4ad44dd8fe7f1e8d1c9b1e61b31c72d3d2.zip", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nbirhane2023into,\ntitle={Into the {LAION}{\\textquoteright}s Den: Investigating Hate in Multimodal Datasets},\nauthor={Abeba Birhane and vinay uday prabhu and Sanghyun Han and Vishnu Boddeti and Sasha Luccioni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=6URyQ9QhYv}\n}", "github": "", "project": "", "reviewers": "gEqv;ToJe;9brB", "pdf_size": 1724417, "rating": "4;7;8", "confidence": "3;3;5", "wc_summary_and_contributions": "145;48;69", "wc_strengths": "25;35;69", "wc_improvement": "46;116;199", "wc_limitations": "131;12;251", "wc_correctness": "37;6;50", "wc_clarity": "9;5;88", "wc_relation_to_prior_work": "1;1;88", "wc_documentation": "1;12;1", "wc_additional_feedback": "1;1;1", "wc_review": "396;236;816", "wc_reply_reviewers": "0;34;0", "wc_reply_authors": "825;835;815", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 87.33333333333333, 41.66799997866735 ], "wc_strengths_avg": [ 43.0, 18.83259585576738 ], "wc_improvement_avg": [ 120.33333333333333, 62.537100099771884 ], "wc_limitations_avg": [ 131.33333333333334, 97.57162611242175 ], "wc_correctness_avg": [ 31.0, 18.457157599876172 ], "wc_clarity_avg": [ 34.0, 38.21866908549625 ], "wc_relation_to_prior_work_avg": [ 30.0, 41.012193308819754 ], "wc_documentation_avg": [ 4.666666666666667, 5.185449728701348 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 482.6666666666667, 244.58581770458855 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 16.027753706895076 ], "wc_reply_authors_avg": [ 825.0, 8.16496580927726 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.6933752452815365, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2648695357399036870&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "", "author_num": 1 }, { "title": "DELTA: Diverse Client Sampling for Fasting Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72754", "id": "6XC5iKqRVm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/949c57d30f8791e3ae42646081b3c102-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6XC5iKqRVm", "openreview": "https://openreview.net/forum?id=6XC5iKqRVm", "poster": "/media/PosterPDFs/NeurIPS%202023/72754.png?t=1701660221.8534086", "slides": 
"https://nips.cc/virtual/2023/poster/72754", "video": "https://nips.cc/virtual/2023/poster/72754", "author_site": "Lin Wang, Yongxin Guo, Tao Lin, Xiaoying Tang", "tldr": "", "abstract": "Partial client participation has been widely adopted in Federated Learning (FL) to reduce the communication burden efficiently. However, an inadequate client sampling scheme can lead to the selection of unrepresentative subsets, resulting in significant variance in model updates and slowed convergence. Existing sampling methods are either biased or can be further optimized for faster convergence.\nIn this paper, we present DELTA, an unbiased sampling scheme designed to alleviate these issues. DELTA characterizes the effects of client diversity and local variance, and samples representative clients with valuable information for global model updates. In addition, DELTA is a proven optimal unbiased sampling scheme that minimizes variance caused by partial client participation and outperforms other unbiased sampling schemes in terms of convergence. Furthermore, to address full-client gradient dependence, we provide a practical version of DELTA depending on the available clients' information, and also analyze its convergence. Our results are validated through experiments on both synthetic and real-world datasets.", "keywords": "federated learning;client sampling", "primary_area": "", "supplementary_material": "/attachment/66c1f7ae0ef244afd3ce31b55f1273bf9b9a34b3.zip", "author": "Lin Wang;Yongxin Guo;Tao Lin;Xiaoying Tang", "authorids": "~Lin_Wang14;~Yongxin_Guo1;~Tao_Lin1;~Xiaoying_Tang2", "gender": ";M;M;F", "homepage": ";https://gyxxyg.github.io/yongxinguo/;https://lins-lab.github.io/;https://sse.cuhk.edu.cn/en/faculty/tangxiaoying", "dblp": ";;64/4492-4.html;134/9714-2", "google_scholar": ";5Cl1GZwAAAAJ;QE9pa_cAAAAJ;https://scholar.google.com/citations?hl=zh-TW", "orcid": ";0009-0001-8652-0722;0000-0002-3246-6935;0000-0003-3955-1195", "linkedin": ";;;", "or_profile": "~Lin_Wang14;~Yongxin_Guo1;~Tao_Lin1;~Xiaoying_Tang2", "aff": ";Chinese University of HongKong, Shenzhen;Westlake University;The Chinese University of Hong Kong, Shenzhen", "aff_domain": ";cuhk.edu.cn;westlake.edu;cuhk.edu.cn", "position": ";PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023delta,\ntitle={{DELTA}: Diverse Client Sampling for Fasting Federated Learning},\nauthor={Lin Wang and Yongxin Guo and Tao Lin and Xiaoying Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6XC5iKqRVm}\n}", "github": "", "project": "", "reviewers": "9Bi1;stNZ;1Yft;71Y7;yPdX", "pdf_size": 2037927, "rating": "5;5;5;5;7", "confidence": "2;3;4;4;4", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;1;3;3;4", "wc_summary": "56;41;96;65;125", "wc_strengths": "63;13;100;40;56", "wc_weaknesses": "50;61;58;137;42", "wc_questions": "85;2;5;153;169", "wc_limitations": "14;13;28;11;1", "wc_review": "268;130;287;406;393", "wc_reply_reviewers": "55;78;0;23;21", "wc_reply_authors": "889;257;96;210;21", "reply_reviewers": "1;1;0;1;1", "reply_authors": "4;3;2;3;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 76.6, 30.15029021419197 ], "wc_strengths_avg": [ 54.4, 28.555910071296974 ], "wc_weaknesses_avg": [ 69.6, 34.34297599218798 ], 
"wc_questions_avg": [ 82.8, 70.63257039071989 ], "wc_limitations_avg": [ 13.4, 8.639444426582072 ], "wc_review_avg": [ 296.8, 99.92677318917089 ], "wc_reply_reviewers_avg": [ 35.4, 27.615937427507326 ], "wc_reply_authors_avg": [ 294.6, 308.60369408028805 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.37500000000000006, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7563394755737674697&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";cuhk.edu.cn;westlake.edu;cuhk.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Westlake University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.westlake.edu.cn", "aff_unique_abbr": "CUHK;WU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "HAP: Structure-Aware Masked Image Modeling for Human-Centric Perception", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72753", "id": "6XPPfZkhKi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ed1c94a6c87276f25ebb65231c86c3e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6XPPfZkhKi", "openreview": "https://openreview.net/forum?id=6XPPfZkhKi", "poster": "/media/PosterPDFs/NeurIPS%202023/72753.png?t=1698660377.8672743", "slides": "https://nips.cc/virtual/2023/poster/72753", "video": "https://nips.cc/virtual/2023/poster/72753", "author_site": "Junkun Yuan, Xinyu Zhang, Hao Zhou, Jian Wang, Zhongwei Qiu, Zhiyin Shao, Shaofeng Zhang, Sifan Long, Kun Kuang, Kun Yao, Junyu Han, Errui Ding, Lanfen Lin, Fei Wu, Jingdong Wang", "tldr": "", "abstract": "Model pre-training is essential in human-centric perception. In this paper, we first introduce masked image modeling (MIM) as a pre-training approach for this task. Upon revisiting the MIM training strategy, we reveal that human structure priors offer significant potential. Motivated by this insight, we further incorporate an intuitive human structure prior - human parts - into pre-training. Specifically, we employ this prior to guide the mask sampling process. Image patches, corresponding to human part regions, have high priority to be masked out. This encourages the model to concentrate more on body structure information during pre-training, yielding substantial benefits across a range of human-centric perception tasks. To further capture human characteristics, we propose a structure-invariant alignment loss that enforces different masked views, guided by the human part prior, to be closely aligned for the same image. We term the entire method as HAP. HAP simply uses a plain ViT as the encoder yet establishes new state-of-the-art performance on 11 human-centric benchmarks, and on-par result on one dataset. 
For example, HAP achieves 78.1% mAP on MSMT17 for person re-identification, 86.54% mA on PA-100K for pedestrian attribute recognition, 78.2% AP on MS COCO for 2D pose estimation, and 56.0 PA-MPJPE on 3DPW for 3D pose and shape estimation.", "keywords": "human centric perception;masked image modeling;structural-aware pre-training", "primary_area": "", "supplementary_material": "", "author": "Junkun Yuan;Xinyu Zhang;Hao Zhou;Jian Wang;Zhongwei Qiu;Zhiyin Shao;Shaofeng Zhang;Sifan Long;Kun Kuang;Kun Yao;Junyu Han;Errui Ding;Lanfen Lin;Fei Wu;Jingdong Wang", "authorids": "~Junkun_Yuan1;~Xinyu_Zhang3;~Hao_Zhou13;~Jian_Wang11;~Zhongwei_Qiu1;~Zhiyin_Shao1;~Shaofeng_Zhang1;~Sifan_Long1;~Kun_Kuang1;~Kun_Yao1;~Junyu_Han1;~Errui_Ding2;~Lanfen_Lin1;~Fei_Wu2;~Jingdong_Wang1", "gender": "M;;M;M;M;M;M;M;M;M;;M;F;;M", "homepage": "https://junkunyuan.github.io/;;;;https://ericzw.github.io/;;https://sherrylone.github.io;;http://kunkuang.github.io;https://github.com/kk12333;;;https://person.zju.edu.cn/en/0096005;https://person.zju.edu.cn/wufei;https://jingdongwang2017.github.io/", "dblp": "https://dblp.uni-trier.de/pid/238/0171;;63/778;39/449-66;246/5883;298/1665;132/2540;;194/4245;03/6550;;180/5531;;84/3254-1;49/3441", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;xZ-0R3cAAAAJ;https://scholar.google.com.hk/citations?user=hDPRTekAAAAJ;uVV3rqcAAAAJ;https://scholar.google.com.hk/citations?user=-hSPgbEAAAAJ;VoVVJIgAAAAJ;;https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ;;;1wzEtxcAAAAJ;;XJLn4MYAAAAJ;z5SPCmgAAAAJ", "orcid": "0000-0003-0012-7397;;0000-0001-9764-1012;;;;;0000-0001-7060-1133;0009-0000-7528-8131;0000-0001-7155-4076;;;;;0000-0002-4888-4445", "linkedin": ";;;;;;;;;;;;;;", "or_profile": "~Junkun_Yuan1;~Xinyu_Zhang3;~Hao_Zhou13;~Jian_Wang11;~Zhongwei_Qiu1;~Zhiyin_Shao1;~Shaofeng_Zhang1;~Sifan_Long1;~Kun_Kuang1;~Kun_Yao1;~Junyu_Han1;~Errui_Ding2;~Lanfen_Lin1;~Fei_Wu2;~Jingdong_Wang1", "aff": "Zhejiang University;;Baidu;Baidu;University of Science and Technology Beijing;South China University of Technology;Shanghai Jiaotong University;Jilin University;Zhejiang University;Baidu;;Baidu;Zhejiang University;Zhejiang University;Baidu", "aff_domain": "zju.edu.cn;;baidu.com;baidu.com;ustb.edu.cn;scut.edu.cn;sjtu.edu.cn;jlu.edu.cn;zju.edu.cn;baidu.com;;baidu.com;zju.edu.cn;zju.edu.cn;baidu.com", "position": "PhD student;;Researcher;Engineer;PhD student;MS student;PhD student;PhD student;Associate Professor;Manager;;Director;Full Professor;Full Professor;Chief Scientist for Computer Vision", "bibtex": "@inproceedings{\nyuan2023hap,\ntitle={{HAP}: Structure-Aware Masked Image Modeling for Human-Centric Perception},\nauthor={Junkun Yuan and Xinyu Zhang and Hao Zhou and Jian Wang and Zhongwei Qiu and Zhiyin Shao and Shaofeng Zhang and Sifan Long and Kun Kuang and Kun Yao and Junyu Han and Errui Ding and Lanfen Lin and Fei Wu and Jingdong Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6XPPfZkhKi}\n}", "github": "", "project": "", "reviewers": "ueQY;BM1x;rHqg;QQMw", "pdf_size": 8152458, "rating": "5;5;6;6", "confidence": "5;4;5;5", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;4;3;3", "wc_summary": "70;88;69;73", "wc_strengths": "56;84;132;279", "wc_weaknesses": "110;73;30;466", "wc_questions": "59;74;114;76", "wc_limitations": "18;27;69;1", "wc_review": "313;346;414;895", "wc_reply_reviewers": "91;28;61;173", "wc_reply_authors": "69;34;130;182", "reply_reviewers": "1;1;1;1", 
"reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 7.648529270389178 ], "wc_strengths_avg": [ 137.75, 85.9603833169676 ], "wc_weaknesses_avg": [ 169.75, 173.36720422271335 ], "wc_questions_avg": [ 80.75, 20.29008378494283 ], "wc_limitations_avg": [ 28.75, 25.043711785596 ], "wc_review_avg": [ 492.0, 235.50477702161373 ], "wc_reply_reviewers_avg": [ 88.25, 53.76511415406834 ], "wc_reply_authors_avg": [ 103.75, 56.75550634079481 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2228029422953610627&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;;baidu.com;baidu.com;ustb.edu.cn;scut.edu.cn;sjtu.edu.cn;jlu.edu.cn;zju.edu.cn;baidu.com;;baidu.com;zju.edu.cn;zju.edu.cn;baidu.com", "author_num": 15, "aff_unique_index": "0;1;1;2;3;4;5;0;1;1;0;0;1", "aff_unique_norm": "Zhejiang University;Baidu;University of Science and Technology Beijing;South China University of Technology;Shanghai Jiao Tong University;Jilin University", "aff_unique_dep": ";Baidu, Inc.;;;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.baidu.com;http://www.ustb.edu.cn;https://www.scut.edu.cn;https://www.sjtu.edu.cn;http://www.jlu.edu.cn", "aff_unique_abbr": "ZJU;Baidu;USTB;SCUT;SJTU;JLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Strategic Classification under Unknown Personalized Manipulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72752", "id": "6cJKcIxPck", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/543924fdf260ba990f2ef84f940f3db2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6cJKcIxPck", "openreview": "https://openreview.net/forum?id=6cJKcIxPck", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72752", "video": "https://nips.cc/virtual/2023/poster/72752", "author_site": "Han Shao, Avrim Blum, Omar Montasser", "tldr": "", "abstract": "We study the fundamental mistake bound and sample complexity in the strategic classification, where agents can strategically manipulate their feature vector up to an extent in order to be predicted as positive. For example, given a classifier determining college admission, student candidates may try to take easier classes to improve their GPA, retake SAT and change schools in an effort to fool the classifier. *Ball manipulations* are a widely studied class of manipulations in the literature, where agents can modify their feature vector within a bounded radius ball. Unlike most prior work, our work consider manipulations to be *personalized*, meaning that agents can have different levels of manipulation abilities (e.g., varying radii for ball manipulations), and *unknown* to the learner.\n\nWe formalize the learning problem in an interaction model where the learner first deploys a classifier and the agent manipulates the feature vector within their manipulation set to game the deployed classifier. 
We investigate various scenarios in terms of the information available to the learner during the interaction, such as observing the original feature vector before or after deployment, observing the manipulated feature vector, or not seeing either the original or the manipulated feature vector. We begin by providing online mistake bounds and PAC sample complexity in these scenarios for ball manipulations. We also explore non-ball manipulations and show that, even in the simplest scenario where both the original and the manipulated feature vectors are revealed, the mistake bounds and sample complexity are lower bounded by $\\Omega(|\\mathcal H|)$ when the target function belongs to a known class $\\mathcal H$.", "keywords": "strategic classification;mistake bound in online learning;PAC learning", "primary_area": "", "supplementary_material": "/attachment/0f6461b91f79fd9ca239b6688c56751e9bae7861.pdf", "author": "Han Shao;Avrim Blum;Omar Montasser", "authorids": "~Han_Shao4;~Avrim_Blum1;~Omar_Montasser1", "gender": "F;M;M", "homepage": "https://sites.google.com/view/hanshao/;https://home.ttic.edu/~avrim/;https://ttic.uchicago.edu/~omar/", "dblp": ";b/AvrimBlum;194/3002", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=Jlv4MR4AAAAJ;u455rGAAAAAJ", "orcid": "0009-0005-9206-1357;;", "linkedin": ";;", "or_profile": "~Han_Shao4;~Avrim_Blum1;~Omar_Montasser1", "aff": "Toyota Technological Institute at Chicago;Toyota Technological Institute at Chicago;Toyota Technological Institute at Chicago", "aff_domain": "ttic.edu;ttic.edu;ttic.edu", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nshao2023strategic,\ntitle={Strategic Classification under Unknown Personalized Manipulation},\nauthor={Han Shao and Avrim Blum and Omar Montasser},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6cJKcIxPck}\n}", "github": "", "project": "", "reviewers": "6Wy9;V2P4;Hsdn;XhgR;Ro1X", "pdf_size": 525764, "rating": "5;5;7;7;7", "confidence": "3;2;3;4;3", "soundness": "3;3;4;4;3", "novelty": "3;2;3;3;3", "presentation": "3;2;4;3;3", "wc_summary": "142;148;64;157;106", "wc_strengths": "44;73;42;190;72", "wc_weaknesses": "165;244;23;193;53", "wc_questions": "28;125;139;69;49", "wc_limitations": "23;14;1;9;22", "wc_review": "402;604;269;618;302", "wc_reply_reviewers": "73;39;10;21;20", "wc_reply_authors": "144;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 123.4, 34.38371707654657 ], "wc_strengths_avg": [ 84.2, 54.52485671691399 ], "wc_weaknesses_avg": [ 135.6, 84.15604553447126 ], "wc_questions_avg": [ 82.0, 43.06274491947767 ], "wc_limitations_avg": [ 13.8, 8.231646250902672 ], "wc_review_avg": [ 439.0, 147.1760850138364 ], "wc_reply_reviewers_avg": [ 32.6, 22.258481529520385 ], "wc_reply_authors_avg": [ 28.8, 57.60000000000001 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9176841894800955495&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
"ttic.edu;ttic.edu;ttic.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Toyota Technological Institute at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.tti-chicago.org", "aff_unique_abbr": "TTI Chicago", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Globally injective and bijective neural operators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72751", "id": "6cc69ArD3O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b40d5797756800c97f3d525c2e4c8357-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6cc69ArD3O", "openreview": "https://openreview.net/forum?id=6cc69ArD3O", "poster": "/media/PosterPDFs/NeurIPS%202023/72751.png?t=1699603152.3007991", "slides": "https://nips.cc/virtual/2023/poster/72751", "video": "https://nips.cc/virtual/2023/poster/72751", "author_site": "Takashi Furuya, Michael Puthawala, Matti Lassas, Maarten V. de Hoop", "tldr": "", "abstract": "Recently there has been great interest in operator learning, where networks learn operators between function spaces from an essentially infinite-dimensional perspective. In this work we present results for when the operators learned by these networks are injective and surjective. As a warmup, we combine prior work in both the finite-dimensional ReLU and operator learning setting by giving sharp conditions under which ReLU layers with linear neural operators are injective. We then consider the case when the activation function is pointwise bijective and obtain sufficient conditions for the layer to be injective. We remark that this question, while trivial in the finite-rank setting, is subtler in the infinite-rank setting and is proven using tools from Fredholm theory. Next, we prove that our supplied injective neural operators are universal approximators and that their implementation, with finite-rank neural networks, are still injective. This ensures that injectivity is not 'lost' in the transcription from analytical operators to their finite-rank implementation with networks. Finally, we conclude with an increase in abstraction and consider general conditions when subnetworks, which may have many layers, are injective and surjective and provide an exact inversion from a 'linearization.\u2019 This section uses general arguments from Fredholm theory and Leray-Schauder degree theory for non-linear integral equations to analyze the mapping properties of neural operators in function spaces. These results apply to subnetworks formed from the layers considered in this work, under natural conditions. We believe that our work has applications in Bayesian uncertainty quantification where injectivity enables likelihood estimation and in inverse problems where surjectivity and injectivity corresponds to existence and uniqueness of the solutions, respectively.", "keywords": "Deep Learning;Operator Learning;Functional Analysis;Injectivity;Bijectivity;Universal approximation", "primary_area": "", "supplementary_material": "/attachment/74de35ca7134e2e065e20d7c64d2703ee301c4b0.pdf", "author": "Takashi Furuya;Michael Anthony Puthawala;Matti Lassas;Maarten V. 
de Hoop", "authorids": "~Takashi_Furuya1;~Michael_Anthony_Puthawala1;~Matti_Lassas1;~Maarten_V._de_Hoop2", "gender": "M;M;M;", "homepage": ";https://scholar.google.com/citations?user=ntwCDpoAAAAJ&hl=en;https://www.mv.helsinki.fi/home/lassas/index.html;http://maartendehoop.rice.edu/", "dblp": ";;;60/4525", "google_scholar": "https://scholar.google.co.jp/citations?user=e3YJUQoAAAAJ;ntwCDpoAAAAJ;;", "orcid": "0000-0001-6132-6846;;0000-0003-2043-3156;", "linkedin": ";;;", "or_profile": "~Takashi_Furuya1;~Michael_Anthony_Puthawala1;~Matti_Lassas1;~Maarten_v._de_Hoop1", "aff": "Shimane University;South Dakota State University;University of Helsinki;Rice University", "aff_domain": "shimane-u.ac.jp;sdstate.edu;helsinki.fi;rice.edu", "position": "Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfuruya2023globally,\ntitle={Globally injective and bijective neural operators},\nauthor={Takashi Furuya and Michael Anthony Puthawala and Matti Lassas and Maarten V. de Hoop},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6cc69ArD3O}\n}", "github": "", "project": "", "reviewers": "GL1W;ZQ9v;GKdz;kfSm;KyUH", "pdf_size": 531523, "rating": "4;5;6;7;8", "confidence": "3;4;2;3;3", "soundness": "4;3;4;4;3", "novelty": "2;3;3;4;4", "presentation": "3;1;3;3;3", "wc_summary": "50;34;161;220;92", "wc_strengths": "13;15;109;26;68", "wc_weaknesses": "95;640;194;104;76", "wc_questions": "92;17;110;8;104", "wc_limitations": "1;66;3;1;53", "wc_review": "251;772;577;359;393", "wc_reply_reviewers": "58;1316;130;0;0", "wc_reply_authors": "0;1985;49;0;0", "reply_reviewers": "1;3;1;0;0", "reply_authors": "1;4;2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 111.4, 69.84439848692234 ], "wc_strengths_avg": [ 46.2, 37.155887824138986 ], "wc_weaknesses_avg": [ 221.8, 213.01492905428015 ], "wc_questions_avg": [ 66.2, 44.31884475028654 ], "wc_limitations_avg": [ 24.8, 28.63843571147 ], "wc_review_avg": [ 470.4, 183.77333865389724 ], "wc_reply_reviewers_avg": [ 300.8, 509.84640824467914 ], "wc_reply_authors_avg": [ 406.8, 789.3281700281577 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.22360679774997896, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3912714657570890648&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "email": "shimane-u.ac.jp;sdstate.edu;helsinki.fi;rice.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shimane University;South Dakota State University;University of Helsinki;Rice University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shimane-u.ac.jp;https://www.sdsu.edu;https://www.helsinki.fi;https://www.rice.edu", "aff_unique_abbr": "Shimane U;SDSU;UH;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Japan;United States;Finland" }, { "title": "Offline Imitation Learning with Variational Counterfactual Reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72750", "id": "6d9Yxttb3w", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/8833c8aa10542d24d693bbaf6a4598f5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6d9Yxttb3w", "openreview": "https://openreview.net/forum?id=6d9Yxttb3w", "poster": "/media/PosterPDFs/NeurIPS%202023/72750.png?t=1701537525.9907506", "slides": "https://nips.cc/virtual/2023/poster/72750", "video": "https://nips.cc/virtual/2023/poster/72750", "author_site": "Zexu Sun, Bowei He, Jinxin Liu, Xu Chen, Chen Ma, Shuai Zhang", "tldr": "", "abstract": "In offline imitation learning (IL), an agent aims to learn an optimal expert behavior policy without additional online environment interactions. However, in many real-world scenarios, such as robotics manipulation, the offline dataset is collected from suboptimal behaviors without rewards. Due to the scarce expert data, the agents usually suffer from simply memorizing poor trajectories and are vulnerable to the variations in the environments, lacking the capability of generalizing to new environments.To automatically generate high-quality expert data and improve the generalization ability of the agent, we propose a framework named \\underline{O}ffline \\underline{I}mitation \\underline{L}earning with \\underline{C}ounterfactual data \\underline{A}ugmentation (OILCA) by doing counterfactual inference. In particular, we leverage identifiable variational autoencoder to generate \\textit{counterfactual} samples for expert data augmentation. We theoretically analyze the influence of the generated expert data and the improvement of generalization. Moreover, we conduct extensive experiments to demonstrate that our approach significantly outperforms various baselines on both \\textsc{DeepMind Control Suite} benchmark for in-distribution performance and \\textsc{CausalWorld} benchmark for out-of-distribution generalization.", "keywords": "offline imitaion learning;counterfactual reasoning;data augmentation", "primary_area": "", "supplementary_material": "/attachment/996f18ddee5a744a784ece4454b9d96c9c1271df.pdf", "author": "Zexu Sun;Bowei He;Jinxin Liu;Xu Chen;Chen Ma;Shuai Zhang", "authorids": "~Zexu_Sun1;~Bowei_He1;~Jinxin_Liu1;~Xu_Chen13;~Chen_Ma3;~Shuai_Zhang18", "gender": "M;M;;M;M;", "homepage": ";;;https://gsai.ruc.edu.cn/chenxu;https://allenjack.github.io;", "dblp": "358/7111;179/0894;;83/6331-17;126/4567-1;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;1cH0A9cAAAAJ;;loPoqy0AAAAJ;https://scholar.google.ca/citations?user=sSy7nvsAAAAJ;", "orcid": "0000-0002-6727-6242;0000-0002-0360-2950;;0000-0003-0144-1775;0000-0001-7933-9813;", "linkedin": ";;;;ma-chen-93455693/?locale=en_US;", "or_profile": "~Zexu_Sun1;~Bowei_He1;~Jinxin_Liu1;~Xu_Chen13;~Chen_Ma3;~Shuai_Zhang18", "aff": "Renmin University of China;City University of Hong Kong;;Renmin University of China;City University of Hong Kong;", "aff_domain": "ruc.edu.cn;my.cityu.edu.hk;;ruc.edu.cn;cityu.edu.hk;", "position": "PhD student;PhD student;;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nsun2023offline,\ntitle={Offline Imitation Learning with Variational Counterfactual Reasoning},\nauthor={Zexu Sun and Bowei He and Jinxin Liu and Xu Chen and Chen Ma and Shuai Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6d9Yxttb3w}\n}", "github": "", "project": "", "reviewers": "Txp8;agsz;srX3;XaVg", "pdf_size": 0, "rating": "4;6;7;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", 
"presentation": "3;2;3;3", "wc_summary": "111;54;195;54", "wc_strengths": "96;27;296;58", "wc_weaknesses": "502;263;424;703", "wc_questions": "17;298;198;2", "wc_limitations": "49;3;121;1", "wc_review": "775;645;1234;818", "wc_reply_reviewers": "818;464;28;17", "wc_reply_authors": "2459;2743;8;1067", "reply_reviewers": "3;3;1;1", "reply_authors": "6;8;2;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.5, 57.72564421468157 ], "wc_strengths_avg": [ 119.25, 104.93182310433761 ], "wc_weaknesses_avg": [ 473.0, 158.30508519943382 ], "wc_questions_avg": [ 128.75, 124.49372474145032 ], "wc_limitations_avg": [ 43.5, 48.69034811951954 ], "wc_review_avg": [ 868.0, 220.70002265518687 ], "wc_reply_reviewers_avg": [ 331.75, 333.63930748639314 ], "wc_reply_authors_avg": [ 1569.25, 1102.168402513881 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 4.75, 2.384848003542364 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3174517306028471036&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;my.cityu.edu.hk;;ruc.edu.cn;cityu.edu.hk;", "author_num": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Renmin University of China;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "RUC;CityU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Balancing Risk and Reward: A Batched-Bandit Strategy for Automated Phased Release", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72749", "id": "6e86TccKyQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f02a7dd6bd3d038b51d092d99e74c638-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6e86TccKyQ", "openreview": "https://openreview.net/forum?id=6e86TccKyQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72749", "video": "https://nips.cc/virtual/2023/poster/72749", "author_site": "Yufan Li, Jialiang Mao, Iavor Bojinov", "tldr": "", "abstract": "Phased releases are a common strategy in the technology industry for gradually releasing new products or updates through a sequence of A/B tests in which the number of treated units gradually grows until full deployment or deprecation. Performing phased releases in a principled way requires selecting the proportion of units assigned to the new release in a way that balances the risk of an adverse effect with the need to iterate and learn from the experiment rapidly. In this paper, we formalize this problem and propose an algorithm that automatically determines the release percentage at each stage in the schedule, balancing the need to control risk while maximizing ramp-up speed. Our framework models the challenge as a constrained batched bandit problem that ensures that our pre-specified experimental budget is not depleted with high probability. 
Our proposed algorithm leverages an adaptive Bayesian approach in which the maximal number of units assigned to the treatment is determined by the posterior distribution, ensuring that the probability of depleting the remaining budget is low. Notably, our approach analytically solves for the ramp sizes by inverting probability bounds, eliminating the need for challenging rare-event Monte Carlo simulation. It only requires computing means and variances of outcome subsets, making it highly efficient and parallelizable.", "keywords": "bandit algorithms;online learning;causality;Bayesian inference", "primary_area": "", "supplementary_material": "/attachment/dcd8a7ff94997dba3e5d175859ae03fda71398a5.zip", "author": "Yufan Li;Jialiang Mao;Iavor Bojinov", "authorids": "~Yufan_Li2;~Jialiang_Mao1;~Iavor_Bojinov1", "gender": "M;M;M", "homepage": "https://statistics.fas.harvard.edu/people/yufan-li;;https://www.ibojinov.com", "dblp": ";299/5016;", "google_scholar": "rATgLxcAAAAJ;;LggQflEAAAAJ", "orcid": "0000-0001-5412-3397;0009-0007-1234-1004;0000-0002-3470-8539", "linkedin": ";jialiangmao/;iavor-bojinov/", "or_profile": "~Yufan_Li2;~Jialiang_Mao1;~Iavor_Bojinov1", "aff": "Harvard University, Harvard University;LinkedIn;Harvard Business School", "aff_domain": "g.harvard.edu;linkedin.com;hbs.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nli2023balancing,\ntitle={Balancing Risk and Reward: A Batched-Bandit Strategy for Automated Phased Release},\nauthor={Yufan Li and Jialiang Mao and Iavor Bojinov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6e86TccKyQ}\n}", "github": "", "project": "", "reviewers": "8PYx;hXT6;m8cf;mRfu", "pdf_size": 716476, "rating": "6;6;6;6", "confidence": "1;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "162;16;65;55", "wc_strengths": "65;52;82;59", "wc_weaknesses": "90;25;220;57", "wc_questions": "5;1;190;25", "wc_limitations": "1;1;108;1", "wc_review": "323;95;665;197", "wc_reply_reviewers": "24;0;57;8", "wc_reply_authors": "0;0;80;0", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.5, 53.73313688963264 ], "wc_strengths_avg": [ 64.5, 11.10180165558726 ], "wc_weaknesses_avg": [ 98.0, 74.09116006650186 ], "wc_questions_avg": [ 55.25, 78.32743772140131 ], "wc_limitations_avg": [ 27.75, 46.332359102467464 ], "wc_review_avg": [ 320.0, 214.93487385717563 ], "wc_reply_reviewers_avg": [ 22.25, 21.84462176372024 ], "wc_reply_authors_avg": [ 20.0, 34.64101615137755 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:c5TihIHzAQQJ:scholar.google.com/&scioq=Balancing+Risk+and+Reward:+A+Batched-Bandit+Strategy+for+Automated+Phased+Release&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "g.harvard.edu;linkedin.com;hbs.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;LinkedIn Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.linkedin.com", "aff_unique_abbr": "Harvard;LinkedIn", "aff_campus_unique_index": 
"1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "An Efficient End-to-End Training Approach for Zero-Shot Human-AI Coordination", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72748", "id": "6ePsuwXUwf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/07a363fd2263091c2063998e0034999c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6ePsuwXUwf", "openreview": "https://openreview.net/forum?id=6ePsuwXUwf", "poster": "/media/PosterPDFs/NeurIPS%202023/72748.png?t=1701761910.5590181", "slides": "https://nips.cc/virtual/2023/poster/72748", "video": "https://nips.cc/virtual/2023/poster/72748", "author_site": "Xue Yan, Jiaxian Guo, Xingzhou Lou, Jun Wang, Haifeng Zhang, Yali Du", "tldr": "", "abstract": "The goal of zero-shot human-AI coordination is to develop an agent that can collaborate with humans without relying on human data. Prevailing two-stage population-based methods require a diverse population of mutually distinct policies to simulate diverse human behaviors. The necessity of such populations severely limits their computational efficiency. To address this issue, we propose E3T, an **E**fficient **E**nd-to-**E**nd **T**raining approach for zero-shot human-AI coordination. E3T employs a mixture of ego policy and random policy to construct the partner policy, making it both coordination-skilled and diverse. In this way, the ego agent is end-to-end trained with this mixture policy without the need of a pre-trained population, thus significantly improving the training efficiency. In addition, a partner modeling module is proposed to predict the partner's action from historical information. With the predicted partner's action, the ego policy is able to adapt its policy and take actions accordingly when collaborating with humans of different behavior patterns. Empirical results on the Overcooked environment show that our method significantly improves the training efficiency while preserving comparable or superior performance than the population-based baselines. 
Demo videos are available at https://sites.google.com/view/e3t-overcooked.", "keywords": "Zero-Shot Coordination;Human-AI coordination;Training Efficiency;Partner Modeling", "primary_area": "", "supplementary_material": "", "author": "Xue Yan;Jiaxian Guo;Xingzhou Lou;Jun Wang;Haifeng Zhang;Yali Du", "authorids": "~Xue_Yan2;~Jiaxian_Guo2;~Xingzhou_Lou1;~Jun_Wang2;~Haifeng_Zhang3;~Yali_Du1", "gender": "F;M;M;M;;", "homepage": ";;https://github.com/LxzGordon;http://www0.cs.ucl.ac.uk/staff/jun.wang/;https://pkuzhf.github.io;", "dblp": ";206/6264;328/5451;w/JunWang12;93/7133-2;", "google_scholar": "5d0Upv8AAAAJ;wQgPocEAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;;", "orcid": ";;;;;", "linkedin": "\u96ea-\u95eb-48926a188/;;;;;", "or_profile": "~Xue_Yan2;~Jiaxian_Guo2;~Xingzhou_Lou1;~Jun_Wang2;~Haifeng_Zhang3;~Yali_Du1", "aff": "Institute of Automation, Chinese Academy of Sciences;The University of Tokyo, The University of Tokyo;University of Chinese Academy of Sciences;University College London;Institute of Automation, Chinese Academy of Sciences;", "aff_domain": "ia.ac.cn;weblab.t.u-tokyo.ac.jp;ucas.ac.cn;ucl.ac.uk;ia.ac.cn;", "position": "PhD student;Postdoc;PhD student;Professor;Associate Professor;", "bibtex": "@inproceedings{\nyan2023an,\ntitle={An Efficient End-to-End Training Approach for Zero-Shot Human-{AI} Coordination},\nauthor={Xue Yan and Jiaxian Guo and Xingzhou Lou and Jun Wang and Haifeng Zhang and Yali Du},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6ePsuwXUwf}\n}", "github": "", "project": "", "reviewers": "kGQD;DWEk;dKj6;dkSb", "pdf_size": 910550, "rating": "6;7;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;4;3", "wc_summary": "105;96;100;117", "wc_strengths": "43;100;289;66", "wc_weaknesses": "79;61;337;66", "wc_questions": "66;40;30;160", "wc_limitations": "19;7;1;19", "wc_review": "312;304;757;428", "wc_reply_reviewers": "53;32;39;34", "wc_reply_authors": "539;0;0;492", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 104.5, 7.88986691902975 ], "wc_strengths_avg": [ 124.5, 97.11462299777516 ], "wc_weaknesses_avg": [ 135.75, 116.37734960034105 ], "wc_questions_avg": [ 74.0, 51.36146415358503 ], "wc_limitations_avg": [ 11.5, 7.794228634059948 ], "wc_review_avg": [ 450.25, 183.77482825457898 ], "wc_reply_reviewers_avg": [ 39.5, 8.200609733428363 ], "wc_reply_authors_avg": [ 257.75, 258.28508958126093 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13598398955866429748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ia.ac.cn;weblab.t.u-tokyo.ac.jp;ucas.ac.cn;ucl.ac.uk;ia.ac.cn;", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Tokyo;University of Chinese Academy of Sciences;University College London", "aff_unique_dep": "Institute of Automation;;;", "aff_unique_url": 
"http://www.ia.cas.cn;https://www.u-tokyo.ac.jp;http://www.ucas.ac.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "CAS;UTokyo;UCAS;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "China;Japan;United Kingdom" }, { "title": "Conformalized matrix completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72747", "id": "6f320HfMeS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f7e4bb7a35dd4cb426203c91a4bfa10-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6f320HfMeS", "openreview": "https://openreview.net/forum?id=6f320HfMeS", "poster": "/media/PosterPDFs/NeurIPS%202023/72747.png?t=1701288160.498751", "slides": "https://nips.cc/virtual/2023/poster/72747", "video": "https://nips.cc/virtual/2023/poster/72747", "author_site": "Yu Gui, Rina Barber, Cong Ma", "tldr": "", "abstract": "Matrix completion aims to estimate missing entries in a data matrix, using the assumption of a low-complexity structure (e.g., low-rankness) so that imputation is possible. While many effective estimation algorithms exist in the literature, uncertainty quantification for this problem has proved to be challenging, and existing methods are extremely sensitive to model misspecification. In this work, we propose a distribution-free method for predictive inference in the matrix completion problem. Our method adapts the framework of conformal prediction, which provides prediction intervals with guaranteed distribution-free validity in the setting of regression, to the problem of matrix completion. Our resulting method, conformalized matrix completion (cmc), offers provable predictive coverage regardless of the accuracy of the low-rank model. 
Empirical results on simulated and real data demonstrate that cmc is robust to model misspecification while matching the performance of existing model-based methods when the model is correct.", "keywords": "matrix completion;conformal inference;uncertainty quantification", "primary_area": "", "supplementary_material": "/attachment/f2d840999105a5b28e4978d262b1d09af7f6dbdb.zip", "author": "Yu Gui;Rina Barber;Cong Ma", "authorids": "~Yu_Gui1;~Rina_Barber1;~Cong_Ma1", "gender": "M;F;M", "homepage": "https://yugjerry.github.io/;http://www.stat.uchicago.edu/~rina;https://congma1028.github.io/", "dblp": "276/7601;;42/10808", "google_scholar": "22zXYlkAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yu_Gui1;~Rina_Barber1;~Cong_Ma1", "aff": "University of Chicago;University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu;uchicago.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngui2023conformalized,\ntitle={Conformalized matrix completion},\nauthor={Yu Gui and Rina Barber and Cong Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6f320HfMeS}\n}", "github": "", "project": "", "reviewers": "1TvL;bWVG;u41g;1PHH", "pdf_size": 1287015, "rating": "5;5;6;6", "confidence": "2;2;4;4", "soundness": "2;3;3;3", "novelty": "3;2;2;2", "presentation": "3;2;3;3", "wc_summary": "67;58;287;199", "wc_strengths": "37;126;230;113", "wc_weaknesses": "15;195;348;316", "wc_questions": "125;5;283;262", "wc_limitations": "1;11;90;24", "wc_review": "245;395;1238;914", "wc_reply_reviewers": "0;49;43;72", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 152.75, 95.51537834296633 ], "wc_strengths_avg": [ 126.5, 68.7477272351603 ], "wc_weaknesses_avg": [ 218.5, 130.6148919534063 ], "wc_questions_avg": [ 168.75, 112.33515700794653 ], "wc_limitations_avg": [ 31.5, 34.74550330618338 ], "wc_review_avg": [ 698.0, 398.5203884370284 ], "wc_reply_reviewers_avg": [ 41.0, 26.02883016964074 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3622513074535707788&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "uchicago.edu;uchicago.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6fXzz8cvTA", "title": "DNDesign: Denoising is All You Need for Protein Inverse Folding", "track": "main", "status": "Reject", "tldr": "", "abstract": "Based on the central dogma that protein structure determines its functionality, an important approach for protein sequence design is to identify promising sequences that fold into pre-designed structures based on domain knowledge. 
Numerous studies have introduced deep generative model-based inverse-folding, which utilizes various generative models to translate fixed backbones to corresponding sequences. In this work, we reveal that denoising training enables models to deeply capture the protein energy landscape, which previous models do not fully leverage. Based on this, we propose a novel Denoising-enhanced protein fixed backbone design (DNDesign), which combines conventional inverse-folding networks with a plug-in module that learns physical understanding via denoising training and transfers this knowledge to the entire network. Through extensive experiments, we demonstrate that DNDesign can easily be integrated into state-of-the-art models and improve performance in multiple modes, including auto-regressive, non-auto-regressive, and scaled-up scenarios. Furthermore, we introduce a fixed backbone conservation analysis based on potential energy changes, which confirms that DNDesign ensures more energetically favorable inverse-folding.", "keywords": "Protein sequence design;Inverse folding;Denoising;Protein modeling", "primary_area": "", "supplementary_material": "/attachment/732ba97c64509df7c2bbfd7f1e9c0b39be5c86b3.zip", "author": "Youhan Lee;Jaehoon Kim", "authorids": "~Youhan_Lee1;~Jaehoon_Kim1", "gender": "M;M", "homepage": ";", "dblp": "190/1819;", "google_scholar": "https://scholar.google.co.kr/citations?user=EFNg9UcAAAAJ;", "orcid": ";0000-0001-8598-3429", "linkedin": "youhanlee/;", "or_profile": "~Youhan_Lee1;~Jaehoon_Kim1", "aff": "Kakao Brain Corp;Kakaobrain", "aff_domain": "kakaobrain.com;kakaobrain.com", "position": "Researcher;Researcher", "bibtex": "@misc{\nlee2023dndesign,\ntitle={{DND}esign: Denoising is All You Need for Protein Inverse Folding},\nauthor={Youhan Lee and Jaehoon Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=6fXzz8cvTA}\n}", "github": "", "project": "", "reviewers": "r7Sm;iMTN;xi89;LQo1", "site": "https://openreview.net/forum?id=6fXzz8cvTA", "pdf_size": 656260, "rating": "4;4;4;5", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;2", "wc_summary": "93;77;46;110", "wc_strengths": "64;84;28;26", "wc_weaknesses": "54;86;118;56", "wc_questions": "476;94;95;201", "wc_limitations": "9;1;1;11", "wc_review": "696;342;288;404", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.5, 23.58495283014151 ], "wc_strengths_avg": [ 50.5, 24.550967394381836 ], "wc_weaknesses_avg": [ 78.5, 26.091186251299497 ], "wc_questions_avg": [ 216.5, 156.00400635881118 ], "wc_limitations_avg": [ 5.5, 4.55521678957215 ], "wc_review_avg": [ 432.5, 157.571412381815 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5033009364078959372&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Kakao Brain", "aff_unique_dep": "Corp", "aff_unique_url": "https://www.kakaobrain.com", "aff_unique_abbr": "Kakao Brain", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Optimal and Fair Encouragement Policy Evaluation and Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72746", "id": "6fuZs3ibGA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c7967a442300bff58e9d7b73aa26f24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6fuZs3ibGA", "openreview": "https://openreview.net/forum?id=6fuZs3ibGA", "poster": "/media/PosterPDFs/NeurIPS%202023/72746.png?t=1701997587.6096272", "slides": "https://nips.cc/virtual/2023/poster/72746", "video": "https://nips.cc/virtual/2023/poster/72746", "tldr": "", "abstract": "In consequential domains, it is often impossible to compel individuals to take treatment, so that optimal policy rules are merely suggestions in the presence of human non-adherence to treatment recommendations. In these same domains, there may be heterogeneity both in who responds in taking-up treatment, and heterogeneity in treatment efficacy. For example, in social services, a persistent puzzle is the gap in take-up of beneficial services among those who may benefit from them the most. When in addition the decision-maker has distributional preferences over both access and average outcomes, the optimal decision rule changes. We study identification, doubly-robust estimation, and robust estimation under potential violations of positivity. We consider fairness constraints such as demographic parity in treatment take-up, and other constraints, via constrained optimization. Our framework can be extended to handle algorithmic recommendations under an often-reasonable covariate-conditional exclusion restriction, using our robustness checks for lack of positivity in the recommendation. We develop a two-stage, online learning-based algorithm for solving over parametrized policy classes under general constraints to obtain variance-sensitive regret bounds. 
We assess improved recommendation rules in a stylized case study of optimizing recommendation of supervised release in the PSA-DMF pretrial risk-assessment tool while reducing surveillance disparities.", "keywords": "causal inference;fairness in machine learning;algorithmic fairness;criminal justice;policy learning;off-policy evaluation", "primary_area": "", "supplementary_material": "/attachment/a6240e295deb027e348f68e8e9227a2b8f42996a.zip", "author": "Angela Zhou", "authorids": "~Angela_Zhou1", "gender": "F", "homepage": "https://angelamzhou.github.io", "dblp": "194/2543", "google_scholar": "uSO4RPUAAAAJ", "orcid": "0000-0003-2814-5693", "linkedin": "", "or_profile": "~Angela_Zhou1", "aff": "University of Southern California", "aff_domain": "usc.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\nzhou2023optimal,\ntitle={Optimal and Fair Encouragement Policy Evaluation and Learning},\nauthor={Angela Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6fuZs3ibGA}\n}", "github": "", "project": "", "reviewers": "Cr9c;h3Xk;QSPP;2wiP;xiA7", "pdf_size": 1044100, "rating": "3;6;6;6;7", "confidence": "3;3;3;1;1", "soundness": "2;3;3;3;3", "novelty": "1;3;3;3;4", "presentation": "1;3;2;3;4", "wc_summary": "164;118;43;20;74", "wc_strengths": "75;172;63;19;77", "wc_weaknesses": "828;184;149;10;18", "wc_questions": "207;2;9;3;1", "wc_limitations": "20;2;11;3;1", "wc_review": "1294;478;275;55;171", "wc_reply_reviewers": "151;77;21;0;0", "wc_reply_authors": "623;0;89;0;11", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;1;2;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 2.2, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 83.8, 51.832036425361494 ], "wc_strengths_avg": [ 81.2, 50.00159997440082 ], "wc_weaknesses_avg": [ 237.8, 303.09100943446015 ], "wc_questions_avg": [ 44.4, 81.34764901335502 ], "wc_limitations_avg": [ 7.4, 7.2277243998370615 ], "wc_review_avg": [ 454.6, 442.1549954484287 ], "wc_reply_reviewers_avg": [ 49.8, 57.915110290838605 ], "wc_reply_authors_avg": [ 144.6, 241.50577632843482 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.5417363388859615, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12586963013112079235&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "usc.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "When is Agnostic Reinforcement Learning Statistically Tractable?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72745", "id": "6gWpJ0IExE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58a799d16fb0c1f2014e98f4ba972b25-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6gWpJ0IExE", "openreview": "https://openreview.net/forum?id=6gWpJ0IExE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72745", "video": "https://nips.cc/virtual/2023/poster/72745", "author_site": 
"Zeyu Jia, Gene Li, Alexander Rakhlin, Ayush Sekhari, Nati Srebro", "tldr": "", "abstract": "We study the problem of agnostic PAC reinforcement learning (RL): given a policy class $\\Pi$, how many rounds of interaction with an unknown MDP (with a potentially large state and action space) are required to learn an $\\epsilon$-suboptimal policy with respect to \\(\\Pi\\)? Towards that end, we introduce a new complexity measure, called the \\emph{spanning capacity}, that depends solely on the set \\(\\Pi\\) and is independent of the MDP dynamics. With a generative model, we show that the spanning capacity characterizes PAC learnability for every policy class $\\Pi$. However, for online RL, the situation is more subtle. We show there exists a policy class $\\Pi$ with a bounded spanning capacity that requires a superpolynomial number of samples to learn. This reveals a surprising separation for agnostic learnability between generative access and online access models (as well as between deterministic/stochastic MDPs under online access). On the positive side, we identify an additional \\emph{sunflower} structure which in conjunction with bounded spanning capacity enables statistically efficient online RL via a new algorithm called POPLER, which takes inspiration from classical importance sampling methods as well as recent developments for reachable-state identification and policy evaluation in reward-free exploration.", "keywords": "Agnostic Reinforcement Learning;Sample Complexity;Learning Theory;Complexity Measure", "primary_area": "", "supplementary_material": "", "author": "Zeyu Jia;Gene Li;Alexander Rakhlin;Ayush Sekhari;Nathan Srebro", "authorids": "~Zeyu_Jia1;~Gene_Li1;~Alexander_Rakhlin1;~Ayush_Sekhari1;~Nathan_Srebro1", "gender": "M;;M;M;M", "homepage": "https://www.mit.edu/~zyjia/;;http://www.mit.edu/~rakhlin/;https://ayush.sekhari.com/;http://ttic.uchicago.edu/~nati/", "dblp": ";;59/407;203/8152;50/3633", "google_scholar": "8TkJbjgAAAAJ;;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ;jH9i188AAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zeyu_Jia1;~Gene_Li1;~Alexander_Rakhlin1;~Ayush_Sekhari1;~Nathan_Srebro1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Chicago", "aff_domain": "mit.edu;;mit.edu;mit.edu;uchicago.edu", "position": "PhD student;;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\njia2023when,\ntitle={When is Agnostic Reinforcement Learning Statistically Tractable?},\nauthor={Zeyu Jia and Gene Li and Alexander Rakhlin and Ayush Sekhari and Nathan Srebro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6gWpJ0IExE}\n}", "github": "", "project": "", "reviewers": "c49B;Gfxz;HA2Z;yJrU", "pdf_size": 1533927, "rating": "6;6;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "4;3;3;4", "wc_summary": "118;125;238;139", "wc_strengths": "234;102;288;213", "wc_weaknesses": "86;130;326;507", "wc_questions": "103;117;40;322", "wc_limitations": "11;4;40;1", "wc_review": "552;478;932;1182", "wc_reply_reviewers": "51;23;56;86", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], 
"presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 155.0, 48.51288488638869 ], "wc_strengths_avg": [ 209.25, 67.69555007531883 ], "wc_weaknesses_avg": [ 262.25, 167.7205637362336 ], "wc_questions_avg": [ 145.5, 105.94928031846182 ], "wc_limitations_avg": [ 14.0, 15.443445211480501 ], "wc_review_avg": [ 786.0, 286.2481440987871 ], "wc_reply_reviewers_avg": [ 54.0, 22.34949663862701 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11145706691993962425&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mit.edu;;mit.edu;mit.edu;uchicago.edu", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.uchicago.edu", "aff_unique_abbr": "MIT;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Statistical Analysis of Quantum State Learning Process in Quantum Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72744", "id": "6gcY0MGNhj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/68efc144ad3b41108f779b51b9fb1300-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6gcY0MGNhj", "openreview": "https://openreview.net/forum?id=6gcY0MGNhj", "poster": "/media/PosterPDFs/NeurIPS%202023/72744.png?t=1700735788.952603", "slides": "https://nips.cc/virtual/2023/poster/72744", "video": "https://nips.cc/virtual/2023/poster/72744", "author_site": "Hao-Kai Zhang, Chenghong Zhu, Mingrui Jing, Xin Wang", "tldr": "", "abstract": "Quantum neural networks (QNNs) have been a promising framework in pursuing near-term quantum advantage in various fields, where many applications can be viewed as learning a quantum state that encodes useful data. As a quantum analog of probability distribution learning, quantum state learning is theoretically and practically essential in quantum machine learning. In this paper, we develop a no-go theorem for learning an unknown quantum state with QNNs even starting from a high-fidelity initial state. We prove that when the loss value is lower than a critical threshold, the probability of avoiding local minima vanishes exponentially with the qubit count, while only grows polynomially with the circuit depth. The curvature of local minima is concentrated to the quantum Fisher information times a loss-dependent constant, which characterizes the sensibility of the output state with respect to parameters in QNNs. These results hold for any circuit structures, initialization strategies, and work for both fixed ansatzes and adaptive methods. Extensive numerical simulations are performed to validate our theoretical results. 
Our findings place generic limits on good initial guesses and adaptive methods for improving the learnability and scalability of QNNs, and deepen the understanding of prior information's role in QNNs.", "keywords": "quantum neural networks;quantum state learning;quantum computing;quantum machine learning;quantum optimization", "primary_area": "", "supplementary_material": "/attachment/503b6b11d50c9b81d5755ec8d4c81777bb133d6a.zip", "author": "Hao-Kai Zhang;Chenghong Zhu;Mingrui Jing;Xin Wang", "authorids": "zhk20@mails.tsinghua.edu.cn;~Chenghong_Zhu1;~Mingrui_Jing2;~Xin_Wang48", "gender": ";;M;M", "homepage": ";https://github.com/chenghongz;;https://www.xinwang.info/", "dblp": ";342/3486;;10/5630-22", "google_scholar": ";;;BFkAPOQAAAAJ", "orcid": ";;0000-0002-6437-9852;0000-0002-0641-3186", "linkedin": ";;;", "or_profile": "zhk20@mails.tsinghua.edu.cn;~Chenghong_Zhu1;~Mingrui_Jing2;~Xin_Wang48", "aff": ";Baidu;The Hong Kong University of Science and Technology (Guangzhou);Baidu", "aff_domain": ";baidu.com;connect.hkust-gz.edu.cn;baidu.com", "position": ";baidu;PhD student;Researcher", "bibtex": "@inproceedings{\nzhang2023statistical,\ntitle={Statistical Analysis of Quantum State Learning Process in Quantum Neural Networks},\nauthor={Hao-Kai Zhang and Chenghong Zhu and Mingrui Jing and Xin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6gcY0MGNhj}\n}", "github": "", "project": "", "reviewers": "KReL;oE2w;atEi;HYxZ;a4pD", "pdf_size": 1517295, "rating": "6;6;7;7;8", "confidence": "4;2;3;3;4", "soundness": "3;3;3;4;4", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "142;72;80;65;262", "wc_strengths": "13;103;35;71;99", "wc_weaknesses": "101;66;0;104;46", "wc_questions": "58;61;56;40;18", "wc_limitations": "37;8;0;13;54", "wc_review": "351;310;171;293;479", "wc_reply_reviewers": "9;20;15;10;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 124.2, 74.14688125605824 ], "wc_strengths_avg": [ 64.2, 35.31798408743059 ], "wc_weaknesses_avg": [ 63.4, 38.44788680798985 ], "wc_questions_avg": [ 46.6, 16.044936896105263 ], "wc_limitations_avg": [ 22.4, 20.04594722132132 ], "wc_review_avg": [ 320.8, 99.2661070053621 ], "wc_reply_reviewers_avg": [ 10.8, 6.675327707311455 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4285714285714286, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1174867064518701574&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";baidu.com;connect.hkust-gz.edu.cn;baidu.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Baidu;Hong Kong University of Science and Technology", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.ust.hk", "aff_unique_abbr": "Baidu;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Large Language Model as Attributed Training Data Generator: A Tale of Diversity and Bias", "status": "Poster", "track": "Datasets & 
Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73693", "id": "6hZIfAY9GD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ae9500c4f5607caf2eff033c67daa9d7-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=6hZIfAY9GD", "openreview": "https://openreview.net/forum?id=6hZIfAY9GD", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73693", "video": "https://nips.cc/virtual/2023/poster/73693", "author_site": "Yue Yu, Yuchen Zhuang, Jieyu Zhang, Yu Meng, Alexander Ratner, Ranjay Krishna, Jiaming Shen, Chao Zhang", "tldr": "", "abstract": "Large language models (LLMs) have been recently leveraged as training data generators for various natural language processing (NLP) tasks. While previous research has explored different approaches to training models using generated data, they generally rely on simple class-conditional prompts, which may limit the diversity of the generated data and inherit systematic biases of LLM. Thus, we investigate training data generation with diversely attributed prompts (e.g., specifying attributes like length and style), which have the potential to yield diverse and attributed generated data. Our investigation focuses on datasets with high cardinality and diverse domains, wherein we demonstrate that attributed prompts outperform simple class-conditional prompts in terms of the resulting model's performance. Additionally, we present a comprehensive empirical study on data generation encompassing vital aspects like bias, diversity, and efficiency, and highlight three key observations: firstly, synthetic datasets generated by simple prompts exhibit significant biases, such as regional bias; secondly, attribute diversity plays a pivotal role in enhancing model performance; lastly, attributed prompts achieve the performance of simple class-conditional prompts while utilizing only 5\\% of the querying cost of ChatGPT associated with the latter. 
The data and code are available on {\\url{https://github.com/yueyu1030/AttrPrompt}}.", "keywords": "training data generation;large language model;text classification", "primary_area": "", "supplementary_material": "/attachment/89ed4b68c8b18163c525f958da9bc6283c901140.pdf", "author": "Yue Yu;Yuchen Zhuang;Jieyu Zhang;Yu Meng;Alexander Ratner;Ranjay Krishna;Jiaming Shen;Chao Zhang", "authorids": "~Yue_Yu2;~Yuchen_Zhuang1;~Jieyu_Zhang1;~Yu_Meng1;~Alexander_Ratner1;~Ranjay_Krishna1;~Jiaming_Shen1;~Chao_Zhang15", "gender": "M;M;M;M;M;M;;", "homepage": "https://yueyu1030.github.io;https://night-chen.github.io/;https://jieyuz2.github.io/;https://yumeng5.github.io/;https://ajratner.github.io/;http://ranjaykrishna.com;https://mickeysjm.github.io;http://chaozhang.org/", "dblp": ";191/5231.html;;30/4233-1;180/5513;167/3785;178/3627;94/3019-14", "google_scholar": "zQ3Jh6UAAAAJ;T-f6XlEAAAAJ;T_INUHUAAAAJ;S2-yZKcAAAAJ;rfwwtFYAAAAJ;IcqahyAAAAAJ;-ZJ0sCoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-3683-5208;;0000-0002-1846-2436;0000-0003-2554-2888;;0000-0001-8784-2531;0000-0002-0467-4956;0000-0003-3009-598X", "linkedin": ";;jieyu-zhang-3baaa8154/;;alexander-ratner-038ba239/;ranjay-krishna-1a344444/;jiaming-shen-08186710a/;", "or_profile": "~Yue_Yu2;~Yuchen_Zhuang1;~Jieyu_Zhang1;~Yu_Meng1;~Alexander_Ratner1;~Ranjay_Krishna1;~Jiaming_Shen1;~Chao_Zhang15", "aff": "Google;Adobe Systems;University of Washington;University of Illinois, Urbana Champaign;Department of Computer Science, University of Washington;University of Washington;Google Research;Georgia Institute of Technology", "aff_domain": "google.com;adobe.com;cs.washington.edu;illinois.edu;cs.washington.edu;cs.washington.edu;google.com;gatech.edu", "position": "Research Intern;Intern;PhD student;PhD student;Assistant Professor;Assistant Professor;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nyu2023large,\ntitle={Large Language Model as Attributed Training Data Generator: A Tale of Diversity and Bias},\nauthor={Yue Yu and Yuchen Zhuang and Jieyu Zhang and Yu Meng and Alexander Ratner and Ranjay Krishna and Jiaming Shen and Chao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=6hZIfAY9GD}\n}", "github": "", "project": "", "reviewers": "mJxq;t81z;upz4;KfWS;rTXq", "pdf_size": 793854, "rating": "6;6;7;7;7", "confidence": "4;3;3;4;4", "wc_summary_and_contributions": "160;113;57;75;79", "wc_strengths": "117;56;89;69;48", "wc_improvement": "164;58;39;179;195", "wc_limitations": "1;58;1;12;10", "wc_correctness": "13;1;1;9;9", "wc_clarity": "23;1;1;11;1", "wc_relation_to_prior_work": "18;1;12;12;36", "wc_documentation": "22;1;23;6;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "519;290;224;374;380", "wc_reply_reviewers": "0;0;0;35;0", "wc_reply_authors": "998;488;228;940;1178", "reply_reviewers": "0;0;0;1;0", "reply_authors": "2;1;1;3;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 96.8, 36.42197139090634 ], "wc_strengths_avg": [ 75.8, 24.830626250660693 ], "wc_improvement_avg": [ 127.0, 65.11835378754594 ], "wc_limitations_avg": [ 16.4, 21.28473631502162 ], "wc_correctness_avg": [ 6.6, 4.800000000000001 ], "wc_clarity_avg": [ 7.4, 8.708616422830897 ], "wc_relation_to_prior_work_avg": [ 15.8, 11.496086290559932 ], "wc_documentation_avg": [ 10.6, 9.89141041510259 ], "wc_additional_feedback_avg": [ 1.0, 0.0 
], "wc_review_avg": [ 357.4, 99.23628368696603 ], "wc_reply_reviewers_avg": [ 7.0, 14.0 ], "wc_reply_authors_avg": [ 766.4, 352.2985097896385 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 241, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11839381200951011410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;adobe.com;cs.washington.edu;illinois.edu;cs.washington.edu;cs.washington.edu;google.com;gatech.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;2;2;0;4", "aff_unique_norm": "Google;Adobe;University of Washington;University of Illinois Urbana-Champaign;Georgia Institute of Technology", "aff_unique_dep": "Google;Adobe Systems Incorporated;;;", "aff_unique_url": "https://www.google.com;https://www.adobe.com;https://www.washington.edu;https://illinois.edu;https://www.gatech.edu", "aff_unique_abbr": "Google;Adobe;UW;UIUC;Georgia Tech", "aff_campus_unique_index": "0;2;3;0", "aff_campus_unique": "Mountain View;;Urbana-Champaign;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Into the Single Cell Multiverse: an End-to-End Dataset for Procedural Knowledge Extraction in Biomedical Texts", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73692", "id": "6iRH9SITva", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/23e3d86c9a19d0caf2ec997e73dfcfbd-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=6iRH9SITva", "openreview": "https://openreview.net/forum?id=6iRH9SITva", "poster": "/media/PosterPDFs/NeurIPS%202023/73692.png?t=1699481734.9068012", "slides": "https://nips.cc/virtual/2023/poster/73692", "video": "https://nips.cc/virtual/2023/poster/73692", "author_site": "Ruth Dannenfelser, Jeffrey Zhong, Ran Zhang, Vicky Yao", "tldr": "", "abstract": "Many of the most commonly explored natural language processing (NLP) information extraction tasks can be thought of as evaluations of declarative knowledge, or fact-based information extraction. Procedural knowledge extraction, i.e., breaking down a described process into a series of steps, has received much less attention, perhaps in part due to the lack of structured datasets that capture the knowledge extraction process from end-to-end. To address this unmet need, we present FlaMB\u00e9 (Flow annotations for Multiverse Biological entities), a collection of expert-curated datasets across a series of complementary tasks that capture procedural knowledge in biomedical texts. This dataset is inspired by the observation that one ubiquitous source of procedural knowledge that is described as unstructured text is within academic papers describing their methodology. The workflows annotated in FlaMB\u00e9 are from texts in the burgeoning field of single cell research, a research area that has become notorious for the number of software tools and complexity of workflows used. Additionally, FlaMB\u00e9 provides, to our knowledge, the largest manually curated named entity recognition (NER) and disambiguation (NED) datasets for tissue/cell type, a fundamental biological entity that is critical for knowledge extraction in the biomedical research domain. 
Beyond providing a valuable dataset to enable further development of NLP models for procedural knowledge extraction, automating the process of workflow mining also has important implications for advancing reproducibility in biomedical research.", "keywords": "information extraction;relationship extraction;named entity recognition;biomedical;single cell;bioinformatic tools;tissue;cell type", "primary_area": "", "supplementary_material": "/attachment/084a571a647e4e6a59beee81fa80a9e6fd7d1e3d.pdf", "author": "Ruth Dannenfelser;Jeffrey Zhong;Ran Zhang;Vicky Yao", "authorids": "~Ruth_Dannenfelser1;~Jeffrey_Zhong1;~Ran_Zhang7;~Vicky_Yao1", "gender": ";;F;", "homepage": ";;https://ranzhang08.github.io/;https://www.cs.rice.edu/~vy/", "dblp": "57/8244;;;", "google_scholar": "92zc-yQAAAAJ;;;egBVVA8AAAAJ", "orcid": ";0000-0003-1431-6973;;", "linkedin": ";;;", "or_profile": "~Ruth_Dannenfelser1;~Jeffrey_Zhong1;~Ran_Zhang7;~Vicky_Yao1", "aff": "Rice University;Rice University ;University of Washington;Rice University", "aff_domain": "rice.edu;rice.edu;uw.edu;cs.rice.edu", "position": "Researcher;Undergrad student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ndannenfelser2023into,\ntitle={Into the Single Cell Multiverse: an End-to-End Dataset for Procedural Knowledge Extraction in Biomedical Texts},\nauthor={Ruth Dannenfelser and Jeffrey Zhong and Ran Zhang and Vicky Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=6iRH9SITva}\n}", "github": "", "project": "", "reviewers": "oxgk;oyUc;FQ7K;oXH1;bopq", "pdf_size": 866955, "rating": "6;7;8;8;9", "confidence": "3;3;4;3;4", "wc_summary_and_contributions": "97;112;75;51;71", "wc_strengths": "78;65;101;103;40", "wc_improvement": "55;100;44;141;97", "wc_limitations": "61;15;43;20;1", "wc_correctness": "39;15;32;128;3", "wc_clarity": "9;9;6;11;3", "wc_relation_to_prior_work": "72;7;74;1;6", "wc_documentation": "59;19;54;22;2", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "471;343;430;478;224", "wc_reply_reviewers": "0;27;0;49;13", "wc_reply_authors": "359;350;144;482;524", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 81.2, 21.22639865827456 ], "wc_strengths_avg": [ 77.4, 23.51680250374187 ], "wc_improvement_avg": [ 87.4, 34.805746651953896 ], "wc_limitations_avg": [ 28.0, 21.335416564951338 ], "wc_correctness_avg": [ 43.4, 44.14793313395317 ], "wc_clarity_avg": [ 7.6, 2.8000000000000003 ], "wc_relation_to_prior_work_avg": [ 32.0, 33.544000953970894 ], "wc_documentation_avg": [ 31.2, 21.811923344813035 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 389.2, 95.56861409479579 ], "wc_reply_reviewers_avg": [ 17.8, 18.519179247472064 ], "wc_reply_authors_avg": [ 371.8, 132.54493577651317 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7205766921228922, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=510395196438078277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "rice.edu;rice.edu;uw.edu;cs.rice.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Rice University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.rice.edu;https://www.washington.edu", 
"aff_unique_abbr": "Rice;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Exact Sample Complexity Gain from Invariances for Kernel Regression", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72743", "id": "6iouUxI45W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/adf5a38a2e2e7606fbfc3eff72998afa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6iouUxI45W", "openreview": "https://openreview.net/forum?id=6iouUxI45W", "poster": "/media/PosterPDFs/NeurIPS%202023/72743.png?t=1702417638.718093", "slides": "https://nips.cc/virtual/2023/poster/72743", "video": "https://nips.cc/virtual/2023/poster/72743", "author_site": "Behrooz Tahmasebi, Stefanie Jegelka", "tldr": "", "abstract": "In practice, encoding invariances into models improves sample complexity. In this work, we study this phenomenon from a theoretical perspective. In particular, we provide minimax optimal rates for kernel ridge regression on compact manifolds, with a target function that is invariant to a group action on the manifold. Our results hold for any smooth compact Lie group action, even groups of positive dimension. For a finite group, the gain effectively multiplies the number of samples by the group size. For groups of positive dimension, the gain is observed by a reduction in the manifold's dimension, in addition to a factor proportional to the volume of the quotient space. Our proof takes the viewpoint of differential geometry, in contrast to the more common strategy of using invariant polynomials. This new geometric viewpoint on learning with invariances may be of independent interest.", "keywords": "invariances;manifolds;sample complexity", "primary_area": "", "supplementary_material": "", "author": "Behrooz Tahmasebi;Stefanie Jegelka", "authorids": "~Behrooz_Tahmasebi1;~Stefanie_Jegelka3", "gender": "M;F", "homepage": "https://people.csail.mit.edu/bzt/;http://people.csail.mit.edu/stefje/", "dblp": "223/0884;38/7003", "google_scholar": "ZXCO3DMAAAAJ;gTWUZlsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Behrooz_Tahmasebi1;~Stefanie_Jegelka3", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ntahmasebi2023the,\ntitle={The Exact Sample Complexity Gain from Invariances for Kernel Regression},\nauthor={Behrooz Tahmasebi and Stefanie Jegelka},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6iouUxI45W}\n}", "github": "", "project": "", "reviewers": "jgVU;DGX1;doFr;xLSq", "pdf_size": 481923, "rating": "5;7;7;8", "confidence": "4;3;3;3", "soundness": "4;3;3;4", "novelty": "4;4;3;4", "presentation": "3;3;3;4", "wc_summary": "145;98;34;97", "wc_strengths": "32;208;34;44", "wc_weaknesses": "41;214;303;2", "wc_questions": "1;4;2;2", "wc_limitations": "1;57;3;12", "wc_review": "220;581;376;157", "wc_reply_reviewers": "0;14;0;55", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 39.44933459514875 ], "wc_strengths_avg": [ 79.5, 
74.32866203558355 ], "wc_weaknesses_avg": [ 140.0, 123.37949586539897 ], "wc_questions_avg": [ 2.25, 1.0897247358851685 ], "wc_limitations_avg": [ 18.25, 22.75274708689041 ], "wc_review_avg": [ 333.5, 163.62838995724428 ], "wc_reply_reviewers_avg": [ 17.25, 22.531921799970814 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3611108058549591868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Synthetic Experience Replay", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72742", "id": "6jNQ1AY1Uf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/911fc798523e7d4c2e9587129fcf88fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6jNQ1AY1Uf", "openreview": "https://openreview.net/forum?id=6jNQ1AY1Uf", "poster": "/media/PosterPDFs/NeurIPS%202023/72742.png?t=1701436918.1830022", "slides": "https://nips.cc/virtual/2023/poster/72742", "video": "https://nips.cc/virtual/2023/poster/72742", "author_site": "Cong Lu, Philip Ball, Yee Whye Teh, Jack Parker-Holder", "tldr": "", "abstract": "A key theme in the past decade has been that when large neural networks and large datasets combine they can produce remarkable results. In deep reinforcement learning (RL), this paradigm is commonly made possible through experience replay, whereby a dataset of past experiences is used to train a policy or value function. However, unlike in supervised or self-supervised learning, an RL agent has to collect its own data, which is often limited. Thus, it is challenging to reap the benefits of deep learning, and even small neural networks can overfit at the start of training. In this work, we leverage the tremendous recent progress in generative modeling and propose Synthetic Experience Replay (SynthER), a diffusion-based approach to flexibly upsample an agent's collected experience. We show that SynthER is an effective method for training RL agents across offline and online settings, in both proprioceptive and pixel-based environments. In offline settings, we observe drastic improvements when upsampling small offline datasets and see that additional synthetic data also allows us to effectively train larger networks. Furthermore, SynthER enables online agents to train with a much higher update-to-data ratio than before, leading to a significant increase in sample efficiency, without any algorithmic changes. We believe that synthetic training data could open the door to realizing the full potential of deep learning for replay-based RL algorithms from limited data. Finally, we open-source our code at https://github.com/conglu1997/SynthER.", "keywords": "Reinforcement Learning;Diffusion Models;Synthetic Data;Sample-Efficient RL", "primary_area": "", "supplementary_material": "/attachment/a56aa80a461f85a9d85ba7ec58f3b2d9d6f5dcbc.pdf", "author": "Cong Lu;Philip J. 
Ball;Yee Whye Teh;Jack Parker-Holder", "authorids": "~Cong_Lu1;~Philip_J._Ball2;~Yee_Whye_Teh2;~Jack_Parker-Holder1", "gender": "M;M;M;M", "homepage": "https://conglu.co.uk;https://jparkerholder.github.io/;https://philipjball.github.io/;http://csml.stats.ox.ac.uk/people/teh/", "dblp": ";237/9793.html;244/1972;88/2483", "google_scholar": "yMGBji4AAAAJ;;5Cm8L90AAAAJ;https://scholar.google.co.uk/citations?user=y-nUzMwAAAAJ", "orcid": "0000-0001-5564-838X;;;", "linkedin": "cong-lu-530b74104/;;;", "or_profile": "~Cong_Lu1;~Jack_Parker-Holder1;~Philip_Ball1;~Yee_Whye_Teh1", "aff": "University of Oxford;Google DeepMind;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;google.com;ox.ac.uk;ox.ac.uk", "position": "PhD student;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nlu2023synthetic,\ntitle={Synthetic Experience Replay},\nauthor={Cong Lu and Philip J. Ball and Yee Whye Teh and Jack Parker-Holder},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6jNQ1AY1Uf}\n}", "github": "", "project": "", "reviewers": "jt3y;oBNH;MwZd;2GdG", "pdf_size": 1206705, "rating": "6;7;7;7", "confidence": "5;5;4;4", "soundness": "3;4;3;4", "novelty": "2;4;3;4", "presentation": "3;4;4;4", "wc_summary": "80;44;87;77", "wc_strengths": "70;25;168;113", "wc_weaknesses": "77;22;299;51", "wc_questions": "1;48;13;58", "wc_limitations": "1;48;59;16", "wc_review": "229;187;626;315", "wc_reply_reviewers": "0;17;58;19", "wc_reply_authors": "42;13;13;201", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 16.56804152578089 ], "wc_strengths_avg": [ 94.0, 52.853571307906904 ], "wc_weaknesses_avg": [ 112.25, 109.56134126597757 ], "wc_questions_avg": [ 30.0, 23.65375234502974 ], "wc_limitations_avg": [ 31.0, 23.441416339462084 ], "wc_review_avg": [ 339.25, 171.86386327555888 ], "wc_reply_reviewers_avg": [ 23.5, 21.242645786248 ], "wc_reply_authors_avg": [ 67.25, 78.12289997177524 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14027327218415914540&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ox.ac.uk;google.com;ox.ac.uk;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Humans in Kitchens: A Dataset for Multi-Person Human Motion Forecasting with Scene Context", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73691", "id": "6jOlRwnqbb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2052b3e0617ecb2ce9474a6feaf422b3-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=6jOlRwnqbb", "openreview": "https://openreview.net/forum?id=6jOlRwnqbb", "poster": 
"/media/PosterPDFs/NeurIPS%202023/73691.png?t=1702072750.5234206", "slides": "https://nips.cc/virtual/2023/poster/73691", "video": "https://nips.cc/virtual/2023/poster/73691", "author_site": "Julian Tanke, Oh-Hun Kwon, Felix B Mueller, Andreas Doering, J\u00fcrgen Gall", "tldr": "", "abstract": "Forecasting human motion of multiple persons is very challenging. It requires to model the interactions between humans and the interactions with objects and the environment. For example, a person might want to make a coffee, but if the coffee machine is already occupied the person will have\nto wait. These complex relations between scene geometry and persons arise\nconstantly in our daily lives, and models that wish to accurately forecast\nhuman behavior will have to take them into consideration. To facilitate research in this direction, we propose Humans in Kitchens, a\nlarge-scale multi-person human motion dataset with annotated 3D human poses, scene geometry and activities per person and frame.\nOur dataset consists of over 7.3h recorded data of up to 16 persons at the same time in four kitchen scenes, with more than 4M annotated human poses, represented by a parametric 3D body model. In addition, dynamic scene geometry and objects like chair or cupboard are annotated per frame. As first benchmarks, we propose two protocols for short-term and long-term human motion forecasting.", "keywords": "3D Human Motion;3D scene environment", "primary_area": "", "supplementary_material": "/attachment/b3baa30ebe1a9f6795245de420483418b2cd4ae1.pdf", "author": "Julian Alexander Tanke;Oh-Hun Kwon;Felix Benjamin Mueller;Andreas Doering;Juergen Gall", "authorids": "~Julian_Alexander_Tanke1;~Oh-Hun_Kwon1;~Felix_Benjamin_Mueller1;~Andreas_Doering1;~Juergen_Gall1", "gender": "M;M;;;", "homepage": "https://github.com/jutanke;https://github.com/ohkwon718;https://github.com/felixbmuller;https://andoer.github.io/;https://pages.iai.uni-bonn.de/gall_juergen/", "dblp": "217/2077;121/4553;;;13/6920", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.de/citations?user=AsuNggYAAAAJ;1CLaPMEAAAAJ", "orcid": ";;;;0000-0002-9447-3399", "linkedin": ";;;;", "or_profile": "~Julian_Alexander_Tanke1;~Oh-Hun_Kwon1;~Felix_Benjamin_Mueller1;~Andreas_Doering1;~Juergen_Gall1", "aff": "University of Bonn;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn, Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn;University of Bonn", "aff_domain": "uni-bonn.de;cs.uni-bonn.de;uni-bonn.de;uni-bonn.de;uni-bonn.de", "position": "PhD student;PhD student;MS student;PhD student;Professor", "bibtex": "@inproceedings{\ntanke2023humans,\ntitle={Humans in Kitchens: A Dataset for Multi-Person Human Motion Forecasting with Scene Context},\nauthor={Julian Alexander Tanke and Oh-Hun Kwon and Felix Benjamin Mueller and Andreas Doering and Juergen Gall},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=6jOlRwnqbb}\n}", "github": "", "project": "", "reviewers": "1cBr;khgs;zmcC;bd4f;Kghq", "pdf_size": 3767425, "rating": "6;6;6;7;7", "confidence": "4;4;4;4;3", "wc_summary_and_contributions": "68;101;80;71;173", "wc_strengths": "81;72;81;55;284", "wc_improvement": "73;460;119;58;318", "wc_limitations": "66;42;11;11;142", "wc_correctness": "14;7;28;5;21", "wc_clarity": "10;1;12;26;12", "wc_relation_to_prior_work": "16;13;53;61;18", "wc_documentation": 
"15;6;5;5;16", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "344;703;390;293;985", "wc_reply_reviewers": "0;0;0;0;64", "wc_reply_authors": "387;748;487;342;671", "reply_reviewers": "0;0;0;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 98.6, 38.949197681081955 ], "wc_strengths_avg": [ 114.6, 85.23051096878395 ], "wc_improvement_avg": [ 205.6, 157.62055703492484 ], "wc_limitations_avg": [ 54.4, 48.4338724448087 ], "wc_correctness_avg": [ 15.0, 8.602325267042627 ], "wc_clarity_avg": [ 12.2, 8.009993757800315 ], "wc_relation_to_prior_work_avg": [ 32.2, 20.468512403201164 ], "wc_documentation_avg": [ 9.4, 5.0039984012787215 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 543.0, 263.2390548531885 ], "wc_reply_reviewers_avg": [ 12.8, 25.6 ], "wc_reply_authors_avg": [ 527.0, 158.11514791442343 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7964887322499696173&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uni-bonn.de;cs.uni-bonn.de;uni-bonn.de;uni-bonn.de;uni-bonn.de", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Bonn;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-bonn.de/;https://www.uni-bonn.de;https://www.uni-bonn.de/", "aff_unique_abbr": "UBonn;Uni Bonn;Uni Bonn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Adversarially Robust Distributed Count Tracking via Partial Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72741", "id": "6kINNTYQcm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0722b58f02d7793acf7d328928f933a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6kINNTYQcm", "openreview": "https://openreview.net/forum?id=6kINNTYQcm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72741", "video": "https://nips.cc/virtual/2023/poster/72741", "author_site": "Zhongzheng Xiong, Xiaoyi Zhu, zengfeng Huang", "tldr": "", "abstract": "We study the distributed tracking model, also known as distributed functional monitoring. This model involves $k$ sites each receiving a stream of items and communicating with the central server. The server's task is to track a function of all items received thus far continuously, with minimum communication cost. For count tracking, it is known that there is a $\\sqrt{k}$ gap in communication between deterministic and randomized algorithms. However, existing randomized algorithms assume an \"oblivious adversary\" who constructs the entire input streams before the algorithm starts. Here we consider adaptive adversaries who can choose new items based on previous answers from the algorithm. Deterministic algorithms are trivially robust to adaptive adversaries, while randomized ones may not. Therefore, we investigate whether the $\\sqrt{k}$ advantage of randomized algorithms is from randomness itself or the oblivious adversary assumption. We provide an affirmative answer to this question by giving a robust algorithm with optimal communication. 
Existing robustification techniques do not yield optimal bounds due to the inherent challenges of the distributed nature of the problem. To address this, we extend the differential privacy framework by introducing \"partial differential privacy\" and proving a new generalization theorem. This theorem may have broader applications beyond robust count tracking, making it of independent interest.", "keywords": "Distributed Tracking;Adaptive Robustness;Differential Privacy;Generalization", "primary_area": "", "supplementary_material": "/attachment/371047854a89aac6f7ab812eabd4022b4d4d46e2.pdf", "author": "Zhongzheng Xiong;Xiaoyi Zhu;Zengfeng Huang", "authorids": "~Zhongzheng_Xiong1;~Xiaoyi_Zhu1;~Zengfeng_Huang1", "gender": "M;M;M", "homepage": "https://github.com/ZhongzhengXiong;https://github.com/Xyzhu0616;https://zengfenghuang.github.io/", "dblp": "280/0893;;97/9726", "google_scholar": ";S7b9-D0AAAAJ;https://scholar.google.com.hk/citations?user=FwNBuXUAAAAJ", "orcid": ";;0000-0003-2671-7483", "linkedin": ";;", "or_profile": "~Zhongzheng_Xiong1;~Xiaoyi_Zhu1;~Zengfeng_Huang1", "aff": "Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nxiong2023adversarially,\ntitle={Adversarially Robust Distributed Count Tracking via Partial Differential Privacy},\nauthor={Zhongzheng Xiong and Xiaoyi Zhu and Zengfeng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6kINNTYQcm}\n}", "github": "", "project": "", "reviewers": "5Raw;8gDd;W7US;MMAV", "pdf_size": 328622, "rating": "5;6;7;8", "confidence": "3;3;2;4", "soundness": "4;3;3;3", "novelty": "3;2;4;4", "presentation": "3;2;4;3", "wc_summary": "173;198;50;223", "wc_strengths": "43;68;97;120", "wc_weaknesses": "78;317;43;134", "wc_questions": "28;96;55;44", "wc_limitations": "56;101;7;1", "wc_review": "378;780;252;522", "wc_reply_reviewers": "117;21;0;0", "wc_reply_authors": "127;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 161.0, 66.47932009279276 ], "wc_strengths_avg": [ 82.0, 29.09467305195231 ], "wc_weaknesses_avg": [ 143.0, 105.57225014178678 ], "wc_questions_avg": [ 55.75, 25.14333907817337 ], "wc_limitations_avg": [ 41.25, 40.56091098582476 ], "wc_review_avg": [ 483.0, 196.28805363546707 ], "wc_reply_reviewers_avg": [ 34.5, 48.396797414705034 ], "wc_reply_authors_avg": [ 31.75, 54.99261314031185 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HzqUJ3MKSusJ:scholar.google.com/&scioq=Adversarially+Robust+Distributed+Count+Tracking+via+Partial+Differential+Privacy&hl=en&as_sdt=0,14", "gs_version_total": 8, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": 
"China" }, { "title": "AI for Interpretable Chemistry: Predicting Radical Mechanistic Pathways via Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72740", "id": "6kRQTPEVip", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ca70969597da7166128f7755c64ffd5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6kRQTPEVip", "openreview": "https://openreview.net/forum?id=6kRQTPEVip", "poster": "/media/PosterPDFs/NeurIPS%202023/72740.png?t=1702306464.0144", "slides": "https://nips.cc/virtual/2023/poster/72740", "video": "https://nips.cc/virtual/2023/poster/72740", "author_site": "Mohammadamin Tavakoli, Pierre Baldi, Ann Marie Carlton, Yin Ting Chiu, Alexander Shmakov, David Van Vranken", "tldr": "", "abstract": "Deep learning-based reaction predictors have undergone significant architectural evolution. However, their reliance on reactions from the US Patent Office results in a lack of interpretable predictions and limited generalizability to other chemistry domains, such as radical and atmospheric chemistry. To address these challenges, we introduce a new reaction predictor system, RMechRP, that leverages contrastive learning in conjunction with mechanistic pathways, the most interpretable representation of chemical reactions. Specifically designed for radical reactions, RMechRP provides different levels of interpretation of chemical reactions. We develop and train multiple deep-learning models using RMechDB, a public database of radical reactions, to establish the first benchmark for predicting radical reactions. Our results demonstrate the effectiveness of RMechRP in providing accurate and interpretable predictions of radical reactions, and its potential for various applications in atmospheric chemistry.", "keywords": "Chemistry;Reactions;Contrastive;Radical;Graph", "primary_area": "", "supplementary_material": "/attachment/5981fef8697ba818bbe98ef8ae859827d39f294c.zip", "author": "Mohammadamin Tavakoli;Pierre Baldi;Ann Marie Carlton;Yinting Chiu;Alexander Shmakov;David Van Vranken", "authorids": "~Mohammadamin_Tavakoli1;~Pierre_Baldi1;~Ann_Marie_Carlton1;yintc@uci.edu;~Alexander_Shmakov1;david.vv@uci.edu", "gender": "M;;Not Specified;;;", "homepage": "https://www.ics.uci.edu/~mohamadt/;;https://www.chem.uci.edu/people/ann-marie-carlton;;;", "dblp": ";;;;;", "google_scholar": "27JuT8kAAAAJ;;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Mohammadamin_Tavakoli1;~Pierre_Baldi1;~Ann_Marie_Carlton1;yintc@uci.edu;~Alexander_Shmakov1;david.vv@uci.edu", "aff": "California Institute of Technology;;University of California, Irvine;;;", "aff_domain": "caltech.edu;;uci.edu;;;", "position": "Postdoc;;Full Professor;;;", "bibtex": "@inproceedings{\ntavakoli2023ai,\ntitle={{AI} for Interpretable Chemistry: Predicting Radical Mechanistic Pathways via Contrastive Learning},\nauthor={Mohammadamin Tavakoli and Pierre Baldi and Ann Marie Carlton and Yinting Chiu and Alexander Shmakov and David Van Vranken},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6kRQTPEVip}\n}", "github": "", "project": "", "reviewers": "G4db;PKyZ;n7xM;URj3", "pdf_size": 1461368, "rating": "5;6;6;7", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;2;2;3", "wc_summary": "173;174;61;36", "wc_strengths": "101;43;37;37", "wc_weaknesses": "169;58;358;110", "wc_questions": "72;47;138;11", "wc_limitations": 
"19;50;41;1", "wc_review": "534;372;635;195", "wc_reply_reviewers": "16;17;329;2", "wc_reply_authors": "26;102;227;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 111.0, 63.12289600454022 ], "wc_strengths_avg": [ 54.5, 26.95830113341714 ], "wc_weaknesses_avg": [ 173.75, 113.39394825121842 ], "wc_questions_avg": [ 67.0, 46.373483802707774 ], "wc_limitations_avg": [ 27.75, 19.122957407263137 ], "wc_review_avg": [ 434.0, 166.85772382482028 ], "wc_reply_reviewers_avg": [ 91.0, 137.53726767680098 ], "wc_reply_authors_avg": [ 88.75, 88.17985881140885 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14462620490717249035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "caltech.edu;;uci.edu;;;", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "California Institute of Technology;University of California, Irvine", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;https://www.uci.edu", "aff_unique_abbr": "Caltech;UCI", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Pasadena;Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards A Richer 2D Understanding of Hands at Scale", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72739", "id": "6ldTxwhgtP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/612a7948f3294a02a63d970566ca8536-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6ldTxwhgtP", "openreview": "https://openreview.net/forum?id=6ldTxwhgtP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72739", "video": "https://nips.cc/virtual/2023/poster/72739", "author_site": "Tianyi Cheng, Dandan Shan, Ayda Hassen, Richard Higgins, David Fouhey", "tldr": "", "abstract": "As humans, we learn a lot about how to interact with the world by observing others interacting with their hands. To help AI systems obtain a better understanding of hand interactions, we introduce a new model that produces a rich understanding of hand interaction. Our system produces a richer output than past systems at a larger scale. Our outputs include boxes and segments for hands, in-contact objects, and second objects touched by tools as well as contact and grasp type. Supporting this method are annotations of 257K images, 401K hands, 288K objects, and 19K second objects spanning four datasets. 
We show that our method provides rich information and performs and generalizes well.", "keywords": "human-object interaction; hand object detection; hand detection", "primary_area": "", "supplementary_material": "/attachment/2afc17a43617d894aa480b0ade9e219608228772.pdf", "author": "Tianyi Cheng;Dandan Shan;Ayda Sultan Hassen;Richard Ely Locke Higgins;David Fouhey", "authorids": "~Tianyi_Cheng1;~Dandan_Shan1;~Ayda_Sultan_Hassen1;~Richard_Ely_Locke_Higgins1;~David_Fouhey2", "gender": "F;F;F;M;", "homepage": ";https://ddshan.github.io/;;https://relh.net/;", "dblp": ";72/6513;;289/1410;29/8613", "google_scholar": ";8EVs9AEAAAAJ;;uMZ5Xq4AAAAJ;FLcpd34AAAAJ", "orcid": ";0000-0002-8170-5496;;0000-0002-6227-0773;", "linkedin": "eva-tianyi-cheng/;dandan-shan-362731176/;ayda-sultan/;;", "or_profile": "~Tianyi_Cheng1;~Dandan_Shan1;~Ayda_Sultan_Hassen1;~Richard_Ely_Locke_Higgins1;~David_Fouhey2", "aff": "Carnegie Mellon University;University of Michigan;Addis Ababa University;University of Michigan;University of Michigan", "aff_domain": "cmu.edu;umich.edu;aait.edu;umich.edu;umich.edu", "position": "MS student;PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncheng2023towards,\ntitle={Towards A Richer 2D Understanding of Hands at Scale},\nauthor={Tianyi Cheng and Dandan Shan and Ayda Sultan Hassen and Richard Ely Locke Higgins and David Fouhey},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6ldTxwhgtP}\n}", "github": "", "project": "", "reviewers": "ufdR;oZYn;87MP;N4EC", "pdf_size": 3701495, "rating": "3;4;5;7", "confidence": "4;4;3;5", "soundness": "3;2;4;4", "novelty": "1;2;3;3", "presentation": "3;3;3;4", "wc_summary": "60;74;73;117", "wc_strengths": "65;81;61;132", "wc_weaknesses": "100;219;51;270", "wc_questions": "12;5;57;237", "wc_limitations": "49;14;11;0", "wc_review": "286;393;253;756", "wc_reply_reviewers": "305;132;40;19", "wc_reply_authors": "1010;459;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 21.50581316760657 ], "wc_strengths_avg": [ 84.75, 28.287585616308792 ], "wc_weaknesses_avg": [ 160.0, 88.12207441952329 ], "wc_questions_avg": [ 77.75, 94.0834071449371 ], "wc_limitations_avg": [ 18.5, 18.364367672206956 ], "wc_review_avg": [ 422.0, 199.65845837329306 ], "wc_reply_reviewers_avg": [ 124.0, 112.81179016397178 ], "wc_reply_authors_avg": [ 367.25, 415.71948174219597 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.47809144373375745, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14555547244599837507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;umich.edu;aait.edu;umich.edu;umich.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Carnegie Mellon University;University of Michigan;Addis Ababa University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.umich.edu;https://www.aau.edu.et", "aff_unique_abbr": "CMU;UM;AAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Ethiopia" }, { 
"title": "Language Model Alignment with Elastic Reset", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72738", "id": "6lgugutkin", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0a980183c520446f6b8afb6fa2a2c70e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6lgugutkin", "openreview": "https://openreview.net/forum?id=6lgugutkin", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72738", "video": "https://nips.cc/virtual/2023/poster/72738", "author_site": "Michael Noukhovitch, Samuel Lavoie, Florian Strub, Aaron Courville", "tldr": "", "abstract": "Finetuning language models with reinforcement learning (RL), e.g. from human feedback (HF), is a prominent method for alignment. But optimizing against a reward model can improve on reward while degrading performance in other areas, a phenomenon known as reward hacking, alignment tax, or language drift. First, we argue that commonly-used test metrics are insufficient and instead measure how different algorithms tradeoff between reward and drift. The standard method modified the reward with a Kullback-Lieber (KL) penalty between the online and initial model. We propose Elastic Reset, a new algorithm that achieves higher reward with less drift without explicitly modifying the training objective. We periodically reset the online model to an exponentially moving average (EMA) of itself, then reset the EMA model to the initial model. Through the use of an EMA, our model recovers quickly after resets and achieves higher reward with less drift in the same number of steps. We demonstrate that fine-tuning language models with Elastic Reset leads to state-of-the-art performance on a small scale pivot-translation benchmark, outperforms all baselines in a medium-scale RLHF-like IMDB mock sentiment task and leads to a more performant and more aligned technical QA chatbot with LLaMA-7B. 
Code available at https://github.com/mnoukhov/elastic-reset", "keywords": "reinforcement learning from human feedback (rlhf);language", "primary_area": "", "supplementary_material": "/attachment/de5eae0878e4953e177f6bdcb53f6207ddb890f8.zip", "author": "Michael Noukhovitch;Samuel Lavoie;Florian Strub;Aaron Courville", "authorids": "~Michael_Noukhovitch1;~Samuel_Lavoie1;~Florian_Strub1;~Aaron_Courville3", "gender": "M;M;;M", "homepage": "http://mnoukhov.github.io;http://www.florian-strub.com;;http://example.com", "dblp": "218/6652;;56/1688;225/6508", "google_scholar": "https://scholar.google.ca/citations?user=EwmQKdMAAAAJ;zxO5kccAAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;", "orcid": ";;;", "linkedin": ";florian-strub-64443527/;;", "or_profile": "~Michael_Noukhovitch1;~Florian_Strub1;~Aaron_Courville3;~Samuel_Lavoie-Marchildon1", "aff": "University of Montreal;Google DeepMind;Universit\u00e9 de Montr\u00e9al;University of Montreal", "aff_domain": "umontreal.ca;google.com; ;umontreal.ca", "position": "PhD student;Research Scientist;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nnoukhovitch2023language,\ntitle={Language Model Alignment with Elastic Reset},\nauthor={Michael Noukhovitch and Samuel Lavoie and Florian Strub and Aaron Courville},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6lgugutkin}\n}", "github": "", "project": "", "reviewers": "tYGJ;8rTL;Xquz;1Gya", "pdf_size": 1738069, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "113;96;213;103", "wc_strengths": "62;68;147;117", "wc_weaknesses": "205;324;1097;147", "wc_questions": "66;145;119;109", "wc_limitations": "9;16;1;1", "wc_review": "455;649;1577;477", "wc_reply_reviewers": "10;348;81;125", "wc_reply_authors": "54;203;54;355", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 131.25, 47.58347927589995 ], "wc_strengths_avg": [ 98.5, 35.20298282816387 ], "wc_weaknesses_avg": [ 443.25, 382.7978415560882 ], "wc_questions_avg": [ 109.75, 28.472574523565655 ], "wc_limitations_avg": [ 6.75, 6.2599920127744575 ], "wc_review_avg": [ 789.5, 460.8261602817271 ], "wc_reply_reviewers_avg": [ 141.0, 126.35861664326656 ], "wc_reply_authors_avg": [ 166.5, 124.67658160215976 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6802849537446970193&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "umontreal.ca;google.com; ;umontreal.ca", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Montreal;Google;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.umontreal.ca;https://deepmind.com;https://www.umontreal.ca", "aff_unique_abbr": "UM;DeepMind;UdeM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Mask Propagation for Efficient Video Semantic Segmentation", "status": "Poster", "track": "main", "site":
"https://nips.cc/virtual/2023/poster/72737", "id": "6ljXBlojde", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/167bcf2af2cd08fcf75b932022db0311-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6ljXBlojde", "openreview": "https://openreview.net/forum?id=6ljXBlojde", "poster": "/media/PosterPDFs/NeurIPS%202023/72737.png?t=1701492446.567483", "slides": "https://nips.cc/virtual/2023/poster/72737", "video": "https://nips.cc/virtual/2023/poster/72737", "author_site": "Yuetian Weng, Mingfei Han, Haoyu He, Mingjie Li, Lina Yao, Xiaojun Chang, Bohan Zhuang", "tldr": "", "abstract": "Video Semantic Segmentation (VSS) involves assigning a semantic label to each pixel in a video sequence. Prior work in this field has demonstrated promising results by extending image semantic segmentation models to exploit temporal relationships across video frames; however, these approaches often incur significant computational costs. In this paper, we propose an efficient mask propagation framework for VSS, called MPVSS. Our approach first employs a strong query-based image segmentor on sparse key frames to generate accurate binary masks and class predictions. We then design a flow estimation module utilizing the learned queries to generate a set of segment-aware flow maps, each associated with a mask prediction from the key frame. Finally, the mask-flow pairs are warped to serve as the mask predictions for the non-key frames. By reusing predictions from key frames, we circumvent the need to process a large volume of video frames individually with resource-intensive segmentors, alleviating temporal redundancy and significantly reducing computational costs. Extensive experiments on VSPW and Cityscapes demonstrate that our mask propagation framework achieves SOTA accuracy and efficiency trade-offs. For instance, our best model with Swin-L backbone outperforms the SOTA MRCFA using MiT-B5 by 4.0% mIoU, requiring only 26% FLOPs on the VSPW dataset. Moreover, our framework reduces up to 4\u00d7 FLOPs compared to the per-frame Mask2Former baseline with only up to 2% mIoU degradation on the Cityscapes validation set. 
Code is available at https://github.com/ziplab/MPVSS.", "keywords": "Video Semantic Segmentation; Inference Efficiency", "primary_area": "", "supplementary_material": "/attachment/d1701a023a84152eed28f3a8eb305bc99f205b2e.pdf", "author": "Yuetian Weng;Mingfei Han;Haoyu He;Mingjie Li;Lina Yao;Xiaojun Chang;Bohan Zhuang", "authorids": "~Yuetian_Weng1;~Mingfei_Han1;~Haoyu_He2;~Mingjie_Li2;~Lina_Yao2;~Xiaojun_Chang4;~Bohan_Zhuang1", "gender": "F;M;M;M;F;M;M", "homepage": ";https://mingfei.info;;http://www.mmvg.org/member/mingjie_li/;https://www.linayao.com/;https://bohanzhuang.github.io/;https://www.xiaojun.ai", "dblp": "319/7287;142/5360-2;184/4312;48/10103-6;56/6651-1;145/1096;116/8412", "google_scholar": ";wJEoIXsAAAAJ;aU1zMhUAAAAJ;ag0m3aoAAAAJ;https://scholar.google.com.au/citations?user=EU3snBgAAAAJ;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ;https://scholar.google.co.uk/citations?user=8suupocAAAAJ", "orcid": ";;;;;;", "linkedin": "yuetian-weng/;;;;linayao/;bohan-zhuang/;", "or_profile": "~Yuetian_Weng1;~Mingfei_Han1;~Haoyu_He2;~Mingjie_Li2;~Lina_Yao2;~Bohan_Zhuang1;~Xiaojun_Chang1", "aff": "Baidu;Bytedance Inc.;Monash University;;CSIRO's Data61;Monash University;University of Technology Sydney", "aff_domain": "baidu.com;bytedance.com;monash.edu;;data61.csiro.au;monash.edu;uts.edu.au", "position": "Intern;Intern;PhD student;;Principal Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nweng2023mask,\ntitle={Mask Propagation for Efficient Video Semantic Segmentation},\nauthor={Yuetian Weng and Mingfei Han and Haoyu He and Mingjie Li and Lina Yao and Xiaojun Chang and Bohan Zhuang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6ljXBlojde}\n}", "github": "", "project": "", "reviewers": "8jSp;LSXE;cyWY;41Ay;oxr6", "pdf_size": 2649858, "rating": "3;5;6;6;6", "confidence": "4;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "31;59;135;146;87", "wc_strengths": "14;72;46;70;49", "wc_weaknesses": "100;242;81;8;161", "wc_questions": "7;6;3;206;4", "wc_limitations": "6;5;5;1;4", "wc_review": "158;384;270;431;305", "wc_reply_reviewers": "0;51;11;45;19", "wc_reply_authors": "0;320;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.6, 43.81597882051707 ], "wc_strengths_avg": [ 50.2, 20.960916010518243 ], "wc_weaknesses_avg": [ 118.4, 78.7619197328252 ], "wc_questions_avg": [ 45.2, 80.41243684903475 ], "wc_limitations_avg": [ 4.2, 1.7204650534085255 ], "wc_review_avg": [ 309.6, 94.70501570666677 ], "wc_reply_reviewers_avg": [ 25.2, 19.661129163911212 ], "wc_reply_authors_avg": [ 64.0, 128.0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12741305102574157407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "baidu.com;bytedance.com;monash.edu;;data61.csiro.au;monash.edu;uts.edu.au", "author_num": 7, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Baidu;Bytedance Inc.;Monash University;CSIRO;University of Technology Sydney", 
"aff_unique_dep": "Baidu, Inc.;;;Data61;", "aff_unique_url": "https://www.baidu.com;https://www.bytedance.com;https://www.monash.edu;https://www.csiro.au;https://www.uts.edu.au", "aff_unique_abbr": "Baidu;Bytedance;Monash;CSIRO;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "China;Australia" }, { "title": "Learning the Efficient Frontier", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72736", "id": "6lnoUqFd5R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/45a7ca247462d9e465ee88c8a302ca70-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6lnoUqFd5R", "openreview": "https://openreview.net/forum?id=6lnoUqFd5R", "poster": "/media/PosterPDFs/NeurIPS%202023/72736.png?t=1697224940.274999", "slides": "https://nips.cc/virtual/2023/poster/72736", "video": "https://nips.cc/virtual/2023/poster/72736", "author_site": "Philippe Chatigny, Ivan Sergienko, Ryan Ferguson, Jordan Weir, Maxime Bergeron", "tldr": "", "abstract": "The efficient frontier (EF) is a fundamental resource allocation problem where one has to find an optimal portfolio maximizing a reward at a given level of risk. This optimal solution is traditionally found by solving a convex optimization problem. In this paper, we introduce NeuralEF: a fast neural approximation framework that robustly forecasts the result of the EF convex optimizations problems with respect to heterogeneous linear constraints and variable number of optimization inputs. By reformulating an optimization problem as a sequence to sequence problem, we show that NeuralEF is a viable solution to accelerate large-scale simulation while handling discontinuous behavior.", "keywords": "Efficient Frontier;Convex Optimization;Resource Allocation;Constrainted Optimization;Finance", "primary_area": "", "supplementary_material": "/attachment/176b65c44c8f811dbe3dc2bedaac20a4ea6ae137.pdf", "author": "Philippe Chatigny;Ivan Sergienko;Ryan Ferguson;Jordan Weir;Maxime Bergeron", "authorids": "~Philippe_Chatigny1;ivan.sergienko@beacon.io;rf@riskfuel.com;jw@riskfuel.com;mb@riskfuel.com", "gender": ";;;;", "homepage": ";;;;", "dblp": "232/5559;;;;", "google_scholar": "qqbSt6sAAAAJ;;;;", "orcid": ";;;;", "linkedin": "pchatigny/;;;;", "or_profile": "~Philippe_Chatigny1;ivan.sergienko@beacon.io;rf@riskfuel.com;jw@riskfuel.com;mb@riskfuel.com", "aff": "Riskfuel;;;;", "aff_domain": "riskfuel.com;;;;", "position": "Researcher;;;;", "bibtex": "@inproceedings{\nchatigny2023learning,\ntitle={Learning the Efficient Frontier},\nauthor={Philippe Chatigny and Ivan Sergienko and Ryan Ferguson and Jordan Weir and Maxime Bergeron},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6lnoUqFd5R}\n}", "github": "", "project": "", "reviewers": "o9F6;5SDP;Ygwb;oNAD", "pdf_size": 2572819, "rating": "5;5;5;8", "confidence": "2;4;3;5", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "110;90;120;96", "wc_strengths": "80;38;113;75", "wc_weaknesses": "124;224;187;61", "wc_questions": "101;245;240;24", "wc_limitations": "78;117;1;2", "wc_review": "493;714;661;258", "wc_reply_reviewers": "38;71;45;59", "wc_reply_authors": "0;689;24;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 
0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 11.74734012447073 ], "wc_strengths_avg": [ 76.5, 26.5941722939444 ], "wc_weaknesses_avg": [ 149.0, 62.12487424534556 ], "wc_questions_avg": [ 152.5, 94.04387274033327 ], "wc_limitations_avg": [ 49.5, 49.94246689942338 ], "wc_review_avg": [ 531.5, 177.7364622130192 ], "wc_reply_reviewers_avg": [ 53.25, 12.735285626950029 ], "wc_reply_authors_avg": [ 178.25, 295.0443822546025 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5329350919401849882&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "riskfuel.com;;;;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Riskfuel Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.riskfuel.com", "aff_unique_abbr": "Riskfuel", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Bayesian Optimization with Cost-varying Variable Subsets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72735", "id": "6oiux75UDj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/090b23d52bc2722eef2fbf79c5ebf9ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6oiux75UDj", "openreview": "https://openreview.net/forum?id=6oiux75UDj", "poster": "/media/PosterPDFs/NeurIPS%202023/72735.png?t=1699600688.7432094", "slides": "https://nips.cc/virtual/2023/poster/72735", "video": "https://nips.cc/virtual/2023/poster/72735", "author_site": "Sebastian Tay, Chuan Sheng Foo, Daisuke Urano, Richalynn Leong, Bryan Kian Hsiang Low", "tldr": "", "abstract": "We introduce the problem of Bayesian optimization with cost-varying variable subsets (BOCVS) where in each iteration, the learner chooses a subset of query variables and specifies their values while the rest are randomly sampled. Each chosen subset has an associated cost. This presents the learner with the novel challenge of balancing between choosing more informative subsets for more directed learning versus leaving some variables to be randomly sampled to reduce incurred costs. This paper presents a novel Gaussian process upper confidence bound-based algorithm for solving the BOCVS problem that is provably no-regret. We analyze how the availability of cheaper control sets helps in exploration and reduces overall regret. 
We empirically show that our proposed algorithm can find significantly better solutions than comparable baselines with the same budget.", "keywords": "Bayesian optimization;Gaussian processes", "primary_area": "", "supplementary_material": "/attachment/0e6ad2549a8a16235a2d5b40df37c9f9f62b5397.zip", "author": "Sebastian Shenghong Tay;Chuan-Sheng Foo;Daisuke Urano;Richalynn Leong;Bryan Kian Hsiang Low", "authorids": "~Sebastian_Shenghong_Tay1;~Chuan-Sheng_Foo1;~Daisuke_Urano1;~Richalynn_Leong1;~Bryan_Kian_Hsiang_Low1", "gender": "M;M;M;F;M", "homepage": ";http://ai.stanford.edu/~csfoo;http://www.tll.org.sg/group-leaders/urano-daisuke/;;http://www.comp.nus.edu.sg/~lowkh", "dblp": "281/7664;73/1823;;;97/4877", "google_scholar": "https://scholar.google.com/citations?hl=en;AgbeqGkAAAAJ;ixmLRN0AAAAJ;;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": ";0000-0002-4748-5792;;;", "linkedin": ";;daisuke-urano-73206a86/?originalSubdomain=sg;www.linkedin.com/in/richalynn-leong-41432b39;", "or_profile": "~Sebastian_Shenghong_Tay1;~Chuan-Sheng_Foo1;~Daisuke_Urano1;~Richalynn_Leong1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;Institute for Infocomm Research, A*STAR;Temasek Life Sciences Laboratory / National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;i2r.a-star.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;Principal Scientist;Assistant Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\ntay2023bayesian,\ntitle={Bayesian Optimization with Cost-varying Variable Subsets},\nauthor={Sebastian Shenghong Tay and Chuan-Sheng Foo and Daisuke Urano and Richalynn Leong and Bryan Kian Hsiang Low},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6oiux75UDj}\n}", "github": "", "project": "", "reviewers": "o8eg;aGSh;xcAB;mBSK", "pdf_size": 3653454, "rating": "5;6;7;7", "confidence": "3;3;4;3", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "550;51;143;59", "wc_strengths": "234;39;53;35", "wc_weaknesses": "478;227;51;11", "wc_questions": "131;46;116;30", "wc_limitations": "49;39;17;1", "wc_review": "1442;402;380;136", "wc_reply_reviewers": "132;408;113;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 200.75, 204.8345368828216 ], "wc_strengths_avg": [ 90.25, 83.26276178460573 ], "wc_weaknesses_avg": [ 191.75, 184.16212287004078 ], "wc_questions_avg": [ 80.75, 43.447525821385966 ], "wc_limitations_avg": [ 26.5, 18.728320800328042 ], "wc_review_avg": [ 590.0, 502.8578327917345 ], "wc_reply_reviewers_avg": [ 167.75, 145.27624547736633 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3195794392056311101&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "nus.edu;i2r.a-star.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "National University of 
Singapore;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "NUS;I2R", "aff_campus_unique_index": "1", "aff_campus_unique": ";Singapore", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Where are we in the search for an Artificial Visual Cortex for Embodied Intelligence?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72734", "id": "6qLzQeFGio", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/022ca1bed6b574b962c48a2856eb207b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6qLzQeFGio", "openreview": "https://openreview.net/forum?id=6qLzQeFGio", "poster": "/media/PosterPDFs/NeurIPS%202023/72734.png?t=1702268933.4281323", "slides": "https://nips.cc/virtual/2023/poster/72734", "video": "https://nips.cc/virtual/2023/poster/72734", "author_site": "Arjun Majumdar, Karmesh Yadav, Sergio Arnaud, Jason Ma, Claire Chen, Sneha Silwal, Aryan Jain, Vincent-Pierre Berges, Tingfan Wu, Jay Vakil, Pieter Abbeel, Jitendra Malik, Dhruv Batra, Yixin Lin, Oleksandr Maksymets, Aravind Rajeswaran, Franziska Meier", "tldr": "", "abstract": "We present the largest and most comprehensive empirical study of pre-trained visual representations (PVRs) or visual \u2018foundation models\u2019 for Embodied AI. First, we curate CortexBench, consisting of 17 different tasks spanning locomotion, navigation, dexterous, and mobile manipulation. Next, we systematically evaluate existing PVRs and find that none are universally dominant. To study the effect of pre-training data size and diversity, we combine over 4,000 hours of egocentric videos from 7 different sources (over 4.3M images) and ImageNet to train different-sized vision transformers using Masked Auto-Encoding (MAE) on slices of this data. Contrary to inferences from prior work, we find that scaling dataset size and diversity does not improve performance universally (but does so on average). Our largest model, named VC-1, outperforms all prior PVRs on average but does not universally dominate either. Next, we show that task- or domain-specific adaptation of VC-1 leads to substantial gains, with VC-1 (adapted) achieving competitive or superior performance than the best known results on all of the benchmarks in CortexBench. Finally, we present real-world hardware experiments, in which VC-1 and VC-1 (adapted) outperform the strongest pre-existing PVR. 
Overall, this paper presents no new techniques but a rigorous systematic evaluation, a broad set of findings about PVRs (that in some cases, refute those made in narrow domains in prior work), and open-sourced code and models (that required over 10,000 GPU-hours to train) for the benefit of the research community.", "keywords": "representation learning;pre-training;foundation models;embodied AI;reinforcement learning;imitation learning", "primary_area": "", "supplementary_material": "", "author": "Arjun Majumdar;Karmesh Yadav;Sergio Arnaud;Yecheng Jason Ma;Claire Chen;Sneha Silwal;Aryan Jain;Vincent-Pierre Berges;Tingfan Wu;Jay Vakil;Pieter Abbeel;Jitendra Malik;Dhruv Batra;Yixin Lin;Oleksandr Maksymets;Aravind Rajeswaran;Franziska Meier", "authorids": "~Arjun_Majumdar2;~Karmesh_Yadav1;~Sergio_Arnaud1;~Yecheng_Jason_Ma2;~Claire_Chen1;~Sneha_Silwal1;~Aryan_Jain1;~Vincent-Pierre_Berges1;~Tingfan_Wu2;~Jay_Vakil1;~Pieter_Abbeel2;~Jitendra_Malik2;~Dhruv_Batra1;~Yixin_Lin1;~Oleksandr_Maksymets1;~Aravind_Rajeswaran1;~Franziska_Meier2", "gender": "M;M;M;;F;F;M;;M;M;M;M;Not Specified;M;M;M;", "homepage": "https://arjunmajum.github.io/;https://www.karmeshyadav.com;https://github.com/SergioArnaud;;http://iprl.stanford.edu/;http://ssilwal.com;;;;https://jdvakil.github.io;https://people.eecs.berkeley.edu/~pabbeel/;https://people.eecs.berkeley.edu/~malik/;https://dhruvbatra.com;https://yixinlin.net;https://research.fb.com/people/maksymets-oleksandr/;http://aravindr93.github.io/;", "dblp": "168/2927;264/3702;344/3641;;19/7720;;;227/3339.html;;345/8174;;58/2944;67/6586;236/9891;239/4227;164/5778;", "google_scholar": "nyicsDgAAAAJ;VsTyEcQAAAAJ;-dCETaQAAAAJ;;;;;JEr3qVwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;oY9R5YQAAAAJ;_bs7PqgAAAAJ;;https://scholar.google.com.ua/citations?user=ZKDLDQoAAAAJ;_EJrRVAAAAAJ;", "orcid": ";;;;;;;;;;;0000-0003-3695-1580;;;;;", "linkedin": ";karmesh-yadav/;sergio-arnaud-226456198/;;;;aryan-jain-9101/;vincentpierreberges/;;jdvakil;;;;;maksymets/;;", "or_profile": "~Arjun_Majumdar2;~Karmesh_Yadav1;~Sergio_Arnaud1;~Yecheng_Jason_Ma2;~Claire_Chen1;~Sneha_Silwal1;~Aryan_Jain1;~Vincent-Pierre_Berges1;~Tingfan_Wu2;~Jay_Vakil1;~Pieter_Abbeel2;~Jitendra_Malik2;~Dhruv_Batra1;~Yixin_Lin1;~Oleksandr_Maksymets1;~Aravind_Rajeswaran1;~Franziska_Meier2", "aff": "Georgia Institute of Technology;Meta AI;Meta AI;;Stanford University;AI at Meta;Electrical Engineering & Computer Science Department, University of California, Berkeley;Meta;;Meta AI ;Covariant;University of California, Berkeley;Georgia Institute of Technology;Facebook AI Research;Meta;Meta Facebook;", "aff_domain": "gatech.edu;meta.com;meta.com;;stanford.edu;meta.com;eecs.berkeley.edu;meta.com;;meta.com;covariant.ai;berkeley.edu;gatech.edu;facebook.com;meta.com;meta.com;", "position": "PhD student;Researcher;Researcher;;PhD student;AI Resident;Undergrad student;Researcher;;Researcher;Founder;Full Professor;Associate Professor;Research engineer;Researcher;Research Scientist;", "bibtex": "@inproceedings{\nmajumdar2023where,\ntitle={Where are we in the search for an Artificial Visual Cortex for Embodied Intelligence?},\nauthor={Arjun Majumdar and Karmesh Yadav and Sergio Arnaud and Yecheng Jason Ma and Claire Chen and Sneha Silwal and Aryan Jain and Vincent-Pierre Berges and Tingfan Wu and Jay Vakil and Pieter Abbeel and Jitendra Malik and Dhruv Batra and Yixin Lin and Oleksandr Maksymets and Aravind Rajeswaran and Franziska 
Meier},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6qLzQeFGio}\n}", "github": "", "project": "", "reviewers": "nrzP;aWtL;5TRU;on5Q", "pdf_size": 24665145, "rating": "2;5;7;8", "confidence": "4;5;5;4", "soundness": "2;3;3;3", "novelty": "1;3;4;4", "presentation": "2;3;4;4", "wc_summary": "132;101;62;127", "wc_strengths": "53;56;41;49", "wc_weaknesses": "418;173;207;73", "wc_questions": "7;7;48;118", "wc_limitations": "1;10;32;6", "wc_review": "611;347;390;373", "wc_reply_reviewers": "0;57;0;376", "wc_reply_authors": "0;0;0;149", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 2.29128784747792 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 105.5, 27.735356496717326 ], "wc_strengths_avg": [ 49.75, 5.629165124598851 ], "wc_weaknesses_avg": [ 217.75, 125.6689599702329 ], "wc_questions_avg": [ 45.0, 45.348649373492925 ], "wc_limitations_avg": [ 12.25, 11.840080236214618 ], "wc_review_avg": [ 430.25, 105.47363414616945 ], "wc_reply_reviewers_avg": [ 108.25, 156.32718093792903 ], "wc_reply_authors_avg": [ 37.25, 64.51889258194068 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": 0.21821789023599236, "gs_citation": 161, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10032738632668936539&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "gatech.edu;meta.com;meta.com;;stanford.edu;meta.com;eecs.berkeley.edu;meta.com;;meta.com;covariant.ai;berkeley.edu;gatech.edu;facebook.com;meta.com;meta.com;", "author_num": 17, "aff_unique_index": "0;1;1;2;1;3;1;1;4;3;0;1;1;1", "aff_unique_norm": "Georgia Institute of Technology;Meta;Stanford University;University of California, Berkeley;Covariant", "aff_unique_dep": ";Meta AI;;Electrical Engineering & Computer Science Department;", "aff_unique_url": "https://www.gatech.edu;https://meta.com;https://www.stanford.edu;https://www.berkeley.edu;", "aff_unique_abbr": "Georgia Tech;Meta;Stanford;UC Berkeley;", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Stanford;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Explaining Predictive Uncertainty with Information Theoretic Shapley Values", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72733", "id": "6rabAZhCRS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/16e4be78e61a3897665fa01504e9f452-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6rabAZhCRS", "openreview": "https://openreview.net/forum?id=6rabAZhCRS", "poster": "/media/PosterPDFs/NeurIPS%202023/72733.png?t=1701795797.5195403", "slides": "https://nips.cc/virtual/2023/poster/72733", "video": "https://nips.cc/virtual/2023/poster/72733", "author_site": "David Watson, Joshua O'Hara, Niek Tax, Richard Mudd, Ido Guy", "tldr": "", "abstract": "Researchers in explainable artificial intelligence have developed numerous methods for helping users understand the predictions of complex supervised learning models. By contrast, explaining the $\\textit{uncertainty}$ of model outputs has received relatively little attention. 
We adapt the popular Shapley value framework to explain various types of predictive uncertainty, quantifying each feature's contribution to the conditional entropy of individual model outputs. We consider games with modified characteristic functions and find deep connections between the resulting Shapley values and fundamental quantities from information theory and conditional independence testing. We outline inference procedures for finite sample error rate control with provable guarantees, and implement efficient algorithms that perform well in a range of experiments on real and simulated data. Our method has applications to covariate shift detection, active learning, feature selection, and active feature-value acquisition.", "keywords": "Explainable AI;interpretable ML;feature attributions;information theory;Shapley values", "primary_area": "", "supplementary_material": "/attachment/3b2b232c5f49cfaaf6320981d08871f617b1908c.zip", "author": "David Watson;Joshua O'Hara;Niek Tax;Richard Mudd;Ido Guy", "authorids": "~David_Watson2;~Joshua_O'Hara1;~Niek_Tax1;~Richard_Mudd1;~Ido_Guy1", "gender": "M;M;M;;", "homepage": "http://dswatson.github.io;https://github.com/joshwa71;;;", "dblp": "234/8807.html;;143/7361;;46/650", "google_scholar": "BAHkyk8AAAAJ;;XkRvCC4AAAAJ;;iiXF7KkAAAAJ", "orcid": "0000-0001-9632-2159;;0000-0001-7239-5206;;", "linkedin": "david-watson-9707a7106/;;niektax/;;", "or_profile": "~David_Watson2;~Joshua_O'Hara1;~Niek_Tax1;~Richard_Mudd1;~Ido_Guy1", "aff": "King's College London, University of London;King's College London, University of London;Meta Facebook;;Ben-Gurion University of the Negev", "aff_domain": "kcl.ac.uk;kcl.ac.uk;facebook.com;;bgu.ac.il", "position": "Lecturer;MS student;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nwatson2023explaining,\ntitle={Explaining Predictive Uncertainty with Information Theoretic Shapley Values},\nauthor={David Watson and Joshua O'Hara and Niek Tax and Richard Mudd and Ido Guy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6rabAZhCRS}\n}", "github": "", "project": "", "reviewers": "TWY9;DQBN;yQq8;2QPT", "pdf_size": 2925213, "rating": "4;4;6;7", "confidence": "5;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "47;73;129;58", "wc_strengths": "59;59;62;114", "wc_weaknesses": "382;296;57;162", "wc_questions": "1;41;24;151", "wc_limitations": "8;9;1;29", "wc_review": "497;478;273;514", "wc_reply_reviewers": "232;193;39;25", "wc_reply_authors": "401;465;40;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.75, 31.546592525976557 ], "wc_strengths_avg": [ 73.5, 23.41473894793619 ], "wc_weaknesses_avg": [ 224.25, 124.37920847151263 ], "wc_questions_avg": [ 54.25, 57.63408279828872 ], "wc_limitations_avg": [ 11.75, 10.425329730996522 ], "wc_review_avg": [ 440.5, 97.541017013357 ], "wc_reply_reviewers_avg": [ 122.25, 91.43132668839493 ], "wc_reply_authors_avg": [ 226.5, 208.21683409369186 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 37, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16391915982055282774&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "kcl.ac.uk;kcl.ac.uk;facebook.com;;bgu.ac.il", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "King's College London;Meta;Ben-Gurion University of the Negev", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.kcl.ac.uk;https://meta.com;https://www.bgu.ac.il", "aff_unique_abbr": "KCL;Meta;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "United Kingdom;United States;Israel" }, { "title": "Efficient Uncertainty Quantification and Reduction for Over-Parameterized Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72732", "id": "6vnwhzRinw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cb2266111eadcfa2c02187ace64e2183-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6vnwhzRinw", "openreview": "https://openreview.net/forum?id=6vnwhzRinw", "poster": "/media/PosterPDFs/NeurIPS%202023/72732.png?t=1702332701.7372253", "slides": "https://nips.cc/virtual/2023/poster/72732", "video": "https://nips.cc/virtual/2023/poster/72732", "author_site": "Ziyi Huang, Henry Lam, Haofeng Zhang", "tldr": "", "abstract": "Uncertainty quantification (UQ) is important for reliability assessment and enhancement of machine learning models. In deep learning, uncertainties arise not only from data, but also from the training procedure that often injects substantial noises and biases. These hinder the attainment of statistical guarantees and, moreover, impose computational challenges on UQ due to the need for repeated network retraining. Building upon the recent neural tangent kernel theory, we create statistically guaranteed schemes to principally \\emph{characterize}, and \\emph{remove}, the uncertainty of over-parameterized neural networks with very low computation effort. In particular, our approach, based on what we call a procedural-noise-correcting (PNC) predictor, removes the procedural uncertainty by using only \\emph{one} auxiliary network that is trained on a suitably labeled dataset, instead of many retrained networks employed in deep ensembles. 
Moreover, by combining our PNC predictor with suitable light-computation resampling methods, we build several approaches to construct asymptotically exact-coverage confidence intervals using as low as four trained networks without additional overheads.", "keywords": "frequentist uncertainty;epistemic uncertainty;procedural variability;confidence intervals;batching;cheap bootstrap", "primary_area": "", "supplementary_material": "/attachment/8219d86a46163bd5cc246dd30a9f22695e6b4f7a.pdf", "author": "Ziyi Huang;Henry Lam;Haofeng Zhang", "authorids": "~Ziyi_Huang1;~Henry_Lam1;~Haofeng_Zhang1", "gender": "F;;", "homepage": "https://structurefunctionlab.ee.columbia.edu/people/ziyi-huang;http://www.columbia.edu/~khl2114/;", "dblp": ";35/9508;", "google_scholar": "KWfiGJUAAAAJ;Bnj50x0AAAAJ;", "orcid": "0000-0001-6985-0298;;", "linkedin": "ziyi-huang-083683135/;;", "or_profile": "~Ziyi_Huang1;~Henry_Lam1;~Haofeng_Zhang1", "aff": "Columbia University;Columbia University;", "aff_domain": "columbia.edu;columbia.edu;", "position": "Researcher;Associate Professor;", "bibtex": "@inproceedings{\nhuang2023efficient,\ntitle={Efficient Uncertainty Quantification and Reduction for Over-Parameterized Neural Networks},\nauthor={Ziyi Huang and Henry Lam and Haofeng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6vnwhzRinw}\n}", "github": "", "project": "", "reviewers": "bPtq;cgMr;VEbh;nEzW", "pdf_size": 667997, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "101;54;65;54", "wc_strengths": "82;28;143;55", "wc_weaknesses": "271;170;357;64", "wc_questions": "3;92;22;26", "wc_limitations": "100;8;12;17", "wc_review": "557;352;599;216", "wc_reply_reviewers": "109;9;244;78", "wc_reply_authors": "54;13;596;80", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.5, 19.29378138157474 ], "wc_strengths_avg": [ 77.0, 42.62041764225217 ], "wc_weaknesses_avg": [ 215.5, 109.68705484240152 ], "wc_questions_avg": [ 35.75, 33.61826140656295 ], "wc_limitations_avg": [ 34.25, 38.09445497706982 ], "wc_review_avg": [ 431.0, 155.37535197063914 ], "wc_reply_reviewers_avg": [ 110.0, 85.41369913544314 ], "wc_reply_authors_avg": [ 185.75, 238.05921007178026 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14818502368753542233&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "columbia.edu;columbia.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PUe: Biased Positive-Unlabeled Learning Enhancement by Causal Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72731", "id": "6vtZIoxZoJ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3efb4bdc6bfe13e1ff95b4407c37961d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6vtZIoxZoJ", "openreview": "https://openreview.net/forum?id=6vtZIoxZoJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72731", "video": "https://nips.cc/virtual/2023/poster/72731", "author_site": "Xutao Wang, Hanting Chen, Tianyu Guo, Yunhe Wang", "tldr": "", "abstract": "Positive-Unlabeled (PU) learning aims to achieve high-accuracy binary classification with \nlimited labeled positive examples and numerous unlabeled ones. Existing cost-sensitive-based \nmethods often rely on strong assumptions that examples with an observed positive label were \nselected entirely at random. In fact, the uneven distribution of labels is prevalent in \nreal-world PU problems, indicating that most actual positive and unlabeled data are subject \nto selection bias. In this paper, we propose a PU learning enhancement (PUe) algorithm \nbased on causal inference theory, which employs normalized propensity scores and normalized \ninverse probability weighting (NIPW) techniques to reconstruct the loss function, thus \nobtaining a consistent, unbiased estimate of the classifier and enhancing the model's \nperformance. Moreover, we investigate and propose a method for estimating propensity scores \nin deep learning using regularization techniques when the labeling mechanism is unknown. \nOur experiments on three benchmark datasets demonstrate the proposed PUe algorithm significantly \nimproves the accuracy of classifiers on non-uniform label distribution datasets compared to \nadvanced cost-sensitive PU methods. Codes are available at https://github.com/huawei-noah/Noah-research/tree/master/PUe and https://gitee.com/mindspore/models/tree/master/research/cv/PUe.", "keywords": "PU learning;causal inference;semi-supervised learning", "primary_area": "", "supplementary_material": "/attachment/dff891bb7b8f351a44067c770c7c35a0bf6b9306.pdf", "author": "Xutao Wang;Hanting Chen;Tianyu Guo;Yunhe Wang", "authorids": "~Xutao_Wang1;~Hanting_Chen1;~Tianyu_Guo1;~Yunhe_Wang1", "gender": "M;M;M;M", "homepage": ";;;https://www.wangyunhe.site/", "dblp": ";232/2060;218/7273;63/8217-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;RPK3oQgAAAAJ;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";;;0000-0002-0142-509X", "linkedin": ";;;", "or_profile": "~Xutao_Wang1;~Hanting_Chen1;~Tianyu_Guo1;~Yunhe_Wang1", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah's Ark Lab", "aff_domain": ";huawei.com;huawei.com;huawei.com", "position": ";Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwang2023pue,\ntitle={{PU}e: Biased Positive-Unlabeled Learning Enhancement by Causal Inference},\nauthor={Xutao Wang and Hanting Chen and Tianyu Guo and Yunhe Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6vtZIoxZoJ}\n}", "github": "", "project": "", "reviewers": "w1uD;7yJd;nitz;fhZG;U7Dp", "pdf_size": 429748, "rating": "4;4;6;7;7", "confidence": "4;4;3;4;4", "soundness": "1;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "1;1;3;3;2", "wc_summary": "37;125;202;43;101", "wc_strengths": "29;67;142;33;69", "wc_weaknesses": "266;278;203;58;192", "wc_questions": "3;29;156;2;5", "wc_limitations": "1;12;1;1;1", "wc_review": "336;511;704;137;368", "wc_reply_reviewers": 
"929;271;187;0;11", "wc_reply_authors": "3174;1223;1487;0;0", "reply_reviewers": "6;2;2;0;1", "reply_authors": "10;5;8;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.0, 0.8944271909999159 ], "wc_summary_avg": [ 101.6, 60.390727765113084 ], "wc_strengths_avg": [ 68.0, 40.55613393803704 ], "wc_weaknesses_avg": [ 199.4, 78.3264961555156 ], "wc_questions_avg": [ 39.0, 59.34644049983116 ], "wc_limitations_avg": [ 3.2, 4.4 ], "wc_review_avg": [ 411.2, 188.89086796348838 ], "wc_reply_reviewers_avg": [ 279.6, 340.79061019928355 ], "wc_reply_authors_avg": [ 1176.8, 1171.0578807215295 ], "reply_reviewers_avg": [ 2.2, 2.039607805437114 ], "reply_authors_avg": [ 5.0, 3.63318042491699 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.14744195615489716, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13119647479788339813&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";huawei.com;huawei.com;huawei.com", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Weitzman's Rule for Pandora's Box with Correlations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72730", "id": "6wBkT2ndDu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29d319f7c1513c9ecd81d3a6e9632a6e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6wBkT2ndDu", "openreview": "https://openreview.net/forum?id=6wBkT2ndDu", "poster": "/media/PosterPDFs/NeurIPS%202023/72730.png?t=1702168864.4958289", "slides": "https://nips.cc/virtual/2023/poster/72730", "video": "https://nips.cc/virtual/2023/poster/72730", "author_site": "Evangelia Gergatsouli, Christos Tzamos", "tldr": "", "abstract": "Pandora\u2019s Box is a central problem in decision making under uncertainty that can model various real life scenarios. In this problem we are given n boxes, each with a fixed opening cost, and an unknown value drawn from a known distribution, only revealed if we pay the opening cost. Our goal is to find a strategy for opening boxes to minimize the sum of the value selected and the opening cost paid.\nIn this work we revisit Pandora\u2019s Box when the value distributions are correlated, first studied in [CGT+20]. We show that the optimal algorithm for the independent case, given by Weitzman\u2019s rule, directly works for the correlated case. In fact, our algorithm results in significantly improved approximation guarantees compared to the previous work, while also being substantially simpler. We also show how to implement the rule given only sample access to the correlated distribution of values. 
Specifically, we find that a number of samples that is polynomial in the number of boxes is sufficient for the algorithm to work.", "keywords": "pandora's box;stochastic optimization;discrete optimization;learning from samples;algorithms under uncertainty", "primary_area": "", "supplementary_material": "", "author": "Evangelia Gergatsouli;Christos Tzamos", "authorids": "~Evangelia_Gergatsouli1;~Christos_Tzamos1", "gender": ";", "homepage": ";https://tzamos.com", "dblp": ";79/8819", "google_scholar": ";wB01auEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Evangelia_Gergatsouli1;~Christos_Tzamos1", "aff": ";University of Wisconsin, Madison", "aff_domain": ";wisc.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\ngergatsouli2023weitzmans,\ntitle={Weitzman's Rule for Pandora's Box with Correlations},\nauthor={Evangelia Gergatsouli and Christos Tzamos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6wBkT2ndDu}\n}", "github": "", "project": "", "reviewers": "1WDf;coaw;ktwT;6o9F;PoqM", "pdf_size": 434984, "rating": "4;6;6;7;8", "confidence": "3;2;4;4;4", "soundness": "2;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;2;2;3;3", "wc_summary": "102;42;47;215;96", "wc_strengths": "43;67;69;139;22", "wc_weaknesses": "106;95;91;141;107", "wc_questions": "13;80;14;82;21", "wc_limitations": "4;1;4;2;1", "wc_review": "268;285;225;579;247", "wc_reply_reviewers": "53;79;0;13;5", "wc_reply_authors": "167;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 100.4, 62.31725282776833 ], "wc_strengths_avg": [ 68.0, 39.45630494610462 ], "wc_weaknesses_avg": [ 108.0, 17.618172436436193 ], "wc_questions_avg": [ 42.0, 31.96873472629156 ], "wc_limitations_avg": [ 2.4, 1.3564659966250538 ], "wc_review_avg": [ 320.8, 130.66047604382896 ], "wc_reply_reviewers_avg": [ 30.0, 30.80259729308553 ], "wc_reply_authors_avg": [ 33.4, 66.80000000000001 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.48995593493886586, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4008191694537651315&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";wisc.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "$\\mathcal{M}^4$: A Unified XAI Benchmark for Faithfulness Evaluation of Feature Attribution Methods across Metrics, Modalities and Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73690", "id": "6zcfrSz98y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05957c194f4c77ac9d91e1374d2def6b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=6zcfrSz98y", "openreview": "https://openreview.net/forum?id=6zcfrSz98y", "poster": "/media/PosterPDFs/NeurIPS%202023/73690.png?t=1701089291.1404126", "slides": 
"https://nips.cc/virtual/2023/poster/73690", "video": "https://nips.cc/virtual/2023/poster/73690", "author_site": "Xuhong Li, Mengnan Du, Jiamin Chen, Yekun Chai, Himabindu Lakkaraju, Haoyi Xiong", "tldr": "", "abstract": "While Explainable Artificial Intelligence (XAI) techniques have been widely studied to explain predictions made by deep neural networks, the way to evaluate the faithfulness of explanation results remains challenging, due to the heterogeneity of explanations for various models and the lack of ground-truth explanations. This paper introduces an XAI benchmark named $\\mathcal{M}^4$, which allows evaluating various input feature attribution methods using the same set of faithfulness metrics across multiple data modalities (images and texts) and network structures (ResNets, MobileNets, Transformers). A taxonomy for the metrics has been proposed as well. We first categorize commonly used XAI evaluation metrics into three groups based on the ground truth they require. We then implement classic and state-of-the-art feature attribution methods using InterpretDL and conduct extensive experiments to compare methods and gain insights. Extensive experiments have been conducted to provide holistic evaluations as benchmark baselines. Several interesting observations are noticed for designing attribution algorithms. The implementation of state-of-the-art explanation methods and evaluation metrics of $\\mathcal{M}^4$ is publicly available at \\url{https://github.com/PaddlePaddle/InterpretDL}.", "keywords": "explanation;attribution;XAI;faithfulness evaluation;benchmark", "primary_area": "", "supplementary_material": "/attachment/79bd0bc009a17577d06fff056ff63368dc9935e7.zip", "author": "Xuhong Li;Mengnan Du;Jiamin Chen;Yekun Chai;Himabindu Lakkaraju;Haoyi Xiong", "authorids": "~Xuhong_Li3;~Mengnan_Du1;~Jiamin_Chen2;~Yekun_Chai1;~Himabindu_Lakkaraju1;~Haoyi_Xiong1", "gender": ";;M;M;F;M", "homepage": "https://mengnandu.com/;;https://cyk1337.github.io/;https://sites.google.com/site/haoyixiongshomepage/;http://web.stanford.edu/~himalv;", "dblp": "183/5606;;252/0188;06/2700;68/9376;76/5330-2.html", "google_scholar": "0i-Js2gAAAAJ;zLoLPukAAAAJ;P0NRuRYAAAAJ;f_Kcie0AAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;", "linkedin": ";;;;;xuhong-li-4b2776a9/", "or_profile": "~Mengnan_Du1;~Jiamin_Chen2;~Yekun_Chai1;~Haoyi_Xiong1;~Hima_Lakkaraju1;~Xuhong_LI1", "aff": "New Jersey Institute of Technology;Beihang University;Baidu;Baidu;Harvard University;Baidu", "aff_domain": "njit.edu;buaa.edu.cn;baidu.com;baidu.com;harvard.edu;baidu.com", "position": "Assistant Professor;MS student;Researcher;Principal Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nli2023mathcalm,\ntitle={\\${\\textbackslash}mathcal\\{M\\}{\\textasciicircum}4\\$: A Unified {XAI} Benchmark for Faithfulness Evaluation of Feature Attribution Methods across Metrics, Modalities and Models},\nauthor={Xuhong Li and Mengnan Du and Jiamin Chen and Yekun Chai and Himabindu Lakkaraju and Haoyi Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=6zcfrSz98y}\n}", "github": "", "project": "", "reviewers": "65De;Jbv9;inEz", "pdf_size": 473315, "rating": "4;6;7", "confidence": "4;3;4", "wc_summary_and_contributions": "148;45;206", "wc_strengths": "35;50;283", "wc_improvement": "113;34;289", "wc_limitations": "16;14;327", "wc_correctness": "31;19;233", "wc_clarity": "23;11;158", 
"wc_relation_to_prior_work": "17;17;139", "wc_documentation": "25;11;194", "wc_additional_feedback": "1;1;1", "wc_review": "409;202;1830", "wc_reply_reviewers": "229;0;0", "wc_reply_authors": "1147;230;972", "reply_reviewers": "1;0;0", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 133.0, 66.57827473483124 ], "wc_strengths_avg": [ 122.66666666666667, 113.53805040112714 ], "wc_improvement_avg": [ 145.33333333333334, 106.58434323211934 ], "wc_limitations_avg": [ 119.0, 147.08047683722904 ], "wc_correctness_avg": [ 94.33333333333333, 98.17444790892498 ], "wc_clarity_avg": [ 64.0, 66.64833081180653 ], "wc_relation_to_prior_work_avg": [ 57.666666666666664, 57.51135153650587 ], "wc_documentation_avg": [ 76.66666666666667, 83.16382760685214 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 813.6666666666666, 723.6077820354216 ], "wc_reply_reviewers_avg": [ 76.33333333333333, 107.95163526114627 ], "wc_reply_authors_avg": [ 783.0, 397.5030398206618 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15559447071743768985&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "njit.edu;buaa.edu.cn;baidu.com;baidu.com;harvard.edu;baidu.com", "author_num": 6, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "New Jersey Institute of Technology;Beihang University;Baidu;Harvard University", "aff_unique_dep": ";;Baidu, Inc.;", "aff_unique_url": "https://www.njit.edu;http://www.buaa.edu.cn/;https://www.baidu.com;https://www.harvard.edu", "aff_unique_abbr": "NJIT;BUAA;Baidu;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Causes and Effects of Unanticipated Numerical Deviations in Neural Network Inference Frameworks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72729", "id": "6zyFgr1b8Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/af076c3bdbf935b81d808e37c5ede463-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=6zyFgr1b8Q", "openreview": "https://openreview.net/forum?id=6zyFgr1b8Q", "poster": "/media/PosterPDFs/NeurIPS%202023/72729.png?t=1701709835.2837923", "slides": "https://nips.cc/virtual/2023/poster/72729", "video": "https://nips.cc/virtual/2023/poster/72729", "author_site": "Alex Schl\u00f6gl, Nora Hofer, Rainer B\u00f6hme", "tldr": "", "abstract": "Hardware-specific optimizations in machine learning (ML) frameworks can cause numerical deviations of inference results. Quite surprisingly, despite using a fixed trained model and fixed input data, inference results are not consistent across platforms, and sometimes not even deterministic on the same platform. We study the causes of these numerical deviations for convolutional neural networks (CNN) on realistic end-to-end inference pipelines and in isolated experiments. Results from 75 distinct platforms suggest that the main causes of deviations on CPUs are differences in SIMD use, and the selection of convolution algorithms at runtime on GPUs. 
We link the causes and propagation effects to properties of the ML model and evaluate potential mitigations. We make our research code publicly available.", "keywords": "machine learning;security;reproducibility;forensics", "primary_area": "", "supplementary_material": "/attachment/b903334067a248622547e375a1db3a4e7bef4de9.pdf", "author": "Alexander Schl\u00f6gl;Nora Hofer;Rainer B\u00f6hme", "authorids": "~Alexander_Schl\u00f6gl1;~Nora_Hofer1;~Rainer_B\u00f6hme1", "gender": "Not Specified;;", "homepage": "https://informationsecurity.uibk.ac.at/people/alexander-schloegl/;;", "dblp": ";;", "google_scholar": "iQLDBXMAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alexander_Schl\u00f6gl1;~Nora_Hofer1;~Rainer_B\u00f6hme1", "aff": "Universit\u00e4t Innsbruck;;", "aff_domain": "uibk.ac.at;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nschl{\\\"o}gl2023causes,\ntitle={Causes and Effects of Unanticipated Numerical Deviations in Neural Network Inference Frameworks},\nauthor={Alexander Schl{\\\"o}gl and Nora Hofer and Rainer B{\\\"o}hme},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=6zyFgr1b8Q}\n}", "github": "", "project": "", "reviewers": "s6bp;djPp;u1sW;3gga", "pdf_size": 318544, "rating": "5;5;6;7", "confidence": "4;4;3;5", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "57;38;98;94", "wc_strengths": "64;13;42;48", "wc_weaknesses": "336;75;72;163", "wc_questions": "153;125;219;29", "wc_limitations": "44;7;2;6", "wc_review": "654;258;433;340", "wc_reply_reviewers": "631;66;22;111", "wc_reply_authors": "760;376;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.75, 25.202926417382564 ], "wc_strengths_avg": [ 41.75, 18.444172521422587 ], "wc_weaknesses_avg": [ 161.5, 107.17392406737751 ], "wc_questions_avg": [ 131.5, 68.31361504122 ], "wc_limitations_avg": [ 14.75, 16.990806337546196 ], "wc_review_avg": [ 421.25, 147.95501850224616 ], "wc_reply_reviewers_avg": [ 207.5, 246.5243395691387 ], "wc_reply_authors_avg": [ 284.0, 314.7824645687876 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9643430057935411610&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "uibk.ac.at;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Innsbruck", "aff_unique_dep": "", "aff_unique_url": "https://www.uibk.ac.at", "aff_unique_abbr": "UIBK", "aff_campus_unique_index": "0", "aff_campus_unique": "Innsbruck", "aff_country_unique_index": "0", "aff_country_unique": "Austria" }, { "title": "VPGTrans: Transfer Visual Prompt Generator across LLMs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72728", "id": "716PvHoDct", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/407106f4b56040b2e8dcad75a6e461e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=716PvHoDct", "openreview": "https://openreview.net/forum?id=716PvHoDct", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72728.png?t=1701614274.4398463", "slides": "https://nips.cc/virtual/2023/poster/72728", "video": "https://nips.cc/virtual/2023/poster/72728", "author_site": "Ao Zhang, Hao Fei, Yuan Yao, Wei Ji, Li Li, Zhiyuan Liu, Tat-Seng Chua", "tldr": "", "abstract": "Since developing a new multimodal LLM (MLLM) by pre-training on tremendous image-text pairs from scratch can be exceedingly resource-consuming, connecting an existing LLM with a comparatively lightweight visual prompt generator (VPG) becomes a feasible paradigm. However, further tuning the VPG component of the MLLM still incurs significant computational costs, such as thousands of GPU hours and millions of training data points. An alternative solution is transferring an existing VPG from one MLLM to the target MLLM. In this work, we investigate VPG transferability across LLMs for the first time, aiming to reduce the cost of VPG training. Specifically, we explore VPG transfer across different LLM sizes (e.g., small-to-large) and types. We identify key factors to maximize transfer efficiency, based on which we develop a simple yet highly effective two-stage transfer framework, called VPGTrans. Notably, it enables VPG transfer from BLIP-2 OPT 2.7B to BLIP-2 OPT 6.7B with less than 10% of the GPU hours using only 10.7% of the training data compared to training a VPG for OPT 6.7B from scratch. Furthermore, we provide a series of intriguing findings and discuss potential explanations behind them. Finally, we showcase the practical value of our VPGTrans approach, by customizing two novel MLLMs, including VL-LLaMA and VL-Vicuna, with recently released LLaMA and Vicuna LLMs.", "keywords": "Visual Prompt Generator;Efficient Transfer;Multimodality", "primary_area": "", "supplementary_material": "/attachment/67eee9669c7ec14d16554d9913b5f8977fb39b3f.zip", "author": "Ao Zhang;Hao Fei;Yuan Yao;Wei Ji;Li Li;Zhiyuan Liu;Tat-Seng Chua", "authorids": "~Ao_Zhang2;~Hao_Fei1;~Yuan_Yao12;~Wei_Ji1;~Li_Li18;~Zhiyuan_Liu1;~Tat-Seng_Chua2", "gender": "M;M;M;M;M;M;M", "homepage": "https://waxnkw.github.io/;https://haofei.vip/;https://yaoyuanthu.github.io/;https://jiwei0523.github.io/;https://lili0415.github.io;http://nlp.csai.tsinghua.edu.cn/~lzy;http://www.comp.nus.edu.sg/~chuats/", "dblp": "187/6243;81/3569-1;;52/3220-8;53/2189-91;53/3245-1;", "google_scholar": "0akC8h8AAAAJ;YGDX46AAAAAJ;https://scholar.google.com.hk/citations?user=3NWfi3YAAAAJ;69OFB-AAAAAJ;r4kIL4cAAAAJ;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": ";0000-0003-3026-6347;;0000-0002-8106-9768;0009-0003-2007-2706;0000-0002-7709-2543;0000-0001-6097-7807", "linkedin": ";;;;;;", "or_profile": "~Ao_Zhang2;~Hao_Fei1;~Yuan_Yao12;~Wei_Ji1;~Li_Li18;~Zhiyuan_Liu1;~Tat-seng_Chua1", "aff": "National University of Singapore;National University of Singapore;Tsinghua University;;National University of Singapore;Tsinghua University;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;tsinghua.edu.cn;;u.nus.edu;tsinghua.edu.cn;nus.edu.sg", "position": "PhD student;Postdoc;PhD student;;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023vpgtrans,\ntitle={{VPGT}rans: Transfer Visual Prompt Generator across {LLM}s},\nauthor={Ao Zhang and Hao Fei and Yuan Yao and Wei Ji and Li Li and Zhiyuan Liu and Tat-Seng Chua},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=716PvHoDct}\n}", "github": "", "project": "", 
"reviewers": "GwBd;iPdz;7KJM;mi8b", "pdf_size": 2609169, "rating": "4;6;6;9", "confidence": "5;4;4;5", "soundness": "3;4;2;4", "novelty": "3;3;2;4", "presentation": "2;3;3;3", "wc_summary": "46;45;85;201", "wc_strengths": "30;11;41;312", "wc_weaknesses": "67;81;212;352", "wc_questions": "34;6;18;2", "wc_limitations": "2;6;1;13", "wc_review": "179;149;357;880", "wc_reply_reviewers": "0;12;25;151", "wc_reply_authors": "110;0;102;310", "reply_reviewers": "0;1;1;2", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.25, 63.707829189197774 ], "wc_strengths_avg": [ 98.5, 123.73055402769359 ], "wc_weaknesses_avg": [ 178.0, 115.284430865577 ], "wc_questions_avg": [ 15.0, 12.449899597988733 ], "wc_limitations_avg": [ 5.5, 4.716990566028302 ], "wc_review_avg": [ 391.25, 293.1658020642926 ], "wc_reply_reviewers_avg": [ 47.0, 60.691844592169055 ], "wc_reply_authors_avg": [ 130.5, 112.34211142754972 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.14002800840280097, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7868428139053341679&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "u.nus.edu;nus.edu.sg;tsinghua.edu.cn;;u.nus.edu;tsinghua.edu.cn;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "National University of Singapore;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NUS;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Understanding, Predicting and Better Resolving Q-Value Divergence in Offline-RL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72727", "id": "71P7ugOGCV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd6bb13e78da078d8adcabbe6d9ca737-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=71P7ugOGCV", "openreview": "https://openreview.net/forum?id=71P7ugOGCV", "poster": "/media/PosterPDFs/NeurIPS%202023/72727.png?t=1701751418.1505277", "slides": "https://nips.cc/virtual/2023/poster/72727", "video": "https://nips.cc/virtual/2023/poster/72727", "author_site": "Yang Yue, Rui Lu, Bingyi Kang, Shiji Song, Gao Huang", "tldr": "", "abstract": "The divergence of the Q-value estimation has been a prominent issue offline reinforcement learning (offline RL), where the agent has no access to real dynamics. Traditional beliefs attribute this instability to querying out-of-distribution actions when bootstrapping value targets. Though this issue can be alleviated with policy constraints or conservative Q estimation, a theoretical understanding of the underlying mechanism causing the divergence has been absent. In this work, we aim to thoroughly comprehend this mechanism and attain an improved solution. We first identify a fundamental pattern, \\emph{self-excitation}, as the primary cause of Q-value estimation divergence in offline RL. 
Then, we propose a novel Self-Excite Eigenvalue Measure (SEEM) metric based on Neural Tangent Kernel (NTK) to measure the evolving property of the Q-network during training, which provides an intriguing explanation of the emergence of divergence. For the first time, our theory can reliably decide whether the training will diverge at an early stage, and even predict the order of the growth for the estimated Q-value, the model's norm, and the crashing step when an SGD optimizer is used. The experiments demonstrate perfect alignment with this theoretical analysis. Building on our insights, we propose to resolve divergence from a novel perspective, namely improving the model's architecture for better extrapolating behavior. Through extensive empirical studies, we identify LayerNorm as a good solution to effectively avoid divergence without introducing detrimental bias, leading to superior performance. Experimental results prove that it can still work in some of the most challenging settings, i.e., using only 1$\\%$ of the transitions of the dataset, where all previous methods fail. Moreover, it can be easily plugged into modern offline RL methods and achieve SOTA results on many challenging tasks. We also give unique insights into its effectiveness.", "keywords": "Offline RL;Theory", "primary_area": "", "supplementary_material": "/attachment/949fe98dc934bd7a61fa34ebcaa54b02ab2f7ddc.zip", "author": "Yang Yue;Rui Lu;Bingyi Kang;Shiji Song;Gao Huang", "authorids": "~Yang_Yue1;~Rui_Lu2;~Bingyi_Kang1;~Shiji_Song1;~Gao_Huang1", "gender": ";M;;M;M", "homepage": ";;https://bingykang.github.io/;;http://www.gaohuang.net", "dblp": ";;;72/5351;", "google_scholar": ";upMvIv4AAAAJ;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;;-P9LwcgAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yang_Yue1;~Rui_Lu2;~Bingyi_Kang1;~Shiji_Song1;~Gao_Huang1", "aff": ";Department of Automation, Tsinghua University;Sea AI Lab;Tsinghua University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;sea.com;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": ";PhD student;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyue2023understanding,\ntitle={Understanding, Predicting and Better Resolving Q-Value Divergence in Offline-{RL}},\nauthor={Yang Yue and Rui Lu and Bingyi Kang and Shiji Song and Gao Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=71P7ugOGCV}\n}", "github": "", "project": "", "reviewers": "T4o6;4Xya;WzD5", "pdf_size": 1756077, "rating": "5;7;7", "confidence": "4;4;4", "soundness": "2;4;4", "novelty": "2;4;4", "presentation": "3;4;3", "wc_summary": "78;211;72", "wc_strengths": "42;126;69", "wc_weaknesses": "171;35;87", "wc_questions": "5;288;1", "wc_limitations": "1;85;1", "wc_review": "297;745;230", "wc_reply_reviewers": "216;43;0", "wc_reply_authors": "1261;39;0", "reply_reviewers": "3;1;0", "reply_authors": "5;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 120.33333333333333, 64.15779159402405 ], "wc_strengths_avg": [ 79.0, 35.014282800023196 ], "wc_weaknesses_avg": [ 97.66666666666667, 56.031737038535184 ], "wc_questions_avg": [ 98.0, 134.3602123646233 ], "wc_limitations_avg": [ 29.0, 39.59797974644666 ], "wc_review_avg": [ 424.0, 
228.62341670674653 ], "wc_reply_reviewers_avg": [ 86.33333333333333, 93.35356923486584 ], "wc_reply_authors_avg": [ 433.3333333333333, 585.4652470946124 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18153214842806240540&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";tsinghua.edu.cn;sea.com;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;Sea AI Lab", "aff_unique_dep": "Department of Automation;", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "QH9: A Quantum Hamiltonian Prediction Benchmark for QM9 Molecules", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73689", "id": "71uRr9N39A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f755e271717450020fda40f020922dd-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=71uRr9N39A", "openreview": "https://openreview.net/forum?id=71uRr9N39A", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73689", "video": "https://nips.cc/virtual/2023/poster/73689", "author_site": "Haiyang Yu, Meng Liu, Youzhi Luo, Alex Strasser, Xiaofeng Qian, Xiaoning Qian, Shuiwang Ji", "tldr": "", "abstract": "Supervised machine learning approaches have been increasingly used in accelerating electronic structure prediction as surrogates of first-principle computational methods, such as density functional theory (DFT). While numerous quantum chemistry datasets focus on chemical properties and atomic forces, the ability to achieve accurate and efficient prediction of the Hamiltonian matrix is highly desired, as it is the most important and fundamental physical quantity that determines the quantum states of physical systems and chemical properties. In this work, we generate a new Quantum Hamiltonian dataset, named as QH9, to provide precise Hamiltonian matrices for 999 molecular dynamics trajectories and 130,831 stable molecular geometries, based on the QM9 dataset. By designing benchmark tasks with various molecules, we show that current machine learning models have the capacity to predict Hamiltonian matrices for arbitrary molecules. Both the QH9 dataset and the baseline models are provided to the community through an open-source benchmark, which can be highly valuable for developing machine learning methods and accelerating molecular and materials design for scientific and technological applications. 
Our benchmark is publicly available at https://github.com/divelab/AIRS/tree/main/OpenDFT/QHBench.", "keywords": "quantum chemistry;dataset;equivariant networks;machine learning;deep learning;graph neural networks;density functional theory", "primary_area": "", "supplementary_material": "", "author": "Haiyang Yu;Meng Liu;Youzhi Luo;Alex Strasser;Xiaofeng Qian;Xiaoning Qian;Shuiwang Ji", "authorids": "~Haiyang_Yu6;~Meng_Liu3;~Youzhi_Luo1;~Alex_Strasser1;~Xiaofeng_Qian1;~Xiaoning_Qian2;~Shuiwang_Ji1", "gender": "M;M;M;;;M;M", "homepage": "https://oceanusity.github.io/;https://mengliu1998.github.io;https://lyzustc.github.io/;;https://sites.google.com/tamu.edu/qian-group;https://www.ece.tamu.edu/~xqian;http://people.tamu.edu/~sji", "dblp": "90/6643-5;41/7841-15;280/0590;;266/1654;62/4504;84/6405", "google_scholar": "LZKU1hUAAAAJ;https://scholar.google.com/citations?hl=en;3lqQFIoAAAAJ;CBwhezMAAAAJ;bK7fFKoAAAAJ;dXGlddgAAAAJ;BZGj6sAAAAAJ", "orcid": ";;0000-0002-3763-0239;0000-0002-6573-0747;0000-0003-1627-288X;0000-0002-4347-2476;0000-0002-4205-4563", "linkedin": ";meng-liu-4a1813197/;youzhi-luo-139981172/;alex-m-strasser/;;;shuiwang-ji-9a040715/", "or_profile": "~Haiyang_Yu6;~Meng_Liu3;~Youzhi_Luo1;~Alex_Strasser1;~Xiaofeng_Qian1;~Xiaoning_Qian2;~Shuiwang_Ji1", "aff": "Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University;Texas A&M University - College Station;Texas A&M University;Texas A&M;Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nyu2023qh,\ntitle={{QH}9: A Quantum Hamiltonian Prediction Benchmark for {QM}9 Molecules},\nauthor={Haiyang Yu and Meng Liu and Youzhi Luo and Alex Strasser and Xiaofeng Qian and Xiaoning Qian and Shuiwang Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=71uRr9N39A}\n}", "github": "", "project": "", "reviewers": "gi6S;wfG7;jeVm;R63V", "pdf_size": 608199, "rating": "6;6;7;7", "confidence": "4;3;4;2", "wc_summary_and_contributions": "55;68;147;31", "wc_strengths": "92;46;32;18", "wc_improvement": "432;231;419;157", "wc_limitations": "17;50;37;13", "wc_correctness": "57;28;27;14", "wc_clarity": "100;28;24;27", "wc_relation_to_prior_work": "20;13;3;5", "wc_documentation": "33;37;51;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "807;502;741;276", "wc_reply_reviewers": "151;0;68;0", "wc_reply_authors": "604;406;1206;453", "reply_reviewers": "2;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 75.25, 43.49928160326329 ], "wc_strengths_avg": [ 47.0, 27.80287754891569 ], "wc_improvement_avg": [ 309.75, 118.75894703137108 ], "wc_limitations_avg": [ 29.25, 15.039531242695032 ], "wc_correctness_avg": [ 31.5, 15.724185193516387 ], "wc_clarity_avg": [ 44.75, 31.932546093288583 ], "wc_relation_to_prior_work_avg": [ 10.25, 6.7592529172978875 ], "wc_documentation_avg": [ 32.75, 14.737282653189496 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 581.5, 209.7266077539996 ], "wc_reply_reviewers_avg": [ 54.75, 62.118334652500145 ], "wc_reply_authors_avg": [ 667.25, 319.53354675213683 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 
7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12600688888868875844&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Quantus: An Explainable AI Toolkit for Responsible Evaluation of Neural Network Explanations and Beyond", "author": "Anna Hedstr\u00f6m, Leander Weber, Daniel Krakowczyk, Dilyara Bareeva, Franz Motzkus, Wojciech Samek, Sebastian Lapuschkin, Marina H\u00f6hne", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73908", "id": "73908", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73908.png?t=1701864662.2619634", "slides": "https://nips.cc/virtual/2023/poster/73908", "video": "https://nips.cc/virtual/2023/poster/73908" }, { "title": "Fundamental Limits and Tradeoffs in Invariant Representation Learning", "author": "Han Zhao, Chen Dan, Bryon Aragam, Tommi Jaakkola, Geoffrey Gordon, Pradeep Ravikumar", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73909", "id": "73909", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73909.png?t=1702182728.861078", "slides": "https://nips.cc/virtual/2023/poster/73909", "video": "https://nips.cc/virtual/2023/poster/73909" }, { "title": "Intrinsic Gaussian Process on Unknown Manifolds with Probabilistic Metrics", "author": "mu niu, Zhenwen Dai, Pokman Cheung, Yizhu Wang", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73910", "id": "73910", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73910.png?t=1699875996.1201954", "slides": "https://nips.cc/virtual/2023/poster/73910", "video": "https://nips.cc/virtual/2023/poster/73910" }, { "title": "Small Transformers Compute Universal Metric Embeddings", "author": "Anastasis Kratsios, Valentin Debarnot, Ivan Dokmani\u0107", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73911", "id": "73911", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73911", "video": "https://nips.cc/virtual/2023/poster/73911" }, { "title": "Euler-Lagrange Analysis of Generative Adversarial Networks", "author": "Siddarth Asokan, Chandra Seelamantula", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73912", "id": "73912", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73912.png?t=1701424181.5500808", "slides": "https://nips.cc/virtual/2023/poster/73912", "video": "https://nips.cc/virtual/2023/poster/73912" }, { "title": "Fast Online Changepoint Detection via Functional Pruning CUSUM Statistics", "author": "Gaetano Romano, Idris A. 
Eckley, Paul Fearnhead, Guillem Rigaill", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73914", "id": "73914", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73914.png?t=1700828985.1371107", "slides": "https://nips.cc/virtual/2023/poster/73914", "video": "https://nips.cc/virtual/2023/poster/73914" }, { "title": "Large sample spectral analysis of graph-based multi-manifold clustering", "author": "Nicolas Garcia Trillos, Pengfei He, Chenghui Li", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73915", "id": "73915", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73915.png?t=1701751239.3799064", "slides": "https://nips.cc/virtual/2023/poster/73915", "video": "https://nips.cc/virtual/2023/poster/73915" }, { "title": "The Separation Capacity of Random Neural Networks", "author": "Sjoerd Dirksen, Martin Genzel, Laurent Jacques, Alexander Stollenwerk", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73916", "id": "73916", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73916.png?t=1701717906.540078", "slides": "https://nips.cc/virtual/2023/poster/73916", "video": "https://nips.cc/virtual/2023/poster/73916" }, { "title": "Network Regression with Graph Laplacians", "author": "Yidong Zhou, Hans-Georg M\u00fcller", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73917", "id": "73917", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73917.png?t=1702574693.7212543", "slides": "https://nips.cc/virtual/2023/poster/73917", "video": "https://nips.cc/virtual/2023/poster/73917" }, { "title": "Bilevel Optimization with a Lower-level Contraction: Optimal Sample Complexity without Warm-Start", "author": "Riccardo Grazzi, Massimiliano Pontil, Saverio Salzo", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73918", "id": "73918", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73918.png?t=1701351935.1254282", "slides": "https://nips.cc/virtual/2023/poster/73918", "video": "https://nips.cc/virtual/2023/poster/73918" }, { "title": "Conditional Distribution Function Estimation Using Neural Networks for Censored and Uncensored Data", "author": "Bingqing Hu, Bin Nan", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73919", "id": "73919", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73919.png?t=1701472065.8795416", "slides": "https://nips.cc/virtual/2023/poster/73919", "video": "https://nips.cc/virtual/2023/poster/73919" }, { "title": "An Empirical Investigation of the Role of Pre-training in Lifelong Learning", "author": "Sanket Vaibhav Mehta, Darshan Patil, Sarath Chandar, Emma Strubell", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73920", "id": "73920", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73920", "video": "https://nips.cc/virtual/2023/poster/73920" }, { "title": "Variational Gibbs Inference for Statistical Model Estimation from Incomplete Data", "author": "Vaidotas Simkus, Benjamin Rhodes, Michael Gutmann", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73921", "id": "73921", "proceeding": 
"", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73921.png?t=1700398463.1284835", "slides": "https://nips.cc/virtual/2023/poster/73921", "video": "https://nips.cc/virtual/2023/poster/73921" }, { "title": "Alpha-divergence Variational Inference Meets Importance Weighted Auto-Encoders: Methodology and Asymptotics", "author": "Kam\u00e9lia Daudel, Joe Benton, Yuyang Shi, Arnaud Doucet", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73922", "id": "73922", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73922", "video": "https://nips.cc/virtual/2023/poster/73922" }, { "title": "Inference for Gaussian Processes with Matern Covariogram on Compact Riemannian Manifolds", "author": "Didong Li, Wenpin Tang, Sudipto Banerjee", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73923", "id": "73923", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73923", "video": "https://nips.cc/virtual/2023/poster/73923" }, { "title": "Sparse Graph Learning from Spatiotemporal Time Series", "author": "Andrea Cini, Daniele Zambon, Cesare Alippi", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73924", "id": "73924", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73924.png?t=1701782875.9035187", "slides": "https://nips.cc/virtual/2023/poster/73924", "video": "https://nips.cc/virtual/2023/poster/73924" }, { "title": "Concentration analysis of multivariate elliptic diffusions", "author": "Lukas Trottner, Cathrine Aeckerle-Willems, Claudia Strauch", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73925", "id": "73925", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73925.png?t=1702422969.5374162", "slides": "https://nips.cc/virtual/2023/poster/73925", "video": "https://nips.cc/virtual/2023/poster/73925" }, { "title": "Global Optimality and Finite Sample Analysis of Softmax Off-Policy Actor Critic under State Distribution Mismatch", "author": "Shangtong Zhang, Remi Tachet des Combes, Romain Laroche", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73926", "id": "73926", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73926", "video": "https://nips.cc/virtual/2023/poster/73926" }, { "title": "MMD Aggregated Two-Sample Test", "author": "Antonin Schrab, Ilmun Kim, M\u00e9lisande Albert, B\u00e9atrice Laurent, Benjamin Guedj, Arthur Gretton", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73927", "id": "73927", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73927.png?t=1702228328.0024228", "slides": "https://nips.cc/virtual/2023/poster/73927", "video": "https://nips.cc/virtual/2023/poster/73927" }, { "title": "Toolbox for Multimodal Learn (scikit-multimodallearn)", "author": "Dominique Benielli, Baptiste Bauvin, Sokol Ko\u00e7o, Riikka Huusari, C\u00e9cile Capponi, Hachem Kadri, Fran\u00e7ois Laviolette", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73928", "id": "73928", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73928", "video": 
"https://nips.cc/virtual/2023/poster/73928" }, { "title": "Graph Clustering with Graph Neural Networks", "author": "Anton Tsitsulin, John Palowitch, Bryan Perozzi, Emmanuel M\u00fcller", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/73929", "id": "73929", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202023/73929.png?t=1702229227.7242503", "slides": "https://nips.cc/virtual/2023/poster/73929", "video": "https://nips.cc/virtual/2023/poster/73929" }, { "title": "Smoothing the Landscape Boosts the Signal for SGD: Optimal Sample Complexity for Learning Single Index Models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72726", "id": "73XPopmbXH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/02763667a5761ff92bb15d8751bcd223-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=73XPopmbXH", "openreview": "https://openreview.net/forum?id=73XPopmbXH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72726", "video": "https://nips.cc/virtual/2023/poster/72726", "author_site": "Alex Damian, Eshaan Nichani, Rong Ge, Jason Lee", "tldr": "", "abstract": "We focus on the task of learning a single index model $\\sigma(w^\\star \\cdot x)$ with respect to the isotropic Gaussian distribution in $d$ dimensions. Prior work has shown that the sample complexity of learning $w^\\star$ is governed by the information exponent $k^\\star$ of the link function $\\sigma$, which is defined as the index of the first nonzero Hermite coefficient of $\\sigma$. Ben Arous et al. (2021) showed that $n \\gtrsim d^{k^\\star-1}$ samples suffice for learning $w^\\star$ and that this is tight for online SGD. However, the CSQ lower bound for gradient based methods only shows that $n \\gtrsim d^{k^\\star/2}$ samples are necessary. In this work, we close the gap between the upper and lower bounds by showing that online SGD on a smoothed loss learns $w^\\star$ with $n \\gtrsim d^{k^\\star/2}$ samples. We also draw connections to statistical analyses of tensor PCA and to the implicit regularization effects of minibatch SGD on empirical losses.", "keywords": "statistical learning;learning theory;single index model;gradient descent;stochastic gradient descent", "primary_area": "", "supplementary_material": "/attachment/cf6198449546f66315a55be2cce0204934c8ff3b.zip", "author": "Alex Damian;Eshaan Nichani;Rong Ge;Jason D. Lee", "authorids": "~Alex_Damian1;~Eshaan_Nichani1;~Rong_Ge1;~Jason_D._Lee1", "gender": "M;;M;M", "homepage": "https://web.math.princeton.edu/~ad27/;https://eshaannichani.com/;https://users.cs.duke.edu/~rongge/;https://jasondlee88.github.io/", "dblp": ";260/6510;89/6869-1.html;88/3262", "google_scholar": "YvHcBcEAAAAJ;;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ;GR_DsT0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Alex_Damian1;~Eshaan_Nichani1;~Rong_Ge1;~Jason_D._Lee1", "aff": "Princeton University;Princeton University;Google (visiting);Princeton University", "aff_domain": "princeton.edu;princeton.edu;google.com;princeton.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndamian2023smoothing,\ntitle={Smoothing the Landscape Boosts the Signal for {SGD}: Optimal Sample Complexity for Learning Single Index Models},\nauthor={Alex Damian and Eshaan Nichani and Rong Ge and Jason D. 
Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=73XPopmbXH}\n}", "github": "", "project": "", "reviewers": "xzE5;68nG;mDyK;2zse", "pdf_size": 634767, "rating": "7;7;8;8", "confidence": "3;4;4;4", "soundness": "4;3;4;4", "novelty": "4;3;3;4", "presentation": "4;3;4;4", "wc_summary": "33;101;133;133", "wc_strengths": "70;71;47;72", "wc_weaknesses": "110;63;14;125", "wc_questions": "1;303;400;122", "wc_limitations": "1;23;1;16", "wc_review": "215;561;595;468", "wc_reply_reviewers": "0;39;267;71", "wc_reply_authors": "0;0;1007;36", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;3;2", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 40.82891132518721 ], "wc_strengths_avg": [ 65.0, 10.41633332799983 ], "wc_weaknesses_avg": [ 78.0, 43.45687517528153 ], "wc_questions_avg": [ 206.5, 155.02015997927495 ], "wc_limitations_avg": [ 10.25, 9.575359001102779 ], "wc_review_avg": [ 459.75, 148.75714268565392 ], "wc_reply_reviewers_avg": [ 94.25, 102.85760788585354 ], "wc_reply_authors_avg": [ 260.75, 431.09823416478986 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16725045529927958238&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "princeton.edu;princeton.edu;google.com;princeton.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Princeton University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.princeton.edu;https://www.google.com", "aff_unique_abbr": "Princeton;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "[Re] Pure Noise to the Rescue of Insufficient Data", "author": "Ryan Lee, Seungmin Lee", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74140", "id": "74140", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=ErBe4MnsVD", "poster": "/media/PosterPDFs/NeurIPS%202023/74140.png?t=1702411591.9367738", "slides": "https://nips.cc/virtual/2023/poster/74140", "video": "https://nips.cc/virtual/2023/poster/74140" }, { "title": "[Re] Masked Autoencoders Are Small Scale Vision Learners: A Reproduction Under Resource Constraints", "author": "Athanasios Charisoudis, Simon Ekman von Huth, Emil Jansson", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74141", "id": "74141", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=KXfjZPL5pqr", "poster": "/media/PosterPDFs/NeurIPS%202023/74141.png?t=1701888959.9704773", "slides": "https://nips.cc/virtual/2023/poster/74141", "video": "https://nips.cc/virtual/2023/poster/74141" }, { "title": "[Re] CrossWalk: Fairness-enhanced Node Representation Learning", "author": "Luca Pantea, Andrei-Eusebiu Blahovici", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74143", "id": "74143", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=tpk45Zll8eh", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/74143", "video": "https://nips.cc/virtual/2023/poster/74143" }, { "title": "Reproducibility study of the Fairness-enhanced Node Representation Learning", "author": "Gijs Moens, Job De Witte, Tobias Gobel, Meggie Van den Oever", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74144", "id": "74144", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=KNp7Zq3KkT0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/74144", "video": "https://nips.cc/virtual/2023/poster/74144" }, { "title": "[Re] VAE Approximation Error: ELBO and Exponential Families", "author": "Volodymyr Kyrylov, Navdeep Singh Bedi, Qianbo Zang", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74145", "id": "74145", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=ozbAwipuZu", "poster": "/media/PosterPDFs/NeurIPS%202023/74145.png?t=1702019868.106192", "slides": "https://nips.cc/virtual/2023/poster/74145", "video": "https://nips.cc/virtual/2023/poster/74145" }, { "title": "RELIC: Reproducibility and Extension on LIC metric in quantifying bias in captioning models", "author": "Martijn van Raaphorst, Egoitz Gonzalez, Marta Grasa, Paula Antequera Hern\u00e1ndez", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74146", "id": "74146", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=9_hCoP3LXwy", "poster": "/media/PosterPDFs/NeurIPS%202023/74146.png?t=1701877319.2317977", "slides": "https://nips.cc/virtual/2023/poster/74146", "video": "https://nips.cc/virtual/2023/poster/74146" }, { "title": "[Re] Exploring the Role of Grammar and Word Choice in Bias Toward African American English (AAE) in Hate Speech Classification", "author": "Priyanka Bose, Chandra Shekhar Pandey, Fraida Fund", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74147", "id": "74147", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=MjZVx7a0KX-", "poster": "/media/PosterPDFs/NeurIPS%202023/74147.png?t=1702333216.6995823", "slides": "https://nips.cc/virtual/2023/poster/74147", "video": "https://nips.cc/virtual/2023/poster/74147" }, { "title": "[Re] Variational Neural Cellular Automata", "author": "Albert Sund Aillet, Simon Sond\u00e9n", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74151", "id": "74151", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=d7-ns6SZqp", "poster": "/media/PosterPDFs/NeurIPS%202023/74151.png?t=1701633698.859865", "slides": "https://nips.cc/virtual/2023/poster/74151", "video": "https://nips.cc/virtual/2023/poster/74151" }, { "title": "Reproducibility Study of \u201dCartoonX: Cartoon Explanations of Image Classifiers\u201d", "author": "Aditya Patra, Sina Taslimi, Luke Chin A Foeng, Pratik Kayal", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74153", "id": "74153", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=DWKJpl8s06", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/74153", "video": "https://nips.cc/virtual/2023/poster/74153" }, { "title": "Reproducibility Study of \u201cQuantifying Societal Bias Amplification in Image Captioning\u201d", "author": "Farrukh Baratov, Goksenin Yuksel, Darie Petcu, Jan Bakker", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74156", 
"id": "74156", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=eJmQJT0Dtt", "poster": "/media/PosterPDFs/NeurIPS%202023/74156.png?t=1701823571.4065063", "slides": "https://nips.cc/virtual/2023/poster/74156", "video": "https://nips.cc/virtual/2023/poster/74156" }, { "title": "Reproducibility Study of \u201dLabel-Free Explainability for Unsupervised Models\u201d", "author": "Julius Wagenbach, Gergely Papp, Niklas Mather, Laurens de Vries", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74159", "id": "74159", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=n2qXFXiMsAM", "poster": "/media/PosterPDFs/NeurIPS%202023/74159.png?t=1701282677.7247312", "slides": "https://nips.cc/virtual/2023/poster/74159", "video": "https://nips.cc/virtual/2023/poster/74159" }, { "title": "Reproducibility study of 'Proto2Proto: Can you recognise the car, the way I do?'", "author": "Gerson de Kleuver, David Bikker, Wenhua Hu, Bram Veenman", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74161", "id": "74161", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=a_9YF58u61", "poster": "/media/PosterPDFs/NeurIPS%202023/74161.png?t=1699439339.3815246", "slides": "https://nips.cc/virtual/2023/poster/74161", "video": "https://nips.cc/virtual/2023/poster/74161" }, { "title": "[Re] Hierarchical Shrinkage: Improving the Accuracy and Interpretability of Tree-Based Methods", "author": "Domen Mohor\u010di\u010d, David Ocepek", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74164", "id": "74164", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=NgPQSqpz-Y", "poster": "/media/PosterPDFs/NeurIPS%202023/74164.png?t=1701638592.5074024", "slides": "https://nips.cc/virtual/2023/poster/74164", "video": "https://nips.cc/virtual/2023/poster/74164" }, { "title": "[Re] Numerical influence of ReLU'(0) on backpropagation", "author": "Tommaso Martorella, Hector Manuel Ramirez Contreras, Daniel Garcia", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74166", "id": "74166", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=YAWQTQZVoA", "poster": "/media/PosterPDFs/NeurIPS%202023/74166.png?t=1701611548.0176413", "slides": "https://nips.cc/virtual/2023/poster/74166", "video": "https://nips.cc/virtual/2023/poster/74166" }, { "title": "[Re] On the Reproducibility of \u201cFairCal: Fairness Calibration for Face Verification\u201d", "author": "Marga Don, Satchit Chatterji, Milena Kapralova, Ryan Amaudruz", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74168", "id": "74168", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=uVHUy7CWCL", "poster": "/media/PosterPDFs/NeurIPS%202023/74168.png?t=1701805293.6160448", "slides": "https://nips.cc/virtual/2023/poster/74168", "video": "https://nips.cc/virtual/2023/poster/74168" }, { "title": "[Re] Fairness Guarantees under Demographic Shift", "author": "Valentin Buchner, Philip Schutte, Yassin Ben Allal, Hamed Ahadi", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74170", "id": "74170", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=xEfg6h1GFmW", "poster": "/media/PosterPDFs/NeurIPS%202023/74170.png?t=1700996602.6548355", "slides": "https://nips.cc/virtual/2023/poster/74170", "video": 
"https://nips.cc/virtual/2023/poster/74170" }, { "title": "[Re] FOCUS: Flexible Optimizable Counterfactual Explanations for Tree Ensembles", "author": "Kyosuke Morita", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74171", "id": "74171", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=n1q-iz83S5", "poster": "/media/PosterPDFs/NeurIPS%202023/74171.png?t=1700002093.6030712", "slides": "https://nips.cc/virtual/2023/poster/74171", "video": "https://nips.cc/virtual/2023/poster/74171" }, { "title": "[Re] On the Reproducibility of CartoonX", "author": "Robin Sasse, Aniek Eijpe, Jona Ruthardt, Elias Dubbeldam", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74173", "id": "74173", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=MK4IQJdLLeo", "poster": "/media/PosterPDFs/NeurIPS%202023/74173.png?t=1701433303.119617", "slides": "https://nips.cc/virtual/2023/poster/74173", "video": "https://nips.cc/virtual/2023/poster/74173" }, { "title": "Easy Bayesian Transfer Learning with Informative Priors", "author": "Martin \u0160pendl, Klementina Pirc", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74174", "id": "74174", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=JpaQ8GFOVu", "poster": "/media/PosterPDFs/NeurIPS%202023/74174.png?t=1701930879.302873", "slides": "https://nips.cc/virtual/2023/poster/74174", "video": "https://nips.cc/virtual/2023/poster/74174" }, { "title": "[Re] Bandit Theory and Thompson Sampling-guided Directed Evolution for Sequence Optimization", "author": "Luka \u017dontar", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74176", "id": "74176", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=NE_x1dpz-Q", "poster": "/media/PosterPDFs/NeurIPS%202023/74176.png?t=1701676783.6590142", "slides": "https://nips.cc/virtual/2023/poster/74176", "video": "https://nips.cc/virtual/2023/poster/74176" }, { "title": "[Re] End-to-end Algorithm Synthesis with Recurrent Networks: Logical Extrapolation Without Overthinking", "author": "Sean McLeish, Long Tran-Thanh", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74180", "id": "74180", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=WaZB4pUVTi", "poster": "/media/PosterPDFs/NeurIPS%202023/74180.png?t=1701648591.1523688", "slides": "https://nips.cc/virtual/2023/poster/74180", "video": "https://nips.cc/virtual/2023/poster/74180" }, { "title": "[Re] $\\mathcal{G}$-Mixup: Graph Data Augmentation for Graph Classification", "author": "Ermin Omeragic, Vuk \u0110uranovi\u0107", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74183", "id": "74183", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=XxUIomN-ndH", "poster": "/media/PosterPDFs/NeurIPS%202023/74183.png?t=1701637212.1145904", "slides": "https://nips.cc/virtual/2023/poster/74183", "video": "https://nips.cc/virtual/2023/poster/74183" }, { "title": "[Re] On Explainability of Graph Neural Networks via Subgraph Explorations", "author": "Yannik Mahlau, Lukas Berg, Leonie Kayser", "status": "Poster", "track": "Journal", "site": "https://nips.cc/virtual/2023/poster/74184", "id": "74184", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=zKBJw4Ht8s", "poster": 
"/media/PosterPDFs/NeurIPS%202023/74184.png?t=1701772136.6342692", "slides": "https://nips.cc/virtual/2023/poster/74184", "video": "https://nips.cc/virtual/2023/poster/74184" }, { "title": "No-Regret Learning in Dynamic Competition with Reference Effects Under Logit Demand", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72725", "id": "75Mxzfoeq7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/21c9fd36a6d94a491bf330a0ba0e5f6e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=75Mxzfoeq7", "openreview": "https://openreview.net/forum?id=75Mxzfoeq7", "poster": "/media/PosterPDFs/NeurIPS%202023/72725.png?t=1701892053.2391162", "slides": "https://nips.cc/virtual/2023/poster/72725", "video": "https://nips.cc/virtual/2023/poster/72725", "author_site": "Mengzi Amy Guo, Donghao Ying, Javad Lavaei, Zuo-Jun Shen", "tldr": "", "abstract": "This work is dedicated to the algorithm design in a competitive framework, with the primary goal of learning a stable equilibrium. We consider the dynamic price competition between two firms operating within an opaque marketplace, where each firm lacks information about its competitor. The demand follows the multinomial logit (MNL) choice model, which depends on the consumers' observed price and their reference price, and consecutive periods in the repeated games are connected by reference price updates. We use the notion of stationary Nash equilibrium (SNE), defined as the fixed point of the equilibrium pricing policy for the single-period game, to simultaneously capture the long-run market equilibrium and stability. We propose the online projected gradient ascent algorithm (OPGA), where the firms adjust prices using the first-order derivatives of their log-revenues that can be obtained from the market feedback mechanism. Despite the absence of typical properties required for the convergence of online games, such as strong monotonicity and variational stability, we demonstrate that under diminishing step-sizes, the price and reference price paths generated by OPGA converge to the unique SNE, thereby achieving the no-regret learning and a stable market. 
Moreover, with appropriate step-sizes, we prove that this convergence exhibits a rate of $\\mathcal{O}(1/t)$.", "keywords": "no-regret learning;price competition;reference effect;last-iterate convergence", "primary_area": "", "supplementary_material": "/attachment/eb2e98b027c84021908415076446b60dcd311567.pdf", "author": "Mengzi Amy Guo;Donghao Ying;Javad Lavaei;Zuo-Jun Shen", "authorids": "~Mengzi_Amy_Guo2;~Donghao_Ying1;~Javad_Lavaei1;~Zuo-Jun_Shen1", "gender": ";M;;M", "homepage": ";https://sites.google.com/view/donghao-ying;;http://shen.ieor.berkeley.edu", "dblp": ";;;", "google_scholar": ";NzMQHG4AAAAJ;;", "orcid": ";;;", "linkedin": ";donghao-ying-2507071a7/;;", "or_profile": "~Mengzi_Amy_Guo2;~Donghao_Ying1;~Javad_Lavaei1;~Zuo-Jun_Shen1", "aff": ";University of California, Berkeley;;", "aff_domain": ";berkeley.edu;;", "position": ";PhD student;;", "bibtex": "@inproceedings{\nguo2023noregret,\ntitle={No-Regret Learning in Dynamic Competition with Reference Effects Under Logit Demand},\nauthor={Mengzi Amy Guo and Donghao Ying and Javad Lavaei and Zuo-Jun Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=75Mxzfoeq7}\n}", "github": "", "project": "", "reviewers": "aAW7;8Smn;CZUh;M19q", "pdf_size": 1360701, "rating": "6;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "80;125;116;62", "wc_strengths": "49;68;120;55", "wc_weaknesses": "101;177;102;38", "wc_questions": "49;108;2;262", "wc_limitations": "8;11;1;1", "wc_review": "287;489;341;418", "wc_reply_reviewers": "62;0;27;376", "wc_reply_authors": "254;44;15;2649", "reply_reviewers": "1;0;1;2", "reply_authors": "2;2;2;8", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.75, 25.752427070084092 ], "wc_strengths_avg": [ 73.0, 27.99107000455681 ], "wc_weaknesses_avg": [ 104.5, 49.23667332385485 ], "wc_questions_avg": [ 105.25, 97.98309803226269 ], "wc_limitations_avg": [ 5.25, 4.380353866983808 ], "wc_review_avg": [ 383.75, 76.54859567621081 ], "wc_reply_reviewers_avg": [ 116.25, 151.56908490849972 ], "wc_reply_authors_avg": [ 740.5, 1105.72566670038 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 2.598076211353316 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16993663819828078320&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";berkeley.edu;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Hierarchical clustering with dot products recovers hidden tree structure", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72724", "id": "75v88kyyko", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6521937507d78f327cd402401be73bf2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=75v88kyyko", "openreview": "https://openreview.net/forum?id=75v88kyyko", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72724", "video": "https://nips.cc/virtual/2023/poster/72724", "author_site": "Annie Gray, Alexander Modell, Patrick Rubin-Delanchy, Nick Whiteley", "tldr": "", "abstract": "In this paper we offer a new perspective on the well established agglomerative clustering algorithm, focusing on recovery of hierarchical structure. We recommend a simple variant of the standard algorithm, in which clusters are merged by maximum average dot product and not, for example, by minimum distance or within-cluster variance. We demonstrate that the tree output by this algorithm provides a bona fide estimate of generative hierarchical structure in data, under a generic probabilistic graphical model. The key technical innovations are to understand how hierarchical information in this model translates into tree geometry which can be recovered from data, and to characterise the benefits of simultaneously growing sample size and data dimension. We demonstrate superior tree recovery performance with real data over existing approaches such as UPGMA, Ward's method, and HDBSCAN.", "keywords": "agglomerative clustering;generative model;graphical model;hierarchical clustering;high-dimensional data", "primary_area": "", "supplementary_material": "/attachment/7906e817651c17b377cb4ae41437eed0e52a362c.zip", "author": "Annie Gray;Alexander Modell;Patrick Rubin-Delanchy;Nick Whiteley", "authorids": "~Annie_Gray1;~Alexander_Modell1;~Patrick_Rubin-Delanchy1;~Nick_Whiteley1", "gender": ";M;M;", "homepage": "https://research-information.bris.ac.uk/en/persons/annie-m-gray;http://alexandermodell.github.io/;https://people.maths.bris.ac.uk/~pr12244/;", "dblp": ";;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=a5ZrPkAAAAAJ;", "orcid": ";0000-0002-0074-8145;;", "linkedin": ";;;", "or_profile": "~Annie_Gray1;~Alexander_Modell1;~Patrick_Rubin-Delanchy1;~Nick_Whiteley1", "aff": "University of Bristol;University of Bristol;;", "aff_domain": "bristol.ac.uk;bristol.ac.uk;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\ngray2023hierarchical,\ntitle={Hierarchical clustering with dot products recovers hidden tree structure},\nauthor={Annie Gray and Alexander Modell and Patrick Rubin-Delanchy and Nick Whiteley},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=75v88kyyko}\n}", "github": "", "project": "", "reviewers": "wZ8j;s6Xk;Emfa;FShC;dt3f", "pdf_size": 588686, "rating": "5;6;7;7;8", "confidence": "3;4;3;3;3", "soundness": "3;3;4;3;4", "novelty": "3;3;3;3;3", "presentation": "2;3;4;2;4", "wc_summary": "47;122;223;85;64", "wc_strengths": "110;58;151;7;74", "wc_weaknesses": "192;241;71;47;235", "wc_questions": "65;96;670;419;91", "wc_limitations": "1;54;79;2;44", "wc_review": "415;571;1194;560;508", "wc_reply_reviewers": "104;18;50;391;61", "wc_reply_authors": "0;0;0;195;0", "reply_reviewers": "1;1;1;3;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 108.2, 62.62076332974551 ], "wc_strengths_avg": [ 80.0, 48.55924216871593 ], "wc_weaknesses_avg": [ 157.2, 82.29313458606374 ], "wc_questions_avg": [ 268.2, 239.38537967052207 ], "wc_limitations_avg": [ 36.0, 30.390788077968626 ], "wc_review_avg": [ 649.6, 277.7283564924547 ], 
"wc_reply_reviewers_avg": [ 124.8, 135.9167392192735 ], "wc_reply_authors_avg": [ 39.0, 78.0 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.29417420270727607, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11967719778052070424&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 7, "email": "bristol.ac.uk;bristol.ac.uk;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Fragment-based Pretraining and Finetuning on Molecular Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72723", "id": "77Nq1KjmLl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38ec60a949c3538e5cbb337b1b386dcf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=77Nq1KjmLl", "openreview": "https://openreview.net/forum?id=77Nq1KjmLl", "poster": "/media/PosterPDFs/NeurIPS%202023/72723.png?t=1701919100.0351024", "slides": "https://nips.cc/virtual/2023/poster/72723", "video": "https://nips.cc/virtual/2023/poster/72723", "author_site": "Kha-Dinh Luong, Ambuj K Singh", "tldr": "", "abstract": "Property prediction on molecular graphs is an important application of Graph Neural Networks (GNNs). Recently, unlabeled molecular data has become abundant, which facilitates the rapid development of self-supervised learning for GNNs in the chemical domain. In this work, we propose pretraining GNNs at the fragment level, a promising middle ground to overcome the limitations of node-level and graph-level pretraining. Borrowing techniques from recent work on principal subgraph mining, we obtain a compact vocabulary of prevalent fragments from a large pretraining dataset. From the extracted vocabulary, we introduce several fragment-based contrastive and predictive pretraining tasks. The contrastive learning task jointly pretrains two different GNNs: one on molecular graphs and the other on fragment graphs, which represents higher-order connectivity within molecules. By enforcing consistency between the fragment embedding and the aggregated embedding of the corresponding atoms from the molecular graphs, we ensure that the embeddings capture structural information at multiple resolutions. The structural information of fragment graphs is further exploited to extract auxiliary labels for graph-level predictive pretraining. We employ both the pretrained molecular-based and fragment-based GNNs for downstream prediction, thus utilizing the fragment information during finetuning. Our graph fragment-based pretraining (GraphFP) advances the performances on 5 out of 8 common molecular benchmarks and improves the performances on long-range biological benchmarks by at least 11.5%. 
Code is available at: https://github.com/lvkd84/GraphFP.", "keywords": "Self-supervised Learning;Graph Neural Network;Molecule", "primary_area": "", "supplementary_material": "", "author": "Kha-Dinh Luong;Ambuj Singh", "authorids": "~Kha-Dinh_Luong1;~Ambuj_Singh1", "gender": "M;", "homepage": "https://dynamo.cs.ucsb.edu/people/luong;", "dblp": "358/5840;", "google_scholar": ";", "orcid": ";", "linkedin": "kha-dinh-luong/;", "or_profile": "~Kha-Dinh_Luong1;~Ambuj_Singh1", "aff": "University of California, Santa Barbara;", "aff_domain": "ucsb.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nluong2023fragmentbased,\ntitle={Fragment-based Pretraining and Finetuning on Molecular Graphs},\nauthor={Kha-Dinh Luong and Ambuj Singh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=77Nq1KjmLl}\n}", "github": "", "project": "", "reviewers": "wcQm;Br6k;MPQy;Vdiv;RaR1", "pdf_size": 994496, "rating": "5;5;6;6;6", "confidence": "5;3;3;1;4", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;2", "presentation": "3;3;2;3;3", "wc_summary": "38;82;145;34;60", "wc_strengths": "52;19;36;19;47", "wc_weaknesses": "55;86;48;10;122", "wc_questions": "4;2;291;125;73", "wc_limitations": "14;1;13;1;1", "wc_review": "163;190;533;189;303", "wc_reply_reviewers": "16;32;96;0;111", "wc_reply_authors": "35;27;388;0;40", "reply_reviewers": "1;1;2;0;1", "reply_authors": "2;2;3;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 1.32664991614216 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 71.8, 40.429692059178485 ], "wc_strengths_avg": [ 34.6, 13.74918179383777 ], "wc_weaknesses_avg": [ 64.2, 37.68501028260441 ], "wc_questions_avg": [ 99.0, 106.44247272588137 ], "wc_limitations_avg": [ 6.0, 6.131883886702357 ], "wc_review_avg": [ 275.6, 137.48541740853827 ], "wc_reply_reviewers_avg": [ 51.0, 44.29898418699914 ], "wc_reply_authors_avg": [ 98.0, 145.65575855420204 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3282457820736384478&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ucsb.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "IDEA: An Invariant Perspective for Efficient Domain Adaptive Image Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72722", "id": "77i6itptQW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b33ad9d46ab2a23b6783d954121d26e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=77i6itptQW", "openreview": "https://openreview.net/forum?id=77i6itptQW", "poster": "/media/PosterPDFs/NeurIPS%202023/72722.png?t=1701956093.0387492", "slides": "https://nips.cc/virtual/2023/poster/72722", "video": "https://nips.cc/virtual/2023/poster/72722", "author_site": "Haixin Wang, Hao Wu, Jinan Sun, Shikun Zhang, Chong Chen, Xian-Sheng Hua, Xiao Luo", 
"tldr": "", "abstract": "In this paper, we investigate the problem of unsupervised domain adaptive hashing, which leverage knowledge from a label-rich source domain to expedite learning to hash on a label-scarce target domain. Although numerous existing approaches attempt to incorporate transfer learning techniques into deep hashing frameworks, they often neglect the essential invariance for adequate alignment between these two domains. Worse yet, these methods fail to distinguish between causal and non-causal effects embedded in images, rendering cross-domain retrieval ineffective. To address these challenges, we propose an Invariance-acquired Domain AdaptivE HAshing (IDEA) model. Our IDEA first decomposes each image into a causal feature representing label information, and a non-causal feature indicating domain information. Subsequently, we generate discriminative hash codes using causal features with consistency learning on both source and target domains. More importantly, we employ a generative model for synthetic samples to simulate the intervention of various non-causal effects, ultimately minimizing their impact on hash codes for domain invariance. Comprehensive experiments conducted on benchmark datasets validate the superior performance of our IDEA compared to a variety of competitive baselines.", "keywords": "domain adaption;binary descriptor;causal inference", "primary_area": "", "supplementary_material": "", "author": "Haixin Wang;Hao Wu;Jinan Sun;Shikun Zhang;Chong Chen;Xian-Sheng Hua;Xiao Luo", "authorids": "~Haixin_Wang3;~Hao_Wu39;~Jinan_Sun1;~Shikun_Zhang2;~Chong_Chen2;~Xian-Sheng_Hua1;~Xiao_Luo3", "gender": ";M;M;M;;M;M", "homepage": "https://willdreamer.github.io/;https://easylearningscores.github.io/;;;;;http://luoxiao12.github.io", "dblp": "81/5956-3;111;16/10588;83/3715.html;;56/5807-1;50/1585-1", "google_scholar": "RGZUJOkAAAAJ;HdXMhfcAAAAJ;;uiklLscAAAAJ;;https://scholar.google.co.uk/citations?user=6G-l4o0AAAAJ;https://scholar.google.com.hk/citations?", "orcid": "0000-0002-5714-0149;0009-0008-4084-1409;;;;;", "linkedin": ";;;;;xshua;%E9%9C%84-%E7%BD%97-303548214/", "or_profile": "~Haixin_Wang3;~Hao_Wu39;~Jinan_Sun1;~Shikun_Zhang2;~Chong_Chen2;~Xian-Sheng_Hua1;~Xiao_Luo3", "aff": "Peking University;University of Science and Technology of China;Peking University;Peking University;;Terminus Group;University of California, Los Angeles", "aff_domain": "pku.edu.cn;ustc.edu.cn;pku.edu.cn;pku.edu.cn;;tslsmart.com;cs.ucla.edu", "position": "MS student;MS student;Associate Professor;Full Professor;;Principal Researcher;Postdoc", "bibtex": "@inproceedings{\nwang2023idea,\ntitle={{IDEA}: An Invariant Perspective for Efficient Domain Adaptive Image Retrieval},\nauthor={Haixin Wang and Hao Wu and Jinan Sun and Shikun Zhang and Chong Chen and Xian-Sheng Hua and Xiao Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=77i6itptQW}\n}", "github": "", "project": "", "reviewers": "BQ1H;fabx;8Msq;z9GY;MFjc", "pdf_size": 6823968, "rating": "4;6;6;6;6", "confidence": "4;4;4;4;3", "soundness": "2;3;3;4;3", "novelty": "2;3;3;3;2", "presentation": "2;3;3;4;3", "wc_summary": "55;49;97;85;152", "wc_strengths": "10;38;57;56;82", "wc_weaknesses": "103;121;184;75;47", "wc_questions": "3;22;67;7;35", "wc_limitations": "8;6;21;1;65", "wc_review": "179;236;426;224;381", "wc_reply_reviewers": "19;82;69;27;45", "wc_reply_authors": "63;43;50;31;52", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", 
"rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 87.6, 36.865159703980666 ], "wc_strengths_avg": [ 48.6, 23.84617369726221 ], "wc_weaknesses_avg": [ 106.0, 46.389654018972806 ], "wc_questions_avg": [ 26.8, 23.085926448812923 ], "wc_limitations_avg": [ 20.2, 23.352944139872385 ], "wc_review_avg": [ 289.2, 96.2982865891185 ], "wc_reply_reviewers_avg": [ 48.4, 24.029981273400942 ], "wc_reply_authors_avg": [ 47.8, 10.571660229122008 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6647078334685304845&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;ustc.edu.cn;pku.edu.cn;pku.edu.cn;;tslsmart.com;cs.ucla.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "Peking University;University of Science and Technology of China;Terminus Group;University of California, Los Angeles", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;http://www.ustc.edu.cn;;https://www.ucla.edu", "aff_unique_abbr": "Peking U;USTC;;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;2", "aff_country_unique": "China;;United States" }, { "title": "Language Model Tokenizers Introduce Unfairness Between Languages", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72721", "id": "78yDLKi95p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74bb24dca8334adce292883b4b651eda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=78yDLKi95p", "openreview": "https://openreview.net/forum?id=78yDLKi95p", "poster": "/media/PosterPDFs/NeurIPS%202023/72721.png?t=1701785014.7581482", "slides": "https://nips.cc/virtual/2023/poster/72721", "video": "https://nips.cc/virtual/2023/poster/72721", "author_site": "Aleksandar Petrov, Emanuele La Malfa, Philip Torr, Adel Bibi", "tldr": "", "abstract": "Recent language models have shown impressive multilingual performance, even when not explicitly trained for it.\nDespite this, there are concerns about the quality of their outputs across different languages.\nIn this paper, we show how disparity in the treatment of different languages arises at the tokenization stage, well before a model is even invoked.\nThe same text translated into different languages can have drastically different tokenization lengths, with differences up to 15 times in some cases.\nThese disparities persist even for tokenizers that are intentionally trained for multilingual support.\nCharacter-level and byte-level models also exhibit over 4 times the difference in the encoding length for some language pairs.\nThis induces unfair treatment for some language communities in regard to the cost of accessing commercial language services, the processing time and latency, as well as the amount of content that can be provided as context to the models.\nTherefore, we make the case that we should train future language models using multilingually fair subword tokenizers.", "keywords": "LLM;language model;tokenizer;multilingual;language;fairness", "primary_area": "", "supplementary_material": "/attachment/8573f224494aba1a8f8b34322c6e174f1d1773a2.zip", 
"author": "Aleksandar Petrov;Emanuele La Malfa;Philip Torr;Adel Bibi", "authorids": "~Aleksandar_Petrov1;~Emanuele_La_Malfa2;~Philip_Torr1;~Adel_Bibi1", "gender": "M;M;;M", "homepage": "https://p-petrov.com/;https://emanuelelm.github.io/;http://www.robots.ox.ac.uk/~tvg/;http://adelbibi.com", "dblp": "49/8105;276/0274;;176/0964", "google_scholar": "em54BT4AAAAJ;4_91m08AAAAJ;;Q4j2laYAAAAJ", "orcid": ";0000-0002-6254-0470;;0000-0002-6169-3918", "linkedin": "aleksandar-petrov/;;;adel-bibi-ba3671ab/", "or_profile": "~Aleksandar_Petrov1;~Emanuele_La_Malfa2;~Philip_Torr1;~Adel_Bibi1", "aff": "University of Oxford;Department of Computer Science, University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;Full Professor;Senior Research Associate", "bibtex": "@inproceedings{\npetrov2023language,\ntitle={Language Model Tokenizers Introduce Unfairness Between Languages},\nauthor={Aleksandar Petrov and Emanuele La Malfa and Philip Torr and Adel Bibi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=78yDLKi95p}\n}", "github": "", "project": "", "reviewers": "Qimt;EmCj;okDv;qshy;Fdv1", "pdf_size": 487395, "rating": "4;5;6;7;7", "confidence": "4;3;4;4;4", "soundness": "3;3;3;4;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "82;161;73;129;114", "wc_strengths": "47;165;122;62;79", "wc_weaknesses": "77;119;54;264;109", "wc_questions": "17;32;19;52;34", "wc_limitations": "1;74;1;1;43", "wc_review": "224;551;269;508;379", "wc_reply_reviewers": "96;0;11;329;0", "wc_reply_authors": "149;0;0;239;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 111.8, 31.983745871926885 ], "wc_strengths_avg": [ 95.0, 43.07667582346623 ], "wc_weaknesses_avg": [ 124.6, 73.42642576075727 ], "wc_questions_avg": [ 30.8, 12.576167937809991 ], "wc_limitations_avg": [ 24.0, 29.826163011691598 ], "wc_review_avg": [ 386.2, 128.13336801941952 ], "wc_reply_reviewers_avg": [ 87.2, 126.14182494319638 ], "wc_reply_authors_avg": [ 77.6, 99.21008013301875 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3329452056209092998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning Energy-based Model via Dual-MCMC Teaching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72720", "id": "7962B4nXX7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5bed8703db85ab27dc32f6a42f8fbdb6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7962B4nXX7", "openreview": 
"https://openreview.net/forum?id=7962B4nXX7", "poster": "/media/PosterPDFs/NeurIPS%202023/72720.png?t=1697132369.0759084", "slides": "https://nips.cc/virtual/2023/poster/72720", "video": "https://nips.cc/virtual/2023/poster/72720", "author_site": "Jiali Cui, Tian Han", "tldr": "", "abstract": "This paper studies the fundamental learning problem of the energy-based model (EBM). Learning the EBM can be achieved using the maximum likelihood estimation (MLE), which typically involves the Markov Chain Monte Carlo (MCMC) sampling, such as the Langevin dynamics. However, the noise-initialized Langevin dynamics can be challenging in practice and hard to mix. This motivates the exploration of joint training with the generator model where the generator model serves as a complementary model to bypass MCMC sampling. However, such a method can be less accurate than the MCMC and result in biased EBM learning. While the generator can also serve as an initializer model for better MCMC sampling, its learning can be biased since it only matches the EBM and has no access to empirical training examples. Such biased generator learning may limit the potential of learning the EBM. To address this issue, we present a joint learning framework that interweaves the maximum likelihood learning algorithm for both the EBM and the complementary generator model. In particular, the generator model is learned by MLE to match both the EBM and the empirical data distribution, making it a more informative initializer for MCMC sampling of EBM. Learning generator with observed examples typically requires inference of the generator posterior. To ensure accurate and efficient inference, we adopt the MCMC posterior sampling and introduce a complementary inference model to initialize such latent MCMC sampling. 
We show that three separate models can be seamlessly integrated into our joint framework through two (dual-) MCMC teaching, enabling effective and efficient EBM learning.", "keywords": "Energy-based model;MCMC;Joint-training;Generator model", "primary_area": "", "supplementary_material": "/attachment/c9ce7f9267d622aba15662e4cef229f0cddf642d.pdf", "author": "Jiali Cui;Tian Han", "authorids": "~Jiali_Cui1;~Tian_Han1", "gender": "M;M", "homepage": "https://jcui1224.github.io/;https://hthth0801.github.io/", "dblp": "17/2469;65/4065-1", "google_scholar": "dDBTlNAAAAAJ;Qtvu5t4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jiali_Cui1;~Tian_Han1", "aff": "Stevens Institute of Technology;Stevens Institute of Technology", "aff_domain": "stevens.edu;stevens.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncui2023learning,\ntitle={Learning Energy-based Model via Dual-{MCMC} Teaching},\nauthor={Jiali Cui and Tian Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7962B4nXX7}\n}", "github": "", "project": "", "reviewers": "iX9Y;XcPg;RJbi;uj7J", "pdf_size": 2129967, "rating": "5;6;6;6", "confidence": "4;5;2;5", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;4;3;3", "wc_summary": "129;86;158;169", "wc_strengths": "22;74;16;128", "wc_weaknesses": "40;238;358;262", "wc_questions": "209;225;22;15", "wc_limitations": "2;33;1;30", "wc_review": "402;656;555;604", "wc_reply_reviewers": "625;32;43;29", "wc_reply_authors": "752;40;27;25", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 135.5, 32.09750769140807 ], "wc_strengths_avg": [ 60.0, 45.27692569068709 ], "wc_weaknesses_avg": [ 224.5, 115.59736156158583 ], "wc_questions_avg": [ 117.75, 99.44188001038597 ], "wc_limitations_avg": [ 16.5, 15.041608956491324 ], "wc_review_avg": [ 554.25, 94.87985824188398 ], "wc_reply_reviewers_avg": [ 182.25, 255.6749645546079 ], "wc_reply_authors_avg": [ 211.0, 312.39958386656025 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7444272437736548623&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "stevens.edu;stevens.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Stevens Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.stevens.edu", "aff_unique_abbr": "SIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "COCO-Counterfactuals: Automatically Constructed Counterfactual Examples for Image-Text Pairs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73688", "id": "7AjdHnjIHX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e14e4cb8266184ceb234973dfe07faed-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=7AjdHnjIHX", "openreview": "https://openreview.net/forum?id=7AjdHnjIHX", "poster": "/media/PosterPDFs/NeurIPS%202023/73688.png?t=1701981141.916156", "slides": 
"https://nips.cc/virtual/2023/poster/73688", "video": "https://nips.cc/virtual/2023/poster/73688", "author_site": "Tiep Le, VASUDEV LAL, Phillip Howard", "tldr": "", "abstract": "Counterfactual examples have proven to be valuable in the field of natural language processing (NLP) for both evaluating and improving the robustness of language models to spurious correlations in datasets. Despite their demonstrated utility for NLP, multimodal counterfactual examples have been relatively unexplored due to the difficulty of creating paired image-text data with minimal counterfactual changes. To address this challenge, we introduce a scalable framework for automatic generation of counterfactual examples using text-to-image diffusion models. We use our framework to create COCO-Counterfactuals, a multimodal counterfactual dataset of paired image and text captions based on the MS-COCO dataset. We validate the quality of COCO-Counterfactuals through human evaluations and show that existing multimodal models are challenged by our counterfactual image-text pairs. Additionally, we demonstrate the usefulness of COCO-Counterfactuals for improving out-of-domain generalization of multimodal vision-language models via training data augmentation. We make our code and the COCO-Counterfactuals dataset publicly available.", "keywords": "Counterfactuals; Data Augmentation; Multimodal Models; Transformers; Diffusion Models; Cross Attention Control; Prompt-to-Prompt", "primary_area": "", "supplementary_material": "/attachment/15ed84972b1adab5b5373779b69db55c050c9348.pdf", "author": "Tiep Le;Vasudev Lal;Phillip Howard", "authorids": "~Tiep_Le2;~Vasudev_Lal1;~Phillip_Howard1", "gender": "M;M;M", "homepage": ";;", "dblp": "118/3873;;212/2868", "google_scholar": "3RV_GAwAAAAJ;Qbu4oKwAAAAJ;EKh822gAAAAJ", "orcid": ";0000-0002-5907-9898;", "linkedin": "tiep-le/;vasudev-lal-79bb336/;", "or_profile": "~Tiep_Le2;~Vasudev_Lal1;~Phillip_Howard1", "aff": "Intel;Intel;Intel", "aff_domain": "intel.com;intel.com;intel.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nle2023cococounterfactuals,\ntitle={{COCO}-Counterfactuals: Automatically Constructed Counterfactual Examples for Image-Text Pairs},\nauthor={Tiep Le and Vasudev Lal and Phillip Howard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=7AjdHnjIHX}\n}", "github": "", "project": "", "reviewers": "BUVv;QvHF;ndME;jADd", "pdf_size": 7593925, "rating": "7;7;7;8", "confidence": "4;3;3;3", "wc_summary_and_contributions": "39;46;75;44", "wc_strengths": "46;29;42;91", "wc_improvement": "35;83;70;127", "wc_limitations": "33;7;37;26", "wc_correctness": "9;1;1;3", "wc_clarity": "11;1;1;1", "wc_relation_to_prior_work": "36;1;1;29", "wc_documentation": "13;1;1;18", "wc_additional_feedback": "1;1;1;1", "wc_review": "223;170;229;340", "wc_reply_reviewers": "0;19;0;0", "wc_reply_authors": "736;1440;1134;767", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;2;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 51.0, 14.089002803605371 ], "wc_strengths_avg": [ 52.0, 23.37733945512192 ], "wc_improvement_avg": [ 78.75, 32.927002596653104 ], "wc_limitations_avg": [ 25.75, 11.519006033508273 ], "wc_correctness_avg": [ 3.5, 3.278719262151 ], "wc_clarity_avg": [ 3.5, 4.330127018922194 ], "wc_relation_to_prior_work_avg": [ 16.75, 15.943258763502524 ], 
"wc_documentation_avg": [ 8.25, 7.46240577829965 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 240.5, 61.86477188190384 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 1019.25, 288.9890785133584 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=377577653161960473&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "intel.com;intel.com;intel.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel Corporation", "aff_unique_url": "https://www.intel.com", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Defending against Data-Free Model Extraction by Distributionally Robust Defensive Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72719", "id": "7DZAVpOoAK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0207c9ea9faf66c6e892c3fa3c167b75-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7DZAVpOoAK", "openreview": "https://openreview.net/forum?id=7DZAVpOoAK", "poster": "/media/PosterPDFs/NeurIPS%202023/72719.png?t=1702387929.2289207", "slides": "https://nips.cc/virtual/2023/poster/72719", "video": "https://nips.cc/virtual/2023/poster/72719", "author_site": "Zhenyi Wang, Zhenyi Wang, Li Shen, Tongliang Liu, Tiehang Duan, Yanjun Zhu, Donglin Zhan, DAVID DOERMANN, Mingchen Gao", "tldr": "", "abstract": "Data-Free Model Extraction (DFME) aims to clone a black-box model without knowing its original training data distribution, making it much easier for attackers to steal commercial models. Defense against DFME faces several challenges: (i) effectiveness; (ii) efficiency; (iii) no prior on the attacker's query data distribution and strategy. However, existing defense methods: (1) are highly computation and memory inefficient; or (2) need strong assumptions about attack data distribution; or (3) can only delay the attack or prove a model theft after the model stealing has happened. In this work, we propose a Memory and Computation efficient defense approach, named MeCo, to prevent DFME from happening while maintaining the model utility simultaneously by distributionally robust defensive training on the target victim model. Specifically, we randomize the input so that it: (1) causes a mismatch of the knowledge distillation loss for attackers; (2) disturbs the zeroth-order gradient estimation; (3) changes the label prediction for the attack query data. Therefore, the attacker can only extract misleading information from the black-box model. 
Extensive experiments on defending against both decision-based and score-based DFME demonstrate that MeCo can significantly reduce the effectiveness of existing DFME methods and substantially improve running efficiency.", "keywords": "Data-Free Model Extraction; Defense", "primary_area": "", "supplementary_material": "/attachment/14dbfa15ea4dbc5c39b0a8170bce8c8b775f842f.zip", "author": "Zhenyi Wang;Li Shen;Tongliang Liu;Tiehang Duan;Yanjun Zhu;Donglin Zhan;David Doermann;Mingchen Gao", "authorids": "~Zhenyi_Wang1;~Li_Shen1;~Tongliang_Liu1;~Tiehang_Duan1;~Yanjun_Zhu1;~Donglin_Zhan1;~David_Doermann2;~Mingchen_Gao1", "gender": ";M;M;;;;M;F", "homepage": ";https://sites.google.com/site/mathshenli/home;https://tongliang-liu.github.io/;;;https://sites.google.com/view/icarusjanestephen;https://cse.buffalo.edu/~doermann/;http://engineering.buffalo.edu/computer-science-engineering/people/faculty-directory/mingchen-gao.html", "dblp": ";91/3680-8;150/6667;184/7734;;235/6846.html;;11/9613", "google_scholar": ";yVhgENIAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;gemTJXgAAAAJ;;;RoGOW9AAAAAJ;1KUHms8AAAAJ", "orcid": ";;;0000-0003-4323-642X;;;0000-0003-1639-4561;0000-0002-5488-8514", "linkedin": ";;;;;;david-doermann-bb7757/;", "or_profile": "~Zhenyi_Wang1;~Li_Shen1;~Tongliang_Liu1;~Tiehang_Duan1;~Yanjun_Zhu1;~Donglin_Zhan1;~David_Doermann2;~Mingchen_Gao1", "aff": ";JD Explore Academy;University of Sydney;Meta Platforms, Inc.;;Columbia University;State University of New York at Buffalo;University at Buffalo, SUNY", "aff_domain": ";jd.com;sydney.edu.au;fb.com;;columbia.edu;buffalo.edu;buffalo.edu", "position": ";Researcher;Lecturer;Research Scientist;;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023defending,\ntitle={Defending against Data-Free Model Extraction by Distributionally Robust Defensive Training},\nauthor={Zhenyi Wang and Li Shen and Tongliang Liu and Tiehang Duan and Yanjun Zhu and Donglin Zhan and David Doermann and Mingchen Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7DZAVpOoAK}\n}", "github": "", "project": "", "reviewers": "xYzi;v48m;VEKG;e67e", "pdf_size": 2601899, "rating": "5;5;5;6", "confidence": "3;3;4;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "92;48;65;125", "wc_strengths": "60;69;26;132", "wc_weaknesses": "42;90;129;198", "wc_questions": "23;2;21;117", "wc_limitations": "62;13;8;7", "wc_review": "279;222;249;579", "wc_reply_reviewers": "0;11;18;0", "wc_reply_authors": "0;5;18;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.5, 29.124731758421397 ], "wc_strengths_avg": [ 71.75, 38.3038836151114 ], "wc_weaknesses_avg": [ 114.75, 57.093673029504764 ], "wc_questions_avg": [ 40.75, 44.7793200037696 ], "wc_limitations_avg": [ 22.5, 22.91833327273168 ], "wc_review_avg": [ 332.25, 143.8808100477614 ], "wc_reply_reviewers_avg": [ 7.25, 7.660776723022281 ], "wc_reply_authors_avg": [ 5.75, 7.361215932167728 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1352553956132625358&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";jd.com;sydney.edu.au;fb.com;;columbia.edu;buffalo.edu;buffalo.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "JD;University of Sydney;Meta;Columbia University;State University of New York at Buffalo;University at Buffalo", "aff_unique_dep": "JD Explore Academy;;Meta Platforms, Inc.;;;", "aff_unique_url": ";https://www.sydney.edu.au;https://www.meta.com;https://www.columbia.edu;https://www.buffalo.edu;https://www.buffalo.edu", "aff_unique_abbr": ";USYD;Meta;Columbia;SUNY Buffalo;UB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "1;2;2;2;2", "aff_country_unique": ";Australia;United States" }, { "title": "AVIS: Autonomous Visual Information Seeking with Large Language Model Agent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72718", "id": "7EMphtUgCI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/029df12a9363313c3e41047844ecad94-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7EMphtUgCI", "openreview": "https://openreview.net/forum?id=7EMphtUgCI", "poster": "/media/PosterPDFs/NeurIPS%202023/72718.png?t=1702251386.3747787", "slides": "https://nips.cc/virtual/2023/poster/72718", "video": "https://nips.cc/virtual/2023/poster/72718", "author_site": "Ziniu Hu, Ahmet Iscen, Chen Sun, Kai-Wei Chang, Yizhou Sun, David Ross, Cordelia Schmid, Alireza Fathi", "tldr": "", "abstract": "In this paper, we propose an autonomous information seeking visual question answering framework, AVIS. Our method leverages a Large Language Model (LLM) to dynamically strategize the utilization of external tools and to investigate their outputs via tree search, thereby acquiring the indispensable knowledge needed to provide answers to the posed questions. Responding to visual questions that necessitate external knowledge, such as \"What event is commemorated by the building depicted in this image?\", is a complex task. This task presents a combinatorial search space that demands a sequence of actions, including invoking APIs, analyzing their responses, and making informed decisions. We conduct a user study to collect a variety of instances of human decision-making when faced with this task. This data is then used to design a system comprised of three components: an LLM-powered planner that dynamically determines which tool to use next, an LLM-powered reasoner that analyzes and extracts key information from the tool outputs, and a working memory component that retains the acquired information throughout the process. The collected user behavior serves as a guide for our system in two key ways. First, we create a transition graph by analyzing the sequence of decisions made by users. This graph delineates distinct states and confines the set of actions available at each state. Second, we use examples of user decision-making to provide our LLM-powered planner and reasoner with relevant contextual instances, enhancing their capacity to make informed decisions. 
We show that AVIS achieves state-of-the-art results on knowledge-based visual question answering benchmarks such as Infoseek and OK-VQA.", "keywords": "large language model;visual question answering;dynamic decision making;Tool augmented LLM", "primary_area": "", "supplementary_material": "/attachment/6ded4629b6ee27b70b0e176fde1b4ab298889ee3.pdf", "author": "Ziniu Hu;Ahmet Iscen;Chen Sun;Kai-Wei Chang;Yizhou Sun;David A Ross;Cordelia Schmid;Alireza Fathi", "authorids": "~Ziniu_Hu1;~Ahmet_Iscen3;~Chen_Sun1;~Kai-Wei_Chang1;~Yizhou_Sun1;~David_A_Ross1;~Cordelia_Schmid1;~Alireza_Fathi1", "gender": "M;M;M;M;F;F;M;M", "homepage": "http://acbull.github.io;;https://chensun.me;http://kwchang.net;http://web.cs.ucla.edu/~yzsun/;https://cordeliaschmid.github.io/;http://ai.stanford.edu/~alireza/;http://www.cs.toronto.edu/~dross/", "dblp": "180/5436;140/7520;01/6072-2;18/2428;37/3868;s/CordeliaSchmid;70/3898;68/2171", "google_scholar": "x6ct1CsAAAAJ;wIjyqzAAAAAJ;vQa7heEAAAAJ;fqDBtzYAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ;IvqCXP4AAAAJ;luv0xMIAAAAJ;RqOzJR0AAAAJ", "orcid": ";;;0000-0001-5365-0072;;;;", "linkedin": ";;;kai-wei-chang-41239040;;cordelia-schmid-47985a9;alireza-fathi-04338411/;", "or_profile": "~Ziniu_Hu1;~Ahmet_Iscen3;~Chen_Sun1;~Kai-Wei_Chang1;~Yizhou_Sun1;~Cordelia_Schmid1;~Alireza_Fathi1;~David_Alexander_Ross1", "aff": "University of California, Los Angeles;Google;Google;Amazon;University of California, Los Angeles;Inria;Google;Research, Google", "aff_domain": "ucla.edu;google.com;google.com;amazon.com;ucla.edu;inria.fr;google.com;research.google.com", "position": "PhD student;Researcher;Research Scientist;Researcher;Associate Professor;Researcher;researcher;Software Engineer", "bibtex": "@inproceedings{\nhu2023avis,\ntitle={{AVIS}: Autonomous Visual Information Seeking with Large Language Model Agent},\nauthor={Ziniu Hu and Ahmet Iscen and Chen Sun and Kai-Wei Chang and Yizhou Sun and David A Ross and Cordelia Schmid and Alireza Fathi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7EMphtUgCI}\n}", "github": "", "project": "", "reviewers": "mb9w;M3n5;MSpf;hDYr", "pdf_size": 9154699, "rating": "4;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "74;69;96;153", "wc_strengths": "12;44;95;50", "wc_weaknesses": "60;60;168;4", "wc_questions": "26;33;3;60", "wc_limitations": "1;7;1;29", "wc_review": "173;213;363;296", "wc_reply_reviewers": "0;82;13;30", "wc_reply_authors": "37;347;29;27", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.0, 33.3391661563393 ], "wc_strengths_avg": [ 50.25, 29.600464523382062 ], "wc_weaknesses_avg": [ 73.0, 59.42221806698232 ], "wc_questions_avg": [ 30.5, 20.328551350256124 ], "wc_limitations_avg": [ 9.5, 11.521718621802913 ], "wc_review_avg": [ 261.25, 73.61513091749549 ], "wc_reply_reviewers_avg": [ 31.25, 31.171902412268647 ], "wc_reply_authors_avg": [ 110.0, 136.88316185711082 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 51, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=18031759560990467641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucla.edu;google.com;google.com;amazon.com;ucla.edu;inria.fr;google.com;research.google.com", "author_num": 8, "aff_unique_index": "0;1;1;2;0;3;1;1", "aff_unique_norm": "University of California, Los Angeles;Google;Amazon;INRIA", "aff_unique_dep": ";Google;Amazon.com, Inc.;", "aff_unique_url": "https://www.ucla.edu;https://www.google.com;https://www.amazon.com;https://www.inria.fr", "aff_unique_abbr": "UCLA;Google;Amazon;Inria", "aff_campus_unique_index": "0;1;1;0;1;1", "aff_campus_unique": "Los Angeles;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "United States;France" }, { "title": "Exact Optimality of Communication-Privacy-Utility Tradeoffs in Distributed Mean Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72717", "id": "7ETbK9lQd7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/76bea0a1cf7bf9b78f842009f6de15a1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7ETbK9lQd7", "openreview": "https://openreview.net/forum?id=7ETbK9lQd7", "poster": "/media/PosterPDFs/NeurIPS%202023/72717.png?t=1701507611.0714555", "slides": "https://nips.cc/virtual/2023/poster/72717", "video": "https://nips.cc/virtual/2023/poster/72717", "author_site": "Berivan Isik, Wei-Ning Chen, Ayfer Ozgur, Tsachy Weissman, Albert No", "tldr": "", "abstract": "We study the mean estimation problem under communication and local differential privacy constraints. While previous work has proposed order-optimal algorithms for the same problem (i.e., asymptotically optimal as we spend more bits), exact optimality (in the non-asymptotic setting) still has not been achieved. In this work, we take a step towards characterizing the exact-optimal approach in the presence of shared randomness (a random variable shared between the server and the user) and identify several conditions for exact optimality. We prove that one of the conditions is to utilize a rotationally symmetric shared random codebook. Based on this, we propose a randomization mechanism where the codebook is a randomly rotated simplex -- satisfying the properties of the exact-optimal codebook. 
The proposed mechanism is based on a $k$-closest encoding which we prove to be exact-optimal for the randomly rotated simplex codebook.", "keywords": "distributed mean estimation;privacy;compression;communication;federated analytics.", "primary_area": "", "supplementary_material": "/attachment/57af6bb586f7a23caa410cbe286c46a2af8530b3.pdf", "author": "Berivan Isik;Wei-Ning Chen;Ayfer Ozgur;Tsachy Weissman;Albert No", "authorids": "~Berivan_Isik1;~Wei-Ning_Chen1;~Ayfer_Ozgur1;~Tsachy_Weissman1;~Albert_No1", "gender": ";;;;", "homepage": "https://sites.google.com/view/berivanisik;https://web.stanford.edu/~wnchen/index.html;;;http://albert-no.github.io/", "dblp": "265/6197;51/2118;12/4534;34/2720;https://dblp.uni-trier.de/pid/23/11268", "google_scholar": "GdXOFKoAAAAJ;-TqCZLIAAAAJ;;;Kzj3HC8AAAAJ", "orcid": ";0000-0001-7355-9487;;;", "linkedin": "berivan-isik-439a3b122/;;;;", "or_profile": "~Berivan_Isik1;~Wei-Ning_Chen1;~Ayfer_Ozgur1;~Tsachy_Weissman1;~Albert_No1", "aff": "Amazon;Stanford University;Stanford University;Stanford University;Hongik University", "aff_domain": "amazon.com;stanford.edu;stanford.edu;stanford.edu;hongik.ac.kr", "position": "Research Intern;PhD student;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nisik2023exact,\ntitle={Exact Optimality of Communication-Privacy-Utility Tradeoffs in Distributed Mean Estimation},\nauthor={Berivan Isik and Wei-Ning Chen and Ayfer Ozgur and Tsachy Weissman and Albert No},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7ETbK9lQd7}\n}", "github": "", "project": "", "reviewers": "jPit;69ED;JsmV;eu7R;FeU2", "pdf_size": 569604, "rating": "6;6;7;7;7", "confidence": "4;3;3;3;2", "soundness": "3;3;4;3;3", "novelty": "3;3;3;3;3", "presentation": "3;2;3;3;4", "wc_summary": "147;87;120;35;123", "wc_strengths": "47;85;28;74;32", "wc_weaknesses": "68;56;26;45;18", "wc_questions": "374;59;1;31;2", "wc_limitations": "1;10;6;9;13", "wc_review": "637;297;181;194;188", "wc_reply_reviewers": "412;77;40;14;9", "wc_reply_authors": "562;17;20;10;14", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 102.4, 38.73809494541516 ], "wc_strengths_avg": [ 53.2, 22.657449106199046 ], "wc_weaknesses_avg": [ 42.6, 18.499729727755483 ], "wc_questions_avg": [ 93.4, 141.91913190264378 ], "wc_limitations_avg": [ 7.8, 4.069397989875161 ], "wc_review_avg": [ 299.4, 174.07883271667464 ], "wc_reply_reviewers_avg": [ 110.4, 152.71489776704826 ], "wc_reply_authors_avg": [ 124.6, 218.7250328608959 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5346612417984092308&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 10, "email": "amazon.com;stanford.edu;stanford.edu;stanford.edu;hongik.ac.kr", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Amazon;Stanford University;Hongik University", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.stanford.edu;https://www.hongik.ac.kr", "aff_unique_abbr": "Amazon;Stanford;HU", 
"aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;South Korea" }, { "title": "Change point detection and inference in multivariate non-parametric models under mixing conditions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72716", "id": "7Fb2lCwS76", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42a0de6b8a1809ceba8fdad1661be06c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7Fb2lCwS76", "openreview": "https://openreview.net/forum?id=7Fb2lCwS76", "poster": "/media/PosterPDFs/NeurIPS%202023/72716.png?t=1701455355.854231", "slides": "https://nips.cc/virtual/2023/poster/72716", "video": "https://nips.cc/virtual/2023/poster/72716", "author_site": "Carlos Misael Madrid Padilla, Haotian Xu, Daren Wang, OSCAR HERNAN MADRID PADILLA, Yi Yu", "tldr": "", "abstract": "This paper addresses the problem of localizing and inferring multiple change points, in non-parametric multivariate time series settings. Specifically, we consider a multivariate time series with potentially short-range dependence, whose underlying distributions have H\u00f6lder smooth densities and can change over time in a piecewise-constant manner. The change points, which correspond to the times when the distribution changes, are unknown.\n We present the limiting distributions of the change point estimators under the scenarios where the minimal jump size vanishes or remains constant. Such results have not been revealed in the literature in non-parametric change point settings. As byproducts, we develop a sharp estimator that can accurately localize the change points in multivariate non-parametric time series, and a consistent block-type long-run variance estimator. 
Numerical studies are provided to complement our theoretical findings.", "keywords": "Multivariate; Nonparametric; Change point inference; short range dependence; Long-run variance; Confidence interval.", "primary_area": "", "supplementary_material": "", "author": "Carlos Misael Madrid Padilla;Haotian Xu;Daren Wang;OSCAR HERNAN MADRID PADILLA;Yi Yu", "authorids": "~Carlos_Misael_Madrid_Padilla1;~Haotian_Xu5;~Daren_Wang3;~OSCAR_HERNAN_MADRID_PADILLA2;~Yi_Yu3", "gender": "M;M;M;;F", "homepage": ";https://haotianxu.github.io;https://darenwang.github.io/website/;https://hernanmp.github.io/;https://warwick.ac.uk/fac/sci/statistics/staff/academic-research/yu/", "dblp": "346/0975;;143/6471;;", "google_scholar": "ed4qZZkAAAAJ;AKcttAUAAAAJ;;;", "orcid": "0000-0003-2157-0160;;;;", "linkedin": ";;;;", "or_profile": "~Carlos_Misael_Madrid_Padilla1;~Haotian_Xu5;~Daren_Wang3;~OSCAR_HERNAN_MADRID_PADILLA2;~Yi_Yu3", "aff": ";Pennsylvania State University;University of Notre Dame;University of California, Los Angeles;University of Warwick", "aff_domain": ";psu.edu;nd.edu;ucla.edu;warwick.ac.uk", "position": ";Postdoc;Assistant Professor;Assistant Professor;Reader", "bibtex": "@inproceedings{\npadilla2023change,\ntitle={Change point detection and inference in multivariate non-parametric models under mixing conditions},\nauthor={Carlos Misael Madrid Padilla and Haotian Xu and Daren Wang and OSCAR HERNAN MADRID PADILLA and Yi Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7Fb2lCwS76}\n}", "github": "", "project": "", "reviewers": "rZb3;DHN6;TS3z;Ng5S;XKms", "pdf_size": 701802, "rating": "5;5;6;7;7", "confidence": "3;2;3;3;3", "soundness": "3;3;3;3;3", "novelty": "4;3;2;3;3", "presentation": "4;2;2;3;3", "wc_summary": "97;111;64;60;85", "wc_strengths": "46;74;45;43;55", "wc_weaknesses": "359;1156;143;14;76", "wc_questions": "9;315;151;192;102", "wc_limitations": "43;5;1;1;6", "wc_review": "554;1661;404;310;324", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 83.4, 19.355619339096332 ], "wc_strengths_avg": [ 52.6, 11.46472851837321 ], "wc_weaknesses_avg": [ 349.6, 419.6515697575788 ], "wc_questions_avg": [ 153.8, 101.09678530991971 ], "wc_limitations_avg": [ 11.2, 16.0299719276111 ], "wc_review_avg": [ 650.6, 512.5870072485257 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4652084447041028697&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";psu.edu;nd.edu;ucla.edu;warwick.ac.uk", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Pennsylvania State University;University of Notre Dame;University of California, Los Angeles;University of Warwick", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.psu.edu;https://www.nd.edu;https://www.ucla.edu;https://www.warwick.ac.uk", "aff_unique_abbr": "PSU;Notre Dame;UCLA;Warwick", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", 
"aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "7FitZnnnu8", "title": "Learning Directed Graphical Models with Optimal Transport", "track": "main", "status": "Reject", "tldr": "", "abstract": "Estimating the parameters of a probabilistic directed graphical model from incomplete data remains a long-standing challenge. This is because, in the presence of latent variables, both the likelihood function and posterior distribution are intractable without further assumptions about structural dependencies or model classes. While existing learning methods are fundamentally based on likelihood maximization, here we offer a new view of the parameter learning problem through the lens of optimal transport. This perspective licenses a framework that operates on many directed graphs without making unrealistic assumptions on the posterior over the latent variables or resorting to black-box variational approximations. We develop a theoretical framework and support it with extensive empirical evidence demonstrating the flexibility and versatility of our approach. Across experiments, we show that not only can our method recover the ground-truth parameters but it also performs competitively on downstream applications, notably the non-trivial task of discrete representation learning. ", "keywords": "Bayesian networks;probabilistic graphical models", "primary_area": "", "supplementary_material": "/attachment/f9787474cf2c96d6ba13c06c0f854b00eeeee29f.zip", "author": "Vy Vo;Trung Le;Long Tung Vuong;He Zhao;Edwin V. Bonilla;Dinh Phung", "authorids": "~Vy_Vo2;~Trung_Le2;~Long_Tung_Vuong1;~He_Zhao1;~Edwin_V._Bonilla1;~Dinh_Phung2", "gender": "F;M;M;;;", "homepage": "https://isvy08.github.io/;;;;;", "dblp": "176/4660;;329/6838;;;", "google_scholar": "3CpFpFkAAAAJ;https://scholar.google.com/citations?hl=en;DCC657sAAAAJ;;;", "orcid": ";;;;;", "linkedin": ";;long-vuong-783477131/;;;", "or_profile": "~Vy_Vo2;~Trung_Le2;~Long_Tung_Vuong1;~He_Zhao1;~Edwin_V._Bonilla1;~Dinh_Phung2", "aff": "Monash University;Monash University;Monash University;;;", "aff_domain": "monash.edu;monash.edu;monash.edu;;;", "position": "PhD student;Assistant Professor;PhD student;;;", "bibtex": "@misc{\nvo2023learning,\ntitle={Learning Directed Graphical Models with Optimal Transport},\nauthor={Vy Vo and Trung Le and Long Tung Vuong and He Zhao and Edwin V. 
Bonilla and Dinh Phung},\nyear={2023},\nurl={https://openreview.net/forum?id=7FitZnnnu8}\n}", "github": "", "project": "", "reviewers": "HQH3;UL8t;s48C;uiRc", "site": "https://openreview.net/forum?id=7FitZnnnu8", "pdf_size": 479739, "rating": "5;5;5;6", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "2;3;2;3", "wc_summary": "68;198;121;105", "wc_strengths": "42;74;136;118", "wc_weaknesses": "75;253;173;405", "wc_questions": "26;30;423;2", "wc_limitations": "1;25;27;53", "wc_review": "212;580;880;683", "wc_reply_reviewers": "0;14;492;27", "wc_reply_authors": "0;22;1129;22", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;4;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 123.0, 47.37615433949868 ], "wc_strengths_avg": [ 92.5, 36.86122624113311 ], "wc_weaknesses_avg": [ 226.5, 120.80873312803176 ], "wc_questions_avg": [ 120.25, 175.12049423182884 ], "wc_limitations_avg": [ 26.5, 18.405162319305962 ], "wc_review_avg": [ 588.75, 242.75849624678432 ], "wc_reply_reviewers_avg": [ 133.25, 207.3443693472287 ], "wc_reply_authors_avg": [ 293.25, 482.6040690876943 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=599274555169055601&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "AbDiffuser: full-atom generation of in-vitro functioning antibodies", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72715", "id": "7GyYpomkEa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/801ec05b0aae9fcd2ef35c168bd538e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7GyYpomkEa", "openreview": "https://openreview.net/forum?id=7GyYpomkEa", "poster": "/media/PosterPDFs/NeurIPS%202023/72715.png?t=1702355759.7470894", "slides": "https://nips.cc/virtual/2023/poster/72715", "video": "https://nips.cc/virtual/2023/poster/72715", "author_site": "Karolis Martinkus, Jan Ludwiczak, WEI-CHING LIANG, Julien Lafrance-Vanasse, Isidro Hotzel, Arvind Rajpal, Yan Wu, Kyunghyun Cho, Richard Bonneau, Vladimir Gligorijevic, Andreas Loukas", "tldr": "", "abstract": "We introduce AbDiffuser, an equivariant and physics-informed diffusion model for the joint generation of antibody 3D structures and sequences. AbDiffuser is built on top of a new representation of protein structure, relies on a novel architecture for aligned proteins, and utilizes strong diffusion priors to improve the denoising process. Our approach improves protein diffusion by taking advantage of domain knowledge and physics-based constraints; handles sequence-length changes; and reduces memory complexity by an order of magnitude, enabling backbone and side chain generation. We validate AbDiffuser in silico and in vitro. 
Numerical experiments showcase the ability of AbDiffuser to generate antibodies that closely track the sequence and structural properties of a reference set. Laboratory experiments confirm that all 16 HER2 antibodies discovered were expressed at high levels and that 57.1% of the selected designs were tight binders.", "keywords": "antibody generation;diffusion;equivariance", "primary_area": "", "supplementary_material": "", "author": "Karolis Martinkus;Jan Ludwiczak;WEI-CHING LIANG;Julien Lafrance-Vanasse;Isidro Hotzel;Arvind Rajpal;Yan Wu;Kyunghyun Cho;Richard Bonneau;Vladimir Gligorijevic;Andreas Loukas", "authorids": "~Karolis_Martinkus1;~Jan_Ludwiczak1;~WEI-CHING_LIANG1;~Julien_Lafrance-Vanasse1;~Isidro_Hotzel1;~Arvind_Rajpal1;~Yan_Wu7;~Kyunghyun_Cho1;~Richard_Bonneau1;~Vladimir_Gligorijevic2;~Andreas_Loukas1", "gender": "M;;;M;;M;F;M;M;M;M", "homepage": "https://disco.ethz.ch/members/mkarolis;;;;;;https://www.linkedin.com/feed/;http://kyunghyuncho.me;https://as.nyu.edu/content/nyu-as/as/faculty/richard-bonneau.html;https://www.gene.com/scientists/our-scientists/vladimir-gligorijevic;", "dblp": "276/5531;;;;;;;41/9736;;116/2862;19/10012", "google_scholar": "https://scholar.google.ch/citations?user=Sr6ho54AAAAJ;;;;;https://scholar.google.com/citations?view_op=search_authors;;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ;https://scholar.google.com.tw/citations?user=Wq8XTykAAAAJ;rnuxS_YAAAAJ;https://scholar.google.ch/citations?user=-XGXJbQAAAAJ", "orcid": "0000-0002-5344-4321;;;0000-0001-8807-6277;;;;;;;", "linkedin": ";;wei-ching-liang-1557ba52/;julienlv/;;;https://www.linkedin.com/feed/;;;;", "or_profile": "~Karolis_Martinkus1;~Jan_Ludwiczak1;~WEI-CHING_LIANG1;~Julien_Lafrance-Vanasse1;~Isidro_Hotzel1;~Arvind_Rajpal1;~Yan_Wu7;~Kyunghyun_Cho1;~Richard_Bonneau1;~Vladimir_Gligorijevic2;~Andreas_Loukas1", "aff": "Swiss Federal Institute of Technology;;Genentech;Genentech;;;;New York University;New York University;Genentech;Roche / Genentech", "aff_domain": "ethz.ch;;gene.com;gene.com;;;;nyu.edu;nyu.edu;gene.com;roche.com", "position": "PhD student;;Researcher;Researcher;;;;Associate Professor;Full Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nmartinkus2023abdiffuser,\ntitle={AbDiffuser: full-atom generation of in-vitro functioning antibodies},\nauthor={Karolis Martinkus and Jan Ludwiczak and WEI-CHING LIANG and Julien Lafrance-Vanasse and Isidro Hotzel and Arvind Rajpal and Yan Wu and Kyunghyun Cho and Richard Bonneau and Vladimir Gligorijevic and Andreas Loukas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7GyYpomkEa}\n}", "github": "", "project": "", "reviewers": "9R9d;NpkL;is6j;AFyF", "pdf_size": 13409858, "rating": "5;6;7;8", "confidence": "4;2;4;4", "soundness": "2;3;4;4", "novelty": "3;2;3;3", "presentation": "1;2;3;4", "wc_summary": "137;86;45;24", "wc_strengths": "104;107;92;125", "wc_weaknesses": "821;150;164;71", "wc_questions": "6;63;120;26", "wc_limitations": "14;97;1;20", "wc_review": "1082;503;422;266", "wc_reply_reviewers": "303;47;31;23", "wc_reply_authors": "2354;0;314;0", "reply_reviewers": "1;1;1;1", "reply_authors": "5;1;3;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 73.0, 43.15669125408017 ], "wc_strengths_avg": [ 107.0, 11.811011811017716 ], 
"wc_weaknesses_avg": [ 301.5, 302.0219362894027 ], "wc_questions_avg": [ 53.75, 43.37265843823733 ], "wc_limitations_avg": [ 33.0, 37.58324094593227 ], "wc_review_avg": [ 568.25, 308.601988814071 ], "wc_reply_reviewers_avg": [ 101.0, 116.94443124834974 ], "wc_reply_authors_avg": [ 667.0, 982.3894339822675 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7914819643448305313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ethz.ch;;gene.com;gene.com;;;;nyu.edu;nyu.edu;gene.com;roche.com", "author_num": 11, "aff_unique_index": "0;1;1;2;2;1;3", "aff_unique_norm": "Swiss Federal Institute of Technology;Genentech;New York University;Roche", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ethz.ch;https://www.genentech.com;https://www.nyu.edu;https://www.roche.com", "aff_unique_abbr": "ETH Zurich;Genentech;NYU;Roche", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Long-Term Fairness with Unknown Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72714", "id": "7INd5Yu9ET", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/acf4a08f67724e9d2de34099f57a9c25-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7INd5Yu9ET", "openreview": "https://openreview.net/forum?id=7INd5Yu9ET", "poster": "/media/PosterPDFs/NeurIPS%202023/72714.png?t=1699567825.850636", "slides": "https://nips.cc/virtual/2023/poster/72714", "video": "https://nips.cc/virtual/2023/poster/72714", "author_site": "Tongxin Yin, Reilly Raab, Mingyan Liu, Yang Liu", "tldr": "", "abstract": "While machine learning can myopically reinforce social inequalities, it may also be used to dynamically seek equitable outcomes. In this paper, we formalize long-term fairness as an online reinforcement learning problem for a policy affecting human populations. This formulation accommodates dynamical control objectives, such as achieving equitable population states, that cannot be incorporated into static formulations of fairness. We demonstrate that algorithmic solutions to the proposed fairness problem can adapt to unknown dynamics and, by sacrificing short-term incentives, drive the policy-population system towards more desirable equilibria. For the proposed setting, we develop an algorithm that adapts recent work in online learning and prove that this algorithm achieves simultaneous probabilistic bounds on cumulative loss and cumulative violations of fairness. In the classification setting subject to group fairness, we compare our proposed algorithm to several baselines, including the repeated retraining of myopic or distributionally robust classifiers, and to a deep reinforcement learning algorithm that lacks fairness guarantees. 
Our experiments model human populations according to evolutionary game theory and integrate real-world datasets.", "keywords": "Long-term Fairness;Dynamics;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/e67d0d5411b218ba0e601b48e2ec611bd3cdc9b0.zip", "author": "Tongxin Yin;Reilly Raab;Mingyan Liu;Yang Liu", "authorids": "~Tongxin_Yin1;~Reilly_Raab1;~Mingyan_Liu1;~Yang_Liu3", "gender": "F;;F;M", "homepage": "https://www.linkedin.com/in/tongxinyin/;https://reillyraab.com;https://liu.engin.umich.edu;http://www.yliuu.com", "dblp": "305/3911;305/3753;97/5725;51/3710-18", "google_scholar": "_02Q5nEAAAAJ;;WiIM-MgAAAAJ;jKrIVCIAAAAJ", "orcid": "0000-0002-6166-3890;;0000-0003-3295-9200;0000-0001-8420-6011", "linkedin": "tongxinyin/;;;", "or_profile": "~Tongxin_Yin1;~Reilly_Raab1;~Mingyan_Liu1;~Yang_Liu3", "aff": "University of Michigan - Ann Arbor;University of California, Santa Cruz;University of Michigan - Ann Arbor;University of California, Santa Cruz", "aff_domain": "umich.edu;ucsc.edu;umich.edu;ucsc.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyin2023longterm,\ntitle={Long-Term Fairness with Unknown Dynamics},\nauthor={Tongxin Yin and Reilly Raab and Mingyan Liu and Yang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7INd5Yu9ET}\n}", "github": "", "project": "", "reviewers": "oXaL;iPFQ;kVGy;nDcm", "pdf_size": 14241598, "rating": "4;5;6;6", "confidence": "3;3;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "3;3;2;3", "wc_summary": "126;100;112;149", "wc_strengths": "68;48;177;110", "wc_weaknesses": "219;141;238;109", "wc_questions": "161;70;70;106", "wc_limitations": "11;7;13;30", "wc_review": "585;366;610;504", "wc_reply_reviewers": "0;329;0;107", "wc_reply_authors": "0;921;0;111", "reply_reviewers": "0;3;0;1", "reply_authors": "1;4;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 121.75, 18.226011631731172 ], "wc_strengths_avg": [ 100.75, 49.38306896093032 ], "wc_weaknesses_avg": [ 176.75, 53.396512058373254 ], "wc_questions_avg": [ 101.75, 37.23153904957462 ], "wc_limitations_avg": [ 15.25, 8.78564169540279 ], "wc_review_avg": [ 516.25, 95.18501720333931 ], "wc_reply_reviewers_avg": [ 109.0, 134.31865097595346 ], "wc_reply_authors_avg": [ 258.0, 385.4562231953195 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11523279730601763568&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "umich.edu;ucsc.edu;umich.edu;ucsc.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Michigan;University of California, Santa Cruz", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.ucsc.edu", "aff_unique_abbr": "UM;UCSC", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Ann Arbor;Santa Cruz", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Contextual Perception: How To Generalize To New Backgrounds and Ambiguous Objects", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/72713", "id": "7JuReDmGSL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8d2c36836fb0e7d78fe68762ff8b5f1e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7JuReDmGSL", "openreview": "https://openreview.net/forum?id=7JuReDmGSL", "poster": "/media/PosterPDFs/NeurIPS%202023/72713.png?t=1702329941.834125", "slides": "https://nips.cc/virtual/2023/poster/72713", "video": "https://nips.cc/virtual/2023/poster/72713", "author_site": "Zhuofan Ying, Peter Hase, Mohit Bansal", "tldr": "", "abstract": "Biological vision systems make adaptive use of context to recognize objects in new settings with novel contexts as well as occluded or blurry objects in familiar settings. In this paper, we investigate how vision models adaptively use context for out-of-distribution (OOD) generalization and leverage our analysis results to improve model OOD generalization. First, we formulate two distinct OOD settings where the contexts are either beneficial Object-Disambiguation or irrelevant Background-Invariance, reflecting the diverse contextual challenges faced in biological vision. We then analyze model performance in these two different OOD settings and demonstrate that models that excel in one setting tend to struggle in the other. Notably, prior works on learning causal features improve on one setting but hurt on the other. This underscores the importance of generalizing across both OOD settings, as this ability is crucial for both human cognition and robust AI systems. Next, to better understand the model properties contributing to OOD generalization, we use representational geometry analysis and our own probing methods to examine a population of models, and we discover that those with more factorized representations and appropriate feature weighting are more successful in handling Object-Disambiguation and Background-Invariance tests. We further validate these findings through causal intervention, manipulating representation factorization and feature weighting to demonstrate their causal effect on performance. Motivated by our analysis results, we propose new augmentation methods aimed at enhancing model generalization. The proposed methods outperform strong baselines, yielding improvements in both in-distribution and OOD tests. We conclude that, in order to replicate the generalization abilities of biological vision, computer vision models must have factorized object vs. 
background representations and appropriately weigh both kinds of features.", "keywords": "Computer vision;out-of-distribution generalization;representational geometry", "primary_area": "", "supplementary_material": "", "author": "Zhuofan Ying;Peter Hase;Mohit Bansal", "authorids": "~Zhuofan_Ying1;~Peter_Hase1;~Mohit_Bansal2", "gender": "M;;M", "homepage": ";;https://www.cs.unc.edu/~mbansal/", "dblp": "262/0436;;32/5243.html", "google_scholar": "r8pcDSkAAAAJ;;DN8QtscAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhuofan_Ying1;~Peter_Hase1;~Mohit_Bansal2", "aff": "Department of Psychology, Columbia University;;University of North Carolina at Chapel Hill", "aff_domain": "columbia.edu;;unc.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nying2023adaptive,\ntitle={Adaptive Contextual Perception: How To Generalize To New Backgrounds and Ambiguous Objects},\nauthor={Zhuofan Ying and Peter Hase and Mohit Bansal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7JuReDmGSL}\n}", "github": "", "project": "", "reviewers": "Qyp3;WSbu;D3vS;FMxu", "pdf_size": 5128847, "rating": "5;5;5;7", "confidence": "5;4;4;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "79;130;79;79", "wc_strengths": "23;101;55;58", "wc_weaknesses": "16;199;164;8", "wc_questions": "76;5;2;115", "wc_limitations": "16;1;1;10", "wc_review": "210;436;301;270", "wc_reply_reviewers": "0;151;0;0", "wc_reply_authors": "0;421;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;3;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 91.75, 22.083647796503186 ], "wc_strengths_avg": [ 59.25, 27.73422975314079 ], "wc_weaknesses_avg": [ 96.75, 85.69531784175842 ], "wc_questions_avg": [ 49.5, 48.03384223648989 ], "wc_limitations_avg": [ 7.0, 6.363961030678928 ], "wc_review_avg": [ 304.25, 82.80209840336174 ], "wc_reply_reviewers_avg": [ 37.75, 65.38491798572512 ], "wc_reply_authors_avg": [ 105.25, 182.29834749662433 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11075720189552503080&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "columbia.edu;;unc.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;University of North Carolina", "aff_unique_dep": "Department of Psychology;", "aff_unique_url": "https://www.columbia.edu;https://www.unc.edu", "aff_unique_abbr": "Columbia;UNC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Representation Equivalent Neural Operators: a Framework for Alias-free Operator Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72712", "id": "7LSEkvEGCM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc35c593e61f6df62db541b976d09dcf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7LSEkvEGCM", "openreview": "https://openreview.net/forum?id=7LSEkvEGCM", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72712.png?t=1702150434.4130914", "slides": "https://nips.cc/virtual/2023/poster/72712", "video": "https://nips.cc/virtual/2023/poster/72712", "author_site": "Francesca Bartolucci, Emmanuel de B\u00e9zenac, Bogdan Raonic, Roberto Molinaro, Siddhartha Mishra, Rima Alaifari", "tldr": "", "abstract": "Recently, operator learning, or learning mappings between infinite-dimensional function spaces, has garnered significant attention, notably in relation to learning partial differential equations from data. Conceptually clear when outlined on paper, neural operators necessitate discretization in the transition to computer implementations. This step can compromise their integrity, often causing them to deviate from the underlying operators. This research offers a fresh take on neural operators with a framework Representation equivalent Neural Operators (ReNO) designed to address these issues. At its core is the concept of operator aliasing, which measures inconsistency between neural operators and their discrete representations. We explore this for widely-used operator learning techniques. Our findings detail how aliasing introduces errors when handling different discretizations and grids and loss of crucial continuous structures. More generally, this framework not only sheds light on existing challenges but, given its constructive and broad nature, also potentially offers tools for developing new neural operators.", "keywords": "Operator Learning;Neural Operators;PDEs;Frame theory;Sampling theory", "primary_area": "", "supplementary_material": "/attachment/a627ccc5ea5aef72c5c250819a7e9e9914b1beb7.pdf", "author": "Francesca Bartolucci;Emmanuel de Bezenac;Bogdan Raonic;Roberto Molinaro;Siddhartha Mishra;Rima Alaifari", "authorids": "~Francesca_Bartolucci1;~Emmanuel_de_Bezenac2;~Bogdan_Raonic1;~Roberto_Molinaro1;~Siddhartha_Mishra1;~Rima_Alaifari1", "gender": "F;M;M;M;M;F", "homepage": "https://sites.google.com/view/bartoluccifrancesca;;https://www.linkedin.com/in/bogdan-raoni%C4%87-210066167;;http://www.sam.math.ethz.ch/;http://www.alaifari.com/", "dblp": "255/8922;;339/6810;249/2799;07/2856.html;159/8644", "google_scholar": "NUUWKPwAAAAJ;https://scholar.google.fr/citations?user=KvZw5gYAAAAJ;DN9CCpkAAAAJ;2ohT8yYAAAAJ;FmEqyNcAAAAJ;WiX5uI4AAAAJ", "orcid": "0000-0001-8748-413X;;;;;0000-0003-1608-8580", "linkedin": ";;bogdan-raoni%C4%87-210066167;;;rima-alaifari-6b9b39153/?originalSubdomain=ch", "or_profile": "~Francesca_Bartolucci1;~Emmanuel_de_Bezenac2;~Bogdan_Raonic1;~Roberto_Molinaro1;~Siddhartha_Mishra1;~Rima_Alaifari1", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "Postdoc;Postdoc;MS student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nbartolucci2023representation,\ntitle={Representation Equivalent Neural Operators: a Framework for Alias-free Operator Learning},\nauthor={Francesca Bartolucci and Emmanuel de Bezenac and Bogdan Raonic and Roberto Molinaro and Siddhartha Mishra and Rima Alaifari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7LSEkvEGCM}\n}", "github": "", "project": "", "reviewers": "2w9i;3pPU;9wa4;nALh;ydDu", "pdf_size": 296863, "rating": "6;6;6;6;7", "confidence": "2;2;4;5;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;3;2", 
"wc_summary": "83;119;58;35;65", "wc_strengths": "133;106;56;26;44", "wc_weaknesses": "356;83;311;669;151", "wc_questions": "343;30;151;644;57", "wc_limitations": "153;15;15;8;19", "wc_review": "1068;353;591;1382;336", "wc_reply_reviewers": "316;11;0;1121;35", "wc_reply_authors": "715;22;37;2635;38", "reply_reviewers": "2;1;0;3;1", "reply_authors": "5;2;2;6;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 72.0, 28.085583490467133 ], "wc_strengths_avg": [ 73.0, 40.0699388569536 ], "wc_weaknesses_avg": [ 314.0, 203.8273779451622 ], "wc_questions_avg": [ 245.0, 227.6883835420683 ], "wc_limitations_avg": [ 42.0, 55.612948132606675 ], "wc_review_avg": [ 746.0, 413.4619692305448 ], "wc_reply_reviewers_avg": [ 296.6, 428.4822516744422 ], "wc_reply_authors_avg": [ 689.4, 1008.1056690645083 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 3.4, 1.7435595774162693 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.08574929257125441, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11608021030503335353&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Bounding training data reconstruction in DP-SGD", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72711", "id": "7LZ4tZrYlx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f8928b073ccbec15d35f2a9d39430bfd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7LZ4tZrYlx", "openreview": "https://openreview.net/forum?id=7LZ4tZrYlx", "poster": "/media/PosterPDFs/NeurIPS%202023/72711.png?t=1701441770.8956096", "slides": "https://nips.cc/virtual/2023/poster/72711", "video": "https://nips.cc/virtual/2023/poster/72711", "author_site": "Jamie Hayes, Borja Balle, Saeed Mahloujifar", "tldr": "", "abstract": "Differentially private training offers a protection which is usually interpreted as a guarantee against membership inference attacks. By proxy, this guarantee extends to other threats like reconstruction attacks attempting to extract complete training examples. Recent works provide evidence that if one does not need to protect against membership attacks but instead only wants to protect against a training data reconstruction, then utility of private models can be improved because less noise is required to protect against these more ambitious attacks. We investigate this question further in the context of DP-SGD, a standard algorithm for private deep learning, and provide an upper bound on the success of any reconstruction attack against DP-SGD together with an attack that empirically matches the predictions of our bound. Together, these two results open the door to fine-grained investigations on how to set the privacy parameters of DP-SGD in practice to protect against reconstruction attacks. 
Finally, we use our methods to demonstrate that different settings of the DP-SGD parameters leading to the same DP guarantees can result in significantly different success rates for reconstruction, indicating that the DP guarantee alone might not be a good proxy for controlling the protection against reconstruction attacks.", "keywords": "Differential privacy;reconstruction", "primary_area": "", "supplementary_material": "/attachment/0c9a95b6cc345ebae288081a1c43d7c71b64625a.pdf", "author": "Jamie Hayes;Borja Balle;Saeed Mahloujifar", "authorids": "~Jamie_Hayes2;~Borja_Balle2;~Saeed_Mahloujifar1", "gender": "M;;M", "homepage": ";https://borjaballe.github.io/;https://www.cs.virginia.edu/~sm5fd/", "dblp": ";https://dblp.uni-trier.de/pers/b/Balle:Borja.html;208/0825", "google_scholar": "https://scholar.google.com/citations?hl=en;;kW-hl3YAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jamie_Hayes2;~Borja_Balle2;~Saeed_Mahloujifar1", "aff": "Google DeepMind;Google DeepMind;Princeton University", "aff_domain": "google.com;google.com;princeton.edu", "position": "Researcher;Research scientist;Postdoc", "bibtex": "@inproceedings{\nhayes2023bounding,\ntitle={Bounding training data reconstruction in {DP}-{SGD}},\nauthor={Jamie Hayes and Borja Balle and Saeed Mahloujifar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7LZ4tZrYlx}\n}", "github": "", "project": "", "reviewers": "j4Wo;AUNf;rZtg;paNh;wroj", "pdf_size": 3036022, "rating": "4;5;6;6;8", "confidence": "3;3;2;2;3", "soundness": "2;2;4;4;3", "novelty": "2;2;4;3;4", "presentation": "3;2;4;4;3", "wc_summary": "216;52;82;54;91", "wc_strengths": "85;34;74;61;62", "wc_weaknesses": "524;191;11;44;74", "wc_questions": "233;120;118;37;120", "wc_limitations": "22;73;13;19;10", "wc_review": "1080;470;298;215;357", "wc_reply_reviewers": "390;27;5;12;34", "wc_reply_authors": "322;0;0;0;30", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 99.0, 60.4582500573743 ], "wc_strengths_avg": [ 63.2, 17.03408347989407 ], "wc_weaknesses_avg": [ 168.8, 187.68207159982012 ], "wc_questions_avg": [ 125.6, 62.45830609294492 ], "wc_limitations_avg": [ 27.4, 23.191377708105225 ], "wc_review_avg": [ 484.0, 309.353519456301 ], "wc_reply_reviewers_avg": [ 93.6, 148.5592137835954 ], "wc_reply_authors_avg": [ 70.4, 126.33542654378464 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1230914909793327, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9785085049177262595&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;google.com;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Google;Princeton University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.princeton.edu", "aff_unique_abbr": "DeepMind;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Dual Self-Awareness Value Decomposition Framework without Individual Global Max for Cooperative MARL", "status":
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72710", "id": "7LtzqnfuOs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9e140df6de01afb672cb859d203c307-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7LtzqnfuOs", "openreview": "https://openreview.net/forum?id=7LtzqnfuOs", "poster": "/media/PosterPDFs/NeurIPS%202023/72710.png?t=1696994423.7049642", "slides": "https://nips.cc/virtual/2023/poster/72710", "video": "https://nips.cc/virtual/2023/poster/72710", "author_site": "Zhiwei Xu, Bin Zhang, dapeng li, Guangchong Zhou, Zeren Zhang, Guoliang Fan", "tldr": "", "abstract": "Value decomposition methods have gained popularity in the field of cooperative multi-agent reinforcement learning. However, almost all existing methods follow the principle of Individual Global Max (IGM) or its variants, which limits their problem-solving capabilities. To address this, we propose a dual self-awareness value decomposition framework, inspired by the notion of dual self-awareness in psychology, that entirely rejects the IGM premise. Each agent consists of an ego policy for action selection and an alter ego value function to solve the credit assignment problem. The value function factorization can ignore the IGM assumption by utilizing an explicit search procedure. On the basis of the above, we also suggest a novel anti-ego exploration mechanism to avoid the algorithm becoming stuck in a local optimum. As the first fully IGM-free value decomposition method, our proposed framework achieves desirable performance in various cooperative tasks.", "keywords": "Multi-Agent Reinforcement Learning;Individual Global Max", "primary_area": "", "supplementary_material": "/attachment/7f74741b948935754fbc6baabeac9a1555edffd4.zip", "author": "Zhiwei Xu;Bin Zhang;Dapeng Li;Guangchong Zhou;Zeren Zhang;Guoliang Fan", "authorids": "~Zhiwei_Xu3;~Bin_Zhang12;~Dapeng_Li2;~Guangchong_Zhou1;~Zeren_Zhang2;~Guoliang_Fan3", "gender": "M;;;M;;M", "homepage": ";;;https://github.com/Jugg1er;;http://www.ia.ac.cn", "dblp": "262/0620-5;;;322/1173;;f/GuoliangFan", "google_scholar": "https://scholar.google.co.uk/citations?user=kZoG7ssAAAAJ;;;;;", "orcid": "0000-0002-0754-5295;;;;;", "linkedin": ";;;;;", "or_profile": "~Zhiwei_Xu3;~Bin_Zhang12;~Dapeng_Li2;~Guangchong_Zhou1;~Zeren_Zhang2;~Guoliang_Fan3", "aff": "Institute of Automation, Chinese Academy of Sciences;;;Institute of Automation, Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;;;ia.ac.cn;;ia.ac.cn", "position": "PhD student;;;PhD student;;Full Professor", "bibtex": "@inproceedings{\nxu2023dual,\ntitle={Dual Self-Awareness Value Decomposition Framework without Individual Global Max for Cooperative {MARL}},\nauthor={Zhiwei Xu and Bin Zhang and Dapeng Li and Guangchong Zhou and Zeren Zhang and Guoliang Fan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7LtzqnfuOs}\n}", "github": "", "project": "", "reviewers": "BzQX;rpvy;NJQv;uEFU;Yoei", "pdf_size": 8908923, "rating": "4;4;4;6;7", "confidence": "4;4;4;5;4", "soundness": "2;2;3;3;3", "novelty": "2;2;3;2;3", "presentation": "1;3;3;3;3", "wc_summary": "85;41;76;108;200", "wc_strengths": "38;38;128;102;73", "wc_weaknesses": "357;57;146;144;113", "wc_questions": "56;72;370;282;64", "wc_limitations": "23;26;12;4;46", "wc_review": "559;234;732;640;496", "wc_reply_reviewers": "14;0;0;20;24", "wc_reply_authors": "0;0;0;0;0", 
"reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 102.0, 53.52756299328412 ], "wc_strengths_avg": [ 75.8, 35.431059820445675 ], "wc_weaknesses_avg": [ 163.4, 102.0011764638036 ], "wc_questions_avg": [ 168.8, 131.43272043140553 ], "wc_limitations_avg": [ 22.2, 14.260434775980709 ], "wc_review_avg": [ 532.2, 168.8151651955475 ], "wc_reply_reviewers_avg": [ 11.6, 9.991996797437437 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3952847075210474, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3253153394907270703&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ia.ac.cn;;;ia.ac.cn;;ia.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "7NR2ZVzZxx", "title": "LogicBench: A Benchmark for Evaluation of Logical Reasoning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Recently developed large language models (LLMs) have been shown to perform remarkably well on a wide range of language understanding tasks. But, can they really \"Reason\" over the natural language? This question has been receiving significant research attention and a number of reasoning skills such as commonsense, numerical, and qualitative have been studied. However, the crucial skill pertaining to 'logical reasoning' has remained underexplored. Existing work investigating this reasoning ability has focused only on a couple of axioms (such as modus ponens and modus tollens) of propositional and first-order logic. To study logical reasoning, we introduce LogicBench, a systematically created natural language question-answering dataset encompassing 25 reasoning patterns spanning over propositional, first-order, and non-monotonic logics. Key steps of our dataset construction consist of (1) controlled generation of sentences and their negations containing different ontologies, (2) (context, question, answer) triplets creation using heuristically designed templates, and (3) semantic variations of triplets adding more diversity. We first evaluate easily accessible and widely used LLMs such as GPT-3, ChatGPT, and FLAN-T5 and show that they do not fare well on LogicBench, achieving just above random accuracy on average (~52%). 
Then, we show that LLMs trained using our data exhibit a better understanding of logical reasoning leading to performance improvements on several existing logical reasoning datasets such as LogicNLI, FOLIO, LogiQA, and ReClor.", "keywords": "Logical Reasoning;Large Language Models;Prompting", "primary_area": "", "supplementary_material": "/attachment/6a579e9d7c04aabb96ef8f37622e16e6d62c175b.pdf", "author": "Mihir Parmar;Neeraj Varshney;Nisarg Patel;Santosh Mashetty;Man Luo;Arindam Mitra;Chitta Baral", "authorids": "~Mihir_Parmar1;~Neeraj_Varshney1;~Nisarg_Patel1;~Santosh_Mashetty1;~Man_Luo2;~Arindam_Mitra1;~Chitta_Baral1", "gender": "M;M;;M;;M;M", "homepage": ";https://nrjvarshney.github.io/;;;;https://ari9dam.github.io/;http://chitta.orissalinks.com", "dblp": "253/6105;139/3970;304/0895;;;04/2864;b/ChittaBaral", "google_scholar": "2UPwJC4AAAAJ;Ju9nR0IAAAAJ;https://scholar.google.com/citations?hl=en;eQN-aNAAAAAJ;;https://scholar.google.com/;9Yd716IAAAAJ", "orcid": ";;0000-0001-5964-4204;;;;0000-0002-7549-723X", "linkedin": "mihir-parmar-b44003157/;neerajvarshney97/;https://linkedin.com/in/nisarg-p-patel;santoshmashetty/;;;chitta-baral-8a8438b", "or_profile": "~Mihir_Parmar1;~Neeraj_Varshney1;~Nisarg_Patel1;~Santosh_Mashetty1;~Man_Luo2;~Arindam_Mitra1;~Chitta_Baral1", "aff": "Arizona State University;Tencent AI Lab;Arizona State University;Arizona State University;;Microsoft Research;Arizona State University", "aff_domain": "asu.edu;tencent.com;asu.edu;asu.edu;;research.microsoft.com;asu.edu", "position": "PhD student;Intern;MS student;PhD student;;Researcher;Full Professor", "bibtex": "@misc{\nparmar2023logicbench,\ntitle={LogicBench: A Benchmark for Evaluation of Logical Reasoning},\nauthor={Mihir Parmar and Neeraj Varshney and Nisarg Patel and Santosh Mashetty and Man Luo and Arindam Mitra and Chitta Baral},\nyear={2023},\nurl={https://openreview.net/forum?id=7NR2ZVzZxx}\n}", "github": "", "project": "", "reviewers": "oWM8;dRyV;p5i9;9HEk;ywMi", "site": "https://openreview.net/forum?id=7NR2ZVzZxx", "pdf_size": 446304, "rating": "4;5;6;7;7", "confidence": "4;3;4;3;3", "wc_summary_and_contributions": "56;171;91;97;65", "wc_strengths": "64;49;55;75;48", "wc_improvement": "197;220;148;123;14", "wc_limitations": "4;30;3;24;10", "wc_correctness": "38;9;3;8;5", "wc_clarity": "37;18;11;9;11", "wc_relation_to_prior_work": "9;30;51;16;9", "wc_documentation": "5;24;3;23;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "411;552;366;376;168", "wc_reply_reviewers": "158;0;64;37;0", "wc_reply_authors": "507;434;716;346;178", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 96.0, 40.52653451752321 ], "wc_strengths_avg": [ 58.2, 10.146920715172657 ], "wc_improvement_avg": [ 140.4, 71.94053099609427 ], "wc_limitations_avg": [ 14.2, 10.88852607105296 ], "wc_correctness_avg": [ 12.6, 12.877888025604198 ], "wc_clarity_avg": [ 17.2, 10.361467077590895 ], "wc_relation_to_prior_work_avg": [ 23.0, 15.962455951387932 ], "wc_documentation_avg": [ 12.0, 9.423375191511797 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 374.6, 122.92697018962112 ], "wc_reply_reviewers_avg": [ 51.8, 58.33146663679904 ], "wc_reply_authors_avg": [ 436.2, 177.82508259522896 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.560112033611204, 
"gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VkgiuTolF4MJ:scholar.google.com/&scioq=LogicBench:+A+Benchmark+for+Evaluation+of+Logical+Reasoning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Arizona State University;Tencent;Microsoft", "aff_unique_dep": ";Tencent AI Lab;Microsoft Research", "aff_unique_url": "https://www.asu.edu;https://ai.tencent.com;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "ASU;Tencent AI Lab;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Statistical and Computational Trade-off in Multi-Agent Multi-Armed Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72709", "id": "7PJ6LaIOO4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9c7c8bd6ad4cebb7d006e5109e0b682-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7PJ6LaIOO4", "openreview": "https://openreview.net/forum?id=7PJ6LaIOO4", "poster": "/media/PosterPDFs/NeurIPS%202023/72709.png?t=1701797913.348609", "slides": "https://nips.cc/virtual/2023/poster/72709", "video": "https://nips.cc/virtual/2023/poster/72709", "author_site": "Filippo Vannella, Alexandre Proutiere, Jaeseong Jeong", "tldr": "", "abstract": "We study the problem of regret minimization in Multi-Agent Multi-Armed Bandits (MAMABs) where the rewards are defined through a factor graph. We derive an instance-specific regret lower bound and characterize the minimal expected number of times each global action should be explored. Unfortunately, this bound and the corresponding optimal exploration process are obtained by solving a combinatorial optimization problem with a set of variables and constraints exponentially growing with the number of agents. We approximate the regret lower bound problem via Mean Field techniques to reduce the number of variables and constraints. By tuning the latter, we explore the trade-off between achievable regret and complexity. We devise Efficient Sampling for MAMAB (ESM), an algorithm whose regret asymptotically matches the corresponding approximated lower bound. 
We assess the regret and computational complexity of ESM numerically, using both synthetic and real-world experiments in radio communications networks.", "keywords": "Multi-Agent Multi-Armed Bandits;Multi-Armed Bandits;Regret Minimization", "primary_area": "", "supplementary_material": "/attachment/f2ce68cbd9041d382596a24d555fedaccd40eba7.pdf", "author": "Filippo Vannella;Alexandre Proutiere;Jaeseong Jeong", "authorids": "~Filippo_Vannella1;~Alexandre_Proutiere1;~Jaeseong_Jeong1", "gender": "M;M;M", "homepage": ";https://people.kth.se/~alepro/;https://www.linkedin.com/in/jaeseong-jeong-a7468739/", "dblp": "221/3638;p/AlexandreProutiere;", "google_scholar": "QedUtsAAAAAJ;g5sya5cAAAAJ;kyVMGCgAAAAJ", "orcid": "0000-0002-7668-0650;;", "linkedin": "filippovannella/;;", "or_profile": "~Filippo_Vannella1;~Alexandre_Proutiere1;~Jaeseong_Jeong1", "aff": "KTH Royal Institute of Technology;KTH Royal Institute of Technology, Stockholm, Sweden;", "aff_domain": "kth.se;kth.se;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\nvannella2023statistical,\ntitle={Statistical and Computational Trade-off in Multi-Agent Multi-Armed Bandits},\nauthor={Filippo Vannella and Alexandre Proutiere and Jaeseong Jeong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7PJ6LaIOO4}\n}", "github": "", "project": "", "reviewers": "EJNU;7bJY;Pi4y;pqkD", "pdf_size": 1957846, "rating": "5;6;6;7", "confidence": "3;3;2;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "137;139;106;78", "wc_strengths": "43;29;58;59", "wc_weaknesses": "109;57;35;90", "wc_questions": "6;38;144;151", "wc_limitations": "3;1;1;8", "wc_review": "298;264;344;386", "wc_reply_reviewers": "207;17;16;13", "wc_reply_authors": "611;11;0;18", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.0, 25.0499500997507 ], "wc_strengths_avg": [ 47.25, 12.295832627357937 ], "wc_weaknesses_avg": [ 72.75, 28.656369274560934 ], "wc_questions_avg": [ 84.75, 63.80977589680127 ], "wc_limitations_avg": [ 3.25, 2.8613807855648994 ], "wc_review_avg": [ 323.0, 46.14108798023731 ], "wc_reply_reviewers_avg": [ 63.25, 83.00715330620609 ], "wc_reply_authors_avg": [ 160.0, 260.46400903003854 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7378784513037995458&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "kth.se;kth.se;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "title": "Tempo Adaptation in Non-stationary Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72708", "id": "7R8noSP4vL", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a0672689a693e0764f93f900488b3d9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7R8noSP4vL", "openreview": "https://openreview.net/forum?id=7R8noSP4vL", "poster": "/media/PosterPDFs/NeurIPS%202023/72708.png?t=1701476591.2131152", "slides": "https://nips.cc/virtual/2023/poster/72708", "video": "https://nips.cc/virtual/2023/poster/72708", "author_site": "Hyunin Lee, Yuhao Ding, Jongmin Lee, Ming Jin, Javad Lavaei, Somayeh Sojoudi", "tldr": "", "abstract": "We first raise and tackle a ``time synchronization'' issue between the agent and the environment in non-stationary reinforcement learning (RL), a crucial factor hindering its real-world applications. In reality, environmental changes occur over wall-clock time ($t$) rather than episode progress ($k$), where wall-clock time signifies the actual elapsed time within the fixed duration $t \\in [0, T]$. In existing works, at episode $k$, the agent rolls a trajectory and trains a policy before transitioning to episode $k+1$. In the context of the time-desynchronized environment, however, the agent at time $t_{k}$ allocates $\\Delta t$ for trajectory generation and training, subsequently moves to the next episode at $t_{k+1}=t_{k}+\\Delta t$. Despite a fixed total number of episodes ($K$), the agent accumulates different trajectories influenced by the choice of interaction times ($t_1,t_2,...,t_K$), significantly impacting the suboptimality gap of the policy. We propose a Proactively Synchronizing Tempo ($\\texttt{ProST}$) framework that computes a suboptimal sequence {$t_1,t_2,...,t_K$} (= { $t_{1:K}$}) by minimizing an upper bound on its performance measure, i.e., the dynamic regret. Our main contribution is that we show that a suboptimal {$t_{1:K}$} trades-off between the policy training time (agent tempo) and how fast the environment changes (environment tempo). Theoretically, this work develops a suboptimal {$t_{1:K}$} as a function of the degree of the environment's non-stationarity while also achieving a sublinear dynamic regret. 
Our experimental evaluation on various high-dimensional non-stationary environments shows that the $\\texttt{ProST}$ framework achieves a higher online return at suboptimal {$t_{1:K}$} than the existing methods.", "keywords": "Non-stationary RL;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/633a58c9ce8d91850c4164b9825f3ceef5a42ac6.zip", "author": "Hyunin Lee;Yuhao Ding;Jongmin Lee;Ming Jin;Javad Lavaei;Somayeh Sojoudi", "authorids": "~Hyunin_Lee1;~Yuhao_Ding2;~Jongmin_Lee1;~Ming_Jin2;~Javad_Lavaei1;~Somayeh_Sojoudi1", "gender": "M;M;M;M;;F", "homepage": "https://hyunin-lee.github.io/;https://yuhaod.github.io/homepage/;https://www.jmlee.kr;http://www.jinming.tech/;;https://eecs.berkeley.edu/~sojoudi/", "dblp": "353/1740;218/2837;68/222-4.html;;;06/7000", "google_scholar": "kHTDu1YAAAAJ;Q65PtLgAAAAJ;https://scholar.google.co.kr/citations?user=rFcK8EEAAAAJ;YdxdTtkAAAAJ;;kNH8zcgAAAAJ", "orcid": ";;;;;", "linkedin": ";;jmlee123/;;;", "or_profile": "~Hyunin_Lee1;~Yuhao_Ding2;~Jongmin_Lee1;~Ming_Jin2;~Javad_Lavaei1;~Somayeh_Sojoudi1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Virginia Tech;;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;vt.edu;;berkeley.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nlee2023tempo,\ntitle={Tempo Adaptation in Non-stationary Reinforcement Learning},\nauthor={Hyunin Lee and Yuhao Ding and Jongmin Lee and Ming Jin and Javad Lavaei and Somayeh Sojoudi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7R8noSP4vL}\n}", "github": "", "project": "", "reviewers": "WHPE;3oD2;EXsm;2jp7;4AXz", "pdf_size": 1217450, "rating": "4;5;7;7;7", "confidence": "5;3;2;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;4;3;3", "presentation": "2;2;2;3;2", "wc_summary": "80;91;111;118;220", "wc_strengths": "46;67;122;120;154", "wc_weaknesses": "314;585;49;190;427", "wc_questions": "42;109;0;9;114", "wc_limitations": "119;9;6;13;21", "wc_review": "601;861;288;450;936", "wc_reply_reviewers": "243;40;49;76;1058", "wc_reply_authors": "864;127;0;0;2281", "reply_reviewers": "2;1;1;1;4", "reply_authors": "4;2;1;1;6", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 124.0, 49.89188310737529 ], "wc_strengths_avg": [ 101.8, 39.46846842734083 ], "wc_weaknesses_avg": [ 313.0, 185.36774260911739 ], "wc_questions_avg": [ 54.8, 48.387601717795434 ], "wc_limitations_avg": [ 33.6, 42.995813749712894 ], "wc_review_avg": [ 627.2, 243.78794063693965 ], "wc_reply_reviewers_avg": [ 293.2, 389.4505873663564 ], "wc_reply_authors_avg": [ 654.4, 874.5729472148106 ], "reply_reviewers_avg": [ 1.8, 1.1661903789690602 ], "reply_authors_avg": [ 2.8, 1.9390719429665317 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8068715304598785, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8608387991631363966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "berkeley.edu;berkeley.edu;berkeley.edu;vt.edu;;berkeley.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Virginia Tech", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.berkeley.edu;https://www.vt.edu", "aff_unique_abbr": "UC Berkeley;VT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Order Matters in the Presence of Dataset Imbalance for Multilingual Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72707", "id": "7RMGI4slcb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d346609ec2fefd3938c898a0dda4a480-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7RMGI4slcb", "openreview": "https://openreview.net/forum?id=7RMGI4slcb", "poster": "/media/PosterPDFs/NeurIPS%202023/72707.png?t=1702062434.8238268", "slides": "https://nips.cc/virtual/2023/poster/72707", "video": "https://nips.cc/virtual/2023/poster/72707", "author_site": "Dami Choi, Derrick Xin, Hamid Dadkhahi, Justin Gilmer, Ankush Garg, Orhan Firat, Chih-Kuan Yeh, Andrew Dai, Behrooz Ghorbani", "tldr": "", "abstract": "In this paper, we empirically study the optimization dynamics of multi-task learning, particularly focusing on those that govern a collection of tasks with significant data imbalance. We present a simple yet effective method of pre-training on high-resource tasks, followed by fine-tuning on a mixture of high/low-resource tasks. We provide a thorough empirical study and analysis of this method's benefits showing that it achieves consistent improvements relative to the performance trade-off profile of standard static weighting. We analyze under what data regimes this method is applicable and show its improvements empirically in neural machine translation (NMT) and multi-lingual language modeling.", "keywords": "Multitask Optimization;Multilingual;Pre-training;Language Models;Language Sampling;Low Resource Languages;Overfitting", "primary_area": "", "supplementary_material": "", "author": "Dami Choi;Derrick Xin;Hamid Dadkhahi;Justin Gilmer;Ankush Garg;Orhan Firat;Chih-Kuan Yeh;Andrew M. Dai;Behrooz Ghorbani", "authorids": "~Dami_Choi1;~Derrick_Xin1;~Hamid_Dadkhahi1;~Justin_Gilmer1;~Ankush_Garg1;~Orhan_Firat1;~Chih-Kuan_Yeh1;~Andrew_M._Dai1;~Behrooz_Ghorbani1", "gender": ";M;;M;M;M;M;;M", "homepage": ";https://github.com/dxin12345;;;;;https://chihkuanyeh.github.io/;;", "dblp": "209/9687;;124/3214;;86/7221;120/2225;;162/0166;59/9736", "google_scholar": "giuZW04AAAAJ;;https://scholar.google.com/citations?hl=en;Ml_vQ8MAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tr/citations?user=dLaR9lgAAAAJ;;;2r2NuDAAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;agbgarg/;;;;andrewdai/", "or_profile": "~Dami_Choi1;~Derrick_Xin1;~Hamid_Dadkhahi1;~Justin_Gilmer1;~Ankush_Garg1;~Orhan_Firat1;~Chih-Kuan_Yeh1;~Behrooz_Ghorbani1;~Andrew_Mingbo_Dai1", "aff": "Department of Computer Science, University of Toronto;Research, Google;Google;Google Brain;Google;Google;Google;Google;Google", "aff_domain": "cs.toronto.edu;research.google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Researcher;Researcher;research engineer;Research Scientist;Researcher;Researcher;Software Engineer", "bibtex": "@inproceedings{\nchoi2023order,\ntitle={Order Matters in the Presence of Dataset Imbalance for Multilingual Learning},\nauthor={Dami Choi and Derrick Xin and Hamid Dadkhahi and Justin Gilmer and Ankush Garg and Orhan Firat and Chih-Kuan Yeh and Andrew M. 
Dai and Behrooz Ghorbani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7RMGI4slcb}\n}", "github": "", "project": "", "reviewers": "V1qx;wUUc;yZ9n;evVX;9ZBe", "pdf_size": 778270, "rating": "5;6;6;7;7", "confidence": "4;4;3;4;4", "soundness": "3;4;4;3;3", "novelty": "2;3;3;3;3", "presentation": "3;4;3;4;4", "wc_summary": "60;67;76;82;90", "wc_strengths": "48;42;125;53;135", "wc_weaknesses": "121;59;60;72;151", "wc_questions": "321;58;32;228;67", "wc_limitations": "58;14;41;5;11", "wc_review": "608;240;334;440;454", "wc_reply_reviewers": "78;39;0;89;22", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 75.0, 10.620734437881403 ], "wc_strengths_avg": [ 80.6, 40.60837352074077 ], "wc_weaknesses_avg": [ 92.6, 36.96809435175149 ], "wc_questions_avg": [ 141.2, 113.32678412449548 ], "wc_limitations_avg": [ 25.8, 20.291870293297265 ], "wc_review_avg": [ 415.2, 123.75847445730737 ], "wc_reply_reviewers_avg": [ 45.6, 33.505820389896435 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15975988217821284144&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.toronto.edu;research.google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;1;1", "aff_unique_norm": "University of Toronto;Google", "aff_unique_dep": "Department of Computer Science;Google Research", "aff_unique_url": "https://www.utoronto.ca;https://research.google", "aff_unique_abbr": "U of T;Google", "aff_campus_unique_index": "0;1;1;1;1;1;1;1;1", "aff_campus_unique": "Toronto;Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "Canada;United States" }, { "title": "On student-teacher deviations in distillation: does it pay to disobey?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72706", "id": "7UdVPRmpif", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/12d286282e1be5431ea05262a21f415c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7UdVPRmpif", "openreview": "https://openreview.net/forum?id=7UdVPRmpif", "poster": "/media/PosterPDFs/NeurIPS%202023/72706.png?t=1700532500.6580796", "slides": "https://nips.cc/virtual/2023/poster/72706", "video": "https://nips.cc/virtual/2023/poster/72706", "author_site": "Vaishnavh Nagarajan, Aditya Menon, Srinadh Bhojanapalli, Hossein Mobahi, Sanjiv Kumar", "tldr": "", "abstract": "Knowledge distillation (KD) has been widely used to improve the test accuracy of a \"student\" network, by training it to mimic the soft probabilities of a trained \"teacher\" network. Yet, it has been shown in recent work that, despite being trained to fit the teacher's probabilities, the student may not only significantly deviate from the teacher probabilities, but may also outdo the teacher in performance. Our work aims to reconcile this seemingly paradoxical observation.
Specifically, we characterize the precise nature of the student-teacher deviations, and argue how they _can_ co-occur with better generalization. First, through experiments on image and language data, we identify that these probability deviations correspond to the student systematically _exaggerating_ the confidence levels of the teacher.\nNext, we theoretically and empirically establish another form of exaggeration in some simple settings: KD exaggerates the implicit bias of gradient descent in converging faster along the top eigendirections of the data. Finally, we tie these two observations together: we demonstrate that the exaggerated bias of KD can simultaneously result in both (a) the exaggeration of confidence and (b) the improved generalization of the student, thus offering a resolution to the apparent paradox. Our analysis brings existing theory and practice closer by considering the role of gradient descent in KD and by demonstrating the exaggerated bias effect in both theoretical and empirical settings.", "keywords": "knowledge distillation;regularization;understanding;underfitting;theory", "primary_area": "", "supplementary_material": "/attachment/e29459603a4a9b52cb092861372c1825e730e332.pdf", "author": "Vaishnavh Nagarajan;Aditya Krishna Menon;Srinadh Bhojanapalli;Hossein Mobahi;Sanjiv Kumar", "authorids": "~Vaishnavh_Nagarajan3;~Aditya_Krishna_Menon1;~Srinadh_Bhojanapalli1;~Hossein_Mobahi2;~Sanjiv_Kumar1", "gender": "M;;M;M;M", "homepage": "https://bsrinadh.github.io/;http://www.sanjivk.com/;https://akmenon.github.io/;http://people.csail.mit.edu/hmobahi/;https://vaishnavh.github.io/", "dblp": "131/6700;;89/3514;94/1490;161/0079", "google_scholar": "bpSF_9EAAAAJ;https://scholar.google.com/citations?hl=en;;GSHmKZkAAAAJ;https://scholar.google.nl/citations?user=LrsjJfwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Srinadh_Bhojanapalli1;~Sanjiv_Kumar1;~Aditya_Menon1;~Hossein_Mobahi1;~Vaishnavh_Nagarajan1", "aff": "Google;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Research Scientist;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nnagarajan2023on,\ntitle={On student-teacher deviations in distillation: does it pay to disobey?},\nauthor={Vaishnavh Nagarajan and Aditya Krishna Menon and Srinadh Bhojanapalli and Hossein Mobahi and Sanjiv Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7UdVPRmpif}\n}", "github": "", "project": "", "reviewers": "pE3g;vVNQ;MZ5g;tbs9;zo9s", "pdf_size": 5273689, "rating": "5;6;6;7;7", "confidence": "4;4;3;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;4;4", "wc_summary": "107;128;186;138;93", "wc_strengths": "44;44;85;91;131", "wc_weaknesses": "106;136;272;226;122", "wc_questions": "158;2;51;18;1", "wc_limitations": "9;2;68;3;11", "wc_review": "424;312;662;476;358", "wc_reply_reviewers": "66;0;110;46;156", "wc_reply_authors": "349;0;41;0;33", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 130.4, 31.941195970094796 ], "wc_strengths_avg": [ 79.0, 32.66190441477655 ], "wc_weaknesses_avg": [ 172.4, 64.9110160142329 ], "wc_questions_avg": [ 46.0, 
58.8455605802171 ], "wc_limitations_avg": [ 18.6, 24.9367199126108 ], "wc_review_avg": [ 446.4, 121.44068511005692 ], "wc_reply_reviewers_avg": [ 75.6, 53.5372767331324 ], "wc_reply_authors_avg": [ 84.6, 133.2555439747255 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17119461308126198348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A State Representation for Diminishing Rewards", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72705", "id": "7Uix1eQZ8z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8df0fe2bba0f14208a10c1cb22e71552-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7Uix1eQZ8z", "openreview": "https://openreview.net/forum?id=7Uix1eQZ8z", "poster": "/media/PosterPDFs/NeurIPS%202023/72705.png?t=1702335663.2169962", "slides": "https://nips.cc/virtual/2023/poster/72705", "video": "https://nips.cc/virtual/2023/poster/72705", "author_site": "Ted Moskovitz, Samo Hromadka, Ahmed Touati, Diana Borsa, Maneesh Sahani", "tldr": "", "abstract": "A common setting in multitask reinforcement learning (RL) demands that an agent rapidly adapt to various stationary reward functions randomly sampled from a fixed distribution. In such situations, the successor representation (SR) is a popular framework which supports rapid policy evaluation by decoupling a policy's expected discounted, cumulative state occupancies from a specific reward function. However, in the natural world, sequential tasks are rarely independent, and instead reflect shifting priorities based on the availability and subjective perception of rewarding stimuli. Reflecting this disjunction, in this paper we study the phenomenon of diminishing marginal utility and introduce a novel state representation, the $\\lambda$ representation ($\\lambda$R), which, surprisingly, is required for policy evaluation in this setting and which generalizes the SR as well as several other state representations from the literature.
We establish the $\\lambda$R's formal properties and examine its normative advantages in the context of machine learning, as well as its usefulness for studying natural behaviors, particularly foraging.", "keywords": "reinforcement learning;successor features;successor representation;neuroscience", "primary_area": "", "supplementary_material": "/attachment/b12f52cba0c78e12bd820a30b3e9c1de710b9b46.zip", "author": "Ted Moskovitz;Samo Hromadka;Ahmed Touati;Diana L Borsa;Maneesh Sahani", "authorids": "~Ted_Moskovitz1;~Samo_Hromadka1;~Ahmed_Touati1;~Diana_L_Borsa1;~Maneesh_Sahani1", "gender": "M;M;M;;", "homepage": "https://tedmoskovitz.github.io/;;;;http://www.gatsby.ucl.ac.uk/~maneesh", "dblp": ";;147/5871;164/6204;44/3197", "google_scholar": "pPVXrTYAAAAJ;;https://scholar.google.fr/citations?user=D4LT5xAAAAAJ;;https://scholar.google.co.uk/citations?user=rwxX83UAAAAJ", "orcid": ";;;;0000-0001-5560-3341", "linkedin": ";samo-hromadka/;ahmed-touati-4a132a76/;diana-l-borsa-12834023;", "or_profile": "~Ted_Moskovitz1;~Samo_Hromadka1;~Ahmed_Touati1;~Diana_L_Borsa1;~Maneesh_Sahani1", "aff": "Gatsby Computational Neuroscience Unit;Gatsby Computational Neuroscience Unit;Meta Facebook;DeepMind/Google;University College London", "aff_domain": "gatsby.ucl.ac.uk;gatsby.ucl.ac.uk;fb.com;google.com;ucl.ac.uk", "position": "PhD student;PhD student;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nmoskovitz2023a,\ntitle={A State Representation for Diminishing Rewards},\nauthor={Ted Moskovitz and Samo Hromadka and Ahmed Touati and Diana L Borsa and Maneesh Sahani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7Uix1eQZ8z}\n}", "github": "", "project": "", "reviewers": "GnnZ;2Ebj;7pTK;jk48", "pdf_size": 1755508, "rating": "5;6;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "50;62;72;159", "wc_strengths": "33;29;74;147", "wc_weaknesses": "376;84;230;423", "wc_questions": "131;39;59;80", "wc_limitations": "4;22;6;17", "wc_review": "594;236;441;826", "wc_reply_reviewers": "473;9;0;62", "wc_reply_authors": "1664;16;0;23", "reply_reviewers": "3;1;0;1", "reply_authors": "6;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.75, 43.002180177288686 ], "wc_strengths_avg": [ 70.75, 47.415055625824166 ], "wc_weaknesses_avg": [ 278.25, 132.82389656985674 ], "wc_questions_avg": [ 77.25, 34.2518247689083 ], "wc_limitations_avg": [ 12.25, 7.495832175282475 ], "wc_review_avg": [ 524.25, 215.60191905453902 ], "wc_reply_reviewers_avg": [ 136.0, 196.00382649326008 ], "wc_reply_authors_avg": [ 425.75, 714.9525770986493 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3996054630312528950&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "gatsby.ucl.ac.uk;gatsby.ucl.ac.uk;fb.com;google.com;ucl.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University College London;Meta;Google", "aff_unique_dep": "Gatsby Computational Neuroscience Unit;Meta Platforms, Inc.;DeepMind", "aff_unique_url": 
"https://www.ucl.ac.uk;https://meta.com;https://www.google.com", "aff_unique_abbr": "UCL;Meta;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Waymax: An Accelerated, Data-Driven Simulator for Large-Scale Autonomous Driving Research", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73687", "id": "7VSBaP2OXN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1838feeb71c4b4ea524d0df2f7074245-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=7VSBaP2OXN", "openreview": "https://openreview.net/forum?id=7VSBaP2OXN", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73687", "video": "https://nips.cc/virtual/2023/poster/73687", "author_site": "Cole Gulino, Justin Fu, Wenjie Luo, George Tucker, Eli Bronstein, Yiren Lu, Jean Harb, Xinlei Pan, Yan Wang, Xiangyu Chen, John Co-Reyes, Rishabh Agarwal, Rebecca Roelofs, Yao Lu, Nico Montali, Paul Mougin, Zoey Yang, Brandyn White, Aleksandra Faust, Rowan McAllister, Dragomir Anguelov, Benjamin Sapp", "tldr": "", "abstract": "Simulation is an essential tool to develop and benchmark autonomous vehicle planning software in a safe and cost-effective manner. However, realistic simulation requires accurate modeling of multi-agent interactive behaviors to be trustworthy, behaviors which can be highly nuanced and complex. To address these challenges, we introduce Waymax, a new data-driven simulator for autonomous driving in multi-agent scenes, designed for large-scale simulation and testing. Waymax uses publicly-released, real-world driving data (e.g., the Waymo Open Motion Dataset) to initialize or play back a diverse set of multi-agent simulated scenarios. It runs entirely on hardware accelerators such as TPUs/GPUs and supports in-graph simulation for training, making it suitable for modern large-scale, distributed machine learning workflows. To support online training and evaluation, Waymax includes several learned and hard-coded behavior models that allow for realistic interaction within simulation. 
To supplement Waymax, we benchmark a suite of popular imitation and reinforcement learning algorithms with ablation studies on different design decisions, where we highlight the effectiveness of routes as guidance for planning agents and the ability of RL to overfit against simulated agents.", "keywords": "simulator;autonomous driving;planning;behavior prediction;reinforcement learning;imitation learning", "primary_area": "", "supplementary_material": "/attachment/f2f7a41c34d24784f8a5172f354f28003c62adb3.pdf", "author": "Cole Gulino;Justin Fu;Wenjie Luo;George Tucker;Eli Bronstein;Yiren Lu;Jean Harb;Xinlei Pan;Yan Wang;Xiangyu Chen;John D Co-Reyes;Rishabh Agarwal;Rebecca Roelofs;Yao Lu;Nico Montali;Paul Mougin;Zoey Zeyu Yang;Brandyn White;Aleksandra Faust;Rowan Thomas McAllister;Dragomir Anguelov;Benjamin Sapp", "authorids": "~Cole_Gulino1;~Justin_Fu1;~Wenjie_Luo1;~George_Tucker1;~Eli_Bronstein1;~Yiren_Lu1;~Jean_Harb1;~Xinlei_Pan1;~Yan_Wang10;~Xiangyu_Chen1;~John_D_Co-Reyes1;~Rishabh_Agarwal2;~Rebecca_Roelofs1;~Yao_Lu13;~Nico_Montali1;~Paul_Mougin1;~Zoey_Zeyu_Yang1;~Brandyn_White1;~Aleksandra_Faust1;~Rowan_Thomas_McAllister1;~Dragomir_Anguelov1;~Benjamin_Sapp3", "gender": "M;;;M;M;M;M;M;M;M;M;M;F;;M;;;M;F;M;M;M", "homepage": ";;;https://sites.google.com/view/gjt;http://elibronstein.com/;https://luyiren.me/;cs.mcgill.ca/~jmerhe1;;https://www.cs.cornell.edu/~yanwang/;https://www.cs.cornell.edu/~xchen/;;https://agarwl.github.io;;;;;;;http://www.afaust.info;https://rowanmcallister.github.io/;;", "dblp": ";;;135/5748;;;;188/6125.html;59/2227;;198/1129;;145/2224;26/5662-6;;;;63/3079;135/8420;123/6416;a/DragomirAnguelov;54/5582", "google_scholar": ";T9To2C0AAAAJ;;-gJkPHIAAAAJ;uQRY6KoAAAAJ;wUUxGfAAAAAJ;;tlhfhLoAAAAJ;nZsD8XwAAAAJ;xBv-PMEAAAAJ;;https://scholar.google.ca/citations?user=aH8AJu4AAAAJ;;OI7zFmwAAAAJ;xPWCLvEAAAAJ;;;;RK72t68AAAAJ;https://scholar.google.co.uk/citations?user=6uIhh6MAAAAJ;https://scholar.google.com/citations?hl=en;aPqcyU4AAAAJ", "orcid": ";;;;0009-0002-6544-2988;;;;;;;;;;;;;;0000-0002-3268-8685;0000-0002-9519-2345;;", "linkedin": "colegulino/;;;;eli-bronstein;luyiren/;;;;;;;;;;https://uk.linkedin.com/in/paul-mougin-763775118;;;aleksandrafaust;rowantmcallister;dragomiranguelov/;", "or_profile": "~Cole_Gulino1;~Justin_Fu1;~Wenjie_Luo1;~George_Tucker1;~Eli_Bronstein1;~Yiren_Lu1;~Jean_Harb1;~Xinlei_Pan1;~Yan_Wang10;~Xiangyu_Chen1;~John_D_Co-Reyes1;~Rishabh_Agarwal2;~Rebecca_Roelofs1;~Yao_Lu13;~Nico_Montali1;~Paul_Mougin1;~Zoey_Zeyu_Yang1;~Brandyn_White1;~Aleksandra_Faust1;~Rowan_Thomas_McAllister1;~Dragomir_Anguelov1;~Benjamin_Sapp3", "aff": ";Google;;Google Brain;Waymo;Waymo Research;McGill University;Waymo LLC;Waymo;Cornell University;;Google DeepMind;Google;Google;Google;;;University of Central Florida;Google Brain;Waymo;Waymo;Waymo", "aff_domain": ";google.com;;google.com;waymo.com;google.com;mcgill.ca;waymo.com;waymo.com;cornell.edu;;google.com;google.com;google.com;google.com;;;ucf.edu;google.com;waymo.com;waymo.com;waymo.com", "position": ";Researcher;;Research Scientist;Researcher;Researcher;PhD student;Researcher;Researcher;PhD student;;Research Scientist;Research scientist;Researcher;Researcher;;;Undergrad student;Principal Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ngulino2023waymax,\ntitle={Waymax: An Accelerated, Data-Driven Simulator for Large-Scale Autonomous Driving Research},\nauthor={Cole Gulino and Justin Fu and Wenjie Luo and George Tucker and Eli Bronstein and Yiren Lu and Jean Harb and Xinlei Pan and Yan Wang and Xiangyu Chen 
and John D Co-Reyes and Rishabh Agarwal and Rebecca Roelofs and Yao Lu and Nico Montali and Paul Mougin and Zoey Zeyu Yang and Brandyn White and Aleksandra Faust and Rowan Thomas McAllister and Dragomir Anguelov and Benjamin Sapp},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=7VSBaP2OXN}\n}", "github": "", "project": "", "reviewers": "DvxP;1Xpz;ngUf", "pdf_size": 894000, "rating": "6;7;8", "confidence": "4;2;5", "wc_summary_and_contributions": "76;52;93", "wc_strengths": "105;111;129", "wc_improvement": "56;147;112", "wc_limitations": "1;37;34", "wc_correctness": "30;11;7", "wc_clarity": "1;5;3", "wc_relation_to_prior_work": "1;16;14", "wc_documentation": "54;47;7", "wc_additional_feedback": "1;1;1", "wc_review": "325;427;400", "wc_reply_reviewers": "12;6;0", "wc_reply_authors": "584;515;419", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "wc_summary_and_contributions_avg": [ 73.66666666666667, 16.81930108205715 ], "wc_strengths_avg": [ 115.0, 10.198039027185569 ], "wc_improvement_avg": [ 105.0, 37.47888294315436 ], "wc_limitations_avg": [ 24.0, 16.30950643030009 ], "wc_correctness_avg": [ 16.0, 10.03327796219494 ], "wc_clarity_avg": [ 3.0, 1.632993161855452 ], "wc_relation_to_prior_work_avg": [ 10.333333333333334, 6.649979114420001 ], "wc_documentation_avg": [ 36.0, 20.704266871026046 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 384.0, 43.15089802078283 ], "wc_reply_reviewers_avg": [ 6.0, 4.898979485566356 ], "wc_reply_authors_avg": [ 506.0, 67.66091929614909 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 22, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=654263767191448802&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": ";google.com;;google.com;waymo.com;google.com;mcgill.ca;waymo.com;waymo.com;cornell.edu;;google.com;google.com;google.com;google.com;;;ucf.edu;google.com;waymo.com;waymo.com;waymo.com", "author_num": 22, "aff_unique_index": "0;0;1;1;2;1;1;3;0;0;0;0;4;0;1;1;1", "aff_unique_norm": "Google;Waymo;McGill University;Cornell University;University of Central Florida", "aff_unique_dep": "Google;;;;", "aff_unique_url": "https://www.google.com;https://www.waymo.com;https://www.mcgill.ca;https://www.cornell.edu;https://www.ucf.edu", "aff_unique_abbr": "Google;Waymo;McGill;Cornell;UCF", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1;0;0;0;2;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "Generative Modeling through the Semi-dual Formulation of Unbalanced Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72704", "id": "7WQt1J13ex", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/84706cdfc192cd0351daf48f379847e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7WQt1J13ex", "openreview": "https://openreview.net/forum?id=7WQt1J13ex", "poster": "/media/PosterPDFs/NeurIPS%202023/72704.png?t=1702016204.875153", "slides": "https://nips.cc/virtual/2023/poster/72704", "video": "https://nips.cc/virtual/2023/poster/72704", "author_site": 
"Jaemoo Choi, Jaewoong Choi, Myungjoo Kang", "tldr": "", "abstract": "Optimal Transport (OT) problem investigates a transport map that bridges two distributions while minimizing a given cost function. In this regard, OT between tractable prior distribution and data has been utilized for generative modeling tasks. However, OT-based methods are susceptible to outliers and face optimization challenges during training. \nIn this paper, we propose a novel generative model based on the semi-dual formulation of Unbalanced Optimal Transport (UOT). Unlike OT, UOT relaxes the hard constraint on distribution matching. This approach provides better robustness against outliers, stability during training, and faster convergence. We validate these properties empirically through experiments. Moreover, we study the theoretical upper-bound of divergence between distributions in UOT. Our model outperforms existing OT-based generative models, achieving FID scores of 2.97 on CIFAR-10 and 6.36 on CelebA-HQ-256. The code is available at \\url{https://github.com/Jae-Moo/UOTM}.", "keywords": "Optimal Transport;Generative modeling;Generative adversarial network", "primary_area": "", "supplementary_material": "/attachment/70b0160cae25610b7f36a7a9fe78e7e5f4f0bf20.pdf", "author": "Jaemoo Choi;Jaewoong Choi;Myungjoo Kang", "authorids": "~Jaemoo_Choi1;~Jaewoong_Choi1;~Myungjoo_Kang1", "gender": "M;M;", "homepage": "https://github.com/JaemooC;;http://ncia.snu.ac.kr/", "dblp": "295/8916;63/11483;64/5657.html", "google_scholar": "Ba2G6sIAAAAJ;e4ZLjREAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jaemoo_Choi1;~Jaewoong_Choi1;~Myungjoo_Kang1", "aff": "Seoul National University;Korea Institute for Advanced Study;Seoul National University", "aff_domain": "snu.ac.kr;kias.re.kr;snu.ac.kr", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nchoi2023generative,\ntitle={Generative Modeling through the Semi-dual Formulation of Unbalanced Optimal Transport},\nauthor={Jaemoo Choi and Jaewoong Choi and Myungjoo Kang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7WQt1J13ex}\n}", "github": "", "project": "", "reviewers": "6D69;Pz9z;9t3v;TBHC", "pdf_size": 9340630, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "135;42;80;89", "wc_strengths": "42;44;88;53", "wc_weaknesses": "1035;135;187;6", "wc_questions": "238;82;35;90", "wc_limitations": "6;18;1;1", "wc_review": "1456;321;391;239", "wc_reply_reviewers": "407;118;10;11", "wc_reply_authors": "2430;54;13;27", "reply_reviewers": "3;1;1;1", "reply_authors": "8;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.5, 33.094561486745825 ], "wc_strengths_avg": [ 56.75, 18.511820547963403 ], "wc_weaknesses_avg": [ 340.75, 406.20584375412426 ], "wc_questions_avg": [ 111.25, 76.13598032467961 ], "wc_limitations_avg": [ 6.5, 6.946221994724902 ], "wc_review_avg": [ 601.75, 496.12668493037137 ], "wc_reply_reviewers_avg": [ 136.5, 162.22284056198745 ], "wc_reply_authors_avg": [ 631.0, 1038.7576714518166 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.75, 2.48746859276655 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 
18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17277087908868106363&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;kias.re.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;Korea Institute for Advanced Study", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;http://www.kaist.edu", "aff_unique_abbr": "SNU;KIAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Masked Image Residual Learning for Scaling Deeper Vision Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72703", "id": "7WTA298wts", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3bac97f3227c52c0179a6d967480867-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7WTA298wts", "openreview": "https://openreview.net/forum?id=7WTA298wts", "poster": "/media/PosterPDFs/NeurIPS%202023/72703.png?t=1699444796.542762", "slides": "https://nips.cc/virtual/2023/poster/72703", "video": "https://nips.cc/virtual/2023/poster/72703", "author_site": "Guoxi Huang, Hongtao Fu, Adrian G. Bors", "tldr": "", "abstract": "Deeper Vision Transformers (ViTs) are more challenging to train. We expose a degradation problem in deeper layers of ViT when using masked image modeling (MIM) for pre-training.\nTo ease the training of deeper ViTs, we introduce a self-supervised learning framework called $\\textbf{M}$asked $\\textbf{I}$mage $\\textbf{R}$esidual $\\textbf{L}$earning ($\\textbf{MIRL}$), which significantly alleviates the degradation problem, making scaling ViT along depth a promising direction for performance upgrade. We reformulate the pre-training objective for deeper layers of ViT as learning to recover the residual of the masked image.\nWe provide extensive empirical evidence showing that deeper ViTs can be effectively optimized using MIRL and easily gain accuracy from increased depth. \nWith the same level of computational complexity as ViT-Base and ViT-Large, we instantiate $4.5{\\times}$ and $2{\\times}$ deeper ViTs, dubbed ViT-S-54 and ViT-B-48.\nThe deeper ViT-S-54, costing $3{\\times}$ less than ViT-Large, achieves performance on par with ViT-Large.\nViT-B-48 achieves 86.2\\% top-1 accuracy on ImageNet. \nOn one hand, deeper ViTs pre-trained with MIRL exhibit excellent generalization capabilities on downstream tasks, such as object detection and semantic segmentation. On the other hand, MIRL demonstrates high pre-training efficiency. With less pre-training time, MIRL yields competitive performance compared to other approaches.", "keywords": "self-supervised learning;vision transformer;masked image modeling", "primary_area": "", "supplementary_material": "/attachment/bd1a333b61c75ee39dc0a4d48a4f6a8c87eec8e1.pdf", "author": "Guoxi Huang;Hongtao Fu;Adrian G. 
Bors", "authorids": "~Guoxi_Huang1;~Hongtao_Fu1;~Adrian_G._Bors1", "gender": "M;M;M", "homepage": "https://russellllaputa.github.io/;;https://www-users.cs.york.ac.uk/adrian/", "dblp": "258/5002;;94/1481", "google_scholar": "q16Hy-sAAAAJ;;https://scholar.google.co.uk/citations?user=cvdyalUAAAAJ", "orcid": "0000-0002-8481-0232;0000-0002-6692-0913;0000-0001-7838-0021", "linkedin": ";;adrian-bors-32a3668/", "or_profile": "~Guoxi_Huang1;~Hongtao_Fu1;~Adrian_G._Bors1", "aff": "University of York;Baidu;University of York", "aff_domain": "york.ac.uk;baidu.com;york.ac.uk", "position": "PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\nhuang2023masked,\ntitle={Masked Image Residual Learning for Scaling Deeper Vision Transformers},\nauthor={Guoxi Huang and Hongtao Fu and Adrian G. Bors},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7WTA298wts}\n}", "github": "", "project": "", "reviewers": "6uey;Pj1d;Cuwd;DfMD;igwC", "pdf_size": 2053584, "rating": "5;5;5;6;6", "confidence": "4;5;5;5;4", "soundness": "3;3;2;2;3", "novelty": "3;3;2;3;3", "presentation": "3;3;2;3;3", "wc_summary": "20;109;90;79;72", "wc_strengths": "4;51;47;48;80", "wc_weaknesses": "4;159;58;356;130", "wc_questions": "302;56;5;127;81", "wc_limitations": "4;9;1;6;4", "wc_review": "334;384;201;616;367", "wc_reply_reviewers": "14;25;16;180;35", "wc_reply_authors": "15;18;9;603;13", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 74.0, 29.752310834622577 ], "wc_strengths_avg": [ 46.0, 24.289915602982237 ], "wc_weaknesses_avg": [ 141.4, 120.2723575889323 ], "wc_questions_avg": [ 114.2, 101.83005450258779 ], "wc_limitations_avg": [ 4.8, 2.6381811916545836 ], "wc_review_avg": [ 380.4, 134.19180302835193 ], "wc_reply_reviewers_avg": [ 54.0, 63.438158863573584 ], "wc_reply_authors_avg": [ 131.6, 235.71813676507796 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16666666666666666, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3382727797842648496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "york.ac.uk;baidu.com;york.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of York;Baidu", "aff_unique_dep": ";Baidu, Inc.", "aff_unique_url": "https://www.york.ac.uk;https://www.baidu.com", "aff_unique_abbr": "York;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Joint processing of linguistic properties in brains and language models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72702", "id": "7WeCyYy9TL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a0e2de215bd17c39ad08ba1d16c1b12-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7WeCyYy9TL", "openreview": "https://openreview.net/forum?id=7WeCyYy9TL", "poster": "/media/PosterPDFs/NeurIPS%202023/72702.png?t=1701438883.8556097", "slides": "https://nips.cc/virtual/2023/poster/72702", "video": "https://nips.cc/virtual/2023/poster/72702", 
"author_site": "SUBBAREDDY OOTA, Manish Gupta, Mariya Toneva", "tldr": "", "abstract": "Language models have been shown to be very effective in predicting brain recordings of subjects experiencing complex language stimuli. For a deeper understanding of this alignment, it is important to understand the correspondence between the detailed processing of linguistic information by the human brain versus language models. We investigate this correspondence via a direct approach, in which we eliminate information related to specific linguistic properties in the language model representations and observe how this intervention affects the alignment with fMRI brain recordings obtained while participants listened to a story. We investigate a range of linguistic properties (surface, syntactic, and semantic) and find that the elimination of each one results in a significant decrease in brain alignment. Specifically, we find that syntactic properties (i.e. Top Constituents and Tree Depth) have the largest effect on the trend of brain alignment across model layers. These findings provide clear evidence for the role of specific linguistic information in the alignment between brain and language models, and open new avenues for mapping the joint information processing in both systems. We make the code publicly available https://github.com/subbareddy248/lingprop-brain-alignment.", "keywords": "Linguistic properties;fMRI;probing tasks;cognitive neuroscience;language models;NLP", "primary_area": "", "supplementary_material": "/attachment/b57c159056bff17e8e93b543171cdfa0adfb29c0.pdf", "author": "SUBBA REDDY OOTA;Manish Gupta;Mariya Toneva", "authorids": "~SUBBA_REDDY_OOTA1;~Manish_Gupta1;~Mariya_Toneva1", "gender": "M;M;F", "homepage": "https://sites.google.com/view/subbareddyoota300/home?authuser=0;https://sites.google.com/view/manishg/;https://mtoneva.com", "dblp": "190/1709;g/ManishGupta1.html;160/4677", "google_scholar": "https://scholar.google.co.in/citations?user=4Uz0LngAAAAJ;https://scholar.google.co.in/citations?user=eX9PSu0AAAAJ;https://scholar.google.ca/citations?user=a61sk-4AAAAJ", "orcid": "0000-0002-5975-622X;0000-0002-2843-3110;0000-0002-2407-9871", "linkedin": "subba-reddy-oota-11a91254/;manishsgupta/;", "or_profile": "~SUBBA_REDDY_OOTA1;~Manish_Gupta1;~Mariya_K_Toneva1", "aff": "MPI-SWS;Microsoft;Max Planck Institute for Software Systems", "aff_domain": "mpi-sws.org;microsoft.com;mpi-sws.org", "position": "Visiting Scholar;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\noota2023joint,\ntitle={Joint processing of linguistic properties in brains and language models},\nauthor={SUBBA REDDY OOTA and Manish Gupta and Mariya Toneva},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7WeCyYy9TL}\n}", "github": "", "project": "", "reviewers": "9Zaj;YHJW;p9mD;wwF4", "pdf_size": 7106752, "rating": "2;5;7;7", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "1;2;3;3", "presentation": "3;3;3;4", "wc_summary": "50;58;136;113", "wc_strengths": "7;120;36;122", "wc_weaknesses": "182;218;72;342", "wc_questions": "2;164;185;154", "wc_limitations": "2;93;27;1", "wc_review": "243;653;456;732", "wc_reply_reviewers": "0;35;73;121", "wc_reply_authors": "0;15;15;338", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;3", "rating_avg": [ 5.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": 
[ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 36.28618883266745 ], "wc_strengths_avg": [ 71.25, 50.80046751753373 ], "wc_weaknesses_avg": [ 203.5, 96.36778507364377 ], "wc_questions_avg": [ 126.25, 72.60294415517872 ], "wc_limitations_avg": [ 30.75, 37.41907935799597 ], "wc_review_avg": [ 521.0, 189.37660890405658 ], "wc_reply_reviewers_avg": [ 57.25, 44.95761893161158 ], "wc_reply_authors_avg": [ 92.0, 142.16012099038184 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10695823774315022810&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "mpi-sws.org;microsoft.com;mpi-sws.org", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Max Planck Institute for Software Systems;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.mpi-sws.org;https://www.microsoft.com", "aff_unique_abbr": "MPI-SWS;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Bounding the Invertibility of Privacy-preserving Instance Encoding using Fisher Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72701", "id": "7ZQiucQu2u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a344f7f474958cc0775be7e46bc94309-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7ZQiucQu2u", "openreview": "https://openreview.net/forum?id=7ZQiucQu2u", "poster": "/media/PosterPDFs/NeurIPS%202023/72701.png?t=1701977739.4336016", "slides": "https://nips.cc/virtual/2023/poster/72701", "video": "https://nips.cc/virtual/2023/poster/72701", "author_site": "Kiwan Maeng, Chuan Guo, Sanjay Kariyappa, G. Edward Suh", "tldr": "", "abstract": "Privacy-preserving instance encoding aims to encode raw data into feature vectors without revealing their privacy-sensitive information. When designed properly, these encodings can be used for downstream ML applications such as training and inference with limited privacy risk. However, the vast majority of existing schemes do not theoretically justify that their encoding is non-invertible, and their privacy-enhancing properties are only validated empirically against a limited set of attacks. In this paper, we propose a theoretically-principled measure for the invertibility of instance encoding based on Fisher information that is broadly applicable to a wide range of popular encoders. We show that this measure, termed dFIL, can be used to bound the invertibility of encodings both theoretically and empirically, providing an intuitive interpretation of the privacy of instance encoding.", "keywords": "privacy;instance encoding;split learning", "primary_area": "", "supplementary_material": "/attachment/f3f9a4a73e34127361154bbf9d563f157153e47c.pdf", "author": "Kiwan Maeng;Chuan Guo;Sanjay Kariyappa;G. 
Edward Suh", "authorids": "~Kiwan_Maeng1;~Chuan_Guo1;~Sanjay_Kariyappa1;~G._Edward_Suh2", "gender": "M;M;M;", "homepage": "https://kiwanmaeng.com;https://sites.google.com/view/chuanguo;https://sanjaykariyappa.github.io/;", "dblp": "200/5551;;223/6062;", "google_scholar": "pd9rcl0AAAAJ;0gp5M-kAAAAJ;qd9U-h4AAAAJ;", "orcid": ";;;", "linkedin": ";;sanjay-kariyappa-74583924/;", "or_profile": "~Kiwan_Maeng1;~Chuan_Guo1;~Sanjay_Kariyappa1;~G._Edward_Suh2", "aff": "Pennsylvania State University;Meta;J.P. Morgan Chase;", "aff_domain": "psu.edu;meta.com;jpmorganchase.com;", "position": "Assistant Professor;Researcher;Researcher;", "bibtex": "@inproceedings{\nmaeng2023bounding,\ntitle={Bounding the Invertibility of Privacy-preserving Instance Encoding using Fisher Information},\nauthor={Kiwan Maeng and Chuan Guo and Sanjay Kariyappa and G. Edward Suh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7ZQiucQu2u}\n}", "github": "", "project": "", "reviewers": "yPPy;k1TB;AWfY;KGoq", "pdf_size": 3339989, "rating": "5;6;7;7", "confidence": "2;3;3;3", "soundness": "3;3;4;3", "novelty": "2;3;2;3", "presentation": "2;3;4;4", "wc_summary": "91;363;110;244", "wc_strengths": "105;87;57;207", "wc_weaknesses": "181;139;113;264", "wc_questions": "2;33;118;100", "wc_limitations": "1;28;11;222", "wc_review": "380;650;409;1037", "wc_reply_reviewers": "21;4;40;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 202.0, 110.07951671405539 ], "wc_strengths_avg": [ 114.0, 56.36488268416781 ], "wc_weaknesses_avg": [ 174.25, 57.21614719639903 ], "wc_questions_avg": [ 63.25, 47.47301865270419 ], "wc_limitations_avg": [ 65.5, 90.869411795169 ], "wc_review_avg": [ 619.0, 263.1092928803542 ], "wc_reply_reviewers_avg": [ 26.75, 15.481844205391036 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6858034933025473245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "psu.edu;meta.com;jpmorganchase.com;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Pennsylvania State University;Meta;JPMorgan Chase & Co.", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.psu.edu;https://meta.com;https://www.jpmorganchase.com", "aff_unique_abbr": "PSU;Meta;JPM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Information Theoretic Lower Bounds for Information Theoretic Upper Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72700", "id": "7anW5TWbCJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/768396006e9214568dba5aae9dd312c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7anW5TWbCJ", "openreview": "https://openreview.net/forum?id=7anW5TWbCJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72700", "video": "https://nips.cc/virtual/2023/poster/72700", "tldr": "", "abstract": "We examine the relationship 
between the mutual information between the output model and the empirical sample and the algorithm's generalization in the context of stochastic convex optimization. Despite increasing interest in information-theoretic generalization bounds, it is uncertain if these bounds can provide insight into the exceptional performance of various learning algorithms. Our study of stochastic convex optimization reveals that, for true risk minimization, dimension-dependent mutual information is necessary. This indicates that existing information-theoretic generalization bounds fall short in capturing the generalization capabilities of algorithms like SGD and regularized ERM, which have dimension-independent sample complexity.", "keywords": "Learning Theory", "primary_area": "", "supplementary_material": "/attachment/96420251b7bd67d74255ea0be70e0fd6b81d60c1.pdf", "author": "Roi Livni", "authorids": "~Roi_Livni1", "gender": "Not Specified", "homepage": "https://www.rlivni.sites.tau.ac.il/", "dblp": "59/11348", "google_scholar": "xhU85M4AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Roi_Livni1", "aff": "Tel Aviv University", "aff_domain": "tau.ac.il", "position": "Assistant Professor", "bibtex": "@inproceedings{\nlivni2023information,\ntitle={Information Theoretic Lower Bounds for Information Theoretic Upper Bounds},\nauthor={Roi Livni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7anW5TWbCJ}\n}", "github": "", "project": "", "reviewers": "z3UU;dYAB;UMeF;VAde", "pdf_size": 287122, "rating": "5;6;6;7", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "novelty": "3;2;3;4", "presentation": "2;3;2;3", "wc_summary": "138;76;401;38", "wc_strengths": "85;42;118;130", "wc_weaknesses": "623;262;568;1", "wc_questions": "326;54;114;1", "wc_limitations": "3;7;84;1", "wc_review": "1175;441;1285;171", "wc_reply_reviewers": "86;144;315;0", "wc_reply_authors": "17;39;245;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 163.25, 141.82978354351388 ], "wc_strengths_avg": [ 93.75, 34.120191968979306 ], "wc_weaknesses_avg": [ 363.5, 250.4341230743127 ], "wc_questions_avg": [ 123.75, 123.42279975758126 ], "wc_limitations_avg": [ 23.75, 34.85236720798173 ], "wc_review_avg": [ 768.0, 473.3592715897725 ], "wc_reply_reviewers_avg": [ 136.25, 115.2179998958496 ], "wc_reply_authors_avg": [ 75.25, 98.97569146007518 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2425129870532972267&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tau.ac.il", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "Generalized equivalences between subsampling and ridge regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72699", "id": "7aoVQkNmQ6", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f976982cd1c1b9e076c096787ef6652e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7aoVQkNmQ6", "openreview": "https://openreview.net/forum?id=7aoVQkNmQ6", "poster": "/media/PosterPDFs/NeurIPS%202023/72699.png?t=1698067910.5506594", "slides": "https://nips.cc/virtual/2023/poster/72699", "video": "https://nips.cc/virtual/2023/poster/72699", "author_site": "Pratik Patil, Jin-Hong Du", "tldr": "", "abstract": "We establish precise structural and risk equivalences between subsampling and ridge regularization for ensemble ridge estimators. Specifically, we prove that linear and quadratic functionals of subsample ridge estimators, when fitted with different ridge regularization levels $\\lambda$ and subsample aspect ratios $\\psi$, are asymptotically equivalent along specific paths in the $(\\lambda,\\psi)$-plane (where $\\psi$ is the ratio of the feature dimension to the subsample size). Our results only require bounded moment assumptions on feature and response distributions and allow for arbitrary joint distributions. Furthermore, we provide a data-dependent method to determine the equivalent paths of $(\\lambda,\\psi)$. An indirect implication of our equivalences is that optimally tuned ridge regression exhibits a monotonic prediction risk in the data aspect ratio. This resolves a recent open problem raised by Nakkiran et al. for general data distributions under proportional asymptotics, assuming a mild regularity condition that maintains regression hardness through linearized signal-to-noise ratios.", "keywords": "subsampling;ridge regularization;asymptotic equivalences;proportional asymptotics", "primary_area": "", "supplementary_material": "/attachment/67d23eb347e3e33e5ba3d555d81a9e86220e8414.zip", "author": "Pratik Patil;Jin-Hong Du", "authorids": "~Pratik_Patil1;~Jin-Hong_Du1", "gender": ";", "homepage": "https://www.stat.berkeley.edu/~pratikpatil/;", "dblp": "48/2268;", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Pratik_Patil1;~Jin-Hong_Du1", "aff": "University of California, Berkeley;", "aff_domain": "berkeley.edu;", "position": "Postdoc;", "bibtex": "@inproceedings{\npatil2023generalized,\ntitle={Generalized equivalences between subsampling and ridge regularization},\nauthor={Pratik Patil and Jin-Hong Du},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7aoVQkNmQ6}\n}", "github": "", "project": "", "reviewers": "wEHo;DNS5;96Gw;nskG;MtFr", "pdf_size": 3959583, "rating": "6;6;7;7;10", "confidence": "3;4;3;2;5", "soundness": "4;3;3;4;4", "novelty": "3;2;3;3;4", "presentation": "4;3;3;3;4", "wc_summary": "147;95;165;109;77", "wc_strengths": "147;97;40;61;50", "wc_weaknesses": "152;199;10;45;53", "wc_questions": "318;51;32;128;42", "wc_limitations": "130;1;51;15;4", "wc_review": "894;443;298;358;226", "wc_reply_reviewers": "191;90;34;18;21", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 1.469693845669907 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 118.6, 32.67782122480016 ], "wc_strengths_avg": [ 79.0, 39.07428822128434 ], "wc_weaknesses_avg": [ 91.8, 71.4182049620403 ], "wc_questions_avg": [ 114.2, 107.41396557245245 ], "wc_limitations_avg": [ 40.2, 48.29658373011491 ], 
"wc_review_avg": [ 443.8, 236.1087884853082 ], "wc_reply_reviewers_avg": [ 70.8, 65.48098960767163 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6138245895861673, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6356022470297847676&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "berkeley.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Bias in Evaluation Processes: An Optimization-Based Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72698", "id": "7b4oobeB4w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e4748b6b6ca49f04b6a8cfce1d5f9a70-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7b4oobeB4w", "openreview": "https://openreview.net/forum?id=7b4oobeB4w", "poster": "/media/PosterPDFs/NeurIPS%202023/72698.png?t=1702402153.8540585", "slides": "https://nips.cc/virtual/2023/poster/72698", "video": "https://nips.cc/virtual/2023/poster/72698", "author_site": "L. Elisa Celis, Amit Kumar, Anay Mehrotra, Nisheeth K. Vishnoi", "tldr": "", "abstract": "Biases with respect to socially-salient attributes of individuals have been well documented in evaluation processes used in settings such as admissions and hiring. We view such an evaluation process as a transformation of a distribution of the true utility of an individual for a task to an observed distribution and model it as a solution to a loss minimization problem subject to an information constraint. Our model has two parameters that have been identified as factors leading to biases: the resource-information trade-off parameter in the information constraint and the risk-averseness parameter in the loss function. We characterize the distributions that arise from our model and study the effect of the parameters on the observed distribution. The outputs of our model enrich the class of distributions that can be used to capture variation across groups in the observed evaluations. We empirically validate our model by fitting real-world datasets and use it to study the effect of interventions in a downstream selection task. These results contribute to an understanding of the emergence of bias in evaluation processes and provide tools to guide the deployment of interventions to mitigate biases.", "keywords": "bias;evaluation;maximum entropy;selection", "primary_area": "", "supplementary_material": "", "author": "L. 
Elisa Celis;Amit Kumar;Anay Mehrotra;Nisheeth K Vishnoi", "authorids": "~L._Elisa_Celis2;~Amit_Kumar7;~Anay_Mehrotra1;~Nisheeth_K_Vishnoi1", "gender": "M;;M;F", "homepage": "http://www.cse.iitd.ac.in/~amitk/;;http://cs.yale.edu/homes/vishnoi/Home.html;https://datascienceethics.wordpress.com/elisacelis/", "dblp": "k/AmitKumar1.html;234/8808;02/2229;90/7216", "google_scholar": "https://scholar.google.co.in/citations?user=Qu97aMEAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Amit_Kumar7;~Anay_Mehrotra1;~Nisheeth_K_Vishnoi1;~Elisa_CELIS1", "aff": ";Yale University;Yale University;Yale University", "aff_domain": ";yale.edu;yale.edu;yale.edu", "position": ";PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncelis2023bias,\ntitle={Bias in Evaluation Processes: An Optimization-Based Model},\nauthor={L. Elisa Celis and Amit Kumar and Anay Mehrotra and Nisheeth K Vishnoi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7b4oobeB4w}\n}", "github": "", "project": "", "reviewers": "Aa19;7ABm;2dLt;gzsA;G1nQ", "pdf_size": 1613770, "rating": "4;4;5;6;6", "confidence": "3;1;3;4;2", "soundness": "2;2;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;1;4;3;2", "wc_summary": "92;22;113;587;50", "wc_strengths": "63;41;48;173;33", "wc_weaknesses": "457;154;63;505;83", "wc_questions": "12;2;71;94;22", "wc_limitations": "1;32;139;90;26", "wc_review": "625;251;434;1449;214", "wc_reply_reviewers": "169;10;0;168;0", "wc_reply_authors": "1117;0;0;0;0", "reply_reviewers": "2;1;0;1;0", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 172.8, 209.51696828658055 ], "wc_strengths_avg": [ 71.6, 51.65113745117333 ], "wc_weaknesses_avg": [ 252.4, 189.69406949085152 ], "wc_questions_avg": [ 40.2, 35.85749572962395 ], "wc_limitations_avg": [ 57.6, 50.066355968853976 ], "wc_review_avg": [ 594.6, 451.58237343811373 ], "wc_reply_reviewers_avg": [ 69.4, 80.99777774729378 ], "wc_reply_authors_avg": [ 223.4, 446.80000000000007 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4385290096535146, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14274348622141895677&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";yale.edu;yale.edu;yale.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "HOH: Markerless Multimodal Human-Object-Human Handover Dataset with Large Object Count", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73686", "id": "7bghy0Gq75", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d8c6a37c4c94e9a63e53d296f1f668ae-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=7bghy0Gq75", "openreview": "https://openreview.net/forum?id=7bghy0Gq75", "poster": "/media/PosterPDFs/NeurIPS%202023/73686.png?t=1701715840.574931", "slides": 
"https://nips.cc/virtual/2023/poster/73686", "video": "https://nips.cc/virtual/2023/poster/73686", "author_site": "Noah Wiederhold, Ava Megyeri, DiMaggio Paris, Sean Banerjee, Natasha Banerjee", "tldr": "", "abstract": "We present the HOH (Human-Object-Human) Handover Dataset, a large object count dataset with 136 objects, to accelerate data-driven research on handover studies, human-robot handover implementation, and artificial intelligence (AI) on handover parameter estimation from 2D and 3D data of two-person interactions. HOH contains multi-view RGB and depth data, skeletons, fused point clouds, grasp type and handedness labels, object, giver hand, and receiver hand 2D and 3D segmentations, giver and receiver comfort ratings, and paired object metadata and aligned 3D models for 2,720 handover interactions spanning 136 objects and 20 giver-receiver pairs\u201440 with role-reversal\u2014organized from 40 participants. We also show experimental results of neural networks trained using HOH to perform grasp, orientation, and trajectory prediction. As the only fully markerless handover capture dataset, HOH represents natural human-human handover interactions, overcoming challenges with markered datasets that require specific suiting for body tracking, and lack high-resolution hand tracking. To date, HOH is the largest handover dataset in terms of object count, participant count, pairs with role reversal accounted for, and total interactions captured.", "keywords": "human-human;handover;dataset;handoff;human-robot interaction;hand-object interactions;multimodal human data collection", "primary_area": "", "supplementary_material": "/attachment/6e30adcca621136882d70fd16f78f86bcef33eea.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nwiederhold2023hoh,\ntitle={{HOH}: Markerless Multimodal Human-Object-Human Handover Dataset with Large Object Count},\nauthor={Noah Wiederhold and Ava Megyeri and DiMaggio Paris and Sean Banerjee and Natasha Kholgade Banerjee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=7bghy0Gq75}\n}", "github": "", "project": "", "reviewers": "oQbw;e1Fp;MkyD", "pdf_size": 5836262, "rating": "6;7;8", "confidence": "3;4;3", "wc_summary_and_contributions": "52;50;60", "wc_strengths": "58;32;52", "wc_improvement": "28;294;32", "wc_limitations": "1;1;10", "wc_correctness": "1;8;7", "wc_clarity": "1;1;5", "wc_relation_to_prior_work": "1;6;10", "wc_documentation": "1;1;11", "wc_additional_feedback": "1;1;1", "wc_review": "144;394;188", "wc_reply_reviewers": "55;101;0", "wc_reply_authors": "707;887;91", "reply_reviewers": "2;2;0", "reply_authors": "2;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 54.0, 4.320493798938574 ], "wc_strengths_avg": [ 47.333333333333336, 11.115554667022044 ], "wc_improvement_avg": [ 118.0, 124.46150676681793 ], "wc_limitations_avg": [ 4.0, 4.242640687119285 ], "wc_correctness_avg": [ 5.333333333333333, 3.0912061651652345 ], "wc_clarity_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_documentation_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], 
"wc_review_avg": [ 242.0, 108.97094413955799 ], "wc_reply_reviewers_avg": [ 52.0, 41.28760911782937 ], "wc_reply_authors_avg": [ 561.6666666666666, 340.82775056552475 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3637616804413108001&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "Beyond Geometry: Comparing the Temporal Structure of Computation in Neural Circuits with Dynamical Similarity Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72697", "id": "7blSUMwe7R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ac807c9b296964409b277369e55621a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7blSUMwe7R", "openreview": "https://openreview.net/forum?id=7blSUMwe7R", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72697", "video": "https://nips.cc/virtual/2023/poster/72697", "author_site": "Mitchell Ostrow, Adam Eisen, Leo Kozachkov, Ila Fiete", "tldr": "", "abstract": "How can we tell whether two neural networks utilize the same internal processes for a particular computation? This question is pertinent for multiple subfields of neuroscience and machine learning, including neuroAI, mechanistic interpretability, and brain-machine interfaces. Standard approaches for comparing neural networks focus on the spatial geometry of latent states. Yet in recurrent networks, computations are implemented at the level of dynamics, and two networks performing the same computation with equivalent dynamics need not exhibit the same geometry. To bridge this gap, we introduce a novel similarity metric that compares two systems at the level of their dynamics, called Dynamical Similarity Analysis (DSA). Our method incorporates two components: Using recent advances in data-driven dynamical systems theory, we learn a high-dimensional linear system that accurately captures core features of the original nonlinear dynamics. Next, we compare different systems passed through this embedding using a novel extension of Procrustes Analysis that accounts for how vector fields change under orthogonal transformation. In four case studies, we demonstrate that our method disentangles conjugate and non-conjugate recurrent neural networks (RNNs), while geometric methods fall short. We additionally show that our method can distinguish learning rules in an unsupervised manner. 
Our method opens the door to comparative analyses of the essential temporal structure of computation in neural circuits.", "keywords": "Computational Neuroscience;Neural Data Analysis;Statistical Shape Metrics;Representational Similarity Analysis;Recurrent Neural Networks;Dynamical Systems", "primary_area": "", "supplementary_material": "/attachment/db1dcd794a3ca96df662213741099f15f6e2eaed.pdf", "author": "Mitchell Ostrow;Adam Joseph Eisen;Leo Kozachkov;Ila R Fiete", "authorids": "~Mitchell_Ostrow2;~Adam_Joseph_Eisen1;~Leo_Kozachkov1;~Ila_R_Fiete1", "gender": ";M;;F", "homepage": "https://mitchellostrow.github.io;https://adamjeisen.github.io/;https://kozleo.github.io/;https://fietelab.mit.edu/", "dblp": ";349/8299;195/5971;", "google_scholar": "WRIrZ2cAAAAJ;pEJ4yfIAAAAJ;V5dtdeUAAAAJ;uE-CihIAAAAJ", "orcid": ";0009-0005-3091-7586;0000-0003-4330-1201;0000-0003-4738-2539", "linkedin": "https://linkedin.com/in/mitchell-ostrow;adam-eisen-87461712b/;;", "or_profile": "~Mitchell_Ostrow2;~Adam_Joseph_Eisen1;~Leo_Kozachkov1;~Ila_R_Fiete1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nostrow2023beyond,\ntitle={Beyond Geometry: Comparing the Temporal Structure of Computation in Neural Circuits with Dynamical Similarity Analysis},\nauthor={Mitchell Ostrow and Adam Joseph Eisen and Leo Kozachkov and Ila R Fiete},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7blSUMwe7R}\n}", "github": "", "project": "", "reviewers": "mRC1;f1wF;TLG4;cz18", "pdf_size": 3109564, "rating": "5;6;7;8", "confidence": "3;5;4;4", "soundness": "3;3;4;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "119;73;81;72", "wc_strengths": "109;38;26;142", "wc_weaknesses": "183;122;484;110", "wc_questions": "86;138;414;4", "wc_limitations": "12;34;10;5", "wc_review": "509;405;1015;333", "wc_reply_reviewers": "59;753;468;0", "wc_reply_authors": "0;227;510;0", "reply_reviewers": "1;1;2;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 19.22725929507375 ], "wc_strengths_avg": [ 78.75, 48.37031631072925 ], "wc_weaknesses_avg": [ 224.75, 152.21592393701783 ], "wc_questions_avg": [ 160.5, 153.95697450911408 ], "wc_limitations_avg": [ 15.25, 11.121488209767612 ], "wc_review_avg": [ 565.5, 266.9545841524359 ], "wc_reply_reviewers_avg": [ 320.0, 308.1858205693442 ], "wc_reply_authors_avg": [ 184.25, 209.66446408488017 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12355134432668928878&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "title": "Certification of Distributional Individual Fairness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72696", "id": "7cnMLZvTy9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57d8ebf4c2f050a6485f370d47656a9e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7cnMLZvTy9", "openreview": "https://openreview.net/forum?id=7cnMLZvTy9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72696", "video": "https://nips.cc/virtual/2023/poster/72696", "author_site": "Matthew Wicker, Vihari Piratla, Adrian Weller", "tldr": "", "abstract": "Providing formal guarantees of algorithmic fairness is of paramount importance to socially responsible deployment of machine learning algorithms. In this work, we study formal guarantees, i.e., certificates, for individual fairness (IF) of neural networks. We start by introducing a novel convex approximation of IF constraints that exponentially decreases the computational cost of providing formal guarantees of local individual fairness. We highlight that prior methods are constrained by their focus on global IF certification and can therefore only scale to models with a few dozen hidden neurons, thus limiting their practical impact. We propose to certify \\textit{distributional} individual fairness which ensures that for a given empirical distribution and all distributions within a $\\gamma$-Wasserstein ball, the neural network has guaranteed individually fair predictions. Leveraging developments in quasi-convex optimization, we provide novel and efficient certified bounds on distributional individual fairness and show that our method allows us to certify and regularize neural networks that are several orders of magnitude larger than those considered by prior works. 
Moreover, we study real-world distribution shifts and find our bounds to be a scalable, practical, and sound source of IF guarantees.", "keywords": "Fairness;Individual Fairness;Deep Learning;Certification;Trustworthy ML", "primary_area": "", "supplementary_material": "/attachment/f6cd4a0cdfbbd4c63cc23120d56c61287baf6bb5.pdf", "author": "Matthew Robert Wicker;Vihari Piratla;Adrian Weller", "authorids": "~Matthew_Robert_Wicker1;~Vihari_Piratla1;~Adrian_Weller1", "gender": "M;M;M", "homepage": "https://www.matthewwicker.org;https://vihari.github.io/;http://mlg.eng.cam.ac.uk/adrian/", "dblp": "207/7909.html;161/3626;73/8324", "google_scholar": "_0qEDNIAAAAJ;https://scholar.google.co.in/citations?user=DQddccYAAAAJ;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Matthew_Robert_Wicker1;~Vihari_Piratla1;~Adrian_Weller1", "aff": "Alan Turing Institute;University of Cambridge;University of Cambridge", "aff_domain": "turing.ac.uk;cam.ac.uk;cam.ac.uk", "position": "Postdoc;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nwicker2023certification,\ntitle={Certification of Distributional Individual Fairness},\nauthor={Matthew Robert Wicker and Vihari Piratla and Adrian Weller},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7cnMLZvTy9}\n}", "github": "", "project": "", "reviewers": "xyNq;Lpfq;FRnw", "pdf_size": 1383722, "rating": "6;6;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;2", "wc_summary": "71;64;213", "wc_strengths": "17;37;64", "wc_weaknesses": "41;31;46", "wc_questions": "26;406;39", "wc_limitations": "1;81;1", "wc_review": "156;619;363", "wc_reply_reviewers": "22;701;0", "wc_reply_authors": "0;782;0", "reply_reviewers": "1;2;0", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 116.0, 68.64886500639808 ], "wc_strengths_avg": [ 39.333333333333336, 19.258475767539053 ], "wc_weaknesses_avg": [ 39.333333333333336, 6.236095644623236 ], "wc_questions_avg": [ 157.0, 176.14955766809825 ], "wc_limitations_avg": [ 27.666666666666668, 37.71236166328253 ], "wc_review_avg": [ 379.3333333333333, 189.37147503136674 ], "wc_reply_reviewers_avg": [ 241.0, 325.393095603866 ], "wc_reply_authors_avg": [ 260.6666666666667, 368.6383352585868 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8162301771354161379&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "turing.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Alan Turing Institute;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.turing.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "ATI;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Random-Access Infinite Context Length for Transformers", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72695", "id": "7eHn64wOVy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab05dc8bf36a9f66edbff6992ec86f56-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7eHn64wOVy", "openreview": "https://openreview.net/forum?id=7eHn64wOVy", "poster": "/media/PosterPDFs/NeurIPS%202023/72695.png?t=1701900795.2738628", "slides": "https://nips.cc/virtual/2023/poster/72695", "video": "https://nips.cc/virtual/2023/poster/72695", "author_site": "Amirkeivan Mohtashami, Martin Jaggi", "tldr": "", "abstract": "While Transformers have shown remarkable success in natural language processing, their attention mechanism's large memory requirements have limited their ability to handle longer contexts. Prior approaches, such as recurrent memory or retrieval-based augmentation, have either compromised the random-access flexibility of attention (i.e., the capability to select any token in the entire context) or relied on separate mechanisms for relevant context retrieval, which may not be compatible with the model's attention. In this paper, we present a novel approach that allows access to the complete context while retaining random-access flexibility, closely resembling running attention on the entire context. Our method uses a landmark token to represent each block of the input and trains the attention to use it for selecting relevant blocks, enabling retrieval of blocks directly through the attention mechanism instead of by relying on a separate mechanism. Our approach seamlessly integrates with specialized data structures and the system's memory hierarchy, enabling processing of arbitrarily long context lengths. We demonstrate that our method can obtain comparable performance with Transformer-XL while significantly reducing the number of retrieved tokens in each step. Finally, we show that fine-tuning LLaMA 7B with our method successfully extends its context length capacity to over 32k tokens, allowing for inference at the context lengths of GPT-4. 
We release the implementation of landmark attention and the code to reproduce our experiments at https://github.com/epfml/landmark-attention/.", "keywords": "large language models;memory;context length", "primary_area": "", "supplementary_material": "", "author": "Amirkeivan Mohtashami;Martin Jaggi", "authorids": "~Amirkeivan_Mohtashami1;~Martin_Jaggi1", "gender": "M;M", "homepage": ";https://mlo.epfl.ch", "dblp": "271/7873;17/4402", "google_scholar": "YT1udC0AAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ", "orcid": ";0000-0003-1579-5558", "linkedin": ";", "or_profile": "~Amirkeivan_Mohtashami1;~Martin_Jaggi1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nmohtashami2023randomaccess,\ntitle={Random-Access Infinite Context Length for Transformers},\nauthor={Amirkeivan Mohtashami and Martin Jaggi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7eHn64wOVy}\n}", "github": "", "project": "", "reviewers": "tGXa;g7s5;fLCX;Hd8A;DxW4", "pdf_size": 458823, "rating": "5;5;5;5;8", "confidence": "4;5;4;5;5", "soundness": "2;2;3;2;4", "novelty": "3;3;3;3;4", "presentation": "3;2;4;3;3", "wc_summary": "169;130;82;72;235", "wc_strengths": "37;158;77;81;142", "wc_weaknesses": "675;250;323;203;38", "wc_questions": "353;147;43;87;78", "wc_limitations": "1;12;1;28;16", "wc_review": "1235;697;526;471;509", "wc_reply_reviewers": "1158;215;478;47;0", "wc_reply_authors": "691;0;396;0;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 137.6, 59.87520354871455 ], "wc_strengths_avg": [ 99.0, 44.68109219793089 ], "wc_weaknesses_avg": [ 297.8, 210.5624847877703 ], "wc_questions_avg": [ 141.6, 110.8757863557233 ], "wc_limitations_avg": [ 11.6, 10.131140113531151 ], "wc_review_avg": [ 687.6, 284.48662534467235 ], "wc_reply_reviewers_avg": [ 379.6, 423.65108285002646 ], "wc_reply_authors_avg": [ 217.4, 282.1287649283568 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11933022875355490226&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Information Maximizing Curriculum: A Curriculum-Based Approach for Learning Versatile Skills", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72694", "id": "7eW6NzSE4g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a1e6783e4d739196cad3336f12d402bf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7eW6NzSE4g", "openreview": "https://openreview.net/forum?id=7eW6NzSE4g", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72694", "video": "https://nips.cc/virtual/2023/poster/72694", "author_site": "Denis Blessing, Onur Celik, Xiaogang Jia, Xiaogang Jia, Moritz Reuss, Maximilian Li, Rudolf Lioutikov, Gerhard Neumann", "tldr": "", "abstract": "Imitation learning uses data for training policies to solve complex tasks. However,\nwhen the training data is collected from human demonstrators, it often leads\nto multimodal distributions because of the variability in human actions. Most\nimitation learning methods rely on a maximum likelihood (ML) objective to learn\na parameterized policy, but this can result in suboptimal or unsafe behavior due\nto the mode-averaging property of the ML objective. In this work, we propose\nInformation Maximizing Curriculum, a curriculum-based approach that assigns\na weight to each data point and encourages the model to specialize in the data it\ncan represent, effectively mitigating the mode-averaging problem by allowing the\nmodel to ignore data from modes it cannot represent. To cover all modes and thus,\nenable versatile behavior, we extend our approach to a mixture of experts (MoE)\npolicy, where each mixture component selects its own subset of the training data\nfor learning. A novel, maximum entropy-based objective is proposed to achieve\nfull coverage of the dataset, thereby enabling the policy to encompass all modes\nwithin the data distribution. We demonstrate the effectiveness of our approach on\ncomplex simulated control tasks using versatile human demonstrations, achieving\nsuperior performance compared to state-of-the-art methods.", "keywords": "Imitation Learning;Verstile Skill Learning;Curriculum Learning", "primary_area": "", "supplementary_material": "/attachment/755a8718275b84b41eee0c94eddd4db53ec5167f.zip", "author": "Denis Blessing;Onur Celik;Xiaogang Jia;Moritz Reuss;Maximilian Xiling Li;Rudolf Lioutikov;Gerhard Neumann", "authorids": "~Denis_Blessing1;~Onur_Celik1;~Xiaogang_Jia1;~Moritz_Reuss1;~Maximilian_Xiling_Li1;~Rudolf_Lioutikov1;~Gerhard_Neumann2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://alr.anthropomatik.kit.edu/21_69.php;https://xiaogangjia.github.io/Personal_Website/;;;https://rudolf.intuitive-robots.net;https://alr.anthropomatik.kit.edu/", "dblp": "219/1435;243/5913;23/10777;321/1769;218/0460;151/9451;60/4878", "google_scholar": "https://scholar.google.de/citations?view_op=list_works;9jqaTcAAAAAJ;E7Tja9gAAAAJ;NLuzkPIAAAAJ;N8vw1IAAAAAJ;hvjV43MAAAAJ;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;rudolf-lioutikov-74830730a/;", "or_profile": "~Denis_Blessing1;~Onur_Celik1;~Xiaogang_Jia1;~Moritz_Reuss1;~Maximilian_Xiling_Li1;~Rudolf_Lioutikov1;~Gerhard_Neumann1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Tenure-Track Professor;Full Professor", "bibtex": "@inproceedings{\nblessing2023information,\ntitle={Information Maximizing Curriculum: A Curriculum-Based Approach for Learning Versatile Skills},\nauthor={Denis Blessing and Onur Celik and Xiaogang Jia and Moritz Reuss and Maximilian Xiling Li and Rudolf Lioutikov and Gerhard Neumann},\nbooktitle={Thirty-seventh Conference on 
Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7eW6NzSE4g}\n}", "github": "", "project": "", "reviewers": "kzo3;tAUb;EzYZ;kVDY", "pdf_size": 8257981, "rating": "6;6;6;6", "confidence": "3;3;3;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;2", "wc_summary": "73;121;85;152", "wc_strengths": "64;36;56;35", "wc_weaknesses": "45;280;227;796", "wc_questions": "26;68;87;4", "wc_limitations": "6;72;3;69", "wc_review": "214;577;458;1056", "wc_reply_reviewers": "20;52;0;332", "wc_reply_authors": "0;0;0;1258", "reply_reviewers": "1;1;0;2", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.75, 31.059418861272984 ], "wc_strengths_avg": [ 47.75, 12.577261228105266 ], "wc_weaknesses_avg": [ 337.0, 278.9686362299533 ], "wc_questions_avg": [ 46.25, 32.89661836724255 ], "wc_limitations_avg": [ 37.5, 33.03407331831786 ], "wc_review_avg": [ 576.25, 306.3367224150575 ], "wc_reply_reviewers_avg": [ 101.0, 134.65140177510222 ], "wc_reply_authors_avg": [ 314.5, 544.7299789804119 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15992502702881065253&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;1", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.kit.edu", "aff_unique_abbr": "KIT;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Multi-Agent Learning with Heterogeneous Linear Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72693", "id": "7f6vH3mmhr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f8d39584f87944e5dbe46ec76f19e20a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7f6vH3mmhr", "openreview": "https://openreview.net/forum?id=7f6vH3mmhr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72693", "video": "https://nips.cc/virtual/2023/poster/72693", "author_site": "Anh Do, Thanh Nguyen-Tang, Raman Arora", "tldr": "", "abstract": "As trained intelligent systems become increasingly pervasive, multiagent learning has emerged as a popular framework for studying complex interactions between autonomous agents. Yet, a formal understanding of how and when learners in heterogeneous environments benefit from sharing their respective experiences is far from complete. In this paper, we seek answers to these questions in the context of linear contextual bandits. We present a novel distributed learning algorithm based on the upper confidence bound (UCB) algorithm, which we refer to as H-LINUCB, wherein agents cooperatively minimize the group regret under the coordination of a central server. 
In the setting where the level of heterogeneity or dissimilarity across the environments is known to the agents, we show that H-LINUCB is provably optimal in regimes where the tasks are highly similar or highly dissimilar.", "keywords": "Multi-agent;Bandits;Cooperative", "primary_area": "", "supplementary_material": "", "author": "Anh Do;Thanh Nguyen-Tang;Raman Arora", "authorids": "~Anh_Do2;~Thanh_Nguyen-Tang1;~Raman_Arora1", "gender": ";M;M", "homepage": "https://anhddo.github.io;http://www.cs.jhu.edu/~raman/Home.html;https://thanhnguyentang.github.io/", "dblp": ";;287/5102.html", "google_scholar": "aB4jrTIAAAAJ;Spe0xdkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-1917-2190", "linkedin": ";;thanhnguyentang/", "or_profile": "~Anh_Do2;~Raman_Arora1;~Thanh_Tang_Nguyen2", "aff": "Department of Computer Science, Whiting School of Engineering;Johns Hopkins University;Johns Hopkins University", "aff_domain": "cs.jhu.edu;jhu.edu;jhu.edu", "position": "PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\ndo2023multiagent,\ntitle={Multi-Agent Learning with Heterogeneous Linear Contextual Bandits},\nauthor={Anh Do and Thanh Nguyen-Tang and Raman Arora},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7f6vH3mmhr}\n}", "github": "", "project": "", "reviewers": "gcmR;kifo;bsff;5b9x;JJk8", "pdf_size": 769173, "rating": "5;6;6;6;6", "confidence": "3;3;4;4;3", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;3;2;3", "wc_summary": "88;50;204;62;68", "wc_strengths": "60;75;12;101;38", "wc_weaknesses": "60;88;135;152;18", "wc_questions": "68;51;122;61;115", "wc_limitations": "53;13;1;11;1", "wc_review": "329;277;474;387;240", "wc_reply_reviewers": "33;11;87;59;14", "wc_reply_authors": "20;0;20;57;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;2;2;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 94.4, 56.16262102145875 ], "wc_strengths_avg": [ 57.2, 30.511637124218687 ], "wc_weaknesses_avg": [ 90.6, 48.8982617277956 ], "wc_questions_avg": [ 83.4, 29.24790590794493 ], "wc_limitations_avg": [ 15.8, 19.249935064825543 ], "wc_review_avg": [ 341.4, 82.75892701092734 ], "wc_reply_reviewers_avg": [ 40.8, 28.749956521706256 ], "wc_reply_authors_avg": [ 19.4, 20.819221887476967 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18324204788336506260&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.jhu.edu;jhu.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "0", "aff_campus_unique": "Baltimore;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7fRThuXp3h", "title": "Off-the-Grid MARL: Datasets with Baselines for Offline Multi-Agent Reinforcement Learning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Being able to harness the power of large datasets for developing 
cooperative multi-agent controllers promises to unlock enormous value for real-world applications. \nMany important industrial systems are multi-agent in nature and are difficult to model using bespoke simulators. \nHowever, in industry, distributed processes can often be recorded during operation, and large quantities of demonstrative data stored.\nOffline multi-agent reinforcement learning (MARL) provides a promising paradigm for building effective decentralised controllers from such datasets. \nHowever, offline MARL is still in its infancy and therefore lacks standardised benchmark datasets and baselines typically found in more mature subfields of reinforcement learning (RL). \nThese deficiencies make it difficult for the community to sensibly measure progress. \nIn this work, we aim to fill this gap by releasing \\emph{off-the-grid MARL (OG-MARL)}: a growing repository of high-quality datasets with baselines for cooperative offline MARL research.\nOur datasets provide settings that are characteristic of real-world systems, including complex environment dynamics, heterogeneous agents, non-stationarity, many agents, partial observability, suboptimality, sparse rewards and demonstrated coordination.\nFor each setting, we provide a range of different dataset types (e.g. \\texttt{Good}, \\texttt{Medium}, \\texttt{Poor}, and \\texttt{Replay}) and profile the composition of experiences for each dataset.\nWe hope that OG-MARL will serve the community as a reliable source of datasets and help drive progress, while also providing an accessible entry point for researchers new to the field.", "keywords": "reinforcement learning;multi-agent reinforcement learning;offline reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/5896f6f928b76e01d98b7a789c3c36d12e8c5107.pdf", "author": "Juan Claude Formanek;Asad Jeewa;Jonathan Phillip Shock;Arnu Pretorius", "authorids": "~Juan_Claude_Formanek1;~Asad_Jeewa1;~Jonathan_Phillip_Shock1;~Arnu_Pretorius1", "gender": "M;M;M;M", "homepage": ";;http://www.shocklab.net;", "dblp": ";258/0370.html;161/9917;188/4368", "google_scholar": "YGkp5PYAAAAJ;nkDOU-kAAAAJ;lR_UV54AAAAJ;zZ6ydrAAAAAJ", "orcid": ";0000-0003-4329-8137;0000-0003-3757-0376;", "linkedin": "claude-formanek/;asadjeewa;;arnupretorius/", "or_profile": "~Juan_Claude_Formanek1;~Asad_Jeewa1;~Jonathan_Phillip_Shock1;~Arnu_Pretorius1", "aff": "University of Cape Town;University of KwaZulu-Natal;University of Cape Town;InstaDeep", "aff_domain": "uct.ac.za;ukzn.ac.za;uct.ac.za;instadeep.com", "position": "PhD student;Lecturer;Lecturer;Researcher", "bibtex": "@misc{\nformanek2023offthegrid,\ntitle={Off-the-Grid {MARL}: Datasets with Baselines for Offline Multi-Agent Reinforcement Learning},\nauthor={Juan Claude Formanek and Asad Jeewa and Jonathan Phillip Shock and Arnu Pretorius},\nyear={2023},\nurl={https://openreview.net/forum?id=7fRThuXp3h}\n}", "github": "", "project": "", "reviewers": "ThDq;nd1b;kDbB;EyuJ;eGfF", "site": "https://openreview.net/forum?id=7fRThuXp3h", "pdf_size": 2138110, "rating": "4;5;6;7;7", "confidence": "4;4;4;3;3", "wc_summary_and_contributions": "43;35;166;33;58", "wc_strengths": "37;42;63;76;109", "wc_improvement": "52;69;110;23;130", "wc_limitations": "10;10;105;1;23", "wc_correctness": "52;8;8;1;15", "wc_clarity": "7;8;10;1;51", "wc_relation_to_prior_work": "7;17;38;1;19", "wc_documentation": "1;21;24;1;24", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "210;211;525;138;430", "wc_reply_reviewers": "122;27;84;10;128", "wc_reply_authors": 
"1701;891;429;277;744", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;3;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 67.0, 50.27524241612366 ], "wc_strengths_avg": [ 65.4, 25.973832986296035 ], "wc_improvement_avg": [ 76.8, 38.73706235635325 ], "wc_limitations_avg": [ 29.8, 38.24866010725082 ], "wc_correctness_avg": [ 16.8, 18.148278155241066 ], "wc_clarity_avg": [ 15.4, 18.051038751274124 ], "wc_relation_to_prior_work_avg": [ 16.4, 12.642784503423286 ], "wc_documentation_avg": [ 14.2, 10.833282051160673 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 302.8, 148.15586387315219 ], "wc_reply_reviewers_avg": [ 74.2, 48.217839022502865 ], "wc_reply_authors_avg": [ 808.4, 496.7967793776445 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 1.6 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.840168050416806, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10296668573684770739&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Cape Town;University of KwaZulu-Natal;InstaDeep", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uct.ac.za;https://ukzn.ac.za;https://www.instadeep.com", "aff_unique_abbr": "UCT;UKZN;InstaDeep", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "South Africa;United Kingdom" }, { "title": "Unsupervised Optical Flow Estimation with Dynamic Timing Representation for Spike Camera", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72692", "id": "7gbjsgcN5p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96810b6d4752abe7bfb91f234c51e9e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7gbjsgcN5p", "openreview": "https://openreview.net/forum?id=7gbjsgcN5p", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72692", "video": "https://nips.cc/virtual/2023/poster/72692", "author_site": "Lujie Xia, Ziluo Ding, Rui Zhao, Jiyuan Zhang, Lei Ma, Zhaofei Yu, Tiejun Huang, Ruiqin Xiong", "tldr": "", "abstract": "Efficiently selecting an appropriate spike stream data length to extract precise information is the key to the spike vision tasks. To address this issue, we propose a dynamic timing representation for spike streams. Based on multi-layers architecture, it applies dilated convolutions on temporal dimension to extract features on multi-temporal scales with few parameters. And we design layer attention to dynamically fuse these features. Moreover, we propose an unsupervised learning method for optical flow estimation in a spike-based manner to break the dependence on labeled data. In addition, to verify the robustness, we also build a spike-based synthetic validation dataset for extreme scenarios in autonomous driving, denoted as SSES dataset. It consists of various corner cases. Experiments show that our method can predict optical flow from spike streams in different high-speed scenes, including real scenes. For instance, our method achieves $15\\%$ and $19\\%$ error reduction on PHM dataset compared to the best spike-based work, SCFlow, in $\\Delta t=10$ and $\\Delta t=20$ respectively, using the same settings as in previous works. 
The source code and dataset are available at \\href{https://github.com/Bosserhead/USFlow}{https://github.com/Bosserhead/USFlow}.", "keywords": "Optical flow;unsupervised learning;spike camera", "primary_area": "", "supplementary_material": "/attachment/fb217de8e6be39bfad4ef076ebebbcbe73fcccb8.zip", "author": "Lujie Xia;Ziluo Ding;Rui Zhao;Jiyuan Zhang;Lei Ma;Zhaofei Yu;Tiejun Huang;Ruiqin Xiong", "authorids": "~Lujie_Xia1;~Ziluo_Ding1;~Rui_Zhao11;~Jiyuan_Zhang3;~Lei_Ma3;~Zhaofei_Yu1;~Tiejun_Huang1;~Ruiqin_Xiong1", "gender": "M;M;M;M;Not Specified;M;M;M", "homepage": ";;http:\\\\ruizhao26.github.io;;https://nbic.pku.edu.cn/rcdw/kyry/02c5f5ce8e254b1e82a48bebd0a24c33.htm;https://yuzhaofei.github.io;https://idm.pku.edu.cn/~tjhuang/;http://idm.pku.edu.cn/staff/xiongruiqin/home.html", "dblp": ";267/2359;26/2578-10;;20/6534-8;166/0573;h/TiejunHuang;12/6908", "google_scholar": "https://scholar.google.com/citations?hl=en;;Ju7_T9cAAAAJ;ukHrw0IAAAAJ;;qaUgD50AAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;https://scholar.google.com.tw/citations?user=46Rur-YAAAAJ", "orcid": "0000-0002-0218-1760;;0000-0002-8892-9222;;0000-0001-6024-3854;;0000-0002-4234-6099;0000-0001-9796-0478", "linkedin": ";ziluo/;rui-zhao-247055189/;jiyuanzhang-leo;maleiwhat/;;;", "or_profile": "~Lujie_Xia1;~Ziluo_Ding1;~Rui_Zhao11;~Jiyuan_Zhang3;~Lei_Ma3;~Zhaofei_Yu1;~Tiejun_Huang1;~Ruiqin_Xiong1", "aff": "Peking University;Peking University;Peking University;Peking University;Beijing Academy of Artifical Intelligence;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;baai.ac.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;Principal Researcher;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nxia2023unsupervised,\ntitle={Unsupervised Optical Flow Estimation with Dynamic Timing Representation for Spike Camera},\nauthor={Lujie Xia and Ziluo Ding and Rui Zhao and Jiyuan Zhang and Lei Ma and Zhaofei Yu and Tiejun Huang and Ruiqin Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7gbjsgcN5p}\n}", "github": "", "project": "", "reviewers": "2gct;PCU5;Hpzy;eenq;fVdD", "pdf_size": 2052922, "rating": "4;6;6;6;6", "confidence": "5;5;4;3;3", "soundness": "2;3;2;3;2", "novelty": "3;3;2;3;2", "presentation": "3;3;1;3;3", "wc_summary": "42;61;108;64;67", "wc_strengths": "32;88;68;59;45", "wc_weaknesses": "198;91;161;82;104", "wc_questions": "41;51;419;632;5", "wc_limitations": "26;34;9;1;39", "wc_review": "339;325;765;838;260", "wc_reply_reviewers": "250;114;64;12;27", "wc_reply_authors": "356;204;17;8;27", "reply_reviewers": "1;3;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 68.4, 21.638853943774382 ], "wc_strengths_avg": [ 58.4, 19.21041384249699 ], "wc_weaknesses_avg": [ 127.2, 44.82588537887456 ], "wc_questions_avg": [ 229.6, 251.28119706814513 ], "wc_limitations_avg": [ 21.8, 14.551975810864997 ], "wc_review_avg": [ 505.4, 244.32322853138626 ], "wc_reply_reviewers_avg": [ 93.4, 85.84544251152765 ], "wc_reply_authors_avg": [ 122.4, 137.49559992959774 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 
], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5590169943749476, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5807494432724462575&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;baai.ac.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;0;0", "aff_unique_norm": "Peking University;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.baaic.cn", "aff_unique_abbr": "Peking U;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Physics-Driven ML-Based Modelling for Correcting Inverse Estimation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72691", "id": "7h1YaSGaHS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3268353cd4ff87451347f242c7401773-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7h1YaSGaHS", "openreview": "https://openreview.net/forum?id=7h1YaSGaHS", "poster": "/media/PosterPDFs/NeurIPS%202023/72691.png?t=1698474029.2199948", "slides": "https://nips.cc/virtual/2023/poster/72691", "video": "https://nips.cc/virtual/2023/poster/72691", "author_site": "ruiyuan kang, Tingting Mu, Panagiotis Liatsis, Dimitrios Kyritsis", "tldr": "", "abstract": "When deploying machine learning estimators in science and engineering (SAE) domains, it is critical to avoid failed estimations that can have disastrous consequences, e.g., in aero engine design. This work focuses on detecting and correcting failed state estimations before adopting them in SAE inverse problems, by utilizing simulations and performance metrics guided by physical laws. We suggest to flag a machine learning estimation when its physical model error exceeds a feasible threshold, and propose a novel approach, GEESE, to correct it through optimization, aiming at delivering both low error and high efficiency. The key designs of GEESE include (1) a hybrid surrogate error model to provide fast error estimations to reduce simulation cost and to enable gradient based backpropagation of error feedback, and (2) two generative models to approximate the probability distributions of the candidate states for simulating the exploitation and exploration behaviours. All three models are constructed as neural networks. GEESE is tested on three real-world SAE inverse problems and compared to a number of state-of-the-art optimization/search approaches. 
Results show that it fails least often at finding a feasible state correction and, in general, requires physical evaluations less frequently.", "keywords": "Failure detection;Physical evaluation;Network-based optimization;Generative model;Hybrid surrogate model", "primary_area": "", "supplementary_material": "/attachment/6233d41e409fe9fde524e287882fd8e1b2b8a049.zip", "author": "Ruiyuan Kang;Tingting Mu;Panos Liatsis;Dimitrios Kyritsis", "authorids": "~Ruiyuan_Kang1;~Tingting_Mu1;~Panos_Liatsis1;dimitrios.kyritsis@ku.ac.ae", "gender": "M;F;M;", "homepage": ";https://personalpages.manchester.ac.uk/staff/tingting.mu/Site/About_Me.html;https://www.ku.ac.ae/college-people/panos-liatsis;", "dblp": ";89/4352;;", "google_scholar": "Vqe87eYAAAAJ;https://scholar.google.co.uk/citations?user=dOG10IUAAAAJ;https://scholar.google.com.sg/citations?user=Z8vhYQ8AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ruiyuan_Kang1;~Tingting_Mu1;~Panos_Liatsis1;dimitrios.kyritsis@ku.ac.ae", "aff": "Khalifa University of Science, Technology and Research;University of Manchester;Khalifa University of Science, Technology and Research;", "aff_domain": "ku.ac.ae;manchester.ac.uk;ku.ac.ae;", "position": "PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nkang2023physicsdriven,\ntitle={Physics-Driven {ML}-Based Modelling for Correcting Inverse Estimation},\nauthor={Ruiyuan Kang and Tingting Mu and Panos Liatsis and Dimitrios Kyritsis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7h1YaSGaHS}\n}", "github": "", "project": "", "reviewers": "mU9t;aYMv;EBsP;Kiaj;Rpgy", "pdf_size": 788378, "rating": "5;5;6;8;8", "confidence": "3;4;4;3;4", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "136;222;87;82;110", "wc_strengths": "77;26;41;64;45", "wc_weaknesses": "146;184;233;54;195", "wc_questions": "23;54;151;1;33", "wc_limitations": "9;13;24;13;45", "wc_review": "391;499;536;214;428", "wc_reply_reviewers": "157;0;59;0;4", "wc_reply_authors": "348;0;13;0;7", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 127.4, 51.02783554100643 ], "wc_strengths_avg": [ 50.6, 17.917589123540033 ], "wc_weaknesses_avg": [ 162.4, 60.88218130126417 ], "wc_questions_avg": [ 52.4, 52.16742278472265 ], "wc_limitations_avg": [ 20.8, 13.090454537562858 ], "wc_review_avg": [ 413.6, 112.10102586506513 ], "wc_reply_reviewers_avg": [ 44.0, 60.771703941884006 ], "wc_reply_authors_avg": [ 73.6, 137.2859788907811 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.06019292654288467, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11015012195234121861&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "ku.ac.ae;manchester.ac.uk;ku.ac.ae;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Khalifa University of Science, Technology and Research;University of Manchester", "aff_unique_dep": ";", "aff_unique_url": "https://www.kustar.ac.ae;https://www.manchester.ac.uk", "aff_unique_abbr": "KUSTAR;UoM",
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Arab Emirates;United Kingdom" }, { "title": "A Theory of Link Prediction via Relational Weisfeiler-Leman on Knowledge Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72690", "id": "7hLlZNrkt5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3eceb70f47690051d6769739fbf6294b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7hLlZNrkt5", "openreview": "https://openreview.net/forum?id=7hLlZNrkt5", "poster": "/media/PosterPDFs/NeurIPS%202023/72690.png?t=1701274076.8762388", "slides": "https://nips.cc/virtual/2023/poster/72690", "video": "https://nips.cc/virtual/2023/poster/72690", "author_site": "Xingyue Huang, Miguel Romero, Ismail Ceylan, Pablo Barcel\u00f3", "tldr": "", "abstract": "Graph neural networks are prominent models for representation learning over graph-structured data. While the capabilities and limitations of these models are well-understood for simple graphs, our understanding remains incomplete in the context of knowledge graphs. Our goal is to provide a systematic understanding of the landscape of graph neural networks for knowledge graphs pertaining to the prominent task of link prediction. Our analysis entails a unifying perspective on seemingly unrelated models and unlocks a series of other models. The expressive power of various models is characterized via a corresponding relational Weisfeiler-Leman algorithm. \nThis analysis is extended to provide a precise logical characterization of the class of functions captured by a class of graph neural networks. The theoretical findings presented in this paper explain the benefits of some widely employed practical design choices, which are validated empirically.", "keywords": "graph neural networks;knowledge graphs;expressivity;logical characterization", "primary_area": "", "supplementary_material": "/attachment/3ce5c4d2443b7b591c841be7b9f6090dff12ae62.zip", "author": "Xingyue Huang;Miguel Romero Orth;Ismail Ilkan Ceylan;Pablo Barcelo", "authorids": "~Xingyue_Huang1;~Miguel_Romero_Orth1;~Ismail_Ilkan_Ceylan2;~Pablo_Barcelo1", "gender": "M;M;;M", "homepage": "https://github.com/HxyScotthuang;http://www.mromero.cl;https://www.cs.ox.ac.uk/people/ismaililkan.ceylan/;https://pbarcelo.ing.uc.cl/", "dblp": "208/4778;57/4918-1.html;147/6111;29/5169", "google_scholar": "qah4McsAAAAJ;NFNyaIAAAAAJ;avJ5kQcAAAAJ;9OH3PokAAAAJ", "orcid": ";0000-0002-2615-6455;0000-0003-4118-4689;0000-0003-2293-2653", "linkedin": "xingyue-huang-1251651a2/;;;", "or_profile": "~Xingyue_Huang1;~Miguel_Romero_Orth1;~Ismail_Ilkan_Ceylan2;~Pablo_Barcelo1", "aff": "University of Oxford;Universidad \"Adolfo Iba\u00f1ez\";University of Oxford;Pontificia Universidad Cat\u00f3lica", "aff_domain": "ox.ac.uk;uai.cl;oxford.ac.uk;uc.cl", "position": "MS student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023a,\ntitle={A Theory of Link Prediction via Relational Weisfeiler-Leman on Knowledge Graphs},\nauthor={Xingyue Huang and Miguel Romero Orth and Ismail Ilkan Ceylan and Pablo Barcelo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7hLlZNrkt5}\n}", "github": "", "project": "", "reviewers": "cjW9;kRBD;1Kyu;7NY6", "pdf_size": 536470, "rating": "4;5;6;7", "confidence": "3;3;2;4", "soundness": "3;4;3;4", "novelty": "2;3;3;3", "presentation": 
"3;3;2;4", "wc_summary": "61;101;64;56", "wc_strengths": "47;54;79;123", "wc_weaknesses": "86;91;312;116", "wc_questions": "5;48;3;24", "wc_limitations": "1;15;7;4", "wc_review": "200;309;465;323", "wc_reply_reviewers": "18;68;65;19", "wc_reply_authors": "494;437;191;18", "reply_reviewers": "1;2;2;1", "reply_authors": "3;3;4;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.5, 17.839562774911272 ], "wc_strengths_avg": [ 75.75, 29.76050234791073 ], "wc_weaknesses_avg": [ 151.25, 93.50233954292267 ], "wc_questions_avg": [ 20.0, 18.12456896039186 ], "wc_limitations_avg": [ 6.75, 5.2141634036535525 ], "wc_review_avg": [ 324.25, 94.18432725246807 ], "wc_reply_reviewers_avg": [ 42.5, 24.026027553467927 ], "wc_reply_authors_avg": [ 285.0, 191.64420158199414 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4561546732872986854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;uai.cl;oxford.ac.uk;uc.cl", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Oxford;Universidad Adolfo Iba\u00f1ez;Pontificia Universidad Cat\u00f3lica", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.uai.cl;https://www.puc.cl", "aff_unique_abbr": "Oxford;UAI;PUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;Chile" }, { "title": "Structure from Duplicates: Neural Inverse Graphics from a Pile of Objects", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72689", "id": "7irm2VJARb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3764a9c8abc84c7482f778fefc24f10b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7irm2VJARb", "openreview": "https://openreview.net/forum?id=7irm2VJARb", "poster": "/media/PosterPDFs/NeurIPS%202023/72689.png?t=1702146659.4365313", "slides": "https://nips.cc/virtual/2023/poster/72689", "video": "https://nips.cc/virtual/2023/poster/72689", "author_site": "Tianhang Cheng, Wei-Chiu Ma, Kaiyu Guan, Antonio Torralba, Shenlong Wang", "tldr": "", "abstract": "Abstract Our world is full of identical objects (\\emph{e.g.}, cans of coke, cars of same model). These duplicates, when seen together, provide additional and strong cues for us to effectively reason about 3D. Inspired by this observation, we introduce Structure from Duplicates (SfD), a novel inverse graphics framework that reconstructs geometry, material, and illumination from a single image containing multiple identical objects. SfD begins by identifying multiple instances of an object within an image, and then jointly estimates the 6DoF pose for all instances. 
An inverse graphics pipeline is subsequently employed to jointly reason about the shape and material of the object, as well as the environment light, while adhering to the shared geometry and material constraint across instances.\nOur primary contributions involve utilizing object duplicates as a robust prior for single-image inverse graphics and proposing an in-plane rotation-robust Structure from Motion (SfM) formulation for joint 6-DoF object pose estimation. By leveraging multi-view cues from a single image, SfD generates more realistic and detailed 3D reconstructions, significantly outperforming existing single-image reconstruction models and multi-view reconstruction approaches with a similar or greater number of observations.", "keywords": "3d reconstruction;inverse rendering;pose estimation;single view reconstruction;nerf;duplicates", "primary_area": "", "supplementary_material": "/attachment/57851e630e6721f6ee8c48a372762b4e8c9e1071.pdf", "author": "Tianhang Cheng;Wei-Chiu Ma;Kaiyu Guan;Antonio Torralba;Shenlong Wang", "authorids": "~Tianhang_Cheng1;~Wei-Chiu_Ma1;~Kaiyu_Guan1;~Antonio_Torralba1;~Shenlong_Wang1", "gender": "M;M;M;M;M", "homepage": "https://tianhang-cheng.github.io/;https://www.cs.cornell.edu/~weichiu/;http://faculty.nres.illinois.edu/~kaiyuguan/;http://web.mit.edu/torralba/www//;https://shenlong.web.illinois.edu/", "dblp": ";151/4277;;t/AntonioBTorralba;117/4842", "google_scholar": "IXVAaV8AAAAJ;SVIdh6AAAAAJ;YLjpc_cAAAAJ;https://scholar.google.com.tw/citations?user=8cxDHS4AAAAJ;QFpswmcAAAAJ", "orcid": "0009-0001-2729-2758;;;;", "linkedin": "tianhang-cheng-4528b0296/;;;;shenlong-wang-3496023b", "or_profile": "~Tianhang_Cheng1;~Wei-Chiu_Ma1;~Kaiyu_Guan1;~Antonio_Torralba1;~Shenlong_Wang1", "aff": "Department of Computer Science;Massachusetts Institute of Technology;University of Illinois, Urbana Champaign;Massachusetts Institute of Technology;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;mit.edu;illinois.edu;mit.edu;illinois.edu", "position": "MS student;PhD student;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncheng2023structure,\ntitle={Structure from Duplicates: Neural Inverse Graphics from a Pile of Objects},\nauthor={Tianhang Cheng and Wei-Chiu Ma and Kaiyu Guan and Antonio Torralba and Shenlong Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7irm2VJARb}\n}", "github": "", "project": "", "reviewers": "4hmW;vUuZ;qVSc;vbZQ;sRgJ", "pdf_size": 41236357, "rating": "3;4;6;7;8", "confidence": "4;3;5;4;4", "soundness": "2;2;3;3;4", "novelty": "1;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "45;81;114;196;107", "wc_strengths": "85;139;92;182;124", "wc_weaknesses": "336;100;281;287;39", "wc_questions": "45;70;92;22;24", "wc_limitations": "53;36;26;52;25", "wc_review": "564;426;605;739;319", "wc_reply_reviewers": "64;0;66;128;200", "wc_reply_authors": "367;0;16;195;52", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.6, 1.8547236990991407 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 108.6, 49.95437918741459 ], "wc_strengths_avg": [ 124.4, 35.0234207352737 ], "wc_weaknesses_avg": [ 208.6, 116.77088678262233 ], "wc_questions_avg": [ 50.6, 27.00814691903167 ], "wc_limitations_avg": [ 38.4, 12.142487389328432 ], "wc_review_avg": [ 530.6,
145.46284749034717 ], "wc_reply_reviewers_avg": [ 91.6, 67.65086843492847 ], "wc_reply_authors_avg": [ 126.0, 138.77607863028842 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.34099716973523675, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18004319225855873388&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.illinois.edu;mit.edu;illinois.edu;mit.edu;illinois.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;2", "aff_unique_norm": "Unknown Institution;Massachusetts Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": ";https://web.mit.edu;https://illinois.edu", "aff_unique_abbr": ";MIT;UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";United States" }, { "title": "A Comprehensive Benchmark for Neural Human Radiance Fields", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73685", "id": "7kc4gtEk3b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e566c91d381bd7a45647d9a90838817-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=7kc4gtEk3b", "openreview": "https://openreview.net/forum?id=7kc4gtEk3b", "poster": "/media/PosterPDFs/NeurIPS%202023/73685.png?t=1701602201.2401204", "slides": "https://nips.cc/virtual/2023/poster/73685", "video": "https://nips.cc/virtual/2023/poster/73685", "author_site": "Kenkun Liu, Derong Jin, Ailing Zeng, Xiaoguang Han, Lei Zhang", "tldr": "", "abstract": "The past two years have witnessed a significant increase in interest concerning NeRF-based human body rendering. While this surge has propelled considerable advancements, it has also led to an influx of methods and datasets. This explosion complicates experimental settings and makes fair comparisons challenging. In this work, we design and execute thorough studies into unified evaluation settings and metrics to establish a fair and reasonable benchmark for human NeRF models. To reveal the effects of extant models, we benchmark them against diverse and hard scenes. Additionally, we construct a cross-subject benchmark pre-trained on large-scale datasets to assess generalizable methods. Finally, we analyze the essential components for animatability and generalizability, and make HumanNeRF from monocular videos generalizable, as the inaugural baseline. 
We hope these benchmarks and analyses could serve the community.", "keywords": "Benchmark;Human Body Rendering;Human NeRF;Generalizable NeRF;Novel Pose Rendering", "primary_area": "", "supplementary_material": "/attachment/6edd5d3aa06eaddf33e497aa5f787a2bd2841ddf.pdf", "author": "Kenkun Liu;Derong Jin;Ailing Zeng;Xiaoguang Han;Lei Zhang", "authorids": "~Kenkun_Liu1;~Derong_Jin1;~Ailing_Zeng1;~Xiaoguang_Han2;~Lei_Zhang23", "gender": "M;M;F;M;M", "homepage": "https://kenkunliu.github.io/PersonalPage/;https://github.com/HuMathe;https://ailingzeng.site/;https://gaplab.cuhk.edu.cn/;https://www.leizhang.org/", "dblp": "278/0934;;226/4720;60/8294;z/LeiZhang", "google_scholar": "qy40HA0AAAAJ;;Tn7fzS8AAAAJ;;fIlGZToAAAAJ", "orcid": ";;;;", "linkedin": "kenkun-liu-7228b31a2/;;%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;;", "or_profile": "~Kenkun_Liu1;~Derong_Jin1;~Ailing_Zeng1;~Xiaoguang_Han2;~Lei_Zhang1", "aff": "The Chinese University of Hong Kong (Shenzhen\uff09;The Chinese University of Hong Kong, Shenzhen;International Digital Economy Academy;The Chinese University of Hong Kong, Shenzhen;International Digital Economy Academy", "aff_domain": "link.cuhk.edu.cn;cuhk.edu.cn;idea.edu.cn;cuhk.edu.cn;idea.edu.cn", "position": "PhD student;Undergrad student;Researcher;Assistant Professor;Chief Scientist", "bibtex": "@inproceedings{\nliu2023a,\ntitle={A Comprehensive Benchmark for Neural Human Radiance Fields},\nauthor={Kenkun Liu and Derong Jin and Ailing Zeng and Xiaoguang Han and Lei Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=7kc4gtEk3b}\n}", "github": "", "project": "", "reviewers": "dvPh;gjLn;1eYJ;a43V", "pdf_size": 6637525, "rating": "5;6;7;7", "confidence": "3;5;4;3", "wc_summary_and_contributions": "48;42;125;57", "wc_strengths": "43;65;436;64", "wc_improvement": "244;598;654;117", "wc_limitations": "8;11;123;1", "wc_correctness": "5;142;54;1", "wc_clarity": "2;7;12;1", "wc_relation_to_prior_work": "1;1;39;1", "wc_documentation": "4;1;22;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "356;868;1466;244", "wc_reply_reviewers": "0;0;21;0", "wc_reply_authors": "509;849;787;455", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 68.0, 33.3391661563393 ], "wc_strengths_avg": [ 152.0, 164.20261873673027 ], "wc_improvement_avg": [ 403.25, 228.09140163539703 ], "wc_limitations_avg": [ 35.75, 50.50433149740723 ], "wc_correctness_avg": [ 50.5, 56.8000880281008 ], "wc_clarity_avg": [ 5.5, 4.387482193696061 ], "wc_relation_to_prior_work_avg": [ 10.5, 16.454482671904334 ], "wc_documentation_avg": [ 7.0, 8.74642784226795 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 733.5, 483.9325882806406 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 650.0, 170.49633427144408 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8096195458583449391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "link.cuhk.edu.cn;cuhk.edu.cn;idea.edu.cn;cuhk.edu.cn;idea.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Chinese University of Hong 
Kong;International Digital Economy Academy", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;", "aff_unique_abbr": "CUHK;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "A Sublinear-Time Spectral Clustering Oracle with Improved Preprocessing Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72688", "id": "7nXaoclHed", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/82aec8518602748540a42b783468c94d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7nXaoclHed", "openreview": "https://openreview.net/forum?id=7nXaoclHed", "poster": "/media/PosterPDFs/NeurIPS%202023/72688.png?t=1701588568.181395", "slides": "https://nips.cc/virtual/2023/poster/72688", "video": "https://nips.cc/virtual/2023/poster/72688", "author_site": "Ranran Shen, Pan Peng", "tldr": "", "abstract": "We address the problem of designing a sublinear-time spectral clustering oracle for graphs that exhibit strong clusterability. Such graphs contain $k$ latent clusters, each characterized by a large inner conductance (at least $\\varphi$) and a small outer conductance (at most $\\varepsilon$). Our aim is to preprocess the graph to enable clustering membership queries, with the key requirement that both preprocessing and query answering should be performed in sublinear time, and the resulting partition should be consistent with a $k$-partition that is close to the ground-truth clustering. Previous oracles have relied on either a $\\textrm{poly}(k)\\log n$ gap between inner and outer conductances or exponential (in $k/\\varepsilon$) preprocessing time. Our algorithm relaxes these assumptions, albeit at the cost of a slightly higher misclassification ratio. We also show that our clustering oracle is robust against a few random edge deletions. 
To validate our theoretical bounds, we conducted experiments on synthetic networks.", "keywords": "Sublinear-time algorithms;Spectral Clustering;Graph Clustering;Random Walks", "primary_area": "", "supplementary_material": "/attachment/3db6af87e9955d53842171cfcfba348222963127.zip", "author": "Ranran Shen;Pan Peng", "authorids": "~Ranran_Shen1;~Pan_Peng1", "gender": "F;M", "homepage": "https://ranran-shen.github.io/;http://staff.ustc.edu.cn/~ppeng/", "dblp": ";08/9919-1", "google_scholar": "BvA-wRQAAAAJ;", "orcid": "0009-0005-3310-7706;0000-0003-2700-5699", "linkedin": ";", "or_profile": "~Ranran_Shen1;~Pan_Peng1", "aff": "University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nshen2023a,\ntitle={A Sublinear-Time Spectral Clustering Oracle with Improved Preprocessing Time},\nauthor={Ranran Shen and Pan Peng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7nXaoclHed}\n}", "github": "", "project": "", "reviewers": "Wy1e;xqth;2P47", "pdf_size": 1398633, "rating": "6;6;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "3;3;3", "wc_summary": "62;159;447", "wc_strengths": "62;64;21", "wc_weaknesses": "104;117;32", "wc_questions": "92;74;19", "wc_limitations": "4;16;1", "wc_review": "324;430;520", "wc_reply_reviewers": "27;81;62", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 222.66666666666666, 163.4958375277147 ], "wc_strengths_avg": [ 49.0, 19.8158185969358 ], "wc_weaknesses_avg": [ 84.33333333333333, 37.38389433373087 ], "wc_questions_avg": [ 61.666666666666664, 31.05192783422991 ], "wc_limitations_avg": [ 7.0, 6.48074069840786 ], "wc_review_avg": [ 424.6666666666667, 80.10548600994122 ], "wc_reply_reviewers_avg": [ 56.666666666666664, 22.365648262955006 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AOjazsjARIMJ:scholar.google.com/&scioq=A+Sublinear-Time+Spectral+Clustering+Oracle+with+Improved+Preprocessing+Time&hl=en&as_sdt=0,39", "gs_version_total": 7, "email": "ustc.edu.cn;ustc.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "AMAG: Additive, Multiplicative and Adaptive Graph Neural Network For Forecasting Neuron Activity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72687", "id": "7ntI4kcoqG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c70ba3591d0694a535089e1c25888d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7ntI4kcoqG", "openreview": "https://openreview.net/forum?id=7ntI4kcoqG", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72687.png?t=1702150351.6343293", "slides": "https://nips.cc/virtual/2023/poster/72687", "video": "https://nips.cc/virtual/2023/poster/72687", "author_site": "Jingyuan Li, Leo Scholl, Trung Le, Pavithra Rajeswaran, Amy Orsborn, Eli Shlizerman", "tldr": "", "abstract": "Latent Variable Models (LVMs) propose to model the dynamics of neural populations by capturing low-dimensional structures that represent features involved in neural activity. Recent LVMs are based on deep learning methodology where a deep neural network is trained to reconstruct the same neural activity given as input and as a result to build the latent representation. Without taking past or future activity into account such a task is non-causal. In contrast, the task of forecasting neural activity based on given input extends the reconstruction task. LVMs that are trained on such a task could potentially capture temporal causality constraints within its latent representation. Forecasting has received less attention than reconstruction due to recording challenges such as limited neural measurements and trials. In this work, we address modeling neural population dynamics via the forecasting task and improve forecasting performance by including a prior, which consists of pairwise neural unit interaction as a multivariate dynamic system. Our proposed model---Additive, Multiplicative, and Adaptive Graph Neural Network (AMAG)---leverages additive and multiplicative message-passing operations analogous to the interactions in neuronal systems and adaptively learns the interaction among neural units to forecast their future activity. We demonstrate the advantage of AMAG compared to non-GNN based methods on synthetic data and multiple modalities of neural recordings (field potentials from penetrating electrodes or surface-level micro-electrocorticography) from four rhesus macaques. 
Our results show the ability of AMAG to recover ground-truth spatial interactions and to yield estimates of the future dynamics of the neural population.", "keywords": "Neuroscience and Cognitive Science;Neural Activity Forecasting;Graph Neural Network", "primary_area": "", "supplementary_material": "", "author": "Jingyuan Li;Leo Scholl;Trung Le;Pavithra Rajeswaran;Amy L Orsborn;Eli Shlizerman", "authorids": "~Jingyuan_Li3;lscholl@uw.edu;~Trung_Le4;~Pavithra_Rajeswaran1;~Amy_L_Orsborn1;~Eli_Shlizerman1", "gender": ";;M;F;F;", "homepage": "https://scholar.google.com/citations?user=BEhp15IAAAAJ&hl=en&oi=ao;;;;http://faculty.washington.edu/aorsborn/;http://faculty.washington.edu/shlizee/", "dblp": ";;88/8728;;;00/9501", "google_scholar": "BEhp15IAAAAJ;;;lZ6kdz8AAAAJ;8r_4fMkAAAAJ;oJnSO50AAAAJ", "orcid": ";;0000-0002-7047-8451;;0000-0003-4131-5781;0000-0002-3136-4531", "linkedin": ";;trung-le/;;;", "or_profile": "~Jingyuan_Li3;lscholl@uw.edu;~Trung_Le4;~Pavithra_Rajeswaran1;~Amy_L_Orsborn1;~Eli_Shlizerman1", "aff": "University of Washington, Seattle;;Sandia National Laboratories;University of Washington;University of Washington, Seattle;University of Washington", "aff_domain": "uw.edu;;sandia.gov;uw.edu;uw.edu;u.washington.edu", "position": "PhD student;;Intern;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nli2023amag,\ntitle={{AMAG}: Additive, Multiplicative and Adaptive Graph Neural Network For Forecasting Neuron Activity},\nauthor={Jingyuan Li and Leo Scholl and Trung Le and Pavithra Rajeswaran and Amy L Orsborn and Eli Shlizerman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7ntI4kcoqG}\n}", "github": "", "project": "", "reviewers": "Byrz;zdkz;peT1;Yw6R;BP1k", "pdf_size": 3474717, "rating": "5;6;6;6;7", "confidence": "4;4;2;3;2", "soundness": "3;3;3;3;4", "novelty": "3;3;2;3;4", "presentation": "3;2;3;3;4", "wc_summary": "90;78;103;43;57", "wc_strengths": "33;54;26;41;78", "wc_weaknesses": "154;255;44;74;319", "wc_questions": "138;60;94;32;163", "wc_limitations": "15;52;8;4;18", "wc_review": "430;499;275;194;635", "wc_reply_reviewers": "103;7;23;0;0", "wc_reply_authors": "615;26;22;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 74.2, 21.73844520659194 ], "wc_strengths_avg": [ 46.4, 18.33684814792335 ], "wc_weaknesses_avg": [ 169.2, 104.66212304363026 ], "wc_questions_avg": [ 97.4, 48.24769424542483 ], "wc_limitations_avg": [ 19.4, 17.03643155123748 ], "wc_review_avg": [ 406.6, 157.33353107332206 ], "wc_reply_reviewers_avg": [ 26.6, 39.113169137772516 ], "wc_reply_authors_avg": [ 132.6, 241.4420013170865 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7813246751367370312&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uw.edu;;sandia.gov;uw.edu;uw.edu;u.washington.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Washington;Sandia National Laboratories", "aff_unique_dep": ";", "aff_unique_url":
"https://www.washington.edu;https://www.sandia.gov", "aff_unique_abbr": "UW;SNL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Energy-Efficient Scheduling with Predictions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72686", "id": "7ntySBR3Ey", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f99bb39502f09c4825e89760b4e1ad04-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7ntySBR3Ey", "openreview": "https://openreview.net/forum?id=7ntySBR3Ey", "poster": "/media/PosterPDFs/NeurIPS%202023/72686.png?t=1701629705.5340078", "slides": "https://nips.cc/virtual/2023/poster/72686", "video": "https://nips.cc/virtual/2023/poster/72686", "author_site": "Eric Balkanski, Noemie Perivier, Clifford Stein, Hao-Ting Wei", "tldr": "", "abstract": "An important goal of modern scheduling systems is to efficiently manage power usage. In energy-efficient scheduling, the operating system controls the speed at which a machine is processing jobs with the dual objective of minimizing energy consumption and optimizing the quality of service cost of the resulting schedule. Since machine-learned predictions about future requests can often be learned from historical data, a recent line of work on learning-augmented algorithms aims to achieve improved performance guarantees by leveraging predictions. In particular, for energy-efficient scheduling, Bamas et. al. [NeurIPS '20] and Antoniadis et. al. [SWAT '22]\n designed algorithms with predictions for the energy minimization with deadlines problem and achieved an improved competitive ratio when the prediction error is small while also maintaining worst-case bounds even when the prediction error is arbitrarily large.\n\nIn this paper, we consider a general setting for energy-efficient scheduling and provide a flexible learning-augmented algorithmic framework that takes as input an offline and an online algorithm for the desired energy-efficient scheduling problem. We show that, when the prediction error is small, this framework gives improved competitive ratios for many different energy-efficient scheduling problems, including energy minimization with deadlines, while also maintaining a bounded competitive ratio regardless of the prediction error. 
Finally, we empirically demonstrate that this framework achieves improved performance on real and synthetic datasets.", "keywords": "Scheduling;algorithms with predictions;speed scaling;energy minimization", "primary_area": "", "supplementary_material": "/attachment/31ca4f603515e244f0b0e6c9523d973bb984b88b.pdf", "author": "Eric Balkanski;Noemie Perivier;Clifford Stein;Hao-Ting Wei", "authorids": "~Eric_Balkanski2;~Noemie_Perivier1;~Clifford_Stein1;~Hao-Ting_Wei1", "gender": ";F;M;M", "homepage": "http://ericbalkanski.com;;http://www.columbia.edu/~cs2035/;", "dblp": ";294/1529;s/CliffordStein;215/4478.html", "google_scholar": ";iGfAGxEAAAAJ;https://scholar.google.com.tw/citations?user=r49_E2cAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Eric_Balkanski2;~Noemie_Perivier1;~Clifford_Stein1;~Hao-Ting_Wei1", "aff": "Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;columbia.edu", "position": "Assistant Professor;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nbalkanski2023energyefficient,\ntitle={Energy-Efficient Scheduling with Predictions},\nauthor={Eric Balkanski and Noemie Perivier and Clifford Stein and Hao-Ting Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7ntySBR3Ey}\n}", "github": "", "project": "", "reviewers": "BEvj;HbG7;fguw;RAEY", "pdf_size": 1432068, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "4;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "98;181;160;184", "wc_strengths": "43;67;64;15", "wc_weaknesses": "153;106;161;18", "wc_questions": "52;20;83;26", "wc_limitations": "8;12;4;11", "wc_review": "354;386;472;254", "wc_reply_reviewers": "160;72;162;5", "wc_reply_authors": "0;30;17;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 155.75, 34.60039739656179 ], "wc_strengths_avg": [ 47.25, 20.78911975048487 ], "wc_weaknesses_avg": [ 109.5, 56.85288031401751 ], "wc_questions_avg": [ 45.25, 24.893523254051445 ], "wc_limitations_avg": [ 8.75, 3.112474899497183 ], "wc_review_avg": [ 366.5, 77.97916388369396 ], "wc_reply_reviewers_avg": [ 99.75, 65.67486200975226 ], "wc_reply_authors_avg": [ 11.75, 12.616952880945542 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8464893274166688095&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "columbia.edu;columbia.edu;columbia.edu;columbia.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dis-inhibitory neuronal circuits can control the sign of synaptic plasticity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72685", "id": "7otRtfrRqo", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca22641c182b3b9608634edb4d09bc33-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7otRtfrRqo", "openreview": "https://openreview.net/forum?id=7otRtfrRqo", "poster": "/media/PosterPDFs/NeurIPS%202023/72685.png?t=1702058843.8180676", "slides": "https://nips.cc/virtual/2023/poster/72685", "video": "https://nips.cc/virtual/2023/poster/72685", "author_site": "Julian Rossbroich, Friedemann Zenke", "tldr": "", "abstract": "How neuronal circuits achieve credit assignment remains a central unsolved question in systems neuroscience. Various studies have suggested plausible solutions for back-propagating error signals through multi-layer networks. These purely functionally motivated models assume distinct neuronal compartments to represent local error signals that determine the sign of synaptic plasticity. However, this explicit error modulation is inconsistent with phenomenological plasticity models in which the sign depends primarily on postsynaptic activity. Here we show how a plausible microcircuit model and Hebbian learning rule derived within an adaptive control theory framework can resolve this discrepancy. Assuming errors are encoded in top-down dis-inhibitory synaptic afferents, we show that error-modulated learning emerges naturally at the circuit level when recurrent inhibition explicitly influences Hebbian plasticity. The same learning rule accounts for experimentally observed plasticity in the absence of inhibition and performs comparably to back-propagation of error (BP) on several non-linearly separable benchmarks. Our findings bridge the gap between functional and experimentally observed plasticity rules and make concrete predictions on inhibitory modulation of excitatory plasticity.", "keywords": "Credit assignment;hebbian plasticity;inhibitory microcircuits;bio-plausible learning", "primary_area": "", "supplementary_material": "", "author": "Julian Rossbroich;Friedemann Zenke", "authorids": "~Julian_Rossbroich1;~Friedemann_Zenke1", "gender": "M;M", "homepage": ";https://fzenke.net", "dblp": "311/6819;155/2110", "google_scholar": "-RFRIPQAAAAJ;_IxvO8QAAAAJ", "orcid": "0000-0002-1927-8198;0000-0003-1883-644X", "linkedin": ";", "or_profile": "~Julian_Rossbroich1;~Friedemann_Zenke1", "aff": "University of Basel;Friedrich Miescher Institute", "aff_domain": "unibas.ch;fmi.ch", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nrossbroich2023disinhibitory,\ntitle={Dis-inhibitory neuronal circuits can control the sign of synaptic plasticity},\nauthor={Julian Rossbroich and Friedemann Zenke},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7otRtfrRqo}\n}", "github": "", "project": "", "reviewers": "z6yQ;TQSB;A5Rt;EPnb", "pdf_size": 2679142, "rating": "5;6;7;7", "confidence": "4;5;4;3", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "3;4;3;4", "wc_summary": "52;101;57;101", "wc_strengths": "23;87;36;49", "wc_weaknesses": "129;258;279;78", "wc_questions": "199;211;653;19", "wc_limitations": "3;21;6;13", "wc_review": "406;678;1031;260", "wc_reply_reviewers": "25;75;76;46", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.75, 
23.31710745354149 ], "wc_strengths_avg": [ 48.75, 23.920441049445557 ], "wc_weaknesses_avg": [ 186.0, 84.77322690566875 ], "wc_questions_avg": [ 270.5, 233.56530136131096 ], "wc_limitations_avg": [ 10.75, 6.94172168845741 ], "wc_review_avg": [ 593.75, 293.6514728381249 ], "wc_reply_reviewers_avg": [ 55.5, 21.33658829335187 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8397363898095611996&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "unibas.ch;fmi.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Basel;Friedrich Miescher Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.unibas.ch;https://www.fmi.ch", "aff_unique_abbr": "UniBas;FMI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "MeGraph: Capturing Long-Range Interactions by Alternating Local and Hierarchical Aggregation on Multi-Scaled Graph Hierarchy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72684", "id": "7p5YWe8GqG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9034f4f90fbfad5b80f47fe3dd6cf51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7p5YWe8GqG", "openreview": "https://openreview.net/forum?id=7p5YWe8GqG", "poster": "/media/PosterPDFs/NeurIPS%202023/72684.png?t=1702158902.7053828", "slides": "https://nips.cc/virtual/2023/poster/72684", "video": "https://nips.cc/virtual/2023/poster/72684", "author_site": "Honghua Dong, Jiawei Xu, Yu Yang, Rui Zhao, Shiwen Wu, Chun Yuan, Xiu Li, Chris Maddison, Lei Han", "tldr": "", "abstract": "Graph neural networks, which typically exchange information between local neighbors, often struggle to capture long-range interactions (LRIs) within the graph. Building a graph hierarchy via graph pooling methods is a promising approach to address this challenge; however, hierarchical information propagation cannot entirely take over the role of local information aggregation. To balance locality and hierarchy, we integrate the local and hierarchical structures, represented by intra- and inter-graphs respectively, of a multi-scale graph hierarchy into a single mega graph. Our proposed MeGraph model consists of multiple layers alternating between local and hierarchical information aggregation on the mega graph. Each layer first performs local-aware message-passing on graphs of varied scales via the intra-graph edges, then fuses information across the entire hierarchy along the bidirectional pathways formed by inter-graph edges. By repeating this fusion process, local and hierarchical information could intertwine and complement each other. To evaluate our model, we establish a new Graph Theory Benchmark designed to assess LRI capture ability, in which MeGraph demonstrates dominant performance. Furthermore, MeGraph exhibits superior or equivalent performance to state-of-the-art models on the Long Range Graph Benchmark. 
The experimental results on commonly adopted real-world datasets further demonstrate the broad applicability of MeGraph.", "keywords": "Long-Range Interactions;Hierarchical Structure;Multi-Scale;Graph Pooling;Graph Neural Networks (GNNs)", "primary_area": "", "supplementary_material": "/attachment/a7e1fb6de57c0c3dbc6a6e5174489a826da25ec2.zip", "author": "Honghua Dong;Jiawei Xu;Yu Yang;Rui Zhao;Shiwen Wu;Chun Yuan;Xiu Li;Chris J. Maddison;Lei Han", "authorids": "~Honghua_Dong1;~Jiawei_Xu1;~Yu_Yang18;~Rui_Zhao1;~Shiwen_Wu1;~Chun_Yuan1;~Xiu_Li1;~Chris_J._Maddison1;~Lei_Han1", "gender": "M;M;M;M;F;M;F;M;M", "homepage": "https://dhh1995.github.io/;https://github.com/jiawei415;http://google.com;https://ruizhaogit.github.io;;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;https://thusigsiclab.github.io/thu.github.io/introduction.html;https://www.leihan.org;http://www.cs.toronto.edu/~cmaddis/", "dblp": "238/2646;;;26/2578-11;;;13/1206-1;75/2307-1;139/1388", "google_scholar": "MrGN4oMAAAAJ;;;N1yNDnQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Tz4_zi8AAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ", "orcid": ";;;;;;0000-0003-0403-1923;;", "linkedin": ";;;rui-zhao-profile/;;;;;", "or_profile": "~Honghua_Dong1;~Jiawei_Xu1;~Yu_Yang18;~Rui_Zhao1;~Shiwen_Wu1;~Chun_Yuan1;~Xiu_Li1;~Lei_Han1;~Chris_J_Maddison1", "aff": "Department of Computer Science, University of Toronto;Tsinghua University;Shenzhen International Graduate School, Tsinghua University, Tsinghua University;Tencent AI Lab;Hong Kong University of Science and Technology;Tsinghua University;Tsinghua University;Tencent Robotics X;Google", "aff_domain": "cs.toronto.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn;tencent.com;hkust.edu;tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;google.com", "position": "PhD student;MS student;MS student;Researcher;PhD student;Full Professor;Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\ndong2023megraph,\ntitle={MeGraph: Capturing Long-Range Interactions by Alternating Local and Hierarchical Aggregation on Multi-Scaled Graph Hierarchy},\nauthor={Honghua Dong and Jiawei Xu and Yu Yang and Rui Zhao and Shiwen Wu and Chun Yuan and Xiu Li and Chris J.
Maddison and Lei Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7p5YWe8GqG}\n}", "github": "", "project": "", "reviewers": "Y5z6;qUTa;NF28", "pdf_size": 1006612, "rating": "5;6;8", "confidence": "5;4;3", "soundness": "3;2;4", "novelty": "3;2;4", "presentation": "3;2;4", "wc_summary": "74;182;178", "wc_strengths": "146;162;98", "wc_weaknesses": "219;210;51", "wc_questions": "3;27;93", "wc_limitations": "113;120;11", "wc_review": "555;701;431", "wc_reply_reviewers": "0;0;51", "wc_reply_authors": "0;0;23", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 144.66666666666666, 49.99555535800713 ], "wc_strengths_avg": [ 135.33333333333334, 27.19477073916152 ], "wc_weaknesses_avg": [ 160.0, 77.1621668954417 ], "wc_questions_avg": [ 41.0, 38.05259518088089 ], "wc_limitations_avg": [ 81.33333333333333, 49.815214097257034 ], "wc_review_avg": [ 562.3333333333334, 110.34894149419327 ], "wc_reply_reviewers_avg": [ 17.0, 24.041630560342615 ], "wc_reply_authors_avg": [ 7.666666666666667, 10.842303978193728 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9819805060619659, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7352691935864061346&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.toronto.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn;tencent.com;hkust.edu;tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;google.com", "author_num": 9, "aff_unique_index": "0;1;1;2;3;1;1;2;4", "aff_unique_norm": "University of Toronto;Tsinghua University;Tencent;Hong Kong University of Science and Technology;Google", "aff_unique_dep": "Department of Computer Science;;Tencent AI Lab;;Google", "aff_unique_url": "https://www.utoronto.ca;https://www.tsinghua.edu.cn;https://ai.tencent.com;https://www.ust.hk;https://www.google.com", "aff_unique_abbr": "U of T;THU;Tencent AI Lab;HKUST;Google", "aff_campus_unique_index": "0;2;3;4", "aff_campus_unique": "Toronto;;Shenzhen;Hong Kong SAR;Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;1;1;2", "aff_country_unique": "Canada;China;United States" }, { "title": "ExPT: Synthetic Pretraining for Few-Shot Experimental Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72683", "id": "7qfkImn0dL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8fab4407e1fe9006b39180525c0d323c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7qfkImn0dL", "openreview": "https://openreview.net/forum?id=7qfkImn0dL", "poster": "/media/PosterPDFs/NeurIPS%202023/72683.png?t=1701307962.2493308", "slides": "https://nips.cc/virtual/2023/poster/72683", "video": "https://nips.cc/virtual/2023/poster/72683", "author_site": "Tung Nguyen, Sudhanshu Agrawal, Aditya Grover", "tldr": "", "abstract": "Experimental design is a fundamental problem in many science and engineering fields. In this problem, sample efficiency is crucial due to the time, money, and safety costs of real-world design evaluations. 
Existing approaches either rely on active data collection or access to large, labeled datasets of past experiments, making them impractical in many real-world scenarios. In this work, we address the more challenging yet realistic setting of few-shot experimental design, where only a few labeled data points of input designs and their corresponding values are available. We approach this problem as a conditional generation task, where a model conditions on a few labeled examples and the desired output to generate an optimal input design. To this end, we introduce Experiment Pretrained Transformers (ExPT), a foundation model for few-shot experimental design that employs a novel combination of synthetic pretraining with in-context learning. In ExPT, we only assume knowledge of a finite collection of unlabelled data points from the input domain and pretrain a transformer neural network to optimize diverse synthetic functions defined over this domain. Unsupervised pretraining allows ExPT to adapt to any design task at test time in an in-context fashion by conditioning on a few labeled data points from the target task and generating the candidate optima. We evaluate ExPT on few-shot experimental design in challenging domains and demonstrate its superior generality and performance compared to existing methods. The source code is available at https://github.com/tung-nd/ExPT.git.", "keywords": "experimental design;few-shot;black-box optimization;synthetic pretraining;in-context learning;transformer", "primary_area": "", "supplementary_material": "/attachment/5ca65b85b2436dc9cda620d39662a1ed58f61f0e.pdf", "author": "Tung Nguyen;Sudhanshu Agrawal;Aditya Grover", "authorids": "~Tung_Nguyen2;sudhanshuagr27@g.ucla.edu;~Aditya_Grover1", "gender": "M;;M", "homepage": "https://tung-nd.github.io/;;https://aditya-grover.github.io", "dblp": ";;162/5052", "google_scholar": "https://scholar.google.com.vn/citations?user=F9mgq3sAAAAJ;;oOhnPUgAAAAJ", "orcid": ";;", "linkedin": "tung-nguyen-40703616b/;;", "or_profile": "~Tung_Nguyen2;sudhanshuagr27@g.ucla.edu;~Aditya_Grover1", "aff": "University of California, Los Angeles;;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;;ucla.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023expt,\ntitle={Ex{PT}: Synthetic Pretraining for Few-Shot Experimental Design},\nauthor={Tung Nguyen and Sudhanshu Agrawal and Aditya Grover},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7qfkImn0dL}\n}", "github": "", "project": "", "reviewers": "5SUX;DuxH;Tttq;piUM", "pdf_size": 1437892, "rating": "6;6;6;7", "confidence": "3;2;2;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "52;154;35;60", "wc_strengths": "48;141;30;81", "wc_weaknesses": "69;392;83;92", "wc_questions": "42;31;83;64", "wc_limitations": "1;7;1;6", "wc_review": "212;725;232;303", "wc_reply_reviewers": "8;221;17;0", "wc_reply_authors": "0;358;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 46.35393726534996 ], "wc_strengths_avg": [ 75.0, 42.26700841081611 ], "wc_weaknesses_avg": [ 159.0, 134.77202973911167 ], "wc_questions_avg": [ 55.0, 20.062402647738878 ], "wc_limitations_avg": [ 
3.75, 2.7726341266023544 ], "wc_review_avg": [ 368.0, 208.86957653042722 ], "wc_reply_reviewers_avg": [ 61.5, 92.28353049163215 ], "wc_reply_authors_avg": [ 89.5, 155.0185472774145 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5884234573544752761&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.ucla.edu;;ucla.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DP-Mix: Mixup-based Data Augmentation for Differentially Private Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72682", "id": "7rm3OcASkg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28484cee66f27fa070796b631cc5242d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7rm3OcASkg", "openreview": "https://openreview.net/forum?id=7rm3OcASkg", "poster": "/media/PosterPDFs/NeurIPS%202023/72682.png?t=1701997308.6428378", "slides": "https://nips.cc/virtual/2023/poster/72682", "video": "https://nips.cc/virtual/2023/poster/72682", "author_site": "Wenxuan Bao, Francesco Pittaluga, Vijay Kumar B G, Vincent Bindschaedler", "tldr": "", "abstract": "Data augmentation techniques, such as image transformations and combinations, are highly effective at improving the generalization of computer vision models, especially when training data is limited. However, such techniques are fundamentally incompatible with differentially private learning approaches, due to the latter\u2019s built-in assumption that each training image\u2019s contribution to the learned model is bounded. In this paper, we investigate why naive applications of multi-sample data augmentation techniques, such as mixup, fail to achieve good performance and propose two novel data augmentation techniques specifically designed for the constraints of differentially private learning. Our first technique, DP-Mix_Self, achieves SoTA classification performance across a range of datasets and settings by performing mixup on self-augmented data. Our second technique, DP-Mix_Diff, further improves performance by incorporating synthetic data from a pre-trained diffusion model into the mixup process. 
We open-source the code at https://github.com/wenxuan-Bao/DP-Mix.", "keywords": "differential privacy;deep learning;data augmentation", "primary_area": "", "supplementary_material": "/attachment/9bdbdf9e8679498057fbadaf2c3fbb097bc03682.zip", "author": "Wenxuan Bao;Francesco Pittaluga;Vijay Kumar b g;Vincent Bindschaedler", "authorids": "~Wenxuan_Bao2;~Francesco_Pittaluga2;~Vijay_Kumar_b_g1;~Vincent_Bindschaedler1", "gender": "M;M;;", "homepage": "https://wenxuan-bao.github.io/;https://www.francescopittaluga.com/;;https://vbinds.ch", "dblp": ";167/5304;;117/2526", "google_scholar": "q9NyXmoAAAAJ;bIeCNNoAAAAJ;;uJMkuykAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wenxuan_Bao2;~Francesco_Pittaluga2;~Vijay_Kumar_b_g1;~Vincent_Bindschaedler1", "aff": "University of Florida;NEC-Labs;;University of Florida", "aff_domain": "cise.ufl.edu;nec-labs.com;;ufl.edu", "position": "PhD student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nbao2023dpmix,\ntitle={{DP}-Mix: Mixup-based Data Augmentation for Differentially Private Learning},\nauthor={Wenxuan Bao and Francesco Pittaluga and Vijay Kumar b g and Vincent Bindschaedler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7rm3OcASkg}\n}", "github": "", "project": "", "reviewers": "ayCk;DhpR;sSKD;GjeG", "pdf_size": 2658805, "rating": "4;5;5;6", "confidence": "4;3;5;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "107;154;125;93", "wc_strengths": "37;41;196;41", "wc_weaknesses": "58;422;134;23", "wc_questions": "84;24;671;236", "wc_limitations": "72;14;247;13", "wc_review": "358;655;1373;406", "wc_reply_reviewers": "0;458;106;11", "wc_reply_authors": "0;769;151;0", "reply_reviewers": "0;4;1;1", "reply_authors": "1;4;3;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 119.75, 22.796655456447994 ], "wc_strengths_avg": [ 78.75, 67.7140125823304 ], "wc_weaknesses_avg": [ 159.25, 156.91617985408644 ], "wc_questions_avg": [ 253.75, 252.9885125850579 ], "wc_limitations_avg": [ 86.5, 95.69352120180342 ], "wc_review_avg": [ 698.0, 405.6901527027739 ], "wc_reply_reviewers_avg": [ 143.75, 186.0542595588717 ], "wc_reply_authors_avg": [ 230.0, 317.2388689930665 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16079801148390001289&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 13, "email": "cise.ufl.edu;nec-labs.com;;ufl.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Florida;NEC Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://www.ufl.edu;https://www.nec-labs.com", "aff_unique_abbr": "UF;NEC-Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7sjLvCkEwq", "title": "Discriminative Entropy Clustering and its Relation to K-means and SVM", "track": "main", "status": "Reject", "tldr": "", "abstract": "Maximization of mutual information between the model's input and output is formally related to \"decisiveness\" and \"fairness\" of the softmax predictions, motivating such 
unsupervised entropy-based losses for discriminative models. Recent self-labeling methods based on such losses represent the state of the art in deep clustering. First, we discuss a number of general properties of such entropy clustering methods, including their relation to K-means and unsupervised SVM-based techniques. Disproving some earlier published claims, we point out fundamental differences with K-means. On the other hand, we show similarity with SVM-based clustering allowing us to link explicit margin maximization to entropy clustering. Finally, we observe that the common form of cross-entropy is not robust to \npseudo-label errors. Our new loss addresses the problem and leads \nto a new EM algorithm improving the state of the art on many standard benchmarks.", "keywords": "discriminative clustering;Kmeans;entropy;SVM", "primary_area": "", "supplementary_material": "/attachment/192dd1421fa667a7e9ef8df2595eb7c12738c85e.pdf", "author": "Zhongwen Zhang;Yuri Boykov", "authorids": "~Zhongwen_Zhang1;~Yuri_Boykov1", "gender": "M;M", "homepage": ";https://cs.uwaterloo.ca/~yboykov/", "dblp": "02/10655;b/YuriBoykov", "google_scholar": ";h6_PdYsAAAAJ", "orcid": ";0000-0001-6374-1736", "linkedin": ";", "or_profile": "~Zhongwen_Zhang1;~Yuri_Boykov1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Professor", "bibtex": "@misc{\nzhang2023discriminative,\ntitle={Discriminative Entropy Clustering and its Relation to K-means and {SVM}},\nauthor={Zhongwen Zhang and Yuri Boykov},\nyear={2023},\nurl={https://openreview.net/forum?id=7sjLvCkEwq}\n}", "github": "", "project": "", "reviewers": "ed2e;aVDq;pLFZ;UHK9", "site": "https://openreview.net/forum?id=7sjLvCkEwq", "pdf_size": 931675, "rating": "3;4;5;7", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "2;1;2;4", "wc_summary": "41;75;58;150", "wc_strengths": "47;72;34;205", "wc_weaknesses": "138;447;47;178", "wc_questions": "26;195;7;41", "wc_limitations": "9;10;7;5", "wc_review": "261;799;153;579", "wc_reply_reviewers": "213;106;0;0", "wc_reply_authors": "972;593;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 81.0, 41.611296543126365 ], "wc_strengths_avg": [ 89.5, 68.06798072515447 ], "wc_weaknesses_avg": [ 202.5, 148.93035284991439 ], "wc_questions_avg": [ 67.25, 74.73411202389441 ], "wc_limitations_avg": [ 7.75, 1.920286436967152 ], "wc_review_avg": [ 448.0, 256.1034947047775 ], "wc_reply_reviewers_avg": [ 79.75, 88.26770360669865 ], "wc_reply_authors_avg": [ 391.25, 413.55977500235684 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.09759000729485331, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EwAExtpHD0gJ:scholar.google.com/&scioq=Discriminative+Entropy+Clustering+and+its+Relation+to+K-means+and+SVM&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": 
"Canada" }, { "title": "Massively Multilingual Corpus of Sentiment Datasets and Multi-faceted Sentiment Classification Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73684", "id": "7tMgzSvopH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7945ab41f2aada1247a7c95e75cdf6c8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=7tMgzSvopH", "openreview": "https://openreview.net/forum?id=7tMgzSvopH", "poster": "/media/PosterPDFs/NeurIPS%202023/73684.png?t=1697499066.7559469", "slides": "https://nips.cc/virtual/2023/poster/73684", "video": "https://nips.cc/virtual/2023/poster/73684", "author_site": "Lukasz Augustyniak, Szymon Wo\u017aniak, Marcin Gruza, Piotr Gramacki, Krzysztof Rajda, Miko\u0142aj Morzy, Tomasz Kajdanowicz", "tldr": "", "abstract": "Despite impressive advancements in multilingual corpora collection and model training, developing large-scale deployments of multilingual models still presents a significant challenge. This is particularly true for language tasks that are culture-dependent. One such example is the area of multilingual sentiment analysis, where affective markers can be subtle and deeply ensconced in culture.\n\nThis work presents the most extensive open massively multilingual corpus of datasets for training sentiment models. The corpus consists of 79 manually selected datasets from over 350 datasets reported in the scientific literature based on strict quality criteria. The corpus covers 27 languages representing 6 language families. Datasets can be queried using several linguistic and functional features. In addition, we present a multi-faceted sentiment classification benchmark summarizing hundreds of experiments conducted on different base models, training objectives, dataset collections, and fine-tuning strategies.", "keywords": "sentiment analysis;multilingual;dataset;benchmark;nlp", "primary_area": "", "supplementary_material": "/attachment/65853d3f8174695dc638369e436d615f14774bc0.zip", "author": "Lukasz Augustyniak;Szymon Wo\u017aniak;Marcin Gruza;Piotr Gramacki;Krzysztof Rajda;Miko\u0142aj Morzy;Tomasz Jan Kajdanowicz", "authorids": "~Lukasz_Augustyniak1;~Szymon_Wo\u017aniak2;~Marcin_Gruza2;~Piotr_Gramacki1;~Krzysztof_Rajda1;~Miko\u0142aj_Morzy1;~Tomasz_Jan_Kajdanowicz1", "gender": "M;M;M;M;M;M;M", "homepage": "https://www.lukaszaugustyniak.com/;;;;;http://www.cs.put.poznan.pl/mmorzy;http://www.kajdanowicz.com", "dblp": "https://dblp.uni-trier.de/pid/142/5323.html;;297/8679.html;305/4062;;45/1692.html;74/608", "google_scholar": "https://scholar.google.pl/citations?user=o3apDSYAAAAJ;;;;https://scholar.google.com/citations?hl=pl;https://scholar.google.pl/citations?user=UwvCguQAAAAJ;GOoaHHEAAAAJ", "orcid": "0000-0002-4090-4480;0000-0002-2047-1649;;0000-0002-4587-5586;0000-0002-5253-1474;0000-0002-2905-9538;0000-0002-8417-1012", "linkedin": "lukaszaugustyniak/;;;piotrgramacki;krzysztof-rajda/;mikolajmorzy/;kajdanowicz", "or_profile": "~Lukasz_Augustyniak1;~Szymon_Wo\u017aniak2;~Marcin_Gruza2;~Piotr_Gramacki1;~Krzysztof_Rajda1;~Miko\u0142aj_Morzy1;~Tomasz_Jan_Kajdanowicz1", "aff": "Wroclaw University of Science and Technology;;;Wroclaw University of Science and Technology;Brand24;Poznan University of Technology;Wroclaw University of Science and Technology", "aff_domain": "pwr.edu.pl;;;pwr.edu.pl;brand24.com;put.poznan.pl;pwr.edu.pl", "position": "PhD student;;;PhD student;Principal Researcher;Associate Professor;Associate Professor", "bibtex": 
"@inproceedings{\naugustyniak2023massively,\ntitle={Massively Multilingual Corpus of Sentiment Datasets and Multi-faceted Sentiment Classification Benchmark},\nauthor={Lukasz Augustyniak and Szymon Wo{\\'z}niak and Marcin Gruza and Piotr Gramacki and Krzysztof Rajda and Miko{\\l}aj Morzy and Tomasz Jan Kajdanowicz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=7tMgzSvopH}\n}", "github": "", "project": "", "reviewers": "98Yo;HqfF;NixT;5m3s", "pdf_size": 1137013, "rating": "5;5;6;7", "confidence": "4;4;3;5", "wc_summary_and_contributions": "104;44;79;227", "wc_strengths": "37;39;13;39", "wc_improvement": "50;2;50;121", "wc_limitations": "70;84;6;13", "wc_correctness": "67;1;48;20", "wc_clarity": "75;1;22;12", "wc_relation_to_prior_work": "46;11;34;15", "wc_documentation": "59;1;42;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "509;184;295;465", "wc_reply_reviewers": "0;112;0;0", "wc_reply_authors": "1315;131;2364;763", "reply_reviewers": "0;1;0;0", "reply_authors": "3;1;5;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 113.5, 68.90754675650557 ], "wc_strengths_avg": [ 32.0, 11.0 ], "wc_improvement_avg": [ 55.75, 42.463955303292224 ], "wc_limitations_avg": [ 43.25, 34.200694437394105 ], "wc_correctness_avg": [ 34.0, 25.347583711273153 ], "wc_clarity_avg": [ 27.5, 28.412145290350743 ], "wc_relation_to_prior_work_avg": [ 26.5, 14.221462653327892 ], "wc_documentation_avg": [ 29.75, 22.331312097590683 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 363.25, 130.75239003551712 ], "wc_reply_reviewers_avg": [ 28.0, 48.49742261192856 ], "wc_reply_authors_avg": [ 1143.25, 819.903767219056 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5786247075752580548&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "pwr.edu.pl;;;pwr.edu.pl;brand24.com;put.poznan.pl;pwr.edu.pl", "author_num": 7, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Wroclaw University of Science and Technology;Brand24;Poznan University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.pwr.edu.pl;https://www.brand24.pl;https://www.put.poznan.pl/", "aff_unique_abbr": "WUST;;PUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Poland" }, { "title": "Federated Learning with Manifold Regularization and Normalized Update Reaggregation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72681", "id": "7uPnuoYqac", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/acf2b98eeb09b21968c2de6b1c6952e9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7uPnuoYqac", "openreview": "https://openreview.net/forum?id=7uPnuoYqac", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72681", "video": "https://nips.cc/virtual/2023/poster/72681", "author_site": "Xuming An, Li Shen, Han Hu, Yong Luo", "tldr": "", "abstract": "Federated Learning (FL) is an emerging collaborative machine learning framework where multiple clients train the global model without sharing their own datasets. 
\nIn FL, the model inconsistency caused by the local data heterogeneity across clients results in the near-orthogonality of client updates, which leads to a reduction of the global update norm and slows down convergence. Most previous works focus on eliminating the difference in parameters (or gradients) between the local and global models, which may fail to reflect the model inconsistency due to the complex structure of the machine learning model and the limitations of Euclidean space in providing meaningful geometric representations.\nIn this paper, we propose FedMRUR, which adopts a manifold model fusion scheme and a new global optimizer to alleviate these negative impacts.\nConcretely, FedMRUR adopts a hyperbolic graph manifold regularizer that enforces the representations of the data in the local and global models to be close to each other in a low-dimensional subspace. \nBecause the machine learning model has a graph structure, the distance in hyperbolic space can reflect the model bias better than the Euclidean distance.\nIn this way, FedMRUR exploits the manifold structures of the representations to significantly reduce the model inconsistency.\nFedMRUR also aggregates the norms of the client updates as the global update norm, which can appropriately enlarge each client's contribution to the global update, thereby mitigating the norm reduction introduced by the near-orthogonality of client updates.\nFurthermore, we theoretically prove that our algorithm can achieve a linear speedup property $\mathcal{O}(\frac{1}{\sqrt{SKT}})$ for the non-convex setting under partial client participation, where $S$ is the number of participating clients, $K$ is the local interval, and $T$ is the total number of communication rounds.\nExperiments demonstrate that FedMRUR can achieve new state-of-the-art (SOTA) accuracy with less communication.", "keywords": "federated learning; manifold regularization; update reaggregation", "primary_area": "", "supplementary_material": "/attachment/16b877a9bcc5849af8cdf3d3d6daa14ca702cd63.pdf", "author": "Xuming An;Li Shen;Han Hu;Yong Luo", "authorids": "~Xuming_An1;~Li_Shen1;~Han_Hu6;~Yong_Luo2", "gender": "M;M;;M", "homepage": ";https://sites.google.com/site/mathshenli/home;;", "dblp": ";91/3680-8;;57/5272-2.html", "google_scholar": ";yVhgENIAAAAJ;;zb1oVGIAAAAJ", "orcid": "0000-0003-1032-5346;;;", "linkedin": ";;;", "or_profile": "~Xuming_An1;~Li_Shen1;~Han_Hu6;~Yong_Luo2", "aff": "Beijing Institute of Technology;JD Explore Academy;;Wuhan University", "aff_domain": "bit.edu.cn;jd.com;;whu.edu.cn", "position": "PhD student;Researcher;;Professor", "bibtex": "@inproceedings{\nan2023federated,\ntitle={Federated Learning with Manifold Regularization and Normalized Update Reaggregation},\nauthor={Xuming An and Li Shen and Han Hu and Yong Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7uPnuoYqac}\n}", "github": "", "project": "", "reviewers": "EAwW;jAXP;mqsa;aqu4;HiX9", "pdf_size": 1792083, "rating": "5;5;5;7;7", "confidence": "4;3;3;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "1;2;2;3;3", "wc_summary": "38;46;91;105;113", "wc_strengths": "23;28;53;74;119", "wc_weaknesses": "39;53;111;2;15", "wc_questions": "364;245;5;80;49", "wc_limitations": "22;9;1;5;6", "wc_review": "486;381;261;266;302", "wc_reply_reviewers": "723;151;0;0;0", "wc_reply_authors": "1547;1254;36;51;45", "reply_reviewers": "3;3;0;0;0", "reply_authors": "6;5;2;2;2", "rating_avg": [ 5.8, 0.9797958971132712 ], 
"confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 78.6, 30.806492822130856 ], "wc_strengths_avg": [ 59.4, 34.977707186149296 ], "wc_weaknesses_avg": [ 44.0, 37.94733192202055 ], "wc_questions_avg": [ 148.6, 134.86081714122898 ], "wc_limitations_avg": [ 8.6, 7.172168430816443 ], "wc_review_avg": [ 339.2, 85.03505159638583 ], "wc_reply_reviewers_avg": [ 174.8, 280.2694417877197 ], "wc_reply_authors_avg": [ 586.6, 670.9916840021194 ], "reply_reviewers_avg": [ 1.2, 1.4696938456699067 ], "reply_authors_avg": [ 3.4, 1.7435595774162693 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13208567720469934204&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "bit.edu.cn;jd.com;;whu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Beijing Institute of Technology;JD;Wuhan University", "aff_unique_dep": ";JD Explore Academy;", "aff_unique_url": "http://www.bit.edu.cn/;;http://www.whu.edu.cn/", "aff_unique_abbr": "BIT;;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "HyTrel: Hypergraph-enhanced Tabular Data Representation Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72680", "id": "7vqlzODS28", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/66178beae8f12fcd48699de95acc1152-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7vqlzODS28", "openreview": "https://openreview.net/forum?id=7vqlzODS28", "poster": "/media/PosterPDFs/NeurIPS%202023/72680.png?t=1701742948.8212645", "slides": "https://nips.cc/virtual/2023/poster/72680", "video": "https://nips.cc/virtual/2023/poster/72680", "author_site": "Pei Chen, Soumajyoti Sarkar, Leonard Lausen, Balasubramaniam Srinivasan, Sheng Zha, Ruihong Huang, George Karypis", "tldr": "", "abstract": "Language models pretrained on large collections of tabular data have demonstrated their effectiveness in several downstream tasks.\nHowever, many of these models do not take into account the row/column permutation invariances, hierarchical structure, etc. that exist in tabular data. To alleviate these limitations, we propose HyTrel, a tabular language model, that captures the permutation invariances and three more structural properties of tabular data by using hypergraphs--where the table cells make up the nodes and the cells occurring jointly together in each row, column, and the entire table are used to form three different types of hyperedges. We show that\nHyTrel is maximally invariant under certain conditions for tabular data, i.e., two tables obtain the same representations via HyTrel\niff the two tables are identical up to permutation. Our empirical results demonstrate that HyTrel consistently outperforms other competitive baselines on four downstream tasks with minimal pretraining, illustrating the advantages of incorporating inductive biases associated with tabular data into the representations. 
Finally, our qualitative analyses showcase that HyTrel can assimilate the table structure to generate robust representations for the cells, rows, columns, and the entire table.", "keywords": "Tabular Language Model;Tabular Representation Learning;Pretraining;Tabular Data;Table;Hypergraph", "primary_area": "", "supplementary_material": "", "author": "Pei Chen;Soumajyoti Sarkar;Leonard Lausen;Balasubramaniam Srinivasan;Sheng Zha;Ruihong Huang;George Karypis", "authorids": "~Pei_Chen2;~Soumajyoti_Sarkar1;~Leonard_Lausen1;~Balasubramaniam_Srinivasan1;~Sheng_Zha1;~Ruihong_Huang1;~George_Karypis1", "gender": "M;M;;;M;F;M", "homepage": "https://brickee.github.io/;https://soumajyoti.github.io;;;https://github.com/szha;https://people.engr.tamu.edu/huangrh/index.html;", "dblp": "98/4148;171/7121;;230/3792;218/5471;42/4811.html;", "google_scholar": "9sOFHvcAAAAJ;DGN-VVUAAAAJ;;uM4EhgEAAAAJ;;https://scholar.google.com.tw/citations?user=NU2aHWUAAAAJ;ElqwScwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;shengzha/;;", "or_profile": "~Pei_Chen2;~Soumajyoti_Sarkar1;~Leonard_Lausen1;~Balasubramaniam_Srinivasan1;~Sheng_Zha1;~Ruihong_Huang1;~George_Karypis1", "aff": "Texas A&M University - College Station;Amazon;;Amazon;Amazon;Texas A&M University;University of Minnesota, Minneapolis", "aff_domain": "tamu.edu;amazon.com;;amazon.com;amazon.com;cse.tamu.edu;umn.edu", "position": "PhD student;ML Researcher;;Senior Applied Scientist;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023hytrel,\ntitle={HyTrel: Hypergraph-enhanced Tabular Data Representation Learning},\nauthor={Pei Chen and Soumajyoti Sarkar and Leonard Lausen and Balasubramaniam Srinivasan and Sheng Zha and Ruihong Huang and George Karypis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7vqlzODS28}\n}", "github": "", "project": "", "reviewers": "SuH5;kUWt;QZu3;4rho;7wTp", "pdf_size": 2289498, "rating": "6;6;6;7;7", "confidence": "4;3;1;3;3", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "146;120;47;30;95", "wc_strengths": "41;69;62;71;86", "wc_weaknesses": "135;47;89;114;20", "wc_questions": "6;82;1;75;17", "wc_limitations": "6;7;1;18;19", "wc_review": "334;325;200;308;237", "wc_reply_reviewers": "11;65;14;9;21", "wc_reply_authors": "18;31;18;18;18", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.6, 43.5458379182213 ], "wc_strengths_avg": [ 65.8, 14.661514246489002 ], "wc_weaknesses_avg": [ 81.0, 42.29893615683496 ], "wc_questions_avg": [ 36.2, 34.99371372118141 ], "wc_limitations_avg": [ 10.2, 7.0823724838503095 ], "wc_review_avg": [ 280.8, 52.859814604290854 ], "wc_reply_reviewers_avg": [ 24.0, 20.89976076418101 ], "wc_reply_authors_avg": [ 20.6, 5.2 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6825879101125505895&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "tamu.edu;amazon.com;;amazon.com;amazon.com;cse.tamu.edu;umn.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;0;2", "aff_unique_norm": "Texas A&M 
University;Amazon;University of Minnesota", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.tamu.edu;https://www.amazon.com;https://www.umn.edu", "aff_unique_abbr": "TAMU;Amazon;UMN", "aff_campus_unique_index": "0;2", "aff_campus_unique": "College Station;;Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7w4RGjzd81", "title": "Unbiased Watermark for Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "The recent advancements in large language models (LLMs) have sparked a growing apprehension regarding the potential misuse. One approach to mitigating this risk is to incorporate watermarking techniques into LLMs, allowing for the tracking and attribution of model outputs. This study examines a crucial aspect of watermarking: how significantly watermarks impact the quality of model-generated outputs. Previous studies have suggested a trade-off between watermark strength and output quality. However, our research demonstrates that, with appropriate implementation, it is possible to integrate watermarks without affecting the output probability distribution. We refer to this type of watermark as an unbiased watermark. This has significant implications for the use of LLMs, as it becomes impossible for users to discern whether a service provider has incorporated watermarks or not. Furthermore, the presence of watermarks does not compromise the performance of the model in downstream tasks, ensuring that the overall utility of the language model is preserved. Our findings contribute to the ongoing discussion around responsible AI development, suggesting that unbiased watermarks can serve as an effective means of tracking and attributing model outputs without sacrificing output quality.", "keywords": "watermark;language model", "primary_area": "", "supplementary_material": "/attachment/aceb66ac1a6f7b22393b1e916bd72f2d80016bbb.zip", "author": "Zhengmian Hu;Lichang Chen;Xidong Wu;Yihan Wu;Hongyang Zhang;Heng Huang", "authorids": "~Zhengmian_Hu1;~Lichang_Chen2;~Xidong_Wu1;~Yihan_Wu1;~Hongyang_Zhang1;~Heng_Huang1", "gender": "M;Non-Binary;M;M;M;M", "homepage": "https://www.umd.edu/;https://www.linkedin.com/in/xidong-wu-22924112b/;https://yihwu.github.io/;https://hongyanz.github.io/;https://www.cs.umd.edu/~heng/;", "dblp": "285/4945;37/10581;;23/10537-1;03/281;151/6212", "google_scholar": "4eXiWWgAAAAJ;rj21L7sAAAAJ;cajTg_wAAAAJ;https://scholar.google.com/citations?hl=en;4OqLaDwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-0316-146X;;;;;", "linkedin": ";;;;;lichang-chen-b7a506173/", "or_profile": "~Zhengmian_Hu1;~Xidong_Wu1;~Yihan_Wu1;~Hongyang_Zhang1;~Heng_Huang1;~LICHANG_CHEN1", "aff": "University of Pittsburgh;University of Pittsburgh;University of Pittsburgh;School of Computer Science, University of Waterloo;University of Pittsburgh;Department of Computer Science, University of Maryland, College Park", "aff_domain": "pitt.edu;pitt.edu;pitt.edu;uwaterloo.ca;pitt.edu;cs.umd.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@misc{\nhu2023unbiased,\ntitle={Unbiased Watermark for Large Language Models},\nauthor={Zhengmian Hu and Lichang Chen and Xidong Wu and Yihan Wu and Hongyang Zhang and Heng Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=7w4RGjzd81}\n}", "github": "", "project": "", "reviewers": "WEht;qjwP;rGHj;3JdJ", "site": "https://openreview.net/forum?id=7w4RGjzd81", "pdf_size": 
435009, "rating": "3;5;5;6", "confidence": "4;4;3;2", "soundness": "2;2;3;2", "novelty": "2;3;3;3", "presentation": "2;2;2;3", "wc_summary": "54;77;104;86", "wc_strengths": "65;44;45;82", "wc_weaknesses": "120;123;51;111", "wc_questions": "11;38;127;13", "wc_limitations": "4;26;4;1", "wc_review": "254;308;331;293", "wc_reply_reviewers": "0;78;0;29", "wc_reply_authors": "281;205;0;66", "reply_reviewers": "0;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 18.005207580030838 ], "wc_strengths_avg": [ 59.0, 15.700318468107582 ], "wc_weaknesses_avg": [ 101.25, 29.345996319770776 ], "wc_questions_avg": [ 47.25, 47.256613293802594 ], "wc_limitations_avg": [ 8.75, 10.034316120194738 ], "wc_review_avg": [ 296.5, 28.02231253840411 ], "wc_reply_reviewers_avg": [ 26.75, 31.869852525545205 ], "wc_reply_authors_avg": [ 138.0, 110.86703748184128 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3247248020044552104&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "University of Pittsburgh;University of Waterloo;University of Maryland, College Park", "aff_unique_dep": ";School of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.pitt.edu;https://uwaterloo.ca;https://www/umd.edu", "aff_unique_abbr": "Pitt;UWaterloo;UMD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Waterloo;College Park", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "A Theory of Multimodal Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72679", "id": "7xlrdSOm3g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b316495425d076b4abffc065a64c2cca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7xlrdSOm3g", "openreview": "https://openreview.net/forum?id=7xlrdSOm3g", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72679", "video": "https://nips.cc/virtual/2023/poster/72679", "tldr": "", "abstract": "Human perception of the empirical world involves recognizing the diverse appearances, or 'modalities', of underlying objects. Despite the longstanding consideration of this perspective in philosophy and cognitive science, the study of multimodality remains relatively under-explored within the field of machine learning. Nevertheless, current studies of multimodal machine learning are limited to empirical practices, lacking theoretical foundations beyond heuristic arguments. \nAn intriguing finding from the practice of multimodal learning is that a model trained on multiple modalities can outperform a finely-tuned unimodal model, even on unimodal tasks. This paper provides a theoretical framework that explains this phenomenon, by studying generalization properties of multimodal learning algorithms. We demonstrate that multimodal learning allows for a superior generalization bound compared to unimodal learning, up to a factor of $O(\\sqrt{n})$, where $n$ represents the sample size. 
Such an advantage occurs when both connection and heterogeneity exist between the modalities.", "keywords": "Multimodal Learning", "primary_area": "", "supplementary_material": "/attachment/2bbfb214bd7a09db0fefa94d5c4773b8b14e5a4d.pdf", "author": "Zhou Lu", "authorids": "~Zhou_Lu1", "gender": "", "homepage": "https://leozoroaster.github.io/", "dblp": "68/11524", "google_scholar": "17_nX_kAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Zhou_Lu1", "aff": "Princeton University", "aff_domain": "princeton.edu", "position": "PhD student", "bibtex": "@inproceedings{\nlu2023a,\ntitle={A Theory of Multimodal Learning},\nauthor={Zhou Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7xlrdSOm3g}\n}", "github": "", "project": "", "reviewers": "XfDh;iaEv;wXhy;TbhH;PsHQ", "pdf_size": 264263, "rating": "5;6;7;8;8", "confidence": "2;4;3;2;3", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;2;3;4;3", "wc_summary": "84;47;80;61;52", "wc_strengths": "40;53;9;61;49", "wc_weaknesses": "156;338;4;94;58", "wc_questions": "1;56;499;13;74", "wc_limitations": "6;9;13;10;54", "wc_review": "287;503;605;239;287", "wc_reply_reviewers": "149;328;44;0;15", "wc_reply_authors": "28;654;19;0;9", "reply_reviewers": "1;3;1;0;1", "reply_authors": "2;4;2;1;2", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 64.8, 14.79729705047513 ], "wc_strengths_avg": [ 42.4, 18.017769007288333 ], "wc_weaknesses_avg": [ 130.0, 115.14859964411204 ], "wc_questions_avg": [ 128.6, 187.13268020311148 ], "wc_limitations_avg": [ 18.4, 17.939899665271266 ], "wc_review_avg": [ 384.2, 143.41882721595516 ], "wc_reply_reviewers_avg": [ 107.2, 122.05474181694049 ], "wc_reply_authors_avg": [ 142.0, 256.17259806622565 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.0458349248514106, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2388377514755694417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "princeton.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "The Target-Charging Technique for Privacy Analysis across Interactive Computations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72678", "id": "7yjsYrajlt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c3fe2a07ec47b89c50e89706d2e23358-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7yjsYrajlt", "openreview": "https://openreview.net/forum?id=7yjsYrajlt", "poster": "/media/PosterPDFs/NeurIPS%202023/72678.png?t=1701401368.9746542", "slides": "https://nips.cc/virtual/2023/poster/72678", "video": "https://nips.cc/virtual/2023/poster/72678", "author_site": "Edith Cohen, Xin Lyu", "tldr": "", "abstract": "We propose the \emph{Target Charging Technique} (TCT), a unified privacy analysis framework for interactive settings where a sensitive dataset is accessed multiple times using differentially private algorithms. 
Unlike traditional composition, where privacy guarantees deteriorate quickly with the number of accesses, TCT allows computations that don't hit a specified \\emph{target}, often the vast majority, to be essentially free (while incurring instead a small overhead on those that do hit their targets). TCT generalizes tools such as the sparse vector technique and top-k selection from private candidates and extends their remarkable privacy enhancement benefits from noisy Lipschitz functions to general private algorithms.", "keywords": "Differential Privacy; Adaptive Composition; Sparse Vector Technique", "primary_area": "", "supplementary_material": "/attachment/d8e21a613f41ff249beda4dc4fd389bfc3f5a349.pdf", "author": "Edith Cohen;Xin Lyu", "authorids": "~Edith_Cohen1;~Xin_Lyu1", "gender": "F;M", "homepage": "http://www.cohenwang.com/edith/;https://people.eecs.berkeley.edu/~xinlyu/", "dblp": "40/1039;", "google_scholar": "O-TV6OgAAAAJ;", "orcid": "0000-0002-3926-8237;", "linkedin": ";", "or_profile": "~Edith_Cohen1;~Xin_Lyu1", "aff": "Google;University of California, Berkeley", "aff_domain": "google.com;berkeley.edu", "position": "Research Scientist;PhD student", "bibtex": "@inproceedings{\ncohen2023the,\ntitle={The Target-Charging Technique for Privacy Analysis across Interactive Computations},\nauthor={Edith Cohen and Xin Lyu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7yjsYrajlt}\n}", "github": "", "project": "", "reviewers": "xrUp;ot2S;fHHh;o37L;MAFe;5m6N", "pdf_size": 585071, "rating": "6;6;7;7;7;8", "confidence": "2;2;3;2;4;1", "soundness": "3;3;4;4;3;4", "novelty": "3;2;3;3;3;4", "presentation": "3;2;4;3;2;4", "wc_summary": "106;70;109;66;221;11", "wc_strengths": "30;67;48;80;69;12", "wc_weaknesses": "27;68;83;33;59;6", "wc_questions": "22;69;65;2;59;31", "wc_limitations": "2;1;27;1;7;6", "wc_review": "187;275;332;182;415;66", "wc_reply_reviewers": "16;22;16;0;45;0", "wc_reply_authors": "2;2;2;0;15;0", "reply_reviewers": "1;1;1;0;1;0", "reply_authors": "2;2;2;1;2;1", "rating_avg": [ 6.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 97.16666666666667, 64.14415189416898 ], "wc_strengths_avg": [ 51.0, 23.77673933350268 ], "wc_weaknesses_avg": [ 46.0, 26.30589287593181 ], "wc_questions_avg": [ 41.333333333333336, 24.716166549222166 ], "wc_limitations_avg": [ 7.333333333333333, 9.104333522498441 ], "wc_review_avg": [ 242.83333333333334, 113.07433641439404 ], "wc_reply_reviewers_avg": [ 16.5, 15.20690632574555 ], "wc_reply_authors_avg": [ 3.5, 5.220153254455275 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1714985851425088, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18384746943136884164&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "google.com;berkeley.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "Google;UC Berkeley", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Berkeley", 
"aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "7ym1abRTF3", "title": "Leveraging automatic strategy discovery to teach people how to select better projects", "track": "main", "status": "Reject", "tldr": "", "abstract": "Human decisions are often suboptimal due to limited cognitive resources and time constraints. Prior work has shown that errors in human decision-making can in part be avoided by leveraging artificial intelligence to automatically discover efficient decision strategies and teach them to people. So far, this line of research has been limited to simplified decision problems that are not directly related to the problems people face in the real world. Current methods are mainly limited by the computational difficulties of deriving efficient decision strategies for complex real-world problems through metareasoning. To bridge this gap, we model a real-world decision problem in which people have to choose which project to pursue, and develop a metareasoning method that enables us to discover and teach efficient decision strategies in this setting. Our main contributions are: formulating the metareasoning problem of deciding how to select a project, developing a metareasoning method that can automatically discover near-optimal project selection strategies, and developing an intelligent tutor that teaches people the discovered strategies. We test our strategy discovery method on a computational benchmark and experimentally evaluate its utility for improving human decision-making. In the benchmark, we demonstrate that our method outperforms PO-UCT while also being more computationally efficient. In the experiment, we taught the discovered planning strategies to people using an intelligent tutor. People who were trained by our tutor showed a significant improvement in their decision strategies compared to people who tried to discover good decision strategies on their own or practiced with an equivalent tutor that did not reveal the optimal strategy. Project selection is a very consequential high-stakes decision regularly faced by organizations, companies, and individuals. Our results indicate that our method can successfully improve human decision-making in naturalistic settings similar to the project selection decisions people face in the real-world. This is a first step towards applying strategy discovery methods to improve people's decisions in the real-world. 
", "keywords": "metareasoning;automatic strategy discovery;decision-making;reinforcement learning;partially observable Markov decision process;project selection;metalevel Markov decision process;judge advisor systems;resource rationality", "primary_area": "", "supplementary_material": "/attachment/12f55e81f77440cf0040abcbfae8047223978129.zip", "author": "Lovis Heindrich;Falk Lieder", "authorids": "~Lovis_Heindrich1;~Falk_Lieder1", "gender": ";M", "homepage": ";https://re.is.mpg.de/person/flieder", "dblp": "284/8336;126/1714", "google_scholar": "WMBFQ4gAAAAJ;https://scholar.google.de/citations?user=JscQvlUAAAAJ", "orcid": ";0000-0003-2746-6110", "linkedin": ";", "or_profile": "~Lovis_Heindrich1;~Falk_Lieder1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "uni-tuebingen.de;is.mpg.de", "position": "PhD student;Principal Researcher", "bibtex": "@misc{\nheindrich2023leveraging,\ntitle={Leveraging automatic strategy discovery to teach people how to select better projects},\nauthor={Lovis Heindrich and Falk Lieder},\nyear={2023},\nurl={https://openreview.net/forum?id=7ym1abRTF3}\n}", "github": "", "project": "", "reviewers": "RmKP;teWD;3k1i;SQT4;uoEm", "site": "https://openreview.net/forum?id=7ym1abRTF3", "pdf_size": 389571, "rating": "3;3;4;5;5", "confidence": "4;4;3;4;5", "soundness": "3;4;2;4;3", "novelty": "2;2;1;3;2", "presentation": "3;4;2;4;3", "wc_summary": "98;90;157;103;153", "wc_strengths": "35;61;17;48;114", "wc_weaknesses": "95;368;357;196;517", "wc_questions": "193;35;65;94;59", "wc_limitations": "2;4;4;24;4", "wc_review": "423;558;600;465;847", "wc_reply_reviewers": "0;64;43;0;400", "wc_reply_authors": "0;231;0;0;180", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 4.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 120.2, 28.74299914761854 ], "wc_strengths_avg": [ 55.0, 32.89376840679705 ], "wc_weaknesses_avg": [ 306.6, 146.6868773953553 ], "wc_questions_avg": [ 89.2, 55.195652002671366 ], "wc_limitations_avg": [ 7.6, 8.2365041127896 ], "wc_review_avg": [ 578.6, 148.3490478567355 ], "wc_reply_reviewers_avg": [ 101.4, 151.35071853149557 ], "wc_reply_authors_avg": [ 82.2, 101.9576382621724 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.35355339059327373, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PCScDJeaHhwJ:scholar.google.com/&scioq=Leveraging+automatic+strategy+discovery+to+teach+people+how+to+select+better+projects&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Uni T\u00fcbingen;MPI-IS", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "LD2: Scalable Heterophilous Graph Neural Network with Decoupled Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72677", "id": "7zkFc9TGKz", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/206191b9b7349e2743d98d855dec9e58-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=7zkFc9TGKz", "openreview": "https://openreview.net/forum?id=7zkFc9TGKz", "poster": "/media/PosterPDFs/NeurIPS%202023/72677.png?t=1699681389.2681623", "slides": "https://nips.cc/virtual/2023/poster/72677", "video": "https://nips.cc/virtual/2023/poster/72677", "author_site": "Ningyi Liao, Siqiang Luo, Xiang Li, Jieming Shi", "tldr": "", "abstract": "Heterophilous Graph Neural Network (GNN) is a family of GNNs that specializes in learning graphs under heterophily, where connected nodes tend to have different labels. Most existing heterophilous models incorporate iterative non-local computations to capture node relationships. However, these approaches have limited application to large-scale graphs due to their high computational costs and challenges in adopting minibatch schemes. In this work, we study the scalability issues of heterophilous GNN and propose a scalable model, LD2, which simplifies the learning process by decoupling graph propagation and generating expressive embeddings prior to training. Theoretical analysis demonstrates that LD2 achieves optimal time complexity in training, as well as a memory footprint that remains independent of the graph scale. We conduct extensive experiments to showcase that our model is capable of lightweight minibatch training on large-scale heterophilous graphs, with up to $15\\times$ speed improvement and efficient memory utilization, while maintaining comparable or better performance than the baselines.", "keywords": "Graph neural networks;Scalability;Heterophilous Graphs;Non-Homophily", "primary_area": "", "supplementary_material": "/attachment/e60e3a2856fbfc5287aa41c12df0419b0e8428f8.pdf", "author": "Ningyi Liao;Siqiang Luo;Xiang Li;Jieming Shi", "authorids": "~Ningyi_Liao1;~Siqiang_Luo1;~Xiang_Li24;~Jieming_Shi1", "gender": "M;M;M;Not Specified", "homepage": "https://nyliao.github.io;https://siqiangluo.com/;https://lixiang3776.github.io;https://www4.comp.polyu.edu.hk/~jiemshi/", "dblp": "274/2346;117/5965;40/1491-67.html;147/1237-1.html", "google_scholar": "RadPeSAAAAAJ;ZDwbMg4AAAAJ;JnxxNtsAAAAJ;", "orcid": "0000-0003-3176-4401;0000-0001-8197-0903;0009-0003-0142-2483;0000-0002-0465-1551", "linkedin": "ningyi-liao-94310a2ba;;;", "or_profile": "~Ningyi_Liao1;~Siqiang_Luo1;~Xiang_Li24;~Jieming_Shi1", "aff": "Nanyang Technological University;Nanyang Technological University;East China Normal University;The Hong Kong Polytechnic University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ecnu.edu.cn;polyu.edu.hk", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliao2023ld,\ntitle={{LD}2: Scalable Heterophilous Graph Neural Network with Decoupled Embeddings},\nauthor={Ningyi Liao and Siqiang Luo and Xiang Li and Jieming Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=7zkFc9TGKz}\n}", "github": "", "project": "", "reviewers": "4icY;aZqt;K3oj;ZDfg", "pdf_size": 1432941, "rating": "5;5;5;7", "confidence": "5;4;2;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "54;188;61;86", "wc_strengths": "75;103;133;104", "wc_weaknesses": "217;127;102;2", "wc_questions": "127;107;98;91", "wc_limitations": "6;13;6;1", "wc_review": "479;538;400;284", "wc_reply_reviewers": "140;359;25;13", "wc_reply_authors": "453;790;15;22", 
"reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.25, 53.72790243439623 ], "wc_strengths_avg": [ 103.75, 20.51066795596867 ], "wc_weaknesses_avg": [ 112.0, 76.56696415556777 ], "wc_questions_avg": [ 105.75, 13.516193990913271 ], "wc_limitations_avg": [ 6.5, 4.272001872658765 ], "wc_review_avg": [ 425.25, 95.11933294551639 ], "wc_reply_reviewers_avg": [ 134.25, 138.90891800024934 ], "wc_reply_authors_avg": [ 320.0, 324.1982418212659 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14718352581420771213&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ntu.edu.sg;ntu.edu.sg;ecnu.edu.cn;polyu.edu.hk", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Nanyang Technological University;East China Normal University;Hong Kong Polytechnic University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.ecnu.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "NTU;ECNU;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Singapore;China" }, { "title": "A Simple Solution for Offline Imitation from Observations and Examples with Possibly Incomplete Trajectories", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72676", "id": "805CW5w2CY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0203f489345567b4a048c38f507cdbfa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=805CW5w2CY", "openreview": "https://openreview.net/forum?id=805CW5w2CY", "poster": "/media/PosterPDFs/NeurIPS%202023/72676.png?t=1701319582.7192044", "slides": "https://nips.cc/virtual/2023/poster/72676", "video": "https://nips.cc/virtual/2023/poster/72676", "author_site": "Kai Yan, Alex Schwing, Yu-Xiong Wang", "tldr": "", "abstract": "Offline imitation from observations aims to solve MDPs where only task-specific expert states and task-agnostic non-expert state-action pairs are available. Offline imitation is useful in real-world scenarios where arbitrary interactions are costly and expert actions are unavailable. The state-of-the-art \u2018DIstribution Correction Estimation\u2019 (DICE) methods minimize divergence of state occupancy between expert and learner policies and retrieve a policy with weighted behavior cloning; however, their results are unstable when learning from incomplete trajectories, due to a non-robust optimization in the dual domain. To address the issue, in this paper, we propose Trajectory-Aware Imitation Learning from Observations (TAILO). TAILO uses a discounted sum along the future trajectory as the weight for weighted behavior cloning. The terms for the sum are scaled by the output of a discriminator, which aims to identify expert states. Despite simplicity, TAILO works well if there exist trajectories or segments of expert behavior in the task-agnostic data, a common assumption in prior work. 
In experiments across multiple testbeds, we find TAILO to be more robust and effective, particularly with incomplete trajectories.", "keywords": "offline Imitation learning;learning from observations;positive-unlabeled learning", "primary_area": "", "supplementary_material": "/attachment/5abe47978d2c7fb05c38cde018dac6ac4030929c.zip", "author": "Kai Yan;Alex Schwing;Yu-Xiong Wang", "authorids": "~Kai_Yan1;~Alex_Schwing1;~Yu-Xiong_Wang1", "gender": "M;Unspecified;", "homepage": "https://kaiyan289.github.io/;https://ece.illinois.edu/directory/profile/aschwing;https://yxw.cs.illinois.edu/", "dblp": ";79/9775;35/10700", "google_scholar": "KElKfgQAAAAJ;3B2c31wAAAAJ;T_Q-xDkAAAAJ", "orcid": ";;", "linkedin": "%E5%BC%80-%E9%A2%9C-18b7931b1/;;", "or_profile": "~Kai_Yan1;~Alex_Schwing1;~Yu-Xiong_Wang1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois Urbana-Champaign", "aff_domain": "cs.illinois.edu;illinois.edu;cs.illinois.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyan2023a,\ntitle={A Simple Solution for Offline Imitation from Observations and Examples with Possibly Incomplete Trajectories},\nauthor={Kai Yan and Alex Schwing and Yu-Xiong Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=805CW5w2CY}\n}", "github": "", "project": "", "reviewers": "Re2g;d8U7;iGFb;6nYj;rs8a", "pdf_size": 11602382, "rating": "4;5;5;6;6", "confidence": "4;3;4;3;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "114;67;84;144;65", "wc_strengths": "17;69;33;90;62", "wc_weaknesses": "412;85;228;142;1", "wc_questions": "297;143;6;62;1", "wc_limitations": "56;47;21;8;1", "wc_review": "896;411;372;446;130", "wc_reply_reviewers": "105;95;13;41;0", "wc_reply_authors": "851;372;63;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;2;3;1;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 94.8, 30.221846402892066 ], "wc_strengths_avg": [ 54.2, 26.056860900730157 ], "wc_weaknesses_avg": [ 173.6, 140.3090873749808 ], "wc_questions_avg": [ 101.8, 110.20235932138658 ], "wc_limitations_avg": [ 26.6, 21.50906785520935 ], "wc_review_avg": [ 451.0, 248.5687027765161 ], "wc_reply_reviewers_avg": [ 50.8, 42.418863728298994 ], "wc_reply_authors_avg": [ 257.2, 327.3514319504346 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16081986778001415627&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "cs.illinois.edu;illinois.edu;cs.illinois.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Scalable and Accurate Discovery of DAGs Using the Best Order Score Search and Grow Shrink Trees", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/72675", "id": "80g3Yqlo1a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9cde817d04811ba28e44071bd9f76a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=80g3Yqlo1a", "openreview": "https://openreview.net/forum?id=80g3Yqlo1a", "poster": "/media/PosterPDFs/NeurIPS%202023/72675.png?t=1702228306.9646971", "slides": "https://nips.cc/virtual/2023/poster/72675", "video": "https://nips.cc/virtual/2023/poster/72675", "author_site": "Bryan Andrews, Joseph Ramsey, Ruben Sanchez Romero, Jazmin Camchong, Erich Kummerfeld", "tldr": "", "abstract": "Learning graphical conditional independence structures is an important machine learning problem and a cornerstone of causal discovery. However, the accuracy and execution time of learning algorithms generally struggle to scale to problems with hundreds of highly connected variables---for instance, recovering brain networks from fMRI data. We introduce the best order score search (BOSS) and grow-shrink trees (GSTs) for learning directed acyclic graphs (DAGs) in this paradigm. BOSS greedily searches over permutations of variables, using GSTs to construct and score DAGs from permutations. GSTs efficiently cache scores to eliminate redundant calculations. BOSS achieves state-of-the-art performance in accuracy and execution time, comparing favorably to a variety of combinatorial and gradient-based learning algorithms under a broad range of conditions. To demonstrate its practicality, we apply BOSS to two sets of resting-state fMRI data: simulated data with pseudo-empirical noise distributions derived from randomized empirical fMRI cortical signals and clinical data from 3T fMRI scans processed into cortical parcels. 
BOSS is available for use within the TETRAD project which includes Python and R wrappers.", "keywords": "Causal Discovery;Directed Acyclic Graphs;DAGs;fMRI;Graphical Models;High Dimension;Densely Connected", "primary_area": "", "supplementary_material": "/attachment/8ea072ae371f9439de02f35ecce50715b5e71f51.pdf", "author": "Bryan Andrews;Joseph Ramsey;Ruben Sanchez Romero;Jazmin Camchong;Erich Kummerfeld", "authorids": "~Bryan_Andrews1;~Joseph_Ramsey1;~Ruben_Sanchez_Romero1;camch002@umn.edu;~Erich_Kummerfeld1", "gender": "M;M;M;;", "homepage": ";https://www.cmu.edu/dietrich/philosophy/people/faculty/ramsey.html;;;https://erichkummerfeld.com/", "dblp": "206/7428;;;;139/1432", "google_scholar": ";O-L-EuAAAAAJ;NWUbLH8AAAAJ;;", "orcid": ";;;;0000-0001-5342-7743", "linkedin": ";;;;", "or_profile": "~Bryan_Andrews1;~Joseph_Ramsey1;~Ruben_Sanchez_Romero1;camch002@umn.edu;~Erich_Kummerfeld1", "aff": "University of Minnesota - Twin Cities;;Rutgers University - Newark;;University of Minnesota - Twin Cities", "aff_domain": "umn.edu;;rutgers.edu;;umn.edu", "position": "Postdoc;;Postdoc;;Assistant Professor", "bibtex": "@inproceedings{\nandrews2023fast,\ntitle={Fast Scalable and Accurate Discovery of {DAG}s Using the Best Order Score Search and Grow Shrink Trees},\nauthor={Bryan Andrews and Joseph Ramsey and Ruben Sanchez Romero and Jazmin Camchong and Erich Kummerfeld},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=80g3Yqlo1a}\n}", "github": "", "project": "", "reviewers": "oAsg;P8PW;9NHG;yEGg", "pdf_size": 346682, "rating": "4;5;5;6", "confidence": "2;1;1;2", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;2;3;3", "wc_summary": "78;11;36;71", "wc_strengths": "152;11;26;108", "wc_weaknesses": "146;11;51;57", "wc_questions": "146;11;38;2", "wc_limitations": "143;11;3;9", "wc_review": "665;55;154;247", "wc_reply_reviewers": "29;0;233;0", "wc_reply_authors": "0;0;806;0", "reply_reviewers": "1;0;2;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 1.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 49.0, 27.101660465735304 ], "wc_strengths_avg": [ 74.25, 58.12217735081851 ], "wc_weaknesses_avg": [ 66.25, 49.32228198289288 ], "wc_questions_avg": [ 49.25, 57.408078699778834 ], "wc_limitations_avg": [ 41.5, 58.67495206644825 ], "wc_review_avg": [ 280.25, 232.27933076363036 ], "wc_reply_reviewers_avg": [ 65.5, 97.42817867537092 ], "wc_reply_authors_avg": [ 201.5, 349.00823772512877 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17517133457866750434&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "umn.edu;;rutgers.edu;;umn.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Minnesota;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.rutgers.edu", "aff_unique_abbr": "UMN;RU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Twin Cities;Newark", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "One-step differentiation of iterative algorithms", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72674", "id": "81snFfE3vR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3716db40060004d0629d4051b2c57ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=81snFfE3vR", "openreview": "https://openreview.net/forum?id=81snFfE3vR", "poster": "/media/PosterPDFs/NeurIPS%202023/72674.png?t=1701865938.1900613", "slides": "https://nips.cc/virtual/2023/poster/72674", "video": "https://nips.cc/virtual/2023/poster/72674", "author_site": "Jerome Bolte, Edouard Pauwels, Samuel Vaiter", "tldr": "", "abstract": "In appropriate frameworks, automatic differentiation is transparent to the user, at the cost of being a significant computational burden when the number of operations is large. For iterative algorithms, implicit differentiation alleviates this issue but requires custom implementation of Jacobian evaluation. In this paper, we study one-step differentiation, also known as Jacobian-free backpropagation, a method as easy as automatic differentiation and as performant as implicit differentiation for fast algorithms (e.g. superlinear optimization methods). We provide a complete theoretical approximation analysis with specific examples (Newton's method, gradient descent) along with its consequences in bilevel optimization. Several numerical examples illustrate the well-foundness of the one-step estimator.", "keywords": "automatic differentiation;implicit differentiation;super-linear algorithms;bilevel optimization.", "primary_area": "", "supplementary_material": "", "author": "Jerome Bolte;Edouard Pauwels;Samuel Vaiter", "authorids": "~Jerome_Bolte1;~Edouard_Pauwels1;~Samuel_Vaiter1", "gender": "M;M;M", "homepage": "https://www.tse-fr.eu/fr/people/jerome-bolte;https://edouardpauwels.fr/;https://samuelvaiter.com", "dblp": "09/1620.html;52/9593;51/10261.html", "google_scholar": "Re1SrTkAAAAJ;E9lzDYQAAAAJ;HkXkm7IAAAAJ", "orcid": ";;0000-0002-4077-708X", "linkedin": ";;", "or_profile": "~Jerome_Bolte1;~Edouard_Pauwels1;~Samuel_Vaiter1", "aff": "Toulouse School of Economics;TSE;CNRS", "aff_domain": "tse-fr.eu;tse-fr.eu;cnrs.fr", "position": "Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nbolte2023onestep,\ntitle={One-step differentiation of iterative algorithms},\nauthor={Jerome Bolte and Edouard Pauwels and Samuel Vaiter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=81snFfE3vR}\n}", "github": "", "project": "", "reviewers": "WGTU;agCr;qFET;USU8;G3kk", "pdf_size": 810551, "rating": "5;6;7;7;8", "confidence": "4;4;5;3;5", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "4;4;3;3;3", "wc_summary": "62;151;117;108;83", "wc_strengths": "50;92;100;178;79", "wc_weaknesses": "139;83;382;43;219", "wc_questions": "3;140;96;27;32", "wc_limitations": "4;2;1;24;32", "wc_review": "258;468;696;380;445", "wc_reply_reviewers": "10;0;19;27;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 104.2, 30.32754523531372 ], "wc_strengths_avg": [ 99.8, 42.63519672758647 ], "wc_weaknesses_avg": [ 173.2, 119.96066021825654 ], "wc_questions_avg": [ 59.6, 50.63042563518501 ], "wc_limitations_avg": [ 12.6, 12.86234815265082 ], 
"wc_review_avg": [ 449.4, 143.26702342130238 ], "wc_reply_reviewers_avg": [ 13.6, 9.046546302318914 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3668996928526715, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6300939073931889653&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "tse-fr.eu;tse-fr.eu;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Toulouse School of Economics;TSE;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tse-fr.eu;;https://www.cnrs.fr", "aff_unique_abbr": "TSE;;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France;" }, { "title": "Expanding Small-Scale Datasets with Guided Imagination", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72673", "id": "82HeVCqsfh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f188a55392d3a7509b0b27f8d24364bb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=82HeVCqsfh", "openreview": "https://openreview.net/forum?id=82HeVCqsfh", "poster": "/media/PosterPDFs/NeurIPS%202023/72673.png?t=1696839859.5101907", "slides": "https://nips.cc/virtual/2023/poster/72673", "video": "https://nips.cc/virtual/2023/poster/72673", "author_site": "Yifan Zhang, Daquan Zhou, Bryan Hooi, Kai Wang, Jiashi Feng", "tldr": "", "abstract": "The power of DNNs relies heavily on the quantity and quality of training data. However, collecting and annotating data on a large scale is often expensive and time-consuming. To address this issue, we explore a new task, termed dataset expansion, aimed at expanding a ready-to-use small dataset by automatically creating new labeled samples. To this end, we present a Guided Imagination Framework (GIF) that leverages cutting-edge generative models like DALL-E2 and Stable Diffusion (SD) to \"imagine\" and create informative new data from the input seed data. Specifically, GIF conducts data imagination by optimizing the latent features of the seed data in the semantically meaningful space of the prior model, resulting in the creation of photo-realistic images with new content. To guide the imagination towards creating informative samples for model training, we introduce two key criteria, i.e., class-maintained information boosting and sample diversity promotion. These criteria are verified to be essential for effective dataset expansion: GIF-SD obtains 13.5% higher model accuracy on natural image datasets than unguided expansion with SD. With these essential criteria, GIF successfully expands small datasets in various scenarios, boosting model accuracy by 36.9% on average over six natural image datasets and by 13.5% on average over three medical datasets. 
The source code is available at https://github.com/Vanint/DatasetExpansion.", "keywords": "Dataset Expansion;Guided Imagination", "primary_area": "", "supplementary_material": "/attachment/767305ce70c84c41fe0715853230631e3af32dcf.pdf", "author": "Yifan Zhang;Daquan Zhou;Bryan Hooi;Kai Wang;Jiashi Feng", "authorids": "~Yifan_Zhang1;~Daquan_Zhou1;~Bryan_Hooi1;~Kai_Wang8;~Jiashi_Feng1", "gender": "M;;M;M;M", "homepage": "https://sites.google.com/view/yifan-zhang/%E9%A6%96%E9%A1%B5;http://bhooi.github.io;https://kaiwang960112.github.io/;;https://sites.google.com/site/jshfeng/", "dblp": "57/4707-4;169/9975;78/2022-36;244/9623;56/8278", "google_scholar": "https://scholar.google.com.hk/citations?user=zuYIUJEAAAAJ;;i2II0XIAAAAJ;DdCAbWwAAAAJ;https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ", "orcid": ";0000-0002-5645-1754;0000-0002-1154-5175;;0000-0001-6843-0064", "linkedin": ";;;;", "or_profile": "~Yifan_Zhang1;~Bryan_Hooi1;~Kai_Wang8;~Zhou_Daquan1;~Jiashi_Feng2", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;Bytedance;ByteDance", "aff_domain": "nus.edu;nus.edu.sg;u.nus.edu;bytedance.com;bytedance.com", "position": "PhD student;Assistant Professor;PhD student;Researcher;Research Lead", "bibtex": "@inproceedings{\nzhang2023expanding,\ntitle={Expanding Small-Scale Datasets with Guided Imagination},\nauthor={Yifan Zhang and Daquan Zhou and Bryan Hooi and Kai Wang and Jiashi Feng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=82HeVCqsfh}\n}", "github": "", "project": "", "reviewers": "UbXT;J4xT;7xaH;1CUd;6GPi", "pdf_size": 12948908, "rating": "4;5;5;6;7", "confidence": "2;5;3;4;4", "soundness": "3;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;2;3;3;3", "wc_summary": "52;80;28;128;85", "wc_strengths": "29;88;40;78;192", "wc_weaknesses": "37;202;180;85;504", "wc_questions": "4;276;50;205;147", "wc_limitations": "4;183;1;11;60", "wc_review": "126;829;299;507;988", "wc_reply_reviewers": "0;305;175;22;20", "wc_reply_authors": "0;1910;793;59;65", "reply_reviewers": "0;2;2;1;1", "reply_authors": "1;5;3;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 74.6, 33.67847977566684 ], "wc_strengths_avg": [ 85.4, 57.73595067200331 ], "wc_weaknesses_avg": [ 201.6, 162.8503607610373 ], "wc_questions_avg": [ 136.4, 99.29874118033923 ], "wc_limitations_avg": [ 51.8, 69.00840528515349 ], "wc_review_avg": [ 549.8, 320.7213120452085 ], "wc_reply_reviewers_avg": [ 104.4, 118.35303122438395 ], "wc_reply_authors_avg": [ 565.4, 732.9760159786949 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.6, 1.3564659966250536 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5384615384615384, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13063253452486718296&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nus.edu;nus.edu.sg;u.nus.edu;bytedance.com;bytedance.com", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "National University of Singapore;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.bytedance.com", "aff_unique_abbr": "NUS;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Convolutions Die Hard: Open-Vocabulary Segmentation with Single Frozen Convolutional CLIP", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72672", "id": "83LJRUzXWj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/661caac7729aa7d8c6b8ac0d39ccbc6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=83LJRUzXWj", "openreview": "https://openreview.net/forum?id=83LJRUzXWj", "poster": "/media/PosterPDFs/NeurIPS%202023/72672.png?t=1701985935.3563533", "slides": "https://nips.cc/virtual/2023/poster/72672", "video": "https://nips.cc/virtual/2023/poster/72672", "author_site": "Qihang Yu, Ju He, Xueqing Deng, Xiaohui Shen, Liang-Chieh Chen", "tldr": "", "abstract": "Open-vocabulary segmentation is a challenging task requiring segmenting and recognizing objects from an open set of categories in diverse environments. One way to address this challenge is to leverage multi-modal models, such as CLIP, to provide image and text features in a shared embedding space, which effectively bridges the gap between closed-vocabulary and open-vocabulary recognition.\nHence, existing methods often adopt a two-stage framework to tackle the problem, where the inputs first go through a mask generator and then through the CLIP model along with the predicted masks. This process involves extracting features from raw images multiple times, which can be ineffective and inefficient. By contrast, we propose to build everything into a single-stage framework using a _shared **F**rozen **C**onvolutional **CLIP** backbone_, which not only significantly simplifies the current two-stage pipeline, but also remarkably yields a better accuracy-cost trade-off. The resulting single-stage system, called FC-CLIP, benefits from the following observations: the _frozen_ CLIP backbone maintains the ability of open-vocabulary classification and can also serve as a strong mask generator, and the _convolutional_ CLIP generalizes well to a larger input resolution than the one used during contrastive image-text pretraining. Surprisingly, FC-CLIP advances state-of-the-art results on various benchmarks, while running practically fast. Specifically, when training on COCO panoptic data only and testing in a zero-shot manner, FC-CLIP achieve 26.8 PQ, 16.8 AP, and 34.1 mIoU on ADE20K, 18.2 PQ, 27.9 mIoU on Mapillary Vistas, 44.0 PQ, 26.8 AP, 56.2 mIoU on Cityscapes, outperforming the prior art under the same setting by +4.2 PQ, +2.4 AP, +4.2 mIoU on ADE20K, +4.0 PQ on Mapillary Vistas and +20.1 PQ on Cityscapes, respectively. Additionally, the training and testing time of FC-CLIP is 7.5x and 6.6x significantly faster than the same prior art, while using 5.9x fewer total model parameters. Meanwhile, FC-CLIP also sets a new state-of-the-art performance across various open-vocabulary semantic segmentation datasets. 
Code and models are available at https://github.com/bytedance/fc-clip", "keywords": "open-vocabulary panoptic segmentation;panoptic segmentation;vision and language", "primary_area": "", "supplementary_material": "", "author": "Qihang Yu;Ju He;Xueqing Deng;Xiaohui Shen;Liang-Chieh Chen", "authorids": "~Qihang_Yu1;~Ju_He1;~Xueqing_Deng2;~Xiaohui_Shen2;~Liang-Chieh_Chen1", "gender": ";M;;;F", "homepage": ";https://tacju.github.io/;https://xiaohuishen.github.io/;http://liangchiehchen.com/;https://sites.google.com/view/xueqingdeng7/home", "dblp": ";;88/6582;138/2443;209/9919", "google_scholar": "7zZdZxsAAAAJ;NyTPm_zUV_kC;pViZYwIAAAAJ;ACjYGPUAAAAJ;UGhyv2UAAAAJ", "orcid": ";;;;", "linkedin": ";ju-he-43b884190/;;;", "or_profile": "~Qihang_Yu1;~Ju_He1;~Xiaohui_Shen2;~Liang-Chieh_Chen1;~Xueqing_Deng1", "aff": "Johns Hopkins University;TikTok;ByteDance Inc.;Google;ByteDance Research", "aff_domain": "jhu.edu;tiktok.com;bytedance.com;google.com;bytedance.com", "position": "PhD student;Intern;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nyu2023convolutions,\ntitle={Convolutions Die Hard: Open-Vocabulary Segmentation with Single Frozen Convolutional {CLIP}},\nauthor={Qihang Yu and Ju He and Xueqing Deng and Xiaohui Shen and Liang-Chieh Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=83LJRUzXWj}\n}", "github": "", "project": "", "reviewers": "YCke;5KPy;VW2t;GVRs;N4iz", "pdf_size": 2703547, "rating": "3;5;6;6;7", "confidence": "5;4;4;4;5", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;4;4", "wc_summary": "92;19;44;89;123", "wc_strengths": "64;23;53;99;67", "wc_weaknesses": "314;39;364;150;237", "wc_questions": "21;2;80;21;68", "wc_limitations": "19;2;38;112;1", "wc_review": "510;85;579;471;496", "wc_reply_reviewers": "0;0;0;0;73", "wc_reply_authors": "0;0;0;0;22", "reply_reviewers": "0;0;0;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 73.4, 37.07613787869497 ], "wc_strengths_avg": [ 61.2, 24.481829996958968 ], "wc_weaknesses_avg": [ 220.8, 116.20567972349717 ], "wc_questions_avg": [ 38.4, 30.123744787127645 ], "wc_limitations_avg": [ 34.4, 41.07846150965248 ], "wc_review_avg": [ 428.2, 175.29791784273993 ], "wc_reply_reviewers_avg": [ 14.6, 29.2 ], "wc_reply_authors_avg": [ 4.4, 8.8 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.24077170617153837, "gs_citation": 145, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2541522311728529779&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "jhu.edu;tiktok.com;bytedance.com;google.com;bytedance.com", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Johns Hopkins University;TikTok;ByteDance;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.jhu.edu;https://www.tiktok.com;https://www.bytedance.com;https://www.google.com", "aff_unique_abbr": "JHU;TikTok;ByteDance;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "ProBio: A Protocol-guided Multimodal Dataset for Molecular Biology Lab", "status": "Poster", 
"track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73683", "id": "846X3N11bf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81c7202dbd3cd3006b35a58a076195c0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=846X3N11bf", "openreview": "https://openreview.net/forum?id=846X3N11bf", "poster": "/media/PosterPDFs/NeurIPS%202023/73683.png?t=1699600115.868344", "slides": "https://nips.cc/virtual/2023/poster/73683", "video": "https://nips.cc/virtual/2023/poster/73683", "author_site": "Jieming Cui, Ziren Gong, Baoxiong Jia, Siyuan Huang, Zilong Zheng, Jianzhu Ma, Yixin Zhu", "tldr": "", "abstract": "The challenge of replicating research results has posed a significant impediment to the field of molecular biology. The advent of modern intelligent systems has led to notable progress in various domains. Consequently, we embarked on an investigation of intelligent monitoring systems as a means of tackling the issue of the reproducibility crisis. Specifically, we first curate a comprehensive multimodal dataset, named ProBio, as an initial step towards this objective. This dataset comprises fine-grained hierarchical annotations intended for the purpose of studying activity understanding in BioLab. Next, we devise two challenging benchmarks, transparent solution tracking and multimodal action recognition, to emphasize the unique characteristics and difficulties associated with activity understanding in BioLab settings. Finally, we provide a thorough experimental evaluation of contemporary video understanding models and highlight their limitations in this specialized domain to identify potential avenues for future research. We hope ProBio with associated benchmarks may garner increased focus on modern AI techniques in the realm of molecular biology.", "keywords": "Protocol-guided; Reproducibility crisis; Ambiguous action recognition; Transparent solution tracking; Monitoring System", "primary_area": "", "supplementary_material": "/attachment/364f3e069cfb8d5960d19bf24faadd4b620eb50d.zip", "author": "Jieming Cui;Ziren Gong;Baoxiong Jia;Siyuan Huang;Zilong Zheng;Jianzhu Ma;Yixin Zhu", "authorids": "~Jieming_Cui1;~Ziren_Gong1;~Baoxiong_Jia1;~Siyuan_Huang2;~Zilong_Zheng1;~Jianzhu_Ma2;~Yixin_Zhu1", "gender": "F;M;M;M;M;M;M", "homepage": "https://jiemingcui.github.io/;https://zorangong.github.io/;https://buzz-beater.github.io/;https://siyuanhuang.com/;http://zilongzheng.github.io;https://majianzhu.com/;https://yzhu.io/", "dblp": "336/7638;;206/8738;62/885-1;218/5234;24/9080.html;91/1103-1.html", "google_scholar": ";rF-i-GAAAAAJ;qIBUK6sAAAAJ;1NN7Ee8AAAAJ;9sDx70IAAAAJ;;qG9l6JEAAAAJ", "orcid": "0000-0001-5189-7266;0000-0003-0093-835X;0000-0002-4968-3290;;;;0000-0001-7024-1545", "linkedin": ";;baoxiong-jia-2b6094122?trk=public_post-text;;;;", "or_profile": "~Jieming_Cui1;~Ziren_Gong1;~Baoxiong_Jia1;~Siyuan_Huang2;~Zilong_Zheng1;~Jianzhu_Ma2;~Yixin_Zhu1", "aff": "Beijing Institute for General Artificial Intelligence;Beihang University;University of California, Los Angeles;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence;Tsinghua University;Peking University", "aff_domain": "bigai.ai;buaa.edu.cn;ucla.edu;bigai.ai;bigai.ai;tsinghua.edu.cn;pku.edu.cn", "position": "Intern;MS student;PhD student;Researcher;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ncui2023probio,\ntitle={ProBio: A Protocol-guided Multimodal Dataset for Molecular Biology 
Lab},\nauthor={Jieming Cui and Ziren Gong and Baoxiong Jia and Siyuan Huang and Zilong Zheng and Jianzhu Ma and Yixin Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=846X3N11bf}\n}", "github": "", "project": "", "reviewers": "bbUW;7iDD;CFqS;P3VE;7ke8", "pdf_size": 33575916, "rating": "6;6;7;8;9", "confidence": "4;3;2;3;4", "wc_summary_and_contributions": "59;372;97;40;57", "wc_strengths": "157;43;94;48;19", "wc_improvement": "25;61;60;41;22", "wc_limitations": "6;28;122;44;22", "wc_correctness": "174;5;6;119;6", "wc_clarity": "13;4;17;1;5", "wc_relation_to_prior_work": "7;5;41;1;5", "wc_documentation": "166;28;6;67;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "608;547;444;362;142", "wc_reply_reviewers": "0;0;13;10;0", "wc_reply_authors": "553;544;1616;818;220", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;3;2;1", "rating_avg": [ 7.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 125.0, 124.89835867616516 ], "wc_strengths_avg": [ 72.2, 48.85652464103438 ], "wc_improvement_avg": [ 41.8, 16.581917862539303 ], "wc_limitations_avg": [ 44.4, 40.66251344912166 ], "wc_correctness_avg": [ 62.0, 71.15335550766387 ], "wc_clarity_avg": [ 8.0, 6.0 ], "wc_relation_to_prior_work_avg": [ 11.8, 14.729562111617575 ], "wc_documentation_avg": [ 54.4, 60.15513278183333 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 420.6, 162.90807223707486 ], "wc_reply_reviewers_avg": [ 4.6, 5.71314274283428 ], "wc_reply_authors_avg": [ 750.2, 472.593863692706 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.1833396994056422, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=660441536390686801&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "bigai.ai;buaa.edu.cn;ucla.edu;bigai.ai;bigai.ai;tsinghua.edu.cn;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;0;3;4", "aff_unique_norm": "Beijing Institute for General Artificial Intelligence;Beihang University;University of California, Los Angeles;Tsinghua University;Peking University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.bigaiai.org/;http://www.buaa.edu.cn/;https://www.ucla.edu;https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "BIGAI;BUAA;UCLA;THU;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Nearest Neighbour with Bandit Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72671", "id": "86ADcKOHAw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4078c8b648dc107aedbdf561dd4edc2a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=86ADcKOHAw", "openreview": "https://openreview.net/forum?id=86ADcKOHAw", "poster": "/media/PosterPDFs/NeurIPS%202023/72671.png?t=1702379734.5821946", "slides": "https://nips.cc/virtual/2023/poster/72671", "video": "https://nips.cc/virtual/2023/poster/72671", "author_site": "Stephen Pasteris, Chris Hicks, Vasilios Mavroudis", "tldr": "", "abstract": "In this paper we adapt the nearest neighbour rule to the contextual bandit problem. 
Our algorithm handles the fully adversarial setting in which no assumptions at all are made about the data-generation process. When combined with a sufficiently fast data structure for (perhaps approximate) adaptive nearest neighbour search, such as a navigating net, our algorithm is extremely efficient - having a per-trial running time polylogarithmic in both the number of trials and actions, and taking only quasi-linear space. We give generic regret bounds for our algorithm and further analyse them when applied to the stochastic bandit problem in Euclidean space. A side result of this paper is that, when applied to the online classification problem with stochastic labels, our algorithm can, under certain conditions, have sublinear regret whilst only finding a single nearest neighbour per trial - in stark contrast to the k-nearest neighbours algorithm.", "keywords": "Nearest Neighbours;Contextual Bandits", "primary_area": "", "supplementary_material": "/attachment/ab3dff58e8a989c0207cb12c131161abce37dd23.pdf", "author": "Stephen Pasteris;Chris Hicks;Vasilios Mavroudis", "authorids": "~Stephen_Pasteris1;~Chris_Hicks1;~Vasilios_Mavroudis1", "gender": "M;Non-Binary;", "homepage": ";https://chrishicks.io;", "dblp": "126/1728;220/3716;", "google_scholar": ";IMw5HFkAAAAJ;", "orcid": ";;", "linkedin": ";christopher-r-hicks/;", "or_profile": "~Stephen_Pasteris1;~Chris_Hicks1;~Vasilios_Mavroudis1", "aff": "Alan Turing Institute;Alan Turing Institute;", "aff_domain": "turing.ac.uk;turing.ac.uk;", "position": "Senior Research Associate;Principal Researcher;", "bibtex": "@inproceedings{\npasteris2023nearest,\ntitle={Nearest Neighbour with Bandit Feedback},\nauthor={Stephen Pasteris and Chris Hicks and Vasilios Mavroudis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=86ADcKOHAw}\n}", "github": "", "project": "", "reviewers": "nfmq;UgjS;j5Qr;WneD;PH3Q", "pdf_size": 403843, "rating": "6;6;6;7;7", "confidence": "2;2;2;2;2", "soundness": "3;3;3;4;3", "novelty": "3;2;3;4;3", "presentation": "3;2;3;3;2", "wc_summary": "94;37;61;289;65", "wc_strengths": "35;36;43;36;33", "wc_weaknesses": "32;97;15;45;54", "wc_questions": "46;68;73;1;19", "wc_limitations": "1;1;12;1;15", "wc_review": "208;239;204;372;186", "wc_reply_reviewers": "11;223;19;5;0", "wc_reply_authors": "0;236;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 109.2, 91.70474360686038 ], "wc_strengths_avg": [ 36.6, 3.3823069050575527 ], "wc_weaknesses_avg": [ 48.6, 27.528893911670334 ], "wc_questions_avg": [ 41.4, 27.789206537790893 ], "wc_limitations_avg": [ 6.0, 6.196773353931867 ], "wc_review_avg": [ 241.8, 67.29754824657434 ], "wc_reply_reviewers_avg": [ 51.6, 85.93392810758739 ], "wc_reply_authors_avg": [ 47.2, 94.39999999999998 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4693434001348981806&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "turing.ac.uk;turing.ac.uk;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Alan Turing Institute", "aff_unique_dep": "",
"aff_unique_url": "https://www.turing.ac.uk", "aff_unique_abbr": "ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Geometry-Informed Neural Operator for Large-Scale 3D PDEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72670", "id": "86dXbqT5Ua", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/70518ea42831f02afc3a2828993935ad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=86dXbqT5Ua", "openreview": "https://openreview.net/forum?id=86dXbqT5Ua", "poster": "/media/PosterPDFs/NeurIPS%202023/72670.png?t=1702062873.3597097", "slides": "https://nips.cc/virtual/2023/poster/72670", "video": "https://nips.cc/virtual/2023/poster/72670", "author_site": "Zongyi Li, Nikola Kovachki, Chris Choy, Boyi Li, Jean Kossaifi, Shourya Otta, Mohammad Amin Nabian, Maximilian Stadler, Christian Hundt, Kamyar Azizzadenesheli, Animashree Anandkumar", "tldr": "", "abstract": "We propose the geometry-informed neural operator (GINO), a highly efficient approach for learning the solution operator of large-scale partial differential equations with varying geometries. GINO uses a signed distance function (SDF) representation of the input shape and neural operators based on graph and Fourier architectures to learn the solution operator. The graph neural operator handles irregular grids and transforms them into and from regular latent grids on which Fourier neural operator can be efficiently applied. We provide an efficient implementation of GINO using an optimized hashing approach, which allows efficient learning in a shared, compressed latent space with reduced computation and memory costs. GINO is discretization-invariant, meaning the trained model can be applied to arbitrary discretizations of the continuous domain and applies to any shape or resolution. To empirically validate the performance of our method on large-scale simulation, we generate the industry-standard aerodynamics dataset of 3D vehicle geometries with Reynolds numbers as high as five million. For this large-scale 3D fluid simulation, numerical methods are expensive to compute surface pressure. We successfully trained GINO to predict the pressure on car surfaces using only five hundred data points. The cost-accuracy experiments show a 26,000x speed-up compared to optimized GPU-based computational fluid dynamics (CFD) simulators on computing the drag coefficient. 
When tested on new combinations of geometries and boundary conditions (inlet velocities), GINO obtains a one-fourth reduction in error rate compared to deep neural network approaches.", "keywords": "partial differential equation;computational fluid dynamics;neural operator", "primary_area": "", "supplementary_material": "/attachment/0c1c9f201b3bc661778804fe85d7c0fcfc2fc7bd.zip", "author": "Zongyi Li;Nikola Borislavov Kovachki;Chris Choy;Boyi Li;Jean Kossaifi;Shourya Prakash Otta;Mohammad Amin Nabian;Maximilian Stadler;Christian Hundt;Kamyar Azizzadenesheli;Anima Anandkumar", "authorids": "~Zongyi_Li1;~Nikola_Borislavov_Kovachki1;~Chris_Choy1;~Boyi_Li1;~Jean_Kossaifi1;~Shourya_Prakash_Otta1;~Mohammad_Amin_Nabian1;~Maximilian_Stadler1;~Christian_Hundt1;~Kamyar_Azizzadenesheli1;~Anima_Anandkumar1", "gender": "M;M;F;M;M;M;M;M;M;F;M", "homepage": "https://zongyi-li.github.io;http://www.its.caltech.edu/~nkovachk/;https://sites.google.com/site/boyilics/home;http://jeankossaifi.com/;https://www.linkedin.com/in/shourya-otta/;https://mnabian.github.io/;;;https://kamyar.page/;http://tensorlab.cms.caltech.edu/users/anima/;https://chrischoy.github.io/", "dblp": ";;;155/6766;;;;;176/5584;;169/4837", "google_scholar": ";;;https://scholar.google.co.uk/citations?user=hJS2TXwAAAAJ;;NVXDhJAAAAAJ;;https://scholar.google.de/citations?user=jcoDb5gAAAAJ;CxAS4SQAAAAJ;bEcLezcAAAAJ;2u8G5ksAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;;https://linkedin.com/in/mnabian;maximilian-j-stadler/;christian-hundt-20590712a/;;anima-anandkumar-35171b1/;", "or_profile": "~Zongyi_Li1;~Nikola_Borislavov_Kovachki1;~Boyi_Li1;~Jean_Kossaifi1;~Shourya_Prakash_Otta1;~Mohammad_Amin_Nabian1;~Maximilian_Stadler1;~Christian_Hundt1;~Kamyar_Azizzadenesheli1;~anima_anandkumar1;~Christopher_Choy3", "aff": "California Institute of Technology;NVIDIA;University of California, Berkeley;NVIDIA AI;;;NVIDIA;NVIDIA;NVIDIA;California Institute of Technology;NVIDIA", "aff_domain": "caltech.edu;nvidia.com;berkeley.edu;nvidia.com;;;nvidia.com;nvidia.com;nvidia.com;caltech.edu;nvidia.com", "position": "PhD student;Researcher;Postdoc;Researcher;;;Researcher;Researcher;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nli2023geometryinformed,\ntitle={Geometry-Informed Neural Operator for Large-Scale 3D {PDE}s},\nauthor={Zongyi Li and Nikola Borislavov Kovachki and Chris Choy and Boyi Li and Jean Kossaifi and Shourya Prakash Otta and Mohammad Amin Nabian and Maximilian Stadler and Christian Hundt and Kamyar Azizzadenesheli and Anima Anandkumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=86dXbqT5Ua}\n}", "github": "", "project": "", "reviewers": "RPer;zgWV;5VBN;rW9d;zqvK", "pdf_size": 14602864, "rating": "4;5;5;5;5", "confidence": "3;4;5;4;3", "soundness": "2;3;3;2;3", "novelty": "2;2;2;3;2", "presentation": "2;3;3;2;2", "wc_summary": "178;40;151;139;23", "wc_strengths": "71;77;14;57;41", "wc_weaknesses": "351;167;247;129;35", "wc_questions": "7;2;6;84;33", "wc_limitations": "24;2;138;62;35", "wc_review": "631;288;556;471;167", "wc_reply_reviewers": "71;7;32;0;0", "wc_reply_authors": "1329;0;0;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;1;1;1;1", "rating_avg": [ 4.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 106.2, 62.518477268724325 ], 
"wc_strengths_avg": [ 52.0, 22.698017534577772 ], "wc_weaknesses_avg": [ 185.8, 107.1044350155492 ], "wc_questions_avg": [ 26.4, 30.819474362811576 ], "wc_limitations_avg": [ 52.2, 47.05061104810436 ], "wc_review_avg": [ 422.6, 171.47431294511725 ], "wc_reply_reviewers_avg": [ 22.0, 27.18087562975115 ], "wc_reply_authors_avg": [ 265.8, 531.5999999999999 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13784811337132425812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "caltech.edu;nvidia.com;berkeley.edu;nvidia.com;;;nvidia.com;nvidia.com;nvidia.com;caltech.edu;nvidia.com", "author_num": 11, "aff_unique_index": "0;1;2;1;1;1;1;0;1", "aff_unique_norm": "California Institute of Technology;NVIDIA;University of California, Berkeley", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.caltech.edu;https://www.nvidia.com;https://www.berkeley.edu", "aff_unique_abbr": "Caltech;NVIDIA;UC Berkeley", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Pasadena;;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Embracing the chaos: analysis and diagnosis of numerical instability in variational flows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72669", "id": "87Nu9SagB7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/66738d21d3cddb8717ca52deff5a5546-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=87Nu9SagB7", "openreview": "https://openreview.net/forum?id=87Nu9SagB7", "poster": "/media/PosterPDFs/NeurIPS%202023/72669.png?t=1699571792.8511608", "slides": "https://nips.cc/virtual/2023/poster/72669", "video": "https://nips.cc/virtual/2023/poster/72669", "author_site": "Zuheng Xu, Trevor Campbell", "tldr": "", "abstract": "In this paper, we investigate the impact of numerical instability on the reliability of sampling, density evaluation, and evidence lower bound (ELBO) estimation in variational flows. We first empirically demonstrate that common flows can exhibit a catastrophic accumulation of error: the numerical flow map deviates significantly from the exact map---which affects sampling---and the numerical inverse flow map does not accurately recover the initial input---which affects density and ELBO computations. Surprisingly though, we find that results produced by flows are often accurate enough for applications despite the presence of serious numerical instability. In this work, we treat variational flows as chaotic dynamical systems, and leverage shadowing theory to elucidate this behavior via theoretical guarantees on the error of sampling, density evaluation, and ELBO estimation. 
Finally, we develop and empirically test a diagnostic procedure that can be used to validate results produced by numerically unstable flows in practice.", "keywords": "variational flow;numerical instability;shadowing property", "primary_area": "", "supplementary_material": "/attachment/fb12d607a572f778cba56fd7057d7200585e21fd.zip", "author": "Zuheng Xu;Trevor Campbell", "authorids": "~Zuheng_Xu1;~Trevor_Campbell1", "gender": "M;M", "homepage": "https://zuhengxu.github.io/;https://trevorcampbell.me", "dblp": "278/8104;130/3822", "google_scholar": "lkMkblkAAAAJ;", "orcid": ";", "linkedin": "zuheng-david-xu-29825624b/;", "or_profile": "~Zuheng_Xu1;~Trevor_Campbell1", "aff": "University of British Columbia;University of British Columbia", "aff_domain": "ubc.ca;ubc.ca", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxu2023embracing,\ntitle={Embracing the chaos: analysis and diagnosis of numerical instability in variational flows},\nauthor={Zuheng Xu and Trevor Campbell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=87Nu9SagB7}\n}", "github": "", "project": "", "reviewers": "e3zg;sFEg;d5rn;MECE;c44C", "pdf_size": 5510034, "rating": "5;5;6;6;7", "confidence": "3;3;3;3;3", "soundness": "2;2;3;2;3", "novelty": "2;2;3;3;3", "presentation": "3;3;1;3;3", "wc_summary": "62;81;79;123;172", "wc_strengths": "110;203;38;89;47", "wc_weaknesses": "108;144;15;90;352", "wc_questions": "115;48;49;30;78", "wc_limitations": "24;16;37;5;63", "wc_review": "419;492;218;337;712", "wc_reply_reviewers": "66;21;0;43;251", "wc_reply_authors": "13;60;0;22;546", "reply_reviewers": "1;1;0;1;2", "reply_authors": "2;3;1;2;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 103.4, 39.75223264170202 ], "wc_strengths_avg": [ 97.4, 59.07486775270851 ], "wc_weaknesses_avg": [ 141.8, 113.21907966416262 ], "wc_questions_avg": [ 64.0, 29.779187362988935 ], "wc_limitations_avg": [ 29.0, 19.949937343260004 ], "wc_review_avg": [ 435.6, 165.4842590701605 ], "wc_reply_reviewers_avg": [ 76.2, 90.12746529221823 ], "wc_reply_authors_avg": [ 128.2, 209.85366329897602 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4071313948479156278&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ubc.ca;ubc.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Out-of-distribution Detection Learning with Unreliable Out-of-distribution Sources", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72668", "id": "87Qnneer8l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e43f900f571de6c96a70d5724a0fb565-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=87Qnneer8l", "openreview": "https://openreview.net/forum?id=87Qnneer8l", "poster": "/media/PosterPDFs/NeurIPS%202023/72668.png?t=1701863635.6461694", "slides": 
"https://nips.cc/virtual/2023/poster/72668", "video": "https://nips.cc/virtual/2023/poster/72668", "author_site": "Haotian Zheng, Qizhou Wang, Zhen Fang, Xiaobo Xia, Feng Liu, Tongliang Liu, Bo Han", "tldr": "", "abstract": "Out-of-distribution (OOD) detection discerns OOD data where the predictor cannot make valid predictions as in-distribution (ID) data, thereby increasing the reliability of open-world classification. However, it is typically hard to collect real out-of-distribution (OOD) data for training a predictor capable of discerning ID and OOD patterns. This obstacle gives rise to *data generation-based learning methods*, synthesizing OOD data via data generators for predictor training without requiring any real OOD data. \nRelated methods typically pre-train a generator on ID data and adopt various selection procedures to find those data likely to be the OOD cases. However, generated data may still coincide with ID semantics, i.e., mistaken OOD generation remains, confusing the predictor between ID and OOD data. To this end, we suggest that generated data (with mistaken OOD generation) can be used to devise an *auxiliary OOD detection task* to facilitate real OOD detection. Specifically, we can ensure that learning from such an auxiliary task is beneficial if the ID and the OOD parts have disjoint supports, with the help of a well-designed training procedure for the predictor. Accordingly, we propose a powerful data generation-based learning method named *Auxiliary Task-based OOD Learning* (ATOL) that can relieve the mistaken OOD generation. We conduct extensive experiments under various OOD detection setups, demonstrating the effectiveness of our method against its advanced counterparts.", "keywords": "out-of-distribution detection", "primary_area": "", "supplementary_material": "/attachment/396c43173acd57fda6c96705936b8b8078dd025b.pdf", "author": "Haotian Zheng;Qizhou Wang;Zhen Fang;Xiaobo Xia;Feng Liu;Tongliang Liu;Bo Han", "authorids": "~Haotian_Zheng1;~Qizhou_Wang1;~Zhen_Fang2;~Xiaobo_Xia1;~Feng_Liu2;~Tongliang_Liu1;~Bo_Han1", "gender": ";;M;M;M;M;", "homepage": ";;https://fang-zhen.github.io/index.html;https://xiaoboxia.github.io/;https://fengliu90.github.io/index.html;https://tongliang-liu.github.io/;", "dblp": ";;;242/8072;77/1318-3;150/6667;", "google_scholar": "GtY-6i0AAAAJ;;OzD6WJcAAAAJ;jRsugY0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;", "orcid": ";;0000-0003-0602-6255;;0000-0002-5005-9129;;", "linkedin": ";;;;alexfengliu;;", "or_profile": "~Haotian_Zheng1;~Qizhou_Wang1;~Zhen_Fang2;~Xiaobo_Xia1;~Feng_Liu2;~Tongliang_Liu1;~Bo_Han1", "aff": "Xidian University;;University of Technology Sydney;The University of Sydney;University of Melbourne;University of Sydney;", "aff_domain": "xidian.edu;;uts.edu.au;sydney.edu.au;unimelb.edu.au;sydney.edu.au;", "position": "Undergrad student;;Postdoc;PhD student;Assistant Professor;Lecturer;", "bibtex": "@inproceedings{\nzheng2023outofdistribution,\ntitle={Out-of-distribution Detection Learning with Unreliable Out-of-distribution Sources},\nauthor={Haotian Zheng and Qizhou Wang and Zhen Fang and Xiaobo Xia and Feng Liu and Tongliang Liu and Bo Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=87Qnneer8l}\n}", "github": "", "project": "", "reviewers": "iqYB;3oBY;Q6fc;eqWq", "pdf_size": 445940, "rating": "5;5;6;7", "confidence": "3;4;5;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", 
"presentation": "3;2;3;4", "wc_summary": "95;44;104;122", "wc_strengths": "39;33;89;66", "wc_weaknesses": "49;122;172;49", "wc_questions": "164;18;45;90", "wc_limitations": "11;1;1;17", "wc_review": "358;218;411;344", "wc_reply_reviewers": "138;0;24;9", "wc_reply_authors": "550;58;38;32", "reply_reviewers": "2;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.25, 28.960101864461734 ], "wc_strengths_avg": [ 56.75, 22.38721733489895 ], "wc_weaknesses_avg": [ 98.0, 52.091266062556016 ], "wc_questions_avg": [ 79.25, 55.27827330877838 ], "wc_limitations_avg": [ 7.5, 6.837397165588672 ], "wc_review_avg": [ 332.75, 70.80739721243819 ], "wc_reply_reviewers_avg": [ 42.75, 55.656872890955704 ], "wc_reply_authors_avg": [ 169.5, 219.8925874148558 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16383984742771962511&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "xidian.edu;;uts.edu.au;sydney.edu.au;unimelb.edu.au;sydney.edu.au;", "author_num": 7, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Xidian University;University of Technology Sydney;University of Sydney;University of Melbourne", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.uts.edu.au;https://www.sydney.edu.au;https://www.unimelb.edu.au", "aff_unique_abbr": "Xidian;UTS;USYD;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;Australia" }, { "title": "Tame a Wild Camera: In-the-Wild Monocular Camera Calibration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72667", "id": "898RcRYWCg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8db9279f593652ee9bb2223b4a2c43fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=898RcRYWCg", "openreview": "https://openreview.net/forum?id=898RcRYWCg", "poster": "/media/PosterPDFs/NeurIPS%202023/72667.png?t=1701397516.6961286", "slides": "https://nips.cc/virtual/2023/poster/72667", "video": "https://nips.cc/virtual/2023/poster/72667", "author_site": "Shengjie Zhu, Abhinav Kumar, Masa Hu, Xiaoming Liu", "tldr": "", "abstract": "3D sensing for monocular in-the-wild images, e.g., depth estimation and 3D object detection, has become increasingly important.\nHowever, the unknown intrinsic parameter hinders their development and deployment.\nPrevious methods for the monocular camera calibration rely on specific 3D objects or strong geometry prior, such as using a checkerboard or imposing a Manhattan World assumption.\nThis work instead calibrates intrinsic via exploiting the monocular 3D prior.\nGiven an undistorted image as input, our method calibrates the complete 4 Degree-of-Freedom (DoF) intrinsic parameters.\nFirst, we show intrinsic is determined by the two well-studied monocular priors: monocular depthmap and surface normal map.\nHowever, this solution necessitates a low-bias and low-variance depth estimation.\nAlternatively, we introduce the incidence field, defined as the incidence rays between points in 3D space and pixels in the 2D 
imaging plane.\nWe show that: 1) The incidence field is a pixel-wise parametrization of the intrinsic invariant to image cropping and resizing.\n2) The incidence field is a learnable monocular 3D prior, determined pixel-wise by the up-to-scale monocular depthmap and surface normal.\nWith the estimated incidence field, a robust RANSAC algorithm recovers intrinsic.\nWe show the effectiveness of our method through superior performance on synthetic and zero-shot testing datasets.\nBeyond calibration, we demonstrate downstream applications in image manipulation detection \\& restoration, uncalibrated two-view pose estimation, and 3D sensing.", "keywords": "Monocular Camera Calibration; Camera Pose Estimation; Image Editing", "primary_area": "", "supplementary_material": "/attachment/2341bfdd3ea7f65a5293959f61e8029b8847cfd3.pdf", "author": "Shengjie Zhu;Abhinav Kumar;Masa Hu;Xiaoming Liu", "authorids": "~Shengjie_Zhu1;~Abhinav_Kumar1;~Masa_Hu1;~Xiaoming_Liu2", "gender": "M;M;M;M", "homepage": ";https://sites.google.com/view/abhinavkumar;http://cvlab.cse.msu.edu/;http://www.cse.msu.edu/~liuxm/", "dblp": ";115/6458-4;;l/XiaomingLiu0002", "google_scholar": "4hHEXZkAAAAJ;https://scholar.google.co.in/citations?hl=en;;https://scholar.google.com/citations?hl=en", "orcid": "0009-0006-3132-1933;;;", "linkedin": "shengjie-zhu-b71945159/;abhinav1kumar;;xiaoming-liu-5a7807b/", "or_profile": "~Shengjie_Zhu1;~Abhinav_Kumar1;~Masa_Hu1;~Xiaoming_Liu2", "aff": "Michigan State University;Michigan State University;Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu;msu.edu", "position": "PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nzhu2023tame,\ntitle={Tame a Wild Camera: In-the-Wild Monocular Camera Calibration},\nauthor={Shengjie Zhu and Abhinav Kumar and Masa Hu and Xiaoming Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=898RcRYWCg}\n}", "github": "", "project": "", "reviewers": "wjnN;iokN;Ujoz;PGSe;TGwc", "pdf_size": 13958670, "rating": "3;5;6;7;7", "confidence": "3;5;4;5;3", "soundness": "3;3;4;3;4", "novelty": "2;2;3;4;4", "presentation": "3;2;3;4;3", "wc_summary": "73;49;113;72;113", "wc_strengths": "36;44;151;123;44", "wc_weaknesses": "196;31;70;282;48", "wc_questions": "43;371;72;2;74", "wc_limitations": "1;38;7;13;34", "wc_review": "349;533;413;492;313", "wc_reply_reviewers": "91;52;29;164;22", "wc_reply_authors": "0;34;0;187;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;2;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 84.0, 25.187298386289864 ], "wc_strengths_avg": [ 79.6, 47.78535340457367 ], "wc_weaknesses_avg": [ 125.4, 97.44660076164791 ], "wc_questions_avg": [ 112.4, 131.89783925447756 ], "wc_limitations_avg": [ 18.6, 14.759403781996074 ], "wc_review_avg": [ 420.0, 83.05660720255794 ], "wc_reply_reviewers_avg": [ 71.6, 52.102207246910375 ], "wc_reply_authors_avg": [ 44.2, 72.60413211381292 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2988071523335984, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6464993643473102413&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total":
6, "email": "msu.edu;msu.edu;msu.edu;msu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Michigan State University", "aff_unique_dep": "", "aff_unique_url": "https://www.msu.edu", "aff_unique_abbr": "MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Automated Circuit Discovery for Mechanistic Interpretability", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72666", "id": "89ia77nZ8u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34e1dbe95d34d7ebaf99b9bcaeb5b2be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=89ia77nZ8u", "openreview": "https://openreview.net/forum?id=89ia77nZ8u", "poster": "/media/PosterPDFs/NeurIPS%202023/72666.png?t=1701482717.0775466", "slides": "https://nips.cc/virtual/2023/poster/72666", "video": "https://nips.cc/virtual/2023/poster/72666", "author_site": "Arthur Conmy, Augustine Mavor-Parker, Aengus Lynch, Stefan Heimersheim, Adri\u00e0 Garriga-Alonso", "tldr": "", "abstract": "Through considerable effort and intuition, several recent works have reverse-engineered nontrivial behaviors of\ntransformer models. This paper systematizes the mechanistic interpretability process they followed. First, researchers\nchoose a metric and dataset that elicit the desired model behavior. Then, they apply activation patching to find which\nabstract neural network units are involved in the behavior. By varying the dataset, metric, and units under\ninvestigation, researchers can understand the functionality of each component.\n\nWe automate one of the process' steps: finding the connections between the abstract neural network units that form a circuit. We propose several algorithms and reproduce previous interpretability results to validate them. For\nexample, the ACDC algorithm rediscovered 5/5 of the component types in a circuit in GPT-2 Small that computes the\nGreater-Than operation. ACDC selected 68 of the 32,000 edges in GPT-2 Small, all of which were manually found by\nprevious work. Our code is available at https://github.com/ArthurConmy/Automatic-Circuit-Discovery", "keywords": "Mechanistic Interpretability;Pruning;Science of Deep Learning;AI Safety", "primary_area": "", "supplementary_material": "", "author": "Arthur Conmy;Augustine N. Mavor-Parker;Aengus Lynch;Stefan Heimersheim;Adri\u00e0 Garriga-Alonso", "authorids": "~Arthur_Conmy1;~Augustine_N._Mavor-Parker1;~Aengus_Lynch1;~Stefan_Heimersheim1;~Adri\u00e0_Garriga-Alonso1", "gender": "M;M;M;;", "homepage": "https://arthurconmy.github.io/;https://self-supervisor.github.io/;;;", "dblp": ";;;;", "google_scholar": ";J7XkuPwAAAAJ;Pd2002AAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Arthur_Conmy1;~Augustine_N._Mavor-Parker1;~Aengus_Lynch1;~Stefan_Heimersheim1;~Adri\u00e0_Garriga-Alonso1", "aff": "Redwood Research;;University College London, University of London;;", "aff_domain": "rdwrs.com;;ucl.ac.uk;;", "position": "Researcher;;PhD student;;", "bibtex": "@inproceedings{\nconmy2023towards,\ntitle={Towards Automated Circuit Discovery for Mechanistic Interpretability},\nauthor={Arthur Conmy and Augustine N. 
Mavor-Parker and Aengus Lynch and Stefan Heimersheim and Adri{\\`a} Garriga-Alonso},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=89ia77nZ8u}\n}", "github": "", "project": "", "reviewers": "6BEj;pJGB;Zemb;aEyk", "pdf_size": 4997988, "rating": "6;7;7;9", "confidence": "3;4;5;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;2;4;4", "wc_summary": "171;40;153;135", "wc_strengths": "125;37;185;74", "wc_weaknesses": "166;55;274;49", "wc_questions": "192;265;227;9", "wc_limitations": "1;20;8;4", "wc_review": "655;417;847;271", "wc_reply_reviewers": "0;21;168;29", "wc_reply_authors": "16;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 124.75, 50.558752951393096 ], "wc_strengths_avg": [ 105.25, 55.64339583454626 ], "wc_weaknesses_avg": [ 136.0, 92.29572037749097 ], "wc_questions_avg": [ 173.25, 98.28116554050425 ], "wc_limitations_avg": [ 8.25, 7.224091638399945 ], "wc_review_avg": [ 547.5, 220.64621002863385 ], "wc_reply_reviewers_avg": [ 54.5, 66.37959023675877 ], "wc_reply_authors_avg": [ 4.0, 6.928203230275509 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 271, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11951169017287900557&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "rdwrs.com;;ucl.ac.uk;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Redwood Research;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.redwoodresearch.org;https://www.ucl.ac.uk", "aff_unique_abbr": "Redwood Research;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Label-efficient Segmentation via Affinity Propagation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72665", "id": "8BPzLxF9p5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f6fae52f3b62c3334e288e3bc58230d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8BPzLxF9p5", "openreview": "https://openreview.net/forum?id=8BPzLxF9p5", "poster": "/media/PosterPDFs/NeurIPS%202023/72665.png?t=1701603496.3696098", "slides": "https://nips.cc/virtual/2023/poster/72665", "video": "https://nips.cc/virtual/2023/poster/72665", "author_site": "Wentong Li, Yuqian Yuan, Song Wang, Wenyu Liu, Dongqi Tang, Jian liu, Jianke Zhu, Lei Zhang", "tldr": "", "abstract": "Weakly-supervised segmentation with label-efficient sparse annotations has attracted increasing research attention to reduce the cost of laborious pixel-wise labeling process, while the pairwise affinity modeling techniques play an essential role in this task. Most of the existing approaches focus on using the local appearance kernel to model the neighboring pairwise potentials. However, such a local operation fails to capture the long-range dependencies and ignores the topology of objects. 
In this work, we formulate affinity modeling as an affinity propagation process and propose local and global pairwise affinity terms to generate accurate soft pseudo labels. An efficient algorithm is also developed to significantly reduce the computational cost. The proposed approach can be conveniently plugged into existing segmentation networks. Experiments on three typical label-efficient segmentation tasks, i.e., box-supervised instance segmentation, point/scribble-supervised semantic segmentation, and CLIP-guided semantic segmentation, demonstrate the superior performance of the proposed approach.", "keywords": "Computer Vision;Segmentation;Weakly-supervised Learning", "primary_area": "", "supplementary_material": "/attachment/b5bb72e048daa9e4ffb502f12970e6f363cf608a.pdf", "author": "Wentong Li;Yuqian Yuan;Song Wang;Wenyu Liu;Dongqi Tang;Jian liu;Jianke Zhu;Lei Zhang", "authorids": "~Wentong_Li2;~Yuqian_Yuan1;~Song_Wang11;~Wenyu_Liu4;~Dongqi_Tang2;~Jian_liu8;~Jianke_Zhu1;~Lei_Zhang2", "gender": "M;F;M;M;M;M;M;M", "homepage": "https://cslwt.github.io;https://github.com/CircleRadon;https://github.com/songw-zju;;https://github.com/courao;;https://person.zju.edu.cn/en/jkzhu;http://www4.comp.polyu.edu.hk/~cslzhang/", "dblp": ";354/6035;;42/4110-5;246/5788;;10/4016;64/5666-6.html", "google_scholar": "MJjM6BcAAAAJ;;Jj0jbL8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;SC-WmzwAAAAJ;tAK5l1IAAAAJ", "orcid": ";;;0000-0002-3035-987X;;;0000-0003-1831-0106;0000-0002-2078-4215", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/%E5%81%A5-%E5%88%98-917ba7138;https://www.linkedin.cn/incareer/in/jianke-zhu-b83bba8;", "or_profile": "~Wentong_Li2;~Yuqian_Yuan1;~Song_Wang11;~Wenyu_Liu4;~Dongqi_Tang2;~Jian_liu8;~Jianke_Zhu1;~Lei_Zhang2", "aff": "Alibaba Group;Peking University;Zhejiang University;Zhejiang University;Ant Group;AntGroup;Zhejiang University;The Hong Kong Polytechnic University", "aff_domain": "antgroup.com;pku.edu.cn;zju.edu.cn;zju.edu.cn;antgroup.com;antgroup.com;zju.edu.cn;polyu.edu.hk", "position": "Intern;Intern;PhD student;PhD student;Researcher;Researcher;Full Professor;Chair Professor", "bibtex": "@inproceedings{\nli2023labelefficient,\ntitle={Label-efficient Segmentation via Affinity Propagation},\nauthor={Wentong Li and Yuqian Yuan and Song Wang and Wenyu Liu and Dongqi Tang and Jian liu and Jianke Zhu and Lei Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8BPzLxF9p5}\n}", "github": "", "project": "", "reviewers": "bFj5;bNsh;8pug;acqT;4J6P", "pdf_size": 1204978, "rating": "4;5;5;5;5", "confidence": "4;3;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;2", "presentation": "3;2;3;2;3", "wc_summary": "35;58;85;54;74", "wc_strengths": "17;51;88;152;28", "wc_weaknesses": "171;72;90;78;256", "wc_questions": "79;70;48;337;6", "wc_limitations": "2;6;30;6;20", "wc_review": "304;257;341;627;384", "wc_reply_reviewers": "0;23;15;0;59", "wc_reply_authors": "0;12;12;0;24", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 4.8, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 61.2, 17.19767426136453 ], "wc_strengths_avg": [ 67.2, 48.872896374166324 ], "wc_weaknesses_avg": [ 133.4, 70.94674058757033 ], "wc_questions_avg": [ 108.0, 117.2433366976563 ], "wc_limitations_avg": [ 12.8, 
10.552724766618336 ], "wc_review_avg": [ 382.6, 129.1643913778097 ], "wc_reply_reviewers_avg": [ 19.4, 21.694238866574693 ], "wc_reply_authors_avg": [ 9.6, 8.97997772825746 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3860739246655663500&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "antgroup.com;pku.edu.cn;zju.edu.cn;zju.edu.cn;antgroup.com;antgroup.com;zju.edu.cn;polyu.edu.hk", "author_num": 8, "aff_unique_index": "0;1;2;2;3;3;2;4", "aff_unique_norm": "Alibaba Group;Peking University;Zhejiang University;Ant Group;Hong Kong Polytechnic University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.alibaba.com;http://www.pku.edu.cn;https://www.zju.edu.cn;https://www.antgroup.com;https://www.polyu.edu.hk", "aff_unique_abbr": "Alibaba;Peking U;ZJU;Ant Group;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Analyzing Generalization of Neural Networks through Loss Path Kernels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72664", "id": "8Ba7VJ7xiM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0b6f389739496e363a89155c9448a8a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8Ba7VJ7xiM", "openreview": "https://openreview.net/forum?id=8Ba7VJ7xiM", "poster": "/media/PosterPDFs/NeurIPS%202023/72664.png?t=1702080795.2247963", "slides": "https://nips.cc/virtual/2023/poster/72664", "video": "https://nips.cc/virtual/2023/poster/72664", "author_site": "Yilan Chen, Wei Huang, Hao Wang, Charlotte Loh, Akash Srivastava, Lam Nguyen, Lily Weng", "tldr": "", "abstract": "Deep neural networks have been increasingly used in real-world applications, making it critical to ensure their ability to adapt to new, unseen data. In this paper, we study the generalization capability of neural networks trained with (stochastic) gradient flow. We establish a new connection between the loss dynamics of gradient flow and general kernel machines by proposing a new kernel, called loss path kernel. This kernel measures the similarity between two data points by evaluating the agreement between loss gradients along the path determined by the gradient flow. Based on this connection, we derive a new generalization upper bound that applies to general neural network architectures. This new bound is tight and strongly correlated with the true generalization error. We apply our results to guide the design of neural architecture search (NAS) and demonstrate favorable performance compared with state-of-the-art NAS algorithms through numerical experiments.", "keywords": "generalization;deep learning theory;neural tangent kernel;neural architecture search", "primary_area": "", "supplementary_material": "", "author": "Yilan Chen;Wei Huang;Hao Wang;Charlotte Loh;Akash Srivastava;Lam M. 
Nguyen;Tsui-Wei Weng", "authorids": "~Yilan_Chen1;~Wei_Huang6;~Hao_Wang22;~Charlotte_Loh1;~Akash_Srivastava1;~Lam_M._Nguyen1;~Tsui-Wei_Weng1", "gender": "M;M;M;F;M;F;M", "homepage": "https://yilanchen6.github.io/;https://weihuang05.github.io/;https://haowang94.github.io;;http://akashgit.github.io;https://lilywenglab.github.io;https://lamnguyen-mltd.github.io/", "dblp": "167/6638-2.html;81/6685-34;;217/6481;24/9528;177/9197;181/1428", "google_scholar": "6wmzpRIAAAAJ;RZfDh4MAAAAJ;A3WtYhAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ;v8GM4xoAAAAJ;DeFL5Q8AAAAJ", "orcid": ";0000-0001-5674-7021;;;;;", "linkedin": ";;;;https://uk.linkedin.com/in/akash-srivastava-aa97361b;;lam-m-nguyen-71b54750/", "or_profile": "~Yilan_Chen1;~Wei_Huang6;~Hao_Wang22;~Charlotte_Loh1;~Akash_Srivastava1;~Tsui-Wei_Weng1;~Lam_M_Nguyen1", "aff": "University of California, San Diego;RIKEN AIP;MIT-IBM Watson AI Lab;Massachusetts Institute of Technology;MIT-IBM Watson AI Research Lab;University of California, San Diego;IBM Research, Thomas J. Watson Research Center", "aff_domain": "ucsd.edu;riken.jp;ibm.com;mit.edu;ibm.com;ucsd.edu;ibm.com", "position": "PhD student;Postdoc;Researcher;PhD student;Research Scientist;Assistant Professor;Staff Research Scientist", "bibtex": "@inproceedings{\nchen2023analyzing,\ntitle={Analyzing Generalization of Neural Networks through Loss Path Kernels},\nauthor={Yilan Chen and Wei Huang and Hao Wang and Charlotte Loh and Akash Srivastava and Lam M. Nguyen and Tsui-Wei Weng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8Ba7VJ7xiM}\n}", "github": "", "project": "", "reviewers": "kz2v;kZTv;Zsen;W7Wt", "pdf_size": 5904680, "rating": "5;7;7;8", "confidence": "4;3;3;3", "soundness": "2;3;4;4", "novelty": "3;3;2;4", "presentation": "2;2;4;3", "wc_summary": "180;98;73;52", "wc_strengths": "37;99;104;104", "wc_weaknesses": "545;138;408;156", "wc_questions": "224;68;2;166", "wc_limitations": "33;28;2;1", "wc_review": "1019;431;589;479", "wc_reply_reviewers": "1109;109;17;32", "wc_reply_authors": "1814;69;34;39", "reply_reviewers": "3;1;1;1", "reply_authors": "6;2;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.75, 48.56632063477735 ], "wc_strengths_avg": [ 86.0, 28.36370920736567 ], "wc_weaknesses_avg": [ 311.75, 171.84058746407962 ], "wc_questions_avg": [ 115.0, 85.81957818586619 ], "wc_limitations_avg": [ 16.0, 14.611639196202457 ], "wc_review_avg": [ 629.5, 232.05764370087016 ], "wc_reply_reviewers_avg": [ 316.75, 458.7354221117005 ], "wc_reply_authors_avg": [ 489.0, 765.1062017785505 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14521896869907698826&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": "ucsd.edu;riken.jp;ibm.com;mit.edu;ibm.com;ucsd.edu;ibm.com", "author_num": 7, "aff_unique_index": "0;1;2;2;2;0;3", "aff_unique_norm": "University of California, San Diego;RIKEN;Massachusetts Institute of Technology;IBM", "aff_unique_dep": ";Advanced Institute for Computational Science;IBM Watson AI 
Lab;IBM Research", "aff_unique_url": "https://www.ucsd.edu;https://www.aip.riken.jp;https://www.mitibmwatsonailab.org;https://www.ibm.com/research", "aff_unique_abbr": "UCSD;RIKEN AIP;MIT-IBM AI Lab;IBM", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "San Diego;;Yorktown Heights", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Exploiting Connections between Lipschitz Structures for Certifiably Robust Deep Equilibrium Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72663", "id": "8F3Lutda7R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4462db5eee6823b2abad0d1f955e187a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8F3Lutda7R", "openreview": "https://openreview.net/forum?id=8F3Lutda7R", "poster": "/media/PosterPDFs/NeurIPS%202023/72663.png?t=1701916858.1437025", "slides": "https://nips.cc/virtual/2023/poster/72663", "video": "https://nips.cc/virtual/2023/poster/72663", "author_site": "Aaron Havens, Alexandre Araujo, Siddharth Garg, Farshad Khorrami, Bin Hu", "tldr": "", "abstract": "Recently, deep equilibrium models (DEQs) have drawn increasing attention from the machine learning community. However, DEQs are much less understood in terms of certified robustness than their explicit network counterparts. In this paper, we advance the understanding of certified robustness of DEQs via exploiting the connections between various Lipschitz network parameterizations for both explicit and implicit models. Importantly, we show that various popular Lipschitz network structures, including convex potential layers (CPL), SDP-based Lipschitz layers (SLL), almost orthogonal layers (AOL), Sandwich layers, and monotone DEQs (MonDEQ) can all be reparameterized as special cases of the Lipschitz-bounded equilibrium networks (LBEN) without changing the prescribed Lipschitz constant in the original network parameterization. A key feature of our reparameterization technique is that it preserves the Lipschitz prescription used in different structures. This opens the possibility of achieving improved certified robustness of DEQs via a combination of network reparameterization, structure-preserving regularization, and LBEN-based fine-tuning. We also support our theoretical understanding with new empirical results, which show that our proposed method improves the certified robust accuracy of DEQs on classification tasks. 
All codes and experiments are made available at \\url{https://github.com/AaronHavens/ExploitingLipschitzDEQ}.", "keywords": "Deep equilibrium models;Lipschitz networks;certified robustness", "primary_area": "", "supplementary_material": "", "author": "Aaron J Havens;Alexandre Araujo;Siddharth Garg;Farshad Khorrami;Bin Hu", "authorids": "~Aaron_J_Havens1;~Alexandre_Araujo3;~Siddharth_Garg1;~Farshad_Khorrami1;~Bin_Hu2", "gender": "M;M;M;M;M", "homepage": "https://aaronhavens.github.io/;http://engineering.nyu.edu/people/siddharth-garg/;https://engineering.nyu.edu/faculty/farshad-khorrami;;https://alexandrearaujo.com/", "dblp": ";94/3807;94/5644;;228/6599", "google_scholar": ";https://scholar.google.com.tw/citations?user=Yf8OqQQAAAAJ;NdOqlPQAAAAJ;;https://scholar.google.fr/citations?user=wsu61VYAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Aaron_J_Havens1;~Siddharth_Garg1;~Farshad_Khorrami1;~Bin_Hu2;~Alexandre_ARAUJO1", "aff": "University of Illinois, Urbana Champaign;New York University;New York University;University of Illinois, Urbana Champaign;New York University", "aff_domain": "illinois.edu;nyu.edu;nyu.edu;illinois.edu;nyu.edu", "position": "PhD student;Associate Professor;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nhavens2023exploiting,\ntitle={Exploiting Connections between Lipschitz Structures for Certifiably Robust Deep Equilibrium Models},\nauthor={Aaron J Havens and Alexandre Araujo and Siddharth Garg and Farshad Khorrami and Bin Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8F3Lutda7R}\n}", "github": "", "project": "", "reviewers": "MZ1P;gX3G;PSUC;Lz7W", "pdf_size": 389747, "rating": "6;6;6;7", "confidence": "3;2;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "78;73;69;96", "wc_strengths": "56;31;62;77", "wc_weaknesses": "57;65;41;21", "wc_questions": "61;30;27;120", "wc_limitations": "9;17;1;6", "wc_review": "261;216;200;320", "wc_reply_reviewers": "40;48;0;16", "wc_reply_authors": "0;13;0;12", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.0, 10.319883720275147 ], "wc_strengths_avg": [ 56.5, 16.590660023037056 ], "wc_weaknesses_avg": [ 46.0, 16.822603841260722 ], "wc_questions_avg": [ 59.5, 37.379807383131336 ], "wc_limitations_avg": [ 8.25, 5.80409338312195 ], "wc_review_avg": [ 249.25, 46.56916898549941 ], "wc_reply_reviewers_avg": [ 26.0, 19.078784028338912 ], "wc_reply_authors_avg": [ 6.25, 6.2599920127744575 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8387385840455597732&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "illinois.edu;nyu.edu;nyu.edu;illinois.edu;nyu.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.nyu.edu", "aff_unique_abbr": "UIUC;NYU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private Statistical Inference through $\\beta$-Divergence One Posterior Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72662", "id": "8FbuHeVU7D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3024ea88cec9f45a411cf4d51ab649c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8FbuHeVU7D", "openreview": "https://openreview.net/forum?id=8FbuHeVU7D", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72662", "video": "https://nips.cc/virtual/2023/poster/72662", "author_site": "Jack Jewson, Sahra Ghalebikesabi, Chris C Holmes", "tldr": "", "abstract": "Differential privacy guarantees allow the results of a statistical analysis involving sensitive data to be released without compromising the privacy of any individual taking part. Achieving such guarantees generally requires the injection of noise, either directly into parameter estimates or into the estimation process. \nInstead of artificially introducing perturbations, \nsampling from Bayesian posterior distributions has been shown to be a special case of the exponential mechanism, producing consistent,\nand efficient private estimates without altering the data generative process. The application of current approaches has, however, been limited by their strong bounding assumptions which do not hold for basic models, such as simple linear regressors.\nTo ameliorate this, we propose $\\beta$D-Bayes, a posterior sampling scheme from a generalised posterior targeting the minimisation of the $\\beta$-divergence between the model and the data generating process. This provides private estimation that is generally applicable without requiring changes to the underlying model and consistently learns the data generating parameter. \nWe show that $\\beta$D-Bayes produces more precise inference estimation for the same privacy guarantees, and further facilitates differentially private estimation of complex classifiers, and continuous regression models such as neural networks, which goes beyond what has been currently possible with private posterior sampling.", "keywords": "differential privacy;beta-divergence;posterior sampling;generalised Bayesian inference", "primary_area": "", "supplementary_material": "/attachment/6f458523a67a31f9f74e684e8c4badf8093c5ccc.pdf", "author": "Jack Jewson;Sahra Ghalebikesabi;Christopher C. Holmes", "authorids": "~Jack_Jewson1;~Sahra_Ghalebikesabi1;~Christopher_C._Holmes1", "gender": "M;;M", "homepage": "https://sites.google.com/view/jack-jewson-academic-profile/home;;", "dblp": "222/3206;;08/6129", "google_scholar": "XBlL5AQAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jack_Jewson1;~Sahra_Ghalebikesabi1;~Christopher_C._Holmes1", "aff": "Universitat Pompeu Fabra;;University of Oxford", "aff_domain": "upf.edu;;ox.ac.uk", "position": "Postdoc;;Full Professor", "bibtex": "@inproceedings{\njewson2023differentially,\ntitle={Differentially Private Statistical Inference through \\${\\textbackslash}beta\\$-Divergence One Posterior Sampling},\nauthor={Jack Jewson and Sahra Ghalebikesabi and Christopher C. 
Holmes},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8FbuHeVU7D}\n}", "github": "", "project": "", "reviewers": "fSk8;D3oa;9D13;ADTx", "pdf_size": 1090349, "rating": "5;6;6;7", "confidence": "4;2;4;3", "soundness": "3;4;2;3", "novelty": "2;3;2;3", "presentation": "1;3;2;3", "wc_summary": "174;65;204;246", "wc_strengths": "59;44;73;81", "wc_weaknesses": "306;218;156;31", "wc_questions": "74;44;558;31", "wc_limitations": "1;25;15;80", "wc_review": "614;396;1006;469", "wc_reply_reviewers": "80;0;118;51", "wc_reply_authors": "170;0;87;22", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 172.25, 66.99393629277205 ], "wc_strengths_avg": [ 64.25, 14.095655359010449 ], "wc_weaknesses_avg": [ 177.75, 100.09589152407806 ], "wc_questions_avg": [ 176.75, 220.66646210967357 ], "wc_limitations_avg": [ 30.25, 29.961433543807612 ], "wc_review_avg": [ 621.25, 235.58583892076365 ], "wc_reply_reviewers_avg": [ 62.25, 43.083494519363214 ], "wc_reply_authors_avg": [ 69.75, 66.13008014512005 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4473621369242754371&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "upf.edu;;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Universitat Pompeu Fabra;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.upf.edu/;https://www.ox.ac.uk", "aff_unique_abbr": "UPF;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Spain;United Kingdom" }, { "title": "Conservative State Value Estimation for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72661", "id": "8GSCaoFot9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e469fbdc43ade121170f61096f4458b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8GSCaoFot9", "openreview": "https://openreview.net/forum?id=8GSCaoFot9", "poster": "/media/PosterPDFs/NeurIPS%202023/72661.png?t=1701639718.8554053", "slides": "https://nips.cc/virtual/2023/poster/72661", "video": "https://nips.cc/virtual/2023/poster/72661", "author_site": "Liting Chen, Jie Yan, Zhengdao Shao, Lu Wang, Qingwei Lin, Saravanakumar Rajmohan, Thomas Moscibroda, Dongmei Zhang", "tldr": "", "abstract": "Offline reinforcement learning faces a significant challenge of value over-estimation due to the distributional drift between the dataset and the current learned policy, leading to learning failure in practice. The common approach is to incorporate a penalty term into the reward or value estimation in the Bellman iterations. Meanwhile, to avoid extrapolation on out-of-distribution (OOD) states and actions, existing methods focus on conservative Q-function estimation. In this paper, we propose Conservative State Value Estimation (CSVE), a new approach that learns a conservative V-function by directly imposing a penalty on OOD states. 
Compared to prior work, CSVE allows more effective state value estimation with conservative guarantees and, in turn, better policy optimization. Further, we apply CSVE to develop a practical actor-critic algorithm in which the critic performs the conservative value estimation by additionally sampling and penalizing the states around the dataset, and the actor applies advantage-weighted updates extended with state exploration to improve the policy. We evaluate our method on the classic continuous control tasks of D4RL, showing that it performs better than conservative Q-function learning methods and is strongly competitive among recent SOTA methods.", "keywords": "Offline Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/318e07bd2b79c263d107577f5d9e21ce92be4db3.pdf", "author": "Liting Chen;Jie Yan;Zhengdao Shao;Lu Wang;Qingwei Lin;Saravan Rajmohan;Thomas Moscibroda;Dongmei Zhang", "authorids": "~Liting_Chen1;~Jie_Yan3;~Zhengdao_Shao1;~Lu_Wang8;~Qingwei_Lin1;~Saravan_Rajmohan2;~Thomas_Moscibroda1;~Dongmei_Zhang2", "gender": "F;M;M;;M;;M;", "homepage": "https://ianthechan.github.io/;;https://blog.csdn.net/Xixo0628;;https://www.microsoft.com/en-us/research/people/qlin/;;;https://www.microsoft.com/en-us/research/people/dongmeiz/", "dblp": ";;;;120/0743;;m/ThomasMoscibroda;87/461-1", "google_scholar": "https://scholar.google.com/citations?hl=en;W4NOQM0AAAAJ;;;https://scholar.google.co.jp/citations?hl=zh-CN;;;jLlBBl4AAAAJ", "orcid": ";;;;0000-0003-2559-2383;;;0000-0002-9230-2799", "linkedin": ";;;;;;;dongmei-zhang-38a86317/", "or_profile": "~Liting_Chen1;~Jie_Yan3;~Zhengdao_Shao1;~Lu_Wang8;~Qingwei_Lin1;~Saravan_Rajmohan2;~Thomas_Moscibroda1;~Dongmei_Zhang2", "aff": "Microsoft;Microsoft Research Asia;University of Science and Technology of China;;Microsoft Research;;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;ustc.edu.cn;;microsoft.com;;microsoft.com;microsoft.com", "position": "MS student;Researcher;MS student;;Sr. 
Principal Researcher;;Emeritus;Assistant Managing Director, Microsoft Research Asia", "bibtex": "@inproceedings{\nchen2023conservative,\ntitle={Conservative State Value Estimation for Offline Reinforcement Learning},\nauthor={Liting Chen and Jie Yan and Zhengdao Shao and Lu Wang and Qingwei Lin and Saravan Rajmohan and Thomas Moscibroda and Dongmei Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8GSCaoFot9}\n}", "github": "", "project": "", "reviewers": "Qqxx;ze4E;QSVW;EMsr", "pdf_size": 840201, "rating": "3;6;6;6", "confidence": "4;4;3;3", "soundness": "1;3;3;3", "novelty": "1;3;3;3", "presentation": "1;3;3;2", "wc_summary": "49;49;192;177", "wc_strengths": "8;83;149;86", "wc_weaknesses": "211;212;99;295", "wc_questions": "6;138;89;90", "wc_limitations": "7;2;18;3", "wc_review": "281;484;547;651", "wc_reply_reviewers": "735;289;179;23", "wc_reply_authors": "1035;378;247;13", "reply_reviewers": "3;3;2;1", "reply_authors": "4;4;3;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 116.75, 67.95724758993701 ], "wc_strengths_avg": [ 81.5, 49.95247741604014 ], "wc_weaknesses_avg": [ 204.25, 69.6755875468589 ], "wc_questions_avg": [ 80.75, 47.483549783056446 ], "wc_limitations_avg": [ 7.5, 6.34428877022476 ], "wc_review_avg": [ 490.75, 134.98587889108992 ], "wc_reply_reviewers_avg": [ 306.5, 264.83343822108264 ], "wc_reply_authors_avg": [ 418.25, 379.3266237690152 ], "reply_reviewers_avg": [ 2.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9761122047552675133&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "microsoft.com;microsoft.com;ustc.edu.cn;;microsoft.com;;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Microsoft;University of Science and Technology of China", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;http://www.ustc.edu.cn", "aff_unique_abbr": "Microsoft;USTC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Pre-training Contextualized World Models with In-the-wild Videos for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72660", "id": "8GuEVzAUQS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ce1cbededb4b0d6202847ac1b484ee8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8GuEVzAUQS", "openreview": "https://openreview.net/forum?id=8GuEVzAUQS", "poster": "/media/PosterPDFs/NeurIPS%202023/72660.png?t=1699282410.5369947", "slides": "https://nips.cc/virtual/2023/poster/72660", "video": "https://nips.cc/virtual/2023/poster/72660", "author_site": "Jialong Wu, Haoyu Ma, Chaoyi Deng, Mingsheng Long", "tldr": "", "abstract": "Unsupervised pre-training methods utilizing large and diverse datasets have achieved tremendous success across a range of domains. 
Recent work has investigated such unsupervised pre-training methods for model-based reinforcement learning (MBRL) but is limited to domain-specific or simulated data. In this paper, we study the problem of pre-training world models with abundant in-the-wild videos for efficient learning of downstream visual control tasks. However, in-the-wild videos are complicated with various contextual factors, such as intricate backgrounds and textured appearance, which precludes a world model from extracting shared world knowledge to generalize better. To tackle this issue, we introduce Contextualized World Models (ContextWM) that explicitly separate context and dynamics modeling to overcome the complexity and diversity of in-the-wild videos and facilitate knowledge transfer between distinct scenes. Specifically, a contextualized extension of the latent dynamics model is elaborately realized by incorporating a context encoder to retain contextual information and empower the image decoder, which encourages the latent dynamics model to concentrate on essential temporal variations. Our experiments show that in-the-wild video pre-training equipped with ContextWM can significantly improve the sample efficiency of MBRL in various domains, including robotic manipulation, locomotion, and autonomous driving. Code is available at this repository: https://github.com/thuml/ContextWM.", "keywords": "Model-based reinforcement learning;world model;pre-training", "primary_area": "", "supplementary_material": "", "author": "Jialong Wu;Haoyu Ma;Chaoyi Deng;Mingsheng Long", "authorids": "~Jialong_Wu1;~Haoyu_Ma3;~Chaoyi_Deng1;~Mingsheng_Long5", "gender": "M;;M;", "homepage": "https://manchery.github.io/;;https://github.com/dcy11011;", "dblp": "73/498-1.html;;348/6553;", "google_scholar": "FfTZ66gAAAAJ;;;", "orcid": "0009-0008-7846-053X;;0009-0003-0635-2568;", "linkedin": ";;;", "or_profile": "~Jialong_Wu1;~Haoyu_Ma3;~Chaoyi_Deng1;~Mingsheng_Long5", "aff": "Tsinghua University;;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;", "position": "PhD student;;Undergrad student;", "bibtex": "@inproceedings{\nwu2023pretraining,\ntitle={Pre-training Contextualized World Models with In-the-wild Videos for Reinforcement Learning},\nauthor={Jialong Wu and Haoyu Ma and Chaoyi Deng and Mingsheng Long},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8GuEVzAUQS}\n}", "github": "", "project": "", "reviewers": "MYvK;JHvL;wPqn;ajER;kJR6", "pdf_size": 6467017, "rating": "3;5;7;7;8", "confidence": "3;4;4;4;5", "soundness": "2;3;4;2;4", "novelty": "1;3;3;2;4", "presentation": "3;3;4;4;3", "wc_summary": "54;103;67;53;192", "wc_strengths": "38;46;100;78;64", "wc_weaknesses": "100;180;79;249;54", "wc_questions": "114;155;175;53;98", "wc_limitations": "9;6;1;10;21", "wc_review": "315;490;422;443;429", "wc_reply_reviewers": "44;86;52;78;57", "wc_reply_authors": "714;318;29;364;52", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;3;2;4;2", "rating_avg": [ 6.0, 1.7888543819998317 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 93.8, 52.335074281021136 ], "wc_strengths_avg": [ 65.2, 22.292599668948437 ], "wc_weaknesses_avg": [ 132.4, 71.9849984371744 ], "wc_questions_avg": [ 119.0, 43.02092514114498 ], "wc_limitations_avg": [ 9.4, 6.590902821313632 ], "wc_review_avg": [ 419.8, 
57.5131289359221 ], "wc_reply_reviewers_avg": [ 63.4, 15.944905142395799 ], "wc_reply_authors_avg": [ 295.4, 249.26098772170505 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8838834764831843, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4208590369646396326&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "tsinghua.edu.cn;;tsinghua.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Efficient Subgame Refinement for Extensive-form Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72659", "id": "8HzOyg1ngp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a2b4aba905a16733ff199888ac8eec4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8HzOyg1ngp", "openreview": "https://openreview.net/forum?id=8HzOyg1ngp", "poster": "/media/PosterPDFs/NeurIPS%202023/72659.png?t=1701884915.3188794", "slides": "https://nips.cc/virtual/2023/poster/72659", "video": "https://nips.cc/virtual/2023/poster/72659", "author_site": "Zhenxing Ge, Zheng Xu, Tianyu Ding, Wenbin Li, Yang Gao", "tldr": "", "abstract": "Subgame solving is an essential technique in addressing large imperfect information games, with various approaches developed to enhance the performance of refined strategies in the abstraction of the target subgame. However, directly applying existing subgame solving techniques may be difficult, due to the intricate nature and substantial size of many real-world games. To overcome this issue, recent subgame solving methods allow for subgame solving on limited knowledge order subgames, increasing their applicability in large games; yet this may still face obstacles due to extensive information set sizes. To address this challenge, we propose a generative subgame solving (GS2) framework, which utilizes a generation function to identify a subset of the earliest-reached nodes, reducing the size of the subgame. Our method is supported by a theoretical analysis and employs a diversity-based generation function to enhance safety. 
Experiments conducted on medium-sized games as well as the challenging large game of GuanDan demonstrate a significant improvement over the blueprint.", "keywords": "Subgame solving;extensive-form game;imperfect information", "primary_area": "", "supplementary_material": "/attachment/1459c817f2b49b61d4ec910969451bfbdf6d6f00.pdf", "author": "Zhenxing Ge;Zheng Xu;Tianyu Ding;Wenbin Li;Yang Gao", "authorids": "~Zhenxing_Ge1;~Zheng_Xu3;~Tianyu_Ding1;~Wenbin_Li5;~Yang_Gao3", "gender": "M;;M;M;M", "homepage": "http://cs.nju.edu.cn/rl;https://xuzheng.space/;https://cs.nju.edu.cn/liwenbin/;https://cs.nju.edu.cn/gaoyang/;https://www.tianyuding.com", "dblp": ";;27/1736-6.html;89/4402-1;134/4796", "google_scholar": ";;K-kC4yYAAAAJ;https://scholar.google.com.tw/citations?user=CJwLwzQAAAAJ;Qi7zTOcAAAAJ", "orcid": ";0009-0007-3486-6864;;;0000-0001-8445-4330", "linkedin": ";zheng-xu-259140193/;;;tianyuding/", "or_profile": "~Zhenxing_Ge1;~Zheng_Xu3;~Wenbin_Li5;~Yang_Gao3;~Tianyu_DING2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;", "position": "PhD student;MS student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nge2023efficient,\ntitle={Efficient Subgame Refinement for Extensive-form Games},\nauthor={Zhenxing Ge and Zheng Xu and Tianyu Ding and Wenbin Li and Yang Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8HzOyg1ngp}\n}", "github": "", "project": "", "reviewers": "gWJJ;hSoE;D79X;EqAE", "pdf_size": 643448, "rating": "6;6;6;6", "confidence": "4;4;5;4", "soundness": "3;4;2;4", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "32;26;241;50", "wc_strengths": "54;31;89;68", "wc_weaknesses": "61;50;358;312", "wc_questions": "200;3;149;3", "wc_limitations": "1;8;1;6", "wc_review": "348;118;838;439", "wc_reply_reviewers": "195;5;59;17", "wc_reply_authors": "201;0;35;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.25, 89.20587144353223 ], "wc_strengths_avg": [ 60.5, 21.10094784600919 ], "wc_weaknesses_avg": [ 195.25, 140.7468916175416 ], "wc_questions_avg": [ 88.75, 87.62526747462744 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 435.75, 260.03881921743914 ], "wc_reply_reviewers_avg": [ 69.0, 75.45859791965393 ], "wc_reply_authors_avg": [ 59.0, 83.21958904008119 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16773908305851648742&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Exploring Loss Functions for Time-based Training Strategy in Spiking Neural Networks", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72658", "id": "8IvW2k5VeA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cde874a797a8300da693d5e412b7fdc0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8IvW2k5VeA", "openreview": "https://openreview.net/forum?id=8IvW2k5VeA", "poster": "/media/PosterPDFs/NeurIPS%202023/72658.png?t=1702014676.9425051", "slides": "https://nips.cc/virtual/2023/poster/72658", "video": "https://nips.cc/virtual/2023/poster/72658", "author_site": "Yaoyu Zhu, Wei Fang, Xiaodong Xie, Tiejun Huang, Zhaofei Yu", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) are considered promising brain-inspired energy-efficient models due to their event-driven computing paradigm.\nThe spatiotemporal spike patterns used to convey information in SNNs consist of both rate coding and temporal coding, where the temporal coding is crucial to biological-plausible learning rules such as spike-timing-dependent-plasticity.\nThe time-based training strategy is proposed to better utilize the temporal information in SNNs and learn in an asynchronous fashion.\nHowever, some recent works train SNNs by the time-based scheme with rate-coding-dominated loss functions.\nIn this paper, we first map rate-based loss functions to time-based counterparts and explain why they are also applicable to the time-based training scheme.\nAfter that, we infer that loss functions providing adequate positive overall gradients help training by theoretical analysis.\nBased on this, we propose the enhanced counting loss to replace the commonly used mean square counting loss.\nIn addition, we transfer the training of scale factor in weight standardization into thresholds.\nExperiments show that our approach outperforms previous time-based training methods in most datasets. 
\nOur work provides insights for training SNNs with time-based schemes and offers a fresh perspective on the correlation between rate coding and temporal coding.\nOur code is available at https://github.com/zhuyaoyu/SNN-temporal-training-losses.", "keywords": "Spiking neural networks;Spike encoding;Time-based training", "primary_area": "", "supplementary_material": "/attachment/ca4797b9351dbfeb4ead8e8641bbf14a654bbd01.pdf", "author": "Yaoyu Zhu;Wei Fang;Xiaodong Xie;Tiejun Huang;Zhaofei Yu", "authorids": "~Yaoyu_Zhu1;~Wei_Fang2;~Xiaodong_Xie1;~Tiejun_Huang1;~Zhaofei_Yu1", "gender": "M;;M;M;M", "homepage": ";https://fangwei123456.github.io/;http://idm.pku.edu.cn/en/info/1009/1010.htm;https://idm.pku.edu.cn/~tjhuang/;https://yuzhaofei.github.io", "dblp": "325/0611;;;h/TiejunHuang;166/0573", "google_scholar": ";https://scholar.google.com.hk/citations?user=e2lED2gAAAAJ;;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;qaUgD50AAAAJ", "orcid": "0000-0002-8485-5094;;;0000-0002-4234-6099;", "linkedin": ";;;;", "or_profile": "~Yaoyu_Zhu1;~Wei_Fang2;~Xiaodong_Xie1;~Tiejun_Huang1;~Zhaofei_Yu1", "aff": "Peking University;School of Computer Science, Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023exploring,\ntitle={Exploring Loss Functions for Time-based Training Strategy in Spiking Neural Networks},\nauthor={Yaoyu Zhu and Wei Fang and Xiaodong Xie and Tiejun Huang and Zhaofei Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8IvW2k5VeA}\n}", "github": "", "project": "", "reviewers": "LeWP;hbi5;jtny;98Db", "pdf_size": 593960, "rating": "6;6;7;8", "confidence": "4;4;5;5", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "27;87;71;95", "wc_strengths": "28;39;71;92", "wc_weaknesses": "83;18;15;31", "wc_questions": "4;86;105;82", "wc_limitations": "1;2;10;1", "wc_review": "143;232;272;301", "wc_reply_reviewers": "15;0;14;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.0, 26.28687885618983 ], "wc_strengths_avg": [ 57.5, 25.42144763777232 ], "wc_weaknesses_avg": [ 36.75, 27.371289702898547 ], "wc_questions_avg": [ 69.25, 38.661188548724155 ], "wc_limitations_avg": [ 3.5, 3.774917217635375 ], "wc_review_avg": [ 237.0, 59.54410130315177 ], "wc_reply_reviewers_avg": [ 10.75, 6.219927652312364 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5365216618257643138&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": 
"0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Systematic Visual Reasoning through Object-Centric Relational Abstraction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72657", "id": "8JCZe7QrPy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e3cdc587873dd1d00ac78f0c1f9aa60c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8JCZe7QrPy", "openreview": "https://openreview.net/forum?id=8JCZe7QrPy", "poster": "/media/PosterPDFs/NeurIPS%202023/72657.png?t=1701885509.1236734", "slides": "https://nips.cc/virtual/2023/poster/72657", "video": "https://nips.cc/virtual/2023/poster/72657", "author_site": "Taylor Webb, Shanka Subhra Mondal, Jonathan D Cohen", "tldr": "", "abstract": "Human visual reasoning is characterized by an ability to identify abstract patterns from only a small number of examples, and to systematically generalize those patterns to novel inputs. This capacity depends in large part on our ability to represent complex visual inputs in terms of both objects and relations. Recent work in computer vision has introduced models with the capacity to extract object-centric representations, leading to the ability to process multi-object visual inputs, but falling short of the systematic generalization displayed by human reasoning. Other recent models have employed inductive biases for relational abstraction to achieve systematic generalization of learned abstract rules, but have generally assumed the presence of object-focused inputs. Here, we combine these two approaches, introducing Object-Centric Relational Abstraction (OCRA), a model that extracts explicit representations of both objects and abstract relations, and achieves strong systematic generalization in tasks (including a novel dataset, CLEVR-ART, with greater visual complexity) involving complex visual displays.", "keywords": "relational reasoning;object-centric representations;abstract rule learning;relational inductive biases;systematic generalization", "primary_area": "", "supplementary_material": "/attachment/99a8608d6841cabe3c4851996ec28665a973ce62.pdf", "author": "Taylor Whittington Webb;Shanka Subhra Mondal;Jonathan Cohen", "authorids": "~Taylor_Whittington_Webb1;~Shanka_Subhra_Mondal1;~Jonathan_Cohen1", "gender": "M;M;M", "homepage": "https://scholar.google.com/citations?user=WCmrJoQAAAAJ&hl=en;https://sites.google.com/view/shankasubhramondal/;https://jdc.princeton.edu", "dblp": "183/6144;241/7065;31/5509-3", "google_scholar": "WCmrJoQAAAAJ;5V-xQYUAAAAJ;https://scholar.google.com.tw/citations?user=NCkkQAMAAAAJ", "orcid": ";;0000-0003-2316-0763", "linkedin": ";shanka-subhra-mondal-057622147;", "or_profile": "~Taylor_Whittington_Webb1;~Shanka_Subhra_Mondal1;~Jonathan_Cohen1", "aff": "University of California, Los Angeles;Microsoft Research;Princeton University", "aff_domain": "ucla.edu;research.microsoft.com;princeton.edu", "position": "Postdoc;Intern;Full Professor", "bibtex": "@inproceedings{\nwebb2023systematic,\ntitle={Systematic Visual Reasoning through Object-Centric Relational Abstraction},\nauthor={Taylor Whittington Webb and Shanka Subhra Mondal and Jonathan Cohen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8JCZe7QrPy}\n}", "github": "", "project": "", "reviewers": "6gd3;1vcp;bzvn;HzsY", "pdf_size": 2680615, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;2;4", "novelty": "2;2;2;4", "presentation": "3;3;3;4", 
"wc_summary": "94;177;99;157", "wc_strengths": "58;199;72;104", "wc_weaknesses": "585;182;243;16", "wc_questions": "45;95;27;441", "wc_limitations": "102;62;7;12", "wc_review": "884;715;448;730", "wc_reply_reviewers": "61;48;45;260", "wc_reply_authors": "597;0;0;507", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 131.75, 35.99565946055163 ], "wc_strengths_avg": [ 108.25, 54.9835202583465 ], "wc_weaknesses_avg": [ 256.5, 207.05373698631956 ], "wc_questions_avg": [ 152.0, 168.70388258721255 ], "wc_limitations_avg": [ 45.75, 38.95109112720721 ], "wc_review_avg": [ 694.25, 156.80621001733317 ], "wc_reply_reviewers_avg": [ 103.5, 90.55523176492896 ], "wc_reply_authors_avg": [ 276.0, 277.8281843154146 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=848070260337544920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ucla.edu;research.microsoft.com;princeton.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Los Angeles;Microsoft;Princeton University", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.ucla.edu;https://www.microsoft.com/en-us/research;https://www.princeton.edu", "aff_unique_abbr": "UCLA;MSR;Princeton", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DAC-DETR: Divide the Attention Layers and Conquer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72656", "id": "8JMexYVcXB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/edd0d433f8a1a51aa11237a6543fc280-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8JMexYVcXB", "openreview": "https://openreview.net/forum?id=8JMexYVcXB", "poster": "/media/PosterPDFs/NeurIPS%202023/72656.png?t=1699259468.3652864", "slides": "https://nips.cc/virtual/2023/poster/72656", "video": "https://nips.cc/virtual/2023/poster/72656", "author_site": "Zhengdong Hu, Yifan Sun, Jingdong Wang, Yi Yang", "tldr": "", "abstract": "This paper reveals a characteristic of DEtection Transformer (DETR) that negatively impacts its training efficacy, i.e., the cross-attention and self-attention layers in DETR decoder have contrary impacts on the object queries (though both impacts are important). Specifically, we observe the cross-attention tends to gather multiple queries around the same object, while the self-attention disperses these queries far away. To improve the training efficacy, we propose a Divide-And-Conquer DETR (DAC-DETR) that divides the cross-attention out from this contrary for better conquering. During training, DAC-DETR employs an auxiliary decoder that focuses on learning the cross-attention layers. The auxiliary decoder, while sharing all the other parameters, has NO self-attention layers and employs one-to-many label assignment to improve the gathering effect. Experiments show that DAC-DETR brings remarkable improvement over popular DETRs. 
For example, under the 12 epochs training scheme on MS-COCO, DAC-DETR improves Deformable DETR (ResNet-50) by +3.4 AP and achieves 50.9 (ResNet-50) / 58.1 AP (Swin-Large) based on some popular methods (i.e., DINO and an IoU-related loss). Our code will be made available at https://github.com/huzhengdongcs/DAC-DETR.", "keywords": "deep learning;computer vision;object detection;transformer", "primary_area": "", "supplementary_material": "/attachment/86fd2504261b4a10686a338132aa05b3c3aeb5b7.pdf", "author": "Zhengdong Hu;Yifan Sun;Jingdong Wang;Yi Yang", "authorids": "~Zhengdong_Hu1;~Yifan_Sun2;~Jingdong_Wang1;~Yi_Yang22", "gender": "M;M;M;M", "homepage": ";https://yifansun-reid.github.io;https://jingdongwang2017.github.io/;https://person.zju.edu.cn/yiyang", "dblp": "323/9595;99/10261-3.html;49/3441;33/4854-1.html", "google_scholar": "Udl0uiMAAAAJ;uUZEL7UAAAAJ;z5SPCmgAAAAJ;RMSuNFwAAAAJ", "orcid": ";0000-0003-3532-6521;0000-0002-4888-4445;", "linkedin": ";;;", "or_profile": "~Zhengdong_Hu1;~Yifan_Sun2;~Jingdong_Wang1;~Yi_Yang22", "aff": "Baidu;Baidu;Baidu;Zhejiang University", "aff_domain": "baidu.com;baidu.com;baidu.com;zju.edu.cn", "position": "Researcher;Senior Expert;Chief Scientist for Computer Vision;Full Professor", "bibtex": "@inproceedings{\nhu2023dacdetr,\ntitle={{DAC}-{DETR}: Divide the Attention Layers and Conquer},\nauthor={Zhengdong Hu and Yifan Sun and Jingdong Wang and Yi Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8JMexYVcXB}\n}", "github": "", "project": "", "reviewers": "CkgD;VXuS;QUSJ;LpXx;9psD", "pdf_size": 662093, "rating": "4;7;7;7;7", "confidence": "4;5;4;4;4", "soundness": "3;4;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;4;3;3;3", "wc_summary": "60;78;23;67;37", "wc_strengths": "31;60;34;209;21", "wc_weaknesses": "107;8;20;33;40", "wc_questions": "103;12;1;314;58", "wc_limitations": "92;1;1;1;1", "wc_review": "393;159;79;624;157", "wc_reply_reviewers": "0;0;43;0;0", "wc_reply_authors": "0;0;203;0;0", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 53.0, 20.12958022413781 ], "wc_strengths_avg": [ 71.0, 70.19116753552402 ], "wc_weaknesses_avg": [ 41.6, 34.49405745922043 ], "wc_questions_avg": [ 97.6, 114.0922433822738 ], "wc_limitations_avg": [ 19.2, 36.39999999999999 ], "wc_review_avg": [ 282.4, 200.62263082713275 ], "wc_reply_reviewers_avg": [ 8.6, 17.2 ], "wc_reply_authors_avg": [ 40.6, 81.2 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2931317047723170282&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "baidu.com;baidu.com;baidu.com;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Baidu;Zhejiang University", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.zju.edu.cn", "aff_unique_abbr": "Baidu;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "3D Indoor Instance Segmentation in an Open-World", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/72655", "id": "8JsbdJjRvY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/801750bc49fdc3d498e9ee63479f315e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8JsbdJjRvY", "openreview": "https://openreview.net/forum?id=8JsbdJjRvY", "poster": "/media/PosterPDFs/NeurIPS%202023/72655.png?t=1701511356.1284003", "slides": "https://nips.cc/virtual/2023/poster/72655", "video": "https://nips.cc/virtual/2023/poster/72655", "author_site": "Mohamed El Amine Boudjoghra, Salwa Al Khatib, Jean Lahoud, Hisham Cholakkal, Rao Anwer, Rao Anwer, Salman Khan, Fahad Shahbaz Khan", "tldr": "", "abstract": "Existing 3D instance segmentation methods typically assume that all semantic classes to be segmented would be available during training and only seen categories are segmented at inference. We argue that such a closed-world assumption is restrictive and explore for the first time 3D indoor instance segmentation in an open-world setting, where the model is allowed to distinguish a set of known classes as well as identify an unknown object as unknown and then later incrementally learning the semantic category of the unknown when the corresponding category labels are available. To this end, we introduce an open-world 3D indoor instance segmentation method, where an auto-labeling scheme is employed to produce pseudo-labels during training and induce separation to separate known and unknown category labels. We further improve the pseudo-labels quality at inference by adjusting the unknown class probability based on the objectness score distribution. We also introduce carefully curated open-world splits leveraging realistic scenarios based on inherent object distribution, region-based indoor scene exploration and randomness aspect of open-world classes. Extensive experiments reveal the efficacy of the proposed contributions leading to promising open-world 3D instance segmentation performance. Code and splits are available at: https://github.com/aminebdj/3D-OWIS.", "keywords": "open-world;3d instance segmentation", "primary_area": "", "supplementary_material": "", "author": "Mohamed El Amine Boudjoghra;Salwa K. 
Al Khatib;Jean Lahoud;Hisham Cholakkal;Rao Muhammad Anwer;Salman Khan;Fahad Khan", "authorids": "~Mohamed_El_Amine_Boudjoghra1;~Salwa_K._Al_Khatib1;~Jean_Lahoud1;~Hisham_Cholakkal2;~Rao_Muhammad_Anwer2;~Salman_Khan4;~Fahad_Khan1", "gender": "M;F;;M;;M;M", "homepage": ";https://www.linkedin.com/in/salwa-al-khatib/;;https://mbzuai.ac.ae/pages/hisham-cholakkal/;;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home", "dblp": "339/8950;268/1388;161/1817;129/2046;;32/11535-1;05/8618", "google_scholar": "xwDqHG8AAAAJ;TWtF0CAAAAAJ;https://scholar.google.com/citations?hl=en;bZ3YBRcAAAAJ;;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ", "orcid": "0000-0003-3343-9514;;;;;0000-0002-9502-1749;", "linkedin": "med-el-amine-boudjoghra/;;;;;;", "or_profile": "~Mohamed_El_Amine_Boudjoghra1;~Salwa_K._Al_Khatib1;~Jean_Lahoud1;~Hisham_Cholakkal2;~Rao_Muhammad_Anwer2;~Salman_Khan4;~Fahad_Khan1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;MBZUAI;;Australian National University;Link\u00f6ping University", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;;anu.edu.au;liu.se", "position": "MS student;MS student;Postdoc;Assistant Professor;;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nboudjoghra2023d,\ntitle={3D Indoor Instance Segmentation in an Open-World},\nauthor={Mohamed El Amine Boudjoghra and Salwa K. Al Khatib and Jean Lahoud and Hisham Cholakkal and Rao Muhammad Anwer and Salman Khan and Fahad Khan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8JsbdJjRvY}\n}", "github": "", "project": "", "reviewers": "HHFW;L857;WNwG;PXM7;CNBM", "pdf_size": 17695696, "rating": "5;5;5;5;6", "confidence": "2;3;4;4;3", "soundness": "2;2;3;3;3", "novelty": "2;3;2;3;2", "presentation": "1;2;3;2;2", "wc_summary": "59;56;38;95;91", "wc_strengths": "43;31;31;102;57", "wc_weaknesses": "85;80;102;196;340", "wc_questions": "530;75;6;1;1", "wc_limitations": "4;27;7;14;26", "wc_review": "721;269;184;408;515", "wc_reply_reviewers": "31;59;17;58;198", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 67.8, 21.8302542358077 ], "wc_strengths_avg": [ 52.8, 26.4 ], "wc_weaknesses_avg": [ 160.6, 99.07895841196556 ], "wc_questions_avg": [ 122.6, 205.6254848018601 ], "wc_limitations_avg": [ 15.6, 9.478396488858229 ], "wc_review_avg": [ 419.4, 188.82012604592765 ], "wc_reply_reviewers_avg": [ 72.6, 64.72279351202326 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=170461605453574306&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;;anu.edu.au;liu.se", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";;", 
"aff_unique_url": "https://mbzuai.ac.ae;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "MBZUAI;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "United Arab Emirates;Australia;Sweden" }, { "title": "Bootstrapping Vision-Language Learning with Decoupled Language Pre-training", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72654", "id": "8Kch0ILfQH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/002262941c9edfd472a79298b2ac5e17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8Kch0ILfQH", "openreview": "https://openreview.net/forum?id=8Kch0ILfQH", "poster": "/media/PosterPDFs/NeurIPS%202023/72654.png?t=1702107092.1745186", "slides": "https://nips.cc/virtual/2023/poster/72654", "video": "https://nips.cc/virtual/2023/poster/72654", "author_site": "Yiren Jian, Chongyang Gao, Soroush Vosoughi", "tldr": "", "abstract": "We present a novel methodology aimed at optimizing the application of frozen large language models (LLMs) for resource-intensive vision-language (VL) pre-training. The current paradigm uses visual features as prompts to guide language models, with a focus on determining the most relevant visual features for corresponding text. Our approach diverges by concentrating on the language component, specifically identifying the optimal prompts to align with visual features. We introduce the Prompt-Transformer (P-Former), a model that predicts these ideal prompts, which is trained exclusively on linguistic data, bypassing the need for image-text pairings. This strategy subtly bifurcates the end-to-end VL training process into an additional, separate stage. Our experiments reveal that our framework significantly enhances the performance of a robust image-to-text baseline (BLIP-2), and effectively narrows the performance gap between models trained with either 4M or 129M image-text pairs. Importantly, our framework is modality-agnostic and flexible in terms of architectural design, as validated by its successful application in a video learning task using varied base modules. 
The code will be made available at https://github.com/yiren-jian/BLIText.", "keywords": "vision-language pretraining;multi-modal learning;uni-modal auxiliary learning", "primary_area": "", "supplementary_material": "", "author": "Yiren Jian;Chongyang Gao;Soroush Vosoughi", "authorids": "~Yiren_Jian1;~Chongyang_Gao1;~Soroush_Vosoughi1", "gender": "M;;", "homepage": "https://yiren-jian.github.io/;https://gcyzsl.github.io/;https://www.cs.dartmouth.edu/~soroush/", "dblp": "226/8387;259/8515;01/1709", "google_scholar": "https://scholar.google.com/citations?hl=en;HEAgatAAAAAJ;45DAXkwAAAAJ", "orcid": ";0000-0002-2358-4710;0000-0002-2564-8909", "linkedin": ";chongyang-gao-685597116/;", "or_profile": "~Yiren_Jian1;~Chongyang_Gao1;~Soroush_Vosoughi1", "aff": "Dartmouth College;Northwestern University;Dartmouth College", "aff_domain": "dartmouth.edu;northwestern.edu;dartmouth.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njian2023bootstrapping,\ntitle={Bootstrapping Vision-Language Learning with Decoupled Language Pre-training},\nauthor={Yiren Jian and Chongyang Gao and Soroush Vosoughi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8Kch0ILfQH}\n}", "github": "", "project": "", "reviewers": "8epX;FD1R;jFy5;3ni9;T7A1", "pdf_size": 1707985, "rating": "5;6;6;6;7", "confidence": "5;5;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;4", "presentation": "2;2;3;3;3", "wc_summary": "45;70;40;66;54", "wc_strengths": "34;78;46;205;32", "wc_weaknesses": "13;233;69;171;75", "wc_questions": "100;10;66;156;1", "wc_limitations": "4;6;3;39;40", "wc_review": "196;397;224;637;202", "wc_reply_reviewers": "16;13;173;13;9", "wc_reply_authors": "0;8;610;9;0", "reply_reviewers": "1;1;3;1;1", "reply_authors": "1;2;3;2;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 55.0, 11.593101396951552 ], "wc_strengths_avg": [ 79.0, 65.11528238439882 ], "wc_weaknesses_avg": [ 112.2, 78.91869233584652 ], "wc_questions_avg": [ 66.6, 57.64581511263415 ], "wc_limitations_avg": [ 18.4, 17.258041603843697 ], "wc_review_avg": [ 331.2, 169.88631492854273 ], "wc_reply_reviewers_avg": [ 44.8, 64.13859992235564 ], "wc_reply_authors_avg": [ 125.4, 242.33002290265233 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13327942398910531922&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "dartmouth.edu;northwestern.edu;dartmouth.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Dartmouth College;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.dartmouth.edu;https://www.northwestern.edu", "aff_unique_abbr": "Dartmouth;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Few-Shot Class-Incremental Learning via Training-Free Prototype Calibration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72653", "id": "8NAxGDdf7H", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/30dfe47a3ccbee68cffa0c19ccb1bc00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8NAxGDdf7H", "openreview": "https://openreview.net/forum?id=8NAxGDdf7H", "poster": "/media/PosterPDFs/NeurIPS%202023/72653.png?t=1701528784.7710822", "slides": "https://nips.cc/virtual/2023/poster/72653", "video": "https://nips.cc/virtual/2023/poster/72653", "author_site": "Qi-Wei Wang, Da-Wei Zhou, Yi-Kai Zhang, De-Chuan Zhan, Han-Jia Ye", "tldr": "", "abstract": "Real-world scenarios are usually accompanied by continuously appearing classes with scare labeled samples, which require the machine learning model to incrementally learn new classes and maintain the knowledge of base classes. In this Few-Shot Class-Incremental Learning (FSCIL) scenario, existing methods either introduce extra learnable components or rely on a frozen feature extractor to mitigate catastrophic forgetting and overfitting problems. However, we find a tendency for existing methods to misclassify the samples of new classes into base classes, which leads to the poor performance of new classes. In other words, the strong discriminability of base classes distracts the classification of new classes. To figure out this intriguing phenomenon, we observe that although the feature extractor is only trained on base classes, it can surprisingly represent the *semantic similarity* between the base and *unseen* new classes. Building upon these analyses, we propose a *simple yet effective* Training-frEE calibratioN (TEEN) strategy to enhance the discriminability of new classes by fusing the new prototypes (i.e., mean features of a class) with weighted base prototypes. In addition to standard benchmarks in FSCIL, TEEN demonstrates remarkable performance and consistent improvements over baseline methods in the few-shot learning scenario. 
Code is available at: https://github.com/wangkiw/TEEN", "keywords": "Few-Shot Class-Incremental Learning;Continual Learning;Class-Incremental Learning", "primary_area": "", "supplementary_material": "", "author": "Qi-Wei Wang;Da-Wei Zhou;Yi-Kai Zhang;De-Chuan Zhan;Han-Jia Ye", "authorids": "~Qi-Wei_Wang1;~Da-Wei_Zhou1;~Yi-Kai_Zhang2;~De-Chuan_Zhan1;~Han-Jia_Ye1", "gender": ";;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/wangqiwei/;http://www.lamda.nju.edu.cn/zhoudw/;http://www.lamda.nju.edu.cn/zhangyk;http://www.lamda.nju.edu.cn/zhandc/;http://www.lamda.nju.edu.cn/yehj", "dblp": "195/9944;120/6109;330/8964;74/498;165/3014", "google_scholar": "PQkB2EsAAAAJ;kMNaR-YAAAAJ;;mYJf4TcAAAAJ;mgOYhtoAAAAJ", "orcid": ";;;0000-0002-3533-2078;", "linkedin": ";;;;", "or_profile": "~Qi-Wei_Wang1;~Da-Wei_Zhou1;~Yi-Kai_Zhang2;~De-Chuan_Zhan1;~Han-Jia_Ye1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;PhD student;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023fewshot,\ntitle={Few-Shot Class-Incremental Learning via Training-Free Prototype Calibration},\nauthor={Qi-Wei Wang and Da-Wei Zhou and Yi-Kai Zhang and De-Chuan Zhan and Han-Jia Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8NAxGDdf7H}\n}", "github": "", "project": "", "reviewers": "TNew;3Zyg;aEVg;wHh2;RFbX", "pdf_size": 941089, "rating": "5;5;5;5;5", "confidence": "4;5;4;4;5", "soundness": "3;3;2;3;3", "novelty": "3;2;2;2;3", "presentation": "3;3;2;2;4", "wc_summary": "46;74;53;61;72", "wc_strengths": "42;56;52;42;256", "wc_weaknesses": "122;37;101;75;472", "wc_questions": "7;25;93;46;14", "wc_limitations": "7;7;36;7;32", "wc_review": "224;199;335;231;846", "wc_reply_reviewers": "0;17;259;24;33", "wc_reply_authors": "0;17;750;53;0", "reply_reviewers": "0;1;2;1;1", "reply_authors": "1;2;3;3;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 61.2, 10.759182125050213 ], "wc_strengths_avg": [ 89.6, 83.38249216712103 ], "wc_weaknesses_avg": [ 161.4, 157.8690596665477 ], "wc_questions_avg": [ 37.0, 30.95157508108432 ], "wc_limitations_avg": [ 17.8, 13.287588193498472 ], "wc_review_avg": [ 367.0, 243.98114681261748 ], "wc_reply_reviewers_avg": [ 66.6, 96.80619814867228 ], "wc_reply_authors_avg": [ 164.0, 293.63855332704526 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4772973492399305410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DPOK: Reinforcement Learning for Fine-tuning Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72652", "id": "8OTPepXzeh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc65fab891d83433bd3c8d966edde311-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8OTPepXzeh", "openreview": "https://openreview.net/forum?id=8OTPepXzeh", "poster": "/media/PosterPDFs/NeurIPS%202023/72652.png?t=1701968894.7774851", "slides": "https://nips.cc/virtual/2023/poster/72652", "video": "https://nips.cc/virtual/2023/poster/72652", "author_site": "Ying Fan, Olivia Watkins, Yuqing Du, Yuqing Du, Hao Liu, Moonkyung Ryu, Craig Boutilier, Pieter Abbeel, Mohammad Ghavamzadeh, Kangwook Lee, Kimin Lee", "tldr": "", "abstract": "Learning from human feedback has been shown to improve text-to-image models. These techniques first learn a reward function that captures what humans care about in the task and then improve the models based on the learned reward function. Even though relatively simple approaches (e.g., rejection sampling based on reward scores) have been investigated, fine-tuning text-to-image models with the reward function remains challenging. In this work, we propose using online reinforcement learning (RL) to fine-tune text-to-image models. We focus on diffusion models, defining the fine-tuning task as an RL problem, and updating the pre-trained text-to-image diffusion models using policy gradient to maximize the feedback-trained reward. Our approach, coined DPOK, integrates policy optimization with KL regularization. We conduct an analysis of KL regularization for both RL fine-tuning and supervised fine-tuning. In our experiments, we show that DPOK is generally superior to supervised fine-tuning with respect to both image-text alignment and image quality. Our code is available at https://github.com/google-research/google-research/tree/master/dpok.", "keywords": "Diffusion models;RLHF", "primary_area": "", "supplementary_material": "", "author": "Ying Fan;Olivia Watkins;Yuqing Du;Hao Liu;Moonkyung Ryu;Craig Boutilier;Pieter Abbeel;Mohammad Ghavamzadeh;Kangwook Lee;Kimin Lee", "authorids": "~Ying_Fan2;~Olivia_Watkins1;~Yuqing_Du1;~Hao_Liu1;~Moonkyung_Ryu1;~Craig_Boutilier2;~Pieter_Abbeel2;~Mohammad_Ghavamzadeh2;~Kangwook_Lee1;~Kimin_Lee1", "gender": ";;;M;M;M;M;M;M;M", "homepage": "https://yingfan-bot.github.io/;https://people.eecs.berkeley.edu/~oliviawatkins/;http://yuqingd.github.io;;https://research.google/people/craigboutilier/;https://people.eecs.berkeley.edu/~pabbeel/;http://kangwooklee.com/;https://sites.google.com/view/kiminlee;https://haoliu.ai;https://mohammadghavamzadeh.github.io/", "dblp": ";;218/5572;;10/3411;;88/9826-1;183/6849;09/3214-55;88/6389", "google_scholar": "1aj4dZcAAAAJ;;;EEBuCJ8AAAAJ;cXkm3rsAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;sCEl8r-n5VEC;92M8xv4AAAAJ;wtK4Yh4AAAAJ;https://scholar.google.ca/citations?user=LHIPpCsAAAAJ", "orcid": ";;;;;;;;;", "linkedin": "ying-fan-5b7b07147/;;yuqingdu;;;;;;;", "or_profile": "~Ying_Fan2;~Olivia_Watkins1;~Yuqing_Du1;~Moonkyung_Ryu1;~Craig_Boutilier2;~Pieter_Abbeel2;~Kangwook_Lee1;~Kimin_Lee1;~Hao_Liu10;~Mohammad_Ghavamzadeh1", "aff": "University of Wisconsin-Madison;Google;University of California, Berkeley;Google Research;Google;Covariant;KRAFTON;Google;University of California, Berkeley;Google Research", "aff_domain": "cs.wisc.edu;google.com;berkeley.edu;google.com;google.com;covariant.ai;krafton.com;google.com;berkeley.edu;google.com", "position": "Graduate student;Intern;PhD student;Software Engineer;Principal 
Researcher;Founder;Researcher;Researcher;PhD student;Senior Staff Research Scientist", "bibtex": "@inproceedings{\nfan2023reinforcement,\ntitle={Reinforcement Learning for Fine-tuning Text-to-Image Diffusion Models},\nauthor={Ying Fan and Olivia Watkins and Yuqing Du and Hao Liu and Moonkyung Ryu and Craig Boutilier and Pieter Abbeel and Mohammad Ghavamzadeh and Kangwook Lee and Kimin Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8OTPepXzeh}\n}", "github": "", "project": "", "reviewers": "gCGU;VuBv;YMPp;fxhn;gRSW", "pdf_size": 25372467, "rating": "5;5;5;5;6", "confidence": "4;3;3;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "52;50;68;122;76", "wc_strengths": "45;53;63;107;16", "wc_weaknesses": "122;122;297;283;78", "wc_questions": "6;87;111;124;2", "wc_limitations": "12;1;25;20;15", "wc_review": "237;313;564;656;187", "wc_reply_reviewers": "95;82;159;0;16", "wc_reply_authors": "185;244;429;47;0", "reply_reviewers": "1;1;2;0;1", "reply_authors": "3;3;3;2;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.6, 26.089078174592522 ], "wc_strengths_avg": [ 56.8, 29.586483400363754 ], "wc_weaknesses_avg": [ 180.4, 91.0265895219633 ], "wc_questions_avg": [ 66.0, 52.011537181667684 ], "wc_limitations_avg": [ 14.6, 8.114185110040317 ], "wc_review_avg": [ 391.4, 185.23995249405567 ], "wc_reply_reviewers_avg": [ 70.4, 57.45467779041146 ], "wc_reply_authors_avg": [ 181.0, 152.45064775198563 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14209274253898652720&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.wisc.edu;google.com;berkeley.edu;google.com;google.com;covariant.ai;krafton.com;google.com;berkeley.edu;google.com", "author_num": 10, "aff_unique_index": "0;1;2;1;1;3;4;1;2;1", "aff_unique_norm": "University of Wisconsin-Madison;Google;University of California, Berkeley;Covariant;KRAFTON Inc.", "aff_unique_dep": ";Google;;;", "aff_unique_url": "https://www.wisc.edu;https://www.google.com;https://www.berkeley.edu;;https://www.krafton.com", "aff_unique_abbr": "UW-Madison;Google;UC Berkeley;;KRAFTON", "aff_campus_unique_index": "0;1;2;1;1;1;2;1", "aff_campus_unique": "Madison;Mountain View;Berkeley;", "aff_country_unique_index": "0;0;0;0;0;2;0;0;0", "aff_country_unique": "United States;;South Korea" }, { "title": "Rethinking Gauss-Newton for learning over-parameterized models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72651", "id": "8Oukmqfek2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a14c7f9fb3f42645cfa6bd5aa446819-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8Oukmqfek2", "openreview": "https://openreview.net/forum?id=8Oukmqfek2", "poster": "/media/PosterPDFs/NeurIPS%202023/72651.png?t=1701788855.5558827", "slides": "https://nips.cc/virtual/2023/poster/72651", "video": "https://nips.cc/virtual/2023/poster/72651", "author_site": "Michael Arbel, Romain Menegaux, Pierre Wolinski", "tldr": "", "abstract": "This work studies the global convergence and 
implicit bias of Gauss-Newton's (GN) method when optimizing over-parameterized one-hidden layer networks in the mean-field regime. \nWe first establish a global convergence result for GN in the continuous-time limit exhibiting a faster convergence rate compared to GD due to improved conditioning. \nWe then perform an empirical study on a synthetic regression task to investigate the implicit bias of GN's method.\nWhile GN is consistently faster than GD in finding a global optimum, the learned model generalizes well on test data when starting from random initial weights with a small variance and using a small step size to slow down convergence. \nSpecifically, our study shows that such a setting results in a hidden learning phenomenon, where the dynamics are able to recover features with good generalization properties despite the model having sub-optimal training and test performances due to an under-optimized linear layer. This study exhibits a trade-off between the convergence speed of GN and the generalization ability of the learned solution.", "keywords": "implicit bias;gauss newton", "primary_area": "", "supplementary_material": "", "author": "Michael Arbel;Romain Menegaux;Pierre Wolinski", "authorids": "~Michael_Arbel1;~Romain_Menegaux1;~Pierre_Wolinski1", "gender": "M;M;M", "homepage": "https://michaelarbel.github.io/;;http://pierre-wolinski.fr/", "dblp": "200/8609;202/1215;228/6714.html", "google_scholar": "NsOqVtkAAAAJ;;https://scholar.google.fr/citations?user=tqK4sCkAAAAJ", "orcid": ";0000-0001-8814-4911;0000-0003-1007-0144", "linkedin": "michael-arbel-0a38a655/;;", "or_profile": "~Michael_Arbel1;~Romain_Menegaux1;~Pierre_Wolinski1", "aff": "INRIA;Inria;Inria Grenoble-Alpes", "aff_domain": "inria.fr;inria.fr;inria.fr", "position": "Researcher;Postdoc;Postdoc", "bibtex": "@inproceedings{\narbel2023rethinking,\ntitle={Rethinking Gauss-Newton for learning over-parameterized models},\nauthor={Michael Arbel and Romain Menegaux and Pierre Wolinski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8Oukmqfek2}\n}", "github": "", "project": "", "reviewers": "oHC4;6UDU;2qbp;c9sb", "pdf_size": 710650, "rating": "3;3;4;7", "confidence": "4;4;4;3", "soundness": "3;4;3;2", "novelty": "1;1;1;2", "presentation": "3;3;3;4", "wc_summary": "58;93;59;776", "wc_strengths": "46;43;31;98", "wc_weaknesses": "220;109;228;383", "wc_questions": "101;18;27;846", "wc_limitations": "9;1;2;125", "wc_review": "434;264;347;2228", "wc_reply_reviewers": "359;324;181;404", "wc_reply_authors": "1346;1563;772;504", "reply_reviewers": "2;2;1;2", "reply_authors": "4;5;3;3", "rating_avg": [ 4.25, 1.6393596310755 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 1.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 246.5, 306.03145263191493 ], "wc_strengths_avg": [ 54.5, 25.734218464915543 ], "wc_weaknesses_avg": [ 235.0, 97.53717240109025 ], "wc_questions_avg": [ 248.0, 346.7542357347636 ], "wc_limitations_avg": [ 34.25, 52.48511693804254 ], "wc_review_avg": [ 818.25, 816.1361329459687 ], "wc_reply_reviewers_avg": [ 317.0, 83.48353131007336 ], "wc_reply_authors_avg": [ 1046.25, 426.0659426661558 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9684959969581861, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13324256067718644676&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "inria.fr;inria.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Grenoble-Alpes", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "New Bounds for Hyperparameter Tuning of Regression Problems Across Instances", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72650", "id": "8QGukmdAbh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd62b65606f0f0d2af2c01623a224258-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8QGukmdAbh", "openreview": "https://openreview.net/forum?id=8QGukmdAbh", "poster": "/media/PosterPDFs/NeurIPS%202023/72650.png?t=1702106615.4397018", "slides": "https://nips.cc/virtual/2023/poster/72650", "video": "https://nips.cc/virtual/2023/poster/72650", "author_site": "Maria-Florina Balcan, Anh Nguyen, Dravyansh Sharma", "tldr": "", "abstract": "The task of tuning regularization coefficients in regularized regression models with provable guarantees across problem instances still poses a significant challenge in the literature. This paper investigates the sample complexity of tuning regularization parameters in linear and logistic regressions under $\\ell_1$ and $\\ell_2$-constraints in the data-driven setting. For the linear regression problem, by more carefully exploiting the structure of the dual function class, we provide a new upper bound for the pseudo-dimension of the validation loss function class, which significantly improves the best-known results on the problem. Remarkably, we also instantiate the first matching lower bound, proving our results are tight. For tuning the regularization parameters of logistic regression, we introduce a new approach to studying the learning guarantee via an approximation of the validation loss function class. 
We examine the pseudo-dimension of the approximation class and construct a uniform error bound between the validation loss function class and its approximation, which allows us to instantiate the first learning guarantee for the problem of tuning logistic regression regularization coefficients.", "keywords": "Elastic Net;logistic regression;data-driven algorithm design;learning theory;regularization", "primary_area": "", "supplementary_material": "/attachment/b431e2247867b14db5ad7621a43c79ad6d2e9cad.pdf", "author": "Nina Balcan;Anh Tuan Nguyen;Dravyansh Sharma", "authorids": "~Nina_Balcan1;~Anh_Tuan_Nguyen3;~Dravyansh_Sharma1", "gender": "F;;M", "homepage": "http://www.cs.cmu.edu/~ninamf/;;http://www.cs.cmu.edu/~dravyans/", "dblp": "b/MariaFlorinaBalcan;;164/7289", "google_scholar": "https://scholar.google.com.tw/citations?user=LWlN_BUAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nina_Balcan1;~Anh_Tuan_Nguyen3;~Dravyansh_Sharma1", "aff": "Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu", "position": "Full Professor;;PhD student", "bibtex": "@inproceedings{\nbalcan2023new,\ntitle={New Bounds for Hyperparameter Tuning of Regression Problems Across Instances},\nauthor={Nina Balcan and Anh Tuan Nguyen and Dravyansh Sharma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8QGukmdAbh}\n}", "github": "", "project": "", "reviewers": "QofD;Ca6P;aDmj;be6e", "pdf_size": 492940, "rating": "6;7;7;7", "confidence": "3;3;2;2", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "112;66;27;75", "wc_strengths": "101;35;45;131", "wc_weaknesses": "120;206;266;74", "wc_questions": "8;36;2;1", "wc_limitations": "1;4;15;5", "wc_review": "342;347;355;286", "wc_reply_reviewers": "0;23;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 30.224162519414826 ], "wc_strengths_avg": [ 78.0, 39.61060464067672 ], "wc_weaknesses_avg": [ 166.5, 74.46307809914924 ], "wc_questions_avg": [ 11.75, 14.254385290148432 ], "wc_limitations_avg": [ 6.25, 5.261891294962297 ], "wc_review_avg": [ 332.5, 27.244265451650556 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9211199313657280231&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;;cmu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8S6ZeKB8tu", "title": "Streaming algorithms for evaluating noisy judges on unlabeled data - binary classification.", "track": "main", "status": "Reject", "tldr": "", "abstract": " The evaluation of noisy binary classifiers on unlabeled data is treated as a\n streaming task - given a data sketch of the decisions 
by an ensemble, estimate\n the true prevalence of the labels as well as each classifier's accuracy on them.\n Two fully algebraic evaluators are constructed to do this. Both are based on the assumption that\n the classifiers make independent errors on the test items. The first is based on\n majority voting. The second, the main contribution of the paper, is guaranteed\n to be correct for independent classifiers. But how do we know the classifiers\n are error independent on any given test? This principal/agent monitoring paradox\n is ameliorated by exploiting the failures of the independent evaluator to\nreturn sensible estimates. Some of these failures can be traced to producing\nalgebraic versus real numbers while evaluating a finite test. A\n search for nearly error independent trios is empirically carried out on the \n \\texttt{adult}, \\texttt{mushroom}, and \\texttt{twonorm} datasets by using\nthese algebraic failure modes to reject potential evaluation ensembles as\ntoo correlated. At its final steps, the searches are refined by constructing\na surface in evaluation space that must contain the true value point.\nThe surface comes from considering the algebra of arbitrarily correlated\nclassifiers and selecting a polynomial subset that is free of any correlation variables.\nCandidate evaluation ensembles are then rejected if their data sketches produce\nindependent evaluation estimates that are too far from the constructed surface.\nThe results produced by the surviving evaluation ensembles can sometimes be as good as 1\\%. \nBut handling even small amounts of correlation remains a challenge. A Taylor expansion\nof the estimates produced when error independence is assumed but the classifiers are, in fact,\nslightly correlated helps clarify how the proposed independent evaluator has algebraic `blind spots'\nof its own. 
They are points in evaluation space but the estimate of the independent evaluator\nhas a sensitivity inversely proportional to the distance of the true point from them.\nHow algebraic stream evaluation can and cannot help when done for safety or economic \nreasons is briefly discussed.", "keywords": "unlabeled data;evaluation;ensembles;stream algorithms;algebraic geometry", "primary_area": "", "supplementary_material": "/attachment/73fa136fb8c20c4cc74cf387a01e95cd31c2217c.zip", "author": "Andr\u00e9s Corrada-Emmanuel", "authorids": "~Andr\u00e9s_Corrada-Emmanuel1", "gender": "M", "homepage": "", "dblp": "30/6453", "google_scholar": "", "orcid": "", "linkedin": "andrescorrada", "or_profile": "~Andr\u00e9s_Corrada-Emmanuel1", "aff": "Real Chemistry", "aff_domain": "realchemistry.com", "position": "Director of Optimization", "bibtex": "@misc{\ncorrada-emmanuel2023streaming,\ntitle={Streaming algorithms for evaluating noisy judges on unlabeled data - binary classification.},\nauthor={Andr{\\'e}s Corrada-Emmanuel},\nyear={2023},\nurl={https://openreview.net/forum?id=8S6ZeKB8tu}\n}", "github": "", "project": "", "reviewers": "CtP8;rucn;FuFc;7zjd;bq3o", "site": "https://openreview.net/forum?id=8S6ZeKB8tu", "pdf_size": 411126, "rating": "2;2;2;3;4", "confidence": "5;4;2;2;2", "soundness": "2;2;2;1;3", "novelty": "2;1;2;2;2", "presentation": "2;1;1;1;2", "wc_summary": "53;78;99;74;87", "wc_strengths": "20;13;30;95;25", "wc_weaknesses": "369;237;226;311;122", "wc_questions": "164;43;69;27;3", "wc_limitations": "84;40;21;23;9", "wc_review": "690;411;445;530;246", "wc_reply_reviewers": "0;0;84;0;0", "wc_reply_authors": "0;0;469;0;0", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 2.6, 0.8 ], "confidence_avg": [ 3.0, 1.2649110640673518 ], "soundness_avg": [ 2.0, 0.6324555320336759 ], "novelty_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 1.4, 0.4898979485566356 ], "wc_summary_avg": [ 78.2, 15.249918032566603 ], "wc_strengths_avg": [ 36.6, 29.736173257499026 ], "wc_weaknesses_avg": [ 253.0, 83.6253550067203 ], "wc_questions_avg": [ 61.2, 55.70780914737178 ], "wc_limitations_avg": [ 35.4, 26.23432865540874 ], "wc_review_avg": [ 464.4, 145.68816012291458 ], "wc_reply_reviewers_avg": [ 16.8, 33.599999999999994 ], "wc_reply_authors_avg": [ 93.8, 187.6 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.5929270612815711, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zp2SkjwmczcJ:scholar.google.com/&scioq=Streaming+algorithms+for+evaluating+noisy+judges+on+unlabeled+data+-+binary+classification.&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Real Chemistry", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "title": "Data-driven Optimal Filtering for Linear Systems with Unknown Noise Covariances", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72649", "id": "8S9Fbee743", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbe8185809cb7032ec7ec6e365e3ed3b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8S9Fbee743", "openreview": "https://openreview.net/forum?id=8S9Fbee743", "poster": "/media/PosterPDFs/NeurIPS%202023/72649.png?t=1699477157.2609184", "slides": "https://nips.cc/virtual/2023/poster/72649", "video": "https://nips.cc/virtual/2023/poster/72649", "author_site": "Shahriar 
Talebi, Amirhossein Taghvaei, Mehran Mesbahi", "tldr": "", "abstract": "This paper examines learning the optimal filtering policy, known as the Kalman gain, for a linear system with unknown noise covariance matrices using noisy output data. The learning problem is formulated as a stochastic policy optimization problem, aiming to minimize the output prediction error. This formulation provides a direct bridge between data-driven optimal control and, its dual, optimal filtering. Our contributions are twofold. Firstly, we conduct a thorough convergence analysis of the stochastic gradient descent algorithm, adopted for the filtering problem, accounting for biased gradients and stability constraints. Secondly, we carefully leverage a combination of tools from linear system theory and high-dimensional statistics to derive bias-variance error bounds that scale logarithmically with problem dimension, and, in contrast to subspace methods, the length of output trajectories only affects the bias term.", "keywords": "Optimal filtering;data-driven control;stochastic optimization;learning", "primary_area": "", "supplementary_material": "/attachment/696354b5c73cbd3bdc5513f36d3bd4b23345a051.zip", "author": "Shahriar Talebi;Amirhossein Taghvaei;Mehran Mesbahi", "authorids": "~Shahriar_Talebi2;~Amirhossein_Taghvaei1;~Mehran_Mesbahi1", "gender": "M;M;M", "homepage": "https://shahriarta.github.io;https://amirtag.github.io/;https://mehran-mesbahi.github.io/", "dblp": "204/4214;158/4926;82/5153", "google_scholar": "https://scholar.google.com/citations?hl=en;l96zhjwAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shahriar_Talebi2;~Amirhossein_Taghvaei1;~Mehran_Mesbahi1", "aff": "Harvard University;University of Washington, Seattle;University of Washington", "aff_domain": "harvard.edu;uw.edu;uw.edu", "position": "Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ntalebi2023datadriven,\ntitle={Data-driven Optimal Filtering for Linear Systems with Unknown Noise Covariances},\nauthor={Shahriar Talebi and Amirhossein Taghvaei and Mehran Mesbahi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8S9Fbee743}\n}", "github": "", "project": "", "reviewers": "aTTt;yyZM;LR36;4EhD", "pdf_size": 683748, "rating": "4;6;7;8", "confidence": "3;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "3;4;3;2", "wc_summary": "57;99;135;74", "wc_strengths": "63;65;55;78", "wc_weaknesses": "215;154;100;166", "wc_questions": "1;114;46;50", "wc_limitations": "48;1;38;46", "wc_review": "384;433;374;414", "wc_reply_reviewers": "0;25;37;54", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.25, 29.345996319770776 ], "wc_strengths_avg": [ 65.25, 8.257572282456872 ], "wc_weaknesses_avg": [ 158.75, 40.898502417570256 ], "wc_questions_avg": [ 52.75, 40.25776322648838 ], "wc_limitations_avg": [ 33.25, 18.9917745353087 ], "wc_review_avg": [ 401.25, 23.509306667785847 ], "wc_reply_reviewers_avg": [ 29.0, 19.6596032513375 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16903085094570333, 
"gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17725242009143200884&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "harvard.edu;uw.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Harvard University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.washington.edu", "aff_unique_abbr": "Harvard;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Real-World Image Super-Resolution as Multi-Task Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72648", "id": "8SCz56sUGP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42806406dd99e30c3796bc98b2670fa2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8SCz56sUGP", "openreview": "https://openreview.net/forum?id=8SCz56sUGP", "poster": "/media/PosterPDFs/NeurIPS%202023/72648.png?t=1699868206.9194655", "slides": "https://nips.cc/virtual/2023/poster/72648", "video": "https://nips.cc/virtual/2023/poster/72648", "author_site": "Wenlong Zhang, Xiaohui Li, Guangyuan SHI, Xiangyu Chen, Yu Qiao, Xiaoyun Zhang, Xiao-Ming Wu, Chao Dong", "tldr": "", "abstract": "In this paper, we take a new look at real-world image super-resolution (real-SR) from a multi-task learning perspective. We demonstrate that the conventional formulation of real-SR can be viewed as solving multiple distinct degradation tasks using a single shared model. This poses a challenge known as task competition or task conflict in multi-task learning, where certain tasks dominate the learning process, resulting in poor performance on other tasks. This problem is exacerbated in the case of real-SR, due to the involvement of numerous degradation tasks. To address the issue of task competition in real-SR, we propose a task grouping approach. Our approach efficiently identifies the degradation tasks where a real-SR model falls short and groups these unsatisfactory tasks into multiple task groups. We then utilize the task groups to fine-tune the real-SR model in a simple way, which effectively mitigates task competition and facilitates knowledge transfer. 
Extensive experiments demonstrate our method achieves significantly enhanced performance across a wide range of degradation scenarios.", "keywords": "Image super-resolution", "primary_area": "", "supplementary_material": "", "author": "Wenlong Zhang;Xiaohui Li;Guangyuan SHI;Xiangyu Chen;Yu Qiao;Xiaoyun Zhang;Xiao-Ming Wu;Chao Dong", "authorids": "~Wenlong_Zhang3;~Xiaohui_Li2;~Guangyuan_SHI1;~Xiangyu_Chen5;~Yu_Qiao1;~Xiaoyun_Zhang1;~Xiao-Ming_Wu1;~Chao_Dong4", "gender": "M;F;M;M;;F;F;M", "homepage": "https://wenlongzhang0517.github.io/;https://github.com/xh9998;;https://chxy95.github.io/;;https://mediabrain.sjtu.edu.cn/xiaoyun-zhang/;http://www4.comp.polyu.edu.hk/~csxmwu/;http://xpixel.group/2010/01/20/chaodong.html", "dblp": ";;;84/7543-6;;;98/2898-3;16/1278-5", "google_scholar": "https://scholar.google.com.hk/citations?user=UnMImiUAAAAJ;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=_gkTxJUAAAAJ;;hQm9oqwAAAAJ;3KbaUFkAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-7401-8009;;;0000-0001-7680-4062;;", "linkedin": ";;;;;;;", "or_profile": "~Wenlong_Zhang3;~Xiaohui_Li2;~Guangyuan_SHI1;~Xiangyu_Chen5;~Yu_Qiao1;~Xiaoyun_Zhang1;~Xiao-Ming_Wu1;~Chao_Dong4", "aff": "The Hong Kong Polytechnic University;Shanghai Aritifcal Intelligence Laboratory;The Hong Kong Polytechnic University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;Shanghai Jiaotong University;Hong Kong Polytechnic University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "polyu.edu.hk;pjlab.org.cn;polyu.edu.hk;siat.ac.cn;;sjtu.edu.cn;polyu.edu.hk;siat.ac.cn", "position": "PhD student;Intern;PhD student;PhD student;;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023realworld,\ntitle={Real-World Image Super-Resolution as Multi-Task Learning},\nauthor={Wenlong Zhang and Xiaohui Li and Guangyuan SHI and Xiangyu Chen and Yu Qiao and Xiaoyun Zhang and Xiao-Ming Wu and Chao Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8SCz56sUGP}\n}", "github": "", "project": "", "reviewers": "tGSk;LkK8;o9KK;4Sx7;yhLt", "pdf_size": 14838565, "rating": "3;4;5;5;8", "confidence": "5;5;4;4;5", "soundness": "2;2;3;2;4", "novelty": "2;2;3;2;4", "presentation": "2;3;3;2;3", "wc_summary": "46;52;95;70;56", "wc_strengths": "27;33;64;26;123", "wc_weaknesses": "290;365;117;221;66", "wc_questions": "5;19;116;123;9", "wc_limitations": "21;37;1;1;21", "wc_review": "389;506;393;441;275", "wc_reply_reviewers": "0;120;87;229;124", "wc_reply_authors": "48;552;16;523;23", "reply_reviewers": "0;1;1;2;1", "reply_authors": "2;3;2;3;2", "rating_avg": [ 5.0, 1.6733200530681511 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 63.8, 17.485994395515515 ], "wc_strengths_avg": [ 54.6, 36.913954001163304 ], "wc_weaknesses_avg": [ 211.8, 109.46670726755237 ], "wc_questions_avg": [ 54.4, 53.39513086415277 ], "wc_limitations_avg": [ 16.2, 13.717142559585797 ], "wc_review_avg": [ 400.8, 75.77440201017755 ], "wc_reply_reviewers_avg": [ 112.0, 73.57445208766423 ], "wc_reply_authors_avg": [ 232.4, 249.50879744009026 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], 
"replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17262630778616699929&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "polyu.edu.hk;pjlab.org.cn;polyu.edu.hk;siat.ac.cn;;sjtu.edu.cn;polyu.edu.hk;siat.ac.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;3;0;2", "aff_unique_norm": "Hong Kong Polytechnic University;Shanghai Artificial Intelligence Laboratory;Chinese Academy of Sciences;Shanghai Jiao Tong University", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology;", "aff_unique_url": "https://www.polyu.edu.hk;http://www.shanghaiai.cn;http://www.cas.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "PolyU;;CAS;SJTU", "aff_campus_unique_index": "0;0;2;0;2", "aff_campus_unique": "Hong Kong SAR;;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8SDsff42Lj", "title": "Continual Learning with Global Prototypes: Beyond the Scope of Task Supervision", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual learning aims to sequentially learn from different tasks without catastrophic forgetting. With no assumptions of task dependence, the knowledge learned from observed tasks may not align with that required for future tasks. This may result in models' disruptive updates for learning future tasks, causing abrupt changes to previously learned knowledge (e.g. representation drift) which induces catastrophic forgetting. To reduce such disruptive updates, we connect knowledge for observed and unknown tasks by learning task data representations properly related to a set of global prototypes, which have general-purpose connections and are shared across all tasks. We derive global prototypes and the corresponding objective for NLP tasks. For those tasks, the correlated global prototypes can be obtained from a model pre-trained by masked language modeling. And the data representations that have proper relationships to global prototypes can be learned by specific adaptations of the pre-trained model. We investigate existing adaptation models and propose a neighbor attention model which combines different advantages of existing models for our objective. Experiments show that models learning data representations well related to global prototypes can induce significantly less catastrophic forgetting, without memorizing information from past tasks. 
", "keywords": "Continual Learning;Representation Drift;Global Prototypes;Adaptation Model;Self-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/99fe4d3c1a6631821ef9ced2751b45c7be763e99.pdf", "author": "Xueying Bai;Jinghuan Shang;Yifan Sun;Niranjan Balasubramanian", "authorids": "~Xueying_Bai1;~Jinghuan_Shang1;~Yifan_Sun1;~Niranjan_Balasubramanian2", "gender": ";M;F;M", "homepage": ";https://www.cs.stonybrook.edu/~jishang;https://sites.google.com/site/yifansunwebsite/;http://www3.cs.stonybrook.edu/~niranjan/", "dblp": ";218/7364;https://dblp.uni-trier.de/pid/99/10261-1;40/1931", "google_scholar": ";gMvLIDUAAAAJ;o3fSb1YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-7301-5981;;", "linkedin": ";;;", "or_profile": "~Xueying_Bai1;~Jinghuan_Shang1;~Yifan_Sun1;~Niranjan_Balasubramanian2", "aff": ";Department of Computer Science, State University of New York, Stony Brook;State University of New York, Stony Brook;", "aff_domain": ";cs.stonybrook.edu;stonybrook.edu;", "position": ";PhD student;Assistant Professor;", "bibtex": "@misc{\nbai2023continual,\ntitle={Continual Learning with Global Prototypes: Beyond the Scope of Task Supervision},\nauthor={Xueying Bai and Jinghuan Shang and Yifan Sun and Niranjan Balasubramanian},\nyear={2023},\nurl={https://openreview.net/forum?id=8SDsff42Lj}\n}", "github": "", "project": "", "reviewers": "Bgqo;fEnP;Zm6a;tk6N;sed9", "site": "https://openreview.net/forum?id=8SDsff42Lj", "pdf_size": 788222, "rating": "3;4;5;5;6", "confidence": "4;4;3;2;4", "soundness": "2;3;2;4;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;2", "wc_summary": "92;57;84;87;110", "wc_strengths": "105;27;26;23;53", "wc_weaknesses": "172;149;229;136;90", "wc_questions": "30;23;53;50;141", "wc_limitations": "8;3;5;13;30", "wc_review": "407;259;397;309;424", "wc_reply_reviewers": "0;0;23;0;5", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 86.0, 17.07629936490925 ], "wc_strengths_avg": [ 46.8, 31.038041175306148 ], "wc_weaknesses_avg": [ 155.2, 45.57806489968612 ], "wc_questions_avg": [ 59.4, 42.372632677236375 ], "wc_limitations_avg": [ 11.8, 9.703607576566563 ], "wc_review_avg": [ 359.2, 63.988749011056626 ], "wc_reply_reviewers_avg": [ 5.6, 8.912911982062877 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2941742027072762, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5997371825629778252&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stony Brook", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Semantic segmentation of sparse irregular point clouds for leaf/wood discrimination", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72647", "id": "8SUtvEZCF2", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9708c7d3a0fef3710f33ba05a74e10b3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8SUtvEZCF2", "openreview": "https://openreview.net/forum?id=8SUtvEZCF2", "poster": "/media/PosterPDFs/NeurIPS%202023/72647.png?t=1699748367.8802717", "slides": "https://nips.cc/virtual/2023/poster/72647", "video": "https://nips.cc/virtual/2023/poster/72647", "author_site": "Yuchen BAI, Jean-Baptiste Durand, Gr\u00e9goire Vincent, Florence Forbes", "tldr": "", "abstract": "Lidar (Light Detection and Ranging) has become an essential part of the remote sensing toolbox used for biosphere monitoring. In particular, Lidar provides the opportunity to map forest leaf area with unprecedented accuracy, while leaf area has remained an important source of uncertainty affecting models of gas exchanges between the vegetation and the atmosphere. Unmanned Aerial Vehicles (UAV) are easy to mobilize and therefore allow frequent revisits to track the response of vegetation to climate change. However, miniature sensors embarked on UAVs usually provide point clouds of limited density, which are further affected by a strong decrease in density from top to bottom of the canopy due to progressively stronger occlusion. In such a context, discriminating leaf points from wood points presents a significant challenge due in particular to strong class imbalance and spatially irregular sampling intensity. Here we introduce a neural network model based on the Pointnet ++ architecture which makes use of point geometry only (excluding any spectral information). To cope with local data sparsity, we propose an innovative sampling scheme which strives to preserve local important geometric information. We also propose a loss function adapted to the severe class imbalance. We show that our model outperforms state-of-the-art alternatives on UAV point clouds. 
We discuss future possible improvements, particularly regarding much denser point clouds acquired from below the canopy.", "keywords": "UAV;Deep Learning;Semantic Segmentation;Lidar;Class Imbalance;Point Cloud", "primary_area": "", "supplementary_material": "/attachment/c6c69d9970b1ff655c32991e30933f5feba194a9.zip", "author": "Yuchen BAI;Jean-Baptiste Durand;Gr\u00e9goire Laurent Vincent;Florence Forbes", "authorids": "~Yuchen_BAI1;~Jean-Baptiste_Durand1;~Gr\u00e9goire_Laurent_Vincent1;~Florence_Forbes1", "gender": "M;M;M;", "homepage": ";http://amap-collaboratif.cirad.fr/pages-chercheurs/?page_id=32153;http://amap.cirad.fr;", "dblp": "195/2141-2;;;", "google_scholar": "ewddOHQAAAAJ;;bc4TxdsAAAAJ;", "orcid": "0009-0002-9926-1931;0000-0001-6800-1438;0000-0001-9443-021X;", "linkedin": "yuchen-bai-4aa318ba/;;;", "or_profile": "~Yuchen_BAI1;~Jean-Baptiste_Durand1;~Gr\u00e9goire_Laurent_Vincent1;~Florence_Forbes1", "aff": "INRIA;Centre de coop\u00e9ration internationale en recherche agronomique pour le d\u00e9veloppement;IRD;", "aff_domain": "inria.fr;cirad.fr;ird.fr;", "position": "PhD student;Researcher;Researcher;", "bibtex": "@inproceedings{\nbai2023semantic,\ntitle={Semantic segmentation of sparse irregular point clouds for leaf/wood discrimination},\nauthor={Yuchen BAI and Jean-Baptiste Durand and Gr{\\'e}goire Laurent Vincent and Florence Forbes},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8SUtvEZCF2}\n}", "github": "", "project": "", "reviewers": "81in;eT4d;uJjq;bCLj;yADz", "pdf_size": 12054717, "rating": "3;3;4;6;7", "confidence": "4;5;4;5;3", "soundness": "3;2;3;2;3", "novelty": "2;2;3;2;3", "presentation": "2;2;3;2;3", "wc_summary": "50;81;68;176;75", "wc_strengths": "38;98;30;142;90", "wc_weaknesses": "245;114;17;221;50", "wc_questions": "26;2;12;419;32", "wc_limitations": "18;2;5;61;4", "wc_review": "377;297;132;1019;251", "wc_reply_reviewers": "206;89;0;186;184", "wc_reply_authors": "692;155;0;26;20", "reply_reviewers": "2;1;0;1;1", "reply_authors": "3;2;1;2;2", "rating_avg": [ 4.6, 1.624807680927192 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 90.0, 44.2402531638326 ], "wc_strengths_avg": [ 79.6, 41.30665805896187 ], "wc_weaknesses_avg": [ 129.4, 90.47563207847735 ], "wc_questions_avg": [ 98.2, 160.7437712634614 ], "wc_limitations_avg": [ 18.0, 22.22611077089287 ], "wc_review_avg": [ 415.2, 312.1438130093242 ], "wc_reply_reviewers_avg": [ 133.0, 77.92817205606713 ], "wc_reply_authors_avg": [ 178.6, 262.47864675055 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4276686017238498, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9211432117061302833&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "email": "inria.fr;cirad.fr;ird.fr;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "INRIA;Centre de coop\u00e9ration internationale en recherche agronomique pour le d\u00e9veloppement;Institut de Recherche pour le Developpement", "aff_unique_dep": ";;", "aff_unique_url": "https://www.inria.fr;;https://www.ird.fr", "aff_unique_abbr": "INRIA;;IRD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "France" }, { "title": "NAVI: Category-Agnostic Image Collections with High-Quality 3D Shape and Pose Annotations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73682", "id": "8TMhs2pIfG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efc90033e6e1b05485312dd09fe302b8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=8TMhs2pIfG", "openreview": "https://openreview.net/forum?id=8TMhs2pIfG", "poster": "/media/PosterPDFs/NeurIPS%202023/73682.png?t=1701399346.8654974", "slides": "https://nips.cc/virtual/2023/poster/73682", "video": "https://nips.cc/virtual/2023/poster/73682", "author_site": "Varun Jampani, Kevis-kokitsi Maninis, Andreas Engelhardt, Arjun Karpur, Karen Truong, Kyle Sargent, Stefan Popov, Andre Araujo, Ricardo Martin Brualla, Kaushal Patel, Daniel Vlasic, Vittorio Ferrari, Ameesh Makadia, Ce Liu, Yuanzhen Li, Howard Zhou", "tldr": "", "abstract": "Recent advances in neural reconstruction enable high-quality 3D object reconstruction from casually captured image collections. Current techniques mostly analyze their progress on relatively simple image collections where SfM techniques can provide ground-truth (GT) camera poses. We note that SfM techniques tend to fail on in-the-wild image collections such as image search results with varying backgrounds and illuminations. To enable systematic research progress on 3D reconstruction from casual image captures, we propose `NAVI': a new dataset of category-agnostic image collections of objects with high-quality 3D scans along with per-image 2D-3D alignments providing near-perfect GT camera parameters. These 2D-3D alignments allow us to extract accurate derivative annotations such as dense pixel correspondences, depth and segmentation maps. We demonstrate the use of NAVI image collections on different problem settings and show that NAVI enables more thorough evaluations that were not possible with existing datasets. 
We believe NAVI is beneficial for systematic research progress on 3D reconstruction and correspondence estimation.", "keywords": "3D reconstruction; image collections; pose estimation; neural radiance fields; pixel correspondences", "primary_area": "", "supplementary_material": "/attachment/7de56dd7d3ea1432ade4ae72ee77734581c6caba.zip", "author": "Varun Jampani;Kevis-kokitsi Maninis;Andreas Engelhardt;Arjun Karpur;Karen Truong;Kyle Sargent;Stefan Popov;Andre Araujo;Ricardo Martin Brualla;Kaushal Patel;Daniel Vlasic;Vittorio Ferrari;Ameesh Makadia;Ce Liu;Yuanzhen Li;Howard Zhou", "authorids": "~Varun_Jampani2;~Kevis-kokitsi_Maninis1;~Andreas_Engelhardt1;~Arjun_Karpur1;~Karen_Truong1;~Kyle_Sargent1;~Stefan_Popov1;~Andre_Araujo1;~Ricardo_Martin_Brualla1;~Kaushal_Patel1;~Daniel_Vlasic1;~Vittorio_Ferrari4;~Ameesh_Makadia1;~Ce_Liu1;~Yuanzhen_Li1;~Howard_Zhou1", "gender": ";M;M;M;;;M;M;;;;M;;M;F;M", "homepage": ";https://www.kmaninis.com/;;;;https://kylesargent.github.io;http://popov.im;https://andrefaraujo.github.io/;http://ricardomartinbrualla.com;;;https://sites.google.com/view/vittoferrari/home;http://www.ameeshmakadia.com/index.html;http://people.csail.mit.edu/celiu/;http://people.csail.mit.edu/yzli/;http://www.howardzzh.com/", "dblp": ";185/0928;172/8953;211/7994;;298/0019;72/5256;177/1567;16/7968;;;16/3608;59/6004;61/3937-1;97/371;85/1213", "google_scholar": ";Lw_-pYsAAAAJ;https://scholar.google.de/citations?user=ZQUFcqAAAAAJ;jgSItF4AAAAJ;;Lom6iMAAAAAJ;Glq3dWkAAAAJ;_ASUnDcAAAAJ;9F59OCYAAAAJ;US0_UBgAAAAJ;;4QvYJ00AAAAJ;OT1uf7kAAAAJ;j7MW4iYAAAAJ;k1eaag4AAAAJ;Rh9T3EcAAAAJ", "orcid": ";0000-0003-3776-0049;0000-0003-1313-3665;;;0009-0009-8609-6894;;;0000-0003-3247-9522;;;;;;0000-0002-9831-8249;0000-0003-3245-8481", "linkedin": ";kmaninis/;andreas-engelhardt-5a1451ab/;arjunkarpur/;;kyle-sargent-784006134/;https://linkedin.com/in/stefanpopov;andrefaraujo;;kaushal91/;;vittorio-ferrari-17062b2b/;;ce-liu-5697501a;yuanzhen-yz-li-5561655/;howard-zhou-0a34b84", "or_profile": "~Varun_Jampani2;~Kevis-kokitsi_Maninis1;~Andreas_Engelhardt1;~Arjun_Karpur1;~Karen_Truong1;~Kyle_Sargent1;~Stefan_Popov1;~Andre_Araujo1;~Ricardo_Martin_Brualla1;~Kaushal_Patel1;~Daniel_Vlasic1;~Vittorio_Ferrari4;~Ameesh_Makadia1;~Ce_Liu1;~Yuanzhen_Li1;~Howard_Zhou1", "aff": ";Google;Google;Google Research;Google;Computer Science Department, Stanford University;Google;Google Research;Google;Google;;Google;Google;Microsoft;Google;Google", "aff_domain": ";google.com;google.com;research.google.com;google.com;cs.stanford.edu;google.com;google.com;google.com;google.com;;google.com;google.com;microsoft.com;google.com;google.com", "position": ";Research Scientist;Intern;Researcher;Researcher;PhD student;Software Engineer;Researcher;Researcher;Researcher;;Principal Researcher;Research Scientist;Chief Architect for Computer Vision;Software Engineer;Software Engineer", "bibtex": "@inproceedings{\njampani2023navi,\ntitle={{NAVI}: Category-Agnostic Image Collections with High-Quality 3D Shape and Pose Annotations},\nauthor={Varun Jampani and Kevis-kokitsi Maninis and Andreas Engelhardt and Arjun Karpur and Karen Truong and Kyle Sargent and Stefan Popov and Andre Araujo and Ricardo Martin Brualla and Kaushal Patel and Daniel Vlasic and Vittorio Ferrari and Ameesh Makadia and Ce Liu and Yuanzhen Li and Howard Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=8TMhs2pIfG}\n}", "github": "", "project": "", "reviewers": 
"eykt;kSQG;Ymgi;vLyG;p2T3", "pdf_size": 17105526, "rating": "5;6;7;7;7", "confidence": "4;4;4;3;5", "wc_summary_and_contributions": "55;75;42;87;76", "wc_strengths": "49;35;24;59;163", "wc_improvement": "150;362;140;212;294", "wc_limitations": "1;79;9;21;31", "wc_correctness": "4;21;22;26;56", "wc_clarity": "27;5;23;22;56", "wc_relation_to_prior_work": "12;33;1;13;41", "wc_documentation": "1;2;24;40;88", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "300;613;286;481;806", "wc_reply_reviewers": "257;0;128;155;284", "wc_reply_authors": "810;639;397;391;510", "reply_reviewers": "1;0;1;1;1", "reply_authors": "4;3;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 67.0, 16.211107303327555 ], "wc_strengths_avg": [ 66.0, 49.94396860482755 ], "wc_improvement_avg": [ 231.6, 85.24224304885459 ], "wc_limitations_avg": [ 28.2, 27.38174574419973 ], "wc_correctness_avg": [ 25.8, 16.880758276807352 ], "wc_clarity_avg": [ 26.6, 16.523922052587878 ], "wc_relation_to_prior_work_avg": [ 20.0, 14.724129855444769 ], "wc_documentation_avg": [ 31.0, 32.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 497.2, 196.22578831539957 ], "wc_reply_reviewers_avg": [ 164.8, 101.31021666149964 ], "wc_reply_authors_avg": [ 549.4, 158.62484042545165 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15782135542099824909&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";google.com;google.com;research.google.com;google.com;cs.stanford.edu;google.com;google.com;google.com;google.com;;google.com;google.com;microsoft.com;google.com;google.com", "author_num": 16, "aff_unique_index": "0;0;0;0;1;0;0;0;0;0;0;2;0;0", "aff_unique_norm": "Google;Stanford University;Microsoft", "aff_unique_dep": "Google;Computer Science Department;Microsoft Corporation", "aff_unique_url": "https://www.google.com;https://www.stanford.edu;https://www.microsoft.com", "aff_unique_abbr": "Google;Stanford;Microsoft", "aff_campus_unique_index": "0;0;0;0;1;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Shared Safety Constraints from Multi-task Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72646", "id": "8U31BCquNF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/124dde499d62b58e97e42a45b26d7369-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8U31BCquNF", "openreview": "https://openreview.net/forum?id=8U31BCquNF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72646", "video": "https://nips.cc/virtual/2023/poster/72646", "author_site": "Konwoo Kim, Gokul Swamy, ZUXIN LIU, DING ZHAO, Sanjiban Choudhury, Steven Wu", "tldr": "", "abstract": "Regardless of the particular task we want to perform in an environment, there are often shared safety constraints we want our agents to respect. For example, regardless of whether it is making a sandwich or clearing the table, a kitchen robot should not break a plate. Manually specifying such a constraint can be both time-consuming and error-prone. 
We show how to learn constraints from expert demonstrations of safe task completion by extending inverse reinforcement learning (IRL) techniques to the space of constraints. Intuitively, we learn constraints that forbid highly rewarding behavior that the expert could have taken but chose not to. Unfortunately, the constraint learning problem is rather ill-posed and typically leads to overly conservative constraints that forbid all behavior that the expert did not take. We counter this by leveraging diverse demonstrations that naturally occur in multi-task settings to learn a tighter set of constraints. We validate our method with simulation experiments on high-dimensional continuous control tasks.", "keywords": "constraints;inverse reinforcement learning;safe reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Konwoo Kim;Gokul Swamy;Zuxin Liu;Ding Zhao;Sanjiban Choudhury;Steven Wu", "authorids": "~Konwoo_Kim1;~Gokul_Swamy1;~Zuxin_Liu1;~Ding_Zhao1;~Sanjiban_Choudhury3;~Steven_Wu1", "gender": ";;M;;M;", "homepage": ";https://gokul.dev/;https://www.zuxin.me;https://safeai-lab.github.io;https://www.sanjibanchoudhury.com/;", "dblp": ";31/11509;227/3137;;135/8207;", "google_scholar": ";Sbpra_AAAAAJ;5ApCTCoAAAAJ;z7tPc9IAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0001-7412-5074;;;", "linkedin": ";;zuxin-liu/;;;", "or_profile": "~Konwoo_Kim1;~Gokul_Swamy1;~Zuxin_Liu1;~Ding_Zhao1;~Sanjiban_Choudhury3;~Steven_Wu1", "aff": ";Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Cornell University;", "aff_domain": ";cmu.edu;cmu.edu;cmu.edu;cornell.edu;", "position": ";PhD student;PhD student;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nkim2023learning,\ntitle={Learning Shared Safety Constraints from Multi-task Demonstrations},\nauthor={Konwoo Kim and Gokul Swamy and Zuxin Liu and Ding Zhao and Sanjiban Choudhury and Steven Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8U31BCquNF}\n}", "github": "", "project": "", "reviewers": "NZBn;3RKE;giMg;Ray8;doVy", "pdf_size": 4672847, "rating": "4;5;5;7;7", "confidence": "2;4;2;3;3", "soundness": "2;3;3;4;3", "novelty": "2;2;3;3;2", "presentation": "2;3;3;3;2", "wc_summary": "456;66;98;140;129", "wc_strengths": "56;37;117;59;93", "wc_weaknesses": "81;94;87;146;289", "wc_questions": "376;112;53;80;67", "wc_limitations": "3;70;14;81;107", "wc_review": "972;379;369;506;685", "wc_reply_reviewers": "488;166;47;17;227", "wc_reply_authors": "1442;20;42;0;981", "reply_reviewers": "3;1;1;1;2", "reply_authors": "6;2;2;1;4", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 177.8, 141.47282424550662 ], "wc_strengths_avg": [ 72.4, 28.688673723265772 ], "wc_weaknesses_avg": [ 139.4, 78.28307607650584 ], "wc_questions_avg": [ 137.6, 120.78841004003654 ], "wc_limitations_avg": [ 55.0, 39.97499218261337 ], "wc_review_avg": [ 582.2, 225.85960240822175 ], "wc_reply_reviewers_avg": [ 189.0, 168.00119047197256 ], "wc_reply_authors_avg": [ 497.0, 601.4721938710052 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 3.0, 1.7888543819998317 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3563483225498991, "gs_citation": 17, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13158031393353976901&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";cmu.edu;cmu.edu;cmu.edu;cornell.edu;", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.cornell.edu", "aff_unique_abbr": "CMU;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Grand Illusion: The Myth of Software Portability and Implications for ML Progress.", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72645", "id": "8VTbfVfAfI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42c40aff7814e9796266e12053b1c610-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8VTbfVfAfI", "openreview": "https://openreview.net/forum?id=8VTbfVfAfI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72645", "video": "https://nips.cc/virtual/2023/poster/72645", "author_site": "Fraser Mince, Dzung Dinh, Jonas Kgomo, Neil Thompson, Sara Hooker", "tldr": "", "abstract": "Pushing the boundaries of machine learning often requires exploring different hardware and software combinations. However, this ability to experiment with different systems can be at odds with the drive for efficiency, which has produced increasingly specialized AI hardware and incentivized consolidation around a narrow set of ML frameworks. Exploratory research can be further restricted if software and hardware are co-evolving, making it even harder to stray away from a given tooling stack. While this friction increasingly impacts the rate of innovation in machine learning, to our knowledge the lack of portability in tooling has not been quantified. In this work we ask: How portable are popular ML software frameworks? We conduct a large scale study of the portability of mainstream ML frameworks across different hardware types. Our findings paint an uncomfortable picture -- frameworks can lose more than 40% of their key functions when ported to other hardware. Worse, even when functions are portable, the slowdown in their performance can be extreme. 
Collectively, our results reveal how costly straying from a narrow set of hardware-software combinations can be - and thus how specialization incurs an exploration cost that can impede innovation in machine learning research.", "keywords": "hardware;software;meta study;portability", "primary_area": "", "supplementary_material": "/attachment/90965e7495cb4e69e2a18de475004ddf5b4df660.pdf", "author": "Fraser Mince;Dzung Dinh;Jonas Kgomo;Neil Thompson;Sara Hooker", "authorids": "~Fraser_Mince1;~Dzung_Dinh1;~Jonas_Kgomo1;neil_t@mit.edu;~Sara_Hooker2", "gender": "M;M;M;;", "homepage": "https://frasermince.com/;;https://jonas-kgomo.github.io/;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": "fraser-mince-37a95332/;dzungdinh/;jonas-kgomo/;;", "or_profile": "~Fraser_Mince1;~Dzung_Dinh1;~Jonas_Kgomo1;neil_t@mit.edu;~Sara_Hooker2", "aff": "for.ai;Dickinson College;University of Sussex;;", "aff_domain": "for.ai;dickinson.edu;sussex.ac.uk;;", "position": "Researcher;Undergrad student;MS student;;", "bibtex": "@inproceedings{\nmince2023the,\ntitle={The Grand Illusion: The Myth of Software Portability and Implications for {ML} Progress.},\nauthor={Fraser Mince and Dzung Dinh and Jonas Kgomo and Neil Thompson and Sara Hooker},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8VTbfVfAfI}\n}", "github": "", "project": "", "reviewers": "fKz5;Twor;FGUA", "pdf_size": 3111313, "rating": "4;6;8", "confidence": "4;4;4", "soundness": "2;3;4", "novelty": "1;3;3", "presentation": "2;4;3", "wc_summary": "96;95;47", "wc_strengths": "44;62;54", "wc_weaknesses": "186;229;68", "wc_questions": "77;63;20", "wc_limitations": "7;25;10", "wc_review": "410;474;199", "wc_reply_reviewers": "449;134;37", "wc_reply_authors": "1321;688;32", "reply_reviewers": "2;1;1", "reply_authors": "5;3;2", "rating_avg": [ 6.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 79.33333333333333, 22.866763848189994 ], "wc_strengths_avg": [ 53.333333333333336, 7.363574011458174 ], "wc_weaknesses_avg": [ 161.0, 68.06369565830721 ], "wc_questions_avg": [ 53.333333333333336, 24.25329301810833 ], "wc_limitations_avg": [ 14.0, 7.874007874011811 ], "wc_review_avg": [ 361.0, 117.4932622181658 ], "wc_reply_reviewers_avg": [ 206.66666666666666, 175.87179674094676 ], "wc_reply_authors_avg": [ 680.3333333333334, 526.2599695545497 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11888877237180086574&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "for.ai;dickinson.edu;sussex.ac.uk;;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "for.ai;Dickinson College;University of Sussex", "aff_unique_dep": ";;", "aff_unique_url": "https://www.for.ai;https://www.dickinson.edu;https://www.sussex.ac.uk", "aff_unique_abbr": "for.ai;Dickinson;Sussex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "MixFormerV2: Efficient Fully Transformer Tracking", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72644", "id": "8WvYAycmDJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7870bd43b2d133a1ed95582ae5d82a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8WvYAycmDJ", "openreview": "https://openreview.net/forum?id=8WvYAycmDJ", "poster": "/media/PosterPDFs/NeurIPS%202023/72644.png?t=1697360869.7521803", "slides": "https://nips.cc/virtual/2023/poster/72644", "video": "https://nips.cc/virtual/2023/poster/72644", "author_site": "Yutao Cui, Tianhui Song, Gangshan Wu, Limin Wang", "tldr": "", "abstract": "Transformer-based trackers have achieved strong accuracy on the standard benchmarks. However, their efficiency remains an obstacle to practical deployment on both GPU and CPU platforms. In this paper, to overcome this issue, we propose a fully transformer tracking framework, coined as \\emph{MixFormerV2}, without any dense convolutional operation and complex score prediction module. Our key design is to introduce four special prediction tokens and concatenate them with the tokens from target template and search areas. Then, we apply the unified transformer backbone on these mixed token sequence. These prediction tokens are able to capture the complex correlation between target template and search area via mixed attentions. Based on them, we can easily predict the tracking box and estimate its confidence score through simple MLP heads. To further improve the efficiency of MixFormerV2, we present a new distillation-based model reduction paradigm, including dense-to-sparse distillation and deep-to-shallow distillation. The former one aims to transfer knowledge from the dense-head based MixViT to our fully transformer tracker, while the latter one is used to prune some layers of the backbone. 
We instantiate two types of MixFormerV2, where the MixFormerV2-B achieves an AUC of 70.6\\% on LaSOT and an AUC of 56.7\\% on TNL2k with a high GPU speed of 165 FPS, and the MixFormerV2-S surpasses FEAR-L by 2.7\\% AUC on LaSOT with a real-time CPU speed.", "keywords": "Efficient Tracking;Fully Transformer;Distillation;Model Pruning", "primary_area": "", "supplementary_material": "/attachment/3668be3d547c260c4bc4aec34d1192f1dd038a76.pdf", "author": "Yutao Cui;Tianhui Song;Gangshan Wu;Limin Wang", "authorids": "~Yutao_Cui1;~Tianhui_Song2;~Gangshan_Wu1;~Limin_Wang1", "gender": "M;M;M;M", "homepage": ";http://mcg.nju.edu.cn/member/gswu/en/index.html;https://wanglimin.github.io;", "dblp": "255/2385.html;78/1123;68/6610-2;181/8738", "google_scholar": "TSMchWcAAAAJ;;HEuN8PcAAAAJ;t2V09QEAAAAJ", "orcid": "0000-0003-4788-9751;0000-0003-1391-1762;;", "linkedin": ";;;", "or_profile": "~Yutao_Cui1;~Gangshan_Wu1;~Limin_Wang2;~tianhui_Song1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Full Professor;Full Professor;Undergrad student", "bibtex": "@inproceedings{\ncui2023mixformerv,\ntitle={MixFormerV2: Efficient Fully Transformer Tracking},\nauthor={Yutao Cui and Tianhui Song and Gangshan Wu and Limin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8WvYAycmDJ}\n}", "github": "", "project": "", "reviewers": "eZL2;GURa;iyBU;fR9F", "pdf_size": 2164673, "rating": "5;5;6;8", "confidence": "5;4;5;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "59;64;73;120", "wc_strengths": "48;67;27;68", "wc_weaknesses": "42;154;143;120", "wc_questions": "47;3;8;26", "wc_limitations": "1;2;7;18", "wc_review": "197;290;258;352", "wc_reply_reviewers": "13;57;15;15", "wc_reply_authors": "0;0;9;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.0, 24.197107265125723 ], "wc_strengths_avg": [ 52.5, 16.740669042783207 ], "wc_weaknesses_avg": [ 114.75, 43.757142274147654 ], "wc_questions_avg": [ 21.0, 17.277152543170995 ], "wc_limitations_avg": [ 7.0, 6.745368781616021 ], "wc_review_avg": [ 274.25, 55.95701475239722 ], "wc_reply_reviewers_avg": [ 25.0, 18.49324200890693 ], "wc_reply_authors_avg": [ 2.25, 3.897114317029974 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3035114349811981986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nju.edu;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Near-Optimal $k$-Clustering in the Sliding Window Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72643", "id": "8XRMbNAP6Z", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/476ab8f369e489c04187ba84f68cfa68-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8XRMbNAP6Z", "openreview": "https://openreview.net/forum?id=8XRMbNAP6Z", "poster": "/media/PosterPDFs/NeurIPS%202023/72643.png?t=1701716752.5015585", "slides": "https://nips.cc/virtual/2023/poster/72643", "video": "https://nips.cc/virtual/2023/poster/72643", "author_site": "David Woodruff, Peilin Zhong, Samson Zhou", "tldr": "", "abstract": "Clustering is an important technique for identifying structural information in large-scale data analysis, where the underlying dataset may be too large to store. In many applications, recent data can provide more accurate information and thus older data past a certain time is expired. The sliding window model captures these desired properties and thus there has been substantial interest in clustering in the sliding window model. In this paper, we give the first algorithm that achieves near-optimal $(1+\\varepsilon)$-approximation to $(k,z)$-clustering in the sliding window model. Our algorithm uses $\\frac{k}{\\min(\\varepsilon^4,\\varepsilon^{2+z})}\\,\\text{polylog}\\frac{n\\Delta}{\\varepsilon}$ words of space when the points are from $[\\Delta]^d$, thus significantly improving on works by Braverman et. al. (SODA 2016), Borassi et. al. (NeurIPS 2021), and Epasto et. al. (SODA 2022).\n\nAlong the way, we develop a data structure for clustering called an online coreset, which outputs a coreset not only for the end of a stream, but also for all prefixes of the stream. Our online coreset samples $\\frac{k}{\\min(\\varepsilon^4,\\varepsilon^{2+z})}\\,\\text{polylog}\\frac{n\\Delta}{\\varepsilon}$ points from the stream. We then show that any online coreset requires $\\Omega\\left(\\frac{k}{\\varepsilon^2}\\log n\\right)$ samples, which shows a separation between the problem of constructing an offline coreset, i.e., constructing online coresets is strictly harder. 
Our results also extend to general metrics on $[\\Delta]^d$ and are near-optimal in light of a $\\Omega\\left(\\frac{k}{\\varepsilon^{2+z}}\\right)$ lower bound for the size of an offline coreset.", "keywords": "clustering;streaming algorithms;sliding window model", "primary_area": "", "supplementary_material": "/attachment/8c49a518302b1fa61de4a5a1bfd0f69a7ee44b6a.zip", "author": "David Woodruff;Peilin Zhong;Samson Zhou", "authorids": "~David_Woodruff1;~Peilin_Zhong1;~Samson_Zhou1", "gender": "M;M;", "homepage": "http://www.cs.cmu.edu/~dwoodruf/;http://www.cs.columbia.edu/~peilin/;https://samsonzhou.github.io/", "dblp": "w/DPWoodruff;148/9632;179/2683", "google_scholar": "https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;https://scholar.google.com/citations?hl=en;NpjsgocAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~David_Woodruff1;~Peilin_Zhong1;~Samson_Zhou1", "aff": "Carnegie Mellon University;Google;University of California, Berkeley", "aff_domain": "cmu.edu;google.com;berkeley.edu", "position": "Full Professor;Researcher;Postdoc", "bibtex": "@inproceedings{\nwoodruff2023nearoptimal,\ntitle={Near-Optimal \\$k\\$-Clustering in the Sliding Window Model},\nauthor={David Woodruff and Peilin Zhong and Samson Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8XRMbNAP6Z}\n}", "github": "", "project": "", "reviewers": "o3G5;YLu7;P4Gj;8X17", "pdf_size": 685846, "rating": "7;7;7;7", "confidence": "4;2;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;4;3;2", "wc_summary": "68;119;49;395", "wc_strengths": "68;95;79;61", "wc_weaknesses": "17;111;130;211", "wc_questions": "78;160;15;197", "wc_limitations": "4;19;27;1", "wc_review": "235;504;300;865", "wc_reply_reviewers": "81;19;141;0", "wc_reply_authors": "219;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 157.75, 139.3473627306954 ], "wc_strengths_avg": [ 75.75, 12.833062767710599 ], "wc_weaknesses_avg": [ 117.25, 68.99411206762501 ], "wc_questions_avg": [ 112.5, 70.87488977063738 ], "wc_limitations_avg": [ 12.75, 10.685855136581255 ], "wc_review_avg": [ 476.0, 245.5412388988864 ], "wc_reply_reviewers_avg": [ 60.25, 55.41378438619763 ], "wc_reply_authors_avg": [ 54.75, 94.82978171439603 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16826992126328120553&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "cmu.edu;google.com;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;Google;University of California, Berkeley", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.cmu.edu;https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "CMU;Google;UC Berkeley", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Mitigating the Effect of Incidental Correlations on Part-based Learning", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72642", "id": "8Xn3D9OtqI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9493f7cb0d1ec4ae5fc6e0c1a5aca63-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8Xn3D9OtqI", "openreview": "https://openreview.net/forum?id=8Xn3D9OtqI", "poster": "/media/PosterPDFs/NeurIPS%202023/72642.png?t=1700013873.5860884", "slides": "https://nips.cc/virtual/2023/poster/72642", "video": "https://nips.cc/virtual/2023/poster/72642", "author_site": "Gaurav Bhatt, Deepayan Das, Leonid Sigal, Vineeth N Balasubramanian", "tldr": "", "abstract": "Intelligent systems possess a crucial characteristic of breaking complicated problems into smaller reusable components or parts and adjusting to new tasks using these part representations. However, current part-learners encounter difficulties in dealing with incidental correlations resulting from the limited observations of objects that may appear only in specific arrangements or with specific backgrounds. These incidental correlations may have a detrimental impact on the generalization and interpretability of learned part representations. This study asserts that part-based representations could be more interpretable and generalize better with limited data, employing two innovative regularization methods. The first regularization separates foreground and background information's generative process via a unique mixture-of-parts formulation. Structural constraints are imposed on the parts using a weakly-supervised loss, guaranteeing that the mixture-of-parts for foreground and background entails soft, object-agnostic masks. The second regularization assumes the form of a distillation loss, ensuring the invariance of the learned parts to the incidental background correlations. Furthermore, we incorporate sparse and orthogonal constraints to facilitate learning high-quality part representations.\nBy reducing the impact of incidental background correlations on the learned parts, we exhibit state-of-the-art (SoTA) performance on few-shot learning tasks on benchmark datasets, including MiniImagenet, TieredImageNet, and FC100. We also demonstrate that the part-based representations acquired through our approach generalize better than existing techniques, even under domain shifts of the background and common data corruption on the ImageNet-9 dataset.", "keywords": "part-based learning;interpretability;few-shot learning;vision transformers", "primary_area": "", "supplementary_material": "/attachment/122c7ef5e923ae67a43918cf48d400a98eb11841.pdf", "author": "Gaurav Bhatt;Deepayan Das;Leonid Sigal;Vineeth N. 
Balasubramanian", "authorids": "~Gaurav_Bhatt1;~Deepayan_Das1;~Leonid_Sigal2;~Vineeth_N._Balasubramanian2", "gender": "M;M;M;M", "homepage": "https://gauravbh1010tt.github.io/;https://deepayan137.github.io/;http://www.cs.ubc.ca/~lsigal;https://people.iith.ac.in/vineethnb/", "dblp": "173/8402;;09/4991;88/4691", "google_scholar": "https://scholar.google.co.in/citations?user=PcmMT-4AAAAJ;xu4M7IoAAAAJ;P2mG6rcAAAAJ;https://scholar.google.co.in/citations?user=7soDcboAAAAJ", "orcid": ";;;0000-0003-2656-0375", "linkedin": ";;leonid-sigal-23723037;vineethnb?originalSubdomain=in", "or_profile": "~Gaurav_Bhatt1;~Deepayan_Das2;~Leonid_Sigal1;~Vineeth_Balasubramanian1", "aff": "University of British Columbia;University of Trento;University of British Columbia;Indian Institute of Technology Hyderabad", "aff_domain": "cs.ubc.ca;unitn.it;ubc.ca;iith.ac.in", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbhatt2023mitigating,\ntitle={Mitigating the Effect of Incidental Correlations on Part-based Learning},\nauthor={Gaurav Bhatt and Deepayan Das and Leonid Sigal and Vineeth N. Balasubramanian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8Xn3D9OtqI}\n}", "github": "", "project": "", "reviewers": "Tpfu;QGVs;gyRR;DtGy", "pdf_size": 3726771, "rating": "6;6;6;6", "confidence": "5;3;3;4", "soundness": "3;3;2;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "66;97;83;81", "wc_strengths": "71;47;47;111", "wc_weaknesses": "163;39;251;236", "wc_questions": "70;78;8;40", "wc_limitations": "35;24;11;58", "wc_review": "405;285;400;526", "wc_reply_reviewers": "22;131;111;0", "wc_reply_authors": "20;187;234;79", "reply_reviewers": "1;1;2;0", "reply_authors": "2;4;4;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 10.985786271359915 ], "wc_strengths_avg": [ 69.0, 26.153393661244042 ], "wc_weaknesses_avg": [ 172.25, 83.82533924774775 ], "wc_questions_avg": [ 49.0, 27.586228448267445 ], "wc_limitations_avg": [ 32.0, 17.24818831066034 ], "wc_review_avg": [ 404.0, 85.23790236743277 ], "wc_reply_reviewers_avg": [ 66.0, 55.995535536326464 ], "wc_reply_authors_avg": [ 130.0, 84.80271222077747 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=912593170285629453&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.ubc.ca;unitn.it;ubc.ca;iith.ac.in", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of British Columbia;University of Trento;Indian Institute of Technology Hyderabad", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ubc.ca;https://www.unitn.it;https://www.iith.ac.in", "aff_unique_abbr": "UBC;UniTN;IIT Hyderabad", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hyderabad", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Canada;Italy;India" }, { "title": "A Unified Discretization Framework for Differential Equation Approach with Lyapunov Arguments for Convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72641", "id": "8YN62t19AW", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/52da50b1ef221e4b1793e3bf44dd973d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8YN62t19AW", "openreview": "https://openreview.net/forum?id=8YN62t19AW", "poster": "/media/PosterPDFs/NeurIPS%202023/72641.png?t=1701416501.382732", "slides": "https://nips.cc/virtual/2023/poster/72641", "video": "https://nips.cc/virtual/2023/poster/72641", "author_site": "Kansei Ushiyama, Shun Sato, Takayasu Matsuo", "tldr": "", "abstract": "The differential equation (DE) approach for convex optimization, which relates optimization methods to specific continuous DEs with rate-revealing Lyapunov functionals, has gained increasing interest since the seminal paper by Su--Boyd--Cand\u00e8s (2014).\nHowever, the approach still lacks a crucial component to make it truly useful: there is no general, consistent way to transition back to discrete optimization methods. Consequently, even if we derive insights from continuous DEs, we still need to perform individualized and tedious calculations for the analysis of each method.\nThis paper aims to bridge this gap by introducing a new concept called ``weak discrete gradient'' (wDG), which consolidates the conditions required for discrete versions of gradients in the DE approach arguments.\nWe then define abstract optimization methods using wDG and provide abstract convergence theories that parallel those in continuous DEs.\nWe demonstrate that many typical optimization methods and their convergence rates can be derived as special cases of this abstract theory.\nThe proposed unified discretization framework for the differential equation approach to convex optimization provides an easy environment for developing new optimization methods and achieving competitive convergence rates with state-of-the-art methods, such as Nesterov's accelerated gradient.", "keywords": "Convex optimization;Numerical analysis;Ordinary differential equations;Convergence estimate", "primary_area": "", "supplementary_material": "/attachment/a7282ab291672413b4d0bc14013a0a55bfa99c36.zip", "author": "Kansei Ushiyama;Shun Sato;Takayasu Matsuo", "authorids": "~Kansei_Ushiyama1;~Shun_Sato2;~Takayasu_Matsuo1", "gender": "M;M;", "homepage": "https://kanseiushiyama.github.io/;http://www.sr3.t.u-tokyo.ac.jp/sato/;", "dblp": "322/1264;14/9770-1;13/9299", "google_scholar": ";https://scholar.google.co.jp/citations?user=SnL7aYgAAAAJ;https://scholar.google.co.jp/citations?user=5uPIeBQAAAAJ", "orcid": ";0000-0002-8938-0825;", "linkedin": ";;", "or_profile": "~Kansei_Ushiyama1;~Shun_Sato2;~Takayasu_Matsuo1", "aff": "The University of Tokyo, Tokyo University;The University of Tokyo;University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nushiyama2023a,\ntitle={A Unified Discretization Framework for Differential Equation Approach with Lyapunov Arguments for Convex Optimization},\nauthor={Kansei Ushiyama and Shun Sato and Takayasu Matsuo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8YN62t19AW}\n}", "github": "", "project": "", "reviewers": "7m3u;xD6M;eJSN;tGA7", "pdf_size": 617746, "rating": "5;6;6;7", "confidence": "3;3;3;4", "soundness": "4;3;4;3", "novelty": "2;2;3;3", "presentation": "4;3;3;4", "wc_summary": "107;102;77;104", "wc_strengths": "123;86;32;94", "wc_weaknesses": "110;35;159;142", "wc_questions": "80;18;1;162", "wc_limitations": 
"78;45;1;20", "wc_review": "498;286;270;522", "wc_reply_reviewers": "101;10;42;13", "wc_reply_authors": "60;0;0;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 97.5, 11.968709203585824 ], "wc_strengths_avg": [ 83.75, 32.89661836724255 ], "wc_weaknesses_avg": [ 111.5, 47.542086618069256 ], "wc_questions_avg": [ 65.25, 63.12438118508569 ], "wc_limitations_avg": [ 36.0, 28.83574171059243 ], "wc_review_avg": [ 394.0, 116.4474130240771 ], "wc_reply_reviewers_avg": [ 41.5, 36.55475345286848 ], "wc_reply_authors_avg": [ 19.5, 24.510201957552287 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13761054850680849045&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "0", "aff_campus_unique": "Tokyo;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "LagrangeBench: A Lagrangian Fluid Mechanics Benchmarking Suite", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73681", "id": "8ZRAHNT7E9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ccac3b120c7dc86d45f56830732b62be-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=8ZRAHNT7E9", "openreview": "https://openreview.net/forum?id=8ZRAHNT7E9", "poster": "/media/PosterPDFs/NeurIPS%202023/73681.png?t=1701538854.371201", "slides": "https://nips.cc/virtual/2023/poster/73681", "video": "https://nips.cc/virtual/2023/poster/73681", "author_site": "Artur Toshev, Gianluca Galletti, Fabian Fritz, Stefan Adami, Nikolaus Adams", "tldr": "", "abstract": "Machine learning has been successfully applied to grid-based PDE modeling in various scientific applications. However, learned PDE solvers based on Lagrangian particle discretizations, which are the preferred approach to problems with free surfaces or complex physics, remain largely unexplored. We present LagrangeBench, the first benchmarking suite for Lagrangian particle problems, focusing on temporal coarse-graining. In particular, our contribution is: (a) seven new fluid mechanics datasets (four in 2D and three in 3D) generated with the Smoothed Particle Hydrodynamics (SPH) method including the Taylor-Green vortex, lid-driven cavity, reverse Poiseuille flow, and dam break, each of which includes different physics like solid wall interactions or free surface, (b) efficient JAX-based API with various recent training strategies and three neighbor search routines, and (c) JAX implementation of established Graph Neural Networks (GNNs) like GNS and SEGNN with baseline results. Finally, to measure the performance of learned surrogates we go beyond established position errors and introduce physical metrics like kinetic energy MSE and Sinkhorn distance for the particle distribution. 
Our codebase is available under the URL: [https://github.com/tumaer/lagrangebench](https://github.com/tumaer/lagrangebench).", "keywords": "particle-based;Lagrangian;fluid mechanics;benchmark;graph neural networks;smoothed particle hydrodynamics", "primary_area": "", "supplementary_material": "/attachment/007cb9a33f158dade049ed4734c308544ea1a8fb.zip", "author": "Artur Toshev;Gianluca Galletti;Fabian Fritz;Stefan Adami;Nikolaus A. Adams", "authorids": "~Artur_Toshev1;~Gianluca_Galletti1;fabian.fritz@tum.de;~Stefan_Adami1;~Nikolaus_A._Adams1", "gender": ";Not Specified;;;M", "homepage": "https://arturtoshev.github.io/;https://github.com/gerkone;;;", "dblp": "344/3672;344/3504;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;https://scholar.google.de/citations?user=Oer7zf4AAAAJ", "orcid": "0000-0003-0486-5565;;;0000-0003-0731-3969; 0000-0001-5048-8639", "linkedin": ";;;;", "or_profile": "~Artur_Toshev1;~Gianluca_Galletti1;fabian.fritz@tum.de;~Stefan_Adami1;~Nikolaus_A._Adams1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;;Technical University Munich;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;tum.de;;tum.de;tum.de", "position": "PhD student;MS student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\ntoshev2023lagrangebench,\ntitle={LagrangeBench: A Lagrangian Fluid Mechanics Benchmarking Suite},\nauthor={Artur Toshev and Gianluca Galletti and Fabian Fritz and Stefan Adami and Nikolaus A. Adams},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=8ZRAHNT7E9}\n}", "github": "", "project": "", "reviewers": "YttU;uzym;Dee9;5Cwx;6nz7", "pdf_size": 10358477, "rating": "5;6;6;7;8", "confidence": "3;2;4;3;3", "wc_summary_and_contributions": "48;82;77;132;123", "wc_strengths": "102;40;107;99;106", "wc_improvement": "152;307;121;118;24", "wc_limitations": "113;54;8;9;64", "wc_correctness": "50;36;4;223;196", "wc_clarity": "6;7;1;23;634", "wc_relation_to_prior_work": "16;20;1;20;122", "wc_documentation": "27;13;8;43;158", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "515;560;328;668;1428", "wc_reply_reviewers": "0;80;12;42;63", "wc_reply_authors": "409;629;521;747;794", "reply_reviewers": "0;1;1;1;2", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 92.4, 31.052214091751978 ], "wc_strengths_avg": [ 90.8, 25.560907652115954 ], "wc_improvement_avg": [ 144.4, 91.90995593514339 ], "wc_limitations_avg": [ 49.6, 39.0517605236947 ], "wc_correctness_avg": [ 101.8, 89.60000000000001 ], "wc_clarity_avg": [ 134.2, 250.00911983365725 ], "wc_relation_to_prior_work_avg": [ 35.8, 43.664172956784604 ], "wc_documentation_avg": [ 49.8, 55.45232186302031 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 699.8, 380.32007572569717 ], "wc_reply_reviewers_avg": [ 39.4, 30.05062395358872 ], "wc_reply_authors_avg": [ 620.0, 141.9915490443005 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15628163973795233655&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tum.de;tum.de;;tum.de;tum.de", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technische 
Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "On the Convergence of Encoder-only Shallow Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72640", "id": "8ZveVHfmIE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a3cf318fbeec1126da21e9185ae9908c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8ZveVHfmIE", "openreview": "https://openreview.net/forum?id=8ZveVHfmIE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72640", "video": "https://nips.cc/virtual/2023/poster/72640", "author_site": "Yongtao Wu, Fanghui Liu, Grigorios Chrysos, Volkan Cevher", "tldr": "", "abstract": "In this paper, we aim to build the global convergence theory of encoder-only shallow Transformers under a realistic setting from the perspective of architectures, initialization, and scaling, in a finite-width regime. The difficulty lies in how to tackle the softmax in the self-attention mechanism, the core ingredient of the Transformer. In particular, we diagnose the scaling scheme, carefully tackle the input/output of softmax, and prove that quadratic overparameterization is sufficient for global convergence of our shallow Transformers under the He/LeCun initializations commonly used in practice. Besides, a neural tangent kernel (NTK) based analysis is also given, which facilitates a comprehensive comparison. Our theory demonstrates a separation in the importance of different scaling schemes and initializations. We believe our results can pave the way for a better understanding of modern Transformers, particularly regarding training dynamics.", "keywords": "Transformer;convergence;scaling;initialization;over-parameterization", "primary_area": "", "supplementary_material": "", "author": "Yongtao Wu;Fanghui Liu;Grigorios Chrysos;Volkan Cevher", "authorids": "~Yongtao_Wu1;~Fanghui_Liu1;~Grigorios_Chrysos1;~Volkan_Cevher1", "gender": "M;M;M;M", "homepage": "https://www.epfl.ch/labs/lions/people/phds/yongtao-wu/;http://www.lfhsgre.org;https://grigorisg9gr.github.io/;http://lions.epfl.ch", "dblp": "322/3726;119/1038;75/6117-2;70/5301", "google_scholar": "rLgDE9AAAAAJ;AKxBgssAAAAJ;1bU041kAAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";0000-0003-4133-7921;;", "linkedin": ";;;", "or_profile": "~Yongtao_Wu1;~Fanghui_Liu1;~Grigorios_Chrysos1;~Volkan_Cevher1", "aff": "Swiss Federal Institute of Technology Lausanne;\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL);Swiss Federal Institute of Technology Lausanne;Amazon Development Center Germany", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;amazon.de", "position": "PhD student;Postdoc;Postdoc;Amazon Scholar", "bibtex": "@inproceedings{\nwu2023on,\ntitle={On the Convergence of Encoder-only Shallow Transformers},\nauthor={Yongtao Wu and Fanghui Liu and Grigorios Chrysos and Volkan Cevher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8ZveVHfmIE}\n}", "github": "", "project": "", "reviewers": "UvAq;p3n5;n1Tb;sZxG;9ERM;YNcT", "pdf_size": 1080547, "rating": "5;5;5;6;7;7", "confidence": "4;3;2;3;3;4", "soundness": "3;2;3;3;4;3", "novelty": "2;3;3;3;4;4", "presentation": "3;2;4;2;4;3", "wc_summary": "127;154;43;83;105;168", 
"wc_strengths": "98;156;39;53;107;135", "wc_weaknesses": "111;230;27;125;55;263", "wc_questions": "100;155;50;144;77;406", "wc_limitations": "19;3;26;8;17;16", "wc_review": "455;698;185;413;361;988", "wc_reply_reviewers": "30;252;0;162;24;25", "wc_reply_authors": "0;513;0;165;0;0", "reply_reviewers": "1;2;0;1;1;1", "reply_authors": "1;3;1;2;1;1", "rating_avg": [ 5.833333333333333, 0.8975274678557507 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 113.33333333333333, 42.35826037766686 ], "wc_strengths_avg": [ 98.0, 41.47288270665544 ], "wc_weaknesses_avg": [ 135.16666666666666, 85.7795948281926 ], "wc_questions_avg": [ 155.33333333333334, 117.79596295666882 ], "wc_limitations_avg": [ 14.833333333333334, 7.4703116103383245 ], "wc_review_avg": [ 516.6666666666666, 259.42928816067695 ], "wc_reply_reviewers_avg": [ 82.16666666666667, 92.50300295425849 ], "wc_reply_authors_avg": [ 113.0, 188.7591057406238 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.31526414437773154, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4952122738201698241&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "epfl.ch;epfl.ch;epfl.ch;amazon.de", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;Amazon", "aff_unique_dep": ";;Development Center", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "EPFL;EPFL;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "No Change, No Gain: Empowering Graph Neural Networks with Expected Model Change Maximization for Active Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72639", "id": "8aDG51pxFc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/944ecf65a46feb578a43abfd5cddd960-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8aDG51pxFc", "openreview": "https://openreview.net/forum?id=8aDG51pxFc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72639", "video": "https://nips.cc/virtual/2023/poster/72639", "author_site": "Zixing Song, Yifei Zhang, Irwin King", "tldr": "", "abstract": "Graph Neural Networks (GNNs) are crucial for machine learning applications with graph-structured data, but their success depends on sufficient labeled data. We present a novel active learning (AL) method for GNNs, extending the Expected Model Change Maximization (EMCM) principle to improve prediction performance on unlabeled data. By presenting a Bayesian interpretation for the node embeddings generated by GNNs under the semi-supervised setting, we efficiently compute the closed-form EMCM acquisition function as the selection criterion for AL without re-training. Our method establishes a direct connection with expected prediction error minimization, offering theoretical guarantees for AL performance. 
Experiments demonstrate our method's effectiveness compared to existing approaches, in terms of both accuracy and efficiency.", "keywords": "Graph Neural Networks;Expected Model Change Maximization", "primary_area": "", "supplementary_material": "/attachment/9b5477dec767f153ab8c9b0cbb336e4b7db950c3.pdf", "author": "Zixing Song;Yifei Zhang;Irwin King", "authorids": "~Zixing_Song2;~Yifei_Zhang6;~Irwin_King1", "gender": ";M;M", "homepage": ";https://yifeiacc.github.io/;https://www.cse.cuhk.edu.hk/irwin.king/", "dblp": ";55/5266-1.html;k/IrwinKing", "google_scholar": ";DmwXESQAAAAJ;MXvC7tkAAAAJ", "orcid": ";0000-0003-4185-8663;0000-0001-8106-6447", "linkedin": ";;irwinking/", "or_profile": "~Zixing_Song2;~Yifei_Zhang6;~Irwin_King1", "aff": ";Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": ";cse.cuhk.edu.hk;cuhk.edu.hk", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nsong2023no,\ntitle={No Change, No Gain: Empowering Graph Neural Networks with Expected Model Change Maximization for Active Learning},\nauthor={Zixing Song and Yifei Zhang and Irwin King},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8aDG51pxFc}\n}", "github": "", "project": "", "reviewers": "3rHV;MppE;zbTy;bxJi", "pdf_size": 429897, "rating": "7;7;7;7", "confidence": "5;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "91;71;67;64", "wc_strengths": "428;32;88;47", "wc_weaknesses": "120;323;51;50", "wc_questions": "96;6;81;63", "wc_limitations": "63;1;67;3", "wc_review": "798;433;354;227", "wc_reply_reviewers": "0;40;0;20", "wc_reply_authors": "0;33;0;20", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 73.25, 10.54454835448157 ], "wc_strengths_avg": [ 148.75, 162.5228830042096 ], "wc_weaknesses_avg": [ 136.0, 111.63108885968998 ], "wc_questions_avg": [ 61.5, 34.106451002706216 ], "wc_limitations_avg": [ 33.5, 31.539657575820318 ], "wc_review_avg": [ 453.0, 212.30991498279113 ], "wc_reply_reviewers_avg": [ 15.0, 16.583123951777 ], "wc_reply_authors_avg": [ 13.25, 14.02453207775575 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11144623077913130471&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";cse.cuhk.edu.hk;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Convex and Non-convex Optimization Under Generalized Smoothness", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72638", "id": "8aunGrXdkl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e8bb8d17bb1cb24dfe972a2f8ff2500-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8aunGrXdkl", "openreview": 
"https://openreview.net/forum?id=8aunGrXdkl", "poster": "/media/PosterPDFs/NeurIPS%202023/72638.png?t=1702268268.0256257", "slides": "https://nips.cc/virtual/2023/poster/72638", "video": "https://nips.cc/virtual/2023/poster/72638", "author_site": "Haochuan Li, Jian Qian, Yi Tian, Alexander Rakhlin, Ali Jadbabaie", "tldr": "", "abstract": "Classical analysis of convex and non-convex optimization methods often requires the Lipschitz continuity of the gradient, which limits the analysis to functions bounded by quadratics. Recent work relaxed this requirement to a non-uniform smoothness condition with the Hessian norm bounded by an affine function of the gradient norm, and proved convergence in the non-convex setting via gradient clipping, assuming bounded noise. In this paper, we further generalize this non-uniform smoothness condition and develop a simple, yet powerful analysis technique that bounds the gradients along the trajectory, thereby leading to stronger results for both convex and non-convex optimization problems. In particular, we obtain the classical convergence rates for (stochastic) gradient descent and Nesterov's accelerated gradient method in the convex and/or non-convex setting under this general smoothness condition. The new analysis approach does not require gradient clipping and allows heavy-tailed noise with bounded variance in the stochastic setting.", "keywords": "Optimization;Convergence;Generalized smoothness", "primary_area": "", "supplementary_material": "", "author": "Haochuan Li;Jian Qian;Yi Tian;Alexander Rakhlin;Ali Jadbabaie", "authorids": "~Haochuan_Li2;~Jian_Qian2;~Yi_Tian1;~Alexander_Rakhlin1;~Ali_Jadbabaie1", "gender": "M;;;M;M", "homepage": ";https://sites.google.com/view/jianqian/about;https://yi-t.github.io/;http://www.mit.edu/~rakhlin/;http://www.mit.edu/~jadbabai/www", "dblp": "https://dblp.org/pers/l/Li:Haochuan.html;;98/1774;59/407;83/3158", "google_scholar": "1yB0eLMAAAAJ;;B9jUcIgAAAAJ;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ;ZBc_WwYAAAAJ", "orcid": ";;;;", "linkedin": ";jianQ/;yi-tian-mit/;;", "or_profile": "~Haochuan_Li2;~Jian_Qian2;~Yi_Tian1;~Alexander_Rakhlin1;~Ali_Jadbabaie1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023convex,\ntitle={Convex and Non-convex Optimization Under Generalized Smoothness},\nauthor={Haochuan Li and Jian Qian and Yi Tian and Alexander Rakhlin and Ali Jadbabaie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8aunGrXdkl}\n}", "github": "", "project": "", "reviewers": "YkEg;cK5L;QDyc;GKNp;MmTX", "pdf_size": 449592, "rating": "3;6;7;8;8", "confidence": "4;4;4;4;4", "soundness": "2;3;4;4;4", "novelty": "1;2;3;4;4", "presentation": "2;2;3;4;4", "wc_summary": "27;32;135;108;59", "wc_strengths": "12;30;52;87;28", "wc_weaknesses": "52;1182;28;108;308", "wc_questions": "13;17;119;356;2", "wc_limitations": "15;1;10;34;1", "wc_review": "119;1262;344;693;398", "wc_reply_reviewers": "0;635;0;0;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.8547236990991407 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.4, 0.8 ], 
"novelty_avg": [ 2.8, 1.16619037896906 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 72.2, 42.56477416831904 ], "wc_strengths_avg": [ 41.8, 25.941472587345537 ], "wc_weaknesses_avg": [ 335.6, 434.5119561070788 ], "wc_questions_avg": [ 101.4, 134.1262092210169 ], "wc_limitations_avg": [ 12.2, 12.155657119218196 ], "wc_review_avg": [ 563.2, 394.41673392491856 ], "wc_reply_reviewers_avg": [ 129.6, 252.7501533135044 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13859637737815379538&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Provably Efficient Offline Reinforcement Learning in Regular Decision Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72637", "id": "8bQc7oRnjm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7bf3e93543a612b75b6373178ba1faa4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8bQc7oRnjm", "openreview": "https://openreview.net/forum?id=8bQc7oRnjm", "poster": "/media/PosterPDFs/NeurIPS%202023/72637.png?t=1699613576.9910781", "slides": "https://nips.cc/virtual/2023/poster/72637", "video": "https://nips.cc/virtual/2023/poster/72637", "author_site": "Roberto Cipollone, Anders Jonsson, Alessandro Ronca, Mohammad Sadegh Talebi", "tldr": "", "abstract": "This paper deals with offline (or batch) Reinforcement Learning (RL) in episodic Regular Decision Processes (RDPs). RDPs are the subclass of Non-Markov Decision Processes where the dependency on the history of past events can be captured by a finite-state automaton. We consider a setting where the automaton that underlies the RDP is unknown, and a learner strives to learn a near-optimal policy using pre-collected data, in the form of non-Markov sequences of observations, without further exploration. We present RegORL, an algorithm that suitably combines automata learning techniques and state-of-the-art algorithms for offline RL in MDPs. RegORL has a modular design allowing one to use any off-the-shelf offline RL algorithm in MDPs. We report a non-asymptotic high-probability sample complexity bound for RegORL to yield an $\\varepsilon$-optimal policy, which makes appear a notion of concentrability relevant for RDPs. Furthermore, we present a sample complexity lower bound for offline RL in RDPs. 
To our best knowledge, this is the first work presenting a provably efficient algorithm for offline learning in RDPs.", "keywords": "Reinforcement Learning;Offline Reinforcement Learning;Regular Decision Processes;Sample complexity;Automata", "primary_area": "", "supplementary_material": "", "author": "Roberto Cipollone;Anders Jonsson;Alessandro Ronca;Mohammad Sadegh Talebi", "authorids": "~Roberto_Cipollone1;~Anders_Jonsson1;~Alessandro_Ronca1;~Mohammad_Sadegh_Talebi3", "gender": "M;M;M;M", "homepage": "https://cipollone.github.io/;https://www.upf.edu/web/anders-jonsson;https://www.cs.ox.ac.uk/people/alessandro.ronca/;https://sites.google.com/view/talebi/", "dblp": "132/1071-2;05/3488;166/1433;32/1105", "google_scholar": "kR301eQAAAAJ;https://scholar.google.es/citations?user=SI_uHCIAAAAJ;0NwK2-IAAAAJ;Q4_AHDYAAAAJ", "orcid": "0000-0002-0421-5792;;0000-0002-0131-2087;", "linkedin": ";;ronca-alessandro/;", "or_profile": "~Roberto_Cipollone1;~Anders_Jonsson1;~Alessandro_Ronca1;~Mohammad_Sadegh_Talebi3", "aff": "Sapienza University of Rome;Universitat Pompeu Fabra;Sapienza University of Rome;University of Copenhagen", "aff_domain": "uniroma1.it;upf.edu;diag.uniroma1.it;ku.dk", "position": "PhD student;Full Professor;Research Associate;Assistant Professor", "bibtex": "@inproceedings{\ncipollone2023provably,\ntitle={Provably Efficient Offline Reinforcement Learning in Regular Decision Processes},\nauthor={Roberto Cipollone and Anders Jonsson and Alessandro Ronca and Mohammad Sadegh Talebi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8bQc7oRnjm}\n}", "github": "", "project": "", "reviewers": "xyrg;9NCg;TAnR;9RiT;jsz4", "pdf_size": 660305, "rating": "6;6;6;8;8", "confidence": "3;3;4;3;2", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;4;3;3;3", "wc_summary": "84;57;172;105;105", "wc_strengths": "60;55;110;14;60", "wc_weaknesses": "21;78;225;1;91", "wc_questions": "31;63;1;11;163", "wc_limitations": "15;9;1;1;1", "wc_review": "211;262;509;132;420", "wc_reply_reviewers": "26;0;18;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 104.6, 38.03472097965226 ], "wc_strengths_avg": [ 59.8, 30.465718438927386 ], "wc_weaknesses_avg": [ 83.2, 78.51216466255404 ], "wc_questions_avg": [ 53.8, 58.56415285821183 ], "wc_limitations_avg": [ 5.4, 5.71314274283428 ], "wc_review_avg": [ 306.8, 138.1584597482181 ], "wc_reply_reviewers_avg": [ 8.8, 11.070682002478438 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11411139970504372829&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "uniroma1.it;upf.edu;diag.uniroma1.it;ku.dk", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Sapienza University of Rome;Universitat Pompeu Fabra;University of Copenhagen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uniroma1.it;https://www.upf.edu/;https://www.ku.dk", "aff_unique_abbr": "Sapienza;UPF;UCPH", "aff_campus_unique_index": "0;0", "aff_campus_unique": 
"Rome;", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Italy;Spain;Denmark" }, { "title": "Understanding Social Reasoning in Language Models with Language Models", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73680", "id": "8bqjirgxQM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b9efb085d3829a2aadffab63ba206de-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=8bqjirgxQM", "openreview": "https://openreview.net/forum?id=8bqjirgxQM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73680", "video": "https://nips.cc/virtual/2023/poster/73680", "author_site": "Kanishk Gandhi, Jan-Philipp Fraenken, Tobias Gerstenberg, Noah Goodman", "tldr": "", "abstract": "As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess the Theory-of-Mind (ToM) reasoning capabilities of LLMs, the degree to which these models can align with human ToM remains a nuanced topic of exploration. This is primarily due to two distinct challenges: (1) the presence of inconsistent results from previous evaluations, and (2) concerns surrounding the validity of existing evaluation methodologies. To address these challenges, we present a novel framework for procedurally generating evaluations with LLMs by populating causal templates. Using our framework, we create a new social reasoning benchmark (BigToM) for LLMs which consists of 25 controls and 5,000 model-written evaluations. We find that human participants rate the quality of our benchmark higher than previous crowd-sourced evaluations and comparable to expert-written evaluations. Using BigToM, we evaluate the social reasoning capabilities of a variety of LLMs and compare model performances with human performance. 
Our results suggest that GPT4 has ToM capabilities that mirror human inference patterns, though less reliable, while other LLMs struggle.", "keywords": "Social Reasoning;Theory of Mind;Large Language Models;Benchmark;Multi-Agent Reasoning;Intuitive Psychology;Common Sense;Reasoning", "primary_area": "", "supplementary_material": "/attachment/3252fc97bd19bc4a1b8d91a7bcf0c477225731b6.pdf", "author": "Kanishk Gandhi;Jan-Philipp Fr\u00e4nken;Tobias Gerstenberg;Noah Goodman", "authorids": "~Kanishk_Gandhi1;~Jan-Philipp_Fr\u00e4nken1;~Tobias_Gerstenberg1;~Noah_Goodman1", "gender": ";M;;M", "homepage": "https://janphilippfranken.github.io/;http://cicl.stanford.edu/member/tobias_gerstenberg;https://cocolab.stanford.edu/;https://kanishkgandhi.com", "dblp": ";;96/1216;243/5820", "google_scholar": "s2omqQcAAAAJ;d0TfP8EAAAAJ;OUpIbcQAAAAJ;", "orcid": "0000-0001-5467-1887;0000-0002-9162-0779;;", "linkedin": ";;;", "or_profile": "~Jan-Philipp_Fr\u00e4nken1;~Tobias_Gerstenberg1;~Noah_Goodman1;~Kanishk_V_Gandhi1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Postdoc;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ngandhi2023understanding,\ntitle={Understanding Social Reasoning in Language Models with Language Models},\nauthor={Kanishk Gandhi and Jan-Philipp Fr{\\\"a}nken and Tobias Gerstenberg and Noah Goodman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=8bqjirgxQM}\n}", "github": "", "project": "", "reviewers": "rvRQ;KFdV;ffAP;m3qS;URe5", "pdf_size": 1254842, "rating": "6;6;6;8;9", "confidence": "2;4;4;3;4", "wc_summary_and_contributions": "63;67;63;50;56", "wc_strengths": "39;39;52;67;34", "wc_improvement": "48;191;219;28;24", "wc_limitations": "33;6;16;22;2", "wc_correctness": "7;4;17;15;40", "wc_clarity": "4;6;251;14;9", "wc_relation_to_prior_work": "3;1;25;17;28", "wc_documentation": "60;1;23;4;20", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "258;316;667;218;214", "wc_reply_reviewers": "0;0;13;12;11", "wc_reply_authors": "497;513;1308;350;101", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 7.0, 1.2649110640673518 ], "confidence_avg": [ 3.4, 0.8 ], "wc_summary_and_contributions_avg": [ 59.8, 6.046486583132389 ], "wc_strengths_avg": [ 46.2, 11.98999582985749 ], "wc_improvement_avg": [ 102.0, 84.95410525689738 ], "wc_limitations_avg": [ 15.8, 11.142710621747296 ], "wc_correctness_avg": [ 16.6, 12.658593918757328 ], "wc_clarity_avg": [ 56.8, 97.15842732362437 ], "wc_relation_to_prior_work_avg": [ 14.8, 11.070682002478437 ], "wc_documentation_avg": [ 21.6, 21.039011383617815 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 334.6, 170.2017626230704 ], "wc_reply_reviewers_avg": [ 7.2, 5.912698199637792 ], "wc_reply_authors_avg": [ 553.8, 405.03599840014215 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.1976423537605237, "gs_citation": 123, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13923690522410723559&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", 
"aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Accurate Interpolation for Scattered Data through Hierarchical Residual Refinement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72636", "id": "8d9wVXri89", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1d5a92867cf463fad136cfa23395840b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8d9wVXri89", "openreview": "https://openreview.net/forum?id=8d9wVXri89", "poster": "/media/PosterPDFs/NeurIPS%202023/72636.png?t=1702133533.5973837", "slides": "https://nips.cc/virtual/2023/poster/72636", "video": "https://nips.cc/virtual/2023/poster/72636", "author_site": "Shizhe Ding, Boyang Xia, Dongbo Bu", "tldr": "", "abstract": "Accurate interpolation algorithms are highly desired in various theoretical and engineering scenarios. Unlike the traditional numerical algorithms that have exact zero-residual constraints on observed points, the neural network-based interpolation methods exhibit non-zero residuals at these points. These residuals, which provide observations of an underlying residual function, can guide predicting interpolation functions, but have not been exploited by the existing approaches. To fill this gap, we propose Hierarchical INTerpolation Network (HINT), which utilizes the residuals on observed points to guide target function estimation in a hierarchical fashion. HINT consists of several sequentially arranged lightweight interpolation blocks. The first interpolation block estimates the main component of the target function, while subsequent blocks predict the residual components using observed points residuals of the preceding blocks. The main component and residual components are accumulated to form the final interpolation results. Furthermore, under the assumption that finer residual prediction requires a more focused attention range on observed points, we utilize hierarchical local constraints in correlation modeling between observed and target points. 
Extensive experiments demonstrate that HINT outperforms existing interpolation algorithms significantly in terms of interpolation accuracy across a wide variety of datasets, which underscores its potential for practical scenarios.", "keywords": "Interpolation algorithm;scattered data;deep learning;residual learning", "primary_area": "", "supplementary_material": "/attachment/dc5c12a4876ed06d61ebf7be0566010d68b407f9.pdf", "author": "Shizhe Ding;Boyang Xia;Dongbo Bu", "authorids": "~Shizhe_Ding2;~Boyang_Xia1;~Dongbo_Bu1", "gender": ";M;M", "homepage": "https://github.com/DingShizhe;https://github.com/LawrenceXia2008;http://bioinfo.ict.ac.cn/~dbu/", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shizhe_Ding2;~Boyang_Xia1;~Dongbo_Bu1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ucas.ac.cn", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nding2023accurate,\ntitle={Accurate Interpolation for Scattered Data through Hierarchical Residual Refinement},\nauthor={Shizhe Ding and Boyang Xia and Dongbo Bu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8d9wVXri89}\n}", "github": "", "project": "", "reviewers": "WnAW;ce6Q;RpoB;G7AZ;746Q", "pdf_size": 2083094, "rating": "3;5;5;5;5", "confidence": "4;3;3;4;3", "soundness": "3;3;3;3;2", "novelty": "2;2;3;2;3", "presentation": "2;4;2;3;3", "wc_summary": "99;94;82;138;69", "wc_strengths": "29;29;22;21;52", "wc_weaknesses": "237;158;68;32;91", "wc_questions": "34;48;7;66;2", "wc_limitations": "13;24;1;1;1", "wc_review": "412;353;180;258;215", "wc_reply_reviewers": "305;0;26;21;19", "wc_reply_authors": "405;22;21;26;36", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 96.4, 23.243063481391605 ], "wc_strengths_avg": [ 30.6, 11.217842929904126 ], "wc_weaknesses_avg": [ 117.2, 72.63993392067479 ], "wc_questions_avg": [ 31.4, 24.24541193710678 ], "wc_limitations_avg": [ 8.0, 9.252026804976301 ], "wc_review_avg": [ 283.6, 86.48375570013134 ], "wc_reply_reviewers_avg": [ 74.2, 115.7365974962112 ], "wc_reply_authors_avg": [ 102.0, 151.59287582205175 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6123724356957948, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uN3n0tc5SzwJ:scholar.google.com/&scioq=Accurate+Interpolation+for+Scattered+Data+through+Hierarchical+Residual+Refinement&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "ict.ac.cn;ict.ac.cn;ucas.ac.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "8eVgdwKs2N", "title": "G4SATBench: Benchmarking 
and Advancing SAT Solving with Graph Neural Networks", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Graph neural networks (GNNs) have recently emerged as a promising approach for solving the Boolean Satisfiability Problem (SAT), offering potential alternatives to traditional backtracking or local search SAT solvers. However, despite the growing volume of literature in this field, there remains a notable absence of a unified dataset and a fair benchmark to evaluate and compare existing approaches. To address this crucial gap, we present G4SATBench, the first benchmark study that establishes a comprehensive evaluation framework for GNN-based SAT solvers. In G4SATBench, we meticulously curate a large and diverse set of SAT datasets comprising 7 problems with 3 difficulty levels and benchmark a broad range of GNN models across various prediction tasks, training objectives, and inference algorithms. To explore the learning abilities and comprehend the strengths and limitations of GNN-based SAT solvers, we also compare their solving processes with the heuristics in search-based SAT solvers. Our empirical results provide valuable insights into the performance of GNN-based SAT solvers and further suggest that existing GNN models can effectively learn a solving strategy akin to greedy local search but struggle to learn backtracking search in the latent space.", "keywords": "SAT solving;Graph neural networks", "primary_area": "", "supplementary_material": "/attachment/d6c9c26a256199f34dd5a86be38de3b62def4e6b.pdf", "author": "Zhaoyu Li;Jinpei Guo;Xujie Si", "authorids": "~Zhaoyu_Li3;~Jinpei_Guo1;~Xujie_Si1", "gender": "M;M;M", "homepage": "https://www.zhaoyu-li.com/;https://jp-guo.github.io/;https://xujie.si", "dblp": ";;142/8449", "google_scholar": ";;Ru-jrx4AAAAJ", "orcid": ";;", "linkedin": "zhaoyu-li-9171892a5/;;", "or_profile": "~Zhaoyu_Li3;~Jinpei_Guo1;~Xujie_Si1", "aff": "McGill University;Shanghai Jiaotong University;University of Toronto", "aff_domain": "cs.mcgill.ca;sjtu.edu.cn;toronto.edu", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@misc{\nli2023gsatbench,\ntitle={G4{SATB}ench: Benchmarking and Advancing {SAT} Solving with Graph Neural Networks},\nauthor={Zhaoyu Li and Jinpei Guo and Xujie Si},\nyear={2023},\nurl={https://openreview.net/forum?id=8eVgdwKs2N}\n}", "github": "", "project": "", "reviewers": "CeEF;yiUC;oRgT;upz8", "site": "https://openreview.net/forum?id=8eVgdwKs2N", "pdf_size": 491843, "rating": "6;7;7;9", "confidence": "3;4;3;3", "wc_summary_and_contributions": "20;90;98;110", "wc_strengths": "53;63;52;77", "wc_improvement": "55;121;98;18", "wc_limitations": "1;64;77;40", "wc_correctness": "7;46;25;27", "wc_clarity": "1;8;5;77", "wc_relation_to_prior_work": "1;68;85;24", "wc_documentation": "52;23;13;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "191;484;454;381", "wc_reply_reviewers": "5;0;0;0", "wc_reply_authors": "119;444;326;169", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 79.5, 35.082046690579496 ], "wc_strengths_avg": [ 61.25, 10.059199769365355 ], "wc_improvement_avg": [ 73.0, 39.61691557908061 ], "wc_limitations_avg": [ 45.5, 28.91798748184251 ], "wc_correctness_avg": [ 26.25, 13.808964479641476 ], "wc_clarity_avg": [ 22.75, 31.419540098480116 ], "wc_relation_to_prior_work_avg": [ 44.5, 33.55964838909967 ], "wc_documentation_avg": [ 23.75, 
17.282577932704367 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 377.5, 114.00548232431632 ], "wc_reply_reviewers_avg": [ 1.25, 2.165063509461097 ], "wc_reply_authors_avg": [ 264.5, 128.73713527960766 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4975517059889206601&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "McGill University;Shanghai Jiao Tong University;University of Toronto", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mcgill.ca;https://www.sjtu.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "McGill;SJTU;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;China" }, { "title": "Uniform-in-Time Wasserstein Stability Bounds for (Noisy) Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72635", "id": "8fLatmFQgF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05d6b5b6901fb57d2c287e1d3ce6d63c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8fLatmFQgF", "openreview": "https://openreview.net/forum?id=8fLatmFQgF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72635", "video": "https://nips.cc/virtual/2023/poster/72635", "author_site": "Lingjiong Zhu, Mert Gurbuzbalaban, Anant Raj, Umut Simsekli", "tldr": "", "abstract": "Algorithmic stability is an important notion that has proven powerful for deriving generalization bounds for practical algorithms. The last decade has witnessed an increasing number of stability bounds for different algorithms applied to different classes of loss functions. While these bounds have illuminated various properties of optimization algorithms, the analysis of each case typically required a different proof technique with significantly different mathematical tools. In this study, we make a novel connection between learning theory and applied probability and introduce a unified guideline for proving Wasserstein stability bounds for stochastic optimization algorithms. We illustrate our approach on stochastic gradient descent (SGD) and we obtain time-uniform stability bounds (i.e., the bound does not increase with the number of iterations) for strongly convex losses and non-convex losses with additive noise, where we recover similar results to the prior art or extend them to more general cases by using a single proof technique. Our approach is flexible and can be generalized to other popular optimizers, as it mainly requires developing Lyapunov functions, which are often readily available in the literature. It also illustrates that ergodicity is an important component for obtaining time-uniform bounds -- which might not be achieved for convex or non-convex losses unless additional noise is injected into the iterates. 
Finally, we slightly stretch our analysis technique and prove time-uniform bounds for SGD under convex and non-convex losses (without additional additive noise), which, to our knowledge, is novel.", "keywords": "Algorithmic stability;SGD;Wasserstein distance", "primary_area": "", "supplementary_material": "", "author": "Lingjiong Zhu;Mert Gurbuzbalaban;Anant Raj;Umut Simsekli", "authorids": "~Lingjiong_Zhu1;~Mert_Gurbuzbalaban1;~Anant_Raj2;~Umut_Simsekli1", "gender": "M;;;M", "homepage": ";;;https://www.di.ens.fr/~simsekli/", "dblp": "178/6958;09/9185;;https://dblp.org/pers/s/Simsekli:Umut.html", "google_scholar": "Z9JkFaoAAAAJ;;;https://scholar.google.fr/citations?user=CuArAkgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lingjiong_Zhu1;~Mert_Gurbuzbalaban1;~Anant_Raj2;~Umut_Simsekli1", "aff": "Florida State University;Rutgers University;;INRIA", "aff_domain": "fsu.edu;rutgers.edu;;inria.fr", "position": "Associate Professor;Assistant Professor;;Research Faculty", "bibtex": "@inproceedings{\nzhu2023uniformintime,\ntitle={Uniform-in-Time Wasserstein Stability Bounds for (Noisy) Stochastic Gradient Descent},\nauthor={Lingjiong Zhu and Mert Gurbuzbalaban and Anant Raj and Umut Simsekli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8fLatmFQgF}\n}", "github": "", "project": "", "reviewers": "tckZ;Wt2h;yPZ3;9dAF;RR88", "pdf_size": 443858, "rating": "5;6;6;7;7", "confidence": "5;2;4;3;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;4;3", "wc_summary": "18;477;109;22;88", "wc_strengths": "17;258;96;37;48", "wc_weaknesses": "60;429;109;1;499", "wc_questions": "212;98;122;53;220", "wc_limitations": "2;27;7;1;29", "wc_review": "309;1289;443;114;884", "wc_reply_reviewers": "477;86;84;74;574", "wc_reply_authors": "769;28;185;9;267", "reply_reviewers": "3;2;1;1;2", "reply_authors": "4;2;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 142.8, 170.88171347455528 ], "wc_strengths_avg": [ 91.2, 87.35536617747076 ], "wc_weaknesses_avg": [ 219.6, 203.66796508042202 ], "wc_questions_avg": [ 141.0, 65.17054549411107 ], "wc_limitations_avg": [ 13.2, 12.270289320142375 ], "wc_review_avg": [ 607.8, 424.4051837572204 ], "wc_reply_reviewers_avg": [ 259.0, 219.78534983023778 ], "wc_reply_authors_avg": [ 251.6, 276.1257684461919 ], "reply_reviewers_avg": [ 1.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6289709020331509, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5606836560594188444&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "fsu.edu;rutgers.edu;;inria.fr", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Florida State University;Rutgers University;INRIA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fsu.edu;https://www.rutgers.edu;https://www.inria.fr", "aff_unique_abbr": "FSU;Rutgers;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;France" }, { "title": "Pairwise GUI Dataset Construction Between Android Phones and Tablets", "status": "Poster", "track": "Datasets & Benchmarks", "site": 
"https://nips.cc/virtual/2023/poster/73679", "id": "8gDJXL652A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bc4cff0b37ccab13e98b6128d89ca172-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=8gDJXL652A", "openreview": "https://openreview.net/forum?id=8gDJXL652A", "poster": "/media/PosterPDFs/NeurIPS%202023/73679.png?t=1699887558.4902587", "slides": "https://nips.cc/virtual/2023/poster/73679", "video": "https://nips.cc/virtual/2023/poster/73679", "author_site": "han hu, Haolan Zhan, Yujin Huang, Di Liu", "tldr": "", "abstract": "In the current landscape of pervasive smartphones and tablets, apps frequently exist across both platforms.\nAlthough apps share most graphic user interfaces (GUIs) and functionalities across phones and tablets, developers often rebuild from scratch for tablet versions, escalating costs and squandering existing design resources.\nResearchers are attempting to collect data and employ deep learning in automated GUIs development to enhance developers' productivity.\nThere are currently several publicly accessible GUI page datasets for phones, but none for pairwise GUIs between phones and tablets.\nThis poses a significant barrier to the employment of deep learning in automated GUI development.\nIn this paper, we introduce the Papt dataset, a pioneering pairwise GUI dataset tailored for Android phones and tablets, encompassing 10,035 phone-tablet GUI page pairs sourced from 5,593 unique app pairs.\nWe propose novel pairwise GUI collection approaches for constructing this dataset and delineate its advantages over currently prevailing datasets in the field.\nThrough preliminary experiments on this dataset, we analyze the present challenges of utilizing deep learning in automated GUI development.", "keywords": "Datasets;Tablet;GUI Retrieval;GUI Generation;GUI Recommendation", "primary_area": "", "supplementary_material": "/attachment/021327b81f5c5eb46eb76183ed544ae0136d082e.pdf", "author": "Han Hu;Haolan Zhan;Yujin Huang;Di Liu", "authorids": "~Han_Hu8;~Haolan_Zhan1;yujin.huang@monash.edu;dliu0024@student.monash.edu", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": "8vqgN5gAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Han_Hu8;~Haolan_Zhan1;yujin.huang@monash.edu;dliu0024@student.monash.edu", "aff": "Monash University;;;", "aff_domain": "monash.edu;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nhu2023pairwise,\ntitle={Pairwise {GUI} Dataset Construction Between Android Phones and Tablets},\nauthor={Han Hu and Haolan Zhan and Yujin Huang and Di Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=8gDJXL652A}\n}", "github": "", "project": "", "reviewers": "ZrJu;BuL6;abm3;s1Us", "pdf_size": 3750637, "rating": "4;6;6;8", "confidence": "4;3;3;4", "wc_summary_and_contributions": "27;34;65;18", "wc_strengths": "24;29;83;26", "wc_improvement": "39;136;169;199", "wc_limitations": "50;41;14;18", "wc_correctness": "11;10;4;17", "wc_clarity": "9;5;14;20", "wc_relation_to_prior_work": "26;1;33;29", "wc_documentation": "23;1;15;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "210;258;398;342", "wc_reply_reviewers": "0;127;0;21", "wc_reply_authors": "748;945;714;677", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 36.0, 
17.67766952966369 ], "wc_strengths_avg": [ 40.5, 24.60182920028509 ], "wc_improvement_avg": [ 135.75, 60.13890171926987 ], "wc_limitations_avg": [ 30.75, 15.155444566227676 ], "wc_correctness_avg": [ 10.5, 4.6097722286464435 ], "wc_clarity_avg": [ 12.0, 5.612486080160912 ], "wc_relation_to_prior_work_avg": [ 22.25, 12.517487767120047 ], "wc_documentation_avg": [ 13.25, 7.8859051477937525 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 302.0, 72.8285658241325 ], "wc_reply_reviewers_avg": [ 37.0, 52.664029469838326 ], "wc_reply_authors_avg": [ 771.0, 103.54950506883169 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1749047717509778602&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 9, "email": "monash.edu;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "title": "A Riemannian Exponential Augmented Lagrangian Method for Computing the Projection Robust Wasserstein Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72634", "id": "8hKCNVqrlf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd02779b6c8885efc69bab6dd9571cee-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8hKCNVqrlf", "openreview": "https://openreview.net/forum?id=8hKCNVqrlf", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72634", "video": "https://nips.cc/virtual/2023/poster/72634", "author_site": "Bo Jiang, Ya-Feng Liu", "tldr": "", "abstract": "Projection robust Wasserstein (PRW) distance was recently proposed to efficiently mitigate the curse of dimensionality in the classical Wasserstein distance. In this paper, by equivalently reformulating the computation of the PRW distance as an optimization problem over the Cartesian product of the Stiefel manifold and the Euclidean space with additional nonlinear inequality constraints, we propose a Riemannian exponential augmented Lagrangian method (REALM) for solving this problem. Compared with the existing Riemannian exponential penalty-based approaches, REALM can potentially avoid overly small penalty parameters and exhibit more stable numerical performance. To solve the subproblems in REALM efficiently, we design an inexact Riemannian Barzilai-Borwein method with Sinkhorn iteration (iRBBS), which selects the stepsizes adaptively rather than laboriously tuning the stepsizes as done in the existing methods. We show that iRBBS can return an $\epsilon$-stationary point of the original PRW distance problem within $\mathcal{O}(\epsilon^{-3})$ iterations, which matches the best known iteration complexity result. 
Extensive numerical results demonstrate that our proposed methods outperform the state-of-the-art solvers for computing the PRW distance.", "keywords": "Barzilai-Borwein method;exponential augmented Lagrangian;inexact gradient;Stiefel manifold;Sinkhorn iteration;Wasserstein distance", "primary_area": "", "supplementary_material": "/attachment/cc5c9487df974bda1e3380312b81ea922c0983d8.zip", "author": "Bo Jiang;Ya-Feng Liu", "authorids": "~Bo_Jiang16;~Ya-Feng_Liu1", "gender": "M;M", "homepage": "http://math.njnu.edu.cn/info/1012/3667.htm;http://lsec.cc.ac.cn/~yafliu/", "dblp": ";29/8760", "google_scholar": "https://scholar.google.com.hk/citations?user=xJ7s50AAAAAJ;2IGxlzYAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Bo_Jiang16;~Ya-Feng_Liu1", "aff": "Nanjing Normal University;Chinese Academy of Sciences", "aff_domain": "njnu.edu.cn;lsec.cc.ac.cn", "position": "Associate Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2023a,\ntitle={A Riemannian Exponential Augmented Lagrangian Method for Computing the Projection Robust Wasserstein Distance},\nauthor={Bo Jiang and Ya-Feng Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8hKCNVqrlf}\n}", "github": "", "project": "", "reviewers": "EBs3;iBqS;KEsx;g8K2;X3cc", "pdf_size": 605566, "rating": "5;5;5;5;6", "confidence": "3;3;1;4;1", "soundness": "3;3;3;2;3", "novelty": "3;3;2;2;3", "presentation": "3;3;2;2;2", "wc_summary": "113;81;96;66;47", "wc_strengths": "79;46;15;51;77", "wc_weaknesses": "116;20;49;127;32", "wc_questions": "64;33;171;3;4", "wc_limitations": "1;21;47;3;2", "wc_review": "373;201;378;250;162", "wc_reply_reviewers": "19;11;11;10;18", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 2.4, 1.2 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 80.6, 22.93120145129775 ], "wc_strengths_avg": [ 53.6, 23.44013651837378 ], "wc_weaknesses_avg": [ 68.8, 44.14249653112066 ], "wc_questions_avg": [ 55.0, 62.170732664172455 ], "wc_limitations_avg": [ 14.8, 17.713271860387625 ], "wc_review_avg": [ 272.8, 88.38416147704294 ], "wc_reply_reviewers_avg": [ 13.8, 3.867815921162743 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5833333333333334, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12462263125089576084&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "njnu.edu.cn;lsec.cc.ac.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing Normal University;Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": "http://www.nju.edu.cn;https://www.cas.cn", "aff_unique_abbr": "NNU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Closing the Computational-Statistical Gap in Best Arm Identification for Combinatorial Semi-bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72633", "id": "8jg8z3ASiw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ae8a7d6fc6d0d45e7c1ad9d4b063a01-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=8jg8z3ASiw", "openreview": "https://openreview.net/forum?id=8jg8z3ASiw", "poster": "/media/PosterPDFs/NeurIPS%202023/72633.png?t=1699011483.4987803", "slides": "https://nips.cc/virtual/2023/poster/72633", "video": "https://nips.cc/virtual/2023/poster/72633", "author_site": "Ruo-Chun Tzeng, Po-An Wang, Alexandre Proutiere, Chi-Jen Lu", "tldr": "", "abstract": "We study the best arm identification problem in combinatorial semi-bandits in the fixed confidence setting. We present Perturbed Frank-Wolfe Sampling (P-FWS), an algorithm that (i) runs in polynomial time, (ii) achieves the instance-specific minimal sample complexity in the high confidence regime, and (iii) enjoys polynomial sample complexity guarantees in the moderate confidence regime. To our best knowledge, existing algorithms cannot achieve (ii) and (iii) simultaneously in vanilla bandits. With P-FWS, we close the computational-statistical gap in best arm identification in combinatorial semi-bandits. The design of P-FWS starts from the optimization problem that defines the information-theoretical and instance-specific sample complexity lower bound. P-FWS solves this problem in an online manner using, in each round, a single iteration of the Frank-Wolfe algorithm. Structural properties of the problem are leveraged to make the P-FWS successive updates computationally efficient. In turn, P-FWS only relies on a simple linear maximization oracle.", "keywords": "best-arm identification; combinatorial semi-bandit; no-regret learning;", "primary_area": "", "supplementary_material": "/attachment/24aa860d9919217be51d9aff5cb414c1f915db90.pdf", "author": "Ruo-Chun Tzeng;Po-An Wang;Alexandre Proutiere;Chi-Jen Lu", "authorids": "~Ruo-Chun_Tzeng1;~Po-An_Wang1;~Alexandre_Proutiere1;~Chi-Jen_Lu1", "gender": "F;;M;M", "homepage": "https://rctzeng.github.io/;;https://people.kth.se/~alepro/;", "dblp": "242/3884;203/4451;p/AlexandreProutiere;40/243", "google_scholar": "jntcHQ0AAAAJ;https://scholar.google.com.tw/citations?user=kzXIxFYAAAAJ;g5sya5cAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ruo-Chun_Tzeng1;~Po-An_Wang1;~Alexandre_Proutiere1;~Chi-Jen_Lu1", "aff": "KTH Royal Institute of Technology, Stockholm, Sweden;KTH Royal Institute of Technology, Stockholm, Sweden;KTH Royal Institute of Technology, Stockholm, Sweden;Academia Sinica", "aff_domain": "kth.se;kth.se;kth.se;sinica.edu.tw", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntzeng2023closing,\ntitle={Closing the Computational-Statistical Gap in Best Arm Identification for Combinatorial Semi-bandits},\nauthor={Ruo-Chun Tzeng and Po-An Wang and Alexandre Proutiere and Chi-Jen Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8jg8z3ASiw}\n}", "github": "", "project": "", "reviewers": "d388;26At;Vkf2;b447", "pdf_size": 451908, "rating": "6;6;6;7", "confidence": "3;4;2;3", "soundness": "3;3;4;4", "novelty": "3;3;4;3", "presentation": "2;3;2;3", "wc_summary": "218;112;93;99", "wc_strengths": "40;81;93;72", "wc_weaknesses": "333;182;150;37", "wc_questions": "69;22;35;61", "wc_limitations": "1;13;6;44", "wc_review": "661;410;377;313", "wc_reply_reviewers": "191;0;75;25", "wc_reply_authors": "382;0;43;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 
0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 130.5, 50.98284025042151 ], "wc_strengths_avg": [ 71.5, 19.653244007033546 ], "wc_weaknesses_avg": [ 175.5, 105.68940344235084 ], "wc_questions_avg": [ 46.75, 19.031224343168255 ], "wc_limitations_avg": [ 16.0, 16.718253497300488 ], "wc_review_avg": [ 440.25, 132.13511077681056 ], "wc_reply_reviewers_avg": [ 72.75, 73.4178963196304 ], "wc_reply_authors_avg": [ 106.25, 160.16924642389998 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1561668893887877363&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "kth.se;kth.se;kth.se;sinica.edu.tw", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "KTH Royal Institute of Technology;Academia Sinica", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.sinica.edu.tw", "aff_unique_abbr": "KTH;Academia Sinica", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Stockholm;Taiwan", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Sweden;China" }, { "title": "When Does Optimizing a Proper Loss Yield Calibration?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72632", "id": "8kyIChWsAG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e4165c96702bac5f4962b70f3cf2f136-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8kyIChWsAG", "openreview": "https://openreview.net/forum?id=8kyIChWsAG", "poster": "/media/PosterPDFs/NeurIPS%202023/72632.png?t=1701890894.236411", "slides": "https://nips.cc/virtual/2023/poster/72632", "video": "https://nips.cc/virtual/2023/poster/72632", "author_site": "Jaroslaw Blasiok, Parikshit Gopalan, Lunjia Hu, Preetum Nakkiran", "tldr": "", "abstract": "Optimizing proper loss functions is popularly believed to yield predictors with good calibration properties; the intuition being that for such losses, the global optimum is to predict the ground-truth probabilities, which is indeed calibrated. However, typical machine learning models are trained to approximately minimize loss over restricted families of predictors, that are unlikely to contain the ground truth. Under what circumstances does optimizing proper loss over a restricted family yield calibrated models? What precise calibration guarantees does it give? In this work, we provide a rigorous answer to these questions. We replace the global optimality with a local optimality condition stipulating that the (proper) loss of the predictor cannot be reduced much by post-processing its predictions with a certain family of Lipschitz functions. We show that any predictor with this local optimality satisfies smooth calibration as defined in [Kakade and Foster, 2008, B\u0142asiok et al., 2023]. Local optimality is plausibly satisfied by well-trained DNNs, which suggests an explanation for why they are calibrated from proper loss minimization alone. 
Finally, we show that the connection between local optimality and calibration error goes both ways: nearly calibrated predictors are also nearly locally optimal.", "keywords": "calibration;deep learning;theory;optimization", "primary_area": "", "supplementary_material": "", "author": "Jaros\u0142aw B\u0142asiok;Parikshit Gopalan;Lunjia Hu;Preetum Nakkiran", "authorids": "jaro3000@gmail.com;~Parikshit_Gopalan1;~Lunjia_Hu1;~Preetum_Nakkiran1", "gender": ";M;M;", "homepage": ";https://parikg.github.io/;https://lunjiahu.com;http://preetum.nakkiran.org", "dblp": ";16/1585;195/6273;151/6343", "google_scholar": ";fb2-dasAAAAJ;ss7CIgcAAAAJ;zithBbUAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "jaro3000@gmail.com;~Parikshit_Gopalan1;~Lunjia_Hu1;~Preetum_Nakkiran1", "aff": ";Apple;Stanford University;Apple", "aff_domain": ";apple.com;stanford.edu;apple.com", "position": ";Principal Researcher;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nb{\\l}asiok2023when,\ntitle={When Does Optimizing a Proper Loss Yield Calibration?},\nauthor={Jaros{\\l}aw B{\\l}asiok and Parikshit Gopalan and Lunjia Hu and Preetum Nakkiran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8kyIChWsAG}\n}", "github": "", "project": "", "reviewers": "ooFk;5Dzr;6rky;PVPP;CHsP", "pdf_size": 1294643, "rating": "4;7;7;7;9", "confidence": "3;4;3;3;3", "soundness": "3;4;3;3;4", "novelty": "3;3;4;3;4", "presentation": "3;3;3;3;4", "wc_summary": "68;56;73;80;209", "wc_strengths": "58;183;153;1;203", "wc_weaknesses": "77;112;290;1;368", "wc_questions": "52;22;36;1;101", "wc_limitations": "20;25;20;1;44", "wc_review": "275;398;572;84;925", "wc_reply_reviewers": "778;26;37;0;51", "wc_reply_authors": "495;0;0;0;0", "reply_reviewers": "2;2;1;0;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.8, 1.6 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 97.2, 56.446080466229006 ], "wc_strengths_avg": [ 119.6, 77.39664075397587 ], "wc_weaknesses_avg": [ 169.6, 137.24226754174532 ], "wc_questions_avg": [ 42.4, 33.755592129305036 ], "wc_limitations_avg": [ 22.0, 13.725887949418793 ], "wc_review_avg": [ 450.8, 285.5838931032351 ], "wc_reply_reviewers_avg": [ 178.4, 300.26561574712485 ], "wc_reply_authors_avg": [ 99.0, 198.0 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.06250000000000004, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2099058743961178921&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";apple.com;stanford.edu;apple.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Apple;Stanford University", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.stanford.edu", "aff_unique_abbr": "Apple;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Investigating how ReLU-networks encode symmetries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72631", "id": "8lbFwpebeu", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c74f005aabbf90a8f1747d99f387321-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8lbFwpebeu", "openreview": "https://openreview.net/forum?id=8lbFwpebeu", "poster": "/media/PosterPDFs/NeurIPS%202023/72631.png?t=1701877432.6010711", "slides": "https://nips.cc/virtual/2023/poster/72631", "video": "https://nips.cc/virtual/2023/poster/72631", "author_site": "Georg B\u00f6kman, Fredrik Kahl", "tldr": "", "abstract": "Many data symmetries can be described in terms of group equivariance and\n the most common way of encoding group equivariances in neural networks is by building linear layers that are group equivariant.\nIn this work we investigate whether equivariance of a network implies that all layers are equivariant.\nOn the theoretical side we find cases where equivariance implies layerwise equivariance, but also\ndemonstrate that this is not the case generally.\nNevertheless, we conjecture that CNNs that are trained to be equivariant will exhibit layerwise equivariance and explain how this conjecture is a weaker version of the recent permutation conjecture by Entezari et al.\\ [2022].\nWe perform quantitative experiments with VGG-nets on CIFAR10 and qualitative experiments with ResNets on ImageNet to illustrate and support our theoretical findings. \nThese experiments are not only of interest for understanding how group equivariance is encoded in ReLU-networks, but they also give a new perspective on Entezari et al.'s permutation conjecture as we find that it\nis typically easier to merge a network with a group-transformed version of itself than merging two different networks.", "keywords": "loss landscape;network merging;linear mode connectivity;equivariance;group convolutional neural network;permutation;group;symmetry;invariance;weight space ensembling", "primary_area": "", "supplementary_material": "", "author": "Georg B\u00f6kman;Fredrik Kahl", "authorids": "~Georg_B\u00f6kman1;~Fredrik_Kahl3", "gender": ";M", "homepage": "https://www.chalmers.se/en/Staff/Pages/bokman.aspx;https://fredkahl.github.io/", "dblp": "307/6596;01/7013", "google_scholar": "FUE3Wd0AAAAJ;P_w6UgMAAAAJ", "orcid": "0000-0001-7522-2255;", "linkedin": ";", "or_profile": "~Georg_B\u00f6kman1;~Fredrik_Kahl3", "aff": "Chalmers University of Technology;Chalmers University", "aff_domain": "chalmers.se;chalmers.se", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nb{\\\"o}kman2023investigating,\ntitle={Investigating how Re{LU}-networks encode symmetries},\nauthor={Georg B{\\\"o}kman and Fredrik Kahl},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8lbFwpebeu}\n}", "github": "", "project": "", "reviewers": "k5hy;A92L;ZqAK;79cV;k3h9", "pdf_size": 6751685, "rating": "5;6;7;7;8", "confidence": "2;3;3;3;3", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "3;4;3;3;2", "wc_summary": "33;73;246;62;410", "wc_strengths": "71;156;115;80;80", "wc_weaknesses": "167;187;106;96;194", "wc_questions": "54;5;131;76;44", "wc_limitations": "3;7;15;7;12", "wc_review": "328;428;613;321;740", "wc_reply_reviewers": "49;51;87;19;68", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], 
"wc_summary_avg": [ 164.8, 143.58467884840638 ], "wc_strengths_avg": [ 100.4, 31.62657110721932 ], "wc_weaknesses_avg": [ 150.0, 41.099878345318736 ], "wc_questions_avg": [ 62.0, 41.4584128977461 ], "wc_limitations_avg": [ 8.8, 4.214261501141095 ], "wc_review_avg": [ 486.0, 165.0442364943411 ], "wc_reply_reviewers_avg": [ 54.8, 22.542404485768593 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7844645405527362, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2952050731720657028&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "chalmers.se;chalmers.se", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chalmers University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.chalmers.se", "aff_unique_abbr": "Chalmers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "title": "Towards Stable Backdoor Purification through Feature Shift Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72630", "id": "8muKbaAgsh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee37d51b3c003d89acba2363dde256af-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8muKbaAgsh", "openreview": "https://openreview.net/forum?id=8muKbaAgsh", "poster": "/media/PosterPDFs/NeurIPS%202023/72630.png?t=1697258915.2865057", "slides": "https://nips.cc/virtual/2023/poster/72630", "video": "https://nips.cc/virtual/2023/poster/72630", "author_site": "Rui Min, Zeyu Qin, Li Shen, Minhao Cheng", "tldr": "", "abstract": "It has been widely observed that deep neural networks (DNN) are vulnerable to backdoor attacks where attackers could manipulate the model behavior maliciously by tampering with a small set of training samples. Although a line of defense methods is proposed to mitigate this threat, they either require complicated modifications to the training process or heavily rely on the specific model architecture, which makes them hard to deploy into real-world applications. Therefore, in this paper, we instead start with fine-tuning, one of the most common and easy-to-deploy backdoor defenses, through comprehensive evaluations against diverse attack scenarios. Observations made through initial experiments show that in contrast to the promising defensive results on high poisoning rates, vanilla tuning methods completely fail at low poisoning rate scenarios. Our analysis shows that with the low poisoning rate, the entanglement between backdoor and clean features undermines the effect of tuning-based defenses. Therefore, it is necessary to disentangle the backdoor and clean features in order to improve backdoor purification. To address this, we introduce Feature Shift Tuning (FST), a method for tuning-based backdoor purification. Specifically, FST encourages feature shifts by actively deviating the classifier weights from the originally compromised weights. Extensive experiments demonstrate that our FST provides consistently stable performance under different attack settings. Without complex parameter adjustments, FST also achieves much lower tuning costs, only $10$ epochs. 
Our codes are available at https://github.com/AISafety-HKUST/stable_backdoor_purification.", "keywords": "Backdoor Defense;Model-tuning", "primary_area": "", "supplementary_material": "/attachment/18839aceba56fd6b9fdda37e834e5c804aa6c3e2.pdf", "author": "Rui Min;Zeyu Qin;Li Shen;Minhao Cheng", "authorids": "~Rui_Min1;~Zeyu_Qin1;~Li_Shen1;~Minhao_Cheng1", "gender": "M;M;M;M", "homepage": "https://rmin2000.github.io/;https://alan-qin.github.io/;https://sites.google.com/site/mathshenli/home;https://cmhcbb.github.io/", "dblp": ";271/5778;91/3680-8;174/1717", "google_scholar": "https://scholar.google.com/citations?hl=en;3LXI4-MAAAAJ;yVhgENIAAAAJ;_LkC1yoAAAAJ", "orcid": "0009-0005-8528-9783;0000-0003-1733-7892;;0000-0003-3965-4215", "linkedin": ";zeyu-qin-546398179/;;", "or_profile": "~Rui_Min1;~Zeyu_Qin1;~Li_Shen1;~Minhao_Cheng1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;JD Explore Academy;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;ust.hk;jd.com;ust.hk", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nmin2023towards,\ntitle={Towards Stable Backdoor Purification through Feature Shift Tuning},\nauthor={Rui Min and Zeyu Qin and Li Shen and Minhao Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8muKbaAgsh}\n}", "github": "", "project": "", "reviewers": "X5wr;chxU;Zkrc;JBzP;2qbe", "pdf_size": 4324305, "rating": "5;5;5;7;7", "confidence": "4;4;4;4;4", "soundness": "2;2;3;4;3", "novelty": "4;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "48;69;147;52;91", "wc_strengths": "32;30;203;94;52", "wc_weaknesses": "143;102;158;53;81", "wc_questions": "43;4;17;76;27", "wc_limitations": "1;15;24;3;69", "wc_review": "267;220;549;278;320", "wc_reply_reviewers": "22;0;26;11;23", "wc_reply_authors": "108;117;42;22;30", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.4, 36.13640823324864 ], "wc_strengths_avg": [ 82.2, 64.63559390923858 ], "wc_weaknesses_avg": [ 107.4, 38.76390073251142 ], "wc_questions_avg": [ 33.4, 24.824181758922087 ], "wc_limitations_avg": [ 22.4, 24.751565607047972 ], "wc_review_avg": [ 326.8, 115.57058449276789 ], "wc_reply_reviewers_avg": [ 16.4, 9.645724441429994 ], "wc_reply_authors_avg": [ 63.8, 40.37028610252843 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4140923959261964602&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cse.ust.hk;ust.hk;jd.com;ust.hk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;JD", "aff_unique_dep": ";JD Explore Academy", "aff_unique_url": "https://www.ust.hk;", "aff_unique_abbr": "HKUST;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "Sparse Deep Learning for Time Series Data: Theory and Applications", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72629", "id": "8niGwlkLAX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aaa9c20f0a217a1aef6fa5d97f310292-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8niGwlkLAX", "openreview": "https://openreview.net/forum?id=8niGwlkLAX", "poster": "/media/PosterPDFs/NeurIPS%202023/72629.png?t=1702358894.7246227", "slides": "https://nips.cc/virtual/2023/poster/72629", "video": "https://nips.cc/virtual/2023/poster/72629", "author_site": "Mingxuan Zhang, Yan Sun, Faming Liang", "tldr": "", "abstract": "Sparse deep learning has become a popular technique for improving the performance of deep neural networks in areas such as uncertainty quantification, variable selection, and large-scale network compression. However, most existing research has focused on problems where the observations are independent and identically distributed (i.i.d.), and there has been little work on the problems where the observations are dependent, such as time series data and sequential data in natural language processing. This paper aims to address this gap by studying the theory for sparse deep learning with dependent data. We show that sparse recurrent neural networks (RNNs) can be consistently estimated, and their predictions are asymptotically normally distributed under appropriate assumptions, enabling the prediction uncertainty to be correctly quantified. Our numerical results show that sparse deep learning outperforms state-of-the-art methods, such as conformal predictions, in prediction uncertainty quantification for time series data. Furthermore, our results indicate that the proposed method can consistently identify the autoregressive order for time series data and outperform existing methods in large-scale model compression. 
Our proposed method has important practical implications in fields such as finance, healthcare, and energy, where both accurate point estimates and prediction uncertainty quantification are of concern.", "keywords": "Sparse Deep Learning;Uncertainty Quantification;Model Compression;Variable Selection;Dependent Data", "primary_area": "", "supplementary_material": "/attachment/d525f35ff940a45e50dee7d77dead0206b8d9435.pdf", "author": "Mingxuan Zhang;Yan Sun;Faming Liang", "authorids": "~Mingxuan_Zhang1;~Yan_Sun6;~Faming_Liang1", "gender": "M;M;M", "homepage": ";https://sylydya.github.io;https://www.stat.purdue.edu/~fmliang/", "dblp": ";181/2323-11;29/1122", "google_scholar": ";NC1KaRwAAAAJ;TboqoPIAAAAJ", "orcid": ";;", "linkedin": "mingxuan-zhang-093459151/;yan-sun-2aab73141/;", "or_profile": "~Mingxuan_Zhang1;~Yan_Sun6;~Faming_Liang1", "aff": "Purdue University;Amazon;Purdue University", "aff_domain": "purdue.edu;amazon.com;purdue.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023sparse,\ntitle={Sparse Deep Learning for Time Series Data: Theory and Applications},\nauthor={Mingxuan Zhang and Yan Sun and Faming Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8niGwlkLAX}\n}", "github": "", "project": "", "reviewers": "riun;AFD6;shpG;UrAN;TwH7", "pdf_size": 2652176, "rating": "4;5;7;7;7", "confidence": "3;2;3;1;2", "soundness": "3;2;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;3;3;2", "wc_summary": "80;36;66;65;66", "wc_strengths": "72;25;155;117;43", "wc_weaknesses": "169;571;350;194;74", "wc_questions": "217;32;238;131;552", "wc_limitations": "93;18;143;100;3", "wc_review": "631;682;952;607;738", "wc_reply_reviewers": "0;253;420;62;141", "wc_reply_authors": "0;327;1166;0;62", "reply_reviewers": "0;2;2;1;1", "reply_authors": "1;3;5;1;2", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 2.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 62.6, 14.416657032752079 ], "wc_strengths_avg": [ 82.4, 47.79790790400769 ], "wc_weaknesses_avg": [ 271.6, 174.0006896538057 ], "wc_questions_avg": [ 234.0, 174.83821092655919 ], "wc_limitations_avg": [ 71.4, 52.803787742926175 ], "wc_review_avg": [ 722.0, 123.50060728595629 ], "wc_reply_reviewers_avg": [ 175.2, 148.76746956240132 ], "wc_reply_authors_avg": [ 311.0, 444.2350729062261 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.4966629547095764 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6772890805512157037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "purdue.edu;amazon.com;purdue.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Purdue University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.purdue.edu;https://www.amazon.com", "aff_unique_abbr": "Purdue;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Self-Supervised Learning of Representations for Space Generates Multi-Modular Grid Cells", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72628", "id": "8ox2vrQiTF", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/4846257e355f6923fc2a1fbe35099e91-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8ox2vrQiTF", "openreview": "https://openreview.net/forum?id=8ox2vrQiTF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72628", "video": "https://nips.cc/virtual/2023/poster/72628", "author_site": "Rylan Schaeffer, Mikail Khona, Tzuhsuan Ma, Cristobal Eyzaguirre, Sanmi Koyejo, Ila Fiete", "tldr": "", "abstract": "To solve the spatial problems of mapping, localization and navigation, the mammalian lineage has developed striking spatial representations. One important spatial representation is the Nobel-prize winning grid cells: neurons that represent self-location, a local and aperiodic quantity, with seemingly bizarre non-local and spatially periodic activity patterns of a few discrete periods. Why has the mammalian lineage learnt this peculiar grid representation? Mathematical analysis suggests that this multi-periodic representation has excellent properties as an algebraic code with high capacity and intrinsic error-correction, but to date, synthesis of multi-modular grid cells in deep recurrent neural networks remains absent. In this work, we begin by identifying key insights from four families of approaches to answering the grid cell question: dynamical systems, coding theory, function optimization and supervised deep learning. We then leverage our insights to propose a new approach that elegantly combines the strengths of all four approaches. Our approach is a self-supervised learning (SSL) framework - including data, data augmentations, loss functions and a network architecture - motivated from a normative perspective, with no access to supervised position information. Without making assumptions about internal or readout representations, we show that multiple grid cell modules can emerge in networks trained on our SSL framework and that the networks generalize significantly beyond their training distribution. 
This work contains insights for neuroscientists interested in the origins of grid cells as well as machine learning researchers interested in novel SSL frameworks.", "keywords": "self-supervised learning;unsupervised learning;grid cells;neuroscience;systems neuroscience;representation learning", "primary_area": "", "supplementary_material": "/attachment/35f82169480e512af3a9d33416260c555f156811.pdf", "author": "Rylan Schaeffer;Mikail Khona;Tzuhsuan Ma;Cristobal Eyzaguirre;Sanmi Koyejo;Ila R Fiete", "authorids": "~Rylan_Schaeffer2;~Mikail_Khona2;~Tzuhsuan_Ma1;~Cristobal_Eyzaguirre1;~Sanmi_Koyejo1;~Ila_R_Fiete1", "gender": "M;;M;;;F", "homepage": "https://rylanschaeffer.github.io;;https://www.janelia.org/people/tzuhsuan-ma;;;https://fietelab.mit.edu/", "dblp": "280/1341;;;;;", "google_scholar": "6tMEGz8AAAAJ;;https://scholar.google.com/citations?hl=en;;;uE-CihIAAAAJ", "orcid": ";;0000-0002-6099-9966;;;0000-0003-4738-2539", "linkedin": "rylanschaeffer/;;tzuhsuan-ma-888759154/;;;", "or_profile": "~Rylan_Schaeffer2;~Mikail_Khona2;~Tzuhsuan_Ma1;~Cristobal_Eyzaguirre1;~Sanmi_Koyejo1;~Ila_R_Fiete1", "aff": "Massachusetts Institute of Technology;;HHMI Janelia Research Campus;;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;janelia.hhmi.org;;;mit.edu", "position": "Researcher;;Postdoc;;;Professor", "bibtex": "@inproceedings{\nschaeffer2023selfsupervised,\ntitle={Self-Supervised Learning of Representations for Space Generates Multi-Modular Grid Cells},\nauthor={Rylan Schaeffer and Mikail Khona and Tzuhsuan Ma and Cristobal Eyzaguirre and Sanmi Koyejo and Ila R Fiete},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8ox2vrQiTF}\n}", "github": "", "project": "", "reviewers": "dHUQ;1Nr3;keKQ;cRXg", "pdf_size": 10409998, "rating": "6;7;7;8", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;4;3", "wc_summary": "93;88;74;87", "wc_strengths": "116;15;95;41", "wc_weaknesses": "242;62;191;37", "wc_questions": "156;79;89;73", "wc_limitations": "9;1;1;2", "wc_review": "616;245;450;240", "wc_reply_reviewers": "178;0;97;66", "wc_reply_authors": "532;91;215;432", "reply_reviewers": "3;0;1;1", "reply_authors": "4;2;3;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.5, 7.0178344238090995 ], "wc_strengths_avg": [ 66.75, 40.511572420729365 ], "wc_weaknesses_avg": [ 133.0, 85.88073124979782 ], "wc_questions_avg": [ 99.25, 33.25939716831921 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 387.75, 156.66903810261937 ], "wc_reply_reviewers_avg": [ 85.25, 63.989745272191854 ], "wc_reply_authors_avg": [ 317.5, 173.87423615935742 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15522054267905040469&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;;janelia.hhmi.org;;;mit.edu", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;HHMI Janelia Research Campus", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.janelia.org", 
"aff_unique_abbr": "MIT;HHMI Janelia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Janelia", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FLSL: Feature-level Self-supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72627", "id": "8pOBo5NgTQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15212bd2265c4a3ab0dbc1b1982c1b69-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8pOBo5NgTQ", "openreview": "https://openreview.net/forum?id=8pOBo5NgTQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72627.png?t=1701463231.1539419", "slides": "https://nips.cc/virtual/2023/poster/72627", "video": "https://nips.cc/virtual/2023/poster/72627", "author_site": "Qing Su, Anton Netchaev, Hai Li, Shihao Ji", "tldr": "", "abstract": "Current self-supervised learning (SSL) methods (e.g., SimCLR, DINO, VICReg, MOCOv3) target primarily on representations at instance level and do not generalize well to dense prediction tasks, such as object detection and segmentation. Towards aligning SSL with dense predictions, this paper demonstrates for the first time the underlying mean-shift clustering process of Vision Transformers (ViT), which aligns well with natural image semantics (e.g., a world of objects and stuffs). By employing transformer for joint embedding and clustering, we propose a bi-level feature clustering SSL method, coined Feature-Level Self-supervised Learning (FLSL). We present the formal definition of the FLSL problem and construct the objectives from the mean-shift and k-means perspectives. We show that FLSL promotes remarkable semantic cluster representations and learns an embedding scheme amenable to intra-view and inter-view feature clustering. Experiments show that FLSL yields significant improvements in dense prediction tasks, achieving 44.9 (+2.8)% AP and 46.5% AP in object detection, as well as 40.8 (+2.3)% AP and 42.1% AP in instance segmentation on MS-COCO, using Mask R-CNN with ViT-S/16 and ViT-S/8 as backbone, respectively. FLSL consistently outperforms existing SSL methods across additional benchmarks, including UAV object detection on UAVDT, and video instance segmentation on DAVIS 2017. We conclude by presenting visualization and various ablation studies to better understand the success of FLSL. 
The source code is available at https://github.com/ISL-CV/FLSL.", "keywords": "Transformer;ViT;Dense Prediction;Self-supervised Learning;Mean Shift;Self-attention;Representation learning", "primary_area": "", "supplementary_material": "/attachment/a69c66ad7497d9f750adc1db1416a1d3a204feaf.pdf", "author": "Qing Su;Anton Netchaev;Hai Li;Shihao Ji", "authorids": "~Qing_Su1;anton.netchaev@erdc.dren.mil;~Hai_Li1;~Shihao_Ji1", "gender": ";;F;", "homepage": ";;https://ece.duke.edu/faculty/hai-helen-li;", "dblp": "81/4659;;30/5330-1;", "google_scholar": "Pz9s8e4AAAAJ;;E6Tpfq8AAAAJ;", "orcid": ";;0000-0003-3228-6544;", "linkedin": ";;;", "or_profile": "~Qing_Su1;anton.netchaev@erdc.dren.mil;~Hai_Li1;~Shihao_Ji1", "aff": ";;Duke University;", "aff_domain": ";;duke.edu;", "position": ";;Professor;", "bibtex": "@inproceedings{\nsu2023flsl,\ntitle={{FLSL}: Feature-level Self-supervised Learning},\nauthor={Qing Su and Anton Netchaev and Hai Li and Shihao Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8pOBo5NgTQ}\n}", "github": "", "project": "", "reviewers": "asnD;rt9u;Hfeu;taUM;ggdQ", "pdf_size": 19978325, "rating": "5;5;6;7;7", "confidence": "4;2;4;5;3", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "81;92;112;45;126", "wc_strengths": "21;14;189;59;65", "wc_weaknesses": "128;161;244;256;68", "wc_questions": "63;59;23;2;106", "wc_limitations": "6;6;12;4;34", "wc_review": "299;332;580;366;399", "wc_reply_reviewers": "6;0;97;13;0", "wc_reply_authors": "0;0;840;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;3;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 91.2, 27.86682615584344 ], "wc_strengths_avg": [ 69.6, 62.9971427923521 ], "wc_weaknesses_avg": [ 171.4, 70.86776418090244 ], "wc_questions_avg": [ 50.6, 35.82513084414347 ], "wc_limitations_avg": [ 12.4, 11.128342194594845 ], "wc_review_avg": [ 395.2, 98.25151398324608 ], "wc_reply_reviewers_avg": [ 23.2, 37.20967616091277 ], "wc_reply_authors_avg": [ 168.0, 336.0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4385290096535146, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11324798209820086255&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": ";;duke.edu;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "One-for-All: Bridge the Gap Between Heterogeneous Architectures in Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72626", "id": "8qePPvL1VY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb8e5f198c7a5dcd48860354e38c0edc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8qePPvL1VY", "openreview": "https://openreview.net/forum?id=8qePPvL1VY", "poster": "/media/PosterPDFs/NeurIPS%202023/72626.png?t=1697251256.8514507", "slides": "https://nips.cc/virtual/2023/poster/72626", "video": "https://nips.cc/virtual/2023/poster/72626", "author_site": 
"Zhiwei Hao, Jianyuan Guo, Kai Han, Yehui Tang, Han Hu, Yunhe Wang, Chang Xu", "tldr": "", "abstract": "Knowledge distillation (KD) has proven to be a highly effective approach for enhancing model performance through a teacher-student training scheme. However, most existing distillation methods are designed under the assumption that the teacher and student models belong to the same model family, particularly the hint-based approaches. By using centered kernel alignment (CKA) to compare the learned features between heterogeneous teacher and student models, we observe significant feature divergence. This divergence illustrates the ineffectiveness of previous hint-based methods in cross-architecture distillation. To tackle the challenge in distilling heterogeneous models, we propose a simple yet effective one-for-all KD framework called OFA-KD, which significantly improves the distillation performance between heterogeneous architectures. Specifically, we project intermediate features into an aligned latent space such as the logits space, where architecture-specific information is discarded. Additionally, we introduce an adaptive target enhancement scheme to prevent the student from being disturbed by irrelevant information. Extensive experiments with various architectures, including CNN, Transformer, and MLP, demonstrate the superiority of our OFA-KD framework in enabling distillation between heterogeneous architectures. Specifically, when equipped with our OFA-KD, the student models achieve notable performance improvements, with a maximum gain of 8.0% on the CIFAR-100 dataset and 0.7% on the ImageNet-1K dataset. PyTorch code and checkpoints can be found at https://github.com/Hao840/OFAKD.", "keywords": "knowledge distillation;feature distillation;heterogeneous architectures", "primary_area": "", "supplementary_material": "/attachment/ddf503af07325b115ef886a972d02114d0ff7663.pdf", "author": "Zhiwei Hao;Jianyuan Guo;Kai Han;Yehui Tang;Han Hu;Yunhe Wang;Chang Xu", "authorids": "~Zhiwei_Hao1;~Jianyuan_Guo1;~Kai_Han2;~Yehui_Tang1;~Han_Hu6;~Yunhe_Wang1;~Chang_Xu4", "gender": ";M;M;M;;M;", "homepage": ";https://ggjy.github.io/;https://iamhankai.github.io;;;https://www.wangyunhe.site/;", "dblp": "125/5604;190/0258;51/4757-2;244/9659;;63/8217-1;", "google_scholar": "MwDSTNAAAAAJ;https://scholar.google.com/citations?hl=en;vThoBVcAAAAJ;TkSZQ6gAAAAJ;;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ;", "orcid": ";;0000-0002-9761-2702;;;0000-0002-0142-509X;", "linkedin": ";;;;;;", "or_profile": "~Zhiwei_Hao1;~Jianyuan_Guo1;~Kai_Han2;~Yehui_Tang1;~Han_Hu6;~Yunhe_Wang1;~Chang_Xu4", "aff": "Beijing Institute of Technology;University of Sydney;Institute of Software, Chinese Academy of Sciences;Peking University;;Huawei Noah's Ark Lab;", "aff_domain": "bit.edu.cn;usyd.edu.au;ios.ac.cn;pku.edu.cn;;huawei.com;", "position": "PhD student;PhD student;PhD student;PhD student;;Principal Researcher;", "bibtex": "@inproceedings{\nhao2023oneforall,\ntitle={One-for-All: Bridge the Gap Between Heterogeneous Architectures in Knowledge Distillation},\nauthor={Zhiwei Hao and Jianyuan Guo and Kai Han and Yehui Tang and Han Hu and Yunhe Wang and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8qePPvL1VY}\n}", "github": "", "project": "", "reviewers": "FwZi;R7Hg;d3Lq;sLfN", "pdf_size": 5029874, "rating": "5;6;7;7", "confidence": "5;5;5;4", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;4;3", 
"wc_summary": "65;50;124;102", "wc_strengths": "72;36;118;81", "wc_weaknesses": "162;162;192;100", "wc_questions": "6;53;4;155", "wc_limitations": "6;4;10;10", "wc_review": "311;305;448;448", "wc_reply_reviewers": "0;34;0;0", "wc_reply_authors": "0;51;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 85.25, 29.303370113350443 ], "wc_strengths_avg": [ 76.75, 29.166547618804664 ], "wc_weaknesses_avg": [ 154.0, 33.49626844888845 ], "wc_questions_avg": [ 54.5, 61.2474489264655 ], "wc_limitations_avg": [ 7.5, 2.598076211353316 ], "wc_review_avg": [ 378.0, 70.03213548079196 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 12.75, 22.083647796503186 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6323822815471276595&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "bit.edu.cn;usyd.edu.au;ios.ac.cn;pku.edu.cn;;huawei.com;", "author_num": 7, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Beijing Institute of Technology;University of Sydney;Chinese Academy of Sciences;Peking University;Huawei", "aff_unique_dep": ";;Institute of Software;;Noah's Ark Lab", "aff_unique_url": "http://www.bit.edu.cn/;https://www.sydney.edu.au;http://www.ios.ac.cn;http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "BIT;USYD;CAS;Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Constant Approximation for Individual Preference Stable Clustering", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72625", "id": "8rDbUoYc0p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/881259965dacb9f42967aae84a157283-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8rDbUoYc0p", "openreview": "https://openreview.net/forum?id=8rDbUoYc0p", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72625", "video": "https://nips.cc/virtual/2023/poster/72625", "author_site": "Anders Aamand, Justin Chen, Allen Liu, Sandeep Silwal, Pattara Sukprasert, Ali Vakilian, Fred Zhang", "tldr": "", "abstract": "Individual preference (IP) stability, introduced by Ahmadi et al. (ICML 2022), is a natural clustering objective inspired by stability and fairness constraints. A clustering is $\\alpha$-IP stable if the average distance of every data point to its own cluster is at most $\\alpha$ times the average distance to any other cluster. Unfortunately, determining if a dataset admits a $1$-IP stable clustering is NP-Hard. Moreover, before this work, it was unknown if an $o(n)$-IP stable clustering always exists, as the prior state of the art only guaranteed an $O(n)$-IP stable clustering. We close this gap in understanding and show that an $O(1)$-IP stable clustering always exists for general metrics, and we give an efficient algorithm which outputs such a clustering. 
We also introduce generalizations of IP stability beyond average distance and give efficient near optimal algorithms in the cases where we consider the maximum and minimum distances within and between clusters.", "keywords": "clustering;fairness;approximation algorithms", "primary_area": "", "supplementary_material": "/attachment/179505fb8c0860a64bb47a8bf3c49e5b50186404.zip", "author": "Anders Aamand;Justin Y. Chen;Allen Liu;Sandeep Silwal;Pattara Sukprasert;Ali Vakilian;Fred Zhang", "authorids": "~Anders_Aamand1;~Justin_Y._Chen1;~Allen_Liu1;~Sandeep_Silwal1;~Pattara_Sukprasert2;~Ali_Vakilian1;~Fred_Zhang1", "gender": "M;M;M;M;;M;M", "homepage": "https://www.andersaamand.com/;https://aliu42.github.io/;https://sandeepsilwal.com;http://pattaras.github.io/;http://www.mit.edu/~vakilian/;http://fredzhang.me/;https://people.csail.mit.edu/justc/", "dblp": "205/2416;225/5531.html;225/4637;;116/4679;232/9071;254/0805.html", "google_scholar": "WpIvLroAAAAJ;;MnDnUvcAAAAJ;https://scholar.google.co.th/citations?user=jeQidJUAAAAJ;uXZaVaAAAAAJ;guJ_kBQAAAAJ;X_myU1YAAAAJ", "orcid": "0000-0002-0402-0514;;;;0000-0001-5049-7594;;", "linkedin": ";allen-liu-284b28127/;;;;fred-zhang-0/;", "or_profile": "~Anders_Aamand1;~Allen_Liu1;~Sandeep_Silwal1;~Pattara_Sukprasert2;~Ali_Vakilian1;~Fred_Zhang1;~Justin_Y_Chen1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Toyota Technological Institute at Chicago;University of California, Berkeley;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;;ttic.edu;berkeley.edu;mit.edu", "position": "Postdoc;PhD student;PhD student;;Research Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\naamand2023constant,\ntitle={Constant Approximation for Individual Preference Stable Clustering},\nauthor={Anders Aamand and Justin Y. 
Chen and Allen Liu and Sandeep Silwal and Pattara Sukprasert and Ali Vakilian and Fred Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8rDbUoYc0p}\n}", "github": "", "project": "", "reviewers": "kydQ;F6zy;zmPi;d1bb", "pdf_size": 1192868, "rating": "7;7;7;7", "confidence": "3;5;3;5", "soundness": "4;4;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "443;255;204;219", "wc_strengths": "257;193;109;50", "wc_weaknesses": "196;112;68;80", "wc_questions": "120;165;63;74", "wc_limitations": "2;9;18;9", "wc_review": "1018;734;462;432", "wc_reply_reviewers": "67;82;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 280.25, 95.77414839088887 ], "wc_strengths_avg": [ 152.25, 78.99169260118434 ], "wc_weaknesses_avg": [ 114.0, 50.0 ], "wc_questions_avg": [ 105.5, 40.46294601237038 ], "wc_limitations_avg": [ 9.5, 5.678908345800274 ], "wc_review_avg": [ 661.5, 237.07540994375609 ], "wc_reply_reviewers_avg": [ 37.25, 37.62562291843153 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1582069732512720219&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;mit.edu;;ttic.edu;berkeley.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;Toyota Technological Institute at Chicago;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.tti-chicago.org;https://www.berkeley.edu", "aff_unique_abbr": "MIT;TTI Chicago;UC Berkeley", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Intrinsic Dimension Estimation for Robust Detection of AI-Generated Texts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72624", "id": "8uOZ0kNji6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7baa48bc166aa2013d78cbdc15010530-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8uOZ0kNji6", "openreview": "https://openreview.net/forum?id=8uOZ0kNji6", "poster": "/media/PosterPDFs/NeurIPS%202023/72624.png?t=1701451368.9923096", "slides": "https://nips.cc/virtual/2023/poster/72624", "video": "https://nips.cc/virtual/2023/poster/72624", "author_site": "Eduard Tulchinskii, Kristian Kuznetsov, Laida Kushnareva, Daniil Cherniavskii, Sergey Nikolenko, Evgeny Burnaev, Serguei Barannikov, Irina Piontkovskaya", "tldr": "", "abstract": "The rapidly increasing quality of AI-generated content makes it difficult to distinguish between human and AI-generated texts, which may lead to undesirable consequences for society. Therefore, it becomes increasingly important to study the properties of human texts that are invariant over text domains and varying proficiency of human writers, can be easily calculated for any language, and can robustly separate natural and AI-generated texts regardless of the generation model and sampling method.
In this work, we propose such an invariant of human texts, namely the intrinsic dimensionality of the manifold underlying the set of embeddings of a given text sample. We show that the average intrinsic dimensionality of fluent texts in natural language hovers around $9$ for several alphabet-based languages and around $7$ for Chinese, while the average intrinsic dimensionality of AI-generated texts for each language is $\\approx 1.5$ lower, with a clear statistical separation between human-generated and AI-generated distributions. This property allows us to build a score-based artificial text detector. The proposed detector's accuracy is stable over text domains, generator models, and human writer proficiency levels, outperforming SOTA detectors in model-agnostic and cross-domain scenarios by a significant margin.", "keywords": "generated texts detection;intrinsic dimension;TDA;Persistent Homology;ChatGPT", "primary_area": "", "supplementary_material": "/attachment/06661950755456b01325e80e1be15a9eb16bd789.zip", "author": "Eduard Tulchinskii;Kristian Kuznetsov;Kushnareva Laida;Daniil Cherniavskii;Sergey Nikolenko;Evgeny Burnaev;Serguei Barannikov;Irina Piontkovskaya", "authorids": "~Eduard_Tulchinskii1;~Kristian_Kuznetsov1;~Kushnareva_Laida1;~Daniil_Cherniavskii1;~Sergey_Nikolenko1;~Evgeny_Burnaev1;~Serguei_Barannikov1;~Irina_Piontkovskaya2", "gender": "M;M;F;M;M;M;;F", "homepage": ";;https://m.vk.com/kushnareva_laida;https://www.dcherniavskii.com/;http://logic.pdmi.ras.ru/~sergey/;http://faculty.skoltech.ru/people/evgenyburnaev;;", "dblp": "320/8026;334/7843;276/6159;;50/1870.html;144/7845;255/5203;211/7823", "google_scholar": "https://scholar.google.com/citations?hl=ru;keUwPTwAAAAJ;rsTb_hYAAAAJ;646PbvoAAAAJ;https://scholar.google.ru/citations?hl=ru;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.fr/citations?user=-soT8KcAAAAJ;", "orcid": ";;;;0000-0001-7787-2251;0000-0001-8424-0690;0000-0002-9323-0651;", "linkedin": ";;laida-kushnareva/;https://linkedin.com/in/daniil-cherniavskii-a2861128a;;;;irina-piontkovskaya-6b10b0b5/?originalSubdomain=ru", "or_profile": "~Eduard_Tulchinskii1;~Kristian_Kuznetsov1;~Kushnareva_Laida1;~Daniil_Cherniavskii1;~Sergey_Nikolenko1;~Evgeny_Burnaev1;~Serguei_Barannikov1;~Irina_Piontkovskaya2", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Huawei;University of Amsterdam;Steklov Institute of Mathematics at St.
Petersburg;Skolkovo Institute of Science and Technology;CNRS, Institut Mathematiques de Jussieu, Paris Diderot University;Huawei Technologies Ltd.", "aff_domain": "skoltech.ru;skoltech.ru;huawei-partners.com;uva.nl;pdmi.ras.ru;skoltech.ru;imj-prg.fr;huawei.com", "position": "PhD student;MS student;Senior Academic Consultant;PhD student;Assistant Professor;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\ntulchinskii2023intrinsic,\ntitle={Intrinsic Dimension Estimation for Robust Detection of {AI}-Generated Texts},\nauthor={Eduard Tulchinskii and Kristian Kuznetsov and Kushnareva Laida and Daniil Cherniavskii and Sergey Nikolenko and Evgeny Burnaev and Serguei Barannikov and Irina Piontkovskaya},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8uOZ0kNji6}\n}", "github": "", "project": "", "reviewers": "SSuo;98X7;NYsD;oVCJ", "pdf_size": 1301321, "rating": "5;7;7;7", "confidence": "3;3;4;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;3;2", "wc_summary": "51;157;128;207", "wc_strengths": "31;57;145;142", "wc_weaknesses": "27;130;499;332", "wc_questions": "44;1;104;147", "wc_limitations": "3;33;50;5", "wc_review": "156;378;926;833", "wc_reply_reviewers": "0;44;313;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 135.75, 56.50387154877089 ], "wc_strengths_avg": [ 93.75, 50.60323606252865 ], "wc_weaknesses_avg": [ 247.0, 182.22101964372825 ], "wc_questions_avg": [ 74.0, 55.80770556114989 ], "wc_limitations_avg": [ 22.75, 19.702474463883973 ], "wc_review_avg": [ 573.25, 317.85324837100535 ], "wc_reply_reviewers_avg": [ 92.75, 128.15103394042515 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14223206596892686635&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "skoltech.ru;skoltech.ru;huawei-partners.com;uva.nl;pdmi.ras.ru;skoltech.ru;imj-prg.fr;huawei.com", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0;4;1", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Huawei;University of Amsterdam;Steklov Institute of Mathematics;Paris Diderot University", "aff_unique_dep": ";Huawei Technologies Co., Ltd.;;Mathematics;Institut Mathematiques de Jussieu", "aff_unique_url": "https://www.skoltech.ru;https://www.huawei.com;https://www.uva.nl;http://www.pdmi.ras.ru;https://www.univ-paris-diderot.fr", "aff_unique_abbr": "Skoltech;Huawei;UvA;PDMI;Paris Diderot", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";St. 
Petersburg;Paris", "aff_country_unique_index": "0;0;1;2;0;0;3;1", "aff_country_unique": "Russian Federation;China;Netherlands;France" }, { "title": "OpenMask3D: Open-Vocabulary 3D Instance Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72623", "id": "8vuDHCxrmy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d77b5482e38339a8068791d939126be2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8vuDHCxrmy", "openreview": "https://openreview.net/forum?id=8vuDHCxrmy", "poster": "/media/PosterPDFs/NeurIPS%202023/72623.png?t=1699620640.613892", "slides": "https://nips.cc/virtual/2023/poster/72623", "video": "https://nips.cc/virtual/2023/poster/72623", "author_site": "Ayca Takmaz, Elisabetta Fedele, Robert Sumner, Marc Pollefeys, Federico Tombari, Francis Engelmann", "tldr": "", "abstract": "We introduce the task of open-vocabulary 3D instance segmentation. Current approaches for 3D instance segmentation can typically only recognize object categories from a pre-defined closed set of classes that are annotated in the training datasets. This results in important limitations for real-world applications where one might need to perform tasks guided by novel, open-vocabulary queries related to a wide variety of objects. Recently, open-vocabulary 3D scene understanding methods have emerged to address this problem by learning queryable features for each point in the scene. While such a representation can be directly employed to perform semantic segmentation, existing methods cannot separate multiple object instances. In this work, we address this limitation, and propose OpenMask3D, which is a zero-shot approach for open-vocabulary 3D instance segmentation. Guided by predicted class-agnostic 3D instance masks, our model aggregates per-mask features via multi-view fusion of CLIP-based image embeddings. Experiments and ablation studies on ScanNet200 and Replica show that OpenMask3D outperforms other open-vocabulary methods, especially on the long-tail distribution. 
Qualitative experiments further showcase OpenMask3D\u2019s ability to segment object properties based on free-form queries describing geometry, affordances, and materials.", "keywords": "open-world;open-vocabulary;3D vision;point cloud;instance segmentation;3D instance segmentation", "primary_area": "", "supplementary_material": "/attachment/0a93e07550973097b0ab06f6cf981d4aa22a6e84.zip", "author": "Ay\u00e7a Takmaz;Elisabetta Fedele;Robert Sumner;Marc Pollefeys;Federico Tombari;Francis Engelmann", "authorids": "~Ay\u00e7a_Takmaz1;~Elisabetta_Fedele1;~Robert_Sumner1;~Marc_Pollefeys2;~Federico_Tombari1;~Francis_Engelmann1", "gender": "F;F;M;M;M;", "homepage": "https://aycatakmaz.github.io/;;https://studios.disneyresearch.com/people/bob-sumner/;;https://federicotombari.github.io/;", "dblp": "282/3873;;;p/MarcPollefeys;16/3539;", "google_scholar": "q8cyjtcAAAAJ;;;YYH0BjEAAAAJ;TFsE4BIAAAAJ;", "orcid": ";;;;0000-0001-5598-5212;", "linkedin": ";elisabetta-f-0a35bb113/;;marc-pollefeys-30a7075/;fedet/;", "or_profile": "~Ay\u00e7a_Takmaz1;~Elisabetta_Fedele1;~Robert_Sumner1;~Marc_Pollefeys2;~Federico_Tombari1;~Francis_Engelmann1", "aff": "ETH Zurich;EPFL - EPF Lausanne;Disney Research, Disney Research;Swiss Federal Institute of Technology;Technical University Munich (TUM);", "aff_domain": "inf.ethz.ch;epfl.ch;disneyresearch.com;ethz.ch;in.tum.de;", "position": "PhD student;Intern;Principal Researcher;Full Professor;Lecturer;", "bibtex": "@inproceedings{\ntakmaz2023openmaskd,\ntitle={OpenMask3D: Open-Vocabulary 3D Instance Segmentation},\nauthor={Ay{\\c{c}}a Takmaz and Elisabetta Fedele and Robert Sumner and Marc Pollefeys and Federico Tombari and Francis Engelmann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8vuDHCxrmy}\n}", "github": "", "project": "", "reviewers": "zuiw;X8Ds;STxk;cqVz;7yua", "pdf_size": 35310544, "rating": "4;4;5;5;6", "confidence": "3;4;3;5;4", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;2", "presentation": "3;3;3;4;3", "wc_summary": "63;39;124;76;157", "wc_strengths": "124;32;124;43;73", "wc_weaknesses": "164;133;211;99;540", "wc_questions": "45;58;58;86;165", "wc_limitations": "1;2;43;13;29", "wc_review": "397;264;560;317;964", "wc_reply_reviewers": "0;0;11;0;181", "wc_reply_authors": "60;60;0;60;186", "reply_reviewers": "0;0;1;0;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.8, 42.78971839122104 ], "wc_strengths_avg": [ 79.2, 38.96357273146291 ], "wc_weaknesses_avg": [ 229.4, 159.61528748838566 ], "wc_questions_avg": [ 82.4, 43.41704734318077 ], "wc_limitations_avg": [ 17.6, 16.218507946170636 ], "wc_review_avg": [ 500.4, 252.46354192239323 ], "wc_reply_reviewers_avg": [ 38.4, 71.4271657004532 ], "wc_reply_authors_avg": [ 73.2, 60.99967213026641 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.28571428571428564, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1354431204285222970&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "inf.ethz.ch;epfl.ch;disneyresearch.com;ethz.ch;in.tum.de;", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "ETH Zurich;EPFL;Disney 
Research;Swiss Federal Institute of Technology;Technical University Munich", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ethz.ch;https://www.epfl.ch;https://research.disney.com;https://www.ethz.ch;https://www.tum.de", "aff_unique_abbr": "ETHZ;EPFL;Disney Research;ETH Zurich;TUM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "Switzerland;United States;Germany" }, { "title": "TMT-VIS: Taxonomy-aware Multi-dataset Joint Training for Video Instance Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72622", "id": "8xTOtxinMH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a62d9a4c03377d1175b8859b4cc16d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8xTOtxinMH", "openreview": "https://openreview.net/forum?id=8xTOtxinMH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72622", "video": "https://nips.cc/virtual/2023/poster/72622", "author_site": "Rongkun Zheng, Lu Qi, Xi Chen, Yi Wang, Kun Wang, Yu Qiao, Hengshuang Zhao", "tldr": "", "abstract": "Training on large-scale datasets can boost the performance of video instance segmentation while the annotated datasets for VIS are hard to scale up due to the high labor cost. What we possess are numerous isolated filed-specific datasets, thus, it is appealing to jointly train models across the aggregation of datasets to enhance data volume and diversity. However, due to the heterogeneity in category space, as mask precision increase with the data volume, simply utilizing multiple datasets will dilute the attention of models on different taxonomy. Thus, increasing the data scale and enriching taxonomy space while improving classification precision is important. In this work, we analyze that providing extra taxonomy information can help models concentrate on specific taxonomy, and propose our model named Taxonomy-aware Multi-dataset Joint Training for Video Instance Segmentation (TMT-VIS) to address this vital challenge. Specifically, we design a two-stage taxonomy aggregation module that first compiles taxonomy information from input videos and then aggregates these taxonomy priors into instance queries before the transformer decoder. We conduct extensive experimental evaluations on four popular and challenging benchmarks, including YouTube-VIS 2019, YouTube-VIS 2021, OVIS, and UVO. Our model shows significant improvement over the baseline solutions, and sets new state-of-the-art records on all these benchmarks. These appealing and encouraging results demonstrate the effectiveness and generality of our proposed approach. 
The code and trained models will be publicly available.", "keywords": "taxonomy-aware;multiple-datasets;video instance segementation", "primary_area": "", "supplementary_material": "/attachment/edc0657fb6a75cb9543bf1d8849d1fde6815be30.zip", "author": "rongkun Zheng;Lu Qi;Xi Chen;Yi Wang;Kun Wang;Yu Qiao;Hengshuang Zhao", "authorids": "~rongkun_Zheng1;~Lu_Qi1;~Xi_Chen30;~Yi_Wang19;~Kun_Wang8;~Yu_Qiao1;~Hengshuang_Zhao2", "gender": "M;M;M;M;M;;M", "homepage": ";https://www.luqi.info;;https://shepnerd.github.io/;https://twitter.com/wk910930;;https://hszhao.github.io", "dblp": ";;;17/221-33;;;185/7848", "google_scholar": ";https://scholar.google.com.hk/citations?user=SSI90d4AAAAJ;INISnXkAAAAJ;Xm2M8UwAAAAJ;;;4uE10I0AAAAJ", "orcid": ";;;;;;0000-0001-8277-2706", "linkedin": "rongkun-zheng-4886b6153/;;;;;;hengshuang-zhao-347b8391/?originalSubdomain=hk", "or_profile": "~rongkun_Zheng1;~Lu_Qi1;~Xi_Chen30;~Yi_Wang19;~Kun_Wang8;~Yu_Qiao1;~Hengshuang_Zhao2", "aff": "University of Hong Kong;University of California, Merced;the University of Hong Kong, University of Hong Kong;Shanghai AI Laboratory;SenseTime Group Ltd;;The University of Hong Kong", "aff_domain": "hku.hk;ucmerced.edu;cs.hku.hk;pjlab.org.cn;sensetime.com;;hku.hk", "position": "PhD student;Postdoc;PhD student;Researcher;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nzheng2023tmtvis,\ntitle={{TMT}-{VIS}: Taxonomy-aware Multi-dataset Joint Training for Video Instance Segmentation},\nauthor={rongkun Zheng and Lu Qi and Xi Chen and Yi Wang and Kun Wang and Yu Qiao and Hengshuang Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8xTOtxinMH}\n}", "github": "", "project": "", "reviewers": "VJ6M;k3N8;wd7m;fxcA;5w4z", "pdf_size": 1779673, "rating": "4;5;5;6;6", "confidence": "5;4;4;4;5", "soundness": "2;3;3;3;1", "novelty": "2;2;3;2;1", "presentation": "3;3;3;3;1", "wc_summary": "112;43;43;60;43", "wc_strengths": "45;29;50;76;16", "wc_weaknesses": "40;218;172;124;231", "wc_questions": "136;9;32;32;8", "wc_limitations": "4;13;27;1;13", "wc_review": "337;312;324;293;311", "wc_reply_reviewers": "64;120;150;20;330", "wc_reply_authors": "549;796;322;0;410", "reply_reviewers": "1;1;1;1;4", "reply_authors": "3;2;2;1;4", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 60.2, 26.72377218882095 ], "wc_strengths_avg": [ 43.2, 20.33125672456083 ], "wc_weaknesses_avg": [ 157.0, 69.57010852370435 ], "wc_questions_avg": [ 43.4, 47.478837390989256 ], "wc_limitations_avg": [ 11.6, 9.068627239003707 ], "wc_review_avg": [ 315.4, 14.65059725744995 ], "wc_reply_reviewers_avg": [ 136.8, 106.51272224480981 ], "wc_reply_authors_avg": [ 415.4, 262.3033358537401 ], "reply_reviewers_avg": [ 1.6, 1.2 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2182178902359923, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17145544452428786565&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "hku.hk;ucmerced.edu;cs.hku.hk;pjlab.org.cn;sensetime.com;;hku.hk", "author_num": 7, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "University of Hong Kong;University of California, Merced;Shanghai AI Laboratory;SenseTime Group", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.hku.hk;https://www.ucmerced.edu;https://www.shanghai-ai-lab.com;https://www.sensetime.com", "aff_unique_abbr": "HKU;UC Merced;SAIL;SenseTime", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Hong Kong SAR;Merced;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Training neural operators to preserve invariant measures of chaotic attractors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72621", "id": "8xx0pyMOW1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57d7e7e1593ad1ab6818c258fa5654ce-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=8xx0pyMOW1", "openreview": "https://openreview.net/forum?id=8xx0pyMOW1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72621", "video": "https://nips.cc/virtual/2023/poster/72621", "author_site": "Ruoxi Jiang, Peter Y. Lu, Elena Orlova, Rebecca Willett", "tldr": "", "abstract": "Chaotic systems make long-horizon forecasts difficult because small perturbations in initial conditions cause trajectories to diverge at an exponential rate. In this setting, neural operators trained to minimize squared error losses, while capable of accurate short-term forecasts, often fail to reproduce statistical or structural properties of the dynamics over longer time horizons and can yield degenerate results. In this paper, we propose an alternative framework designed to preserve invariant measures of chaotic attractors that characterize the time-invariant statistical properties of the dynamics. Specifically, in the multi-environment setting (where each sample trajectory is governed by slightly different dynamics), we consider two novel approaches to training with noisy data. First, we propose a loss based on the optimal transport distance between the observed dynamics and the neural operator outputs. This approach requires expert knowledge of the underlying physics to determine what statistical features should be included in the optimal transport loss. Second, we show that a contrastive learning framework, which does not require any specialized prior knowledge, can preserve statistical properties of the dynamics nearly as well as the optimal transport approach. On a variety of chaotic systems, our method is shown empirically to preserve invariant measures of chaotic attractors.", "keywords": "Neural operators;contrastive learning;optimal transport;chaotic attractors;invariant measures", "primary_area": "", "supplementary_material": "/attachment/e7116ea62f1aaa9abeaef9c0014a17e33ceb316e.pdf", "author": "Ruoxi Jiang;Peter Y. Lu;Elena Orlova;Rebecca Willett", "authorids": "~Ruoxi_Jiang1;~Peter_Y._Lu1;~Elena_Orlova1;~Rebecca_Willett1", "gender": "F;;;F", "homepage": "https://roxie62.github.io/;;;https://willett.psd.uchicago.edu/", "dblp": "296/0221;;;w/RebeccaWillett", "google_scholar": "fbVZpI4AAAAJ;;;bGRVPl8AAAAJ", "orcid": ";;;0000-0002-8109-7582", "linkedin": "ruoxi-jiang/;;;rebecca-willett-90b95973/", "or_profile": "~Ruoxi_Jiang1;~Peter_Y._Lu1;~Elena_Orlova1;~Rebecca_Willett1", "aff": "University of Chicago;;;University of Chicago", "aff_domain": "uchicago.edu;;;uchicago.edu", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\njiang2023training,\ntitle={Training neural operators to preserve invariant measures of chaotic attractors},\nauthor={Ruoxi Jiang and Peter Y. 
Lu and Elena Orlova and Rebecca Willett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=8xx0pyMOW1}\n}", "github": "", "project": "", "reviewers": "tYBd;fKLU;DhyB;G25b;GF1X", "pdf_size": 11062229, "rating": "5;5;6;6;7", "confidence": "3;4;4;4;4", "soundness": "3;2;2;3;2", "novelty": "2;2;3;3;4", "presentation": "3;3;2;4;3", "wc_summary": "53;86;85;70;84", "wc_strengths": "45;20;38;53;77", "wc_weaknesses": "44;28;147;34;292", "wc_questions": "170;80;113;133;33", "wc_limitations": "19;6;7;5;1", "wc_review": "331;220;390;295;487", "wc_reply_reviewers": "48;156;40;0;417", "wc_reply_authors": "37;1084;38;0;1915", "reply_reviewers": "1;1;1;0;3", "reply_authors": "2;3;2;1;5", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 75.6, 12.7216351150314 ], "wc_strengths_avg": [ 46.6, 18.7040102651811 ], "wc_weaknesses_avg": [ 109.0, 101.33508770411164 ], "wc_questions_avg": [ 105.8, 46.645042609049035 ], "wc_limitations_avg": [ 7.6, 6.053098380168622 ], "wc_review_avg": [ 344.6, 90.01022164176689 ], "wc_reply_reviewers_avg": [ 132.2, 151.48914152506111 ], "wc_reply_authors_avg": [ 614.8, 768.7911029662089 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.6, 1.3564659966250536 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8750765237359626328&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uchicago.edu;;;uchicago.edu", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GUST: Combinatorial Generalization by Unsupervised Grouping with Neuronal Coherence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72620", "id": "90O5cvFZkZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67d5c7dd7930dfce2725defdb0552b6e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=90O5cvFZkZ", "openreview": "https://openreview.net/forum?id=90O5cvFZkZ", "poster": "/media/PosterPDFs/NeurIPS%202023/72620.png?t=1701900391.764882", "slides": "https://nips.cc/virtual/2023/poster/72620", "video": "https://nips.cc/virtual/2023/poster/72620", "author_site": "Hao Zheng, Hui Lin, Rong Zhao", "tldr": "", "abstract": "Dynamically grouping sensory information into structured entities is essential for understanding a world of combinatorial nature. However, this grouping ability, and therefore combinatorial generalization, remains a challenge for artificial neural networks. Inspired by the evidence that successful grouping is indicated by neuronal coherence in the human brain, we introduce GUST (Grouping Unsupervisely by Spike Timing network), an iterative network architecture with biological constraints to bias the network towards a dynamical state of neuronal coherence that softly reflects the grouping information in the temporal structure of its spiking activity. We evaluate and analyze the model on synthetic datasets.
Interestingly, the segregation ability is directly learned from superimposed stimuli with a succinct unsupervised objective. Two learning stages are present, from coarsely perceiving global features to additionally capturing local features. Further, the learned symbol-like building blocks can be systematically composed to represent novel scenes in a bio-plausible manner.", "keywords": "neuronal coherence;combinatorial generalization;perceptual grouping;unsupervised learning", "primary_area": "", "supplementary_material": "/attachment/208603a8dfe287ed5d82ef5ad356d80f2ccf0108.pdf", "author": "Hao Zheng;Hui Lin;Rong Zhao", "authorids": "~Hao_Zheng2;~Hui_Lin5;~Rong_Zhao3", "gender": "M;M;F", "homepage": "https://bcs.mit.edu/directory/hao-zheng;https://www.researchgate.net/profile/Hui-Lin-59;http://faculty.dpi.tsinghua.edu.cn/home/r_zhao", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hao_Zheng2;~Hui_Lin5;~Rong_Zhao3", "aff": "Tsinghua University;Electronic Engineering, Tsinghua University, Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzheng2023gust,\ntitle={{GUST}: Combinatorial Generalization by Unsupervised Grouping with Neuronal Coherence},\nauthor={Hao Zheng and Hui Lin and Rong Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=90O5cvFZkZ}\n}", "github": "", "project": "", "reviewers": "9PtM;dadF;fSiP;WxQh", "pdf_size": 4156533, "rating": "5;5;7;8", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "108;66;176;166", "wc_strengths": "73;124;188;146", "wc_weaknesses": "189;437;124;92", "wc_questions": "159;9;400;304", "wc_limitations": "31;133;136;8", "wc_review": "560;769;1024;716", "wc_reply_reviewers": "112;0;153;0", "wc_reply_authors": "96;0;92;0", "reply_reviewers": "2;0;2;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 129.0, 44.68780594300866 ], "wc_strengths_avg": [ 132.75, 41.4570560942284 ], "wc_weaknesses_avg": [ 210.5, 135.3597059689478 ], "wc_questions_avg": [ 218.0, 148.05573274952914 ], "wc_limitations_avg": [ 77.0, 58.08183881386677 ], "wc_review_avg": [ 767.25, 166.96013745801721 ], "wc_reply_reviewers_avg": [ 66.25, 67.81730973726398 ], "wc_reply_authors_avg": [ 47.0, 47.02127178203499 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8598382815503067329&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Distribution-Free Statistical Dispersion Control for Societal Applications", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72619", "id": "917crxqJdA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ea46207ec9bda974b140fe11d8dd727-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=917crxqJdA", "openreview": "https://openreview.net/forum?id=917crxqJdA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72619", "video": "https://nips.cc/virtual/2023/poster/72619", "author_site": "Zhun Deng, Thomas Zollo, Jake Snell, Toniann Pitassi, Richard Zemel", "tldr": "", "abstract": "Explicit finite-sample statistical guarantees on model performance are an important ingredient in responsible machine learning. Previous work has focused mainly on bounding either the expected loss of a predictor or the probability that an individual prediction will incur a loss value in a specified range. However, for many high-stakes applications it is crucial to understand and control the \\textit{dispersion} of a loss distribution, or the extent to which different members of a population experience unequal effects of algorithmic decisions. We initiate the study of distribution-free control of statistical dispersion measures with societal implications and propose a simple yet flexible framework that allows us to handle a much richer class of statistical functionals beyond previous work. Our methods are verified through experiments in toxic comment detection, medical imaging, and film recommendation.", "keywords": "societal dispersion;distribution-free uncertainty quantification", "primary_area": "", "supplementary_material": "/attachment/a43faf53295aeca27718e0eac0526373dd023ec7.zip", "author": "Zhun Deng;Thomas P Zollo;Jake Snell;Toniann Pitassi;Richard Zemel", "authorids": "~Zhun_Deng1;~Thomas_P_Zollo1;~Jake_Snell1;~Toniann_Pitassi3;~Richard_Zemel1", "gender": "M;M;M;F;M", "homepage": "https://www.zhundeng.org/;https://www.thomaszollo.com/;https://www.jakesnell.com;http://www.cs.columbia.edu/~toni;http://www.cs.columbia.edu/~zemel", "dblp": "204/4353;336/8946;172/1406;p/TPitassi;16/6366", "google_scholar": "nkmi-moAAAAJ;Xp7LgAwAAAAJ;MbXKAK8AAAAJ;;https://scholar.google.ca/citations?user=iBeDoRAAAAAJ", "orcid": ";;;;", "linkedin": ";thomas-zollo/;;;", "or_profile": "~Zhun_Deng1;~Thomas_P_Zollo1;~Jake_Snell1;~Toniann_Pitassi3;~Richard_Zemel1", "aff": "Columbia University;Columbia University;Princeton University;Columbia University;Department of Computer Science, University of Toronto", "aff_domain": "columbia.edu;columbia.edu;princeton.edu;columbia.edu;cs.toronto.edu", "position": "Postdoc;PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndeng2023distributionfree,\ntitle={Distribution-Free Statistical Dispersion Control for Societal Applications},\nauthor={Zhun Deng and Thomas P Zollo and Jake Snell and Toniann Pitassi and Richard Zemel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=917crxqJdA}\n}", "github": "", "project": "", "reviewers": "Jihe;21Hg;6JyR;DzT2", "pdf_size": 5372081, "rating": "6;7;7;7", "confidence": "4;3;3;5", "soundness": "4;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "48;83;104;88", "wc_strengths": "98;102;165;94", "wc_weaknesses": "848;214;185;362", "wc_questions": "11;23;149;254", "wc_limitations": "1;7;196;49", "wc_review": "1006;429;799;847", "wc_reply_reviewers": "55;50;116;0", "wc_reply_authors": "11;0;256;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 
6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.75, 20.437404434027332 ], "wc_strengths_avg": [ 114.75, 29.14939965076468 ], "wc_weaknesses_avg": [ 402.25, 265.9646358070937 ], "wc_questions_avg": [ 109.25, 99.52983221125211 ], "wc_limitations_avg": [ 63.25, 78.8428024616071 ], "wc_review_avg": [ 770.25, 211.3922597920747 ], "wc_reply_reviewers_avg": [ 55.25, 41.14228360215315 ], "wc_reply_authors_avg": [ 66.75, 109.35578402626905 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7611996126144230955&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "columbia.edu;columbia.edu;princeton.edu;columbia.edu;cs.toronto.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Columbia University;Princeton University;University of Toronto", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.columbia.edu;https://www.princeton.edu;https://www.utoronto.ca", "aff_unique_abbr": "Columbia;Princeton;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Self-supervised Object-Centric Learning for Videos", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72618", "id": "919tWtJPXe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67b0e7c7c2a5780aeefe3b79caac106e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=919tWtJPXe", "openreview": "https://openreview.net/forum?id=919tWtJPXe", "poster": "/media/PosterPDFs/NeurIPS%202023/72618.png?t=1699430952.1487164", "slides": "https://nips.cc/virtual/2023/poster/72618", "video": "https://nips.cc/virtual/2023/poster/72618", "author_site": "G\u00f6rkay Aydemir, Weidi Xie, Fatma Guney", "tldr": "", "abstract": "Unsupervised multi-object segmentation has shown impressive results on images by utilizing powerful semantics learned from self-supervised pretraining. An additional modality such as depth or motion is often used to facilitate the segmentation in video sequences. However, the performance improvements observed in synthetic sequences, which rely on the robustness of an additional cue, do not translate to more challenging real-world scenarios. In this paper, we propose the first fully unsupervised method for segmenting multiple objects in real-world sequences. Our object-centric learning framework spatially binds objects to slots on each frame and then relates these slots across frames. From these temporally-aware slots, the training objective is to reconstruct the middle frame in a high-level semantic feature space. We propose a masking strategy by dropping a significant portion of tokens in the feature space for efficiency and regularization. Additionally, we address over-clustering by merging slots based on similarity. 
Our method can successfully segment multiple instances of complex and high-variety classes in YouTube videos.", "keywords": "Unsupervised Object Discovery;Unsupervised Video Object Segmentation;Object-Centric Learning;Unsupervised Video Multi Object Segmentation", "primary_area": "", "supplementary_material": "/attachment/c4b2e7d7a878d31da96cc18b53f6d9cce7c50bf7.zip", "author": "G\u00f6rkay Aydemir;Weidi Xie;Fatma Guney", "authorids": "~G\u00f6rkay_Aydemir1;~Weidi_Xie3;~Fatma_Guney1", "gender": "M;M;F", "homepage": "https://gorkaydemir.github.io/;https://weidixie.github.io;https://mysite.ku.edu.tr/fguney/", "dblp": "323/7577;199/1718;27/11433", "google_scholar": "AtT2D54AAAAJ;https://scholar.google.co.uk/citations?user=Vtrqj4gAAAAJ;https://scholar.google.de/citations?user=g3UitywAAAAJ", "orcid": ";;", "linkedin": "gorkaydemir/;;", "or_profile": "~G\u00f6rkay_Aydemir1;~Weidi_Xie3;~Fatma_Guney1", "aff": "Ko\u00e7 University;Shanghai Jiaotong University;Koc University", "aff_domain": "ku.edu.tr;sjtu.edu.cn;ku.edu.tr", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\naydemir2023selfsupervised,\ntitle={Self-supervised Object-Centric Learning for Videos},\nauthor={G{\\\"o}rkay Aydemir and Weidi Xie and Fatma Guney},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=919tWtJPXe}\n}", "github": "", "project": "", "reviewers": "gzBP;xWs8;Sj2S;xM1B;zEVX", "pdf_size": 5605657, "rating": "5;5;5;5;7", "confidence": "4;4;3;4;3", "soundness": "3;3;3;2;3", "novelty": "2;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "101;51;95;73;101", "wc_strengths": "47;43;38;59;98", "wc_weaknesses": "20;37;251;173;24", "wc_questions": "54;44;5;90;93", "wc_limitations": "26;2;1;21;68", "wc_review": "248;177;390;416;384", "wc_reply_reviewers": "115;74;20;174;16", "wc_reply_authors": "343;181;18;115;0", "reply_reviewers": "2;1;1;2;1", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.2, 19.538679586911698 ], "wc_strengths_avg": [ 57.0, 21.642550681470055 ], "wc_weaknesses_avg": [ 101.0, 94.09569596958194 ], "wc_questions_avg": [ 57.2, 32.45550800711645 ], "wc_limitations_avg": [ 23.6, 24.335981591051553 ], "wc_review_avg": [ 323.0, 93.59487165438073 ], "wc_reply_reviewers_avg": [ 79.8, 59.65366711276013 ], "wc_reply_authors_avg": [ 131.4, 124.60272870206335 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6123724356957947, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13541860928864070970&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ku.edu.tr;sjtu.edu.cn;ku.edu.tr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ko\u00e7 University;Shanghai Jiao Tong University;Koc University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ku.edu.tr;https://www.sjtu.edu.cn;https://www.koc.edu.tr", "aff_unique_abbr": "Ko\u00e7;SJTU;Koc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "T\u00fcrkiye;China" }, { "title": "UltraRE: Enhancing RecEraser for Recommendation Unlearning via Error Decomposition", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/72617", "id": "93NLxUojvc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29a0ea49a103a233b17c0705cdeccb66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=93NLxUojvc", "openreview": "https://openreview.net/forum?id=93NLxUojvc", "poster": "/media/PosterPDFs/NeurIPS%202023/72617.png?t=1698038948.2506104", "slides": "https://nips.cc/virtual/2023/poster/72617", "video": "https://nips.cc/virtual/2023/poster/72617", "author_site": "Yuyuan Li, Chaochao Chen, Yizhao Zhang, Weiming Liu, Lingjuan Lyu, Xiaolin Zheng, Dan Meng, Jun Wang", "tldr": "", "abstract": "With growing concerns regarding privacy in machine learning models, regulations have committed to granting individuals the right to be forgotten while mandating companies to develop non-discriminatory machine learning systems, thereby fueling the study of the machine unlearning problem. Our attention is directed toward a practical unlearning scenario, i.e., recommendation unlearning. As the state-of-the-art framework, i.e., RecEraser, naturally achieves full unlearning completeness, our objective is to enhance it in terms of model utility and unlearning efficiency. In this paper, we rethink RecEraser from an ensemble-based perspective and focus on its three potential losses, i.e., redundancy, relevance, and combination. Under the theoretical guidance of the above three losses, we propose a new framework named UltraRE, which simplifies and powers RecEraser for recommendation tasks. Specifically, for redundancy loss, we incorporate transport weights in the clustering algorithm to optimize the equilibrium between collaboration and balance while enhancing efficiency; for relevance loss, we ensure that sub-models reach convergence on their respective group data; for combination loss, we simplify the combination estimator without compromising its efficacy. 
Extensive experiments on three real-world datasets demonstrate the effectiveness of UltraRE.", "keywords": "recommendation unlearning;machine unlearning;recommender systems;ensemble learning", "primary_area": "", "supplementary_material": "/attachment/1b8f6e33d563edd0a7efc784872ac90237be4a25.pdf", "author": "Yuyuan Li;Chaochao Chen;Yizhao Zhang;Weiming Liu;Lingjuan Lyu;Xiaolin Zheng;Dan Meng;Jun Wang", "authorids": "~Yuyuan_Li1;~Chaochao_Chen3;~Yizhao_Zhang1;~Weiming_Liu2;~Lingjuan_Lyu1;~Xiaolin_Zheng1;~Dan_Meng2;~Jun_Wang38", "gender": ";;M;F;M;F;M;M", "homepage": ";https://sites.google.com/site/ccchomepage/;https://www.linkedin.com/in/yizhao-zhang-84aa18151/;https://sites.google.com/view/lingjuan-lyu;https://person.zju.edu.cn/xlzheng;;https://dblp.org/pid/w/JunWang20.html;https://www.github.com/459548764", "dblp": "35/11288;26/1492-1;;178/9876;09/5763;;w/JunWang20;00/105-5", "google_scholar": "v4e49qEAAAAJ;qZTMyzwAAAAJ;;;MY23M60AAAAJ;Mtd7u-QAAAAJ;8alC56MAAAAJ;", "orcid": "0000-0003-4896-2885;0000-0003-1419-964X;;;0000-0001-5483-0366;0000-0003-1980-9283;0000-0002-0481-5341;0000-0002-4115-7667", "linkedin": ";ccchomepage/;yizhao-zhang-84aa18151/;;;;;", "or_profile": "~Yuyuan_Li1;~Chaochao_Chen3;~Yizhao_Zhang1;~Lingjuan_Lyu1;~Xiaolin_Zheng1;~Dan_Meng2;~Jun_Wang38;~Liu_Weiming2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Sony;Zhejiang University;OPPO;OPPO Research Institute ;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;sony.com;zju.edu.cn;oppo.com;oppo.com;zju.edu.cn", "position": "PhD student;Distinguished Research Fellow;MS student;scientist;Full Professor;Researcher;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nli2023ultrare,\ntitle={Ultra{RE}: Enhancing RecEraser for Recommendation Unlearning via Error Decomposition},\nauthor={Yuyuan Li and Chaochao Chen and Yizhao Zhang and Weiming Liu and Lingjuan Lyu and Xiaolin Zheng and Dan Meng and Jun Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=93NLxUojvc}\n}", "github": "", "project": "", "reviewers": "zuBB;55Fw;Jkju;oK8f", "pdf_size": 557713, "rating": "4;6;7;7", "confidence": "4;4;5;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "118;91;89;79", "wc_strengths": "50;66;122;107", "wc_weaknesses": "99;41;92;57", "wc_questions": "32;89;75;38", "wc_limitations": "11;1;1;24", "wc_review": "310;288;379;305", "wc_reply_reviewers": "0;16;14;23", "wc_reply_authors": "51;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.25, 14.446020213193666 ], "wc_strengths_avg": [ 86.25, 29.294837429144405 ], "wc_weaknesses_avg": [ 72.25, 24.055924426219832 ], "wc_questions_avg": [ 58.5, 24.109126902482387 ], "wc_limitations_avg": [ 9.25, 9.443913383762052 ], "wc_review_avg": [ 320.5, 34.74550330618338 ], "wc_reply_reviewers_avg": [ 13.25, 8.347903928532 ], "wc_reply_authors_avg": [ 12.75, 22.083647796503186 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 26, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15366587760614000260&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;sony.com;zju.edu.cn;oppo.com;oppo.com;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;3;0", "aff_unique_norm": "Zhejiang University;Sony Corporation;OPPO;OPPO Research Institute", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.sony.com;https://www.oppo.com;https://www.oppo.com/en", "aff_unique_abbr": "ZJU;Sony;OPPO;OPPO RI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Distance-Restricted Folklore Weisfeiler-Leman GNNs with Provable Cycle Counting Power", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72616", "id": "94rKFkcm56", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e2e7c2e3c2e70fa2e9756dce728fcca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=94rKFkcm56", "openreview": "https://openreview.net/forum?id=94rKFkcm56", "poster": "/media/PosterPDFs/NeurIPS%202023/72616.png?t=1699605844.197247", "slides": "https://nips.cc/virtual/2023/poster/72616", "video": "https://nips.cc/virtual/2023/poster/72616", "author_site": "Junru Zhou, Jiarui Feng, Xiyuan Wang, Muhan Zhang", "tldr": "", "abstract": "The ability of graph neural networks (GNNs) to count certain graph substructures, especially cycles, is important for the success of GNNs on a wide range of tasks. It has been recently used as a popular metric for evaluating the expressive power of GNNs. Many of the proposed GNN models with provable cycle counting power are based on subgraph GNNs, i.e., extracting a bag of subgraphs from the input graph, generating representations for each subgraph, and using them to augment the representation of the input graph. However, those methods require heavy preprocessing, and suffer from high time and memory costs. In this paper, we overcome the aforementioned limitations of subgraph GNNs by proposing a novel class of GNNs---$d$-Distance-Restricted FWL(2) GNNs, or $d$-DRFWL(2) GNNs, based on the well-known FWL(2) algorithm. As a heuristic method for graph isomorphism testing, FWL(2) colors all node pairs in a graph and performs message passing among those node pairs. In order to balance the expressive power and complexity, $d$-DRFWL(2) GNNs simplify FWL(2) by restricting the range of message passing to node pairs whose mutual distances are at most $d$. This way, $d$-DRFWL(2) GNNs exploit graph sparsity while avoiding the expensive subgraph extraction operations in subgraph GNNs, making both the time and space complexity lower. We theoretically investigate both the discriminative power and the cycle counting power of $d$-DRFWL(2) GNNs. Our most important finding is that $d$-DRFWL(2) GNNs have provably strong cycle counting power even with $d=2$: they can count all 3, 4, 5, 6-cycles. Since 6-cycles (e.g., benzene rings) are ubiquitous in organic molecules, being able to detect and count them is crucial for achieving robust and generalizable performance on molecular tasks. Experiments on both synthetic datasets and molecular datasets verify our theory. 
To the best of our knowledge, 2-DRFWL(2) GNN is the most efficient GNN model to date (both theoretically and empirically) that can count up to 6-cycles.", "keywords": "Cycle counting;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Junru Zhou;Jiarui Feng;Xiyuan Wang;Muhan Zhang", "authorids": "~Junru_Zhou1;~Jiarui_Feng1;~Xiyuan_Wang1;~Muhan_Zhang1", "gender": "M;M;;M", "homepage": "https://github.com/zml72062;https://jiaruifeng.github.io/;;https://muhanzhang.github.io/", "dblp": ";77/8797;95/8542;157/5518", "google_scholar": ";6CSGUR8AAAAJ;;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";0000-0002-3409-6819;;0000-0002-7680-6401", "linkedin": ";;%E5%B8%8C%E5%85%83-%E7%8E%8B-969660221/;jerry-muhan-zhang-a33a1777/", "or_profile": "~Junru_Zhou1;~Jiarui_Feng1;~Xiyuan_Wang1;~Muhan_Zhang1", "aff": "Peking University;Washington University, Saint Louis;Peking University;Peking University", "aff_domain": "pku.edu.cn;wustl.edu;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023distancerestricted,\ntitle={Distance-Restricted Folklore Weisfeiler-Leman {GNN}s with Provable Cycle Counting Power},\nauthor={Junru Zhou and Jiarui Feng and Xiyuan Wang and Muhan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=94rKFkcm56}\n}", "github": "", "project": "", "reviewers": "TSJX;pvsr;4wqe;8bs4;9V2p", "pdf_size": 555610, "rating": "6;6;6;6;7", "confidence": "2;3;5;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "41;178;86;119;130", "wc_strengths": "29;66;35;76;130", "wc_weaknesses": "93;99;253;373;375", "wc_questions": "75;90;75;93;122", "wc_limitations": "15;8;6;3;42", "wc_review": "253;441;455;664;799", "wc_reply_reviewers": "52;25;18;16;118", "wc_reply_authors": "553;35;14;721;708", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;3;3", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 110.8, 45.69201243105845 ], "wc_strengths_avg": [ 67.2, 36.10761692496474 ], "wc_weaknesses_avg": [ 238.6, 124.54974909649557 ], "wc_questions_avg": [ 91.0, 17.193021840269964 ], "wc_limitations_avg": [ 14.8, 14.161920773680384 ], "wc_review_avg": [ 522.4, 189.89639280407619 ], "wc_reply_reviewers_avg": [ 45.8, 38.32701397187107 ], "wc_reply_authors_avg": [ 406.2, 317.2830912607856 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.196116135138184, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12264432157908232203&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;wustl.edu;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Peking University;Washington University in St. 
Louis", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://wustl.edu", "aff_unique_abbr": "Peking U;WUSTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saint Louis", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Generalizable One-shot 3D Neural Head Avatar", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72615", "id": "95q46MpBGZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/937ae0e83eb08d2cb8627fe1def8c751-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=95q46MpBGZ", "openreview": "https://openreview.net/forum?id=95q46MpBGZ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72615", "video": "https://nips.cc/virtual/2023/poster/72615", "author_site": "Xueting Li, Shalini De Mello, Sifei Liu, Koki Nagano, Umar Iqbal, Jan Kautz", "tldr": "", "abstract": "We present a method that reconstructs and animates a 3D head avatar from a single-view portrait image. Existing methods either involve time-consuming optimization for a specific person with multiple images, or they struggle to synthesize intricate appearance details beyond the facial region. To address these limitations, we propose a framework that not only generalizes to unseen identities based on a single-view image without requiring person-specific optimization, but also captures characteristic details within and beyond the face area (e.g. hairstyle, accessories, etc.). At the core of our method are three branches that produce three tri-planes representing the coarse 3D geometry, detailed appearance of a source image, as well as the expression of a target image. By applying volumetric rendering to the combination of the three tri-planes followed by a super-resolution module, our method yields a high fidelity image of the desired identity, expression and pose. Once trained, our model enables efficient 3D head avatar reconstruction and animation via a single forward pass through a network. 
Experiments show that the proposed approach generalizes well to unseen validation datasets, surpassing SOTA baseline methods by a large margin on head avatar reconstruction and animation.", "keywords": "Neural Radiance Field;Portrait Reconstruction and Animation", "primary_area": "", "supplementary_material": "/attachment/d72640220b4f5470c7dc0d89aeff7591bd171d84.zip", "author": "Xueting Li;Shalini De Mello;Sifei Liu;Koki Nagano;Umar Iqbal;Jan Kautz", "authorids": "~Xueting_Li2;~Shalini_De_Mello1;~Sifei_Liu2;~Koki_Nagano1;~Umar_Iqbal3;~Jan_Kautz1", "gender": "Not Specified;F;M;M;;F", "homepage": "https://research.nvidia.com/person/shalini-de-mello;https://www.sifeiliu.net;https://luminohope.org/;http://www.umariqbal.info/;http://jankautz.com;https://sunshineatnoon.github.io/", "dblp": "206/7364;118/1301;58/8319;08/8604;48/6214;", "google_scholar": "xQM4BlMAAAAJ;j4pcHV4AAAAJ;GB7NfS4AAAAJ;https://scholar.google.de/citations?hl=en;P9FclNEAAAAJ;nfXdXswAAAAJ", "orcid": ";;;;;", "linkedin": "shalini-de-mello-02b8251/;;koki-nagano-07926a49/;iqbalu/;;", "or_profile": "~Shalini_De_Mello1;~Sifei_Liu2;~Koki_Nagano1;~Umar_Iqbal3;~Jan_Kautz1;~Xueting_Li1", "aff": "NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": "Principal Researcher;Researcher;Researcher;Researcher;VP Research;Researcher", "bibtex": "@inproceedings{\nli2023generalizable,\ntitle={Generalizable One-shot 3D Neural Head Avatar},\nauthor={Xueting Li and Shalini De Mello and Sifei Liu and Koki Nagano and Umar Iqbal and Jan Kautz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=95q46MpBGZ}\n}", "github": "", "project": "", "reviewers": "PU1D;ufhJ;k8VD;8oNf;FcT2", "pdf_size": 3062535, "rating": "4;4;6;6;6", "confidence": "4;5;5;3;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;3;3;4;3", "wc_summary": "282;22;75;76;135", "wc_strengths": "33;11;64;85;99", "wc_weaknesses": "213;201;140;225;37", "wc_questions": "45;1;97;10;22", "wc_limitations": "18;9;51;14;43", "wc_review": "591;244;427;410;336", "wc_reply_reviewers": "436;0;74;13;36", "wc_reply_authors": "789;0;0;0;0", "reply_reviewers": "4;0;1;1;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 118.0, 89.45837020648207 ], "wc_strengths_avg": [ 58.4, 32.49369169546606 ], "wc_weaknesses_avg": [ 163.2, 69.55975848146686 ], "wc_questions_avg": [ 35.0, 34.33365695640358 ], "wc_limitations_avg": [ 27.0, 16.769019053003667 ], "wc_review_avg": [ 401.6, 114.65530951508526 ], "wc_reply_reviewers_avg": [ 111.8, 164.03706898137384 ], "wc_reply_authors_avg": [ 157.8, 315.6 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10388956831320800695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", 
"aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Data-Algorithm Dependent Generalization: a Case Study on Overparameterized Linear Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72614", "id": "966yOmwk6d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fbe30aab28ad7148bc73804689ac0bd7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=966yOmwk6d", "openreview": "https://openreview.net/forum?id=966yOmwk6d", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72614", "video": "https://nips.cc/virtual/2023/poster/72614", "author_site": "Jing Xu, Jiaye Teng, Yang Yuan, Andrew Yao", "tldr": "", "abstract": "One of the major open problems in machine learning is to characterize generalization in the overparameterized regime, where most traditional generalization bounds become inconsistent even for overparameterized linear regression. In many scenarios, this failure can be attributed to obscuring the crucial interplay between the training algorithm and the underlying data distribution. This paper demonstrate that the generalization behavior of overparameterized model should be analyzed in a both data-relevant and algorithm-relevant manner. To make a formal characterization, We introduce a notion called data-algorithm compatibility, which considers the generalization behavior of the entire data-dependent training trajectory, instead of traditional last-iterate analysis. We validate our claim by studying the setting of solving overparameterized linear regression with gradient descent. Specifically, we perform a data-dependent trajectory analysis and derive a sufficient condition for compatibility in such a setting. 
Our theoretical results demonstrate that if we take early stopping iterates into consideration, generalization can hold with significantly weaker restrictions on the problem instance than the previous last-iterate analysis.", "keywords": "data-algorithm dependent generalization analysis;overparameterized linear regression", "primary_area": "", "supplementary_material": "", "author": "Jing Xu;Jiaye Teng;Yang Yuan;Andrew C Yao", "authorids": "~Jing_Xu4;~Jiaye_Teng2;~Yang_Yuan4;~Andrew_C_Yao1", "gender": "M;M;M;M", "homepage": "https://jingxuthu.github.io;http://www.tengjiaye.com;http://people.iiis.tsinghua.edu.cn/~yuanyang/index.html;https://iiis.tsinghua.edu.cn/en/yao/", "dblp": "07/1951-27;266/8187;;y/AndrewChiChihYao", "google_scholar": "jlrroGQAAAAJ;NGqfK2wAAAAJ;;", "orcid": ";0000-0002-4385-5792;;", "linkedin": ";;;", "or_profile": "~Jing_Xu4;~Jiaye_Teng2;~Yang_Yuan4;~Andrew_C_Yao1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "thu.edu.cn;iiis.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023towards,\ntitle={Towards Data-Algorithm Dependent Generalization: a Case Study on Overparameterized Linear Regression},\nauthor={Jing Xu and Jiaye Teng and Yang Yuan and Andrew C Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=966yOmwk6d}\n}", "github": "", "project": "", "reviewers": "YpbP;dysH;ytRs;5qZn", "pdf_size": 2594693, "rating": "6;6;7;7", "confidence": "2;4;2;1", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "96;67;91;131", "wc_strengths": "36;103;79;73", "wc_weaknesses": "142;119;87;140", "wc_questions": "2;60;121;39", "wc_limitations": "1;1;1;30", "wc_review": "277;350;379;413", "wc_reply_reviewers": "17;47;20;28", "wc_reply_authors": "362;317;20;37", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.25, 22.862359895688808 ], "wc_strengths_avg": [ 72.75, 24.00390593216029 ], "wc_weaknesses_avg": [ 122.0, 22.124646889837585 ], "wc_questions_avg": [ 55.5, 43.142206712220926 ], "wc_limitations_avg": [ 8.25, 12.55736835487436 ], "wc_review_avg": [ 354.75, 50.12172682579881 ], "wc_reply_reviewers_avg": [ 28.0, 11.683321445547923 ], "wc_reply_authors_avg": [ 184.0, 156.42729940774404 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7819809510219311315&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "thu.edu.cn;iiis.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Accelerating Reinforcement Learning with Value-Conditional State Entropy Exploration", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72613", "id": "97E3YXvcFM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6530db249c161fe9254db2667453952c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=97E3YXvcFM", "openreview": "https://openreview.net/forum?id=97E3YXvcFM", "poster": "/media/PosterPDFs/NeurIPS%202023/72613.png?t=1701687709.8766813", "slides": "https://nips.cc/virtual/2023/poster/72613", "video": "https://nips.cc/virtual/2023/poster/72613", "author_site": "Dongyoung Kim, Jinwoo Shin, Pieter Abbeel, Younggyo Seo", "tldr": "", "abstract": "A promising technique for exploration is to maximize the entropy of visited state distribution, i.e., state entropy, by encouraging uniform coverage of visited state space. While it has been effective for an unsupervised setup, it tends to struggle in a supervised setup with a task reward, where an agent prefers to visit high-value states to exploit the task reward. Such a preference can cause an imbalance between the distributions of high-value states and low-value states, which biases exploration towards low-value state regions as a result of the state entropy increasing when the distribution becomes more uniform. This issue is exacerbated when high-value states are narrowly distributed within the state space, making it difficult for the agent to complete the tasks. In this paper, we present a novel exploration technique that maximizes the value-conditional state entropy, which separately estimates the state entropies that are conditioned on the value estimates of each state, then maximizes their average. By only considering the visited states with similar value estimates for computing the intrinsic bonus, our method prevents the distribution of low-value states from affecting exploration around high-value states, and vice versa. We demonstrate that the proposed alternative to the state entropy baseline significantly accelerates various reinforcement learning algorithms across a variety of tasks within MiniGrid, DeepMind Control Suite, and Meta-World benchmarks. 
Source code is available at https://sites.google.com/view/rl-vcse.", "keywords": "Reinforcement Learning;State Entropy;Exploration", "primary_area": "", "supplementary_material": "/attachment/0c10591c8289569a650ca06b549a7229307a2801.zip", "author": "Dongyoung Kim;Jinwoo Shin;Pieter Abbeel;Younggyo Seo", "authorids": "~Dongyoung_Kim3;~Jinwoo_Shin1;~Pieter_Abbeel2;~Younggyo_Seo1", "gender": "M;M;M;M", "homepage": "https://kingdy2002.github.io/;https://sites.google.com/site/mijirim/;https://people.eecs.berkeley.edu/~pabbeel/;https://younggyo.me/", "dblp": ";31/7062;;265/5586", "google_scholar": "bsp1E58AAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;tI1-YwIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Dongyoung_Kim3;~Jinwoo_Shin1;~Pieter_Abbeel2;~Younggyo_Seo1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Covariant;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;covariant.ai;kaist.ac.kr", "position": "Undergrad student;Full Professor;Founder;PhD student", "bibtex": "@inproceedings{\nkim2023accelerating,\ntitle={Accelerating Reinforcement Learning with Value-Conditional State Entropy Exploration},\nauthor={Dongyoung Kim and Jinwoo Shin and Pieter Abbeel and Younggyo Seo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=97E3YXvcFM}\n}", "github": "", "project": "", "reviewers": "CsYB;pL2M;FYPf;MwTR", "pdf_size": 2091805, "rating": "4;6;7;7", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "48;94;102;56", "wc_strengths": "26;68;50;82", "wc_weaknesses": "131;93;40;21", "wc_questions": "16;80;364;32", "wc_limitations": "5;29;37;28", "wc_review": "226;364;593;219", "wc_reply_reviewers": "58;99;146;10", "wc_reply_authors": "328;31;69;38", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 23.345235059857504 ], "wc_strengths_avg": [ 56.5, 20.946360065653412 ], "wc_weaknesses_avg": [ 71.25, 43.43026018803019 ], "wc_questions_avg": [ 123.0, 141.12051587207299 ], "wc_limitations_avg": [ 24.75, 11.92424001771182 ], "wc_review_avg": [ 350.5, 151.47689592805895 ], "wc_reply_reviewers_avg": [ 78.25, 50.22138488731668 ], "wc_reply_authors_avg": [ 116.5, 122.94409298538909 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6399388373567675084&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;kaist.ac.kr;covariant.ai;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Covariant", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;", "aff_unique_abbr": "KAIST;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea;" }, { "title": "Generalization bounds for neural ordinary differential equations and deep residual networks", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72612", "id": "992vogTP1L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98ed250b203d1ac6b24bbcf263e3d4a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=992vogTP1L", "openreview": "https://openreview.net/forum?id=992vogTP1L", "poster": "/media/PosterPDFs/NeurIPS%202023/72612.png?t=1699625356.907615", "slides": "https://nips.cc/virtual/2023/poster/72612", "video": "https://nips.cc/virtual/2023/poster/72612", "author_site": "Pierre Marion, Pierre Marion", "tldr": "", "abstract": "Neural ordinary differential equations (neural ODEs) are a popular family of continuous-depth deep learning models. In this work, we consider a large family of parameterized ODEs with continuous-in-time parameters, which include time-dependent neural ODEs. We derive a generalization bound for this class by a Lipschitz-based argument. By leveraging the analogy between neural ODEs and deep residual networks, our approach yields in particular a generalization bound for a class of deep residual networks. The bound involves the magnitude of the difference between successive weight matrices. We illustrate numerically how this quantity affects the generalization capability of neural networks.", "keywords": "residual neural networks;neural ODEs;generalization bound", "primary_area": "", "supplementary_material": "/attachment/3b4ab4761d31f20114e28afce32044229aa7ef0d.zip", "author": "Pierre Marion", "authorids": "~Pierre_Marion1", "gender": "M", "homepage": "https://pierremarion23.github.io/", "dblp": "250/2318", "google_scholar": "https://scholar.google.fr/citations?user=Q8H5LgIAAAAJ", "orcid": "", "linkedin": "pierre-marion-816474130/", "or_profile": "~Pierre_Marion1", "aff": "LPSM, Sorbonne Universit\u00e9", "aff_domain": "sorbonne-universite.fr", "position": "PhD student", "bibtex": "@inproceedings{\nmarion2023generalization,\ntitle={Generalization bounds for neural ordinary differential equations and deep residual networks},\nauthor={Pierre Marion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=992vogTP1L}\n}", "github": "", "project": "", "reviewers": "wvQq;W2zB;EEyp;d5dy", "pdf_size": 516715, "rating": "6;6;7;7", "confidence": "3;4;3;4", "soundness": "3;4;4;4", "novelty": "3;4;4;3", "presentation": "3;4;3;3", "wc_summary": "128;101;31;86", "wc_strengths": "107;76;40;191", "wc_weaknesses": "56;17;33;63", "wc_questions": "59;248;11;131", "wc_limitations": "12;7;1;3", "wc_review": "362;449;116;474", "wc_reply_reviewers": "112;34;0;25", "wc_reply_authors": "51;30;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.5, 35.40127116361784 ], "wc_strengths_avg": [ 103.5, 55.80546568213547 ], "wc_weaknesses_avg": [ 42.25, 18.32177666057525 ], "wc_questions_avg": [ 112.25, 89.25630229849318 ], "wc_limitations_avg": [ 5.75, 4.205650960315181 ], "wc_review_avg": [ 350.25, 141.48917803139574 ], "wc_reply_reviewers_avg": [ 42.75, 41.87705218851967 ], "wc_reply_authors_avg": [ 20.25, 21.568205766822608 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=8151504265619866928&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sorbonne-universite.fr", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Sorbonne Universit\u00e9", "aff_unique_dep": "LPSM", "aff_unique_url": "https://www.sorbonne-universite.fr", "aff_unique_abbr": "Sorbonne", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Scenario Diffusion: Controllable Driving Scenario Generation With Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72611", "id": "99MHSB98yZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d95cb79a3421e6d9b6c9a9008c4d07c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=99MHSB98yZ", "openreview": "https://openreview.net/forum?id=99MHSB98yZ", "poster": "/media/PosterPDFs/NeurIPS%202023/72611.png?t=1702220644.9693267", "slides": "https://nips.cc/virtual/2023/poster/72611", "video": "https://nips.cc/virtual/2023/poster/72611", "author_site": "Ethan Pronovost, Meghana Reddy Ganesina, Noureldin Hendy, Zeyu Wang, Andres Morales, Kai Wang, Nick Roy", "tldr": "", "abstract": "Automated creation of synthetic traffic scenarios is a key part of scaling the safety validation of autonomous vehicles (AVs). In this paper, we propose Scenario Diffusion, a novel diffusion-based architecture for generating traffic scenarios that enables controllable scenario generation. We combine latent diffusion, object detection and trajectory regression to generate distributions of synthetic agent poses, orientations and trajectories simultaneously. This distribution is conditioned on the map and sets of tokens describing the desired scenario to provide additional control over the generated scenario. 
We show that our approach has sufficient expressive capacity to model diverse traffic patterns and generalizes to different geographical regions.", "keywords": "Deep Learning;(Other) Applications;(Other) Machine Learning Topics", "primary_area": "", "supplementary_material": "", "author": "Ethan Pronovost;Meghana Reddy Ganesina;Noureldin Hendy;Zeyu Wang;Andres Morales;Kai Wang;Nicholas Roy", "authorids": "~Ethan_Pronovost1;~Meghana_Reddy_Ganesina1;~Noureldin_Hendy1;~Zeyu_Wang13;~Andres_Morales1;kai@zoox.com;~Nicholas_Roy1", "gender": "M;;M;M;M;;M", "homepage": ";;;;https://zoox.com;;", "dblp": ";;267/9850;;;;32/2668", "google_scholar": ";;mmC2pJwAAAAJ;;;;", "orcid": ";;;;;;", "linkedin": "epronovost;;noureldin1997;zeyu-wang-cs/;;;", "or_profile": "~Ethan_Pronovost1;~Meghana_Reddy_Ganesina1;~Noureldin_Hendy1;~Zeyu_Wang13;~Andres_Morales1;kai@zoox.com;~Nicholas_Roy1", "aff": "Zoox;;Work;University of Illinois, Urbana Champaign;;;Massachusetts Institute of Technology", "aff_domain": "zoox.com;;zoox.com;uiuc.edu;;;mit.edu", "position": "Researcher;;Researcher;MS student;;;Full Professor", "bibtex": "@inproceedings{\npronovost2023scenario,\ntitle={Scenario Diffusion: Controllable Driving Scenario Generation With Diffusion},\nauthor={Ethan Pronovost and Meghana Reddy Ganesina and Noureldin Hendy and Zeyu Wang and Andres Morales and Kai Wang and Nicholas Roy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=99MHSB98yZ}\n}", "github": "", "project": "", "reviewers": "9X4d;uQ2d;cvm4;VGNk", "pdf_size": 8145422, "rating": "4;4;4;6", "confidence": "5;4;4;4", "soundness": "1;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "13;92;87;97", "wc_strengths": "11;159;76;57", "wc_weaknesses": "205;596;129;210", "wc_questions": "42;554;232;75", "wc_limitations": "2;7;19;13", "wc_review": "273;1408;543;452", "wc_reply_reviewers": "74;91;82;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.25, 34.390223901568305 ], "wc_strengths_avg": [ 75.75, 53.560129760858494 ], "wc_weaknesses_avg": [ 285.0, 182.40202849749232 ], "wc_questions_avg": [ 225.75, 202.6553416517808 ], "wc_limitations_avg": [ 10.25, 6.378675411086537 ], "wc_review_avg": [ 669.0, 437.57913570004683 ], "wc_reply_reviewers_avg": [ 70.75, 20.94486810653149 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8682892070394743433&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "zoox.com;;zoox.com;uiuc.edu;;;mit.edu", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Zoox;Work;University of Illinois Urbana-Champaign;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zoox.com;;https://illinois.edu;https://web.mit.edu", "aff_unique_abbr": ";;UIUC;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "AmadeusGPT: a natural language interface for 
interactive animal behavioral analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72610", "id": "9AcG3Tsyoq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1456560769bbc38e4f8c5055048ea712-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9AcG3Tsyoq", "openreview": "https://openreview.net/forum?id=9AcG3Tsyoq", "poster": "/media/PosterPDFs/NeurIPS%202023/72610.png?t=1701780014.8377924", "slides": "https://nips.cc/virtual/2023/poster/72610", "video": "https://nips.cc/virtual/2023/poster/72610", "author_site": "Shaokai Ye, Jessy Lauer, Mu Zhou, Alexander Mathis, Mackenzie Mathis", "tldr": "", "abstract": "The process of quantifying and analyzing animal behavior involves translating the naturally occurring descriptive language of their actions into machine-readable code. Yet, codifying behavior analysis is often challenging without a deep understanding of animal behavior and technical machine learning knowledge. To narrow this gap, we introduce AmadeusGPT: a natural language interface that turns natural language descriptions of behaviors into machine-executable code. Large-language models (LLMs) such as GPT3.5 and GPT4 allow for interactive language-based queries that are potentially well suited for interactive behavior analysis. However, the comprehension capability of these LLMs is limited by the context window size, which prevents them from remembering distant conversations. To overcome the context window limitation, we implement a novel dual-memory mechanism to allow communication between short-term and long-term memory using symbols as context pointers for retrieval and saving. Concretely, users directly use language-based definitions of behavior and our augmented GPT develops code based on the core AmadeusGPT API, which contains machine learning, computer vision, spatio-temporal reasoning, and visualization modules. Users can then interactively refine results, and seamlessly add new behavioral modules as needed. We used the MABe 2022 behavior challenge tasks to benchmark AmadeusGPT and show excellent performance. Note that an end-user would not need to write any code to achieve this. Thus, collectively AmadeusGPT presents a novel way to merge deep biological knowledge, large-language models, and core computer vision modules into a more naturally intelligent system.
Code and demos can be found at: https://github.com/AdaptiveMotorControlLab/AmadeusGPT", "keywords": "ChatGPT;GPT3.5;GPT4;behavioral analysis;LLMs;human-AI interaction;behavioral neuroscience", "primary_area": "", "supplementary_material": "", "author": "Shaokai Ye;Jessy Lauer;Mu Zhou;Alexander Mathis;Mackenzie W Mathis", "authorids": "~Shaokai_Ye3;~Jessy_Lauer1;~Mu_Zhou3;~Alexander_Mathis1;~Mackenzie_W_Mathis1", "gender": "M;;F;M;F", "homepage": ";;;;http://www.mackenziemathislab.org", "dblp": ";;https://dblp.org/rec/journals/corr/abs-2306-07879;117/7258;218/5502", "google_scholar": "Gky1L_gAAAAJ;;noSwD5oAAAAJ;https://scholar.google.ch/citations?user=Y1xCzE0AAAAJ;IhqY9XgAAAAJ", "orcid": ";;0000-0003-2961-1879;0000-0002-3777-2202;0000-0001-7368-4456", "linkedin": ";;;;", "or_profile": "~Shaokai_Ye3;~Jessy_Lauer1;~Mu_Zhou3;~Alexander_Mathis1;~Mackenzie_W_Mathis1", "aff": "EPFL - EPF Lausanne;;EPFL - EPF Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;;epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nye2023amadeusgpt,\ntitle={Amadeus{GPT}: a natural language interface for interactive animal behavioral analysis},\nauthor={Shaokai Ye and Jessy Lauer and Mu Zhou and Alexander Mathis and Mackenzie W Mathis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9AcG3Tsyoq}\n}", "github": "", "project": "", "reviewers": "ZigY;XjKy;f7B3;eSvp;b8BG", "pdf_size": 12775825, "rating": "5;5;6;6;8", "confidence": "3;4;3;3;4", "soundness": "3;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "4;3;4;3;4", "wc_summary": "120;270;97;90;70", "wc_strengths": "84;139;197;80;54", "wc_weaknesses": "299;114;177;94;69", "wc_questions": "75;338;1;2;87", "wc_limitations": "11;1;27;62;5", "wc_review": "589;862;499;328;285", "wc_reply_reviewers": "48;265;0;17;15", "wc_reply_authors": "29;801;53;3;3", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;4;3;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 129.4, 72.09327291779726 ], "wc_strengths_avg": [ 110.8, 51.222651239466316 ], "wc_weaknesses_avg": [ 150.6, 82.36649804380419 ], "wc_questions_avg": [ 100.6, 123.9687057285023 ], "wc_limitations_avg": [ 21.2, 22.238704998268222 ], "wc_review_avg": [ 512.6, 206.87252113318482 ], "wc_reply_reviewers_avg": [ 69.0, 99.2350744444725 ], "wc_reply_authors_avg": [ 177.8, 312.15534594172817 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.372677996249965, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1364185262625155872&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "epfl.ch;;epfl.ch;epfl.ch;epfl.ch", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "9B57dEeP3O", "title": "Intelligent Grimm - Open-ended 
Visual Storytelling via Latent Diffusion Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Benefiting from impressive diffusion models, conditional generative models have exhibited exceptional capabilities in various generation tasks, for example, image or short video generation based on text descriptions. In this work, we focus on the task of generating a series of coherent images based on a given storyline, denoted as open-ended visual storytelling. We make the following three contributions: (i) to fulfill the task of visual storytelling, we introduce two modules into a pre-trained stable diffusion model, and construct an auto-regressive image generator, termed StoryGen, which generates the current frame conditioned on a text prompt and the preceding frame; (ii) to train our proposed model, we collect paired image and text samples from various online sources, such as videos and E-books, and establish a data processing pipeline for constructing a diverse dataset, named StorySalon, with a far larger vocabulary than existing animation-specific datasets; (iii) we adopt a three-stage curriculum training strategy that enables style transfer, visual context conditioning, and human feedback alignment, respectively. Quantitative experiments and human evaluation have validated the superiority of our proposed model, in terms of image quality, style consistency, content consistency, and visual-language alignment. We will make the code, model, and dataset publicly available to the research community.", "keywords": "Story Generation;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/b2f6bcd31f06aa9a752f37d9adcac1490eb54690.zip", "author": "Chang Liu;Haoning Wu;Yujie Zhong;Xiaoyun Zhang;Weidi Xie", "authorids": "~Chang_Liu32;~Haoning_Wu2;~Yujie_Zhong1;~Xiaoyun_Zhang1;~Weidi_Xie3", "gender": "M;M;M;F;M", "homepage": "https://verg-avesta.github.io/;https://haoningwu3639.github.io/;https://y-zhong.info/;https://mediabrain.sjtu.edu.cn/xiaoyun-zhang/;https://weidixie.github.io", "dblp": "52/5716-79.html;264/5802-2;198/2504;;199/1718", "google_scholar": "https://scholar.google.com.hk/citations?user=4Cx0DyEAAAAJ;https://scholar.google.com.hk/citations?user=ia4M9mMAAAAJ;;hQm9oqwAAAAJ;https://scholar.google.co.uk/citations?user=Vtrqj4gAAAAJ", "orcid": ";;;0000-0001-7680-4062;", "linkedin": ";haoningwu0815/;;;", "or_profile": "~Chang_Liu32;~Haoning_Wu2;~Yujie_Zhong1;~Xiaoyun_Zhang1;~Weidi_Xie3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Meituan Inc.;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;meituan.com;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Researcher;Full Professor;Associate Professor", "bibtex": "@misc{\nliu2023intelligent,\ntitle={Intelligent Grimm - Open-ended Visual Storytelling via Latent Diffusion Models},\nauthor={Chang Liu and Haoning Wu and Yujie Zhong and Xiaoyun Zhang and Weidi Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=9B57dEeP3O}\n}", "github": "", "project": "", "reviewers": "tcbg;SemY;zaiF;4bmh;WrUp", "site": "https://openreview.net/forum?id=9B57dEeP3O", "pdf_size": 15967855, "rating": "3;3;4;5;6", "confidence": "4;4;4;4;5", "soundness": "3;2;2;3;3", "novelty": "2;3;2;3;2", "presentation": "3;3;3;3;4", "wc_summary": "89;47;204;68;293", "wc_strengths": "62;49;92;77;14", "wc_weaknesses": "140;410;568;234;12", "wc_questions": "186;219;363;57;187", "wc_limitations": "87;147;15;27;48", "wc_review":
"564;872;1242;463;554", "wc_reply_reviewers": "134;16;0;58;17", "wc_reply_authors": "286;0;0;114;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 4.2, 1.16619037896906 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 140.2, 93.74305307594798 ], "wc_strengths_avg": [ 58.8, 26.63381309538685 ], "wc_weaknesses_avg": [ 272.8, 196.53233830593885 ], "wc_questions_avg": [ 202.4, 97.68848448000409 ], "wc_limitations_avg": [ 64.8, 47.8347154271874 ], "wc_review_avg": [ 739.0, 286.95783662412845 ], "wc_reply_reviewers_avg": [ 45.0, 48.45616575834287 ], "wc_reply_authors_avg": [ 80.0, 112.06426727552365 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7717436331412897, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6638285596773981511&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Meituan Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.meituan.com", "aff_unique_abbr": "SJTU;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Accelerating Motion Planning via Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72609", "id": "9B9J8X23LK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f7a94134f1c726796c6f81fb946e489d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9B9J8X23LK", "openreview": "https://openreview.net/forum?id=9B9J8X23LK", "poster": "/media/PosterPDFs/NeurIPS%202023/72609.png?t=1700171357.3627846", "slides": "https://nips.cc/virtual/2023/poster/72609", "video": "https://nips.cc/virtual/2023/poster/72609", "author_site": "An T. Le, Georgia Chalvatzaki, Armin Biess, Jan Peters", "tldr": "", "abstract": "Motion planning is still an open problem for many disciplines, e.g., robotics, autonomous driving, due to their need for high computational resources that hinder real-time, efficient decision-making. A class of methods striving to provide smooth solutions is gradient-based trajectory optimization. However, those methods usually suffer from bad local minima, while for many settings, they may be inapplicable due to the absence of easy-to-access gradients of the optimization objectives. In response to these issues, we introduce Motion Planning via Optimal Transport (MPOT)---a \\textit{gradient-free} method that optimizes a batch of smooth trajectories over highly nonlinear costs, even for high-dimensional tasks, while imposing smoothness through a Gaussian Process dynamics prior via the planning-as-inference perspective. To facilitate batch trajectory optimization, we introduce an original zero-order and highly-parallelizable update rule----the Sinkhorn Step, which uses the regular polytope family for its search directions. Each regular polytope, centered on trajectory waypoints, serves as a local cost-probing neighborhood, acting as a \\textit{trust region} where the Sinkhorn Step ``transports'' local waypoints toward low-cost regions. 
We theoretically show that Sinkhorn Step guides the optimizing parameters toward local minima regions of non-convex objective functions. We then show the efficiency of MPOT in a range of problems from low-dimensional point-mass navigation to high-dimensional whole-body robot motion planning, evincing its superiority compared to popular motion planners, paving the way for new applications of optimal transport in motion planning.", "keywords": "Motion Planning;Trajectory Optimization;Optimal Transport", "primary_area": "", "supplementary_material": "", "author": "An Thai Le;Georgia Chalvatzaki;Armin Biess;Jan Peters", "authorids": "~An_Thai_Le1;~Georgia_Chalvatzaki1;~Armin_Biess1;~Jan_Peters3", "gender": "Not Specified;F;M;M", "homepage": "https://www.ias.informatik.tu-darmstadt.de/index.php/Team/AnThaiLe;https://www.ias.informatik.tu-darmstadt.de/Team/GeorgiaChalvatzaki;https://armin-biess.net/;https://www.jan-peters.net", "dblp": "121/0037;145/3334;https://dblp.org/rec/journals/nn/ManelaB22;p/JanPeters1", "google_scholar": "k0r0tfUAAAAJ;https://scholar.google.gr/citations?user=mlho5FkAAAAJ;;https://scholar.google.de/citations?user=-kIVAcAAAAAJ", "orcid": ";;0000-0002-0087-3675;0000-0002-5266-8091", "linkedin": ";;arminbiess/;janrpeters/", "or_profile": "~An_Thai_Le1;~Georgia_Chalvatzaki1;~Armin_Biess1;~Jan_Peters3", "aff": "Technische Universit\u00e4t Darmstadt;TU Darmstadt;;TU Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;;tu-darmstadt.de", "position": "PhD student;Principal Researcher;;Full Professor", "bibtex": "@inproceedings{\nle2023accelerating,\ntitle={Accelerating Motion Planning via Optimal Transport},\nauthor={An Thai Le and Georgia Chalvatzaki and Armin Biess and Jan Peters},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9B9J8X23LK}\n}", "github": "", "project": "", "reviewers": "hcju;2eXt;MARJ;caUS", "pdf_size": 2926625, "rating": "6;7;7;7", "confidence": "3;3;2;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "56;57;84;22", "wc_strengths": "69;39;70;46", "wc_weaknesses": "104;56;55;37", "wc_questions": "110;61;33;2", "wc_limitations": "15;13;14;6", "wc_review": "354;226;256;113", "wc_reply_reviewers": "27;22;23;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.75, 21.992896580487074 ], "wc_strengths_avg": [ 56.0, 13.729530217745982 ], "wc_weaknesses_avg": [ 63.0, 24.849547279578356 ], "wc_questions_avg": [ 51.5, 39.702015062210634 ], "wc_limitations_avg": [ 12.0, 3.5355339059327378 ], "wc_review_avg": [ 237.25, 85.94584050435483 ], "wc_reply_reviewers_avg": [ 18.0, 10.559356040971437 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12026217842290042521&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "tu-darmstadt.de;tu-darmstadt.de;;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": 
"https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "9BV9dMhRjt", "title": "On the estimation of persistence intensity functions and linear representations of persistence diagrams", "track": "main", "status": "Reject", "tldr": "", "abstract": "Persistence diagrams are one of the most popular types of data summaries used in Topological Data Analysis. The prevailing statistical approach to analyzing persistence diagrams is concerned with filtering out topological noise. In this paper, we adopt a different viewpoint and aim at estimating the actual distribution of a random persistence diagram, which captures both topological signal and noise. To that effect, [CD19] has shown that, under general conditions, the expected value of a random persistence diagram is a measure admitting a Lebesgue density, called the persistence intensity function. In this paper, we are concerned with estimating the persistence intensity function and a novel, normalized version of it -- called the persistence density function. We present a class of kernel-based estimators based on an i.i.d. sample of persistence diagrams and derive estimation rates in the supremum norm. As a direct corollary, we obtain uniform consistency rates for estimating linear representations of persistence diagrams, including Betti numbers and persistence images. Interestingly, the persistence density function delivers stronger statistical guarantees. ", "keywords": "topological data analysis;persistence diagram;betti numbers;non-parametric density estimation;persistence surface", "primary_area": "", "supplementary_material": "/attachment/ec10cefb61b1186da090a3634e6b6b510c21cee8.pdf", "author": "Weichen Wu;Jisu Kim;Alessandro Rinaldo", "authorids": "~Weichen_Wu1;~Jisu_Kim1;~Alessandro_Rinaldo1", "gender": "M;M;M", "homepage": ";https://pages.saclay.inria.fr/jisu.kim/;https://arinaldo.github.io", "dblp": ";;75/5558", "google_scholar": "8qUwFl4AAAAJ;;tBIzO-EAAAAJ", "orcid": "0000-0002-5955-3289;0000-0003-0573-4495;", "linkedin": "weichen-wu-94ab14221/;;", "or_profile": "~Weichen_Wu1;~Jisu_Kim1;~Alessandro_Rinaldo1", "aff": "Carnegie Mellon University;INRIA;Carnegie Mellon University", "aff_domain": "cmu.edu;inria.fr;cmu.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nwu2023on,\ntitle={On the estimation of persistence intensity functions and linear representations of persistence diagrams},\nauthor={Weichen Wu and Jisu Kim and Alessandro Rinaldo},\nyear={2023},\nurl={https://openreview.net/forum?id=9BV9dMhRjt}\n}", "github": "", "project": "", "reviewers": "1g88;LEMN;7uZh;LDNi", "site": "https://openreview.net/forum?id=9BV9dMhRjt", "pdf_size": 471116, "rating": "3;4;5;7", "confidence": "5;4;2;3", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "2;2;2;3", "wc_summary": "294;27;93;187", "wc_strengths": "119;31;71;40", "wc_weaknesses": "889;172;78;152", "wc_questions": "82;152;64;152", "wc_limitations": "12;78;20;86", "wc_review": "1396;460;326;617", "wc_reply_reviewers": "42;132;0;0", "wc_reply_authors": "80;438;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 150.25, 100.60162772043005 ], 
"wc_strengths_avg": [ 65.25, 34.39749264117953 ], "wc_weaknesses_avg": [ 322.75, 328.7942935940343 ], "wc_questions_avg": [ 112.5, 40.009373901624606 ], "wc_limitations_avg": [ 49.0, 33.24154027718932 ], "wc_review_avg": [ 699.75, 414.9640797707676 ], "wc_reply_reviewers_avg": [ 43.5, 53.895732669665044 ], "wc_reply_authors_avg": [ 129.5, 181.08216367163277 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.680336051416609, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13989818241577172798&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.inria.fr", "aff_unique_abbr": "CMU;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "title": "ADGym: Design Choices for Deep Anomaly Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73678", "id": "9CKx9SsSSc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/de670b9d118229d09d9a9bd9dec2598b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=9CKx9SsSSc", "openreview": "https://openreview.net/forum?id=9CKx9SsSSc", "poster": "/media/PosterPDFs/NeurIPS%202023/73678.png?t=1699699997.2730124", "slides": "https://nips.cc/virtual/2023/poster/73678", "video": "https://nips.cc/virtual/2023/poster/73678", "author_site": "Minqi Jiang, Chaochuan Hou, Ao Zheng, Songqiao Han, Hailiang Huang, Qingsong Wen, Xiyang Hu, Yue Zhao", "tldr": "", "abstract": "Deep learning (DL) techniques have recently found success in anomaly detection (AD) across various fields such as finance, medical services, and cloud computing. However, most of the current research tends to view deep AD algorithms as a whole, without dissecting the contributions of individual design choices like loss functions and network architectures. This view tends to diminish the value of preliminary steps like data preprocessing, as more attention is given to newly designed loss functions, network architectures, and learning paradigms. In this paper, we aim to bridge this gap by asking two key questions: (i) Which design choices in deep AD methods are crucial for detecting anomalies? (ii) How can we automatically select the optimal design choices for a given AD dataset, instead of relying on generic, pre-existing solutions? To address these questions, we introduce ADGym, a platform specifically crafted for comprehensive evaluation and automatic selection of AD design elements in deep methods. Our extensive experiments reveal that relying solely on existing leading methods is not sufficient. 
In contrast, models developed using ADGym significantly surpass current state-of-the-art techniques.", "keywords": "anomaly detection;outlier detection;benchmark;model selection;automated machine learning", "primary_area": "", "supplementary_material": "/attachment/9f36ca2e7ef0832aa15ac8d2c16730de51c42651.pdf", "author": "Minqi Jiang;Chaochuan Hou;Ao Zheng;Songqiao Han;Hailiang Huang;Qingsong Wen;Xiyang Hu;Yue Zhao", "authorids": "~Minqi_Jiang2;~Chaochuan_Hou1;~Ao_Zheng1;~Songqiao_Han1;~Hailiang_Huang1;~Qingsong_Wen2;~Xiyang_Hu1;~Yue_Zhao13", "gender": "M;M;M;M;M;Not Specified;M;M", "homepage": "https://github.com/jmq19950824;https://github.com/BraudoCC;;;;https://www.andrew.cmu.edu/user/xiyanghu/;https://viterbi-web.usc.edu/~yzhao010/;https://sites.google.com/site/qingsongwen8/", "dblp": ";;;32/6502.html;;239/8741;48/76-16;27/561", "google_scholar": ";;qLn1x20AAAAJ;;https://scholar.google.com/citations?hl=en;-UT6q2oAAAAJ;https://scholar.google.ca/citations?user=zoGDYsoAAAAJ;vjPJvwYAAAAJ", "orcid": ";0000-0001-7055-6766;0000-0002-6421-314X;;0000-0002-0009-6677;;0000-0003-3401-4921;0000-0003-4516-2524", "linkedin": ";;;;;;yzhao062/;qingsong-wen-22814156/", "or_profile": "~Minqi_Jiang2;~Chaochuan_Hou1;~Ao_Zheng1;~Songqiao_Han1;~Hailiang_Huang1;~Xiyang_Hu1;~Yue_Zhao13;~Qingsong_Wen1", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Carnegie Mellon University;Carnegie Mellon University;Alibaba Group", "aff_domain": "sufe.edu;sufe.edu;sufe.edu;shufe.edu.cn;shufe.edu.cn;cmu.edu;cmu.edu;alibaba-inc.com", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\njiang2023adgym,\ntitle={{ADG}ym: Design Choices for Deep Anomaly Detection},\nauthor={Minqi Jiang and Chaochuan Hou and Ao Zheng and Songqiao Han and Hailiang Huang and Qingsong Wen and Xiyang Hu and Yue Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=9CKx9SsSSc}\n}", "github": "", "project": "", "reviewers": "cdKh;ufDM;YA3B;hpUe;n2uJ", "pdf_size": 1468018, "rating": "6;6;6;6;7", "confidence": "4;3;5;4;5", "wc_summary_and_contributions": "54;76;77;75;140", "wc_strengths": "148;108;84;41;102", "wc_improvement": "116;193;137;126;114", "wc_limitations": "53;67;24;4;46", "wc_correctness": "59;138;10;1;12", "wc_clarity": "44;196;9;1;8", "wc_relation_to_prior_work": "24;12;11;39;10", "wc_documentation": "1;10;8;7;15", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "500;801;361;295;448", "wc_reply_reviewers": "397;56;73;0;23", "wc_reply_authors": "1581;2391;784;644;410", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;5;2;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 84.4, 29.08332855778375 ], "wc_strengths_avg": [ 96.6, 34.78850384825424 ], "wc_improvement_avg": [ 137.2, 29.075075236360096 ], "wc_limitations_avg": [ 38.8, 22.26566864030811 ], "wc_correctness_avg": [ 44.0, 51.16639522186413 ], "wc_clarity_avg": [ 51.6, 73.73628686067667 ], "wc_relation_to_prior_work_avg": [ 19.2, 11.124747188138706 ], "wc_documentation_avg": [ 8.2, 4.534313619501853 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 481.0, 174.84049874099537 ], 
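ADGym's second question, automatically selecting design choices per dataset, amounts to a search over a combinatorial design space. A minimal sketch follows; the choice axes are hypothetical placeholders (the real ADGym space is far larger), and `evaluate` is a stub standing in for training a detector and scoring it on validation data.

```python
from itertools import product

# Hypothetical design axes, not ADGym's actual option lists.
design_space = {
    "loss": ["bce", "deviation"],
    "network": ["mlp", "autoencoder"],
    "preprocessing": ["zscore", "minmax"],
}

def evaluate(choices, dataset):
    """Placeholder: train an AD model with these design choices and
    return a validation score (e.g., AUC). Stubbed with a constant."""
    return 0.5

def select_best(dataset):
    """Exhaustively score every combination and keep the best one."""
    keys = list(design_space)
    return max(
        (dict(zip(keys, combo)) for combo in product(*design_space.values())),
        key=lambda choices: evaluate(choices, dataset),
    )
```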
"wc_reply_reviewers_avg": [ 109.8, 145.82372920756075 ], "wc_reply_authors_avg": [ 1162.0, 729.7553014538504 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 1.3564659966250538 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3110361350435411531&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "sufe.edu;sufe.edu;sufe.edu;shufe.edu.cn;shufe.edu.cn;cmu.edu;cmu.edu;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;1;2", "aff_unique_norm": "Shanghai University of Finance and Economics;Carnegie Mellon University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sufe.edu.cn;https://www.cmu.edu;https://www.alibaba.com", "aff_unique_abbr": "SUFE;CMU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Scale-teaching: Robust Multi-scale Training for Time Series Classification with Noisy Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72608", "id": "9D0fELXbrg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a6ecedac816a24f92ad1f444b1edcb0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9D0fELXbrg", "openreview": "https://openreview.net/forum?id=9D0fELXbrg", "poster": "/media/PosterPDFs/NeurIPS%202023/72608.png?t=1698193835.9907894", "slides": "https://nips.cc/virtual/2023/poster/72608", "video": "https://nips.cc/virtual/2023/poster/72608", "author_site": "Zhen Liu, ma peitian, Dongliang Chen, Wenbin Pei, Qianli Ma", "tldr": "", "abstract": "Deep Neural Networks (DNNs) have been criticized because they easily overfit noisy (incorrect) labels. To improve the robustness of DNNs, existing methods for image data regard samples with small training losses as correctly labeled data (small-loss criterion). Nevertheless, time series' discriminative patterns are easily distorted by external noises (i.e., frequency perturbations) during the recording process. This results in training losses of some time series samples that do not meet the small-loss criterion. Therefore, this paper proposes a deep learning paradigm called Scale-teaching to cope with time series noisy labels. Specifically, we design a fine-to-coarse cross-scale fusion mechanism for learning discriminative patterns by utilizing time series at different scales to train multiple DNNs simultaneously. Meanwhile, each network is trained in a cross-teaching manner by using complementary information from different scales to select small-loss samples as clean labels. For unselected large-loss samples, we introduce multi-scale embedding graph learning via label propagation to correct their labels by using selected clean samples. 
Experiments on multiple benchmark time series datasets demonstrate the superiority of the proposed Scale-teaching paradigm over state-of-the-art methods in terms of effectiveness and robustness.", "keywords": "time series classification;deep neural networks;noisy labels", "primary_area": "", "supplementary_material": "/attachment/5d0a6e7d1c792e35d363c1f5264f8cacfe8fa138.pdf", "author": "Zhen Liu;Peitian Ma;Dongliang Chen;Wenbin Pei;Qianli Ma", "authorids": "~Zhen_Liu5;~Peitian_Ma1;~Dongliang_Chen2;~Wenbin_Pei1;~Qianli_Ma3", "gender": "M;M;;F;M", "homepage": "https://zliu21.github.io/;https://github.com/Ma187;;http://faculty.dlut.edu.cn/peiwenb12/en/index.htm;http://www2.scut.edu.cn/qianlima", "dblp": "77/35-23;;15/3543;231/0977;57/8221-1", "google_scholar": "https://scholar.google.com.hk/citations?user=M5qB8dsAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;F7KB6d0AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8107-0929;0000-0003-3422-331X;0000-0001-7000-163X;;0000-0002-9356-2883", "linkedin": ";;;;", "or_profile": "~Zhen_Liu5;~Peitian_Ma1;~Dongliang_Chen2;~Wenbin_Pei1;~Qianli_Ma3", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;Dalian University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;dlut.edu.cn;scut.edu.cn", "position": "PhD student;MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023scaleteaching,\ntitle={Scale-teaching: Robust Multi-scale Training for Time Series Classification with Noisy Labels},\nauthor={Zhen Liu and Peitian Ma and Dongliang Chen and Wenbin Pei and Qianli Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9D0fELXbrg}\n}", "github": "", "project": "", "reviewers": "L5T6;oEKq;QaFC;Rktg", "pdf_size": 1526733, "rating": "4;5;5;5", "confidence": "4;3;3;4", "soundness": "3;3;3;2", "novelty": "2;3;2;2", "presentation": "4;2;3;3", "wc_summary": "47;93;46;98", "wc_strengths": "55;27;65;128", "wc_weaknesses": "70;64;169;264", "wc_questions": "19;23;113;2", "wc_limitations": "28;22;1;24", "wc_review": "219;229;394;516", "wc_reply_reviewers": "0;28;0;161", "wc_reply_authors": "0;7;0;38", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.0, 24.566236993076494 ], "wc_strengths_avg": [ 68.75, 36.93490896157726 ], "wc_weaknesses_avg": [ 141.75, 81.97674975259754 ], "wc_questions_avg": [ 39.25, 43.30343519860751 ], "wc_limitations_avg": [ 18.75, 10.473180032826706 ], "wc_review_avg": [ 339.5, 123.34200419970482 ], "wc_reply_reviewers_avg": [ 47.25, 66.66098934159318 ], "wc_reply_authors_avg": [ 11.25, 15.706288549495072 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9062332731170689746&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "scut.edu.cn;scut.edu.cn;scut.edu.cn;dlut.edu.cn;scut.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "South China University of Technology;Dalian University of 
Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.scut.edu.cn;http://www.dlut.edu.cn/", "aff_unique_abbr": "SCUT;DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Should We Learn Most Likely Functions or Parameters?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72607", "id": "9EndFTDiqh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/703f727ec10190b2fddcf8e24f52df48-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9EndFTDiqh", "openreview": "https://openreview.net/forum?id=9EndFTDiqh", "poster": "/media/PosterPDFs/NeurIPS%202023/72607.png?t=1702242815.3521712", "slides": "https://nips.cc/virtual/2023/poster/72607", "video": "https://nips.cc/virtual/2023/poster/72607", "author_site": "Shikai Qiu, Tim G. J. Rudner, Sanyam Kapoor, Andrew Wilson", "tldr": "", "abstract": "Standard regularized training procedures correspond to maximizing a posterior distribution over parameters, known as maximum a posteriori (MAP) estimation. However, model parameters are of interest only insomuch as they combine with the functional form of a model to provide a function that can make good predictions. Moreover, the most likely parameters under the parameter posterior do not generally correspond to the most likely function induced by the parameter posterior. In fact, we can re-parametrize a model such that any setting of parameters can maximize the parameter posterior. As an alternative, we investigate the benefits and drawbacks of directly estimating the most likely function implied by the model and the data. We show that this procedure leads to pathological solutions when using neural networks and prove conditions under which the procedure is well-behaved, as well as a scalable approximation. Under these conditions, we find that function-space MAP estimation can lead to flatter minima, better generalization, and improved robustness to overfitting.", "keywords": "Function-Space Modeling;Function-Space Regularization;Maximum A Posteriori Estimation;Generalization", "primary_area": "", "supplementary_material": "", "author": "Shikai Qiu;Tim G. J. Rudner;Sanyam Kapoor;Andrew Gordon Wilson", "authorids": "~Shikai_Qiu1;~Tim_G._J._Rudner2;~Sanyam_Kapoor1;~Andrew_Gordon_Wilson1", "gender": "M;M;Not Specified;Not Specified", "homepage": "https://shikaiqiu.github.io/;https://sanyamkapoor.com;https://cims.nyu.edu/~andrewgw;https://timrudner.com", "dblp": ";223/5826;65/10453;230/3480", "google_scholar": "pK0OAsQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;https://scholar.google.de/citations?user=MbBntPgAAAAJ", "orcid": ";;;", "linkedin": ";sanyamkapoor/;;trudner", "or_profile": "~Shikai_Qiu1;~Sanyam_Kapoor1;~Andrew_Gordon_Wilson1;~Tim_Georg_Johann_Rudner1", "aff": "New York University;New York University;New York University;Yale University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;yale.edu", "position": "PhD student;PhD student;Associate Professor;Visiting Fellow", "bibtex": "@inproceedings{\nqiu2023should,\ntitle={Should We Learn Most Likely Functions or Parameters?},\nauthor={Shikai Qiu and Tim G. J. 
Rudner and Sanyam Kapoor and Andrew Gordon Wilson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9EndFTDiqh}\n}", "github": "", "project": "", "reviewers": "NXLr;Zoj6;PZwV;wSt3;VTcx", "pdf_size": 1033304, "rating": "5;6;6;7;7", "confidence": "3;4;2;2;2", "soundness": "3;4;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;4;3", "wc_summary": "298;101;140;60;89", "wc_strengths": "47;116;55;44;35", "wc_weaknesses": "307;237;82;55;160", "wc_questions": "349;83;86;3;70", "wc_limitations": "26;12;12;3;39", "wc_review": "1027;549;375;165;393", "wc_reply_reviewers": "71;0;58;11;10", "wc_reply_authors": "159;75;25;100;100", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;2;2;3;3", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 137.6, 84.21068815773921 ], "wc_strengths_avg": [ 59.4, 29.014479144041168 ], "wc_weaknesses_avg": [ 168.2, 94.13904609671802 ], "wc_questions_avg": [ 118.2, 119.28017437948354 ], "wc_limitations_avg": [ 18.4, 12.658593918757328 ], "wc_review_avg": [ 501.8, 289.62486081136063 ], "wc_reply_reviewers_avg": [ 30.0, 28.726294574831613 ], "wc_reply_authors_avg": [ 91.8, 43.34697221260096 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11335937832182717411&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 9, "email": "nyu.edu;nyu.edu;nyu.edu;yale.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "New York University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.yale.edu", "aff_unique_abbr": "NYU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9FLkxTGY3B", "title": "${\\rm EFO}_k$-CQA: Towards Knowledge Graph Complex Query Answering beyond Set Operation", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "To answer complex queries on knowledge graphs, logical reasoning over incomplete knowledge is required due to the open-world assumption. Learning-based methods are essential because they are capable of generalizing over unobserved knowledge. Therefore, an appropriate dataset is fundamental to both obtaining and evaluating such methods under this paradigm. In this paper, we propose ${\\rm EFO}_k$-CQA, a comprehensive framework for data generation, model training, and method evaluation that covers the combinatorial space of Existential First-order Queries with multiple variables (${\\rm EFO}_k$). The combinatorial query space in our framework significantly extends those defined by set operations in the existing literature. Additionally, we construct a dataset with 741 query types for empirical evaluation, and our benchmark results provide new insights into how query hardness affects the results. Furthermore, we demonstrate that the existing dataset construction process is systematically biased, which hinders the appropriate development of query-answering methods, highlighting the importance of our work.
Our code and data are provided in~\\url{https://anonymous.4open.science/r/EFOK-CQA/README.md}.", "keywords": "complex query answering;knowledge graph;first order logic", "primary_area": "", "supplementary_material": "/attachment/dd951483349c53c27520aaa5a2e367d926f97cbc.pdf", "author": "Hang Yin;Zihao Wang;Fei Weizhi;Yangqiu Song", "authorids": "~Hang_Yin3;~Zihao_Wang11;~Fei_Weizhi1;~Yangqiu_Song1", "gender": ";;M;M", "homepage": ";https://zihao-wang.github.io;;https://www.cse.ust.hk/~yqsong/", "dblp": ";148/9655-1;346/6929;86/2159", "google_scholar": ";T28rR00AAAAJ;97wX854AAAAJ;MdQZ-q8AAAAJ", "orcid": ";0000-0002-3919-0396;;0000-0002-7818-6090", "linkedin": ";zihao-wang-6a0a3286/;\u4f1f\u679d-\u8d39-87b131233/;yqsong/", "or_profile": "~Hang_Yin3;~Zihao_Wang11;~Fei_Weizhi1;~Yangqiu_Song1", "aff": ";University of Illinois Urbana-Champaign;The Department of Mathematics, Tsinghua University;Hong Kong University of Science and Technology", "aff_domain": ";illinois.edu;mail.tsinghua.edu.cn;ust.hk", "position": ";Intern;PhD student;Associate Professor", "bibtex": "@misc{\nyin2023rm,\ntitle={\\$\\{{\\textbackslash}rm {EFO}\\}\\_k\\$-{CQA}: Towards Knowledge Graph Complex Query Answering beyond Set Operation},\nauthor={Hang Yin and Zihao Wang and Fei Weizhi and Yangqiu Song},\nyear={2023},\nurl={https://openreview.net/forum?id=9FLkxTGY3B}\n}", "github": "", "project": "", "reviewers": "cx54;YhLr;t4bs;tAiM", "site": "https://openreview.net/forum?id=9FLkxTGY3B", "pdf_size": 370264, "rating": "4;5;6;6", "confidence": "3;2;5;3", "wc_summary_and_contributions": "33;43;91;24", "wc_strengths": "58;45;38;19", "wc_improvement": "141;118;302;27", "wc_limitations": "56;1;57;2", "wc_correctness": "13;9;1;1", "wc_clarity": "22;23;1;7", "wc_relation_to_prior_work": "1;24;1;1", "wc_documentation": "1;10;11;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "326;274;503;90", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "869;700;876;519", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "wc_summary_and_contributions_avg": [ 47.75, 25.85899263312475 ], "wc_strengths_avg": [ 40.0, 14.089002803605371 ], "wc_improvement_avg": [ 147.0, 99.12366014226876 ], "wc_limitations_avg": [ 29.0, 27.504545078950134 ], "wc_correctness_avg": [ 6.0, 5.196152422706632 ], "wc_clarity_avg": [ 13.25, 9.496709956611289 ], "wc_relation_to_prior_work_avg": [ 6.75, 9.959292143521045 ], "wc_documentation_avg": [ 7.5, 3.905124837953327 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 298.25, 147.1807986797191 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 741.0, 146.26517015338956 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.48420012470625223, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5464308228007727041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Tsinghua University;Hong Kong University of Science and Technology", "aff_unique_dep": ";Department of Mathematics;", "aff_unique_url": "https://illinois.edu;https://www.tsinghua.edu.cn;https://www.ust.hk", "aff_unique_abbr": "UIUC;THU;HKUST", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Urbana-Champaign;;Hong Kong SAR", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { 
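To ground the ${\rm EFO}_k$ setting described above: an existential first-order query of the simplest multi-hop shape can be answered by brute force on a complete toy triple set (learning-based generalization over missing triples, the paper's actual subject, is out of scope here). The graph and relation names below are made up for illustration.

```python
# Answer a toy EFO query: find y such that ∃x: r1(a, x) ∧ r2(x, y),
# over a small knowledge graph stored as (head, relation, tail) triples.
triples = {("a", "r1", "m"), ("a", "r1", "n"),
           ("m", "r2", "y1"), ("n", "r2", "y2")}

def answers(anchor, r1, r2, triples):
    mids = {t for (h, r, t) in triples if h == anchor and r == r1}   # witnesses for x
    return {t for (h, r, t) in triples if h in mids and r == r2}     # free variable y

print(answers("a", "r1", "r2", triples))  # -> {'y1', 'y2'}
```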
"title": "Spike-driven Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72606", "id": "9FmolyOHi5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca0f5358dbadda74b3049711887e9ead-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9FmolyOHi5", "openreview": "https://openreview.net/forum?id=9FmolyOHi5", "poster": "/media/PosterPDFs/NeurIPS%202023/72606.png?t=1701929679.976799", "slides": "https://nips.cc/virtual/2023/poster/72606", "video": "https://nips.cc/virtual/2023/poster/72606", "author_site": "Man Yao, Man Yao, JiaKui Hu, Zhaokun Zhou, Li Yuan, Yonghong Tian, Bo Xu, Guoqi Li", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) provide an energy-efficient deep learning option due to their unique spike-based event-driven (i.e., spike-driven) paradigm. In this paper, we incorporate the spike-driven paradigm into Transformer by the proposed Spike-driven Transformer with four unique properties: (1) Event-driven, no calculation is triggered when the input of Transformer is zero; (2) Binary spike communication, all matrix multiplications associated with the spike matrix can be transformed into sparse additions; (3) Self-attention with linear complexity at both token and channel dimensions; (4) The operations between spike-form Query, Key, and Value are mask and addition. Together, there are only sparse addition operations in the Spike-driven Transformer. To this end, we design a novel Spike-Driven Self-Attention (SDSA), which exploits only mask and addition operations without any multiplication, and thus having up to $87.2\\times$ lower computation energy than vanilla self-attention. Especially in SDSA, the matrix multiplication between Query, Key, and Value is designed as the mask operation. In addition, we rearrange all residual connections in the vanilla Transformer before the activation functions to ensure that all neurons transmit binary spike signals. 
It is shown that the Spike-driven Transformer can achieve 77.1\\% top-1 accuracy on ImageNet-1K, which is the state-of-the-art result in the SNN field.", "keywords": "Spiking Neural Networks; Transformer; Neuromorphic Computing; Event-driven; Linear Attention", "primary_area": "", "supplementary_material": "/attachment/c0b352a9dbbab5579050d7133d6d479bda844bf7.pdf", "author": "Man Yao;JiaKui Hu;Zhaokun Zhou;Li Yuan;Yonghong Tian;Bo XU;Guoqi Li", "authorids": "~Man_Yao1;~JiaKui_Hu1;~Zhaokun_Zhou1;~Li_Yuan2;~Yonghong_Tian1;~Bo_XU10;~Guoqi_Li1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://github.com/jkhu29;;http://www.pkuml.org;;https://scholar.google.com/citations?hl=en&user=qCfE--MAAAAJ;https://yuanli2333.github.io/", "dblp": "21/5932;327/3225;;86/5857;;;98/4583-7", "google_scholar": "eE4vvp0AAAAJ;VagFt-sAAAAJ;;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;-5juAR0AAAAJ", "orcid": ";;0000-0003-4454-6630;0000-0002-2978-5935;;;0000-0002-2120-5588", "linkedin": ";;;;%E6%B3%A2-%E5%BE%90-74210b115/?midToken=AQH1EMB1ZoboJA&midSig=2Q5MzMXmNEH9M1&trk=eml-email_pymk_02-header-22-profile&trkEmail=eml-email_pymk_02-header-22-profile-null-7ydrhe~kpggjoav~k9-null-neptune/profile~vanity.view;;", "or_profile": "~Man_Yao1;~JiaKui_Hu1;~Zhaokun_Zhou1;~Yonghong_Tian1;~Bo_XU10;~Guoqi_Li1;~Yuan_LI2", "aff": "Xi'an Jiaotong University;Xi'an University of Electronic Science and Technology;Peking University;Peking University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Peking University", "aff_domain": "xjtu.edu.cn;xidian.edu.cn;pku.edu.cn;pku.edu.cn;ia.ac.cn;ia.ac.cn;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyao2023spikedriven,\ntitle={Spike-driven Transformer},\nauthor={Man Yao and JiaKui Hu and Zhaokun Zhou and Li Yuan and Yonghong Tian and Bo XU and Guoqi Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9FmolyOHi5}\n}", "github": "", "project": "", "reviewers": "qknk;8FkB;Lu4B;GiWX", "pdf_size": 5524937, "rating": "5;7;7;7", "confidence": "4;3;4;2", "soundness": "2;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;2;3", "wc_summary": "113;39;27;56", "wc_strengths": "44;72;15;22", "wc_weaknesses": "129;52;132;18", "wc_questions": "53;73;98;12", "wc_limitations": "1;5;1;3", "wc_review": "340;241;273;111", "wc_reply_reviewers": "77;19;9;208", "wc_reply_authors": "27;21;41;401", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.75, 32.97252644247935 ], "wc_strengths_avg": [ 38.25, 22.230328382639787 ], "wc_weaknesses_avg": [ 82.75, 49.25126901918366 ], "wc_questions_avg": [ 59.0, 31.47220996371243 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 241.25, 83.25375366912894 ], "wc_reply_reviewers_avg": [ 78.25, 79.28232779125497 ], "wc_reply_authors_avg": [ 122.5, 160.95573925772266 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 172, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6183854208515620494&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "xjtu.edu.cn;xidian.edu.cn;pku.edu.cn;pku.edu.cn;ia.ac.cn;ia.ac.cn;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;3;3;2", "aff_unique_norm": "Xi'an Jiao Tong University;Xi'an University of Electronic Science and Technology;Peking University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Automation", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.xidian.edu.cn/;http://www.pku.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "XJTU;Xidian University;Peking U;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9HJyRsgU13", "title": "Bayesian Kernelized Tensor Factorization as Surrogate for Bayesian Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bayesian optimization (BO) primarily uses Gaussian processes (GP) as the key surrogate model, mostly with a simple stationary and separable kernel function such as the squared-exponential kernel with automatic relevance determination (SE-ARD). However, such simple kernel specifications are deficient in learning functions with complex features, such as being nonstationary, nonseparable, and multimodal. Approximating such functions using a local GP, even in a low-dimensional space, requires a large number of samples, not to mention in a high-dimensional setting. In this paper, we propose to use Bayesian Kernelized Tensor Factorization (BKTF)---as a new surrogate model---for BO in a $D$-dimensional Cartesian product space. Our key idea is to approximate the underlying $D$-dimensional solid with a fully Bayesian low-rank tensor CP decomposition, in which we place GP priors on the latent basis functions for each dimension to encode local consistency and smoothness. With this formulation, information from each sample can be shared not only with neighbors but also across dimensions. Although BKTF no longer has an analytical posterior, we can still efficiently approximate the posterior distribution through Markov chain Monte Carlo (MCMC) and obtain prediction and full uncertainty quantification (UQ). We conduct numerical experiments on both standard BO test functions and machine learning hyperparameter tuning problems, and our results show that BKTF offers a flexible and highly effective approach for characterizing complex functions with UQ, especially in cases where the initial sample size and budget are severely limited. 
", "keywords": "Bayesian optimization;surrogate model;low-rank factorization;kernelized tensor factorization;Markov chain Monte Carlo", "primary_area": "", "supplementary_material": "/attachment/14efe4dd4acf8dd527b32afc9b1791c01fde5cd6.zip", "author": "Mengying Lei;Lijun Sun", "authorids": "mengying.lei@mail.mcgill.ca;~Lijun_Sun1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nlei2023bayesian,\ntitle={Bayesian Kernelized Tensor Factorization as Surrogate for Bayesian Optimization},\nauthor={Mengying Lei and Lijun Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=9HJyRsgU13}\n}", "github": "", "project": "", "reviewers": "LjoC;1Atp;BFf9;LVy9", "site": "https://openreview.net/forum?id=9HJyRsgU13", "pdf_size": 1314287, "rating": "4;5;7;7", "confidence": "4;3;3;4", "soundness": "1;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "60;335;106;52", "wc_strengths": "38;94;101;62", "wc_weaknesses": "296;485;92;40", "wc_questions": "375;178;2;88", "wc_limitations": "16;24;2;54", "wc_review": "785;1116;303;296", "wc_reply_reviewers": "1149;49;0;26", "wc_reply_authors": "1002;484;0;0", "reply_reviewers": "3;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 138.25, 115.44776957568301 ], "wc_strengths_avg": [ 73.75, 25.341418665891617 ], "wc_weaknesses_avg": [ 228.25, 176.4318211094586 ], "wc_questions_avg": [ 160.75, 138.46908499733794 ], "wc_limitations_avg": [ 24.0, 19.026297590440446 ], "wc_review_avg": [ 625.0, 345.9067793495814 ], "wc_reply_reviewers_avg": [ 306.0, 487.01488683612126 ], "wc_reply_authors_avg": [ 371.5, 414.189268330313 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9402037878263293330&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 5 }, { "title": "Provable Advantage of Curriculum Learning on Parity Targets with Mixed Inputs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72605", "id": "9Ihu0VBOTq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c8ce3c63f6b66d6811c6d67c68e487b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9Ihu0VBOTq", "openreview": "https://openreview.net/forum?id=9Ihu0VBOTq", "poster": "/media/PosterPDFs/NeurIPS%202023/72605.png?t=1702270259.6528685", "slides": "https://nips.cc/virtual/2023/poster/72605", "video": "https://nips.cc/virtual/2023/poster/72605", "author_site": "Emmanuel Abbe, Elisabetta Cornacchia, Aryo Lotfi", "tldr": "", "abstract": "Experimental results have shown that curriculum learning, i.e., presenting simpler examples before more complex ones, can improve the efficiency of learning. Some recent theoretical results also showed that changing the sampling distribution can help neural networks learn parities, with formal results only for large learning rates and one-step arguments. 
Here we show a separation result in the number of training steps with standard (bounded) learning rates on a common sample distribution: if the data distribution is a mixture of sparse and dense inputs, there exists a regime in which a 2-layer ReLU neural network trained by a curriculum noisy-GD (or SGD) algorithm that uses sparse examples first, can learn parities of sufficiently large degree, while any fully connected neural network of possibly larger width or depth trained by noisy-GD on the unordered samples cannot learn without additional steps. We also provide experimental results supporting the qualitative separation beyond the specific regime of the theoretical results.", "keywords": "curriculum learning;parities;time complexity;sample complexity;neural networks;SGD", "primary_area": "", "supplementary_material": "", "author": "Emmanuel Abbe;Elisabetta Cornacchia;Aryo Lotfi", "authorids": "~Emmanuel_Abbe1;~Elisabetta_Cornacchia1;~Aryo_Lotfi1", "gender": ";F;M", "homepage": ";https://sites.google.com/view/e-cornacchia/home;https://aryol.github.io", "dblp": "84/5016;267/1973;284/8194", "google_scholar": ";UpIOkvgAAAAJ;M7_iyAgAAAAJ", "orcid": ";;", "linkedin": ";;aryo/", "or_profile": "~Emmanuel_Abbe1;~Elisabetta_Cornacchia1;~Aryo_Lotfi1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "position": "Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nabbe2023provable,\ntitle={Provable Advantage of Curriculum Learning on Parity Targets with Mixed Inputs},\nauthor={Emmanuel Abbe and Elisabetta Cornacchia and Aryo Lotfi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9Ihu0VBOTq}\n}", "github": "", "project": "", "reviewers": "3jFh;8Apd;Sb2P;RLGW", "pdf_size": 791372, "rating": "6;6;6;6", "confidence": "4;3;1;5", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "3;2;3;4", "wc_summary": "201;66;43;137", "wc_strengths": "109;71;35;119", "wc_weaknesses": "193;757;3;138", "wc_questions": "179;196;149;46", "wc_limitations": "4;28;1;25", "wc_review": "686;1118;231;465", "wc_reply_reviewers": "66;27;24;36", "wc_reply_authors": "42;10;9;10", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 1.479019945774904 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.75, 62.0941824972356 ], "wc_strengths_avg": [ 83.5, 33.237779709240506 ], "wc_weaknesses_avg": [ 272.75, 288.00206162456544 ], "wc_questions_avg": [ 142.5, 58.200085910589515 ], "wc_limitations_avg": [ 14.5, 12.093386622447824 ], "wc_review_avg": [ 625.0, 326.9579483664528 ], "wc_reply_reviewers_avg": [ 38.25, 16.618889854620253 ], "wc_reply_authors_avg": [ 17.75, 14.00669482783144 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=778912232349116625&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "epfl.ch;epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", 
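The learning problem in the abstract above is concrete enough to state in code: a degree-k parity over {-1, +1}^d inputs, with a mixture of sparse (mostly +1) and dense (uniform) examples. The dimensions, degree, sparsity level, and mixture weight below are illustrative assumptions.

```python
import numpy as np

rng = np.random.default_rng(0)
d, k, n = 20, 5, 8
support = rng.choice(d, size=k, replace=False)   # coordinates of the parity

def parity(X):
    """Degree-k parity target in {-1, +1}."""
    return np.prod(X[:, support], axis=1)

def sample_inputs(n, p_sparse=0.5):
    """Mixture: dense rows are uniform over {-1, +1}^d; sparse rows
    are +1 except for a few flipped coordinates."""
    X = np.where(rng.random((n, d)) < 0.5, -1, 1)
    sparse_rows = rng.random(n) < p_sparse
    X[sparse_rows] = np.where(rng.random((sparse_rows.sum(), d)) < 0.1, -1, 1)
    return X

X = sample_inputs(n)
print(parity(X))
```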
"aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "The Behavior and Convergence of Local Bayesian Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72604", "id": "9KtX12YmA7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8f4eae0a41cab67fdead3aa6b77f083-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9KtX12YmA7", "openreview": "https://openreview.net/forum?id=9KtX12YmA7", "poster": "/media/PosterPDFs/NeurIPS%202023/72604.png?t=1701992462.158619", "slides": "https://nips.cc/virtual/2023/poster/72604", "video": "https://nips.cc/virtual/2023/poster/72604", "author_site": "Kaiwen Wu, Kyurae Kim, Roman Garnett, Jacob Gardner", "tldr": "", "abstract": "A recent development in Bayesian optimization is the use of local optimization strategies, which can deliver strong empirical performance on high-dimensional problems compared to traditional global strategies. The \"folk wisdom\" in the literature is that the focus on local optimization sidesteps the curse of dimensionality; however, little is known concretely about the expected behavior or convergence of Bayesian local optimization routines. We first study the behavior of the local approach, and find that the statistics of individual local solutions of Gaussian process sample paths are surprisingly good compared to what we would expect to recover from global methods. We then present the first rigorous analysis of such a Bayesian local optimization algorithm recently proposed by M\u00fcller et al. (2021), and derive convergence rates in both the noisy and noiseless settings.", "keywords": "Bayesian optimization;convergence rates", "primary_area": "", "supplementary_material": "/attachment/7b7c1993532f068c4d662e5dee8ab52ee8a26910.pdf", "author": "Kaiwen Wu;Kyurae Kim;Roman Garnett;Jacob R. Gardner", "authorids": "~Kaiwen_Wu2;~Kyurae_Kim1;~Roman_Garnett1;~Jacob_R._Gardner1", "gender": ";;M;", "homepage": ";;http://www.cse.wustl.edu/~garnett/;", "dblp": ";;29/7058;", "google_scholar": ";;CUkAtC4AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kaiwen_Wu2;~Kyurae_Kim1;~Roman_Garnett1;~Jacob_R._Gardner1", "aff": ";;Uber;", "aff_domain": ";;uber.com;", "position": ";;Applied Scientist;", "bibtex": "@inproceedings{\nwu2023the,\ntitle={The Behavior and Convergence of Local Bayesian Optimization},\nauthor={Kaiwen Wu and Kyurae Kim and Roman Garnett and Jacob R. 
Gardner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9KtX12YmA7}\n}", "github": "", "project": "", "reviewers": "kgRX;pxRe;GVja;aL9q", "pdf_size": 753014, "rating": "6;8;8;8", "confidence": "3;4;4;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "73;76;68;120", "wc_strengths": "111;47;44;102", "wc_weaknesses": "174;75;6;100", "wc_questions": "33;42;259;113", "wc_limitations": "19;17;100;20", "wc_review": "410;257;477;455", "wc_reply_reviewers": "51;12;16;78", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 20.837166314064877 ], "wc_strengths_avg": [ 76.0, 30.683871985132516 ], "wc_weaknesses_avg": [ 88.75, 60.06402833643445 ], "wc_questions_avg": [ 111.75, 90.48584143389506 ], "wc_limitations_avg": [ 39.0, 35.234925854895735 ], "wc_review_avg": [ 399.75, 85.88182287306202 ], "wc_reply_reviewers_avg": [ 39.25, 27.03123193641015 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10963437393746871069&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": ";;uber.com;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Uber Technologies Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.uber.com", "aff_unique_abbr": "Uber", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Optimal Transport-Guided Conditional Score-Based Diffusion Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72603", "id": "9Muli2zoFn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72c12e48c6135762f56bf188cd2479d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9Muli2zoFn", "openreview": "https://openreview.net/forum?id=9Muli2zoFn", "poster": "/media/PosterPDFs/NeurIPS%202023/72603.png?t=1699165504.8073568", "slides": "https://nips.cc/virtual/2023/poster/72603", "video": "https://nips.cc/virtual/2023/poster/72603", "author_site": "Xiang Gu, Liwei Yang, Jian Sun, Zongben Xu", "tldr": "", "abstract": "The conditional score-based diffusion model (SBDM) performs conditional generation of target data with paired data as the condition, and has achieved great success in image translation. However, it requires paired data as the condition, and sufficient paired data are often unavailable in real-world applications. To tackle applications with partially paired or even unpaired datasets, we propose a novel Optimal Transport-guided Conditional Score-based diffusion model (OTCS) in this paper. We build the coupling relationship for the unpaired or partially paired dataset based on $L_2$-regularized unsupervised or semi-supervised optimal transport, respectively. Based on the coupling relationship, we develop the objective for training the conditional score-based model for unpaired or partially paired settings, which is based on a reformulation and generalization of the conditional SBDM for the paired setting.
With the estimated coupling relationship, we effectively train the conditional score-based model by designing a ``resampling-by-compatibility'' strategy to choose the sampled data with high compatibility as guidance. Extensive experiments on unpaired super-resolution and semi-paired image-to-image translation demonstrated the effectiveness of the proposed OTCS model. From the viewpoint of optimal transport, OTCS provides an approach to transport data across distributions, which is a challenge for OT on large-scale datasets. We theoretically prove that OTCS realizes the data transport in OT with a theoretical bound.", "keywords": "optimal transport;diffusion probabilistic model;conditional score-based model;unpaired super-resolution;image-to-image translation", "primary_area": "", "supplementary_material": "/attachment/e1c67a8a705ee783402bd12ab838a177fe64b9a2.pdf", "author": "Xiang Gu;Liwei Yang;Jian Sun;Zongben Xu", "authorids": "~Xiang_Gu1;~Liwei_Yang1;~Jian_Sun1;~Zongben_Xu1", "gender": "M;M;M;M", "homepage": "https://xjtu-xgu.github.io/xianggu/;;https://gr.xjtu.edu.cn/en/web/jiansun/publications;https://gr.xjtu.edu.cn/web/zbxu", "dblp": "57/7710-5;71/3668;68/4942-9.html;", "google_scholar": "51GDv0EAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;SSgNWOMAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiang_Gu1;~Liwei_Yang1;~Jian_Sun1;~Zongben_Xu1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn", "position": "PhD student;PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\ngu2023optimal,\ntitle={Optimal Transport-Guided Conditional Score-Based Diffusion Model},\nauthor={Xiang Gu and Liwei Yang and Jian Sun and Zongben Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9Muli2zoFn}\n}", "github": "", "project": "", "reviewers": "H2CC;Tt9x;w9nv;63Ma", "pdf_size": 18469317, "rating": "6;7;7;8", "confidence": "3;4;4;4", "soundness": "3;4;3;4", "novelty": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "60;76;81;107", "wc_strengths": "165;160;86;55", "wc_weaknesses": "105;50;64;295", "wc_questions": "21;225;85;11", "wc_limitations": "33;69;39;8", "wc_review": "384;580;355;476", "wc_reply_reviewers": "39;14;31;56", "wc_reply_authors": "17;12;19;33", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.0, 16.896745248715803 ], "wc_strengths_avg": [ 116.5, 47.32071428032337 ], "wc_weaknesses_avg": [ 128.5, 98.23059604827816 ], "wc_questions_avg": [ 85.5, 85.39759949787816 ], "wc_limitations_avg": [ 37.25, 21.706853756359994 ], "wc_review_avg": [ 448.75, 87.96412621063203 ], "wc_reply_reviewers_avg": [ 35.0, 15.116216457830975 ], "wc_reply_authors_avg": [ 20.25, 7.790218225441442 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10422163595811837753&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", 
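One way to picture the "resampling-by-compatibility" strategy named in the OTCS abstract above: given an estimated coupling matrix, draw a condition for each target sample in proportion to its coupling weight. A toy sketch follows, with a normalized random matrix standing in for the learned OT plan.

```python
import numpy as np

rng = np.random.default_rng(0)
n_src, n_tgt = 6, 4
plan = rng.random((n_src, n_tgt))
plan /= plan.sum()                 # coupling: nonnegative, entries sum to 1

def sample_condition(j, plan):
    """For target sample j, pick a source index with probability
    proportional to its coupling weight (its 'compatibility')."""
    w = plan[:, j]
    return rng.choice(len(w), p=w / w.sum())

print([sample_condition(j, plan) for j in range(n_tgt)])
```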
"aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Cross-modal Prompts: Adapting Large Pre-trained Models for Audio-Visual Downstream Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72602", "id": "9MwidIH4ea", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/af01716e08073368a7c8a62be46dba17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9MwidIH4ea", "openreview": "https://openreview.net/forum?id=9MwidIH4ea", "poster": "/media/PosterPDFs/NeurIPS%202023/72602.png?t=1699521476.3931298", "slides": "https://nips.cc/virtual/2023/poster/72602", "video": "https://nips.cc/virtual/2023/poster/72602", "author_site": "Haoyi Duan, Yan Xia, Zhou Mingze, Li Tang, Jieming Zhu, Zhou Zhao", "tldr": "", "abstract": "In recent years, the deployment of large-scale pre-trained models in audio-visual downstream tasks has yielded remarkable outcomes. However, these models, primarily trained on single-modality unconstrained datasets, still encounter challenges in feature extraction for multi-modal tasks, leading to suboptimal performance. This limitation arises due to the introduction of irrelevant modality-specific information during encoding, which adversely affects the performance of downstream tasks. To address this challenge, this paper proposes a novel Dual-Guided Spatial-Channel-Temporal (DG-SCT) attention mechanism. This mechanism leverages audio and visual modalities as soft prompts to dynamically adjust the parameters of pre-trained models based on the current multi-modal input features. Specifically, the DG-SCT module incorporates trainable cross-modal interaction layers into pre-trained audio-visual encoders, allowing adaptive extraction of crucial information from the current modality across spatial, channel, and temporal dimensions, while preserving the frozen parameters of large-scale pre-trained models. Experimental evaluations demonstrate that our proposed model achieves state-of-the-art results across multiple downstream tasks, including AVE, AVVP, AVS, and AVQA. Furthermore, our model exhibits promising performance in challenging few-shot and zero-shot scenarios. 
The source code and pre-trained models are available at https://github.com/haoyi-duan/DG-SCT.", "keywords": "audio-visual;multi-modal prompt;clip;cross-modal attention", "primary_area": "", "supplementary_material": "", "author": "Haoyi Duan;Yan Xia;Mingze Zhou;Li Tang;Jieming Zhu;Zhou Zhao", "authorids": "~Haoyi_Duan1;~Yan_Xia4;~Mingze_Zhou1;~Li_Tang3;~Jieming_Zhu2;~Zhou_Zhao2", "gender": "M;M;M;M;M;M", "homepage": "https://haoyi-duan.github.io;https://github.com/marmot-xy;https://github.com/Themoonlightoflibai;;https://jiemingzhu.github.io/;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "344/4292;17/6518-6;360/5349;;10/2717;75/7785", "google_scholar": "zGkZW-UAAAAJ;6kEbV3IAAAAJ;;https://scholar.google.com.hk/citations?hl=en;oNKerP8AAAAJ;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": "0000-0002-5551-2594;0000-0003-4631-741X;;;0000-0002-5666-8320;0000-0001-6121-0384", "linkedin": "haoyi-duan-a07576291/;;;;;", "or_profile": "~Haoyi_Duan1;~Yan_Xia4;~Mingze_Zhou1;~Li_Tang3;~Jieming_Zhu2;~Zhou_Zhao2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Huawei Noah's Ark Lab;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;huawei.com;zju.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Undergrad student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nduan2023crossmodal,\ntitle={Cross-modal Prompts: Adapting Large Pre-trained Models for Audio-Visual Downstream Tasks},\nauthor={Haoyi Duan and Yan Xia and Mingze Zhou and Li Tang and Jieming Zhu and Zhou Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9MwidIH4ea}\n}", "github": "", "project": "", "reviewers": "myp2;TYiB;VeZU;BzrV", "pdf_size": 3327623, "rating": "4;6;6;6", "confidence": "5;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "28;37;91;45", "wc_strengths": "30;29;92;53", "wc_weaknesses": "144;154;7;171", "wc_questions": "125;53;204;18", "wc_limitations": "8;7;25;21", "wc_review": "335;280;419;308", "wc_reply_reviewers": "167;26;0;863", "wc_reply_authors": "1040;21;0;751", "reply_reviewers": "2;1;0;2", "reply_authors": "3;2;1;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 50.25, 24.283482040267618 ], "wc_strengths_avg": [ 51.0, 25.544079548889602 ], "wc_weaknesses_avg": [ 119.0, 65.37966044573801 ], "wc_questions_avg": [ 100.0, 71.36876067300034 ], "wc_limitations_avg": [ 15.25, 7.8859051477937525 ], "wc_review_avg": [ 335.5, 51.98317035349037 ], "wc_reply_reviewers_avg": [ 264.0, 351.62124509193126 ], "wc_reply_authors_avg": [ 453.0, 454.20424920953786 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11122846919269458934&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;huawei.com;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Zhejiang University;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "https://www.zju.edu.cn;https://www.huawei.com", 
"aff_unique_abbr": "ZJU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9NzC3PjpAt", "title": "Make You Better: Reinforcement Learning from Human Gain", "track": "main", "status": "Reject", "tldr": "", "abstract": "In human-agent collaboration tasks, it is essential to explore ways for developing assistive agents that can improve humans' performance in achieving their goals. In this paper, we propose the Reinforcement Learning from Human Gain (RLHG) approach, designed to effectively enhance human goal-achievement abilities in collaborative tasks with known human goals. Firstly, the RLHG method trains a value network to estimate primitive human performance in achieving goals. Subsequently, the RLHG method trains a gain network to estimate the positive gain of human performance in achieving goals when subjected to effective enhancement, in comparison to the primitive performance. The positive gains are used for guiding the agent to learn effective enhancement behaviors. Distinct from directly integrating human goal rewards into optimization objectives, the RLHG method largely mitigates the human-agent credit assignment issues encountered by agents in learning to enhance humans. We evaluate the RLHG agent in the widely popular Multi-player Online Battle Arena (MOBA) game, Honor of Kings, by conducting experiments in both simulated environments and real-world human-agent tests. Experimental results demonstrate that the RLHG agent effectively improves the goal-achievement performance of participants across varying levels.", "keywords": "deep reinforcement learning;human enhancement;human-agent collaboration;game playing", "primary_area": "", "supplementary_material": "/attachment/dd74f742dcae4951b48c528fce516a0b8829072e.pdf", "author": "Yiming Gao;Feiyu Liu;Liang Wang;Weixuan Wang;Wenjin Yang;Zhenjie Lian;Siqin Li;Xianliang Wang;Xianhan Zeng;Wenhui Chen;Jing Dai;QIANG FU;Yang Wei;Lanxiao Huang;Wei Liu", "authorids": "~Yiming_Gao4;~Feiyu_Liu1;~Liang_Wang10;~Weixuan_Wang1;whitjimyang@tencent.com;~Zhenjie_Lian1;~Siqin_Li1;~Xianliang_Wang1;~Xianhan_Zeng1;wenhuichen@tencent.com;jingthudai@tencent.com;~QIANG_FU8;~Yang_Wei2;~Lanxiao_Huang1;~Wei_Liu3", "gender": "M;;M;M;;;;M;M;;;M;M;M;M", "homepage": ";;;;;;;;;;;;;;https://sites.google.com/view/cuweiliu", "dblp": "304/8689;https://dblp.uni-trier.de/pid/269/4504.html;56/4499.html;;;279/6569.html;274/6530.html;03/1094.html;241/9695.html;;;;03/1094-32.html;255/6012.html;49/3283-5", "google_scholar": "https://scholar.google.com.hk/citations?user=UvvufgQAAAAJ;;;https://scholar.google.com.hk/citations?user=VN4m3l8AAAAJ;;;;;;;;gANaxT0AAAAJ;;;AjxoEpIAAAAJ", "orcid": ";;;;;;;;;;;;;;0000-0002-3865-8145", "linkedin": ";;;;;;;;;;;;;;", "or_profile": "~Yiming_Gao4;~Feiyu_Liu1;~Liang_Wang10;~Weixuan_Wang1;whitjimyang@tencent.com;~Zhenjie_Lian1;~Siqin_Li1;~Xianliang_Wang1;~Xianhan_Zeng1;wenhuichen@tencent.com;jingthudai@tencent.com;~QIANG_FU8;~Yang_Wei2;~Lanxiao_Huang1;~Wei_Liu3", "aff": "Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;;;Tencent AI Lab;Tencent AI Lab;Tencent TiMi L1 Studio;Tencent", "aff_domain": "tencent.com;tencent.com;tencent.com;tencent.com;;tencent.com;tencent.com;tencent.com;tencent.com;;;tencent.com;tencent.com;tencent.com;tencent.com", "position": "Researcher;Researcher;Researcher;Researcher;;Researcher;Researcher;Researcher;Researcher;;;Principal 
Researcher;Researcher;Researcher;Distinguished Scientist", "bibtex": "@misc{\ngao2023make,\ntitle={Make You Better: Reinforcement Learning from Human Gain},\nauthor={Yiming Gao and Feiyu Liu and Liang Wang and Weixuan Wang and Wenjin Yang and Zhenjie Lian and Siqin Li and Xianliang Wang and Xianhan Zeng and Wenhui Chen and Jing Dai and QIANG FU and Yang Wei and Lanxiao Huang and Wei Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=9NzC3PjpAt}\n}", "github": "", "project": "", "reviewers": "HuXj;5Pqf;3jnU;11JW", "site": "https://openreview.net/forum?id=9NzC3PjpAt", "pdf_size": 5006534, "rating": "3;4;6;6", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;1;3;3", "wc_summary": "56;56;127;116", "wc_strengths": "39;30;109;44", "wc_weaknesses": "126;200;119;266", "wc_questions": "65;284;75;2", "wc_limitations": "78;1;27;7", "wc_review": "364;571;457;435", "wc_reply_reviewers": "65;0;37;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 88.75, 32.98010764081888 ], "wc_strengths_avg": [ 55.5, 31.2929704566377 ], "wc_weaknesses_avg": [ 177.75, 60.02655662288151 ], "wc_questions_avg": [ 106.5, 106.23205730851681 ], "wc_limitations_avg": [ 28.25, 30.293357357678268 ], "wc_review_avg": [ 456.75, 74.37867637972593 ], "wc_reply_reviewers_avg": [ 32.75, 23.155722834755128 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hx4PS8CsSNYJ:scholar.google.com/&scioq=Make+You+Better:+Reinforcement+Learning+from+Human+Gain&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Tencent", "aff_unique_dep": "Tencent AI Lab", "aff_unique_url": "https://ai.tencent.com", "aff_unique_abbr": "Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Loss Decoupling for Task-Agnostic Continual Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72601", "id": "9Oi3YxIBSa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/249f73e01f0a2bb6c8d971b565f159a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9Oi3YxIBSa", "openreview": "https://openreview.net/forum?id=9Oi3YxIBSa", "poster": "/media/PosterPDFs/NeurIPS%202023/72601.png?t=1701938854.6707325", "slides": "https://nips.cc/virtual/2023/poster/72601", "video": "https://nips.cc/virtual/2023/poster/72601", "author_site": "Yan-Shuo Liang, Wu-Jun Li", "tldr": "", "abstract": "Continual learning requires the model to learn multiple tasks in a sequential order. To perform continual learning, the model must possess the abilities to maintain performance on old tasks (stability) and adapt itself to learn new tasks (plasticity). The task-agnostic problem in continual learning is a challenging one, in which task identities are not available at the inference stage and hence the model must learn to distinguish all the classes in all the tasks. 
In the task-agnostic problem, the model needs to learn two new objectives for learning a new task, namely distinguishing new classes from old classes and distinguishing between different new classes. For the task-agnostic problem, replay-based methods are commonly used. These methods update the model with both saved old samples and new samples for continual learning. Most existing replay-based methods mix the two objectives of the task-agnostic problem together, preventing the models from achieving a good trade-off between stability and plasticity. In this paper, we propose a simple yet effective method, called loss decoupling (LODE), for task-agnostic continual learning. LODE separates the two objectives for the new task by decoupling the loss of the new task. As a result, LODE can assign different weights to different objectives, which provides a way to obtain a better trade-off between stability and plasticity than those methods with coupled loss. Experiments show that LODE can outperform existing state-of-the-art replay-based methods on multiple continual learning datasets.", "keywords": "Continual Learning;stability;plasticity", "primary_area": "", "supplementary_material": "", "author": "Yan-Shuo Liang;Wu-Jun Li", "authorids": "~Yan-Shuo_Liang1;~Wu-Jun_Li1", "gender": "M;M", "homepage": "https://liangyanshuo.github.io/;https://cs.nju.edu.cn/lwj/", "dblp": "329/6195;26/988.html", "google_scholar": ";NCCdqdcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yan-Shuo_Liang1;~Wu-Jun_Li1", "aff": "Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nliang2023loss,\ntitle={Loss Decoupling for Task-Agnostic Continual Learning},\nauthor={Yan-Shuo Liang and Wu-Jun Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9Oi3YxIBSa}\n}", "github": "", "project": "", "reviewers": "n7mC;Y5HQ;Tdag;odRp", "pdf_size": 900519, "rating": "5;5;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "104;73;82;114", "wc_strengths": "149;19;50;83", "wc_weaknesses": "276;115;58;34", "wc_questions": "127;48;49;175", "wc_limitations": "1;1;1;18", "wc_review": "657;256;240;424", "wc_reply_reviewers": "21;46;22;12", "wc_reply_authors": "32;81;32;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 93.25, 16.452583383772897 ], "wc_strengths_avg": [ 75.25, 48.22019805019469 ], "wc_weaknesses_avg": [ 120.75, 94.33815505933958 ], "wc_questions_avg": [ 99.75, 53.9878458544143 ], "wc_limitations_avg": [ 5.25, 7.361215932167728 ], "wc_review_avg": [ 394.25, 167.9499553438464 ], "wc_reply_reviewers_avg": [ 25.25, 12.597122687344122 ], "wc_reply_authors_avg": [ 44.25, 21.21762239271875 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8755443643816190603&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": 
"https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Incentives in Federated Learning: Equilibria, Dynamics, and Mechanisms for Welfare Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72600", "id": "9OqezkNxnX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/39b77b5e422b4e070e2811b73ea9bcf7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9OqezkNxnX", "openreview": "https://openreview.net/forum?id=9OqezkNxnX", "poster": "/media/PosterPDFs/NeurIPS%202023/72600.png?t=1702332297.7680008", "slides": "https://nips.cc/virtual/2023/poster/72600", "video": "https://nips.cc/virtual/2023/poster/72600", "author_site": "Aniket Murhekar, Zhuowen Yuan, Bhaskar Ray Chaudhury, Bo Li, Ruta Mehta", "tldr": "", "abstract": "Federated learning (FL) has emerged as a powerful scheme to facilitate the collaborative learning of models amongst a set of agents holding their own private data. Although the agents benefit from the global model trained on shared data, by participating in federated learning, they may also incur costs (related to privacy and communication) due to data sharing. In this paper, we model a collaborative FL framework, where every agent attempts to achieve an optimal trade-off between her learning payoff and data sharing cost. We show the existence of Nash equilibrium (NE) under mild assumptions on agents' payoff and costs. Furthermore, we show that agents can discover the NE via best response dynamics. However, some of the NE may be bad in terms of overall welfare for the agents, implying little incentive for some fraction of the agents to participate in the learning. To remedy this, we design a budget-balanced mechanism involving payments to the agents, that ensures that any $p$-mean welfare function of the agents' utilities is maximized at NE. In addition, we introduce a FL protocol FedBR-BG that incorporates our budget-balanced mechanism, utilizing best response dynamics. Our empirical validation on MNIST and CIFAR-10 substantiates our theoretical analysis. 
We show that FedBR-BG outperforms the basic best-response-based protocol without additional incentivization, the standard federated learning protocol FedAvg, as well as a recent baseline MWFed in terms of achieving superior $p$-mean welfare.", "keywords": "Federated learning;Nash equilibrium;Mechanism design;Welfare maximization", "primary_area": "", "supplementary_material": "/attachment/18d8427c9356e34c7f946d2e4eb82f1b26a5a46e.zip", "author": "Aniket Murhekar;Zhuowen Yuan;Bhaskar Ray Chaudhury;Bo Li;Ruta Mehta", "authorids": "~Aniket_Murhekar1;~Zhuowen_Yuan1;~Bhaskar_Ray_Chaudhury1;~Bo_Li19;~Ruta_Mehta2", "gender": ";M;M;F;F", "homepage": "https://aniket2.web.illinois.edu/;;https://www.bhaskar-ray-chaudhury.com/;http://boli.cs.illinois.edu/;http://rutamehta.cs.illinois.edu/", "dblp": ";304/3576;228/6594.html;50/3402-26;50/7864", "google_scholar": "bGOsYz4AAAAJ;F-r0bYQAAAAJ;-p5GvgcAAAAJ;K8vJkTcAAAAJ;", "orcid": ";;;;", "linkedin": "aniket-murhekar-99381016b/;;;;", "or_profile": "~Aniket_Murhekar1;~Zhuowen_Yuan1;~Bhaskar_Ray_Chaudhury1;~Bo_Li19;~Ruta_Mehta2", "aff": "University of Illinois, Urbana-Champaign;University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmurhekar2023incentives,\ntitle={Incentives in Federated Learning: Equilibria, Dynamics, and Mechanisms for Welfare Maximization},\nauthor={Aniket Murhekar and Zhuowen Yuan and Bhaskar Ray Chaudhury and Bo Li and Ruta Mehta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9OqezkNxnX}\n}", "github": "", "project": "", "reviewers": "VcZd;NtpC;t7td;KoDx", "pdf_size": 419596, "rating": "5;5;5;6", "confidence": "3;3;4;5", "soundness": "3;4;3;3", "novelty": "2;3;2;4", "presentation": "3;4;3;3", "wc_summary": "90;102;32;55", "wc_strengths": "40;114;12;77", "wc_weaknesses": "213;92;110;235", "wc_questions": "29;74;15;143", "wc_limitations": "1;1;2;38", "wc_review": "373;383;171;548", "wc_reply_reviewers": "44;11;37;24", "wc_reply_authors": "187;0;13;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 27.806249297594956 ], "wc_strengths_avg": [ 60.75, 38.42769183804825 ], "wc_weaknesses_avg": [ 162.5, 62.315728351676995 ], "wc_questions_avg": [ 65.25, 49.90177852541931 ], "wc_limitations_avg": [ 10.5, 15.88238017426859 ], "wc_review_avg": [ 368.75, 133.65697699708758 ], "wc_reply_reviewers_avg": [ 29.0, 12.62933094031509 ], "wc_reply_authors_avg": [ 50.0, 79.27483837889548 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2483924950337838717&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Illinois;University of 
Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Learning with Progressive Data Expansion Against Spurious Correlation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72599", "id": "9QEVJ9qm46", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0506ad3d1bcc8398a920db9340f27fe4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9QEVJ9qm46", "openreview": "https://openreview.net/forum?id=9QEVJ9qm46", "poster": "/media/PosterPDFs/NeurIPS%202023/72599.png?t=1701934282.5131266", "slides": "https://nips.cc/virtual/2023/poster/72599", "video": "https://nips.cc/virtual/2023/poster/72599", "author_site": "Yihe Deng, Yu Yang, Baharan Mirzasoleiman, Quanquan Gu", "tldr": "", "abstract": "While deep learning models have shown remarkable performance in various tasks, they are susceptible to learning non-generalizable _spurious features_ rather than the core features that are genuinely correlated to the true label. In this paper, beyond existing analyses of linear models, we theoretically examine the learning process of a two-layer nonlinear convolutional neural network in the presence of spurious features. Our analysis suggests that imbalanced data groups and easily learnable spurious features can lead to the dominance of spurious features during the learning process. In light of this, we propose a new training algorithm called **PDE** that efficiently enhances the model's robustness for a better worst-group performance. PDE begins with a group-balanced subset of training data and progressively expands it to facilitate the learning of the core features. Experiments on synthetic and real-world benchmark datasets confirm the superior performance of our method on models such as ResNets and Transformers. 
On average, our method achieves a $2.8$ \\% improvement in worst-group accuracy compared with the state-of-the-art method, while enjoying up to $10\\times$ faster training efficiency.", "keywords": "spurious correlation;robustness;robust learning", "primary_area": "", "supplementary_material": "/attachment/284a7fe1318eefec4d1110bb5790a8604cf2a425.pdf", "author": "Yihe Deng;Yu Yang;Baharan Mirzasoleiman;Quanquan Gu", "authorids": "~Yihe_Deng1;~Yu_Yang4;~Baharan_Mirzasoleiman1;~Quanquan_Gu1", "gender": "F;F;F;M", "homepage": ";https://sites.google.com/view/yuyang0901/home;http://web.cs.ucla.edu/~baharan/;http://web.cs.ucla.edu/~qgu/", "dblp": "230/8011;16/4505-7;52/10075;50/4597", "google_scholar": "7Lix1poAAAAJ;KK6Yj4IAAAAJ;x63j7HEAAAAJ;GU9HgNAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yihe_Deng1;~Yu_Yang4;~Baharan_Mirzasoleiman1;~Quanquan_Gu1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ndeng2023robust,\ntitle={Robust Learning with Progressive Data Expansion Against Spurious Correlation},\nauthor={Yihe Deng and Yu Yang and Baharan Mirzasoleiman and Quanquan Gu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9QEVJ9qm46}\n}", "github": "", "project": "", "reviewers": "vLux;YdZz;uURe;6r1Q", "pdf_size": 3784049, "rating": "4;5;6;8", "confidence": "5;4;4;3", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "3;2;3;3", "wc_summary": "138;97;92;66", "wc_strengths": "99;68;34;78", "wc_weaknesses": "279;45;481;78", "wc_questions": "4;51;96;79", "wc_limitations": "1;30;42;52", "wc_review": "521;291;745;353", "wc_reply_reviewers": "45;149;28;37", "wc_reply_authors": "40;50;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.25, 25.791229129298976 ], "wc_strengths_avg": [ 69.75, 23.47738273317535 ], "wc_weaknesses_avg": [ 220.75, 174.92051766445238 ], "wc_questions_avg": [ 57.5, 34.81738071710737 ], "wc_limitations_avg": [ 31.25, 19.122957407263137 ], "wc_review_avg": [ 477.5, 175.87708776301704 ], "wc_reply_reviewers_avg": [ 64.75, 49.01211584904288 ], "wc_reply_authors_avg": [ 22.5, 22.776083947860748 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9561828874675149, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13932750330092410537&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ucla.edu;ucla.edu;ucla.edu;cs.ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Test-time Training for Matching-based Video Object Segmentation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72598", "id": "9QsdPQlWiE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4267d84ca2f6fbb4aa5172b76b433aca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9QsdPQlWiE", "openreview": "https://openreview.net/forum?id=9QsdPQlWiE", "poster": "/media/PosterPDFs/NeurIPS%202023/72598.png?t=1702114419.8358524", "slides": "https://nips.cc/virtual/2023/poster/72598", "video": "https://nips.cc/virtual/2023/poster/72598", "author_site": "Juliette Bertrand, Giorgos Kordopatis Zilos, Yannis Kalantidis, Giorgos Tolias", "tldr": "", "abstract": "The video object segmentation (VOS) task involves the segmentation of an object over time based on a single initial mask. Current state-of-the-art approaches use a memory of previously processed frames and rely on matching to estimate segmentation masks of subsequent frames. Lacking any adaptation mechanism, such methods are prone to test-time distribution shifts. This work focuses on matching-based VOS under distribution shifts such as video corruptions, stylization, and sim-to-real transfer. We explore test-time training strategies that are agnostic to the specific task as well as strategies that are designed specifically for VOS. This includes a variant based on mask cycle consistency tailored to matching-based VOS methods. The experimental results on common benchmarks demonstrate that the proposed test-time training yields significant improvements in performance. In particular for the sim-to-real scenario and despite using only a single test video, our approach manages to recover a substantial portion of the performance gain achieved through training on real videos. Additionally, we introduce DAVIS-C, an augmented version of the popular DAVIS test set, featuring extreme distribution shifts like image-/video-level corruptions and stylizations. 
Our results illustrate that test-time training enhances performance even in these challenging cases.", "keywords": "VOS;video object segmentation;test-time training;test-time adaptation", "primary_area": "", "supplementary_material": "/attachment/85b76fefab8f1ef00e9e1e8ac23167c41913ec94.zip", "author": "Juliette Bertrand;Giorgos Kordopatis-Zilos;Yannis Kalantidis;Giorgos Tolias", "authorids": "~Juliette_Bertrand1;~Giorgos_Kordopatis-Zilos1;~Yannis_Kalantidis2;~Giorgos_Tolias1", "gender": "F;M;M;", "homepage": ";https://gkordo.github.io/;https://www.skamalas.com/;http://cmp.felk.cvut.cz/~toliageo/index.html", "dblp": "344/1682;138/0862;33/8693;09/4652", "google_scholar": ";https://scholar.google.gr/citations?user=Do-7qx4AAAAJ;QJZQgN8AAAAJ;https://scholar.google.gr/citations?user=e765N80AAAAJ", "orcid": ";0000-0003-2297-4802;;0000-0002-9570-3870", "linkedin": "juliette-bertrand-164a0b4a/;giorgos-kordopatis-zilos-a41243216/;;", "or_profile": "~Juliette_Bertrand1;~Giorgos_Kordopatis-Zilos1;~Yannis_Kalantidis2;~Giorgos_Tolias1", "aff": ";Czech Technical University in Prague;Naver Labs Europe;Czech Technical University in Prague", "aff_domain": ";fel.cvut.cz;naverlabs.com;cvut.cz", "position": ";Postdoc;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nbertrand2023testtime,\ntitle={Test-time Training for Matching-based Video Object Segmentation},\nauthor={Juliette Bertrand and Giorgos Kordopatis-Zilos and Yannis Kalantidis and Giorgos Tolias},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9QsdPQlWiE}\n}", "github": "", "project": "", "reviewers": "hK29;3wfH;dPR9;iPsr;jwwD", "pdf_size": 8402282, "rating": "4;4;5;6;7", "confidence": "3;4;4;5;5", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;2;4;3", "wc_summary": "82;56;68;61;64", "wc_strengths": "22;48;37;82;153", "wc_weaknesses": "123;168;178;8;95", "wc_questions": "3;6;2;233;42", "wc_limitations": "3;26;19;1;40", "wc_review": "233;304;304;385;394", "wc_reply_reviewers": "0;56;187;58;103", "wc_reply_authors": "0;225;270;31;171", "reply_reviewers": "0;1;2;1;2", "reply_authors": "1;2;3;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 66.2, 8.818163074019441 ], "wc_strengths_avg": [ 68.4, 46.68447279342458 ], "wc_weaknesses_avg": [ 114.4, 61.13787696673806 ], "wc_questions_avg": [ 57.2, 89.15469701591722 ], "wc_limitations_avg": [ 17.8, 14.579437574886077 ], "wc_review_avg": [ 324.0, 59.50126049085011 ], "wc_reply_reviewers_avg": [ 80.8, 62.345489010833816 ], "wc_reply_authors_avg": [ 139.4, 106.36277544329124 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8708635721768008, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18336372923023602433&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";fel.cvut.cz;naverlabs.com;cvut.cz", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Czech Technical University;NAVER LABS", "aff_unique_dep": ";", "aff_unique_url": "https://www.ctu.cz;https://labs.naver.com", "aff_unique_abbr": "CTU;NLE", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Prague;", "aff_country_unique_index": "0;1;0", 
"aff_country_unique": "Czech Republic;Unknown" }, { "id": "9RUblEXVVD", "title": "OTOv3: Towards Automatic Sub-Network Search Within General Super Deep Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing neural architecture search (NAS) methods typically rely on pre-specified super deep neural networks (super-networks) with handcrafted search spaces beforehand. Such requirements make it challenging to extend them onto general scenarios without significant human expertise and manual intervention. To overcome the limitations, we propose the third generation of Only-Train-Once (OTOv3). OTOv3 is perhaps the first automated system that trains general super-networks and produces high-performing sub-networks in the one shot manner without pretraining and fine-tuning. Technologically, OTOv3 delivers three noticeable contributions to minimize human efforts: (i) automatic search space construction for general super-networks; (ii) a Hierarchical Half-Space Projected Gradient (H2SPG) that leverages the dependency graph to ensure the network validity during optimization and reliably produces a solution with both high performance and hierarchical group sparsity; and (iii) automatic sub-network construction based on the super-network and the H2SPG solution. Numerically, we demonstrate the effectiveness of OTOv3 on a variety of super-networks, including StackedUnets, SuperResNet, and DARTS, over benchmark datasets such as CIFAR10, Fashion-MNIST, ImageNet, STL-10, and SVNH. The sub-networks computed by OTOv3 achieve competitive even superior performance compared to the super-networks and other state-of-the-arts.", "keywords": "Neural architecture search;automl;structured sparsity;sparse optimization", "primary_area": "", "supplementary_material": "/attachment/0690d217fed738a4f7e54be26c39a9625caf50c9.pdf", "author": "Tianyi Chen;Luming Liang;Tianyu Ding;Ilya Zharkov", "authorids": "~Tianyi_Chen3;~Luming_Liang2;~Tianyu_Ding1;~Ilya_Zharkov1", "gender": "M;M;M;M", "homepage": ";;;https://www.tianyuding.com", "dblp": ";46/6624;217/3421;134/4796", "google_scholar": "2BahjdkAAAAJ;vTgdAS4AAAAJ;;Qi7zTOcAAAAJ", "orcid": ";;;0000-0001-8445-4330", "linkedin": "tianyi-chen-b65502b3/;luming-liang-76185b19/;;tianyuding/", "or_profile": "~Tianyi_Chen3;~Luming_Liang2;~Ilya_Zharkov1;~Tianyu_DING2", "aff": "Microsoft;Microsoft;Microsoft;", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;", "position": "Senior Researcher;Principal Researcher;Principal Research Manager;", "bibtex": "@misc{\nchen2023otov,\ntitle={{OTO}v3: Towards Automatic Sub-Network Search Within General Super Deep Neural Networks},\nauthor={Tianyi Chen and Luming Liang and Tianyu Ding and Ilya Zharkov},\nyear={2023},\nurl={https://openreview.net/forum?id=9RUblEXVVD}\n}", "github": "", "project": "", "reviewers": "bA4X;h4H8;csCP;XuwW", "site": "https://openreview.net/forum?id=9RUblEXVVD", "pdf_size": 471214, "rating": "4;4;6;7", "confidence": "5;3;3;4", "soundness": "2;1;3;4", "novelty": "2;2;3;4", "presentation": "2;1;2;3", "wc_summary": "38;54;25;122", "wc_strengths": "24;37;41;66", "wc_weaknesses": "337;144;77;76", "wc_questions": "54;2;34;2", "wc_limitations": "1;1;10;2", "wc_review": "454;238;187;268", "wc_reply_reviewers": "80;44;19;0", "wc_reply_authors": "855;110;10;0", "reply_reviewers": "3;1;1;0", "reply_authors": "4;2;2;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.75, 0.82915619758885 ], 
"presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 59.75, 37.37897136091361 ], "wc_strengths_avg": [ 42.0, 15.215124054702938 ], "wc_weaknesses_avg": [ 158.5, 106.6782545788972 ], "wc_questions_avg": [ 23.0, 22.15851980616034 ], "wc_limitations_avg": [ 3.5, 3.774917217635375 ], "wc_review_avg": [ 286.75, 100.81015573839771 ], "wc_reply_reviewers_avg": [ 35.75, 29.93639089803579 ], "wc_reply_authors_avg": [ 243.75, 355.5167893363125 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wrvTpB75xacJ:scholar.google.com/&scioq=OTOv3:+Towards+Automatic+Sub-Network+Search+Within+General+Super+Deep+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9Rhopbm4qu", "title": "Hierarchical Bias-Driven Stratification for Interpretable Causal Effect Estimation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Causal effect estimation from observational data is an important analytical approach for data-driven policy-making. However, due to the inherent lack of ground truth in causal inference accepting such recommendations requires transparency and explainability. To date, attempts at transparent causal effect estimation consist of applying post hoc explanation methods to black-box models, which are not interpretable. In this manuscript, we present BICauseTree: an interpretable balancing method that identifies clusters where natural experiments occur locally. Our approach builds on decision trees to reduce treatment allocation bias. As a result, we can define subpopulations presenting positivity violations and exclude them while providing a covariate-based definition of the target population we can infer from. We characterize the method's performance using synthetic and realistic datasets, explore its bias-interpretability tradeoff, and show that it is comparable with existing approaches. ", "keywords": "Causal inference;effect estimation;selective prediction", "primary_area": "", "supplementary_material": "/attachment/5b22130cadab78618c95864b21ff3670015493a5.zip", "author": "Lucile Ter-Minassian;Liran Szlak;Ehud Karavani;Christopher C. 
Holmes;Yishai Shimoni", "authorids": "~Lucile_Ter-Minassian1;~Liran_Szlak2;~Ehud_Karavani1;~Christopher_C._Holmes1;~Yishai_Shimoni1", "gender": "F;;;M;M", "homepage": ";;;;", "dblp": ";173/5246;215/4948;08/6129;39/11007", "google_scholar": "PXyI6qkAAAAJ;https://scholar.google.com/citations?hl=iw;KAzt_pYAAAAJ;;wp-F1asAAAAJ", "orcid": ";;0000-0002-0187-5437;;0000-0002-4364-4207", "linkedin": "lucile-ter-minassian-94428a12b/;;;;", "or_profile": "~Lucile_Ter-Minassian1;~Liran_Szlak2;~Ehud_Karavani1;~Christopher_C._Holmes1;~Yishai_Shimoni1", "aff": "University of Oxford;International Business Machines;International Business Machines;University of Oxford;International Business Machines", "aff_domain": "ox.ac.uk;ibm.com;ibm.com;ox.ac.uk;ibm.com", "position": "PhD student;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\nter-minassian2023hierarchical,\ntitle={Hierarchical Bias-Driven Stratification for Interpretable Causal Effect Estimation},\nauthor={Lucile Ter-Minassian and Liran Szlak and Ehud Karavani and Christopher C. Holmes and Yishai Shimoni},\nyear={2023},\nurl={https://openreview.net/forum?id=9Rhopbm4qu}\n}", "github": "", "project": "", "reviewers": "UicP;vmTU;aknA;kdjD", "site": "https://openreview.net/forum?id=9Rhopbm4qu", "pdf_size": 396788, "rating": "3;3;5;6", "confidence": "4;3;3;3", "soundness": "1;1;3;3", "novelty": "1;2;3;3", "presentation": "1;2;3;3", "wc_summary": "77;33;95;80", "wc_strengths": "13;21;214;81", "wc_weaknesses": "240;99;229;220", "wc_questions": "58;136;23;13", "wc_limitations": "33;56;2;1", "wc_review": "421;345;563;395", "wc_reply_reviewers": "119;188;15;49", "wc_reply_authors": "0;599;0;343", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 1.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 71.25, 23.112496619794236 ], "wc_strengths_avg": [ 82.25, 80.47786962886133 ], "wc_weaknesses_avg": [ 197.0, 57.02192560761167 ], "wc_questions_avg": [ 57.5, 48.303726564313855 ], "wc_limitations_avg": [ 23.0, 22.9891278651453 ], "wc_review_avg": [ 431.0, 80.95677859203639 ], "wc_reply_reviewers_avg": [ 92.75, 66.55965369501257 ], "wc_reply_authors_avg": [ 235.5, 252.29397535414913 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lWOQRJO0ilUJ:scholar.google.com/&scioq=Hierarchical+Bias-Driven+Stratification+for+Interpretable+Causal+Effect+Estimation&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Oxford;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.ibm.com", "aff_unique_abbr": "Oxford;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Intervention Generalization: A View from Factor Graph Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72597", "id": "9S8oVumknA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/88139fdcc82fc597090620d77b023282-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9S8oVumknA", "openreview": 
"https://openreview.net/forum?id=9S8oVumknA", "poster": "/media/PosterPDFs/NeurIPS%202023/72597.png?t=1702415343.417272", "slides": "https://nips.cc/virtual/2023/poster/72597", "video": "https://nips.cc/virtual/2023/poster/72597", "author_site": "Gecia Bravo-Hermsdorff, David Watson, Jialin Yu, Jakob Zeitler, Ricardo Silva", "tldr": "", "abstract": "One of the goals of causal inference is to generalize from past experiments and observational data to novel conditions. While it is in principle possible to eventually learn a mapping from a novel experimental condition to an outcome of interest, provided a sufficient variety of experiments is available in the training data, coping with a large combinatorial space of possible interventions is hard. Under a typical sparse experimental design, this mapping is ill-posed without relying on heavy regularization or prior distributions. Such assumptions may or may not be reliable, and can be hard to defend or test. In this paper, we take a close look at how to warrant a leap from past experiments to novel conditions based on minimal assumptions about the factorization of the distribution of the manipulated system, communicated in the well-understood language of factor graph models. A postulated interventional factor model (IFM) may not always be informative, but it conveniently abstracts away a need for explicitly modeling unmeasured confounding and feedback mechanisms, leading to directly testable claims. Given an IFM and datasets from a collection of experimental regimes, we derive conditions for identifiability of the expected outcomes of new regimes never observed in these training data. We implement our framework using several efficient algorithms, and apply them on a range of semi-synthetic experiments.", "keywords": "Causality;experimental design", "primary_area": "", "supplementary_material": "/attachment/31f5ec59410e702e6c95355616d1e70f53915be9.zip", "author": "Gecia Bravo-Hermsdorff;David Watson;Jialin Yu;Jakob Zeitler;Ricardo Silva", "authorids": "~Gecia_Bravo-Hermsdorff1;~David_Watson2;~Jialin_Yu2;~Jakob_Zeitler1;~Ricardo_Silva1", "gender": "F;M;;;M", "homepage": "https://gecia.github.io/;http://dswatson.github.io;https://jialin-yu.github.io/;http://www0.cs.ucl.ac.uk/people/J.Zeitler.html;http://www.homepages.ucl.ac.uk/~ucgtrbd/", "dblp": "236/6201;234/8807.html;167/1075;296/0613;42/2642-1", "google_scholar": "Jq9GtykAAAAJ;BAHkyk8AAAAJ;L8tFzjgAAAAJ;0xvQ82oAAAAJ;I-ANa0QAAAAJ", "orcid": ";0000-0001-9632-2159;;;", "linkedin": ";david-watson-9707a7106/;;;", "or_profile": "~Gecia_Bravo-Hermsdorff1;~David_Watson2;~Jialin_Yu2;~Jakob_Zeitler1;~Ricardo_Silva1", "aff": ";King's College London, University of London;Durham University;University College London, University of London;University College London", "aff_domain": ";kcl.ac.uk;dur.ac.uk;ucl.ac.uk;ucl.ac.uk", "position": ";Lecturer;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbravo-hermsdorff2023intervention,\ntitle={Intervention Generalization: A View from Factor Graph Models},\nauthor={Gecia Bravo-Hermsdorff and David Watson and Jialin Yu and Jakob Zeitler and Ricardo Silva},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9S8oVumknA}\n}", "github": "", "project": "", "reviewers": "EjuP;ybr1;2Na9;t5cb;T46G;4dmo", "pdf_size": 802118, "rating": "5;5;6;6;6;7", "confidence": "4;3;3;2;4;3", "soundness": "3;3;3;3;2;4", "novelty": "2;3;2;2;2;3", "presentation": "2;3;3;3;3;3", "wc_summary": 
"75;55;56;79;170;136", "wc_strengths": "63;63;12;25;77;126", "wc_weaknesses": "121;316;49;108;101;203", "wc_questions": "119;80;101;70;53;52", "wc_limitations": "1;44;2;14;11;3", "wc_review": "379;558;220;296;412;520", "wc_reply_reviewers": "91;35;25;21;154;29", "wc_reply_authors": "101;259;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "3;2;1;1;1;1", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 95.16666666666667, 42.97835372474019 ], "wc_strengths_avg": [ 61.0, 36.93688310257558 ], "wc_weaknesses_avg": [ 149.66666666666666, 87.15057212791103 ], "wc_questions_avg": [ 79.16666666666667, 24.395468613840745 ], "wc_limitations_avg": [ 12.5, 14.885675440951054 ], "wc_review_avg": [ 397.5, 117.70549406605171 ], "wc_reply_reviewers_avg": [ 59.166666666666664, 48.51946917360998 ], "wc_reply_authors_avg": [ 60.0, 96.33448672896604 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2941176470588235, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17876747628939790557&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": ";kcl.ac.uk;dur.ac.uk;ucl.ac.uk;ucl.ac.uk", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "King's College London;Durham University;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kcl.ac.uk;https://www.dur.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "KCL;Durham;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Convergence of mean-field Langevin dynamics: time-space discretization, stochastic gradient, and variance reduction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72596", "id": "9STYRIVx6u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32133a6a24d6554263d3584e3ac10faa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9STYRIVx6u", "openreview": "https://openreview.net/forum?id=9STYRIVx6u", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72596", "video": "https://nips.cc/virtual/2023/poster/72596", "author_site": "Taiji Suzuki, Denny Wu, Atsushi Nitanda", "tldr": "", "abstract": "The mean-field Langevin dynamics (MFLD) is a nonlinear generalization of the Langevin dynamics that incorporates a distribution-dependent drift, and it naturally arises from the optimization of two-layer neural networks via (noisy) gradient descent. Recent works have shown that MFLD globally minimizes an entropy-regularized convex functional in the space of measures. However, all prior analyses assumed the infinite-particle or continuous-time limit, and cannot handle stochastic gradient updates. We provide a general framework to prove a uniform-in-time propagation of chaos for MFLD that takes into account the errors due to finite-particle approximation, time-discretization, and stochastic gradient. 
To demonstrate the wide applicability of our framework, we establish quantitative convergence rate guarantees to the regularized global optimal solution for $(i)$ a wide range of learning problems such as mean-field neural network and MMD minimization, and $(ii)$ different gradient estimators including SGD and SVRG. Despite the generality of our results, we achieve an improved convergence rate in both the SGD and SVRG settings when specialized to the standard Langevin dynamics.", "keywords": "mean-field regime;interacting particle system;propagation of chaos;Neural network optimization;MMD minimization", "primary_area": "", "supplementary_material": "", "author": "Taiji Suzuki;Denny Wu;Atsushi Nitanda", "authorids": "~Taiji_Suzuki1;~Denny_Wu2;~Atsushi_Nitanda1", "gender": "M;M;M", "homepage": "http://ibis.t.u-tokyo.ac.jp/suzuki/;https://dennywu1.github.io/;https://sites.google.com/site/atsushinitanda", "dblp": "08/312;;155/1884", "google_scholar": "x8osrBsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=LyVvaf8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Taiji_Suzuki1;~Denny_Wu2;~Atsushi_Nitanda1", "aff": "The University of Tokyo;University of Toronto;Kyushu Institute of Technology", "aff_domain": "tokyo.ac.jp;toronto.edu;kyutech.ac.jp", "position": "Associate Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsuzuki2023meanfield,\ntitle={Mean-field Langevin dynamics: Time-space discretization, stochastic gradient, and variance reduction},\nauthor={Taiji Suzuki and Denny Wu and Atsushi Nitanda},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9STYRIVx6u}\n}", "github": "", "project": "", "reviewers": "WF3Y;Y8on;wZfS;rhAi;YwhW", "pdf_size": 523759, "rating": "6;6;7;8;8", "confidence": "4;4;4;4;4", "soundness": "4;3;3;4;4", "novelty": "3;3;3;4;4", "presentation": "2;3;3;4;4", "wc_summary": "143;37;54;36;55", "wc_strengths": "108;29;70;36;57", "wc_weaknesses": "221;289;8;13;13", "wc_questions": "90;4;20;1;17", "wc_limitations": "1;1;1;1;19", "wc_review": "563;360;153;87;161", "wc_reply_reviewers": "33;15;71;34;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 65.0, 39.824615503479755 ], "wc_strengths_avg": [ 60.0, 28.106938645110393 ], "wc_weaknesses_avg": [ 108.8, 121.30688356395937 ], "wc_questions_avg": [ 26.4, 32.62269148920733 ], "wc_limitations_avg": [ 4.6, 7.2 ], "wc_review_avg": [ 264.8, 174.855826325576 ], "wc_reply_reviewers_avg": [ 30.6, 23.7873916182502 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3077295226154064012&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tokyo.ac.jp;toronto.edu;kyutech.ac.jp", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Tokyo;University of Toronto;Kyushu Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.utoronto.ca;https://www.kyutech.ac.jp", "aff_unique_abbr": "UTokyo;U of T;Kyutech", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Japan;Canada" }, { "id": "9SwKSvaCiP", "title": "SING: A Plug-and-Play DNN Learning Technique", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose SING (StabIlized and Normalized Gradient), a plug-and-play technique that improves the stability and generalization of the Adam(W) optimizer. SING is straightforward to implement and has minimal computational overhead, requiring only a layer-wise standardization of the gradients fed to Adam(W) without introducing additional hyper-parameters. We support the effectiveness and practicality of the proposed approach by showing improved results on a wide range of architectures, problems (such as image classification, depth estimation, and natural language processing), and in combination with other optimizers. We provide a theoretical analysis of the convergence of the method, and we show that by virtue of the standardization, SING can escape local minima narrower than a threshold that is inversely proportional to the network's depth.", "keywords": "optimization;normalization;standardization;centralization;gradient descent;adam;stabilization;plug-and-play", "primary_area": "", "supplementary_material": "/attachment/9d4adad5f93b3fb9df82a3c19e119f4ca9e56c08.zip", "author": "Adrien Courtois;Damien Scieur;Jean-michel Morel;Pablo Arias;Thomas Eboli", "authorids": "~Adrien_Courtois1;~Damien_Scieur3;~Jean-michel_Morel1;~Pablo_Arias1;~Thomas_Eboli1", "gender": "M;M;M;M;M", "homepage": ";https://damienscieur.com/;https://www.cityu.edu.hk/stfprofile/jeamorel.htm;http://dev.ipol.im/~pariasm/;https://teboli.github.io/", "dblp": ";191/6712;14/3755;08/4671-1;267/5286", "google_scholar": "https://scholar.google.fr/citations?user=2uyNIHIAAAAJ;https://scholar.google.fr/citations?user=hNscQzgAAAAJ;https://scholar.google.fr/citations?user=BlEbdeEAAAAJ;9XSPStkAAAAJ;WLh8770AAAAJ", "orcid": ";;0000-0002-6108-897X;0000-0002-6961-5156;", "linkedin": ";damien-scieur-6873ba82/;;;thomas-eboli-6a275a248/", "or_profile": "~Adrien_Courtois1;~Damien_Scieur3;~Jean-michel_Morel1;~Pablo_Arias1;~Thomas_Eboli1", "aff": "Ecole Normale Superieure;Samsung;City University of Hong Kong;CMLA - ENS Paris Saclay, ENS Paris-Saclay;Ecole Normale Superieure", "aff_domain": "ens-paris-saclay.fr;samsung.com;cityu.edu;cmla.ens-cachan.fr;ens-paris-saclay.fr", "position": "PhD student;Researcher;Full Professor;Researcher;Postdoc", "bibtex": "@misc{\ncourtois2023sing,\ntitle={{SING}: A Plug-and-Play {DNN} Learning Technique},\nauthor={Adrien Courtois and Damien Scieur and Jean-michel Morel and Pablo Arias and Thomas Eboli},\nyear={2023},\nurl={https://openreview.net/forum?id=9SwKSvaCiP}\n}", "github": "", "project": "", "reviewers": "JvsR;hVDH;CMfs;QCf2;Ahr6", "site": "https://openreview.net/forum?id=9SwKSvaCiP", "pdf_size": 394986, "rating": "3;4;6;6;7", "confidence": "4;4;3;4;4", "soundness": "2;3;2;3;3", "novelty": "1;2;2;3;2", "presentation": "3;3;2;3;3", "wc_summary": "73;33;81;155;73", "wc_strengths": "100;14;98;117;47", "wc_weaknesses": "1017;248;240;197;279", "wc_questions": "8;35;55;51;39", "wc_limitations": "5;4;34;1;8", "wc_review": "1203;334;508;521;446", "wc_reply_reviewers": "0;80;26;0;0", "wc_reply_authors": "0;167;0;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.0, 
0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.0, 39.71901307938051 ], "wc_strengths_avg": [ 75.2, 38.529988320787226 ], "wc_weaknesses_avg": [ 396.2, 311.50306579550704 ], "wc_questions_avg": [ 37.6, 16.53602128687551 ], "wc_limitations_avg": [ 10.4, 12.009995836801943 ], "wc_review_avg": [ 602.4, 307.4921787623224 ], "wc_reply_reviewers_avg": [ 21.2, 31.07667935928805 ], "wc_reply_authors_avg": [ 33.4, 66.80000000000001 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.27216552697590857, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Q5qF_HDmy74J:scholar.google.com/&scioq=SING:+A+Plug-and-Play+DNN+Learning+Technique&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Ecole Normale Superieure;Samsung;City University of Hong Kong;\u00c9cole Normale Sup\u00e9rieure Paris-Saclay", "aff_unique_dep": ";Samsung;;CMLA", "aff_unique_url": "https://www.ens.fr;https://www.samsung.com;https://www.cityu.edu.hk;https://www.ens-paris-saclay.fr", "aff_unique_abbr": "ENS;Samsung;CityU;ENS Paris-Saclay", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Paris-Saclay", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "France;South Korea;China" }, { "title": "Diffused Task-Agnostic Milestone Planner", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72595", "id": "9Tx2znbyTm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0163ca1c69f848e766cfb0b7bb7e17f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9Tx2znbyTm", "openreview": "https://openreview.net/forum?id=9Tx2znbyTm", "poster": "/media/PosterPDFs/NeurIPS%202023/72595.png?t=1701781299.8107471", "slides": "https://nips.cc/virtual/2023/poster/72595", "video": "https://nips.cc/virtual/2023/poster/72595", "author_site": "Mineui Hong, Minjae Kang, Songhwai Oh", "tldr": "", "abstract": "Addressing decision-making problems using sequence modeling to predict future trajectories has shown promising results in recent years.\nIn this paper, we take a step further to leverage the sequence predictive method in wider areas such as long-term planning, vision-based control, and multi-task decision-making.\nTo this end, we propose a method to utilize a diffusion-based generative sequence model to plan a series of milestones in a latent space and to have an agent follow the milestones to accomplish a given task.\nThe proposed method can learn control-relevant, low-dimensional latent representations of milestones, which makes it possible to efficiently perform long-term planning and vision-based control.\nFurthermore, our approach exploits the generation flexibility of the diffusion model, which makes it possible to plan diverse trajectories for multi-task decision-making.\nWe demonstrate the proposed method across offline reinforcement learning (RL) benchmarks and a visual manipulation environment.\nThe results show that our approach outperforms offline RL methods in solving long-horizon, sparse-reward tasks and multi-task problems,\nwhile also achieving state-of-the-art performance on the most challenging vision-based manipulation benchmark.", "keywords": "Multi-task decision-making;Offline reinforcement learning;Planning;Diffusion model", "primary_area": "", "supplementary_material":
"/attachment/a7db81b91f27bd52cd2a609312288b94ac2cf25f.zip", "author": "Mineui Hong;Minjae Kang;Songhwai Oh", "authorids": "~Mineui_Hong1;~Minjae_Kang2;~Songhwai_Oh1", "gender": "M;;", "homepage": "https://rllab.snu.ac.kr/people/mineui-hong;https://rllab.snu.ac.kr/;https://rllab.snu.ac.kr/", "dblp": "263/9614;;17/3173", "google_scholar": "OclyP2kAAAAJ;;VEzNY_oAAAAJ", "orcid": "0000-0001-8572-5171;;0000-0002-9781-2018", "linkedin": ";;", "or_profile": "~Mineui_Hong1;~Minjae_Kang2;~Songhwai_Oh1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhong2023diffused,\ntitle={Diffused Task-Agnostic Milestone Planner},\nauthor={Mineui Hong and Minjae Kang and Songhwai Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9Tx2znbyTm}\n}", "github": "", "project": "", "reviewers": "d8Sq;g1BQ;M7TA;8zhN;Haes;xaPC", "pdf_size": 1890683, "rating": "5;5;6;6;6;8", "confidence": "1;4;3;4;3;3", "soundness": "3;3;2;3;3;4", "novelty": "3;3;3;3;3;4", "presentation": "3;3;3;2;3;4", "wc_summary": "143;75;134;75;54;22", "wc_strengths": "107;110;11;49;128;48", "wc_weaknesses": "50;134;109;202;216;43", "wc_questions": "31;88;29;97;88;3", "wc_limitations": "47;30;1;2;1;4", "wc_review": "378;437;284;425;487;120", "wc_reply_reviewers": "13;0;19;0;21;5", "wc_reply_authors": "31;0;31;0;40;31", "reply_reviewers": "1;0;1;0;1;1", "reply_authors": "2;1;2;1;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 83.83333333333333, 42.59662219263661 ], "wc_strengths_avg": [ 75.5, 41.94738768187279 ], "wc_weaknesses_avg": [ 125.66666666666667, 66.94940046200729 ], "wc_questions_avg": [ 56.0, 36.26752449965855 ], "wc_limitations_avg": [ 14.166666666666666, 17.92034845891365 ], "wc_review_avg": [ 355.1666666666667, 122.36750204018858 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 8.517954892787092 ], "wc_reply_authors_avg": [ 22.166666666666668, 15.993922456844524 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16666666666666663, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14220993282718855794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "EMBERSim: A Large-Scale Databank for Boosting Similarity Search in Malware Analysis", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73677", "id": "9U8bqr8epr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54bf430f5d3090502ea021941e9cb18e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=9U8bqr8epr", "openreview": "https://openreview.net/forum?id=9U8bqr8epr", "poster": 
"/media/PosterPDFs/NeurIPS%202023/73677.png?t=1699458732.4803417", "slides": "https://nips.cc/virtual/2023/poster/73677", "video": "https://nips.cc/virtual/2023/poster/73677", "author_site": "Dragos Georgian Corlatescu, Alexandru Dinu, Mihaela Petruta Gaman, Paul Sumedrea", "tldr": "", "abstract": "In recent years there has been a shift from heuristics based malware detection towards machine learning, which proves to be more robust in the current heavily adversarial threat landscape. While we acknowledge machine learning to be better equipped to mine for patterns in the increasingly high amounts of similar-looking files, we also note a remarkable scarcity of the data available for similarity targeted research. Moreover, we observe that the focus in the few related works falls on quantifying similarity in malware, often overlooking the clean data. This one-sided quantification is especially dangerous in the context of detection bypass. We propose to address the deficiencies in the space of similarity research on binary files, starting from EMBER \u2014 one of the largest malware classification datasets. We enhance EMBER with similarity information as well as malware class tags, to enable further research in the similarity space. Our contribution is threefold: (1) we publish EMBERSim, an augmented version of EMBER, that includes similarity informed tags; (2) we enrich EMBERSim with automatically determined malware class tags using the open-source tool AVClass on VirusTotal data and (3) we describe and share the implementation for our class scoring technique and leaf similarity method.", "keywords": "similarity search;leaf similarity;boosting algorithms;tree-based methods;binary files;malware;static analysis;cybersecurity", "primary_area": "", "supplementary_material": "", "author": "Dragos Georgian Corlatescu;Alexandru Dinu;Mihaela Gaman;Paul Sumedrea", "authorids": "~Dragos_Georgian_Corlatescu1;~Alexandru_Dinu1;~Mihaela_Gaman1;~Paul_Sumedrea1", "gender": "M;;F;", "homepage": ";https://alexandru-dinu.github.io/;;", "dblp": "225/4909;;242/9258;306/7557", "google_scholar": "rfr85cYAAAAJ;;https://scholar.google.ro/citations?user=inb8HYEAAAAJ;vb121RoAAAAJ", "orcid": "0000-0002-7994-9950;;0000-0001-7751-6759;", "linkedin": "dragos-corlatescu-0b815a78/;;mihaela-g%C4%83man-0470149b;paul-sumedrea", "or_profile": "~Dragos_Georgian_Corlatescu1;~Alexandru_Dinu1;~Mihaela_Gaman1;~Paul_Sumedrea1", "aff": "University Politehnica of Bucharest;CrowdStrike;University of Bucharest;CrowdStrike", "aff_domain": "cs.pub.ro;crowdstrike.com;unibuc.ro;crowdstrike.com", "position": "PhD student;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\ncorlatescu2023embersim,\ntitle={{EMBERS}im: A Large-Scale Databank for Boosting Similarity Search in Malware Analysis},\nauthor={Dragos Georgian Corlatescu and Alexandru Dinu and Mihaela Gaman and Paul Sumedrea},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=9U8bqr8epr}\n}", "github": "", "project": "", "reviewers": "ts2F;BNKs;6wpR;qd4B;xndR", "pdf_size": 1499076, "rating": "5;6;7;7;7", "confidence": "4;3;4;3;2", "wc_summary_and_contributions": "67;32;17;195;69", "wc_strengths": "104;44;39;156;39", "wc_improvement": "163;25;43;193;53", "wc_limitations": "89;24;40;215;41", "wc_correctness": "22;7;38;35;27", "wc_clarity": "1;7;11;45;16", "wc_relation_to_prior_work": "19;8;29;23;16", "wc_documentation": "15;28;21;3;115", "wc_additional_feedback": 
"1;1;1;1;1", "wc_review": "481;176;239;866;377", "wc_reply_reviewers": "0;0;28;79;0", "wc_reply_authors": "1430;515;1079;812;131", "reply_reviewers": "0;0;1;1;0", "reply_authors": "3;1;2;2;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 76.0, 62.78216307200637 ], "wc_strengths_avg": [ 76.4, 46.787177730656076 ], "wc_improvement_avg": [ 95.4, 68.69526912386326 ], "wc_limitations_avg": [ 81.8, 70.06682524561819 ], "wc_correctness_avg": [ 25.8, 10.979981785048645 ], "wc_clarity_avg": [ 16.0, 15.31012736720371 ], "wc_relation_to_prior_work_avg": [ 19.0, 7.014271166700073 ], "wc_documentation_avg": [ 36.4, 40.14772720839873 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 427.8, 243.50720728553398 ], "wc_reply_reviewers_avg": [ 21.4, 30.774015012669373 ], "wc_reply_authors_avg": [ 793.4, 447.9850890375705 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4677071733467427, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11791098437148434154&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.pub.ro;crowdstrike.com;unibuc.ro;crowdstrike.com", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University Politehnica of Bucharest;CrowdStrike;University of Bucharest", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upb.ro;https://www.crowdstrike.com;https://www.unibuc.ro", "aff_unique_abbr": "UPB;CrowdStrike;Unibuc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Romania;United States" }, { "title": "LuminAIRe: Illumination-Aware Conditional Image Repainting for Lighting-Realistic Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72594", "id": "9UxUTGCteW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cb3658b9983f677670a246c46ece553d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9UxUTGCteW", "openreview": "https://openreview.net/forum?id=9UxUTGCteW", "poster": "/media/PosterPDFs/NeurIPS%202023/72594.png?t=1701326764.8494737", "slides": "https://nips.cc/virtual/2023/poster/72594", "video": "https://nips.cc/virtual/2023/poster/72594", "author_site": "Jiajun Tang, Haofeng Zhong, Shuchen Weng, Boxin Shi", "tldr": "", "abstract": "We present the ilLumination-Aware conditional Image Repainting (LuminAIRe) task to address the unrealistic lighting effects in recent conditional image repainting (CIR) methods. The environment lighting and 3D geometry conditions are explicitly estimated from given background images and parsing masks using a parametric lighting representation and learning-based priors. These 3D conditions are then converted into illumination images through the proposed physically-based illumination rendering and illumination attention module. With the injection of illumination images, physically-correct lighting information is fed into the lighting-realistic generation process and repainted images with harmonized lighting effects in both foreground and background regions can be acquired, whose superiority over the results of state-of-the-art methods is confirmed through extensive experiments. 
For facilitating and validating the LuminAIRe task, a new dataset Car-LuminAIRe with lighting annotations and rich appearance variants is collected.", "keywords": "Illumination;Image Generation;Conditional Image Repainting", "primary_area": "", "supplementary_material": "/attachment/4f98f8ca7375343ee5170308585780d582a7d96d.pdf", "author": "Jiajun Tang;Haofeng Zhong;Shuchen Weng;Boxin Shi", "authorids": "~Jiajun_Tang2;~Haofeng_Zhong1;~Shuchen_Weng1;~Boxin_Shi3", "gender": "M;M;M;M", "homepage": "https://me.jeffreet.com;https://github.com/DDXDaniel;https://shuchenweng.github.io/;http://camera.pku.edu.cn", "dblp": "247/1319;369/7702;220/4303;69/783", "google_scholar": "UBy9z8wAAAAJ;;-5qVEQsAAAAJ;K1LjZxcAAAAJ", "orcid": "0000-0002-0254-9764;;0000-0003-0777-5055;0000-0001-6749-0364", "linkedin": ";;;", "or_profile": "~Jiajun_Tang2;~Haofeng_Zhong1;~Shuchen_Weng1;~Boxin_Shi3", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntang2023luminaire,\ntitle={Lumin{AIR}e: Illumination-Aware Conditional Image Repainting for Lighting-Realistic Generation},\nauthor={Jiajun Tang and Haofeng Zhong and Shuchen Weng and Boxin Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9UxUTGCteW}\n}", "github": "", "project": "", "reviewers": "Xnyj;oBS5;Kvqo;U8wa;Ex3g", "pdf_size": 8728752, "rating": "3;4;5;5;7", "confidence": "4;5;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;4", "presentation": "2;2;4;4;4", "wc_summary": "89;76;112;147;166", "wc_strengths": "82;72;91;50;136", "wc_weaknesses": "371;143;61;354;213", "wc_questions": "59;3;109;2;5", "wc_limitations": "24;3;1;6;11", "wc_review": "625;297;374;559;531", "wc_reply_reviewers": "315;0;15;0;110", "wc_reply_authors": "568;100;0;0;78", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 4.8, 1.32664991614216 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.9797958971132712 ], "wc_summary_avg": [ 118.0, 34.017642481512446 ], "wc_strengths_avg": [ 86.2, 28.4 ], "wc_weaknesses_avg": [ 228.4, 119.71900433932784 ], "wc_questions_avg": [ 35.6, 42.57511009968148 ], "wc_limitations_avg": [ 9.0, 8.221921916437786 ], "wc_review_avg": [ 477.2, 122.10880394140302 ], "wc_reply_reviewers_avg": [ 88.0, 120.68968472906042 ], "wc_reply_authors_avg": [ 149.2, 213.26265495862137 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.23836564731139803, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10968684752567970304&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Brain Diffusion for Visual Exploration: Cortical Discovery using Large Scale Generative Models", "status": "Oral", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72593", "id": "9VqMaSjf7U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef0c0a23a1a8219c4fc381614664df3e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9VqMaSjf7U", "openreview": "https://openreview.net/forum?id=9VqMaSjf7U", "poster": "/media/PosterPDFs/NeurIPS%202023/72593.png?t=1701375342.9603124", "slides": "https://nips.cc/virtual/2023/poster/72593", "video": "https://nips.cc/virtual/2023/poster/72593", "author_site": "Andrew Luo, Maggie Henderson, Leila Wehbe, Michael Tarr", "tldr": "", "abstract": "A long standing goal in neuroscience has been to elucidate the functional organization of the brain. Within higher visual cortex, functional accounts have remained relatively coarse, focusing on regions of interest (ROIs) and taking the form of selectivity for broad categories such as faces, places, bodies, food, or words. Because the identification of such ROIs has typically relied on manually assembled stimulus sets consisting of isolated objects in non-ecological contexts, exploring functional organization without robust a priori hypotheses has been challenging. To overcome these limitations, we introduce a data-driven approach in which we synthesize images predicted to activate a given brain region using paired natural images and fMRI recordings, bypassing the need for category-specific stimuli. Our approach -- Brain Diffusion for Visual Exploration (\"BrainDiVE\") -- builds on recent generative methods by combining large-scale diffusion models with brain-guided image synthesis. Validating our method, we demonstrate the ability to synthesize preferred images with appropriate semantic specificity for well-characterized category-selective ROIs. We then show that BrainDiVE can characterize differences between ROIs selective for the same high-level category. Finally we identify novel functional subdivisions within these ROIs, validated with behavioral data. These results advance our understanding of the fine-grained functional organization of human visual cortex, and provide well-specified constraints for further examination of cortical organization using hypothesis-driven methods.", "keywords": "neuroscience;brain;fmri;generative models;diffusion models;image synthesis;visual cortex", "primary_area": "", "supplementary_material": "", "author": "Andrew Luo;Margaret Marie Henderson;Leila Wehbe;Michael J. Tarr", "authorids": "~Andrew_Luo2;~Margaret_Marie_Henderson1;~Leila_Wehbe1;~Michael_J._Tarr1", "gender": "M;F;F;M", "homepage": "https://andrewluo.net/;https://www.hendersonneurolab.com;http://www.cs.cmu.edu/~lwehbe/;https://tarrlab.org", "dblp": "234/8054;348/9728.html;125/4359;36/1880", "google_scholar": "bWYvvkUAAAAJ;91bNlCUAAAAJ;YezyUawAAAAJ;O8ALPlkAAAAJ", "orcid": ";0000-0001-9375-6680;0000-0001-8545-2062;0000-0003-4724-1744", "linkedin": ";;;michael-tarr-ab078046/", "or_profile": "~Andrew_Luo2;~Margaret_Marie_Henderson1;~Leila_Wehbe1;~Michael_Tarr1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nluo2023brain,\ntitle={Brain Diffusion for Visual Exploration: Cortical Discovery using Large Scale Generative Models},\nauthor={Andrew Luo and Margaret Marie Henderson and Leila Wehbe and Michael J. 
Tarr},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9VqMaSjf7U}\n}", "github": "", "project": "", "reviewers": "Vg6A;SV7e;fpHd;MC3H", "pdf_size": 30722067, "rating": "6;7;7;7", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "55;98;124;132", "wc_strengths": "32;70;45;226", "wc_weaknesses": "132;85;117;219", "wc_questions": "27;154;192;217", "wc_limitations": "20;3;4;61", "wc_review": "266;410;482;855", "wc_reply_reviewers": "181;18;20;27", "wc_reply_authors": "11;15;21;49", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.25, 30.036436206714004 ], "wc_strengths_avg": [ 93.25, 77.85041746837328 ], "wc_weaknesses_avg": [ 138.25, 49.61539579606314 ], "wc_questions_avg": [ 147.5, 73.09753757822489 ], "wc_limitations_avg": [ 22.0, 23.50531854708632 ], "wc_review_avg": [ 503.25, 217.46422119511982 ], "wc_reply_reviewers_avg": [ 61.5, 69.07423542826949 ], "wc_reply_authors_avg": [ 24.0, 14.866068747318506 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12987940500177130664&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Large Language Models for Automated Data Science: Introducing CAAFE for Context-Aware Automated Feature Engineering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72592", "id": "9WSxQZ9mG7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c2df4c35cdbee764ebb9e9d0acd5197-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9WSxQZ9mG7", "openreview": "https://openreview.net/forum?id=9WSxQZ9mG7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72592", "video": "https://nips.cc/virtual/2023/poster/72592", "author_site": "Noah Hollmann, Samuel M\u00fcller, Frank Hutter", "tldr": "", "abstract": "As the field of automated machine learning (AutoML) advances, it becomes increasingly important to incorporate domain knowledge into these systems.\nWe present an approach for doing so by harnessing the power of large language models (LLMs). Specifically, we introduce Context-Aware Automated Feature Engineering (CAAFE), a feature engineering method for tabular datasets that utilizes an LLM to iteratively generate additional semantically meaningful features for tabular datasets based on the description of the dataset. 
The method produces both Python code for creating new features and explanations for the utility of the generated features.\n\nDespite being methodologically simple, CAAFE improves performance on 11 out of 14 datasets -- boosting mean ROC AUC performance from 0.798 to 0.822 across all datasets -- similar to the improvement achieved by using a random forest instead of logistic regression on our datasets. Furthermore, CAAFE is interpretable by providing a textual explanation for each generated feature.\n\nCAAFE paves the way for more extensive semi-automation in data science tasks and emphasizes the significance of context-aware solutions that can extend the scope of AutoML systems to semantic AutoML. We release our [code](https://github.com/automl/CAAFE), a simple [demo](https://colab.research.google.com/drive/1mCA8xOAJZ4MaB_alZvyARTMjhl6RZf0a), and a [Python package](https://pypi.org/project/caafe/).", "keywords": "AutoML;AutoDS;Automated Feature Engineering;LLM Code Generation;Tabular Data;Feature Engineering;Automated Data Science;Automated Machine Learning", "primary_area": "", "supplementary_material": "/attachment/eb0c8f6717106a27f07a09f0ba99cc69e10810ce.pdf", "author": "Noah Hollmann;Samuel M\u00fcller;Frank Hutter", "authorids": "~Noah_Hollmann1;~Samuel_M\u00fcller1;~Frank_Hutter1", "gender": ";;M", "homepage": "http://www.noahhollmann.com;https://uncoolis.cool;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": ";284/9655;89/5383", "google_scholar": ";pevYEjAAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": "0000-0001-8556-518X;;0000-0002-2037-3694", "linkedin": ";;frank-hutter-9190b24b/", "or_profile": "~Noah_Hollmann1;~Samuel_M\u00fcller1;~Frank_Hutter1", "aff": "Charite Universit\u00e4tsmedizin Berlin;University of Freiburg, Universit\u00e4t Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "charite.de;cs.uni-freiburg.de;uni-freiburg.de", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhollmann2023large,\ntitle={Large Language Models for Automated Data Science: Introducing {CAAFE} for Context-Aware Automated Feature Engineering},\nauthor={Noah Hollmann and Samuel M{\\\"u}ller and Frank Hutter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9WSxQZ9mG7}\n}", "github": "", "project": "", "reviewers": "ndjC;QFgW;uCcq;Cmyk;Y55o", "pdf_size": 415918, "rating": "3;4;6;6;6", "confidence": "4;5;4;4;5", "soundness": "2;2;3;4;2", "novelty": "3;3;2;2;3", "presentation": "4;3;3;4;3", "wc_summary": "67;35;182;57;118", "wc_strengths": "71;31;119;17;72", "wc_weaknesses": "364;144;133;166;98", "wc_questions": "130;31;55;17;96", "wc_limitations": "18;6;85;23;110", "wc_review": "650;247;574;280;494", "wc_reply_reviewers": "147;142;22;20;0", "wc_reply_authors": "406;401;0;44;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "2;3;1;2;1", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.8, 52.677889099697225 ], "wc_strengths_avg": [ 62.0, 35.82178108358098 ], "wc_weaknesses_avg": [ 181.0, 94.1020722407323 ], "wc_questions_avg": [ 65.8, 41.81578649266327 ], "wc_limitations_avg": [ 48.4, 41.233966580963326 ], "wc_review_avg": [ 449.0, 159.6345827194095 ], "wc_reply_reviewers_avg": [ 66.2, 64.4124211623814 ], "wc_reply_authors_avg": [ 170.2,
191.17154599992125 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12295495016034093597&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "charite.de;cs.uni-freiburg.de;uni-freiburg.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Charite Universit\u00e4tsmedizin Berlin;University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";;", "aff_unique_url": "https://www.charite.de;https://www.uni-freiburg.de;https://www.uni-freiburg.de", "aff_unique_abbr": "Charite;UoF;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Hierarchical Decomposition of Prompt-Based Continual Learning: Rethinking Obscured Sub-optimality", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72591", "id": "9XieH21Tlf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9f8b5abc8e0926539ecbb492af7b2f1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9XieH21Tlf", "openreview": "https://openreview.net/forum?id=9XieH21Tlf", "poster": "/media/PosterPDFs/NeurIPS%202023/72591.png?t=1697182816.437778", "slides": "https://nips.cc/virtual/2023/poster/72591", "video": "https://nips.cc/virtual/2023/poster/72591", "author_site": "Liyuan Wang, Jingyi Xie, Xingxing Zhang, Mingyi Huang, Hang Su, Jun Zhu", "tldr": "", "abstract": "Prompt-based continual learning is an emerging direction in leveraging pre-trained knowledge for downstream continual learning, and has almost reached the performance pinnacle under supervised pre-training. However, our empirical research reveals that the current strategies fall short of their full potential under the more realistic self-supervised pre-training, which is essential for handling vast quantities of unlabeled data in practice. This is largely due to the difficulty of task-specific knowledge being incorporated into instructed representations via prompt parameters and predicted by uninstructed representations at test time. To overcome the exposed sub-optimality, we conduct a theoretical analysis of the continual learning objective in the context of pre-training, and decompose it into hierarchical components: within-task prediction, task-identity inference, and task-adaptive prediction. Following these empirical and theoretical insights, we propose Hierarchical Decomposition (HiDe-)Prompt, an innovative approach that explicitly optimizes the hierarchical components with an ensemble of task-specific prompts and statistics of both uninstructed and instructed representations, further with the coordination of a contrastive regularization strategy. 
Our extensive experiments demonstrate the superior performance of HiDe-Prompt and its robustness to pre-training paradigms in continual learning (e.g., up to 15.01% and 9.61% lead on Split CIFAR-100 and Split ImageNet-R, respectively).", "keywords": "Continual Learning;Catastrophic Forgetting;Pre-training;Prompt Tuning", "primary_area": "", "supplementary_material": "/attachment/80d04cf7f2746d89585cc430e98ba9a4c1152817.zip", "author": "Liyuan Wang;Jingyi Xie;Xingxing Zhang;Mingyi Huang;Hang Su;Jun Zhu", "authorids": "~Liyuan_Wang1;~Jingyi_Xie3;~Xingxing_Zhang3;~Mingyi_Huang1;~Hang_Su3;~Jun_Zhu2", "gender": "M;F;F;M;M;M", "homepage": "https://lywang3081.github.io/;https://github.com/ninoxjy;https://indussky8.github.io/;;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": "121/6094;;;;50/2644-1;26/5371-6", "google_scholar": "UAgdoY4AAAAJ;;https://scholar.google.com.hk/citations?user=RKjiLyAAAAAJ;;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": ";;0000-0002-2909-1589;0009-0003-7287-3417;;", "linkedin": ";;;;;", "or_profile": "~Liyuan_Wang1;~Jingyi_Xie3;~Xingxing_Zhang3;~Mingyi_Huang1;~Jun_Zhu2;~Hang_Su2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Beijing University of Technology;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;bjut.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Intern;Researcher;Undergrad student;Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023hierarchical,\ntitle={Hierarchical Decomposition of Prompt-Based Continual Learning: Rethinking Obscured Sub-optimality},\nauthor={Liyuan Wang and Jingyi Xie and Xingxing Zhang and Mingyi Huang and Hang Su and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9XieH21Tlf}\n}", "github": "", "project": "", "reviewers": "UkQY;Qf6e;6q9Z;tgkY;pdqM", "pdf_size": 8364324, "rating": "5;5;7;7;8", "confidence": "4;3;5;5;5", "soundness": "3;3;3;3;3", "novelty": "2;3;4;3;3", "presentation": "3;3;3;4;3", "wc_summary": "29;82;89;54;68", "wc_strengths": "23;32;101;165;70", "wc_weaknesses": "164;82;97;173;39", "wc_questions": "2;67;12;40;1", "wc_limitations": "11;5;12;12;4", "wc_review": "229;268;311;444;182", "wc_reply_reviewers": "62;169;55;35;31", "wc_reply_authors": "419;797;28;12;20", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;4;2;2;2", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 64.4, 21.397196077991154 ], "wc_strengths_avg": [ 78.2, 51.58061651434578 ], "wc_weaknesses_avg": [ 111.0, 50.74248712863807 ], "wc_questions_avg": [ 24.4, 25.53898979991182 ], "wc_limitations_avg": [ 8.8, 3.54400902933387 ], "wc_review_avg": [ 286.8, 89.41454020459985 ], "wc_reply_reviewers_avg": [ 70.4, 50.665964907420836 ], "wc_reply_authors_avg": [ 255.2, 311.9175532091773 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.875, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4972023679108268205&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;bjut.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Tsinghua 
University;Beijing University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bjut.edu.cn", "aff_unique_abbr": "THU;BJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Generating QM1B with PySCF$_{\\text{IPU}}$", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73676", "id": "9Z1cmO7S7o", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac7f98dd0b342edaf3be79844a180a6b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=9Z1cmO7S7o", "openreview": "https://openreview.net/forum?id=9Z1cmO7S7o", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73676", "video": "https://nips.cc/virtual/2023/poster/73676", "author_site": "Alexander Mathiasen, Hatem Helal, Kerstin Klaser, Paul Balanca, Josef Dean, Carlo Luschi, Dominique Beaini, Andrew Fitzgibbon, Dominic Masters", "tldr": "", "abstract": "The emergence of foundation models in Computer Vision and Natural Language Processing has resulted in immense progress on downstream tasks. This progress was enabled by datasets with billions of training examples. Similar benefits are yet to be unlocked for quantum chemistry, where the potential of deep learning is constrained by comparatively small datasets with 100k to 20M training examples. These datasets are limited in size because the labels are computed using the accurate (but computationally demanding) predictions of Density Functional Theory (DFT). Notably, prior DFT datasets were created using CPU supercomputers without leveraging hardware acceleration. In this paper, we take a first step towards utilising hardware accelerators by introducing the data generator PySCF$_{\\text{IPU}}$ using Intelligence Processing Units (IPUs). This allows us to create the dataset QM1B with one billion training examples containing 9-11 heavy atoms. We demonstrate that a simple baseline neural network (SchNet 9M) improves its performance by simply increasing the amount of training data without additional inductive biases.
To encourage future researchers to use QM1B responsibly, we highlight several limitations of QM1B and emphasise the low resolution of our DFT options, which also serves as motivation for even larger, more accurate datasets.", "keywords": "create;density functional theory;dataset;quantum chemistry;molecules;deep learning", "primary_area": "", "supplementary_material": "/attachment/68c1db0cf6b4b4d42395fa981e071546b360b03c.pdf", "author": "Alexander Mathiasen;Hatem Helal;Kerstin Klaeser;Paul Balanca;Josef Dean;Carlo Luschi;Dominique Beaini;Andrew W Fitzgibbon;Dominic Masters", "authorids": "~Alexander_Mathiasen2;~Hatem_Helal1;~Kerstin_Klaeser1;~Paul_Balanca1;~Josef_Dean1;~Carlo_Luschi1;~Dominique_Beaini1;~Andrew_W_Fitzgibbon1;~Dominic_Masters1", "gender": ";M;F;M;;M;M;M;M", "homepage": ";;;https://github.com/balancap;https://github.com/jndean;;;http://awf.fitzgibbon.ie;", "dblp": "https://dblp.uni-trier.de/pers/hd/m/Mathiasen:Alexander;;;;;72/10621;201/8526;f/AndrewWFitzgibbon;", "google_scholar": "https://scholar.google.dk/citations?hl=da;inS9BJ8AAAAJ;;;;;https://scholar.google.ca/citations?hl=en;73t3lIcAAAAJ;3rx6VWIAAAAJ", "orcid": ";;0000-0002-5230-6939;;;;0000-0002-4613-9388;;", "linkedin": ";;;;;carlo-luschi-1908144/;dbeaini/;andrew-fitzgibbon-952b9370;", "or_profile": "~Alexander_Mathiasen2;~Hatem_Helal1;~Kerstin_Klaeser1;~Paul_Balanca1;~Josef_Dean1;~Carlo_Luschi1;~Dominique_Beaini1;~Andrew_W_Fitzgibbon1;~Dominic_Masters1", "aff": ";Graphcore;;Graphcore;;Graphcore;Valence Discovery;Graphcore;Graphcore", "aff_domain": ";graphcore.ai;;graphcore.ai;;graphcore.ai;valencediscovery.com;graphcore.ai;graphcore.ai", "position": ";Principal Researcher;;Researcher;;VP & Head of Research;Principal Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nmathiasen2023generating,\ntitle={Generating {QM}1B with Py{SCF}\\$\\_\\{{\\textbackslash}text\\{{IPU}\\}\\}\\$},\nauthor={Alexander Mathiasen and Hatem Helal and Kerstin Klaeser and Paul Balanca and Josef Dean and Carlo Luschi and Dominique Beaini and Andrew W Fitzgibbon and Dominic Masters},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=9Z1cmO7S7o}\n}", "github": "", "project": "", "reviewers": "hHv3;NGMZ;mTsd;Aoip", "pdf_size": 1392792, "rating": "6;6;7;8", "confidence": "3;3;4;3", "wc_summary_and_contributions": "73;47;45;66", "wc_strengths": "97;2;53;45", "wc_improvement": "130;21;112;5", "wc_limitations": "20;92;1;23", "wc_correctness": "7;7;9;8", "wc_clarity": "20;8;1;4", "wc_relation_to_prior_work": "28;10;1;7", "wc_documentation": "51;6;1;20", "wc_additional_feedback": "1;1;1;1", "wc_review": "427;194;224;179", "wc_reply_reviewers": "32;0;51;0", "wc_reply_authors": "477;537;941;362", "reply_reviewers": "1;0;2;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 57.75, 12.028611723719408 ], "wc_strengths_avg": [ 49.25, 33.70738049745189 ], "wc_improvement_avg": [ 67.0, 54.66717479438644 ], "wc_limitations_avg": [ 34.0, 34.532593299664015 ], "wc_correctness_avg": [ 7.75, 0.82915619758885 ], "wc_clarity_avg": [ 8.25, 7.224091638399945 ], "wc_relation_to_prior_work_avg": [ 11.5, 10.062305898749054 ], "wc_documentation_avg": [ 19.5, 19.474342094150447 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 256.0, 100.04748872410542 ], "wc_reply_reviewers_avg": [ 20.75, 21.810261346439663 ], 
"wc_reply_authors_avg": [ 579.25, 218.11737092675585 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": ";graphcore.ai;;graphcore.ai;;graphcore.ai;valencediscovery.com;graphcore.ai;graphcore.ai", "author_num": 9, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Graphcore;Valence Discovery", "aff_unique_dep": ";", "aff_unique_url": "https://www.graphcore.ai;", "aff_unique_abbr": "Graphcore;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom;" }, { "id": "9buR1UFCDh", "title": "Iterated Deep Q-Network: Efficient Learning of Bellman Iterations for Deep Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Value-based reinforcement learning~(RL) methods strive to obtain accurate approximations of optimal action-value functions. Notoriously, these methods heavily rely on the application of the optimal Bellman operator, which needs to be approximated from samples.Most approaches consider only a single Bellman iteration, which limits their power. In this paper, we introduce iterated Deep Q-Network (iDQN), a new DQN-based algorithm that incorporates several consecutive Bellman iterations into the training loss. iDQN leverages the online network of DQN to build a target for a second online network, which in turn serves as a target for a third online network, etc., thereby taking into account future Bellman iterations. While using the same number of gradient steps, iDQN allows for better learning of the Bellman iterations compared to DQN. We evaluate iDQN against relevant baselines on 54 Atari 2600 games to showcase its benefit in terms of approximation error and performance. 
iDQN greatly outperforms its closest baselines, DQN and Random Ensemble Mixture, while being orthogonal to more advanced DQN-based approaches.", "keywords": "deep reinforcement learning;bellman operator;approximate value iteration;atari games", "primary_area": "", "supplementary_material": "/attachment/39365aae65fe86f962a0aeef040ea66ab27470e2.zip", "author": "Th\u00e9o Vincent;Boris Belousov;Carlo D'Eramo;Jan Peters", "authorids": "~Th\u00e9o_Vincent2;~Boris_Belousov1;~Carlo_D'Eramo2;~Jan_Peters3", "gender": "M;M;M;M", "homepage": "https://carloderamo.wixsite.com/home;https://www.jan-peters.net;https://www.ias.informatik.tu-darmstadt.de/Team/BorisBelousov;https://www.ias.informatik.tu-darmstadt.de/Team/TheoVincent", "dblp": "182/8953;p/JanPeters1;191/6726;", "google_scholar": "https://scholar.google.it/citations?user=1Rt_86gAAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ;https://scholar.google.de/citations?user=XjNbRVYAAAAJ;nZPOL4wAAAAJ", "orcid": "0000-0003-2712-118X;0000-0002-5266-8091;0000-0001-7172-9104;0000-0002-1723-7405", "linkedin": "carlo-d-eramo-6438a289/;janrpeters/;boris-belousov/;theo-vincent/", "or_profile": "~Carlo_D'Eramo2;~Jan_Peters3;~Boris_Belousov2;~Th\u00e9o_VINCENT1", "aff": "TU Darmstadt;TU Darmstadt;German Research Centre for AI;Technische Universit\u00e4t Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;dfki.de;tu-darmstadt.de", "position": "Postdoc;Full Professor;Researcher;PhD student", "bibtex": "@misc{\nvincent2023iterated,\ntitle={Iterated Deep Q-Network: Efficient Learning of Bellman Iterations for Deep Reinforcement Learning},\nauthor={Th{\\'e}o Vincent and Boris Belousov and Carlo D'Eramo and Jan Peters},\nyear={2023},\nurl={https://openreview.net/forum?id=9buR1UFCDh}\n}", "github": "", "project": "", "reviewers": "G4ak;PHSC;VNTR;UiiX;A74j", "site": "https://openreview.net/forum?id=9buR1UFCDh", "pdf_size": 7929452, "rating": "4;6;6;7;7", "confidence": "3;3;4;4;4", "soundness": "3;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;3;3;4", "wc_summary": "90;89;73;73;130", "wc_strengths": "137;95;40;17;60", "wc_weaknesses": "123;81;171;81;41", "wc_questions": "98;38;2;375;51", "wc_limitations": "13;23;27;33;32", "wc_review": "461;326;313;579;314", "wc_reply_reviewers": "146;0;140;195;18", "wc_reply_authors": "419;0;536;331;21", "reply_reviewers": "1;0;3;1;1", "reply_authors": "2;1;4;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 91.0, 20.851858430365386 ], "wc_strengths_avg": [ 69.8, 42.24405283587265 ], "wc_weaknesses_avg": [ 99.4, 44.2067868092672 ], "wc_questions_avg": [ 112.8, 134.6542238476016 ], "wc_limitations_avg": [ 25.6, 7.255342858886822 ], "wc_review_avg": [ 398.6, 106.01245209879828 ], "wc_reply_reviewers_avg": [ 99.8, 76.76561730358195 ], "wc_reply_authors_avg": [ 261.4, 215.03915922454684 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.74535599249993, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UJHPoVH5zKYJ:scholar.google.com/&scioq=Iterated+Deep+Q-Network:+Efficient+Learning+of+Bellman+Iterations+for+Deep+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technische 
Universit\u00e4t Darmstadt;German Research Centre for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.dfki.de/", "aff_unique_abbr": "TU Darmstadt;DFKI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Learning Space-Time Continuous Latent Neural PDEs from Partially Observed States", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72590", "id": "9cF6RUwMe7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/53e9b4152ca09d5f1228157e752651dd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9cF6RUwMe7", "openreview": "https://openreview.net/forum?id=9cF6RUwMe7", "poster": "/media/PosterPDFs/NeurIPS%202023/72590.png?t=1701440526.7259102", "slides": "https://nips.cc/virtual/2023/poster/72590", "video": "https://nips.cc/virtual/2023/poster/72590", "author_site": "Valerii Iakovlev, Markus Heinonen, Harri L\u00e4hdesm\u00e4ki", "tldr": "", "abstract": "We introduce a novel grid-independent model for learning partial differential equations (PDEs) from noisy and partial observations on irregular spatiotemporal grids. We propose a space-time continuous latent neural PDE model with an efficient probabilistic framework and a novel encoder design for improved data efficiency and grid independence. The latent state dynamics are governed by a PDE model that combines the collocation method and the method of lines. We employ amortized variational inference for approximate posterior estimation and utilize a multiple shooting technique for enhanced training speed and stability. Our model demonstrates state-of-the-art performance on complex synthetic and real-world datasets, overcoming limitations of previous approaches and effectively handling partially-observed data. 
The proposed model outperforms recent methods, showing its potential to advance data-driven PDE modeling and enabling robust, grid-independent modeling of complex partially-observed dynamic processes across various domains.", "keywords": "neural;PDEs;neural PDEs;partial observations;space time continuous", "primary_area": "", "supplementary_material": "/attachment/39168096f2c2c18b5c58719f609e3fc7aa7cfd86.pdf", "author": "Valerii Iakovlev;Markus Heinonen;Harri L\u00e4hdesm\u00e4ki", "authorids": "~Valerii_Iakovlev1;~Markus_Heinonen1;~Harri_L\u00e4hdesm\u00e4ki1", "gender": ";M;M", "homepage": ";https://users.aalto.fi/~heinom10/;https://research.cs.aalto.fi/csb/", "dblp": ";22/7709;85/4466", "google_scholar": ";hFtfHZoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "valerii-iakovlev-a12596190/;;", "or_profile": "~Valerii_Iakovlev1;~Markus_Heinonen1;~Harri_L\u00e4hdesm\u00e4ki1", "aff": "Aalto University;Aalto University;Aalto University", "aff_domain": "aalto.fi;aalto.fi;aalto.fi", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\niakovlev2023learning,\ntitle={Learning Space-Time Continuous Latent Neural {PDE}s from Partially Observed States},\nauthor={Valerii Iakovlev and Markus Heinonen and Harri L{\\\"a}hdesm{\\\"a}ki},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9cF6RUwMe7}\n}", "github": "", "project": "", "reviewers": "1peH;9sfV;mL6m;nYoy;yPZT;VsBY", "pdf_size": 2579311, "rating": "3;5;5;6;6;6", "confidence": "5;4;4;3;4;3", "soundness": "2;3;2;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "3;1;2;4;2;3", "wc_summary": "116;64;51;51;143;60", "wc_strengths": "70;32;44;142;85;182", "wc_weaknesses": "312;74;257;407;397;672", "wc_questions": "258;236;25;259;136;309", "wc_limitations": "1;1;10;54;21;13", "wc_review": "757;407;387;913;782;1236", "wc_reply_reviewers": "0;16;25;192;183;202", "wc_reply_authors": "0;6;16;235;157;262", "reply_reviewers": "0;1;2;1;1;2", "reply_authors": "1;2;2;2;2;2", "rating_avg": [ 5.166666666666667, 1.0671873729054746 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 80.83333333333333, 35.5875290266439 ], "wc_strengths_avg": [ 92.5, 53.28461316365166 ], "wc_weaknesses_avg": [ 353.1666666666667, 180.4396636613531 ], "wc_questions_avg": [ 203.83333333333334, 95.44180891458883 ], "wc_limitations_avg": [ 16.666666666666668, 18.080068829760823 ], "wc_review_avg": [ 747.0, 292.4613934635932 ], "wc_reply_reviewers_avg": [ 103.0, 89.7997772825746 ], "wc_reply_authors_avg": [ 112.66666666666667, 110.03585274304412 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.6871842709362768 ], "reply_authors_avg": [ 1.8333333333333333, 0.3726779962499649 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8711871219403557, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1071523498383915727&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "aalto.fi;aalto.fi;aalto.fi", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Finland" }, { "title": 
"Learning threshold neurons via edge of stability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72589", "id": "9cQ6kToLnJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3e592c571de69a43d7a870ea89c7e33a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9cQ6kToLnJ", "openreview": "https://openreview.net/forum?id=9cQ6kToLnJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72589", "video": "https://nips.cc/virtual/2023/poster/72589", "author_site": "Kwangjun Ahn, Sebastien Bubeck, Sinho Chewi, Yin Tat Lee, Felipe Suarez, Felipe Suarez, Yi Zhang", "tldr": "", "abstract": "Existing analyses of neural network training often operate under the unrealistic assumption of an extremely small learning rate. This lies in stark contrast to practical wisdom and empirical studies, such as the work of J. Cohen et al. (ICLR 2021), which exhibit startling new phenomena (the \"edge of stability\"' or \"unstable convergence\") and potential benefits for generalization in the large learning rate regime. Despite a flurry of recent works on this topic, however, the latter effect is still poorly understood. In this paper, we take a step towards understanding genuinely non-convex training dynamics with large learning rates by performing a detailed analysis of gradient descent for simplified models of two-layer neural networks. For these models, we provably establish the edge of stability phenomenon and discover a sharp phase transition for the step size below which the neural network fails to learn ``threshold-like'' neurons (i.e., neurons with a non-zero first-layer bias). This elucidates one possible mechanism by which the edge of stability can in fact lead to better generalization, as threshold neurons are basic building blocks with useful inductive bias for many tasks.", "keywords": "Gradient descent;edge of stability;generalization", "primary_area": "", "supplementary_material": "/attachment/7b57b6d4b310784b67b7be3a1ce657996dbfbfb9.pdf", "author": "Kwangjun Ahn;Sebastien Bubeck;Sinho Chewi;Yin Tat Lee;Felipe Suarez;Yi Zhang", "authorids": "~Kwangjun_Ahn2;~Sebastien_Bubeck1;~Sinho_Chewi1;~Yin_Tat_Lee1;~Felipe_Suarez1;~Yi_Zhang1", "gender": ";;M;;;M", "homepage": "http://kjahn.mit.edu/;http://sbubeck.com/;https://chewisinho.github.io/;;https://math.mit.edu/directory/profile.php?pid=2036;https://yi-zhang.me", "dblp": ";35/4292;200/8964;;;64/6544-74", "google_scholar": "z94iNtgAAAAJ;V2Y1L4sAAAAJ;u_fAQO4AAAAJ;;;lc6CVqEAAAAJ", "orcid": ";;0000-0003-2701-0703;;;", "linkedin": ";;chewisinho/;;;", "or_profile": "~Kwangjun_Ahn2;~Sebastien_Bubeck1;~Sinho_Chewi1;~Yin_Tat_Lee1;~Felipe_Suarez1;~Yi_Zhang1", "aff": "Massachusetts Institute of Technology;Microsoft;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Microsoft", "aff_domain": "mit.edu;microsoft.com;mit.edu;;mit.edu;microsoft.com", "position": "PhD student;Researcher;PhD student;;PhD student;Postdoc", "bibtex": "@inproceedings{\nahn2023learning,\ntitle={Learning threshold neurons via edge of stability},\nauthor={Kwangjun Ahn and Sebastien Bubeck and Sinho Chewi and Yin Tat Lee and Felipe Suarez and Yi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9cQ6kToLnJ}\n}", "github": "", "project": "", "reviewers": "C6cz;poXm;5SQG;gYkS", "pdf_size": 1475845, "rating": "3;4;6;7", "confidence": "5;4;4;3", "soundness": "4;3;3;4", "novelty": "1;2;2;4", "presentation": 
"3;2;3;4", "wc_summary": "75;149;140;76", "wc_strengths": "29;123;102;132", "wc_weaknesses": "90;140;68;74", "wc_questions": "19;103;17;117", "wc_limitations": "1;1;23;25", "wc_review": "214;516;350;424", "wc_reply_reviewers": "99;188;9;0", "wc_reply_authors": "0;110;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 110.0, 34.64823227814083 ], "wc_strengths_avg": [ 96.5, 40.46294601237038 ], "wc_weaknesses_avg": [ 93.0, 28.30194339616981 ], "wc_questions_avg": [ 64.0, 46.2709412050371 ], "wc_limitations_avg": [ 12.5, 11.521718621802913 ], "wc_review_avg": [ 376.0, 110.48076755707302 ], "wc_reply_reviewers_avg": [ 74.0, 76.35771080905975 ], "wc_reply_authors_avg": [ 27.5, 47.63139720814412 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.894427190999916, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18197612848137070519&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mit.edu;microsoft.com;mit.edu;;mit.edu;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "MIT;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Representation for Asymmetric Kernels via Magnetic Transform", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72588", "id": "9cQzO3rXgR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a86b7a9bf7647d6f9f9168d8167d9283-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9cQzO3rXgR", "openreview": "https://openreview.net/forum?id=9cQzO3rXgR", "poster": "/media/PosterPDFs/NeurIPS%202023/72588.png?t=1699429497.5419827", "slides": "https://nips.cc/virtual/2023/poster/72588", "video": "https://nips.cc/virtual/2023/poster/72588", "author_site": "Mingzhen He, FAN He, Ruikai Yang, Xiaolin Huang", "tldr": "", "abstract": "As a nonlinear dimension reduction technique, the diffusion map (DM) has been widely used. \nIn DM, kernels play an important role for capturing the nonlinear relationship of data. However, only symmetric kernels can be used now, which prevents the use of DM in directed graphs, trophic networks, and other real-world scenarios where the intrinsic and extrinsic geometries in data are asymmetric. A promising technique is the magnetic transform which converts an asymmetric matrix to a Hermitian one. However, we are facing essential problems, including how diffusion distance could be preserved and how divergence could be avoided during diffusion process. Via theoretical proof, we successfully establish a diffusion representation framework with the magnetic transform, named MagDM. 
Its effectiveness and robustness in dealing with data endowed with asymmetric proximity are demonstrated on three synthetic datasets and two trophic networks.", "keywords": "Asymmetric kernels;diffusion maps;magnetic transform;dimension reduction", "primary_area": "", "supplementary_material": "/attachment/b551dd567310d0450071769641e6b4b4d28f7cbd.pdf", "author": "Mingzhen He;FAN He;Ruikai Yang;Xiaolin Huang", "authorids": "~Mingzhen_He1;~FAN_He1;ruikai.yang@sjtu.edu.cn;~Xiaolin_Huang1", "gender": "M;F;;M", "homepage": "https://mingzhenhe.github.io/;http://www.pami.sjtu.edu.cn/StudentDetail/12;;http://www.pami.sjtu.edu.cn/en/xiaolin", "dblp": "313/1706;;;61/2227", "google_scholar": ";https://scholar.google.com/citations?hl=en;;DR-gBcEAAAAJ", "orcid": "0000-0002-9214-4196;;;", "linkedin": ";;;", "or_profile": "~Mingzhen_He1;~FAN_He1;ruikai.yang@sjtu.edu.cn;~Xiaolin_Huang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nhe2023diffusion,\ntitle={Diffusion Representation for Asymmetric Kernels via Magnetic Transform},\nauthor={Mingzhen He and FAN He and Ruikai Yang and Xiaolin Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9cQzO3rXgR}\n}", "github": "", "project": "", "reviewers": "E65Y;sG91;SsqM", "pdf_size": 1443073, "rating": "6;7;7", "confidence": "5;2;4", "soundness": "4;3;3", "novelty": "3;3;4", "presentation": "4;2;3", "wc_summary": "79;74;165", "wc_strengths": "48;33;43", "wc_weaknesses": "268;26;96", "wc_questions": "29;40;195", "wc_limitations": "1;38;1", "wc_review": "425;211;500", "wc_reply_reviewers": "426;13;22", "wc_reply_authors": "957;19;19", "reply_reviewers": "3;1;1", "reply_authors": "4;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 106.0, 41.769207158703246 ], "wc_strengths_avg": [ 41.333333333333336, 6.236095644623236 ], "wc_weaknesses_avg": [ 130.0, 101.67923419590977 ], "wc_questions_avg": [ 88.0, 75.79357932349328 ], "wc_limitations_avg": [ 13.333333333333334, 17.441967269268172 ], "wc_review_avg": [ 378.6666666666667, 122.44817497846 ], "wc_reply_reviewers_avg": [ 153.66666666666666, 192.6037959704383 ], "wc_reply_authors_avg": [ 331.6666666666667, 442.1774405019877 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7559289460184542, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3487835528580354333&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Reinforcement-Enhanced Autoregressive Feature Transformation: Gradient-steered Search in Continuous Space for 
Postfix Expressions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72587", "id": "9dp35y5C0p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8797d13e5998acfab387d4bf0a5b9b00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9dp35y5C0p", "openreview": "https://openreview.net/forum?id=9dp35y5C0p", "poster": "/media/PosterPDFs/NeurIPS%202023/72587.png?t=1701768535.3722012", "slides": "https://nips.cc/virtual/2023/poster/72587", "video": "https://nips.cc/virtual/2023/poster/72587", "author_site": "Dongjie Wang, Meng Xiao, Min Wu, pengfei wang, Yuanchun Zhou, Yanjie Fu", "tldr": "", "abstract": "Feature transformation aims to generate new pattern-discriminative feature space from original features to improve downstream machine learning (ML) task performances. However, the discrete search space for the optimal feature explosively grows on the basis of combinations of features and operations from low-order forms to high-order forms. Existing methods, such as exhaustive search, expansion reduction, evolutionary algorithms, reinforcement learning, and iterative greedy, suffer from large search space. Overly emphasizing efficiency in algorithm design usually sacrifice stability or robustness. To fundamentally fill this gap, we reformulate discrete feature transformation as a continuous space optimization task and develop an embedding-optimization-reconstruction framework. This framework includes four steps: 1) reinforcement-enhanced data preparation, aiming to prepare high-quality transformation-accuracy training data; 2) feature transformation operation sequence embedding, intending to encapsulate the knowledge of prepared training data within a continuous space; 3) gradient-steered optimal embedding search, dedicating to uncover potentially superior embeddings within the learned space; 4) transformation operation sequence reconstruction, striving to reproduce the feature transformation solution to pinpoint the optimal feature space. Finally, extensive experiments and case studies are performed to demonstrate the effectiveness and robustness of the proposed method. 
The code and data are publicly accessible at https://www.dropbox.com/sh/imh8ckui7va3k5u/AACulQegVx0MuywYyoCqSdVPa?dl=0.", "keywords": "Feature Transformation;Autoregressive Generation;Continuous Space Optimization", "primary_area": "", "supplementary_material": "/attachment/d78e24cb8352ee9f3f5eba7a84944f7599e201c7.pdf", "author": "Dongjie Wang;Meng Xiao;Min Wu;pengfei wang;Yuanchun Zhou;Yanjie Fu", "authorids": "~Dongjie_Wang1;~Meng_Xiao1;~Min_Wu2;~pengfei_wang6;~Yuanchun_Zhou2;~Yanjie_Fu2", "gender": "M;M;M;;M;", "homepage": "https://wangdongjie100.github.io/;https://coco11563.github.io/;https://sites.google.com/site/wumincf/;;;", "dblp": "144/9006;25/6475-1;16/0-8;;08/5234.html;", "google_scholar": "K428NRsAAAAJ;YGwukbUAAAAJ;https://scholar.google.com.sg/citations?user=Hji1uWQAAAAJ;;;", "orcid": "0000-0003-3948-0059;0000-0001-5294-5776;0000-0003-0977-3600;;0000-0003-2144-1131;", "linkedin": ";;;;;", "or_profile": "~Dongjie_Wang1;~Meng_Xiao1;~Min_Wu2;~pengfei_wang6;~Yuanchun_Zhou2;~Yanjie_Fu2", "aff": "University of Central Florida;University of Chinese Academy of Sciences;Institute for Infocomm Research (I2R), A*STAR;;Computer Network Information Center, Chinese Academy of Sciences,;", "aff_domain": "ucf.edu;ucas.ac.cn;i2r.a-star.edu.sg;;cnic.cn;", "position": "PhD student;PhD student;Researcher;;Full Professor;", "bibtex": "@inproceedings{\nwang2023reinforcementenhanced,\ntitle={Reinforcement-Enhanced Autoregressive Feature Transformation: Gradient-steered Search in Continuous Space for Postfix Expressions},\nauthor={Dongjie Wang and Meng Xiao and Min Wu and pengfei wang and Yuanchun Zhou and Yanjie Fu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9dp35y5C0p}\n}", "github": "", "project": "", "reviewers": "GfrD;yQs6;miDK;MJDJ", "pdf_size": 3926834, "rating": "6;7;7;7", "confidence": "4;5;4;5", "soundness": "3;4;3;4", "novelty": "3;4;4;4", "presentation": "4;3;3;3", "wc_summary": "81;89;68;104", "wc_strengths": "44;123;113;131", "wc_weaknesses": "42;111;89;103", "wc_questions": "50;71;52;95", "wc_limitations": "31;1;1;1", "wc_review": "248;395;323;434", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 13.047988350699889 ], "wc_strengths_avg": [ 102.75, 34.513584282134474 ], "wc_weaknesses_avg": [ 86.25, 26.733639856929322 ], "wc_questions_avg": [ 67.0, 18.12456896039186 ], "wc_limitations_avg": [ 8.5, 12.99038105676658 ], "wc_review_avg": [ 350.0, 71.08797366643671 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5820154520915659664&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucf.edu;ucas.ac.cn;i2r.a-star.edu.sg;;cnic.cn;", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Central Florida;University of Chinese Academy of Sciences;Institute for Infocomm Research;Chinese Academy of Sciences", "aff_unique_dep": ";;;Computer Network Information Center", "aff_unique_url": 
"https://www.ucf.edu;http://www.ucas.ac.cn;https://www.i2r.a-star.edu.sg;http://www.cas.cn", "aff_unique_abbr": "UCF;UCAS;I2R;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "United States;China;Singapore" }, { "title": "Inner Product-based Neural Network Similarity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72586", "id": "9eneYFIGKq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ea7623ff02edffe68866f88da2667592-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9eneYFIGKq", "openreview": "https://openreview.net/forum?id=9eneYFIGKq", "poster": "/media/PosterPDFs/NeurIPS%202023/72586.png?t=1701715847.0394595", "slides": "https://nips.cc/virtual/2023/poster/72586", "video": "https://nips.cc/virtual/2023/poster/72586", "author_site": "Wei Chen, Zichen Miao, Qiang Qiu", "tldr": "", "abstract": "Analyzing representational similarity among neural networks (NNs) is essential for interpreting or transferring deep models. In application scenarios where numerous NN models are learned, it becomes crucial to assess model similarities in computationally efficient ways. In this paper, we propose a new paradigm for reducing NN representational similarity to filter subspace distance. Specifically, when convolutional filters are decomposed as a linear combination of a set of filter subspace elements, denoted as filter atoms, and have those decomposed atom coefficients shared across networks, NN representational similarity can be significantly simplified as calculating the cosine distance among respective filter atoms, to achieve millions of times computation reduction over popular probing-based methods. We provide both theoretical and empirical evidence that such simplified filter subspace-based similarity preserves a strong linear correlation with other popular probing-based metrics, while being significantly more efficient to obtain and robust to probing data. We further validate the effectiveness of the proposed method in various application scenarios where numerous models exist, such as federated and continual learning as well as analyzing training dynamics. 
We hope our findings can help further explorations of real-time large-scale representational similarity analysis in neural networks.", "keywords": "Neural Network Similarity;Filter Subspace", "primary_area": "", "supplementary_material": "/attachment/d7f4fbe719b67ef488041fc03d65d5c27d00e370.pdf", "author": "Wei Chen;Zichen Miao;Qiang Qiu", "authorids": "~Wei_Chen26;~Zichen_Miao1;~Qiang_Qiu1", "gender": "M;M;", "homepage": "https://weichennone.github.io/myhomepage/;https://zichenmiao.github.io;https://web.ics.purdue.edu/~qqiu/", "dblp": "181/2832-124.html;206/1549;97/360", "google_scholar": "jVT7rQgAAAAJ;Kmv2KIkAAAAJ;jdLtt_YAAAAJ", "orcid": "0000-0001-6722-4322;;", "linkedin": ";;", "or_profile": "~Wei_Chen26;~Zichen_Miao1;~Qiang_Qiu1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023inner,\ntitle={Inner Product-based Neural Network Similarity},\nauthor={Wei Chen and Zichen Miao and Qiang Qiu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9eneYFIGKq}\n}", "github": "", "project": "", "reviewers": "8het;qnN8;PBCY;naiq", "pdf_size": 6133093, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "63;53;50;151", "wc_strengths": "61;46;38;22", "wc_weaknesses": "251;20;47;81", "wc_questions": "56;42;198;53", "wc_limitations": "1;18;26;2", "wc_review": "432;179;359;309", "wc_reply_reviewers": "39;0;21;0", "wc_reply_authors": "15;0;14;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 79.25, 41.70356699372369 ], "wc_strengths_avg": [ 41.75, 14.077908225301087 ], "wc_weaknesses_avg": [ 99.75, 89.95936582702214 ], "wc_questions_avg": [ 87.25, 64.15362421562791 ], "wc_limitations_avg": [ 11.75, 10.638961415476606 ], "wc_review_avg": [ 319.75, 92.28590087331867 ], "wc_reply_reviewers_avg": [ 15.0, 16.294170736800325 ], "wc_reply_authors_avg": [ 7.25, 7.258615570478987 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5013864310088978347&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DPM-Solver-v3: Improved Diffusion ODE Solver with Empirical Model Statistics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72585", "id": "9fWKExmKa0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ada8de994b46571bdcd7eeff2d3f9cff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9fWKExmKa0", "openreview": "https://openreview.net/forum?id=9fWKExmKa0", "poster": "/media/PosterPDFs/NeurIPS%202023/72585.png?t=1702033311.0359318", "slides": 
"https://nips.cc/virtual/2023/poster/72585", "video": "https://nips.cc/virtual/2023/poster/72585", "author_site": "Kaiwen Zheng, Cheng Lu, Jianfei Chen, Jun Zhu", "tldr": "", "abstract": "Diffusion probabilistic models (DPMs) have exhibited excellent performance for high-fidelity image generation while suffering from inefficient sampling. Recent works accelerate the sampling procedure by proposing fast ODE solvers that leverage the specific ODE form of DPMs. However, they highly rely on specific parameterization during inference (such as noise/data prediction), which might not be the optimal choice. In this work, we propose a novel formulation towards the optimal parameterization during sampling that minimizes the first-order discretization error of the ODE solution. Based on such formulation, we propose \\textit{DPM-Solver-v3}, a new fast ODE solver for DPMs by introducing several coefficients efficiently computed on the pretrained model, which we call \\textit{empirical model statistics}. We further incorporate multistep methods and a predictor-corrector framework, and propose some techniques for improving sample quality at small numbers of function evaluations (NFE) or large guidance scales. Experiments show that DPM-Solver-v3 achieves consistently better or comparable performance in both unconditional and conditional sampling with both pixel-space and latent-space DPMs, especially in 5$\\sim$10 NFEs. We achieve FIDs of 12.21 (5 NFE), 2.51 (10 NFE) on unconditional CIFAR10, and MSE of 0.55 (5 NFE, 7.5 guidance scale) on Stable Diffusion, bringing a speed-up of 15\\%$\\sim$30\\% compared to previous state-of-the-art training-free methods. Code is available at \\url{https://github.com/thu-ml/DPM-Solver-v3}.", "keywords": "diffusion models;fast sampling;ODE solver", "primary_area": "", "supplementary_material": "/attachment/82453db1937780422b6da4db194860c627f58902.zip", "author": "Kaiwen Zheng;Cheng Lu;Jianfei Chen;Jun Zhu", "authorids": "~Kaiwen_Zheng2;~Cheng_Lu5;~Jianfei_Chen1;~Jun_Zhu2", "gender": "M;M;M;M", "homepage": "https://zhengkw18.github.io/;https://luchengthu.github.io/;http://ml.cs.tsinghua.edu.cn/~jianfei;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": ";91/1482-11;48/6809-1;50/2644-1", "google_scholar": "0d80xSIAAAAJ;vPE9VRoAAAAJ;di5RZ1MAAAAJ;axsP38wAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kaiwen_Zheng2;~Cheng_Lu5;~Jianfei_Chen1;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "MS student;PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nzheng2023dpmsolverv,\ntitle={{DPM}-Solver-v3: Improved Diffusion {ODE} Solver with Empirical Model Statistics},\nauthor={Kaiwen Zheng and Cheng Lu and Jianfei Chen and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9fWKExmKa0}\n}", "github": "", "project": "", "reviewers": "oHv2;wxE7;yHtS;dqUr", "pdf_size": 8426054, "rating": "4;5;6;6", "confidence": "3;4;2;5", "soundness": "3;3;3;2", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "36;57;90;85", "wc_strengths": "28;52;63;68", "wc_weaknesses": "181;371;107;257", "wc_questions": "20;168;71;38", "wc_limitations": "1;1;9;13", "wc_review": "266;649;340;461", "wc_reply_reviewers": "22;0;16;39", "wc_reply_authors": "92;53;52;30", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 
5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 21.874642854227357 ], "wc_strengths_avg": [ 52.75, 15.417117110536587 ], "wc_weaknesses_avg": [ 229.0, 97.64220398987315 ], "wc_questions_avg": [ 74.25, 57.13306835800087 ], "wc_limitations_avg": [ 6.0, 5.196152422706632 ], "wc_review_avg": [ 429.0, 144.83956641746758 ], "wc_reply_reviewers_avg": [ 19.25, 13.953046262375826 ], "wc_reply_authors_avg": [ 56.75, 22.331312097590683 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.1348399724926484, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17602810534234117782&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Django: Detecting Trojans in Object Detection Models via Gaussian Focus Calibration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72584", "id": "9fb975Au9G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a102d6cb996be3482c059c1e18bbe523-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9fb975Au9G", "openreview": "https://openreview.net/forum?id=9fb975Au9G", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72584", "video": "https://nips.cc/virtual/2023/poster/72584", "author_site": "Guangyu Shen, Siyuan Cheng, Guanhong Tao, Kaiyuan Zhang, Yingqi Liu, Shengwei An, Shiqing Ma, Xiangyu Zhang", "tldr": "", "abstract": "Object detection models are vulnerable to backdoor or trojan attacks, where an attacker can inject malicious triggers into the model, leading to altered behavior during inference. As a defense mechanism, trigger inversion leverages optimization to reverse-engineer triggers and identify compromised models. While existing trigger inversion methods assume that each instance from the support set is equally affected by the injected trigger, we observe that the poison effect can vary significantly across bounding boxes in object detection models due to their dense prediction nature, leading to an undesired optimization objective misalignment issue for existing trigger reverse-engineering methods. To address this challenge, we propose the first object detection backdoor detection framework, Django (Detecting Trojans in Object Detection Models via Gaussian Focus Calibration). It leverages a dynamic Gaussian weighting scheme that prioritizes more vulnerable victim boxes and assigns appropriate coefficients to calibrate the optimization objective during trigger inversion. In addition, we combine Django with a novel label proposal pre-processing technique to enhance its efficiency. We evaluate Django on 3 object detection image datasets, 3 model architectures, and 2 types of attacks, with a total of 168 models. 
Our experimental results show that Django outperforms 6 state-of-the-art baselines, with up to 38% accuracy improvement and 10x reduced overhead. The code is available at https://github.com/PurduePAML/DJGO.", "keywords": "backdoor detection; object detection;", "primary_area": "", "supplementary_material": "/attachment/e5f81735d4e31ff98971d74c0a4e5a5abfe67f21.pdf", "author": "Guangyu Shen;Siyuan Cheng;Guanhong Tao;Kaiyuan Zhang;Yingqi Liu;Shengwei An;Shiqing Ma;Xiangyu Zhang", "authorids": "~Guangyu_Shen1;~Siyuan_Cheng1;~Guanhong_Tao1;~Kaiyuan_Zhang1;~Yingqi_Liu1;~Shengwei_An1;~Shiqing_Ma2;~Xiangyu_Zhang3", "gender": "M;M;;M;M;;;M", "homepage": ";https://www.cs.purdue.edu/homes/cheng535/;;https://kaiyuanzhang.com/;https://www.cs.purdue.edu/homes/liu1751/;https://www.cs.purdue.edu/homes/an93/;https://people.cs.umass.edu/~shiqingma/;https://www.cs.purdue.edu/homes/xyzhang", "dblp": "216/6403;263/7049;;147/6644-2;92/10048;168/9413;172/8745;", "google_scholar": "YiMTVwgAAAAJ;GcL9AFMAAAAJ;;https://scholar.google.com/citations?hl=en;gOPVK2UAAAAJ;qcmmzeEAAAAJ;X_mDnjkAAAAJ;PXbu1wIAAAAJ", "orcid": ";;;0000-0001-6023-363X;;;0000-0003-1551-8948;", "linkedin": ";sycheng98/;;kaiyuan-zhang/;;;shiqing-ma-6590b086;", "or_profile": "~Guangyu_Shen1;~Siyuan_Cheng1;~Guanhong_Tao1;~Kaiyuan_Zhang1;~Yingqi_Liu1;~Shengwei_An1;~Shiqing_Ma2;~Xiangyu_Zhang3", "aff": "Purdue University;Purdue University;;Purdue University;Microsoft;Purdue University;Rutgers University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;;cs.purdue.edu;microsoft.com;purdue.edu;rutgers.edu;cs.purdue.edu", "position": "PhD student;PhD student;;PhD student;Researcher;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nshen2023django,\ntitle={Django: Detecting Trojans in Object Detection Models via Gaussian Focus Calibration},\nauthor={Guangyu Shen and Siyuan Cheng and Guanhong Tao and Kaiyuan Zhang and Yingqi Liu and Shengwei An and Shiqing Ma and Xiangyu Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9fb975Au9G}\n}", "github": "", "project": "", "reviewers": "tRsV;VAnQ;sH6B;TXHx;5mmp", "pdf_size": 1496088, "rating": "5;5;6;6;7", "confidence": "3;3;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "58;80;106;98;51", "wc_strengths": "135;47;96;111;126", "wc_weaknesses": "142;59;174;81;12", "wc_questions": "71;37;2;133;5", "wc_limitations": "11;5;8;34;27", "wc_review": "417;228;386;457;221", "wc_reply_reviewers": "129;16;14;16;0", "wc_reply_authors": "224;0;0;0;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 78.6, 21.518364250100426 ], "wc_strengths_avg": [ 103.0, 30.99032106965012 ], "wc_weaknesses_avg": [ 93.6, 58.00206892861668 ], "wc_questions_avg": [ 49.6, 48.63578929142612 ], "wc_limitations_avg": [ 17.0, 11.40175425099138 ], "wc_review_avg": [ 341.8, 98.41016207689123 ], "wc_reply_reviewers_avg": [ 35.0, 47.379320383475324 ], "wc_reply_authors_avg": [ 44.8, 89.6 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8728715609439696, "gs_citation": 8, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=17302670773663166071&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "purdue.edu;purdue.edu;;cs.purdue.edu;microsoft.com;purdue.edu;rutgers.edu;cs.purdue.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "Purdue University;Microsoft;Rutgers University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.purdue.edu;https://www.microsoft.com;https://www.rutgers.edu", "aff_unique_abbr": "Purdue;Microsoft;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Digital Typhoon: Long-term Satellite Image Dataset for the Spatio-Temporal Modeling of Tropical Cyclones", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73675", "id": "9gLnjw8DfA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7fc36bce5de315751001981baaf4751a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=9gLnjw8DfA", "openreview": "https://openreview.net/forum?id=9gLnjw8DfA", "poster": "/media/PosterPDFs/NeurIPS%202023/73675.png?t=1702025611.8991375", "slides": "https://nips.cc/virtual/2023/poster/73675", "video": "https://nips.cc/virtual/2023/poster/73675", "author_site": "Asanobu Kitamoto, Jared Hwang, Bastien Vuillod, Lucas Gautier, Yingtao Tian, Tarin Clanuwat", "tldr": "", "abstract": "This paper presents the official release of the Digital Typhoon dataset, the longest typhoon satellite image dataset for 40+ years aimed at benchmarking machine learning models for long-term spatio-temporal data. To build the dataset, we developed a workflow to create an infrared typhoon-centered image for cropping using Lambert azimuthal equal-area projection referring to the best track data. We also address data quality issues such as inter-satellite calibration to create a homogeneous dataset. To take advantage of the dataset, we organized machine learning tasks by the types and targets of inference, with other tasks for meteorological analysis, societal impact, and climate change. The benchmarking results on the analysis, forecasting, and reanalysis for the intensity suggest that the dataset is challenging for recent deep learning models, due to many choices that affect the performance of various models. This dataset reduces the barrier for machine learning researchers to meet large-scale real-world events called tropical cyclones and develop machine learning models that may contribute to advancing scientific knowledge on tropical cyclones as well as solving societal and sustainability issues such as disaster reduction and climate change. 
The dataset is publicly available at http://agora.ex.nii.ac.jp/digital-typhoon/dataset/ and https://github.com/kitamoto-lab/digital-typhoon/.", "keywords": "meteorology;typhoon;tropical cyclone;climate;satellite image;dataset", "primary_area": "", "supplementary_material": "/attachment/33a256423968d9e67eb156c1cf17bb7b5eb18be0.pdf", "author": "Asanobu Kitamoto;Jared Hwang;Bastien Vuillod;Lucas Gautier;Yingtao Tian;Tarin Clanuwat", "authorids": "~Asanobu_Kitamoto1;~Jared_Hwang1;~Bastien_Vuillod1;~Lucas_Gautier1;~Yingtao_Tian1;~Tarin_Clanuwat1", "gender": "M;;M;Non-Binary;;F", "homepage": "https://researchmap.jp/kitamoto/;https://jared-hwang.github.io/;;;https://alantian.net/;http://tkasasagi.github.io", "dblp": ";;;;180/5335;", "google_scholar": "https://scholar.google.co.jp/citations?user=TZG-qp0AAAAJ;;;;17Fe5K0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";jared-hwang/;bastien-vuillod-3a6205221/;lucas-gautier-it;;", "or_profile": "~Asanobu_Kitamoto1;~Jared_Hwang1;~Bastien_Vuillod1;~Lucas_Gautier1;~Yingtao_Tian1;~Tarin_Clanuwat1", "aff": "National Institute of Informatics;University of Southern California;Grenoble-INP, Ensimag;Isima;Google;Google DeepMind", "aff_domain": "nii.ac.jp;usc.edu;ensimag.grenoble-inp.fr;etu.uca.fr;google.com;google.com", "position": "Full Professor;MS student;MS student;MS student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nkitamoto2023digital,\ntitle={Digital Typhoon: Long-term Satellite Image Dataset for the Spatio-Temporal Modeling of Tropical Cyclones},\nauthor={Asanobu Kitamoto and Jared Hwang and Bastien Vuillod and Lucas Gautier and Yingtao Tian and Tarin Clanuwat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=9gLnjw8DfA}\n}", "github": "", "project": "", "reviewers": "SUZt;JoXz;Tmq2;fsVs;zc1f", "pdf_size": 1714101, "rating": "6;7;7;8;9", "confidence": "4;3;3;5;3", "wc_summary_and_contributions": "60;62;64;133;138", "wc_strengths": "57;77;57;50;220", "wc_improvement": "131;181;63;239;433", "wc_limitations": "1;45;24;47;84", "wc_correctness": "9;6;6;18;187", "wc_clarity": "1;1;17;4;644", "wc_relation_to_prior_work": "1;16;21;48;31", "wc_documentation": "13;58;6;32;77", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "274;447;259;572;1815", "wc_reply_reviewers": "0;15;0;25;26", "wc_reply_authors": "224;574;285;671;817", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 91.4, 36.0643868657156 ], "wc_strengths_avg": [ 92.2, 64.53340220382 ], "wc_improvement_avg": [ 209.4, 125.9040904816043 ], "wc_limitations_avg": [ 40.2, 27.520174418051933 ], "wc_correctness_avg": [ 45.2, 71.03632873396542 ], "wc_clarity_avg": [ 133.4, 255.36843971015682 ], "wc_relation_to_prior_work_avg": [ 23.4, 15.653753543479596 ], "wc_documentation_avg": [ 37.2, 26.843248685656512 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 673.4, 582.4151783736409 ], "wc_reply_reviewers_avg": [ 13.2, 11.443775600735973 ], "wc_reply_authors_avg": [ 514.2, 226.53865012399098 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.04902903378454606, "gs_citation": 13, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3898087587775583472&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "nii.ac.jp;usc.edu;ensimag.grenoble-inp.fr;etu.uca.fr;google.com;google.com", "author_num": 6, "aff_unique_index": "0;1;2;3;4;4", "aff_unique_norm": "National Institute of Informatics;University of Southern California;Grenoble INP;Isima;Google", "aff_unique_dep": ";;Ensimag;;Google", "aff_unique_url": "https://www.nii.ac.jp/;https://www.usc.edu;https://www.grenoble-inp.fr;https://www.isima.fr;https://www.google.com", "aff_unique_abbr": "NII;USC;Grenoble INP;;Google", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Los Angeles;Grenoble;Mountain View", "aff_country_unique_index": "0;1;2;2;1;3", "aff_country_unique": "Japan;United States;France;United Kingdom" }, { "title": "OpenSTL: A Comprehensive Benchmark of Spatio-Temporal Predictive Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73674", "id": "9gkrbrFzZj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dcbff44d11130e75d09d3930411c23e1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=9gkrbrFzZj", "openreview": "https://openreview.net/forum?id=9gkrbrFzZj", "poster": "/media/PosterPDFs/NeurIPS%202023/73674.png?t=1699799934.2626762", "slides": "https://nips.cc/virtual/2023/poster/73674", "video": "https://nips.cc/virtual/2023/poster/73674", "author_site": "Cheng Tan, Siyuan Li, Zhangyang Gao, Wenfei Guan, Zedong Wang, Zicheng Liu, Lirong Wu, Stan Z. Li", "tldr": "", "abstract": "Spatio-temporal predictive learning is a learning paradigm that enables models to learn spatial and temporal patterns by predicting future frames from given past frames in an unsupervised manner. Despite remarkable progress in recent years, a lack of systematic understanding persists due to the diverse settings, complex implementation, and difficult reproducibility. Without standardization, comparisons can be unfair and insights inconclusive. To address this dilemma, we propose OpenSTL, a comprehensive benchmark for spatio-temporal predictive learning that categorizes prevalent approaches into recurrent-based and recurrent-free models. OpenSTL provides a modular and extensible framework implementing various state-of-the-art methods. We conduct standard evaluations on datasets across various domains, including synthetic moving object trajectory, human motion, driving scenes, traffic flow, and weather forecasting. Based on our observations, we provide a detailed analysis of how model architecture and dataset properties affect spatio-temporal predictive learning performance. Surprisingly, we find that recurrent-free models achieve a good balance between efficiency and performance than recurrent models. Thus, we further extend the common MetaFormers to boost recurrent-free spatial-temporal predictive learning. We open-source the code and models at https://github.com/chengtan9907/OpenSTL.", "keywords": "spatio-temporal predictive learning;self-supervised learning;computer vision", "primary_area": "", "supplementary_material": "/attachment/0884b3de1a7f429f827f7be2a362e05377820fe1.zip", "author": "Cheng Tan;Siyuan Li;Zhangyang Gao;Wenfei Guan;Zedong Wang;Zicheng Liu;Lirong Wu;Stan Z. 
Li", "authorids": "~Cheng_Tan1;~Siyuan_Li6;~Zhangyang_Gao1;~Wenfei_Guan1;~Zedong_Wang1;~Zicheng_Liu2;~Lirong_Wu1;~Stan_Z._Li2", "gender": "M;M;M;M;M;M;;M", "homepage": "https://chengtan9907.github.io/;https://lupin1998.github.io/;;https://github.com/xiaofei-guan;https://jacky1128.github.io;;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "70/1533-12.html;63/9705-2;275/3266;;179/8811.html;l/ZichengLiu-6;15/10330;l/StanZLi", "google_scholar": "6kTV6aMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4SclT-QAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;Tk7TrCoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-6806-2468;0000-0003-1026-6083;;0009-0000-0112-0491;;;", "linkedin": ";https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Cheng_Tan1;~Siyuan_Li6;~Zhangyang_Gao1;~Wenfei_Guan1;~Zedong_Wang1;~Zicheng_Liu2;~Lirong_Wu1;~Stan_Z._Li1", "aff": "Zhejiang University & Westlake University;Alibaba Group;Westlake University, China;Xi'an University of Electronic Science and Technology;Huazhong University of Science and Technology;Zhejiang University;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;alibaba-inc.com;westlake.edu.cn;xidian.edu.cn;hust.edu.cn;zju.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;Intern;PhD student;Undergrad student;Undergrad student;PhD student;PhD student;Chair Professor", "bibtex": "@inproceedings{\ntan2023openstl,\ntitle={Open{STL}: A Comprehensive Benchmark of Spatio-Temporal Predictive Learning},\nauthor={Cheng Tan and Siyuan Li and Zhangyang Gao and Wenfei Guan and Zedong Wang and Zicheng Liu and Lirong Wu and Stan Z. 
Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=9gkrbrFzZj}\n}", "github": "", "project": "", "reviewers": "sUSg;HExp;jUek;rqPr", "pdf_size": 675456, "rating": "6;6;7;7", "confidence": "3;2;4;4", "wc_summary_and_contributions": "92;40;26;65", "wc_strengths": "78;32;31;113", "wc_improvement": "153;23;58;86", "wc_limitations": "46;5;1;4", "wc_correctness": "1;1;6;4", "wc_clarity": "1;1;1;8", "wc_relation_to_prior_work": "1;17;1;14", "wc_documentation": "1;13;1;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "374;133;126;304", "wc_reply_reviewers": "39;21;9;0", "wc_reply_authors": "326;406;303;328", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 55.75, 25.16321720289359 ], "wc_strengths_avg": [ 63.5, 34.311076928595526 ], "wc_improvement_avg": [ 80.0, 47.69171835864168 ], "wc_limitations_avg": [ 14.0, 18.533752992850637 ], "wc_correctness_avg": [ 3.0, 2.1213203435596424 ], "wc_clarity_avg": [ 2.75, 3.031088913245535 ], "wc_relation_to_prior_work_avg": [ 8.25, 7.327175444876422 ], "wc_documentation_avg": [ 6.0, 5.196152422706632 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 234.25, 107.66237736554028 ], "wc_reply_reviewers_avg": [ 17.25, 14.600941750448838 ], "wc_reply_authors_avg": [ 340.75, 38.931831449342326 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9201592651402616190&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "westlake.edu.cn;alibaba-inc.com;westlake.edu.cn;xidian.edu.cn;hust.edu.cn;zju.edu.cn;westlake.edu.cn;westlake.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0;2;2", "aff_unique_norm": "Zhejiang University;Alibaba Group;Westlake University;Xi'an University of Electronic Science and Technology;Huazhong University of Science and Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.zju.edu.cn;https://www.alibaba.com;https://www.westlake.edu.cn;http://www.xidian.edu.cn/;http://www.hust.edu.cn", "aff_unique_abbr": "ZJU;Alibaba;WU;Xidian University;HUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "(Almost) Provable Error Bounds Under Distribution Shift via Disagreement Discrepancy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72583", "id": "9i8MD9btc8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5bacb12bf81e98e2ee0eed953a23c656-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9i8MD9btc8", "openreview": "https://openreview.net/forum?id=9i8MD9btc8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72583", "video": "https://nips.cc/virtual/2023/poster/72583", "author_site": "Elan Rosenfeld, Saurabh Garg", "tldr": "", "abstract": "We derive a new, (almost) guaranteed upper bound on the error of deep neural networks under distribution shift using unlabeled test data. Prior methods are either vacuous in practice or accurate on average but heavily underestimate error for a sizeable fraction of shifts. 
In particular, the latter only give guarantees based on complex continuous measures such as test calibration, which cannot be identified without labels, and are therefore unreliable. Instead, our bound requires a simple, intuitive condition which is well justified by prior empirical works and holds in practice effectively 100\\% of the time. The bound is inspired by $\\mathcal{H}\\Delta\\mathcal{H}$-divergence but is easier to evaluate and substantially tighter, consistently providing non-vacuous test error upper bounds. Estimating the bound requires optimizing one multiclass classifier to disagree with another, for which some prior works have used sub-optimal proxy losses; we devise a \"disagreement loss\" which is theoretically justified and performs better in practice. We expect this loss can serve as a drop-in replacement for future methods which require maximizing multiclass disagreement. Across a wide range of natural and synthetic distribution shift benchmarks, our method gives valid error bounds while achieving average accuracy comparable to\u2014though not better than\u2014competitive estimation baselines.", "keywords": "accuracy estimation;error bounds;distribution shift;unsupervised domain adaptation", "primary_area": "", "supplementary_material": "/attachment/8034fe4f1e3d535776a1fea5f05990ffba694c57.pdf", "author": "Elan Rosenfeld;Saurabh Garg", "authorids": "~Elan_Rosenfeld1;~Saurabh_Garg3", "gender": "M;M", "homepage": ";http://saurabhgarg1996.github.io/", "dblp": "236/4508;80/208", "google_scholar": "f0j0K8QAAAAJ;SAnJ1hIAAAAJ", "orcid": ";", "linkedin": ";saurabh-garg-b680b5b8/", "or_profile": "~Elan_Rosenfeld1;~Saurabh_Garg3", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\nrosenfeld2023almost,\ntitle={(Almost) Provable Error Bounds Under Distribution Shift via Disagreement Discrepancy},\nauthor={Elan Rosenfeld and Saurabh Garg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9i8MD9btc8}\n}", "github": "", "project": "", "reviewers": "PQBq;xsfT;uhaU;JSAn;TEVz;rQbE", "pdf_size": 1087967, "rating": "3;4;6;6;7;7", "confidence": "3;3;4;3;3;3", "soundness": "1;3;3;3;3;4", "novelty": "2;2;3;3;4;3", "presentation": "2;3;3;3;4;4", "wc_summary": "65;116;29;51;103;115", "wc_strengths": "64;84;81;40;73;142", "wc_weaknesses": "107;186;332;38;186;102", "wc_questions": "17;32;64;90;2;2", "wc_limitations": "4;33;33;4;7;13", "wc_review": "257;451;539;223;371;374", "wc_reply_reviewers": "1429;0;568;9;155;0", "wc_reply_authors": "2063;127;335;0;0;0", "reply_reviewers": "5;0;1;1;1;0", "reply_authors": "5;2;2;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.1666666666666665, 0.3726779962499649 ], "soundness_avg": [ 2.8333333333333335, 0.8975274678557507 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 79.83333333333333, 33.45851494346726 ], "wc_strengths_avg": [ 80.66666666666667, 30.992830712207553 ], "wc_weaknesses_avg": [ 158.5, 93.07658137254505 ], "wc_questions_avg": [ 34.5, 32.59729027593142 ], "wc_limitations_avg": [ 15.666666666666666, 12.618328820498467 ], "wc_review_avg": [ 369.1666666666667, 107.63583149779734 ], "wc_reply_reviewers_avg": [ 360.1666666666667, 518.0815304778025 ], "wc_reply_authors_avg": [ 420.8333333333333, 744.0563188062821 ], "reply_reviewers_avg": [ 
1.3333333333333333, 1.699673171197595 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.149071198499986, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Mb3laMgMzPoJ:scholar.google.com/&scioq=(Almost)+Provable+Error+Bounds+Under+Distribution+Shift+via+Disagreement+Discrepancy&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "andrew.cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Rewrite Caption Semantics: Bridging Semantic Gaps for Language-Supervised Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72582", "id": "9iafshF7s3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d937cb3fe2851ed0ab9af5e38f885077-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9iafshF7s3", "openreview": "https://openreview.net/forum?id=9iafshF7s3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72582", "video": "https://nips.cc/virtual/2023/poster/72582", "author_site": "Yun Xing, Jian Kang, Aoran Xiao, Jiahao Nie, Ling Shao, Shijian Lu", "tldr": "", "abstract": "Vision-Language Pre-training has demonstrated its remarkable zero-shot recognition ability and potential to learn generalizable visual representations from language supervision. Taking a step further, language-supervised semantic segmentation enables spatial localization of textual inputs by learning pixel grouping solely from image-text pairs. Nevertheless, the state-of-the-art suffers from a clear semantic gap between visual and textual modalities: plenty of visual concepts appearing in images are missing from their paired captions. Such semantic misalignment circulates in pre-training, leading to inferior zero-shot performance in dense predictions due to insufficient visual concepts captured in textual representations. To close this semantic gap, we propose Concept Curation (CoCu), a pipeline that leverages CLIP to compensate for the missing semantics. For each image-text pair, we establish a concept archive that maintains potential visually-matched concepts with our proposed vision-driven expansion and text-to-vision-guided ranking. Relevant concepts can thus be identified via cluster-guided sampling and fed into pre-training, thereby bridging the gap between visual and textual semantics. 
Extensive experiments over a broad suite of 8 segmentation benchmarks show that CoCu achieves superb zero-shot transfer performance and boosts the language-supervised segmentation baseline by a large margin, suggesting the value of closing the semantic gap in pre-training data.", "keywords": "language-supervised semantic segmentation;vision-language pre-training", "primary_area": "", "supplementary_material": "/attachment/745bb411caba65c79606ec7e3d5af27a353b24c7.pdf", "author": "Yun Xing;Jian Kang;Aoran Xiao;Jiahao Nie;Ling Shao;Shijian Lu", "authorids": "~Yun_Xing2;~Jian_Kang6;~Aoran_Xiao1;~Jiahao_Nie1;~Ling_Shao1;~Shijian_Lu1", "gender": "M;M;M;;M;M", "homepage": "https://xing0047.github.io;;https://xiaoaoran.github.io;;;https://personal.ntu.edu.sg/shijian.lu/", "dblp": "09/9613-1;;205/7219;;;42/2718", "google_scholar": "uOAYTXoAAAAJ;;yGKsEpAAAAAJ;;z84rLjoAAAAJ;https://scholar.google.com.sg/scholar?hl=en", "orcid": "0000-0001-9839-0120;;0000-0002-2956-0613;;;", "linkedin": "yun-xing-2bbb22239/;alan-kang-6497b5239/;;;;", "or_profile": "~Yun_Xing2;~Jian_Kang6;~Aoran_Xiao1;~Jiahao_Nie1;~Ling_Shao1;~Shijian_Lu1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;;Terminus Group;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;;terminusgroup.com;ntu.edu.sg", "position": "PhD student;MS student;PhD student;;Chief Scientist;Associate Professor", "bibtex": "@inproceedings{\nxing2023rewrite,\ntitle={Rewrite Caption Semantics: Bridging Semantic Gaps for Language-Supervised Semantic Segmentation},\nauthor={Yun Xing and Jian Kang and Aoran Xiao and Jiahao Nie and Ling Shao and Shijian Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9iafshF7s3}\n}", "github": "", "project": "", "reviewers": "e4vq;wvpf;iruH;griQ", "pdf_size": 1018138, "rating": "5;6;6;6", "confidence": "5;3;4;4", "soundness": "2;3;4;2", "novelty": "2;3;3;2", "presentation": "3;3;2;3", "wc_summary": "146;61;81;57", "wc_strengths": "65;36;151;25", "wc_weaknesses": "161;86;119;98", "wc_questions": "46;1;108;2", "wc_limitations": "57;1;1;1", "wc_review": "475;185;460;183", "wc_reply_reviewers": "24;8;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 35.67474596966319 ], "wc_strengths_avg": [ 69.25, 49.40837479618207 ], "wc_weaknesses_avg": [ 116.0, 28.53944638566067 ], "wc_questions_avg": [ 39.25, 43.654180784891615 ], "wc_limitations_avg": [ 15.0, 24.24871130596428 ], "wc_review_avg": [ 325.75, 141.8509340822259 ], "wc_reply_reviewers_avg": [ 8.0, 9.797958971132712 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7141814377898775996&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;;terminusgroup.com;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Nanyang Technological University;Terminus Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;", 
"aff_unique_abbr": "NTU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore;" }, { "title": "Hypervolume Maximization: A Geometric View of Pareto Set Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72581", "id": "9ieV1hnuva", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a7f6cc5dc2a84fb4edf0feb8e5cfd50-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9ieV1hnuva", "openreview": "https://openreview.net/forum?id=9ieV1hnuva", "poster": "/media/PosterPDFs/NeurIPS%202023/72581.png?t=1699508450.9766703", "slides": "https://nips.cc/virtual/2023/poster/72581", "video": "https://nips.cc/virtual/2023/poster/72581", "author_site": "Xiaoyuan Zhang, Xi Lin, Bo Xue, Yifan Chen, Qingfu Zhang", "tldr": "", "abstract": "This paper presents a novel approach to multiobjective algorithms aimed at modeling the Pareto set using neural networks. \nWhereas previous methods mainly focused on identifying a finite number of solutions, our approach allows for the direct modeling of the entire Pareto set. \nFurthermore, we establish an equivalence between learning the complete Pareto set and maximizing the associated hypervolume, which enables the convergence analysis of hypervolume (as a new metric) for Pareto set learning. \nSpecifically, our new analysis framework reveals the connection between the learned Pareto solution and its representation in a polar coordinate system. \nWe evaluate our proposed approach on various benchmark problems and real-world problems, and the encouraging results make it a potentially viable alternative to existing multiobjective algorithms. Code is available at \\url{https://github.com/xzhang2523/hvpsl/tree/master}.", "keywords": "multiobjective optimization;multitask learning;hypervolume maximization;Pareto set learning", "primary_area": "", "supplementary_material": "/attachment/04ecf98cbb8d101cb6f3a1b6c24337033142865c.pdf", "author": "Xiaoyuan Zhang;Xi Lin;Bo Xue;Yifan Chen;Qingfu Zhang", "authorids": "~Xiaoyuan_Zhang2;~Xi_Lin2;~Bo_Xue2;~Yifan_Chen3;~Qingfu_Zhang1", "gender": "M;M;;;M", "homepage": ";https://xi-l.github.io/;;;https://www.cs.cityu.edu.hk/~qzhan7/index.html", "dblp": ";43/489-1;;;98/1240.html", "google_scholar": "KQj18L8AAAAJ;QB_MUboAAAAJ;;;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ", "orcid": "0000-0002-3852-645X;;;;", "linkedin": ";;;;", "or_profile": "~Xiaoyuan_Zhang2;~Xi_Lin2;~Bo_Xue2;~Yifan_Chen3;~Qingfu_Zhang1", "aff": "City University of Hong Kong;City University of Hong Kong;;;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;;;cityu.edu.hk", "position": "PhD student;Postdoc;;;Full Professor", "bibtex": "@inproceedings{\nzhang2023hypervolume,\ntitle={Hypervolume Maximization: A Geometric View of Pareto Set Learning},\nauthor={Xiaoyuan Zhang and Xi Lin and Bo Xue and Yifan Chen and Qingfu Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9ieV1hnuva}\n}", "github": "", "project": "", "reviewers": "Urc8;ZpxS;jfi6;Kyce;6icQ;FLs5", "pdf_size": 15480988, "rating": "6;6;7;7;7;7", "confidence": "4;3;3;2;3;3", "soundness": "3;3;3;3;3;3", "novelty": "3;3;2;3;4;3", "presentation": "3;4;3;3;3;3", "wc_summary": "32;79;103;50;87;124", "wc_strengths": "29;171;30;37;81;95", "wc_weaknesses": "82;138;20;29;159;118", "wc_questions": "52;80;75;25;75;2", "wc_limitations": "8;153;1;18;8;7", 
"wc_review": "203;621;229;159;410;346", "wc_reply_reviewers": "30;23;69;12;11;20", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.5773502691896257 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 79.16666666666667, 30.86214869742906 ], "wc_strengths_avg": [ 73.83333333333333, 50.379945304544435 ], "wc_weaknesses_avg": [ 91.0, 52.48491846870553 ], "wc_questions_avg": [ 51.5, 29.067450295247202 ], "wc_limitations_avg": [ 32.5, 54.11946661476503 ], "wc_review_avg": [ 328.0, 156.46298816866138 ], "wc_reply_reviewers_avg": [ 27.5, 19.653244007033546 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13739605609475584302&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cityu.edu.hk;cityu.edu.hk;;;cityu.edu.hk", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "RangePerception: Taming LiDAR Range View for Efficient and Accurate 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72580", "id": "9kFQEJSyCM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb8e52adcd9b59bad73f109c53afc43a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9kFQEJSyCM", "openreview": "https://openreview.net/forum?id=9kFQEJSyCM", "poster": "/media/PosterPDFs/NeurIPS%202023/72580.png?t=1699442980.5426366", "slides": "https://nips.cc/virtual/2023/poster/72580", "video": "https://nips.cc/virtual/2023/poster/72580", "author_site": "Yeqi BAI, Ben Fei, Youquan Liu, Tao MA, Yuenan Hou, Botian Shi, Yikang LI", "tldr": "", "abstract": "LiDAR-based 3D detection methods currently use bird's-eye view (BEV) or range view (RV) as their primary basis. The former relies on voxelization and 3D convolutions, resulting in inefficient training and inference processes. Conversely, RV-based methods demonstrate higher efficiency due to their compactness and compatibility with 2D convolutions, but their performance still trails behind that of BEV-based methods. To eliminate this performance gap while preserving the efficiency of RV-based methods, this study presents an efficient and accurate RV-based 3D object detection framework termed RangePerception. Through meticulous analysis, this study identifies two critical challenges impeding the performance of existing RV-based methods: 1) there exists a natural domain gap between the 3D world coordinate used in output and 2D range image coordinate used in input, generating difficulty in information extraction from range images; 2) native range images suffer from vision corruption issue, affecting the detection accuracy of the objects located on the margins of the range images. 
To address the key challenges above, we propose two novel algorithms named Range Aware Kernel (RAK) and Vision Restoration Module (VRM), which facilitate information flow from range image representation and world-coordinate 3D detection results. With the help of RAK and VRM, our RangePerception achieves 3.25/4.18 higher averaged L1/L2 AP compared to previous state-of-the-art RV-based method RangeDet, on Waymo Open Dataset. For the first time as an RV-based 3D detection method, RangePerception achieves slightly superior averaged AP compared with the well-known BEV-based method CenterPoint and the inference speed of RangePerception is 1.3 times as fast as CenterPoint.", "keywords": "3D Detection;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/c812b9a5c2413b8f10bc6fd6eb0e4aa624e81dde.pdf", "author": "Yeqi BAI;Ben Fei;Youquan Liu;Tao MA;Yuenan Hou;Botian Shi;Yikang LI", "authorids": "~Yeqi_BAI1;~Ben_Fei2;~Youquan_Liu1;~Tao_MA3;~Yuenan_Hou1;~Botian_Shi1;~Yikang_LI2", "gender": "M;M;M;M;M;M;M", "homepage": ";;https://github.com/youquanl;https://cardwing.github.io/;;https://www.liyikang.top;https://superkoma.github.io/", "dblp": "241/5040;93/866;91/6914;210/3047;245/8742;;60/6411-2", "google_scholar": "_fl950wAAAAJ;skQROj8AAAAJ;;https://scholar.google.com.hk/citations?user=o9mX9sUAAAAJ;K0PpvLkAAAAJ;G9b6hpYAAAAJ;9h86v8kAAAAJ", "orcid": ";0000-0002-3219-9996;0000-0002-1625-4318;0000-0002-2844-7416;0000-0003-3677-7252;;", "linkedin": ";;;yuenan-hou-859589136/;friskit/;;", "or_profile": "~Yeqi_BAI1;~Ben_Fei2;~Youquan_Liu1;~Yuenan_Hou1;~Botian_Shi1;~Yikang_LI2;~TAO_MA1", "aff": "Shanghai AI Laboratory;Fudan University;Hochschule Bremerhaven;Shanghai AI Laboratory;Shanghai AI Lab;Shanghai AI Lab;The Chinese University of Hong Kong", "aff_domain": "pjlab.org.cn;fudan.edu.cn;hs-bremerhaven.de;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk", "position": "Researcher;PhD student;MS student;Researcher;Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\nbai2023rangeperception,\ntitle={RangePerception: Taming Li{DAR} Range View for Efficient and Accurate 3D Object Detection},\nauthor={Yeqi BAI and Ben Fei and Youquan Liu and Tao MA and Yuenan Hou and Botian Shi and Yikang LI},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9kFQEJSyCM}\n}", "github": "", "project": "", "reviewers": "GJQ1;PXtx;2sVP;ZtTP", "pdf_size": 4210091, "rating": "6;6;6;6", "confidence": "5;4;5;5", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "2;3;3;3", "wc_summary": "118;173;83;22", "wc_strengths": "120;84;45;57", "wc_weaknesses": "160;232;53;60", "wc_questions": "69;23;76;37", "wc_limitations": "5;9;13;12", "wc_review": "472;521;270;188", "wc_reply_reviewers": "213;43;20;0", "wc_reply_authors": "605;56;46;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 54.82244066073673 ], "wc_strengths_avg": [ 76.5, 28.81405906844782 ], "wc_weaknesses_avg": [ 126.25, 74.29123434160991 ], "wc_questions_avg": [ 51.25, 21.958768180387533 ], "wc_limitations_avg": [ 9.75, 3.112474899497183 ], "wc_review_avg": [ 362.75, 137.94813336903113 ], "wc_reply_reviewers_avg": [ 69.0, 84.51922858142991 ], "wc_reply_authors_avg": [ 176.75, 248.1505339506647 ], "reply_reviewers_avg": [ 
1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10616140108085390222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pjlab.org.cn;fudan.edu.cn;hs-bremerhaven.de;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk", "author_num": 7, "aff_unique_index": "0;1;2;0;3;3;4", "aff_unique_norm": "Shanghai AI Laboratory;Fudan University;Hochschule Bremerhaven;Shanghai AI Lab;Chinese University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.fudan.edu.cn;https://www.hs-bremerhaven.de;https://www.shanghaiailab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "SAIL;Fudan;;SAIL;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Germany" }, { "title": "Low-shot Object Learning with Mutual Exclusivity Bias", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73673", "id": "9lOVNw7guQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/de6ff07cbd222c10d694c2b2f732aceb-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=9lOVNw7guQ", "openreview": "https://openreview.net/forum?id=9lOVNw7guQ", "poster": "/media/PosterPDFs/NeurIPS%202023/73673.png?t=1701804771.9053853", "slides": "https://nips.cc/virtual/2023/poster/73673", "video": "https://nips.cc/virtual/2023/poster/73673", "author_site": "Anh Thai, Ahmad Humayun, Stefan Stojanov, Zixuan Huang, Bikram Boote, James Rehg", "tldr": "", "abstract": "This paper introduces Low-shot Object Learning with Mutual Exclusivity Bias (LSME), the first computational framing of mutual exclusivity bias, a phenomenon commonly observed in infants during word learning. We provide a novel dataset, comprehensive baselines, and a SOTA method to enable the ML community to tackle this challenging learning task. The goal of LSME is to analyze an RGB image of a scene containing multiple objects and correctly associate a previously-unknown object instance with a provided category label. This association is then used to perform low-shot learning to test category generalization. We provide a data generation pipeline for the LSME problem and conduct a thorough analysis of the factors that contribute to its difficulty. Additionally, we evaluate the performance of multiple baselines, including state-of-the-art foundation models. Finally, we present a baseline approach that outperforms state-of-the-art models in terms of low-shot accuracy. 
Code and data are available at https://github.com/rehg-lab/LSME.", "keywords": "low-shot learning;self-supervised learning;object learning;mutual exclusivity bias", "primary_area": "", "supplementary_material": "/attachment/f2aaf4a12a087a24fc449aa9ea60f42e033997c8.pdf", "author": "Ngoc Anh Thai;Ahmad Humayun;Stefan Stojanov;Zixuan Huang;Bikram Boote;James Matthew Rehg", "authorids": "~Ngoc_Anh_Thai1;~Ahmad_Humayun2;~Stefan_Stojanov1;~Zixuan_Huang2;~Bikram_Boote1;~James_Matthew_Rehg1", "gender": "F;M;M;M;M;M", "homepage": "https://anhthai1997.wordpress.com/;http://ahumayun.com/;https://sstojanov.github.io/;http://zixuanh.com/;;http://rehg.org/", "dblp": "250/4394;36/9989;239/5058;218/2712-1;292/2707;r/JMRehg", "google_scholar": "_pchxWQAAAAJ;SdZCMmgAAAAJ;XC_WricAAAAJ;;;https://scholar.google.com.tw/citations?user=8kA3eDwAAAAJ", "orcid": ";;;;;0000-0003-1793-5462", "linkedin": "anh-thai/;;;;bikram-boote-7136b4154/;", "or_profile": "~Ngoc_Anh_Thai1;~Ahmad_Humayun2;~Stefan_Stojanov1;~Zixuan_Huang2;~Bikram_Boote1;~James_Rehg1", "aff": "Meta AI;Google DeepMind;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;University of Illinois, Urbana Champaign", "aff_domain": "meta.com;google.com;gatech.edu;gatech.edu;gatech.edu;illinois.edu", "position": "Intern;Researcher;PhD student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nthai2023lowshot,\ntitle={Low-shot Object Learning with Mutual Exclusivity Bias},\nauthor={Ngoc Anh Thai and Ahmad Humayun and Stefan Stojanov and Zixuan Huang and Bikram Boote and James Matthew Rehg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=9lOVNw7guQ}\n}", "github": "", "project": "", "reviewers": "HVNj;BMRQ;xYTB;FRCv", "pdf_size": 1930503, "rating": "6;6;7;8", "confidence": "4;5;3;5", "wc_summary_and_contributions": "111;197;64;58", "wc_strengths": "165;22;18;136", "wc_improvement": "268;192;18;161", "wc_limitations": "31;22;23;20", "wc_correctness": "18;19;8;1", "wc_clarity": "29;25;6;6", "wc_relation_to_prior_work": "18;2;9;95", "wc_documentation": "1;2;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "642;482;148;479", "wc_reply_reviewers": "89;27;11;25", "wc_reply_authors": "601;1344;210;778", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 107.5, 55.599010782566985 ], "wc_strengths_avg": [ 85.25, 66.06578161196612 ], "wc_improvement_avg": [ 159.75, 90.626637916233 ], "wc_limitations_avg": [ 24.0, 4.183300132670378 ], "wc_correctness_avg": [ 11.5, 7.433034373659253 ], "wc_clarity_avg": [ 16.5, 10.594810050208546 ], "wc_relation_to_prior_work_avg": [ 31.0, 37.383151285037485 ], "wc_documentation_avg": [ 1.25, 0.4330127018922193 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 437.75, 179.81431394635968 ], "wc_reply_reviewers_avg": [ 38.0, 30.083217912982647 ], "wc_reply_authors_avg": [ 733.25, 408.13562390460356 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2168570024767006356&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "meta.com;google.com;gatech.edu;gatech.edu;gatech.edu;illinois.edu", "author_num": 6, 
"aff_unique_index": "0;1;2;2;2;3", "aff_unique_norm": "Meta;Google;Georgia Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": "Meta AI;Google DeepMind;;", "aff_unique_url": "https://meta.com;https://deepmind.com;https://www.gatech.edu;https://illinois.edu", "aff_unique_abbr": "Meta;DeepMind;Georgia Tech;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Multi-body SE(3) Equivariance for Unsupervised Rigid Segmentation and Motion Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72579", "id": "9lygTqLdWn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efca456a4e861f3b47455c44bb134424-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9lygTqLdWn", "openreview": "https://openreview.net/forum?id=9lygTqLdWn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72579", "video": "https://nips.cc/virtual/2023/poster/72579", "author_site": "Jia-Xing Zhong, Ta-Ying Cheng, Yuhang He, Kai Lu, Kaichen Zhou, Andrew Markham, Niki Trigoni", "tldr": "", "abstract": "A truly generalizable approach to rigid segmentation and motion estimation is fundamental to 3D understanding of articulated objects and moving scenes. In view of the closely intertwined relationship between segmentation and motion estimates, we present an SE(3) equivariant architecture and a training strategy to tackle this task in an unsupervised manner. Our architecture is composed of two interconnected, lightweight heads. These heads predict segmentation masks using point-level invariant features and estimate motion from SE(3) equivariant features, all without the need for category information. Our training strategy is unified and can be implemented online, which jointly optimizes the predicted segmentation and motion by leveraging the interrelationships among scene flow, segmentation mask, and rigid transformations. We conduct experiments on four datasets to demonstrate the superiority of our method. The results show that our method excels in both model performance and computational efficiency, with only 0.25M parameters and 0.92G FLOPs. 
To the best of our knowledge, this is the first work designed for category-agnostic part-level SE(3) equivariance in dynamic point clouds.", "keywords": "Dynamic Point Cloud Analytics;Multi-body Motion", "primary_area": "", "supplementary_material": "/attachment/a060ebd9a46625f7e1777de9a7f0d52e7fba3747.pdf", "author": "Jia-Xing Zhong;Ta-Ying Cheng;Yuhang He;Kai Lu;Kaichen Zhou;Andrew Markham;Niki Trigoni", "authorids": "~Jia-Xing_Zhong1;~Ta-Ying_Cheng1;~Yuhang_He3;~Kai_Lu5;~Kaichen_Zhou1;~Andrew_Markham2;~Niki_Trigoni1", "gender": "M;M;M;M;M;M;F", "homepage": ";https://ttchengab.github.io;https://yuhanghe01.github.io/;https://www.cs.ox.ac.uk/people/kai.lu/;http://zalex97.github.io/;;https://www.cs.ox.ac.uk/people/niki.trigoni/", "dblp": "208/4752;264/7281.html;;;;83/7169;t/NikiTrigoni", "google_scholar": "dIckm98AAAAJ;onX3k7kAAAAJ;H1p3ve8AAAAJ;;;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jia-Xing_Zhong1;~Ta-Ying_Cheng1;~Yuhang_He3;~Kai_Lu5;~Kaichen_Zhou1;~Andrew_Markham2;~Niki_Trigoni1", "aff": "Department of Computer Science, University of Oxford;University of Oxford;University of Oxford;University of Oxford;Department of Computer Science, University of Oxford;University of Oxford;University of Oxford", "aff_domain": "cs.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhong2023multibody,\ntitle={Multi-body {SE}(3) Equivariance for Unsupervised Rigid Segmentation and Motion Estimation},\nauthor={Jia-Xing Zhong and Ta-Ying Cheng and Yuhang He and Kai Lu and Kaichen Zhou and Andrew Markham and Niki Trigoni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9lygTqLdWn}\n}", "github": "", "project": "", "reviewers": "XLXX;86bd;S5HY;ZttT;mTMF;sjc7", "pdf_size": 1496103, "rating": "5;5;6;6;6;6", "confidence": "5;2;4;4;2;3", "soundness": "3;3;3;3;3;2", "novelty": "3;3;3;3;3;3", "presentation": "2;2;3;3;3;2", "wc_summary": "130;40;80;77;70;132", "wc_strengths": "140;72;43;103;54;60", "wc_weaknesses": "320;59;242;78;33;106", "wc_questions": "64;9;2;4;15;447", "wc_limitations": "11;9;17;38;5;2", "wc_review": "665;189;384;300;177;747", "wc_reply_reviewers": "176;0;20;18;23;0", "wc_reply_authors": "1210;0;44;869;71;552", "reply_reviewers": "2;0;1;1;1;0", "reply_authors": "4;1;2;4;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.1055415967851334 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 88.16666666666667, 32.946505462980774 ], "wc_strengths_avg": [ 78.66666666666667, 33.23485452887609 ], "wc_weaknesses_avg": [ 139.66666666666666, 104.73246339549588 ], "wc_questions_avg": [ 90.16666666666667, 160.96108915580257 ], "wc_limitations_avg": [ 13.666666666666666, 11.855612829185826 ], "wc_review_avg": [ 410.3333333333333, 221.54733630134717 ], "wc_reply_reviewers_avg": [ 39.5, 61.735322142190206 ], "wc_reply_authors_avg": [ 457.6666666666667, 460.83065098676855 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.10660035817780522, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11453116983314203969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Beyond Invariance: Test-Time Label-Shift Adaptation for Addressing \"Spurious\" Correlations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72578", "id": "9mJXDcr17V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b0c1645f3d6a1730931e65ecbf91ac3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9mJXDcr17V", "openreview": "https://openreview.net/forum?id=9mJXDcr17V", "poster": "/media/PosterPDFs/NeurIPS%202023/72578.png?t=1701551158.4663851", "slides": "https://nips.cc/virtual/2023/poster/72578", "video": "https://nips.cc/virtual/2023/poster/72578", "author_site": "Qingyao Sun, Kevin Murphy, Sayna Ebrahimi, Alexander D'Amour", "tldr": "", "abstract": "Changes in the data distribution at test time can have deleterious effects on the performance of predictive models $p(y|x)$.\nWe consider situations where there are additional meta-data labels (such as group labels), denoted by $z$, that can account for such changes in the distribution.\nIn particular, we assume that the prior distribution $p(y,z)$, which models the dependence between the class label $y$ and the \"nuisance\" factors $z$, may change across domains, either due to a change in the correlation between these terms, or a change in one of their marginals.\nHowever, we assume that the generative model for features $p(x|y,z)$ is invariant across domains.\nWe note that this corresponds to an expanded version of the widely used \"label shift\" assumption, where the labels now also include the nuisance factors $z$. 
\nBased on this observation, we propose a test-time label shift correction that adapts to changes in the joint distribution $p(y, z)$ using EM applied to unlabeled samples from the target domain distribution, $p_t(x)$.\nImportantly, we are able to avoid fitting a generative model $p(x|y,z)$, and merely need to reweight the outputs of a discriminative model $p_s(y,z|x)$ trained on the source distribution.\nWe evaluate our method, which we call \"Test-Time Label-Shift Adaptation\" (TTLSA), on several standard image and text datasets, as well as the CheXpert chest X-ray dataset, and show that it improves performance over methods that target invariance to changes in the distribution, as well as baseline empirical risk minimization methods.\nCode for reproducing experiments is available at https://github.com/nalzok/test-time-label-shift.", "keywords": "Distribution shift;Spurious correlation;Group robustness", "primary_area": "", "supplementary_material": "/attachment/d486921368522a6c6296dbb9676435855b11f19a.pdf", "author": "Qingyao Sun;Kevin Patrick Murphy;Sayna Ebrahimi;Alexander D'Amour", "authorids": "~Qingyao_Sun1;~Kevin_Patrick_Murphy1;~Sayna_Ebrahimi1;~Alexander_D'Amour1", "gender": ";;F;M", "homepage": "https://nalzok.github.io/;https://www.cs.ubc.ca/~murphyk/;https://saynaebrahimi.github.io/;http://www.alexdamour.com", "dblp": "271/4259;26/2599;207/7584;209/4892", "google_scholar": ";MxxZkEcAAAAJ;wRyjJfMAAAAJ;okP0uukAAAAJ", "orcid": ";;;", "linkedin": ";;saynaebrahimi/;", "or_profile": "~Qingyao_Sun1;~Kevin_Patrick_Murphy1;~Sayna_Ebrahimi1;~Alexander_D'Amour1", "aff": "University of Chicago;Google;Google;Google", "aff_domain": "uchicago.edu;google.com;google.com;google.com", "position": "MS student;Principal Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nsun2023beyond,\ntitle={Beyond Invariance: Test-Time Label-Shift Adaptation for Addressing ''Spurious'' Correlations},\nauthor={Qingyao Sun and Kevin Patrick Murphy and Sayna Ebrahimi and Alexander D'Amour},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9mJXDcr17V}\n}", "github": "", "project": "", "reviewers": "C4bL;1Li8;wAGR;KyoC", "pdf_size": 627662, "rating": "6;6;6;6", "confidence": "3;4;4;3", "soundness": "3;4;2;3", "novelty": "3;2;2;3", "presentation": "3;4;2;4", "wc_summary": "78;211;104;99", "wc_strengths": "33;195;73;75", "wc_weaknesses": "138;85;55;96", "wc_questions": "16;152;181;29", "wc_limitations": "4;8;4;14", "wc_review": "269;651;417;313", "wc_reply_reviewers": "37;13;26;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 123.0, 51.73490117899134 ], "wc_strengths_avg": [ 94.0, 60.67124524847005 ], "wc_weaknesses_avg": [ 93.5, 29.75315109362368 ], "wc_questions_avg": [ 94.5, 72.87146217827662 ], "wc_limitations_avg": [ 7.5, 4.092676385936225 ], "wc_review_avg": [ 412.5, 147.81322674239948 ], "wc_reply_reviewers_avg": [ 22.0, 10.27131929208707 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=8496913048540093715&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "uchicago.edu;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Chicago;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.uchicago.edu;https://www.google.com", "aff_unique_abbr": "UChicago;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Geometric Transformer with Interatomic Positional Encoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72577", "id": "9o6KQrklrE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aee2f03ecb2b2c1ea55a43946b651cfd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9o6KQrklrE", "openreview": "https://openreview.net/forum?id=9o6KQrklrE", "poster": "/media/PosterPDFs/NeurIPS%202023/72577.png?t=1701178105.2405257", "slides": "https://nips.cc/virtual/2023/poster/72577", "video": "https://nips.cc/virtual/2023/poster/72577", "author_site": "Yusong Wang, Shaoning Li, Tong Wang, Bin Shao, Nanning Zheng, Tie-Yan Liu", "tldr": "", "abstract": "The widespread adoption of Transformer architectures in various data modalities has opened new avenues for the applications in molecular modeling. \nNevertheless, it remains elusive that whether the Transformer-based architecture can do molecular modeling as good as equivariant GNNs. \nIn this paper, by designing Interatomic Positional Encoding (IPE) that\nparameterizes atomic environments as Transformer's positional encodings,\nwe propose Geoformer, a novel geometric Transformer \nto effectively model molecular structures for various molecular property prediction. \nWe evaluate Geoformer on several benchmarks, including the QM9 dataset and the recently proposed Molecule3D dataset. 
\nCompared with both Transformers and equivariant GNN models, Geoformer outperforms the state-of-the-art (SoTA) algorithms on QM9, and achieves the best performance on Molecule3D for both random and scaffold splits.\nBy introducing IPE, \nGeoformer paves the way for molecular geometric modeling based on Transformer architecture.\nCodes are available at https://github.com/microsoft/AI2BMD/tree/Geoformer.", "keywords": "Geometric Deep Learning;Molecular Modeling;Positional Encoding", "primary_area": "", "supplementary_material": "/attachment/477db1ff10f3636a4351cd2b8bf5c010ae6ad9a3.pdf", "author": "Yusong Wang;Shaoning Li;Tong Wang;Bin Shao;Nanning Zheng;Tie-Yan Liu", "authorids": "~Yusong_Wang1;~Shaoning_Li2;~Tong_Wang2;~Bin_Shao1;~Nanning_Zheng1;~Tie-Yan_Liu1", "gender": "M;;M;;M;M", "homepage": "https://yusowa0716.github.io;;;https://www.binshao.info/;;http://member.acm.org/~tieyanliu", "dblp": ";;51/6856-14;;07/256-1;l/TieYanLiu", "google_scholar": "sCBrMcIAAAAJ;;ETiIfc4AAAAJ;h9L4CgIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Nh832fgAAAAJ", "orcid": ";;0000-0002-9483-0050;;;0000-0002-0476-8020", "linkedin": ";;tong-wang-2a5965b9/;;;", "or_profile": "~Yusong_Wang1;~Shaoning_Li2;~Tong_Wang2;~Bin_Shao1;~Nanning_Zheng1;~Tie-Yan_Liu1", "aff": "Xi'an Jiaotong University;;Microsoft;Microsoft;Xi'an Jiaotong University;Microsoft", "aff_domain": "stu.xjtu.edu.cn;;microsoft.com;microsoft.com;xjtu.edu.cn;microsoft.com", "position": "PhD student;;Researcher;Principal Research Manager;Full Professor;Distinguished Scientist", "bibtex": "@inproceedings{\nwang2023geometric,\ntitle={Geometric Transformer with Interatomic Positional Encoding},\nauthor={Yusong Wang and Shaoning Li and Tong Wang and Bin Shao and Nanning Zheng and Tie-Yan Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9o6KQrklrE}\n}", "github": "", "project": "", "reviewers": "mN7Q;efGJ;M4bW;z5Cf;nHVJ", "pdf_size": 1263118, "rating": "5;5;5;6;8", "confidence": "4;3;3;2;5", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;4", "presentation": "3;3;2;3;3", "wc_summary": "41;123;70;76;45", "wc_strengths": "28;86;79;79;127", "wc_weaknesses": "290;177;247;157;50", "wc_questions": "206;181;78;15;282", "wc_limitations": "5;1;8;15;15", "wc_review": "570;568;482;342;519", "wc_reply_reviewers": "260;67;75;49;132", "wc_reply_authors": "838;144;56;99;23", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 71.0, 29.34620929523948 ], "wc_strengths_avg": [ 79.8, 31.466807909287525 ], "wc_weaknesses_avg": [ 184.2, 82.38786318384523 ], "wc_questions_avg": [ 152.4, 94.76412823426384 ], "wc_limitations_avg": [ 8.8, 5.528109984434102 ], "wc_review_avg": [ 496.2, 83.7863950770052 ], "wc_reply_reviewers_avg": [ 116.6, 76.8910918117307 ], "wc_reply_authors_avg": [ 232.0, 305.72078764781435 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5717718748968655, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3657664570343202597&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "stu.xjtu.edu.cn;;microsoft.com;microsoft.com;xjtu.edu.cn;microsoft.com", 
"author_num": 6, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Xi'an Jiao Tong University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "XJTU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "NeRF-IBVS: Visual Servo Based on NeRF for Visual Localization and Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72576", "id": "9pLaDXX8m3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a57081f257da7b440b8eda72a0b12d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9pLaDXX8m3", "openreview": "https://openreview.net/forum?id=9pLaDXX8m3", "poster": "/media/PosterPDFs/NeurIPS%202023/72576.png?t=1698204083.8016925", "slides": "https://nips.cc/virtual/2023/poster/72576", "video": "https://nips.cc/virtual/2023/poster/72576", "author_site": "Yuanze Wang, Yichao Yan, Dianxi Shi, Wenhan Zhu, Jianqiang Xia, Tan Jeff, Songchang Jin, KE GAO, XIAOBO LI, XIAOBO LI, Xiaokang Yang", "tldr": "", "abstract": "Visual localization is a fundamental task in computer vision and robotics. Training existing visual localization methods requires a large number of posed images to generalize to novel views, while state-of-the-art methods generally require dense ground truth 3D labels for supervision. However, acquiring a large number of posed images and dense 3D labels in the real world is challenging and costly. In this paper, we present a novel visual localization method that achieves accurate localization while using only a few posed images compared to other localization methods. To achieve this, we first use a few posed images with coarse pseudo-3D labels provided by NeRF to train a coordinate regression network. Then a coarse pose is estimated from the regression network with PNP. Finally, we use the image-based visual servo (IBVS) with the scene prior provided by NeRF for pose optimization. Furthermore, our method can provide effective navigation prior, which enable navigation based on IBVS without using custom markers and depth sensor. Extensive experiments on 7-Scenes and 12-Scenes datasets demonstrate that our method outperforms state-of-the-art methods under the same setting, with only 5\\% to 25\\% training data. 
Furthermore, our framework can be naturally extended to the visual navigation task based on IBVS, and its effectiveness is verified in simulation experiments.", "keywords": "NeRF;Image-Based Visual Servoing (IBVS);visual localization;visual navigation", "primary_area": "", "supplementary_material": "/attachment/f5dc14cdeac6a3f9dbe17d880a6224728112811d.zip", "author": "Yuanze Wang;Yichao Yan;Dianxi Shi;Wenhan Zhu;Jianqiang Xia;Tan Jeff;Songchang Jin;KE GAO;XIAOBO LI;Xiaokang Yang", "authorids": "~Yuanze_Wang1;~Yichao_Yan1;~Dianxi_Shi1;~Wenhan_Zhu1;~Jianqiang_Xia2;~Tan_Jeff1;~Songchang_Jin1;~KE_GAO2;~XIAOBO_LI4;~Xiaokang_Yang1", "gender": "M;M;M;M;M;M;F;M;M;M", "homepage": "https://github.com/wyz18018967952;https://daodaofr.github.io/;https://dl.acm.org/profile/81409595302;;https://user.qzone.qq.com/285288595/infocenter;;;https://icne.sjtu.edu.cn/info/1064/1078.htm;https://github.com/xiajianqiang;", "dblp": "94/10304;185/7881;04/6023;;;143/6354;81/2423.html;06/3071-1.html;;l/XiaoboLi-1", "google_scholar": ";ZPHMMRkAAAAJ;;;;;;yDEavdMAAAAJ;;", "orcid": ";0000-0003-3209-8965;0000-0002-8112-371X;0000-0001-8781-1110;;;;0000-0003-4029-3322;;0000-0002-8074-0230", "linkedin": ";;;;;songchang-jin-29669473/;;;;\u6653\u6ce2-\u674e-7b64271a3", "or_profile": "~Yuanze_Wang1;~Yichao_Yan1;~Dianxi_Shi1;~Wenhan_Zhu1;~Tan_Jeff1;~Songchang_Jin1;~KE_GAO2;~Xiaokang_Yang1;~jianqiang_Xia1;~LI_XIAOBO1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;National University of Defense Technology;Shanghai Jiaotong University;;intelligent game and decision lab;Alibaba Group;Shanghai Jiaotong University;National Innovation Institute of Defense Technology;Ant Group", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;nudt.edu.cn;sjtu.edu.cn;;mil.cn;alibaba-inc.com;sjtu.edu.cn;foxmail.com;antgroup.com", "position": "PhD student;Assistant Professor;Full Professor;Postdoc;;Associate Professor;Researcher;Full Professor;MS student;Researcher", "bibtex": "@inproceedings{\nwang2023nerfibvs,\ntitle={Ne{RF}-{IBVS}: Visual Servo Based on Ne{RF} for Visual Localization and Navigation},\nauthor={Yuanze Wang and Yichao Yan and Dianxi Shi and Wenhan Zhu and Jianqiang Xia and Tan Jeff and Songchang Jin and KE GAO and XIAOBO LI and Xiaokang Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9pLaDXX8m3}\n}", "github": "", "project": "", "reviewers": "91GX;F153;oADp;k2yC;iS7u", "pdf_size": 6381503, "rating": "4;5;5;6;6", "confidence": "3;4;4;4;2", "soundness": "3;2;3;4;3", "novelty": "2;3;2;3;2", "presentation": "2;2;2;3;2", "wc_summary": "82;234;223;70;65", "wc_strengths": "77;51;77;79;68", "wc_weaknesses": "154;110;256;346;284", "wc_questions": "3;63;136;2;36", "wc_limitations": "40;942;88;21;104", "wc_review": "356;1400;780;518;557", "wc_reply_reviewers": "4;241;129;136;84", "wc_reply_authors": "20;365;36;30;129", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 134.8, 76.7838524691227 ], "wc_strengths_avg": [ 70.4, 10.423051376636307 ], "wc_weaknesses_avg": [ 230.0, 86.28325445878824 ], "wc_questions_avg": [ 48.0, 49.50555524383097 ], "wc_limitations_avg": [ 239.0, 352.8058956423489 ], "wc_review_avg": [ 722.2, 364.9232248021493 ], "wc_reply_reviewers_avg": [ 118.8, 77.10097275650936 ], 
"wc_reply_authors_avg": [ 116.0, 130.523561091475 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.13363062095621217, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9409600713322925921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;nudt.edu.cn;sjtu.edu.cn;;mil.cn;alibaba-inc.com;sjtu.edu.cn;foxmail.com;antgroup.com", "author_num": 10, "aff_unique_index": "0;0;1;0;2;3;0;4;5", "aff_unique_norm": "Shanghai Jiao Tong University;National University of Defense Technology;Intelligent Game and Decision Lab;Alibaba Group;National Innovation Institute of Defense Technology;Ant Group", "aff_unique_dep": ";;Game and Decision Lab;;;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.nudt.edu.cn/;;https://www.alibaba.com;;https://www.antgroup.com", "aff_unique_abbr": "SJTU;NUDT;;Alibaba;;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Generalized Logit Adjustment: Calibrating Fine-tuned Models by Removing Label Bias in Foundation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72575", "id": "9qG6cMGUWk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cbe1fd3136e0f049bb8bc104231ccb99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9qG6cMGUWk", "openreview": "https://openreview.net/forum?id=9qG6cMGUWk", "poster": "/media/PosterPDFs/NeurIPS%202023/72575.png?t=1697891623.299102", "slides": "https://nips.cc/virtual/2023/poster/72575", "video": "https://nips.cc/virtual/2023/poster/72575", "author_site": "Beier Zhu, Kaihua Tang, QIANRU SUN, Hanwang Zhang", "tldr": "", "abstract": "Foundation models like CLIP allow zero-shot transfer on various tasks without additional training data. Yet, the zero-shot performance is less competitive than a fully supervised one. Thus, to enhance the performance, fine-tuning and ensembling are also commonly adopted to better fit the downstream tasks. However, we argue that such prior work has overlooked the inherent biases in foundation models. Due to the highly imbalanced Web-scale training set, these foundation models are inevitably skewed toward frequent semantics, and thus the subsequent fine-tuning or ensembling is still biased. In this study, we systematically examine the biases in foundation models and demonstrate the efficacy of our proposed Generalized Logit Adjustment (GLA) method. Note that bias estimation in foundation models is challenging, as most pre-train data cannot be explicitly assessed like in traditional long-tailed classification tasks.\nTo this end, GLA has an optimization-based bias estimation approach for debiasing foundation models. As our work resolves a fundamental flaw in the pre-training, the proposed GLA demonstrates significant improvements across a diverse range of tasks: it achieves 1.5 pp accuracy gains on ImageNet, an large average improvement (1.4-4.6 pp) on 11 few-shot datasets, 2.4 pp gains on long-tailed classification. 
Codes are in https://github.com/BeierZhu/GLA.", "keywords": "Foundation Model;Class Bias;Generalized Logit Adjustment", "primary_area": "", "supplementary_material": "/attachment/56c06167a4be037b793bc5484606778e0d8aa3ff.zip", "author": "Beier Zhu;Kaihua Tang;Qianru Sun;Hanwang Zhang", "authorids": "~Beier_Zhu1;~Kaihua_Tang1;~Qianru_Sun2;~Hanwang_Zhang3", "gender": "M;M;F;M", "homepage": "https://beierzhu.github.io;https://kaihuatang.github.io/;https://qianrusun.com/;https://mreallab.github.io/index.html", "dblp": "243/7531;196/7269;127/6132.html;79/8116.html", "google_scholar": "jHczmjwAAAAJ;https://scholar.google.com.sg/citations?user=WuO1sSkAAAAJ;https://scholar.google.de/citations?user=fNfrGMIAAAAJ;YG0DFyYAAAAJ", "orcid": "0000-0002-7900-6979;;0000-0003-2689-317X;", "linkedin": ";kaihua-tang-1b2522125/;;", "or_profile": "~Beier_Zhu1;~Kaihua_Tang1;~Qianru_Sun2;~Hanwang_Zhang3", "aff": "Nanyang Technological University;Nanyang Technological University;Singapore Management University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;smu.edu.sg;ntu.edu.sg", "position": "PhD student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhu2023generalized,\ntitle={Generalized Logit Adjustment: Calibrating Fine-tuned Models by Removing Label Bias in Foundation Models},\nauthor={Beier Zhu and Kaihua Tang and Qianru Sun and Hanwang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9qG6cMGUWk}\n}", "github": "", "project": "", "reviewers": "egNQ;8Trg;D4ZP;iLax;wcSK", "pdf_size": 1021950, "rating": "4;4;7;7;7", "confidence": "5;4;4;2;3", "soundness": "3;3;4;2;3", "novelty": "3;2;3;1;3", "presentation": "3;3;4;2;3", "wc_summary": "45;102;101;60;46", "wc_strengths": "46;91;136;15;84", "wc_weaknesses": "97;193;85;290;208", "wc_questions": "89;46;8;3;113", "wc_limitations": "41;8;39;8;19", "wc_review": "318;440;369;376;470", "wc_reply_reviewers": "112;0;21;29;657", "wc_reply_authors": "195;0;0;23;928", "reply_reviewers": "1;0;1;1;3", "reply_authors": "2;1;1;2;4", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 70.8, 25.623426780975255 ], "wc_strengths_avg": [ 74.4, 41.22426469932484 ], "wc_weaknesses_avg": [ 174.6, 75.92259215806584 ], "wc_questions_avg": [ 51.8, 43.50356307246568 ], "wc_limitations_avg": [ 23.0, 14.463747785411636 ], "wc_review_avg": [ 394.6, 54.0651458890106 ], "wc_reply_reviewers_avg": [ 163.8, 249.5286757068213 ], "wc_reply_authors_avg": [ 229.2, 356.9523217461962 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7205766921228921, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14707651670581216600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ntu.edu.sg;ntu.edu.sg;smu.edu.sg;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nanyang Technological University;Singapore Management University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.smu.edu.sg", "aff_unique_abbr": "NTU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": 
"Efficient Adversarial Attacks on Online Multi-agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72574", "id": "9qlJGjO7bA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4cddc8fc57039f8fe44e23aba1e4df40-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9qlJGjO7bA", "openreview": "https://openreview.net/forum?id=9qlJGjO7bA", "poster": "/media/PosterPDFs/NeurIPS%202023/72574.png?t=1701064581.8550112", "slides": "https://nips.cc/virtual/2023/poster/72574", "video": "https://nips.cc/virtual/2023/poster/72574", "author_site": "Guanlin Liu, Lifeng LAI", "tldr": "", "abstract": "Due to the broad range of applications of multi-agent reinforcement learning (MARL), understanding the effects of adversarial attacks against MARL model is essential for the safe applications of this model. Motivated by this, we investigate the impact of adversarial attacks on MARL. In the considered setup, there is an exogenous attacker who is able to modify the rewards before the agents receive them or manipulate the actions before the environment receives them. The attacker aims to guide each agent into a target policy or maximize the cumulative rewards under some specific reward function chosen by the attacker, while minimizing the amount of the manipulation on feedback and action. We first show the limitations of the action poisoning only attacks and the reward poisoning only attacks. We then introduce a mixed attack strategy with both the action poisoning and reward poisoning. We show that the mixed attack strategy can efficiently attack MARL agents even if the attacker has no prior information about the underlying environment and the agents\u2019 algorithms.", "keywords": "adversarial attacks; multi agent reinforcement learning;", "primary_area": "", "supplementary_material": "/attachment/123ded4122cd31aa943ff037da1faac351ae4509.zip", "author": "Guanlin Liu;Lifeng Lai", "authorids": "~Guanlin_Liu1;~Lifeng_Lai1", "gender": "M;", "homepage": ";", "dblp": "224/9954;12/4889", "google_scholar": "a7eYJk4AAAAJ;gOhaCfUAAAAJ", "orcid": "0000-0002-0595-9398;", "linkedin": ";", "or_profile": "~Guanlin_Liu1;~Lifeng_Lai1", "aff": "University of California, Davis;University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023efficient,\ntitle={Efficient Adversarial Attacks on Online Multi-agent Reinforcement Learning},\nauthor={Guanlin Liu and Lifeng Lai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9qlJGjO7bA}\n}", "github": "", "project": "", "reviewers": "qHBd;Mdqm;YKVp;SPgM", "pdf_size": 760590, "rating": "5;6;6;6", "confidence": "2;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "64;26;235;111", "wc_strengths": "47;71;222;260", "wc_weaknesses": "166;101;132;195", "wc_questions": "2;46;20;178", "wc_limitations": "8;101;1;81", "wc_review": "287;345;610;825", "wc_reply_reviewers": "56;20;55;28", "wc_reply_authors": "0;23;27;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.0, 78.73055315441395 ], "wc_strengths_avg": [ 150.0, 92.37694517573094 ], 
"wc_weaknesses_avg": [ 148.5, 35.344730866141845 ], "wc_questions_avg": [ 61.5, 69.05613658466567 ], "wc_limitations_avg": [ 47.75, 43.89404857153188 ], "wc_review_avg": [ 516.75, 215.63670258098458 ], "wc_reply_reviewers_avg": [ 39.75, 16.005858302509115 ], "wc_reply_authors_avg": [ 12.5, 12.579745625409124 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1999192441486448316&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucdavis.edu;ucdavis.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Gradient-Free Kernel Stein Discrepancy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72573", "id": "9rmwPAjk9O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b4d25dc0c52d3cf43d5b203cdfdf241-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9rmwPAjk9O", "openreview": "https://openreview.net/forum?id=9rmwPAjk9O", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72573", "video": "https://nips.cc/virtual/2023/poster/72573", "author_site": "Matthew Fisher, Chris Oates", "tldr": "", "abstract": "Stein discrepancies have emerged as a powerful statistical tool, being applied to fundamental statistical problems including parameter inference, goodness-of-fit testing, and sampling. The canonical Stein discrepancies require the derivatives of a statistical model to be computed, and in return provide theoretical guarantees of convergence detection and control. However, for complex statistical models, the stable numerical computation of derivatives can require bespoke algorithmic development and render Stein discrepancies impractical. This paper focuses on posterior approximation using Stein discrepancies, and introduces a collection of non-canonical Stein discrepancies that are gradient-free, meaning that derivatives of the statistical model are not required. Sufficient conditions for convergence detection and control are established, and applications to sampling and variational inference are presented.", "keywords": "Bayesian;discrepancy;kernel;sampling;Stein's method", "primary_area": "", "supplementary_material": "/attachment/ed6163b30f57074a5d9c9ec52fefa47b2c4e2feb.pdf", "author": "Matthew A Fisher;Chris J. Oates", "authorids": "~Matthew_A_Fisher1;~Chris_J._Oates1", "gender": ";", "homepage": "https://www.ncl.ac.uk/bigdata/people/people/fishermatthew.html;https://oates.work", "dblp": "57/5879;118/6076", "google_scholar": ";W_Ul5jMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Matthew_A_Fisher1;~Chris_J._Oates1", "aff": "Newcastle University, UK;Newcastle University", "aff_domain": "newcastle.ac.uk;ncl.ac.uk", "position": "Lecturer;Full Professor", "bibtex": "@inproceedings{\nfisher2023gradientfree,\ntitle={Gradient-Free Kernel Stein Discrepancy},\nauthor={Matthew A Fisher and Chris J. 
Oates},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9rmwPAjk9O}\n}", "github": "", "project": "", "reviewers": "WN2C;9g9f;SmKF;DoGh", "pdf_size": 3196099, "rating": "4;4;6;8", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;4;4", "wc_summary": "13;66;97;99", "wc_strengths": "13;82;105;152", "wc_weaknesses": "30;365;139;159", "wc_questions": "33;103;64;64", "wc_limitations": "30;10;42;13", "wc_review": "119;626;447;487", "wc_reply_reviewers": "0;0;15;143", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 68.75, 34.744603897583865 ], "wc_strengths_avg": [ 88.0, 50.114868053303304 ], "wc_weaknesses_avg": [ 173.25, 121.10403585347599 ], "wc_questions_avg": [ 66.0, 24.829418035870273 ], "wc_limitations_avg": [ 23.75, 13.007209539328564 ], "wc_review_avg": [ 419.75, 185.91311814931188 ], "wc_reply_reviewers_avg": [ 39.5, 60.068710657046736 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=523990860065510905&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "newcastle.ac.uk;ncl.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Newcastle University", "aff_unique_dep": "", "aff_unique_url": "https://www.ncl.ac.uk", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Dynamic Regret of Adversarial Linear Mixture MDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72572", "id": "9tUjsRLjf2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/becd02b89259774da2ede23116a80648-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9tUjsRLjf2", "openreview": "https://openreview.net/forum?id=9tUjsRLjf2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72572", "video": "https://nips.cc/virtual/2023/poster/72572", "author_site": "Long-Fei Li, Peng Zhao, Zhi-Hua Zhou", "tldr": "", "abstract": "We study reinforcement learning in episodic inhomogeneous MDPs with adversarial full-information rewards and the unknown transition kernel. We consider the linear mixture MDPs whose transition kernel is a linear mixture model and choose the \\emph{dynamic regret} as the performance measure. Denote by $d$ the dimension of the feature mapping, $H$ the horizon, $K$ the number of episodes, $P_T$ the non-stationary measure, we propose a novel algorithm that enjoys an $\\widetilde{\\mathcal{O}}\\big(\\sqrt{d^2 H^3K} + \\sqrt{H^4(K+P_T)(1+P_T)}\\big)$ dynamic regret under the condition that $P_T$ is known, which improves previously best-known dynamic regret for adversarial linear mixture MDP and adversarial tabular MDPs. We also establish an $\\Omega\\big(\\sqrt{d^2 H^3 K} + \\sqrt{H K (H+P_T)}\\big)$ lower bound, indicating our algorithm is \\emph{optimal} in $K$ and $P_T$. 
Furthermore, when the non-stationary measure $P_T$ is unknown, we design an online ensemble algorithm with a meta-base structure, which is proved to achieve an $\\widetilde{\\mathcal{O}}\\big(\\sqrt{d^2 H^3K} + \\sqrt{H^4(K+P_T)(1+P_T) + H^2 S_T^2}\\big)$ dynamic regret and here $S_T$ is the expected switching number of the best base-learner. The result can be optimal under certain regimes.", "keywords": "dynamic regret;adversarial MDPs;linear mixture MDPs;policy optimization", "primary_area": "", "supplementary_material": "", "author": "Long-Fei Li;Peng Zhao;Zhi-Hua Zhou", "authorids": "~Long-Fei_Li1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Long-Fei_Li1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@inproceedings{\nli2023dynamic,\ntitle={Dynamic Regret of Adversarial Linear Mixture {MDP}s},\nauthor={Long-Fei Li and Peng Zhao and Zhi-Hua Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9tUjsRLjf2}\n}", "github": "", "project": "", "reviewers": "Xpcd;BpVE;Gpzi;yhLY;cWVL", "pdf_size": 502892, "rating": "3;6;6;8;8", "confidence": "4;3;2;4;2", "soundness": "3;2;3;4;3", "novelty": "1;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "70;72;71;108;112", "wc_strengths": "78;52;38;67;42", "wc_weaknesses": "163;261;87;71;64", "wc_questions": "171;4;22;31;54", "wc_limitations": "1;8;11;7;1", "wc_review": "483;397;229;284;273", "wc_reply_reviewers": "110;82;90;32;0", "wc_reply_authors": "428;264;113;0;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 86.6, 19.158288023724875 ], "wc_strengths_avg": [ 55.4, 15.094369811290566 ], "wc_weaknesses_avg": [ 129.2, 74.7432940135769 ], "wc_questions_avg": [ 56.4, 59.52008064510665 ], "wc_limitations_avg": [ 5.6, 3.9799497484264794 ], "wc_review_avg": [ 333.2, 93.18025541926788 ], "wc_reply_reviewers_avg": [ 62.8, 40.56796765922592 ], "wc_reply_authors_avg": [ 161.0, 164.9509017859557 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3659625273556999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4313610516120969060&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";;", "author_num": 3 }, { "title": "Unbiased Compression Saves Communication in Distributed Optimization: When and How Much?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72571", "id": "9v6gpFTfCM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9602d22a8c791f23f8e4d1398e3fb5be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9v6gpFTfCM", "openreview": "https://openreview.net/forum?id=9v6gpFTfCM", "poster": "/media/PosterPDFs/NeurIPS%202023/72571.png?t=1701500720.8780425", "slides": "https://nips.cc/virtual/2023/poster/72571", "video": "https://nips.cc/virtual/2023/poster/72571", "author_site": "Yutong He, Xinmeng Huang, Kun Yuan", "tldr": "", "abstract": "Communication compression is a common technique in distributed optimization\nthat 
can alleviate communication overhead by transmitting compressed gradients and model parameters. However, compression can introduce information distortion, which slows down convergence and incurs more communication rounds to achieve desired solutions. Given the trade-off between lower per-round communication costs and additional rounds of communication, it is unclear whether communication compression reduces the total communication cost.\n\nThis paper explores the conditions under which unbiased compression, a widely used form of compression, can reduce the total communication cost, as well as the extent to which it can do so. To this end, we present the first theoretical formulation for characterizing the total communication cost in distributed optimization with unbiased compressors. We demonstrate that unbiased compression alone does not necessarily save the total communication cost, but this outcome can be achieved if the compressors used by all workers are further assumed independent. We establish lower bounds on the communication rounds required by algorithms using independent unbiased compressors to minimize smooth convex functions and show that these lower bounds are tight by refining the analysis for ADIANA. Our results reveal that using independent unbiased compression can reduce the total communication cost by a factor of up to $\\Theta(\\sqrt{\\min\\\\{n,\\kappa\\\\}})$ when all local smoothness constants are constrained by a common upper bound, where $n$ is the number of workers and $\\kappa$ is the condition number of the functions being minimized. These theoretical findings are supported by experimental results.", "keywords": "Communication Compression;Distributed Optimization;Unbiased Compression;Optimal Complexity", "primary_area": "", "supplementary_material": "/attachment/244518418b070e90903a994f904dc671df8fd3f7.pdf", "author": "Yutong He;Xinmeng Huang;Kun Yuan", "authorids": "~Yutong_He2;~Xinmeng_Huang1;~Kun_Yuan4", "gender": "M;M;", "homepage": "https://www.researchgate.net/profile/Yutong-He-16;;", "dblp": ";256/1617;", "google_scholar": "XsXjeysAAAAJ;vM2nHxEAAAAJ;", "orcid": "0009-0002-5078-6454;;", "linkedin": ";xinmeng-huang-8032221b3/;", "or_profile": "~Yutong_He2;~Xinmeng_Huang1;~Kun_Yuan4", "aff": "Alibaba Group;University of Pennsylvania;", "aff_domain": "alibaba-inc.com;upenn.edu;", "position": "Intern;PhD student;", "bibtex": "@inproceedings{\nhe2023unbiased,\ntitle={Unbiased Compression Saves Communication in Distributed Optimization: When and How Much?},\nauthor={Yutong He and Xinmeng Huang and Kun Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9v6gpFTfCM}\n}", "github": "", "project": "", "reviewers": "JbF9;gfiB;yR98;MRty;u1Ro;tEWM;oD7w", "pdf_size": 5458933, "rating": "3;5;7;7;7;7;7", "confidence": "2;4;4;4;3;1;2", "soundness": "2;2;3;3;3;4;3", "novelty": "2;2;3;3;3;3;3", "presentation": "1;2;3;3;3;3;3", "wc_summary": "35;37;59;66;143;96;92", "wc_strengths": "41;25;86;25;141;13;116", "wc_weaknesses": "216;81;180;178;139;11;41", "wc_questions": "33;107;1;158;119;15;1", "wc_limitations": "2;8;1;1;55;1;1", "wc_review": "327;258;327;428;597;136;251", "wc_reply_reviewers": "0;290;227;11;69;0;0", "wc_reply_authors": "116;977;149;79;36;0;0", "reply_reviewers": "0;2;2;1;1;0;0", "reply_authors": "3;4;2;3;2;1;1", "rating_avg": [ 6.142857142857143, 1.4568627181693672 ], "confidence_avg": [ 2.857142857142857, 1.124858267715973 ], "soundness_avg": [
2.857142857142857, 0.6388765649999399 ], "novelty_avg": [ 2.7142857142857144, 0.4517539514526256 ], "presentation_avg": [ 2.5714285714285716, 0.7284313590846836 ], "wc_summary_avg": [ 75.42857142857143, 35.32242162405849 ], "wc_strengths_avg": [ 63.857142857142854, 46.73459584696964 ], "wc_weaknesses_avg": [ 120.85714285714286, 71.8916418178449 ], "wc_questions_avg": [ 62.0, 59.739912477816226 ], "wc_limitations_avg": [ 9.857142857142858, 18.581316049258966 ], "wc_review_avg": [ 332.0, 136.24767363675818 ], "wc_reply_reviewers_avg": [ 85.28571428571429, 113.13419880290625 ], "wc_reply_authors_avg": [ 193.85714285714286, 323.96182692737494 ], "reply_reviewers_avg": [ 0.8571428571428571, 0.8329931278350428 ], "reply_authors_avg": [ 2.2857142857142856, 1.0301575072754257 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.09962709627734356, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1470087624165969203&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "alibaba-inc.com;upenn.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Alibaba Group;University of Pennsylvania", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.upenn.edu", "aff_unique_abbr": "Alibaba;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Diversify Your Vision Datasets with Automatic Diffusion-based Augmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72570", "id": "9wrYfqdrwk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f99f7b22ad47fa6ce151730cf8d17911-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9wrYfqdrwk", "openreview": "https://openreview.net/forum?id=9wrYfqdrwk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72570", "video": "https://nips.cc/virtual/2023/poster/72570", "author_site": "Lisa Dunlap, Alyssa Umino, Han Zhang, Jiezhi Yang, Joseph Gonzalez, Trevor Darrell", "tldr": "", "abstract": "Many fine-grained classification tasks, like rare animal identification, have limited training data and consequently classifiers trained on these datasets often fail to generalize to variations in the domain like changes in weather or location. As such, we explore how natural language descriptions of the domains seen in training data can be used with large vision models trained on diverse pretraining datasets to generate useful variations of the training data. We introduce ALIA (Automated Language-guided Image Augmentation), a method which utilizes large vision and language models to automatically generate natural language descriptions of a dataset's domains and augment the training data via language-guided image editing. To maintain data integrity, a model trained on the original dataset filters out minimal image edits and those which corrupt class-relevant information. The resulting dataset is visually consistent with the original training data and offers significantly enhanced diversity. We show that ALIA is able to surpass traditional data augmentation and text-to-image generated data on fine-grained classification tasks, including cases of domain generalization and contextual bias. 
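The data-integrity filter described above can be made concrete with a short sketch. This is a minimal, hypothetical Python illustration rather than ALIA's actual implementation: `edit_image`, `embed`, and `classifier` stand in for the language-guided editor, a feature extractor, and the model trained on the original dataset, and both thresholds are illustrative.

```python
import numpy as np

def filter_augmentations(images, labels, edit_image, embed, classifier,
                         min_edit_dist=0.1, min_confidence=0.5):
    """Apply the two rejection rules sketched above: drop minimal edits and
    drop edits that corrupt class-relevant information."""
    kept = []
    for img, y in zip(images, labels):
        edited = edit_image(img)  # hypothetical language-guided image edit
        # rule 1: reject minimal edits (feature-space change too small)
        if np.linalg.norm(embed(edited) - embed(img)) < min_edit_dist:
            continue
        # rule 2: reject corrupting edits (original-label confidence too low)
        if classifier(edited)[y] < min_confidence:
            continue
        kept.append((edited, y))
    return kept
```

Pairs that pass both tests are then appended to the original training set.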
Code is available at https://github.com/lisadunlap/ALIA.", "keywords": "data augmentation;diffusion;vision and language", "primary_area": "", "supplementary_material": "/attachment/60ca95710ea37f79fa049c2f04589d1c12fff894.pdf", "author": "Lisa Dunlap;Alyssa Umino;Han Zhang;Jiezhi Yang;Joseph E. Gonzalez;Trevor Darrell", "authorids": "~Lisa_Dunlap1;~Alyssa_Umino1;~Han_Zhang17;~Jiezhi_Yang1;~Joseph_E._Gonzalez1;~Trevor_Darrell2", "gender": "F;;F;M;M;M", "homepage": ";;https://pariszhang11.github.io/;https://stephenjyang.com/;http://eecs.berkeley.edu/~jegonzal;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";;;291/6839;61/8262;d/TrevorDarrell", "google_scholar": "https://scholar.google.com/citations?hl=en;;6-ZdX1MAAAAJ;BzyVxVUAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;0000-0003-2921-956X;", "linkedin": ";alyssa-umino/;pariszhang11/;stephenyangjz/;;", "or_profile": "~Lisa_Dunlap1;~Alyssa_Umino1;~Han_Zhang17;~Jiezhi_Yang1;~Joseph_E._Gonzalez1;~trevor_darrell1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu", "position": "PhD Student;Undergrad student;Undergrad student;Undergrad student;Associate Professor;Professor", "bibtex": "@inproceedings{\ndunlap2023diversify,\ntitle={Diversify Your Vision Datasets with Automatic Diffusion-based Augmentation},\nauthor={Lisa Dunlap and Alyssa Umino and Han Zhang and Jiezhi Yang and Joseph E. Gonzalez and Trevor Darrell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9wrYfqdrwk}\n}", "github": "", "project": "", "reviewers": "u22M;Aa4b;JLbC;p3WR;za7y", "pdf_size": 2463075, "rating": "3;7;7;7;7", "confidence": "4;4;4;4;4", "soundness": "4;3;3;3;4", "novelty": "2;3;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "58;47;52;208;97", "wc_strengths": "33;54;67;380;114", "wc_weaknesses": "45;99;123;375;141", "wc_questions": "194;153;51;64;246", "wc_limitations": "1;9;1;77;79", "wc_review": "331;362;294;1104;677", "wc_reply_reviewers": "0;89;59;68;104", "wc_reply_authors": "0;39;15;35;29", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.2, 1.6000000000000003 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 92.4, 60.433765396506615 ], "wc_strengths_avg": [ 129.6, 127.99156222189023 ], "wc_weaknesses_avg": [ 156.6, 113.87993677553567 ], "wc_questions_avg": [ 141.6, 74.84009620517601 ], "wc_limitations_avg": [ 33.4, 36.53819918934156 ], "wc_review_avg": [ 553.6, 307.18762995928074 ], "wc_reply_reviewers_avg": [ 64.0, 35.67071628100563 ], "wc_reply_authors_avg": [ 23.6, 14.333178293735134 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12757711962248304096&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu", 
"author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Probabilistic Inference in Reinforcement Learning Done Right", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72569", "id": "9yQ2aaArDn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a6e010edde1b8f2812f558b67a1974e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9yQ2aaArDn", "openreview": "https://openreview.net/forum?id=9yQ2aaArDn", "poster": "/media/PosterPDFs/NeurIPS%202023/72569.png?t=1699529468.766572", "slides": "https://nips.cc/virtual/2023/poster/72569", "video": "https://nips.cc/virtual/2023/poster/72569", "author_site": "Jean Tarbouriech, Tor Lattimore, Brendan O'Donoghue", "tldr": "", "abstract": "A popular perspective in Reinforcement learning (RL) casts the problem as probabilistic inference on a graphical model of the Markov decision process (MDP). The core object of study is the probability of each state-action pair being visited under the optimal policy. Previous approaches to approximate this quantity can be arbitrarily poor, leading to algorithms that do not implement genuine statistical inference and consequently do not perform well in challenging problems. In this work, we undertake a rigorous Bayesian treatment of the posterior probability of state-action optimality and clarify how it flows through the MDP. We first reveal that this quantity can indeed be used to generate a policy that explores efficiently, as measured by regret. Unfortunately, computing it is intractable, so we derive a new variational Bayesian approximation yielding a tractable convex optimization problem and establish that the resulting policy also explores efficiently. We call our approach VAPOR and show that it has strong connections to Thompson sampling, K-learning, and maximum entropy exploration. 
We conclude with some experiments demonstrating the performance advantage of a deep RL version of VAPOR.", "keywords": "Reinforcement learning;Bayesian inference;Exploration", "primary_area": "", "supplementary_material": "", "author": "Jean Tarbouriech;Tor Lattimore;Brendan O'Donoghue", "authorids": "~Jean_Tarbouriech1;~Tor_Lattimore1;~Brendan_O'Donoghue1", "gender": ";M;", "homepage": ";http://tor-lattimore.com;", "dblp": ";44/9886;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jean_Tarbouriech1;~Tor_Lattimore1;~Brendan_O'Donoghue1", "aff": ";Google DeepMind;", "aff_domain": ";google.com;", "position": ";Research Scientist;", "bibtex": "@inproceedings{\ntarbouriech2023probabilistic,\ntitle={Probabilistic Inference in Reinforcement Learning Done Right},\nauthor={Jean Tarbouriech and Tor Lattimore and Brendan O'Donoghue},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9yQ2aaArDn}\n}", "github": "", "project": "", "reviewers": "QdEN;UaER;NW3r;MCkd;3w7N", "pdf_size": 8459418, "rating": "3;6;6;7;10", "confidence": "4;2;4;3;4", "soundness": "1;4;4;3;4", "novelty": "3;3;3;3;4", "presentation": "2;3;3;1;4", "wc_summary": "69;80;52;66;41", "wc_strengths": "188;87;110;12;24", "wc_weaknesses": "515;139;177;291;1", "wc_questions": "208;33;27;44;1", "wc_limitations": "23;40;15;13;15", "wc_review": "1003;379;381;426;82", "wc_reply_reviewers": "561;73;17;1008;0", "wc_reply_authors": "788;0;0;1457;0", "reply_reviewers": "4;1;1;4;0", "reply_authors": "4;1;1;5;1", "rating_avg": [ 6.4, 2.244994432064365 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 1.16619037896906 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 61.6, 13.632314550361578 ], "wc_strengths_avg": [ 84.2, 63.69426975796174 ], "wc_weaknesses_avg": [ 224.6, 172.23890385159794 ], "wc_questions_avg": [ 62.6, 74.06105589309405 ], "wc_limitations_avg": [ 21.2, 10.007996802557443 ], "wc_review_avg": [ 454.2, 300.5071712954617 ], "wc_reply_reviewers_avg": [ 331.8, 396.47113387988287 ], "wc_reply_authors_avg": [ 449.0, 589.2008146633879 ], "reply_reviewers_avg": [ 2.0, 1.6733200530681511 ], "reply_authors_avg": [ 2.4, 1.7435595774162693 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.02227177015936872, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5704650925900311602&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";google.com;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "MultiFusion: Fusing Pre-Trained Models for Multi-Lingual, Multi-Modal Image Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72568", "id": "9ych3krqP0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ba8d1b46292c5e82cbfb3b3dc3b968af-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9ych3krqP0", "openreview": "https://openreview.net/forum?id=9ych3krqP0", "poster": "/media/PosterPDFs/NeurIPS%202023/72568.png?t=1699970299.4173357", "slides": "https://nips.cc/virtual/2023/poster/72568", "video": "https://nips.cc/virtual/2023/poster/72568", "author_site": "Marco Bellagente, Manuel Brack, Hannah Teufel, Felix 
Friedrich, Bj\u00f6rn Deiseroth, Constantin Eichenberg, Andrew Dai, Robert Baldock, Souradeep Nanda, Koen Oostermeijer, Andres Felipe Cruz-Salinas, Patrick Schramowski, Kristian Kersting, Samuel Weinbach", "tldr": "", "abstract": "The recent popularity of text-to-image diffusion models (DM) can largely be attributed to the intuitive interface they provide to users. The intended generation can be expressed in natural language, with the model producing faithful interpretations of text prompts. However, expressing complex or nuanced ideas in text alone can be difficult. To ease image generation, we propose MultiFusion that allows one to express complex and nuanced concepts with arbitrarily interleaved inputs of multiple modalities and languages. MultiFusion leverages pre-trained models and aligns them for integration into a cohesive system, thereby avoiding the need for extensive training from scratch. Our experimental results demonstrate the efficient transfer of capabilities from individual modules to the downstream model. Specifically, the fusion of all independent components allows the image generation module to utilize multilingual, interleaved multimodal inputs despite being trained solely on monomodal data in a single language.", "keywords": "diffusion;image generation;multimodal", "primary_area": "", "supplementary_material": "", "author": "Marco Bellagente;Manuel Brack;Hannah Benita Teufel;Felix Friedrich;Bj\u00f6rn Deiseroth;Constantin Eichenberg;Andrew Dai;Robert John Nicholas Baldock;Souradeep Nanda;Koen Oostermeijer;Andres Felipe Cruz-Salinas;Patrick Schramowski;Kristian Kersting;Samuel Weinbach", "authorids": "~Marco_Bellagente1;~Manuel_Brack1;~Hannah_Benita_Teufel1;~Felix_Friedrich1;~Bj\u00f6rn_Deiseroth1;~Constantin_Eichenberg1;~Andrew_Dai1;~Robert_John_Nicholas_Baldock1;~Souradeep_Nanda1;~Koen_Oostermeijer1;~Andres_Felipe_Cruz-Salinas1;~Patrick_Schramowski1;~Kristian_Kersting1;~Samuel_Weinbach1", "gender": "Not Specified;M;F;;;;;;M;M;;M;M;M", "homepage": "https://marcobellagente93.github.io;;;https://ml-research.github.io/people/ffriedrich/;;;https://scholar.google.com/citations?hl=en&user=EgdmMxUAAAAJ;;https://ghost---shadow.github.io/website/;https://www.aleph-alpha.com/;;https://ml-research.github.io/people/pschramowski/index.html;http://www.ml.informatik.tu-darmstadt.de/;https://aleph-alpha.com", "dblp": "294/7150;326/8265;;18/4626;;247/2141;;;335/1903;;;217/1650;40/3793;278/8408", "google_scholar": ";kJ9Abf8AAAAJ;;RfM9ud0AAAAJ;;;https://scholar.google.com/citations?hl=en;;RdhcgnoAAAAJ;;;GD481RkAAAAJ;QY-earAAAAAJ;", "orcid": ";;;0000-0001-8387-793X;;0000-0002-9973-2687;;;0000-0003-1303-5301;;;0000-0003-1231-7120;0000-0002-2873-9152;0000-0001-9481-5363", "linkedin": "marco-bellagente-025884128/;;hannah-t-3a565a69/;;;;;;souradeep-nanda/;;;;;samuel-weinbach", "or_profile": "~Marco_Bellagente1;~Manuel_Brack1;~Hannah_Benita_Teufel1;~Felix_Friedrich1;~Bj\u00f6rn_Deiseroth1;~Constantin_Eichenberg1;~Andrew_Dai1;~Robert_John_Nicholas_Baldock1;~Souradeep_Nanda1;~Koen_Oostermeijer1;~Andres_Felipe_Cruz-Salinas1;~Patrick_Schramowski1;~Kristian_Kersting1;~Samuel_Weinbach1", "aff": "Aleph-Alpha gmbh;Adobe Systems;Aleph Alpha GmbH;TU Darmstadt;;Aleph Alpha;Aleph Alpha GmbH;;Aleph Alpha Gmbh;University of Science and Technology of China;;TU Darmstadt;TU Darmstadt;Aleph Alpha GmbH", "aff_domain": "aleph-alpha.com;adobe.com;aleph-alpha.com;tu-darmstadt.de;;aleph-alpha.com;aleph-alpha.com;;aleph-alpha.com;ustc.edu.cn;;tu-darmstadt.de;tu-darmstadt.de;aleph-alpha.com", "position": 
"Researcher;Intern;Researcher;PhD student;;Researcher;Researcher;;Researcher;PhD student;;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nbellagente2023multifusion,\ntitle={MultiFusion: Fusing Pre-Trained Models for Multi-Lingual, Multi-Modal Image Generation},\nauthor={Marco Bellagente and Manuel Brack and Hannah Benita Teufel and Felix Friedrich and Bj{\\\"o}rn Deiseroth and Constantin Eichenberg and Andrew Dai and Robert John Nicholas Baldock and Souradeep Nanda and Koen Oostermeijer and Andres Felipe Cruz-Salinas and Patrick Schramowski and Kristian Kersting and Samuel Weinbach},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9ych3krqP0}\n}", "github": "", "project": "", "reviewers": "g96C;8eHc;2Kwy;5v9N;sjMA", "pdf_size": 3835699, "rating": "4;6;7;7;7", "confidence": "4;4;3;4;3", "soundness": "1;2;4;3;3", "novelty": "2;3;4;3;4", "presentation": "1;2;3;3;3", "wc_summary": "136;119;137;39;223", "wc_strengths": "204;140;90;50;90", "wc_weaknesses": "532;476;139;49;54", "wc_questions": "41;353;81;5;119", "wc_limitations": "6;36;34;1;57", "wc_review": "919;1124;481;144;543", "wc_reply_reviewers": "44;24;10;0;36", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 130.8, 58.53682601576549 ], "wc_strengths_avg": [ 114.8, 52.95431993709295 ], "wc_weaknesses_avg": [ 250.0, 210.58869865213566 ], "wc_questions_avg": [ 119.8, 122.7002852482422 ], "wc_limitations_avg": [ 26.8, 20.721003836687064 ], "wc_review_avg": [ 642.2, 344.31927044532375 ], "wc_reply_reviewers_avg": [ 22.8, 16.178998732925347 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.560112033611204, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9625170482008986714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "aleph-alpha.com;adobe.com;aleph-alpha.com;tu-darmstadt.de;;aleph-alpha.com;aleph-alpha.com;;aleph-alpha.com;ustc.edu.cn;;tu-darmstadt.de;tu-darmstadt.de;aleph-alpha.com", "author_num": 14, "aff_unique_index": "0;1;2;3;2;2;2;4;3;3;2", "aff_unique_norm": "Aleph-Alpha GmbH;Adobe;Aleph Alpha;Technische Universit\u00e4t Darmstadt;University of Science and Technology of China", "aff_unique_dep": ";Adobe Systems Incorporated;;;", "aff_unique_url": "https://www.aleph-alpha.com;https://www.adobe.com;https://www.aleph-alpha.com;https://www.tu-darmstadt.de;http://www.ustc.edu.cn", "aff_unique_abbr": "Aleph-Alpha;Adobe;Aleph Alpha;TU Darmstadt;USTC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;0;0;0;0;0;2;0;0;0", "aff_country_unique": "Germany;United States;China" }, { "title": "Three Iterations of (d \u2212 1)-WL Test Distinguish Non Isometric Clouds of d-dimensional Points", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72567", "id": "9yhYcjsdab", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e6cf8f77bd8e907f53babcd7664c710-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9yhYcjsdab", "openreview": 
"https://openreview.net/forum?id=9yhYcjsdab", "poster": "/media/PosterPDFs/NeurIPS%202023/72567.png?t=1701705716.7952213", "slides": "https://nips.cc/virtual/2023/poster/72567", "video": "https://nips.cc/virtual/2023/poster/72567", "author_site": "Valentino Delle Rose, Alexander Kozachinskiy, Cristobal Rojas, Mircea Petrache, Pablo Barcel\u00f3", "tldr": "", "abstract": "The Weisfeiler-Lehman (WL) test is a fundamental iterative algorithm for checking the isomorphism of graphs. It has also been observed that it underlies the design of several graph neural network architectures, whose capabilities and performance can be understood in terms of the expressive power of this test. Motivated by recent developments in machine learning applications to datasets involving three-dimensional objects, we study when the WL test is {\\em complete} for clouds of Euclidean points represented by complete distance graphs, i.e., when it can distinguish, up to isometry, any arbitrary such cloud. Our main result states that the $(d-1)$-dimensional WL test is complete for point clouds in $d$-dimensional Euclidean space, for any $d\\ge 2$, and only three iterations of the test suffice. Our result is tight for $d = 2, 3$. We also observe that the $d$-dimensional WL test only requires one iteration to achieve completeness.", "keywords": "euclidean graphs;point clouds;WL test;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Valentino delle Rose;Alexander Kozachinskiy;Cristobal Rojas;Mircea Petrache;Pablo Barcelo", "authorids": "valentin.dellerose@student.unisi.it;~Alexander_Kozachinskiy1;~Cristobal_Rojas1;~Mircea_Petrache1;~Pablo_Barcelo1", "gender": ";M;M;M;M", "homepage": ";https://kozlachinskiy.github.io/;;https://sites.google.com/site/mircpetrache/home;https://pbarcelo.ing.uc.cl/", "dblp": ";164/0711;83/3605;;29/5169", "google_scholar": ";gAKBJ7kAAAAJ;https://scholar.google.cl/citations?user=Z1aTUGsAAAAJ;HiYZ-6MAAAAJ;9OH3PokAAAAJ", "orcid": ";0000-0002-9956-9023;;0000-0003-2181-169X;0000-0003-2293-2653", "linkedin": ";;;mircea-petrache-4983a4104/;", "or_profile": "valentin.dellerose@student.unisi.it;~Alexander_Kozachinskiy1;~Cristobal_Rojas1;~Mircea_Petrache1;~Pablo_Barcelo1", "aff": ";Pontificia Universidad Catolica de Chile;Pontificia Universidad Catolica de Chile;Pontificia Universidad Catolica de Chile;Pontificia Universidad Cat\u00f3lica", "aff_domain": ";puc.cl;uc.cl;puc.cl;uc.cl", "position": ";Postdoc;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nrose2023three,\ntitle={Three Iterations of (d \\ensuremath{-} 1)-{WL} Test Distinguish Non Isometric Clouds of d-dimensional Points},\nauthor={Valentino delle Rose and Alexander Kozachinskiy and Cristobal Rojas and Mircea Petrache and Pablo Barcelo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9yhYcjsdab}\n}", "github": "", "project": "", "reviewers": "yTgF;Z3UF;q73v;4J2w", "pdf_size": 397801, "rating": "5;6;7;8", "confidence": "3;1;3;5", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "2;3;4;3", "wc_summary": "125;130;147;49", "wc_strengths": "55;62;288;32", "wc_weaknesses": "187;67;349;3", "wc_questions": "2;110;5;337", "wc_limitations": "49;1;9;1", "wc_review": "418;370;798;422", "wc_reply_reviewers": "0;0;21;8", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 1.4142135623730951 
], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.75, 37.69864055904404 ], "wc_strengths_avg": [ 109.25, 103.79637517755617 ], "wc_weaknesses_avg": [ 151.5, 131.7753770626364 ], "wc_questions_avg": [ 113.5, 136.1699306014364 ], "wc_limitations_avg": [ 15.0, 19.8997487421324 ], "wc_review_avg": [ 502.0, 172.11623979160132 ], "wc_reply_reviewers_avg": [ 7.25, 8.584142356694699 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10747826926036258417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";puc.cl;uc.cl;puc.cl;uc.cl", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Pontificia Universidad Catolica de Chile;Pontificia Universidad Cat\u00f3lica", "aff_unique_dep": ";", "aff_unique_url": "https://www.puc.cl;https://www.puc.cl", "aff_unique_abbr": "PUC;PUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Chile" }, { "title": "k-Median Clustering via Metric Embedding: Towards Better Initialization with Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72566", "id": "9zV2OXCrVF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9a612969b4df241ff0d8273656bd5a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=9zV2OXCrVF", "openreview": "https://openreview.net/forum?id=9zV2OXCrVF", "poster": "/media/PosterPDFs/NeurIPS%202023/72566.png?t=1702105416.842058", "slides": "https://nips.cc/virtual/2023/poster/72566", "video": "https://nips.cc/virtual/2023/poster/72566", "author_site": "Chenglin Fan, Ping Li, Xiaoyun Li", "tldr": "", "abstract": "In clustering algorithms, the choice of initial centers is crucial for the quality of the learned clusters. We propose a new initialization scheme for the $k$-median problem in the general metric space (e.g., discrete space induced by graphs), based on the construction of metric embedding tree structure of the data. We propose a novel and efficient search algorithm, for good initial centers that can be used subsequently for the local search algorithm. The so-called HST initialization method can produce initial centers achieving lower error than those from another popular method $k$-median++, also with higher efficiency when $k$ is not too small. Our HST initialization can also be easily extended to the setting of differential privacy (DP) to generate private initial centers. We show that the error of applying DP local search followed by our private HST initialization improves previous results on the approximation error, and approaches the lower bound within a small factor. 
Experiments demonstrate the effectiveness of our proposed methods.", "keywords": "privacy;clustering", "primary_area": "", "supplementary_material": "", "author": "Chenglin Fan;Ping Li;Xiaoyun Li", "authorids": "~Chenglin_Fan2;~Ping_Li3;~Xiaoyun_Li2", "gender": ";M;M", "homepage": ";http://www.stat.rutgers.edu/home/pingli/;", "dblp": ";62/5860-1;48/1982", "google_scholar": ";;", "orcid": ";;", "linkedin": "chenglin-fan-3a38b0a1/;;", "or_profile": "~Chenglin_Fan2;~Ping_Li3;~Xiaoyun_Li2", "aff": "Pennsylvania State University;LinkedIn;LinkedIn", "aff_domain": "psu.edu;linkedin.com;linkedin.com", "position": "Researcher;Engineer;Researcher", "bibtex": "@inproceedings{\nfan2023kmedian,\ntitle={k-Median Clustering via Metric Embedding: Towards Better Initialization with Differential Privacy},\nauthor={Chenglin Fan and Ping Li and Xiaoyun Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=9zV2OXCrVF}\n}", "github": "", "project": "", "reviewers": "vfbx;JWq6;5iV1;MjrB", "pdf_size": 649417, "rating": "5;6;6;7", "confidence": "4;2;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "2;3;4;3", "wc_summary": "81;69;135;547", "wc_strengths": "65;32;22;112", "wc_weaknesses": "98;163;55;122", "wc_questions": "342;47;127;165", "wc_limitations": "9;18;1;1", "wc_review": "595;329;340;947", "wc_reply_reviewers": "69;39;5;71", "wc_reply_authors": "354;0;0;73", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 208.0, 197.29419656948858 ], "wc_strengths_avg": [ 57.75, 35.13100482479828 ], "wc_weaknesses_avg": [ 109.5, 39.1184099881373 ], "wc_questions_avg": [ 170.25, 107.9198197737561 ], "wc_limitations_avg": [ 7.25, 7.013380069552769 ], "wc_review_avg": [ 552.75, 251.269153498793 ], "wc_reply_reviewers_avg": [ 46.0, 26.851443164195103 ], "wc_reply_authors_avg": [ 106.75, 145.82759512520255 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9757282522699722038&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "psu.edu;linkedin.com;linkedin.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Pennsylvania State University;LinkedIn Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.linkedin.com", "aff_unique_abbr": "PSU;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Accelerating Molecular Graph Neural Networks via Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72565", "id": "A18PgVSUgf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51ec452ca04d8ec7160e5bbaf76153f6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A18PgVSUgf", "openreview": "https://openreview.net/forum?id=A18PgVSUgf", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72565", "video": "https://nips.cc/virtual/2023/poster/72565", "author_site": "Filip Ekstr\u00f6m Kelvinius, Dimitar Georgiev, Artur Toshev, Johannes Gasteiger", "tldr": "", "abstract": 
"Recent advances in graph neural networks (GNNs) have enabled more comprehensive modeling of molecules and molecular systems, thereby enhancing the precision of molecular property prediction and molecular simulations. Nonetheless, as the field has been progressing to bigger and more complex architectures, state-of-the-art GNNs have become largely prohibitive for many large-scale applications. In this paper, we explore the utility of knowledge distillation (KD) for accelerating molecular GNNs. To this end, we devise KD strategies that facilitate the distillation of hidden representations in directional and equivariant GNNs, and evaluate their performance on the regression task of energy and force prediction. We validate our protocols across different teacher-student configurations and datasets, and demonstrate that they can consistently boost the predictive accuracy of student models without any modifications to their architecture. Moreover, we conduct comprehensive optimization of various components of our framework, and investigate the potential of data augmentation to further enhance performance. All in all, we manage to close the gap in predictive accuracy between teacher and student models by as much as 96.7\\% and 62.5\\% for energy and force prediction respectively, while fully preserving the inference throughput of the more lightweight models.", "keywords": "GNN;graph neural networks;knowledge distillation;molecules;molecular simulations", "primary_area": "", "supplementary_material": "/attachment/f831895b7e03bff9f34351a023264a3dd8f0ffd4.pdf", "author": "Filip Ekstr\u00f6m Kelvinius;Dimitar Georgiev;Artur Toshev;Johannes Gasteiger", "authorids": "~Filip_Ekstr\u00f6m_Kelvinius1;~Dimitar_Georgiev1;~Artur_Toshev1;~Johannes_Gasteiger1", "gender": "M;M;;M", "homepage": ";https://dimgeorgievv.github.io/;https://arturtoshev.github.io/;", "dblp": "350/0999;320/0426;344/3672;228/7897", "google_scholar": "L94IReoAAAAJ;https://scholar.google.co.uk/citations?user=v9OuyKAAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0003-0486-5565;", "linkedin": ";dimitar-georgievv/;;", "or_profile": "~Filip_Ekstr\u00f6m_Kelvinius1;~Dimitar_Georgiev1;~Artur_Toshev1;~Johannes_Klicpera1", "aff": "Link\u00f6ping University;Imperial College London;Technische Universit\u00e4t M\u00fcnchen;Google", "aff_domain": "liu.se;imperial.ac.uk;tum.de;google.com", "position": "PhD student;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nkelvinius2023accelerating,\ntitle={Accelerating Molecular Graph Neural Networks via Knowledge Distillation},\nauthor={Filip Ekstr{\\\"o}m Kelvinius and Dimitar Georgiev and Artur Toshev and Johannes Gasteiger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A18PgVSUgf}\n}", "github": "", "project": "", "reviewers": "xfxb;o8B6;6ZfK;n7yW;t1R8", "pdf_size": 12593848, "rating": "4;5;6;7;7", "confidence": "3;3;3;5;2", "soundness": "3;2;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "79;27;85;79;79", "wc_strengths": "28;22;75;136;12", "wc_weaknesses": "207;58;165;377;2", "wc_questions": "152;2;119;297;28", "wc_limitations": "17;1;8;150;8", "wc_review": "483;110;452;1039;129", "wc_reply_reviewers": "327;13;193;140;0", "wc_reply_authors": "771;0;304;112;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;1;2;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], 
"novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.8, 21.525798475317934 ], "wc_strengths_avg": [ 54.6, 46.10249450951651 ], "wc_weaknesses_avg": [ 161.8, 130.13439207219588 ], "wc_questions_avg": [ 119.6, 104.63383773904118 ], "wc_limitations_avg": [ 36.8, 56.82745815184769 ], "wc_review_avg": [ 442.6, 336.56535769446026 ], "wc_reply_reviewers_avg": [ 134.6, 121.12902212104248 ], "wc_reply_authors_avg": [ 237.4, 289.00076124467216 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.21004201260420152, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8388376612609895164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "liu.se;imperial.ac.uk;tum.de;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Link\u00f6ping University;Imperial College London;Technische Universit\u00e4t M\u00fcnchen;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.liu.se;https://www.imperial.ac.uk;https://www.tum.de;https://www.google.com", "aff_unique_abbr": "LiU;ICL;TUM;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "Sweden;United Kingdom;Germany;United States" }, { "title": "Tuning Multi-mode Token-level Prompt Alignment across Modalities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72564", "id": "A253n2EXCd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a547d86953a4e36aa8a1390e6f4708e2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A253n2EXCd", "openreview": "https://openreview.net/forum?id=A253n2EXCd", "poster": "/media/PosterPDFs/NeurIPS%202023/72564.png?t=1699959602.5978413", "slides": "https://nips.cc/virtual/2023/poster/72564", "video": "https://nips.cc/virtual/2023/poster/72564", "author_site": "Dongsheng Wang, Miaoge Li, Xinyang Liu, MingSheng Xu, Bo Chen, Hanwang Zhang", "tldr": "", "abstract": "Advancements in prompt tuning of vision-language models have underscored their potential in enhancing open-world visual concept comprehension. However, prior works only primarily focus on single-mode (only one prompt for each modality) and holistic level (image or sentence) semantic alignment, which fails to capture the sample diversity, leading to sub-optimal prompt discovery. To address the limitation, we propose a multi-mode token-level tuning framework that leverages the optimal transportation to learn and align a set of prompt tokens across modalities. Specifically, we rely on two essential factors: 1) multi-mode prompts discovery, which guarantees diverse semantic representations, and 2) token-level alignment, which helps explore fine-grained similarity. Consequently, the similarity can be calculated as a hierarchical transportation problem between the modality-specific sets. Extensive experiments on popular image recognition benchmarks show the superior generalization and few-shot abilities of our approach. 
The qualitative analysis demonstrates that the learned prompt tokens have the ability to capture diverse visual concepts.", "keywords": "Multi-modal prompt learning; Optimal transport", "primary_area": "", "supplementary_material": "/attachment/c9cde39c14b6fdc058491d0d034e600d02a4ef63.zip", "author": "Dongsheng Wang;Miaoge Li;Xinyang Liu;MingSheng Xu;Bo Chen;Hanwang Zhang", "authorids": "~Dongsheng_Wang4;~Miaoge_Li1;~Xinyang_Liu4;~MingSheng_Xu1;~Bo_Chen1;~Hanwang_Zhang3", "gender": "F;M;M;M;M;M", "homepage": "https://github.com/keepgoingjkg;https://xinyangatk.github.io;https://www.linkedin.cn/incareer/in/ACoAACmc6AIBlWmHdX7XgwFG7xeqBvDJAHEY54o;http://web.xidian.edu.cn/bchen/en/index.html;https://mreallab.github.io/index.html;https://wds2014.github.io/", "dblp": "330/3622;;;89/5615-1;79/8116.html;21/841-3", "google_scholar": ";https://scholar.google.com.hk/citations?hl=zh-CN;;;YG0DFyYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-5151-9388;;0000-0002-3380-5337", "linkedin": ";;https://www.linkedin.cn/incareer/in/ACoAACmc6AIBlWmHdX7XgwFG7xeqBvDJAHEY54o;;;", "or_profile": "~Miaoge_Li1;~Xinyang_Liu4;~MingSheng_Xu1;~Bo_Chen1;~Hanwang_Zhang3;~dongsheng_wang3", "aff": "Xidian University;Xidian University;;Xidian University;Nanyang Technological University;Xidian University", "aff_domain": "xidian.edu.cn;xidian.edu;;xidian.edu.cn;ntu.edu.sg;xidian.edu.cn", "position": "MS student;MS student;;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nwang2023tuning,\ntitle={Tuning Multi-mode Token-level Prompt Alignment across Modalities},\nauthor={Dongsheng Wang and Miaoge Li and Xinyang Liu and MingSheng Xu and Bo Chen and Hanwang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A253n2EXCd}\n}", "github": "", "project": "", "reviewers": "dk2b;S87N;TnsF;Uuq5;hEEC", "pdf_size": 2915749, "rating": "4;4;5;6;6", "confidence": "3;4;4;4;4", "soundness": "2;2;2;3;2", "novelty": "2;2;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "43;54;67;104;72", "wc_strengths": "43;24;59;124;55", "wc_weaknesses": "110;115;175;136;119", "wc_questions": "4;53;4;26;5", "wc_limitations": "22;1;1;2;48", "wc_review": "222;247;306;392;299", "wc_reply_reviewers": "81;0;0;17;21", "wc_reply_authors": "619;108;0;20;26", "reply_reviewers": "1;0;0;1;1", "reply_authors": "3;2;1;2;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 68.0, 20.659138413786767 ], "wc_strengths_avg": [ 61.0, 33.769809001532714 ], "wc_weaknesses_avg": [ 131.0, 23.672769166280485 ], "wc_questions_avg": [ 18.4, 19.23122460999299 ], "wc_limitations_avg": [ 14.8, 18.432579851990337 ], "wc_review_avg": [ 293.2, 58.588053389748325 ], "wc_reply_reviewers_avg": [ 23.8, 29.862350878656557 ], "wc_reply_authors_avg": [ 154.6, 235.11495060927112 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7957402950165982624&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "xidian.edu.cn;xidian.edu;;xidian.edu.cn;ntu.edu.sg;xidian.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0", 
"aff_unique_norm": "Xidian University;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.ntu.edu.sg", "aff_unique_abbr": "Xidian;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Oracle Complexity of Single-Loop Switching Subgradient Methods for Non-Smooth Weakly Convex Functional Constrained Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72563", "id": "A383wMho4h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c132c02176577c4319a878f6417a331a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A383wMho4h", "openreview": "https://openreview.net/forum?id=A383wMho4h", "poster": "/media/PosterPDFs/NeurIPS%202023/72563.png?t=1699584860.8852792", "slides": "https://nips.cc/virtual/2023/poster/72563", "video": "https://nips.cc/virtual/2023/poster/72563", "author_site": "Yankun Huang, Qihang Lin", "tldr": "", "abstract": "We consider a non-convex constrained optimization problem, where the objective function is weakly convex and the constraint function is either convex or weakly convex. To solve this problem, we consider the classical switching subgradient method, which is an intuitive and easily implementable first-order method whose oracle complexity was only known for convex problems. This paper provides the first analysis on the oracle complexity of the switching subgradient method for finding a nearly stationary point of non-convex problems. Our results are derived separately for convex and weakly convex constraints. Compared to existing approaches, especially the double-loop methods, the switching gradient method can be applied to non-smooth problems and achieves the same complexity using only a single loop, which saves the effort on tuning the number of inner iterations.", "keywords": "Constrained optimization;first-order method;non-smooth optimization;non-convex optimization", "primary_area": "", "supplementary_material": "/attachment/47d7c4454f3a1317fe70be4dcd47ca99a2d22b29.zip", "author": "Yankun Huang;Qihang Lin", "authorids": "~Yankun_Huang1;~Qihang_Lin1", "gender": ";", "homepage": ";https://tippie.uiowa.edu/people/qihang-lin", "dblp": ";02/8146", "google_scholar": ";sPtFRB8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yankun_Huang1;~Qihang_Lin1", "aff": ";University of Iowa", "aff_domain": ";uiowa.edu", "position": ";Associate Professor", "bibtex": "@inproceedings{\nhuang2023oracle,\ntitle={Oracle Complexity of Single-Loop Switching Subgradient Methods for Non-Smooth Weakly Convex Functional Constrained Optimization},\nauthor={Yankun Huang and Qihang Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A383wMho4h}\n}", "github": "", "project": "", "reviewers": "DHUQ;aPsS;tsNX", "pdf_size": 910617, "rating": "6;6;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "49;50;24", "wc_strengths": "42;88;31", "wc_weaknesses": "13;43;33", "wc_questions": "87;39;21", "wc_limitations": "1;1;1", "wc_review": "192;221;110", "wc_reply_reviewers": "8;11;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], 
"novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 41.0, 12.027745701779143 ], "wc_strengths_avg": [ 53.666666666666664, 24.689178916188272 ], "wc_weaknesses_avg": [ 29.666666666666668, 12.47219128924647 ], "wc_questions_avg": [ 49.0, 27.85677655436824 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 174.33333333333334, 47.00590979393501 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 4.642796092394707 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11122949064413693734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";uiowa.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Iowa", "aff_unique_dep": "", "aff_unique_url": "https://www.uiowa.edu", "aff_unique_abbr": "UIowa", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Koopa: Learning Non-stationary Time Series Dynamics with Koopman Predictors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72562", "id": "A4zzxu82a7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28b3dc0970fa4624a63278a4268de997-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A4zzxu82a7", "openreview": "https://openreview.net/forum?id=A4zzxu82a7", "poster": "/media/PosterPDFs/NeurIPS%202023/72562.png?t=1699165013.4112644", "slides": "https://nips.cc/virtual/2023/poster/72562", "video": "https://nips.cc/virtual/2023/poster/72562", "author_site": "Yong Liu, Chenyu Li, Jianmin Wang, Mingsheng Long", "tldr": "", "abstract": "Real-world time series are characterized by intrinsic non-stationarity that poses a principal challenge for deep forecasting models. While previous models suffer from complicated series variations induced by changing temporal distribution, we tackle non-stationary time series with modern Koopman theory that fundamentally considers the underlying time-variant dynamics. Inspired by Koopman theory of portraying complex dynamical systems, we disentangle time-variant and time-invariant components from intricate non-stationary series by Fourier Filter and design Koopman Predictor to advance respective dynamics forward. Technically, we propose Koopa as a novel Koopman forecaster composed of stackable blocks that learn hierarchical dynamics. Koopa seeks measurement functions for Koopman embedding and utilizes Koopman operators as linear portraits of implicit transition. To cope with time-variant dynamics that exhibits strong locality, Koopa calculates context-aware operators in the temporal neighborhood and is able to utilize incoming ground truth to scale up forecast horizon. Besides, by integrating Koopman Predictors into deep residual structure, we ravel out the binding reconstruction loss in previous Koopman forecasters and achieve end-to-end forecasting objective optimization. 
Compared with the state-of-the-art model, Koopa achieves competitive performance while saving 77.3% training time and 76.0% memory.", "keywords": "Time series forecasting;Deep learning", "primary_area": "", "supplementary_material": "/attachment/d7986358d20747ac52bbd73e26373c1f3b9e6ccb.zip", "author": "Yong Liu;Chenyu Li;Jianmin Wang;Mingsheng Long", "authorids": "~Yong_Liu15;~Chenyu_Li2;~Jianmin_Wang1;~Mingsheng_Long5", "gender": ";M;M;", "homepage": ";https://lichenyu20.github.io/;https://www.thss.tsinghua.edu.cn/en/faculty/jianminwang.htm;", "dblp": ";51/2854-3.html;06/3456-1.html;", "google_scholar": ";Ul5hyswAAAAJ;https://scholar.google.com.tw/citations?user=MiovcboAAAAJ;", "orcid": ";0009-0002-1179-788X;0000-0001-6841-7943;", "linkedin": ";;;", "or_profile": "~Yong_Liu15;~Chenyu_Li2;~Jianmin_Wang1;~Mingsheng_Long5", "aff": ";Tsinghua University;Tsinghua University;", "aff_domain": ";tsinghua.edu.cn;tsinghua.edu.cn;", "position": ";Undergrad student;Full Professor;", "bibtex": "@inproceedings{\nliu2023koopa,\ntitle={Koopa: Learning Non-stationary Time Series Dynamics with Koopman Predictors},\nauthor={Yong Liu and Chenyu Li and Jianmin Wang and Mingsheng Long},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A4zzxu82a7}\n}", "github": "", "project": "", "reviewers": "EGYL;M3Xz;pXJ3;36zV;a2Yj", "pdf_size": 2077578, "rating": "5;5;6;7;8", "confidence": "4;4;5;2;2", "soundness": "2;2;2;3;4", "novelty": "3;1;3;3;3", "presentation": "3;3;4;3;4", "wc_summary": "81;95;51;78;121", "wc_strengths": "43;45;107;35;270", "wc_weaknesses": "280;130;170;102;38", "wc_questions": "6;2;33;48;15", "wc_limitations": "1;2;1;15;25", "wc_review": "411;274;362;278;469", "wc_reply_reviewers": "129;42;59;17;41", "wc_reply_authors": "1485;55;26;0;26", "reply_reviewers": "1;1;1;1;1", "reply_authors": "5;2;2;1;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 1.2 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 85.2, 22.87706274852609 ], "wc_strengths_avg": [ 100.0, 88.82342033495445 ], "wc_weaknesses_avg": [ 144.0, 80.45868505015478 ], "wc_questions_avg": [ 20.8, 17.29045979724079 ], "wc_limitations_avg": [ 8.8, 9.682974749528164 ], "wc_review_avg": [ 358.8, 75.62909493045649 ], "wc_reply_reviewers_avg": [ 57.6, 38.124008183820344 ], "wc_reply_authors_avg": [ 318.4, 583.5596284870982 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7717436331412898, "gs_citation": 125, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5169871634464962182&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Maximize to Explore: One Objective Function Fusing Estimation, Planning, and Exploration", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72561", "id": "A57UMlUJdc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4640d5da5888238b9de7e0dbacd2c605-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=A57UMlUJdc", "openreview": "https://openreview.net/forum?id=A57UMlUJdc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72561", "video": "https://nips.cc/virtual/2023/poster/72561", "author_site": "Zhihan Liu, Miao Lu, WEI XIONG, Han Zhong, Hao Hu, Shenao Zhang, Sirui Zheng, Zhuoran Yang, Zhaoran Wang", "tldr": "", "abstract": "In reinforcement learning (RL), balancing exploration and exploitation is crucial for achieving an optimal policy in a sample-efficient way. To this end, existing sample- efficient algorithms typically consist of three components: estimation, planning, and exploration. However, to cope with general function approximators, most of them involve impractical algorithmic components to incentivize exploration, such as data-dependent level-set constraints or complicated sampling procedures. To address this challenge, we propose an easy-to-implement RL framework called Maximize to Explore (MEX), which only needs to optimize unconstrainedly a single objective that integrates the estimation and planning components while balancing exploration and exploitation automatically. Theoretically, we prove that the MEX achieves a sublinear regret with general function approximators and is extendable to the zero-sum Markov game setting. Meanwhile, we adapt deep RL baselines to design practical versions of MEX in both the model-based and model-free settings, which outperform baselines in various MuJoCo environments with sparse reward by a stable margin. Compared with existing sample-efficient algorithms with general function approximators, MEX achieves similar sample efficiency while also enjoying a lower computational cost and is more compatible with modern deep RL methods.", "keywords": "reinforcement learning; online learning; game", "primary_area": "", "supplementary_material": "/attachment/0dc60c4753b90ef229a1ffc1d08bdce143a884cf.zip", "author": "Zhihan Liu;Miao Lu;Wei Xiong;Han Zhong;Hao Hu;Shenao Zhang;Sirui Zheng;Zhuoran Yang;Zhaoran Wang", "authorids": "~Zhihan_Liu1;~Miao_Lu3;~Wei_Xiong9;~Han_Zhong1;~Hao_Hu3;~Shenao_Zhang1;~Sirui_Zheng2;~Zhuoran_Yang1;~Zhaoran_Wang1", "gender": "M;;M;;M;M;M;M;Not Specified", "homepage": ";https://miaolu3.github.io;https://weixiongust.github.io/WeiXiongUST/index.html;https://hanzhong-ml.github.io/;https://mousehu.github.io;https://shenao-zhang.github.io/;;https://zhuoranyang.github.io/;https://zhaoranwang.github.io/", "dblp": ";09/1168;33/4054-15;137/8096.html;67/6924-6;253/4543.html;;;117/2756", "google_scholar": "0VVg_R4AAAAJ;3jS17zQAAAAJ;m2-OwQEAAAAJ;Bk5q_pAAAAAJ;https://scholar.google.com/citations?hl=en;8NamuusAAAAJ;;;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";miao-lu-5bb9a31aa/;;;hao-hu-tsinghua;shenao-zhang-055a53178/;%E6%80%9D%E9%94%90-%E9%83%91-448756212/;;", "or_profile": "~Zhihan_Liu1;~Miao_Lu3;~Wei_Xiong9;~Han_Zhong1;~Hao_Hu3;~Shenao_Zhang1;~Sirui_Zheng2;~Zhuoran_Yang1;~Zhaoran_Wang1", "aff": "Northwestern University;University of Science and Technology of China;Hong Kong University of Science and Technology;Peking University;Tsinghua University;Georgia Institute of Technology;Northwestern University;Yale University;", "aff_domain": "northwestern.edu;ustc.edu.cn;ust.hk;stu.pku.edu.cn;tsinghua.edu.cn;gatech.edu;northwestern.edu;yale.edu;", "position": "PhD student;Undergrad student;MS student;PhD student;PhD student;MS student;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nliu2023maximize,\ntitle={Maximize to Explore: 
One Objective Function Fusing Estimation, Planning, and Exploration},\nauthor={Zhihan Liu and Miao Lu and Wei Xiong and Han Zhong and Hao Hu and Shenao Zhang and Sirui Zheng and Zhuoran Yang and Zhaoran Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A57UMlUJdc}\n}", "github": "", "project": "", "reviewers": "J9HB;wpGc;44XZ;vhgd", "pdf_size": 867519, "rating": "6;7;7;8", "confidence": "3;2;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "61;95;71;59", "wc_strengths": "42;383;34;134", "wc_weaknesses": "176;579;163;341", "wc_questions": "27;250;29;226", "wc_limitations": "1;61;20;33", "wc_review": "307;1368;317;793", "wc_reply_reviewers": "20;78;53;15", "wc_reply_authors": "46;69;46;46", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.5, 14.309088021254185 ], "wc_strengths_avg": [ 148.25, 141.11409390985722 ], "wc_weaknesses_avg": [ 314.75, 167.9261370364959 ], "wc_questions_avg": [ 133.0, 105.34467238546048 ], "wc_limitations_avg": [ 28.75, 21.821720830401986 ], "wc_review_avg": [ 696.25, 434.72829157992464 ], "wc_reply_reviewers_avg": [ 41.5, 25.636887486588538 ], "wc_reply_authors_avg": [ 51.75, 9.959292143521045 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6199236731526523850&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "northwestern.edu;ustc.edu.cn;ust.hk;stu.pku.edu.cn;tsinghua.edu.cn;gatech.edu;northwestern.edu;yale.edu;", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;0;6", "aff_unique_norm": "Northwestern University;University of Science and Technology of China;Hong Kong University of Science and Technology;Peking University;Tsinghua University;Georgia Institute of Technology;Yale University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.northwestern.edu;http://www.ustc.edu.cn;https://www.ust.hk;http://www.pku.edu.cn;https://www.tsinghua.edu.cn;https://www.gatech.edu;https://www.yale.edu", "aff_unique_abbr": "NU;USTC;HKUST;Peking U;THU;Georgia Tech;Yale", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Combinatorial Group Testing with Selfish Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72560", "id": "A5yMv7XPuA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/69f98acf161316ed896047e45da3bc0c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A5yMv7XPuA", "openreview": "https://openreview.net/forum?id=A5yMv7XPuA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72560", "video": "https://nips.cc/virtual/2023/poster/72560", "author_site": "Georgios Chionas, Dariusz Kowalski, Piotr Krysta", "tldr": "", "abstract": "We study the Combinatorial Group Testing (CGT) problem in a novel game-theoretic framework, with a solution concept of Adversarial Equilibrium (AE). 
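As a point of reference for the framework defined next, here is a toy round of the underlying query/feedback protocol. The coin-flip joining rule is purely illustrative, not the equilibrium strategies constructed in the paper.

```python
import random

n, k = 16, 3
K = set(random.sample(range(n), k))           # hidden set of active agents
# each active agent decides whether to be present in this round's query Q
Q = {i for i in K if random.random() < 0.5}
feedback = Q & K                              # all agents observe Q ∩ K
if len(feedback) == 1:
    (agent,) = feedback
    print(f"agent {agent} is isolated; its id is learned this round")
else:
    print(f"feedback {sorted(feedback)} does not isolate a single agent")
```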
In this new framework, we have $n$ selfish agents corresponding to the elements of the universe $[n] =\\{0,1,\\ldots,n-1\\}$ and a hidden set $K \\subseteq [n]$ of active agents of size $|K| = k \\ll n$. In each round of the game, each active agent decides if it is present in a query $Q \\subseteq [n]$, and all agents receive feedback on $Q \\cap K$. The goal of each active agent is to ensure that its id can be learned from the feedback as early as possible. \n\nWe present a comprehensive set of results in this new game, where we design and analyze adaptive algorithmic strategies for agents that are AEs. In particular, if $k$ is known to the agents, then we design adaptive AE strategies with provably near-optimal learning time of $O(k \\log(n/k))$. In the case of unknown $k$, we design adaptive AE strategies with learning time of order $n^k$, and we prove a lower bound of $\\Omega(n)$ on the learning time of any such algorithmic strategy. This shows a strong separation between the two models of known and unknown $k$, as well as between the classic CGT, i.e., without selfish agents, and our game-theoretic CGT model.", "keywords": "Combinatorial Group Testing;Adversarial Equilibrium;Contention Resolution;selfish agents;learning time;adaptive learning algorithms", "primary_area": "", "supplementary_material": "/attachment/a4b16aba583cf7d8d0bcdad3d1459f7361600fd4.pdf", "author": "Giorgos Chionas;Dariusz Rafal Kowalski;Piotr Krysta", "authorids": "g.chionas@liverpool.ac.uk;~Dariusz_Rafal_Kowalski1;~Piotr_Krysta1", "gender": ";M;", "homepage": ";;", "dblp": ";43/6109;k/PiotrKrysta", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-1316-7788;", "linkedin": ";;", "or_profile": "g.chionas@liverpool.ac.uk;~Dariusz_Rafal_Kowalski1;~Piotr_Krysta1", "aff": ";Augusta University;University of Liverpool", "aff_domain": ";augusta.edu;liverpool.ac.uk", "position": ";Full Professor;Full Professor", "bibtex": "@inproceedings{\nchionas2023combinatorial,\ntitle={Combinatorial Group Testing with Selfish Agents},\nauthor={Giorgos Chionas and Dariusz Rafal Kowalski and Piotr Krysta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A5yMv7XPuA}\n}", "github": "", "project": "", "reviewers": "X2Ux;YF8o;53TB;LHGX", "pdf_size": 348407, "rating": "6;6;7;7", "confidence": "3;1;3;2", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "4;3;4;3", "wc_summary": "100;50;148;196", "wc_strengths": "32;52;94;75", "wc_weaknesses": "23;102;156;188", "wc_questions": "36;2;144;22", "wc_limitations": "11;5;49;2", "wc_review": "202;211;591;483", "wc_reply_reviewers": "15;36;207;25", "wc_reply_authors": "0;0;90;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 123.5, 54.33921236087251 ], "wc_strengths_avg": [ 63.25, 23.381349405027933 ], "wc_weaknesses_avg": [ 117.25, 62.49549983798833 ], "wc_questions_avg": [ 51.0, 55.036351623268054 ], "wc_limitations_avg": [ 16.75, 18.89940475253123 ], "wc_review_avg": [ 371.75, 169.63398097079488 ], "wc_reply_reviewers_avg": [ 70.75, 79.01384372374248 ], "wc_reply_authors_avg": [ 22.5, 38.97114317029974 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ],
"corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZhjUNZnHVM4J:scholar.google.com/&scioq=Combinatorial+Group+Testing+with+Selfish+Agents&hl=en&as_sdt=0,44", "gs_version_total": 6, "email": ";augusta.edu;liverpool.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Augusta University;University of Liverpool", "aff_unique_dep": ";", "aff_unique_url": "https://www.augusta.edu;https://www.liverpool.ac.uk", "aff_unique_abbr": "AU;Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Effective Bayesian Heteroscedastic Regression with Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72559", "id": "A6EquH0enk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a901d5540789a086ee0881a82211b63d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A6EquH0enk", "openreview": "https://openreview.net/forum?id=A6EquH0enk", "poster": "/media/PosterPDFs/NeurIPS%202023/72559.png?t=1702044524.6153302", "slides": "https://nips.cc/virtual/2023/poster/72559", "video": "https://nips.cc/virtual/2023/poster/72559", "author_site": "Alexander Immer, Emanuele Palumbo, Alexander Marx, Julia Vogt", "tldr": "", "abstract": "Flexibly quantifying both irreducible aleatoric and model-dependent epistemic uncertainties plays an important role for complex regression problems. While deep neural networks in principle can provide this flexibility and learn heteroscedastic aleatoric uncertainties through non-linear functions, recent works highlight that maximizing the log likelihood objective parameterized by mean and variance can lead to compromised mean fits since the gradient are scaled by the predictive variance, and propose adjustments in line with this premise. \nWe instead propose to use the natural parametrization of the Gaussian, which has been shown to be more stable for heteroscedastic regression based on non-linear feature maps and Gaussian processes. Further, we emphasize the significance of principled regularization of the network parameters and prediction. 
We therefore propose an efficient Laplace approximation for heteroscedastic neural networks that allows automatic regularization through empirical Bayes and provides epistemic uncertainties, both of which improve generalization.\nWe showcase on a range of regression problems\u2014including a new heteroscedastic image regression benchmark\u2014that our methods are scalable, improve over previous approaches for heteroscedastic regression, and provide epistemic uncertainty without requiring hyperparameter tuning.", "keywords": "Heteroscedastic Regression;Marginal Likelihood;Bayesian Neural Networks;Uncertainty Estimation;Model Selection;Laplace Approximation", "primary_area": "", "supplementary_material": "", "author": "Alexander Immer;Emanuele Palumbo;Alexander Marx;Julia E Vogt", "authorids": "~Alexander_Immer1;~Emanuele_Palumbo1;~Alexander_Marx1;~Julia_E_Vogt1", "gender": ";M;;F", "homepage": ";;http://a-marx.com;http://mds.inf.ethz.ch", "dblp": ";;;13/8412", "google_scholar": ";Y7VFjEpEmyoC;fJ9u_woAAAAJ;UoeV-8kAAAAJ", "orcid": ";;;", "linkedin": ";;;julia-vogt-50b53895", "or_profile": "~Alexander_Immer1;~Emanuele_Palumbo1;~Alexander_Marx1;~Julia_E_Vogt1", "aff": ";Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": ";inf.ethz.ch;ethz.ch;ethz.ch", "position": ";PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nimmer2023effective,\ntitle={Effective Bayesian Heteroscedastic Regression with Deep Neural Networks},\nauthor={Alexander Immer and Emanuele Palumbo and Alexander Marx and Julia E Vogt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A6EquH0enk}\n}", "github": "", "project": "", "reviewers": "7fHN;HwYi;PsPP;2cQi", "pdf_size": 2177675, "rating": "6;6;7;7", "confidence": "4;4;3;4", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "62;84;141;120", "wc_strengths": "39;111;63;51", "wc_weaknesses": "192;452;8;73", "wc_questions": "425;110;34;102", "wc_limitations": "31;4;8;17", "wc_review": "749;761;254;363", "wc_reply_reviewers": "108;44;13;12", "wc_reply_authors": "138;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.75, 30.695072894521687 ], "wc_strengths_avg": [ 66.0, 27.331300737432898 ], "wc_weaknesses_avg": [ 181.25, 169.6722944384262 ], "wc_questions_avg": [ 167.75, 151.43047084388266 ], "wc_limitations_avg": [ 15.0, 10.36822067666386 ], "wc_review_avg": [ 531.75, 226.59145504630135 ], "wc_reply_reviewers_avg": [ 44.25, 38.98958194184698 ], "wc_reply_authors_avg": [ 34.5, 59.75575286112627 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8773412477228751189&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";inf.ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch",
"aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Keep Various Trajectories: Promoting Exploration of Ensemble Policies in Continuous Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72558", "id": "A6JDQDv7Nt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/10cb15f4559b3d578b7f24966d48a137-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A6JDQDv7Nt", "openreview": "https://openreview.net/forum?id=A6JDQDv7Nt", "poster": "/media/PosterPDFs/NeurIPS%202023/72558.png?t=1699873381.382525", "slides": "https://nips.cc/virtual/2023/poster/72558", "video": "https://nips.cc/virtual/2023/poster/72558", "author_site": "Chao Li, Chen GONG, Qiang He, Xinwen Hou", "tldr": "", "abstract": "The combination of deep reinforcement learning (DRL) with ensemble methods has been proved to be highly effective in addressing complex sequential decision-making problems. This success can be primarily attributed to the utilization of multiple models, which enhances both the robustness of the policy and the accuracy of value function estimation. However, there has been limited analysis of the empirical success of current ensemble RL methods thus far. Our new analysis reveals that the sample efficiency of previous ensemble DRL algorithms may be limited by sub-policies that are not as diverse as they could be. Motivated by these findings, our study introduces a new ensemble RL algorithm, termed \\textbf{T}rajectories-awar\\textbf{E} \\textbf{E}nsemble exploratio\\textbf{N} (TEEN). The primary goal of TEEN is to maximize the expected return while promoting more diverse trajectories. Through extensive experiments, we demonstrate that TEEN not only enhances the sample diversity of the ensemble policy compared to using sub-policies alone but also improves the performance over ensemble RL algorithms. 
On average, TEEN outperforms the baseline ensemble DRL algorithms by 41\\% in performance on the tested representative environments.", "keywords": "Reinforcement Learning;Ensemble Exploration;Control Tasks", "primary_area": "", "supplementary_material": "/attachment/38b61019a0e13167d1107d6c9868bb3d801140b6.pdf", "author": "Chao Li;Chen GONG;Qiang He;Xinwen Hou", "authorids": "~Chao_Li28;~Chen_GONG8;~Qiang_He1;~Xinwen_Hou2", "gender": "M;M;M;M", "homepage": ";https://2019chengong.github.io/;;https://people.ucas.ac.cn/~xwhou?language=en", "dblp": ";21/8587-5;;76/5119", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;l6Y2ZDYAAAAJ;WFsqZskAAAAJ", "orcid": "0000-0002-9371-498X;;;", "linkedin": ";;;", "or_profile": "~Chao_Li28;~Chen_GONG8;~Qiang_He1;~Xinwen_Hou2", "aff": "Chinese academic of science;Institute of automation, Chinese academy of science;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;", "aff_domain": "ia.ac.an;ia.ac.cn;uni-tuebingen.de;", "position": "MS student;MS student;PhD student;", "bibtex": "@inproceedings{\nli2023keep,\ntitle={Keep Various Trajectories: Promoting Exploration of Ensemble Policies in Continuous Control},\nauthor={Chao Li and Chen GONG and Qiang He and Xinwen Hou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A6JDQDv7Nt}\n}", "github": "", "project": "", "reviewers": "gTUk;C6dk;vN15;aBo4", "pdf_size": 1492659, "rating": "5;5;6;6", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "99;171;172;87", "wc_strengths": "68;44;91;76", "wc_weaknesses": "32;131;242;386", "wc_questions": "286;174;6;72", "wc_limitations": "20;11;28;19", "wc_review": "505;531;539;640", "wc_reply_reviewers": "112;0;21;33", "wc_reply_authors": "371;103;18;28", "reply_reviewers": "2;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 132.25, 39.480216564755565 ], "wc_strengths_avg": [ 69.75, 17.005513811702368 ], "wc_weaknesses_avg": [ 197.75, 131.64796808154694 ], "wc_questions_avg": [ 134.5, 105.98466870260057 ], "wc_limitations_avg": [ 19.5, 6.020797289396148 ], "wc_review_avg": [ 553.75, 51.35842189943145 ], "wc_reply_reviewers_avg": [ 41.5, 42.38218965556169 ], "wc_reply_authors_avg": [ 130.0, 142.9667793580033 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5_tjACCe20kJ:scholar.google.com/&scioq=Keep+Various+Trajectories:+Promoting+Exploration+of+Ensemble+Policies+in+Continuous+Control&hl=en&as_sdt=0,14", "gs_version_total": 5, "email": "ia.ac.an;ia.ac.cn;uni-tuebingen.de;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "http://www.cas.cn;https://www.uni-tuebingen.de/", "aff_unique_abbr": "CAS;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Germany" }, { "title": "Generalized Semi-Supervised Learning via Self-Supervised Feature Adaptation", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/72557", "id": "A6PRwRjI8V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf145010b30dc5f14fa87dc152074e4d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A6PRwRjI8V", "openreview": "https://openreview.net/forum?id=A6PRwRjI8V", "poster": "/media/PosterPDFs/NeurIPS%202023/72557.png?t=1701512464.2306387", "slides": "https://nips.cc/virtual/2023/poster/72557", "video": "https://nips.cc/virtual/2023/poster/72557", "author_site": "Jiachen Liang, RuiBing Hou, Hong Chang, Bingpeng MA, Shiguang Shan, Xilin Chen", "tldr": "", "abstract": "Traditional semi-supervised learning (SSL) assumes that the feature distributions of labeled and unlabeled data are consistent which rarely holds in realistic scenarios. \nIn this paper, we propose a novel SSL setting, where unlabeled samples are drawn from a mixed distribution that deviates from the feature distribution of labeled samples.\nUnder this setting, previous SSL methods tend to predict wrong pseudo-labels with the model fitted on labeled data, resulting in noise accumulation. To tackle this issue, we propose \\emph{Self-Supervised Feature Adaptation} (SSFA), a generic framework for improving SSL performance when labeled and unlabeled data come from different distributions. \nSSFA decouples the prediction of pseudo-labels from the current model to improve the quality of pseudo-labels. Particularly, SSFA incorporates a self-supervised task into the SSL framework and uses it to adapt the feature extractor of the model to the unlabeled data. In this way, the extracted features better fit the distribution of unlabeled data, thereby generating high-quality pseudo-labels. Extensive experiments show that our proposed SSFA is applicable to various pseudo-label-based SSL learners and significantly improves performance in labeled, unlabeled, and even unseen distributions.", "keywords": "semi-supervised learning;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/f837c24d32f834fd2f94382190f3dffae691207b.zip", "author": "Jiachen Liang;RuiBing Hou;Hong Chang;Bingpeng Ma;Shiguang Shan;Xilin CHEN", "authorids": "~Jiachen_Liang2;~RuiBing_Hou1;~Hong_Chang1;~Bingpeng_Ma1;~Shiguang_Shan2;~Xilin_CHEN2", "gender": "F;F;M;M;F;M", "homepage": "https://scholar.google.com.tw/citations?hl=zh-CN&view_op=list_works&gmla=AJsN-F4WR9JNhX176XHhFNyY-uOoxAkSKlLh3wyOGdmZz2O2DsHREcsE__DkYGPfqQhe6mOj1V68pipS94MgujusUy5PRDZ1ZgpejCjdktRkQtfpffMKWW0&user=VfS4cisAAAAJ;;http://people.ucas.edu.cn/~bpma;http://vipl.ict.ac.cn/people/sgshan/;http://vipl.ict.ac.cn/edu/student/doctoral/202211/t20221118_123501.html;http://vipl.ict.ac.cn/people/_xlchen/", "dblp": ";;62/1822;s/ShiguangShan;;c/XilinChen", "google_scholar": ";LX6MnNsAAAAJ;;https://scholar.google.com.tw/citations?user=Vkzd7MIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;vVx2v20AAAAJ", "orcid": ";;0000-0001-8984-205X;0000-0002-8348-392X;;0000-0003-3024-4404", "linkedin": ";;;;;", "or_profile": "~RuiBing_Hou1;~Hong_Chang1;~Bingpeng_Ma1;~Shiguang_Shan2;~Jc_Liang1;~Xilin_Chen4", "aff": " Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;, Chinese Academy of Sciences;Institute of Computing Technology", "aff_domain": "ict.ac.cn;ict.ac.cn;ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "Assistant Professor;Full Professor;Full Professor;Full Professor;PhD 
student;Full Professor", "bibtex": "@inproceedings{\nliang2023generalized,\ntitle={Generalized Semi-Supervised Learning via Self-Supervised Feature Adaptation},\nauthor={Jiachen Liang and RuiBing Hou and Hong Chang and Bingpeng Ma and Shiguang Shan and Xilin CHEN},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A6PRwRjI8V}\n}", "github": "", "project": "", "reviewers": "Pxab;WadC;LyxH;epDa;wRdb", "pdf_size": 2988076, "rating": "5;5;5;6;6", "confidence": "3;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;4;4;3;2", "wc_summary": "117;85;72;91;136", "wc_strengths": "31;39;58;54;112", "wc_weaknesses": "269;99;337;58;88", "wc_questions": "30;156;2;88;191", "wc_limitations": "40;9;1;55;29", "wc_review": "487;388;470;346;556", "wc_reply_reviewers": "126;87;0;0;51", "wc_reply_authors": "435;201;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "3;2;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 100.2, 23.12920232087566 ], "wc_strengths_avg": [ 58.8, 28.350661367946955 ], "wc_weaknesses_avg": [ 170.2, 111.35420961957388 ], "wc_questions_avg": [ 93.4, 71.89881779278433 ], "wc_limitations_avg": [ 26.8, 19.782820830205182 ], "wc_review_avg": [ 449.4, 74.38171818397313 ], "wc_reply_reviewers_avg": [ 52.8, 49.20731652915042 ], "wc_reply_authors_avg": [ 127.2, 172.46843189407159 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.40824829046386313, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12421846804494240375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ict.ac.cn;ict.ac.cn;ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cas.cn;http://www.ucas.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "CAS;UCAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "One-2-3-45: Any Single Image to 3D Mesh in 45 Seconds without Per-Shape Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72556", "id": "A6X9y8n4sT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4683beb6bab325650db13afd05d1a14a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A6X9y8n4sT", "openreview": "https://openreview.net/forum?id=A6X9y8n4sT", "poster": "/media/PosterPDFs/NeurIPS%202023/72556.png?t=1701828060.513501", "slides": "https://nips.cc/virtual/2023/poster/72556", "video": "https://nips.cc/virtual/2023/poster/72556", "author_site": "Minghua Liu, Chao Xu, Haian Jin, Linghao Chen, Mukund Varma T, Zexiang Xu, Hao Su", "tldr": "", "abstract": "Single image 3D reconstruction is an important but challenging task that requires extensive knowledge of our natural world. 
Many existing methods solve this problem by optimizing a neural radiance field under the guidance of 2D diffusion models but suffer from lengthy optimization time, 3D-inconsistent results, and poor geometry. In this work, we propose a novel method that takes a single image of any object as input and generates a full 360-degree 3D textured mesh in a single feed-forward pass. Given a single image, we first use a view-conditioned 2D diffusion model, Zero123, to generate multi-view images for the input view, and then aim to lift them up to 3D space. Since traditional reconstruction methods struggle with inconsistent multi-view predictions, we build our 3D reconstruction module upon an SDF-based generalizable neural surface reconstruction method and propose several critical training strategies to enable the reconstruction of 360-degree meshes. Without costly optimizations, our method reconstructs 3D shapes in significantly less time than existing methods. Moreover, our method produces better geometry, generates more 3D-consistent results, and adheres more closely to the input image. We evaluate our approach on both synthetic data and in-the-wild images and demonstrate its superiority in terms of both mesh quality and runtime. In addition, our approach can seamlessly support the text-to-3D task by integrating with off-the-shelf text-to-image diffusion models.", "keywords": "single image reconstruction;3d generation;mesh reconstruction;diffusion models", "primary_area": "", "supplementary_material": "/attachment/be81ab29ed43adc296aa229a381f757c1a0e5190.zip", "author": "Minghua Liu;Chao Xu;Haian Jin;Linghao Chen;Mukund Varma T;Zexiang Xu;Hao Su", "authorids": "~Minghua_Liu1;~Chao_Xu6;~Haian_Jin1;~Linghao_Chen2;~Mukund_Varma_T1;~Zexiang_Xu1;~Hao_Su1", "gender": "M;M;M;;M;M;M", "homepage": "https://cseweb.ucsd.edu//~mil070/;https://chaoxu.xyz;https://haian-jin.github.io/;https://ootts.github.io/;;https://cseweb.ucsd.edu/~zex014/;http://ai.ucsd.edu/~haosu", "dblp": "28/8907;79/1442-16;345/8396;262/3716;;154/0366;09/4945-1", "google_scholar": "6U3IGtEAAAAJ;9Az3LhwAAAAJ;VZvmpKoAAAAJ;;;_RRIYvEAAAAJ;1P8Zu04AAAAJ", "orcid": ";0009-0001-0574-5357;;;;;", "linkedin": ";chaoxu/;;;mukundvarmat/;;", "or_profile": "~Minghua_Liu1;~Chao_Xu6;~Haian_Jin1;~Linghao_Chen2;~Mukund_Varma_T1;~Zexiang_Xu1;~Hao_Su1", "aff": "University of California, San Diego;University of California, Los Angeles;Zhejiang University;Zhejiang University;Indian Institute of Technology Madras;Adobe Research;University of California, San Diego", "aff_domain": "ucsd.edu;ucla.edu;zju.edu.cn;zju.edu.cn;iitm.ac.in;adobe.com;ucsd.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;Undergrad student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nliu2023one,\ntitle={One-2-3-45: Any Single Image to 3D Mesh in 45 Seconds without Per-Shape Optimization},\nauthor={Minghua Liu and Chao Xu and Haian Jin and Linghao Chen and Mukund Varma T and Zexiang Xu and Hao Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A6X9y8n4sT}\n}", "github": "", "project": "", "reviewers": "yirP;eL63;AK29;L2hY;Ph7H", "pdf_size": 27518710, "rating": "4;5;5;6;6", "confidence": "4;4;4;5;5", "soundness": "2;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "2;3;3;3;2", "wc_summary": "128;81;117;116;65", "wc_strengths": "121;121;80;103;58", "wc_weaknesses": "232;238;110;187;235", "wc_questions": "110;5;25;106;4", "wc_limitations": "4;6;26;31;34", "wc_review":
"595;451;358;543;396", "wc_reply_reviewers": "0;101;16;80;36", "wc_reply_authors": "0;365;8;23;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;5;2;2;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 101.4, 24.1047713119208 ], "wc_strengths_avg": [ 96.6, 24.483463807231196 ], "wc_weaknesses_avg": [ 200.4, 48.91053056346864 ], "wc_questions_avg": [ 50.0, 47.96248534010722 ], "wc_limitations_avg": [ 20.2, 12.687001221722966 ], "wc_review_avg": [ 468.6, 88.68506074869656 ], "wc_reply_reviewers_avg": [ 46.6, 38.19738210924932 ], "wc_reply_authors_avg": [ 79.2, 143.14663810233196 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8728715609439693, "gs_citation": 412, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15686398576005123770&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "ucsd.edu;ucla.edu;zju.edu.cn;zju.edu.cn;iitm.ac.in;adobe.com;ucsd.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;3;4;0", "aff_unique_norm": "University of California, San Diego;University of California, Los Angeles;Zhejiang University;Indian Institute of Technology Madras;Adobe", "aff_unique_dep": ";;;;Adobe Research", "aff_unique_url": "https://www.ucsd.edu;https://www.ucla.edu;https://www.zju.edu.cn;https://www.iitm.ac.in;https://research.adobe.com", "aff_unique_abbr": "UCSD;UCLA;ZJU;IIT Madras;Adobe", "aff_campus_unique_index": "0;1;3;0", "aff_campus_unique": "San Diego;Los Angeles;;Madras", "aff_country_unique_index": "0;0;1;1;2;0;0", "aff_country_unique": "United States;China;India" }, { "title": "On Convergence of Polynomial Approximations to the Gaussian Mixture Entropy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72555", "id": "A7ESFTMJWs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee860a9fa65a55a335754c557a5211de-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A7ESFTMJWs", "openreview": "https://openreview.net/forum?id=A7ESFTMJWs", "poster": "/media/PosterPDFs/NeurIPS%202023/72555.png?t=1701893724.5036314", "slides": "https://nips.cc/virtual/2023/poster/72555", "video": "https://nips.cc/virtual/2023/poster/72555", "author_site": "Caleb Dahlke, Jason Pacheco", "tldr": "", "abstract": "Gaussian mixture models (GMMs) are fundamental to machine learning due to their flexibility as approximating densities. However, uncertainty quantification of GMMs remains a challenge as differential entropy lacks a closed form. This paper explores polynomial approximations, specifically Taylor and Legendre, to the GMM entropy from a theoretical and practical perspective. We provide new analysis of a widely used approach due to Huber et al.(2008) and show that the series diverges under simple conditions. Motivated by this divergence we provide a novel Taylor series that is provably convergent to the true entropy of any GMM. We demonstrate a method for selecting a center such that the series converges from below, providing a lower bound on GMM entropy. Furthermore, we demonstrate that orthogonal polynomial series result in more accurate polynomial approximations. 
Experimental validation supports our theoretical results while showing that our method is comparable in computation to Huber et al. We also show that in application, the use of these polynomial approximations, such as in Nonparametric Variational Inference by Gershamn et al. (2012), rely on the convergence of the methods in computing accurate approximations. This work contributes useful analysis to existing methods while introducing a novel approximation supported by firm theoretical guarantees.", "keywords": "entropy;Gaussian mixture model;uncertainty quantification;approximate inference", "primary_area": "", "supplementary_material": "/attachment/e465272f4af8e79370ff7f5dce6ab244c5b88641.zip", "author": "Caleb Dahlke;Jason Pacheco", "authorids": "~Caleb_Dahlke1;~Jason_Pacheco1", "gender": "M;M", "homepage": "https://sites.google.com/view/calebdahlke;http://www.pachecoj.com", "dblp": ";126/1745", "google_scholar": ";71ZEsnEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Caleb_Dahlke1;~Jason_Pacheco1", "aff": "University of Arizona;University of Arizona", "aff_domain": "arizona.edu;arizona.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndahlke2023on,\ntitle={On Convergence of Polynomial Approximations to the Gaussian Mixture Entropy},\nauthor={Caleb Dahlke and Jason Pacheco},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A7ESFTMJWs}\n}", "github": "", "project": "", "reviewers": "BrW6;2uqK;HKUU;gUEj", "pdf_size": 991247, "rating": "4;6;7;7", "confidence": "5;3;3;4", "soundness": "3;3;4;4", "novelty": "3;2;4;3", "presentation": "2;3;4;4", "wc_summary": "48;32;85;49", "wc_strengths": "12;78;66;42", "wc_weaknesses": "59;82;43;64", "wc_questions": "184;25;65;2", "wc_limitations": "1;1;58;5", "wc_review": "304;218;317;162", "wc_reply_reviewers": "0;11;127;23", "wc_reply_authors": "0;0;190;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 53.5, 19.397164741270824 ], "wc_strengths_avg": [ 49.5, 25.233905761891084 ], "wc_weaknesses_avg": [ 62.0, 13.910427743243556 ], "wc_questions_avg": [ 69.0, 70.11775809308224 ], "wc_limitations_avg": [ 16.25, 24.159625411003375 ], "wc_review_avg": [ 250.25, 63.58606372468735 ], "wc_reply_reviewers_avg": [ 40.25, 50.74137857804023 ], "wc_reply_authors_avg": [ 47.5, 82.27241335952168 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8655393479725103353&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "arizona.edu;arizona.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Image Captioners Are Scalable Vision Learners Too", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72554", "id": "A7feCufBhL", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/92369a01fbe8046a093746389b2c413e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A7feCufBhL", "openreview": "https://openreview.net/forum?id=A7feCufBhL", "poster": "/media/PosterPDFs/NeurIPS%202023/72554.png?t=1701975449.7301457", "slides": "https://nips.cc/virtual/2023/poster/72554", "video": "https://nips.cc/virtual/2023/poster/72554", "author_site": "Michael Tschannen, Manoj Kumar, Andreas Steiner, Andreas Steiner, Xiaohua Zhai, Neil Houlsby, Lucas Beyer", "tldr": "", "abstract": "Contrastive pretraining on image-text pairs from the web is one of the most popular large-scale pretraining strategies for vision backbones, especially in the context of large multimodal models. At the same time, image captioning on this type of data is commonly considered an inferior pretraining strategy. In this paper, we perform a fair comparison of these two pretraining strategies, carefully matching training data, compute, and model capacity. Using a standard encoder-decoder transformer, we find that captioning alone is surprisingly effective: on classification tasks, captioning produces vision encoders competitive with contrastively pretrained encoders, while surpassing them on vision & language tasks. We further analyze the effect of the model architecture and scale, as well as the pretraining data on the representation quality, and find that captioning exhibits the same or better scaling behavior along these axes. Overall our results show that plain image captioning is a more powerful pretraining strategy than was previously believed. Code is available at [https://github.com/google-research/big_vision](https://github.com/google-research/big_vision).", "keywords": "contrastive learning;CLIP;CapPa;Cap;vision-language;image captioning;visual representation learning;weakly supervised learning;VLM;multimodal learning;VQA;image classification", "primary_area": "", "supplementary_material": "/attachment/da008bd7c6976bdc28ac59a1817733bf62cb3e84.pdf", "author": "Michael Tschannen;Manoj Kumar;Andreas Peter Steiner;Xiaohua Zhai;Neil Houlsby;Lucas Beyer", "authorids": "~Michael_Tschannen1;~Manoj_Kumar1;~Andreas_Peter_Steiner1;~Xiaohua_Zhai2;~Neil_Houlsby1;~Lucas_Beyer1", "gender": ";;M;;M;", "homepage": "https://mitscha.github.io/;https://mechcoder.github.io/;;;https://neilhoulsby.github.io/;http://lucasb.eyer.be", "dblp": "134/9824;;s/AndreasSteiner;66/636;91/10669;126/4720", "google_scholar": "https://scholar.google.ch/citations?user=TSj_8nYAAAAJ;https://scholar.google.nl/citations?user=XQJN7dsAAAAJ;;;https://scholar.google.com/citations?hl=en;p2gwhK4AAAAJ", "orcid": ";;;;;", "linkedin": ";;andreas-steiner-1859223b/;;;", "or_profile": "~Michael_Tschannen1;~Manoj_Kumar1;~Andreas_Peter_Steiner1;~Xiaohua_Zhai2;~Neil_Houlsby1;~Lucas_Beyer1", "aff": "Google DeepMind;Google;Google DeepMind;Google Brain;Google;Google Brain", "aff_domain": "google.com;google.com;deepmind.com;google.com;google.com;google.com", "position": "Researcher;Research Engineer;Research Engineer;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ntschannen2023image,\ntitle={Image Captioners Are Scalable Vision Learners Too},\nauthor={Michael Tschannen and Manoj Kumar and Andreas Peter Steiner and Xiaohua Zhai and Neil Houlsby and Lucas Beyer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A7feCufBhL}\n}", "github": "", "project": "", "reviewers": "9JDL;mqQw;J9Jb;G8WT", 
"pdf_size": 5103552, "rating": "6;8;8;9", "confidence": "4;5;5;5", "soundness": "4;4;4;4", "novelty": "3;3;3;4", "presentation": "2;4;4;4", "wc_summary": "142;71;72;42", "wc_strengths": "52;114;455;63", "wc_weaknesses": "93;76;268;49", "wc_questions": "2;56;94;15", "wc_limitations": "13;7;25;5", "wc_review": "302;324;914;174", "wc_reply_reviewers": "70;28;492;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.75, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 81.75, 36.81287139031673 ], "wc_strengths_avg": [ 171.0, 165.62759431930417 ], "wc_weaknesses_avg": [ 121.5, 86.024705753638 ], "wc_questions_avg": [ 41.75, 36.15504805694496 ], "wc_limitations_avg": [ 12.5, 7.794228634059948 ], "wc_review_avg": [ 428.5, 286.0957007716124 ], "wc_reply_reviewers_avg": [ 150.25, 198.4746520339562 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6161736499934320443&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;google.com;deepmind.com;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "On the Importance of Feature Separability in Predicting Out-Of-Distribution Error", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72553", "id": "A86JTXllHa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/585e9cf25585612ac27b535457116513-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A86JTXllHa", "openreview": "https://openreview.net/forum?id=A86JTXllHa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72553", "video": "https://nips.cc/virtual/2023/poster/72553", "author_site": "RENCHUNZI XIE, Hongxin Wei, Lei Feng, Yuzhou Cao, Bo An", "tldr": "", "abstract": "Estimating the generalization performance is practically challenging on out-of-distribution (OOD) data without ground-truth labels. While previous methods emphasize the connection between distribution difference and OOD accuracy, we show that a large domain gap not necessarily leads to a low test accuracy. In this paper, we investigate this problem from the perspective of feature separability empirically and theoretically. Specifically, we propose a dataset-level score based upon feature dispersion to estimate the test accuracy under distribution shift. Our method is inspired by desirable properties of features in representation learning: high inter-class dispersion and high intra-class compactness. Our analysis shows that inter-class dispersion is strongly correlated with the model accuracy, while intra-class compactness does not reflect the generalization performance on OOD data. 
Extensive experiments demonstrate the superiority of our method in both prediction performance and computational efficiency.", "keywords": "Machine Learning;Uncertainty Estimation", "primary_area": "", "supplementary_material": "/attachment/0b89d55560fc02f719dab29d334e4972bffd8c10.pdf", "author": "RENCHUNZI XIE;Hongxin Wei;Lei Feng;Yuzhou Cao;Bo An", "authorids": "~RENCHUNZI_XIE1;~Hongxin_Wei1;~Lei_Feng1;~Yuzhou_Cao1;~Bo_An2", "gender": ";M;M;M;M", "homepage": ";https://hongxin001.github.io/;https://lfeng1995.github.io/;https://yzcao-nkg.github.io/;https://personal.ntu.edu.sg/boan/", "dblp": ";150/6350;76/847-6;256/5052;42/6178-1.html", "google_scholar": ";cABH034AAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;PEEpuNwAAAAJ", "orcid": ";;0000-0003-2839-5799;;0000-0002-7064-7438", "linkedin": ";;;;", "or_profile": "~RENCHUNZI_XIE1;~Hongxin_Wei1;~Lei_Feng1;~Yuzhou_Cao1;~Bo_An2", "aff": ";Southern University of Science and Technology;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": ";sustech.edu.cn;ntu.edu.sg;ntu.edu;ntu.edu.sg", "position": ";Assistant Professor;Visiting Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nxie2023on,\ntitle={On the Importance of Feature Separability in Predicting Out-Of-Distribution Error},\nauthor={RENCHUNZI XIE and Hongxin Wei and Lei Feng and Yuzhou Cao and Bo An},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A86JTXllHa}\n}", "github": "", "project": "", "reviewers": "1A8L;ALm9;HxyT;BweV", "pdf_size": 2802662, "rating": "5;6;7;8", "confidence": "5;4;4;5", "soundness": "2;2;4;4", "novelty": "2;2;3;4", "presentation": "2;2;4;3", "wc_summary": "76;58;127;118", "wc_strengths": "56;76;211;253", "wc_weaknesses": "226;188;39;76", "wc_questions": "70;5;187;2", "wc_limitations": "223;136;10;39", "wc_review": "651;463;574;488", "wc_reply_reviewers": "91;50;208;36", "wc_reply_authors": "284;31;160;20", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 94.75, 28.647643882176418 ], "wc_strengths_avg": [ 149.0, 84.61382865702272 ], "wc_weaknesses_avg": [ 132.25, 77.0661242051266 ], "wc_questions_avg": [ 66.0, 74.95665414091 ], "wc_limitations_avg": [ 102.0, 84.00892809695884 ], "wc_review_avg": [ 544.0, 74.23947736885006 ], "wc_reply_reviewers_avg": [ 96.25, 67.61055760752163 ], "wc_reply_authors_avg": [ 123.75, 107.65773311750532 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12905750819695300945&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 7, "email": ";sustech.edu.cn;ntu.edu.sg;ntu.edu;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Southern University of Science and Technology;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "SUSTech;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;Singapore" }, { 
"title": "AGD: an Auto-switchable Optimizer using Stepwise Gradient Difference for Preconditioning Matrix", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72552", "id": "A954O4tDmU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8f9d459c19b59b5400ce396e0f8c23e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A954O4tDmU", "openreview": "https://openreview.net/forum?id=A954O4tDmU", "poster": "/media/PosterPDFs/NeurIPS%202023/72552.png?t=1701765765.2338853", "slides": "https://nips.cc/virtual/2023/poster/72552", "video": "https://nips.cc/virtual/2023/poster/72552", "author_site": "Yun Yue, Zhiling Ye, Jiadi Jiang, Yongchao Liu, Ke Zhang", "tldr": "", "abstract": "Adaptive optimizers, such as Adam, have achieved remarkable success in deep learning. A key component of these optimizers is the so-called preconditioning matrix, providing enhanced gradient information and regulating the step size of each gradient direction. In this paper, we propose a novel approach to designing the preconditioning matrix by utilizing the gradient difference between two successive steps as the diagonal elements. These diagonal elements are closely related to the Hessian and can be perceived as an approximation of the inner product between the Hessian row vectors and difference of the adjacent parameter vectors. Additionally, we introduce an auto-switching function that enables the preconditioning matrix to switch dynamically between Stochastic Gradient Descent (SGD) and the adaptive optimizer. Based on these two techniques, we develop a new optimizer named AGD that enhances the generalization performance. We evaluate AGD on public datasets of Natural Language Processing (NLP), Computer Vision (CV), and Recommendation Systems (RecSys). Our experimental results demonstrate that AGD outperforms the state-of-the-art (SOTA) optimizers, achieving highly competitive or significantly better predictive performance. Furthermore, we analyze how AGD is able to switch automatically between SGD and the adaptive optimizer and its actual effects on various scenarios. 
The code is available at https://github.com/intelligent-machine-learning/dlrover/tree/master/atorch/atorch/optimizers.", "keywords": "adaptive optimizer;gradient difference;auto switch;AGD", "primary_area": "", "supplementary_material": "/attachment/90c7789d7785f8cb579ccb8a7f778a3a6d02987c.zip", "author": "Yun Yue;Zhiling Ye;Jiadi Jiang;Yongchao Liu;Ke Zhang", "authorids": "~Yun_Yue3;~Zhiling_Ye1;~Jiadi_Jiang1;~Yongchao_Liu2;~Ke_Zhang15", "gender": "M;M;M;M;M", "homepage": "http://weibo.com/u/2359452820?topnav=1&wvr=6&topsug=1&is_all=1;https://yzlnew.com;https://github.com/jiangjiadi;https://yongchao-liu.github.io;", "dblp": "260/4251;;348/6197;29/3462;", "google_scholar": ";;cUB6B9gAAAAJ;qYQHl4sAAAAJ;", "orcid": ";;0000-0002-5998-0037;0000-0003-3440-9675;", "linkedin": ";;;yongchaoliu;ke-zhang-8286642b/", "or_profile": "~Yun_Yue3;~Zhiling_Ye1;~Jiadi_Jiang1;~Yongchao_Liu2;~Ke_Zhang15", "aff": "antgroup;Ant Group;Ant Group;Ant Group;", "aff_domain": "antgroup.com;antgroup.com;antgroup.com;antgroup.com;", "position": "Algorithm engineer;Engineer;Algorithm Engineer;Researcher;", "bibtex": "@inproceedings{\nyue2023agd,\ntitle={{AGD}: an Auto-switchable Optimizer using Stepwise Gradient Difference for Preconditioning Matrix},\nauthor={Yun Yue and Zhiling Ye and Jiadi Jiang and Yongchao Liu and Ke Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A954O4tDmU}\n}", "github": "", "project": "", "reviewers": "ns5n;1Tit;BE2A;Ag5C;xqym", "pdf_size": 7091239, "rating": "3;5;7;7;7", "confidence": "4;3;2;3;4", "soundness": "2;2;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "85;72;71;127;74", "wc_strengths": "77;16;19;82;66", "wc_weaknesses": "406;51;15;85;144", "wc_questions": "23;28;129;4;13", "wc_limitations": "26;9;1;8;53", "wc_review": "617;176;235;306;350", "wc_reply_reviewers": "1193;0;4;0;0", "wc_reply_authors": "650;0;0;0;0", "reply_reviewers": "3;0;1;0;0", "reply_authors": "4;1;1;1;1", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 85.8, 21.198113123577766 ], "wc_strengths_avg": [ 52.0, 28.65658737533135 ], "wc_weaknesses_avg": [ 140.2, 139.5297817671912 ], "wc_questions_avg": [ 39.4, 45.55699726715974 ], "wc_limitations_avg": [ 19.4, 18.7040102651811 ], "wc_review_avg": [ 336.8, 152.22010379710034 ], "wc_reply_reviewers_avg": [ 239.4, 476.8025167718812 ], "wc_reply_authors_avg": [ 130.0, 260.0 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.46770717334674267, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13154517506277511115&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "antgroup.com;antgroup.com;antgroup.com;antgroup.com;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Ant Group", "aff_unique_dep": "", "aff_unique_url": "https://www.antgroup.com", "aff_unique_abbr": "Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "NAS-X: Neural Adaptive Smoothing via Twisting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72551", "id": "A9mHph8GJk", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b3d005a2cb0e71e698e0b13ac657473-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=A9mHph8GJk", "openreview": "https://openreview.net/forum?id=A9mHph8GJk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72551", "video": "https://nips.cc/virtual/2023/poster/72551", "author_site": "Dieterich Lawson, Michael Li, Scott Linderman", "tldr": "", "abstract": "Sequential latent variable models (SLVMs) are essential tools in statistics and machine learning, with applications ranging from healthcare to neuroscience. As their flexibility increases, analytic inference and model learning can become challenging, necessitating approximate methods. Here we introduce neural adaptive smoothing via twisting (NAS-X), a method that extends reweighted wake-sleep (RWS) to the sequential setting by using smoothing sequential Monte Carlo (SMC) to estimate intractable posterior expectations. Combining RWS and smoothing SMC allows NAS-X to provide low-bias and low-variance gradient estimates, and fit both discrete and continuous latent variable models. We illustrate the theoretical advantages of NAS-X over previous methods and explore these advantages empirically in a variety of tasks, including a challenging application to mechanistic models of neuronal dynamics. These experiments show that NAS-X substantially outperforms previous VI- and RWS-based methods in inference and model learning, achieving lower parameter error and tighter likelihood bounds.", "keywords": "sequence models;probabilistic inference;reweighted wake-sleep;sequential monte carlo;smoothing;mechanistic models", "primary_area": "", "supplementary_material": "/attachment/d8a12c44e48ffb01cb3d494173946a1bacb2eabf.pdf", "author": "Dieterich Lawson;Michael Y. Li;Scott Linderman", "authorids": "~Dieterich_Lawson1;~Michael_Y._Li1;~Scott_Linderman1", "gender": "M;;M", "homepage": ";https://michaelyli.github.io/;https://web.stanford.edu/~swl1/", "dblp": ";40/2032;142/2484", "google_scholar": "8xSYX9IAAAAJ;;6mD3I24AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Dieterich_Lawson1;~Michael_Yifan_Li1;~Scott_W_Linderman1", "aff": ";Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlawson2023nasx,\ntitle={{NAS}-X: Neural Adaptive Smoothing via Twisting},\nauthor={Dieterich Lawson and Michael Y. 
Li and Scott Linderman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=A9mHph8GJk}\n}", "github": "", "project": "", "reviewers": "nTEg;mkCV;DvQm;Dwhv", "pdf_size": 1628933, "rating": "5;6;6;6", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "127;91;91;20", "wc_strengths": "147;207;33;45", "wc_weaknesses": "196;255;13;35", "wc_questions": "186;123;237;23", "wc_limitations": "16;50;17;1", "wc_review": "672;726;391;124", "wc_reply_reviewers": "199;91;12;27", "wc_reply_authors": "356;105;20;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.25, 38.82895182721264 ], "wc_strengths_avg": [ 108.0, 72.3118247591637 ], "wc_weaknesses_avg": [ 124.75, 103.18036392647585 ], "wc_questions_avg": [ 142.25, 79.81658662208 ], "wc_limitations_avg": [ 21.0, 17.902513789968157 ], "wc_review_avg": [ 478.25, 240.8447373309203 ], "wc_reply_reviewers_avg": [ 82.25, 73.64568894375284 ], "wc_reply_authors_avg": [ 120.25, 141.7045782605488 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5868492613155042637&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": ";stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Universal Online Learning with Gradient Variations: A Multi-layer Online Ensemble Approach", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72550", "id": "AA1xrgAP5z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/76818d8d85e05e45ce3a16a8468619d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AA1xrgAP5z", "openreview": "https://openreview.net/forum?id=AA1xrgAP5z", "poster": "/media/PosterPDFs/NeurIPS%202023/72550.png?t=1702005720.6787348", "slides": "https://nips.cc/virtual/2023/poster/72550", "video": "https://nips.cc/virtual/2023/poster/72550", "author_site": "Yu-Hu Yan, Peng Zhao, Zhi-Hua Zhou", "tldr": "", "abstract": "In this paper, we propose an online convex optimization approach with two different levels of adaptivity. On a higher level, our approach is agnostic to the unknown types and curvatures of the online functions, while at a lower level, it can exploit the unknown niceness of the environments and attain problem-dependent guarantees. Specifically, we obtain $\\mathcal{O}(\\log V_T)$, $\\mathcal{O}(d \\log V_T)$ and $\\hat{\\mathcal{O}}(\\sqrt{V_T})$ regret bounds for strongly convex, exp-concave and convex loss functions, respectively, where $d$ is the dimension, $V_T$ denotes problem-dependent gradient variations and the $\\hat{\\mathcal{O}}(\\cdot)$-notation omits $\\log V_T$ factors. 
Our result not only safeguards the worst-case guarantees but also directly implies the small-loss bounds in analysis. Moreover, when applied to adversarial/stochastic convex optimization and game theory problems, our result enhances the existing universal guarantees. Our approach is based on a multi-layer online ensemble framework incorporating novel ingredients, including a carefully designed optimism for unifying diverse function types and cascaded corrections for algorithmic stability. Notably, despite its multi-layer structure, our algorithm necessitates only one gradient query per round, making it favorable when the gradient evaluation is time-consuming. This is facilitated by a novel regret decomposition equipped with carefully designed surrogate losses.", "keywords": "online learning", "primary_area": "", "supplementary_material": "", "author": "Yu-Hu Yan;Peng Zhao;Zhi-Hua Zhou", "authorids": "~Yu-Hu_Yan1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "gender": "M;;", "homepage": "https://www.lamda.nju.edu.cn/yanyh;;", "dblp": "271/0054;;", "google_scholar": "NdaoylQAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yu-Hu_Yan1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "aff": "Nanjing University;;", "aff_domain": "nju.edu.cn;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nyan2023universal,\ntitle={Universal Online Learning with Gradient Variations: A Multi-layer Online Ensemble Approach},\nauthor={Yu-Hu Yan and Peng Zhao and Zhi-Hua Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AA1xrgAP5z}\n}", "github": "", "project": "", "reviewers": "ZhYs;x4hL;FwDP;wYJC;rs9j;osvq", "pdf_size": 845386, "rating": "6;6;6;7;8;9", "confidence": "3;3;4;4;4;3", "soundness": "3;4;2;3;4;4", "novelty": "2;2;2;3;4;4", "presentation": "1;4;3;2;4;4", "wc_summary": "390;68;106;84;230;47", "wc_strengths": "41;46;40;27;215;36", "wc_weaknesses": "808;67;71;214;69;19", "wc_questions": "629;9;24;13;14;2", "wc_limitations": "123;15;6;25;1;1", "wc_review": "1991;205;247;363;529;105", "wc_reply_reviewers": "230;26;0;70;16;0", "wc_reply_authors": "1481;0;56;76;0;0", "reply_reviewers": "1;1;0;1;1;0", "reply_authors": "3;1;2;2;1;1", "rating_avg": [ 7.0, 1.1547005383792515 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 2.8333333333333335, 0.8975274678557507 ], "presentation_avg": [ 3.0, 1.1547005383792515 ], "wc_summary_avg": [ 154.16666666666666, 120.77584839509741 ], "wc_strengths_avg": [ 67.5, 66.21870329949589 ], "wc_weaknesses_avg": [ 208.0, 274.99333325252326 ], "wc_questions_avg": [ 115.16666666666667, 229.88650581440305 ], "wc_limitations_avg": [ 28.5, 43.09582346353298 ], "wc_review_avg": [ 573.3333333333334, 647.7503805908225 ], "wc_reply_reviewers_avg": [ 57.0, 80.86820553633342 ], "wc_reply_authors_avg": [ 268.8333333333333, 542.9310627408316 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.74535599249993 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17765133853898752997&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "nju.edu.cn;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_country_unique_index": "0", "aff_country_unique": 
"China" }, { "title": "Estimating Generic 3D Room Structures from 2D Annotations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73672", "id": "AA2uO0HHmr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/76bf913ad349686b2aa552a1c6ee0a2e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=AA2uO0HHmr", "openreview": "https://openreview.net/forum?id=AA2uO0HHmr", "poster": "/media/PosterPDFs/NeurIPS%202023/73672.png?t=1702282280.8866756", "slides": "https://nips.cc/virtual/2023/poster/73672", "video": "https://nips.cc/virtual/2023/poster/73672", "author_site": "Denys Rozumnyi, Stefan Popov, Kevis-kokitsi Maninis, Matthias Niessner, Vittorio Ferrari", "tldr": "", "abstract": "Indoor rooms are among the most common use cases in 3D scene understanding. Current state-of-the-art methods for this task are driven by large annotated datasets. Room layouts are especially important, consisting of structural elements in 3D, such as wall, floor, and ceiling. However, they are difficult to annotate, especially on pure RGB video. We propose a novel method to produce generic 3D room layouts just from 2D segmentation masks, which are easy to annotate for humans. Based on these 2D annotations, we automatically reconstruct 3D plane equations for the structural elements and their spatial extent in the scene, and connect adjacent elements at the appropriate contact edges. We annotate and publicly release 2246 3D room layouts on the RealEstate10k dataset, containing YouTube videos. We demonstrate the high quality of these 3D layouts annotations with extensive experiments.", "keywords": "room layout;3D reconstruction;dataset construction;dataset;optimization", "primary_area": "", "supplementary_material": "/attachment/c7a5f6922cab9a5ad656c7fe8ad47926ba50e230.zip", "author": "Denys Rozumnyi;Stefan Popov;Kevis-kokitsi Maninis;Matthias Nie\u00dfner;Vittorio Ferrari", "authorids": "~Denys_Rozumnyi1;~Stefan_Popov1;~Kevis-kokitsi_Maninis1;~Matthias_Nie\u00dfner2;~Vittorio_Ferrari4", "gender": "M;M;M;M;", "homepage": "http://people.inf.ethz.ch/denysr/;http://popov.im;https://www.kmaninis.com/;https://sites.google.com/view/vittoferrari/home;https://niessnerlab.org/", "dblp": "190/7346;72/5256;185/0928;16/3608;84/8221.html", "google_scholar": "5KvD78sAAAAJ;Glq3dWkAAAAJ;Lw_-pYsAAAAJ;4QvYJ00AAAAJ;eUtEs6YAAAAJ", "orcid": "0000-0001-9874-1349;;0000-0003-3776-0049;;", "linkedin": "denys-rozumnyi-35004a145/;https://linkedin.com/in/stefanpopov;kmaninis/;vittorio-ferrari-17062b2b/;", "or_profile": "~Denys_Rozumnyi1;~Stefan_Popov1;~Kevis-kokitsi_Maninis1;~Vittorio_Ferrari4;~Matthias_Niessner1", "aff": "Google;Google;Google;Google;Technical University of Munich", "aff_domain": "google.com;google.com;google.com;google.com;tum.de", "position": "Intern;Software Engineer;Research Scientist;Principal Researcher;Professor", "bibtex": "@inproceedings{\nrozumnyi2023estimating,\ntitle={Estimating Generic 3D Room Structures from 2D Annotations},\nauthor={Denys Rozumnyi and Stefan Popov and Kevis-kokitsi Maninis and Matthias Nie{\\ss}ner and Vittorio Ferrari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=AA2uO0HHmr}\n}", "github": "", "project": "", "reviewers": "ujhv;aGuE;WKsm;42Nb;ChJp", "pdf_size": 36377440, "rating": "6;6;6;6;8", "confidence": "3;4;5;4;5", "wc_summary_and_contributions": "60;57;110;60;211", 
"wc_strengths": "68;55;69;33;168", "wc_improvement": "42;316;194;190;2", "wc_limitations": "8;4;1;56;261", "wc_correctness": "13;1;2;28;240", "wc_clarity": "4;1;3;7;16", "wc_relation_to_prior_work": "13;1;2;20;57", "wc_documentation": "19;1;3;20;77", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "228;437;385;415;1033", "wc_reply_reviewers": "33;20;50;209;0", "wc_reply_authors": "93;147;287;159;768", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 99.6, 59.10871340166355 ], "wc_strengths_avg": [ 78.6, 46.547180365732146 ], "wc_improvement_avg": [ 148.8, 113.71262023188105 ], "wc_limitations_avg": [ 66.0, 99.55701883845258 ], "wc_correctness_avg": [ 56.8, 92.11601380867498 ], "wc_clarity_avg": [ 6.2, 5.2687759489277965 ], "wc_relation_to_prior_work_avg": [ 18.6, 20.460694025374604 ], "wc_documentation_avg": [ 24.0, 27.640549922170507 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 499.6, 276.5831520537721 ], "wc_reply_reviewers_avg": [ 62.4, 75.10153127599996 ], "wc_reply_authors_avg": [ 290.8, 246.9448521431455 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12912672911028643485&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com;google.com;tum.de", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Google;Technical University of Munich", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.tum.de", "aff_unique_abbr": "Google;TUM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;Germany" }, { "title": "Training Energy-Based Normalizing Flow with Score-Matching Objectives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72549", "id": "AALLvnv95q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8882d370cdafec9885b918a8cfac642e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AALLvnv95q", "openreview": "https://openreview.net/forum?id=AALLvnv95q", "poster": "/media/PosterPDFs/NeurIPS%202023/72549.png?t=1701157944.361489", "slides": "https://nips.cc/virtual/2023/poster/72549", "video": "https://nips.cc/virtual/2023/poster/72549", "author_site": "Chen-Hao Chao, Wei-Fang Sun, Yen-Chang Hsu, Zsolt Kira, Chun-Yi Lee", "tldr": "", "abstract": "In this paper, we establish a connection between the parameterization of flow-based and energy-based generative models, and present a new flow-based modeling approach called energy-based normalizing flow (EBFlow). We demonstrate that by optimizing EBFlow with score-matching objectives, the computation of Jacobian determinants for linear transformations can be entirely bypassed. This feature enables the use of arbitrary linear layers in the construction of flow-based models without increasing the computational time complexity of each training iteration from $\\mathcal{O}(D^2L)$ to $\\mathcal{O}(D^3L)$ for an $L$-layered model that accepts $D$-dimensional inputs. This makes the training of EBFlow more efficient than the commonly-adopted maximum likelihood training method. 
In addition to the reduction in runtime, we enhance the training stability and empirical performance of EBFlow through a number of techniques developed based on our analysis of the score-matching methods. The experimental results demonstrate that our approach achieves a significant speedup compared to maximum likelihood estimation while outperforming prior methods by a noticeable margin in terms of negative log-likelihood (NLL).", "keywords": "flow-based models;score-matching methods", "primary_area": "", "supplementary_material": "/attachment/f40021317331944a9c644cf3fd71ed6f252efff0.pdf", "author": "Chen-Hao Chao;Wei-Fang Sun;Yen-Chang Hsu;Zsolt Kira;Chun-Yi Lee", "authorids": "~Chen-Hao_Chao2;~Wei-Fang_Sun1;~Yen-Chang_Hsu1;~Zsolt_Kira1;~Chun-Yi_Lee1", "gender": "M;M;M;M;M", "homepage": ";;https://faculty.cc.gatech.edu/~zk15;https://elsalab.ai;https://chen-hao-chao.github.io/", "dblp": "275/9039;172/1140;36/4127;36/3668;291/4406", "google_scholar": "TgMlVRUAAAAJ;7QWAiigAAAAJ;2a5XgNAAAAAJ;https://scholar.google.com.tw/citations?user=5mYNdo0AAAAJ;puKAQDgAAAAJ", "orcid": ";;0000-0002-2626-2004;0000-0002-4680-4800;0000-0003-1409-7467", "linkedin": ";yenchanghsu/;;;", "or_profile": "~Wei-Fang_Sun1;~Yen-Chang_Hsu1;~Zsolt_Kira1;~Chun-Yi_Lee1;~CHEN-HAO_CHAO1", "aff": "Department of Computer Science, National Tsing Hua University, National Tsing Hua University;Samsung Research America;Georgia Tech Research Institute;National Tsing Hua University;Department of Computer Science, National Tsing Hua University, National Tsing Hua University", "aff_domain": "cs.nthu.edu.tw;samsung.com;gtri.gatech.edu;nthu.edu.tw;cs.nthu.edu.tw", "position": "MS student;Research Scientist;Senior Research Scientist;Associate Professor;MS student", "bibtex": "@inproceedings{\nchao2023training,\ntitle={Training Energy-Based Normalizing Flow with Score-Matching Objectives},\nauthor={Chen-Hao Chao and Wei-Fang Sun and Yen-Chang Hsu and Zsolt Kira and Chun-Yi Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AALLvnv95q}\n}", "github": "", "project": "", "reviewers": "fofM;htQa;tDWv;DEYZ;Bq9G", "pdf_size": 7171249, "rating": "5;5;6;6;8", "confidence": "4;4;4;4;3", "soundness": "3;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;2;2;3;4", "wc_summary": "82;39;232;104;102", "wc_strengths": "77;41;98;60;61", "wc_weaknesses": "226;356;143;76;46", "wc_questions": "26;55;19;2;27", "wc_limitations": "10;51;19;1;14", "wc_review": "421;542;511;243;250", "wc_reply_reviewers": "38;0;98;15;45", "wc_reply_authors": "14;0;152;14;15", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 111.8, 64.48689789406836 ], "wc_strengths_avg": [ 67.4, 19.08507270093567 ], "wc_weaknesses_avg": [ 169.4, 111.95642009282004 ], "wc_questions_avg": [ 25.8, 17.127755252805315 ], "wc_limitations_avg": [ 19.0, 17.052858997833766 ], "wc_review_avg": [ 393.4, 126.3781626706133 ], "wc_reply_reviewers_avg": [ 39.2, 33.51059533938483 ], "wc_reply_authors_avg": [ 39.0, 56.77323312970647 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.912870929175277, "gs_citation": 2, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=12392320888397736279&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.nthu.edu.tw;samsung.com;gtri.gatech.edu;nthu.edu.tw;cs.nthu.edu.tw", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "National Tsing Hua University;Samsung;Georgia Tech Research Institute", "aff_unique_dep": "Department of Computer Science;Samsung Research America;", "aff_unique_url": "https://www.nthu.edu.tw;https://www.samsung.com/us/careers/research/;https://www.gtri.gatech.edu", "aff_unique_abbr": "NTHU;SRA;GTRI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "DIFFER:Decomposing Individual Reward for Fair Experience Replay in Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72548", "id": "AG9A7Ae9r3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/edac78c3e300629acfe6cbe9ca88fb84-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AG9A7Ae9r3", "openreview": "https://openreview.net/forum?id=AG9A7Ae9r3", "poster": "/media/PosterPDFs/NeurIPS%202023/72548.png?t=1701536248.0105696", "slides": "https://nips.cc/virtual/2023/poster/72548", "video": "https://nips.cc/virtual/2023/poster/72548", "author_site": "Xunhan Hu, Jian Zhao, Wengang Zhou, Ruili Feng, Houqiang Li", "tldr": "", "abstract": "Cooperative multi-agent reinforcement learning (MARL) is a challenging task, as agents must learn complex and diverse individual strategies from a shared team reward. However, existing methods struggle to distinguish and exploit important individual experiences, as they lack an effective way to decompose the team reward into individual rewards. To address this challenge, we propose DIFFER, a powerful theoretical framework for decomposing individual rewards to enable fair experience replay in MARL.\nBy enforcing the invariance of network gradients, we establish a partial differential equation whose solution yields the underlying individual reward function. The individual TD-error can then be computed from the solved closed-form individual rewards, indicating the importance of each piece of experience in the learning task and guiding the training process. Our method elegantly achieves an equivalence to the original learning framework when individual experiences are homogeneous, while also adapting to achieve more muscular efficiency and fairness when diversity is observed.\nOur extensive experiments on popular benchmarks validate the effectiveness of our theory and method, demonstrating significant improvements in learning efficiency and fairness. 
\nCode is available in the supplementary material.", "keywords": "Experience Replay; Reinforcement Learning; Multi-Agent System", "primary_area": "", "supplementary_material": "/attachment/c7d161e08ba55c8c69128beaabb0ed1fd32e85f6.zip", "author": "Xunhan Hu;Jian Zhao;Wengang Zhou;Ruili Feng;Houqiang Li", "authorids": "~Xunhan_Hu1;~Jian_Zhao7;~Wengang_Zhou1;~Ruili_Feng1;~Houqiang_Li1", "gender": "F;M;M;;M", "homepage": ";;http://staff.ustc.edu.cn/~zhwg/index.html;https://github.com/RuiLiFeng;https://staff.ustc.edu.cn/~lihq/", "dblp": "313/9602.html;70/2932-18.html;22/4544-1;20/9594;59/7017.html", "google_scholar": ";n6zuurcAAAAJ;8s1JF8YAAAAJ;;7sFMIKoAAAAJ", "orcid": "0000-0003-0239-3541;0000-0003-4895-990X;0000-0003-1690-9836;;0000-0003-2188-3028", "linkedin": ";;;;", "or_profile": "~Xunhan_Hu1;~Jian_Zhao7;~Wengang_Zhou1;~Ruili_Feng1;~Houqiang_Li1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu;ustc.edu;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn", "position": "MS student;PhD student;Full Professor;PhD student;Professor", "bibtex": "@inproceedings{\nhu2023differdecomposing,\ntitle={{DIFFER}:Decomposing Individual Reward for Fair Experience Replay in Multi-Agent Reinforcement Learning},\nauthor={Xunhan Hu and Jian Zhao and Wengang Zhou and Ruili Feng and Houqiang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AG9A7Ae9r3}\n}", "github": "", "project": "", "reviewers": "ddQi;f7XM;Jgve;U3kJ", "pdf_size": 4765405, "rating": "5;6;6;7", "confidence": "5;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "115;48;59;94", "wc_strengths": "114;56;76;36", "wc_weaknesses": "61;63;18;6", "wc_questions": "227;45;5;1", "wc_limitations": "13;23;10;18", "wc_review": "530;235;168;155", "wc_reply_reviewers": "0;9;0;0", "wc_reply_authors": "52;41;45;0", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.0, 26.842131062939096 ], "wc_strengths_avg": [ 70.5, 28.822734082664677 ], "wc_weaknesses_avg": [ 37.0, 25.367301787931645 ], "wc_questions_avg": [ 69.5, 92.54593454063772 ], "wc_limitations_avg": [ 16.0, 4.949747468305833 ], "wc_review_avg": [ 272.0, 152.0180910286667 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 34.5, 20.303940504246953 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17836557372134148147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ustc.edu;ustc.edu;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0",
"aff_country_unique": "China" }, { "title": "Active Bipartite Ranking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72547", "id": "AGMVzMGcGP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/adb77ecc8ba1c2d3135c86a46b8f2496-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AGMVzMGcGP", "openreview": "https://openreview.net/forum?id=AGMVzMGcGP", "poster": "/media/PosterPDFs/NeurIPS%202023/72547.png?t=1702308284.3761005", "slides": "https://nips.cc/virtual/2023/poster/72547", "video": "https://nips.cc/virtual/2023/poster/72547", "author_site": "James Cheshire, Vincent Laurent, Stephan Cl\u00e9men\u00e7on", "tldr": "", "abstract": "In this paper, we develop an active learning framework for the bipartite ranking problem.\nMotivated by numerous applications, ranging from supervised anomaly detection to credit-scoring through the design of medical diagnosis support systems, and usually formulated as the problem of optimizing (a scalar summary of) the ROC curve, bipartite ranking has been the subject of much attention in the passive context. Various dedicated algorithms have been recently proposed and studied by the machine-learning community. In contrast, active bipartite ranking rule is poorly documented in the literature. Due to its global nature, a strategy for labeling sequentially data points that are difficult to rank w.r.t. to the others is required. This learning task is much more complex than binary classification, for which many active algorithms have been designed. It is the goal of this article to provide a rigorous formulation of such a selective sampling approach. We propose a dedicated algorithm, referred to as active-rank, which aims to minimise the distance between the ROC curve of the ranking function built and the optimal one, w.r.t. the sup norm. We show that, for a fixed confidence level $\\epsilon$ and probability $\\delta$, active-rank is PAC$(\\epsilon,\\delta)$. In addition, we provide a problem dependent upper bound on the expected sampling time of active-rank and also demonstrate a problem dependent lower bound on the expected sampling time of any PAC$(\\epsilon,\\delta)$ algorithm. 
Beyond the theoretical analysis carried out, numerical results are presented, providing strong empirical evidence of the performance of the algorithm proposed, which compares favorably with more naive approaches.", "keywords": "bipartite ranking;multi armed bandits;active learning", "primary_area": "", "supplementary_material": "/attachment/88bd9c447afd3e5d7fc4e03e030696b9d3c291b0.zip", "author": "James Cheshire;Vincent Laurent;Stephan Cl\u00e9men\u00e7on", "authorids": "~James_Cheshire2;vincent.laurent@ens-paris-saclay.fr;~Stephan_Cl\u00e9men\u00e7on1", "gender": "M;;", "homepage": "https://sites.google.com/view/jamescheshireresearch;;", "dblp": "267/9232;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~James_Cheshire2;vincent.laurent@ens-paris-saclay.fr;~Stephan_Cl\u00e9men\u00e7on1", "aff": "T\u00e9l\u00e9com ParisTech;;", "aff_domain": "telecom-paristech.fr;;", "position": "Postdoc;;", "bibtex": "@inproceedings{\ncheshire2023active,\ntitle={Active Bipartite Ranking},\nauthor={James Cheshire and Vincent Laurent and Stephan Cl{\\'e}men{\\c{c}}on},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AGMVzMGcGP}\n}", "github": "", "project": "", "reviewers": "rRJf;WRWn;iq8o;jBg3;RSb9", "pdf_size": 947272, "rating": "4;6;6;7;7", "confidence": "1;3;4;2;2", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;2", "wc_summary": "75;132;135;43;73", "wc_strengths": "31;60;66;43;47", "wc_weaknesses": "49;56;151;83;29", "wc_questions": "46;62;114;8;26", "wc_limitations": "8;13;3;1;2", "wc_review": "209;323;469;178;177", "wc_reply_reviewers": "0;27;20;12;0", "wc_reply_authors": "60;0;76;128;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;0;1;1;0", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 2.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.6, 36.05329388558 ], "wc_strengths_avg": [ 49.4, 12.435433245367852 ], "wc_weaknesses_avg": [ 73.6, 42.38679039512192 ], "wc_questions_avg": [ 51.2, 36.30096417452297 ], "wc_limitations_avg": [ 5.4, 4.498888751680798 ], "wc_review_avg": [ 271.2, 112.46048194810477 ], "wc_reply_reviewers_avg": [ 11.8, 10.740577265678041 ], "wc_reply_authors_avg": [ 52.8, 48.622628476872784 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 0.6, 0.48989794855663565 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3580574370197165, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:STjA99oC-IkJ:scholar.google.com/&scioq=Active+Bipartite+Ranking&hl=en&as_sdt=0,14", "gs_version_total": 11, "email": "telecom-paristech.fr;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech", "aff_unique_dep": "", "aff_unique_url": "https://www.telecom-paristech.fr", "aff_unique_abbr": "TP", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Fantastic Robustness Measures: The Secrets of Robust Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72546", "id": "AGVBqJuL0T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98a5c0470e57d518ade4e56c6ee0b363-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AGVBqJuL0T", "openreview": 
"https://openreview.net/forum?id=AGVBqJuL0T", "poster": "/media/PosterPDFs/NeurIPS%202023/72546.png?t=1698917455.2135086", "slides": "https://nips.cc/virtual/2023/poster/72546", "video": "https://nips.cc/virtual/2023/poster/72546", "author_site": "Hoki Kim, Jinseong Park, Yujin Choi, Jaewook Lee", "tldr": "", "abstract": "Adversarial training has become the de-facto standard method for improving the robustness of models against adversarial examples. However, robust overfitting remains a significant challenge, leading to a large gap between the robustness on the training and test datasets. To understand and improve robust generalization, various measures have been developed, including margin, smoothness, and flatness-based measures. In this study, we present a large-scale analysis of robust generalization to empirically verify whether the relationship between these measures and robust generalization remains valid in diverse settings. We demonstrate when and how these measures effectively capture the robust generalization gap by comparing over 1,300 models trained on CIFAR-10 under the $L_\\infty$ norm and further validate our findings through an evaluation of more than 100 models from RobustBench across CIFAR-10, CIFAR-100, and ImageNet. We hope this work can help the community better understand adversarial robustness and motivate the development of more robust defense methods against adversarial attacks.", "keywords": "Adversarial Robustness;Generalization;Measures", "primary_area": "", "supplementary_material": "/attachment/d68302464bde8bb6180b3af340636e6b8266e782.pdf", "author": "Hoki Kim;Jinseong Park;Yujin Choi;Jaewook Lee", "authorids": "~Hoki_Kim1;~Jinseong_Park1;~Yujin_Choi1;~Jaewook_Lee1", "gender": "M;M;F;M", "homepage": ";https://github.com/JinseongP;;http://slcf.snu.ac.kr", "dblp": "75/6518;178/8948-1;251/3065;39/4985-1", "google_scholar": "X4sz0QsAAAAJ;o4-E5z0AAAAJ;3u0-O2sAAAAJ;teMdzbwAAAAJ", "orcid": "0000-0001-5361-459X;0000-0003-1931-8441;0000-0001-9150-704X;", "linkedin": ";jinseong-park-a84740226/;;", "or_profile": "~Hoki_Kim1;~Jinseong_Park1;~Yujin_Choi1;~Jaewook_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2023fantastic,\ntitle={Fantastic Robustness Measures: The Secrets of Robust Generalization},\nauthor={Hoki Kim and Jinseong Park and Yujin Choi and Jaewook Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AGVBqJuL0T}\n}", "github": "", "project": "", "reviewers": "qzPP;PVgT;vGwc;2BXc", "pdf_size": 1181698, "rating": "3;6;7;8", "confidence": "4;4;4;4", "soundness": "2;2;4;3", "novelty": "2;2;3;4", "presentation": "2;2;3;3", "wc_summary": "75;51;50;34", "wc_strengths": "24;54;62;141", "wc_weaknesses": "70;480;305;346", "wc_questions": "7;125;224;173", "wc_limitations": "1;4;124;29", "wc_review": "177;714;765;723", "wc_reply_reviewers": "0;0;28;490", "wc_reply_authors": "0;901;813;344", "reply_reviewers": "0;0;1;2", "reply_authors": "1;3;3;3", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 52.5, 14.637281168304447 ], "wc_strengths_avg": [ 70.25, 43.234101123997014 ], "wc_weaknesses_avg": [ 
300.25, 147.85191070797833 ], "wc_questions_avg": [ 132.25, 80.34106982110707 ], "wc_limitations_avg": [ 39.5, 49.98249693642766 ], "wc_review_avg": [ 594.75, 241.95492865407806 ], "wc_reply_reviewers_avg": [ 129.5, 208.44843487059336 ], "wc_reply_authors_avg": [ 514.5, 364.78246942527267 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9306575398498469088&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "YouTubePD: A Multimodal Benchmark for Parkinson\u2019s Disease Analysis", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73671", "id": "AIeeXKsspI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/acffd5024f52c3a9ecc8ccb4b75b4e5c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=AIeeXKsspI", "openreview": "https://openreview.net/forum?id=AIeeXKsspI", "poster": "/media/PosterPDFs/NeurIPS%202023/73671.png?t=1702014375.9899673", "slides": "https://nips.cc/virtual/2023/poster/73671", "video": "https://nips.cc/virtual/2023/poster/73671", "author_site": "Andy Zhou, Samuel Li, Pranav Sriram, Xiang Li, Jiahua Dong, Ansh Sharma, Yuanyi Zhong, Shirui Luo, Volodymyr Kindratenko, George Heintz, Christopher Zallek, Yu-Xiong Wang", "tldr": "", "abstract": "The healthcare and AI communities have witnessed a growing interest in the development of AI-assisted systems for automated diagnosis of Parkinson's Disease (PD), one of the most prevalent neurodegenerative disorders. However, the progress in this area has been significantly impeded by the absence of a unified, publicly available benchmark, which prevents comprehensive evaluation of existing PD analysis methods and the development of advanced models. This work overcomes these challenges by introducing YouTubePD -- the *first* publicly available multimodal benchmark designed for PD analysis. We crowd-source existing videos featuring PD from YouTube, exploit multimodal information including *in-the-wild* videos, audio, and facial landmarks across 200+ subject videos, and provide dense and diverse annotations from a clinical expert. Based on our benchmark, we propose three challenging and complementary tasks encompassing *both discriminative and generative* tasks, along with a comprehensive set of corresponding baselines. Experimental evaluation showcases the potential of modern deep learning and computer vision techniques, in particular the generalizability of the models developed on our YouTubePD to real-world clinical settings, while revealing their limitations.
We hope that our work paves the way for future research in this direction.", "keywords": "medical classification;interpretability;multimodality;healthcare", "primary_area": "", "supplementary_material": "/attachment/630eab4c42dda985a39d5a5efd14fa28fddae91f.pdf", "author": "Andy Zhou;Samuel Li;Pranav Sriram;Xiang Li;Jiahua Dong;Ansh Sharma;Yuanyi Zhong;Shirui Luo;Maria Jaromin;Volodymyr Kindratenko;Joerg Heintz;Christopher Zallek;Yu-Xiong Wang", "authorids": "~Andy_Zhou2;~Samuel_Li1;~Pranav_Sriram1;~Xiang_Li19;~Jiahua_Dong3;~Ansh_Sharma1;~Yuanyi_Zhong1;~Shirui_Luo2;~Maria_Jaromin1;~Volodymyr_Kindratenko1;~Joerg_Heintz1;~Christopher_Zallek1;~Yu-Xiong_Wang1", "gender": "M;M;M;M;M;M;;M;F;M;M;;", "homepage": "https://www.andyzhou.ai;https://samwli.github.io;;https://ryanxli.github.io/;;https://anshgs.com;;;;https://ece.illinois.edu/about/directory/faculty/kindrtnk;;;https://yxw.cs.illinois.edu/", "dblp": ";;;;247/5746-2;;194/2743;;;70/536.html;;;35/10700", "google_scholar": "https://scholar.google.com/citations?hl=en;;;3Ds7hOQAAAAJ;;i6lNFGYAAAAJ;PtmjwooAAAAJ;lTT-vAoAAAAJ;;Cy81VegAAAAJ;;;T_Q-xDkAAAAJ", "orcid": ";;;;;0000-0001-7959-2392;;0000-0002-9360-1299;;0000-0002-9336-4756;0000-0002-4542-390X;0000-0002-5959-3192;", "linkedin": "andy-zhou-679376206/;samuelwli/;pranavsriram1/;;jiahua-dong-190431268/;anshgs/;;shiruiluo/;maria-jaromin;;joergheintz/;;", "or_profile": "~Andy_Zhou2;~Samuel_Li1;~Pranav_Sriram1;~Xiang_Li19;~Jiahua_Dong3;~Ansh_Sharma1;~Yuanyi_Zhong1;~Shirui_Luo2;~Maria_Jaromin1;~Volodymyr_Kindratenko1;~Joerg_Heintz1;~Christopher_Zallek1;~Yu-Xiong_Wang1", "aff": "Department of Computer Science;University of Illinois, Urbana Champaign;University of Illinois at Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;UIUC Department of Computer Science;University of Illinois Urbana Champaign;University of Illinois;;University of Illinois, Urbana Champaign;University of Illinois Urbana Champaign;OSF HealthCare;Department of Computer Science, University of Illinois Urbana-Champaign", "aff_domain": "cs.illinois.edu;uiuc.edu;illinois.edu;illinois.edu;illinois.edu;cs.illinois.edu;illinois.edu;illinois.edu;;ncsa.illinois.edu;illinois.edu;osfhealthcare.org;cs.illinois.edu", "position": "Undergrad student;Undergrad student;MS student;PhD student;MS student;Undergrad student;PhD student;Researcher;;Assistant Director;Researcher;Clinical Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023youtubepd,\ntitle={YouTube{PD}: A Multimodal Benchmark for Parkinson{\\textquoteright}s Disease Analysis},\nauthor={Andy Zhou and Samuel Li and Pranav Sriram and Xiang Li and Jiahua Dong and Ansh Sharma and Yuanyi Zhong and Shirui Luo and Maria Jaromin and Volodymyr Kindratenko and Joerg Heintz and Christopher Zallek and Yu-Xiong Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=AIeeXKsspI}\n}", "github": "", "project": "", "reviewers": "xcbn;NCw3;xYDp;tFQT;6pw4", "pdf_size": 9658177, "rating": "5;5;6;8;8", "confidence": "3;5;3;5;3", "wc_summary_and_contributions": "25;37;49;105;139", "wc_strengths": "38;6;36;33;75", "wc_improvement": "16;87;97;166;89", "wc_limitations": "22;50;93;16;4", "wc_correctness": "2;15;42;1;29", "wc_clarity": "3;1;2;1;5", "wc_relation_to_prior_work": "2;1;1;1;16", "wc_documentation": "3;1;119;4;2", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "112;199;440;328;360", "wc_reply_reviewers": 
"0;0;0;15;0", "wc_reply_authors": "447;1192;1667;1224;856", "reply_reviewers": "0;0;0;3;0", "reply_authors": "1;2;3;2;2", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "wc_summary_and_contributions_avg": [ 71.0, 43.67150100465978 ], "wc_strengths_avg": [ 37.6, 22.00545386943882 ], "wc_improvement_avg": [ 91.0, 47.552076715954264 ], "wc_limitations_avg": [ 37.0, 31.811947441173732 ], "wc_correctness_avg": [ 17.8, 15.816447135814036 ], "wc_clarity_avg": [ 2.4, 1.4966629547095764 ], "wc_relation_to_prior_work_avg": [ 4.2, 5.912698199637793 ], "wc_documentation_avg": [ 25.8, 46.61072837877563 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 287.8, 117.29006778069488 ], "wc_reply_reviewers_avg": [ 3.0, 6.0 ], "wc_reply_authors_avg": [ 1077.2, 407.105588269186 ], "reply_reviewers_avg": [ 0.6, 1.2 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.06019292654288467, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11289497670902715077&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.illinois.edu;uiuc.edu;illinois.edu;illinois.edu;illinois.edu;cs.illinois.edu;illinois.edu;illinois.edu;;ncsa.illinois.edu;illinois.edu;osfhealthcare.org;cs.illinois.edu", "author_num": 13, "aff_unique_index": "0;1;1;1;1;1;1;2;1;1;3;1", "aff_unique_norm": "Unknown Institution;University of Illinois Urbana-Champaign;University of Illinois;OSF HealthCare", "aff_unique_dep": "Department of Computer Science;;;", "aff_unique_url": ";https://illinois.edu;https://www.illinois.edu;https://www.osfhealthcare.org", "aff_unique_abbr": ";UIUC;UIUC;", "aff_campus_unique_index": "1;1;1;1;1;1;1;1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1;1;1;1;1;1;1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Boosting Adversarial Transferability by Achieving Flat Local Maxima", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72545", "id": "AKAMNDe2Sw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/de1739eba209c682a90ec3669229ab2d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AKAMNDe2Sw", "openreview": "https://openreview.net/forum?id=AKAMNDe2Sw", "poster": "/media/PosterPDFs/NeurIPS%202023/72545.png?t=1699462863.674807", "slides": "https://nips.cc/virtual/2023/poster/72545", "video": "https://nips.cc/virtual/2023/poster/72545", "author_site": "Zhijin Ge, Hongying Liu, Wang Xiaosen, Fanhua Shang, Yuanyuan Liu", "tldr": "", "abstract": "Transfer-based attack adopts the adversarial examples generated on the surrogate model to attack various models, making it applicable in the physical world and attracting increasing interest. Recently, various adversarial attacks have emerged to boost adversarial transferability from different perspectives. In this work, inspired by the observation that flat local minima are correlated with good generalization, we assume and empirically validate that adversarial examples at a flat local region tend to have good transferability by introducing a penalized gradient norm to the original loss function. Since directly optimizing the gradient regularization norm is computationally expensive and intractable for generating adversarial examples, we propose an approximation optimization method to simplify the gradient update of the objective function. 
Specifically, we randomly sample an example and adopt a first-order procedure to approximate the curvature of the second-order Hessian matrix, which makes the computation more efficient by interpolating two Jacobian matrices. Meanwhile, in order to obtain a more stable gradient direction, we randomly sample multiple examples and average the gradients of these examples to reduce the variance due to random sampling during the iterative process. Extensive experimental results on the ImageNet-compatible dataset show that the proposed method can generate adversarial examples at flat local regions, and significantly improve the adversarial transferability on both normally trained and adversarially trained models compared to the state-of-the-art attacks. Our code is available at: https://github.com/Trustworthy-AI-Group/PGN.", "keywords": "Adversarial attack;Adversarial transferability;Black-box Attack", "primary_area": "", "supplementary_material": "/attachment/8f53157347667d3e3f953e5b213d63152cc33700.zip", "author": "Zhijin Ge;Hongying Liu;Xiaosen Wang;Fanhua Shang;Yuanyuan Liu", "authorids": "~Zhijin_Ge1;~Hongying_Liu2;~Xiaosen_Wang1;~Fanhua_Shang2;~Yuanyuan_Liu1", "gender": "M;F;M;M;Not Specified", "homepage": "https://github.com/Zhijin-Ge;;https://xiaosen-wang.github.io/;https://sites.google.com/site/fanhua217/home;https://dblp.uni-trier.de/pid/97/2119-1.html", "dblp": "309/0727;43/8776;241/6284;66/9057;97/2119-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;S0pp67AAAAAJ;sVeDOcsAAAAJ;rk_HZTkAAAAJ;https://scholar.google.com/scholar?q=Yuanyuan+Liu", "orcid": "0009-0000-3471-2629;0000-0001-5961-5569;;0000-0002-1040-352X;0000-0001-8646-8533", "linkedin": ";;;;", "or_profile": "~Zhijin_Ge1;~Hongying_Liu2;~Xiaosen_Wang1;~Fanhua_Shang2;~Yuanyuan_Liu1", "aff": "Xi'an University of Electronic Science and Technology;Xidian University;Huawei Technologies Ltd.;Tianjin University;The Chinese University of Hong Kong", "aff_domain": "xidian.edu.cn;xidian.edu.cn;huawei.com;tju.edu.cn;cuhk.edu.hk", "position": "MS student;Associate Professor;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nge2023boosting,\ntitle={Boosting Adversarial Transferability by Achieving Flat Local Maxima},\nauthor={Zhijin Ge and Xiaosen Wang and Hongying Liu and Fanhua Shang and Yuanyuan Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AKAMNDe2Sw}\n}", "github": "", "project": "", "reviewers": "8Bpx;2kUx;xHRb;PZXp", "pdf_size": 25789047, "rating": "3;5;5;7", "confidence": "4;4;5;4", "soundness": "2;2;4;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "113;76;42;71", "wc_strengths": "77;44;30;75", "wc_weaknesses": "137;123;195;27", "wc_questions": "311;93;3;60", "wc_limitations": "15;9;23;1", "wc_review": "653;345;293;234", "wc_reply_reviewers": "30;51;30;41", "wc_reply_authors": "429;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 25.243811122728676 ], "wc_strengths_avg": [ 56.5, 20.130822139197395 ], "wc_weaknesses_avg": [ 120.5, 60.355198616192126 ], "wc_questions_avg": [ 116.75, 116.67985044556751 ], "wc_limitations_avg": [ 12.0, 8.06225774829855 ], "wc_review_avg": [ 381.25, 161.7349297461745 ], "wc_reply_reviewers_avg": [ 38.0,
8.74642784226795 ], "wc_reply_authors_avg": [ 107.25, 185.7624491117621 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2458202628078915106&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "xidian.edu.cn;xidian.edu.cn;huawei.com;tju.edu.cn;cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Xi'an University of Electronic Science and Technology;Xidian University;Huawei;Tianjin University;Chinese University of Hong Kong", "aff_unique_dep": ";;Huawei Technologies;;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;https://www.huawei.com;http://www.tju.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "Xidian University;Xidian;Huawei;TJU;CUHK", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Xi'an;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "3D-Aware Visual Question Answering about Parts, Poses and Occlusions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72544", "id": "AMIJEupsNq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b783c44ba9adbc30344473dc633b4869-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AMIJEupsNq", "openreview": "https://openreview.net/forum?id=AMIJEupsNq", "poster": "/media/PosterPDFs/NeurIPS%202023/72544.png?t=1699452343.670369", "slides": "https://nips.cc/virtual/2023/poster/72544", "video": "https://nips.cc/virtual/2023/poster/72544", "author_site": "Xingrui Wang, Wufei Ma, Zhuowan Li, Adam Kortylewski, Alan Yuille", "tldr": "", "abstract": "Despite rapid progress in Visual question answering (\\textit{VQA}), existing datasets and models mainly focus on testing reasoning in 2D. However, it is important that VQA models also understand the 3D structure of visual scenes, for example to support tasks like navigation or manipulation. This includes an understanding of 3D object poses, their parts, and occlusions. In this work, we introduce the task of 3D-aware VQA, which focuses on challenging questions that require compositional reasoning over the 3D structure of visual scenes. We address 3D-aware VQA from both the dataset and the model perspective. First, we introduce Super-CLEVR-3D, a compositional reasoning dataset that contains questions about object parts, their 3D poses, and occlusions. Second, we propose PO3D-VQA, a 3D-aware VQA model that marries two powerful ideas: probabilistic neural symbolic program execution for reasoning and deep neural networks with 3D generative representations of objects for robust visual recognition.
Our experimental results show that our model PO3D-VQA outperforms existing methods significantly, but we still observe a substantial performance gap compared to 2D VQA benchmarks, indicating that 3D-aware VQA remains an important open research area.", "keywords": "VQA;reasoning;3D scene understanding;analysis-by-synthesis;neural modular network;neuro-symbolic reasoning", "primary_area": "", "supplementary_material": "/attachment/d61cff42acc3b6bcbd5a25cfa032ea16d5d5971b.zip", "author": "Xingrui Wang;Wufei Ma;Zhuowan Li;Adam Kortylewski;Alan Yuille", "authorids": "~Xingrui_Wang1;~Wufei_Ma1;~Zhuowan_Li1;~Adam_Kortylewski1;~Alan_Yuille1", "gender": "M;M;F;;M", "homepage": "https://xingruiwang.github.io/;https://wufeim.github.io;https://lizw14.github.io;https://gvrl.mpi-inf.mpg.de/;", "dblp": "280/8952;243/2814;228/6963;161/0772;y/AlanLYuille", "google_scholar": "-MWsoWUAAAAJ;mYkvHdIAAAAJ;Fft1WvwAAAAJ;https://scholar.google.ch/citations?user=tRLUOBIAAAAJ;", "orcid": ";;;0000-0002-9146-4403;", "linkedin": ";wufei-ma-256352133/;;;", "or_profile": "~Xingrui_Wang1;~Wufei_Ma1;~Zhuowan_Li1;~Adam_Kortylewski1;~Alan_Yuille1", "aff": "University of Southern California;Meta;Amazon;Albert-Ludwigs-Universit\u00e4t Freiburg;Johns Hopkins University", "aff_domain": "usc.edu;meta.com;amazon.com;uni-freiburg.de;johnshopkins.edu", "position": "MS student;Research Intern;Intern;Research Group Leader;Full Professor", "bibtex": "@inproceedings{\nwang2023daware,\ntitle={3D-Aware Visual Question Answering about Parts, Poses and Occlusions},\nauthor={Xingrui Wang and Wufei Ma and Zhuowan Li and Adam Kortylewski and Alan Yuille},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AMIJEupsNq}\n}", "github": "", "project": "", "reviewers": "NU9U;HP3a;sJrg;SUrx", "pdf_size": 4996658, "rating": "5;6;6;9", "confidence": "4;2;5;2", "soundness": "3;3;2;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "75;130;67;83", "wc_strengths": "133;97;71;39", "wc_weaknesses": "252;168;126;10", "wc_questions": "63;6;77;23", "wc_limitations": "6;15;20;28", "wc_review": "529;416;361;183", "wc_reply_reviewers": "0;49;100;0", "wc_reply_authors": "0;41;405;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.75, 24.47830672248389 ], "wc_strengths_avg": [ 85.0, 34.49637662132068 ], "wc_weaknesses_avg": [ 139.0, 87.20665112249179 ], "wc_questions_avg": [ 42.25, 28.82164984868146 ], "wc_limitations_avg": [ 17.25, 7.980444849756184 ], "wc_review_avg": [ 372.25, 124.9267285251639 ], "wc_reply_reviewers_avg": [ 37.25, 41.384628788959795 ], "wc_reply_authors_avg": [ 111.5, 170.2769802410179 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16058989354333932692&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "usc.edu;meta.com;amazon.com;uni-freiburg.de;johnshopkins.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Southern California;Meta;Amazon;Albert-Ludwigs-Universit\u00e4t Freiburg;Johns Hopkins University", "aff_unique_dep": ";Meta Platforms,
Inc.;Amazon.com, Inc.;;", "aff_unique_url": "https://www.usc.edu;https://meta.com;https://www.amazon.com;https://www.uni-freiburg.de;https://www.jhu.edu", "aff_unique_abbr": "USC;Meta;Amazon;Albert-Ludwigs-Universit\u00e4t;JHU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;Freiburg", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "Linguistic Binding in Diffusion Models: Enhancing Attribute Correspondence through Attention Map Alignment", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72543", "id": "AOKU4nRw1W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b08d733a5d45a547344c4e9d88bb8bc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AOKU4nRw1W", "openreview": "https://openreview.net/forum?id=AOKU4nRw1W", "poster": "/media/PosterPDFs/NeurIPS%202023/72543.png?t=1700948985.4404268", "slides": "https://nips.cc/virtual/2023/poster/72543", "video": "https://nips.cc/virtual/2023/poster/72543", "author_site": "Royi Rassin, Eran Hirsch, Daniel Glickman, Shauli Ravfogel, Yoav Goldberg, Gal Chechik", "tldr": "", "abstract": "Text-conditioned image generation models often generate incorrect associations between entities and their visual attributes. This reflects an impaired mapping between linguistic binding of entities and modifiers in the prompt and visual binding of the corresponding elements in the generated image. As one example, a query like ``a pink sunflower and a yellow flamingo'' may incorrectly produce an image of a yellow sunflower and a pink flamingo. To remedy this issue, we propose SynGen, an approach which first syntactically analyses the prompt to identify entities and their modifiers, and then uses a novel loss function that encourages the cross-attention maps to agree with the linguistic binding reflected by the syntax. Specifically, we encourage large overlap between attention maps of entities and their modifiers, and small overlap with other entities and modifier words. The loss is optimized during inference, without retraining or fine-tuning the model. Human evaluation on three datasets, including one new and challenging set, demonstrates significant improvements of SynGen compared with current state-of-the-art methods.
This work highlights how making use of sentence structure during inference can efficiently and substantially improve the faithfulness of text-to-image generation.", "keywords": "syntax;diffusion;stable diffusion;attribute;attention", "primary_area": "", "supplementary_material": "/attachment/e62ae60400a4bfd6c56f40dad1c6a6453a39dbcc.zip", "author": "Royi Rassin;Eran Hirsch;Daniel Glickman;Shauli Ravfogel;Yoav Goldberg;Gal Chechik", "authorids": "~Royi_Rassin1;~Eran_Hirsch1;~Daniel_Glickman1;~Shauli_Ravfogel1;~Yoav_Goldberg1;~Gal_Chechik1", "gender": "M;M;;M;M;", "homepage": "https://royi-rassin.netlify.app/;https://eranhirs.github.io/;;https://github.com/Shaul1321;https://www.cs.biu.ac.il/~yogo;https://chechiklab.biu.ac.il/~gal/", "dblp": "331/5719;302/4300.html;;227/2231;68/5296;c/GalChechik", "google_scholar": "_6g4hxsAAAAJ;GPsTrDEAAAAJ;;;https://scholar.google.co.il/citations?user=0rskDKgAAAAJ;Wk2gAZUAAAAJ", "orcid": ";;;;;0000-0001-9164-5303", "linkedin": "royi-rassin-4b8085163/;;https://linkedin.com/in/daniel-glickman-7134a712b;;;", "or_profile": "~Royi_Rassin1;~Eran_Hirsch1;~Daniel_Glickman1;~Shauli_Ravfogel1;~Yoav_Goldberg1;~Gal_Chechik1", "aff": "Bar-Ilan University;Bar-Ilan University;Tel Aviv University;Bar-Ilan University;Allen Institute for Artificial Intelligence;NVIDIA", "aff_domain": "biu.ac.il;biu.ac.il;tau.ac.il;biu.ac.il;allenai.org;nvidia.com", "position": "PhD student;PhD student;MS student;PhD student;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nrassin2023linguistic,\ntitle={Linguistic Binding in Diffusion Models: Enhancing Attribute Correspondence through Attention Map Alignment},\nauthor={Royi Rassin and Eran Hirsch and Daniel Glickman and Shauli Ravfogel and Yoav Goldberg and Gal Chechik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AOKU4nRw1W}\n}", "github": "", "project": "", "reviewers": "GWNd;LWw2;RH2g;EHJQ", "pdf_size": 14755456, "rating": "6;7;7;8", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "169;153;45;159", "wc_strengths": "84;125;86;131", "wc_weaknesses": "37;206;83;46", "wc_questions": "85;24;79;1", "wc_limitations": "1;29;2;7", "wc_review": "376;537;295;344", "wc_reply_reviewers": "17;49;16;89", "wc_reply_authors": "0;28;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.5, 50.26678824034812 ], "wc_strengths_avg": [ 106.5, 21.615966321217286 ], "wc_weaknesses_avg": [ 93.0, 67.47962655498324 ], "wc_questions_avg": [ 47.25, 35.75174820900371 ], "wc_limitations_avg": [ 9.75, 11.344051304538427 ], "wc_review_avg": [ 388.0, 90.73312515283489 ], "wc_reply_reviewers_avg": [ 42.75, 29.819247140060394 ], "wc_reply_authors_avg": [ 7.0, 12.12435565298214 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4096045248149802358&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "biu.ac.il;biu.ac.il;tau.ac.il;biu.ac.il;allenai.org;nvidia.com", "author_num": 6, "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": 
"Bar-Ilan University;Tel Aviv University;Allen Institute for Artificial Intelligence;NVIDIA", "aff_unique_dep": ";;;NVIDIA Corporation", "aff_unique_url": "https://www.biu.ac.il;https://www.tau.ac.il;https://allenai.org;https://www.nvidia.com", "aff_unique_abbr": "BIU;TAU;AI2;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "When Do Transformers Shine in RL? Decoupling Memory from Credit Assignment", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72542", "id": "APGXBNkt6h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9dc5accb1e4f4a9798eae145f2e4869b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=APGXBNkt6h", "openreview": "https://openreview.net/forum?id=APGXBNkt6h", "poster": "/media/PosterPDFs/NeurIPS%202023/72542.png?t=1699408379.6055212", "slides": "https://nips.cc/virtual/2023/poster/72542", "video": "https://nips.cc/virtual/2023/poster/72542", "author_site": "Tianwei Ni, Michel Ma, Benjamin Eysenbach, Pierre-Luc Bacon", "tldr": "", "abstract": "Reinforcement learning (RL) algorithms face two distinct challenges: learning effective representations of past and present observations, and determining how actions influence future returns. Both challenges involve modeling long-term dependencies. The Transformer architecture has been very successful to solve problems that involve long-term dependencies, including in the RL domain. However, the underlying reason for the strong performance of Transformer-based RL methods remains unclear: is it because they learn effective memory, or because they perform effective credit assignment? After introducing formal definitions of memory length and credit assignment length, we design simple configurable tasks to measure these distinct quantities. Our empirical results reveal that Transformers can enhance the memory capability of RL algorithms, scaling up to tasks that require memorizing observations $1500$ steps ago. However, Transformers do not improve long-term credit assignment. In summary, our results provide an explanation for the success of Transformers in RL, while also highlighting an important area for future research and benchmark design. Our code is open-sourced at https://github.com/twni2016/Memory-RL.", "keywords": "Memory-based RL;Transformers;Credit Assignment;Online RL;Model-free RL", "primary_area": "", "supplementary_material": "/attachment/1f016e9f6bafb483efcdae04ecee28b89ecfb28d.pdf", "author": "Tianwei Ni;Michel Ma;Benjamin Eysenbach;Pierre-Luc Bacon", "authorids": "~Tianwei_Ni1;~Michel_Ma1;~Benjamin_Eysenbach1;~Pierre-Luc_Bacon1", "gender": "M;M;M;", "homepage": "https://twni2016.github.io/;;https://ben-eysenbach.github.io/;", "dblp": "230/8153;;192/1863;", "google_scholar": "njAD34UAAAAJ;;DRnOvU8AAAAJ;", "orcid": ";;0009-0000-7136-6307;", "linkedin": ";michel-ma/;benjamin-eysenbach-a7235775/;", "or_profile": "~Tianwei_Ni1;~Michel_Ma1;~Benjamin_Eysenbach1;~Pierre-Luc_Bacon1", "aff": "Mila - Quebec Artificial Intelligence Institute;University of Montreal;Carnegie Mellon University;", "aff_domain": "mila.quebec;umontreal.ca;cmu.edu;", "position": "PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nni2023when,\ntitle={When Do Transformers Shine in {RL}? 
Decoupling Memory from Credit Assignment},\nauthor={Tianwei Ni and Michel Ma and Benjamin Eysenbach and Pierre-Luc Bacon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=APGXBNkt6h}\n}", "github": "", "project": "", "reviewers": "2AdR;kd6F;mXpQ;naYb", "pdf_size": 1181448, "rating": "6;7;7;8", "confidence": "5;4;3;2", "soundness": "2;3;4;4", "novelty": "3;4;4;4", "presentation": "3;4;4;4", "wc_summary": "96;124;114;102", "wc_strengths": "90;163;9;50", "wc_weaknesses": "420;59;59;69", "wc_questions": "362;1;49;186", "wc_limitations": "65;1;6;6", "wc_review": "1033;348;237;413", "wc_reply_reviewers": "35;0;11;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.0, 10.816653826391969 ], "wc_strengths_avg": [ 78.0, 56.819890883386954 ], "wc_weaknesses_avg": [ 151.75, 154.92800747443957 ], "wc_questions_avg": [ 149.5, 140.21501346146923 ], "wc_limitations_avg": [ 19.5, 26.348624252510795 ], "wc_review_avg": [ 507.75, 309.7138800570617 ], "wc_reply_reviewers_avg": [ 15.0, 12.668859459319927 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9486832980505138, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17633048856569074500&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "mila.quebec;umontreal.ca;cmu.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Quebec Artificial Intelligence Institute;University of Montreal;Carnegie Mellon University", "aff_unique_dep": "Artificial Intelligence;;", "aff_unique_url": "https://mila.quebec;https://wwwumontreal.ca;https://www.cmu.edu", "aff_unique_abbr": "Mila;UM;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Differentiable Sampling of Categorical Distributions Using the CatLog-Derivative Trick", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72541", "id": "AQyqxXctsN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61202bb341e7e0a6026ea134a5057abf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AQyqxXctsN", "openreview": "https://openreview.net/forum?id=AQyqxXctsN", "poster": "/media/PosterPDFs/NeurIPS%202023/72541.png?t=1701861828.8986273", "slides": "https://nips.cc/virtual/2023/poster/72541", "video": "https://nips.cc/virtual/2023/poster/72541", "author_site": "Lennert De Smet, Emanuele Sansone, Pedro Zuidberg Dos Martires", "tldr": "", "abstract": "Categorical random variables can faithfully represent the discrete and uncertain aspects of data as part of a discrete latent variable model. Learning in such models necessitates taking gradients with respect to the parameters of the categorical probability distributions, which is often intractable due to their combinatorial nature. A popular technique to estimate these otherwise intractable gradients is the Log-Derivative trick. This trick forms the basis of the well-known REINFORCE gradient estimator and its many extensions. 
While the Log-Derivative trick allows us to differentiate through samples drawn from categorical distributions, it does not take into account the discrete nature of the distribution itself. Our first contribution addresses this shortcoming by introducing the CatLog-Derivative trick -- a variation of the Log-Derivative trick tailored towards categorical distributions. Secondly, we use the CatLog-Derivative trick to introduce IndeCateR, a novel and unbiased gradient estimator for the important case of products of independent categorical distributions with provably lower variance than REINFORCE. Thirdly, we empirically show that IndeCateR can be efficiently implemented and that its gradient estimates have significantly lower bias and variance for the same number of samples compared to the state of the art.", "keywords": "gradient estimation;categorical random variables;probability theory;discrete distributions", "primary_area": "", "supplementary_material": "/attachment/68e60ba10042a5d1c7c0f9094edb9bb2f611b42e.zip", "author": "Lennert De Smet;Emanuele Sansone;Pedro Zuidberg Dos Martires", "authorids": "~Lennert_De_Smet1;~Emanuele_Sansone1;~Pedro_Zuidberg_Dos_Martires1", "gender": "M;;M", "homepage": ";;https://pedrozudo.github.io/", "dblp": "342/2788;;223/4292", "google_scholar": ";;", "orcid": "0000-0003-3136-0634;;", "linkedin": ";;", "or_profile": "~Lennert_De_Smet1;~Emanuele_Sansone1;~Pedro_Zuidberg_Dos_Martires1", "aff": "KU Leuven;;", "aff_domain": "kuleuven.be;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nsmet2023differentiable,\ntitle={Differentiable Sampling of Categorical Distributions Using the CatLog-Derivative Trick},\nauthor={Lennert De Smet and Emanuele Sansone and Pedro Zuidberg Dos Martires},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AQyqxXctsN}\n}", "github": "", "project": "", "reviewers": "szeH;JDGY;8h59;i3wk;Z21m", "pdf_size": 1334342, "rating": "3;4;7;7;7", "confidence": "4;5;4;3;4", "soundness": "2;3;3;3;3", "novelty": "1;1;3;3;4", "presentation": "2;4;3;2;3", "wc_summary": "109;54;240;62;278", "wc_strengths": "51;42;60;65;101", "wc_weaknesses": "260;323;7;42;494", "wc_questions": "27;218;51;90;206", "wc_limitations": "33;8;7;9;18", "wc_review": "480;645;365;268;1097", "wc_reply_reviewers": "121;230;68;0;263", "wc_reply_authors": "231;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.6, 1.7435595774162693 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 1.2 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 148.6, 92.86032522019293 ], "wc_strengths_avg": [ 63.8, 20.193068117549647 ], "wc_weaknesses_avg": [ 225.2, 181.21964573412012 ], "wc_questions_avg": [ 118.4, 79.11662277928704 ], "wc_limitations_avg": [ 15.0, 9.818350166906862 ], "wc_review_avg": [ 571.0, 291.4371287259055 ], "wc_reply_reviewers_avg": [ 136.4, 98.29465906141594 ], "wc_reply_authors_avg": [ 46.2, 92.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5441071875825088, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7166846408913579965&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 11, "email": "kuleuven.be;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Katholieke Universiteit Leuven", 
"aff_unique_dep": "", "aff_unique_url": "https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven", "aff_country_unique_index": "0", "aff_country_unique": "Belgium" }, { "id": "ARJG1kr8A7", "title": "GPT is becoming a Turing machine: Here are some ways to program it", "track": "main", "status": "Reject", "tldr": "", "abstract": "We demonstrate that, through appropriate prompting, GPT-3 family of models can be triggered to perform iterative behaviours necessary to execute (rather than just write or recall) programs that involve loops, including several popular algorithms found in computer science curricula or software developer interviews. We trigger execution and description of Iterations by Regimenting Self-Attention (IRSA) in one (or a combination) of three ways: 1) Using strong repetitive structure in an example of an execution path of a target program for one particular input, 2) Prompting with fragments of execution paths, and 3) Explicitly forbidding (skipping) self-attention to parts of the generated text. On a dynamic program execution, IRSA leads to larger accuracy gains than replacing the model with the much more powerful GPT-4. IRSA has promising applications in education, as the prompts and responses resemble student assignments in data structures and algorithms classes. Our findings hold implications for evaluating LLMs, which typically target the in-context learning: We show that prompts that may not even cover one full task example can trigger algorithmic behaviour, allowing solving problems previously thought of as hard for LLMs, such as logical puzzles. Consequently, prompt design plays an even more critical role in LLM performance than previously recognized.", "keywords": "Large Language Models;Turing machine;GPT-3;GPT-4;Prompt design;Programming", "primary_area": "", "supplementary_material": "/attachment/037bc14f89971b43f4c36cf4a040f295d98f786f.pdf", "author": "Ana Jojic;Zhen Wang;Nebojsa Jojic", "authorids": "~Ana_Jojic1;~Zhen_Wang6;~Nebojsa_Jojic1", "gender": "F;M;", "homepage": ";https://zhenwang9102.github.io;www.research.microsoft.com/~jojic", "dblp": ";78/6727;20/1944", "google_scholar": ";asBaytUAAAAJ;", "orcid": ";0000-0001-7407-5118;", "linkedin": "ana-jojic-9a26a715a/;zhenwang9102/;", "or_profile": "~Ana_Jojic1;~Zhen_Wang6;~Nebojsa_Jojic1", "aff": "Fred Hutchinson Cancer Research Center;University of California, San Diego;Microsoft Research", "aff_domain": "fredhutch.org;ucsd.edu; ", "position": "Researcher;Postdoc;Researcher", "bibtex": "@misc{\njojic2023gpt,\ntitle={{GPT} is becoming a Turing machine: Here are some ways to program it},\nauthor={Ana Jojic and Zhen Wang and Nebojsa Jojic},\nyear={2023},\nurl={https://openreview.net/forum?id=ARJG1kr8A7}\n}", "github": "", "project": "", "reviewers": "tn6w;HgMc;AAHn;n7ti;2ucu", "site": "https://openreview.net/forum?id=ARJG1kr8A7", "pdf_size": 424763, "rating": "4;4;6;6;7", "confidence": "4;3;3;3;4", "soundness": "3;3;2;3;4", "novelty": "1;2;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "51;92;124;184;216", "wc_strengths": "98;10;164;321;171", "wc_weaknesses": "212;12;256;933;48", "wc_questions": "34;360;13;206;112", "wc_limitations": "77;15;16;5;1", "wc_review": "472;489;573;1649;548", "wc_reply_reviewers": "694;0;954;1028;0", "wc_reply_authors": "956;168;1090;463;0", "reply_reviewers": "3;0;2;2;0", "reply_authors": "4;2;3;2;1", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.4, 
0.4898979485566356 ], "wc_summary_avg": [ 133.4, 59.95865241981344 ], "wc_strengths_avg": [ 152.8, 102.12032119025086 ], "wc_weaknesses_avg": [ 292.2, 333.62038307033936 ], "wc_questions_avg": [ 145.0, 127.10625476348518 ], "wc_limitations_avg": [ 22.8, 27.701263509089255 ], "wc_review_avg": [ 746.2, 452.9165044464598 ], "wc_reply_reviewers_avg": [ 535.2, 450.85270321913345 ], "wc_reply_authors_avg": [ 535.4, 426.9339995830737 ], "reply_reviewers_avg": [ 1.4, 1.2000000000000002 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.06804138174397723, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8178619334231825325&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Fred Hutchinson Cancer Research Center;University of California, San Diego;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.fredhutch.org;https://www.ucsd.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Fred Hutch;UCSD;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dense and Aligned Captions (DAC) Promote Compositional Reasoning in VL Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72540", "id": "ARrwf7Ev2T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efe406d6d2674d176cdcd958ce605d17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ARrwf7Ev2T", "openreview": "https://openreview.net/forum?id=ARrwf7Ev2T", "poster": "/media/PosterPDFs/NeurIPS%202023/72540.png?t=1701252608.243393", "slides": "https://nips.cc/virtual/2023/poster/72540", "video": "https://nips.cc/virtual/2023/poster/72540", "author_site": "Sivan Doveh, Assaf Arbelle, Sivan Harary, Roei Herzig, Donghyun Kim, Paola Cascante-Bonilla, Amit Alfassy, Rameswar Panda, Raja Giryes, Rogerio Feris, Shimon Ullman, Leonid Karlinsky", "tldr": "", "abstract": "Vision and Language (VL) models offer an effective method for aligning representation spaces of images and text allowing for numerous applications such as cross-modal retrieval, visual and multi-hop question answering, captioning, and many more. However, the aligned image-text spaces learned by all the popular VL models are still suffering from the so-called 'object bias' - their representations behave as 'bags of nouns' mostly ignoring or downsizing the attributes, relations, and states of objects described/appearing in texts/images. Although some great attempts at fixing these `compositional reasoning' issues were proposed in the recent literature, the problem is still far from being solved. In this paper, we uncover two factors limiting the VL models' compositional reasoning performance. These two factors are properties of the paired VL dataset used for finetuning (or pre-training) the VL model: (i) the caption quality, or in other words 'image-alignment', of the texts; and (ii) the 'density' of the captions in the sense of mentioning all the details appearing on the image. We propose a fine-tuning approach for automatically treating these factors on a standard collection of paired VL data (CC3M). 
Applied to CLIP, our approach yields a significant compositional reasoning performance increase of up to $\\sim27$\\% over the base model, up to $\\sim20$\\% over the strongest baseline, and of $6.7$\\% on average. Our code is provided in the Supplementary and will be released upon acceptance.", "keywords": "computer vision;deep learning;vision and language models", "primary_area": "", "supplementary_material": "/attachment/d3966094ad9cd2b47f12dcac7c7937832a1cd64f.zip", "author": "Sivan Doveh;Assaf Arbelle;Sivan Harary;Roei Herzig;Donghyun Kim;Paola Cascante-Bonilla;Amit Alfassy;Rameswar Panda;Raja Giryes;Rogerio Feris;Shimon Ullman;Leonid Karlinsky", "authorids": "~Sivan_Doveh1;~Assaf_Arbelle1;~Sivan_Harary1;~Roei_Herzig2;~Donghyun_Kim2;~Paola_Cascante-Bonilla1;~Amit_Alfassy1;~Rameswar_Panda1;~Raja_Giryes1;~Rogerio_Feris1;~Shimon_Ullman1;~Leonid_Karlinsky3", "gender": ";M;F;M;M;;M;M;M;M;M;M", "homepage": ";https://www.linkedin.com/in/assaf-arbelle-74065876/?originalSubdomain=il;;https://roeiherz.github.io/;https://cs-people.bu.edu/donhk;https://paolacascante.com/;;https://rpand002.github.io/;https://www.giryes.sites.tau.ac.il/;http://rogerioferis.com;http://www.weizmann.ac.il/math/shimon/;", "dblp": ";168/5494;126/7845;215/5165;;242/7369;236/5808;126/0986;50/7998;;93/2158;05/4463", "google_scholar": ";https://scholar.google.co.uk/citations?user=uU_V_PsAAAAJ;https://scholar.google.co.il/citations?hl=iw;https://scholar.google.co.il/citations?user=6Q-289IAAAAJ;https://scholar.google.co.kr/citations?user=UsqNPH4AAAAJ;4viWbgIAAAAJ;hVyhT-gAAAAJ;_ySuu6gAAAAJ;https://scholar.google.co.il/citations?user=9aQUYVQAAAAJ;xt3XLjcAAAAJ;XOfA8ckAAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ", "orcid": ";0000-0001-6559-2316;;;;;;;0000-0002-2830-0297;;0000-0003-4331-298X;", "linkedin": "sivan-doveh-128849b1;assaf-arbelle-74065876/?originalSubdomain=il;;roei-herzig-7534615a/;;;amit-alfassy/;;raja-giryes-0818935/;;;", "or_profile": "~Sivan_Doveh1;~Assaf_Arbelle1;~Sivan_Harary1;~Roei_Herzig2;~Donghyun_Kim2;~Paola_Cascante-Bonilla1;~Amit_Alfassy1;~Rameswar_Panda1;~Raja_Giryes1;~Rogerio_Feris1;~Shimon_Ullman1;~Leonid_Karlinsky3", "aff": "Weizmann Institute of Science;International Business Machines;IBM;Tel Aviv University;MIT-IBM Watson AI Lab;;International Business Machines;MIT-IBM Watson AI Lab;Tel Aviv University;International Business Machines;Weizmann Institute of Science;International Business Machines", "aff_domain": "weizmann.ac.il;ibm.com;il.ibm.com;tau.ac.il;ibm.com;;ibm.com;ibm.com;tauex.tau.ac.il;ibm.com;weizmann.ac.il;ibm.com", "position": "PhD student;Researcher;Researcher;PhD student;Researcher;;Researcher;Research Scientist;Associate Professor;Research Manager;Emeritus;Principal Researcher", "bibtex": "@inproceedings{\ndoveh2023dense,\ntitle={Dense and Aligned Captions ({DAC}) Promote Compositional Reasoning in {VL} Models},\nauthor={Sivan Doveh and Assaf Arbelle and Sivan Harary and Roei Herzig and Donghyun Kim and Paola Cascante-Bonilla and Amit Alfassy and Rameswar Panda and Raja Giryes and Rogerio Feris and Shimon Ullman and Leonid Karlinsky},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ARrwf7Ev2T}\n}", "github": "", "project": "", "reviewers": "xcfP;bpWN;BYVP;Fh7T;93oA", "pdf_size": 8087280, "rating": "5;6;6;7;8", "confidence": "5;4;4;4;4", "soundness": "2;2;3;4;4", "novelty": "2;2;3;3;4", "presentation": "2;3;3;4;3", "wc_summary": "59;133;82;268;112", "wc_strengths": "38;86;216;63;79", 
"wc_weaknesses": "34;523;132;118;170", "wc_questions": "55;30;89;26;82", "wc_limitations": "1;1;5;6;43", "wc_review": "187;773;524;481;486", "wc_reply_reviewers": "86;396;0;19;28", "wc_reply_authors": "267;21;0;0;30", "reply_reviewers": "2;1;0;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 130.8, 73.10102598459204 ], "wc_strengths_avg": [ 96.4, 62.03418412456151 ], "wc_weaknesses_avg": [ 195.4, 169.72165448168363 ], "wc_questions_avg": [ 56.4, 25.85033848907979 ], "wc_limitations_avg": [ 11.2, 16.0299719276111 ], "wc_review_avg": [ 490.2, 186.0918053005022 ], "wc_reply_reviewers_avg": [ 105.8, 147.91132478617044 ], "wc_reply_authors_avg": [ 63.6, 102.37695053086901 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.6864064729836443, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16755673547330402861&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "weizmann.ac.il;ibm.com;il.ibm.com;tau.ac.il;ibm.com;;ibm.com;ibm.com;tauex.tau.ac.il;ibm.com;weizmann.ac.il;ibm.com", "author_num": 12, "aff_unique_index": "0;1;1;2;3;1;3;2;1;0;1", "aff_unique_norm": "Weizmann Institute of Science;International Business Machines Corporation;Tel Aviv University;Massachusetts Institute of Technology", "aff_unique_dep": ";;;IBM Watson AI Lab", "aff_unique_url": "https://www.weizmann.org.il;https://www.ibm.com;https://www.tau.ac.il;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "Weizmann;IBM;TAU;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;1;1;0;1;0;1", "aff_country_unique": "Israel;United States" }, { "title": "A Reduction-based Framework for Sequential Decision Making with Delayed Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72539", "id": "AT6NaLPwy0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/915125efea950af378435518b3542e6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AT6NaLPwy0", "openreview": "https://openreview.net/forum?id=AT6NaLPwy0", "poster": "/media/PosterPDFs/NeurIPS%202023/72539.png?t=1701680319.2199445", "slides": "https://nips.cc/virtual/2023/poster/72539", "video": "https://nips.cc/virtual/2023/poster/72539", "author_site": "Yunchang Yang, Han Zhong, Tianhao Wu, Bin Liu, Liwei Wang, Simon Du", "tldr": "", "abstract": "We study stochastic delayed feedback in general single-agent and multi-agent sequential decision making, which includes bandits, single-agent Markov decision processes (MDPs), and Markov games (MGs). We propose a novel reduction-based framework, which turns any multi-batched algorithm for sequential decision making with instantaneous feedback into a sample-efficient algorithm that can handle stochastic delays in sequential decision making. By plugging different multi-batched algorithms into our framework, we provide several examples demonstrating that our framework not only matches or improves existing results for bandits, tabular MDPs, and tabular MGs, but also provides the first line of studies on delays in sequential decision making with function approximation. 
In summary, we provide a complete set of sharp results for single-agent and multi-agent sequential decision making with delayed feedback.", "keywords": "sequential decision making;delay;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Yunchang Yang;Han Zhong;Tianhao Wu;Bin Liu;Liwei Wang;Simon Shaolei Du", "authorids": "~Yunchang_Yang2;~Han_Zhong1;~Tianhao_Wu1;~Bin_Liu9;~Liwei_Wang1;~Simon_Shaolei_Du1", "gender": "M;;M;M;M;M", "homepage": ";https://hanzhong-ml.github.io/;https://thwu1.github.io/tianhaowu/;https://www.researchgate.net/profile/Bin-Liu-120/;http://www.liweiwang-pku.com/;http://simonshaoleidu.com", "dblp": "249/8267;137/8096.html;;35/837-21;;176/5602", "google_scholar": "https://scholar.google.com.hk/citations?user=m8m9nD0AAAAJ;Bk5q_pAAAAAJ;df-THM0AAAAJ;UWCOeegAAAAJ;VZHxoh8AAAAJ;OttawxUAAAAJ", "orcid": ";;;0000-0002-1677-2772;;", "linkedin": ";;tianhao-wu-b069296/;robinlau/;;", "or_profile": "~Yunchang_Yang2;~Han_Zhong1;~Tianhao_Wu1;~Bin_Liu9;~Liwei_Wang1;~Simon_Shaolei_Du1", "aff": "Peking University;Peking University;University of California, Berkeley;Zhejiang Lab;Peking University;Meta Facebook", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;berkeley.edu;zhejianglab.com;pku.edu.cn;fb.com", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Visiting Professor", "bibtex": "@inproceedings{\nyang2023a,\ntitle={A Reduction-based Framework for Sequential Decision Making with Delayed Feedback},\nauthor={Yunchang Yang and Han Zhong and Tianhao Wu and Bin Liu and Liwei Wang and Simon Shaolei Du},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AT6NaLPwy0}\n}", "github": "", "project": "", "reviewers": "nwcC;EWXd;bZKJ;p5oe", "pdf_size": 479206, "rating": "4;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;2;2;3", "wc_summary": "83;52;68;284", "wc_strengths": "97;109;71;130", "wc_weaknesses": "61;165;187;115", "wc_questions": "39;55;229;81", "wc_limitations": "105;15;7;10", "wc_review": "385;396;562;620", "wc_reply_reviewers": "35;25;224;56", "wc_reply_authors": "37;0;111;113", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 121.75, 94.31430167265196 ], "wc_strengths_avg": [ 101.75, 21.323402636539974 ], "wc_weaknesses_avg": [ 132.0, 48.590122453025366 ], "wc_questions_avg": [ 101.0, 75.40557008603542 ], "wc_limitations_avg": [ 34.25, 40.947374763225056 ], "wc_review_avg": [ 490.75, 102.39964599548183 ], "wc_reply_reviewers_avg": [ 85.0, 81.02777301641703 ], "wc_reply_authors_avg": [ 65.25, 48.55087537830806 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11511997633317219336&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;stu.pku.edu.cn;berkeley.edu;zhejianglab.com;pku.edu.cn;fb.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Peking University;University of California, Berkeley;Zhejiang Lab;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": 
"http://www.pku.edu.cn;https://www.berkeley.edu;http://www.zhejianglab.com;https://meta.com", "aff_unique_abbr": "Peking U;UC Berkeley;;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Enhancing User Intent Capture in Session-Based Recommendation with Attribute Patterns", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72538", "id": "AV3iZlDrzF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/621d0fd41c720ab252e178b77c200d90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AV3iZlDrzF", "openreview": "https://openreview.net/forum?id=AV3iZlDrzF", "poster": "/media/PosterPDFs/NeurIPS%202023/72538.png?t=1701716303.8048391", "slides": "https://nips.cc/virtual/2023/poster/72538", "video": "https://nips.cc/virtual/2023/poster/72538", "author_site": "Xin Liu, Zheng Li, Yifan Gao, Jingfeng Yang, Jingfeng Yang, Tianyu Cao, Zhengyang Wang, Bing Yin, Yangqiu Song", "tldr": "", "abstract": "The goal of session-based recommendation in E-commerce is to predict the next item that an anonymous user will purchase based on the browsing and purchase history. However, constructing global or local transition graphs to supplement session data can lead to noisy correlations and user intent vanishing. In this work, we propose the Frequent Attribute Pattern Augmented Transformer (FAPAT) that characterizes user intents by building attribute transition graphs and matching attribute patterns. Specifically, the frequent and compact attribute patterns are served as memory to augment session representations, followed by a gate and a transformer block to fuse the whole session information. Through extensive experiments on two public benchmarks and 100 million industrial data in three domains, we demonstrate that FAPAT consistently outperforms state-of-the-art methods by an average of 4.5% across various evaluation metrics (Hits, NDCG, MRR). 
Besides evaluating the next-item prediction, we estimate the models' capabilities to capture user intents via predicting items' attributes and period-item recommendations.", "keywords": "session-based recommendation;representation learning;pattern mining", "primary_area": "", "supplementary_material": "/attachment/f7ac1afb560c563112625d745023d8452e1ce948.zip", "author": "Xin Liu;Zheng Li;Yifan Gao;Jingfeng Yang;Tianyu Cao;Zhengyang Wang;Bing Yin;Yangqiu Song", "authorids": "~Xin_Liu9;~Zheng_Li9;~Yifan_Gao1;~Jingfeng_Yang2;~Tianyu_Cao1;~Zhengyang_Wang1;~Bing_Yin1;~Yangqiu_Song1", "gender": "M;Not Specified;M;M;M;M;M;M", "homepage": "https://www.cse.ust.hk/~xliucr/;http://yifan-gao.github.io;https://jingfengyang.github.io/;;;;https://www.cse.ust.hk/~yqsong/;https://hsqmlzno1.github.io/", "dblp": "76/1820-39.html;79/3190-1;;65/7211;;;86/2159;10/1143-18", "google_scholar": "https://scholar.google.com.hk/citations?user=WvC4upQAAAAJ;https://scholar.google.com.hk/citations?user=erdMFJwAAAAJ;hysBvrwAAAAJ;kX0CcGUAAAAJ;A4fNBtEAAAAJ;qSOxydEAAAAJ;MdQZ-q8AAAAJ;https://scholar.google.com.hk/citations?user=P6fwn4AAAAAJ", "orcid": "0000-0001-9610-9526;;;;0000-0002-5146-2884;0000-0002-5890-0031;0000-0002-7818-6090;", "linkedin": "xin-liu-179830143;yi-fan-gao/;jingfeng-yang-797864172/;;;bingyin;yqsong/;", "or_profile": "~Xin_Liu9;~Yifan_Gao1;~Jingfeng_Yang2;~Tianyu_Cao1;~Zhengyang_Wang1;~Bing_Yin1;~Yangqiu_Song1;~zheng_li4", "aff": "Hong Kong University of Science and Technology;Amazon;Amazon;;Amazon;Amazon;Hong Kong University of Science and Technology;Amazon", "aff_domain": "ust.hk;amazon.com;amazon.com;;amazon.com;amazon.com;ust.hk;amazon.com", "position": "PhD student;Researcher;Researcher;;Researcher;Senior Science Manager;Associate Professor;Researcher", "bibtex": "@inproceedings{\nliu2023enhancing,\ntitle={Enhancing User Intent Capture in Session-Based Recommendation with Attribute Patterns},\nauthor={Xin Liu and Zheng Li and Yifan Gao and Jingfeng Yang and Tianyu Cao and Zhengyang Wang and Bing Yin and Yangqiu Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AV3iZlDrzF}\n}", "github": "", "project": "", "reviewers": "WQyi;2VLV;jZGH;BdTQ;LurS", "pdf_size": 1077715, "rating": "3;4;5;6;7", "confidence": "4;3;1;3;4", "soundness": "2;3;3;3;4", "novelty": "1;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "30;94;139;91;77", "wc_strengths": "24;27;3;64;79", "wc_weaknesses": "215;163;29;115;31", "wc_questions": "4;19;44;71;66", "wc_limitations": "1;30;1;23;8", "wc_review": "274;333;216;364;261", "wc_reply_reviewers": "516;0;20;18;0", "wc_reply_authors": "724;49;72;49;0", "reply_reviewers": "2;0;1;1;0", "reply_authors": "4;2;2;2;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 86.2, 34.97084499979948 ], "wc_strengths_avg": [ 39.4, 27.889783075527856 ], "wc_weaknesses_avg": [ 110.6, 73.01944946382436 ], "wc_questions_avg": [ 40.8, 26.026140705068048 ], "wc_limitations_avg": [ 12.6, 11.842297074469968 ], "wc_review_avg": [ 289.6, 52.72039453570127 ], "wc_reply_reviewers_avg": [ 110.8, 202.77909162435856 ], "wc_reply_authors_avg": [ 178.8, 273.6109646925722 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], 
"corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7614485710931951992&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ust.hk;amazon.com;amazon.com;;amazon.com;amazon.com;ust.hk;amazon.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ust.hk;https://www.amazon.com", "aff_unique_abbr": "HKUST;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Every Parameter Matters: Ensuring the Convergence of Federated Learning with Dynamic Heterogeneous Models Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72537", "id": "AWpWaub6nf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/526356453b7301c9b29aa0533f62bdef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AWpWaub6nf", "openreview": "https://openreview.net/forum?id=AWpWaub6nf", "poster": "/media/PosterPDFs/NeurIPS%202023/72537.png?t=1701538691.417895", "slides": "https://nips.cc/virtual/2023/poster/72537", "video": "https://nips.cc/virtual/2023/poster/72537", "author_site": "Hanhan Zhou, Tian Lan, Guru Prasadh Venkataramani, Wenbo Ding", "tldr": "", "abstract": "Cross-device Federated Learning (FL) faces significant challenges where low-end clients that could potentially make unique contributions are excluded from training large models due to their resource bottlenecks. Recent research efforts have focused on model-heterogeneous FL, by extracting reduced-size models from the global model and applying them to local clients accordingly. Despite the empirical success, general theoretical guarantees of convergence on this method remain an open question. \nThis paper presents a unifying framework for heterogeneous FL algorithms with online model extraction and provides a general convergence analysis for the first time. \nIn particular, we prove that under certain sufficient conditions and for both IID and non-IID data, these algorithms converge to a stationary point of standard FL for general smooth cost functions. 
Moreover, we introduce the concept of the minimum coverage index which, together with model reduction noise, determines the convergence of heterogeneous federated learning; we therefore advocate for a holistic approach that considers both factors to enhance the efficiency of heterogeneous federated learning.", "keywords": "Federated Learning;Optimization;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/8926c150378970b2fc850b01fe0ec2357152da68.pdf", "author": "Hanhan Zhou;Tian Lan;Guru Prasadh Venkataramani;Wenbo Ding", "authorids": "~Hanhan_Zhou1;~Tian_Lan4;~Guru_Prasadh_Venkataramani1;~Wenbo_Ding1", "gender": ";M;M;M", "homepage": "https://hanhanzhou.com/;https://www2.seas.gwu.edu/~tlan/;https://www2.seas.gwu.edu/~guruv/;http://ssr-group.net/", "dblp": "307/3083;;62/4049.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;xo2FkgIAAAAJ", "orcid": ";;;", "linkedin": "hanhanzhou/;;;", "or_profile": "~Hanhan_Zhou1;~Tian_Lan4;~Guru_Prasadh_Venkataramani1;~Wenbo_Ding1", "aff": "George Washington University;George Washington University;George Washington University;Tsinghua University", "aff_domain": "gwu.edu;gwu.edu;gwu.edu;sz.tsinghua.edu.cn", "position": "PhD student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2023every,\ntitle={Every Parameter Matters: Ensuring the Convergence of Federated Learning with Dynamic Heterogeneous Models Reduction},\nauthor={Hanhan Zhou and Tian Lan and Guru Prasadh Venkataramani and Wenbo Ding},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AWpWaub6nf}\n}", "github": "", "project": "", "reviewers": "mRuV;oYBg;9LDk;dR1n", "pdf_size": 538015, "rating": "5;5;6;7", "confidence": "5;2;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "85;62;83;79", "wc_strengths": "40;48;39;107", "wc_weaknesses": "140;48;17;44", "wc_questions": "5;194;329;37", "wc_limitations": "33;26;1;1", "wc_review": "303;378;469;268", "wc_reply_reviewers": "43;0;68;27", "wc_reply_authors": "84;84;56;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 9.065732182234372 ], "wc_strengths_avg": [ 58.5, 28.217902119044926 ], "wc_weaknesses_avg": [ 62.25, 46.44553261617311 ], "wc_questions_avg": [ 141.25, 129.86988680983748 ], "wc_limitations_avg": [ 15.25, 14.463315664120728 ], "wc_review_avg": [ 354.5, 77.13138142157186 ], "wc_reply_reviewers_avg": [ 34.5, 24.70323865407125 ], "wc_reply_authors_avg": [ 56.0, 34.292856398964496 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2075143391598224, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16198285079046539057&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 8, "email": "gwu.edu;gwu.edu;gwu.edu;sz.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "George Washington University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gwu.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "GWU;THU", "aff_campus_unique_index": "", "aff_campus_unique": 
"", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Two Heads are Better Than One: A Simple Exploration Framework for Efficient Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72536", "id": "AYLlZMmUbo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3fa2d2b637122007845a2fbb7c21453b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AYLlZMmUbo", "openreview": "https://openreview.net/forum?id=AYLlZMmUbo", "poster": "/media/PosterPDFs/NeurIPS%202023/72536.png?t=1699083142.8678303", "slides": "https://nips.cc/virtual/2023/poster/72536", "video": "https://nips.cc/virtual/2023/poster/72536", "author_site": "Jiahui Li, Kun Kuang, Baoxiang Wang, Xingchen Li, Fei Wu, Jun Xiao, Long Chen", "tldr": "", "abstract": "Exploration strategy plays an important role in reinforcement learning, especially in sparse-reward tasks. In cooperative multi-agent reinforcement learning~(MARL), designing a suitable exploration strategy is much more challenging due to the large state space and the complex interaction among agents. Currently, mainstream exploration methods in MARL either contribute to exploring the unfamiliar states which are large and sparse, or measuring the interaction among agents with high computational costs. We found an interesting phenomenon that different kinds of exploration plays a different role in different MARL scenarios, and choosing a suitable one is often more effective than designing an exquisite algorithm. In this paper, we propose a exploration method that incorporate the \\underline{C}uri\\underline{O}sity-based and \\underline{IN}fluence-based exploration~(COIN) which is simple but effective in various situations. First, COIN measures the influence of each agent on the other agents based on mutual information theory and designs it as intrinsic rewards which are applied to each individual value function. Moreover, COIN computes the curiosity-based intrinsic rewards via prediction errors which are added to the extrinsic reward. For integrating the two kinds of intrinsic rewards, COIN utilizes a novel framework in which they complement each other and lead to a sufficient and effective exploration on cooperative MARL tasks. 
We perform extensive experiments on different challenging benchmarks, and results across different scenarios show the superiority of our method.", "keywords": "multi-agent reinforcement learning;influence-based exploration", "primary_area": "", "supplementary_material": "/attachment/0bd8474ad31e0c3289c041f3be83f7c5b4c6109b.zip", "author": "Jiahui Li;Kun Kuang;Baoxiang Wang;Xingchen Li;Fei Wu;Jun Xiao;Long Chen", "authorids": "~Jiahui_Li2;~Kun_Kuang1;~Baoxiang_Wang1;~Xingchen_Li1;~Fei_Wu2;~Jun_Xiao1;~Long_Chen8", "gender": ";M;;F;;M;M", "homepage": "https://vicg.netlify.app/;http://kunkuang.github.io;;;https://person.zju.edu.cn/wufei;;https://zjuchenlong.github.io/", "dblp": "153/2952-3;194/4245;;;84/3254-1;71/2308-1;64/5725-16", "google_scholar": ";https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ;;;XJLn4MYAAAAJ;fqOwFhQAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ", "orcid": ";0009-0000-7528-8131;;0000-0002-8204-1302;;;0000-0001-6148-9709", "linkedin": ";;;;;;", "or_profile": "~Jiahui_Li2;~Kun_Kuang1;~Baoxiang_Wang1;~Xingchen_Li1;~Fei_Wu2;~Jun_Xiao1;~Long_Chen8", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University;Columbia University", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;columbia.edu", "position": "PhD student;Associate Professor;;PhD student;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nli2023two,\ntitle={Two Heads are Better Than One: A Simple Exploration Framework for Efficient Multi-Agent Reinforcement Learning},\nauthor={Jiahui Li and Kun Kuang and Baoxiang Wang and Xingchen Li and Fei Wu and Jun Xiao and Long Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AYLlZMmUbo}\n}", "github": "", "project": "", "reviewers": "DahA;dhms;a6y3;PR9o", "pdf_size": 1240864, "rating": "4;6;6;6", "confidence": "4;3;4;4", "soundness": "3;3;2;2", "novelty": "2;3;2;2", "presentation": "2;3;3;1", "wc_summary": "87;60;30;84", "wc_strengths": "55;59;60;73", "wc_weaknesses": "119;44;161;169", "wc_questions": "96;149;359;24", "wc_limitations": "26;10;3;19", "wc_review": "383;322;613;369", "wc_reply_reviewers": "129;55;112;14", "wc_reply_authors": "225;26;23;18", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 65.25, 22.884219453588535 ], "wc_strengths_avg": [ 61.75, 6.7592529172978875 ], "wc_weaknesses_avg": [ 123.25, 49.53975676161521 ], "wc_questions_avg": [ 157.0, 124.77780251310728 ], "wc_limitations_avg": [ 14.5, 8.73212459828649 ], "wc_review_avg": [ 421.75, 112.70619991819439 ], "wc_reply_reviewers_avg": [ 77.5, 45.77390086064329 ], "wc_reply_authors_avg": [ 73.0, 87.80375846169684 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4942405826751885564&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;columbia.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Zhejiang University;Columbia University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.zju.edu.cn;https://www.columbia.edu", "aff_unique_abbr": "ZJU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "A Unified Solution for Privacy and Communication Efficiency in Vertical Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72535", "id": "AYiRHZirD2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b5af479527167d4af78847a9b9b645f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AYiRHZirD2", "openreview": "https://openreview.net/forum?id=AYiRHZirD2", "poster": "/media/PosterPDFs/NeurIPS%202023/72535.png?t=1700895091.7247872", "slides": "https://nips.cc/virtual/2023/poster/72535", "video": "https://nips.cc/virtual/2023/poster/72535", "author_site": "Ganyu Wang, Bin Gu, Qingsong Zhang, Xiang Li, Boyu Wang, Charles Ling", "tldr": "", "abstract": "Vertical Federated Learning (VFL) is a collaborative machine learning paradigm that enables multiple participants to jointly train a model on their private data without sharing it.\nTo make VFL practical, privacy security and communication efficiency should both be satisfied. Recent research has shown that Zero-Order Optimization (ZOO) in VFL can effectively conceal the internal information of the model without adding costly privacy protective add-ons, making it a promising approach for privacy and efficiency.\nHowever, there are still two key problems that have yet to be resolved. First, the convergence rate of ZOO-based VFL is significantly slower compared to gradient-based VFL, resulting in low efficiency in model training and more communication round, which hinders its application on large neural networks. Second, although ZOO-based VFL has demonstrated resistance to state-of-the-art (SOTA) attacks, its privacy guarantee lacks a theoretical explanation.\nTo address these challenges, we propose a novel cascaded hybrid optimization approach that employs a zeroth-order (ZO) gradient on the most critical output layer of the clients, with other parts utilizing the first-order (FO) gradient. This approach preserves the privacy protection of ZOO while significantly enhancing convergence.\nMoreover, we theoretically prove that applying ZOO to the VFL is equivalent to adding Gaussian Mechanism to the gradient information, which offers an implicit differential privacy guarantee. 
\nExperimental results demonstrate that our proposed framework achieves similar utility to the Gaussian mechanism under the same privacy budget, while also having significantly lower communication costs compared with SOTA communication-efficient VFL frameworks.", "keywords": "Vertical Federated Learning;Zeroth Order Optimization;Communication Efficiency;Privacy", "primary_area": "", "supplementary_material": "/attachment/e7968a17a93b07cadf83be03658a2ff159465ebc.pdf", "author": "Ganyu Wang;Bin Gu;Qingsong Zhang;Xiang Li;Boyu Wang;Charles Ling", "authorids": "~Ganyu_Wang1;~Bin_Gu1;~Qingsong_Zhang1;~Xiang_Li1;~Boyu_Wang3;~Charles_Ling1", "gender": "M;M;M;M;M;M", "homepage": "https://ganyuwang.github.io/;https://mbzuai.ac.ae/study/faculty/bin-gu/;https://qingsongzhang.github.io;https://www.linkedin.com/in/xiang-li-2703005a/;https://sites.google.com/site/borriewang/;http://cling.csd.uwo.ca/", "dblp": "248/2835;29/1758-1;https://dblp.uni-trier.de/pid/221/6245;40/1491-12;41/6565-4.html;", "google_scholar": "eDy2ni4AAAAJ;Vo8OgCgAAAAJ;https://scholar.google.com.tw/citations?user=CkopTf8AAAAJ;;qAZM5KcAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-0573-1887;0000-0001-6049-1815;;;0000-0002-7413-4162;", "linkedin": "ganyu-wang/;;;;;", "or_profile": "~Ganyu_Wang1;~Bin_Gu1;~Qingsong_Zhang1;~Xiang_Li1;~Boyu_Wang3;~Charles_Ling1", "aff": "Western University;Mohamed bin Zayed University of Artificial Intelligence;Xidian University;;University of Western Ontario;Western University", "aff_domain": "uwo.ca;mbzuai.ac.ae;xidian.edu;;uwo.ca;uwo.ca", "position": "PhD student;Assistant Professor;PhD student;;Assistant Professor;Professor", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Unified Solution for Privacy and Communication Efficiency in Vertical Federated Learning},\nauthor={Ganyu Wang and Bin Gu and Qingsong Zhang and Xiang Li and Boyu Wang and Charles Ling},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AYiRHZirD2}\n}", "github": "", "project": "", "reviewers": "WL8R;wequ;uLCn;Mz1s;1JdL", "pdf_size": 1437092, "rating": "5;5;6;6;8", "confidence": "3;3;4;3;5", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;2;2;3", "wc_summary": "42;31;77;88;85", "wc_strengths": "41;35;64;71;153", "wc_weaknesses": "45;135;180;72;124", "wc_questions": "85;29;83;34;72", "wc_limitations": "48;23;21;20;5", "wc_review": "261;253;425;285;439", "wc_reply_reviewers": "22;16;68;11;94", "wc_reply_authors": "210;279;443;25;272", "reply_reviewers": "1;1;2;1;2", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 64.6, 23.482759633399137 ], "wc_strengths_avg": [ 72.8, 42.315009157508165 ], "wc_weaknesses_avg": [ 111.2, 47.71331051184774 ], "wc_questions_avg": [ 60.6, 24.220652344641753 ], "wc_limitations_avg": [ 23.4, 13.865064009949611 ], "wc_review_avg": [ 332.6, 81.95999023914047 ], "wc_reply_reviewers_avg": [ 42.2, 32.91443452347313 ], "wc_reply_authors_avg": [ 245.8, 134.69283574117816 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.912870929175277, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7285938497531916309&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 5, "email": "uwo.ca;mbzuai.ac.ae;xidian.edu;;uwo.ca;uwo.ca", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Western University;Mohamed bin Zayed University of Artificial Intelligence;Xidian University;University of Western Ontario", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uwo.ca;https://mbzuai.ac.ae;http://www.xidian.edu.cn/;https://www.uwo.ca", "aff_unique_abbr": "Western;MBZUAI;Xidian;UWO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Canada;United Arab Emirates;China" }, { "title": "No-Regret Online Prediction with Strategic Experts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72534", "id": "AesN5bYnJr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab9f9cfe97da3665e08f50ade9f8c4d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AesN5bYnJr", "openreview": "https://openreview.net/forum?id=AesN5bYnJr", "poster": "/media/PosterPDFs/NeurIPS%202023/72534.png?t=1701660752.9968202", "slides": "https://nips.cc/virtual/2023/poster/72534", "video": "https://nips.cc/virtual/2023/poster/72534", "author_site": "Omid Sadeghi, Maryam Fazel", "tldr": "", "abstract": "We study a generalization of the online binary prediction with expert advice framework where at each round, the learner is allowed to pick $m\\geq 1$ experts from a pool of $K$ experts and the overall utility is a modular or submodular function of the chosen experts. We focus on the setting in which experts act strategically and aim to maximize their influence on the algorithm's predictions by potentially misreporting their beliefs about the events. Among others, this setting finds applications in forecasting competitions where the learner seeks not only to make predictions by aggregating different forecasters but also to rank them according to their relative performance. Our goal is to design algorithms that satisfy the following two requirements: 1) \\emph{Incentive-compatible}: Incentivize the experts to report their beliefs truthfully, and 2) \\emph{No-regret}: Achieve sublinear regret with respect to the true beliefs of the best fixed set of $m$ experts in hindsight. Prior works have studied this framework when $m=1$ and provided incentive-compatible no-regret algorithms for the problem. We first show that a simple reduction of our problem to the $m=1$ setting is neither efficient nor effective. 
Then, we provide algorithms that utilize the specific structure of the utility functions to achieve the two desired goals.", "keywords": "incentive-compatible;online prediction with expert advice;forecasting", "primary_area": "", "supplementary_material": "/attachment/27dedd490addb19b167fbf62911723f739b37584.zip", "author": "Omid Sadeghi;Maryam Fazel", "authorids": "~Omid_Sadeghi1;~Maryam_Fazel1", "gender": "M;F", "homepage": "http://students.washington.edu/omids/;", "dblp": "145/7996;10/2309", "google_scholar": "BxSmIeQAAAAJ;vlN_kRoAAAAJ", "orcid": ";", "linkedin": "sadeghiomid/;", "or_profile": "~Omid_Sadeghi1;~Maryam_Fazel1", "aff": "University of Washington, Seattle;University of Washington, Seattle", "aff_domain": "uw.edu;uw.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsadeghi2023noregret,\ntitle={No-Regret Online Prediction with Strategic Experts},\nauthor={Omid Sadeghi and Maryam Fazel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AesN5bYnJr}\n}", "github": "", "project": "", "reviewers": "oxKD;Aodd;xXJd;9kMc", "pdf_size": 1424564, "rating": "6;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "4;4;3;3", "wc_summary": "361;209;173;272", "wc_strengths": "143;46;49;104", "wc_weaknesses": "184;16;155;59", "wc_questions": "20;55;404;1", "wc_limitations": "10;1;1;2", "wc_review": "718;327;782;438", "wc_reply_reviewers": "125;47;27;54", "wc_reply_authors": "333;33;23;41", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 253.75, 71.34204580750401 ], "wc_strengths_avg": [ 85.5, 40.4382244911915 ], "wc_weaknesses_avg": [ 103.5, 68.5 ], "wc_questions_avg": [ 120.0, 165.10754071210678 ], "wc_limitations_avg": [ 3.5, 3.774917217635375 ], "wc_review_avg": [ 566.25, 189.25165124775 ], "wc_reply_reviewers_avg": [ 63.25, 37.002533697032156 ], "wc_reply_authors_avg": [ 107.5, 130.3485711467525 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9178169295054424930&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uw.edu;uw.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "To Repeat or Not To Repeat: Insights from Scaling LLM under Token-Crisis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72533", "id": "Af5GvIj3T5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9e472cd579c83e2f6aa3459f46aac28-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Af5GvIj3T5", "openreview": "https://openreview.net/forum?id=Af5GvIj3T5", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72533", "video": "https://nips.cc/virtual/2023/poster/72533", "author_site": "Fuzhao Xue, Yao Fu, Wangchunshu Zhou, Zangwei Zheng, Zangwei Zheng, Yang You", "tldr": "", 
"abstract": "Recent research has highlighted the importance of dataset size in scaling language models. However, large language models (LLMs) are notoriously token-hungry during pre-training, and high-quality text data on the web is likely to be approaching its scaling limit for LLMs. To further enhance LLMs, a straightforward approach is to repeat the pre-training data for additional epochs. In this study, we empirically investigate three key aspects under this approach. First, we explore the consequences of repeating pre-training data, revealing that the model is susceptible to overfitting, leading to multi-epoch degradation. Second, we examine the key factors contributing to multi-epoch degradation, finding that significant factors include dataset size, model parameters, and training objectives, while less influential factors consist of dataset quality and model FLOPs. Finally, we explore whether widely used regularization can alleviate multi-epoch degradation. Most regularization techniques do not yield significant improvements, except for dropout, which demonstrates remarkable effectiveness but requires careful tuning when scaling up the model size. Additionally, we discover that leveraging mixture-of-experts (MoE) enables cost-effective and efficient hyper-parameter tuning for computationally intensive dense LLMs with comparable trainable parameters, potentially impacting efficient LLM development on a broader scale.", "keywords": "Large Language Model;Transformer Scaling;Foundation Model Pre-training", "primary_area": "", "supplementary_material": "/attachment/b86934bcd0aa02664a82b0fbdaa98dae34a98495.zip", "author": "Fuzhao Xue;Yao Fu;Wangchunshu Zhou;Zangwei Zheng;Yang You", "authorids": "~Fuzhao_Xue1;~Yao_Fu3;~Wangchunshu_Zhou1;~Zangwei_Zheng1;~Yang_You1", "gender": "M;M;M;M;M", "homepage": "https://xuefuzhao.github.io/;https://franxyao.github.io/;https://michaelzhouwang.github.io;https://zhengzangw.github.io;https://www.comp.nus.edu.sg/~youy/", "dblp": "248/1245;;245/8640.html;289/0376;33/8167-1.html", "google_scholar": "JMHsqIkAAAAJ;liSP4cEAAAAJ;UebIjuQAAAAJ;FTqutJEAAAAJ;jF4dPZwAAAAJ", "orcid": ";;;0000-0002-1505-1535;", "linkedin": "fuzhao-xue-6410561a6/;;;;yang-you-0b92914b/", "or_profile": "~Fuzhao_Xue1;~Yao_Fu3;~Wangchunshu_Zhou1;~Zangwei_Zheng1;~Yang_You1", "aff": "National University of Singapore;University of Edinburgh;Department of Computer Science, ETHZ - ETH Zurich;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;ed.ac.uk;inf.ethz.ch;nus.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nxue2023to,\ntitle={To Repeat or Not To Repeat: Insights from Scaling {LLM} under Token-Crisis},\nauthor={Fuzhao Xue and Yao Fu and Wangchunshu Zhou and Zangwei Zheng and Yang You},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Af5GvIj3T5}\n}", "github": "", "project": "", "reviewers": "vyo8;SEVe;kBHt;2hBw;m3sH", "pdf_size": 579023, "rating": "3;4;6;7;8", "confidence": "4;4;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "71;55;85;107;96", "wc_strengths": "11;39;43;34;161", "wc_weaknesses": "49;125;193;164;87", "wc_questions": "12;20;65;248;4", "wc_limitations": "1;4;13;4;1", "wc_review": "144;243;399;557;349", "wc_reply_reviewers": "0;53;151;508;0", "wc_reply_authors": "0;343;160;470;0", "reply_reviewers": "0;1;1;3;0", "reply_authors": 
"1;2;2;3;1", "rating_avg": [ 5.6, 1.8547236990991407 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.8, 18.312837027615355 ], "wc_strengths_avg": [ 57.6, 52.87570330501524 ], "wc_weaknesses_avg": [ 123.6, 51.6821052202791 ], "wc_questions_avg": [ 69.8, 91.5737953783723 ], "wc_limitations_avg": [ 4.6, 4.409081537009721 ], "wc_review_avg": [ 338.4, 140.33046711245566 ], "wc_reply_reviewers_avg": [ 142.4, 190.9372671848008 ], "wc_reply_authors_avg": [ 194.6, 186.9776457226906 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.10783277320343845, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15041293014894465994&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nus.edu.sg;ed.ac.uk;inf.ethz.ch;nus.edu.sg;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "National University of Singapore;University of Edinburgh;ETH Zurich", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.nus.edu.sg;https://www.ed.ac.uk;https://www.ethz.ch", "aff_unique_abbr": "NUS;Edinburgh;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Singapore;United Kingdom;Switzerland" }, { "title": "Multi-task Representation Learning for Pure Exploration in Bilinear Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72532", "id": "AfC8PVQZ9z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/95464e2e49103dc560091ed2c64a5b12-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AfC8PVQZ9z", "openreview": "https://openreview.net/forum?id=AfC8PVQZ9z", "poster": "/media/PosterPDFs/NeurIPS%202023/72532.png?t=1702018438.525468", "slides": "https://nips.cc/virtual/2023/poster/72532", "video": "https://nips.cc/virtual/2023/poster/72532", "author_site": "Subhojyoti Mukherjee, Qiaomin Xie, Josiah Hanna, Robert Nowak", "tldr": "", "abstract": "We study multi-task representation learning for the problem of pure exploration in bilinear bandits. In bilinear bandits, an action takes the\nform of a pair of arms from two different entity types and the reward is a bilinear function of the known feature vectors of the arms. In the \\textit{multi-task bilinear bandit problem}, we aim to find optimal actions for multiple tasks that share a common low-dimensional linear representation. The objective is to leverage this characteristic to expedite the process of identifying the best pair of arms for all tasks. We propose the algorithm GOBLIN that uses an experimental design approach to optimize sample allocations for learning the global representation as well as minimize the number of samples needed to identify the optimal pair of arms in individual tasks. To the best of our knowledge, this is the first study to give sample complexity analysis for pure exploration in bilinear bandits with shared representation. 
Our results demonstrate that by learning the shared representation across tasks, we achieve significantly improved sample complexity compared to the traditional approach of solving tasks independently.", "keywords": "Linear Bandits;Experimental design;Pure Exploration;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/58d4bf1698b42e3d85bdc792c5156d65dfedbdb2.zip", "author": "Subhojyoti Mukherjee;Qiaomin Xie;Josiah P. Hanna;Robert D Nowak", "authorids": "~Subhojyoti_Mukherjee1;~Qiaomin_Xie1;~Josiah_P._Hanna1;~Robert_D_Nowak1", "gender": "M;F;M;M", "homepage": "https://subhojyoti.github.io/;https://qiaominxie.github.io/;http://nowak.ece.wisc.edu;https://pages.cs.wisc.edu/~jphanna/", "dblp": "199/2032;37/10269;n/RobertDNowak;135/6336", "google_scholar": ";RVNcy4EAAAAJ;fn13u8IAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Subhojyoti_Mukherjee1;~Qiaomin_Xie1;~Robert_D_Nowak1;~Josiah_Hanna2", "aff": "University of Wisconsin, Madison;University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu;;wisc.edu", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmukherjee2023multitask,\ntitle={Multi-task Representation Learning for Pure Exploration in Bilinear Bandits},\nauthor={Subhojyoti Mukherjee and Qiaomin Xie and Josiah P. Hanna and Robert D Nowak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AfC8PVQZ9z}\n}", "github": "", "project": "", "reviewers": "vf5o;9og1;8J9B", "pdf_size": 630083, "rating": "6;6;6", "confidence": "3;4;3", "soundness": "3;4;4", "novelty": "3;2;4", "presentation": "3;2;4", "wc_summary": "80;105;121", "wc_strengths": "18;98;48", "wc_weaknesses": "102;311;30", "wc_questions": "5;168;80", "wc_limitations": "5;1;1", "wc_review": "210;683;280", "wc_reply_reviewers": "19;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 102.0, 16.87206764645835 ], "wc_strengths_avg": [ 54.666666666666664, 32.99831645537222 ], "wc_weaknesses_avg": [ 147.66666666666666, 119.17587377019264 ], "wc_questions_avg": [ 84.33333333333333, 66.61497996363548 ], "wc_limitations_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_review_avg": [ 391.0, 208.44343757160277 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 8.956685895029603 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8649073860979585403&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "wisc.edu;wisc.edu;;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UW-Madison", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": 
"Optimal Excess Risk Bounds for Empirical Risk Minimization on $p$-Norm Linear Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72531", "id": "Ah2Q8mLH96", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4cca5640267b416cef4f00630aef93a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ah2Q8mLH96", "openreview": "https://openreview.net/forum?id=Ah2Q8mLH96", "poster": "/media/PosterPDFs/NeurIPS%202023/72531.png?t=1701987238.2111208", "slides": "https://nips.cc/virtual/2023/poster/72531", "video": "https://nips.cc/virtual/2023/poster/72531", "author_site": "Ayoub El Hanchi, Murat Erdogdu", "tldr": "", "abstract": "We study the performance of empirical risk minimization on the $p$-norm linear regression problem for $p \\in (1, \\infty)$. We show that, in the realizable case, under no moment assumptions, and up to a distribution-dependent constant, $O(d)$ samples are enough to exactly recover the target. Otherwise, for $p \\in [2, \\infty)$, and under weak moment assumptions on the target and the covariates, we prove a high probability excess risk bound on the empirical risk minimizer whose leading term matches, up to a constant that depends only on $p$, the asymptotically exact rate. We extend this result to the case $p \\in (1, 2)$ under mild assumptions that guarantee the existence of the Hessian of the risk at its minimizer.", "keywords": "Excess risk bounds;Linear regression;Lp-norm;Fast rates", "primary_area": "", "supplementary_material": "/attachment/885130a0d9c55261829ccbcc868df992b664ed51.pdf", "author": "Ayoub El Hanchi;Murat A Erdogdu", "authorids": "~Ayoub_El_Hanchi1;~Murat_A_Erdogdu1", "gender": "M;M", "homepage": "https://www.cs.toronto.edu/~aelhan/;http://www.cs.toronto.edu/~erdogdu/", "dblp": ";139/1292", "google_scholar": "5ZzcGmgAAAAJ;Lqc4cdAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ayoub_El_Hanchi1;~Murat_A_Erdogdu1", "aff": "University of Toronto;Vector Institute", "aff_domain": "toronto.edu;vectorinstitute.ai", "position": "PhD student;Faculty", "bibtex": "@inproceedings{\nhanchi2023optimal,\ntitle={Optimal Excess Risk Bounds for Empirical Risk Minimization on \\$p\\$-Norm Linear Regression},\nauthor={Ayoub El Hanchi and Murat A Erdogdu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ah2Q8mLH96}\n}", "github": "", "project": "", "reviewers": "81uA;YnMx;KiJP;Ydss;1SnJ", "pdf_size": 355396, "rating": "5;6;6;7;7", "confidence": "4;3;2;3;4", "soundness": "2;4;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "87;104;95;253;76", "wc_strengths": "45;38;45;52;55", "wc_weaknesses": "124;145;34;97;159", "wc_questions": "1;160;6;11;176", "wc_limitations": "1;1;13;1;15", "wc_review": "258;448;193;414;481", "wc_reply_reviewers": "15;415;0;0;280", "wc_reply_authors": "0;1193;0;0;455", "reply_reviewers": "1;2;0;0;2", "reply_authors": "1;4;1;1;3", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 123.0, 65.6505902486794 ], "wc_strengths_avg": [ 47.0, 5.966573556070519 ], "wc_weaknesses_avg": [ 111.8, 44.160615937733475 ], "wc_questions_avg": [ 70.8, 79.58743619441451 ], "wc_limitations_avg": [ 6.2, 6.4 ], "wc_review_avg": [ 358.8, 112.77127293774775 ], "wc_reply_reviewers_avg": [ 
142.0, 173.2224003990246 ], "wc_reply_authors_avg": [ 329.6, 466.28171741984477 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.0714285714285715, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2077475615107329894&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "toronto.edu;vectorinstitute.ai", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Toronto;Vector Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "Ai40Gvt2wj", "title": "Rethinking SO(3)-equivariance with Bilinear Tensor Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Many datasets in scientific and engineering applications are comprised of objects which have\nspecific geometric structure. A common example is data which inhabits a representation of the\ngroup SO(3) of 3D rotations: scalars, vectors, tensors, etc. One way for a neural network to\nexploit prior knowledge of this structure is to enforce SO(3)-equivariance throughout its layers, and\nseveral such architectures have been proposed. While general methods for handling arbitrary SO(3)\nrepresentations exist, they are computationally intensive and complicated to implement. We show that\nby judicious symmetry breaking, we can efficiently increase the expressiveness of a network operating\nonly on vector and order-2 tensor representations of SO(2). We demonstrate the method on an\nimportant problem from High Energy Physics known as b-tagging, where particle jets originating\nfrom b-meson decays must be discriminated from an overwhelming QCD background.
In this task,\nwe find that augmenting a standard architecture with our method results in a 2.3\u00d7 improvement in\nrejection score.", "keywords": "equivariance;so(3) symmetry;tensor data;physics", "primary_area": "", "supplementary_material": "", "author": "Chase Shimmin;Zhelun Li;Ema Catalina Smith", "authorids": "~Chase_Shimmin1;~Zhelun_Li2;~Ema_Catalina_Smith1", "gender": "M;M;F", "homepage": "https://chase.sh;;http://www.linkedin.com/in/ema-smith-410740276", "dblp": ";;", "google_scholar": ";;", "orcid": "0000-0002-2228-2251;;", "linkedin": ";zhelun-li-2b989515b/;", "or_profile": "~Chase_Shimmin1;~Zhelun_Li2;~Ema_Catalina_Smith1", "aff": "Yale University;;Cornell University", "aff_domain": "yale.edu;;cornell.edu", "position": "Postdoc;;PhD student", "bibtex": "@misc{\nshimmin2023rethinking,\ntitle={Rethinking {SO}(3)-equivariance with Bilinear Tensor Networks},\nauthor={Chase Shimmin and Zhelun Li and Ema Catalina Smith},\nyear={2023},\nurl={https://openreview.net/forum?id=Ai40Gvt2wj}\n}", "github": "", "project": "", "reviewers": "er1i;NLGv;gJAE;p1Sx;ybFn", "site": "https://openreview.net/forum?id=Ai40Gvt2wj", "pdf_size": 328556, "rating": "4;4;5;6;6", "confidence": "3;4;2;5;3", "soundness": "2;2;3;2;3", "novelty": "1;2;2;3;2", "presentation": "2;3;3;3;2", "wc_summary": "63;82;57;387;79", "wc_strengths": "63;17;46;82;61", "wc_weaknesses": "359;376;82;824;133", "wc_questions": "313;182;367;157;43", "wc_limitations": "20;51;1;55;7", "wc_review": "818;708;553;1505;323", "wc_reply_reviewers": "216;383;55;31;0", "wc_reply_authors": "0;122;808;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 133.6, 127.04896693794878 ], "wc_strengths_avg": [ 53.8, 21.664717861075413 ], "wc_weaknesses_avg": [ 354.8, 262.3855178930423 ], "wc_questions_avg": [ 212.4, 115.50861439736866 ], "wc_limitations_avg": [ 26.8, 22.292599668948437 ], "wc_review_avg": [ 781.4, 398.1510266218084 ], "wc_reply_reviewers_avg": [ 137.0, 143.86521469764676 ], "wc_reply_authors_avg": [ 186.0, 314.56891136919427 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2192645048267573, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HwoOkdsbjGoJ:scholar.google.com/&scioq=Rethinking+SO(3)-equivariance+with+Bilinear+Tensor+Networks&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Yale University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.yale.edu;https://www.cornell.edu", "aff_unique_abbr": "Yale;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Deep Instance Generative Framework for MILP Solvers Under Limited Data Availability", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72530", "id": "AiEipk1X0c", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5297e56ac65ba2bfa70ee9fc4818c042-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AiEipk1X0c", "openreview": "https://openreview.net/forum?id=AiEipk1X0c", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72530", "video": "https://nips.cc/virtual/2023/poster/72530", "author_site": "Zijie Geng, Xijun Li, Jie Wang, Xiao Li, Yongdong Zhang, Feng Wu", "tldr": "", "abstract": "In the past few years, there has been an explosive surge in the use of machine learning (ML) techniques to address combinatorial optimization (CO) problems, especially mixed-integer linear programs (MILPs). Despite the achievements, the limited availability of real-world instances often leads to sub-optimal decisions and biased solver assessments, which motivates a suite of synthetic MILP instance generation techniques. However, existing methods either rely heavily on expert-designed formulations or struggle to capture the rich features of real-world instances. To tackle this problem, we propose G2MILP, *the first* deep generative framework for MILP instances. Specifically, G2MILP represents MILP instances as bipartite graphs, and applies a masked variational autoencoder to iteratively corrupt and replace parts of the original graphs to generate new ones. The appealing feature of G2MILP is that it can learn to generate novel and realistic MILP instances without prior expert-designed formulations, while preserving the structures and computational hardness of real-world datasets, simultaneously. Thus the generated instances can facilitate downstream tasks for enhancing MILP solvers under limited data availability. We design a suite of benchmarks to evaluate the quality of the generated MILP instances. Experiments demonstrate that our method can produce instances that closely resemble real-world datasets in terms of both structures and computational hardness. The deliverables are released at [https://miralab-ustc.github.io/L2O-G2MILP](https://miralab-ustc.github.io/L2O-G2MILP).", "keywords": "Learning to Optimize;Machine Learning for Combinatorial Optimization;Mixed-Integer Linear Programming;Graph Generation", "primary_area": "", "supplementary_material": "/attachment/085d49bd34c6f1a30c99d5ca3e5cb090ad09b44f.pdf", "author": "Zijie Geng;Xijun Li;Jie Wang;Xiao Li;Yongdong Zhang;Feng Wu", "authorids": "~Zijie_Geng1;~Xijun_Li1;~Jie_Wang1;~Xiao_Li19;~Yongdong_Zhang2;~Feng_Wu1", "gender": "M;M;M;F;M;M", "homepage": "https://miralab.ai/people/zijie-geng/;https://xijunlee.github.io/;http://staff.ustc.edu.cn/~jwangx;https://github.com/Deborah-x;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;", "dblp": "320/7568;203/0784;29/5259-5;;z/YongdongZhang;25/3972-1", "google_scholar": "https://scholar.google.com.hk/citations?user=Ga66HL4AAAAJ;QXU_QbMAAAAJ;OugG4dUAAAAJ;;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;5bInRDEAAAAJ", "orcid": ";0000-0002-9013-1180;;;0000-0003-0066-3448;", "linkedin": ";;;;;", "or_profile": "~Zijie_Geng1;~Xijun_Li1;~Jie_Wang1;~Xiao_Li19;~Yongdong_Zhang2;~Feng_Wu1", "aff": "University of Science and Technology of China;Huawei Technologies Ltd.;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;huawei.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Researcher;Full Professor;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngeng2023a,\ntitle={A Deep Instance Generative Framework for {MILP} Solvers Under Limited Data Availability},\nauthor={Zijie Geng and Xijun Li and Jie Wang and Xiao Li and Yongdong Zhang and Feng 
Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AiEipk1X0c}\n}", "github": "", "project": "", "reviewers": "sAxU;K4a3;GoAh;sm9N;Vxmz;qq1T", "pdf_size": 711239, "rating": "5;5;6;7;7;7", "confidence": "3;3;4;3;4;5", "soundness": "2;3;3;3;3;3", "novelty": "2;2;3;3;3;4", "presentation": "2;2;3;3;4;4", "wc_summary": "99;79;97;146;64;179", "wc_strengths": "67;46;58;42;72;76", "wc_weaknesses": "489;154;1;25;168;37", "wc_questions": "89;140;64;29;34;1", "wc_limitations": "7;25;18;1;6;1", "wc_review": "751;444;238;243;344;294", "wc_reply_reviewers": "73;523;0;37;57;0", "wc_reply_authors": "22;1517;0;22;32;0", "reply_reviewers": "1;2;0;1;1;0", "reply_authors": "2;5;1;2;2;1", "rating_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 110.66666666666667, 39.626029604569545 ], "wc_strengths_avg": [ 60.166666666666664, 12.733376963276038 ], "wc_weaknesses_avg": [ 145.66666666666666, 166.20435881434906 ], "wc_questions_avg": [ 59.5, 45.41200281863816 ], "wc_limitations_avg": [ 9.666666666666666, 8.900686615212459 ], "wc_review_avg": [ 385.6666666666667, 177.56188279645556 ], "wc_reply_reviewers_avg": [ 115.0, 184.44782460088814 ], "wc_reply_authors_avg": [ 265.5, 559.8123941702851 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 2.1666666666666665, 1.343709624716425 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5813183589761799, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1152443035989513893&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "mail.ustc.edu.cn;huawei.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.ustc.edu.cn;https://www.huawei.com", "aff_unique_abbr": "USTC;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Strategic Data Sharing between Competitors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72529", "id": "AkK3S2spZs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/355091f86e3e2296fbeefa10676ddb17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AkK3S2spZs", "openreview": "https://openreview.net/forum?id=AkK3S2spZs", "poster": "/media/PosterPDFs/NeurIPS%202023/72529.png?t=1701692346.9435298", "slides": "https://nips.cc/virtual/2023/poster/72529", "video": "https://nips.cc/virtual/2023/poster/72529", "author_site": "Nikita Tsoy, Nikola Konstantinov", "tldr": "", "abstract": "Collaborative learning techniques have significantly advanced in recent years, enabling private model training across multiple organizations. Despite this opportunity, firms face a dilemma when considering data sharing with competitors\u2014while collaboration can improve a company\u2019s machine learning model, it may also benefit competitors and hence reduce profits. In this work, we introduce a general framework for analyzing this data-sharing trade-off. 
The framework consists of three components, representing the firms\u2019 production decisions, the effect of additional data on model quality, and the data-sharing negotiation process, respectively. We then study an instantiation of the framework, based on a conventional market model from economic theory, to identify key factors that affect collaboration incentives. Our findings indicate a profound impact of market conditions on the data-sharing incentives. In particular, we find that reduced competition, in terms of the similarities between the firms\u2019 products, and harder learning tasks foster collaboration.", "keywords": "Incentives;collaborative learning;federated learning;game theory;competition;oligopolistic markets;strategic behavior;Nash equilibrium", "primary_area": "", "supplementary_material": "/attachment/d08f0241d394d6662eba5e25873ae1427193554b.zip", "author": "Nikita Tsoy;Nikola Konstantinov", "authorids": "~Nikita_Tsoy1;~Nikola_Konstantinov1", "gender": ";M", "homepage": "https://insait.ai/nikita-tsoy/;https://nikolakon.github.io/", "dblp": "348/6414;217/1964", "google_scholar": "R6oTOHUAAAAJ;https://scholar.google.at/citations?user=0_lvOo8AAAAJ", "orcid": "0000-0001-8612-057X;", "linkedin": "nikita-tsoy-560010238/;", "or_profile": "~Nikita_Tsoy1;~Nikola_Konstantinov1", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": "inf.ethz.ch;ethz.ch", "position": "Intern;Postdoc", "bibtex": "@inproceedings{\ntsoy2023strategic,\ntitle={Strategic Data Sharing between Competitors},\nauthor={Nikita Tsoy and Nikola Konstantinov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AkK3S2spZs}\n}", "github": "", "project": "", "reviewers": "DZoM;4Akt;Jw59;JkGv", "pdf_size": 431420, "rating": "6;6;7;8", "confidence": "5;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "4;3;3;3", "wc_summary": "117;109;273;106", "wc_strengths": "25;92;24;186", "wc_weaknesses": "262;72;90;247", "wc_questions": "107;147;148;274", "wc_limitations": "21;41;37;108", "wc_review": "532;461;572;921", "wc_reply_reviewers": "258;97;65;88", "wc_reply_authors": "329;360;38;63", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 151.25, 70.40729720703672 ], "wc_strengths_avg": [ 81.75, 66.19809287283131 ], "wc_weaknesses_avg": [ 167.75, 87.14463552049547 ], "wc_questions_avg": [ 169.0, 62.83709095749102 ], "wc_limitations_avg": [ 51.75, 33.32697856091968 ], "wc_review_avg": [ 621.5, 177.42674544724085 ], "wc_reply_reviewers_avg": [ 127.0, 76.52777273643864 ], "wc_reply_authors_avg": [ 197.5, 147.67278015937805 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10410729117002950328&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 6, "email": "inf.ethz.ch;ethz.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { 
"title": "Towards Anytime Classification in Early-Exit Architectures by Enforcing Conditional Monotonicity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72528", "id": "Akslsk891N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/af2d9fb5bcee19ef2dfa70d843520c97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Akslsk891N", "openreview": "https://openreview.net/forum?id=Akslsk891N", "poster": "/media/PosterPDFs/NeurIPS%202023/72528.png?t=1701705562.4186025", "slides": "https://nips.cc/virtual/2023/poster/72528", "video": "https://nips.cc/virtual/2023/poster/72528", "author_site": "Metod Jazbec, James Allingham, Dan Zhang, Eric Nalisnick", "tldr": "", "abstract": "Modern predictive models are often deployed to environments in which computational budgets are dynamic. Anytime algorithms are well-suited to such environments as, at any point during computation, they can output a prediction whose quality is a function of computation time. Early-exit neural networks have garnered attention in the context of anytime computation due to their capability to provide intermediate predictions at various stages throughout the network. However, we demonstrate that current early-exit networks are not directly applicable to anytime settings, as the quality of predictions for individual data points is not guaranteed to improve with longer computation. To address this shortcoming, we propose an elegant post-hoc modification, based on the Product-of-Experts, that encourages an early-exit network to become gradually confident. This gives our deep models the property of *conditional monotonicity* in the prediction quality---an essential building block towards truly anytime predictive modeling using early-exit architectures. 
Our empirical results on standard image-classification tasks demonstrate that such behaviors can be achieved while preserving competitive accuracy on average.", "keywords": "anytime algorithms;early-exit neural networks;conditional monotonicity;anytime uncertainty", "primary_area": "", "supplementary_material": "/attachment/8bc08127198d8519839b9cd20788414df76cf082.pdf", "author": "Metod Jazbec;James Urquhart Allingham;Dan Zhang;Eric Nalisnick", "authorids": "~Metod_Jazbec1;~James_Urquhart_Allingham1;~Dan_Zhang1;~Eric_Nalisnick1", "gender": "M;M;;M", "homepage": ";https://jamesallingham.com;;https://enalisnick.github.io", "dblp": ";;21/802-17;136/4057", "google_scholar": "https://scholar.google.ch/citations?user=lDEYafIAAAAJ;CIp9adkAAAAJ;https://scholar.google.de/citations?user=yazO-mMAAAAJ;cb1ZN7AAAAAJ", "orcid": ";;0000-0003-0930-9162;", "linkedin": "metod-jazbec-300412102/;;;", "or_profile": "~Metod_Jazbec1;~James_Urquhart_Allingham1;~Dan_Zhang1;~Eric_Nalisnick1", "aff": "University of Amsterdam;University of Cambridge;Robert Bosch GmbH, Bosch;University of Amsterdam", "aff_domain": "uva.nl;cam.ac.uk;de.bosch.com;uva.nl", "position": "PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\njazbec2023towards,\ntitle={Towards Anytime Classification in Early-Exit Architectures by Enforcing Conditional Monotonicity},\nauthor={Metod Jazbec and James Urquhart Allingham and Dan Zhang and Eric Nalisnick},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Akslsk891N}\n}", "github": "", "project": "", "reviewers": "kJKM;6rYn;ne1V;NWJy;G2Sm", "pdf_size": 1505631, "rating": "4;4;5;6;6", "confidence": "4;3;4;5;3", "soundness": "2;2;3;3;2", "novelty": "2;2;2;3;2", "presentation": "3;2;3;3;4", "wc_summary": "63;38;90;70;114", "wc_strengths": "44;18;66;25;116", "wc_weaknesses": "241;172;212;204;141", "wc_questions": "20;252;184;2;15", "wc_limitations": "16;2;25;19;11", "wc_review": "384;482;577;320;397", "wc_reply_reviewers": "134;1241;117;173;85", "wc_reply_authors": "874;3139;286;797;25", "reply_reviewers": "2;2;2;3;1", "reply_authors": "4;6;3;4;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 75.0, 25.628109567426154 ], "wc_strengths_avg": [ 53.8, 35.28399070400059 ], "wc_weaknesses_avg": [ 194.0, 34.42673379802388 ], "wc_questions_avg": [ 94.6, 103.19224777084759 ], "wc_limitations_avg": [ 14.6, 7.761443164772902 ], "wc_review_avg": [ 432.0, 88.99213448389695 ], "wc_reply_reviewers_avg": [ 350.0, 446.403405004935 ], "wc_reply_authors_avg": [ 1024.2, 1103.6212031308569 ], "reply_reviewers_avg": [ 2.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.8, 1.32664991614216 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2988071523335984, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4063702255124225470&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uva.nl;cam.ac.uk;de.bosch.com;uva.nl", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Amsterdam;University of Cambridge;Robert Bosch GmbH", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uva.nl;https://www.cam.ac.uk;https://www.bosch.com", "aff_unique_abbr": "UvA;Cambridge;Bosch", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Netherlands;United Kingdom;Germany" }, { "title": "Phase diagram of early training dynamics in deep neural networks: effect of the learning rate, depth, and width", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72527", "id": "Al9yglQGKj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a23598416361c7a9860164155e6ddd0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Al9yglQGKj", "openreview": "https://openreview.net/forum?id=Al9yglQGKj", "poster": "/media/PosterPDFs/NeurIPS%202023/72527.png?t=1699885672.5477245", "slides": "https://nips.cc/virtual/2023/poster/72527", "video": "https://nips.cc/virtual/2023/poster/72527", "author_site": "Dayal Singh Kalra, Maissam Barkeshli", "tldr": "", "abstract": "We systematically analyze optimization dynamics in deep neural networks (DNNs) trained with stochastic gradient descent (SGD) and study the effect of learning rate $\\eta$, depth $d$, and width $w$ of the neural network. By analyzing the maximum eigenvalue $\\lambda^H_t$ of the Hessian of the loss, which is a measure of sharpness of the loss landscape, we find that the dynamics can show four distinct regimes: (i) an early time transient regime, (ii) an intermediate saturation regime, (iii) a progressive sharpening regime, and (iv) a late time \"edge of stability\" regime. The early and intermediate regimes (i) and (ii) exhibit a rich phase diagram depending on $\\eta \\equiv c / \\lambda_0^H $, $d$, and $w$. We identify several critical values of $c$, which separate qualitatively distinct phenomena in the early time dynamics of training loss and sharpness. Notably, we discover the opening up of a \"sharpness reduction\" phase, where sharpness decreases at early times, as $d$ and $ 1/w$ are increased.", "keywords": "Optimization dynamics;Phase diagrams;learning rate transition;Catapult effect", "primary_area": "", "supplementary_material": "/attachment/eea6fa8fc532145fa1a922b5f774ef5cdc6041e4.zip", "author": "Dayal Singh Kalra;Maissam Barkeshli", "authorids": "~Dayal_Singh_Kalra1;~Maissam_Barkeshli1", "gender": "M;", "homepage": ";", "dblp": "341/1483;", "google_scholar": "pqtFNxoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Dayal_Singh_Kalra1;~Maissam_Barkeshli1", "aff": "University of Maryland, College Park;", "aff_domain": "umd.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nkalra2023phase,\ntitle={Phase diagram of early training dynamics in deep neural networks: effect of the learning rate, depth, and width},\nauthor={Dayal Singh Kalra and Maissam Barkeshli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Al9yglQGKj}\n}", "github": "", "project": "", "reviewers": "qjGV;bXcV;bhX5;BcD3", "pdf_size": 2768680, "rating": "5;6;7;7", "confidence": "3;3;4;4", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "2;4;4;2", "wc_summary": "90;91;233;283", "wc_strengths": "57;78;57;56", "wc_weaknesses": "146;56;71;19", "wc_questions": "70;241;210;19", "wc_limitations": "22;110;9;2", "wc_review": "385;576;580;379", "wc_reply_reviewers": "27;58;9;0", "wc_reply_authors": "0;0;0;238", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], 
"presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 174.25, 85.59607175565944 ], "wc_strengths_avg": [ 62.0, 9.246621004453464 ], "wc_weaknesses_avg": [ 73.0, 46.20064934608604 ], "wc_questions_avg": [ 135.0, 92.92739101040124 ], "wc_limitations_avg": [ 35.75, 43.464784596268274 ], "wc_review_avg": [ 480.0, 98.03315765596862 ], "wc_reply_reviewers_avg": [ 23.5, 22.1641602593015 ], "wc_reply_authors_avg": [ 59.5, 103.05702305034819 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7816438789120388572&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "umd.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "You Only Condense Once: Two Rules for Pruning Condensed Datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72526", "id": "AlTyimRsLf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7bdd36a198a8408f444834039b09f518-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AlTyimRsLf", "openreview": "https://openreview.net/forum?id=AlTyimRsLf", "poster": "/media/PosterPDFs/NeurIPS%202023/72526.png?t=1699717449.5947897", "slides": "https://nips.cc/virtual/2023/poster/72526", "video": "https://nips.cc/virtual/2023/poster/72526", "author_site": "Yang He, Lingao Xiao, Joey Tianyi Zhou", "tldr": "", "abstract": "Dataset condensation is a crucial tool for enhancing training efficiency by reducing the size of the training dataset, particularly in on-device scenarios. However, these scenarios have two significant challenges: 1) the varying computational resources available on the devices require a dataset size different from the pre-defined condensed dataset, and 2) the limited computational resources often preclude the possibility of conducting additional condensation processes. We introduce You Only Condense Once (YOCO) to overcome these limitations. On top of one condensed dataset, YOCO produces smaller condensed datasets with two embarrassingly simple dataset pruning rules: Low LBPE Score and Balanced Construction. YOCO offers two key advantages: 1) it can flexibly resize the dataset to fit varying computational constraints, and 2) it eliminates the need for extra condensation processes, which can be computationally prohibitive. Experiments validate our findings on networks including ConvNet, ResNet and DenseNet, and datasets including CIFAR-10, CIFAR-100 and ImageNet. For example, our YOCO surpassed various dataset condensation and dataset pruning methods on CIFAR-10 with ten Images Per Class (IPC), achieving 6.98-8.89% and 6.31-23.92% accuracy gains, respectively. 
The code is available at: [https://github.com/he-y/you-only-condense-once](https://github.com/he-y/you-only-condense-once).", "keywords": "Dataset Condensation;Dataset Pruning", "primary_area": "", "supplementary_material": "/attachment/e3d7dbd24400e0334a6030c2d0364d9da06794fd.pdf", "author": "Yang He;Lingao Xiao;Joey Tianyi Zhou", "authorids": "~Yang_He2;~Lingao_Xiao1;~Joey_Tianyi_Zhou1", "gender": "M;M;M", "homepage": ";https://joeyzhouty.github.io/;https://he-y.github.io/", "dblp": "341/5434;123/5110;06/1998-2", "google_scholar": "MlNI5YYAAAAJ;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ;vvnFsIIAAAAJ", "orcid": "0009-0007-1697-1986;0000-0002-4675-7055;0000-0002-2257-6073", "linkedin": ";;", "or_profile": "~Lingao_Xiao1;~Joey_Tianyi_Zhou1;~yang_he1", "aff": "School of Computer Science and Engineering, Nanyang Technological University;A*STAR Centre for Frontier AI Research;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": "scse.ntu.edu.sg;cfar.a-star.edu.sg;ihpc.a-star.edu.sg", "position": "Undergrad student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nhe2023you,\ntitle={You Only Condense Once: Two Rules for Pruning Condensed Datasets},\nauthor={Yang He and Lingao Xiao and Joey Tianyi Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AlTyimRsLf}\n}", "github": "", "project": "", "reviewers": "9GkH;h1cK;hpsG;hLEg", "pdf_size": 2443305, "rating": "6;6;7;8", "confidence": "4;3;5;4", "soundness": "2;3;4;3", "novelty": "3;3;3;4", "presentation": "2;3;4;3", "wc_summary": "50;125;51;60", "wc_strengths": "36;65;110;122", "wc_weaknesses": "165;217;68;61", "wc_questions": "4;82;41;4", "wc_limitations": "1;28;1;8", "wc_review": "256;517;271;255", "wc_reply_reviewers": "637;58;15;10", "wc_reply_authors": "1432;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "5;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.5, 31.132780152116194 ], "wc_strengths_avg": [ 83.25, 34.57871455100666 ], "wc_weaknesses_avg": [ 127.75, 65.91424352899759 ], "wc_questions_avg": [ 32.75, 32.19763190049852 ], "wc_limitations_avg": [ 9.5, 11.05667219374799 ], "wc_review_avg": [ 324.75, 111.1763801353507 ], "wc_reply_reviewers_avg": [ 180.0, 264.5080339044544 ], "wc_reply_authors_avg": [ 358.0, 620.074189109658 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2920160166348168979&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "scse.ntu.edu.sg;cfar.a-star.edu.sg;ihpc.a-star.edu.sg", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nanyang Technological University;A*STAR;Institute of High Performance Computing", "aff_unique_dep": "School of Computer Science and Engineering;Centre for Frontier AI Research;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.a-star.edu.sg;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "NTU;A*STAR;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Causal Context Connects 
Counterfactual Fairness to Robust Prediction and Group Fairness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72525", "id": "AmwgBjXqc3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b7e1e96243c9edc378f85e7d232e415-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AmwgBjXqc3", "openreview": "https://openreview.net/forum?id=AmwgBjXqc3", "poster": "/media/PosterPDFs/NeurIPS%202023/72525.png?t=1701376930.7863119", "slides": "https://nips.cc/virtual/2023/poster/72525", "video": "https://nips.cc/virtual/2023/poster/72525", "author_site": "Jacy Anthis, Victor Veitch", "tldr": "", "abstract": "Counterfactual fairness requires that a person would have been classified in the same way by an AI or other algorithmic system if they had a different protected class, such as a different race or gender. This is an intuitive standard, as reflected in the U.S. legal system, but its use is limited because counterfactuals cannot be directly observed in real-world data. On the other hand, group fairness metrics (e.g., demographic parity or equalized odds) are less intuitive but more readily observed. In this paper, we use \\textit{causal context} to bridge the gaps between counterfactual fairness, robust prediction, and group fairness. First, we motivate counterfactual fairness by showing that there is not necessarily a fundamental trade-off between fairness and accuracy because, under plausible conditions, the counterfactually fair predictor is in fact accuracy-optimal in an unbiased target distribution. Second, we develop a correspondence between the causal graph of the data-generating process and which, if any, group fairness metrics are equivalent to counterfactual fairness. Third, we show that in three common fairness contexts\u2014measurement error, selection on label, and selection on predictors\u2014counterfactual fairness is equivalent to demographic parity, equalized odds, and calibration, respectively. 
Counterfactual fairness can sometimes be tested by measuring relatively simple group fairness metrics.", "keywords": "causal graphs;causality;counterfactual fairness;domain generalization;fairness;robustness;machine learning;artificial intelligence", "primary_area": "", "supplementary_material": "/attachment/ababbb2de603fe0454a90f6a2e1e1fd08e859571.zip", "author": "Jacy Reese Anthis;Victor Veitch", "authorids": "~Jacy_Reese_Anthis1;~Victor_Veitch1", "gender": ";", "homepage": "https://jacyanthis.com;http://victorveitch.com", "dblp": "284/9100;167/5650", "google_scholar": "lRhxKSAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-4684-348X;", "linkedin": "jacyanthis/;", "or_profile": "~Jacy_Reese_Anthis1;~Victor_Veitch1", "aff": "Sentience Institute;Google", "aff_domain": "sentienceinstitute.org;google.com", "position": "Principal Researcher;Research Scientist", "bibtex": "@inproceedings{\nanthis2023causal,\ntitle={Causal Context Connects Counterfactual Fairness to Robust Prediction and Group Fairness},\nauthor={Jacy Reese Anthis and Victor Veitch},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AmwgBjXqc3}\n}", "github": "", "project": "", "reviewers": "c2Rt;7eza;U9Dm;WTYn;buKj", "pdf_size": 281673, "rating": "6;6;6;6;7", "confidence": "3;2;2;4;4", "soundness": "2;4;3;3;3", "novelty": "3;4;3;2;3", "presentation": "3;4;3;3;3", "wc_summary": "64;115;36;54;35", "wc_strengths": "69;52;62;27;56", "wc_weaknesses": "309;42;332;46;198", "wc_questions": "182;62;29;228;55", "wc_limitations": "53;19;45;141;74", "wc_review": "677;290;504;496;418", "wc_reply_reviewers": "42;50;221;23;44", "wc_reply_authors": "62;139;92;61;72", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;3;2;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 60.8, 29.239699040858817 ], "wc_strengths_avg": [ 53.2, 14.3024473430249 ], "wc_weaknesses_avg": [ 185.4, 124.03483381695644 ], "wc_questions_avg": [ 111.2, 78.72839386142715 ], "wc_limitations_avg": [ 66.4, 41.24851512478964 ], "wc_review_avg": [ 477.0, 126.1269202034205 ], "wc_reply_reviewers_avg": [ 76.0, 73.06161783043132 ], "wc_reply_authors_avg": [ 85.2, 29.11631844859511 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5590169943749475, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15538063663750726313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sentienceinstitute.org;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Sentience Institute;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://sentienceinstitute.org;https://www.google.com", "aff_unique_abbr": ";Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Resetting the Optimizer in Deep RL: An Empirical Study", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72524", "id": "AnFUgNC3Yc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e4bf5c3245fd92a4554a16af9803b757-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=AnFUgNC3Yc", "openreview": "https://openreview.net/forum?id=AnFUgNC3Yc", "poster": "/media/PosterPDFs/NeurIPS%202023/72524.png?t=1702313615.7965045", "slides": "https://nips.cc/virtual/2023/poster/72524", "video": "https://nips.cc/virtual/2023/poster/72524", "author_site": "Kavosh Asadi, Rasool Fakoor, Shoham Sabach", "tldr": "", "abstract": "We focus on the task of approximating the optimal value function in deep reinforcement learning. This iterative process is comprised of solving a sequence of optimization problems where the loss function changes per iteration. The common approach to solving this sequence of problems is to employ modern variants of the stochastic gradient descent algorithm such as Adam. These optimizers maintain their own internal parameters such as estimates of the first-order and the second-order moments of the gradient, and update them over time. Therefore, information obtained in previous iterations is used to solve the optimization problem in the current iteration. We demonstrate that this can contaminate the moment estimates because the optimization landscape can change arbitrarily from one iteration to the next one. To hedge against this negative effect, a simple idea is to reset the internal parameters of the optimizer when starting a new iteration. We empirically investigate this resetting idea by employing various optimizers in conjunction with the Rainbow algorithm. We demonstrate that this simple modification significantly improves the performance of deep RL on the Atari benchmark.", "keywords": "Deep Reinforcement Learning;Rainbow Algorithm;Atari benchmark;Adam Optimizer", "primary_area": "", "supplementary_material": "/attachment/cbc993a1d7d90227fffbe60b9d1129121ab0c143.zip", "author": "Kavosh Asadi;Rasool Fakoor;Shoham Sabach", "authorids": "~Kavosh_Asadi1;~Rasool_Fakoor1;~Shoham_Sabach1", "gender": ";M;M", "homepage": "http://cs.brown.edu/~kasadiat/;http://rasoolfa.github.io;https://ssabach.net.technion.ac.il/", "dblp": "192/1404;123/2447;", "google_scholar": "-2qyBJEAAAAJ;nVsOPtQAAAAJ;https://scholar.google.ca/citations?user=42D12TkAAAAJ", "orcid": ";;", "linkedin": ";rasool-fakoor-695b5845/;", "or_profile": "~Kavosh_Asadi1;~Rasool_Fakoor1;~Shoham_Sabach1", "aff": "Amazon;Amazon Web Services;Technion - Israel Institute of Technology, Technion", "aff_domain": "amazon.com;amazon.com;technion.ac.il", "position": "Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nasadi2023resetting,\ntitle={Resetting the Optimizer in Deep {RL}: An Empirical Study},\nauthor={Kavosh Asadi and Rasool Fakoor and Shoham Sabach},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AnFUgNC3Yc}\n}", "github": "", "project": "", "reviewers": "59Jp;aUSe;5d1P;Ks5C", "pdf_size": 8823825, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "2;3;1;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "74;25;78;92", "wc_strengths": "147;34;100;47", "wc_weaknesses": "387;121;541;60", "wc_questions": "121;58;8;53", "wc_limitations": "31;2;105;145", "wc_review": "760;240;832;397", "wc_reply_reviewers": "413;26;508;271", "wc_reply_authors": "269;0;1084;1114", "reply_reviewers": "2;1;2;3", "reply_authors": "3;1;5;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.25, 
25.292044203662147 ], "wc_strengths_avg": [ 82.0, 44.93884733724264 ], "wc_weaknesses_avg": [ 277.25, 195.7171109024451 ], "wc_questions_avg": [ 60.0, 40.24301181571777 ], "wc_limitations_avg": [ 70.75, 56.992872361375156 ], "wc_review_avg": [ 557.25, 246.43597038581848 ], "wc_reply_reviewers_avg": [ 304.5, 181.5688574618456 ], "wc_reply_authors_avg": [ 616.75, 491.653015347206 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1286223032806125418&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "amazon.com;amazon.com;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Amazon;Technion - Israel Institute of Technology", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.technion.ac.il", "aff_unique_abbr": "Amazon;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Occ3D: A Large-Scale 3D Occupancy Prediction Benchmark for Autonomous Driving", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73670", "id": "ApqgcSnhjh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cabfaeecaae7d6540ee797a66f0130b0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ApqgcSnhjh", "openreview": "https://openreview.net/forum?id=ApqgcSnhjh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73670", "video": "https://nips.cc/virtual/2023/poster/73670", "author_site": "Xiaoyu Tian, Tao Jiang, Longfei Yun, Yucheng Mao, Huitong Yang, Yue Wang, Yilun Wang, Hang Zhao", "tldr": "", "abstract": "Robotic perception requires the modeling of both 3D geometry and semantics. Existing methods typically focus on estimating 3D bounding boxes, neglecting finer geometric details and struggling to handle general, out-of-vocabulary objects. 3D occupancy prediction, which estimates the detailed occupancy states and semantics of a scene, is an emerging task to overcome these limitations.\nTo support 3D occupancy prediction, we develop a label generation pipeline that produces dense, visibility-aware labels for any given scene. This pipeline comprises three stages: voxel densification, occlusion reasoning, and image-guided voxel refinement. We establish two benchmarks, derived from the Waymo Open Dataset and the nuScenes Dataset, namely Occ3D-Waymo and Occ3D-nuScenes benchmarks. \nFurthermore, we provide an extensive analysis of the proposed dataset with various baseline models. 
\nLastly, we propose a new model, dubbed Coarse-to-Fine Occupancy (CTF-Occ) network, which demonstrates superior performance on the Occ3D benchmarks. The code, data, and benchmarks are released at \url{https://tsinghua-mars-lab.github.io/Occ3D/}.", "keywords": "3D occupancy prediction; 3D auto-labeling", "primary_area": "", "supplementary_material": "/attachment/b8e773d8a4b5bebe28a10a68195fb08b7d366b8e.pdf", "author": "Xiaoyu Tian;Tao Jiang;Longfei Yun;Yucheng Mao;Huitong Yang;Yue Wang;Yilun Wang;Hang Zhao", "authorids": "~Xiaoyu_Tian3;~Tao_Jiang11;~Longfei_Yun1;~Yucheng_Mao1;~Huitong_Yang1;~Yue_Wang2;~Yilun_Wang1;~Hang_Zhao1", "gender": "M;;M;;M;M;;M", "homepage": "https://github.com/134994;;;https://github.com/myc634;https://yangh8.github.io/github.io/;https://yuewang.xyz;;http://www.mit.edu/~hangzhao/", "dblp": ";;;;;33/4822-41;;", "google_scholar": ";;;W3gCeQkAAAAJ;;v-AEFIEAAAAJ;https://scholar.google.com.hk/citations?hl=en;DmahiOYAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/ACoAADli_1IBmu1IYtjE8njIX0h1Pj7rCVEHgo4;;;;yilunw/;", "or_profile": "~Xiaoyu_Tian3;~Tao_Jiang11;~Longfei_Yun1;~Yucheng_Mao1;~Huitong_Yang1;~Yue_Wang2;~Yilun_Wang1;~Hang_Zhao1", "aff": "School of Software, Tsinghua University;;University of California, San Diego;University of Science and Technology Beijing;;NVIDIA;Li Auto;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;ucsd.edu;ustb.edu.cn;;nvidia.com;lixiang.com;tsinghua.edu.cn", "position": "MS student;;MS student;Undergrad student;;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntian2023occd,\ntitle={Occ3D: A Large-Scale 3D Occupancy Prediction Benchmark for Autonomous Driving},\nauthor={Xiaoyu Tian and Tao Jiang and Longfei Yun and Yucheng Mao and Huitong Yang and Yue Wang and Yilun Wang and Hang Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ApqgcSnhjh}\n}", "github": "", "project": "", "reviewers": "S7Q7;dHUH;EF4V;PEmo;Dt1s", "pdf_size": 10551064, "rating": "5;6;8;9;9", "confidence": "5;4;3;4;5", "wc_summary_and_contributions": "90;55;157;87;153", "wc_strengths": "91;55;258;126;79", "wc_improvement": "98;82;145;120;40", "wc_limitations": "164;32;20;27;25", "wc_correctness": "31;41;4;17;24", "wc_clarity": "13;8;8;21;8", "wc_relation_to_prior_work": "14;97;2;7;25", "wc_documentation": "42;29;2;20;21", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "544;400;597;426;376", "wc_reply_reviewers": "214;0;0;0;14", "wc_reply_authors": "1100;397;224;218;329", "reply_reviewers": "1;0;0;0;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 7.4, 1.624807680927192 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 108.4, 39.9979999499975 ], "wc_strengths_avg": [ 121.8, 71.84817325443981 ], "wc_improvement_avg": [ 97.0, 35.519008995184535 ], "wc_limitations_avg": [ 53.6, 55.33389557947281 ], "wc_correctness_avg": [ 23.4, 12.531560158256434 ], "wc_clarity_avg": [ 11.6, 5.083306010855534 ], "wc_relation_to_prior_work_avg": [ 29.0, 34.86545568324039 ], "wc_documentation_avg": [ 22.8, 13.044539087296263 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 468.6, 86.33330759330376 ], "wc_reply_reviewers_avg": [ 45.6, 84.3744037015966 ], "wc_reply_authors_avg": [ 453.6, 330.08641292849364 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ],
"corr_rating_confidence": -0.2302830932359192, "gs_citation": 228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17079804201616393810&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;;ucsd.edu;ustb.edu.cn;;nvidia.com;lixiang.com;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Tsinghua University;University of California, San Diego;University of Science and Technology Beijing;NVIDIA;Li Auto", "aff_unique_dep": "School of Software;;;NVIDIA Corporation;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucsd.edu;http://www.ustb.edu.cn;https://www.nvidia.com;https://www.liauto.com", "aff_unique_abbr": "THU;UCSD;USTB;NVIDIA;Li Auto", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Neural Lyapunov Control for Discrete-Time Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72523", "id": "ArRycLMoUg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/08bf1773e94763b6cc366ee7c6582f27-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ArRycLMoUg", "openreview": "https://openreview.net/forum?id=ArRycLMoUg", "poster": "/media/PosterPDFs/NeurIPS%202023/72523.png?t=1700268907.6039515", "slides": "https://nips.cc/virtual/2023/poster/72523", "video": "https://nips.cc/virtual/2023/poster/72523", "author_site": "Junlin Wu, Andrew Clark, Yiannis Kantaros, Yevgeniy Vorobeychik", "tldr": "", "abstract": "While ensuring stability for linear systems is well understood, it remains a major challenge for nonlinear systems. A general approach in such cases is to compute a combination of a Lyapunov function and an associated control policy. However, finding Lyapunov functions for general nonlinear systems is a challenging task. To address this challenge, several methods have been proposed that represent Lyapunov functions using neural networks. However, such approaches either focus on continuous-time systems, or highly restricted classes of nonlinear dynamics. We propose the first approach for learning neural Lyapunov control in a broad class of discrete-time systems. Three key ingredients enable us to effectively learn provably stable control policies. The first is a novel mixed-integer linear programming approach for verifying the discrete-time Lyapunov stability conditions, leveraging the particular structure of these conditions. The second is a novel approach for computing verified sublevel sets. The third is a heuristic gradient-based method for quickly finding counterexamples to significantly speed up Lyapunov function learning. Our experiments on four standard benchmarks demonstrate that our approach significantly outperforms state-of-the-art baselines. For example, on the path tracking benchmark, we outperform recent neural Lyapunov control baselines by an order of magnitude in both running time and the size of the region of attraction, and on two of the four benchmarks (cartpole and PVTOL), ours is the first automated approach to return a provably stable controller. 
Our code is available at: https://github.com/jlwu002/nlc_discrete.", "keywords": "nonlinear systems;Lyapunov stability;neural Lyapunov control", "primary_area": "", "supplementary_material": "/attachment/3bd1a8699dd15b0bd3cb4271029ac4f9cd93d812.zip", "author": "Junlin Wu;Andrew Clark;Yiannis Kantaros;Yevgeniy Vorobeychik", "authorids": "~Junlin_Wu2;~Andrew_Clark1;~Yiannis_Kantaros1;~Yevgeniy_Vorobeychik1", "gender": ";M;;M", "homepage": "https://jlwu002.github.io/;https://awclark587.wixsite.com/mysite;https://sites.google.com/view/kantaros;http://vorobeychik.com", "dblp": "188/8292-1;;121/0062;70/2217", "google_scholar": ";;HuCTrEEAAAAJ;https://scholar.google.com.tw/citations?user=ptI-HHkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Junlin_Wu2;~Andrew_Clark1;~Yiannis_Kantaros1;~Yevgeniy_Vorobeychik1", "aff": "Washington University, St. Louis;Washington University, Saint Louis;Washington University, Saint Louis;Washington University, St. Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;wustl.edu", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023neural,\ntitle={Neural Lyapunov Control for Discrete-Time Systems},\nauthor={Junlin Wu and Andrew Clark and Yiannis Kantaros and Yevgeniy Vorobeychik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ArRycLMoUg}\n}", "github": "", "project": "", "reviewers": "yt77;ruNW;KEba;qMzc;NZiv", "pdf_size": 1068403, "rating": "2;5;6;6;7", "confidence": "5;3;5;2;3", "soundness": "1;3;3;3;3", "novelty": "1;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "42;160;62;113;60", "wc_strengths": "31;25;32;34;35", "wc_weaknesses": "61;16;202;204;4", "wc_questions": "64;39;2;8;81", "wc_limitations": "25;15;2;24;25", "wc_review": "223;255;300;383;205", "wc_reply_reviewers": "1221;6;33;11;22", "wc_reply_authors": "1815;57;31;0;0", "reply_reviewers": "6;1;1;1;1", "reply_authors": "6;2;2;1;1", "rating_avg": [ 5.2, 1.7204650534085253 ], "confidence_avg": [ 3.6, 1.2 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 87.4, 43.32020313895123 ], "wc_strengths_avg": [ 31.4, 3.49857113690718 ], "wc_weaknesses_avg": [ 97.4, 88.29405415994896 ], "wc_questions_avg": [ 38.8, 30.72067707587188 ], "wc_limitations_avg": [ 18.2, 8.930845424706442 ], "wc_review_avg": [ 273.2, 63.681708519793965 ], "wc_reply_reviewers_avg": [ 258.6, 481.2901827380235 ], "wc_reply_authors_avg": [ 380.6, 717.516996314373 ], "reply_reviewers_avg": [ 2.0, 2.0 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5424889808044899, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=726163059755517848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "wustl.edu;wustl.edu;wustl.edu;wustl.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WUSTL", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "St. 
Louis;Saint Louis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "What a MESS: Multi-Domain Evaluation of Zero-Shot Semantic Segmentation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73669", "id": "As4101fOG1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e82f45e480f5f44d696ba15dad88f9a3-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=As4101fOG1", "openreview": "https://openreview.net/forum?id=As4101fOG1", "poster": "/media/PosterPDFs/NeurIPS%202023/73669.png?t=1699364646.553254", "slides": "https://nips.cc/virtual/2023/poster/73669", "video": "https://nips.cc/virtual/2023/poster/73669", "author_site": "Benedikt Blumenstiel, Johannes Jakubik, Hilde Kuehne, Michael V\u00f6ssing", "tldr": "", "abstract": "While semantic segmentation has seen tremendous improvements in the past, there are still significant labeling efforts necessary and the problem of limited generalization to classes that have not been present during training. To address this problem, zero-shot semantic segmentation makes use of large self-supervised vision-language models, allowing zero-shot transfer to unseen classes. In this work, we build a benchmark for Multi-domain Evaluation of Zero-Shot Semantic Segmentation (MESS), which allows a holistic analysis of performance across a wide range of domain-specific datasets such as medicine, engineering, earth monitoring, biology, and agriculture. To do this, we reviewed 120 datasets, developed a taxonomy, and classified the datasets according to the developed taxonomy. We select a representative subset consisting of 22 datasets and propose it as the MESS benchmark. We evaluate eight recently published models on the proposed MESS benchmark and analyze characteristics for the performance of zero-shot transfer models. 
The toolkit is available at https://github.com/blumenstiel/MESS.", "keywords": "Zero-shot semantic segmentation;multi-domain benchmark;open-vocabulary semantic-segmentation", "primary_area": "", "supplementary_material": "/attachment/5598bfdd05158ca6dd4a741f841298c665422d6d.pdf", "author": "Benedikt Blumenstiel;Johannes Jakubik;Hilde Kuehne;Michael V\u00f6ssing", "authorids": "~Benedikt_Blumenstiel1;~Johannes_Jakubik1;~Hilde_Kuehne5;~Michael_V\u00f6ssing1", "gender": "M;M;F;M", "homepage": "https://blumenstiel.github.io;https://jhnnsjkbk.github.io;https://hildekuehne.github.io;", "dblp": ";;45/4963;", "google_scholar": "UGynAs4AAAAJ;Bz3X5pQAAAAJ;pxhCcH0AAAAJ;AgqMr_QAAAAJ", "orcid": ";;0000-0003-1079-4441;", "linkedin": ";;hilde-kuehne-8b9aa661;michaelvoessing/", "or_profile": "~Benedikt_Blumenstiel1;~Johannes_Jakubik1;~Hilde_Kuehne5;~Michael_V\u00f6ssing1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology;Goethe University Frankfurt;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;kit.edu;uni-frankfurt.de;kit.edu", "position": "MS student;PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nblumenstiel2023what,\ntitle={What a {MESS}: Multi-Domain Evaluation of Zero-Shot Semantic Segmentation},\nauthor={Benedikt Blumenstiel and Johannes Jakubik and Hilde Kuehne and Michael V{\\\"o}ssing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=As4101fOG1}\n}", "github": "", "project": "", "reviewers": "Wrdt;Xrpu;7dcs;jcNS", "pdf_size": 2553896, "rating": "6;6;6;8", "confidence": "3;4;4;4", "wc_summary_and_contributions": "47;75;63;119", "wc_strengths": "51;85;41;141", "wc_improvement": "3;79;115;97", "wc_limitations": "2;26;3;55", "wc_correctness": "1;19;16;35", "wc_clarity": "1;10;11;55", "wc_relation_to_prior_work": "8;11;17;33", "wc_documentation": "1;17;10;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "115;323;277;560", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "354;837;776;705", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 76.0, 26.739483914241877 ], "wc_strengths_avg": [ 79.5, 39.073648409126065 ], "wc_improvement_avg": [ 73.5, 42.64680527307995 ], "wc_limitations_avg": [ 21.5, 21.592822881689184 ], "wc_correctness_avg": [ 17.75, 12.07010770457331 ], "wc_clarity_avg": [ 19.25, 21.00446381129497 ], "wc_relation_to_prior_work_avg": [ 17.25, 9.65336728815391 ], "wc_documentation_avg": [ 13.0, 8.514693182963201 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 318.75, 159.27707776073743 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 668.0, 187.2097753857955 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=348280219714254801&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kit.edu;kit.edu;uni-frankfurt.de;kit.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology;Goethe University Frankfurt", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kit.edu;https://www.kit.edu;https://www.uni-frankfurt.de", "aff_unique_abbr": 
"KIT;KIT;GU Frankfurt", "aff_campus_unique_index": "1", "aff_campus_unique": ";Frankfurt", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "FeCAM: Exploiting the Heterogeneity of Class Distributions in Exemplar-Free Continual Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72522", "id": "Asx5eDqFZl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15294ba2dcfb4521274f7aa1c26f4dd4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Asx5eDqFZl", "openreview": "https://openreview.net/forum?id=Asx5eDqFZl", "poster": "/media/PosterPDFs/NeurIPS%202023/72522.png?t=1701439543.881568", "slides": "https://nips.cc/virtual/2023/poster/72522", "video": "https://nips.cc/virtual/2023/poster/72522", "author_site": "Dipam Goswami, Yuyang Liu, Bart\u0142omiej Twardowski, Joost van de Weijer", "tldr": "", "abstract": "Exemplar-free class-incremental learning (CIL) poses several challenges since it prohibits the rehearsal of data from previous tasks and thus suffers from catastrophic forgetting. Recent approaches to incrementally learning the classifier by freezing the feature extractor after the first task have gained much attention. In this paper, we explore prototypical networks for CIL, which generate new class prototypes using the frozen feature extractor and classify the features based on the Euclidean distance to the prototypes. In an analysis of the feature distributions of classes, we show that classification based on Euclidean metrics is successful for jointly trained features. However, when learning from non-stationary data, we observe that the Euclidean metric is suboptimal and that feature distributions are heterogeneous. To address this challenge, we revisit the anisotropic Mahalanobis distance for CIL. In addition, we empirically show that modeling the feature covariance relations is better than previous attempts at sampling features from normal distributions and training a linear classifier. Unlike existing methods, our approach generalizes to both many- and few-shot CIL settings, as well as to domain-incremental settings. Interestingly, without updating the backbone network, our method obtains state-of-the-art results on several standard continual learning benchmarks. 
Code is available at https://github.com/dipamgoswami/FeCAM.", "keywords": "Continual Learning;Class-Incremental Learning", "primary_area": "", "supplementary_material": "/attachment/e916c4c3539437dd3f3c8d81b1f7c9fec7140372.pdf", "author": "Dipam Goswami;Yuyang Liu;Bart\u0142omiej Twardowski;Joost van de Weijer", "authorids": "~Dipam_Goswami1;~Yuyang_Liu1;~Bart\u0142omiej_Twardowski1;~Joost_van_de_Weijer5", "gender": "M;F;M;M", "homepage": "https://dipamgoswami.github.io;http://ai.sia.cn/grc/lyy/;;http://lamp.cvc.uab.es/", "dblp": "307/3058;;156/6628;67/3379", "google_scholar": "6_aj45AAAAAJ;sWSKvYUAAAAJ;https://scholar.google.pl/citations?user=8yywECgAAAAJ;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": ";;0000-0003-2117-8679;0000-0002-9656-9706", "linkedin": "dipam-goswami-0a424416b;;bartlomiejtwardowski/;", "or_profile": "~Dipam_Goswami1;~Yuyang_Liu1;~Bart\u0142omiej_Twardowski1;~Joost_van_de_Weijer1", "aff": "Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;Shenyang Institute of Automation, Chinese Academy of Sciences/ University of Chinese Academy of Sciences;Computer Vision Center, Universitat Aut\u00f2noma de Barcelona;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "cvc.uab.es;sia.cn;cvc.uab.es;cvc.uab.es", "position": "PhD student;PhD student;Postdoc;Researcher", "bibtex": "@inproceedings{\ngoswami2023fecam,\ntitle={Fe{CAM}: Exploiting the Heterogeneity of Class Distributions in Exemplar-Free Continual Learning},\nauthor={Dipam Goswami and Yuyang Liu and Bart{\\l}omiej Twardowski and Joost van de Weijer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Asx5eDqFZl}\n}", "github": "", "project": "", "reviewers": "YVv6;QHUR;nDen;9pER;MMgd", "pdf_size": 1518447, "rating": "4;6;6;7;7", "confidence": "5;5;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;3;4;3", "wc_summary": "61;51;104;62;139", "wc_strengths": "25;52;30;67;100", "wc_weaknesses": "164;226;148;77;99", "wc_questions": "73;128;37;59;35", "wc_limitations": "44;1;5;4;36", "wc_review": "367;458;324;269;409", "wc_reply_reviewers": "0;88;27;16;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 83.4, 33.242141928582164 ], "wc_strengths_avg": [ 54.8, 27.213232075591463 ], "wc_weaknesses_avg": [ 142.8, 52.24327707944822 ], "wc_questions_avg": [ 66.4, 33.89159187763242 ], "wc_limitations_avg": [ 18.0, 18.18790807102345 ], "wc_review_avg": [ 365.4, 65.55181156917023 ], "wc_reply_reviewers_avg": [ 26.2, 32.547810986301364 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.48795003647426666, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5452757042095305824&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cvc.uab.es;sia.cn;cvc.uab.es;cvc.uab.es", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Universitat Aut\u00f3noma de Barcelona;Shenyang Institute of Automation;Universitat Aut\u00f2noma de Barcelona", "aff_unique_dep": "Computer Vision Center;;Computer Vision 
Center", "aff_unique_url": "https://www.uab.cat;http://www.sia.cas.cn;https://www.uab.cat", "aff_unique_abbr": "UAB;SIA;UAB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Spain;China" }, { "title": "Calibrate and Boost Logical Expressiveness of GNN Over Multi-Relational and Temporal Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72521", "id": "AtHJ7TLheF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d2706f9149856b6f7016ebf270dd9f25-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AtHJ7TLheF", "openreview": "https://openreview.net/forum?id=AtHJ7TLheF", "poster": "/media/PosterPDFs/NeurIPS%202023/72521.png?t=1702449080.1467376", "slides": "https://nips.cc/virtual/2023/poster/72521", "video": "https://nips.cc/virtual/2023/poster/72521", "author_site": "Dingmin Wang, Yeyuan Chen", "tldr": "", "abstract": "As a powerful framework for graph representation learning, Graph Neural Networks (GNNs) have garnered significant attention in recent years. However, to the best of our knowledge, there has been no formal analysis of the logical expressiveness of GNNs as Boolean node classifiers over multi-relational graphs, where each edge carries a specific relation type. In this paper, we investigate $\\mathcal{FOC}_2$, a fragment of first-order logic with two variables and counting quantifiers. On the negative side, we demonstrate that the R$^2$-GNN architecture, which extends the local message passing GNN by incorporating global readout, fails to capture $\\mathcal{FOC}_2$ classifiers in the general case. Nevertheless, on the positive side, we establish that R$^2$-GNNs models are equivalent to $\\mathcal{FOC}_2$ classifiers under certain restricted yet reasonable scenarios. To address the limitations of R$^2$-GNNs regarding expressiveness, we propose a simple graph transformation technique, akin to a preprocessing step, which can be executed in linear time. This transformation enables R$^2$-GNNs to effectively capture any $\\mathcal{FOC}_2$ classifiers when applied to the \"transformed\" input graph. Moreover, we extend our analysis of expressiveness and graph transformation to temporal graphs, exploring several temporal GNN architectures and providing an expressiveness hierarchy for them. To validate our findings, we implement R$^2$-GNNs and the graph transformation technique and conduct empirical tests in node classification tasks against various well-known GNN architectures that support multi-relational or temporal graphs. 
Our experimental results consistently demonstrate that R$^2$-GNN with the graph transformation outperforms the baseline methods on both synthetic and real-world datasets", "keywords": "Knowledge Graphs;First-Order Logic;Temporal Knowledge Graph;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/6501fff0d53ec58bef61c2f280e0b1e3ed83d03b.zip", "author": "Dingmin Wang;Yeyuan Chen", "authorids": "~Dingmin_Wang1;~Yeyuan_Chen1", "gender": "M;", "homepage": "http://www.dingmin.wang;http://hdmmblz.github.io", "dblp": "206/1677;347/6106", "google_scholar": "zpidC7IAAAAJ;YOD913gAAAAJ", "orcid": ";", "linkedin": "dimmy0302/;", "or_profile": "~Dingmin_Wang1;~Yeyuan_Chen1", "aff": "University of Oxford;Xi'an Jiaotong University", "aff_domain": "ox.ac.uk;xjtu.edu.cn", "position": "PhD student;Undergrad student", "bibtex": "@inproceedings{\nchen2023calibrate,\ntitle={Calibrate and Boost Logical Expressiveness of {GNN} Over Multi-Relational and Temporal Graphs},\nauthor={Yeyuan Chen and Dingmin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AtHJ7TLheF}\n}", "github": "", "project": "", "reviewers": "auoR;pnMq;Hbfa;KFPi;Yror;p5D8;a55p", "pdf_size": 570284, "rating": "4;6;6;7;7;7;8", "confidence": "3;4;3;2;4;4;4", "soundness": "3;4;3;3;4;4;4", "novelty": "1;2;3;3;3;3;4", "presentation": "2;3;3;2;3;4;3", "wc_summary": "107;183;52;234;245;250;181", "wc_strengths": "16;69;16;61;63;69;72", "wc_weaknesses": "490;183;175;120;45;178;181", "wc_questions": "2;82;15;161;54;268;121", "wc_limitations": "1;49;4;1;59;28;31", "wc_review": "616;566;262;577;466;793;586", "wc_reply_reviewers": "376;95;19;20;0;280;36", "wc_reply_authors": "723;194;0;36;0;500;0", "reply_reviewers": "1;1;1;1;0;1;1", "reply_authors": "2;2;1;2;1;2;1", "rating_avg": [ 6.428571428571429, 1.178030178747903 ], "confidence_avg": [ 3.4285714285714284, 0.7284313590846836 ], "soundness_avg": [ 3.5714285714285716, 0.49487165930539345 ], "novelty_avg": [ 2.7142857142857144, 0.880630571852711 ], "presentation_avg": [ 2.857142857142857, 0.6388765649999399 ], "wc_summary_avg": [ 178.85714285714286, 69.44150379261377 ], "wc_strengths_avg": [ 52.285714285714285, 23.211538298959887 ], "wc_weaknesses_avg": [ 196.0, 128.82546332150332 ], "wc_questions_avg": [ 100.42857142857143, 85.90169987151782 ], "wc_limitations_avg": [ 24.714285714285715, 21.93962029697666 ], "wc_review_avg": [ 552.2857142857143, 149.11617166847205 ], "wc_reply_reviewers_avg": [ 118.0, 138.04243653932758 ], "wc_reply_authors_avg": [ 207.57142857142858, 269.93642486590755 ], "reply_reviewers_avg": [ 0.8571428571428571, 0.3499271061118826 ], "reply_authors_avg": [ 1.5714285714285714, 0.4948716593053935 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2853908964926964, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14903616110471565571&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "email": "ox.ac.uk;xjtu.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;Xi'an Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.xjtu.edu.cn", "aff_unique_abbr": "Oxford;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;China" }, { "title": "Extraction and Recovery of Spatio-Temporal Structure in Latent Dynamics Alignment with Diffusion Models", 
"status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72520", "id": "AuXd54odxm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7abbcb05a5d55157ede410bb718e32d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AuXd54odxm", "openreview": "https://openreview.net/forum?id=AuXd54odxm", "poster": "/media/PosterPDFs/NeurIPS%202023/72520.png?t=1701561495.0777147", "slides": "https://nips.cc/virtual/2023/poster/72520", "video": "https://nips.cc/virtual/2023/poster/72520", "author_site": "Yule Wang, Zijing Wu, Chengrui Li, Anqi Wu", "tldr": "", "abstract": "In the field of behavior-related brain computation, it is necessary to align raw neural signals against the drastic domain shift among them. A foundational framework within neuroscience research posits that trial-based neural population activities rely on low-dimensional latent dynamics, thus focusing on the latter greatly facilitates the alignment procedure. Despite this field's progress, existing methods ignore the intrinsic spatio-temporal structure during the alignment phase. Hence, their solutions usually lead to poor quality in latent dynamics structures and overall performance. To tackle this problem, we propose an alignment method ERDiff, which leverages the expressivity of the diffusion model to preserve the spatio-temporal structure of latent dynamics. Specifically, the latent dynamics structures of the source domain are first extracted by a diffusion model. Then, under the guidance of this diffusion model, such structures are well-recovered through a maximum likelihood alignment procedure in the target domain. We first demonstrate the effectiveness of our proposed method on a synthetic dataset. Then, when applied to neural recordings from the non-human primate motor cortex, under both cross-day and inter-subject settings, our method consistently manifests its capability of preserving the spatio-temporal structure of latent dynamics and outperforms existing approaches in alignment goodness-of-fit and neural decoding performance.", "keywords": "Neural distribution alignment;Diffusion model;Neuroscience;Neural decoding", "primary_area": "", "supplementary_material": "/attachment/323cbd7f09d126192358fb39c8b8219d787ac0c6.pdf", "author": "Yule Wang;Zijing Wu;Chengrui Li;Anqi Wu", "authorids": "~Yule_Wang1;~Zijing_Wu1;~Chengrui_Li1;~Anqi_Wu4", "gender": "M;M;M;F", "homepage": "https://yulewang97.github.io/;;https://jerrysoybean.github.io/;https://sites.google.com/view/brainml/home", "dblp": ";;174/4237;15/9453", "google_scholar": "vqsl1YYAAAAJ;;https://scholar.google.com/citations?h;ptGYJiEAAAAJ", "orcid": ";;0000-0001-5947-2393;0000-0002-7866-9455", "linkedin": "yule-wang-a8002b195/;zijing-miles-wu/;;", "or_profile": "~Yule_Wang1;~Zijing_Wu1;~Chengrui_Li1;~Anqi_Wu3", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023extraction,\ntitle={Extraction and Recovery of Spatio-Temporal Structure in Latent Dynamics Alignment with Diffusion Model},\nauthor={Yule Wang and Zijing Wu and Chengrui Li and Anqi Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AuXd54odxm}\n}", "github": "", "project": "", "reviewers": 
"xrSS;tfQ5;9MfT;1oNu;2uTS;wx95", "pdf_size": 5409841, "rating": "6;6;6;7;7;7", "confidence": "3;2;3;3;4;2", "soundness": "3;3;3;4;3;4", "novelty": "2;2;3;3;3;3", "presentation": "2;4;2;3;3;3", "wc_summary": "115;119;169;139;152;57", "wc_strengths": "100;32;180;153;94;43", "wc_weaknesses": "292;38;160;71;55;38", "wc_questions": "103;46;81;143;93;28", "wc_limitations": "35;41;29;28;19;8", "wc_review": "645;276;619;534;413;174", "wc_reply_reviewers": "56;79;105;36;25;18", "wc_reply_authors": "27;603;105;16;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 125.16666666666667, 35.63900796723849 ], "wc_strengths_avg": [ 100.33333333333333, 53.41244133054479 ], "wc_weaknesses_avg": [ 109.0, 91.77145525706781 ], "wc_questions_avg": [ 82.33333333333333, 37.62829904325851 ], "wc_limitations_avg": [ 26.666666666666668, 10.71862346054235 ], "wc_review_avg": [ 443.5, 173.786027439876 ], "wc_reply_reviewers_avg": [ 53.166666666666664, 30.78645084376495 ], "wc_reply_authors_avg": [ 125.16666666666667, 216.64826844962832 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.24253562503633294, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6192584899799151886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Massive Scale Semantic Similarity Dataset of Historical English", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73668", "id": "AvttCE8n3H", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b5ae304ecd18c5d4ac4a011ab086ba23-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=AvttCE8n3H", "openreview": "https://openreview.net/forum?id=AvttCE8n3H", "poster": "/media/PosterPDFs/NeurIPS%202023/73668.png?t=1701813139.5617385", "slides": "https://nips.cc/virtual/2023/poster/73668", "video": "https://nips.cc/virtual/2023/poster/73668", "author_site": "Emily Silcock, Abhishek Arora, Melissa Dell", "tldr": "", "abstract": "A diversity of tasks use language models trained on semantic similarity data. While there are a variety of datasets that capture semantic similarity, they are either constructed from modern web data or are relatively small datasets created in the past decade by human annotators. This study utilizes a novel source, newly digitized articles from off-copyright, local U.S. newspapers, to assemble a massive-scale semantic similarity dataset spanning 70 years from 1920 to 1989 and containing nearly 400M positive semantic similarity pairs. Historically, around half of articles in U.S. local newspapers came from newswires like the Associated Press. 
While local papers reproduced articles from the newswire, they wrote their own headlines, which form abstractive summaries of the associated articles. We associate articles and their headlines by exploiting document layouts and language understanding. We then use deep neural methods to detect which articles are from the same underlying source, in the presence of substantial noise and abridgement. The headlines of reproduced articles form positive semantic similarity pairs. The resulting publicly available HEADLINES dataset is significantly larger than most existing semantic similarity datasets and covers a much longer span of time. It will facilitate the application of contrastively trained semantic similarity models to a variety of tasks, including the study of semantic change across space and time.", "keywords": "semantic similarity;semantic change", "primary_area": "", "supplementary_material": "/attachment/a6ee9f1bcec8355baa407126029b3b218e53f41d.pdf", "author": "Emily Silcock;Abhishek Arora;Melissa Dell", "authorids": "~Emily_Silcock1;~Abhishek_Arora1;~Melissa_Dell1", "gender": ";M;", "homepage": ";https://econabhishek.github.io/;", "dblp": ";344/4529;", "google_scholar": "dfGziwkAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": ";abhishek-arora1996/;", "or_profile": "~Emily_Silcock1;~Abhishek_Arora1;~Melissa_Dell1", "aff": "Department of Economics, Harvard University;Harvard University, Harvard University;", "aff_domain": "fas.harvard.edu;fas.harvard.edu;", "position": "Researcher;Researcher;", "bibtex": "@inproceedings{\nsilcock2023a,\ntitle={A Massive Scale Semantic Similarity Dataset of Historical English},\nauthor={Emily Silcock and Abhishek Arora and Melissa Dell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=AvttCE8n3H}\n}", "github": "", "project": "", "reviewers": "d3Qx;boGz;ZfNZ;oH6a;eGCu", "pdf_size": 12861119, "rating": "5;5;5;6;8", "confidence": "5;4;5;4;4", "wc_summary_and_contributions": "40;268;187;103;52", "wc_strengths": "20;21;140;26;24", "wc_improvement": "147;43;174;133;31", "wc_limitations": "10;1;1;44;60", "wc_correctness": "10;26;1;35;20", "wc_clarity": "38;5;10;2;31", "wc_relation_to_prior_work": "10;5;1;66;9", "wc_documentation": "6;1;1;18;44", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "282;371;516;428;272", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "662;214;661;995;460", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "wc_summary_and_contributions_avg": [ 130.0, 86.26239041436308 ], "wc_strengths_avg": [ 46.2, 46.94848240358787 ], "wc_improvement_avg": [ 105.6, 57.666628131008316 ], "wc_limitations_avg": [ 23.2, 24.276737836867618 ], "wc_correctness_avg": [ 18.4, 11.909659944767524 ], "wc_clarity_avg": [ 17.2, 14.524462124292244 ], "wc_relation_to_prior_work_avg": [ 18.2, 24.111408088288826 ], "wc_documentation_avg": [ 14.0, 16.235762994081924 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 373.8, 91.60436670814335 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 598.4, 257.74918040606843 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5601120336112039, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=4328625871559668253&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "fas.harvard.edu;fas.harvard.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "Department of Economics", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "StoryBench: A Multifaceted Benchmark for Continuous Story Visualization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73667", "id": "AwhpBEqmyo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f63f5fbed1a4ef08c857c5f377b5d33a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=AwhpBEqmyo", "openreview": "https://openreview.net/forum?id=AwhpBEqmyo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73667", "video": "https://nips.cc/virtual/2023/poster/73667", "author_site": "Emanuele Bugliarello, H. Hernan Moraldo, Ruben Villegas, Mohammad Babaeizadeh, Mohammad Taghi Saffar, Han Zhang, Dumitru Erhan, Vittorio Ferrari, Pieter-Jan Kindermans, Paul Voigtlaender", "tldr": "", "abstract": "Generating video stories from text prompts is a complex task. In addition to having high visual quality, videos need to realistically adhere to a sequence of text prompts whilst being consistent throughout the frames. Creating a benchmark for video generation requires data annotated over time, which contrasts with the single caption used often in video datasets. To fill this gap, we collect comprehensive human annotations on three existing datasets, and introduce StoryBench: a new, challenging multi-task benchmark to reliably evaluate forthcoming text-to-video models. Our benchmark includes three video generation tasks of increasing difficulty: action execution, where the next action must be generated starting from a conditioning video; story continuation, where a sequence of actions must be executed starting from a conditioning video; and story generation, where a video must be generated from only text prompts. We evaluate small yet strong text-to-video baselines, and show the benefits of training on story-like data algorithmically generated from existing video captions. Finally, we establish guidelines for human evaluation of video stories, and reaffirm the need of better automatic metrics for video generation. 
StoryBench aims at encouraging future research efforts in this exciting new area.", "keywords": "generative ai;video generation;text-to-video;benchmark;story visualization;multimodal", "primary_area": "", "supplementary_material": "/attachment/2c70b11e17ac6027b3348ac67bdbb93148f7f56d.pdf", "author": "Emanuele Bugliarello;Hernan Moraldo;Ruben Villegas;Mohammad Babaeizadeh;Mohammad Taghi Saffar;Han Zhang;Dumitru Erhan;Vittorio Ferrari;Pieter-Jan Kindermans;Paul Voigtlaender", "authorids": "~Emanuele_Bugliarello1;~Hernan_Moraldo1;~Ruben_Villegas1;~Mohammad_Babaeizadeh1;~Mohammad_Taghi_Saffar1;~Han_Zhang5;~Dumitru_Erhan1;~Vittorio_Ferrari4;~Pieter-Jan_Kindermans1;~Paul_Voigtlaender1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "http://e-bug.github.io/;;https://rubenvillegas.github.io/;;;http://dumitru.ca;https://sites.google.com/view/vittoferrari/home;;https://www.vision.rwth-aachen.de/person/197/;https://sites.google.com/corp/view/hanzhang", "dblp": "241/9497;330/5093;151/8871;;;http://dblp.uni-trier.de/pers/hd/e/Erhan:Dumitru;16/3608;118/8180;https://dblp.uni-trier.de/pers/v/Voigtlaender:Paul;", "google_scholar": "9yc1aXYAAAAJ;https://scholar.google.com.ar/citations?user=vON2HosAAAAJ;uGDQoU0AAAAJ;3Y4egcYAAAAJ;p1cmEzsAAAAJ;wfGiqXEAAAAJ;4QvYJ00AAAAJ;https://scholar.google.com/schhp?hl=en;taUv_MUAAAAJ;cxEoVL4AAAAJ", "orcid": "0000-0002-2999-7081;;;;;;;;0000-0002-7548-7162;", "linkedin": "emanuelebugliarello/;hmoraldo;;;;;vittorio-ferrari-17062b2b/;;;", "or_profile": "~Emanuele_Bugliarello1;~Hernan_Moraldo1;~Ruben_Villegas1;~Mohammad_Babaeizadeh1;~Mohammad_Taghi_Saffar1;~Dumitru_Erhan1;~Vittorio_Ferrari4;~Pieter-Jan_Kindermans1;~Paul_Voigtlaender1;~Han_Zhang1", "aff": "University of Copenhagen;Google Brain;Google DeepMind;Google;Google;Google;Google;Google Deepmind;Google;Google", "aff_domain": "ku.dk;research.google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "PhD student;SWE;Research Scientist;Research Enginner;Research Engineer;Research Scientist;Principal Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nbugliarello2023storybench,\ntitle={StoryBench: A Multifaceted Benchmark for Continuous Story Visualization},\nauthor={Emanuele Bugliarello and Hernan Moraldo and Ruben Villegas and Mohammad Babaeizadeh and Mohammad Taghi Saffar and Han Zhang and Dumitru Erhan and Vittorio Ferrari and Pieter-Jan Kindermans and Paul Voigtlaender},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=AwhpBEqmyo}\n}", "github": "", "project": "", "reviewers": "Ae3X;i1xQ;V9cT;xof8", "pdf_size": 8556211, "rating": "6;7;7;7", "confidence": "4;5;3;5", "wc_summary_and_contributions": "57;81;70;73", "wc_strengths": "28;110;44;32", "wc_improvement": "23;132;59;154", "wc_limitations": "1;27;35;16", "wc_correctness": "1;1;1;16", "wc_clarity": "1;1;1;5", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "1;1;6;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "114;355;218;299", "wc_reply_reviewers": "0;0;0;18", "wc_reply_authors": "0;992;281;801", "reply_reviewers": "0;0;0;1", "reply_authors": "0;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 70.25, 8.642193008721803 ], "wc_strengths_avg": [ 53.5, 33.14739808793444 ], "wc_improvement_avg": [ 92.0, 53.13661637703327 ], "wc_limitations_avg": [ 19.75, 12.754901018824098 ], 
"wc_correctness_avg": [ 4.75, 6.49519052838329 ], "wc_clarity_avg": [ 2.0, 1.7320508075688772 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 2.25, 2.165063509461097 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 246.5, 90.68765075797255 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 518.5, 396.6286046164598 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9184782664688647245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ku.dk;research.google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;2;1;1", "aff_unique_norm": "University of Copenhagen;Google;DeepMind", "aff_unique_dep": ";Google Brain;DeepMind", "aff_unique_url": "https://www.ku.dk;https://brain.google.com;https://deepmind.com", "aff_unique_abbr": "UCPH;Google Brain;DeepMind", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;1;1;1;1;2;1;1", "aff_country_unique": "Denmark;United States;United Kingdom" }, { "title": "ProteinNPT: Improving Protein Property Prediction and Design with Non-Parametric Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72519", "id": "AwzbQVuDBk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a4d5d85f7a52f062d23d98d544a5578-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AwzbQVuDBk", "openreview": "https://openreview.net/forum?id=AwzbQVuDBk", "poster": "/media/PosterPDFs/NeurIPS%202023/72519.png?t=1702262915.731309", "slides": "https://nips.cc/virtual/2023/poster/72519", "video": "https://nips.cc/virtual/2023/poster/72519", "author_site": "Pascal Notin, Ruben Weitzman, Debora Marks, Yarin Gal", "tldr": "", "abstract": "Protein design holds immense potential for optimizing naturally occurring proteins, with broad applications in drug discovery, material design, and sustainability. However, computational methods for protein engineering are confronted with significant challenges, such as an expansive design space, sparse functional regions, and a scarcity of available labels. These issues are further exacerbated in practice by the fact most real-life design scenarios necessitate the simultaneous optimization of multiple properties. In this work, we introduce ProteinNPT, a non-parametric transformer variant tailored to protein sequences and particularly suited to label-scarce and multi-task learning settings. We first focus on the supervised fitness prediction setting and develop several cross-validation schemes which support robust performance assessment. We subsequently reimplement prior top-performing baselines, introduce several extensions of these baselines by integrating diverse branches of the protein engineering literature, and demonstrate that ProteinNPT consistently outperforms all of them across a diverse set of protein property prediction tasks. 
Finally, we demonstrate the value of our approach for iterative protein design across extensive in silico Bayesian optimization and conditional sampling experiments.", "keywords": "Non-Parametric Transformers;protein design;protein property prediction;fitness prediction;Bayesian optimization;ProteinGym", "primary_area": "", "supplementary_material": "", "author": "Pascal Notin;Ruben Weitzman;Debora Susan Marks;Yarin Gal", "authorids": "~Pascal_Notin1;~Ruben_Weitzman1;~Debora_Susan_Marks1;~Yarin_Gal1", "gender": "M;F;;M", "homepage": "https://www.bdi.ox.ac.uk/Team/ruben-weitzman;https://www.deboramarkslab.com/;http://www.cs.ox.ac.uk/people/yarin.gal/website//;https://www.pascalnotin.com", "dblp": ";;67/9076;270/9032", "google_scholar": ";qFmoeNkAAAAJ;https://scholar.google.co.uk/citations?user=SIayDoQAAAAJ;soxv0s0AAAAJ", "orcid": ";0000-0001-9388-2281;;0000-0002-1877-8983", "linkedin": ";debora-marks-3932a97/;;", "or_profile": "~Ruben_Weitzman1;~Debora_Susan_Marks1;~Yarin_Gal1;~Pascal_M_Notin1", "aff": "University of Oxford;Harvard Medical School;University of Oxford;Department of Computer Science, University of Oxford", "aff_domain": "ox.ac.uk;harvard.edu;ox.ac.uk;cs.ox.ac.uk", "position": "PhD student;Associate Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nnotin2023proteinnpt,\ntitle={Protein{NPT}: Improving protein property prediction and design with non-parametric transformers},\nauthor={Pascal Notin and Ruben Weitzman and Debora Susan Marks and Yarin Gal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AwzbQVuDBk}\n}", "github": "", "project": "", "reviewers": "8GnD;yxtV;42SC;af1N", "pdf_size": 21499242, "rating": "5;5;6;6", "confidence": "3;3;3;4", "soundness": "3;4;3;2", "novelty": "3;2;3;3", "presentation": "2;3;3;2", "wc_summary": "85;68;134;48", "wc_strengths": "118;134;188;5", "wc_weaknesses": "153;174;296;163", "wc_questions": "87;179;43;1", "wc_limitations": "3;8;2;1", "wc_review": "446;563;663;218", "wc_reply_reviewers": "11;57;25;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.75, 31.830606340439072 ], "wc_strengths_avg": [ 111.25, 66.60095720032858 ], "wc_weaknesses_avg": [ 196.5, 57.92451985126851 ], "wc_questions_avg": [ 77.5, 66.0208300462816 ], "wc_limitations_avg": [ 3.5, 2.692582403567252 ], "wc_review_avg": [ 472.5, 165.79580814966343 ], "wc_reply_reviewers_avg": [ 25.5, 19.20286436967152 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15813847798700522130&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "ox.ac.uk;harvard.edu;ox.ac.uk;cs.ox.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;Harvard University", "aff_unique_dep": ";Medical School", "aff_unique_url": "https://www.ox.ac.uk;https://hms.harvard.edu", "aff_unique_abbr": "Oxford;HMS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Boston;Oxford", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;United 
States" }, { "title": "Model-Free Reinforcement Learning with the Decision-Estimation Coefficient", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72518", "id": "Ay3WvSrtpO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3fcd0f8747f9217c6dbc45ed138b1fde-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ay3WvSrtpO", "openreview": "https://openreview.net/forum?id=Ay3WvSrtpO", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72518", "video": "https://nips.cc/virtual/2023/poster/72518", "author_site": "Dylan J Foster, Noah Golowich, Jian Qian, Alexander Rakhlin, Ayush Sekhari", "tldr": "", "abstract": "We consider the problem of interactive decision making, encompassing structured bandits and reinforcement\nlearning with general function approximation. Recently, Foster et al. (2021) introduced the\nDecision-Estimation Coefficient, a measure of statistical complexity that lower bounds the optimal regret for interactive decision\nmaking, as well as a meta-algorithm, Estimation-to-Decisions, which achieves upper\nbounds in terms of the same quantity. Estimation-to-Decisions is a reduction, which lifts\nalgorithms for (supervised) online estimation into algorithms for\ndecision making. In this paper, we show that by combining Estimation-to-Decisions with\na specialized form of \"optimistic\" estimation introduced by\nZhang (2022), it is possible to obtain guarantees\nthat improve upon those of Foster et al. (2021) by\naccommodating more lenient notions of estimation error. We use this approach to derive regret bounds for\nmodel-free reinforcement learning with value function approximation, and give structural results showing when it can and cannot help more generally.", "keywords": "Decision making;learning theory;bandits;reinforcement learning theory;online learning;decision-estimation coefficient", "primary_area": "", "supplementary_material": "/attachment/6040d85aeb98a11d60cb2f1191e63cf6effb97e2.pdf", "author": "Dylan J Foster;Noah Golowich;Jian Qian;Alexander Rakhlin;Ayush Sekhari", "authorids": "~Dylan_J_Foster1;~Noah_Golowich1;~Jian_Qian2;~Alexander_Rakhlin1;~Ayush_Sekhari1", "gender": ";;;M;M", "homepage": "http://dylanfoster.net;https://noahgol.github.io;https://sites.google.com/view/jianqian/about;http://www.mit.edu/~rakhlin/;https://ayush.sekhari.com/", "dblp": "167/4271;150/1861;;59/407;203/8152", "google_scholar": "RqwU8xsAAAAJ;roUlyWcAAAAJ;;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ;jH9i188AAAAJ", "orcid": ";;;;", "linkedin": ";;jianQ/;;", "or_profile": "~Dylan_J_Foster1;~Noah_Golowich1;~Jian_Qian2;~Alexander_Rakhlin1;~Ayush_Sekhari1", "aff": "Microsoft Research;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "microsoft.com;mit.edu;mit.edu;mit.edu;mit.edu", "position": "Principal Researcher;PhD student;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nfoster2023modelfree,\ntitle={Model-Free Reinforcement Learning with the Decision-Estimation Coefficient},\nauthor={Dylan J Foster and Noah Golowich and Jian Qian and Alexander Rakhlin and Ayush Sekhari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ay3WvSrtpO}\n}", "github": "", "project": "", "reviewers": "DHV3;kpYc;ooU7;nAEr", "pdf_size": 578221, "rating": "6;6;6;6", "confidence": "3;4;3;3", "soundness": 
"3;4;4;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "12;111;95;64", "wc_strengths": "11;126;76;32", "wc_weaknesses": "109;210;111;110", "wc_questions": "2;2;115;42", "wc_limitations": "34;1;25;1", "wc_review": "168;450;422;249", "wc_reply_reviewers": "0;103;64;130", "wc_reply_authors": "0;47;0;163", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.5, 37.765725201563384 ], "wc_strengths_avg": [ 61.25, 44.13261265776138 ], "wc_weaknesses_avg": [ 135.0, 43.30704330706496 ], "wc_questions_avg": [ 40.25, 46.14311974715191 ], "wc_limitations_avg": [ 15.25, 14.600941750448838 ], "wc_review_avg": [ 322.25, 117.71655576001194 ], "wc_reply_reviewers_avg": [ 74.25, 48.86908531986249 ], "wc_reply_authors_avg": [ 52.5, 66.62019213421709 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9484028013529962982&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "microsoft.com;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Microsoft;Massachusetts Institute of Technology", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://web.mit.edu", "aff_unique_abbr": "MSR;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Partial Counterfactual Identification of Continuous Outcomes with a Curvature Sensitivity Model", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72517", "id": "AygwZzdCM0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65cbe3e21ac62553111d9ecf7d60c18e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=AygwZzdCM0", "openreview": "https://openreview.net/forum?id=AygwZzdCM0", "poster": "/media/PosterPDFs/NeurIPS%202023/72517.png?t=1701185231.180074", "slides": "https://nips.cc/virtual/2023/poster/72517", "video": "https://nips.cc/virtual/2023/poster/72517", "author_site": "Valentyn Melnychuk, Dennis Frauen, Stefan Feuerriegel", "tldr": "", "abstract": "Counterfactual inference aims to answer retrospective \"what if\" questions and thus belongs to the most fine-grained type of inference in Pearl's causality ladder. Existing methods for counterfactual inference with continuous outcomes aim at point identification and thus make strong and unnatural assumptions about the underlying structural causal model. In this paper, we relax these assumptions and aim at partial counterfactual identification of continuous outcomes, i.e., when the counterfactual query resides in an ignorance interval with informative bounds. We prove that, in general, the ignorance interval of the counterfactual queries has non-informative bounds, already when functions of structural causal models are continuously differentiable. As a remedy, we propose a novel sensitivity model called Curvature Sensitivity Model. This allows us to obtain informative bounds by bounding the curvature of level sets of the functions. 
We further show that existing point counterfactual identification methods are special cases of our Curvature Sensitivity Model when the bound of the curvature is set to zero. We then propose an implementation of our Curvature Sensitivity Model in the form of a novel deep generative model, which we call Augmented Pseudo-Invertible Decoder. Our implementation employs (i) residual normalizing flows with (ii) variational augmentations. We empirically demonstrate the effectiveness of our Augmented Pseudo-Invertible Decoder. To the best of our knowledge, ours is the first partial identification model for Markovian structural causal models with continuous outcomes.", "keywords": "causal inference;counterfactual inference;partial identification;sensitivity model;normalizing flows;causal machine learning", "primary_area": "", "supplementary_material": "", "author": "Valentyn Melnychuk;Dennis Frauen;Stefan Feuerriegel", "authorids": "~Valentyn_Melnychuk1;~Dennis_Frauen1;~Stefan_Feuerriegel1", "gender": "M;M;M", "homepage": "https://valentyn1997.github.io/;https://www.ai.bwl.uni-muenchen.de/team/research_team/dennis_frauen/index.html;http://www.ai.bwl.lmu.de", "dblp": "254/1513;315/0115;125/0630", "google_scholar": "EMExrOMAAAAJ;ieyW4WQAAAAJ;https://scholar.google.de/citations?hl=de", "orcid": "0000-0002-2401-6803;;0000-0001-7856-8729", "linkedin": "valentyn-melnychuk/;dennis-frauen-6b5746171/;", "or_profile": "~Valentyn_Melnychuk1;~Dennis_Frauen1;~Stefan_Feuerriegel1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;LMU Munich", "aff_domain": "lmu.de;lmu.de;lmu.de", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nmelnychuk2023partial,\ntitle={Partial Counterfactual Identification of Continuous Outcomes with a Curvature Sensitivity Model},\nauthor={Valentyn Melnychuk and Dennis Frauen and Stefan Feuerriegel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=AygwZzdCM0}\n}", "github": "", "project": "", "reviewers": "23yB;8Zs8;53D4;JMz8", "pdf_size": 2996648, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;4;4", "presentation": "2;3;4;3", "wc_summary": "159;109;62;64", "wc_strengths": "183;125;76;129", "wc_weaknesses": "139;1164;160;132", "wc_questions": "9;127;2;22", "wc_limitations": "22;21;5;6", "wc_review": "512;1546;305;353", "wc_reply_reviewers": "0;429;0;0", "wc_reply_authors": "0;368;0;0", "reply_reviewers": "0;2;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.5, 39.6642156105475 ], "wc_strengths_avg": [ 128.25, 37.877268908937985 ], "wc_weaknesses_avg": [ 398.75, 441.9374248691776 ], "wc_questions_avg": [ 40.0, 50.73953093988946 ], "wc_limitations_avg": [ 13.5, 8.0156097709407 ], "wc_review_avg": [ 679.0, 506.391646850538 ], "wc_reply_reviewers_avg": [ 107.25, 185.7624491117621 ], "wc_reply_authors_avg": [ 92.0, 159.34867429633672 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6839335118040019703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
"lmu.de;lmu.de;lmu.de", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Parameterizing Context: Unleashing the Power of Parameter-Efficient Fine-Tuning and In-Context Tuning for Continual Table Semantic Parsing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72516", "id": "B01uiWhjpc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/398b00a05b847ac65eb98c8e5e865fe8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B01uiWhjpc", "openreview": "https://openreview.net/forum?id=B01uiWhjpc", "poster": "/media/PosterPDFs/NeurIPS%202023/72516.png?t=1697364703.009646", "slides": "https://nips.cc/virtual/2023/poster/72516", "video": "https://nips.cc/virtual/2023/poster/72516", "author_site": "Yongrui Chen, Shenyu Zhang, Guilin Qi, Xinnan Guo", "tldr": "", "abstract": "Continual table semantic parsing aims to train a parser on a sequence of tasks, where each task requires the parser to translate natural language into SQL based on task-specific tables but only offers limited training examples. \nConventional methods tend to suffer from overfitting with limited supervision, as well as catastrophic forgetting due to parameter updates.\nDespite recent advancements that partially alleviate these issues through semi-supervised data augmentation and retention of a few past examples, the performance is still limited by the volume of unsupervised data and stored examples.\nTo overcome these challenges, this paper introduces a novel method integrating parameter-efficient fine-tuning (PEFT) and in-context tuning (ICT) for training a continual table semantic parser. Initially, we present a task-adaptive PEFT framework capable of fully circumventing catastrophic forgetting, which is achieved by freezing the pre-trained model backbone and fine-tuning small-scale prompts. \nBuilding on this, we propose a teacher-student framework-based solution. The teacher addresses the few-shot problem using ICT, which procures contextual information by demonstrating a few training examples. 
In turn, the student leverages the proposed PEFT framework to learn from the teacher's output distribution, and subsequently compresses and saves the contextual information to the prompts, eliminating the need to store any training examples.\nExperimental evaluations on two benchmarks affirm the superiority of our method over prevalent few-shot and continual learning baselines across various metrics.", "keywords": "semantic parsing;continual learning;few-shot learning", "primary_area": "", "supplementary_material": "/attachment/3751be415893dd89dc005450d8fa0f6e3e71e94d.zip", "author": "Yongrui Chen;Shenyu Zhang;Guilin Qi;Xinnan Guo", "authorids": "~Yongrui_Chen1;shenyuzhang@seu.edu.cn;~Guilin_Qi2;~Xinnan_Guo1", "gender": "M;;M;M", "homepage": ";;https://cse.seu.edu.cn/_s191/2023/1024/c23024a469541/page.psp;", "dblp": "143/0948-2.html;;71/5935;293/9872", "google_scholar": "8ZjIHyEAAAAJ;;;UL69H_EAAAAJ", "orcid": "0000-0001-8934-3920;;0000-0003-0150-7236;", "linkedin": ";;;", "or_profile": "~Yongrui_Chen1;shenyuzhang@seu.edu.cn;~Guilin_Qi2;~Xinnan_Guo1", "aff": "Southeast University;;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;;seu.edu.cn;seu.edu.cn", "position": "PhD student;;Full Professor;MS student", "bibtex": "@inproceedings{\nchen2023parameterizing,\ntitle={Parameterizing Context: Unleashing the Power of Parameter-Efficient Fine-Tuning and In-Context Tuning for Continual Table Semantic Parsing},\nauthor={Yongrui Chen and Shenyu Zhang and Guilin Qi and Xinnan Guo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B01uiWhjpc}\n}", "github": "", "project": "", "reviewers": "yC62;vuh7;zd3d;Bddu", "pdf_size": 987897, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "62;73;65;91", "wc_strengths": "54;88;32;55", "wc_weaknesses": "94;143;167;12", "wc_questions": "47;65;105;72", "wc_limitations": "1;1;4;1", "wc_review": "258;370;373;231", "wc_reply_reviewers": "0;0;10;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.75, 11.277743568639961 ], "wc_strengths_avg": [ 57.25, 19.992185973524755 ], "wc_weaknesses_avg": [ 104.0, 59.27478384608416 ], "wc_questions_avg": [ 72.25, 20.99255820523073 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 308.0, 64.2222702806433 ], "wc_reply_reviewers_avg": [ 3.75, 4.14578098794425 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14676162890644451512&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "seu.edu.cn;;seu.edu.cn;seu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Counterfactually Comparing Abstaining Classifiers", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/72515", "id": "B2DEcj4a7i", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/59fe467d5e71ba6b8d41bb3928da6f4c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B2DEcj4a7i", "openreview": "https://openreview.net/forum?id=B2DEcj4a7i", "poster": "/media/PosterPDFs/NeurIPS%202023/72515.png?t=1702072096.269938", "slides": "https://nips.cc/virtual/2023/poster/72515", "video": "https://nips.cc/virtual/2023/poster/72515", "author_site": "Yo Joong Choe, Aditya Gangrade, Aaditya Ramdas", "tldr": "", "abstract": "Abstaining classifiers have the option to abstain from making predictions on inputs that they are unsure about. These classifiers are becoming increasingly popular in high-stakes decision-making problems, as they can withhold uncertain predictions to improve their reliability and safety. When evaluating black-box abstaining classifier(s), however, we lack a principled approach that accounts for what the classifier would have predicted on its abstentions. These missing predictions matter when they can eventually be utilized, either directly or as a backup option in a failure mode. In this paper, we introduce a novel approach and perspective to the problem of evaluating and comparing abstaining classifiers by treating abstentions as missing data. Our evaluation approach is centered around defining the counterfactual score of an abstaining classifier, defined as the expected performance of the classifier had it not been allowed to abstain. We specify the conditions under which the counterfactual score is identifiable: if the abstentions are stochastic, and if the evaluation data is independent of the training data (ensuring that the predictions are missing at random), then the score is identifiable. Note that, if abstentions are deterministic, then the score is unidentifiable because the classifier can perform arbitrarily poorly on its abstentions. Leveraging tools from observational causal inference, we then develop nonparametric and doubly robust methods to efficiently estimate this quantity under identification. 
Our approach is examined in both simulated and real data experiments.", "keywords": "abstaining classifiers;black-box model evaluation;causal inference;missing data", "primary_area": "", "supplementary_material": "/attachment/3fccbeddbdb82e7fe44e6eb60706465e3ff0c5a5.zip", "author": "Yo Joong Choe;Aditya Gangrade;Aaditya Ramdas", "authorids": "~Yo_Joong_Choe1;~Aditya_Gangrade1;~Aaditya_Ramdas2", "gender": "M;;M", "homepage": "https://yjchoe.github.io/;;http://stat.cmu.edu/~aramdas", "dblp": "234/0589;;117/3518", "google_scholar": "71g2MrUAAAAJ;;ZvFaPxUAAAAJ", "orcid": "0000-0002-0614-9477;;0000-0003-0497-311X", "linkedin": ";;", "or_profile": "~Yo_Joong_Choe1;~Aditya_Gangrade1;~Aaditya_Ramdas2", "aff": "Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nchoe2023counterfactually,\ntitle={Counterfactually Comparing Abstaining Classifiers},\nauthor={Yo Joong Choe and Aditya Gangrade and Aaditya Ramdas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B2DEcj4a7i}\n}", "github": "", "project": "", "reviewers": "9d5v;paQd;9p41;fENC", "pdf_size": 469868, "rating": "4;5;6;7", "confidence": "3;4;3;3", "soundness": "4;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;4", "wc_summary": "77;63;80;90", "wc_strengths": "61;55;35;90", "wc_weaknesses": "228;183;45;97", "wc_questions": "80;17;46;1", "wc_limitations": "1;9;1;9", "wc_review": "447;327;207;287", "wc_reply_reviewers": "127;193;28;0", "wc_reply_authors": "58;819;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.5, 9.656603957913983 ], "wc_strengths_avg": [ 60.25, 19.68978161382193 ], "wc_weaknesses_avg": [ 138.25, 71.51005174099653 ], "wc_questions_avg": [ 36.0, 30.091527046662154 ], "wc_limitations_avg": [ 5.0, 4.0 ], "wc_review_avg": [ 317.0, 86.60254037844386 ], "wc_reply_reviewers_avg": [ 87.0, 77.27548123434755 ], "wc_reply_authors_avg": [ 219.25, 347.0744696747371 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6090675854923995317&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cmu.edu;;cmu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Recurrent Temporal Revision Graph Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72514", "id": "B3UDx1rNOy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dafd116ac8c735f149558b79fd48e090-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B3UDx1rNOy", "openreview": "https://openreview.net/forum?id=B3UDx1rNOy", "poster": "/media/PosterPDFs/NeurIPS%202023/72514.png?t=1701746214.5087605", "slides": "https://nips.cc/virtual/2023/poster/72514", 
"video": "https://nips.cc/virtual/2023/poster/72514", "author_site": "Yizhou Chen, Anxiang Zeng, Qingtao Yu, Kerui Zhang, Cao Yuanpeng, Kangle Wu, Guangda Huzhang, Han Yu, Zhiming Zhou", "tldr": "", "abstract": "Temporal graphs offer more accurate modeling of many real-world scenarios than static graphs. However, neighbor aggregation, a critical building block of graph networks, for temporal graphs, is currently straightforwardly extended from that of static graphs. It can be computationally expensive when involving all historical neighbors during such aggregation. In practice, typically only a subset of the most recent neighbors are involved. However, such subsampling leads to incomplete and biased neighbor information. To address this limitation, we propose a novel framework for temporal neighbor aggregation that uses the recurrent neural network with node-wise hidden states to integrate information from all historical neighbors for each node to acquire the complete neighbor information. We demonstrate the superior theoretical expressiveness of the proposed framework as well as its state-of-the-art performance in real-world applications. Notably, it achieves a significant +9.4% improvement on averaged precision in a real-world Ecommerce dataset over existing methods on 2-layer models.", "keywords": "temporal graph;temporal graph network;temporal graph model expressiveness;continuous-time dynamic graph", "primary_area": "", "supplementary_material": "/attachment/b2a0c1c8bdec86463d1d5a82f5a7b82a6258a7d2.zip", "author": "YIZHOU CHEN;Anxiang Zeng;Qingtao Yu;Kerui Zhang;Cao Yuanpeng;Kangle Wu;Guangda Huzhang;Han Yu;Zhiming Zhou", "authorids": "~YIZHOU_CHEN1;~Anxiang_Zeng2;~Qingtao_Yu2;~Kerui_Zhang1;~Cao_Yuanpeng1;~Kangle_Wu4;~Guangda_Huzhang2;~Han_Yu1;~Zhiming_Zhou2", "gender": ";M;;M;M;M;M;M;M", "homepage": "https://arxiv.org/abs/1910.11998;;;;;;;https://sites.google.com/site/hanyushomepage/home;https://zhimingzhou.github.io/", "dblp": ";182/7262.html;;;;;160/6387.html;35/1096-1;56/321-2.html", "google_scholar": ";aJ0zOMMAAAAJ;;https://scholar.google.com.hk/citations?user=cYa4o2EAAAAJ;;;;https://scholar.google.com.sg/citations?hl=en;b8YJ1EMAAAAJ", "orcid": ";;0000-0001-9906-8969;0009-0004-0746-7698;0009-0005-7341-7792;0009-0003-8445-5191;;0000-0001-6893-8650;0000-0002-2407-961X", "linkedin": ";;;;;;;;", "or_profile": "~YIZHOU_CHEN1;~Anxiang_Zeng2;~Qingtao_Yu2;~Kerui_Zhang1;~Cao_Yuanpeng1;~Kangle_Wu4;~Guangda_Huzhang2;~Han_Yu1;~Zhiming_Zhou2", "aff": ";National Technological University;shopee;;shopee;;shopee;Nanyang Technological University;Shanghai University of Finance and Economics", "aff_domain": ";ntu.edu;shopee.com;;shopee.com;;shopee.com;ntu.edu.sg;shufe.edu.cn", "position": ";PhD student;Researcher;;Researcher;;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023recurrent,\ntitle={Recurrent Temporal Revision Graph Networks},\nauthor={YIZHOU CHEN and Anxiang Zeng and Qingtao Yu and Kerui Zhang and Cao Yuanpeng and Kangle Wu and Guangda Huzhang and Han Yu and Zhiming Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B3UDx1rNOy}\n}", "github": "", "project": "", "reviewers": "2aAk;UPBh;sEkX;gXqR", "pdf_size": 4894072, "rating": "5;5;7;7", "confidence": "3;5;4;3", "soundness": "2;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "70;56;79;202", "wc_strengths": "88;19;47;164", "wc_weaknesses": "60;166;46;67", "wc_questions": "40;4;100;41", 
"wc_limitations": "6;11;1;1", "wc_review": "264;256;273;475", "wc_reply_reviewers": "15;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.75, 58.45671475544961 ], "wc_strengths_avg": [ 79.5, 54.609980772748855 ], "wc_weaknesses_avg": [ 84.75, 47.515129169560296 ], "wc_questions_avg": [ 46.25, 34.4265522525855 ], "wc_limitations_avg": [ 4.75, 4.14578098794425 ], "wc_review_avg": [ 317.0, 91.41936337560003 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7831409744620607012&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";ntu.edu;shopee.com;;shopee.com;;shopee.com;ntu.edu.sg;shufe.edu.cn", "author_num": 9, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "National Technological University;Shopee;Nanyang Technological University;Shanghai University of Finance and Economics", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu;https://shopee.sg;https://www.ntu.edu.sg;http://www.sufe.edu.cn", "aff_unique_abbr": "NTU;Shopee;NTU;SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;2", "aff_country_unique": "United States;Singapore;China" }, { "title": "Fast Approximation of Similarity Graphs with Kernel Density Estimation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72513", "id": "B4G87Bq5wA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d5c56ec4f69c9a473089b16000d3f8cd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B4G87Bq5wA", "openreview": "https://openreview.net/forum?id=B4G87Bq5wA", "poster": "/media/PosterPDFs/NeurIPS%202023/72513.png?t=1701565652.8609655", "slides": "https://nips.cc/virtual/2023/poster/72513", "video": "https://nips.cc/virtual/2023/poster/72513", "author_site": "Peter Macgregor, He Sun", "tldr": "", "abstract": "Constructing a similarity graph from a set $X$ of data points in $ \\mathbb{R}^d$ is the first step of many modern clustering algorithms. However, typical constructions of a similarity graph have high time complexity, and a quadratic space dependency with respect to $|X|$. We address this limitation and present a new algorithmic framework that constructs a sparse approximation of the fully connected similarity graph while preserving its cluster structure. Our presented algorithm is based on the kernel density estimation problem, and is applicable for arbitrary kernel functions. 
We compare our algorithm with the well-known implementations from the scikit-learn library and the FAISS library, and find that our method significantly outperforms the implementations from both libraries on a variety of datasets.", "keywords": "similarity graphs;spectral clustering", "primary_area": "", "supplementary_material": "/attachment/05af410d6356796af690ca832bc020d445ce2e22.zip", "author": "Peter Macgregor;He Sun", "authorids": "~Peter_Macgregor1;~He_Sun5", "gender": ";M", "homepage": "https://pmacg.io;http://homepages.inf.ed.ac.uk/hsun4/", "dblp": "294/8868;", "google_scholar": "https://scholar.google.co.uk/citations?user=t72xITMAAAAJ;https://scholar.google.co.uk/citations?user=K6-JprYAAAAJ", "orcid": "0000-0002-1066-8798;", "linkedin": "peter-macgregor-4626a993/;", "or_profile": "~Peter_Macgregor1;~He_Sun5", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nmacgregor2023fast,\ntitle={Fast Approximation of Similarity Graphs with Kernel Density Estimation},\nauthor={Peter Macgregor and He Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B4G87Bq5wA}\n}", "github": "", "project": "", "reviewers": "oQAs;hhPL;eUVC;awKP", "pdf_size": 2354249, "rating": "6;6;7;8", "confidence": "4;4;4;3", "soundness": "3;4;3;4", "novelty": "2;3;4;4", "presentation": "3;2;4;4", "wc_summary": "111;170;51;69", "wc_strengths": "67;148;160;56", "wc_weaknesses": "351;790;321;4", "wc_questions": "69;219;493;88", "wc_limitations": "22;11;64;2", "wc_review": "620;1338;1089;219", "wc_reply_reviewers": "135;223;33;26", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 100.25, 45.77867953534702 ], "wc_strengths_avg": [ 107.75, 46.606732346303794 ], "wc_weaknesses_avg": [ 366.5, 279.7628459963903 ], "wc_questions_avg": [ 217.25, 169.35521102109612 ], "wc_limitations_avg": [ 24.75, 23.742103950576915 ], "wc_review_avg": [ 816.5, 430.6497997213049 ], "wc_reply_reviewers_avg": [ 104.25, 81.00424371599306 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15771315997034856305&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ed.ac.uk;ed.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "B4TAPfHa7g", "title": "Constrained Proximal Policy Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "The problem of constrained reinforcement learning (CRL) holds significant importance, as it provides a framework for addressing critical safety concerns in the field of reinforcement learning (RL).
However, with the introduction of constraint satisfaction, current CRL methods require second-order optimization or primal-dual frameworks with additional Lagrangian multipliers, resulting in increased complexity and inefficiency during implementation. To address these issues, we propose a novel first-order feasible method named Constrained Proximal Policy Optimization (CPPO). By treating the CRL problem as a probabilistic inference problem, our approach integrates the Expectation-Maximization framework to solve it through two steps: 1) calculating the optimal policy distribution within the feasible region (E-step), and 2) conducting a first-order update to adjust the current policy towards the optimal policy obtained in the E-step (M-step). We establish the relationship between the probability ratios and KL divergence to convert the E-step into a convex optimization problem. Furthermore, we develop an iterative heuristic algorithm from a geometric perspective to solve this problem. Additionally, we introduce a conservative update mechanism to overcome the constraint violation issue that occurs in the existing feasible region method. Empirical evaluations conducted in complex and uncertain environments validate the effectiveness of our proposed method, as it performs at least as well as other baselines.", "keywords": "constrained reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/5d5c83ba08574cf00d98974e658bcf6645cca2ea.zip", "author": "Chengbin Xuan;Feng Zhang;Faliang Yin;Hak-Keung Lam", "authorids": "~Chengbin_Xuan1;~Feng_Zhang16;~Faliang_Yin1;~Hak-Keung_Lam1", "gender": "M;M;M;", "homepage": "https://kclpure.kcl.ac.uk/portal/en/persons/chengbin-xuan(f762f09d-9d99-4af0-ab7a-83c141af3df5).html;https://kclpure.kcl.ac.uk/portal/en/persons/feng-zhang(6bd6bb4c-ad0f-44fe-a28e-92bb119e98da).html;;", "dblp": "266/5456;48/1294;;l/HKLam", "google_scholar": ";9kSqIfwAAAAJ;;iO8fu_IAAAAJ", "orcid": ";;0000-0003-0444-3952;", "linkedin": ";;;", "or_profile": "~Chengbin_Xuan1;~Feng_Zhang16;~Faliang_Yin1;~Hak-Keung_Lam1", "aff": "King's College London, University of London;King's College London, University of London;King's College London, University of London;King's College London, University of London", "aff_domain": "kcl.ac.uk;kcl.ac.uk;kcl.ac.uk;kcl.ac.uk", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nxuan2023constrained,\ntitle={Constrained Proximal Policy Optimization},\nauthor={Chengbin Xuan and Feng Zhang and Faliang Yin and Hak-Keung Lam},\nyear={2023},\nurl={https://openreview.net/forum?id=B4TAPfHa7g}\n}", "github": "", "project": "", "reviewers": "ChuP;qYFp;n1BZ;XWVs;DAoZ;8NjX", "site": "https://openreview.net/forum?id=B4TAPfHa7g", "pdf_size": 516503, "rating": "3;4;4;5;5;5", "confidence": "4;2;3;4;3;2", "soundness": "3;2;3;3;3;3", "novelty": "3;2;2;3;3;2", "presentation": "2;1;3;3;2;3", "wc_summary": "25;98;31;107;75;35", "wc_strengths": "24;58;12;61;52;17", "wc_weaknesses": "389;129;76;244;137;68", "wc_questions": "78;2;46;52;22;1", "wc_limitations": "41;7;22;6;7;1", "wc_review": "557;294;187;470;293;122", "wc_reply_reviewers": "151;64;38;73;116;37", "wc_reply_authors": "154;26;26;26;26;26", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;2;2", "rating_avg": [ 4.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.3333333333333335,
0.7453559924999298 ], "wc_summary_avg": [ 61.833333333333336, 33.03743667753633 ], "wc_strengths_avg": [ 37.333333333333336, 20.146684315015435 ], "wc_weaknesses_avg": [ 173.83333333333334, 112.07053830313993 ], "wc_questions_avg": [ 33.5, 27.867244810589607 ], "wc_limitations_avg": [ 14.0, 13.686976778431873 ], "wc_review_avg": [ 320.5, 151.0879986409686 ], "wc_reply_reviewers_avg": [ 79.83333333333333, 41.34173301748354 ], "wc_reply_authors_avg": [ 47.333333333333336, 47.702783519995506 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2738612787525831, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-7VwLwiOaWQJ:scholar.google.com/&scioq=Constrained+Proximal+Policy+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "King's College London", "aff_unique_dep": "", "aff_unique_url": "https://www.kcl.ac.uk", "aff_unique_abbr": "KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Grammar Prompting for Domain-Specific Language Generation with Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72512", "id": "B4tkwuzeiY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd40d0d65bfebb894ccc9ea822b47fa8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B4tkwuzeiY", "openreview": "https://openreview.net/forum?id=B4tkwuzeiY", "poster": "/media/PosterPDFs/NeurIPS%202023/72512.png?t=1702099740.7798297", "slides": "https://nips.cc/virtual/2023/poster/72512", "video": "https://nips.cc/virtual/2023/poster/72512", "author_site": "Bailin Wang, Zi Wang, Xuezhi Wang, Yuan Cao, Rif A. Saurous, Yoon Kim", "tldr": "", "abstract": "Large language models (LLMs) can learn to perform a wide range of natural language tasks from just a handful of in-context examples. However, for generating strings from highly structured languages (e.g., semantic parsing to complex domain-specific languages), it is challenging for the LLM to generalize from just a few exemplars. We propose \\emph{grammar prompting}, a simple approach to enable LLMs to use external knowledge and domain-specific constraints, expressed through a grammar in Backus--Naur Form (BNF), during in-context learning. Grammar prompting augments each demonstration example with a specialized grammar that is minimally sufficient for generating the particular output example, where the specialized grammar is a subset of the full DSL grammar. For inference, the LLM first predicts a BNF grammar given a test input, and then generates the output according to the rules of the grammar. Experiments demonstrate that grammar prompting can enable LLMs to perform competitively on a diverse set of DSL generation tasks, including semantic parsing (SMCalFlow, Overnight, GeoQuery), PDDL planning, and SMILES-based molecule generation.", "keywords": "semantic parsing;large language models;PDDL;AI planning;molecule generation;data efficiency;grammar-based learning", "primary_area": "", "supplementary_material": "", "author": "Bailin Wang;Zi Wang;Xuezhi Wang;Yuan Cao;Rif A. 
Saurous;Yoon Kim", "authorids": "~Bailin_Wang3;~Zi_Wang1;~Xuezhi_Wang3;~Yuan_Cao2;~Rif_A._Saurous1;~Yoon_Kim1", "gender": "F;;M;M;;M", "homepage": "http://zi-wang.com/;https://research.google/people/105995/;;;https://people.csail.mit.edu/yoonkim/;https://berlino.github.io/", "dblp": "78/8711-4;70/4090-2;52/4472-7.html;186/7923;;218/7334", "google_scholar": "U0egIsIAAAAJ;ScLUQ-YAAAAJ;Q82vvqcAAAAJ;QNnjg7YAAAAJ;n_ts4eYAAAAJ;", "orcid": ";;0000-0002-1267-8930;;;", "linkedin": ";;;;;", "or_profile": "~Zi_Wang1;~Xuezhi_Wang3;~Yuan_Cao2;~Rif_A._Saurous1;~Yoon_Kim1;~bailin_wang1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "google.com;google.com;google.com;google.com;mit.edu;mit.edu", "position": "Research scientist;Research Scientist;Research scientist;Engineer, Director;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nwang2023grammar,\ntitle={Grammar Prompting for Domain-Specific Language Generation with Large Language Models},\nauthor={Bailin Wang and Zi Wang and Xuezhi Wang and Yuan Cao and Rif A. Saurous and Yoon Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B4tkwuzeiY}\n}", "github": "", "project": "", "reviewers": "8y4i;qhZ7;rfkJ;pwxh", "pdf_size": 466241, "rating": "5;5;7;8", "confidence": "3;3;4;5", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "4;3;4;3", "wc_summary": "77;101;146;110", "wc_strengths": "26;30;144;50", "wc_weaknesses": "176;33;77;15", "wc_questions": "1;2;133;6", "wc_limitations": "1;2;89;4", "wc_review": "281;168;589;185", "wc_reply_reviewers": "0;0;24;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 108.5, 24.78406746278746 ], "wc_strengths_avg": [ 62.5, 47.92441966263129 ], "wc_weaknesses_avg": [ 75.25, 62.3873985673389 ], "wc_questions_avg": [ 35.5, 56.32273075766125 ], "wc_limitations_avg": [ 24.0, 37.54330832518626 ], "wc_review_avg": [ 305.75, 169.11442132473505 ], "wc_reply_reviewers_avg": [ 10.25, 10.54454835448157 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.986440050415621, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11694070042468483715&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;google.com;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://web.mit.edu", "aff_unique_abbr": "DeepMind;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Optimal Time Complexities of Parallel Stochastic Optimization Methods Under a Fixed Computation Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72511", "id": "B4xF1wfQnF", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3563abb1040f4e150f4242a7282cd1ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B4xF1wfQnF", "openreview": "https://openreview.net/forum?id=B4xF1wfQnF", "poster": "/media/PosterPDFs/NeurIPS%202023/72511.png?t=1699515156.7327297", "slides": "https://nips.cc/virtual/2023/poster/72511", "video": "https://nips.cc/virtual/2023/poster/72511", "author_site": "Alexander Tyurin, Peter Richtarik", "tldr": "", "abstract": "Parallelization is a popular strategy for improving the performance of methods. Optimization methods are no exception: design of efficient parallel optimization methods and tight analysis of their theoretical properties are important research endeavors. While the minimax complexities are well known for sequential optimization methods, the theory of parallel optimization methods is less explored. In this paper, we propose a new protocol that generalizes the classical oracle framework approach. Using this protocol, we establish minimax complexities for parallel optimization methods that have access to an unbiased stochastic gradient oracle with bounded variance. We consider a fixed computation model characterized by each worker requiring a fixed but worker-dependent time to calculate stochastic gradient. We prove lower bounds and develop optimal algorithms that attain them. Our results have surprising consequences for the literature of asynchronous optimization methods.", "keywords": "nonconvex optimization;convex optimization;parallel methods;asynchronous methods;lower bounds", "primary_area": "", "supplementary_material": "/attachment/16dbb1606d5d7fe475fbf0b33cc91e3dd0fd8ade.zip", "author": "Alexander Tyurin;Peter Richt\u00e1rik", "authorids": "~Alexander_Tyurin1;~Peter_Richt\u00e1rik1", "gender": "M;M", "homepage": "https://k3nfalt.github.io/;https://richtarik.org", "dblp": "203/8919;62/8001", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-4380-5848", "linkedin": ";richtarik/", "or_profile": "~Alexander_Tyurin1;~Peter_Richtarik1", "aff": "KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\ntyurin2023optimal,\ntitle={Optimal Time Complexities of Parallel Stochastic Optimization Methods Under a Fixed Computation Model},\nauthor={Alexander Tyurin and Peter Richt{\\'a}rik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B4xF1wfQnF}\n}", "github": "", "project": "", "reviewers": "DBuG;AK4H;Vdkt;563Q", "pdf_size": 850792, "rating": "4;7;7;7", "confidence": "2;4;4;4", "soundness": "2;4;3;3", "novelty": "2;3;3;4", "presentation": "2;4;3;3", "wc_summary": "35;75;19;66", "wc_strengths": "14;87;42;109", "wc_weaknesses": "56;98;147;41", "wc_questions": "4;11;201;172", "wc_limitations": "1;70;1;1", "wc_review": "110;341;410;389", "wc_reply_reviewers": "0;25;19;110", "wc_reply_authors": "35;0;0;160", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 48.75, 22.69774217846348 ], "wc_strengths_avg": [ 63.0, 37.19542982679458 ], "wc_weaknesses_avg": [ 85.5, 41.19769411022904 ], "wc_questions_avg": [ 97.0, 90.11936528848835 ], 
"wc_limitations_avg": [ 18.25, 29.877876430563134 ], "wc_review_avg": [ 312.5, 119.55856305593507 ], "wc_reply_reviewers_avg": [ 38.5, 42.29952718411874 ], "wc_reply_authors_avg": [ 48.75, 65.80036094125929 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1213408434205250024&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "kaust.edu.sa;kaust.edu.sa", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Online Nonstochastic Model-Free Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72510", "id": "B5LpWAaBVA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/491bbea0db340e2d0bc1feea9059909a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B5LpWAaBVA", "openreview": "https://openreview.net/forum?id=B5LpWAaBVA", "poster": "/media/PosterPDFs/NeurIPS%202023/72510.png?t=1699602048.6384637", "slides": "https://nips.cc/virtual/2023/poster/72510", "video": "https://nips.cc/virtual/2023/poster/72510", "author_site": "Udaya Ghai, Arushi Gupta, Wenhan Xia, Karan Singh, Elad Hazan", "tldr": "", "abstract": "We investigate robust model-free reinforcement learning algorithms designed for environments that may be dynamic or even adversarial. Traditional state-based policies often struggle to accommodate the challenges imposed by the presence of unmodeled disturbances in such settings. Moreover, optimizing linear state-based policies pose an obstacle for efficient optimization, leading to nonconvex objectives, even in benign environments like linear dynamical systems.\n\nDrawing inspiration from recent advancements in model-based control, we intro- duce a novel class of policies centered on disturbance signals. We define several categories of these signals, which we term pseudo-disturbances, and develop corresponding policy classes based on them. We provide efficient and practical algorithms for optimizing these policies.\n\nNext, we examine the task of online adaptation of reinforcement learning agents in the face of adversarial disturbances. Our methods seamlessly integrate with any black-box model-free approach, yielding provable regret guarantees when dealing with linear dynamics. These regret guarantees unconditionally improve the best-known results for bandit linear control in having no dependence on the state-space dimension. 
We evaluate our method over various standard RL benchmarks and demonstrate improved robustness.", "keywords": "Control;Reinforcement Learning;Online Learning;Regret Minimization;Bandit Linear Control", "primary_area": "", "supplementary_material": "/attachment/9d2d70066d6cd6ba7a2e525ddf5c2f0ca20228cb.zip", "author": "Udaya Ghai;Arushi Gupta;Wenhan Xia;Karan Singh;Elad Hazan", "authorids": "~Udaya_Ghai1;~Arushi_Gupta1;~Wenhan_Xia1;~Karan_Singh1;~Elad_Hazan1", "gender": "M;;F;M;M", "homepage": "https://www.cs.princeton.edu/~ughai/;;https://wenhanlunaxia.github.io/;https://i-am-karan-singh.github.io/;https://www.ehazan.com", "dblp": "https://dblp.uni-trier.de/pid/236/4605;;;00/505;72/739", "google_scholar": "Kf1SGfgAAAAJ;;;PZJIgZUAAAAJ;LnhCGNMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Udaya_Ghai1;~Arushi_Gupta1;~Wenhan_Xia1;~Karan_Singh1;~Elad_Hazan1", "aff": "Department of Computer Science, Princeton University;Department of Computer Science, Princeton University;Princeton University;Carnegie Mellon University;Princeton University", "aff_domain": "cs.princeton.edu;cs.princeton.edu;princeton.edu;cmu.edu;princeton.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nghai2023online,\ntitle={Online Nonstochastic Model-Free Reinforcement Learning},\nauthor={Udaya Ghai and Arushi Gupta and Wenhan Xia and Karan Singh and Elad Hazan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B5LpWAaBVA}\n}", "github": "", "project": "", "reviewers": "4h3N;ijfz;MnP5", "pdf_size": 8485324, "rating": "5;6;6", "confidence": "3;2;3", "soundness": "4;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "151;62;121", "wc_strengths": "112;106;44", "wc_weaknesses": "125;281;70", "wc_questions": "127;84;36", "wc_limitations": "56;10;19", "wc_review": "571;543;290", "wc_reply_reviewers": "179;159;89", "wc_reply_authors": "319;313;75", "reply_reviewers": "1;2;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 111.33333333333333, 36.97146046464609 ], "wc_strengths_avg": [ 87.33333333333333, 30.739045022396013 ], "wc_weaknesses_avg": [ 158.66666666666666, 89.36939570618618 ], "wc_questions_avg": [ 82.33333333333333, 37.16928241916375 ], "wc_limitations_avg": [ 28.333333333333332, 19.90533150244482 ], "wc_review_avg": [ 468.0, 126.38301573655642 ], "wc_reply_reviewers_avg": [ 142.33333333333334, 38.58612300930075 ], "wc_reply_authors_avg": [ 235.66666666666666, 113.6348929197757 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13130507675549629974&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.princeton.edu;cs.princeton.edu;princeton.edu;cmu.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Princeton University;Carnegie Mellon University", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": 
"https://www.princeton.edu;https://www.cmu.edu", "aff_unique_abbr": "Princeton;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Communication-Efficient Federated Bilevel Optimization with Global and Local Lower Level Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72509", "id": "B5XwENgy0T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/04bd683d5428d91c5fbb5a7d2c27064d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B5XwENgy0T", "openreview": "https://openreview.net/forum?id=B5XwENgy0T", "poster": "/media/PosterPDFs/NeurIPS%202023/72509.png?t=1702401717.0900455", "slides": "https://nips.cc/virtual/2023/poster/72509", "video": "https://nips.cc/virtual/2023/poster/72509", "author_site": "Junyi Li, Feihu Huang, Heng Huang", "tldr": "", "abstract": "Bilevel Optimization has witnessed notable progress recently with new emerging efficient algorithms. However, its application in the Federated Learning setting remains relatively underexplored, and the impact of Federated Learning's inherent challenges on the convergence of bilevel algorithms remain obscure.\nIn this work, we investigate Federated Bilevel Optimization problems and propose a communication-efficient algorithm, named FedBiOAcc. The algorithm leverages an efficient estimation of the hyper-gradient in the distributed setting and utilizes the momentum-based variance-reduction acceleration. Remarkably, FedBiOAcc achieves a communication complexity $O(\\epsilon^{-1})$, a sample complexity $O(\\epsilon^{-1.5})$ and the linear speed up with respect to the number of clients. We also analyze a special case of the Federated Bilevel Optimization problems, where lower level problems are locally managed by clients. We prove that FedBiOAcc-Local, a modified version of FedBiOAcc, converges at the same rate for this type of problems. Finally, we validate the proposed algorithms through two real-world tasks: Federated Data-cleaning and Federated Hyper-representation Learning. 
Empirical results show superior performance of our algorithms.", "keywords": "Federated Learning;Bilevel Optimization", "primary_area": "", "supplementary_material": "/attachment/542c2245cf69018812a7686796cf5dfc798a7d53.pdf", "author": "Junyi Li;Feihu Huang;Heng Huang", "authorids": "~Junyi_Li1;~Feihu_Huang1;~Heng_Huang1", "gender": "M;M;M", "homepage": ";;https://www.cs.umd.edu/~heng/", "dblp": ";169/6247;03/281", "google_scholar": "MzvZSs0AAAAJ;tRQwlHUAAAAJ;4OqLaDwAAAAJ", "orcid": ";0000-0003-0806-6074;", "linkedin": ";;", "or_profile": "~Junyi_Li1;~Feihu_Huang1;~Heng_Huang1", "aff": "University of Pittsburgh;Nanjing University of Aeronautics and Astronautics;University of Pittsburgh", "aff_domain": "pitt.edu;nuaa.edu.cn;pitt.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023communicationefficient,\ntitle={Communication-Efficient Federated Bilevel Optimization with Global and Local Lower Level Problems},\nauthor={Junyi Li and Feihu Huang and Heng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B5XwENgy0T}\n}", "github": "", "project": "", "reviewers": "5Ddx;Cbyn;rgL1;nAuw;MLKp", "pdf_size": 4394020, "rating": "4;5;5;6;7", "confidence": "3;3;4;3;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;2;3;3;3", "wc_summary": "87;68;38;87;272", "wc_strengths": "25;33;33;84;469", "wc_weaknesses": "106;28;238;18;176", "wc_questions": "2;286;38;160;82", "wc_limitations": "2;11;1;11;28", "wc_review": "222;426;348;360;1027", "wc_reply_reviewers": "11;208;13;97;81", "wc_reply_authors": "62;1529;27;234;31", "reply_reviewers": "1;2;1;2;1", "reply_authors": "3;4;2;3;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 110.4, 82.76376018524049 ], "wc_strengths_avg": [ 128.8, 171.39008139329417 ], "wc_weaknesses_avg": [ 113.2, 84.72638314008216 ], "wc_questions_avg": [ 113.6, 101.03781470320901 ], "wc_limitations_avg": [ 10.6, 9.687104830649867 ], "wc_review_avg": [ 476.6, 283.0036042173315 ], "wc_reply_reviewers_avg": [ 82.0, 71.97777434736365 ], "wc_reply_authors_avg": [ 376.6, 581.2044734858808 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1961161351381841, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=115632088592464233&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "pitt.edu;nuaa.edu.cn;pitt.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Pittsburgh;Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;http://www.nuaa.edu.cn", "aff_unique_abbr": "Pitt;NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Universal Gradient Descent Ascent Method for Nonconvex-Nonconcave Minimax Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72508", "id": "B6FihisDBl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a961dea42c23c3c0d01b79918701fb6e-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=B6FihisDBl", "openreview": "https://openreview.net/forum?id=B6FihisDBl", "poster": "/media/PosterPDFs/NeurIPS%202023/72508.png?t=1701937705.0847998", "slides": "https://nips.cc/virtual/2023/poster/72508", "video": "https://nips.cc/virtual/2023/poster/72508", "author_site": "Taoli Zheng, Linglingzhi Zhu, Anthony Man-Cho So, Jose Blanchet, Jiajin Li", "tldr": "", "abstract": "Nonconvex-nonconcave minimax optimization has received intense attention over the last decade due to its broad applications in machine learning. Most existing algorithms rely on one-sided information, such as the convexity (resp. concavity) of the primal (resp. dual) functions, or other specific structures, such as the Polyak-\u0141ojasiewicz (P\u0141) and Kurdyka-\u0141ojasiewicz (K\u0141) conditions. However, verifying these regularity conditions is challenging in practice. To meet this challenge, we propose a novel universally applicable single-loop algorithm, the doubly smoothed gradient descent ascent method (DS-GDA), which naturally balances the primal and dual updates. That is, DS-GDA with the same hyperparameters is able to uniformly solve nonconvex-concave, convex-nonconcave, and nonconvex-nonconcave problems with one-sided K\u0141 properties, achieving convergence with $\\mathcal{O}(\\epsilon^{-4})$ complexity. Sharper (even optimal) iteration complexity can be obtained when the K\u0141 exponent is known. Specifically, under the one-sided K\u0141 condition with exponent $\\theta\\in(0,1)$, DS-GDA converges with an iteration complexity of $\\mathcal{O}(\\epsilon^{-2\\max\\\\{2\\theta,1\\\\}})$. They all match the corresponding best results in the literature. Moreover, we show that DS-GDA is practically applicable to general nonconvex-nonconcave problems even without any regularity conditions, such as the P\u0141 condition, K\u0141 condition, or weak Minty variational inequalities condition. For various challenging nonconvex-nonconcave examples in the literature, including *Forsaken*, *Bilinearly-coupled minimax*, *Sixth-order polynomial*, and *PolarGame*, the proposed DS-GDA can all get rid of limit cycles. 
To the best of our knowledge, this is the first first-order algorithm to achieve convergence on all of these formidable problems.", "keywords": "Nonconvex-Nonconcave Minimax Optimization;Limit Cycle", "primary_area": "", "supplementary_material": "", "author": "Taoli Zheng;Linglingzhi Zhu;Anthony Man-Cho So;Jose Blanchet;Jiajin Li", "authorids": "~Taoli_Zheng1;~Linglingzhi_Zhu1;~Anthony_Man-Cho_So1;~Jose_Blanchet1;~Jiajin_Li2", "gender": "F;M;M;M;F", "homepage": ";https://lzzhuling.github.io/;http://www1.se.cuhk.edu.hk/~manchoso/;https://web.stanford.edu/~jblanche/;https://gerrili1996.github.io/", "dblp": "296/4727;329/6941;82/3202;75/5093.html;", "google_scholar": "LCDzD-MAAAAJ;https://scholar.google.com.hk/citations?user=nOSAyisAAAAJ;https://scholar.google.com.hk/citations?user=whi3UisAAAAJ;https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;", "orcid": ";0000-0002-5484-1134;0000-0003-2588-7851;;", "linkedin": ";;;jose-blanchet;", "or_profile": "~Taoli_Zheng1;~Linglingzhi_Zhu1;~Anthony_Man-Cho_So1;~Jose_Blanchet1;~Jiajin_Li2", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Stanford University;Stanford University", "aff_domain": "cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Full Professor;Professor;Postdoc", "bibtex": "@inproceedings{\nzheng2023universal,\ntitle={Universal Gradient Descent Ascent Method for Nonconvex-Nonconcave Minimax Optimization},\nauthor={Taoli Zheng and Linglingzhi Zhu and Anthony Man-Cho So and Jose Blanchet and Jiajin Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B6FihisDBl}\n}", "github": "", "project": "", "reviewers": "pZz2;KYPK;71Wf;gc6W", "pdf_size": 1287031, "rating": "4;6;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "147;20;32;99", "wc_strengths": "68;26;38;77", "wc_weaknesses": "164;50;239;78", "wc_questions": "155;128;15;347", "wc_limitations": "52;1;3;13", "wc_review": "586;225;327;614", "wc_reply_reviewers": "581;111;60;66", "wc_reply_authors": "2154;37;0;0", "reply_reviewers": "4;1;2;1", "reply_authors": "6;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.5, 51.55821951929683 ], "wc_strengths_avg": [ 52.25, 20.932928605429293 ], "wc_weaknesses_avg": [ 132.75, 74.34841962005649 ], "wc_questions_avg": [ 161.25, 119.41183986523279 ], "wc_limitations_avg": [ 17.25, 20.571521577170707 ], "wc_review_avg": [ 438.0, 166.26033802443683 ], "wc_reply_reviewers_avg": [ 204.5, 218.26417479742295 ], "wc_reply_authors_avg": [ 547.75, 927.4918800183643 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7128332478729558948&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.stanford.edu", 
"aff_unique_abbr": "CUHK;Stanford", "aff_campus_unique_index": "0;0;0;1;1", "aff_campus_unique": "Hong Kong SAR;Stanford", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "A Batch-to-Online Transformation under Random-Order Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72507", "id": "B6HSIgvyJ3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/afe99e55be23b3523818da1fefa33494-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B6HSIgvyJ3", "openreview": "https://openreview.net/forum?id=B6HSIgvyJ3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72507", "video": "https://nips.cc/virtual/2023/poster/72507", "author_site": "Jing Dong, Yuichi Yoshida", "tldr": "", "abstract": "We introduce a transformation framework that can be utilized to develop online algorithms with low $\\epsilon$-approximate regret in the random-order model from offline approximation algorithms. We first give a general reduction theorem that transforms an offline approximation algorithm with low average sensitivity to an online algorithm with low $\\epsilon$-approximate regret. We then demonstrate that offline approximation algorithms can be transformed into a low-sensitivity version using a coreset construction method. To showcase the versatility of our approach, we apply it to various problems, including online $(k,z)$-clustering, online matrix approximation, and online regression, and successfully achieve polylogarithmic $\\epsilon$-approximate regret for each problem. Moreover, we show that in all three cases, our algorithm also enjoys low inconsistency, which may be desired in some online applications.", "keywords": "online learning;random model setting", "primary_area": "", "supplementary_material": "/attachment/b04f86ec60474b4008c0add3f9a7905b86ac3d1b.pdf", "author": "Jing Dong;Yuichi Yoshida", "authorids": "~Jing_Dong3;~Yuichi_Yoshida1", "gender": "F;M", "homepage": ";http://research.nii.ac.jp/~yyoshida/", "dblp": ";36/4449.html", "google_scholar": "g5pYW10AAAAJ;https://scholar.google.co.jp/citations?user=EIXTG_UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jing_Dong3;~Yuichi_Yoshida1", "aff": "The Chinese University of Hong Kong, Shenzhen;National Institute of Informatics", "aff_domain": "cuhk.edu.cn;nii.ac.jp", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ndong2023a,\ntitle={A Batch-to-Online Transformation under Random-Order Model},\nauthor={Jing Dong and Yuichi Yoshida},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B6HSIgvyJ3}\n}", "github": "", "project": "", "reviewers": "zXrL;A1nn;mLKj;4met;uJY4", "pdf_size": 526957, "rating": "6;6;6;7;8", "confidence": "3;4;4;2;3", "soundness": "4;4;4;3;4", "novelty": "2;3;3;3;3", "presentation": "3;4;3;3;3", "wc_summary": "119;157;71;253;41", "wc_strengths": "39;130;25;54;28", "wc_weaknesses": "191;115;127;24;41", "wc_questions": "158;2;502;1;45", "wc_limitations": "13;1;1;1;1", "wc_review": "520;405;726;333;156", "wc_reply_reviewers": "61;19;152;4;19", "wc_reply_authors": "142;6;355;0;6", "reply_reviewers": "2;1;2;1;1", "reply_authors": "3;2;4;1;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 128.2, 73.97945660789892 
], "wc_strengths_avg": [ 55.2, 38.75770891061545 ], "wc_weaknesses_avg": [ 99.6, 60.813156471276834 ], "wc_questions_avg": [ 141.6, 189.07416534259775 ], "wc_limitations_avg": [ 3.4, 4.800000000000001 ], "wc_review_avg": [ 428.0, 190.16098443161258 ], "wc_reply_reviewers_avg": [ 51.0, 53.95924387906117 ], "wc_reply_authors_avg": [ 101.8, 137.43711289167857 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15751206564273588323&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "cuhk.edu.cn;nii.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese University of Hong Kong;National Institute of Informatics", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.nii.ac.jp/", "aff_unique_abbr": "CUHK;NII", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Japan" }, { "title": "POMDP Planning for Object Search in Partially Unknown Environment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72506", "id": "B6qZdrGRpm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a6d7226db2ff3643d8624624e3859c19-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B6qZdrGRpm", "openreview": "https://openreview.net/forum?id=B6qZdrGRpm", "poster": "/media/PosterPDFs/NeurIPS%202023/72506.png?t=1702138486.9774375", "slides": "https://nips.cc/virtual/2023/poster/72506", "video": "https://nips.cc/virtual/2023/poster/72506", "author_site": "Yongbo Chen, Hanna Kurniawati", "tldr": "", "abstract": "Efficiently searching for target objects in complex environments that contain various types of furniture, such as shelves, tables, and beds, is crucial for mobile robots, but it poses significant challenges due to various factors such as localization errors, limited field of view, and visual occlusion. To address this problem, we propose a Partially Observable Markov Decision Process (POMDP) formulation with a growing state space for object search in a 3D region. We solve this POMDP by carefully designing a perception module and developing a planning algorithm, called Growing Partially Observable Monte-Carlo Planning (GPOMCP), based on online Monte-Carlo tree search and belief tree reuse with a novel upper confidence bound. We have demonstrated that belief tree reuse is reasonable and achieves good performance when the belief differences are limited. Additionally, we introduce a guessed target object with an updating grid world to guide the search in the information-less and reward-less cases, like the absence of any detected objects. We tested our approach using Gazebo simulations on four scenarios of target finding in a realistic indoor living environment with the Fetch robot simulator. 
Our results indicate that, compared to baseline approaches based on POMCP, our approach enables the robot to find the target object faster and with a higher success rate, while using the same computational resources.", "keywords": "robotics;Partially Observable Markov Decision Process (POMDP);object search", "primary_area": "", "supplementary_material": "/attachment/11aef926b871b1a2d7a8eab93dd505ec16ff89fd.pdf", "author": "Yongbo Chen;Hanna Kurniawati", "authorids": "~Yongbo_Chen1;~Hanna_Kurniawati1", "gender": "M;F", "homepage": "https://sites.google.com/view/yongbochen/home;http://users.cecs.anu.edu.au/~hannakur/", "dblp": "17/9406-1;https://dblp.uni-trier.de/pers/k/Kurniawati:Hanna.html", "google_scholar": "0mipo_YAAAAJ;https://scholar.google.com.au/citations?user=JkjFXbAAAAAJ", "orcid": "0000-0003-2756-7050;", "linkedin": ";", "or_profile": "~Yongbo_Chen1;~Hanna_Kurniawati1", "aff": "Australian National University;Australian National University", "aff_domain": "anu.edu.au;anu.edu.au", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchen2023pomdp,\ntitle={{POMDP} Planning for Object Search in Partially Unknown Environment},\nauthor={Yongbo Chen and Hanna Kurniawati},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B6qZdrGRpm}\n}", "github": "", "project": "", "reviewers": "pj6p;UoQp;2WUu;uTWr", "pdf_size": 4983695, "rating": "4;5;7;7", "confidence": "1;3;3;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "48;49;193;111", "wc_strengths": "13;45;208;82", "wc_weaknesses": "46;64;419;126", "wc_questions": "60;62;143;36", "wc_limitations": "9;6;8;62", "wc_review": "176;226;971;417", "wc_reply_reviewers": "0;10;61;61", "wc_reply_authors": "0;15;56;52", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.25, 59.31852577399408 ], "wc_strengths_avg": [ 87.0, 74.00337830126406 ], "wc_weaknesses_avg": [ 163.75, 150.32693537752974 ], "wc_questions_avg": [ 75.25, 40.43126883984721 ], "wc_limitations_avg": [ 21.25, 23.551804601770964 ], "wc_review_avg": [ 447.5, 315.3398959852686 ], "wc_reply_reviewers_avg": [ 33.0, 28.222331583340168 ], "wc_reply_authors_avg": [ 30.75, 23.889066536807167 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8388704928078611, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18130281062485226700&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "anu.edu.au;anu.edu.au", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Australian National University", "aff_unique_dep": "", "aff_unique_url": "https://www.anu.edu.au", "aff_unique_abbr": "ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "An Alternative to Variance: Gini Deviation for Risk-averse Policy Gradient", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72505", "id": "B7QRV4XXiK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf665e1cf271faa5037374c884ba3808-Abstract-Conference.html",
"pdf": "https://openreview.net/pdf?id=B7QRV4XXiK", "openreview": "https://openreview.net/forum?id=B7QRV4XXiK", "poster": "/media/PosterPDFs/NeurIPS%202023/72505.png?t=1701399817.5919402", "slides": "https://nips.cc/virtual/2023/poster/72505", "video": "https://nips.cc/virtual/2023/poster/72505", "author_site": "Yudong Luo, Guiliang Liu, Pascal Poupart, Yangchen Pan", "tldr": "", "abstract": "Restricting the variance of a policy\u2019s return is a popular choice in risk-averse Reinforcement Learning (RL) due to its clear mathematical definition and easy interpretability. Traditional methods directly restrict the total return variance. Recent methods restrict the per-step reward variance as a proxy. We thoroughly examine the limitations of these variance-based methods, such as sensitivity to numerical scale and hindering of policy learning, and propose to use an alternative risk measure, Gini deviation, as a substitute. We study various properties of this new risk measure and derive a policy gradient algorithm to minimize it. Empirical evaluation in domains where risk-aversion can be clearly defined, shows that our algorithm can mitigate the limitations of variance-based risk measures and achieves high return with low risk in terms of variance and Gini deviation when others fail to learn a reasonable policy.", "keywords": "risk-averse RL;mean-variance RL", "primary_area": "", "supplementary_material": "/attachment/c6a637473654261195bbfbd34ab9e906ea23eb3d.zip", "author": "Yudong Luo;Guiliang Liu;Pascal Poupart;Yangchen Pan", "authorids": "~Yudong_Luo1;~Guiliang_Liu1;~Pascal_Poupart2;~Yangchen_Pan2", "gender": ";M;M;M", "homepage": "http://miyunluo.com;http://guiliang.me/;https://cs.uwaterloo.ca/~ppoupart;https://yannickycpan.github.io/yangchenpan/", "dblp": "161/8157;220/5411;26/2122;183/0925", "google_scholar": ";CuMylvEAAAAJ;https://scholar.google.ca/citations?user=KhAJWroAAAAJ;4M4pOp4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yudong_Luo1;~Guiliang_Liu1;~Pascal_Poupart2;~Yangchen_Pan2", "aff": "Shanghai Jiaotong University;The Chinese University of Hong Kong, Shenzhen;University of Waterloo;University of Oxford", "aff_domain": "sjtu.edu.cn;cuhk.edu.hk;uwaterloo.ca;eng.ox.ac.uk", "position": "Undergrad student;Assistant Professor;Full Professor;Lecturer", "bibtex": "@inproceedings{\nluo2023an,\ntitle={An Alternative to Variance: Gini Deviation for Risk-averse Policy Gradient},\nauthor={Yudong Luo and Guiliang Liu and Pascal Poupart and Yangchen Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B7QRV4XXiK}\n}", "github": "", "project": "", "reviewers": "Dz1b;diaH;oAim;cQhS;FmwV", "pdf_size": 7059454, "rating": "5;5;6;7;7", "confidence": "3;2;4;3;3", "soundness": "2;3;4;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "57;20;117;130;133", "wc_strengths": "27;30;14;116;125", "wc_weaknesses": "242;62;14;365;107", "wc_questions": "55;3;14;307;83", "wc_limitations": "8;1;13;17;1", "wc_review": "389;116;172;935;449", "wc_reply_reviewers": "93;0;21;257;24", "wc_reply_authors": "34;0;0;18;0", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.4, 45.07149875475631 ], "wc_strengths_avg": [ 62.4, 47.82718892011112 ], 
"wc_weaknesses_avg": [ 158.0, 128.41962466850617 ], "wc_questions_avg": [ 92.4, 111.06682673057693 ], "wc_limitations_avg": [ 8.0, 6.387487769068525 ], "wc_review_avg": [ 412.2, 290.0492371994796 ], "wc_reply_reviewers_avg": [ 79.0, 94.35041070392857 ], "wc_reply_authors_avg": [ 10.4, 13.705473359209451 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.35355339059327373, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17629607429364512950&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "sjtu.edu.cn;cuhk.edu.hk;uwaterloo.ca;eng.ox.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;Chinese University of Hong Kong;University of Waterloo;University of Oxford", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.cuhk.edu.cn;https://uwaterloo.ca;https://www.ox.ac.uk", "aff_unique_abbr": "SJTU;CUHK;UW;Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "China;Canada;United Kingdom" }, { "title": "Optimization and Bayes: A Trade-off for Overparameterized Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72504", "id": "B7QkdEnjL9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b950a297fc888c95bfeb587ef000d70-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B7QkdEnjL9", "openreview": "https://openreview.net/forum?id=B7QkdEnjL9", "poster": "/media/PosterPDFs/NeurIPS%202023/72504.png?t=1701883166.9191191", "slides": "https://nips.cc/virtual/2023/poster/72504", "video": "https://nips.cc/virtual/2023/poster/72504", "author_site": "Zhengmian Hu, Heng Huang", "tldr": "", "abstract": "This paper proposes a novel algorithm, Transformative Bayesian Learning (TansBL), which bridges the gap between empirical risk minimization (ERM) and Bayesian learning for neural networks. We compare ERM, which uses gradient descent to optimize, and Bayesian learning with importance sampling for their generalization and computational complexity. We derive the first algorithm-dependent PAC-Bayesian generalization bound for infinitely wide networks based on an exact KL divergence between the trained posterior distribution obtained by infinitesimal step size gradient descent and a Gaussian prior. Moreover, we show how to transform gradient-based optimization into importance sampling by incorporating a weight. While Bayesian learning has better generalization, it suffers from low sampling efficiency. Optimization methods, on the other hand, have good sampling efficiency but poor generalization. 
Our proposed algorithm TansBL enables a trade-off between generalization and sampling efficiency.", "keywords": "Bayesian learning;Generalization", "primary_area": "", "supplementary_material": "/attachment/fa3b7d61e2dd5927be3d68badd1f850bd7ee84db.zip", "author": "Zhengmian Hu;Heng Huang", "authorids": "~Zhengmian_Hu1;~Heng_Huang1", "gender": "M;M", "homepage": "https://www.umd.edu/;https://www.cs.umd.edu/~heng/", "dblp": "285/4945;03/281", "google_scholar": "4eXiWWgAAAAJ;4OqLaDwAAAAJ", "orcid": "0000-0003-0316-146X;", "linkedin": ";", "or_profile": "~Zhengmian_Hu1;~Heng_Huang1", "aff": "University of Pittsburgh;University of Pittsburgh", "aff_domain": "pitt.edu;pitt.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nhu2023optimization,\ntitle={Optimization and Bayes: A Trade-off for Overparameterized Neural Networks},\nauthor={Zhengmian Hu and Heng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B7QkdEnjL9}\n}", "github": "", "project": "", "reviewers": "4jd4;sSya;1HBf;5yu1;TNvC", "pdf_size": 618162, "rating": "4;5;5;6;7", "confidence": "3;2;2;3;3", "soundness": "2;3;3;2;4", "novelty": "1;3;3;3;3", "presentation": "1;2;2;2;4", "wc_summary": "48;219;62;109;59", "wc_strengths": "43;158;66;61;127", "wc_weaknesses": "308;409;97;59;23", "wc_questions": "48;311;63;192;98", "wc_limitations": "71;1;84;24;1", "wc_review": "518;1098;372;445;308", "wc_reply_reviewers": "80;100;0;0;34", "wc_reply_authors": "546;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.2, 0.9797958971132712 ], "wc_summary_avg": [ 99.4, 63.354873529981894 ], "wc_strengths_avg": [ 91.0, 43.84974344280705 ], "wc_weaknesses_avg": [ 179.2, 151.6580363844923 ], "wc_questions_avg": [ 142.4, 98.05427068720668 ], "wc_limitations_avg": [ 36.2, 34.99371372118141 ], "wc_review_avg": [ 548.2, 283.75510568093745 ], "wc_reply_reviewers_avg": [ 42.8, 40.97999511957023 ], "wc_reply_authors_avg": [ 109.2, 218.40000000000003 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.32025630761017426, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JRCA2jnDuxoJ:scholar.google.com/&scioq=Optimization+and+Bayes:+A+Trade-off+for+Overparameterized+Neural+Networks&hl=en&as_sdt=0,47", "gs_version_total": 5, "email": "pitt.edu;pitt.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Pittsburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.pitt.edu", "aff_unique_abbr": "Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "CamoPatch: An Evolutionary Strategy for Generating Camoflauged Adversarial Patches", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72503", "id": "B94G0MXWQX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d482f1362bd6a8448d7c35e717c7063a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=B94G0MXWQX", "openreview": "https://openreview.net/forum?id=B94G0MXWQX", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72503.png?t=1698665187.507827", "slides": "https://nips.cc/virtual/2023/poster/72503", "video": "https://nips.cc/virtual/2023/poster/72503", "author_site": "Phoenix Williams, Ke Li", "tldr": "", "abstract": "Deep neural networks (DNNs) have demonstrated vulnerabilities to adversarial examples, which raises concerns about their reliability in safety-critical applications. While the majority of existing methods generate adversarial examples by making small modifications to the entire image, recent research has proposed a practical alternative known as adversarial patches. Adversarial patches have shown to be highly effective in causing DNNs to misclassify by distorting a localized area (patch) of the image. However, existing methods often produce clearly visible distortions since they do not consider the visibility of the patch. To address this, we propose a novel method for constructing adversarial patches that approximates the appearance of the area it covers. We achieve this by using a set of semi-transparent, RGB-valued circles, drawing inspiration from the computational art community. We utilize an evolutionary strategy to optimize the properties of each shape, and employ a simulated annealing approach to optimize the patch's location. Our approach achieves better or comparable performance to state-of-the-art methods on ImageNet DNN classifiers while achieving a lower $l_2$ distance from the original image. By minimizing the visibility of the patch, this work further highlights the vulnerabilities of DNNs to adversarial patches.", "keywords": "Evolutionary Strategy;Adversarial Attack;Adversarial Patches;Computational Art;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/c2bb8557da95b1cebacc2b1eb7ed8c88ba51fe37.zip", "author": "Phoenix Neale Williams;Ke Li", "authorids": "~Phoenix_Neale_Williams1;~Ke_Li5", "gender": "M;M", "homepage": "https://phoenixwilliams.github.io/PersonalWebsite/;https://colalab.ai/", "dblp": "310/4389;75/6627-1.html", "google_scholar": ";https://scholar.google.co.uk/citations?user=lUFU8KsAAAAJ", "orcid": ";0000-0001-7200-4244", "linkedin": ";ke-li-29423226/", "or_profile": "~Phoenix_Neale_Williams1;~Ke_Li5", "aff": "University of Exeter;University of Exeter", "aff_domain": "exeter.ac.uk;exeter.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nwilliams2023camopatch,\ntitle={CamoPatch: An Evolutionary Strategy for Generating Camoflauged Adversarial Patches},\nauthor={Phoenix Neale Williams and Ke Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=B94G0MXWQX}\n}", "github": "", "project": "", "reviewers": "sS1s;1i2p;Z9ie;tLpS;eiFr", "pdf_size": 7139773, "rating": "5;5;5;5;6", "confidence": "4;5;3;3;3", "soundness": "3;3;2;2;2", "novelty": "3;2;3;2;1", "presentation": "3;3;2;3;2", "wc_summary": "80;93;37;129;28", "wc_strengths": "22;23;31;127;15", "wc_weaknesses": "22;243;27;123;39", "wc_questions": "194;2;40;59;20", "wc_limitations": "142;1;29;54;1", "wc_review": "460;362;164;492;103", "wc_reply_reviewers": "44;75;13;0;22", "wc_reply_authors": "0;125;0;0;196", "reply_reviewers": "1;2;1;0;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 73.4, 37.16234653516916 ], 
"wc_strengths_avg": [ 43.6, 42.00761835667431 ], "wc_weaknesses_avg": [ 90.8, 84.4852649874521 ], "wc_questions_avg": [ 63.0, 68.2290260812801 ], "wc_limitations_avg": [ 45.4, 52.18658831539 ], "wc_review_avg": [ 316.2, 156.39744243433137 ], "wc_reply_reviewers_avg": [ 30.8, 26.346916328101855 ], "wc_reply_authors_avg": [ 64.2, 81.77138863930342 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.37500000000000006, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10818379599156814334&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "exeter.ac.uk;exeter.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Exeter", "aff_unique_dep": "", "aff_unique_url": "https://www.exeter.ac.uk", "aff_unique_abbr": "Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Sampling from Structured Log-Concave Distributions via a Soft-Threshold Dikin Walk", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72502", "id": "BA7NHAzbpO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/656faa09eb6e82dd86de9a417111c3b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BA7NHAzbpO", "openreview": "https://openreview.net/forum?id=BA7NHAzbpO", "poster": "/media/PosterPDFs/NeurIPS%202023/72502.png?t=1702167047.5832343", "slides": "https://nips.cc/virtual/2023/poster/72502", "video": "https://nips.cc/virtual/2023/poster/72502", "author_site": "Oren Mangoubi, Nisheeth K. Vishnoi", "tldr": "", "abstract": "Given a Lipschitz or smooth convex function $f:K \\to \\mathbb{R}^d$ for a bounded polytope $K:=${ $\\theta \\in \\mathbb{R}^d: A\\theta \\leq b$}, where $A\\in \\mathbb{R}^{m\\times d}$ and $b \\in \\mathbb{R}^m$, we consider the problem of sampling from the log-concave distribution $\\pi(\\theta) \\propto e^{-f(\\theta)}$ constrained to $K$. Interest in this problem derives from its applications to Bayesian inference and differential privacy. We present a generalization of the Dikin walk to this setting that requires at most $O((md + d L^2 R^2) \\times md^{\\omega-1} \\log(\\frac{w}{\\delta}))$ arithmetic operations to sample from $\\pi$ within error $\\delta>0$ in the total variation distance from a $w$-warm start. Here $L$ is the Lipschitz constant of $f$, $K$ is contained in a ball of radius $R$ and contains a ball of smaller radius $r$, and $\\omega \\approx 2.37$ is the matrix-multiplication constant. This improves on the running time of prior works for a range of structured settings important for the aforementioned inference and privacy applications. 
Technically, we depart from previous Dikin walks by adding a soft-threshold regularizer derived from the Lipschitz or smoothness properties of $f$ to a barrier function for $K$ that allows our version of the Dikin walk to propose updates that have a high Metropolis acceptance ratio for $f$, while at the same time remaining inside the polytope $K$.", "keywords": "Logconcave sampling;Dikin walk;Markov chain Monte Carlo;interior point methods", "primary_area": "", "supplementary_material": "", "author": "Oren Mangoubi;Nisheeth K Vishnoi", "authorids": "~Oren_Mangoubi1;~Nisheeth_K_Vishnoi1", "gender": "M;M", "homepage": ";http://cs.yale.edu/homes/vishnoi/Home.html", "dblp": "158/6707;02/2229", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Oren_Mangoubi1;~Nisheeth_K_Vishnoi1", "aff": "Worcester Polytechnic Institute;Yale University", "aff_domain": "wpi.edu;yale.edu", "position": "Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmangoubi2023sampling,\ntitle={Sampling from Structured Log-Concave Distributions via a Soft-Threshold Dikin Walk},\nauthor={Oren Mangoubi and Nisheeth K Vishnoi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BA7NHAzbpO}\n}", "github": "", "project": "", "reviewers": "D286;ACUh;wurv;5ZqR", "pdf_size": 907227, "rating": "7;7;7;7", "confidence": "5;3;3;4", "soundness": "4;3;4;3", "novelty": "2;2;2;3", "presentation": "3;1;3;2", "wc_summary": "170;51;244;67", "wc_strengths": "93;53;88;62", "wc_weaknesses": "359;105;167;13", "wc_questions": "13;107;75;1", "wc_limitations": "1;1;33;1", "wc_review": "636;317;607;144", "wc_reply_reviewers": "28;21;47;0", "wc_reply_authors": "0;0;40;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 133.0, 78.69243928103894 ], "wc_strengths_avg": [ 74.0, 16.896745248715803 ], "wc_weaknesses_avg": [ 161.0, 126.76750372236569 ], "wc_questions_avg": [ 49.0, 43.70354676682432 ], "wc_limitations_avg": [ 9.0, 13.856406460551018 ], "wc_review_avg": [ 426.0, 205.10119453577056 ], "wc_reply_reviewers_avg": [ 24.0, 16.80773631397161 ], "wc_reply_authors_avg": [ 10.0, 17.320508075688775 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14947121046872003862&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "wpi.edu;yale.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Worcester Polytechnic Institute;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.wpi.edu;https://www.yale.edu", "aff_unique_abbr": "WPI;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Subspace Identification for Multi-Source Domain Adaptation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72501", "id": "BACQLWQW8u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cb7246003d556c4d1cbf9c17c392ee3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BACQLWQW8u", "openreview": 
"https://openreview.net/forum?id=BACQLWQW8u", "poster": "/media/PosterPDFs/NeurIPS%202023/72501.png?t=1697385765.118303", "slides": "https://nips.cc/virtual/2023/poster/72501", "video": "https://nips.cc/virtual/2023/poster/72501", "author_site": "Zijian Li, Ruichu Cai, Guangyi Chen, Boyang Sun, Zhifeng Hao, Kun Zhang", "tldr": "", "abstract": "Multi-source domain adaptation (MSDA) methods aim to transfer knowledge from multiple labeled source domains to an unlabeled target domain. Although current methods achieve target joint distribution identifiability by enforcing minimal changes across domains, they often necessitate stringent conditions, such as an adequate number of domains, monotonic transformation of latent variables, and invariant label distributions. These requirements are challenging to satisfy in real-world applications. To mitigate the need for these strict assumptions, we propose a subspace identification theory that guarantees the disentanglement of domain-invariant and domain-specific variables under less restrictive constraints regarding domain numbers and transformation properties and thereby facilitating domain adaptation by minimizing the impact of domain shifts on invariant variables. Based on this theory, we develop a Subspace Identification Guarantee (SIG) model that leverages variational inference. Furthermore, the SIG model incorporates class-aware conditional alignment to accommodate target shifts where label distributions change with the domain. Experimental results demonstrate that our SIG model outperforms existing MSDA techniques on various benchmark datasets, highlighting its effectiveness in real-world applications.", "keywords": "Domain Adaptation;Identification", "primary_area": "", "supplementary_material": "/attachment/ec60b24356b20e3048741398328b9a1b62f39708.zip", "author": "Zijian Li;Ruichu Cai;Guangyi Chen;Boyang Sun;Zhifeng Hao;Kun Zhang", "authorids": "~Zijian_Li1;~Ruichu_Cai1;~Guangyi_Chen1;~Boyang_Sun1;~Zhifeng_Hao2;~Kun_Zhang1", "gender": "M;M;M;M;M;M", "homepage": ";https://ruichucai.github.io/;https://chengy12.github.io/;;http://www.andrew.cmu.edu/user/kunz1/;https://www.stu.edu.cn/xxgk/dzld1/hzf.htm", "dblp": "27/10487;09/6889;c/GuangyiChen-2;;96/3115-1;", "google_scholar": "j3ilESoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;;RGoypN4AAAAJ;ZF3gp9wAAAAJ", "orcid": ";;;0000-0002-0118-4425;;", "linkedin": ";;;;;", "or_profile": "~Zijian_Li1;~Ruichu_Cai1;~Guangyi_Chen1;~Boyang_Sun1;~Kun_Zhang1;~Zhifeng_Hao4", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Guangdong University of Technology;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University;Shantou University", "aff_domain": "mbzuai.ac.ae;gdut.edu.cn;cmu.edu;mbzuai.ac.ae;cmu.edu;stu.edu.cn", "position": "Postdoc;Full Professor;Postdoc;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2023subspace,\ntitle={Subspace Identification for Multi-Source Domain Adaptation},\nauthor={Zijian Li and Ruichu Cai and Guangyi Chen and Boyang Sun and Zhifeng Hao and Kun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BACQLWQW8u}\n}", "github": "", "project": "", "reviewers": "bTZb;nEWG;Yx4K;pec6", "pdf_size": 717948, "rating": "7;7;7;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "101;68;100;96", 
"wc_strengths": "145;46;141;48", "wc_weaknesses": "300;139;138;80", "wc_questions": "6;1;22;46", "wc_limitations": "1;1;6;6", "wc_review": "553;255;407;276", "wc_reply_reviewers": "40;7;0;33", "wc_reply_authors": "35;0;0;35", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 13.5531361684298 ], "wc_strengths_avg": [ 95.0, 48.026034606242476 ], "wc_weaknesses_avg": [ 164.25, 81.93404359605353 ], "wc_questions_avg": [ 18.75, 17.541023345289748 ], "wc_limitations_avg": [ 3.5, 2.5 ], "wc_review_avg": [ 372.75, 119.25681322255764 ], "wc_reply_reviewers_avg": [ 20.0, 16.867127793433 ], "wc_reply_authors_avg": [ 17.5, 17.5 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2638511116211824695&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "mbzuai.ac.ae;gdut.edu.cn;cmu.edu;mbzuai.ac.ae;cmu.edu;stu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;2;3", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Guangdong University of Technology;Carnegie Mellon University;Shantou University", "aff_unique_dep": ";;;", "aff_unique_url": "https://mbzuai.ac.ae;http://www.gdut.edu.cn;https://www.cmu.edu;https://www.stu.edu.cn", "aff_unique_abbr": "MBZUAI;GDUT;CMU;STU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;2;1", "aff_country_unique": "United Arab Emirates;China;United States" }, { "title": "Selective Amnesia: A Continual Learning Approach to Forgetting in Deep Generative Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72500", "id": "BC1IJdsuYB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/376276a95781fa17c177b1ccdd0a03ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BC1IJdsuYB", "openreview": "https://openreview.net/forum?id=BC1IJdsuYB", "poster": "/media/PosterPDFs/NeurIPS%202023/72500.png?t=1701397677.8932803", "slides": "https://nips.cc/virtual/2023/poster/72500", "video": "https://nips.cc/virtual/2023/poster/72500", "author_site": "Alvin Heng, Harold Soh", "tldr": "", "abstract": "The recent proliferation of large-scale text-to-image models has led to growing concerns that such models may be misused to generate harmful, misleading, and inappropriate content. Motivated by this issue, we derive a technique inspired by continual learning to selectively forget concepts in pretrained deep generative models. Our method, dubbed Selective Amnesia, enables controllable forgetting where a user can specify how a concept should be forgotten. Selective Amnesia can be applied to conditional variational likelihood models, which encompass a variety of popular deep generative frameworks, including variational autoencoders and large-scale text-to-image diffusion models. 
Experiments across different models demonstrate that our approach induces forgetting on a variety of concepts, from entire classes in standard datasets to celebrity and nudity prompts in text-to-image models.", "keywords": "generative models;forgetting", "primary_area": "", "supplementary_material": "/attachment/0196a5d3e1d44f9a7c9e9d76fc8ffdf5da5d14ba.zip", "author": "Alvin Heng;Harold Soh", "authorids": "~Alvin_Heng1;~Harold_Soh1", "gender": ";M", "homepage": ";http://www.haroldsoh.com", "dblp": "338/9333;06/4578", "google_scholar": "https://scholar.google.com.sg/citations?user=fHFKbUMAAAAJ;https://scholar.google.com.sg/citations?user=lkgd1BsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Alvin_Heng1;~Harold_Soh1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nheng2023selective,\ntitle={Selective Amnesia: A Continual Learning Approach to Forgetting in Deep Generative Models},\nauthor={Alvin Heng and Harold Soh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BC1IJdsuYB}\n}", "github": "", "project": "", "reviewers": "ehLH;VhMr;bzv7;n35v", "pdf_size": 28615962, "rating": "6;6;6;7", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "53;69;86;103", "wc_strengths": "55;87;275;102", "wc_weaknesses": "155;136;286;423", "wc_questions": "22;60;9;104", "wc_limitations": "10;8;17;15", "wc_review": "295;360;673;747", "wc_reply_reviewers": "62;77;20;85", "wc_reply_authors": "16;125;13;50", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 18.673175948402566 ], "wc_strengths_avg": [ 129.75, 85.5610162398741 ], "wc_weaknesses_avg": [ 250.0, 115.37547399685948 ], "wc_questions_avg": [ 48.75, 36.99577678600627 ], "wc_limitations_avg": [ 12.5, 3.640054944640259 ], "wc_review_avg": [ 518.75, 194.3944122139317 ], "wc_reply_reviewers_avg": [ 61.0, 25.06990227344335 ], "wc_reply_authors_avg": [ 51.0, 45.1275968781853 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 119, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=765714229002273583&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "u.nus.edu;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Multi-Object Representation Learning via Feature Connectivity and Object-Centric Regularization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72499", "id": "BDno5qWEFh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bcef27c5825d1ed8757290f237b2d851-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BDno5qWEFh", "openreview": "https://openreview.net/forum?id=BDno5qWEFh", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72499.png?t=1701364460.6391025", "slides": "https://nips.cc/virtual/2023/poster/72499", "video": "https://nips.cc/virtual/2023/poster/72499", "author_site": "Alex Foo, Wynne Hsu, Mong Li Lee", "tldr": "", "abstract": "Discovering object-centric representations from images has the potential to greatly improve the robustness, sample efficiency and interpretability of machine learning algorithms. Current works on multi-object images typically follow a generative approach that optimizes for input reconstruction and fail to scale to real-world datasets despite significant increases in model capacity. We address this limitation by proposing a novel method that leverages feature connectivity to cluster neighboring pixels likely to belong to the same object. We further design two object-centric regularization terms to refine object representations in the latent space, enabling our approach to scale to complex real-world images. Experimental results on simulated, real-world, complex texture and common object images demonstrate a substantial improvement in the quality of discovered objects compared to state-of-the-art methods, as well as the sample efficiency and generalizability of our approach. We also show that the discovered object-centric representations can accurately predict key object properties in downstream tasks, highlighting the potential of our method to advance the field of multi-object representation learning.", "keywords": "Object-Centric Learning;Multi-Object Representation Learning", "primary_area": "", "supplementary_material": "/attachment/e4faf9887eb40d963e28a736682aa3f66506889e.zip", "author": "Alex Foo;Wynne Hsu;Mong-Li Lee", "authorids": "~Alex_Foo1;~Wynne_Hsu1;~Mong-Li_Lee1", "gender": "M;F;F", "homepage": ";http://www.comp.nus.edu.sg/~whsu/;https://www.comp.nus.edu.sg/~leeml/", "dblp": "266/3614;h/WynneHsu;l/MongLiLee", "google_scholar": "nzge_YkAAAAJ;https://scholar.google.com.tw/citations?user=ljyBjv8AAAAJ;https://scholar.google.com.tw/citations?user=_xFTK8wAAAAJ", "orcid": ";0000-0002-4142-8893;0000-0002-9636-388X", "linkedin": ";;", "or_profile": "~Alex_Foo1;~Wynne_Hsu1;~Mong-Li_Lee1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfoo2023multiobject,\ntitle={Multi-Object Representation Learning via Feature Connectivity and Object-Centric Regularization},\nauthor={Alex Foo and Wynne Hsu and Mong-Li Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BDno5qWEFh}\n}", "github": "", "project": "", "reviewers": "GCAa;T4Hn;2W4b;TJh8", "pdf_size": 3208383, "rating": "5;6;8;8", "confidence": "3;5;4;2", "soundness": "2;2;4;3", "novelty": "2;3;4;3", "presentation": "3;3;4;4", "wc_summary": "129;78;115;94", "wc_strengths": "35;122;110;71", "wc_weaknesses": "415;403;44;61", "wc_questions": "103;213;97;11", "wc_limitations": "232;214;4;55", "wc_review": "914;1030;370;292", "wc_reply_reviewers": "262;468;0;57", "wc_reply_authors": "527;727;0;0", "reply_reviewers": "2;2;0;1", "reply_authors": "3;3;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 104.0, 19.50640920313116 ], 
"wc_strengths_avg": [ 84.5, 34.23813663153998 ], "wc_weaknesses_avg": [ 230.75, 178.4017586796722 ], "wc_questions_avg": [ 106.0, 71.7007670809734 ], "wc_limitations_avg": [ 126.25, 98.6214352967954 ], "wc_review_avg": [ 651.5, 324.28806638542835 ], "wc_reply_reviewers_avg": [ 196.75, 184.44155578393932 ], "wc_reply_authors_avg": [ 313.5, 321.3755591204782 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5529904814839287803&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "TensorNet: Cartesian Tensor Representations for Efficient Learning of Molecular Potentials", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72498", "id": "BEHlPdBZ2e", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/75c2ec5f98d7b2f50ad68033d2c07086-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BEHlPdBZ2e", "openreview": "https://openreview.net/forum?id=BEHlPdBZ2e", "poster": "/media/PosterPDFs/NeurIPS%202023/72498.png?t=1701424604.6646454", "slides": "https://nips.cc/virtual/2023/poster/72498", "video": "https://nips.cc/virtual/2023/poster/72498", "author_site": "Guillem Simeon, Gianni De Fabritiis", "tldr": "", "abstract": "The development of efficient machine learning models for molecular systems representation is becoming crucial in scientific research. We introduce TensorNet, an innovative O(3)-equivariant message-passing neural network architecture that leverages Cartesian tensor representations. By using Cartesian tensor atomic embeddings, feature mixing is simplified through matrix product operations. Furthermore, the cost-effective decomposition of these tensors into rotation group irreducible representations allows for the separate processing of scalars, vectors, and tensors when necessary. Compared to higher-rank spherical tensor models, TensorNet demonstrates state-of-the-art performance with significantly fewer parameters. For small molecule potential energies, this can be achieved even with a single interaction layer. As a result of all these properties, the model's computational cost is substantially decreased. Moreover, the accurate prediction of vector and tensor molecular quantities on top of potential energies and forces is possible. 
In summary, TensorNet's framework opens up a new space for the design of state-of-the-art equivariant models.", "keywords": "Neural network interatomic potentials;Equivariant graph neural network;Message passing neural network", "primary_area": "", "supplementary_material": "", "author": "Guillem Simeon;Gianni De Fabritiis", "authorids": "~Guillem_Simeon1;~Gianni_De_Fabritiis1", "gender": "Not Specified;M", "homepage": ";https://www.compscience.org", "dblp": ";29/605", "google_scholar": "sjXGN6MAAAAJ;-_kX4kMAAAAJ", "orcid": "0000-0003-3225-1632;", "linkedin": ";gdefabritiis/", "or_profile": "~Guillem_Simeon1;~Gianni_De_Fabritiis1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra", "aff_domain": "upf.es;upf.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsimeon2023tensornet,\ntitle={TensorNet: Cartesian Tensor Representations for Efficient Learning of Molecular Potentials},\nauthor={Guillem Simeon and Gianni De Fabritiis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BEHlPdBZ2e}\n}", "github": "", "project": "", "reviewers": "1mQm;3Tca;QwGa;u3os", "pdf_size": 557775, "rating": "2;5;6;6", "confidence": "4;4;2;2", "soundness": "3;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;2", "wc_summary": "43;58;115;71", "wc_strengths": "31;64;91;52", "wc_weaknesses": "64;87;70;177", "wc_questions": "2;31;91;1", "wc_limitations": "10;1;1;13", "wc_review": "150;241;368;314", "wc_reply_reviewers": "0;0;17;28", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.6393596310755 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 26.864242032858474 ], "wc_strengths_avg": [ 59.5, 21.68524844220144 ], "wc_weaknesses_avg": [ 99.5, 45.53295509847785 ], "wc_questions_avg": [ 31.25, 36.540217569138804 ], "wc_limitations_avg": [ 6.25, 5.356071321407137 ], "wc_review_avg": [ 268.25, 81.80579135978088 ], "wc_reply_reviewers_avg": [ 11.25, 11.903255857117413 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7624928516630233, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16086668725128006872&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "upf.es;upf.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Spain" }, { "title": "HyP-NeRF: Learning Improved NeRF Priors using a HyperNetwork", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72497", "id": "BExDjNDYkN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a03037317560b8c5f2fb4b6466d4c439-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BExDjNDYkN", "openreview": "https://openreview.net/forum?id=BExDjNDYkN", "poster": "/media/PosterPDFs/NeurIPS%202023/72497.png?t=1702222999.9218326", "slides": "https://nips.cc/virtual/2023/poster/72497", "video": "https://nips.cc/virtual/2023/poster/72497", "author_site": "Bipasha Sen, Gaurav Singh, Aditya 
Agarwal, Rohith Agaram, Madhava Krishna, Srinath Sridhar", "tldr": "", "abstract": "Neural Radiance Fields (NeRF) have become an increasingly popular representation to capture high-quality appearance and shape of scenes and objects. However, learning generalizable NeRF priors over categories of scenes or objects has been challenging due to the high dimensionality of network weight space. To address the limitations of existing work on generalization and multi-view consistency, and to improve quality, we propose HyP-NeRF, a latent conditioning method for learning generalizable category-level NeRF priors using hypernetworks. Rather than using hypernetworks to estimate only the weights of a NeRF, we estimate both the weights and the multi-resolution hash encodings, resulting in significant quality gains. To improve quality even further, we incorporate a denoise and finetune strategy that denoises images rendered from NeRFs estimated by the hypernetwork and finetunes it while retaining multi-view consistency. These improvements enable us to use HyP-NeRF as a generalizable prior for multiple downstream tasks including NeRF reconstruction from single-view or cluttered scenes and text-to-NeRF. We provide qualitative comparisons and evaluate HyP-NeRF on three tasks: generalization, compression, and retrieval, demonstrating our state-of-the-art results.", "keywords": "neural radiance field;hypernetwork;multi-hash encoding;NeRF", "primary_area": "", "supplementary_material": "/attachment/ba0535393e16ba8844942f6a0eacf0074d38eee2.zip", "author": "Bipasha Sen;Gaurav Singh;Aditya Agarwal;Rohith Agaram;Madhava Krishna;Srinath Sridhar", "authorids": "~Bipasha_Sen1;~Gaurav_Singh3;~Aditya_Agarwal1;~Rohith_Agaram1;~Madhava_Krishna2;~Srinath_Sridhar2", "gender": "F;M;M;M;M;M", "homepage": "http://bipashasen.github.io/;https://vanhalen42.github.io/;https://skymanaditya1.github.io/;;https://robotics.iiit.ac.in/;https://srinathsridhar.com", "dblp": ";;;335/1286;90/4844;78/1463-2", "google_scholar": "GZZCH-8AAAAJ;kQCkrnwAAAAJ;https://scholar.google.co.in/citations?user=64Cgbv4AAAAJ;Ni6qG7wAAAAJ;https://scholar.google.com.tw/citations?user=QDuPGHwAAAAJ;qIvZT74AAAAJ", "orcid": ";;;;;", "linkedin": ";gaurav-singh-448363207/;skymanaditya1/;rohith-agaram-812278132/;;srinathsridhar", "or_profile": "~Bipasha_Sen1;~Gaurav_Singh3;~Aditya_Agarwal1;~Rohith_Agaram1;~K._Krishna1;~Srinath_Sridhar1", "aff": "International Institute of Information Technology Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology;International Institute of Information Technology, Hyderabad, International Institute of Information Technology Hyderabad;International Institute of Information Technology, Hyderabad, International Institute of Information Technology Hyderabad;International Institute of Information Technology, Hyderabad, International Institute of Information Technology Hyderabad;International Institute of Information Technology Hyderabad;Amazon", "aff_domain": "iiit.ac.in;research.iiit.ac.in;research.iiit.ac.in;research.iiit.ac.in;iiit.ac.in;amazon.com", "position": "MS student;Undergrad student;MS student;MS student;Full Professor;Visiting Academic", "bibtex": "@inproceedings{\nsen2023hypnerf,\ntitle={HyP-Ne{RF}: Learning Improved Ne{RF} Priors using a HyperNetwork},\nauthor={Bipasha Sen and Gaurav Singh and Aditya Agarwal and Rohith Agaram and Madhava Krishna and Srinath Sridhar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BExDjNDYkN}\n}", "github": "", "project": "", "reviewers": "bLah;vtAE;bkzC;Muw9", "pdf_size": 10348247, "rating": "4;5;5;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "61;127;67;97", "wc_strengths": "18;133;98;87", "wc_weaknesses": "327;420;114;249", "wc_questions": "233;102;53;3", "wc_limitations": "8;11;45;20", "wc_review": "647;793;377;456", "wc_reply_reviewers": "75;148;250;199", "wc_reply_authors": "24;321;468;198", "reply_reviewers": "2;1;2;2", "reply_authors": "2;3;2;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.0, 26.324893162176366 ], "wc_strengths_avg": [ 84.0, 41.71930009000631 ], "wc_weaknesses_avg": [ 277.5, 112.13942214939401 ], "wc_questions_avg": [ 97.75, 85.57270300744274 ], "wc_limitations_avg": [ 21.0, 14.543039572248986 ], "wc_review_avg": [ 568.25, 162.70429465751664 ], "wc_reply_reviewers_avg": [ 168.0, 64.67998144712165 ], "wc_reply_authors_avg": [ 252.75, 163.02971354940178 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11302102940317910674&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "iiit.ac.in;research.iiit.ac.in;research.iiit.ac.in;research.iiit.ac.in;iiit.ac.in;amazon.com", "author_num": 6, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "International Institute of Information Technology Hyderabad;International Institute of Information Technology, Hyderabad;International Institute of Information Technology;Amazon", "aff_unique_dep": ";;;Amazon.com, Inc.", "aff_unique_url": "https://www.iiit.ac.in;https://www.iiit.ac.in;https://www.iiit.ac.in;https://www.amazon.com", "aff_unique_abbr": "IIIT Hyderabad;IIIT Hyderabad;IIIT Hyderabad;Amazon", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hyderabad;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "India;United States" }, { "title": "Score-based Source Separation with Applications to Digital Communication Signals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72496", "id": "BFGQQKicuu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/106b2434b8d496c6aed9235d478678af-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BFGQQKicuu", "openreview": "https://openreview.net/forum?id=BFGQQKicuu", "poster": "/media/PosterPDFs/NeurIPS%202023/72496.png?t=1701380066.4832854", "slides": "https://nips.cc/virtual/2023/poster/72496", "video": "https://nips.cc/virtual/2023/poster/72496", "author_site": "Tejas Jayashankar, Gary C.F. Lee, Alejandro Lancho, Amir Weiss, Yury Polyanskiy, Gregory Wornell", "tldr": "", "abstract": "We propose a new method for separating superimposed sources using diffusion-based generative models. Our method relies only on separately trained statistical priors of independent sources to establish a new objective function guided by $\textit{maximum a posteriori}$ estimation with an $\textit{$\alpha$-posterior}$, across multiple levels of Gaussian smoothing.
Motivated by applications in radio-frequency (RF) systems, we are interested in sources with underlying discrete nature and the recovery of encoded bits from a signal of interest, as measured by the bit error rate (BER). Experimental results with RF mixtures demonstrate that our method results in a BER reduction of 95\\% over classical and existing learning-based methods. Our analysis demonstrates that our proposed method yields solutions that asymptotically approach the modes of an underlying discrete distribution. Furthermore, our method can be viewed as a multi-source extension to the recently proposed score distillation sampling scheme, shedding additional light on its use beyond conditional sampling. The project webpage is available at https://alpha-rgs.github.io.", "keywords": "Diffusion models;score-based models;source separation;digital communications;maximum a posteriori (MAP) estimation;alpha-posterior;Gaussian smoothing;score distillation sampling;radio frequency systems;interference mitigation", "primary_area": "", "supplementary_material": "/attachment/5e96f2d56f896e4ab4a54000c433de04a359bc2d.pdf", "author": "Tejas Jayashankar;Gary C.F. Lee;Alejandro Lancho;Amir Weiss;Yury Polyanskiy;Gregory Wornell", "authorids": "~Tejas_Jayashankar1;~Gary_C.F._Lee1;~Alejandro_Lancho1;~Amir_Weiss1;~Yury_Polyanskiy1;~Gregory_Wornell1", "gender": "M;M;;;M;M", "homepage": ";https://www.linkedin.com/in/garyleecf/;https://a-lancho.github.io/;https://www.weissamir.com/;http://www.mit.edu/~ypol/;https://web.mit.edu/gww/www/", "dblp": "254/7950;241/4474;202/2591;174/9657;74/8860;94/5969", "google_scholar": "gjN_lUoAAAAJ;-k6WQI0AAAAJ;https://scholar.google.es/citations?user=VyzRQicAAAAJ;https://scholar.google.co.il/citations?user=g8NtwsoAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4211-0454;0000-0002-3104-6939;;0000-0001-9166-4758", "linkedin": "tkj97/;garyleecf/;;;;", "or_profile": "~Tejas_Jayashankar1;~Gary_C.F._Lee1;~Alejandro_Lancho1;~Amir_Weiss1;~Yury_Polyanskiy1;~Gregory_Wornell1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Postdoc;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\njayashankar2023scorebased,\ntitle={Score-based Source Separation with Applications to Digital Communication Signals},\nauthor={Tejas Jayashankar and Gary C.F. 
Lee and Alejandro Lancho and Amir Weiss and Yury Polyanskiy and Gregory Wornell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BFGQQKicuu}\n}", "github": "", "project": "", "reviewers": "jCQT;aSGC;J15P;AhTo;D4rU", "pdf_size": 8117432, "rating": "5;6;6;7;7", "confidence": "3;3;4;3;4", "soundness": "2;3;3;3;3", "novelty": "3;3;4;3;3", "presentation": "3;3;3;3;3", "wc_summary": "61;84;58;139;94", "wc_strengths": "67;82;53;5;56", "wc_weaknesses": "160;145;1;9;24", "wc_questions": "55;45;94;136;34", "wc_limitations": "21;7;1;15;21", "wc_review": "364;363;207;304;229", "wc_reply_reviewers": "21;18;6;0;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.2, 29.25337587356372 ], "wc_strengths_avg": [ 52.6, 25.8812673569128 ], "wc_weaknesses_avg": [ 67.8, 69.71197888455039 ], "wc_questions_avg": [ 72.8, 37.52012793155162 ], "wc_limitations_avg": [ 13.0, 7.899367063252599 ], "wc_review_avg": [ 293.4, 65.65546435750798 ], "wc_reply_reviewers_avg": [ 12.4, 8.014985963805552 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6672845409479277231&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Predicting mutational effects on protein-protein binding via a side-chain diffusion probabilistic model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72495", "id": "BGP5Vjt93A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/99088dffd5eab0babebcda4bc58bbcea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BGP5Vjt93A", "openreview": "https://openreview.net/forum?id=BGP5Vjt93A", "poster": "/media/PosterPDFs/NeurIPS%202023/72495.png?t=1702173299.6302824", "slides": "https://nips.cc/virtual/2023/poster/72495", "video": "https://nips.cc/virtual/2023/poster/72495", "author_site": "Shiwei Liu, Tian Zhu, Milong Ren, Chungong Yu, Dongbo Bu, Haicang Zhang", "tldr": "", "abstract": "Many crucial biological processes rely on networks of protein-protein interactions. Predicting the effect of amino acid mutations on protein-protein binding is important in protein engineering, including therapeutic discovery. However, the scarcity of annotated experimental data on binding energy poses a significant challenge for developing computational approaches, particularly deep learning-based methods. In this work, we propose SidechainDiff, a novel representation learning-based approach that leverages unlabelled experimental protein structures. 
SidechainDiff utilizes a Riemannian diffusion model to learn the generative process of side-chain conformations and can also give the structural context representations of mutations on the protein-protein interface. Leveraging the learned representations, we achieve state-of-the-art performance in predicting the mutational effects on protein-protein binding. Furthermore, SidechainDiff is the first diffusion-based generative model for side-chains, distinguishing it from prior efforts that have predominantly focused on the generation of protein backbone structures.", "keywords": "Riemannian Diffusion Probabilistic Model;Mutation;Protein-protein binding", "primary_area": "", "supplementary_material": "/attachment/7231347227d9f56397934862978bda4f873ab8da.pdf", "author": "Shiwei Liu;Tian Zhu;Milong Ren;Yu Chungong;Dongbo Bu;Haicang Zhang", "authorids": "~Shiwei_Liu3;~Tian_Zhu1;~Milong_Ren2;~Yu_Chungong1;~Dongbo_Bu1;~Haicang_Zhang1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/lsw9803;https://eurekazhu.github.io;https://github.com/rabbit-0001/renmilong;http://bioinfo.ict.ac.cn/~yuchungong;http://bioinfo.ict.ac.cn/~dbu/;", "dblp": ";;;43/3695;;138/0439", "google_scholar": ";rLxdI10AAAAJ;;;;myzZFrYAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Shiwei_Liu3;~Tian_Zhu1;~Milong_Ren2;~Yu_Chungong1;~Dongbo_Bu1;~Haicang_Zhang1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences; Institute of Computing Technology;, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ucas.ac.cn;ict.ac.cn", "position": "MS student;MS student;PhD student;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023predicting,\ntitle={Predicting mutational effects on protein-protein binding via a side-chain diffusion probabilistic model},\nauthor={Shiwei Liu and Tian Zhu and Milong Ren and Yu Chungong and Dongbo Bu and Haicang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BGP5Vjt93A}\n}", "github": "", "project": "", "reviewers": "U583;wzHr;HbdY;Z2H5", "pdf_size": 1923240, "rating": "5;5;7;8", "confidence": "4;4;4;3", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;1;2;3", "wc_summary": "60;63;31;78", "wc_strengths": "47;61;41;165", "wc_weaknesses": "63;137;554;56", "wc_questions": "195;849;3;56", "wc_limitations": "24;12;3;12", "wc_review": "389;1122;632;367", "wc_reply_reviewers": "110;465;45;10", "wc_reply_authors": "369;803;12;12", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 58.0, 17.014699527173555 ], "wc_strengths_avg": [ 78.5, 50.46533463675833 ], "wc_weaknesses_avg": [ 202.5, 205.40508757087787 ], "wc_questions_avg": [ 275.75, 338.31152433814606 ], "wc_limitations_avg": [ 12.75, 7.46240577829965 ], "wc_review_avg": [ 627.5, 303.8474123635085 ], "wc_reply_reviewers_avg": [ 157.5, 181.12495686679955 ], "wc_reply_authors_avg": [ 299.0, 325.44354349103315 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], 
"authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17828567322315328452&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ucas.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Chinese Academy of Sciences;Institute of Computing Technology;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Large Language Models Are Latent Variable Models: Explaining and Finding Good Demonstrations for In-Context Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72494", "id": "BGvkwZEGt7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3255a7554605a88800f4e120b3a929e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BGvkwZEGt7", "openreview": "https://openreview.net/forum?id=BGvkwZEGt7", "poster": "/media/PosterPDFs/NeurIPS%202023/72494.png?t=1701835677.4277668", "slides": "https://nips.cc/virtual/2023/poster/72494", "video": "https://nips.cc/virtual/2023/poster/72494", "author_site": "Xinyi Wang, Wanrong Zhu, Michael Saxon, Mark Steyvers, William Yang Wang", "tldr": "", "abstract": "In recent years, pre-trained large language models (LLMs) have demonstrated remarkable efficiency in achieving an inference-time few-shot learning capability known as in-context learning. However, existing literature has highlighted the sensitivity of this capability to the selection of few-shot demonstrations. Current understandings of the underlying mechanisms by which this capability arises from regular language model pretraining objectives remain disconnected from the real-world LLMs. This study aims to examine the in-context learning phenomenon through a Bayesian lens, viewing real-world LLMs as latent variable models. On this premise, we propose an algorithm to select optimal demonstrations from a set of annotated data with a small LM, and then directly generalize the selected demonstrations to larger LMs. We demonstrate significant improvement over baselines, averaged over eight GPT models on eight real-world text classification datasets. We also demonstrate the real-world usefulness of our algorithm on GSM8K, a math word problem dataset. 
Our empirical findings support our hypothesis that LLMs implicitly infer a latent variable containing task information.", "keywords": "Large language models;Bayesian explanation;in-context learning", "primary_area": "", "supplementary_material": "/attachment/0986f94cb3c9ad5538c420ff6cd3937d46e4c14d.zip", "author": "Xinyi Wang;Wanrong Zhu;Michael Saxon;Mark Steyvers;William Yang Wang", "authorids": "~Xinyi_Wang2;~Wanrong_Zhu1;~Michael_Saxon1;~Mark_Steyvers1;~William_Yang_Wang2", "gender": "F;;M;;", "homepage": "https://wangxinyilinda.github.io/;;https://saxon.me;https://steyvers.socsci.uci.edu/;", "dblp": ";;222/6656;47/4456;", "google_scholar": "3vvbplcAAAAJ;;pAlwjdgAAAAJ;szUb_isAAAAJ;", "orcid": ";;;;", "linkedin": "xinyi-wang-444385133/;;;;", "or_profile": "~Xinyi_Wang2;~Wanrong_Zhu1;~Michael_Saxon1;~Mark_Steyvers1;~William_Yang_Wang2", "aff": "Microsoft;;UC Santa Barbara;University of California, Irvine;", "aff_domain": "microsoft.com;;ucsb.edu;uci.edu;", "position": "Intern;;PhD student;Full Professor;", "bibtex": "@inproceedings{\nwang2023large,\ntitle={Large Language Models Are Latent Variable Models: Explaining and Finding Good Demonstrations for In-Context Learning},\nauthor={Xinyi Wang and Wanrong Zhu and Michael Saxon and Mark Steyvers and William Yang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BGvkwZEGt7}\n}", "github": "", "project": "", "reviewers": "uvg2;sfNW;XxTq;waNU;8JqR", "pdf_size": 1504347, "rating": "5;5;5;5;6", "confidence": "4;3;3;5;4", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "87;123;58;71;103", "wc_strengths": "158;19;23;40;71", "wc_weaknesses": "223;149;79;188;50", "wc_questions": "64;2;20;26;63", "wc_limitations": "5;1;1;1;7", "wc_review": "537;294;181;326;294", "wc_reply_reviewers": "20;37;0;372;71", "wc_reply_authors": "83;31;35;348;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;2;3;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 88.4, 22.974768769239006 ], "wc_strengths_avg": [ 62.2, 51.28508555125944 ], "wc_weaknesses_avg": [ 137.8, 64.91656183132314 ], "wc_questions_avg": [ 35.0, 24.576411454889016 ], "wc_limitations_avg": [ 3.0, 2.5298221281347035 ], "wc_review_avg": [ 326.4, 116.2696865051248 ], "wc_reply_reviewers_avg": [ 100.0, 137.9811581340003 ], "wc_reply_authors_avg": [ 99.4, 127.1040518630307 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1336306209562122, "gs_citation": 120, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8232526030366785368&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "microsoft.com;;ucsb.edu;uci.edu;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Microsoft;University of California, Santa Barbara;University of California, Irvine", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.ucsb.edu;https://www.uci.edu", "aff_unique_abbr": "Microsoft;UCSB;UCI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Santa Barbara;Irvine", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Regularity as Intrinsic 
Reward for Free Play", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72493", "id": "BHHrX3CRE1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c529dba08a146ea8d6cf715ae8930cbe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BHHrX3CRE1", "openreview": "https://openreview.net/forum?id=BHHrX3CRE1", "poster": "/media/PosterPDFs/NeurIPS%202023/72493.png?t=1701902263.349891", "slides": "https://nips.cc/virtual/2023/poster/72493", "video": "https://nips.cc/virtual/2023/poster/72493", "author_site": "Cansu Sancaktar, Justus Piater, Georg Martius", "tldr": "", "abstract": "We propose regularity as a novel reward signal for intrinsically-motivated reinforcement learning. Taking inspiration from child development, we postulate that striving for structure and order helps guide exploration towards a subspace of tasks that are not favored by naive uncertainty-based intrinsic rewards. Our generalized formulation of Regularity as Intrinsic Reward (RaIR) allows us to operationalize it within model-based reinforcement learning. In a synthetic environment, we showcase the plethora of structured patterns that can emerge from pursuing this regularity objective. We also demonstrate the strength of our method in a multi-object robotic manipulation environment. We incorporate RaIR into free play and use it to complement the model\u2019s epistemic uncertainty as an intrinsic reward. Doing so, we witness the autonomous construction of towers and other regular structures during free play, which leads to a substantial improvement in zero-shot downstream task performance on assembly tasks.", "keywords": "Intrinsic Motivation;Reinforcement Learning;Model-based Planning;Regularity;Manipulation;Zero-shot Generalization;Unsupervised Exploration", "primary_area": "", "supplementary_material": "", "author": "Cansu Sancaktar;Justus Piater;Georg Martius", "authorids": "~Cansu_Sancaktar1;~Justus_Piater1;~Georg_Martius1", "gender": "F;M;M", "homepage": "https://csancaktar.github.io;https://iis.uibk.ac.at/;https://uni-tuebingen.de/de/264672", "dblp": "256/5345;p/JustusHPiater;47/2706", "google_scholar": "9JqNY7UAAAAJ;https://scholar.google.at/citations?user=meJCYXAAAAAJ;https://scholar.google.de/citations?user=b-JF-UIAAAAJ", "orcid": ";0000-0002-1898-3362;", "linkedin": "cansu-sancaktar-61715b140/;justus-piater/;", "or_profile": "~Cansu_Sancaktar1;~Justus_Piater1;~Georg_Martius1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Universit\u00e4t Innsbruck;Max Planck Institute for Intelligent Systems", "aff_domain": "tuebingen.mpg.de;uibk.ac.at;tuebingen.mpg.de", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsancaktar2023regularity,\ntitle={Regularity as Intrinsic Reward for Free Play},\nauthor={Cansu Sancaktar and Justus Piater and Georg Martius},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BHHrX3CRE1}\n}", "github": "", "project": "", "reviewers": "fHtQ;fSr3;GveK;CLep", "pdf_size": 19402475, "rating": "5;6;6;6", "confidence": "5;4;3;3", "soundness": "2;3;3;2", "novelty": "2;3;3;2", "presentation": "3;3;3;2", "wc_summary": "431;116;66;74", "wc_strengths": "55;94;171;28", "wc_weaknesses": "655;220;302;132", "wc_questions": "115;13;16;2", "wc_limitations": "31;1;94;5", "wc_review": "1287;444;649;241", "wc_reply_reviewers": "1311;54;43;16", "wc_reply_authors": "3052;0;0;0", "reply_reviewers": 
"4;1;1;1", "reply_authors": "6;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 171.75, 150.8780550643466 ], "wc_strengths_avg": [ 87.0, 53.874854988203914 ], "wc_weaknesses_avg": [ 327.25, 198.54643663385147 ], "wc_questions_avg": [ 36.5, 45.62071897723665 ], "wc_limitations_avg": [ 32.75, 37.19122880465231 ], "wc_review_avg": [ 655.25, 392.2297636590064 ], "wc_reply_reviewers_avg": [ 356.0, 551.5428360517432 ], "wc_reply_authors_avg": [ 763.0, 1321.5547661750534 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14461536674156937094&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "tuebingen.mpg.de;uibk.ac.at;tuebingen.mpg.de", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;University of Innsbruck", "aff_unique_dep": "Intelligent Systems;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.uibk.ac.at", "aff_unique_abbr": "MPI-IS;UIBK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Innsbruck", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Austria" }, { "title": "ToolkenGPT: Augmenting Frozen Language Models with Massive Tools via Tool Embeddings", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72492", "id": "BHXsb69bSx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8fd1a81c882cd45f64958da6284f4a3f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BHXsb69bSx", "openreview": "https://openreview.net/forum?id=BHXsb69bSx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72492", "video": "https://nips.cc/virtual/2023/poster/72492", "author_site": "Shibo Hao, Tianyang Liu, Zhen Wang, Zhiting Hu", "tldr": "", "abstract": "Integrating large language models (LLMs) with various tools has led to increased attention in the field. Existing approaches either involve fine-tuning the LLM, which is both computationally costly and limited to a fixed set of tools, or prompting LLMs by in-context tool demonstrations. Although the latter method offers adaptability to new tools, it struggles with the inherent context length constraint of LLMs when many new tools are presented, and mastering a new set of tools with few-shot examples remains challenging, resulting in suboptimal performance. To address these limitations, we propose a novel solution, named **ToolkenGPT**, wherein LLMs effectively learn to master tools as predicting tokens through **tool embeddings** for solving complex tasks. In this framework, each tool is transformed into vector embeddings and plugged into the language model head. Once the function is triggered during text generation, the LLM enters a special function mode to execute the tool calls. 
Our experiments show that function embeddings effectively help LLMs understand tool use and improve on several tasks, including numerical reasoning, knowledge-based question answering and embodied decision-making.", "keywords": "large language model;tool learning", "primary_area": "", "supplementary_material": "/attachment/1460b6696f16562b4018db076399f3ed7d97a330.zip", "author": "Shibo Hao;Tianyang Liu;Zhen Wang;Zhiting Hu", "authorids": "~Shibo_Hao1;~Tianyang_Liu2;~Zhen_Wang6;~Zhiting_Hu3", "gender": "M;Not Specified;M;M", "homepage": "https://ber666.github.io/;https://leolty.github.io/;https://zhenwang9102.github.io;http://zhiting.ucsd.edu", "dblp": "302/1341;89/1676-3.html;78/6727;134/4031", "google_scholar": "xwbHbUQAAAAJ;rJAeYdwAAAAJ;asBaytUAAAAJ;N7_xhHoAAAAJ", "orcid": ";0000-0001-7754-7029;0000-0001-7407-5118;", "linkedin": ";tianyangliu-whu-ucsd/;zhenwang9102/;", "or_profile": "~Shibo_Hao1;~Tianyang_Liu2;~Zhen_Wang6;~Zhiting_Hu3", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;Amazon", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;amazon.com", "position": "PhD student;MS student;Postdoc;Researcher", "bibtex": "@inproceedings{\nhao2023toolkengpt,\ntitle={Toolken{GPT}: Augmenting Frozen Language Models with Massive Tools via Tool Embeddings},\nauthor={Shibo Hao and Tianyang Liu and Zhen Wang and Zhiting Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BHXsb69bSx}\n}", "github": "", "project": "", "reviewers": "QLCo;Mpoh;C7TV;5YmU;xBHJ", "pdf_size": 710461, "rating": "7;7;7;8;8", "confidence": "4;4;4;4;4", "soundness": "3;4;3;4;4", "novelty": "3;3;3;4;3", "presentation": "3;4;4;3;3", "wc_summary": "56;97;78;101;74", "wc_strengths": "118;57;42;82;23", "wc_weaknesses": "302;108;109;144;77", "wc_questions": "2;1;24;41;102", "wc_limitations": "8;1;7;7;19", "wc_review": "486;264;260;375;295", "wc_reply_reviewers": "175;0;12;23;101", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 81.2, 16.36337373526621 ], "wc_strengths_avg": [ 64.4, 33.02483913662563 ], "wc_weaknesses_avg": [ 148.0, 79.86739009132576 ], "wc_questions_avg": [ 34.0, 37.11064537299237 ], "wc_limitations_avg": [ 8.4, 5.851495535331118 ], "wc_review_avg": [ 336.0, 85.62943419175441 ], "wc_reply_reviewers_avg": [ 62.2, 66.56545650711035 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 158, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7887070876504290247&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "ucsd.edu;ucsd.edu;ucsd.edu;amazon.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, San Diego;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucsd.edu;https://www.amazon.com", "aff_unique_abbr": "UCSD;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "An Improved Relaxation for Oracle-Efficient 
Adversarial Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72491", "id": "BHZsJ2sTkG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d54e440c92affd396117e161bbab5e78-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BHZsJ2sTkG", "openreview": "https://openreview.net/forum?id=BHZsJ2sTkG", "poster": "/media/PosterPDFs/NeurIPS%202023/72491.png?t=1702068629.8265452", "slides": "https://nips.cc/virtual/2023/poster/72491", "video": "https://nips.cc/virtual/2023/poster/72491", "author_site": "Kiarash Banihashem, MohammadTaghi Hajiaghayi, Suho Shin, Max Springer", "tldr": "", "abstract": "We present an oracle-efficient relaxation\n for the adversarial contextual bandits problem,\n where the contexts are sequentially drawn i.i.d from a known distribution and the cost sequence is chosen by an online adversary.\n Our algorithm has a regret bound of $O(T^{\\frac{2}{3}}(K\\log(|\\Pi|))^{\\frac{1}{3}})$ and makes at most $O(K)$ calls per round to an offline optimization oracle,\n where $K$ denotes the number of actions, $T$ denotes the number of rounds and $\\Pi$ denotes \n the set of policies.\n This is the first result to improve the prior best bound of $O((TK)^{\\frac{2}{3}}(\\log(|\\Pi|))^{\\frac{1}{3}})$ as obtained by \n Syrgkanis et al.\n at NeurIPS 2016, and the first to match the original bound of \n Langford and Zhang at NeurIPS 2007\n which was obtained for the stochastic case.", "keywords": "contextual bandits;adversarial bandits;oracle-efficient online learning", "primary_area": "", "supplementary_material": "", "author": "Kiarash Banihashem;MohammadTaghi Hajiaghayi;Suho Shin;Max Springer", "authorids": "~Kiarash_Banihashem1;~MohammadTaghi_Hajiaghayi1;~Suho_Shin1;~Max_Springer1", "gender": "M;M;M;M", "homepage": ";http://www.cs.umd.edu/~hajiagha/;https://suhoshin.github.io/;https://www.maxspringer.me", "dblp": "285/5061;334/4488;218/5505;292/2716", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=SQ1eGN4AAAAJ;-p5eVQsAAAAJ;x9NBFhwAAAAJ", "orcid": ";0000-0003-4842-0533;;0000-0001-9291-6574", "linkedin": ";mohammad-hajiaghayi-2139a913a&ved=2ahUKEwjMyeH-5-_-AhV3K1kFHeeBDKwQjjh6BAgSEAE&usg=AOvVaw1NSVoT5FCGtOTi4eT8nr4b;;mss423/", "or_profile": "~Kiarash_Banihashem1;~MohammadTaghi_Hajiaghayi1;~Suho_Shin1;~Max_Springer1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nbanihashem2023an,\ntitle={An Improved Relaxation for Oracle-Efficient Adversarial Contextual Bandits},\nauthor={Kiarash Banihashem and MohammadTaghi Hajiaghayi and Suho Shin and Max Springer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BHZsJ2sTkG}\n}", "github": "", "project": "", "reviewers": "gZc2;yx68;2pq4;D6vD", "pdf_size": 300289, "rating": "6;6;7;7", "confidence": "3;5;4;3", "soundness": "3;4;3;4", "novelty": "3;2;3;4", "presentation": "4;4;2;4", "wc_summary": "147;102;164;34", "wc_strengths": "90;57;32;17", "wc_weaknesses": "145;78;117;30", "wc_questions": "64;409;62;1", "wc_limitations": "4;1;6;1", "wc_review": "450;647;381;83", "wc_reply_reviewers": "43;10;14;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", 
"reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 111.75, 50.28108491271842 ], "wc_strengths_avg": [ 49.0, 27.649593125396983 ], "wc_weaknesses_avg": [ 92.5, 43.22325762827231 ], "wc_questions_avg": [ 134.0, 160.7777969745823 ], "wc_limitations_avg": [ 3.0, 2.1213203435596424 ], "wc_review_avg": [ 390.25, 202.47144860448844 ], "wc_reply_reviewers_avg": [ 16.75, 15.990231392947383 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6570234126073669780&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "umd.edu;umd.edu;umd.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "OKRidge: Scalable Optimal k-Sparse Ridge Regression", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72490", "id": "BHxsP5fSHv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/80f48ffa8022773973a4a5cec7cce19c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BHxsP5fSHv", "openreview": "https://openreview.net/forum?id=BHxsP5fSHv", "poster": "/media/PosterPDFs/NeurIPS%202023/72490.png?t=1701833360.7664986", "slides": "https://nips.cc/virtual/2023/poster/72490", "video": "https://nips.cc/virtual/2023/poster/72490", "author_site": "Jiachang Liu, Sam Rosen, Chudi Zhong, Cynthia Rudin", "tldr": "", "abstract": "We consider an important problem in scientific discovery, namely identifying sparse governing equations for nonlinear dynamical systems. This involves solving sparse ridge regression problems to provable optimality in order to determine which terms drive the underlying dynamics. We propose a fast algorithm, OKRidge, for sparse ridge regression, using a novel lower bound calculation involving, first, a saddle point formulation, and from there, either solving (i) a linear system or (ii) using an ADMM-based approach, where the proximal operators can be efficiently evaluated by solving another linear system and an isotonic regression problem. We also propose a method to warm-start our solver, which leverages a beam search. 
Experimentally, our methods attain provable optimality with run times that are orders of magnitude faster than those of the existing MIP formulations solved by the commercial solver Gurobi.", "keywords": "Sparse Ridge Regression;Dynamical Systems", "primary_area": "", "supplementary_material": "", "author": "Jiachang Liu;Sam Rosen;Chudi Zhong;Cynthia Rudin", "authorids": "~Jiachang_Liu1;~Sam_Rosen1;~Chudi_Zhong1;~Cynthia_Rudin1", "gender": "M;M;F;", "homepage": "https://jiachangliu.github.io/;https://samgrosen.github.io/;https://chudizhong.github.io/;", "dblp": "194/1565-1;344/6112;267/5474;", "google_scholar": "W_Zyr0AAAAAJ;;DXKNTLIAAAAJ;", "orcid": ";;;", "linkedin": ";sam-rosen-b1912714a/;;", "or_profile": "~Jiachang_Liu1;~Sam_Rosen1;~Chudi_Zhong1;~Cynthia_Rudin1", "aff": "Duke University;Duke University;Duke University;", "aff_domain": "duke.edu;duke.edu;duke.edu;", "position": "PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nliu2023okridge,\ntitle={{OKR}idge: Scalable Optimal k-Sparse Ridge Regression},\nauthor={Jiachang Liu and Sam Rosen and Chudi Zhong and Cynthia Rudin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BHxsP5fSHv}\n}", "github": "", "project": "", "reviewers": "kX5s;Nrfj;ono9;MGha;fUoX;X2ir;GTWb", "pdf_size": 2821622, "rating": "6;7;7;7;7;8;8", "confidence": "2;4;4;4;2;3;3", "soundness": "2;3;4;3;2;4;4", "novelty": "2;3;4;3;4;4;3", "presentation": "2;3;4;4;2;4;4", "wc_summary": "72;85;40;182;68;154;157", "wc_strengths": "19;87;86;70;105;77;225", "wc_weaknesses": "54;54;36;397;394;69;184", "wc_questions": "2;141;7;26;72;74;156", "wc_limitations": "6;56;19;20;1;14;27", "wc_review": "153;423;188;695;640;388;749", "wc_reply_reviewers": "9;20;0;50;12;67;179", "wc_reply_authors": "0;0;0;0;0;0;0", "reply_reviewers": "1;1;0;1;1;1;1", "reply_authors": "1;1;1;1;1;1;1", "rating_avg": [ 7.142857142857143, 0.6388765649999398 ], "confidence_avg": [ 3.142857142857143, 0.8329931278350429 ], "soundness_avg": [ 3.142857142857143, 0.8329931278350429 ], "novelty_avg": [ 3.2857142857142856, 0.6998542122237652 ], "presentation_avg": [ 3.2857142857142856, 0.880630571852711 ], "wc_summary_avg": [ 108.28571428571429, 50.7675776335203 ], "wc_strengths_avg": [ 95.57142857142857, 58.39240934734373 ], "wc_weaknesses_avg": [ 169.71428571428572, 149.74713380106027 ], "wc_questions_avg": [ 68.28571428571429, 57.27805435072042 ], "wc_limitations_avg": [ 20.428571428571427, 16.637798127475047 ], "wc_review_avg": [ 462.2857142857143, 222.2917736963319 ], "wc_reply_reviewers_avg": [ 48.142857142857146, 57.85555553378128 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8571428571428571, 0.3499271061118826 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2300894966542111, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4795416116546198334&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "duke.edu;duke.edu;duke.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Flow: Per-instance Personalized Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72489", 
"id": "BI031mw7iS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3baf4eeffad860ca9c54aeab632716b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BI031mw7iS", "openreview": "https://openreview.net/forum?id=BI031mw7iS", "poster": "/media/PosterPDFs/NeurIPS%202023/72489.png?t=1696427478.5718153", "slides": "https://nips.cc/virtual/2023/poster/72489", "video": "https://nips.cc/virtual/2023/poster/72489", "author_site": "Kunjal Panchal, Sunav Choudhary, Nisarg Parikh, Lijun Zhang, Hui Guan", "tldr": "", "abstract": "Federated learning (FL) suffers from data heterogeneity, where the diverse data distributions across clients make it challenging to train a single global model effectively. Existing personalization approaches aim to address the data heterogeneity issue by creating a personalized model for each client from the global model that fits their local data distribution. However, these personalized models may achieve lower accuracy than the global model in some clients, resulting in limited performance improvement compared to that without personalization. To overcome this limitation, we propose a per-instance personalization FL algorithm Flow. Flow creates dynamic personalized models that are adaptive not only to each client\u2019s data distributions but also to each client\u2019s data instances. The personalized model allows each instance to dynamically determine whether it prefers the local parameters or its global counterpart to make correct predictions, thereby improving clients\u2019\naccuracy. We provide theoretical analysis on the convergence of Flow and empirically demonstrate the superiority of Flow in improving clients\u2019 accuracy compared to state-of-the-art personalization approaches on both vision and language-based tasks.", "keywords": "federated learning;personalization;statistical heterogeneity;dynamic routing", "primary_area": "", "supplementary_material": "/attachment/0c009dd9be74c2bc38a28fa9177185e4446f0042.pdf", "author": "Kunjal Panchal;Sunav Choudhary;Nisarg Parikh;Lijun Zhang;Hui Guan", "authorids": "~Kunjal_Panchal1;~Sunav_Choudhary1;~Nisarg_Parikh1;~Lijun_Zhang4;~Hui_Guan1", "gender": "F;;M;F;F", "homepage": "https://astuary.github.io/Kunjal;;https://github.com/namelessCrusader?tab=repositories;https://zhanglijun95.github.io/resume/;https://guanh01.github.io/", "dblp": "277/6382.html;;;;77/6645-1.html", "google_scholar": "QdPrnDgAAAAJ;;;;L2P0jCsAAAAJ", "orcid": "0000-0003-4325-1072;;;;0000-0001-9128-2231", "linkedin": "kunjal-panchal/;;nisargparikh-/;;", "or_profile": "~Kunjal_Panchal1;~Sunav_Choudhary1;~Nisarg_Parikh1;~Lijun_Zhang4;~Hui_Guan1", "aff": "University of Massachusetts at Amherst;;Northeastern University;University of Massachusetts, Amherst;University of Massachusetts, Amherst", "aff_domain": "cs.umass.edu;;neu.edu;umass.edu;umass.edu", "position": "PhD student;;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npanchal2023flow,\ntitle={Flow: Per-instance Personalized Federated Learning},\nauthor={Kunjal Panchal and Sunav Choudhary and Nisarg Parikh and Lijun Zhang and Hui Guan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BI031mw7iS}\n}", "github": "", "project": "", "reviewers": "MbeB;iGNe;cLqQ;QKWb", "pdf_size": 1577442, "rating": "5;5;6;6", "confidence": "4;4;5;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "69;128;68;80", "wc_strengths": 
"109;49;80;112", "wc_weaknesses": "56;334;40;45", "wc_questions": "55;91;142;2", "wc_limitations": "2;7;28;2", "wc_review": "291;609;358;241", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.25, 24.55987581401828 ], "wc_strengths_avg": [ 87.5, 25.5 ], "wc_weaknesses_avg": [ 118.75, 124.40935455181817 ], "wc_questions_avg": [ 72.5, 51.11017511220246 ], "wc_limitations_avg": [ 9.75, 10.732543966832841 ], "wc_review_avg": [ 374.75, 141.47150773212252 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5925235038877822759&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "cs.umass.edu;;neu.edu;umass.edu;umass.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Massachusetts Amherst;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.northeastern.edu", "aff_unique_abbr": "UMass Amherst;NEU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Recommender Systems with Generative Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72488", "id": "BJ0fQUU32w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/20dcab0f14046a5c6b02b61da9f13229-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BJ0fQUU32w", "openreview": "https://openreview.net/forum?id=BJ0fQUU32w", "poster": "/media/PosterPDFs/NeurIPS%202023/72488.png?t=1702193689.6373792", "slides": "https://nips.cc/virtual/2023/poster/72488", "video": "https://nips.cc/virtual/2023/poster/72488", "author_site": "Shashank Rajput, Nikhil Mehta, Anima Singh, Raghunandan Hulikal Keshavan, Trung Vu, Lukasz Heldt, Lichan Hong, Yi Tay, Vinh Tran, Jonah Samost, Maciej Kula, Ed Chi, Maheswaran Sathiamoorthy", "tldr": "", "abstract": "Modern recommender systems perform large-scale retrieval by embedding queries and item candidates in the same unified space, followed by approximate nearest neighbor search to select top candidates given a query embedding. In this paper, we propose a novel generative retrieval approach, where the retrieval model autoregressively decodes the identifiers of the target candidates. To that end, we create semantically meaningful tuple of codewords to serve as a Semantic ID for each item. Given Semantic IDs for items in a user session, a Transformer-based sequence-to-sequence model is trained to predict the Semantic ID of the next item that the user will interact with. We show that recommender systems trained with the proposed paradigm significantly outperform the current SOTA models on various datasets. 
In addition, we show that incorporating Semantic IDs into the sequence-to-sequence model enhances its ability to generalize, as evidenced by the improved retrieval performance observed for items with no prior interaction history.", "keywords": "Recommender Systems;Generative Retrieval;Vector Quantization", "primary_area": "", "supplementary_material": "/attachment/cd80753e2b9ef3e6920ce7ab2bddd17113976f27.pdf", "author": "Shashank Rajput;Nikhil Mehta;Anima Singh;Raghunandan Hulikal Keshavan;Trung Vu;Lukasz Heldt;Lichan Hong;Yi Tay;Vinh Q. Tran;Jonah Samost;Maciej Kula;Ed H. Chi;Maheswaran Sathiamoorthy", "authorids": "~Shashank_Rajput1;~Nikhil_Mehta1;~Anima_Singh1;~Raghunandan_Hulikal_Keshavan1;~Trung_Vu1;~Lukasz_Heldt1;~Lichan_Hong1;~Yi_Tay1;~Vinh_Q._Tran1;~Jonah_Samost1;~Maciej_Kula1;~Ed_H._Chi1;~Maheswaran_Sathiamoorthy1", "gender": "M;M;;M;M;;M;M;M;M;;M;M", "homepage": "https://pages.cs.wisc.edu/~srajput/;;;https://scholar.google.com/citations?user=PCtRSvUAAAAJ&hl=en;;https://www.linkedin.com/in/lukasz-heldt-6226691;;http://yitay.net;https://vqtran.github.io;http://google.com;https://github.com/maciejkula;http://smahesh.com;http://edchi.net", "dblp": "241/5361;89/7487-2;;;;63/5281;85/4697;;77/2885-2.html;;;;13/310", "google_scholar": "qEXxyDQAAAAJ;4cgHaJ0AAAAJ;;;;;https://scholar.google.com/citations?view_op=list_works;VBclY_cAAAAJ;ot3WsOwAAAAJ;;;HLkvYl0AAAAJ;VuWl-KUAAAAJ", "orcid": ";;;;;;;;;;;;0000-0003-3230-5338", "linkedin": ";;anima-singh-b36ba62b;;trung-vu-72103b8a/;;lichanhong/;;vinh-tran-32597468/;;;smaheswaran;edchi/", "or_profile": "~Shashank_Rajput1;~Nikhil_Mehta1;~Anima_Singh1;~Raghunandan_Hulikal_Keshavan1;~Trung_Vu1;~Lukasz_Heldt1;~Lichan_Hong1;~Yi_Tay1;~Vinh_Q._Tran1;~Jonah_Samost1;~Maciej_Kula1;~Maheswaran_Sathiamoorthy1;~Ed_Chi1", "aff": "University of Wisconsin, Madison;Research, Google;Google;;;;Google ;Google;Google;;Google;;Google", "aff_domain": "wisc.edu;research.google.com;google.com;;;;google.com;google.com;google.com;;google.com;;google.com", "position": "PhD student;Researcher;Researcher;;;;Software Engineer;Research Scientist;Researcher;;Intern;;Researcher", "bibtex": "@inproceedings{\nrajput2023recommender,\ntitle={Recommender Systems with Generative Retrieval},\nauthor={Shashank Rajput and Nikhil Mehta and Anima Singh and Raghunandan Hulikal Keshavan and Trung Vu and Lukasz Heldt and Lichan Hong and Yi Tay and Vinh Q. Tran and Jonah Samost and Maciej Kula and Ed H. 
Chi and Maheswaran Sathiamoorthy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BJ0fQUU32w}\n}", "github": "", "project": "", "reviewers": "mHfk;5EdZ;aLHH;VYRN;5DSM", "pdf_size": 2323169, "rating": "2;3;4;5;7", "confidence": "5;4;4;3;5", "soundness": "3;3;2;3;3", "novelty": "1;1;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "38;114;26;100;59", "wc_strengths": "18;45;46;89;37", "wc_weaknesses": "135;178;120;329;24", "wc_questions": "3;72;42;6;83", "wc_limitations": "1;1;1;6;187", "wc_review": "195;410;235;530;390", "wc_reply_reviewers": "253;112;0;0;52", "wc_reply_authors": "345;309;0;176;278", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;3;1;2;2", "rating_avg": [ 4.2, 1.7204650534085253 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 67.4, 34.30218651922935 ], "wc_strengths_avg": [ 47.0, 23.280893453645632 ], "wc_weaknesses_avg": [ 157.2, 99.56585760189083 ], "wc_questions_avg": [ 41.2, 32.84752654310519 ], "wc_limitations_avg": [ 39.2, 73.92536777047512 ], "wc_review_avg": [ 352.0, 122.33560397529412 ], "wc_reply_reviewers_avg": [ 83.4, 94.31776078766926 ], "wc_reply_authors_avg": [ 221.6, 124.28451230945875 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.031068488300060027, "gs_citation": 189, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8228948348343790806&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "wisc.edu;research.google.com;google.com;;;;google.com;google.com;google.com;;google.com;;google.com", "author_num": 13, "aff_unique_index": "0;1;1;1;1;1;1;1", "aff_unique_norm": "University of Wisconsin;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.wisc.edu;https://research.google", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "0;1;1;1;1;1;1;1", "aff_campus_unique": "Madison;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Retaining Beneficial Information from Detrimental Data for Neural Network Repair", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72487", "id": "BJ1vOqh3hJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/964b1c8dd5667fd647c09c8772829fd1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BJ1vOqh3hJ", "openreview": "https://openreview.net/forum?id=BJ1vOqh3hJ", "poster": "/media/PosterPDFs/NeurIPS%202023/72487.png?t=1701782443.1361854", "slides": "https://nips.cc/virtual/2023/poster/72487", "video": "https://nips.cc/virtual/2023/poster/72487", "author_site": "Long-Kai Huang, Peilin Zhao, Junzhou Huang, Sinno Pan", "tldr": "", "abstract": "The performance of deep learning models heavily relies on the quality of the training data. Inadequacies in the training data, such as corrupt input or noisy labels, can lead to the failure of model generalization. Recent studies propose repairing the model by identifying the training samples that contribute to the failure and removing their influence from the model. However, it is important to note that the identified data may contain both beneficial and detrimental information. 
Simply erasing the information of the identified data from the model can have a negative impact on its performance, especially when accurate data is mistakenly identified as detrimental and removed. To overcome this challenge, we propose a novel approach that leverages the knowledge obtained from a retained clean set. Our method first identifies harmful data by utilizing the clean set, then separates the beneficial and detrimental information within the identified data. Finally, we utilize the extracted beneficial information to enhance the model's performance. Through empirical evaluations, we demonstrate that our method outperforms baseline approaches in both identifying harmful data and rectifying model failures. Particularly in scenarios where identification is challenging and a significant amount of benign data is involved, our method improves performance while the baselines deteriorate due to the erroneous removal of beneficial information.", "keywords": "Model Repair; Fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Long-Kai Huang;Peilin Zhao;Junzhou Huang;Sinno Pan", "authorids": "~Long-Kai_Huang1;~Peilin_Zhao2;~Junzhou_Huang2;~Sinno_Pan1", "gender": ";;M;M", "homepage": "https://sites.google.com/site/longkaihugo/home;;http://ranger.uta.edu/~huang/;http://www.cse.cuhk.edu.hk/~sinnopan/", "dblp": "133/2006;84/8411;22/1170.html;80/5412", "google_scholar": "CaP64WUAAAAJ;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;https://scholar.google.com.tw/citations?user=X7KrguAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-5263-1443;0000-0001-8543-3953;0000-0002-9548-1227;", "linkedin": ";;;", "or_profile": "~Long-Kai_Huang1;~Peilin_Zhao2;~Junzhou_Huang2;~Sinno_Pan1", "aff": "Tencent;Tencent;University of Texas, Arlington;Nanyang Technological University", "aff_domain": "tencent.com;tencent.com;uta.edu;ntu.edu.sg", "position": "Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023retaining,\ntitle={Retaining Beneficial Information from Detrimental Data for Neural Network Repair},\nauthor={Long-Kai Huang and Peilin Zhao and Junzhou Huang and Sinno Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BJ1vOqh3hJ}\n}", "github": "", "project": "", "reviewers": "9imB;7x7R;uAW7;usXi", "pdf_size": 725459, "rating": "5;6;8;8", "confidence": "3;4;4;4", "soundness": "3;2;4;3", "novelty": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "59;94;89;221", "wc_strengths": "19;82;32;42", "wc_weaknesses": "163;339;93;36", "wc_questions": "208;9;406;94", "wc_limitations": "8;21;5;8", "wc_review": "457;545;625;401", "wc_reply_reviewers": "9;36;19;29", "wc_reply_authors": "46;0;16;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 115.75, 62.22288566114561 ], "wc_strengths_avg": [ 43.75, 23.54118731075389 ], "wc_weaknesses_avg": [ 157.75, 113.90209611767467 ], "wc_questions_avg": [ 179.25, 148.7403358205164 ], "wc_limitations_avg": [ 10.5, 6.18465843842649 ], "wc_review_avg": [ 507.0, 85.29947244854449 ], "wc_reply_reviewers_avg": [ 23.25, 10.207227831296802 ], "wc_reply_authors_avg": [ 15.5, 18.78163997099295 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 
1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XLwTG3iTsEAJ:scholar.google.com/&scioq=Retaining+Beneficial+Information+from+Detrimental+Data+for+Neural+Network+Repair&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "tencent.com;tencent.com;uta.edu;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Tencent;University of Texas at Arlington;Nanyang Technological University", "aff_unique_dep": "Tencent Holdings Limited;;", "aff_unique_url": "https://www.tencent.com;https://www.uta.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "Tencent;UTA;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Arlington", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "China;United States;Singapore" }, { "title": "Outlier-Robust Gromov-Wasserstein for Graph Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72486", "id": "BKAFLUcpBS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e429936318af03ae99c01c90e2604ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BKAFLUcpBS", "openreview": "https://openreview.net/forum?id=BKAFLUcpBS", "poster": "/media/PosterPDFs/NeurIPS%202023/72486.png?t=1701937320.8014004", "slides": "https://nips.cc/virtual/2023/poster/72486", "video": "https://nips.cc/virtual/2023/poster/72486", "author_site": "Lemin Kong, Jiajin Li, Jianheng Tang, Anthony Man-Cho So", "tldr": "", "abstract": "The Gromov-Wasserstein (GW) distance is a powerful tool for comparing and aligning probability distributions supported on different metric spaces. Recently, GW has become the main modeling technique for aligning heterogeneous data for a wide range of graph learning tasks. However, the GW distance is known to be highly sensitive to outliers, which can result in large inaccuracies if the outliers are given the same weight as other samples in the objective function. To mitigate this issue, we introduce a new and robust version of the GW distance called RGW. RGW features optimistically perturbed marginal constraints within a Kullback-Leibler divergence-based ambiguity set. To make the benefits of RGW more accessible in practice, we develop a computationally efficient and theoretically provable procedure using a Bregman proximal alternating linearized minimization algorithm.
Through extensive experimentation, we validate our theoretical results and demonstrate the effectiveness of RGW on real-world graph learning tasks, such as subgraph matching and partial shape correspondence.", "keywords": "Gromov Wasserstein;Robust Optimization;Nonconvex Optimization", "primary_area": "", "supplementary_material": "/attachment/9e3cdb0421aa78549356c5792c84052e9ff34894.zip", "author": "Lemin Kong;Jiajin Li;Jianheng Tang;Anthony Man-Cho So", "authorids": "~Lemin_Kong1;~Jiajin_Li2;~Jianheng_Tang1;~Anthony_Man-Cho_So1", "gender": ";F;M;M", "homepage": ";https://gerrili1996.github.io/;https://squareroot3.github.io/;http://www1.se.cuhk.edu.hk/~manchoso/", "dblp": "320/8260;;;82/3202", "google_scholar": ";;w4kWvXEAAAAJ;https://scholar.google.com.hk/citations?user=whi3UisAAAAJ", "orcid": ";;0000-0001-9341-7312;0000-0003-2588-7851", "linkedin": "lemin-kong/;;;", "or_profile": "~Lemin_Kong1;~Jiajin_Li2;~Jianheng_Tang1;~Anthony_Man-Cho_So1", "aff": "Chinese University of Hong Kong, The Chinese University of Hong Kong;Stanford University;Hong Kong University of Science and Technology;The Chinese University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;stanford.edu;ust.hk;cuhk.edu.hk", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nkong2023outlierrobust,\ntitle={Outlier-Robust Gromov-Wasserstein for Graph Data},\nauthor={Lemin Kong and Jiajin Li and Jianheng Tang and Anthony Man-Cho So},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BKAFLUcpBS}\n}", "github": "", "project": "", "reviewers": "we1a;YxnT;1vP9;Lqoh", "pdf_size": 2722473, "rating": "4;7;7;8", "confidence": "4;4;5;4", "soundness": "2;3;2;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "53;71;150;71", "wc_strengths": "58;57;140;33", "wc_weaknesses": "46;49;474;18", "wc_questions": "431;232;47;92", "wc_limitations": "22;9;46;14", "wc_review": "610;418;857;228", "wc_reply_reviewers": "11;38;213;21", "wc_reply_authors": "32;0;640;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 37.53248592885902 ], "wc_strengths_avg": [ 72.0, 40.51542915976579 ], "wc_weaknesses_avg": [ 146.75, 189.32429189092454 ], "wc_questions_avg": [ 200.5, 149.54681541243198 ], "wc_limitations_avg": [ 22.75, 14.201672436723781 ], "wc_review_avg": [ 528.25, 232.9510409935959 ], "wc_reply_reviewers_avg": [ 70.75, 82.69333407234225 ], "wc_reply_authors_avg": [ 168.0, 272.82228647967895 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6619608426422501508&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "se.cuhk.edu.hk;stanford.edu;ust.hk;cuhk.edu.hk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Stanford University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.stanford.edu;https://www.ust.hk", "aff_unique_abbr": "CUHK;Stanford;HKUST", "aff_campus_unique_index": "0;1;0;0", 
"aff_campus_unique": "Hong Kong SAR;Stanford", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Fast Model DeBias with Machine Unlearning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72485", "id": "BL9Pc7xsdX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ecc80084c96cc25b11b0ab995c25f47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BL9Pc7xsdX", "openreview": "https://openreview.net/forum?id=BL9Pc7xsdX", "poster": "/media/PosterPDFs/NeurIPS%202023/72485.png?t=1698396163.1493788", "slides": "https://nips.cc/virtual/2023/poster/72485", "video": "https://nips.cc/virtual/2023/poster/72485", "author_site": "Ruizhe Chen, Jianfei Yang, Huimin Xiong, Jianhong Bai, Tianxiang Hu, Jin Hao, YANG FENG, Joey Tianyi Zhou, Jian Wu, Zuozhu Liu", "tldr": "", "abstract": "Recent discoveries have revealed that deep neural networks might behave in a biased manner in many real-world scenarios. For instance, deep networks trained on a large-scale face recognition dataset CelebA tend to predict blonde hair for females and black hair for males. Such biases not only jeopardize the robustness of models but also perpetuate and amplify social biases, which is especially concerning for automated decision-making processes in healthcare, recruitment, etc., as they could exacerbate unfair economic and social inequalities among different groups. Existing debiasing methods suffer from high costs in bias labeling or model re-training, while also exhibiting a deficiency in terms of elucidating the origins of biases within the model. To this respect, we propose a fast model debiasing method (FMD) which offers an efficient approach to identify, evaluate and remove biases inherent in trained models. The FMD identifies biased attributes through an explicit counterfactual concept and quantifies the influence of data samples with influence functions. Moreover, we design a machine unlearning-based strategy to efficiently and effectively remove the bias in a trained model with a small counterfactual dataset. \nExperiments on the Colored MNIST, CelebA, and Adult Income datasets demonstrate that our method achieves superior or competing classification accuracies compared with state-of-the-art retraining-based methods while attaining significantly fewer biases and requiring much less debiasing cost. 
Notably, our method requires only a small external dataset and updates only a minimal number of model parameters, without requiring access to training data that may be too large or unavailable in practice.", "keywords": "Model Debias;Bias Mitigation;Machine Unlearning;Counterfactual Fairness", "primary_area": "", "supplementary_material": "/attachment/7223355deb88ab6b226d702abaff4fe495513038.pdf", "author": "Ruizhe Chen;Jianfei Yang;Huimin Xiong;Jianhong Bai;Tianxiang Hu;Jin Hao;YANG FENG;Joey Tianyi Zhou;Jian Wu;Zuozhu Liu", "authorids": "~Ruizhe_Chen1;~Jianfei_Yang4;~Huimin_Xiong1;~Jianhong_Bai2;~Tianxiang_Hu2;~Jin_Hao1;~YANG_FENG6;~Joey_Tianyi_Zhou1;~Jian_Wu6;~Zuozhu_Liu1", "gender": "M;F;M;M;M;M;M;M;M;M", "homepage": "https://www.linkedin.com/in/ruizhe-chen-015887275/;https://www.homepage.org/profile/;https://jianhongbai.github.io/;https://t5hu.github.io/;;;https://joeyzhouty.github.io/;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ;https://person.zju.edu.cn/en/lzz;https://marsyang.site/", "dblp": "286/3608;;349/0391;;86/1845;;123/5110;96/2744-1;173/9297;06/5852.html", "google_scholar": "Wr2K2sMAAAAJ;;U926UgYAAAAJ;;RBcwDr8AAAAJ;;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ;https://scholar.google.com/citations?hl=zh-TW;h602wLIAAAAJ;https://scholar.google.com.sg/citations?user=V25k08UAAAAJ", "orcid": "0000-0003-2302-6775;;0000-0002-3121-7259;;0000-0002-6685-2017;;0000-0002-4675-7055;;0000-0002-7816-502X;0000-0002-8075-0439", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/%E6%B4%8B-%E5%86%AF-797451b0;;;;jianfei-yang-55560386/", "or_profile": "~Ruizhe_Chen1;~Huimin_Xiong1;~Jianhong_Bai2;~Tianxiang_Hu2;~Jin_Hao1;~YANG_FENG6;~Joey_Tianyi_Zhou1;~Jian_Wu6;~Zuozhu_Liu1;~JIANFEI_YANG3", "aff": "Zhejiang University;Zhejiang University;Zhejiang University; Zhejiang University-University of Illinois Urbana-Champaign Institute, Zhejiang University;Stanford University;;A*STAR Centre for Frontier AI Research;Zhejiang University;Zhejiang University;Nanyang Technological University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;intl.zju.edu.cn;stanford.edu;;cfar.a-star.edu.sg;zju.edu.cn;zju.edu.cn;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;Intern;Postdoc;;Principal Researcher;Full Professor;Assistant Professor;Presidential Postdoc", "bibtex": "@inproceedings{\nchen2023fast,\ntitle={Fast Model DeBias with Machine Unlearning},\nauthor={Ruizhe Chen and Jianfei Yang and Huimin Xiong and Jianhong Bai and Tianxiang Hu and Jin Hao and YANG FENG and Joey Tianyi Zhou and Jian Wu and Zuozhu Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BL9Pc7xsdX}\n}", "github": "", "project": "", "reviewers": "hkdb;hEy4;AWfs;NRrd;2Mrc", "pdf_size": 1788949, "rating": "5;6;6;7;8", "confidence": "4;4;5;3;4", "soundness": "2;3;2;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;2;3", "wc_summary": "103;108;134;76;99", "wc_strengths": "70;105;78;51;106", "wc_weaknesses": "620;31;279;85;132", "wc_questions": "191;192;58;1;75", "wc_limitations": "45;252;6;1;4", "wc_review": "1029;688;555;214;416", "wc_reply_reviewers": "92;23;619;16;163", "wc_reply_authors": "198;58;1837;48;107", "reply_reviewers": "1;1;2;1;1", "reply_authors": "3;2;7;2;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ],
"wc_summary_avg": [ 104.0, 18.579558659989747 ], "wc_strengths_avg": [ 82.0, 21.099763031844695 ], "wc_weaknesses_avg": [ 229.4, 212.00433957822656 ], "wc_questions_avg": [ 103.4, 75.99631570017063 ], "wc_limitations_avg": [ 61.6, 96.54967633296344 ], "wc_review_avg": [ 580.4, 273.76676204389753 ], "wc_reply_reviewers_avg": [ 182.6, 224.6175416124039 ], "wc_reply_authors_avg": [ 449.6, 695.7282802933915 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 3.2, 1.9390719429665317 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.31008683647302115, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7985155277985219540&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;intl.zju.edu.cn;stanford.edu;;cfar.a-star.edu.sg;zju.edu.cn;zju.edu.cn;ntu.edu.sg", "author_num": 10, "aff_unique_index": "0;0;0;0;1;2;0;0;3", "aff_unique_norm": "Zhejiang University;Stanford University;A*STAR;Nanyang Technological University", "aff_unique_dep": ";;Centre for Frontier AI Research;", "aff_unique_url": "https://www.zju.edu.cn;https://www.stanford.edu;https://www.a-star.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "ZJU;Stanford;A*STAR;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;1;2;0;0;2", "aff_country_unique": "China;United States;Singapore" }, { "id": "BMVcW1IL9l", "title": "Semi-Supervised Semantic Segmentation via Marginal Contextual Information", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a novel confidence refinement scheme that enhances pseudo-labels in semi-supervised semantic segmentation. Unlike current leading methods, which filter pixels with low-confidence teacher predictions in isolation, our approach leverages the spatial correlation of labels in segmentation maps by grouping neighboring pixels and considering their pseudo-labels collectively. With this contextual information, our method, named S4MC, increases the amount of unlabeled data used during training while maintaining the quality of the pseudo-labels, all with negligible computational overhead. Through extensive experiments on standard benchmarks, we demonstrate that S4MC outperforms existing state-of-the-art semi-supervised learning approaches, offering a promising solution for reducing the cost of acquiring dense annotations. For example, S4MC achieves a substantial 6.34 mIoU improvement over the prior state-of-the-art method on PASCAL VOC 12 with 92 annotated images. 
The code to reproduce our experiments is available at https://s4mcontext.github.io/", "keywords": "Semantic segmentation;semi-supervised learning;contextual information;semi-supervised segmentation", "primary_area": "", "supplementary_material": "/attachment/ea2d4becfa325111fb170eee3c1fe97153da8b8c.pdf", "author": "Moshe Kimhi;Shai Kimhi;Evgenii Zheltonozhskii;Or Litany;Chaim Baskin", "authorids": "~Moshe_Kimhi2;~Shai_Kimhi1;~Evgenii_Zheltonozhskii1;~Or_Litany1;~Chaim_Baskin1", "gender": "Non-Binary;;M;M;M", "homepage": "https://mkimhi.github.io/;http://GitHub.com/shaithekimhi;https://evgeniizh.com/;http://orlitany.github.io;https://chaimbaskin.bgu.ac.il", "dblp": "317/6911.html;;205/2315;119/1476;205/2308", "google_scholar": "uz7qJbsAAAAJ;;1yHw4W0AAAAJ;https://scholar.google.co.il/citations?user=Ihs8dwsAAAAJ;https://scholar.google.co.il/citations?user=lfWCxJYAAAAJ", "orcid": "0009-0000-7645-7339;;0000-0002-5400-9321;;0000-0003-4341-5639", "linkedin": "moshekimhi/;;zheltonozhskiy/;;chaim-baskin-97441340/", "or_profile": "~Moshe_Kimhi2;~Shai_Kimhi1;~Evgenii_Zheltonozhskii1;~Or_Litany1;~Chaim_Baskin1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion;NVIDIA;Technion - Israel Institute of Technology, Technion", "aff_domain": "campus.technion.ac.il;technion.ac.il;technion.ac.il;nvidia.com;technion.ac.il", "position": "PhD student;Undergrad student;PhD student;Research Scientist;Postdoc", "bibtex": "@misc{\nkimhi2023semisupervised,\ntitle={Semi-Supervised Semantic Segmentation via Marginal Contextual Information},\nauthor={Moshe Kimhi and Shai Kimhi and Evgenii Zheltonozhskii and Or Litany and Chaim Baskin},\nyear={2023},\nurl={https://openreview.net/forum?id=BMVcW1IL9l}\n}", "github": "", "project": "", "reviewers": "RDV7;EHYQ;Utxo;k6ru", "site": "https://openreview.net/forum?id=BMVcW1IL9l", "pdf_size": 16184001, "rating": "4;5;6;6", "confidence": "3;4;5;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "54;97;60;62", "wc_strengths": "13;35;23;35", "wc_weaknesses": "165;67;362;7", "wc_questions": "3;5;4;162", "wc_limitations": "3;7;1;21", "wc_review": "238;211;450;287", "wc_reply_reviewers": "168;0;26;27", "wc_reply_authors": "24;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.25, 16.857861667483217 ], "wc_strengths_avg": [ 26.5, 9.205976319760984 ], "wc_weaknesses_avg": [ 150.25, 134.63538725015798 ], "wc_questions_avg": [ 43.5, 68.41966091702004 ], "wc_limitations_avg": [ 8.0, 7.810249675906654 ], "wc_review_avg": [ 296.5, 92.71596410543332 ], "wc_reply_reviewers_avg": [ 55.25, 65.99005606907755 ], "wc_reply_authors_avg": [ 6.0, 10.392304845413264 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11273827857411512363&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Technion - Israel Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", 
"aff_unique_url": "https://www.technion.ac.il/en/;https://www.nvidia.com", "aff_unique_abbr": "Technion;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "VisoGender: A dataset for benchmarking gender bias in image-text pronoun resolution", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73666", "id": "BNwsJ4bFsc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c93f26b1381b17693055a611a513f1e9-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=BNwsJ4bFsc", "openreview": "https://openreview.net/forum?id=BNwsJ4bFsc", "poster": "/media/PosterPDFs/NeurIPS%202023/73666.png?t=1699613099.9199607", "slides": "https://nips.cc/virtual/2023/poster/73666", "video": "https://nips.cc/virtual/2023/poster/73666", "author_site": "Siobhan Mackenzie Hall, Fernanda Gon\u00e7alves Abrantes, Hanwen Zhu, Grace Sodunke, Aleksandar Shtedritski, Hannah Rose Kirk", "tldr": "", "abstract": "We introduce VisoGender, a novel dataset for benchmarking gender bias in vision-language models. We focus on occupation-related biases within a hegemonic system of binary gender, inspired by Winograd and Winogender schemas, where each image is associated with a caption containing a pronoun relationship of subjects and objects in the scene. VisoGender is balanced by gender representation in professional roles, supporting bias evaluation in two ways: i) resolution bias, where we evaluate the difference between pronoun resolution accuracies for image subjects with gender presentations perceived as masculine versus feminine by human annotators and ii) retrieval bias, where we compare ratios of professionals perceived to have masculine and feminine gender presentations retrieved for a gender-neutral search query. We benchmark several state-of-the-art vision-language models and find that they demonstrate bias in resolving binary gender in complex scenes. 
While the direction and magnitude of gender bias depend on the task and the model being evaluated, captioning models are generally less biased than Vision-Language Encoders.", "keywords": "fairness;vision-language;bias;benchmark", "primary_area": "", "supplementary_material": "", "author": "Siobhan Mackenzie Hall;Fernanda Gon\u00e7alves Abrantes;Hanwen Zhu;Grace Sodunke;Aleksandar Shtedritski;Hannah Rose Kirk", "authorids": "~Siobhan_Mackenzie_Hall1;~Fernanda_Gon\u00e7alves_Abrantes1;~Hanwen_Zhu2;~Grace_Sodunke1;~Aleksandar_Shtedritski1;~Hannah_Rose_Kirk1", "gender": ";F;;F;M;F", "homepage": ";;https://thomaszhu.cn;;;https://www.hannahrosekirk.com/", "dblp": ";;;;284/9612;284/9434", "google_scholar": "A9c8wlwAAAAJ;https://scholar.google.com/citations?hl=pt-BR;_UrS_fIAAAAJ;;cGnonsQAAAAJ;Fha8ldEAAAAJ", "orcid": "0000-0002-1520-4220;0000-0002-7318-482X;0009-0006-6179-4555;0009-0004-6788-5250;;0000-0002-7419-5993", "linkedin": "siobhan-hall-805255bb/;fernanda-gon\u00e7alves-abrantes-312679207/?originalSubdomain=uk;;grace-sodunke/;;hannah-rose-kirk", "or_profile": "~Siobhan_Mackenzie_Hall1;~Fernanda_Gon\u00e7alves_Abrantes1;~Hanwen_Zhu2;~Grace_Sodunke1;~Aleksandar_Shtedritski1;~Hannah_Rose_Kirk1", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford;University of Oxford;Alan Turing Institute", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk;oxford.ac.uk;turing.ac.uk", "position": "PhD student;PhD student;Undergrad student;Undergrad student;PhD student;Researcher", "bibtex": "@inproceedings{\nhall2023visogender,\ntitle={VisoGender: A dataset for benchmarking gender bias in image-text pronoun resolution},\nauthor={Siobhan Mackenzie Hall and Fernanda Gon{\\c{c}}alves Abrantes and Hanwen Zhu and Grace Sodunke and Aleksandar Shtedritski and Hannah Rose Kirk},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=BNwsJ4bFsc}\n}", "github": "", "project": "", "reviewers": "BTLm;aXST;6G4g;EykH;Qbui", "pdf_size": 1344247, "rating": "5;7;7;7;8", "confidence": "4;4;4;3;5", "wc_summary_and_contributions": "84;33;99;167;104", "wc_strengths": "96;106;74;54;37", "wc_improvement": "227;774;404;119;119", "wc_limitations": "124;31;1;10;7", "wc_correctness": "156;38;35;16;10", "wc_clarity": "8;240;295;12;46", "wc_relation_to_prior_work": "36;26;1;7;10", "wc_documentation": "93;92;1;17;14", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "825;1341;911;403;348", "wc_reply_reviewers": "183;112;13;138;28", "wc_reply_authors": "1451;781;560;567;740", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 97.4, 42.93530016198792 ], "wc_strengths_avg": [ 73.4, 25.593749236874224 ], "wc_improvement_avg": [ 328.6, 245.86711858237572 ], "wc_limitations_avg": [ 34.6, 45.82837548942795 ], "wc_correctness_avg": [ 51.0, 53.58357957434348 ], "wc_clarity_avg": [ 120.2, 122.23649209626396 ], "wc_relation_to_prior_work_avg": [ 16.0, 12.976902558006667 ], "wc_documentation_avg": [ 43.4, 40.450463532572776 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 765.6, 363.76453922833105 ], "wc_reply_reviewers_avg": [ 94.8, 64.95352184446969 ], "wc_reply_authors_avg": [ 819.8, 327.9301145061246 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ],
"corr_rating_confidence": 0.3227486121839514, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17471244405259486123&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk;oxford.ac.uk;turing.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "University of Oxford;Alan Turing Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "Oxford;ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Uncovering and Quantifying Social Biases in Code Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72484", "id": "BOP5McdqGy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/071a637d41ea290ac4360818a8323f33-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BOP5McdqGy", "openreview": "https://openreview.net/forum?id=BOP5McdqGy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72484", "video": "https://nips.cc/virtual/2023/poster/72484", "author_site": "Yan Liu, Xiaokang Chen, Yan Gao, Zhe Su, Fengji Zhang, Daoguang Zan, Jian-Guang Lou, Pin-Yu Chen, Tsung-Yi Ho", "tldr": "", "abstract": "With the popularity of automatic code generation tools, such as Copilot, the study of the potential hazards of these tools is gaining importance. In this work, we explore the social bias problem in pre-trained code generation models. We propose a new paradigm to construct code prompts and successfully uncover social biases in code generation models. To quantify the severity of social biases in generated code, we develop a dataset along with three metrics to evaluate the overall social bias and fine-grained unfairness across different demographics. Experimental results on three pre-trained code generation models (Codex, InCoder, and CodeGen) with varying sizes, reveal severe social biases. 
Moreover, we conduct analyses that provide useful insights for choosing code generation models with low social bias.", "keywords": "Social Bias;Code Fairness", "primary_area": "", "supplementary_material": "/attachment/4169d40c9830f7031ff6e5ad7778276d170b42ed.pdf", "author": "Yan Liu;Xiaokang Chen;Yan Gao;Zhe Su;Fengji Zhang;Daoguang Zan;Jian-Guang Lou;Pin-Yu Chen;Tsung-Yi Ho", "authorids": "~Yan_Liu13;~Xiaokang_Chen1;~Yan_Gao7;~Zhe_Su2;~Fengji_Zhang1;~Daoguang_Zan1;~Jian-Guang_Lou1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "gender": ";M;;M;M;M;M;M;M", "homepage": ";https://charlescxk.github.io/;;;https://github.com/zfj1998;;https://www.microsoft.com/en-us/research/people/jlou/;http://www.pinyuchen.com;https://www.cse.cuhk.edu.hk/people/faculty/tsung-yi-ho/", "dblp": ";163/6632;;;287/8086;305/5798;37/1917;39/8969;63/4181.html", "google_scholar": ";https://scholar.google.com.hk/citations?view_op=list_works;;zvcvNE0AAAAJ;plXSJ7IAAAAJ;https://scholar.google.com/citations?hl=zh-CN;alDxINIAAAAJ;jxwlCUUAAAAJ;TRDUYkAAAAAJ", "orcid": ";;;;;0009-0009-4269-8543;;0000-0003-1039-8369;0000-0001-7348-5625", "linkedin": ";;;zhe-su-b134b823a/;;;;pin-yu-chen-940062a2;", "or_profile": "~Yan_Liu13;~Xiaokang_Chen1;~Yan_Gao7;~Zhe_Su2;~Fengji_Zhang1;~Daoguang_Zan1;~Jian-Guang_Lou1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "aff": ";Peking University;;Zhejiang University;City University of Hong Kong;Institute of Software, Chinese Academy of Sciences;Microsoft Research Asia;International Business Machines;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": ";pku.edu.cn;;zju.edu.cn;cityu.edu.hk;ucas.ac.cn;microsoft.com;ibm.com;cse.cuhk.edu.hk", "position": ";PhD student;;Undergrad student;PhD student;PhD student;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nliu2023uncovering,\ntitle={Uncovering and Quantifying Social Biases in Code Generation},\nauthor={Yan Liu and Xiaokang Chen and Yan Gao and Zhe Su and Fengji Zhang and Daoguang Zan and Jian-Guang Lou and Pin-Yu Chen and Tsung-Yi Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BOP5McdqGy}\n}", "github": "", "project": "", "reviewers": "6YHH;9vM7;AcqK;y9do", "pdf_size": 320154, "rating": "4;5;5;6", "confidence": "4;4;4;3", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "71;119;91;82", "wc_strengths": "16;53;73;93", "wc_weaknesses": "77;149;77;305", "wc_questions": "11;98;33;76", "wc_limitations": "15;36;7;23", "wc_review": "190;455;281;579", "wc_reply_reviewers": "305;108;0;36", "wc_reply_authors": "920;48;0;20", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 17.781661902083282 ], "wc_strengths_avg": [ 58.75, 28.44622119016865 ], "wc_weaknesses_avg": [ 152.0, 93.0967238950974 ], "wc_questions_avg": [ 54.5, 34.311076928595526 ], "wc_limitations_avg": [ 20.25, 10.709224995301948 ], "wc_review_avg": [ 376.25, 150.88965338948856 ], "wc_reply_reviewers_avg": [ 112.25, 117.88209151520853 ], "wc_reply_authors_avg": [ 247.0, 388.9305850662815 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ],
"corr_rating_confidence": -0.816496580927726, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10327376821542876021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";pku.edu.cn;;zju.edu.cn;cityu.edu.hk;ucas.ac.cn;microsoft.com;ibm.com;cse.cuhk.edu.hk", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Peking University;Zhejiang University;City University of Hong Kong;Chinese Academy of Sciences;Microsoft;International Business Machines Corporation;Chinese University of Hong Kong", "aff_unique_dep": ";;;Institute of Software;Research;;Department of Computer Science and Engineering", "aff_unique_url": "http://www.pku.edu.cn;https://www.zju.edu.cn;https://www.cityu.edu.hk;http://www.ios.ac.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.ibm.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Peking U;ZJU;CityU;CAS;MSR Asia;IBM;CUHK", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Hong Kong SAR;Asia", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Identifiable Contrastive Learning with Automatic Feature Importance Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72483", "id": "BQA7wR2KBF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6a171867138c80de2a35a6125d6757c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BQA7wR2KBF", "openreview": "https://openreview.net/forum?id=BQA7wR2KBF", "poster": "/media/PosterPDFs/NeurIPS%202023/72483.png?t=1701753944.328726", "slides": "https://nips.cc/virtual/2023/poster/72483", "video": "https://nips.cc/virtual/2023/poster/72483", "author_site": "Qi Zhang, Yifei Wang, Yisen Wang", "tldr": "", "abstract": "Existing contrastive learning methods rely on pairwise sample contrast $z_x^\\top z_{x'}$ to learn data representations, but the learned features often lack clear interpretability from a human perspective. Theoretically, it lacks feature identifiability and different initialization may lead to totally different features. In this paper, we study a new method named tri-factor contrastive learning (triCL) that involves a 3-factor contrast in the form of $z_x^\\top S z_{x'}$, where $S=\\text{diag}(s_1,\\dots,s_k)$ is a learnable diagonal matrix that automatically captures the importance of each feature. We show that by this simple extension, triCL can not only obtain identifiable features that eliminate randomness but also obtain more interpretable features that are ordered according to the importance matrix $S$. We show that features with high importance have nice interpretability by capturing common classwise features, and obtain superior performance when evaluated for image retrieval using a few features. The proposed triCL objective is general and can be applied to different contrastive learning methods like SimCLR and CLIP. We believe that it is a better alternative to existing 2-factor contrastive learning by improving its identifiability and interpretability with minimal overhead. 
Code is available at https://github.com/PKU-ML/Tri-factor-Contrastive-Learning.", "keywords": "Self-supervised Learning;Contrastive Learning;Identifiability;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/bdf011b079a50fa61e51fa5aa3eaa3c9b416beb5.pdf", "author": "Qi Zhang;Yifei Wang;Yisen Wang", "authorids": "~Qi_Zhang28;~Yifei_Wang1;~Yisen_Wang1", "gender": "M;M;M", "homepage": "https://yifeiwang77.com;https://yisenwang.github.io/;https://github.com/zhangq327", "dblp": "00/555-1;172/1346-1;", "google_scholar": "-CLy6YsAAAAJ;uMWPDboAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yifei_Wang1;~Yisen_Wang1;~zhang_qi2", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nzhang2023identifiable,\ntitle={Identifiable Contrastive Learning with Automatic Feature Importance Discovery},\nauthor={Qi Zhang and Yifei Wang and Yisen Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BQA7wR2KBF}\n}", "github": "", "project": "", "reviewers": "aafx;tkty;Zqsm;eV59", "pdf_size": 442578, "rating": "4;5;6;7", "confidence": "4;4;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "72;48;77;85", "wc_strengths": "13;19;58;90", "wc_weaknesses": "166;166;155;36", "wc_questions": "3;4;51;41", "wc_limitations": "1;15;30;1", "wc_review": "255;252;371;253", "wc_reply_reviewers": "26;117;56;0", "wc_reply_authors": "347;531;53;0", "reply_reviewers": "1;2;2;0", "reply_authors": "4;3;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.5, 13.793114224133722 ], "wc_strengths_avg": [ 45.0, 31.20096152364539 ], "wc_weaknesses_avg": [ 130.75, 54.88795405186825 ], "wc_questions_avg": [ 24.75, 21.545011023436494 ], "wc_limitations_avg": [ 11.75, 11.986972094736853 ], "wc_review_avg": [ 282.75, 50.962608842169765 ], "wc_reply_reviewers_avg": [ 49.75, 43.59114015485257 ], "wc_reply_authors_avg": [ 232.75, 217.07645542527177 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5568463507952202155&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "BoardgameQA: A Dataset for Natural Language Reasoning with Contradictory Information", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73665", "id": "BR1m3JIoKm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7adce80e86aa841490e6307109094de5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=BR1m3JIoKm", "openreview": "https://openreview.net/forum?id=BR1m3JIoKm", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/73665", "video": "https://nips.cc/virtual/2023/poster/73665", "author_site": "Mehran Kazemi, Quan Yuan, Deepti Bhatia, Najoung Kim, Xin Xu, Vaiva Imbrasaite, Deepak Ramachandran", "tldr": "", "abstract": "Automated reasoning with unstructured natural text is a key requirement for many potential applications of NLP and for developing robust AI systems. Recently, Language Models (LMs) have demonstrated complex reasoning capacities even without any finetuning. However, existing evaluation for automated reasoning assumes access to a consistent and coherent set of information over which models reason. When reasoning in the real-world, the available information is frequently inconsistent or contradictory, and therefore models need to be equipped with a strategy to resolve such conflicts when they arise. One widely-applicable way of resolving conflicts is to impose preferences over information sources (e.g., based on source credibility or information recency) and adopt the source with higher preference. In this paper, we formulate the problem of reasoning with contradictory information guided by preferences over sources as the classical problem of defeasible reasoning, and develop a dataset called BoardgameQA for measuring the reasoning capacity of LMs in this setting. BoardgameQA also incorporates reasoning with implicit background knowledge, to better reflect reasoning problems in downstream applications. We benchmark various LMs on BoardgameQA and the results reveal a significant gap in the reasoning capacity of state-of-the-art LMs on this problem, showing that reasoning with conflicting information does not surface out-of-the-box in LMs. While performance can be improved with finetuning, it nevertheless remains poor.", "keywords": "natural language reasoning;defeasible reasoning;large language models", "primary_area": "", "supplementary_material": "", "author": "Mehran Kazemi;Quan Yuan;Deepti Bhatia;Najoung Kim;Xin Xu;Vaiva Imbrasaite;Deepak Ramachandran", "authorids": "~Mehran_Kazemi1;~Quan_Yuan8;~Deepti_Bhatia2;~Najoung_Kim1;~Xin_Xu6;~Vaiva_Imbrasaite1;~Deepak_Ramachandran2", "gender": ";F;F;F;M;F;M", "homepage": "https://github.com/yq911122;https://najoungkim.github.io;;;;;https://mehran-k.github.io/", "dblp": ";194/1249;;139/8032;80/703;;149/1283", "google_scholar": ";Uod-_B8AAAAJ;CvFz5K0AAAAJ;https://scholar.google.ca/citations?user=oYSqHXEAAAAJ;WbM9EAIAAAAJ;6NjgfvIAAAAJ;https://scholar.google.ca/citations?user=L79ecZkAAAAJ", "orcid": ";;;;;;", "linkedin": ";;xin-xu-4b978152/;vaiva-imbrasaite-a89b3023/;;deepti;mehran-kazemi-64675071/", "or_profile": "~Quan_Yuan8;~Najoung_Kim1;~Xin_Xu6;~Vaiva_Imbrasaite1;~Deepak_Ramachandran2;~DEEPTI_BHATIA1;~Seyed_Mehran_Kazemi1", "aff": "Google;Google;;Google;Google;Google;Google", "aff_domain": "google.com;google.com;;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;;Researcher;Staff Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nkazemi2023boardgameqa,\ntitle={Boardgame{QA}: A Dataset for Natural Language Reasoning with Contradictory Information},\nauthor={Mehran Kazemi and Quan Yuan and Deepti Bhatia and Najoung Kim and Xin Xu and Vaiva Imbrasaite and Deepak Ramachandran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=BR1m3JIoKm}\n}", "github": "", "project": "", "reviewers": "MReC;ayn7;Gau4;cpZF", "pdf_size": 1461684, "rating": "6;7;7;7", 
"confidence": "4;3;3;3", "wc_summary_and_contributions": "92;57;105;187", "wc_strengths": "119;33;46;185", "wc_improvement": "365;68;107;72", "wc_limitations": "15;8;27;29", "wc_correctness": "35;17;41;46", "wc_clarity": "40;4;17;6", "wc_relation_to_prior_work": "11;6;29;37", "wc_documentation": "18;15;12;18", "wc_additional_feedback": "1;1;1;1", "wc_review": "696;209;385;581", "wc_reply_reviewers": "0;14;0;0", "wc_reply_authors": "709;337;1005;21", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 110.25, 47.662222986344226 ], "wc_strengths_avg": [ 95.75, 61.07116750153054 ], "wc_improvement_avg": [ 153.0, 123.3349098998333 ], "wc_limitations_avg": [ 19.75, 8.642193008721803 ], "wc_correctness_avg": [ 34.75, 10.96300597464035 ], "wc_clarity_avg": [ 16.75, 14.306903927824496 ], "wc_relation_to_prior_work_avg": [ 20.75, 12.695963925594622 ], "wc_documentation_avg": [ 15.75, 2.48746859276655 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 467.75, 186.22751542132542 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 518.0, 371.96101946306146 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3205168177373935472&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "google.com;google.com;;google.com;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal privacy guarantees for a relaxed threat model: Addressing sub-optimal adversaries in differentially private machine learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72482", "id": "BRSgVw85Mc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aea831d6c7af37fd4230937225be3414-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BRSgVw85Mc", "openreview": "https://openreview.net/forum?id=BRSgVw85Mc", "poster": "/media/PosterPDFs/NeurIPS%202023/72482.png?t=1701785977.9447715", "slides": "https://nips.cc/virtual/2023/poster/72482", "video": "https://nips.cc/virtual/2023/poster/72482", "author_site": "Georgios Kaissis, Alexander Ziller, Stefan Kolek, Anneliese Riess, Daniel Rueckert", "tldr": "", "abstract": "Differentially private mechanisms restrict the membership inference capabilities of powerful (optimal) adversaries against machine learning models. Such adversaries are rarely encountered in practice. In this work, we examine a more realistic threat model relaxation, where (sub-optimal) adversaries lack access to the exact model training database, but may possess related or partial data. We then formally characterise and experimentally validate adversarial membership inference capabilities in this setting in terms of hypothesis testing errors. 
Our work helps users to interpret the privacy properties of sensitive data processing systems under realistic threat model relaxations and choose appropriate noise levels for their use-case.", "keywords": "Differential Privacy;Membership Inference Attack;Hypothesis Testing;Data Reconstruction Attack;Security", "primary_area": "", "supplementary_material": "", "author": "Georgios Kaissis;Alexander Ziller;Stefan Kolek;Anneliese Riess;Daniel Rueckert", "authorids": "~Georgios_Kaissis1;~Alexander_Ziller1;~Stefan_Kolek1;anneliese.riess@helmholtz-munich.de;~Daniel_Rueckert2", "gender": ";M;M;;M", "homepage": ";;https://skmda37.github.io/;;https://aim-lab.io/author/daniel-ruckert/", "dblp": ";179/1249;304/2478;;69/2478", "google_scholar": ";https://scholar.google.de/citations?user=Ir90mU4AAAAJ;7umQNF8AAAAJ;;https://scholar.google.co.uk/citations?user=H0O0WnQAAAAJ", "orcid": ";0000-0002-3242-0195;;;", "linkedin": ";a1302z/;;;", "or_profile": "~Georgios_Kaissis1;~Alexander_Ziller1;~Stefan_Kolek1;anneliese.riess@helmholtz-munich.de;~Daniel_Rueckert2", "aff": ";Technical University Munich;Institut f\u00fcr Mathematik;;Imperial College London", "aff_domain": ";tum.de;lmu.de;;imperial.ac.uk", "position": ";PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nkaissis2023optimal,\ntitle={Optimal privacy guarantees for a relaxed threat model: Addressing sub-optimal adversaries in differentially private machine learning},\nauthor={Georgios Kaissis and Alexander Ziller and Stefan Kolek and Anneliese Riess and Daniel Rueckert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BRSgVw85Mc}\n}", "github": "", "project": "", "reviewers": "WnyT;pkta;9uc5;5PdE", "pdf_size": 636672, "rating": "5;6;7;7", "confidence": "4;2;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;2", "wc_summary": "96;50;161;113", "wc_strengths": "96;12;108;31", "wc_weaknesses": "148;6;286;99", "wc_questions": "47;115;124;71", "wc_limitations": "8;1;59;9", "wc_review": "395;184;738;323", "wc_reply_reviewers": "117;32;114;34", "wc_reply_authors": "301;230;45;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.0, 39.70516339218364 ], "wc_strengths_avg": [ 61.75, 41.026668156212736 ], "wc_weaknesses_avg": [ 134.75, 101.12708588701645 ], "wc_questions_avg": [ 89.25, 31.578275760402118 ], "wc_limitations_avg": [ 19.25, 23.155722834755128 ], "wc_review_avg": [ 410.0, 203.9938724569932 ], "wc_reply_reviewers_avg": [ 74.25, 41.26969226926705 ], "wc_reply_authors_avg": [ 144.0, 125.08197312162932 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14885885357190087153&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";tum.de;lmu.de;;imperial.ac.uk", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Technical University of Munich;Institut f\u00fcr Mathematik;Imperial College London", "aff_unique_dep": ";Mathematics Department;", "aff_unique_url": "https://www.tum.de;;https://www.imperial.ac.uk", "aff_unique_abbr": "TUM;;ICL", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Passive learning of active causal strategies in agents and language models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72481", "id": "BRpi8YAfac", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/045c87def0c02e3ad0d3d849766d7f1e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BRpi8YAfac", "openreview": "https://openreview.net/forum?id=BRpi8YAfac", "poster": "/media/PosterPDFs/NeurIPS%202023/72481.png?t=1701422836.0201275", "slides": "https://nips.cc/virtual/2023/poster/72481", "video": "https://nips.cc/virtual/2023/poster/72481", "author_site": "Andrew Lampinen, Stephanie Chan, Ishita Dasgupta, Andrew Nam, Jane Wang", "tldr": "", "abstract": "What can be learned about causality and experimentation from passive data? This question is salient given recent successes of passively-trained language models in interactive domains such as tool use. Passive learning is inherently limited. However, we show that purely passive learning can in fact allow an agent to learn generalizable strategies for determining and using causal structures, as long as the agent can intervene at test time. We formally illustrate that learning a strategy of first experimenting, then seeking goals, can allow generalization from passive learning in principle. We then show empirically that agents trained via imitation on expert data can indeed generalize at test time to infer and use causal links which are never present in the training data; these agents can also generalize experimentation strategies to novel variable sets never observed in training.\nWe then show that strategies for causal intervention and exploitation can be generalized from passive data even in a more complex environment with high-dimensional observations, with the support of natural language explanations. Explanations can even allow passive learners to generalize out-of-distribution from perfectly-confounded training data. Finally, we show that language models, trained only on passive next-word prediction, can generalize causal intervention strategies from a few-shot prompt containing explanations and reasoning. These results highlight the surprising power of passive learning of active causal strategies, and have implications for understanding the behaviors and capabilities of language models.", "keywords": "passive; causal; offline; agency; language models", "primary_area": "", "supplementary_material": "/attachment/5c704ec5f5660592d3511fa90475eef7535582d8.pdf", "author": "Andrew Kyle Lampinen;Stephanie C.Y. 
Chan;Ishita Dasgupta;Andrew Joohun Nam;Jane X Wang", "authorids": "~Andrew_Kyle_Lampinen1;~Stephanie_C.Y._Chan1;~Ishita_Dasgupta1;~Andrew_Joohun_Nam1;~Jane_X_Wang1", "gender": "M;F;;M;F", "homepage": "https://github.com/google/BIG-bench;https://scychan.github.io/;;;http://www.janexwang.com", "dblp": "https://dblp.uni-trier.de/pers/hd/l/Lampinen:Andrew_K=;255/7866;169/6218;;88/10757", "google_scholar": "_N44XxAAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.uk/citations?user=YizAq4gAAAAJ", "orcid": ";;;0000-0001-9860-4221;", "linkedin": ";scychan;idasgupta6/;;", "or_profile": "~Andrew_Kyle_Lampinen1;~Stephanie_C.Y._Chan1;~Ishita_Dasgupta1;~Andrew_Joohun_Nam1;~Jane_Wang1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Stanford University;Google DeepMind", "aff_domain": "google.com;deepmind.com;deepmind.com;stanford.edu;google.com", "position": "Research Scientist;Research Scientist;Researcher;PhD student;Research Scientist", "bibtex": "@inproceedings{\nlampinen2023passive,\ntitle={Passive learning of active causal strategies in agents and language models},\nauthor={Andrew Kyle Lampinen and Stephanie C.Y. Chan and Ishita Dasgupta and Andrew Joohun Nam and Jane X Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BRpi8YAfac}\n}", "github": "", "project": "", "reviewers": "U5ax;X6cj;c7Wr;KP4B", "pdf_size": 549204, "rating": "7;7;7;8", "confidence": "3;4;4;5", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "4;3;3;4", "wc_summary": "86;208;79;112", "wc_strengths": "138;189;48;137", "wc_weaknesses": "138;845;10;306", "wc_questions": "109;149;1;4", "wc_limitations": "5;82;11;39", "wc_review": "476;1473;149;598", "wc_reply_reviewers": "11;42;0;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 121.25, 51.57215818637029 ], "wc_strengths_avg": [ 128.0, 50.749384232717546 ], "wc_weaknesses_avg": [ 324.75, 318.1802751585962 ], "wc_questions_avg": [ 65.75, 64.82042502174758 ], "wc_limitations_avg": [ 34.25, 30.408674749156695 ], "wc_review_avg": [ 674.0, 489.6442586204805 ], "wc_reply_reviewers_avg": [ 18.25, 15.433324334050653 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12020804617782292866&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "google.com;deepmind.com;deepmind.com;stanford.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.stanford.edu", "aff_unique_abbr": "DeepMind;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "BQ-NCO: Bisimulation Quotienting for Efficient Neural Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72480", "id": "BRqlkTDvvm", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f445ba15f0f05c26e1d24f908ea78d60-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BRqlkTDvvm", "openreview": "https://openreview.net/forum?id=BRqlkTDvvm", "poster": "/media/PosterPDFs/NeurIPS%202023/72480.png?t=1702055522.9545057", "slides": "https://nips.cc/virtual/2023/poster/72480", "video": "https://nips.cc/virtual/2023/poster/72480", "author_site": "Darko Drakulic, Sofia Michel, Florian Mai, Arnaud Sors, Jean-Marc Andreoli", "tldr": "", "abstract": "Despite the success of neural-based combinatorial optimization methods for end-to-end heuristic learning, out-of-distribution generalization remains a challenge. In this paper, we present a novel formulation of Combinatorial Optimization Problems (COPs) as Markov Decision Processes (MDPs) that effectively leverages common symmetries of COPs to improve out-of-distribution robustness. Starting from a direct MDP formulation of a constructive method, we introduce a generic way to reduce the state space, based on Bisimulation Quotienting (BQ) in MDPs. Then, for COPs with a recursive nature, we specialize the bisimulation and show how the reduced state exploits the symmetries of these problems and facilitates MDP solving. Our approach is principled and we prove that an optimal policy for the proposed BQ-MDP actually solves the associated COPs. We illustrate our approach on five classical problems: the Euclidean and Asymmetric Traveling Salesman, Capacitated Vehicle Routing, Orienteering and Knapsack Problems. Furthermore, for each problem, we introduce a simple attention-based policy network for the BQ-MDPs, which we train by imitation of (near) optimal solutions of small instances from a single distribution. We obtain new state-of-the-art results for the five COPs on both synthetic and realistic benchmarks. Notably, in contrast to most existing neural approaches, our learned policies show excellent generalization performance to much larger instances than seen during training, without any additional search procedure. 
Our code is available at: [link](https://github.com/naver/bq-nco).", "keywords": "Combinatorial Optimization;Markov Decision Processes;Bisimulation;Policy Learning;Out-of-Distribution Generalization;Routing Problems;TSP;CVRP;KP.", "primary_area": "", "supplementary_material": "/attachment/935be79edfeeb24c03138c27c94a92d293856a6a.pdf", "author": "Darko Drakulic;Sofia Michel;Florian Mai;Arnaud Sors;Jean-Marc Andreoli", "authorids": "~Darko_Drakulic1;~Sofia_Michel1;~Florian_Mai1;~Arnaud_Sors1;~Jean-Marc_Andreoli2", "gender": "M;;Non-Binary;M;M", "homepage": ";https://europe.naverlabs.com/people_user/sofia-michel/;;;https://europe.naverlabs.com/people_user/jean-marc-andreoli/", "dblp": "121/2070.html;139/2626;200/7899;217/2664;89/4299.html", "google_scholar": "B3-rbrcAAAAJ;;MfETM20AAAAJ;https://scholar.google.com/scholar?hl=fr;shjlrvEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;https://fr.linkedin.com/public-profile/in/jean-marc-andreoli-ab80332?trk=people-guest_people_search-card&challengeId=AQHatWUwZW-a2gAAAXcKvbGBUWRIqIBbgOTe62Bfj7_NtUbKi3T53z487fBcrsbq3TFmiI6_Wr_s3zVpKXEDMSNYJvfWn3nRxQ&submissionId=4e430cf4-a4af-5a16-cb22-39677341584d", "or_profile": "~Darko_Drakulic1;~Sofia_Michel1;~Florian_Mai1;~Arnaud_Sors1;~Jean-Marc_Andreoli2", "aff": "Naver Labs Europe;Naver Labs Europe;Idiap Research Institute;Naver Labs Europe;Naver Labs Europe", "aff_domain": "naverlabs.com;naverlabs.com;idiap.ch;naverlabs.com;naverlabs.com", "position": "Researcher;Researcher;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\ndrakulic2023bqnco,\ntitle={{BQ}-{NCO}: Bisimulation Quotienting for Efficient Neural Combinatorial Optimization},\nauthor={Darko Drakulic and Sofia Michel and Florian Mai and Arnaud Sors and Jean-Marc Andreoli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BRqlkTDvvm}\n}", "github": "", "project": "", "reviewers": "e3VL;8qf4;cBr6;fu7F", "pdf_size": 358510, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "105;52;90;80", "wc_strengths": "77;68;55;54", "wc_weaknesses": "569;239;245;101", "wc_questions": "167;57;12;7", "wc_limitations": "14;11;15;1", "wc_review": "932;427;417;243", "wc_reply_reviewers": "477;44;24;20", "wc_reply_authors": "1279;55;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.75, 19.343926695477318 ], "wc_strengths_avg": [ 63.5, 9.5524865872714 ], "wc_weaknesses_avg": [ 288.5, 171.88586329305852 ], "wc_questions_avg": [ 60.75, 64.35982830928 ], "wc_limitations_avg": [ 10.25, 5.539629951540085 ], "wc_review_avg": [ 504.75, 257.2939709748365 ], "wc_reply_reviewers_avg": [ 141.25, 194.05846412872592 ], "wc_reply_authors_avg": [ 333.5, 546.3462729807901 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=499883380830462795&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "naverlabs.com;naverlabs.com;idiap.ch;naverlabs.com;naverlabs.com", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "NAVER LABS;Idiap Research 
Institute", "aff_unique_dep": ";", "aff_unique_url": "https://labs.naver.com;https://www.idiap.ch", "aff_unique_abbr": "NLE;Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Unknown;Switzerland" }, { "id": "BT03V9Re9a", "title": "EmbedDistill: A Geometric Knowledge Distillation for Information Retrieval", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large neural models (such as Transformers) achieve state-of-the-art performance for information retrieval (IR). In this paper, we aim to improve distillation methods that pave the way for the resource-efficient deployment of such models in practice. Inspired by our theoretical analysis of the teacher-student generalization gap for IR models, we propose a novel distillation approach that leverages the relative geometry among queries and documents learned by the large teacher model. Unlike existing teacher score-based distillation methods, our proposed approach employs embedding matching tasks to provide a stronger signal to align the representations of the teacher and student models. In addition, it utilizes query generation to explore the data manifold to reduce the discrepancies between the student and the teacher where training data is sparse. Furthermore, our analysis also motivates novel asymmetric architectures for student models which realizes better embedding alignment without increasing online inference cost. On standard benchmarks like MSMARCO, we show that our approach successfully distills from both dual-encoder (DE) and cross-encoder (CE) teacher models to 1/10th size asymmetric students that can retain 95-97% of the teacher performance.", "keywords": "Knowledge distillation;teacher-student gap;dual encoder;cross encoder;information retrieval;query generation;embedding matching;retrieval;re-ranking", "primary_area": "", "supplementary_material": "/attachment/959769bcd6bb76380404afd2e8f9785ec9f04036.pdf", "author": "Seungyeon Kim;Ankit Singh Rawat;Manzil Zaheer;Sadeep Jayasumana;Veeranjaneyulu Sadhanala;Wittawat Jitkrittum;Aditya Krishna Menon;Rob Fergus;Sanjiv Kumar", "authorids": "~Seungyeon_Kim1;~Ankit_Singh_Rawat1;~Manzil_Zaheer1;~Sadeep_Jayasumana1;~Veeranjaneyulu_Sadhanala1;~Wittawat_Jitkrittum1;~Aditya_Krishna_Menon1;~Rob_Fergus1;~Sanjiv_Kumar1", "gender": ";M;M;;M;M;;M;", "homepage": "https://www.seungyeon.ai;https://ankitsrawat.github.io/home/;https://www.aclweb.org/anthology/people/m/manzil-zaheer/;;https://veeranjaneyulus.github.io/;http://wittawat.com;;http://cs.nyu.edu/fergus/;http://www.sanjivk.com/", "dblp": "74/7997-1.html;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;40/10701;;81/7249;95/3398.html;;77/3763;", "google_scholar": "zbcN_QIAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;A33FhJMAAAAJ;;FuIExf4AAAAJ;https://scholar.google.co.uk/citations?hl=en;;https://scholar.google.com.tw/citations?user=GgQ9GEkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;0000-0002-9400-9262;;;", "linkedin": ";;;;;wittawat-jitkrittum/;;;", "or_profile": "~Seungyeon_Kim1;~Ankit_Singh_Rawat1;~Manzil_Zaheer1;~Sadeep_Jayasumana1;~Veeranjaneyulu_Sadhanala1;~Wittawat_Jitkrittum1;~Aditya_Krishna_Menon1;~Rob_Fergus1;~Sanjiv_Kumar1", "aff": "Google;Google;Google DeepMind;;Google;Google Research;;Google;Google", "aff_domain": "google.com;google.com;deepmind.com;;google.com;google.com;;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;;Researcher;Research Scientist;;Research scientist;Research 
Scientist", "bibtex": "@misc{\nkim2023embeddistill,\ntitle={EmbedDistill: A Geometric Knowledge Distillation for Information Retrieval},\nauthor={Seungyeon Kim and Ankit Singh Rawat and Manzil Zaheer and Sadeep Jayasumana and Veeranjaneyulu Sadhanala and Wittawat Jitkrittum and Aditya Krishna Menon and Rob Fergus and Sanjiv Kumar},\nyear={2023},\nurl={https://openreview.net/forum?id=BT03V9Re9a}\n}", "github": "", "project": "", "reviewers": "pFfq;QGH5;KiMa;cm3b;579a", "site": "https://openreview.net/forum?id=BT03V9Re9a", "pdf_size": 725669, "rating": "3;5;5;6;6", "confidence": "5;4;5;4;3", "soundness": "3;2;3;3;3", "novelty": "1;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "50;72;134;133;146", "wc_strengths": "53;24;157;102;53", "wc_weaknesses": "150;358;116;153;65", "wc_questions": "15;7;4;56;11", "wc_limitations": "1;21;1;51;38", "wc_review": "269;482;412;495;313", "wc_reply_reviewers": "343;83;0;45;27", "wc_reply_authors": "716;258;0;0;0", "reply_reviewers": "2;1;0;1;1", "reply_authors": "3;2;1;1;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 107.0, 38.47076812334269 ], "wc_strengths_avg": [ 77.8, 46.86747272896203 ], "wc_weaknesses_avg": [ 168.4, 99.96119246987803 ], "wc_questions_avg": [ 18.6, 19.06410239166796 ], "wc_limitations_avg": [ 22.4, 19.895728184713423 ], "wc_review_avg": [ 394.2, 89.9497637573329 ], "wc_reply_reviewers_avg": [ 99.6, 124.65247691080992 ], "wc_reply_authors_avg": [ 194.8, 279.1002687207592 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7319250547113999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2626310148737686729&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Greedy Pruning with Group Lasso Provably Generalizes for Matrix Sensing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72479", "id": "BTRcVP7ZJn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd2107343c9cc973635d90dbfc122223-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BTRcVP7ZJn", "openreview": "https://openreview.net/forum?id=BTRcVP7ZJn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72479", "video": "https://nips.cc/virtual/2023/poster/72479", "author_site": "Nived Rajaraman, Fnu Devvrit, Aryan Mokhtari, Kannan Ramchandran", "tldr": "", "abstract": "Pruning schemes have been widely used in practice to reduce the complexity of trained models with a massive number of parameters. In fact, several practical studies have shown that if the pruned model is fine-tuned with some gradient-based updates it generalizes well to new samples. Although the above pipeline, which we refer to as pruning + fine-tuning, has been extremely successful in lowering the complexity of trained models, there is very little known about the theory behind this success. 
In this paper we address this issue by investigating the pruning + fine-tuning framework on the overparameterized matrix sensing problem with the ground truth denoted $U_\\star \\in \\mathbb{R}^{d \\times r}$ and the overparameterized model $U \\in \\mathbb{R}^{d \\times k}$ with $k \\gg r$. We study the approximate local minima of the mean square error, augmented with a smooth version of a group Lasso regularizer, $\\sum_{i=1}^{k} \\lVert Ue_i \\rVert_2 $. In particular, we provably show that pruning all the columns below a certain explicit $\\ell_2$-norm threshold results in a solution $U_{\\text{prune}}$ which has the minimum number of columns $r$, yet close to the ground truth in training loss. Moreover, in the subsequent fine-tuning phase, gradient descent initialized at $U_{\\text{prune}}$ converges at a linear rate to its limit. While our analysis provides insights into the role of regularization in pruning, we also show that running gradient descent in the absence of regularization results in models which are not suitable for greedy pruning, i.e., many columns could have their $\\ell_2$ norm comparable to that of the maximum. Lastly, we show that our results also extend to the training and pruning of two-layer neural networks with quadratic activation functions. To the best of our knowledge, our results provide the first rigorous insights on why greedy pruning + fine-tuning leads to smaller models which also generalize well.", "keywords": "Greedy Pruning; Matrix Sensing; Lasso regularization", "primary_area": "", "supplementary_material": "/attachment/31aa642d44492d07a5af9ba9c257792c89300bf4.pdf", "author": "Nived Rajaraman;Fnu Devvrit;Aryan Mokhtari;Kannan Ramchandran", "authorids": "~Nived_Rajaraman1;~Fnu_Devvrit1;~Aryan_Mokhtari3;~Kannan_Ramchandran1", "gender": "M;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~nived.rajaraman/;;https://sites.utexas.edu/mokhtari/;https://www.eecs.berkeley.edu/~kannanr/", "dblp": "229/4215;;140/7407;53/5765", "google_scholar": "7hb2BM8AAAAJ;c86HtPoAAAAJ;glcep6EAAAAJ;https://scholar.google.com.tw/citations?user=DcV-5RAAAAAJ", "orcid": ";;;0000-0002-4567-328X", "linkedin": ";devvrit/;;", "or_profile": "~Nived_Rajaraman1;~Fnu_Devvrit1;~Aryan_Mokhtari3;~Kannan_Ramchandran1", "aff": "University of California, Berkeley;, University of Texas at Austin;University of Texas, Austin;University of California, Berkeley", "aff_domain": "berkeley.edu;cs.utexas.edu;utexas.edu;berkeley.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nrajaraman2023greedy,\ntitle={Greedy Pruning with Group Lasso Provably Generalizes for Matrix Sensing},\nauthor={Nived Rajaraman and Fnu Devvrit and Aryan Mokhtari and Kannan Ramchandran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BTRcVP7ZJn}\n}", "github": "", "project": "", "reviewers": "oETd;J6kp;mDyb", "pdf_size": 770069, "rating": "5;7;7", "confidence": "4;4;3", "soundness": "2;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "175;59;110", "wc_strengths": "91;36;73", "wc_weaknesses": "250;69;45", "wc_questions": "139;15;44", "wc_limitations": "39;1;1", "wc_review": "694;180;273", "wc_reply_reviewers": "58;33;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ],
"novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 114.66666666666667, 47.471628954097994 ], "wc_strengths_avg": [ 66.66666666666667, 22.89589968143253 ], "wc_weaknesses_avg": [ 121.33333333333333, 91.50713390526204 ], "wc_questions_avg": [ 66.0, 52.959103718498355 ], "wc_limitations_avg": [ 13.666666666666666, 17.913371790059205 ], "wc_review_avg": [ 382.3333333333333, 223.62816360099987 ], "wc_reply_reviewers_avg": [ 34.333333333333336, 18.80307303489394 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12556875313042177514&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "berkeley.edu;cs.utexas.edu;utexas.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of California, Berkeley;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.utexas.edu", "aff_unique_abbr": "UC Berkeley;UT Austin", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Berkeley;Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "From Cloze to Comprehension: Retrofitting Pre-trained Masked Language Models to Pre-trained Machine Reader", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72478", "id": "BVN9Kgvwzv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d4e1c24ac41ff0b82ca1b171731f0b23-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BVN9Kgvwzv", "openreview": "https://openreview.net/forum?id=BVN9Kgvwzv", "poster": "/media/PosterPDFs/NeurIPS%202023/72478.png?t=1699269999.8869383", "slides": "https://nips.cc/virtual/2023/poster/72478", "video": "https://nips.cc/virtual/2023/poster/72478", "author_site": "Weiwen Xu, Xin Li, Wenxuan Zhang, Meng Zhou, Wai Lam, Luo Si, Lidong Bing", "tldr": "", "abstract": "We present Pre-trained Machine Reader (PMR), a novel method for retrofitting pre-trained masked language models (MLMs) to pre-trained machine reading comprehension (MRC) models without acquiring labeled data.\nPMR can resolve the discrepancy between model pre-training and downstream fine-tuning of existing MLMs.\nTo build the proposed PMR, we constructed a large volume of general-purpose and high-quality MRC-style training data by using Wikipedia hyperlinks and designed a Wiki Anchor Extraction task to guide the MRC-style pre-training.\nApart from its simplicity, PMR effectively solves extraction tasks, such as Extractive Question Answering and Named Entity Recognition. PMR shows tremendous improvements over existing approaches, especially in low-resource scenarios.\nWhen applied to the sequence classification task in the MRC formulation, PMR enables the extraction of high-quality rationales to explain the classification process, thereby providing greater prediction explainability. 
PMR also has the potential to serve as a unified model for tackling various extraction and classification tasks in the MRC formulation.", "keywords": "Machine Reading Comprehension;Pre-training;Natural Language Understanding", "primary_area": "", "supplementary_material": "/attachment/f65d86a1fc45946adfa886f94bf55057cc38a008.zip", "author": "Weiwen Xu;Xin Li;Wenxuan Zhang;Meng Zhou;Wai Lam;Luo Si;Lidong Bing", "authorids": "~Weiwen_Xu1;~Xin_Li40;~Wenxuan_Zhang1;~Meng_Zhou2;~Wai_Lam1;~Luo_Si3;~Lidong_Bing2", "gender": "M;M;;M;M;M;", "homepage": "https://wwxu21.github.io/;https://lixin4ever.github.io/;https://isakzhang.github.io/;https://www.prettymeng.com;http://www.se.cuhk.edu.hk/~textmine;;https://lidongbing.github.io", "dblp": "57/4640;09/1365-56.html;85/1177-1.html;;48/1707;;53/6625", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=syD9lxQAAAAJ;https://scholar.google.com/citations?hl=en;;ewA4NAcAAAAJ;xqEfATIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;", "linkedin": ";;wenxuan-zhang-608b88153/;prettymeng;;;", "or_profile": "~Weiwen_Xu1;~Xin_Li40;~Wenxuan_Zhang1;~Meng_Zhou2;~Wai_Lam1;~Luo_Si3;~Lidong_Bing3", "aff": "The Chinese University of Hong Kong;Alibaba Group;Alibaba Group;Carnegie Mellon University;The Chinese University of Hong Kong;Alibaba Group;Alibaba Group", "aff_domain": "cuhk.edu.hk;alibaba-inc.com;alibaba-inc.com;andrew.cmu.edu;cuhk.edu.hk;alibaba-inc.com;alibaba-inc.com", "position": "PhD student;Researcher;Researcher;MS student;Professor;Alibaba Group Inc;Scientist", "bibtex": "@inproceedings{\nxu2023from,\ntitle={From Cloze to Comprehension: Retrofitting Pre-trained Masked Language Models to Pre-trained Machine Reader},\nauthor={Weiwen Xu and Xin Li and Wenxuan Zhang and Meng Zhou and Wai Lam and Luo Si and Lidong Bing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BVN9Kgvwzv}\n}", "github": "", "project": "", "reviewers": "8871;p962;cpnT;kcDM;D3ec;bQaJ", "pdf_size": 1229531, "rating": "4;5;6;6;7;7", "confidence": "3;4;5;3;4;3", "soundness": "3;3;3;3;3;3", "novelty": "2;3;3;3;3;3", "presentation": "3;3;3;3;4;3", "wc_summary": "65;119;79;17;72;101", "wc_strengths": "21;35;64;38;47;31", "wc_weaknesses": "145;53;194;20;96;77", "wc_questions": "5;74;14;12;44;55", "wc_limitations": "14;5;12;1;1;5", "wc_review": "250;286;363;88;260;269", "wc_reply_reviewers": "0;55;0;0;115;116", "wc_reply_authors": "0;93;0;0;0;422", "reply_reviewers": "0;1;0;0;1;2", "reply_authors": "1;2;1;1;1;3", "rating_avg": [ 5.833333333333333, 1.0671873729054746 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 75.5, 31.883903985135404 ], "wc_strengths_avg": [ 39.333333333333336, 13.498971154211059 ], "wc_weaknesses_avg": [ 97.5, 57.70254182731757 ], "wc_questions_avg": [ 34.0, 25.383721817994566 ], "wc_limitations_avg": [ 6.333333333333333, 5.022173057773122 ], "wc_review_avg": [ 252.66666666666666, 82.38864538811698 ], "wc_reply_reviewers_avg": [ 47.666666666666664, 51.75798381785064 ], "wc_reply_authors_avg": [ 85.83333333333333, 154.12594056665336 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 
0.1396860591539156, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14697873897071152654&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cuhk.edu.hk;alibaba-inc.com;alibaba-inc.com;andrew.cmu.edu;cuhk.edu.hk;alibaba-inc.com;alibaba-inc.com", "author_num": 7, "aff_unique_index": "0;1;1;2;0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Alibaba Group;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.alibaba.com;https://www.cmu.edu", "aff_unique_abbr": "CUHK;Alibaba;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Mip-Grid: Anti-aliased Grid Representations for Neural Radiance Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72477", "id": "BW6nZf7TnK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/082d3d795520c43214da5123e56a3a34-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BW6nZf7TnK", "openreview": "https://openreview.net/forum?id=BW6nZf7TnK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72477", "video": "https://nips.cc/virtual/2023/poster/72477", "author_site": "Seungtae Nam, Daniel Rho, Jong Hwan Ko, Eunbyung Park", "tldr": "", "abstract": "Despite the remarkable achievements of neural radiance fields (NeRF) in representing 3D scenes and generating novel view images, the aliasing issue, rendering 'jaggies' or 'blurry' images at varying camera distances, remains unresolved in most existing approaches. The recently proposed mip-NeRF has effectively addressed this challenge by introducing integrated positional encodings (IPE). However, it relies on MLP architecture to represent the radiance fields, missing out on the fast training speed offered by the latest grid-based methods. In this work, we present mip-Grid, a novel approach that integrates anti-aliasing techniques into grid-based representations for radiance fields, mitigating the aliasing artifacts while enjoying fast training time. Notably, the proposed method uses a single-scale shared grid representation and a single-sampling approach, which only introduces minimal additions to the model parameters and computational costs. To handle scale ambiguity, mip-Grid generates multiple grids by applying simple convolution operations over the shared grid and uses the scale-aware coordinate to retrieve the appropriate features from the generated multiple grids. To test the effectiveness, we incorporated the proposed approach into the two recent representative grid-based methods, TensoRF and K-Planes. 
The experimental results demonstrated that mip-Grid greatly improved the rendering performance of both methods and showed comparable performance to mip-NeRF on multi-scale datasets while achieving significantly faster training time.", "keywords": "Novel view synthesis;Neural radiance fields", "primary_area": "", "supplementary_material": "/attachment/e9ade19f25a3e35ecb8de7ea79321807c71f32b6.zip", "author": "Seungtae Nam;Daniel Rho;Jong Hwan Ko;Eunbyung Park", "authorids": "~Seungtae_Nam1;~Daniel_Rho1;~Jong_Hwan_Ko2;~Eunbyung_Park1", "gender": "M;M;;M", "homepage": "https://github.com/stnamjef;;http://iris.skku.edu/;https://silverbottlep.github.io/", "dblp": "321/0019;311/4143;168/6308;92/9727", "google_scholar": "8NKPmmwCmrAC;nEC0wK4AAAAJ;https://scholar.google.co.kr/citations?user=UN_OIs4AAAAJ;iPyuJmQAAAAJ", "orcid": ";;0000-0003-4434-4318;", "linkedin": ";;;eunbyung-park-286384b4/", "or_profile": "~Seungtae_Nam1;~Daniel_Rho1;~Jong_Hwan_Ko2;~Eunbyung_Park1", "aff": "Sungkyunkwan University;Korea Telecom Research;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;kt.com;skku.edu;skku.edu", "position": "MS student;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nnam2023mipgrid,\ntitle={Mip-Grid: Anti-aliased Grid Representations for Neural Radiance Fields},\nauthor={Seungtae Nam and Daniel Rho and Jong Hwan Ko and Eunbyung Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BW6nZf7TnK}\n}", "github": "", "project": "", "reviewers": "8s5r;QWBQ;9wTm;rA7o", "pdf_size": 4584298, "rating": "5;6;6;6", "confidence": "4;4;5;3", "soundness": "3;3;3;2", "novelty": "3;2;3;2", "presentation": "3;3;3;2", "wc_summary": "106;79;28;42", "wc_strengths": "107;101;43;25", "wc_weaknesses": "96;156;113;125", "wc_questions": "17;23;11;5", "wc_limitations": "10;28;24;1", "wc_review": "336;387;219;198", "wc_reply_reviewers": "42;8;0;40", "wc_reply_authors": "187;0;0;140", "reply_reviewers": "1;1;0;2", "reply_authors": "2;1;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.75, 30.695072894521687 ], "wc_strengths_avg": [ 69.0, 35.63705936241092 ], "wc_weaknesses_avg": [ 122.5, 21.914607000811127 ], "wc_questions_avg": [ 14.0, 6.708203932499369 ], "wc_limitations_avg": [ 15.75, 10.825317547305483 ], "wc_review_avg": [ 285.0, 78.9461842016446 ], "wc_reply_reviewers_avg": [ 22.5, 18.728320800328042 ], "wc_reply_authors_avg": [ 81.75, 83.42174476717686 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9847023256312999970&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "skku.edu;kt.com;skku.edu;skku.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Sungkyunkwan University;Korea Telecom", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.skku.edu;https://www.kt.com", "aff_unique_abbr": "SKKU;KT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Offline Multi-Agent Reinforcement Learning with Implicit Global-to-Local Value 
Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72476", "id": "BXQtgwA2n0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a46c84276e3a4249ab7dbf3e069baf7f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BXQtgwA2n0", "openreview": "https://openreview.net/forum?id=BXQtgwA2n0", "poster": "/media/PosterPDFs/NeurIPS%202023/72476.png?t=1697520107.4072769", "slides": "https://nips.cc/virtual/2023/poster/72476", "video": "https://nips.cc/virtual/2023/poster/72476", "author_site": "Xiangsen Wang, Haoran Xu, Yinan Zheng, Xianyuan Zhan", "tldr": "", "abstract": "Offline reinforcement learning (RL) has received considerable attention in recent years due to its attractive capability of learning policies from offline datasets without environmental interactions. Despite some success in the single-agent setting, offline multi-agent RL (MARL) remains to be a challenge. The large joint state-action space and the coupled multi-agent behaviors pose extra complexities for offline policy optimization. Most existing offline MARL studies simply apply offline data-related regularizations on individual agents, without fully considering the multi-agent system at the global level. In this work, we present OMIGA, a new offline multi-agent RL algorithm with implicit global-to-local value regularization. OMIGA provides a principled framework to convert global-level value regularization into equivalent implicit local value regularizations and simultaneously enables in-sample learning, thus elegantly bridging multi-agent value decomposition and policy learning with offline regularizations. Based on comprehensive experiments on the offline multi-agent MuJoCo and StarCraft II micro-management tasks, we show that OMIGA achieves superior performance over the state-of-the-art offline MARL methods in almost all tasks.", "keywords": "Offline reinforcement learning; multi-agent reinforcement learning; multi-agent cooperation", "primary_area": "", "supplementary_material": "/attachment/3641b3b8d7ddbfe0fc269bc4be45f3f4a68943d7.zip", "author": "Xiangsen Wang;Haoran Xu;Yinan Zheng;Xianyuan Zhan", "authorids": "~Xiangsen_Wang1;~Haoran_Xu4;~Yinan_Zheng1;~Xianyuan_Zhan1", "gender": "M;M;;M", "homepage": "https://github.com/sanmuyang;https://ryanxhr.github.io/;https://github.com/ZhengYinan-AIR;http://zhanxianyuan.xyz/", "dblp": "341/5749;;;181/5081", "google_scholar": ";iX8AJI0AAAAJ;;pDMnGloAAAAJ", "orcid": "0000-0002-5349-9170;;;0000-0002-3683-0554", "linkedin": ";;;", "or_profile": "~Xiangsen_Wang1;~Haoran_Xu4;~Yinan_Zheng1;~Xianyuan_Zhan1", "aff": "Beijing Jiaotong University;JD.com;Tsinghua University;Tsinghua University", "aff_domain": "bjtu.edu.cn;jd.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2023offline,\ntitle={Offline Multi-Agent Reinforcement Learning with Implicit Global-to-Local Value Regularization},\nauthor={Xiangsen Wang and Haoran Xu and Yinan Zheng and Xianyuan Zhan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BXQtgwA2n0}\n}", "github": "", "project": "", "reviewers": "uuTt;WVBc;xgpF;LQB9;trCY", "pdf_size": 740345, "rating": "5;5;5;6;7", "confidence": "2;5;4;4;4", "soundness": "3;3;1;2;3", "novelty": "2;3;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "55;77;90;46;102", "wc_strengths": "19;108;27;65;100", "wc_weaknesses": 
"105;71;533;54;156", "wc_questions": "7;606;188;45;503", "wc_limitations": "1;11;1;1;133", "wc_review": "187;873;839;211;994", "wc_reply_reviewers": "5;231;1036;9;169", "wc_reply_authors": "0;278;2149;0;54", "reply_reviewers": "1;1;3;1;1", "reply_authors": "1;2;6;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 74.0, 20.94755355644186 ], "wc_strengths_avg": [ 63.8, 36.40549409086491 ], "wc_weaknesses_avg": [ 183.8, 178.04201751272086 ], "wc_questions_avg": [ 269.8, 242.3645188553803 ], "wc_limitations_avg": [ 29.4, 51.94458585839336 ], "wc_review_avg": [ 620.8, 348.31388143454745 ], "wc_reply_reviewers_avg": [ 290.0, 383.3598831385464 ], "wc_reply_authors_avg": [ 496.2, 832.7459156309325 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.15309310892394862, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15813525235637701050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bjtu.edu.cn;jd.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Beijing Jiao Tong University;JD.com;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.jd.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "BJTU;JD;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Waypoint Transformer: Reinforcement Learning via Supervised Learning with Intermediate Targets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72475", "id": "BYywOFbRFz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f58c24798220ba724fe05c0fa786227d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BYywOFbRFz", "openreview": "https://openreview.net/forum?id=BYywOFbRFz", "poster": "/media/PosterPDFs/NeurIPS%202023/72475.png?t=1702349766.2960339", "slides": "https://nips.cc/virtual/2023/poster/72475", "video": "https://nips.cc/virtual/2023/poster/72475", "author_site": "Anirudhan Badrinath, Yannis Flet-Berliac, Allen Nie, Emma Brunskill", "tldr": "", "abstract": "Despite the recent advancements in offline reinforcement learning via supervised learning (RvS) and the success of the decision transformer (DT) architecture in various domains, DTs have fallen short in several challenging benchmarks. The root cause of this underperformance lies in their inability to seamlessly connect segments of suboptimal trajectories. To overcome this limitation, we present a novel approach to enhance RvS methods by integrating intermediate targets. We introduce the Waypoint Transformer (WT), using an architecture that builds upon the DT framework and conditioned on automatically-generated waypoints. The results show a significant increase in the final return compared to existing RvS methods, with performance on par or greater than existing state-of-the-art temporal difference learning-based methods. 
Additionally, the performance and stability improvements are largest in the most challenging environments and data configurations, including AntMaze Large Play/Diverse and Kitchen Mixed/Partial.", "keywords": "offline reinforcement learning;reinforcement learning via supervised learning;behavioral cloning", "primary_area": "", "supplementary_material": "/attachment/0104f726a9279f7dcedd2cdab7665baf9a6e19f0.zip", "author": "Anirudhan Badrinath;Yannis Flet-Berliac;Allen Nie;Emma Brunskill", "authorids": "~Anirudhan_Badrinath1;~Yannis_Flet-Berliac1;~Allen_Nie1;~Emma_Brunskill2", "gender": ";;M;", "homepage": ";https://ynns.io/;https://anie.me;", "dblp": ";239/5247;207/7996;", "google_scholar": ";https://scholar.google.fr/citations?user=qclRKHoAAAAJ;r90OelAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Anirudhan_Badrinath1;~Yannis_Flet-Berliac1;~Allen_Nie1;~Emma_Brunskill2", "aff": ";Stanford University;Microsoft Research;", "aff_domain": ";stanford.edu;microsoft.com;", "position": ";Postdoc;Intern;", "bibtex": "@inproceedings{\nbadrinath2023waypoint,\ntitle={Waypoint Transformer: Reinforcement Learning via Supervised Learning with Intermediate Targets},\nauthor={Anirudhan Badrinath and Yannis Flet-Berliac and Allen Nie and Emma Brunskill},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BYywOFbRFz}\n}", "github": "", "project": "", "reviewers": "j7or;751f;QWFP;8scR", "pdf_size": 2678390, "rating": "4;4;5;6", "confidence": "4;3;3;3", "soundness": "2;2;2;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "76;60;59;89", "wc_strengths": "97;22;92;121", "wc_weaknesses": "94;92;337;210", "wc_questions": "91;49;196;9", "wc_limitations": "2;1;12;7", "wc_review": "360;224;696;436", "wc_reply_reviewers": "0;374;782;107", "wc_reply_authors": "0;957;1624;220", "reply_reviewers": "0;2;3;1", "reply_authors": "1;3;3;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.0, 12.389511693363866 ], "wc_strengths_avg": [ 83.0, 36.88495628301598 ], "wc_weaknesses_avg": [ 183.25, 100.80519579862934 ], "wc_questions_avg": [ 86.25, 69.68276329193613 ], "wc_limitations_avg": [ 5.5, 4.387482193696061 ], "wc_review_avg": [ 429.0, 171.84586116633707 ], "wc_reply_reviewers_avg": [ 315.75, 301.68557721574956 ], "wc_reply_authors_avg": [ 700.25, 640.3562973064293 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11286288898892068227&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";stanford.edu;microsoft.com;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Stanford;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "LANCE: Stress-testing Visual Models by Generating Language-guided Counterfactual Images", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72474", "id": "BbIxB4xnbq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4f3820576130a8f796ddbf204c841487-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BbIxB4xnbq", "openreview": "https://openreview.net/forum?id=BbIxB4xnbq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72474", "video": "https://nips.cc/virtual/2023/poster/72474", "author_site": "Viraj Prabhu, Sriram Yenamandra, Sriram Yenamandra, Prithvijit Chattopadhyay, Judy Hoffman", "tldr": "", "abstract": "We propose an automated algorithm to stress-test a trained visual model by generating language-guided counterfactual test images (LANCE). Our method leverages recent progress in large language modeling and text-based image editing to augment an IID test set with a suite of diverse, realistic, and challenging test images without altering model weights. We benchmark the performance of a diverse set of pre-trained models on our generated data and observe significant and consistent performance drops. We further analyze model sensitivity across different types of edits, and demonstrate its applicability at surfacing previously unknown class-level model biases in ImageNet. Code is available at https://github.com/virajprabhu/lance.", "keywords": "image classification;robustness;guided diffusion models;counterfactuals", "primary_area": "", "supplementary_material": "", "author": "Viraj Uday Prabhu;Sriram Yenamandra;Prithvijit Chattopadhyay;Judy Hoffman", "authorids": "~Viraj_Uday_Prabhu1;~Sriram_Yenamandra1;~Prithvijit_Chattopadhyay1;~Judy_Hoffman1", "gender": "M;M;M;F", "homepage": "http://virajprabhu.github.io;;https://prithv1.xyz/;https://www.cc.gatech.edu/~judy/", "dblp": "199/1973;291/9224;179/2452;45/10336", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.co.in/citations?user=rIK7AMkAAAAJ;mqpjAt4AAAAJ", "orcid": ";;;", "linkedin": "viraj-prabhu-0a2a9435/;;;", "or_profile": "~Viraj_Uday_Prabhu1;~Sriram_Yenamandra1;~Prithvijit_Chattopadhyay1;~Judy_Hoffman1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;MS student;PhD;Assistant Professor", "bibtex": "@inproceedings{\nprabhu2023lance,\ntitle={{LANCE}: Stress-testing Visual Models by Generating Language-guided Counterfactual Images},\nauthor={Viraj Uday Prabhu and Sriram Yenamandra and Prithvijit Chattopadhyay and Judy Hoffman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BbIxB4xnbq}\n}", "github": "", "project": "", "reviewers": "9YtG;G3fV;muqu;Fgt7", "pdf_size": 6001588, "rating": "5;5;6;7", "confidence": "2;4;2;4", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "3;4;3;3", "wc_summary": "53;83;123;106", "wc_strengths": "33;108;58;71", "wc_weaknesses": "53;556;117;291", "wc_questions": "20;5;2;57", "wc_limitations": "1;5;2;17", "wc_review": "160;757;302;542", "wc_reply_reviewers": "0;0;0;103", "wc_reply_authors": "36;36;36;168", "reply_reviewers": "0;0;0;2", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 26.252380844411046 ], "wc_strengths_avg": [ 67.5, 27.07858932810201 ], 
"wc_weaknesses_avg": [ 254.25, 194.7708589599584 ], "wc_questions_avg": [ 21.0, 21.874642854227357 ], "wc_limitations_avg": [ 6.25, 6.378675411086537 ], "wc_review_avg": [ 440.25, 228.2196036715514 ], "wc_reply_reviewers_avg": [ 25.75, 44.60030829489859 ], "wc_reply_authors_avg": [ 69.0, 57.15767664977295 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12829186529610756082&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Contextual Lasso: Sparse Linear Models via Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72473", "id": "BdvCo8RVlx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f226824426a4d6ae3d3efad8883fc53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BdvCo8RVlx", "openreview": "https://openreview.net/forum?id=BdvCo8RVlx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72473", "video": "https://nips.cc/virtual/2023/poster/72473", "author_site": "Ryan Thompson, Amir Dezfouli, robert kohn", "tldr": "", "abstract": "Sparse linear models are one of several core tools for interpretable machine learning, a field of emerging importance as predictive models permeate decision-making in many domains. Unfortunately, sparse linear models are far less flexible as functions of their input features than black-box models like deep neural networks. With this capability gap in mind, we study a not-uncommon situation where the input features dichotomize into two groups: explanatory features, which are candidates for inclusion as variables in an interpretable model, and contextual features, which select from the candidate variables and determine their effects. This dichotomy leads us to the contextual lasso, a new statistical estimator that fits a sparse linear model to the explanatory features such that the sparsity pattern and coefficients vary as a function of the contextual features. The fitting process learns this function nonparametrically via a deep neural network. To attain sparse coefficients, we train the network with a novel lasso regularizer in the form of a projection layer that maps the network's output onto the space of $\\ell_1$-constrained linear models. 
An extensive suite of experiments on real and synthetic data suggests that the learned models, which remain highly transparent, can be sparser than the regular lasso without sacrificing the predictive power of a standard deep neural network.", "keywords": "feature selection;sparsity;sparse regression;varying coefficients;deep learning", "primary_area": "", "supplementary_material": "/attachment/0e9111f671a2620217c68981ba7cd00e2d9183a0.zip", "author": "Ryan Thompson;Amir Dezfouli;Robert Kohn", "authorids": "~Ryan_Thompson1;~Amir_Dezfouli2;~Robert_Kohn1", "gender": ";M;M", "homepage": "https://ryan-thompson.github.io/;https://adezfouli.github.io/;", "dblp": ";13/7492;46/4172", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ryan_Thompson1;~Amir_Dezfouli2;~Robert_Kohn1", "aff": "University of New South Wales;Data61;University of New South Wales", "aff_domain": "unsw.edu.au;data61.csiro.au;unsw.edu.au", "position": "Postdoc;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nthompson2023the,\ntitle={The Contextual Lasso: Sparse Linear Models via Deep Neural Networks},\nauthor={Ryan Thompson and Amir Dezfouli and Robert Kohn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BdvCo8RVlx}\n}", "github": "", "project": "", "reviewers": "kkJW;mL1j;BSpt;DGcD;Eybr", "pdf_size": 2683858, "rating": "4;5;5;6;7", "confidence": "3;4;4;2;3", "soundness": "3;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;3;4;3;4", "wc_summary": "58;158;78;115;83", "wc_strengths": "71;265;69;51;44", "wc_weaknesses": "104;477;171;135;42", "wc_questions": "77;15;248;142;62", "wc_limitations": "46;36;71;1;9", "wc_review": "356;951;637;444;240", "wc_reply_reviewers": "17;0;0;110;43", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 98.4, 34.966269460724575 ], "wc_strengths_avg": [ 100.0, 83.14324987634294 ], "wc_weaknesses_avg": [ 185.8, 151.63561586909586 ], "wc_questions_avg": [ 108.8, 80.5962778296864 ], "wc_limitations_avg": [ 32.6, 25.381883302859936 ], "wc_review_avg": [ 525.6, 249.1446166386101 ], "wc_reply_reviewers_avg": [ 34.0, 41.12906514862695 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3668996928526715, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14121381345683024671&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "unsw.edu.au;data61.csiro.au;unsw.edu.au", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of New South Wales;Data61", "aff_unique_dep": ";", "aff_unique_url": "https://www.unsw.edu.au;https://data61.csiro.au", "aff_unique_abbr": "UNSW;Data61", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "MG-ViT: A Multi-Granularity Method for Compact and Efficient Vision Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72472", "id": "Bf6WFWNCUP", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/daeef96627a461ec43b7567b2930cfde-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Bf6WFWNCUP", "openreview": "https://openreview.net/forum?id=Bf6WFWNCUP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72472", "video": "https://nips.cc/virtual/2023/poster/72472", "author_site": "Yu Zhang, Yepeng Liu, Duoqian Miao, Qi Zhang, Yiwei Shi, Liang Hu", "tldr": "", "abstract": "Vision Transformer (ViT) faces obstacles in wide application due to its huge computational cost. Almost all existing studies on compressing ViT adopt the manner of splitting an image with a single granularity, with very few exploration of splitting an image with multi-granularity. As we know, important information often randomly concentrate in few regions of an image, necessitating multi-granularity attention allocation to an image. Enlightened by this, we introduce the multi-granularity strategy to compress ViT, which is simple but effective. We propose a two-stage multi-granularity framework, MG-ViT, to balance ViT\u2019s performance and computational cost. In single-granularity inference stage, an input image is split into a small number of patches for simple inference. If necessary, multi-granularity inference stage will be instigated, where the important patches are further subsplit into multi-finer-grained patches for subsequent inference. Moreover, prior studies on compression only for classification, while we extend the multi-granularity strategy to hierarchical ViT for downstream tasks such as detection and segmentation. Extensive experiments Prove the effectiveness of the multi-granularity strategy. For instance, on ImageNet, without any loss of performance, MG-ViT reduces 47\\% FLOPs of LV-ViT-S and 56\\% FLOPs of DeiT-S.", "keywords": "Efficient AI;Vision Transformer;Image Classification;Multi-Granularity;Three-Way Decisions.", "primary_area": "", "supplementary_material": "", "author": "Yu Zhang;Yepeng Liu;Duoqian Miao;Qi Zhang;Yiwei Shi;Liang Hu", "authorids": "~Yu_Zhang60;~Yepeng_Liu1;~Duoqian_Miao1;~Qi_Zhang25;~Yiwei_Shi1;~Liang_Hu1", "gender": ";;M;M;;M", "homepage": ";;https://iip.tongji.edu.cn;https://sites.google.com/view/qizhang-bit-uts/home;;https://sites.google.com/view/lianghu/home", "dblp": ";;90/1041-1;52/323-20;;48/5388-4", "google_scholar": ";;;8UAk1p4AAAAJ;;https://scholar.google.com.au/citations?user=cj6wAgYAAAAJ", "orcid": ";;0000-0001-6588-1468;0000-0002-1037-1361;;", "linkedin": ";;;;;", "or_profile": "~Yu_Zhang60;~Yepeng_Liu1;~Duoqian_Miao1;~Qi_Zhang25;~Yiwei_Shi1;~Liang_Hu1", "aff": ";;Tongji University;Tongji University;;Tongji University", "aff_domain": ";;tongji.edu.cn;tongji.edu.cn;;tongji.edu.cn", "position": ";;Full Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nzhang2023mgvit,\ntitle={{MG}-ViT: A Multi-Granularity Method for Compact and Efficient Vision Transformers},\nauthor={Yu Zhang and Yepeng Liu and Duoqian Miao and Qi Zhang and Yiwei Shi and Liang Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Bf6WFWNCUP}\n}", "github": "", "project": "", "reviewers": "V3HF;L7yG;cCy5;J3bo;qw7Z", "pdf_size": 17368851, "rating": "4;5;5;5;6", "confidence": "5;4;5;4;5", "soundness": "3;3;3;3;3", "novelty": "3;2;2;2;3", "presentation": "3;3;1;3;3", "wc_summary": "96;46;37;40;73", "wc_strengths": "38;40;25;55;35", "wc_weaknesses": "55;87;222;157;206", "wc_questions": "3;49;6;23;65", "wc_limitations": 
"3;1;10;3;1", "wc_review": "195;223;300;278;380", "wc_reply_reviewers": "77;245;50;115;560", "wc_reply_authors": "219;155;52;567;1165", "reply_reviewers": "2;1;1;3;3", "reply_authors": "3;2;2;4;5", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 58.4, 22.703303724348135 ], "wc_strengths_avg": [ 38.6, 9.687104830649869 ], "wc_weaknesses_avg": [ 145.4, 65.20306741250752 ], "wc_questions_avg": [ 29.2, 24.235511135521776 ], "wc_limitations_avg": [ 3.6, 3.32264954516723 ], "wc_review_avg": [ 275.2, 64.44036002382359 ], "wc_reply_reviewers_avg": [ 209.4, 187.63645701195705 ], "wc_reply_authors_avg": [ 431.6, 405.4654609211492 ], "reply_reviewers_avg": [ 2.0, 0.8944271909999159 ], "reply_authors_avg": [ 3.2, 1.16619037896906 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6085277116424733314&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";;tongji.edu.cn;tongji.edu.cn;;tongji.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Zero-shot causal learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72471", "id": "BfQJrIiOZC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15ddb1773510075ef44981cdb204330b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BfQJrIiOZC", "openreview": "https://openreview.net/forum?id=BfQJrIiOZC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72471", "video": "https://nips.cc/virtual/2023/poster/72471", "author_site": "Hamed Nilforoshan, Michael Moor, Yusuf Roohani, Yining Chen, Anja \u0160urina, Michihiro Yasunaga, Sara Oblak, Jure Leskovec", "tldr": "", "abstract": "Predicting how different interventions will causally affect a specific individual is important in a variety of domains such as personalized medicine, public policy, and online marketing. There are a large number of methods to predict the effect of an existing intervention based on historical data from individuals who received it. \nHowever, in many settings it is important to predict the effects of novel interventions (e.g., a newly invented drug), which these methods do not address.\nHere, we consider zero-shot causal learning: predicting the personalized effects of a novel intervention. We propose CaML, a causal meta-learning framework which formulates the personalized prediction of each intervention's effect as a task. CaML trains a single meta-model across thousands of tasks, each constructed by sampling an intervention, its recipients, and its nonrecipients. By leveraging both intervention information (e.g., a drug's attributes) and individual features (e.g., a patient's history), CaML is able to predict the personalized effects of novel interventions that do not exist at the time of training. Experimental results on real world datasets in large-scale medical claims and cell-line perturbations demonstrate the effectiveness of our approach. 
Most strikingly, CaML's zero-shot predictions outperform even strong baselines trained directly on data from the test interventions.", "keywords": "causal inference;CATE;CATE estimation;causal machine learning;causal ML;heterogenous treatment effects;causality;potential outcomes;treatment effect", "primary_area": "", "supplementary_material": "/attachment/f485ab8b117b3957496b3201e42ecfe1a1f92365.zip", "author": "Hamed Nilforoshan;Michael Moor;Yusuf H Roohani;Yining Chen;Anja \u0160urina;Michihiro Yasunaga;Sara Oblak;Jure Leskovec", "authorids": "~Hamed_Nilforoshan1;~Michael_Moor1;~Yusuf_H_Roohani1;~Yining_Chen1;~Anja_\u0160urina1;~Michihiro_Yasunaga1;~Sara_Oblak1;~Jure_Leskovec1", "gender": "M;;;F;F;;F;", "homepage": "http://hamedn.com;;;;;;;http://cs.stanford.edu/~jure/", "dblp": "194/2918;;;;328/8656;202/1809;;l/JureLeskovec", "google_scholar": ";;;4a6iPeUAAAAJ;https://scholar.google.com/citations?hl=en;SieJYoEAAAAJ;;Q_kKkIUAAAAJ", "orcid": ";;;;;;;0000-0002-5411-923X", "linkedin": ";;;;anja-%C5%A1urina-423054173/;;sara-oblak-585a5a263/;leskovec/", "or_profile": "~Hamed_Nilforoshan1;~Michael_Moor1;~Yusuf_H_Roohani1;~Yining_Chen1;~Anja_\u0160urina1;~Michihiro_Yasunaga1;~Sara_Oblak1;~Jure_Leskovec1", "aff": "Computer Science Department, Stanford University;;;Stanford University;Institute of Neuroinformatics, University of Zurich;Stanford University;University of Ljubljana;Kumo.AI", "aff_domain": "cs.stanford.edu;;;stanford.edu;ini.uzh.ch;stanford.edu;uni-lj.si;kumo.ai", "position": "PhD student;;;PhD student;MS student;PhD student;MS student;Chief Scientist", "bibtex": "@inproceedings{\nnilforoshan2023zeroshot,\ntitle={Zero-shot causal learning},\nauthor={Hamed Nilforoshan and Michael Moor and Yusuf H Roohani and Yining Chen and Anja {\\v{S}}urina and Michihiro Yasunaga and Sara Oblak and Jure Leskovec},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BfQJrIiOZC}\n}", "github": "", "project": "", "reviewers": "fcYE;o8uy;qej5;uxCL", "pdf_size": 1573914, "rating": "7;7;7;7", "confidence": "4;3;4;3", "soundness": "3;3;3;2", "novelty": "2;3;3;3", "presentation": "1;3;2;3", "wc_summary": "71;28;138;36", "wc_strengths": "82;19;87;30", "wc_weaknesses": "95;64;109;96", "wc_questions": "55;13;3;28", "wc_limitations": "1;7;3;15", "wc_review": "304;131;340;205", "wc_reply_reviewers": "293;42;0;0", "wc_reply_authors": "1162;0;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 68.25, 43.395708313150045 ], "wc_strengths_avg": [ 54.5, 30.30264014900352 ], "wc_weaknesses_avg": [ 91.0, 16.537835408541227 ], "wc_questions_avg": [ 24.75, 19.60070151805797 ], "wc_limitations_avg": [ 6.5, 5.361902647381804 ], "wc_review_avg": [ 245.0, 82.31342539343142 ], "wc_reply_reviewers_avg": [ 83.75, 122.02125839377334 ], "wc_reply_authors_avg": [ 290.5, 503.16075959875883 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12907162807346560394&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "cs.stanford.edu;;;stanford.edu;ini.uzh.ch;stanford.edu;uni-lj.si;kumo.ai", "author_num": 
8, "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": "Stanford University;University of Zurich;University of Ljubljana;Kumo.AI", "aff_unique_dep": "Computer Science Department;Institute of Neuroinformatics;;", "aff_unique_url": "https://www.stanford.edu;https://www.neuro.ethz.ch;https://www.uni-lj.si;https://www.kumo.ai", "aff_unique_abbr": "Stanford;UZH;UL;Kumo.AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;1;0;2;0", "aff_country_unique": "United States;Switzerland;Slovenia" }, { "title": "AVIDa-hIL6: A Large-Scale VHH Dataset Produced from an Immunized Alpaca for Predicting Antigen-Antibody Interactions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73664", "id": "BgY17iEnTb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8339dacd9df7ffe9623760f74169dd1e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=BgY17iEnTb", "openreview": "https://openreview.net/forum?id=BgY17iEnTb", "poster": "/media/PosterPDFs/NeurIPS%202023/73664.png?t=1699338446.0325513", "slides": "https://nips.cc/virtual/2023/poster/73664", "video": "https://nips.cc/virtual/2023/poster/73664", "author_site": "Hirofumi Tsuruta, Hiroyuki Yamazaki, Ryota Maeda, Ryotaro Tamura, Jennifer Wei, Zelda Mariet, Poomarin Phloyphisut, Hidetoshi Shimokawa, Joseph R. Ledsam, Lucy Colwell, Akihiro Imura", "tldr": "", "abstract": "Antibodies have become an important class of therapeutic agents to treat human diseases.\nTo accelerate therapeutic antibody discovery, computational methods, especially machine learning, have attracted considerable interest for predicting specific interactions between antibody candidates and target antigens such as viruses and bacteria.\nHowever, the publicly available datasets in existing works have notable limitations, such as small sizes and the lack of non-binding samples and exact amino acid sequences.\nTo overcome these limitations, we have developed AVIDa-hIL6, a large-scale dataset for predicting antigen-antibody interactions in the variable domain of heavy chain of heavy chain antibodies (VHHs), produced from an alpaca immunized with the human interleukin-6 (IL-6) protein, as antigens.\nBy leveraging the simple structure of VHHs, which facilitates identification of full-length amino acid sequences by DNA sequencing technology, AVIDa-hIL6 contains 573,891 antigen-VHH pairs with amino acid sequences.\nAll the antigen-VHH pairs have reliable labels for binding or non-binding, as generated by a novel labeling method.\nFurthermore, via introduction of artificial mutations, AVIDa-hIL6 contains 30 different mutants in addition to wild-type IL-6 protein.\nThis characteristic provides opportunities to develop machine learning models for predicting changes in antibody binding by antigen mutations.\nWe report experimental benchmark results on AVIDa-hIL6 by using machine learning models.\nThe results indicate that the existing models have potential, but further research is needed to generalize them to predict effective antibodies against unknown mutants.\nThe dataset is available at https://avida-hil6.cognanous.com.", "keywords": "antibody;antigen-antibody interaction;drug discovery;VHH;immunization;machine learning", "primary_area": "", "supplementary_material": "/attachment/e207b912adc6b0114044963f283d1abb03fd199f.pdf", "author": "Hirofumi Tsuruta;Hiroyuki Yamazaki;Ryota Maeda;Ryotaro Tamura;Jennifer N. 
Wei;Zelda E Mariet;Poomarin Phloyphisut;Hidetoshi Shimokawa;Joseph R. Ledsam;Lucy J Colwell;Akihiro Imura", "authorids": "~Hirofumi_Tsuruta1;~Hiroyuki_Yamazaki1;~Ryota_Maeda1;~Ryotaro_Tamura1;~Jennifer_N._Wei1;~Zelda_E_Mariet1;~Poomarin_Phloyphisut1;~Hidetoshi_Shimokawa1;~Joseph_R._Ledsam1;~Lucy_J_Colwell1;~Akihiro_Imura1", "gender": "M;M;;M;F;M;M;M;;M;F", "homepage": ";https://cognanous.com/;https://www.cognano.co.jp;;https://zelda.lids.mit.edu/;;;;https://research.google/people/106569/;https://cognanous.com/?lang=en;", "dblp": ";;;;164/7319;;;222/1593;13/10359;;251/5551", "google_scholar": "https://scholar.google.co.jp/citations?user=t8xmSZcAAAAJ;;;;twuEPEEAAAAJ;;;rnDYOcQAAAAJ;GVc-U0IAAAAJ;;H9ay0NkAAAAJ", "orcid": ";0000-0002-7690-5522;0000-0002-1514-1071;;;;;0000-0001-9917-7196;0000-0003-3148-0337;;0000-0003-3567-9511", "linkedin": ";yamazaki-hiroyuki-481622117?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BCPrSP%2BKcT5mZi03e%2BDuwGg%3D%3D;;ryotaro-tamura-364082117/;;poomarinph/;hidetoshi-shimokawa-9b94209/;;;akihiro-imura-127599242/;", "or_profile": "~Hirofumi_Tsuruta1;~Hiroyuki_Yamazaki1;~Ryota_Maeda1;~Ryotaro_Tamura1;~Zelda_E_Mariet1;~Poomarin_Phloyphisut1;~Hidetoshi_Shimokawa1;~Joseph_R._Ledsam1;~Lucy_J_Colwell1;~Akihiro_Imura1;~Jennifer_Wei1", "aff": "SAKURA internet Inc.;Shizuoka City Shizuoka Hospital;COGNANO Inc.;SAKURA internet Inc.;Google;Google;Google;Google;University of Cambridge;biorhodes, Inc.;Google Deepmind", "aff_domain": "sakura.ad.jp;shizuokahospital.jp;cognano.co.jp;sakura.ad.jp;google.com;google.com;google.com;google.com;cam.ac.uk;biorhodes.co.jp;google.com", "position": "Researcher;MD;Researcher;R&D Engineer;Research Scientist;Researcher;Researcher;Researcher;Associate Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\ntsuruta2023avidahil,\ntitle={{AVID}a-h{IL}6: A Large-Scale {VHH} Dataset Produced from an Immunized Alpaca for Predicting Antigen-Antibody Interactions},\nauthor={Hirofumi Tsuruta and Hiroyuki Yamazaki and Ryota Maeda and Ryotaro Tamura and Jennifer N. Wei and Zelda E Mariet and Poomarin Phloyphisut and Hidetoshi Shimokawa and Joseph R. 
Ledsam and Lucy J Colwell and Akihiro Imura},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=BgY17iEnTb}\n}", "github": "", "project": "", "reviewers": "CS39;Bn86;E2ji", "pdf_size": 8543930, "rating": "6;7;7", "confidence": "3;2;3", "wc_summary_and_contributions": "121;39;99", "wc_strengths": "91;17;155", "wc_improvement": "201;40;32", "wc_limitations": "359;1;32", "wc_correctness": "170;1;12", "wc_clarity": "205;1;8", "wc_relation_to_prior_work": "114;1;35", "wc_documentation": "83;1;10", "wc_additional_feedback": "1;1;1", "wc_review": "1345;102;384", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "3136;180;287", "reply_reviewers": "0;0;0", "reply_authors": "5;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 86.33333333333333, 34.653843782312066 ], "wc_strengths_avg": [ 87.66666666666667, 56.387547876774676 ], "wc_improvement_avg": [ 91.0, 77.85028366465126 ], "wc_limitations_avg": [ 130.66666666666666, 161.9512958337235 ], "wc_correctness_avg": [ 61.0, 77.20535387307454 ], "wc_clarity_avg": [ 71.33333333333333, 94.55979883415338 ], "wc_relation_to_prior_work_avg": [ 50.0, 47.33568069296845 ], "wc_documentation_avg": [ 31.333333333333332, 36.718145680606234 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 610.3333333333334, 532.0916796526286 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1201.0, 1368.948745083857 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1286832441342489651&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sakura.ad.jp;shizuokahospital.jp;cognano.co.jp;sakura.ad.jp;google.com;google.com;google.com;google.com;cam.ac.uk;biorhodes.co.jp;google.com", "author_num": 11, "aff_unique_index": "0;1;2;0;3;3;3;3;4;5;6", "aff_unique_norm": "SAKURA internet Inc.;Shizuoka City Shizuoka Hospital;COGNANO Inc.;Google;University of Cambridge;biorhodes, Inc.;DeepMind", "aff_unique_dep": ";;;Google;;;DeepMind", "aff_unique_url": "https://www.sakura.ne.jp;;;https://www.google.com;https://www.cam.ac.uk;;https://deepmind.com", "aff_unique_abbr": ";;;Google;Cambridge;;DeepMind", "aff_campus_unique_index": "1;1;1;1;2", "aff_campus_unique": ";Mountain View;Cambridge", "aff_country_unique_index": "0;0;1;0;1;1;1;1;2;1;2", "aff_country_unique": "Japan;United States;United Kingdom" }, { "title": "Participatory Personalization in Classification", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72470", "id": "Bj1QSgiBPP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2dbb8bfe4cd3875609b23799830ee865-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Bj1QSgiBPP", "openreview": "https://openreview.net/forum?id=Bj1QSgiBPP", "poster": "/media/PosterPDFs/NeurIPS%202023/72470.png?t=1702355980.205178", "slides": "https://nips.cc/virtual/2023/poster/72470", "video": "https://nips.cc/virtual/2023/poster/72470", "author_site": "Hailey Joren, Chirag Nagpal, Katherine Heller, Berk Ustun", "tldr": "", "abstract": "Machine learning models are often personalized based on information that is protected, sensitive, self-reported, 
or costly to acquire. These models use information about people, but neither facilitate nor inform their *consent*. Individuals cannot opt out of reporting information that a model needs to personalize their predictions nor tell whether they benefit from personalization in the first place. We introduce a new family of prediction models, called participatory systems, that let individuals opt into personalization at prediction time. We present a model-agnostic algorithm to learn participatory systems for supervised learning tasks where models are personalized with categorical group attributes. We conduct a comprehensive empirical study of participatory systems in clinical prediction tasks, comparing them to common approaches for personalization and imputation. Our results show that participatory systems can facilitate and inform consent in a way that improves performance and privacy across all groups who report personal data.", "keywords": "healthcare;algorithmic fairness;data privacy;classification;interpretability", "primary_area": "", "supplementary_material": "/attachment/f55c04c41606df51ffaa9c28fb3c48bd78f60168.pdf", "author": "Hailey Joren;Chirag Nagpal;Katherine A Heller;Berk Ustun", "authorids": "~Hailey_Joren1;~Chirag_Nagpal1;~Katherine_A_Heller1;~Berk_Ustun1", "gender": ";;F;M", "homepage": ";http://cs.cmu.edu/~chiragn;;http://www.berkustun.com", "dblp": ";149/2771;32/4403;138/5585", "google_scholar": ";rAbWdAkAAAAJ;;6z_XWYcAAAAJ", "orcid": ";;;0000-0001-5188-3155", "linkedin": ";;;berkustun/", "or_profile": "~Hailey_Joren1;~Chirag_Nagpal1;~Katherine_A_Heller1;~Berk_Ustun1", "aff": ";;Google;School of Engineering and Applied Sciences, Harvard University", "aff_domain": ";;google.com;seas.harvard.edu", "position": ";;Researcher;Associate ", "bibtex": "@inproceedings{\njames2023participatory,\ntitle={Participatory Personalization in Classification},\nauthor={Hailey Joren and Chirag Nagpal and Katherine A Heller and Berk Ustun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Bj1QSgiBPP}\n}", "github": "", "project": "", "reviewers": "tDFi;ekFJ;SNn4;LFwB", "pdf_size": 1059989, "rating": "3;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "2;3;4;3", "wc_summary": "122;59;224;56", "wc_strengths": "73;42;185;71", "wc_weaknesses": "130;13;271;277", "wc_questions": "88;58;315;2", "wc_limitations": "18;16;50;2", "wc_review": "431;188;1045;408", "wc_reply_reviewers": "0;14;51;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 115.25, 68.09322653539043 ], "wc_strengths_avg": [ 92.75, 54.655169014467425 ], "wc_weaknesses_avg": [ 172.75, 109.39464109361116 ], "wc_questions_avg": [ 115.75, 119.10578281510936 ], "wc_limitations_avg": [ 21.5, 17.57128339080558 ], "wc_review_avg": [ 518.0, 318.70754619243013 ], "wc_reply_reviewers_avg": [ 22.0, 18.641351882307248 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.44022545316281186, "gs_citation": 6, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=6680921486429884908&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";;google.com;seas.harvard.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Harvard University", "aff_unique_dep": "Google;School of Engineering and Applied Sciences", "aff_unique_url": "https://www.google.com;https://www.harvard.edu", "aff_unique_abbr": "Google;Harvard", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A normative theory of social conflict", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72469", "id": "BkQM8huiIc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8ec61d4084443d29c9e47ac60f9aea31-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BkQM8huiIc", "openreview": "https://openreview.net/forum?id=BkQM8huiIc", "poster": "/media/PosterPDFs/NeurIPS%202023/72469.png?t=1701469630.1965966", "slides": "https://nips.cc/virtual/2023/poster/72469", "video": "https://nips.cc/virtual/2023/poster/72469", "author_site": "Sergey Shuvaev, Evgeny Amelchenko, Dmitry Smagin, Natalia Kudryavtseva, Grigori Enikolopov, Alex Koulakov", "tldr": "", "abstract": "Social conflict is a survival mechanism yielding both normal and pathological behaviors. To understand its underlying principles, we collected behavioral and whole-brain neural data from mice advancing through stages of social conflict. We modeled the animals\u2019 interactions as a normal-form game using Bayesian inference to account for the partial observability of animals\u2019 strengths. We find that our behavioral and neural data are consistent with the first-level Theory of Mind (1-ToM) model where mice form \u201cprimary\u201d beliefs about the strengths of all mice involved and \u201csecondary\u201d beliefs that estimate the beliefs of their opponents. Our model identifies the brain regions that carry the information about these beliefs and offers a framework for studies of social behaviors in partially observable settings.", "keywords": "neuroscience;decision-making;normative modeling;game theory;Bayesian methods;POMDP;inverse rational control;belief;theory of mind", "primary_area": "", "supplementary_material": "", "author": "Sergey A. Shuvaev;Evgeny M Amelchenko;Dmitry Smagin;Natalia Kudryavtseva;Grigori Enikolopov;Alexei A. Koulakov", "authorids": "~Sergey_A._Shuvaev1;~Evgeny_M_Amelchenko1;smagin@ngs.ru;n.n.kudryavtseva@gmail.com;~Grigori_Enikolopov1;~Alexei_A._Koulakov1", "gender": ";;;;M;M", "homepage": ";;;;;", "dblp": ";;;;;25/232", "google_scholar": ";;;;;", "orcid": ";0000-0002-4363-8747;;;0000-0001-8178-8917;", "linkedin": ";;;;;", "or_profile": "~Sergey_A._Shuvaev1;~Evgeny_M_Amelchenko1;smagin@ngs.ru;n.n.kudryavtseva@gmail.com;~Grigori_Enikolopov1;~Alexei_A._Koulakov1", "aff": ";State University of New York at Stony Brook;;;State University of New York at Stony Brook;Cold Spring Harbor Laboratory", "aff_domain": ";stonybrook.edu;;;stonybrook.edu;cshl.edu", "position": ";Researcher;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nshuvaev2023a,\ntitle={A normative theory of social conflict},\nauthor={Sergey A. Shuvaev and Evgeny M Amelchenko and Dmitry Smagin and Natalia Kudryavtseva and Grigori Enikolopov and Alexei A. 
Koulakov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BkQM8huiIc}\n}", "github": "", "project": "", "reviewers": "uQtV;RYok;UypE;QTqc;hLmb", "pdf_size": 1737348, "rating": "4;5;5;6;6", "confidence": "3;2;2;1;4", "soundness": "2;3;2;3;3", "novelty": "2;2;2;2;3", "presentation": "3;3;2;2;3", "wc_summary": "109;62;42;84;37", "wc_strengths": "40;44;29;103;51", "wc_weaknesses": "164;76;489;229;142", "wc_questions": "69;2;2;134;152", "wc_limitations": "64;2;52;10;1", "wc_review": "446;186;614;560;383", "wc_reply_reviewers": "45;35;15;333;72", "wc_reply_authors": "67;13;64;32;269", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;3;2;3", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 2.4, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 66.8, 26.843248685656512 ], "wc_strengths_avg": [ 53.4, 25.80387567788994 ], "wc_weaknesses_avg": [ 220.0, 143.1069530106766 ], "wc_questions_avg": [ 71.8, 63.328982306681674 ], "wc_limitations_avg": [ 25.8, 26.746214685446613 ], "wc_review_avg": [ 437.8, 149.96853003213707 ], "wc_reply_reviewers_avg": [ 100.0, 117.93896726697245 ], "wc_reply_authors_avg": [ 89.0, 92.23231537807125 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.1048284836721918, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14510944370970228653&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";stonybrook.edu;;;stonybrook.edu;cshl.edu", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "State University of New York at Stony Brook;Cold Spring Harbor Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.stonybrook.edu;https://www.cshl.edu", "aff_unique_abbr": "SUNY Stony Brook;CSHL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "How many samples are needed to leverage smoothness?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72468", "id": "BklIgOO76D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54dcf25318f9de5a7a01f0a4125c541e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BklIgOO76D", "openreview": "https://openreview.net/forum?id=BklIgOO76D", "poster": "/media/PosterPDFs/NeurIPS%202023/72468.png?t=1702386300.923645", "slides": "https://nips.cc/virtual/2023/poster/72468", "video": "https://nips.cc/virtual/2023/poster/72468", "author_site": "Vivien Cabannes, Stefano Vigogna", "tldr": "", "abstract": "A core principle in statistical learning is that smoothness of target functions allows one to break the curse of dimensionality. However, learning a smooth function seems to require enough samples close to one another to get a meaningful estimate of high-order derivatives, which would be hard in machine learning problems where the ratio between the number of samples and the input dimension is relatively small.
By deriving new lower bounds on the generalization error, this paper formalizes this intuition, and then investigates the role of constants and transitory regimes, which are usually omitted from classical learning theory statements even though they play a dominant role in practice.", "keywords": "Statistical learning;breaking the curse of dimensionality;smoothness priors;kernel methods", "primary_area": "", "supplementary_material": "/attachment/99ca2b70a9a82d5683497c412838084097754a83.zip", "author": "Vivien Cabannes;Stefano Vigogna", "authorids": "~Vivien_Cabannes1;~Stefano_Vigogna1", "gender": "Not Specified;", "homepage": "https://viviencabannes.github.io/;https://www.mat.uniroma2.it/~vigogna/", "dblp": ";188/7395", "google_scholar": ";ndTj8zEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Vivien_Cabannes1;~Stefano_Vigogna1", "aff": "META;Universit\u00e0 degli Studi di Genova", "aff_domain": "meta.com;unige.it", "position": "Postdoc;Postdoc", "bibtex": "@inproceedings{\ncabannes2023how,\ntitle={How many samples are needed to leverage smoothness?},\nauthor={Vivien Cabannes and Stefano Vigogna},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BklIgOO76D}\n}", "github": "", "project": "", "reviewers": "z9Dq;n2Cg;7mBD;yXo1;jRbT", "pdf_size": 0, "rating": "5;6;6;6;7", "confidence": "3;3;3;3;3", "soundness": "3;2;4;3;4", "novelty": "3;2;2;3;3", "presentation": "1;1;4;3;2", "wc_summary": "66;80;61;372;86", "wc_strengths": "178;38;32;300;109", "wc_weaknesses": "255;198;162;450;391", "wc_questions": "27;112;2;417;48", "wc_limitations": "1;57;21;1;33", "wc_review": "527;485;278;1540;667", "wc_reply_reviewers": "0;764;56;151;46", "wc_reply_authors": "0;883;0;0;0", "reply_reviewers": "0;3;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 1.16619037896906 ], "wc_summary_avg": [ 133.0, 119.84323093107929 ], "wc_strengths_avg": [ 131.4, 99.70275823667068 ], "wc_weaknesses_avg": [ 291.2, 111.2356058103699 ], "wc_questions_avg": [ 121.2, 152.33043031515405 ], "wc_limitations_avg": [ 22.6, 21.10544953323667 ], "wc_review_avg": [ 699.4, 438.38914220130954 ], "wc_reply_reviewers_avg": [ 203.4, 284.5667584240999 ], "wc_reply_authors_avg": [ 176.6, 353.20000000000005 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12669258436940585535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;unige.it", "author_num": 2, "aff_unique_index": "1", "aff_unique_norm": ";Universit\u00e0 degli Studi di Genova", "aff_unique_dep": ";", "aff_unique_url": ";https://www.unige.it", "aff_unique_abbr": ";UniGe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Italy" }, { "title": "Learning to Discover Skills through Guidance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72467", "id": "Bkrmr9LjeI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/59d4e18a60490b9ed9913f3be2b14839-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Bkrmr9LjeI", "openreview": 
"https://openreview.net/forum?id=Bkrmr9LjeI", "poster": "/media/PosterPDFs/NeurIPS%202023/72467.png?t=1702267456.635155", "slides": "https://nips.cc/virtual/2023/poster/72467", "video": "https://nips.cc/virtual/2023/poster/72467", "author_site": "HYUNSEUNG KIM, BYUNG KUN LEE, Hojoon Lee, Dongyoon Hwang, Sejik Park, Kyushik Min, Jaegul Choo", "tldr": "", "abstract": "In the field of unsupervised skill discovery (USD), a major challenge is limited exploration, primarily due to substantial penalties when skills deviate from their initial trajectories. To enhance exploration, recent methodologies employ auxiliary rewards to maximize the epistemic uncertainty or entropy of states. However, we have identified that the effectiveness of these rewards declines as the environmental complexity rises. Therefore, we present a novel USD algorithm, skill **disco**very with gui**dance** (**DISCO-DANCE**), which (1) selects the guide skill that possesses the highest potential to reach unexplored states, (2) guides other skills to follow guide skill, then (3) the guided skills are dispersed to maximize their discriminability in unexplored states. Empirical evaluation demonstrates that DISCO-DANCE outperforms other USD baselines in challenging environments, including two navigation benchmarks and a continuous control benchmark. Qualitative visualizations and code of DISCO-DANCE are available at https://mynsng.github.io/discodance/.", "keywords": "Unsupervised skill discovery;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Hyunseung Kim;Byungkun Lee;Hojoon Lee;Dongyoon Hwang;Sejik Park;Kyushik Min;Jaegul Choo", "authorids": "~Hyunseung_Kim1;~Byungkun_Lee1;~Hojoon_Lee1;~Dongyoon_Hwang1;~Sejik_Park1;~Kyushik_Min1;~Jaegul_Choo1", "gender": "M;M;M;M;;M;M", "homepage": ";;https://joonleesky.github.io/;;;https://github.com/Kyushik;https://sites.google.com/site/jaegulchoo/", "dblp": "244/0949;;;;254/3093;228/4620;07/2074", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;RFjZjzkAAAAJ;;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.co.kr/citations?user=dz8VK3IAAAAJ;GHJYsLEAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;ks-min/;", "or_profile": "~Hyunseung_Kim1;~Byungkun_Lee1;~Hojoon_Lee1;~Dongyoon_Hwang1;~Sejik_Park1;~Kyushik_Min1;~Jaegul_Choo1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Kakao;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kakaocorp.com;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;MS student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkim2023learning,\ntitle={Learning to Discover Skills through Guidance},\nauthor={Hyunseung Kim and Byungkun Lee and Hojoon Lee and Dongyoon Hwang and Sejik Park and Kyushik Min and Jaegul Choo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Bkrmr9LjeI}\n}", "github": "", "project": "", "reviewers": "gakY;b566;oWju;EWQN;adLT", "pdf_size": 16754737, "rating": "3;4;6;6;7", "confidence": "5;4;4;3;4", "soundness": "3;2;3;3;3", "novelty": "1;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "136;83;88;83;149", "wc_strengths": "81;72;74;33;85", "wc_weaknesses": 
"39;176;65;66;27", "wc_questions": "30;17;200;112;139", "wc_limitations": "35;35;43;1;10", "wc_review": "321;383;470;295;410", "wc_reply_reviewers": "0;0;0;25;17", "wc_reply_authors": "594;59;59;82;78", "reply_reviewers": "0;0;0;1;1", "reply_authors": "4;2;2;3;3", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.8, 28.68727941091661 ], "wc_strengths_avg": [ 69.0, 18.601075237738275 ], "wc_weaknesses_avg": [ 74.6, 52.879485625334894 ], "wc_questions_avg": [ 99.6, 68.488247166941 ], "wc_limitations_avg": [ 24.8, 16.277591959500644 ], "wc_review_avg": [ 375.8, 62.652693477615145 ], "wc_reply_reviewers_avg": [ 8.4, 10.594338110519223 ], "wc_reply_authors_avg": [ 174.4, 210.0139043015962 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13335144848436179796&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kakaocorp.com;kaist.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Kakao Corp.", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.kakao.com", "aff_unique_abbr": "KAIST;Kakao", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Koopman Kernel Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72466", "id": "BmIW6U0rz8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34678d08b36076de986df95c5bbba92f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BmIW6U0rz8", "openreview": "https://openreview.net/forum?id=BmIW6U0rz8", "poster": "/media/PosterPDFs/NeurIPS%202023/72466.png?t=1699603885.380837", "slides": "https://nips.cc/virtual/2023/poster/72466", "video": "https://nips.cc/virtual/2023/poster/72466", "author_site": "Petar Bevanda, Max Beier, Armin Lederer, Stefan Sosnowski, Eyke H\u00fcllermeier, Sandra Hirche", "tldr": "", "abstract": "Many machine learning approaches for decision making, such as reinforcement learning, rely on simulators or predictive models to forecast the time-evolution of quantities of interest, e.g., the state of an agent or the reward of a policy. Forecasts of such complex phenomena are commonly described by highly nonlinear dynamical systems, making their use in optimization-based decision-making challenging.\nKoopman operator theory offers a beneficial paradigm for addressing this problem by characterizing forecasts via linear time-invariant (LTI) ODEs, turning multi-step forecasts into sparse matrix multiplication.\nThough there exists a variety of learning approaches, they usually lack crucial learning-theoretic guarantees, making the behavior of the obtained models with increasing data and dimensionality unclear.\nWe address the aforementioned by deriving a universal Koopman-invariant reproducing kernel Hilbert space (RKHS) that solely spans transformations into LTI dynamical systems. 
The resulting Koopman Kernel Regression (KKR) framework enables the use of statistical learning tools from function approximation for novel convergence results and generalization error bounds under weaker assumptions than existing work. Our experiments demonstrate superior forecasting performance compared to Koopman operator and sequential data predictors in RKHS.", "keywords": "Kernel Methods;Regression;Statistical Learning Theory;Koopman Operator;Mode Decomposition;Dynamical Systems;Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/59087b6b52447450b972c7e8d2229c4dc6b8f77d.zip", "author": "Petar Bevanda;Max Beier;Armin Lederer;Stefan Georg Sosnowski;Eyke H\u00fcllermeier;Sandra Hirche", "authorids": "~Petar_Bevanda1;~Max_Beier1;~Armin_Lederer1;~Stefan_Georg_Sosnowski1;~Eyke_H\u00fcllermeier1;~Sandra_Hirche1", "gender": "M;M;;;M;F", "homepage": "https://www.ce.cit.tum.de/itr/bevanda/;;;;https://cs.uni-paderborn.de/index.php?id=60202;http://www.itr.ei.tum.de", "dblp": "284/8547;308/6359;202/5716;;h/EykeHullermeier;89/6985", "google_scholar": "Hne4SYQAAAAJ;https://scholar.google.de/citations?user=iZppzEYAAAAJ;6yB84RUAAAAJ;;https://scholar.google.de/citations?user=usVJeNN3xFAC;", "orcid": "0000-0001-8205-3322;0000-0001-7772-2819;0000-0001-6263-5608;;0000-0002-9944-4108;", "linkedin": "https://linkedin.com/in/petar-bevanda-5a4498120;;;;;", "or_profile": "~Petar_Bevanda1;~Max_Beier1;~Armin_Lederer1;~Stefan_Georg_Sosnowski1;~Eyke_H\u00fcllermeier1;~Sandra_Hirche1", "aff": "Technische Universit\u00e4t M\u00fcnchen, Chair of Information-oriented Control;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Technical University Munich", "aff_domain": "tum.de;tum.de;tum.de;;lmu.de;tum.de", "position": "PhD student;MS student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbevanda2023koopman,\ntitle={Koopman Kernel Regression},\nauthor={Petar Bevanda and Max Beier and Armin Lederer and Stefan Georg Sosnowski and Eyke H{\\\"u}llermeier and Sandra Hirche},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BmIW6U0rz8}\n}", "github": "", "project": "", "reviewers": "oAtn;GSvu;xMDr;aayt", "pdf_size": 466719, "rating": "4;5;6;7", "confidence": "3;4;3;3", "soundness": "2;2;3;4", "novelty": "2;3;3;3", "presentation": "2;2;2;4", "wc_summary": "40;47;81;111", "wc_strengths": "34;22;65;112", "wc_weaknesses": "52;154;111;145", "wc_questions": "269;435;152;18", "wc_limitations": "60;8;7;4", "wc_review": "455;666;416;390", "wc_reply_reviewers": "24;203;13;7", "wc_reply_authors": "0;736;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 69.75, 28.41984341969533 ], "wc_strengths_avg": [ 58.25, 34.77337343428158 ], "wc_weaknesses_avg": [ 115.5, 40.0156219494337 ], "wc_questions_avg": [ 218.5, 153.3337862312152 ], "wc_limitations_avg": [ 19.75, 23.284920012746447 ], "wc_review_avg": [ 481.75, 108.86315951689075 ], "wc_reply_reviewers_avg": [ 61.75, 81.77828256939614 ], "wc_reply_authors_avg": [ 184.0, 318.69734859267345 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], 
"corr_rating_confidence": -0.2581988897471611, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16987466684556974430&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "email": "tum.de;tum.de;tum.de;;lmu.de;tum.de", "author_num": 6, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": "Chair of Information-oriented Control;;", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.lmu.de", "aff_unique_abbr": "TUM;TUM;LMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Noise-Adaptive Thompson Sampling for Linear Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72465", "id": "BnV2M2WFaY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4a6824f8f137e78f18e73d9cfc1d22ed-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BnV2M2WFaY", "openreview": "https://openreview.net/forum?id=BnV2M2WFaY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72465", "video": "https://nips.cc/virtual/2023/poster/72465", "author_site": "Ruitu Xu, Yifei Min, Tianhao Wang", "tldr": "", "abstract": "Linear contextual bandits represent a fundamental class of models with numerous real-world applications, and it is critical to develop algorithms that can effectively manage noise with unknown variance, ensuring provable guarantees for both worst-case constant-variance noise and deterministic reward scenarios. In this paper, we study linear contextual bandits with heteroscedastic noise and propose the first noise-adaptive Thompson sampling-style algorithm that achieves a variance-dependent regret upper bound of $\\widetilde O\\Big(d^{3/2} + d^{3/2} \\sqrt{\\sum_{t=1}^T \\sigma_t^2}\\Big)$, where $d$ is the dimension of the context vectors and $\\sigma_t^2$ is the variance of the reward in round $t$. This recovers the existing $\\widetilde O(d^{3/2}\\sqrt{T})$ regret guarantee in the constant-variance regime and further improves to $\\widetilde O(d^{3/2})$ in the deterministic regime, thus achieving a smooth interpolation in between. 
Our approach utilizes a stratified sampling procedure to overcome the too-conservative optimism in the linear Thompson sampling algorithm for linear contextual bandits.", "keywords": "Linear Contextual Bandit;Thompson Sampling;Noise-Adaptive", "primary_area": "", "supplementary_material": "/attachment/c93abac9cd08afe22c81fabd6f8d7b7ed7c0ad97.pdf", "author": "Ruitu Xu;Yifei Min;Tianhao Wang", "authorids": "~Ruitu_Xu1;~Yifei_Min1;~Tianhao_Wang1", "gender": "M;;M", "homepage": ";;https://tianhaowang.ttic.edu", "dblp": "211/7813;;145/3288-2", "google_scholar": "-MccX84AAAAJ;;m45LD1kAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ruitu_Xu1;~Yifei_Min1;~Tianhao_Wang1", "aff": "Yale University;;Yale University", "aff_domain": "yale.edu;;yale.edu", "position": "PhD student;;PhD student", "bibtex": "@inproceedings{\nxu2023noiseadaptive,\ntitle={Noise-Adaptive Thompson Sampling for Linear Contextual Bandits},\nauthor={Ruitu Xu and Yifei Min and Tianhao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BnV2M2WFaY}\n}", "github": "", "project": "", "reviewers": "xGkR;AK5v;oXdZ;Fr5q", "pdf_size": 669283, "rating": "3;7;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;4;4;4", "wc_summary": "78;398;171;74", "wc_strengths": "37;308;150;80", "wc_weaknesses": "192;172;35;116", "wc_questions": "39;149;93;158", "wc_limitations": "13;2;6;1", "wc_review": "359;1029;455;429", "wc_reply_reviewers": "0;54;101;18", "wc_reply_authors": "77;40;54;51", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 180.25, 131.57198600006006 ], "wc_strengths_avg": [ 143.75, 103.049442016927 ], "wc_weaknesses_avg": [ 128.75, 60.87435831283973 ], "wc_questions_avg": [ 109.75, 47.840228887412316 ], "wc_limitations_avg": [ 5.5, 4.716990566028302 ], "wc_review_avg": [ 568.0, 268.4641503068892 ], "wc_reply_reviewers_avg": [ 43.25, 38.596470045847454 ], "wc_reply_authors_avg": [ 55.5, 13.46291201783626 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17050722003619276394&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "yale.edu;;yale.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Computationally Efficient Sparsified Online Newton Method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72464", "id": "BopG5dhH7L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b43289db08ed60edc6451cb2132e203-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BopG5dhH7L", "openreview": "https://openreview.net/forum?id=BopG5dhH7L", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72464", "video": "https://nips.cc/virtual/2023/poster/72464", "author_site": "Fnu Devvrit, Sai Surya Duvvuri, Rohan 
Anil, Vineet Gupta, Cho-Jui Hsieh, Inderjit Dhillon", "tldr": "", "abstract": "Second-order methods hold significant promise for enhancing the convergence of deep neural network training; however, their large memory and computational demands have limited their practicality. Thus there is a need for scalable second-order methods that can efficiently train large models. In this paper, we introduce the Sparsified Online Newton~(SONew) method, a memory-efficient second-order algorithm that yields a sparsified yet effective preconditioner. The algorithm emerges from a novel use of the LogDet matrix divergence measure; we combine it with sparsity constraints to minimize regret in the online convex optimization framework. Empirically, we test our method on large scale benchmarks of up to 1B parameters. We achieve up to $30\\%$ faster convergence, $3.4\\%$ relative improvement in validation performance, and $80\\%$ relative improvement in training loss, in comparison to memory efficient optimizers including first order methods. Powering the method is a surprising fact -- imposing structured sparsity patterns, like tridiagonal and banded structure, requires little to no overhead, making it as efficient and parallelizable as first-order methods. In wall-clock time, tridiagonal SONew is only about $3\\%$ slower per step than first-order methods but gives overall gains due to much faster convergence. In contrast, one of the state-of-the-art (SOTA) memory-intensive second-order methods, Shampoo, is unable to scale to large benchmarks. Additionally, while Shampoo necessitates significant engineering efforts to scale to large benchmarks, SONew offers a more straightforward implementation, increasing its practical appeal. SONew code is available at: https://github.com/devvrit/SONew", "keywords": "Optimization;Second order methods;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/fa10f73f3c98a1be670a048a0b3a047289c8a5d6.pdf", "author": "Fnu Devvrit;Sai Surya Duvvuri;Rohan Anil;Vineet Gupta;Cho-Jui Hsieh;Inderjit S Dhillon", "authorids": "~Fnu_Devvrit1;~Sai_Surya_Duvvuri1;~Rohan_Anil1;~Vineet_Gupta1;~Cho-Jui_Hsieh1;~Inderjit_S_Dhillon1", "gender": "M;M;M;M;M;M", "homepage": ";;;;http://web.cs.ucla.edu/~chohsieh/index.html;http://www.cs.utexas.edu/users/inderjit/", "dblp": ";277/6122;182/1833;g/VineetGupta;14/2770;d/InderjitSDhillon", "google_scholar": "c86HtPoAAAAJ;UL3980gAAAAJ;;u-V83_EAAAAJ;Wy89g4IAAAAJ;xBv5ZfkAAAAJ", "orcid": ";;;;;", "linkedin": "devvrit/;sai-surya-duvvuri-79903511b/;;;;inderjit-dhillon-a20888b0/", "or_profile": "~Fnu_Devvrit1;~Sai_Surya_Duvvuri1;~Rohan_Anil1;~Vineet_Gupta1;~Cho-Jui_Hsieh1;~Inderjit_S_Dhillon1", "aff": ", University of Texas at Austin;University of Texas at Austin;Google Brain ;Google;Amazon;University of Texas, Austin", "aff_domain": "cs.utexas.edu;cs.utexas.edu;google.com;google.com;amazon.com;utexas.edu", "position": "PhD student;PhD student;Principal Engineer;Software Engineer;visiting scholar;Full Professor", "bibtex": "@inproceedings{\ndevvrit2023a,\ntitle={A Computationally Efficient Sparsified Online Newton Method},\nauthor={Fnu Devvrit and Sai Surya Duvvuri and Rohan Anil and Vineet Gupta and Cho-Jui Hsieh and Inderjit S Dhillon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BopG5dhH7L}\n}", "github": "", "project": "", "reviewers": "jEyd;3rTc;UQcK;XomD;J48q;rRBk;KiQx", "pdf_size": 1482561, "rating": "5;5;5;6;7;7;7", "confidence": 
"3;4;4;1;4;3;3", "soundness": "2;3;2;3;3;3;3", "novelty": "2;2;2;3;3;2;4", "presentation": "3;4;2;3;3;3;4", "wc_summary": "115;32;102;68;113;69;71", "wc_strengths": "37;60;31;42;51;111;81", "wc_weaknesses": "101;202;455;104;122;275;63", "wc_questions": "151;98;2;33;18;2;65", "wc_limitations": "2;4;2;49;99;42;26", "wc_review": "406;396;592;296;403;499;306", "wc_reply_reviewers": "13;63;61;33;18;47;134", "wc_reply_authors": "47;47;47;288;0;0;0", "reply_reviewers": "1;1;1;1;1;1;1", "reply_authors": "2;2;2;2;1;1;1", "rating_avg": [ 6.0, 0.9258200997725514 ], "confidence_avg": [ 3.142857142857143, 0.989743318610787 ], "soundness_avg": [ 2.7142857142857144, 0.4517539514526256 ], "novelty_avg": [ 2.5714285714285716, 0.7284313590846836 ], "presentation_avg": [ 3.142857142857143, 0.6388765649999398 ], "wc_summary_avg": [ 81.42857142857143, 27.861171869811642 ], "wc_strengths_avg": [ 59.0, 26.213409872484295 ], "wc_weaknesses_avg": [ 188.85714285714286, 127.42216511763291 ], "wc_questions_avg": [ 52.714285714285715, 51.546689752978274 ], "wc_limitations_avg": [ 32.0, 32.728318886598146 ], "wc_review_avg": [ 414.0, 96.36685855921334 ], "wc_reply_reviewers_avg": [ 52.714285714285715, 37.76889985051679 ], "wc_reply_authors_avg": [ 61.285714285714285, 95.07848530212785 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5714285714285714, 0.4948716593053935 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.15590239111558088, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ESycgdLFuF8J:scholar.google.com/&scioq=A+Computationally+Efficient+Sparsified+Online+Newton+Method&hl=en&as_sdt=0,23", "gs_version_total": 6, "email": "cs.utexas.edu;cs.utexas.edu;google.com;google.com;amazon.com;utexas.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "University of Texas at Austin;Google;Amazon", "aff_unique_dep": ";Google Brain;Amazon.com, Inc.", "aff_unique_url": "https://www.utexas.edu;https://brain.google.com;https://www.amazon.com", "aff_unique_abbr": "UT Austin;Google Brain;Amazon", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Austin;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Designing Robust Transformers using Robust Kernel Density Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72463", "id": "BqTv1Mtuhu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a766f56d2da42cae20b5652970ec04ef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BqTv1Mtuhu", "openreview": "https://openreview.net/forum?id=BqTv1Mtuhu", "poster": "/media/PosterPDFs/NeurIPS%202023/72463.png?t=1701077940.769699", "slides": "https://nips.cc/virtual/2023/poster/72463", "video": "https://nips.cc/virtual/2023/poster/72463", "author_site": "Xing Han, Tongzheng Ren, Tan Nguyen, Khai Nguyen, Joydeep Ghosh, Nhat Ho", "tldr": "", "abstract": "Transformer-based architectures have recently exhibited remarkable successes across different domains beyond just powering large language models. However, existing approaches typically focus on predictive accuracy and computational cost, largely ignoring certain other practical issues such as robustness to contaminated samples. 
In this paper, by re-interpreting the self-attention mechanism as a non-parametric kernel density estimator, we adapt classical robust kernel density estimation methods to develop novel classes of transformers that are resistant to adversarial attacks and data contamination. We first propose methods that down-weight outliers in the reproducing kernel Hilbert space (RKHS) when computing the self-attention operations. We empirically show that these methods produce improved performance over existing state-of-the-art methods, particularly on image data under adversarial attacks. Then we leverage the median-of-means principle to obtain another efficient approach that results in noticeably enhanced performance and robustness on language modeling and time series classification tasks. Our methods can be combined with existing transformers to augment their robustness, thus promising to impact a wide variety of applications.", "keywords": "Transformers;Kernel Density Estimation;Robustness", "primary_area": "", "supplementary_material": "/attachment/61e2d74f41e437a4727eae25d0c6df18db6e94dd.pdf", "author": "Xing Han;Tongzheng Ren;Tan Minh Nguyen;Khai Nguyen;Joydeep Ghosh;Nhat Ho", "authorids": "~Xing_Han1;~Tongzheng_Ren1;~Tan_Minh_Nguyen1;~Khai_Nguyen1;~Joydeep_Ghosh1;~Nhat_Ho1", "gender": "M;M;M;M;M;M", "homepage": "https://aaronhan223.github.io/;https://www.cs.utexas.edu/~tzren/;https://tanmnguyen89.github.io/;https://khainb.com;http://ideal.ece.utexas.edu/ghosh/;https://nhatptnk8912.github.io/", "dblp": "05/2143;211/8004;255/4725;120/4308;51/2272;203/4479", "google_scholar": "Vejou24AAAAJ;VgNDYeYAAAAJ;OizOh88AAAAJ;im5fNaQAAAAJ;;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": "0000-0003-0857-5506;;;;;", "linkedin": "xing-han-628653b6/;;;;;nhat-pham-minh-ho-267b8164/", "or_profile": "~Xing_Han1;~Tongzheng_Ren1;~Tan_Minh_Nguyen1;~Khai_Nguyen1;~Joydeep_Ghosh1;~Nhat_Ho1", "aff": "University of Texas at Austin;Google;University of California, Los Angeles;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;google.com;ucla.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Intern;Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhan2023designing,\ntitle={Designing Robust Transformers using Robust Kernel Density Estimation},\nauthor={Xing Han and Tongzheng Ren and Tan Minh Nguyen and Khai Nguyen and Joydeep Ghosh and Nhat Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BqTv1Mtuhu}\n}", "github": "", "project": "", "reviewers": "KTaQ;c4Ug;SN7k;3B8c;ATq2", "pdf_size": 901997, "rating": "5;5;6;7;7", "confidence": "3;4;2;3;4", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "377;51;49;155;49", "wc_strengths": "1;29;61;130;77", "wc_weaknesses": "1;120;44;32;58", "wc_questions": "1;1;1;69;185", "wc_limitations": "1;1;19;1;8", "wc_review": "381;202;174;387;377", "wc_reply_reviewers": "339;183;0;78;27", "wc_reply_authors": "674;696;0;39;18", "reply_reviewers": "2;2;0;1;1", "reply_authors": "3;3;1;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 136.2, 127.12576450114273 ], "wc_strengths_avg": [ 59.6, 43.88895077351474 ], "wc_weaknesses_avg": [ 51.0, 39.293765408777 ], "wc_questions_avg": 
[ 51.4, 71.80417815141399 ], "wc_limitations_avg": [ 6.0, 7.042726744663604 ], "wc_review_avg": [ 304.2, 95.34233057776592 ], "wc_reply_reviewers_avg": [ 125.4, 123.77172536569084 ], "wc_reply_authors_avg": [ 285.4, 326.57960744663774 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17215263212641311567&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "utexas.edu;google.com;ucla.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "University of Texas at Austin;Google;University of California, Los Angeles", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.utexas.edu;https://www.google.com;https://www.ucla.edu", "aff_unique_abbr": "UT Austin;Google;UCLA", "aff_campus_unique_index": "0;1;2;0;0;0", "aff_campus_unique": "Austin;Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SANFlow: Semantic-Aware Normalizing Flow for Anomaly Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72462", "id": "BqZ70BEtuW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee74a6ade401e200985e2421b20bbae4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BqZ70BEtuW", "openreview": "https://openreview.net/forum?id=BqZ70BEtuW", "poster": "/media/PosterPDFs/NeurIPS%202023/72462.png?t=1701780504.1057544", "slides": "https://nips.cc/virtual/2023/poster/72462", "video": "https://nips.cc/virtual/2023/poster/72462", "author_site": "Daehyun Kim, Sungyong Baik, Tae Hyun Kim", "tldr": "", "abstract": "Visual anomaly detection, the task of detecting abnormal characteristics in images, is challenging due to the rarity and unpredictability of anomalies. In order to reliably model the distribution of normality and detect anomalies, a few works have attempted to exploit the density estimation ability of normalizing flow (NF). However, previous NF-based methods have relied solely on the capability of NF and forcibly transformed the distribution of all features to a single distribution (e.g., unit normal distribution), even though features can have different semantic information and thus follow different distributions. We claim that forcibly learning to transform such diverse distributions to a single distribution with a single network increases the learning difficulty, limiting the capacity of the network to discriminate between normal and abnormal data. As such, we propose to transform the distribution of features at each location of a given image to different distributions. In particular, we train NF to map normal data distribution to distributions with the same mean but different variances at each location of the given image. To enhance the discriminability, we also train NF to map abnormal data distribution to a distribution with a mean that is different from that of normal data, where abnormal data is synthesized with data augmentation. 
The experimental results demonstrate the effectiveness of the proposed framework in improving density modeling and thus anomaly detection performance.", "keywords": "Anomaly Detection;Visual Anomaly Detection;Computer Vision;Normalizing Flow;Anomaly Localization", "primary_area": "", "supplementary_material": "", "author": "Daehyun Kim;Sungyong Baik;Tae Hyun Kim", "authorids": "~Daehyun_Kim6;~Sungyong_Baik1;~Tae_Hyun_Kim2", "gender": "M;M;M", "homepage": "https://sites.google.com/site/lliger9/home?authuser=0;https://dsybaik-hy.github.io/;https://sites.google.com/view/lliger9/", "dblp": ";243/2775;43/11343-6", "google_scholar": "https://scholar.google.com/citations?hl=ko;lQ4gotkAAAAJ;https://scholar.google.co.kr/citations?user=8soccsoAAAAJ", "orcid": ";;0000-0002-7995-3984", "linkedin": ";;", "or_profile": "~Daehyun_Kim6;~Sungyong_Baik1;~Tae_Hyun_Kim2", "aff": "Hanyang University;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2023sanflow,\ntitle={{SANF}low: Semantic-Aware Normalizing Flow for Anomaly Detection},\nauthor={Daehyun Kim and Sungyong Baik and Tae Hyun Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BqZ70BEtuW}\n}", "github": "", "project": "", "reviewers": "cBPv;dSr9;7gCM;VpXm", "pdf_size": 26426120, "rating": "5;5;5;6", "confidence": "3;5;5;4", "soundness": "3;3;3;3", "novelty": "1;2;2;2", "presentation": "3;3;3;3", "wc_summary": "143;68;103;42", "wc_strengths": "33;68;78;33", "wc_weaknesses": "161;59;152;139", "wc_questions": "9;53;14;17", "wc_limitations": "9;1;8;1", "wc_review": "355;249;355;232", "wc_reply_reviewers": "12;0;0;63", "wc_reply_authors": "33;0;0;575", "reply_reviewers": "1;0;0;2", "reply_authors": "2;1;1;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.0, 37.953919428696686 ], "wc_strengths_avg": [ 53.0, 20.310096011589902 ], "wc_weaknesses_avg": [ 127.75, 40.45599461142934 ], "wc_questions_avg": [ 23.25, 17.41228014936585 ], "wc_limitations_avg": [ 4.75, 3.766629793329841 ], "wc_review_avg": [ 297.75, 57.56463758246029 ], "wc_reply_reviewers_avg": [ 18.75, 26.013217794036937 ], "wc_reply_authors_avg": [ 152.0, 244.59047405816932 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15035605176387247008&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Augmenting Language Models with Long-Term Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72461", "id": "BryMFPQ4L6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ebd82705f44793b6f9ade5a669d0f0bf-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=BryMFPQ4L6", "openreview": "https://openreview.net/forum?id=BryMFPQ4L6", "poster": "/media/PosterPDFs/NeurIPS%202023/72461.png?t=1701581538.1958816", "slides": "https://nips.cc/virtual/2023/poster/72461", "video": "https://nips.cc/virtual/2023/poster/72461", "author_site": "Weizhi Wang, Li Dong, Hao Cheng, Xiaodong Liu, Xifeng Yan, Jianfeng Gao, Furu Wei", "tldr": "", "abstract": "Existing large language models (LLMs) can only afford fix-sized inputs due to the input length limit, preventing them from utilizing rich long-context information from past inputs. To address this, we propose a framework, Language Models Augmented with Long-Term Memory (LongMem), which enables LLMs to memorize long history. We design a novel decoupled network architecture with the original backbone LLM frozen as a memory encoder and an adaptive residual side-network as a memory retriever and reader. Such a decoupled memory design can easily cache and update long-term past contexts for memory retrieval without suffering from memory staleness. Enhanced with memory-augmented adaptation training, LongMem can thus memorize long past context and use long-term memory for language modeling. The proposed memory retrieval module can handle unlimited-length context in its memory bank to benefit various downstream tasks. Typically, LongMem can enlarge the long-form memory to 65k tokens and thus cache many-shot extra demonstration examples as long-form memory for in-context learning. Experiments show that our method outperforms strong long-context models on ChapterBreak, a challenging long-context modeling benchmark, and achieves remarkable improvements on memory-augmented in-context learning over LLMs. The results demonstrate that the proposed method is effective in helping language models to memorize and utilize long-form contents.", "keywords": "large language models;long-term memory;long-text modeling and understanding;residual side-network;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Weizhi Wang;Li Dong;Hao Cheng;Xiaodong Liu;Xifeng Yan;Jianfeng Gao;Furu Wei", "authorids": "~Weizhi_Wang1;~Li_Dong1;~Hao_Cheng4;~Xiaodong_Liu1;~Xifeng_Yan1;~Jianfeng_Gao1;~Furu_Wei1", "gender": "M;M;M;;;M;M", "homepage": "https://victorwz.github.io;http://dong.li;https://sites.google.com/site/hcheng2site/Home;;https://sites.cs.ucsb.edu/~xyan/;https://www.microsoft.com/en-us/research/people/jfgao/;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "98/6969;85/5090-4;09/5158-2;65/622;y/XifengYan;92/5339;72/5870", "google_scholar": "UC2_V1MAAAAJ;wEfQgPgAAAAJ;https://scholar.google.com/citations?hl=en;NIewcxMAAAAJ;XZV2eogAAAAJ;https://scholar.google.com/citations?hl=en;G-V1VpwAAAAJ", "orcid": ";;0000-0001-7988-3149;;;;", "linkedin": ";;;;;;", "or_profile": "~Weizhi_Wang1;~Li_Dong1;~Hao_Cheng4;~Xiaodong_Liu1;~Xifeng_Yan1;~Jianfeng_Gao1;~Furu_Wei1", "aff": "University of California, Santa Barbara;Microsoft Research;Microsoft Research;Microsoft Research;UC Santa Barbara;Microsoft Research;Microsoft Research", "aff_domain": "ucsb.edu;microsoft.com;microsoft.com;microsoft.com;ucsb.edu;microsoft.com;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Full Professor;Principal Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nwang2023augmenting,\ntitle={Augmenting Language Models with Long-Term Memory},\nauthor={Weizhi Wang and Li Dong and Hao Cheng and Xiaodong Liu and Xifeng Yan and Jianfeng Gao and Furu 
Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BryMFPQ4L6}\n}", "github": "", "project": "", "reviewers": "Mats;yw5j;k2cK;UtJk;yoPY", "pdf_size": 611939, "rating": "5;6;6;7;7", "confidence": "4;3;4;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;3;2;3", "wc_summary": "22;78;67;32;82", "wc_strengths": "28;56;78;39;52", "wc_weaknesses": "171;53;254;112;147", "wc_questions": "69;1;20;77;5", "wc_limitations": "0;1;1;1;5", "wc_review": "290;189;420;261;291", "wc_reply_reviewers": "144;0;0;0;0", "wc_reply_authors": "165;0;0;0;0", "reply_reviewers": "2;0;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 56.2, 24.547097588106013 ], "wc_strengths_avg": [ 50.6, 16.8949696655543 ], "wc_weaknesses_avg": [ 147.4, 66.46081552313362 ], "wc_questions_avg": [ 34.4, 32.246550203083736 ], "wc_limitations_avg": [ 1.6, 1.7435595774162693 ], "wc_review_avg": [ 290.2, 74.75667194304467 ], "wc_reply_reviewers_avg": [ 28.8, 57.60000000000001 ], "wc_reply_authors_avg": [ 33.0, 66.0 ], "reply_reviewers_avg": [ 0.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 186, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10032266285163330345&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ucsb.edu;microsoft.com;microsoft.com;microsoft.com;ucsb.edu;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;1;1;0;1;1", "aff_unique_norm": "University of California, Santa Barbara;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.ucsb.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UCSB;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimizing Prompts for Text-to-Image Generation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72460", "id": "BsZNWXD3a1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d346d91999074dd8d6073d4c3b13733b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BsZNWXD3a1", "openreview": "https://openreview.net/forum?id=BsZNWXD3a1", "poster": "/media/PosterPDFs/NeurIPS%202023/72460.png?t=1699880403.4005306", "slides": "https://nips.cc/virtual/2023/poster/72460", "video": "https://nips.cc/virtual/2023/poster/72460", "author_site": "Yaru Hao, Zewen Chi, Li Dong, Furu Wei", "tldr": "", "abstract": "Well-designed prompts can guide text-to-image models to generate amazing images. However, performant prompts are often model-specific and misaligned with user input. Instead of laborious human engineering, we propose prompt adaptation, a general framework that automatically adapts original user input to model-preferred prompts. Specifically, we first perform supervised fine-tuning with a pretrained language model on a small collection of manually engineered prompts. Then we use reinforcement learning to explore better prompts. 
We define a reward function that encourages the policy to generate more aesthetically pleasing images while preserving the original user intentions. Experimental results on Stable Diffusion show that our method outperforms manual prompt engineering in terms of both automatic metrics and human preference ratings. Moreover, reinforcement learning further boosts performance, especially on out-of-domain prompts.", "keywords": "prompt adaptation;automatic prompt engineering;text-to-image generation", "primary_area": "", "supplementary_material": "", "author": "Yaru Hao;Zewen Chi;Li Dong;Furu Wei", "authorids": "~Yaru_Hao1;~Zewen_Chi1;~Li_Dong1;~Furu_Wei1", "gender": "F;M;M;M", "homepage": "https://yaruhao.github.io/;;http://dong.li;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "173/4242;220/0954.html;85/5090-4;72/5870", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;MP1GX_0AAAAJ;wEfQgPgAAAAJ;G-V1VpwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yaru_Hao1;~Zewen_Chi1;~Li_Dong1;~Furu_Wei1", "aff": "Microsoft Research Asia;Beijing Institute of Technology;Microsoft Research;Microsoft Research", "aff_domain": "microsoft.com;bit.edu.cn;microsoft.com;microsoft.com", "position": "Researcher;PhD student;Principal Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nhao2023optimizing,\ntitle={Optimizing Prompts for Text-to-Image Generation},\nauthor={Yaru Hao and Zewen Chi and Li Dong and Furu Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BsZNWXD3a1}\n}", "github": "", "project": "", "reviewers": "Kg4J;brRy;zvr2;51Hz", "pdf_size": 4101221, "rating": "6;6;6;7", "confidence": "5;4;4;4", "soundness": "3;2;2;3", "novelty": "3;2;3;4", "presentation": "3;4;3;4", "wc_summary": "89;134;120;127", "wc_strengths": "163;30;86;33", "wc_weaknesses": "96;73;122;87", "wc_questions": "43;31;191;70", "wc_limitations": "50;1;20;56", "wc_review": "441;269;539;373", "wc_reply_reviewers": "14;10;76;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 117.5, 17.18284027743958 ], "wc_strengths_avg": [ 78.0, 53.89341332667657 ], "wc_weaknesses_avg": [ 94.5, 17.867568385205637 ], "wc_questions_avg": [ 83.75, 63.511317889018805 ], "wc_limitations_avg": [ 31.75, 22.38721733489895 ], "wc_review_avg": [ 405.5, 98.451764839438 ], "wc_reply_reviewers_avg": [ 29.25, 27.105119442644042 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 191, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10553021418197605510&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "microsoft.com;bit.edu.cn;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Microsoft;Beijing Institute of Technology", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;http://www.bit.edu.cn/", "aff_unique_abbr": "MSR Asia;BIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "China;United States" }, { 
"title": "Pretraining task diversity and the emergence of non-Bayesian in-context learning for regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72459", "id": "BtAz4a5xDg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e10b2c2e1aa4f8083c37dfe269873f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BtAz4a5xDg", "openreview": "https://openreview.net/forum?id=BtAz4a5xDg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72459", "video": "https://nips.cc/virtual/2023/poster/72459", "author_site": "Allan Ravent\u00f3s, Mansheej Paul, Feng Chen, Surya Ganguli", "tldr": "", "abstract": "Pretrained transformers exhibit the remarkable ability of in-context learning (ICL): they can learn tasks from just a few examples provided in the prompt without updating any weights. This raises a foundational question: can ICL solve fundamentally _new_ tasks that are very different from those seen during pretraining? To probe this question, we examine ICL\u2019s performance on linear regression while varying the diversity of tasks in the pretraining dataset. We empirically demonstrate a _task diversity threshold_ for the emergence of ICL. Below this threshold, the pretrained transformer cannot solve unseen regression tasks, instead behaving like a Bayesian estimator with the _non-diverse pretraining task distribution_ as the prior. Beyond this threshold, the transformer significantly outperforms this estimator; its behavior aligns with that of ridge regression, corresponding to a Gaussian prior over _all tasks_, including those not seen during pretraining. Thus, when pretrained on data with task diversity greater than the threshold, transformers _can_ optimally solve fundamentally new tasks in-context. Importantly, this capability hinges on it deviating from the Bayes optimal estimator with the pretraining distribution as the prior. 
This study also explores the effect of regularization, model capacity and task structure and underscores, in a concrete example, the critical role of task diversity, alongside data and model scale, in the emergence of ICL.", "keywords": "in-context learning;Bayesian inference;transformers;task diversity;emergence", "primary_area": "", "supplementary_material": "", "author": "Allan Raventos;Mansheej Paul;Feng Chen;Surya Ganguli", "authorids": "~Allan_Raventos1;~Mansheej_Paul1;~Feng_Chen13;~Surya_Ganguli1", "gender": ";M;M;M", "homepage": ";https://mansheej.github.io/;;http://ganguli-gang.stanford.edu/surya.html", "dblp": "232/1768;277/6622;21/3047-46.html;56/10453", "google_scholar": "qqtCGpQAAAAJ;https://scholar.google.co.uk/citations?hl=en;;", "orcid": ";;0000-0002-8645-7356;", "linkedin": ";;feng-chen-a0286a185;", "or_profile": "~Allan_Raventos1;~Mansheej_Paul1;~Feng_Chen13;~Surya_Ganguli1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;@stanford.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nraventos2023pretraining,\ntitle={Pretraining task diversity and the emergence of non-Bayesian in-context learning for regression},\nauthor={Allan Raventos and Mansheej Paul and Feng Chen and Surya Ganguli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BtAz4a5xDg}\n}", "github": "", "project": "", "reviewers": "vcDA;nyxK;Uwh7;NkJG;CEjq", "pdf_size": 848133, "rating": "4;6;7;7;8", "confidence": "5;4;4;4;4", "soundness": "2;3;3;4;4", "novelty": "2;2;3;2;3", "presentation": "3;3;4;4;4", "wc_summary": "121;96;89;85;212", "wc_strengths": "69;43;73;32;93", "wc_weaknesses": "225;149;217;191;84", "wc_questions": "113;181;316;268;1", "wc_limitations": "18;23;148;8;1", "wc_review": "546;492;843;584;391", "wc_reply_reviewers": "459;37;245;18;70", "wc_reply_authors": "641;0;507;0;0", "reply_reviewers": "2;1;2;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 120.6, 47.38185306633754 ], "wc_strengths_avg": [ 62.0, 21.872356983187704 ], "wc_weaknesses_avg": [ 173.2, 51.886028948070404 ], "wc_questions_avg": [ 175.8, 111.96678078787477 ], "wc_limitations_avg": [ 39.6, 54.737921041997936 ], "wc_review_avg": [ 571.2, 150.61128775759138 ], "wc_reply_reviewers_avg": [ 165.8, 167.24281748404027 ], "wc_reply_authors_avg": [ 229.6, 284.37622966767105 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8846517369293831, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6928975033883003365&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu;@stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Bto5a6w06l", "title": "On Architectural 
Compression of Text-to-Image Diffusion Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Exceptional text-to-image (T2I) generation results of Stable Diffusion models (SDMs) come with substantial computational demands. To resolve this issue, recent research on efficient SDMs has prioritized reducing the number of sampling steps and utilizing network quantization. Orthogonal to these directions, this study highlights the power of classical architectural compression for general-purpose T2I synthesis by introducing a block-removed knowledge-distilled SDM (BK-SDM). We eliminate several residual and attention blocks from the U-Net of SDMs, obtaining over a 30% reduction in the number of parameters, MACs per sampling step, and latency. We conduct distillation-based pretraining with only 0.22M LAION pairs (fewer than 0.1% of the full training pairs) on a single A100 GPU. Despite being trained with limited resources, our compact models can imitate the original SDM by benefiting from transferred knowledge and achieve competitive results against larger multi-billion parameter models on the zero-shot MS-COCO benchmark. Moreover, we demonstrate the applicability of our lightweight pretrained models in personalized generation with DreamBooth finetuning. \n", "keywords": "Stable Diffusion;Block Removal;Knowledge Distillation;Network Compression;Text-to-Image Generation", "primary_area": "", "supplementary_material": "/attachment/482e572ff208dda9c1e60e12a250a0a75b488747.pdf", "author": "Bo-Kyeong Kim;Hyoung-Kyu Song;Thibault Castells;Shinkook Choi", "authorids": "~Bo-Kyeong_Kim1;~Hyoung-Kyu_Song1;~Thibault_Castells1;~Shinkook_Choi1", "gender": "F;M;Not Specified;M", "homepage": "https://sites.google.com/view/bkkim;https://deepkyu.me;;https://www.linkedin.com/in/shinkookchoi/", "dblp": "120/8951;;280/1085;197/9643", "google_scholar": "https://scholar.google.co.kr/citations?user=hIWBLUgAAAAJ;KR4U5YMAAAAJ;__bQPScAAAAJ;", "orcid": ";0000-0002-6546-9593;;0000-0002-9617-2418", "linkedin": "bokyeong1015;deepkyu/;;shinkookchoi/", "or_profile": "~Bo-Kyeong_Kim1;~Hyoung-Kyu_Song1;~Thibault_Castells1;~Shinkook_Choi1", "aff": "Nota Inc.;Korea Advanced Institute of Science & Technology;Nota Inc.;Nota Inc.", "aff_domain": "nota.ai;kaist.ac.kr;nota.ai;nota.ai", "position": "ML researcher;Undergrad student;Researcher;Researcher", "bibtex": "@misc{\nkim2023on,\ntitle={On Architectural Compression of Text-to-Image Diffusion Models},\nauthor={Bo-Kyeong Kim and Hyoung-Kyu Song and Thibault Castells and Shinkook Choi},\nyear={2023},\nurl={https://openreview.net/forum?id=Bto5a6w06l}\n}", "github": "", "project": "", "reviewers": "wHDk;ZGLR;M23o;WEDt;Gqvg", "site": "https://openreview.net/forum?id=Bto5a6w06l", "pdf_size": 16439405, "rating": "3;3;4;5;7", "confidence": "4;5;4;4;3", "soundness": "3;2;4;3;4", "novelty": "1;2;2;2;3", "presentation": "2;2;3;3;4", "wc_summary": "47;47;67;53;82", "wc_strengths": "19;30;40;60;81", "wc_weaknesses": "93;313;99;100;230", "wc_questions": "86;5;30;19;34", "wc_limitations": "1;5;8;1;56", "wc_review": "246;400;244;233;483", "wc_reply_reviewers": "167;54;117;186;0", "wc_reply_authors": "648;168;315;407;0", "reply_reviewers": "1;1;1;3;0", "reply_authors": "3;2;2;4;1", "rating_avg": [ 4.4, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 59.2, 13.541048703848606 ], "wc_strengths_avg": [ 46.0, 
22.09977375449803 ], "wc_weaknesses_avg": [ 167.0, 89.30173570541616 ], "wc_questions_avg": [ 34.8, 27.505635786143902 ], "wc_limitations_avg": [ 14.2, 21.06561178793533 ], "wc_review_avg": [ 321.2, 101.7671852809146 ], "wc_reply_reviewers_avg": [ 104.8, 69.53387663578093 ], "wc_reply_authors_avg": [ 307.6, 219.0494008209107 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8451542547285165, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10283015624160719973&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Nota Inc.;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": ";https://www.kaist.ac.kr", "aff_unique_abbr": ";KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Incomplete Multimodality-Diffused Emotion Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72458", "id": "BuGFwUS9B3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/372cb7805eaccb2b7eed641271a30eec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BuGFwUS9B3", "openreview": "https://openreview.net/forum?id=BuGFwUS9B3", "poster": "/media/PosterPDFs/NeurIPS%202023/72458.png?t=1698576071.7358017", "slides": "https://nips.cc/virtual/2023/poster/72458", "video": "https://nips.cc/virtual/2023/poster/72458", "author_site": "Yuanzhi Wang, Yong Li, Zhen Cui", "tldr": "", "abstract": "Human multimodal emotion recognition (MER) aims to perceive and understand human emotions via various heterogeneous modalities, such as language, vision, and acoustics. Compared with a single modality, the complementary information across multiple modalities facilitates robust emotion understanding. Nevertheless, in real-world scenarios, missing modalities hinder multimodal understanding and result in degraded MER performance. In this paper, we propose an Incomplete Multimodality-Diffused emotion recognition (IMDer) method to mitigate the challenge of MER under incomplete multimodalities. To recover the missing modalities, IMDer exploits a score-based diffusion model that maps the input Gaussian noise into the desired distribution space of the missing modalities and recovers missing data in accordance with their original distributions. Specifically, to reduce semantic ambiguity between the missing and the recovered modalities, the available modalities are embedded as a condition to guide and refine the diffusion-based recovery process. In contrast to previous work, the diffusion-based modality recovery mechanism in IMDer allows distribution consistency and semantic disambiguation to be achieved simultaneously. Feature visualization of the recovered modalities illustrates the consistent modality-specific distribution and semantic alignment. 
Besides, quantitative experimental results verify that IMDer obtains state-of-the-art MER accuracy under various missing modality patterns.", "keywords": "Multimodal emotion recognition;Incomplete multimodalities", "primary_area": "", "supplementary_material": "/attachment/f6831bef637f9c4b4f9e27e15a0a4be3b2344a6b.pdf", "author": "Yuanzhi Wang;Yong Li;Zhen Cui", "authorids": "~Yuanzhi_Wang1;~Yong_Li13;~Zhen_Cui4", "gender": "M;M;M", "homepage": "https://mdswyz.github.io/;https://mysee1989.github.io/;http://aip.seu.edu.cn/zcui/", "dblp": "09/7017;;59/8491-1", "google_scholar": "https://scholar.google.com.hk/citations?user=pgZc4sgAAAAJ;HRBTJYYAAAAJ;ChRyl3kAAAAJ", "orcid": "0000-0003-2594-2574;;", "linkedin": ";;", "or_profile": "~Yuanzhi_Wang1;~Yong_Li13;~Zhen_Cui4", "aff": "Nanjing University of Science and Technology;City University of Hong Kong;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;cityu.edu;njust.edu.cn", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nwang2023incomplete,\ntitle={Incomplete Multimodality-Diffused Emotion Recognition},\nauthor={Yuanzhi Wang and Yong Li and Zhen Cui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BuGFwUS9B3}\n}", "github": "", "project": "", "reviewers": "EQy6;tA7f;BscJ;4zZs;bs2t", "pdf_size": 1835598, "rating": "4;5;6;6;8", "confidence": "5;5;4;4;4", "soundness": "3;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "22;82;68;54;40", "wc_strengths": "30;18;168;27;86", "wc_weaknesses": "141;49;184;29;62", "wc_questions": "19;49;31;108;10", "wc_limitations": "31;2;1;2;4", "wc_review": "243;200;452;220;202", "wc_reply_reviewers": "0;0;20;21;11", "wc_reply_authors": "0;518;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 53.2, 20.960916010518243 ], "wc_strengths_avg": [ 65.8, 56.4354498520212 ], "wc_weaknesses_avg": [ 93.0, 59.292495309271644 ], "wc_questions_avg": [ 43.4, 34.840206658399715 ], "wc_limitations_avg": [ 8.0, 11.541230437002806 ], "wc_review_avg": [ 263.4, 95.56066136229909 ], "wc_reply_reviewers_avg": [ 10.4, 9.178235124467014 ], "wc_reply_authors_avg": [ 103.6, 207.19999999999996 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8000946913656625, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13351614966993101583&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "njust.edu.cn;cityu.edu;njust.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanjing University of Science and Technology;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.nust.edu.cn/;https://www.cityu.edu.hk", "aff_unique_abbr": "NUST;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Improving the Knowledge Gradient Algorithm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72457", "id": "BvslVXlUvF", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c272409133942e2f4b7631c8cb7e507e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BvslVXlUvF", "openreview": "https://openreview.net/forum?id=BvslVXlUvF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72457", "video": "https://nips.cc/virtual/2023/poster/72457", "author_site": "Le Yang, Siyang Gao, Chin Pang Ho", "tldr": "", "abstract": "The knowledge gradient (KG) algorithm is a popular policy for the best arm identification (BAI) problem. It is built on the simple idea of always choosing the measurement that yields the greatest expected one-step improvement in the estimate of the best mean of the arms. In this research, we show that this policy has limitations, causing the algorithm not asymptotically optimal. We next provide a remedy for it, by following the manner of one-step look ahead of KG, but instead choosing the measurement that yields the greatest one-step improvement in the probability of selecting the best arm. The new policy is called improved knowledge gradient (iKG). iKG can be shown to be asymptotically optimal. In addition, we show that compared to KG, it is easier to extend iKG to variant problems of BAI, with the $\\epsilon$-good arm identification and feasible arm identification as two examples. The superior performances of iKG on these problems are further demonstrated using numerical examples.", "keywords": "best arm identification;knowledge gradient;asymptotic optimality;convergence rate", "primary_area": "", "supplementary_material": "/attachment/5dc31eef01c7bff0b1baafa79a735cc0a5b06078.pdf", "author": "Le Yang;Siyang Gao;Chin Pang Ho", "authorids": "~Le_Yang4;~Siyang_Gao1;~Chin_Pang_Ho2", "gender": "F;M;M", "homepage": ";https://www.cityu.edu.hk/stfprofile/siyangao.htm;https://sites.google.com/view/clint-chin-pang-ho/home?authuser=0", "dblp": "79/2888-12;136/9876;143/4728", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;NK6nQ9YAAAAJ;", "orcid": ";0000-0002-3574-6393;", "linkedin": ";;", "or_profile": "~Le_Yang4;~Siyang_Gao1;~Chin_Pang_Ho2", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2023improving,\ntitle={Improving the Knowledge Gradient Algorithm},\nauthor={Le Yang and Siyang Gao and Chin Pang Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BvslVXlUvF}\n}", "github": "", "project": "", "reviewers": "Ye7U;eJBQ;h3kD;dyGQ;2tPM", "pdf_size": 299272, "rating": "4;5;6;7;7", "confidence": "2;5;2;3;4", "soundness": "2;4;3;3;4", "novelty": "3;2;3;3;3", "presentation": "3;4;2;2;4", "wc_summary": "66;101;95;31;54", "wc_strengths": "42;32;47;59;63", "wc_weaknesses": "66;49;46;45;24", "wc_questions": "36;67;10;211;16", "wc_limitations": "26;1;7;5;1", "wc_review": "236;250;205;351;158", "wc_reply_reviewers": "0;9;0;11;49", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 69.4, 25.989228537992428 ], "wc_strengths_avg": [ 48.6, 11.288932633336069 ], "wc_weaknesses_avg": [ 46.0, 
13.371611720357423 ], "wc_questions_avg": [ 68.0, 74.21859605247191 ], "wc_limitations_avg": [ 8.0, 9.2951600308978 ], "wc_review_avg": [ 240.0, 63.8529560787909 ], "wc_reply_reviewers_avg": [ 13.8, 18.17030544597421 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17647058823529413, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11937051828881968601&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Self-Evaluation Guided Beam Search for Reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72456", "id": "Bw82hwg5Q3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81fde95c4dc79188a69ce5b24d63010b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Bw82hwg5Q3", "openreview": "https://openreview.net/forum?id=Bw82hwg5Q3", "poster": "/media/PosterPDFs/NeurIPS%202023/72456.png?t=1701664029.2364998", "slides": "https://nips.cc/virtual/2023/poster/72456", "video": "https://nips.cc/virtual/2023/poster/72456", "author_site": "Yuxi Xie, Kenji Kawaguchi, Yiran Zhao, James Xu Zhao, Min-Yen Kan, Junxian He, Michael Xie", "tldr": "", "abstract": "Breaking down a problem into intermediate steps has demonstrated impressive performance in Large Language Model (LLM) reasoning. However, the growth of the reasoning chain introduces uncertainty and error accumulation, making it challenging to elicit accurate final results. To tackle this challenge of uncertainty in multi-step reasoning, we introduce a stepwise self-evaluation mechanism to guide and calibrate the reasoning process of LLMs. We propose a decoding algorithm integrating the self-evaluation guidance via stochastic beam search. The self-evaluation guidance serves as a better-calibrated automatic criterion, facilitating an efficient search in the reasoning space and resulting in superior prediction quality. Stochastic beam search balances exploitation and exploration of the search space with temperature-controlled randomness. Our approach surpasses the corresponding Codex-backboned baselines in few-shot accuracy by $6.34$%, $9.56$%, and $5.46$% on the GSM8K, AQuA, and StrategyQA benchmarks, respectively. Experimental results with Llama-2 on arithmetic reasoning demonstrate the efficiency of our method in outperforming the baseline methods with comparable computational budgets. Further analysis in multi-step reasoning finds that our self-evaluation guidance pinpoints logic failures and leads to higher consistency and robustness.
Our code is publicly available at [https://guideddecoding.github.io/](https://guideddecoding.github.io/).", "keywords": "Large Language Models;Multistep Reasoning;Stochastic Beam Search;LLM Self-Evaluation", "primary_area": "", "supplementary_material": "/attachment/ce1a404de7cd83152ec37774a2e2c12c79bf723a.zip", "author": "Yuxi Xie;Kenji Kawaguchi;Yiran Zhao;Xu Zhao;Min-Yen Kan;Junxian He;Qizhe Xie", "authorids": "~Yuxi_Xie1;~Kenji_Kawaguchi1;~Yiran_Zhao2;~Xu_Zhao6;~Min-Yen_Kan1;~Junxian_He1;~Qizhe_Xie1", "gender": "F;;M;M;M;M;", "homepage": "https://yuxixie.github.io/;https://ml.comp.nus.edu.sg/#members;https://zhaoyiran924.github.io/;;https://www.comp.nus.edu.sg/~kanmy/;https://jxhe.github.io;", "dblp": ";;;;k/MinYenKan;188/6127.html;", "google_scholar": "LNLECx0AAAAJ;aLl3rYoAAAAJ;D_HwSlEAAAAJ;Ybcwc2IAAAAJ;https://scholar.google.com.tw/citations?user=aNVcd3EAAAAJ;BIFGeoUAAAAJ;", "orcid": ";;;;;;", "linkedin": "yuxi-xie-494265181;;;;;;", "or_profile": "~Yuxi_Xie1;~Kenji_Kawaguchi1;~Yiran_Zhao2;~Xu_Zhao6;~Min-Yen_Kan1;~Junxian_He1;~Qizhe_Xie1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;Hong Kong University of Science and Technology;", "aff_domain": "u.nus.edu;nus.edu;u.nus.edu;nus.edu;nus.edu.sg;ust.hk;", "position": "PhD student;Presidential Young Professor;PhD student;MS student;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nxie2023selfevaluation,\ntitle={Self-Evaluation Guided Beam Search for Reasoning},\nauthor={Yuxi Xie and Kenji Kawaguchi and Yiran Zhao and Xu Zhao and Min-Yen Kan and Junxian He and Qizhe Xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Bw82hwg5Q3}\n}", "github": "", "project": "", "reviewers": "WznY;wb7Q;TWEq;fpKX;pGEM", "pdf_size": 2136124, "rating": "3;6;6;6;6", "confidence": "5;2;4;4;4", "soundness": "1;4;3;3;3", "novelty": "1;3;3;3;3", "presentation": "3;2;2;2;3", "wc_summary": "114;92;32;70;56", "wc_strengths": "47;65;59;43;63", "wc_weaknesses": "334;47;346;221;128", "wc_questions": "44;41;31;8;20", "wc_limitations": "20;10;2;6;1", "wc_review": "559;255;470;348;268", "wc_reply_reviewers": "363;11;229;406;243", "wc_reply_authors": "21;0;130;634;116", "reply_reviewers": "2;1;2;2;2", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 72.8, 28.357714999625763 ], "wc_strengths_avg": [ 55.4, 8.799999999999999 ], "wc_weaknesses_avg": [ 215.2, 115.88856716691254 ], "wc_questions_avg": [ 28.8, 13.377593206552515 ], "wc_limitations_avg": [ 7.8, 6.881860213634101 ], "wc_review_avg": [ 380.0, 117.75737768819413 ], "wc_reply_reviewers_avg": [ 250.4, 137.63371679933664 ], "wc_reply_authors_avg": [ 180.2, 232.54797354524507 ], "reply_reviewers_avg": [ 1.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2702104562395501515&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "u.nus.edu;nus.edu;u.nus.edu;nus.edu;nus.edu.sg;ust.hk;", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "National 
University of Singapore;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk", "aff_unique_abbr": "NUS;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Singapore;China" }, { "title": "Lookup Table meets Local Laplacian Filter: Pyramid Reconstruction Network for Tone Mapping", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72455", "id": "BxqPN7KuQS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3a08d179347e33414badadf100e4e8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=BxqPN7KuQS", "openreview": "https://openreview.net/forum?id=BxqPN7KuQS", "poster": "/media/PosterPDFs/NeurIPS%202023/72455.png?t=1701754968.0243165", "slides": "https://nips.cc/virtual/2023/poster/72455", "video": "https://nips.cc/virtual/2023/poster/72455", "author_site": "Feng Zhang, Ming Tian, Zhiqiang Li, Bin Xu, Qingbo Lu, Changxin Gao, Nong Sang", "tldr": "", "abstract": "Tone mapping aims to convert high dynamic range (HDR) images to low dynamic range (LDR) representations, a critical task in the camera imaging pipeline. In recent years, 3-Dimensional LookUp Table (3D LUT) based methods have gained attention due to their ability to strike a favorable balance between enhancement performance and computational efficiency. However, these methods often fail to deliver satisfactory results in local areas since the look-up table is a global operator for tone mapping, which works based on pixel values and fails to incorporate crucial local information. To this end, this paper aims to address this issue by exploring a novel strategy that integrates global and local operators by utilizing closed-form Laplacian pyramid decomposition and reconstruction. Specifically, we employ image-adaptive 3D LUTs to manipulate the tone in the low-frequency image by leveraging the specific characteristics of the frequency information. Furthermore, we utilize local Laplacian filters to refine the edge details in the high-frequency components in an adaptive manner. Local Laplacian filters are widely used to preserve edge details in photographs, but their conventional usage involves manual tuning and fixed implementation within camera imaging pipelines or photo editing tools. We propose to learn parameter value maps progressively for local Laplacian filters from annotated data using a lightweight network. Our model achieves simultaneous global tone manipulation and local edge detail preservation in an end-to-end manner. 
Extensive experimental results on two benchmark datasets demonstrate that the proposed method performs favorably against state-of-the-art methods.", "keywords": "tone mapping; learnable local laplacian filter; laplacian pyramid; 3D lookup table", "primary_area": "", "supplementary_material": "/attachment/2ae5a599cd366ba171e508c514d8180ffbcfde1c.pdf", "author": "Feng Zhang;Ming Tian;Zhiqiang Li;Bin Xu;Qingbo Lu;Changxin Gao;Nong Sang", "authorids": "~Feng_Zhang11;~Ming_Tian1;~Zhiqiang_Li3;~Bin_Xu5;~Qingbo_Lu3;~Changxin_Gao1;~Nong_Sang1", "gender": "M;M;F;M;M;M;M", "homepage": ";https://github.com/Nabauu;https://github.com/xubin19939;;https://sites.google.com/site/changxingao/home;http://faculty.hust.edu.cn/sangnong/en/index.htm;https://lzqcode.github.io", "dblp": ";;;142/6495.html;99/7463;10/1545;", "google_scholar": "iJ0tdroAAAAJ;;;;4tku-lwAAAAJ;ky_ZowEAAAAJ;", "orcid": ";;;;0000-0003-2736-3920;0000-0002-9167-1496;", "linkedin": ";;;;;;", "or_profile": "~Feng_Zhang11;~Ming_Tian1;~Bin_Xu5;~Qingbo_Lu3;~Changxin_Gao1;~Nong_Sang1;~Zhiqiangcs_Li1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;\u5927\u7586\u521b\u65b0;Da Jiang Innovations;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Dajiang Innovation Technology Co., Ltd", "aff_domain": "hust.edu.cn;hust.edu.cn;dji.com;dji.com;hust.edu.cn;hust.edu.cn;dji.com", "position": "PhD student;MS student;Researcher;Researcher;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023lookup,\ntitle={Lookup Table meets Local Laplacian Filter: Pyramid Reconstruction Network for Tone Mapping},\nauthor={Feng Zhang and Ming Tian and Zhiqiang Li and Bin Xu and Qingbo Lu and Changxin Gao and Nong Sang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=BxqPN7KuQS}\n}", "github": "", "project": "", "reviewers": "g4yF;h55c;nHJa;zrx6;1rH8", "pdf_size": 10756176, "rating": "3;6;6;7;7", "confidence": "4;4;4;5;5", "soundness": "2;3;3;2;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;2;3", "wc_summary": "52;133;90;204;43", "wc_strengths": "52;16;55;47;41", "wc_weaknesses": "137;7;47;500;42", "wc_questions": "2;6;48;4;77", "wc_limitations": "8;9;2;24;7", "wc_review": "251;171;242;779;210", "wc_reply_reviewers": "0;0;11;0;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 104.4, 59.09517746821647 ], "wc_strengths_avg": [ 42.2, 13.934130758680285 ], "wc_weaknesses_avg": [ 146.6, 181.84234930290577 ], "wc_questions_avg": [ 27.4, 30.117104774529707 ], "wc_limitations_avg": [ 10.0, 7.402702209328699 ], "wc_review_avg": [ 330.6, 225.94919782995467 ], "wc_reply_reviewers_avg": [ 5.6, 7.116178749862878 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8617591098991983293&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "hust.edu.cn;hust.edu.cn;dji.com;dji.com;hust.edu.cn;hust.edu.cn;dji.com", "author_num": 7, 
"aff_unique_index": "0;0;1;2;0;0;3", "aff_unique_norm": "Huazhong University of Science and Technology;DJI (Dajiang Innovation);Da Jiang Innovations;Dajiang Innovation Technology Co., Ltd", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hust.edu.cn;https://www.dji.com;;", "aff_unique_abbr": "HUST;DJI;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "On the explainable properties of 1-Lipschitz Neural Networks: An Optimal Transport Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72454", "id": "ByDy2mlkig", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab5a2bf4385bee44f3919060b184605b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ByDy2mlkig", "openreview": "https://openreview.net/forum?id=ByDy2mlkig", "poster": "/media/PosterPDFs/NeurIPS%202023/72454.png?t=1701421788.7865252", "slides": "https://nips.cc/virtual/2023/poster/72454", "video": "https://nips.cc/virtual/2023/poster/72454", "author_site": "Mathieu Serrurier, Franck Mamalet, Thomas FEL, Louis B\u00e9thune, Louis B\u00e9thune, Thibaut Boissin", "tldr": "", "abstract": "Input gradients have a pivotal role in a variety of applications, including adversarial attack algorithms for evaluating model robustness, explainable AI techniques for generating saliency maps, and counterfactual explanations. However, saliency maps generated by traditional neural networks are often noisy and provide limited insights. In this paper, we demonstrate that, on the contrary, the saliency maps of 1-Lipschitz neural networks, learnt with the dual loss of an optimal transportation problem, exhibit desirable XAI properties:\nThey are highly concentrated on the essential parts of the image with low noise, significantly outperforming state-of-the-art explanation approaches across various models and metrics. We also prove that these maps align unprecedentedly well with human explanations on ImageNet. To explain the particularly beneficial properties of the saliency map for such models, we prove this gradient encodes both the direction of the transportation plan and the direction towards the nearest adversarial attack. Following the gradient down to the decision boundary is no longer considered an adversarial attack, but rather a counterfactual explanation that explicitly transports the input from one class to another. Thus, Learning with such a loss jointly optimizes the classification objective and the alignment of the gradient , i.e. the saliency map, to the transportation plan direction. 
These networks were previously known to be certifiably robust by design, and we demonstrate that they scale well to large problems and models, and are tailored for explainability using a fast and straightforward method.", "keywords": "1-Lipschitz neural network;explicability", "primary_area": "", "supplementary_material": "/attachment/6a91e34da63d87f91295f1d5c83ae22aaa66f2d0.zip", "author": "Mathieu Serrurier;Franck Mamalet;Thomas FEL;Louis B\u00e9thune;Thibaut Boissin", "authorids": "~Mathieu_Serrurier1;~Franck_Mamalet2;~Thomas_FEL1;~Louis_B\u00e9thune1;~Thibaut_Boissin1", "gender": "M;M;M;M;M", "homepage": ";https://www.researchgate.net/profile/Franck-Mamalet;https://thomasfel.me;https://louis-bethune.fr/;", "dblp": "30/2092;15/6625;274/2390;270/0797;", "google_scholar": "https://scholar.google.com/scholar?scilib=1;https://scholar.google.fr/citations?user=5C5p0osAAAAJ;1m5Mlx4AAAAJ;1zvpCDcAAAAJ;zC-MstIAAAAJ", "orcid": ";;;0000-0003-1498-8251;", "linkedin": ";franck-mamalet-0453a91b;;;", "or_profile": "~Mathieu_Serrurier1;~Franck_Mamalet2;~Thomas_FEL1;~Louis_B\u00e9thune1;~Thibaut_Boissin1", "aff": "University Paul Sabatier;IRT Saint Exupery;Brown University;Institut de Recherche en Informatique de Toulouse;IRT Saint Exup\u00e9ry", "aff_domain": "irit.fr;irt-saintexupery.com;brown.edu;irit.fr;irt-saintexupery.com", "position": "Assistant Professor;Researcher;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nserrurier2023on,\ntitle={On the explainable properties of 1-Lipschitz Neural Networks: An Optimal Transport Perspective},\nauthor={Mathieu Serrurier and Franck Mamalet and Thomas FEL and Louis B{\\'e}thune and Thibaut Boissin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ByDy2mlkig}\n}", "github": "", "project": "", "reviewers": "7q2B;LnPf;n7GW;4Zh4;WMVT", "pdf_size": 8563958, "rating": "5;5;6;6;6", "confidence": "4;4;3;4;4", "soundness": "2;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "84;146;34;95;86", "wc_strengths": "28;98;84;42;141", "wc_weaknesses": "326;58;392;140;415", "wc_questions": "71;128;21;2;48", "wc_limitations": "1;15;1;31;9", "wc_review": "510;445;532;310;699", "wc_reply_reviewers": "51;0;19;351;581", "wc_reply_authors": "198;40;40;487;573", "reply_reviewers": "1;0;1;2;2", "reply_authors": "3;2;2;2;3", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.0, 35.6202189774291 ], "wc_strengths_avg": [ 78.6, 40.49493795525559 ], "wc_weaknesses_avg": [ 266.2, 141.9977464609914 ], "wc_questions_avg": [ 54.0, 43.80410939626555 ], "wc_limitations_avg": [ 11.4, 11.128342194594845 ], "wc_review_avg": [ 499.2, 126.36993313284613 ], "wc_reply_reviewers_avg": [ 200.4, 229.3134099872923 ], "wc_reply_authors_avg": [ 267.6, 223.54113715376863 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18116289977409532063&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 43, "email": "irit.fr;irt-saintexupery.com;brown.edu;irit.fr;irt-saintexupery.com", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Paul Sabatier 
University;IRT Saint Exupery;Brown University;Institut de Recherche en Informatique de Toulouse;IRT Saint Exup\u00e9ry", "aff_unique_dep": ";;;Informatique;", "aff_unique_url": "https://www.univ-toulouse1.fr;;https://www.brown.edu;https://www.irit.fr;", "aff_unique_abbr": "UT1;;Brown;IRIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "France;United States" }, { "title": "Revisiting the Evaluation of Image Synthesis with GANs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73663", "id": "C0zw2ERKiQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e5fa672b2c35744cbcfacb2e77f1cb0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=C0zw2ERKiQ", "openreview": "https://openreview.net/forum?id=C0zw2ERKiQ", "poster": "/media/PosterPDFs/NeurIPS%202023/73663.png?t=1697961007.7905679", "slides": "https://nips.cc/virtual/2023/poster/73663", "video": "https://nips.cc/virtual/2023/poster/73663", "author_site": "mengping yang, Ceyuan Yang, Yichi Zhang, Qingyan Bai, Yujun Shen, Bo Dai", "tldr": "", "abstract": "A good metric, which promises a reliable comparison between solutions, is essential for any well-defined task. Unlike most vision tasks that have per-sample ground-truth, image synthesis tasks target generating unseen data and hence are usually evaluated through a distributional distance between one set of real samples and another set of generated samples. This study presents an empirical investigation into the evaluation of synthesis performance, with generative adversarial networks (GANs) as a representative of generative models. In particular, we make in-depth analyses of various factors, including how to represent a data point in the representation space, how to calculate a fair distance using selected samples, and how many instances to use from each set. Extensive experiments conducted on multiple datasets and settings reveal several important findings. Firstly, a group of models that include both CNN-based and ViT-based architectures serve as reliable and robust feature extractors for measurement evaluation. Secondly, Centered Kernel Alignment (CKA) provides a better comparison across various extractors and hierarchical layers in one model. Finally, CKA is more sample-efficient and enjoys better agreement with human judgment in characterizing the similarity between two internal data correlations. 
These findings contribute to the development of a new measurement system, which enables a consistent and reliable re-evaluation of current state-of-the-art generative models.", "keywords": "Image Synthesis;Generative Models;Synthesis Measurement", "primary_area": "", "supplementary_material": "/attachment/d8ed01ec96a64587c52f714b427b2577818d156e.pdf", "author": "Mengping Yang;Ceyuan Yang;Yichi Zhang;Qingyan Bai;Yujun Shen;Bo Dai", "authorids": "~Mengping_Yang2;~Ceyuan_Yang2;~Yichi_Zhang9;~Qingyan_Bai1;~Yujun_Shen1;~Bo_Dai2", "gender": "M;M;M;M;;M", "homepage": "https://forevermamba.work/;https://ceyuan.me/;https://github.com/eachtt;https://github.com/EzioBy;;http://daibo.info/", "dblp": "324/0385;218/2676;;290/9174;;64/2903-2", "google_scholar": "https://scholar.google.com.hk/citations?user=yF34LtcAAAAJ;Rfj4jWoAAAAJ;;xUMjxi4AAAAJ;;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ", "orcid": "0000-0003-1503-9621;;;;;0000-0003-0777-9232", "linkedin": ";;;;;", "or_profile": "~Mengping_Yang2;~Ceyuan_Yang2;~Yichi_Zhang9;~Qingyan_Bai1;~Yujun_Shen1;~Bo_Dai2", "aff": "Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;;Tsinghua University;;Shanghai AI Laboratory", "aff_domain": "pjlab.org.cn;cuhk.edu.hk;;tsinghua.edu.cn;;pjlab.org.cn", "position": "Intern;PhD student;;MS student;;Scientist", "bibtex": "@inproceedings{\nyang2023revisiting,\ntitle={Revisiting the Evaluation of Image Synthesis with {GAN}s},\nauthor={Mengping Yang and Ceyuan Yang and Yichi Zhang and Qingyan Bai and Yujun Shen and Bo Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=C0zw2ERKiQ}\n}", "github": "", "project": "", "reviewers": "xEcW;y8MJ;cWrw;95Dj;Z9sB", "pdf_size": 13867294, "rating": "5;6;6;7;7", "confidence": "4;4;2;3;4", "wc_summary_and_contributions": "56;52;110;132;258", "wc_strengths": "78;28;166;75;73", "wc_improvement": "44;70;44;20;200", "wc_limitations": "56;6;66;39;27", "wc_correctness": "8;1;16;25;131", "wc_clarity": "5;4;1;35;95", "wc_relation_to_prior_work": "11;1;1;13;41", "wc_documentation": "8;37;1;12;56", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "267;200;406;352;882", "wc_reply_reviewers": "0;0;0;0;96", "wc_reply_authors": "1208;1250;1122;1120;2167", "reply_reviewers": "0;0;0;0;1", "reply_authors": "3;3;3;3;6", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "wc_summary_and_contributions_avg": [ 121.6, 74.82673319075208 ], "wc_strengths_avg": [ 84.0, 44.93995994657761 ], "wc_improvement_avg": [ 75.6, 64.17974758442105 ], "wc_limitations_avg": [ 38.8, 21.216974336601343 ], "wc_correctness_avg": [ 36.2, 48.07244532993927 ], "wc_clarity_avg": [ 28.0, 35.69873947354444 ], "wc_relation_to_prior_work_avg": [ 13.4, 14.66424222385869 ], "wc_documentation_avg": [ 22.8, 20.565991344936428 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 421.4, 240.85813251787866 ], "wc_reply_reviewers_avg": [ 19.2, 38.4 ], "wc_reply_authors_avg": [ 1373.4, 399.9497968495546 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 3.6, 1.2 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13363062095621217, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14808196640451557246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pjlab.org.cn;cuhk.edu.hk;;tsinghua.edu.cn;;pjlab.org.cn", "author_num": 6, "aff_unique_index": "0;1;2;3", 
"aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong;Tsinghua University;Shanghai AI Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.shailab.org/;https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Shanghai AI Lab;CUHK;THU;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Clustering the Sketch: Dynamic Compression for Embedding Tables", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72453", "id": "C4rRqkXFyC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e468a76212a58c1af94a3d235151944a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C4rRqkXFyC", "openreview": "https://openreview.net/forum?id=C4rRqkXFyC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72453", "video": "https://nips.cc/virtual/2023/poster/72453", "author_site": "Henry Tsang, Thomas Ahle", "tldr": "", "abstract": "Embedding tables are used by machine learning systems to work with categorical features.\nIn modern Recommendation Systems, these tables can be very large, necessitating the development of new methods for fitting them in memory, even during training.\nWe suggest Clustered Compositional Embeddings (CCE) which combines clustering-based compression like quantization to codebooks with dynamic methods like The Hashing Trick and Compositional Embeddings [Shi et al., 2020].\nExperimentally CCE achieves the best of both worlds: The high compression rate of codebook-based quantization, but \\emph{dynamically} like hashing-based methods, so it can be used during training.\nTheoretically, we prove that CCE is guaranteed to converge to the optimal codebook and give a tight bound for the number of iterations required.", "keywords": "Embedding table compression;Clustering and sketching;Memory-efficient training", "primary_area": "", "supplementary_material": "/attachment/a6581e103bd97671020a7855da6183ec6999bc2e.pdf", "author": "Henry Tsang;Thomas Dybdahl Ahle", "authorids": "henrylhtsang@meta.com;~Thomas_Dybdahl_Ahle1", "gender": ";M", "homepage": ";https://thomasahle.com", "dblp": ";169/9933.html", "google_scholar": ";https://scholar.google.dk/citations?user=aRiVoYgAAAAJ", "orcid": ";0000-0001-9747-0479", "linkedin": ";thomasahle/", "or_profile": "henrylhtsang@meta.com;~Thomas_Dybdahl_Ahle1", "aff": ";Meta Facebook", "aff_domain": ";facebook.com", "position": ";Researcher", "bibtex": "@inproceedings{\ntsang2023clustering,\ntitle={Clustering the Sketch: Dynamic Compression for Embedding Tables},\nauthor={Henry Tsang and Thomas Dybdahl Ahle},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C4rRqkXFyC}\n}", "github": "", "project": "", "reviewers": "Xht4;jNoe;mVdt;5kPE", "pdf_size": 934095, "rating": "1;7;7;8", "confidence": "5;3;4;4", "soundness": "1;3;4;4", "novelty": "1;3;4;4", "presentation": "1;3;4;3", "wc_summary": "30;201;48;154", "wc_strengths": "2;70;99;217", "wc_weaknesses": "30;68;1;243", "wc_questions": "30;103;10;147", "wc_limitations": "1;35;6;3", "wc_review": "93;477;164;764", "wc_reply_reviewers": "0;0;0;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "0;1;1;1", "rating_avg": [ 5.75, 2.7726341266023544 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 
1.224744871391589 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 108.25, 71.4995629357271 ], "wc_strengths_avg": [ 97.0, 77.71422006299748 ], "wc_weaknesses_avg": [ 85.5, 93.98537120211847 ], "wc_questions_avg": [ 72.5, 55.210959057056776 ], "wc_limitations_avg": [ 11.25, 13.827056809024834 ], "wc_review_avg": [ 374.5, 267.2868309513209 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.765092055676006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1735992826811318490&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";facebook.com", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Dynamic Sparsity Is Channel-Level Sparsity Learner", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72452", "id": "C6IIwFHWkF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6d0e41e0b1ed38c76d13c9e417a8f1f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C6IIwFHWkF", "openreview": "https://openreview.net/forum?id=C6IIwFHWkF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72452", "video": "https://nips.cc/virtual/2023/poster/72452", "author_site": "Lu Yin, Gen Li, Meng Fang, Li Shen, Tianjin Huang, Zhangyang \"Atlas\" Wang, Vlado Menkovski, Xiaolong Ma, Mykola Pechenizkiy, Shiwei Liu", "tldr": "", "abstract": "Sparse training has received an upsurge of interest in machine learning due to its tantalizing saving potential for both the entire training process and inference. Dynamic sparse training (DST), as a leading approach, can train deep neural networks at high sparsity from scratch to match the performance of their dense counterparts. However, most, if not all, prior DST methods demonstrate their effectiveness on unstructured sparsity with highly irregular sparse patterns, which receives limited support in common hardware. This limitation hinders the usage of DST in practice. In this paper, we propose Channel-aware dynamic sparse (Chase), which for the first time seamlessly translates the promise of unstructured dynamic sparsity to GPU-friendly channel-level sparsity (not fine-grained N:M or group sparsity) during one end-to-end training process, without any ad-hoc operations. The resulting small sparse networks can be directly accelerated by commodity hardware, without using any particular sparsity-aware hardware accelerators. This appealing outcome is partially motivated by a hidden phenomenon of dynamic sparsity: off-the-shelf unstructured DST implicitly involves biased parameter reallocation across channels, with a large fraction of channels (up to 60%) being sparser than others. By progressively identifying and removing these channels during training, our approach transfers unstructured sparsity to channel-wise sparsity. Our experimental results demonstrate that Chase achieves 1.7x inference throughput speedup on common GPU devices without compromising accuracy with ResNet-50 on ImageNet. 
We release our code in https://github.com/luuyin/chase.", "keywords": "dynamic sparsity;dynamic sparse training;sparse training", "primary_area": "", "supplementary_material": "", "author": "Lu Yin;Gen Li;Meng Fang;Li Shen;Tianjin Huang;Zhangyang Wang;Vlado Menkovski;Xiaolong Ma;Mykola Pechenizkiy;Shiwei Liu", "authorids": "~Lu_Yin1;~Gen_Li4;~Meng_Fang1;~Li_Shen1;~Tianjin_Huang1;~Zhangyang_Wang1;~Vlado_Menkovski2;~Xiaolong_Ma2;~Mykola_Pechenizkiy1;~Shiwei_Liu2", "gender": ";M;M;M;M;M;M;M;M;M", "homepage": "https://luuyin.com/;https://coulsonlee.github.io;;https://sites.google.com/site/mathshenli/home;https://research.tue.nl/nl/persons/tianjin-huang;https://vita-group.github.io;https://vlamen.github.io;https://xiaolongma2016.com;http://www.win.tue.nl/~mpechen/;https://shiweiliuiiiiiii.github.io/", "dblp": "87/2528-6;28/538-12;67/463;91/3680-8;189/3972;119/4026;06/726;;37/4649;234/8697-3.html", "google_scholar": "G4Xe1NkAAAAJ;;IcNYP1oAAAAJ;yVhgENIAAAAJ;https://scholar.google.co.uk/citations?user=yFLmPsoAAAAJ;pxFyKAIAAAAJ;2s9HUEMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=F0uFT_kAAAAJ;73IbXtsAAAAJ", "orcid": ";;;;;;0000-0001-5262-0605;0000-0003-3753-7648;0000-0003-4955-0743;", "linkedin": ";;;;;;;xiaolong-ma-66b98910b/;mpechen/;", "or_profile": "~Lu_Yin1;~Gen_Li4;~Meng_Fang1;~Li_Shen1;~Tianjin_Huang1;~Zhangyang_Wang1;~Vlado_Menkovski2;~Xiaolong_Ma2;~Mykola_Pechenizkiy1;~Shiwei_Liu2", "aff": "University of Aberdeen;Clemson University;Eindhoven University of Technology;JD Explore Academy;;University of Texas, Austin;Eindhoven University of Technology;Clemson University;Eindhoven University of Technology;University of Texas at Austin", "aff_domain": "abdn.ac.uk;clemson.edu;tue.nl;jd.com;;utexas.edu;tue.nl;clemson.edu;tue.nl;utexas.edu", "position": "Assistant Professor;PhD student;Assistant Professor;Researcher;;Assistant Professor;Assistant Professor;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nyin2023dynamic,\ntitle={Dynamic Sparsity Is Channel-Level Sparsity Learner},\nauthor={Lu Yin and Gen Li and Meng Fang and Li Shen and Tianjin Huang and Zhangyang Wang and Vlado Menkovski and Xiaolong Ma and Mykola Pechenizkiy and Shiwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C6IIwFHWkF}\n}", "github": "", "project": "", "reviewers": "VN42;bsGW;DQ24;G8BR", "pdf_size": 597995, "rating": "6;6;7;7", "confidence": "4;2;5;5", "soundness": "3;3;3;3", "novelty": "2;2;4;3", "presentation": "3;3;4;3", "wc_summary": "67;120;74;30", "wc_strengths": "39;107;68;288", "wc_weaknesses": "90;175;51;218", "wc_questions": "8;10;52;4", "wc_limitations": "8;14;1;9", "wc_review": "212;426;246;549", "wc_reply_reviewers": "24;20;32;24", "wc_reply_authors": "34;25;33;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 31.995116814914116 ], "wc_strengths_avg": [ 125.5, 96.87233867312175 ], "wc_weaknesses_avg": [ 133.5, 66.25896165802781 ], "wc_questions_avg": [ 18.5, 19.461500456028563 ], "wc_limitations_avg": [ 8.0, 4.636809247747852 ], "wc_review_avg": [ 358.25, 136.89845689415202 ], "wc_reply_reviewers_avg": [ 25.0, 4.358898943540674 ], "wc_reply_authors_avg": [ 31.0, 3.5355339059327378 ], "reply_reviewers_avg": [ 1.0, 
0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5557230907843413190&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "abdn.ac.uk;clemson.edu;tue.nl;jd.com;;utexas.edu;tue.nl;clemson.edu;tue.nl;utexas.edu", "author_num": 10, "aff_unique_index": "0;1;2;3;4;2;1;2;4", "aff_unique_norm": "University of Aberdeen;Clemson University;Eindhoven University of Technology;JD;University of Texas at Austin", "aff_unique_dep": ";;;JD Explore Academy;", "aff_unique_url": "https://www.abdn.ac.uk;https://www.clemson.edu;https://www.tue.nl;;https://www.utexas.edu", "aff_unique_abbr": "Aberdeen;Clemson;TU/e;;UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;2;1;2;1;2;1", "aff_country_unique": "United Kingdom;United States;Netherlands;" }, { "title": "Self-Consistent Velocity Matching of Probability Flows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72451", "id": "C6fvJ2RfsL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2b781badeeb49896c4b324c466ec442-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C6fvJ2RfsL", "openreview": "https://openreview.net/forum?id=C6fvJ2RfsL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72451", "video": "https://nips.cc/virtual/2023/poster/72451", "author_site": "Lingxiao Li, Samuel Hurault, Justin Solomon", "tldr": "", "abstract": "We present a discretization-free scalable framework for solving a large class of mass-conserving partial differential equations (PDEs), including the time-dependent Fokker-Planck equation and the Wasserstein gradient flow. The main observation is that the time-varying velocity field of the PDE solution needs to be self-consistent: it must satisfy a fixed-point equation involving the probability flow characterized by the same velocity field. Instead of directly minimizing the residual of the fixed-point equation with neural parameterization, we use an iterative formulation with a biased gradient estimator that bypasses significant computational obstacles with strong empirical performance. Compared to existing approaches, our method does not suffer from temporal or spatial discretization, covers a wider range of PDEs, and scales to high dimensions. 
Experimentally, our method recovers analytical solutions accurately when they are available and achieves superior performance in high dimensions with less training time compared to alternatives.", "keywords": "JKO;mass-conservation;PDE;Fokker-Planck;scalable;discretization-free;neural ODE", "primary_area": "", "supplementary_material": "", "author": "Lingxiao Li;Samuel Hurault;Justin Solomon", "authorids": "~Lingxiao_Li1;~Samuel_Hurault1;~Justin_Solomon1", "gender": "M;M;M", "homepage": "http://people.csail.mit.edu/lingxiao/;;http://people.csail.mit.edu/jsolomon/", "dblp": ";239/3588;80/5094", "google_scholar": ";https://scholar.google.fr/citations?user=f_rtYCAAAAAJ;pImSVwoAAAAJ", "orcid": ";;0000-0002-7701-7586", "linkedin": ";;justin-solomon-8a587914/", "or_profile": "~Lingxiao_Li1;~Samuel_Hurault1;~Justin_Solomon1", "aff": "Massachusetts Institute of Technology;University of Bordeaux;Massachusetts Institute of Technology", "aff_domain": "mit.edu;u-bordeaux.fr;mit.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023selfconsistent,\ntitle={Self-Consistent Velocity Matching of Probability Flows},\nauthor={Lingxiao Li and Samuel Hurault and Justin Solomon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C6fvJ2RfsL}\n}", "github": "", "project": "", "reviewers": "egp4;wUYN;5yDs;AfQf;v66q;3qHN", "pdf_size": 3131274, "rating": "5;6;6;6;7;7", "confidence": "3;4;4;3;3;3", "soundness": "3;4;2;4;4;4", "novelty": "3;3;2;3;3;3", "presentation": "3;4;3;4;3;4", "wc_summary": "64;37;124;122;234;214", "wc_strengths": "51;41;32;89;50;72", "wc_weaknesses": "67;64;152;90;58;51", "wc_questions": "16;60;280;45;104;97", "wc_limitations": "7;24;101;2;2;52", "wc_review": "205;226;689;348;448;486", "wc_reply_reviewers": "49;284;0;25;0;0", "wc_reply_authors": "62;403;0;39;0;0", "reply_reviewers": "1;2;0;1;0;0", "reply_authors": "2;3;1;2;1;1", "rating_avg": [ 6.166666666666667, 0.6871842709362768 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.5, 0.7637626158259734 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 132.5, 71.80935407962762 ], "wc_strengths_avg": [ 55.833333333333336, 19.178257364931664 ], "wc_weaknesses_avg": [ 80.33333333333333, 34.23773097362356 ], "wc_questions_avg": [ 100.33333333333333, 85.73732494595858 ], "wc_limitations_avg": [ 31.333333333333332, 35.70091813322951 ], "wc_review_avg": [ 400.3333333333333, 165.44149687695918 ], "wc_reply_reviewers_avg": [ 59.666666666666664, 101.91608748813353 ], "wc_reply_authors_avg": [ 84.0, 144.59022558020072 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.6666666666666667, 0.74535599249993 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17149858514250882, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13430357560357673779&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mit.edu;u-bordeaux.fr;mit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Bordeaux", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.u-bordeaux.fr", "aff_unique_abbr": "MIT;UBordeaux", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { 
"title": "Towards Label-free Scene Understanding by Vision Foundation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72450", "id": "C8JdyM7B8I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef6c94e9cf4d169298479ee2e230ee13-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C8JdyM7B8I", "openreview": "https://openreview.net/forum?id=C8JdyM7B8I", "poster": "/media/PosterPDFs/NeurIPS%202023/72450.png?t=1697289800.023241", "slides": "https://nips.cc/virtual/2023/poster/72450", "video": "https://nips.cc/virtual/2023/poster/72450", "author_site": "Runnan Chen, Youquan Liu, Lingdong Kong, Nenglun Chen, Xinge ZHU, Yuexin Ma, Tongliang Liu, Wenping Wang", "tldr": "", "abstract": "Vision foundation models such as Contrastive Vision-Language Pre-training (CLIP) and Segment Anything (SAM) have demonstrated impressive zero-shot performance on image classification and segmentation tasks. However, the incorporation of CLIP and SAM for label-free scene understanding has yet to be explored. In this paper, we investigate the potential of vision foundation models in enabling networks to comprehend 2D and 3D worlds without labelled data. The primary challenge lies in effectively supervising networks under extremely noisy pseudo labels, which are generated by CLIP and further exacerbated during the propagation from the 2D to the 3D domain. To tackle these challenges, we propose a novel Cross-modality Noisy Supervision (CNS) method that leverages the strengths of CLIP and SAM to supervise 2D and 3D networks simultaneously. In particular, we introduce a prediction consistency regularization to co-train 2D and 3D networks, then further impose the networks' latent space consistency using the SAM's robust feature representation. Experiments conducted on diverse indoor and outdoor datasets demonstrate the superior performance of our method in understanding 2D and 3D open environments. Our 2D and 3D network achieves label-free semantic segmentation with 28.4\\% and 33.5\\% mIoU on ScanNet, improving 4.7\\% and 7.9\\%, respectively. For nuImages and nuScenes datasets, the performance is 22.1\\% and 26.8\\% with improvements of 3.5\\% and 6.0\\%, respectively. Code is available. 
(https://github.com/runnanchen/Label-Free-Scene-Understanding)", "keywords": "label-free;scene understanding", "primary_area": "", "supplementary_material": "/attachment/26787d7b7c4aa9dcb9746b649f21279ad2e0c096.zip", "author": "Runnan Chen;Youquan Liu;Lingdong Kong;Nenglun Chen;Xinge ZHU;Yuexin Ma;Tongliang Liu;Wenping Wang", "authorids": "~Runnan_Chen1;~Youquan_Liu1;~Lingdong_Kong1;~Nenglun_Chen1;~Xinge_ZHU2;~Yuexin_Ma2;~Tongliang_Liu1;~Wenping_Wang1", "gender": "M;M;;M;;F;M;M", "homepage": "https://scholar.google.com.hk/citations?hl=en&user=Uq2DuzkAAAAJ&view_op=list_works&sortby=pubdate;https://github.com/youquanl;;https://scholar.google.com/citations?user=UhjTC7AAAAAJ;;http://yuexinma.me/aboutme.html;https://tongliang-liu.github.io/;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html", "dblp": "232/1849;91/6914;;230/7699.html;;209/5925;150/6667;", "google_scholar": "https://scholar.google.com.hk/citations?hl=en;;;UhjTC7AAAAAJ;;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;28shvv0AAAAJ", "orcid": ";0000-0002-1625-4318;;;;;;0000-0002-2284-3952", "linkedin": ";;;;;;;", "or_profile": "~Runnan_Chen1;~Youquan_Liu1;~Lingdong_Kong1;~Nenglun_Chen1;~Xinge_ZHU2;~Yuexin_Ma2;~Tongliang_Liu1;~Wenping_Wang1", "aff": "the University of Hong Kong, University of Hong Kong;Hochschule Bremerhaven;;Nanjing University of Information Science and Technology;;ShanghaiTech University;University of Sydney;Texas A&M University - College Station", "aff_domain": "cs.hku.hk;hs-bremerhaven.de;;nuist.edu.cn;;shanghaitech.edu.cn;sydney.edu.au;tamu.edu", "position": "PhD student;MS student;;Lecturer;;Assistant Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nchen2023towards,\ntitle={Towards Label-free Scene Understanding by Vision Foundation Models},\nauthor={Runnan Chen and Youquan Liu and Lingdong Kong and Nenglun Chen and Xinge ZHU and Yuexin Ma and Tongliang Liu and Wenping Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C8JdyM7B8I}\n}", "github": "", "project": "", "reviewers": "rM41;Mxk9;9W6n;inkR", "pdf_size": 1383971, "rating": "4;6;6;7", "confidence": "4;5;4;4", "soundness": "3;4;3;3", "novelty": "2;4;3;2", "presentation": "2;3;3;3", "wc_summary": "54;100;113;194", "wc_strengths": "75;76;45;73", "wc_weaknesses": "329;249;247;176", "wc_questions": "17;4;160;42", "wc_limitations": "6;7;12;50", "wc_review": "481;436;577;535", "wc_reply_reviewers": "144;34;42;143", "wc_reply_authors": "311;52;61;134", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 115.25, 50.47462233637811 ], "wc_strengths_avg": [ 67.25, 12.891373084353738 ], "wc_weaknesses_avg": [ 250.25, 54.14505979311501 ], "wc_questions_avg": [ 55.75, 61.71861550618257 ], "wc_limitations_avg": [ 18.75, 18.18481509391833 ], "wc_review_avg": [ 507.25, 53.38714732967102 ], "wc_reply_reviewers_avg": [ 90.75, 52.826958080131774 ], "wc_reply_authors_avg": [ 139.5, 103.99639416825951 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 30, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13870656735508742182&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.hku.hk;hs-bremerhaven.de;;nuist.edu.cn;;shanghaitech.edu.cn;sydney.edu.au;tamu.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Hong Kong;Hochschule Bremerhaven;Nanjing University of Information Science and Technology;ShanghaiTech University;University of Sydney;Texas A&M University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.hku.hk;https://www.hs-bremerhaven.de;http://www.nuist.edu.cn;https://www.shanghaitech.edu.cn;https://www.sydney.edu.au;https://www.tamu.edu", "aff_unique_abbr": "HKU;;;ShanghaiTech;USYD;TAMU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;College Station", "aff_country_unique_index": "0;1;0;0;2;3", "aff_country_unique": "China;Germany;Australia;United States" }, { "title": "Conservative Offline Policy Adaptation in Multi-Agent Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72449", "id": "C8pvL8Qbfa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a31253f4871694f09541122d6b6f5ad1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C8pvL8Qbfa", "openreview": "https://openreview.net/forum?id=C8pvL8Qbfa", "poster": "/media/PosterPDFs/NeurIPS%202023/72449.png?t=1698600886.522368", "slides": "https://nips.cc/virtual/2023/poster/72449", "video": "https://nips.cc/virtual/2023/poster/72449", "author_site": "Chengjie Wu, Pingzhong Tang, Jun Yang, Yujing Hu, Tangjie Lv, Changjie Fan, Chongjie Zhang", "tldr": "", "abstract": "Prior research on policy adaptation in multi-agent games has often relied on online interaction with the target agent in training, which can be expensive and impractical in real-world scenarios. Inspired by recent progress in offline reinforcement learn- ing, this paper studies offline policy adaptation, which aims to utilize the target agent\u2019s behavior data to exploit its weakness or enable effective cooperation. We investigate its distinct challenges of distributional shift and risk-free deviation, and propose a novel learning objective, conservative offline adaptation, that optimizes the worst-case performance against any dataset consistent proxy models. We pro- pose an efficient algorithm called Constrained Self-Play (CSP) that incorporates dataset information into regularized policy learning. We prove that CSP learns a near-optimal risk-free offline adaptation policy upon convergence. 
Empirical results demonstrate that CSP outperforms non-conservative baselines in various environments, including Maze, predator-prey, MuJoCo, and Google Football.", "keywords": "reinforcement learning;opponent exploitation;multi-agent", "primary_area": "", "supplementary_material": "/attachment/e07ba2247b1e95645498a31f934dd7b33e030590.pdf", "author": "Chengjie Wu;Pingzhong Tang;Jun Yang;Yujing Hu;Tangjie Lv;Changjie Fan;Chongjie Zhang", "authorids": "~Chengjie_Wu1;~Pingzhong_Tang1;~Jun_Yang6;~Yujing_Hu2;~Tangjie_Lv1;~Changjie_Fan1;~Chongjie_Zhang1", "gender": "M;;M;;M;M;", "homepage": ";;;;;;", "dblp": "70/6141;96/3886;;https://dblp.uni-trier.de/pid/160/1923.html;;71/882;29/6693", "google_scholar": "fXL69VsAAAAJ;;ZrgN9ssAAAAJ;IR5WY-wAAAAJ;EIuWpJcAAAAJ;;LjxqXycAAAAJ", "orcid": ";;;;0000-0001-9858-809X;0000-0001-5420-0516;", "linkedin": ";;;;;;", "or_profile": "~Chengjie_Wu1;~Pingzhong_Tang1;~Jun_Yang6;~Yujing_Hu2;~Tangjie_Lv1;~Changjie_Fan1;~Chongjie_Zhang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;NetEase, Inc.;NetEase, Inc.;Netease, Fuxi AI Lab;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;corp.netease.com;netease.com;corp.netease.com;tsinghua.edu.cn", "position": "PhD student;Associate Professor;Assistant Professor;Researcher;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwu2023conservative,\ntitle={Conservative Offline Policy Adaptation in Multi-Agent Games},\nauthor={Chengjie Wu and Pingzhong Tang and Jun Yang and Yujing Hu and Tangjie Lv and Changjie Fan and Chongjie Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C8pvL8Qbfa}\n}", "github": "", "project": "", "reviewers": "bBXB;71Zz;dBj1;x4rE", "pdf_size": 4013333, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "123;86;105;130", "wc_strengths": "47;33;61;130", "wc_weaknesses": "274;84;117;178", "wc_questions": "33;27;5;76", "wc_limitations": "1;14;5;38", "wc_review": "478;244;293;552", "wc_reply_reviewers": "0;13;10;145", "wc_reply_authors": "0;30;28;337", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.0, 17.073371078963874 ], "wc_strengths_avg": [ 67.75, 37.278512577623054 ], "wc_weaknesses_avg": [ 163.25, 72.28891685452203 ], "wc_questions_avg": [ 35.25, 25.733004099793714 ], "wc_limitations_avg": [ 14.5, 14.361406616345072 ], "wc_review_avg": [ 391.75, 127.18171055619594 ], "wc_reply_reviewers_avg": [ 42.0, 59.661545404054024 ], "wc_reply_authors_avg": [ 98.75, 138.06407027174015 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14176352145508019571&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;corp.netease.com;netease.com;corp.netease.com;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;1;2;0", "aff_unique_norm": "Tsinghua University;NetEase, Inc.;Netease", "aff_unique_dep": ";;Fuxi AI Lab", 
"aff_unique_url": "https://www.tsinghua.edu.cn;https://www.163.com;https://www.netease.com", "aff_unique_abbr": "THU;NetEase;Netease", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fast Projected Newton-like Method for Precision Matrix Estimation under Total Positivity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72448", "id": "C9cgwmJ8Pt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e878c8f38381d0964677fb9536c494ee-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C9cgwmJ8Pt", "openreview": "https://openreview.net/forum?id=C9cgwmJ8Pt", "poster": "/media/PosterPDFs/NeurIPS%202023/72448.png?t=1701795067.471846", "slides": "https://nips.cc/virtual/2023/poster/72448", "video": "https://nips.cc/virtual/2023/poster/72448", "author_site": "Jian-Feng CAI, Jos\u00e9 Vin\u00edcius de Miranda Cardoso, Daniel Palomar, Jiaxi Ying", "tldr": "", "abstract": "We study the problem of estimating precision matrices in Gaussian distributions that are multivariate totally positive of order two ($\\mathrm{MTP}_2$). The precision matrix in such a distribution is an M-matrix. This problem can be formulated as a sign-constrained log-determinant program. Current algorithms are designed using the block coordinate descent method or the proximal point algorithm, which becomes computationally challenging in high-dimensional cases due to the requirement to solve numerous nonnegative quadratic programs or large-scale linear systems. To address this issue, we propose a novel algorithm based on the two-metric projection method, incorporating a carefully designed search direction and variable partitioning scheme. Our algorithm substantially reduces computational complexity, and its theoretical convergence is established. Experimental results on synthetic and real-world datasets demonstrate that our proposed algorithm provides a significant improvement in computational efficiency compared to the state-of-the-art methods.", "keywords": "MTP2;Total Positivity;Generalized graph Laplacian;Precision matrix estimation;Nonnegative partial correlations", "primary_area": "", "supplementary_material": "", "author": "Jian-Feng CAI;Jos\u00e9 Vin\u00edcius De Miranda Cardoso;Daniel P. Palomar;Jiaxi Ying", "authorids": "~Jian-Feng_CAI1;~Jos\u00e9_Vin\u00edcius_De_Miranda_Cardoso1;~Daniel_P._Palomar1;~Jiaxi_Ying1", "gender": "M;;M;M", "homepage": "https://www.math.ust.hk/~jfcai/;https://mirca.github.io;https://www.danielppalomar.com/;https://jxying.github.io/", "dblp": ";;;179/2448", "google_scholar": "Mo4v5iwAAAAJ;;qlReqq8AAAAJ;_IzItlcAAAAJ", "orcid": ";;0000-0001-5250-4874;", "linkedin": ";;;", "or_profile": "~Jian-Feng_CAI1;~Jos\u00e9_Vin\u00edcius_De_Miranda_Cardoso1;~Daniel_P._Palomar1;~Jiaxi_Ying1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk;ust.hk", "position": "Full Professor;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\ncai2023fast,\ntitle={Fast Projected Newton-like Method for Precision Matrix Estimation under Total Positivity},\nauthor={Jian-Feng CAI and Jos{\\'e} Vin{\\'\\i}cius De Miranda Cardoso and Daniel P. 
Palomar and Jiaxi Ying},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C9cgwmJ8Pt}\n}", "github": "", "project": "", "reviewers": "7MEo;oo93;KQq4;AisR", "pdf_size": 866098, "rating": "3;6;7;7", "confidence": "4;3;3;4", "soundness": "2;3;4;4", "novelty": "2;3;4;3", "presentation": "2;3;3;4", "wc_summary": "37;93;54;202", "wc_strengths": "58;39;7;144", "wc_weaknesses": "90;93;92;125", "wc_questions": "11;2;3;163", "wc_limitations": "3;7;1;35", "wc_review": "199;234;157;669", "wc_reply_reviewers": "10;17;6;156", "wc_reply_authors": "77;22;26;294", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 96.5, 64.20475060305117 ], "wc_strengths_avg": [ 62.0, 50.72967573324316 ], "wc_weaknesses_avg": [ 100.0, 14.474114826130128 ], "wc_questions_avg": [ 44.75, 68.36071605827429 ], "wc_limitations_avg": [ 11.5, 13.738631664034086 ], "wc_review_avg": [ 314.75, 206.33513394475503 ], "wc_reply_reviewers_avg": [ 47.25, 62.91015418833433 ], "wc_reply_authors_avg": [ 104.75, 111.39428845322367 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.45749571099781405, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8604695984266846299&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ust.hk;ust.hk;ust.hk;ust.hk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Causal discovery from observational and interventional data across multiple environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72447", "id": "C9wTM5xyw2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/368cba57d00902c752eaa9e4770bbbbe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C9wTM5xyw2", "openreview": "https://openreview.net/forum?id=C9wTM5xyw2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72447", "video": "https://nips.cc/virtual/2023/poster/72447", "author_site": "Adam Li, Amin Jaber, Elias Bareinboim", "tldr": "", "abstract": "A fundamental problem in many sciences is the learning of causal structure underlying a system, typically through observation and experimentation. Commonly, one even collects data across multiple domains, such as gene sequencing from different labs, or neural recordings from different species. Although there exist methods for learning the equivalence class of causal diagrams from observational and experimental data, they are meant to operate in a single domain. In this paper, we develop a fundamental approach to structure learning in non-Markovian systems (i.e. when there exist latent confounders) leveraging observational and interventional data collected from multiple domains. 
Specifically, we start by showing that learning from observational data in multiple domains is equivalent to learning from interventional data with unknown targets in a single domain. But there are also subtleties when considering observational and experimental data. Using causal invariances derived from do-calculus, we define a property called S-Markov that connects interventional distributions from multiple-domains to graphical criteria on a selection diagram. Leveraging the S-Markov property, we introduce a new constraint-based causal discovery algorithm, S-FCI, that can learn from observational and interventional data from different domains. We prove that the algorithm is sound and subsumes existing constraint-based causal discovery algorithms.", "keywords": "causal inference;causal discovery;transportability;multi-domain learning", "primary_area": "", "supplementary_material": "", "author": "Adam Li;Amin Jaber;Elias Bareinboim", "authorids": "~Adam_Li1;~Amin_Jaber1;~Elias_Bareinboim2", "gender": "M;M;M", "homepage": "https://adam2392.github.io;;https://causalai.net", "dblp": "176/3454;https://dblp.uni-trier.de/pers/hd/j/Jaber:Amin;85/9005", "google_scholar": "KxY17KcAAAAJ;Dfu661gAAAAJ;r5U-D7YAAAAJ", "orcid": "0000-0001-8421-365X;;", "linkedin": "adam2392/;;", "or_profile": "~Adam_Li1;~Amin_Jaber1;~Elias_Bareinboim2", "aff": "Columbia University;Purdue University;Columbia University", "aff_domain": "columbia.edu;cs.purdue.edu;columbia.edu", "position": "Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023causal,\ntitle={Causal discovery from observational and interventional data across multiple environments},\nauthor={Adam Li and Amin Jaber and Elias Bareinboim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C9wTM5xyw2}\n}", "github": "", "project": "", "reviewers": "WVJw;9ikg;KPtn", "pdf_size": 2446808, "rating": "4;5;6", "confidence": "3;2;3", "soundness": "2;3;3", "novelty": "3;2;3", "presentation": "1;2;3", "wc_summary": "40;75;46", "wc_strengths": "48;64;26", "wc_weaknesses": "307;77;45", "wc_questions": "120;24;734", "wc_limitations": "1;2;1", "wc_review": "516;242;852", "wc_reply_reviewers": "423;78;302", "wc_reply_authors": "1039;31;619", "reply_reviewers": "2;2;3", "reply_authors": "4;2;3", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 53.666666666666664, 15.2825245151302 ], "wc_strengths_avg": [ 46.0, 15.57776192739723 ], "wc_weaknesses_avg": [ 143.0, 116.69904312661123 ], "wc_questions_avg": [ 292.6666666666667, 314.5211527950951 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 536.6666666666666, 249.45986094404492 ], "wc_reply_reviewers_avg": [ 267.6666666666667, 142.92266751250094 ], "wc_reply_authors_avg": [ 563.0, 413.4150456865352 ], "reply_reviewers_avg": [ 2.3333333333333335, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16992119923825728683&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "columbia.edu;cs.purdue.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;1;0", 
"aff_unique_norm": "Columbia University;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.purdue.edu", "aff_unique_abbr": "Columbia;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bicriteria Multidimensional Mechanism Design with Side Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72446", "id": "C9wlNF1Ooj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8039ca1e9860daab3a79e45d010d5398-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=C9wlNF1Ooj", "openreview": "https://openreview.net/forum?id=C9wlNF1Ooj", "poster": "/media/PosterPDFs/NeurIPS%202023/72446.png?t=1701984789.498487", "slides": "https://nips.cc/virtual/2023/poster/72446", "video": "https://nips.cc/virtual/2023/poster/72446", "author_site": "Siddharth Prasad, Maria-Florina Balcan, Tuomas Sandholm", "tldr": "", "abstract": "We develop a versatile new methodology for multidimensional mechanism design that incorporates side information about agent types to generate high social welfare and high revenue simultaneously. Prominent sources of side information in practice include predictions from a machine-learning model trained on historical agent data, advice from domain experts, and even the mechanism designer's own gut instinct. In this paper we adopt a prior-free perspective that makes no assumptions on the correctness, accuracy, or source of the side information. First, we design a meta-mechanism that integrates input side information with an improvement of the classical VCG mechanism. The welfare, revenue, and incentive properties of our meta-mechanism are characterized by novel constructions we introduce based on the notion of a weakest competitor, which is an agent that has the smallest impact on welfare. We show that our meta-mechanism, when carefully instantiated, simultaneously achieves strong welfare and revenue guarantees parameterized by errors in the side information. When the side information is highly informative and accurate, our mechanism achieves welfare and revenue competitive with the total social surplus, and its performance decays continuously and gradually as the quality of the side information decreases. Finally, we apply our meta-mechanism to a setting where each agent's type is determined by a constant number of parameters. Specifically, agent types lie on constant-dimensional subspaces (of the potentially high-dimensional ambient type space) that are known to the mechanism designer. 
We use our meta-mechanism to obtain the first known welfare and revenue guarantees in this setting.", "keywords": "mechanism design;revenue maximization;welfare maximization;side information;weakest competitors;algorithms with predictions;learning-augmented algorithms", "primary_area": "", "supplementary_material": "/attachment/5ef1efd2427bd900b3c556899182e36e5a9b30ab.pdf", "author": "Siddharth Prasad;Nina Balcan;Tuomas Sandholm", "authorids": "~Siddharth_Prasad1;~Nina_Balcan1;~Tuomas_Sandholm1", "gender": ";F;M", "homepage": "https://www.cs.cmu.edu/~sprasad2/;http://www.cs.cmu.edu/~ninamf/;http://www.cs.cmu.edu/~sandholm", "dblp": "227/2787;b/MariaFlorinaBalcan;s/TuomasSandholm", "google_scholar": "qW72Z4YAAAAJ;https://scholar.google.com.tw/citations?user=LWlN_BUAAAAJ;0DpK1EMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Siddharth_Prasad1;~Nina_Balcan1;~Tuomas_Sandholm1", "aff": "Computer Science Department, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nprasad2023bicriteria,\ntitle={Bicriteria Multidimensional Mechanism Design with Side Information},\nauthor={Siddharth Prasad and Nina Balcan and Tuomas Sandholm},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=C9wlNF1Ooj}\n}", "github": "", "project": "", "reviewers": "We4E;A5XS;B9vf;Whzb;94sR;kPNZ", "pdf_size": 562858, "rating": "5;5;6;7;7;7", "confidence": "2;3;4;4;2;2", "soundness": "3;3;3;4;3;3", "novelty": "2;3;3;4;4;3", "presentation": "3;4;3;3;3;3", "wc_summary": "71;319;73;68;47;151", "wc_strengths": "141;25;48;67;57;90", "wc_weaknesses": "67;40;49;31;51;49", "wc_questions": "52;23;122;1;11;71", "wc_limitations": "4;2;6;9;1;1", "wc_review": "335;409;298;176;167;362", "wc_reply_reviewers": "0;0;11;10;0;19", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;1;1;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 2.8333333333333335, 0.8975274678557507 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 121.5, 94.15545655988292 ], "wc_strengths_avg": [ 71.33333333333333, 36.781637931023255 ], "wc_weaknesses_avg": [ 47.833333333333336, 10.991158062531698 ], "wc_questions_avg": [ 46.666666666666664, 41.25799585804214 ], "wc_limitations_avg": [ 3.8333333333333335, 2.91070819942883 ], "wc_review_avg": [ 291.1666666666667, 90.87613670387965 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 7.249521056977182 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.034482758620689676, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18204548607236702785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "cs.cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": 
"Pointwise uncertainty quantification for sparse variational Gaussian process regression with a Brownian motion prior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72445", "id": "CA8tMQiscx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/176a579942089c4cdc70136c567932ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CA8tMQiscx", "openreview": "https://openreview.net/forum?id=CA8tMQiscx", "poster": "/media/PosterPDFs/NeurIPS%202023/72445.png?t=1701169180.7226872", "slides": "https://nips.cc/virtual/2023/poster/72445", "video": "https://nips.cc/virtual/2023/poster/72445", "author_site": "Luke Travis, Kolyan Ray", "tldr": "", "abstract": "We study pointwise estimation and uncertainty quantification for a sparse variational Gaussian process method with eigenvector inducing variables. For a rescaled Brownian motion prior, we derive theoretical guarantees and limitations for the frequentist size and coverage of pointwise credible sets. For sufficiently many inducing variables, we precisely characterize the asymptotic frequentist coverage, deducing when credible sets from this variational method are conservative and when overconfident/misleading. We numerically illustrate the applicability of our results and discuss connections with other common Gaussian process priors.", "keywords": "Gaussian process;sparse variational Bayes;uncertainty quantification;theoretical guarantees", "primary_area": "", "supplementary_material": "/attachment/678260762ab00e5a81a6e0ccea2772ecb025b4b0.zip", "author": "Luke Travis;Kolyan Ray", "authorids": "~Luke_Travis1;~Kolyan_Ray1", "gender": "M;M", "homepage": ";https://kolyanray.wordpress.com/", "dblp": ";249/5594", "google_scholar": ";https://scholar.google.nl/citations?user=wv5Bn5kAAAAJ", "orcid": ";0000-0002-2874-092X", "linkedin": "luke-travis-140306143/;", "or_profile": "~Luke_Travis1;~Kolyan_Ray1", "aff": "Imperial College London;Imperial College London", "aff_domain": "ic.ac.uk;imperial.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ntravis2023pointwise,\ntitle={Pointwise uncertainty quantification for sparse variational Gaussian process regression with a Brownian motion prior},\nauthor={Luke Travis and Kolyan Ray},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CA8tMQiscx}\n}", "github": "", "project": "", "reviewers": "tkgm;oD8s;BMyX;sbVc", "pdf_size": 401753, "rating": "4;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "27;126;82;58", "wc_strengths": "22;148;139;105", "wc_weaknesses": "134;262;98;58", "wc_questions": "410;179;76;52", "wc_limitations": "3;17;7;89", "wc_review": "596;732;402;362", "wc_reply_reviewers": "0;108;4;7", "wc_reply_authors": "0;268;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.25, 36.16196205960069 ], "wc_strengths_avg": [ 103.5, 49.711668650327965 ], "wc_weaknesses_avg": [ 138.0, 76.47221717722064 ], "wc_questions_avg": [ 179.25, 141.50861281208293 ], "wc_limitations_avg": [ 29.0, 35.014282800023196 ], "wc_review_avg": [ 523.0, 149.64290828502365 ], "wc_reply_reviewers_avg": [ 29.75, 45.245856163852174 ], 
"wc_reply_authors_avg": [ 67.0, 116.04740410711477 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18403055284792542254&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ic.ac.uk;imperial.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Information Maximization Perspective of Orthogonal Matching Pursuit with Applications to Explainable AI", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72444", "id": "CAF4CnUblx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/08eac13583b310ec55d755f99c549be3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CAF4CnUblx", "openreview": "https://openreview.net/forum?id=CAF4CnUblx", "poster": "/media/PosterPDFs/NeurIPS%202023/72444.png?t=1700191710.8490658", "slides": "https://nips.cc/virtual/2023/poster/72444", "video": "https://nips.cc/virtual/2023/poster/72444", "author_site": "Aditya Chattopadhyay, Ryan Pilgrim, Rene Vidal", "tldr": "", "abstract": "Information Pursuit (IP) is a classical active testing algorithm for predicting an output by sequentially and greedily querying the input in order of information gain. However, IP is computationally intensive since it involves estimating mutual information in high-dimensional spaces. This paper explores Orthogonal Matching Pursuit (OMP) as an alternative to IP for greedily selecting the queries. OMP is a classical signal processing algorithm for sequentially encoding a signal in terms of dictionary atoms chosen in order of correlation gain. In each iteration, OMP selects the atom that is most correlated with the signal residual (the signal minus its reconstruction thus far). Our first contribution is to establish a fundamental connection between IP and OMP, where we prove that IP with random projections of dictionary atoms as queries ``almost'' reduces to OMP, with the difference being that IP selects atoms in order of normalized correlation gain. We call this version IP-OMP and present simulations indicating that this difference does not have any appreciable effect on the sparse code recovery rate of IP-OMP compared to that of OMP for random Gaussian dictionaries. Inspired by this connection, our second contribution is to explore the utility of IP-OMP for generating explainable predictions, an area in which IP has recently gained traction. More specifically, we propose a simple explainable AI algorithm which encodes an image as a sparse combination of semantically meaningful dictionary atoms that are defined as text embeddings of interpretable concepts. The final prediction is made using the weights of this sparse combination, which serve as an explanation. 
Empirically, our proposed algorithm is not only competitive with existing explainability methods but also computationally less expensive.", "keywords": "Information Maximization;Sparse Coding;Orthogonal Matching Pursuit;Explainable AI;Information Pursuit", "primary_area": "", "supplementary_material": "", "author": "Aditya Chattopadhyay;Ryan Pilgrim;Rene Vidal", "authorids": "~Aditya_Chattopadhyay1;~Ryan_Pilgrim1;~Rene_Vidal1", "gender": "M;;", "homepage": ";;http://www.vision.jhu.edu", "dblp": "207/8574;;v/ReneVidal", "google_scholar": "aekzv1gAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;rene-vidal-74844928/", "or_profile": "~Aditya_Chattopadhyay1;~Ryan_Pilgrim1;~Rene_Vidal1", "aff": "Johns Hopkins University;;Amazon", "aff_domain": "jhu.edu;;amazon.com", "position": "PhD student;;Principal Researcher", "bibtex": "@inproceedings{\nchattopadhyay2023information,\ntitle={Information Maximization Perspective of Orthogonal Matching Pursuit with Applications to Explainable {AI}},\nauthor={Aditya Chattopadhyay and Ryan Pilgrim and Rene Vidal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CAF4CnUblx}\n}", "github": "", "project": "", "reviewers": "FGVf;osjG;VN2v;tdY5", "pdf_size": 36154672, "rating": "5;6;7;8", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "3;4;3;3", "wc_summary": "157;71;103;61", "wc_strengths": "112;28;113;42", "wc_weaknesses": "312;67;198;27", "wc_questions": "48;4;213;6", "wc_limitations": "2;5;6;22", "wc_review": "631;175;633;158", "wc_reply_reviewers": "270;10;240;14", "wc_reply_authors": "31;0;21;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 37.429934544425805 ], "wc_strengths_avg": [ 73.75, 39.06644980030819 ], "wc_weaknesses_avg": [ 151.0, 112.42997820866105 ], "wc_questions_avg": [ 67.75, 85.68073003890665 ], "wc_limitations_avg": [ 8.75, 7.790218225441442 ], "wc_review_avg": [ 399.25, 232.8286655461479 ], "wc_reply_reviewers_avg": [ 133.5, 121.97028326604804 ], "wc_reply_authors_avg": [ 13.0, 13.47219358530748 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17509760222295360980&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jhu.edu;;amazon.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Johns Hopkins University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.jhu.edu;https://www.amazon.com", "aff_unique_abbr": "JHU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Neural Data Transformer 2: Multi-context Pretraining for Neural Spiking Activity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72443", "id": "CBBtMnlTGq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe51de4e7baf52e743b679e3bdba7905-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CBBtMnlTGq", "openreview": 
"https://openreview.net/forum?id=CBBtMnlTGq", "poster": "/media/PosterPDFs/NeurIPS%202023/72443.png?t=1702152180.4199398", "slides": "https://nips.cc/virtual/2023/poster/72443", "video": "https://nips.cc/virtual/2023/poster/72443", "author_site": "Joel Ye, Jennifer Collinger, Leila Wehbe, Robert Gaunt", "tldr": "", "abstract": "The neural population spiking activity recorded by intracortical brain-computer interfaces (iBCIs) contain rich structure. Current models of such spiking activity are largely prepared for individual experimental contexts, restricting data volume to that collectable within a single session and limiting the effectiveness of deep neural networks (DNNs). The purported challenge in aggregating neural spiking data is the pervasiveness of context-dependent shifts in the neural data distributions. However, large scale unsupervised pretraining by nature spans heterogeneous data, and has proven to be a fundamental recipe for successful representation learning across deep learning. We thus develop Neural Data Transformer 2 (NDT2), a spatiotemporal Transformer for neural spiking activity, and demonstrate that pretraining can leverage motor BCI datasets that span sessions, subjects, and experimental tasks. NDT2 enables rapid adaptation to novel contexts in downstream decoding tasks and opens the path to deployment of pretrained DNNs for iBCI control. Code: https://github.com/joel99/context_general_bci", "keywords": "Pretraining;Scaling Laws;Neuroscience;Brain-computer interfaces", "primary_area": "", "supplementary_material": "/attachment/455c24da86e2f608c16a4f5328f433e1d390c3b4.zip", "author": "Joel Ye;Jennifer L Collinger;Leila Wehbe;Robert Gaunt", "authorids": "~Joel_Ye1;~Jennifer_L_Collinger1;~Leila_Wehbe1;~Robert_Gaunt1", "gender": "M;F;F;M", "homepage": "https://joel99.github.io;https://www.rnel.pitt.edu;http://www.cs.cmu.edu/~lwehbe/;https://www.rnel.pitt.edu", "dblp": ";;125/4359;", "google_scholar": "CUrST4oAAAAJ;;YezyUawAAAAJ;lAoUqf8AAAAJ", "orcid": ";0000-0002-4517-5395;0000-0001-8545-2062;0000-0001-6202-5818", "linkedin": "joelye/;;;", "or_profile": "~Joel_Ye1;~Jennifer_L_Collinger1;~Leila_Wehbe1;~Robert_Gaunt1", "aff": "Carnegie Mellon University;University of Pittsburgh;Carnegie Mellon University;University of Pittsburgh", "aff_domain": "cmu.edu;pitt.edu;cmu.edu;pitt.edu", "position": "PhD student;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nye2023neural,\ntitle={Neural Data Transformer 2: Multi-context Pretraining for Neural Spiking Activity},\nauthor={Joel Ye and Jennifer L Collinger and Leila Wehbe and Robert Gaunt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CBBtMnlTGq}\n}", "github": "", "project": "", "reviewers": "cSw7;p9Yw;mmUN;yEFS", "pdf_size": 1723173, "rating": "5;5;7;7", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;4;3", "presentation": "2;2;2;3", "wc_summary": "85;91;115;64", "wc_strengths": "81;93;141;154", "wc_weaknesses": "30;329;198;95", "wc_questions": "38;112;689;77", "wc_limitations": "2;8;10;24", "wc_review": "236;633;1153;414", "wc_reply_reviewers": "11;107;53;78", "wc_reply_authors": "0;13;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 
18.171062159378575 ], "wc_strengths_avg": [ 117.25, 30.889925542156945 ], "wc_weaknesses_avg": [ 163.0, 113.01990975045061 ], "wc_questions_avg": [ 229.0, 266.86794487161626 ], "wc_limitations_avg": [ 11.0, 8.06225774829855 ], "wc_review_avg": [ 609.0, 344.11698592193903 ], "wc_reply_reviewers_avg": [ 62.25, 35.223394214640926 ], "wc_reply_authors_avg": [ 3.25, 5.629165124598851 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15279670420488099494&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "cmu.edu;pitt.edu;cmu.edu;pitt.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;University of Pittsburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.pitt.edu", "aff_unique_abbr": "CMU;Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Equivariant Neural Simulators for Stochastic Spatiotemporal Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72442", "id": "CCVsGbhFdj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a8d388b7a17df480856dff1cc079b08-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CCVsGbhFdj", "openreview": "https://openreview.net/forum?id=CCVsGbhFdj", "poster": "/media/PosterPDFs/NeurIPS%202023/72442.png?t=1700043335.9951665", "slides": "https://nips.cc/virtual/2023/poster/72442", "video": "https://nips.cc/virtual/2023/poster/72442", "author_site": "Koen Minartz, Yoeri Poels, Simon Koop, Vlado Menkovski", "tldr": "", "abstract": "Neural networks are emerging as a tool for scalable data-driven simulation of high-dimensional dynamical systems, especially in settings where numerical methods are infeasible or computationally expensive. Notably, it has been shown that incorporating domain symmetries in deterministic neural simulators can substantially improve their accuracy, sample efficiency, and parameter efficiency. However, to incorporate symmetries in probabilistic neural simulators that can simulate stochastic phenomena, we need a model that produces equivariant distributions over trajectories, rather than equivariant function approximations. In this paper, we propose Equivariant Probabilistic Neural Simulation (EPNS), a framework for autoregressive probabilistic modeling of equivariant distributions over system evolutions. We use EPNS to design models for a stochastic n-body system and stochastic cellular dynamics. Our results show that EPNS considerably outperforms existing neural network-based methods for probabilistic simulation. More specifically, we demonstrate that incorporating equivariance in EPNS improves simulation quality, data efficiency, rollout stability, and uncertainty quantification. 
We conclude that EPNS is a promising method for efficient and effective data-driven probabilistic simulation in a diverse range of domains.", "keywords": "stochastic simulation;equivariance;dynamical systems;probabilistic simulation;generative models", "primary_area": "", "supplementary_material": "", "author": "Koen Minartz;Yoeri Poels;Simon Martinus Koop;Vlado Menkovski", "authorids": "~Koen_Minartz1;~Yoeri_Poels1;~Simon_Martinus_Koop1;~Vlado_Menkovski2", "gender": "M;;;M", "homepage": "http://kminartz.github.io;;https://research.tue.nl/en/persons/simon-m-koop;https://vlamen.github.io", "dblp": "318/0581;;;06/726", "google_scholar": "QsRqn94AAAAJ;;;2s9HUEMAAAAJ", "orcid": "0000-0002-6459-8692;;;0000-0001-5262-0605", "linkedin": "koen-minartz/;;;", "or_profile": "~Koen_Minartz1;~Yoeri_Poels1;~Simon_Martinus_Koop1;~Vlado_Menkovski2", "aff": "Eindhoven University of Technology;;Eindhoven University of Technology;Eindhoven University of Technology", "aff_domain": "tue.nl;;tue.nl;tue.nl", "position": "PhD student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nminartz2023equivariant,\ntitle={Equivariant Neural Simulators for Stochastic Spatiotemporal Dynamics},\nauthor={Koen Minartz and Yoeri Poels and Simon Martinus Koop and Vlado Menkovski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CCVsGbhFdj}\n}", "github": "", "project": "", "reviewers": "QKcR;yRZx;AV4g;n81N", "pdf_size": 2228113, "rating": "4;5;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "79;71;55;107", "wc_strengths": "41;139;65;89", "wc_weaknesses": "301;387;106;112", "wc_questions": "386;5;194;103", "wc_limitations": "40;7;15;9", "wc_review": "847;609;435;420", "wc_reply_reviewers": "129;174;18;0", "wc_reply_authors": "227;27;24;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 18.841443681416774 ], "wc_strengths_avg": [ 83.5, 36.25948151863179 ], "wc_weaknesses_avg": [ 226.5, 121.38883803711114 ], "wc_questions_avg": [ 172.0, 140.47241722131787 ], "wc_limitations_avg": [ 17.75, 13.179055353097201 ], "wc_review_avg": [ 577.75, 172.2895455330938 ], "wc_reply_reviewers_avg": [ 80.25, 73.28156316564215 ], "wc_reply_authors_avg": [ 69.5, 91.53278101314305 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6673652905788944687&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tue.nl;;tue.nl;tue.nl", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Eindhoven University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tue.nl", "aff_unique_abbr": "TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Video Dynamics Prior: An Internal Learning Approach for Robust Video Enhancements", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72441", "id": "CCq73CGMyV", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ba85c6f1c7656a6a647bc4d63b90bf0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CCq73CGMyV", "openreview": "https://openreview.net/forum?id=CCq73CGMyV", "poster": "/media/PosterPDFs/NeurIPS%202023/72441.png?t=1702126507.7969835", "slides": "https://nips.cc/virtual/2023/poster/72441", "video": "https://nips.cc/virtual/2023/poster/72441", "author_site": "Gaurav Shrivastava, Gaurav Shrivastava, Ser Nam Lim, Abhinav Shrivastava", "tldr": "", "abstract": "In this paper, we present a novel robust framework for low-level vision tasks, including denoising, object removal, frame interpolation, and super-resolution, that does not require any external training data corpus. Our proposed approach directly learns the weights of neural modules by optimizing over the corrupted test sequence, leveraging the spatio-temporal coherence and internal statistics of videos. Furthermore, we introduce a novel spatial pyramid loss that leverages the property of spatio-temporal patch recurrence in a video across the different scales of the video. This loss enhances robustness to unstructured noise in both the spatial and temporal domains. This further results in our framework being highly robust to degradation in input frames and yields state-of-the-art results on downstream tasks such as denoising, object removal, and frame interpolation. To validate the effectiveness of our approach, we conduct qualitative and quantitative evaluations on standard video datasets such as DAVIS, UCF-101, and VIMEO90K-T.", "keywords": "Computational Photography;Deep Internal Learning;low-level vision;video denoising;video super-resolution;video frame interpolation;video inpainting", "primary_area": "", "supplementary_material": "", "author": "Gaurav Shrivastava;Ser-Nam Lim;Abhinav Shrivastava", "authorids": "~Gaurav_Shrivastava1;~Ser-Nam_Lim3;~Abhinav_Shrivastava2", "gender": "M;M;M", "homepage": "http://www.cs.umd.edu/~gauravsh/;http://abhinavsh.info;https://sites.google.com/site/sernam", "dblp": "225/6433;65/10572;04/6633", "google_scholar": ";mIF9BowAAAAJ;HX0BfLYAAAAJ", "orcid": ";0000-0001-8928-8554;", "linkedin": "gshrivastava1/;;", "or_profile": "~Gaurav_Shrivastava1;~Abhinav_Shrivastava2;~Ser-Nam_Lim1", "aff": "Research, Google;Department of Computer Science, University of Maryland, College Park;Meta Facebook", "aff_domain": "research.google.com;cs.umd.edu;facebook.com", "position": "Intern;Assistant Professor;Research Scientist Manager", "bibtex": "@inproceedings{\nshrivastava2023video,\ntitle={Video Dynamics Prior: An Internal Learning Approach for Robust Video Enhancements},\nauthor={Gaurav Shrivastava and Ser-Nam Lim and Abhinav Shrivastava},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CCq73CGMyV}\n}", "github": "", "project": "", "reviewers": "fnUo;Pc2m;U9TZ;6LSS", "pdf_size": 6884257, "rating": "3;5;7;7", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;2;4;4", "wc_summary": "63;36;184;86", "wc_strengths": "31;33;73;44", "wc_weaknesses": "128;142;136;155", "wc_questions": "292;2;299;7", "wc_limitations": "9;2;31;34", "wc_review": "523;215;723;326", "wc_reply_reviewers": "360;37;222;18", "wc_reply_authors": "408;44;259;37", "reply_reviewers": "2;1;2;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 
0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 92.25, 55.84968665981932 ], "wc_strengths_avg": [ 45.25, 16.768646337734005 ], "wc_weaknesses_avg": [ 140.25, 9.858372076565177 ], "wc_questions_avg": [ 150.0, 145.531783470141 ], "wc_limitations_avg": [ 19.0, 13.765899897936205 ], "wc_review_avg": [ 446.75, 193.91799168720783 ], "wc_reply_reviewers_avg": [ 159.25, 140.65449690642671 ], "wc_reply_authors_avg": [ 187.0, 155.70324338304582 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12509195487058943291&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "research.google.com;cs.umd.edu;facebook.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;University of Maryland, College Park;Meta", "aff_unique_dep": "Google Research;Department of Computer Science;Meta Platforms, Inc.", "aff_unique_url": "https://research.google;https://www.umd.edu;https://meta.com", "aff_unique_abbr": "Google;UMD;Meta", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;College Park;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Refined Mechanism Design for Approximately Structured Priors via Active Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72440", "id": "CDTifMbUNc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4f693c15f189efd888b6782a5f4eccb1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CDTifMbUNc", "openreview": "https://openreview.net/forum?id=CDTifMbUNc", "poster": "/media/PosterPDFs/NeurIPS%202023/72440.png?t=1701813626.8816156", "slides": "https://nips.cc/virtual/2023/poster/72440", "video": "https://nips.cc/virtual/2023/poster/72440", "author_site": "Christos Boutsikas, Petros Drineas, Marios Mertzanidis, Alexandros Psomas, Paritosh Verma", "tldr": "", "abstract": "We consider the problem of a revenue-maximizing seller with a large number of items $m$ for sale to $n$ strategic bidders, whose valuations are drawn independently from high-dimensional, unknown prior distributions. It is well-known that optimal and even approximately-optimal mechanisms for this setting are notoriously difficult to characterize or compute, and, even when they can be found, are often rife with various counter-intuitive properties. In this paper, following a model introduced recently by Cai and Daskalakis [CD22], we consider the case that bidders' prior distributions can be well-approximated by a topic model. We design an active learning component, responsible for interacting with the bidders and outputting low-dimensional approximations of their types, and a mechanism design component, responsible for robustifying mechanisms for the low-dimensional model to work for the approximate types of the former component. On the active learning front, we cast our problem in the framework of Randomized Linear Algebra (RLA) for regression problems, allowing us to import several breakthrough results from that line of research, and adapt them to our setting. On the mechanism design front, we remove many restrictive assumptions of prior work on the type of access needed to the underlying distributions and the associated mechanisms.
To the best of our knowledge, our work is the first to formulate connections between mechanism design, and RLA for active learning of regression problems, opening the door for further applications of randomized linear algebra primitives to mechanism design.", "keywords": "mechanism design;revenue maximization;randomized linear algebra;active regression", "primary_area": "", "supplementary_material": "/attachment/e190c2bda613da83b4fffb39f3144e056d0c9e49.pdf", "author": "Christos Boutsikas;Petros Drineas;Marios Mertzanidis;Alexandros Psomas;Paritosh Verma", "authorids": "cboutsik@purdue.edu;~Petros_Drineas1;~Marios_Mertzanidis1;~Alexandros_Psomas1;~Paritosh_Verma1", "gender": ";;M;;", "homepage": ";https://www.cs.purdue.edu/homes/pdrineas/;https://mertzanidismarios.com/;https://www.alexpsomas.com/;https://sites.google.com/view/paritoshverma/", "dblp": ";67/1567;;19/10537;238/7992.html", "google_scholar": ";https://scholar.google.co.uk/citations?user=Yw2PquQAAAAJ;;FrTxJzcAAAAJ;jDFO-20AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "cboutsik@purdue.edu;~Petros_Drineas1;~Marios_Mertzanidis1;~Alexandros_Psomas1;~Paritosh_Verma1", "aff": ";Purdue University;Purdue University;Purdue University;Purdue University", "aff_domain": ";purdue.edu;purdue.edu;purdue.edu;purdue.edu", "position": ";Professor;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nboutsikas2023refined,\ntitle={Refined Mechanism Design for Approximately Structured Priors via Active Regression},\nauthor={Christos Boutsikas and Petros Drineas and Marios Mertzanidis and Alexandros Psomas and Paritosh Verma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CDTifMbUNc}\n}", "github": "", "project": "", "reviewers": "NuQU;wo8x;RniX;mHuT;Nu2U;pEGj", "pdf_size": 827891, "rating": "3;5;6;7;7;7", "confidence": "2;2;3;3;4;1", "soundness": "2;2;4;4;4;3", "novelty": "2;2;3;3;3;3", "presentation": "1;1;3;3;3;3", "wc_summary": "74;68;155;55;105;62", "wc_strengths": "22;45;97;75;100;16", "wc_weaknesses": "203;58;44;167;46;2", "wc_questions": "359;76;210;44;100;49", "wc_limitations": "2;1;1;74;1;44", "wc_review": "660;248;507;415;352;173", "wc_reply_reviewers": "157;65;10;109;9;0", "wc_reply_authors": "129;0;0;10;0;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "2;1;1;2;1;1", "rating_avg": [ 5.833333333333333, 1.462494064565354 ], "confidence_avg": [ 2.5, 0.9574271077563381 ], "soundness_avg": [ 3.1666666666666665, 0.8975274678557507 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820635 ], "wc_summary_avg": [ 86.5, 34.46133098609706 ], "wc_strengths_avg": [ 59.166666666666664, 33.65222462515996 ], "wc_weaknesses_avg": [ 86.66666666666667, 72.4008901111643 ], "wc_questions_avg": [ 139.66666666666666, 112.58576977378426 ], "wc_limitations_avg": [ 20.5, 28.570089254323307 ], "wc_review_avg": [ 392.5, 161.17976506580058 ], "wc_reply_reviewers_avg": [ 58.333333333333336, 58.479816081182136 ], "wc_reply_authors_avg": [ 23.166666666666668, 47.47075122875372 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.29756985032180766, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:8Y7eCrdnk30J:scholar.google.com/&scioq=Refined+Mechanism+Design+for+Approximately+Structured+Priors+via+Active+Regression&hl=en&as_sdt=0,39", "gs_version_total": 7, "email": ";purdue.edu;purdue.edu;purdue.edu;purdue.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Meet in the Middle: A New Pre-training Paradigm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72439", "id": "CEk6JK71Mb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/105fdc31cc9eb927cc5a0110f4031287-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CEk6JK71Mb", "openreview": "https://openreview.net/forum?id=CEk6JK71Mb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72439", "video": "https://nips.cc/virtual/2023/poster/72439", "author_site": "Anh Nguyen, Nikos Karampatziakis, Weizhu Chen", "tldr": "", "abstract": "Most language models (LMs) are trained and applied in an autoregressive left-to-right fashion, predicting the next token from the preceding ones. However, this ignores that the full sequence is available during training. \nIn this paper, we introduce ``Meet in the Middle'' (MIM) a new pre-training paradigm that improves data \nefficiency by training in two directions, left-to-right and right-to-left, and encouraging the respective models\nto agree on their token distribution for each position. While the primary outcome is an improved left-to-right LM,\nwe also obtain secondary benefits in the infilling task. There, we leverage the two pre-trained directions to propose an infilling procedure that builds the completion simultaneously from both sides. 
We conduct extensive experiments on both programming and natural languages and show that MIM significantly surpasses existing pre-training paradigms, in both left-to-right generation as well as infilling.\nCode and models available at https://github.com/microsoft/Meet-in-the-Middle", "keywords": "language modeling;pre-training;deep learning;NLP", "primary_area": "", "supplementary_material": "/attachment/49cbd8f67550e553e34ea500601a08d85ae8d769.pdf", "author": "Anh Tuan Nguyen;Nikos Karampatziakis;Weizhu Chen", "authorids": "~Anh_Tuan_Nguyen4;~Nikos_Karampatziakis1;~Weizhu_Chen1", "gender": "M;;M", "homepage": ";;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": ";91/8774;79/2536", "google_scholar": "https://scholar.google.com/citations?hl=en;;LG_E-4EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Anh_Tuan_Nguyen4;~Nikos_Karampatziakis1;~Weizhu_Chen1", "aff": "Microsoft;Microsoft;Microsoft GenAI", "aff_domain": "microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Vice President", "bibtex": "@inproceedings{\nnguyen2023meet,\ntitle={Meet in the Middle: A New Pre-training Paradigm},\nauthor={Anh Tuan Nguyen and Nikos Karampatziakis and Weizhu Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CEk6JK71Mb}\n}", "github": "", "project": "", "reviewers": "SDkH;M9cS;V8eN;5x9n;zFcY", "pdf_size": 317201, "rating": "5;6;7;7;8", "confidence": "4;5;4;4;5", "soundness": "3;3;3;3;3", "novelty": "2;3;4;3;3", "presentation": "2;4;3;3;3", "wc_summary": "56;84;82;111;89", "wc_strengths": "56;59;87;36;54", "wc_weaknesses": "82;52;106;136;48", "wc_questions": "142;113;1;59;40", "wc_limitations": "2;1;1;1;1", "wc_review": "338;309;277;343;232", "wc_reply_reviewers": "49;11;16;10;149", "wc_reply_authors": "119;0;0;0;54", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 84.4, 17.556765077883796 ], "wc_strengths_avg": [ 58.4, 16.402438843050138 ], "wc_weaknesses_avg": [ 84.8, 33.19277029715959 ], "wc_questions_avg": [ 71.0, 50.61620293937506 ], "wc_limitations_avg": [ 1.2, 0.4000000000000001 ], "wc_review_avg": [ 299.8, 41.29600464936045 ], "wc_reply_reviewers_avg": [ 47.0, 52.9792412176694 ], "wc_reply_authors_avg": [ 34.6, 47.09819529451208 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3202563076101743, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13809009579759706734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "microsoft.com;microsoft.com;microsoft.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarially Robust Learning with Uncertain Perturbation Sets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72438", "id": "CFQBcz7k8n", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1097a0aeaf00cacfa8f6aced24f3a8bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CFQBcz7k8n", "openreview": "https://openreview.net/forum?id=CFQBcz7k8n", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72438", "video": "https://nips.cc/virtual/2023/poster/72438", "author_site": "Tosca Lechner, Vinayak Pathak, Ruth Urner", "tldr": "", "abstract": "In many real-world settings exact perturbation sets to be used by an adversary are not plausibly available to a learner. While prior literature has studied both scenarios with completely known and completely unknown perturbation sets, we propose an in-between setting of learning with respect to a class of perturbation sets. We show that in this setting we can improve on previous results with completely unknown perturbation sets, while still addressing the concerns of not having perfect knowledge of these sets in real life. In particular, we give the first positive results for the learnability of infinite Littlestone classes when having access to a perfect-attack oracle. We also consider a setting of learning with abstention, where predictions are considered robustness violations, only when the wrong prediction is made within the perturbation set. We show there are classes for which perturbation-set unaware learning without query access is possible, but abstention is required.", "keywords": "adversarially robust learning", "primary_area": "", "supplementary_material": "/attachment/4d5fee660f69af3d7967b9ce7ab0b1f364492de9.pdf", "author": "Tosca Lechner;Vinayak Pathak;Ruth Urner", "authorids": "~Tosca_Lechner1;~Vinayak_Pathak1;~Ruth_Urner3", "gender": "F;M;F", "homepage": "https://toscalechner.github.io/;;https://www.eecs.yorku.ca/~ruth/", "dblp": "267/6485.html;72/9960;68/8050", "google_scholar": ";W1P2QnoAAAAJ;https://scholar.google.ca/citations?user=O7p7lRAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tosca_Lechner1;~Vinayak_Pathak1;~Ruth_Urner3", "aff": "University of Waterloo;Layer 6 AI;York University", "aff_domain": "uwaterloo.ca;layer6.ai;yorku.ca", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nlechner2023adversarially,\ntitle={Adversarially Robust Learning with Uncertain Perturbation Sets},\nauthor={Tosca Lechner and Vinayak Pathak and Ruth Urner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CFQBcz7k8n}\n}", "github": "", "project": "", "reviewers": "wkxJ;hPab;o4ch;uGK4;2e2f", "pdf_size": 328894, "rating": "6;6;6;7;7", "confidence": "3;3;5;3;3", "soundness": "3;4;4;4;4", "novelty": "3;3;3;3;3", "presentation": "2;4;3;3;4", "wc_summary": "184;446;236;157;201", "wc_strengths": "76;41;68;83;57", "wc_weaknesses": "57;58;96;91;62", "wc_questions": "84;23;1;16;94", "wc_limitations": "42;7;4;33;1", "wc_review": "443;575;405;380;415", "wc_reply_reviewers": "26;0;0;0;47", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 244.8, 103.81021144376886 ], "wc_strengths_avg": [ 65.0, 14.791889669680478 ], "wc_weaknesses_avg": [ 72.8, 17.057549648176316 ], "wc_questions_avg": [ 43.6, 37.876641878603756 ], "wc_limitations_avg": [ 17.4, 16.764247671756703 ], 
"wc_review_avg": [ 443.6, 68.73019714797857 ], "wc_reply_reviewers_avg": [ 14.6, 19.07459042810618 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6810108231981748704&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "uwaterloo.ca;layer6.ai;yorku.ca", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Waterloo;Layer 6 AI;York University", "aff_unique_dep": ";;", "aff_unique_url": "https://uwaterloo.ca;https://layer6.ai;https://www.yorku.ca", "aff_unique_abbr": "UW;Layer 6 AI;York U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Dual Mean-Teacher: An Unbiased Semi-Supervised Framework for Audio-Visual Source Localization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72437", "id": "CFhpBJ8eZ5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98143953a7fd1319175b491888fc8df5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CFhpBJ8eZ5", "openreview": "https://openreview.net/forum?id=CFhpBJ8eZ5", "poster": "/media/PosterPDFs/NeurIPS%202023/72437.png?t=1702100583.3844483", "slides": "https://nips.cc/virtual/2023/poster/72437", "video": "https://nips.cc/virtual/2023/poster/72437", "author_site": "Yuxin Guo, Shijie Ma, Hu Su, Zhiqing Wang, Yuhao Zhao, Wei Zou, Siyang Sun, Yun Zheng", "tldr": "", "abstract": "Audio-Visual Source Localization (AVSL) aims to locate sounding objects within video frames given the paired audio clips. Existing methods predominantly rely on self-supervised contrastive learning of audio-visual correspondence. Without any bounding-box annotations, they struggle to achieve precise localization, especially for small objects, and suffer from blurry boundaries and false positives. Moreover, the naive semi-supervised method is poor in effectively utilizing the abundance of unlabeled audio-visual pairs. In this paper, we propose a novel Semi-Supervised Learning framework for AVSL, namely Dual Mean-Teacher (DMT), comprising two teacher-student structures to circumvent the confirmation bias issue. Specifically, two teachers, pre-trained on limited labeled data, are employed to filter out noisy samples via the consensus between their predictions, and then generate high-quality pseudo-labels by intersecting their confidence maps. The optimal utilization of both labeled and unlabeled data combined with this unbiased framework enable DMT to outperform current state-of-the-art methods by a large margin, with CIoU of $\\textbf{90.4\\%}$ and $\\textbf{48.8\\%}$ on Flickr-SoundNet and VGG-Sound Source, obtaining $\\textbf{8.9\\%}$ and $\\textbf{9.6\\%}$ improvements respectively, given only $3\\%$ of data positional-annotated. We also extend our framework to some existing AVSL methods and consistently boost their performance. 
Our code is publicly available at https://github.com/gyx-gloria/DMT.", "keywords": "Audio-Visual Learning;Audio-Visual Source Localization;Semi-Supervised Learning;Multimodal Learning", "primary_area": "", "supplementary_material": "/attachment/c6f7a83e8ab7fb2d421c6b73ed5114d9499aa7be.pdf", "author": "Yuxin Guo;Shijie Ma;Hu Su;Zhiqing Wang;Yuhao Zhao;Wei Zou;Siyang Sun;Yun Zheng", "authorids": "~Yuxin_Guo2;~Shijie_Ma1;~Hu_Su1;~Zhiqing_Wang1;~Yuhao_Zhao1;~Wei_Zou2;~Siyang_Sun1;~Yun_Zheng3", "gender": "F;M;M;M;;M;F;M", "homepage": ";https://mashijie1028.github.io/;https://www.researchgate.net/profile/Hu_Su;;https://github.com/ZYHzhaoyuhao;;;", "dblp": ";191/4553;89/7698;;;;;", "google_scholar": "x_0spxgAAAAJ;https://scholar.google.com/citations?hl=en;;;;;https://scholar.google.com.hk/citations?hl=zh-CN;-hFpScAAAAAJ", "orcid": ";0009-0005-1131-5686;;0000-0002-3916-514X;;0000-0003-4215-5361;;", "linkedin": ";;;;;;;", "or_profile": "~Yuxin_Guo2;~Shijie_Ma1;~Hu_Su1;~Zhiqing_Wang1;~Yuhao_Zhao1;~Wei_Zou2;~Sun_Siyang1;~Yun_Zheng1", "aff": "Alibaba Group;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;ia.ac.cn;ia.ac.cn;ucas.edu;ia.ac.cn;ia.ac.cn;alibaba-inc.com;alibaba-inc.com", "position": "Intern;PhD student;Associate Professor;PhD student;MS student;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nguo2023dual,\ntitle={Dual Mean-Teacher: An Unbiased Semi-Supervised Framework for Audio-Visual Source Localization},\nauthor={Yuxin Guo and Shijie Ma and Hu Su and Zhiqing Wang and Yuhao Zhao and Wei Zou and Siyang Sun and Yun Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CFhpBJ8eZ5}\n}", "github": "", "project": "", "reviewers": "dVkQ;GZse;1pQt;MLW4;13tz", "pdf_size": 2635875, "rating": "5;5;6;7;8", "confidence": "4;5;4;4;4", "soundness": "3;3;3;4;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "69;72;50;110;346", "wc_strengths": "10;31;65;86;193", "wc_weaknesses": "20;300;103;407;103", "wc_questions": "17;2;43;30;456", "wc_limitations": "41;10;17;37;16", "wc_review": "157;415;278;670;1114", "wc_reply_reviewers": "34;214;50;21;0", "wc_reply_authors": "441;1240;675;0;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "3;3;2;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 129.4, 110.03563059300383 ], "wc_strengths_avg": [ 77.0, 63.696153730033025 ], "wc_weaknesses_avg": [ 186.6, 143.64483979593558 ], "wc_questions_avg": [ 109.6, 173.73381939046868 ], "wc_limitations_avg": [ 24.2, 12.383860464330176 ], "wc_review_avg": [ 526.8, 339.5475813490651 ], "wc_reply_reviewers_avg": [ 63.8, 76.86455620115164 ], "wc_reply_authors_avg": [ 471.2, 464.2324417789002 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5144957554275267, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15574787737304970245&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 
7, "email": "alibaba-inc.com;ia.ac.cn;ia.ac.cn;ucas.edu;ia.ac.cn;ia.ac.cn;alibaba-inc.com;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;1;1;2;1;1;0;0", "aff_unique_norm": "Alibaba Group;Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.alibaba.com;http://www.ia.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "Alibaba;CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "D4: Improving LLM Pretraining via Document De-Duplication and Diversification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73662", "id": "CG0L2PFrb1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a8f8cbd7f7a5fb2c837e578c75e5b615-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=CG0L2PFrb1", "openreview": "https://openreview.net/forum?id=CG0L2PFrb1", "poster": "/media/PosterPDFs/NeurIPS%202023/73662.png?t=1697440319.4420848", "slides": "https://nips.cc/virtual/2023/poster/73662", "video": "https://nips.cc/virtual/2023/poster/73662", "author_site": "Kushal Tirumala, Daniel Simig, Armen Aghajanyan, Ari Morcos", "tldr": "", "abstract": "Over recent years, an increasing amount of compute and data has been poured into training large language models (LLMs), usually by doing one-pass learning on as many tokens as possible randomly selected from large-scale web corpora. While training on ever-larger portions of the internet leads to consistent performance improvements, the size of these improvements diminishes with scale, and there has been little work exploring the effect of data selection on pre-training and downstream performance beyond simple de-duplication methods such as MinHash. Here, we show that careful data selection (on top of de-duplicated data) via pre-trained model embeddings can speed up training (20% efficiency gains) and improves average downstream accuracy on 16 NLP tasks (up to 2%) at the 6.7B model scale. Furthermore, we show that repeating data intelligently consistently outperforms baseline training (while repeating random data performs worse than baseline training). Our results indicate that clever data selection can significantly improve LLM pre-training, calls into question the common practice of training for a single epoch on as much data as possible, and demonstrates a path to keep improving our models past the limits of randomly sampling web data.", "keywords": "Language Modeling;NLP;Data Pruning;Data Selection", "primary_area": "", "supplementary_material": "/attachment/b9db26af8f50d94ace8569cb2f8be0c5a717c2ef.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\ntirumala2023d,\ntitle={D4: Improving {LLM} Pretraining via Document De-Duplication and Diversification},\nauthor={Kushal Tirumala and Daniel Simig and Armen Aghajanyan and Ari S. 
Morcos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=CG0L2PFrb1}\n}", "github": "", "project": "", "reviewers": "nVTQ;XL6a;qR3n;Wz3a;QH9f", "pdf_size": 926271, "rating": "5;6;6;6;8", "confidence": "4;4;2;2;5", "wc_summary_and_contributions": "49;70;79;104;131", "wc_strengths": "112;21;76;47;27", "wc_improvement": "218;85;145;138;1", "wc_limitations": "110;9;21;132;1", "wc_correctness": "28;76;267;1;1", "wc_clarity": "8;176;116;1;1", "wc_relation_to_prior_work": "2;18;44;1;1", "wc_documentation": "10;1;3;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "538;457;752;426;165", "wc_reply_reviewers": "16;17;28;0;0", "wc_reply_authors": "1519;669;1617;476;73", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;1;3;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 1.2 ], "wc_summary_and_contributions_avg": [ 86.6, 28.359125515431536 ], "wc_strengths_avg": [ 56.6, 33.70815924965349 ], "wc_improvement_avg": [ 117.4, 71.97944150936432 ], "wc_limitations_avg": [ 54.6, 55.02944666267325 ], "wc_correctness_avg": [ 74.6, 100.02519682559989 ], "wc_clarity_avg": [ 60.4, 72.4668199937047 ], "wc_relation_to_prior_work_avg": [ 13.2, 16.70209567689037 ], "wc_documentation_avg": [ 3.2, 3.4871191548325386 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 467.6, 189.34053976895706 ], "wc_reply_reviewers_avg": [ 12.2, 10.814804667676619 ], "wc_reply_authors_avg": [ 870.8, 601.6731338525929 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.44226898133585163, "gs_citation": 125, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8343620948022518992&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "Automatic Grouping for Efficient Cooperative Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72436", "id": "CGj72TyGJy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/906c860f1b7515a8ffec02dcdac74048-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CGj72TyGJy", "openreview": "https://openreview.net/forum?id=CGj72TyGJy", "poster": "/media/PosterPDFs/NeurIPS%202023/72436.png?t=1701761623.663596", "slides": "https://nips.cc/virtual/2023/poster/72436", "video": "https://nips.cc/virtual/2023/poster/72436", "author_site": "Yifan Zang, Jinmin He, Kai Li, Haobo Fu, Qiang Fu, Junliang Xing, Jian Cheng", "tldr": "", "abstract": "Grouping is ubiquitous in natural systems and is essential for promoting efficiency in team coordination. This paper proposes a novel formulation of Group-oriented Multi-Agent Reinforcement Learning (GoMARL), which learns automatic grouping without domain knowledge for efficient cooperation. In contrast to existing approaches that attempt to directly learn the complex relationship between the joint action-values and individual utilities, we empower subgroups as a bridge to model the connection between small sets of agents and encourage cooperation among them, thereby improving the learning efficiency of the whole team. In particular, we factorize the joint action-values as a combination of group-wise values, which guide agents to improve their policies in a fine-grained fashion. 
We present an automatic grouping mechanism to generate dynamic groups and group action-values. We further introduce a hierarchical control for policy learning that drives the agents in the same group to specialize in similar policies and possess diverse strategies for various groups. Experiments on the StarCraft II micromanagement tasks and Google Research Football scenarios verify our method's effectiveness. Extensive component studies show how grouping works and enhances performance.", "keywords": "MARL;Cooperative Multi-Agent Reinforcement Learning;Coordination and Cooperation;Automatic Grouping;Group-Wise Learning", "primary_area": "", "supplementary_material": "/attachment/ac508065c94c3c071cceff7881a0408823158a39.zip", "author": "Yifan Zang;Jinmin He;Kai Li;Haobo Fu;QIANG FU;Junliang Xing;Jian Cheng", "authorids": "~Yifan_Zang1;~Jinmin_He1;~Kai_Li2;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7", "gender": "M;M;M;M;M;M;M", "homepage": ";;;;http://people.ucas.ac.cn/~jlxing?language=en;https://people.ucas.ac.cn/~chengjian?language=en;https://github.com/DarkDawn233", "dblp": "269/4608;181/2853;85/8571;;43/7659.html;14/6145-1;347/6803", "google_scholar": ";_cY_PXgAAAAJ;LFdJXNcAAAAJ;gANaxT0AAAAJ;jSwNd3MAAAAJ;ZGCIUJ8AAAAJ;", "orcid": ";;;;0000-0001-6801-0510;0000-0003-1289-2758;", "linkedin": ";;haobo-fu-382b0784/;;https://www.linkedin.cn/incareer/in/ACoAAAvlU14B40ZWH1pxg5JJDtQ6LlgMYkp0e5s;;", "or_profile": "~Yifan_Zang1;~Kai_Li2;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7;~He_Jinmin1", "aff": "University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Tencent AI Lab;Tencent AI Lab;Tsinghua University;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ia.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ucas.ac.cn", "position": "PhD student;Associate Professor;Principal Researcher;Principal Researcher;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzang2023automatic,\ntitle={Automatic Grouping for Efficient Cooperative Multi-Agent Reinforcement Learning},\nauthor={Yifan Zang and Jinmin He and Kai Li and Haobo Fu and QIANG FU and Junliang Xing and Jian Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CGj72TyGJy}\n}", "github": "", "project": "", "reviewers": "wS3D;Dgdo;y5k7;p19m", "pdf_size": 6168500, "rating": "6;7;7;7", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "3;3;4;3", "presentation": "3;2;3;4", "wc_summary": "46;114;172;319", "wc_strengths": "17;108;80;2", "wc_weaknesses": "200;120;283;2", "wc_questions": "7;32;162;2", "wc_limitations": "1;32;1;5", "wc_review": "271;406;698;330", "wc_reply_reviewers": "19;11;14;12", "wc_reply_authors": "23;41;23;34", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 162.75, 100.63144389304965 ], "wc_strengths_avg": [ 51.75, 43.71713050967549 ], "wc_weaknesses_avg": [ 151.25, 103.66623124238674 ], "wc_questions_avg": [ 50.75, 65.22796562824875 ], "wc_limitations_avg": [ 9.75, 12.94942083646987 ], "wc_review_avg": [ 426.25, 164.03105651064985 ], "wc_reply_reviewers_avg": [ 14.0, 3.082207001484488 ], "wc_reply_authors_avg": [ 30.25, 7.660776723022281 ], 
"reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2245804366214020678&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucas.ac.cn;ia.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ucas.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;3;1;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Tencent;Tsinghua University", "aff_unique_dep": ";Institute of Automation;Tencent AI Lab;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn;https://ai.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UCAS;CAS;Tencent AI Lab;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Differentiable Random Partition Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72435", "id": "CJWQGDwa6u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/933b5d002cf251b3e854d586e55ac58c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CJWQGDwa6u", "openreview": "https://openreview.net/forum?id=CJWQGDwa6u", "poster": "/media/PosterPDFs/NeurIPS%202023/72435.png?t=1699972860.5069633", "slides": "https://nips.cc/virtual/2023/poster/72435", "video": "https://nips.cc/virtual/2023/poster/72435", "author_site": "Thomas Sutter, Alain Ryser, Joram Liebeskind, Julia Vogt", "tldr": "", "abstract": "Partitioning a set of elements into an unknown number of mutually exclusive subsets is essential in many machine learning problems.\nHowever, assigning elements, such as samples in a dataset or neurons in a network layer, to an unknown and discrete number of subsets is inherently non-differentiable, prohibiting end-to-end gradient-based optimization of parameters.\nWe overcome this limitation by proposing a novel two-step method for inferring partitions, which allows its usage in variational inference tasks.\nThis new approach enables reparameterized gradients with respect to the parameters of the new random partition model.\nOur method works by inferring the number of elements per subset and, second, by filling these subsets in a learned order.\nWe highlight the versatility of our general-purpose approach on three different challenging experiments: variational clustering, inference of shared and independent generative factors under weak supervision, and multitask learning.", "keywords": "random partition model;continuous relaxation;reparameterization;generative models;vae;representation learning;weak supervision;variational clustering;deep learning", "primary_area": "", "supplementary_material": "/attachment/96f112e8286cb647889777e8c4a2e134ba66616b.zip", "author": "Thomas M. 
Sutter;Alain Ryser;Joram Liebeskind;Julia E Vogt", "authorids": "~Thomas_M._Sutter1;~Alain_Ryser1;~Joram_Liebeskind1;~Julia_E_Vogt1", "gender": ";M;;F", "homepage": ";https://mds.inf.ethz.ch/team/detail/alain-ryser;;http://mds.inf.ethz.ch", "dblp": ";230/3590;;13/8412", "google_scholar": ";https://scholar.google.ch/citations?user=l9tQ2agAAAAJ;;UoeV-8kAAAAJ", "orcid": ";;;", "linkedin": ";alain-r-0554441b5/;;julia-vogt-50b53895", "or_profile": "~Thomas_M._Sutter1;~Alain_Ryser1;~Joram_Liebeskind1;~Julia_E_Vogt1", "aff": ";ETHZ - ETH Zurich;;Swiss Federal Institute of Technology", "aff_domain": ";ethz.ch;;ethz.ch", "position": ";PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nsutter2023differentiable,\ntitle={Differentiable Random Partition Models},\nauthor={Thomas M. Sutter and Alain Ryser and Joram Liebeskind and Julia E Vogt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CJWQGDwa6u}\n}", "github": "", "project": "", "reviewers": "jtds;WEvf;Ynzj;6PXU", "pdf_size": 10233187, "rating": "6;6;7;7", "confidence": "4;3;2;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "43;79;161;9", "wc_strengths": "48;199;45;14", "wc_weaknesses": "175;248;12;113", "wc_questions": "242;43;87;1", "wc_limitations": "2;1;29;3", "wc_review": "510;570;334;140", "wc_reply_reviewers": "164;0;123;11", "wc_reply_authors": "453;0;328;11", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 56.515484603779164 ], "wc_strengths_avg": [ 76.5, 71.96700632928953 ], "wc_weaknesses_avg": [ 137.0, 86.55345169315895 ], "wc_questions_avg": [ 93.25, 91.10536482556886 ], "wc_limitations_avg": [ 8.75, 11.712706775122479 ], "wc_review_avg": [ 388.5, 167.6506784954955 ], "wc_reply_reviewers_avg": [ 74.5, 70.61338400048535 ], "wc_reply_authors_avg": [ 198.0, 197.54619712867165 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7651153293629646048&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";ethz.ch;;ethz.ch", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "A Theory of Transfer-Based Black-Box Attacks: Explanation and Implications", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72434", "id": "CJY7NEXVwC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d0842550e6d92b0e27e7e810b1a4792-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CJY7NEXVwC", "openreview": "https://openreview.net/forum?id=CJY7NEXVwC", "poster": "/media/PosterPDFs/NeurIPS%202023/72434.png?t=1702088084.1482682", "slides": "https://nips.cc/virtual/2023/poster/72434", "video": "https://nips.cc/virtual/2023/poster/72434", "author_site": "Yanbo Chen, 
Weiwei Liu", "tldr": "", "abstract": "Transfer-based attacks are a practical method of black-box adversarial attacks, in which the attacker aims to craft adversarial examples from a source (surrogate) model that is transferable to the target model. A wide range of empirical works has tried to explain the transferability of adversarial examples from different angles. However, these works only provide ad hoc explanations without quantitative analyses. The theory behind transfer-based attacks remains a mystery.\nThis paper studies transfer-based attacks under a unified theoretical framework. We propose an explanatory model, called the manifold attack model, that formalizes popular beliefs and explains the existing empirical results. Our model explains why adversarial examples are transferable even when the source model is inaccurate. Moreover, our model implies that the existence of transferable adversarial examples depends on the \u201ccurvature\u201d of the data manifold, which quantitatively explains why the success rates of transfer-based attacks are hard to improve. We also discuss the expressive power and the possible extensions of our model in general applications.", "keywords": "Learning Theory", "primary_area": "", "supplementary_material": "/attachment/914a6abc627e0b22322e023d00f2609c46a8e90d.pdf", "author": "Yanbo Chen;Weiwei Liu", "authorids": "~Yanbo_Chen2;~Weiwei_Liu1", "gender": "M;M", "homepage": "https://yanboc.github.io/;https://sites.google.com/site/weiweiliuhomepage/", "dblp": ";54/6677-3.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN", "orcid": ";", "linkedin": ";weiwei-liu-4a7849134/", "or_profile": "~Yanbo_Chen2;~Weiwei_Liu1", "aff": "Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2023a,\ntitle={A Theory of Transfer-Based Black-Box Attacks: Explanation and Implications},\nauthor={Yanbo Chen and Weiwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CJY7NEXVwC}\n}", "github": "", "project": "", "reviewers": "KVUg;9Mj1;rJF7;b7e2;HLto", "pdf_size": 1492453, "rating": "4;5;5;8;8", "confidence": "3;2;3;4;4", "soundness": "3;2;4;3;3", "novelty": "2;4;2;4;3", "presentation": "2;2;3;3;3", "wc_summary": "66;80;91;50;80", "wc_strengths": "23;110;98;49;53", "wc_weaknesses": "81;151;419;38;30", "wc_questions": "1;2;405;32;95", "wc_limitations": "1;2;15;1;1", "wc_review": "172;345;1028;170;259", "wc_reply_reviewers": "0;20;581;15;20", "wc_reply_authors": "81;97;922;8;8", "reply_reviewers": "0;1;2;1;1", "reply_authors": "2;3;3;2;2", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 73.4, 14.136477637657833 ], "wc_strengths_avg": [ 66.6, 32.45057780687426 ], "wc_weaknesses_avg": [ 143.8, 144.1352142954663 ], "wc_questions_avg": [ 107.0, 152.86202929439344 ], "wc_limitations_avg": [ 4.0, 5.513619500836088 ], "wc_review_avg": [ 394.8, 323.1218965034713 ], "wc_reply_reviewers_avg": [ 127.2, 227.01841335010693 ], "wc_reply_authors_avg": [ 223.2, 351.30920853288205 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7985957062499248, "gs_citation": 
14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15616513968731471645&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;whu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Divide, Evaluate, and Refine: Evaluating and Improving Text-to-Image Alignment with Iterative VQA Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72433", "id": "CLjBBd8u2j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dfd0bd56e8a6f82d1619f5d093d5f9ca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CLjBBd8u2j", "openreview": "https://openreview.net/forum?id=CLjBBd8u2j", "poster": "/media/PosterPDFs/NeurIPS%202023/72433.png?t=1702273173.3760555", "slides": "https://nips.cc/virtual/2023/poster/72433", "video": "https://nips.cc/virtual/2023/poster/72433", "author_site": "Jaskirat Singh, Liang Zheng", "tldr": "", "abstract": "The field of text-conditioned image generation has made unparalleled progress with the recent advent of latent diffusion models. While revolutionary, as the complexity of given text input increases, the current state of art diffusion models may still fail in generating images that accurately convey the semantics of the given prompt. Furthermore, such misalignments are often left undetected by pretrained multi-modal models such as CLIP. To address these problems, in this paper, we explore a simple yet effective decompositional approach towards both evaluation and improvement of text-to-image alignment. In particular, we first introduce a Decompositional-Alignment-Score which given a complex caption decomposes it into a set of disjoint assertions. The alignment of each assertion with generated images is then measured using a VQA model. Finally, alignment scores for different assertions are combined aposteriori to give the final text-to-image alignment score. Experimental analysis reveals that the proposed alignment metric shows a significantly higher correlation with human ratings as opposed to traditional CLIP, BLIP scores. Furthermore, we also find that the assertion level alignment scores also provide useful feedback which can then be used in a simple iterative procedure to gradually increase the expressivity of different assertions in the final image outputs. 
Human user studies indicate that the proposed approach surpasses previous state-of-the-art by 8.7% in overall text-to-image alignment accuracy.", "keywords": "Text-to-Image Generation", "primary_area": "", "supplementary_material": "/attachment/c95281e074729776582e02e725781b8bac9e40d3.pdf", "author": "Jaskirat Singh;Liang Zheng", "authorids": "~Jaskirat_Singh1;~Liang_Zheng4", "gender": ";M", "homepage": "https://1jsingh.github.io/;http://zheng-lab.cecs.anu.edu.au/", "dblp": "74/2036;61/7360-1", "google_scholar": "HAmEM_4AAAAJ;https://scholar.google.com.au/citations?user=vNHqr3oAAAAJ", "orcid": ";", "linkedin": ";liang-zheng-76341311a/", "or_profile": "~Jaskirat_Singh1;~Liang_Zheng4", "aff": "Adobe Systems;Australian National University", "aff_domain": "adobe.com;anu.edu.au", "position": "Intern;Senior Lecturer", "bibtex": "@inproceedings{\nsingh2023divide,\ntitle={Divide, Evaluate, and Refine: Evaluating and Improving Text-to-Image Alignment with Iterative {VQA} Feedback},\nauthor={Jaskirat Singh and Liang Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CLjBBd8u2j}\n}", "github": "", "project": "", "reviewers": "sTZE;5iW3;bVCd;GK7B", "pdf_size": 19332528, "rating": "5;7;7;7", "confidence": "4;3;4;4", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "3;4;3;4", "wc_summary": "90;91;156;113", "wc_strengths": "28;73;128;62", "wc_weaknesses": "77;29;172;45", "wc_questions": "300;17;51;59", "wc_limitations": "17;15;24;31", "wc_review": "512;225;531;310", "wc_reply_reviewers": "0;0;24;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 112.5, 26.744158240632665 ], "wc_strengths_avg": [ 72.75, 35.95396362016294 ], "wc_weaknesses_avg": [ 80.75, 55.445355982264196 ], "wc_questions_avg": [ 106.75, 112.68179755399716 ], "wc_limitations_avg": [ 21.75, 6.299801584177076 ], "wc_review_avg": [ 394.5, 130.67995255585302 ], "wc_reply_reviewers_avg": [ 6.0, 10.392304845413264 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14163189837701334290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "adobe.com;anu.edu.au", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Adobe;Australian National University", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.anu.edu.au", "aff_unique_abbr": "Adobe;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Australia" }, { "title": "Norm-based Generalization Bounds for Sparse Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72432", "id": "COPzNA10hZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8493e190ff1bbe3837eca821190b61ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=COPzNA10hZ", "openreview": "https://openreview.net/forum?id=COPzNA10hZ", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72432.png?t=1701732622.412254", "slides": "https://nips.cc/virtual/2023/poster/72432", "video": "https://nips.cc/virtual/2023/poster/72432", "author_site": "Tomer Galanti, Mengjia Xu, Liane Galanti, Tomaso Poggio", "tldr": "", "abstract": "In this paper, we derive norm-based generalization bounds for sparse ReLU neural networks, including convolutional neural networks. These bounds differ from previous ones because they consider the sparse structure of the neural network architecture and the norms of the convolutional filters, rather than the norms of the (Toeplitz) matrices associated with the convolutional layers. Theoretically, we demonstrate that these bounds are significantly tighter than standard norm-based generalization bounds. Empirically, they offer relatively tight estimations of generalization for various simple classification problems. Collectively, these findings suggest that the sparsity of the underlying target function and the model's architecture plays a crucial role in the success of deep learning.", "keywords": "generalization bounds;convolution;rademacher;generalization;sparsity", "primary_area": "", "supplementary_material": "", "author": "Tomer Galanti;Mengjia Xu;Liane Galanti;Tomaso Poggio", "authorids": "~Tomer_Galanti1;~Mengjia_Xu1;~Liane_Galanti1;~Tomaso_Poggio1", "gender": "M;F;;M", "homepage": "https://tomergalanti.github.io;;;https://cbmm.mit.edu/about/people/poggio", "dblp": "198/1490;;;12/5544", "google_scholar": ";Ok1giekAAAAJ;;WgAGy7wAAAAJ", "orcid": ";;;", "linkedin": "tomer-galanti-5880b1104/;;;", "or_profile": "~Tomer_Galanti1;~Mengjia_Xu1;~Liane_Galanti1;~Tomaso_Poggio1", "aff": "Massachusetts Institute of Technology;Brown University;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;brown.edu;;mit.edu", "position": "Postdoc;Postdoc;;Full Professor", "bibtex": "@inproceedings{\ngalanti2023normbased,\ntitle={Norm-based Generalization Bounds for Sparse Neural Networks},\nauthor={Tomer Galanti and Mengjia Xu and Liane Galanti and Tomaso Poggio},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=COPzNA10hZ}\n}", "github": "", "project": "", "reviewers": "G4im;U2qE;oZvZ;dgpJ;LtGn", "pdf_size": 591555, "rating": "5;5;6;7;7", "confidence": "2;5;4;3;4", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "3;2;4;4;3", "wc_summary": "72;243;44;49;99", "wc_strengths": "35;68;64;64;47", "wc_weaknesses": "95;450;301;171;24", "wc_questions": "73;330;505;232;952", "wc_limitations": "1;18;91;2;43", "wc_review": "276;1109;1005;518;1165", "wc_reply_reviewers": "95;0;921;35;270", "wc_reply_authors": "0;0;1040;11;808", "reply_reviewers": "1;0;3;1;2", "reply_authors": "1;1;4;2;3", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 101.4, 73.43187318869103 ], "wc_strengths_avg": [ 55.6, 12.59523719506703 ], "wc_weaknesses_avg": [ 208.2, 151.79380751532653 ], "wc_questions_avg": [ 418.4, 301.3599840722056 ], "wc_limitations_avg": [ 31.0, 33.627369804966904 ], "wc_review_avg": [ 814.6, 353.2039637376681 ], "wc_reply_reviewers_avg": [ 264.2, 341.28662440828236 ], "wc_reply_authors_avg": [ 371.8, 456.81698742494245 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 26, 0 ], 
"authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16316434476516944132&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;brown.edu;;mit.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.brown.edu", "aff_unique_abbr": "MIT;Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Counting Distinct Elements in the Turnstile Model with Differential Privacy under Continual Observation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72431", "id": "CQ38aC92WY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ef1afa0daa888d695dcd5e9513bafa3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CQ38aC92WY", "openreview": "https://openreview.net/forum?id=CQ38aC92WY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72431", "video": "https://nips.cc/virtual/2023/poster/72431", "author_site": "Palak Jain, Iden Kalemaj, Sofya Raskhodnikova, Satchit Sivakumar, Adam Smith", "tldr": "", "abstract": "Privacy is a central challenge for systems that learn from sensitive data sets, especially when a system's outputs must be continuously updated to reflect changing data. We consider the achievable error for differentially private continual release of a basic statistic---the number of distinct items---in a stream where items may be both inserted and deleted (the turnstile model). With only insertions, existing algorithms have additive error just polylogarithmic in the length of the stream $T$. We uncover a much richer landscape in the turnstile model, even without considering memory restrictions. We show that every differentially private mechanism that handles insertions and deletions has worst-case additive error at least $T^{1/4}$ even under a relatively weak, event-level privacy definition. \nThen, we identify a parameter of the input stream, its maximum flippancy, that is low for natural data streams and for which we give tight parameterized error guarantees. Specifically, the maximum flippancy is the largest number of times that the contribution of a single \nitem to the distinct elements count changes over the course of the stream. We present an item-level differentially private mechanism that, for all turnstile streams with maximum flippancy $w$, continually outputs the number of distinct elements with an $O(\\sqrt{w} \\cdot \\mathsf{poly}\\log T)$ additive error, without requiring prior knowledge of $w$. We prove that this is the best achievable error bound that depends only on $w$, for a large range of values of $w$. 
When $w$ is small, the error of our mechanism is similar to the polylogarithmic in $T$ error in the insertion-only setting, bypassing the hardness in the turnstile model.", "keywords": "distinct elements;differential privacy;continual release;turnstile streams", "primary_area": "", "supplementary_material": "/attachment/16a3f26e69b5a900ce2d4a7d29a02f329335f049.pdf", "author": "Palak Jain;Iden Kalemaj;Sofya Raskhodnikova;Satchit Sivakumar;Adam Smith", "authorids": "~Palak_Jain2;~Iden_Kalemaj1;~Sofya_Raskhodnikova1;~Satchit_Sivakumar1;~Adam_Smith1", "gender": "Non-Binary;;F;M;M", "homepage": "https://thepalakjain.com;;https://cs-people.bu.edu/sofya/;;http://cs-people.bu.edu/ads22", "dblp": "221/3845-4.html;;;;04/5072", "google_scholar": "A5q3qSIAAAAJ;;;;fkGi-JMAAAAJ", "orcid": ";;;;", "linkedin": "the-palak-jain-/;;;satchit-s-a85344114/;", "or_profile": "~Palak_Jain2;~Iden_Kalemaj1;~Sofya_Raskhodnikova1;~Satchit_Sivakumar1;~Adam_Smith1", "aff": "Boston University, Boston University;;Boston University, Boston University;Boston University;Google", "aff_domain": "bu.edu;;bu.edu;bu.edu;google.com", "position": "PhD student;;Full Professor;PhD student;Researcher", "bibtex": "@inproceedings{\njain2023counting,\ntitle={Counting Distinct Elements in the Turnstile Model with Differential Privacy under Continual Observation},\nauthor={Palak Jain and Iden Kalemaj and Sofya Raskhodnikova and Satchit Sivakumar and Adam Smith},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CQ38aC92WY}\n}", "github": "", "project": "", "reviewers": "wtJp;MGRg;7TW6;iH8C;G17t", "pdf_size": 556189, "rating": "5;6;7;7;8", "confidence": "2;3;4;4;4", "soundness": "3;3;3;4;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;4;4", "wc_summary": "86;236;232;69;87", "wc_strengths": "83;83;94;143;82", "wc_weaknesses": "69;49;38;89;10", "wc_questions": "12;1;101;16;1", "wc_limitations": "1;11;1;5;7", "wc_review": "251;380;466;322;187", "wc_reply_reviewers": "15;16;17;16;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 142.0, 75.40026525152282 ], "wc_strengths_avg": [ 97.0, 23.417941839538333 ], "wc_weaknesses_avg": [ 51.0, 26.914680009244027 ], "wc_questions_avg": [ 26.2, 37.87030498952973 ], "wc_limitations_avg": [ 5.0, 3.794733192202055 ], "wc_review_avg": [ 321.2, 97.32707742452766 ], "wc_reply_reviewers_avg": [ 12.8, 6.43117407632541 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9315516419063743, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1946003642757540938&as_sdt=5,28&sciodt=0,28&hl=en", "gs_version_total": 7, "email": "bu.edu;;bu.edu;bu.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Boston University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.bu.edu;https://www.google.com", "aff_unique_abbr": "BU;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Boston;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unbiased learning of deep generative models with 
structured discrete representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72430", "id": "CQqBt46FUD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dcc337bb2a4d25afefd9ab800721debb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CQqBt46FUD", "openreview": "https://openreview.net/forum?id=CQqBt46FUD", "poster": "/media/PosterPDFs/NeurIPS%202023/72430.png?t=1702157188.08133", "slides": "https://nips.cc/virtual/2023/poster/72430", "video": "https://nips.cc/virtual/2023/poster/72430", "author_site": "Henry C Bendekgey, Gabe Hope, Erik Sudderth", "tldr": "", "abstract": "By composing graphical models with deep learning architectures, we learn generative models with the strengths of both frameworks. The structured variational autoencoder (SVAE) inherits structure and interpretability from graphical models, and flexible likelihoods for high-dimensional data from deep learning, but poses substantial optimization challenges. We propose novel algorithms for learning SVAEs, and are the first to demonstrate the SVAE's ability to handle multimodal uncertainty when data is missing by incorporating discrete latent variables. Our memory-efficient implicit differentiation scheme makes the SVAE tractable to learn via gradient descent, while demonstrating robustness to incomplete optimization. To more rapidly learn accurate graphical model parameters, we derive a method for computing natural gradients without manual derivations, which avoids biases found in prior work. These optimization innovations enable the first comparisons of the SVAE to state-of-the-art time series models, where the SVAE performs competitively while learning interpretable and structured discrete data representations.", "keywords": "Generative Models;Graphical Models;Variational Inference;Amortized Inference", "primary_area": "", "supplementary_material": "", "author": "Harry Bendekgey;Gabriel Hope;Erik B. Sudderth", "authorids": "~Harry_Bendekgey1;~Gabriel_Hope1;~Erik_B._Sudderth2", "gender": "M;M;M", "homepage": "https://hbendekgey.me;https://gabehope.com;https://www.ics.uci.edu/~sudderth/", "dblp": ";https://dblp.uni-trier.de/pers/hd/h/Hope:Gabriel;22/3923", "google_scholar": ";;ePiPQ2cAAAAJ", "orcid": ";;0000-0002-0595-9726", "linkedin": ";;", "or_profile": "~Harry_Bendekgey1;~Gabriel_Hope1;~Erik_Sudderth1", "aff": "Donald Bren School of Information and Computer Sciences, University of California, Irvine;;University of California, Irvine", "aff_domain": "ics.uci.edu;;uci.edu", "position": "PhD student;;Professor", "bibtex": "@inproceedings{\nbendekgey2023unbiased,\ntitle={Unbiased learning of deep generative models with structured discrete representations},\nauthor={Harry Bendekgey and Gabriel Hope and Erik B. 
Sudderth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CQqBt46FUD}\n}", "github": "", "project": "", "reviewers": "gyLw;FFkU;4krc;JW8q", "pdf_size": 2767367, "rating": "4;5;7;7", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "74;79;90;75", "wc_strengths": "65;28;70;74", "wc_weaknesses": "202;366;57;206", "wc_questions": "57;5;39;201", "wc_limitations": "6;5;20;1", "wc_review": "404;483;276;557", "wc_reply_reviewers": "0;0;0;204", "wc_reply_authors": "0;0;0;724", "reply_reviewers": "0;0;0;2", "reply_authors": "1;1;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 6.34428877022476 ], "wc_strengths_avg": [ 59.25, 18.32177666057525 ], "wc_weaknesses_avg": [ 207.75, 109.32148690902443 ], "wc_questions_avg": [ 75.5, 74.82479535555042 ], "wc_limitations_avg": [ 8.0, 7.176350047203662 ], "wc_review_avg": [ 430.0, 104.07929669247386 ], "wc_reply_reviewers_avg": [ 51.0, 88.33459118601274 ], "wc_reply_authors_avg": [ 181.0, 313.5011961699668 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9688210288515216065&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "ics.uci.edu;;uci.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "Donald Bren School of Information and Computer Sciences", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Online Clustering of Bandits with Misspecified User Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72429", "id": "CQuRzAgjg9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0bcd8d153b8c548629eca53f4ebdeb42-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CQuRzAgjg9", "openreview": "https://openreview.net/forum?id=CQuRzAgjg9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72429", "video": "https://nips.cc/virtual/2023/poster/72429", "author_site": "Zhiyong Wang, Jize Xie, Xutong Liu, Shuai Li, John C.S. Lui", "tldr": "", "abstract": "The contextual linear bandit is an important online learning problem where, given arm features, a learning agent selects an arm at each round to maximize the cumulative rewards in the long run. A line of works, called the clustering of bandits (CB), utilizes the collaborative effect over user preferences and has shown significant improvements over classic linear bandit algorithms. However, existing CB algorithms require well-specified linear user models and can fail when this critical assumption does not hold. Whether robust CB algorithms can be designed for more practical scenarios with misspecified user models remains an open problem.
In this paper, we are the first to present the important problem of clustering of bandits with misspecified user models (CBMUM), where the expected rewards in user models can be perturbed away from perfect linear models. We devise two robust CB algorithms, RCLUMB and RSCLUMB (representing the learned clustering structure with dynamic graph and sets, respectively), that can accommodate the inaccurate user preference estimations and erroneous clustering caused by model misspecifications. We prove regret upper bounds of $O(\\epsilon_*T\\sqrt{md\\log T} + d\\sqrt{mT}\\log T)$ for our algorithms under milder assumptions than previous CB works, which match the lower bound asymptotically in $T$ up to logarithmic factors, and also match the state-of-the-art results in several degenerate cases. Our regret analysis is novel and different from the typical proof flow of previous CB works. The techniques in proving the regret caused by misclustering users are quite general and may be of independent interest. Experiments on both synthetic and real-world data show that our algorithms outperform previous ones.", "keywords": "online clustering of bandits", "primary_area": "", "supplementary_material": "/attachment/7b59f57f4b51856d0c0151809ac2002141fa1910.zip", "author": "Zhiyong Wang;Jize Xie;Xutong Liu;Shuai Li;John C.S. Lui", "authorids": "~Zhiyong_Wang9;~Jize_Xie1;~Xutong_Liu1;~Shuai_Li3;~John_C.S._Lui2", "gender": "M;M;M;F;M", "homepage": "https://zhiyongwangwzy.github.io/;;https://xutongliu.me/;http://shuaili8.github.io;http://www.cse.cuhk.edu.hk/~cslui/Index.html", "dblp": ";339/2280;70/3372-2;57/2281-10;l/JohnCSLui", "google_scholar": "https://scholar.google.com/citations?hl=en;cX6B3HsAAAAJ;KNfY6BIAAAAJ;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ;https://scholar.google.com.tw/citations?user=7LVjQ7MAAAAJ", "orcid": ";0000-0001-9702-5025;0000-0002-8628-5873;;0000-0001-7466-0384", "linkedin": "zhiyong-wang-a44aaa1a3/;;;;", "or_profile": "~Zhiyong_Wang9;~Jize_Xie1;~Xutong_Liu1;~Shuai_Li3;~John_C.S._Lui2", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Shanghai Jiaotong University;The Chinese University of Hong Kong;John Hopcroft Center, Shanghai Jiao Tong University;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;sjtu.edu.cn;cuhk.edu.hk;sjtu.edu.cn;cse.cuhk.edu.hk", "position": "PhD student;Undergrad student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023online,\ntitle={Online Clustering of Bandits with Misspecified User Models},\nauthor={Zhiyong Wang and Jize Xie and Xutong Liu and Shuai Li and John C.S.
Lui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CQuRzAgjg9}\n}", "github": "", "project": "", "reviewers": "ovwY;1JFu;TDwD;oJ3g;AnqL;ANNG", "pdf_size": 3722595, "rating": "1;3;6;6;7;7", "confidence": "4;4;4;3;3;3", "soundness": "1;2;3;3;4;3", "novelty": "1;2;3;3;3;3", "presentation": "1;3;3;2;4;3", "wc_summary": "75;75;50;82;134;136", "wc_strengths": "38;41;132;28;130;57", "wc_weaknesses": "61;425;107;73;229;42", "wc_questions": "158;8;8;84;152;23", "wc_limitations": "3;12;1;17;105;1", "wc_review": "335;561;298;284;750;259", "wc_reply_reviewers": "49;0;10;93;27;88", "wc_reply_authors": "340;0;37;62;41;65", "reply_reviewers": "1;0;1;1;1;1", "reply_authors": "3;1;2;2;2;2", "rating_avg": [ 5.0, 2.23606797749979 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.5, 0.7637626158259734 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 92.0, 31.994791242742412 ], "wc_strengths_avg": [ 71.0, 43.27431878916948 ], "wc_weaknesses_avg": [ 156.16666666666666, 134.78429268856056 ], "wc_questions_avg": [ 72.16666666666667, 63.94376869997229 ], "wc_limitations_avg": [ 23.166666666666668, 37.078370454424714 ], "wc_review_avg": [ 414.5, 180.3355668376781 ], "wc_reply_reviewers_avg": [ 44.5, 35.92005011132362 ], "wc_reply_authors_avg": [ 90.83333333333333, 113.44222709771212 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 2.0, 0.5773502691896257 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7453559924999299, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14518312404560635800&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "cse.cuhk.edu.hk;sjtu.edu.cn;cuhk.edu.hk;sjtu.edu.cn;cse.cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai Jiao Tong University", "aff_unique_dep": "Department of Computer Science and Engineering;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.sjtu.edu.cn", "aff_unique_abbr": "CUHK;SJTU", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Hong Kong SAR;;Shanghai", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MedSat: A Public Health Dataset for England Featuring Medical Prescriptions and Satellite Imagery", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73661", "id": "CSJYz1Zovj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4fdf676c3b21f20f8c391d929188386-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=CSJYz1Zovj", "openreview": "https://openreview.net/forum?id=CSJYz1Zovj", "poster": "/media/PosterPDFs/NeurIPS%202023/73661.png?t=1702232174.1621156", "slides": "https://nips.cc/virtual/2023/poster/73661", "video": "https://nips.cc/virtual/2023/poster/73661", "author_site": "Sanja Scepanovic, Ivica Obadic, Sagar Joglekar, Laura GIUSTARINI, Cristiano Nattero, Daniele Quercia, Xiaoxiang Zhu", "tldr": "", "abstract": "As extreme weather events become more frequent, understanding their impact on human health becomes increasingly crucial. However, the utilization of Earth Observation to effectively analyze the environmental context in relation to health remains limited. 
This limitation is primarily due to the lack of fine-grained spatial and temporal data in public and population health studies, hindering a comprehensive understanding of health outcomes. Additionally, obtaining appropriate environmental indices across different geographical levels and timeframes poses a challenge. For the years 2019 (pre-COVID) and 2020 (COVID), we collected spatio-temporal indicators for all Lower Layer Super Output Areas in England. These indicators included: i) 111 sociodemographic features linked to health in existing literature, ii) 43 environmental point features (e.g., greenery and air pollution levels), iii) 4 seasonal composite satellite images each with 11 bands, and iv) prescription prevalence associated with five medical conditions (depression, anxiety, diabetes, hypertension, and asthma), opioids and total prescriptions. We combined these indicators into a single MedSat dataset, the availability of which presents an opportunity for the machine learning community to develop new techniques specific to public health. These techniques would address challenges such as handling large and complex data volumes, performing effective feature engineering on environmental and sociodemographic factors, capturing spatial and temporal dependencies in the models, addressing imbalanced data distributions, developing novel computer vision methods for health modeling based on satellite imagery, ensuring model explainability, and achieving generalization beyond the specific geographical region.", "keywords": "public health; population health; medical prescriptions; satellite imagery; satellite data; SDGs", "primary_area": "", "supplementary_material": "/attachment/f284f060955244be9ab8edeee0abd1781546806a.pdf", "author": "Sanja Scepanovic;Ivica Obadic;Sagar Joglekar;Laura GIUSTARINI;Cristiano Nattero;Daniele Quercia;Xiao Xiang Zhu", "authorids": "~Sanja_Scepanovic1;~Ivica_Obadic1;~Sagar_Joglekar2;~Laura_GIUSTARINI1;~Cristiano_Nattero1;~Daniele_Quercia2;~Xiao_Xiang_Zhu1", "gender": "F;M;M;F;M;M;F", "homepage": "https://www.nokia.com/people/sanja-scepanovic/;;https://sagarjoglekar.com/;;;https://researchswinger.org;https://www.sipeo.bgu.tum.de/", "dblp": "151/9322.html;202/2162;;;;04/1995;35/8954", "google_scholar": "pwySZlwAAAAJ;WJTOEKMAAAAJ;https://scholar.google.co.uk/citations?user=kJgQh3MAAAAJ;;https://scholar.google.com/citations?hl=en;;https://scholar.google.de/citations?user=CNakdIgAAAAJ", "orcid": "0000-0002-1534-8128;0000-0003-4403-2170;;;;;0000-0001-5530-3613", "linkedin": "sanja7s/;ivica-obadic/;;lauragiustarinild/;kr1zz/;;xiaoxiang-zhu-90b473228/", "or_profile": "~Sanja_Scepanovic1;~Ivica_Obadic1;~Sagar_Joglekar2;~Laura_GIUSTARINI1;~Cristiano_Nattero1;~Daniele_Quercia2;~Xiaoxiang_Zhu1", "aff": "Nokia Bell Labs;Technische Universit\u00e4t M\u00fcnchen;Expedia Group;;;Nokia Bell Labs Cambridge;Technical University Munich", "aff_domain": "nokia-bell-labs.com;tum.de;expedia.com;;;nokia-bell-labs.com;tum.de", "position": "Researcher;PhD student;Scientist;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nscepanovic2023medsat,\ntitle={MedSat: A Public Health Dataset for England Featuring Medical Prescriptions and Satellite Imagery},\nauthor={Sanja Scepanovic and Ivica Obadic and Sagar Joglekar and Laura GIUSTARINI and Cristiano Nattero and Daniele Quercia and Xiao Xiang Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=CSJYz1Zovj}\n}", "github": 
"", "project": "", "reviewers": "Zvuw;nQhp;AAen;xh3o", "pdf_size": 8974727, "rating": "5;7;7;8", "confidence": "3;5;4;4", "wc_summary_and_contributions": "136;97;123;117", "wc_strengths": "72;45;28;185", "wc_improvement": "169;268;304;227", "wc_limitations": "6;36;73;11", "wc_correctness": "4;23;5;9", "wc_clarity": "16;23;22;6", "wc_relation_to_prior_work": "9;16;14;19", "wc_documentation": "62;160;59;54", "wc_additional_feedback": "1;1;1;1", "wc_review": "475;669;629;629", "wc_reply_reviewers": "0;13;82;26", "wc_reply_authors": "693;727;993;409", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;4;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 118.25, 14.060138690638865 ], "wc_strengths_avg": [ 82.5, 61.22295321201028 ], "wc_improvement_avg": [ 242.0, 50.184659010498414 ], "wc_limitations_avg": [ 31.5, 26.51886121235224 ], "wc_correctness_avg": [ 10.25, 7.595228765481656 ], "wc_clarity_avg": [ 16.75, 6.7592529172978875 ], "wc_relation_to_prior_work_avg": [ 14.5, 3.640054944640259 ], "wc_documentation_avg": [ 83.75, 44.115615149286995 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 600.5, 74.27482749895822 ], "wc_reply_reviewers_avg": [ 30.25, 31.259998400511794 ], "wc_reply_authors_avg": [ 705.5, 206.87375377268137 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14473449243598865669&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "nokia-bell-labs.com;tum.de;expedia.com;;;nokia-bell-labs.com;tum.de", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Nokia Bell Labs;Technische Universit\u00e4t M\u00fcnchen;Expedia Group;Technical University of Munich", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nokialabs.com;https://www.tum.de;https://www.expediagroup.com;https://www.tum.de", "aff_unique_abbr": "Nokia Bell Labs;TUM;Expedia Group;TUM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United States;Germany" }, { "title": "Fine-Grained Human Feedback Gives Better Rewards for Language Model Training", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72428", "id": "CSbGXyCswu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b8c90b65739ae8417e61eadb521f63d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CSbGXyCswu", "openreview": "https://openreview.net/forum?id=CSbGXyCswu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72428", "video": "https://nips.cc/virtual/2023/poster/72428", "author_site": "Zeqiu Wu, Yushi Hu, Weijia Shi, Nouha Dziri, Alane Suhr, Prithviraj Ammanabrolu, Noah Smith, Mari Ostendorf, Hannaneh Hajishirzi", "tldr": "", "abstract": "Language models (LMs) often exhibit undesirable text generation behaviors, including generating false, toxic, or irrelevant outputs. \nReinforcement learning from human feedback (RLHF)---where human preference judgments on LM outputs are transformed into a learning signal---has recently shown promise in addressing these issues. 
However, such holistic feedback conveys limited information on long text outputs; it does not indicate which aspects of the outputs influenced user preference; e.g., which parts contain what type(s) of errors. In this paper, we use fine-grained human feedback (e.g., which sentence is false, which sub-sentence is irrelevant) as an explicit training signal. We introduce Fine-Grained RLHF, a framework that enables training and learning from reward functions that are fine-grained in two respects: (1) density, providing a reward after every segment (e.g., a sentence) is generated; and (2) incorporating multiple reward models associated with different feedback types (e.g., factual incorrectness, irrelevance, and information incompleteness). We conduct experiments on detoxification and long-form question answering to illustrate how learning with this reward function leads to improved performance, supported by both automatic and human evaluation. Additionally, we show that LM behaviors can be customized using different combinations of fine-grained reward models. We release all data, collected human feedback, and codes at https://FineGrainedRLHF.github.io.", "keywords": "Language Model;Reinforcement Learning with Human Feedback;Long-Form Text Generation", "primary_area": "", "supplementary_material": "", "author": "Zeqiu Wu;Yushi Hu;Weijia Shi;Nouha Dziri;Alane Suhr;Prithviraj Ammanabrolu;Noah A. Smith;Mari Ostendorf;Hannaneh Hajishirzi", "authorids": "~Zeqiu_Wu1;~Yushi_Hu1;~Weijia_Shi1;~Nouha_Dziri2;~Alane_Suhr1;~Prithviraj_Ammanabrolu1;~Noah_A._Smith2;~Mari_Ostendorf1;~Hannaneh_Hajishirzi1", "gender": "F;M;;;Not Specified;M;;F;F", "homepage": "http://ellenmellon.github.io/;https://yushi-hu.github.io;https://weijiashi.notion.site/;;http://www.alanesuhr.com;http://prithvirajva.com;;https://people.ece.uw.edu/ostendorf/;https://homes.cs.washington.edu/~hannaneh/", "dblp": "188/5861;268/5766;132/80601;;203/9306;202/2351;;85/2189;52/1296", "google_scholar": "Ug_hZoYAAAAJ;mXN51X0AAAAJ;https://scholar.google.com/citations?hl=en;;daslsUkAAAAJ;2yaiWZ8AAAAJ;;exS-GecAAAAJ;LOV6_WIAAAAJ", "orcid": ";;0000-3200-0000-0011;;;;;0000-0001-9385-9655;", "linkedin": ";;weijia-shi-773768112;;;rajammanabrolu/;;mari-ostendorf-66820a1/;", "or_profile": "~Zeqiu_Wu1;~Yushi_Hu1;~Weijia_Shi1;~Nouha_Dziri2;~Alane_Suhr1;~Prithviraj_Ammanabrolu1;~Noah_A._Smith2;~Mari_Ostendorf1;~Hannaneh_Hajishirzi1", "aff": "University of Washington, Seattle;University of Washington;University of Washington, Seattle;;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;University of Washington;University of Washington", "aff_domain": "uw.edu;uw.edu;uw.edu;;allenai.org;allenai.org;;u.washington.edu;uw.edu", "position": "PhD student;PhD student;PhD student;;Postdoc;Researcher;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023finegrained,\ntitle={Fine-Grained Human Feedback Gives Better Rewards for Language Model Training},\nauthor={Zeqiu Wu and Yushi Hu and Weijia Shi and Nouha Dziri and Alane Suhr and Prithviraj Ammanabrolu and Noah A. 
Smith and Mari Ostendorf and Hannaneh Hajishirzi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CSbGXyCswu}\n}", "github": "", "project": "", "reviewers": "tgte;uPU2;Q8Ja;7ENw", "pdf_size": 2365968, "rating": "6;7;7;7", "confidence": "4;4;4;4", "soundness": "4;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "105;327;60;163", "wc_strengths": "108;101;97;59", "wc_weaknesses": "84;224;200;67", "wc_questions": "143;73;3;20", "wc_limitations": "79;2;1;1", "wc_review": "519;727;361;310", "wc_reply_reviewers": "0;52;12;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 163.75, 101.07763105653001 ], "wc_strengths_avg": [ 91.25, 19.031224343168255 ], "wc_weaknesses_avg": [ 143.75, 69.03758034577979 ], "wc_questions_avg": [ 59.75, 54.55902766728894 ], "wc_limitations_avg": [ 20.75, 33.633130987167995 ], "wc_review_avg": [ 479.25, 162.47211299173776 ], "wc_reply_reviewers_avg": [ 19.0, 19.672315572906 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 94, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9400790265193597011&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uw.edu;uw.edu;uw.edu;;allenai.org;allenai.org;;u.washington.edu;uw.edu", "author_num": 9, "aff_unique_index": "0;0;0;1;1;0;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "CWdxHxVAGG", "title": "Reject option models comprising out-of-distribution detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "The optimal prediction strategy for out-of-distribution (OOD) setups is a fundamental question in machine learning. In this paper, we address this question and present several contributions. We propose three reject option models for OOD setups: the Cost-based model, the Bounded TPR-FPR model, and the Bounded Precision-Recall model. These models extend the standard reject option models used in non-OOD setups and define the notion of an optimal OOD selective classifier. We establish that all the proposed models, despite their different formulations, share a common class of optimal strategies. Motivated by the optimal strategy, we introduce double-score OOD methods that leverage uncertainty scores from two chosen OOD detectors: one focused on OOD/ID discrimination and the other on misclassification detection. The experimental results consistently demonstrate the superior performance of this simple strategy compared to state-of-the-art methods. Additionally, we propose novel evaluation metrics derived from the definition of the optimal strategy under the proposed OOD rejection models. These new metrics provide a comprehensive and reliable assessment of OOD methods without the deficiencies observed in existing evaluation approaches. 
", "keywords": "reject option classification;out-of-distribution detection;selective classification", "primary_area": "", "supplementary_material": "/attachment/475ab5d4a1d89500c6b431937a7ad12635f39c2b.zip", "author": "Vojtech Franc;Daniel Prusa;Jakub Paplham", "authorids": "~Vojtech_Franc1;prusapa1@fel.cvut.cz;paplhjak@fel.cvut.cz", "gender": "M;;", "homepage": "http://cmp.felk.cvut.cz/~xfrancv/;;", "dblp": "60/1691;;", "google_scholar": "https://scholar.google.cz/citations?user=9a5P_D8AAAAJ;;", "orcid": "0000-0001-7189-1224;;", "linkedin": "vojtech-franc-48139b42/;;", "or_profile": "~Vojtech_Franc1;prusapa1@fel.cvut.cz;paplhjak@fel.cvut.cz", "aff": "Czech Technical University in Prague, Faculty of Electrical Engineering;;", "aff_domain": "cvut.cz;;", "position": "Assistant Professor;;", "bibtex": "@misc{\nfranc2023reject,\ntitle={Reject option models comprising out-of-distribution detection},\nauthor={Vojtech Franc and Daniel Prusa and Jakub Paplham},\nyear={2023},\nurl={https://openreview.net/forum?id=CWdxHxVAGG}\n}", "github": "", "project": "", "reviewers": "cqqv;msSt;G4Mq;hjaD;77aK", "site": "https://openreview.net/forum?id=CWdxHxVAGG", "pdf_size": 248613, "rating": "4;4;5;6;7", "confidence": "3;4;4;3;3", "soundness": "2;3;3;4;3", "novelty": "2;3;2;3;3", "presentation": "2;2;3;3;3", "wc_summary": "47;65;38;210;106", "wc_strengths": "54;100;38;102;145", "wc_weaknesses": "269;289;109;140;74", "wc_questions": "199;16;55;442;60", "wc_limitations": "6;16;1;82;12", "wc_review": "575;486;241;976;397", "wc_reply_reviewers": "421;405;0;51;34", "wc_reply_authors": "617;224;0;29;15", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;3;1;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 93.2, 62.90119235753802 ], "wc_strengths_avg": [ 87.8, 38.06520721078502 ], "wc_weaknesses_avg": [ 176.2, 86.7257747154789 ], "wc_questions_avg": [ 154.4, 156.6200498020608 ], "wc_limitations_avg": [ 23.4, 29.742898312034086 ], "wc_review_avg": [ 535.0, 246.56114860212668 ], "wc_reply_reviewers_avg": [ 182.2, 189.2293846103189 ], "wc_reply_authors_avg": [ 177.0, 234.64270711019338 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4900980294098034, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3072920754735130612&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Czech Technical University in Prague", "aff_unique_dep": "Faculty of Electrical Engineering", "aff_unique_url": "https://www.fel.cvut.cz", "aff_unique_abbr": "CTU", "aff_campus_unique_index": "0", "aff_campus_unique": "Prague", "aff_country_unique_index": "0", "aff_country_unique": "Czech Republic" }, { "id": "CWnzWMLk3P", "title": "FAVAS: Federated AVeraging with ASynchronous clients", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we propose a novel centralized Asynchronous Federated Learning (FL) framework, FAVAS, for training Deep Neural Networks (DNNs) in resource-constrained environments. Despite its popularity, \"classical\" federated learning faces the increasingly difficult task of scaling synchronous communication over large wireless networks. 
Moreover, clients typically have different computing resources and therefore different computing speeds, which can lead to a significant bias (in favor of \"fast\" clients) when the updates are asynchronous. Therefore, practical deployment of FL requires handling users with strongly varying computing speeds in communication- and resource-constrained settings. We provide convergence guarantees for FAVAS in a smooth, non-convex environment and carefully compare the obtained convergence guarantees with existing bounds, when they are available. Experimental results show that the FAVAS algorithm outperforms current methods on standard benchmarks.", "keywords": "Asynchronous Federated Learning;Federated Averaging", "primary_area": "", "supplementary_material": "/attachment/54be7fab2f3ff3b22b5495b57be899e7a5c768ce.zip", "author": "Louis Leconte;Van Minh Nguyen;Eric Moulines", "authorids": "~Louis_Leconte1;~Van_Minh_Nguyen1;~Eric_Moulines1", "gender": "Not Specified;;M", "homepage": ";;", "dblp": "246/3352;69/7397;54/2358", "google_scholar": ";;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ", "orcid": ";0000-0001-6475-6163;0000-0002-2058-0693", "linkedin": ";vanminh/;", "or_profile": "~Louis_Leconte1;~Van_Minh_Nguyen1;~Eric_Moulines1", "aff": "Lisite;Huawei Technologies Ltd.;Ecole polytechnique", "aff_domain": "sorbonne-universite.fr;huawei.com;polytechnique.edu", "position": "PhD student;Expert;Full Professor", "bibtex": "@misc{\nleconte2023favas,\ntitle={{FAVAS}: Federated {AV}eraging with {AS}ynchronous clients},\nauthor={Louis Leconte and Van Minh Nguyen and Eric Moulines},\nyear={2023},\nurl={https://openreview.net/forum?id=CWnzWMLk3P}\n}", "github": "", "project": "", "reviewers": "GoYN;1shw;PbVJ;yPyi;Aa1R", "site": "https://openreview.net/forum?id=CWnzWMLk3P", "pdf_size": 1969803, "rating": "4;4;5;6;7", "confidence": "4;4;3;4;4", "soundness": "3;2;3;3;4", "novelty": "2;2;2;3;3", "presentation": "2;2;3;3;3", "wc_summary": "77;43;110;61;44", "wc_strengths": "61;27;148;42;37", "wc_weaknesses": "139;272;135;27;59", "wc_questions": "139;93;532;481;331", "wc_limitations": "2;1;39;1;18", "wc_review": "418;436;964;612;489", "wc_reply_reviewers": "179;114;393;55;171", "wc_reply_authors": "0;243;290;0;0", "reply_reviewers": "1;1;2;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 67.0, 24.859605789312106 ], "wc_strengths_avg": [ 63.0, 43.91355143916283 ], "wc_weaknesses_avg": [ 126.4, 84.68199336340636 ], "wc_questions_avg": [ 315.2, 176.15947320538854 ], "wc_limitations_avg": [ 12.2, 14.878171930717834 ], "wc_review_avg": [ 583.8, 201.8260637281518 ], "wc_reply_reviewers_avg": [ 182.4, 114.37062559940817 ], "wc_reply_authors_avg": [ 106.6, 131.40106544469114 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.08574929257125441, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6989651951774513645&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Lisite;Huawei;Ecole Polytechnique", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": ";https://www.huawei.com;https://www.polytechnique.edu", "aff_unique_abbr": ";Huawei;X", "aff_campus_unique_index": "", "aff_campus_unique":
"", "aff_country_unique_index": "1;2", "aff_country_unique": ";China;France" }, { "title": "SPAE: Semantic Pyramid AutoEncoder for Multimodal Generation with Frozen LLMs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72427", "id": "CXPUg86A1D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a526cc8f6ffb74bedb6ff313e3fdb450-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CXPUg86A1D", "openreview": "https://openreview.net/forum?id=CXPUg86A1D", "poster": "/media/PosterPDFs/NeurIPS%202023/72427.png?t=1702165794.6808026", "slides": "https://nips.cc/virtual/2023/poster/72427", "video": "https://nips.cc/virtual/2023/poster/72427", "author_site": "Lijun Yu, Yong Cheng, Zhiruo Wang, Vivek Kumar, Wolfgang Macherey, Yanping Huang, David Ross, Irfan Essa, Yonatan Bisk, Ming-Hsuan Yang, Kevin Murphy, Alexander Hauptmann, Lu Jiang, Lu Jiang", "tldr": "", "abstract": "In this work, we introduce Semantic Pyramid AutoEncoder (SPAE) for enabling frozen LLMs to perform both understanding and generation tasks involving non-linguistic modalities such as images or videos. SPAE converts between raw pixels and interpretable lexical tokens (or words) extracted from the LLM's vocabulary. The resulting tokens capture both the rich semantic meaning and the fine-grained details needed for visual reconstruction, effectively translating the visual content into a language comprehensible to the LLM, and empowering it to perform a wide array of multimodal tasks. Our approach is validated through in-context learning experiments with frozen PaLM 2 and GPT 3.5 on a diverse set of image understanding and generation tasks.\nOur method marks the first successful attempt to enable a frozen LLM to generate image content while surpassing state-of-the-art performance in image understanding tasks, under the same setting, by over 25%.", "keywords": "multimodal;generation;large language model", "primary_area": "", "supplementary_material": "/attachment/16f6950427cc3e294ec30028cd79211af8f332af.pdf", "author": "Lijun Yu;Yong Cheng;Zhiruo Wang;Vivek Kumar;Wolfgang Macherey;Yanping Huang;David A Ross;Irfan Essa;Yonatan Bisk;Ming-Hsuan Yang;Kevin Patrick Murphy;Alexander G Hauptmann;Lu Jiang", "authorids": "~Lijun_Yu1;~Yong_Cheng3;~Zhiruo_Wang1;~Vivek_Kumar3;~Wolfgang_Macherey1;~Yanping_Huang1;~David_A_Ross1;~Irfan_Essa1;~Yonatan_Bisk1;~Ming-Hsuan_Yang1;~Kevin_Patrick_Murphy1;~Alexander_G_Hauptmann1;~Lu_Jiang1", "gender": "M;M;F;gender non-conforming;M;M;M;M;M;;M;M;M", "homepage": "https://me.lj-y.com/;;https://zorazrw.github.io;https://www.v1vek.com/;;;http://www.irfanessa.com/;http://www.YonatanBisk.com;https://faculty.ucmerced.edu/mhyang/;https://www.cs.ubc.ca/~murphyk/;;http://www.lujiang.info/;http://www.cs.toronto.edu/~dross/", "dblp": "94/5561;34/6276.html;249/2286;;88/4457;00/10104;e/IrfanAEssa;38/9282;79/3711.html;26/2599;h/AlexanderGHauptmann;22/752-4;68/2171", "google_scholar": "IaDc0OcAAAAJ;rZ0mlMYAAAAJ;https://scholar.google.com/citations?hl=en;fOexgn8AAAAJ;;uEtBQScAAAAJ;https://scholar.google.com.tw/citations?user=XM97iScAAAAJ;bWoGh8UAAAAJ;p9-ohHsAAAAJ;MxxZkEcAAAAJ;https://scholar.google.co.uk/citations?user=Py54GcEAAAAJ;jIKjjSYAAAAJ;RqOzJR0AAAAJ", "orcid": "0000-0003-0645-1657;;;;;;0000-0002-6236-2969;0000-0002-2111-9081;0000-0003-4848-2304;;;0000-0003-0286-8439;", "linkedin": "lijun-yu/;;;;;;irfanessa/;yonatanbisk/;minghsuanyang/;;;roadjiang/;", "or_profile": 
"~Lijun_Yu1;~Yong_Cheng3;~Zhiruo_Wang1;~Vivek_Kumar3;~Wolfgang_Macherey1;~Yanping_Huang1;~Irfan_Essa1;~Yonatan_Bisk1;~Ming-Hsuan_Yang1;~Kevin_Patrick_Murphy1;~Alexander_G_Hauptmann1;~Lu_Jiang1;~David_Alexander_Ross1", "aff": "School of Computer Science, Carnegie Mellon University;Google;Carnegie Mellon University;;Google;Google;Georgia Institute of Technology;Meta;University of California at Merced;Google;School of Computer Science, Carnegie Mellon University;Google Research;Research, Google", "aff_domain": "cs.cmu.edu;google.com;cmu.edu;;google.com;google.com;gatech.edu;meta.com;umcerced.edu;google.com;cs.cmu.edu;google.com;research.google.com", "position": "PhD student;Researcher;MS student;;Research Scientist;Engineer;Full Professor;Visiting Professor;Professor;Principal Researcher;Full Professor;Researcher;Software Engineer", "bibtex": "@inproceedings{\nyu2023spae,\ntitle={{SPAE}: Semantic Pyramid AutoEncoder for Multimodal Generation with Frozen {LLM}s},\nauthor={Lijun Yu and Yong Cheng and Zhiruo Wang and Vivek Kumar and Wolfgang Macherey and Yanping Huang and David A Ross and Irfan Essa and Yonatan Bisk and Ming-Hsuan Yang and Kevin Patrick Murphy and Alexander G Hauptmann and Lu Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CXPUg86A1D}\n}", "github": "", "project": "", "reviewers": "mRhs;2BCt;ekNF;1ht3", "pdf_size": 3054764, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;2", "wc_summary": "66;81;42;112", "wc_strengths": "72;72;46;48", "wc_weaknesses": "88;79;67;203", "wc_questions": "2;6;53;30", "wc_limitations": "31;9;1;1", "wc_review": "259;247;209;394", "wc_reply_reviewers": "41;18;13;41", "wc_reply_authors": "236;20;19;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 25.370997221236692 ], "wc_strengths_avg": [ 59.5, 12.519984025548915 ], "wc_weaknesses_avg": [ 109.25, 54.63686941983408 ], "wc_questions_avg": [ 22.75, 20.48627589387588 ], "wc_limitations_avg": [ 10.5, 12.278029157808675 ], "wc_review_avg": [ 277.25, 69.88696230342252 ], "wc_reply_reviewers_avg": [ 28.25, 12.871965661856 ], "wc_reply_authors_avg": [ 68.75, 96.8900794715331 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1482009236076112496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.cmu.edu;google.com;cmu.edu;;google.com;google.com;gatech.edu;meta.com;umcerced.edu;google.com;cs.cmu.edu;google.com;research.google.com", "author_num": 13, "aff_unique_index": "0;1;0;1;1;2;3;4;1;0;1;1", "aff_unique_norm": "Carnegie Mellon University;Google;Georgia Institute of Technology;Meta;University of California, Merced", "aff_unique_dep": "School of Computer Science;Google;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.cmu.edu;https://www.google.com;https://www.gatech.edu;https://meta.com;https://www.ucmerced.edu", "aff_unique_abbr": "CMU;Google;Georgia Tech;Meta;UC Merced", "aff_campus_unique_index": "0;1;1;1;3;1;0;1;1", "aff_campus_unique": "Pittsburgh;Mountain View;;Merced", 
"aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PETAL: Physics Emulation Through Averaged Linearizations for Solving Inverse Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72426", "id": "CXrRMfs5eY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/991c9324ca71aa85ab4dd11146b35fc3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CXrRMfs5eY", "openreview": "https://openreview.net/forum?id=CXrRMfs5eY", "poster": "/media/PosterPDFs/NeurIPS%202023/72426.png?t=1701898528.2204764", "slides": "https://nips.cc/virtual/2023/poster/72426", "video": "https://nips.cc/virtual/2023/poster/72426", "author_site": "Jihui Jin, Etienne Ollivier, Richard Touret, Matthew McKinley, Karim Sabra, Justin Romberg", "tldr": "", "abstract": "Inverse problems describe the task of recovering an underlying signal of interest given observables. Typically, the observables are related via some non-linear forward model applied to the underlying unknown signal. Inverting the non-linear forward model can be computationally expensive, as it often involves computing and inverting a linearization at a series of estimates. Rather than inverting the physics-based model, we instead train a surrogate forward model (emulator) and leverage modern auto-grad libraries to solve for the input within a classical optimization framework. Current methods to train emulators are done in a black box supervised machine learning fashion and fail to take advantage of any existing knowledge of the forward model. In this article, we propose a simple learned weighted average model that embeds linearizations of the forward model around various reference points into the model itself, explicitly incorporating known physics. Grounding the learned model with physics based linearizations improves the forward modeling accuracy and provides richer physics based gradient information during the inversion process leading to more accurate signal recovery. We demonstrate the efficacy on an ocean acoustic tomography (OAT) example that aims to recover ocean sound speed profile (SSP) variations from acoustic observations (e.g. 
eigenray arrival times) within simulation of ocean dynamics in the Gulf of Mexico.", "keywords": "Inverse Problems;Neural Adjoint;Hybrid Machine Learning;Physics", "primary_area": "", "supplementary_material": "/attachment/3d275badafcb3a26c11fa4e34a53f5e47288f0dc.zip", "author": "Jihui Jin;Etienne Ollivier;Richard Touret;Matthew McKinley;Karim Sabra;Justin Romberg", "authorids": "~Jihui_Jin1;eollivier3@gatech.edu;rtouret3@gatech.edu;mmckinley31@gatech.edu;~Karim_Sabra1;~Justin_Romberg1", "gender": "M;;;;;M", "homepage": ";;;;https://www.me.gatech.edu/faculty/sabra;https://jrom.ece.gatech.edu/", "dblp": ";;;;;", "google_scholar": ";;;;svaTGrwAAAAJ;", "orcid": ";;;;;", "linkedin": "jihui-jin-4aa111112/;;;;;", "or_profile": "~Jihui_Jin1;eollivier3@gatech.edu;rtouret3@gatech.edu;mmckinley31@gatech.edu;~Karim_Sabra1;~Justin_Romberg1", "aff": "Georgia Institute of Technology;;;;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;;;;gatech.edu;gatech.edu", "position": "PhD student;;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\njin2023petal,\ntitle={{PETAL}: Physics Emulation Through Averaged Linearizations for Solving Inverse Problems},\nauthor={Jihui Jin and Etienne Ollivier and Richard Touret and Matthew McKinley and Karim Sabra and Justin Romberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CXrRMfs5eY}\n}", "github": "", "project": "", "reviewers": "1TNX;Bbps;yV52;exJb", "pdf_size": 3582575, "rating": "3;6;6;6", "confidence": "3;3;3;4", "soundness": "2;3;3;2", "novelty": "1;2;2;3", "presentation": "2;3;2;4", "wc_summary": "101;57;24;84", "wc_strengths": "42;60;48;15", "wc_weaknesses": "231;76;31;299", "wc_questions": "185;133;70;82", "wc_limitations": "1;1;1;7", "wc_review": "560;327;174;487", "wc_reply_reviewers": "97;18;39;132", "wc_reply_authors": "260;0;0;372", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 66.5, 29.124731758421397 ], "wc_strengths_avg": [ 41.25, 16.48294573187693 ], "wc_weaknesses_avg": [ 159.25, 109.60924915352719 ], "wc_questions_avg": [ 117.5, 45.5878273226527 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 387.0, 149.07883820314672 ], "wc_reply_reviewers_avg": [ 71.5, 45.356917884706405 ], "wc_reply_authors_avg": [ 158.0, 162.88646352597874 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12287560403410966627&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "gatech.edu;;;;gatech.edu;gatech.edu", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DiffSketcher: Text Guided Vector Sketch Synthesis through Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72425", "id": "CY1xatvEQj", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/333e67fc4728f147d31608db3ca78e09-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CY1xatvEQj", "openreview": "https://openreview.net/forum?id=CY1xatvEQj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72425", "video": "https://nips.cc/virtual/2023/poster/72425", "author_site": "XiMing Xing, Chuang Wang, Chuang Wang, Haitao Zhou, Jing Zhang, Qian Yu, Dong Xu", "tldr": "", "abstract": "Even though trained mainly on images, we discover that pretrained diffusion models show impressive power in guiding sketch synthesis. In this paper, we present DiffSketcher, an innovative algorithm that creates \\textit{vectorized} free-hand sketches using natural language input. DiffSketcher is developed based on a pre-trained text-to-image diffusion model. It performs the task by directly optimizing a set of B\u00e9zier curves with an extended version of the score distillation sampling (SDS) loss, which allows us to use a raster-level diffusion model as a prior for optimizing a parametric vectorized sketch generator. Furthermore, we explore attention maps embedded in the diffusion model for effective stroke initialization to speed up the generation process. The generated sketches demonstrate multiple levels of abstraction while maintaining recognizability, underlying structure, and essential visual details of the subject drawn. Our experiments show that DiffSketcher achieves greater quality than prior work. The code and demo of DiffSketcher can be found at https://ximinng.github.io/DiffSketcher-project/.", "keywords": "Sketch; Vector Sketch; Sketch Generation; Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/e3dc8d4e247287eb1166ffca9d8e85a3f15f0a60.pdf", "author": "XiMing Xing;Chuang Wang;Haitao Zhou;Jing Zhang;Qian Yu;Dong Xu", "authorids": "~XiMing_Xing1;~Chuang_Wang6;~Haitao_Zhou1;~Jing_Zhang10;~Qian_Yu4;~Dong_Xu2", "gender": "M;M;F;F;Unspecified;M", "homepage": "https://buaaer-stack.github.io/chuangwang.github.io/;https://zhtjtcz.github.io;;https://yuqian1023.github.io/;https://www.cs.hku.hk/people/academic-staff/dongxu;https://ximinng.github.io/", "dblp": ";24/4582;05/3499-17;;09/3493-1;350/0927", "google_scholar": ";https://scholar.google.com.hk/citations?user=e_kUVpcAAAAJ;XtwOoQgAAAAJ;mmm90qgAAAAJ;7Hdu5k4AAAAJ;https://scholar.google.com.tw/citations?user=tFpaF7AAAAAJ", "orcid": ";;;0000-0002-0538-7940;;", "linkedin": ";;;;;", "or_profile": "~Chuang_Wang6;~Haitao_Zhou1;~Jing_Zhang10;~Qian_Yu4;~Dong_Xu2;~Xing_XiMing1", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;University of Hong Kong;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;hku.hk;buaa.edu.cn", "position": "Undergrad student;Undergrad student;Associate Professor;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nxing2023diffsketcher,\ntitle={DiffSketcher: Text Guided Vector Sketch Synthesis through Latent Diffusion Models},\nauthor={XiMing Xing and Chuang Wang and Haitao Zhou and Jing Zhang and Qian Yu and Dong Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CY1xatvEQj}\n}", "github": "", "project": "", "reviewers": "ZP2A;PspM;mvSd;6RS1;iPV2", "pdf_size": 15762826, "rating": "3;5;7;7;7", "confidence": "4;3;3;4;4", "soundness": "2;3;3;3;4", "novelty": "2;2;4;3;3", "presentation": "2;2;3;3;4", "wc_summary": "54;81;62;95;72", 
"wc_strengths": "36;57;90;83;123", "wc_weaknesses": "234;123;149;152;24", "wc_questions": "112;255;41;59;21", "wc_limitations": "68;93;10;56;19", "wc_review": "504;609;352;445;259", "wc_reply_reviewers": "0;330;74;63;28", "wc_reply_authors": "0;498;0;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 72.8, 14.358272876638054 ], "wc_strengths_avg": [ 77.8, 29.66074847336122 ], "wc_weaknesses_avg": [ 136.4, 67.44064056635287 ], "wc_questions_avg": [ 97.6, 84.31275111156081 ], "wc_limitations_avg": [ 49.2, 30.876528302255746 ], "wc_review_avg": [ 433.8, 120.82615610868369 ], "wc_reply_reviewers_avg": [ 99.0, 118.4263484195979 ], "wc_reply_authors_avg": [ 99.6, 199.2 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.10206207261596574, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13867141543622364245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;hku.hk;buaa.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Beihang University;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.hku.hk", "aff_unique_abbr": "BUAA;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Survival Permanental Processes for Survival Analysis with Time-Varying Covariates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72424", "id": "CYCzfXn6cZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e664650506f1cf2b4696df892147c06e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CYCzfXn6cZ", "openreview": "https://openreview.net/forum?id=CYCzfXn6cZ", "poster": "/media/PosterPDFs/NeurIPS%202023/72424.png?t=1701889698.96281", "slides": "https://nips.cc/virtual/2023/poster/72424", "video": "https://nips.cc/virtual/2023/poster/72424", "tldr": "", "abstract": "Survival or time-to-event data with time-varying covariates are common in practice, and exploring the non-stationarity in covariates is essential to accurately analyzing the nonlinear dependence of time-to-event outcomes on covariates. Traditional survival analysis methods such as Cox proportional hazards model have been extended to address the time-varying covariates through a counting process formulation, although sophisticated machine learning methods that can accommodate time-varying covariates have been limited. In this paper, we propose a non-parametric Bayesian survival model to analyze the nonlinear dependence of time-to-event outcomes on time-varying covariates. We focus on a computationally feasible Cox process called permanental process, which assumes the square root of hazard function to be generated from a Gaussian process, and tailor it for survival data with time-varying covariates. 
We verify that the proposed model holds with the representer theorem, a beneficial property for functional analysis, which offers us a fast Bayesian estimation algorithm that scales linearly with the number of observed events without relying on Markov Chain Monte Carlo computation. We evaluate our algorithm on synthetic and real-world data, and show that it achieves comparable predictive accuracy while being tens to hundreds of times faster than state-of-the-art methods.", "keywords": "survival analysis;temporal point process;Bayesian estimation;permanental process;representer theorem;kernel method", "primary_area": "", "supplementary_material": "/attachment/872f2abff1cb7dc80a17e35c89f1f3736d892bf2.zip", "author": "Hideaki Kim", "authorids": "~Hideaki_Kim1", "gender": "M", "homepage": "", "dblp": "41/10856", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Hideaki_Kim1", "aff": "NTT", "aff_domain": "ntt.co.jp", "position": "Researcher", "bibtex": "@inproceedings{\nkim2023survival,\ntitle={Survival Permanental Processes for Survival Analysis with Time-Varying Covariates},\nauthor={Hideaki Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CYCzfXn6cZ}\n}", "github": "", "project": "", "reviewers": "4Stf;SVLR;3BPs;feqp;LBpm", "pdf_size": 381517, "rating": "3;4;5;7;7", "confidence": "4;3;4;3;3", "soundness": "3;4;2;4;3", "novelty": "2;3;1;4;3", "presentation": "2;4;2;4;3", "wc_summary": "72;127;108;61;53", "wc_strengths": "24;56;193;222;70", "wc_weaknesses": "152;113;5;76;76", "wc_questions": "9;52;850;52;149", "wc_limitations": "20;7;1;1;14", "wc_review": "277;355;1157;412;362", "wc_reply_reviewers": "0;0;155;130;60", "wc_reply_authors": "0;0;158;86;44", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 5.2, 1.6 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 84.2, 28.491402211895434 ], "wc_strengths_avg": [ 113.0, 79.12016177940993 ], "wc_weaknesses_avg": [ 84.4, 48.64812432149877 ], "wc_questions_avg": [ 222.4, 317.1375726715458 ], "wc_limitations_avg": [ 8.6, 7.445804187594514 ], "wc_review_avg": [ 512.6, 325.0837430570775 ], "wc_reply_reviewers_avg": [ 69.0, 64.37390775772433 ], "wc_reply_authors_avg": [ 57.6, 59.50999915980507 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11811139102113974146&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ntt.co.jp", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "NTT Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt.co.jp", "aff_unique_abbr": "NTT", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "MMGP: a Mesh Morphing Gaussian Process-based machine learning method for regression of physical problems under nonparametrized geometrical variability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72423", "id": "Ca78M3awPw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/89379d5fc6eb34ff98488202fb52b9d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ca78M3awPw", "openreview": 
"https://openreview.net/forum?id=Ca78M3awPw", "poster": "/media/PosterPDFs/NeurIPS%202023/72423.png?t=1699472960.3257945", "slides": "https://nips.cc/virtual/2023/poster/72423", "video": "https://nips.cc/virtual/2023/poster/72423", "author_site": "Fabien Casenave, Brian Staber, Xavier Roynard", "tldr": "", "abstract": "When learning simulations for modeling physical phenomena in industrial designs, geometrical variabilities are of prime interest. While classical regression techniques prove effective for parameterized geometries, practical scenarios often involve the absence of shape parametrization during the inference stage, leaving us with only mesh discretizations as available data. Learning simulations from such mesh-based representations poses significant challenges, with recent advances relying heavily on deep graph neural networks to overcome the limitations of conventional machine learning approaches. Despite their promising results, graph neural networks exhibit certain drawbacks, including their dependency on extensive datasets and limitations in providing built-in predictive uncertainties or handling large meshes. In this work, we propose a machine learning method that do not rely on graph neural networks. Complex geometrical shapes and variations with fixed topology are dealt with using well-known mesh morphing onto a common support, combined with classical dimensionality reduction techniques and Gaussian processes. The proposed methodology can easily deal with large meshes without the need for explicit shape parameterization and provides crucial predictive uncertainties, which are essential for informed decision-making. In the considered numerical experiments, the proposed method is competitive with respect to existing graph neural networks, regarding training efficiency and accuracy of the predictions.", "keywords": "Gaussian process;mesh morphing;mesh parametrization;finite element interpolation;simulation;physics;predictive uncertainties;nonparametrized geometries", "primary_area": "", "supplementary_material": "/attachment/816fe8180a6f323c61811b6a4a8717f8e56087c8.pdf", "author": "Fabien Casenave;Brian Staber;Xavier Roynard", "authorids": "~Fabien_Casenave1;~Brian_Staber1;~Xavier_Roynard1", "gender": "M;M;M", "homepage": ";https://bstaber.github.io/;", "dblp": "136/6770;181/2793;211/7108", "google_scholar": "https://scholar.google.fr/citations?user=VBv7cPQAAAAJ;https://scholar.google.fr/citations?user=61j2VawAAAAJ;X7hJnO0AAAAJ", "orcid": "0000-0002-8810-9128;;", "linkedin": "fabien-casenave/;brian-staber/;", "or_profile": "~Fabien_Casenave1;~Brian_Staber1;~Xavier_Roynard1", "aff": "Safran Group;Safran;Safran Group", "aff_domain": "safran-group.com;safrangroup.com;safrangroup.com", "position": "Senior Reseach Manager;Researcher;Researcher", "bibtex": "@inproceedings{\ncasenave2023mmgp,\ntitle={{MMGP}: a Mesh Morphing Gaussian Process-based machine learning method for regression of physical problems under nonparametrized geometrical variability},\nauthor={Fabien Casenave and Brian Staber and Xavier Roynard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ca78M3awPw}\n}", "github": "", "project": "", "reviewers": "erhK;mqry;Cr5H;iwUN;4CEk", "pdf_size": 14377378, "rating": "5;5;6;6;7", "confidence": "4;2;5;2;4", "soundness": "2;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "1;3;3;3;4", "wc_summary": "136;78;53;91;103", "wc_strengths": "19;47;68;143;143", "wc_weaknesses": 
"425;54;87;42;395", "wc_questions": "6;38;317;1;7", "wc_limitations": "72;4;12;1;28", "wc_review": "658;221;537;278;676", "wc_reply_reviewers": "61;16;310;12;12", "wc_reply_authors": "0;0;1385;0;0", "reply_reviewers": "2;1;3;1;1", "reply_authors": "1;1;3;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 1.2000000000000002 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 92.2, 27.47653544390195 ], "wc_strengths_avg": [ 84.0, 50.620154089058246 ], "wc_weaknesses_avg": [ 200.6, 171.87041630251554 ], "wc_questions_avg": [ 73.8, 122.30028618118602 ], "wc_limitations_avg": [ 23.4, 26.043041297052845 ], "wc_review_avg": [ 474.0, 190.29135555773416 ], "wc_reply_reviewers_avg": [ 82.2, 115.39566716302653 ], "wc_reply_authors_avg": [ 277.0, 554.0 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.31180478223116176, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4042712819048522884&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "safran-group.com;safrangroup.com;safrangroup.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Safran Group;Safran", "aff_unique_dep": ";", "aff_unique_url": "https://www.safran-group.com;https://www.safran-group.com", "aff_unique_abbr": "Safran;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "Cb3clCaZG4", "title": "Graph Bernoulli Pooling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph pooling is crucial for enlarging receptive field and reducing computational cost in deep graph representation learning. In this work, we propose a simple but effective non-deterministic graph pooling method, called graph Bernoulli pooling (BernPool), to facilitate graph feature learning.\nIn contrast to most graph pooling methods with deterministic modes, we design a probabilistic Bernoulli sampling to reach an expected sampling rate through deducing a variational bound as the constraint. To further mine more useful info, a learnable reference set is introduced to encode nodes into a latent expressive probability space. Hereby the resultant Bernoulli sampling would endeavor to capture salient substructures of the graph while possessing much diversity on sampled nodes due to its non-deterministic manner. Considering the complementarity of node dropping and node clustering, further, we propose a hybrid graph pooling paradigm to combine a compact subgraph (via dropping) and a coarsening graph (via clustering), in order to retain both representative substructures and input graph info. Extensive experiments on multiple public graph classification datasets demonstrate that our BernPool is superior to various graph pooling methods, and achieves state-of-the-art performance. 
The code is publicly available in an anonymous format at \\href{https://anonymous.4open.science/r/BernPool}{https://anonymous.4open.science/r/BernPool}.", "keywords": "graph classification;variational inference;Bernoulli sampling;mutual information", "primary_area": "", "supplementary_material": "/attachment/c204a77b8b78e34e060fabef5141be419310a5ab.pdf", "author": "Guangbu Liu;Tong Zhang;Xudong Wang;Chuanwei Zhou;Zhen Cui", "authorids": "~Guangbu_Liu1;~Tong_Zhang8;~Xudong_Wang6;~Chuanwei_Zhou1;~Zhen_Cui4", "gender": "M;M;M;M;M", "homepage": "http://vgg-ai.cn/pages/Student/;https://vgg-ai.cn/teachers/ZhangTong/;http://vgg-ai.cn/pages/;https://github.com/deepcharle;http://aip.seu.edu.cn/zcui/", "dblp": ";07/4227-21;;;59/8491-1", "google_scholar": ";;;;ChRyl3kAAAAJ", "orcid": ";0000-0001-6212-4891;;;", "linkedin": ";;;;", "or_profile": "~Guangbu_Liu1;~Tong_Zhang8;~Xudong_Wang6;~Chuanwei_Zhou1;~Zhen_Cui4", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn", "position": "MS student;Associate Professor;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nliu2023graph,\ntitle={Graph Bernoulli Pooling},\nauthor={Guangbu Liu and Tong Zhang and Xudong Wang and Chuanwei Zhou and Zhen Cui},\nyear={2023},\nurl={https://openreview.net/forum?id=Cb3clCaZG4}\n}", "github": "", "project": "", "reviewers": "Zzuw;rF61;rS8P;kSdL;PxAC", "site": "https://openreview.net/forum?id=Cb3clCaZG4", "pdf_size": 492736, "rating": "3;5;5;6;6", "confidence": "5;4;4;3;5", "soundness": "1;2;3;3;3", "novelty": "1;2;2;2;3", "presentation": "3;3;3;2;4", "wc_summary": "58;93;69;89;38", "wc_strengths": "10;20;43;85;21", "wc_weaknesses": "98;83;87;69;94", "wc_questions": "2;8;195;200;4", "wc_limitations": "2;1;8;1;4", "wc_review": "170;205;402;444;161", "wc_reply_reviewers": "57;94;84;12;32", "wc_reply_authors": "474;0;48;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 69.4, 20.283983829612957 ], "wc_strengths_avg": [ 35.8, 26.85814587792687 ], "wc_weaknesses_avg": [ 86.2, 10.067770358922575 ], "wc_questions_avg": [ 81.8, 94.50164019740609 ], "wc_limitations_avg": [ 3.2, 2.638181191654584 ], "wc_review_avg": [ 276.4, 121.3269961715034 ], "wc_reply_reviewers_avg": [ 55.8, 30.792206806268368 ], "wc_reply_authors_avg": [ 104.4, 185.7327111738802 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.48795003647426666, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qosCyhM6L8sJ:scholar.google.com/&scioq=Graph+Bernoulli+Pooling&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "In-Context Impersonation Reveals Large Language Models' Strengths and Biases", "status":
"Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72422", "id": "CbsJ53LdKc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e3fe7b34ba4f378df39cb12a97193f41-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CbsJ53LdKc", "openreview": "https://openreview.net/forum?id=CbsJ53LdKc", "poster": "/media/PosterPDFs/NeurIPS%202023/72422.png?t=1701808560.4351242", "slides": "https://nips.cc/virtual/2023/poster/72422", "video": "https://nips.cc/virtual/2023/poster/72422", "author_site": "Leonard Salewski, Stephan Alaniz, Isabel Rio-Torto, Eric Schulz, Zeynep Akata, Zeynep Akata", "tldr": "", "abstract": "In everyday conversations, humans can take on different roles and adapt their vocabulary to their chosen roles. We explore whether LLMs can take on, that is impersonate, different roles when they generate text in-context. We ask LLMs to assume different personas before solving vision and language tasks. We do this by prefixing the prompt with a persona that is associated either with a social identity or domain expertise. In a multi-armed bandit task, we find that LLMs pretending to be children of different ages recover human-like developmental stages of exploration. In a language-based reasoning task, we find that LLMs impersonating domain experts perform better than LLMs impersonating non-domain experts. Finally, we test whether LLMs' impersonations are complementary to visual information when describing different categories. We find that impersonation can improve performance: an LLM prompted to be a bird expert describes birds better than one prompted to be a car expert. However, impersonation can also uncover LLMs' biases: an LLM prompted to be a man describes cars better than one prompted to be a woman. These findings demonstrate that LLMs are capable of taking on diverse roles and that this in-context impersonation can be used to uncover their strengths and hidden biases. 
Our code is available at https://github.com/ExplainableML/in-context-impersonation.", "keywords": "large language models;impersonation;vision language models;reasoning", "primary_area": "", "supplementary_material": "/attachment/07f5dac228234a1bda1ed05e376e00621d4e2127.pdf", "author": "Leonard Salewski;Stephan Alaniz;Isabel Rio-Torto;Eric Schulz;Zeynep Akata", "authorids": "~Leonard_Salewski1;~Stephan_Alaniz1;~Isabel_Rio-Torto1;~Eric_Schulz1;~Zeynep_Akata1", "gender": ";M;;M;F", "homepage": ";https://www.eml-munich.de/people/stephan-alaniz;;https://cpilab.org;https://eml-unitue.de/people/zeynep-akata", "dblp": ";217/1919;;124/0016;117/4838", "google_scholar": ";mzZa_yQAAAAJ;;;jQl9RtkAAAAJ", "orcid": ";0000-0003-3541-2163;;;0000-0002-1432-7747", "linkedin": ";;;;zeynep-akata-36182045/?ppe=1", "or_profile": "~Leonard_Salewski1;~Stephan_Alaniz1;~Isabel_Rio-Torto1;~Eric_Schulz1;~Zeynep_Akata1", "aff": ";Eberhard-Karls-Universit\u00e4t T\u00fcbingen;;Max Planck Institute for Biological Cybernetics;University of T\u00fcbingen", "aff_domain": ";uni-tuebingen.de;;tuebingen.mpg.de;uni-tuebingen.de", "position": ";Postdoc;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsalewski2023incontext,\ntitle={In-Context Impersonation Reveals Large Language Models' Strengths and Biases},\nauthor={Leonard Salewski and Stephan Alaniz and Isabel Rio-Torto and Eric Schulz and Zeynep Akata},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CbsJ53LdKc}\n}", "github": "", "project": "", "reviewers": "NysF;J5Gp;ZdXm;KqkQ", "pdf_size": 2049841, "rating": "6;6;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "107;95;49;77", "wc_strengths": "134;100;72;32", "wc_weaknesses": "245;148;663;85", "wc_questions": "14;5;327;38", "wc_limitations": "13;17;4;1", "wc_review": "513;365;1115;233", "wc_reply_reviewers": "36;55;654;18", "wc_reply_authors": "48;38;226;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 21.840329667841555 ], "wc_strengths_avg": [ 84.5, 37.426594822398684 ], "wc_weaknesses_avg": [ 285.25, 225.41780652823326 ], "wc_questions_avg": [ 96.0, 133.91228472399385 ], "wc_limitations_avg": [ 8.75, 6.49519052838329 ], "wc_review_avg": [ 556.5, 337.31995197438295 ], "wc_reply_reviewers_avg": [ 190.75, 267.777309531633 ], "wc_reply_authors_avg": [ 78.0, 87.3040663428686 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 156, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3535135592715854722&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": ";uni-tuebingen.de;;tuebingen.mpg.de;uni-tuebingen.de", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Max Planck Institute for Biological Cybernetics;University of T\u00fcbingen", "aff_unique_dep": ";Biological Cybernetics;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.biocybernetics.mpg.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;MPIBC;Uni T\u00fcbingen", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Spuriosity Didn\u2019t Kill the Classifier: Using Invariant Predictions to Harness Spurious Features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72421", "id": "Cc2fjBBlBD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3acbe9dc3a1e8d48a57b16e9aef91879-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Cc2fjBBlBD", "openreview": "https://openreview.net/forum?id=Cc2fjBBlBD", "poster": "/media/PosterPDFs/NeurIPS%202023/72421.png?t=1699557840.8969553", "slides": "https://nips.cc/virtual/2023/poster/72421", "video": "https://nips.cc/virtual/2023/poster/72421", "author_site": "Cian Eastwood, Shashank Singh, Andrei L Nicolicioiu, Marin Vlastelica Pogan\u010di\u0107, Julius von K\u00fcgelgen, Bernhard Sch\u00f6lkopf", "tldr": "", "abstract": "To avoid failures on out-of-distribution data, recent works have sought to extract features that have an invariant or stable relationship with the label across domains, discarding \"spurious\" or unstable features whose relationship with the label changes across domains. However, unstable features often carry complementary information that could boost performance if used correctly in the test domain. In this work, we show how this can be done without test-domain labels. In particular, we prove that pseudo-labels based on stable features provide sufficient guidance for doing so, provided that stable and unstable features are conditionally independent given the label. Based on this theoretical insight, we propose Stable Feature Boosting (SFB), an algorithm for: (i) learning a predictor that separates stable and conditionally-independent unstable features; and (ii) using the stable-feature predictions to adapt the unstable-feature predictions in the test domain. Theoretically, we prove that SFB can learn an asymptotically-optimal predictor without test-domain labels. 
Empirically, we demonstrate the effectiveness of SFB on real and synthetic data.", "keywords": "invariant prediction;spurious correlations;out-of-distribution generalization;domain generalization;domain adaptation;test-time domain adaptation", "primary_area": "", "supplementary_material": "/attachment/c7072385af01ab5706d78dd9ff8284ee4e55fe51.pdf", "author": "Cian Eastwood;Shashank Singh;Andrei Liviu Nicolicioiu;Marin Vlastelica;Julius von K\u00fcgelgen;Bernhard Sch\u00f6lkopf", "authorids": "~Cian_Eastwood1;~Shashank_Singh1;~Andrei_Liviu_Nicolicioiu1;~Marin_Vlastelica1;~Julius_von_K\u00fcgelgen2;~Bernhard_Sch\u00f6lkopf1", "gender": "M;;;;;", "homepage": "https://cianeastwood.github.io/;;https://andreinicolicioiu.github.io/;;;", "dblp": "238/2792;;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;BVUKrDQAAAAJ;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Cian_Eastwood1;~Shashank_Singh1;~Andrei_Liviu_Nicolicioiu1;~Marin_Vlastelica1;~Julius_von_K\u00fcgelgen2;~Bernhard_Sch\u00f6lkopf1", "aff": "University of Edinburgh;;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;;;", "aff_domain": "ed.ac.uk;;mila.umontreal.ca;;;", "position": "PhD student;;PhD student;;;", "bibtex": "@inproceedings{\neastwood2023spuriosity,\ntitle={Spuriosity Didn{\\textquoteright}t Kill the Classifier: Using Invariant Predictions to Harness Spurious Features},\nauthor={Cian Eastwood and Shashank Singh and Andrei Liviu Nicolicioiu and Marin Vlastelica and Julius von K{\\\"u}gelgen and Bernhard Sch{\\\"o}lkopf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Cc2fjBBlBD}\n}", "github": "", "project": "", "reviewers": "f1Zc;S7EC;5BRF;NDD6", "pdf_size": 1579106, "rating": "6;6;7;7", "confidence": "4;4;3;3", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "264;140;48;74", "wc_strengths": "103;41;51;170", "wc_weaknesses": "254;98;66;42", "wc_questions": "367;47;61;26", "wc_limitations": "57;40;6;1", "wc_review": "1045;366;232;313", "wc_reply_reviewers": "89;38;16;25", "wc_reply_authors": "84;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.5, 83.52694176132633 ], "wc_strengths_avg": [ 91.25, 51.197534120307004 ], "wc_weaknesses_avg": [ 115.0, 82.67405880927826 ], "wc_questions_avg": [ 125.25, 140.12918147195467 ], "wc_limitations_avg": [ 26.0, 23.355941428253324 ], "wc_review_avg": [ 489.0, 324.53428170225715 ], "wc_reply_reviewers_avg": [ 42.0, 28.240042492885877 ], "wc_reply_authors_avg": [ 21.0, 36.373066958946424 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13199264789222568997&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ed.ac.uk;;mila.umontreal.ca;;;", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "University of Edinburgh;University of Montreal", "aff_unique_dep": ";Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.ed.ac.uk;https://www.mila.quebec", "aff_unique_abbr": "Edinburgh;MILA", "aff_campus_unique_index": "1", "aff_campus_unique": 
";Montreal", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Smooth, exact rotational symmetrization for deep learning on point clouds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72420", "id": "CdSRFn1fVe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb4a7e3522363907b26a86cc5be627ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CdSRFn1fVe", "openreview": "https://openreview.net/forum?id=CdSRFn1fVe", "poster": "/media/PosterPDFs/NeurIPS%202023/72420.png?t=1702327255.7048545", "slides": "https://nips.cc/virtual/2023/poster/72420", "video": "https://nips.cc/virtual/2023/poster/72420", "author_site": "Sergey Pozdnyakov, Michele Ceriotti", "tldr": "", "abstract": "Point clouds are versatile representations of 3D objects and have found widespread application in science and engineering. Many successful deep-learning models have been proposed that use them as input. The domain of chemical and materials modeling is especially challenging because exact compliance with physical constraints is highly desirable for a model to be usable in practice. These constraints include smoothness and invariance with respect to translations, rotations, and permutations of identical atoms. If these requirements are not rigorously fulfilled, atomistic simulations might lead to absurd outcomes even if the model has excellent accuracy. Consequently, dedicated architectures, which achieve invariance by restricting their design space, have been developed. General-purpose point-cloud models are more varied but often disregard rotational symmetry. We propose a general symmetrization method that adds rotational equivariance to any given model while preserving all the other requirements.\nOur approach simplifies the development of better atomic-scale machine-learning schemes by relaxing the constraints on the design space and making it possible to incorporate ideas that proved effective in other domains.\nWe demonstrate this idea by introducing the Point Edge Transformer (PET) architecture, which is not intrinsically equivariant but achieves state-of-the-art performance on several benchmark datasets of molecules and solids. 
A-posteriori application of our general protocol makes PET exactly equivariant, with minimal changes to its accuracy.", "keywords": "geometric deep learning;point clouds;equivariance;machine learning potentials;GNN;transformer;atomic-scale modeling", "primary_area": "", "supplementary_material": "/attachment/f25e2f099820e696fa5de3139229188a3330ffec.zip", "author": "Sergey Pozdnyakov;Michele Ceriotti", "authorids": "~Sergey_Pozdnyakov1;~Michele_Ceriotti1", "gender": "M;M", "homepage": ";https://cosmo.epfl.ch", "dblp": "293/9220;45/8770", "google_scholar": "1-uZ3uYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-5980-5813;0000-0003-2571-2832", "linkedin": ";", "or_profile": "~Sergey_Pozdnyakov1;~Michele_Ceriotti1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\npozdnyakov2023smooth,\ntitle={Smooth, exact rotational symmetrization for deep learning on point clouds},\nauthor={Sergey Pozdnyakov and Michele Ceriotti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CdSRFn1fVe}\n}", "github": "", "project": "", "reviewers": "oCKF;mhUe;r8ta;QskA", "pdf_size": 1115309, "rating": "4;5;5;6", "confidence": "3;3;2;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;2;3", "wc_summary": "53;108;49;87", "wc_strengths": "49;35;22;125", "wc_weaknesses": "119;182;221;95", "wc_questions": "2;237;13;134", "wc_limitations": "1;9;1;6", "wc_review": "224;571;306;447", "wc_reply_reviewers": "25;90;36;0", "wc_reply_authors": "17;229;8;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 74.25, 24.44764814864612 ], "wc_strengths_avg": [ 57.75, 39.983590384056306 ], "wc_weaknesses_avg": [ 154.25, 49.946846747317295 ], "wc_questions_avg": [ 96.5, 96.2405839550031 ], "wc_limitations_avg": [ 4.25, 3.418698582794336 ], "wc_review_avg": [ 387.0, 132.84012947900948 ], "wc_reply_reviewers_avg": [ 37.75, 32.86620604815834 ], "wc_reply_authors_avg": [ 63.5, 95.74053478020686 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11502837352055753800&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "Ce0dDt9tUT", "title": "Explore Positive Noise in Deep Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In computer vision, noise is conventionally viewed as a harmful perturbation in various deep learning architectures, such as convolutional neural networks (CNNs) and vision transformers (ViTs), as well as different tasks like image classification and transfer learning. However, this paper aims to rethink whether the conventional proposition always holds. 
We demonstrate that specific noise can boost the performance of various deep architectures under certain conditions. We theoretically prove the enhancement gained from positive noise by reducing the task complexity defined by information entropy and experimentally show the significant performance gain on large image datasets, such as ImageNet. Herein, we use information entropy to define the complexity of the task. We categorize the noise into two types, positive noise (PN) and harmful noise (HN), based on whether the noise can help reduce the complexity of the task. Extensive experiments on CNNs and ViTs have shown performance improvements by proactively injecting positive noise, where we achieve an unprecedented top-1 accuracy over 95$\\%$ on ImageNet. Both theoretical analysis and empirical evidence have confirmed the presence of positive noise, which can benefit the learning process, while the traditionally perceived harmful noise indeed impairs deep learning models. The different roles of noise offer new explanations for deep models on specific tasks and provide a new paradigm for improving model performance. Moreover, they remind us to utilize noise rather than suppress it.", "keywords": "Deep learning;Positive Noise", "primary_area": "", "supplementary_material": "/attachment/c1b8e4362f40b32e53d7a0974e2c84d7be195534.pdf", "author": "Xiaowei Yu;Yao Xue;Li Wang;Lu Zhang;Yanjun Lyu;Tianming Liu;Dajiang Zhu", "authorids": "~Xiaowei_Yu1;~Yao_Xue2;~Li_Wang1;~Lu_Zhang16;~Yanjun_Lyu1;~Tianming_Liu3;~Dajiang_Zhu1", "gender": "M;F;F;F;;M;M", "homepage": "http://shawey94.github.io/;;http://www.uta.edu/faculty/wangl3/;https://qidianzl.github.io/;;https://cobweb.cs.uga.edu/~tliu/;https://mentis.uta.edu/explore/profile/dajiang-zhu", "dblp": ";;;;277/7781;96/5013-1.html;https://dblp.uni-trier.de/pers/hd/z/Zhu:Dajiang", "google_scholar": "Kc1FjToAAAAJ;;;XNbe_O4AAAAJ;;92RPXm0AAAAJ;cFgudIYAAAAJ", "orcid": ";;;;;;", "linkedin": "shawey94;yao-xue-a81045138;;;;;", "or_profile": "~Xiaowei_Yu1;~Yao_Xue2;~Li_Wang1;~Lu_Zhang16;~Yanjun_Lyu1;~Tianming_Liu3;~Dajiang_Zhu1", "aff": "University of Texas at Arlington, University of Texas at Arlington;Southern Illinois University-Carbondale;University of Texas, Arlington;;University of Texas at Arlington;University of Georgia;University of Texas at Arlington", "aff_domain": "mavs.uta.edu;siu.edu;uta.edu;;uta.edu;uga.edu;uta.edu", "position": "PhD student;PhD student;Assistant Professor;;PhD student;Professor;Assistant Professor", "bibtex": "@misc{\nyu2023explore,\ntitle={Explore Positive Noise in Deep Learning},\nauthor={Xiaowei Yu and Yao Xue and Li Wang and Lu Zhang and Yanjun Lyu and Tianming Liu and Dajiang Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=Ce0dDt9tUT}\n}", "github": "", "project": "", "reviewers": "fTiY;93YL;vkXB;tNhi;MHYe", "site": "https://openreview.net/forum?id=Ce0dDt9tUT", "pdf_size": 1013732, "rating": "3;4;5;5;7", "confidence": "2;2;4;4;4", "soundness": "1;2;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;1;3;2;3", "wc_summary": "228;67;57;65;70", "wc_strengths": "20;36;59;51;84", "wc_weaknesses": "331;521;67;198;29", "wc_questions": "26;7;301;22;60", "wc_limitations": "6;5;1;12;14", "wc_review": "611;636;485;348;257", "wc_reply_reviewers": "326;0;732;133;9", "wc_reply_authors": "537;0;1205;44;0", "reply_reviewers": "2;0;3;1;1", "reply_authors": "3;1;4;2;1", "rating_avg": [ 4.8, 1.32664991614216 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 3.0, 0.0 ],
"presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 97.4, 65.44188261350678 ], "wc_strengths_avg": [ 50.0, 21.60555484129024 ], "wc_weaknesses_avg": [ 229.2, 180.48423753890532 ], "wc_questions_avg": [ 83.2, 110.27130179697708 ], "wc_limitations_avg": [ 7.6, 4.758150901348127 ], "wc_review_avg": [ 467.4, 146.888529164125 ], "wc_reply_reviewers_avg": [ 240.0, 272.7159694627361 ], "wc_reply_authors_avg": [ 357.2, 469.9725098343519 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8000946913656628, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FYYFSEl9kPoJ:scholar.google.com/&scioq=Explore+Positive+Noise+in+Deep+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "University of Texas at Arlington;Southern Illinois University;University of Georgia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uta.edu;https://www.siu.edu;https://www.uga.edu", "aff_unique_abbr": "UTA;SIU;UGA", "aff_campus_unique_index": "1;2;2;2", "aff_campus_unique": ";Carbondale;Arlington", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ImageNet-Hard: The Hardest Images Remaining from a Study of the Power of Zoom and Spatial Biases in Image Classification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73660", "id": "Cf2c9Pk9yF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/706390d6f9208b03bc54f97ac3cfe99e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Cf2c9Pk9yF", "openreview": "https://openreview.net/forum?id=Cf2c9Pk9yF", "poster": "/media/PosterPDFs/NeurIPS%202023/73660.png?t=1698192629.2121255", "slides": "https://nips.cc/virtual/2023/poster/73660", "video": "https://nips.cc/virtual/2023/poster/73660", "author_site": "Mohammad Reza Taesiri, Giang Nguyen, Sarra Habchi, Cor-Paul Bezemer, Anh Nguyen", "tldr": "", "abstract": "Image classifiers are information-discarding machines, by design. Yet, how these models discard information remains mysterious. We hypothesize that one way for image classifiers to reach high accuracy is to first zoom to the most discriminative region in the image and then extract features from there to predict image labels, discarding the rest of the image. Studying six popular networks ranging from AlexNet to CLIP, we find that proper framing of the input image can lead to the correct classification of 98.91% of ImageNet images. Furthermore, we uncover positional biases in various datasets, especially a strong center bias in two popular datasets: ImageNet-A and ObjectNet. Finally, leveraging our insights into the potential of zooming, we propose a test-time augmentation (TTA) technique that improves classification accuracy by forcing models to explicitly perform zoom-in operations before making predictions.\nOur method is more interpretable, accurate, and faster than MEMO, a state-of-the-art (SOTA) TTA method. 
We introduce ImageNet-Hard, a new benchmark that challenges SOTA classifiers including large vision-language models even when optimal zooming is allowed.", "keywords": "imagenet-hard;rare;zoom;spatial bias;image classification;new benchmark", "primary_area": "", "supplementary_material": "/attachment/c19b992fdea3f4ed316058cf2903192b18941dc8.zip", "author": "Mohammad Reza Taesiri;Giang Nguyen;Sarra Habchi;Cor-Paul Bezemer;Anh Totti Nguyen", "authorids": "~Mohammad_Reza_Taesiri1;~Giang_Nguyen1;~Sarra_Habchi1;~Cor-Paul_Bezemer1;~Anh_Totti_Nguyen1", "gender": "M;;F;M;M", "homepage": "https://taesiri.ai/;;;https://asgaard.ece.ualberta.ca/;http://anhnguyen.me", "dblp": ";;;95/7251;", "google_scholar": "-egLZy8AAAAJ;;w0dlVlwAAAAJ;8HgcKdoAAAAJ;EQw8d9AAAAAJ", "orcid": "0000-0002-8229-0325;;;;", "linkedin": "taesiri/;;;;", "or_profile": "~Mohammad_Reza_Taesiri1;~Giang_Nguyen1;~Sarra_Habchi1;~Cor-Paul_Bezemer1;~Anh_Totti_Nguyen1", "aff": "University of Alberta;;Ubisoft;University of Alberta;Auburn University", "aff_domain": "ualberta.ca;;ubisoft.com;ualberta.ca;auburn.edu", "position": "PhD student;;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ntaesiri2023imagenethard,\ntitle={ImageNet-Hard: The Hardest Images Remaining from a Study of the Power of Zoom and Spatial Biases in Image Classification},\nauthor={Mohammad Reza Taesiri and Giang Nguyen and Sarra Habchi and Cor-Paul Bezemer and Anh Totti Nguyen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Cf2c9Pk9yF}\n}", "github": "", "project": "", "reviewers": "twuE;pbFv;3Lfp;j8Bw;JuE9", "pdf_size": 21363215, "rating": "5;7;7;7;7", "confidence": "3;4;4;4;3", "wc_summary_and_contributions": "56;62;107;76;66", "wc_strengths": "16;91;44;92;15", "wc_improvement": "3;305;180;449;191", "wc_limitations": "361;14;10;1;2", "wc_correctness": "93;1;23;1;3", "wc_clarity": "5;1;5;1;1", "wc_relation_to_prior_work": "45;1;5;1;21", "wc_documentation": "1;10;17;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "581;486;392;623;301", "wc_reply_reviewers": "411;31;0;72;26", "wc_reply_authors": "2905;997;1621;1393;1007", "reply_reviewers": "4;1;0;1;1", "reply_authors": "6;2;3;2;2", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 73.4, 18.017769007288333 ], "wc_strengths_avg": [ 51.6, 34.20292385162415 ], "wc_improvement_avg": [ 225.6, 147.67477780582576 ], "wc_limitations_avg": [ 77.6, 141.7837790440077 ], "wc_correctness_avg": [ 24.2, 35.38587288735435 ], "wc_clarity_avg": [ 2.6, 1.9595917942265426 ], "wc_relation_to_prior_work_avg": [ 14.6, 16.8949696655543 ], "wc_documentation_avg": [ 6.0, 6.511528238439882 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 476.6, 118.67029956985867 ], "wc_reply_reviewers_avg": [ 108.0, 153.24620713087813 ], "wc_reply_authors_avg": [ 1584.6, 701.482316241828 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 3.0, 1.5491933384829668 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6123724356957948, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13912067025405671236&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ualberta.ca;;ubisoft.com;ualberta.ca;auburn.edu", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Alberta;Ubisoft;Auburn University", 
"aff_unique_dep": ";;", "aff_unique_url": "https://www.ualberta.ca;https://www.ubisoft.com;https://www.auburn.edu", "aff_unique_abbr": "UAlberta;Ubisoft;Auburn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Canada;France;United States" }, { "title": "PAPR: Proximity Attention Point Rendering", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72419", "id": "CgJJvuLjec", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bda5c35eded86adaf0231748e3ce071c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CgJJvuLjec", "openreview": "https://openreview.net/forum?id=CgJJvuLjec", "poster": "/media/PosterPDFs/NeurIPS%202023/72419.png?t=1702108692.9449623", "slides": "https://nips.cc/virtual/2023/poster/72419", "video": "https://nips.cc/virtual/2023/poster/72419", "author_site": "Yanshu Zhang, Shichong Peng, Alireza Moazeni, Ke Li", "tldr": "", "abstract": "Learning accurate and parsimonious point cloud representations of scene surfaces from scratch remains a challenge in 3D representation learning. Existing point-based methods often suffer from the vanishing gradient problem or require a large number of points to accurately model scene geometry and texture. To address these limitations, we propose Proximity Attention Point Rendering (PAPR), a novel method that consists of a point-based scene representation and a differentiable renderer. Our scene representation uses a point cloud where each point is characterized by its spatial position, influence score, and view-independent feature vector. The renderer selects the relevant points for each ray and produces accurate colours using their associated features. PAPR effectively learns point cloud positions to represent the correct scene geometry, even when the initialization drastically differs from the target geometry. Notably, our method captures fine texture details while using only a parsimonious set of points. We also demonstrate four practical applications of our method: zero-shot geometry editing, object manipulation, texture transfer, and exposure control. 
More results and code are available on our project website at https://zvict.github.io/papr/.", "keywords": "point cloud learning;point cloud rendering", "primary_area": "", "supplementary_material": "", "author": "Yanshu Zhang;Shichong Peng;Seyed Alireza Moazenipourasil;Ke Li", "authorids": "~Yanshu_Zhang1;~Shichong_Peng1;~Seyed_Alireza_Moazenipourasil1;~Ke_Li1", "gender": "M;M;M;M", "homepage": "https://zvict.github.io/;https://sites.google.com/view/niopeng/home;https://amoazeni75.github.io/;http://www.sfu.ca/~keli/", "dblp": "352/5386;221/4790;;75/6627-11", "google_scholar": ";;KjSsypYAAAAJ;vQc8tI4AAAAJ", "orcid": ";;;", "linkedin": ";;s-alireza-moazeni/;", "or_profile": "~Yanshu_Zhang1;~Shichong_Peng1;~Seyed_Alireza_Moazenipourasil1;~Ke_Li1", "aff": "Simon Fraser University;Simon Fraser University;Simon Fraser University;Simon Fraser University", "aff_domain": "sfu.ca;sfu.ca;sfu.ca;sfu.ca", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023papr,\ntitle={{PAPR}: Proximity Attention Point Rendering},\nauthor={Yanshu Zhang and Shichong Peng and Seyed Alireza Moazenipourasil and Ke Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CgJJvuLjec}\n}", "github": "", "project": "", "reviewers": "U5yB;dttq;QEnw;TJqY;fb1M", "pdf_size": 12765619, "rating": "4;6;7;8;8", "confidence": "4;3;4;4;5", "soundness": "3;3;2;4;4", "novelty": "3;3;2;4;3", "presentation": "3;3;3;4;4", "wc_summary": "81;52;61;98;81", "wc_strengths": "121;26;23;211;97", "wc_weaknesses": "287;265;468;358;139", "wc_questions": "253;46;2;18;66", "wc_limitations": "1;15;9;12;14", "wc_review": "743;404;563;697;397", "wc_reply_reviewers": "0;22;47;80;34", "wc_reply_authors": "98;33;41;55;31", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.6, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 74.6, 16.280049139974977 ], "wc_strengths_avg": [ 95.6, 69.39625350117973 ], "wc_weaknesses_avg": [ 303.4, 108.48520636473896 ], "wc_questions_avg": [ 77.0, 90.73477833774655 ], "wc_limitations_avg": [ 10.2, 5.035871324805669 ], "wc_review_avg": [ 560.8, 143.64456133108555 ], "wc_reply_reviewers_avg": [ 36.6, 26.650328328183875 ], "wc_reply_authors_avg": [ 51.6, 24.686838598735157 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8012170504395427613&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sfu.ca;sfu.ca;sfu.ca;sfu.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "When Visual Prompt Tuning Meets Source-Free Domain Adaptive Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72418", "id": "ChGGbmTNgE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/157c30da6a988e1cbef2095f7b9521db-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=ChGGbmTNgE", "openreview": "https://openreview.net/forum?id=ChGGbmTNgE", "poster": "/media/PosterPDFs/NeurIPS%202023/72418.png?t=1699528764.785021", "slides": "https://nips.cc/virtual/2023/poster/72418", "video": "https://nips.cc/virtual/2023/poster/72418", "author_site": "Xinhong Ma, Yiming Wang, Hao Liu, Tianyu Guo, Yunhe Wang", "tldr": "", "abstract": "Source-free domain adaptive semantic segmentation aims to adapt a pre-trained source model to the unlabeled target domain \nwithout accessing the private source data. Previous methods usually fine-tune the entire network, which suffers from expensive parameter tuning. To avoid this problem, we propose to utilize visual prompt tuning for parameter-efficient adaptation. However, the existing visual prompt tuning methods are unsuitable for source-free domain adaptive semantic segmentation due to the following two reasons: (1) Commonly used visual prompts like input tokens or pixel-level perturbations cannot reliably learn informative knowledge beneficial for semantic segmentation. (2) Visual prompts require sufficient labeled data to fill the gap between the pre-trained model and downstream tasks. To alleviate these problems, we propose a universal unsupervised visual prompt tuning (Uni-UVPT) framework, which is applicable to various transformer-based backbones. Specifically, we first divide the source pre-trained backbone with frozen parameters into multiple stages, and propose a lightweight prompt adapter for progressively encoding informative knowledge into prompts and enhancing the generalization of target features between adjacent backbone stages. Cooperatively, a novel adaptive pseudo-label correction strategy with a multiscale consistency loss is designed to alleviate the negative effect of target samples with noisy pseudo labels and raise the capacity of visual prompts to spatial perturbations. Extensive experiments demonstrate that Uni-UVPT achieves state-of-the-art performance on GTA5 $\\to$ Cityscapes and SYNTHIA $\\to$ Cityscapes tasks and can serve as a universal and parameter-efficient framework for large-model unsupervised knowledge transfer. 
Code will be available at https://gitee.com/mindspore/models/tree/master/research/cv/uni-uvpt and https://github.com/huawei-noah/noah-research/tree/master/uni-uvpt.", "keywords": "unsupervised domain adaptation;semantic segmentation;visual prompt tuning", "primary_area": "", "supplementary_material": "/attachment/8466c85537423c8a43b36b66930fda3146de0985.pdf", "author": "Xinhong Ma;Yiming Wang;Hao Liu;Tianyu Guo;Yunhe Wang", "authorids": "~Xinhong_Ma2;~Yiming_Wang12;~Hao_Liu33;~Tianyu_Guo1;~Yunhe_Wang1", "gender": "M;M;M;M;M", "homepage": "https://github.com/EamonWang;https://github.com/ChanghaoLau;;https://www.wangyunhe.site/;", "dblp": ";;218/7273;63/8217-1;https://dblp.uni-trier.de/pid/248/0802", "google_scholar": ";;RPK3oQgAAAAJ;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ;a44XUMQAAAAJ", "orcid": ";;;0000-0002-0142-509X;", "linkedin": ";;;;", "or_profile": "~Yiming_Wang12;~Hao_Liu33;~Tianyu_Guo1;~Yunhe_Wang1;~Ma_Xinhong1", "aff": "Noah's Ark Lab, Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab", "aff_domain": "huawei.com;huawei.com;huawei.com;huawei.com;huawei.com", "position": "Researcher;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nma2023when,\ntitle={When Visual Prompt Tuning Meets Source-Free Domain Adaptive Semantic Segmentation},\nauthor={Xinhong Ma and Yiming Wang and Hao Liu and Tianyu Guo and Yunhe Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ChGGbmTNgE}\n}", "github": "", "project": "", "reviewers": "HKNc;Ytkx;zvBk;bcKo", "pdf_size": 9499081, "rating": "4;4;6;7", "confidence": "4;5;4;5", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "36;76;87;85", "wc_strengths": "56;84;40;368", "wc_weaknesses": "112;197;43;50", "wc_questions": "79;4;33;5", "wc_limitations": "48;1;6;1", "wc_review": "331;362;209;509", "wc_reply_reviewers": "187;253;44;0", "wc_reply_authors": "333;159;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.0, 20.627651344736268 ], "wc_strengths_avg": [ 137.0, 134.29445260322558 ], "wc_weaknesses_avg": [ 100.5, 61.84860548144962 ], "wc_questions_avg": [ 30.25, 30.457962834043908 ], "wc_limitations_avg": [ 14.0, 19.73575435599055 ], "wc_review_avg": [ 352.75, 106.81379826595439 ], "wc_reply_reviewers_avg": [ 121.0, 102.89557813628339 ], "wc_reply_authors_avg": [ 123.0, 137.52636110942512 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11464554454189990593&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "huawei.com;huawei.com;huawei.com;huawei.com;huawei.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Noah's Ark Lab", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Benchmarking Robustness to Adversarial Image 
Obfuscations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73659", "id": "CiRHWaRbp0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/85c123f6da0fa159eb249e6a2e171903-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=CiRHWaRbp0", "openreview": "https://openreview.net/forum?id=CiRHWaRbp0", "poster": "/media/PosterPDFs/NeurIPS%202023/73659.png?t=1701903559.5158873", "slides": "https://nips.cc/virtual/2023/poster/73659", "video": "https://nips.cc/virtual/2023/poster/73659", "author_site": "Florian Stimberg, Ayan Chakrabarti, Chun-Ta Lu, Hussein Hazimeh, Otilia Stretcu, Wei Qiao, Yintao Liu, Merve Kaya, Cyrus Rashtchian, Ariel Fuxman, Mehmet Tek, Sven Gowal", "tldr": "", "abstract": "Automated content filtering and moderation is an important tool that allows online platforms to build striving user communities that facilitate cooperation and prevent abuse. Unfortunately, resourceful actors try to bypass automated filters in a bid to post content that violate platform policies and codes of conduct. To reach this goal, these malicious actors may obfuscate policy violating images (e.g., overlay harmful images by carefully selected benign images or visual patterns) to prevent machine learning models from reaching the correct decision. In this paper, we invite researchers to tackle this specific issue and present a new image benchmark. This benchmark, based on ImageNet, simulates the type of obfuscations created by malicious actors. It goes beyond Image-Net-C and ImageNet-C-bar by proposing general, drastic, adversarial modifications that preserve the original content intent. It aims to tackle a more common adversarial threat than the one considered by lp-norm bounded adversaries. We evaluate 33 pretrained models on the benchmark and train models with different augmentations, architectures and training methods on subsets of the obfuscations to measure generalization. 
Our hope is that this benchmark will encourage researchers to test their models and methods and try to find new approaches that are more robust to these obfuscations.", "keywords": "obfuscation;benchmark;imagenet;adversarial", "primary_area": "", "supplementary_material": "", "author": "Florian Stimberg;Ayan Chakrabarti;Chun-Ta Lu;Hussein Hazimeh;Otilia Stretcu;Wei Qiao;Yintao Liu;Merve Kaya;Cyrus Rashtchian;Ariel Fuxman;Mehmet Nejat Tek;Sven Gowal", "authorids": "~Florian_Stimberg1;~Ayan_Chakrabarti1;~Chun-Ta_Lu1;~Hussein_Hazimeh1;~Otilia_Stretcu1;~Wei_Qiao2;~Yintao_Liu1;~Merve_Kaya1;~Cyrus_Rashtchian1;~Ariel_Fuxman1;~Mehmet_Nejat_Tek1;~Sven_Gowal2", "gender": "M;M;M;;F;M;;;M;Not Specified;M;M", "homepage": ";https://projects.ayanc.org/;;http://www.mit.edu/~hazimeh;https://otiliastr.github.io/;;;;http://www.cyrusrashtchian.com;;;", "dblp": "57/11107;68/5758;96/9436;165/0820-1;179/6151;;49/426.html;28/7978;69/8610;90/1367;;75/8368", "google_scholar": "https://scholar.google.com/citations?hl=en;0v5utcwAAAAJ;05CGvyAAAAAJ;;UcT4CXoAAAAJ;;;;OtgZrhUAAAAJ;pyzFbV0AAAAJ;JxxGs_YAAAAJ;", "orcid": ";0000-0002-4843-740X;;0000-0003-4501-0678;0000-0001-7141-2916;;;;;;;", "linkedin": ";ayan-chakrabarti/;;;otiliastr;wayneqiao/;;emkaya;;ariel-fuxman-b821984;mehmettek/;", "or_profile": "~Florian_Stimberg1;~Ayan_Chakrabarti1;~Chun-Ta_Lu1;~Hussein_Hazimeh1;~Otilia_Stretcu1;~Wei_Qiao2;~Yintao_Liu1;~Merve_Kaya1;~Cyrus_Rashtchian1;~Ariel_Fuxman1;~Mehmet_Nejat_Tek1;~Sven_Gowal1", "aff": "Google DeepMind;Google;Google;Google;Google;Google;Google LLC;Google;Google Research;Google;Google;Google DeepMind", "aff_domain": "deepmind.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Research Scientist;Research Scientist;Software engineer;Software Engineer;Research Scientist;Researcher;Researcher;Principal Researcher;Research Engineer", "bibtex": "@inproceedings{\nstimberg2023benchmarking,\ntitle={Benchmarking Robustness to Adversarial Image Obfuscations},\nauthor={Florian Stimberg and Ayan Chakrabarti and Chun-Ta Lu and Hussein Hazimeh and Otilia Stretcu and Wei Qiao and Yintao Liu and Merve Kaya and Cyrus Rashtchian and Ariel Fuxman and Mehmet Nejat Tek and Sven Gowal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=CiRHWaRbp0}\n}", "github": "", "project": "", "reviewers": "Y2nL;nVKg;jWzW;hqY3;W7ys", "pdf_size": 3785395, "rating": "5;7;7;8;8", "confidence": "3;5;4;4;4", "wc_summary_and_contributions": "56;75;100;150;92", "wc_strengths": "48;148;39;64;71", "wc_improvement": "194;223;196;81;85", "wc_limitations": "5;13;37;23;17", "wc_correctness": "8;19;7;14;16", "wc_clarity": "8;23;12;1;9", "wc_relation_to_prior_work": "34;43;14;1;135", "wc_documentation": "5;33;2;1;23", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "359;578;408;336;449", "wc_reply_reviewers": "0;234;21;0;59", "wc_reply_authors": "515;348;290;166;223", "reply_reviewers": "0;1;1;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 7.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 94.6, 31.556932677305635 ], "wc_strengths_avg": [ 74.0, 38.69366873275265 ], "wc_improvement_avg": [ 155.8, 60.330423502574554 ], "wc_limitations_avg": [ 19.0, 10.73312629199899 ], "wc_correctness_avg": [ 12.8, 4.621688003316537 ], "wc_clarity_avg": [ 10.6, 
7.172168430816443 ], "wc_relation_to_prior_work_avg": [ 45.4, 47.16184898835074 ], "wc_documentation_avg": [ 12.8, 12.874781551544865 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 426.0, 85.49385942861628 ], "wc_reply_reviewers_avg": [ 62.8, 88.27094652262429 ], "wc_reply_authors_avg": [ 308.4, 120.1342582280342 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12120416487638792843&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 8, "email": "deepmind.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 12, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1;1;1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "When Do Neural Nets Outperform Boosted Trees on Tabular Data?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73658", "id": "CjVdXey4zT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f06d5ebd4ff40b40dd97e30cee632123-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=CjVdXey4zT", "openreview": "https://openreview.net/forum?id=CjVdXey4zT", "poster": "/media/PosterPDFs/NeurIPS%202023/73658.png?t=1702274686.1162755", "slides": "https://nips.cc/virtual/2023/poster/73658", "video": "https://nips.cc/virtual/2023/poster/73658", "author_site": "Duncan McElfresh, Sujay Khandagale, Jonathan Valverde, Vishak Prasad C, Ganesh Ramakrishnan, Micah Goldblum, Colin White", "tldr": "", "abstract": "Tabular data is one of the most commonly used types of data in machine learning. Despite recent advances in neural nets (NNs) for tabular data, there is still an active discussion on whether or not NNs generally outperform gradient-boosted decision trees (GBDTs) on tabular data, with several recent works arguing either that GBDTs consistently outperform NNs on tabular data, or vice versa. In this work, we take a step back and question the importance of this debate. To this end, we conduct the largest tabular data analysis to date, comparing 19 algorithms across 176 datasets, and we find that the 'NN vs. GBDT' debate is overemphasized: for a surprisingly high number of datasets, either the performance difference between GBDTs and NNs is negligible, or light hyperparameter tuning on a GBDT is more important than choosing between NNs and GBDTs. Next, we analyze dozens of metafeatures to determine what \\emph{properties} of a dataset make NNs or GBDTs better-suited to perform well. For example, we find that GBDTs are much better than NNs at handling skewed or heavy-tailed feature distributions and other forms of dataset irregularities. Our insights act as a guide for practitioners to determine which techniques may work best on their dataset. Finally, with the goal of accelerating tabular data research, we release the TabZilla Benchmark Suite: a collection of the 36 'hardest' of the datasets we study. 
Our benchmark suite, codebase, and all raw results are available at https://github.com/naszilla/tabzilla.", "keywords": "tabular data", "primary_area": "", "supplementary_material": "", "author": "Duncan C. McElfresh;Sujay Khandagale;Jonathan Valverde;Vishak Prasad C;Ganesh Ramakrishnan;Micah Goldblum;Colin White", "authorids": "~Duncan_C._McElfresh1;~Sujay_Khandagale1;~Jonathan_Valverde1;~Vishak_Prasad_C1;~Ganesh_Ramakrishnan1;~Micah_Goldblum1;~Colin_White1", "gender": "M;M;;M;M;;M", "homepage": "https://duncanmcelfresh.github.io/;https://suj97.github.io/;https://jonathan-valverde-l.github.io/;;https://www.cse.iitb.ac.in/~ganesh/;;https://crwhite.ml/", "dblp": "195/6302;;;279/3692.html;r/GaneshRamakrishnan;241/7231;136/9162", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.fi/citations?user=7fwPm3wAAAAJ;;OVvf2HQAAAAJ;https://scholar.google.com/scholar?hl=hi;pGDKzuUAAAAJ;LS6HY-gAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;vishak-prasad-777891a3/;;;", "or_profile": "~Duncan_C._McElfresh1;~Sujay_Khandagale1;~Jonathan_Valverde1;~Vishak_Prasad_C1;~Ganesh_Ramakrishnan1;~Micah_Goldblum1;~Colin_White1", "aff": "Stanford University;;;Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay, Indian Institute of Technology Bombay;New York University;Abacus.AI", "aff_domain": "stanford.edu;;;cse.iitb.ac.in;cse.iitb.ac.in;nyu.edu;abacus.ai", "position": "Postdoc;;;PhD student;Full Professor;Postdoc;Head of Research", "bibtex": "@inproceedings{\nmcelfresh2023when,\ntitle={When Do Neural Nets Outperform Boosted Trees on Tabular Data?},\nauthor={Duncan C. McElfresh and Sujay Khandagale and Jonathan Valverde and Vishak Prasad C and Ganesh Ramakrishnan and Micah Goldblum and Colin White},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=CjVdXey4zT}\n}", "github": "", "project": "", "reviewers": "7j2u;PPEn;kSbf;c2ni;WtT2", "pdf_size": 685009, "rating": "6;6;7;7;8", "confidence": "4;4;4;3;3", "wc_summary_and_contributions": "207;150;220;124;158", "wc_strengths": "54;102;184;50;98", "wc_improvement": "642;98;76;143;221", "wc_limitations": "34;1;1;47;154", "wc_correctness": "23;1;1;67;123", "wc_clarity": "11;9;1;11;8", "wc_relation_to_prior_work": "45;1;1;53;8", "wc_documentation": "6;1;25;17;17", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1023;364;510;513;788", "wc_reply_reviewers": "650;72;16;33;124", "wc_reply_authors": "1907;819;557;751;1264", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;1;1;3", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 171.8, 36.090996107062495 ], "wc_strengths_avg": [ 97.6, 48.27255949294589 ], "wc_improvement_avg": [ 236.0, 208.9755966614284 ], "wc_limitations_avg": [ 47.4, 56.301332133440674 ], "wc_correctness_avg": [ 43.0, 46.69903639262806 ], "wc_clarity_avg": [ 8.0, 3.687817782917155 ], "wc_relation_to_prior_work_avg": [ 21.6, 22.659214461229677 ], "wc_documentation_avg": [ 13.2, 8.588364221433554 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 639.6, 235.71050040250643 ], "wc_reply_reviewers_avg": [ 179.0, 238.41140912296962 ], "wc_reply_authors_avg": [ 1059.6, 482.82402591420407 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], 
"corr_rating_confidence": -0.7637626158259733, "gs_citation": 176, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17597516083673528904&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;;;cse.iitb.ac.in;cse.iitb.ac.in;nyu.edu;abacus.ai", "author_num": 7, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Stanford University;Indian Institute of Technology Bombay;New York University;Abacus.AI", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.iitb.ac.in;https://www.nyu.edu;https://www.abacus.ai", "aff_unique_abbr": "Stanford;IIT Bombay;NYU;Abacus.AI", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Stanford;Bombay;", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;India" }, { "title": "Reusable Slotwise Mechanisms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72417", "id": "CniUitfEY3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/49ff6951ef47bc9bab276a31a965528e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CniUitfEY3", "openreview": "https://openreview.net/forum?id=CniUitfEY3", "poster": "/media/PosterPDFs/NeurIPS%202023/72417.png?t=1700048220.6616852", "slides": "https://nips.cc/virtual/2023/poster/72417", "video": "https://nips.cc/virtual/2023/poster/72417", "author_site": "Trang Nguyen, Amin Mansouri, Kanika Madan, Khuong Duy Nguyen, Kartik Ahuja, Dianbo Liu, Yoshua Bengio", "tldr": "", "abstract": "Agents with the ability to comprehend and reason about the dynamics of objects would be expected to exhibit improved robustness and generalization in novel scenarios. However, achieving this capability necessitates not only an effective scene representation but also an understanding of the mechanisms governing interactions among object subsets. Recent studies have made significant progress in representing scenes using object slots. In this work, we introduce Reusable Slotwise Mechanisms, or RSM, a framework that models object dynamics by leveraging communication among slots along with a modular architecture capable of dynamically selecting reusable mechanisms for predicting the future states of each object slot. Crucially, RSM leverages the Central Contextual Information (CCI), enabling selected mechanisms to access the remaining slots through a bottleneck, effectively allowing for modeling of higher order and complex interactions that might require a sparse subset of objects. Experimental results demonstrate the superior performance of RSM compared to state-of-the-art methods across various future prediction and related downstream tasks, including Visual Question Answering and action planning. 
Furthermore, we showcase RSM\u2019s Out-of-Distribution generalization ability to handle scenes in intricate scenarios.", "keywords": "Out-of-Distribution Generalization;Slotwise;Visual Reasoning;Video Prediction;Reusable Mechanism;Dynamics modeling", "primary_area": "", "supplementary_material": "/attachment/8013d36201f45484ad31280525fd9f2b3a67bdab.pdf", "author": "Trang Nguyen;Amin Mansouri;Kanika Madan;Nguyen Duy Khuong;Kartik Ahuja;Dianbo Liu;Yoshua Bengio", "authorids": "~Trang_Nguyen1;~Amin_Mansouri1;~Kanika_Madan3;~Nguyen_Duy_Khuong1;~Kartik_Ahuja1;~Dianbo_Liu2;~Yoshua_Bengio1", "gender": "F;Not Specified;;M;;;M", "homepage": "https://baileytrang.github.io/;https://mila.quebec/en/person/amin-mansouri/;;https://khuongnd.github.io/;;;http://yoshuabengio.org", "dblp": "290/5980;340/7700;;;;;56/953", "google_scholar": "-yVY9T4AAAAJ;9aNdCL4AAAAJ;;vAOT46YAAAAJ;;;kukA0LcAAAAJ", "orcid": ";;;;;;", "linkedin": "baileytrang/;amansouri3476?original_referer=;;;;;yoshuabengio/?originalSubdomain=ca", "or_profile": "~Trang_Nguyen1;~Amin_Mansouri1;~Kanika_Madan3;~Nguyen_Duy_Khuong1;~Kartik_Ahuja1;~Dianbo_Liu2;~Yoshua_Bengio1", "aff": "Mila Institute;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;;FPT Software Ltd. - FPT Corporation;;;University of Montreal", "aff_domain": "mila.quebec;mila.umontreal.ca;;fpt-software.com;;;umontreal.ca", "position": "Intern;MS student;;Researcher;;;Full Professor", "bibtex": "@inproceedings{\nnguyen2023reusable,\ntitle={Reusable Slotwise Mechanisms},\nauthor={Trang Nguyen and Amin Mansouri and Kanika Madan and Nguyen Duy Khuong and Kartik Ahuja and Dianbo Liu and Yoshua Bengio},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CniUitfEY3}\n}", "github": "", "project": "", "reviewers": "8CU7;NsYp;DY7c;TEgG", "pdf_size": 9424896, "rating": "6;6;6;7", "confidence": "3;4;2;4", "soundness": "2;3;3;2", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "115;55;51;140", "wc_strengths": "180;35;40;80", "wc_weaknesses": "265;493;73;408", "wc_questions": "139;2;2;140", "wc_limitations": "49;1;2;19", "wc_review": "748;586;168;787", "wc_reply_reviewers": "171;216;0;72", "wc_reply_authors": "22;38;0;19", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.25, 38.310409812477864 ], "wc_strengths_avg": [ 83.75, 58.24248878610872 ], "wc_weaknesses_avg": [ 309.75, 159.1278966743418 ], "wc_questions_avg": [ 70.75, 68.75090908489865 ], "wc_limitations_avg": [ 17.75, 19.40843888621648 ], "wc_review_avg": [ 572.25, 245.26146762180153 ], "wc_reply_reviewers_avg": [ 114.75, 84.2774436014762 ], "wc_reply_authors_avg": [ 19.75, 13.497684986693088 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16202307131268505613&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "email": "mila.quebec;mila.umontreal.ca;;fpt-software.com;;;umontreal.ca", "author_num": 7, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Mila Institute for Quantum Computing;University of Montreal;FPT Corporation", 
"aff_unique_dep": ";Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://mila.quebec;https://www.mila.quebec;https://www.fpt-software.com", "aff_unique_abbr": "Mila;MILA;FPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;Vietnam" }, { "title": "Newton\u2013Cotes Graph Neural Networks: On the Time Evolution of Dynamic Systems", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72416", "id": "CnvZ7FIyAD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43e8fd8b9581faa71a6a61602bc28435-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CnvZ7FIyAD", "openreview": "https://openreview.net/forum?id=CnvZ7FIyAD", "poster": "/media/PosterPDFs/NeurIPS%202023/72416.png?t=1697712116.0855134", "slides": "https://nips.cc/virtual/2023/poster/72416", "video": "https://nips.cc/virtual/2023/poster/72416", "author_site": "Lingbing Guo, Weiqing Wang, Zhuo Chen, Ningyu Zhang, Zequn Sun, Yixuan Lai, Qiang Zhang, Huajun Chen", "tldr": "", "abstract": "Reasoning system dynamics is one of the most important analytical approaches for many scientific studies. With the initial state of a system as input, the recent graph neural networks (GNNs)-based methods are capable of predicting the future state distant in time with high accuracy. Although these methods have diverse designs in modeling the coordinates and interacting forces of the system, we show that they actually share a common paradigm that learns the integration of the velocity over the interval between the initial and terminal coordinates. However, their integrand is constant w.r.t. time. Inspired by this observation, we propose a new approach to predict the integration based on several velocity estimations with Newton\u2013Cotes formulas and prove its effectiveness theoretically. 
Extensive experiments on several benchmarks empirically demonstrate consistent and significant improvement compared with the state-of-the-art methods.", "keywords": "Equivariant Graph Neural Networks;Molecular Dynamics;N-body System;Human Motion", "primary_area": "", "supplementary_material": "/attachment/a2bc94fd027019bdccf1145a28dc25f69addaf29.zip", "author": "Lingbing Guo;Weiqing Wang;Zhuo Chen;Ningyu Zhang;Zequn Sun;Yixuan Lai;Qiang Zhang;Huajun Chen", "authorids": "~Lingbing_Guo1;~Weiqing_Wang2;~Zhuo_Chen3;~Ningyu_Zhang1;~Zequn_Sun1;~Yixuan_Lai1;~Qiang_Zhang6;~Huajun_Chen1", "gender": "M;F;;M;M;F;;M", "homepage": "https://guolingbing.github.io/;https://scholar.google.com/citations?user=jCziD10AAAAJ&hl=en;;https://person.zju.edu.cn/en/ningyu;https://sunzequn.github.io;;https://qiangairesearcher.github.io;", "dblp": "228/2586;57/3195-1;;139/4181-1.html;186/9718;;72/3527-26;94/5089", "google_scholar": "og4v8cMAAAAJ;jCziD10AAAAJ;;xQDOPvsAAAAJ;ph8SU3EAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";0000-0002-9578-819X;;0000-0002-1970-0678;;;;", "linkedin": ";;;ningyuzhang/;;https://www.linkedin.cn/incareer/in/yixuan-lai-82a749226;;", "or_profile": "~Lingbing_Guo1;~Weiqing_Wang2;~Zhuo_Chen3;~Ningyu_Zhang1;~Zequn_Sun1;~Yixuan_Lai1;~Qiang_Zhang6;~Huajun_Chen1", "aff": "Zhejiang University;Monash University;;Zhejiang University;Nanjing University;College of Computer Science and Technology, Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;monash.edu;;zju.edu.cn;nju.edu.cn;cs.zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Lecturer;;Associate Professor;PhD student;MS student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nguo2023newtoncotes,\ntitle={Newton{\\textendash}Cotes Graph Neural Networks: On the Time Evolution of Dynamic Systems},\nauthor={Lingbing Guo and Weiqing Wang and Zhuo Chen and Ningyu Zhang and Zequn Sun and Yixuan Lai and Qiang Zhang and Huajun Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CnvZ7FIyAD}\n}", "github": "", "project": "", "reviewers": "9Zk2;P4zh;Gu3W;MNf4", "pdf_size": 2589619, "rating": "4;6;8;8", "confidence": "4;3;3;4", "soundness": "2;4;4;3", "novelty": "1;3;4;3", "presentation": "2;3;3;3", "wc_summary": "51;81;112;103", "wc_strengths": "16;106;89;59", "wc_weaknesses": "143;66;214;76", "wc_questions": "47;38;49;214", "wc_limitations": "22;9;8;10", "wc_review": "279;300;472;462", "wc_reply_reviewers": "117;16;452;64", "wc_reply_authors": "230;28;555;40", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.75, 23.519938350259338 ], "wc_strengths_avg": [ 67.5, 34.16504061171302 ], "wc_weaknesses_avg": [ 124.75, 59.428002658679354 ], "wc_questions_avg": [ 87.0, 73.4404520683254 ], "wc_limitations_avg": [ 12.25, 5.673402858955108 ], "wc_review_avg": [ 378.25, 89.13017165920864 ], "wc_reply_reviewers_avg": [ 162.25, 171.0590175933441 ], "wc_reply_authors_avg": [ 213.25, 212.9593564509435 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=14695404833112776705&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "zju.edu.cn;monash.edu;;zju.edu.cn;nju.edu.cn;cs.zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;0;0;0", "aff_unique_norm": "Zhejiang University;Monash University;Nanjing University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.monash.edu;https://www.nju.edu.cn", "aff_unique_abbr": "ZJU;Monash;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "CARE-MI: Chinese Benchmark for Misinformation Evaluation in Maternity and Infant Care", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73657", "id": "CpFFRtxcbz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/84062fe53d23e0791c6dbb456783e4a9-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=CpFFRtxcbz", "openreview": "https://openreview.net/forum?id=CpFFRtxcbz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73657", "video": "https://nips.cc/virtual/2023/poster/73657", "author_site": "Tong Xiang, Liangzhi Li, Wangyue Li, Mingbai Bai, Lu Wei, Bowen Wang, Noa Garcia", "tldr": "", "abstract": "The recent advances in natural language processing (NLP), have led to a new trend of applying large language models (LLMs) to real-world scenarios. While the latest LLMs are astonishingly fluent when interacting with humans, they suffer from the misinformation problem by unintentionally generating factually false statements. This can lead to harmful consequences, especially when produced within sensitive contexts, such as healthcare. Yet few previous works have focused on evaluating misinformation in the long-form (LF) generation of LLMs, especially for knowledge-intensive topics. Moreover, although LLMs have been shown to perform well in different languages, misinformation evaluation has been mostly conducted in English. To this end, we present a benchmark, CARE-MI, for evaluating LLM misinformation in: 1) a sensitive topic, specifically the maternity and infant care domain; and 2) a language other than English, namely Chinese. Most importantly, we provide an innovative paradigm for building LF generation evaluation benchmarks that can be transferred to other knowledge-intensive domains and low-resourced languages. Our proposed benchmark fills the gap between the extensive usage of LLMs and the lack of datasets for assessing the misinformation generated by these models. It contains 1,612 expert-checked questions, accompanied with human-selected references. Using our benchmark, we conduct extensive experiments and found that current Chinese LLMs are far from perfect in the topic of maternity and infant care. In an effort to minimize the reliance on human resources for performance evaluation, we offer off-the-shelf judgment models for automatically assessing the LF output of LLMs given benchmark questions. 
Moreover, we compare potential solutions for LF generation evaluation and provide insights for building better automated metrics.", "keywords": "Benchmark;NLP;Large Language Model;Evaluation;Medical Domain;Text Generation", "primary_area": "", "supplementary_material": "", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nxiang2023caremi,\ntitle={{CARE}-{MI}: Chinese Benchmark for Misinformation Evaluation in Maternity and Infant Care},\nauthor={Tong Xiang and Liangzhi Li and Wangyue Li and Mingbai Bai and Lu Wei and Bowen Wang and Noa Garcia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=CpFFRtxcbz}\n}", "github": "", "project": "", "reviewers": "w6yk;dFCS;5eH2;aQxk", "pdf_size": 837753, "rating": "5;6;6;7", "confidence": "4;3;3;4", "wc_summary_and_contributions": "80;56;80;68", "wc_strengths": "38;74;56;57", "wc_improvement": "386;308;412;104", "wc_limitations": "37;12;3;69", "wc_correctness": "33;7;2;10", "wc_clarity": "84;52;18;10", "wc_relation_to_prior_work": "7;18;84;56", "wc_documentation": "9;11;229;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "675;539;885;399", "wc_reply_reviewers": "54;0;32;4", "wc_reply_authors": "637;430;2038;61", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 71.0, 9.9498743710662 ], "wc_strengths_avg": [ 56.25, 12.735285626950029 ], "wc_improvement_avg": [ 302.5, 120.82528708842368 ], "wc_limitations_avg": [ 30.25, 25.606395685453272 ], "wc_correctness_avg": [ 13.0, 11.895377253370318 ], "wc_clarity_avg": [ 41.0, 29.410882339705484 ], "wc_relation_to_prior_work_avg": [ 41.25, 30.65432269680738 ], "wc_documentation_avg": [ 68.25, 92.98756637314474 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 624.5, 179.2839925927577 ], "wc_reply_reviewers_avg": [ 22.5, 21.97157254271983 ], "wc_reply_authors_avg": [ 791.5, 748.6562963069234 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1527636434330311250&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "", "author_num": 1 }, { "title": "HiNeRV: Video Compression with Hierarchical Encoding-based Neural Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72415", "id": "CpoS56pYnU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5dc475c370ff42f2f96dddf8191a40c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CpoS56pYnU", "openreview": "https://openreview.net/forum?id=CpoS56pYnU", "poster": "/media/PosterPDFs/NeurIPS%202023/72415.png?t=1702058847.2823508", "slides": "https://nips.cc/virtual/2023/poster/72415", "video": "https://nips.cc/virtual/2023/poster/72415", "author_site": "Ho Man Kwan, Ge Gao, Fan Zhang, Andrew Gower, David Bull", "tldr": "", "abstract": "Learning-based video compression is currently a popular research topic, offering the potential to compete with conventional standard video codecs. 
In this context, Implicit Neural Representations (INRs) have previously been used to represent and compress image and video content, demonstrating relatively high decoding speed compared to other methods. However, existing INR-based methods have failed to deliver rate-quality performance comparable with the state of the art in video compression. This is mainly due to the simplicity of the employed network architectures, which limit their representation capability. In this paper, we propose HiNeRV, an INR that combines lightweight layers with novel hierarchical positional encodings. We employ depth-wise convolutional, MLP, and interpolation layers to build a deep and wide network architecture with high capacity. HiNeRV is also a unified representation encoding videos in both frames and patches at the same time, which offers higher performance and flexibility than existing methods. We further build a video codec based on HiNeRV and a refined pipeline for training, pruning and quantization that can better preserve HiNeRV's performance during lossy model compression. The proposed method has been evaluated on both UVG and MCL-JCV datasets for video compression, demonstrating significant improvement over all existing INR baselines and competitive performance when compared to learning-based codecs (72.3\\% overall bit rate saving over HNeRV and 43.4\\% over DCVC on the UVG dataset, measured in PSNR).", "keywords": "Video compression;Implicit neural representations", "primary_area": "", "supplementary_material": "/attachment/27a218057eb0fa06e237651b7b79d2c39ee558d7.pdf", "author": "Ho Man Kwan;Ge Gao;Fan Zhang;Andy Gower;David Bull", "authorids": "~Ho_Man_Kwan1;~Ge_Gao8;~Fan_Zhang6;andrew.p.gower@bt.com;~David_Bull1", "gender": "M;M;M;;M", "homepage": ";;https://fan-aaron-zhang.github.io/;;https://david-bull.github.io/", "dblp": "325/4462;;21/3626-17;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;j2_80ewAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-8283-4513;;0000-0001-6623-9936;;0000-0001-7634-190X", "linkedin": ";;fan-zhang-b32ba430/;;dave-bull-968b756/", "or_profile": "~Ho_Man_Kwan1;~Ge_Gao8;~Fan_Zhang6;andrew.p.gower@bt.com;~David_Bull1", "aff": "University of Bristol;University of Bristol;University of Bristol;;University of Bristol", "aff_domain": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;;bristol.ac.uk", "position": "PhD student;Researcher;Lecturer;;Full Professor", "bibtex": "@inproceedings{\nkwan2023hinerv,\ntitle={HiNe{RV}: Video Compression with Hierarchical Encoding-based Neural Representation},\nauthor={Ho Man Kwan and Ge Gao and Fan Zhang and Andy Gower and David Bull},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CpoS56pYnU}\n}", "github": "", "project": "", "reviewers": "H9aP;zJ4f;JKXU;zt3G;boCg", "pdf_size": 2798456, "rating": "5;5;5;5;5", "confidence": "4;4;4;5;4", "soundness": "3;3;2;3;3", "novelty": "3;2;2;3;2", "presentation": "3;3;2;2;2", "wc_summary": "77;83;92;36;67", "wc_strengths": "76;33;21;50;13", "wc_weaknesses": "99;40;153;96;294", "wc_questions": "40;34;94;44;7", "wc_limitations": "15;1;1;24;6", "wc_review": "307;191;361;250;387", "wc_reply_reviewers": "0;21;35;101;48", "wc_reply_authors": "0;0;0;625;54", "reply_reviewers": "0;1;1;1;2", "reply_authors": "1;1;1;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997
], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 71.0, 19.29766825292631 ], "wc_strengths_avg": [ 38.6, 22.47309502494038 ], "wc_weaknesses_avg": [ 136.4, 86.52999479949135 ], "wc_questions_avg": [ 43.8, 28.23047998175022 ], "wc_limitations_avg": [ 9.4, 8.912911982062877 ], "wc_review_avg": [ 299.2, 71.7172224782862 ], "wc_reply_reviewers_avg": [ 41.0, 33.9587985653203 ], "wc_reply_authors_avg": [ 135.8, 245.49248461001815 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15495846924532804948&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;;bristol.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "On the Relationship Between Relevance and Conflict in Online Social Link Recommendations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72414", "id": "CrpL8mGa0Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/73d6c3e4b214deebbbf8256e26d2cf45-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CrpL8mGa0Q", "openreview": "https://openreview.net/forum?id=CrpL8mGa0Q", "poster": "/media/PosterPDFs/NeurIPS%202023/72414.png?t=1702143140.8052354", "slides": "https://nips.cc/virtual/2023/poster/72414", "video": "https://nips.cc/virtual/2023/poster/72414", "author_site": "Yanbang Wang, Jon Kleinberg", "tldr": "", "abstract": "In an online social network, link recommendations are a way for users to discover relevant links to people they may know, thereby potentially increasing their engagement on the platform. However, the addition of links to a social network can also have an effect on the level of conflict in the network --- expressed in terms of polarization and disagreement. To date, however, we have very little understanding of how these two implications of link formation relate to each other: are the goals of high relevance and conflict reduction aligned, or are the links that users are most likely to accept fundamentally different from the ones with the greatest potential for reducing conflict? Here we provide the first analysis of this question, using the recently popular Friedkin-Johnsen model of opinion dynamics. We first present a surprising result on how link additions shift the level of opinion conflict, followed by explanation work that relates the amount of shift to structural features of the added links. We then characterize the gap in conflict reduction between the set of links achieving the largest reduction and the set of links achieving the highest relevance. The gap is measured on real-world data, based on instantiations of relevance defined by 13 link recommendation algorithms. We find that some, but not all, of the more accurate algorithms actually lead to better reduction of conflict. 
Our work suggests that social links recommended for increasing user engagement may not be as conflict-provoking as people might have thought.", "keywords": "social networks;spectral analysis;link recommendation;polarization and conflict", "primary_area": "", "supplementary_material": "/attachment/c81b0a86e2d319e98514a2ce6e8efa412edb87cb.pdf", "author": "Yanbang Wang;Jon Kleinberg", "authorids": "~Yanbang_Wang1;~Jon_Kleinberg3", "gender": ";M", "homepage": ";http://www.cs.cornell.edu/home/kleinber/", "dblp": "232/1994;https://dblp.uni-trier.de/pid/k/JonMKleinberg.html", "google_scholar": "Ch3YUgsAAAAJ;VX7d5EQAAAAJ", "orcid": ";0000-0002-1929-2512", "linkedin": ";", "or_profile": "~Yanbang_Wang1;~Jon_Kleinberg3", "aff": "Department of Computer Science, Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023on,\ntitle={On the Relationship Between Relevance and Conflict in Online Social Link Recommendations},\nauthor={Yanbang Wang and Jon Kleinberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CrpL8mGa0Q}\n}", "github": "", "project": "", "reviewers": "tBHa;tPqS;4FjV;dCzc", "pdf_size": 3878927, "rating": "5;5;6;6", "confidence": "3;3;2;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "133;64;65;161", "wc_strengths": "41;54;59;158", "wc_weaknesses": "88;82;234;38", "wc_questions": "119;51;48;183", "wc_limitations": "39;28;37;13", "wc_review": "420;279;443;553", "wc_reply_reviewers": "21;0;0;4", "wc_reply_authors": "0;352;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 105.75, 42.42272386351447 ], "wc_strengths_avg": [ 78.0, 46.652974181717504 ], "wc_weaknesses_avg": [ 110.5, 73.86981792315451 ], "wc_questions_avg": [ 100.25, 55.57596153014359 ], "wc_limitations_avg": [ 29.25, 10.256095748383007 ], "wc_review_avg": [ 423.75, 97.52275375521346 ], "wc_reply_reviewers_avg": [ 6.25, 8.671072598012312 ], "wc_reply_authors_avg": [ 88.0, 152.4204710660612 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9918513694979888410&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.cornell.edu;cornell.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Finding Local Minima Efficiently in Decentralized Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72413", "id": "CruxS0C0LS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c6b84d35d783cf289bb0cd7c7b897ea6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CruxS0C0LS", "openreview": "https://openreview.net/forum?id=CruxS0C0LS", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72413.png?t=1699927788.5983858", "slides": "https://nips.cc/virtual/2023/poster/72413", "video": "https://nips.cc/virtual/2023/poster/72413", "author_site": "Wenhan Xian, Heng Huang", "tldr": "", "abstract": "In this paper we study the second-order optimality of decentralized stochastic algorithm that escapes saddle point efficiently for nonconvex optimization problems. We propose a new pure gradient-based decentralized stochastic algorithm PEDESTAL with a novel convergence analysis framework to address the technical challenges unique to the decentralized stochastic setting. Our method is the first decentralized stochastic algorithm to achieve second-order optimality with non-asymptotic analysis. We provide theoretical guarantees with the gradient complexity of $\\tilde{O} (\\epsilon^{-3})$ to find $O(\\epsilon, \\sqrt{\\epsilon})$-second-order stationary point, which matches state-of-the-art results of centralized counterparts or decentralized methods to find first-order stationary point. We also conduct two decentralized tasks in our experiments, a matrix sensing task with synthetic data and a matrix factorization task with a real-world dataset to validate the performance of our method.", "keywords": "second-order optimality;decentralized optimization", "primary_area": "", "supplementary_material": "/attachment/4c18434a8b4c06b25246ddaf9a061ce668dc4617.zip", "author": "Wenhan Xian;Heng Huang", "authorids": "~Wenhan_Xian1;~Heng_Huang1", "gender": "M;M", "homepage": ";https://www.cs.umd.edu/~heng/", "dblp": "246/3134;03/281", "google_scholar": ";4OqLaDwAAAAJ", "orcid": ";", "linkedin": "wenhan-xian-3392ba170;", "or_profile": "~Wenhan_Xian1;~Heng_Huang1", "aff": "University of Pittsburgh;University of Pittsburgh", "aff_domain": "pitt.edu;pitt.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nxian2023finding,\ntitle={Finding Local Minima Efficiently in Decentralized Optimization},\nauthor={Wenhan Xian and Heng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CruxS0C0LS}\n}", "github": "", "project": "", "reviewers": "aGCa;C5yK;eGsh;D1UK;tsG4", "pdf_size": 1541811, "rating": "5;6;6;7;7", "confidence": "4;3;2;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;2;3;3;4", "wc_summary": "71;61;19;42;143", "wc_strengths": "62;130;13;38;188", "wc_weaknesses": "228;231;6;96;226", "wc_questions": "46;214;55;9;4", "wc_limitations": "14;1;2;1;9", "wc_review": "421;637;95;186;570", "wc_reply_reviewers": "40;57;16;4;96", "wc_reply_authors": "0;0;0;0;344", "reply_reviewers": "1;1;1;1;2", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 67.2, 41.84925327888181 ], "wc_strengths_avg": [ 86.2, 64.1074098681268 ], "wc_weaknesses_avg": [ 157.4, 91.43216064383473 ], "wc_questions_avg": [ 65.6, 76.83384670833551 ], "wc_limitations_avg": [ 5.4, 5.238320341483518 ], "wc_review_avg": [ 381.8, 211.03307797594195 ], "wc_reply_reviewers_avg": [ 42.6, 32.44441400302986 ], "wc_reply_authors_avg": [ 68.8, 137.6 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2004459314343183, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12846071344748183977&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pitt.edu;pitt.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Pittsburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.pitt.edu", "aff_unique_abbr": "Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Cs74qIBfiq", "title": "Zero-Shot Robustification of Zero-Shot Models with Auxiliary Foundation Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Zero-shot inference is a powerful paradigm that enables the use of large pretrained models for downstream classification tasks without further training. However, these models are vulnerable to inherited biases that can impact their performance. The traditional solution is fine-tuning, but this undermines the key advantage of pretrained models, which is their ability to be used out-of-the-box. We propose RoboShot, a method that improves the robustness of pretrained model embeddings in a fully zero-shot fashion. First, we use zero-shot language models (LMs) to obtain useful insights from task descriptions. These insights are embedded and used to remove harmful and boost useful components in embeddings---without any supervision. Theoretically, we provide a simple and tractable model for biases in zero-shot embeddings and give a result characterizing under what conditions our approach can boost performance. Next, we make pretrained models invariant to spurious features by projecting pre-trained models embeddings on the subspace orthogonal to the spurious feature subspace, which are spanned by the spurious feature descriptions. Empirically, we evaluate RoboShot on nine image and NLP classification tasks and show an average improvement of 15.98% over several zero-shot baselines. 
Additionally, we demonstrate that RoboShot is compatible with a variety of pretrained models and language models.", "keywords": "zero-shot classification;spurious correlations;invariant embedding;foundation model;language model", "primary_area": "", "supplementary_material": "/attachment/b455ef95d89ee53fcd6538149a8cdf038baaa93e.zip", "author": "Dyah Adila;Changho Shin;Linrong Cai;Frederic Sala", "authorids": "~Dyah_Adila1;~Changho_Shin2;lcai54@wisc.edu;~Frederic_Sala1", "gender": "F;;;M", "homepage": ";;;https://pages.cs.wisc.edu/~fredsala/", "dblp": ";;;133/3602", "google_scholar": ";VpvIQAcAAAAJ;;9KhIkNkAAAAJ", "orcid": ";;;", "linkedin": "dyahadila/;;;", "or_profile": "~Dyah_Adila1;~Changho_Shin2;lcai54@wisc.edu;~Frederic_Sala1", "aff": "University of Wisconsin, Madison;University of Wisconsin, Madison;;University of Wisconsin, Madison", "aff_domain": "wisc.edu;wisc.edu;;wisc.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@misc{\nadila2023zeroshot,\ntitle={Zero-Shot Robustification of Zero-Shot Models with Auxiliary Foundation Models},\nauthor={Dyah Adila and Changho Shin and Linrong Cai and Frederic Sala},\nyear={2023},\nurl={https://openreview.net/forum?id=Cs74qIBfiq}\n}", "github": "", "project": "", "reviewers": "DxCE;Kdvh;Bg99;E4A7", "site": "https://openreview.net/forum?id=Cs74qIBfiq", "pdf_size": 1800540, "rating": "4;4;6;6", "confidence": "4;3;5;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "114;81;90;92", "wc_strengths": "76;23;71;75", "wc_weaknesses": "275;121;63;236", "wc_questions": "110;69;14;96", "wc_limitations": "6;8;6;4", "wc_review": "581;302;244;503", "wc_reply_reviewers": "0;10;0;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.25, 12.132085558550928 ], "wc_strengths_avg": [ 61.25, 22.16275028059469 ], "wc_weaknesses_avg": [ 173.75, 85.40308835165155 ], "wc_questions_avg": [ 72.25, 36.71767285654144 ], "wc_limitations_avg": [ 6.0, 1.4142135623730951 ], "wc_review_avg": [ 407.5, 138.82092781709824 ], "wc_reply_reviewers_avg": [ 5.5, 5.545268253204709 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M_EwWFRLvX8J:scholar.google.com/&scioq=Zero-Shot+Robustification+of+Zero-Shot+Models+with+Auxiliary+Foundation+Models&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Wisconsin", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "List and Certificate Complexities in Replicable Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72412", "id": "Cs9ea2Gbgx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61d0a96d4a73b626367310b3ad32579d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Cs9ea2Gbgx", "openreview": "https://openreview.net/forum?id=Cs9ea2Gbgx", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72412", "video": "https://nips.cc/virtual/2023/poster/72412", "author_site": "Peter Dixon, A. Pavan, Jason Vander Woude, N. V. Vinodchandran", "tldr": "", "abstract": "We investigate replicable learning algorithms. Informally a learning algorithm is replicable if the algorithm outputs the same canonical hypothesis over multiple runs with high probability, even when different runs observe a different set of samples from the unknown data distribution. In general, such a strong notion of replicability is not achievable. \nThus we consider two feasible notions of replicability called {\\em list replicability} and {\\em certificate replicability}. \nIntuitively, these notions capture the degree of (non) replicability. The goal is to design learning algorithms with optimal list and certificate complexities while minimizing the sample complexity. Our contributions are the following.\n\n1. We first study the learning task of estimating the biases of $d$ coins, up to an additive error of $\\varepsilon$, by observing samples. For this task, we design a $(d+1)$-list replicable algorithm. To complement this result, we establish that the list complexity is optimal, i.e there are no learning algorithms with a list size smaller than $d+1$ for this task. We also design learning algorithms with certificate complexity $\\tilde{O}(\\log d)$. The sample complexity of both these algorithms is $\\tilde{O}(\\frac{d^2}{\\varepsilon^2})$ where $\\varepsilon$ is the approximation error parameter (for a constant error probability). \n\n2. In the PAC model, we show that any hypothesis class that is learnable with $d$-nonadaptive statistical queries can be learned via a $(d+1)$-list replicable algorithm and also via a $\\tilde{O}(\\log d)$-certificate replicable algorithm. The sample complexity of both these algorithms is $\\tilde{O}(\\frac{d^2}{\\nu^2})$ where $\\nu$ is the approximation error of the statistical query. We also show that for the concept class \\dtep, the list complexity is exactly $d+1$ with respect to the uniform distribution. \n\nTo establish our upper bound results we use rounding schemes induced by geometric partitions with certain properties. We use Sperner/KKM Lemma to establish the lower bound results.", "keywords": "Replicability;learning algorithms;sample complexity;PAC Learning", "primary_area": "", "supplementary_material": "/attachment/c532486cc55ec7adee466eaf92df62b8b0d07f56.pdf", "author": "Peter Dixon;A. Pavan;Jason Vander Woude;N V Vinodchandran", "authorids": "~Peter_Dixon1;~A._Pavan1;~Jason_Vander_Woude1;~N_V_Vinodchandran1", "gender": ";;;", "homepage": ";https://www.cs.iastate.edu/pavan;https://www.math.unl.edu/~jvanderwoude2/;", "dblp": "69/10756-2;88/1807;322/1237;", "google_scholar": "adp22rAAAAAJ;4QIV0FUAAAAJ;;", "orcid": "0009-0008-0710-3762;0000-0003-1665-5266;0000-0002-7672-6321;", "linkedin": ";;;", "or_profile": "~Peter_Dixon1;~A._Pavan1;~Jason_Vander_Woude1;~N_V_Vinodchandran1", "aff": "Ben-Gurion University of the Negev;Iowa State University;University of Nebraska, Lincoln;", "aff_domain": "bgu.ac.il;iastate.edu;unl.edu;", "position": "Postdoc;Full Professor;PhD student;", "bibtex": "@inproceedings{\ndixon2023list,\ntitle={List and Certificate Complexities in Replicable Learning},\nauthor={Peter Dixon and A. 
Pavan and Jason Vander Woude and N V Vinodchandran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Cs9ea2Gbgx}\n}", "github": "", "project": "", "reviewers": "HcY8;xJoj;Hncd;U7Lz", "pdf_size": 394431, "rating": "5;6;6;7", "confidence": "3;5;2;5", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "2;2;2;3", "wc_summary": "79;76;106;684", "wc_strengths": "87;79;82;283", "wc_weaknesses": "49;195;100;982", "wc_questions": "39;32;193;552", "wc_limitations": "2;7;20;5", "wc_review": "256;389;501;2506", "wc_reply_reviewers": "20;107;75;209", "wc_reply_authors": "0;68;108;58", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 236.25, 258.77246279308775 ], "wc_strengths_avg": [ 132.75, 86.79393700023061 ], "wc_weaknesses_avg": [ 331.5, 379.20344144007976 ], "wc_questions_avg": [ 204.0, 210.9703770674926 ], "wc_limitations_avg": [ 8.5, 6.87386354243376 ], "wc_review_avg": [ 913.0, 923.7989499885784 ], "wc_reply_reviewers_avg": [ 102.75, 68.78362813926 ], "wc_reply_authors_avg": [ 58.5, 38.61023180453596 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5443310539518174, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16858386856246402758&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "bgu.ac.il;iastate.edu;unl.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ben-Gurion University of the Negev;Iowa State University;University of Nebraska", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bgu.ac.il;https://www.iastate.edu;https://www.unl.edu", "aff_unique_abbr": "BGU;ISU;UNL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lincoln", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "EHRSHOT: An EHR Benchmark for Few-Shot Evaluation of Foundation Models", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73656", "id": "CsXC6IcdwI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d42db1f74df54cb992b3956eb7f15a6f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=CsXC6IcdwI", "openreview": "https://openreview.net/forum?id=CsXC6IcdwI", "poster": "/media/PosterPDFs/NeurIPS%202023/73656.png?t=1701460934.618272", "slides": "https://nips.cc/virtual/2023/poster/73656", "video": "https://nips.cc/virtual/2023/poster/73656", "author_site": "Michael Wornow, Rahul Thapa, Ethan Steinberg, Jason Fries, Nigam Shah", "tldr": "", "abstract": "While the general machine learning (ML) community has benefited from public datasets, tasks, and models, the progress of ML in healthcare has been hampered by a lack of such shared assets. The success of foundation models creates new challenges for healthcare ML by requiring access to shared pretrained models to validate performance benefits. We help address these challenges through three contributions. First, we publish a new dataset, EHRSHOT, which contains de-identified structured data from the electronic health records (EHRs) of 6,739 patients from Stanford Medicine. 
Unlike MIMIC-III/IV and other popular EHR datasets, EHRSHOT is longitudinal and not restricted to ICU/ED patients. Second, we publish the weights of CLMBR-T-base, a 141M parameter clinical foundation model pretrained on the structured EHR data of 2.57M patients. We are one of the first to fully release such a model for coded EHR data; in contrast, most prior models released for clinical data (e.g. GatorTron, ClinicalBERT) only work with unstructured text and cannot process the rich, structured data within an EHR. We provide an end-to-end pipeline for the community to validate and build upon its performance. Third, we define 15 few-shot clinical prediction tasks, enabling evaluation of foundation models on benefits such as sample efficiency and task adaptation. Our model, dataset, and code are available here: https://ehrshot.stanford.edu/", "keywords": "ehr;healthcare;benchmark;dataset;foundation model;emr;electronic health record", "primary_area": "", "supplementary_material": "/attachment/13fabe8701c71f44dad9213797d14a0eea4bb984.pdf", "author": "Michael Wornow;Rahul Thapa;Ethan Steinberg;Jason Alan Fries;Nigam Shah", "authorids": "~Michael_Wornow1;~Rahul_Thapa1;~Ethan_Steinberg1;~Jason_Alan_Fries1;~Nigam_Shah1", "gender": ";M;M;M;M", "homepage": "https://michaelwornow.net;https://rthapa84.github.io/;;https://web.stanford.edu/~jfries/;https://shahlab.stanford.edu/nigam_shah", "dblp": "295/5424.html;;241/9476;182/2122;s/NHShah", "google_scholar": "rXYzcbcAAAAJ;H9FNWVcAAAAJ;;wywWmwoAAAAJ;n63DmP8AAAAJ", "orcid": "0000-0003-2215-6527;;0000-0001-7166-5032;0000-0001-9316-5768;0000-0001-9385-7158", "linkedin": ";rahul-thapa/;;jason-fries/;", "or_profile": "~Michael_Wornow1;~Rahul_Thapa1;~Ethan_Steinberg1;~Jason_Alan_Fries1;~Nigam_Shah1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Researcher;PhD student;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nwornow2023ehrshot,\ntitle={{EHRSHOT}: An {EHR} Benchmark for Few-Shot Evaluation of Foundation Models},\nauthor={Michael Wornow and Rahul Thapa and Ethan Steinberg and Jason Alan Fries and Nigam Shah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=CsXC6IcdwI}\n}", "github": "", "project": "", "reviewers": "ipCJ;bA6k;G23w;gNum", "pdf_size": 2225964, "rating": "7;7;7;8", "confidence": "3;4;4;5", "wc_summary_and_contributions": "130;57;128;100", "wc_strengths": "59;270;37;85", "wc_improvement": "54;93;20;153", "wc_limitations": "15;29;65;8", "wc_correctness": "19;199;1;7", "wc_clarity": "5;5;1;6", "wc_relation_to_prior_work": "26;15;1;5", "wc_documentation": "37;1;1;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "346;670;255;384", "wc_reply_reviewers": "0;0;0;14", "wc_reply_authors": "990;1253;252;764", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 103.75, 29.4819860253681 ], "wc_strengths_avg": [ 112.75, 92.36442767645994 ], "wc_improvement_avg": [ 80.0, 49.43177115985225 ], "wc_limitations_avg": [ 29.25, 21.98152633462927 ], "wc_correctness_avg": [ 56.5, 82.52726822087352 ], "wc_clarity_avg": [ 4.25, 1.920286436967152 ], "wc_relation_to_prior_work_avg": [ 11.75, 9.67923034130297 ], "wc_documentation_avg": 
[ 14.5, 14.9248115565993 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 413.75, 155.19403177957585 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 814.75, 368.11640482325697 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1264058279334378905&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Imitation Learning from Vague Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72411", "id": "CswEebv5Hn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96e35b532b4932a86cce8c929ff3f960-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CswEebv5Hn", "openreview": "https://openreview.net/forum?id=CswEebv5Hn", "poster": "/media/PosterPDFs/NeurIPS%202023/72411.png?t=1702050258.027953", "slides": "https://nips.cc/virtual/2023/poster/72411", "video": "https://nips.cc/virtual/2023/poster/72411", "author_site": "Xin-Qiang Cai, Yu-Jie Zhang, Chao-Kai Chiang, Masashi Sugiyama", "tldr": "", "abstract": "Imitation learning from human feedback studies how to train well-performing imitation agents with an annotator's relative comparison of two demonstrations (one demonstration is better/worse than the other), which is usually easier to collect than the perfect expert data required by traditional imitation learning. However, in many real-world applications, it is still expensive or even impossible to provide a clear pairwise comparison between two demonstrations with similar quality. This motivates us to study the problem of imitation learning with vague feedback, where the data annotator can only distinguish the paired demonstrations correctly when their quality differs significantly, i.e., one from the expert and another from the non-expert. By modeling the underlying demonstration pool as a mixture of expert and non-expert data, we show that the expert policy distribution can be recovered when the proportion $\\alpha$ of expert data is known. We also propose a mixture proportion estimation method for the unknown $\\alpha$ case. Then, we integrate the recovered expert policy distribution with generative adversarial imitation learning to form an end-to-end algorithm. 
Experiments show that our methods outperform standard and preference-based imitation learning methods on various tasks.", "keywords": "imitation learning;vague feedback;risk rewriting;mixture proportion estimation", "primary_area": "", "supplementary_material": "", "author": "Xin-Qiang Cai;Yu-Jie Zhang;Chao-Kai Chiang;Masashi Sugiyama", "authorids": "~Xin-Qiang_Cai1;~Yu-Jie_Zhang1;~Chao-Kai_Chiang1;~Masashi_Sugiyama1", "gender": "M;M;M;M", "homepage": "https://caixq1996.github.io/;https://yujie-zhang96.github.io/;;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "248/8034.html;234/6681;34/8336;35/1228", "google_scholar": "rtMUMooAAAAJ;https://scholar.google.com/citations?hl=zh-CN;pBQgK_YAAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": ";;;0000-0001-6658-6743", "linkedin": ";;;", "or_profile": "~Xin-Qiang_Cai1;~Yu-Jie_Zhang1;~Chao-Kai_Chiang1;~Masashi_Sugiyama1", "aff": "The University of Tokyo;The University of Tokyo;Tokyo University;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "PhD student;PhD student;Project Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncai2023imitation,\ntitle={Imitation Learning from Vague Feedback},\nauthor={Xin-Qiang Cai and Yu-Jie Zhang and Chao-Kai Chiang and Masashi Sugiyama},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CswEebv5Hn}\n}", "github": "", "project": "", "reviewers": "DWbY;B2bu;CNdS;zY91;g4mG", "pdf_size": 6476678, "rating": "5;5;6;6;9", "confidence": "2;3;3;3;5", "soundness": "2;3;2;3;3", "novelty": "2;3;4;3;4", "presentation": "2;2;2;4;4", "wc_summary": "84;81;204;195;113", "wc_strengths": "6;46;167;75;241", "wc_weaknesses": "42;141;547;79;35", "wc_questions": "27;4;246;413;18", "wc_limitations": "14;22;116;8;29", "wc_review": "173;294;1280;770;436", "wc_reply_reviewers": "23;0;489;0;0", "wc_reply_authors": "12;0;865;0;0", "reply_reviewers": "1;0;2;0;0", "reply_authors": "2;1;4;1;1", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 135.4, 53.593283161232065 ], "wc_strengths_avg": [ 107.0, 85.44237824405404 ], "wc_weaknesses_avg": [ 168.8, 192.79668046934833 ], "wc_questions_avg": [ 141.6, 162.41994951359885 ], "wc_limitations_avg": [ 37.8, 39.74116253961376 ], "wc_review_avg": [ 590.6, 398.42670593222033 ], "wc_reply_reviewers_avg": [ 102.4, 193.50514205054088 ], "wc_reply_authors_avg": [ 175.4, 344.83132108322184 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9444444444444444, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15755028439013534470&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Saving 100x Storage: Prototype Replay for Reconstructing Training Sample Distribution in Class-Incremental 
Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72410", "id": "Ct0zPIe3xs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/708e0d691a22212e1e373dc8779cbe53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ct0zPIe3xs", "openreview": "https://openreview.net/forum?id=Ct0zPIe3xs", "poster": "/media/PosterPDFs/NeurIPS%202023/72410.png?t=1701415136.9432428", "slides": "https://nips.cc/virtual/2023/poster/72410", "video": "https://nips.cc/virtual/2023/poster/72410", "author_site": "Jinpeng Chen, Runmin Cong, Yuxuan LUO, Horace Ip, Sam Kwong", "tldr": "", "abstract": "Existing class-incremental semantic segmentation (CISS) methods mainly tackle catastrophic forgetting and background shift, but often overlook another crucial issue. In CISS, each step focuses on different foreground classes, and the training set for a single step only includes images containing pixels of the current foreground classes, excluding images without them. This leads to an overrepresentation of these foreground classes in the single-step training set, causing the classification biased towards these classes. To address this issue, we present STAR, which preserves the main characteristics of each past class by storing a compact prototype and necessary statistical data, and aligns the class distribution of single-step training samples with the complete dataset by replaying these prototypes and repeating background pixels with appropriate frequency. Compared to the previous works that replay raw images, our method saves over 100 times the storage while achieving better performance. Moreover, STAR incorporates an old-class features maintaining (OCFM) loss, keeping old-class features unchanged while preserving sufficient plasticity for learning new classes. Furthermore, a similarity-aware discriminative (SAD) loss is employed to specifically enhance the feature diversity between similar old-new class pairs. 
Experiments on two public datasets, Pascal VOC 2012 and ADE20K, reveal that our model surpasses all previous state-of-the-art methods.", "keywords": "Continual learning;Class-incremental semantic segmentation;Prototype replay", "primary_area": "", "supplementary_material": "/attachment/12cd406d19c18f0ad8fc4c0605e1fd0f95692634.pdf", "author": "Jinpeng Chen;Runmin Cong;Yuxuan LUO;Horace Ip;Sam Kwong", "authorids": "~Jinpeng_Chen2;~Runmin_Cong1;~Yuxuan_LUO3;~Horace_Ip1;~Sam_Kwong1", "gender": "M;M;M;M;M", "homepage": "https://jinpeng0528.github.io;https://rmcong.github.io/;;https://www.cs.cityu.edu.hk/profile/profile.php?eid=cship;https://scholars.ln.edu.hk/en/persons/sam-tak-wu-kwong", "dblp": ";180/7852;;;18/30", "google_scholar": "HdnFJ5kAAAAJ;https://scholar.google.co.uk/citations?hl=en;;;_PVI6EAAAAAJ", "orcid": "0000-0002-0469-4463;0000-0003-0972-4008;0000-0003-1003-2252;;0000-0001-7484-7261", "linkedin": "jinpeng-chen-401a52171/;;;;", "or_profile": "~Jinpeng_Chen2;~Runmin_Cong1;~Yuxuan_LUO3;~Horace_Ip1;~Sam_Kwong1", "aff": "City University of Hong Kong;Shandong University;City University of Hong Kong;City University of Hong Kong;Lingnan University", "aff_domain": "cityu.edu.hk;sdu.edu.cn;cityu.edu;cityu.edu.hk;ln.edu.hk", "position": "PhD student;Full Professor;PhD student;Full Professor;Chair Professor", "bibtex": "@inproceedings{\nchen2023saving,\ntitle={Saving 100x Storage: Prototype Replay for Reconstructing Training Sample Distribution in Class-Incremental Semantic Segmentation},\nauthor={Jinpeng Chen and Runmin Cong and Yuxuan LUO and Horace Ip and Sam Kwong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ct0zPIe3xs}\n}", "github": "", "project": "", "reviewers": "b1on;DAss;YBrR;gzuL;2RZD", "pdf_size": 4403342, "rating": "5;6;6;7;8", "confidence": "5;5;5;4;5", "soundness": "3;4;4;3;3", "novelty": "2;4;2;3;3", "presentation": "3;4;3;3;3", "wc_summary": "56;73;30;102;113", "wc_strengths": "22;121;56;95;75", "wc_weaknesses": "220;90;84;130;77", "wc_questions": "67;1;1;3;13", "wc_limitations": "6;9;1;28;14", "wc_review": "371;294;172;358;292", "wc_reply_reviewers": "26;33;0;31;67", "wc_reply_authors": "22;22;21;21;23", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 74.8, 30.208608044727914 ], "wc_strengths_avg": [ 73.8, 33.701038559664596 ], "wc_weaknesses_avg": [ 120.2, 53.18796856432853 ], "wc_questions_avg": [ 17.0, 25.392912396966203 ], "wc_limitations_avg": [ 11.6, 9.221713506718803 ], "wc_review_avg": [ 297.4, 70.50560261426038 ], "wc_reply_reviewers_avg": [ 31.4, 21.37849386650051 ], "wc_reply_authors_avg": [ 21.8, 0.7483314773547882 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.294174202707276, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11704835357542565502&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cityu.edu.hk;sdu.edu.cn;cityu.edu;cityu.edu.hk;ln.edu.hk", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "City University of Hong Kong;Shandong University;Lingnan University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.cityu.edu.hk;http://www.sdu.edu.cn;http://www.lingnan.edu.cn", "aff_unique_abbr": "CityU;SDU;LNU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "V-InFoR: A Robust Graph Neural Networks Explainer for Structurally Corrupted Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72409", "id": "CtXXOaxDw7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b07d36fb3fae0630897700593c8cf49d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CtXXOaxDw7", "openreview": "https://openreview.net/forum?id=CtXXOaxDw7", "poster": "/media/PosterPDFs/NeurIPS%202023/72409.png?t=1699530816.080687", "slides": "https://nips.cc/virtual/2023/poster/72409", "video": "https://nips.cc/virtual/2023/poster/72409", "author_site": "Senzhang Wang, Jun Yin, Chaozhuo Li, Chaozhuo Li, Xing Xie, Jianxin Wang", "tldr": "", "abstract": "GNN explanation method aims to identify an explanatory subgraph which contains the most informative components of the full graph. However, a major limitation of existing GNN explainers is that they are not robust to the structurally corrupted graphs, e.g., graphs with noisy or adversarial edges. On the one hand, existing GNN explainers mostly explore explanations based on either the raw graph features or the learned latent representations, both of which can be easily corrupted. On the other hand, the corruptions in graphs are irregular in terms of the structural properties, e.g., the size or connectivity of graphs, which makes the rigorous constraints used by previous GNN explainers unfeasible. To address these issues, we propose a robust GNN explainer called V-InfoR. Specifically, a robust graph representation extractor, which takes insights of variational inference, is proposed to infer the latent distribution of graph representations. Instead of directly using the corrupted raw features or representations of each single graph, we sample the graph representations from the inferred distribution for the downstream explanation generator, which can effectively eliminate the minor corruption. We next formulate the explanation exploration as a graph information bottleneck (GIB) optimization problem. As a more general method that does not need any rigorous structural constraints, our GIB-based method can adaptively capture both the regularity and irregularity of the severely corrupted graphs for explanation. Extensive evaluations on both synthetic and real-world datasets indicate that V-InfoR significantly improves the GNN explanation performance for the structurally corrupted graphs. 
Code and dataset are available at https://anonymous.4open.science/r/V-InfoR-EF88", "keywords": "Explainable AI;Graph Neural Networks;Machine Learning", "primary_area": "", "supplementary_material": "", "author": "Senzhang Wang;Jun Yin;Chaozhuo Li;Xing Xie;Jianxin Wang", "authorids": "~Senzhang_Wang2;~Jun_Yin11;~Chaozhuo_Li1;~Xing_Xie3;~Jianxin_Wang1", "gender": "M;M;;M;", "homepage": "https://senzhangwangcsu.github.io/index.html;https://esperanto-mega.github.io/;https://scss.bupt.edu.cn/info/1063/5534.htm;http://research.microsoft.com/en-us/people/xingx/;https://faculty.csu.edu.cn/wangjianxin1/zh_CN/index/106082/list/", "dblp": "118/5055;58/5423-5;316/1269.html;08/6809-1;75/2669-1.html", "google_scholar": "zdWyGRMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;5EQfAFIAAAAJ;7pgY2F0AAAAJ", "orcid": "0000-0002-3615-4859;;0000-0002-8179-7503;0000-0002-8608-8482;0000-0003-1516-0480", "linkedin": ";;;xingx/;", "or_profile": "~Senzhang_Wang2;~Jun_Yin11;~Chaozhuo_Li1;~Xing_Xie3;~Jianxin_Wang1", "aff": "Central South University;Central South University;Beijing University of Posts and Telecommunications;Microsoft Research Asia;Central South University", "aff_domain": "csu.edu.cn;csu.edu.cn;bupt.edu.cn;microsoft.com;csu.edu.cn", "position": "Full Professor;MS student;Associate Professor;Senior Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2023vinfor,\ntitle={V-InFoR: A Robust Graph Neural Networks Explainer for Structurally Corrupted Graphs},\nauthor={Senzhang Wang and Jun Yin and Chaozhuo Li and Xing Xie and Jianxin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CtXXOaxDw7}\n}", "github": "", "project": "", "reviewers": "2tSN;cjXo;Q8Xb;kswM;SeJw", "pdf_size": 911395, "rating": "5;6;6;7;8", "confidence": "5;4;4;4;4", "soundness": "3;3;2;4;3", "novelty": "3;2;2;3;4", "presentation": "3;3;1;3;3", "wc_summary": "63;44;65;160;152", "wc_strengths": "43;55;33;184;151", "wc_weaknesses": "94;70;213;112;96", "wc_questions": "22;67;51;30;65", "wc_limitations": "25;6;7;36;14", "wc_review": "247;242;369;522;478", "wc_reply_reviewers": "12;47;435;110;19", "wc_reply_authors": "8;20;1067;10;9", "reply_reviewers": "1;1;4;1;1", "reply_authors": "2;2;5;2;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 96.8, 48.95467291280783 ], "wc_strengths_avg": [ 93.2, 61.949656980486985 ], "wc_weaknesses_avg": [ 117.0, 49.839743177508446 ], "wc_questions_avg": [ 47.0, 18.18790807102345 ], "wc_limitations_avg": [ 17.6, 11.42978564978364 ], "wc_review_avg": [ 371.6, 115.12532301800503 ], "wc_reply_reviewers_avg": [ 124.6, 159.0051571490686 ], "wc_reply_authors_avg": [ 222.8, 422.12197289409136 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6864064729836443, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6594326797499967265&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "csu.edu.cn;csu.edu.cn;bupt.edu.cn;microsoft.com;csu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Central South University;Beijing University of Posts and Telecommunications;Microsoft", 
"aff_unique_dep": ";;Research", "aff_unique_url": "https://www.csu.edu.cn;http://www.bupt.edu.cn/;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "CSU;BUPT;MSR Asia", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Asia", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Cu3T82cegI", "title": "A General Framework for Learning under Corruption: Label Noise, Attribute Noise, and Beyond", "track": "main", "status": "Reject", "tldr": "", "abstract": "Corruption is frequently observed in collected data and has been extensively studied in machine learning under different corruption models.\nDespite this, there remains a limited understanding of how these models relate such that a unified view of corruptions and their consequences on learning is still lacking. In this work, we formally analyze corruption models at the distribution level through a general, exhaustive framework based on Markov kernels. We highlight the existence of intricate joint and dependent corruptions on both labels and attributes, which are rarely touched by existing research. Further, we show how these corruptions affect standard supervised learning by analyzing the resulting changes in Bayes Risk. Our findings offer qualitative insights into the consequences of ``more complex'' corruptions on the learning problem, and provide a foundation for future quantitative comparisons. Applications of the framework include corruption-corrected learning, a subcase of which we study in this paper by theoretically analyzing loss correction with respect to different corruption instances.", "keywords": "corruption framework;Markov transitions;learning theory", "primary_area": "", "supplementary_material": "/attachment/6c54898cf7d2c89aaea5ad9fb8275fdaf88b6c2b.pdf", "author": "Laura Iacovissi;Nan Lu;Robert Williamson", "authorids": "~Laura_Iacovissi1;~Nan_Lu1;~Robert_Williamson1", "gender": ";F;M", "homepage": "https://fm.ls/laura-iacovissi;;https://uni-tuebingen.de/en/research/core-research/cluster-of-excellence-machine-learning/research/research/cluster-research-groups/professorships/foundations-of-machine-learning-systems/", "dblp": "309/7524;;29/5199", "google_scholar": "MVltCsAAAAAJ;https://scholar.google.co.jp/citations?user=KQUQlG4AAAAJ;G4MBruQAAAAJ", "orcid": "0000-0002-5012-551X;;", "linkedin": "lauraiacovissi/;;robert-bob-williamson-0115774/", "or_profile": "~Laura_Iacovissi1;~Nan_Lu1;~Robert_Williamson1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\niacovissi2023a,\ntitle={A General Framework for Learning under Corruption: Label Noise, Attribute Noise, and Beyond},\nauthor={Laura Iacovissi and Nan Lu and Robert Williamson},\nyear={2023},\nurl={https://openreview.net/forum?id=Cu3T82cegI}\n}", "github": "", "project": "", "reviewers": "2zzP;FFYg;5hDq;uSg2", "site": "https://openreview.net/forum?id=Cu3T82cegI", "pdf_size": 415585, "rating": "3;4;5;6", "confidence": "3;2;2;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;1;2;2", "wc_summary": "125;52;38;68", "wc_strengths": "71;54;82;80", "wc_weaknesses": "128;213;147;141", "wc_questions": "237;5;3;255", "wc_limitations": "20;15;27;1", "wc_review": "581;339;297;545", "wc_reply_reviewers": "0;234;20;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", 
"reply_authors": "1;1;1;1", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 33.070946463625745 ], "wc_strengths_avg": [ 71.75, 11.053845484717073 ], "wc_weaknesses_avg": [ 157.25, 32.91181398829302 ], "wc_questions_avg": [ 125.0, 121.16930304330384 ], "wc_limitations_avg": [ 15.75, 9.522998477370455 ], "wc_review_avg": [ 440.5, 124.05140063699402 ], "wc_reply_reviewers_avg": [ 63.5, 98.77626233058224 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5578976523359821029&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Convolution Monge Mapping Normalization for learning on sleep data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72408", "id": "CuHymkHRus", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/21718991f6acf19a42376b5c7a8668c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CuHymkHRus", "openreview": "https://openreview.net/forum?id=CuHymkHRus", "poster": "/media/PosterPDFs/NeurIPS%202023/72408.png?t=1702154211.446067", "slides": "https://nips.cc/virtual/2023/poster/72408", "video": "https://nips.cc/virtual/2023/poster/72408", "author_site": "Th\u00e9o Gnassounou, R\u00e9mi Flamary, Alexandre Gramfort", "tldr": "", "abstract": "In many machine learning applications on signals and biomedical data, especially electroencephalogram (EEG), one major challenge is the variability of the data across subjects, sessions, and hardware devices. In this work, we propose a new method called Convolutional Monge Mapping Normalization ($\\texttt{CMMN}$), which consists in filtering the signals in order to adapt their power spectrum density (PSD) to a Wasserstein barycenter estimated on training data. $\\texttt{CMMN}$ relies on novel closed-form solutions for optimal transport mappings and barycenters and provides individual test time adaptation to new data without needing to retrain a prediction model. Numerical experiments on sleep EEG data show that $\\texttt{CMMN}$ leads to significant and consistent performance gains independent from the neural network architecture when adapting between subjects, sessions, and even datasets collected with different hardware. 
Notably our performance gain is on par with much more numerically intensive Domain Adaptation (DA) methods and can be used in conjunction with those for even better performances.", "keywords": "Neuroscience;Domain adaptation;Optimal Transport", "primary_area": "", "supplementary_material": "/attachment/342f4e5a643aa0d895155aedb519cbc0882eb661.zip", "author": "Theo Gnassounou;R\u00e9mi Flamary;Alexandre Gramfort", "authorids": "~Theo_Gnassounou1;~R\u00e9mi_Flamary1;~Alexandre_Gramfort1", "gender": "M;;M", "homepage": ";https://remi.flamary.com/;http://alexandre.gramfort.net", "dblp": ";00/8318;15/7980", "google_scholar": ";https://scholar.google.fr/citations?user=zDnwxFQAAAAJ;fhxshS0AAAAJ", "orcid": ";0000-0002-4212-6627;0000-0001-9791-4404", "linkedin": "th%C3%A9o-gnassounou-4a1569120/?originalSubdomain=fr;;alexandregramfort/", "or_profile": "~Theo_Gnassounou1;~R\u00e9mi_Flamary1;~Alexandre_Gramfort1", "aff": "INRIA;\u00c9cole Polytechnique;Meta", "aff_domain": "inria.fr;polytechnique.edu;meta.com", "position": "PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\ngnassounou2023convolution,\ntitle={Convolution Monge Mapping Normalization for learning on sleep data},\nauthor={Theo Gnassounou and R{\\'e}mi Flamary and Alexandre Gramfort},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CuHymkHRus}\n}", "github": "", "project": "", "reviewers": "crXL;Dgon;VNrC;uQaR", "pdf_size": 1632174, "rating": "4;6;6;8", "confidence": "4;3;4;5", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "72;82;47;147", "wc_strengths": "78;82;66;60", "wc_weaknesses": "304;155;111;128", "wc_questions": "112;50;40;4", "wc_limitations": "21;45;1;4", "wc_review": "587;414;265;343", "wc_reply_reviewers": "486;14;43;34", "wc_reply_authors": "586;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.0, 36.9120576505835 ], "wc_strengths_avg": [ 71.5, 8.874119674649425 ], "wc_weaknesses_avg": [ 174.5, 76.39535326183132 ], "wc_questions_avg": [ 51.5, 38.89408695418881 ], "wc_limitations_avg": [ 17.75, 17.483921184905864 ], "wc_review_avg": [ 402.25, 118.97347393431865 ], "wc_reply_reviewers_avg": [ 144.25, 197.58842957015474 ], "wc_reply_authors_avg": [ 146.5, 253.74544330884052 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10547026105110547854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "inria.fr;polytechnique.edu;meta.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "INRIA;Ecole Polytechnique;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.inria.fr;https://www.polytechnique.edu;https://meta.com", "aff_unique_abbr": "INRIA;X;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;United States" }, { "title": "Online POMDP Planning with Anytime Deterministic Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72407", "id": "Cupr2yTFSx", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc6bd0eef19459655d5b097af783661d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Cupr2yTFSx", "openreview": "https://openreview.net/forum?id=Cupr2yTFSx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72407", "video": "https://nips.cc/virtual/2023/poster/72407", "author_site": "Moran Barenboim, Vadim Indelman", "tldr": "", "abstract": "Autonomous agents operating in real-world scenarios frequently encounter uncertainty and make decisions based on incomplete information. Planning under uncertainty can be mathematically formalized using partially observable Markov decision processes (POMDPs). However, finding an optimal plan for POMDPs can be computationally expensive and is feasible only for small tasks. In recent years, approximate algorithms, such as tree search and sample-based methodologies, have emerged as state-of-the-art POMDP solvers for larger problems. Despite their effectiveness, these algorithms offer only probabilistic and often asymptotic guarantees toward the optimal solution due to their dependence on sampling. To address these limitations, we derive a deterministic relationship between a simplified solution that is\neasier to obtain and the theoretically optimal one. First, we derive bounds for selecting a subset of the observations to branch from while computing a complete belief at each posterior node. Then, since a complete belief update may be computationally demanding, we extend the bounds to support reduction of both the state and the observation spaces. We demonstrate how our guarantees can be integrated with existing state-of-the-art solvers that sample a subset of states and observations. As a result, the returned solution holds deterministic bounds relative to the optimal policy. 
Lastly, we substantiate our findings with supporting experimental results.", "keywords": "POMDPs;Planning under uncertainty;Robotics", "primary_area": "", "supplementary_material": "/attachment/402f661f6875f980e7280326546fa98a8d2f29ee.pdf", "author": "Moran Barenboim;Vadim Indelman", "authorids": "~Moran_Barenboim1;~Vadim_Indelman1", "gender": "M;M", "homepage": ";https://vindelman.net.technion.ac.il/", "dblp": "274/9412;16/9967", "google_scholar": "pVnIUzMAAAAJ;https://scholar.google.co.il/citations?user=nY6P_nwAAAAJ", "orcid": ";0000-0002-1863-3442", "linkedin": "moran-barenboim-3b5675149/;", "or_profile": "~Moran_Barenboim1;~Vadim_Indelman1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion, Technion", "aff_domain": "campus.technion.ac.il;technion.ac.il", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nbarenboim2023online,\ntitle={Online {POMDP} Planning with Anytime Deterministic Guarantees},\nauthor={Moran Barenboim and Vadim Indelman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Cupr2yTFSx}\n}", "github": "", "project": "", "reviewers": "T4b6;3sFn;GBcp;JfUf;9qkM", "pdf_size": 523288, "rating": "5;6;6;6;6", "confidence": "1;4;3;3;2", "soundness": "2;4;2;2;3", "novelty": "3;3;2;3;3", "presentation": "2;3;2;1;3", "wc_summary": "28;164;93;210;64", "wc_strengths": "7;94;36;32;40", "wc_weaknesses": "5;331;233;405;65", "wc_questions": "1;126;62;730;2", "wc_limitations": "1;93;5;54;4", "wc_review": "42;808;429;1431;175", "wc_reply_reviewers": "0;116;65;208;0", "wc_reply_authors": "0;102;35;0;0", "reply_reviewers": "0;2;1;1;0", "reply_authors": "0;2;2;1;0", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 111.8, 66.37589924061292 ], "wc_strengths_avg": [ 41.8, 28.526478927480692 ], "wc_weaknesses_avg": [ 207.8, 152.46035550266828 ], "wc_questions_avg": [ 184.2, 276.75866743428287 ], "wc_limitations_avg": [ 31.4, 36.543672502910816 ], "wc_review_avg": [ 577.0, 500.48176789969085 ], "wc_reply_reviewers_avg": [ 77.8, 78.33364538944936 ], "wc_reply_authors_avg": [ 27.4, 39.68677361539988 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.0, 0.8944271909999159 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7844645405527363, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7857016180513944838&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "campus.technion.ac.il;technion.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Align Your Prompts: Test-Time Prompting with Distribution Alignment for Zero-Shot Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72406", "id": "CusNOTRkQw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe8debfd5a36ada52e038c8b2078b2ce-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CusNOTRkQw", "openreview": 
"https://openreview.net/forum?id=CusNOTRkQw", "poster": "/media/PosterPDFs/NeurIPS%202023/72406.png?t=1701518650.4223518", "slides": "https://nips.cc/virtual/2023/poster/72406", "video": "https://nips.cc/virtual/2023/poster/72406", "author_site": "Jameel Abdul Samadh, Mohammad Hanan Gani, Noor Hussein, Muhammad Uzair Khattak, Muhammad Muzammal Naseer, Fahad Shahbaz Khan, Salman Khan", "tldr": "", "abstract": "The promising zero-shot generalization of vision-language models such as CLIP has led to their adoption using prompt learning for numerous downstream tasks. Previous works have shown test-time prompt tuning using entropy minimization to adapt text prompts for unseen domains. While effective, this overlooks the key cause for performance degradation to unseen domains -- distribution shift. In this work, we explicitly handle this problem by aligning the out-of-distribution (OOD) test sample statistics to those of the source data using prompt tuning. We use a single test sample to adapt multi-modal prompts at test time by minimizing the feature distribution shift to bridge the gap in the test domain. Evaluating against the domain generalization benchmark, our method improves zero-shot top-1 accuracy beyond existing prompt-learning techniques, with a 3.08% improvement over the baseline MaPLe. In cross-dataset generalization with unseen categories across 10 datasets, our method improves consistently across all datasets compared to the existing state-of-the-art. Our source code and models are available at [https://jameelhassan.github.io/promptalign](https://jameelhassan.github.io/promptalign)", "keywords": "Vision-Language models;Prompt Learning;Test-Time Adaptation", "primary_area": "", "supplementary_material": "", "author": "Jameel Hassan Abdul Samadh;Hanan Gani;Noor Hazim Hussein;Muhammad Uzair Khattak;Muzammal Naseer;Fahad Khan;Salman Khan", "authorids": "~Jameel_Hassan_Abdul_Samadh1;~Hanan_Gani1;~Noor_Hazim_Hussein1;~Muhammad_Uzair_Khattak1;~Muzammal_Naseer1;~Fahad_Khan1;~Salman_Khan4", "gender": "M;M;F;M;M;M;M", "homepage": "https://jameelhassan.github.io/;https://hananshafi.github.io/;;https://muzammal-naseer.com/;https://sites.google.com/view/fahadkhans/home;https://salman-h-khan.github.io/;https://muzairkhattak.github.io/", "dblp": ";;;;05/8618;32/11535-1;324/2256.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=XFugeQ4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=tM9xKA8AAAAJ;zvaeYnUAAAAJ;https://scholar.google.es/citations?user=M59O9lkAAAAJ;https://scholar.google.es/citations?user=M6fFL4gAAAAJ", "orcid": ";;;0000-0001-7663-7161;;0000-0002-9502-1749;", "linkedin": "jameelhassan;hanan-gani-269a73134/;noor-hussein-67566a183/;muzammalnaseer/;;;muhammad-uzair-khattak-204ba1150/", "or_profile": "~Jameel_Hassan_Abdul_Samadh1;~Hanan_Gani1;~Noor_Hazim_Hussein1;~Muzammal_Naseer1;~Fahad_Khan1;~Salman_Khan4;~Muhammd_Uzair_Khattak1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Link\u00f6ping University;Australian National University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;liu.se;anu.edu.au;mbzuai.ac.ae", "position": "MS student;MS student;MS student;Researcher;Associate Professor;Lecturer;MS student", "bibtex": 
"@inproceedings{\nsamadh2023align,\ntitle={Align Your Prompts: Test-Time Prompting with Distribution Alignment for Zero-Shot Generalization},\nauthor={Jameel Hassan Abdul Samadh and Hanan Gani and Noor Hazim Hussein and Muhammad Uzair Khattak and Muzammal Naseer and Fahad Khan and Salman Khan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CusNOTRkQw}\n}", "github": "", "project": "", "reviewers": "ftF2;JL6L;H4M3;wdgw", "pdf_size": 10076885, "rating": "4;5;5;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "51;51;84;64", "wc_strengths": "29;78;38;63", "wc_weaknesses": "271;86;128;134", "wc_questions": "4;46;5;8", "wc_limitations": "5;15;1;1", "wc_review": "360;276;256;270", "wc_reply_reviewers": "70;34;36;21", "wc_reply_authors": "56;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.5, 13.5 ], "wc_strengths_avg": [ 52.0, 19.50640920313116 ], "wc_weaknesses_avg": [ 154.75, 69.61815495975169 ], "wc_questions_avg": [ 15.75, 17.52676524633111 ], "wc_limitations_avg": [ 5.5, 5.722761571129799 ], "wc_review_avg": [ 290.5, 40.77683165720456 ], "wc_reply_reviewers_avg": [ 40.25, 18.115946014492316 ], "wc_reply_authors_avg": [ 14.0, 24.24871130596428 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1240635744115671231&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;liu.se;anu.edu.au;mbzuai.ac.ae", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Link\u00f6ping University;Australian National University", "aff_unique_dep": ";;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.liu.se;https://www.anu.edu.au", "aff_unique_abbr": "MBZUAI;LiU;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2;0", "aff_country_unique": "United Arab Emirates;Sweden;Australia" }, { "title": "Diffusion Probabilistic Models for Structured Node Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72405", "id": "CxUuCydMDU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65d32185f73cbf4535449a792c63926f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CxUuCydMDU", "openreview": "https://openreview.net/forum?id=CxUuCydMDU", "poster": "/media/PosterPDFs/NeurIPS%202023/72405.png?t=1701759016.8507779", "slides": "https://nips.cc/virtual/2023/poster/72405", "video": "https://nips.cc/virtual/2023/poster/72405", "author_site": "Hyosoon Jang, Seonghyun Park, Sangwoo Mo, Sungsoo Ahn", "tldr": "", "abstract": "This paper studies structured node classification on graphs, where the predictions should consider dependencies between the node labels. In particular, we focus on solving the problem for partially labeled graphs where it is essential to incorporate the information in the known label for predicting the unknown labels. 
To address this issue, we propose a novel framework leveraging the diffusion probabilistic model for structured node classification (DPM-SNC). At the heart of our framework is the extraordinary capability of DPM-SNC to (a) learn a joint distribution over the labels with an expressive reverse diffusion process and (b) make predictions conditioned on the known labels utilizing manifold-constrained sampling. Since the DPMs lack training algorithms for partially labeled data, we design a novel training algorithm to apply DPMs, maximizing a new variational lower bound. We also theoretically analyze how DPMs benefit node classification by enhancing the expressive power of GNNs based on proposing AGG-WL, which is strictly more powerful than the classic 1-WL test. We extensively verify the superiority of our DPM-SNC in diverse scenarios, which include not only the transductive setting on partially labeled graphs but also the inductive setting and unlabeled graphs.", "keywords": "diffusion model;graph neural network;structured prediction;node classification", "primary_area": "", "supplementary_material": "/attachment/3e5991aff18094464b093ef632135c06821fb4d3.zip", "author": "Hyosoon Jang;Seonghyun Park;Sangwoo Mo;Sungsoo Ahn", "authorids": "~Hyosoon_Jang3;~Seonghyun_Park1;~Sangwoo_Mo1;~Sungsoo_Ahn1", "gender": "M;;M;M", "homepage": "https://hsjang0.github.io/hsjang/;;https://sites.google.com/view/sangwoomo;https://sungsooahn.super.site/", "dblp": "341/1374;;198/0432;90/5164", "google_scholar": "https://scholar.google.co.kr/citations?user=NOJd3GEAAAAJ;;https://scholar.google.co.kr/citations?user=Sq9y3NMAAAAJ;XTenHs0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hyosoon_Jang3;~Seonghyun_Park1;~Sangwoo_Mo1;~Sungsoo_Ahn1", "aff": "KNU;;KAIST;Pohang University of Science and Technology", "aff_domain": "kangwon.ac.kr;;kaist.ac.kr;postech.ac.kr", "position": "Undergrad student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njang2023diffusion,\ntitle={Diffusion Probabilistic Models for Structured Node Classification},\nauthor={Hyosoon Jang and Seonghyun Park and Sangwoo Mo and Sungsoo Ahn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CxUuCydMDU}\n}", "github": "", "project": "", "reviewers": "w4tc;kJkj;JeF8;rLqd;7A8R", "pdf_size": 2421663, "rating": "5;5;6;6;7", "confidence": "3;3;4;2;4", "soundness": "3;3;3;4;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "67;59;101;94;105", "wc_strengths": "28;65;65;81;98", "wc_weaknesses": "41;127;63;198;5", "wc_questions": "28;3;6;18;100", "wc_limitations": "5;8;9;9;10", "wc_review": "169;262;244;400;318", "wc_reply_reviewers": "0;0;11;26;11", "wc_reply_authors": "0;0;0;72;24", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.2, 18.637596411554792 ], "wc_strengths_avg": [ 67.4, 23.174123500145587 ], "wc_weaknesses_avg": [ 86.8, 68.31807959830253 ], "wc_questions_avg": [ 31.0, 35.63144678510823 ], "wc_limitations_avg": [ 8.2, 1.7204650534085253 ], "wc_review_avg": [ 278.6, 77.1689056550629 ], "wc_reply_reviewers_avg": [ 9.6, 9.562426470305537 ], "wc_reply_authors_avg": [ 19.2, 27.98856909525744 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 
0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42857142857142855, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7250920019898731989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "kangwon.ac.kr;;kaist.ac.kr;postech.ac.kr", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Kangnung National University;Korea Advanced Institute of Science and Technology;Pohang University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.knu.ac.kr;https://www.kaist.ac.kr;https://www.postech.ac.kr", "aff_unique_abbr": "KNU;KAIST;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Non-Asymptotic Analysis of a UCB-based Top Two Algorithm", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72404", "id": "CxjmYRP9Ji", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9b564716709357b4bccec9fc9ad04d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CxjmYRP9Ji", "openreview": "https://openreview.net/forum?id=CxjmYRP9Ji", "poster": "/media/PosterPDFs/NeurIPS%202023/72404.png?t=1696926284.6576324", "slides": "https://nips.cc/virtual/2023/poster/72404", "video": "https://nips.cc/virtual/2023/poster/72404", "author_site": "Marc Jourdan, R\u00e9my Degenne", "tldr": "", "abstract": "A Top Two sampling rule for bandit identification is a method which selects the next arm to sample from among two candidate arms, a *leader* and a *challenger*. Due to their simplicity and good empirical performance, they have received increased attention in recent years. However, for fixed-confidence best arm identification, theoretical guarantees for Top Two methods have only been obtained in the asymptotic regime, when the error level vanishes. In this paper, we derive the first non-asymptotic upper bound on the expected sample complexity of a Top Two algorithm, which holds for any error level. Our analysis highlights sufficient properties for a regret minimization algorithm to be used as leader. 
These properties are satisfied by the UCB algorithm, and our proposed UCB-based Top Two algorithm simultaneously enjoys non-asymptotic guarantees and competitive empirical performance.", "keywords": "multi-armed bandits;best-arm identification;Gaussian bandits;Top Two algorithm;fixed confidence;finite confidence", "primary_area": "", "supplementary_material": "/attachment/6cc13eef45ceb04bcbc27902c253b36ef80db40a.zip", "author": "Marc Jourdan;R\u00e9my Degenne", "authorids": "~Marc_Jourdan1;~R\u00e9my_Degenne1", "gender": "M;M", "homepage": "https://marcjourdan.github.io;https://remydegenne.github.io/", "dblp": "228/8157;157/1070", "google_scholar": "BOXGjhgAAAAJ;https://scholar.google.fr/citations?user=H-uIBOwAAAAJ", "orcid": "0000-0002-2449-4549;", "linkedin": "marc-jourdan/;", "or_profile": "~Marc_Jourdan1;~R\u00e9my_Degenne1", "aff": "INRIA;INRIA", "aff_domain": "inria.fr;inria.fr", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\njourdan2023nonasymptotic,\ntitle={Non-Asymptotic Analysis of a {UCB}-based Top Two Algorithm},\nauthor={Marc Jourdan and R{\\'e}my Degenne},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CxjmYRP9Ji}\n}", "github": "", "project": "", "reviewers": "YhuH;nKF8;WPkC;4xLs", "pdf_size": 10516491, "rating": "5;7;7;8", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "wc_summary": "59;34;108;159", "wc_strengths": "83;34;92;213", "wc_weaknesses": "143;14;135;136", "wc_questions": "125;28;258;83", "wc_limitations": "21;1;12;19", "wc_review": "431;111;605;610", "wc_reply_reviewers": "9;8;166;21", "wc_reply_authors": "0;0;120;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.0, 47.91137651956996 ], "wc_strengths_avg": [ 105.5, 65.8729838401146 ], "wc_weaknesses_avg": [ 107.0, 53.781967238099426 ], "wc_questions_avg": [ 123.5, 84.93085422860175 ], "wc_limitations_avg": [ 13.25, 7.8222439235810075 ], "wc_review_avg": [ 439.25, 202.75893938369276 ], "wc_reply_reviewers_avg": [ 51.0, 66.59204156654157 ], "wc_reply_authors_avg": [ 30.0, 51.96152422706632 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16159146582491255270&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 10, "email": "inria.fr;inria.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Neural Circuits for Fast Poisson Compressed Sensing in the Olfactory Bulb", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72403", "id": "Cxn1FpnNvG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cc8638553a347b1834d98be7613fa3f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Cxn1FpnNvG", "openreview": "https://openreview.net/forum?id=Cxn1FpnNvG", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72403.png?t=1702047354.8345346", "slides": "https://nips.cc/virtual/2023/poster/72403", "video": "https://nips.cc/virtual/2023/poster/72403", "author_site": "Jacob Zavatone-Veth, Paul Masset, William Tong, Joseph D. Zak, Venkatesh Murthy, Cengiz Pehlevan", "tldr": "", "abstract": "Within a single sniff, the mammalian olfactory system can decode the identity and concentration of odorants wafted on turbulent plumes of air. Yet, it must do so given access only to the noisy, dimensionally-reduced representation of the odor world provided by olfactory receptor neurons. As a result, the olfactory system must solve a compressed sensing problem, relying on the fact that only a handful of the millions of possible odorants are present in a given scene. Inspired by this principle, past works have proposed normative compressed sensing models for olfactory decoding. However, these models have not captured the unique anatomy and physiology of the olfactory bulb, nor have they shown that sensing can be achieved within the 100-millisecond timescale of a single sniff. Here, we propose a rate-based Poisson compressed sensing circuit model for the olfactory bulb. This model maps onto the neuron classes of the olfactory bulb, and recapitulates salient features of their connectivity and physiology. For circuit sizes comparable to the human olfactory bulb, we show that this model can accurately detect tens of odors within the timescale of a single sniff. We also show that this model can perform Bayesian posterior sampling for accurate uncertainty estimation. Fast inference is possible only if the geometry of the neural code is chosen to match receptor properties, yielding a distributed neural code that is not axis-aligned to individual odor identities. 
Our results illustrate how normative modeling can help us map function onto specific neural circuits to generate new hypotheses.", "keywords": "Olfaction;Bayesian inference;neural circuits;normative models;population geometry", "primary_area": "", "supplementary_material": "", "author": "Jacob A Zavatone-Veth;Paul Masset;William Lingxiao Tong;Joseph Zak;Venkatesh N Murthy;Cengiz Pehlevan", "authorids": "~Jacob_A_Zavatone-Veth1;~Paul_Masset1;~William_Lingxiao_Tong1;~Joseph_Zak1;~Venkatesh_N_Murthy1;~Cengiz_Pehlevan2", "gender": "M;M;;M;M;", "homepage": "https://jzv.io;https://scholar.harvard.edu/paul-masset;;https://zak-lab.org;http://vnmurthylab.org;https://pehlevan.seas.harvard.edu/", "dblp": "270/9915;158/2619;;;75/7277.html;145/3480", "google_scholar": "i_HogJkAAAAJ;Mi1NbLkAAAAJ;;https://scholar.google.com/citations?hl=en;vSeHOe4AAAAJ;veDLTPEAAAAJ", "orcid": "0000-0002-4060-1738;0000-0003-2001-7515;;0000-0002-3148-5325;0000-0003-2443-4252;0000-0001-9767-6063", "linkedin": ";;;;;", "or_profile": "~Jacob_A_Zavatone-Veth1;~Paul_Masset1;~William_Lingxiao_Tong1;~Joseph_Zak1;~Venkatesh_N_Murthy1;~Cengiz_Pehlevan2", "aff": "Harvard University;Harvard University;;University of Illinois at Chicago;Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;harvard.edu;;uic.edu;fas.harvard.edu;seas.harvard.edu", "position": "PhD student;Postdoc;;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzavatone-veth2023neural,\ntitle={Neural Circuits for Fast Poisson Compressed Sensing in the Olfactory Bulb},\nauthor={Jacob A Zavatone-Veth and Paul Masset and William Lingxiao Tong and Joseph Zak and Venkatesh N Murthy and Cengiz Pehlevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Cxn1FpnNvG}\n}", "github": "", "project": "", "reviewers": "DAo3;Qhvj;xx4u;cVJu;Wphi", "pdf_size": 12109663, "rating": "5;6;6;7;7", "confidence": "3;4;1;4;3", "soundness": "3;4;3;3;4", "novelty": "3;3;2;2;3", "presentation": "4;4;4;3;4", "wc_summary": "38;62;30;67;76", "wc_strengths": "50;52;50;100;130", "wc_weaknesses": "102;145;49;70;67", "wc_questions": "515;215;9;144;60", "wc_limitations": "1;13;5;20;11", "wc_review": "706;487;143;401;344", "wc_reply_reviewers": "114;370;0;14;27", "wc_reply_authors": "218;740;0;0;0", "reply_reviewers": "1;3;0;1;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 54.6, 17.590906741836815 ], "wc_strengths_avg": [ 76.4, 32.921725349683605 ], "wc_weaknesses_avg": [ 86.6, 33.82661673889365 ], "wc_questions_avg": [ 188.6, 177.77356383894653 ], "wc_limitations_avg": [ 10.0, 6.572670690061994 ], "wc_review_avg": [ 416.2, 183.89061966288546 ], "wc_reply_reviewers_avg": [ 105.0, 138.3444975414635 ], "wc_reply_authors_avg": [ 191.6, 286.904583441952 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2439750182371333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4803189471265018700&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 11, "email": "harvard.edu;harvard.edu;;uic.edu;fas.harvard.edu;seas.harvard.edu", "author_num": 6, 
"aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Harvard University;University of Illinois at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.uic.edu", "aff_unique_abbr": "Harvard;UIC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SALSA VERDE: a machine learning attack on LWE with sparse small secrets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72402", "id": "CxzCoFDeQf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a75db7d2ee1e4bee8fb819979b0a6cad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CxzCoFDeQf", "openreview": "https://openreview.net/forum?id=CxzCoFDeQf", "poster": "/media/PosterPDFs/NeurIPS%202023/72402.png?t=1700668947.1417553", "slides": "https://nips.cc/virtual/2023/poster/72402", "video": "https://nips.cc/virtual/2023/poster/72402", "author_site": "Cathy Li, Emily Wenger, Zeyuan Allen-Zhu, Francois Charton, Kristin E. Lauter", "tldr": "", "abstract": "Learning with Errors (LWE) is a hard math problem used in post-quantum cryptography. Homomorphic Encryption (HE) schemes rely on the hardness of the LWE problem for their security, and two LWE-based cryptosystems were recently standardized by NIST for digital signatures and key exchange (KEM). Thus, it is critical to continue assessing the security of LWE and specific parameter choices. For example, HE uses secrets with small entries, and the HE community has considered standardizing small sparse secrets to improve efficiency and functionality. However, prior work, SALSA and PICANTE, showed that ML attacks can recover sparse binary secrets. Building on these, we propose VERDE, an improved ML attack that can recover sparse binary, ternary, and narrow Gaussian secrets. Using improved preprocessing and secret recovery techniques, VERDE can attack LWE with larger dimensions ($n=512$) and smaller moduli ($\\log_2 q=12$ for $n=256$), using less time and power. We propose novel architectures for scaling. Finally, we develop a theory that explains the success of ML LWE attacks.", "keywords": "machine learning;cryptography;cryptanalysis", "primary_area": "", "supplementary_material": "/attachment/114b18885301dcbb97bb3d2c2808a2fbfa8fee34.zip", "author": "Cathy Yuanchen Li;Emily Wenger;Zeyuan Allen-Zhu;Francois Charton;Kristin E. 
Lauter", "authorids": "~Cathy_Yuanchen_Li2;~Emily_Wenger1;~Zeyuan_Allen-Zhu1;~Francois_Charton1;~Kristin_E._Lauter1", "gender": "F;;;M;F", "homepage": "https://cathyliyuanchen.github.io/;https://www.emilywenger.com/;;;https://ai.facebook.com/people/kristin-lauter/", "dblp": ";259/1518;;255/5318;08/1510", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;_xYN0z0AAAAJ;;;TmuUs30AAAAJ", "orcid": ";;;;", "linkedin": "cathy-li-0/;;;fran%C3%A7ois-charton-214187120/;kristin-lauter-6508a410/?fbclid=IwAR08l11qSLSgripTtUitzaGOeqECvw-A0xBhz3ZeqSGVZL28xbq6AO_jBgw", "or_profile": "~Cathy_Yuanchen_Li2;~Emily_Wenger1;~Zeyuan_Allen-Zhu1;~Francois_Charton1;~Kristin_E._Lauter1", "aff": "Meta;University of Chicago;;Meta Facebook;Meta Facebook", "aff_domain": "meta.com;uchicago.edu;;fb.com;fb.com", "position": "Intern;PhD student;;Research Engineer;Principal Researcher", "bibtex": "@inproceedings{\nli2023salsa,\ntitle={{SALSA} {VERDE}: a machine learning attack on {LWE} with sparse small secrets},\nauthor={Cathy Yuanchen Li and Emily Wenger and Zeyuan Allen-Zhu and Francois Charton and Kristin E. Lauter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CxzCoFDeQf}\n}", "github": "", "project": "", "reviewers": "wmYQ;XtRr;heZ1;j97t;StdV", "pdf_size": 771487, "rating": "4;5;6;7;7", "confidence": "4;2;2;3;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "4;3;3;2;4", "wc_summary": "209;44;40;194;98", "wc_strengths": "152;66;60;58;153", "wc_weaknesses": "438;184;71;34;129", "wc_questions": "270;6;6;87;142", "wc_limitations": "77;48;6;23;48", "wc_review": "1146;348;183;396;570", "wc_reply_reviewers": "0;0;0;0;125", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 117.0, 72.12766459549346 ], "wc_strengths_avg": [ 97.8, 44.74103262107391 ], "wc_weaknesses_avg": [ 171.2, 142.82212713721916 ], "wc_questions_avg": [ 102.2, 98.46908144184144 ], "wc_limitations_avg": [ 40.4, 24.253659517689286 ], "wc_review_avg": [ 528.6, 332.4259917635803 ], "wc_reply_reviewers_avg": [ 25.0, 50.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2750095491084634, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4006787270562478216&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "meta.com;uchicago.edu;;fb.com;fb.com", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Meta;University of Chicago", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.uchicago.edu", "aff_unique_abbr": "Meta;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Priming for Sample-Efficient Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72401", "id": "Cyn1PvuZsB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cea5bc68b890bffb10f18aaaab2becb1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Cyn1PvuZsB", 
"openreview": "https://openreview.net/forum?id=Cyn1PvuZsB", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72401", "video": "https://nips.cc/virtual/2023/poster/72401", "author_site": "Matthew Wallingford, Vivek Ramanujan, Alex Fang, Aditya Kusupati, Roozbeh Mottaghi, Aniruddha Kembhavi, Ludwig Schmidt, Ali Farhadi", "tldr": "", "abstract": "We propose Neural Priming, a technique for adapting large pretrained models to distribution shifts and downstream tasks given few or no labeled examples. Presented with class names or unlabeled test samples, Neural Priming enables the model to recall and conditions its parameters on relevant data seen throughout pretraining, thereby priming it for the test distribution. Neural Priming can be performed at test time in even for pretraining datasets as large as LAION-2B. Performing lightweight updates on the recalled data significantly improves accuracy across a variety of distribution shift and transfer learning benchmarks. Concretely, in the zero-shot setting, we see a 2.45% improvement in accuracy on ImageNet and 3.81% accuracy improvement on average across standard transfer learning benchmarks. Further, using our test time inference scheme, we see a 1.41% accuracy improvement on ImageNetV2. These results demonstrate the effectiveness of Neural Priming in addressing the common challenge of limited labeled data and changing distributions. Code and models are open-sourced at [https://www.github.com/RAIVNLab/neural-priming](https://www.github.com/RAIVNLab/neural-priming).", "keywords": "Transfer Learning;Distribution Shift;Test-Time Training", "primary_area": "", "supplementary_material": "", "author": "Matthew Wallingford;Vivek Ramanujan;Alex Fang;Aditya Kusupati;Roozbeh Mottaghi;Aniruddha Kembhavi;Ludwig Schmidt;Ali Farhadi", "authorids": "~Matthew_Wallingford1;~Vivek_Ramanujan1;~Alex_Fang1;~Aditya_Kusupati1;~Roozbeh_Mottaghi1;~Aniruddha_Kembhavi1;~Ludwig_Schmidt1;~Ali_Farhadi3", "gender": "M;;M;;M;M;M;M", "homepage": "https://vkramanuj.github.io;;http://www.adityakusupati.com/;http://roozbehm.info;https://anikem.github.io/;http://people.csail.mit.edu/ludwigs/;https://homes.cs.washington.edu/~ali/;https://mattwallingford.github.io/", "dblp": "225/4845;260/0449;231/7662;36/633;81/7583;141/2720;37/5826;263/1795", "google_scholar": "yXFPyNMAAAAJ;;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;CCV58dgAAAAJ;JnUevM0AAAAJ;SWMKy70AAAAJ;jeOFRDsAAAAJ;", "orcid": ";;0000-0001-8455-1851;;;;;", "linkedin": ";alex-fang-8a11a8115/;adityakusupati/;roozbeh-mottaghi-63397aa0;;ludwig-schmidt-87ba3612/;;", "or_profile": "~Vivek_Ramanujan1;~Alex_Fang1;~Aditya_Kusupati1;~Roozbeh_Mottaghi1;~Aniruddha_Kembhavi1;~Ludwig_Schmidt1;~Ali_Farhadi3;~Matthew_C_Wallingford2", "aff": "Apple;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;University of Washington", "aff_domain": "apple.com;cs.washington.edu;cs.washington.edu;cs.washington.edu;allenai.org;allenai.org;cs.uw.edu;washington.edu", "position": "Intern;PhD student;PhD student;Affiliate Professor ;Research Manager;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nwallingford2023neural,\ntitle={Neural Priming for Sample-Efficient Adaptation},\nauthor={Matthew Wallingford and Vivek Ramanujan and Alex Fang and Aditya Kusupati and Roozbeh Mottaghi and Aniruddha Kembhavi and Ludwig Schmidt 
and Ali Farhadi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Cyn1PvuZsB}\n}", "github": "", "project": "", "reviewers": "89m2;xxj2;AXQG;Ekvm", "pdf_size": 1900859, "rating": "4;6;7;7", "confidence": "5;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;2", "presentation": "4;3;3;3", "wc_summary": "185;96;118;48", "wc_strengths": "76;73;127;71", "wc_weaknesses": "990;276;234;92", "wc_questions": "363;40;33;63", "wc_limitations": "37;7;31;23", "wc_review": "1651;492;543;297", "wc_reply_reviewers": "0;45;50;11", "wc_reply_authors": "472;25;36;21", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 111.75, 49.286788290575394 ], "wc_strengths_avg": [ 86.75, 23.306383245797704 ], "wc_weaknesses_avg": [ 398.0, 348.52546535368117 ], "wc_questions_avg": [ 124.75, 138.00067934615396 ], "wc_limitations_avg": [ 24.5, 11.258330249197702 ], "wc_review_avg": [ 745.75, 530.6483652099571 ], "wc_reply_reviewers_avg": [ 26.5, 21.43011899173684 ], "wc_reply_authors_avg": [ 138.5, 192.62463497694162 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5464532979334107241&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 8, "email": "apple.com;cs.washington.edu;cs.washington.edu;cs.washington.edu;allenai.org;allenai.org;cs.uw.edu;washington.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;2;2;1;1", "aff_unique_norm": "Apple;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Apple Inc.;Department of Computer Science;", "aff_unique_url": "https://www.apple.com;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Apple;UW;AI2", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "CyuhEwvSof", "title": "Brain development dictates energy constraints on neural architecture search: cross-disciplinary insights on optimization strategies", "track": "main", "status": "Reject", "tldr": "", "abstract": "Today\u2019s artificial neural architecture search (NAS) strategies are essentially prediction-error-optimized. That principle also holds true for AI functions in general. From the developmental neuroscience perspective, I present evidence for the central role of energy-cost-, rather than prediction-error-, optimized neural architecture search (NAS). Supporting evidence is drawn from the latest insights into the glial-neural organization of the human brain and the dynamic coordination theory which provides a mathematical foundation for the functional expression of this optimization strategy. This is relevant to devising novel NAS strategies in AI, especially in AGI. Additional implications arise for causal reasoning from deep neural nets. 
Together, these insights from developmental neuroscience offer a new perspective on NAS and the foundational assumptions in AI modeling.", "keywords": "NAS;brain development;glial-neural networks;metabolic optimization;dynamic coordination", "primary_area": "", "supplementary_material": "", "author": "Martin G Frasch", "authorids": "~Martin_G_Frasch1", "gender": "M", "homepage": "https://fraschlab.org", "dblp": "226/0681.html", "google_scholar": "https://scholar.google.ca/citations?user=3lacmuYAAAAJ", "orcid": "0000-0003-3159-6321", "linkedin": "https://linkedin.com/in/mfrasch", "or_profile": "~Martin_G_Frasch1", "aff": "University of Washington", "aff_domain": "u.washington.edu", "position": "Principal Researcher", "bibtex": "@misc{\nfrasch2023brain,\ntitle={Brain development dictates energy constraints on neural architecture search: cross-disciplinary insights on optimization strategies},\nauthor={Martin G Frasch},\nyear={2023},\nurl={https://openreview.net/forum?id=CyuhEwvSof}\n}", "github": "", "project": "", "reviewers": "t3j3;kaEY;i8bJ", "site": "https://openreview.net/forum?id=CyuhEwvSof", "pdf_size": 582396, "rating": "2;2;3", "confidence": "3;3;3", "soundness": "1;1;2", "novelty": "1;1;2", "presentation": "1;1;2", "wc_summary": "26;103;45", "wc_strengths": "14;72;15", "wc_weaknesses": "96;368;87", "wc_questions": "19;125;83", "wc_limitations": "15;52;1", "wc_review": "170;720;231", "wc_reply_reviewers": "63;0;35", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 2.3333333333333335, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 58.0, 32.751590292177674 ], "wc_strengths_avg": [ 33.666666666666664, 27.10883414846328 ], "wc_weaknesses_avg": [ 183.66666666666666, 130.39512601661494 ], "wc_questions_avg": [ 75.66666666666667, 43.583891009816405 ], "wc_limitations_avg": [ 22.666666666666668, 21.51485275080657 ], "wc_review_avg": [ 373.6666666666667, 246.15758277078436 ], "wc_reply_reviewers_avg": [ 32.666666666666664, 25.772509040103607 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Y6q995Jit3oJ:scholar.google.com/&scioq=Brain+development+dictates+energy+constraints+on+neural+architecture+search:+cross-disciplinary+insights+on+optimization+strategies&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Exploring and Interacting with the Set of Good Sparse Generalized Additive Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72400", "id": "CzAAbKOHQW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b1719f44953c2e0754a016ab267fe4e7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CzAAbKOHQW", "openreview": "https://openreview.net/forum?id=CzAAbKOHQW", "poster": "/media/PosterPDFs/NeurIPS%202023/72400.png?t=1701832968.6144443", "slides": 
"https://nips.cc/virtual/2023/poster/72400", "video": "https://nips.cc/virtual/2023/poster/72400", "author_site": "Chudi Zhong, Zhi Chen, Jiachang Liu, Margo Seltzer, Cynthia Rudin", "tldr": "", "abstract": "In real applications, interaction between machine learning models and domain experts is critical; however, the classical machine learning paradigm that usually produces only a single model does not facilitate such interaction. Approximating and exploring the Rashomon set, i.e., the set of all near-optimal models, addresses this practical challenge by providing the user with a searchable space containing a diverse set of models from which domain experts can choose. We present algorithms to efficiently and accurately approximate the Rashomon set of sparse, generalized additive models with ellipsoids for fixed support sets and use these ellipsoids to approximate Rashomon sets for many different support sets. The approximated Rashomon set serves as a cornerstone to solve practical challenges such as (1) studying the variable importance for the model class; (2) finding models under user-specified constraints (monotonicity, direct editing); and (3) investigating sudden changes in the shape functions. Experiments demonstrate the fidelity of the approximated Rashomon set and its effectiveness in solving practical challenges.", "keywords": "Interpretability;human-model interaction;generalized additive model;Rashomon set", "primary_area": "", "supplementary_material": "/attachment/813802c51dcb0514a8c62ce166f4fb2874cd5b2f.zip", "author": "Chudi Zhong;Zhi Chen;Jiachang Liu;Margo Seltzer;Cynthia Rudin", "authorids": "~Chudi_Zhong1;~Zhi_Chen6;~Jiachang_Liu1;~Margo_Seltzer1;~Cynthia_Rudin1", "gender": "F;M;M;;", "homepage": "https://chudizhong.github.io/;https://users.cs.duke.edu/~zhichen/;https://jiachangliu.github.io/;;", "dblp": "267/5474;05/1539-9;194/1565-1;;", "google_scholar": "DXKNTLIAAAAJ;ucUuuJ0AAAAJ;W_Zyr0AAAAAJ;;", "orcid": ";0000-0003-1993-5749;;;", "linkedin": ";;;;", "or_profile": "~Chudi_Zhong1;~Zhi_Chen6;~Jiachang_Liu1;~Margo_Seltzer1;~Cynthia_Rudin1", "aff": "Duke University;Duke University;Duke University;;", "aff_domain": "duke.edu;duke.edu;duke.edu;;", "position": "PhD student;PhD student;PhD student;;", "bibtex": "@inproceedings{\nzhong2023exploring,\ntitle={Exploring and Interacting with the Set of Good Sparse Generalized Additive Models},\nauthor={Chudi Zhong and Zhi Chen and Jiachang Liu and Margo Seltzer and Cynthia Rudin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CzAAbKOHQW}\n}", "github": "", "project": "", "reviewers": "8bE1;SBSW;r4Ee", "pdf_size": 6776758, "rating": "5;6;7", "confidence": "4;2;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "51;73;93", "wc_strengths": "28;35;12", "wc_weaknesses": "69;64;14", "wc_questions": "26;28;50", "wc_limitations": "1;61;1", "wc_review": "175;261;170", "wc_reply_reviewers": "0;70;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.33333333333333, 17.15290710702481 ], "wc_strengths_avg": [ 25.0, 9.626352718795768 ], "wc_weaknesses_avg": [ 49.0, 24.8327740429189 ], "wc_questions_avg": [ 34.666666666666664, 10.873004286866726 ], "wc_limitations_avg": [ 21.0, 
28.284271247461902 ], "wc_review_avg": [ 202.0, 41.769207158703246 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 32.99831645537222 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14936969087041827853&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "duke.edu;duke.edu;duke.edu;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Coneheads: Hierarchy Aware Attention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72399", "id": "CzAFnfwbGd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a17251f8d595179eef5e466b1f5f7a85-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CzAFnfwbGd", "openreview": "https://openreview.net/forum?id=CzAFnfwbGd", "poster": "/media/PosterPDFs/NeurIPS%202023/72399.png?t=1701210101.0317354", "slides": "https://nips.cc/virtual/2023/poster/72399", "video": "https://nips.cc/virtual/2023/poster/72399", "author_site": "Albert Tseng, Tao Yu, Toni Liu, Christopher De Sa", "tldr": "", "abstract": "Attention networks such as transformers have achieved state-of-the-art performance in many domains. \nThese networks rely heavily on the dot product attention operator, which computes the similarity between two points by taking their inner product.\nHowever, the inner product does not explicitly model the complex structural properties of real world datasets, such as hierarchies between data points.\nTo remedy this, we introduce cone attention, a drop-in replacement for dot product attention based on hyperbolic entailment cones.\nCone attention associates two points by the depth of their lowest common ancestor in a hierarchy defined by hyperbolic cones, which intuitively measures the divergence of two points and gives a $\\textit{hierarchy aware}$ similarity score.\nWe test cone attention on a wide variety of models and tasks and show that it improves task-level performance over dot product attention and other baselines, and is able to match dot-product attention with significantly fewer parameters.\nOur results suggest that cone attention is an effective way to capture hierarchical relationships when calculating attention.", "keywords": "Hyperbolic Entailment Cones;Hyperbolic Space;Entailment Cones;Attention;Dot Product;Hierarchy;Transformers", "primary_area": "", "supplementary_material": "/attachment/c3b2ee35fd7e364514f9bafa2d4195cd66ebc65c.pdf", "author": "Albert Tseng;Tao Yu;Toni J.B. 
Liu;Christopher De Sa", "authorids": "~Albert_Tseng1;~Tao_Yu1;~Toni_J.B._Liu1;~Christopher_De_Sa2", "gender": ";M;M;M", "homepage": "https://tsengalb99.github.io/;https://ydtydr.github.io/;;http://cs.cornell.edu/~cdesa", "dblp": "249/9439;;348/5536;154/6336", "google_scholar": ";lbi95bUAAAAJ;;", "orcid": ";;0009-0001-3142-5402;", "linkedin": ";tao-yu-220720182/;toni-jb-liu;", "or_profile": "~Albert_Tseng1;~Tao_Yu1;~Toni_J.B._Liu1;~Christopher_De_Sa1", "aff": "Cornell University;Cornell University;Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntseng2023coneheads,\ntitle={Coneheads: Hierarchy Aware Attention},\nauthor={Albert Tseng and Tao Yu and Toni J.B. Liu and Christopher De Sa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CzAFnfwbGd}\n}", "github": "", "project": "", "reviewers": "F7eB;AaRb;Rx82;hcYy", "pdf_size": 2445741, "rating": "6;6;6;7", "confidence": "2;5;3;3", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;2;3", "wc_summary": "64;89;27;127", "wc_strengths": "67;60;62;77", "wc_weaknesses": "69;153;191;115", "wc_questions": "21;61;380;12", "wc_limitations": "1;19;4;4", "wc_review": "222;382;664;335", "wc_reply_reviewers": "91;214;222;16", "wc_reply_authors": "56;381;335;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 36.44430682562093 ], "wc_strengths_avg": [ 66.5, 6.576473218982953 ], "wc_weaknesses_avg": [ 132.0, 45.221676218380054 ], "wc_questions_avg": [ 118.5, 152.09947402933383 ], "wc_limitations_avg": [ 7.0, 7.035623639735144 ], "wc_review_avg": [ 400.75, 162.73194984390742 ], "wc_reply_reviewers_avg": [ 135.75, 86.46494954604438 ], "wc_reply_authors_avg": [ 193.0, 166.97754340030278 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8632736620774978933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.cornell.edu;cornell.edu;cornell.edu;cornell.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal cross-learning for contextual bandits with unknown context distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72398", "id": "CzkOzKWpMa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a3017a8d202a433be56a3dfdcac6c8eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=CzkOzKWpMa", "openreview": "https://openreview.net/forum?id=CzkOzKWpMa", "poster": "/media/PosterPDFs/NeurIPS%202023/72398.png?t=1702310496.506178", "slides": "https://nips.cc/virtual/2023/poster/72398", "video": "https://nips.cc/virtual/2023/poster/72398", "author_site": 
"Jon Schneider, Julian Zimmert", "tldr": "", "abstract": "We consider the problem of designing contextual bandit algorithms in the ``cross-learning'' setting of Balseiro et al., where the learner observes the loss for the action they play in all possible contexts, not just the context of the current round. We specifically consider the setting where losses are chosen adversarially and contexts are sampled i.i.d. from an unknown distribution. In this setting, we resolve an open problem of Balseiro et al. by providing an efficient algorithm with a nearly tight (up to logarithmic factors) regret bound of $\\widetilde{O}(\\sqrt{TK})$, independent of the number of contexts. As a consequence, we obtain the first nearly tight regret bounds for the problems of learning to bid in first-price auctions (under unknown value distributions) and sleeping bandits with a stochastic action set.\n\nAt the core of our algorithm is a novel technique for coordinating the execution of a learning algorithm over multiple epochs in such a way to remove correlations between estimation of the unknown distribution and the actions played by the algorithm. This technique may be of independent interest for other learning problems involving estimation of an unknown context distribution.", "keywords": "bandits;first-price auction;sleeping bandits;contextual bandits", "primary_area": "", "supplementary_material": "/attachment/bd768ac1af3720b163c2dcbed3d6d7508677c64c.pdf", "author": "Jon Schneider;Julian Zimmert", "authorids": "~Jon_Schneider1;~Julian_Zimmert1", "gender": "M;", "homepage": "https://jschnei.github.io;", "dblp": "146/0503;190/7636", "google_scholar": "Jc97EyAAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Jon_Schneider1;~Julian_Zimmert1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Researcher;Postdoc", "bibtex": "@inproceedings{\nschneider2023optimal,\ntitle={Optimal cross-learning for contextual bandits with unknown context distributions},\nauthor={Jon Schneider and Julian Zimmert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=CzkOzKWpMa}\n}", "github": "", "project": "", "reviewers": "Eqpa;JZMx;4dhq;Z4YZ;WAx4", "pdf_size": 357862, "rating": "5;6;6;6;7", "confidence": "3;2;3;2;4", "soundness": "3;2;2;3;3", "novelty": "3;3;3;2;3", "presentation": "3;2;3;3;2", "wc_summary": "66;15;91;241;366", "wc_strengths": "39;24;43;57;44", "wc_weaknesses": "79;70;30;104;174", "wc_questions": "111;62;99;12;1", "wc_limitations": "13;6;17;10;1", "wc_review": "308;177;280;424;586", "wc_reply_reviewers": "0;23;84;5;0", "wc_reply_authors": "0;0;153;60;133", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 155.8, 129.2523113913248 ], "wc_strengths_avg": [ 41.4, 10.594338110519221 ], "wc_weaknesses_avg": [ 91.4, 47.67221412940666 ], "wc_questions_avg": [ 57.0, 44.42071588797281 ], "wc_limitations_avg": [ 9.4, 5.535341001239218 ], "wc_review_avg": [ 355.0, 139.74262055650738 ], "wc_reply_reviewers_avg": [ 22.4, 31.941195970094796 ], "wc_reply_authors_avg": [ 69.2, 64.42794424781843 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_rating_confidence": 0.42257712736425823, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11692728202876658648&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 8, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Selectivity Drives Productivity: Efficient Dataset Pruning for Enhanced Transfer Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72397", "id": "D0MII7rP3R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/749252feedd44f7f10d47ec1d674a2f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D0MII7rP3R", "openreview": "https://openreview.net/forum?id=D0MII7rP3R", "poster": "/media/PosterPDFs/NeurIPS%202023/72397.png?t=1701964923.4604945", "slides": "https://nips.cc/virtual/2023/poster/72397", "video": "https://nips.cc/virtual/2023/poster/72397", "author_site": "Yihua Zhang, Yimeng Zhang, Aochuan Chen, jinghan jia, Jiancheng Liu, Gaowen Liu, Mingyi Hong, Shiyu Chang, Sijia Liu", "tldr": "", "abstract": "Massive data is often considered essential for deep learning applications, but it also incurs significant computational and infrastructural costs. Therefore, dataset pruning (DP) has emerged as an effective way to improve data efficiency by identifying and removing redundant training samples without sacrificing performance. In this work, we aim to address the problem of DP for transfer learning, i.e., how to prune a source dataset for improved pretraining efficiency and lossless finetuning accuracy on downstream target tasks. To our best knowledge, the problem of DP for transfer learning remains open, as previous studies have primarily addressed DP and transfer learning as separate problems. By contrast, we establish a unified viewpoint to integrate DP with transfer learning and find that existing DP methods are not suitable for the transfer learning paradigm. We then propose two new DP methods, label mapping and feature mapping, for supervised and self-supervised pretraining settings respectively, by revisiting the DP problem through the lens of source-target domain mapping. Furthermore, we demonstrate the effectiveness of our approach on numerous transfer learning tasks. We show that source data classes can be pruned by up to $40\\%\\sim 80\\%$ without sacrificing the downstream performance, resulting in a significant $2\\sim 5\\times$ speed-up during the pretraining stage. 
Besides, our proposal exhibits broad applicability and can improve other computationally intensive transfer learning techniques, such as adversarial pretraining.", "keywords": "Dataset pruning;transfer learning", "primary_area": "", "supplementary_material": "/attachment/1bbc6efe7772a6b778e108d0add4811cb9a7b4cd.zip", "author": "Yihua Zhang;Yimeng Zhang;Aochuan Chen;Jinghan Jia;Jiancheng Liu;Gaowen Liu;Mingyi Hong;Shiyu Chang;Sijia Liu", "authorids": "~Yihua_Zhang1;~Yimeng_Zhang2;~Aochuan_Chen1;~Jinghan_Jia1;~Jiancheng_Liu2;~Gaowen_Liu4;~Mingyi_Hong1;~Shiyu_Chang2;~Sijia_Liu1", "gender": "M;M;M;M;M;F;M;Unspecified;M", "homepage": "https://yihua-zhang.com;https://damon-demon.github.io;https://scholar.google.com/citations?hl=en&view_op=list_works&gmla=AJsN-F6N4cEX-_kViGgRpnUVo_iBHlVXwMpnhlyB-Cdrndwj6B0jaDy088r7K9gHPGqSwsQ9tNxpijGpb1IoIB2B5KVS3Scvtdz9Mt_WR9GSou_saurFpSA&user=7pY-Ie8AAAAJ;https://jinghanjia.netlify.app/;https://ljcc0930.github.io/;;http://people.ece.umn.edu/~mhong/mingyi.html;http://people.csail.mit.edu/chang87/;https://lsjxjtu.github.io/", "dblp": ";;331/2356;286/5392;74/3002;136/1007;57/8053;28/9988;128/6972-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;bqP_zxYAAAAJ;ReWNzl4AAAAJ;NIv_aeQAAAAJ;qRnP-p0AAAAJ;r21asW4AAAAJ;C7dO_UgAAAAJ", "orcid": ";0000-0003-1608-2541;0009-0002-2300-1498;;;0009-0000-9194-1233;;;", "linkedin": "zhangyihua/;;;jinghan-jia-5194451ba/;;;;;", "or_profile": "~Yihua_Zhang1;~Yimeng_Zhang2;~Aochuan_Chen1;~Jinghan_Jia1;~Jiancheng_Liu2;~Gaowen_Liu4;~Mingyi_Hong1;~Shiyu_Chang2;~Sijia_Liu1", "aff": "Michigan State University;Intel;Michigan State University;Michigan State University;Michigan State University;Cisco Systems;University of Minnesota, Minneapolis;University of California, Santa Barbara;Michigan State University", "aff_domain": "msu.edu;intel.com;msu.edu;msu.edu;msu.edu;cisco.com;umn.edu;ucsb.edu;msu.edu", "position": "PhD student;Intern;Researcher;PhD student;MS student;Researcher;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023selectivity,\ntitle={Selectivity Drives Productivity: Efficient Dataset Pruning for Enhanced Transfer Learning},\nauthor={Yihua Zhang and Yimeng Zhang and Aochuan Chen and Jinghan Jia and Jiancheng Liu and Gaowen Liu and Mingyi Hong and Shiyu Chang and Sijia Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D0MII7rP3R}\n}", "github": "", "project": "", "reviewers": "vmFL;omyc;BXTc;RSvb", "pdf_size": 17160444, "rating": "4;5;6;7", "confidence": "5;3;3;3", "soundness": "2;3;2;4", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "52;75;84;135", "wc_strengths": "67;55;45;56", "wc_weaknesses": "279;246;182;70", "wc_questions": "14;45;166;35", "wc_limitations": "87;17;45;1", "wc_review": "499;438;522;297", "wc_reply_reviewers": "217;191;328;30", "wc_reply_authors": "1540;587;794;42", "reply_reviewers": "2;2;2;1", "reply_authors": "5;3;5;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 86.5, 30.335622624235025 ], "wc_strengths_avg": [ 55.75, 7.790218225441442 ], "wc_weaknesses_avg": [ 194.25, 79.76332177135052 ], "wc_questions_avg": [ 65.0, 59.37592104548779 ], "wc_limitations_avg": [ 37.5, 
32.630507198019465 ], "wc_review_avg": [ 439.0, 87.54141876848924 ], "wc_reply_reviewers_avg": [ 191.5, 106.49530506083354 ], "wc_reply_authors_avg": [ 740.75, 537.0118131847753 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10696100183754303457&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "msu.edu;intel.com;msu.edu;msu.edu;msu.edu;cisco.com;umn.edu;ucsb.edu;msu.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;0;2;3;4;0", "aff_unique_norm": "Michigan State University;Intel;Cisco Systems;University of Minnesota;University of California, Santa Barbara", "aff_unique_dep": ";Intel Corporation;;;", "aff_unique_url": "https://www.msu.edu;https://www.intel.com;https://www.cisco.com;https://www.minnesota.edu;https://www.ucsb.edu", "aff_unique_abbr": "MSU;Intel;Cisco;UMN;UCSB", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Minneapolis;Santa Barbara", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "BEDD: The MineRL BASALT Evaluation and Demonstrations Dataset for Training and Benchmarking Agents that Solve Fuzzy Tasks", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73655", "id": "D1MOK2t2t2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67a6726dcd555b982cabb3446ffac01d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=D1MOK2t2t2", "openreview": "https://openreview.net/forum?id=D1MOK2t2t2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73655", "video": "https://nips.cc/virtual/2023/poster/73655", "author_site": "Stephanie Milani, Anssi Kanervisto, Karolis Ramanauskas, Sander Schulhoff, Brandon Houghton, Rohin Shah", "tldr": "", "abstract": "The MineRL BASALT competition has served to catalyze advances in learning from human feedback through four hard-to-specify tasks in Minecraft, such as create and photograph a waterfall. Given the completion of two years of BASALT competitions, we offer to the community a formalized benchmark through the BASALT Evaluation and Demonstrations Dataset (BEDD), which serves as a resource for algorithm development and performance assessment. BEDD consists of a collection of 26 million image-action pairs from nearly 14,000 videos of human players completing the BASALT tasks in Minecraft. It also includes over 3,000 dense pairwise human evaluations of human and algorithmic agents. These comparisons serve as a fixed, preliminary leaderboard for evaluating newly-developed algorithms. To enable this comparison, we present a streamlined codebase for benchmarking new algorithms against the leaderboard. In addition to presenting these datasets, we conduct a detailed analysis of the data from both datasets to guide algorithm development and evaluation. 
The released code and data are available at https://github.com/minerllabs/basalt-benchmark.", "keywords": "learning from human feedback;minecraft;human evaluation;embodied agents;rlhf;demonstrations;benchmark;evaluations", "primary_area": "", "supplementary_material": "/attachment/1f949c207dba85aad822acfdf1c1437996bbbf7f.pdf", "author": "Stephanie Milani;Anssi Kanervisto;Karolis Ramanauskas;Sander V Schulhoff;Brandon Houghton;Rohin Shah", "authorids": "~Stephanie_Milani1;~Anssi_Kanervisto1;~Karolis_Ramanauskas1;~Sander_V_Schulhoff1;~Brandon_Houghton1;~Rohin_Shah1", "gender": "F;M;;M;M;M", "homepage": "https://stephmilani.github.io/;;https://ka.rol.is/;https://trigaten.github.io;;http://rohinshah.com/", "dblp": "239/6037;186/7786;;;;145/1009", "google_scholar": "vx68rkMAAAAJ;https://scholar.google.fi/citations?user=iPimqbwAAAAJ;k1RrCrgAAAAJ;;https://scholar.google.com/citations?hl=en;odFQXSYAAAAJ", "orcid": "0000-0003-1150-4418;0000-0002-7479-4574;;;;", "linkedin": ";;karolisramanauskas/;;;rohin-shah-76405832/", "or_profile": "~Stephanie_Milani1;~Anssi_Kanervisto1;~Karolis_Ramanauskas1;~Sander_V_Schulhoff1;~Brandon_Houghton1;~Rohin_Shah1", "aff": "Carnegie Mellon University;Microsoft;University of Bath;University of Maryland, College Park;;Google DeepMind", "aff_domain": "cmu.edu;microsoft.com;bath.ac.uk;umd.edu;;deepmind.com", "position": "PhD student;Researcher;PhD student;Undergrad student;;Researcher", "bibtex": "@inproceedings{\nmilani2023bedd,\ntitle={{BEDD}: The Mine{RL} {BASALT} Evaluation and Demonstrations Dataset for Training and Benchmarking Agents that Solve Fuzzy Tasks},\nauthor={Stephanie Milani and Anssi Kanervisto and Karolis Ramanauskas and Sander V Schulhoff and Brandon Houghton and Rohin Shah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=D1MOK2t2t2}\n}", "github": "", "project": "", "reviewers": "Vxj5;PcC9;328S", "pdf_size": 1772362, "rating": "6;8;10", "confidence": "3;3;4", "wc_summary_and_contributions": "209;71;96", "wc_strengths": "98;186;17", "wc_improvement": "78;103;12", "wc_limitations": "43;21;15", "wc_correctness": "9;11;20", "wc_clarity": "7;8;1", "wc_relation_to_prior_work": "38;26;9", "wc_documentation": "20;34;1", "wc_additional_feedback": "1;1;1", "wc_review": "503;461;172", "wc_reply_reviewers": "0;20;0", "wc_reply_authors": "713;479;27", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 8.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 125.33333333333333, 60.035174874586836 ], "wc_strengths_avg": [ 100.33333333333333, 69.01368624330169 ], "wc_improvement_avg": [ 64.33333333333333, 38.38691906829143 ], "wc_limitations_avg": [ 26.333333333333332, 12.036980056845193 ], "wc_correctness_avg": [ 13.333333333333334, 4.784233364802441 ], "wc_clarity_avg": [ 5.333333333333333, 3.0912061651652345 ], "wc_relation_to_prior_work_avg": [ 24.333333333333332, 11.897712198383164 ], "wc_documentation_avg": [ 18.333333333333332, 13.523641850067197 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 378.6666666666667, 147.13787940416825 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 406.3333333333333, 284.73301334564087 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 
0.8660254037844385, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10561692236814681880&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "cmu.edu;microsoft.com;bath.ac.uk;umd.edu;;deepmind.com", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Carnegie Mellon University;Microsoft;University of Bath;University of Maryland;Google", "aff_unique_dep": ";Microsoft Corporation;;;Google DeepMind", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com;https://www.bath.ac.uk;https://www/umd.edu;https://deepmind.com", "aff_unique_abbr": "CMU;Microsoft;Bath;UMD;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Temporal Dynamic Quantization for Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72396", "id": "D1sECc9fiG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/983591c3e9a0dc94a99134b3238bbe52-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D1sECc9fiG", "openreview": "https://openreview.net/forum?id=D1sECc9fiG", "poster": "/media/PosterPDFs/NeurIPS%202023/72396.png?t=1701674987.6762419", "slides": "https://nips.cc/virtual/2023/poster/72396", "video": "https://nips.cc/virtual/2023/poster/72396", "author_site": "Junhyuk So, Jungwon Lee, Daehyun Ahn, Hyungjun Kim, Eunhyeok Park", "tldr": "", "abstract": "Diffusion model has gained popularity in vision applications due to its remarkable generative performance and versatility. However, its high storage and computation demands, resulting from the model size and iterative generation, hinder its use on mobile devices. Existing quantization techniques struggle to maintain performance even in 8-bit precision due to the diffusion model's unique property of temporal variation in activation. We introduce a novel quantization method that dynamically adjusts the quantization interval based on time step information, significantly improving output quality. Unlike conventional dynamic quantization techniques, our approach has no computational overhead during inference and is compatible with both post-training quantization (PTQ) and quantization-aware training (QAT). 
Our extensive experiments demonstrate substantial improvements in output quality with the quantized model across various configurations.", "keywords": "deep learning optimization;quantization;diffusion model;generative model", "primary_area": "", "supplementary_material": "/attachment/1eff486bfe209c39ddd74fb3d7ec8417cb907ec0.pdf", "author": "Junhyuk So;Jungwon Lee;Daehyun Ahn;Hyungjun Kim;Eunhyeok Park", "authorids": "~Junhyuk_So1;~Jungwon_Lee2;~Daehyun_Ahn1;~Hyungjun_Kim2;~Eunhyeok_Park1", "gender": "M;M;;;M", "homepage": "https://github.com/junhyukso;https://github.com/Jungwon-Lee;;;", "dblp": "307/3085;;223/9559;;161/0829", "google_scholar": ";TbzDLMUAAAAJ;https://scholar.google.co.kr/citations?user=a4e-yE4AAAAJ;pX2macYAAAAJ;pBr1GV4AAAAJ", "orcid": ";0009-0006-6985-2916;;0000-0001-8403-1557;", "linkedin": ";;;;", "or_profile": "~Junhyuk_So1;~Jungwon_Lee2;~Daehyun_Ahn1;~Hyungjun_Kim2;~Eunhyeok_Park1", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology;SqueezeBits Inc.;SqueezeBits Inc.;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr;squeezebits.com;squeezebits.com;postech.ac.kr", "position": "PhD student;MS student;Researcher;CEO;Assistant Professor", "bibtex": "@inproceedings{\nso2023temporal,\ntitle={Temporal Dynamic Quantization for Diffusion Models},\nauthor={Junhyuk So and Jungwon Lee and Daehyun Ahn and Hyungjun Kim and Eunhyeok Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D1sECc9fiG}\n}", "github": "", "project": "", "reviewers": "qrrn;ijsE;7wR5;xsTz", "pdf_size": 2318931, "rating": "4;5;5;6", "confidence": "3;4;5;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "43;63;86;79", "wc_strengths": "61;44;90;37", "wc_weaknesses": "57;48;276;244", "wc_questions": "189;240;124;6", "wc_limitations": "103;9;6;26", "wc_review": "453;404;582;392", "wc_reply_reviewers": "187;132;60;72", "wc_reply_authors": "666;373;50;221", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 16.543503256565703 ], "wc_strengths_avg": [ 58.0, 20.43281674170255 ], "wc_weaknesses_avg": [ 156.25, 104.41354078853949 ], "wc_questions_avg": [ 139.75, 87.48249824964991 ], "wc_limitations_avg": [ 36.0, 39.42714800743264 ], "wc_review_avg": [ 457.75, 75.28736613801814 ], "wc_reply_reviewers_avg": [ 112.75, 50.81030899335291 ], "wc_reply_authors_avg": [ 327.5, 226.38518061039244 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4714347628226378441&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "postech.ac.kr;postech.ac.kr;squeezebits.com;squeezebits.com;postech.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Pohang University of Science and Technology;SqueezeBits Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;", "aff_unique_abbr": "POSTECH;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Flow 
Matching for Scalable Simulation-Based Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72395", "id": "D2cS6SoYlP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3663ae53ec078860bb0b9c6606e092a0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D2cS6SoYlP", "openreview": "https://openreview.net/forum?id=D2cS6SoYlP", "poster": "/media/PosterPDFs/NeurIPS%202023/72395.png?t=1701974459.041032", "slides": "https://nips.cc/virtual/2023/poster/72395", "video": "https://nips.cc/virtual/2023/poster/72395", "author_site": "Jonas Wildberger, Maximilian Dax, Simon Buchholz, Stephen Green, Jakob H Macke, Bernhard Sch\u00f6lkopf", "tldr": "", "abstract": "Neural posterior estimation methods based on discrete normalizing flows have become established tools for simulation-based inference (SBI), but scaling them to high-dimensional problems can be challenging. Building on recent advances in generative modeling, we here present flow matching posterior estimation (FMPE), a technique for SBI using continuous normalizing flows. Like diffusion models, and in contrast to discrete flows, flow matching allows for unconstrained architectures, providing enhanced flexibility for complex data modalities. Flow matching, therefore, enables exact density evaluation, fast training, and seamless scalability to large architectures---making it ideal for SBI. We show that FMPE achieves competitive performance on an established SBI benchmark, and then demonstrate its improved scalability on a challenging scientific problem: for gravitational-wave inference, FMPE outperforms methods based on comparable discrete flows, reducing training time by 30\\% with substantially improved accuracy. Our work underscores the potential of FMPE to enhance performance in challenging inference scenarios, thereby paving the way for more advanced applications to scientific problems.", "keywords": "simulation-based inference;likelihood-free inference;machine learning for physical sciences", "primary_area": "", "supplementary_material": "/attachment/bdb0fda8fac24d566ccf93644efba21cae1e9e28.pdf", "author": "Jonas Bernhard Wildberger;Maximilian Dax;Simon Buchholz;Stephen R Green;Jakob H. Macke;Bernhard Sch\u00f6lkopf", "authorids": "~Jonas_Bernhard_Wildberger1;~Maximilian_Dax1;~Simon_Buchholz1;~Stephen_R_Green1;~Jakob_H._Macke1;~Bernhard_Sch\u00f6lkopf1", "gender": "M;M;;M;;", "homepage": ";;https://www.is.mpg.de/person/sbuchholz;https://www.stephenrgreen.com;;", "dblp": "309/6792;249/8145;207/9068;259/3011;;", "google_scholar": ";VRBv6mEAAAAJ;;https://scholar.google.de/citations?user=sqvBC1wAAAAJ;;", "orcid": "0000-0002-3433-5920;;;0000-0002-6987-6313;;", "linkedin": ";;;stephen-green-57a1b639;;", "or_profile": "~Jonas_Bernhard_Wildberger1;~Maximilian_Dax1;~Simon_Buchholz1;~Stephen_R_Green1;~Jakob_H._Macke1;~Bernhard_Sch\u00f6lkopf1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max-Planck Institute;Max-Planck Institute;University of Nottingham;;", "aff_domain": "tuebingen.mpg.de;mpg.de;mpg.de;nottingham.ac.uk;;", "position": "PhD student;PhD student;Postdoc;Researcher;;", "bibtex": "@inproceedings{\nwildberger2023flow,\ntitle={Flow Matching for Scalable Simulation-Based Inference},\nauthor={Jonas Bernhard Wildberger and Maximilian Dax and Simon Buchholz and Stephen R Green and Jakob H. 
Macke and Bernhard Sch{\\\"o}lkopf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D2cS6SoYlP}\n}", "github": "", "project": "", "reviewers": "SM25;7UFh;VmG3;1iUK", "pdf_size": 2682742, "rating": "5;6;7;7", "confidence": "4;3;3;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "395;80;116;264", "wc_strengths": "53;21;15;45", "wc_weaknesses": "134;9;85;82", "wc_questions": "1;161;226;243", "wc_limitations": "10;8;2;39", "wc_review": "593;279;444;673", "wc_reply_reviewers": "0;0;141;49", "wc_reply_authors": "32;32;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 213.75, 125.3203395303412 ], "wc_strengths_avg": [ 33.5, 15.898113095584646 ], "wc_weaknesses_avg": [ 77.5, 44.612218057388716 ], "wc_questions_avg": [ 157.75, 95.53369824308069 ], "wc_limitations_avg": [ 14.75, 14.306903927824496 ], "wc_review_avg": [ 497.25, 150.4366561048204 ], "wc_reply_reviewers_avg": [ 47.5, 57.56952318718646 ], "wc_reply_authors_avg": [ 16.0, 16.0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9685109320939818562&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tuebingen.mpg.de;mpg.de;mpg.de;nottingham.ac.uk;;", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;University of Nottingham", "aff_unique_dep": "Intelligent Systems;;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpg.de;https://www.nottingham.ac.uk", "aff_unique_abbr": "MPI-IS;MPG;UoN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Optimal Transport Model Distributional Robustness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72394", "id": "D7LdL2SCCi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b91825aec2ed35150f1d3e8fb195556-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D7LdL2SCCi", "openreview": "https://openreview.net/forum?id=D7LdL2SCCi", "poster": "/media/PosterPDFs/NeurIPS%202023/72394.png?t=1701855018.9146445", "slides": "https://nips.cc/virtual/2023/poster/72394", "video": "https://nips.cc/virtual/2023/poster/72394", "author_site": "Van-Anh Nguyen, Trung Le, Anh Bui, Thanh-Toan Do, Dinh Phung", "tldr": "", "abstract": "Distributional robustness is a promising framework for training deep learning models that are less vulnerable to adversarial examples and data distribution shifts. Previous works have mainly focused on exploiting distributional robustness in the data space. In this work, we explore an optimal transport-based distributional robustness framework in model spaces. Specifically, we examine a model distribution within a Wasserstein ball centered on a given model distribution that maximizes the loss. We have developed theories that enable us to learn the optimal robust center model distribution. 
Interestingly, our developed theories allow us to flexibly incorporate the concept of sharpness awareness into training, whether it's a single model, ensemble models, or Bayesian Neural Networks, by considering specific forms of the center model distribution. These forms include a Dirac delta distribution over a single model, a uniform distribution over several models, and a general Bayesian Neural Network. Furthermore, we demonstrate that Sharpness-Aware Minimization (SAM) is a specific case of our framework when using a Dirac delta distribution over a single model, while our framework can be seen as a probabilistic extension of SAM. To validate the effectiveness of our framework in the aforementioned settings, we conducted extensive experiments, and the results reveal remarkable improvements compared to the baselines.", "keywords": "Distributional Robustness;Sharpness-aware;SAM", "primary_area": "", "supplementary_material": "/attachment/1f1ed3aca5766ad82640c6fd83d662099030fe4b.pdf", "author": "Van-Anh Nguyen;Trung Le;Anh Tuan Bui;Thanh-Toan Do;Dinh Phung", "authorids": "~Van-Anh_Nguyen1;~Trung_Le2;~Anh_Tuan_Bui2;~Thanh-Toan_Do4;~Dinh_Phung2", "gender": "F;M;M;;", "homepage": ";;https://tuananhbui89.github.io/;;", "dblp": ";;120/0106;;", "google_scholar": "I5kuXKsAAAAJ;https://scholar.google.com/citations?hl=en;jEjMZ7oAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Van-Anh_Nguyen1;~Trung_Le2;~Anh_Tuan_Bui2;~Thanh-Toan_Do4;~Dinh_Phung2", "aff": "Monash University;Monash University;Monash University;;", "aff_domain": "monash.edu;monash.edu;monash.edu;;", "position": "PhD student;Assistant Professor;PhD student;;", "bibtex": "@inproceedings{\nnguyen2023optimal,\ntitle={Optimal Transport Model Distributional Robustness},\nauthor={Van-Anh Nguyen and Trung Le and Anh Tuan Bui and Thanh-Toan Do and Dinh Phung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D7LdL2SCCi}\n}", "github": "", "project": "", "reviewers": "WK2o;X3P4;NFva;P1BN", "pdf_size": 542308, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "37;54;71;97", "wc_strengths": "22;16;50;95", "wc_weaknesses": "170;31;117;97", "wc_questions": "23;58;264;54", "wc_limitations": "29;12;35;1", "wc_review": "281;171;537;344", "wc_reply_reviewers": "511;16;31;0", "wc_reply_authors": "609;15;50;0", "reply_reviewers": "3;1;1;0", "reply_authors": "5;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.75, 22.16275028059469 ], "wc_strengths_avg": [ 45.75, 31.19595326320387 ], "wc_weaknesses_avg": [ 103.75, 49.75628101054178 ], "wc_questions_avg": [ 99.75, 95.79241880232485 ], "wc_limitations_avg": [ 19.25, 13.497684986693088 ], "wc_review_avg": [ 333.25, 132.9330188478393 ], "wc_reply_reviewers_avg": [ 139.5, 214.76556986630794 ], "wc_reply_authors_avg": [ 168.5, 254.96911577679364 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9268285098437269848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "monash.edu;monash.edu;monash.edu;;", 
"author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "On Transfer of Adversarial Robustness from Pretraining to Downstream Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72393", "id": "D8nAMRRCLS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9801626a6ffaf6664af1e983dbd0094-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D8nAMRRCLS", "openreview": "https://openreview.net/forum?id=D8nAMRRCLS", "poster": "/media/PosterPDFs/NeurIPS%202023/72393.png?t=1701689866.9049873", "slides": "https://nips.cc/virtual/2023/poster/72393", "video": "https://nips.cc/virtual/2023/poster/72393", "author_site": "Laura F. Nern, Harsh Raj, Maurice Andr\u00e9 Georgi, Yash Sharma", "tldr": "", "abstract": "As large-scale training regimes have gained popularity, the use of pretrained models for downstream tasks has become common practice in machine learning. While pretraining has been shown to enhance the performance of models in practice, the transfer of robustness properties from pretraining to downstream tasks remains poorly understood. In this study, we demonstrate that the robustness of a linear predictor on downstream tasks can be constrained by the robustness of its underlying representation, regardless of the protocol used for pretraining. We prove (i) a bound on the loss that holds independent of any downstream task, as well as (ii) a criterion for robust classification in particular. We validate our theoretical results in practical applications, show how our results can be used for calibrating expectations of downstream robustness, and when our results are useful for optimal transfer learning. 
Taken together, our results offer an initial step towards characterizing the requirements of the representation function for reliable post-adaptation performance.", "keywords": "Machine Learning Theory;Transfer Learning;Adversarial Robustness", "primary_area": "", "supplementary_material": "/attachment/9f31c9a14b1b9a4876eea28569dbc89f25e618e6.pdf", "author": "Laura Fee Nern;Harsh Raj;Maurice Georgi;Yash Sharma", "authorids": "~Laura_Fee_Nern1;~Harsh_Raj1;~Maurice_Georgi1;~Yash_Sharma1", "gender": "F;M;M;", "homepage": ";https://harshraj172.github.io/;;http://www.yash-sharma.com", "dblp": "326/1531.html;;;121/9967-1", "google_scholar": "https://scholar.google.de/citations?user=cBIJYxsAAAAJ;https://scholar.google.com/citations?view_op=list_works;;AlGCn8wAAAAJ", "orcid": "0000-0002-1429-8157;;;", "linkedin": "lfee-schneider/;harsh-raj-425593195/;https://www.linkedin.com/mwlite/in/maurice-georgi-6913a4196;yashjsharma/", "or_profile": "~Laura_Fee_Nern1;~Harsh_Raj1;~Maurice_Georgi1;~Yash_Sharma1", "aff": "Yahoo;Delhi Technological University (Delhi College of Engineering);;University of Tuebingen", "aff_domain": "yahoo-inc.com;dtu.ac.in;;uni-tuebingen.de", "position": "Researcher;Undergrad student;;PhD student", "bibtex": "@inproceedings{\nnern2023on,\ntitle={On Transfer of Adversarial Robustness from Pretraining to Downstream Tasks},\nauthor={Laura Fee Nern and Harsh Raj and Maurice Georgi and Yash Sharma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D8nAMRRCLS}\n}", "github": "", "project": "", "reviewers": "MCBm;dV4u;2ud3;9x4B", "pdf_size": 605199, "rating": "3;5;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "1;3;2;3", "presentation": "3;3;2;3", "wc_summary": "115;65;108;237", "wc_strengths": "54;36;9;88", "wc_weaknesses": "180;2;348;93", "wc_questions": "216;63;80;110", "wc_limitations": "1;41;9;29", "wc_review": "566;207;554;557", "wc_reply_reviewers": "531;18;77;51", "wc_reply_authors": "644;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 131.25, 63.98583827691875 ], "wc_strengths_avg": [ 46.75, 28.699956445959984 ], "wc_weaknesses_avg": [ 155.75, 127.59775664172156 ], "wc_questions_avg": [ 117.25, 59.44482736117584 ], "wc_limitations_avg": [ 20.0, 15.84297951775486 ], "wc_review_avg": [ 471.0, 152.48442543420623 ], "wc_reply_reviewers_avg": [ 169.25, 209.9004228199648 ], "wc_reply_authors_avg": [ 161.0, 278.86018001858923 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11227350461597161978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "yahoo-inc.com;dtu.ac.in;;uni-tuebingen.de", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Yahoo;Delhi Technological University;University of Tuebingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yahoo.com;https://www.dtu.ac.in;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Yahoo;DTU;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": 
"United States;India;Germany" }, { "title": "Fairness-guided Few-shot Prompting for Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72392", "id": "D8oHQ2qSTj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8678da90126aa58326b2fc0254b33a8c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D8oHQ2qSTj", "openreview": "https://openreview.net/forum?id=D8oHQ2qSTj", "poster": "/media/PosterPDFs/NeurIPS%202023/72392.png?t=1697424443.4240098", "slides": "https://nips.cc/virtual/2023/poster/72392", "video": "https://nips.cc/virtual/2023/poster/72392", "author_site": "Huan Ma, Changqing Zhang, Yatao Bian, Lemao Liu, Zhirui Zhang, Peilin Zhao, Shu Zhang, Huazhu Fu, Qinghua Hu, Bingzhe Wu", "tldr": "", "abstract": "Large language models have demonstrated surprising ability to perform in-context learning, i.e., these models can be directly applied to solve numerous downstream tasks by conditioning on a prompt constructed by a few input-output examples. However, prior research has shown that in-context learning can suffer from high instability due to variations in training examples, example order, and prompt formats. Therefore, the construction of an appropriate prompt is essential for improving the performance of in-context learning. In this paper, we revisit this problem from the view of predictive bias. Specifically, we introduce a metric to evaluate the predictive bias of a fixed prompt against labels or a given attributes. Then we empirically show that prompts with higher bias always lead to unsatisfactory predictive quality. Based on this observation, we propose a novel search strategy based on the greedy search to identify the near-optimal prompt for improving the performance of in-context learning. We perform comprehensive experiments with state-of-the-art mainstream models such as GPT-3 on various downstream tasks. 
Our results indicate that our method can enhance the model's in-context learning performance in an effective and interpretable manner.", "keywords": "large language models;prompts;classification", "primary_area": "", "supplementary_material": "/attachment/6eedfceabaa4b4a60831f835fa011d5754daa4ca.zip", "author": "Huan Ma;Changqing Zhang;Yatao Bian;Lemao Liu;Zhirui Zhang;Peilin Zhao;Shu Zhang;Huazhu Fu;Qinghua Hu;Bingzhe Wu", "authorids": "~Huan_Ma1;~Changqing_Zhang1;~Yatao_Bian1;~Lemao_Liu3;~Zhirui_Zhang1;~Peilin_Zhao2;bookzhang@tencent.com;~Huazhu_Fu4;~Qinghua_Hu1;~Bingzhe_Wu1", "gender": "M;M;;;M;;;M;M;M", "homepage": "https://github.com/MaHuanAAA;http://cic.tju.edu.cn/faculty/zhangchangqing/index.html;;;;;;https://hzfu.github.io;http://cic.tju.edu.cn/faculty/huqinghua/index.html;", "dblp": ";78/2668;;;202/1838;84/8411;;63/7767;;207/4843", "google_scholar": ";yJGhdykAAAAJ;;;C8Ylo7sAAAAJ;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;;https://scholar.google.com/citations?hl=en;TVSNq_wAAAAJ;_3hgtf8AAAAJ", "orcid": "0009-0000-4448-9897;;;;;0000-0001-8543-3953;;0000-0002-9702-5524;0000-0001-7765-8095;", "linkedin": "huan-ma-037711276/;;;;;;;;;", "or_profile": "~Huan_Ma1;~Changqing_Zhang1;~Yatao_Bian1;~Lemao_Liu3;~Zhirui_Zhang1;~Peilin_Zhao2;bookzhang@tencent.com;~Huazhu_Fu4;~Qinghua_Hu1;~Bingzhe_Wu1", "aff": "Tianjin University;Tianjin University;;;Tencent AI Lab;Tencent;;Institute of High Performance Computing, Singapore, A*STAR;Tianjin University;Tencent AI Lab", "aff_domain": "tju.edu.cn;tju.edu.cn;;;tencent.com;tencent.com;;ihpc.a-star.edu.sg;tju.edu.cn;tencent.com", "position": "MS student;Associate Professor;;;Senior Researcher;Researcher;;Principal Scientist;Professor;Researcher", "bibtex": "@inproceedings{\nma2023fairnessguided,\ntitle={Fairness-guided Few-shot Prompting for Large Language Models},\nauthor={Huan Ma and Changqing Zhang and Yatao Bian and Lemao Liu and Zhirui Zhang and Peilin Zhao and Shu Zhang and Huazhu Fu and Qinghua Hu and Bingzhe Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D8oHQ2qSTj}\n}", "github": "", "project": "", "reviewers": "Q4AG;bbKr;35hW;WoAP;pNzj", "pdf_size": 1595313, "rating": "4;5;5;6;6", "confidence": "3;4;3;4;4", "soundness": "3;3;3;4;3", "novelty": "3;2;3;3;3", "presentation": "3;4;2;3;3", "wc_summary": "151;103;63;102;93", "wc_strengths": "46;134;46;61;67", "wc_weaknesses": "63;73;98;158;117", "wc_questions": "144;77;10;50;112", "wc_limitations": "7;1;43;45;22", "wc_review": "411;388;260;416;411", "wc_reply_reviewers": "47;0;41;29;63", "wc_reply_authors": "242;0;0;0;75", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 102.4, 28.295582694123834 ], "wc_strengths_avg": [ 70.8, 32.664353659608814 ], "wc_weaknesses_avg": [ 101.8, 33.87860681905323 ], "wc_questions_avg": [ 78.6, 46.731573908868086 ], "wc_limitations_avg": [ 23.6, 18.017769007288333 ], "wc_review_avg": [ 377.2, 59.40168347782746 ], "wc_reply_reviewers_avg": [ 36.0, 21.071307505705477 ], "wc_reply_authors_avg": [ 63.4, 93.90548439787742 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 
0.7637626158259733, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15290194572488590101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tju.edu.cn;tju.edu.cn;;;tencent.com;tencent.com;;ihpc.a-star.edu.sg;tju.edu.cn;tencent.com", "author_num": 10, "aff_unique_index": "0;0;1;1;2;0;1", "aff_unique_norm": "Tianjin University;Tencent;Institute of High Performance Computing", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "http://www.tju.edu.cn;https://ai.tencent.com;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "TJU;Tencent AI Lab;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "A One-Size-Fits-All Approach to Improving Randomness in Paper Assignment", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72391", "id": "D94QKZA7UP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e9f9cde1b709281a06dd14f679e4c51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D94QKZA7UP", "openreview": "https://openreview.net/forum?id=D94QKZA7UP", "poster": "/media/PosterPDFs/NeurIPS%202023/72391.png?t=1700986831.771059", "slides": "https://nips.cc/virtual/2023/poster/72391", "video": "https://nips.cc/virtual/2023/poster/72391", "author_site": "Yixuan Xu, Steven Jecmen, Zimeng Song, Fei Fang", "tldr": "", "abstract": "The assignment of papers to reviewers is a crucial part of the peer review processes of large publication venues, where organizers (e.g., conference program chairs) rely on algorithms to perform automated paper assignment. As such, a major challenge for the organizers of these processes is to specify paper assignment algorithms that find appropriate assignments with respect to various desiderata. Although the main objective when choosing a good paper assignment is to maximize the expertise of each reviewer for their assigned papers, several other considerations make introducing randomization into the paper assignment desirable: robustness to malicious behavior, the ability to evaluate alternative paper assignments, reviewer diversity, and reviewer anonymity. However, it is unclear in what way one should randomize the paper assignment in order to best satisfy all of these considerations simultaneously. In this work, we present a practical, one-size-fits-all method for randomized paper assignment intended to perform well across different motivations for randomness. 
We show theoretically and experimentally that our method outperforms currently-deployed methods for randomized paper assignment on several intuitive randomness metrics, demonstrating that the randomized assignments produced by our method are general-purpose.", "keywords": "peer review;randomized paper assignment;mitigating malicious behavior;convex optimization", "primary_area": "", "supplementary_material": "/attachment/4db7432766968ff25ada1571a7690047131fbd03.zip", "author": "Yixuan Even Xu;Steven Jecmen;Zimeng Song;Fei Fang", "authorids": "~Yixuan_Even_Xu1;~Steven_Jecmen1;~Zimeng_Song1;~Fei_Fang1", "gender": "M;M;;F", "homepage": "https://yixuanevenxu.github.io/;https://sjecmen.github.io/;;https://feifang.info/", "dblp": "349/7695;266/4836;;57/2878", "google_scholar": "viloxoAAAAAJ;SMn18BwAAAAJ;;R6jE0VEAAAAJ", "orcid": "0009-0003-9360-753X;;;", "linkedin": "yixuan-xu-57584a268/;;zimengsong/;", "or_profile": "~Yixuan_Even_Xu1;~Steven_Jecmen1;~Zimeng_Song1;~Fei_Fang1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Institute for Interdisciplinary Information Sciences, Tsinghua University;Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu;tsinghua.edu.cn;cmu.edu", "position": "Intern;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nxu2023a,\ntitle={A One-Size-Fits-All Approach to Improving Randomness in Paper Assignment},\nauthor={Yixuan Even Xu and Steven Jecmen and Zimeng Song and Fei Fang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D94QKZA7UP}\n}", "github": "", "project": "", "reviewers": "FQ98;625T;euo8;iL2x", "pdf_size": 803388, "rating": "5;6;6;8", "confidence": "4;3;2;3", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "128;138;72;101", "wc_strengths": "108;60;69;51", "wc_weaknesses": "231;57;95;120", "wc_questions": "235;69;1;91", "wc_limitations": "281;44;1;25", "wc_review": "983;368;238;388", "wc_reply_reviewers": "0;33;14;59", "wc_reply_authors": "0;0;0;403", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.75, 25.655165171949292 ], "wc_strengths_avg": [ 72.0, 21.737065119284157 ], "wc_weaknesses_avg": [ 125.75, 64.77412677913921 ], "wc_questions_avg": [ 99.0, 85.24083528450434 ], "wc_limitations_avg": [ 87.75, 112.60855873333962 ], "wc_review_avg": [ 494.25, 287.99685328142044 ], "wc_reply_reviewers_avg": [ 26.5, 22.118996360594664 ], "wc_reply_authors_avg": [ 100.75, 174.50411886256438 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17581158746075187554&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "cmu.edu;cs.cmu.edu;tsinghua.edu.cn;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Tsinghua University", "aff_unique_dep": ";Institute for Interdisciplinary Information Sciences", "aff_unique_url": "https://www.cmu.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CMU;Tsinghua", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "MGDD: A Meta Generator for Fast Dataset Distillation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72390", "id": "D9CMRR5Lof", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b0506debbf49e31d25690fbd1e69cd2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=D9CMRR5Lof", "openreview": "https://openreview.net/forum?id=D9CMRR5Lof", "poster": "/media/PosterPDFs/NeurIPS%202023/72390.png?t=1701960274.136268", "slides": "https://nips.cc/virtual/2023/poster/72390", "video": "https://nips.cc/virtual/2023/poster/72390", "author_site": "Songhua Liu, Xinchao Wang", "tldr": "", "abstract": "Existing dataset distillation (DD) techniques typically rely on iterative strategies to synthesize condensed datasets, where datasets before and after distillation are forward and backward through neural networks a massive number of times. Despite the promising results achieved, the time efficiency of prior approaches is still far from satisfactory. Moreover, when different sizes of synthetic datasets are required, they have to repeat the iterative training procedures, which is highly cumbersome and lacks flexibility. In this paper, different from the time-consuming forward-backward passes, we introduce a generative fashion for dataset distillation with significantly improved efficiency. Specifically, synthetic samples are produced by a generator network conditioned on the initialization of DD, while synthetic labels are obtained by solving a least-squares problem in a feature space. Our theoretical analysis reveals that the errors of synthetic datasets solved in the original space and then processed by any conditional generators are upper-bounded. To find a satisfactory generator efficiently, we propose a meta-learning algorithm, where a meta generator is trained on a large dataset so that only a few steps are required to adapt to a target dataset. The meta generator is termed as MGDD in our approach. Once adapted, it can handle arbitrary sizes of synthetic datasets, even for those unseen during adaptation. 
Experiments demonstrate that the generator adapted with only a limited number of steps performs on par with state-of-the-art DD methods and yields a $22\times$ acceleration.", "keywords": "Dataset Distillation;Dataset Condensation;Efficient Learning;Conditional Generation;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/bd87ca6e651dd8c7c40d5037e0c562b225c7279e.zip", "author": "Songhua Liu;Xinchao Wang", "authorids": "~Songhua_Liu2;~Xinchao_Wang1", "gender": "M;M", "homepage": "http://121.37.94.87;https://sites.google.com/site/sitexinchaowang/", "dblp": "42/8978;", "google_scholar": "AnYh2rAAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Songhua_Liu2;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2023mgdd,\ntitle={{MGDD}: A Meta Generator for Fast Dataset Distillation},\nauthor={Songhua Liu and Xinchao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=D9CMRR5Lof}\n}", "github": "", "project": "", "reviewers": "pTHr;jcg2;CHTg;Bfcg", "pdf_size": 1654107, "rating": "6;6;7;8", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;4;4", "wc_summary": "104;44;63;156", "wc_strengths": "105;52;63;191", "wc_weaknesses": "377;58;620;98", "wc_questions": "156;2;152;27", "wc_limitations": "2;14;51;14", "wc_review": "744;170;949;486", "wc_reply_reviewers": "236;0;381;0", "wc_reply_authors": "682;0;103;0", "reply_reviewers": "2;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 91.75, 42.96728406590298 ], "wc_strengths_avg": [ 102.75, 54.655169014467425 ], "wc_weaknesses_avg": [ 288.25, 227.56578719130871 ], "wc_questions_avg": [ 84.25, 70.32202713232888 ], "wc_limitations_avg": [ 20.25, 18.417043736713012 ], "wc_review_avg": [ 587.25, 291.454434689198 ], "wc_reply_reviewers_avg": [ 154.25, 162.5459550404131 ], "wc_reply_authors_avg": [ 196.25, 283.5827701042502 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16144558438613435676&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "u.nus.edu;nus.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "A Smooth Binary Mechanism for Efficient Private Continual Observation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72389", "id": "DAKAkMhjSR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/99c41fb9fd53abfdd4a0259560ef1c9d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DAKAkMhjSR", "openreview": "https://openreview.net/forum?id=DAKAkMhjSR", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72389.png?t=1698407728.8568223", "slides": "https://nips.cc/virtual/2023/poster/72389", "video": "https://nips.cc/virtual/2023/poster/72389", "author_site": "Joel Daniel Andersson, Rasmus Pagh", "tldr": "", "abstract": "In privacy under continual observation we study how to release differentially private estimates based on a dataset that evolves over time. The problem of releasing private prefix sums of $x_1, x_2, x_3,\\dots\\in${$0,1$} (where the value of each $x_i$ is to be private) is particularly well-studied, and a generalized form is used in state-of-the-art methods for private stochastic gradient descent (SGD).\n\nThe seminal binary mechanism privately releases the first $t$ prefix sums with noise of variance polylogarithmic in $t$. Recently, Henzinger et al. and Denisov et al. showed that it is possible to improve on the binary mechanism in two ways: The variance of the noise can be reduced by a (large) constant factor, and also made more even across time steps. However, their algorithms for generating the noise distribution are not as efficient as one would like in terms of computation time and (in particular) space.\n\nWe address the efficiency problem by presenting a simple alternative to the binary mechanism in which 1) generating the noise takes constant average time per value, 2) the variance is reduced by a factor about 4 compared to the binary mechanism, and 3) the noise distribution at each step is identical. Empirically, a simple Python implementation of our approach outperforms the running time of the approach of Henzinger et al., as well as an attempt to improve their algorithm using high-performance algorithms for multiplication with Toeplitz matrices.", "keywords": "differential privacy;continual observation;binary mechanism", "primary_area": "", "supplementary_material": "/attachment/bb990eaac21a728773aa56bab65f88883dfbbace.zip", "author": "Joel Daniel Andersson;Rasmus Pagh", "authorids": "~Joel_Daniel_Andersson1;~Rasmus_Pagh1", "gender": ";M", "homepage": "https://jdandersson.net/;https://www.rasmuspagh.net", "dblp": "349/7870;p/RasmusPagh", "google_scholar": "YIN6oFUAAAAJ;https://scholar.google.com.tw/citations?user=VO4oS8UAAAAJ", "orcid": "0000-0003-2530-0520;0000-0002-1516-9306", "linkedin": ";", "or_profile": "~Joel_Daniel_Andersson1;~Rasmus_Pagh1", "aff": "University of Copenhagen;University of Copenhagen", "aff_domain": "ku.dk;ku.dk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\npagh2023a,\ntitle={A Smooth Binary Mechanism for Efficient Private Continual Observation},\nauthor={Rasmus Pagh and Joel Daniel Andersson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DAKAkMhjSR}\n}", "github": "", "project": "", "reviewers": "pQUM;kh8m;aGJ1;3m6M", "pdf_size": 580917, "rating": "6;6;7;7", "confidence": "4;3;4;5", "soundness": "4;3;3;3", "novelty": "3;4;3;3", "presentation": "3;2;3;3", "wc_summary": "295;177;775;166", "wc_strengths": "69;37;31;65", "wc_weaknesses": "28;109;129;153", "wc_questions": "5;28;405;207", "wc_limitations": "1;1;1;1", "wc_review": "398;352;1341;592", "wc_reply_reviewers": "192;15;126;106", "wc_reply_authors": "59;0;84;54", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], 
"wc_summary_avg": [ 353.25, 248.69295828390477 ], "wc_strengths_avg": [ 50.5, 16.695807857064 ], "wc_weaknesses_avg": [ 104.75, 46.970070257558696 ], "wc_questions_avg": [ 161.25, 160.9943710196105 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 670.75, 397.31308498462516 ], "wc_reply_reviewers_avg": [ 109.75, 63.286550703921286 ], "wc_reply_authors_avg": [ 49.25, 30.621683493890405 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9982680072564200939&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ku.dk;ku.dk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "title": "Optimal Transport for Treatment Effect Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72388", "id": "DAdfU1ASLb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1160e7f31d0a74abbbe1bbf7924b949c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DAdfU1ASLb", "openreview": "https://openreview.net/forum?id=DAdfU1ASLb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72388", "video": "https://nips.cc/virtual/2023/poster/72388", "author_site": "Hao Wang, Jiajun Fan, Zhichao Chen, Haoxuan Li, Weiming Liu, Tianqiao Liu, Quanyu Dai, Yichao Wang, Zhenhua Dong, Ruiming Tang", "tldr": "", "abstract": "Estimating individual treatment effects from observational data is challenging due to treatment selection bias. Prevalent methods mainly mitigate this issue by aligning different treatment groups in the latent space, the core of which is the calculation of distribution discrepancy. 
However, two issues that are often overlooked can render these methods invalid:\n(1) mini-batch sampling effects (MSE), where the calculated discrepancy is erroneous in non-ideal mini-batches with outcome imbalance and outliers;\n(2) unobserved confounder effects (UCE), where the unobserved confounders are not considered in the discrepancy calculation.\nBoth of these issues invalidate the calculated discrepancy, mislead the training of estimators, and thus impede the handling of treatment selection bias.\nTo tackle these issues, we propose Entire Space CounterFactual Regression (ESCFR), which is a new take on optimal transport technology in the context of causality.\nSpecifically, based on the canonical optimal transport framework, we propose a relaxed mass-preserving regularizer to address the MSE issue and design a proximal factual outcome regularizer to handle the UCE issue.\nExtensive experiments demonstrate that ESCFR estimates distribution discrepancy accurately, handles the treatment selection bias effectively, and outperforms prevalent competitors significantly.", "keywords": "treatment effect estimation;optimal transport;wasserstein;causal inference;counterfactual", "primary_area": "", "supplementary_material": "/attachment/7c9d41d449d196aeab4a43d70df4691f9234fe71.zip", "author": "Hao Wang;Jiajun Fan;Zhichao Chen;Haoxuan Li;Weiming Liu;Tianqiao Liu;Quanyu Dai;Yichao Wang;Zhenhua Dong;Ruiming Tang", "authorids": "~Hao_Wang28;~Jiajun_Fan1;~Zhichao_Chen2;~Haoxuan_Li6;~Weiming_Liu2;~Tianqiao_Liu1;~Quanyu_Dai1;~Yichao_Wang3;~Zhenhua_Dong1;~Ruiming_Tang2", "gender": ";;;M;;M;M;M;;M", "homepage": ";;;https://haoxuanli-pku.github.io/;;https://www.linkedin.com/in/tianqiao-liu-4a9446248/;;;;https://scholar.google.com/citations?user=fUtHww0AAAAJ&hl=en", "dblp": ";;;145/4965-1.html;;240/9193;210/1089;79/10448-2;;24/10003.html", "google_scholar": ";;;gtDqiucAAAAJ;;HIuCuFkAAAAJ;https://scholar.google.com/citations?hl=en;W7vCGJAAAAAJ;;https://scholar.google.com.sg/citations?user=fUtHww0AAAAJ", "orcid": ";;;0000-0003-3620-3769;;;0000-0001-7578-2738;0000-0001-7053-8269;;0000-0002-9224-2431", "linkedin": ";;;;;;;;;", "or_profile": "~Hao_Wang28;~Jiajun_Fan1;~Zhichao_Chen2;~Haoxuan_Li6;~Weiming_Liu2;~Tianqiao_Liu1;~Quanyu_Dai1;~Yichao_Wang3;~Zhenhua_Dong1;~Ruiming_Tang2", "aff": ";;;Peking University;;Tomorrow Advancing Life;Huawei Technologies Ltd.;Huawei Technologies Ltd.;;Huawei Technologies Ltd.", "aff_domain": ";;;pku.edu.cn;;tal.com;huawei.com;huawei.com;;huawei.com", "position": ";;;PhD student;;Researcher;Researcher;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nwang2023optimal,\ntitle={Optimal Transport for Treatment Effect Estimation},\nauthor={Hao Wang and Jiajun Fan and Zhichao Chen and Haoxuan Li and Weiming Liu and Tianqiao Liu and Quanyu Dai and Yichao Wang and Zhenhua Dong and Ruiming Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DAdfU1ASLb}\n}", "github": "", "project": "", "reviewers": "hjjj;MG3J;jCP6;VwsR", "pdf_size": 658126, "rating": "5;6;7;8", "confidence": "4;4;4;5", "soundness": "3;2;2;4", "novelty": "3;3;2;3", "presentation": "3;4;3;4", "wc_summary": "157;92;70;84", "wc_strengths": "64;51;100;35", "wc_weaknesses": "168;238;134;23", "wc_questions": "2;63;158;3", "wc_limitations": "2;8;11;3", "wc_review": "393;452;473;148", "wc_reply_reviewers": "71;98;164;33", "wc_reply_authors": "0;0;62;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 
6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 100.75, 33.41687447981932 ], "wc_strengths_avg": [ 62.5, 23.96351393264352 ], "wc_weaknesses_avg": [ 140.75, 77.63818326055808 ], "wc_questions_avg": [ 56.5, 63.594417994034664 ], "wc_limitations_avg": [ 6.0, 3.6742346141747673 ], "wc_review_avg": [ 366.5, 129.5154430946364 ], "wc_reply_reviewers_avg": [ 91.5, 47.80428851055102 ], "wc_reply_authors_avg": [ 15.5, 26.846787517317598 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5238722391693176529&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;;pku.edu.cn;;tal.com;huawei.com;huawei.com;;huawei.com", "author_num": 10, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Peking University;Tomorrow Advancing Life;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;;https://www.huawei.com", "aff_unique_abbr": "Peking U;;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "Brain-like Flexible Visual Inference by Harnessing Feedback Feedforward Alignment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72387", "id": "DBlkX8Nczr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b29ec434e049fb96f3c4245a405ee976-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DBlkX8Nczr", "openreview": "https://openreview.net/forum?id=DBlkX8Nczr", "poster": "/media/PosterPDFs/NeurIPS%202023/72387.png?t=1701967838.631067", "slides": "https://nips.cc/virtual/2023/poster/72387", "video": "https://nips.cc/virtual/2023/poster/72387", "author_site": "Tahereh Toosi, Elias Issa", "tldr": "", "abstract": "In natural vision, feedback connections support versatile visual inference capabilities such as making sense of the occluded or noisy bottom-up sensory information or mediating pure top-down processes such as imagination. However, the mechanisms by which the feedback pathway learns to give rise to these capabilities flexibly are not clear. We propose that top-down effects emerge through alignment between feedforward and feedback pathways, each optimizing its own objectives. To achieve this co-optimization, we introduce Feedback-Feedforward Alignment (FFA), a learning algorithm that leverages feedback and feedforward pathways as mutual credit assignment computational graphs, enabling alignment. In our study, we demonstrate the effectiveness of FFA in co-optimizing classification and reconstruction tasks on widely used MNIST and CIFAR10 datasets. Notably, the alignment mechanism in FFA endows feedback connections with emergent visual inference functions, including denoising, resolving occlusions, hallucination, and imagination. Moreover, FFA offers bio-plausibility compared to traditional backpropagation (BP) methods in implementation. By repurposing the computational graph of credit assignment into a goal-driven feedback pathway, FFA alleviates weight transport problems encountered in BP, enhancing the bio-plausibility of the learning algorithm. 
Our study presents FFA as a promising proof-of-concept for the mechanisms underlying how feedback connections in the visual cortex support flexible visual functions. This work also contributes to the broader field of visual inference underlying perceptual phenomena and has implications for developing more biologically inspired learning algorithms.", "keywords": "Visual inference;Bio-plausible learning algorithm;Feedback connections;Visual imagery;Occlusions;Noise", "primary_area": "", "supplementary_material": "/attachment/bd18e4f45fc341644fe27296519fb060e5cfab15.pdf", "author": "Tahereh Toosi;Elias Issa", "authorids": "~Tahereh_Toosi2;~Elias_Issa1", "gender": "F;", "homepage": "https://toosi.github.io/;https://issalab.neuroscience.columbia.edu", "dblp": "350/4506;", "google_scholar": "fDjSvTsAAAAJ;Qo93vOkAAAAJ", "orcid": "0000-0002-1793-0613;0000-0002-5387-7207", "linkedin": "tahereh-toosi-45b99014/;", "or_profile": "~Tahereh_Toosi2;~Elias_Issa1", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu", "position": "Associate Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\ntoosi2023brainlike,\ntitle={Brain-like Flexible Visual Inference by Harnessing Feedback Feedforward Alignment},\nauthor={Tahereh Toosi and Elias Issa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DBlkX8Nczr}\n}", "github": "", "project": "", "reviewers": "WNmV;3rRy;jn5o;gcpd;KkB4;6SSt", "pdf_size": 5444600, "rating": "4;4;6;6;6;7", "confidence": "3;4;4;4;4;5", "soundness": "2;2;3;3;3;3", "novelty": "3;2;3;2;3;4", "presentation": "3;2;3;3;2;1", "wc_summary": "80;39;106;532;136;29", "wc_strengths": "17;42;72;13;127;43", "wc_weaknesses": "235;745;56;34;378;226", "wc_questions": "427;151;53;39;100;7", "wc_limitations": "26;57;16;1;11;1", "wc_review": "785;1034;303;619;752;306", "wc_reply_reviewers": "127;211;110;0;170;513", "wc_reply_authors": "0;0;738;0;47;822", "reply_reviewers": "1;1;1;0;1;2", "reply_authors": "1;1;2;1;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 153.66666666666666, 173.12680003075462 ], "wc_strengths_avg": [ 52.333333333333336, 38.62497752606322 ], "wc_weaknesses_avg": [ 279.0, 238.55677171971735 ], "wc_questions_avg": [ 129.5, 140.76428287507215 ], "wc_limitations_avg": [ 18.666666666666668, 19.206480387850576 ], "wc_review_avg": [ 633.1666666666666, 262.7073255333564 ], "wc_reply_reviewers_avg": [ 188.5, 158.96828824223613 ], "wc_reply_authors_avg": [ 267.8333333333333, 363.34759146335654 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.6666666666666667, 0.7453559924999299 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7745966692414835, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7214293760318384611&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { 
"title": "Provably Fast Finite Particle Variants of SVGD via Virtual Particle Stochastic Approximation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72386", "id": "DBz9E5aZey", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bf1962c5b65a243ee243bb03ff2c506-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DBz9E5aZey", "openreview": "https://openreview.net/forum?id=DBz9E5aZey", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72386", "video": "https://nips.cc/virtual/2023/poster/72386", "author_site": "Aniket Das, Dheeraj Nagaraj", "tldr": "", "abstract": "Stein Variational Gradient Descent (SVGD) is a popular particle-based variational inference algorithm with impressive empirical performance across various domains. Although the population (i.e, infinite-particle) limit dynamics of SVGD is well characterized, its behavior in the finite-particle regime is far less understood. To this end, our work introduces the notion of *virtual particles* to develop novel stochastic approximations of population-limit SVGD dynamics in the space of probability measures, that are exactly realizable using finite particles. As a result, we design two computationally efficient variants of SVGD, namely VP-SVGD and GB-SVGD, with provably fast finite-particle convergence rates. Our algorithms can be viewed as specific random-batch approximations of SVGD, which are computationally more efficient than ordinary SVGD. We show that the $n$ particles output by VP-SVGD and GB-SVGD, run for $T$ steps with batch-size $K$, are at-least as good as i.i.d samples from a distribution whose Kernel Stein Discrepancy to the target is at most $O(\\tfrac{d^{1/3}}{(KT)^{1/6}})$ under standard assumptions. Our results also hold under a mild growth condition on the potential function, which is much weaker than the isoperimetric (e.g. Poincare Inequality) or information-transport conditions (e.g. Talagrand's Inequality $\\mathsf{T}_1$) generally considered in prior works. As a corollary, we analyze the convergence of the empirical measure (of the particles output by VP-SVGD and GB-SVGD) to the target distribution and demonstrate a **double exponential improvement** over the best known finite-particle analysis of SVGD. 
Beyond this, our results present the **first known oracle complexities for this setting with polynomial dimension dependence**, thereby completely eliminating the curse of dimensionality exhibited by previously known finite-particle rates.", "keywords": "Stein Variational Gradient Descent;Variational Inference;Sampling", "primary_area": "", "supplementary_material": "/attachment/ab0fc6d155184a3edda82b3eb28a0e701da5c2f3.pdf", "author": "Aniket Das;Dheeraj Mysore Nagaraj", "authorids": "~Aniket_Das1;~Dheeraj_Mysore_Nagaraj1", "gender": "M;M", "homepage": "https://aniket1998.github.io;https://dheerajmn.mit.edu", "dblp": "248/8281;215/5097", "google_scholar": "o8Dyas0AAAAJ;0g80b7sAAAAJ", "orcid": ";", "linkedin": ";dheeraj-m-nagaraj-01739792/", "or_profile": "~Aniket_Das1;~Dheeraj_Mysore_Nagaraj1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Predoctoral Researcher;Research Scientist", "bibtex": "@inproceedings{\ndas2023provably,\ntitle={Provably Fast Finite Particle Variants of {SVGD} via Virtual Particle Stochastic Approximation},\nauthor={Aniket Das and Dheeraj Mysore Nagaraj},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DBz9E5aZey}\n}", "github": "", "project": "", "reviewers": "ArA7;UYmX;T9Sw;9R2A", "pdf_size": 488769, "rating": "5;5;8;8", "confidence": "3;4;4;3", "soundness": "2;4;4;4", "novelty": "3;3;4;4", "presentation": "2;2;3;3", "wc_summary": "61;84;118;178", "wc_strengths": "68;48;228;160", "wc_weaknesses": "52;157;169;44", "wc_questions": "258;208;223;79", "wc_limitations": "2;6;11;6", "wc_review": "441;503;749;467", "wc_reply_reviewers": "12;0;81;27", "wc_reply_authors": "10;0;23;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.25, 44.05890942817355 ], "wc_strengths_avg": [ 126.0, 72.47068372797375 ], "wc_weaknesses_avg": [ 105.5, 57.72564421468157 ], "wc_questions_avg": [ 192.0, 67.71632004177427 ], "wc_limitations_avg": [ 6.25, 3.191786333700926 ], "wc_review_avg": [ 540.0, 122.65806129235861 ], "wc_reply_reviewers_avg": [ 30.0, 30.95965116082544 ], "wc_reply_authors_avg": [ 8.25, 9.443913383762052 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=999497755054520692&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Payoff-based Learning with Matrix Multiplicative Weights in Quantum Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72385", "id": "DCIsNIUCV7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/77307e2e3f326335dfeb94ab47f7a6c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DCIsNIUCV7", "openreview": "https://openreview.net/forum?id=DCIsNIUCV7", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72385.png?t=1702392244.3985193", "slides": "https://nips.cc/virtual/2023/poster/72385", "video": "https://nips.cc/virtual/2023/poster/72385", "author_site": "Kyriakos Lotidis, Panayotis Mertikopoulos, Nicholas Bambos, Jose Blanchet", "tldr": "", "abstract": "In this paper, we study the problem of learning in quantum games - and other classes of semidefinite games - with scalar, payoff-based feedback.\nFor concreteness, we focus on the widely used matrix multiplicative weights (MMW) algorithm and, instead of requiring players to have full knowledge of the game (and/or each other's chosen states), we introduce a suite of minimal-information matrix multiplicative weights (3MW) methods tailored to different information frameworks.\nThe main difficulty to attaining convergence in this setting is that, in contrast to classical finite games, quantum games have an infinite continuum of pure states (the quantum equivalent of pure strategies), so standard importance-weighting techniques for estimating payoff vectors cannot be employed.\nInstead, we borrow ideas from bandit convex optimization and we design a zeroth-order gradient sampler adapted to the semidefinite geometry of the problem at hand.\nAs a first result, we show that the 3MW method with deterministic payoff feedback retains the $\\mathcal{O}(1/\\sqrt{T})$ convergence rate of the vanilla, full information MMW algorithm in quantum min-max games, even though the players only observe a single scalar.\nSubsequently, we relax the algorithm's information requirements even further and we provide a 3MW method that only requires players to observe a random realization of their payoff observable, and converges to equilibrium at an $\\mathcal{O}(T^{-1/4})$ rate.\nFinally, going beyond zero-sum games, we show that a regularized variant of the proposed 3MW method guarantees local convergence with high probability to all equilibria that satisfy a certain first-order stability condition.", "keywords": "quantum games;Matrix Multiplicative Weights;zero-sum games;Nash equilibrium", "primary_area": "", "supplementary_material": "/attachment/5aeb325be9b52df24dc082c4daa06cabec781007.zip", "author": "Kyriakos Lotidis;Panayotis Mertikopoulos;Nicholas Bambos;Jose Blanchet", "authorids": "~Kyriakos_Lotidis1;~Panayotis_Mertikopoulos1;~Nicholas_Bambos1;~Jose_Blanchet1", "gender": ";M;M;M", "homepage": ";http://polaris.imag.fr/panayotis.mertikopoulos/;;https://web.stanford.edu/~jblanche/", "dblp": ";49/6721;b/NicholasBambos;75/5093.html", "google_scholar": ";xsusqPYAAAAJ;;https://scholar.google.co.in/citations?user=O24CcQQAAAAJ", "orcid": ";0000-0003-2026-9616;;", "linkedin": ";;;jose-blanchet", "or_profile": "~Kyriakos_Lotidis1;~Panayotis_Mertikopoulos1;~Nicholas_Bambos1;~Jose_Blanchet1", "aff": ";French National Center for Scientific Research;Stanford University;Stanford University", "aff_domain": ";imag.fr;stanford.edu;stanford.edu", "position": ";Principal Researcher;Professor;Professor", "bibtex": "@inproceedings{\nlotidis2023payoffbased,\ntitle={Payoff-based Learning with Matrix Multiplicative Weights in Quantum Games},\nauthor={Kyriakos Lotidis and Panayotis Mertikopoulos and Nicholas Bambos and Jose Blanchet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DCIsNIUCV7}\n}", "github": "", "project": "", "reviewers": "TyoP;Kb52;G5vn;UZra", "pdf_size": 1611550, "rating": "5;6;6;7", "confidence": "3;5;3;4", "soundness": "3;3;3;3", "novelty": 
"3;2;3;3", "presentation": "3;3;3;2", "wc_summary": "58;77;65;126", "wc_strengths": "52;70;12;150", "wc_weaknesses": "9;157;71;76", "wc_questions": "67;314;22;54", "wc_limitations": "1;26;12;17", "wc_review": "187;644;182;423", "wc_reply_reviewers": "30;84;13;88", "wc_reply_authors": "21;31;21;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 26.575364531836623 ], "wc_strengths_avg": [ 71.0, 50.20956084253277 ], "wc_weaknesses_avg": [ 78.25, 52.57078561330428 ], "wc_questions_avg": [ 114.25, 116.48256307276209 ], "wc_limitations_avg": [ 14.0, 9.027735042633894 ], "wc_review_avg": [ 359.0, 191.20277194643387 ], "wc_reply_reviewers_avg": [ 53.75, 32.83576556135093 ], "wc_reply_authors_avg": [ 23.0, 4.69041575982343 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9348128989058777340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "email": ";imag.fr;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "French National Center for Scientific Research;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cnrs.fr;https://www.stanford.edu", "aff_unique_abbr": "CNRS;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1", "aff_country_unique": "France;United States" }, { "title": "ParaFuzz: An Interpretability-Driven Technique for Detecting Poisoned Samples in NLP", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72384", "id": "DD0QJvPbTD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d2b752ed4726286a4b488ae16e091d64-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DD0QJvPbTD", "openreview": "https://openreview.net/forum?id=DD0QJvPbTD", "poster": "/media/PosterPDFs/NeurIPS%202023/72384.png?t=1701551619.9559908", "slides": "https://nips.cc/virtual/2023/poster/72384", "video": "https://nips.cc/virtual/2023/poster/72384", "author_site": "Lu Yan, Zhuo Zhang, Guanhong Tao, Kaiyuan Zhang, Xuan Chen, Guangyu Shen, Xiangyu Zhang", "tldr": "", "abstract": "Backdoor attacks have emerged as a prominent threat to natural language processing (NLP) models, where the presence of specific triggers in the input can lead poisoned models to misclassify these inputs to predetermined target classes. Current detection mechanisms are limited by their inability to address more covert backdoor strategies, such as style-based attacks. In this work, we propose an innovative test-time poisoned sample detection framework that hinges on the interpretability of model predictions, grounded in the semantic meaning of inputs.\nWe contend that triggers (e.g., infrequent words) are \nnot supposed to fundamentally alter the underlying semantic meanings of poisoned samples as they want to stay stealthy. 
Based on this observation, we hypothesize that while the model's predictions for paraphrased clean samples should remain stable, predictions for poisoned samples should revert to their true labels upon the mutations applied to triggers during the paraphrasing process.\nWe employ ChatGPT, a state-of-the-art large language model, as our paraphraser and formulate the trigger-removal task as a prompt engineering problem. We adopt fuzzing, a technique commonly used for unearthing software vulnerabilities, to discover optimal paraphrase prompts that can effectively eliminate triggers while concurrently maintaining input semantics.\nExperiments on 4 types of backdoor attacks, including the subtle style backdoors, and 4 distinct datasets demonstrate that our approach surpasses baseline methods, including STRIP, RAP, and ONION, in precision and recall.", "keywords": "NLP;backdoor attack;fuzzing", "primary_area": "", "supplementary_material": "/attachment/214941d82ab14876f2ab54063d65d9b65ab968a5.pdf", "author": "Lu Yan;ZHUO ZHANG;Guanhong Tao;Kaiyuan Zhang;Xuan Chen;Guangyu Shen;Xiangyu Zhang", "authorids": "~Lu_Yan1;~ZHUO_ZHANG1;~Guanhong_Tao1;~Kaiyuan_Zhang1;~Xuan_Chen3;~Guangyu_Shen1;~Xiangyu_Zhang3", "gender": "F;;;M;;M;M", "homepage": "https://lunaryan.github.io/;https://www.cs.purdue.edu/homes/zhan3299/index.html;;https://kaiyuanzhang.com/;;;https://www.cs.purdue.edu/homes/xyzhang", "dblp": "y/LuYan;16/1234-2.html;;147/6644-2;;216/6403;", "google_scholar": "giOa-kYAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;;YiMTVwgAAAAJ;PXbu1wIAAAAJ", "orcid": ";;;0000-0001-6023-363X;;;", "linkedin": ";;;kaiyuan-zhang/;;;", "or_profile": "~Lu_Yan1;~ZHUO_ZHANG1;~Guanhong_Tao1;~Kaiyuan_Zhang1;~Xuan_Chen3;~Guangyu_Shen1;~Xiangyu_Zhang3", "aff": "Purdue University;Purdue University;;Purdue University;;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;;cs.purdue.edu;;purdue.edu;cs.purdue.edu", "position": "PhD student;PhD student;;PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nyan2023parafuzz,\ntitle={ParaFuzz: An Interpretability-Driven Technique for Detecting Poisoned Samples in {NLP}},\nauthor={Lu Yan and ZHUO ZHANG and Guanhong Tao and Kaiyuan Zhang and Xuan Chen and Guangyu Shen and Xiangyu Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DD0QJvPbTD}\n}", "github": "", "project": "", "reviewers": "4qgi;oHTu;6kxG;7yjk", "pdf_size": 2452516, "rating": "5;5;7;8", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;2;4", "wc_summary": "77;98;68;116", "wc_strengths": "32;50;63;82", "wc_weaknesses": "77;148;161;59", "wc_questions": "130;103;35;47", "wc_limitations": "35;59;1;2", "wc_review": "351;458;328;306", "wc_reply_reviewers": "157;24;223;160", "wc_reply_authors": "900;83;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.75, 18.659782956937093 ], "wc_strengths_avg": [ 56.75, 18.267115262131565 ], "wc_weaknesses_avg": [ 111.25, 43.95665478627781 ], "wc_questions_avg": [ 78.75, 39.168705620686524 ], "wc_limitations_avg": [ 24.25, 24.283482040267618 ], "wc_review_avg": [ 360.75, 58.35826848013913 ], "wc_reply_reviewers_avg": 
[ 141.0, 72.50862017718997 ], "wc_reply_authors_avg": [ 245.75, 379.2481872072693 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14198735009983738074&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "purdue.edu;purdue.edu;;cs.purdue.edu;;purdue.edu;cs.purdue.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Brant: Foundation Model for Intracranial Neural Signal", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72383", "id": "DDkl9vaJyE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/535915d26859036410b0533804cee788-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DDkl9vaJyE", "openreview": "https://openreview.net/forum?id=DDkl9vaJyE", "poster": "/media/PosterPDFs/NeurIPS%202023/72383.png?t=1698496845.275258", "slides": "https://nips.cc/virtual/2023/poster/72383", "video": "https://nips.cc/virtual/2023/poster/72383", "author_site": "Daoze Zhang, Zhizhang Yuan, YANG YANG, Junru Chen, Jingjing Wang, Yafeng Li", "tldr": "", "abstract": "We propose a foundation model named Brant for modeling intracranial recordings, which learns powerful representations of intracranial neural signals by pre-training, providing a large-scale, off-the-shelf model for medicine. Brant is the largest model in the field of brain signals and is pre-trained on a large corpus of intracranial data collected by us. Brant is designed to capture long-term temporal dependency and spatial correlation from neural signals, combining information in both the time and frequency domains. As a foundation model, Brant achieves SOTA performance on various downstream tasks (i.e., neural signal forecasting, frequency-phase forecasting, imputation and seizure detection), showing its ability to generalize to a broad range of tasks. The low-resource label analysis and representation visualization further illustrate the effectiveness of our pre-training strategy. In addition, we explore the effect of model size to show that a larger model with a higher capacity can lead to performance improvements on our dataset. 
The source code and pre-trained weights are available at: https://zju-brainnet.github.io/Brant.github.io/.", "keywords": "Foundation model;Brain signal;Pretraining;Medicine", "primary_area": "", "supplementary_material": "", "author": "Daoze Zhang;Zhizhang Yuan;Yang Yang;Junru Chen;Jingjing Wang;Yafeng Li", "authorids": "~Daoze_Zhang1;~Zhizhang_Yuan1;~Yang_Yang35;~Junru_Chen1;~Jingjing_Wang9;~Yafeng_Li3", "gender": "M;M;M;M;Not Specified;M", "homepage": "https://daozezhang.github.io/;;http://yangy.org;https://mrnobodycali.github.io/;;https://nuozhu.net/", "dblp": ";;;212/6753;;84/8135", "google_scholar": "5beC28QAAAAJ;UzkMfYAAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.cz/citations?hl=zh-CN;", "orcid": ";;0000-0002-5058-4417;;;", "linkedin": ";;;;;", "or_profile": "~Daoze_Zhang1;~Zhizhang_Yuan1;~Yang_Yang35;~Junru_Chen1;~Jingjing_Wang9;~Yafeng_Li3", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;", "position": "Undergrad student;Undergrad student;Associate Professor;PhD student;;", "bibtex": "@inproceedings{\nzhang2023brant,\ntitle={Brant: Foundation Model for Intracranial Neural Signal},\nauthor={Daoze Zhang and Zhizhang Yuan and Yang Yang and Junru Chen and Jingjing Wang and Yafeng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DDkl9vaJyE}\n}", "github": "", "project": "", "reviewers": "uVSw;5spB;5rAo;T6RH", "pdf_size": 6594523, "rating": "6;6;7;7", "confidence": "4;5;2;4", "soundness": "3;3;4;3", "novelty": "2;3;3;4", "presentation": "3;3;4;3", "wc_summary": "85;140;28;75", "wc_strengths": "79;123;62;87", "wc_weaknesses": "139;430;33;92", "wc_questions": "234;42;6;92", "wc_limitations": "42;9;13;44", "wc_review": "579;744;142;390", "wc_reply_reviewers": "14;14;12;18", "wc_reply_authors": "18;21;18;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 39.805778474990284 ], "wc_strengths_avg": [ 87.75, 22.26404051379713 ], "wc_weaknesses_avg": [ 173.5, 152.77843434202356 ], "wc_questions_avg": [ 93.5, 86.676121279162 ], "wc_limitations_avg": [ 27.0, 16.077935190813527 ], "wc_review_avg": [ 463.75, 224.04505685241082 ], "wc_reply_reviewers_avg": [ 14.5, 2.179449471770337 ], "wc_reply_authors_avg": [ 18.75, 1.299038105676658 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6823231202470438762&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Discrete-Smoothness in Online Algorithms with Predictions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72382", "id": "DDmH3H78iJ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/82f0dae85424eb743017c90380e7ab9b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DDmH3H78iJ", "openreview": "https://openreview.net/forum?id=DDmH3H78iJ", "poster": "/media/PosterPDFs/NeurIPS%202023/72382.png?t=1699966594.770555", "slides": "https://nips.cc/virtual/2023/poster/72382", "video": "https://nips.cc/virtual/2023/poster/72382", "author_site": "Yossi Azar, Debmalya Panigrahi, Noam Touitou", "tldr": "", "abstract": "In recent years, there has been an increasing focus on designing online algorithms with (machine-learned) predictions. The ideal learning-augmented algorithm is comparable to the optimum when given perfect predictions (consistency), to the best online approximation for arbitrary predictions (robustness), and should interpolate between these extremes as a smooth function of the prediction error. In this paper, we quantify these guarantees in terms of a general property that we call discrete-smoothness, and achieve discrete-smooth algorithms for online covering, specifically the facility location and set cover problems. For set cover, our work improves the results of Bamas, Maggiori, and Svensson (2020) by augmenting consistency and robustness with smoothness guarantees. For facility location, our work improves on prior work by Almanza et al. (2021) by generalizing to nonuniform costs and also providing smoothness guarantees by augmenting consistency and robustness.", "keywords": "Online;Predictions;Learning-augmented;Facility Location;Set Cover", "primary_area": "", "supplementary_material": "/attachment/19da6fd890ae3423f796f63c01261d2e4a68f786.zip", "author": "Yossi Azar;Debmalya Panigrahi;Noam Touitou", "authorids": "~Yossi_Azar1;~Debmalya_Panigrahi1;~Noam_Touitou1", "gender": "M;M;M", "homepage": "http://www.cs.tau.ac.il/~azar/;https://www.debmalyapanigrahi.org/;https://noamtouitou.com", "dblp": "a/YAzar.html;81/6547;211/8092", "google_scholar": "https://scholar.google.co.il/citations?user=X8jXGq0AAAAJ;https://scholar.google.com.tw/citations?user=syv4e-EAAAAJ;-N6FOX0AAAAJ", "orcid": ";;0000-0002-5720-4114", "linkedin": ";;noam-touitou/", "or_profile": "~Yossi_Azar1;~Debmalya_Panigrahi1;~Noam_Touitou1", "aff": "Tel Aviv University;Department of Computer Science, Duke University;Amazon", "aff_domain": "tau.ac.il;cs.duke.edu;amazon.com", "position": "Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nazar2023discretesmoothness,\ntitle={Discrete-Smoothness in Online Algorithms with Predictions},\nauthor={Yossi Azar and Debmalya Panigrahi and Noam Touitou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DDmH3H78iJ}\n}", "github": "", "project": "", "reviewers": "9LeQ;mT48;h71T;MB7r", "pdf_size": 410236, "rating": "3;5;7;8", "confidence": "3;3;4;2", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "271;158;168;110", "wc_strengths": "37;27;102;69", "wc_weaknesses": "281;191;98;8", "wc_questions": "303;39;314;66", "wc_limitations": "7;1;1;1", "wc_review": "899;416;683;254", "wc_reply_reviewers": "0;8;7;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 176.75, 
58.66589724874239 ], "wc_strengths_avg": [ 58.75, 29.397066180147977 ], "wc_weaknesses_avg": [ 144.5, 101.96690639614404 ], "wc_questions_avg": [ 180.5, 128.41436835494696 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 563.0, 247.1770620425771 ], "wc_reply_reviewers_avg": [ 4.75, 3.112474899497183 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.18411492357966466, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11056106944490462123&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "tau.ac.il;cs.duke.edu;amazon.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tel Aviv University;Duke University;Amazon", "aff_unique_dep": ";Department of Computer Science;Amazon.com, Inc.", "aff_unique_url": "https://www.tau.ac.il;https://www.duke.edu;https://www.amazon.com", "aff_unique_abbr": "TAU;Duke;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Coupled Reconstruction of Cortical Surfaces by Diffeomorphic Mesh Deformation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72381", "id": "DEC7NxDJLh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff0da832a110c6537e885cdfbac80a94-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DEC7NxDJLh", "openreview": "https://openreview.net/forum?id=DEC7NxDJLh", "poster": "/media/PosterPDFs/NeurIPS%202023/72381.png?t=1701195267.788563", "slides": "https://nips.cc/virtual/2023/poster/72381", "video": "https://nips.cc/virtual/2023/poster/72381", "author_site": "Hao Zheng, Hongming Li, Yong Fan", "tldr": "", "abstract": "Accurate reconstruction of cortical surfaces from brain magnetic resonance images (MRIs) remains a challenging task due to the notorious partial volume effect in brain MRIs and the cerebral cortex's thin and highly folded patterns. Although many promising deep learning-based cortical surface reconstruction methods have been developed, they typically fail to model the interdependence between inner (white matter) and outer (pial) cortical surfaces, which can help generate cortical surfaces with spherical topology. To robustly reconstruct the cortical surfaces with topological correctness, we develop a new deep learning framework to jointly reconstruct the inner, outer, and their in-between (midthickness) surfaces and estimate cortical thickness directly from 3D MRIs. Our method first estimates the midthickness surface and then learns three diffeomorphic flows jointly to optimize the midthickness surface and deform it inward and outward to the inner and outer cortical surfaces respectively, regularized by topological correctness. Our method also outputs a cortex thickness value for each surface vertex, estimated from its diffeomorphic deformation trajectory. 
Our method has been evaluated on two large-scale neuroimaging datasets, including ADNI and OASIS, achieving state-of-the-art cortical surface reconstruction performance in terms of accuracy, surface regularity, and computation efficiency.", "keywords": "Brain MRIs;cortical surface reconstruction;deep learning", "primary_area": "", "supplementary_material": "/attachment/16e85fa22353f28bca1c00d03da7d8ef9fa80204.pdf", "author": "Hao Zheng;Hongming Li;Yong Fan", "authorids": "~Hao_Zheng1;~Hongming_Li1;yong.fan@pennmedicine.upenn.edu", "gender": "M;M;", "homepage": "https://hzaoheng123.github.io/HZ/;;", "dblp": "31/6916-6;31/3923;", "google_scholar": "43iRyc8AAAAJ;CpF6oAYAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hao_Zheng1;~Hongming_Li1;yong.fan@pennmedicine.upenn.edu", "aff": "University of Pennsylvania;University of Pennsylvania;", "aff_domain": "upenn.edu;upenn.edu;", "position": "Postdoc;Senior Research Investigator;", "bibtex": "@inproceedings{\nzheng2023coupled,\ntitle={Coupled Reconstruction of Cortical Surfaces by Diffeomorphic Mesh Deformation},\nauthor={Hao Zheng and Hongming Li and Yong Fan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DEC7NxDJLh}\n}", "github": "", "project": "", "reviewers": "DGUA;7qip;YgxF;9EVU;vxoq;2ocD", "pdf_size": 3329357, "rating": "4;4;6;6;7;7", "confidence": "5;4;3;4;4;4", "soundness": "2;3;3;3;3;3", "novelty": "2;2;2;3;2;3", "presentation": "3;3;3;3;2;4", "wc_summary": "69;69;155;38;57;54", "wc_strengths": "30;38;127;63;23;133", "wc_weaknesses": "78;147;146;202;50;30", "wc_questions": "181;36;73;1;40;2", "wc_limitations": "13;9;96;13;1;30", "wc_review": "371;299;597;317;171;249", "wc_reply_reviewers": "771;240;0;0;13;30", "wc_reply_authors": "1162;601;0;0;0;0", "reply_reviewers": "3;1;0;0;1;1", "reply_authors": "4;4;1;1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 73.66666666666667, 37.84911565090465 ], "wc_strengths_avg": [ 69.0, 44.89617652614381 ], "wc_weaknesses_avg": [ 108.83333333333333, 60.752000424311596 ], "wc_questions_avg": [ 55.5, 61.2501700677911 ], "wc_limitations_avg": [ 27.0, 32.046840717924134 ], "wc_review_avg": [ 334.0, 132.81942628998215 ], "wc_reply_reviewers_avg": [ 175.66666666666666, 279.27206965410784 ], "wc_reply_authors_avg": [ 293.8333333333333, 445.98520777662077 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.46291004988627577, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5169969303735941099&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "upenn.edu;upenn.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DreamSim: Learning New Dimensions of Human Visual Similarity using Synthetic Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72380", "id": "DEiNSfh1k7", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f09f316a3eaf59d9ced5ffaefe97e0f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DEiNSfh1k7", "openreview": "https://openreview.net/forum?id=DEiNSfh1k7", "poster": "/media/PosterPDFs/NeurIPS%202023/72380.png?t=1702204062.2721622", "slides": "https://nips.cc/virtual/2023/poster/72380", "video": "https://nips.cc/virtual/2023/poster/72380", "author_site": "Stephanie Fu, Netanel Tamir, Shobhita Sundaram, Lucy Chai, Richard Zhang, Tali Dekel, Phillip Isola", "tldr": "", "abstract": "Current perceptual similarity metrics operate at the level of pixels and patches. These metrics compare images in terms of their low-level colors and textures, but fail to capture mid-level similarities and differences in image layout, object pose, and semantic content. In this paper, we develop a perceptual metric that assesses images holistically. Our first step is to collect a new dataset of human similarity judgments over image pairs that are alike in diverse ways. Critical to this dataset is that judgments are nearly automatic and shared by all observers. To achieve this we use recent text-to-image models to create synthetic pairs that are perturbed along various dimensions. We observe that popular perceptual metrics fall short of explaining our new data, and we introduce a new metric, DreamSim, tuned to better align with human perception. We analyze how our metric is affected by different visual attributes, and find that it focuses heavily on foreground objects and semantic content while also being sensitive to color and layout. Notably, despite being trained on synthetic data, our metric generalizes to real images, giving strong results on retrieval and reconstruction tasks. Furthermore, our metric outperforms both prior learned metrics and recent large vision models on these tasks. 
Our project page: https://dreamsim-nights.github.io/", "keywords": "perceptual similarity;foundation model;perception;computer vision;image metric", "primary_area": "", "supplementary_material": "/attachment/4cdf44cc4ccc6422a902592c1f514f60edba7ef3.zip", "author": "Stephanie Fu;Netanel Yakir Tamir;Shobhita Sundaram;Lucy Chai;Richard Zhang;Tali Dekel;Phillip Isola", "authorids": "~Stephanie_Fu1;~Netanel_Yakir_Tamir1;~Shobhita_Sundaram1;~Lucy_Chai1;~Richard_Zhang1;~Tali_Dekel1;~Phillip_Isola1", "gender": "F;M;F;;M;F;M", "homepage": "https://stephanie-fu.github.io/;https://netanel-tamir.github.io/;https://ssundaram21.github.io;https://people.csail.mit.edu/lrchai/;http://richzhang.github.io;https://www.weizmann.ac.il/math/dekel/home;http://web.mit.edu/phillipi/", "dblp": "270/1541;349/5316;;245/2729;;;36/9988", "google_scholar": "Rx-h05AAAAAJ;easC2gEAAAAJ;;;LW8ze_UAAAAJ;https://scholar.google.co.il/citations?user=T0-Wo0EAAAAJ;ROILf3EAAAAJ", "orcid": "0000-0001-6591-6026;;;;;;0000-0002-1411-6704", "linkedin": "stephanie-fu/;netanel-yakir-tamir-4a9691167/;;;;;phillip-isola-a9955b20/", "or_profile": "~Stephanie_Fu1;~Netanel_Yakir_Tamir1;~Shobhita_Sundaram1;~Lucy_Chai1;~Richard_Zhang1;~Tali_Dekel1;~Phillip_Isola1", "aff": "Massachusetts Institute of Technology;Weizmann Institute of Science;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Adobe Systems;Google;Massachusetts Institute of Technology", "aff_domain": "mit.edu;weizmann.ac.il;mit.edu;mit.edu;adobe.com;google.com;mit.edu", "position": "MS student;MS student;PhD student;PhD student;Research Scientist;Researcher;Associate Professor", "bibtex": "@inproceedings{\nfu2023dreamsim,\ntitle={DreamSim: Learning New Dimensions of Human Visual Similarity using Synthetic Data},\nauthor={Stephanie Fu and Netanel Yakir Tamir and Shobhita Sundaram and Lucy Chai and Richard Zhang and Tali Dekel and Phillip Isola},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DEiNSfh1k7}\n}", "github": "", "project": "", "reviewers": "71tx;23q7;hsYj;Mq9Y", "pdf_size": 41334381, "rating": "7;7;8;8", "confidence": "3;3;5;4", "soundness": "3;3;4;4", "novelty": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "100;79;58;98", "wc_strengths": "94;37;73;126", "wc_weaknesses": "202;112;88;187", "wc_questions": "49;88;12;44", "wc_limitations": "31;5;2;1", "wc_review": "476;321;233;456", "wc_reply_reviewers": "10;0;19;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 83.75, 16.97608612136496 ], "wc_strengths_avg": [ 82.5, 32.34578797927173 ], "wc_weaknesses_avg": [ 147.25, 48.297903681215814 ], "wc_questions_avg": [ 48.25, 26.984949508939238 ], "wc_limitations_avg": [ 9.75, 12.356678356257397 ], "wc_review_avg": [ 371.5, 99.7409143731899 ], "wc_reply_reviewers_avg": [ 7.25, 7.917543811056558 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 184, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14315851209652937510&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": 
"mit.edu;weizmann.ac.il;mit.edu;mit.edu;adobe.com;google.com;mit.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;3;0", "aff_unique_norm": "Massachusetts Institute of Technology;Weizmann Institute of Science;Adobe;Google", "aff_unique_dep": ";;Adobe Systems Incorporated;Google", "aff_unique_url": "https://web.mit.edu;https://www.weizmann.org.il;https://www.adobe.com;https://www.google.com", "aff_unique_abbr": "MIT;Weizmann;Adobe;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;Israel" }, { "id": "DEqiM9CmmZ", "title": "ANQ: Approximate Nearest-Neighbor Q Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In specific domains such as autonomous driving, quantitative trading, and healthcare, explainability is crucial for developing ethical, responsible, and trustworthy reinforcement learning (RL) models. Although many deep RL algorithms have attained remarkable performance, the resulting policies are often neural networks that lack explainability, rendering them unsuitable for real-world deployment. To tackle this challenge, we introduce a novel semi-parametric reinforcement learning framework, dubbed ANQ (Approximate Nearest Neighbor Q-Learning), which capitalizes on neural networks as encoders for high performance and memory-based structures for explainability. Furthermore, we propose the Sim-Encoder contrastive learning as a component of ANQ for state representation. Our evaluations on MuJoCo continuous control tasks validate the efficacy of ANQ in solving continuous tasks while offering an explainable decision-making process.", "keywords": "reinforcement learning;explainable reinforcement learning;semi-parametric learning;Memory-based learning", "primary_area": "", "supplementary_material": "", "author": "Yiwen Chen;Chuheng Zhang;ZEDONG ZHANG;Li Zhao;Jiang Bian;HAOFENG LIU;Jiayi Tan;Marcelo H Ang Jr", "authorids": "~Yiwen_Chen1;~Chuheng_Zhang1;~ZEDONG_ZHANG1;~Li_Zhao1;~Jiang_Bian1;~HAOFENG_LIU1;~Jiayi_Tan1;~Marcelo_H_Ang_Jr1", "gender": "M;M;;F;M;;M;M", "homepage": "https://github.com/yiwc;;;https://www.microsoft.com/en-us/research/people/lizo/;https://sites.google.com/view/jiangbian;https://www.linkedin.com/in/haofeng-liu-aa48221b6/;;http://guppy.mpe.nus.edu.sg/~mpeangh/", "dblp": ";241/9716;;97/4708-7;09/851-2.html;;;", "google_scholar": ";q7M83KQAAAAJ;SdG2e9UAAAAJ;b-LJkLQAAAAJ;pZBEnY8AAAAJ;;;dMogb2EAAAAJ", "orcid": ";;;;0000-0002-9472-600X;;;0000-0001-8277-6408", "linkedin": ";;;;jbian/;;jiayitanJayee;marcelo-ang-41370b15", "or_profile": "~Yiwen_Chen1;~Chuheng_Zhang1;~ZEDONG_ZHANG1;~Li_Zhao1;~Jiang_Bian1;~HAOFENG_LIU1;~Jiayi_Tan1;~Marcelo_H_Ang_Jr1", "aff": "National University of Singapore;Microsoft;National University of Singapore;Microsoft;Microsoft;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;microsoft.com;u.nus.edu;microsoft.com;microsoft.com;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;Researcher;Researcher;Researcher;Partner Research Manager;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nchen2023anq,\ntitle={{ANQ}: Approximate Nearest-Neighbor Q Learning},\nauthor={Yiwen Chen and Chuheng Zhang and ZEDONG ZHANG and Li Zhao and Jiang Bian and HAOFENG LIU and Jiayi Tan and Marcelo H Ang Jr},\nyear={2023},\nurl={https://openreview.net/forum?id=DEqiM9CmmZ}\n}", "github": "", "project": "", "reviewers": "5Ud9;JkAh;mXa9;GxjE", "site": 
"https://openreview.net/forum?id=DEqiM9CmmZ", "pdf_size": 1336569, "rating": "3;3;4;5", "confidence": "4;4;4;2", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "123;55;111;112", "wc_strengths": "42;8;63;177", "wc_weaknesses": "187;53;158;136", "wc_questions": "57;8;91;48", "wc_limitations": "29;1;16;62", "wc_review": "438;125;439;535", "wc_reply_reviewers": "25;30;9;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.25, 26.54595072699413 ], "wc_strengths_avg": [ 72.5, 63.4448579476698 ], "wc_weaknesses_avg": [ 133.5, 49.87233702163956 ], "wc_questions_avg": [ 51.0, 29.5550334122633 ], "wc_limitations_avg": [ 27.0, 22.5055548698538 ], "wc_review_avg": [ 384.25, 154.77624979304804 ], "wc_reply_reviewers_avg": [ 19.5, 8.381527307120106 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t7wGPokNOYcJ:scholar.google.com/&scioq=ANQ:+Approximate+Nearest-Neighbor+Q+Learning&hl=en&as_sdt=0,18", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;1;0;0;0", "aff_unique_norm": "National University of Singapore;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.nus.edu.sg;https://www.microsoft.com", "aff_unique_abbr": "NUS;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;0;0;0", "aff_country_unique": "Singapore;United States" }, { "title": "Propagating Knowledge Updates to LMs Through Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72379", "id": "DFaGf3O7jf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/932147114c48f8b04d41aebc0c631158-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DFaGf3O7jf", "openreview": "https://openreview.net/forum?id=DFaGf3O7jf", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72379", "video": "https://nips.cc/virtual/2023/poster/72379", "author_site": "Shankar Padmanabhan, Yasumasa Onoe, Michael Zhang, Greg Durrett, Eunsol Choi", "tldr": "", "abstract": "Modern language models have the capacity to store and use immense amounts of knowledge about real-world entities, but it remains unclear how to update such knowledge stored in model parameters. While prior methods for updating knowledge in LMs successfully inject atomic facts, updated LMs fail to make inferences based on injected facts. In this work, we demonstrate that a context distillation-based approach can both impart knowledge about entities \\emph{and} propagate that knowledge to enable broader inferences. Our approach consists of two stages: transfer set generation and distillation on the transfer set. We first generate a transfer set by prompting a language model to generate continuations from the entity definition. Then, we update the model parameters so that the distribution of the LM (the 'student') matches the distribution of the LM conditioned on the definition (the 'teacher') on the transfer set. 
Our experiments demonstrate that this approach is more effective at propagating knowledge updates than fine-tuning and other gradient-based knowledge-editing methods. Moreover, it does not compromise performance in other contexts, even when injecting the definitions of up to 150 entities at once.", "keywords": "Knowledge editing;NLP;Distillation;deep learning;fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Shankar Padmanabhan;Yasumasa Onoe;Michael JQ Zhang;Greg Durrett;Eunsol Choi", "authorids": "~Shankar_Padmanabhan1;~Yasumasa_Onoe1;~Michael_JQ_Zhang1;~Greg_Durrett1;~Eunsol_Choi1", "gender": "M;M;M;;", "homepage": ";https://yasumasaonoe.github.io/;http://www.cs.utexas.edu/~gdurrett/;https://eunsol.github.io/;https://mikejqzhang.github.io/", "dblp": ";241/5449;69/7968;116/2765;301/8020", "google_scholar": ";Qd-puacAAAAJ;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ;6wulN88AAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;0000-0003-3607-9104;", "linkedin": "shankar-padmanabhan-1691a5222/;yasumasaonoe/;;;", "or_profile": "~Shankar_Padmanabhan1;~Yasumasa_Onoe1;~Greg_Durrett1;~Eunsol_Choi1;~Michael_J_Zhang1", "aff": "University of Texas at Austin;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "position": "Undergrad student;PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\npadmanabhan2023propagating,\ntitle={Propagating Knowledge Updates to {LM}s Through Distillation},\nauthor={Shankar Padmanabhan and Yasumasa Onoe and Michael JQ Zhang and Greg Durrett and Eunsol Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DFaGf3O7jf}\n}", "github": "", "project": "", "reviewers": "ARok;NBus;LLss;GYKR", "pdf_size": 1255810, "rating": "5;6;6;7", "confidence": "3;4;3;3", "soundness": "3;2;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "123;123;133;88", "wc_strengths": "33;46;96;25", "wc_weaknesses": "147;92;121;64", "wc_questions": "89;64;12;1", "wc_limitations": "65;3;1;1", "wc_review": "457;328;363;179", "wc_reply_reviewers": "0;0;0;260", "wc_reply_authors": "0;0;0;1352", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 116.75, 17.09349291397168 ], "wc_strengths_avg": [ 50.0, 27.595289453093258 ], "wc_weaknesses_avg": [ 106.0, 31.088583113419627 ], "wc_questions_avg": [ 41.5, 36.30771267926417 ], "wc_limitations_avg": [ 17.5, 27.436289836637897 ], "wc_review_avg": [ 331.75, 100.01343659728927 ], "wc_reply_reviewers_avg": [ 65.0, 112.58330249197702 ], "wc_reply_authors_avg": [ 338.0, 585.4331729582806 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10677223116779410050&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "utexas.edu;utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of 
Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Zero-One Laws of Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72378", "id": "DGmxTUCHYs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dfba85bc32a3cb63a96d1412062b4d8e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DGmxTUCHYs", "openreview": "https://openreview.net/forum?id=DGmxTUCHYs", "poster": "/media/PosterPDFs/NeurIPS%202023/72378.png?t=1701448646.073692", "slides": "https://nips.cc/virtual/2023/poster/72378", "video": "https://nips.cc/virtual/2023/poster/72378", "author_site": "Sam Adam-Day, Iliant, Ismail Ceylan", "tldr": "", "abstract": "Graph neural networks (GNNs) are the de facto standard deep learning architectures for machine learning on graphs. This has led to a large body of work analyzing the capabilities and limitations of these models, particularly pertaining to their representation and extrapolation capacity. We offer a novel theoretical perspective on the representation and extrapolation capacity of GNNs, by answering the question: how do GNNs behave as the number of graph nodes become very large? Under mild assumptions, we show that when we draw graphs of increasing size from the Erd\u0151s\u2013R\u00e9nyi model, the probability that such graphs are mapped to a particular output by a class of GNN classifiers tends to either zero or one. This class includes the popular graph convolutional network architecture. The result establishes `zero-one laws' for these GNNs, and analogously to other convergence laws, entails theoretical limitations on their capacity. 
We empirically verify our results, observing that the theoretical asymptotic limits are evident already on relatively small graphs.", "keywords": "graph neural networks;graph convolutional networks;zero-one law;expressivity;asymptotic behavior", "primary_area": "", "supplementary_material": "/attachment/e256a33de493347cab615e7b951ae201a3184978.zip", "author": "Sam Adam-Day;Theodor-Mihai Iliant;Ismail Ilkan Ceylan", "authorids": "~Sam_Adam-Day1;~Theodor-Mihai_Iliant1;~Ismail_Ilkan_Ceylan2", "gender": ";;", "homepage": "https://samadamday.com;https://github.com/tmiliant;https://www.cs.ox.ac.uk/people/ismaililkan.ceylan/", "dblp": "338/9995;;147/6111", "google_scholar": "wCHY5IsAAAAJ;;avJ5kQcAAAAJ", "orcid": "0000-0003-3316-1954;;0000-0003-4118-4689", "linkedin": "sam-adam-day/;;", "or_profile": "~Sam_Adam-Day1;~Theodor-Mihai_Iliant1;~Ismail_Ilkan_Ceylan2", "aff": "University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nadam-day2023zeroone,\ntitle={Zero-One Laws of Graph Neural Networks},\nauthor={Sam Adam-Day and Theodor-Mihai Iliant and Ismail Ilkan Ceylan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DGmxTUCHYs}\n}", "github": "", "project": "", "reviewers": "BoTS;P48L;ye6a;Mx1j", "pdf_size": 6395524, "rating": "3;6;6;7", "confidence": "4;3;4;4", "soundness": "3;4;4;3", "novelty": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "123;100;70;63", "wc_strengths": "8;71;72;67", "wc_weaknesses": "56;152;98;172", "wc_questions": "435;47;220;33", "wc_limitations": "1;31;45;5", "wc_review": "623;401;505;340", "wc_reply_reviewers": "57;21;72;25", "wc_reply_authors": "98;31;59;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.0, 24.052026941611388 ], "wc_strengths_avg": [ 54.5, 26.911893281595777 ], "wc_weaknesses_avg": [ 119.5, 45.57137259288994 ], "wc_questions_avg": [ 183.75, 162.68585525484383 ], "wc_limitations_avg": [ 20.5, 18.2414363469547 ], "wc_review_avg": [ 467.25, 107.54621099787757 ], "wc_reply_reviewers_avg": [ 43.75, 21.46363203188128 ], "wc_reply_authors_avg": [ 51.0, 31.216982557575932 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10497393456357227433&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ox.ac.uk;ox.ac.uk;oxford.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "First Order Stochastic Optimization with Oblivious Noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72377", "id": "DI6KQhgqUr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ec90b0eec9c1151c152ba865713f184-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DI6KQhgqUr", 
"openreview": "https://openreview.net/forum?id=DI6KQhgqUr", "poster": "/media/PosterPDFs/NeurIPS%202023/72377.png?t=1701657504.323457", "slides": "https://nips.cc/virtual/2023/poster/72377", "video": "https://nips.cc/virtual/2023/poster/72377", "author_site": "Ilias Diakonikolas, Sushrut Karmalkar, Jong Ho Park, Christos Tzamos", "tldr": "", "abstract": "We initiate the study of stochastic optimization with oblivious noise, broadly generalizing the standard heavy-tailed noise setup.\nIn our setting, in addition to random observation noise, the stochastic gradient \nmay be subject to independent \\emph{oblivious noise}, \nwhich may not have bounded moments and is not necessarily centered. \nSpecifically, we assume access to a noisy oracle for the stochastic gradient of $f$ \nat $x$, which returns a vector $\\nabla f(\\gamma, x) + \\xi$, where $\\gamma$ is \nthe bounded variance observation noise \nand $\\xi$ is the oblivious noise that is independent of $\\gamma$ and $x$. \nThe only assumption we make on the oblivious noise $\\xi$ \nis that $\\Pr[\\xi = 0] \\ge \\alpha$, for some $\\alpha \\in (0, 1)$.\nIn this setting, it is not information-theoretically possible to recover a single solution \nclose to the target when the fraction of inliers $\\alpha$ is less than $1/2$. \nOur main result is an efficient {\\em list-decodable} learner that recovers \na small list of candidates at least one of which is close to the true solution. \nOn the other hand, if $\\alpha = 1-\\epsilon$, where $0< \\epsilon < 1/2$ is sufficiently small\nconstant, the algorithm recovers a single solution.\n\nAlong the way, we develop a rejection-sampling-based algorithm to perform noisy location estimation, \nwhich may be of independent interest.", "keywords": "Oblivious noise;Robust Statistics;Heavy-tailed Stochastic Optimization;Approximate Gradients;Inexact Gradients", "primary_area": "", "supplementary_material": "/attachment/46c1fd0e125d799ee203f6d997f2a90ddc46d054.pdf", "author": "Ilias Diakonikolas;Sushrut Karmalkar;Jongho Park;Christos Tzamos", "authorids": "~Ilias_Diakonikolas1;~Sushrut_Karmalkar2;~Jongho_Park2;~Christos_Tzamos1", "gender": "M;;M;", "homepage": "http://www.iliasdiakonikolas.org/;;http://jerryjonghopark.github.io;https://tzamos.com", "dblp": "d/IliasDiakonikolas;;03/1871-4.html;79/8819", "google_scholar": "Vb3FLmkAAAAJ;;e9cfXjQAAAAJ;wB01auEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ilias_Diakonikolas1;~Sushrut_Karmalkar2;~Jongho_Park2;~Christos_Tzamos1", "aff": "University of Wisconsin, Madison;;Krafton Inc.;University of Wisconsin, Madison", "aff_domain": "wisc.edu;;krafton.com;wisc.edu", "position": "Associate Professor;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndiakonikolas2023first,\ntitle={First Order Stochastic Optimization with Oblivious Noise},\nauthor={Ilias Diakonikolas and Sushrut Karmalkar and Jongho Park and Christos Tzamos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DI6KQhgqUr}\n}", "github": "", "project": "", "reviewers": "5dvc;FMyo;2f9g;M73n;NDsr", "pdf_size": 391062, "rating": "4;4;7;7;7", "confidence": "3;3;3;4;1", "soundness": "2;3;4;3;4", "novelty": "2;3;4;3;3", "presentation": "1;2;4;3;4", "wc_summary": "141;81;166;589;253", "wc_strengths": "32;70;96;3;44", "wc_weaknesses": "152;118;78;3;21", "wc_questions": "117;102;100;3;35", "wc_limitations": "9;4;5;1;1", "wc_review": "451;375;445;599;354", "wc_reply_reviewers": "141;30;15;11;0", 
"wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 246.0, 180.1932296175414 ], "wc_strengths_avg": [ 49.0, 31.874754901018456 ], "wc_weaknesses_avg": [ 74.4, 56.36523751391455 ], "wc_questions_avg": [ 71.4, 44.355833889128945 ], "wc_limitations_avg": [ 4.0, 2.9664793948382653 ], "wc_review_avg": [ 444.8, 85.94509875496101 ], "wc_reply_reviewers_avg": [ 39.4, 51.70145065663051 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.16666666666666669, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9277305522604591093&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "wisc.edu;;krafton.com;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Wisconsin;KRAFTON Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.krafton.com", "aff_unique_abbr": "UW;Krafton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;South Korea" }, { "title": "Don\u2019t just prune by magnitude! Your mask topology is a secret weapon", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72376", "id": "DIBcdjWV7k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd5404354496e39d37b7947d8a0d7b72-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DIBcdjWV7k", "openreview": "https://openreview.net/forum?id=DIBcdjWV7k", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72376", "video": "https://nips.cc/virtual/2023/poster/72376", "author_site": "Duc Hoang, Souvik Kundu, Shiwei Liu, Shiwei Liu, Zhangyang \"Atlas\" Wang", "tldr": "", "abstract": "Recent years have witnessed significant progress in understanding the relationship between the connectivity of a deep network's architecture as a graph, and the network's performance. A few prior arts connected deep architectures to expander graphs or Ramanujan graphs, and particularly,[7] demonstrated the use of such graph connectivity measures with ranking and relative performance of various obtained sparse sub-networks (i.e. models with prune masks) without the need for training. However, no prior work explicitly explores the role of parameters in the graph's connectivity, making the graph-based understanding of prune masks and the magnitude/gradient-based pruning practice isolated from one another. This paper strives to fill in this gap, by analyzing the Weighted Spectral Gap of Ramanujan structures in sparse neural networks and investigates its correlation with final performance. We specifically examine the evolution of sparse structures under a popular dynamic sparse-to-sparse network training scheme, and intriguingly find that the generated random topologies inherently maximize Ramanujan graphs. We also identify a strong correlation between masks, performance, and the weighted spectral gap. Leveraging this observation, we propose to construct a new \"full-spectrum coordinate'' aiming to comprehensively characterize a sparse neural network's promise. 
Concretely, it consists of the classical Ramanujan's gap (structure), our proposed weighted spectral gap (parameters), and the constituent nested regular graphs within. In this new coordinate system, a sparse subnetwork's L2-distance from its original initialization is found to be nearly linearly correlated with its performance. Finally, we apply this unified perspective to develop a new actionable pruning method by sampling sparse masks to maximize the L2-coordinate distance. Our method can be augmented with the \"pruning at initialization\" (PaI) method, and significantly outperforms existing PaI methods. With only a few iterations of training (e.g., 500 iterations), we can obtain LTH-comparable performance, i.e., performance comparable to that yielded via \"pruning after training\", significantly saving pre-training costs. Codes can be found at: https://github.com/VITA-Group/FullSpectrum-PAI.", "keywords": "Pruning at Initialization;Pruning at Training;LTH;DST;Ramanujan;graph", "primary_area": "", "supplementary_material": "/attachment/cd32133dd9089214468a908acf1b1537dd36fea8.pdf", "author": "Duc N.M Hoang;Souvik Kundu;Shiwei Liu;Zhangyang Wang", "authorids": "~Duc_N.M_Hoang1;~Souvik_Kundu2;~Shiwei_Liu2;~Zhangyang_Wang1", "gender": "M;M;M;M", "homepage": ";https://ksouvik52.github.io;https://shiweiliuiiiiiii.github.io/;https://vita-group.github.io", "dblp": ";126/2210;234/8697-3.html;119/4026", "google_scholar": "v7S4UNcAAAAJ;https://scholar.google.com/citations?hl=en;73IbXtsAAAAJ;pxFyKAIAAAAJ", "orcid": ";0000-0002-3533-9405;;", "linkedin": ";souvik-kundu-64922b50/;;", "or_profile": "~Duc_N.M_Hoang1;~Souvik_Kundu2;~Shiwei_Liu2;~Zhangyang_Wang1", "aff": "University of Texas, Austin;Intel;University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;intel.com;utexas.edu;utexas.edu", "position": "PhD student;Researcher;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nhoang2023dont,\ntitle={Don{\\textquoteright}t just prune by magnitude! 
Your mask topology is a secret weapon},\nauthor={Duc N.M Hoang and Souvik Kundu and Shiwei Liu and Zhangyang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DIBcdjWV7k}\n}", "github": "", "project": "", "reviewers": "i7oA;62wf;QXrC;9NeG", "pdf_size": 1648152, "rating": "3;5;7;7", "confidence": "2;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;3;2", "wc_summary": "75;77;54;72", "wc_strengths": "43;17;197;152", "wc_weaknesses": "132;77;210;208", "wc_questions": "19;5;23;5", "wc_limitations": "16;3;1;8", "wc_review": "285;179;485;445", "wc_reply_reviewers": "111;125;12;31", "wc_reply_authors": "136;569;27;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.5, 9.12414379544733 ], "wc_strengths_avg": [ 102.25, 74.5498993963104 ], "wc_weaknesses_avg": [ 156.75, 55.755605099397854 ], "wc_questions_avg": [ 13.0, 8.12403840463596 ], "wc_limitations_avg": [ 7.0, 5.787918451395113 ], "wc_review_avg": [ 348.5, 123.19395277366499 ], "wc_reply_reviewers_avg": [ 69.75, 48.96618731328793 ], "wc_reply_authors_avg": [ 183.0, 228.59899387355142 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13375297124111595069&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "utexas.edu;intel.com;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Texas at Austin;Intel", "aff_unique_dep": ";Intel Corporation", "aff_unique_url": "https://www.utexas.edu;https://www.intel.com", "aff_unique_abbr": "UT Austin;Intel", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "HA-ViD: A Human Assembly Video Dataset for Comprehensive Assembly Knowledge Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73654", "id": "DILUIcDmU9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d40e6e4b3ee6c24f2bf2cb72c2412f4b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=DILUIcDmU9", "openreview": "https://openreview.net/forum?id=DILUIcDmU9", "poster": "/media/PosterPDFs/NeurIPS%202023/73654.png?t=1700691806.3899484", "slides": "https://nips.cc/virtual/2023/poster/73654", "video": "https://nips.cc/virtual/2023/poster/73654", "author_site": "Hao Zheng, Regina Lee, Yuqian Lu", "tldr": "", "abstract": "Understanding comprehensive assembly knowledge from videos is critical for futuristic ultra-intelligent industry. To enable technological breakthroughs, we present HA-ViD \u2013 the first human assembly video dataset that features representative industrial assembly scenarios, a natural procedural knowledge acquisition process, and consistent human-robot shared annotations. 
Specifically, HA-ViD captures diverse collaboration patterns of real-world assembly, natural human behaviors and learning progression during assembly, and granular action annotations covering subject, action verb, manipulated object, target object, and tool. We provide 3222 multi-view and multi-modality videos, 1.5M frames, 96K temporal labels and 2M spatial labels. We benchmark four foundational video understanding tasks: action recognition, action segmentation, object detection and multi-object tracking. Importantly, we analyze their performance and the further reasoning steps for comprehending knowledge in assembly progress, process efficiency, task collaboration, skill parameters and human intention. Details of HA-ViD are available at: https://iai-hrc.github.io/ha-vid.", "keywords": "Assembly video dataset;Video understanding;Assembly knowledge understanding", "primary_area": "", "supplementary_material": "/attachment/f0f808dd2b172de43b2032e419a1f29300093c68.pdf", "author": "Hao Zheng;Regina Lee;Yuqian Lu", "authorids": "~Hao_Zheng7;~Regina_Lee2;~Yuqian_Lu1", "gender": "M;;", "homepage": "https://hao-zheng-research.github.io;;", "dblp": ";;157/2471", "google_scholar": "OHEa2rIAAAAJ;;https://scholar.google.co.nz/citations?user=E4VnO8UAAAAJ", "orcid": "0000-0003-0388-0755;0000-0002-9667-4709;", "linkedin": "hao-zheng-ab7b15248/;;", "or_profile": "~Hao_Zheng7;~Regina_Lee2;~Yuqian_Lu1", "aff": "University of Auckland;University of Auckland;University of Auckland", "aff_domain": "auckland.ac.nz;auckland.ac.nz;auckland.ac.nz", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzheng2023havid,\ntitle={{HA}-ViD: A Human Assembly Video Dataset for Comprehensive Assembly Knowledge Understanding},\nauthor={Hao Zheng and Regina Lee and Yuqian Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=DILUIcDmU9}\n}", "github": "", "project": "", "reviewers": "VsBS;ei2t;soUy;ZuD2;YGBP;HHfr;NUWb", "pdf_size": 12446464, "rating": "6;6;7;7;7;7;8", "confidence": "4;4;3;3;4;5;4", "wc_summary_and_contributions": "67;55;32;78;70;151;40", "wc_strengths": "50;33;26;114;27;34;72", "wc_improvement": "80;49;37;247;25;24;70", "wc_limitations": "164;2;9;8;1;1;7", "wc_correctness": "1;1;1;6;1;8;7", "wc_clarity": "1;1;16;1;1;10;8", "wc_relation_to_prior_work": "2;1;6;1;1;27;1", "wc_documentation": "11;9;1;6;1;1;7", "wc_additional_feedback": "1;1;1;1;1;1;1", "wc_review": "377;152;129;462;128;257;213", "wc_reply_reviewers": "62;35;0;152;0;0;0", "wc_reply_authors": "978;503;329;755;156;242;309", "reply_reviewers": "1;1;0;1;0;0;0", "reply_authors": "2;1;1;1;1;1;1", "rating_avg": [ 6.857142857142857, 0.6388765649999398 ], "confidence_avg": [ 3.857142857142857, 0.6388765649999399 ], "wc_summary_and_contributions_avg": [ 70.42857142857143, 36.25645614569767 ], "wc_strengths_avg": [ 50.857142857142854, 29.82055174074499 ], "wc_improvement_avg": [ 76.0, 72.55342858886821 ], "wc_limitations_avg": [ 27.428571428571427, 55.843805495729775 ], "wc_correctness_avg": [ 3.5714285714285716, 3.0169588688489823 ], "wc_clarity_avg": [ 5.428571428571429, 5.576920370269467 ], "wc_relation_to_prior_work_avg": [ 5.571428571428571, 8.909980645435816 ], "wc_documentation_avg": [ 5.142857142857143, 3.8703477668983046 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 245.42857142857142, 120.40035256575948 ], "wc_reply_reviewers_avg": [ 35.57142857142857, 52.52987488184263 ], 
"wc_reply_authors_avg": [ 467.42857142857144, 276.8995986907566 ], "reply_reviewers_avg": [ 0.42857142857142855, 0.4948716593053935 ], "reply_authors_avg": [ 1.1428571428571428, 0.34992710611188266 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.05000000000000001, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14520295642723990644&as_sdt=20005&sciodt=0,9&hl=en", "gs_version_total": 5, "email": "auckland.ac.nz;auckland.ac.nz;auckland.ac.nz", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Auckland", "aff_unique_dep": "", "aff_unique_url": "https://www.auckland.ac.nz", "aff_unique_abbr": "UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "New Zealand" }, { "title": "EgoTracks: A Long-term Egocentric Visual Object Tracking Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73653", "id": "DIeZu6nqvo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef01d91aa87e7701aa9c8dc66a2d5bdb-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=DIeZu6nqvo", "openreview": "https://openreview.net/forum?id=DIeZu6nqvo", "poster": "/media/PosterPDFs/NeurIPS%202023/73653.png?t=1699408619.9611537", "slides": "https://nips.cc/virtual/2023/poster/73653", "video": "https://nips.cc/virtual/2023/poster/73653", "author_site": "Hao Tang, Kevin J Liang, Kristen Grauman, Matt Feiszli, Weiyao Wang", "tldr": "", "abstract": "Visual object tracking is a key component to many egocentric vision problems. However, the full spectrum of challenges of egocentric tracking faced by an embodied AI is underrepresented in many existing datasets; these tend to focus on relatively short, third-person videos. Egocentric video has several distinguishing characteristics from those commonly found in past datasets: frequent large camera motions and hand interactions with objects commonly lead to occlusions or objects exiting the frame, and object appearance can change rapidly due to widely different points of view, scale, or object states. Embodied tracking is also naturally long-term, and being able to consistently (re-)associate objects to their appearances and disappearances over as long as a lifetime is critical. Previous datasets under-emphasize this re-detection problem, and their \"framed\" nature has led to adoption of various spatiotemporal priors that we find do not necessarily generalize to egocentric video. We thus introduce EgoTracks, a new dataset for long-term egocentric visual object tracking. Sourced from the Ego4D dataset, this new dataset presents a significant challenge to recent state-of-the-art single-object tracking models, which we find score poorly on traditional tracking metrics for our new dataset, compared to popular benchmarks. We further show improvements that can be made to a STARK tracker to significantly increase its performance on egocentric data, resulting in a baseline model we call EgoSTARK. 
We publicly release our annotations and benchmark, hoping our dataset leads to further advancements in tracking.", "keywords": "single object tracking; egocentric vision", "primary_area": "", "supplementary_material": "", "author": "Hao Tang;Kevin J Liang;Kristen Grauman;Matt Feiszli;Weiyao Wang", "authorids": "~Hao_Tang14;~Kevin_J_Liang1;~Kristen_Grauman1;~Matt_Feiszli1;~Weiyao_Wang1", "gender": ";M;F;M;M", "homepage": "https://tanghaotommy.github.io/;https://kevinjliang.github.io/;http://www.cs.utexas.edu/~grauman/;;https://research.fb.com/people/wang-weiyao/", "dblp": ";230/8348;57/4553;182/8255;206/6183-1", "google_scholar": "2X3D1-4AAAAJ;DBqwS2YAAAAJ;Jp6Mz1sAAAAJ;A-wA73gAAAAJ;", "orcid": ";;;;", "linkedin": ";kevin-j-liang/;;matt-feiszli-76b34b/;", "or_profile": "~Hao_Tang14;~Kevin_J_Liang1;~Kristen_Grauman1;~Matt_Feiszli1;~Weiyao_Wang1", "aff": "Meta Platforms;Meta;University of Texas, Austin;Meta AI;Meta Facebook", "aff_domain": "meta.com;meta.com;utexas.edu;fb.com;meta.com", "position": "Researcher;Research Scientist;Professor;Research Scientist;Researcher", "bibtex": "@inproceedings{\ntang2023egotracks,\ntitle={EgoTracks: A Long-term Egocentric Visual Object Tracking Dataset},\nauthor={Hao Tang and Kevin J Liang and Kristen Grauman and Matt Feiszli and Weiyao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=DIeZu6nqvo}\n}", "github": "", "project": "", "reviewers": "G7Uz;XGD2;N2DY;Pzo5;hGyN", "pdf_size": 6992171, "rating": "6;6;6;7;8", "confidence": "4;4;5;4;4", "wc_summary_and_contributions": "74;76;47;89;56", "wc_strengths": "114;35;22;20;71", "wc_improvement": "143;127;40;81;44", "wc_limitations": "7;2;8;7;23", "wc_correctness": "48;1;18;5;8", "wc_clarity": "16;1;8;5;5", "wc_relation_to_prior_work": "75;1;16;10;1", "wc_documentation": "9;5;4;11;20", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "487;249;164;229;229", "wc_reply_reviewers": "0;0;0;48;0", "wc_reply_authors": "375;687;254;498;494", "reply_reviewers": "0;0;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 68.4, 15.001333274079341 ], "wc_strengths_avg": [ 52.4, 35.82513084414347 ], "wc_improvement_avg": [ 87.0, 42.02380277890139 ], "wc_limitations_avg": [ 9.4, 7.116178749862878 ], "wc_correctness_avg": [ 16.0, 16.95877354056006 ], "wc_clarity_avg": [ 7.0, 5.019960159204453 ], "wc_relation_to_prior_work_avg": [ 20.6, 27.789206537790893 ], "wc_documentation_avg": [ 9.8, 5.706137047074843 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 271.6, 111.45869189973476 ], "wc_reply_reviewers_avg": [ 9.6, 19.2 ], "wc_reply_authors_avg": [ 461.6, 144.06748418709893 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13417175901029473820&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "meta.com;meta.com;utexas.edu;fb.com;meta.com", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Meta;University of Texas at Austin", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://www.meta.com;https://www.utexas.edu", "aff_unique_abbr": "Meta;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Optimal Convergence Rate for Exact Policy Mirror Descent in Discounted Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72375", "id": "DKHEkP7Idx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0d7b528c31bc3f9a0d5bab515ed6ed5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DKHEkP7Idx", "openreview": "https://openreview.net/forum?id=DKHEkP7Idx", "poster": "/media/PosterPDFs/NeurIPS%202023/72375.png?t=1701509545.1809635", "slides": "https://nips.cc/virtual/2023/poster/72375", "video": "https://nips.cc/virtual/2023/poster/72375", "author_site": "Emmeran Johnson, Ciara Pike-Burke, Patrick Rebeschini", "tldr": "", "abstract": "Policy Mirror Descent (PMD) is a general family of algorithms that covers a wide range of novel and fundamental methods in reinforcement learning. Motivated by the instability of policy iteration (PI) with inexact policy evaluation, unregularised PMD algorithmically regularises the policy improvement step of PI without regularising the objective function. With exact policy evaluation, PI is known to converge linearly with a rate given by the discount factor $\\gamma$ of a Markov Decision Process. In this work, we bridge the gap between PI and PMD with exact policy evaluation and show that the dimension-free $\\gamma$-rate of PI can be achieved by the general family of unregularised PMD algorithms under an adaptive step-size. We show that both the rate and step-size are unimprovable for PMD: we provide matching lower bounds that demonstrate that the $\\gamma$-rate is optimal for PMD methods as well as PI and that the adaptive step-size is necessary to achieve it. Our work is the first to relate PMD to rate-optimality and step-size necessity. Our study of the convergence of PMD avoids the use of the performance difference lemma, which leads to a direct analysis of independent interest. 
We also extend the analysis to the inexact setting and establish the first dimension-optimal sample complexity for unregularised PMD under a generative model, improving upon the best-known result.", "keywords": "Reinforcement Learning Theory;Policy Mirror Descent;Policy Gradient", "primary_area": "", "supplementary_material": "/attachment/1a735dbcfc3a5610e0999e7dfdde52cacf9d9292.zip", "author": "Emmeran Johnson;Ciara Pike-Burke;Patrick Rebeschini", "authorids": "~Emmeran_Johnson1;~Ciara_Pike-Burke2;~Patrick_Rebeschini1", "gender": "M;;M", "homepage": ";https://www.ma.imperial.ac.uk/~cpikebur/;http://www.stats.ox.ac.uk/~rebeschi/", "dblp": "340/6893;202/1263;164/7439", "google_scholar": "ZubVvboAAAAJ;Hl1vu1MAAAAJ;", "orcid": ";;0000-0001-7772-4160", "linkedin": ";;patrick-rebeschini/", "or_profile": "~Emmeran_Johnson1;~Ciara_Pike-Burke2;~Patrick_Rebeschini1", "aff": "Imperial College London;Imperial College London;University of Oxford", "aff_domain": "ic.ac.uk;imperial.ac.uk;oxford.ac.uk", "position": "PhD student;Lecturer;Associate Professor", "bibtex": "@inproceedings{\njohnson2023optimal,\ntitle={Optimal Convergence Rate for Exact Policy Mirror Descent in Discounted Markov Decision Processes},\nauthor={Emmeran Johnson and Ciara Pike-Burke and Patrick Rebeschini},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DKHEkP7Idx}\n}", "github": "", "project": "", "reviewers": "wj3u;GGgc;TTxd;YE79", "pdf_size": 434508, "rating": "5;6;6;6", "confidence": "1;4;2;4", "soundness": "2;4;3;3", "novelty": "2;3;2;3", "presentation": "2;4;3;4", "wc_summary": "38;56;151;116", "wc_strengths": "31;27;55;107", "wc_weaknesses": "83;94;267;285", "wc_questions": "49;69;126;132", "wc_limitations": "1;13;1;27", "wc_review": "202;259;600;667", "wc_reply_reviewers": "0;173;217;51", "wc_reply_authors": "0;82;169;0", "reply_reviewers": "0;2;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 90.25, 45.433330276351086 ], "wc_strengths_avg": [ 55.0, 31.874754901018456 ], "wc_weaknesses_avg": [ 182.25, 94.04619875359131 ], "wc_questions_avg": [ 94.0, 35.770099245039845 ], "wc_limitations_avg": [ 10.5, 10.712142642814275 ], "wc_review_avg": [ 432.0, 203.88599755745855 ], "wc_reply_reviewers_avg": [ 110.25, 88.03230940967072 ], "wc_reply_authors_avg": [ 62.75, 69.8833850067382 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=504436734810589433&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ic.ac.uk;imperial.ac.uk;oxford.ac.uk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Imperial College London;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "ICL;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "On the Stability-Plasticity Dilemma in Continual Meta-Learning: Theory and Algorithm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72374", "id": 
"DNHGKeOhLl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57587d8d6a7ede0e5302fc22d0878c53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DNHGKeOhLl", "openreview": "https://openreview.net/forum?id=DNHGKeOhLl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72374", "video": "https://nips.cc/virtual/2023/poster/72374", "author_site": "Qi CHEN, Changjian Shui, Ligong Han, Mario Marchand", "tldr": "", "abstract": "We focus on Continual Meta-Learning (CML), which targets accumulating and exploiting meta-knowledge on a sequence of non-i.i.d. tasks. The primary challenge is to strike a balance between stability and plasticity, where a model should be stable to avoid catastrophic forgetting in previous tasks and plastic to learn generalizable concepts from new tasks. To address this, we formulate the CML objective as controlling the average excess risk upper bound of the task sequence, which reflects the trade-off between forgetting and generalization. Based on the objective, we introduce a unified theoretical framework for CML in both static and shifting environments, providing guarantees for various task-specific learning algorithms. Moreover, we first present a rigorous analysis of a bi-level trade-off in shifting environments. To approach the optimal trade-off, we propose a novel algorithm that dynamically adjusts the meta-parameter and its learning rate w.r.t environment change. Empirical evaluations on synthetic and real datasets illustrate the effectiveness of the proposed theory and algorithm.", "keywords": "continual meta-learning; transfer learning; stability-plasticity dilemma;", "primary_area": "", "supplementary_material": "/attachment/99f298d7fb1e8c8dc4dd66ede5da5c704fafef23.zip", "author": "Qi CHEN;Changjian Shui;Ligong Han;Mario Marchand", "authorids": "~Qi_CHEN6;~Changjian_Shui2;~Ligong_Han1;~Mario_Marchand1", "gender": "F;M;;Not Specified", "homepage": "https://livreq.github.io/;https://phymhan.github.io;http://www2.ift.ulaval.ca/~mmarchand/;https://cjshui.github.io", "dblp": "66/6320-15.html;187/1675;01/4590;215/5461", "google_scholar": "MqLoSeoAAAAJ;n2v43R4AAAAJ;https://scholar.google.ca/citations?user=M792u2sAAAAJ;r91NXUgAAAAJ", "orcid": "0000-0002-7213-0221;0000-0003-3166-0848;;", "linkedin": ";ligongh/;;", "or_profile": "~Qi_CHEN6;~Ligong_Han1;~Mario_Marchand1;~changjian_shui1", "aff": "Laval university;Rutgers University;Laval university;McGill University", "aff_domain": "ulaval.ca;rutgers.edu;ulaval.ca;mcgill.ca", "position": "PhD student;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nchen2023on,\ntitle={On the Stability-Plasticity Dilemma in Continual Meta-Learning: Theory and Algorithm},\nauthor={Qi CHEN and Changjian Shui and Ligong Han and Mario Marchand},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DNHGKeOhLl}\n}", "github": "", "project": "", "reviewers": "JcdX;9fyt;HjRj;iCpz;Rxnp", "pdf_size": 2750067, "rating": "5;6;7;7;7", "confidence": "2;3;2;4;3", "soundness": "3;2;4;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;4;3;3", "wc_summary": "44;87;96;100;62", "wc_strengths": "76;16;62;57;17", "wc_weaknesses": "57;125;116;28;14", "wc_questions": "26;231;38;22;1", "wc_limitations": "2;5;1;45;1", "wc_review": "205;464;313;252;95", "wc_reply_reviewers": "0;76;243;0;0", "wc_reply_authors": "0;170;30;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;2;2;1;1", "rating_avg": [ 6.4, 
0.7999999999999999 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 77.8, 21.451340284467076 ], "wc_strengths_avg": [ 45.6, 24.565015774470815 ], "wc_weaknesses_avg": [ 68.0, 45.14421336118285 ], "wc_questions_avg": [ 63.6, 84.54726488775377 ], "wc_limitations_avg": [ 10.8, 17.16275036233995 ], "wc_review_avg": [ 265.8, 122.12354400360317 ], "wc_reply_reviewers_avg": [ 63.8, 94.31097497110292 ], "wc_reply_authors_avg": [ 40.0, 66.03029607687671 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4677071733467428, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9720928308449865801&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ulaval.ca;rutgers.edu;ulaval.ca;mcgill.ca", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Laval University;Rutgers University;McGill University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ulaval.ca;https://www.rutgers.edu;https://www.mcgill.ca", "aff_unique_abbr": "Laval;Rutgers;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Crystal Structure Prediction by Joint Equivariant Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72373", "id": "DNdN26m2Jk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38b787fc530d0b31825827e2cc306656-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DNdN26m2Jk", "openreview": "https://openreview.net/forum?id=DNdN26m2Jk", "poster": "/media/PosterPDFs/NeurIPS%202023/72373.png?t=1702397031.5599635", "slides": "https://nips.cc/virtual/2023/poster/72373", "video": "https://nips.cc/virtual/2023/poster/72373", "author_site": "Rui Jiao, Wenbing Huang, Peijia Lin, Jiaqi Han, Pin Chen, Yutong Lu, Yang Liu", "tldr": "", "abstract": "Crystal Structure Prediction (CSP) is crucial in various scientific disciplines. While CSP can be addressed by employing currently prevailing generative models (**e.g.** diffusion models), this task encounters unique challenges owing to the symmetric geometry of crystal structures---the invariance of translation, rotation, and periodicity. To incorporate the above symmetries, this paper proposes DiffCSP, a novel diffusion model to learn the structure distribution from stable crystals. To be specific, DiffCSP jointly generates the lattice and atom coordinates for each crystal by employing a periodic-E(3)-equivariant denoising model, to better model the crystal geometry. Notably, different from related equivariant generative approaches, DiffCSP leverages fractional coordinates rather than Cartesian coordinates to represent crystals, remarkably promoting the diffusion and the generation process of atom positions. Extensive experiments verify that our DiffCSP remarkably outperforms existing CSP methods, with a much lower computation cost in contrast to DFT-based methods. 
Moreover, the superiority of DiffCSP is still observed when it is extended for ab initio crystal generation.", "keywords": "crystal structure prediction;equivariant graph neural networks;diffusion generative models", "primary_area": "", "supplementary_material": "", "author": "Rui Jiao;Wenbing Huang;Peijia Lin;Jiaqi Han;Pin Chen;Yutong Lu;Yang Liu", "authorids": "~Rui_Jiao1;~Wenbing_Huang1;~Peijia_Lin1;~Jiaqi_Han2;~Pin_Chen1;~Yutong_Lu1;~Yang_Liu19", "gender": "M;M;M;M;M;F;M", "homepage": "https://jiaor17.github.io/;https://gsai.ruc.edu.cn/english/wenbing_huang;https://github.com/EmperorJia;https://hanjq17.github.io;;http://www.sysu.edu.cn;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "223/1073;155/3181-1.html;;235/0412;78/5412;;51/3710-5", "google_scholar": "buW16-AAAAAJ;0yNkmO4AAAAJ;;AKppgMAAAAAJ;;;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "orcid": ";;;;0000-0001-8746-9917;;0000-0002-3087-242X", "linkedin": ";;;;;;", "or_profile": "~Rui_Jiao1;~Wenbing_Huang1;~Peijia_Lin1;~Jiaqi_Han2;~Pin_Chen1;~Yutong_Lu1;~Yang_Liu19", "aff": "Tsinghua University;Renmin University of China;SUN YAT-SEN UNIVERSITY;Computer Science Department, Stanford University;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Tsinghua University", "aff_domain": "tsinghua.edu.cn;ruc.edu.cn;sysu.edu.cn;cs.stanford.edu;sysu.edu.cn;sysu.edu.cn;tsinghua.edu.cn", "position": "PhD student;Associate Professor;MS student;PhD student;Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\njiao2023crystal,\ntitle={Crystal Structure Prediction by Joint Equivariant Diffusion},\nauthor={Rui Jiao and Wenbing Huang and Peijia Lin and Jiaqi Han and Pin Chen and Yutong Lu and Yang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DNdN26m2Jk}\n}", "github": "", "project": "", "reviewers": "pv4b;1HBn;eNoa;pLZt", "pdf_size": 6761611, "rating": "5;5;7;8", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "4;3;3;3", "wc_summary": "51;48;58;127", "wc_strengths": "65;70;56;121", "wc_weaknesses": "202;157;65;189", "wc_questions": "44;15;34;293", "wc_limitations": "8;1;8;52", "wc_review": "370;291;221;782", "wc_reply_reviewers": "0;47;228;41", "wc_reply_authors": "0;14;247;21", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.0, 32.53459696999488 ], "wc_strengths_avg": [ 78.0, 25.32785028382788 ], "wc_weaknesses_avg": [ 153.25, 53.51810441336651 ], "wc_questions_avg": [ 96.5, 113.92651140099042 ], "wc_limitations_avg": [ 17.25, 20.26542622300355 ], "wc_review_avg": [ 416.0, 217.78544487637367 ], "wc_reply_reviewers_avg": [ 79.0, 87.9062000088731 ], "wc_reply_authors_avg": [ 70.5, 102.18243488975979 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10140920712532431476&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;ruc.edu.cn;sysu.edu.cn;cs.stanford.edu;sysu.edu.cn;sysu.edu.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;2;2;0", "aff_unique_norm": "Tsinghua University;Renmin 
University of China;Sun Yat-sen University;Stanford University", "aff_unique_dep": ";;;Computer Science Department", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;http://www.sysu.edu.cn;https://www.stanford.edu", "aff_unique_abbr": "THU;RUC;SYSU;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Weakly Coupled Deep Q-Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72372", "id": "DNubFPV5Dy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8912b4892064a4f08a0c04f92913c134-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DNubFPV5Dy", "openreview": "https://openreview.net/forum?id=DNubFPV5Dy", "poster": "/media/PosterPDFs/NeurIPS%202023/72372.png?t=1699456729.57668", "slides": "https://nips.cc/virtual/2023/poster/72372", "video": "https://nips.cc/virtual/2023/poster/72372", "author_site": "Ibrahim El Shar, Daniel Jiang", "tldr": "", "abstract": "We propose weakly coupled deep Q-networks (WCDQN), a novel deep reinforcement learning algorithm that enhances performance in a class of structured problems called weakly coupled Markov decision processes (WCMDP). WCMDPs consist of multiple independent subproblems connected by an action space constraint, which is a structural property that frequently emerges in practice. Despite this appealing structure, WCMDPs quickly become intractable as the number of subproblems grows. WCDQN employs a single network to train multiple DQN ``subagents,'' one for each subproblem, and then combine their solutions to establish an upper bound on the optimal action value. This guides the main DQN agent towards optimality. We show that the tabular version, weakly coupled Q-learning (WCQL), converges almost surely to the optimal action value. Numerical experiments show faster convergence compared to DQN and related techniques in settings with as many as 10 subproblems, $3^{10}$ total actions, and a continuous state space.", "keywords": "Reinforcement learning;Deep Reinforcement Learning;Weakly Coupled MDPs", "primary_area": "", "supplementary_material": "/attachment/7a4d167e3befb1003e82794ce3db6f3aeea10bac.pdf", "author": "Ibrahim El Shar;Daniel R. Jiang", "authorids": "~Ibrahim_El_Shar1;~Daniel_R._Jiang1", "gender": "M;", "homepage": "https://ibrahim-elshar.github.io/;http://danielrjiang.github.io", "dblp": ";157/1102", "google_scholar": "i5hvjowAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Ibrahim_El_Shar1;~Daniel_R._Jiang1", "aff": "Hitachi America, Ltd.;University of Pittsburgh", "aff_domain": "hal.hitachi.com;pitt.edu", "position": "Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshar2023weakly,\ntitle={Weakly Coupled Deep Q-Networks},\nauthor={Ibrahim El Shar and Daniel R. 
Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DNubFPV5Dy}\n}", "github": "", "project": "", "reviewers": "HJ3Y;aL5V;da8o;QmyG", "pdf_size": 607918, "rating": "5;5;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;2;3;4", "wc_summary": "40;96;67;131", "wc_strengths": "49;48;12;74", "wc_weaknesses": "86;410;28;308", "wc_questions": "1;69;53;51", "wc_limitations": "10;49;13;22", "wc_review": "186;672;173;586", "wc_reply_reviewers": "22;35;0;40", "wc_reply_authors": "87;69;0;20", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 83.5, 33.826764551165695 ], "wc_strengths_avg": [ 45.75, 22.094965489902897 ], "wc_weaknesses_avg": [ 208.0, 156.5950190778749 ], "wc_questions_avg": [ 43.5, 25.509802037648196 ], "wc_limitations_avg": [ 23.5, 15.370426148939398 ], "wc_review_avg": [ 404.25, 226.84397170742713 ], "wc_reply_reviewers_avg": [ 24.25, 15.465687828221544 ], "wc_reply_authors_avg": [ 44.0, 35.30580688781946 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14316649516303991287&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hal.hitachi.com;pitt.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Hitachi America, Ltd.;University of Pittsburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.hitachi-america.com;https://www.pitt.edu", "aff_unique_abbr": "HAL;Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Off-Policy Evaluation for Human Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72371", "id": "DOdaV0Hqdy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1cb57fcf7ff3f6d37eebae5becc9ea6d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DOdaV0Hqdy", "openreview": "https://openreview.net/forum?id=DOdaV0Hqdy", "poster": "/media/PosterPDFs/NeurIPS%202023/72371.png?t=1702511635.4744098", "slides": "https://nips.cc/virtual/2023/poster/72371", "video": "https://nips.cc/virtual/2023/poster/72371", "author_site": "Qitong Gao, Ge Gao, Juncheng Dong, Vahid Tarokh, Min Chi, Miroslav Pajic", "tldr": "", "abstract": "Off-policy evaluation (OPE) is important for closing the gap between offline training and evaluation of reinforcement learning (RL), by estimating performance and/or rank of target (evaluation) policies using offline trajectories only. It can improve the safety and efficiency of data collection and policy testing procedures in situations where online deployments are expensive, such as healthcare. However, existing OPE methods fall short in estimating human feedback (HF) signals, as HF signals may be conditioned on multiple underlying factors and are only sparsely available, as opposed to the agent-defined environmental rewards (used in policy optimization), which are usually determined via parametric functions or distributions. 
Consequently, the nature of HF signals makes extrapolating accurate OPE estimations challenging. To resolve this, we introduce an OPE for HF (OPEHF) framework that revives existing OPE methods in order to accurately evaluate the HF signals. Specifically, we develop an immediate human reward (IHR) reconstruction approach, regularized by environmental knowledge distilled in a latent space that captures the underlying dynamics of state transitions as well as issuing HF signals. Our approach has been tested on *two real-world experiments*, adaptive *in-vivo* neurostimulation and intelligent tutoring, and a simulation environment (visual Q&A). Results show that our approach significantly improves performance in estimating HF signals accurately, compared to directly applying (variants of) existing OPE methods.", "keywords": "Off-policy evaluation (OPE);Variational latent model for trajectory representation learning;Reinforcement learning and OPE for adaptive neurostimulation", "primary_area": "", "supplementary_material": "/attachment/1ac958c03e2e0d47b85476be38b9db764e0b916e.gz", "author": "Qitong Gao;Ge Gao;Juncheng Dong;Vahid Tarokh;Min Chi;Miroslav Pajic", "authorids": "~Qitong_Gao1;~Ge_Gao4;~Juncheng_Dong1;~Vahid_Tarokh1;~Min_Chi1;~Miroslav_Pajic2", "gender": "M;;;;;M", "homepage": "http://qitonggao.com;https://gegao.tech/;;;;http://people.duke.edu/~mp275/", "dblp": "238/5422;;;;;74/7446.html", "google_scholar": "Flv4SrsAAAAJ;d_WL-9cAAAAJ;;;;Fbn21-8AAAAJ", "orcid": ";0000-0002-3474-8637;;;;", "linkedin": "qitong-gao;;;;;", "or_profile": "~Qitong_Gao1;~Ge_Gao4;~Juncheng_Dong1;~Vahid_Tarokh1;~Min_Chi1;~Miroslav_Pajic2", "aff": "Duke University;North Carolina State University;;;;Duke University", "aff_domain": "duke.edu;ncsu.edu;;;;duke.edu", "position": "PhD student;PhD student;;;;Associate Professor", "bibtex": "@inproceedings{\ngao2023offpolicy,\ntitle={Off-Policy Evaluation for Human Feedback},\nauthor={Qitong Gao and Ge Gao and Juncheng Dong and Vahid Tarokh and Min Chi and Miroslav Pajic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DOdaV0Hqdy}\n}", "github": "", "project": "", "reviewers": "1oyy;cuXN;KHGy;ochq", "pdf_size": 3990704, "rating": "5;6;6;7", "confidence": "3;3;3;2", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "306;23;103;157", "wc_strengths": "69;30;63;81", "wc_weaknesses": "244;27;263;47", "wc_questions": "78;48;129;131", "wc_limitations": "1;4;8;17", "wc_review": "698;132;566;433", "wc_reply_reviewers": "1331;14;27;63", "wc_reply_authors": "1266;21;65;87", "reply_reviewers": "2;1;1;1", "reply_authors": "6;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 147.25, 103.31111992423662 ], "wc_strengths_avg": [ 60.75, 18.89940475253123 ], "wc_weaknesses_avg": [ 145.25, 108.68848835088286 ], "wc_questions_avg": [ 96.5, 35.14612354157995 ], "wc_limitations_avg": [ 7.5, 6.020797289396148 ], "wc_review_avg": [ 457.25, 209.85873224624225 ], "wc_reply_reviewers_avg": [ 358.75, 561.6156937800082 ], "wc_reply_authors_avg": [ 359.75, 523.7630069983943 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 8, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=2448239063395771010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "duke.edu;ncsu.edu;;;;duke.edu", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Duke University;North Carolina State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.ncsu.edu", "aff_unique_abbr": "Duke;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Theory of Unsupervised Translation Motivated by Understanding Animal Communication", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72370", "id": "DP2lioYIYl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7571c9d44179c7988178593c5b62a9b6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DP2lioYIYl", "openreview": "https://openreview.net/forum?id=DP2lioYIYl", "poster": "/media/PosterPDFs/NeurIPS%202023/72370.png?t=1702271576.1208823", "slides": "https://nips.cc/virtual/2023/poster/72370", "video": "https://nips.cc/virtual/2023/poster/72370", "author_site": "Shafi Goldwasser, David Gruber, Adam Tauman Kalai, Orr Paradise", "tldr": "", "abstract": "Neural networks are capable of translating between languages\u2014in some cases even between two languages where there is little or no access to parallel translations, in what is known as Unsupervised Machine Translation (UMT). Given this progress, it is intriguing to ask whether machine learning tools can ultimately enable understanding animal communication, particularly that of highly intelligent\nanimals. We propose a theoretical framework for analyzing UMT when no parallel translations are available and when it cannot be assumed that the source and target corpora address related subject domains or posses similar linguistic structure. We\nexemplify this theory with two stylized models of language, for which our framework provides bounds on necessary sample complexity; the bounds are formally proven and experimentally verified on synthetic data. These bounds show that the error rates are inversely related to the language complexity and amount of common ground. 
This suggests that unsupervised translation of animal communication may be feasible if the communication system is sufficiently complex.", "keywords": "Theory;Unsupervised Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Shafi Goldwasser;David Gruber;Adam Tauman Kalai;Orr Paradise", "authorids": "~Shafi_Goldwasser2;~David_Gruber1;~Adam_Tauman_Kalai1;~Orr_Paradise1", "gender": "F;;;M", "homepage": "https://simons.berkeley.edu/people/shafi-goldwasser;;;https://people.eecs.berkeley.edu/~orrp/", "dblp": "g/ShafiGoldwasser;225/0103.html;;236/4369", "google_scholar": ";_FZpPv0AAAAJ;;9At07_kAAAAJ", "orcid": ";0000-0001-9041-2911;;", "linkedin": ";david-gruber-b003871/;;", "or_profile": "~Shafi_Goldwasser2;~David_Gruber1;~Adam_Tauman_Kalai1;~Orr_Paradise1", "aff": "University of California, Berkeley;City University of New York;;University of California, Berkeley", "aff_domain": "berkeley.edu;cuny.edu;;berkeley.edu", "position": "Full Professor;Full Professor;;PhD student", "bibtex": "@inproceedings{\ngoldwasser2023a,\ntitle={A Theory of Unsupervised Translation Motivated by Understanding Animal Communication},\nauthor={Shafi Goldwasser and David Gruber and Adam Tauman Kalai and Orr Paradise},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DP2lioYIYl}\n}", "github": "", "project": "", "reviewers": "FHLH;7v3M;LYrz;5Kio;CDK4;sQGQ", "pdf_size": 1628924, "rating": "3;4;6;6;8;9", "confidence": "3;2;2;3;4;4", "soundness": "2;2;3;2;4;4", "novelty": "3;2;3;2;4;4", "presentation": "1;2;2;3;2;4", "wc_summary": "301;231;119;77;156;102", "wc_strengths": "214;8;115;64;64;49", "wc_weaknesses": "477;37;83;168;235;13", "wc_questions": "109;30;68;72;1;11", "wc_limitations": "36;1;23;10;6;1", "wc_review": "1137;307;408;391;462;176", "wc_reply_reviewers": "1099;0;25;33;0;53", "wc_reply_authors": "1590;0;0;0;0;172", "reply_reviewers": "4;0;1;1;0;2", "reply_authors": "5;1;1;1;1;2", "rating_avg": [ 6.0, 2.0816659994661326 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.8333333333333335, 0.8975274678557507 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 164.33333333333334, 78.27231666148356 ], "wc_strengths_avg": [ 85.66666666666667, 65.377536067232 ], "wc_weaknesses_avg": [ 168.83333333333334, 157.2560721314831 ], "wc_questions_avg": [ 48.5, 37.85388575386504 ], "wc_limitations_avg": [ 12.833333333333334, 12.746459203332595 ], "wc_review_avg": [ 480.1666666666667, 307.5231246950743 ], "wc_reply_reviewers_avg": [ 201.66666666666666, 401.7269664281395 ], "wc_reply_authors_avg": [ 293.6666666666667, 583.1299645495466 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.3743685418725535 ], "reply_authors_avg": [ 1.8333333333333333, 1.462494064565354 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6864064729836441, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6854359311855792722&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "berkeley.edu;cuny.edu;;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;City University of New York", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.cuny.edu", "aff_unique_abbr": "UC Berkeley;CUNY", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "title": "Transfer Learning with Affine Model Transformation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72369", "id": "DPeBX79eNz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3819a070922cc0d19f3d66ce108f28e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DPeBX79eNz", "openreview": "https://openreview.net/forum?id=DPeBX79eNz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72369", "video": "https://nips.cc/virtual/2023/poster/72369", "author_site": "Shunya Minami, Shunya Minami, Kenji Fukumizu, Yoshihiro Hayashi, Ryo Yoshida", "tldr": "", "abstract": "Supervised transfer learning has received considerable attention due to its potential to boost the predictive power of machine learning in scenarios where data are scarce. Generally, a given set of source models and a dataset from a target domain are used to adapt the pre-trained models to a target domain by statistically learning domain shift and domain-specific factors. While such procedurally and intuitively plausible methods have achieved great success in a wide range of real-world applications, the lack of a theoretical basis hinders further methodological development. This paper presents a general class of transfer learning regression called affine model transfer, following the principle of expected-square loss minimization. It is shown that the affine model transfer broadly encompasses various existing methods, including the most common procedure based on neural feature extractors. Furthermore, the current paper clarifies theoretical properties of the affine model transfer such as generalization error and excess risk. Through several case studies, we demonstrate the practical benefits of modeling and estimating inter-domain commonality and domain-specific factors separately with the affine-type transfer models.", "keywords": "Machine learning;Transfer learning", "primary_area": "", "supplementary_material": "/attachment/8d768a477ff49b3f7e26a45486917b583450638b.zip", "author": "Shunya Minami;Kenji Fukumizu;Yoshihiro Hayashi;Ryo Yoshida", "authorids": "~Shunya_Minami1;~Kenji_Fukumizu1;~Yoshihiro_Hayashi1;~Ryo_Yoshida2", "gender": "M;M;;", "homepage": ";http://www.ism.ac.jp/~fukumizu/;;http://spacier.ism.ac.jp/en/", "dblp": "268/6654;96/464;78/5710;44/2367", "google_scholar": "https://scholar.google.co.jp/citations?view_op=list_works;;https://scholar.google.co.jp/citations?hl=ja;HiUqrycAAAAJ", "orcid": "0000-0002-3566-817X;0000-0002-3488-2625;0000-0002-7650-4083;0000-0001-8092-0162", "linkedin": ";;;", "or_profile": "~Shunya_Minami1;~Kenji_Fukumizu1;~Yoshihiro_Hayashi1;~Ryo_Yoshida2", "aff": "The Institute of Statistical Mathematics, Japan;The Institute of Statistical Mathematics, Japan, Tokyo Institute of Technology;The Institute of Statistical Mathematics;The Institute of Statistical Mathematics, Japan, Tokyo Institute of Technology", "aff_domain": "ism.ac.jp;ism.ac.jp;ism.ac.jp;ism.ac.jp", "position": "PhD student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nminami2023transfer,\ntitle={Transfer Learning with Affine Model Transformation},\nauthor={Shunya Minami and Kenji Fukumizu and Yoshihiro Hayashi and Ryo Yoshida},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DPeBX79eNz}\n}", "github": "", "project": "", "reviewers": "XSgT;DK8u;t1vu;3FfK;tCYn", "pdf_size": 829411, 
"rating": "4;5;6;6;7", "confidence": "4;1;2;3;2", "soundness": "3;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;2;3;3;3", "wc_summary": "96;60;68;199;170", "wc_strengths": "41;28;51;96;81", "wc_weaknesses": "79;127;168;106;24", "wc_questions": "53;46;111;124;37", "wc_limitations": "20;49;4;13;48", "wc_review": "289;310;402;538;360", "wc_reply_reviewers": "65;93;169;11;0", "wc_reply_authors": "0;140;128;0;0", "reply_reviewers": "1;2;2;1;0", "reply_authors": "1;2;3;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 2.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 118.6, 55.87700779390392 ], "wc_strengths_avg": [ 59.4, 25.30296425322535 ], "wc_weaknesses_avg": [ 100.8, 48.172191148005716 ], "wc_questions_avg": [ 74.2, 35.9521904756859 ], "wc_limitations_avg": [ 26.8, 18.432579851990337 ], "wc_review_avg": [ 379.8, 88.35021222385377 ], "wc_reply_reviewers_avg": [ 67.6, 61.167311531568885 ], "wc_reply_authors_avg": [ 53.6, 65.75591228171045 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42307692307692313, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1640280809418797758&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ism.ac.jp;ism.ac.jp;ism.ac.jp;ism.ac.jp", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Institute of Statistical Mathematics", "aff_unique_dep": "", "aff_unique_url": "https://www.ism.ac.jp", "aff_unique_abbr": "ISM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "ZoomTrack: Target-aware Non-uniform Resizing for Efficient Visual Tracking", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72368", "id": "DQgTewaKzt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9fc291fef2f9607a46777d367f900a15-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DQgTewaKzt", "openreview": "https://openreview.net/forum?id=DQgTewaKzt", "poster": "/media/PosterPDFs/NeurIPS%202023/72368.png?t=1699866484.2504683", "slides": "https://nips.cc/virtual/2023/poster/72368", "video": "https://nips.cc/virtual/2023/poster/72368", "author_site": "Yutong Kou, Jin Gao, Bing Li, Gang Wang, Weiming Hu, Yizheng Wang, Liang Li", "tldr": "", "abstract": "Recently, the transformer has enabled the speed-oriented trackers to approach state-of-the-art (SOTA) performance with high-speed thanks to the smaller input size or the lighter feature extraction backbone, though they still substantially lag behind their corresponding performance-oriented versions. In this paper, we demonstrate that it is possible to narrow or even close this gap while achieving high tracking speed based on the smaller input size. To this end, we non-uniformly resize the cropped image to have a smaller input size while the resolution of the area where the target is more likely to appear is higher and vice versa. This enables us to solve the dilemma of attending to a larger visual field while retaining more raw information for the target despite a smaller input size. Our formulation for the non-uniform resizing can be efficiently solved through quadratic programming (QP) and naturally integrated into most of the crop-based local trackers. 
Comprehensive experiments on five challenging datasets based on two kinds of transformer trackers, \\ie, OSTrack and TransT, demonstrate consistent improvements over them. In particular, applying our method to the speed-oriented version of OSTrack even outperforms its performance-oriented counterpart by 0.6\\% AUC on TNL2K, while running 50\\% faster and saving over 55\\% MACs. Codes and models are available at https://github.com/Kou-99/ZoomTrack.", "keywords": "Visual tracking;non-uniform resizing;HVS-inspired processing", "primary_area": "", "supplementary_material": "", "author": "Yutong Kou;Jin Gao;Bing Li;Gang Wang;Weiming Hu;Yizheng Wang;Liang Li", "authorids": "~Yutong_Kou1;~Jin_Gao1;~Bing_Li1;~Gang_Wang22;~Weiming_Hu1;~Yizheng_Wang2;~Liang_Li9", "gender": "M;M;M;M;M;M;M", "homepage": "https://kou-99.github.io/;https://people.ucas.edu.cn/~jgao?language=en;http://www.escience.cn/people/BingLi;http://weiminghu.people-ai.net/;https://iobs.fudan.edu.cn/iobsenglish/9b/b5/c17668a170933/page.htm;https://github.com/biobrain;https://scholar.google.com/citations?user=YOtXJvQAAAAJ&hl=zh-CN", "dblp": "247/4139;;13/2692-1;;;;", "google_scholar": ";W1o3B-0AAAAJ;;;;;YOtXJvQAAAAJ", "orcid": ";;;0000-0001-9237-8825;;0000-0003-0800-1094;0000-0002-1916-6110", "linkedin": ";;;;;;", "or_profile": "~Yutong_Kou1;~Jin_Gao1;~Bing_Li1;~Weiming_Hu1;~Yizheng_Wang2;~Liang_Li9;~Gang_WANG21", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Fudan University;Beijing Institute of Basic Medical Sciences;University of Electronic Science and Technology of China", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;fudan.edu.cn;bmi.ac.cn;uestc.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Full Professor;Prof;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkou2023zoomtrack,\ntitle={ZoomTrack: Target-aware Non-uniform Resizing for Efficient Visual Tracking},\nauthor={Yutong Kou and Jin Gao and Bing Li and Gang Wang and Weiming Hu and Yizheng Wang and Liang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DQgTewaKzt}\n}", "github": "", "project": "", "reviewers": "PYzX;Gq4E;P4Ao;2uk1;Fe6p", "pdf_size": 2162088, "rating": "5;5;6;7;8", "confidence": "4;4;5;5;5", "soundness": "3;3;3;3;4", "novelty": "2;2;2;4;3", "presentation": "3;4;2;3;4", "wc_summary": "63;152;67;162;73", "wc_strengths": "59;137;51;150;48", "wc_weaknesses": "52;238;183;129;95", "wc_questions": "3;356;35;171;5", "wc_limitations": "3;22;17;10;6", "wc_review": "180;905;353;622;227", "wc_reply_reviewers": "0;197;44;96;0", "wc_reply_authors": "0;465;37;32;33", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 103.4, 43.99363590338948 ], "wc_strengths_avg": [ 89.0, 44.83302354291979 ], "wc_weaknesses_avg": [ 139.4, 65.33176868874743 ], "wc_questions_avg": [ 114.0, 135.8351942612812 ], "wc_limitations_avg": [ 11.6, 7.002856560004639 ], "wc_review_avg": [ 457.4, 271.4307278109831 ], "wc_reply_reviewers_avg": [ 67.4, 73.82032240514803 ], "wc_reply_authors_avg": [ 113.4, 176.30042541071762 ], "reply_reviewers_avg": [ 0.6, 
0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8401680504168058, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7414373799595044776&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;fudan.edu.cn;bmi.ac.cn;uestc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;3", "aff_unique_norm": "Chinese Academy of Sciences;Fudan University;Beijing Institute of Basic Medical Sciences;University of Electronic Science and Technology of China", "aff_unique_dep": "Institute of Automation;;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.fudan.edu.cn;http://www.bibms.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "CAS;Fudan;;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Causal Interpretation of Self-Attention in Pre-Trained Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72367", "id": "DS4rKySlYC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/642a321fba8a0f03765318e629cb93ea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DS4rKySlYC", "openreview": "https://openreview.net/forum?id=DS4rKySlYC", "poster": "/media/PosterPDFs/NeurIPS%202023/72367.png?t=1699953268.7815602", "slides": "https://nips.cc/virtual/2023/poster/72367", "video": "https://nips.cc/virtual/2023/poster/72367", "author_site": "Raanan Rohekar, Yaniv Gurwicz, Shami Nisimov", "tldr": "", "abstract": "We propose a causal interpretation of self-attention in the Transformer neural network architecture. We interpret self-attention as a mechanism that estimates a structural equation model for a given input sequence of symbols (tokens). The structural equation model can be interpreted, in turn, as a causal structure over the input symbols under the specific context of the input sequence. Importantly, this interpretation remains valid in the presence of latent confounders. Following this interpretation, we estimate conditional independence relations between input symbols by calculating partial correlations between their corresponding representations in the deepest attention layer. This enables learning the causal structure over an input sequence using existing constraint-based algorithms. In this sense, existing pre-trained Transformers can be utilized for zero-shot causal discovery.
We demonstrate this method by providing causal explanations for the outcomes of Transformers in two tasks: sentiment classification (NLP) and recommendation.", "keywords": "Self-Attention;Causal Discovery;Reasoning;Explainability;Zero-shot;Transformer", "primary_area": "", "supplementary_material": "", "author": "Raanan Yehezkel Rohekar;Yaniv Gurwicz;Shami Nisimov", "authorids": "~Raanan_Yehezkel_Rohekar1;~Yaniv_Gurwicz1;~Shami_Nisimov3", "gender": ";;M", "homepage": ";;", "dblp": ";83/4274;", "google_scholar": "B7SmLVkAAAAJ;;https://scholar.google.co.il/citations?user=Men4J6oAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Raanan_Yehezkel_Rohekar1;~Yaniv_Gurwicz1;~shami_nisimov1", "aff": "Intel Corporation;Intel;Intel corporation", "aff_domain": "intel.com;intel.com;intel.com", "position": "Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nrohekar2023causal,\ntitle={Causal Interpretation of Self-Attention in Pre-Trained Transformers},\nauthor={Raanan Yehezkel Rohekar and Yaniv Gurwicz and Shami Nisimov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DS4rKySlYC}\n}", "github": "", "project": "", "reviewers": "unWZ;wsNj;qdMo;75HH", "pdf_size": 1403410, "rating": "5;5;6;7", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;2", "wc_summary": "74;97;139;81", "wc_strengths": "105;153;115;53", "wc_weaknesses": "97;357;238;14", "wc_questions": "30;96;70;54", "wc_limitations": "1;82;3;64", "wc_review": "307;785;565;266", "wc_reply_reviewers": "21;39;25;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.75, 25.232667318379164 ], "wc_strengths_avg": [ 106.5, 35.703641270884404 ], "wc_weaknesses_avg": [ 176.5, 131.42393237154334 ], "wc_questions_avg": [ 62.5, 24.015619917045655 ], "wc_limitations_avg": [ 37.5, 36.07284297085551 ], "wc_review_avg": [ 480.75, 209.7455303457025 ], "wc_reply_reviewers_avg": [ 26.5, 7.399324293474371 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10317002991581217896&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "intel.com;intel.com;intel.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel Corporation", "aff_unique_url": "https://www.intel.com", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Neural MMO 2.0: A Massively Multi-task Addition to Massively Multi-agent Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73652", "id": "DSYuRMJnaY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ca22870ae0ba55ee50ce3e2d269e5de-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=DSYuRMJnaY", "openreview": "https://openreview.net/forum?id=DSYuRMJnaY", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/73652", "video": "https://nips.cc/virtual/2023/poster/73652", "author_site": "Joseph Suarez, David Bloomin, Kyoung Whan Choe, Hao Xiang Li, Ryan Sullivan, Nishaanth Kanna, Daniel Scott, Rose Shuman, Herbie Bradley, Louis Castricato, Phillip Isola, Chenghui Yu, Yuhao Jiang, Qimai Li, Jiaxin Chen, Xiaolong Zhu", "tldr": "", "abstract": "Neural MMO 2.0 is a massively multi-agent and multi-task environment for reinforcement learning research. This version features a novel task-system that broadens the range of training settings and poses a new challenge in generalization: evaluation on and against tasks, maps, and opponents never seen during training. Maps are procedurally generated with 128 agents in the standard setting and 1-1024 supported overall. Version 2.0 is a complete rewrite of its predecessor with three-fold improved performance, effectively addressing simulation bottlenecks in online training. Enhancements to compatibility enable training with standard reinforcement learning frameworks designed for much simpler environments. Neural MMO 2.0 is free and open-source with comprehensive documentation available at neuralmmo.github.io and an active community Discord. To spark initial research on this new platform, we are concurrently running a competition at NeurIPS 2023.", "keywords": "environment; multi-agent; multi-task; reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/6bd9071d3e9debc787f176714fa4f6246f0d5302.pdf", "author": "Joseph Suarez;David Bloomin;Kyoung Whan Choe;Hao Xiang Li;Ryan Sullivan;Nishaanth Kanna Ravichandran;Daniel Scott;Rose S Shuman;Herbie Bradley;Louis Castricato;Phillip Isola;Kirsty You;Yuhao Jiang;Qimai Li;Jiaxin Chen;Xiaolong Zhu", "authorids": "~Joseph_Suarez1;daveey@gmail.com;~Kyoung_Whan_Choe1;~Hao_Xiang_Li1;~Ryan_Sullivan2;~Nishaanth_Kanna_Ravichandran1;~Daniel_Scott1;rose.shuman@alumni.brown.edu;~Herbie_Bradley1;~Louis_Castricato3;~Phillip_Isola1;kirstyyou@chaocanshu.ai;yuhaojiang@chaocanshu.ai;qimaili@chaocanshu.ai;~Jiaxin_Chen1;~Xiaolong_Zhu1", "gender": "M;;;;M;M;;;M;M;M;;;;F;Not Specified", "homepage": "https://jsuarez5341.github.io;;;https://markhaoxiang.com;https://ryannavillus.github.io/;;https://dsctt.github.io/;;https://herbiebradley.com;http://louiscatricato.com;http://web.mit.edu/phillipi/;;;;;http://xiaolongzhu.org", "dblp": ";;;360/4751;;360/4955;;;;;36/9988;;;;65/1392;", "google_scholar": ";;Smql8gkAAAAJ;;https://scholar.google.com/citations?hl=en;vT3LDgwAAAAJ;;;oQ0HzPcAAAAJ;WrUnrz4AAAAJ;ROILf3EAAAAJ;;;;;", "orcid": ";;0000-0002-2138-9807;;;;;;0000-0001-5390-1257;;0000-0002-1411-6704;;;;;", "linkedin": ";;kywch/;;ryan-navillus/;;;;herbiebradley/;;phillip-isola-a9955b20/;;;;;", "or_profile": "~Joseph_Suarez1;daveey@gmail.com;~Kyoung_Whan_Choe1;~Hao_Xiang_Li1;~Ryan_Sullivan2;~Nishaanth_Kanna_Ravichandran1;~Daniel_Scott1;rose.shuman@alumni.brown.edu;~Herbie_Bradley1;~Louis_Castricato3;~Phillip_Isola1;kirstyyou@chaocanshu.ai;yuhaojiang@chaocanshu.ai;qimaili@chaocanshu.ai;~Jiaxin_Chen1;~Xiaolong_Zhu1", "aff": "Massachusetts Institute of Technology;;;University of Cambridge;University of Maryland, College Park;University of New Brunswick;Georgia Institute of Technology;;CarperAI;Brown University;Massachusetts Institute of Technology;;;;Parametrix.ai;Parametrix", "aff_domain": "mit.edu;;;cam.ac.uk;umd.edu;unb.ca;gatech.edu;;carper.ai;brown.edu;mit.edu;;;;chaocanshu.ai;chaocanshu.ai", "position": "PhD student;;;Undergrad student;PhD student;MS student;MS student;;Researcher;PhD 
student;Associate Professor;;;;Researcher;Researcher", "bibtex": "@inproceedings{\nsuarez2023neural,\ntitle={Neural {MMO} 2.0: A Massively Multi-task Addition to Massively Multi-agent Learning},\nauthor={Joseph Suarez and David Bloomin and Kyoung Whan Choe and Hao Xiang Li and Ryan Sullivan and Nishaanth Kanna Ravichandran and Daniel Scott and Rose S Shuman and Herbie Bradley and Louis Castricato and Phillip Isola and Kirsty You and Yuhao Jiang and Qimai Li and Jiaxin Chen and Xiaolong Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=DSYuRMJnaY}\n}", "github": "", "project": "", "reviewers": "3mZR;3uba;wNF1", "pdf_size": 2700156, "rating": "6;6;7", "confidence": "4;4;3", "wc_summary_and_contributions": "149;49;90", "wc_strengths": "140;49;17", "wc_improvement": "186;88;45", "wc_limitations": "31;2;34", "wc_correctness": "11;2;10", "wc_clarity": "8;237;5", "wc_relation_to_prior_work": "22;57;26", "wc_documentation": "21;63;22", "wc_additional_feedback": "1;1;1", "wc_review": "569;548;250", "wc_reply_reviewers": "183;12;0", "wc_reply_authors": "594;383;245", "reply_reviewers": "2;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 96.0, 41.04469108991645 ], "wc_strengths_avg": [ 68.66666666666667, 52.10459565996927 ], "wc_improvement_avg": [ 106.33333333333333, 59.004707910094396 ], "wc_limitations_avg": [ 22.333333333333332, 14.42990721460891 ], "wc_correctness_avg": [ 7.666666666666667, 4.0276819911981905 ], "wc_clarity_avg": [ 83.33333333333333, 108.66564416696855 ], "wc_relation_to_prior_work_avg": [ 35.0, 15.641824275533422 ], "wc_documentation_avg": [ 35.333333333333336, 19.567546828585563 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 455.6666666666667, 145.68077734858807 ], "wc_reply_reviewers_avg": [ 65.0, 83.58229477586745 ], "wc_reply_authors_avg": [ 407.3333333333333, 143.51383982815813 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13466882612310101022&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "mit.edu;;;cam.ac.uk;umd.edu;unb.ca;gatech.edu;;carper.ai;brown.edu;mit.edu;;;;chaocanshu.ai;chaocanshu.ai", "author_num": 16, "aff_unique_index": "0;1;2;3;4;5;6;0;7;7", "aff_unique_norm": "Massachusetts Institute of Technology;University of Cambridge;University of Maryland;University of New Brunswick;Georgia Institute of Technology;CarperAI;Brown University;Parametrix", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://web.mit.edu;https://www.cam.ac.uk;https://www.umd.edu;https://www.unb.ca;https://www.gatech.edu;https://www.carperai.com;https://www.brown.edu;https://www.parametrix.ai", "aff_unique_abbr": "MIT;Cambridge;UMD;UNB;Georgia Tech;CarperAI;Brown;Parametrix", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;College Park", "aff_country_unique_index": "0;1;0;2;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom;Canada;" }, { "title": "R-divergence for Estimating Model-oriented Distribution Discrepancy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72366", "id": "DVWIA9v9Jm",
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b157cfde6794e93b2353b9712bbd45a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DVWIA9v9Jm", "openreview": "https://openreview.net/forum?id=DVWIA9v9Jm", "poster": "/media/PosterPDFs/NeurIPS%202023/72366.png?t=1698904330.5185335", "slides": "https://nips.cc/virtual/2023/poster/72366", "video": "https://nips.cc/virtual/2023/poster/72366", "author_site": "Zhilin Zhao, Longbing Cao", "tldr": "", "abstract": "Real-life data are often non-IID due to complex distributions and interactions, and the sensitivity to the distribution of samples can differ among learning models. Accordingly, a key question for any supervised or unsupervised model is whether the probability distributions of two given datasets can be considered identical. To address this question, we introduce R-divergence, designed to assess model-oriented distribution discrepancies. The core insight is that two distributions are likely identical if their optimal hypothesis yields the same expected risk for each distribution. To estimate the distribution discrepancy between two datasets, R-divergence learns a minimum hypothesis on the mixed data and then gauges the empirical risk difference between them. We evaluate the test power across various unsupervised and supervised tasks and find that R-divergence achieves state-of-the-art performance. To demonstrate the practicality of R-divergence, we employ R-divergence to train robust neural networks on samples with noisy labels.", "keywords": "non-IID;Distribution Discrepancy;Data Divergence;Two-sample Test", "primary_area": "", "supplementary_material": "/attachment/ff0e32019ddb5bf013cc24634b12cf3df7ebf218.pdf", "author": "Zhilin Zhao;Longbing Cao", "authorids": "~Zhilin_Zhao1;~Longbing_Cao1", "gender": "M;M", "homepage": "https://lawliet-zzl.github.io/;https://www.datasciences.org", "dblp": "189/1602.html;14/2589", "google_scholar": "3e8zto0AAAAJ;cDs3DM8AAAAJ", "orcid": ";0000-0003-1562-9429", "linkedin": ";", "or_profile": "~Zhilin_Zhao1;~Longbing_Cao1", "aff": "University of Technology Sydney;University of Technology Sydney", "aff_domain": "uts.edu.au;uts.edu.au", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhao2023rdivergence,\ntitle={R-divergence for Estimating Model-oriented Distribution Discrepancy},\nauthor={Zhilin Zhao and Longbing Cao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DVWIA9v9Jm}\n}", "github": "", "project": "", "reviewers": "b1Kb;Z32S;3ypD;EFgW", "pdf_size": 1341076, "rating": "5;7;7;7", "confidence": "4;4;3;3", "soundness": "3;4;2;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "74;201;62;60", "wc_strengths": "67;106;60;20", "wc_weaknesses": "146;79;503;141", "wc_questions": "33;10;82;91", "wc_limitations": "29;1;35;28", "wc_review": "349;397;742;340", "wc_reply_reviewers": "145;0;684;14", "wc_reply_authors": "425;46;1057;43", "reply_reviewers": "2;0;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.25, 58.988876069984585 ], "wc_strengths_avg": [ 63.25, 30.50717128807586 ], "wc_weaknesses_avg": [ 217.25, 167.07539465762156 ], "wc_questions_avg": [ 54.0, 33.65263734092768 ], "wc_limitations_avg": [ 23.25, 13.12202347201071 ], "wc_review_avg": [ 
457.0, 165.96535783108473 ], "wc_reply_reviewers_avg": [ 210.75, 279.0227365287639 ], "wc_reply_authors_avg": [ 392.75, 413.77190274352847 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=882406475601336927&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uts.edu.au;uts.edu.au", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Technology Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.uts.edu.au", "aff_unique_abbr": "UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Chasing Fairness Under Distribution Shift: A Model Weight Perturbation Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72365", "id": "DVjyq5eCAD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9cd2d12abe92f30b1442557bdbe8f5a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DVjyq5eCAD", "openreview": "https://openreview.net/forum?id=DVjyq5eCAD", "poster": "/media/PosterPDFs/NeurIPS%202023/72365.png?t=1701752274.3364353", "slides": "https://nips.cc/virtual/2023/poster/72365", "video": "https://nips.cc/virtual/2023/poster/72365", "author_site": "Zhimeng (Stephen) Jiang, Xiaotian Han, Hongye Jin, Guanchu Wang, Rui Chen, Na Zou, Xia Hu", "tldr": "", "abstract": "Fairness in machine learning has attracted increasing attention in recent years. The fairness methods improving algorithmic fairness for in-distribution data may not perform well under distribution shifts. In this paper, we first theoretically demonstrate the inherent connection between distribution shift, data perturbation, and model weight perturbation.\nSubsequently, we analyze the sufficient conditions to guarantee fairness (i.e., low demographic parity) for the target dataset, including fairness for the source dataset, and low prediction difference between the source and target datasets for each sensitive attribute group. Motivated by these sufficient conditions, we propose robust fairness regularization (RFR) by considering the worst case within the model weight perturbation ball for each sensitive attribute group. We evaluate the effectiveness of our proposed RFR algorithm on synthetic and real distribution shifts across various datasets. Experimental results demonstrate that RFR achieves better fairness-accuracy trade-off performance compared with several baselines. 
The source code is available at \\url{https://github.com/zhimengj0326/RFR_NeurIPS23}.", "keywords": "Model Weight Perturbation;fairness;distribution shift", "primary_area": "", "supplementary_material": "/attachment/19454e2027502d9b51e1e917ebc53977a700ceb6.pdf", "author": "Zhimeng Jiang;Xiaotian Han;Hongye Jin;Guanchu Wang;Rui Chen;Na Zou;Xia Hu", "authorids": "~Zhimeng_Jiang1;~Xiaotian_Han1;~Hongye_Jin1;~Guanchu_Wang1;~Rui_Chen4;~Na_Zou2;~Xia_Hu4", "gender": "M;M;M;M;;F;", "homepage": "http://www.zhimengjiang.com/;https://ahxt.github.io/;https://github.com/Mooler0410;https://guanchuwang.github.io/home;;https://nzou1.github.io/;", "dblp": "217/3235;;268/7929;213/0985;;152/0090-1.html;", "google_scholar": "5Es3Yk4AAAAJ;Uromx98AAAAJ;;_QL5218AAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0001-6933-3952;;;;;0000-0003-1984-795X;", "linkedin": ";;;;;na-zou-a1721535/;", "or_profile": "~Zhimeng_Jiang1;~Xiaotian_Han1;~Hongye_Jin1;~Guanchu_Wang1;~Rui_Chen4;~Na_Zou2;~Xia_Hu4", "aff": "Texas A&M University;Texas A&M University;Texas A&M;Rice University;;Texas A&M University - College Station;", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;rice.edu;;tamu.edu;", "position": "PhD student;PhD student;PhD student;PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\njiang2023chasing,\ntitle={Chasing Fairness Under Distribution Shift: A Model Weight Perturbation Approach},\nauthor={Zhimeng Jiang and Xiaotian Han and Hongye Jin and Guanchu Wang and Rui Chen and Na Zou and Xia Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DVjyq5eCAD}\n}", "github": "", "project": "", "reviewers": "bBot;Viye;iSrC;646Z;3kzb", "pdf_size": 925023, "rating": "6;6;6;6;7", "confidence": "2;2;3;5;4", "soundness": "3;3;2;3;3", "novelty": "3;2;2;3;3", "presentation": "4;3;4;3;3", "wc_summary": "75;65;126;35;101", "wc_strengths": "54;13;52;25;112", "wc_weaknesses": "134;88;482;138;9", "wc_questions": "84;35;14;18;157", "wc_limitations": "38;4;2;33;10", "wc_review": "385;205;676;249;389", "wc_reply_reviewers": "46;84;336;0;9", "wc_reply_authors": "230;125;368;0;10", "reply_reviewers": "1;2;2;0;1", "reply_authors": "2;2;3;1;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 80.4, 31.084401232772684 ], "wc_strengths_avg": [ 51.2, 34.20760149440472 ], "wc_weaknesses_avg": [ 170.2, 162.67071033225372 ], "wc_questions_avg": [ 61.6, 53.809292877717695 ], "wc_limitations_avg": [ 17.4, 15.094369811290566 ], "wc_review_avg": [ 380.8, 164.62369209806954 ], "wc_reply_reviewers_avg": [ 95.0, 124.11607470428639 ], "wc_reply_authors_avg": [ 146.6, 138.99007158786557 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12317820704162930009&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tamu.edu;tamu.edu;tamu.edu;rice.edu;;tamu.edu;", "author_num": 7, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Texas A&M University;Rice University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.rice.edu", "aff_unique_abbr": "TAMU;Rice", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "RoboCLIP: One Demonstration is Enough to Learn Robot Policies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72364", "id": "DVlawv2rSI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ae54ce310476218f26dd48c1626d5187-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DVlawv2rSI", "openreview": "https://openreview.net/forum?id=DVlawv2rSI", "poster": "/media/PosterPDFs/NeurIPS%202023/72364.png?t=1701909587.1895583", "slides": "https://nips.cc/virtual/2023/poster/72364", "video": "https://nips.cc/virtual/2023/poster/72364", "author_site": "Sumedh Sontakke, Jesse Zhang, S\u00e9b Arnold, Karl Pertsch, Erdem B\u0131y\u0131k, Dorsa Sadigh, Chelsea Finn, Laurent Itti", "tldr": "", "abstract": "Reward specification is a notoriously difficult problem in reinforcement learning, requiring extensive expert supervision to design robust reward functions. Imitation learning (IL) methods attempt to circumvent these problems by utilizing expert demonstrations instead of using an extrinsic reward function but typically require a large number of in-domain expert demonstrations. Inspired by advances in the field of Video-and-Language Models (VLMs), we present RoboCLIP, an online imitation learning method that uses a single demonstration (overcoming the large data requirement) in the form of a video demonstration or a textual description of the task to generate rewards without manual reward function design. Additionally, RoboCLIP can also utilize out-of-domain demonstrations, like videos of humans solving the task for reward generation, circumventing the need to have the same demonstration and deployment domains. \nRoboCLIP utilizes pretrained VLMs without any finetuning for reward generation. Reinforcement learning agents trained with RoboCLIP rewards demonstrate 2-3 times higher zero-shot performance than competing imitation learning methods on downstream robot manipulation tasks, doing so using only one video/text demonstration. 
Visit our website at https://sites.google.com/view/roboclip/home for experiment videos.", "keywords": "Reinforcement Learning;Vision and Language Models", "primary_area": "", "supplementary_material": "/attachment/1876245eca7c88a662739f291a384c91a911b390.zip", "author": "Sumedh Anand Sontakke;Jesse Zhang;S\u00e9b Arnold;Karl Pertsch;Erdem Biyik;Dorsa Sadigh;Chelsea Finn;Laurent Itti", "authorids": "~Sumedh_Anand_Sontakke1;~Jesse_Zhang3;~S\u00e9b_Arnold1;~Karl_Pertsch1;~Erdem_Biyik1;~Dorsa_Sadigh1;~Chelsea_Finn1;~Laurent_Itti1", "gender": "M;M;;M;F;F;M;", "homepage": "https://sumedh7.github.io/;https://jessezhang.net;https://kpertsch.github.io/;http://people.eecs.berkeley.edu/~ebiyik/;https://dorsa.fyi/;https://ai.stanford.edu/~cbfinn/;http://ilab.usc.edu;http://sebarnold.net", "dblp": "276/0127;;211/7137;194/2736;117/3174;131/1783;31/3256;206/7057", "google_scholar": "https://scholar.google.com/citations?hl=en;fSXCOfEAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tr/citations?user=P-G3sjYAAAAJ;ZaJEZpYAAAAJ;vfPE6hgAAAAJ;xhUvqK8AAAAJ;qoEFB7UAAAAJ", "orcid": ";;;0000-0002-9516-3130;;;0000-0002-0168-2977;", "linkedin": "sumedh-sontakke-0ab24210a/;;;https://linkedin.com/in/ebiyik;;;;", "or_profile": "~Sumedh_Anand_Sontakke1;~Jesse_Zhang3;~Karl_Pertsch1;~Erdem_Biyik1;~Dorsa_Sadigh1;~Chelsea_Finn1;~Laurent_Itti1;~Sebastien_Arnold1", "aff": "University of Southern California;Amazon;University of Southern California;University of California, Berkeley;Stanford University;Google;University of Southern California;University of Southern California", "aff_domain": "usc.edu;amazon.com;usc.edu;berkeley.edu;stanford.edu;google.com;usc.edu;usc.edu", "position": "PhD student;Intern;PhD student;Postdoc;Assistant Professor;Research Scientist;Professor;PhD student", "bibtex": "@inproceedings{\nsontakke2023roboclip,\ntitle={Robo{CLIP}: One Demonstration is Enough to Learn Robot Policies},\nauthor={Sumedh Anand Sontakke and Jesse Zhang and S{\\'e}b Arnold and Karl Pertsch and Erdem Biyik and Dorsa Sadigh and Chelsea Finn and Laurent Itti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DVlawv2rSI}\n}", "github": "", "project": "", "reviewers": "EmDU;S3ve;Dcaj;QrrK", "pdf_size": 1165975, "rating": "5;6;6;6", "confidence": "4;3;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "94;109;74;24", "wc_strengths": "49;31;57;22", "wc_weaknesses": "130;40;253;31", "wc_questions": "25;8;23;151", "wc_limitations": "4;4;14;1", "wc_review": "302;192;421;229", "wc_reply_reviewers": "89;0;80;0", "wc_reply_authors": "491;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.25, 32.08874413248359 ], "wc_strengths_avg": [ 39.75, 13.91716565971678 ], "wc_weaknesses_avg": [ 113.5, 89.36022605163888 ], "wc_questions_avg": [ 51.75, 57.67744359799592 ], "wc_limitations_avg": [ 5.75, 4.9180788932265 ], "wc_review_avg": [ 286.0, 87.41567365181143 ], "wc_reply_reviewers_avg": [ 42.25, 42.36965305498736 ], "wc_reply_authors_avg": [ 122.75, 212.60923662907967 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, 
"gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6195270478990594973&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "usc.edu;amazon.com;usc.edu;berkeley.edu;stanford.edu;google.com;usc.edu;usc.edu", "author_num": 8, "aff_unique_index": "0;1;0;2;3;4;0;0", "aff_unique_norm": "University of Southern California;Amazon;University of California, Berkeley;Stanford University;Google", "aff_unique_dep": ";Amazon.com, Inc.;;;Google", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com;https://www.berkeley.edu;https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "USC;Amazon;UC Berkeley;Stanford;Google", "aff_campus_unique_index": "0;0;2;3;4;0;0", "aff_campus_unique": "Los Angeles;;Berkeley;Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "AiluRus: A Scalable ViT Framework for Dense Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72363", "id": "DVm0xxaEq1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/62c9aa4d48329a85d1e36d5b6d0a6a32-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DVm0xxaEq1", "openreview": "https://openreview.net/forum?id=DVm0xxaEq1", "poster": "/media/PosterPDFs/NeurIPS%202023/72363.png?t=1702027159.9895408", "slides": "https://nips.cc/virtual/2023/poster/72363", "video": "https://nips.cc/virtual/2023/poster/72363", "author_site": "Jin Li, Yaoming Wang, XIAOPENG ZHANG, Bowen Shi, Dongsheng Jiang, Chenglin Li, Wenrui Dai, Hongkai Xiong, Qi Tian", "tldr": "", "abstract": "Vision transformers (ViTs) have emerged as a prevalent architecture for vision tasks owing to their impressive performance. However, their complexity dramatically increases when handling long token sequences, particularly for dense prediction tasks that require high-resolution input. Notably, dense prediction tasks, such as semantic segmentation or object detection, emphasize more on the contours or shapes of objects, while the texture inside objects is less informative. Motivated by this observation, we propose to apply adaptive resolution for different regions in the image according to their importance. Specifically, at the intermediate layer of the ViT, we select anchors from the token sequence using the proposed spatial-aware density-based clustering algorithm. Tokens that are adjacent to anchors are merged to form low-resolution regions, while others are preserved independently as high-resolution. This strategy could significantly reduce the number of tokens, and the following layers only handle the reduced token sequence for acceleration. At the output end, the resolution of the feature map is recovered by unfolding merged tokens for task prediction. Consequently, we can considerably accelerate ViTs for dense prediction tasks. The proposed method is evaluated across three different datasets and demonstrates promising performance. For instance, \"Segmenter ViT-L\" can be accelerated by 48\\% FPS without fine-tuning, while maintaining the performance. Moreover, our method can also be applied to accelerate fine-tuning. 
Experiments indicate that we can save 52\\% training time while accelerating 2.46$\\times$ FPS with only a 0.09\\% performance drop.", "keywords": "vision transformer;dense prediction", "primary_area": "", "supplementary_material": "/attachment/187613055499136012599f8ff463481d6164e07f.pdf", "author": "Jin Li;Yaoming Wang;XIAOPENG ZHANG;Bowen Shi;Dongsheng Jiang;Chenglin Li;Wenrui Dai;Hongkai Xiong;Qi Tian", "authorids": "~Jin_Li10;~Yaoming_Wang1;~XIAOPENG_ZHANG7;~Bowen_Shi2;~Dongsheng_Jiang2;~Chenglin_Li2;~Wenrui_Dai1;~Hongkai_Xiong1;~Qi_Tian3", "gender": ";;M;M;;M;;M;M", "homepage": ";;https://sites.google.com/site/zxphistory/;;;https://min.sjtu.edu.cn/En/FacultyShow/4?Vid=17;;http://min.sjtu.edu.cn;https://www.qitian1987.com/index.html", "dblp": ";;;;;;16/5135.html;21/3569;78/1467-1.html", "google_scholar": ";;Ud6aBAcAAAAJ;lJHbpY0AAAAJ;;ltW2JMcAAAAJ;Xg8MhyAAAAAJ;bB16iN4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;0000-0003-4552-0029;0000-0002-7252-5047", "linkedin": ";;;;;;;;", "or_profile": "~Jin_Li10;~Yaoming_Wang1;~XIAOPENG_ZHANG7;~Bowen_Shi2;~Dongsheng_Jiang2;~Chenglin_Li2;~Wenrui_Dai1;~Hongkai_Xiong1;~Qi_Tian3", "aff": ";;Huawei Technologies Ltd.;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.", "aff_domain": ";;huawei.com;sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com", "position": ";;Principal Researcher;PhD student;;Full Professor;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2023ailurus,\ntitle={AiluRus: A Scalable ViT Framework for Dense Prediction},\nauthor={Jin Li and Yaoming Wang and XIAOPENG ZHANG and Bowen Shi and Dongsheng Jiang and Chenglin Li and Wenrui Dai and Hongkai Xiong and Qi Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DVm0xxaEq1}\n}", "github": "", "project": "", "reviewers": "viNG;PvMg;ZFoB;2MFv;gX5G", "pdf_size": 7555545, "rating": "4;4;5;6;6", "confidence": "4;4;3;3;4", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "91;53;158;143;41", "wc_strengths": "48;62;63;66;27", "wc_weaknesses": "65;213;256;124;73", "wc_questions": "31;29;42;10;4", "wc_limitations": "1;7;16;62;1", "wc_review": "236;364;535;405;146", "wc_reply_reviewers": "0;96;0;23;0", "wc_reply_authors": "0;1042;0;0;101", "reply_reviewers": "0;1;0;1;0", "reply_authors": "1;4;1;1;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 97.2, 46.78632278775497 ], "wc_strengths_avg": [ 53.2, 14.496896219536097 ], "wc_weaknesses_avg": [ 146.2, 76.10886939115572 ], "wc_questions_avg": [ 23.2, 14.07693148381422 ], "wc_limitations_avg": [ 17.4, 22.966061917533878 ], "wc_review_avg": [ 337.2, 135.08426999469629 ], "wc_reply_reviewers_avg": [ 23.8, 37.182791718750764 ], "wc_reply_authors_avg": [ 228.6, 408.57684711691627 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.45643546458763845, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15438248804320899212&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
";;huawei.com;sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Huawei;Shanghai Jiao Tong University", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Huawei;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "How2comm: Communication-Efficient and Collaboration-Pragmatic Multi-Agent Perception", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72362", "id": "Dbaxm9ujq6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4f31327e046913c7238d5b671f5d820e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Dbaxm9ujq6", "openreview": "https://openreview.net/forum?id=Dbaxm9ujq6", "poster": "/media/PosterPDFs/NeurIPS%202023/72362.png?t=1698037135.326504", "slides": "https://nips.cc/virtual/2023/poster/72362", "video": "https://nips.cc/virtual/2023/poster/72362", "author_site": "Dingkang Yang, Kun Yang, Yuzheng Wang, Jing Liu, Zhi Xu, Rongbin Yin, Peng Zhai, Lihua Zhang", "tldr": "", "abstract": "Multi-agent collaborative perception has recently received widespread attention as an emerging application in driving scenarios. Despite the advancements in previous efforts, challenges remain due to various noises in the perception procedure, including communication redundancy, transmission delay, and collaboration heterogeneity. To tackle these issues, we propose \\textit{How2comm}, a collaborative perception framework that seeks a trade-off between perception performance and communication bandwidth. Our novelties lie in three aspects. First, we devise a mutual information-aware communication mechanism to maximally sustain the informative features shared by collaborators. The spatial-channel filtering is adopted to perform effective feature sparsification for efficient communication. Second, we present a flow-guided delay compensation strategy to predict future characteristics from collaborators and eliminate feature misalignment due to temporal asynchrony. Ultimately, a pragmatic collaboration transformer is introduced to integrate holistic spatial semantics and temporal context clues among agents. Our framework is thoroughly evaluated on several LiDAR-based collaborative detection datasets in real-world and simulated scenarios. Comprehensive experiments demonstrate the superiority of How2comm and the effectiveness of all its vital components. 
The code will be released at https://github.com/ydk122024/How2comm.", "keywords": "Collaborative perception;multi-agent communication", "primary_area": "", "supplementary_material": "/attachment/42bde960b34b873890702907699b77da5f52cba3.pdf", "author": "Dingkang Yang;Kun Yang;Yuzheng Wang;Jing Liu;Zhi Xu;Rongbin Yin;Peng Zhai;Lihua Zhang", "authorids": "~Dingkang_Yang1;~Kun_Yang5;~Yuzheng_Wang1;~Jing_Liu14;~Zhi_Xu3;~Rongbin_Yin1;~Peng_Zhai1;~Lihua_Zhang1", "gender": "M;M;;M;M;M;M;M", "homepage": "https://ydk122024.github.io/;;;;;;https://github.com/hnsyzjianghan;https://faet.fudan.edu.cn/3f/9e/c23830a671646/page.htm", "dblp": "304/1099;63/1587;;;;;92/4002;31/3003", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;l08TlGUAAAAJ;;NTICGEsAAAAJ;;;;", "orcid": "0000-0003-1829-5671;0000-0002-9956-2200;;0000-0002-2819-0200;;;0000-0002-1374-7969;0000-0003-0467-4347", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/ACoAAATs7pABOFSAsBE8JJCnZN7eljD-IHh_Eho;rongbin-yin-35b199276/;;", "or_profile": "~Dingkang_Yang1;~Kun_Yang5;~Yuzheng_Wang1;~Jing_Liu14;~Zhi_Xu3;~Rongbin_Yin1;~Peng_Zhai1;~Lihua_Zhang1", "aff": "Fudan University;Fudan University;;Fudan University;Fudan University;FAW;Fudan University;Fudan University", "aff_domain": "fudan.edu;fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn;faw.com.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;;PhD student;PhD student;Director;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyang2023howcomm,\ntitle={How2comm: Communication-Efficient and Collaboration-Pragmatic Multi-Agent Perception},\nauthor={Dingkang Yang and Kun Yang and Yuzheng Wang and Jing Liu and Zhi Xu and Rongbin Yin and Peng Zhai and Lihua Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Dbaxm9ujq6}\n}", "github": "", "project": "", "reviewers": "aK4W;ToHv;fcX9;kdrk", "pdf_size": 1514449, "rating": "4;6;6;8", "confidence": "5;4;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "90;157;135;105", "wc_strengths": "53;117;73;111", "wc_weaknesses": "370;306;279;26", "wc_questions": "129;168;12;147", "wc_limitations": "28;97;1;43", "wc_review": "670;845;500;432", "wc_reply_reviewers": "206;99;12;36", "wc_reply_authors": "668;66;0;28", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 121.75, 26.013217794036937 ], "wc_strengths_avg": [ 88.5, 26.547127904916568 ], "wc_weaknesses_avg": [ 245.25, 130.8269372109582 ], "wc_questions_avg": [ 114.0, 60.48553546096785 ], "wc_limitations_avg": [ 42.25, 35.00982005095142 ], "wc_review_avg": [ 611.75, 160.15363717380882 ], "wc_reply_reviewers_avg": [ 88.25, 75.04123866248477 ], "wc_reply_authors_avg": [ 190.5, 276.6780620143202 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9241955467002402575&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "fudan.edu;fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn;faw.com.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Fudan University;First Automobile 
Works", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.faw.com.cn", "aff_unique_abbr": "Fudan;FAW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Uncovering Prototypical Knowledge for Weakly Open-Vocabulary Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72361", "id": "DdViWdxCTs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e95eb5206c867be843fbc14bbfe8c10e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DdViWdxCTs", "openreview": "https://openreview.net/forum?id=DdViWdxCTs", "poster": "/media/PosterPDFs/NeurIPS%202023/72361.png?t=1699548359.8798187", "slides": "https://nips.cc/virtual/2023/poster/72361", "video": "https://nips.cc/virtual/2023/poster/72361", "author_site": "Fei Zhang, Tianfei Zhou, Boyang Li, Hao He, Chaofan Ma, Tianjiao Zhang, Jiangchao Yao, Ya Zhang, Yanfeng Wang", "tldr": "", "abstract": "This paper studies the problem of weakly open-vocabulary semantic segmentation (WOVSS), which learns to segment objects of arbitrary classes using mere image-text pairs. Existing works turn to enhance the vanilla vision transformer by introducing explicit grouping recognition, i.e., employing several group tokens/centroids to cluster the image tokens and perform the group-text alignment. Nevertheless, these methods suffer from a granularity inconsistency regarding the usage of group tokens, which are aligned in the all-to-one v.s. one-to-one manners during the training and inference phases, respectively. We argue that this discrepancy arises from the lack of elaborate supervision for each group token. To bridge this granularity gap, this paper explores explicit supervision for the group tokens from the prototypical knowledge. To this end, this paper proposes the non-learnable prototypical regularization (NPR) where non-learnable prototypes are estimated from source features to serve as supervision and enable contrastive matching of the group tokens. This regularization encourages the group tokens to segment objects with less redundancy and capture more comprehensive semantic regions, leading to increased compactness and richness. Based on NPR, we propose the prototypical guidance segmentation network (PGSeg) that incorporates multi-modal regularization by leveraging prototypical sources from both images and texts at different levels, progressively enhancing the segmentation capability with diverse prototypical patterns. 
Experimental results show that our proposed method achieves state-of-the-art performance on several benchmark datasets.", "keywords": "Weakly (Text-based) Open-Vocabulary Semantic Segmentation;Vision-Language Pretraining;Prototypical Knowledge", "primary_area": "", "supplementary_material": "/attachment/d0b2db08eb0b5526cdb3da3d28032a869a6e4a81.pdf", "author": "Fei Zhang;Tianfei Zhou;Boyang Li;Hao He;Chaofan Ma;Tianjiao Zhang;Jiangchao Yao;Ya Zhang;Yanfeng Wang", "authorids": "~Fei_Zhang3;~Tianfei_Zhou2;~Boyang_Li2;~Hao_He7;~Chaofan_Ma1;~Tianjiao_Zhang1;~Jiangchao_Yao1;~Ya_Zhang1;~Yanfeng_Wang1", "gender": "M;M;M;M;;M;M;F;M", "homepage": ";https://www.tfzhou.com/;https://yeren123455.github.io/boyangli.github.io/;https://hehao13.github.io;;https://xiaoeyuztj.github.io/;https://sunarker.github.io/;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/", "dblp": ";150/6710;70/1211-7;;;82/8577;166/5900;85/3714-2;55/5407-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ae/citations?user=-_33ccMAAAAJ;XgNj-V0AAAAJ;kdbmt6QAAAAJ;;;w8oDh9QAAAAJ;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-5475-1473;;;;;;0000-0002-5390-9053;0000-0002-3196-2347", "linkedin": "ferenas97/;;;;;;;;", "or_profile": "~Fei_Zhang3;~Tianfei_Zhou2;~Boyang_Li2;~Hao_He7;~Chaofan_Ma1;~Tianjiao_Zhang1;~Jiangchao_Yao1;~Ya_Zhang1;~Yanfeng_Wang1", "aff": "Shanghai AI Lab;Swiss Federal Institute of Technology;National University of Defense Technology;The Chinese University of Hong Kong;;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "pjlab.org.cn;ethz.ch;nudt.edu.cn;link.cuhk.edu.hk;;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Researcher;Postdoctoral Scholar;PhD student;PhD student;;PhD student;Researcher;Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023uncovering,\ntitle={Uncovering Prototypical Knowledge for Weakly Open-Vocabulary Semantic Segmentation},\nauthor={Fei Zhang and Tianfei Zhou and Boyang Li and Hao He and Chaofan Ma and Tianjiao Zhang and Jiangchao Yao and Ya Zhang and Yanfeng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DdViWdxCTs}\n}", "github": "", "project": "", "reviewers": "hVxT;nyAz;ae73;YVCC", "pdf_size": 1778135, "rating": "5;5;5;6", "confidence": "4;3;3;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "53;78;104;70", "wc_strengths": "26;41;67;50", "wc_weaknesses": "128;78;198;233", "wc_questions": "2;78;6;5", "wc_limitations": "4;16;2;17", "wc_review": "213;291;377;375", "wc_reply_reviewers": "0;0;18;42", "wc_reply_authors": "69;0;14;20", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.25, 18.38987493160299 ], "wc_strengths_avg": [ 46.0, 14.849242404917497 ], "wc_weaknesses_avg": [ 159.25, 60.246887886429455 ], "wc_questions_avg": [ 22.75, 31.932546093288583 ], "wc_limitations_avg": [ 9.75, 6.796138609534093 ], "wc_review_avg": [ 314.0, 67.86015030929418 ], "wc_reply_reviewers_avg": [ 15.0, 17.233687939614086 ], "wc_reply_authors_avg": [ 25.75, 26.00360551923521 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 
0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1946215903452974722&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "pjlab.org.cn;ethz.ch;nudt.edu.cn;link.cuhk.edu.hk;;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;4;4", "aff_unique_norm": "Shanghai AI Lab;Swiss Federal Institute of Technology;National University of Defense Technology;Chinese University of Hong Kong;Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.shanghaiailab.com;https://www.ethz.ch;http://www.nudt.edu.cn/;https://www.cuhk.edu.hk;https://www.sjtu.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "SAIL;ETH Zurich;NUDT;CUHK;SJTU;Shanghai AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "China;Switzerland" }, { "title": "Sketchy: Memory-efficient Adaptive Regularization with Frequent Directions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72360", "id": "DeZst6dKyi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef72fa6579401ffff9da246a5014f055-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DeZst6dKyi", "openreview": "https://openreview.net/forum?id=DeZst6dKyi", "poster": "/media/PosterPDFs/NeurIPS%202023/72360.png?t=1701444207.4628103", "slides": "https://nips.cc/virtual/2023/poster/72360", "video": "https://nips.cc/virtual/2023/poster/72360", "author_site": "Vladimir Feinberg, Xinyi Chen, Y. Jennifer Sun, Rohan Anil, Elad Hazan", "tldr": "", "abstract": "Adaptive regularization methods that exploit more than the diagonal entries exhibit state-of-the-art performance for many tasks, but can be prohibitive in terms of memory and running time. We find the spectra of the Kronecker-factored gradient covariance matrix in deep learning (DL) training tasks are concentrated on a small leading eigenspace that changes throughout training, motivating a low-rank sketching approach. We describe a generic method for reducing memory and compute requirements of maintaining a matrix preconditioner using the Frequent Directions (FD) sketch. While previous approaches have explored applying FD for second-order optimization, we present a novel analysis which allows efficient interpolation between resource requirements and the degradation in regret guarantees with rank $k$: in the online convex optimization (OCO) setting over dimension $d$, we match full-matrix $d^2$ memory regret using only $dk$ memory up to additive error in the bottom $d-k$ eigenvalues of the gradient covariance. Further, we show extensions of our work to Shampoo, resulting in a method competitive in quality with Shampoo and Adam, yet requiring only sub-linear memory for tracking second moments.", "keywords": "online convex optimization;deep learning;matrix sketching;frequent directions", "primary_area": "", "supplementary_material": "/attachment/d4fa1fffd6c30963d62a306205a209f56e4dd8eb.pdf", "author": "Vladimir Feinberg;Xinyi Chen;Y.
Jennifer Sun;Rohan Anil;Elad Hazan", "authorids": "~Vladimir_Feinberg2;~Xinyi_Chen1;~Y._Jennifer_Sun1;~Rohan_Anil1;~Elad_Hazan1", "gender": "Not Specified;F;;M;M", "homepage": "http://vladfeinberg.com/;;https://orfe.princeton.edu/people/jennifer-sun;;https://www.ehazan.com", "dblp": "217/1663;84/6214;;182/1833;72/739", "google_scholar": "ayWBpZoAAAAJ;;;;LnhCGNMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Vladimir_Feinberg2;~Xinyi_Chen1;~Y._Jennifer_Sun1;~Rohan_Anil1;~Elad_Hazan1", "aff": "Google;Google DeepMind;Princeton University;Google Brain ;Princeton University", "aff_domain": "google.com;google.com;princeton.edu;google.com;princeton.edu", "position": "Researcher;Researcher;PhD student;Principal Engineer;Full Professor", "bibtex": "@inproceedings{\nfeinberg2023sketchy,\ntitle={Sketchy: Memory-efficient Adaptive Regularization with Frequent Directions},\nauthor={Vladimir Feinberg and Xinyi Chen and Y. Jennifer Sun and Rohan Anil and Elad Hazan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DeZst6dKyi}\n}", "github": "", "project": "", "reviewers": "fXUQ;6Jc9;CbL3;rVs5", "pdf_size": 720866, "rating": "3;6;6;6", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;2", "wc_summary": "44;34;23;136", "wc_strengths": "32;18;28;30", "wc_weaknesses": "189;176;138;65", "wc_questions": "68;95;47;16", "wc_limitations": "18;1;1;8", "wc_review": "351;324;237;255", "wc_reply_reviewers": "78;40;14;495", "wc_reply_authors": "149;0;0;663", "reply_reviewers": "2;1;1;3", "reply_authors": "2;1;1;4", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 59.25, 44.92980636504012 ], "wc_strengths_avg": [ 27.0, 5.385164807134504 ], "wc_weaknesses_avg": [ 142.0, 48.24417063231578 ], "wc_questions_avg": [ 56.5, 28.91798748184251 ], "wc_limitations_avg": [ 7.0, 6.96419413859206 ], "wc_review_avg": [ 291.75, 47.16659305059037 ], "wc_reply_reviewers_avg": [ 156.75, 196.6104969222142 ], "wc_reply_authors_avg": [ 203.0, 272.45825368301837 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9779827651768984227&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com;princeton.edu;google.com;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Google;Princeton University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.princeton.edu", "aff_unique_abbr": "Google;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Automatic Integration for Spatiotemporal Neural Point Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72359", "id": "Deb1yP1zMN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d30c2def27b5c6a5fb21a9aa5c16f8f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Deb1yP1zMN", "openreview": "https://openreview.net/forum?id=Deb1yP1zMN", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72359", "video": "https://nips.cc/virtual/2023/poster/72359", "author_site": "Zihao Zhou, Rose Yu", "tldr": "", "abstract": "Learning continuous-time point processes is essential to many discrete event forecasting tasks. However, integration poses a major challenge, particularly for spatiotemporal point processes (STPPs), as it involves calculating the likelihood through triple integrals over space and time. Existing methods for integrating STPP either assume a parametric form of the intensity function, which lacks flexibility; or approximating the intensity with Monte Carlo sampling, which introduces numerical errors. Recent work by Omi et al. proposes a dual network approach for efficient integration of flexible intensity function. However, their method only focuses on the 1D temporal point process. In this paper, we introduce a novel paradigm: `Auto-STPP` (Automatic Integration for Spatiotemporal Neural Point Processes) that extends the dual network approach to 3D STPP. While previous work provides a foundation, its direct extension overly restricts the intensity function and leads to computational challenges. In response, we introduce a decomposable parametrization for the integral network using ProdNet. This approach, leveraging the product of simplified univariate graphs, effectively sidesteps the computational complexities inherent in multivariate computational graphs. We prove the consistency of `Auto-STPP` and validate it on synthetic data and benchmark real-world datasets. `Auto-STPP` shows a significant advantage in recovering complex intensity functions from irregular spatiotemporal events, particularly when the intensity is sharply localized. Our code is open-source at https://github.com/Rose-STL-Lab/AutoSTPP.", "keywords": "spatiotemporal modeling;neural point processes;integration method", "primary_area": "", "supplementary_material": "/attachment/58cc608af5b96cdeec6f094e88f75df0f8d23b7d.zip", "author": "Zihao Zhou;Rose Yu", "authorids": "~Zihao_Zhou1;~Rose_Yu1", "gender": "M;F", "homepage": "http://zzhou.info;http://roseyu.com", "dblp": ";164/7314", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Zihao_Zhou1;~Rose_Yu1", "aff": ";University of California, San Diego", "aff_domain": ";ucsd.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nzhou2023automatic,\ntitle={Automatic Integration for Spatiotemporal Neural Point Processes},\nauthor={Zihao Zhou and Rose Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Deb1yP1zMN}\n}", "github": "", "project": "", "reviewers": "WN8G;wohe;Hisf;HnTF", "pdf_size": 5293468, "rating": "5;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "74;98;69;68", "wc_strengths": "40;101;42;158", "wc_weaknesses": "39;162;191;121", "wc_questions": "65;94;22;84", "wc_limitations": "1;12;1;60", "wc_review": "219;467;325;491", "wc_reply_reviewers": "4;30;32;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 12.193748398257199 ], "wc_strengths_avg": [ 85.25, 48.6280526034099 ], "wc_weaknesses_avg": [ 128.25, 57.21614719639903 ], "wc_questions_avg": 
[ 66.25, 27.589626673806226 ], "wc_limitations_avg": [ 18.5, 24.37724348649781 ], "wc_review_avg": [ 375.5, 110.40267206911253 ], "wc_reply_reviewers_avg": [ 16.5, 14.585952145814822 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16913185741607966568&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";ucsd.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "NAR-Former V2: Rethinking Transformer for Universal Neural Network Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72358", "id": "DjX2Nr15kY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c60bd92a01804b7df0540ed7ca2f7c05-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DjX2Nr15kY", "openreview": "https://openreview.net/forum?id=DjX2Nr15kY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72358", "video": "https://nips.cc/virtual/2023/poster/72358", "author_site": "Yun Yi, Haokui Zhang, Rong Xiao, Nannan Wang, Xiaoyu Wang", "tldr": "", "abstract": "As more deep learning models are being applied in real-world applications, there is a growing need for modeling and learning the representations of neural networks themselves. An effective representation can be used to predict target attributes of networks without the need for actual training and deployment procedures, facilitating efficient network design and deployment. Recently, inspired by the success of Transformer, some Transformer-based representation learning frameworks have been proposed and achieved promising performance in handling cell-structured models. However, graph neural network (GNN) based approaches still dominate the field of learning representations for the entire network. In this paper, we revisit the Transformer and compare it with GNN to analyze their different architectural characteristics. We then propose a modified Transformer-based universal neural network representation learning model NAR-Former V2. It can learn efficient representations from both cell-structured networks and entire networks. Specifically, we first take the network as a graph and design a straightforward tokenizer to encode the network into a sequence. Then, we incorporate the inductive representation learning capability of GNN into Transformer, enabling Transformer to generalize better when encountering unseen architectures. Additionally, we introduce a series of simple yet effective modifications to enhance the ability of the Transformer in learning representations from graph structures. In encoding entire networks and then predicting the latency, our proposed method surpasses the GNN-based method NNLP by a significant margin on the NNLQP dataset. Furthermore, regarding accuracy prediction on the cell-structured NASBench101 and NASBench201 datasets, our method achieves highly comparable performance to other state-of-the-art methods.
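A minimal sketch of the tokenize-then-encode idea just described: operator-type embeddings form the token sequence, a GNN-style neighbor aggregation injects the graph structure, and a Transformer encodes the result. The layer sizes and the simple mean aggregation are illustrative assumptions, not the NAR-Former V2 architecture.

```python
# Sketch: a network is taken as a graph, each node becomes a token from an
# operator-type embedding, a mean neighbor aggregation adds inductive
# (GNN-style) structure, and a Transformer encodes the token sequence.
import torch
import torch.nn as nn

class GraphTokenEncoder(nn.Module):
    def __init__(self, num_op_types=16, d_model=64):
        super().__init__()
        self.op_embed = nn.Embedding(num_op_types, d_model)
        self.neighbor_proj = nn.Linear(d_model, d_model)
        layer = nn.TransformerEncoderLayer(d_model, nhead=4, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=2)

    def forward(self, op_types, adj):
        # op_types: (B, N) operator ids; adj: (B, N, N) adjacency matrix.
        x = self.op_embed(op_types)
        deg = adj.sum(-1, keepdim=True).clamp(min=1)
        x = x + self.neighbor_proj(adj @ x / deg)   # inductive aggregation
        return self.encoder(x).mean(dim=1)          # one vector per network

enc = GraphTokenEncoder()
z = enc(torch.randint(0, 16, (2, 5)), torch.eye(5).repeat(2, 1, 1))
```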
The code is available at https://github.com/yuny220/NAR-Former-V2.", "keywords": "Transformer;Graph Neural Network;neural network encoding;representation learning;neural architecture search;neural network deployment", "primary_area": "", "supplementary_material": "/attachment/45bf4b66eaa9b918e0864cd99ba31fb1f328c6d9.pdf", "author": "Yun Yi;Haokui Zhang;Rong Xiao;Nannan Wang;Xiaoyu Wang", "authorids": "~Yun_Yi1;~Haokui_Zhang1;~Rong_Xiao3;~Nannan_Wang1;~Xiaoyu_Wang1", "gender": "F;M;M;M;M", "homepage": ";https://teacher.nwpu.edu.cn/2023050022.html;;;http://www.xiaoyumu.com", "dblp": "78/5510;197/5431;;10/8359-1;58/4775-2", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;m3gPwCoAAAAJ;Zb5wT08AAAAJ;SRBn7oUAAAAJ;Bce64xEAAAAJ", "orcid": ";;;;0000-0002-6431-8822", "linkedin": ";%E5%8F%B7%E9%80%B5-%E5%BC%A0-1636a7110/;rong-xiao-5b00b3102;;", "or_profile": "~Yun_Yi1;~Haokui_Zhang1;~Rong_Xiao3;~Nannan_Wang1;~Xiaoyu_Wang1", "aff": "Xidian University;Harbin Institute of Technology;Intellifusion;Xidian University;Intellifusion", "aff_domain": "xidian.edu;hit.edu.cn;intellif.com;xidian.edu.cn;intellif.com", "position": "MS student;Postdoc;Chief Scientist;Full Professor;Chief Scientist", "bibtex": "@inproceedings{\nyi2023narformer,\ntitle={{NAR}-Former V2: Rethinking Transformer for Universal Neural Network Representation Learning},\nauthor={Yun Yi and Haokui Zhang and Rong Xiao and Nannan Wang and Xiaoyu Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DjX2Nr15kY}\n}", "github": "", "project": "", "reviewers": "sJnp;kdkx;9nHL;L4c1", "pdf_size": 201365, "rating": "5;5;5;6", "confidence": "4;2;4;3", "soundness": "3;3;2;3", "novelty": "3;3;2;2", "presentation": "3;2;2;3", "wc_summary": "39;70;125;80", "wc_strengths": "77;49;65;90", "wc_weaknesses": "67;175;43;236", "wc_questions": "3;2;376;68", "wc_limitations": "7;7;3;3", "wc_review": "193;303;612;477", "wc_reply_reviewers": "0;85;0;117", "wc_reply_authors": "0;547;72;267", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.5, 30.80990100600779 ], "wc_strengths_avg": [ 70.25, 15.122417134836613 ], "wc_weaknesses_avg": [ 130.25, 78.73809433812835 ], "wc_questions_avg": [ 112.25, 154.6065571054475 ], "wc_limitations_avg": [ 5.0, 2.0 ], "wc_review_avg": [ 396.25, 160.52628289473347 ], "wc_reply_reviewers_avg": [ 50.5, 51.751811562495085 ], "wc_reply_authors_avg": [ 221.5, 211.79766287662383 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13100161562248846068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "xidian.edu;hit.edu.cn;intellif.com;xidian.edu.cn;intellif.com", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Xidian University;Harbin Institute of Technology;Intellifusion", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.hit.edu.cn/;https://www.intellifusion.com/", "aff_unique_abbr": "Xidian;HIT;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" 
}, { "title": "Optimal Rates for Bandit Nonstochastic Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72357", "id": "DkKHSsmVuA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/45591d6727f0e127295f8d16adba6b23-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DkKHSsmVuA", "openreview": "https://openreview.net/forum?id=DkKHSsmVuA", "poster": "/media/PosterPDFs/NeurIPS%202023/72357.png?t=1701894886.2185597", "slides": "https://nips.cc/virtual/2023/poster/72357", "video": "https://nips.cc/virtual/2023/poster/72357", "author_site": "Y. Jennifer Sun, Stephen Newman, Elad Hazan", "tldr": "", "abstract": "Linear Quadratic Regulator (LQR) and Linear Quadratic Gaussian (LQG) control are foundational and extensively researched problems in optimal control. We investigate LQR and LQG problems with semi-adversarial perturbations and time-varying adversarial bandit loss functions. The best-known sublinear regret algorithm~\\cite{gradu2020non} has a $T^{\\frac{3}{4}}$ time horizon dependence, and its authors posed an open question about whether a tight rate of $\\sqrt{T}$ could be achieved. We answer in the affirmative, giving an algorithm for bandit LQR and LQG which attains optimal regret, up to logarithmic factors. A central component of our method is a new scheme for bandit convex optimization with memory, which is of independent interest.", "keywords": "Bandit control;online learning", "primary_area": "", "supplementary_material": "/attachment/1870ff7edc1cde1c379e7a597d694558e84f87e4.pdf", "author": "Y. Jennifer Sun;Stephen Newman;Elad Hazan", "authorids": "~Y._Jennifer_Sun1;stephen.newman@princeton.edu;~Elad_Hazan1", "gender": ";;M", "homepage": "https://orfe.princeton.edu/people/jennifer-sun;;https://www.ehazan.com", "dblp": ";;72/739", "google_scholar": ";;LnhCGNMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Y._Jennifer_Sun1;stephen.newman@princeton.edu;~Elad_Hazan1", "aff": "Princeton University;;Princeton University", "aff_domain": "princeton.edu;;princeton.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nsun2023optimal,\ntitle={Optimal Rates for Bandit Nonstochastic Control},\nauthor={Y. 
Jennifer Sun and Stephen Newman and Elad Hazan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DkKHSsmVuA}\n}", "github": "", "project": "", "reviewers": "FiLK;7wFb;iuZo;2R7J;6aiM", "pdf_size": 501100, "rating": "5;5;7;7;7", "confidence": "1;3;4;2;4", "soundness": "3;2;3;3;4", "novelty": "3;2;3;3;3", "presentation": "3;2;2;3;4", "wc_summary": "63;43;64;63;111", "wc_strengths": "27;76;14;62;104", "wc_weaknesses": "86;153;94;29;77", "wc_questions": "15;184;753;2;6", "wc_limitations": "118;1;1;2;16", "wc_review": "309;457;926;158;314", "wc_reply_reviewers": "83;510;92;18;50", "wc_reply_authors": "0;885;131;0;0", "reply_reviewers": "1;2;2;1;1", "reply_authors": "1;3;2;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 68.8, 22.52465316048174 ], "wc_strengths_avg": [ 56.6, 32.69005965121507 ], "wc_weaknesses_avg": [ 87.8, 39.66560222661443 ], "wc_questions_avg": [ 192.0, 288.7247824486149 ], "wc_limitations_avg": [ 27.6, 45.55699726715974 ], "wc_review_avg": [ 432.8, 264.1161865543269 ], "wc_reply_reviewers_avg": [ 150.6, 181.59030811141875 ], "wc_reply_authors_avg": [ 203.2, 344.6548418345519 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.560112033611204, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17822818908544908221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "princeton.edu;;princeton.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "How to Scale Your EMA", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72356", "id": "DkeeXVdQyu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e7681dd6fe16052433ab68cd1555bdc9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DkeeXVdQyu", "openreview": "https://openreview.net/forum?id=DkeeXVdQyu", "poster": "/media/PosterPDFs/NeurIPS%202023/72356.png?t=1701269163.1180394", "slides": "https://nips.cc/virtual/2023/poster/72356", "video": "https://nips.cc/virtual/2023/poster/72356", "author_site": "Dan Busbridge, Jason Ramapuram, Pierre Ablin, Tatiana Likhomanenko, Eeshan Gunesh Dhekane, Xavier Suau Cuadros, Russell Webb", "tldr": "", "abstract": "Preserving training dynamics across batch sizes is an important tool for practical machine learning as it enables the trade-off between batch size and wall-clock time. This trade-off is typically enabled by a scaling rule, for example, in stochastic gradient descent, one should scale the learning rate linearly with the batch size. Another important machine learning tool is the model EMA, a functional copy of a target model, whose parameters move towards those of its target model according to an Exponential Moving Average (EMA) at a rate parameterized by a momentum hyperparameter. 
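The model EMA update just described is compact enough to state in code. A minimal sketch, assuming the paper's rule of exponentiating the EMA momentum when the batch size is scaled by kappa, alongside the familiar linear learning-rate scaling rule for SGD; all hyperparameter values below are illustrative.

```python
# Model EMA update: ema <- rho * ema + (1 - rho) * target. Scaling sketch:
# with batch size scaled by kappa, scale the SGD learning rate linearly and
# exponentiate the EMA momentum, rho_hat = rho ** kappa (hedged: values and
# the momentum rule are stated here as an illustration of the abstract).
import copy
import torch

@torch.no_grad()
def ema_update(ema_model, model, rho):
    for e, p in zip(ema_model.parameters(), model.parameters()):
        e.mul_(rho).add_(p, alpha=1.0 - rho)

base_lr, base_rho, kappa = 0.1, 0.999, 8    # e.g. batch size 256 -> 2048
lr = base_lr * kappa                        # linear scaling rule (SGD)
rho = base_rho ** kappa                     # EMA scaling rule

model = torch.nn.Linear(4, 2)
ema_model = copy.deepcopy(model)
ema_update(ema_model, model, rho)
```

With these values rho drops from 0.999 to roughly 0.992, so the EMA keeps the same effective averaging horizon measured in samples seen rather than in optimizer steps.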
This model EMA can improve the robustness and generalization of supervised learning, stabilize pseudo-labeling, and provide a learning signal for Self-Supervised Learning (SSL). Prior works have not considered the optimization of the model EMA when performing scaling, leading to different training dynamics across batch sizes and lower model performance. In this work, we provide a scaling rule for optimization in the presence of a model EMA and demonstrate the rule's validity across a range of architectures, optimizers, and data modalities. We also show the rule's validity where the model EMA contributes to the optimization of the target model, enabling us to train EMA-based pseudo-labeling and SSL methods at small and large batch sizes. For SSL, we enable training of BYOL up to batch size 24,576 without sacrificing performance, a 6$\\times$ wall-clock time reduction under idealized hardware settings.", "keywords": "Optimization;scaling rules;EMA;exponential moving average;self-supervised learning;pseudo-labelling;semi-supervised learning;BYOL;distillation;speech;vision", "primary_area": "", "supplementary_material": "/attachment/4a314510198d73d940b8484317e24b460e3d44d9.pdf", "author": "Dan Busbridge;Jason Ramapuram;Pierre Ablin;Tatiana Likhomanenko;Eeshan Gunesh Dhekane;Xavier Suau;Russell Webb", "authorids": "~Dan_Busbridge1;~Jason_Ramapuram1;~Pierre_Ablin2;~Tatiana_Likhomanenko1;~Eeshan_Gunesh_Dhekane1;~Xavier_Suau1;~Russell_Webb1", "gender": "M;M;M;F;M;M;", "homepage": "https://github.com/dbusbridge;http://jramapuram.github.io;https://pierreablin.com/;https://github.com/tlikhomanenko/tlikhomanenko;https://github.com/eeshandhekane;;", "dblp": "220/3480;200/8958;174/0980.html;202/2094;228/0596.html;21/8106;62/1807", "google_scholar": "https://scholar.google.co.uk/citations?user=CvA9jjMAAAAJ;U-MT4IsAAAAJ;1ZsunaYAAAAJ;https://scholar.google.ru/citations?user=x7Z3ysQAAAAJ;zob4NZEAAAAJ;;", "orcid": "0000-0002-2178-6917;;;0000-0003-0351-9839;0009-0006-3026-6258;;", "linkedin": "danbusbridge/;jramapuram/;;;eeshan-gunesh-dhekane-05677482/;;", "or_profile": "~Dan_Busbridge1;~Jason_Ramapuram1;~Pierre_Ablin2;~Tatiana_Likhomanenko1;~Eeshan_Gunesh_Dhekane1;~Xavier_Suau1;~Russell_Webb1", "aff": "Apple;Apple;Apple;Apple;Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "position": "Researcher;Researcher;Researcher;Research Scientist;Researcher;Research scientist;Researcher", "bibtex": "@inproceedings{\nbusbridge2023how,\ntitle={How to Scale Your {EMA}},\nauthor={Dan Busbridge and Jason Ramapuram and Pierre Ablin and Tatiana Likhomanenko and Eeshan Gunesh Dhekane and Xavier Suau and Russell Webb},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DkeeXVdQyu}\n}", "github": "", "project": "", "reviewers": "6nqM;nnfC;dfpr;bHsu", "pdf_size": 3777892, "rating": "6;7;7;7", "confidence": "4;4;4;2", "soundness": "3;3;3;3", "novelty": "1;3;3;3", "presentation": "2;4;2;3", "wc_summary": "41;89;257;218", "wc_strengths": "37;112;155;63", "wc_weaknesses": "34;250;691;217", "wc_questions": "102;116;163;5", "wc_limitations": "35;9;201;49", "wc_review": "249;576;1467;552", "wc_reply_reviewers": "62;14;73;31", "wc_reply_authors": "62;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": 
[ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 151.25, 88.97857888278504 ], "wc_strengths_avg": [ 91.75, 45.372761652780184 ], "wc_weaknesses_avg": [ 298.0, 241.35554685981427 ], "wc_questions_avg": [ 96.5, 57.45650528878345 ], "wc_limitations_avg": [ 73.5, 74.9983333148144 ], "wc_review_avg": [ 711.0, 455.106031601428 ], "wc_reply_reviewers_avg": [ 45.0, 23.61143790623519 ], "wc_reply_authors_avg": [ 15.5, 26.846787517317598 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3561131031275788986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Sharpness Minimization Algorithms Do Not Only Minimize Sharpness To Achieve Better Generalization", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72355", "id": "Dkmpa6wCIx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0354767c6386386be17cabe4fc59711b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Dkmpa6wCIx", "openreview": "https://openreview.net/forum?id=Dkmpa6wCIx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72355", "video": "https://nips.cc/virtual/2023/poster/72355", "author_site": "Kaiyue Wen, Zhiyuan Li, Tengyu Ma", "tldr": "", "abstract": "Despite extensive studies, the underlying reason as to why overparameterized\nneural networks can generalize remains elusive. Existing theory shows that common stochastic optimizers prefer flatter minimizers of the training loss, and thus\na natural potential explanation is that flatness implies generalization. This work\ncritically examines this explanation. Through theoretical and empirical investigation, we identify the following three scenarios for two-layer ReLU networks: (1)\nflatness provably implies generalization; (2) there exist non-generalizing flattest\nmodels and sharpness minimization algorithms fail to generalize, and (3)\nperhaps most strikingly, there exist non-generalizing flattest models, but sharpness\nminimization algorithms still generalize. Our results suggest that the relationship\nbetween sharpness and generalization subtly depends on the data distributions\nand the model architectures, and sharpness minimization algorithms do not only\nminimize sharpness to achieve better generalization.
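As a concrete instance of the sharpness minimization algorithms discussed here, a minimal one-step sketch of SAM (named in the paper's keywords) follows; this is the generic two-pass recipe, not the paper's experimental setup, and the toy model and hyperparameters are illustrative.

```python
# One SAM step: perturb weights toward the (approximate) worst case within
# an L2 ball of radius rho, then descend using the gradient taken at the
# perturbed point.
import torch
import torch.nn.functional as F

def sam_step(model, loss_fn, x, y, opt, rho=0.05):
    loss_fn(model(x), y).backward()          # gradient at current weights w
    grads = [p.grad.detach().clone() for p in model.parameters()]
    norm = torch.sqrt(sum((g ** 2).sum() for g in grads))
    scale = float(rho / (norm + 1e-12))
    with torch.no_grad():                    # ascend to w + eps
        for p, g in zip(model.parameters(), grads):
            p.add_(g, alpha=scale)
    opt.zero_grad()
    loss_fn(model(x), y).backward()          # gradient at perturbed weights
    with torch.no_grad():                    # undo the perturbation
        for p, g in zip(model.parameters(), grads):
            p.sub_(g, alpha=scale)
    opt.step()                               # descend with the SAM gradient
    opt.zero_grad()

model = torch.nn.Linear(4, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
sam_step(model, F.mse_loss, torch.randn(8, 4), torch.randn(8, 1), opt)
```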
This calls for the search for\nother explanations for the generalization of over-parameterized neural networks", "keywords": "Sharpness;Flatness;Generalization;Generalization Bound;SAM", "primary_area": "", "supplementary_material": "/attachment/4b0c11e1ac24ed5af8744525d9ec3d9a75c6be16.zip", "author": "Kaiyue Wen;Zhiyuan Li;Tengyu Ma", "authorids": "~Kaiyue_Wen1;~Zhiyuan_Li2;~Tengyu_Ma1", "gender": "M;M;M", "homepage": "https://whenwen.github.io/;https://zhiyuanli.ttic.edu;http://ai.stanford.edu/~tengyuma/", "dblp": "322/0395;l/ZhiyuanLi;54/9061", "google_scholar": ";https://scholar.google.com/citations?hl=en;i38QlUwAAAAJ", "orcid": "0000-0002-3128-868X;;", "linkedin": "kaiyue-wen-a3a336192/;;", "or_profile": "~Kaiyue_Wen1;~Zhiyuan_Li2;~Tengyu_Ma1", "aff": "Stanford University;Computer Science Department, Stanford University;Facebook AI Research", "aff_domain": "stanford.edu;cs.stanford.edu;fb.com", "position": "Intern;Postdoc;Visiting Scientist", "bibtex": "@inproceedings{\nwen2023sharpness,\ntitle={Sharpness Minimization Algorithms Do Not Only Minimize Sharpness To Achieve Better Generalization},\nauthor={Kaiyue Wen and Zhiyuan Li and Tengyu Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Dkmpa6wCIx}\n}", "github": "", "project": "", "reviewers": "ex27;2a5p;fkTA;ZroU", "pdf_size": 578190, "rating": "5;5;7;8", "confidence": "4;2;3;4", "soundness": "2;2;3;4", "novelty": "2;2;4;4", "presentation": "3;2;2;4", "wc_summary": "42;248;90;161", "wc_strengths": "20;37;61;138", "wc_weaknesses": "235;67;184;129", "wc_questions": "44;47;21;696", "wc_limitations": "1;7;7;8", "wc_review": "342;406;363;1132", "wc_reply_reviewers": "91;35;0;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 135.25, 77.65106245248677 ], "wc_strengths_avg": [ 64.0, 45.13867521316947 ], "wc_weaknesses_avg": [ 153.75, 62.5594717049305 ], "wc_questions_avg": [ 202.0, 285.3883319268677 ], "wc_limitations_avg": [ 5.75, 2.7726341266023544 ], "wc_review_avg": [ 560.75, 330.6171312863264 ], "wc_reply_reviewers_avg": [ 35.0, 34.64823227814083 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4061811972299616, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12428390369575169760&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;cs.stanford.edu;fb.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Data-Centric Learning from Unlabeled Graphs with Diffusion Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72354", "id": "DmakwvCJ7l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4290cccf23be59e42a575d026ccbeeb8-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=DmakwvCJ7l", "openreview": "https://openreview.net/forum?id=DmakwvCJ7l", "poster": "/media/PosterPDFs/NeurIPS%202023/72354.png?t=1699745598.4182913", "slides": "https://nips.cc/virtual/2023/poster/72354", "video": "https://nips.cc/virtual/2023/poster/72354", "author_site": "Gang Liu, Eric Inae, Tong Zhao, Jiaxin Xu, Tengfei Luo, Meng Jiang", "tldr": "", "abstract": "Graph property prediction tasks are important and numerous. While each task offers a small size of labeled examples, unlabeled graphs have been collected from various sources and at a large scale. A conventional approach is training a model with the unlabeled graphs on self-supervised tasks and then fine-tuning the model on the prediction tasks. However, the self-supervised task knowledge could not be aligned or sometimes conflicted with what the predictions needed. In this paper, we propose to extract the knowledge underlying the large set of unlabeled graphs as a specific set of useful data points to augment each property prediction model. We use a diffusion model to fully utilize the unlabeled graphs and design two new objectives to guide the model's denoising process with each task's labeled data to generate task-specific graph examples and their labels. Experiments demonstrate that our data-centric approach performs significantly better than fifteen existing various methods on fifteen tasks. The performance improvement brought by unlabeled data is visible as the generated labeled examples unlike the self-supervised learning.", "keywords": "Graph property prediction;Molecular property prediction;Diffusion model;Unlabeled data;Data augmentation;Transfer learning", "primary_area": "", "supplementary_material": "/attachment/95c62e0cf154f6957d053feeeb478762dc2afa9b.zip", "author": "Gang Liu;Eric Inae;Tong Zhao;Jiaxin Xu;Tengfei Luo;Meng Jiang", "authorids": "~Gang_Liu6;~Eric_Inae1;~Tong_Zhao3;~Jiaxin_Xu1;~Tengfei_Luo1;~Meng_Jiang3", "gender": "M;;M;Non-Binary;M;M", "homepage": "https://liugangcode.github.io/;;https://tzhao.io/;https://www.linkedin.com/in/jiaxin-xu-75931213a/;https://monsterlab.nd.edu;http://www.meng-jiang.com/", "dblp": "37/2109-25;342/8313;94/6503-3;76/10625;;69/339-1", "google_scholar": "zdF3vTYAAAAJ;https://scholar.google.com/citations?view_op=list_works;05cRc-MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;VIiy6ugAAAAJ;LZIPfCkAAAAJ", "orcid": "0000-0003-4204-731X;0009-0002-2101-2126;0000-0001-7660-1732;0000-0001-9830-3189;;0000-0002-3009-519X", "linkedin": ";eric-inae-6056b1214/;;jiaxin-xu-75931213a/;;meng-jiang-94b10916/", "or_profile": "~Gang_Liu6;~Eric_Inae1;~Tong_Zhao3;~Jiaxin_Xu1;~Tengfei_Luo1;~Meng_Jiang3", "aff": "University of Notre Dame;University of Notre Dame;Snap Inc.;University of Notre Dame;University of Notre Dame;University of Notre Dame", "aff_domain": "nd.edu;nd.edu;snap.com;nd.edu;nd.edu;nd.edu", "position": "PhD student;PhD student;Researcher;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023datacentric,\ntitle={Data-Centric Learning from Unlabeled Graphs with Diffusion Model},\nauthor={Gang Liu and Eric Inae and Tong Zhao and Jiaxin Xu and Tengfei Luo and Meng Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DmakwvCJ7l}\n}", "github": "", "project": "", "reviewers": "3gzg;trSH;T9FT;6AKL", "pdf_size": 2075052, "rating": "5;6;6;6", "confidence": "3;3;5;4", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "3;4;3;2", 
"wc_summary": "246;68;19;134", "wc_strengths": "43;81;50;90", "wc_weaknesses": "144;56;325;26", "wc_questions": "112;88;2;101", "wc_limitations": "1;13;1;10", "wc_review": "546;306;397;361", "wc_reply_reviewers": "24;0;225;18", "wc_reply_authors": "19;0;641;41", "reply_reviewers": "1;0;3;1", "reply_authors": "2;1;4;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.75, 85.0510875885782 ], "wc_strengths_avg": [ 66.0, 19.912307751739878 ], "wc_weaknesses_avg": [ 137.75, 116.48256307276209 ], "wc_questions_avg": [ 75.75, 43.41874595148966 ], "wc_limitations_avg": [ 6.25, 5.356071321407137 ], "wc_review_avg": [ 402.5, 88.96207056942863 ], "wc_reply_reviewers_avg": [ 66.75, 91.79154372816703 ], "wc_reply_authors_avg": [ 175.25, 269.29201157850935 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17230539317937863137&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nd.edu;nd.edu;snap.com;nd.edu;nd.edu;nd.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Notre Dame;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;https://www.snapinc.com", "aff_unique_abbr": "Notre Dame;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Approximation-Generalization Trade-offs under (Approximate) Group Equivariance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72353", "id": "DnO6LTQ77U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c35f8e2fc6d81f195009a1d2ae5f6ae9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DnO6LTQ77U", "openreview": "https://openreview.net/forum?id=DnO6LTQ77U", "poster": "/media/PosterPDFs/NeurIPS%202023/72353.png?t=1701705574.2955732", "slides": "https://nips.cc/virtual/2023/poster/72353", "video": "https://nips.cc/virtual/2023/poster/72353", "author_site": "Mircea Petrache, Shubhendu Trivedi", "tldr": "", "abstract": "The explicit incorporation of task-specific inductive biases through symmetry has emerged as a general design precept in the development of high-performance machine learning models. For example, group equivariant neural networks have demonstrated impressive performance across various domains and applications such as protein and drug design. A prevalent intuition about such models is that the integration of relevant symmetry results in enhanced generalization. Moreover, it is posited that when the data and/or the model exhibits only approximate or partial symmetry, the optimal or best-performing model is one where the model symmetry aligns with the data symmetry. In this paper, we conduct a formal unified investigation of these intuitions. To begin, we present quantitative bounds that demonstrate how models capturing task-specific symmetries lead to improved generalization. Utilizing this quantification, we examine the more general question of dealing with approximate/partial symmetries. 
We establish, for a given symmetry group, a quantitative comparison between the approximate equivariance of the model and that of the data distribution, precisely connecting model equivariance error and data equivariance error. Our result delineates the conditions under which the model equivariance error is optimal, thereby yielding the best-performing model for the given task and data.", "keywords": "Equivariance;Invariance;Generalization;Equivariant Neural Networks;Approximation Error", "primary_area": "", "supplementary_material": "/attachment/b8b3b73bcb2b30f20769d243e39ef4d1cade266c.zip", "author": "Mircea Petrache;Shubhendu Trivedi", "authorids": "~Mircea_Petrache1;~Shubhendu_Trivedi2", "gender": "M;M", "homepage": "https://sites.google.com/site/mircpetrache/home;https://ttic.uchicago.edu/~shubhendu/", "dblp": ";97/9735", "google_scholar": "HiYZ-6MAAAAJ;EbyGwncAAAAJ", "orcid": "0000-0003-2181-169X;", "linkedin": "mircea-petrache-4983a4104/;", "or_profile": "~Mircea_Petrache1;~Shubhendu_Trivedi1", "aff": "Pontificia Universidad Catolica de Chile;Massachusetts Institute of Technology", "aff_domain": "puc.cl;mit.edu", "position": "Assistant Professor;Research Associate", "bibtex": "@inproceedings{\npetrache2023approximationgeneralization,\ntitle={Approximation-Generalization Trade-offs under (Approximate) Group Equivariance},\nauthor={Mircea Petrache and Shubhendu Trivedi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DnO6LTQ77U}\n}", "github": "", "project": "", "reviewers": "W1Je;R2KT;5syP;y3sQ;CLQc;PxmR", "pdf_size": 664853, "rating": "4;6;7;7;7;8", "confidence": "2;3;3;4;4;2", "soundness": "2;2;3;4;4;4", "novelty": "1;3;3;3;4;4", "presentation": "1;3;3;2;3;3", "wc_summary": "103;56;88;102;190;141", "wc_strengths": "14;115;12;27;44;39", "wc_weaknesses": "37;438;52;173;111;67", "wc_questions": "129;160;14;128;28;52", "wc_limitations": "1;1;21;1;1;42", "wc_review": "284;770;187;431;374;341", "wc_reply_reviewers": "84;49;91;61;0;20", "wc_reply_authors": "336;160;163;0;0;43", "reply_reviewers": "1;1;1;1;0;1", "reply_authors": "2;2;2;1;1;2", "rating_avg": [ 6.5, 1.2583057392117916 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.1666666666666665, 0.8975274678557507 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 2.5, 0.7637626158259734 ], "wc_summary_avg": [ 113.33333333333333, 42.440808453918756 ], "wc_strengths_avg": [ 41.833333333333336, 34.76308706404283 ], "wc_weaknesses_avg": [ 146.33333333333334, 137.9464469358389 ], "wc_questions_avg": [ 85.16666666666667, 55.95955881011056 ], "wc_limitations_avg": [ 11.166666666666666, 15.603596451530725 ], "wc_review_avg": [ 397.8333333333333, 182.94572662100882 ], "wc_reply_reviewers_avg": [ 50.833333333333336, 32.54441409656792 ], "wc_reply_authors_avg": [ 117.0, 118.86967653695369 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5941419441439601359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "puc.cl;mit.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Pontificia Universidad Catolica de Chile;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.puc.cl;https://web.mit.edu", "aff_unique_abbr": 
"PUC;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Chile;United States" }, { "title": "On the Implicit Bias of Linear Equivariant Steerable Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72352", "id": "DnVjDRLwVu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/136a45cd9b841bf785625709a19c6508-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DnVjDRLwVu", "openreview": "https://openreview.net/forum?id=DnVjDRLwVu", "poster": "/media/PosterPDFs/NeurIPS%202023/72352.png?t=1701624867.3072832", "slides": "https://nips.cc/virtual/2023/poster/72352", "video": "https://nips.cc/virtual/2023/poster/72352", "author_site": "Ziyu Chen, Wei Zhu", "tldr": "", "abstract": "We study the implicit bias of gradient flow on linear equivariant steerable networks in group-invariant binary classification. Our findings reveal that the parameterized predictor converges in direction to the unique group-invariant classifier with a maximum margin defined by the input group action. Under a unitary assumption on the input representation, we establish the equivalence between steerable networks and data augmentation. Furthermore, we demonstrate the improved margin and generalization bound of steerable networks over their non-invariant counterparts.", "keywords": "implicit bias;equivariant steerable networks;data augmentation;margin;generalization bound", "primary_area": "", "supplementary_material": "/attachment/3939e053e54795888d1760b27477f92c7fc1af81.pdf", "author": "Ziyu Chen;Wei Zhu", "authorids": "~Ziyu_Chen6;~Wei_Zhu1", "gender": ";M", "homepage": ";https://sites.google.com/view/weizhumath/home", "dblp": ";83/4805-7", "google_scholar": ";F3AxPYQAAAAJ", "orcid": ";0000-0002-9181-5103", "linkedin": ";", "or_profile": "~Ziyu_Chen6;~Wei_Zhu1", "aff": ";University of Massachusetts, Amherst", "aff_domain": ";umass.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nchen2023on,\ntitle={On the Implicit Bias of Linear Equivariant Steerable Networks},\nauthor={Ziyu Chen and Wei Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DnVjDRLwVu}\n}", "github": "", "project": "", "reviewers": "H6K3;vUEN;nbB9;3ceq;WwNm;UfQK", "pdf_size": 384651, "rating": "5;5;6;6;6;7", "confidence": "3;3;2;1;2;4", "soundness": "2;2;4;3;3;3", "novelty": "3;3;3;4;3;3", "presentation": "3;3;2;3;3;3", "wc_summary": "61;247;66;50;89;216", "wc_strengths": "49;63;41;131;95;54", "wc_weaknesses": "71;303;90;29;43;204", "wc_questions": "213;202;244;14;7;108", "wc_limitations": "1;3;4;9;1;1", "wc_review": "395;818;445;233;235;583", "wc_reply_reviewers": "59;0;117;0;13;105", "wc_reply_authors": "9;0;0;0;0;14", "reply_reviewers": "1;0;1;0;1;2", "reply_authors": "2;1;1;1;1;2", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 2.5, 0.9574271077563381 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 121.5, 79.15122656451173 ], "wc_strengths_avg": [ 72.16666666666667, 31.370988012635 ], "wc_weaknesses_avg": [ 123.33333333333333, 98.31016676259323 ], "wc_questions_avg": [ 131.33333333333334, 94.98011487791653 ], "wc_limitations_avg": [ 3.1666666666666665, 2.852873794770615 ], "wc_review_avg": [ 451.5, 203.90500239081925 ], 
"wc_reply_reviewers_avg": [ 49.0, 48.232077845903895 ], "wc_reply_authors_avg": [ 3.8333333333333335, 5.610010893235611 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.1266600992762247, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1907288818647853580&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";umass.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Statistically Valid Variable Importance Assessment through Conditional Permutations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72351", "id": "DoE3esTIEM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d60e14c19cd6e0fc38556ad29ac8fbc9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DoE3esTIEM", "openreview": "https://openreview.net/forum?id=DoE3esTIEM", "poster": "/media/PosterPDFs/NeurIPS%202023/72351.png?t=1701826817.843895", "slides": "https://nips.cc/virtual/2023/poster/72351", "video": "https://nips.cc/virtual/2023/poster/72351", "author_site": "Ahmad CHAMMA, Denis Engemann, Bertrand Thirion", "tldr": "", "abstract": "Variable importance assessment has become a crucial step in machine-learning applications when using complex learners, such as deep neural networks, on large-scale data. Removal-based importance assessment is currently the reference approach, particularly when statistical guarantees are sought to justify variable inclusion. It is often implemented with variable permutation schemes. On the flip side, these approaches risk misidentifying unimportant variables as important in the presence of correlations among covariates. Here we develop a systematic approach for studying Conditional Permutation Importance (CPI) that is model agnostic and computationally lean, as well as reusable benchmarks of state-of-the-art variable importance estimators. We show theoretically and empirically that \\textit{CPI} overcomes the limitations of standard permutation importance by providing accurate type-I error control. When used with a deep neural network, \\textit{CPI} consistently showed top accuracy across benchmarks. An experiment on real-world data analysis in a large-scale medical dataset showed that \\textit{CPI} provides a more parsimonious selection of statistically significant variables. 
Our results suggest that \\textit{CPI} can be readily used as a drop-in replacement for permutation-based methods.", "keywords": "Interpretability;Variable Importance;Machine Learning;Deep Learning;Statistical Inference", "primary_area": "", "supplementary_material": "/attachment/a93462a83b9e03df0d47c1af87aa3107a675be61.pdf", "author": "Ahmad Chamma;Denis Engemann;Bertrand Thirion", "authorids": "~Ahmad_Chamma1;denis.engemann@roche.com;~Bertrand_Thirion1", "gender": "M;;M", "homepage": ";;http://pages.saclay.inria.fr/bertrand.thirion", "dblp": ";;62/2019", "google_scholar": "OzxzBdcAAAAJ;;MeKi5_AAAAAJ", "orcid": ";;0000-0001-5018-7895", "linkedin": ";;", "or_profile": "~Ahmad_Chamma1;denis.engemann@roche.com;~Bertrand_Thirion1", "aff": "INRIA;;INRIA", "aff_domain": "inria.fr;;inria.fr", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nchamma2023statistically,\ntitle={Statistically Valid Variable Importance Assessment through Conditional Permutations},\nauthor={Ahmad Chamma and Denis Engemann and Bertrand Thirion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DoE3esTIEM}\n}", "github": "", "project": "", "reviewers": "XsJL;HX9z;pFgF;2imb", "pdf_size": 4170583, "rating": "4;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "147;68;78;38", "wc_strengths": "335;71;73;107", "wc_weaknesses": "749;177;63;225", "wc_questions": "2;81;130;58", "wc_limitations": "29;95;77;26", "wc_review": "1262;492;421;454", "wc_reply_reviewers": "383;130;17;53", "wc_reply_authors": "1042;224;0;44", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.75, 39.90848907187542 ], "wc_strengths_avg": [ 146.5, 109.76679825885421 ], "wc_weaknesses_avg": [ 303.5, 263.8536526182649 ], "wc_questions_avg": [ 67.75, 46.0129057982649 ], "wc_limitations_avg": [ 56.75, 29.953088321573787 ], "wc_review_avg": [ 657.25, 350.0552634942089 ], "wc_reply_reviewers_avg": [ 145.75, 142.92895962680203 ], "wc_reply_authors_avg": [ 327.5, 420.9664475941046 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1783917627791691540&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "inria.fr;;inria.fr", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Selectively Sharing Experiences Improves Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72350", "id": "DpuphOgJqh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/baa7fc022f35b6ea7b8b2a2fe60babe0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DpuphOgJqh", "openreview": "https://openreview.net/forum?id=DpuphOgJqh", "poster":
"/media/PosterPDFs/NeurIPS%202023/72350.png?t=1702050054.475305", "slides": "https://nips.cc/virtual/2023/poster/72350", "video": "https://nips.cc/virtual/2023/poster/72350", "author_site": "Matthias Gerstgrasser, Tom Danino, Sarah Keren", "tldr": "", "abstract": "We present a novel multi-agent RL approach, Selective Multi-Agent Prioritized Experience Relay, in which agents share with other agents a limited number of transitions they observe during training. The intuition behind this is that even a small number of relevant experiences from other agents could help each agent learn. Unlike many other multi-agent RL algorithms, this approach allows for largely decentralized training, requiring only a limited communication channel between agents. We show that our approach outperforms baseline no-sharing decentralized training and state-of-the art multi-agent RL algorithms. Further, sharing only a small number of highly relevant experiences outperforms sharing all experiences between agents, and the performance uplift from selective experience sharing is robust across a range of hyperparameters and DQN variants.", "keywords": "multi-agent reinforcement learning;reinforcement learning;deep q learning;cooperative ai", "primary_area": "", "supplementary_material": "/attachment/5413b04f20a93b334f013c944a054d6ac40b503a.zip", "author": "Matthias Gerstgrasser;Tom Danino;Sarah Keren", "authorids": "~Matthias_Gerstgrasser1;~Tom_Danino1;~Sarah_Keren1", "gender": ";M;", "homepage": "https://matthias.gerstgrasser.net/;;https://sarahk.cs.technion.ac.il", "dblp": "182/1338;;132/0317", "google_scholar": "qEirpPYAAAAJ;HdCffXUAAAAJ;Lmco3q8AAAAJ", "orcid": ";0009-0006-0503-6063;", "linkedin": ";tom-danino-85209b161/;", "or_profile": "~Matthias_Gerstgrasser1;~Tom_Danino1;~Sarah_Keren1", "aff": "Harvard University;Technion - Israel Institute of Technology, Technion;Technion, Technion", "aff_domain": "harvard.edu;technion.ac.il;technion.ac.il", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngerstgrasser2023selectively,\ntitle={Selectively Sharing Experiences Improves Multi-Agent Reinforcement Learning},\nauthor={Matthias Gerstgrasser and Tom Danino and Sarah Keren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DpuphOgJqh}\n}", "github": "", "project": "", "reviewers": "8wH6;aJ1o;3mbW;mQNR", "pdf_size": 586797, "rating": "3;4;6;6", "confidence": "5;4;3;5", "soundness": "3;2;4;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "62;40;21;64", "wc_strengths": "84;32;60;84", "wc_weaknesses": "99;111;183;205", "wc_questions": "84;39;8;11", "wc_limitations": "13;3;5;13", "wc_review": "342;225;277;377", "wc_reply_reviewers": "226;0;307;0", "wc_reply_authors": "721;138;977;138", "reply_reviewers": "1;0;2;0", "reply_authors": "3;2;3;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 46.75, 17.597940220378067 ], "wc_strengths_avg": [ 65.0, 21.42428528562855 ], "wc_weaknesses_avg": [ 149.5, 45.37345038676252 ], "wc_questions_avg": [ 35.5, 30.5 ], "wc_limitations_avg": [ 8.5, 4.55521678957215 ], "wc_review_avg": [ 305.25, 58.601941094131 ], "wc_reply_reviewers_avg": [ 133.25, 136.29265387393409 ], "wc_reply_authors_avg": [ 493.5, 366.8409055707937 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 
2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4061811972299616, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13116510320428103392&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "harvard.edu;technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Harvard University;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.technion.ac.il", "aff_unique_abbr": "Harvard;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Israel" }, { "title": "Decentralized Randomly Distributed Multi-agent Multi-armed Bandit with Heterogeneous Rewards", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72349", "id": "DqfdhM64LI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec795aeadae0b7d230fa35cbaf04c041-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DqfdhM64LI", "openreview": "https://openreview.net/forum?id=DqfdhM64LI", "poster": "/media/PosterPDFs/NeurIPS%202023/72349.png?t=1702219865.9097085", "slides": "https://nips.cc/virtual/2023/poster/72349", "video": "https://nips.cc/virtual/2023/poster/72349", "author_site": "Mengfan Xu, Diego Klabjan", "tldr": "", "abstract": "We study a decentralized multi-agent multi-armed bandit problem in which multiple clients are connected by time dependent random graphs provided by an environment. The reward distributions of each arm vary across clients and rewards are generated independently over time by an environment based on distributions that include both sub-exponential and sub-gaussian distributions. Each client pulls an arm and communicates with neighbors based on the graph provided by the environment. The goal is to minimize the overall regret of the entire system through collaborations. To this end, we introduce a novel algorithmic framework, which first provides robust simulation methods for generating random graphs using rapidly mixing markov chains or the random graph model, and then combines an averaging-based consensus approach with a newly proposed weighting technique and the upper confidence bound to deliver a UCB-type solution. Our algorithms account for the randomness in the graphs, removing the conventional doubly stochasticity assumption, and only require the knowledge of the number of clients at initialization. We derive optimal instance-dependent regret upper bounds of order $\\log{T}$ in both sub-gaussian and sub-exponential environments, and a nearly optimal instance-free regret upper bound of order $\\sqrt{T}\\log T$ up to a $\\log T$ factor. 
Importantly, our regret bounds hold with high probability and capture graph randomness, whereas prior works consider expected regret under assumptions and require more stringent reward distributions.", "keywords": "decentralized multi-agent MAB;heterogeneous light-tailed and heavy-tailed rewards;time dependent random graphs", "primary_area": "", "supplementary_material": "/attachment/a11e2b0853b3d2ad3f268014dbd803fa0bbea802.pdf", "author": "Mengfan Xu;Diego Klabjan", "authorids": "~Mengfan_Xu1;~Diego_Klabjan1", "gender": "F;M", "homepage": "https://mengfanxu1997.github.io/;http://dynresmanagement.com/index.html", "dblp": "205/7008;17/105", "google_scholar": "MR47V4cAAAAJ;TaQZ_VUAAAAJ", "orcid": ";0000-0003-4213-9281", "linkedin": "mengfan-xu-4ba804250/;diegoklabjan", "or_profile": "~Mengfan_Xu1;~Diego_Klabjan1", "aff": "Northwestern University, Northwestern University;Northwestern University", "aff_domain": "u.northwestern.edu;u.northwestern.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nxu2023decentralized,\ntitle={Decentralized Randomly Distributed Multi-agent Multi-armed Bandit with Heterogeneous Rewards},\nauthor={Mengfan Xu and Diego Klabjan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DqfdhM64LI}\n}", "github": "", "project": "", "reviewers": "H8V9;zM1d;3jx9;NP2Q;a3u4", "pdf_size": 3271596, "rating": "6;7;7;7;7", "confidence": "2;4;4;3;2", "soundness": "3;3;3;3;4", "novelty": "3;3;4;3;4", "presentation": "3;3;3;3;3", "wc_summary": "135;166;65;215;121", "wc_strengths": "80;50;35;48;60", "wc_weaknesses": "155;307;27;27;90", "wc_questions": "4;13;49;150;53", "wc_limitations": "1;1;1;1;19", "wc_review": "375;537;177;441;343", "wc_reply_reviewers": "107;40;17;55;24", "wc_reply_authors": "587;688;437;959;193", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;3;3;3", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 140.4, 49.62096331189068 ], "wc_strengths_avg": [ 54.6, 14.987995196156156 ], "wc_weaknesses_avg": [ 121.2, 104.29266513039161 ], "wc_questions_avg": [ 53.8, 51.812739746128074 ], "wc_limitations_avg": [ 4.6, 7.2 ], "wc_review_avg": [ 374.6, 119.01865400011881 ], "wc_reply_reviewers_avg": [ 48.6, 32.028737096551275 ], "wc_reply_authors_avg": [ 572.8, 255.073636426817 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2984128454056775062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "u.northwestern.edu;u.northwestern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Navigating the Pitfalls of Active Learning Evaluation: A Systematic Framework for Meaningful Performance Assessment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72348", "id": "Dqn715Txgl", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1ed4723f12853cbd02aecb8160f5e0c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Dqn715Txgl", "openreview": "https://openreview.net/forum?id=Dqn715Txgl", "poster": "/media/PosterPDFs/NeurIPS%202023/72348.png?t=1702040477.92891", "slides": "https://nips.cc/virtual/2023/poster/72348", "video": "https://nips.cc/virtual/2023/poster/72348", "author_site": "Carsten L\u00fcth, Till Bungert, Lukas Klein, Paul Jaeger", "tldr": "", "abstract": "Active Learning (AL) aims to reduce the labeling burden by interactively selecting the most informative samples from a pool of unlabeled data. While there has been extensive research on improving AL query methods in recent years, some studies have questioned the effectiveness of AL compared to emerging paradigms such as semi-supervised (Semi-SL) and self-supervised learning (Self-SL), or a simple optimization of classifier configurations. Thus, today\u2019s AL literature presents an inconsistent and contradictory landscape, leaving practitioners uncertain about whether and how to use AL in their tasks. In this work, we make the case that this inconsistency arises from a lack of systematic and realistic evaluation of AL methods. Specifically, we identify five key pitfalls in the current literature that reflect the delicate considerations required for AL evaluation. Further, we present an evaluation framework that overcomes these pitfalls and thus enables meaningful statements about the performance of AL methods. To demonstrate the relevance of our protocol, we present a large-scale empirical study and benchmark for image classification spanning various data sets, query methods, AL settings, and training paradigms. Our findings clarify the inconsistent picture in the literature and enable us to give hands-on recommendations for practitioners. The benchmark is hosted at https://github.com/IML-DKFZ/realistic-al.", "keywords": "Active Learning;Evaluation;Study", "primary_area": "", "supplementary_material": "/attachment/a834cdf5bae185b96d52227b943125447756c640.pdf", "author": "Carsten Tim L\u00fcth;Till J. Bungert;Lukas Klein;Paul F Jaeger", "authorids": "~Carsten_Tim_L\u00fcth1;~Till_J._Bungert1;~Lukas_Klein1;~Paul_F_Jaeger1", "gender": "M;M;;M", "homepage": ";https://lukaskln.github.io;https://pfjaeger.github.io;https://github.com/tbung", "dblp": "244/2162;;179/4749;334/4062", "google_scholar": "3L6NkggAAAAJ;https://scholar.google.de/citations?user=sxpuZg4AAAAJ;https://scholar.google.de/citations?user=9B9-8h0AAAAJ;yWaPy7AAAAAJ", "orcid": ";;;", "linkedin": "carsten-l\u00fcth-530798190/;;;", "or_profile": "~Carsten_Tim_L\u00fcth1;~Lukas_Klein1;~Paul_F_Jaeger1;~Till_Bungert1", "aff": "German Cancer Research Center;German Cancer Research Center;German Cancer Research Center;Deutsches Krebsforschungszentrum", "aff_domain": "dkfz-heidelberg.de;dkfz.de;dkfz.de;dkfz-heidelberg.de", "position": "PhD student;PhD student;Research Group Leader;PhD student", "bibtex": "@inproceedings{\nl{\\\"u}th2023navigating,\ntitle={Navigating the Pitfalls of Active Learning Evaluation: A Systematic Framework for Meaningful Performance Assessment},\nauthor={Carsten Tim L{\\\"u}th and Till J. 
Bungert and Lukas Klein and Paul F Jaeger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Dqn715Txgl}\n}", "github": "", "project": "", "reviewers": "DMVM;GZfx;QpNG;D3At;WUPX", "pdf_size": 28334160, "rating": "5;5;6;7;7", "confidence": "4;2;4;4;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "2;2;3;2;4", "wc_summary": "75;200;37;51;142", "wc_strengths": "70;104;69;74;48", "wc_weaknesses": "214;119;182;253;105", "wc_questions": "1;63;2;249;5", "wc_limitations": "1;10;9;5;5", "wc_review": "361;496;299;632;305", "wc_reply_reviewers": "15;17;14;0;16", "wc_reply_authors": "21;21;12;0;12", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 101.0, 61.243775193892155 ], "wc_strengths_avg": [ 73.0, 17.95550054997075 ], "wc_weaknesses_avg": [ 174.6, 56.01642616233207 ], "wc_questions_avg": [ 64.0, 95.4148835350125 ], "wc_limitations_avg": [ 6.0, 3.22490309931942 ], "wc_review_avg": [ 418.6, 128.10714265801107 ], "wc_reply_reviewers_avg": [ 12.4, 6.280127387243033 ], "wc_reply_authors_avg": [ 13.2, 7.730459236035076 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5590169943749473, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15961447643871186727&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "dkfz-heidelberg.de;dkfz.de;dkfz.de;dkfz-heidelberg.de", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "German Cancer Research Center;Deutsches Krebsforschungszentrum", "aff_unique_dep": ";", "aff_unique_url": "https://www.dkfz.de;https://www.dkfz.de", "aff_unique_abbr": "DKFZ;DKFZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Gaussian Differential Privacy on Riemannian Manifolds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72347", "id": "DrIZZwEZtM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f27964513a28d034530bfdd117ea31d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DrIZZwEZtM", "openreview": "https://openreview.net/forum?id=DrIZZwEZtM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72347", "video": "https://nips.cc/virtual/2023/poster/72347", "author_site": "Yangdi Jiang, Xiaotian Chang, Yi Liu, Lei Ding, Linglong Kong, Bei Jiang", "tldr": "", "abstract": "We develop an advanced approach for extending Gaussian Differential Privacy (GDP) to general Riemannian manifolds. The concept of GDP stands out as a prominent privacy definition that strongly warrants extension to manifold settings, due to its central limit properties. By harnessing the power of the renowned Bishop-Gromov theorem in geometric analysis, we propose a Riemannian Gaussian distribution that integrates the Riemannian distance, allowing us to achieve GDP in Riemannian manifolds with bounded Ricci curvature. 
To the best of our knowledge, this work marks the first instance of extending the GDP framework to accommodate general Riemannian manifolds, encompassing curved spaces, and circumventing the reliance on tangent space summaries. We provide a simple algorithm to evaluate the privacy budget $\\mu$ on any one-dimensional manifold and introduce a versatile Markov Chain Monte Carlo (MCMC)-based algorithm to calculate $\\mu$ on any Riemannian manifold with constant curvature. Through simulations on one of the most prevalent manifolds in statistics, the unit sphere $S^d$, we demonstrate the superior utility of our Riemannian Gaussian mechanism in comparison to the previously proposed Riemannian Laplace mechanism for implementing GDP.", "keywords": "Differential Privacy;Gaussian Differential Privacy;Differential Geometry;Riemannian Manifold;Homogeneous Riemannian Manifold;Frechet Mean", "primary_area": "", "supplementary_material": "/attachment/258c7034ab0de5525b1dd2f27b52f946c47ada3b.zip", "author": "Yangdi Jiang;Xiaotian Chang;Yi Liu;Lei Ding;Linglong Kong;Bei Jiang", "authorids": "~Yangdi_Jiang1;~Xiaotian_Chang1;~Yi_Liu13;~Lei_Ding6;~Linglong_Kong2;~Bei_Jiang1", "gender": "M;M;M;M;M;F", "homepage": "https://yangdijiang.github.io/;https://ca.linkedin.com/in/xiaotian-chang-96b264256;https://apps.ualberta.ca/directory/person/yliu16;;https://www.ualberta.ca/~lkong;https://www.ualberta.ca/~bei1", "dblp": ";;97/4626-62;;35/8525;190/4697", "google_scholar": "https://scholar.google.ca/citations?user=OfZ985EAAAAJ;;;ICUOaR4AAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=MfOZ8G0AAAAJ", "orcid": ";;;;0000-0003-3011-9216;0000-0002-0033-839X", "linkedin": "https://ca.linkedin.com/in/yangdi-jiang-b50408141;;;;;", "or_profile": "~Yangdi_Jiang1;~Xiaotian_Chang1;~Yi_Liu13;~Lei_Ding6;~Linglong_Kong2;~Bei_Jiang1", "aff": "University of Alberta;;University of Alberta;University of Alberta;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;;ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca", "position": "PhD student;;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2023gaussian,\ntitle={Gaussian Differential Privacy on Riemannian Manifolds},\nauthor={Yangdi Jiang and Xiaotian Chang and Yi Liu and Lei Ding and Linglong Kong and Bei Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DrIZZwEZtM}\n}", "github": "", "project": "", "reviewers": "j3sD;12f9;vrR5;3t86", "pdf_size": 622397, "rating": "4;6;7;7", "confidence": "4;3;4;4", "soundness": "1;2;3;3", "novelty": "1;2;3;3", "presentation": "3;3;3;3", "wc_summary": "188;135;32;80", "wc_strengths": "33;34;15;73", "wc_weaknesses": "376;84;158;38", "wc_questions": "4;128;1;183", "wc_limitations": "1;2;1;1", "wc_review": "602;383;207;375", "wc_reply_reviewers": "46;27;13;106", "wc_reply_authors": "302;209;18;290", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 108.75, 58.49519211012132 ], "wc_strengths_avg": [ 38.75, 21.1704392963396 ], "wc_weaknesses_avg": [ 164.0, 129.6688088940436 ], "wc_questions_avg": [ 79.0, 78.93985051924028 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 391.75, 140.26292275580172 ], 
"wc_reply_reviewers_avg": [ 48.0, 35.47534355013352 ], "wc_reply_authors_avg": [ 204.75, 113.59880060986559 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17958926071080877848&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ualberta.ca;;ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "The Impact of Positional Encoding on Length Generalization in Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72346", "id": "Drrl2gcjzl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e85362c02172c0c6567ce593122d31c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Drrl2gcjzl", "openreview": "https://openreview.net/forum?id=Drrl2gcjzl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72346", "video": "https://nips.cc/virtual/2023/poster/72346", "author_site": "Amirhossein Kazemnejad, Inkit Padhi, Karthikeyan Natesan Ramamurthy, Payel Das, Siva Reddy", "tldr": "", "abstract": "Length generalization, the ability to generalize from small training context sizes to larger ones, is a critical challenge in the development of Transformer-based language models. Positional encoding (PE) has been identified as a major factor influencing length generalization, but the exact impact of different PE schemes on extrapolation in downstream tasks remains unclear. In this paper, we conduct a systematic empirical study comparing the length generalization performance of decoder-only Transformers with five different position encoding approaches including Absolute Position Embedding (APE), T5's Relative PE, ALiBi, and Rotary, in addition to Transformers without positional encoding (NoPE). Our evaluation encompasses a battery of reasoning and mathematical tasks. Our findings reveal that the most commonly used positional encoding methods, such as ALiBi, Rotary, and APE, are not well suited for length generalization in downstream tasks. More importantly, NoPE outperforms other explicit positional encoding methods while requiring no additional computation. We theoretically demonstrate that NoPE can represent both absolute and relative PEs, but when trained with SGD, it mostly resembles T5's relative PE attention patterns. Finally, we find that scratchpad is not always helpful to solve length generalization and its format highly impacts the model's performance. 
Overall, our work suggests that explicit position embeddings are not essential for decoder-only Transformers to generalize well to longer sequences.", "keywords": "Transformers;Positional Encoding;Length Generalization", "primary_area": "", "supplementary_material": "/attachment/f55e201689a2e8d0ebf446cc04be19b09d7dc73e.pdf", "author": "Amirhossein Kazemnejad;Inkit Padhi;Karthikeyan Natesan;Payel Das;Siva Reddy", "authorids": "~Amirhossein_Kazemnejad1;~Inkit_Padhi1;knatesan@asu.edu;~Payel_Das1;~Siva_Reddy1", "gender": ";;;F;M", "homepage": ";;;;http://sivareddy.in", "dblp": ";188/9098;;56/7926;64/8153", "google_scholar": ";https://scholar.google.co.in/citations?user=c4yuGSoAAAAJ;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Amirhossein_Kazemnejad1;~Inkit_Padhi1;knatesan@asu.edu;~Payel_Das1;~Siva_Reddy1", "aff": ";IBM Research;;IBM, International Business Machines;Mila, McGill University", "aff_domain": ";ibm.com;;us.ibm.com;mila.quebec", "position": ";Researcher;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkazemnejad2023the,\ntitle={The Impact of Positional Encoding on Length Generalization in Transformers},\nauthor={Amirhossein Kazemnejad and Inkit Padhi and Karthikeyan Natesan and Payel Das and Siva Reddy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Drrl2gcjzl}\n}", "github": "", "project": "", "reviewers": "C4oP;VbrS;rWd9;qZrr", "pdf_size": 1185709, "rating": "5;5;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;3;2", "wc_summary": "164;39;249;133", "wc_strengths": "71;26;138;111", "wc_weaknesses": "95;37;453;66", "wc_questions": "92;40;132;776", "wc_limitations": "49;1;1;44", "wc_review": "471;143;973;1130", "wc_reply_reviewers": "0;0;57;736", "wc_reply_authors": "47;44;33;821", "reply_reviewers": "0;0;1;2", "reply_authors": "2;2;2;4", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 146.25, 75.08453569144581 ], "wc_strengths_avg": [ 86.5, 42.28770506896774 ], "wc_weaknesses_avg": [ 162.75, 168.8259088528772 ], "wc_questions_avg": [ 260.0, 299.6931764321637 ], "wc_limitations_avg": [ 23.75, 22.818577957445115 ], "wc_review_avg": [ 679.25, 393.82634180562377 ], "wc_reply_reviewers_avg": [ 198.25, 311.3409505670592 ], "wc_reply_authors_avg": [ 236.25, 337.64580183973857 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 164, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4006212005145195153&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";ibm.com;;us.ibm.com;mila.quebec", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "IBM;International Business Machines;McGill University", "aff_unique_dep": "IBM Research;;Mila", "aff_unique_url": "https://www.ibm.com/research;https://www.ibm.com;https://www.mcgill.ca", "aff_unique_abbr": "IBM;IBM;McGill", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Faster Query Times for Fully Dynamic $k$-Center Clustering with Outliers", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72345", "id": "Ds7Vd83HlC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1d8e261c241aa72f9b4a02af7f52587e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ds7Vd83HlC", "openreview": "https://openreview.net/forum?id=Ds7Vd83HlC", "poster": "/media/PosterPDFs/NeurIPS%202023/72345.png?t=1699829797.5422444", "slides": "https://nips.cc/virtual/2023/poster/72345", "video": "https://nips.cc/virtual/2023/poster/72345", "author_site": "Leyla Biabani, Annika Hennes, Morteza Monemizadeh, Melanie Schmidt", "tldr": "", "abstract": "Given a point set $P\\subseteq M$ from a metric space $(M,d)$ and numbers $k, z \\in N$, the *metric $k$-center problem with $z$ outliers* is to find a set $C^\\ast\\subseteq P$ of $k$ points such that the maximum distance of all but at most $z$ outlier points of $P$ to their nearest center in ${C}^\\ast$ is minimized. We consider this problem in the fully dynamic model, i.e., under insertions and deletions of points, for the case that the metric space has a bounded doubling dimension $dim$. We utilize a hierarchical data structure to maintain the points and their neighborhoods, which enables us to efficiently find the clusters. In particular, our data structure can be queried at any time to generate a $(3+\\varepsilon)$-approximate solution for input values of $k$ and $z$ in worst-case query time $\\varepsilon^{-O(dim)}k \\log{n} \\log\\log{\\Delta}$, where $\\Delta$ is the ratio between the maximum and minimum distance between two points in $P$. Moreover, it allows insertion/deletion of a point in worst-case update time $\\varepsilon^{-O(dim)}\\log{n}\\log{\\Delta}$. Our result achieves a significantly faster query time with respect to $k$ and $z$ than the current state-of-the-art by Pellizzoni, Pietracaprina, and Pucci, which uses $\\varepsilon^{-O(dim)}(k+z)^2\\log{\\Delta}$ query time to obtain a $(3+\\varepsilon)$-approximation.", "keywords": "$k$-center clustering;outliers;dynamic algorithms", "primary_area": "", "supplementary_material": "", "author": "Leyla Biabani;Annika Hennes;Morteza Monemizadeh;Melanie Schmidt", "authorids": "~Leyla_Biabani1;~Annika_Hennes1;~Morteza_Monemizadeh1;~Melanie_Schmidt1", "gender": ";F;M;F", "homepage": "https://research.tue.nl/en/persons/leyla-biabani;;https://research.tue.nl/en/persons/morteza-monemizadeh;", "dblp": ";352/5632;11/4322.html;67/7224-1", "google_scholar": ";YdPpy6QAAAAJ;wVH7Gp4AAAAJ;https://scholar.google.com/citations?hl=de", "orcid": ";0000-0001-9109-3107;;", "linkedin": ";;;", "or_profile": "~Leyla_Biabani1;~Annika_Hennes1;~Morteza_Monemizadeh1;~Melanie_Schmidt1", "aff": "Eindhoven University of Technology;Heinrich Heine University D\u00fcsseldorf;Eindhoven University of Technology;Heinrich Heine University D\u00fcsseldorf", "aff_domain": "tue.nl;uni-duesseldorf.de;tue.nl;hhu.de", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbiabani2023faster,\ntitle={Faster Query Times for Fully Dynamic \\$k\\$-Center Clustering with Outliers},\nauthor={Leyla Biabani and Annika Hennes and Morteza Monemizadeh and Melanie Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ds7Vd83HlC}\n}", "github": "", "project": "", "reviewers": "o6fX;RPz3;bgty;JTLo;k1og", "pdf_size": 471197, "rating": "6;6;6;7;7", "confidence": "4;3;5;3;3", "soundness": "4;3;4;3;3", "novelty": "3;3;4;3;3", "presentation": "4;2;3;3;4", 
"wc_summary": "198;173;251;276;205", "wc_strengths": "86;156;12;37;32", "wc_weaknesses": "171;134;11;27;70", "wc_questions": "184;154;32;1;39", "wc_limitations": "17;14;8;1;1", "wc_review": "656;631;314;342;347", "wc_reply_reviewers": "103;14;26;0;2", "wc_reply_authors": "101;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 220.6, 37.45183573604904 ], "wc_strengths_avg": [ 64.6, 51.77489739246231 ], "wc_weaknesses_avg": [ 82.6, 61.37295821451008 ], "wc_questions_avg": [ 82.0, 72.79835162969007 ], "wc_limitations_avg": [ 8.2, 6.554387843269576 ], "wc_review_avg": [ 458.0, 152.08287214541946 ], "wc_reply_reviewers_avg": [ 29.0, 38.157568056677825 ], "wc_reply_authors_avg": [ 20.2, 40.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13725426240507102935&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tue.nl;uni-duesseldorf.de;tue.nl;hhu.de", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Eindhoven University of Technology;Heinrich Heine University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tue.nl;https://www.hhu.de", "aff_unique_abbr": "TU/e;HHU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";D\u00fcsseldorf", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Netherlands;Germany" }, { "title": "Ecosystem-level Analysis of Deployed Machine Learning Reveals Homogeneous Outcomes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72344", "id": "Ds8iLujo3p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a0b1082fc7823c4c68abcab4fa850e9c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ds8iLujo3p", "openreview": "https://openreview.net/forum?id=Ds8iLujo3p", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72344", "video": "https://nips.cc/virtual/2023/poster/72344", "author_site": "Connor Toups, Rishi Bommasani, Kathleen Creel, Sarah Bana, Dan Jurafsky, Percy Liang", "tldr": "", "abstract": "Machine learning is traditionally studied at the model level: researchers measure and improve the accuracy, robustness, bias, efficiency, and other dimensions of specific models. In practice, however, the societal impact of any machine learning model is partially determined by the context into which it is deployed. To capture this, we introduce *ecosystem-level analysis:* rather than analyzing a single model, we consider the collection of models that are deployed in a given context. For example, ecosystem-level analysis in hiring recognizes that a job candidate\u2019s outcomes are determined not only by a single hiring algorithm or firm but instead by the collective decisions of all the firms to which the candidate applied. Across three modalities (text, images, speech) and 11 datasets, we establish a clear trend: deployed machine learning is prone to systemic failure, meaning some users are exclusively misclassified by all models available. 
Even when individual models improve at the population level over time, we find these improvements rarely reduce the prevalence of systemic failure. Instead, the benefits of these improvements predominantly accrue to individuals who are already correctly classified by other models. In light of these trends, we analyze medical imaging for dermatology, a setting where the costs of systemic failure are especially high. While traditional analyses reveal that both models and humans exhibit racial performance disparities, ecosystem-level analysis reveals new forms of racial disparity in model predictions that do not present in human predictions. These examples demonstrate that ecosystem-level analysis has unique strengths in characterizing the societal impact of machine learning.", "keywords": "homogenous outcomes;societal impact of ML;deployed ML;systemic failure", "primary_area": "", "supplementary_material": "/attachment/4a5970a91d6740f771f866e839299921a16e1440.zip", "author": "Connor Toups;Rishi Bommasani;Kathleen Creel;Sarah H Bana;Dan Jurafsky;Percy Liang", "authorids": "~Connor_Toups1;~Rishi_Bommasani1;~Kathleen_Creel1;~Sarah_H_Bana1;~Dan_Jurafsky1;~Percy_Liang1", "gender": "M;M;F;F;M;", "homepage": ";https://rishibommasani.github.io/;https://kathleenacreel.com;http://www.sarahbana.com;http://web.stanford.edu/~jurafsky/;https://cs.stanford.edu/~pliang/", "dblp": "277/6751;245/8673;249/6794;;31/985;04/1701", "google_scholar": ";WMBXw1EAAAAJ;qMIT0dcAAAAJ;Ac7-JAQAAAAJ;uZg9l58AAAAJ;pouyVyUAAAAJ", "orcid": ";;0000-0001-7371-2680;;;", "linkedin": "connortoups/;;;;;", "or_profile": "~Connor_Toups1;~Rishi_Bommasani1;~Kathleen_Creel1;~Sarah_H_Bana1;~Dan_Jurafsky1;~Percy_Liang1", "aff": "Stanford University;Stanford University;Northeastern University;Chapman University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;northeastern.edu;chapman.edu;stanford.edu;stanford.edu", "position": "Undergrad student;PhD student;Assistant Professor;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ntoups2023ecosystemlevel,\ntitle={Ecosystem-level Analysis of Deployed Machine Learning Reveals Homogeneous Outcomes},\nauthor={Connor Toups and Rishi Bommasani and Kathleen Creel and Sarah H Bana and Dan Jurafsky and Percy Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ds8iLujo3p}\n}", "github": "", "project": "", "reviewers": "N5w8;kuru;HXE5;KctL", "pdf_size": 6205890, "rating": "5;5;6;8", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "4;3;4;4", "wc_summary": "83;86;180;131", "wc_strengths": "64;29;108;135", "wc_weaknesses": "237;423;130;254", "wc_questions": "19;2;84;154", "wc_limitations": "1;2;9;61", "wc_review": "404;542;511;735", "wc_reply_reviewers": "97;133;21;87", "wc_reply_authors": "119;88;12;45", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.0, 39.515819616958474 ], "wc_strengths_avg": [ 84.0, 40.626346131543755 ], "wc_weaknesses_avg": [ 261.0, 104.91663357161246 ], "wc_questions_avg": [ 64.75, 59.930689133364716 ], "wc_limitations_avg": [ 18.25, 24.873429598670143 ], "wc_review_avg": [ 548.0, 119.4884931698446 ], "wc_reply_reviewers_avg": [ 84.5, 
40.4567670482949 ], "wc_reply_authors_avg": [ 66.0, 40.773766075750224 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2180048595206828019&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;northeastern.edu;chapman.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Stanford University;Northeastern University;Chapman University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.northeastern.edu;https://www.chapman.edu", "aff_unique_abbr": "Stanford;NEU;Chapman", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Curvature Filtrations for Graph Generative Model Evaluation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72343", "id": "Dt71xKyabn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c710d6b4507e70c6332bee871b8d1ca5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Dt71xKyabn", "openreview": "https://openreview.net/forum?id=Dt71xKyabn", "poster": "/media/PosterPDFs/NeurIPS%202023/72343.png?t=1702210378.3232613", "slides": "https://nips.cc/virtual/2023/poster/72343", "video": "https://nips.cc/virtual/2023/poster/72343", "author_site": "Joshua Southern, Jeremy Wayland, Michael Bronstein, Bastian Rieck", "tldr": "", "abstract": "Graph generative model evaluation necessitates understanding differences between graphs on the distributional level. This entails being able to harness salient attributes of graphs in an efficient manner. Curvature constitutes one such property of graphs, and has recently started to prove useful in characterising graphs. Its expressive properties, stability, and practical utility in model evaluation remain largely unexplored, however. We combine graph curvature descriptors with emerging methods from topological data analysis to obtain robust, expressive descriptors for evaluating graph generative models.", "keywords": "Curvature;topology;persistent homology;graph learning;generative model;machine learning;geometric deep learning", "primary_area": "", "supplementary_material": "/attachment/7dfc24c6b46969931ed7ab1e29d4b4341891aeb4.zip", "author": "Joshua Southern;Jeremy Wayland;Michael M. 
Bronstein;Bastian Rieck", "authorids": "~Joshua_Southern1;~Jeremy_Wayland1;~Michael_M._Bronstein1;~Bastian_Rieck1", "gender": "M;M;M;M", "homepage": ";https://jeremy-wayland.me/;http://www.inf.usi.ch/bronstein/;https://bastian.rieck.me", "dblp": ";;07/2668;119/8860", "google_scholar": "diG9smQAAAAJ;FucrCk8AAAAJ;UU3N6-UAAAAJ;https://scholar.google.ch/citations?user=La7zuKQAAAAJ", "orcid": ";0000-0002-8766-8737;;0000-0003-4335-0302", "linkedin": ";jeremy-wayland/;mbronstein/;br-ml/", "or_profile": "~Joshua_Southern1;~Jeremy_Wayland1;~Michael_M._Bronstein1;~Bastian_Rieck1", "aff": "Imperial College London, Imperial College London;Technische Universit\u00e4t M\u00fcnchen;Twitter;Helmholtz Zentrum M\u00fcnchen", "aff_domain": "imperial.ac.uk;tum.de;twitter.com;helmholtz-munich.de", "position": "PhD student;PhD student;Head of Graph ML;Principal Investigator", "bibtex": "@inproceedings{\nsouthern2023curvature,\ntitle={Curvature Filtrations for Graph Generative Model Evaluation},\nauthor={Joshua Southern and Jeremy Wayland and Michael M. Bronstein and Bastian Rieck},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Dt71xKyabn}\n}", "github": "", "project": "", "reviewers": "juxy;YxdW;4GUL", "pdf_size": 528830, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "46;55;55", "wc_strengths": "28;29;72", "wc_weaknesses": "250;25;100", "wc_questions": "6;369;103", "wc_limitations": "32;14;13", "wc_review": "362;492;343", "wc_reply_reviewers": "1994;11;39", "wc_reply_authors": "2744;53;16", "reply_reviewers": "8;1;1", "reply_authors": "9;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 52.0, 4.242640687119285 ], "wc_strengths_avg": [ 43.0, 20.51016008388688 ], "wc_weaknesses_avg": [ 125.0, 93.54143466934853 ], "wc_questions_avg": [ 159.33333333333334, 153.45430010990967 ], "wc_limitations_avg": [ 19.666666666666668, 8.73053390247253 ], "wc_review_avg": [ 399.0, 66.21681558838863 ], "wc_reply_reviewers_avg": [ 681.3333333333334, 928.2658862392582 ], "wc_reply_authors_avg": [ 937.6666666666666, 1277.3598640251 ], "reply_reviewers_avg": [ 3.3333333333333335, 3.2998316455372216 ], "reply_authors_avg": [ 4.333333333333333, 3.299831645537222 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12436326634462816741&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "imperial.ac.uk;tum.de;twitter.com;helmholtz-munich.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Imperial College London;Technische Universit\u00e4t M\u00fcnchen;Twitter, Inc.;Helmholtz Zentrum M\u00fcnchen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.tum.de;https://twitter.com;https://www.helmholtz-muenchen.de", "aff_unique_abbr": "ICL;TUM;Twitter;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "United Kingdom;Germany;United States" }, { "id": "DvRTU1whxF", "title": "Retinotopy Inspired Brain Encoding Model and the All-for-One Training Recipe", "track": "main", "status": 
"Reject", "tldr": "", "abstract": "Brain encoding models aim to predict brain voxel-wise responses to stimuli images, replicating brain signals captured by neuroimaging techniques. There is a large volume of publicly available data, but training a comprehensive brain encoding model is challenging. The main difficulties stem from a) diversity within individual brain, with functional heterogeneous brain regions; b) diversity of brains from different subjects, due to genetic and developmental differences; c) diversity of imaging modalities and processing pipelines.\nWe use this diversity to our advantage by introducing the All-for-One training recipe, which divides the challenging one-big-model problem into multiple small models, with the small models aggregating the knowledge while preserving the distinction between the different functional regions (ROI). \nAgnostic of the training recipe, we use biological domain knowledge of the brain, specifically retinotopy, to introduce inductive bias to learn a 3D brain-to-image mapping that ensures a) each neuron knows which image regions and semantic levels to gather information, and b) no neurons are left behind in the model (starvation). \nWe pre-trained a brain encoding model using over one million datapoints from five public datasets spanning three imaging modalities. To the best of our knowledge, this is the most comprehensive brain encoding model to the date. We demonstrate the effectiveness of the pre-trained model as a drop-in replacement for commonly used vision backbone models. Furthermore, we demonstrate the application of the pre-trained model to brain decoding. Code and the model checkpoint will be made available. ", "keywords": "brain encoding model;computer vision;neuroscience", "primary_area": "", "supplementary_material": "/attachment/572b15fe0c3254d03c42bf92d3d82a9c8ce4c9b4.zip", "author": "Huzheng Yang;Jianbo Shi;James Gee", "authorids": "~Huzheng_Yang1;~Jianbo_Shi1;~James_Gee1", "gender": ";M;M", "homepage": "https://huzeyann.github.io/;http://www.cs.cmu.edu/~jshi/;https://www.med.upenn.edu/apps/faculty/index.php/g5455356/p10656", "dblp": "250/3888.html;71/3879;30/6904", "google_scholar": "8yVLKyYAAAAJ;;https://scholar.google.com.tw/citations?user=fU8fmEIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Huzheng_Yang1;~Jianbo_Shi1;~James_Gee1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Professor;Full Professor", "bibtex": "@misc{\nyang2023retinotopy,\ntitle={Retinotopy Inspired Brain Encoding Model and the All-for-One Training Recipe},\nauthor={Huzheng Yang and Jianbo Shi and James Gee},\nyear={2023},\nurl={https://openreview.net/forum?id=DvRTU1whxF}\n}", "github": "", "project": "", "reviewers": "1xvD;n3y9;HdGa;ARqu;CWUX", "site": "https://openreview.net/forum?id=DvRTU1whxF", "pdf_size": 14179886, "rating": "4;4;5;6;6", "confidence": "4;2;3;3;2", "soundness": "2;2;3;2;3", "novelty": "3;2;2;3;2", "presentation": "2;3;3;2;4", "wc_summary": "110;115;202;79;92", "wc_strengths": "6;112;93;36;88", "wc_weaknesses": "6;117;721;87;19", "wc_questions": "233;7;5;87;250", "wc_limitations": "6;15;9;6;6", "wc_review": "361;366;1030;295;455", "wc_reply_reviewers": "71;0;117;37;34", "wc_reply_authors": "29;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], 
"novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 119.6, 43.158313220050665 ], "wc_strengths_avg": [ 67.0, 39.55755300824357 ], "wc_weaknesses_avg": [ 190.0, 268.6990882009092 ], "wc_questions_avg": [ 116.4, 106.47741544571787 ], "wc_limitations_avg": [ 8.4, 3.4985711369071804 ], "wc_review_avg": [ 501.4, 269.1531905811261 ], "wc_reply_reviewers_avg": [ 51.8, 39.59494917284274 ], "wc_reply_authors_avg": [ 5.8, 11.6 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2988071523335984, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bJPuXoBA5OoJ:scholar.google.com/&scioq=Retinotopy+Inspired+Brain+Encoding+Model+and+the+All-for-One+Training+Recipe&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Distribution Learnability and Robustness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72342", "id": "Dx99y3okbL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5321f64005b0d4a94d0b18e84e19f48-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Dx99y3okbL", "openreview": "https://openreview.net/forum?id=Dx99y3okbL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72342", "video": "https://nips.cc/virtual/2023/poster/72342", "author_site": "Shai Ben-David, Alex Bie, Gautam Kamath, Tosca Lechner", "tldr": "", "abstract": "We examine the relationship between learnability and robust learnability for the problem of distribution learning.\nWe show that learnability implies robust learnability if the adversary can only perform additive contamination (and consequently, under Huber contamination), but not if the adversary is allowed to perform subtractive contamination. \nThus, contrary to other learning settings (e.g., PAC learning of function classes), realizable learnability does not imply agnostic learnability. 
\nWe also explore related implications in the context of compression schemes and differentially private learnability.", "keywords": "robustness;distribution learning", "primary_area": "", "supplementary_material": "/attachment/f2b9385c9c53ead7aece9cf406de01cabac60b45.pdf", "author": "Shai Ben-David;Alex Bie;Gautam Kamath;Tosca Lechner", "authorids": "~Shai_Ben-David2;~Alex_Bie1;~Gautam_Kamath1;~Tosca_Lechner1", "gender": "M;;M;F", "homepage": "https://cs.uwaterloo.ca/~shai/;;http://www.gautamkamath.com/;https://toscalechner.github.io/", "dblp": "15/6319;;73/11140;267/6485.html", "google_scholar": "https://scholar.google.com.tw/citations?user=kezPqwoAAAAJ;;MK6zHkYAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shai_Ben-David2;~Alex_Bie1;~Gautam_Kamath1;~Tosca_Lechner1", "aff": "University of Waterloo;;University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;;uwaterloo.ca;uwaterloo.ca", "position": "Full Professor;;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nben-david2023distribution,\ntitle={Distribution Learnability and Robustness},\nauthor={Shai Ben-David and Alex Bie and Gautam Kamath and Tosca Lechner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Dx99y3okbL}\n}", "github": "", "project": "", "reviewers": "2DNr;1ajk;Xv71", "pdf_size": 455814, "rating": "6;6;7", "confidence": "3;3;5", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "2;4;4", "wc_summary": "177;177;58", "wc_strengths": "136;87;35", "wc_weaknesses": "826;114;1", "wc_questions": "2;154;96", "wc_limitations": "1;1;1", "wc_review": "1142;533;191", "wc_reply_reviewers": "139;59;4", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 137.33333333333334, 56.09713797413277 ], "wc_strengths_avg": [ 86.0, 41.23913998456644 ], "wc_weaknesses_avg": [ 313.6666666666667, 365.1997931117827 ], "wc_questions_avg": [ 84.0, 62.63119563497624 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 622.0, 393.3115813194419 ], "wc_reply_reviewers_avg": [ 67.33333333333333, 55.42763049438631 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11155087850298890925&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uwaterloo.ca;;uwaterloo.ca;uwaterloo.ca", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Convergence Analysis of Sequential Federated Learning on Heterogeneous Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72341", "id": "Dxhv8Oja2V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b18e5d6a10ba57d5273871f38189f062-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Dxhv8Oja2V", "openreview": 
"https://openreview.net/forum?id=Dxhv8Oja2V", "poster": "/media/PosterPDFs/NeurIPS%202023/72341.png?t=1699240250.8774185", "slides": "https://nips.cc/virtual/2023/poster/72341", "video": "https://nips.cc/virtual/2023/poster/72341", "author_site": "Yipeng Li, Xinchen Lyu", "tldr": "", "abstract": "There are two categories of methods in Federated Learning (FL) for joint training across multiple clients: i) parallel FL (PFL), where clients train models in a parallel manner; and ii) sequential FL (SFL), where clients train models in a sequential manner. In contrast to that of PFL, the convergence theory of SFL on heterogeneous data is still lacking. In this paper, we establish the convergence guarantees of SFL for strongly/general/non-convex objectives on heterogeneous data. The convergence guarantees of SFL are better than that of PFL on heterogeneous data with both full and partial client participation. Experimental results validate the counterintuitive analysis result that SFL outperforms PFL on extremely heterogeneous data in cross-device settings.", "keywords": "Federated Learning;Convergence analysis", "primary_area": "", "supplementary_material": "", "author": "Yipeng Li;Xinchen Lyu", "authorids": "~Yipeng_Li1;~Xinchen_Lyu1", "gender": "M;M", "homepage": "https://liyipeng00.github.io/;", "dblp": ";179/9891", "google_scholar": ";https://scholar.google.com.hk/citations?user=eZOOkb4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yipeng_Li1;~Xinchen_Lyu1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nli2023convergence,\ntitle={Convergence Analysis of Sequential Federated Learning on Heterogeneous Data},\nauthor={Yipeng Li and Xinchen Lyu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Dxhv8Oja2V}\n}", "github": "", "project": "", "reviewers": "98XT;K7U9;4Dha;D6Q7", "pdf_size": 8444381, "rating": "5;6;6;7", "confidence": "3;3;4;4", "soundness": "4;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "162;157;178;102", "wc_strengths": "64;81;146;55", "wc_weaknesses": "307;139;181;518", "wc_questions": "10;135;75;129", "wc_limitations": "1;1;3;1", "wc_review": "544;513;583;805", "wc_reply_reviewers": "10;27;137;139", "wc_reply_authors": "0;0;495;267", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 149.75, 28.63891583143468 ], "wc_strengths_avg": [ 86.5, 35.59845502265512 ], "wc_weaknesses_avg": [ 286.25, 147.39296964238153 ], "wc_questions_avg": [ 87.25, 50.35064547749115 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 611.25, 114.57830291988095 ], "wc_reply_reviewers_avg": [ 78.25, 60.05570330951091 ], "wc_reply_authors_avg": [ 190.5, 206.85320882210166 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12092451718796751131&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bupt.edu.cn;bupt.edu.cn", "author_num": 2, 
"aff_unique_index": "0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Modelling Cellular Perturbations with the Sparse Additive Mechanism Shift Variational Autoencoder", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72340", "id": "DzaCE00jGV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0001ca33ba34ce0351e4612b744b3936-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=DzaCE00jGV", "openreview": "https://openreview.net/forum?id=DzaCE00jGV", "poster": "/media/PosterPDFs/NeurIPS%202023/72340.png?t=1702275309.0474246", "slides": "https://nips.cc/virtual/2023/poster/72340", "video": "https://nips.cc/virtual/2023/poster/72340", "author_site": "Michael Bereket, Theofanis Karaletsos", "tldr": "", "abstract": "Generative models of observations under interventions have been a vibrant topic of interest across machine learning and the sciences in recent years. For example, in drug discovery, there is a need to model the effects of diverse interventions on cells in order to characterize unknown biological mechanisms of action. We propose the Sparse Additive Mechanism Shift Variational Autoencoder, SAMS-VAE, to combine compositionality, disentanglement, and interpretability for perturbation models. SAMS-VAE models the latent state of a perturbed sample as the sum of a local latent variable capturing sample-specific variation and sparse global variables of latent intervention effects. Crucially, SAMS-VAE sparsifies these global latent variables for individual perturbations to identify disentangled, perturbation-specific latent subspaces that are flexibly composable. We evaluate SAMS-VAE both quantitatively and qualitatively on a range of tasks using two popular single cell sequencing datasets.\nIn order to measure perturbation-specific model-properties, we also introduce a framework for evaluation of perturbation models based on average treatment effects with links to posterior predictive checks. SAMS-VAE outperforms comparable models in terms of generalization across in-distribution and out-of-distribution tasks, including a combinatorial reasoning task under resource paucity, and yields interpretable latent structures which correlate strongly to known biological mechanisms. 
\nOur results suggest SAMS-VAE is an interesting addition to the modeling toolkit for machine learning-driven scientific discovery.", "keywords": "Disentangled representation learning;VAE;generative models;sparse mechanism shift;perturbation modeling;cellular modeling", "primary_area": "", "supplementary_material": "/attachment/7d29e2f46192e563a5cdf588ef711c19ae9a77f8.pdf", "author": "Michael Bereket;Theofanis Karaletsos", "authorids": "~Michael_Bereket1;~Theofanis_Karaletsos1", "gender": ";M", "homepage": ";http://karaletsos.com/", "dblp": ";31/11191", "google_scholar": ";zrxafGsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Michael_Bereket1;~Theofanis_Karaletsos1", "aff": ";Insitro", "aff_domain": ";insitro.com", "position": ";VP of ML", "bibtex": "@inproceedings{\nbereket2023modelling,\ntitle={Modelling Cellular Perturbations with the Sparse Additive Mechanism Shift Variational Autoencoder},\nauthor={Michael Bereket and Theofanis Karaletsos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=DzaCE00jGV}\n}", "github": "", "project": "", "reviewers": "TMYT;Aahc;y8vh;9yRM", "pdf_size": 4516021, "rating": "4;5;6;6", "confidence": "3;3;2;4", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "3;2;3;2", "wc_summary": "74;93;151;56", "wc_strengths": "60;41;86;107", "wc_weaknesses": "284;174;256;86", "wc_questions": "29;85;2;136", "wc_limitations": "43;6;14;1", "wc_review": "490;399;509;386", "wc_reply_reviewers": "196;254;87;0", "wc_reply_authors": "217;193;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 93.5, 35.68262882692361 ], "wc_strengths_avg": [ 73.5, 25.084855989221865 ], "wc_weaknesses_avg": [ 200.0, 77.23988606931007 ], "wc_questions_avg": [ 63.0, 51.696228102251325 ], "wc_limitations_avg": [ 16.0, 16.263455967290593 ], "wc_review_avg": [ 446.0, 54.115616969595756 ], "wc_reply_reviewers_avg": [ 134.25, 97.99075211467662 ], "wc_reply_authors_avg": [ 102.5, 102.85061983284301 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12988170514486616612&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";insitro.com", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Insitro", "aff_unique_dep": "", "aff_unique_url": "https://www.insitro.com", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Federated Conditional Stochastic Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72339", "id": "E0Gw1uz7lU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1229eaae5bf1db93e1e4c539258eb472-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=E0Gw1uz7lU", "openreview": "https://openreview.net/forum?id=E0Gw1uz7lU", "poster": "/media/PosterPDFs/NeurIPS%202023/72339.png?t=1701375025.818368", "slides": "https://nips.cc/virtual/2023/poster/72339", "video": "https://nips.cc/virtual/2023/poster/72339", "author_site": "Xidong Wu, Jianhui Sun, Zhengmian Hu, Junyi Li, Aidong Zhang, Heng Huang", "tldr": "",
"abstract": "Conditional stochastic optimization has found applications in a wide range of machine learning tasks, such as invariant learning, AUPRC maximization, and meta-learning. As the demand for training models with large-scale distributed data grows in these applications, there is an increasing need for communication-efficient distributed optimization algorithms, such as federated learning algorithms. This paper considers the nonconvex conditional stochastic optimization in federated learning and proposes the first federated conditional stochastic optimization algorithm (FCSG) with a conditional stochastic gradient estimator and a momentum-based algorithm (\\emph{i.e.}, FCSG-M). To match the lower bound complexity in the single-machine setting, we design an accelerated algorithm (Acc-FCSG-M) via the variance reduction to achieve the best sample and communication complexity. Compared with the existing optimization analysis for Meta-Learning in FL, federated conditional stochastic optimization considers the sample of tasks. Extensive experimental results on various tasks validate the efficiency of these algorithms.", "keywords": "Federated Learning;Conditional Stochastic Optimization;Nonconvex Optimization", "primary_area": "", "supplementary_material": "/attachment/1569c3a4c31cf4c9e0b7442610ae5436a12fc880.pdf", "author": "Xidong Wu;Jianhui Sun;Zhengmian Hu;Junyi Li;Aidong Zhang;Heng Huang", "authorids": "~Xidong_Wu1;~Jianhui_Sun1;~Zhengmian_Hu1;~Junyi_Li1;~Aidong_Zhang2;~Heng_Huang1", "gender": "Non-Binary;;M;M;F;M", "homepage": "https://www.linkedin.com/in/xidong-wu-22924112b/;https://jsycsjh.github.io/;https://www.umd.edu/;;https://engineering.virginia.edu/faculty/aidong-zhang;https://www.cs.umd.edu/~heng/", "dblp": "37/10581;207/9364;285/4945;;z/AidongZhang.html;03/281", "google_scholar": "rj21L7sAAAAJ;https://scholar.google.com/citations?hl=en;4eXiWWgAAAAJ;MzvZSs0AAAAJ;O8XxkE4AAAAJ;4OqLaDwAAAAJ", "orcid": ";;0000-0003-0316-146X;;0000-0001-9723-3246;", "linkedin": ";jianhui-sun-76a722a6/;;;;", "or_profile": "~Xidong_Wu1;~Jianhui_Sun1;~Zhengmian_Hu1;~Junyi_Li1;~Aidong_Zhang2;~Heng_Huang1", "aff": "University of Pittsburgh;University of Virginia;University of Pittsburgh;University of Pittsburgh;University of Virginia;University of Pittsburgh", "aff_domain": "pitt.edu;virginia.edu;pitt.edu;pitt.edu;virginia.edu;pitt.edu", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023federated,\ntitle={Federated Conditional Stochastic Optimization},\nauthor={Xidong Wu and Jianhui Sun and Zhengmian Hu and Junyi Li and Aidong Zhang and Heng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=E0Gw1uz7lU}\n}", "github": "", "project": "", "reviewers": "tZ8Z;w7D4;jBXN;VagV;pkoz;L363", "pdf_size": 1447438, "rating": "5;5;6;6;7;7", "confidence": "4;2;3;2;3;4", "soundness": "3;3;3;3;3;4", "novelty": "2;3;2;3;3;3", "presentation": "3;3;3;3;3;3", "wc_summary": "61;47;123;88;73;53", "wc_strengths": "32;26;61;79;46;21", "wc_weaknesses": "192;96;318;175;74;31", "wc_questions": "2;1;50;2;14;25", "wc_limitations": "2;1;29;4;10;3", "wc_review": "289;171;581;348;217;133", "wc_reply_reviewers": "131;16;192;266;21;29", "wc_reply_authors": "138;72;825;572;501;6", "reply_reviewers": "1;1;2;2;1;1", "reply_authors": "4;3;4;3;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.1666666666666665, 
0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.16666666666667, 25.615208156267027 ], "wc_strengths_avg": [ 44.166666666666664, 20.472881792480727 ], "wc_weaknesses_avg": [ 147.66666666666666, 94.36925111261377 ], "wc_questions_avg": [ 15.666666666666666, 17.594190960528863 ], "wc_limitations_avg": [ 8.166666666666666, 9.753916592266355 ], "wc_review_avg": [ 289.8333333333333, 148.42777892145241 ], "wc_reply_reviewers_avg": [ 109.16666666666667, 95.58140800153319 ], "wc_reply_authors_avg": [ 352.3333333333333, 299.51108308189345 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.1666666666666665, 0.6871842709362768 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17100333605699769750&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "pitt.edu;virginia.edu;pitt.edu;pitt.edu;virginia.edu;pitt.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;1;0", "aff_unique_norm": "University of Pittsburgh;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;https://www.virginia.edu", "aff_unique_abbr": "Pitt;UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "VeriX: Towards Verified Explainability of Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72338", "id": "E2TJI6CKm0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46907c2ff9fafd618095161d76461842-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=E2TJI6CKm0", "openreview": "https://openreview.net/forum?id=E2TJI6CKm0", "poster": "/media/PosterPDFs/NeurIPS%202023/72338.png?t=1697753142.5606139", "slides": "https://nips.cc/virtual/2023/poster/72338", "video": "https://nips.cc/virtual/2023/poster/72338", "author_site": "Min Wu, Haoze Wu, Clark Barrett", "tldr": "", "abstract": "We present **VeriX** (**Veri**fied e**X**plainability), a system for producing *optimal robust explanations* and generating *counterfactuals* along decision boundaries of machine learning models. We build such explanations and counterfactuals iteratively using constraint solving techniques and a heuristic based on feature-level sensitivity ranking. 
We evaluate our method on image recognition benchmarks and a real-world scenario of autonomous aircraft taxiing.", "keywords": "trustworthy machine learning;deep neural networks;explainability;interpretability;formal methods;automated verification", "primary_area": "", "supplementary_material": "/attachment/946307067320c55816a753da49c775bc7b82af2d.pdf", "author": "Min Wu;Haoze Wu;Clark Barrett", "authorids": "~Min_Wu7;~Haoze_Wu1;~Clark_Barrett1", "gender": ";;M", "homepage": ";https://wu-haoze.github.io/;http://theory.stanford.edu/~barrett", "dblp": ";195/8725-1;b/ClarkWBarrett", "google_scholar": ";uKegaWoAAAAJ;https://scholar.google.com.tw/citations?user=BtwmZfQAAAAJ", "orcid": ";;0000-0002-9522-3084", "linkedin": ";;clark-barrett-a5b157/", "or_profile": "~Min_Wu7;~Haoze_Wu1;~Clark_Barrett1", "aff": ";Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu", "position": ";PhD student;Professor (Research)", "bibtex": "@inproceedings{\nwu2023verix,\ntitle={VeriX: Towards Verified Explainability of Deep Neural Networks},\nauthor={Min Wu and Haoze Wu and Clark Barrett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=E2TJI6CKm0}\n}", "github": "", "project": "", "reviewers": "VvcV;zc3P;oNV3;Yjtz", "pdf_size": 2733210, "rating": "5;5;7;7", "confidence": "5;4;4;3", "soundness": "3;4;4;3", "novelty": "2;2;3;3", "presentation": "3;2;4;3", "wc_summary": "175;52;208;119", "wc_strengths": "132;34;61;93", "wc_weaknesses": "237;171;250;70", "wc_questions": "164;69;103;49", "wc_limitations": "281;22;13;53", "wc_review": "989;348;635;384", "wc_reply_reviewers": "275;0;426;0", "wc_reply_authors": "492;0;647;0", "reply_reviewers": "2;0;2;0", "reply_authors": "3;1;3;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 138.5, 59.21359641163506 ], "wc_strengths_avg": [ 80.0, 36.571847095819486 ], "wc_weaknesses_avg": [ 182.0, 71.26359519418031 ], "wc_questions_avg": [ 96.25, 43.61980628109208 ], "wc_limitations_avg": [ 92.25, 109.98039598037461 ], "wc_review_avg": [ 589.0, 256.0380831048381 ], "wc_reply_reviewers_avg": [ 175.25, 183.2012213387236 ], "wc_reply_authors_avg": [ 284.75, 289.97532222587495 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4267530038387174910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Reward Imputation with Sketching for Contextual Batched Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72337", "id": "E2zoGTkTbW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cba76ef96c4cd625631ab4d33285b045-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=E2zoGTkTbW", "openreview": "https://openreview.net/forum?id=E2zoGTkTbW", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72337.png?t=1698504235.6095479", "slides": "https://nips.cc/virtual/2023/poster/72337", "video": "https://nips.cc/virtual/2023/poster/72337", "author_site": "Xiao Zhang, Ninglu Shao, Zihua Si, Jun Xu, Wenhan Wang, Hanjing Su, Ji-Rong Wen", "tldr": "", "abstract": "Contextual batched bandit (CBB) is a setting where a batch of rewards is observed from the environment at the end of each episode, but the rewards of the non-executed actions are unobserved, resulting in partial-information feedback. Existing approaches for CBB often ignore the rewards of the non-executed actions, leading to underutilization of feedback information. In this paper, we propose an efficient approach called Sketched Policy Updating with Imputed Rewards (SPUIR) that completes the unobserved rewards using sketching, which approximates the full-information feedbacks. We formulate reward imputation as an imputation regularized ridge regression problem that captures the feedback mechanisms of both executed and non-executed actions. To reduce time complexity, we solve the regression problem using randomized sketching. We prove that our approach achieves an instantaneous regret with controllable bias and smaller variance than approaches without reward imputation. Furthermore, our approach enjoys a sublinear regret bound against the optimal policy. We also present two extensions, a rate-scheduled version and a version for nonlinear rewards, making our approach more practical. Experimental results show that SPUIR outperforms state-of-the-art baselines on synthetic, public benchmark, and real-world datasets.", "keywords": "batched bandit;sketching;reward imputation;regret bound;ridge regression", "primary_area": "", "supplementary_material": "/attachment/0017371752641d6737a0aa452bd665ee20790a8a.zip", "author": "Xiao Zhang;Ninglu Shao;Zihua Si;Jun Xu;Wenhan Wang;Hanjing Su;Ji-Rong Wen", "authorids": "~Xiao_Zhang7;~Ninglu_Shao1;~Zihua_Si1;~Jun_Xu1;~Wenhan_Wang3;~Hanjing_Su2;~Ji-Rong_Wen1", "gender": "M;M;M;M;M;M;M", "homepage": "https://pinkfloyd1989.github.io/Xiao_Zhang/;https://rainym00d.github.io/ninglushao.github.io/;;https://gsai.ruc.edu.cn/~junxu;;https://gsai.ruc.edu.cn/english/jrwen;http://www.52cs.com", "dblp": "49/4478-34;;;90/514-1;;w/JRWen;", "google_scholar": "https://scholar.google.com.hk/citations?user=5FZ6wbAAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;su14mcEAAAAJ;;tbxCHJgAAAAJ;", "orcid": "0000-0001-7397-5632;;;;;0000-0002-9777-9676;", "linkedin": ";;;;wenhan-wang-aa0bb378/;;", "or_profile": "~Xiao_Zhang7;~Ninglu_Shao1;~Zihua_Si1;~Jun_Xu1;~Wenhan_Wang3;~Ji-Rong_Wen1;~hanjing_su1", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China;Tencent Group;Renmin University of China;", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;tencent.com;ruc.edu.cn;", "position": "Assistant Professor;MS student;MS student;Full Professor;Researcher;Full Professor;", "bibtex": "@inproceedings{\nzhang2023reward,\ntitle={Reward Imputation with Sketching for Contextual Batched Bandits},\nauthor={Xiao Zhang and Ninglu Shao and Zihua Si and Jun Xu and Wenhan Wang and Hanjing Su and Ji-Rong Wen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=E2zoGTkTbW}\n}", "github": "", "project": "", "reviewers": "wPev;PXsH;et1K;mYi5", "pdf_size": 1814227, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", 
"presentation": "2;3;2;2", "wc_summary": "254;90;81;28", "wc_strengths": "25;135;30;21", "wc_weaknesses": "188;196;126;163", "wc_questions": "21;3;184;48", "wc_limitations": "14;1;5;53", "wc_review": "502;425;426;313", "wc_reply_reviewers": "80;17;36;0", "wc_reply_authors": "33;13;27;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 113.25, 84.64447707913375 ], "wc_strengths_avg": [ 52.75, 47.59398596461532 ], "wc_weaknesses_avg": [ 168.25, 27.261465477849864 ], "wc_questions_avg": [ 64.0, 71.10907115129545 ], "wc_limitations_avg": [ 18.25, 20.60794749605113 ], "wc_review_avg": [ 416.5, 67.4258852370512 ], "wc_reply_reviewers_avg": [ 33.25, 29.844388082183894 ], "wc_reply_authors_avg": [ 18.25, 12.794041581923986 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tTKYBf3LTk8J:scholar.google.com/&scioq=Reward+Imputation+with+Sketching+for+Contextual+Batched+Bandits&hl=en&as_sdt=0,47", "gs_version_total": 7, "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;tencent.com;ruc.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Renmin University of China;Tencent", "aff_unique_dep": ";Tencent", "aff_unique_url": "http://www.ruc.edu.cn;https://www.tencent.com", "aff_unique_abbr": "RUC;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Strategic Distribution Shift of Interacting Agents via Coupled Gradient Flows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72336", "id": "E3ZUEaeFYS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/902c462e821e5e639ac3422b48b65932-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=E3ZUEaeFYS", "openreview": "https://openreview.net/forum?id=E3ZUEaeFYS", "poster": "/media/PosterPDFs/NeurIPS%202023/72336.png?t=1701547973.0867682", "slides": "https://nips.cc/virtual/2023/poster/72336", "video": "https://nips.cc/virtual/2023/poster/72336", "author_site": "Lauren Conger, Franca Hoffmann, Eric Mazumdar, Lillian Ratliff", "tldr": "", "abstract": "We propose a novel framework for analyzing the dynamics of distribution shift in real-world systems that captures the feedback loop between learning algorithms and the distributions on which they are deployed. Prior work largely models feedback-induced distribution shift as adversarial or via an overly simplistic distribution-shift structure. In contrast, we propose a coupled partial differential equation model that captures fine-grained changes in the distribution over time by accounting for complex dynamics that arise due to strategic responses to algorithmic decision-making, non-local endogenous population interactions, and other exogenous sources of distribution shift. We consider two common settings in machine learning: cooperative settings with information asymmetries, and competitive settings where a learner faces strategic users. 
For both of these settings, when the algorithm retrains via gradient descent, we prove asymptotic convergence of the retraining procedure to a steady state, both in finite and in infinite dimensions, obtaining explicit rates in terms of the model parameters. To do so, we derive new results on the convergence of coupled PDEs that extend what is known about multi-species systems. Empirically, we show that our approach captures well-documented forms of distribution shifts like polarization and disparate impacts that simpler models cannot capture.", "keywords": "distribution shift;partial differential equations", "primary_area": "", "supplementary_material": "/attachment/d96e2edf111e022f68b48e65d3259ee1afc7cc09.zip", "author": "Lauren E Conger;Franca Hoffman;Eric Mazumdar;Lillian J Ratliff", "authorids": "~Lauren_E_Conger1;franca.hoffmann@caltech.edu;~Eric_Mazumdar1;~Lillian_J_Ratliff1", "gender": ";;M;", "homepage": "https://leconger.github.io/;;http://people.eecs.berkeley.edu/~emazumdar/;", "dblp": ";;177/9322;", "google_scholar": "Iv6uAdMAAAAJ;;FZOxxvcAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lauren_E_Conger1;franca.hoffmann@caltech.edu;~Eric_Mazumdar1;~Lillian_J_Ratliff1", "aff": "California Institute of Technology;;Department of Computing + Mathematical Sciences, California Institute of Technology;", "aff_domain": "caltech.edu;;cms.caltech.edu;", "position": "PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nconger2023strategic,\ntitle={Strategic Distribution Shift of Interacting Agents via Coupled Gradient Flows},\nauthor={Lauren E Conger and Franca Hoffman and Eric Mazumdar and Lillian J Ratliff},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=E3ZUEaeFYS}\n}", "github": "", "project": "", "reviewers": "HH7Z;P1kC;nK6f;vBit", "pdf_size": 2090789, "rating": "5;6;7;7", "confidence": "2;3;3;2", "soundness": "3;3;4;3", "novelty": "2;3;4;2", "presentation": "3;3;3;3", "wc_summary": "64;126;182;69", "wc_strengths": "31;58;109;58", "wc_weaknesses": "32;286;144;71", "wc_questions": "29;2;148;34", "wc_limitations": "1;2;8;15", "wc_review": "157;474;591;247", "wc_reply_reviewers": "21;12;21;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 110.25, 48.05400607649689 ], "wc_strengths_avg": [ 64.0, 28.222331583340168 ], "wc_weaknesses_avg": [ 133.25, 96.92103744801744 ], "wc_questions_avg": [ 53.25, 56.04183705054644 ], "wc_limitations_avg": [ 6.5, 5.5901699437494745 ], "wc_review_avg": [ 367.25, 173.29508792807718 ], "wc_reply_reviewers_avg": [ 17.75, 3.6996621467371855 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2091645154257604879&as_sdt=20005&sciodt=0,9&hl=en", "gs_version_total": 6, "email": "caltech.edu;;cms.caltech.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "California Institute of Technology;", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;", "aff_unique_abbr": "Caltech;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena;",
"aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "On the Asymptotic Learning Curves of Kernel Ridge Regression under Power-law Decay", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72335", "id": "E4P5kVSKlT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9adc8ada9183f4b9a007a02773fd8114-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=E4P5kVSKlT", "openreview": "https://openreview.net/forum?id=E4P5kVSKlT", "poster": "/media/PosterPDFs/NeurIPS%202023/72335.png?t=1701775475.950977", "slides": "https://nips.cc/virtual/2023/poster/72335", "video": "https://nips.cc/virtual/2023/poster/72335", "author_site": "Yicheng Li, haobo Zhang, Qian Lin", "tldr": "", "abstract": "The widely observed 'benign overfitting phenomenon' in the neural network literature raises the challenge to the `bias-variance trade-off' doctrine in the statistical learning theory.\nSince the generalization ability of the 'lazy trained' over-parametrized neural network can be well approximated by that of the neural tangent kernel regression,\nthe curve of the excess risk (namely, the learning curve) of kernel ridge regression attracts increasing attention recently.\nHowever, most recent arguments on the learning curve are heuristic and are based on the 'Gaussian design' assumption.\nIn this paper, under mild and more realistic assumptions, we rigorously provide a full characterization of the learning curve in the asymptotic sense\nunder a power-law decay condition of the eigenvalues of the kernel and also the target function.\nThe learning curve elaborates the effect and the interplay of the choice of the regularization parameter, the source condition and the noise.\nIn particular, our results suggest that the 'benign overfitting phenomenon' exists in over-parametrized neural networks only when the noise level is small.", "keywords": "generalization;reproducing kernel Hilbert space;bias-variance trade-off", "primary_area": "", "supplementary_material": "", "author": "Yicheng Li;Haobo Zhang;Qian Lin", "authorids": "~Yicheng_Li2;~Haobo_Zhang2;~Qian_Lin2", "gender": "M;M;M", "homepage": ";;https://sites.google.com/site/qianlincd/", "dblp": ";;79/3108", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;kHPrqdgAAAAJ", "orcid": "0000-0002-9497-0379;0000-0003-3478-140X;", "linkedin": ";;", "or_profile": "~Yicheng_Li2;~Haobo_Zhang2;~Qian_Lin2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023on,\ntitle={On the Asymptotic Learning Curves of Kernel Ridge Regression under Power-law Decay},\nauthor={Yicheng Li and Haobo Zhang and Qian Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=E4P5kVSKlT}\n}", "github": "", "project": "", "reviewers": "MHmF;xyck;oJ4i;3aFw;j73d", "pdf_size": 876337, "rating": "5;6;6;7;8", "confidence": "2;3;3;3;3", "soundness": "2;3;3;4;4", "novelty": "2;3;1;3;4", "presentation": "3;2;3;4;4", "wc_summary": "83;82;24;108;167", "wc_strengths": "25;65;40;101;111", "wc_weaknesses": "130;359;169;171;10", "wc_questions": "10;98;80;144;189", "wc_limitations": "23;4;5;25;40", "wc_review": "271;608;318;549;517", "wc_reply_reviewers": "24;16;33;106;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", 
"reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 92.8, 46.22293802864547 ], "wc_strengths_avg": [ 68.4, 33.40419135378074 ], "wc_weaknesses_avg": [ 167.8, 112.1595292429493 ], "wc_questions_avg": [ 104.2, 60.452956916928386 ], "wc_limitations_avg": [ 19.4, 13.514436725220923 ], "wc_review_avg": [ 452.6, 133.18047905004698 ], "wc_reply_reviewers_avg": [ 39.2, 33.949374073758705 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6864064729836443, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=474948850675065176&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning from Visual Observation via Offline Pretrained State-to-Go Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72334", "id": "E58gaxJN1d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bb203e938836544655996d1bb94a0fd7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=E58gaxJN1d", "openreview": "https://openreview.net/forum?id=E58gaxJN1d", "poster": "/media/PosterPDFs/NeurIPS%202023/72334.png?t=1698144773.7760963", "slides": "https://nips.cc/virtual/2023/poster/72334", "video": "https://nips.cc/virtual/2023/poster/72334", "author_site": "Bohan Zhou, Ke Li, Jiechuan Jiang, Zongqing Lu", "tldr": "", "abstract": "Learning from visual observation (LfVO), aiming at recovering policies from only visual observation data, is promising yet a challenging problem. Existing LfVO approaches either only adopt inefficient online learning schemes or require additional task-specific information like goal states, making them not suited for open-ended tasks. To address these issues, we propose a two-stage framework for learning from visual observation. In the first stage, we introduce and pretrain State-to-Go (STG) Transformer offline to predict and differentiate latent transitions of demonstrations. Subsequently, in the second stage, the STG Transformer provides intrinsic rewards for downstream reinforcement learning tasks where an agent learns merely from intrinsic rewards. Empirical results on Atari and Minecraft show that our proposed method outperforms baselines and in some tasks even achieves performance comparable to the policy learned from environmental rewards. These results shed light on the potential of utilizing video-only data to solve difficult visual reinforcement learning tasks rather than relying on complete offline datasets containing states, actions, and rewards. 
The project\u2019s website and code can be\nfound at https://sites.google.com/view/stgtransformer.", "keywords": "Learning from Observations;Offline Learning from Visual Observations;State-to-Go Transformer", "primary_area": "", "supplementary_material": "/attachment/a9dcef6ee99a022dac6247fa531c1aead94f5142.pdf", "author": "Bohan Zhou;Ke Li;Jiechuan Jiang;Zongqing Lu", "authorids": "~Bohan_Zhou1;~Ke_Li18;~Jiechuan_Jiang1;~Zongqing_Lu2", "gender": "M;F;;", "homepage": "https://www.zhihu.com/people/Zhoubh;https://kelichloe.github.io/;;", "dblp": ";;220/4026;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;A_3_zUoAAAAJ;;", "orcid": "0000-0001-5495-7631;;;", "linkedin": ";;;", "or_profile": "~Bohan_Zhou1;~Ke_Li18;~Jiechuan_Jiang1;~Zongqing_Lu2", "aff": "Nankai University;INSEAD;Tsinghua University;", "aff_domain": "nankai.edu.cn;insead.edu;mail.tsinghua.edu.cn;", "position": "Undergrad student;PhD student;Intern;", "bibtex": "@inproceedings{\nzhou2023learning,\ntitle={Learning from Visual Observation via Offline Pretrained State-to-Go Transformer},\nauthor={Bohan Zhou and Ke Li and Jiechuan Jiang and Zongqing Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=E58gaxJN1d}\n}", "github": "", "project": "", "reviewers": "HW2S;JcJo;B1XQ;9y8A", "pdf_size": 6016312, "rating": "4;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;2;2", "novelty": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "105;87;78;68", "wc_strengths": "49;94;37;106", "wc_weaknesses": "511;213;35;371", "wc_questions": "43;46;331;2", "wc_limitations": "1;11;29;25", "wc_review": "709;451;510;572", "wc_reply_reviewers": "549;279;193;36", "wc_reply_authors": "992;397;1116;246", "reply_reviewers": "2;1;2;1", "reply_authors": "5;4;6;4", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.5, 13.6106575888162 ], "wc_strengths_avg": [ 71.5, 29.124731758421397 ], "wc_weaknesses_avg": [ 282.5, 177.57463219728206 ], "wc_questions_avg": [ 105.5, 131.34782068995284 ], "wc_limitations_avg": [ 16.5, 11.169153951844338 ], "wc_review_avg": [ 560.5, 95.818839483684 ], "wc_reply_reviewers_avg": [ 264.25, 186.06097790778162 ], "wc_reply_authors_avg": [ 687.75, 372.7079117754277 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.75, 0.82915619758885 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12193840966272549626&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "nankai.edu.cn;insead.edu;mail.tsinghua.edu.cn;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nankai University;INSEAD;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nankai.edu.cn;https://www.insead.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NKU;INSEAD;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;France" }, { "id": "E8vGACczsQ", "title": "(Out-of-context) Meta-learning in Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "\nBrown et al. (2020) famously introduced the phenomenon of in-context meta-learning in large language models (LLMs). 
Our work establishes the existence of a phenomenon we call out-of-context meta-learning via carefully designed synthetic experiments with large language models. We show that out-of-context meta-learning leads LLMs to more readily \u201cinternalize\u201d the semantic content of text that is, or appears to be, broadly useful (such as true statements, or text from authoritative sources) and apply it in appropriate contexts. We further demonstrate internalization in a synthetic computer vision setting, and propose two hypotheses for the emergence of internalization: one relying on the way models store knowledge in their parameters, and another suggesting that the implicit gradient alignment bias of gradient-descent-based methods may be responsible. Finally, we reflect on what our results might imply about capabilities of future AI systems, and discuss potential risks.", "keywords": "LLMs;QA;world models;internalization;consistency;meta-learning", "primary_area": "", "supplementary_material": "/attachment/db8cc6ce44b59040f96b19e8290bde0e52286174.pdf", "author": "Dmitrii Krasheninnikov;Egor Krasheninnikov;Bruno Kacper Mlodozeniec;David Krueger", "authorids": "~Dmitrii_Krasheninnikov1;~Egor_Krasheninnikov1;~Bruno_Kacper_Mlodozeniec2;~David_Krueger1", "gender": "M;M;Not Specified;M", "homepage": "https://krasheninnikov.github.io/about/;;https://brunokm.github.io;https://mila.umontreal.ca/en/person/david-scott-krueger/", "dblp": ";;241/6874;142/2741.html", "google_scholar": "BIQflKQAAAAJ;6DiC_yYAAAAJ;kGPBRy8AAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ", "orcid": ";;;", "linkedin": ";;bkmlodozeniec/;", "or_profile": "~Dmitrii_Krasheninnikov1;~Egor_Krasheninnikov1;~Bruno_Kacper_Mlodozeniec2;~David_Krueger1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;Research Assistant;PhD student;Assistant Professor", "bibtex": "@misc{\nkrasheninnikov2023outofcontext,\ntitle={(Out-of-context) Meta-learning in Language Models},\nauthor={Dmitrii Krasheninnikov and Egor Krasheninnikov and Bruno Kacper Mlodozeniec and David Krueger},\nyear={2023},\nurl={https://openreview.net/forum?id=E8vGACczsQ}\n}", "github": "", "project": "", "reviewers": "yfNH;1KMo;pjae;21mo;gp6r", "site": "https://openreview.net/forum?id=E8vGACczsQ", "pdf_size": 509017, "rating": "5;6;6;6;7", "confidence": "3;4;2;3;3", "soundness": "2;3;2;3;3", "novelty": "2;3;2;3;4", "presentation": "1;3;4;3;4", "wc_summary": "108;101;96;71;141", "wc_strengths": "150;25;83;31;7", "wc_weaknesses": "299;31;1687;209;24", "wc_questions": "91;1;205;11;15", "wc_limitations": "13;7;37;21;49", "wc_review": "661;165;2108;343;236", "wc_reply_reviewers": "64;0;775;54;0", "wc_reply_authors": "28;0;304;148;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;2;2;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 103.4, 22.5619148123558 ], "wc_strengths_avg": [ 59.2, 51.96306380497593 ], "wc_weaknesses_avg": [ 450.0, 627.3895121852133 ], "wc_questions_avg": [ 64.6, 77.18445439335567 ], "wc_limitations_avg": [ 25.4, 15.512575543732254 ], "wc_review_avg": [ 702.6, 722.8998824180289 ], "wc_reply_reviewers_avg": [ 178.6, 299.3817629716279 ], "wc_reply_authors_avg": [ 96.0, 117.49382962521904 ], "reply_reviewers_avg": [ 
0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3q_5h8LYxH0J:scholar.google.com/&scioq=(Out-of-context)+Meta-learning+in+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Video Timeline Modeling For News Story Understanding", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73651", "id": "EBYZSRRzSE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a0f92efaa8f0cd67992caf6b2fa2bac-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=EBYZSRRzSE", "openreview": "https://openreview.net/forum?id=EBYZSRRzSE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73651", "video": "https://nips.cc/virtual/2023/poster/73651", "author_site": "Meng Liu, Mingda Zhang, Jialu Liu, Hanjun Dai, Ming-Hsuan Yang, Shuiwang Ji, Zheyun Feng, Boqing Gong", "tldr": "", "abstract": "In this paper, we present a novel problem, namely video timeline modeling. Our objective is to create a video-associated timeline from a set of videos related to a specific topic, thereby facilitating the content and structure understanding of the story being told. This problem has significant potential in various real-world applications, for instance, news story summarization. To bootstrap research in this area, we curate a realistic benchmark dataset, YouTube-News-Timeline, consisting of over $12$k timelines and $300$k YouTube news videos. Additionally, we propose a set of quantitative metrics to comprehensively evaluate and compare methodologies. With such a testbed, we further develop and benchmark several deep learning approaches to tackling this problem. We anticipate that this exploratory work will pave the way for further research in video timeline modeling. 
The assets are available via https://github.com/google-research/google-research/tree/master/video_timeline_modeling.", "keywords": "Video Timeline Modeling;News Story;Datasets;Benchmark", "primary_area": "", "supplementary_material": "/attachment/b31ca75db3bc7d4fe67696dc63bd4b89d73b9334.pdf", "author": "Meng Liu;Mingda Zhang;Jialu Liu;Hanjun Dai;Ming-Hsuan Yang;Shuiwang Ji;Zheyun Feng;Boqing Gong", "authorids": "~Meng_Liu3;~Mingda_Zhang1;~Jialu_Liu1;~Hanjun_Dai1;~Ming-Hsuan_Yang1;~Shuiwang_Ji1;~Zheyun_Feng1;~Boqing_Gong1", "gender": "M;M;M;M;M;M;;M", "homepage": "https://mengliu1998.github.io;https://people.cs.pitt.edu/~mzhang/;https://jialu.info/;https://hanjun-dai.github.io;https://faculty.ucmerced.edu/mhyang/;http://people.tamu.edu/~sji;;http://boqinggong.info", "dblp": "41/7841-15;25/10133;14/8399;144/7311;79/3711.html;84/6405;142/2893;29/7457", "google_scholar": "https://scholar.google.com/citations?hl=en;4aIwj4QAAAAJ;BUERw4QAAAAJ;obpl7GQAAAAJ;p9-ohHsAAAAJ;BZGj6sAAAAAJ;jHCgT18AAAAJ;lv9ZeVUAAAAJ", "orcid": ";;;;0000-0003-4848-2304;0000-0002-4205-4563;;", "linkedin": "meng-liu-4a1813197/;;;hanjun-dai;minghsuanyang/;shuiwang-ji-9a040715/;;boqing-gong-46aa5821/", "or_profile": "~Meng_Liu3;~Mingda_Zhang1;~Jialu_Liu1;~Hanjun_Dai1;~Ming-Hsuan_Yang1;~Shuiwang_Ji1;~Zheyun_Feng1;~Boqing_Gong1", "aff": "Texas A&M University - College Station;Google DeepMind;Google Research;Google Research;University of California at Merced;Texas A&M University;Google;Google", "aff_domain": "tamu.edu;google.com;google.com;google.com;umcerced.edu;tamu.edu;google.com;google.com", "position": "PhD student;Software Engineer;Software Engineer Manager;Researcher;Professor;Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nliu2023video,\ntitle={Video Timeline Modeling For News Story Understanding},\nauthor={Meng Liu and Mingda Zhang and Jialu Liu and Hanjun Dai and Ming-Hsuan Yang and Shuiwang Ji and Zheyun Feng and Boqing Gong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=EBYZSRRzSE}\n}", "github": "", "project": "", "reviewers": "xNvC;eQ6E;84mW;aTSU", "pdf_size": 4520067, "rating": "6;7;7;10", "confidence": "3;5;4;3", "wc_summary_and_contributions": "52;110;41;44", "wc_strengths": "38;21;75;32", "wc_improvement": "155;120;138;72", "wc_limitations": "101;90;75;37", "wc_correctness": "10;24;66;1", "wc_clarity": "25;18;7;18", "wc_relation_to_prior_work": "14;48;22;14", "wc_documentation": "9;10;43;42", "wc_additional_feedback": "1;1;1;1", "wc_review": "405;442;468;261", "wc_reply_reviewers": "0;0;0;63", "wc_reply_authors": "605;323;743;277", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 7.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 61.75, 28.145825622994256 ], "wc_strengths_avg": [ 41.5, 20.279299790673246 ], "wc_improvement_avg": [ 121.25, 31.011086727169044 ], "wc_limitations_avg": [ 75.75, 24.200981385059574 ], "wc_correctness_avg": [ 25.25, 24.913600703230355 ], "wc_clarity_avg": [ 17.0, 6.442049363362563 ], "wc_relation_to_prior_work_avg": [ 24.5, 13.955285736952863 ], "wc_documentation_avg": [ 26.0, 16.507574019219177 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 394.0, 79.984373473823 ], "wc_reply_reviewers_avg": [ 15.75, 27.279800219209818 ], "wc_reply_authors_avg": [ 487.0, 193.94329068054918 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 
0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3015113445777637, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11073366224124760707&as_sdt=5,33&sciodt=0,33&hl=en&oe=ASCII", "gs_version_total": 8, "email": "tamu.edu;google.com;google.com;google.com;umcerced.edu;tamu.edu;google.com;google.com", "author_num": 8, "aff_unique_index": "0;1;1;1;2;0;1;1", "aff_unique_norm": "Texas A&M University;Google;University of California, Merced", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.tamu.edu;https://deepmind.com;https://www.ucmerced.edu", "aff_unique_abbr": "TAMU;DeepMind;UC Merced", "aff_campus_unique_index": "0;2;2;3;2;2", "aff_campus_unique": "College Station;;Mountain View;Merced", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Zero-Regret Performative Prediction Under Inequality Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72333", "id": "ECBK3TVmZl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/047397849f63b4fcfced4ff720159f3d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ECBK3TVmZl", "openreview": "https://openreview.net/forum?id=ECBK3TVmZl", "poster": "/media/PosterPDFs/NeurIPS%202023/72333.png?t=1699876620.8192432", "slides": "https://nips.cc/virtual/2023/poster/72333", "video": "https://nips.cc/virtual/2023/poster/72333", "author_site": "Wenjing YAN, Xuanyu Cao", "tldr": "", "abstract": "Performative prediction is a recently proposed framework where predictions guide decision-making and hence influence future data distributions. Such performative phenomena are ubiquitous in various areas, such as transportation, finance, public policy, and recommendation systems. To date, work on performative prediction has only focused on unconstrained problems, neglecting the fact that many real-world learning problems are subject to constraints. This paper bridges this gap by studying performative prediction under inequality constraints. Unlike most existing work that provides only performative stable points, we aim to find the optimal solutions. Anticipating the performative gradient is a challenging task, due to the agnostic performative effect on data distributions. To address this issue, we first develop a robust primal-dual framework that requires only approximate gradients up to a certain accuracy, yet delivers the same order of performance as the stationary stochastic primal-dual algorithm without performativity. Based on this framework, we then propose an adaptive primal-dual algorithm for location families. Our analysis demonstrates that the proposed adaptive primal-dual algorithm attains $\\mathcal{O}(\\sqrt{T})$ regret and constraint violations, using only $\\sqrt{T} + 2T$ samples, where $T$ is the time horizon. To the best of our knowledge, this is the first study and analysis of the optimality of the performative prediction problem under inequality constraints.
Finally, we validate the effectiveness of our algorithm and theoretical results through numerical simulations.", "keywords": "Performative prediction;decision-dependent distribution;inequality constraints;primal-dual algorithm.", "primary_area": "", "supplementary_material": "/attachment/255437924092f622280220f81dde6b4ae461ce11.pdf", "author": "Wenjing Yan;Xuanyu Cao", "authorids": "~Wenjing_Yan2;~Xuanyu_Cao1", "gender": "F;M", "homepage": ";https://labs.wsu.edu/xuanyu/", "dblp": ";117/3366", "google_scholar": "sGUUkfEAAAAJ;jvrZYmAAAAAJ", "orcid": "0000-0002-4790-1397;", "linkedin": ";", "or_profile": "~Wenjing_Yan2;~Xuanyu_Cao1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;ust.hk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyan2023zeroregret,\ntitle={Zero-Regret Performative Prediction Under Inequality Constraints},\nauthor={Wenjing Yan and Xuanyu Cao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ECBK3TVmZl}\n}", "github": "", "project": "", "reviewers": "9Way;yrZY;qFHm;PDku;NMva", "pdf_size": 502103, "rating": "4;5;5;7;7", "confidence": "4;2;2;3;3", "soundness": "4;2;3;3;4", "novelty": "2;2;2;3;3", "presentation": "3;2;4;3;4", "wc_summary": "40;98;146;73;71", "wc_strengths": "55;36;44;73;85", "wc_weaknesses": "301;68;21;65;159", "wc_questions": "102;144;97;40;171", "wc_limitations": "1;5;1;1;25", "wc_review": "499;351;309;252;511", "wc_reply_reviewers": "69;0;12;15;13", "wc_reply_authors": "870;0;6;6;6", "reply_reviewers": "2;0;1;1;1", "reply_authors": "3;1;2;2;2", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 85.6, 35.3643888678993 ], "wc_strengths_avg": [ 58.6, 18.11739495622922 ], "wc_weaknesses_avg": [ 122.8, 99.77254131272791 ], "wc_questions_avg": [ 110.8, 44.7365622282267 ], "wc_limitations_avg": [ 6.6, 9.32952303175248 ], "wc_review_avg": [ 384.4, 103.43229669692151 ], "wc_reply_reviewers_avg": [ 21.8, 24.17767565337909 ], "wc_reply_authors_avg": [ 177.6, 346.20779887229577 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.08908708063747486, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4759277999626972284&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hkust.edu;ust.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Prototype-based Aleatoric Uncertainty Quantification for Cross-modal Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72332", "id": "ECRgBK6sk1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4d893f766ab60e5337659b9e71883af4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ECRgBK6sk1", "openreview": "https://openreview.net/forum?id=ECRgBK6sk1", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72332.png?t=1699554249.106422", "slides": "https://nips.cc/virtual/2023/poster/72332", "video": "https://nips.cc/virtual/2023/poster/72332", "author_site": "Hao Li, Jingkuan Song, Lianli Gao, Xiaosu Zhu, Hengtao Shen", "tldr": "", "abstract": "Cross-modal Retrieval methods build similarity relations between vision and language modalities by jointly learning a common representation space. However, the predictions are often unreliable due to the Aleatoric uncertainty, which is induced by low-quality data, e.g., corrupt images, fast-paced videos, and non-detailed texts. In this paper, we propose a novel Prototype-based Aleatoric Uncertainty Quantification (PAU) framework to provide trustworthy predictions by quantifying the uncertainty arisen from the inherent data ambiguity. Concretely, we first construct a set of various learnable prototypes for each modality to represent the entire semantics subspace. Then Dempster-Shafer Theory and Subjective Logic Theory are utilized to build an evidential theoretical framework by associating evidence with Dirichlet Distribution parameters. The PAU model induces accurate uncertainty and reliable predictions for cross-modal retrieval. Extensive experiments are performed on four major benchmark datasets of MSR-VTT, MSVD, DiDeMo, and MS-COCO, demonstrating the effectiveness of our method. The code is accessible at https://github.com/leolee99/PAU.", "keywords": "multimodal learning;cross-modal retrieval;robust learning;uncertainty", "primary_area": "", "supplementary_material": "", "author": "Hao Li;Jingkuan Song;Lianli Gao;Xiaosu Zhu;Heng Tao Shen", "authorids": "~Hao_Li21;~Jingkuan_Song3;~Lianli_Gao1;~Xiaosu_Zhu1;~Heng_Tao_Shen3", "gender": "M;M;F;M;M", "homepage": "https://leolee99.github.io/;https://cfm.uestc.edu.cn/~songjingkuan/;https://lianligao.github.io/;https://github.com/xiaosu-zhu;https://cfm.uestc.edu.cn/~shenht/", "dblp": "17/5705;70/10575;123/9849.html;243/3461;s/HTShen", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;F5Zy9V4AAAAJ;https://scholar.google.com.au/citations?user=zsm2dpYAAAAJ;2DihiQ0AAAAJ;https://scholar.google.com.au/citations?user=krryaDkAAAAJ", "orcid": "0000-0001-8205-6734;;;0000-0001-7728-2518;", "linkedin": "hao-li-b5b2b2208/;;;;", "or_profile": "~Hao_Li21;~Jingkuan_Song3;~Lianli_Gao1;~Xiaosu_Zhu1;~Hengtao_Shen1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China,;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;", "position": "MS student;Full Professor;Full Professor;PhD student;", "bibtex": "@inproceedings{\nli2023prototypebased,\ntitle={Prototype-based Aleatoric Uncertainty Quantification for Cross-modal Retrieval},\nauthor={Hao Li and Jingkuan Song and Lianli Gao and Xiaosu Zhu and Heng Tao Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ECRgBK6sk1}\n}", "github": "", "project": "", "reviewers": "tGbU;qbVB;hqQV;VBVJ;b4gq", "pdf_size": 5969285, "rating": "3;5;6;6;8", "confidence": "4;4;3;5;5", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;4", "presentation": "2;2;3;2;3", "wc_summary": "72;55;53;176;91", "wc_strengths": "55;71;95;71;115", "wc_weaknesses": "230;154;109;1031;134", "wc_questions": "2;9;74;61;3", "wc_limitations": "5;1;9;11;1", "wc_review": "364;290;340;1350;344", 
"wc_reply_reviewers": "14;42;13;133;11", "wc_reply_authors": "151;260;34;92;22", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 89.4, 45.41629663457821 ], "wc_strengths_avg": [ 81.4, 21.10544953323667 ], "wc_weaknesses_avg": [ 331.6, 352.0287488260014 ], "wc_questions_avg": [ 29.8, 31.14739154407637 ], "wc_limitations_avg": [ 5.4, 4.079215610874228 ], "wc_review_avg": [ 537.6, 406.9307557803907 ], "wc_reply_reviewers_avg": [ 42.6, 46.615877123572396 ], "wc_reply_authors_avg": [ 111.8, 87.18807257876503 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.39477101697586137, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1268162815569398743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Visual Explanations of Image-Text Representations via Multi-Modal Information Bottleneck Attribution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72331", "id": "ECvtxmVP0x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/339caf45a6fa281cae8adc6465343464-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ECvtxmVP0x", "openreview": "https://openreview.net/forum?id=ECvtxmVP0x", "poster": "/media/PosterPDFs/NeurIPS%202023/72331.png?t=1701934954.4560647", "slides": "https://nips.cc/virtual/2023/poster/72331", "video": "https://nips.cc/virtual/2023/poster/72331", "author_site": "Ying Wang, Tim G. J. Rudner, Andrew Wilson", "tldr": "", "abstract": "Vision-language pretrained models have seen remarkable success, but their application to safety-critical settings is limited by their lack of interpretability. To improve the interpretability of vision-language models such as CLIP, we propose a multi-modal information bottleneck (M2IB) approach that learns latent representations that compress irrelevant information while preserving relevant visual and textual features. We demonstrate how M2IB can be applied to attribution analysis of vision-language pretrained models, increasing attribution accuracy and improving the interpretability of such models when applied to safety-critical domains such as healthcare. Crucially, unlike commonly used unimodal attribution methods, M2IB does not require ground truth labels, making it possible to audit representations of vision-language pretrained models when multiple modalities but no ground-truth data is available. 
Using CLIP as an example, we demonstrate the effectiveness of M2IB attribution and show that it outperforms gradient-based, perturbation-based, and attention-based attribution methods both qualitatively and quantitatively.", "keywords": "Interpretability;Attribution Maps;Information Bottleneck;Multi-Modal Learning;Vision-Language Pretrained Models", "primary_area": "", "supplementary_material": "", "author": "Ying Wang;Tim G. J. Rudner;Andrew Gordon Wilson", "authorids": "~Ying_Wang14;~Tim_G._J._Rudner2;~Andrew_Gordon_Wilson1", "gender": "F;Not Specified;Not Specified", "homepage": "https://yingwangg.github.io/;https://cims.nyu.edu/~andrewgw;https://timrudner.com", "dblp": "94/3104-63;65/10453;230/3480", "google_scholar": "5BN__1MAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;https://scholar.google.de/citations?user=MbBntPgAAAAJ", "orcid": "0009-0007-2559-8837;;", "linkedin": "ying-wang-90611714a/;;trudner", "or_profile": "~Ying_Wang14;~Andrew_Gordon_Wilson1;~Tim_Georg_Johann_Rudner1", "aff": "New York University;New York University;Yale University", "aff_domain": "nyu.edu;nyu.edu;yale.edu", "position": "MS student;Associate Professor;Visiting Fellow", "bibtex": "@inproceedings{\nwang2023visual,\ntitle={Visual Explanations of Image-Text Representations via Multi-Modal Information Bottleneck Attribution},\nauthor={Ying Wang and Tim G. J. Rudner and Andrew Gordon Wilson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ECvtxmVP0x}\n}", "github": "", "project": "", "reviewers": "6SYf;xLqC;We26;PE9L", "pdf_size": 7054019, "rating": "5;6;7;8", "confidence": "5;2;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "131;62;47;83", "wc_strengths": "101;93;33;60", "wc_weaknesses": "340;30;105;27", "wc_questions": "184;46;92;95", "wc_limitations": "54;9;39;9", "wc_review": "810;240;316;274", "wc_reply_reviewers": "22;42;840;61", "wc_reply_authors": "136;97;1621;38", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;5;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 31.704692081772375 ], "wc_strengths_avg": [ 71.75, 27.141987768032024 ], "wc_weaknesses_avg": [ 125.5, 127.72333381179807 ], "wc_questions_avg": [ 104.25, 49.97186708539115 ], "wc_limitations_avg": [ 27.75, 19.48557158514987 ], "wc_review_avg": [ 410.0, 232.5037634104016 ], "wc_reply_reviewers_avg": [ 241.25, 345.96341930903617 ], "wc_reply_authors_avg": [ 473.0, 663.715677078672 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1025978352085154, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17173638422085118973&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nyu.edu;nyu.edu;yale.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "New York University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.yale.edu", "aff_unique_abbr": "NYU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bandit Task Assignment with Unknown Processing Time", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72330", "id": "EE1Uiu3Ryb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c5ee7343f396954377c2c16dda33a96-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EE1Uiu3Ryb", "openreview": "https://openreview.net/forum?id=EE1Uiu3Ryb", "poster": "/media/PosterPDFs/NeurIPS%202023/72330.png?t=1702312558.5925214", "slides": "https://nips.cc/virtual/2023/poster/72330", "video": "https://nips.cc/virtual/2023/poster/72330", "author_site": "Shinji Ito, Daisuke Hatano, Hanna Sumita, Kei Takemura, Takuro Fukunaga, Naonori Kakimura, Ken-Ichi Kawarabayashi", "tldr": "", "abstract": "This study considers a novel problem setting, referred to as \\textit{bandit task assignment}, that incorporates the processing time of each task in the bandit setting. In this problem setting, a player sequentially chooses a set of tasks to start so that the set of processing tasks satisfies a given combinatorial constraint. The reward and processing time for each task follow unknown distributions, values of which are revealed only after the task has been completed. The problem generalizes the stochastic combinatorial semi-bandit problem and the budget-constrained bandit problem. For this problem setting, we propose an algorithm based on upper confidence bounds~(UCB) combined with a phased-update approach. The proposed algorithm admits a gap-dependent regret upper bound of $O(MN(1/\\Delta){\\log T})$ and a gap-free regret upper bound of $\\tilde{O}( \\sqrt{MNT} )$, where $N$ is the number of the tasks, $M$ is the maximum number of tasks run at the same time, $T$ is the time horizon, and $\\Delta$ is the gap between expected per-round rewards of the optimal and best suboptimal sets of tasks. 
These regret bounds nearly match lower bounds.", "keywords": "bandit;combinatorial semi-bandits;bandits with budget", "primary_area": "", "supplementary_material": "/attachment/bc3471fa91a3b2661a2e2237beeb390d2e789297.zip", "author": "Shinji Ito;Daisuke Hatano;Hanna Sumita;Kei Takemura;Takuro Fukunaga;Naonori Kakimura;Ken-Ichi Kawarabayashi", "authorids": "~Shinji_Ito1;~Daisuke_Hatano1;~Hanna_Sumita2;~Kei_Takemura1;~Takuro_Fukunaga3;~Naonori_Kakimura1;~Ken-Ichi_Kawarabayashi1", "gender": "M;M;F;M;;M;", "homepage": "https://researchmap.jp/shinji_ito?lang=en;https://sites.google.com/view/daitokuhatano/home;https://alg.c.titech.ac.jp/sumita/;;;http://www.math.keio.ac.jp/~kakimura/;", "dblp": "49/852;08/9924;129/9380;248/9211;35/6826.html;45/4449.html;45/6846.html", "google_scholar": "https://scholar.google.co.jp/citations?user=GX0V06wAAAAJ;;;;;K3dZNeEAAAAJ;", "orcid": ";;;;0000-0003-3285-2876;;", "linkedin": ";;;;;;", "or_profile": "~Shinji_Ito1;~Daisuke_Hatano1;~Hanna_Sumita2;~Kei_Takemura1;~Takuro_Fukunaga3;~Naonori_Kakimura1;~Ken-Ichi_Kawarabayashi1", "aff": "NEC;RIKEN;Tokyo Institute of Technology, Tokyo Institute of Technology;NEC Corporation;Chuo University;Keio University;NII,\u3000the university of Tokyo", "aff_domain": "nec.com;riken.jp;titech.ac.jp;nec.com;chuo-u.ac.jp;keio.ac.jp;nii.ac.jp", "position": "Principal Researcher;Researcher;Lecturer;Researcher;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nito2023bandit,\ntitle={Bandit Task Assignment with Unknown Processing Time},\nauthor={Shinji Ito and Daisuke Hatano and Hanna Sumita and Kei Takemura and Takuro Fukunaga and Naonori Kakimura and Ken-Ichi Kawarabayashi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EE1Uiu3Ryb}\n}", "github": "", "project": "", "reviewers": "PHPg;uzGu;iLmP;2o4u", "pdf_size": 2483982, "rating": "6;6;7;7", "confidence": "3;2;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;4;3;2", "wc_summary": "199;87;89;51", "wc_strengths": "35;68;167;52", "wc_weaknesses": "21;165;321;40", "wc_questions": "147;2;167;14", "wc_limitations": "0;11;13;1", "wc_review": "402;333;757;158", "wc_reply_reviewers": "19;24;21;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.5, 55.50450432172149 ], "wc_strengths_avg": [ 80.5, 51.28596299183628 ], "wc_weaknesses_avg": [ 136.75, 119.90074019788202 ], "wc_questions_avg": [ 82.5, 74.95498649189392 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 412.5, 217.8766853061612 ], "wc_reply_reviewers_avg": [ 16.0, 9.40744386111339 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P9sUfanHMdYJ:scholar.google.com/&scioq=Bandit+Task+Assignment+with+Unknown+Processing+Time&hl=en&as_sdt=0,44", "gs_version_total": 6, "email": "nec.com;riken.jp;titech.ac.jp;nec.com;chuo-u.ac.jp;keio.ac.jp;nii.ac.jp", "author_num": 7, "aff_unique_index": "0;1;2;0;3;4;5", "aff_unique_norm": "NEC Corporation;RIKEN;Tokyo Institute of 
Technology;Chuo University;Keio University;University of Tokyo", "aff_unique_dep": ";;;;;NII (National Institute of Informatics)", "aff_unique_url": "https://www.nec.com;https://www.riken.jp;https://www.titech.ac.jp;https://www.chuo-u.ac.jp;https://www.keio.ac.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "NEC;RIKEN;Titech;Chuo U;Keio;UTokyo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "PICProp: Physics-Informed Confidence Propagation for Uncertainty Quantification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72329", "id": "EETqXXdqkI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/68730224bbf35ffac7a4fbf9b1ea4bfe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EETqXXdqkI", "openreview": "https://openreview.net/forum?id=EETqXXdqkI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72329", "video": "https://nips.cc/virtual/2023/poster/72329", "author_site": "Qianli Shen, Wai Hoh Tang, Zhun Deng, Apostolos Psaros, Kenji Kawaguchi", "tldr": "", "abstract": "Standard approaches for uncertainty quantification in deep learning and physics-informed learning have persistent limitations. \nIndicatively, strong assumptions regarding the data likelihood are required, the performance highly depends on the selection of priors, and the posterior can be sampled only approximately, which leads to poor approximations because of the associated computational cost.\nThis paper introduces and studies confidence interval (CI) estimation for deterministic partial differential equations as a novel problem.\nThat is, we propagate confidence, in the form of CIs, from data locations to the entire domain with probabilistic guarantees.\nWe propose a method, termed Physics-Informed Confidence Propagation (PICProp), based on bi-level optimization to compute a valid CI without making heavy assumptions.\nWe provide a theorem regarding the validity of our method, along with computational experiments focused on physics-informed learning. 
Code is available at https://github.com/ShenQianli/PICProp.", "keywords": "physics-informed learning;uncertainty quantification;deep learning", "primary_area": "", "supplementary_material": "", "author": "Qianli Shen;Wai Hoh Tang;Zhun Deng;Apostolos Psaros;Kenji Kawaguchi", "authorids": "~Qianli_Shen1;~Wai_Hoh_Tang1;~Zhun_Deng1;~Apostolos_Psaros1;~Kenji_Kawaguchi1", "gender": "M;;M;;", "homepage": "https://shenqianli.github.io/;;https://www.zhundeng.org/;https://afpsaros.com;https://ml.comp.nus.edu.sg/#members", "dblp": "22/10357.html;;204/4353;297/3125;", "google_scholar": "p3ekN2kAAAAJ;https://scholar.google.com.sg/citations?user=drtcisoAAAAJ;nkmi-moAAAAJ;;aLl3rYoAAAAJ", "orcid": ";0000-0002-6717-9426;;0000-0002-0995-4738;", "linkedin": ";wai-hoh-tang;;afpsaros/;", "or_profile": "~Qianli_Shen1;~Wai_Hoh_Tang1;~Zhun_Deng1;~Apostolos_Psaros1;~Kenji_Kawaguchi1", "aff": "National University of Singapore;National University of Singapore;Columbia University;;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;columbia.edu;;nus.edu", "position": "PhD student;Research Fellow;Postdoc;;Presidential Young Professor", "bibtex": "@inproceedings{\nshen2023picprop,\ntitle={{PICP}rop: Physics-Informed Confidence Propagation for Uncertainty Quantification},\nauthor={Qianli Shen and Wai Hoh Tang and Zhun Deng and Apostolos Psaros and Kenji Kawaguchi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EETqXXdqkI}\n}", "github": "", "project": "", "reviewers": "Lv8K;5z2U;H2bG;JUgH;Jho9", "pdf_size": 4136744, "rating": "4;5;6;7;7", "confidence": "4;3;3;4;2", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;4;4", "wc_summary": "92;39;61;39;82", "wc_strengths": "42;52;71;187;190", "wc_weaknesses": "48;154;283;139;95", "wc_questions": "180;118;2;55;14", "wc_limitations": "9;56;1;1;27", "wc_review": "371;419;418;421;408", "wc_reply_reviewers": "233;98;25;18;11", "wc_reply_authors": "280;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 62.6, 21.712669112755343 ], "wc_strengths_avg": [ 108.4, 66.06844935368167 ], "wc_weaknesses_avg": [ 143.8, 78.81979446814107 ], "wc_questions_avg": [ 73.8, 66.80838270756148 ], "wc_limitations_avg": [ 18.8, 20.88444397153058 ], "wc_review_avg": [ 407.4, 18.746733048720785 ], "wc_reply_reviewers_avg": [ 77.0, 84.0452259203341 ], "wc_reply_authors_avg": [ 56.0, 112.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.412514323662695, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14448621671627510011&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "u.nus.edu;nus.edu.sg;columbia.edu;;nus.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National University of Singapore;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.columbia.edu", "aff_unique_abbr": "NUS;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Auditing Fairness by Betting", 
"status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72328", "id": "EEVpt3dJQj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1338c277525011f20166cf740952bb47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EEVpt3dJQj", "openreview": "https://openreview.net/forum?id=EEVpt3dJQj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72328", "video": "https://nips.cc/virtual/2023/poster/72328", "author_site": "Ben Chugg, Santiago Cortes-Gomez, Bryan Wilder, Aaditya Ramdas", "tldr": "", "abstract": "We provide practical, efficient, and nonparametric methods for auditing the fairness of deployed classification and regression models. Whereas previous work relies on a fixed-sample size, our methods are sequential and allow for the continuous monitoring of incoming data, making them highly amenable to tracking the fairness of real-world systems. We also allow the data to be collected by a probabilistic policy as opposed to sampled uniformly from the population. This enables auditing to be conducted on data gathered for another purpose. Moreover, this policy may change over time and different policies may be used on different subpopulations. Finally, our methods can handle distribution shift resulting from either changes to the model or changes in the underlying population. Our approach is based on recent progress in anytime-valid inference and game-theoretic statistics---the ``testing by betting'' framework in particular. These connections ensure that our methods are interpretable, fast, and easy to implement. We demonstrate the efficacy of our approach on three benchmark fairness datasets.", "keywords": "fairness;auditing;sequential analysis;martingales;testing by betting", "primary_area": "", "supplementary_material": "/attachment/110581a2f9272e3d92ad9a27400b0855ba47de73.zip", "author": "Ben Chugg;Santiago Cortes-Gomez;Bryan Wilder;Aaditya Ramdas", "authorids": "~Ben_Chugg1;~Santiago_Cortes-Gomez1;~Bryan_Wilder2;~Aaditya_Ramdas2", "gender": ";M;;M", "homepage": "https://benchugg.com;https://secg5.github.io;https://bryanwilder.github.io/;http://stat.cmu.edu/~aramdas", "dblp": "228/6834;182/6679;164/1648;117/3518", "google_scholar": ";;;ZvFaPxUAAAAJ", "orcid": ";;;0000-0003-0497-311X", "linkedin": ";;;", "or_profile": "~Ben_Chugg1;~Santiago_Cortes-Gomez1;~Bryan_Wilder2;~Aaditya_Ramdas2", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchugg2023auditing,\ntitle={Auditing Fairness by Betting},\nauthor={Ben Chugg and Santiago Cortes-Gomez and Bryan Wilder and Aaditya Ramdas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EEVpt3dJQj}\n}", "github": "", "project": "", "reviewers": "T7v6;am4v;YGbH;sAy5", "pdf_size": 1397407, "rating": "6;7;8;8", "confidence": "4;3;4;4", "soundness": "3;4;4;4", "novelty": "3;3;4;4", "presentation": "4;3;4;4", "wc_summary": "69;122;99;195", "wc_strengths": "188;131;44;58", "wc_weaknesses": "81;96;53;15", "wc_questions": "83;54;3;179", "wc_limitations": "129;20;1;23", "wc_review": "550;423;200;470", "wc_reply_reviewers": "164;132;23;10", "wc_reply_authors": "175;380;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.25, 
0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 121.25, 46.54231945230061 ], "wc_strengths_avg": [ 105.25, 58.08345289322941 ], "wc_weaknesses_avg": [ 61.25, 30.84132779242813 ], "wc_questions_avg": [ 79.75, 64.06003044020507 ], "wc_limitations_avg": [ 43.25, 50.22138488731668 ], "wc_review_avg": [ 410.75, 129.8718117991737 ], "wc_reply_reviewers_avg": [ 82.25, 66.87441588529951 ], "wc_reply_authors_avg": [ 138.75, 156.5397313783309 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2300518058242119591&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Harnessing the power of choices in decision tree learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72327", "id": "EEtJTfvNZx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fddad60891bdf85aac8041f80ed022df-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EEtJTfvNZx", "openreview": "https://openreview.net/forum?id=EEtJTfvNZx", "poster": "/media/PosterPDFs/NeurIPS%202023/72327.png?t=1702276287.9837756", "slides": "https://nips.cc/virtual/2023/poster/72327", "video": "https://nips.cc/virtual/2023/poster/72327", "author_site": "Guy Blanc, Jane Lange, Chirag Pabbaraju, Colin Sullivan, Li-Yang Tan, Mo Tiwari", "tldr": "", "abstract": "We propose a simple generalization of standard and empirically successful decision tree learning algorithms such as ID3, C4.5, and CART. These algorithms, which have been central to machine learning for decades, are greedy in nature: they grow a decision tree by iteratively splitting on the best attribute. Our algorithm, Top-$k$, considers the $k$ best attributes as possible splits instead of just the single best attribute. We demonstrate, theoretically and empirically, the power of this simple generalization. We first prove a greediness hierarchy theorem showing that for every $k\\in \\mathbb{N}$, Top-$(k+1)$ can be dramatically more powerful than Top-$k$: there are data distributions for which the former achieves accuracy $1-\\epsilon$, whereas the latter only achieves accuracy $\\frac{1}{2}+\\epsilon$. We then show, through extensive experiments, that Top-$k$ outperforms the two main approaches to decision tree learning: classic greedy algorithms and more recent ``optimal decision tree'' algorithms. On one hand, Top-$k$ consistently enjoys significant accuracy gains over greedy algorithms across a wide range of benchmarks. On the other hand, Top-$k$ is markedly more scalable than optimal decision tree algorithms and is able to handle dataset and feature set sizes that remain far beyond the reach of these algorithms. 
The code to reproduce our results is available at https://github.com/SullivanC19/pydl8.5-topk.", "keywords": "Decision Trees;Decision Tree Learning;Top-$k$;ID3;Greedy Algorithms", "primary_area": "", "supplementary_material": "/attachment/952778f93e14a8e865609192aba4edd78409d5cb.pdf", "author": "Guy Blanc;Jane Lange;Chirag Pabbaraju;Colin Sullivan;Li-Yang Tan;Mo Tiwari", "authorids": "~Guy_Blanc1;~Jane_Lange1;~Chirag_Pabbaraju1;~Colin_Sullivan1;~Li-Yang_Tan2;~Mo_Tiwari1", "gender": "M;Not Specified;M;M;;", "homepage": ";;https://web.stanford.edu/~cpabbara/;;;http://www.motiwari.com/", "dblp": "211/7035;254/1613.html;231/7619;;;267/5421", "google_scholar": "XDJL3bwAAAAJ;;IAGcpHkAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-0642-9815;0000-0002-3424-691X;;;", "linkedin": ";;chirag-pabbaraju-277a4ba5/;colin-sullivan-0b636a198/;;motiwari", "or_profile": "~Guy_Blanc1;~Jane_Lange1;~Chirag_Pabbaraju1;~Colin_Sullivan1;~Li-Yang_Tan2;~Mo_Tiwari1", "aff": "Stanford University;Massachusetts Institute of Technology;Stanford University;Stanford University;;", "aff_domain": "stanford.edu;mit.edu;cs.stanford.edu;stanford.edu;;", "position": "PhD student;PhD student;PhD student;Undergrad student;;", "bibtex": "@inproceedings{\nblanc2023harnessing,\ntitle={Harnessing the power of choices in decision tree learning},\nauthor={Guy Blanc and Jane Lange and Chirag Pabbaraju and Colin Sullivan and Li-Yang Tan and Mo Tiwari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EEtJTfvNZx}\n}", "github": "", "project": "", "reviewers": "mdbV;8rCb;UM1K;B1BN", "pdf_size": 502929, "rating": "3;6;7;8", "confidence": "5;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "2;3;4;4", "wc_summary": "230;375;139;133", "wc_strengths": "60;70;136;57", "wc_weaknesses": "915;19;40;144", "wc_questions": "72;95;21;233", "wc_limitations": "7;6;9;25", "wc_review": "1284;565;345;592", "wc_reply_reviewers": "1717;37;11;22", "wc_reply_authors": "1267;12;12;12", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 219.25, 97.79155127105818 ], "wc_strengths_avg": [ 80.75, 32.25968846718765 ], "wc_weaknesses_avg": [ 279.5, 369.9462798839853 ], "wc_questions_avg": [ 105.25, 78.4677481517088 ], "wc_limitations_avg": [ 11.75, 7.725768570181222 ], "wc_review_avg": [ 696.5, 352.4631186379647 ], "wc_reply_reviewers_avg": [ 446.75, 733.4372416914756 ], "wc_reply_authors_avg": [ 325.75, 543.4309408747353 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5669467095138409, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10547779566625312676&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "stanford.edu;mit.edu;cs.stanford.edu;stanford.edu;;", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "LayoutPrompter: Awaken the Design Ability of Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72326", "id": "EF56cv8B3b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/88a129e44f25a571ae8b838057c46855-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EF56cv8B3b", "openreview": "https://openreview.net/forum?id=EF56cv8B3b", "poster": "/media/PosterPDFs/NeurIPS%202023/72326.png?t=1701668734.3730216", "slides": "https://nips.cc/virtual/2023/poster/72326", "video": "https://nips.cc/virtual/2023/poster/72326", "author_site": "Jiawei Lin, Jiaqi Guo, Shizhao Sun, Zijiang Yang, Jian-Guang Lou, Dongmei Zhang", "tldr": "", "abstract": "Conditional graphic layout generation, which automatically maps user constraints to high-quality layouts, has attracted widespread attention today. Although recent works have achieved promising performance, the lack of versatility and data efficiency hinders their practical applications. In this work, we propose LayoutPrompter, which leverages large language models (LLMs) to address the above problems through in-context learning. LayoutPrompter is made up of three key components, namely input-output serialization, dynamic exemplar selection and layout ranking. Specifically, the input-output serialization component meticulously designs the input and output formats for each layout generation task. Dynamic exemplar selection is responsible for selecting the most helpful prompting exemplars for a given input. And a layout ranker is used to pick the highest quality layout from multiple outputs of LLMs. We conduct experiments on all existing layout generation tasks using four public datasets. Despite the simplicity of our approach, experimental results show that LayoutPrompter can compete with or even outperform state-of-the-art approaches on these tasks without any model training or fine-tuning. This demonstrates the effectiveness of this versatile and training-free approach. In addition, the ablation studies show that LayoutPrompter is significantly superior to the training-based baseline in a low-data regime, further indicating the data efficiency of LayoutPrompter. 
Our project is available at https://github.com/microsoft/LayoutGeneration/tree/main/LayoutPrompter.", "keywords": "graphic design;graphic layout;large language models;in-context learning", "primary_area": "", "supplementary_material": "/attachment/0673d9d8f312af234ab7944f208ee27f6537d951.pdf", "author": "Jiawei Lin;Jiaqi Guo;Shizhao Sun;Zijiang James Yang;Jian-Guang Lou;Dongmei Zhang", "authorids": "~Jiawei_Lin2;~Jiaqi_Guo1;~Shizhao_Sun2;~Zijiang_James_Yang1;~Jian-Guang_Lou1;~Dongmei_Zhang2", "gender": "M;M;;M;M;", "homepage": ";;;https://yangzijiangjames.github.io/;https://www.microsoft.com/en-us/research/people/jlou/;https://www.microsoft.com/en-us/research/people/dongmeiz/", "dblp": ";173/0121;;;37/1917;87/461-1", "google_scholar": "sFP5ipsAAAAJ;OCDyes4AAAAJ;;;alDxINIAAAAJ;jLlBBl4AAAAJ", "orcid": ";;;;;0000-0002-9230-2799", "linkedin": ";;;;;dongmei-zhang-38a86317/", "or_profile": "~Jiawei_Lin2;~Jiaqi_Guo1;~Shizhao_Sun2;~Zijiang_James_Yang1;~Jian-Guang_Lou1;~Dongmei_Zhang2", "aff": "Xi'an Jiaotong University;Microsoft;;;Microsoft Research Asia;Microsoft", "aff_domain": "xjtu.edu.cn;microsoft.com;;;microsoft.com;microsoft.com", "position": "PhD student;Researcher;;;Principal Researcher;Assistant Managing Director, Microsoft Research Asia", "bibtex": "@inproceedings{\nlin2023layoutprompter,\ntitle={LayoutPrompter: Awaken the Design Ability of Large Language Models},\nauthor={Jiawei Lin and Jiaqi Guo and Shizhao Sun and Zijiang James Yang and Jian-Guang Lou and Dongmei Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EF56cv8B3b}\n}", "github": "", "project": "", "reviewers": "Lp5t;kjZH;mVCb;HKER;7S5o", "pdf_size": 8792264, "rating": "6;6;7;7;7", "confidence": "4;4;3;4;5", "soundness": "2;3;4;3;3", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "82;216;65;44;96", "wc_strengths": "87;104;97;34;85", "wc_weaknesses": "245;190;227;89;166", "wc_questions": "153;217;161;2;112", "wc_limitations": "4;1;8;1;7", "wc_review": "571;728;558;170;466", "wc_reply_reviewers": "48;0;53;0;96", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 100.6, 60.258111487168264 ], "wc_strengths_avg": [ 81.4, 24.67873578609731 ], "wc_weaknesses_avg": [ 183.4, 54.68674428049269 ], "wc_questions_avg": [ 129.0, 71.78022011668673 ], "wc_limitations_avg": [ 4.2, 2.925747767665559 ], "wc_review_avg": [ 498.6, 184.62675862398712 ], "wc_reply_reviewers_avg": [ 39.4, 36.24141277599426 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8292987976765973430&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "xjtu.edu.cn;microsoft.com;;;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Xi'an Jiao Tong University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "XJTU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", 
"aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "OV-PARTS: Towards Open-Vocabulary Part Segmentation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73650", "id": "EFl8zjjXeX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dde53059fdb0f45e1e9ad9c66997d662-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=EFl8zjjXeX", "openreview": "https://openreview.net/forum?id=EFl8zjjXeX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73650", "video": "https://nips.cc/virtual/2023/poster/73650", "author_site": "Meng Wei, Xiaoyu Yue, Wenwei Zhang, Shu Kong, Xihui Liu, Jiangmiao Pang", "tldr": "", "abstract": "Segmenting and recognizing diverse object parts is a crucial ability in applications spanning various computer vision and robotic tasks. While significant progress has been made in object-level Open-Vocabulary Semantic Segmentation (OVSS), i.e., segmenting objects with arbitrary text, the corresponding part-level research poses additional challenges. Firstly, part segmentation inherently involves intricate boundaries, while limited annotated data compounds the challenge. Secondly, part segmentation introduces an open granularity challenge due to the diverse and often ambiguous definitions of parts in the open world. Furthermore, the large-scale vision and language models, which play a key role in the open vocabulary setting, struggle to recognize parts as effectively as objects. To comprehensively investigate and tackle these challenges, we propose an Open-Vocabulary Part Segmentation (OV-PARTS) benchmark. OV-PARTS includes refined versions of two publicly available datasets: Pascal-Part-116 and ADE20K-Part-234. And it covers three specific tasks: Generalized Zero-Shot Part Segmentation, Cross-Dataset Part Segmentation, and Few-Shot Part Segmentation, providing insights into analogical reasoning, open granularity and few-shot adapting abilities of models. Moreover, we analyze and adapt two prevailing paradigms of existing object-level OVSS methods for OV-PARTS. Extensive experimental analysis is conducted to inspire future research in leveraging foundational models for OV-PARTS. 
The code and dataset are available at https://github.com/kellyiss/OV_PARTS.", "keywords": "Open vocabulary;semantic segmentation;part segmentation;foundation model", "primary_area": "", "supplementary_material": "/attachment/6b7aad200fa180990d7ca09fd1cf807379b4be82.pdf", "author": "Meng Wei;Xiaoyu Yue;Wenwei Zhang;Shu Kong;Xihui Liu;Jiangmiao Pang", "authorids": "~Meng_Wei7;~Xiaoyu_Yue1;~Wenwei_Zhang1;~Shu_Kong1;~Xihui_Liu1;~Jiangmiao_Pang1", "gender": "M;M;M;F;M;F", "homepage": "https://github.com/yuexy;https://zhangwenwei.cn;https://aimerykong.github.io/;https://xh-liu.github.io/;https://oceanpang.github.io/;https://github.com/kellyiss", "dblp": "220/3383;;26/11141;184/3911;231/7630;", "google_scholar": "9Fc5FY0AAAAJ;QDXADSEAAAAJ;sm9FdLoAAAAJ;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ;https://scholar.google.com/citations?authuser=0;Wx8ChLcAAAAJ", "orcid": ";0000-0002-2748-4514;0000-0002-1362-5937;0000-0003-1831-9952;0000-0002-6711-9319;", "linkedin": ";wenweizhang-b9769a124/;aimerykong/;;;", "or_profile": "~Xiaoyu_Yue1;~Wenwei_Zhang1;~Shu_Kong1;~Xihui_Liu1;~Jiangmiao_Pang1;~MENG_WEI6", "aff": "University of Sydney;Nanyang Technological University;Texas A&M University - College Station;University of Hong Kong;Shanghai AI Laboratory ;Shanghai AI Laboratory", "aff_domain": "usyd.edu.au;ntu.edu.sg;tamu.edu;hku.hk;pjlab.org.cn;pjlab.org.cn", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Research Scientist;Intern", "bibtex": "@inproceedings{\nwei2023ovparts,\ntitle={{OV}-{PARTS}: Towards Open-Vocabulary Part Segmentation},\nauthor={Meng Wei and Xiaoyu Yue and Wenwei Zhang and Shu Kong and Xihui Liu and Jiangmiao Pang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=EFl8zjjXeX}\n}", "github": "", "project": "", "reviewers": "v7gi;VdPY;osrm;cZ7P;r29M;NFDP;cNC2", "pdf_size": 3811739, "rating": "6;6;6;6;6;6;7", "confidence": "3;5;4;4;4;4;3", "wc_summary_and_contributions": "42;62;86;88;39;97;70", "wc_strengths": "63;75;43;26;39;80;52", "wc_improvement": "3;137;241;37;79;210;485", "wc_limitations": "2;5;1;1;44;64;4", "wc_correctness": "15;1;1;5;1;27;41", "wc_clarity": "1;5;1;6;1;27;78", "wc_relation_to_prior_work": "8;10;1;4;1;50;1", "wc_documentation": "11;1;46;12;9;19;1", "wc_additional_feedback": "1;1;1;1;1;1;1", "wc_review": "146;297;421;180;214;575;733", "wc_reply_reviewers": "0;0;0;14;0;148;380", "wc_reply_authors": "549;555;683;55;652;1203;1301", "reply_reviewers": "0;0;0;1;0;1;2", "reply_authors": "1;1;1;1;1;3;3", "rating_avg": [ 6.142857142857143, 0.3499271061118826 ], "confidence_avg": [ 3.857142857142857, 0.6388765649999398 ], "wc_summary_and_contributions_avg": [ 69.14285714285714, 21.08776606355022 ], "wc_strengths_avg": [ 54.0, 18.252201432782215 ], "wc_improvement_avg": [ 170.28571428571428, 151.56961086832516 ], "wc_limitations_avg": [ 17.285714285714285, 23.86697829766526 ], "wc_correctness_avg": [ 13.0, 14.540583599999398 ], "wc_clarity_avg": [ 17.0, 26.333031945231287 ], "wc_relation_to_prior_work_avg": [ 10.714285714285714, 16.38565823878819 ], "wc_documentation_avg": [ 14.142857142857142, 14.267130769995882 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 366.57142857142856, 204.32946584436837 ], "wc_reply_reviewers_avg": [ 77.42857142857143, 133.38114788921595 ], "wc_reply_authors_avg": [ 714.0, 391.9333033638687 ], "reply_reviewers_avg": [ 0.5714285714285714, 0.7284313590846836 ], 
"reply_authors_avg": [ 1.5714285714285714, 0.9035079029052512 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5477225575051662, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17128281513931679276&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "usyd.edu.au;ntu.edu.sg;tamu.edu;hku.hk;pjlab.org.cn;pjlab.org.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;4;4", "aff_unique_norm": "University of Sydney;Nanyang Technological University;Texas A&M University;University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sydney.edu.au;https://www.ntu.edu.sg;https://www.tamu.edu;https://www.hku.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "USYD;NTU;TAMU;HKU;SAIL", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";College Station;Hong Kong SAR", "aff_country_unique_index": "0;1;2;3;3;3", "aff_country_unique": "Australia;Singapore;United States;China" }, { "title": "A Long $N$-step Surrogate Stage Reward for Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72325", "id": "EGfYnTyEGv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29ef811e72b2b97cf18dd5d866b0f472-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EGfYnTyEGv", "openreview": "https://openreview.net/forum?id=EGfYnTyEGv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72325", "video": "https://nips.cc/virtual/2023/poster/72325", "author_site": "Junmin Zhong, Ruofan Wu, Jennie Si", "tldr": "", "abstract": "We introduce a new stage reward estimator named the long $N$-step surrogate stage (LNSS) reward for deep reinforcement learning (RL). It aims at mitigating the high variance problem, which has shown impeding successful convergence of learning, hurting task performance, and hindering applications of deep RL in continuous control problems. In this paper we show that LNSS, which utilizes a long reward trajectory of rewards of future steps, provides consistent performance improvement measured by average reward, convergence speed, learning success rate,and variance reduction in $Q$ values and rewards. Our evaluations are based on a variety of environments in DeepMind Control Suite and OpenAI Gym by using LNSS in baseline deep RL algorithms such as DDPG, D4PG, and TD3. We show that LNSS reward has enabled good results that have been challenging to obtain by deep RL previously. 
Our analysis also shows that LNSS exponentially reduces the upper bound on the variances of $Q$ values relative to the respective single-step methods.", "keywords": "Deep reinforcement learning;Reward Estimation", "primary_area": "", "supplementary_material": "/attachment/511cb9324e23c22688012572cada7e1e2b6c6efd.pdf", "author": "Junmin Zhong;Ruofan Wu;Jennie Si", "authorids": "~Junmin_Zhong1;~Ruofan_Wu3;~Jennie_Si1", "gender": "M;M;", "homepage": "https://isearch.asu.edu/profile/2788545;;", "dblp": "316/9584;;", "google_scholar": "uVv_eWQAAAAJ;TrevQ1MAAAAJ;", "orcid": ";0000-0003-4438-0191;", "linkedin": ";;", "or_profile": "~Junmin_Zhong1;~Ruofan_Wu3;~Jennie_Si1", "aff": "Arizona State University;Arizona State University;", "aff_domain": "asu.edu;asu.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nzhong2023a,\ntitle={A Long \\$N\\$-step Surrogate Stage Reward for Deep Reinforcement Learning},\nauthor={Junmin Zhong and Ruofan Wu and Jennie Si},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EGfYnTyEGv}\n}", "github": "", "project": "", "reviewers": "ektB;dTq6;r58G;TX4g;PJwS", "pdf_size": 20792572, "rating": "4;4;5;7;7", "confidence": "4;5;3;3;3", "soundness": "2;1;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;4;3;4;3", "wc_summary": "138;46;128;105;50", "wc_strengths": "27;55;29;78;66", "wc_weaknesses": "58;319;123;90;16", "wc_questions": "22;9;53;82;226", "wc_limitations": "6;0;1;7;22", "wc_review": "251;429;334;362;380", "wc_reply_reviewers": "0;144;78;0;15", "wc_reply_authors": "0;2404;1732;0;53", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;6;5;1;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 93.4, 38.60362677262332 ], "wc_strengths_avg": [ 51.0, 20.149441679609886 ], "wc_weaknesses_avg": [ 121.2, 105.03599383068645 ], "wc_questions_avg": [ 78.4, 78.02717475341524 ], "wc_limitations_avg": [ 7.2, 7.884161337770809 ], "wc_review_avg": [ 351.2, 58.8808967323019 ], "wc_reply_reviewers_avg": [ 47.4, 56.23379766652791 ], "wc_reply_authors_avg": [ 837.8, 1026.8694951161028 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.0, 2.0976176963403033 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7740702698132101, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3489615695834681590&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "asu.edu;asu.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Neural Graph Generation from Graph Statistics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72324", "id": "EI6BHFKA5p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72153267883fbcafdb6e4662382696c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EI6BHFKA5p", "openreview": "https://openreview.net/forum?id=EI6BHFKA5p", "poster": "/media/PosterPDFs/NeurIPS%202023/72324.png?t=1702063812.3445587", "slides": 
"https://nips.cc/virtual/2023/poster/72324", "video": "https://nips.cc/virtual/2023/poster/72324", "author_site": "Kiarash Zahirnia, Yaochen Hu, Mark Coates, Oliver Schulte", "tldr": "", "abstract": "We describe a new setting for learning a deep graph generative model (GGM) from aggregate graph statistics, rather than from the graph adjacency matrix. Matching the statistics of observed training graphs is the main approach for learning traditional GGMs (e.g, BTER, Chung-Lu, and Erdos-Renyi models). Privacy researchers have proposed learning from graph statistics as a way to protect privacy. \nWe develop an architecture for training a deep GGM to match statistics while preserving local differential privacy guarantees. Empirical evaluation on 8 datasets indicates that our deep GGM model generates more realistic graphs than the traditional GGMs when both are learned from graph statistics only. We also benchmark our deep GGM trained on statistics only, against state-of-the-art deep GGM models that are trained on the entire adjacency matrix. The results show that graph statistics are often sufficient to build a competitive deep GGM that generates realistic graphs while protecting local privacy.", "keywords": "Graph Generation;Local Differential Privacy;Graph Statistics;Latent Adjacency Matrix", "primary_area": "", "supplementary_material": "/attachment/bf5641eed076b6f49c70aa3dfdf9d57622300c5a.pdf", "author": "Kiarash Zahirnia;Yaochen Hu;Mark Coates;Oliver Schulte", "authorids": "~Kiarash_Zahirnia2;~Yaochen_Hu1;~Mark_Coates1;~Oliver_Schulte1", "gender": "M;M;M;M", "homepage": "https://www.linkedin.com/in/kzahirni/;https://hyclex.github.io/;http://www.ece.mcgill.ca/~mcoate/;http://www.cs.sfu.ca/~oschulte/", "dblp": "190/1748;143/4817-1;c/MarkCoates;s/OliverSchulte", "google_scholar": "CynXyykAAAAJ;VMwM-ZwAAAAJ;https://scholar.google.ca/citations?user=qxWORNoAAAAJ;", "orcid": ";;0000-0001-5030-1379;", "linkedin": "kzahirni/;;;", "or_profile": "~Kiarash_Zahirnia2;~Yaochen_Hu1;~Mark_Coates1;~Oliver_Schulte1", "aff": "Simon Fraser University;Huawei Technologies Ltd.;McGill University;Simon Fraser University", "aff_domain": "sfu.ca;huawei.com;mcgill.ca;sfu.ca", "position": "PhD student;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzahirnia2023neural,\ntitle={Neural Graph Generation from Graph Statistics},\nauthor={Kiarash Zahirnia and Yaochen Hu and Mark Coates and Oliver Schulte},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EI6BHFKA5p}\n}", "github": "", "project": "", "reviewers": "hsws;bJqH;ijbs;92Y3", "pdf_size": 3248321, "rating": "4;5;5;7", "confidence": "3;4;4;4", "soundness": "2;3;2;4", "novelty": "2;3;2;4", "presentation": "3;4;2;4", "wc_summary": "73;35;40;178", "wc_strengths": "33;30;43;92", "wc_weaknesses": "157;123;209;155", "wc_questions": "3;49;30;52", "wc_limitations": "1;2;17;7", "wc_review": "267;239;339;484", "wc_reply_reviewers": "480;0;326;71", "wc_reply_authors": "1591;0;398;10", "reply_reviewers": "3;0;2;1", "reply_authors": "5;1;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 81.5, 57.5955727465228 ], "wc_strengths_avg": [ 49.5, 25.004999500099974 ], "wc_weaknesses_avg": [ 161.0, 30.822070014844883 ], "wc_questions_avg": [ 33.5, 19.525624189766635 ], 
"wc_limitations_avg": [ 6.75, 6.339361166552983 ], "wc_review_avg": [ 332.25, 94.90356947976193 ], "wc_reply_reviewers_avg": [ 219.25, 193.28395561970476 ], "wc_reply_authors_avg": [ 499.75, 650.1508959464718 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14127522237470484969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sfu.ca;huawei.com;mcgill.ca;sfu.ca", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Simon Fraser University;Huawei;McGill University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.sfu.ca;https://www.huawei.com;https://www.mcgill.ca", "aff_unique_abbr": "SFU;Huawei;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;China" }, { "title": "PTADisc: A Cross-Course Dataset Supporting Personalized Learning in Cold-Start Scenarios", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73649", "id": "EIydMrHBHP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8cf04c64d1734e5f7e63418a2a4d49de-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=EIydMrHBHP", "openreview": "https://openreview.net/forum?id=EIydMrHBHP", "poster": "/media/PosterPDFs/NeurIPS%202023/73649.png?t=1701872961.6835167", "slides": "https://nips.cc/virtual/2023/poster/73649", "video": "https://nips.cc/virtual/2023/poster/73649", "author_site": "Liya Hu, Zhiang Dong, Jingyuan Chen, Guifeng Wang, Zhihua Wang, Zhou Zhao, Fei Wu", "tldr": "", "abstract": "The focus of our work is on diagnostic tasks in personalized learning, such as cognitive diagnosis and knowledge tracing. The goal of these tasks is to assess students' latent proficiency on knowledge concepts through analyzing their historical learning records. However, existing research has been limited to single-course scenarios; cross-course studies have not been explored due to a lack of dataset. We address this issue by constructing PTADisc, a Diverse, Immense, Student-centered dataset that emphasizes its sufficient Cross-course information for personalized learning. PTADisc includes 74 courses, 1,530,100 students, 4,054 concepts, 225,615 problems, and over 680 million student response logs. Based on PTADisc, we developed a model-agnostic Cross-Course Learner Modeling Framework (CCLMF) which utilizes relationships between students' proficiency across courses to alleviate the difficulty of diagnosing student knowledge state in cold-start scenarios. CCLMF uses a meta network to generate personalized mapping functions between courses. The experimental results on PTADisc verify the effectiveness of CCLMF with an average improvement of 4.2% on AUC. We also report the performance of baseline models for cognitive diagnosis and knowledge tracing over PTADisc, demonstrating that our dataset supports a wide scope of research in personalized learning. 
Additionally, PTADisc contains valuable programming logs and student-group information that are worth exploring in the future.", "keywords": "Personalized Learning;Cognitive Diagnosis;Knowledge Tracing;Open-Access Datasets;Cross-Course Learner Modeling", "primary_area": "", "supplementary_material": "/attachment/a7aedcfc17a55780620e48eb98b8fdf57105b5b7.pdf", "author": "Liya Hu;Zhiang Dong;Jingyuan Chen;Guifeng Wang;Zhihua Wang;Zhou Zhao;Fei Wu", "authorids": "~Liya_Hu1;~Zhiang_Dong2;~Jingyuan_Chen3;~Guifeng_Wang1;~Zhihua_Wang4;~Zhou_Zhao3;~Fei_Wu1", "gender": ";M;;;M;;M", "homepage": ";;;;;;https://person.zju.edu.cn/wufei", "dblp": ";;;;;;84/3254-1", "google_scholar": ";;;;oDdO4JIAAAAJ;;XJLn4MYAAAAJ", "orcid": ";;;;0000-0002-1593-1321;;", "linkedin": ";zhiang-dong-2a9aa11b5/;;;;;", "or_profile": "~Liya_Hu1;~Zhiang_Dong2;~Jingyuan_Chen3;~Guifeng_Wang1;~Zhihua_Wang4;~Zhou_Zhao3;~Fei_Wu1", "aff": ";Zhejiang University;;;Shanghai Institute for Advanced Study of Zhejiang University;;Zhejiang University", "aff_domain": ";zju.edu.cn;;;zju.edu.cn;;zju.edu.cn", "position": ";PhD student;;;Researcher;;Full Professor", "bibtex": "@inproceedings{\nhu2023ptadisc,\ntitle={{PTAD}isc: A Cross-Course Dataset Supporting Personalized Learning in Cold-Start Scenarios},\nauthor={Liya Hu and Zhiang Dong and Jingyuan Chen and Guifeng Wang and Zhihua Wang and Zhou Zhao and Fei Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=EIydMrHBHP}\n}", "github": "", "project": "", "reviewers": "T6X2;G8A9;SZNR;ToYw;ieLM", "pdf_size": 2904321, "rating": "4;6;8;8;9", "confidence": "4;2;4;4;4", "wc_summary_and_contributions": "41;56;81;103;183", "wc_strengths": "43;54;46;52;93", "wc_improvement": "3;83;1;52;62", "wc_limitations": "224;55;16;41;32", "wc_correctness": "3;55;3;1;45", "wc_clarity": "6;54;85;1;5", "wc_relation_to_prior_work": "10;11;7;5;39", "wc_documentation": "2;50;8;1;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "333;419;248;257;467", "wc_reply_reviewers": "117;184;0;10;33", "wc_reply_authors": "1627;2613;248;594;475", "reply_reviewers": "1;2;0;1;1", "reply_authors": "4;6;1;1;1", "rating_avg": [ 7.0, 1.7888543819998317 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "wc_summary_and_contributions_avg": [ 92.8, 49.83332218506006 ], "wc_strengths_avg": [ 57.6, 18.13945974939717 ], "wc_improvement_avg": [ 40.2, 32.7621733100843 ], "wc_limitations_avg": [ 73.6, 76.2590322519241 ], "wc_correctness_avg": [ 21.4, 23.5762592452662 ], "wc_clarity_avg": [ 30.2, 33.59404709170957 ], "wc_relation_to_prior_work_avg": [ 14.4, 12.483589227461788 ], "wc_documentation_avg": [ 13.6, 18.402173784637508 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 344.8, 86.78340855255686 ], "wc_reply_reviewers_avg": [ 68.8, 70.80508456318655 ], "wc_reply_authors_avg": [ 1111.4, 887.5599360043241 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 2.0591260281974 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.27950849718747367, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17573776349725534826&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";zju.edu.cn;;;zju.edu.cn;;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": 
"1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "AUDIT: Audio Editing by Following Instructions with Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72323", "id": "EO1KuHoR0V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e1b619a9e241606a23eb21767f16cf81-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EO1KuHoR0V", "openreview": "https://openreview.net/forum?id=EO1KuHoR0V", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72323", "video": "https://nips.cc/virtual/2023/poster/72323", "author_site": "Yuancheng Wang, Zeqian Ju, Xu Tan, Lei He, Zhizheng Wu, Jiang Bian, sheng zhao", "tldr": "", "abstract": "Audio editing is applicable for various purposes, such as adding background sound effects, replacing a musical instrument, and repairing damaged audio. Recently, some diffusion-based methods achieved zero-shot audio editing by using a diffusion and denoising process conditioned on the text description of the output audio. However, these methods still have some problems: 1) they have not been trained on editing tasks and cannot ensure good editing effects; 2) they can erroneously modify audio segments that do not require editing; 3) they need a complete description of the output audio, which is not always available or necessary in practical scenarios. In this work, we propose AUDIT, an instruction-guided audio editing model based on latent diffusion models. Specifically, \\textbf{AUDIT} has three main design features: 1) we construct triplet training data (instruction, input audio, output audio) for different audio editing tasks and train a diffusion model using instruction and input (to be edited) audio as conditions and generating output (edited) audio; 2) it can automatically learn to only modify segments that need to be edited by comparing the difference between the input and output audio; 3) it only needs edit instructions instead of full target audio descriptions as text input. AUDIT achieves state-of-the-art results in both objective and subjective metrics for several audio editing tasks (e.g., adding, dropping, replacement, inpainting, super-resolution). 
Demo samples are available at https://audit-demopage.github.io/.", "keywords": "audio editing;text-to-audio generation;diffusion models", "primary_area": "", "supplementary_material": "/attachment/c7e9787c7297fac84cf1d8591026c206b7a23941.zip", "author": "Yuancheng Wang;Zeqian Ju;Xu Tan;Lei He;Zhizheng Wu;Jiang Bian;sheng zhao", "authorids": "~Yuancheng_Wang1;~Zeqian_Ju1;~Xu_Tan1;~Lei_He6;wuzhizheng@cuhk.edu.cn;~Jiang_Bian1;~sheng_zhao1", "gender": "M;Not Specified;M;M;;M;M", "homepage": "https://hecheng0625.github.io/;;https://tan-xu.github.io/;;;https://sites.google.com/view/jiangbian;https://www.aaai.org/ojs/index.php/AAAI/article/view/4642", "dblp": "199/2310;262/3979;96/10484-3;;;09/851-2.html;", "google_scholar": "60uamz4AAAAJ;uN1JaDEAAAAJ;tob-U1oAAAAJ;EKl9yY8AAAAJ;;pZBEnY8AAAAJ;689bIIwAAAAJ", "orcid": ";;0000-0001-5631-0639;;;0000-0002-9472-600X;", "linkedin": ";;;;;jbian/;", "or_profile": "~Yuancheng_Wang1;~Zeqian_Ju1;~Xu_Tan1;~Lei_He6;wuzhizheng@cuhk.edu.cn;~Jiang_Bian1;~sheng_zhao1", "aff": "The Chinese University of Hong Kong, Shenzhen;Microsoft;Microsoft;Microsoft;;Microsoft;Microsoft", "aff_domain": "cuhk.edu.cn;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;microsoft.com", "position": "Undergrad student;Intern;Principal Researcher;Principal Scientist Manager;;Partner Research Manager;Researcher", "bibtex": "@inproceedings{\nwang2023audit,\ntitle={{AUDIT}: Audio Editing by Following Instructions with Latent Diffusion Models},\nauthor={Yuancheng Wang and Zeqian Ju and Xu Tan and Lei He and Zhizheng Wu and Jiang Bian and sheng zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EO1KuHoR0V}\n}", "github": "", "project": "", "reviewers": "ptM3;LhWX;XrSg;ujZe", "pdf_size": 1781701, "rating": "4;5;5;8", "confidence": "4;5;4;4", "soundness": "3;4;3;3", "novelty": "2;4;2;3", "presentation": "2;4;3;3", "wc_summary": "10;74;64;54", "wc_strengths": "13;78;63;81", "wc_weaknesses": "238;1;139;170", "wc_questions": "29;42;1;57", "wc_limitations": "1;1;1;90", "wc_review": "291;196;268;452", "wc_reply_reviewers": "165;0;32;59", "wc_reply_authors": "152;18;50;45", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 50.5, 24.428467000612216 ], "wc_strengths_avg": [ 58.75, 27.279800219209818 ], "wc_weaknesses_avg": [ 137.0, 86.29889918185515 ], "wc_questions_avg": [ 32.25, 20.58367071248469 ], "wc_limitations_avg": [ 23.25, 38.53813046840752 ], "wc_review_avg": [ 301.75, 93.55847102213674 ], "wc_reply_reviewers_avg": [ 64.0, 61.9394865978077 ], "wc_reply_authors_avg": [ 66.25, 50.98222729540168 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14443585611343470852&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cuhk.edu.cn;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.microsoft.com", 
"aff_unique_abbr": "CUHK;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Towards Federated Foundation Models: Scalable Dataset Pipelines for Group-Structured Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73648", "id": "EPz1DcdPVE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/662bb9c4dcc96aeaac8e7cd3fc6a0add-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=EPz1DcdPVE", "openreview": "https://openreview.net/forum?id=EPz1DcdPVE", "poster": "/media/PosterPDFs/NeurIPS%202023/73648.png?t=1702304575.1711938", "slides": "https://nips.cc/virtual/2023/poster/73648", "video": "https://nips.cc/virtual/2023/poster/73648", "author_site": "Zachary Charles, Nicole Mitchell, Krishna Pillutla, Michael Reneer, Zachary Garrett", "tldr": "", "abstract": "We introduce Dataset Grouper, a library to create large-scale group-structured (e.g., federated) datasets, enabling federated learning simulation at the scale of foundation models. This library facilitates the creation of group-structured versions of existing datasets based on user-specified partitions, and directly leads to a variety of useful heterogeneous datasets that can be plugged into existing software frameworks. Dataset Grouper offers three key advantages. First, it scales to settings where even a single group's dataset is too large to fit in memory. Second, it provides flexibility, both in choosing the base (non-partitioned) dataset and in defining partitions. Finally, it is framework-agnostic. We empirically demonstrate that Dataset Grouper enables large-scale federated language modeling simulations on datasets that are orders of magnitude larger than in previous work, allowing for federated training of language models with hundreds of millions, and even billions, of parameters. Our experimental results show that algorithms like FedAvg operate more as meta-learning methods than as empirical risk minimization methods at this scale, suggesting their utility in downstream personalization and task-specific adaptation. 
Dataset Grouper is available at https://github.com/google-research/dataset_grouper.", "keywords": "federated learning;datasets;foundation models;large language models;meta-learning", "primary_area": "", "supplementary_material": "", "author": "Zachary Charles;Nicole Elyse Mitchell;Krishna Pillutla;Michael Reneer;Zachary Garrett", "authorids": "~Zachary_Charles1;~Nicole_Elyse_Mitchell1;~Krishna_Pillutla1;michaelreneer@google.com;~Zachary_Garrett1", "gender": ";F;M;;M", "homepage": ";https://nicolemitchell.github.io/;https://krishnap25.github.io;;", "dblp": ";92/5737;173/5185.html;;255/5493", "google_scholar": ";https://scholar.google.co.uk/citations?user=IRQIuDQAAAAJ;IL7N6sMAAAAJ;;-M22wckAAAAJ", "orcid": ";0000-0001-9539-6674;;;0000-0001-8158-3997", "linkedin": ";nicole-mitchell/;;;zacharygarrett/", "or_profile": "~Zachary_Charles1;~Nicole_Elyse_Mitchell1;~Krishna_Pillutla1;michaelreneer@google.com;~Zachary_Garrett1", "aff": ";Research, Google;Google;;Research, Google", "aff_domain": ";research.google.com;google.com;;research.google.com", "position": ";Researcher;Visiting Researcher;;Researcher", "bibtex": "@inproceedings{\ncharles2023towards,\ntitle={Towards Federated Foundation Models: Scalable Dataset Pipelines for Group-Structured Learning},\nauthor={Zachary Charles and Nicole Elyse Mitchell and Krishna Pillutla and Michael Reneer and Zachary Garrett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=EPz1DcdPVE}\n}", "github": "", "project": "", "reviewers": "wrGw;Pkpg;njkF;YDTj;T2tv", "pdf_size": 812422, "rating": "5;6;7;8;8", "confidence": "4;4;3;4;4", "wc_summary_and_contributions": "126;57;65;65;42", "wc_strengths": "47;18;61;50;81", "wc_improvement": "366;47;75;76;54", "wc_limitations": "1;96;33;60;1", "wc_correctness": "8;12;4;10;7", "wc_clarity": "19;13;35;6;5", "wc_relation_to_prior_work": "34;1;4;5;18", "wc_documentation": "22;10;3;12;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "624;255;281;285;213", "wc_reply_reviewers": "0;0;29;23;6", "wc_reply_authors": "1322;901;551;257;160", "reply_reviewers": "0;0;1;1;1", "reply_authors": "3;3;1;1;1", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 71.0, 28.754130138121027 ], "wc_strengths_avg": [ 51.4, 20.51925924588897 ], "wc_improvement_avg": [ 123.6, 121.7351222942664 ], "wc_limitations_avg": [ 38.2, 36.36151812012255 ], "wc_correctness_avg": [ 8.2, 2.7129319932501073 ], "wc_clarity_avg": [ 15.6, 10.947145746723207 ], "wc_relation_to_prior_work_avg": [ 12.4, 12.273548794053008 ], "wc_documentation_avg": [ 10.2, 6.823488843692792 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 331.6, 148.43395837880223 ], "wc_reply_reviewers_avg": [ 11.6, 12.109500402576483 ], "wc_reply_authors_avg": [ 638.2, 428.5414332360408 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.08574929257125444, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12874677529329962466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";research.google.com;google.com;;research.google.com", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google", 
"aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Byzantine-Tolerant Methods for Distributed Variational Inequalities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72322", "id": "ER0bcYXvvo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a5e9197ea547141b4977a5a198bbaac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ER0bcYXvvo", "openreview": "https://openreview.net/forum?id=ER0bcYXvvo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72322", "video": "https://nips.cc/virtual/2023/poster/72322", "author_site": "Nazarii Tupitsa, Abdulla Jasem Almansoori, Yanlin Wu, Martin Takac, Karthik Nandakumar, Samuel Horv\u00e1th, Eduard Gorbunov", "tldr": "", "abstract": "Robustness to Byzantine attacks is a necessity for various distributed training scenarios. When the training reduces to the process of solving a minimization problem, Byzantine robustness is relatively well-understood. However, other problem formulations, such as min-max problems or, more generally, variational inequalities, arise in many modern machine learning and, in particular, distributed learning tasks. These problems significantly differ from the standard minimization ones and, therefore, require separate consideration. Nevertheless, only one work [Abidi et al., 2022] addresses this important question in the context of Byzantine robustness. Our work makes a further step in this direction by providing several (provably) Byzantine-robust methods for distributed variational inequality, thoroughly studying their theoretical convergence, removing the limitations of the previous work, and providing numerical comparisons supporting the theoretical findings.", "keywords": "byzantine robustness;variational inequalities;min-max problems", "primary_area": "", "supplementary_material": "/attachment/8c7ee66ccf5c2cdb20c50c444e2299561a6ee34b.pdf", "author": "Nazarii Tupitsa;Abdulla Jasem Almansoori;Yanlin Wu;Martin Tak\u00e1\u010d;Karthik Nandakumar;Samuel Horv\u00e1th;Eduard Gorbunov", "authorids": "~Nazarii_Tupitsa1;~Abdulla_Jasem_Almansoori1;~Yanlin_Wu1;~Martin_Tak\u00e1\u010d1;~Karthik_Nandakumar3;~Samuel_Horv\u00e1th1;~Eduard_Gorbunov1", "gender": ";M;F;;;M;M", "homepage": ";;https://www.notion.so/Yanlin-Wu-18494aefd7f94e3e89f82422701bc6fc;;;https://sites.google.com/view/samuelhorvath;https://eduardgorbunov.github.io", "dblp": "234/8907;322/2190;;;;234/8604;215/5512.html", "google_scholar": "5siCuX4AAAAJ;J7Qf4ZsAAAAJ;;;;k252J7kAAAAJ;https://scholar.google.ru/citations?user=85j2RqQAAAAJ", "orcid": "0000-0002-9005-0129;0000-0002-3373-2043;;;;0000-0003-0619-9260;", "linkedin": ";;;;;samuel-horvath/;", "or_profile": "~Nazarii_Tupitsa1;~Abdulla_Jasem_Almansoori1;~Yanlin_Wu1;~Martin_Tak\u00e1\u010d1;~Karthik_Nandakumar3;~Samuel_Horv\u00e1th1;~Eduard_Gorbunov1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;;;MBZUAI;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;;;mbzuai.ac.ae;mbzuai.ac.ae", "position": "Researcher;PhD student;MS student;;;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\ntupitsa2023byzantinetolerant,\ntitle={Byzantine-Tolerant Methods for Distributed Variational Inequalities},\nauthor={Nazarii Tupitsa and Abdulla Jasem 
Almansoori and Yanlin Wu and Martin Tak{\\'a}{\\v{c}} and Karthik Nandakumar and Samuel Horv{\\'a}th and Eduard Gorbunov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ER0bcYXvvo}\n}", "github": "", "project": "", "reviewers": "zd6L;vExH;AxmQ;RWvG", "pdf_size": 2946698, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "55;16;45;79", "wc_strengths": "49;9;138;82", "wc_weaknesses": "57;255;188;30", "wc_questions": "31;195;5;17", "wc_limitations": "4;1;1;5", "wc_review": "196;476;377;213", "wc_reply_reviewers": "0;35;36;0", "wc_reply_authors": "0;25;30;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 48.75, 22.587330519563395 ], "wc_strengths_avg": [ 69.5, 47.24669300596604 ], "wc_weaknesses_avg": [ 132.5, 92.5918462932887 ], "wc_questions_avg": [ 62.0, 77.33692520394123 ], "wc_limitations_avg": [ 2.75, 1.7853571071357126 ], "wc_review_avg": [ 315.5, 116.542910552294 ], "wc_reply_reviewers_avg": [ 17.75, 17.75352077758099 ], "wc_reply_authors_avg": [ 13.75, 13.863170633011771 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jM_IweHi8C4J:scholar.google.com/&scioq=Byzantine-Tolerant+Methods+for+Distributed+Variational+Inequalities&hl=en&as_sdt=0,33", "gs_version_total": 8, "email": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;;;mbzuai.ac.ae;mbzuai.ac.ae", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Arab Emirates" }, { "title": "H3T: Efficient Integration of Memory Optimization and Parallelism for Large-scale Transformer Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72321", "id": "ES32O8mBK3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7886b89aced4d37dd25a6f32854bf3f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ES32O8mBK3", "openreview": "https://openreview.net/forum?id=ES32O8mBK3", "poster": "/media/PosterPDFs/NeurIPS%202023/72321.png?t=1701401856.7464926", "slides": "https://nips.cc/virtual/2023/poster/72321", "video": "https://nips.cc/virtual/2023/poster/72321", "author_site": "Yuzhong Wang, Xu Han, Weilin Zhao, Guoyang Zeng, Zhiyuan Liu, Maosong Sun", "tldr": "", "abstract": "In recent years, big models based on Transformers have achieved state-of-the-art performance on many artificial intelligence (AI) tasks.\nDespite the success of these Transformer-based models, their huge parameter size poses a serious challenge to their training, both from the storage and computation perspectives.\nTo this end, memory optimization (e.g., rematerialization and offloading) and parallelism (e.g., data parallelism and model parallelism) are widely explored to make training Transformers more efficient.\nIn this 
paper, we propose a framework to automatically find an efficient integration of memory optimization and parallelism for High-Throughput Transformer Training (named H3T), which is rarely considered by existing efforts for training big Transformer-based models.\nSpecifically, we design search algorithms to combine appropriate memory optimization strategies and parallelism schemes to achieve a balance between memory overhead and training efficiency.\nWe implement H3T based on an open-source toolkit BMTrain and then use H3T to train the Transformers of different sizes to evaluate the efficiency of H3T.\nThe experimental results show that H3T outperforms the most popular deep learning (DL) toolkit Megatron-DeepSpeed by $1.2\\times \\sim 4.3\\times$ training speed while reducing $34.6\\% \\sim 80.5\\%$ of memory overhead.\nMoreover, H3T can use only 64 NVIDIA A100 GPUs to train GPT-3-175B, which is very difficult for existing DL toolkits. The source code is available at https://github.com/OpenBMB/BMTrain/tree/h3t.", "keywords": "ML System;Parallelism Learning;Memory Optimization;Data Parallelism;Model Parallelism;Parameter Parallelism;ZeRO;Rematerialization;Checkpointing;Tensor Offloading;Dynamic Programming", "primary_area": "", "supplementary_material": "", "author": "Yuzhong Wang;Xu Han;Weilin Zhao;Guoyang Zeng;Zhiyuan Liu;Maosong Sun", "authorids": "~Yuzhong_Wang2;~Xu_Han2;~Weilin_Zhao1;~Guoyang_Zeng1;~Zhiyuan_Liu1;~Maosong_Sun1", "gender": "M;;M;M;M;M", "homepage": ";;https://brawny-college-5b2.notion.site/Weilin-Zhao-11d20b7deb8280388213d5f5ed072992?pvs=4;https://github.com/a710128/;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": "68/3284;;197/5702.html;252/5581;53/3245-1;95/3291-1", "google_scholar": "-YNmXhgAAAAJ;;_CR92HUAAAAJ;;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "orcid": ";;0000-0001-8016-1952;;0000-0002-7709-2543;", "linkedin": ";;;;;", "or_profile": "~Yuzhong_Wang2;~Xu_Han2;~Weilin_Zhao1;~Guoyang_Zeng1;~Zhiyuan_Liu1;~Maosong_Sun1", "aff": "Tsinghua University;;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;;Undergrad student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023ht,\ntitle={H3T: Efficient Integration of Memory Optimization and Parallelism for Large-scale Transformer Training},\nauthor={Yuzhong Wang and Xu Han and Weilin Zhao and Guoyang Zeng and Zhiyuan Liu and Maosong Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ES32O8mBK3}\n}", "github": "", "project": "", "reviewers": "xJUr;qHi5;QoRR;khhX", "pdf_size": 1302276, "rating": "4;5;6;6", "confidence": "4;3;4;3", "soundness": "2;2;3;3", "novelty": "3;2;2;3", "presentation": "3;2;3;3", "wc_summary": "148;15;95;75", "wc_strengths": "88;5;45;48", "wc_weaknesses": "110;5;105;134", "wc_questions": "237;258;56;80", "wc_limitations": "18;5;1;1", "wc_review": "601;288;302;338", "wc_reply_reviewers": "122;32;20;67", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 47.58347927589995 ], "wc_strengths_avg": [ 46.5, 29.364093720052047 ], "wc_weaknesses_avg": [ 88.5, 
49.439356791932475 ], "wc_questions_avg": [ 157.75, 90.45544483335428 ], "wc_limitations_avg": [ 6.25, 6.977642868476432 ], "wc_review_avg": [ 382.25, 127.60559352943741 ], "wc_reply_reviewers_avg": [ 60.25, 39.61297136040163 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4417032245806197931&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "PrimDiffusion: Volumetric Primitives Diffusion for 3D Human Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72320", "id": "ESCafo3oD5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c575c088de5cfef858b8837251f3027-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ESCafo3oD5", "openreview": "https://openreview.net/forum?id=ESCafo3oD5", "poster": "/media/PosterPDFs/NeurIPS%202023/72320.png?t=1699339284.957017", "slides": "https://nips.cc/virtual/2023/poster/72320", "video": "https://nips.cc/virtual/2023/poster/72320", "author_site": "Zhaoxi Chen, Fangzhou Hong, Haiyi Mei, Guangcong Wang, Lei Yang, Ziwei Liu", "tldr": "", "abstract": "We present PrimDiffusion, the first diffusion-based framework for 3D human generation. Devising diffusion models for 3D human generation is difficult due to the intensive computational cost of 3D representations and the articulated topology of 3D humans. To tackle these challenges, our key insight is operating the denoising diffusion process directly on a set of volumetric primitives, which models the human body as a number of small volumes with radiance and kinematic information. This volumetric primitives representation marries the capacity of volumetric representations with the efficiency of primitive-based rendering. Our PrimDiffusion framework has three appealing properties: **1)** compact and expressive parameter space for the diffusion model, **2)** flexible representation that incorporates human prior, and **3)** decoder-free rendering for efficient novel-view and novel-pose synthesis. Extensive experiments validate that PrimDiffusion outperforms state-of-the-art methods in 3D human generation. Notably, compared to GAN-based methods, our PrimDiffusion supports real-time rendering of high-quality 3D humans at a resolution of $512\\times512$ once the denoising process is done. 
We also demonstrate the flexibility of our framework on training-free conditional generation such as texture transfer and 3D inpainting.", "keywords": "neural rendering;3D generative model;diffusion model;volumetric primitives;3D human generation", "primary_area": "", "supplementary_material": "/attachment/5140f761e4162b3f350b87db4306ee1ad02f3e57.zip", "author": "Zhaoxi Chen;Fangzhou Hong;Haiyi Mei;Guangcong Wang;Lei Yang;Ziwei Liu", "authorids": "~Zhaoxi_Chen1;~Fangzhou_Hong1;~Haiyi_Mei1;~Guangcong_Wang1;~Lei_Yang7;~Ziwei_Liu1", "gender": "M;M;M;M;M;M", "homepage": "https://frozenburning.github.io/;;;https://wanggcong.github.io/;https://www.yanglei.me;https://liuziwei7.github.io/", "dblp": "118/8512-9;261/3476;;211/7260;50/2484-45;05/6300-2", "google_scholar": "HsV0WbwAAAAJ;mhaiL5MAAAAJ;TOZ9wR4AAAAJ;dk8EnkoAAAAJ;jZH2IPYAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ", "orcid": "0000-0003-3998-7044;;;0000-0002-6627-814X;0000-0002-0571-5924;", "linkedin": ";;;;;", "or_profile": "~Zhaoxi_Chen1;~Fangzhou_Hong1;~Haiyi_Mei1;~Guangcong_Wang1;~Lei_Yang7;~Ziwei_Liu1", "aff": "Nanyang Technological University;Nanyang Technological University;SenseTime;Nanyang Technological University;Sensetime Ltd.;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;sensetime.com;ntu.edu.sg;sensetime.com;ntu.edu.sg", "position": "PhD student;PhD student;Researcher;Postdoc;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023primdiffusion,\ntitle={PrimDiffusion: Volumetric Primitives Diffusion for 3D Human Generation},\nauthor={Zhaoxi Chen and Fangzhou Hong and Haiyi Mei and Guangcong Wang and Lei Yang and Ziwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ESCafo3oD5}\n}", "github": "", "project": "", "reviewers": "GMsy;PWaT;wB9H;aNCP;oDDi", "pdf_size": 4459142, "rating": "4;5;5;6;6", "confidence": "4;5;5;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;4", "presentation": "3;3;3;3;3", "wc_summary": "80;152;157;72;90", "wc_strengths": "63;68;106;64;92", "wc_weaknesses": "572;128;378;287;239", "wc_questions": "8;108;87;65;99", "wc_limitations": "6;38;29;7;53", "wc_review": "729;494;757;495;573", "wc_reply_reviewers": "26;117;154;0;0", "wc_reply_authors": "139;510;407;0;0", "reply_reviewers": "1;1;2;0;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 110.2, 36.65187580465699 ], "wc_strengths_avg": [ 78.6, 17.315888657530692 ], "wc_weaknesses_avg": [ 320.8, 149.2506616400745 ], "wc_questions_avg": [ 73.4, 35.73569643927484 ], "wc_limitations_avg": [ 26.6, 18.11739495622922 ], "wc_review_avg": [ 609.6, 112.9771658345172 ], "wc_reply_reviewers_avg": [ 59.4, 63.93621821784581 ], "wc_reply_authors_avg": [ 211.2, 210.73338605925736 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2182178902359924, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12505070390001100194&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "ntu.edu.sg;ntu.edu.sg;sensetime.com;ntu.edu.sg;sensetime.com;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "Nanyang Technological 
University;SenseTime", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.sensetime.com", "aff_unique_abbr": "NTU;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "AttrSeg: Open-Vocabulary Semantic Segmentation via Attribute Decomposition-Aggregation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72319", "id": "ESEM1lNoeS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2093ed77c549eda95bd6f7212b735b43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ESEM1lNoeS", "openreview": "https://openreview.net/forum?id=ESEM1lNoeS", "poster": "/media/PosterPDFs/NeurIPS%202023/72319.png?t=1701919112.4641192", "slides": "https://nips.cc/virtual/2023/poster/72319", "video": "https://nips.cc/virtual/2023/poster/72319", "author_site": "Chaofan Ma, Yang Yuhuan, Chen Ju, Fei Zhang, Ya Zhang, Yanfeng Wang", "tldr": "", "abstract": "Open-vocabulary semantic segmentation is a challenging task that requires segmenting novel object categories at inference time. \nRecent works explore vision-language pre-training to handle this task, but suffer from unrealistic assumptions in practical scenarios, i.e., low-quality textual category names.\nFor example, this paradigm assumes that new textual categories will be accurately and completely provided, and exist in lexicons during pre-training.\nHowever, exceptions often happen when meet with ambiguity for brief or incomplete names, new words that are not present in the pre-trained lexicons, and difficult-to-describe categories for users.\nTo address these issues, this work proposes a novel *attribute decomposition-aggregation* framework, **AttrSeg**, inspired by human cognition in understanding new concepts. \nSpecifically, in the *decomposition* stage, we decouple class names into diverse attribute descriptions to complement semantic contexts from multiple perspectives.\nTwo attribute construction strategies are designed: using large language models for common categories, and involving manually labelling for human-invented categories. \nIn the *aggregation* stage, we group diverse attributes into an integrated global description, to form a discriminative classifier that distinguishes the target object from others. \nOne hierarchical aggregation architecture is further proposed \nto achieve multi-level aggregation, leveraging the meticulously designed clustering module.\nThe final result is obtained by computing the similarity between aggregated attributes and images embedding.\nTo evaluate the effectiveness, we annotate three datasets with attribute descriptions, and conduct extensive experiments and ablation studies. 
The results show the superior performance of attribute decomposition-aggregation.\nWe refer readers to the latest arXiv version at https://arxiv.org/abs/2309.00096.", "keywords": "Open-Vocabulary Semantic Segmentation;Attributes;Decomposition and Aggregation", "primary_area": "", "supplementary_material": "/attachment/b27090a5db7fab004588921c53090bac6b9336be.pdf", "author": "Chaofan Ma;Yuhuan Yang;Chen Ju;Fei Zhang;Ya Zhang;Yanfeng Wang", "authorids": "~Chaofan_Ma1;~Yuhuan_Yang1;~Chen_Ju1;~Fei_Zhang3;~Ya_Zhang1;~Yanfeng_Wang1", "gender": ";F;M;M;F;M", "homepage": ";;https://voide1220.github.io/;;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/", "dblp": ";327/3314;221/1300;;85/3714-2;55/5407-1.html", "google_scholar": ";;https://scholar.google.com.hk/citations?user=b2jNn10AAAAJ;https://scholar.google.com/citations?hl=zh-CN;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-8472-7677;;0000-0002-5390-9053;0000-0002-3196-2347", "linkedin": ";;;ferenas97/;;", "or_profile": "~Chaofan_Ma1;~Yuhuan_Yang1;~Chen_Ju1;~Fei_Zhang3;~Ya_Zhang1;~Yanfeng_Wang1", "aff": ";Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai AI Lab;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;sjtu.edu;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "position": ";PhD student;PhD student;Researcher;Professor;Full Professor", "bibtex": "@inproceedings{\nma2023openvocabulary,\ntitle={Open-Vocabulary Semantic Segmentation via Attribute Decomposition-Aggregation},\nauthor={Chaofan Ma and Yuhuan Yang and Chen Ju and Fei Zhang and Ya Zhang and Yanfeng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ESEM1lNoeS}\n}", "github": "", "project": "", "reviewers": "PB8W;mZMs;fEW5;cgtT", "pdf_size": 722768, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "63;229;75;56", "wc_strengths": "24;172;28;49", "wc_weaknesses": "98;189;161;259", "wc_questions": "50;124;94;35", "wc_limitations": "3;7;116;11", "wc_review": "238;721;474;410", "wc_reply_reviewers": "63;102;35;302", "wc_reply_authors": "28;227;157;749", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.75, 71.48207817348346 ], "wc_strengths_avg": [ 68.25, 60.64806262363209 ], "wc_weaknesses_avg": [ 176.75, 57.803005977198104 ], "wc_questions_avg": [ 75.75, 35.301380992816696 ], "wc_limitations_avg": [ 34.25, 47.283057219261956 ], "wc_review_avg": [ 460.75, 173.27633277513695 ], "wc_reply_reviewers_avg": [ 125.5, 104.64344222167006 ], "wc_reply_authors_avg": [ 290.25, 274.30947395232266 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9435453684763283792&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";sjtu.edu.cn;sjtu.edu;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai AI Lab", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.sjtu.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "SJTU;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SlotDiffusion: Object-Centric Generative Modeling with Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72318", "id": "ETk6cfS3vk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9fa03b16dbd6cabc7601fe98c6ec291e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ETk6cfS3vk", "openreview": "https://openreview.net/forum?id=ETk6cfS3vk", "poster": "/media/PosterPDFs/NeurIPS%202023/72318.png?t=1701219867.506604", "slides": "https://nips.cc/virtual/2023/poster/72318", "video": "https://nips.cc/virtual/2023/poster/72318", "author_site": "Ziyi Wu, Jingyu Hu, Wuyue Lu, Igor Gilitschenski, Animesh Garg", "tldr": "", "abstract": "Object-centric learning aims to represent visual data with a set of object entities (a.k.a. slots), providing structured representations that enable systematic generalization.\nLeveraging advanced architectures like Transformers, recent approaches have made significant progress in unsupervised object discovery.\nIn addition, slot-based representations hold great potential for generative modeling, such as controllable image generation and object manipulation in image editing.\nHowever, current slot-based methods often produce blurry images and distorted objects, exhibiting poor generative modeling capabilities.\nIn this paper, we focus on improving slot-to-image decoding, a crucial aspect for high-quality visual generation.\nWe introduce SlotDiffusion -- an object-centric Latent Diffusion Model (LDM) designed for both image and video data.\nThanks to the powerful modeling capacity of LDMs, SlotDiffusion surpasses previous slot models in unsupervised object segmentation and visual generation across six datasets.\nFurthermore, our learned object features can be utilized by existing object-centric dynamics models, improving video prediction quality and downstream temporal reasoning tasks.\nFinally, we demonstrate the scalability of SlotDiffusion to unconstrained real-world datasets such as PASCAL VOC and COCO, when integrated with self-supervised pre-trained image encoders.", "keywords": "Unsupervised object-centric learning;diffusion model;generative modeling", "primary_area": "", "supplementary_material": "", "author": "Ziyi Wu;Jingyu Hu;Wuyue Lu;Igor Gilitschenski;Animesh Garg", "authorids": "~Ziyi_Wu1;~Jingyu_Hu1;~Wuyue_Lu1;~Igor_Gilitschenski1;~Animesh_Garg1", "gender": "M;M;F;M;M", "homepage": "https://wuziyi616.github.io/;https://github.com/franklyalpha;https://acorn.utoronto.ca/sws/#/;https://www.gilitschenski.org/igor;http://animesh.garg.tech", "dblp": "217/8678;;;129/1281;123/5728", "google_scholar": "iopH6wIAAAAJ;60oq1_sAAAAJ;;Nuw1Y4oAAAAJ;zp8V7ZMAAAAJ", "orcid": "0000-0002-8247-5872;;;;0000-0003-0482-4296", "linkedin": ";;;igorgilitschenski/;animeshgarg/", "or_profile": "~Ziyi_Wu1;~Jingyu_Hu1;~Wuyue_Lu1;~Igor_Gilitschenski1;~Animesh_Garg1", "aff": "Google;Department of Computer Science, University of Toronto;University of Toronto;University of Toronto;University of Toronto", "aff_domain": "google.com;cs.toronto.edu;utoronto.ca;toronto.edu;toronto.edu", "position": "Intern;Undergrad student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2023slotdiffusion,\ntitle={SlotDiffusion: Object-Centric Generative 
Modeling with Diffusion Models},\nauthor={Ziyi Wu and Jingyu Hu and Wuyue Lu and Igor Gilitschenski and Animesh Garg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ETk6cfS3vk}\n}", "github": "", "project": "", "reviewers": "JU9y;jjQP;GFM9;QakT", "pdf_size": 36926490, "rating": "6;6;7;7", "confidence": "5;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;4;4;4", "wc_summary": "82;98;100;50", "wc_strengths": "24;54;137;33", "wc_weaknesses": "195;164;106;20", "wc_questions": "15;126;35;129", "wc_limitations": "1;50;26;1", "wc_review": "317;492;404;233", "wc_reply_reviewers": "33;159;34;55", "wc_reply_authors": "31;58;23;36", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 20.0187412191676 ], "wc_strengths_avg": [ 62.0, 44.64862819841165 ], "wc_weaknesses_avg": [ 121.25, 66.61597030742703 ], "wc_questions_avg": [ 76.25, 51.74637668474963 ], "wc_limitations_avg": [ 19.5, 20.35313243704762 ], "wc_review_avg": [ 361.5, 96.60357136255367 ], "wc_reply_reviewers_avg": [ 70.25, 51.98737827588539 ], "wc_reply_authors_avg": [ 37.0, 12.98075498574717 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1765843894896710790&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;cs.toronto.edu;utoronto.ca;toronto.edu;toronto.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Google;University of Toronto", "aff_unique_dep": "Google;Department of Computer Science", "aff_unique_url": "https://www.google.com;https://www.utoronto.ca", "aff_unique_abbr": "Google;U of T", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Toronto;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;Canada" }, { "title": "Better Private Linear Regression Through Better Private Feature Selection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72317", "id": "EUiIbwV379", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a79699db176ed0efc04a9da171e52112-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EUiIbwV379", "openreview": "https://openreview.net/forum?id=EUiIbwV379", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72317", "video": "https://nips.cc/virtual/2023/poster/72317", "author_site": "Travis Dick, Jennifer Gillenwater, Matthew Joseph", "tldr": "", "abstract": "Existing work on differentially private linear regression typically assumes that end users can precisely set data bounds or algorithmic hyperparameters. End users often struggle to meet these requirements without directly examining the data (and violating privacy). Recent work has attempted to develop solutions that shift these burdens from users to algorithms, but they struggle to provide utility as the feature dimension grows. This work extends these algorithms to higher-dimensional problems by introducing a differentially private feature selection method based on Kendall rank correlation. 
We prove a utility guarantee for the setting where features are normally distributed and conduct experiments across 25 datasets. We find that adding this private feature selection step before regression significantly broadens the applicability of ``plug-and-play'' private linear regression algorithms at little additional cost to privacy, computation, or decision-making by the end user.", "keywords": "differential privacy;linear regression;sparse;feature selection;kendall", "primary_area": "", "supplementary_material": "/attachment/3795cb8445ae642c82a152c0861e35530787c341.zip", "author": "Travis Dick;Jennifer Gillenwater;Matthew Joseph", "authorids": "~Travis_Dick1;~Jennifer_Gillenwater1;~Matthew_Joseph1", "gender": "M;F;M", "homepage": "https://www.cis.upenn.edu/~tbd/;http://jgillenw.com;https://www.majos.net/", "dblp": "135/8679;73/3828;180/5618", "google_scholar": "https://scholar.google.com/citations?hl=en;5lUnZgsAAAAJ;uzY-OQ-QMAEC", "orcid": ";;", "linkedin": ";;", "or_profile": "~Travis_Dick1;~Jennifer_Gillenwater1;~Matthew_Joseph1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Research Scientist;Research scientist", "bibtex": "@inproceedings{\ndick2023better,\ntitle={Better Private Linear Regression Through Better Private Feature Selection},\nauthor={Travis Dick and Jennifer Gillenwater and Matthew Joseph},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EUiIbwV379}\n}", "github": "", "project": "", "reviewers": "8Qm2;1cAJ;itk8;yU7m", "pdf_size": 434068, "rating": "5;6;6;8", "confidence": "4;4;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "109;62;74;357", "wc_strengths": "82;52;102;106", "wc_weaknesses": "351;159;87;67", "wc_questions": "121;5;82;199", "wc_limitations": "6;5;70;1", "wc_review": "669;283;415;730", "wc_reply_reviewers": "46;67;30;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 150.5, 120.46680040575495 ], "wc_strengths_avg": [ 85.5, 21.37171027316251 ], "wc_weaknesses_avg": [ 166.0, 112.15614116043757 ], "wc_questions_avg": [ 101.75, 69.96204328062467 ], "wc_limitations_avg": [ 20.5, 28.64000698323937 ], "wc_review_avg": [ 524.25, 182.63539498136717 ], "wc_reply_reviewers_avg": [ 38.25, 20.932928605429293 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8921165628386094048&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Generative Neural Fields by Mixtures of Neural Implicit Functions", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72316", "id": "EWNtYvepJh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/407fb8c5f3fda374c57d1bb18313ea5d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EWNtYvepJh", "openreview": "https://openreview.net/forum?id=EWNtYvepJh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72316", "video": "https://nips.cc/virtual/2023/poster/72316", "author_site": "Tackgeun You, Mijeong Kim, Jungtaek Kim, Bohyung Han", "tldr": "", "abstract": "We propose a novel approach to learning the generative neural fields represented by linear combinations of implicit basis networks. Our algorithm learns basis networks in the form of implicit neural representations and their coefficients in a latent space by either conducting meta-learning or adopting auto-decoding paradigms. The proposed method easily enlarges the capacity of generative neural fields by increasing the number of basis networks while maintaining the size of a network for inference to be small through their weighted model averaging. Consequently, sampling instances using the model is efficient in terms of latency and memory footprint. Moreover, we customize denoising diffusion probabilistic model for a target task to sample latent mixture coefficients, which allows our final model to generate unseen data effectively. Experiments show that our approach achieves competitive generation performance on diverse benchmarks for images, voxel data, and NeRF scenes without sophisticated designs for specific modalities and domains.", "keywords": "generative neural fields; implicit neural representation; model averaging", "primary_area": "", "supplementary_material": "", "author": "Tackgeun You;Mijeong Kim;Jungtaek Kim;Bohyung Han", "authorids": "~Tackgeun_You1;~Mijeong_Kim1;~Jungtaek_Kim1;~Bohyung_Han1", "gender": "M;;M;Not Specified", "homepage": "http://tackgeun.github.io/;;https://jungtaekkim.github.io;http://cvlab.snu.ac.kr/~bhhan", "dblp": "159/2150;;31/3193-1;73/4880.html", "google_scholar": "VDOtnIEAAAAJ;;KXNUYWgAAAAJ;9aaeCToAAAAJ", "orcid": ";;0000-0002-1905-1399;", "linkedin": ";;jungtaekkim;", "or_profile": "~Tackgeun_You1;~Mijeong_Kim1;~Jungtaek_Kim1;~Bohyung_Han1", "aff": "POSTECH;;University of Pittsburgh;Seoul National University", "aff_domain": "postech.edu;;pitt.edu;snu.ac.kr", "position": "PhD student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyou2023generative,\ntitle={Generative Neural Fields by Mixtures of Neural Implicit Functions},\nauthor={Tackgeun You and Mijeong Kim and Jungtaek Kim and Bohyung Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EWNtYvepJh}\n}", "github": "", "project": "", "reviewers": "YZcJ;ZMK5;fjUK;uPSW;2vix", "pdf_size": 18731332, "rating": "5;5;5;6;7", "confidence": "4;5;5;3;3", "soundness": "2;3;2;3;4", "novelty": "3;2;2;3;4", "presentation": "2;2;4;3;3", "wc_summary": "175;83;94;40;163", "wc_strengths": "81;52;74;10;58", "wc_weaknesses": "193;113;196;54;14", "wc_questions": "239;11;22;16;45", "wc_limitations": "29;13;32;28;12", "wc_review": "717;272;418;148;292", "wc_reply_reviewers": "87;49;37;29;32", "wc_reply_authors": "532;414;406;65;139", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;3;3;2;3", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], 
"wc_summary_avg": [ 111.0, 50.821255395749525 ], "wc_strengths_avg": [ 55.0, 24.819347291981714 ], "wc_weaknesses_avg": [ 114.0, 72.89170048777844 ], "wc_questions_avg": [ 66.6, 86.98183718455249 ], "wc_limitations_avg": [ 22.8, 8.518215775618742 ], "wc_review_avg": [ 369.4, 193.74374828623502 ], "wc_reply_reviewers_avg": [ 46.8, 21.226398658274558 ], "wc_reply_authors_avg": [ 311.2, 178.0869450577442 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.2, 0.9797958971132712 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8385254915624212, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12194135511751019736&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "postech.edu;;pitt.edu;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Pohang University of Science and Technology;University of Pittsburgh;Seoul National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.postech.ac.kr;https://www.pitt.edu;https://www.snu.ac.kr", "aff_unique_abbr": "POSTECH;Pitt;SNU", "aff_campus_unique_index": "0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Reward Scale Robustness for Proximal Policy Optimization via DreamerV3 Tricks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72315", "id": "EY4OHikuBm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/04f61ec02d1b3a025a59d978269ce437-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EY4OHikuBm", "openreview": "https://openreview.net/forum?id=EY4OHikuBm", "poster": "/media/PosterPDFs/NeurIPS%202023/72315.png?t=1701990376.9520857", "slides": "https://nips.cc/virtual/2023/poster/72315", "video": "https://nips.cc/virtual/2023/poster/72315", "author_site": "Ryan Sullivan, Akarsh Kumar, Shengyi Huang, John Dickerson, Joseph Suarez", "tldr": "", "abstract": "Most reinforcement learning methods rely heavily on dense, well-normalized environment rewards. DreamerV3 recently introduced a model-based method with a number of tricks that mitigate these limitations, achieving state-of-the-art on a wide range of benchmarks with a single set of hyperparameters. This result sparked discussion about the generality of the tricks, since they appear to be applicable to other reinforcement learning algorithms. Our work applies DreamerV3's tricks to PPO and is the first such empirical study outside of the original work. Surprisingly, we find that the tricks presented do not transfer as general improvements to PPO. We use a high quality PPO reference implementation and present extensive ablation studies totaling over 10,000 A100 hours on the Arcade Learning Environment and the DeepMind Control Suite. Though our experiments demonstrate that these tricks do not generally outperform PPO, we identify cases where they succeed and offer insight into the relationship between the implementation tricks. 
In particular, PPO with these tricks performs comparably to PPO on Atari games with reward clipping and significantly outperforms PPO without reward clipping.", "keywords": "Reinforcement Learning;Proximal Policy Optimization;Reward Normalization", "primary_area": "", "supplementary_material": "/attachment/d91e5473cfbf70571afe1637b32793faf2727deb.pdf", "author": "Ryan Sullivan;Akarsh Kumar;Shengyi Huang;John P Dickerson;Joseph Suarez", "authorids": "~Ryan_Sullivan2;~Akarsh_Kumar1;~Shengyi_Huang1;~John_P_Dickerson1;~Joseph_Suarez1", "gender": "M;M;M;M;M", "homepage": "https://ryannavillus.github.io/;https://akarshkumar.com/;https://costa.sh/;https://jpdickerson.com/;https://jsuarez5341.github.io", "dblp": ";;251/8731;75/8479;", "google_scholar": "https://scholar.google.com/citations?hl=en;FaM3qWEAAAAJ;kl9YcpEAAAAJ;https://scholar.google.com.tw/citations?user=QgDpfCQAAAAJ;", "orcid": ";;;0000-0003-2231-680X;", "linkedin": "ryan-navillus/;;costa-huang/;john-dickerson-83a74a7/;", "or_profile": "~Ryan_Sullivan2;~Akarsh_Kumar1;~Shengyi_Huang1;~John_P_Dickerson1;~Joseph_Suarez1", "aff": "University of Maryland, College Park;Massachusetts Institute of Technology;Drexel University;Optimized Markets, Inc;Massachusetts Institute of Technology", "aff_domain": "umd.edu;mit.edu;drexel.edu;optimizedmarkets.com;mit.edu", "position": "PhD student;PhD student;PhD student;Consultant;PhD student", "bibtex": "@inproceedings{\nsullivan2023reward,\ntitle={Reward Scale Robustness for Proximal Policy Optimization via DreamerV3 Tricks},\nauthor={Ryan Sullivan and Akarsh Kumar and Shengyi Huang and John P Dickerson and Joseph Suarez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EY4OHikuBm}\n}", "github": "", "project": "", "reviewers": "DsH5;75kB;QvZz;WmdD", "pdf_size": 9320217, "rating": "3;6;7;7", "confidence": "4;3;5;5", "soundness": "3;3;3;4", "novelty": "2;2;3;2", "presentation": "2;3;4;4", "wc_summary": "46;28;83;42", "wc_strengths": "47;43;189;167", "wc_weaknesses": "208;228;164;96", "wc_questions": "12;27;77;34", "wc_limitations": "15;2;7;24", "wc_review": "328;328;520;363", "wc_reply_reviewers": "231;49;69;73", "wc_reply_authors": "434;87;71;52", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 49.75, 20.327014045353536 ], "wc_strengths_avg": [ 111.5, 66.96827607158482 ], "wc_weaknesses_avg": [ 174.0, 50.635955604688654 ], "wc_questions_avg": [ 37.5, 24.150569351466643 ], "wc_limitations_avg": [ 12.0, 8.336666000266533 ], "wc_review_avg": [ 384.75, 79.38316887098927 ], "wc_reply_reviewers_avg": [ 105.5, 73.02568041449528 ], "wc_reply_authors_avg": [ 161.0, 158.10281464920226 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.41382044088453257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11377572131661451114&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "umd.edu;mit.edu;drexel.edu;optimizedmarkets.com;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Maryland;Massachusetts Institute of Technology;Drexel University;Optimized Markets, Inc", "aff_unique_dep": ";;;", 
"aff_unique_url": "https://www/umd.edu;https://web.mit.edu;https://www.drexel.edu;", "aff_unique_abbr": "UMD;MIT;Drexel;", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Context-lumpable stochastic bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72314", "id": "EY7Hpj8Ok6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f564a952c1b86684baf7d7241ae27ac8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EY7Hpj8Ok6", "openreview": "https://openreview.net/forum?id=EY7Hpj8Ok6", "poster": "/media/PosterPDFs/NeurIPS%202023/72314.png?t=1699570163.6658359", "slides": "https://nips.cc/virtual/2023/poster/72314", "video": "https://nips.cc/virtual/2023/poster/72314", "author_site": "Chung-Wei Lee, Qinghua Liu, Yasin Abbasi Yadkori, Chi Jin, Tor Lattimore, Csaba Szepesvari", "tldr": "", "abstract": "We consider a contextual bandit problem with $S $ contexts and $K $ actions. In each round $t=1,2,\\dots$ the learner\nobserves a random context and chooses an action based on its past experience. The learner then observes a random reward whose mean is a function of the context and the action for the round. Under the assumption that the contexts can be lumped into $r\\le \\min(S ,K)$ groups such that the mean reward for the various actions is the same for any two contexts that are in the same group, we give an algorithm that outputs an $\\epsilon$-optimal policy after using at most $\\widetilde O(r (S +K )/\\epsilon^2)$ samples with high probability and provide a matching $\\widetilde\\Omega(r (S +K )/\\epsilon^2)$ lower bound. In the regret minimization setting, we give an algorithm whose cumulative regret up to time $T$ is bounded by $\\widetilde O(\\sqrt{r ^3(S +K )T})$. To the best of our knowledge, we are the first to show the near-optimal sample complexity in the PAC setting and $\\widetilde O{\\sqrt{\\text{poly}(r)(S+K)T}}$ minimax regret in the online setting for this problem. 
We also show our algorithms can be applied to more general low-rank bandits and get improved regret bounds in some scenarios.", "keywords": "Contextual bandits;low-rank bandits;latent bandits;clustering bandits;stochastic bandit problems;context-lumpable bandits", "primary_area": "", "supplementary_material": "/attachment/2eb2041b7b3afa70293c6faef9979f66194fa666.pdf", "author": "Chung-Wei Lee;Qinghua Liu;Yasin Abbasi-Yadkori;Chi Jin;Tor Lattimore;Csaba Szepesvari", "authorids": "~Chung-Wei_Lee1;~Qinghua_Liu1;~Yasin_Abbasi-Yadkori1;~Chi_Jin1;~Tor_Lattimore1;~Csaba_Szepesvari1", "gender": ";M;;M;M;M", "homepage": "https://chungwei.net/;http://qinghual2020.github.io/;https://yasin-abbasi.github.io;https://sites.google.com/view/cjin/home;http://tor-lattimore.com;https://sites.ualberta.ca/~szepesva/", "dblp": "80/2550;;https://dblp.org/pers/a/Abbasi=Yadkori:Yasin.html;126/1802-1;44/9886;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba", "google_scholar": "VVVc6BIAAAAJ;CotFJJsAAAAJ;https://scholar.google.com.au/citations?user=-D0EgMIAAAAJ;GINhGvwAAAAJ;;https://scholar.google.ca/citations?user=zvC19mQAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;csaba-szepesvari-09376b1?trk=hp-identity-name", "or_profile": "~Chung-Wei_Lee1;~Qinghua_Liu1;~Yasin_Abbasi-Yadkori1;~Chi_Jin1;~Tor_Lattimore1;~Csaba_Szepesvari1", "aff": "University of Southern California;Princeton University;Google DeepMind;Princeton University;Google DeepMind;Google DeepMind", "aff_domain": "usc.edu;princeton.edu;deepmind.com;princeton.edu;google.com;google.com", "position": "PhD student;PhD student;Researcher;Assistant Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlee2023contextlumpable,\ntitle={Context-lumpable stochastic bandits},\nauthor={Chung-Wei Lee and Qinghua Liu and Yasin Abbasi-Yadkori and Chi Jin and Tor Lattimore and Csaba Szepesvari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EY7Hpj8Ok6}\n}", "github": "", "project": "", "reviewers": "15yr;bwz6;ihhL;nWDZ", "pdf_size": 401026, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;4;3", "wc_summary": "186;56;87;151", "wc_strengths": "11;60;26;70", "wc_weaknesses": "48;74;85;64", "wc_questions": "48;22;36;31", "wc_limitations": "1;14;1;24", "wc_review": "294;226;235;340", "wc_reply_reviewers": "12;11;5;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 120.0, 51.2396330978277 ], "wc_strengths_avg": [ 41.75, 24.107830678018296 ], "wc_weaknesses_avg": [ 67.75, 13.608361400256829 ], "wc_questions_avg": [ 34.25, 9.390819985496474 ], "wc_limitations_avg": [ 10.0, 9.669539802906858 ], "wc_review_avg": [ 273.75, 46.31616888301536 ], "wc_reply_reviewers_avg": [ 12.5, 6.103277807866851 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1024014083824360740&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "usc.edu;princeton.edu;deepmind.com;princeton.edu;google.com;google.com", "author_num": 6, 
"aff_unique_index": "0;1;2;1;2;2", "aff_unique_norm": "University of Southern California;Princeton University;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.usc.edu;https://www.princeton.edu;https://deepmind.com", "aff_unique_abbr": "USC;Princeton;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;1;0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Disentangled Wasserstein Autoencoder for T-Cell Receptor Engineering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72313", "id": "Eb74zfBkWa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e95da8078ec8389533c802e368da5298-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Eb74zfBkWa", "openreview": "https://openreview.net/forum?id=Eb74zfBkWa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72313", "video": "https://nips.cc/virtual/2023/poster/72313", "author_site": "Tianxiao Li, Hongyu Guo, Filippo Grazioli, Mark Gerstein, Martin Renqiang Min", "tldr": "", "abstract": "In protein biophysics, the separation between the functionally important residues (forming the active site or binding surface) and those that create the overall structure (the fold) is a well-established and fundamental concept. Identifying and modifying those functional sites is critical for protein engineering but computationally non-trivial, and requires significant domain knowledge. To automate this process from a data-driven perspective, we propose a disentangled Wasserstein autoencoder with an auxiliary classifier, which isolates the function-related patterns from the rest with theoretical guarantees. This enables one-pass protein sequence editing and improves the understanding of the resulting sequences and editing actions involved. To demonstrate its effectiveness, we apply it to T-cell receptors (TCRs), a well-studied structure-function case. We show that our method can be used to alter the function of TCRs without changing the structural backbone, outperforming several competing methods in generation quality and efficiency, and requiring only 10\\% of the running time needed by baseline models. 
To our knowledge, this is the first approach that utilizes disentangled representations for TCR engineering.", "keywords": "protein engineering;disentangled representation;T cell receptor", "primary_area": "", "supplementary_material": "/attachment/bb81346f87ee9bb08190e685717cac615c632a52.pdf", "author": "Tianxiao Li;Hongyu Guo;Filippo Grazioli;Mark Gerstein;Martin Renqiang Min", "authorids": "~Tianxiao_Li1;~Hongyu_Guo1;~Filippo_Grazioli1;~Mark_Gerstein1;~Martin_Renqiang_Min1", "gender": "M;M;M;M;M", "homepage": ";https://hongyuharryguo.github.io/;https://flpgrz.github.io/;http://www.gersteinlab.org/;http://www.cs.toronto.edu/~cuty", "dblp": ";;211/0924;67/5132;29/7048", "google_scholar": "Q4EBjt4AAAAJ;https://scholar.google.ca/citations?user=bZUqlakAAAAJ;https://scholar.google.de/citations?user=pdje0fcAAAAJ;YvjuUugAAAAJ;T2M4JjEAAAAJ", "orcid": ";;0000-0001-8888-133X;0000-0002-9746-3719;0000-0002-8563-6133", "linkedin": ";harry-h-y-guo-a582087/;flpgrz/;mbgmbg/;martin-renqiang-min-955a8766", "or_profile": "~Tianxiao_Li1;~Hongyu_Guo1;~Filippo_Grazioli1;~Mark_Gerstein1;~Martin_Renqiang_Min1", "aff": "Yale University;National Research Council Canada;NEC Laboratories Europe;Yale University;NEC Laboratories America", "aff_domain": "yale.edu;nrc-cnrc.gc.ca;neclab.eu;yale.edu;nec-labs.com", "position": "PhD student;Senior Research Officer;Research Engineer;Full Professor;Researcher", "bibtex": "@inproceedings{\nli2023disentangled,\ntitle={Disentangled Wasserstein Autoencoder for T-Cell Receptor Engineering},\nauthor={Tianxiao Li and Hongyu Guo and Filippo Grazioli and Mark Gerstein and Martin Renqiang Min},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Eb74zfBkWa}\n}", "github": "", "project": "", "reviewers": "4twu;ULRU;TMVT;mx2A", "pdf_size": 7356646, "rating": "5;6;7;7", "confidence": "4;4;5;3", "soundness": "1;4;4;2", "novelty": "2;3;3;3", "presentation": "2;4;4;3", "wc_summary": "124;92;121;96", "wc_strengths": "62;26;77;33", "wc_weaknesses": "528;16;380;36", "wc_questions": "355;238;85;249", "wc_limitations": "61;1;97;1", "wc_review": "1130;373;760;415", "wc_reply_reviewers": "736;13;364;42", "wc_reply_authors": "1068;36;1025;36", "reply_reviewers": "2;1;2;2", "reply_authors": "3;2;3;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 1.299038105676658 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 108.25, 14.359230480774379 ], "wc_strengths_avg": [ 49.5, 20.838665984174707 ], "wc_weaknesses_avg": [ 240.0, 220.41778512633684 ], "wc_questions_avg": [ 231.75, 96.25844118829268 ], "wc_limitations_avg": [ 40.0, 41.02438299353203 ], "wc_review_avg": [ 669.5, 305.3412025914616 ], "wc_reply_reviewers_avg": [ 288.75, 292.6682208576804 ], "wc_reply_authors_avg": [ 541.25, 505.47867165687614 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12252518731161735069&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "yale.edu;nrc-cnrc.gc.ca;neclab.eu;yale.edu;nec-labs.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Yale University;National Research Council Canada;NEC Laboratories Europe;NEC Laboratories America", "aff_unique_dep": ";;;", 
"aff_unique_url": "https://www.yale.edu;https://www.nrc-cnrc.gc.ca;https://www.nec-labs.eu;https://www.nec-labs.com", "aff_unique_abbr": "Yale;NRC-CNRC;NEC LE;NEC Labs America", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "United States;Canada;Unknown" }, { "title": "Learning Time-Invariant Representations for Individual Neurons from Population Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72312", "id": "EcN3l6Xmnx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9032e5c9ec394ce768a2fa9bdc56af6c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EcN3l6Xmnx", "openreview": "https://openreview.net/forum?id=EcN3l6Xmnx", "poster": "/media/PosterPDFs/NeurIPS%202023/72312.png?t=1702106007.6101885", "slides": "https://nips.cc/virtual/2023/poster/72312", "video": "https://nips.cc/virtual/2023/poster/72312", "author_site": "Lu Mi, Trung Le, Tianxing He, Eli Shlizerman, Uygar S\u00fcmb\u00fcl", "tldr": "", "abstract": "Neurons can display highly variable dynamics. While such variability presumably supports the wide range of behaviors generated by the organism, their gene expressions are relatively stable in the adult brain. This suggests that neuronal activity is a combination of its time-invariant identity and the inputs the neuron receives from the rest of the circuit. Here, we propose a self-supervised learning based method to assign time-invariant representations to individual neurons based on permutation-, and population size-invariant summary of population recordings. We fit dynamical models to neuronal activity to learn a representation by considering the activity of both the individual and the neighboring population. Our self-supervised approach and use of implicit representations enable robust inference against imperfections such as partial overlap of neurons across sessions, trial-to-trial variability, and limited availability of molecular (transcriptomic) labels for downstream supervised tasks. We demonstrate our method on a public multimodal dataset of mouse cortical neuronal activity and transcriptomic labels. 
We report >35\\% improvement in predicting the transcriptomic subclass identity and >20\\% improvement in predicting class identity with respect to the state-of-the-art.", "keywords": "population dynamics;neuronal representation;calcium imaging;cell types", "primary_area": "", "supplementary_material": "", "author": "Lu Mi;Trung Le;Tianxing He;Eli Shlizerman;Uygar S\u00fcmb\u00fcl", "authorids": "~Lu_Mi1;~Trung_Le4;~Tianxing_He1;~Eli_Shlizerman1;~Uygar_S\u00fcmb\u00fcl2", "gender": "F;M;M;;M", "homepage": "https://lumimim.github.io;;https://cloudygoose.github.io/;http://faculty.washington.edu/shlizee/;", "dblp": "185/3258;88/8728;149/0111;00/9501;30/8374", "google_scholar": "vokCG-MAAAAJ;;egmfjjwAAAAJ;oJnSO50AAAAJ;dhiRjJIAAAAJ", "orcid": ";0000-0002-7047-8451;;0000-0002-3136-4531;", "linkedin": "lu-mi-698899172/;trung-le/;;;", "or_profile": "~Lu_Mi1;~Trung_Le4;~Tianxing_He1;~Eli_Shlizerman1;~Uygar_Sumbul1", "aff": "Allen Institute;Sandia National Laboratories;University of Washington;University of Washington;Allen Institute", "aff_domain": "allen.org;sandia.gov;cs.washington.edu;u.washington.edu;alleninstitute.org", "position": "Postdoc;Intern;Postdoc;Associate Professor;Associate Investigator", "bibtex": "@inproceedings{\nmi2023learning,\ntitle={Learning Time-Invariant Representations for Individual Neurons from Population Dynamics},\nauthor={Lu Mi and Trung Le and Tianxing He and Eli Shlizerman and Uygar S{\\\"u}mb{\\\"u}l},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EcN3l6Xmnx}\n}", "github": "", "project": "", "reviewers": "4tWo;MNFF;5dAW;QXoV", "pdf_size": 4164131, "rating": "5;5;5;6", "confidence": "3;4;3;2", "soundness": "3;2;3;3", "novelty": "2;3;2;2", "presentation": "3;2;3;3", "wc_summary": "60;113;118;162", "wc_strengths": "122;59;88;64", "wc_weaknesses": "415;363;291;48", "wc_questions": "148;50;15;171", "wc_limitations": "10;13;38;15", "wc_review": "755;598;550;460", "wc_reply_reviewers": "48;82;51;21", "wc_reply_authors": "71;131;44;12", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 113.25, 36.17578610065025 ], "wc_strengths_avg": [ 83.25, 24.913600703230355 ], "wc_weaknesses_avg": [ 279.25, 140.5851610234878 ], "wc_questions_avg": [ 96.0, 65.20352751193758 ], "wc_limitations_avg": [ 19.0, 11.113055385446435 ], "wc_review_avg": [ 590.75, 106.98919337951848 ], "wc_reply_reviewers_avg": [ 50.5, 21.615966321217286 ], "wc_reply_authors_avg": [ 64.5, 43.706406853000395 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18180371624379939719&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "allen.org;sandia.gov;cs.washington.edu;u.washington.edu;alleninstitute.org", "author_num": 5, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Sandia National Laboratories;University of Washington", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://www.sandia.gov;https://www.washington.edu", "aff_unique_abbr": "AI2;SNL;UW", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal Treatment Allocation for Efficient Policy Evaluation in Sequential Decision Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72311", "id": "EcReRm7q9p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98d0ad88db1e51bd0aa341a823290ece-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EcReRm7q9p", "openreview": "https://openreview.net/forum?id=EcReRm7q9p", "poster": "/media/PosterPDFs/NeurIPS%202023/72311.png?t=1702130964.7199676", "slides": "https://nips.cc/virtual/2023/poster/72311", "video": "https://nips.cc/virtual/2023/poster/72311", "author_site": "Ting Li, Chengchun Shi, Jianing Wang, Fan Zhou, hongtu zhu", "tldr": "", "abstract": "A/B testing is critical for modern technological companies to evaluate the effectiveness of newly developed products against standard baselines. This paper studies optimal designs that aim to maximize the amount of information obtained from online experiments to estimate treatment effects accurately. We propose three optimal allocation strategies in a dynamic setting where treatments are sequentially assigned over time. These strategies are designed to minimize the variance of the treatment effect estimator when data follow a non Markov decision process or a (time-varying) Markov decision process. We further develop estimation procedures based on existing off-policy evaluation (OPE) methods and conduct extensive experiments in various environments to demonstrate the effectiveness of the proposed methodologies. In theory, we prove the optimality of the proposed treatment allocation design and establish upper bounds for the mean squared errors of the resulting treatment effect estimators.", "keywords": "Average treatment effect;Experimental design;Off-policy evaluation;Optimal treatment allocation", "primary_area": "", "supplementary_material": "/attachment/35eb94b3a2e854d8d30a9e88974b148922927135.pdf", "author": "Ting Li;Chengchun Shi;Jianing Wang;Fan Zhou;Hongtu Zhu", "authorids": "~Ting_Li7;~Chengchun_Shi1;~Jianing_Wang2;~Fan_Zhou7;~Hongtu_Zhu2", "gender": ";M;M;;M", "homepage": ";https://callmespring.github.io/;https://github.com/dc-wangjn;;https://bigkp.org", "dblp": ";;;;03/5683", "google_scholar": ";dDGy3N0AAAAJ;IoLV4AcAAAAJ;4QJkjl0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-6781-2690", "linkedin": ";;;;", "or_profile": "~Ting_Li7;~Chengchun_Shi1;~Jianing_Wang2;~Fan_Zhou7;~Hongtu_Zhu2", "aff": ";London School of Economics and Political Science, University of London;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;University of North Carolina at Chapel Hill", "aff_domain": ";lse.ac.uk;sufe.edu.cn;shufe.edu;unc.edu", "position": ";Assistant Professor;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2023optimal,\ntitle={Optimal Treatment Allocation for Efficient Policy Evaluation in Sequential Decision Making},\nauthor={Ting Li and Chengchun Shi and Jianing Wang and Fan Zhou and Hongtu Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EcReRm7q9p}\n}", "github": "", "project": "", "reviewers": "bgAF;NJNM;tKDd;WpTu", "pdf_size": 1229417, "rating": "6;6;6;7", "confidence": "1;5;2;4", "soundness": "3;4;3;4", "novelty": "3;2;3;3", 
"presentation": "3;4;4;4", "wc_summary": "26;52;121;50", "wc_strengths": "26;138;61;43", "wc_weaknesses": "2;168;80;173", "wc_questions": "174;73;66;35", "wc_limitations": "63;1;11;3", "wc_review": "291;432;339;304", "wc_reply_reviewers": "6;24;0;25", "wc_reply_authors": "0;0;0;428", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.5811388300841898 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 62.25, 35.42862543198649 ], "wc_strengths_avg": [ 67.0, 42.81938813201328 ], "wc_weaknesses_avg": [ 105.75, 70.40019531222907 ], "wc_questions_avg": [ 87.0, 52.225472712077966 ], "wc_limitations_avg": [ 19.5, 25.391927851189244 ], "wc_review_avg": [ 341.5, 55.12032293083922 ], "wc_reply_reviewers_avg": [ 13.75, 10.96300597464035 ], "wc_reply_authors_avg": [ 107.0, 185.32943640986986 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.36514837167011077, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15420249161552976356&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";lse.ac.uk;sufe.edu.cn;shufe.edu;unc.edu", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "London School of Economics and Political Science;Shanghai University of Finance and Economics;University of North Carolina", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lse.ac.uk;http://www.sufe.edu.cn;https://www.unc.edu", "aff_unique_abbr": "LSE;SUFE;UNC", "aff_campus_unique_index": "0;2", "aff_campus_unique": "London;;Chapel Hill", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United Kingdom;China;United States" }, { "title": "Mobilizing Personalized Federated Learning in Infrastructure-Less and Heterogeneous Environments via Random Walk Stochastic ADMM", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72310", "id": "EcmqyXekuP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74088c68894b99383c12399c9c637be9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EcmqyXekuP", "openreview": "https://openreview.net/forum?id=EcmqyXekuP", "poster": "/media/PosterPDFs/NeurIPS%202023/72310.png?t=1699594330.4553556", "slides": "https://nips.cc/virtual/2023/poster/72310", "video": "https://nips.cc/virtual/2023/poster/72310", "author_site": "Ziba Parsons, Fei Dou, Houyi Du, Zheng Song, Jin Lu", "tldr": "", "abstract": "This paper explores the challenges of implementing Federated Learning (FL) in practical scenarios featuring isolated nodes with data heterogeneity, which can only be connected to the server through wireless links in an infrastructure-less environment. To overcome these challenges, we propose a novel mobilizing personalized FL approach, which aims to facilitate mobility and resilience. Specifically, we develop a novel optimization algorithm called Random Walk Stochastic Alternating Direction Method of Multipliers (RWSADMM). RWSADMM capitalizes on the server's random movement toward clients and formulates local proximity among their adjacent clients based on hard inequality constraints rather than requiring consensus updates or introducing bias via regularization methods. 
To mitigate the computational burden on the clients, an efficient stochastic solver of the approximated optimization problem is designed in RWSADMM, which provably converges to the stationary point almost surely in expectation. Our theoretical and empirical results demonstrate the provable fast convergence and substantial accuracy improvements achieved by RWSADMM compared to baseline methods, along with its benefits of reduced communication costs and enhanced scalability.", "keywords": "Mobilized Federated Networks;Personalized Federated Learning;Random Walk;Stochastic ADMM", "primary_area": "", "supplementary_material": "/attachment/e20e4f57a7656bfa72e3c6d839203cfb8d9d9869.pdf", "author": "Ziba Parsons;Fei Dou;Houyi Du;Zheng Song;Jin Lu", "authorids": "~Ziba_Parsons1;~Fei_Dou1;~Houyi_Du1;~Zheng_Song2;~Jin_Lu1", "gender": "F;;M;;M", "homepage": "https://sites.google.com/view/ziba-parsons/;;;https://sites.google.com/vt.edu/songz/;https://jinlucs.github.io/", "dblp": "345/8689;;;29/7459;33/863-1.html", "google_scholar": "ZgHsiJIAAAAJ;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0003-1356-0202", "linkedin": "ziba-parsons-b1051372/;;houyi-du/;;", "or_profile": "~Ziba_Parsons1;~Fei_Dou1;~Houyi_Du1;~Zheng_Song2;~Jin_Lu1", "aff": "University of Michigan - Dearborn;;University of Michigan - Dearborn;University of Michigan - Dearborn;University of Michigan", "aff_domain": "umich.edu;;umich.edu;umich.edu;umich.edu", "position": "PhD student;;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nparsons2023mobilizing,\ntitle={Mobilizing Personalized Federated Learning in Infrastructure-Less and Heterogeneous Environments via Random Walk Stochastic {ADMM}},\nauthor={Ziba Parsons and Fei Dou and Houyi Du and Zheng Song and Jin Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EcmqyXekuP}\n}", "github": "", "project": "", "reviewers": "cJVn;46UP;J9bJ", "pdf_size": 2930575, "rating": "4;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "63;86;75", "wc_strengths": "20;156;48", "wc_weaknesses": "141;244;87", "wc_questions": "46;189;39", "wc_limitations": "11;1;1", "wc_review": "281;676;250", "wc_reply_reviewers": "0;28;18", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 74.66666666666667, 9.392668535736913 ], "wc_strengths_avg": [ 74.66666666666667, 58.63635580612272 ], "wc_weaknesses_avg": [ 157.33333333333334, 65.12722591632132 ], "wc_questions_avg": [ 91.33333333333333, 69.11986368299316 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 402.3333333333333, 193.92495684900612 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 11.585431464655176 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11573960285799507064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": 
"umich.edu;;umich.edu;umich.edu;umich.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://umdearborn.edu", "aff_unique_abbr": "UM-Dearborn", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Dearborn;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Better Correlation and Robustness: A Distribution-Balanced Self-Supervised Learning Framework for Automatic Dialogue Evaluation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72309", "id": "Ecv1GMiXSk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a8b148559549ce33261e79b4400e0d77-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ecv1GMiXSk", "openreview": "https://openreview.net/forum?id=Ecv1GMiXSk", "poster": "/media/PosterPDFs/NeurIPS%202023/72309.png?t=1699262054.2526236", "slides": "https://nips.cc/virtual/2023/poster/72309", "video": "https://nips.cc/virtual/2023/poster/72309", "author_site": "Peiwen Yuan, Xinglin Wang, Jiayi Shi, Bin Sun, Yiwei Li, Prof. Kan", "tldr": "", "abstract": "Turn-level dialogue evaluation models (TDEMs), using self-supervised learning (SSL) framework, have achieved state-of-the-art performance in open-domain dialogue evaluation. However, these models inevitably face two potential problems. First, they have low correlations with humans on medium coherence samples as the SSL framework often brings training data with unbalanced coherence distribution. Second, the SSL framework leads TDEM to nonuniform score distribution. There is a danger that the nonuniform score distribution will weaken the robustness of TDEM through our theoretical analysis. To tackle these problems, we propose Better Correlation and Robustness (BCR), a distribution-balanced self-supervised learning framework for TDEM. Given a dialogue dataset, BCR offers an effective training set reconstructing method to provide coherence-balanced training signals and further facilitate balanced evaluating abilities of TDEM. To get a uniform score distribution, a novel loss function is proposed, which can adjust adaptively according to the uniformity of score distribution estimated by kernel density estimation. Comprehensive experiments on 17 benchmark datasets show that vanilla BERT-base using BCR outperforms SOTA methods significantly by 11.3% on average. 
BCR also demonstrates strong generalization ability as it can lead multiple SOTA methods to attain better correlation and robustness.", "keywords": "Natural language process;Automatic dialog evaluation", "primary_area": "", "supplementary_material": "", "author": "Peiwen Yuan;Xinglin Wang;Jiayi Shi;Bin Sun;Yiwei Li;Kan Li", "authorids": "~Peiwen_Yuan1;3120225462@bit.edu.cn;1120191058@bit.edu.cn;~Bin_Sun3;~Yiwei_Li1;~Kan_Li3", "gender": "M;;;M;M;M", "homepage": "https://ypw0102.github.io/;;;;;", "dblp": "327/9196.html;;;01/5401-4.html;48/9884-1;21/2083-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=cUB5XN8AAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=yMZeaoMAAAAJ;", "orcid": "0000-0002-2403-8295;;;;;", "linkedin": ";;;;;", "or_profile": "~Peiwen_Yuan1;3120225462@bit.edu.cn;1120191058@bit.edu.cn;~Bin_Sun3;~Yiwei_Li1;~Kan_Li3", "aff": "Beijing Institute of Technology;;;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;;;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "PhD student;;;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyuan2023better,\ntitle={Better Correlation and Robustness: A Distribution-Balanced Self-Supervised Learning Framework for Automatic Dialogue Evaluation},\nauthor={Peiwen Yuan and Xinglin Wang and Jiayi Shi and Bin Sun and Yiwei Li and Kan Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ecv1GMiXSk}\n}", "github": "", "project": "", "reviewers": "z9qv;WZUk;yNip;qdHL;7oUE", "pdf_size": 1088324, "rating": "4;5;6;6;7", "confidence": "4;3;4;3;4", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "69;80;196;84;122", "wc_strengths": "41;65;82;62;62", "wc_weaknesses": "267;156;221;15;63", "wc_questions": "9;202;140;186;38", "wc_limitations": "7;8;41;138;22", "wc_review": "393;511;680;485;307", "wc_reply_reviewers": "117;17;647;0;13", "wc_reply_authors": "230;9;800;86;7", "reply_reviewers": "1;1;2;0;1", "reply_authors": "3;2;4;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 110.2, 46.468914340664334 ], "wc_strengths_avg": [ 62.4, 13.032267646115928 ], "wc_weaknesses_avg": [ 144.4, 94.23714766481422 ], "wc_questions_avg": [ 115.0, 77.9743547584717 ], "wc_limitations_avg": [ 43.2, 48.97101183353271 ], "wc_review_avg": [ 475.2, 125.19488807455359 ], "wc_reply_reviewers_avg": [ 158.8, 247.65653635630133 ], "wc_reply_authors_avg": [ 226.4, 298.0406683659128 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.08006407690254361, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10219131528160655046&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "bit.edu.cn;;;bit.edu.cn;bit.edu.cn;bit.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "RRHF: Rank 
Responses to Align Language Models with Human Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72308", "id": "EdIGMCHk4l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/23e6f78bdec844a9f7b6c957de2aae91-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EdIGMCHk4l", "openreview": "https://openreview.net/forum?id=EdIGMCHk4l", "poster": "/media/PosterPDFs/NeurIPS%202023/72308.png?t=1698047606.5343757", "slides": "https://nips.cc/virtual/2023/poster/72308", "video": "https://nips.cc/virtual/2023/poster/72308", "author_site": "Hongyi Yuan, Zheng Yuan, Chuanqi Tan, Wei Wang, Songfang Huang, Fei Huang", "tldr": "", "abstract": "Reinforcement Learning from Human Feedback (RLHF) facilitates the alignment of large language models with human preferences, significantly enhancing the quality of interactions between humans and models. \nInstructGPT implements RLHF through several stages, including Supervised Fine-Tuning (SFT), reward model training, and Proximal Policy Optimization (PPO). \nHowever, PPO is sensitive to hyperparameters and requires multiple models in its standard implementation, making it hard to train and scale up to larger parameter counts.\nIn contrast, we propose a novel learning paradigm called RRHF, which scores sampled responses from different sources via a logarithm of conditional probabilities and learns to align these probabilities with human preferences through ranking loss.\nRRHF can leverage sampled responses from various sources including the model responses from itself, other large language model responses, and human expert responses to learn to rank them.\nRRHF only needs 1 to 2 models during tuning and can efficiently align language models with human preferences robustly without complex hyperparameter tuning. \nAdditionally, RRHF can be considered an extension of SFT and reward model training while being simpler than PPO in terms of coding, model counts, and hyperparameters. 
\nWe evaluate RRHF on the Helpful and Harmless dataset, demonstrating comparable alignment performance with PPO by reward model score and human labeling.\nExtensive experiments show that the performance of RRHF is highly related to sampling quality which suggests RRHF is a best-of-$n$ learner.", "keywords": "Large Language Model;Human Alignment", "primary_area": "", "supplementary_material": "/attachment/1108e979cf59f12ca2af17b2d524cb18a0f86307.pdf", "author": "Hongyi Yuan;Zheng Yuan;Chuanqi Tan;Wei Wang;Songfang Huang;Fei Huang", "authorids": "~Hongyi_Yuan1;~Zheng_Yuan2;~Chuanqi_Tan3;~Wei_Wang41;~Songfang_Huang1;~Fei_Huang1", "gender": "M;M;M;M;;M", "homepage": ";;;;https://www.coe.pku.edu.cn/teaching/all_time/13007.html;https://sites.google.com/view/fei-huang", "dblp": "308/0909;56/2877-2;148/4497;35/7092-225;05/4919;h/FeiHuang.html", "google_scholar": "FG3O4i8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;tOfo4ncAAAAJ;0zSeT3oAAAAJ;3So9lV8AAAAJ;9r98PpoAAAAJ", "orcid": ";;0000-0002-6676-3057;0000-0002-7028-9845;;", "linkedin": ";;;;;fei-huang-cas-cmu", "or_profile": "~Hongyi_Yuan1;~Zheng_Yuan2;~Chuanqi_Tan3;~Wei_Wang41;~Songfang_Huang1;~Fei_Huang2", "aff": "Tsinghua University;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group US", "aff_domain": "tsinghua.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "PhD student;Researcher;Full-time employee;Researcher;Senior Staff Engineer;Senior Research Director", "bibtex": "@inproceedings{\nyuan2023rrhf,\ntitle={{RRHF}: Rank Responses to Align Language Models with Human Feedback},\nauthor={Hongyi Yuan and Zheng Yuan and Chuanqi Tan and Wei Wang and Songfang Huang and Fei Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EdIGMCHk4l}\n}", "github": "", "project": "", "reviewers": "BSoN;BZjW;Mb9H;usKz;UQEH;DofH", "pdf_size": 625751, "rating": "4;4;5;6;6;7", "confidence": "4;5;4;4;3;4", "soundness": "3;3;3;3;3;3", "novelty": "1;1;3;3;2;4", "presentation": "2;1;3;2;3;3", "wc_summary": "88;73;76;71;72;202", "wc_strengths": "83;59;108;52;97;49", "wc_weaknesses": "374;206;115;179;233;107", "wc_questions": "3;48;4;105;47;46", "wc_limitations": "3;1;4;1;10;31", "wc_review": "551;387;307;408;459;435", "wc_reply_reviewers": "16;394;10;0;306;24", "wc_reply_authors": "0;572;0;0;154;2", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "1;3;1;1;2;2", "rating_avg": [ 5.333333333333333, 1.1055415967851332 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 1.1055415967851334 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 97.0, 47.3004580668447 ], "wc_strengths_avg": [ 74.66666666666667, 22.72052425060263 ], "wc_weaknesses_avg": [ 202.33333333333334, 89.14719413544222 ], "wc_questions_avg": [ 42.166666666666664, 34.18292296194435 ], "wc_limitations_avg": [ 8.333333333333334, 10.57775443508162 ], "wc_review_avg": [ 424.5, 73.9228201121503 ], "wc_reply_reviewers_avg": [ 125.0, 161.27306036657208 ], "wc_reply_authors_avg": [ 121.33333333333333, 209.1942212926118 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.6666666666666667, 0.74535599249993 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 91, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2550271344739652702&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Tsinghua University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "THU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Isometric Quotient Variational Auto-Encoders for Structure-Preserving Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72307", "id": "EdgPb3ngR4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7af8e3dfefe6e3141144197b8fa44f79-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EdgPb3ngR4", "openreview": "https://openreview.net/forum?id=EdgPb3ngR4", "poster": "/media/PosterPDFs/NeurIPS%202023/72307.png?t=1701482285.950327", "slides": "https://nips.cc/virtual/2023/poster/72307", "video": "https://nips.cc/virtual/2023/poster/72307", "author_site": "In Huh, changwook jeong, Jae Myung Choe, YOUNGGU KIM, Daesin Kim", "tldr": "", "abstract": "We study structure-preserving low-dimensional representation of a data manifold embedded in a high-dimensional observation space based on variational auto-encoders (VAEs). We approach this by decomposing the data manifold $\\mathcal{M}$ as $\\mathcal{M} = \\mathcal{M} / G \\times G$, where $G$ and $\\mathcal{M} / G$ are a group of symmetry transformations and a quotient space of $\\mathcal{M}$ up to $G$, respectively. From this perspective, we define the structure-preserving representation of such a manifold as a latent space $\\mathcal{Z}$ which is isometrically isomorphic (i.e., distance-preserving) to the quotient space $\\mathcal{M} / G$ rather $\\mathcal{M}$ (i.e., symmetry-preserving). To this end, we propose a novel auto-encoding framework, named isometric quotient VAEs (IQVAEs), that can extract the quotient space from observations and learn the Riemannian isometry of the extracted quotient in an unsupervised manner. 
Empirical proof-of-concept experiments reveal that the proposed method can find a meaningful representation of the learned data and outperform other competitors for downstream tasks.", "keywords": "representation learning;auto-encoders;geometry;symmetry", "primary_area": "", "supplementary_material": "/attachment/54377ef067f58cee788bf334f69a8f4ec5e010f7.pdf", "author": "In Huh;changwook jeong;Jae Myung Choe;Young-Gu Kim;Dae Sin Kim", "authorids": "~In_Huh1;~changwook_jeong1;~Jae_Myung_Choe1;yg09.kim@samsung.com;daesin.kim@samsung.com", "gender": ";M;M;;", "homepage": ";https://sites.google.com/view/jeong-research-group/home;https://scholar.google.com/citations?user=85zY1kMAAAAJ&hl=en;;", "dblp": "15/3634;;;;", "google_scholar": "SrTRPiAAAAAJ;CPYw1roAAAAJ;85zY1kMAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~In_Huh1;~changwook_jeong1;~Jae_Myung_Choe1;yg09.kim@samsung.com;daesin.kim@samsung.com", "aff": "Samsung Electronics;Ulsan National Institute of Science and Technology;Samsung Electronics;;", "aff_domain": "samsung.com;unist.ac.kr;samsung.com;;", "position": "Researcher;Associate Professor;Principal Researcher;;", "bibtex": "@inproceedings{\nhuh2023isometric,\ntitle={Isometric Quotient Variational Auto-Encoders for Structure-Preserving Representation Learning},\nauthor={In Huh and changwook jeong and Jae Myung Choe and Young-Gu Kim and Dae Sin Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EdgPb3ngR4}\n}", "github": "", "project": "", "reviewers": "kuEf;AAhr;nvnJ;Va6B", "pdf_size": 2848361, "rating": "4;4;5;7", "confidence": "4;3;3;4", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "2;3;4;3", "wc_summary": "29;119;105;93", "wc_strengths": "14;51;133;68", "wc_weaknesses": "8;161;146;166", "wc_questions": "69;36;436;61", "wc_limitations": "1;73;161;46", "wc_review": "121;440;981;434", "wc_reply_reviewers": "0;0;385;0", "wc_reply_authors": "0;0;611;0", "reply_reviewers": "0;0;2;0", "reply_authors": "1;1;3;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.5, 34.449238017697866 ], "wc_strengths_avg": [ 66.5, 43.072613108563544 ], "wc_weaknesses_avg": [ 120.25, 65.22413280374067 ], "wc_questions_avg": [ 150.5, 165.2823342042337 ], "wc_limitations_avg": [ 70.25, 58.36683561749771 ], "wc_review_avg": [ 494.0, 309.35982286004753 ], "wc_reply_reviewers_avg": [ 96.25, 166.70989022850443 ], "wc_reply_authors_avg": [ 152.75, 264.570760856146 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17135362566066297391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "samsung.com;unist.ac.kr;samsung.com;;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Samsung;Ulsan National Institute of Science and Technology", "aff_unique_dep": "Samsung Electronics;", "aff_unique_url": "https://www.samsung.com;https://www.unist.ac.kr", "aff_unique_abbr": "Samsung;UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "Eewh7sl0Xj", "title": "SKI to go Faster: 
Accelerating Toeplitz Neural Networks via Asymmetric Kernels", "track": "main", "status": "Reject", "tldr": "", "abstract": "Toeplitz Neural Networks (TNNs) (Qin et al. 2023) are a recent sequence model with impressive results. They require $O(n\\log n)$ computational complexity and $O(n)$ relative positional encoder (RPE) multi-layer perceptron (MLP) and decay bias calls. We aim to reduce both. We first note that the RPE is a non-SPD (symmetric positive definite) kernel and the Toeplitz matrices are pseudo-Gram matrices. Further 1) the learned kernels display spiky behavior near the main diagonals with otherwise smooth behavior; 2) the RPE MLP is slow. For bidirectional models, this motivates a sparse plus low-rank Toeplitz matrix decomposition. For the sparse component's action, we do a small 1D convolution. For the low rank component, we replace the RPE MLP with linear interpolation and use asymmetric Structured Kernel Interpolation (SKI) (Wilson et al. 2015) for $O(n)$ complexity: we provide rigorous error analysis. For causal models, ``fast'' causal masking (Katharopoulos et al. 2020) negates SKI's benefits. Working in the frequency domain, we avoid an explicit decay bias. To enforce causality, we represent the kernel via the real part of its frequency response using the RPE and compute the imaginary part via a Hilbert transform. This maintains $O(n \\log n)$ complexity but achieves an absolute speedup. Modeling the frequency response directly is also competitive for bidirectional training, using one fewer FFT. We set a speed state of the art on Long Range Arena with minimal score degradation.", "keywords": "language models;kernel methods;fourier analysis", "primary_area": "", "supplementary_material": "/attachment/2e6b3e471bb49cb9ccd9b88f44e3f9c2edf64da5.pdf", "author": "Alexander Moreno;Jonathan Mei;Luke Walters", "authorids": "~Alexander_Moreno1;~Jonathan_Mei1;~Luke_Walters1", "gender": "M;M;", "homepage": ";http://jonathan.mei.to;", "dblp": "161/6588;142/0040;", "google_scholar": "zoqP2-IAAAAJ;bm4orewAAAAJ;", "orcid": ";;", "linkedin": ";;luke-walters-2053119", "or_profile": "~Alexander_Moreno1;~Jonathan_Mei1;~Luke_Walters1", "aff": "Luminous Computing;Luminous Computing;Luminous", "aff_domain": "lmns.com;lmns.com;luminous.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@misc{\nmoreno2023ski,\ntitle={{SKI} to go Faster: Accelerating Toeplitz Neural Networks via Asymmetric Kernels},\nauthor={Alexander Moreno and Jonathan Mei and Luke Walters},\nyear={2023},\nurl={https://openreview.net/forum?id=Eewh7sl0Xj}\n}", "github": "", "project": "", "reviewers": "aAC6;fqtA;qNcu;hMrX", "site": "https://openreview.net/forum?id=Eewh7sl0Xj", "pdf_size": 719736, "rating": "4;5;5;6", "confidence": "4;3;3;4", "soundness": "2;2;3;3", "novelty": "2;2;2;2", "presentation": "2;2;3;3", "wc_summary": "162;82;239;192", "wc_strengths": "82;85;99;220", "wc_weaknesses": "406;1071;59;348", "wc_questions": "23;101;93;30", "wc_limitations": "2;66;1;1", "wc_review": "675;1405;491;791", "wc_reply_reviewers": "193;1697;0;6", "wc_reply_authors": "0;1020;0;12", "reply_reviewers": "1;4;0;1", "reply_authors": "1;5;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 168.75, 57.11118541932044 ], "wc_strengths_avg": [ 121.5, 57.22979992975687 ], "wc_weaknesses_avg": [ 471.0, 370.5057354481844 ], "wc_questions_avg": [ 61.75, 35.449788433783354 ], 
"wc_limitations_avg": [ 17.5, 28.00446392988089 ], "wc_review_avg": [ 840.5, 343.02004314616954 ], "wc_reply_reviewers_avg": [ 474.0, 710.3502657140349 ], "wc_reply_authors_avg": [ 258.0, 439.9681806676478 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qlEOqZ6TQNcJ:scholar.google.com/&scioq=SKI+to+go+Faster:+Accelerating+Toeplitz+Neural+Networks+via+Asymmetric+Kernels&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Luminous Computing;Luminous", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Speculative Decoding with Big Little Decoder", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72306", "id": "EfMyf9MC3t", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7b97adeafa1c51cf65263459ca9d0d7c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EfMyf9MC3t", "openreview": "https://openreview.net/forum?id=EfMyf9MC3t", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72306", "video": "https://nips.cc/virtual/2023/poster/72306", "author_site": "Sehoon Kim, Karttikeya Mangalam, Suhong Moon, Jitendra Malik, Michael Mahoney, Amir Gholami, Kurt Keutzer", "tldr": "", "abstract": "The recent emergence of Large Language Models based on the Transformer architecture has enabled dramatic advancements in the field of Natural Language Processing. However, these models have long inference latency, which limits their deployment and makes them prohibitively expensive for various real-time applications. The inference latency is further exacerbated by autoregressive generative tasks, as models need to run iteratively to generate tokens sequentially without leveraging token-level parallelization. To address this, we propose Big Little Decoder (BiLD), a framework that can improve inference efficiency and latency for a wide range of text generation applications. The BiLD framework contains two models with different sizes that collaboratively generate text. The small model runs autoregressively to generate text with a low inference cost, and the large model is only invoked occasionally to refine the small model\u2019s inaccurate predictions in a non-autoregressive manner. To coordinate the small and large models, BiLD introduces two simple yet effective policies: (1) the fallback policy that determines when to hand control over to the large model; and (2) the rollback policy that determines when the large model needs to correct the small model's inaccurate predictions. To evaluate our framework across different tasks and models, we apply BiLD to various text generation scenarios encompassing machine translation on IWSLT 2017 De-En and WMT 2014 De-En, and summarization on XSUM and CNN/DailyMail. On an NVIDIA T4 GPU, our framework achieves a speedup of up to 2.12x speedup with minimal generation quality degradation. Furthermore, our framework is fully plug-and-play and can be applied without any modifications in the training process or model architecture. 
Our code is open-sourced.", "keywords": "Transformer;efficient inference;efficient model;decoding", "primary_area": "", "supplementary_material": "/attachment/00de0dc2da82978cc45c0208e4995020ec481d3b.zip", "author": "Sehoon Kim;Karttikeya Mangalam;Suhong Moon;Jitendra Malik;Michael W. Mahoney;Amir Gholami;Kurt Keutzer", "authorids": "~Sehoon_Kim1;~Karttikeya_Mangalam1;~Suhong_Moon1;~Jitendra_Malik2;~Michael_W._Mahoney1;~Amir_Gholami2;~Kurt_Keutzer1", "gender": "M;M;M;M;;;M", "homepage": "https://sehoonkim.org;http://karttikeya.github.io/;;https://people.eecs.berkeley.edu/~malik/;;;https://people.eecs.berkeley.edu/~keutzer/", "dblp": ";200/8205;242/2290;58/2944;;;k/KurtKeutzer.html", "google_scholar": "zQABr7QAAAAJ;2l1fWEoAAAAJ;https://scholar.google.com/citations?hl=en;oY9R5YQAAAAJ;;;ID9QePIAAAAJ", "orcid": ";;;0000-0003-3695-1580;;;0000-0003-3868-8501", "linkedin": "sehoon-kim-13a1b51b1/;;;;;;kurtkeutzer/", "or_profile": "~Sehoon_Kim1;~Karttikeya_Mangalam1;~Suhong_Moon1;~Jitendra_Malik2;~Michael_W._Mahoney1;~Amir_Gholami2;~Kurt_Keutzer1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;;;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;;;berkeley.edu", "position": "PhD student;PhD student;PhD student;Full Professor;;;Full Professor", "bibtex": "@inproceedings{\nkim2023speculative,\ntitle={Speculative Decoding with Big Little Decoder},\nauthor={Sehoon Kim and Karttikeya Mangalam and Suhong Moon and Jitendra Malik and Michael W. Mahoney and Amir Gholami and Kurt Keutzer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EfMyf9MC3t}\n}", "github": "", "project": "", "reviewers": "asbG;NSPU;wHu9;mW2r;DxpA", "pdf_size": 1050938, "rating": "4;5;6;6;8", "confidence": "4;4;4;4;4", "soundness": "3;2;3;3;4", "novelty": "2;2;3;2;4", "presentation": "2;3;3;3;4", "wc_summary": "72;110;168;119;403", "wc_strengths": "67;77;53;76;65", "wc_weaknesses": "136;285;116;166;93", "wc_questions": "45;22;21;164;55", "wc_limitations": "2;1;13;24;1", "wc_review": "322;495;371;549;617", "wc_reply_reviewers": "69;133;0;134;11", "wc_reply_authors": "522;0;0;638;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 174.4, 118.32260984275153 ], "wc_strengths_avg": [ 67.6, 8.708616422830897 ], "wc_weaknesses_avg": [ 159.2, 67.31834816749442 ], "wc_questions_avg": [ 61.4, 52.955075299729295 ], "wc_limitations_avg": [ 8.2, 9.10823802938856 ], "wc_review_avg": [ 470.8, 109.70578836141691 ], "wc_reply_reviewers_avg": [ 69.4, 57.35015257172382 ], "wc_reply_authors_avg": [ 232.0, 286.4988656173005 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 98, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13839281151324486667&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;;;berkeley.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": 
"https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "STREAMER: Streaming Representation Learning and Event Segmentation in a Hierarchical Manner", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72305", "id": "EfTMRQn00d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8f0d446441a938d9de420a8ab8d7fd36-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EfTMRQn00d", "openreview": "https://openreview.net/forum?id=EfTMRQn00d", "poster": "/media/PosterPDFs/NeurIPS%202023/72305.png?t=1701697106.732041", "slides": "https://nips.cc/virtual/2023/poster/72305", "video": "https://nips.cc/virtual/2023/poster/72305", "author_site": "Ramy Mounir, Sujal Vijayaraghavan, Sudeep Sarkar", "tldr": "", "abstract": "We present a novel self-supervised approach for hierarchical representation learning and segmentation of perceptual inputs in a streaming fashion. Our research addresses how to semantically group streaming inputs into chunks at various levels of a hierarchy while simultaneously learning, for each chunk, robust global representations throughout the domain. To achieve this, we propose STREAMER, an architecture that is trained layer-by-layer, adapting to the complexity of the input domain. In our approach, each layer is trained with two primary objectives: making accurate predictions into the future and providing necessary information to other levels for achieving the same objective. The event hierarchy is constructed by detecting prediction error peaks at different levels, where a detected boundary triggers a bottom-up information flow. At an event boundary, the encoded representation of inputs at one layer becomes the input to a higher-level layer. Additionally, we design a communication module that facilitates top-down and bottom-up exchange of information during the prediction process. Notably, our model is fully self-supervised and trained in a streaming manner, enabling a single pass on the training data. This means that the model encounters each input only once and does not store the data. We evaluate the performance of our model on the egocentric EPIC-KITCHENS dataset, specifically focusing on temporal event segmentation. Furthermore, we conduct event retrieval experiments using the learned representations to demonstrate the high quality of our video event representations. 
Illustration videos and code are available on our project page: https://ramymounir.com/publications/streamer", "keywords": "predictive learning;hierarchical event segmentation;self-supervised learning;streaming processing;perceptual inputs;biologically-plausible.", "primary_area": "", "supplementary_material": "/attachment/07a03c17b7745afc43efff041a72028ffc5eed78.zip", "author": "Ramy Mounir;Sujal Vijayaraghavan;Sudeep Sarkar", "authorids": "~Ramy_Mounir1;~Sujal_Vijayaraghavan1;~Sudeep_Sarkar1", "gender": "M;M;Unspecified", "homepage": "https://ramymounir.com/;https://sujal.tv;", "dblp": "264/5593.html;340/5832;72/3470", "google_scholar": "93KqVdMAAAAJ;https://scholar.google.com/citations?hl=en;xX2D9FQAAAAJ", "orcid": ";0000-0003-2716-8199;0000-0001-7332-4207", "linkedin": "ramy-mounir-2a8b76113;https://linkedin.com/in/tvsujal;profsudeepsarkar", "or_profile": "~Ramy_Mounir1;~Sujal_Vijayaraghavan1;~Sudeep_Sarkar1", "aff": "University of South Florida;University of South Florida;University of South Florida", "aff_domain": "usf.edu;usf.edu;usf.edu", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nmounir2023streamer,\ntitle={{STREAMER}: Streaming Representation Learning and Event Segmentation in a Hierarchical Manner},\nauthor={Ramy Mounir and Sujal Vijayaraghavan and Sudeep Sarkar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EfTMRQn00d}\n}", "github": "", "project": "", "reviewers": "s9dg;L8go;egXC;NKYW", "pdf_size": 8224414, "rating": "4;4;5;6", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "80;107;156;73", "wc_strengths": "75;111;85;119", "wc_weaknesses": "73;244;189;229", "wc_questions": "21;2;237;261", "wc_limitations": "6;6;40;7", "wc_review": "255;470;707;689", "wc_reply_reviewers": "0;68;104;0", "wc_reply_authors": "0;575;180;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.0, 32.59601202601324 ], "wc_strengths_avg": [ 97.5, 18.07622748252522 ], "wc_weaknesses_avg": [ 183.75, 67.02751300771945 ], "wc_questions_avg": [ 130.25, 119.24213810562104 ], "wc_limitations_avg": [ 14.75, 14.58380951603524 ], "wc_review_avg": [ 530.25, 184.27883085151154 ], "wc_reply_reviewers_avg": [ 43.0, 44.844174649557324 ], "wc_reply_authors_avg": [ 188.75, 234.7971198716032 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1860936186534010583&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 5, "email": "usf.edu;usf.edu;usf.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of South Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.usf.edu", "aff_unique_abbr": "USF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Training Private Models That Know What They Don\u2019t Know", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72304", "id": "EgCjf1vjMB", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a8526465a91166fbb90aaa8452b21eda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EgCjf1vjMB", "openreview": "https://openreview.net/forum?id=EgCjf1vjMB", "poster": "/media/PosterPDFs/NeurIPS%202023/72304.png?t=1699573351.6548245", "slides": "https://nips.cc/virtual/2023/poster/72304", "video": "https://nips.cc/virtual/2023/poster/72304", "author_site": "Stephan Rabanser, Anvith Thudi, Abhradeep Guha Thakurta, Krishnamurthy Dvijotham, Nicolas Papernot", "tldr": "", "abstract": "Training reliable deep learning models which avoid making overconfident but incorrect predictions is a longstanding challenge. This challenge is further exacerbated when learning has to be differentially private: protection provided to sensitive data comes at the price of injecting additional randomness into the learning process. In this work, we conduct a thorough empirical investigation of selective classifiers---that can abstain under uncertainty---under a differential privacy constraint. We find that some popular selective prediction approaches are ineffective in a differentially private setting because they increase the risk of privacy leakage. At the same time, we identify that a recent approach that only uses checkpoints produced by an off-the-shelf private learning algorithm stands out as particularly suitable under DP. Further, we show that differential privacy does not just harm utility but also degrades selective classification performance. To analyze this effect across privacy levels, we propose a novel evaluation mechanism which isolates selective prediction performance across model utility levels at full coverage. Our experimental results show that recovering the performance level attainable by non-private models is possible but comes at a considerable coverage cost as the privacy budget decreases.", "keywords": "differential privacy;selective classification;selective prediction;abstain option;reject option;uncertainty quantification;misclassification detection", "primary_area": "", "supplementary_material": "/attachment/384683d5acffd1502e80f1722aa0e02b821976bb.zip", "author": "Stephan Rabanser;Anvith Thudi;Abhradeep Guha Thakurta;Krishnamurthy Dj Dvijotham;Nicolas Papernot", "authorids": "~Stephan_Rabanser1;~Anvith_Thudi1;~Abhradeep_Guha_Thakurta1;~Krishnamurthy_Dj_Dvijotham1;~Nicolas_Papernot1", "gender": "M;M;M;M;M", "homepage": "https://rabanser.dev;https://www.anvith.com;https://athakurta.squarespace.com/;https://www.papernot.fr;http://dvij.github.io", "dblp": "210/2399;;31/8315;162/1405;16/8758", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?hl=en;1rV69hMAAAAJ;cGxq0cMAAAAJ;BUtloecAAAAJ", "orcid": ";;;;", "linkedin": ";anvith-thudi-54b5621bb/?originalSubdomain=ca;;nicolaspapernot;", "or_profile": "~Stephan_Rabanser1;~Anvith_Thudi1;~Abhradeep_Guha_Thakurta1;~Nicolas_Papernot1;~Krishnamurthy_Dvijotham2", "aff": "University of Cambridge;University of Toronto;Google;Google;Google Brain", "aff_domain": "cam.ac.uk;utoronto.ca;google.com;google.com;google.com", "position": "Visiting Graduate Student;PhD student;Senior Research Scientist;Research Scientist;research scientist ", "bibtex": "@inproceedings{\nrabanser2023training,\ntitle={Training Private Models That Know What They Don{\\textquoteright}t Know},\nauthor={Stephan Rabanser and Anvith Thudi and Abhradeep Guha Thakurta and Krishnamurthy Dj Dvijotham and Nicolas Papernot},\nbooktitle={Thirty-seventh Conference on Neural 
Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EgCjf1vjMB}\n}", "github": "", "project": "", "reviewers": "rhFq;sqFp;atra;9iMW", "pdf_size": 1547583, "rating": "5;6;7;7", "confidence": "3;2;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "3;1;4;4", "wc_summary": "81;62;132;87", "wc_strengths": "17;58;120;96", "wc_weaknesses": "138;173;539;70", "wc_questions": "140;113;56;10", "wc_limitations": "1;11;27;7", "wc_review": "377;417;874;270", "wc_reply_reviewers": "122;106;138;0", "wc_reply_authors": "720;281;143;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 90.5, 25.675864152935535 ], "wc_strengths_avg": [ 72.75, 39.04724702203729 ], "wc_weaknesses_avg": [ 230.0, 182.20455537664256 ], "wc_questions_avg": [ 79.75, 50.410192421771214 ], "wc_limitations_avg": [ 11.5, 9.630680142129112 ], "wc_review_avg": [ 484.5, 231.21040201513426 ], "wc_reply_reviewers_avg": [ 91.5, 54.025456962435776 ], "wc_reply_authors_avg": [ 286.0, 269.54869689909464 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1176187803211610910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;utoronto.ca;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of Cambridge;University of Toronto;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.cam.ac.uk;https://www.utoronto.ca;https://www.google.com", "aff_unique_abbr": "Cambridge;U of T;Google", "aff_campus_unique_index": "0;2;2;2", "aff_campus_unique": "Cambridge;;Mountain View", "aff_country_unique_index": "0;1;2;2;2", "aff_country_unique": "United Kingdom;Canada;United States" }, { "title": "SwapPrompt: Test-Time Prompt Adaptation for Vision-Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72303", "id": "EhdNQiOWgQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cdd0640218a27e9e2c0e52e324e25db0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EhdNQiOWgQ", "openreview": "https://openreview.net/forum?id=EhdNQiOWgQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72303.png?t=1699792347.243019", "slides": "https://nips.cc/virtual/2023/poster/72303", "video": "https://nips.cc/virtual/2023/poster/72303", "author_site": "XIAOSONG MA, Jie ZHANG, Song Guo, Wenchao Xu", "tldr": "", "abstract": "Test-time adaptation (TTA) is a special and practical setting in unsupervised domain adaptation, which allows a pre-trained model in a source domain to adapt to unlabeled test data in another target domain. To avoid the computation-intensive backbone fine-tuning process, the zero-shot generalization potentials of the emerging pre-trained vision-language models (e.g., CLIP, CoOp) are leveraged to only tune the run-time prompt for unseen test domains. 
However, existing solutions have yet to fully exploit the representation capabilities of pre-trained models, as they focus only on entropy-based optimization, and their performance falls far below that of supervised prompt adaptation methods, e.g., CoOp. In this paper, we propose SwapPrompt, a novel framework that effectively leverages self-supervised contrastive learning to facilitate test-time prompt adaptation. SwapPrompt employs a dual-prompt paradigm, i.e., an online prompt and a target prompt that is averaged from the online prompt to retain historical information. In addition, SwapPrompt applies a swapped prediction mechanism, which takes advantage of the representation capabilities of pre-trained models to enhance the online prompt via contrastive learning. Specifically, we use the online prompt together with an augmented view of the input image to predict the class assignment generated by the target prompt together with an alternative augmented view of the same image. The proposed SwapPrompt can be easily deployed on vision-language models without additional requirements, and experimental results show that it achieves state-of-the-art test-time adaptation performance on ImageNet and nine other datasets. It is also shown that SwapPrompt can even achieve performance comparable to supervised prompt adaptation methods.", "keywords": "Test-Time Adaptation;Prompt Learning;Unsupervised Representation Learning", "primary_area": "", "supplementary_material": "/attachment/a279173a13ec718d8f641976ec2a0999686f0bc1.zip", "author": "Xiaosong Ma;Jie ZHANG;Song Guo;Wenchao Xu", "authorids": "~Xiaosong_Ma4;~Jie_ZHANG18;~Song_Guo5;~Wenchao_Xu1", "gender": "M;F;M;", "homepage": ";https://cugzj.github.io/zhangjie.github.io/;https://cse.hkust.edu.hk/~songguo/;", "dblp": "m/XiaosongMa;84/6889-76;01/267-1;", "google_scholar": ";JRCNlI8AAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0001-7979-2183;0000-0002-8073-2118;;", "linkedin": ";;;", "or_profile": "~Xiaosong_Ma4;~Jie_ZHANG18;~Song_Guo5;~Wenchao_Xu1", "aff": "Hong Kong Polytechnic University;The Hong Kong Polytechnic University;The Hong Kong Polytechnic University;", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;", "position": "PhD student;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nma2023swapprompt,\ntitle={SwapPrompt: Test-Time Prompt Adaptation for Vision-Language Models},\nauthor={Xiaosong Ma and Jie ZHANG and Song Guo and Wenchao Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EhdNQiOWgQ}\n}", "github": "", "project": "", "reviewers": "3xii;N4YN;VTrP;73P2;V8X4", "pdf_size": 535449, "rating": "5;5;5;6;8", "confidence": "5;4;5;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "2;3;3;3;3", "wc_summary": "38;66;87;72;86", "wc_strengths": "25;42;24;54;155", "wc_weaknesses": "121;94;175;15;194", "wc_questions": "48;152;70;91;179", "wc_limitations": "5;3;5;12;44", "wc_review": "237;357;361;244;658", "wc_reply_reviewers": "43;0;50;17;30", "wc_reply_authors": "159;142;161;51;31", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;2;3;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 69.8, 17.825823964125753 ], "wc_strengths_avg": [ 60.0, 48.79754092164891 ], "wc_weaknesses_avg": [ 119.8,
63.56539939306604 ], "wc_questions_avg": [ 108.0, 49.61854492022111 ], "wc_limitations_avg": [ 13.8, 15.406492138056604 ], "wc_review_avg": [ 371.4, 152.80654436247158 ], "wc_reply_reviewers_avg": [ 28.0, 17.988885457415087 ], "wc_reply_authors_avg": [ 108.8, 56.10846638431673 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5601120336112039, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17344404046023372078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "f-Policy Gradients: A General Framework for Goal-Conditioned RL using f-Divergences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72302", "id": "EhhPtGsVAv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/27f4d95417bb722201597bf4d67cbacc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EhhPtGsVAv", "openreview": "https://openreview.net/forum?id=EhhPtGsVAv", "poster": "/media/PosterPDFs/NeurIPS%202023/72302.png?t=1702081282.7566202", "slides": "https://nips.cc/virtual/2023/poster/72302", "video": "https://nips.cc/virtual/2023/poster/72302", "author_site": "Siddhant Agarwal, Ishan Durugkar, Peter Stone, Amy Zhang", "tldr": "", "abstract": "Goal-Conditioned Reinforcement Learning (RL) problems often have access to sparse rewards where the agent receives a reward signal only when it has achieved the goal, making policy optimization a difficult problem.\n Several works augment this sparse reward with a learned dense reward function, but this can lead to sub-optimal policies if the reward is misaligned. \n Moreover, recent works have demonstrated that effective shaping rewards for a particular problem can depend on the underlying learning algorithm. \n This paper introduces a novel way to encourage exploration called\n $f$-Policy Gradients, or $f$-PG. $f$-PG minimizes the f-divergence between the agent's state visitation distribution and the goal, which we show can lead to an optimal policy. We derive gradients for various f-divergences to optimize this objective. Our learning paradigm provides dense learning signals for exploration in sparse reward settings. We further introduce an entropy-regularized policy optimization objective, which we call $state$-MaxEnt RL (or $s$-MaxEnt RL), as a special case of our objective. We show that several metric-based shaping rewards like L2 can be used with $s$-MaxEnt RL, providing a common ground to study such metric-based shaping rewards with efficient exploration. We find that $f$-PG performs better than standard policy gradient methods on a challenging gridworld as well as the Point Maze and FetchReach environments. 
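The $f$-PG abstract above defines its objective as an f-divergence between the goal distribution and the policy's state-visitation distribution. As a minimal illustration of that quantity (not the paper's derived gradient estimator), the numpy sketch below evaluates a few standard f-divergences on a discrete state space; the 5-state chain, the visitation frequencies, and the generator list are all illustrative assumptions.

```python
import numpy as np

# Convex generator functions f with f(1) = 0; each induces a divergence
# D_f(p || q) = sum_s q(s) * f(p(s) / q(s)) on a discrete state space.
F_GENERATORS = {
    "forward KL": lambda t: t * np.log(t),    # gives KL(p || q)
    "reverse KL": lambda t: -np.log(t),       # gives KL(q || p)
    "2 * Jensen-Shannon": lambda t: t * np.log(t) - (t + 1) * np.log((t + 1) / 2),
}

def f_divergence(p, q, f, eps=1e-12):
    """Plug-in D_f(p || q) for discrete distributions p and q."""
    p = np.asarray(p, dtype=float) + eps
    q = np.asarray(q, dtype=float) + eps
    p, q = p / p.sum(), q / q.sum()
    return float(np.sum(q * f(p / q)))

# Toy 5-state chain: a spread-out visitation distribution vs. a goal
# distribution concentrated on the final state.
d_pi = np.array([0.40, 0.30, 0.15, 0.10, 0.05])  # state-visitation frequencies
p_goal = np.array([0.0, 0.0, 0.0, 0.0, 1.0])     # goal: occupy the last state

for name, f in F_GENERATORS.items():
    print(f"{name:>20s}: {f_divergence(p_goal, d_pi, f):.4f}")
```

Driving these values toward zero is what provides a dense learning signal even when the environment reward itself is sparse.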
More information on our website https://agarwalsiddhant10.github.io/projects/fpg.html.", "keywords": "Goal Conditioned Reinforcement Learning;Shaping Rewards;Reward Design", "primary_area": "", "supplementary_material": "", "author": "Siddhant Agarwal;Ishan Durugkar;Peter Stone;Amy Zhang", "authorids": "~Siddhant_Agarwal1;~Ishan_Durugkar1;~Peter_Stone1;~Amy_Zhang1", "gender": "M;M;M;F", "homepage": "https://agarwalsiddhant10.github.io/;http://cs.utexas.edu/~ishand;http://www.cs.utexas.edu/~pstone;", "dblp": ";https://dblp.org/pers/d/Durugkar:Ishan;s/PeterStone;43/2754", "google_scholar": ";eb81CnYAAAAJ;qnwjcfAAAAAJ;", "orcid": ";;0000-0002-6795-420X;", "linkedin": "siddhant-agarwal-688a31156/;;;", "or_profile": "~Siddhant_Agarwal1;~Ishan_Durugkar1;~Peter_Stone1;~Amy_Zhang2", "aff": "University of Texas at Austin;, University of Texas, Austin;University of Texas, Austin;Meta Facebook", "aff_domain": "cs.utexas.edu;cs.utexas.edu;utexas.edu;facebook.com", "position": "PhD student;PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nagarwal2023fpolicy,\ntitle={f-Policy Gradients: A General Framework for Goal-Conditioned {RL} using f-Divergences},\nauthor={Siddhant Agarwal and Ishan Durugkar and Peter Stone and Amy Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EhhPtGsVAv}\n}", "github": "", "project": "", "reviewers": "soXp;trTZ;cLon;PJFh", "pdf_size": 3608435, "rating": "3;4;5;6", "confidence": "4;4;4;3", "soundness": "2;3;3;2", "novelty": "1;2;2;3", "presentation": "2;4;3;3", "wc_summary": "54;44;135;73", "wc_strengths": "180;46;109;39", "wc_weaknesses": "44;160;157;296", "wc_questions": "3;5;156;263", "wc_limitations": "31;37;36;40", "wc_review": "312;292;593;711", "wc_reply_reviewers": "90;93;61;279", "wc_reply_authors": "132;503;431;187", "reply_reviewers": "1;1;1;2", "reply_authors": "2;3;3;3", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.5, 35.344730866141845 ], "wc_strengths_avg": [ 93.5, 56.89683646741706 ], "wc_weaknesses_avg": [ 164.25, 89.28710713199303 ], "wc_questions_avg": [ 106.75, 109.49514829434224 ], "wc_limitations_avg": [ 36.0, 3.24037034920393 ], "wc_review_avg": [ 477.0, 180.0430504073956 ], "wc_reply_reviewers_avg": [ 130.75, 86.49963872756926 ], "wc_reply_authors_avg": [ 313.25, 157.0515440866469 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13206547254382263579&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.utexas.edu;cs.utexas.edu;utexas.edu;facebook.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Texas at Austin;University of Texas, Austin;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.utexas.edu;https://meta.com", "aff_unique_abbr": "UT Austin;UT Austin;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generative Modelling of Stochastic Actions with Arbitrary Constraints in Reinforcement 
Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72301", "id": "Ehzj9F2Kmj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d4c0094ae32530494c71468558ab5b1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ehzj9F2Kmj", "openreview": "https://openreview.net/forum?id=Ehzj9F2Kmj", "poster": "/media/PosterPDFs/NeurIPS%202023/72301.png?t=1701825349.2158358", "slides": "https://nips.cc/virtual/2023/poster/72301", "video": "https://nips.cc/virtual/2023/poster/72301", "author_site": "Changyu CHEN, Ramesha Karunasena, Thanh Nguyen, Arunesh Sinha, Pradeep Varakantham", "tldr": "", "abstract": "Many problems in Reinforcement Learning (RL) seek an optimal policy with large discrete multidimensional yet unordered action spaces; these include problems in randomized allocation of resources such as placements of multiple security resources and emergency response units, etc. A challenge in this setting is that the underlying action space is categorical (discrete and unordered) and large, for which existing RL methods do not perform well. Moreover, these problems require validity of the realized action (allocation); this validity constraint is often difficult to express compactly in a closed mathematical form. The allocation nature of the problem also prefers stochastic optimal policies, if one exists. In this work, we address these challenges by (1) applying a (state) conditional normalizing flow to compactly represent the stochastic policy \u2014 the compactness arises due to the network only producing one sampled action and the corresponding log probability of the action, which is then used by an actor-critic method; and (2) employing an invalid action rejection method (via a valid action oracle) to update the base policy. The action rejection is enabled by a modified policy gradient that we derive. 
Finally, we conduct extensive experiments to show the scalability of our approach compared to prior methods and the ability to enforce arbitrary state-conditional constraints on the support of the distribution of actions in any state.", "keywords": "Action constrained reinforcement learning;Normalizing flow;Generative modelling", "primary_area": "", "supplementary_material": "/attachment/1824ba0049326ffe9c5783e1c2435b4585a09eae.pdf", "author": "Changyu Chen;Ramesha Karunasena;Thanh Hong Nguyen;Arunesh Sinha;Pradeep Varakantham", "authorids": "~Changyu_Chen2;~Ramesha_Karunasena1;~Thanh_Hong_Nguyen1;~Arunesh_Sinha2;~Pradeep_Varakantham1", "gender": "M;F;F;M;M", "homepage": ";https://rameshakaru.github.io/;https://ix.cs.uoregon.edu/~thanhhng/;http://aruneshsinha.net;http://www.mysmu.edu.sg/faculty/pradeepv", "dblp": ";249/4248;117/4935;26/9089.html;72/759", "google_scholar": "https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com/citations?hl=en;6fpZnQIAAAAJ;Iuq3Qv8AAAAJ;https://scholar.google.com.sg/citations?user=BAdQpFkAAAAJ", "orcid": ";0000-0002-5400-4168;;;", "linkedin": ";ramesha-karunasena;;;", "or_profile": "~Changyu_Chen2;~Ramesha_Karunasena1;~Thanh_Hong_Nguyen1;~Arunesh_Sinha2;~Pradeep_Varakantham1", "aff": "Singapore Management University;Singapore Management University;University of Oregon;Rutgers University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;uoregon.edu;rutgers.edu;smu.edu.sg", "position": "PhD student;Researcher;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023generative,\ntitle={Generative Modelling of Stochastic Actions with Arbitrary Constraints in Reinforcement Learning},\nauthor={Changyu Chen and Ramesha Karunasena and Thanh Hong Nguyen and Arunesh Sinha and Pradeep Varakantham},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ehzj9F2Kmj}\n}", "github": "", "project": "", "reviewers": "NSnS;Xx2Q;Ed9B;We8u", "pdf_size": 7161377, "rating": "5;6;6;7", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;2", "wc_summary": "111;102;85;69", "wc_strengths": "64;159;113;18", "wc_weaknesses": "178;498;227;274", "wc_questions": "294;280;101;67", "wc_limitations": "125;157;56;9", "wc_review": "772;1196;582;437", "wc_reply_reviewers": "196;298;21;12", "wc_reply_authors": "1040;1143;16;21", "reply_reviewers": "1;3;1;1", "reply_authors": "3;5;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 91.75, 16.11482236948332 ], "wc_strengths_avg": [ 88.5, 52.77546778570513 ], "wc_weaknesses_avg": [ 294.25, 122.43442122213834 ], "wc_questions_avg": [ 185.5, 102.32912586355852 ], "wc_limitations_avg": [ 86.75, 57.854883112836724 ], "wc_review_avg": [ 746.75, 285.2852738926424 ], "wc_reply_reviewers_avg": [ 131.75, 120.80226612112871 ], "wc_reply_authors_avg": [ 555.0, 537.7373894383763 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15641270077663140668&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "smu.edu.sg;smu.edu.sg;uoregon.edu;rutgers.edu;smu.edu.sg", 
"author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Singapore Management University;University of Oregon;Rutgers University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.smu.edu.sg;https://www.uoregon.edu;https://www.rutgers.edu", "aff_unique_abbr": "SMU;UO;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Information-guided Planning: An Online Approach for Partially Observable Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72300", "id": "EjG2G1PT2v", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da5498f88193ff61f0daea1940b819da-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EjG2G1PT2v", "openreview": "https://openreview.net/forum?id=EjG2G1PT2v", "poster": "/media/PosterPDFs/NeurIPS%202023/72300.png?t=1699570401.5772946", "slides": "https://nips.cc/virtual/2023/poster/72300", "video": "https://nips.cc/virtual/2023/poster/72300", "author_site": "Matheus Aparecido Do Carmo Alves, Amokh Varma, Yehia Elkhatib, Leandro Soriano Marcolino", "tldr": "", "abstract": "This paper presents IB-POMCP, a novel algorithm for online planning under partial observability. Our approach enhances the decision-making process by using estimations of the world belief's entropy to guide a tree search process and surpass the limitations of planning in scenarios with sparse reward configurations. By performing what we denominate as an *information-guided planning process*, the algorithm, which incorporates a novel I-UCB function, shows significant improvements in reward and reasoning time compared to state-of-the-art baselines in several benchmark scenarios, along with theoretical convergence guarantees.", "keywords": "Information-guided planning;Planning under uncertainty;Sequential decision making", "primary_area": "", "supplementary_material": "/attachment/dfc507dfe2eb128f5d4e6671f661348b927b4cf9.zip", "author": "Matheus Aparecido Do Carmo Alves;Amokh Varma;Yehia Elkhatib;Leandro Soriano Marcolino", "authorids": "~Matheus_Aparecido_Do_Carmo_Alves1;~Amokh_Varma1;yehia.elkhatib@glasgow.ac.uk;~Leandro_Soriano_Marcolino2", "gender": "M;M;;", "homepage": ";https://amokhvarma.github.io;;http://wp.lancs.ac.uk/colab/", "dblp": "266/5694;;;35/1126", "google_scholar": "DP4R9CEAAAAJ;;;", "orcid": "0000-0003-4530-3331;;;", "linkedin": "mthalves/;;;", "or_profile": "~Matheus_Aparecido_Do_Carmo_Alves1;~Amokh_Varma1;yehia.elkhatib@glasgow.ac.uk;~Leandro_Soriano_Marcolino2", "aff": "Lancaster University;Indian Institute of Technology Delhi;;Lancaster University", "aff_domain": "lancaster.ac.uk;iitd.ac.in;;lancaster.ac.uk", "position": "PhD student;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\nalves2023informationguided,\ntitle={Information-guided Planning: An Online Approach for Partially Observable Problems},\nauthor={Matheus Aparecido Do Carmo Alves and Amokh Varma and Yehia Elkhatib and Leandro Soriano Marcolino},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EjG2G1PT2v}\n}", "github": "", "project": "", "reviewers": "QZzo;hPBV;JP1G;zmgm", "pdf_size": 1911899, "rating": "3;6;6;6", "confidence": "5;4;1;2", "soundness": "4;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;2", "wc_summary": "22;280;122;22", "wc_strengths": "28;114;137;15", "wc_weaknesses": "183;334;96;1", 
"wc_questions": "69;137;77;1", "wc_limitations": "24;68;48;1", "wc_review": "326;933;480;40", "wc_reply_reviewers": "66;429;29;0", "wc_reply_authors": "1193;1186;0;0", "reply_reviewers": "2;3;1;0", "reply_authors": "3;4;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.5811388300841898 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.5, 105.50236964163412 ], "wc_strengths_avg": [ 73.5, 52.83228179815821 ], "wc_weaknesses_avg": [ 153.5, 122.48775448998973 ], "wc_questions_avg": [ 71.0, 48.2078831727758 ], "wc_limitations_avg": [ 35.25, 25.17315037892556 ], "wc_review_avg": [ 444.75, 323.0923822995522 ], "wc_reply_reviewers_avg": [ 131.0, 173.63323414600097 ], "wc_reply_authors_avg": [ 594.75, 594.7551492000721 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7302967433402215, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12680679934475761283&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "lancaster.ac.uk;iitd.ac.in;;lancaster.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Lancaster University;Indian Institute of Technology Delhi", "aff_unique_dep": ";", "aff_unique_url": "https://www.lancaster.ac.uk;https://www.iitd.ac.in", "aff_unique_abbr": "Lancaster;IIT Delhi", "aff_campus_unique_index": "1", "aff_campus_unique": ";Delhi", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;India" }, { "title": "Timewarp: Transferable Acceleration of Molecular Dynamics by Learning Time-Coarsened Dynamics", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72299", "id": "EjMLpTgvKH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a598c367280f9054434fdcc227ce4d38-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EjMLpTgvKH", "openreview": "https://openreview.net/forum?id=EjMLpTgvKH", "poster": "/media/PosterPDFs/NeurIPS%202023/72299.png?t=1699996307.9537997", "slides": "https://nips.cc/virtual/2023/poster/72299", "video": "https://nips.cc/virtual/2023/poster/72299", "author_site": "Leon Klein, Andrew Foong, Tor Fjelde, Bruno Mlodozeniec, Marc Brockschmidt, Sebastian Nowozin, Frank Noe, Ryota Tomioka", "tldr": "", "abstract": "*Molecular dynamics* (MD) simulation is a widely used technique to simulate molecular systems, most commonly at the all-atom resolution where equations of motion are integrated with timesteps on the order of femtoseconds ($1\\textrm{fs}=10^{-15}\\textrm{s}$). \nMD is often used to compute equilibrium properties, which requires sampling from an equilibrium distribution such as the Boltzmann distribution. \nHowever, many important processes, such as binding and folding, occur over timescales of milliseconds or beyond, and cannot be efficiently sampled with conventional MD.\nFurthermore, new MD simulations need to be performed for each molecular system studied.\nWe present *Timewarp*, an enhanced sampling method which uses a normalising flow as a proposal distribution in a Markov chain Monte Carlo method targeting the Boltzmann distribution. 
\nThe flow is trained offline on MD trajectories and learns to make large steps in time, simulating the molecular dynamics of $10^{5} - 10^{6} \\textrm{fs}$.\nCrucially, Timewarp is *transferable* between molecular systems: once trained, we show that it generalises to unseen small peptides (2-4 amino acids) at all-atom resolution, exploring their metastable states and providing wall-clock acceleration of sampling compared to standard MD.\nOur method constitutes an important step towards general, transferable algorithms for accelerating MD.", "keywords": "Molecular Dynamics;Normalizing Flows;MCMC", "primary_area": "", "supplementary_material": "", "author": "Leon Klein;Andrew Y. K. Foong;Tor Erlend Fjelde;Bruno Kacper Mlodozeniec;Marc Brockschmidt;Sebastian Nowozin;Frank Noe;Ryota Tomioka", "authorids": "~Leon_Klein1;~Andrew_Y._K._Foong1;~Tor_Erlend_Fjelde1;~Bruno_Kacper_Mlodozeniec2;~Marc_Brockschmidt1;~Sebastian_Nowozin1;~Frank_Noe1;~Ryota_Tomioka1", "gender": ";M;M;Not Specified;M;M;M;M", "homepage": ";https://andrewfoongyk.github.io/;;https://brunokm.github.io;;http://www.nowozin.net/sebastian/;;http://tomioka.dk", "dblp": "249/9262;243/7014;267/6616;241/6874;80/8292;https://dblp.org/pers/n/Nowozin:Sebastian.html;;50/2945", "google_scholar": "P1vYX2AAAAAJ;2UOjgIUAAAAJ;mSqNt5cAAAAJ;kGPBRy8AAAAJ;https://scholar.google.co.uk/citations?user=pF27eLMAAAAJ;https://scholar.google.co.uk/citations?user=7-B7aQkAAAAJ;QGiLc_cAAAAJ;TxdeO-UAAAAJ", "orcid": "0000-0003-1095-1902;;;;;;;", "linkedin": ";;;bkmlodozeniec/;;;;", "or_profile": "~Leon_Klein1;~Andrew_Y._K._Foong1;~Tor_Erlend_Fjelde1;~Bruno_Kacper_Mlodozeniec2;~Marc_Brockschmidt1;~Sebastian_Nowozin1;~Frank_Noe1;~Ryota_Tomioka1", "aff": "Microsoft;Microsoft;University of Cambridge;University of Cambridge;Google Brain;Microsoft;Freie Universit\u00e4t Berlin;Microsoft Research Cambridge", "aff_domain": "microsoft.com;microsoft.com;cam.ac.uk;cam.ac.uk;google.com;microsoft.com;fu-berlin.de;microsoft.com", "position": "Researcher;Researcher;PhD student;PhD student;Researcher;Researcher;Professor;Researcher", "bibtex": "@inproceedings{\nklein2023timewarp,\ntitle={Timewarp: Transferable Acceleration of Molecular Dynamics by Learning Time-Coarsened Dynamics},\nauthor={Leon Klein and Andrew Y. K. 
Foong and Tor Erlend Fjelde and Bruno Kacper Mlodozeniec and Marc Brockschmidt and Sebastian Nowozin and Frank Noe and Ryota Tomioka},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EjMLpTgvKH}\n}", "github": "", "project": "", "reviewers": "UaAm;fomi;heyF;Pymp", "pdf_size": 11538387, "rating": "7;7;8;8", "confidence": "4;3;4;5", "soundness": "3;3;3;3", "novelty": "2;3;4;4", "presentation": "4;3;3;4", "wc_summary": "77;48;70;209", "wc_strengths": "68;30;129;134", "wc_weaknesses": "107;53;164;42", "wc_questions": "51;225;252;117", "wc_limitations": "8;14;5;7", "wc_review": "311;370;620;509", "wc_reply_reviewers": "43;11;103;31", "wc_reply_authors": "0;0;264;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 101.0, 63.265314351546536 ], "wc_strengths_avg": [ 90.25, 43.41874595148966 ], "wc_weaknesses_avg": [ 91.5, 48.55151902875954 ], "wc_questions_avg": [ 161.25, 81.25999938469111 ], "wc_limitations_avg": [ 8.5, 3.3541019662496847 ], "wc_review_avg": [ 452.5, 120.49585055096296 ], "wc_reply_reviewers_avg": [ 47.0, 34.292856398964496 ], "wc_reply_authors_avg": [ 66.0, 114.3153532995459 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11669622604695264905&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "microsoft.com;microsoft.com;cam.ac.uk;cam.ac.uk;google.com;microsoft.com;fu-berlin.de;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;1;1;2;0;3;0", "aff_unique_norm": "Microsoft;University of Cambridge;Google;Freie Universit\u00e4t Berlin", "aff_unique_dep": "Microsoft Corporation;;Google Brain;", "aff_unique_url": "https://www.microsoft.com;https://www.cam.ac.uk;https://brain.google.com;https://www.fu-berlin.de", "aff_unique_abbr": "Microsoft;Cambridge;Google Brain;FU Berlin", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Cambridge;Mountain View", "aff_country_unique_index": "0;0;1;1;0;0;2;1", "aff_country_unique": "United States;United Kingdom;Germany" }, { "title": "Equivariant Neural Operator Learning with Graphon Convolution", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72298", "id": "EjiA3uWpnc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c362fbc0d182c6b4b8dadb90177239e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EjiA3uWpnc", "openreview": "https://openreview.net/forum?id=EjiA3uWpnc", "poster": "/media/PosterPDFs/NeurIPS%202023/72298.png?t=1699334638.9820268", "slides": "https://nips.cc/virtual/2023/poster/72298", "video": "https://nips.cc/virtual/2023/poster/72298", "author_site": "Chaoran Cheng, Jian Peng", "tldr": "", "abstract": "We propose a general architecture that combines the coefficient learning scheme with a residual operator layer for learning mappings between continuous functions in the 3D Euclidean space. Our proposed model is guaranteed to achieve SE(3)-equivariance by design. From the graph spectrum view, our method can be interpreted as convolution on graphons (dense graphs with infinitely many nodes), which we term InfGCN. 
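The coefficient-learning view mentioned in the InfGCN abstract expands the continuous output function in basis functions attached to the input nodes, with coefficients predicted by the network. Below is a simplified numpy sketch under strong assumptions: scalar output, rotation-invariant Gaussian radial bases in place of the paper's equivariant construction, and random coefficients in place of a trained GNN; it also checks that the resulting scalar field is unchanged under a global rotation.

```python
import numpy as np

def gaussian_rbf(r, centers, width=0.5):
    """Radial basis functions of distance (hence rotation invariant)."""
    return np.exp(-((r[..., None] - centers) ** 2) / (2 * width**2))

def evaluate_field(query_pts, node_pos, coeffs, centers):
    """Coefficient-learning view: the continuous output function is a sum
    of basis functions centred on the nodes, weighted by coefficients
    (which a GNN would predict in the actual architecture)."""
    d = np.linalg.norm(query_pts[:, None, :] - node_pos[None, :, :], axis=-1)
    basis = gaussian_rbf(d, centers)              # (n_query, n_nodes, n_basis)
    return np.einsum("qnk,nk->q", basis, coeffs)  # scalar field at queries

rng = np.random.default_rng(2)
node_pos = rng.normal(size=(5, 3))        # 5 "atoms" in 3D
centers = np.linspace(0.0, 2.0, 8)        # radial basis centres
coeffs = rng.normal(size=(5, 8))          # per-node basis coefficients
queries = rng.normal(size=(100, 3))

field = evaluate_field(queries, node_pos, coeffs, centers)

# Rotating both atoms and query points leaves the predicted values
# unchanged, the scalar analogue of the SE(3) property claimed above.
Q, _ = np.linalg.qr(rng.normal(size=(3, 3)))
field_rot = evaluate_field(queries @ Q.T, node_pos @ Q.T, coeffs, centers)
print(np.allclose(field, field_rot))      # True
```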
By leveraging both the continuous graphon structure and the discrete graph structure of the input data, our model can effectively capture the geometric information while preserving equivariance. Through extensive experiments on large-scale electron density datasets, we observed that our model significantly outperformed the current state-of-the-art architectures. Multiple ablation studies were also carried out to demonstrate the effectiveness of the proposed architecture.", "keywords": "Neural Operator Learning;Spectral Graph Theory;Graphon", "primary_area": "", "supplementary_material": "/attachment/f2ef8b7f6b5be9420c2c1ca4be792db7c06b5ddd.pdf", "author": "Chaoran Cheng;Jian Peng", "authorids": "~Chaoran_Cheng2;~Jian_Peng1", "gender": "M;M", "homepage": "https://ccr-cheng.github.io/;http://jianpeng.web.engr.illinois.edu/", "dblp": ";29/4181-1", "google_scholar": "SrGZZ1wAAAAJ;https://scholar.google.com.tw/citations?user=4wcAVXAAAAAJ", "orcid": ";", "linkedin": "chaoran-cheng-a70638214/;", "or_profile": "~Chaoran_Cheng2;~Jian_Peng1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncheng2023equivariant,\ntitle={Equivariant Neural Operator Learning with Graphon Convolution},\nauthor={Chaoran Cheng and Jian Peng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EjiA3uWpnc}\n}", "github": "", "project": "", "reviewers": "SJUC;cyPX;sQju;Wmsq;cHMR", "pdf_size": 32856401, "rating": "5;5;6;7;8", "confidence": "3;2;4;4;3", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "1;2;3;4;3", "wc_summary": "43;55;79;112;99", "wc_strengths": "29;47;167;101;22", "wc_weaknesses": "189;220;107;45;32", "wc_questions": "546;4;2;66;10", "wc_limitations": "3;12;56;8;8", "wc_review": "810;338;411;332;171", "wc_reply_reviewers": "23;53;0;0;0", "wc_reply_authors": "52;60;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 77.6, 25.88899380045505 ], "wc_strengths_avg": [ 73.2, 54.46613626832731 ], "wc_weaknesses_avg": [ 118.6, 75.218614717369 ], "wc_questions_avg": [ 125.6, 211.52550673618535 ], "wc_limitations_avg": [ 17.4, 19.509997437211517 ], "wc_review_avg": [ 412.4, 213.71064549993758 ], "wc_reply_reviewers_avg": [ 15.2, 20.8940182827526 ], "wc_reply_authors_avg": [ 22.4, 27.5506805723561 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.412514323662695, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=266784709923005165&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "illinois.edu;illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Recaptured Raw Screen Image and Video Demoir\u00e9ing via Channel 
and Spatial Modulations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72297", "id": "EkcO9tHm6S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f05193e5487287a890df7fbc3554427-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EkcO9tHm6S", "openreview": "https://openreview.net/forum?id=EkcO9tHm6S", "poster": "/media/PosterPDFs/NeurIPS%202023/72297.png?t=1700220190.8517766", "slides": "https://nips.cc/virtual/2023/poster/72297", "video": "https://nips.cc/virtual/2023/poster/72297", "author_site": "Yijia Cheng, Yijia Cheng, Xin Liu, Jingyu Yang", "tldr": "", "abstract": "Capturing screen contents by smartphone cameras has become a common way for information sharing. However, these images and videos are often degraded by moir\u00e9 patterns, which are caused by frequency aliasing between the camera filter array and digital display grids. We observe that the moir\u00e9 patterns in raw domain is simpler than those in sRGB domain, and the moir\u00e9 patterns in raw color channels have different properties. Therefore, we propose an image and video demoir\u00e9ing network tailored for raw inputs. We introduce a color-separated feature branch, and it is fused with the traditional feature-mixed branch via channel and spatial modulations. Specifically, the channel modulation utilizes modulated color-separated features to enhance the color-mixed features. The spatial modulation utilizes the feature with large receptive field to modulate the feature with small receptive field. In addition, we build the first well-aligned raw video demoir\u00e9ing (RawVDemoir\u00e9) dataset and propose an efficient temporal alignment method by inserting alternating patterns. Experiments demonstrate that our method achieves state-of-the-art performance for both image and video demoir\u00e9ing. 
Our dataset and code will be released after the acceptance of this work.", "keywords": "Raw image demoir\u00e9ing;raw video demoir\u00e9ing;video demoir\u00e9ing dataset", "primary_area": "", "supplementary_material": "/attachment/fc40dafcaae8c4f4c78ae893e4a1c6d96e237f82.zip", "author": "Huanjing Yue;Yijia Cheng;Xin Liu;Jingyu Yang", "authorids": "~Huanjing_Yue2;~Yijia_Cheng1;~Xin_Liu25;~Jingyu_Yang2", "gender": "F;F;M;", "homepage": "https://sites.google.com/site/huanjingyue/;https://github.com/tju-chengyijia/;https://www.lut.fi/en/profiles/xin-liu;", "dblp": "119/0275;;76/1820-12;", "google_scholar": ";;xHkC5U0AAAAJ;", "orcid": "0000-0003-2517-9783;;0000-0002-2242-6139;", "linkedin": ";;;", "or_profile": "~Huanjing_Yue2;~Yijia_Cheng1;~Xin_Liu25;~Jingyu_Yang2", "aff": "Tianjin University;Tianjin University;Lappeenranta University of Technology;", "aff_domain": "tju.edu.cn;tju.edu.cn;lut.fi;", "position": "Associate Professor;Undergrad student;Associate Professor;", "bibtex": "@inproceedings{\nyue2023recaptured,\ntitle={Recaptured Raw Screen Image and Video Demoir\\'eing via Channel and Spatial Modulations},\nauthor={Huanjing Yue and Yijia Cheng and Xin Liu and Jingyu Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EkcO9tHm6S}\n}", "github": "", "project": "", "reviewers": "irQe;pajB;tgX2;piUN;AQcs", "pdf_size": 8706294, "rating": "4;4;6;6;6", "confidence": "5;4;4;5;3", "soundness": "3;2;3;3;3", "novelty": "3;2;3;4;3", "presentation": "4;3;2;3;3", "wc_summary": "53;69;84;105;72", "wc_strengths": "24;45;99;97;63", "wc_weaknesses": "210;209;128;88;64", "wc_questions": "2;1;226;12;13", "wc_limitations": "1;2;68;18;15", "wc_review": "290;326;605;320;227", "wc_reply_reviewers": "0;0;56;27;21", "wc_reply_authors": "0;0;16;34;14", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 76.6, 17.304334717058612 ], "wc_strengths_avg": [ 65.6, 29.2 ], "wc_weaknesses_avg": [ 139.8, 60.472803804685626 ], "wc_questions_avg": [ 50.8, 87.7391588744729 ], "wc_limitations_avg": [ 20.8, 24.55524383914768 ], "wc_review_avg": [ 353.6, 130.51068921739707 ], "wc_reply_reviewers_avg": [ 20.8, 20.70169075220669 ], "wc_reply_authors_avg": [ 12.8, 12.560254774486067 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6360126769823446164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tju.edu.cn;tju.edu.cn;lut.fi;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Tianjin University;Lappeenranta University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.lut.fi", "aff_unique_abbr": "TJU;LUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Finland" }, { "title": "Does Localization Inform Editing? Surprising Differences in Causality-Based Localization vs. 
Knowledge Editing in Language Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72296", "id": "EldbUlZtbd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3927bbdcf0e8d1fa8aa23c26f358a281-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EldbUlZtbd", "openreview": "https://openreview.net/forum?id=EldbUlZtbd", "poster": "/media/PosterPDFs/NeurIPS%202023/72296.png?t=1700237453.7784572", "slides": "https://nips.cc/virtual/2023/poster/72296", "video": "https://nips.cc/virtual/2023/poster/72296", "author_site": "Peter Hase, Mohit Bansal, Been Kim, Asma Ghandeharioun", "tldr": "", "abstract": "Language models learn a great quantity of factual information during pretraining, and recent work localizes this information to specific model weights like mid-layer MLP weights. In this paper, we find that we can change how a fact is stored in a model by editing weights that are in a different location than where existing methods suggest that the fact is stored. This is surprising because we would expect that localizing facts to specific model parameters would tell us where to manipulate knowledge in models, and this assumption has motivated past work on model editing methods. Specifically, we show that localization conclusions from representation denoising (also known as Causal Tracing) do not provide any insight into which model MLP layer would be best to edit in order to override an existing stored fact with a new one. This finding raises questions about how past work relies on Causal Tracing to select which model layers to edit. Next, we consider several variants of the editing problem, including erasing and amplifying facts. For one of our editing problems, editing performance does relate to localization results from representation denoising, but we find that which layer we edit is a far better predictor of performance. Our results suggest, counterintuitively, that better mechanistic understanding of how pretrained language models work may not always translate to insights about how to best change their behavior.", "keywords": "localization;model editing;mechanistic interpretability;language models", "primary_area": "", "supplementary_material": "/attachment/3ce71714041af1a8fbce554c6c6de928f3e5dd28.zip", "author": "Peter Hase;Mohit Bansal;Been Kim;Asma Ghandeharioun", "authorids": "~Peter_Hase1;~Mohit_Bansal2;~Been_Kim1;~Asma_Ghandeharioun1", "gender": ";M;;", "homepage": ";https://www.cs.unc.edu/~mbansal/;https://beenkim.github.io/;https://alum.mit.edu/www/asma_gh", "dblp": ";32/5243.html;https://dblp.uni-trier.de/pers/k/Kim:Been.html;124/3110", "google_scholar": ";DN8QtscAAAAJ;;CkfQy2gAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Peter_Hase1;~Mohit_Bansal2;~Been_Kim1;~Asma_Ghandeharioun1", "aff": ";University of North Carolina at Chapel Hill;Google DeepMind;Google", "aff_domain": ";unc.edu;google.com;google.com", "position": ";Full Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nhase2023does,\ntitle={Does Localization Inform Editing? Surprising Differences in Causality-Based Localization vs. 
Knowledge Editing in Language Models},\nauthor={Peter Hase and Mohit Bansal and Been Kim and Asma Ghandeharioun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EldbUlZtbd}\n}", "github": "", "project": "", "reviewers": "FzCS;q6TL;YdCx;UfPL", "pdf_size": 1954157, "rating": "7;7;8;8", "confidence": "3;4;4;5", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "3;4;4;4", "wc_summary": "74;87;87;149", "wc_strengths": "128;88;72;94", "wc_weaknesses": "193;61;257;54", "wc_questions": "115;54;106;1", "wc_limitations": "4;1;10;38", "wc_review": "514;291;532;336", "wc_reply_reviewers": "58;14;85;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 29.20937349550654 ], "wc_strengths_avg": [ 95.5, 20.414455662593603 ], "wc_weaknesses_avg": [ 141.25, 86.78817603798342 ], "wc_questions_avg": [ 69.0, 45.645372164108814 ], "wc_limitations_avg": [ 13.25, 14.652218262092603 ], "wc_review_avg": [ 418.25, 106.1422983546145 ], "wc_reply_reviewers_avg": [ 44.0, 29.16333314283537 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6482120383418265405&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";unc.edu;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of North Carolina;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.unc.edu;https://deepmind.com", "aff_unique_abbr": "UNC;DeepMind", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Chapel Hill;;Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "ImageBrush: Learning Visual In-Context Instructions for Exemplar-Based Image Manipulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72295", "id": "EmOIP3t9nk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98530736e5d94e62b689dfc1fda89bd1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EmOIP3t9nk", "openreview": "https://openreview.net/forum?id=EmOIP3t9nk", "poster": "/media/PosterPDFs/NeurIPS%202023/72295.png?t=1701756675.971674", "slides": "https://nips.cc/virtual/2023/poster/72295", "video": "https://nips.cc/virtual/2023/poster/72295", "author_site": "Yasheng Sun, Yifan Yang, Houwen Peng, Yifei Shen, Yuqing Yang, Han Hu, Lili Qiu, Hideki Koike", "tldr": "", "abstract": "While language-guided image manipulation has made remarkable progress, the challenge of instructing the manipulation process so that it faithfully reflects human intentions persists. An accurate and comprehensive description of a manipulation task using natural language is laborious and sometimes even impossible, primarily due to the inherent uncertainty and ambiguity present in linguistic expressions. \nIs it feasible to accomplish image manipulation without resorting to external cross-modal language information? If this possibility exists, the inherent modality gap would be effortlessly eliminated. 
In this paper, we propose a novel manipulation methodology, dubbed ImageBrush, that learns visual instructions for more accurate image editing.\nOur key idea is to employ a pair of transformation images as visual instructions, which not only precisely captures human intention but also facilitates accessibility in real-world scenarios. Capturing visual instructions is particularly challenging because it involves extracting the underlying intentions solely from visual demonstrations and then applying this operation to a new image. To address this challenge, we formulate visual instruction learning as a diffusion-based inpainting problem, where the contextual information is fully exploited through an iterative process of generation. A visual prompting encoder is carefully devised to enhance the model's capacity in uncovering human intent behind the visual instructions. Extensive experiments show that our method generates engaging manipulation results conforming to the transformations entailed in demonstrations. Moreover, our model exhibits robust generalization capabilities on various downstream tasks such as pose transfer, image translation and video inpainting.", "keywords": "Image Manipulation;Visual Instruction", "primary_area": "", "supplementary_material": "/attachment/07c9e19a43791d8ceba84cc217917f2b66a403c2.pdf", "author": "Yasheng SUN;Yifan Yang;Houwen Peng;Yifei Shen;Yuqing Yang;Han Hu;Lili Qiu;Hideki Koike", "authorids": "~Yasheng_SUN1;~Yifan_Yang9;~Houwen_Peng2;~Yifei_Shen1;~Yuqing_Yang1;~Han_Hu1;~Lili_Qiu3;~Hideki_Koike2", "gender": "M;M;M;M;;M;;M", "homepage": ";https://www.microsoft.com/en-us/research/people/yifanyang/;https://houwenpeng.com/;https://openreview.net/profile?id=~Yifei_Shen1;;https://ancientmooner.github.io/;https://www.microsoft.com/en-us/research/people/liliqiu/;https://www.vogue.cs.titech.ac.jp/koike", "dblp": "254/0467;83/89-4;133/1706;51/609.html;91/9064-1.html;;;58/3276", "google_scholar": "Vrq1yOEAAAAJ;;https://scholar.google.co.jp/citations?user=UYlhQS8AAAAJ;;4BtNQAEAAAAJ;Jkss014AAAAJ;;Ih8cJXQAAAAJ", "orcid": ";;;;0000-0003-3518-5212;;;0000-0002-8989-6434", "linkedin": ";yifyang/;;;;;;", "or_profile": "~Yasheng_SUN1;~Yifan_Yang9;~Houwen_Peng2;~Yifei_Shen1;~Yuqing_Yang1;~Han_Hu1;~Lili_Qiu3;~Hideki_Koike2", "aff": "Tokyo Institute of Technology, Tokyo Institute of Technology;Microsoft;Microsoft Research;Microsoft Research Asia;Microsoft Research;Microsft Research Asia;University of Texas at Austin;Tokyo Institute of Technology, Tokyo Institute of Technology", "aff_domain": "titech.ac.jp;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;utexas.edu;titech.ac.jp", "position": "PhD student;Researcher;Researcher;Research Cheerleader;Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023imagebrush,\ntitle={ImageBrush: Learning Visual In-Context Instructions for Exemplar-Based Image Manipulation},\nauthor={Yasheng SUN and Yifan Yang and Houwen Peng and Yifei Shen and Yuqing Yang and Han Hu and Lili Qiu and Hideki Koike},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EmOIP3t9nk}\n}", "github": "", "project": "", "reviewers": "h3g5;fTo3;uVk2;RQEf;zhtQ", "pdf_size": 26094800, "rating": "4;5;6;6;7", "confidence": "5;4;2;4;5", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "4;3;3;2;3", "wc_summary": "96;51;121;181;59", "wc_strengths": "86;137;42;115;46", "wc_weaknesses": "296;512;186;116;74", 
"wc_questions": "1;70;151;22;142", "wc_limitations": "2;220;5;4;38", "wc_review": "481;990;505;438;359", "wc_reply_reviewers": "0;747;37;23;31", "wc_reply_authors": "0;893;0;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 101.6, 47.08970163422147 ], "wc_strengths_avg": [ 85.2, 37.34916331057498 ], "wc_weaknesses_avg": [ 236.8, 156.79081605757398 ], "wc_questions_avg": [ 77.2, 60.91108273541032 ], "wc_limitations_avg": [ 53.8, 84.16269957647508 ], "wc_review_avg": [ 554.6, 223.2958575522618 ], "wc_reply_reviewers_avg": [ 167.6, 289.9721365924664 ], "wc_reply_authors_avg": [ 178.6, 357.19999999999993 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.17902871850985824, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11238596851363684135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "titech.ac.jp;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;utexas.edu;titech.ac.jp", "author_num": 8, "aff_unique_index": "0;1;1;1;1;1;2;0", "aff_unique_norm": "Tokyo Institute of Technology;Microsoft;University of Texas at Austin", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.titech.ac.jp;https://www.microsoft.com;https://www.utexas.edu", "aff_unique_abbr": "Titech;Microsoft;UT Austin", "aff_campus_unique_index": "0;2;2;3;0", "aff_campus_unique": "Tokyo;;Asia;Austin", "aff_country_unique_index": "0;1;1;2;1;2;1;0", "aff_country_unique": "Japan;United States;China" }, { "title": "Conditional Mutual Information for Disentangled Representations in Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72294", "id": "EmYWJsyad4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd750154df5f199f94df897975621306-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EmYWJsyad4", "openreview": "https://openreview.net/forum?id=EmYWJsyad4", "poster": "/media/PosterPDFs/NeurIPS%202023/72294.png?t=1698753659.193664", "slides": "https://nips.cc/virtual/2023/poster/72294", "video": "https://nips.cc/virtual/2023/poster/72294", "author_site": "Mhairi Dunion, Trevor McInroe, Kevin Luck, Kevin Sebastian Luck, Josiah Hanna, Stefano Albrecht", "tldr": "", "abstract": "Reinforcement Learning (RL) environments can produce training data with spurious correlations between features due to the amount of training data or its limited feature coverage. This can lead to RL agents encoding these misleading correlations in their latent representation, preventing the agent from generalising if the correlation changes within the environment or when deployed in the real world. Disentangled representations can improve robustness, but existing disentanglement techniques that minimise mutual information between features require independent features, thus they cannot disentangle correlated features. We propose an auxiliary task for RL algorithms that learns a disentangled representation of high-dimensional observations with correlated features by minimising the conditional mutual information between features in the representation. 
We demonstrate experimentally, using continuous control tasks, that our approach improves generalisation under correlation shifts, as well as improving the training performance of RL algorithms in the presence of correlated features.", "keywords": "Reinforcement Learning;Representation Learning;Disentanglement", "primary_area": "", "supplementary_material": "/attachment/acaf7025e3e549afce67d4c7500874a2447eb166.zip", "author": "Mhairi Dunion;Trevor McInroe;Kevin Sebastian Luck;Josiah P. Hanna;Stefano V Albrecht", "authorids": "~Mhairi_Dunion1;~Trevor_McInroe1;~Kevin_Sebastian_Luck1;~Josiah_P._Hanna1;~Stefano_V_Albrecht1", "gender": "F;M;;;M", "homepage": ";https://trevormcinroe.github.io/;;https://agents-lab.org/stefano-albrecht/;https://pages.cs.wisc.edu/~jphanna/", "dblp": ";304/2817;153/7680;118/3975;135/6336", "google_scholar": ";;;https://scholar.google.co.uk/citations?user=ceSFqCcAAAAJ;", "orcid": ";;;0000-0002-8735-1465;", "linkedin": "mhairi-dunion-012a1356/;;;;", "or_profile": "~Mhairi_Dunion1;~Trevor_McInroe1;~Kevin_Sebastian_Luck1;~Stefano_V_Albrecht1;~Josiah_Hanna2", "aff": "University of Edinburgh;University of Edinburgh, University of Edinburgh;Aalto University;University of Edinburgh;University of Wisconsin - Madison", "aff_domain": "ed.ac.uk;ed.ac.uk;aalto.fi;ed.ac.uk;wisc.edu", "position": "PhD student;PhD student;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndunion2023conditional,\ntitle={Conditional Mutual Information for Disentangled Representations in Reinforcement Learning},\nauthor={Mhairi Dunion and Trevor McInroe and Kevin Sebastian Luck and Josiah P. Hanna and Stefano V Albrecht},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EmYWJsyad4}\n}", "github": "", "project": "", "reviewers": "42Lr;n6Jo;oLSP;x11j", "pdf_size": 3608989, "rating": "5;7;7;8", "confidence": "3;3;2;4", "soundness": "3;3;4;4", "novelty": "2;3;4;3", "presentation": "2;4;3;4", "wc_summary": "82;127;102;129", "wc_strengths": "23;108;89;141", "wc_weaknesses": "30;79;345;153", "wc_questions": "2;60;203;92", "wc_limitations": "1;10;71;9", "wc_review": "138;384;810;524", "wc_reply_reviewers": "0;29;31;31", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 110.0, 19.352002480363627 ], "wc_strengths_avg": [ 90.25, 43.054471312512945 ], "wc_weaknesses_avg": [ 151.75, 119.85694598144907 ], "wc_questions_avg": [ 89.25, 73.16889708065852 ], "wc_limitations_avg": [ 22.75, 28.07467720206236 ], "wc_review_avg": [ 464.0, 242.89503906008454 ], "wc_reply_reviewers_avg": [ 22.75, 13.160072188251856 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2260524502101042190&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ed.ac.uk;ed.ac.uk;aalto.fi;ed.ac.uk;wisc.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Edinburgh;Aalto University;University of Wisconsin-Madison", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.ed.ac.uk;https://www.aalto.fi;https://www.wisc.edu", "aff_unique_abbr": "Edinburgh;Aalto;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "United Kingdom;Finland;United States" }, { "title": "Honesty Is the Best Policy: Defining and Mitigating AI Deception", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72293", "id": "EmxpDiPgRu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/06fc7ae4a11a7eb5e20fe018db6c036f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EmxpDiPgRu", "openreview": "https://openreview.net/forum?id=EmxpDiPgRu", "poster": "/media/PosterPDFs/NeurIPS%202023/72293.png?t=1702479826.3269165", "slides": "https://nips.cc/virtual/2023/poster/72293", "video": "https://nips.cc/virtual/2023/poster/72293", "author_site": "Francis Ward, Francesca Toni, Francesco Belardinelli, Tom Everitt", "tldr": "", "abstract": "Deceptive agents are a challenge for the safety, trustworthiness, and cooperation of AI systems. We focus on the problem that agents might deceive in order to achieve their goals (for instance, in our experiments with language models, the goal of being evaluated as truthful).\nThere are a number of existing definitions of deception in the literature on game theory and symbolic AI, but there is no overarching theory of deception for learning agents in games. \nWe introduce a formal\ndefinition of deception in structural causal games, grounded in the philosophy\nliterature, and applicable to real-world machine learning systems.\nSeveral examples and results illustrate that our formal definition aligns with the philosophical and commonsense meaning of deception.\nOur main technical result is to provide graphical criteria for deception. 
\nWe show, experimentally, that these results can be used to mitigate deception in reinforcement learning agents and language models.", "keywords": "Deception;Causality;Game Theory", "primary_area": "", "supplementary_material": "", "author": "Francis Rhys Ward;Francesca Toni;Francesco Belardinelli;Tom Everitt", "authorids": "~Francis_Rhys_Ward1;~Francesca_Toni1;~Francesco_Belardinelli1;~Tom_Everitt1", "gender": "M;F;M;M", "homepage": "https://francisrhysward.wordpress.com/;https://www.doc.ic.ac.uk/~ft/;https://www.doc.ic.ac.uk/~fbelard/;https://tomeveritt.se", "dblp": "273/0874;t/FrancescaToni;59/2916;151/4259", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=Mr35r1EAAAAJ;https://scholar.google.com.au/citations?user=BdulyjIAAAAJ", "orcid": ";0000-0001-8194-1459;0000-0002-7768-1794;0000-0003-1210-9866", "linkedin": ";;;", "or_profile": "~Francis_Rhys_Ward1;~Francesca_Toni1;~Francesco_Belardinelli1;~Tom_Everitt1", "aff": "Imperial College London;Imperial College London;Imperial College London;Google DeepMind", "aff_domain": "ic.ac.uk;ic.ac.uk;imperial.ac.uk;google.com", "position": "PhD student;Full Professor;Lecturer;Researcher", "bibtex": "@inproceedings{\nward2023honesty,\ntitle={Honesty Is the Best Policy: Defining and Mitigating {AI} Deception},\nauthor={Francis Rhys Ward and Francesca Toni and Francesco Belardinelli and Tom Everitt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EmxpDiPgRu}\n}", "github": "", "project": "", "reviewers": "sPGw;VTZB;kn7c;Fk4a", "pdf_size": 468955, "rating": "7;8;8;8", "confidence": "5;4;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;4", "presentation": "3;2;4;2", "wc_summary": "338;124;49;269", "wc_strengths": "356;112;88;158", "wc_weaknesses": "1174;465;736;936", "wc_questions": "207;106;1;57", "wc_limitations": "89;1;66;16", "wc_review": "2164;808;940;1436", "wc_reply_reviewers": "1171;189;211;387", "wc_reply_authors": "0;130;59;0", "reply_reviewers": "3;1;3;1", "reply_authors": "1;2;4;1", "rating_avg": [ 7.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 195.0, 114.32628744081563 ], "wc_strengths_avg": [ 178.5, 105.52132485900658 ], "wc_weaknesses_avg": [ 827.75, 260.58240059528197 ], "wc_questions_avg": [ 92.75, 75.70460686114154 ], "wc_limitations_avg": [ 43.0, 35.83992187491485 ], "wc_review_avg": [ 1337.0, 531.7847308827135 ], "wc_reply_reviewers_avg": [ 489.5, 400.87747504692754 ], "wc_reply_authors_avg": [ 47.25, 53.50408862881415 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12958581398809472677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ic.ac.uk;ic.ac.uk;imperial.ac.uk;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Imperial College London;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.imperial.ac.uk;https://deepmind.com", "aff_unique_abbr": "ICL;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Transfer 
learning for atomistic simulations using GNNs and kernel mean embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72292", "id": "Enzew8XujO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f02c76bc411a6f7c9a8bb2cbf981260-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Enzew8XujO", "openreview": "https://openreview.net/forum?id=Enzew8XujO", "poster": "/media/PosterPDFs/NeurIPS%202023/72292.png?t=1702392339.4023137", "slides": "https://nips.cc/virtual/2023/poster/72292", "video": "https://nips.cc/virtual/2023/poster/72292", "author_site": "John Falk, Luigi Bonati, Pietro Novelli, Michele Parrinello, Massimiliano Pontil", "tldr": "", "abstract": "Interatomic potentials learned using machine learning methods have been successfully applied to atomistic simulations. \nHowever, accurate models require large training datasets, while generating reference calculations is computationally demanding. To bypass this difficulty, we propose a transfer learning algorithm that leverages the ability of graph neural networks (GNNs) to represent chemical environments together with kernel mean embeddings. We extract a feature map from GNNs pre-trained on the OC20 dataset and use it to learn the potential energy surface from system-specific datasets of catalytic processes. Our method is further enhanced by incorporating into the kernel the chemical species information, resulting in improved performance and interpretability. We test our approach on a series of realistic datasets of increasing complexity, showing excellent generalization and transferability performance, and improving on methods that rely on GNNs or ridge regression alone, as well as similar fine-tuning approaches.", "keywords": "GNN;Mean Embedding;Kernels;Atomistic Simulations;OCP;Transfer Learning;Molecular Dynamics;Kernel Ridge Regression;Neural Networks", "primary_area": "", "supplementary_material": "/attachment/462800244df4ae3da353fbe8ebb3241378ca2a28.pdf", "author": "John Isak Texas Falk;Luigi Bonati;Pietro Novelli;Michele Parrinello;Massimiliano Pontil", "authorids": "~John_Isak_Texas_Falk1;~Luigi_Bonati1;~Pietro_Novelli1;michele.parrinello@iit.it;~Massimiliano_Pontil4", "gender": "M;M;M;;Not Specified", "homepage": "https://isakfalk.com;https://luigibonati.github.io/;;;https://www.iit.it/web/computational-statistics-and-machine-learning", "dblp": "322/1984;318/4437;318/3513;;", "google_scholar": "s4roj8kAAAAJ;XPXf728AAAAJ;;;lcOacs8AAAAJ", "orcid": "0000-0002-6616-0045;;0000-0003-1623-5659;;0000-0001-9415-098X", "linkedin": "isak-falk/;;;;", "or_profile": "~John_Isak_Texas_Falk1;~Luigi_Bonati1;~Pietro_Novelli1;michele.parrinello@iit.it;~Massimiliano_Pontil4", "aff": "University College London;Istituto Italiano di Tecnologia, Italy;Istituto Italiano di Tecnologia;;University College London, University of London", "aff_domain": "ucl.ac.uk;iit.it;iit.it;;ucl.ac.uk", "position": "PhD student;Postdoc;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nfalk2023transfer,\ntitle={Transfer learning for atomistic simulations using {GNN}s and kernel mean embeddings},\nauthor={John Isak Texas Falk and Luigi Bonati and Pietro Novelli and Michele Parrinello and Massimiliano Pontil},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Enzew8XujO}\n}", "github": "", "project": "", "reviewers": "D8fw;7Ada;f9vG;qEyK", "pdf_size": 1630813, "rating": "5;6;7;7", "confidence": "3;3;3;5", 
"soundness": "2;2;3;4", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "62;29;72;27", "wc_strengths": "23;26;44;56", "wc_weaknesses": "320;157;69;116", "wc_questions": "81;58;87;1", "wc_limitations": "17;64;17;1", "wc_review": "503;334;289;201", "wc_reply_reviewers": "122;27;36;10", "wc_reply_authors": "711;0;40;21", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 47.5, 19.83053201505194 ], "wc_strengths_avg": [ 37.25, 13.47915056670857 ], "wc_weaknesses_avg": [ 165.5, 94.47883360838024 ], "wc_questions_avg": [ 56.75, 33.95861451826325 ], "wc_limitations_avg": [ 24.75, 23.583627795570383 ], "wc_review_avg": [ 331.75, 109.83481916041015 ], "wc_reply_reviewers_avg": [ 48.75, 43.30920802785477 ], "wc_reply_authors_avg": [ 193.0, 299.40190380156236 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15708482439733526682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ucl.ac.uk;iit.it;iit.it;;ucl.ac.uk", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University College London;Istituto Italiano di Tecnologia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.iit.it", "aff_unique_abbr": "UCL;IIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;Italy" }, { "title": "Reconciling Competing Sampling Strategies of Network Embedding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72291", "id": "EoDpq18R30", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15dc2344ea9bdc01ffb8bb2d692e4018-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EoDpq18R30", "openreview": "https://openreview.net/forum?id=EoDpq18R30", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72291", "video": "https://nips.cc/virtual/2023/poster/72291", "author_site": "Yuchen Yan, Baoyu Jing, Lihui Liu, Ruijie Wang, Jinning Li, Tarek Abdelzaher, Hanghang Tong", "tldr": "", "abstract": "Network embedding plays a significant role in a variety of applications. To capture the topology of the network, most of the existing network embedding algorithms follow a sampling training procedure, which maximizes the similarity (e.g., embedding vectors' dot product) between positively sampled node pairs and minimizes the similarity between negatively sampled node pairs in the embedding space. Typically, close node pairs function as positive samples while distant node pairs are usually considered as negative samples. However, under different or even competing sampling strategies, some methods champion sampling distant node pairs as positive samples to encapsulate longer distance information in link prediction, whereas others advocate adding close nodes into the negative sample set to boost the performance of node recommendation. In this paper, we seek to understand the intrinsic relationships between these competing strategies. 
To this end, we identify two properties (discrimination and monotonicity) that, given any node pair proximity distribution, node embeddings should embrace.\nMoreover, we quantify the empirical error of the trained similarity score w.r.t. the sampling strategy, which leads to an important finding that the discrimination property and the monotonicity property for all node pairs cannot be satisfied simultaneously in real-world applications. Guided by such analysis, a simple yet novel model (SENSEI) is proposed, which seamlessly fulfills the discrimination property and the partial monotonicity within the top-$K$ ranking list. Extensive experiments show that SENSEI outperforms the state of the art in plain network embedding.", "keywords": "Network embedding", "primary_area": "", "supplementary_material": "", "author": "Yuchen Yan;Baoyu Jing;Lihui Liu;Ruijie Wang;Jinning Li;Tarek Abdelzaher;Hanghang Tong", "authorids": "~Yuchen_Yan1;~Baoyu_Jing1;~Lihui_Liu1;~Ruijie_Wang2;~Jinning_Li2;~Tarek_Abdelzaher1;~Hanghang_Tong3", "gender": ";;M;M;M;M;", "homepage": ";;https://lihuiliullh.github.io/;https://wjerry5.github.io;https://jinningli.cn;http://abdelzaher.cs.illinois.edu/;http://tonghanghang.org", "dblp": ";210/0936;97/10091-1;57/5759-4;211/7889-1;a/TarekFAbdelzaher;58/1757", "google_scholar": ";cl9YMcUAAAAJ;GI7jyeQAAAAJ;S1TuNNIAAAAJ;ED8QSJwAAAAJ;https://scholar.google.com.tw/citations?user=cA28Zs0AAAAJ;RaINcuUAAAAJ", "orcid": ";0000-0003-1564-6499;0000-0002-3758-4041;;0000-0003-1927-9999;0000-0003-3883-7220;0000-0003-4405-3887", "linkedin": ";baoyu-jing-b37455a0/;;;jinning-li-343168162/;tarek-abdelzaher-0216071/;htong/", "or_profile": "~Yuchen_Yan1;~Baoyu_Jing1;~Lihui_Liu1;~Ruijie_Wang2;~Jinning_Li2;~Tarek_Abdelzaher1;~Hanghang_Tong3", "aff": ";University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign;University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": ";illinois.edu;uiuc.edu;uiuc.edu;illinois.edu;illinois.edu;illinois.edu", "position": ";PhD student;PhD student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyan2023reconciling,\ntitle={Reconciling Competing Sampling Strategies of Network Embedding},\nauthor={Yuchen Yan and Baoyu Jing and Lihui Liu and Ruijie Wang and Jinning Li and Tarek Abdelzaher and Hanghang Tong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EoDpq18R30}\n}", "github": "", "project": "", "reviewers": "FQQK;1EU9;fuvb", "pdf_size": 459209, "rating": "7;7;8", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;4;3", "wc_summary": "290;128;156", "wc_strengths": "60;89;84", "wc_weaknesses": "179;67;133", "wc_questions": "354;29;105", "wc_limitations": "89;13;32", "wc_review": "972;326;510", "wc_reply_reviewers": "15;18;16", "wc_reply_authors": "13;9;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 191.33333333333334, 70.69810621383166 ], "wc_strengths_avg": [ 77.66666666666667, 12.657891697365017 ], "wc_weaknesses_avg": [ 126.33333333333333, 45.966171135835204 ],
"wc_questions_avg": [ 162.66666666666666, 138.80521924224928 ], "wc_limitations_avg": [ 44.666666666666664, 32.293790252754306 ], "wc_review_avg": [ 602.6666666666666, 271.7466140032332 ], "wc_reply_reviewers_avg": [ 16.333333333333332, 1.247219128924647 ], "wc_reply_authors_avg": [ 7.333333333333333, 5.436502143433363 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6325865423750832547&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";illinois.edu;uiuc.edu;uiuc.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 7, "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unbounded Differentially Private Quantile and Maximum Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72290", "id": "Eq9AFZlAjt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4b6ef2a78684dca2fb3f1c09372e041-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Eq9AFZlAjt", "openreview": "https://openreview.net/forum?id=Eq9AFZlAjt", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72290", "video": "https://nips.cc/virtual/2023/poster/72290", "tldr": "", "abstract": "In this work we consider the problem of differentially private computation of\nquantiles for the data, especially the highest quantiles such as maximum, but\nwith an unbounded range for the dataset. We show that this can be done\nefficiently through a simple invocation of $\\texttt{AboveThreshold}$, a\nsubroutine that is iteratively called in the fundamental Sparse Vector\nTechnique, even when there is no upper bound on the data. In particular, we\nshow that this procedure can give more accurate and robust estimates on the\nhighest quantiles with applications towards clipping that is essential for\ndifferentially private sum and mean estimation. In addition, we show how two\ninvocations can handle the fully unbounded data setting. Within our study, we\nshow that an improved analysis of $\\texttt{AboveThreshold}$ can improve the\nprivacy guarantees for the widely used Sparse Vector Technique that is of\nindependent interest. We give a more general characterization of privacy loss\nfor $\\texttt{AboveThreshold}$ which we immediately apply to our method for\nimproved privacy guarantees. Our algorithm only requires one $O(n)$ pass\nthrough the data, which can be unsorted, and each subsequent query takes $O(1)$\ntime. We empirically compare our unbounded algorithm with the state-of-the-art\nalgorithms in the bounded setting. For inner quantiles, we find that our method\noften performs better on non-synthetic datasets. 
For the maximal quantiles,\nwhich we apply to differentially private sum computation, we find that our\nmethod performs significantly better.", "keywords": "Differential privacy;Theory;Sparse Vector Technique;Quantile", "primary_area": "", "supplementary_material": "/attachment/825f4920724eed19e3b98374ff1505187cec3420.pdf", "author": "David Durfee", "authorids": "~David_Durfee1", "gender": "M", "homepage": "", "dblp": "155/9794", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~David_Durfee1", "aff": "LinkedIn", "aff_domain": "linkedin.com", "position": "Researcher", "bibtex": "@inproceedings{\ndurfee2023unbounded,\ntitle={Unbounded Differentially Private Quantile and Maximum Estimation},\nauthor={David Durfee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Eq9AFZlAjt}\n}", "github": "", "project": "", "reviewers": "VnsR;QwHj;nQ9t;Qz1m", "pdf_size": 995437, "rating": "4;6;7;7", "confidence": "2;3;3;4", "soundness": "1;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "21;81;59;253", "wc_strengths": "63;40;116;147", "wc_weaknesses": "206;112;14;68", "wc_questions": "43;44;91;68", "wc_limitations": "1;5;29;15", "wc_review": "334;282;309;551", "wc_reply_reviewers": "36;24;142;56", "wc_reply_authors": "155;66;654;61", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 103.5, 88.94239708935216 ], "wc_strengths_avg": [ 91.5, 42.26405091800832 ], "wc_weaknesses_avg": [ 100.0, 70.35623639735144 ], "wc_questions_avg": [ 61.5, 19.75474626513841 ], "wc_limitations_avg": [ 12.5, 10.805091392487155 ], "wc_review_avg": [ 369.0, 106.67473927786278 ], "wc_reply_reviewers_avg": [ 64.5, 46.18170633486814 ], "wc_reply_authors_avg": [ 234.0, 245.35382613686707 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3626817358755822945&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "linkedin.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "LinkedIn Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.linkedin.com", "aff_unique_abbr": "LinkedIn", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Sample Complexity of Goal-Conditioned Hierarchical Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72289", "id": "EqnZqrbFrc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c5ed2c8acda8c3716b1b6f9c6c713aaa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EqnZqrbFrc", "openreview": "https://openreview.net/forum?id=EqnZqrbFrc", "poster": "/media/PosterPDFs/NeurIPS%202023/72289.png?t=1701888771.7031121", "slides": "https://nips.cc/virtual/2023/poster/72289", "video": "https://nips.cc/virtual/2023/poster/72289", "author_site": "Arnaud Robert, Ciara Pike-Burke, Aldo Faisal", "tldr": "", "abstract": "Hierarchical Reinforcement Learning (HRL) algorithms can perform planning at multiple levels of abstraction. 
Empirical results have shown that state or temporal abstractions might significantly improve the sample efficiency of algorithms. Yet, we still do not have a complete understanding of the basis of those efficiency gains nor any theoretically grounded design rules. In this paper, we derive a lower bound on the sample complexity for the considered class of goal-conditioned HRL algorithms. The proposed lower bound empowers us to quantify the benefits of hierarchical decomposition and leads to the design of a simple Q-learning-type algorithm that leverages hierarchical decompositions. We empirically validate our theoretical findings by investigating the sample complexity of the proposed hierarchical algorithm on a spectrum of tasks (hierarchical $n$-rooms, Gymnasium's Taxi). The hierarchical $n$-rooms tasks were designed to allow us to dial their complexity over multiple orders of magnitude. Our theory and algorithmic findings provide a step towards answering the foundational question of quantifying the improvement hierarchical decomposition offers over monolithic solutions in reinforcement learning.", "keywords": "Hierarchical Reinforcement Learning;Sample Complexity", "primary_area": "", "supplementary_material": "/attachment/ff74ae2fd2ee87c301551d666f2f1b2c50f85c04.zip", "author": "Arnaud Robert;Ciara Pike-Burke;Aldo A. Faisal", "authorids": "~Arnaud_Robert1;~Ciara_Pike-Burke2;~Aldo_A._Faisal1", "gender": "M;;M", "homepage": "https://faisallab.org/members/arnaud-robert;https://www.ma.imperial.ac.uk/~cpikebur/;https://www.imperial.ac.uk/people/a.faisal/", "dblp": ";202/1263;54/5027", "google_scholar": "eV_mq78AAAAJ;Hl1vu1MAAAAJ;https://scholar.google.co.uk/citations?user=WjHjbrwAAAAJ", "orcid": ";;0000-0003-0813-7207", "linkedin": ";;a-aldo-faisal-057b704b/?originalSubdomain=uk", "or_profile": "~Arnaud_Robert1;~Ciara_Pike-Burke2;~Aldo_A._Faisal1", "aff": "Imperial College London;Imperial College London;Universit\u00e4t Bayreuth", "aff_domain": "ic.ac.uk;imperial.ac.uk;uni-bayreuth.de", "position": "PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nrobert2023sample,\ntitle={Sample Complexity of Goal-Conditioned Hierarchical Reinforcement Learning},\nauthor={Arnaud Robert and Ciara Pike-Burke and Aldo A. 
Faisal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EqnZqrbFrc}\n}", "github": "", "project": "", "reviewers": "2sTV;hRCv;VzQJ", "pdf_size": 2039491, "rating": "5;7;8", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "50;183;105", "wc_strengths": "28;68;109", "wc_weaknesses": "26;108;212", "wc_questions": "217;145;4", "wc_limitations": "2;4;22", "wc_review": "323;508;452", "wc_reply_reviewers": "177;21;16", "wc_reply_authors": "171;11;11", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 112.66666666666667, 54.5669822837543 ], "wc_strengths_avg": [ 68.33333333333333, 33.06895153396242 ], "wc_weaknesses_avg": [ 115.33333333333333, 76.11103000806708 ], "wc_questions_avg": [ 122.0, 88.46468221838589 ], "wc_limitations_avg": [ 9.333333333333334, 8.993825042154693 ], "wc_review_avg": [ 427.6666666666667, 77.46110134914312 ], "wc_reply_reviewers_avg": [ 71.33333333333333, 74.74549410425278 ], "wc_reply_authors_avg": [ 64.33333333333333, 75.42472332656507 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11243348414489204706&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ic.ac.uk;imperial.ac.uk;uni-bayreuth.de", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Imperial College London;University of Bayreuth", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.uni-bayreuth.de", "aff_unique_abbr": "ICL;Uni Bayreuth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Does Invariant Graph Learning via Environment Augmentation Learn Invariance?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72288", "id": "EqpR9Vtt13", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e21a7b668ce3ea2c9c964c52d1c9f161-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=EqpR9Vtt13", "openreview": "https://openreview.net/forum?id=EqpR9Vtt13", "poster": "/media/PosterPDFs/NeurIPS%202023/72288.png?t=1701688558.966099", "slides": "https://nips.cc/virtual/2023/poster/72288", "video": "https://nips.cc/virtual/2023/poster/72288", "author_site": "Yongqiang Chen, Yatao Bian, Kaiwen Zhou, Binghui Xie, Bo Han, James Cheng", "tldr": "", "abstract": "Invariant graph representation learning aims to learn the invariance among data from different environments for out-of-distribution generalization on graphs. As the graph environment partitions are usually expensive to obtain, augmenting the environment information has become the de facto approach. However, the usefulness of the augmented environment information has never been verified. In this work, we find that it is fundamentally impossible to learn invariant graph representations via environment augmentation without additional assumptions. 
Therefore, we develop a set of minimal assumptions, including variation sufficiency and variation consistency, for feasible invariant graph learning. We then propose a new framework Graph invAriant Learning Assistant (GALA). GALA incorporates an assistant model that needs to be sensitive to graph environment changes or distribution shifts. The correctness of the proxy predictions by the assistant model hence can differentiate the variations in spurious subgraphs. We show that extracting the maximally invariant subgraph to the proxy predictions provably identifies the underlying invariant subgraph for successful OOD generalization under the established minimal assumptions. Extensive experiments on datasets including DrugOOD with various graph distribution shifts confirm the effectiveness of GALA.", "keywords": "Graph Neural Networks;Out-of-Distribution Generalization;Invariant Learning", "primary_area": "", "supplementary_material": "/attachment/d9b396abd68b0e741035c7b130ebaa142dc3e6c3.pdf", "author": "Yongqiang Chen;Yatao Bian;Kaiwen Zhou;Binghui Xie;Bo Han;James Cheng", "authorids": "~Yongqiang_Chen1;~Yatao_Bian1;~Kaiwen_Zhou2;~Binghui_Xie1;~Bo_Han1;~James_Cheng2", "gender": ";M;M;M;M;M", "homepage": "https://lfhase.win;https://jnhujnhu.github.io/;https://xiebinghui.github.io/;https://www.cse.cuhk.edu.hk/~jcheng/;https://bhanml.github.io/;https://yataobian.com", "dblp": "76/5774-2;215/4936;286/4313;06/4171;241/0472-3;222/2694", "google_scholar": "huQ_Ig8AAAAJ;nHmlZ5QAAAAJ;;;nTNjqHwAAAAJ;oZBTlBkAAAAJ", "orcid": ";;0000-0001-6533-9281;;;0000-0002-2368-4084", "linkedin": ";;;;;", "or_profile": "~Yongqiang_Chen1;~Kaiwen_Zhou2;~Binghui_Xie1;~James_Cheng2;~bo_han2;~An_Bian1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Huawei Noah's Ark Lab;Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong;RIKEN;Tencent AI Lab", "aff_domain": "cse.cuhk.edu.hk;huawei.com;cse.cuhk.edu.hk;cuhk.edu.hk;riken.jp;tencent.com", "position": "PhD student;Researcher;PhD student;Associate Professor;Adjunct Scientist;Senior researcher ", "bibtex": "@inproceedings{\nchen2023does,\ntitle={Does Invariant Graph Learning via Environment Augmentation Learn Invariance?},\nauthor={Yongqiang Chen and Yatao Bian and Kaiwen Zhou and Binghui Xie and Bo Han and James Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=EqpR9Vtt13}\n}", "github": "", "project": "", "reviewers": "voKo;3cig;W1GV;m4Xg;waKT", "pdf_size": 2945914, "rating": "4;5;5;6;7", "confidence": "4;3;1;3;4", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;1;2;3;4", "wc_summary": "99;72;83;63;129", "wc_strengths": "112;44;68;89;143", "wc_weaknesses": "99;110;67;127;123", "wc_questions": "202;45;19;57;35", "wc_limitations": "27;10;12;1;9", "wc_review": "539;281;249;337;439", "wc_reply_reviewers": "113;15;0;69;167", "wc_reply_authors": "634;60;226;157;367", "reply_reviewers": "1;1;0;1;1", "reply_authors": "5;2;3;3;4", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 89.2, 23.24134247413432 ], "wc_strengths_avg": [ 91.2, 34.312679872023985 ], "wc_weaknesses_avg": [ 105.2, 21.507208094032105 ], "wc_questions_avg": [ 71.6, 66.37951491235832 ], 
"wc_limitations_avg": [ 11.8, 8.47112743381895 ], "wc_review_avg": [ 369.0, 106.74080756674086 ], "wc_reply_reviewers_avg": [ 72.8, 61.87859080489795 ], "wc_reply_authors_avg": [ 288.8, 199.47069960272358 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 3.4, 1.019803902718557 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.17902871850985824, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16482172386945624287&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cse.cuhk.edu.hk;huawei.com;cse.cuhk.edu.hk;cuhk.edu.hk;riken.jp;tencent.com", "author_num": 6, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "Chinese University of Hong Kong;Huawei;RIKEN;Tencent", "aff_unique_dep": "Department of Computer Science and Engineering;Noah's Ark Lab;;Tencent AI Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.huawei.com;https://www.riken.jp;https://ai.tencent.com", "aff_unique_abbr": "CUHK;Huawei;RIKEN;Tencent AI Lab", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Japan" }, { "title": "Finite-Time Logarithmic Bayes Regret Upper Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72287", "id": "ErAP8kF4tG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d9057d84a9fc37523bf826232ea6820-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ErAP8kF4tG", "openreview": "https://openreview.net/forum?id=ErAP8kF4tG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72287", "video": "https://nips.cc/virtual/2023/poster/72287", "author_site": "Alexia Atsidakou, Branislav Kveton, Sumeet Katariya, Constantine Caramanis, Sujay Sanghavi", "tldr": "", "abstract": "We derive the first finite-time logarithmic Bayes regret upper bounds for Bayesian bandits. In a multi-armed bandit, we obtain $O(c_\\Delta \\log n)$ and $O(c_h \\log^2 n)$ upper bounds for an upper confidence bound algorithm, where $c_h$ and $c_\\Delta$ are constants depending on the prior distribution and the gaps of bandit instances sampled from it, respectively. The latter bound asymptotically matches the lower bound of Lai (1987). Our proofs are a major technical departure from prior works, while being simple and general. To show the generality of our techniques, we apply them to linear bandits. Our results provide insights on the value of prior in the Bayesian setting, both in the objective and as a side information given to the learner. 
They significantly improve upon existing $\\tilde{O}(\\sqrt{n})$ bounds, which have become standard in the literature despite the logarithmic lower bound of Lai (1987).", "keywords": "Bayesian bandits;logarithmic regret bounds;multi-armed bandits;linear bandits", "primary_area": "", "supplementary_material": "/attachment/015ce6643aea8702597d3c4304de9f2858d87460.zip", "author": "Alexia Atsidakou;Branislav Kveton;Sumeet Katariya;Constantine Caramanis;sujay sanghavi", "authorids": "~Alexia_Atsidakou1;~Branislav_Kveton1;~Sumeet_Katariya1;~Constantine_Caramanis1;~sujay_sanghavi1", "gender": "F;M;;M;M", "homepage": "https://www.wncg.org/people/students/alexia-atsidakou;http://www.bkveton.com;;http://users.ece.utexas.edu/~cmcaram/constantine_caramanis/Home.html;https://sites.utexas.edu/sanghavi", "dblp": "289/1602;92/5526;72/9639;96/5760;69/4911.html", "google_scholar": ";CZaDvPgAAAAJ;;47YTUrEAAAAJ;O-DazBUAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Alexia_Atsidakou1;~Branislav_Kveton1;~Sumeet_Katariya1;~Constantine_Caramanis1;~sujay_sanghavi1", "aff": "University of Texas, Austin;Amazon;Amazon;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;amazon.com;amazon.com;utexas.edu;utexas.edu", "position": "PhD student;Principal Scientist;Applied Scientist;Full Professor;Associate Professor", "bibtex": "@inproceedings{\natsidakou2023logarithmic,\ntitle={Logarithmic Bayes Regret Bounds},\nauthor={Alexia Atsidakou and Branislav Kveton and Sumeet Katariya and Constantine Caramanis and sujay sanghavi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ErAP8kF4tG}\n}", "github": "", "project": "", "reviewers": "Q5oQ;o1t4;MGeP;H2AN;7MLE", "pdf_size": 388531, "rating": "5;5;6;6;6", "confidence": "3;3;4;3;1", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "42;64;83;67;178", "wc_strengths": "44;69;75;20;32", "wc_weaknesses": "136;228;63;21;1", "wc_questions": "468;14;104;75;176", "wc_limitations": "1;8;4;1;1", "wc_review": "691;383;329;184;388", "wc_reply_reviewers": "51;17;14;0;0", "wc_reply_authors": "236;21;24;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.8, 47.4358514206291 ], "wc_strengths_avg": [ 48.0, 21.099763031844695 ], "wc_weaknesses_avg": [ 89.8, 83.1514281296479 ], "wc_questions_avg": [ 167.4, 159.07432225221015 ], "wc_limitations_avg": [ 3.0, 2.756809750418044 ], "wc_review_avg": [ 395.0, 165.3396504169523 ], "wc_reply_reviewers_avg": [ 16.4, 18.661189672687 ], "wc_reply_authors_avg": [ 56.2, 90.46634733424358 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7840831772544569566&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "utexas.edu;amazon.com;amazon.com;utexas.edu;utexas.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "University of Texas at Austin;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.amazon.com", "aff_unique_abbr": "UT Austin;Amazon", 
"aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "OpenShape: Scaling Up 3D Shape Representation Towards Open-World Understanding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72286", "id": "Eu4Kkefq7p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c7304e77c832ddc70075dfee081ca6c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Eu4Kkefq7p", "openreview": "https://openreview.net/forum?id=Eu4Kkefq7p", "poster": "/media/PosterPDFs/NeurIPS%202023/72286.png?t=1701828381.3197162", "slides": "https://nips.cc/virtual/2023/poster/72286", "video": "https://nips.cc/virtual/2023/poster/72286", "author_site": "Minghua Liu, Ruoxi Shi, Kaiming Kuang, Yinhao Zhu, Xuanlin Li, Shizhong Han, Hong Cai, Fatih Porikli, Hao Su", "tldr": "", "abstract": "We introduce OpenShape, a method for learning multi-modal joint representations of text, image, and point clouds. We adopt the commonly used multi-modal contrastive learning framework for representation alignment, but with a specific focus on scaling up 3D representations to enable open-world 3D shape understanding. To achieve this, we scale up training data by ensembling multiple 3D datasets and propose several strategies to automatically filter and enrich noisy text descriptions. We also explore and compare strategies for scaling 3D backbone networks and introduce a novel hard negative mining module for more efficient training. We evaluate OpenShape on zero-shot 3D classification benchmarks and demonstrate its superior capabilities for open-world recognition. Specifically, OpenShape achieves a zero-shot accuracy of 46.8% on the 1,156-category Objaverse-LVIS benchmark, compared to less than 10% for existing methods. OpenShape also achieves an accuracy of 85.3% on ModelNet40, outperforming previous zero-shot baseline methods by 20% and performing on par with some fully-supervised methods. Furthermore, we show that our learned embeddings encode a wide range of visual and semantic concepts (e.g., subcategories, color, shape, style) and facilitate fine-grained text-3D and image-3D interactions. 
Due to their alignment with CLIP embeddings, our learned shape representations can also be integrated with off-the-shelf CLIP-based models for various applications, such as point cloud captioning and point cloud-conditioned image generation.", "keywords": "3d;shape understanding;open-world understanding;zero-shot 3D classification;vision-language model", "primary_area": "", "supplementary_material": "/attachment/af2d2da4ef34812715a6299cff368b22213fa980.pdf", "author": "Minghua Liu;Ruoxi Shi;Kaiming Kuang;Yinhao Zhu;Xuanlin Li;Shizhong Han;Hong Cai;Fatih Porikli;Hao Su", "authorids": "~Minghua_Liu1;~Ruoxi_Shi1;~Kaiming_Kuang2;~Yinhao_Zhu1;~Xuanlin_Li1;~Shizhong_Han2;~Hong_Cai1;~Fatih_Porikli2;~Hao_Su1", "gender": "M;Not Specified;M;M;;M;M;M;M", "homepage": "https://cseweb.ucsd.edu//~mil070/;https://rshi.top/;https://kaimingkuang.github.io;https://yinhaoz.github.io/;https://xuanlinli17.github.io/;;https://herbertcai.github.io/;https://www.porikli.com;http://ai.ucsd.edu/~haosu", "dblp": "28/8907;190/7068;;202/3667;251/3029;73/7894;;p/FatihMuratPorikli;09/4945-1", "google_scholar": "6U3IGtEAAAAJ;Z7zLvdkAAAAJ;;89uRjBkAAAAJ;7vyVxxQAAAAJ;Nlbo2H8AAAAJ;9y3Kd3cAAAAJ;https://scholar.google.com.tw/citations?user=VpB8NZ8AAAAJ;1P8Zu04AAAAJ", "orcid": ";;;;;;0000-0002-7388-747X;0000-0002-1520-4466;", "linkedin": ";;;;xuanlin-li-4684b8145/;shizhonghan/;;fatih-porikli-a95643/;", "or_profile": "~Minghua_Liu1;~Ruoxi_Shi1;~Kaiming_Kuang2;~Yinhao_Zhu1;~Xuanlin_Li1;~Shizhong_Han2;~Hong_Cai1;~Fatih_Porikli2;~Hao_Su1", "aff": "University of California, San Diego;Shanghai Jiaotong University;University of California, San Diego;Qualcomm AI Research;University of California, San Diego;QualComm;Qualcomm AI Research;QualComm;University of California, San Diego", "aff_domain": "ucsd.edu;sjtu.edu.cn;ucsd.edu;qti.qualcomm.com;ucsd.edu;qualcomm.com;qualcomm.com;qualcomm.com;ucsd.edu", "position": "PhD student;Undergrad student;MS student;Researcher;PhD student;Researcher;Researcher;Senior Director;Assistant Professor", "bibtex": "@inproceedings{\nliu2023openshape,\ntitle={OpenShape: Scaling Up 3D Shape Representation Towards Open-World Understanding},\nauthor={Minghua Liu and Ruoxi Shi and Kaiming Kuang and Yinhao Zhu and Xuanlin Li and Shizhong Han and Hong Cai and Fatih Porikli and Hao Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Eu4Kkefq7p}\n}", "github": "", "project": "", "reviewers": "quLe;9Jeu;Mx8V;Q47N;zuqY", "pdf_size": 29988305, "rating": "6;6;6;6;7", "confidence": "4;4;5;5;4", "soundness": "3;3;3;3;4", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "138;89;69;40;95", "wc_strengths": "82;62;53;52;101", "wc_weaknesses": "322;62;155;59;138", "wc_questions": "125;4;112;1;62", "wc_limitations": "12;39;2;1;59", "wc_review": "679;256;391;153;455", "wc_reply_reviewers": "21;11;11;0;20", "wc_reply_authors": "0;0;376;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.2, 32.24530973645625 ], "wc_strengths_avg": [ 70.0, 18.878559267062727 ], "wc_weaknesses_avg": [ 147.2, 95.64392296429502 ], "wc_questions_avg": [ 60.8, 52.05151294631117 ], "wc_limitations_avg": [ 22.6, 22.791226382097122 ], "wc_review_avg": [ 386.8, 179.90041689779375 ], 
"wc_reply_reviewers_avg": [ 12.6, 7.605261336732617 ], "wc_reply_authors_avg": [ 75.2, 150.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9029868179838489840&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucsd.edu;sjtu.edu.cn;ucsd.edu;qti.qualcomm.com;ucsd.edu;qualcomm.com;qualcomm.com;qualcomm.com;ucsd.edu", "author_num": 9, "aff_unique_index": "0;1;0;2;0;3;2;3;0", "aff_unique_norm": "University of California, San Diego;Shanghai Jiao Tong University;Qualcomm;Qualcomm Incorporated", "aff_unique_dep": ";;Qualcomm AI Research;", "aff_unique_url": "https://www.ucsd.edu;https://www.sjtu.edu.cn;https://www.qualcomm.com/research;https://www.qualcomm.com", "aff_unique_abbr": "UCSD;SJTU;QAI;Qualcomm", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Spiking PointNet: Spiking Neural Networks for Point Clouds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72285", "id": "Ev2XuqvJCy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8296d5800a8e68e58ad0472b393be80e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ev2XuqvJCy", "openreview": "https://openreview.net/forum?id=Ev2XuqvJCy", "poster": "/media/PosterPDFs/NeurIPS%202023/72285.png?t=1699806814.9934597", "slides": "https://nips.cc/virtual/2023/poster/72285", "video": "https://nips.cc/virtual/2023/poster/72285", "author_site": "Dayong Ren, Zhe Ma, Yuanpei Chen, Weihang Peng, Xiaode Liu, Yuhan Zhang, Yufei Guo, Yufei Guo", "tldr": "", "abstract": "Recently, Spiking Neural Networks (SNNs), enjoying extreme energy efficiency, have drawn much research attention on 2D visual recognition and shown gradually increasing application potential. However, it still remains underexplored whether SNNs can be generalized to 3D recognition. To this end, we present Spiking PointNet in the paper, the first spiking neural model for efficient deep learning on point clouds. We discover that the two huge obstacles limiting the application of SNNs in point clouds are: the intrinsic optimization obstacle of SNNs that impedes the training of a big spiking model with large time steps, and the expensive memory and computation cost of PointNet that makes training a big spiking point model unrealistic. To solve the problems simultaneously, we present a trained-less but learning-more paradigm for Spiking PointNet with theoretical justifications and in-depth experimental analysis. In specific, our Spiking PointNet is trained with only a single time step but can obtain better performance with multiple time steps inference, compared to the one trained directly with multiple time steps. We conduct various experiments on ModelNet10, ModelNet40 to demonstrate the effectiveness of Sipiking PointNet. Notably, our Spiking PointNet even can outperform its ANN counterpart, which is rare in the SNN field thus providing a potential research direction for the following work. Moreover, Spiking PointNet shows impressive speedup and storage saving in the training phase. 
Our code is open-sourced at https://github.com/DayongRen/Spiking-PointNet.", "keywords": "Spiking Neural Networks;Point Clouds", "primary_area": "", "supplementary_material": "/attachment/4b677c42c9ccac050af5519d95f442446c8691c5.zip", "author": "Dayong Ren;Zhe Ma;Yuanpei Chen;Weihang Peng;Xiaode Liu;Yuhan Zhang;Yufei Guo", "authorids": "~Dayong_Ren1;~Zhe_Ma2;~Yuanpei_Chen1;~Weihang_Peng2;~Xiaode_Liu2;~Yuhan_Zhang3;~Yufei_Guo2", "gender": "M;M;Non-Binary;;;;", "homepage": "http://www.njumeta.com/rendy/;https://dblp.org/pid/22/6672;;;;;", "dblp": "213/3263;22/6672-1;;;;;", "google_scholar": ";;;;;;", "orcid": ";;0000-0002-4674-553X;;;;", "linkedin": ";;;;;;", "or_profile": "~Dayong_Ren1;~Zhe_Ma2;~Yuanpei_Chen1;~Weihang_Peng2;~Xiaode_Liu2;~Yuhan_Zhang3;~Yufei_Guo2", "aff": "Nanjing University;Intelligent science and technology academy limited of CASIC;Baidu;;;;", "aff_domain": "nju.edu;casic.com;baidu.com;;;;", "position": "PhD student;Full Professor;Researcher;;;;", "bibtex": "@inproceedings{\nren2023spiking,\ntitle={Spiking PointNet: Spiking Neural Networks for Point Clouds},\nauthor={Dayong Ren and Zhe Ma and Yuanpei Chen and Weihang Peng and Xiaode Liu and Yuhan Zhang and Yufei Guo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ev2XuqvJCy}\n}", "github": "", "project": "", "reviewers": "zgZ2;CQRm;f5wg;o3KR", "pdf_size": 680106, "rating": "4;4;6;7", "confidence": "4;5;4;5", "soundness": "2;2;3;2", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "65;46;68;91", "wc_strengths": "27;10;36;44", "wc_weaknesses": "157;129;36;55", "wc_questions": "108;4;10;3", "wc_limitations": "41;34;4;7", "wc_review": "398;223;154;200", "wc_reply_reviewers": "75;0;63;89", "wc_reply_authors": "556;58;8;140", "reply_reviewers": "1;0;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.5, 15.976545308670458 ], "wc_strengths_avg": [ 29.25, 12.636751956100111 ], "wc_weaknesses_avg": [ 94.25, 50.19648892103909 ], "wc_questions_avg": [ 31.25, 44.39242615582077 ], "wc_limitations_avg": [ 21.5, 16.224980739587952 ], "wc_review_avg": [ 243.75, 92.45640864753508 ], "wc_reply_reviewers_avg": [ 56.75, 34.03215391361528 ], "wc_reply_authors_avg": [ 190.5, 216.21921746227832 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17857438538379525502&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "nju.edu;casic.com;baidu.com;;;;", "author_num": 7, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nanjing University;China Aerospace Science and Industry Corporation;Baidu", "aff_unique_dep": ";Intelligent Science and Technology Academy;Baidu, Inc.", "aff_unique_url": "https://www.nju.edu.cn;http://www.casic.com.cn/;https://www.baidu.com", "aff_unique_abbr": "Nanjing U;CASIC;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Kronecker-Factored Approximate Curvature for Modern Neural Network Architectures", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72284", "id": "Ex3oJEKS53", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a6679e3d5b9f7d5f09cdb79a5fc3fd8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ex3oJEKS53", "openreview": "https://openreview.net/forum?id=Ex3oJEKS53", "poster": "/media/PosterPDFs/NeurIPS%202023/72284.png?t=1702033836.286794", "slides": "https://nips.cc/virtual/2023/poster/72284", "video": "https://nips.cc/virtual/2023/poster/72284", "author_site": "Runa Eschenhagen, Alexander Immer, Richard Turner, Frank Schneider, Philipp Hennig", "tldr": "", "abstract": "The core components of many modern neural network architectures, such as transformers, convolutional, or graph neural networks, can be expressed as linear layers with *weight-sharing*. Kronecker-Factored Approximate Curvature (K-FAC), a second-order optimisation method, has shown promise to speed up neural network training and thereby reduce computational costs. However, there is currently no framework to apply it to generic architectures, specifically ones with linear weight-sharing layers. In this work, we identify two different settings of linear weight-sharing layers which motivate two flavours of K-FAC -- *expand* and *reduce*. We show that they are exact for deep linear networks with weight-sharing in their respective setting. Notably, K-FAC-reduce is generally faster than K-FAC-expand, which we leverage to speed up automatic hyperparameter selection via optimising the marginal likelihood for a Wide ResNet. Finally, we observe little difference between these two K-FAC variations when using them to train both a graph neural network and a vision transformer. However, both variations are able to reach a fixed validation metric target in $50$-$75$\\% of the number of steps of a first-order reference run, which translates into a comparable improvement in wall-clock time. 
This highlights the potential of applying K-FAC to modern neural network architectures.", "keywords": "deep learning;second-order;optimization;natural gradient;fisher;gauss-newton;k-fac;weight-sharing", "primary_area": "", "supplementary_material": "/attachment/9d0086dcab3930ddb927d8b8925f2abc013e406e.zip", "author": "Runa Eschenhagen;Alexander Immer;Richard E Turner;Frank Schneider;Philipp Hennig", "authorids": "~Runa_Eschenhagen1;~Alexander_Immer1;~Richard_E_Turner1;~Frank_Schneider1;~Philipp_Hennig1", "gender": ";;M;M;M", "homepage": "https://runame.github.io;;https://rich-turner-group.github.io/;https://fsschneider.github.io;http://mml.inf.uni-tuebingen.de", "dblp": "242/9235;;40/5352;41/328-1;08/9077", "google_scholar": "Ribmq4oAAAAJ;;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;znq-WkAAAAAJ;https://scholar.google.de/citations?user=UeG5w08AAAAJ", "orcid": ";;;;0000-0001-7293-6092", "linkedin": ";;;frankstefanschneider/;", "or_profile": "~Runa_Eschenhagen1;~Alexander_Immer1;~Richard_E_Turner1;~Frank_Schneider1;~Philipp_Hennig1", "aff": "University of T\u00fcbingen;;Microsoft Research;University of T\u00fcbingen;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;;research.microsoft.com;uni-tuebingen.de;uni-tuebingen.de", "position": "MS student;;Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\neschenhagen2023kroneckerfactored,\ntitle={Kronecker-Factored Approximate Curvature for Modern Neural Network Architectures},\nauthor={Runa Eschenhagen and Alexander Immer and Richard E Turner and Frank Schneider and Philipp Hennig},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ex3oJEKS53}\n}", "github": "", "project": "", "reviewers": "NJ6c;T2nD;t9fV;hvpV;M1Nr;1aFR", "pdf_size": 1104808, "rating": "5;5;6;7;7;7", "confidence": "3;3;4;3;3;2", "soundness": "3;3;3;3;3;2", "novelty": "3;2;3;3;3;3", "presentation": "4;2;3;3;2;3", "wc_summary": "36;50;70;79;109;48", "wc_strengths": "16;20;119;92;45;74", "wc_weaknesses": "11;62;159;77;26;89", "wc_questions": "13;26;295;29;27;46", "wc_limitations": "10;5;1;7;35;7", "wc_review": "86;163;644;284;242;264", "wc_reply_reviewers": "0;15;385;15;18;33", "wc_reply_authors": "0;0;812;0;0;0", "reply_reviewers": "0;1;2;1;1;1", "reply_authors": "1;1;2;1;1;1", "rating_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 3.0, 0.5773502691896257 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 65.33333333333333, 24.191366687771534 ], "wc_strengths_avg": [ 61.0, 37.5410886008029 ], "wc_weaknesses_avg": [ 70.66666666666667, 47.97453027967607 ], "wc_questions_avg": [ 72.66666666666667, 99.89438867568532 ], "wc_limitations_avg": [ 10.833333333333334, 11.14176327557218 ], "wc_review_avg": [ 280.5, 175.84818262732583 ], "wc_reply_reviewers_avg": [ 77.66666666666667, 137.7771953392707 ], "wc_reply_authors_avg": [ 135.33333333333334, 302.61453295497154 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.32163376045133846, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8878367764553616333&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": 
"uni-tuebingen.de;;research.microsoft.com;uni-tuebingen.de;uni-tuebingen.de", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of T\u00fcbingen;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Uni T\u00fcbingen;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;United States" }, { "title": "GlucoSynth: Generating Differentially-Private Synthetic Glucose Traces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72283", "id": "Eysb8t3MJ5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ab51646ca369140c3c3ece011b66587-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Eysb8t3MJ5", "openreview": "https://openreview.net/forum?id=Eysb8t3MJ5", "poster": "/media/PosterPDFs/NeurIPS%202023/72283.png?t=1701716364.8578358", "slides": "https://nips.cc/virtual/2023/poster/72283", "video": "https://nips.cc/virtual/2023/poster/72283", "author_site": "Josephine Lamp, Mark Derdzinski, Christopher Hannemann, Joost van der Linden, Lu Feng, Tianhao Wang, David Evans", "tldr": "", "abstract": "We focus on the problem of generating high-quality, private synthetic glucose traces, a task generalizable to many other time series sources. Existing methods for time series data synthesis, such as those using Generative Adversarial Networks (GANs), are not able to capture the innate characteristics of glucose data and cannot provide any formal privacy guarantees without severely degrading the utility of the synthetic data. In this paper we present GlucoSynth, a novel privacy-preserving GAN framework to generate synthetic glucose traces. The core intuition behind our approach is to conserve relationships amongst motifs (glucose events) within the traces, in addition to temporal dynamics. Our framework incorporates differential privacy mechanisms to provide strong formal privacy guarantees. 
We provide a comprehensive evaluation on the real-world utility of the data using 1.2 million glucose traces; GlucoSynth outperforms all previous methods in its ability to generate high-quality synthetic glucose traces with strong privacy guarantees.", "keywords": "Synthetic Data;Time Series;Generative Adversarial Networks;Differential Privacy;Glucose;Diabetes", "primary_area": "", "supplementary_material": "/attachment/ae6be756a0fd24b00e39d9ebcda19828e6eea580.pdf", "author": "Josephine Lamp;Mark Derdzinski;Christopher Hannemann;Joost Van der Linden;Lu Feng;Tianhao Wang;David Evans", "authorids": "~Josephine_Lamp1;~Mark_Derdzinski1;~Christopher_Hannemann1;~Joost_Van_der_Linden1;~Lu_Feng4;~Tianhao_Wang3;~David_Evans1", "gender": "F;M;;;F;M;Not Specified", "homepage": ";http://markderdzinski.com/;;https://github.com/joosthvanderlinden;https://www.cs.virginia.edu/~lufeng/;https://tianhao.wang;https://www.cs.virginia.edu/evans/", "dblp": "178/7104;;;341/6087;48/4233-1;https://dblp.uni-trier.de/pid/145/3288-1.html;https://dblp.uni-trier.de/pid/e/DavidEvans", "google_scholar": "RXk5bbsAAAAJ;DP_eA00AAAAJ;;Oq-WKx4AAAAJ;https://scholar.google.com/citations?hl=en;TkgyXGwAAAAJ;DsR4PucAAAAJ", "orcid": "0000-0002-4982-7768;;;;0000-0002-4651-8441;;", "linkedin": "josephinelamp/;derdzinski;;joosthvanderlinden/;;;", "or_profile": "~Josephine_Lamp1;~Mark_Derdzinski1;~Christopher_Hannemann1;~Joost_Van_der_Linden1;~Lu_Feng4;~Tianhao_Wang3;~David_Evans1", "aff": "University of Virginia, Charlottesville;Dexcom, Inc.;;Dexcom, Inc.;University of Virginia, Charlottesville;University of Virginia, Charlottesville;University of Virginia", "aff_domain": "virginia.edu;dexcom.com;;dexcom.com;virginia.edu;virginia.edu;virginia.edu", "position": "PhD student;Senior Manager;;Data Scientist;Assistant Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nlamp2023glucosynth,\ntitle={GlucoSynth: Generating Differentially-Private Synthetic Glucose Traces},\nauthor={Josephine Lamp and Mark Derdzinski and Christopher Hannemann and Joost Van der Linden and Lu Feng and Tianhao Wang and David Evans},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Eysb8t3MJ5}\n}", "github": "", "project": "", "reviewers": "2Sh2;VFZB;yHP4;Q4qV;Eaai", "pdf_size": 23651939, "rating": "5;5;5;5;6", "confidence": "3;4;3;3;3", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;2;2;4", "wc_summary": "70;124;75;34;132", "wc_strengths": "53;11;63;40;77", "wc_weaknesses": "184;132;103;90;283", "wc_questions": "82;128;104;42;216", "wc_limitations": "8;63;49;8;97", "wc_review": "397;458;394;214;805", "wc_reply_reviewers": "177;95;113;39;263", "wc_reply_authors": "151;152;103;0;227", "reply_reviewers": "2;1;2;1;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 87.0, 36.43075623700392 ], "wc_strengths_avg": [ 48.8, 22.4535075210979 ], "wc_weaknesses_avg": [ 158.4, 70.17862922571229 ], "wc_questions_avg": [ 114.4, 58.15015047272019 ], "wc_limitations_avg": [ 45.0, 34.005881844175136 ], "wc_review_avg": [ 453.6, 193.73445744110674 ], "wc_reply_reviewers_avg": [ 137.4, 76.70619270958505 ], "wc_reply_authors_avg": [ 126.6, 74.70635849778785 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 
1.8, 0.4 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13507154096027526890&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 7, "email": "virginia.edu;dexcom.com;;dexcom.com;virginia.edu;virginia.edu;virginia.edu", "author_num": 7, "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "University of Virginia;Dexcom, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.dexcom.com", "aff_unique_abbr": "UVA;Dexcom", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Invariant Anomaly Detection under Distribution Shifts: A Causal Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72282", "id": "F1mv2L7Rkb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b010241b9f1cdfc7d4c392db899cef86-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=F1mv2L7Rkb", "openreview": "https://openreview.net/forum?id=F1mv2L7Rkb", "poster": "/media/PosterPDFs/NeurIPS%202023/72282.png?t=1701304022.5832345", "slides": "https://nips.cc/virtual/2023/poster/72282", "video": "https://nips.cc/virtual/2023/poster/72282", "author_site": "Jo\u00e3o Carvalho, Mengtao Zhang, Robin Geyer, Carlos Cotrini, Joachim M Buhmann", "tldr": "", "abstract": "Anomaly detection (AD) is the machine learning task of identifying highly discrepant abnormal samples by solely relying on the consistency of the normal training samples. Under the constraints of a distribution shift, the assumption that training samples and test samples are drawn from the same distribution breaks down. In this work, by leveraging tools from causal inference we attempt to increase the resilience of anomaly detection models to different kinds of distribution shifts. We begin by elucidating a simple yet necessary statistical property that ensures invariant representations, which is critical for robust AD under both domain and covariate shifts. From this property, we derive a regularization term which, when minimized, leads to partial distribution invariance across environments. \nThrough extensive experimental evaluation on both synthetic and real-world tasks, covering a range of six different AD methods, we demonstrated significant improvements in out-of-distribution performance. Under both covariate and domain shift, models regularized with our proposed term showed marked increased robustness. Code is available at: https://github.com/JoaoCarv/invariant-anomaly-detection", "keywords": "anomaly detection;causal inference;distribution shifts", "primary_area": "", "supplementary_material": "/attachment/029a93a2ae1ed16a9b9c4b5ca95239a86e027849.zip", "author": "Jo\u00e3o B. S. Carvalho;Mengtao Zhang;Robin Geyer;Carlos Cotrini;Joachim M. 
Buhmann", "authorids": "~Jo\u00e3o_B._S._Carvalho1;~Mengtao_Zhang1;~Robin_Geyer1;~Carlos_Cotrini1;~Joachim_M._Buhmann1", "gender": ";M;M;Not Specified;M", "homepage": ";;;https://people.inf.ethz.ch/ccarlos/;https://ise.ethz.ch", "dblp": ";;;150/0652;b/JMBuhmann", "google_scholar": ";;https://scholar.google.ch/citations?user=beAB8-QAAAAJ;;https://scholar.google.ch/citations?user=zQWbCzYAAAAJ", "orcid": ";;;;", "linkedin": ";zhang-mengtao-845204232/;;;", "or_profile": "~Jo\u00e3o_B._S._Carvalho1;~Mengtao_Zhang1;~Robin_Geyer1;~Carlos_Cotrini1;~Joachim_M._Buhmann1", "aff": ";Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": ";inf.ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "position": ";MS student;PhD student;Instructor;Professor", "bibtex": "@inproceedings{\ncarvalho2023invariant,\ntitle={Invariant Anomaly Detection under Distribution Shifts: A Causal Perspective},\nauthor={Jo{\\~a}o B. S. Carvalho and Mengtao Zhang and Robin Geyer and Carlos Cotrini and Joachim M. Buhmann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=F1mv2L7Rkb}\n}", "github": "", "project": "", "reviewers": "AUeF;ePdN;ZicU;yiE8;zVDG", "pdf_size": 1157551, "rating": "4;4;5;6;7", "confidence": "3;3;3;4;4", "soundness": "3;3;2;3;3", "novelty": "3;2;2;3;2", "presentation": "1;2;3;3;3", "wc_summary": "72;76;40;83;89", "wc_strengths": "35;76;48;102;30", "wc_weaknesses": "428;236;142;150;308", "wc_questions": "0;66;2;111;2", "wc_limitations": "14;13;2;15;6", "wc_review": "549;467;234;461;435", "wc_reply_reviewers": "288;109;36;50;87", "wc_reply_authors": "528;602;25;84;278", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 72.0, 17.029386365926403 ], "wc_strengths_avg": [ 58.2, 27.10276738637588 ], "wc_weaknesses_avg": [ 252.8, 106.6478316704095 ], "wc_questions_avg": [ 36.2, 45.01732999634696 ], "wc_limitations_avg": [ 10.0, 5.0990195135927845 ], "wc_review_avg": [ 429.2, 104.81297629587664 ], "wc_reply_reviewers_avg": [ 114.0, 90.78546139112804 ], "wc_reply_authors_avg": [ 303.4, 230.60581085480044 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9101820546182064, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15424438241218585476&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";inf.ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "GRAND-SLAMIN\u2019 Interpretable Additive Modeling with Structural Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72281", "id": "F5DYsAc7Rt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c057cb81b8d3c67093427bf1c16a4e9f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=F5DYsAc7Rt", "openreview": 
"https://openreview.net/forum?id=F5DYsAc7Rt", "poster": "/media/PosterPDFs/NeurIPS%202023/72281.png?t=1699774132.9725087", "slides": "https://nips.cc/virtual/2023/poster/72281", "video": "https://nips.cc/virtual/2023/poster/72281", "author_site": "Shibal Ibrahim, Gabriel Afriat, Kayhan Behdin, Rahul Mazumder", "tldr": "", "abstract": "Generalized Additive Models (GAMs) are a family of flexible and interpretable models with old roots in statistics. GAMs are often used with pairwise interactions to improve model accuracy while still retaining flexibility and interpretability, but this leads to computational challenges, as we are dealing with on the order of $p^2$ terms. It is desirable to restrict the number of components (i.e., encourage sparsity) for easier interpretability, and better computational and statistical properties. Earlier approaches, considering sparse pairwise interactions, have limited scalability, especially when imposing additional structural interpretability constraints. We propose a flexible GRAND-SLAMIN framework that can learn GAMs with interactions under sparsity and additional structural constraints in a differentiable end-to-end fashion. We customize first-order gradient-based optimization to perform sparse backpropagation to exploit sparsity in additive effects for any differentiable loss function in a GPU-compatible manner. Additionally, we establish novel non-asymptotic prediction bounds for our estimators with tree-based shape functions. Numerical experiments on real-world datasets show that our toolkit compares favorably in terms of performance, variable selection and scalability when compared with popular toolkits to fit GAMs with interactions. Our work expands the landscape of interpretable modeling while maintaining prediction accuracy competitive with non-interpretable black-box models. 
Our code is available at https://github.com/mazumder-lab/grandslamin.", "keywords": "Generalized additive models;component selection;hierarchy;interpretability", "primary_area": "", "supplementary_material": "", "author": "Shibal Ibrahim;Gabriel Isaac Afriat;Kayhan Behdin;Rahul Mazumder", "authorids": "~Shibal_Ibrahim1;afriatg@mit.edu;~Kayhan_Behdin1;~Rahul_Mazumder1", "gender": "M;;M;M", "homepage": "https://sites.google.com/view/shibal-ibrahim/;;http://mit.edu/~behdink/www/;http://www.mit.edu/~rahulmaz/", "dblp": "177/1113;;199/2308.html;11/9365.html", "google_scholar": "_ADL3k8AAAAJ;;;cyCp3pIAAAAJ", "orcid": "0000-0002-3300-0213;;0000-0003-3482-0421;0000-0003-1384-9743", "linkedin": "shibal-ibrahim-70097b77;;;", "or_profile": "~Shibal_Ibrahim1;afriatg@mit.edu;~Kayhan_Behdin1;~Rahul_Mazumder1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu;mit.edu", "position": "PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\nibrahim2023grandslamin,\ntitle={{GRAND}-{SLAMIN}{\\textquoteright} Interpretable Additive Modeling with Structural Constraints},\nauthor={Shibal Ibrahim and Gabriel Isaac Afriat and Kayhan Behdin and Rahul Mazumder},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=F5DYsAc7Rt}\n}", "github": "", "project": "", "reviewers": "W32k;QfPH;xnEj;WskZ;nF9H", "pdf_size": 5448375, "rating": "5;6;6;6;6", "confidence": "4;4;3;4;4", "soundness": "3;2;3;3;3", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "45;150;127;32;36", "wc_strengths": "38;43;200;33;61", "wc_weaknesses": "174;129;98;95;175", "wc_questions": "38;77;270;16;4", "wc_limitations": "7;8;17;1;8", "wc_review": "302;407;712;177;284", "wc_reply_reviewers": "235;214;133;0;0", "wc_reply_authors": "1647;304;169;0;0", "reply_reviewers": "2;2;1;0;0", "reply_authors": "4;2;2;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 78.0, 50.10788361126421 ], "wc_strengths_avg": [ 75.0, 63.210758577950955 ], "wc_weaknesses_avg": [ 134.2, 34.99371372118141 ], "wc_questions_avg": [ 81.0, 97.71386800244886 ], "wc_limitations_avg": [ 8.2, 5.1146847410177685 ], "wc_review_avg": [ 376.4, 182.9738779170404 ], "wc_reply_reviewers_avg": [ 116.4, 100.96058636913713 ], "wc_reply_authors_avg": [ 424.0, 622.045979007983 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=812698283697114754&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mit.edu;;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Decision Tree for Locally Private Estimation with Public Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72280", "id": "F5FVsfCxt8", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/88237ac4e9941b1be5c6d3c1ad408184-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=F5FVsfCxt8", "openreview": "https://openreview.net/forum?id=F5FVsfCxt8", "poster": "/media/PosterPDFs/NeurIPS%202023/72280.png?t=1697775895.5255334", "slides": "https://nips.cc/virtual/2023/poster/72280", "video": "https://nips.cc/virtual/2023/poster/72280", "author_site": "Yuheng Ma, Han Zhang, Yuchao Cai, Hanfang Yang", "tldr": "", "abstract": "We propose conducting locally differentially private (LDP) estimation with the aid of a small amount of public data to enhance the performance of private estimation. Specifically, we introduce an efficient algorithm called Locally differentially Private Decision Tree (LPDT) for LDP regression. We first use the public data to grow a decision tree partition and then fit an estimator according to the partition privately. From a theoretical perspective, we show that LPDT is $\\varepsilon$-LDP and has a mini-max optimal convergence rate under a mild assumption of similarity between public and private data, whereas the lower bound of the convergence rate of LPDT without public data is strictly slower, which implies that the public data helps to improve the convergence rates of LDP estimation. We conduct experiments on both synthetic and real-world data to demonstrate the superior performance of LPDT compared with other state-of-the-art LDP regression methods. Moreover, we show that LPDT remains effective despite considerable disparities between public and private data.", "keywords": "Local differential privacy;non-parametric regression;decision tree;public data", "primary_area": "", "supplementary_material": "/attachment/42ae4616f97b61311f6d2047b7f0261c7cb8bfbc.pdf", "author": "Yuheng Ma;Han Zhang;Yuchao Cai;Hanfang Yang", "authorids": "~Yuheng_Ma1;~Han_Zhang21;~Yuchao_Cai1;~Hanfang_Yang2", "gender": "M;M;M;M", "homepage": "https://karlmyh.github.io/;https://hanzhang-lang.github.io/;;http://stat.ruc.edu.cn/en/teacher_more.php?cid=89248&id=40", "dblp": "258/0645-1.html;;;", "google_scholar": "JvMlW0gAAAAJ;n6sDKdoAAAAJ;u-NwlIEAAAAJ;EsSjDdAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yuheng_Ma1;~Han_Zhang21;~Yuchao_Cai1;~Hanfang_Yang2", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nma2023decision,\ntitle={Decision Tree for Locally Private Estimation with Public Data},\nauthor={Yuheng Ma and Han Zhang and Yuchao Cai and Hanfang Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=F5FVsfCxt8}\n}", "github": "", "project": "", "reviewers": "xd2A;HsA8;QZtg;9jqb", "pdf_size": 608562, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "93;85;42;100", "wc_strengths": "70;38;74;45", "wc_weaknesses": "169;345;253;30", "wc_questions": "118;50;45;85", "wc_limitations": "1;53;11;56", "wc_review": "451;571;425;316", "wc_reply_reviewers": "55;160;179;19", "wc_reply_authors": "259;78;456;0", "reply_reviewers": "2;1;3;1", "reply_authors": "2;3;3;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], 
"presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.0, 22.572106680591425 ], "wc_strengths_avg": [ 56.75, 15.514106484100203 ], "wc_weaknesses_avg": [ 199.25, 115.85848048373498 ], "wc_questions_avg": [ 74.5, 29.46608219631514 ], "wc_limitations_avg": [ 30.25, 24.529319191530774 ], "wc_review_avg": [ 440.75, 90.66524968255479 ], "wc_reply_reviewers_avg": [ 103.25, 67.79518788232687 ], "wc_reply_authors_avg": [ 198.25, 175.98916870080384 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16848434024284664157&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Toward Better PAC-Bayes Bounds for Uniformly Stable Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72279", "id": "F6j16Qr6Vk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e8309c9ca683e11672e3dbcd4b87776-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=F6j16Qr6Vk", "openreview": "https://openreview.net/forum?id=F6j16Qr6Vk", "poster": "/media/PosterPDFs/NeurIPS%202023/72279.png?t=1701454344.4235961", "slides": "https://nips.cc/virtual/2023/poster/72279", "video": "https://nips.cc/virtual/2023/poster/72279", "author_site": "Sijia Zhou, Yunwen Lei, Ata Kaban", "tldr": "", "abstract": "We give sharper bounds for uniformly stable randomized algorithms in a PAC-Bayesian framework, which improve the existing results by up to a factor of $\\sqrt{n}$ (ignoring a log factor), where $n$ is the sample size. The key idea is to bound the moment generating function of the generalization gap using concentration of weakly dependent random variables due to Bousquet et al (2020). We introduce an assumption of sub-exponential stability parameter, which allows a general treatment that we instantiate in two applications: stochastic gradient descent and randomized coordinate descent. 
Our results eliminate the requirement of strong convexity from previous results, and hold for non-smooth convex problems.", "keywords": "PAC-Bayesian Bounds;Uniform Stability;Generalization Analysis", "primary_area": "", "supplementary_material": "/attachment/8447da86a8c5363321f8b1f382b410d4897f1396.zip", "author": "Sijia Zhou;Yunwen Lei;Ata Kaban", "authorids": "~Sijia_Zhou1;~Yunwen_Lei1;~Ata_Kaban1", "gender": ";M;F", "homepage": ";https://leiyw.github.io/;https://www.birmingham.ac.uk/staff/profiles/computer-science/academic-staff/kaban-ata.aspx", "dblp": ";https://dblp.org/pers/l/Lei:Yunwen;k/AtaKaban", "google_scholar": ";https://scholar.google.com.hk/citations?user=g3dg0rsAAAAJ;https://scholar.google.co.uk/citations?user=rb1Om0cAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sijia_Zhou1;~Yunwen_Lei1;~Ata_Kaban1", "aff": ";University of Hong Kong;University of Birmingham", "aff_domain": ";hku.hk;bham.ac.uk", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023toward,\ntitle={Toward Better {PAC}-Bayes Bounds for Uniformly Stable Algorithms},\nauthor={Sijia Zhou and Yunwen Lei and Ata Kaban},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=F6j16Qr6Vk}\n}", "github": "", "project": "", "reviewers": "qrJQ;ZHyT;qzEi;V6pn", "pdf_size": 355901, "rating": "4;5;7;7", "confidence": "4;3;2;3", "soundness": "1;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "57;73;43;376", "wc_strengths": "56;50;166;240", "wc_weaknesses": "37;36;139;363", "wc_questions": "362;230;137;130", "wc_limitations": "61;16;9;14", "wc_review": "573;405;494;1123", "wc_reply_reviewers": "168;12;9;20", "wc_reply_authors": "380;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 137.25, 138.25045207882684 ], "wc_strengths_avg": [ 128.0, 79.46068210127572 ], "wc_weaknesses_avg": [ 143.75, 133.32174428801926 ], "wc_questions_avg": [ 214.75, 93.73199827166815 ], "wc_limitations_avg": [ 25.0, 20.940391591371924 ], "wc_review_avg": [ 648.75, 280.1842028023707 ], "wc_reply_reviewers_avg": [ 52.25, 66.94914114460319 ], "wc_reply_authors_avg": [ 95.0, 164.54482671904336 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11682348269767563748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";hku.hk;bham.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Hong Kong;University of Birmingham", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.birmingham.ac.uk", "aff_unique_abbr": "HKU;UoB", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Debiased and Denoised Entity Recognition from Distant Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72278", "id": "FAGY52HbyV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/359ddb9caccb4c54cc915dceeacf4892-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=FAGY52HbyV", "openreview": "https://openreview.net/forum?id=FAGY52HbyV", "poster": "/media/PosterPDFs/NeurIPS%202023/72278.png?t=1702296807.528111", "slides": "https://nips.cc/virtual/2023/poster/72278", "video": "https://nips.cc/virtual/2023/poster/72278", "author_site": "Haobo Wang, Yiwen Dong, Ruixuan Xiao, Fei Huang, Gang Chen, Junbo Zhao", "tldr": "", "abstract": "While distant supervision has been extensively explored and exploited in NLP tasks like named entity recognition, a major obstacle stems from the inevitably noisy distant labels tagged in an unsupervised manner. A few past works approach this problem by adopting a self-training framework with a sample-selection mechanism. In this work, we identify two types of biases that were omitted by prior work, and these biases lead to inferior performance of the distantly-supervised NER setup. First, we characterize the noise concealed in the distant labels as highly structural rather than fully randomized. Second, the self-training framework would ubiquitously introduce an inherent bias that causes erroneous behavior in both sample selection and eventually prediction. To cope with these problems, we propose a novel self-training framework, dubbed DesERT. This framework augments the conventional NER predictive pathway to a dual form that effectively adapts the sample-selection process to conform to its innate distributional-bias structure. The other crucial component of DesERT comprises a debiased module aiming to enhance the token representations, hence the quality of the pseudo-labels. Extensive experiments are conducted to validate DesERT. The results show that our framework establishes a new state-of-the-art performance, achieving a +2.22% average F1 score improvement on five standardized benchmarking datasets. 
Lastly, DesERT demonstrates its effectiveness under a new DSNER benchmark where additional distant supervision comes from the ChatGPT model.", "keywords": "Distant Supervision; Named Entity-Recognition; Biased Learning", "primary_area": "", "supplementary_material": "/attachment/1bc34f5c5b7052abe6d72c59dbac7a32153cfa87.zip", "author": "Haobo Wang;Yiwen Dong;Ruixuan Xiao;Fei Huang;Gang Chen;Junbo Zhao", "authorids": "~Haobo_Wang1;~Yiwen_Dong2;~Ruixuan_Xiao1;~Fei_Huang1;~Gang_Chen6;~Junbo_Zhao1", "gender": "M;F;M;M;M;M", "homepage": "https://hbzju.github.io/;https://github.com/Ace424;https://github.com/Justherozen;;http://jakezhao.net/;https://sites.google.com/view/fei-huang", "dblp": ";274/6496-3;312/5605;67/6383-1;191/6665;h/FeiHuang.html", "google_scholar": "DnN-rggAAAAJ;;OLQeOJgAAAAJ;;8ipao8MAAAAJ;9r98PpoAAAAJ", "orcid": "0000-0001-8586-3048;;;0000-0002-7483-0045;;", "linkedin": ";;;;;fei-huang-cas-cmu", "or_profile": "~Haobo_Wang1;~Yiwen_Dong2;~Ruixuan_Xiao1;~Gang_Chen6;~Junbo_Zhao1;~Fei_Huang2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;College of Computer Science and Technology, Zhejiang University;Zhejiang University;Alibaba Group US", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn;alibaba-inc.com", "position": "PhD student;MS student;MS student;Full Professor;Assistant Professor;Senior Research Director", "bibtex": "@inproceedings{\nwang2023debiased,\ntitle={Debiased and Denoised Entity Recognition from Distant Supervision},\nauthor={Haobo Wang and Yiwen Dong and Ruixuan Xiao and Fei Huang and Gang Chen and Junbo Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FAGY52HbyV}\n}", "github": "", "project": "", "reviewers": "qqbH;NjrY;Nukd;WYfp;WUVn", "pdf_size": 1435434, "rating": "5;5;5;6;7", "confidence": "4;3;4;5;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "60;63;75;76;112", "wc_strengths": "23;65;133;39;167", "wc_weaknesses": "166;196;202;156;136", "wc_questions": "23;44;21;3;104", "wc_limitations": "1;2;7;3;27", "wc_review": "273;370;438;277;546", "wc_reply_reviewers": "17;14;35;14;38", "wc_reply_authors": "22;53;63;25;104", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;3;2;3", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 77.2, 18.519179247472064 ], "wc_strengths_avg": [ 85.4, 55.47467890848941 ], "wc_weaknesses_avg": [ 171.2, 24.741867350707384 ], "wc_questions_avg": [ 39.0, 35.002857026248584 ], "wc_limitations_avg": [ 8.0, 9.715966241192895 ], "wc_review_avg": [ 380.8, 103.02892797656395 ], "wc_reply_reviewers_avg": [ 23.6, 10.63202708800161 ], "wc_reply_authors_avg": [ 53.4, 29.81677380267691 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=689658894670360687&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn;alibaba-inc.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Zhejiang University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "A Privacy-Friendly Approach to Data Valuation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72277", "id": "FAZ3i0hvm0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bdd5522a32b3a959a6d81fb6ddc1cb38-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FAZ3i0hvm0", "openreview": "https://openreview.net/forum?id=FAZ3i0hvm0", "poster": "/media/PosterPDFs/NeurIPS%202023/72277.png?t=1701466693.6127656", "slides": "https://nips.cc/virtual/2023/poster/72277", "video": "https://nips.cc/virtual/2023/poster/72277", "author_site": "Jiachen (Tianhao) Wang, Yuqing Zhu, Yu-Xiang Wang, Ruoxi Jia, Prateek Mittal", "tldr": "", "abstract": "Data valuation, a growing field that aims at quantifying the usefulness of individual data sources for training machine learning (ML) models, faces notable yet often overlooked privacy challenges. This paper studies these challenges with a focus on KNN-Shapley, one of the most practical data valuation methods nowadays. We first emphasize the inherent privacy risks of KNN-Shapley, and demonstrate the significant technical challenges in adapting KNN-Shapley to accommodate differential privacy (DP). To overcome these challenges, we introduce TKNN-Shapley, a refined variant of KNN-Shapley that is privacy-friendly, allowing for straightforward modifications to incorporate DP guarantee (DP-TKNN-Shapley). We show that DP-TKNN-Shapley has several advantages and offers a superior privacy-utility tradeoff compared to naively privatized KNN-Shapley. Moreover, even non-private TKNN-Shapley matches KNN-Shapley's performance in discerning data quality. Overall, our findings suggest that TKNN-Shapley is a promising alternative to KNN-Shapley, particularly for real-world applications involving sensitive data.", "keywords": "Data Valuation;Differential Privacy", "primary_area": "", "supplementary_material": "", "author": "Jiachen T. Wang;Yuqing Zhu;Yu-Xiang Wang;Ruoxi Jia;Prateek Mittal", "authorids": "~Jiachen_T._Wang1;~Yuqing_Zhu1;~Yu-Xiang_Wang1;~Ruoxi_Jia1;~Prateek_Mittal1", "gender": "F;;;;M", "homepage": "https://jeremy43.github.io/;http://www.cs.ucsb.edu/~yuxiangw/publications.html;https://ruoxijia.info/;http://www.princeton.edu/~pmittal/;https://tianhaowang.netlify.app/", "dblp": ";62/1637-3.html;147/5355-1;;274/2144", "google_scholar": "QmMv9PIAAAAJ;HGNZ1fkAAAAJ;JCrug-YAAAAJ;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ;nvQOtgkAAAAJ", "orcid": ";;;0000-0002-4057-0118;", "linkedin": ";;;;tian-hao-wang/", "or_profile": "~Yuqing_Zhu1;~Yu-Xiang_Wang1;~Ruoxi_Jia1;~Prateek_Mittal1;~Tianhao_Wang2", "aff": "UC Santa Barbara;UC Santa Barbara;Virginia Tech;Princeton University;Princeton University", "aff_domain": "ucsb.edu;ucsb.edu;vt.edu;princeton.edu;princeton.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Privacy-Friendly Approach to Data Valuation},\nauthor={Jiachen T. 
Wang and Yuqing Zhu and Yu-Xiang Wang and Ruoxi Jia and Prateek Mittal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FAZ3i0hvm0}\n}", "github": "", "project": "", "reviewers": "SBGb;i1sa;z2hx;QHAC;qAKo;yczn", "pdf_size": 1265980, "rating": "5;6;6;6;6;7", "confidence": "2;4;3;3;4;4", "soundness": "3;3;3;3;3;4", "novelty": "2;3;3;3;3;3", "presentation": "3;3;3;4;2;3", "wc_summary": "49;36;100;61;37;43", "wc_strengths": "31;8;91;84;74;234", "wc_weaknesses": "43;23;238;190;400;212", "wc_questions": "3;178;152;94;8;8", "wc_limitations": "171;2;35;40;1;1", "wc_review": "297;247;616;469;520;498", "wc_reply_reviewers": "12;10;22;136;48;45", "wc_reply_authors": "19;24;432;1382;29;10", "reply_reviewers": "1;1;1;2;1;1", "reply_authors": "2;2;3;3;2;2", "rating_avg": [ 6.0, 0.5773502691896257 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999298 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 54.333333333333336, 22.073110841524404 ], "wc_strengths_avg": [ 87.0, 72.08790929598851 ], "wc_weaknesses_avg": [ 184.33333333333334, 126.61841186108055 ], "wc_questions_avg": [ 73.83333333333333, 71.9407549461515 ], "wc_limitations_avg": [ 41.666666666666664, 60.0768026964004 ], "wc_review_avg": [ 441.1666666666667, 128.6279086702761 ], "wc_reply_reviewers_avg": [ 45.5, 43.072613108563544 ], "wc_reply_authors_avg": [ 316.0, 499.88165266057393 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3342569056703202331&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 9, "email": "ucsb.edu;ucsb.edu;vt.edu;princeton.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "University of California, Santa Barbara;Virginia Tech;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsb.edu;https://www.vt.edu;https://www.princeton.edu", "aff_unique_abbr": "UCSB;VT;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Monte Carlo on a Budget \u2014 Fine-tuning pre-trained Neural Wavefunctions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72276", "id": "FBNyccPfAu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b5721f7fcc1672930d860e0dfcfee84-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FBNyccPfAu", "openreview": "https://openreview.net/forum?id=FBNyccPfAu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72276", "video": "https://nips.cc/virtual/2023/poster/72276", "author_site": "Michael Scherbela, Leon Gerard, Philipp Grohs", "tldr": "", "abstract": "Obtaining accurate solutions to the Schr\u00f6dinger equation is the key challenge in computational quantum chemistry. 
\nDeep-learning-based Variational Monte Carlo (DL-VMC) has recently outperformed conventional approaches in terms of accuracy, but only at large computational cost.\nWhereas in many domains models are trained once and subsequently applied for inference, accurate DL-VMC so far requires a full optimization for every new problem instance, consuming thousands of GPU-hours even for small molecules.\nWe instead propose a DL-VMC model which has been pre-trained using self-supervised wavefunction optimization on a large and chemically diverse set of molecules. \nApplying this model to new molecules without any optimization yields wavefunctions and absolute energies that outperform established methods such as CCSD(T)-2Z.\nTo obtain accurate relative energies, only a few fine-tuning steps of this base model are required.\nWe accomplish this with a fully end-to-end machine-learned model, consisting of an improved geometry embedding architecture and an existing SE(3)-equivariant model to represent molecular orbitals. \nCombining this architecture with continuous sampling of geometries, we improve zero-shot accuracy by two orders of magnitude compared to the state of the art.\nWe extensively evaluate the accuracy, scalability and limitations of our base model on a wide variety of test systems.", "keywords": "Computational Physics;Machine Learning for Science;Quantum Monte Carlo;Fermionic Neural Networks", "primary_area": "", "supplementary_material": "/attachment/e142493c70d0b29f9b4ce5dc04a9ddd82e1ef873.zip", "author": "Michael Scherbela;Leon Gerard;Philipp Grohs", "authorids": "~Michael_Scherbela1;~Leon_Gerard1;~Philipp_Grohs1", "gender": "M;;", "homepage": ";;", "dblp": "248/6254;;11/7839", "google_scholar": "gDd8Nc8AAAAJ;;", "orcid": "0000-0003-2751-0572;;", "linkedin": ";;", "or_profile": "~Michael_Scherbela1;~Leon_Gerard1;~Philipp_Grohs1", "aff": "Universit\u00e4t Vienna;;University of Vienna", "aff_domain": "univie.ac.at;;univie.ac.at", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nscherbela2023variational,\ntitle={Variational Monte Carlo on a Budget {\\textemdash} Fine-tuning pre-trained Neural Wavefunctions},\nauthor={Michael Scherbela and Leon Gerard and Philipp Grohs},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FBNyccPfAu}\n}", "github": "", "project": "", "reviewers": "R7Vm;NURi;BduG;37BZ;m8hG", "pdf_size": 714522, "rating": "5;5;6;6;7", "confidence": "4;3;3;3;4", "soundness": "2;3;3;3;4", "novelty": "3;2;3;3;4", "presentation": "3;2;3;3;3", "wc_summary": "208;80;102;165;81", "wc_strengths": "244;19;49;64;90", "wc_weaknesses": "156;56;55;38;37", "wc_questions": "63;17;312;27;33", "wc_limitations": "35;1;49;1;23", "wc_review": "706;173;567;295;264", "wc_reply_reviewers": "160;88;104;28;56", "wc_reply_authors": "590;48;90;94;249", "reply_reviewers": "2;1;1;1;2", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 127.2, 50.901473456079835 ], "wc_strengths_avg": [ 93.2, 78.81979446814107 ], "wc_weaknesses_avg": [ 68.4, 44.53582827342498 ], "wc_questions_avg": [ 90.4, 111.8563364320502 ], "wc_limitations_avg": [ 21.8, 18.872201779336716 ], "wc_review_avg": [ 401.0, 201.28089824918806 ], "wc_reply_reviewers_avg": [ 87.2, 44.8571064604038 ], "wc_reply_authors_avg": 
[ 214.2, 199.96639717712574 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17645443909881205045&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "univie.ac.at;;univie.ac.at", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Vienna", "aff_unique_dep": "", "aff_unique_url": "https://univie.ac.at", "aff_unique_abbr": "UV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Austria" }, { "title": "Lung250M-4B: A Combined 3D Dataset for CT- and Point Cloud-Based Intra-Patient Lung Registration", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73647", "id": "FC0dsvguFi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abf37695a4562ac4c05194d717d47eec-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=FC0dsvguFi", "openreview": "https://openreview.net/forum?id=FC0dsvguFi", "poster": "/media/PosterPDFs/NeurIPS%202023/73647.png?t=1702035368.915983", "slides": "https://nips.cc/virtual/2023/poster/73647", "video": "https://nips.cc/virtual/2023/poster/73647", "author_site": "Fenja Falta, Christoph Gro\u00dfbr\u00f6hmer, Alessa Hering, Alexander Bigalke, Mattias Heinrich", "tldr": "", "abstract": "A popular benchmark for intra-patient lung registration is provided by the DIR-LAB COPDgene dataset consisting of large-motion in- and expiratory breath-hold CT pairs. This dataset alone, however, does not provide enough samples to properly train state-of-the-art deep learning methods. Other public datasets often also provide only small sample sizes or include primarily small motions between scans that do not translate well to larger deformations. For point-based geometric registration, the PVT1010 dataset provides a large number of vessel point clouds without any correspondences and a labeled test set corresponding to the COPDgene cases. However, the absence of correspondences for supervision complicates training, and a fair comparison with image-based algorithms is infeasible, since CT scans for the training data are not publicly available.\nWe here provide a combined benchmark for image- and point-based registration approaches. We curated a total of 248 public multi-centric in- and expiratory lung CT scans from 124 patients, which show large motion between scans, processed them to ensure sufficient homogeneity between the data and generated vessel point clouds that are well distributed even deeper inside the lungs. For supervised training, we provide vein and artery segmentations of the vessels and multiple thousand image-derived keypoint correspondences for each pair. For validation, we provide multiple scan pairs with manual landmark annotations. 
Finally, as first baselines on our new benchmark, we evaluate several image and point cloud registration methods on the dataset.", "keywords": "image registration;lung CT;point clouds;geometric deep learning", "primary_area": "", "supplementary_material": "/attachment/53b939bd6a9d0e35a921eb3a4ddbce8c4ad97884.pdf", "author": "Fenja Falta;Christoph Gro\u00dfbr\u00f6hmer;Alessa Hering;Alexander Bigalke;Mattias P Heinrich", "authorids": "~Fenja_Falta1;~Christoph_Gro\u00dfbr\u00f6hmer2;~Alessa_Hering1;~Alexander_Bigalke1;~Mattias_P_Heinrich1", "gender": ";M;F;;M", "homepage": "https://www.imi.uni-luebeck.de/institut/mitarbeiter/falta-fenja.html;;;;http://mpheinrich.de", "dblp": ";;https://dblp.uni-trier.de/pers/hd/h/Hering:Alessa;286/7895;95/10171", "google_scholar": ";;https://scholar.google.de/citations?user=xFdAEu4AAAAJ;;https://scholar.google.de/citations?user=_403fNcAAAAJ", "orcid": ";0000-0002-8926-8729;;0000-0001-7824-5735;", "linkedin": ";;;;", "or_profile": "~Fenja_Falta1;~Christoph_Gro\u00dfbr\u00f6hmer2;~Alessa_Hering1;~Alexander_Bigalke1;~Mattias_P_Heinrich1", "aff": "Universit\u00e4t zu L\u00fcbeck;Universit\u00e4t zu L\u00fcbeck;Radboud University Medical Center;University of L\u00fcbeck Institute of Medical Informatics;Universit\u00e4t zu L\u00fcbeck", "aff_domain": "uni-luebeck.de;uni-luebeck.de;radboudumc.nl;imi.uni-luebeck.de;uni-luebeck.de", "position": "PhD student;PhD student;Assistant Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfalta2023lungmb,\ntitle={Lung250M-4B: A Combined 3D Dataset for {CT}- and Point Cloud-Based Intra-Patient Lung Registration},\nauthor={Fenja Falta and Christoph Gro{\\ss}br{\\\"o}hmer and Alessa Hering and Alexander Bigalke and Mattias P Heinrich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=FC0dsvguFi}\n}", "github": "", "project": "", "reviewers": "yrwe;fFmp;D1Xr;gUyw", "pdf_size": 4746481, "rating": "5;7;7;9", "confidence": "4;3;3;4", "wc_summary_and_contributions": "219;66;98;89", "wc_strengths": "200;93;77;17", "wc_improvement": "50;131;55;129", "wc_limitations": "1;8;55;1", "wc_correctness": "1;18;32;1", "wc_clarity": "1;6;8;1", "wc_relation_to_prior_work": "1;11;79;2", "wc_documentation": "1;26;1;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "475;360;406;246", "wc_reply_reviewers": "0;0;23;40", "wc_reply_authors": "707;809;750;201", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 118.0, 59.46847904562551 ], "wc_strengths_avg": [ 96.75, 66.00142043926024 ], "wc_improvement_avg": [ 91.25, 38.79674599757047 ], "wc_limitations_avg": [ 16.25, 22.554101622543072 ], "wc_correctness_avg": [ 13.0, 12.98075498574717 ], "wc_clarity_avg": [ 4.0, 3.082207001484488 ], "wc_relation_to_prior_work_avg": [ 23.25, 32.42202183701689 ], "wc_documentation_avg": [ 8.25, 10.37725879025863 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 371.75, 83.3437910104886 ], "wc_reply_reviewers_avg": [ 15.75, 16.857861667483217 ], "wc_reply_authors_avg": [ 616.75, 242.74922759918311 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=907034660150504651&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, 
"email": "uni-luebeck.de;uni-luebeck.de;radboudumc.nl;imi.uni-luebeck.de;uni-luebeck.de", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of L\u00fcbeck;Radboud University Medical Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-luebeck.de;https://www.radboudumc.nl", "aff_unique_abbr": "UzL;RadboudUMC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Germany;Netherlands" }, { "title": "Cognitive Steering in Deep Neural Networks via Long-Range Modulatory Feedback Connections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72275", "id": "FCIj5KMn2m", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/444b09beab8438d4a58e9bc694dca32a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FCIj5KMn2m", "openreview": "https://openreview.net/forum?id=FCIj5KMn2m", "poster": "/media/PosterPDFs/NeurIPS%202023/72275.png?t=1702310083.644193", "slides": "https://nips.cc/virtual/2023/poster/72275", "video": "https://nips.cc/virtual/2023/poster/72275", "author_site": "Talia Konkle, George Alvarez", "tldr": "", "abstract": "Given the rich visual information available in each glance, humans can internally direct their visual attention to enhance goal-relevant information---a capacity often absent in standard vision models. Here we introduce cognitively and biologically-inspired long-range modulatory pathways to enable `cognitive steering\u2019 in vision models. First, we show that models equipped with these feedback pathways naturally show improved image recognition, adversarial robustness, and increased brain alignment, relative to baseline models. Further, these feedback projections from the final layer of the vision backbone provide a meaningful steering interface, where goals can be specified as vectors in the output space. We show that there are effective ways to steer the model that dramatically improve recognition of categories in composite images of multiple categories, succeeding where baseline feed-forward models without flexible steering fail. And, our multiplicative modulatory motif prevents rampant hallucination of the top-down goal category, dissociating what the model is looking for, from what it is looking at. Thus, these long-range modulatory pathways enable new behavioral capacities for goal-directed visual encoding, offering a flexible communication interface between cognitive and visual systems.", "keywords": "convolutional neural networks;steerability;computer vision", "primary_area": "", "supplementary_material": "/attachment/b9576cf71b432d792e67be9b7df620ae0fa9e702.zip", "author": "Talia Konkle;George A. Alvarez", "authorids": "~Talia_Konkle1;~George_A._Alvarez2", "gender": "F;M", "homepage": "https://konklab.fas.harvard.edu/;https://visionlab.harvard.edu/george/", "dblp": ";", "google_scholar": "QxV9vroAAAAJ;qU8dld4AAAAJ", "orcid": "0000-0003-1738-4744;", "linkedin": ";", "or_profile": "~Talia_Konkle1;~George_A._Alvarez2", "aff": "Harvard University;Harvard University", "aff_domain": "harvard.edu;harvard.edu", "position": "Full Professor;Full Professor", "bibtex": "@inproceedings{\nkonkle2023cognitive,\ntitle={Cognitive Steering in Deep Neural Networks via Long-Range Modulatory Feedback Connections},\nauthor={Talia Konkle and George A. 
Alvarez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FCIj5KMn2m}\n}", "github": "", "project": "", "reviewers": "sqng;Bsgy;esAd;kmpt", "pdf_size": 2444812, "rating": "4;5;7;8", "confidence": "4;3;4;4", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "3;2;2;4", "wc_summary": "96;106;51;47", "wc_strengths": "34;35;35;157", "wc_weaknesses": "170;289;135;158", "wc_questions": "2;68;147;236", "wc_limitations": "1;11;1;30", "wc_review": "303;509;369;628", "wc_reply_reviewers": "156;130;574;156", "wc_reply_authors": "903;927;751;0", "reply_reviewers": "1;2;3;1", "reply_authors": "3;3;4;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.0, 26.277366686941825 ], "wc_strengths_avg": [ 65.25, 52.97346033628538 ], "wc_weaknesses_avg": [ 188.0, 59.65316420777694 ], "wc_questions_avg": [ 113.25, 87.50821390018197 ], "wc_limitations_avg": [ 10.75, 11.840080236214618 ], "wc_review_avg": [ 452.25, 125.81211189706657 ], "wc_reply_reviewers_avg": [ 254.0, 185.056748053131 ], "wc_reply_authors_avg": [ 645.25, 378.59897979260325 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.36514837167011077, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11188967278384965543&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "harvard.edu;harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Directed Cyclic Graph for Causal Discovery from Multivariate Functional Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72274", "id": "FCwF5431IY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/854a9ab0f323b841955e70ca383b27d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FCwF5431IY", "openreview": "https://openreview.net/forum?id=FCwF5431IY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72274", "video": "https://nips.cc/virtual/2023/poster/72274", "author_site": "Saptarshi Roy, Raymond K. W. Wong, Yang Ni", "tldr": "", "abstract": "Discovering causal relationships using multivariate functional data has received a significant amount of attention very recently. In this article, we introduce a functional linear structural equation model for causal structure learning when the underlying graph involving the multivariate functions may have cycles. To enhance interpretability, our model involves a low-dimensional causal embedded space such that all the relevant causal information in the multivariate functional data is preserved in this lower-dimensional subspace. We prove that the proposed model is causally identifiable under standard assumptions that are often made in the causal discovery literature. To carry out inference of our model, we develop a fully Bayesian framework with suitable prior specifications and uncertainty quantification through posterior summaries. 
We illustrate the superior performance of our method over existing methods in terms of causal graph estimation through extensive simulation studies. We also demonstrate the proposed method using a brain EEG dataset.", "keywords": "Causal Embedding;Causal Discovery;Multivariate Functional Data;Directed Cyclic Graph;Causal Structure Learning;Bayesian Inference", "primary_area": "", "supplementary_material": "/attachment/e9a527575304f01ca053d87d7cf65e3864d62092.pdf", "author": "Saptarshi Roy;Raymond K. W. Wong;Yang Ni", "authorids": "~Saptarshi_Roy2;~Raymond_K._W._Wong1;~Yang_Ni2", "gender": "M;;M", "homepage": ";;https://www.stat.tamu.edu/~yni/", "dblp": ";;https://dblp.org/rec/conf/nips/ChoiCN20", "google_scholar": ";;", "orcid": "0000-0002-0363-7053;;0000-0003-0636-2363", "linkedin": "saptaroy/;;", "or_profile": "~Saptarshi_Roy2;~Raymond_K._W._Wong1;~Yang_Ni2", "aff": "Texas A&M University - College Station;;Texas A&M", "aff_domain": "tamu.edu;;tamu.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nroy2023directed,\ntitle={Directed Cyclic Graph for Causal Discovery from Multivariate Functional Data},\nauthor={Saptarshi Roy and Raymond K. W. Wong and Yang Ni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FCwF5431IY}\n}", "github": "", "project": "", "reviewers": "Y9Hq;236D;3JSM;nZkc;ucHi", "pdf_size": 2261420, "rating": "5;5;6;7;7", "confidence": "3;4;3;3;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "1;2;3;3;4", "wc_summary": "55;45;131;116;61", "wc_strengths": "54;27;81;98;139", "wc_weaknesses": "117;5;68;183;231", "wc_questions": "4;256;31;89;223", "wc_limitations": "14;5;18;29;1", "wc_review": "244;338;329;515;655", "wc_reply_reviewers": "13;36;47;43;0", "wc_reply_authors": "18;18;0;38;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 81.6, 34.91475332864318 ], "wc_strengths_avg": [ 79.8, 38.185861257800646 ], "wc_weaknesses_avg": [ 120.8, 80.28050821961705 ], "wc_questions_avg": [ 120.6, 101.43096174245811 ], "wc_limitations_avg": [ 13.4, 9.891410415102591 ], "wc_review_avg": [ 416.2, 148.43099406795065 ], "wc_reply_reviewers_avg": [ 27.8, 18.21428011204396 ], "wc_reply_authors_avg": [ 14.8, 14.119490075778232 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16389783693824832801&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "tamu.edu;;tamu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Anytime-Competitive Reinforcement Learning with Policy Prior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72273", "id": "FCwfZj1bQl", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f53437debdd397c42929d929614bc705-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FCwfZj1bQl", "openreview": "https://openreview.net/forum?id=FCwfZj1bQl", "poster": "/media/PosterPDFs/NeurIPS%202023/72273.png?t=1702106795.3463495", "slides": "https://nips.cc/virtual/2023/poster/72273", "video": "https://nips.cc/virtual/2023/poster/72273", "author_site": "Jianyi Yang, Pengfei Li, Tongxin Li, Adam Wierman, Shaolei Ren", "tldr": "", "abstract": "This paper studies the problem of Anytime-Competitive Markov Decision Process (A-CMDP). Existing works on Constrained Markov Decision Processes (CMDPs) aim to optimize the expected reward while constraining the expected cost over random dynamics, but the cost in a specific episode can still be unsatisfactorily high. In contrast, the goal of A-CMDP is to optimize the expected reward while guaranteeing a bounded cost in each round of any episode against a policy prior. We propose a new algorithm, called Anytime-Competitive Reinforcement Learning (ACRL), which provably guarantees the anytime cost constraints. The regret analysis shows the policy asymptotically matches the optimal reward achievable under the anytime competitive constraints. Experiments on the application of carbon-intelligent computing verify the reward performance and cost constraint guarantee of ACRL.", "keywords": "Markov Decision Process;Constrained Reinforcement Learning;Anytime Competitive Constraints", "primary_area": "", "supplementary_material": "/attachment/a5873f03b209466add0e541cd3f6afe76c34a24f.pdf", "author": "Jianyi Yang;Pengfei Li;Tongxin Li;Adam Wierman;Shaolei Ren", "authorids": "~Jianyi_Yang1;~Pengfei_Li2;~Tongxin_Li1;~Adam_Wierman1;~Shaolei_Ren1", "gender": "M;M;M;M;", "homepage": "https://jyang-ai.github.io;https://www.cs.ucr.edu/~pli081/;https://tongxin.me/;https://adamwierman.com/;", "dblp": "124/1315;;140/7353;56/4447;", "google_scholar": "n7UUdJQAAAAJ;irA8gqoAAAAJ;qyNc3CkAAAAJ;4OvOdSgAAAAJ;", "orcid": ";0000-0003-3257-9929;;0000-0002-5923-0199;", "linkedin": "jianyi-yang-b7a9181a6/;;;adam-wierman-a529474/;", "or_profile": "~Jianyi_Yang1;~Pengfei_Li2;~Tongxin_Li1;~Adam_Wierman1;~Shaolei_Ren1", "aff": "University of California, Riverside;University of California, Riverside;The Chinese University of Hong Kong, Shenzhen;California Institute of Technology;", "aff_domain": "ucr.edu;ucr.edu;cuhk.edu.cn;caltech.edu;", "position": "PhD student;PhD student;Assistant Professor;Professor;", "bibtex": "@inproceedings{\nyang2023anytimecompetitive,\ntitle={Anytime-Competitive Reinforcement Learning with Policy Prior},\nauthor={Jianyi Yang and Pengfei Li and Tongxin Li and Adam Wierman and Shaolei Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FCwfZj1bQl}\n}", "github": "", "project": "", "reviewers": "MVJu;LDWt;skPV;SitE", "pdf_size": 717277, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "124;163;136;41", "wc_strengths": "71;128;130;29", "wc_weaknesses": "375;240;141;137", "wc_questions": "31;100;78;41", "wc_limitations": "5;64;80;6", "wc_review": "606;695;565;254", "wc_reply_reviewers": "134;107;28;0", "wc_reply_authors": "73;79;59;31", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], 
"novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 116.0, 45.54667935206693 ], "wc_strengths_avg": [ 89.5, 42.20485754033533 ], "wc_weaknesses_avg": [ 223.25, 96.84104243552937 ], "wc_questions_avg": [ 62.5, 27.84331158465171 ], "wc_limitations_avg": [ 38.75, 33.72962347847957 ], "wc_review_avg": [ 530.0, 166.13398207470982 ], "wc_reply_reviewers_avg": [ 67.25, 54.99715901753471 ], "wc_reply_authors_avg": [ 60.5, 18.513508581573618 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4388525288097209769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ucr.edu;ucr.edu;cuhk.edu.cn;caltech.edu;", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of California, Riverside;Chinese University of Hong Kong;California Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucr.edu;https://www.cuhk.edu.cn;https://www.caltech.edu", "aff_unique_abbr": "UCR;CUHK;Caltech", "aff_campus_unique_index": "0;0;1;2", "aff_campus_unique": "Riverside;Shenzhen;Pasadena", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Squared Neural Families: A New Class of Tractable Density Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72272", "id": "FDzQQTPqEJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ea13534ee239bb3977795b8cc855bacc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FDzQQTPqEJ", "openreview": "https://openreview.net/forum?id=FDzQQTPqEJ", "poster": "/media/PosterPDFs/NeurIPS%202023/72272.png?t=1699840487.9231274", "slides": "https://nips.cc/virtual/2023/poster/72272", "video": "https://nips.cc/virtual/2023/poster/72272", "author_site": "Russell Tsuchida, Cheng Soon Ong, Dino Sejdinovic", "tldr": "", "abstract": "Flexible models for probability distributions are an essential ingredient in many machine learning tasks. We develop and investigate a new class of probability distributions, which we call a Squared Neural Family (SNEFY), formed by squaring the 2-norm of a neural network and normalising it with respect to a base measure. Following the reasoning similar to the well established connections between infinitely wide neural networks and Gaussian processes, we show that SNEFYs admit closed form normalising constants in many cases of interest, thereby resulting in flexible yet fully tractable density models. SNEFYs strictly generalise classical exponential families, are closed under conditioning, and have tractable marginal distributions. 
Their utility is illustrated on a variety of density estimation, conditional density estimation, and density estimation with missing data tasks.", "keywords": "probabilistic modelling; density estimation; exponential family;", "primary_area": "", "supplementary_material": "", "author": "Russell Tsuchida;Cheng Soon Ong;Dino Sejdinovic", "authorids": "~Russell_Tsuchida1;~Cheng_Soon_Ong1;~Dino_Sejdinovic1", "gender": "M;M;M", "homepage": ";https://ong-home.my;https://sejdino.github.io/", "dblp": "210/2569;58/2283;31/1783", "google_scholar": "pQ7EkegAAAAJ;ofMZr0IAAAAJ;v8Dg1lIAAAAJ", "orcid": ";0000-0002-2302-9733;0000-0001-5547-9213", "linkedin": ";cheng-soon-ong-38bbb524/;https://linkedin.com/in/dinosejdinovic", "or_profile": "~Russell_Tsuchida1;~Cheng_Soon_Ong1;~Dino_Sejdinovic1", "aff": "CSIRO;CSIRO;University of Adelaide", "aff_domain": "csiro.au;csiro.au;adelaide.edu.au", "position": "Postdoc;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ntsuchida2023squared,\ntitle={Squared Neural Families: A New Class of Tractable Density Models},\nauthor={Russell Tsuchida and Cheng Soon Ong and Dino Sejdinovic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FDzQQTPqEJ}\n}", "github": "", "project": "", "reviewers": "k4Y8;zjNK;Y6Kj;tpyH", "pdf_size": 1400932, "rating": "5;6;7;7", "confidence": "3;4;3;2", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "2;4;4;3", "wc_summary": "164;94;85;112", "wc_strengths": "27;86;77;77", "wc_weaknesses": "183;376;65;221", "wc_questions": "67;140;138;1", "wc_limitations": "44;13;36;26", "wc_review": "485;709;401;437", "wc_reply_reviewers": "80;84;142;71", "wc_reply_authors": "20;46;99;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 113.75, 30.597181242722343 ], "wc_strengths_avg": [ 66.75, 23.24193408475293 ], "wc_weaknesses_avg": [ 211.25, 111.15838924705594 ], "wc_questions_avg": [ 86.5, 57.45650528878345 ], "wc_limitations_avg": [ 29.75, 11.583932838203095 ], "wc_review_avg": [ 508.0, 119.81235328629515 ], "wc_reply_reviewers_avg": [ 94.25, 27.9676152004421 ], "wc_reply_authors_avg": [ 41.25, 37.1172129880464 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6890368776724760790&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "csiro.au;csiro.au;adelaide.edu.au", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Commonwealth Scientific and Industrial Research Organisation;University of Adelaide", "aff_unique_dep": ";", "aff_unique_url": "https://www.csiro.au;https://www.adelaide.edu.au", "aff_unique_abbr": "CSIRO;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "C-Disentanglement: Discovering Causally-Independent Generative Factors under an Inductive Bias of Confounder", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72271", "id": "FFOYWUpBca", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ca55c8276acf1f0aa996cd3622d1df4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FFOYWUpBca", "openreview": "https://openreview.net/forum?id=FFOYWUpBca", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72271", "video": "https://nips.cc/virtual/2023/poster/72271", "author_site": "Xiaoyu Liu, Jiaxin Yuan, Bang An, Yuancheng Xu, Yifan Yang, Furong Huang", "tldr": "", "abstract": "Representation learning assumes that real-world data is generated by a few semantically meaningful generative factors (i.e., sources of variation) and aims to discover them in the latent space. These factors are expected to be causally disentangled, meaning that distinct factors are encoded into separate latent variables, and changes in one factor will not affect the values of the others. Compared to statistical independence, causal disentanglement allows more controllable data generation, improved robustness, and better generalization. However, most existing work assumes unconfoundedness in the discovery process, that there are no common causes to the generative factors and thus obtain only statistical independence. In this paper, we recognize the importance of modeling confounders in discovering causal generative factors. Unfortunately, such factors are not identifiable without proper inductive bias. We fill the gap by introducing a framework entitled Confounded-Disentanglement (C-Disentanglement), the first framework that explicitly introduces the inductive bias of confounder via labels from domain expertise. In addition, we accordingly propose an approach to sufficiently identify the causally-disentangled factors under any inductive bias of the confounder. We conduct extensive experiments on both synthetic and real-world datasets. 
Our method demonstrates competitive results compared to various SOTA baselines in obtaining causally disentangled features and on downstream tasks under domain shifts.", "keywords": "causal disentanglement;causal generative process;generative factors;confounder;inductive bias;disentanglement;causal inference", "primary_area": "", "supplementary_material": "/attachment/002098f371def5ad9b17ed36dad6206bb63f3bc2.pdf", "author": "Xiaoyu Liu;Jiaxin Yuan;Bang An;Yuancheng Xu;Yifan Yang;Furong Huang", "authorids": "~Xiaoyu_Liu3;~Jiaxin_Yuan1;~Bang_An1;~Yuancheng_Xu1;~Yifan_Yang5;~Furong_Huang1", "gender": "F;F;;M;M;F", "homepage": ";;https://bangann.github.io/;https://yuancheng-xu.github.io;https://yifanyang.dev/;https://furong-huang.com", "dblp": ";;188/0741;;;72/8513", "google_scholar": ";;3ce6z_sAAAAJ;OPB0QgwAAAAJ;Wn-GGXkAAAAJ;13yyuCcAAAAJ", "orcid": "0000-0003-3385-4726;;;;;", "linkedin": ";jiaxin-margot-yuan/;;yuancheng-xu/;;", "or_profile": "~Xiaoyu_Liu3;~Jiaxin_Yuan1;~Bang_An1;~Yuancheng_Xu1;~Yifan_Yang5;~Furong_Huang1", "aff": "University of Maryland, College Park;University of Maryland, College Park;Capital One;University of Maryland, College Park;University of Maryland, College Park;University of Maryland", "aff_domain": "umd.edu;umd.edu;capitalone.com;umd.edu;umd.edu;cs.umd.edu", "position": "PhD student;PhD student;Intern;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2023cdisentanglement,\ntitle={C-Disentanglement: Discovering Causally-Independent Generative Factors under an Inductive Bias of Confounder},\nauthor={Xiaoyu Liu and Jiaxin Yuan and Bang An and Yuancheng Xu and Yifan Yang and Furong Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FFOYWUpBca}\n}", "github": "", "project": "", "reviewers": "S7Fk;j8n4;TdLu;Mytf", "pdf_size": 3707514, "rating": "5;5;7;7", "confidence": "3;3;4;4", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "2;1;3;2", "wc_summary": "57;94;86;130", "wc_strengths": "62;87;56;63", "wc_weaknesses": "75;207;172;299", "wc_questions": "200;72;40;31", "wc_limitations": "1;35;9;11", "wc_review": "395;495;363;534", "wc_reply_reviewers": "0;23;11;0", "wc_reply_authors": "143;116;37;28", "reply_reviewers": "0;1;1;0", "reply_authors": "3;3;3;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.75, 26.022826518270456 ], "wc_strengths_avg": [ 67.0, 11.853269591129697 ], "wc_weaknesses_avg": [ 188.25, 80.16662335411165 ], "wc_questions_avg": [ 85.75, 67.69924297951935 ], "wc_limitations_avg": [ 14.0, 12.68857754044952 ], "wc_review_avg": [ 446.75, 70.05845773352422 ], "wc_reply_reviewers_avg": [ 8.5, 9.5 ], "wc_reply_authors_avg": [ 81.0, 49.53281740422202 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8024475579168583221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "umd.edu;umd.edu;capitalone.com;umd.edu;umd.edu;cs.umd.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Maryland;Capital One", "aff_unique_dep": ";", "aff_unique_url": "https://www.umd.edu;https://www.capitalone.com", "aff_unique_abbr": "UMD;Capital One",
"aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the spectral bias of two-layer linear networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72270", "id": "FFdrXkm3Cz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cad2fd66cf88226d868f90a7cbaa4a53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FFdrXkm3Cz", "openreview": "https://openreview.net/forum?id=FFdrXkm3Cz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72270", "video": "https://nips.cc/virtual/2023/poster/72270", "author_site": "Aditya Vardhan Varre, Maria-Luiza Vladarean, Loucas PILLAUD-VIVIEN, Nicolas Flammarion", "tldr": "", "abstract": "This paper studies the behaviour of two-layer fully connected networks with linear activations trained with gradient flow on the square loss. We show how the optimization process carries an implicit bias on the parameters that depends on the scale of its initialization. The main result of the paper is a variational characterization of the loss minimizers retrieved by the gradient flow for a specific initialization shape. This characterization reveals that, in the small scale initialization regime, the linear neural network's hidden layer is biased toward having a low-rank structure. To complement our results, we showcase a hidden mirror flow that tracks the dynamics of the singular values of the weights matrices and describe their time evolution. We support our findings with numerical experiments illustrating the phenomena.", "keywords": "linear networks;spectral bias;low rank;singular values;mirror flow", "primary_area": "", "supplementary_material": "/attachment/f230c5d0e6fa0ccbfd50b514917d600eff36d185.zip", "author": "Aditya Vardhan Varre;Maria-Luiza Vladarean;Loucas Pillaud-Vivien;Nicolas Flammarion", "authorids": "~Aditya_Vardhan_Varre1;~Maria-Luiza_Vladarean1;~Loucas_Pillaud-Vivien1;~Nicolas_Flammarion1", "gender": "M;F;M;M", "homepage": ";;https://thebiglouloup.github.io/loucaspillaudvivien/;", "dblp": "224/6338;194/3965;211/7988;164/7417", "google_scholar": ";5_tWxxoAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aditya_Vardhan_Varre1;~Maria-Luiza_Vladarean1;~Loucas_Pillaud-Vivien1;~Nicolas_Flammarion1", "aff": "EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Flatiron Institute;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;flatironinstitute.org;epfl.ch", "position": "PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nvarre2023on,\ntitle={On the spectral bias of two-layer linear networks},\nauthor={Aditya Vardhan Varre and Maria-Luiza Vladarean and Loucas Pillaud-Vivien and Nicolas Flammarion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FFdrXkm3Cz}\n}", "github": "", "project": "", "reviewers": "zebK;mL5m;itua;EWXT;ynT5", "pdf_size": 779879, "rating": "5;5;6;7;7", "confidence": "4;3;3;3;3", "soundness": "3;4;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;4;3;3;4", "wc_summary": "104;22;38;13;96", "wc_strengths": "243;16;44;9;114", "wc_weaknesses": "174;22;341;38;60", "wc_questions": "85;35;53;2;133", "wc_limitations": "2;19;49;2;1", "wc_review": "608;114;525;64;404", "wc_reply_reviewers": "0;14;49;0;22", "wc_reply_authors": "0;0;0;0;0", 
"reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 54.6, 38.00842011975767 ], "wc_strengths_avg": [ 85.2, 87.20412834264214 ], "wc_weaknesses_avg": [ 127.0, 119.53242237987148 ], "wc_questions_avg": [ 61.6, 44.67482512556708 ], "wc_limitations_avg": [ 14.6, 18.467268341582088 ], "wc_review_avg": [ 343.0, 217.87702953730576 ], "wc_reply_reviewers_avg": [ 17.0, 18.088670487352022 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5590169943749476, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12550013302106503950&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch;flatironinstitute.org;epfl.ch", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne;Flatiron Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://flatironinstitute.org", "aff_unique_abbr": "EPFL;EPFL;Flatiron", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Pseudo-Likelihood Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72269", "id": "FIv84qGPFT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/41aa1c9f57ea83d7c41f0d3e98ed3dd4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FIv84qGPFT", "openreview": "https://openreview.net/forum?id=FIv84qGPFT", "poster": "/media/PosterPDFs/NeurIPS%202023/72269.png?t=1701784127.4577096", "slides": "https://nips.cc/virtual/2023/poster/72269", "video": "https://nips.cc/virtual/2023/poster/72269", "author_site": "Theo Gruner, Boris Belousov, Fabio Muratore, Daniel Palenicek, Jan Peters", "tldr": "", "abstract": "Simulation-Based Inference (SBI) is a common name for an emerging family of approaches that infer the model parameters when the likelihood is intractable. Existing SBI methods either approximate the likelihood, such as Approximate Bayesian Computation (ABC) or directly model the posterior, such as Sequential Neural Posterior Estimation (SNPE). While ABC is efficient on low-dimensional problems, on higher-dimensional tasks, it is generally outperformed by SNPE, which leverages function approximation. In this paper, we propose Pseudo-Likelihood Inference (PLI), a new method that brings neural approximation into ABC, making it competitive on challenging Bayesian system identification tasks. By utilizing integral probability metrics, we introduce a smooth likelihood kernel with an adaptive bandwidth that is updated based on information-theoretic trust regions. Thanks to this formulation, our method (i) allows for optimizing neural posteriors via gradient descent, (ii) does not rely on summary statistics, and (iii) enables multiple observations as input. In comparison to SNPE, it leads to improved performance when more data is available. 
The effectiveness of PLI is evaluated on four classical SBI benchmark tasks and on a highly dynamic physical system, showing particular advantages on stochastic simulations and multi-modal posterior landscapes.", "keywords": "simulation-based inference;approximate Bayesian computation", "primary_area": "", "supplementary_material": "", "author": "Theo Gruner;Boris Belousov;Fabio Muratore;Daniel Palenicek;Jan Peters", "authorids": "~Theo_Gruner1;~Boris_Belousov1;~Fabio_Muratore1;~Daniel_Palenicek1;~Jan_Peters3", "gender": "M;;M;M;M", "homepage": ";;;https://www.jan-peters.net;https://www.ias.informatik.tu-darmstadt.de/Team/BorisBelousov", "dblp": "311/3233;;267/9480;p/JanPeters1;191/6726", "google_scholar": "https://scholar.google.de/citations?hl=en;https://scholar.google.de/citations?user=cZAcOyUAAAAJ;AtX0UHsAAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ;https://scholar.google.de/citations?user=XjNbRVYAAAAJ", "orcid": ";0000-0001-8600-2610;0000-0002-8292-1318;0000-0002-5266-8091;0000-0001-7172-9104", "linkedin": "theogruner/;;danielpalenicek/;janrpeters/;boris-belousov/", "or_profile": "~Theo_Gruner1;~Fabio_Muratore1;~Daniel_Palenicek1;~Jan_Peters3;~Boris_Belousov2", "aff": "Technische Universit\u00e4t Darmstadt;Robert Bosch GmbH, Bosch;Technische Universit\u00e4t Darmstadt;TU Darmstadt;German Research Centre for AI", "aff_domain": "tu-darmstadt.de;de.bosch.com;tu-darmstadt.de;tu-darmstadt.de;dfki.de", "position": "PhD student;Researcher;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\ngruner2023pseudolikelihood,\ntitle={Pseudo-Likelihood Inference},\nauthor={Theo Gruner and Boris Belousov and Fabio Muratore and Daniel Palenicek and Jan Peters},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FIv84qGPFT}\n}", "github": "", "project": "", "reviewers": "7dVy;dHap;9BA6;AzJv;XuP4", "pdf_size": 5961509, "rating": "6;6;6;6;6", "confidence": "4;3;3;2;3", "soundness": "3;3;3;3;3", "novelty": "3;3;2;2;3", "presentation": "3;3;3;2;2", "wc_summary": "80;29;80;176;76", "wc_strengths": "71;57;27;125;58", "wc_weaknesses": "236;141;16;413;266", "wc_questions": "286;39;149;65;83", "wc_limitations": "20;43;22;52;10", "wc_review": "693;309;294;831;493", "wc_reply_reviewers": "130;58;23;105;75", "wc_reply_authors": "386;15;0;25;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 88.2, 47.951642307641556 ], "wc_strengths_avg": [ 67.6, 32.122266420662164 ], "wc_weaknesses_avg": [ 214.4, 132.15233633954415 ], "wc_questions_avg": [ 124.4, 88.60609459850942 ], "wc_limitations_avg": [ 29.4, 15.589740215924062 ], "wc_review_avg": [ 524.0, 211.1378696491939 ], "wc_reply_reviewers_avg": [ 78.2, 37.05887208213439 ], "wc_reply_authors_avg": [ 85.2, 150.6989051055116 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15462595655377718145&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tu-darmstadt.de;de.bosch.com;tu-darmstadt.de;tu-darmstadt.de;dfki.de", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Technische 
Universit\u00e4t Darmstadt;Robert Bosch GmbH;German Research Centre for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.bosch.com;https://www.dfki.de/", "aff_unique_abbr": "TUD;Bosch;DFKI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Meta-learning families of plasticity rules in recurrent spiking networks using simulation-based inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72268", "id": "FLFasCFJNo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2bdc2267c3d7d01523e2e17ac0a754f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FLFasCFJNo", "openreview": "https://openreview.net/forum?id=FLFasCFJNo", "poster": "/media/PosterPDFs/NeurIPS%202023/72268.png?t=1702310372.3719614", "slides": "https://nips.cc/virtual/2023/poster/72268", "video": "https://nips.cc/virtual/2023/poster/72268", "author_site": "Basile Confavreux, Poornima Ramesh, Pedro Goncalves, Jakob H Macke, Tim Vogels", "tldr": "", "abstract": "There is substantial experimental evidence that learning and memory-related behaviours rely on local synaptic changes, but the search for distinct plasticity rules has been driven by human intuition, with limited success for multiple, co-active plasticity rules in biological networks. More recently, automated meta-learning approaches have been used in simplified settings, such as rate networks and small feed-forward spiking networks. Here, we develop a simulation-based inference (SBI) method for sequentially filtering plasticity rules through an increasingly fine mesh of constraints that can be modified on-the-fly. This method, _filter SBI_, allows us to infer entire families of complex and co-active plasticity rules in spiking networks. We first consider flexibly parameterized doublet (Hebbian) rules, and find that the set of inferred rules contains solutions that extend and refine -and also reject- predictions from mean-field theory. Next, we expand the search space of plasticity rules by modelling them as multi-layer perceptrons that combine several plasticity-relevant factors, such as weight, voltage, triplets and co-dependency. Out of the millions of possible rules, we identify thousands of unique rule combinations that satisfy biological constraints like plausible activity and weight dynamics. The resulting rules can be used as a starting point for further investigations into specific network computations, and already suggest refinements and predictions for classical experimental approaches on plasticity. This flexible approach for principled exploration of complex plasticity rules in large recurrent spiking networks presents the most advanced search tool to date for enabling robust predictions and deep insights into the plasticity mechanisms underlying brain function.", "keywords": "synaptic plasticity;spiking network;meta-learning;computational neuroscience", "primary_area": "", "supplementary_material": "/attachment/1fb7ec078b8e0d0722f5d719cd7c60a066806d22.pdf", "author": "Basile Confavreux;Poornima Ramesh;Pedro J. Goncalves;Jakob H. Macke;Tim P. 
Vogels", "authorids": "~Basile_Confavreux1;~Poornima_Ramesh1;~Pedro_J._Goncalves1;~Jakob_H._Macke1;~Tim_P._Vogels1", "gender": "M;;M;;M", "homepage": ";;http://ppjgoncalves.github.io/;;http://www.vogelslab.org", "dblp": "280/1270;;209/4971;;209/4878", "google_scholar": "103waU8AAAAJ;;https://scholar.google.co.uk/citations?user=0bj8iLAAAAAJ;;RsHPZroAAAAJ", "orcid": ";;0000-0002-6987-4836;;", "linkedin": ";;;;", "or_profile": "~Basile_Confavreux1;~Poornima_Ramesh1;~Pedro_J._Goncalves1;~Jakob_H._Macke1;~Tim_P._Vogels1", "aff": "Institute of Science and Technology;;University of Tuebingen;;Institute of Science and Technology Austria", "aff_domain": "ist.ac.at;;uni-tuebingen.de;;ist.ac.at", "position": "PhD student;;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nconfavreux2023metalearning,\ntitle={Meta-learning families of plasticity rules in recurrent spiking networks using simulation-based inference},\nauthor={Basile Confavreux and Poornima Ramesh and Pedro J. Goncalves and Jakob H. Macke and Tim P. Vogels},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FLFasCFJNo}\n}", "github": "", "project": "", "reviewers": "bG1x;jNn6;5no9;vJPD", "pdf_size": 4499352, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "4;4;3;3", "wc_summary": "112;186;75;158", "wc_strengths": "145;68;90;70", "wc_weaknesses": "465;99;96;246", "wc_questions": "76;202;304;0", "wc_limitations": "34;76;9;1", "wc_review": "832;631;574;475", "wc_reply_reviewers": "37;32;31;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 132.75, 42.540421953713626 ], "wc_strengths_avg": [ 93.25, 31.09159854365806 ], "wc_weaknesses_avg": [ 226.5, 150.45680443236856 ], "wc_questions_avg": [ 145.5, 116.52789365641172 ], "wc_limitations_avg": [ 30.0, 29.214722315983085 ], "wc_review_avg": [ 628.0, 130.33610397737075 ], "wc_reply_reviewers_avg": [ 32.25, 2.947456530637899 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14792164056959369022&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ist.ac.at;;uni-tuebingen.de;;ist.ac.at", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Institute of Science and Technology;University of Tuebingen;Institute of Science and Technology Austria", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.uni-tuebingen.de/;https://www.ist.ac.at", "aff_unique_abbr": ";Uni T\u00fcbingen;IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";Germany;Austria" }, { "title": "Scaling Riemannian Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72267", "id": "FLTg8uA5xI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe1ab2f77a9a0f224839cc9f1034a908-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FLTg8uA5xI", "openreview": "https://openreview.net/forum?id=FLTg8uA5xI", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72267", "video": "https://nips.cc/virtual/2023/poster/72267", "author_site": "Aaron Lou, Minkai Xu, Adam Farris, Stefano Ermon", "tldr": "", "abstract": "Riemannian diffusion models draw inspiration from standard Euclidean space diffusion models to learn distributions on general manifolds. Unfortunately, the additional geometric complexity renders the diffusion transition term inexpressible in closed form, so prior methods resort to imprecise approximations of the score matching training objective that degrade performance and preclude applications in high dimensions. In this work, we reexamine these approximations and propose several practical improvements. Our key observation is that most relevant manifolds are symmetric spaces, which are much more amenable to computation. By leveraging and combining various ans\\\"{a}tze, we can quickly compute relevant quantities to high precision. On low dimensional datasets, our correction produces a noticeable improvement and is competitive with other techniques. Additionally, we show that our method enables us to scale to high dimensional tasks on nontrivial manifolds, including $SU(n)$ lattices in the context of lattice quantum chromodynamics (QCD). Finally, we apply our models to contrastively learned hyperspherical embeddings, curbing the representation collapse problem in the projection head and closing the gap between theory and practice.", "keywords": "Diffusion Models;Geometric Deep Learning;Manifolds;Numerical Algorithms", "primary_area": "", "supplementary_material": "/attachment/a331a9657179729fb244dfd2e0434e491320dcb5.pdf", "author": "Aaron Lou;Minkai Xu;Adam Farris;Stefano Ermon", "authorids": "~Aaron_Lou1;~Minkai_Xu1;~Adam_Farris1;~Stefano_Ermon1", "gender": "M;M;M;M", "homepage": "https://aaronlou.com;https://minkaixu.com;;http://cs.stanford.edu/~ermon/", "dblp": "232/3858;257/3355;293/9715;47/8135", "google_scholar": ";https://scholar.google.com/citations?hl=en;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aaron_Lou1;~Minkai_Xu1;~Adam_Farris1;~Stefano_Ermon1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nlou2023scaling,\ntitle={Scaling Riemannian Diffusion Models},\nauthor={Aaron Lou and Minkai Xu and Adam Farris and Stefano Ermon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FLTg8uA5xI}\n}", "github": "", "project": "", "reviewers": "VM6C;1Bu5;NaGf;611J", "pdf_size": 1512695, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "2;4;3;4", "novelty": "2;3;3;4", "presentation": "2;4;3;3", "wc_summary": "46;148;178;95", "wc_strengths": "40;129;109;58", "wc_weaknesses": "64;487;133;53", "wc_questions": "46;76;263;127", "wc_limitations": "1;16;101;1", "wc_review": "197;856;784;334", "wc_reply_reviewers": "51;70;163;0", "wc_reply_authors": "48;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.75, 50.514230668198834 ], "wc_strengths_avg": [ 84.0, 36.26982216664427 ], "wc_weaknesses_avg": [ 184.25, 177.46179166231812 ], "wc_questions_avg": 
[ 128.0, 83.14745937188941 ], "wc_limitations_avg": [ 29.75, 41.589511899035315 ], "wc_review_avg": [ 542.75, 282.5981024352428 ], "wc_reply_reviewers_avg": [ 71.0, 58.96185207403173 ], "wc_reply_authors_avg": [ 12.0, 20.784609690826528 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9710521515254682545&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "FedL2P: Federated Learning to Personalize", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72266", "id": "FM81CI68Iz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2fb57276bfbaf1b832d7bfcba36bb41c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FM81CI68Iz", "openreview": "https://openreview.net/forum?id=FM81CI68Iz", "poster": "/media/PosterPDFs/NeurIPS%202023/72266.png?t=1699362318.6909974", "slides": "https://nips.cc/virtual/2023/poster/72266", "video": "https://nips.cc/virtual/2023/poster/72266", "author_site": "Royson Lee, Minyoung Kim, Da Li, Xinchi Qiu, Timothy Hospedales, Ferenc Huszar, Nicholas Lane", "tldr": "", "abstract": "Federated learning (FL) research has made progress in developing algorithms for distributed learning of global models, as well as algorithms for local personalization of those common models to the specifics of each client\u2019s local data distribution. However, different FL problems may require different personalization strategies, and it may not even be possible to define an effective one-size-fits-all personalization strategy for all clients: Depending on how similar each client\u2019s optimal predictor is to that of the global model, different personalization strategies may be preferred. In this paper, we consider the federated meta-learning problem of learning personalization strategies. Specifically, we consider meta-nets that induce the batch-norm and learning rate parameters for each client given local data statistics. By learning these meta-nets through FL, we allow the whole FL network to collaborate in learning a customized personalization strategy for each client. 
Empirical results show that this framework improves on a range of standard hand-crafted personalization baselines in both label and feature shift situations.", "keywords": "federated learning; meta-learning; hyperparameter optimization", "primary_area": "", "supplementary_material": "/attachment/1f20116bcbc905cc00f7d967aa168a4a0506cf9c.pdf", "author": "Royson Lee;Minyoung Kim;Da Li;Xinchi Qiu;Timothy Hospedales;Ferenc Husz\u00e1r;Nicholas Donald Lane", "authorids": "~Royson_Lee1;~Minyoung_Kim2;~Da_Li3;~Xinchi_Qiu1;~Timothy_Hospedales1;~Ferenc_Husz\u00e1r1;~Nicholas_Donald_Lane1", "gender": ";M;M;F;M;M;M", "homepage": ";https://sites.google.com/site/mikim21/;https://dali-dl.github.io/;;http://homepages.inf.ed.ac.uk/thospeda/;;http://niclane.org", "dblp": "247/5940;;43/4804-1;265/6559;32/3545;http://dblp.uni-trier.de/pers/hd/h/Huszar:Ferenc;03/2663.html", "google_scholar": "qNu3yNoAAAAJ;;RPvaE3oAAAAJ;yW6vsS8AAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ;https://scholar.google.co.uk/citations?user=koQCVT4AAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": ";;0000-0002-2101-2989;;0000-0003-4867-7486;;0000-0002-2728-8273", "linkedin": "royson-lee-025a09169/;;;xinchi-qiu-686a7394/;timothyhospedales/;;niclane", "or_profile": "~Royson_Lee1;~Minyoung_Kim2;~Da_Li3;~Xinchi_Qiu1;~Timothy_Hospedales1;~Ferenc_Huszar1;~Nic_Lane2", "aff": "Samsung AI Center, Cambridge;Samsung AI Center, Cambridge, UK;University of Edinburgh;University of Cambridge;Samsung AI Research Centre;University of Cambridge;Samsung", "aff_domain": "samsung.com;samsung.com;ed.ac.uk;cam.ac.uk;samsung.com;cam.ac.uk;samsung.com", "position": "Research Engineer;Senior Researcher;Visiting Scholar;PhD student;Principal Researcher;Associate Professor;Laboratory Director", "bibtex": "@inproceedings{\nlee2023fedlp,\ntitle={FedL2P: Federated Learning to Personalize},\nauthor={Royson Lee and Minyoung Kim and Da Li and Xinchi Qiu and Timothy Hospedales and Ferenc Husz{\\'a}r and Nicholas Donald Lane},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FM81CI68Iz}\n}", "github": "", "project": "", "reviewers": "Jjav;aauE;P9q9;wFN9;nqFj", "pdf_size": 1124268, "rating": "5;5;5;6;6", "confidence": "3;3;3;3;4", "soundness": "3;3;3;2;3", "novelty": "3;3;3;3;2", "presentation": "3;2;3;2;2", "wc_summary": "200;85;58;81;217", "wc_strengths": "263;52;50;52;110", "wc_weaknesses": "218;162;93;183;136", "wc_questions": "151;117;54;1;435", "wc_limitations": "20;2;12;1;8", "wc_review": "852;418;267;318;906", "wc_reply_reviewers": "5;80;9;20;42", "wc_reply_authors": "33;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 128.2, 66.42710290235455 ], "wc_strengths_avg": [ 105.4, 82.01365739924053 ], "wc_weaknesses_avg": [ 158.4, 42.30650068251923 ], "wc_questions_avg": [ 151.6, 150.81060970634658 ], "wc_limitations_avg": [ 8.6, 6.974238309665077 ], "wc_review_avg": [ 552.2, 271.75459517734015 ], "wc_reply_reviewers_avg": [ 31.2, 27.578252301405904 ], "wc_reply_authors_avg": [ 6.6, 13.199999999999998 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6123724356957945, 
"gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7415387916494683156&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 8, "email": "samsung.com;samsung.com;ed.ac.uk;cam.ac.uk;samsung.com;cam.ac.uk;samsung.com", "author_num": 7, "aff_unique_index": "0;0;1;2;0;2;0", "aff_unique_norm": "Samsung;University of Edinburgh;University of Cambridge", "aff_unique_dep": "AI Center;;", "aff_unique_url": "https://www.samsung.com/global/research-innovation/ai-research-centers/samsung-ai-center-cambridge/;https://www.ed.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "SAC;Edinburgh;Cambridge", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;1;0;1", "aff_country_unique": "United Kingdom;South Korea" }, { "title": "Core-sets for Fair and Diverse Data Summarization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72265", "id": "FM8thAWqiO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f980ba94f513168f2b292f58aef929ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FM8thAWqiO", "openreview": "https://openreview.net/forum?id=FM8thAWqiO", "poster": "/media/PosterPDFs/NeurIPS%202023/72265.png?t=1704896543.8845315", "slides": "https://nips.cc/virtual/2023/poster/72265", "video": "https://nips.cc/virtual/2023/poster/72265", "author_site": "Sepideh Mahabadi, Stojan Trajanovski", "tldr": "", "abstract": "We study core-set construction algorithms for the task of Diversity Maximization under fairness/partition constraint. Given a set of points $P$ in a metric space partitioned into $m$ groups, and given $k_1,\\ldots,k_m$, the goal of this problem is to pick $k_i$ points from each group $i$ such that the overall diversity of the $k=\\sum_i k_i$ picked points is maximized. We consider two natural diversity measures: sum-of-pairwise distances and sum-of-nearest-neighbor distances, and show improved core-set construction algorithms with respect to these measures. More precisely, we show the first constant factor core-set w.r.t. sum-of-pairwise distances whose size is independent of the size of the dataset and the aspect ratio. Second, we show the first core-set w.r.t. the sum-of-nearest-neighbor distances. Finally, we run several experiments showing the effectiveness of our core-set approach. In particular, we apply constrained diversity maximization to summarize a set of timed messages that takes into account the messages' recency. Specifically, the summary should include more recent messages compared to older ones. This is a real task in one of the largest communication platforms, affecting the experience of hundreds of millions daily active users. By utilizing our core-set method for this task, we achieve a 100x speed-up while losing the diversity by only a few percent. 
Moreover, our approach allows us to improve the space usage of the algorithm in the streaming setting.", "keywords": "Constrained Diversity Maximization;Fairness;Data Summarization;Core-sets;Approximation Algorithms", "primary_area": "", "supplementary_material": "", "author": "Sepideh Mahabadi;Stojan Trajanovski", "authorids": "~Sepideh_Mahabadi1;~Stojan_Trajanovski2", "gender": "F;M", "homepage": "https://www.mit.edu/~mahabadi/;https://tstojan.github.io/", "dblp": "130/0388;125/7630.html", "google_scholar": "NirVdpMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-0892-9263", "linkedin": ";stojantrajanovski/", "or_profile": "~Sepideh_Mahabadi1;~Stojan_Trajanovski2", "aff": "Microsoft Research;Microsoft", "aff_domain": "microsoft.com;microsoft.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nmahabadi2023coresets,\ntitle={Core-sets for Fair and Diverse Data Summarization},\nauthor={Sepideh Mahabadi and Stojan Trajanovski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FM8thAWqiO}\n}", "github": "", "project": "", "reviewers": "aN6A;9Vmw;S4pc;CoyK", "pdf_size": 608988, "rating": "6;6;6;7", "confidence": "3;1;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "2;1;3;3", "wc_summary": "50;108;76;239", "wc_strengths": "72;74;74;24", "wc_weaknesses": "404;67;31;7", "wc_questions": "100;32;10;25", "wc_limitations": "16;6;7;1", "wc_review": "642;287;198;296", "wc_reply_reviewers": "52;286;0;0", "wc_reply_authors": "52;44;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 118.25, 72.67865917860621 ], "wc_strengths_avg": [ 61.0, 21.37755832643195 ], "wc_weaknesses_avg": [ 127.25, 161.20231853171344 ], "wc_questions_avg": [ 41.75, 34.55701810052482 ], "wc_limitations_avg": [ 7.5, 5.408326913195984 ], "wc_review_avg": [ 355.75, 169.6472443041737 ], "wc_reply_reviewers_avg": [ 84.5, 118.25713509129164 ], "wc_reply_authors_avg": [ 24.0, 24.166091947189145 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9974723711774809456&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "microsoft.com;microsoft.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Hyperbolic VAE via Latent Gaussian Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72264", "id": "FNn4zibGvw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/01ecd39ca49ddecc5729ca996304781b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FNn4zibGvw", "openreview": "https://openreview.net/forum?id=FNn4zibGvw", "poster": "/media/PosterPDFs/NeurIPS%202023/72264.png?t=1701751819.1473122", "slides": "https://nips.cc/virtual/2023/poster/72264", "video": 
"https://nips.cc/virtual/2023/poster/72264", "author_site": "Seunghyuk Cho, Juyong Lee, Dongwoo Kim", "tldr": "", "abstract": "We propose a Gaussian manifold variational auto-encoder (GM-VAE) whose latent space consists of a set of Gaussian distributions. It is known that the set of the univariate Gaussian distributions with the Fisher information metric form a hyperbolic space, which we call a Gaussian manifold. To learn the VAE endowed with the Gaussian manifolds, we propose a pseudo-Gaussian manifold normal distribution based on the Kullback-Leibler divergence, a local approximation of the squared Fisher-Rao distance, to define a density over the latent space. We demonstrate the efficacy of GM-VAE on two different tasks: density estimation of image datasets and state representation learning for model-based reinforcement learning. GM-VAE outperforms the other variants of hyperbolic- and Euclidean-VAEs on density estimation tasks and shows competitive performance in model-based reinforcement learning. We observe that our model provides strong numerical stability, addressing a common limitation reported in previous hyperbolic-VAEs. The implementation is available at https://github.com/ml-postech/GM-VAE.", "keywords": "Hyperbolic space;VAE;Distribution on hyperbolic space;Hierarchical representation learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/c7d4b35e90e2eddb43b4ca6ec8dd896e7eec59d0.zip", "author": "Seunghyuk Cho;Juyong Lee;Dongwoo Kim", "authorids": "~Seunghyuk_Cho1;~Juyong_Lee1;~Dongwoo_Kim1", "gender": "M;M;M", "homepage": "https://seunghyukcho.github.io;;http://dongwookim-ml.github.io/", "dblp": "284/8079;;15/398-2", "google_scholar": "4OOM9_cAAAAJ;https://scholar.google.co.kr/citations?user=J7sqxHQAAAAJ;https://scholar.google.co.kr/citations?user=RkspD6IAAAAJ", "orcid": "0000-0002-9124-2712;0000-0002-8155-3998;0000-0002-6515-5260", "linkedin": "4stack/;;", "or_profile": "~Seunghyuk_Cho1;~Juyong_Lee1;~Dongwoo_Kim1", "aff": "POSTECH;Pohang University of Science and Technology;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr;postech.ac.kr", "position": "MS student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ncho2023hyperbolic,\ntitle={Hyperbolic {VAE} via Latent Gaussian Distributions},\nauthor={Seunghyuk Cho and Juyong Lee and Dongwoo Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FNn4zibGvw}\n}", "github": "", "project": "", "reviewers": "ACLv;1SSZ;Darj;bHMn", "pdf_size": 681942, "rating": "5;5;6;6", "confidence": "3;3;4;5", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "86;75;61;96", "wc_strengths": "64;54;64;85", "wc_weaknesses": "28;155;235;471", "wc_questions": "49;12;69;89", "wc_limitations": "15;39;16;1", "wc_review": "242;335;445;742", "wc_reply_reviewers": "25;49;16;347", "wc_reply_authors": "25;154;27;622", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 13.009611831257688 ], "wc_strengths_avg": [ 66.75, 11.299889379989523 ], "wc_weaknesses_avg": [ 222.25, 161.47348853604421 ], "wc_questions_avg": [ 54.75, 28.44622119016865 ], "wc_limitations_avg": [ 17.75, 13.626720074911644 ], "wc_review_avg": [ 441.0, 188.05185455081266 ], 
"wc_reply_reviewers_avg": [ 109.25, 137.7940038608357 ], "wc_reply_authors_avg": [ 207.0, 245.23356214025844 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5705818013984917642&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "postech.ac.kr;postech.ac.kr;postech.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "BCDiff: Bidirectional Consistent Diffusion for Instantaneous Trajectory Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72263", "id": "FOFJmR1oxt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e57e2c14232a7b99cf76213e190822d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FOFJmR1oxt", "openreview": "https://openreview.net/forum?id=FOFJmR1oxt", "poster": "/media/PosterPDFs/NeurIPS%202023/72263.png?t=1701403491.5920212", "slides": "https://nips.cc/virtual/2023/poster/72263", "video": "https://nips.cc/virtual/2023/poster/72263", "author_site": "Rongqing Li, Changsheng Li, Dongchun Ren, Guangyi Chen, Ye Yuan, Guoren Wang", "tldr": "", "abstract": "The objective of pedestrian trajectory prediction is to estimate the future paths of pedestrians by leveraging historical observations, which plays a vital role in ensuring the safety of self-driving vehicles and navigation robots. Previous works usually rely on a sufficient amount of observation time to accurately predict future trajectories. However, there are many real-world situations where the model lacks sufficient time to observe, such as when pedestrians abruptly emerge from blind spots, resulting in inaccurate predictions and even safety risks. Therefore, it is necessary to perform trajectory prediction based on instantaneous observations, which has rarely been studied before. In this paper, we propose a Bi-directional Consistent Diffusion framework tailored for instantaneous trajectory prediction, named BCDiff. At its heart, we develop two coupled diffusion models by designing a mutual guidance mechanism which can bidirectionally and consistently generate unobserved historical trajectories and future trajectories step-by-step, to utilize the complementary information between them. Specifically, at each step, the predicted unobserved historical trajectories and limited observed trajectories guide one diffusion model to generate future trajectories, while the predicted future trajectories and observed trajectories guide the other diffusion model to predict unobserved historical trajectories. Given the presence of relatively high noise in the generated trajectories during the initial steps, we introduce a gating mechanism to learn the weights between the predicted trajectories and the limited observed trajectories for automatically balancing their contributions. By means of this iterative and mutually guided generation process, both the future and unobserved historical trajectories undergo continuous refinement, ultimately leading to accurate predictions. 
Essentially, BCDiff is an encoder-free framework that can be compatible with existing trajectory prediction models in principle. Experiments show that our proposed BCDiff significantly improves the accuracy of instantaneous trajectory prediction on the ETH/UCY and Stanford Drone datasets, compared to related approaches.", "keywords": "Trajectory prediction;instantaneous observation", "primary_area": "", "supplementary_material": "/attachment/3a33ec63c44798101b5d2f36f01d75f68b29cca2.pdf", "author": "Rongqing Li;Changsheng Li;Dongchun Ren;Guangyi Chen;Ye Yuan;Guoren Wang", "authorids": "~Rongqing_Li1;~Changsheng_Li4;~Dongchun_Ren2;~Guangyi_Chen1;~Ye_Yuan15;~Guoren_Wang2", "gender": ";M;;M;;M", "homepage": ";;;https://chengy12.github.io/;;https://guorenwang.github.io/", "dblp": ";;;c/GuangyiChen-2;;", "google_scholar": ";FfJnUioAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-9789-7632;;;;", "linkedin": ";;;;;", "or_profile": "~Rongqing_Li1;~Changsheng_Li4;~Dongchun_Ren2;~Guangyi_Chen1;~Ye_Yuan15;~Guoren_Wang2", "aff": ";Beijing Institute of Technology;;Carnegie Mellon University;;Beijing Institute of Technology", "aff_domain": ";bit.edu.cn;;cmu.edu;;bit.edu.cn", "position": ";Full Professor;;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nli2023bcdiff,\ntitle={{BCD}iff: Bidirectional Consistent Diffusion for Instantaneous Trajectory Prediction},\nauthor={Rongqing Li and Changsheng Li and Dongchun Ren and Guangyi Chen and Ye Yuan and Guoren Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FOFJmR1oxt}\n}", "github": "", "project": "", "reviewers": "JXan;VvSx;ytbj;GWuJ;WQWs", "pdf_size": 6743873, "rating": "6;6;6;6;7", "confidence": "4;3;3;5;4", "soundness": "3;2;3;2;4", "novelty": "2;3;3;2;3", "presentation": "3;3;3;3;4", "wc_summary": "56;79;161;61;66", "wc_strengths": "85;61;62;10;147", "wc_weaknesses": "261;145;121;307;103", "wc_questions": "239;45;16;9;32", "wc_limitations": "74;32;2;9;32", "wc_review": "715;362;362;396;380", "wc_reply_reviewers": "0;0;26;70;35", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 84.6, 38.959466115438495 ], "wc_strengths_avg": [ 73.0, 44.39369324577535 ], "wc_weaknesses_avg": [ 187.4, 81.30338246346211 ], "wc_questions_avg": [ 68.2, 86.31430935829819 ], "wc_limitations_avg": [ 29.8, 25.1745903640953 ], "wc_review_avg": [ 443.0, 136.5898971373798 ], "wc_reply_reviewers_avg": [ 26.2, 25.956887332652194 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.1336306209562122, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12744999391538337050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";bit.edu.cn;;cmu.edu;;bit.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Beijing Institute of Technology;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;https://www.cmu.edu", "aff_unique_abbr": "BIT;CMU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Streaming PCA for Markovian Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72262", "id": "FQGRkwmRzm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cbb1fa8e7f515e796cda6621a703492f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FQGRkwmRzm", "openreview": "https://openreview.net/forum?id=FQGRkwmRzm", "poster": "/media/PosterPDFs/NeurIPS%202023/72262.png?t=1701885730.2778182", "slides": "https://nips.cc/virtual/2023/poster/72262", "video": "https://nips.cc/virtual/2023/poster/72262", "author_site": "Syamantak Kumar, Purnamrita Sarkar", "tldr": "", "abstract": "Since its inception in 1982, Oja's algorithm has become an established method for streaming principle component analysis (PCA). We study the problem of streaming PCA, where the data-points are sampled from an irreducible, aperiodic, and reversible Markov chain starting in stationarity. Our goal is to estimate the top eigenvector of the unknown covariance matrix of the stationary distribution. This setting has implications in scenarios where data can solely be sampled from a Markov Chain Monte Carlo (MCMC) type algorithm, and the objective is to perform inference on parameters of the stationary distribution. Most convergence guarantees for Oja's algorithm in the literature assume that the data-points are sampled IID. For data streams with Markovian dependence, one typically downsamples the data to get a \"nearly\" independent data stream. In this paper, we obtain the first near-optimal rate for Oja's algorithm on the entire data, where we remove the logarithmic dependence on the sample size, $n$, resulting from throwing data away in downsampling strategies.", "keywords": "Streaming PCA;Markov Chain;Mixing;Oja's algorithm", "primary_area": "", "supplementary_material": "/attachment/9688ba166438558129b5794c74657f33aa42eed2.zip", "author": "Syamantak Kumar;Purnamrita Sarkar", "authorids": "~Syamantak_Kumar1;~Purnamrita_Sarkar1", "gender": "M;F", "homepage": "https://syamantakk.github.io/;https://psarkar.github.io/", "dblp": "297/4951;25/6929", "google_scholar": "https://scholar.google.com/citations?hl=en;KfT3_0AAAAAJ", "orcid": ";", "linkedin": "syamantak-kumar/;", "or_profile": "~Syamantak_Kumar1;~Purnamrita_Sarkar1", "aff": "University of Texas at Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nkumar2023streaming,\ntitle={Streaming {PCA} for Markovian Data},\nauthor={Syamantak Kumar and Purnamrita Sarkar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FQGRkwmRzm}\n}", "github": "", "project": "", "reviewers": "eXbt;iipj;M3wf;1vdj", "pdf_size": 645541, "rating": "6;7;7;8", "confidence": "3;4;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "2;1;3;3", "wc_summary": "137;165;48;123", "wc_strengths": "77;33;81;38", "wc_weaknesses": "452;189;33;122", "wc_questions": "249;344;80;308", "wc_limitations": "34;28;5;1", "wc_review": "949;759;247;592", "wc_reply_reviewers": "365;263;22;0", "wc_reply_authors": "43;246;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], 
"presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 118.25, 43.28611209152423 ], "wc_strengths_avg": [ 57.25, 21.867498713844707 ], "wc_weaknesses_avg": [ 199.0, 156.2001920613416 ], "wc_questions_avg": [ 245.25, 101.25555540314812 ], "wc_limitations_avg": [ 17.0, 14.230249470757707 ], "wc_review_avg": [ 636.75, 258.04687074250677 ], "wc_reply_reviewers_avg": [ 162.5, 155.92706628420865 ], "wc_reply_authors_avg": [ 72.25, 101.83902739127078 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=963734867271616948&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "cs.utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Markovian Sliced Wasserstein Distances: Beyond Independent Projections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72261", "id": "FT2q2B4cKZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d2b770c3ccd35b41c9453ef6f8765a3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FT2q2B4cKZ", "openreview": "https://openreview.net/forum?id=FT2q2B4cKZ", "poster": "/media/PosterPDFs/NeurIPS%202023/72261.png?t=1699475382.3558338", "slides": "https://nips.cc/virtual/2023/poster/72261", "video": "https://nips.cc/virtual/2023/poster/72261", "author_site": "Khai Nguyen, Tongzheng Ren, Nhat Ho", "tldr": "", "abstract": "Sliced Wasserstein (SW) distance suffers from redundant projections due to independent uniform random projecting directions. To partially overcome the issue, max K sliced Wasserstein (Max-K-SW) distance ($K\\geq 1$), seeks the best discriminative orthogonal projecting directions. Despite being able to reduce the number of projections, the metricity of the Max-K-SW cannot be guaranteed in practice due to the non-optimality of the optimization. Moreover, the orthogonality constraint is also computationally expensive and might not be effective. To address the problem, we introduce a new family of SW distances, named Markovian sliced Wasserstein (MSW) distance, which imposes a first-order Markov structure on projecting directions. We discuss various members of the MSW by specifying the Markov structure including the prior distribution, the transition distribution, and the burning and thinning technique. Moreover, we investigate the theoretical properties of MSW including topological properties (metricity, weak convergence, and connection to other distances), statistical properties (sample complexity, and Monte Carlo estimation error), and computational properties (computational complexity and memory complexity). 
Finally, we compare MSW distances with previous SW variants in various applications such as gradient flows, color transfer, and deep generative modeling to demonstrate the favorable performance of the MSW.", "keywords": "Sliced Wasserstein;Generative Models;Optimal Transport", "primary_area": "", "supplementary_material": "/attachment/b593476a0cc7a5f1bf3ef476363ba26e0ab77cd9.zip", "author": "Khai Nguyen;Tongzheng Ren;Nhat Ho", "authorids": "~Khai_Nguyen1;~Tongzheng_Ren1;~Nhat_Ho1", "gender": "M;M;M", "homepage": "https://khainb.com;https://www.cs.utexas.edu/~tzren/;https://nhatptnk8912.github.io/", "dblp": "120/4308;211/8004;203/4479", "google_scholar": "im5fNaQAAAAJ;VgNDYeYAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";;", "linkedin": ";;nhat-pham-minh-ho-267b8164/", "or_profile": "~Khai_Nguyen1;~Tongzheng_Ren1;~Nhat_Ho1", "aff": "University of Texas, Austin;Google;University of Texas, Austin", "aff_domain": "utexas.edu;google.com;utexas.edu", "position": "PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023markovian,\ntitle={Markovian Sliced Wasserstein Distances: Beyond Independent Projections},\nauthor={Khai Nguyen and Tongzheng Ren and Nhat Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FT2q2B4cKZ}\n}", "github": "", "project": "", "reviewers": "72x2;AyBm;YkFo;yZut", "pdf_size": 19075272, "rating": "6;6;7;7", "confidence": "4;3;4;5", "soundness": "4;3;3;4", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "24;61;171;138", "wc_strengths": "39;20;57;132", "wc_weaknesses": "73;176;588;55", "wc_questions": "94;62;7;168", "wc_limitations": "7;10;8;1", "wc_review": "237;329;831;494", "wc_reply_reviewers": "70;11;58;131", "wc_reply_authors": "68;28;37;206", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 98.5, 58.67921267365471 ], "wc_strengths_avg": [ 62.0, 42.47940677551889 ], "wc_weaknesses_avg": [ 223.0, 215.7301555184161 ], "wc_questions_avg": [ 82.75, 58.22961016527588 ], "wc_limitations_avg": [ 6.5, 3.3541019662496847 ], "wc_review_avg": [ 472.75, 226.4049193370144 ], "wc_reply_reviewers_avg": [ 67.5, 42.78142120126446 ], "wc_reply_authors_avg": [ 84.75, 71.55897917103066 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17650796339486842669&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "utexas.edu;google.com;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "UT Austin;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Austin;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "FTh5Rd3urw", "title": "PI-FL: Personalized and Incentivized Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing incentive solutions for traditional Federated Learning (FL) only consider individual clients' contributions to a 
single global model. They are unsuitable for clustered personalization, where multiple cluster-level models can exist. Moreover, they focus solely on providing monetary incentives and fail to address the need for personalized FL, overlooking the importance of enhancing the personalized model's appeal to individual clients as a motivating factor for consistent participation. In this paper, we first propose to treat incentivization and personalization as interrelated challenges and solve them with an incentive mechanism that fosters personalized learning. Second, unlike existing approaches that rely on the aggregator to perform client clustering, we propose to involve clients by allowing them to provide incentive-driven preferences for joining clusters based on their data distributions. Our approach enhances the personalized and cluster-level model appeal for self-aware clients with high-quality data, leading to their active and consistent participation. Through evaluation, we show that we achieve an 8-45% test accuracy improvement of the cluster models, 3-38% improvement in personalized model appeal, and 31-100% increase in the participation rate, compared to a wide range of FL modeling approaches, including those that tackle data heterogeneity and learn personalized models.", "keywords": "Federated Learning;Personalized Federated Learning;Incentivized Federated Learning;Client Autonomy", "primary_area": "", "supplementary_material": "/attachment/9753b7def7ff55db39abaf1311c34a98637c5e45.zip", "author": "Ahmad Faraz Khan;Xinran Wang;Qi Le;Azal Ahmad Khan;Haider Ali;Jie Ding;Ali Anwar;Ali R. Butt", "authorids": "~Ahmad_Faraz_Khan1;~Xinran_Wang3;~Qi_Le1;~Azal_Ahmad_Khan1;~Haider_Ali2;~Jie_Ding2;~Ali_Anwar1;~Ali_R._Butt1", "gender": "M;F;M;M;M;M;M;Not Specified", "homepage": "https://afkd98.github.io/;https://wang8740.github.io;https://www.linkedin.com/in/qi-le-60a8811a0/;https://azalahmadkhan.github.io/;;http://jding.org;https://chalianwar.github.io/;https://people.cs.vt.edu/butta/", "dblp": ";;;336/6801;;94/1825-2;69/9027-1;b/AliRazaButt.html", "google_scholar": "VjGylKsAAAAJ;u8gID6EAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.pk/citations?user=GQvzd34AAAAJ;ZyqvoqcAAAAJ;o3eOVbgAAAAJ;oqux_wcAAAAJ", "orcid": "0009-0009-3867-5656;;;;;;;0000-0002-0871-7263", "linkedin": "ahmadfarazkhandurrani/;wang-xinran;;azal/;;;;", "or_profile": "~Ahmad_Faraz_Khan1;~Xinran_Wang3;~Qi_Le1;~Azal_Ahmad_Khan1;~Haider_Ali2;~Jie_Ding2;~Ali_Anwar1;~Ali_R._Butt1", "aff": "Virginia Polytechnic Institute and State University;University of Minnesota - Twin Cities;University of Minnesota - Twin Cities;Indian Institute of Technology, Guwahati;;University of Minnesota, Minneapolis;University of Minnesota;Virginia Polytechnic Institute and State University", "aff_domain": "vt.edu;umn.edu;umn.edu;iitg.ac.in;;umn.edu;umn.edu;vt.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2023pifl,\ntitle={{PI}-{FL}: Personalized and Incentivized Federated Learning},\nauthor={Anonymous},\nyear={2023},\nurl={https://openreview.net/forum?id=FTh5Rd3urw}\n}", "github": "", "project": "", "reviewers": "MX3C;mhQ3;bwzk;H8pJ", "site": "https://openreview.net/forum?id=FTh5Rd3urw", "pdf_size": 567243, "rating": "4;4;5;7", "confidence": "2;4;4;4", "soundness": "3;2;2;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "43;111;85;117", "wc_strengths": "29;195;103;136", "wc_weaknesses":
"178;205;57;212", "wc_questions": "7;39;340;251", "wc_limitations": "1;1;74;92", "wc_review": "258;551;659;808", "wc_reply_reviewers": "104;19;333;167", "wc_reply_authors": "341;152;901;0", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;3;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 29.154759474226502 ], "wc_strengths_avg": [ 115.75, 59.95571282204891 ], "wc_weaknesses_avg": [ 163.0, 62.50199996800102 ], "wc_questions_avg": [ 159.25, 140.2932197221234 ], "wc_limitations_avg": [ 42.0, 41.49096287144949 ], "wc_review_avg": [ 569.0, 201.411270786915 ], "wc_reply_reviewers_avg": [ 155.75, 115.02472560280246 ], "wc_reply_authors_avg": [ 348.5, 341.0927293273781 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7230411363379278606&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;1;1;0", "aff_unique_norm": "Virginia Tech;University of Minnesota;Indian Institute of Technology Guwahati", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vt.edu;https://www.minnesota.edu;https://www.iitg.ac.in", "aff_unique_abbr": "VT;UMN;IIT Guwahati", "aff_campus_unique_index": "1;1;2;3", "aff_campus_unique": ";Twin Cities;Guwahati;Minneapolis", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;India" }, { "title": "Token-Scaled Logit Distillation for Ternary Weight Generative Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72260", "id": "FUnEkOkodU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8342218a4ec08b8c19661725e9cd6c0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FUnEkOkodU", "openreview": "https://openreview.net/forum?id=FUnEkOkodU", "poster": "/media/PosterPDFs/NeurIPS%202023/72260.png?t=1701499814.004198", "slides": "https://nips.cc/virtual/2023/poster/72260", "video": "https://nips.cc/virtual/2023/poster/72260", "author_site": "Minsoo Kim, Sihwa Lee, Janghwan Lee, Sukjin Hong, Du-Seong Chang, Wonyong Sung, Jungwook Choi", "tldr": "", "abstract": "Generative Language Models (GLMs) have shown impressive performance in tasks such as text generation, understanding, and reasoning. However, the large model size poses challenges for practical deployment. To solve this problem, Quantization-Aware Training (QAT) has become increasingly popular. However, current QAT methods for generative models have resulted in a noticeable loss of accuracy. To counteract this issue, we propose a novel knowledge distillation method specifically designed for GLMs. Our method, called token-scaled logit distillation, prevents overfitting and provides superior learning from the teacher model and ground truth. This research marks the first evaluation of ternary weight quantization-aware training of large-scale GLMs with less than 1.0 degradation in perplexity and achieves enhanced accuracy in tasks like common-sense QA and arithmetic reasoning as well as natural language understanding. 
Our code is available at https://github.com/aiha-lab/TSLD.", "keywords": "Generative Language Model;Quantization;QAT;Knowledge Distillation;Causal Attention;Language Modeling", "primary_area": "", "supplementary_material": "/attachment/5cb889908df3d39e063d8c9fe7ab9fa868ddacfc.pdf", "author": "Minsoo Kim;Sihwa Lee;Janghwan Lee;Sukjin Hong;Du-Seong Chang;Wonyong Sung;Jungwook Choi", "authorids": "~Minsoo_Kim2;~Sihwa_Lee1;~Janghwan_Lee2;~Sukjin_Hong1;~Du-Seong_Chang1;~Wonyong_Sung1;~Jungwook_Choi1", "gender": "M;M;M;M;M;;M", "homepage": "https://marsjacobs.github.io;https://nextai.hanyang.ac.kr/;https://superdocker.github.io;;https://duseongchang.github.io/;;", "dblp": ";308/0611;27/10012;334/0967;92/3996;22/1975;97/4140", "google_scholar": "https://scholar.google.co.kr/citations?hl=ko;wvBKhS8AAAAJ;_SJm7y4AAAAJ;;https://scholar.google.co.kr/citations?user=y1HTwWAAAAAJ;https://scholar.google.co.kr/citations?user=1IfNFz4AAAAJ;YPT98zwAAAAJ", "orcid": ";;;;;0000-0001-8801-210X;", "linkedin": "minsoo-kim-37268a1b0/;;;;;;jungwook-choi-5854996b/", "or_profile": "~Minsoo_Kim2;~Sihwa_Lee1;~Janghwan_Lee2;~Sukjin_Hong1;~Du-Seong_Chang1;~Wonyong_Sung1;~Jungwook_Choi1", "aff": "Hanyang University;Hanyang University;Hanyang University;Korea Telecom Research;KT;Seoul National University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;kt.com;kt.com;snu.ac.kr;hanyang.ac.kr", "position": "PhD student;PhD student;PhD student;Researcher;Senior Vice President;Emeritus;Assistant Professor", "bibtex": "@inproceedings{\nkim2023tokenscaled,\ntitle={Token-Scaled Logit Distillation for Ternary Weight Generative Language Models},\nauthor={Minsoo Kim and Sihwa Lee and Janghwan Lee and Sukjin Hong and Du-Seong Chang and Wonyong Sung and Jungwook Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FUnEkOkodU}\n}", "github": "", "project": "", "reviewers": "cozw;EGP4;QKmL;aiZD;kT5b", "pdf_size": 2141325, "rating": "5;5;6;6;7", "confidence": "4;4;4;4;5", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;2;3;4", "wc_summary": "48;12;154;78;434", "wc_strengths": "113;12;28;62;152", "wc_weaknesses": "355;237;218;102;147", "wc_questions": "10;38;59;1;161", "wc_limitations": "25;8;1;1;60", "wc_review": "551;307;460;244;954", "wc_reply_reviewers": "39;16;87;0;79", "wc_reply_authors": "511;40;54;0;549", "reply_reviewers": "1;1;1;0;2", "reply_authors": "2;2;2;1;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 145.2, 151.78194886085763 ], "wc_strengths_avg": [ 73.4, 52.35876239943034 ], "wc_weaknesses_avg": [ 211.8, 86.52722115034089 ], "wc_questions_avg": [ 53.8, 57.401742133841196 ], "wc_limitations_avg": [ 19.0, 22.297981971469973 ], "wc_review_avg": [ 503.2, 250.22022300365734 ], "wc_reply_reviewers_avg": [ 44.2, 34.11392677485252 ], "wc_reply_authors_avg": [ 230.8, 245.23246114656192 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8017837257372731, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2823516918835192012&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": 
"hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;kt.com;kt.com;snu.ac.kr;hanyang.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;1;1;2;0", "aff_unique_norm": "Hanyang University;Korea Telecom;Seoul National University", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.hanyang.ac.kr;https://www.kt.com;https://www.snu.ac.kr", "aff_unique_abbr": "HYU;KT;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Non-stationary Experimental Design under Linear Trends", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72259", "id": "FV4ngfUlY0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65e837e76a5308df3d5544aab6196e21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FV4ngfUlY0", "openreview": "https://openreview.net/forum?id=FV4ngfUlY0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72259", "video": "https://nips.cc/virtual/2023/poster/72259", "author_site": "David Simchi-Levi, Chonghuan Wang, Zeyu Zheng", "tldr": "", "abstract": "Experimentation has been critical and increasingly popular across various domains, such as clinical trials and online platforms, due to its widely recognized benefits. One of the primary objectives of classical experiments is to estimate the average treatment effect (ATE) to inform future decision-making. However, in healthcare and many other settings, treatment effects may be non-stationary, meaning that they can change over time, rendering the traditional experimental design inadequate and the classical static ATE uninformative. In this work, we address the problem of non-stationary experimental design under linear trends by considering two objectives: estimating the dynamic treatment effect and minimizing welfare loss within the experiment. We propose an efficient design that can be customized for optimal estimation error rate, optimal regret rate, or the Pareto optimal trade-off between the two objectives. 
We establish information-theoretical lower bounds that highlight the inherent challenge in estimating dynamic treatment effects and minimizing welfare loss, and also statistically reveal the fundamental trade-off between them.", "keywords": "Adaptive Experimental Design;Non-stationary;Online Learning;Treatment Effect", "primary_area": "", "supplementary_material": "", "author": "David Simchi-Levi;Chonghuan Wang;Zeyu Zheng", "authorids": "~David_Simchi-Levi2;~Chonghuan_Wang1;~Zeyu_Zheng2", "gender": "M;M;M", "homepage": "http://slevi1.mit.edu/;http://www.mit.edu/~chwang9/;https://zheng.ieor.berkeley.edu/", "dblp": ";298/4093;48/7883.html/", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;;", "orcid": ";0000-0003-4887-6004;0000-0001-5653-152X", "linkedin": ";;", "or_profile": "~David_Simchi-Levi2;~Chonghuan_Wang1;~Zeyu_Zheng2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of California, Berkeley", "aff_domain": "mit.edu;mit.edu;berkeley.edu", "position": "Full Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsimchi-levi2023nonstationary,\ntitle={Non-stationary Experimental Design under Linear Trends},\nauthor={David Simchi-Levi and Chonghuan Wang and Zeyu Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FV4ngfUlY0}\n}", "github": "", "project": "", "reviewers": "EcQ7;sYAZ;GLy4", "pdf_size": 589564, "rating": "5;6;6", "confidence": "3;4;4", "soundness": "3;3;4", "novelty": "2;2;3", "presentation": "2;4;3", "wc_summary": "99;100;103", "wc_strengths": "34;76;94", "wc_weaknesses": "294;418;140", "wc_questions": "6;2;35", "wc_limitations": "1;2;12", "wc_review": "434;598;384", "wc_reply_reviewers": "0;99;0", "wc_reply_authors": "0;133;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 100.66666666666667, 1.699673171197595 ], "wc_strengths_avg": [ 68.0, 25.13961017995307 ], "wc_weaknesses_avg": [ 284.0, 113.71308924950841 ], "wc_questions_avg": [ 14.333333333333334, 14.70449666674185 ], "wc_limitations_avg": [ 5.0, 4.96655480858378 ], "wc_review_avg": [ 472.0, 91.40386570964418 ], "wc_reply_reviewers_avg": [ 33.0, 46.66904755831214 ], "wc_reply_authors_avg": [ 44.333333333333336, 62.69680126520721 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15583630111702384367&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu", "aff_unique_abbr": "MIT;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Episodic Multi-Task Learning with Heterogeneous Neural Processes", "status": 
"Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72258", "id": "FXU4aR2uif", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee1e549d6fb7c58ed06557bfc264335c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FXU4aR2uif", "openreview": "https://openreview.net/forum?id=FXU4aR2uif", "poster": "/media/PosterPDFs/NeurIPS%202023/72258.png?t=1697447398.9988534", "slides": "https://nips.cc/virtual/2023/poster/72258", "video": "https://nips.cc/virtual/2023/poster/72258", "author_site": "Jiayi Shen, Jiayi Shen, Xiantong Zhen, Qi Wang, Marcel Worring", "tldr": "", "abstract": "This paper focuses on the data-insufficiency problem in multi-task learning within an episodic training setup. Specifically, we explore the potential of heterogeneous information across tasks and meta-knowledge among episodes to effectively tackle each task with limited data. Existing meta-learning methods often fail to take advantage of crucial heterogeneous information in a single episode, while multi-task learning models neglect reusing experience from earlier episodes. To address the problem of insufficient data, we develop Heterogeneous Neural Processes (HNPs) for the episodic multi-task setup. Within the framework of hierarchical Bayes, HNPs effectively capitalize on prior experiences as meta-knowledge and capture task-relatedness among heterogeneous tasks, mitigating data-insufficiency. Meanwhile, transformer-structured inference modules are designed to enable efficient inferences toward meta-knowledge and task-relatedness. In this way, HNPs can learn more powerful functional priors for adapting to novel heterogeneous tasks in each meta-test episode. Experimental results show the superior performance of the proposed HNPs over typical baselines, and ablation studies verify the effectiveness of the designed inference modules.", "keywords": "data-insufficiency problem;episodic training;multi-task learning and neural processes", "primary_area": "", "supplementary_material": "/attachment/10238bf7e0bd50823f7137e24e458a616d15d25f.pdf", "author": "Jiayi Shen;Xiantong Zhen;Cheems Wang;Marcel Worring", "authorids": "~Jiayi_Shen3;~Xiantong_Zhen1;~Cheems_Wang1;~Marcel_Worring2", "gender": "F;M;M;M", "homepage": "https://autumn9999.github.io/;;https://staff.fnwi.uva.nl/m.worring/;https://sites.google.com/view/albert-q-wang-at-ai-community/home", "dblp": ";78/10651;35/4613;375/3186", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ca/citations?user=DnBb3e0AAAAJ;pdu8f3sAAAAJ;Mvbvv3IAAAAJ", "orcid": ";;;0000-0001-6135-6965", "linkedin": ";;;qi-cheems-wang-518a421a1/", "or_profile": "~Jiayi_Shen3;~Xiantong_Zhen1;~Marcel_Worring2;~Qi_Wang11", "aff": "University of Amsterdam;United Imaging Healthcare, Co., Ltd.;University of Amsterdam;Tsinghua University", "aff_domain": "uva.nl;cri-united-imaging.com;uva.nl;cs.tsinghua.edu.cn", "position": "PhD student;Principal Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nshen2023episodic,\ntitle={Episodic Multi-Task Learning with Heterogeneous Neural Processes},\nauthor={Jiayi Shen and Xiantong Zhen and Cheems Wang and Marcel Worring},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FXU4aR2uif}\n}", "github": "", "project": "", "reviewers": "xREG;21Dc;YPdL;n4KJ;LXqe;DuFB", "pdf_size": 3699908, "rating": "6;6;6;6;7;8", "confidence": "2;3;4;4;2;4", "soundness": "3;2;4;3;4;4", "novelty": 
"3;2;3;2;4;4", "presentation": "3;3;3;3;4;3", "wc_summary": "143;125;50;64;104;190", "wc_strengths": "94;135;114;44;46;88", "wc_weaknesses": "142;218;273;197;73;51", "wc_questions": "7;109;137;37;4;35", "wc_limitations": "14;31;64;44;32;10", "wc_review": "400;618;638;386;259;374", "wc_reply_reviewers": "45;0;127;54;33;12", "wc_reply_authors": "263;0;322;49;0;0", "reply_reviewers": "1;0;1;1;1;1", "reply_authors": "2;1;2;2;1;1", "rating_avg": [ 6.5, 0.7637626158259734 ], "confidence_avg": [ 3.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 3.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 112.66666666666667, 47.29928352757811 ], "wc_strengths_avg": [ 86.83333333333333, 33.19847720737939 ], "wc_weaknesses_avg": [ 159.0, 78.81412732922104 ], "wc_questions_avg": [ 54.833333333333336, 50.44606580850042 ], "wc_limitations_avg": [ 32.5, 18.145247311624054 ], "wc_review_avg": [ 445.8333333333333, 136.81912228275047 ], "wc_reply_reviewers_avg": [ 45.166666666666664, 40.95288620950774 ], "wc_reply_authors_avg": [ 105.66666666666667, 134.32630254553854 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.12156613477096617, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4321918211283823722&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "uva.nl;cri-united-imaging.com;uva.nl;cs.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Amsterdam;United Imaging Healthcare;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uva.nl;https://www.united-imaging.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UvA;;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Netherlands;China" }, { "title": "StyleGAN knows Normal, Depth, Albedo, and More", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72257", "id": "FYqqvQdXhZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e7407ab5e89c405d28ff6807ffec594a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FYqqvQdXhZ", "openreview": "https://openreview.net/forum?id=FYqqvQdXhZ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72257", "video": "https://nips.cc/virtual/2023/poster/72257", "author_site": "Anand Bhattad, Daniel McKee, Derek Hoiem, David Forsyth", "tldr": "", "abstract": "Intrinsic images, in the original sense, are image-like maps of scene properties like depth, normal, albedo, or shading. This paper demonstrates that StyleGAN can easily be induced to produce intrinsic images. The procedure is straightforward. We show that if StyleGAN produces $G({\\bf w})$ from latent ${\\bf w}$, then for each type of intrinsic image, there is a fixed offset ${\\bf d}_c$ so that $G({\\bf w}+{\\bf d}_c)$ is that type of intrinsic image for $G({\\bf w})$. Here ${\\bf d}_c$ is {\\em independent of ${\\bf w}$}. The StyleGAN we used was pretrained by others, so this property is not some accident of our training regime. We show that there are image transformations StyleGAN will {\\em not} produce in this fashion, so StyleGAN is not a generic image regression engine. 
\n\nIt is conceptually exciting that an image generator should ``know'' and represent intrinsic images. There may also be practical advantages to using a generative model to produce intrinsic images. The intrinsic images obtained from StyleGAN compare well both qualitatively and quantitatively with those obtained by using SOTA image regression techniques; but StyleGAN's intrinsic images are robust to relighting effects, unlike SOTA methods.", "keywords": "Generative models;StyleGAN;Depth;Normals;Segmentation;Intrinsic Images;Albedo;Shading", "primary_area": "", "supplementary_material": "/attachment/c96c29a240d2cd18ecca2459e78a9fdf2a89b56c.pdf", "author": "Anand Bhattad;Daniel McKee;Derek Hoiem;David Forsyth", "authorids": "~Anand_Bhattad1;~Daniel_McKee1;~Derek_Hoiem1;~David_Forsyth1", "gender": ";;M;M", "homepage": "https://anandbhattad.github.io/;;http://dhoiem.cs.illinois.edu/;https://cs.illinois.edu/directory/profile/daf", "dblp": "215/4305;290/2142;08/6948;f/DavidAForsyth", "google_scholar": "XUsauXIAAAAJ;iMmbwEUAAAAJ;8Sfj7q8AAAAJ;https://scholar.google.com.tw/citations?user=5H0arvkAAAAJ", "orcid": ";;;0000-0002-2278-0752", "linkedin": ";;;", "or_profile": "~Anand_Bhattad1;~Daniel_McKee1;~Derek_Hoiem1;~David_Forsyth1", "aff": "University of Illinois Urbana Champaign;;Reconstruct;University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;;reconstructinc.com;uiuc.edu", "position": "PhD student;;Chief Scientist;Full Professor", "bibtex": "@inproceedings{\nbhattad2023stylegan,\ntitle={Style{GAN} knows Normal, Depth, Albedo, and More},\nauthor={Anand Bhattad and Daniel McKee and Derek Hoiem and David Forsyth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FYqqvQdXhZ}\n}", "github": "", "project": "", "reviewers": "qbrH;1Cfh;ouPJ;NPHF", "pdf_size": 13515947, "rating": "5;5;5;6", "confidence": "4;5;4;4", "soundness": "3;3;3;2", "novelty": "3;2;2;4", "presentation": "2;3;2;4", "wc_summary": "129;46;112;85", "wc_strengths": "144;67;121;263", "wc_weaknesses": "320;212;383;153", "wc_questions": "304;2;12;46", "wc_limitations": "102;45;54;15", "wc_review": "999;372;682;562", "wc_reply_reviewers": "24;15;29;68", "wc_reply_authors": "20;14;27;43", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 93.0, 31.34485603731496 ], "wc_strengths_avg": [ 148.75, 71.63928740572452 ], "wc_weaknesses_avg": [ 267.0, 89.84152714641488 ], "wc_questions_avg": [ 91.0, 124.05240827972668 ], "wc_limitations_avg": [ 54.0, 31.24899998399949 ], "wc_review_avg": [ 653.75, 227.9236440126386 ], "wc_reply_reviewers_avg": [ 34.0, 20.26079958935481 ], "wc_reply_authors_avg": [ 26.0, 10.8397416943394 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14406650649925599173&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "illinois.edu;;reconstructinc.com;uiuc.edu", "author_num": 4, "aff_unique_index": "0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;;University of Illinois", "aff_unique_dep": ";;", "aff_unique_url": 
"https://illinois.edu;;https://illinois.edu", "aff_unique_abbr": "UIUC;;UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "Towards In-context Scene Understanding", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72256", "id": "FasIQqsJhe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c94a632545000531f0b47000e9caa5b6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FasIQqsJhe", "openreview": "https://openreview.net/forum?id=FasIQqsJhe", "poster": "/media/PosterPDFs/NeurIPS%202023/72256.png?t=1701706753.911982", "slides": "https://nips.cc/virtual/2023/poster/72256", "video": "https://nips.cc/virtual/2023/poster/72256", "author_site": "Ivana Balazevic, David Steiner, Nikhil Parthasarathy, Relja Arandjelovi\u0107, Olivier Henaff", "tldr": "", "abstract": "In-context learning\u2013\u2013the ability to configure a model's behavior with different prompts\u2013\u2013has revolutionized the field of natural language processing, alleviating the need for task-specific models and paving the way for generalist models capable of assisting with any query. Computer vision, in contrast, has largely stayed in the former regime: specialized decoders and finetuning protocols are generally required to perform dense tasks such as semantic segmentation and depth estimation. In this work we explore a simple mechanism for in-context learning of such scene understanding tasks: nearest neighbor retrieval from a prompt of annotated features. We propose a new pretraining protocol\u2013\u2013leveraging attention within and across images\u2013\u2013which yields representations particularly useful in this regime. The resulting Hummingbird model, suitably prompted, performs various scene understanding tasks without modification while approaching the performance of specialists that have been finetuned for each task. 
Moreover, Hummingbird can be configured to perform new tasks much more efficiently than finetuned models, raising the possibility of scene understanding in the interactive assistant regime.", "keywords": "transfer learning;adaptation;self-supervised learning;contrastive learning;scene understanding;representation learning;in-context learning;vision transformers", "primary_area": "", "supplementary_material": "/attachment/f55a132689f8ad2cbdb16a9bcae0b0d467e88b96.pdf", "author": "Ivana Balazevic;David Steiner;Nikhil Parthasarathy;Relja Arandjelovic;Olivier J Henaff", "authorids": "~Ivana_Balazevic1;~David_Steiner1;~Nikhil_Parthasarathy1;~Relja_Arandjelovic1;~Olivier_J_Henaff1", "gender": "F;Not Specified;M;M;", "homepage": "https://ibalazevic.github.io/;https://arxiv.org/a/steiner_d_1.html;;http://www.relja.info;https://www.olivierhenaff.com/", "dblp": "185/0837;;209/4951;88/8937;156/0035.html", "google_scholar": "CnxZPkkAAAAJ;https://scholar.google.com/citations?hl=en;X9mO4ckAAAAJ;https://scholar.google.co.uk/citations?user=VFO9h14AAAAJ;Sx75CVsAAAAJ", "orcid": ";0000-0003-3688-7064;;;0000-0001-8183-9489", "linkedin": ";david-steiner-35744387;nikparth/;;", "or_profile": "~Ivana_Balazevic1;~David_Steiner1;~Nikhil_Parthasarathy1;~Relja_Arandjelovic1;~Olivier_J_Henaff1", "aff": "Google DeepMind;Google;New York University;Google DeepMind;Google DeepMind", "aff_domain": "google.com;google.com;nyu.edu;google.com;google.com", "position": "Research Scientist;Researcher;PhD student;Staff Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nbalazevic2023towards,\ntitle={Towards In-context Scene Understanding},\nauthor={Ivana Balazevic and David Steiner and Nikhil Parthasarathy and Relja Arandjelovic and Olivier J Henaff},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FasIQqsJhe}\n}", "github": "", "project": "", "reviewers": "35vE;32Tr;KiUe;hsMp;UEmV", "pdf_size": 4016406, "rating": "5;6;7;8;8", "confidence": "4;4;3;4;4", "soundness": "4;3;4;3;4", "novelty": "3;3;3;3;3", "presentation": "4;4;3;4;3", "wc_summary": "69;57;101;27;114", "wc_strengths": "36;51;122;47;131", "wc_weaknesses": "98;56;173;46;103", "wc_questions": "379;46;5;49;110", "wc_limitations": "131;10;47;6;1", "wc_review": "713;220;448;175;459", "wc_reply_reviewers": "31;0;50;12;43", "wc_reply_authors": "23;0;78;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 73.6, 31.148675734290855 ], "wc_strengths_avg": [ 77.4, 40.48999876512717 ], "wc_weaknesses_avg": [ 95.2, 44.8972159493214 ], "wc_questions_avg": [ 117.8, 134.83234033420914 ], "wc_limitations_avg": [ 39.0, 48.78934309867269 ], "wc_review_avg": [ 403.0, 193.25320178460174 ], "wc_reply_reviewers_avg": [ 27.2, 18.733926443754388 ], "wc_reply_authors_avg": [ 20.2, 30.24169307429728 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.08574929257125444, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9742218849715975636&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "google.com;google.com;nyu.edu;google.com;google.com", "author_num": 5, "aff_unique_index": 
"0;0;1;0;0", "aff_unique_norm": "Google;New York University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.nyu.edu", "aff_unique_abbr": "DeepMind;NYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Mnemosyne: Learning to Train Transformers with Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72255", "id": "Fdfyga5i0A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f41b6e5af73421e46ceed9cb036e72e7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Fdfyga5i0A", "openreview": "https://openreview.net/forum?id=Fdfyga5i0A", "poster": "/media/PosterPDFs/NeurIPS%202023/72255.png?t=1701695335.2795112", "slides": "https://nips.cc/virtual/2023/poster/72255", "video": "https://nips.cc/virtual/2023/poster/72255", "author_site": "Deepali Jain, Krzysztof M Choromanski, Kumar Avinava Dubey, Sumeet Singh, Vikas Sindhwani, Tingnan Zhang, Jie Tan", "tldr": "", "abstract": "In this work, we propose a new class of learnable optimizers, called Mnemosyne. It is based on the novel spatio-temporal low-rank implicit attention Transformers that can learn to train entire neural network architectures, including other Transformers, without any task-specific optimizer tuning. We show that Mnemosyne: (a) outperforms popular LSTM optimizers (also with new feature engineering to mitigate catastrophic forgetting of LSTMs), (b) can successfully train Transformers while using simple meta-training strategies that require minimal computational resources, (c) matches accuracy-wise SOTA hand-designed optimizers with carefully tuned hyper-parameters (often producing top performing models). Furthermore, Mnemosyne provides space complexity comparable to that of its hand-designed first-order counterparts, which allows it to scale to training larger sets of parameters. We conduct an extensive empirical evaluation of Mnemosyne on: (a) fine-tuning a wide range of Vision Transformers (ViTs) from medium-size architectures to massive ViT-Hs (36 layers, 16 heads), (b) pre-training BERT models and (c) soft prompt-tuning large 11B+ T5XXL models. 
We complement our results with a comprehensive theoretical analysis of the compact associative memory used by Mnemosyne which we believe was never done before.", "keywords": "learnable optimizers;Transformers;efficient attention;spatio-temporal attention", "primary_area": "", "supplementary_material": "/attachment/45e85d6725cf0de577ebb940cf34e5285e816149.pdf", "author": "Deepali Jain;Krzysztof Marcin Choromanski;Kumar Avinava Dubey;Sumeet Singh;Vikas Sindhwani;Tingnan Zhang;Jie Tan", "authorids": "~Deepali_Jain1;~Krzysztof_Marcin_Choromanski1;~Kumar_Avinava_Dubey1;~Sumeet_Singh3;~Vikas_Sindhwani1;~Tingnan_Zhang1;~Jie_Tan1", "gender": "F;;M;M;M;M;M", "homepage": ";;;http://vikas.sindhwani.org;;http://www.jie-tan.net;https://sites.google.com/site/kumaravinavadubey/", "dblp": "84/8010;78/11411;;26/4825;https://dblp.uni-trier.de/pers/hd/z/Zhang:Tingnan;81/7419;10/7789", "google_scholar": ";;ZGpE5cYAAAAJ;https://scholar.google.com/citations?hl=en;RM2vMNcAAAAJ;neGbgzYAAAAJ;tBbUAfsAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;vikassindhwani;;jie-tan/;", "or_profile": "~Deepali_Jain1;~Krzysztof_Marcin_Choromanski1;~Sumeet_Singh3;~Vikas_Sindhwani1;~Tingnan_Zhang1;~Jie_Tan1;~Kumar_A_Dubey1", "aff": "Google;Google Brain Robotics & Columbia University;Google Brain Robotics;Google;Google;Google;Google Research", "aff_domain": "google.com;columbia.edu;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;research scientist & adjunct assistant professor;Researcher;Senior Staff Research Scientist;Software Engineer;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\njain2023mnemosyne,\ntitle={Mnemosyne: Learning to Train Transformers with Transformers},\nauthor={Deepali Jain and Krzysztof Marcin Choromanski and Kumar Avinava Dubey and Sumeet Singh and Vikas Sindhwani and Tingnan Zhang and Jie Tan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Fdfyga5i0A}\n}", "github": "", "project": "", "reviewers": "V1vR;JvEY;xFuP", "pdf_size": 5756617, "rating": "5;6;6", "confidence": "4;4;2", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "91;56;70", "wc_strengths": "55;108;70", "wc_weaknesses": "203;69;59", "wc_questions": "122;53;3", "wc_limitations": "69;1;6", "wc_review": "540;287;208", "wc_reply_reviewers": "409;0;29", "wc_reply_authors": "970;0;0", "reply_reviewers": "3;0;1", "reply_authors": "4;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.33333333333333, 14.383632673594278 ], "wc_strengths_avg": [ 77.66666666666667, 22.305953365762143 ], "wc_weaknesses_avg": [ 110.33333333333333, 65.65228268858762 ], "wc_questions_avg": [ 59.333333333333336, 48.787521173167036 ], "wc_limitations_avg": [ 25.333333333333332, 30.94439457406714 ], "wc_review_avg": [ 345.0, 141.60743859934288 ], "wc_reply_reviewers_avg": [ 146.0, 186.3455571422798 ], "wc_reply_authors_avg": [ 323.3333333333333, 457.26238516730075 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 8, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11459180219318475729&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;columbia.edu;google.com;google.com;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PaintSeg: Painting Pixels for Training-free Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72254", "id": "FdsS51iif3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0021c2cb1b9b6a71ac478ea52a93b25a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FdsS51iif3", "openreview": "https://openreview.net/forum?id=FdsS51iif3", "poster": "/media/PosterPDFs/NeurIPS%202023/72254.png?t=1701927344.7937326", "slides": "https://nips.cc/virtual/2023/poster/72254", "video": "https://nips.cc/virtual/2023/poster/72254", "author_site": "Xiang Li, Chung-Ching Lin, Yinpeng Chen, Yinpeng Chen, Zicheng Liu, Zicheng Liu, Jinglu Wang, Rita Singh, Bhiksha Raj", "tldr": "", "abstract": "The paper introduces PaintSeg, a new unsupervised method for segmenting objects without any training. We propose an adversarial masked contrastive painting (AMCP) process, which creates a contrast between the original image and a painted image in which a masked area is painted using off-the-shelf generative models. During the painting process, inpainting and outpainting are alternated, with the former masking the foreground and filling in the background, and the latter masking the background while recovering the missing part of the foreground object. Inpainting and outpainting, also referred to as I-step and O-step, allow our method to gradually advance the target segmentation mask toward the ground truth without supervision or training. PaintSeg can be configured to work with a variety of prompts, e.g. coarse masks, boxes, scribbles, and points. Our experimental results demonstrate that PaintSeg outperforms existing approaches in coarse mask-prompt, box-prompt, and point-prompt segmentation tasks, providing a training-free solution suitable for unsupervised segmentation. 
Code: https://github.com/lxa9867/PaintSeg.", "keywords": "Prompt-guided Segmentation;Generative models;Training-free", "primary_area": "", "supplementary_material": "/attachment/fbce28e7f706d65e0dcb50210e25118b722f6de2.zip", "author": "Xiang Li;Chung-Ching Lin;Yinpeng Chen;Zicheng Liu;Jinglu Wang;Rita Singh;Bhiksha Raj", "authorids": "~Xiang_Li35;~Chung-Ching_Lin2;~Yinpeng_Chen1;~Zicheng_Liu1;~Jinglu_Wang3;~Rita_Singh1;~Bhiksha_Raj1", "gender": ";;M;M;;F;M", "homepage": ";;https://scholar.google.com/citations?user=V_VpLksAAAAJ&hl=en;https://sites.google.com/view/zichengliu/home?pli=1;;http://mlsp.cs.cmu.edu/people/rsingh/index.html;https://www.cs.cmu.edu/directory/bhikshar/", "dblp": ";;45/6977;l/ZichengLiu;;;60/3996", "google_scholar": ";;;bkALdvsAAAAJ;;;", "orcid": ";;;0000-0001-5894-7828;;;", "linkedin": ";;;;;;", "or_profile": "~Xiang_Li35;~Chung-Ching_Lin2;~Yinpeng_Chen1;~Zicheng_Liu1;~Jinglu_Wang3;~Rita_Singh1;~Bhiksha_Raj1", "aff": ";;Microsoft;Microsoft;;School of Computer Science, Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": ";;microsoft.com;microsoft.com;;cs.cmu.edu;mbzuai.ac.ae", "position": ";;Researcher;partner research manager;;Research Professor;Full Professor", "bibtex": "@inproceedings{\nli2023paintseg,\ntitle={PaintSeg: Painting Pixels for Training-free Segmentation},\nauthor={Xiang Li and Chung-Ching Lin and Yinpeng Chen and Zicheng Liu and Jinglu Wang and Rita Singh and Bhiksha Raj},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FdsS51iif3}\n}", "github": "", "project": "", "reviewers": "1n6h;CwUQ;9reF;Xo6H;fhrH", "pdf_size": 7648406, "rating": "6;6;6;6;7", "confidence": "4;4;4;4;4", "soundness": "3;3;3;2;3", "novelty": "3;2;4;2;3", "presentation": "2;3;2;2;3", "wc_summary": "74;47;145;155;101", "wc_strengths": "38;50;30;33;61", "wc_weaknesses": "204;139;219;250;26", "wc_questions": "32;16;3;3;73", "wc_limitations": "7;12;2;3;6", "wc_review": "355;264;399;444;267", "wc_reply_reviewers": "113;35;51;0;32", "wc_reply_authors": "311;15;15;0;18", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;2;2;1;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 104.4, 41.08332995267058 ], "wc_strengths_avg": [ 42.4, 11.534296684236972 ], "wc_weaknesses_avg": [ 167.6, 79.53012008038212 ], "wc_questions_avg": [ 25.4, 26.08141100477503 ], "wc_limitations_avg": [ 6.0, 3.521363372331802 ], "wc_review_avg": [ 345.8, 71.35656942426534 ], "wc_reply_reviewers_avg": [ 46.2, 37.274119707915304 ], "wc_reply_authors_avg": [ 71.8, 119.76543741831362 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10093923286351332039&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";;microsoft.com;microsoft.com;;cs.cmu.edu;mbzuai.ac.ae", "author_num": 7, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Microsoft;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "Microsoft Corporation;School of Computer Science;", "aff_unique_url": "https://www.microsoft.com;https://www.cmu.edu;https://mbzuai.ac.ae", 
"aff_unique_abbr": "Microsoft;CMU;MBZUAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "Constraint-Conditioned Policy Optimization for Versatile Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72253", "id": "FdtdjQpAwJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29906cbd165b78991da2c4dbabc2a04b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FdtdjQpAwJ", "openreview": "https://openreview.net/forum?id=FdtdjQpAwJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72253", "video": "https://nips.cc/virtual/2023/poster/72253", "author_site": "Yihang Yao, ZUXIN LIU, Zhepeng Cen, Jiacheng Zhu, Wenhao Yu, Tingnan Zhang, DING ZHAO", "tldr": "", "abstract": "Safe reinforcement learning (RL) focuses on training reward-maximizing agents subject to pre-defined safety constraints. Yet, learning versatile safe policies that can adapt to varying safety constraint requirements during deployment without retraining remains a largely unexplored and challenging area. In this work, we formulate the versatile safe RL problem and consider two primary requirements: training efficiency and zero-shot adaptation capability. To address them, we introduce the Conditioned Constrained Policy Optimization (CCPO) framework, consisting of two key modules: (1) Versatile Value Estimation (VVE) for approximating value functions under unseen threshold conditions, and (2) Conditioned Variational Inference (CVI) for encoding arbitrary constraint thresholds during policy optimization. Our extensive experiments demonstrate that CCPO outperforms the baselines in terms of safety and task performance while preserving zero-shot adaptation capabilities to different constraint thresholds data-efficiently. 
This makes our approach suitable for real-world dynamic applications.", "keywords": "Safe Reinforcement Learning;Conditioned Reinforcement Learning;Multi-task Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Yihang Yao;Zuxin Liu;Zhepeng Cen;Jiacheng Zhu;Wenhao Yu;Tingnan Zhang;Ding Zhao", "authorids": "~Yihang_Yao1;~Zuxin_Liu1;~Zhepeng_Cen1;~Jiacheng_Zhu1;~Wenhao_Yu1;~Tingnan_Zhang1;~Ding_Zhao1", "gender": ";M;M;M;M;M;", "homepage": "https://yihangyao.github.io/;https://www.zuxin.me;https://czp16.github.io/;https://jiachengzhuml.github.io/;https://wenhaoyu.weebly.com/;;https://safeai-lab.github.io", "dblp": "305/7045.html;227/3137;254/6182;40/10195;;https://dblp.uni-trier.de/pers/hd/z/Zhang:Tingnan;", "google_scholar": "EPduTdwAAAAJ;5ApCTCoAAAAJ;M-X3Q-UAAAAJ;rKUnBPgAAAAJ;1bF2s2kAAAAJ;RM2vMNcAAAAJ;z7tPc9IAAAAJ", "orcid": ";0000-0001-7412-5074;;;;;", "linkedin": "yihang-yao-3a7658249/;zuxin-liu/;;;;;", "or_profile": "~Yihang_Yao1;~Zuxin_Liu1;~Zhepeng_Cen1;~Jiacheng_Zhu1;~Wenhao_Yu1;~Tingnan_Zhang1;~Ding_Zhao1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Google;Google;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;andrew.cmu.edu;andrew.cmu.edu;google.com;google.com;cmu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Software Engineer;Software Engineer;Associate Professor", "bibtex": "@inproceedings{\nyao2023constraintconditioned,\ntitle={Constraint-Conditioned Policy Optimization for Versatile Safe Reinforcement Learning},\nauthor={Yihang Yao and Zuxin Liu and Zhepeng Cen and Jiacheng Zhu and Wenhao Yu and Tingnan Zhang and Ding Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FdtdjQpAwJ}\n}", "github": "", "project": "", "reviewers": "tMdC;TpqY;5QUs;hfso;R5zk", "pdf_size": 1507521, "rating": "7;7;7;7;7", "confidence": "4;3;2;4;3", "soundness": "4;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "4;2;2;3;2", "wc_summary": "64;90;147;105;41", "wc_strengths": "45;27;195;0;15", "wc_weaknesses": "2;176;217;0;60", "wc_questions": "157;2;34;300;28", "wc_limitations": "1;21;14;0;31", "wc_review": "269;316;607;405;175", "wc_reply_reviewers": "0;87;30;14;21", "wc_reply_authors": "0;19;15;19;19", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 89.4, 36.19171175835705 ], "wc_strengths_avg": [ 56.4, 70.85082921180245 ], "wc_weaknesses_avg": [ 91.0, 89.73739465796854 ], "wc_questions_avg": [ 104.2, 111.62867015242993 ], "wc_limitations_avg": [ 13.4, 11.84229707446997 ], "wc_review_avg": [ 354.4, 146.5054265206583 ], "wc_reply_reviewers_avg": [ 30.4, 29.950626036862733 ], "wc_reply_authors_avg": [ 14.4, 7.3647810558087885 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7018557450829744472&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cmu.edu;cmu.edu;andrew.cmu.edu;andrew.cmu.edu;google.com;google.com;cmu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;1;0", "aff_unique_norm": "Carnegie Mellon 
University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Trade-off of Intra-/Inter-class Diversity for Supervised Pre-training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72252", "id": "Fe6fDq65aZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca9567d8ef6b2ea2da0d7eed57b933ee-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Fe6fDq65aZ", "openreview": "https://openreview.net/forum?id=Fe6fDq65aZ", "poster": "/media/PosterPDFs/NeurIPS%202023/72252.png?t=1698489842.0564623", "slides": "https://nips.cc/virtual/2023/poster/72252", "video": "https://nips.cc/virtual/2023/poster/72252", "author_site": "Jieyu Zhang, Bohan Wang, Zhengyu Hu, Pang Wei Koh, Alexander Ratner", "tldr": "", "abstract": "Pre-training datasets are critical for building state-of-the-art machine learning models, motivating rigorous study on their impact on downstream tasks. In this work, we study the impact of the trade-off between the intra-class diversity (the number of samples per class) and the inter-class diversity (the number of classes) of a supervised pre-training dataset. Empirically, we found that with the size of the pre-training dataset fixed, the best downstream performance comes with a balance on the intra-/inter-class diversity. To understand the underlying mechanism, we show theoretically that the downstream performance depends monotonically on both types of diversity. Notably, our theory reveals that the optimal class-to-sample ratio (#classes / #samples per class) is invariant to the size of the pre-training dataset, which motivates an application of predicting the optimal number of pre-training classes. 
We demonstrate the effectiveness of this application by an improvement of around 2 points on the downstream tasks when using ImageNet as the pre-training dataset.", "keywords": "data-centric study;supervised pretraining;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Jieyu Zhang;Bohan Wang;Zhengyu Hu;Pang Wei Koh;Alexander Ratner", "authorids": "~Jieyu_Zhang1;~Bohan_Wang1;~Zhengyu_Hu2;~Pang_Wei_Koh1;~Alexander_Ratner1", "gender": "M;M;;M;M", "homepage": "https://jieyuz2.github.io/;https://bhwangfy.github.io/;;http://cs.stanford.edu/~pangwei;https://ajratner.github.io/", "dblp": ";202/1184;270/4119;10/10453;180/5513", "google_scholar": "T_INUHUAAAAJ;LfkHCEUAAAAJ;_WJtdlcAAAAJ;Nn990CkAAAAJ;rfwwtFYAAAAJ", "orcid": "0000-0002-1846-2436;;0009-0007-3097-9714;;", "linkedin": "jieyu-zhang-3baaa8154/;;;;alexander-ratner-038ba239/", "or_profile": "~Jieyu_Zhang1;~Bohan_Wang1;~Zhengyu_Hu2;~Pang_Wei_Koh1;~Alexander_Ratner1", "aff": "University of Washington;Microsoft Research Asia, University of Science and Technology of China;Hong Kong University of Science and Technology;Google;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;ustc.edu.cn;connect.hkust-gz.edu.cn;google.com;cs.washington.edu", "position": "PhD student;PhD student;MS student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023on,\ntitle={On the Trade-off of Intra-/Inter-class Diversity for Supervised Pre-training},\nauthor={Jieyu Zhang and Bohan Wang and Zhengyu Hu and Pang Wei Koh and Alexander Ratner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Fe6fDq65aZ}\n}", "github": "", "project": "", "reviewers": "AXB6;Ypk9;xyoX", "pdf_size": 1326087, "rating": "4;6;6", "confidence": "5;4;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "16;30;69", "wc_strengths": "56;105;42", "wc_weaknesses": "34;108;49", "wc_questions": "38;22;30", "wc_limitations": "4;6;1", "wc_review": "148;271;191", "wc_reply_reviewers": "0;110;0", "wc_reply_authors": "0;28;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 38.333333333333336, 22.425184255405547 ], "wc_strengths_avg": [ 67.66666666666667, 27.010286106510527 ], "wc_weaknesses_avg": [ 63.666666666666664, 31.94091767971331 ], "wc_questions_avg": [ 30.0, 6.531972647421808 ], "wc_limitations_avg": [ 3.6666666666666665, 2.0548046676563256 ], "wc_review_avg": [ 203.33333333333334, 50.96621974951208 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 51.85449728701349 ], "wc_reply_authors_avg": [ 9.333333333333334, 13.199326582148887 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6692095043093239632&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.washington.edu;ustc.edu.cn;connect.hkust-gz.edu.cn;google.com;cs.washington.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Washington;Microsoft;Hong Kong University of Science and Technology;Google", 
"aff_unique_dep": ";Research;;Google", "aff_unique_url": "https://www.washington.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-asia;https://www.ust.hk;https://www.google.com", "aff_unique_abbr": "UW;MSRA;HKUST;Google", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";Asia;Hong Kong SAR;Mountain View;Seattle", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Gradient-Based Feature Learning under Structured Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72251", "id": "Fe8PxP2F2p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e21955c93dede886af1d0d362c756757-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Fe8PxP2F2p", "openreview": "https://openreview.net/forum?id=Fe8PxP2F2p", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72251", "video": "https://nips.cc/virtual/2023/poster/72251", "author_site": "Alireza Mousavi-Hosseini, Denny Wu, Taiji Suzuki, Murat Erdogdu", "tldr": "", "abstract": "Recent works have demonstrated that the sample complexity of gradient-based learning of single index models, i.e. functions that depend on a 1-dimensional projection of the input data, is governed by their information exponent. However, these results are only concerned with isotropic data, while in practice the input often contains additional structure which can implicitly guide the algorithm. In this work, we investigate the effect of a spiked covariance structure and reveal several interesting phenomena. First, we show that in the anisotropic setting, the commonly used spherical gradient dynamics may fail to recover the true direction, even when the spike is perfectly aligned with the target direction. Next, \nwe show that appropriate weight normalization that is reminiscent of batch normalization can alleviate this issue. Further, by exploiting the alignment between the (spiked) input covariance and the target, we obtain improved sample complexity compared to the isotropic case. 
In particular, under the spiked model with a suitably large spike, the sample complexity of gradient-based training can be made independent of the information exponent while also outperforming lower bounds for \nrotationally invariant kernel methods.", "keywords": "feature learning;neural networks;single-index model;gradient descent", "primary_area": "", "supplementary_material": "", "author": "Alireza Mousavi-Hosseini;Denny Wu;Taiji Suzuki;Murat A Erdogdu", "authorids": "~Alireza_Mousavi-Hosseini1;~Denny_Wu2;~Taiji_Suzuki1;~Murat_A_Erdogdu1", "gender": "M;M;M;M", "homepage": "https://dennywu1.github.io/;http://ibis.t.u-tokyo.ac.jp/suzuki/;http://www.cs.toronto.edu/~erdogdu/;https://www.cs.toronto.edu/~mousavi/", "dblp": ";08/312;139/1292;296/4041", "google_scholar": "https://scholar.google.com/citations?hl=en;x8osrBsAAAAJ;Lqc4cdAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Denny_Wu2;~Taiji_Suzuki1;~Murat_A_Erdogdu1;~Alireza_Mousavi1", "aff": "University of Toronto;The University of Tokyo;Vector Institute;Department of Computer Science, University of Toronto", "aff_domain": "toronto.edu;tokyo.ac.jp;vectorinstitute.ai;cs.toronto.edu", "position": "PhD student;Associate Professor;Faculty;PhD student", "bibtex": "@inproceedings{\nmousavi-hosseini2023gradientbased,\ntitle={Gradient-Based Feature Learning under Structured Data},\nauthor={Alireza Mousavi-Hosseini and Denny Wu and Taiji Suzuki and Murat A Erdogdu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Fe8PxP2F2p}\n}", "github": "", "project": "", "reviewers": "XbBV;GGcV;oCpw", "pdf_size": 1134632, "rating": "5;6;7", "confidence": "3;2;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "126;92;52", "wc_strengths": "281;52;41", "wc_weaknesses": "168;13;165", "wc_questions": "37;236;1", "wc_limitations": "2;12;1", "wc_review": "614;405;260", "wc_reply_reviewers": "43;30;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 90.0, 30.243456592570013 ], "wc_strengths_avg": [ 124.66666666666667, 110.63553779063136 ], "wc_weaknesses_avg": [ 115.33333333333333, 72.37095795659717 ], "wc_questions_avg": [ 91.33333333333333, 103.34516061346183 ], "wc_limitations_avg": [ 5.0, 4.96655480858378 ], "wc_review_avg": [ 426.3333333333333, 145.30504311810915 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 18.00617178142601 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8719857737066905467&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "toronto.edu;tokyo.ac.jp;vectorinstitute.ai;cs.toronto.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Toronto;University of Tokyo;Vector Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://www.u-tokyo.ac.jp;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;UTokyo;Vector Institute", "aff_campus_unique_index": 
"1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;Japan" }, { "title": "A Metadata-Driven Approach to Understand Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72250", "id": "FgakGFpll1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31994923f58ae5b2d661b300bd439107-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FgakGFpll1", "openreview": "https://openreview.net/forum?id=FgakGFpll1", "poster": "/media/PosterPDFs/NeurIPS%202023/72250.png?t=1699610303.670694", "slides": "https://nips.cc/virtual/2023/poster/72250", "video": "https://nips.cc/virtual/2023/poster/72250", "author_site": "Ting Wei Li, Qiaozhu Mei, Jiaqi Ma", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have achieved remarkable success in various applications, but their performance can be sensitive to specific data properties of the graph datasets they operate on. Current literature on understanding the limitations of GNNs has primarily employed a \\emph{model-driven} approach that leverage heuristics and domain knowledge from network science or graph theory to model the GNN behaviors, which is time-consuming and highly subjective. In this work, we propose a \\emph{metadata-driven} approach to analyze the sensitivity of GNNs to graph data properties, motivated by the increasing availability of graph learning benchmarks. We perform a multivariate sparse regression analysis on the metadata derived from benchmarking GNN performance across diverse datasets, yielding a set of salient data properties. To validate the effectiveness of our data-driven approach, we focus on one identified data property, the degree distribution, and investigate how this property influences GNN performance through theoretical analysis and controlled experiments. Our theoretical findings reveal that datasets with more balanced degree distribution exhibit better linear separability of node representations, thus leading to better GNN performance. We also conduct controlled experiments using synthetic datasets with varying degree distributions, and the results align well with our theoretical findings. 
Collectively, both the theoretical analysis and controlled experiments verify that the proposed metadata-driven approach is effective in identifying critical data properties for GNNs.", "keywords": "Graph Neural Networks;Metadata-Driven Analysis;Gini Coefficient of Degree Distribution", "primary_area": "", "supplementary_material": "/attachment/947be045ee84d004f8b901e0fb42999d1e82fefa.zip", "author": "Ting Wei Li;Qiaozhu Mei;Jiaqi Ma", "authorids": "~Ting_Wei_Li1;~Qiaozhu_Mei1;~Jiaqi_Ma1", "gender": "M;M;", "homepage": "https://tingwl0122.github.io/;http://www-personal.umich.edu/~qmei/;https://jiaqima.github.io", "dblp": "336/2460;30/5059;155/2199-1", "google_scholar": "ema4jhsAAAAJ;https://scholar.google.com.tw/citations?user=zr22WkQAAAAJ;Z9X2A1MAAAAJ", "orcid": ";0000-0002-8640-1942;0000-0001-8292-5901", "linkedin": "ting-wei-li-3940ab241/;;", "or_profile": "~Ting_Wei_Li1;~Qiaozhu_Mei1;~Jiaqi_Ma1", "aff": "University of Michigan - Ann Arbor;Google;Harvard University", "aff_domain": "umich.edu;google.com;harvard.edu", "position": "MS student;Researcher;Postdoc", "bibtex": "@inproceedings{\nli2023a,\ntitle={A Metadata-Driven Approach to Understand Graph Neural Networks},\nauthor={Ting Wei Li and Qiaozhu Mei and Jiaqi Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FgakGFpll1}\n}", "github": "", "project": "", "reviewers": "aoxj;FJsn;f4if;zbzi", "pdf_size": 379112, "rating": "4;5;5;7", "confidence": "4;5;3;3", "soundness": "2;4;2;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "116;91;82;109", "wc_strengths": "17;34;40;152", "wc_weaknesses": "125;57;225;81", "wc_questions": "3;73;20;31", "wc_limitations": "1;1;9;1", "wc_review": "262;256;376;374", "wc_reply_reviewers": "60;20;34;0", "wc_reply_authors": "469;15;11;11", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.5, 13.6106575888162 ], "wc_strengths_avg": [ 60.75, 53.35435783513845 ], "wc_weaknesses_avg": [ 122.0, 64.27285585688566 ], "wc_questions_avg": [ 31.75, 25.82029240732955 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 317.0, 58.0430874437258 ], "wc_reply_reviewers_avg": [ 28.5, 21.834605560898048 ], "wc_reply_authors_avg": [ 126.5, 197.74920985935697 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.48420012470625223, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12298052366829999645&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "umich.edu;google.com;harvard.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Michigan;Google;Harvard University", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.umich.edu;https://www.google.com;https://www.harvard.edu", "aff_unique_abbr": "UM;Google;Harvard", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Ann Arbor;Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Unified Approach to Domain Incremental Learning with Memory: Theory and Algorithm", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72249", "id": "FiClXlUqA7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/30d046e94d7b8037d6ef27c4357a8dd4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FiClXlUqA7", "openreview": "https://openreview.net/forum?id=FiClXlUqA7", "poster": "/media/PosterPDFs/NeurIPS%202023/72249.png?t=1702323743.5515802", "slides": "https://nips.cc/virtual/2023/poster/72249", "video": "https://nips.cc/virtual/2023/poster/72249", "author_site": "Haizhou Shi, Hao Wang", "tldr": "", "abstract": "Domain incremental learning aims to adapt to a sequence of domains with access to only a small subset of data (i.e., memory) from previous domains. Various methods have been proposed for this problem, but it is still unclear how they are related and when practitioners should choose one method over another. In response, we propose a unified framework, dubbed Unified Domain Incremental Learning (UDIL), for domain incremental learning with memory. Our UDIL **unifies** various existing methods, and our theoretical analysis shows that UDIL always achieves a tighter generalization error bound compared to these methods. The key insight is that different existing methods correspond to our bound with different **fixed** coefficients; based on insights from this unification, our UDIL allows **adaptive** coefficients during training, thereby always achieving the tightest bound. Empirical results show that our UDIL outperforms the state-of-the-art domain incremental learning methods on both synthetic and real-world datasets. Code will be available at https://github.com/Wang-ML-Lab/unified-continual-learning.", "keywords": "Domain Incremental Learning;Continual Learning;Theory", "primary_area": "", "supplementary_material": "/attachment/002309813f1a2a74d7c2c36b919e68b58b0e4f41.pdf", "author": "Haizhou Shi;Hao Wang", "authorids": "~Haizhou_Shi1;~Hao_Wang3", "gender": "M;M", "homepage": "https://haizhou-shi.github.io;http://www.wanghao.in", "dblp": "245/0213;w/HaoWang-14", "google_scholar": "JKwP43sAAAAJ;NrOA9QoAAAAJ", "orcid": "0000-0002-8431-3703;", "linkedin": "haizhou-shi-229206180/;", "or_profile": "~Haizhou_Shi1;~Hao_Wang4", "aff": "Rutgers University, New Brunswick;Rutgers University", "aff_domain": "rutgers.edu;cs.rutgers.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshi2023a,\ntitle={A Unified Approach to Domain Incremental Learning with Memory: Theory and Algorithm},\nauthor={Haizhou Shi and Hao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FiClXlUqA7}\n}", "github": "", "project": "", "reviewers": "jN5u;Ps1S;4VUe", "pdf_size": 12999917, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "111;81;183", "wc_strengths": "49;76;139", "wc_weaknesses": "2;73;83", "wc_questions": "277;107;8", "wc_limitations": "8;5;23", "wc_review": "447;342;436", "wc_reply_reviewers": "0;10;44", "wc_reply_authors": "0;0;24", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 125.0, 42.80186911806539 ], "wc_strengths_avg": [ 88.0, 37.70941526992961 ], 
"wc_weaknesses_avg": [ 52.666666666666664, 36.05859429071275 ], "wc_questions_avg": [ 130.66666666666666, 111.08655284156686 ], "wc_limitations_avg": [ 12.0, 7.874007874011811 ], "wc_review_avg": [ 408.3333333333333, 47.11923410054775 ], "wc_reply_reviewers_avg": [ 18.0, 18.83259585576738 ], "wc_reply_authors_avg": [ 8.0, 11.313708498984761 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14624351402779893732&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "rutgers.edu;cs.rutgers.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "0", "aff_campus_unique": "New Brunswick;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Direct Preference-based Policy Optimization without Reward Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72248", "id": "FkAwlqBuyO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/de8bd6b2b01cfa788e63f62e5b9a99b9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FkAwlqBuyO", "openreview": "https://openreview.net/forum?id=FkAwlqBuyO", "poster": "/media/PosterPDFs/NeurIPS%202023/72248.png?t=1701152597.160332", "slides": "https://nips.cc/virtual/2023/poster/72248", "video": "https://nips.cc/virtual/2023/poster/72248", "author_site": "Gaon An, Junhyeok Lee, Xingdong Zuo, Norio Kosaka, Kyung-Min Kim, Hyun Oh Song", "tldr": "", "abstract": "Preference-based reinforcement learning (PbRL) is an approach that enables RL agents to learn from preference, which is particularly useful when formulating a reward function is challenging. Existing PbRL methods generally involve a two-step procedure: they first learn a reward model based on given preference data and then employ off-the-shelf reinforcement learning algorithms using the learned reward model. However, obtaining an accurate reward model solely from preference information, especially when the preference is from human teachers, can be difficult. Instead, we propose a PbRL algorithm that directly learns from preference without requiring any reward modeling. To achieve this, we adopt a contrastive learning framework to design a novel policy scoring metric that assigns a high score to policies that align with the given preferences. We apply our algorithm to offline RL tasks with actual human preference labels and show that our algorithm outperforms or is on par with the existing PbRL methods. Notably, on high-dimensional control tasks, our algorithm surpasses offline RL methods that learn with ground-truth reward information. 
Finally, we show that our algorithm can be successfully applied to fine-tune large language models.", "keywords": "Preference-based reinforcement learning;Contrastive learning;Offline reinforcement learning;RLHF", "primary_area": "", "supplementary_material": "", "author": "Gaon An;Junhyeok Lee;Xingdong Zuo;Norio Kosaka;Kyung-Min Kim;Hyun Oh Song", "authorids": "~Gaon_An1;~Junhyeok_Lee1;~Xingdong_Zuo1;~Norio_Kosaka1;~Kyung-Min_Kim1;~Hyun_Oh_Song1", "gender": ";M;M;M;M;M", "homepage": ";http://mllab.snu.ac.kr/people.html;;https://rowing0914.github.io/;;https://mllab.snu.ac.kr/hyunoh", "dblp": "241/6191;228/6764;338/9844;;85/8572;05/10781", "google_scholar": ";https://scholar.google.com/citations?hl=en;Fn3Jm7wAAAAJ;dIpkfPAAAAAJ;https://scholar.google.com/citations?hl=en;ScoZZPsAAAAJ", "orcid": ";;;;0000-0003-2426-2198;", "linkedin": ";;;norio-kosaka-b73701117/;;hyun-oh-song-5a39b03", "or_profile": "~Gaon_An1;~Junhyeok_Lee1;~Xingdong_Zuo1;~Norio_Kosaka1;~Kyung-Min_Kim1;~Hyun_Oh_Song1", "aff": ";Seoul National University;NAVER;Birkbeck College, University of London;NAVER;Seoul National University", "aff_domain": ";mllab.snu.ac.kr;navercorp.com;bbk.ac.uk;navercorp.com;snu.ac.kr", "position": ";PhD student;Researcher;MS student;Leader;Associate Professor", "bibtex": "@inproceedings{\nan2023direct,\ntitle={Direct Preference-based Policy Optimization without Reward Modeling},\nauthor={Gaon An and Junhyeok Lee and Xingdong Zuo and Norio Kosaka and Kyung-Min Kim and Hyun Oh Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FkAwlqBuyO}\n}", "github": "", "project": "", "reviewers": "MTAP;93eM;E68m;CbsJ", "pdf_size": 2604427, "rating": "5;5;7;7", "confidence": "4;5;5;3", "soundness": "3;3;2;2", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "162;103;58;212", "wc_strengths": "52;154;26;165", "wc_weaknesses": "240;747;230;325", "wc_questions": "103;163;118;95", "wc_limitations": "35;33;20;65", "wc_review": "592;1200;452;862", "wc_reply_reviewers": "93;171;15;431", "wc_reply_authors": "217;352;284;335", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 133.75, 58.31970078798416 ], "wc_strengths_avg": [ 99.25, 61.07116750153054 ], "wc_weaknesses_avg": [ 385.5, 211.95105567087887 ], "wc_questions_avg": [ 119.75, 26.299952471439944 ], "wc_limitations_avg": [ 38.25, 16.48294573187693 ], "wc_review_avg": [ 776.5, 285.48336203708965 ], "wc_reply_reviewers_avg": [ 177.5, 156.40572240170755 ], "wc_reply_authors_avg": [ 297.0, 52.53094326204318 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3640108069531828703&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": ";mllab.snu.ac.kr;navercorp.com;bbk.ac.uk;navercorp.com;snu.ac.kr", "author_num": 6, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Seoul National University;NAVER Corporation;University of London", "aff_unique_dep": ";;Birkbeck College", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com;https://www.bbk.ac.uk", "aff_unique_abbr": "SNU;NAVER;Birkbeck", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";London", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "South Korea;United Kingdom" }, { "title": "Faith and Fate: Limits of Transformers on Compositionality", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72247", "id": "Fkckkr3ya8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/deb3c28192f979302c157cb653c15e90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Fkckkr3ya8", "openreview": "https://openreview.net/forum?id=Fkckkr3ya8", "poster": "/media/PosterPDFs/NeurIPS%202023/72247.png?t=1701836214.78365", "slides": "https://nips.cc/virtual/2023/poster/72247", "video": "https://nips.cc/virtual/2023/poster/72247", "author_site": "Nouha Dziri, Ximing Lu, Melanie Sclar, Xiang (Lorraine) Li, Liwei Jiang, Bill Yuchen Lin, Sean Welleck, Sean Welleck, Peter West, Chandra Bhagavatula, Ronan Le Bras, Jena Hwang, Soumya Sanyal, Xiang Ren, Allyson Ettinger, Zaid Harchaoui, Yejin Choi", "tldr": "", "abstract": "Transformer large language models (LLMs) have sparked admiration for their exceptional performance on tasks that demand intricate multi-step reasoning. Yet, these models simultaneously show failures on surprisingly trivial problems. \nThis begs the question: Are these errors incidental, or do they signal more substantial limitations?\nIn an attempt to demystify transformer LLMs, we investigate the limits of these models across three representative compositional tasks---multi-digit multiplication, logic grid puzzles, and a classic dynamic programming problem. These tasks require breaking problems down into sub-steps and synthesizing these steps into a precise answer. We formulate compositional tasks as computation graphs to systematically quantify the level of complexity, and break down reasoning steps into intermediate sub-procedures. \nOur empirical findings suggest that transformer LLMs solve compositional tasks by reducing multi-step compositional reasoning into linearized subgraph matching, without necessarily developing systematic problem-solving skills. To round off our empirical study, we provide theoretical arguments on abstract multi-step reasoning problems that highlight how autoregressive generations' performance can rapidly decay with increased task complexity.", "keywords": "Natural language processing;large language models;multi-step reasoning", "primary_area": "", "supplementary_material": "/attachment/dc54a152fe3fce152608267d97af8b7175d9d213.zip", "author": "Nouha Dziri;Ximing Lu;Melanie Sclar;Xiang Lorraine Li;Liwei Jiang;Bill Yuchen Lin;Sean Welleck;Peter West;Chandra Bhagavatula;Ronan Le Bras;Jena D. 
Hwang;Soumya Sanyal;Xiang Ren;Allyson Ettinger;Zaid Harchaoui;Yejin Choi", "authorids": "~Nouha_Dziri2;~Ximing_Lu1;~Melanie_Sclar1;~Xiang_Lorraine_Li1;~Liwei_Jiang2;~Bill_Yuchen_Lin1;~Sean_Welleck1;~Peter_West1;~Chandra_Bhagavatula1;~Ronan_Le_Bras1;~Jena_D._Hwang1;~Soumya_Sanyal1;~Xiang_Ren1;~Allyson_Ettinger1;~Zaid_Harchaoui1;~Yejin_Choi1", "gender": ";F;F;;F;M;;M;M;M;F;M;M;F;;F", "homepage": ";https://gloriaximinglu.github.io/;https://msclar.github.io;;https://liweijiang.me;http://yuchenlin.xyz/;;https://peterwestai.notion.site/;https://www.chandrab.page;https://rlebras.github.io/index.html;https://jenahwang.github.io/;https://soumyasanyal.github.io/;https://shanzhenren.github.io/;https://aetting.github.io;;https://yejinc.github.io/", "dblp": ";24/10879;274/6796;;;190/4518;;179/4587;151/3093;;83/10905;86/1950-1;36/360-1;165/0758;;89/579-1", "google_scholar": ";https://scholar.google.com/citations?hl=en;4uNPtZgAAAAJ;;lcPsDgUAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.ca/citations?user=9ubCBYwAAAAJ;AsgHp14AAAAJ;8dXLDSsAAAAJ;9QuMhLgAAAAJ;https://scholar.google.co.in/citations?user=KvaizyQAAAAJ;_moJlrIAAAAJ;;;vhP-tlcAAAAJ", "orcid": ";;;;;;;;;;;;;;;", "linkedin": ";;melanie-sclar-077047b5/;;;;;;;;;soumyasanyal/;xren7;;;", "or_profile": "~Nouha_Dziri2;~Ximing_Lu1;~Melanie_Sclar1;~Xiang_Lorraine_Li1;~Liwei_Jiang2;~Bill_Yuchen_Lin1;~Sean_Welleck1;~Peter_West1;~Chandra_Bhagavatula1;~Ronan_Le_Bras1;~Jena_D._Hwang1;~Soumya_Sanyal1;~Xiang_Ren1;~Allyson_Ettinger1;~Zaid_Harchaoui1;~Yejin_Choi1", "aff": ";University of Washington;University of Washington, Seattle;;University of Washington;Allen Institute for Artificial Intelligence;;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Southern California;University of Southern California;University of Chicago;;Department of Computer Science, University of Washington", "aff_domain": ";cs.washington.edu;uw.edu;;washington.edu;allenai.org;;allenai.org;allenai.org;allenai.org;allenai.org;usc.edu;usc.edu;uchicago.edu;;cs.washington.edu", "position": ";Undergrad student;PhD student;;PhD student;Researcher;;Intern;Researcher;Researcher;Researcher;PhD student;Associate Professor;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\ndziri2023faith,\ntitle={Faith and Fate: Limits of Transformers on Compositionality},\nauthor={Nouha Dziri and Ximing Lu and Melanie Sclar and Xiang Lorraine Li and Liwei Jiang and Bill Yuchen Lin and Sean Welleck and Peter West and Chandra Bhagavatula and Ronan Le Bras and Jena D. 
Hwang and Soumya Sanyal and Xiang Ren and Allyson Ettinger and Zaid Harchaoui and Yejin Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Fkckkr3ya8}\n}", "github": "", "project": "", "reviewers": "fn75;jnqc;cZLE;euRW", "pdf_size": 6331442, "rating": "6;7;7;7", "confidence": "3;4;4;5", "soundness": "3;4;3;4", "novelty": "3;4;3;3", "presentation": "4;4;4;3", "wc_summary": "63;240;124;576", "wc_strengths": "19;158;84;39", "wc_weaknesses": "10;214;286;84", "wc_questions": "37;111;50;171", "wc_limitations": "19;16;21;44", "wc_review": "148;739;565;914", "wc_reply_reviewers": "0;26;102;0", "wc_reply_authors": "0;31;34;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 250.75, 198.25409831829455 ], "wc_strengths_avg": [ 75.0, 53.39007398384086 ], "wc_weaknesses_avg": [ 148.5, 107.86449833008078 ], "wc_questions_avg": [ 92.25, 53.36372831802516 ], "wc_limitations_avg": [ 25.0, 11.113055385446435 ], "wc_review_avg": [ 591.5, 284.23449825804045 ], "wc_reply_reviewers_avg": [ 32.0, 41.78516483155236 ], "wc_reply_authors_avg": [ 16.25, 16.284578594486256 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 379, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5824906986600000884&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";cs.washington.edu;uw.edu;;washington.edu;allenai.org;;allenai.org;allenai.org;allenai.org;allenai.org;usc.edu;usc.edu;uchicago.edu;;cs.washington.edu", "author_num": 16, "aff_unique_index": "0;0;0;1;1;1;1;1;2;2;3;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence;University of Southern California;University of Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.washington.edu;https://allenai.org;https://www.usc.edu;https://www.uchicago.edu", "aff_unique_abbr": "UW;AI2;USC;UChicago", "aff_campus_unique_index": "1;2;2;1", "aff_campus_unique": ";Seattle;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Entropy-dissipation Informed Neural Network for McKean-Vlasov Type PDEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72246", "id": "FkpMm9avyP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9a17133e3943509243b5e197c1c23b2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FkpMm9avyP", "openreview": "https://openreview.net/forum?id=FkpMm9avyP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72246", "video": "https://nips.cc/virtual/2023/poster/72246", "author_site": "Zebang Shen, Zhenfu Wang", "tldr": "", "abstract": "The McKean-Vlasov equation (MVE) describes the collective behavior of particles subject to drift, diffusion, and mean-field interaction. In physical systems, the interaction term can be singular, i.e. it diverges when two particles collide. 
Notable examples of such interactions include the Coulomb interaction, fundamental in plasma physics, and the Biot-Savart interaction, present in the vorticity formulation of the 2D Navier-Stokes equation (NSE) in fluid dynamics. Solving MVEs that involve singular interaction kernels presents a significant challenge, especially when aiming to provide rigorous theoretical guarantees. In this work, we propose a novel approach based on the concept of entropy dissipation in the underlying system. We derive a potential function that effectively controls the KL divergence between a hypothesis solution and the ground truth. Building upon this theoretical foundation, we introduce the Entropy-dissipation Informed Neural Network (EINN) framework for solving MVEs. In EINN, we utilize neural networks (NN) to approximate the underlying velocity field and minimize the proposed potential function. By leveraging the expressive power of NNs, our approach offers a promising avenue for tackling the complexities associated with singular interactions. To assess the empirical performance of our method, we compare EINN with SOTA NN-based MVE solvers. The results demonstrate the effectiveness of our approach in solving MVEs across various example problems.", "keywords": "Entropy-dissipation;McKean-Vlasov;Navier-Stokes;PDE;Coulomb;singular interaction", "primary_area": "", "supplementary_material": "/attachment/e93ea60c484c5ecaa11be3f35f0d0f979c4aedf2.pdf", "author": "Zebang Shen;Zhenfu Wang", "authorids": "~Zebang_Shen1;~Zhenfu_Wang1", "gender": "M;M", "homepage": ";http://bicmr.pku.edu.cn/~zhenfuwang/", "dblp": "165/3377;", "google_scholar": "klqzFvgAAAAJ;uvpk5m4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zebang_Shen1;~Zhenfu_Wang1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Peking University", "aff_domain": "inf.ethz.ch;bicmr.pku.edu.cn", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nshen2023entropydissipation,\ntitle={Entropy-dissipation Informed Neural Network for McKean-Vlasov Type {PDE}s},\nauthor={Zebang Shen and Zhenfu Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FkpMm9avyP}\n}", "github": "", "project": "", "reviewers": "Jhg5;HFPv;A2vP;AJKM", "pdf_size": 1237985, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "61;132;105;69", "wc_strengths": "53;88;38;67", "wc_weaknesses": "122;122;100;50", "wc_questions": "70;209;75;68", "wc_limitations": "7;22;37;1", "wc_review": "313;573;355;255", "wc_reply_reviewers": "416;14;35;29", "wc_reply_authors": "1032;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.75, 28.542731123702932 ], "wc_strengths_avg": [ 61.5, 18.418740456393863 ], "wc_weaknesses_avg": [ 98.5, 29.406631905065225 ], "wc_questions_avg": [ 105.5, 59.81011620119125 ], "wc_limitations_avg": [ 16.75, 13.970952007647869 ], "wc_review_avg": [ 374.0, 120.2538980657176 ], "wc_reply_reviewers_avg": [ 123.5, 169.0480700865881 ], "wc_reply_authors_avg": [ 258.0, 446.86910835277035 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_rating_confidence": 0.8703882797784891, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2016066350940022450&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "inf.ethz.ch;bicmr.pku.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Peking University", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;http://www.pku.edu.cn", "aff_unique_abbr": "ETHZ;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;China" }, { "title": "Robust Multi-Agent Reinforcement Learning via Adversarial Regularization: Theoretical Foundation and Stable Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72245", "id": "FmZVRe0gn8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6f8517fceeca1e2cd61721dff786c14-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FmZVRe0gn8", "openreview": "https://openreview.net/forum?id=FmZVRe0gn8", "poster": "/media/PosterPDFs/NeurIPS%202023/72245.png?t=1702179218.2539797", "slides": "https://nips.cc/virtual/2023/poster/72245", "video": "https://nips.cc/virtual/2023/poster/72245", "author_site": "Alexander Bukharin, Yan Li, Yue Yu, Qingru Zhang, Zhehui Chen, Simiao Zuo, Chao Zhang, Songan Zhang, Tuo Zhao", "tldr": "", "abstract": "Multi-Agent Reinforcement Learning (MARL) has shown promising results across several domains. Despite this promise, MARL policies often lack robustness and are therefore sensitive to small changes in their environment. This presents a serious concern for the real world deployment of MARL algorithms, where the testing environment may slightly differ from the training environment. In this work we show that we can gain robustness by controlling a policy\u2019s Lipschitz constant, and under mild conditions, establish the existence of a Lipschitz and close-to-optimal policy. Motivated by these insights, we propose a new robust MARL framework, ERNIE, that promotes the Lipschitz continuity of the policies with respect to the state observations and actions by adversarial regularization. The ERNIE framework provides robustness against noisy observations, changing transition dynamics, and malicious actions of agents. However, ERNIE\u2019s adversarial regularization may introduce some training instability. To reduce this instability, we reformulate adversarial regularization as a Stackelberg game. We demonstrate the effectiveness of the proposed framework with extensive experiments in traffic light control and particle environments. In addition, we extend ERNIE to mean-field MARL with a formulation based on distributionally robust optimization that outperforms its non-robust counterpart and is of independent interest. 
Our code is available at https://github.com/abukharin3/ERNIE.", "keywords": "Multi-Agent Reinforcement Learning;Theory of Robust Reinforcement Learning;Adversarial Regularization", "primary_area": "", "supplementary_material": "/attachment/87b6d0d9da0522834b5ad27f83a19969eaa23fb0.zip", "author": "Alexander Bukharin;Yan Li;Yue Yu;Qingru Zhang;Zhehui Chen;Simiao Zuo;Chao Zhang;Songan Zhang;Tuo Zhao", "authorids": "~Alexander_Bukharin1;~Yan_Li9;~Yue_Yu2;~Qingru_Zhang2;~Zhehui_Chen1;~Simiao_Zuo1;~Chao_Zhang15;~Songan_Zhang1;~Tuo_Zhao1", "gender": "M;M;M;M;M;;;;M", "homepage": "https://abukharin3.github.io;https://gzliyan113.github.io/;https://yueyu1030.github.io;https://qingruzhang.github.io/;https://sites.google.com/view/zhehuichen/home;;http://chaozhang.org/;;http://www2.isye.gatech.edu/~tzhao80", "dblp": "294/6372;;;228/6749;195/6300;232/2089;94/3019-14;;", "google_scholar": ";wLfoeakAAAAJ;zQ3Jh6UAAAAJ;7YM-faYAAAAJ;2lvIrNAAAAAJ;J8TSTXMAAAAJ;https://scholar.google.com/citations?hl=en;YnSPFY8AAAAJ;EJXN6tYAAAAJ", "orcid": ";;0000-0002-3683-5208;;;;0000-0003-3009-598X;;", "linkedin": ";;;qingru-zhang-4b789a187;zhehui-chen-366551105/;;;;", "or_profile": "~Alexander_Bukharin1;~Yan_Li9;~Yue_Yu2;~Qingru_Zhang2;~Zhehui_Chen1;~Simiao_Zuo1;~Chao_Zhang15;~Songan_Zhang1;~Tuo_Zhao1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Google;Georgia Institute of Technology;Didi Research;Georgia Institute of Technology;Georgia Institute of Technology;Ford Motor Company;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;google.com;gatech.edu;didichuxing.com;gatech.edu;gatech.edu;ford.com;gatech.edu", "position": "PhD student;PhD student;Research Intern;PhD student;Applied Scientist;PhD student;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nbukharin2023robust,\ntitle={Robust Multi-Agent Reinforcement Learning via Adversarial Regularization: Theoretical Foundation and Stable Algorithms},\nauthor={Alexander Bukharin and Yan Li and Yue Yu and Qingru Zhang and Zhehui Chen and Simiao Zuo and Chao Zhang and Songan Zhang and Tuo Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FmZVRe0gn8}\n}", "github": "", "project": "", "reviewers": "fpJY;NyS9;YAu4;J2jx;mUHD", "pdf_size": 5615179, "rating": "5;5;6;6;6", "confidence": "4;3;2;3;4", "soundness": "3;2;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;2;3;3;3", "wc_summary": "260;52;60;66;141", "wc_strengths": "222;9;35;29;93", "wc_weaknesses": "576;143;52;2;123", "wc_questions": "38;125;47;189;62", "wc_limitations": "1;17;7;296;1", "wc_review": "1097;346;201;582;420", "wc_reply_reviewers": "49;56;0;38;27", "wc_reply_authors": "99;28;0;159;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;2;1;3;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 115.8, 78.85784678774839 ], "wc_strengths_avg": [ 77.6, 77.42247735638533 ], "wc_weaknesses_avg": [ 179.2, 204.69430866538522 ], "wc_questions_avg": [ 92.2, 57.164324538998976 ], "wc_limitations_avg": [ 64.4, 115.94757436013916 ], "wc_review_avg": [ 529.2, 309.31757143751145 ], "wc_reply_reviewers_avg": [ 34.0, 19.6468827043885 ], "wc_reply_authors_avg": [ 57.2, 62.46086774933566 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 
0.8944271909999159 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.32732683535398854, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12033749916241817467&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "gatech.edu;gatech.edu;google.com;gatech.edu;didichuxing.com;gatech.edu;gatech.edu;ford.com;gatech.edu", "author_num": 9, "aff_unique_index": "0;0;1;0;2;0;0;3;0", "aff_unique_norm": "Georgia Institute of Technology;Google;Didi Research;Ford Motor Company", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.gatech.edu;https://www.google.com;https://www.didi.com;https://www.ford.com", "aff_unique_abbr": "Georgia Tech;Google;Didi;Ford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "NU-MCC: Multiview Compressive Coding with Neighborhood Decoder and Repulsive UDF", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72244", "id": "FmpH0CYWiX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c6f1e44be16e87887b7b894d59ba7f29-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FmpH0CYWiX", "openreview": "https://openreview.net/forum?id=FmpH0CYWiX", "poster": "/media/PosterPDFs/NeurIPS%202023/72244.png?t=1701063991.5589147", "slides": "https://nips.cc/virtual/2023/poster/72244", "video": "https://nips.cc/virtual/2023/poster/72244", "author_site": "Stefan Lionar, Xiangyu Xu, Min Lin, Gim Hee Lee", "tldr": "", "abstract": "Remarkable progress has been made in 3D reconstruction from single-view RGB-D inputs. MCC is the current state-of-the-art method in this field, which achieves unprecedented success by combining vision Transformers with large-scale training. However, we identified two key limitations of MCC: 1) The Transformer decoder is inefficient in handling a large number of query points; 2) The 3D representation struggles to recover high-fidelity details. In this paper, we propose a new approach called NU-MCC that addresses these limitations. NU-MCC includes two key innovations: a Neighborhood decoder and a Repulsive Unsigned Distance Function (Repulsive UDF). First, our Neighborhood decoder introduces center points as an efficient proxy of input visual features, allowing each query point to only attend to a small neighborhood. This design not only results in much faster inference speed but also enables the exploitation of finer-scale visual features for improved recovery of 3D textures. Second, our Repulsive UDF is a novel alternative to the occupancy field used in MCC, significantly improving the quality of 3D object reconstruction. Compared to standard UDFs that suffer from holes in results, our proposed Repulsive UDF can achieve more complete surface reconstruction. Experimental results demonstrate that NU-MCC is able to learn a strong 3D representation, significantly advancing the state of the art in single-view 3D reconstruction. 
Particularly, it outperforms MCC by 9.7% in terms of the F1-score on the CO3D-v2 dataset with more than 5x faster running speed.", "keywords": "single-view 3d reconstruction;neural fields;3d reconstruction", "primary_area": "", "supplementary_material": "/attachment/a86e4ba4d7979fb741e74b9f95fb85afbc64460e.zip", "author": "Stefan Lionar;Xiangyu Xu;Min Lin;Gim Hee Lee", "authorids": "~Stefan_Lionar1;~Xiangyu_Xu3;~Min_Lin1;~Gim_Hee_Lee1", "gender": ";M;M;", "homepage": ";https://xuxy09.github.io/;https://linmin.me;https://www.comp.nus.edu.sg/~leegh/", "dblp": "278/2750.html;172/1282-2.html;;49/9455", "google_scholar": "w6RfcvMAAAAJ;Ec5Biz4AAAAJ;BGONmkIAAAAJ;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": ";;;0000-0002-1583-0475", "linkedin": ";;min-lin-08a3a422/;", "or_profile": "~Stefan_Lionar1;~Xiangyu_Xu3;~Min_Lin1;~Gim_Hee_Lee1", "aff": "Sea AI Lab / Garena;Sea AI Lab;Sea AI Lab;National University of Singapore", "aff_domain": "sea.com;sea.com;sea.com;nus.edu.sg", "position": "PhD student;Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nlionar2023numcc,\ntitle={{NU}-{MCC}: Multiview Compressive Coding with Neighborhood Decoder and Repulsive {UDF}},\nauthor={Stefan Lionar and Xiangyu Xu and Min Lin and Gim Hee Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FmpH0CYWiX}\n}", "github": "", "project": "", "reviewers": "AYkM;iFNG;97oK;yY6d;WpNt", "pdf_size": 8550264, "rating": "4;6;6;6;7", "confidence": "5;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "3;4;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "145;92;75;45;112", "wc_strengths": "34;146;71;26;211", "wc_weaknesses": "342;242;99;170;320", "wc_questions": "22;151;30;4;49", "wc_limitations": "9;1;1;1;1", "wc_review": "552;632;276;246;693", "wc_reply_reviewers": "27;59;2;0;19", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.8, 33.73662698018283 ], "wc_strengths_avg": [ 97.6, 70.82541916572043 ], "wc_weaknesses_avg": [ 234.6, 91.04196834427516 ], "wc_questions_avg": [ 51.2, 51.95151585853872 ], "wc_limitations_avg": [ 2.6, 3.2000000000000006 ], "wc_review_avg": [ 479.8, 184.4065074773664 ], "wc_reply_reviewers_avg": [ 21.4, 21.378493866500513 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9185586535436918, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7540818550948502920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sea.com;sea.com;sea.com;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Sea AI Lab;National University of Singapore", "aff_unique_dep": "AI Lab;", "aff_unique_url": "https://www.sea-lab.com;https://www.nus.edu.sg", "aff_unique_abbr": "Sea AI;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore;" }, { "title": "3D-IntPhys: Towards More Generalized 3D-grounded Visual Intuitive Physics under Challenging Scenes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72243", 
"id": "Fp5uC6YHwe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/164687cb815daae754d33364716e65e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Fp5uC6YHwe", "openreview": "https://openreview.net/forum?id=Fp5uC6YHwe", "poster": "/media/PosterPDFs/NeurIPS%202023/72243.png?t=1699480957.0512521", "slides": "https://nips.cc/virtual/2023/poster/72243", "video": "https://nips.cc/virtual/2023/poster/72243", "author_site": "Haotian Xue, Antonio Torralba, Josh Tenenbaum, Dan Yamins, Yunzhu Li, Hsiao-Yu Tung", "tldr": "", "abstract": "Given a visual scene, humans have strong intuitions about how a scene can evolve over time under given actions. The intuition, often termed visual intuitive physics, is a critical ability that allows us to make effective plans to manipulate the scene to achieve desired outcomes without relying on extensive trial and error. In this paper, we present a framework capable of learning 3D-grounded visual intuitive physics models from videos of complex scenes with fluids. Our method is composed of a conditional Neural Radiance Field (NeRF)-style visual frontend and a 3D point-based dynamics prediction backend, using which we can impose strong relational and structural inductive bias to capture the structure of the underlying environment. Unlike existing intuitive point-based dynamics works that rely on the supervision of dense point trajectory from simulators, we relax the requirements and only assume access to multi-view RGB images and (imperfect) instance masks acquired using color prior. This enables the proposed model to handle scenarios where accurate point estimation and tracking are hard or impossible. We generate datasets including three challenging scenarios involving fluid, granular materials, and rigid objects in the simulation. The datasets do not include any dense particle information so most previous 3D-based intuitive physics pipelines can barely deal with that. We show our model can make long-horizon future predictions by learning from raw images and significantly outperforms models that do not employ an explicit 3D representation space. We also show that once trained, our model can achieve strong generalization in complex scenarios under extrapolate settings.", "keywords": "Intuitive Physics;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/d183adaec59dcb150929a16c875749851b24a450.pdf", "author": "Haotian Xue;Antonio Torralba;Joshua B. 
Tenenbaum;Daniel LK Yamins;Yunzhu Li;Hsiao-Yu Tung", "authorids": "~Haotian_Xue1;~Antonio_Torralba1;~Joshua_B._Tenenbaum1;~Daniel_LK_Yamins1;~Yunzhu_Li1;~Hsiao-Yu_Tung1", "gender": "M;;M;M;M;F", "homepage": "http://web.mit.edu/torralba/www//;;https://Neuroailab.stanford.edu;https://yunzhuli.github.io/;https://xavihart.github.io;", "dblp": "t/AntonioBTorralba;t/JoshuaBTenenbaum;;182/1831;;199/1661", "google_scholar": "https://scholar.google.com.tw/citations?user=8cxDHS4AAAAJ;;;WlA92lcAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;", "linkedin": ";;;;haotian-xue-gatech/;", "or_profile": "~Antonio_Torralba1;~Joshua_B._Tenenbaum1;~Daniel_LK_Yamins1;~Yunzhu_Li1;~Xue_Haotian1;~Hsiao-Yu_Fish_Tung1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Stanford University;Stanford University;Georgia Institute of Technology;", "aff_domain": "mit.edu;mit.edu;stanford.edu;stanford.edu;gatech.edu;", "position": "Full Professor;Professor;Assistant Professor;Postdoc;PhD student;", "bibtex": "@inproceedings{\nxue2023dintphys,\ntitle={3D-IntPhys: Towards More Generalized 3D-grounded Visual Intuitive Physics under Challenging Scenes},\nauthor={Haotian Xue and Antonio Torralba and Joshua B. Tenenbaum and Daniel LK Yamins and Yunzhu Li and Hsiao-Yu Tung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Fp5uC6YHwe}\n}", "github": "", "project": "", "reviewers": "kxU1;R6LT;493i;cs73;SEVF", "pdf_size": 8322988, "rating": "5;6;6;6;7", "confidence": "3;2;2;2;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;4", "presentation": "3;3;3;3;3", "wc_summary": "91;81;85;75;80", "wc_strengths": "26;16;37;119;38", "wc_weaknesses": "38;23;294;110;90", "wc_questions": "8;28;46;110;140", "wc_limitations": "14;1;38;14;46", "wc_review": "177;149;500;428;394", "wc_reply_reviewers": "0;86;69;47;0", "wc_reply_authors": "0;0;30;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.4, 5.351635264103861 ], "wc_strengths_avg": [ 47.2, 36.7880415352598 ], "wc_weaknesses_avg": [ 111.0, 96.95772274553481 ], "wc_questions_avg": [ 66.4, 50.23783434822803 ], "wc_limitations_avg": [ 22.6, 16.72841893306119 ], "wc_review_avg": [ 329.6, 140.54835466842007 ], "wc_reply_reviewers_avg": [ 40.4, 35.22839763599815 ], "wc_reply_authors_avg": [ 6.0, 12.0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3952847075210474, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2561631282043636055&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "mit.edu;mit.edu;stanford.edu;stanford.edu;gatech.edu;", "author_num": 6, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;Stanford University;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.stanford.edu;https://www.gatech.edu", "aff_unique_abbr": "MIT;Stanford;Georgia Tech", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MiliPoint: A Point Cloud Dataset for mmWave Radar", "status": 
"Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73646", "id": "FpK2aQfbyo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c60468eca9cd0b0083f0ff9d0aeb171a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=FpK2aQfbyo", "openreview": "https://openreview.net/forum?id=FpK2aQfbyo", "poster": "/media/PosterPDFs/NeurIPS%202023/73646.png?t=1699044079.109415", "slides": "https://nips.cc/virtual/2023/poster/73646", "video": "https://nips.cc/virtual/2023/poster/73646", "author_site": "Han Cui, Shu Zhong, Jiacheng Wu, Zichao Shen, Naim Dahnoun, Yiren Zhao", "tldr": "", "abstract": "Millimetre-wave (mmWave) radar has emerged as an attractive and cost-effective alternative for human activity sensing compared to traditional camera-based systems. mmWave radars are also non-intrusive, providing better protection for user privacy. However, as a Radio Frequency based technology, mmWave radars rely on capturing reflected signals from objects, making them more prone to noise compared to cameras. This raises an intriguing question for the deep learning community: Can we develop more effective point set-based deep learning methods for such attractive sensors? \n \nTo answer this question, our work, termed MiliPoint, delves into this idea by providing a large-scale, open dataset for the community to explore how mmWave radars can be utilised for human activity recognition. Moreover, MiliPoint stands out as it is larger in size than existing datasets, has more diverse human actions represented, and encompasses all three key tasks in human activity recognition. We have also established a range of point-based deep neural networks such as DGCNN, PointNet++ and PointTransformer, on MiliPoint, which can serve to set the ground baseline for further development.", "keywords": "mmWave radar;point cloud;keypoint estimation;identification;action classification;human activity recognition;mmWave dataset", "primary_area": "", "supplementary_material": "", "author": "Han Cui;Shu Zhong;Jiacheng Wu;Zichao Shen;Naim Dahnoun;Yiren Zhao", "authorids": "~Han_Cui1;~Shu_Zhong1;~Jiacheng_Wu3;~Zichao_Shen1;~Naim_Dahnoun1;~Yiren_Zhao2", "gender": "M;F;M;M;M;M", "homepage": ";https://profiles.ucl.ac.uk/86435-shu-zhong/;https://scholar.google.com/citations?hl=zh-CN&user=GBMVcBUAAAAJ;;https://www.bristol.ac.uk/people/person/Naim-Dahnoun-b0315f4f-9fdc-4002-9202-dbe2fb679818/;https://aaronzhao.me", "dblp": ";;;;;https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren", "google_scholar": "vRuTGakAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.co.uk/citations?user=p5oHs_4AAAAJ;lOOmgEgAAAAJ", "orcid": ";0000-0002-1820-6424;;0000-0002-4809-5905;;", "linkedin": ";;;;naim-dahnoun-270333229?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BSv95UYF3StG3Gs%2BB%2FjG1Yg%3D%3D;yiren-aaron-zhao-baa8b5116/", "or_profile": "~Han_Cui1;~Shu_Zhong1;~Jiacheng_Wu3;~Zichao_Shen1;~Naim_Dahnoun1;~Yiren_Zhao2", "aff": "University of Bristol;University College London, University of London;University of Bristol ;University of Bristol;University of Bristol;Imperial College London", "aff_domain": "bristol.ac.uk;ucl.ac.uk;brisrol.ac.uk;bris.ac.uk;bristol.ac.uk;ic.ac.uk", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncui2023milipoint,\ntitle={MiliPoint: A Point Cloud Dataset for mmWave Radar},\nauthor={Han Cui and Shu Zhong and Jiacheng Wu and Zichao Shen and 
Naim Dahnoun and Yiren Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=FpK2aQfbyo}\n}", "github": "", "project": "", "reviewers": "hrVa;zF3S;k6RA;UdR9;Q9DZ", "pdf_size": 6303146, "rating": "4;6;7;7;8", "confidence": "5;4;3;4;5", "wc_summary_and_contributions": "64;139;83;119;75", "wc_strengths": "14;120;60;48;73", "wc_improvement": "118;119;55;143;100", "wc_limitations": "40;16;24;33;221", "wc_correctness": "18;1;21;1;249", "wc_clarity": "8;1;11;1;111", "wc_relation_to_prior_work": "9;1;14;1;63", "wc_documentation": "48;1;11;1;82", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "320;399;280;348;975", "wc_reply_reviewers": "0;269;0;0;40", "wc_reply_authors": "954;716;288;252;1553", "reply_reviewers": "0;2;0;0;1", "reply_authors": "2;2;1;1;3", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 96.0, 28.326665882168342 ], "wc_strengths_avg": [ 63.0, 34.59479729670345 ], "wc_improvement_avg": [ 107.0, 29.37345740630476 ], "wc_limitations_avg": [ 66.8, 77.52522170235954 ], "wc_correctness_avg": [ 58.0, 95.86240138865706 ], "wc_clarity_avg": [ 26.4, 42.481054600845304 ], "wc_relation_to_prior_work_avg": [ 17.6, 23.234457170332167 ], "wc_documentation_avg": [ 28.6, 31.815719385234715 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 464.4, 258.2220749664908 ], "wc_reply_reviewers_avg": [ 61.8, 104.75189735751805 ], "wc_reply_authors_avg": [ 752.6, 479.3735912625976 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2758386421836853, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17519788542470362039&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 7, "email": "bristol.ac.uk;ucl.ac.uk;brisrol.ac.uk;bris.ac.uk;bristol.ac.uk;ic.ac.uk", "author_num": 6, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "University of Bristol;University College London;Imperial College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bristol.ac.uk;https://www.ucl.ac.uk;https://www.imperial.ac.uk", "aff_unique_abbr": "Bristol;UCL;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Faster approximate subgraph counts with privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72242", "id": "Fqg9vGWy4k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/deddcfbf08f57489b0088b71a00db640-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Fqg9vGWy4k", "openreview": "https://openreview.net/forum?id=Fqg9vGWy4k", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72242", "video": "https://nips.cc/virtual/2023/poster/72242", "author_site": "Dung Nguyen, Mahantesh Halappanavar, Venkatesh Srinivasan, Venkatesh Srinivasan, Anil Vullikanti", "tldr": "", "abstract": "One of the most common problems studied in the context of differential privacy for graph data is counting the number of non-induced embeddings of a subgraph in a given graph. \nThese counts have very high global sensitivity. 
Therefore, adding noise based on powerful alternative techniques, such as smooth sensitivity and higher-order local sensitivity, has been shown to give significantly better accuracy. \nHowever, all these alternatives to global sensitivity become computationally very expensive, and to date efficient polynomial time algorithms are known only for a few selected subgraphs, such as triangles, $k$-triangles, and $k$-stars.\nIn this paper, we show that good approximations to these sensitivity metrics can still be used to obtain private algorithms.\nUsing this approach, we obtain much faster algorithms for privately counting the number of triangles in real-world social networks, which can be easily parallelized.\nWe also give a private polynomial time algorithm for counting any constant-size subgraph using less noise than the global sensitivity; we show this can be improved significantly for counting paths in special classes of graphs.", "keywords": "differential privacy;subgraph counting;smooth sensitivity;local sensitivity", "primary_area": "", "supplementary_material": "", "author": "Dung Nguyen;Mahantesh M Halappanavar;Venkatesh Srinivasan;Anil Vullikanti", "authorids": "~Dung_Nguyen2;~Mahantesh_M_Halappanavar1;~Venkatesh_Srinivasan1;~Anil_Vullikanti1", "gender": ";M;;M", "homepage": ";https://hpc.pnl.gov/people/hala/index.html;;https://engineering.virginia.edu/faculty/anil-vullikanti", "dblp": ";41/3413.html;;89/7912", "google_scholar": ";E4Wqxq8AAAAJ;;MNJ-E9UAAAAJ", "orcid": ";0000-0002-2323-4753;;0000-0002-8597-6197", "linkedin": ";halappanavar/;;", "or_profile": "~Dung_Nguyen2;~Mahantesh_M_Halappanavar1;~Venkatesh_Srinivasan1;~Anil_Vullikanti1", "aff": ";;;University of Virginia", "aff_domain": ";;;virginia.edu", "position": ";;;Professor", "bibtex": "@inproceedings{\nnguyen2023faster,\ntitle={Faster approximate subgraph counts with privacy},\nauthor={Dung Nguyen and Mahantesh M Halappanavar and Venkatesh Srinivasan and Anil Vullikanti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Fqg9vGWy4k}\n}", "github": "", "project": "", "reviewers": "tb5J;LedL;Bhh2;hrky", "pdf_size": 1362015, "rating": "4;6;6;6", "confidence": "5;4;2;3", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "40;168;108;374", "wc_strengths": "29;98;38;37", "wc_weaknesses": "141;97;20;25", "wc_questions": "2;119;93;1", "wc_limitations": "1;27;2;1", "wc_review": "213;509;261;438", "wc_reply_reviewers": "0;190;166;25", "wc_reply_authors": "83;845;359;7", "reply_reviewers": "0;2;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 172.5, 124.83889618223961 ], "wc_strengths_avg": [ 50.5, 27.64507189355817 ], "wc_weaknesses_avg": [ 70.75, 50.726595588507614 ], "wc_questions_avg": [ 53.75, 53.053628528122374 ], "wc_limitations_avg": [ 7.75, 11.121488209767612 ], "wc_review_avg": [ 355.25, 122.07042024995245 ], "wc_reply_reviewers_avg": [ 95.25, 83.6521816810536 ], "wc_reply_authors_avg": [ 323.5, 328.3424279620287 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 3, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=8891348439780309950&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;;virginia.edu", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Differentiable Blocks World: Qualitative 3D Decomposition by Rendering Primitives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72241", "id": "FsQWxU5TOL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/123fd8a56501194823c8e0dca00733df-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FsQWxU5TOL", "openreview": "https://openreview.net/forum?id=FsQWxU5TOL", "poster": "/media/PosterPDFs/NeurIPS%202023/72241.png?t=1701877790.5303478", "slides": "https://nips.cc/virtual/2023/poster/72241", "video": "https://nips.cc/virtual/2023/poster/72241", "author_site": "Tom Monnier, Jake Austin, Angjoo Kanazawa, Alexei Efros, Mathieu Aubry", "tldr": "", "abstract": "Given a set of calibrated images of a scene, we present an approach that produces a simple, compact, and actionable 3D world representation by means of 3D primitives. While many approaches focus on recovering high-fidelity 3D scenes, we focus on parsing a scene into mid-level 3D representations made of a small set of textured primitives. Such representations are interpretable, easy to manipulate and suited for physics-based simulations. Moreover, unlike existing primitive decomposition methods that rely on 3D input data, our approach operates directly on images through differentiable rendering. Specifically, we model primitives as textured superquadric meshes and optimize their parameters from scratch with an image rendering loss. We highlight the importance of modeling transparency for each primitive, which is critical for optimization and also enables handling varying numbers of primitives. We show that the resulting textured primitives faithfully reconstruct the input images and accurately model the visible 3D points, while providing amodal shape completions of unseen object regions. We compare our approach to the state of the art on diverse scenes from DTU, and demonstrate its robustness on real-life captures from BlendedMVS and Nerfstudio. We also showcase how our results can be used to effortlessly edit a scene or perform physical simulations. 
Code and video results are available at https://www.tmonnier.com/DBW.", "keywords": "3D decomposition;3D reconstruction;MVS;primitives;qualitative 3D", "primary_area": "", "supplementary_material": "", "author": "Tom Monnier;Jake Austin;Angjoo Kanazawa;Alexei A Efros;Mathieu Aubry", "authorids": "~Tom_Monnier1;jake-austin@berkeley.edu;~Angjoo_Kanazawa1;~Alexei_A_Efros1;~Mathieu_Aubry3", "gender": "M;;F;;", "homepage": "https://www.tmonnier.com;;https://people.eecs.berkeley.edu/~kanazawa/;;http://imagine.enpc.fr/~aubrym/", "dblp": "267/9457;;119/1305;;57/10067", "google_scholar": "ZfV1DqMAAAAJ;;Ci-_QYIAAAAJ;;https://scholar.google.fr/citations?user=0MiPsosAAAAJ", "orcid": ";;;;0000-0002-3804-0193", "linkedin": ";;;;", "or_profile": "~Tom_Monnier1;jake-austin@berkeley.edu;~Angjoo_Kanazawa1;~Alexei_A_Efros1;~Mathieu_Aubry3", "aff": "\u00c9cole des Ponts ParisTech;;University of California, Berkeley;;ENPC", "aff_domain": "enpc.fr;;berkeley.edu;;enpc.fr", "position": "PhD student;;Assistant Professor;;Principal Researcher", "bibtex": "@inproceedings{\nmonnier2023differentiable,\ntitle={Differentiable Blocks World: Qualitative 3D Decomposition by Rendering Primitives},\nauthor={Tom Monnier and Jake Austin and Angjoo Kanazawa and Alexei A Efros and Mathieu Aubry},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FsQWxU5TOL}\n}", "github": "", "project": "", "reviewers": "3Dgw;85Rz;ZNqf;xDAr", "pdf_size": 10304135, "rating": "5;5;5;7", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "4;3;2;4", "wc_summary": "125;93;95;115", "wc_strengths": "106;52;46;179", "wc_weaknesses": "249;160;130;216", "wc_questions": "111;139;107;101", "wc_limitations": "54;248;32;21", "wc_review": "645;692;410;632", "wc_reply_reviewers": "262;0;48;27", "wc_reply_authors": "251;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 107.0, 13.490737563232042 ], "wc_strengths_avg": [ 95.75, 53.44331108754397 ], "wc_weaknesses_avg": [ 188.75, 46.504704063137524 ], "wc_questions_avg": [ 114.5, 14.585952145814822 ], "wc_limitations_avg": [ 88.75, 92.70753744976726 ], "wc_review_avg": [ 594.75, 108.97562800920213 ], "wc_reply_reviewers_avg": [ 84.25, 104.02493691418418 ], "wc_reply_authors_avg": [ 62.75, 108.68618817494705 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12397639951223740337&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "enpc.fr;;berkeley.edu;;enpc.fr", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "\u00c9cole des Ponts ParisTech;University of California, Berkeley;\u00c9cole Nationale des Ponts et Chauss\u00e9es", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ponts.org;https://www.berkeley.edu;https://www.enpc.fr", "aff_unique_abbr": "ENPC;UC Berkeley;ENPC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;United States" }, { "title": "RiskQ: Risk-sensitive Multi-Agent Reinforcement Learning 
Value Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72240", "id": "FskZtRvMJI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d3040941a2d57ead4043556a70dd728-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FskZtRvMJI", "openreview": "https://openreview.net/forum?id=FskZtRvMJI", "poster": "/media/PosterPDFs/NeurIPS%202023/72240.png?t=1699886184.9336972", "slides": "https://nips.cc/virtual/2023/poster/72240", "video": "https://nips.cc/virtual/2023/poster/72240", "author_site": "Siqi Shen, Chennan Ma, Chao Li, Weiquan Liu, Yongquan Fu, Songzhu Mei, Xinwang Liu, Cheng Wang", "tldr": "", "abstract": "Multi-agent systems are characterized by environmental uncertainty, varying policies of agents, and partial observability, which result in significant risks. In the context of Multi-Agent Reinforcement Learning (MARL), learning coordinated and decentralized policies that are sensitive to risk is challenging. To formulate the coordination requirements in risk-sensitive MARL, we introduce the Risk-sensitive Individual-Global-Max (RIGM) principle as a generalization of the Individual-Global-Max (IGM) and Distributional IGM (DIGM) principles. This principle requires that the collection of risk-sensitive action selections of each agent should be equivalent to the risk-sensitive action selection of the central policy. Current MARL value factorization methods do not satisfy the RIGM principle for common risk metrics such as the Value at Risk (VaR) metric or distorted risk measurements. Therefore, we propose RiskQ to address this limitation, which models the joint return distribution by modeling quantiles of it as weighted quantile mixtures of per-agent return distribution utilities. RiskQ satisfies the RIGM principle for the VaR and distorted risk metrics. We show that RiskQ can obtain promising performance through extensive experiments. 
The source code of RiskQ is available in https://github.com/xmu-rl-3dv/RiskQ.", "keywords": "multi-agent reinforcement learning;value factorization;individual global max;risk-sensitive", "primary_area": "", "supplementary_material": "/attachment/029bb13caeb8684f9118493a041d3d1ece3ea072.zip", "author": "Siqi Shen;Chennan Ma;Chao Li;Weiquan Liu;Yongquan Fu;Songzhu Mei;Xinwang Liu;Cheng Wang", "authorids": "~Siqi_Shen5;~Chennan_Ma1;~Chao_Li29;~Weiquan_Liu1;~Yongquan_Fu2;~Songzhu_Mei1;~Xinwang_Liu1;~Cheng_Wang2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://asc.xmu.edu.cn/t/machennan;https://github.com/1411279054;https://cec.jmu.edu.cn/info/1009/6440.htm;https://yongquanf.github.io/;;https://xinwangliu.github.io/;https://chwang.xmu.edu.cn/index_en.htm;https://asc.xmu.edu.cn/t/shensiqi", "dblp": "187/9101;;03/1188;;11/10487;45/6569-2.html;54/2062-3;37/8026", "google_scholar": "ROidaW4AAAAJ;;vNDNtP8AAAAJ;;;A56vWC4AAAAJ;https://scholar.google.com/citations?hl=en;gFKYanAAAAAJ", "orcid": ";;0000-0002-5934-1139;;0000-0002-4926-5953;;0000-0001-6075-796X;", "linkedin": ";;;;;;;", "or_profile": "~Chennan_Ma1;~Chao_Li29;~Weiquan_Liu1;~Yongquan_Fu2;~Songzhu_Mei1;~Xinwang_Liu1;~Cheng_Wang2;~Siqi_SHEN2", "aff": "Xiamen University;Xiamen University;Xiamen University;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "MS student;MS student;Postdoc;Associate Professor;Associate Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nshen2023riskq,\ntitle={RiskQ: Risk-sensitive Multi-Agent Reinforcement Learning Value Factorization},\nauthor={Siqi Shen and Chennan Ma and Chao Li and Weiquan Liu and Yongquan Fu and Songzhu Mei and Xinwang Liu and Cheng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FskZtRvMJI}\n}", "github": "", "project": "", "reviewers": "X7fQ;EN1R;rLaN;e297", "pdf_size": 3735834, "rating": "5;6;6;6", "confidence": "3;2;1;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "48;131;37;28", "wc_strengths": "42;35;30;33", "wc_weaknesses": "337;13;54;56", "wc_questions": "92;65;20;112", "wc_limitations": "3;13;1;34", "wc_review": "522;257;142;263", "wc_reply_reviewers": "110;15;4;30", "wc_reply_authors": "96;92;631;39", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.0, 41.030476477857285 ], "wc_strengths_avg": [ 35.0, 4.415880433163924 ], "wc_weaknesses_avg": [ 115.0, 129.31550564414152 ], "wc_questions_avg": [ 72.25, 34.47009573528916 ], "wc_limitations_avg": [ 12.75, 13.083864108129525 ], "wc_review_avg": [ 296.0, 139.10607463371252 ], "wc_reply_reviewers_avg": [ 39.75, 41.595522595587134 ], "wc_reply_authors_avg": [ 214.5, 241.51656257904963 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17480200817536938259&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 8, "email": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;xmu.edu.cn;xmu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;1;1;0;0", "aff_unique_norm": "Xiamen University;National University of Defense Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;http://www.nudt.edu.cn/", "aff_unique_abbr": "XMU;NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Exact Bayesian Inference on Discrete Models via Probability Generating Functions: A Probabilistic Programming Approach", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72239", "id": "FtNruwFEs3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0747af6f877c0cb555fea595f01b0e83-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FtNruwFEs3", "openreview": "https://openreview.net/forum?id=FtNruwFEs3", "poster": "/media/PosterPDFs/NeurIPS%202023/72239.png?t=1702182330.8125012", "slides": "https://nips.cc/virtual/2023/poster/72239", "video": "https://nips.cc/virtual/2023/poster/72239", "author_site": "Fabian Zaiser, Andrzej Murawski, Chih-Hao Luke Ong", "tldr": "", "abstract": "We present an exact Bayesian inference method for discrete statistical models, which can find exact solutions to a large class of discrete inference problems, even with infinite support and continuous priors.\nTo express such models, we introduce a probabilistic programming language that supports discrete and continuous sampling, discrete observations, affine functions, (stochastic) branching, and conditioning on discrete events.\nOur key tool is *probability generating functions*:\nthey provide a compact closed-form representation of distributions that are definable by programs, thus enabling the exact computation of posterior probabilities, expectation, variance, and higher moments.\nOur inference method is provably correct and fully automated in a tool called *Genfer*, which uses automatic differentiation (specifically, Taylor polynomials), but does not require computer algebra.\nOur experiments show that Genfer is often faster than the existing exact inference tools PSI, Dice, and Prodigy.\nOn a range of real-world inference problems that none of these exact tools can solve, Genfer's performance is competitive with approximate Monte Carlo methods, while avoiding approximation errors.", "keywords": "Bayesian statistics;probabliistic programming;exact inference;discrete models;probability generating functions", "primary_area": "", "supplementary_material": "/attachment/00ae51f659ea1e5585cc2abbc08b316fccfa53d0.zip", "author": "Fabian Zaiser;Andrzej S Murawski;Luke Ong", "authorids": "~Fabian_Zaiser1;~Andrzej_S_Murawski1;~Luke_Ong1", "gender": ";;M", "homepage": "https://www.fabianzaiser.com/;;", "dblp": "249/5578.html;;o/CHLukeOng", "google_scholar": "Zit-MBEAAAAJ;;", "orcid": "0000-0001-5158-2002;;", "linkedin": ";;", "or_profile": "~Fabian_Zaiser1;~Andrzej_S_Murawski1;~Luke_Ong1", "aff": "University of Oxford;;Nanyang Technological University", "aff_domain": "cs.ox.ac.uk;;ntu.edu.sg", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nzaiser2023exact,\ntitle={Exact Bayesian Inference on Discrete Models via Probability Generating Functions: A Probabilistic Programming Approach},\nauthor={Fabian Zaiser and Andrzej S Murawski and Luke Ong},\nbooktitle={Thirty-seventh Conference on Neural Information 
Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FtNruwFEs3}\n}", "github": "", "project": "", "reviewers": "SkQ5;JaNH;hi49;c91c", "pdf_size": 1106453, "rating": "6;8;8;8", "confidence": "2;4;4;5", "soundness": "3;4;4;4", "novelty": "3;4;4;3", "presentation": "3;3;3;4", "wc_summary": "58;143;72;57", "wc_strengths": "29;147;67;59", "wc_weaknesses": "119;289;54;39", "wc_questions": "2;77;70;63", "wc_limitations": "6;24;26;9", "wc_review": "214;680;289;227", "wc_reply_reviewers": "23;101;0;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 35.42950747611375 ], "wc_strengths_avg": [ 75.5, 43.64344166080397 ], "wc_weaknesses_avg": [ 125.25, 99.20779959257236 ], "wc_questions_avg": [ 53.0, 29.857997253667232 ], "wc_limitations_avg": [ 16.25, 8.842369591913696 ], "wc_review_avg": [ 352.5, 191.1942729267799 ], "wc_reply_reviewers_avg": [ 35.75, 38.661188548724155 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16625842485741942303&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "cs.ox.ac.uk;;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.ntu.edu.sg", "aff_unique_abbr": "Oxford;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Singapore" }, { "title": "Dynamic Pricing and Learning with Bayesian Persuasion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72238", "id": "FtZ7lUwH99", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9c2e8a0bbed5fcfaf62856a3a719ada-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FtZ7lUwH99", "openreview": "https://openreview.net/forum?id=FtZ7lUwH99", "poster": "/media/PosterPDFs/NeurIPS%202023/72238.png?t=1698013568.6422741", "slides": "https://nips.cc/virtual/2023/poster/72238", "video": "https://nips.cc/virtual/2023/poster/72238", "author_site": "Shipra Agrawal, Yiding Feng, Wei Tang", "tldr": "", "abstract": "We consider a novel dynamic pricing and learning setting where, in addition to setting prices of products in sequential rounds, the seller also ex-ante commits to \u2018advertising schemes\u2019. That is, at the beginning of each round the seller can decide what kind of signal they will provide to the buyer about the product\u2019s quality upon realization. Using the popular Bayesian persuasion framework to model the effect of these signals on the buyers\u2019 valuation and purchase responses, we formulate the problem of finding an optimal design of the advertising scheme along with a pricing scheme that maximizes the seller\u2019s expected revenue. Without any a priori knowledge of the buyers\u2019 demand function, our goal is to design an online algorithm that can use past purchase responses to adaptively learn the optimal pricing and advertising strategy.
We study the regret of the algorithm when compared to the optimal clairvoyant price and advertising\nscheme. \n\nOur main result is a computationally efficient online algorithm that achieves an $O(T^{2/3}(m \\log T)^{1/3})$ regret bound when the valuation function is linear in the product quality. Here $m$ is the cardinality of the discrete product quality domain and $T$ is the time horizon. This result requires some natural monotonicity and Lipschitz assumptions on the valuation function, but no Lipschitz or smoothness assumption on the buyers\u2019 demand function. For constant $m$, our result matches, within logarithmic factors, the regret lower bound for dynamic pricing, which is a special case of our problem. We also obtain several improved results for the widely considered special case of additive valuations, including an $\\tilde{O}(T^{2/3})$ regret bound independent of $m$ when $m\\le T^{1/3}$.", "keywords": "dynamic pricing;information design;regret minimization", "primary_area": "", "supplementary_material": "/attachment/2f3dd59ca7f7cc947bf75344381fdda8b8804245.pdf", "author": "Shipra Agrawal;Yiding Feng;Wei Tang", "authorids": "~Shipra_Agrawal1;~Yiding_Feng1;~Wei_Tang1", "gender": "F;M;M", "homepage": "https://www.columbia.edu/~sa3305;https://www.ydfeng.us;https://wtang.org/", "dblp": "a/ShipraAgrawal;207/4923;", "google_scholar": "https://scholar.google.co.in/citations?user=qzIHHMEAAAAJ;p5t34vIAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shipra_Agrawal1;~Yiding_Feng1;~Wei_Tang1", "aff": "Columbia University;Microsoft;Columbia University", "aff_domain": "columbia.edu;microsoft.com;columbia.edu", "position": "Associate Professor;Postdoc;Postdoc", "bibtex": "@inproceedings{\nagrawal2023dynamic,\ntitle={Dynamic Pricing and Learning with Bayesian Persuasion},\nauthor={Shipra Agrawal and Yiding Feng and Wei Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FtZ7lUwH99}\n}", "github": "", "project": "", "reviewers": "78HV;p9NT;mNn4;kmXq;khHp", "pdf_size": 713896, "rating": "5;6;6;6;7", "confidence": "4;3;3;4;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "280;50;155;186;71", "wc_strengths": "85;55;81;29;55", "wc_weaknesses": "195;53;202;34;58", "wc_questions": "7;184;164;817;50", "wc_limitations": "2;1;11;11;3", "wc_review": "569;343;613;1077;237", "wc_reply_reviewers": "0;63;0;0;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 148.4, 83.00506008672001 ], "wc_strengths_avg": [ 61.0, 20.356817039999154 ], "wc_weaknesses_avg": [ 108.4, 74.03404622199167 ], "wc_questions_avg": [ 244.4, 293.9840812016868 ], "wc_limitations_avg": [ 5.6, 4.454211490264017 ], "wc_review_avg": [ 567.8, 290.27325057607356 ], "wc_reply_reviewers_avg": [ 15.2, 24.424577785501224 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15491300235206810246&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email":
"columbia.edu;microsoft.com;columbia.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Columbia University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.columbia.edu;https://www.microsoft.com", "aff_unique_abbr": "Columbia;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Balancing memorization and generalization in RNNs for high performance brain-machine Interfaces", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72237", "id": "FujJO3dsNj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/17a234c91f746d9625a75cf8a8731ee2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FujJO3dsNj", "openreview": "https://openreview.net/forum?id=FujJO3dsNj", "poster": "/media/PosterPDFs/NeurIPS%202023/72237.png?t=1701738593.9447637", "slides": "https://nips.cc/virtual/2023/poster/72237", "video": "https://nips.cc/virtual/2023/poster/72237", "author_site": "Joseph Costello, Hisham Temmar, Luis Cubillos, Matthew Mender, Dylan Wallace, Matt Willsey, Parag Patil, Cynthia Chestek", "tldr": "", "abstract": "Brain-machine interfaces (BMIs) can restore motor function to people with paralysis but are currently limited by the accuracy of real-time decoding algorithms. Recurrent neural networks (RNNs) using modern training techniques have shown promise in accurately predicting movements from neural signals but have yet to be rigorously evaluated against other decoding algorithms in a closed-loop setting. Here we compared RNNs to other neural network architectures in real-time, continuous decoding of finger movements using intracortical signals from nonhuman primates. Across one and two finger online tasks, LSTMs (a type of RNN) outperformed convolutional and transformer-based neural networks, averaging 18% higher throughput than the convolution network. On simplified tasks with a reduced movement set, RNN decoders were allowed to memorize movement patterns and matched able-bodied control. Performance gradually dropped as the number of distinct movements increased but did not go below fully continuous decoder performance. Finally, in a two-finger task where one degree-of-freedom had poor input signals, we recovered functional control using RNNs trained to act both like a movement classifier and continuous decoder. 
Our results suggest that RNNs can enable functional real-time BMI control by learning and generating accurate movement patterns.", "keywords": "brain computer interface;brain machine interface;neural decoding;prosthetic control;recurrent neural network;RNN;transformer;real time;closed-loop;user interface", "primary_area": "", "supplementary_material": "/attachment/45e8c05ad1462220261e429f1ab7362715e33486.zip", "author": "Joseph T Costello;Hisham Temmar;Luis H Cubillos;Matthew J Mender;Dylan M Wallace;Matthew S Willsey;Parag G Patil;Cynthia Chestek", "authorids": "~Joseph_T_Costello1;htemmar@umich.edu;lhcubill@umich.edu;mmender@umich.edu;dywallac@umich.edu;mwillsey@umich.edu;pgpatil@med.umich.edu;~Cynthia_Chestek1", "gender": "M;;;;;;;F", "homepage": ";;;;;;;http://chestekresearch.engin.umich.edu/", "dblp": ";;;;;;;", "google_scholar": "BwHH0TYAAAAJ;;;;;;;", "orcid": "0000-0001-7608-0885;;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Joseph_T_Costello1;htemmar@umich.edu;lhcubill@umich.edu;mmender@umich.edu;dywallac@umich.edu;mwillsey@umich.edu;pgpatil@med.umich.edu;~Cynthia_Chestek1", "aff": "University of Michigan - Ann Arbor;;;;;;;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;;;;;;;umich.edu", "position": "PhD student;;;;;;;Full Professor", "bibtex": "@inproceedings{\ncostello2023balancing,\ntitle={Balancing memorization and generalization in {RNN}s for high performance brain-machine Interfaces},\nauthor={Joseph T Costello and Hisham Temmar and Luis H Cubillos and Matthew J Mender and Dylan M Wallace and Matthew S Willsey and Parag G Patil and Cynthia Chestek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FujJO3dsNj}\n}", "github": "", "project": "", "reviewers": "WBvg;cHBb;3c6F;ZEFp", "pdf_size": 2606609, "rating": "6;7;7;8", "confidence": "3;2;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;4;4;4", "wc_summary": "55;156;24;86", "wc_strengths": "49;100;30;113", "wc_weaknesses": "201;47;37;62", "wc_questions": "198;110;146;547", "wc_limitations": "33;7;4;98", "wc_review": "536;420;241;906", "wc_reply_reviewers": "310;115;12;116", "wc_reply_authors": "429;34;0;36", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 48.92021565774215 ], "wc_strengths_avg": [ 73.0, 34.4746283518764 ], "wc_weaknesses_avg": [ 86.75, 66.55965369501257 ], "wc_questions_avg": [ 250.25, 174.16138349243784 ], "wc_limitations_avg": [ 35.5, 37.80542289143186 ], "wc_review_avg": [ 525.75, 243.39307200493607 ], "wc_reply_reviewers_avg": [ 138.25, 107.78769642217983 ], "wc_reply_authors_avg": [ 124.75, 176.24042527184278 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10238407800009395443&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "umich.edu;;;;;;;umich.edu", "author_num": 8, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", 
"aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On Differentially Private Sampling from Gaussian and Product Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72236", "id": "FviF8vuz5B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4eaa4b8f2d08edb3f0af990d56134ea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FviF8vuz5B", "openreview": "https://openreview.net/forum?id=FviF8vuz5B", "poster": "/media/PosterPDFs/NeurIPS%202023/72236.png?t=1701420730.6601202", "slides": "https://nips.cc/virtual/2023/poster/72236", "video": "https://nips.cc/virtual/2023/poster/72236", "author_site": "Badih Ghazi, Xiao Hu, Ravi Kumar, Pasin Manurangsi", "tldr": "", "abstract": "We study the problem, where given a dataset of $n$ i.i.d. samples from an unknown distribution $P$, we seek to generate a sample from a distribution that is close to $P$ in total variation distance, under the constraint of differential privacy. We study the settings where $P$ is a multi-dimensional Gaussian distribution with different assumptions: known covariance, unknown bounded covariance, and unknown unbounded covariance. We present new differentially private sampling algorithms, and show that they achieve near-optimal sample complexity in the first two settings. Moreover, when $P$ is a product distribution on the binary hypercube, we obtain a pure-DP algorithm whereas only an approximate-DP algorithm (with slightly worse sample complexity) was previously known.", "keywords": "privacy;sampling;Gaussian distribution;product distributions", "primary_area": "", "supplementary_material": "", "author": "Badih Ghazi;Xiao Hu;Ravi Kumar;Pasin Manurangsi", "authorids": "~Badih_Ghazi1;~Xiao_Hu5;~Ravi_Kumar1;~Pasin_Manurangsi2", "gender": ";F;M;M", "homepage": "https://sites.google.com/view/badihghazi/home;https://cs.uwaterloo.ca/~xiaohu/;https://sites.google.com/site/ravik53/;https://pasin30055.github.io/", "dblp": "125/2134;19/1374-5;k/RaviKumar.html;133/2059", "google_scholar": "GBJLTN8AAAAJ;rTXGtQ8AAAAJ;J_XhIsgAAAAJ;35hM-PkAAAAJ", "orcid": ";;0000-0002-2203-2586;", "linkedin": "badih-ghazi-608379132/;;ravi-kumar-a3a9631;", "or_profile": "~Badih_Ghazi1;~Xiao_Hu5;~Ravi_Kumar1;~Pasin_Manurangsi2", "aff": "Google;;Google;Google", "aff_domain": "google.com;;google.com;google.com", "position": "Researcher;;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nghazi2023on,\ntitle={On Differentially Private Sampling from Gaussian and Product Distributions},\nauthor={Badih Ghazi and Xiao Hu and Ravi Kumar and Pasin Manurangsi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FviF8vuz5B}\n}", "github": "", "project": "", "reviewers": "cbpB;Tiqw;iEFD;nVvL;jqWC", "pdf_size": 531812, "rating": "5;6;7;7;7", "confidence": "4;2;2;4;3", "soundness": "3;3;3;4;4", "novelty": "2;2;3;4;4", "presentation": "3;3;3;4;3", "wc_summary": "152;74;121;166;158", "wc_strengths": "58;44;97;237;37", "wc_weaknesses": "538;158;49;61;27", "wc_questions": "59;2;30;120;10", "wc_limitations": "17;1;11;33;15", "wc_review": "824;279;308;617;247", "wc_reply_reviewers": "30;17;19;229;0", "wc_reply_authors": "0;0;0;16;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], 
"novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 134.2, 33.74255473434103 ], "wc_strengths_avg": [ 94.6, 74.16360293297515 ], "wc_weaknesses_avg": [ 166.6, 191.039891122247 ], "wc_questions_avg": [ 44.2, 42.70081966426405 ], "wc_limitations_avg": [ 15.4, 10.384603988597735 ], "wc_review_avg": [ 455.0, 227.26812358973706 ], "wc_reply_reviewers_avg": [ 59.0, 85.54063361935076 ], "wc_reply_authors_avg": [ 3.2, 6.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2795084971874737, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4751914980712533671&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "google.com;;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Data Quality in Imitation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72235", "id": "FwmvbuDiMk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe692980c5d9732cf153ce27947653a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FwmvbuDiMk", "openreview": "https://openreview.net/forum?id=FwmvbuDiMk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72235", "video": "https://nips.cc/virtual/2023/poster/72235", "author_site": "Suneel Belkhale, Yuchen Cui, Dorsa Sadigh", "tldr": "", "abstract": "In supervised learning, the question of data quality and curation has been sidelined in recent years in favor of increasingly more powerful and expressive models that can ingest internet-scale data. However, in offline learning for robotics, we simply lack internet scale data, and so high quality datasets are a necessity. This is especially true in imitation learning (IL), a sample efficient paradigm for robot learning using expert demonstrations. Policies learned through IL suffer from state distribution shift at test time due to compounding errors in action prediction, which leads to unseen states that the policy cannot recover from.\nInstead of designing new algorithms to address distribution shift, an alternative perspective is to develop new ways of assessing and curating datasets. There is growing evidence that the same IL algorithms can have substantially different performance across different datasets. \nThis calls for a formalism for defining metrics of \"data quality\" that can further be leveraged for data curation.\nIn this work, we take the first step toward formalizing data quality for imitation learning through the lens of distribution shift: a high quality dataset encourages the policy to stay in distribution at test time. We propose two fundamental properties that are necessary for a high quality datasets: \ni) action divergence: the mismatch between the expert and learned policy at certain states; and ii) transition diversity: the noise present in the system for a given state and action. We investigate the combined effect of these two key properties in imitation learning theoretically, and we empirically analyze models trained on a variety of different data sources. 
We show that state diversity is not always beneficial, and we demonstrate how action divergence and transition diversity interact in practice.", "keywords": "Imitation Learning;Robotics;Data Quality", "primary_area": "", "supplementary_material": "/attachment/5f904f4f776844c511e76817d0286f060e96a9e0.zip", "author": "Suneel Belkhale;Yuchen Cui;Dorsa Sadigh", "authorids": "~Suneel_Belkhale1;~Yuchen_Cui1;~Dorsa_Sadigh1", "gender": "M;F;F", "homepage": "https://github.com/suneelbelkhale;https://yuchencui.cc;https://dorsa.fyi/", "dblp": "236/5069;201/5416.html;117/3174", "google_scholar": ";qQz2cm8AAAAJ;ZaJEZpYAAAAJ", "orcid": "0000-0002-3963-7987;0000-0001-7417-1222;", "linkedin": "suneel-b-032b1a101/;;", "or_profile": "~Suneel_Belkhale1;~Yuchen_Cui1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nbelkhale2023data,\ntitle={Data Quality in Imitation Learning},\nauthor={Suneel Belkhale and Yuchen Cui and Dorsa Sadigh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FwmvbuDiMk}\n}", "github": "", "project": "", "reviewers": "AXdc;24NK;oFdr;WXvd;9hHD;zh6Y", "pdf_size": 2219546, "rating": "2;3;5;6;6;7", "confidence": "5;4;2;3;2;4", "soundness": "2;3;2;3;3;3", "novelty": "1;2;3;3;3;2", "presentation": "2;3;3;3;4;3", "wc_summary": "34;76;20;275;113;105", "wc_strengths": "19;17;15;164;81;117", "wc_weaknesses": "67;70;79;559;510;157", "wc_questions": "2;5;78;330;90;222", "wc_limitations": "2;12;5;53;3;1", "wc_review": "124;180;197;1381;797;602", "wc_reply_reviewers": "0;0;47;61;58;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;1;1;1;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 4.833333333333333, 1.7716909687891083 ], "confidence_avg": [ 3.3333333333333335, 1.1055415967851334 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 103.83333333333333, 83.7285627621914 ], "wc_strengths_avg": [ 68.83333333333333, 57.14431049739092 ], "wc_weaknesses_avg": [ 240.33333333333334, 210.67325939051264 ], "wc_questions_avg": [ 121.16666666666667, 118.53890594324811 ], "wc_limitations_avg": [ 12.666666666666666, 18.390818965511627 ], "wc_review_avg": [ 546.8333333333334, 446.6913239462894 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 27.99206236695602 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5672760277973995, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=939755397543932884&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Image Compression: Generalization, Robustness, and Spectral Biases", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72234", "id": "FxRfAIj4s2", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3c5e56274140e0420baa3916c529210-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=FxRfAIj4s2", "openreview": "https://openreview.net/forum?id=FxRfAIj4s2", "poster": "/media/PosterPDFs/NeurIPS%202023/72234.png?t=1699467042.6492946", "slides": "https://nips.cc/virtual/2023/poster/72234", "video": "https://nips.cc/virtual/2023/poster/72234", "author_site": "Kelsey Lieberman, James Diffenderfer, Charles Godfrey, Bhavya Kailkhura", "tldr": "", "abstract": "Recent advances in neural image compression (NIC) have produced models that are starting to outperform classic codecs. While this has led to growing excitement about using NIC in real-world applications, the successful adoption of any machine learning system in the wild requires it to generalize (and be robust) to unseen distribution shifts at deployment. Unfortunately, current research lacks comprehensive datasets and informative tools to evaluate and understand NIC performance in real-world settings. To bridge this crucial gap, first, this paper presents a comprehensive benchmark suite to evaluate the out-of-distribution (OOD) performance of image compression methods. Specifically, we provide CLIC-C and Kodak-C by introducing 15 corruptions to the popular CLIC and Kodak benchmarks. Next, we propose spectrally-inspired inspection tools to gain deeper insight into errors introduced by image compression methods as well as their OOD performance. We then carry out a detailed performance comparison of several classic codecs and NIC variants, revealing intriguing findings that challenge our current understanding of the strengths and limitations of NIC. Finally, we corroborate our empirical findings with theoretical analysis, providing an in-depth view of the OOD performance of NIC and its dependence on the spectral properties of the data. Our benchmarks, spectral inspection tools, and findings provide a crucial bridge to the real-world adoption of NIC. We hope that our work will propel future efforts in designing robust and generalizable NIC methods. 
Code and data will be made available at https://github.com/klieberman/ood_nic.", "keywords": "image compression;robustness;generalization", "primary_area": "", "supplementary_material": "", "author": "Kelsey Lieberman;James Diffenderfer;Charles Godfrey;Bhavya Kailkhura", "authorids": "~Kelsey_Lieberman1;~James_Diffenderfer1;~Charles_Godfrey1;~Bhavya_Kailkhura1", "gender": "F;;M;M", "homepage": ";;https://godfrey-cw.github.io/;https://people.llnl.gov/kailkhura1", "dblp": "243/2573;188/4110;317/0066;132/8938", "google_scholar": "D2UjU3oAAAAJ;nRr24_QAAAAJ;yfT92d4AAAAJ;SQpJmOgAAAAJ", "orcid": ";;0000-0003-1698-2718;", "linkedin": "kelsey-l/;;godfrey-cw/;", "or_profile": "~Kelsey_Lieberman1;~James_Diffenderfer1;~Charles_Godfrey1;~Bhavya_Kailkhura1", "aff": "Lawrence Livermore National Labs;Lawrence Livermore National Labs;Pacific Northwest National Laboratory;Lawrence Livermore National Laboratory", "aff_domain": "llnl.gov;llnl.gov;pnnl.gov;llnl.gov", "position": "Intern;Researcher;Postdoc;Research Staff", "bibtex": "@inproceedings{\nlieberman2023neural,\ntitle={Neural Image Compression: Generalization, Robustness, and Spectral Biases},\nauthor={Kelsey Lieberman and James Diffenderfer and Charles Godfrey and Bhavya Kailkhura},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=FxRfAIj4s2}\n}", "github": "", "project": "", "reviewers": "h4BN;fKAX;e3od;5yiv", "pdf_size": 39239624, "rating": "3;5;5;6", "confidence": "4;4;3;4", "soundness": "2;3;4;3", "novelty": "2;2;2;3", "presentation": "2;4;4;4", "wc_summary": "51;54;201;68", "wc_strengths": "36;57;211;17", "wc_weaknesses": "481;325;271;29", "wc_questions": "73;2;10;14", "wc_limitations": "7;2;2;5", "wc_review": "648;440;695;133", "wc_reply_reviewers": "114;0;292;0", "wc_reply_authors": "369;165;165;165", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 93.5, 62.395913327717224 ], "wc_strengths_avg": [ 80.25, 76.80291335620024 ], "wc_weaknesses_avg": [ 276.5, 162.3722574826131 ], "wc_questions_avg": [ 24.75, 28.19020219863632 ], "wc_limitations_avg": [ 4.0, 2.1213203435596424 ], "wc_review_avg": [ 479.0, 221.61565829155666 ], "wc_reply_reviewers_avg": [ 101.5, 119.42675579617827 ], "wc_reply_authors_avg": [ 216.0, 88.33459118601274 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9440620073578684397&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "llnl.gov;llnl.gov;pnnl.gov;llnl.gov", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Lawrence Livermore National Laboratory;Pacific Northwest National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.llnl.gov;https://www.pnnl.gov", "aff_unique_abbr": "LLNL;PNNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Dual-Stream Neural Network Explains the Functional Segregation of Dorsal and Ventral Visual Pathways in Human Brains", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72233", "id": "Fy1S3v4UAk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d8ed3c9e27a9265ee60c8edba3dec1d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Fy1S3v4UAk", "openreview": "https://openreview.net/forum?id=Fy1S3v4UAk", "poster": "/media/PosterPDFs/NeurIPS%202023/72233.png?t=1700074414.6231747", "slides": "https://nips.cc/virtual/2023/poster/72233", "video": "https://nips.cc/virtual/2023/poster/72233", "author_site": "Minkyu Choi, Kuan Han, Xiaokai Wang, Yizhen Zhang, Zhongming Liu", "tldr": "", "abstract": "The human visual system uses two parallel pathways for spatial processing and object recognition. In contrast, computer vision systems tend to use a single feedforward pathway, rendering them less robust, adaptive, or efficient than human vision. To bridge this gap, we developed a dual-stream vision model inspired by the human eyes and brain. At the input level, the model samples two complementary visual patterns to mimic how the human eyes use magnocellular and parvocellular retinal ganglion cells to separate retinal inputs to the brain. At the backend, the model processes the separate input patterns through two branches of convolutional neural networks (CNN) to mimic how the human brain uses the dorsal and ventral cortical pathways for parallel visual processing. The first branch (WhereCNN) samples a global view to learn spatial attention and control eye movements. The second branch (WhatCNN) samples a local view to represent the object around the fixation. Over time, the two branches interact recurrently to build a scene representation from moving fixations. We compared this model with the human brains processing the same movie and evaluated their functional alignment by linear transformation. The WhereCNN and WhatCNN branches were found to differentially match the dorsal and ventral pathways of the visual cortex, respectively, primarily due to their different learning objectives, rather than their distinctions in retinal sampling or sensitivity to attention-driven eye movements. These model-based results lead us to speculate that the distinct responses and representations of the ventral and dorsal streams are more influenced by their distinct goals in visual attention and object recognition than by their specific bias or selectivity in retinal inputs. 
This dual-stream model takes a further step in brain-inspired computer vision, enabling parallel neural networks to actively explore and understand the visual surroundings.", "keywords": "brain-inspired AI;retina transformation;eye movements;deep neural networks", "primary_area": "", "supplementary_material": "/attachment/d78612ad22ed014e7b1f8297fb65f4b773c9b186.pdf", "author": "Minkyu Choi;Kuan Han;Xiaokai Wang;Yizhen Zhang;Zhongming Liu", "authorids": "~Minkyu_Choi1;~Kuan_Han1;~Xiaokai_Wang1;~Yizhen_Zhang1;~Zhongming_Liu1", "gender": "M;M;F;F;", "homepage": ";;;https://sites.google.com/view/yizhen-zhang/;https://libi.engin.umich.edu/", "dblp": "20/8344;181/8416;;59/2147;08/1653", "google_scholar": "QiKqiT4AAAAJ;;xM59TkMAAAAJ;z_KXE18AAAAJ;VtWD4JcAAAAJ", "orcid": ";;0000-0002-9915-4792;0000-0002-2836-2666;", "linkedin": "minkyu-choi-1b02b1131/;;;;", "or_profile": "~Minkyu_Choi1;~Kuan_Han1;~Xiaokai_Wang1;~Yizhen_Zhang1;~Zhongming_Liu1", "aff": "University of Michigan;University of Michigan;University of Michigan - Ann Arbor;University of California, San Francisco;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu;ucsf.edu;umich.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchoi2023a,\ntitle={A Dual-Stream Neural Network Explains the Functional Segregation of Dorsal and Ventral Visual Pathways in Human Brains},\nauthor={Minkyu Choi and Kuan Han and Xiaokai Wang and Yizhen Zhang and Zhongming Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Fy1S3v4UAk}\n}", "github": "", "project": "", "reviewers": "7ncd;ZoBi;xqDS;E5bU", "pdf_size": 6673385, "rating": "5;5;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "74;109;51;67", "wc_strengths": "88;20;120;138", "wc_weaknesses": "181;114;160;134", "wc_questions": "446;68;3;128", "wc_limitations": "10;14;7;73", "wc_review": "799;325;341;540", "wc_reply_reviewers": "198;75;13;21", "wc_reply_authors": "246;97;12;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 21.194043974664204 ], "wc_strengths_avg": [ 91.5, 44.997222136483046 ], "wc_weaknesses_avg": [ 147.25, 25.410381736605217 ], "wc_questions_avg": [ 161.25, 170.2400878171766 ], "wc_limitations_avg": [ 26.0, 27.248853186877426 ], "wc_review_avg": [ 501.25, 191.63816817116574 ], "wc_reply_reviewers_avg": [ 76.75, 73.95395526947831 ], "wc_reply_authors_avg": [ 93.5, 94.15545655988292 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15810234079628652761&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "umich.edu;umich.edu;umich.edu;ucsf.edu;umich.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Michigan;University of California, San Francisco", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.ucsf.edu", "aff_unique_abbr": "UM;UCSF", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Ann Arbor;San Francisco", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Evolutionary Neural Architecture Search for Transformer in Knowledge Tracing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72232", "id": "G14N38AjpU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3e53d82a1113e3d240059a9195668edc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=G14N38AjpU", "openreview": "https://openreview.net/forum?id=G14N38AjpU", "poster": "/media/PosterPDFs/NeurIPS%202023/72232.png?t=1698744206.7106464", "slides": "https://nips.cc/virtual/2023/poster/72232", "video": "https://nips.cc/virtual/2023/poster/72232", "author_site": "Shangshang Yang, Xiaoshan Yu, Ye Tian, Xueming Yan, Haiping Ma, Xingyi Zhang", "tldr": "", "abstract": "Knowledge tracing (KT) aims to trace students' knowledge states by predicting whether students answer correctly on exercises. Despite the excellent performance of existing Transformer-based KT approaches, they are criticized for the manually selected input features for fusion and the defect of single global context modelling to directly capture students' forgetting behavior in KT, when the related records are distant from the current record in terms of time. To address the issues, this paper first considers adding convolution operations to the Transformer to enhance its local context modelling ability used for students' forgetting behavior, then proposes an evolutionary neural architecture search approach to automate the input feature selection and automatically determine where to apply which operation for achieving the balancing of the local/global context modelling. In the search space, the original global path containing the attention module in Transformer is replaced with the sum of a global path and a local path that could contain different convolutions, and the selection of input features is also considered. To search the best architecture, we employ an effective evolutionary algorithm to explore the search space and also suggest a search space reduction strategy to accelerate the convergence of the algorithm. 
Experimental results on the two largest and most challenging education datasets demonstrate the effectiveness of the architecture found by the proposed approach.", "keywords": "Knowledge tracing;intelligent education;neural architecture search;Transformer", "primary_area": "", "supplementary_material": "/attachment/ee6e609045a7982c0e4fe7c68312f10cc224c809.zip", "author": "Shangshang Yang;Xiaoshan Yu;Ye Tian;Xueming Yan;Haiping Ma;Xingyi Zhang", "authorids": "~Shangshang_Yang1;~Xiaoshan_Yu2;~Ye_Tian11;~Xueming_Yan1;~Haiping_Ma2;~Xingyi_Zhang2", "gender": "M;;M;;;M", "homepage": "https://github.com/DevilYangS;;https://www.researchgate.net/profile/Ye-Tian-84;;;https://cs.ahu.edu.cn/2023/0815/c20806a313390/page.htm", "dblp": "232/8063;;32/5495-9;;;93/1107", "google_scholar": "https://scholar.google.com.hk/citations?user=arzbiNUAAAAJ;;;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-0837-5424;;0000-0002-3487-5126;;;0000-0002-5052-000X", "linkedin": ";;;;;", "or_profile": "~Shangshang_Yang1;~Xiaoshan_Yu2;~Ye_Tian11;~Xueming_Yan1;~Haiping_Ma2;~Xingyi_Zhang2", "aff": "Anhui University;;Anhui University;;;Anhui University", "aff_domain": "ahu.edu.cn;;ahu.edu.cn;;;ahu.edu.cn", "position": "Postdoc;;Associate Professor;;;Full Professor", "bibtex": "@inproceedings{\nyang2023evolutionary,\ntitle={Evolutionary Neural Architecture Search for Transformer in Knowledge Tracing},\nauthor={Shangshang Yang and Xiaoshan Yu and Ye Tian and Xueming Yan and Haiping Ma and Xingyi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=G14N38AjpU}\n}", "github": "", "project": "", "reviewers": "XrVw;YzcD;YAb1;rdQD;EGZn", "pdf_size": 959402, "rating": "5;5;6;6;7", "confidence": "2;3;5;3;4", "soundness": "3;3;3;2;3", "novelty": "2;3;3;2;3", "presentation": "3;2;4;3;3", "wc_summary": "17;83;61;22;85", "wc_strengths": "8;61;13;13;68", "wc_weaknesses": "139;63;82;48;99", "wc_questions": "2;145;15;46;1", "wc_limitations": "7;10;14;1;1", "wc_review": "173;362;185;130;254", "wc_reply_reviewers": "0;0;22;9;150", "wc_reply_authors": "0;0;18;17;650", "reply_reviewers": "0;0;1;1;2", "reply_authors": "1;1;2;2;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 53.6, 29.131426329653 ], "wc_strengths_avg": [ 32.6, 26.203816515919964 ], "wc_weaknesses_avg": [ 86.2, 31.517614122899595 ], "wc_questions_avg": [ 41.8, 54.10138630386471 ], "wc_limitations_avg": [ 6.6, 5.083306010855534 ], "wc_review_avg": [ 220.8, 81.05652348824245 ], "wc_reply_reviewers_avg": [ 36.2, 57.46790408567203 ], "wc_reply_authors_avg": [ 137.0, 256.6195627772754 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6289709020331509, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16288302919494890691&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "ahu.edu.cn;;ahu.edu.cn;;;ahu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Anhui University", "aff_unique_dep": "", "aff_unique_url": "http://www.ahu.edu.cn/", "aff_unique_abbr": "AHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" 
}, { "title": "On the Sublinear Regret of GP-UCB", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72231", "id": "G3aubF5Wnw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ec2be0bb10be9a0e5db4cc2a921f301-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=G3aubF5Wnw", "openreview": "https://openreview.net/forum?id=G3aubF5Wnw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72231", "video": "https://nips.cc/virtual/2023/poster/72231", "author_site": "Justin Whitehouse, Aaditya Ramdas, Steven Wu", "tldr": "", "abstract": "In the kernelized bandit problem, a learner aims to sequentially compute the optimum of a function lying in a reproducing kernel Hilbert space given only noisy evaluations at sequentially chosen points. In particular, the learner aims to minimize regret, which is a measure of the suboptimality of the choices made.\nArguably the most popular algorithm is the Gaussian Process Upper Confidence Bound (GP-UCB) algorithm, which involves acting based on a simple linear estimator of the unknown function.\nDespite its popularity, existing analyses of GP-UCB give a suboptimal regret rate, which fails to be sublinear for many commonly used kernels such as the Matern kernel. This has led to a longstanding open question: are existing regret analyses for GP-UCB tight, or can bounds be improved by using more sophisticated analytical techniques?\nIn this work, we resolve this open question and show that GP-UCB enjoys nearly optimal regret. In particular, our results yield sublinear regret rates for the Matern kernel, improving over the state-of-the-art analyses and partially resolving a COLT open problem posed by Vakili et al. Our improvements rely on a key technical contribution --- regularizing kernel ridge estimators in proportion to the smoothness of the underlying kernel $k$. 
Applying this key idea together with a largely overlooked concentration result in separable Hilbert spaces (for which we provide an independent, simplified derivation), we are able to provide a tighter analysis of the GP-UCB algorithm.", "keywords": "Kernel Bandits;Online Learning;Self-Normalized Concentration;Online Regression", "primary_area": "", "supplementary_material": "/attachment/3c793c7a58ab5119b290af1e538599057f22d026.zip", "author": "Justin Whitehouse;Aaditya Ramdas;Steven Wu", "authorids": "~Justin_Whitehouse1;~Aaditya_Ramdas2;~Steven_Wu1", "gender": ";M;M", "homepage": "https://jwhitehouse11.github.io/;http://stat.cmu.edu/~aramdas;https://zstevenwu.com/", "dblp": "218/6673;117/3518;137/8350", "google_scholar": "https://scholar.google.nl/citations?user=LxpnsSMAAAAJ;ZvFaPxUAAAAJ;MbF6rTEAAAAJ", "orcid": ";0000-0003-0497-311X;", "linkedin": ";;zstevenwu/", "or_profile": "~Justin_Whitehouse1;~Aaditya_Ramdas2;~Zhiwei_Steven_Wu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwhitehouse2023on,\ntitle={On the Sublinear Regret of {GP}-{UCB}},\nauthor={Justin Whitehouse and Aaditya Ramdas and Steven Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=G3aubF5Wnw}\n}", "github": "", "project": "", "reviewers": "x2D4;uTye;e62U;GVZJ;CiaU;J83V", "pdf_size": 508427, "rating": "1;4;4;6;6;7", "confidence": "5;4;5;2;4;3", "soundness": "4;3;3;3;4;3", "novelty": "1;2;4;3;2;3", "presentation": "1;3;2;2;4;3", "wc_summary": "42;38;43;53;96;91", "wc_strengths": "27;45;87;59;129;1", "wc_weaknesses": "349;154;308;102;226;1", "wc_questions": "1;3;33;2;98;1", "wc_limitations": "1;1;1;1;23;1", "wc_review": "420;241;472;217;572;95", "wc_reply_reviewers": "146;127;482;0;75;0", "wc_reply_authors": "199;171;426;0;50;0", "reply_reviewers": "2;1;1;0;1;0", "reply_authors": "2;2;2;1;2;1", "rating_avg": [ 4.666666666666667, 1.9720265943665387 ], "confidence_avg": [ 3.8333333333333335, 1.0671873729054748 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.9574271077563381 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 60.5, 23.81001189975903 ], "wc_strengths_avg": [ 58.0, 41.356176483487125 ], "wc_weaknesses_avg": [ 190.0, 119.20710269666542 ], "wc_questions_avg": [ 23.0, 35.435387209210326 ], "wc_limitations_avg": [ 4.666666666666667, 8.198915917499228 ], "wc_review_avg": [ 336.1666666666667, 164.57765813000123 ], "wc_reply_reviewers_avg": [ 138.33333333333334, 163.59367007585865 ], "wc_reply_authors_avg": [ 141.0, 149.1822152045388 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7391491482878364, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=570230909577835382&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Implicit Bias 
of Gradient Descent for Two-layer ReLU and Leaky ReLU Networks on Nearly-orthogonal Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72230", "id": "G560qr59Gi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/602f5c1b803c53b2aaf0b3864bf3383a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=G560qr59Gi", "openreview": "https://openreview.net/forum?id=G560qr59Gi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72230", "video": "https://nips.cc/virtual/2023/poster/72230", "author_site": "Yiwen Kou, Zixiang Chen, Quanquan Gu", "tldr": "", "abstract": "The implicit bias towards solutions with favorable properties is believed to be a key reason why neural networks trained by gradient-based optimization can generalize well. While the implicit bias of gradient flow has been widely studied for homogeneous neural networks (including ReLU and leaky ReLU networks), the implicit bias of gradient descent is currently only understood for smooth neural networks. Therefore, implicit bias in non-smooth neural networks trained by gradient descent remains an open question. In this paper, we aim to answer this question by studying the implicit bias of gradient descent for training two-layer fully connected (leaky) ReLU neural networks. We show that when the training data are nearly-orthogonal, for the leaky ReLU activation function, gradient descent will find a network with a stable rank that converges to $1$, whereas for the ReLU activation function, gradient descent will find a neural network with a stable rank that is upper bounded by a constant. Additionally, we show that gradient descent will find a neural network such that all the training data points have the same normalized margin asymptotically.
Experiments on both synthetic and real data back up our theoretical findings.", "keywords": "ReLU Neural Networks;Implicit Bias;Deep Learning Theory", "primary_area": "", "supplementary_material": "/attachment/e4a50dec775a6ca71a65b003efd05c51de21a475.zip", "author": "Yiwen Kou;Zixiang Chen;Quanquan Gu", "authorids": "~Yiwen_Kou1;~Zixiang_Chen1;~Quanquan_Gu1", "gender": "F;M;M", "homepage": "https://evankou.github.io/;https://sites.google.com/view/zxchen;http://web.cs.ucla.edu/~qgu/", "dblp": "323/9058;137/3624;50/4597", "google_scholar": "https://scholar.google.com/citations?hl=en;6nrCHr0AAAAJ;GU9HgNAAAAAJ", "orcid": ";;", "linkedin": "yiwen-kou-5a444916b/;;", "or_profile": "~Yiwen_Kou1;~Zixiang_Chen1;~Quanquan_Gu1", "aff": "University of California, Los Angeles; University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nkou2023implicit,\ntitle={Implicit Bias of Gradient Descent for Two-layer Re{LU} and Leaky Re{LU} Networks on Nearly-orthogonal Data},\nauthor={Yiwen Kou and Zixiang Chen and Quanquan Gu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=G560qr59Gi}\n}", "github": "", "project": "", "reviewers": "4Kvz;c8EH;6oj7;BcDb", "pdf_size": 867781, "rating": "6;6;7;7", "confidence": "4;3;4;3", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "51;113;112;168", "wc_strengths": "25;60;164;91", "wc_weaknesses": "55;113;254;186", "wc_questions": "2;60;4;21", "wc_limitations": "1;1;5;9", "wc_review": "134;347;539;475", "wc_reply_reviewers": "0;10;85;25", "wc_reply_authors": "0;0;123;11", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;3;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.0, 41.394444071638404 ], "wc_strengths_avg": [ 85.0, 51.2396330978277 ], "wc_weaknesses_avg": [ 152.0, 74.98333148106984 ], "wc_questions_avg": [ 21.75, 23.284920012746447 ], "wc_limitations_avg": [ 4.0, 3.3166247903554 ], "wc_review_avg": [ 373.75, 154.7213220600186 ], "wc_reply_reviewers_avg": [ 30.0, 32.977264895682296 ], "wc_reply_authors_avg": [ 33.5, 51.867619956963516 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5287539627309809955&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucla.edu;cs.ucla.edu;cs.ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Pick-a-Pic: An Open Dataset of User Preferences for Text-to-Image Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72229", "id": "G5RwHpBUv0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/73aacd8b3b05b4b503d58310b523553c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=G5RwHpBUv0", "openreview":
"https://openreview.net/forum?id=G5RwHpBUv0", "poster": "/media/PosterPDFs/NeurIPS%202023/72229.png?t=1701797912.8245661", "slides": "https://nips.cc/virtual/2023/poster/72229", "video": "https://nips.cc/virtual/2023/poster/72229", "author_site": "Yuval Kirstain, Adam Polyak, Uriel Singer, Shahbuland Matiana, Joe Penna, Omer Levy", "tldr": "", "abstract": "The ability to collect a large dataset of human preferences from text-to-image users is usually limited to companies, making such datasets inaccessible to the public. To address this issue, we create a web app that enables text-to-image users to generate images and specify their preferences. Using this web app we build Pick-a-Pic, a large, open dataset of text-to-image prompts and real users\u2019 preferences over generated images. We leverage this dataset to train a CLIP-based scoring function, PickScore, which exhibits superhuman performance on the task of predicting human preferences. Then, we test PickScore\u2019s ability to perform model evaluation and observe that it correlates better with human rankings than other automatic evaluation metrics. Therefore, we recommend using PickScore for evaluating future text-to-image generation models, and using Pick-a-Pic prompts as a more relevant dataset than MS-COCO. Finally, we demonstrate how PickScore can enhance existing text-to-image models via ranking.", "keywords": "text-to-image;human-preferences;dataset", "primary_area": "", "supplementary_material": "/attachment/24e078c419aeddd841b30d52bc0a1ac83fc71da7.pdf", "author": "Yuval Kirstain;Adam Polyak;Uriel Singer;Shahbuland Matiana;Joe Penna;Omer Levy", "authorids": "~Yuval_Kirstain1;~Adam_Polyak1;~Uriel_Singer1;~Shahbuland_Matiana1;~Joe_Penna1;~Omer_Levy1", "gender": ";;;M;M;M", "homepage": "https://www.yuvalkirstain.com/;;https://il.linkedin.com/in/urielsinger;;https://en.wikipedia.org/wiki/Joe_Penna;", "dblp": "283/4382;;238/0243;304/2425;;117/4866", "google_scholar": "sJKiJQwAAAAJ;;nIEep3cAAAAJ;JUwVT7cAAAAJ;;PZVd2h8AAAAJ", "orcid": ";;0000-0001-8451-8533;;;0000-0001-7300-8191", "linkedin": "https://www.linkedin.com/mwlite/in/yuval-kirstain-ab2359187;;;shahbuland/;joe-penna-74000011b/;", "or_profile": "~Yuval_Kirstain1;~Adam_Polyak1;~Uriel_Singer1;~Shahbuland_Matiana1;~Joe_Penna1;~Omer_Levy1", "aff": "Tel Aviv University;;Meta AI Research;University of Waterloo;;Tel Aviv University", "aff_domain": "tau.ac.il;;meta.com;uwaterloo.ca;;tau.ac.il", "position": "PhD student;;Researcher;Undergrad student;;Senior Lecturer", "bibtex": "@inproceedings{\nkirstain2023pickapic,\ntitle={Pick-a-Pic: An Open Dataset of User Preferences for Text-to-Image Generation},\nauthor={Yuval Kirstain and Adam Polyak and Uriel Singer and Shahbuland Matiana and Joe Penna and Omer Levy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=G5RwHpBUv0}\n}", "github": "", "project": "", "reviewers": "UkC5;c5Um;UW8i;hbT1", "pdf_size": 6755208, "rating": "6;6;7;8", "confidence": "5;4;4;5", "soundness": "3;3;4;3", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "60;48;145;60", "wc_strengths": "43;43;91;90", "wc_weaknesses": "89;243;71;99", "wc_questions": "267;3;59;20", "wc_limitations": "62;1;68;18", "wc_review": "521;338;434;287", "wc_reply_reviewers": "16;0;26;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], 
"novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 78.25, 38.84826250940961 ], "wc_strengths_avg": [ 66.75, 23.75263143316967 ], "wc_weaknesses_avg": [ 125.5, 68.57659950741214 ], "wc_questions_avg": [ 87.25, 105.74586280323217 ], "wc_limitations_avg": [ 37.25, 28.472574523565655 ], "wc_review_avg": [ 395.0, 89.87491307367145 ], "wc_reply_reviewers_avg": [ 13.5, 9.313968005098578 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 355, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15735538213898405416&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tau.ac.il;;meta.com;uwaterloo.ca;;tau.ac.il", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Tel Aviv University;Meta;University of Waterloo", "aff_unique_dep": ";Meta AI Research;", "aff_unique_url": "https://www.tau.ac.il;https://meta.com;https://uwaterloo.ca", "aff_unique_abbr": "TAU;Meta AI;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Israel;United States;Canada" }, { "id": "G6yq9v8O0U", "title": "Factorized Tensor Networks for Multi-Task and Multi-Domain Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-task and multi-domain learning methods seek to learn multiple tasks/domains, jointly or one after another, using a single unified network. The key challenge and opportunity is to exploit shared information across tasks and domains to improve the efficiency of the unified network. The efficiency can be in terms of accuracy, storage cost, computation, or sample complexity. In this paper, we propose a factorized tensor network (FTN) that can achieve accuracy comparable to independent single-task/domain networks with a small number of additional parameters. FTN uses a frozen backbone network from a source model and incrementally adds task/domain-specific low-rank tensor factors to the shared frozen network. This approach can adapt to a large number of target domains and tasks without catastrophic forgetting. Furthermore, FTN requires a significantly smaller number of task-specific parameters compared to existing methods. We performed experiments on widely used multi-domain and multi-task datasets. We observed that FTN achieves similar accuracy as single-task/domain methods while using 2--6\\% additional parameters per task. We also demonstrate the effectiveness of FTN with domain adaptation for image generation.", "keywords": "multi-task learning;multi-domain learning;low-rank tensors;multipath networks", "primary_area": "", "supplementary_material": "/attachment/63c930a327b938b15d280a0cd0f86232ecf27af1.pdf", "author": "Yash Garg;Nebiyou Yismaw;Rakib Hyder;Ashley Prater-Bennette;M. 
Salman Asif", "authorids": "~Yash_Garg3;~Nebiyou_Yismaw1;~Rakib_Hyder1;~Ashley_Prater-Bennette1;~M._Salman_Asif1", "gender": "M;;M;F;", "homepage": ";;;;", "dblp": ";;212/6501.html;158/9018;", "google_scholar": "Lz5Jao4AAAAJ;;1q1a4wsAAAAJ;f1WPBE8AAAAJ;", "orcid": "0000-0001-5584-3169;;0000-0003-4191-301X;;", "linkedin": "yash-garg-881b73137/;;rakib-hyder-b03123a2;;", "or_profile": "~Yash_Garg3;~Nebiyou_Yismaw1;~Rakib_Hyder1;~Ashley_Prater-Bennette1;~M._Salman_Asif1", "aff": "University of California, Riverside;;Oppo, Seattle, USA;Air Force Research Laboratory;", "aff_domain": "ucr.edu;;oppo.com;us.af.mil;", "position": "PhD student;;Researcher;Principal Researcher;", "bibtex": "@misc{\ngarg2023factorized,\ntitle={Factorized Tensor Networks for Multi-Task and Multi-Domain Learning},\nauthor={Yash Garg and Nebiyou Yismaw and Rakib Hyder and Ashley Prater-Bennette and M. Salman Asif},\nyear={2023},\nurl={https://openreview.net/forum?id=G6yq9v8O0U}\n}", "github": "", "project": "", "reviewers": "TB3s;nJmA;yeXy;xDWK", "site": "https://openreview.net/forum?id=G6yq9v8O0U", "pdf_size": 537595, "rating": "6;6;6;6", "confidence": "5;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "47;67;86;213", "wc_strengths": "21;169;74;63", "wc_weaknesses": "644;247;50;164", "wc_questions": "9;55;21;73", "wc_limitations": "15;13;8;107", "wc_review": "736;551;239;620", "wc_reply_reviewers": "197;78;72;69", "wc_reply_authors": "2321;53;58;37", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.25, 64.84741706498417 ], "wc_strengths_avg": [ 81.75, 54.117349343810254 ], "wc_weaknesses_avg": [ 276.25, 223.54236175722937 ], "wc_questions_avg": [ 39.5, 25.666125535421198 ], "wc_limitations_avg": [ 35.75, 41.215136782497765 ], "wc_review_avg": [ 536.5, 184.04415231134078 ], "wc_reply_reviewers_avg": [ 104.0, 53.791263231123324 ], "wc_reply_authors_avg": [ 617.25, 983.6911037007502 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17284805109434481145&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Riverside;OPPO;Air Force Research Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucr.edu;;https://www.afrl.af.mil/", "aff_unique_abbr": "UCR;;AFRL", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Riverside;Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CSLP-AE: A Contrastive Split-Latent Permutation Autoencoder Framework for Zero-Shot Electroencephalography Signal Conversion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72228", "id": "G7Y145tm2F", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2aab54135bd206ef6d4949ce17528d98-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=G7Y145tm2F", "openreview": "https://openreview.net/forum?id=G7Y145tm2F", "poster": "/media/PosterPDFs/NeurIPS%202023/72228.png?t=1702326466.7278285", "slides": "https://nips.cc/virtual/2023/poster/72228", "video": "https://nips.cc/virtual/2023/poster/72228", 
"author_site": "Anders N\u00f8rskov, Alexander Neergaard Zahid, Morten M\u00f8rup", "tldr": "", "abstract": "Electroencephalography (EEG) is a prominent non-invasive neuroimaging technique providing insights into brain function. Unfortunately, EEG data exhibit a high degree of noise and variability across subjects hampering generalizable signal extraction. Therefore, a key aim in EEG analysis is to extract the underlying neural activation (content) as well as to account for the individual subject variability (style). We hypothesize that the ability to convert EEG signals between tasks and subjects requires the extraction of latent representations accounting for content and style. Inspired by recent advancements in voice conversion technologies, we propose a novel contrastive split-latent permutation autoencoder (CSLP-AE) framework that directly optimizes for EEG conversion. Importantly, the latent representations are guided using contrastive learning to promote the latent splits to explicitly represent subject (style) and task (content). We contrast CSLP-AE to conventional supervised, unsupervised (AE), and self-supervised (contrastive learning) training and find that the proposed approach provides favorable generalizable characterizations of subject and task. Importantly, the procedure also enables zero-shot conversion between unseen subjects. While the present work only considers conversion of EEG, the proposed CSLP-AE provides a general framework for signal conversion and extraction of content (task activation) and style (subject variability) components of general interest for the modeling and analysis of biological signals.", "keywords": "zero-shot conversion;representations learning;contrastive learning;electroencephalography;autoencoder;subject variability;permutation invariant training", "primary_area": "", "supplementary_material": "/attachment/d21760855327552a4b60232b568d39d19dfc89ef.zip", "author": "Anders Vestergaard N\u00f8rskov;Alexander Neergaard Zahid;Morten M\u00f8rup", "authorids": "~Anders_Vestergaard_N\u00f8rskov1;~Alexander_Neergaard_Zahid1;~Morten_M\u00f8rup1", "gender": "Not Specified;M;M", "homepage": ";https://neergaard.github.io;http://www.compute.dtu.dk/~mmor", "dblp": "361/0066;355/9521;69/1866", "google_scholar": ";BS6yUOEAAAAJ;RQonsgMAAAAJ", "orcid": "0009-0000-5675-917X;0000-0001-6881-9766;0000-0003-4985-4368", "linkedin": ";alexanderneergaard/;", "or_profile": "~Anders_Vestergaard_N\u00f8rskov1;~Alexander_Neergaard_Zahid1;~Morten_M\u00f8rup1", "aff": "Technical University of Denmark;Technical University of Denmark;Technical University of Denmark", "aff_domain": "dtu.dk;dtu.dk;dtu.dk", "position": "MS student;Postdoc;Professor", "bibtex": "@inproceedings{\nn{\\o}rskov2023cslpae,\ntitle={{CSLP}-{AE}: A Contrastive Split-Latent Permutation Autoencoder Framework for Zero-Shot Electroencephalography Signal Conversion},\nauthor={Anders Vestergaard N{\\o}rskov and Alexander Neergaard Zahid and Morten M{\\o}rup},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=G7Y145tm2F}\n}", "github": "", "project": "", "reviewers": "MXTP;4KfP;qodj;VVQF", "pdf_size": 9790069, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "2;3;2;3", "novelty": "2;3;2;2", "presentation": "3;3;2;3", "wc_summary": "219;97;81;66", "wc_strengths": "144;26;115;83", "wc_weaknesses": "427;128;180;257", "wc_questions": "99;125;104;300", "wc_limitations": "4;13;59;7", "wc_review": "893;389;539;713", 
"wc_reply_reviewers": "15;101;15;173", "wc_reply_authors": "0;389;22;467", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 115.75, 60.61095198064456 ], "wc_strengths_avg": [ 92.0, 43.78926809162263 ], "wc_weaknesses_avg": [ 248.0, 113.07740711565684 ], "wc_questions_avg": [ 157.0, 83.13543167627147 ], "wc_limitations_avg": [ 20.75, 22.320114247019436 ], "wc_review_avg": [ 633.5, 188.6604092012948 ], "wc_reply_reviewers_avg": [ 76.0, 66.09841147864296 ], "wc_reply_authors_avg": [ 219.5, 210.45961607871473 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10562754419150347971&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "dtu.dk;dtu.dk;dtu.dk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technical University of Denmark", "aff_unique_dep": "", "aff_unique_url": "https://www.tek.dk", "aff_unique_abbr": "DTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "On the Pareto Front of Multilingual Neural Machine Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72227", "id": "G7sQlfTzmY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/690eb240baf1180b69dac48fc905c918-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=G7sQlfTzmY", "openreview": "https://openreview.net/forum?id=G7sQlfTzmY", "poster": "/media/PosterPDFs/NeurIPS%202023/72227.png?t=1701423484.9971666", "slides": "https://nips.cc/virtual/2023/poster/72227", "video": "https://nips.cc/virtual/2023/poster/72227", "author_site": "Liang Chen, Shuming Ma, Dongdong Zhang, Furu Wei, Baobao Chang", "tldr": "", "abstract": "In this work, we study how the performance of a given direction changes with its sampling ratio in Multilingual Neural Machine Translation (MNMT). By training over 200 multilingual models with various model sizes, data sizes, and language directions, we find it interesting that the performance of certain translation direction does not always improve with the increase of its weight in the multi-task optimization objective. Accordingly, scalarization method leads to a multitask trade-off front that deviates from the traditional Pareto front when there exists data imbalance in the training corpus, which poses a great challenge to improve the overall performance of all directions. Based on our observations, we propose the Double Power Law to predict the unique performance trade-off front in MNMT, which is robust across various languages, data adequacy, and the number of tasks. Finally, we formulate the sample ratio selection problem in MNMT as an optimization problem based on the Double Power Law. Extensive experiments show that it achieves better performance than temperature searching and gradient manipulation methods with only 1/5 to 1/2 of the total training budget. 
We release the code at https://github.com/pkunlp-icler/ParetoMNMT for reproduction.", "keywords": "Multilingual Neural Machine Translation;Multitask Learning;Pareto Optimization", "primary_area": "", "supplementary_material": "", "author": "Liang Chen;Shuming Ma;Dongdong Zhang;Furu Wei;Baobao Chang", "authorids": "~Liang_Chen10;~Shuming_Ma1;~Dongdong_Zhang4;~Furu_Wei1;~Baobao_Chang1", "gender": "M;;M;M;M", "homepage": "https://chenllliang.github.io;https://www.microsoft.com/en-us/research/people/shumma/;https://www.microsoft.com/en-us/research/people/dozhang/;https://www.microsoft.com/en-us/research/people/fuwei/;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6027", "dblp": "01/5394-24;;02/621-1.html;72/5870;91/6051", "google_scholar": "lMKPaTYAAAAJ;;w2qu71oAAAAJ;G-V1VpwAAAAJ;LaKNyhQAAAAJ", "orcid": ";;;;0000-0003-2824-6750", "linkedin": ";;;;", "or_profile": "~Liang_Chen10;~Shuming_Ma1;~Dongdong_Zhang4;~Furu_Wei1;~Baobao_Chang1", "aff": "Peking University;Microsoft;Microsoft Research Asia;Microsoft Research;Peking University", "aff_domain": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn", "position": "PhD student;Researcher;Researcher;Distinguished Scientist;Associate Professor", "bibtex": "@inproceedings{\nchen2023on,\ntitle={On the Pareto Front of Multilingual Neural Machine Translation},\nauthor={Liang Chen and Shuming Ma and Dongdong Zhang and Furu Wei and Baobao Chang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=G7sQlfTzmY}\n}", "github": "", "project": "", "reviewers": "hKkr;CMx8;wMmE;mnLR;4R4K", "pdf_size": 1392087, "rating": "5;6;6;6;7", "confidence": "4;4;4;4;4", "soundness": "3;3;3;2;3", "novelty": "2;2;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "173;81;139;77;124", "wc_strengths": "96;91;115;56;66", "wc_weaknesses": "237;264;153;57;84", "wc_questions": "26;29;47;15;16", "wc_limitations": "5;29;2;1;14", "wc_review": "537;494;456;206;304", "wc_reply_reviewers": "217;44;188;12;24", "wc_reply_authors": "470;44;96;56;25", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 118.8, 36.190606516056064 ], "wc_strengths_avg": [ 84.8, 21.25464655081331 ], "wc_weaknesses_avg": [ 159.0, 81.45428165541699 ], "wc_questions_avg": [ 26.6, 11.568923891183656 ], "wc_limitations_avg": [ 10.2, 10.457533169921097 ], "wc_review_avg": [ 399.4, 124.57222804461675 ], "wc_reply_reviewers_avg": [ 97.0, 87.22843573055749 ], "wc_reply_authors_avg": [ 138.2, 167.5212225361312 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8096739862854386572&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "Peking U;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique":
"China;United States" }, { "title": "Guide Your Agent with Adaptive Multimodal Rewards", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72226", "id": "G8nal7MpIQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa933b5abc1be30baece1d230ec575a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=G8nal7MpIQ", "openreview": "https://openreview.net/forum?id=G8nal7MpIQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72226.png?t=1702387404.3419616", "slides": "https://nips.cc/virtual/2023/poster/72226", "video": "https://nips.cc/virtual/2023/poster/72226", "author_site": "Changyeon Kim, Younggyo Seo, Hao Liu, Lisa Lee, Jinwoo Shin, Honglak Lee, Kimin Lee", "tldr": "", "abstract": "Developing an agent capable of adapting to unseen environments remains a difficult challenge in imitation learning. This work presents Adaptive Return-conditioned Policy (ARP), an efficient framework designed to enhance the agent's generalization ability using natural language task descriptions and pre-trained multimodal encoders. Our key idea is to calculate a similarity between visual observations and natural language instructions in the pre-trained multimodal embedding space (such as CLIP) and use it as a reward signal. We then train a return-conditioned policy using expert demonstrations labeled with multimodal rewards. Because the multimodal rewards provide adaptive signals at each timestep, our ARP effectively mitigates the goal misgeneralization. This results in superior generalization performances even when faced with unseen text instructions, compared to existing text-conditioned policies. To improve the quality of rewards, we also introduce a fine-tuning method for pre-trained multimodal encoders, further enhancing the performance. 
Video demonstrations and source code are available on the project website: \\url{https://sites.google.com/view/2023arp}.", "keywords": "Reinforcement Learning;Multimodal Representation;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/b88944f51905a5f3bda0d958ff72c75b05b582ab.zip", "author": "Changyeon Kim;Younggyo Seo;Hao Liu;Lisa Lee;Jinwoo Shin;Honglak Lee;Kimin Lee", "authorids": "~Changyeon_Kim1;~Younggyo_Seo1;~Hao_Liu1;~Lisa_Lee1;~Jinwoo_Shin1;~Honglak_Lee2;~Kimin_Lee1", "gender": "M;M;M;M;M;M;F", "homepage": "https://changyeon.page;https://younggyo.me/;https://sites.google.com/site/mijirim/;https://sites.google.com/view/kiminlee;https://haoliu.ai;http://web.eecs.umich.edu/~honglak;http://leelisa.com/", "dblp": "271/7767;265/5586;31/7062;183/6849;09/3214-55;58/2562;97/9403", "google_scholar": "vEPeAaYAAAAJ;tI1-YwIAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;92M8xv4AAAAJ;wtK4Yh4AAAAJ;fmSHtE8AAAAJ;eGIw04UAAAAJ", "orcid": ";;;;;;0000-0002-9791-484X", "linkedin": "changyeon-kim-29972b194/;;;;;;lileee", "or_profile": "~Changyeon_Kim1;~Younggyo_Seo1;~Jinwoo_Shin1;~Kimin_Lee1;~Hao_Liu10;~Honglak_Lee1;~Lisa_Seung-Yeon_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Google;University of California, Berkeley;University of Michigan;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;google.com;berkeley.edu;umich.edu;", "position": "MS student;PhD student;Full Professor;Researcher;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nkim2023guide,\ntitle={Guide Your Agent with Adaptive Multimodal Rewards},\nauthor={Changyeon Kim and Younggyo Seo and Hao Liu and Lisa Lee and Jinwoo Shin and Honglak Lee and Kimin Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=G8nal7MpIQ}\n}", "github": "", "project": "", "reviewers": "9gB5;HCX5;nhjK;qrbz", "pdf_size": 5455493, "rating": "4;6;6;7", "confidence": "4;4;3;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "87;130;50;120", "wc_strengths": "47;140;48;59", "wc_weaknesses": "67;422;151;191", "wc_questions": "28;136;76;99", "wc_limitations": "16;73;74;33", "wc_review": "245;901;399;502", "wc_reply_reviewers": "47;251;147;94", "wc_reply_authors": "65;200;504;317", "reply_reviewers": "1;2;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.75, 31.33189269737786 ], "wc_strengths_avg": [ 73.5, 38.68139087468288 ], "wc_weaknesses_avg": [ 207.75, 131.54348140443904 ], "wc_questions_avg": [ 84.75, 39.13677937694925 ], "wc_limitations_avg": [ 49.0, 25.228951623085727 ], "wc_review_avg": [ 511.75, 242.63076371309555 ], "wc_reply_reviewers_avg": [ 134.75, 75.86954263734559 ], "wc_reply_authors_avg": [ 271.5, 161.15287772795122 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16302201948863837556&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;google.com;berkeley.edu;umich.edu;", 
"author_num": 7, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Google;University of California, Berkeley;University of Michigan", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.google.com;https://www.berkeley.edu;https://www.umich.edu", "aff_unique_abbr": "KAIST;Google;UC Berkeley;UM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Berkeley", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "South Korea;United States" }, { "title": "KAKURENBO: Adaptively Hiding Samples in Deep Neural Network Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72225", "id": "GAsRl2ElHk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7712b1075f5e0eae297702845714098f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GAsRl2ElHk", "openreview": "https://openreview.net/forum?id=GAsRl2ElHk", "poster": "/media/PosterPDFs/NeurIPS%202023/72225.png?t=1701944839.367833", "slides": "https://nips.cc/virtual/2023/poster/72225", "video": "https://nips.cc/virtual/2023/poster/72225", "author_site": "Truong Thao Nguyen, Balazs Gerofi, Edgar Josafat Martinez-Noriega, Fran\u00e7ois Trahay, Mohamed Wahib", "tldr": "", "abstract": "This paper proposes a method for hiding the least-important samples during the training of deep neural networks to increase efficiency, i.e., to reduce the cost of training. Using information about the loss and prediction confidence during training, we adaptively find samples to exclude in a given epoch based on their contribution to the overall learning process, without significantly degrading accuracy. We explore the converge properties when accounting for the reduction in the number of SGD updates. 
Empirical results on various large-scale datasets and models used directly in image classification and segmentation show that while the with-replacement importance sampling algorithm performs poorly on large datasets, our method can reduce total training time by up to 22\\% impacting accuracy only by 0.4\\% compared to the baseline.", "keywords": "Deep learning;Sampling", "primary_area": "", "supplementary_material": "/attachment/64ed5b76b2a5a3c0507782a2cb15db2cefeaf02e.pdf", "author": "Thao Nguyen Truong;Balazs Gerofi;Edgar Josafat Martinez-Noriega;Fran\u00e7ois Trahay;Mohamed Wahib", "authorids": "~Thao_Nguyen_Truong1;~Balazs_Gerofi1;~Edgar_Josafat_Martinez-Noriega1;~Fran\u00e7ois_Trahay1;~Mohamed_Wahib1", "gender": "M;M;M;M;M", "homepage": "https://researchmap.jp/nguyentt;http://bgerofi.github.io/;;https://trahay.wp.imtbs-tsp.eu/;https://www.r-ccs.riken.jp/en/research/labs/hpaisrt/", "dblp": "233/1462.html;92/10023.html;;59/6318;10/6150.html", "google_scholar": ";BV9mY_oAAAAJ;bcG_hJkAAAAJ;https://scholar.google.fr/citations?user=DiqC1uUAAAAJ;C3fmEegAAAAJ", "orcid": "0000-0003-3641-374X;;0000-0001-6486-5466;0000-0001-7329-1812;0000-0002-7165-2095", "linkedin": ";;edgar-josafat-martinez-noriega-abb76378/;;", "or_profile": "~Thao_Nguyen_Truong1;~Balazs_Gerofi1;~Edgar_Josafat_Martinez-Noriega1;~Fran\u00e7ois_Trahay1;~Mohamed_Wahib1", "aff": "AIST, National Institute of Advanced Industrial Science and Technology;Intel;AIST, National Institute of Advanced Industrial Science and Technology;Telecom SudParis;RIKEN", "aff_domain": "aist.go.jp;intel.com;aist.go.jp;telecom-sudparis.eu;riken.jp", "position": "Researcher;Researcher;Postdoc;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ntruong2023kakurenbo,\ntitle={{KAKURENBO}: Adaptively Hiding Samples in Deep Neural Network Training},\nauthor={Thao Nguyen Truong and Balazs Gerofi and Edgar Josafat Martinez-Noriega and Fran{\\c{c}}ois Trahay and Mohamed Wahib},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GAsRl2ElHk}\n}", "github": "", "project": "", "reviewers": "wMxv;apYw;Zijr;gHmy;QXg1", "pdf_size": 2325153, "rating": "4;5;6;6;7", "confidence": "5;4;4;4;3", "soundness": "2;3;2;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;3;4;3", "wc_summary": "80;76;100;72;137", "wc_strengths": "172;23;45;62;69", "wc_weaknesses": "482;123;47;100;65", "wc_questions": "2;57;94;87;13", "wc_limitations": "3;13;7;10;13", "wc_review": "739;292;293;331;297", "wc_reply_reviewers": "65;85;63;53;26", "wc_reply_authors": "208;53;0;32;0", "reply_reviewers": "2;2;1;1;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 93.0, 24.01666088364492 ], "wc_strengths_avg": [ 74.2, 51.409726706139956 ], "wc_weaknesses_avg": [ 163.4, 161.48634617205258 ], "wc_questions_avg": [ 50.6, 37.483863194713535 ], "wc_limitations_avg": [ 9.2, 3.8157568056677826 ], "wc_review_avg": [ 390.4, 174.89608343241997 ], "wc_reply_reviewers_avg": [ 58.4, 19.24162155328911 ], "wc_reply_authors_avg": [ 58.6, 77.36562544179424 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9302605094190635, "gs_citation": 7, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13435211815238795600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "aist.go.jp;intel.com;aist.go.jp;telecom-sudparis.eu;riken.jp", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "National Institute of Advanced Industrial Science and Technology;Intel;Telecom SudParis;RIKEN", "aff_unique_dep": ";Intel Corporation;;", "aff_unique_url": "https://www.aist.go.jp;https://www.intel.com;https://www.telecom-sudparis.eu;https://www.riken.jp", "aff_unique_abbr": "AIST;Intel;TSP;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "Japan;United States;France" }, { "title": "TaskMet: Task-driven Metric Learning for Model Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72224", "id": "GCY9C43A4L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/91a5742235f70ae846436d9780e9f1d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GCY9C43A4L", "openreview": "https://openreview.net/forum?id=GCY9C43A4L", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72224", "video": "https://nips.cc/virtual/2023/poster/72224", "author_site": "Dishank Bansal, Ricky T. Q. Chen, Mustafa Mukadam, Brandon Amos", "tldr": "", "abstract": "Deep learning models are often used with some downstream task. Models solely trained to achieve accurate predictions may struggle to perform well on the desired downstream tasks. We propose using the task loss to learn a metric which parameterizes a loss to train the model. This approach does not alter the optimal prediction model itself, but rather changes the model learning to emphasize the information important for the downstream task. This enables us to achieve the best of both worlds: a prediction model trained in the original prediction space while also being valuable for the desired downstream task. We validate our approach through experiments conducted in two main settings: 1) decision-focused model learning scenarios involving portfolio optimization and budget allocation, and 2) reinforcement learning in noisy environments with distracting states.", "keywords": "task-based learning;decision-focused learning", "primary_area": "", "supplementary_material": "/attachment/ab59e6d71a55882292a9dbc8d3367984ffecee03.pdf", "author": "Dishank Bansal;Ricky T. Q. Chen;Mustafa Mukadam;Brandon Amos", "authorids": "~Dishank_Bansal1;~Ricky_T._Q._Chen1;~Mustafa_Mukadam1;~Brandon_Amos1", "gender": "M;M;;M", "homepage": "https://dishank-b.github.io/;http://www.mustafamukadam.com;http://bamos.github.io;http://www.rtqichen.com", "dblp": "213/7557;;133/4801.html;228/6698", "google_scholar": "https://scholar.google.ca/citations?user=OxQx8A8AAAAJ;yYpm9LoAAAAJ;d8gdZR4AAAAJ;7MxQd6UAAAAJ", "orcid": ";;;", "linkedin": "imdishank/;mhmukadam/;bdamos;", "or_profile": "~Dishank_Bansal1;~Mustafa_Mukadam1;~Brandon_Amos1;~Tian_Qi_Chen2", "aff": "Meta AI;Meta AI;Meta;FAIR Labs, Meta AI", "aff_domain": "meta.com;meta.com;meta.com;meta.com", "position": "Researcher;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbansal2023taskmet,\ntitle={TaskMet: Task-driven Metric Learning for Model Learning},\nauthor={Dishank Bansal and Ricky T. Q. 
Chen and Mustafa Mukadam and Brandon Amos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GCY9C43A4L}\n}", "github": "", "project": "", "reviewers": "jBQF;WKyg;wsqn;uXLc", "pdf_size": 800987, "rating": "5;6;7;7", "confidence": "3;3;4;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "77;81;40;74", "wc_strengths": "25;40;25;72", "wc_weaknesses": "288;24;9;32", "wc_questions": "56;63;354;51", "wc_limitations": "13;5;11;10", "wc_review": "459;213;439;239", "wc_reply_reviewers": "99;47;17;17", "wc_reply_authors": "128;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.0, 16.355427233796124 ], "wc_strengths_avg": [ 40.5, 19.189841062395487 ], "wc_weaknesses_avg": [ 88.25, 115.62087830491515 ], "wc_questions_avg": [ 131.0, 128.81964135953802 ], "wc_limitations_avg": [ 9.75, 2.947456530637899 ], "wc_review_avg": [ 337.5, 112.10151649286463 ], "wc_reply_reviewers_avg": [ 45.0, 33.49626844888845 ], "wc_reply_authors_avg": [ 32.0, 55.42562584220407 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7304276973341447232&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;meta.com;meta.com;meta.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta AI", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Facing Off World Model Backbones: RNNs, Transformers, and S4", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72223", "id": "GDYuzX0rwj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6c65eb9b56719c1aa45ff73874de317-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GDYuzX0rwj", "openreview": "https://openreview.net/forum?id=GDYuzX0rwj", "poster": "/media/PosterPDFs/NeurIPS%202023/72223.png?t=1702265195.977963", "slides": "https://nips.cc/virtual/2023/poster/72223", "video": "https://nips.cc/virtual/2023/poster/72223", "author_site": "Fei Deng, Junyeong Park, Sungjin Ahn", "tldr": "", "abstract": "World models are a fundamental component in model-based reinforcement learning (MBRL). To perform temporally extended and consistent simulations of the future in partially observable environments, world models need to possess long-term memory. However, state-of-the-art MBRL agents, such as Dreamer, predominantly employ recurrent neural networks (RNNs) as their world model backbone, which have limited memory capacity. In this paper, we seek to explore alternative world model backbones for improving long-term memory. In particular, we investigate the effectiveness of Transformers and Structured State Space Sequence (S4) models, motivated by their remarkable ability to capture long-range dependencies in low-dimensional sequences and their complementary strengths. 
We propose S4WM, the first world model compatible with parallelizable SSMs including S4 and its variants. By incorporating latent variable modeling, S4WM can efficiently generate high-dimensional image sequences through latent imagination. Furthermore, we extensively compare RNN-, Transformer-, and S4-based world models across four sets of environments, which we have tailored to assess crucial memory capabilities of world models, including long-term imagination, context-dependent recall, reward prediction, and memory-based reasoning. Our findings demonstrate that S4WM outperforms Transformer-based world models in terms of long-term memory, while exhibiting greater efficiency during training and imagination. These results pave the way for the development of stronger MBRL agents.", "keywords": "world models;structured state space sequence models;S4;long-term memory;model-based reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/0ba115c09189f48fc83f06c6657676da2d179bb4.zip", "author": "Fei Deng;Junyeong Park;Sungjin Ahn", "authorids": "~Fei_Deng1;~Junyeong_Park1;~Sungjin_Ahn1", "gender": "M;M;", "homepage": ";;", "dblp": "46/10037-1;;", "google_scholar": "https://scholar.google.com/citations?hl=en;o9l_sIAAAAAJ;", "orcid": ";;", "linkedin": ";junyeong-park-043766191/;", "or_profile": "~Fei_Deng1;~Junyeong_Park1;~Sungjin_Ahn1", "aff": "Rutgers University;Hanyang University;", "aff_domain": "rutgers.edu;hanyang.ac.kr;", "position": "PhD student;Undergrad student;", "bibtex": "@inproceedings{\ndeng2023facing,\ntitle={Facing Off World Model Backbones: {RNN}s, Transformers, and S4},\nauthor={Fei Deng and Junyeong Park and Sungjin Ahn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GDYuzX0rwj}\n}", "github": "", "project": "", "reviewers": "ALhP;kwZk;vwzx;hTvu;TTTx", "pdf_size": 7094414, "rating": "4;5;5;5;6", "confidence": "3;4;4;4;5", "soundness": "3;3;3;3;4", "novelty": "2;2;2;2;3", "presentation": "3;3;2;3;4", "wc_summary": "60;35;130;62;57", "wc_strengths": "40;73;44;87;31", "wc_weaknesses": "111;252;81;235;62", "wc_questions": "42;50;372;162;38", "wc_limitations": "14;9;53;2;43", "wc_review": "267;419;680;548;231", "wc_reply_reviewers": "62;28;124;0;84", "wc_reply_authors": "325;130;611;902;144", "reply_reviewers": "1;1;2;0;2", "reply_authors": "1;1;2;2;1", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 68.8, 32.09610568277716 ], "wc_strengths_avg": [ 55.0, 21.307275752662516 ], "wc_weaknesses_avg": [ 148.2, 79.54721868173645 ], "wc_questions_avg": [ 132.8, 128.18486650147122 ], "wc_limitations_avg": [ 24.2, 20.053927296168197 ], "wc_review_avg": [ 429.0, 168.94377763031108 ], "wc_reply_reviewers_avg": [ 59.6, 43.125862310219375 ], "wc_reply_authors_avg": [ 422.4, 295.99905405254253 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999999, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6913722058584754505&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "rutgers.edu;hanyang.ac.kr;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Rutgers University;Hanyang University", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.hanyang.ac.kr", "aff_unique_abbr": "Rutgers;HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;South Korea" }, { "title": "Smooth Flipping Probability for Differential Private Sign Random Projection Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72222", "id": "GEMHw2sd9S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a128987fe57b27fa0c7a0b748b0fa1e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GEMHw2sd9S", "openreview": "https://openreview.net/forum?id=GEMHw2sd9S", "poster": "/media/PosterPDFs/NeurIPS%202023/72222.png?t=1701936936.2179797", "slides": "https://nips.cc/virtual/2023/poster/72222", "video": "https://nips.cc/virtual/2023/poster/72222", "author_site": "Ping Li, Xiaoyun Li", "tldr": "", "abstract": "We develop a series of differential privacy (DP) algorithms from a family of random projection (RP) and sign random projection (SignRP) methods. We first show how to improve the previous DP-RP approach using the ``optimal Gaussian mechanism''. Then, we propose a series of DP-SignRP algorithms that leverage the robustness of the ``sign flipping probability'' of random projections. That is, given $x = \\sum_{i=1}^p u_i w_{i}$ where $u$ is a $p$-dimensional data vector and $w$ is a symmetric random vector, $sign(x)$ only has a fairly small probability to be flipped if there is a small modification on data $u$, depending on the specific distribution of $w$. This robustness leads to our novel design of ``smooth flipping probability'' for SignRP-type algorithms with better utility than using the standard randomized response mechanism. Retrieval and classification experiments demonstrate that, among the presented DP-RP algorithms, \\textbf{DP-SignOPORP} (where OPORP is an improvement over the celebrated count-sketch algorithms), performs the best in general.\n\nIn the industrial practice, DP methods were not very popular for machine learning or search, largely because the performance typically would drop substantially if DP is applied. Since our proposed new DP algorithms have significantly improved the performance, it is anticipated that our work will motivate a wide adoption of DP in practice. 
Finally, we stress that, since our methods are applied to the original data (i.e., feature vectors), the privacy of downstream tasks is naturally protected.", "keywords": "Differential Privacy;Random Projection", "primary_area": "", "supplementary_material": "/attachment/3518edc944970bd8629d85f87dd4ed0c2a11e16f.pdf", "author": "Ping Li;Xiaoyun Li", "authorids": "~Ping_Li3;~Xiaoyun_Li2", "gender": "M;M", "homepage": "http://www.stat.rutgers.edu/home/pingli/;", "dblp": "62/5860-1;48/1982", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ping_Li3;~Xiaoyun_Li2", "aff": "LinkedIn;LinkedIn", "aff_domain": "linkedin.com;linkedin.com", "position": "Engineer;Researcher", "bibtex": "@inproceedings{\nli2023smooth,\ntitle={Smooth Flipping Probability for Differential Private Sign Random Projection Methods},\nauthor={Ping Li and Xiaoyun Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GEMHw2sd9S}\n}", "github": "", "project": "", "reviewers": "ywEG;hnU4;TuXU;bvWd", "pdf_size": 741593, "rating": "4;6;7;7", "confidence": "3;3;4;4", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "1;4;3;4", "wc_summary": "266;45;128;43", "wc_strengths": "66;108;25;103", "wc_weaknesses": "145;119;32;1", "wc_questions": "79;66;35;17", "wc_limitations": "19;1;1;6", "wc_review": "575;339;221;170", "wc_reply_reviewers": "74;15;16;16", "wc_reply_authors": "352;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 120.5, 90.73725805863873 ], "wc_strengths_avg": [ 75.5, 33.36540124140574 ], "wc_weaknesses_avg": [ 74.25, 59.495272921468306 ], "wc_questions_avg": [ 49.25, 24.539508960042376 ], "wc_limitations_avg": [ 6.75, 7.361215932167728 ], "wc_review_avg": [ 326.25, 156.1495677227446 ], "wc_reply_reviewers_avg": [ 30.25, 25.262373206015305 ], "wc_reply_authors_avg": [ 88.0, 152.4204710660612 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5535529767103576731&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 3, "email": "linkedin.com;linkedin.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "LinkedIn Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.linkedin.com", "aff_unique_abbr": "LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "$L_2$-Uniform Stability of Randomized Learning Algorithms: Sharper Generalization Bounds and Confidence Boosting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72221", "id": "GEQZ52oqxa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f7fc38fdd95fd146a471791b93ff9f12-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GEQZ52oqxa", "openreview": "https://openreview.net/forum?id=GEQZ52oqxa", "poster": "/media/PosterPDFs/NeurIPS%202023/72221.png?t=1701914728.2695718", "slides": "https://nips.cc/virtual/2023/poster/72221", "video": "https://nips.cc/virtual/2023/poster/72221", "author_site": "Xiaotong Yuan, 
Ping Li", "tldr": "", "abstract": "Exponential generalization bounds with near-optimal rates have recently been established for uniformly stable algorithms~\\citep{feldman2019high,bousquet2020sharper}. We seek to extend these best known high probability bounds from deterministic learning algorithms to the regime of randomized learning. One simple approach for achieving this goal is to define the stability for the expectation over the algorithm's randomness, which may result in sharper parameter but only leads to guarantees regarding the on-average generalization error. Another natural option is to consider the stability conditioned on the algorithm's randomness, which is way more stringent but may lead to generalization with high probability jointly over the randomness of sample and algorithm. The present paper addresses such a tension between these two alternatives and makes progress towards relaxing it inside a classic framework of confidence-boosting. To this end, we first introduce a novel concept of $L_2$-uniform stability that holds uniformly over data but in second-moment over the algorithm's randomness. Then as a core contribution of this work, we prove a strong exponential bound on the first-moment of generalization error under the notion of $L_2$-uniform stability. As an interesting consequence of the bound, we show that a bagging-based meta algorithm leads to near-optimal generalization with high probability jointly over the randomness of data and algorithm. We further substantialize these generic results to stochastic gradient descent (SGD) to derive sharper exponential bounds for convex or non-convex optimization with natural time-decaying learning rates, which have not been possible to prove with the existing stability-based generalization guarantees.", "keywords": "Uniform stability;Randomized learning algorithms;Confidence boosting;Generalization bounds;Stochastic gradient methods", "primary_area": "", "supplementary_material": "/attachment/5e0e24a5467a2df4c4984e080b228c356c5eeed2.pdf", "author": "Xiaotong Yuan;Ping Li", "authorids": "~Xiaotong_Yuan1;~Ping_Li3", "gender": "M;M", "homepage": "https://sites.google.com/site/xtyuan1980/;http://www.stat.rutgers.edu/home/pingli/", "dblp": "64/5926;62/5860-1", "google_scholar": "yzU6g24AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Xiaotong_Yuan1;~Ping_Li3", "aff": "Nanjing University;LinkedIn", "aff_domain": "nju.edu.cn;linkedin.com", "position": "Full Professor;Engineer", "bibtex": "@inproceedings{\nyuan2023luniform,\ntitle={\\$L\\_2\\$-Uniform Stability of Randomized Learning Algorithms: Sharper Generalization Bounds and Confidence Boosting},\nauthor={Xiaotong Yuan and Ping Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GEQZ52oqxa}\n}", "github": "", "project": "", "reviewers": "7EkU;RTFd;Yv2V;3pL8;ZVGG", "pdf_size": 396201, "rating": "5;6;7;7;8", "confidence": "3;4;4;3;3", "soundness": "2;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "38;89;283;124;235", "wc_strengths": "123;181;64;63;140", "wc_weaknesses": "189;146;82;87;1", "wc_questions": "125;241;52;24;47", "wc_limitations": "6;1;1;32;1", "wc_review": "481;658;482;330;424", "wc_reply_reviewers": "71;81;37;42;10", "wc_reply_authors": "47;16;34;48;6", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], 
"novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 153.8, 91.41422208825058 ], "wc_strengths_avg": [ 114.2, 45.490218728865216 ], "wc_weaknesses_avg": [ 101.0, 63.7589209444451 ], "wc_questions_avg": [ 97.8, 79.20959537833784 ], "wc_limitations_avg": [ 8.2, 12.056533498481228 ], "wc_review_avg": [ 475.0, 106.9579356569675 ], "wc_reply_reviewers_avg": [ 48.2, 25.372425977820882 ], "wc_reply_authors_avg": [ 30.2, 16.737980762326142 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.08006407690254361, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1220288736648039813&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;linkedin.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University;LinkedIn Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.linkedin.com", "aff_unique_abbr": "Nanjing U;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Transition-constant Normalization for Image Enhancement", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72220", "id": "GEWzHeHpLr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4163873c9ad623a87989d0a6eefe9442-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GEWzHeHpLr", "openreview": "https://openreview.net/forum?id=GEWzHeHpLr", "poster": "/media/PosterPDFs/NeurIPS%202023/72220.png?t=1699755838.8094113", "slides": "https://nips.cc/virtual/2023/poster/72220", "video": "https://nips.cc/virtual/2023/poster/72220", "author_site": "Jie Huang, man zhou, Jinghao Zhang, Gang Yang, Mingde Yao, Chongyi Li, Zhiwei Xiong, Feng Zhao", "tldr": "", "abstract": "Normalization techniques that capture image style by statistical representation have become a popular component in deep neural networks.\nAlthough image enhancement can be considered as a form of style transformation, there has been little exploration of how normalization affect the enhancement performance. 
\nTo fully leverage the potential of normalization, we present a novel Transition-Constant Normalization (TCN) for various image enhancement tasks.\nSpecifically, it consists of two streams of normalization operations arranged under an invertible constraint, along with a feature sub-sampling operation that satisfies the normalization constraint.\nTCN enjoys several merits, including being parameter-free, plug-and-play, and incurring no additional computational costs.\nWe provide various formats to utilize TCN for image enhancement, including seamless integration with enhancement networks, incorporation into encoder-decoder architectures for downsampling, and implementation of efficient architectures.\nThrough extensive experiments on multiple image enhancement tasks, like low-light enhancement, exposure correction, SDR2HDR translation, and image dehazing, our TCN consistently demonstrates performance improvements.\nBesides, it showcases extensive ability in other tasks including pan-sharpening and medical segmentation.\nThe code is available at \\textit{\\textcolor{blue}{https://github.com/huangkevinj/TCNorm}}.", "keywords": "Image Enhancement;Normalization;Image Restoration", "primary_area": "", "supplementary_material": "/attachment/8f1a4268e2e617fb6e04eef297348d437053094d.pdf", "author": "Jie Huang;Man Zhou;JingHao Zhang;Gang Yang;Mingde Yao;Chongyi Li;Zhiwei Xiong;Feng Zhao", "authorids": "~Jie_Huang4;~Man_Zhou4;~JingHao_Zhang2;~Gang_Yang4;~Mingde_Yao1;~Chongyi_Li1;~Zhiwei_Xiong1;~Feng_Zhao6", "gender": "M;;M;M;M;;M;M", "homepage": ";;https://jinghao99.github.io/;https://github.com/yggame;;;;https://bivlab123.github.io/", "dblp": ";;;;253/9580;;54/6827;181/2734-4", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;Lis9e2MAAAAJ;gctrxXsAAAAJ;fsE3MzwAAAAJ;;Snl0HPEAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-3518-3404;;0000-0002-5407-4641;0000-0001-9403-5818;;;;0000-0001-6767-8105", "linkedin": ";;;;;;;", "or_profile": "~Jie_Huang4;~Man_Zhou4;~JingHao_Zhang2;~Gang_Yang4;~Mingde_Yao1;~Chongyi_Li1;~Zhiwei_Xiong1;~Feng_Zhao6", "aff": "University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;USTC;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;;PhD student;PhD student;PhD student;;Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023transitionconstant,\ntitle={Transition-constant Normalization for Image Enhancement},\nauthor={Jie Huang and Man Zhou and JingHao Zhang and Gang Yang and Mingde Yao and Chongyi Li and Zhiwei Xiong and Feng Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GEWzHeHpLr}\n}", "github": "", "project": "", "reviewers": "uwKS;HV1b;yudq;5FQQ;LSyJ", "pdf_size": 2134889, "rating": "5;7;8;8;8", "confidence": "4;5;5;5;5", "soundness": "3;4;4;4;3", "novelty": "3;4;3;3;4", "presentation": "2;4;3;3;3", "wc_summary": "67;84;46;31;42", "wc_strengths": "24;83;76;86;45", "wc_weaknesses": "385;263;113;103;174", "wc_questions": "4;131;39;10;53", "wc_limitations": "8;5;28;12;16", "wc_review": "488;566;302;242;330", "wc_reply_reviewers": "0;22;41;48;25", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 1.16619037896906 ], "confidence_avg": 
[ 4.8, 0.39999999999999997 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 54.0, 19.00526242912736 ], "wc_strengths_avg": [ 62.8, 24.260255563369487 ], "wc_weaknesses_avg": [ 207.6, 105.4127127058212 ], "wc_questions_avg": [ 47.4, 45.548216210956056 ], "wc_limitations_avg": [ 13.8, 8.009993757800315 ], "wc_review_avg": [ 385.6, 121.43409735325577 ], "wc_reply_reviewers_avg": [ 27.2, 16.70209567689037 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9432422182837988, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1561794568511340428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "iSCAN: Identifying Causal Mechanism Shifts among Nonlinear Additive Noise Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72219", "id": "GEtXhqKW6X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c1d92835eb4e601f396c97ec60439fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GEtXhqKW6X", "openreview": "https://openreview.net/forum?id=GEtXhqKW6X", "poster": "/media/PosterPDFs/NeurIPS%202023/72219.png?t=1699604989.0832274", "slides": "https://nips.cc/virtual/2023/poster/72219", "video": "https://nips.cc/virtual/2023/poster/72219", "author_site": "Tianyu Chen, Kevin Bello, Bryon Aragam, Pradeep Ravikumar", "tldr": "", "abstract": "Structural causal models (SCMs) are widely used in various disciplines to represent causal relationships among variables in complex systems.\nUnfortunately, the underlying causal structure is often unknown, and estimating it from data remains a challenging task. \nIn many situations, however, the end goal is to localize the changes (shifts) in the causal mechanisms between related datasets instead of learning the full causal structure of the individual datasets. \nSome applications include root cause analysis, analyzing gene regulatory network structure changes between healthy and cancerous individuals, or explaining distribution shifts. 
\nThis paper focuses on identifying the causal mechanism shifts in two or more related datasets over the same set of variables---*without estimating the entire DAG structure of each SCM*.\nPrior work under this setting assumed linear models with Gaussian noise; instead, in this work we assume that each SCM belongs to the more general class of *nonlinear* additive noise models (ANMs).\nA key technical contribution of this work is to show that the Jacobian of the score function for the *mixture distribution* allows for the identification of shifts under general non-parametric functional mechanisms.\nOnce the shifted variables are identified, we leverage recent work to estimate the structural differences, if any, for those variables.\nExperiments on synthetic and real-world data are provided to showcase the applicability of this approach.\nCode implementing the proposed method is open-source and publicly available at https://github.com/kevinsbello/iSCAN.", "keywords": "distribution shifts;heterogeneous data;feature-shift;structural causal models;additive noise models;causality;root-cause analysis", "primary_area": "", "supplementary_material": "", "author": "Tianyu Chen;Kevin Bello;Bryon Aragam;Pradeep Kumar Ravikumar", "authorids": "tianyuchen@uchicago.edu;~Kevin_Bello1;~Bryon_Aragam1;~Pradeep_Kumar_Ravikumar1", "gender": ";M;;M", "homepage": ";https://www.cs.cmu.edu/~kbello;http://bryonaragam.com/;http://www.cs.cmu.edu/~pradeepr/", "dblp": ";202/2531;140/7564;94/3594", "google_scholar": ";pCS09UsAAAAJ;u-W3_9QAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "tianyuchen@uchicago.edu;~Kevin_Bello1;~Bryon_Aragam1;~Pradeep_Kumar_Ravikumar1", "aff": ";University of Chicago;Booth School of Business;Carnegie Mellon University", "aff_domain": ";uchicago.edu;chicagobooth.edu;cmu.edu", "position": ";Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023iscan,\ntitle={i{SCAN}: Identifying Causal Mechanism Shifts among Nonlinear Additive Noise Models},\nauthor={Tianyu Chen and Kevin Bello and Bryon Aragam and Pradeep Kumar Ravikumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GEtXhqKW6X}\n}", "github": "", "project": "", "reviewers": "PS2y;YGJy;appw;m244", "pdf_size": 3258686, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "39;175;65;47", "wc_strengths": "35;201;76;71", "wc_weaknesses": "77;262;166;128", "wc_questions": "36;313;682;599", "wc_limitations": "5;136;48;15", "wc_review": "192;1087;1037;860", "wc_reply_reviewers": "160;37;217;193", "wc_reply_authors": "619;215;433;967", "reply_reviewers": "2;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 81.5, 54.79735395071554 ], "wc_strengths_avg": [ 95.75, 62.79082337412052 ], "wc_weaknesses_avg": [ 158.25, 67.7140125823304 ], "wc_questions_avg": [ 407.5, 254.44301916146176 ], "wc_limitations_avg": [ 51.0, 51.58972765968047 ], "wc_review_avg": [ 794.0, 357.6513665568748 ], "wc_reply_reviewers_avg": [ 151.75, 69.27255950230222 ], "wc_reply_authors_avg": [ 558.5, 275.80563808595355 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ],
"replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3307210042761226613&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";uchicago.edu;chicagobooth.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Chicago;University of Chicago Booth School of Business;Carnegie Mellon University", "aff_unique_dep": ";Booth School of Business;", "aff_unique_url": "https://www.uchicago.edu;https://www.chicagobooth.edu;https://www.cmu.edu", "aff_unique_abbr": "UChicago;Booth;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "An NLP Benchmark Dataset for Assessing Corporate Climate Policy Engagement", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73645", "id": "GF5l0F19Bt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ccaa4f9a89cce6619093226f26b84e6-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=GF5l0F19Bt", "openreview": "https://openreview.net/forum?id=GF5l0F19Bt", "poster": "/media/PosterPDFs/NeurIPS%202023/73645.png?t=1700611588.0126936", "slides": "https://nips.cc/virtual/2023/poster/73645", "video": "https://nips.cc/virtual/2023/poster/73645", "author_site": "Gaku Morio, Christopher D Manning", "tldr": "", "abstract": "As societal awareness of climate change grows, corporate climate policy engagements are attracting attention.\nWe propose a dataset to estimate corporate climate policy engagement from various PDF-formatted documents.\nOur dataset comes from LobbyMap (a platform operated by global think tank InfluenceMap) that provides engagement categories and stances on the documents.\nTo convert the LobbyMap data into the structured dataset, we developed a pipeline using text extraction and OCR.\nOur contributions are: (i) Building an NLP dataset including 10K documents on corporate climate policy engagement. (ii) Analyzing the properties and challenges of the dataset. 
(iii) Providing experiments for the dataset using pre-trained language models.\nThe results show that while Longformer outperforms baselines and other pre-trained models, there is still room for significant improvement.\nWe hope our work begins to bridge research on NLP and climate change.", "keywords": "natural language processing;corporate climate policy engagement;climatebert;greenwashing", "primary_area": "", "supplementary_material": "", "author": "Gaku Morio;Christopher D Manning", "authorids": "~Gaku_Morio1;~Christopher_D_Manning1", "gender": ";M", "homepage": ";https://nlp.stanford.edu/~manning/", "dblp": ";m/ChristopherDManning", "google_scholar": ";1zmDOdwAAAAJ", "orcid": ";0000-0001-6155-649X", "linkedin": ";christopher-manning-011575/", "or_profile": "~Gaku_Morio1;~Christopher_D_Manning1", "aff": ";Computer Science Department, Stanford University", "aff_domain": ";cs.stanford.edu", "position": ";Full Professor", "bibtex": "@inproceedings{\nmorio2023an,\ntitle={An {NLP} Benchmark Dataset for Assessing Corporate Climate Policy Engagement},\nauthor={Gaku Morio and Christopher D Manning},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=GF5l0F19Bt}\n}", "github": "", "project": "", "reviewers": "JKh5;kg8y;mNPN;nhwc", "pdf_size": 1310033, "rating": "6;7;8;9", "confidence": "2;3;3;4", "wc_summary_and_contributions": "72;112;34;29", "wc_strengths": "66;25;47;38", "wc_improvement": "136;232;117;2", "wc_limitations": "1;102;106;5", "wc_correctness": "1;44;17;1", "wc_clarity": "11;5;15;1", "wc_relation_to_prior_work": "19;13;59;1", "wc_documentation": "42;1;65;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "349;535;461;83", "wc_reply_reviewers": "38;51;66;0", "wc_reply_authors": "466;805;590;5", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 61.75, 33.439310698637314 ], "wc_strengths_avg": [ 44.0, 14.916433890176299 ], "wc_improvement_avg": [ 121.75, 81.73241401059924 ], "wc_limitations_avg": [ 53.5, 50.53958844312051 ], "wc_correctness_avg": [ 15.75, 17.5695048308141 ], "wc_clarity_avg": [ 8.0, 5.385164807134504 ], "wc_relation_to_prior_work_avg": [ 23.0, 21.77154105707724 ], "wc_documentation_avg": [ 28.25, 26.564779313971346 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 357.0, 171.49344010777787 ], "wc_reply_reviewers_avg": [ 38.75, 24.468091466234142 ], "wc_reply_authors_avg": [ 466.5, 292.7528821378194 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9486832980505139, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17493371058776248635&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";cs.stanford.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "GenImage: A Million-Scale Benchmark for Detecting AI-Generated Image", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73644", "id": 
"GF84C0z45H", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4d4a021f9051a6c18183b059117e8b5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=GF84C0z45H", "openreview": "https://openreview.net/forum?id=GF84C0z45H", "poster": "/media/PosterPDFs/NeurIPS%202023/73644.png?t=1698203717.0436957", "slides": "https://nips.cc/virtual/2023/poster/73644", "video": "https://nips.cc/virtual/2023/poster/73644", "author_site": "Mingjian Zhu, Hanting Chen, Qiangyu YAN, Xudong Huang, Guanyu Lin, Wei Li, Zhijun Tu, Hailin Hu, Jie Hu, Yunhe Wang", "tldr": "", "abstract": "The extraordinary ability of generative models to generate photographic images has intensified concerns about the spread of disinformation, thereby leading to the demand for detectors capable of distinguishing between AI-generated fake images and real images. However, the lack of large datasets containing images from the most advanced image generators poses an obstacle to the development of such detectors. In this paper, we introduce the GenImage dataset, which has the following advantages: 1) Plenty of Images, including over one million pairs of AI-generated fake images and collected real images. 2) Rich Image Content, encompassing a broad range of image classes. 3) State-of-the-art Generators, synthesizing images with advanced diffusion models and GANs. The aforementioned advantages allow the detectors trained on GenImage to undergo a thorough evaluation and demonstrate strong applicability to diverse images. We conduct a comprehensive analysis of the dataset and propose two tasks for evaluating the detection method in resembling real-world scenarios. The cross-generator image classification task measures the performance of a detector trained on one generator when tested on the others. The degraded image classification task assesses the capability of the detectors in handling degraded images such as low-resolution, blurred, and compressed images. 
With the GenImage dataset, researchers can expedite the development and evaluation of AI-generated image detectors that improve upon prevailing methods.", "keywords": "AI Generated Image Detector; Generative Model", "primary_area": "", "supplementary_material": "/attachment/c86132e77126fd50c6211f765b696c42e8dada40.zip", "author": "Mingjian Zhu;Hanting Chen;Qiangyu YAN;Xudong Huang;Guanyu Lin;Wei Li;Zhijun Tu;Hailin Hu;Jie Hu;Yunhe Wang", "authorids": "~Mingjian_Zhu1;~Hanting_Chen1;~Qiangyu_YAN1;~Xudong_Huang1;~Guanyu_Lin3;~Wei_Li60;~Zhijun_Tu1;~Hailin_Hu1;~Jie_Hu8;~Yunhe_Wang1", "gender": ";M;F;M;;M;M;;M;M", "homepage": ";;https://github.com/YANDaoyu;;;https://scholar.google.com/citations?user=_0DYhbcAAAAJ&hl=en;;;;https://www.wangyunhe.site/", "dblp": ";232/2060;304/1125;;;64/6025-152.html;228/8537;209/7188-2;;63/8217-1", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;;_0DYhbcAAAAJ;kSPs6FsAAAAJ;rvYUgBwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";;;0000-0002-3857-0047;;0009-0004-3136-1105;0000-0001-8740-7927;;;0000-0002-0142-509X", "linkedin": ";;;;;;;;%E6%9D%B0-%E8%83%A1-b6a598118/;", "or_profile": "~Mingjian_Zhu1;~Hanting_Chen1;~Qiangyu_YAN1;~Xudong_Huang1;~Guanyu_Lin3;~Wei_Li60;~Zhijun_Tu1;~Hailin_Hu1;~Jie_Hu8;~Yunhe_Wang1", "aff": ";Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Huawei Noah's Ark Lab", "aff_domain": ";huawei.com;huawei.com;huawei.com;;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com", "position": ";Researcher;Researcher;Researcher;;Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhu2023genimage,\ntitle={GenImage: A Million-Scale Benchmark for Detecting {AI}-Generated Image},\nauthor={Mingjian Zhu and Hanting Chen and Qiangyu YAN and Xudong Huang and Guanyu Lin and Wei Li and Zhijun Tu and Hailin Hu and Jie Hu and Yunhe Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=GF84C0z45H}\n}", "github": "", "project": "", "reviewers": "APC2;GjRx;27YU;ZWev", "pdf_size": 976199, "rating": "5;6;6;9", "confidence": "5;2;3;5", "wc_summary_and_contributions": "62;300;126;80", "wc_strengths": "104;93;61;65", "wc_improvement": "66;12;156;180", "wc_limitations": "184;118;33;46", "wc_correctness": "6;1;16;5", "wc_clarity": "6;1;75;8", "wc_relation_to_prior_work": "27;3;29;7", "wc_documentation": "34;1;37;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "490;530;534;407", "wc_reply_reviewers": "0;0;100;154", "wc_reply_authors": "824;499;843;660", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "wc_summary_and_contributions_avg": [ 142.0, 94.15943925066674 ], "wc_strengths_avg": [ 80.75, 18.226011631731172 ], "wc_improvement_avg": [ 103.5, 67.79933627993714 ], "wc_limitations_avg": [ 95.25, 60.61095198064456 ], "wc_correctness_avg": [ 7.0, 5.522680508593631 ], "wc_clarity_avg": [ 22.5, 30.41792234851026 ], "wc_relation_to_prior_work_avg": [ 16.5, 11.6081867662439 ], "wc_documentation_avg": [ 21.75, 14.652218262092603 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 490.25, 51.050832510351874 ], "wc_reply_reviewers_avg": [ 63.5, 66.30799348494871 ], "wc_reply_authors_avg": [
706.5, 139.33502790038116 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.3207501495497921, "gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13801430704038025914&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": ";huawei.com;huawei.com;huawei.com;;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CORNN: Convex optimization of recurrent neural networks for rapid inference of neural dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72218", "id": "GGIA1p9fDT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a103529738706979331778377f2d5864-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GGIA1p9fDT", "openreview": "https://openreview.net/forum?id=GGIA1p9fDT", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72218", "video": "https://nips.cc/virtual/2023/poster/72218", "author_site": "Fatih Dinc, Adam Shai, Mark Schnitzer, Hidenori Tanaka", "tldr": "", "abstract": "Advances in optical and electrophysiological recording technologies have made it possible to record the dynamics of thousands of neurons, opening up new possibilities for interpreting and controlling large neural populations in behaving animals. A promising way to extract computational principles from these large datasets is to train data-constrained recurrent neural networks (dRNNs). Performing this training in real-time could open doors for research techniques and medical applications to model and control interventions at single-cell resolution and drive desired forms of animal behavior. However, existing training algorithms for dRNNs are inefficient and have limited scalability, making it a challenge to analyze large neural recordings even in offline scenarios. To address these issues, we introduce a training method termed Convex Optimization of Recurrent Neural Networks (CORNN). In studies of simulated recordings, CORNN attained training speeds $\\sim$100-fold faster than traditional optimization approaches while maintaining or enhancing modeling accuracy. We further validated CORNN on simulations with thousands of cells that performed simple computations such as those of a 3-bit flip-flop or the execution of a timed response. Finally, we showed that CORNN can robustly reproduce network dynamics and underlying attractor structures despite mismatches between generator and inference models, severe subsampling of observed neurons, or mismatches in neural time-scales. 
Overall, by training dRNNs with millions of parameters in sub-minute processing time on a standard computer, CORNN constitutes a first step towards real-time network reproduction constrained by large-scale neural recordings and a powerful computational tool for advancing the understanding of neural computation.", "keywords": "brain-machine interfaces;recurrent neural networks;convex optimization;computational neuroscience", "primary_area": "", "supplementary_material": "", "author": "Fatih Dinc;Adam Shai;Mark Schnitzer;Hidenori Tanaka", "authorids": "~Fatih_Dinc1;~Adam_Shai1;~Mark_Schnitzer1;~Hidenori_Tanaka1", "gender": "M;M;M;", "homepage": "https://sites.google.com/view/fatihdinc/;https://profiles.stanford.edu/adam-shai?releaseVersion=10.5.1;https://pyramidal.stanford.edu/;https://sites.google.com/view/htanaka/home", "dblp": "218/5297;;;", "google_scholar": "https://scholar.google.com.tr/citations?user=jFHyg0oAAAAJ;;;f_pWOGIAAAAJ", "orcid": "0000-0003-0921-0162;;;", "linkedin": ";;;", "or_profile": "~Fatih_Dinc1;~Adam_Shai1;~Mark_Schnitzer1;~Hidenori_Tanaka1", "aff": "Stanford University;Stanford University;Stanford University;Physics & Informatics Lab, NTT Research, Inc.", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;ntt-research.com", "position": "PhD student;Postdoc;Full Professor;Senior Research Scientist", "bibtex": "@inproceedings{\ndinc2023cornn,\ntitle={{CORNN}: Convex optimization of recurrent neural networks for rapid inference of neural dynamics},\nauthor={Fatih Dinc and Adam Shai and Mark Schnitzer and Hidenori Tanaka},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GGIA1p9fDT}\n}", "github": "", "project": "", "reviewers": "GD24;mPZo;Gp7K;1Kip", "pdf_size": 14568328, "rating": "5;7;8;8", "confidence": "3;5;4;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;3;3", "wc_summary": "109;144;20;181", "wc_strengths": "109;44;52;33", "wc_weaknesses": "989;48;104;18", "wc_questions": "195;107;1;153", "wc_limitations": "3;161;1;1", "wc_review": "1405;504;178;386", "wc_reply_reviewers": "1179;88;44;65", "wc_reply_authors": "1826;42;83;33", "reply_reviewers": "4;2;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.5, 59.68458762528229 ], "wc_strengths_avg": [ 59.5, 29.364093720052047 ], "wc_weaknesses_avg": [ 289.75, 404.8903400922279 ], "wc_questions_avg": [ 114.0, 72.2841614740048 ], "wc_limitations_avg": [ 41.5, 68.99818838201479 ], "wc_review_avg": [ 618.25, 468.9852742890762 ], "wc_reply_reviewers_avg": [ 344.0, 482.3385740328053 ], "wc_reply_authors_avg": [ 496.0, 768.1070888880014 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2632284426535519675&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu;ntt-research.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;NTT Research, Inc.", "aff_unique_dep": ";Physics & Informatics Lab", "aff_unique_url": "https://www.stanford.edu;https://www.ntt-research.com", "aff_unique_abbr": "Stanford;NTT
Research", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Domain Adaptive Imitation Learning with Visual Observation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72217", "id": "GGbBXSkX3r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/899511e37a8e01e1bd6f6f1d377cc250-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GGbBXSkX3r", "openreview": "https://openreview.net/forum?id=GGbBXSkX3r", "poster": "/media/PosterPDFs/NeurIPS%202023/72217.png?t=1701425685.6849887", "slides": "https://nips.cc/virtual/2023/poster/72217", "video": "https://nips.cc/virtual/2023/poster/72217", "author_site": "Sungho Choi, Seungyul Han, Woojun Kim, Jongseong Chae, Whiyoung Jung, Youngchul Sung", "tldr": "", "abstract": "In this paper, we consider domain-adaptive imitation learning with visual observation, where an agent in a target domain learns to perform a task by observing expert demonstrations in a source domain. Domain adaptive imitation learning arises in practical scenarios where a robot, receiving visual sensory data, needs to mimic movements by visually observing other robots from different angles or observing robots of different shapes. To overcome the domain shift in cross-domain imitation learning with visual observation, we propose a novel framework for extracting domain-independent behavioral features from input observations that can be used to train the learner, based on dual feature extraction and image reconstruction. Empirical results demonstrate that our approach outperforms previous algorithms for imitation learning from visual observation with domain shift.", "keywords": "Reinforcement Learning;Deep Reinforcement Learning;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/b22ed0f05aa1ac784bcae0b8c81cad06cb632a03.zip", "author": "Sungho Choi;Seungyul Han;Woojun Kim;Jongseong Chae;Whiyoung Jung;Youngchul Sung", "authorids": "~Sungho_Choi1;~Seungyul_Han1;~Woojun_Kim1;~Jongseong_Chae1;~Whiyoung_Jung1;~Youngchul_Sung1", "gender": "M;M;M;M;M;M", "homepage": "https://sites.google.com/view/sisrelkaist/members/shchoi;https://mllab.unist.ac.kr;;https://sites.google.com/view/sisrelkaist/members/jschae;;https://sites.google.com/view/youngchulsung", "dblp": "60/1680;183/6417;236/4974;;256/1642;17/6798", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=bcHWCBoAAAAJ;https://scholar.google.com/citations?hl=ko;72La2OEAAAAJ;-9D2k3UAAAAJ", "orcid": ";;;;;0000-0003-4536-6690", "linkedin": ";;;;;", "or_profile": "~Sungho_Choi1;~Seungyul_Han1;~Woojun_Kim1;~Jongseong_Chae1;~Whiyoung_Jung1;~Youngchul_Sung1", "aff": "Korea Advanced Institute of Science & Technology;Ulsan National Institute of Science and Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;unist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Assistant Professor;Postdoc;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nchoi2023domain,\ntitle={Domain Adaptive Imitation Learning with Visual Observation},\nauthor={Sungho Choi and Seungyul Han and Woojun Kim and Jongseong Chae and Whiyoung Jung 
and Youngchul Sung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GGbBXSkX3r}\n}", "github": "", "project": "", "reviewers": "V2Bv;33qr;szho;3tvh", "pdf_size": 3265232, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "62;239;81;44", "wc_strengths": "36;220;29;54", "wc_weaknesses": "73;456;104;82", "wc_questions": "43;10;38;138", "wc_limitations": "14;11;9;29", "wc_review": "228;936;261;347", "wc_reply_reviewers": "13;0;0;54", "wc_reply_authors": "27;60;60;30", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.5, 77.60959992165918 ], "wc_strengths_avg": [ 84.75, 78.61734859431473 ], "wc_weaknesses_avg": [ 178.75, 160.46709164186905 ], "wc_questions_avg": [ 57.25, 48.28755015529365 ], "wc_limitations_avg": [ 15.75, 7.854139036202504 ], "wc_review_avg": [ 443.0, 287.92967891483505 ], "wc_reply_reviewers_avg": [ 16.75, 22.151467220028564 ], "wc_reply_authors_avg": [ 44.25, 15.785673884886892 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2969172995348144087&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;unist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.unist.ac.kr", "aff_unique_abbr": "KAIST;UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "High dimensional, tabular deep learning with an auxiliary knowledge graph", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72216", "id": "GGylthmehy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/53dd219b6b11abc8ce523921c18c7a3e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GGylthmehy", "openreview": "https://openreview.net/forum?id=GGylthmehy", "poster": "/media/PosterPDFs/NeurIPS%202023/72216.png?t=1702450694.1190798", "slides": "https://nips.cc/virtual/2023/poster/72216", "video": "https://nips.cc/virtual/2023/poster/72216", "author_site": "Camilo Ruiz, Hongyu Ren, Kexin Huang, Jure Leskovec", "tldr": "", "abstract": "Machine learning models exhibit strong performance on datasets with abundant labeled samples. However, for tabular datasets with extremely high $d$-dimensional features but limited $n$ samples (i.e. $d \\gg n$), machine learning models struggle to achieve strong performance due to the risk of overfitting. Here, our key insight is that there is often abundant, auxiliary domain information describing input features which can be structured as a heterogeneous knowledge graph (KG). We propose PLATO, a method that achieves strong performance on tabular data with $d \\gg n$ by using an auxiliary KG describing input features to regularize a multilayer perceptron (MLP). 
In PLATO, each input feature corresponds to a node in the auxiliary KG. In the MLP\u2019s first layer, each input feature also corresponds to a weight vector. PLATO is based on the inductive bias that two input features corresponding to similar nodes in the auxiliary KG should have similar weight vectors in the MLP's first layer. PLATO captures this inductive bias by inferring the weight vector for each input feature from its corresponding node in the KG via a trainable message-passing function. Across 6 $d \\gg n$ datasets, PLATO outperforms 13 state-of-the-art baselines by up to 10.19%.", "keywords": "Tabular Data;Deep Learning;Knowledge Graph;Regularization", "primary_area": "", "supplementary_material": "", "author": "Camilo Ruiz;Hongyu Ren;Kexin Huang;Jure Leskovec", "authorids": "~Camilo_Ruiz1;~Hongyu_Ren1;~Kexin_Huang1;~Jure_Leskovec1", "gender": "M;;M;", "homepage": "https://profiles.stanford.edu/camilo-ruiz;;https://www.kexinhuang.com/;http://cs.stanford.edu/~jure/", "dblp": ";30/10885;;l/JureLeskovec", "google_scholar": "ELRsLTgAAAAJ;;ogEXTOgAAAAJ;Q_kKkIUAAAAJ", "orcid": "0000-0002-8395-2853;;;0000-0002-5411-923X", "linkedin": "camilo-ruiz-a50a348a;;;leskovec/", "or_profile": "~Camilo_Ruiz1;~Hongyu_Ren1;~Kexin_Huang1;~Jure_Leskovec1", "aff": "Stanford University;Computer Science Department, Stanford University;Stanford University;Kumo.AI", "aff_domain": "stanford.edu;cs.stanford.edu;stanford.edu;kumo.ai", "position": "PhD student;PhD student;PhD student;Chief Scientist", "bibtex": "@inproceedings{\nruiz2023high,\ntitle={High dimensional, tabular deep learning with an auxiliary knowledge graph},\nauthor={Camilo Ruiz and Hongyu Ren and Kexin Huang and Jure Leskovec},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GGylthmehy}\n}", "github": "", "project": "", "reviewers": "p8DV;87H1;E2W5;dZQm", "pdf_size": 986316, "rating": "5;5;5;7", "confidence": "3;5;3;4", "soundness": "2;2;2;4", "novelty": "3;2;3;3", "presentation": "2;4;3;4", "wc_summary": "105;111;109;211", "wc_strengths": "97;123;97;67", "wc_weaknesses": "141;845;109;65", "wc_questions": "47;161;71;62", "wc_limitations": "38;12;12;63", "wc_review": "428;1252;398;468", "wc_reply_reviewers": "104;116;10;28", "wc_reply_authors": "516;81;39;39", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 134.0, 44.50842616853577 ], "wc_strengths_avg": [ 96.0, 19.82422760159901 ], "wc_weaknesses_avg": [ 290.0, 321.56336856053736 ], "wc_questions_avg": [ 85.25, 44.56666355023674 ], "wc_limitations_avg": [ 31.25, 21.182244923520265 ], "wc_review_avg": [ 636.5, 356.2257009256912 ], "wc_reply_reviewers_avg": [ 64.5, 46.138378818506396 ], "wc_reply_authors_avg": [ 168.75, 201.2167674424773 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13752370516352981404&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "stanford.edu;cs.stanford.edu;stanford.edu;kumo.ai", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;Kumo.AI", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.stanford.edu;https://www.kumo.ai", "aff_unique_abbr": "Stanford;Kumo.AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Machine learning detects terminal singularities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72215", "id": "GI4Pp01prW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d453490ada2b1991852f053fbd213a6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GI4Pp01prW", "openreview": "https://openreview.net/forum?id=GI4Pp01prW", "poster": "/media/PosterPDFs/NeurIPS%202023/72215.png?t=1701340970.5305376", "slides": "https://nips.cc/virtual/2023/poster/72215", "video": "https://nips.cc/virtual/2023/poster/72215", "author_site": "Tom Coates, Alexander Kasprzyk, Sara Veneziale", "tldr": "", "abstract": "Algebraic varieties are the geometric shapes defined by systems of polynomial equations; they are ubiquitous across mathematics and science. Amongst these algebraic varieties are Q-Fano varieties: positively curved shapes which have Q-factorial terminal singularities. Q-Fano varieties are of fundamental importance in geometry as they are `atomic pieces\u2019 of more complex shapes \u2013 the process of breaking a shape into simpler pieces in this sense is called the Minimal Model Programme.\n\nDespite their importance, the classification of Q-Fano varieties remains unknown. In this paper we demonstrate that machine learning can be used to understand this classification. We focus on eight-dimensional positively-curved algebraic varieties that have toric symmetry and Picard rank two, and develop a neural network classifier that predicts with 95% accuracy whether or not such an algebraic variety is Q-Fano. We use this to give a first sketch of the landscape of Q-Fano varieties in dimension eight.\n\nHow the neural network is able to detect Q-Fano varieties with such accuracy remains mysterious, and hints at some deep mathematical theory waiting to be uncovered. Furthermore, when visualised using the quantum period, an invariant that has played an important role in recent theoretical developments, we observe that the classification as revealed by ML appears to fall within a bounded region, and is stratified by the Fano index. This suggests that it may be possible to state and prove conjectures on completeness in the future.\n\nInspired by the ML analysis, we formulate and prove a new global combinatorial criterion for a positively curved toric variety of Picard rank two to have terminal singularities. Together with the first sketch of the landscape of Q-Fano varieties in higher dimensions, this gives strong new evidence that machine learning can be an essential tool in developing mathematical conjectures and accelerating theoretical discovery.", "keywords": "mathematics;geometry;Fano varieties;terminal singularities;theorem discovery;neural network classifier;supervised learning", "primary_area": "", "supplementary_material": "/attachment/bdb418fb6a1118e0f995c830a698877f05bf0956.pdf", "author": "Tom Coates;Alexander M. 
Kasprzyk;Sara Veneziale", "authorids": "t.coates@imperial.ac.uk;a.m.kasprzyk@nottingham.ac.uk;~Sara_Veneziale1", "gender": ";;F", "homepage": ";;https://sites.google.com/view/saraveneziale/home", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "t.coates@imperial.ac.uk;a.m.kasprzyk@nottingham.ac.uk;~Sara_Veneziale1", "aff": ";;Imperial College London", "aff_domain": ";;ic.ac.uk", "position": ";;PhD student", "bibtex": "@inproceedings{\ncoates2023machine,\ntitle={Machine learning detects terminal singularities},\nauthor={Tom Coates and Alexander M. Kasprzyk and Sara Veneziale},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GI4Pp01prW}\n}", "github": "", "project": "", "reviewers": "B8mu;Ukzu;VZKS;9Hdp", "pdf_size": 3081160, "rating": "6;6;7;7", "confidence": "4;4;3;3", "soundness": "4;2;3;4", "novelty": "3;2;3;4", "presentation": "3;2;4;3", "wc_summary": "64;170;66;335", "wc_strengths": "24;184;47;132", "wc_weaknesses": "91;177;38;80", "wc_questions": "33;34;109;48", "wc_limitations": "1;18;14;1", "wc_review": "213;583;274;596", "wc_reply_reviewers": "5;24;31;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 158.75, 110.4205030780063 ], "wc_strengths_avg": [ 96.75, 64.46462208064203 ], "wc_weaknesses_avg": [ 96.5, 50.50990001969911 ], "wc_questions_avg": [ 56.0, 31.1688947510174 ], "wc_limitations_avg": [ 8.5, 7.632168761236874 ], "wc_review_avg": [ 416.5, 174.39968463274238 ], "wc_reply_reviewers_avg": [ 15.0, 12.864680330268607 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4677380905519716615&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": ";;ic.ac.uk", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Two-Stage Learning to Defer with Multiple Experts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72214", "id": "GIlsH0T4b2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b17d256cf1fe1cc084922a8c6b565b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GIlsH0T4b2", "openreview": "https://openreview.net/forum?id=GIlsH0T4b2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72214", "video": "https://nips.cc/virtual/2023/poster/72214", "author_site": "Anqi Mao, Christopher Mohri, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "We study a two-stage scenario for learning to defer with multiple experts, which is crucial in practice for many applications. In this scenario, a predictor is derived in a first stage by training with a common loss function such as cross-entropy. In the second stage, a deferral function is learned to assign the most suitable expert to each input. 
We design a new family of surrogate loss functions for this scenario both in the score-based and the predictor-rejector settings and prove that they are supported by $H$-consistency bounds, which implies their Bayes-consistency. Moreover, we show that, for a constant cost function, our two-stage surrogate losses are realizable $H$-consistent. While the main focus of this work is a theoretical analysis, we also report the results of several experiments on CIFAR-10 and SVHN datasets.", "keywords": "learning to defer;learning theory", "primary_area": "", "supplementary_material": "/attachment/c40ed68956710f2506b195eddb09052bd7630c1d.pdf", "author": "Anqi Mao;Christopher Mohri;Mehryar Mohri;Yutao Zhong", "authorids": "~Anqi_Mao1;~Christopher_Mohri1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": "F;M;M;", "homepage": "https://anqi-mao.github.io;;https://cs.nyu.edu/~mohri/;", "dblp": "241/6864;;03/5448;51/3178-2", "google_scholar": "nkjIZ-oAAAAJ;_otSGXcAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;;", "linkedin": ";christopher-mohri-3429841a0/;mehryar-mohri-3737b981/;", "or_profile": "~Anqi_Mao1;~Christopher_Mohri1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Courant Institute of Mathematical Sciences, NYU;Cornell University;Google Research;Google", "aff_domain": "cims.nyu.edu;cornell.edu;google.com;google.com", "position": "PhD student;Undergrad student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nmao2023twostage,\ntitle={Two-Stage Learning to Defer with Multiple Experts},\nauthor={Anqi Mao and Christopher Mohri and Mehryar Mohri and Yutao Zhong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GIlsH0T4b2}\n}", "github": "", "project": "", "reviewers": "Jgi5;yH2K;Z2p9;fJtW", "pdf_size": 444390, "rating": "5;7;7;7", "confidence": "4;1;2;4", "soundness": "2;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "87;62;98;227", "wc_strengths": "25;62;75;182", "wc_weaknesses": "301;83;129;260", "wc_questions": "84;4;272;40", "wc_limitations": "12;34;26;3", "wc_review": "509;245;600;712", "wc_reply_reviewers": "77;9;30;52", "wc_reply_authors": "95;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.5, 63.98632666437416 ], "wc_strengths_avg": [ 86.0, 58.382360349680965 ], "wc_weaknesses_avg": [ 193.25, 89.92879127398521 ], "wc_questions_avg": [ 100.0, 103.26664514740469 ], "wc_limitations_avg": [ 18.75, 12.028611723719408 ], "wc_review_avg": [ 516.5, 172.45361695250116 ], "wc_reply_reviewers_avg": [ 42.0, 25.288337232803585 ], "wc_reply_authors_avg": [ 23.75, 41.13620667976084 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14785368338532497888&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cims.nyu.edu;cornell.edu;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "New York University;Cornell University;Google", "aff_unique_dep": "Courant Institute of Mathematical Sciences;;Google Research", "aff_unique_url": "https://www.courant.nyu.edu;https://www.cornell.edu;https://research.google", 
"aff_unique_abbr": "NYU;Cornell;Google Research", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "New York;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Estimating Koopman operators with sketching to provably learn large scale dynamical systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72213", "id": "GItLpB1vhK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3d1e34a15c0af0954ae36a7f811c754-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GItLpB1vhK", "openreview": "https://openreview.net/forum?id=GItLpB1vhK", "poster": "/media/PosterPDFs/NeurIPS%202023/72213.png?t=1702193770.0879533", "slides": "https://nips.cc/virtual/2023/poster/72213", "video": "https://nips.cc/virtual/2023/poster/72213", "author_site": "Giacomo Meanti, Antoine Chatalic, Vladimir Kostic, Pietro Novelli, Massimiliano Pontil, Lorenzo Rosasco", "tldr": "", "abstract": "The theory of Koopman operators allows to deploy non-parametric machine learning algorithms to predict and analyze complex dynamical systems.\nEstimators such as principal component regression (PCR) or reduced rank regression (RRR) in kernel spaces can be shown to provably learn Koopman operators from finite empirical observations of the system's time evolution. \nScaling these approaches to very long trajectories is a challenge and requires introducing suitable approximations to make computations feasible. \nIn this paper, we boost the efficiency of \ndifferent kernel-based Koopman operator estimators using random projections (sketching).\nWe derive, implement and test the new ``sketched'' estimators with extensive experiments on synthetic and large-scale molecular dynamics datasets. 
\nFurther, we establish non asymptotic error bounds giving a sharp characterization of the trade-offs between statistical learning rates and computational efficiency.\nOur empirical and theoretical analysis shows that the proposed estimators provide a sound and efficient way to learn large scale dynamical systems.\nIn particular our experiments indicate that the proposed estimators retain the same accuracy of PCR or RRR, while being much faster.", "keywords": "dynamical systems;kernel methods;koopman operator;sketching;molecular dynamics;efficient machine learning", "primary_area": "", "supplementary_material": "", "author": "Giacomo Meanti;Antoine Chatalic;Vladimir R Kostic;Pietro Novelli;Massimiliano Pontil;Lorenzo Rosasco", "authorids": "~Giacomo_Meanti1;~Antoine_Chatalic1;~Vladimir_R_Kostic1;~Pietro_Novelli1;~Massimiliano_Pontil4;~Lorenzo_Rosasco1", "gender": "M;;M;M;Not Specified;", "homepage": "https://gmeanti.com;;https://vladi-iit.github.io/;;https://www.iit.it/web/computational-statistics-and-machine-learning;", "dblp": ";;94/879;318/3513;;", "google_scholar": ";;66gV7SAAAAAJ;;lcOacs8AAAAJ;", "orcid": "0000-0002-4633-2954;;;0000-0003-1623-5659;0000-0001-9415-098X;", "linkedin": ";;vladimir-kostic-77500652/;;;", "or_profile": "~Giacomo_Meanti1;~Antoine_Chatalic1;~Vladimir_R_Kostic1;~Pietro_Novelli1;~Massimiliano_Pontil4;~Lorenzo_Rosasco1", "aff": "University of Genoa;;University of Novi Sad;Istituto Italiano di Tecnologia;University College London, University of London;", "aff_domain": "unige.it;;uns.ac.rs;iit.it;ucl.ac.uk;", "position": "PhD student;;Associate Professor;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nmeanti2023estimating,\ntitle={Estimating Koopman operators with sketching to provably learn large scale dynamical systems},\nauthor={Giacomo Meanti and Antoine Chatalic and Vladimir R Kostic and Pietro Novelli and Massimiliano Pontil and Lorenzo Rosasco},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GItLpB1vhK}\n}", "github": "", "project": "", "reviewers": "Hh1S;FXhE;8Gvs;jbb3", "pdf_size": 5400953, "rating": "6;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "2;2;4;3", "presentation": "2;3;3;4", "wc_summary": "54;52;34;25", "wc_strengths": "34;59;114;169", "wc_weaknesses": "122;156;439;61", "wc_questions": "45;33;67;47", "wc_limitations": "25;9;54;3", "wc_review": "280;309;708;305", "wc_reply_reviewers": "26;24;88;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 41.25, 12.193748398257199 ], "wc_strengths_avg": [ 94.0, 52.08166663999915 ], "wc_weaknesses_avg": [ 194.5, 145.20760999341599 ], "wc_questions_avg": [ 48.0, 12.206555615733702 ], "wc_limitations_avg": [ 22.75, 19.753164303473 ], "wc_review_avg": [ 400.5, 177.88268606022342 ], "wc_reply_reviewers_avg": [ 37.25, 29.86113695089321 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16973232437555044821&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "unige.it;;uns.ac.rs;iit.it;ucl.ac.uk;", "author_num": 6, 
"aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Genoa;University of Novi Sad;Istituto Italiano di Tecnologia;University College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.unige.it;https://www.uns.ac.rs;https://www.iit.it;https://www.ucl.ac.uk", "aff_unique_abbr": "UniGe;UNS;IIT;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Italy;Serbia;United Kingdom" }, { "title": "D4Explainer: In-distribution Explanations of Graph Neural Network via Discrete Denoising Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72212", "id": "GJtP1ZEzua", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f978c8f3b5f399cae464e85f72e28503-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GJtP1ZEzua", "openreview": "https://openreview.net/forum?id=GJtP1ZEzua", "poster": "/media/PosterPDFs/NeurIPS%202023/72212.png?t=1699600418.6531122", "slides": "https://nips.cc/virtual/2023/poster/72212", "video": "https://nips.cc/virtual/2023/poster/72212", "author_site": "Jialin Chen, Shirley Wu, Abhijit Gupta, Rex Ying", "tldr": "", "abstract": "The widespread deployment of Graph Neural Networks (GNNs) sparks significant interest in their explainability, which plays a vital role in model auditing and ensuring trustworthy graph learning. The objective of GNN explainability is to discern the underlying graph structures that have the most significant impact on model predictions. Ensuring that explanations generated are reliable necessitates consideration of the in-distribution property, particularly due to the vulnerability of GNNs to out-of-distribution data. Unfortunately, prevailing explainability methods tend to constrain the generated explanations to the structure of the original graph, thereby downplaying the significance of the in-distribution property and resulting in explanations that lack reliability.\nTo address these challenges, we propose D4Explainer, a novel approach that provides in-distribution GNN explanations for both counterfactual and model-level explanation scenarios. The proposed D4Explainer incorporates generative graph distribution learning into the optimization objective, which accomplishes two goals: 1) generate a collection of diverse counterfactual graphs that conform to the in-distribution property for a given instance, and 2) identify the most discriminative graph patterns that contribute to a specific class prediction, thus serving as model-level explanations. 
It is worth mentioning that D4Explainer is the first unified framework that combines both counterfactual and model-level explanations.\nEmpirical evaluations conducted on synthetic and real-world datasets provide compelling evidence of the state-of-the-art performance achieved by D4Explainer in terms of explanation accuracy, faithfulness, diversity, and robustness.", "keywords": "Explainability;Graph Neural Network;Diffusion Model", "primary_area": "", "supplementary_material": "", "author": "Jialin Chen;Shirley Wu;Abhijit Gupta;Zhitao Ying", "authorids": "~Jialin_Chen2;~Shirley_Wu1;~Abhijit_Gupta1;~Zhitao_Ying1", "gender": "F;M;M;F", "homepage": "https://github.com/Cather-learner;;https://www.cs.yale.edu/homes/ying-rex;https://cs.stanford.edu/~shirwu", "dblp": ";;209/4936;79/4173-2", "google_scholar": "rHyMKPYAAAAJ;;6fqNXooAAAAJ;r2cVEucAAAAJ", "orcid": "0009-0007-0909-4620;;;", "linkedin": ";avgupta456;rex-ying-92770148/;", "or_profile": "~Jialin_Chen2;~Abhijit_Gupta1;~Zhitao_Ying1;~Yingxin_Wu1", "aff": "Yale University;Yale University;Yale University;Computer Science Department, Stanford University", "aff_domain": "yale.edu;yale.edu;yale.edu;cs.stanford.edu", "position": "PhD student;Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchen2023dexplainer,\ntitle={D4Explainer: In-distribution Explanations of Graph Neural Network via Discrete Denoising Diffusion},\nauthor={Jialin Chen and Shirley Wu and Abhijit Gupta and Zhitao Ying},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GJtP1ZEzua}\n}", "github": "", "project": "", "reviewers": "Tz8g;ofrf;MWPm;XFfB", "pdf_size": 6160044, "rating": "5;6;6;7", "confidence": "5;4;3;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "1;2;3;3", "wc_summary": "69;80;90;14", "wc_strengths": "43;38;49;33", "wc_weaknesses": "276;86;188;33", "wc_questions": "240;170;59;23", "wc_limitations": "11;13;8;30", "wc_review": "639;387;394;133", "wc_reply_reviewers": "85;11;0;0", "wc_reply_authors": "344;0;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 63.25, 29.38856069970083 ], "wc_strengths_avg": [ 40.75, 5.931905258852336 ], "wc_weaknesses_avg": [ 145.75, 93.58518846484202 ], "wc_questions_avg": [ 123.0, 86.59387969135001 ], "wc_limitations_avg": [ 15.5, 8.558621384311845 ], "wc_review_avg": [ 388.25, 178.9292807228599 ], "wc_reply_reviewers_avg": [ 24.0, 35.503520952153465 ], "wc_reply_authors_avg": [ 86.0, 148.95636945092346 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8162943965791571110&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "yale.edu;yale.edu;yale.edu;cs.stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Yale University;Stanford University", "aff_unique_dep": ";Computer Science Department", "aff_unique_url": "https://www.yale.edu;https://www.stanford.edu", "aff_unique_abbr": "Yale;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Adversarial Training for Graph Neural Networks: Pitfalls, Solutions, and New Directions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72211", "id": "GPtroppvUM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b5a801e6bc4f4ffa3e6786518a324488-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GPtroppvUM", "openreview": "https://openreview.net/forum?id=GPtroppvUM", "poster": "/media/PosterPDFs/NeurIPS%202023/72211.png?t=1702070358.3478656", "slides": "https://nips.cc/virtual/2023/poster/72211", "video": "https://nips.cc/virtual/2023/poster/72211", "author_site": "Lukas Gosch, Simon Geisler, Daniel Sturm, Bertrand Charpentier, Daniel Z\u00fcgner, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Despite its success in the image domain, adversarial training did not (yet) stand out as an effective defense for Graph Neural Networks (GNNs) against graph structure perturbations. In the pursuit of fixing adversarial training (1) we show and overcome fundamental theoretical as well as practical limitations of the adopted graph learning setting in prior work; (2) we reveal that flexible GNNs based on learnable graph diffusion are able to adjust to adversarial perturbations, while the learned message passing scheme is naturally interpretable; (3) we introduce the first attack for structure perturbations that, while targeting multiple nodes at once, is capable of handling global (graph-level) as well as local (node-level) constraints. Including these contributions, we demonstrate that adversarial training is a state-of-the-art defense against adversarial structure perturbations.", "keywords": "adversarial training;adversarial examples;robust graph learning;graph machine learning;graph neural networks;graphs", "primary_area": "", "supplementary_material": "", "author": "Lukas Gosch;Simon Geisler;Daniel Sturm;Bertrand Charpentier;Daniel Z\u00fcgner;Stephan G\u00fcnnemann", "authorids": "~Lukas_Gosch1;~Simon_Geisler1;~Daniel_Sturm1;~Bertrand_Charpentier2;~Daniel_Z\u00fcgner1;~Stephan_G\u00fcnnemann1", "gender": "M;M;;M;M;M", "homepage": "https://saper0.github.io/;;https://sharpenb.github.io/;;http://www.daml.in.tum.de;https://www.in.tum.de/en/daml/team/simon-geisler/", "dblp": "289/9853;;222/1875;172/6951;43/3011;237/0253", "google_scholar": "ilCFrEMAAAAJ;;0rqI-ycAAAAJ;;;00x9jJwAAAAJ", "orcid": ";;;;;0000-0003-0867-1856", "linkedin": ";daniel-sturm-a306b8205;bertrand-charpentier-76995ab6/;;;simon-geisler-ai/", "or_profile": "~Lukas_Gosch1;~Daniel_Sturm1;~Bertrand_Charpentier2;~Daniel_Z\u00fcgner1;~Stephan_G\u00fcnnemann1;~Simon_Markus_Geisler1", "aff": "Technical University of Munich;Allianz Versicherungs-AG;Technical University Munich;Microsoft;Technical University Munich;Technical University Munich", "aff_domain": "tum.de;allianz.de;tum.de;microsoft.com;tum.de;tum.de", "position": "PhD student;Researcher;PhD student;Postdoc;Professor;PhD student", "bibtex": "@inproceedings{\ngosch2023adversarial,\ntitle={Adversarial Training for Graph Neural Networks: Pitfalls, Solutions, and New Directions},\nauthor={Lukas Gosch and Simon Geisler and Daniel Sturm and Bertrand Charpentier and Daniel Z{\\\"u}gner and Stephan G{\\\"u}nnemann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GPtroppvUM}\n}", "github": "", "project": "", "reviewers": "bw8r;fJD8;zNuu;ZTec", "pdf_size": 4271742, "rating": 
"5;7;7;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "119;66;108;49", "wc_strengths": "58;71;145;29", "wc_weaknesses": "305;300;66;31", "wc_questions": "100;21;55;9", "wc_limitations": "1;1;1;21", "wc_review": "583;459;375;139", "wc_reply_reviewers": "95;32;30;23", "wc_reply_authors": "664;37;14;8", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 28.90069203323685 ], "wc_strengths_avg": [ 75.75, 42.774846580671685 ], "wc_weaknesses_avg": [ 175.5, 127.61367481582842 ], "wc_questions_avg": [ 46.25, 35.32262023123426 ], "wc_limitations_avg": [ 6.0, 8.660254037844387 ], "wc_review_avg": [ 389.0, 162.19741058352318 ], "wc_reply_reviewers_avg": [ 45.0, 29.06028217344078 ], "wc_reply_authors_avg": [ 180.75, 279.2144113401026 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=264338481867893811&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "tum.de;allianz.de;tum.de;microsoft.com;tum.de;tum.de", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Technical University of Munich;Allianz Versicherungs-AG;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.tum.de;https://www.allianz.com;https://www.microsoft.com", "aff_unique_abbr": "TUM;Allianz;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Germany;United States" }, { "title": "4D Panoptic Scene Graph Generation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72210", "id": "GRHZiTbDDI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc6319dde4fb182b22fb902da9418566-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GRHZiTbDDI", "openreview": "https://openreview.net/forum?id=GRHZiTbDDI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72210", "video": "https://nips.cc/virtual/2023/poster/72210", "author_site": "Jingkang Yang, Jun CEN, WENXUAN PENG, Shuai Liu, Fangzhou Hong, Xiangtai Li, Kaiyang Zhou, Qifeng Chen, Ziwei Liu", "tldr": "", "abstract": "We are living in a three-dimensional space while moving forward through a fourth dimension: time. To allow artificial intelligence to develop a comprehensive understanding of such a 4D environment, we introduce **4D Panoptic Scene Graph (PSG-4D)**, a new representation that bridges the raw visual data perceived in a dynamic 4D world and high-level visual understanding. Specifically, PSG-4D abstracts rich 4D sensory data into nodes, which represent entities with precise location and status information, and edges, which capture the temporal relations. To facilitate research in this new area, we build a richly annotated PSG-4D dataset consisting of 3K RGB-D videos with a total of 1M frames, each of which is labeled with 4D panoptic segmentation masks as well as fine-grained, dynamic scene graphs. 
To solve PSG-4D, we propose PSG4DFormer, a Transformer-based model that can predict panoptic segmentation masks, track masks along the time axis, and generate the corresponding scene graphs via a relation component. Extensive experiments on the new dataset show that our method can serve as a strong baseline for future research on PSG-4D. In the end, we provide a real-world application example to demonstrate how we can achieve dynamic scene understanding by integrating a large language model into our PSG-4D system.", "keywords": "Scene Graph Generation;4D Understanding;4D Perception.", "primary_area": "", "supplementary_material": "", "author": "Jingkang Yang;Jun CEN;Wenxuan Peng;Shuai Liu;Fangzhou Hong;Xiangtai Li;Kaiyang Zhou;Qifeng Chen;Ziwei Liu", "authorids": "~Jingkang_Yang1;~Jun_CEN1;~Wenxuan_Peng2;~Shuai_Liu14;~Fangzhou_Hong1;~Xiangtai_Li1;~Kaiyang_Zhou1;~Qifeng_Chen1;~Ziwei_Liu1", "gender": "M;M;;M;M;;M;M;M", "homepage": "https://jingkang50.github.io/;https://cen-jun.com;;https://github.com/choiszt;;;https://kaiyangzhou.github.io/;http://cqf.io/;https://liuziwei7.github.io/", "dblp": "175/5365.html;280/3156;;;261/3476;;203/3155;117/4819;05/6300-2", "google_scholar": "S-YjbUYAAAAJ;7SKAhBwAAAAJ;;W9190BQAAAAJ;mhaiL5MAAAAJ;;https://scholar.google.co.uk/citations?user=gRIejugAAAAJ;lLMX9hcAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ", "orcid": ";0000-0002-7578-7667;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Jingkang_Yang1;~Jun_CEN1;~Wenxuan_Peng2;~Shuai_Liu14;~Fangzhou_Hong1;~Xiangtai_Li1;~Kaiyang_Zhou1;~Qifeng_Chen1;~Ziwei_Liu1", "aff": "Nanyang Technological University;Hong Kong University of Science and Technology;;Shanghai AI Laboratory;Nanyang Technological University;;Hong Kong Baptist University;Hong Kong University of Science and Technology;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ust.hk;;pjlab.org.cn;ntu.edu.sg;;hkbu.edu.hk;hkust.edu;ntu.edu.sg", "position": "PhD student;PhD student;;Research Intern;PhD student;;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2023d,\ntitle={4D Panoptic Scene Graph Generation},\nauthor={Jingkang Yang and Jun CEN and Wenxuan Peng and Shuai Liu and Fangzhou Hong and Xiangtai Li and Kaiyang Zhou and Qifeng Chen and Ziwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GRHZiTbDDI}\n}", "github": "", "project": "", "reviewers": "z3XZ;CrvQ;4B6f;PY8p;2QCE", "pdf_size": 13823220, "rating": "5;5;7;7;8", "confidence": "4;2;4;5;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;4", "presentation": "4;4;4;3;3", "wc_summary": "71;81;143;80;56", "wc_strengths": "27;25;129;42;107", "wc_weaknesses": "72;124;116;31;97", "wc_questions": "1;5;5;5;22", "wc_limitations": "7;28;8;2;10", "wc_review": "178;263;401;160;292", "wc_reply_reviewers": "0;86;0;37;23", "wc_reply_authors": "0;105;675;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 86.2, 29.78187368182197 ], "wc_strengths_avg": [ 66.0, 43.423495944016295 ], "wc_weaknesses_avg": [ 88.0, 33.66303610787357 ], "wc_questions_avg": [ 7.6, 7.364781055808788 ], "wc_limitations_avg": [ 11.0, 8.899438184514796 ], "wc_review_avg": [ 258.8, 86.75344373568117 ], "wc_reply_reviewers_avg": [ 29.2, 31.72002522067093 
], "wc_reply_authors_avg": [ 156.0, 262.6670896781704 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.578351744823806, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5387739382498205019&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ntu.edu.sg;ust.hk;;pjlab.org.cn;ntu.edu.sg;;hkbu.edu.hk;hkust.edu;ntu.edu.sg", "author_num": 9, "aff_unique_index": "0;1;2;0;3;1;0", "aff_unique_norm": "Nanyang Technological University;Hong Kong University of Science and Technology;Shanghai AI Laboratory;Hong Kong Baptist University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ust.hk;https://www.shanghai-ai-lab.com;https://www.hkbu.edu.hk", "aff_unique_abbr": "NTU;HKUST;SAIL;HKBU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;0;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "LLaVA-Med: Training a Large Language-and-Vision Assistant for Biomedicine in One Day", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73643", "id": "GSuP99u2kR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5abcdf8ecdcacba028c6662789194572-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=GSuP99u2kR", "openreview": "https://openreview.net/forum?id=GSuP99u2kR", "poster": "/media/PosterPDFs/NeurIPS%202023/73643.png?t=1702106885.489689", "slides": "https://nips.cc/virtual/2023/poster/73643", "video": "https://nips.cc/virtual/2023/poster/73643", "author_site": "Chunyuan Li, Cliff Wong, Sheng Zhang, Naoto Usuyama, Haotian Liu, Jianwei Yang, Tristan Naumann, Hoifung Poon, Jianfeng Gao", "tldr": "", "abstract": "Conversational generative AI has demonstrated remarkable promise for empowering biomedical practitioners, but current investigations focus on unimodal text. Multimodal conversational AI has seen rapid progress by leveraging billions of image-text pairs from the public web, but such general-domain vision-language models still lack sophistication in understanding and conversing about biomedical images. In this paper, we propose a cost-efficient approach for training a vision-language conversational assistant that can answer open-ended research questions of biomedical images. The key idea is to leverage a large-scale, broad-coverage biomedical figure-caption dataset extracted from PubMed Central, use GPT-4 to self-instruct open-ended instruction-following data from the captions, and then fine-tune a large general-domain vision-language model using a novel curriculum learning method. Specifically, the model first learns to align biomedical vocabulary using the figure-caption pairs as is, then learns to master open-ended conversational semantics using GPT-4 generated instruction-following data, broadly mimicking how a layperson gradually acquires biomedical knowledge. This enables us to train a Large Language and Vision Assistant for BioMedicine (LLaVA-Med) in less than 15 hours (with eight A100s). LLaVA-Med exhibits excellent multimodal conversational capability and can follow open-ended instruction to assist with inquiries about a biomedical image. On three standard biomedical visual question answering datasets, LLaVA-Med outperforms previous supervised state-of-the-art on certain metrics. 
To facilitate biomedical multimodal research, we will release our instruction-following data and the LLaVA-Med model.", "keywords": "Multimodal assistant;biomedical multimodal instruction-following data", "primary_area": "", "supplementary_material": "/attachment/ec28e7deef5473b2c7d3e5bc5b17116ba689cc37.pdf", "author": "Chunyuan Li;Cliff Wong;Sheng Zhang;Naoto Usuyama;Haotian Liu;Jianwei Yang;Tristan Naumann;Hoifung Poon;Jianfeng Gao", "authorids": "~Chunyuan_Li1;~Cliff_Wong1;~Sheng_Zhang9;~Naoto_Usuyama1;~Haotian_Liu1;~Jianwei_Yang1;~Tristan_Naumann1;~Hoifung_Poon1;~Jianfeng_Gao1", "gender": ";M;M;M;;M;M;M;M", "homepage": "http://chunyuan.li/;https://www.microsoft.com/en-us/research/people/clwon/;https://sheng-z.github.io/;https://www.microsoft.com/en-us/research/people/naotous/;https://hliu.cc;https://www.microsoft.com/en-us/research/people/tristan/;https://www.microsoft.com/en-us/research/people/hoifung/;https://www.microsoft.com/en-us/research/people/jfgao/;https://jwyang.github.io/", "dblp": "64/9590;239/4225;69/6137-12;154/3752;66/10511;148/5539;78/4609;92/5339;", "google_scholar": "Zd7WmXUAAAAJ;Sl05ifcAAAAJ;-LVEXQ8AAAAJ;;Xo6wfnQAAAAJ;cjlSeqwAAAAJ;yqqmVbkAAAAJ;https://scholar.google.com/citations?hl=en;Cl9byD8AAAAJ", "orcid": ";0000-0001-7867-090X;;0000-0003-0888-929X;;0000-0003-2150-1747;0000-0002-9067-0918;;", "linkedin": ";cliffwong/;sheng-z/;;;tristan-naumann/;hoifung-poon-9559943/;;", "or_profile": "~Chunyuan_Li1;~Cliff_Wong1;~Sheng_Zhang9;~Naoto_Usuyama1;~Haotian_Liu1;~Tristan_Naumann1;~Hoifung_Poon1;~Jianfeng_Gao1;~Jianwei_Yang2", "aff": "Microsoft Research;Microsoft Research;Microsoft;Microsoft;Department of Computer Science, University of Wisconsin - Madison;Microsoft Research;Microsoft;Microsoft Research;Microsoft", "aff_domain": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;cs.wisc.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Principal Researcher;Researcher;Researcher;Researcher;PhD student;Principal Researcher;General Manager;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nli2023llavamed,\ntitle={{LL}a{VA}-Med: Training a Large Language-and-Vision Assistant for Biomedicine in One Day},\nauthor={Chunyuan Li and Cliff Wong and Sheng Zhang and Naoto Usuyama and Haotian Liu and Jianwei Yang and Tristan Naumann and Hoifung Poon and Jianfeng Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=GSuP99u2kR}\n}", "github": "", "project": "", "reviewers": "73Lh;kmHu;LUar;SDgf;1nwT", "pdf_size": 3855129, "rating": "5;7;7;8;9", "confidence": "4;3;4;4;4", "wc_summary_and_contributions": "93;130;81;194;57", "wc_strengths": "141;60;37;42;39", "wc_improvement": "726;62;116;211;19", "wc_limitations": "65;14;68;87;18", "wc_correctness": "24;29;27;13;19", "wc_clarity": "6;12;12;7;1", "wc_relation_to_prior_work": "51;28;16;18;2", "wc_documentation": "50;9;29;4;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1157;345;387;577;157", "wc_reply_reviewers": "521;0;0;41;0", "wc_reply_authors": "1087;110;541;768;190", "reply_reviewers": "2;0;0;1;0", "reply_authors": "3;1;1;1;1", "rating_avg": [ 7.2, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 111.0, 47.7283982551269 ], "wc_strengths_avg": [ 63.8, 39.45326348985594 ], "wc_improvement_avg": [ 226.8, 257.70789665821263 ], "wc_limitations_avg": [ 50.4, 29.110822729699688 ], "wc_correctness_avg": [ 22.4, 
5.782732917920384 ], "wc_clarity_avg": [ 7.6, 4.127953488110059 ], "wc_relation_to_prior_work_avg": [ 23.0, 16.272676485446393 ], "wc_documentation_avg": [ 18.6, 18.488915598271305 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 524.6, 343.2186475120488 ], "wc_reply_reviewers_avg": [ 112.4, 204.91617798504834 ], "wc_reply_authors_avg": [ 539.2, 362.92996569586256 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0753778361444409, "gs_citation": 828, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17907075843876440465&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;cs.wisc.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;0;0;0", "aff_unique_norm": "Microsoft;University of Wisconsin-Madison", "aff_unique_dep": "Microsoft Research;Department of Computer Science", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.wisc.edu", "aff_unique_abbr": "MSR;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ChatGPT-Powered Hierarchical Comparisons for Image Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72209", "id": "GTYaYNsFyv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc81297c791bb989deade65c6bd8c1d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GTYaYNsFyv", "openreview": "https://openreview.net/forum?id=GTYaYNsFyv", "poster": "/media/PosterPDFs/NeurIPS%202023/72209.png?t=1702180369.0240345", "slides": "https://nips.cc/virtual/2023/poster/72209", "video": "https://nips.cc/virtual/2023/poster/72209", "author_site": "Zhiyuan Ren, Yiyang Su, Xiaoming Liu", "tldr": "", "abstract": "The zero-shot open-vocabulary setting poses challenges for image classification.\nFortunately, utilizing a vision-language model like CLIP, pre-trained on image-text\npairs, allows for classifying images by comparing embeddings. Leveraging large\nlanguage models (LLMs) such as ChatGPT can further enhance CLIP\u2019s accuracy\nby incorporating class-specific knowledge in descriptions. However, CLIP still\nexhibits a bias towards certain classes and generates similar descriptions for similar\nclasses, disregarding their differences. To address this problem, we present a\nnovel image classification framework via hierarchical comparisons. By recursively\ncomparing and grouping classes with LLMs, we construct a class hierarchy. With\nsuch a hierarchy, we can classify an image by descending from the top to the bottom\nof the hierarchy, comparing image and text embeddings at each level. Through\nextensive experiments and analyses, we demonstrate that our proposed approach is\nintuitive, effective, and explainable. 
Code will be released upon publication.", "keywords": "ChatGPT;Hierarchical Comparisons;Image Classification;Zero shot", "primary_area": "", "supplementary_material": "/attachment/acc1c9958162d0b97eb72d155faef97e36d789cb.zip", "author": "Zhiyuan Ren;Yiyang Su;Xiaoming Liu", "authorids": "~Zhiyuan_Ren1;~Yiyang_Su1;~Xiaoming_Liu2", "gender": "M;;M", "homepage": "https://zhiyuan-r.github.io/;;http://www.cse.msu.edu/~liuxm/", "dblp": ";;l/XiaomingLiu0002", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "zhiyuan-ren-585365223/;;xiaoming-liu-5a7807b/", "or_profile": "~Zhiyuan_Ren1;~Yiyang_Su1;~Xiaoming_Liu2", "aff": "Michigan State University;;Michigan State University", "aff_domain": "msu.edu;;msu.edu", "position": "PhD student;;Professor", "bibtex": "@inproceedings{\nren2023chatgptpowered,\ntitle={Chat{GPT}-Powered Hierarchical Comparisons for Image Classification},\nauthor={Zhiyuan Ren and Yiyang Su and Xiaoming Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GTYaYNsFyv}\n}", "github": "", "project": "", "reviewers": "Y2ri;5GjG;ufWY;bCYj", "pdf_size": 1583646, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;4;3;3", "wc_summary": "129;54;62;60", "wc_strengths": "160;33;79;94", "wc_weaknesses": "378;148;98;33", "wc_questions": "90;4;4;9", "wc_limitations": "36;4;21;6", "wc_review": "793;243;264;202", "wc_reply_reviewers": "390;14;17;38", "wc_reply_authors": "841;15;19;17", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.25, 30.597181242722343 ], "wc_strengths_avg": [ 91.5, 45.489009661675425 ], "wc_weaknesses_avg": [ 164.25, 129.969948449632 ], "wc_questions_avg": [ 26.75, 36.574410453211684 ], "wc_limitations_avg": [ 16.75, 12.910751333675357 ], "wc_review_avg": [ 375.5, 242.07281962252597 ], "wc_reply_reviewers_avg": [ 114.75, 159.18444490590153 ], "wc_reply_authors_avg": [ 223.0, 356.805269019391 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16655408883804457048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "msu.edu;;msu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Michigan State University", "aff_unique_dep": "", "aff_unique_url": "https://www.msu.edu", "aff_unique_abbr": "MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Structured Neural-PI Control with End-to-End Stability and Output Tracking Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72208", "id": "GWIRpKF6yU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d79c1390baa2e4835586b094d82e5ffb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GWIRpKF6yU", "openreview": "https://openreview.net/forum?id=GWIRpKF6yU", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72208.png?t=1702176315.788776", "slides": "https://nips.cc/virtual/2023/poster/72208", "video": "https://nips.cc/virtual/2023/poster/72208", "author_site": "Wenqi Cui, Yan Jiang, Baosen Zhang, Yuanyuan Shi", "tldr": "", "abstract": "We study the optimal control of multiple-input and multiple-output dynamical systems via the design of neural network-based controllers with stability and output tracking guarantees. While neural network-based nonlinear controllers have shown superior performance in various applications, their lack of provable guarantees has restricted their adoption in high-stake real-world applications. This paper bridges the gap between neural network-based controllers and the need for stabilization guarantees. Using equilibrium-independent passivity, a property present in a wide range of physical systems, we propose neural Proportional-Integral (PI) controllers that have provable guarantees of stability and zero steady-state output tracking error. The key structure is the strict monotonicity on proportional and integral terms, which is parameterized as gradients of strictly convex neural networks (SCNN). We construct SCNN with tunable softplus-$\\beta$ activations, which yields universal approximation capability and is also useful in incorporating communication constraints. In addition, the SCNNs serve as Lyapunov functions, giving us end-to-end performance guarantees. Experiments on traffic and power networks demonstrate that the proposed approach improves both transient and steady-state performances, while unstructured neural networks lead to unstable behaviors.", "keywords": "Control;Stability;Tracking;Passivity;Neural network-based controllers;Power systems", "primary_area": "", "supplementary_material": "/attachment/3fe6f0b7c7deec07a837d6ae4a982dca5619c40d.pdf", "author": "Wenqi Cui;Yan Jiang;Baosen Zhang;Yuanyuan Shi", "authorids": "~Wenqi_Cui1;~Yan_Jiang1;~Baosen_Zhang1;~Yuanyuan_Shi1", "gender": "F;;M;", "homepage": ";https://www.yanjiang.info/;http://zhangbaosen.github.io;", "dblp": "253/7356;11/6484-5;63/8760;", "google_scholar": "https://scholar.google.com/citations?hl=en;0cAUBzEAAAAJ;3svZOGAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wenqi_Cui1;~Yan_Jiang1;~Baosen_Zhang1;~Yuanyuan_Shi1", "aff": "University of Washington;;University of Washington, Seattle;", "aff_domain": "uw.edu;;uw.edu;", "position": "PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\ncui2023structured,\ntitle={Structured Neural-{PI} Control with End-to-End Stability and Output Tracking Guarantees},\nauthor={Wenqi Cui and Yan Jiang and Baosen Zhang and Yuanyuan Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GWIRpKF6yU}\n}", "github": "", "project": "", "reviewers": "nsH5;F58a;aJSd;Cp4m;eDxT", "pdf_size": 6035492, "rating": "4;5;6;6;7", "confidence": "4;3;5;4;4", "soundness": "2;2;4;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "134;100;68;54;56", "wc_strengths": "101;57;67;90;39", "wc_weaknesses": "1581;27;175;54;50", "wc_questions": "102;22;2;42;72", "wc_limitations": "70;42;2;36;2", "wc_review": "1988;248;314;276;219", "wc_reply_reviewers": "68;10;21;20;14", "wc_reply_authors": "190;12;20;20;10", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 
0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 82.4, 30.604574821421714 ], "wc_strengths_avg": [ 70.8, 22.346364357541475 ], "wc_weaknesses_avg": [ 377.4, 604.0160925008538 ], "wc_questions_avg": [ 48.0, 35.552777669262355 ], "wc_limitations_avg": [ 30.4, 25.873538606073964 ], "wc_review_avg": [ 609.0, 690.2138798952104 ], "wc_reply_reviewers_avg": [ 26.6, 21.086488564955523 ], "wc_reply_authors_avg": [ 50.4, 69.91881005852431 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.31008683647302115, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15244297595311033103&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "uw.edu;;uw.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "FineMoGen: Fine-Grained Spatio-Temporal Motion Generation and Editing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72207", "id": "GYjV1M5s0D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d52879ef2ba487445ca2e143b104c3b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GYjV1M5s0D", "openreview": "https://openreview.net/forum?id=GYjV1M5s0D", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72207", "video": "https://nips.cc/virtual/2023/poster/72207", "author_site": "Mingyuan Zhang, Huirong Li, Zhongang Cai, Jiawei Ren, Lei Yang, Ziwei Liu", "tldr": "", "abstract": "Text-driven motion generation has achieved substantial progress with the emergence of diffusion models. However, existing methods still struggle to generate complex motion sequences that correspond to fine-grained descriptions, depicting detailed and accurate spatio-temporal actions.This lack of fine controllability limits the usage of motion generation to a larger audience. To tackle these challenges, we present FineMoGen, a diffusion-based motion generation and editing framework that can synthesize fine-grained motions, with spatial-temporal composition to the user instructions. Specifically, FineMoGen builds upon diffusion model with a novel transformer architecture dubbed Spatio-Temporal Mixture Attention SAMI. SAMI optimizes the generation of the global attention template from two perspectives: 1) explicitly modeling the constraints of spatio-temporal composition; and 2) utilizing sparsely-activated mixture-of-experts to adaptively extract fine-grained features. To facilitate a large-scale study on this new fine-grained motion generation task, we contribute the HuMMan-MoGen dataset, which consists of 2,968 videos and 102,336 fine-grained spatio-temporal descriptions. Extensive experiments validate that FineMoGen exhibits superior motion generation quality over state-of-the-art methods. 
Notably, FineMoGen further enables zero-shot motion editing capabilities with the aid of modern large language models (LLMs), which faithfully manipulate motion sequences with fine-grained instructions.", "keywords": "Motion Generation;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/fc5fbd8f844c6abfd3505d709b01708d1f7b042a.zip", "author": "Mingyuan Zhang;Huirong Li;Zhongang Cai;Jiawei Ren;Lei Yang;Ziwei Liu", "authorids": "~Mingyuan_Zhang1;~Huirong_Li1;~Zhongang_Cai1;~Jiawei_Ren1;~Lei_Yang7;~Ziwei_Liu1", "gender": "M;;M;Unspecified;M;M", "homepage": "https://mingyuan-zhang.github.io/;;https://caizhongang.com;https://jiawei-ren.github.io/;https://www.yanglei.me;https://liuziwei7.github.io/", "dblp": ";;232/3190;122/3626-1;50/2484-45;05/6300-2", "google_scholar": "2QLD4fAAAAAJ;;WrDKqIAAAAAJ;https://scholar.google.com.sg/citations?user=YUKPVCoAAAAJ;jZH2IPYAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ", "orcid": ";;0000-0002-1810-3855;0000-0003-1950-5976;0000-0002-0571-5924;", "linkedin": ";;caizhongang/;;;", "or_profile": "~Mingyuan_Zhang1;~Huirong_Li1;~Zhongang_Cai1;~Jiawei_Ren1;~Lei_Yang7;~Ziwei_Liu1", "aff": "Nanyang Technological University;;Nanyang Technological University;Nanyang Technological University;Sensetime Ltd.;Nanyang Technological University", "aff_domain": "ntu.edu.sg;;ntu.edu.sg;ntu.edu.sg;sensetime.com;ntu.edu.sg", "position": "PhD student;;PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023finemogen,\ntitle={FineMoGen: Fine-Grained Spatio-Temporal Motion Generation and Editing},\nauthor={Mingyuan Zhang and Huirong Li and Zhongang Cai and Jiawei Ren and Lei Yang and Ziwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GYjV1M5s0D}\n}", "github": "", "project": "", "reviewers": "4S8a;DsXZ;CFoF;C9Az", "pdf_size": 1952732, "rating": "5;5;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "113;47;95;211", "wc_strengths": "58;41;34;126", "wc_weaknesses": "143;88;25;154", "wc_questions": "4;20;27;89", "wc_limitations": "12;9;52;10", "wc_review": "330;205;233;590", "wc_reply_reviewers": "204;17;14;41", "wc_reply_authors": "493;19;28;21", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 116.5, 59.65525961723744 ], "wc_strengths_avg": [ 64.75, 36.42372166596928 ], "wc_weaknesses_avg": [ 102.5, 51.25670687822229 ], "wc_questions_avg": [ 35.0, 32.27227912620985 ], "wc_limitations_avg": [ 20.75, 18.07449860992 ], "wc_review_avg": [ 339.5, 151.88235578894606 ], "wc_reply_reviewers_avg": [ 69.0, 78.64159204899148 ], "wc_reply_authors_avg": [ 140.25, 203.687720542992 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17650412257948269861&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "ntu.edu.sg;;ntu.edu.sg;ntu.edu.sg;sensetime.com;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Nanyang Technological University;SenseTime", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.ntu.edu.sg;https://www.sensetime.com", "aff_unique_abbr": "NTU;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Conditional Matrix Flows for Gaussian Graphical Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72206", "id": "GYnbubCXhE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4eef8829319316d0b552328715c836c3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GYnbubCXhE", "openreview": "https://openreview.net/forum?id=GYnbubCXhE", "poster": "/media/PosterPDFs/NeurIPS%202023/72206.png?t=1701263669.496748", "slides": "https://nips.cc/virtual/2023/poster/72206", "video": "https://nips.cc/virtual/2023/poster/72206", "author_site": "Marcello Massimo Negri, Fabricio Arend Torres, Volker Roth", "tldr": "", "abstract": "Studying conditional independence among many variables with few observations is a challenging task.\nGaussian Graphical Models (GGMs) tackle this problem by encouraging sparsity in the precision matrix through $l_q$ regularization with $q\\leq1$.\nHowever, most GMMs rely on the $l_1$ norm because the objective is highly non-convex for sub-$l_1$ pseudo-norms.\nIn the frequentist formulation, the $l_1$ norm relaxation provides the solution path as a function of the shrinkage parameter $\\lambda$.\nIn the Bayesian formulation, sparsity is instead encouraged through a Laplace prior, but posterior inference for different $\\lambda$ requires repeated runs of expensive Gibbs samplers.\nHere we propose a general framework for variational inference with matrix-variate Normalizing Flow in GGMs, which unifies the benefits of frequentist and Bayesian frameworks.\nAs a key improvement on previous work, we train with one flow a continuum of sparse regression models jointly for all regularization parameters $\\lambda$ and all $l_q$ norms, including non-convex sub-$l_1$ pseudo-norms.\nWithin one model we thus have access to (i) the evolution of the posterior for any $\\lambda$ and any $l_q$ (pseudo-) norm, (ii) the marginal log-likelihood for model selection, and (iii) the frequentist solution paths through simulated annealing in the MAP limit.", "keywords": "normalizing flow;variational inference;graphical lasso;gaussian graphical model;bayesian inference", "primary_area": "", "supplementary_material": "", "author": "Marcello Massimo Negri;Fabricio Arend Torres;Volker Roth", "authorids": "~Marcello_Massimo_Negri1;~Fabricio_Arend_Torres1;~Volker_Roth1", "gender": "M;M;M", "homepage": ";;", "dblp": ";;23/1185-1", "google_scholar": ";https://scholar.google.com/citations?authuser=1;https://scholar.google.ch/citations?user=v1qj03cAAAAJ", "orcid": ";;0000-0003-0991-0273", "linkedin": "marcello-negri-b7b025176/;;", "or_profile": "~Marcello_Massimo_Negri1;~Fabricio_Arend_Torres1;~Volker_Roth1", "aff": "University of Basel;University of Basel;University of Basel", "aff_domain": "unibas.ch;unibas.ch;unibas.ch", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nnegri2023conditional,\ntitle={Conditional Matrix Flows for Gaussian Graphical Models},\nauthor={Marcello Massimo Negri and Fabricio Arend Torres and Volker Roth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GYnbubCXhE}\n}", "github": "", "project": "", "reviewers": "3sWQ;vqBu;pTVu;CnQu", "pdf_size": 961130, "rating": 
"5;6;6;6", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;4;3", "wc_summary": "114;42;63;60", "wc_strengths": "8;25;92;76", "wc_weaknesses": "145;69;125;31", "wc_questions": "1;24;10;34", "wc_limitations": "11;10;40;4", "wc_review": "279;170;330;205", "wc_reply_reviewers": "0;15;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 26.78035660703569 ], "wc_strengths_avg": [ 50.25, 34.744603897583865 ], "wc_weaknesses_avg": [ 92.5, 45.13036671687922 ], "wc_questions_avg": [ 17.25, 12.676257334087218 ], "wc_limitations_avg": [ 16.25, 13.970952007647869 ], "wc_review_avg": [ 246.0, 62.45398305952952 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10606079828572558998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "unibas.ch;unibas.ch;unibas.ch", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Basel", "aff_unique_dep": "", "aff_unique_url": "https://www.unibas.ch", "aff_unique_abbr": "UniBas", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Cal-QL: Calibrated Offline RL Pre-Training for Efficient Online Fine-Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72205", "id": "GcEIvidYSw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c44a04289beaf0a7d968a94066a1d696-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GcEIvidYSw", "openreview": "https://openreview.net/forum?id=GcEIvidYSw", "poster": "/media/PosterPDFs/NeurIPS%202023/72205.png?t=1702182344.9376574", "slides": "https://nips.cc/virtual/2023/poster/72205", "video": "https://nips.cc/virtual/2023/poster/72205", "author_site": "Mitsuhiko Nakamoto, Simon Zhai, Anikait Singh, Max Sobol Mark, Yi Ma, Chelsea Finn, Aviral Kumar, Sergey Levine", "tldr": "", "abstract": "A compelling use case of offline reinforcement learning (RL) is to obtain a policy initialization from existing datasets followed by fast online fine-tuning with limited interaction. However, existing offline RL methods tend to behave poorly during fine-tuning. In this paper, we devise an approach for learning an effective initialization from offline data that also enables fast online fine-tuning capabilities. Our approach, calibrated Q-learning (Cal-QL), accomplishes this by learning a conservative value function initialization that underestimates the value of the learned policy from offline data, while also being calibrated, in the sense that the learned Q-values are at a reasonable scale. We refer to this property as calibration, and define it formally as providing a lower bound on the true value function of the learned policy and an upper bound on the value of some other (suboptimal) reference policy, which may simply be the behavior policy. 
We show that offline RL algorithms that learn such calibrated value functions lead to effective online fine-tuning, enabling us to take the benefits of offline initializations in online fine-tuning. In practice, Cal-QL can be implemented on top of the conservative Q learning (CQL) for offline RL within a one-line code change. Empirically, Cal-QL outperforms state-of-the-art methods on 9/11 fine-tuning benchmark tasks that we study in this paper. Code and video are available at https://nakamotoo.github.io/Cal-QL", "keywords": "offline reinforcement learning;online fine-tuning", "primary_area": "", "supplementary_material": "/attachment/20be61b6088fc96b5714e315a84711587333c9f6.zip", "author": "Mitsuhiko Nakamoto;Yuexiang Zhai;Anikait Singh;Max Sobol Mark;Yi Ma;Chelsea Finn;Aviral Kumar;Sergey Levine", "authorids": "~Mitsuhiko_Nakamoto1;~Yuexiang_Zhai1;~Anikait_Singh1;~Max_Sobol_Mark1;~Yi_Ma4;~Chelsea_Finn1;~Aviral_Kumar2;~Sergey_Levine1", "gender": ";;M;M;M;F;M;M", "homepage": "https://nakamotoo.github.io/;;https://asap7772.github.io/;https://github.com/MaxSobolMark/;http://people.eecs.berkeley.edu/~yima/;https://ai.stanford.edu/~cbfinn/;https://aviralkumar2907.github.io/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";241/6124.html;302/3876;;;131/1783;202/7961;80/7594", "google_scholar": "wIDVzroAAAAJ;78WTKm4AAAAJ;lPaISmIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ;vfPE6hgAAAAJ;;8R35rCwAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;asap7772/;max-sobol-mark/;;;;", "or_profile": "~Mitsuhiko_Nakamoto1;~Yuexiang_Zhai1;~Anikait_Singh1;~Max_Sobol_Mark1;~Yi_Ma4;~Chelsea_Finn1;~Aviral_Kumar2;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Computer Science Department, Stanford University;University of California, Berkeley;Google;University of California, Berkeley;Google", "aff_domain": "eecs.berkeley.edu;berkeley.edu;berkeley.edu;cs.stanford.edu;berkeley.edu;google.com;berkeley.edu;google.com", "position": "PhD student;PhD student;Undergrad student;MS student;Full Professor;Research Scientist;PhD student;Research Scientist", "bibtex": "@inproceedings{\nnakamoto2023calql,\ntitle={Cal-{QL}: Calibrated Offline {RL} Pre-Training for Efficient Online Fine-Tuning},\nauthor={Mitsuhiko Nakamoto and Yuexiang Zhai and Anikait Singh and Max Sobol Mark and Yi Ma and Chelsea Finn and Aviral Kumar and Sergey Levine},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GcEIvidYSw}\n}", "github": "", "project": "", "reviewers": "J85q;zhZZ;zucz;vsVq", "pdf_size": 5322150, "rating": "5;5;6;6", "confidence": "2;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "52;99;105;66", "wc_strengths": "31;52;108;31", "wc_weaknesses": "99;107;123;110", "wc_questions": "16;91;2;3", "wc_limitations": "2;94;1;16", "wc_review": "200;443;339;226", "wc_reply_reviewers": "0;0;0;57", "wc_reply_authors": "152;143;48;105", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.5, 22.1641602593015 ], "wc_strengths_avg": [ 55.5, 31.5 ], "wc_weaknesses_avg": [ 109.75, 8.642193008721803 ], "wc_questions_avg": [ 28.0, 36.78994427829431 ], "wc_limitations_avg": [ 
28.25, 38.42118556213486 ], "wc_review_avg": [ 302.0, 96.73417183188162 ], "wc_reply_reviewers_avg": [ 14.25, 24.681724007856502 ], "wc_reply_authors_avg": [ 112.0, 40.94508517514648 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12832206243301436224&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "eecs.berkeley.edu;berkeley.edu;berkeley.edu;cs.stanford.edu;berkeley.edu;google.com;berkeley.edu;google.com", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;0;2", "aff_unique_norm": "University of California, Berkeley;Stanford University;Google", "aff_unique_dep": ";Computer Science Department;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Stanford;Google", "aff_campus_unique_index": "0;0;0;1;0;2;0;2", "aff_campus_unique": "Berkeley;Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "May the Force be with You: Unified Force-Centric Pre-Training for 3D Molecular Conformations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72204", "id": "Ge8Mhggq0z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e637029c42aa593850eeebf46616444d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ge8Mhggq0z", "openreview": "https://openreview.net/forum?id=Ge8Mhggq0z", "poster": "/media/PosterPDFs/NeurIPS%202023/72204.png?t=1702184957.3153799", "slides": "https://nips.cc/virtual/2023/poster/72204", "video": "https://nips.cc/virtual/2023/poster/72204", "author_site": "Rui Feng, Qi Zhu, Huan Tran, Binghong Chen, Aubrey Toland, Rampi Ramprasad, Chao Zhang", "tldr": "", "abstract": "Recent works have shown the promise of learning pre-trained models for 3D molecular representation.\nHowever, existing pre-training models focus predominantly on equilibrium data and largely overlook off-equilibrium conformations.\nIt is challenging to extend these methods to off-equilibrium data because their training objective relies on assumptions of conformations being the local energy minima. We address this gap by proposing a force-centric pretraining model for 3D molecular conformations covering both equilibrium and off-equilibrium data.\nFor off-equilibrium data, our model learns directly from their atomic forces. For equilibrium data, we introduce zero-force regularization and force-based denoising techniques to approximate near-equilibrium forces.\nWe obtain a unified pre-trained model for 3D molecular representation with over 15 million diverse conformations. Experiments show that, with our pre-training objective, we increase force accuracy by around 3 times compared to the un-pre-trained Equivariant Transformer model. By incorporating regularizations on equilibrium data, we solve the problem of unstable MD simulations in vanilla Equivariant Transformers, achieving state-of-the-art simulation performance with 2.45 times faster inference time than NequIP. 
As a powerful molecular encoder, our pre-trained model achieves performance on par with the state of the art on property prediction tasks.", "keywords": "molecular pretraining;molecular representation learning", "primary_area": "", "supplementary_material": "/attachment/8f68539cb71530be6ccebeaff7417c3b834336f7.pdf", "author": "Rui Feng;Qi Zhu;Huan Tran;Binghong Chen;Aubrey Toland;Rampi Ramprasad;Chao Zhang", "authorids": "~Rui_Feng1;~Qi_Zhu7;htran81@gatech.edu;~Binghong_Chen1;artoland@gatech.edu;rramprasad3@gatech.edu;~Chao_Zhang15", "gender": "M;M;;M;;;", "homepage": ";https://gentlezhu.github.io/;;http://binghongchen.net/;;;http://chaozhang.org/", "dblp": "28/4423;66/5923-8;;192/2022;;;94/3019-14", "google_scholar": ";xCHy4c8AAAAJ;;6Px5HxsAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-0129-8542;;;;;0000-0003-3009-598X", "linkedin": ";qi-zhu-22633598/;;binghong-chen-91b697181/;;;", "or_profile": "~Rui_Feng1;~Qi_Zhu7;htran81@gatech.edu;~Binghong_Chen1;artoland@gatech.edu;rramprasad3@gatech.edu;~Chao_Zhang15", "aff": "Georgia Institute of Technology;University of Illinois, Urbana Champaign;;;;;Georgia Institute of Technology", "aff_domain": "gatech.edu;illinois.edu;;;;;gatech.edu", "position": "PhD student;PhD student;;;;;Assistant Professor", "bibtex": "@inproceedings{\nfeng2023may,\ntitle={May the Force be with You: Unified Force-Centric Pre-Training for 3D Molecular Conformations},\nauthor={Rui Feng and Qi Zhu and Huan Tran and Binghong Chen and Aubrey Toland and Rampi Ramprasad and Chao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ge8Mhggq0z}\n}", "github": "", "project": "", "reviewers": "8Ywg;EQHq;1y4N", "pdf_size": 1165538, "rating": "4;5;7", "confidence": "4;5;4", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "40;45;40", "wc_strengths": "54;26;61", "wc_weaknesses": "146;711;32", "wc_questions": "39;1;55", "wc_limitations": "43;1;56", "wc_review": "322;784;244", "wc_reply_reviewers": "0;100;0", "wc_reply_authors": "68;823;0", "reply_reviewers": "0;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 41.666666666666664, 2.357022603955158 ], "wc_strengths_avg": [ 47.0, 15.121728296285006 ], "wc_weaknesses_avg": [ 296.3333333333333, 296.88418991624025 ], "wc_questions_avg": [ 31.666666666666668, 22.647050335284035 ], "wc_limitations_avg": [ 33.333333333333336, 23.471022323045258 ], "wc_review_avg": [ 450.0, 238.31072153807935 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 47.14045207910317 ], "wc_reply_authors_avg": [ 297.0, 372.97274252506264 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.18898223650461357, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3504793858724922664&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "gatech.edu;illinois.edu;;;;;gatech.edu", "author_num": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.gatech.edu;https://illinois.edu", "aff_unique_abbr": "Georgia Tech;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Human-Aligned Calibration for AI-Assisted Decision Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72203", "id": "GfITbjrIOd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f1d1196426ba84f47d115cac3dcb9d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GfITbjrIOd", "openreview": "https://openreview.net/forum?id=GfITbjrIOd", "poster": "/media/PosterPDFs/NeurIPS%202023/72203.png?t=1701809231.3099873", "slides": "https://nips.cc/virtual/2023/poster/72203", "video": "https://nips.cc/virtual/2023/poster/72203", "author_site": "Nina Corvelo Benz, Manuel Rodriguez", "tldr": "", "abstract": "Whenever a binary classifier is used to provide decision support, it typically provides both a label prediction and a confidence value. Then, the decision maker is supposed to use the confidence value to calibrate how much to trust the prediction. In this context, it has been often argued that the confidence value should correspond to a well calibrated estimate of the probability that the predicted label matches the ground truth label. However, multiple lines of empirical evidence suggest that decision makers have difficulties at developing a good sense on when to trust a prediction using these confidence values. In this paper, our goal is first to understand why and then investigate how to construct more useful confidence values. We first argue that, for a broad class of utility functions, there exists data distributions for which a rational decision maker is, in general, unlikely to discover the optimal decision policy using the above confidence values\u2014an optimal decision maker would need to sometimes place more (less) trust on predictions with lower (higher) confidence values. However, we then show that, if the confidence values satisfy a natural alignment property with respect to the decision maker\u2019s confidence on her own predictions, there always exists an optimal decision policy under which the level of trust the decision maker would need to place on predictions is monotone on the confidence values, facilitating its discoverability. Further, we show that multicalibration with respect to the decision maker\u2019s confidence on her own prediction is a sufficient condition for alignment. Experiments on a real AI-assisted decision making scenario where a classifier provides decision support to human decision makers validate our theoretical results and suggest that alignment may lead to better decisions.", "keywords": "Calibration;Trustworthy Machine Learning;Human-Centric ML;Probabilistic Models and Methods", "primary_area": "", "supplementary_material": "/attachment/141375df12edc0a9f601fe418a27880b8e75cc4a.zip", "author": "Nina L. 
Corvelo Benz;Manuel Gomez Rodriguez", "authorids": "~Nina_L._Corvelo_Benz1;~Manuel_Gomez_Rodriguez1", "gender": "M;F", "homepage": "https://www.mpi-sws.org/~manuelgr/;https://ninacobe.github.io", "dblp": "73/8260;244/8378", "google_scholar": "https://scholar.google.com.tw/citations?user=UcuXmuwAAAAJ;7hmCYJYAAAAJ", "orcid": ";", "linkedin": ";nina-corvelo-benz-261b40200/", "or_profile": "~Manuel_Gomez_Rodriguez1;~Nina_Laura_Corvelo_Benz1", "aff": "MPI-SWS;MPI-SWS", "aff_domain": "mpi-sws.org;mpi-sws.org", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nbenz2023humanaligned,\ntitle={Human-Aligned Calibration for {AI}-Assisted Decision Making},\nauthor={Nina L. Corvelo Benz and Manuel Gomez Rodriguez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GfITbjrIOd}\n}", "github": "", "project": "", "reviewers": "iDbU;Q29R;TBZV;oDhU", "pdf_size": 1440867, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;4", "presentation": "2;3;3;3", "wc_summary": "125;128;69;123", "wc_strengths": "83;254;50;90", "wc_weaknesses": "211;487;146;124", "wc_questions": "22;158;128;90", "wc_limitations": "19;32;1;23", "wc_review": "460;1059;394;450", "wc_reply_reviewers": "16;290;68;6", "wc_reply_authors": "0;28;19;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.25, 24.457871943405053 ], "wc_strengths_avg": [ 119.25, 79.25078863960913 ], "wc_weaknesses_avg": [ 242.0, 145.02241206103284 ], "wc_questions_avg": [ 99.5, 50.820763473210434 ], "wc_limitations_avg": [ 18.75, 11.277743568639961 ], "wc_review_avg": [ 590.75, 271.51185517395 ], "wc_reply_reviewers_avg": [ 95.0, 115.0173899895142 ], "wc_reply_authors_avg": [ 11.75, 12.173228823939851 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15631211213301491795&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "mpi-sws.org;mpi-sws.org", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Max Planck Institute for Software Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Consistent Diffusion Models: Mitigating Sampling Drift by Learning to be Consistent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72202", "id": "GfZGdJHj27", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/831406cfe7e4a0aed5ac5c8a8389d1f5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GfZGdJHj27", "openreview": "https://openreview.net/forum?id=GfZGdJHj27", "poster": "/media/PosterPDFs/NeurIPS%202023/72202.png?t=1702322971.1919634", "slides": "https://nips.cc/virtual/2023/poster/72202", "video": "https://nips.cc/virtual/2023/poster/72202", "author_site": "Giannis Daras, Yuval Dagan, Alex Dimakis, Constantinos Daskalakis", "tldr": "", "abstract": "Imperfect score-matching leads to a shift between the training and the sampling distribution of 
diffusion models. Due to the recursive nature of the generation process, errors in previous steps yield sampling iterates that drift away from the training distribution. However, the standard training objective via Denoising Score Matching (DSM) is only designed to optimize over non-drifted data. To train on drifted data, we propose to enforce a \\emph{Consistency} property (CP), which states that predictions of the model on its own\ngenerated data are consistent across time. Theoretically, we show that the differential equation that describes CP, together with the one that describes a conservative vector field, has a unique solution given some initial condition. Consequently, if the score is learned well on non-drifted points via DSM (enforcing the true initial condition), then enforcing CP on drifted points propagates true score values. Empirically, we show that enforcing CP improves the generation quality for conditional and unconditional generation on CIFAR-10, AFHQ, and FFHQ. \nWe open-source our code and models: https://github.com/giannisdaras/cdm.", "keywords": "diffusion models;sampling drift;Fokker-Planck;invariances;Stochastic Differential Equations;Martingales", "primary_area": "", "supplementary_material": "/attachment/8d152c3d34d40d11e6d74fab4db2300a4ecdc13d.zip", "author": "Giannis Daras;Yuval Dagan;Alex Dimakis;Constantinos Costis Daskalakis", "authorids": "~Giannis_Daras1;~Yuval_Dagan1;~Alex_Dimakis1;~Constantinos_Costis_Daskalakis1", "gender": "M;M;M;M", "homepage": "https://giannisdaras.github.io/;https://yuvaldagan.wordpress.com/;https://people.eecs.berkeley.edu/~alexdimakis/;http://people.csail.mit.edu/costis/", "dblp": "254/2703;190/7292;19/5000.html;", "google_scholar": "LaScvbQAAAAJ;;JSFmVQEAAAAJ;iTv2cOgAAAAJ", "orcid": ";;;", "linkedin": ";;alex-dimakis-b1b20320/;", "or_profile": "~Giannis_Daras1;~Yuval_Dagan1;~Alex_Dimakis1;~Constantinos_Costis_Daskalakis1", "aff": "University of Texas, Austin;Massachusetts Institute of Technology;University of Texas at Austin;Massachusetts Institute of Technology", "aff_domain": "utexas.edu;mit.edu;utexas.edu;mit.edu", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndaras2023consistent,\ntitle={Consistent Diffusion Models: Mitigating Sampling Drift by Learning to be Consistent},\nauthor={Giannis Daras and Yuval Dagan and Alex Dimakis and Constantinos Costis Daskalakis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GfZGdJHj27}\n}", "github": "", "project": "", "reviewers": "w9Z2;Qt1z;G7yx;UfbR", "pdf_size": 1907199, "rating": "5;5;7;7", "confidence": "4;2;4;3", "soundness": "4;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "111;142;364;128", "wc_strengths": "80;33;154;70", "wc_weaknesses": "108;94;920;93", "wc_questions": "87;27;207;49", "wc_limitations": "9;41;48;15", "wc_review": "395;337;1693;355", "wc_reply_reviewers": "118;139;38;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 186.25, 103.20943513070885 ], "wc_strengths_avg": [ 84.25, 43.91113184603649 ], "wc_weaknesses_avg": [ 303.75, 355.8415202024632 ], "wc_questions_avg": [ 92.5, 69.50359702921857 ], "wc_limitations_avg": [ 28.25, 16.57369904396722
], "wc_review_avg": [ 695.0, 576.5778351619147 ], "wc_reply_reviewers_avg": [ 78.75, 50.70194769434405 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6085707543599575433&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "utexas.edu;mit.edu;utexas.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Texas at Austin;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://web.mit.edu", "aff_unique_abbr": "UT Austin;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "CoDrug: Conformal Drug Property Prediction with Density Estimation under Covariate Shift", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72201", "id": "GgdFLb94Ld", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7691484a7a35d5e2742279c1d926b778-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GgdFLb94Ld", "openreview": "https://openreview.net/forum?id=GgdFLb94Ld", "poster": "/media/PosterPDFs/NeurIPS%202023/72201.png?t=1701814848.8495617", "slides": "https://nips.cc/virtual/2023/poster/72201", "video": "https://nips.cc/virtual/2023/poster/72201", "author_site": "Siddhartha Laghuvarapu, Zhen Lin, Jimeng Sun", "tldr": "", "abstract": "In drug discovery, it is vital to confirm the predictions of pharmaceutical properties from computational models using costly wet-lab experiments. Hence, obtaining reliable uncertainty estimates is crucial for prioritizing drug molecules for subsequent experimental validation. Conformal Prediction (CP) is a promising tool for creating such prediction sets for molecular properties with a coverage guarantee. However, the exchangeability assumption of CP is often challenged with covariate shift in drug discovery tasks: Most datasets contain limited labeled data, which may not be representative of the vast chemical space from which molecules are drawn. To address this limitation, we propose a method called CoDrug that employs an energy-based model leveraging both training data and unlabelled data, and Kernel Density Estimation (KDE) to assess the densities of a molecule set. The estimated densities are then used to weigh the molecule samples while building prediction sets and rectifying for distribution shift. In extensive experiments involving realistic distribution drifts in various small-molecule drug discovery tasks, we demonstrate the ability of CoDrug to provide valid prediction sets and its utility in addressing the distribution shift arising from de novo drug design models. 
On average, using CoDrug can reduce the coverage gap by over 35% when compared to conformal prediction sets not adjusted for covariate shift.", "keywords": "drug discovery;molecule property prediction;conformal prediction", "primary_area": "", "supplementary_material": "/attachment/6cff64a75d46bfdb9fd88210ef435130ebcaf2ad.zip", "author": "Siddhartha Laghuvarapu;Zhen Lin;Jimeng Sun", "authorids": "~Siddhartha_Laghuvarapu1;~Zhen_Lin2;~Jimeng_Sun3", "gender": "M;;", "homepage": ";;http://sunlab.org", "dblp": "258/1129;;", "google_scholar": "n8x8MVQAAAAJ;;9jmmp5sAAAAJ", "orcid": "0000-0002-1269-3778;;0000-0003-1512-6426", "linkedin": "https://in.linkedin.com/in/sid-laghu;;jimengsun/", "or_profile": "~Siddhartha_Laghuvarapu1;~Zhen_Lin2;~Jimeng_Sun3", "aff": "Department of Computer Science;;Georgia Institute of Technology", "aff_domain": "cs.illinois.edu;;gatech.edu", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nlaghuvarapu2023codrug,\ntitle={CoDrug: Conformal Drug Property Prediction with Density Estimation under Covariate Shift},\nauthor={Siddhartha Laghuvarapu and Zhen Lin and Jimeng Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GgdFLb94Ld}\n}", "github": "", "project": "", "reviewers": "e8Bh;uhvu;nCYs;SDHG", "pdf_size": 1052480, "rating": "3;6;7;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;4", "wc_summary": "115;123;97;37", "wc_strengths": "52;82;87;78", "wc_weaknesses": "164;84;75;9", "wc_questions": "26;46;121;2", "wc_limitations": "12;1;32;6", "wc_review": "369;336;412;132", "wc_reply_reviewers": "71;23;5;19", "wc_reply_authors": "56;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 33.67491648096547 ], "wc_strengths_avg": [ 74.75, 13.516193990913271 ], "wc_weaknesses_avg": [ 83.0, 55.00454526673228 ], "wc_questions_avg": [ 48.75, 44.52737921773524 ], "wc_limitations_avg": [ 12.75, 11.776565713313877 ], "wc_review_avg": [ 312.25, 107.49970930193253 ], "wc_reply_reviewers_avg": [ 29.5, 24.8746859276655 ], "wc_reply_authors_avg": [ 14.0, 24.24871130596428 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7624928516630233, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2473595769491416576&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.illinois.edu;;gatech.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Unknown Institution;Georgia Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": ";https://www.gatech.edu", "aff_unique_abbr": ";Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "PreDiff: Precipitation Nowcasting with Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72200", "id": "Gh67ZZ6zkS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f82ba6a6b981fbbecf5f2ee5de7db39c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Gh67ZZ6zkS", "openreview": 
"https://openreview.net/forum?id=Gh67ZZ6zkS", "poster": "/media/PosterPDFs/NeurIPS%202023/72200.png?t=1699807659.1305287", "slides": "https://nips.cc/virtual/2023/poster/72200", "video": "https://nips.cc/virtual/2023/poster/72200", "author_site": "Zhihan Gao, Xingjian Shi, Boran Han, Hao Wang, Xiaoyong Jin, Danielle Maddix, Yi Zhu, Mu Li, Yuyang (Bernie) Wang", "tldr": "", "abstract": "Earth system forecasting has traditionally relied on complex physical models that are computationally expensive and require significant domain expertise.\nIn the past decade, the unprecedented increase in spatiotemporal Earth observation data has enabled data-driven forecasting models using deep learning techniques.\nThese models have shown promise for diverse Earth system forecasting tasks but either struggle with handling uncertainty or neglect domain-specific prior knowledge, resulting in averaging possible futures to blurred forecasts or generating physically implausible predictions.\nTo address these limitations, we propose a two-stage pipeline for probabilistic spatiotemporal forecasting: 1) We develop *PreDiff*, a conditional latent diffusion model capable of probabilistic forecasts. 2) We incorporate an explicit knowledge alignment mechanism to align forecasts with domain-specific physical constraints. \nThis is achieved by estimating the deviation from imposed constraints at each denoising step and adjusting the transition distribution accordingly.\nWe conduct empirical studies on two datasets: N-body MNIST, a synthetic dataset with chaotic behavior, and SEVIR, a real-world precipitation nowcasting dataset. \nSpecifically, we impose the law of conservation of energy in N-body MNIST and anticipated precipitation intensity in SEVIR. \nExperiments demonstrate the effectiveness of PreDiff in handling uncertainty, incorporating domain-specific prior knowledge, and generating forecasts that exhibit high operational utility.", "keywords": "Machine Learning for Earth Science;Spatiotemporal Forecasting;Generative Models;Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/89b143e3dbf24f11d2f3f830bc5aa7d3891558fd.zip", "author": "Zhihan Gao;Xingjian Shi;Boran Han;Hao Wang;Xiaoyong Jin;Danielle C. 
Maddix;Yi Zhu;Mu Li;Bernie Wang", "authorids": "~Zhihan_Gao1;~Xingjian_Shi1;~Boran_Han1;~Hao_Wang3;~Xiaoyong_Jin1;~Danielle_C._Maddix1;~Yi_Zhu1;~Mu_Li4;~Bernie_Wang1", "gender": "M;M;;;M;;M;;M", "homepage": "https://gaozhihan.github.io/;https://sxjscience.github.io/;;;;https://dcmaddix.github.io/;https://bryanyzhu.github.io/;https://github.com/mli;http://web.mit.edu/~ywang02/www/", "dblp": "117/3136-1.html;145/9987;;;239/8414;216/8804;;;43/8355-1", "google_scholar": "P6ACUAUAAAAJ;https://scholar.google.com.hk/citations?user=P4G6H7oAAAAJ;;;;IPDByA8AAAAJ;IXw4UiwAAAAJ;;IKUm624AAAAJ", "orcid": ";;;;;;0000-0002-6482-6712;;0000-0002-0291-7184", "linkedin": "zhihan-gao-4224a8293/;;;;xiaoyong-jin-185821104/;danielle-maddix-robinson/;yi-zhu-546a437a/;;", "or_profile": "~Zhihan_Gao1;~Xingjian_Shi1;~Boran_Han1;~Hao_Wang3;~Xiaoyong_Jin1;~Danielle_C._Maddix1;~Yi_Zhu1;~Mu_Li4;~Bernie_Wang1", "aff": "Amazon;Amazon Web Services;;;Amazon;AWS AI Labs;Amazon;Amazon;Amazon", "aff_domain": "amazon.com;amazon.com;;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "position": "Applied scientist intern;Applied Scientist;;;Researcher;Applied Scientist;Applied Scientist;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ngao2023prediff,\ntitle={PreDiff: Precipitation Nowcasting with Latent Diffusion Models},\nauthor={Zhihan Gao and Xingjian Shi and Boran Han and Hao Wang and Xiaoyong Jin and Danielle C. Maddix and Yi Zhu and Mu Li and Bernie Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Gh67ZZ6zkS}\n}", "github": "", "project": "", "reviewers": "Ntzp;zn7Z;sDg9;yf9m", "pdf_size": 10101658, "rating": "4;5;6;8", "confidence": "4;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "2;2;3;4", "wc_summary": "51;53;63;195", "wc_strengths": "35;32;42;61", "wc_weaknesses": "151;144;182;1", "wc_questions": "4;38;224;58", "wc_limitations": "4;4;22;8", "wc_review": "245;271;533;323", "wc_reply_reviewers": "0;64;203;23", "wc_reply_authors": "0;0;766;0", "reply_reviewers": "0;1;3;1", "reply_authors": "1;1;4;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 60.50413209029611 ], "wc_strengths_avg": [ 42.5, 11.280514172678478 ], "wc_weaknesses_avg": [ 119.5, 69.89456345095805 ], "wc_questions_avg": [ 81.0, 84.78797084492588 ], "wc_limitations_avg": [ 9.5, 7.399324293474371 ], "wc_review_avg": [ 343.0, 113.23427043081966 ], "wc_reply_reviewers_avg": [ 72.5, 78.75436495839453 ], "wc_reply_authors_avg": [ 191.5, 331.68772964944003 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13324510776030632122&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "amazon.com;amazon.com;;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Combining 
Behaviors with the Successor Features Keyboard", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72199", "id": "GhNCFtLSsy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1f69928210578f4cf5b538a8c8806798-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GhNCFtLSsy", "openreview": "https://openreview.net/forum?id=GhNCFtLSsy", "poster": "/media/PosterPDFs/NeurIPS%202023/72199.png?t=1702325262.7366824", "slides": "https://nips.cc/virtual/2023/poster/72199", "video": "https://nips.cc/virtual/2023/poster/72199", "author_site": "Wilka Carvalho Carvalho, Andre Saraiva, Angelos Filos, Andrew Lampinen, Loic Matthey, Richard L Lewis, Honglak Lee, Satinder Singh, Danilo Jimenez Rezende, Daniel Zoran", "tldr": "", "abstract": "The Option Keyboard (OK) was recently proposed as a method for transferring behavioral knowledge across tasks. OK transfers knowledge by adaptively combining subsets of known behaviors using Successor Features (SFs) and Generalized Policy Improvement (GPI).\nHowever, it relies on hand-designed state-features and task encodings which are cumbersome to design for every new environment.\nIn this work, we propose the \"Successor Features Keyboard\" (SFK), which enables transfer with discovered state-features and task encodings.\nTo enable discovery, we propose the \"Categorical Successor Feature Approximator\" (CSFA), a novel learning algorithm for estimating SFs while jointly discovering state-features and task encodings.\nWith SFK and CSFA, we achieve the first demonstration of transfer with SFs in a challenging 3D environment where all the necessary representations are discovered.\nWe first compare CSFA against other methods for approximating SFs and show that only CSFA discovers representations compatible with SF&GPI at this scale.\nWe then compare SFK against transfer learning baselines and show that it transfers most quickly to long-horizon tasks.", "keywords": "deep reinforcement learning;successor features;transfer;generalization;feature-discovery", "primary_area": "", "supplementary_material": "/attachment/ad36edaaf3a55ce3bd8a3ac629761989ddda7c20.zip", "author": "Wilka Carvalho;Andre Saraiva;Angelos Filos;Andrew Kyle Lampinen;Loic Matthey;Richard Lewis;Honglak Lee;Satinder Singh;Danilo Jimenez Rezende;Daniel Zoran", "authorids": "~Wilka_Carvalho2;~Andre_Saraiva1;~Angelos_Filos1;~Andrew_Kyle_Lampinen1;~Loic_Matthey1;~Richard_Lewis1;~Honglak_Lee2;~Satinder_Singh2;~Danilo_Jimenez_Rezende2;~Daniel_Zoran1", "gender": "M;M;M;M;M;M;M;M;;M", "homepage": ";;https://github.com/google/BIG-bench;https://matthey.me/;;;;http://web.eecs.umich.edu/~honglak;;https://wcarvalho.github.io/", "dblp": ";https://dblp.uni-trier.de/pers/hd/f/Filos:Angelos;https://dblp.uni-trier.de/pers/hd/l/Lampinen:Andrew_K=;34/3990;12/590;18/9054;31/11107;58/2562;;230/3919", "google_scholar": "-_yUSOoAAAAJ;SGjYdrEAAAAJ;_N44XxAAAAAJ;https://scholar.google.co.uk/citations?user=f520HmwAAAAJ;;1JQDH_AAAAAJ;https://scholar.google.co.uk/citations?user=UGlyhFMAAAAJ;fmSHtE8AAAAJ;;tvJTXwoAAAAJ", "orcid": ";;;;;;;;;", "linkedin": "andresnds/;;;;;;;;;wilkacarvalho", "or_profile": "~Andre_Saraiva1;~Angelos_Filos1;~Andrew_Kyle_Lampinen1;~Loic_Matthey1;~Richard_Lewis1;~Daniel_Zoran1;~Danilo_Jimenez_Rezende1;~Honglak_Lee1;~Satinder_Baveja2;~Wilka_Torrico_Carvalho1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;University of Michigan - Ann Arbor;Google DeepMind;Google DeepMind;University of Michigan;Google DeepMind;Google", 
"aff_domain": "deepmind.com;deepmind.com;google.com;google.com;umich.edu;google.com;google.com;umich.edu;google.com;google.com", "position": "Research Engineer;Researcher;Research Scientist;Staff Research Scientist;Full Professor;Research Scientist;Director;Associate Professor;Research Scientist;Research Scientist Intern", "bibtex": "@inproceedings{\ncarvalho2023combining,\ntitle={Combining Behaviors with the Successor Features Keyboard},\nauthor={Wilka Carvalho and Andre Saraiva and Angelos Filos and Andrew Kyle Lampinen and Loic Matthey and Richard Lewis and Honglak Lee and Satinder Singh and Danilo Jimenez Rezende and Daniel Zoran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GhNCFtLSsy}\n}", "github": "", "project": "", "reviewers": "z7ra;cEsx;xth2;RZN8", "pdf_size": 5765876, "rating": "4;5;5;8", "confidence": "3;3;3;4", "soundness": "3;2;3;4", "novelty": "3;2;2;4", "presentation": "2;2;3;3", "wc_summary": "242;48;123;193", "wc_strengths": "273;20;68;105", "wc_weaknesses": "251;216;112;133", "wc_questions": "151;4;40;135", "wc_limitations": "42;101;6;1", "wc_review": "959;389;349;567", "wc_reply_reviewers": "0;44;0;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 151.5, 73.20689858203255 ], "wc_strengths_avg": [ 116.5, 95.24835956592638 ], "wc_weaknesses_avg": [ 178.0, 57.34544445725397 ], "wc_questions_avg": [ 82.5, 62.082606259724635 ], "wc_limitations_avg": [ 37.5, 39.928060308509856 ], "wc_review_avg": [ 566.0, 241.2819926973416 ], "wc_reply_reviewers_avg": [ 18.25, 19.00493356999703 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.9622504486493763, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10566040288438482483&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "deepmind.com;deepmind.com;google.com;google.com;umich.edu;google.com;google.com;umich.edu;google.com;google.com", "author_num": 10, "aff_unique_index": "0;0;0;0;1;0;0;1;0;0", "aff_unique_norm": "Google;University of Michigan", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.umich.edu", "aff_unique_abbr": "DeepMind;UM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Ann Arbor;Mountain View", "aff_country_unique_index": "0;0;0;0;1;0;0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Bi-Level Offline Policy Optimization with Limited Exploration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72198", "id": "GiUe0ZFiVe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac6de776b8de8c9aed1d356997eb54b8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GiUe0ZFiVe", "openreview": "https://openreview.net/forum?id=GiUe0ZFiVe", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72198", "video": "https://nips.cc/virtual/2023/poster/72198", "tldr": "", "abstract": "We study offline reinforcement learning (RL) which seeks to learn a good policy based on a fixed, pre-collected dataset. 
A fundamental challenge behind this task is the distributional shift due to the dataset lacking sufficient exploration, especially under function approximation. To tackle this issue, we propose a bi-level structured policy optimization algorithm that models a hierarchical interaction between the policy (upper-level) and the value function (lower-level). The lower level focuses on constructing a confidence set of value estimates that maintain sufficiently small weighted average Bellman errors, while controlling uncertainty arising from distribution mismatch. Subsequently, at the upper level, the policy aims to maximize a conservative value estimate from the confidence set formed at the lower level. This novel formulation preserves the maximum flexibility of the implicitly induced exploratory data distribution, enabling the power of model extrapolation. In practice, it can be solved through a computationally efficient, penalized adversarial estimation procedure. Our theoretical regret guarantees do not rely on any data-coverage and completeness-type assumptions, only requiring realizability. These guarantees also demonstrate that the learned policy represents the ``best effort'' among all policies, as no other policies can outperform it. We evaluate our model using a blend of synthetic, benchmark, and real-world datasets for offline RL, showing that it performs competitively with state-of-the-art methods.", "keywords": "Offline Reinforcement Learning;Sample Efficiency;Regret Bound;Data Coverage", "primary_area": "", "supplementary_material": "/attachment/5e1b9935fcd08067ba0c380654d32f15d8c0d092.pdf", "author": "Wenzhuo Zhou", "authorids": "~Wenzhuo_Zhou1", "gender": "M", "homepage": "https://sites.google.com/view/wenzhuozhou", "dblp": "281/9005", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Wenzhuo_Zhou1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nzhou2023bilevel,\ntitle={Bi-Level Offline Policy Optimization with Limited Exploration},\nauthor={Wenzhuo Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GiUe0ZFiVe}\n}", "github": "", "project": "", "reviewers": "75WN;UsZq;q6tp;C4A2;7QtD", "pdf_size": 488143, "rating": "4;6;7;7;7", "confidence": "4;2;3;3;4", "soundness": "2;3;4;2;3", "novelty": "2;3;3;4;3", "presentation": "3;3;3;3;3", "wc_summary": "164;89;56;138;79", "wc_strengths": "98;72;62;46;89", "wc_weaknesses": "306;40;84;400;66", "wc_questions": "2;42;110;81;165", "wc_limitations": "2;1;4;7;2", "wc_review": "572;244;316;672;401", "wc_reply_reviewers": "114;12;20;229;0", "wc_reply_authors": "1201;142;25;785;112", "reply_reviewers": "1;1;1;2;0", "reply_authors": "3;3;2;3;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.2, 39.756257369123674 ], "wc_strengths_avg": [ 73.4, 18.607525359380812 ], "wc_weaknesses_avg": [ 179.2, 145.66042702120572 ], "wc_questions_avg": [ 80.0, 55.95355216606002 ], "wc_limitations_avg": [ 3.2, 2.1354156504062622 ], "wc_review_avg": [ 441.0, 159.0949402086691 ], "wc_reply_reviewers_avg": [ 75.0, 87.01264275954385 ], "wc_reply_authors_avg": [ 453.0, 461.7172294814219 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 1, 0 ], 
"corr_rating_confidence": -0.2750095491084634, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5719890715650379083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "", "author_num": 1 }, { "title": "Multi-Player Zero-Sum Markov Games with Networked Separable Interactions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72197", "id": "GiiOpKinGm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/75c411b0a06fa9e78f2a516b57b2ce62-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GiiOpKinGm", "openreview": "https://openreview.net/forum?id=GiiOpKinGm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72197", "video": "https://nips.cc/virtual/2023/poster/72197", "author_site": "Chanwoo Park, Kaiqing Zhang, Asuman Ozdaglar", "tldr": "", "abstract": "We study a new class of Markov games, \\textit{(multi-player) zero-sum Markov Games} with {\\it Networked separable interactions} (zero-sum NMGs), to model the local interaction structure in non-cooperative multi-agent sequential decision-making. We define a zero-sum NMG as a model where {the payoffs of the auxiliary games associated with each state are zero-sum and} have some separable (i.e., polymatrix) structure across the neighbors over some interaction network. \nWe first identify the necessary and sufficient conditions under which an MG can be presented as a zero-sum NMG, and show that the set of Markov coarse correlated equilibrium (CCE) collapses to the set of Markov Nash equilibrium (NE) in these games, in that the {product of} per-state marginalization of the former for all players yields the latter. Furthermore, we show that finding approximate Markov \\emph{stationary} CCE in infinite-horizon discounted zero-sum NMGs is \\texttt{PPAD}-hard, unless the underlying network has a ``star topology''. Then, we propose fictitious-play-type dynamics, the classical learning dynamics in normal-form games, for zero-sum NMGs, and establish convergence guarantees to Markov stationary NE under a star-shaped network structure. Finally, in light of the hardness result, we focus on computing a Markov \\emph{non-stationary} NE and provide finite-iteration guarantees for a series of value-iteration-based algorithms. We also provide numerical experiments to corroborate our theoretical results.", "keywords": "Markov Games;Local Interaction;PPAD-Hardness;Fictitious Play", "primary_area": "", "supplementary_material": "/attachment/9f28ebd47414b6579027f835e6a85b608f40d277.pdf", "author": "Chanwoo Park;Kaiqing Zhang;Asuman E. Ozdaglar", "authorids": "~Chanwoo_Park2;~Kaiqing_Zhang3;~Asuman_E._Ozdaglar1", "gender": "M;F;M", "homepage": "https://chanwoo-park-official.github.io/;https://asu.mit.edu/;https://kzhang66.github.io/", "dblp": ";35/2875;", "google_scholar": "https://scholar.google.com/citations?hl=ko;https://scholar.google.com.tw/citations?user=nWnBSOsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "chanwoo-park-ab5096237/;;", "or_profile": "~Chanwoo_Park2;~Asuman_E._Ozdaglar1;~kaiqing_zhang1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Maryland, College Park", "aff_domain": "mit.edu;mit.edu;umd.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npark2023multiplayer,\ntitle={Multi-Player Zero-Sum Markov Games with Networked Separable Interactions},\nauthor={Chanwoo Park and Kaiqing Zhang and Asuman E. 
Ozdaglar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GiiOpKinGm}\n}", "github": "", "project": "", "reviewers": "AdnB;xjXv;SApy;J94i;GndM", "pdf_size": 460278, "rating": "4;6;6;7;7", "confidence": "4;3;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;4;3", "wc_summary": "115;114;94;279;82", "wc_strengths": "35;90;144;52;41", "wc_weaknesses": "573;227;328;83;41", "wc_questions": "138;40;146;36;36", "wc_limitations": "20;1;7;2;2", "wc_review": "881;472;719;452;202", "wc_reply_reviewers": "812;31;8;31;26", "wc_reply_authors": "1925;54;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;2;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 136.8, 72.1814380571626 ], "wc_strengths_avg": [ 72.4, 40.588668369386056 ], "wc_weaknesses_avg": [ 250.4, 191.0555940034209 ], "wc_questions_avg": [ 79.2, 51.35912771844942 ], "wc_limitations_avg": [ 6.4, 7.116178749862878 ], "wc_review_avg": [ 545.2, 234.43498032503595 ], "wc_reply_reviewers_avg": [ 181.6, 315.3135582241905 ], "wc_reply_authors_avg": [ 395.8, 764.8859784307725 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9833539186676097978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;umd.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Maryland", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.umd.edu", "aff_unique_abbr": "MIT;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Latent Geometry Search: Product Manifold Inference via Gromov-Hausdorff-Informed Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72196", "id": "Gij638d76O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/78efbc5386c5a7c241e7fcc482d3c3dc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Gij638d76O", "openreview": "https://openreview.net/forum?id=Gij638d76O", "poster": "/media/PosterPDFs/NeurIPS%202023/72196.png?t=1701447438.7749429", "slides": "https://nips.cc/virtual/2023/poster/72196", "video": "https://nips.cc/virtual/2023/poster/72196", "author_site": "Haitz S\u00e1ez de Oc\u00e1riz Borde, Alvaro Arroyo, Ismael Morales, Ingmar Posner, Xiaowen Dong", "tldr": "", "abstract": "Recent research indicates that the performance of machine learning models can be improved by aligning the geometry of the latent space with the underlying data structure. Rather than relying solely on Euclidean space, researchers have proposed using hyperbolic and spherical spaces with constant curvature, or combinations thereof, to better model the latent space and enhance model performance. However, little attention has been given to the problem of automatically identifying the optimal latent geometry for the downstream task.
We mathematically define this novel formulation and coin it as neural latent geometry search (NLGS). More specifically, we introduce an initial attempt to search for a latent geometry composed of a product of constant curvature model spaces with a small number of query evaluations, under some simplifying assumptions. To accomplish this, we propose a novel notion of distance between candidate latent geometries based on the Gromov-Hausdorff distance from metric geometry. In order to compute the Gromov-Hausdorff distance, we introduce a mapping function that enables the comparison of different manifolds by embedding them in a common high-dimensional ambient space. We then design a graph search space based on the notion of smoothness between latent geometries and employ the calculated distances as an additional inductive bias. Finally, we use Bayesian optimization to search for the optimal latent geometry in a query-efficient manner. This is a general method which can be applied to search for the optimal latent geometry for a variety of models and downstream tasks. We perform experiments on synthetic and real-world datasets to identify the optimal latent geometry for multiple machine learning problems.", "keywords": "Representation Learning;Product Manifolds;Bayesian Optimization;Gromov-Hausdorff Distance", "primary_area": "", "supplementary_material": "/attachment/9a37fac2de30289474dd7ccc829667340927fb5d.zip", "author": "Haitz S\u00e1ez de Oc\u00e1riz Borde;Alvaro Arroyo;Ismael Morales L\u00f3pez;Ingmar Posner;Xiaowen Dong", "authorids": "~Haitz_S\u00e1ez_de_Oc\u00e1riz_Borde1;~Alvaro_Arroyo1;ismael.morales@hertford.ox.ac.uk;~Ingmar_Posner1;~Xiaowen_Dong1", "gender": "M;M;;;", "homepage": "https://www.linkedin.com/in/haitz-s%C3%A1ez-de-oc%C3%A1riz-borde-0933a9199/;https://scholar.google.co.uk/citations?user=P1qHzNYAAAAJ&hl=en;;;https://web.media.mit.edu/~xdong/", "dblp": ";;;59/542;91/9827-1", "google_scholar": "aP0OakUAAAAJ;;;dPk-iwsAAAAJ;_8tUq8kAAAAJ", "orcid": ";;;0000-0001-6270-700X;", "linkedin": ";;;ingmar-posner-20b49a;", "or_profile": "~Haitz_S\u00e1ez_de_Oc\u00e1riz_Borde1;~Alvaro_Arroyo1;ismael.morales@hertford.ox.ac.uk;~Ingmar_Posner1;~Xiaowen_Dong1", "aff": "University of Oxford;University of Oxford;;University of Oxford;Massachusetts Institute of Technology", "aff_domain": "ox.ac.uk;ox.ac.uk;;ox.ac.uk;mit.edu", "position": "PhD student;PhD student;;Full Professor;Research Affiliate", "bibtex": "@inproceedings{\nborde2023neural,\ntitle={Neural Latent Geometry Search: Product Manifold Inference via Gromov-Hausdorff-Informed Bayesian Optimization},\nauthor={Haitz S{\\'a}ez de Oc{\\'a}riz Borde and Alvaro Arroyo and Ismael Morales L{\\'o}pez and Ingmar Posner and Xiaowen Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Gij638d76O}\n}", "github": "", "project": "", "reviewers": "aQg4;6Vwv;v4xj;cFVp;1UmR", "pdf_size": 1065821, "rating": "6;6;6;6;7", "confidence": "2;4;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "102;146;162;309;201", "wc_strengths": "78;113;104;105;83", "wc_weaknesses": "148;227;455;442;80", "wc_questions": "97;321;174;86;5", "wc_limitations": "17;8;138;63;8", "wc_review": "442;815;1033;1005;377", "wc_reply_reviewers": "38;109;237;87;0", "wc_reply_authors": "0;295;124;22;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 
], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 184.0, 70.09422230112835 ], "wc_strengths_avg": [ 96.6, 13.602940858505562 ], "wc_weaknesses_avg": [ 270.4, 152.73585040847482 ], "wc_questions_avg": [ 136.6, 106.62945184141199 ], "wc_limitations_avg": [ 46.8, 49.95758200713882 ], "wc_review_avg": [ 734.4, 276.44717397723565 ], "wc_reply_reviewers_avg": [ 94.2, 80.86383616920484 ], "wc_reply_authors_avg": [ 88.2, 113.12718506177019 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16985812575284528079&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ox.ac.uk;ox.ac.uk;;ox.ac.uk;mit.edu", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Oxford;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://web.mit.edu", "aff_unique_abbr": "Oxford;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Multitask Learning with No Regret: from Improved Confidence Bounds to Active Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72195", "id": "GjJRbEZ1dc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15d15045f93b44d933a260b249608d43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GjJRbEZ1dc", "openreview": "https://openreview.net/forum?id=GjJRbEZ1dc", "poster": "/media/PosterPDFs/NeurIPS%202023/72195.png?t=1701775069.9857953", "slides": "https://nips.cc/virtual/2023/poster/72195", "video": "https://nips.cc/virtual/2023/poster/72195", "author_site": "Pier Giuseppe Sessa, Pierre Laforgue, Nicol\u00f2 Cesa-Bianchi, Andreas Krause", "tldr": "", "abstract": "Multitask learning is a powerful framework that enables one to simultaneously learn multiple related tasks by sharing information between them. Quantifying uncertainty in the estimated tasks is of pivotal importance for many downstream applications, such as online or active learning. In this work, we provide novel confidence intervals for multitask regression in the challenging agnostic setting, i.e., when neither the similarity between tasks nor the tasks' features are available to the learner. The obtained intervals do not require i.i.d. data and can be directly applied to bound the regret in online learning. Through a refined analysis of the multitask information gain, we obtain new regret guarantees that, depending on a task similarity parameter, can significantly improve over treating tasks independently. We further propose a novel online learning algorithm that achieves such improved regret without knowing this parameter in advance, i.e., automatically adapting to task similarity. As a second key application of our results, we introduce a novel multitask active learning setup where several tasks must be simultaneously optimized, but only one of them can be queried for feedback by the learner at each round. For this problem, we design a no-regret algorithm that uses our confidence intervals to decide which task should be queried. 
Finally, we empirically validate our bounds and algorithms on synthetic and real-world (drug discovery) data.", "keywords": "multitask learning;confidence intervals;online learning theory;active learning;regret", "primary_area": "", "supplementary_material": "/attachment/e22e4e2f96e0423359972b932dc33e58e21b7f63.pdf", "author": "Pier Giuseppe Sessa;Pierre Laforgue;Nicol\u00f2 Cesa-Bianchi;Andreas Krause", "authorids": "~Pier_Giuseppe_Sessa1;~Pierre_Laforgue1;~Nicol\u00f2_Cesa-Bianchi1;~Andreas_Krause1", "gender": ";;M;M", "homepage": ";https://plaforgue.github.io/;http://cesa-bianchi.di.unimi.it/;https://las.inf.ethz.ch/krausea", "dblp": ";182/2785;c/NicoloCesaBianchi;87/1831-1.html", "google_scholar": ";https://scholar.google.fr/citations?hl=fr;https://scholar.google.it/citations?user=BWADJUkAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";;0000-0001-8477-4748;0000-0001-7260-9673", "linkedin": ";;;krausea/", "or_profile": "~Pier_Giuseppe_Sessa1;~Pierre_Laforgue1;~Nicol\u00f2_Cesa-Bianchi1;~Andreas_Krause1", "aff": ";University of Milan;University of Milan;ETH Zurich", "aff_domain": ";unimi.it;unimi.it;ethz.ch", "position": ";Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsessa2023multitask,\ntitle={Multitask Learning with No Regret: from Improved Confidence Bounds to Active Learning},\nauthor={Pier Giuseppe Sessa and Pierre Laforgue and Nicol{\\`o} Cesa-Bianchi and Andreas Krause},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GjJRbEZ1dc}\n}", "github": "", "project": "", "reviewers": "ckqH;EaHC;kioq;J9Sp;w9gE", "pdf_size": 1436451, "rating": "3;5;6;6;7", "confidence": "3;5;3;2;5", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "1;2;3;3;3", "wc_summary": "76;75;287;106;304", "wc_strengths": "77;42;93;63;39", "wc_weaknesses": "229;48;51;183;20", "wc_questions": "123;26;36;34;200", "wc_limitations": "26;71;10;14;1", "wc_review": "531;262;477;400;564", "wc_reply_reviewers": "758;0;21;0;114", "wc_reply_authors": "803;0;0;0;134", "reply_reviewers": "2;0;1;0;1", "reply_authors": "3;1;1;1;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.6, 1.2 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 169.6, 103.53859183898533 ], "wc_strengths_avg": [ 62.8, 20.556264252047356 ], "wc_weaknesses_avg": [ 106.2, 83.47790126734141 ], "wc_questions_avg": [ 83.8, 68.03646081330216 ], "wc_limitations_avg": [ 24.4, 24.646297896438725 ], "wc_review_avg": [ 446.8, 107.77643527228018 ], "wc_reply_reviewers_avg": [ 178.6, 292.74944918820944 ], "wc_reply_authors_avg": [ 187.4, 312.1445818847414 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2211629342323457, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17107480641334942822&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";unimi.it;unimi.it;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Milan;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimi.it;https://www.ethz.ch", "aff_unique_abbr": "UniMi;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Italy;Switzerland" }, { "title": "DICES Dataset: Diversity 
in Conversational AI Evaluation for Safety", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73642", "id": "GjNvvswoUL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a74b697bce4cac6c91896372abaa8863-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=GjNvvswoUL", "openreview": "https://openreview.net/forum?id=GjNvvswoUL", "poster": "/media/PosterPDFs/NeurIPS%202023/73642.png?t=1697466253.8162177", "slides": "https://nips.cc/virtual/2023/poster/73642", "video": "https://nips.cc/virtual/2023/poster/73642", "author_site": "Lora Aroyo, Alex Taylor, Mark D\u00edaz, Christopher Homan, Alicia Parrish, Gregory Serapio-Garc\u00eda, Vinodkumar Prabhakaran, Ding Wang", "tldr": "", "abstract": "Machine learning approaches often require training and evaluation datasets with a clear separation between positive and negative examples. This requirement overly simplifies the natural subjectivity present in many tasks, and obscures the inherent diversity in human perceptions and opinions about many content items. Preserving the variance in content and diversity in human perceptions in datasets is often quite expensive and laborious. This is especially troubling when building safety datasets for conversational AI systems, as safety is socio-culturally situated in this context. To demonstrate this crucial aspect of conversational AI safety, and to facilitate in-depth model performance analyses, we introduce the DICES (Diversity In Conversational AI Evaluation for Safety) dataset that contains fine-grained demographic information about raters and high replication of ratings per item to ensure statistical power for analyses, and that encodes rater votes as distributions across different demographics to allow for in-depth explorations of different aggregation strategies. The DICES dataset enables the observation and measurement of variance, ambiguity, and diversity in the context of safety for conversational AI. We further describe a set of metrics that show how rater diversity influences safety perception across different geographic regions, ethnicity groups, age groups, and genders.
The goal of the DICES dataset is to be used as a shared resource and benchmark that respects diverse perspectives during safety evaluation of conversational AI systems.", "keywords": "conversational AI;human evaluation;human annotation;safety task;disagreement;variance in human annotations;diversity of rater pool", "primary_area": "", "supplementary_material": "/attachment/3c6c566e44d633180eff242a5000a2fe03ef0d91.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\naroyo2023dices,\ntitle={{DICES} Dataset: Diversity in Conversational {AI} Evaluation for Safety},\nauthor={Lora Aroyo and Alex Taylor and Mark Diaz and Christopher M Homan and Alicia Parrish and Greg Serapio-Garcia and Vinodkumar Prabhakaran and Ding Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=GjNvvswoUL}\n}", "github": "", "project": "", "reviewers": "45Pu;aL9d;jiuK;HcmN", "pdf_size": 386864, "rating": "6;7;7;9", "confidence": "4;4;3;4", "wc_summary_and_contributions": "95;87;149;125", "wc_strengths": "89;111;108;147", "wc_improvement": "108;57;142;140", "wc_limitations": "75;36;32;183", "wc_correctness": "3;26;1;1", "wc_clarity": "5;6;1;1", "wc_relation_to_prior_work": "11;15;1;1", "wc_documentation": "10;11;1;57", "wc_additional_feedback": "1;1;1;1", "wc_review": "397;350;436;656", "wc_reply_reviewers": "120;0;21;0", "wc_reply_authors": "1347;287;786;788", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 114.0, 24.677925358506133 ], "wc_strengths_avg": [ 113.75, 20.96872671384698 ], "wc_improvement_avg": [ 111.75, 34.368408458932166 ], "wc_limitations_avg": [ 81.5, 60.96105314050931 ], "wc_correctness_avg": [ 7.75, 10.568230693924125 ], "wc_clarity_avg": [ 3.25, 2.277608394786075 ], "wc_relation_to_prior_work_avg": [ 7.0, 6.164414002968976 ], "wc_documentation_avg": [ 19.75, 21.856063231972954 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 459.75, 117.32513584053504 ], "wc_reply_reviewers_avg": [ 35.25, 49.67582409985767 ], "wc_reply_authors_avg": [ 802.0, 375.06732728938147 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7009626226155297055&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "", "author_num": 1 }, { "title": "Exploiting Contextual Objects and Relations for 3D Visual Grounding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72194", "id": "GlWzQhf2lV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b91ee0da3bcd61905fcd89e770168fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GlWzQhf2lV", "openreview": "https://openreview.net/forum?id=GlWzQhf2lV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72194", "video": "https://nips.cc/virtual/2023/poster/72194", "author_site": "Li Yang, chunfeng yuan, Ziqi Zhang, Zhongang Qi, Yan Xu, Wei Liu, Ying Shan, Bing Li, Weiping Yang, Peng Li, Yan Wang, Weiming Hu", "tldr": "", "abstract": "3D visual grounding, the task of 
identifying visual objects in 3D scenes based on natural language inputs, plays a critical role in enabling machines to understand and engage with the real-world environment. However, this task is challenging due to the necessity to capture 3D contextual information to distinguish target objects from complex 3D scenes. The absence of annotations for contextual objects and relations further exacerbates the difficulties. In this paper, we propose a novel model, CORE-3DVG, to address these challenges by explicitly learning about contextual objects and relations. Our method accomplishes 3D visual grounding via three sequential modular networks, including a text-guided object detection network, a relation matching network, and a target identification network. During training, we introduce a pseudo-label self-generation strategy and a weakly-supervised method to facilitate the learning of contextual objects and relations, respectively. The proposed techniques allow the networks to focus more effectively on referred objects within 3D scenes by understanding their context better. We validate our model on the challenging Nr3D, Sr3D, and ScanRefer datasets and demonstrate state-of-the-art performance. Our code will be public at https://github.com/yangli18/CORE-3DVG.", "keywords": "3D Visual Grounding;Contextual Object;Contextual Relation", "primary_area": "", "supplementary_material": "/attachment/55105049617dfda18b34c789ab833a5f8a741bd6.pdf", "author": "Li Yang;Chunfeng Yuan;Ziqi Zhang;Zhongang Qi;Yan Xu;Wei Liu;Ying Shan;Bing Li;Weiping Yang;Peng Li;Yan Wang;Weiming Hu", "authorids": "~Li_Yang9;~Chunfeng_Yuan1;~Ziqi_Zhang5;~Zhongang_Qi1;~Yan_Xu8;~Wei_Liu26;~Ying_Shan2;~Bing_Li1;yangweiping@moe.edu.cn;~Peng_Li16;~Yan_Wang26;~Weiming_Hu1", "gender": ";F;M;M;M;M;M;M;;;M;M", "homepage": ";;;;https://decayale.github.io/;https://ia.cas.cn/rcdw/fyjy/202404/t20240422_7129902.html;;http://www.escience.cn/people/BingLi;;;;http://weiminghu.people-ai.net/", "dblp": ";;;08/9990;;;68/5910;13/2692-1;;;;", "google_scholar": ";_qPX-hcAAAAJ;igML-F8AAAAJ;zJvrrusAAAAJ;https://scholar.google.com/citations?pli=1;https://scholar.google.com.hk/citations?hl=zh-CN;4oXBp9UAAAAJ;;;BXj8Qc0AAAAJ;;", "orcid": "0000-0002-3410-7856;;;;0000-0002-3462-7931;0000-0001-9873-304X;0000-0001-7673-8325;;;;;0000-0001-9237-8825", "linkedin": ";;;;samuel-yan-xu;;YingShanProfile/;;;;%E7%82%8E-%E7%8E%8B-4ba17123b/;", "or_profile": "~Li_Yang9;~Chunfeng_Yuan1;~Ziqi_Zhang5;~Zhongang_Qi1;~Yan_Xu8;~Wei_Liu26;~Ying_Shan2;~Bing_Li1;yangweiping@moe.edu.cn;~Peng_Li16;~Yan_Wang26;~Weiming_Hu1", "aff": "Institute of Automation, Chinese Academy of Sciences;, Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Tencent PCG ARC Lab;The Chinese University of Hong Kong;Institute of Automation, Chinese Academy of Sciences;Tencent PCG ARC Lab;Institute of Automation, Chinese Academy of Sciences;;Alibaba Group;Alibaba Group;Institute of automation, Chinese academy of science", "aff_domain": "nlpr.ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;tencent.com;cuhk.edu.hk;ia.ac.cn;arc.tencent.com;ia.ac.cn;;alibaba-inc.com;alibaba-inc.com;nlpr.ia.ac.cn", "position": "PhD student;Associate Professor;Assistant Professor;Researcher;PhD student;Associate Professor;Director;Full Professor;;Expert;Researcher;Full Professor", "bibtex": "@inproceedings{\nyang2023exploiting,\ntitle={Exploiting Contextual Objects and Relations for 3D Visual Grounding},\nauthor={Li Yang and Chunfeng Yuan and Ziqi Zhang and Zhongang Qi and Yan Xu and Wei Liu 
and Ying Shan and Bing Li and Weiping Yang and Peng Li and Yan Wang and Weiming Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GlWzQhf2lV}\n}", "github": "", "project": "", "reviewers": "2pN3;Dhe4;VN72;oYfR;DS3o", "pdf_size": 3144352, "rating": "4;4;6;6;6", "confidence": "5;3;4;5;4", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;2;3;3", "wc_summary": "66;56;134;80;177", "wc_strengths": "100;45;19;74;212", "wc_weaknesses": "232;122;668;178;168", "wc_questions": "4;2;80;5;49", "wc_limitations": "8;1;1;5;12", "wc_review": "410;226;902;342;618", "wc_reply_reviewers": "0;0;99;77;56", "wc_reply_authors": "270;238;109;128;323", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;2;2;2;3", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 102.6, 45.91993031353597 ], "wc_strengths_avg": [ 90.0, 66.79221511523629 ], "wc_weaknesses_avg": [ 273.6, 200.27740761254125 ], "wc_questions_avg": [ 28.0, 31.3878957561669 ], "wc_limitations_avg": [ 5.4, 4.223742416388575 ], "wc_review_avg": [ 499.6, 238.18782504569793 ], "wc_reply_reviewers_avg": [ 46.4, 40.252204908551285 ], "wc_reply_authors_avg": [ 213.6, 82.47811831025244 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14213268097750575350&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nlpr.ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;tencent.com;cuhk.edu.hk;ia.ac.cn;arc.tencent.com;ia.ac.cn;;alibaba-inc.com;alibaba-inc.com;nlpr.ia.ac.cn", "author_num": 12, "aff_unique_index": "0;0;0;1;2;0;1;0;3;3;0", "aff_unique_norm": "Chinese Academy of Sciences;Tencent;Chinese University of Hong Kong;Alibaba Group", "aff_unique_dep": "Institute of Automation;PCG ARC Lab;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.tencent.com;https://www.cuhk.edu.hk;https://www.alibaba.com", "aff_unique_abbr": "CAS;Tencent;CUHK;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Defending Pre-trained Language Models as Few-shot Learners against Backdoor Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72193", "id": "GqXbfVmEPW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/677c8dc72c99482507323f313faf4738-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GqXbfVmEPW", "openreview": "https://openreview.net/forum?id=GqXbfVmEPW", "poster": "/media/PosterPDFs/NeurIPS%202023/72193.png?t=1701308648.201473", "slides": "https://nips.cc/virtual/2023/poster/72193", "video": "https://nips.cc/virtual/2023/poster/72193", "author_site": "Zhaohan Xi, Tianyu Du, Tianyu Du, Changjiang Li, Ren Pang, Shouling Ji, Jinghui Chen, Fenglong Ma, Ting Wang", "tldr": "", "abstract": "Pre-trained language models (PLMs) have demonstrated remarkable performance as few-shot learners. However, their security risks under such settings are largely unexplored. 
In this work, we conduct a pilot study showing that PLMs as few-shot learners are highly vulnerable to backdoor attacks while existing defenses are inadequate due to the unique challenges of few-shot scenarios. To address such challenges, we advocate MDP, a novel lightweight, pluggable, and effective defense for PLMs as few-shot learners. Specifically, MDP leverages the gap between the masking-sensitivity of poisoned and clean samples: with reference to the limited few-shot data as distributional anchors, it compares the representations of given samples under varying masking and identifies poisoned samples as ones with significant variations. We show analytically that MDP creates an interesting dilemma for the attacker to choose between attack effectiveness and detection evasiveness. The empirical evaluation using benchmark datasets and representative attacks validates the efficacy of MDP. The code of MDP is publicly available.", "keywords": "few-shot learning;prompt learning;language model;backdoor defense", "primary_area": "", "supplementary_material": "/attachment/aa1ea76a1033c6adc2b76b225e759fc83b852027.pdf", "author": "Zhaohan Xi;Tianyu Du;Changjiang Li;Ren Pang;Shouling Ji;Jinghui Chen;Fenglong Ma;Ting Wang", "authorids": "~Zhaohan_Xi1;~Tianyu_Du2;~Changjiang_Li1;~Ren_Pang1;~Shouling_Ji1;~Jinghui_Chen1;~Fenglong_Ma1;~Ting_Wang1", "gender": "M;F;M;M;M;M;M;M", "homepage": "https://zhaohan-xi.github.io;https://tydusky.github.io/;;https://ain-soph.github.io/;https://nesa.zju.edu.cn/;https://jinghuichen.github.io/;https://fenglong-ma.github.io/;https://alps-lab.github.io/", "dblp": "224/9296;128/2982.html;;252/5223;07/8388;67/5633;85/10856;12/2633-6.html", "google_scholar": "wQgnjMIAAAAJ;kBqTzrwAAAAJ;qBBlW0gAAAAJ;M8YmxzYAAAAJ;https://scholar.google.com.vn/citations?hl=en;mKia7Y4AAAAJ;DLJIxNMAAAAJ;cwcBTegAAAAJ", "orcid": ";0000-0003-0896-0690;;;0000-0003-4268-372X;;0000-0002-4999-0303;", "linkedin": ";;;;;;fenglong-ma-69805832/;", "or_profile": "~Zhaohan_Xi1;~Tianyu_Du2;~Changjiang_Li1;~Ren_Pang1;~Shouling_Ji1;~Jinghui_Chen1;~Fenglong_Ma1;~Ting_Wang1", "aff": ";Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Zhejiang University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": ";psu.edu;psu.edu;psu.edu;zju.edu.cn;psu.edu;psu.edu;psu.edu", "position": ";Postdoc;PhD student;PhD student;Full Professor;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nxi2023defending,\ntitle={Defending Pre-trained Language Models as Few-shot Learners against Backdoor Attacks},\nauthor={Zhaohan Xi and Tianyu Du and Changjiang Li and Ren Pang and Shouling Ji and Jinghui Chen and Fenglong Ma and Ting Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GqXbfVmEPW}\n}", "github": "", "project": "", "reviewers": "fW9g;3mvM;x4jz;6q5A", "pdf_size": 1416321, "rating": "4;5;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "159;59;129;23", "wc_strengths": "200;58;146;20", "wc_weaknesses": "246;153;127;173", "wc_questions": "1;1;18;9", "wc_limitations": "1;1;13;6", "wc_review": "607;272;433;231", "wc_reply_reviewers": "76;170;19;4", "wc_reply_authors": "518;597;28;0", "reply_reviewers": "1;2;1;1", "reply_authors": "4;3;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], 
"novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.5, 54.09944546850735 ], "wc_strengths_avg": [ 106.0, 70.95068710026705 ], "wc_weaknesses_avg": [ 174.75, 44.25141240683737 ], "wc_questions_avg": [ 7.25, 7.013380069552769 ], "wc_limitations_avg": [ 5.25, 4.9180788932265 ], "wc_review_avg": [ 385.75, 148.3835823128691 ], "wc_reply_reviewers_avg": [ 67.25, 65.12056126907999 ], "wc_reply_authors_avg": [ 285.75, 273.36091070231674 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15736551079804746561&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": ";psu.edu;psu.edu;psu.edu;zju.edu.cn;psu.edu;psu.edu;psu.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Pennsylvania State University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.zju.edu.cn", "aff_unique_abbr": "PSU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "$\\varepsilon$-fractional core stability in Hedonic Games.", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72192", "id": "GqtpYUCwnu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5b755cf5598a4324d253025e1fbbba52-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GqtpYUCwnu", "openreview": "https://openreview.net/forum?id=GqtpYUCwnu", "poster": "/media/PosterPDFs/NeurIPS%202023/72192.png?t=1701444828.4815743", "slides": "https://nips.cc/virtual/2023/poster/72192", "video": "https://nips.cc/virtual/2023/poster/72192", "author_site": "Simone Fioravanti, Michele Flammini, Bojana Kodric, Giovanna Varricchio", "tldr": "", "abstract": "Hedonic Games (HGs) are a classical framework modeling coalition formation of strategic agents guided by their individual preferences. \nAccording to these preferences, it is desirable that a coalition structure (i.e. a partition of agents into coalitions) satisfies some form of stability. The most well-known and natural of such notions is arguably core-stability. Informally, a partition is core-stable if no subset of agents would like to deviate by regrouping in a so-called core-blocking coalition. Unfortunately, core-stable partitions seldom exist and even when they do, it is often computationally intractable to find one. To circumvent these problems, we propose the notion of $\\varepsilon$-fractional core-stability, where at most an $\\varepsilon$-fraction of all possible coalitions is allowed to core-block. It turns out that such a relaxation may guarantee both existence and polynomial-time computation. Specifically, we design efficient algorithms returning an $\\varepsilon$-fractional core-stable partition, with $\\varepsilon$ exponentially decreasing in the number of agents, for two fundamental classes of HGs: Simple Fractional and Anonymous. From a probabilistic point of view, being the definition of $\\varepsilon$-fractional core equivalent to requiring that uniformly sampled coalitions core-block with probability lower than $\\varepsilon$, we further extend the definition to handle more complex sampling distributions. 
Along this line, when valuations have to be learned from samples in a PAC-learning fashion, we give positive and negative results on which distributions allow the efficient computation of outcomes that are $\\varepsilon$-fractional core-stable with arbitrarily high confidence.", "keywords": "Game Theory;Hedonic Games;Core stability;Coalition Formation;Social Choice;PAC learning", "primary_area": "", "supplementary_material": "/attachment/01b15d7f8c908dd3f010e78cbea3529bf250b7a9.zip", "author": "Simone Fioravanti;Michele Flammini;Bojana Kodric;Giovanna Varricchio", "authorids": "~Simone_Fioravanti2;~Michele_Flammini1;bojana.kodric@unive.it;~Giovanna_Varricchio1", "gender": "M;M;;F", "homepage": ";https://www.micheleflammini.com;;", "dblp": "228/8521;f/MicheleFlammini;;259/2278", "google_scholar": "https://scholar.google.it/citations?user=8RtNNREAAAAJ;cA-nbvkAAAAJ;;V98oGCEAAAAJ", "orcid": "0000-0002-3850-0786;0000-0003-0327-3728;;0000-0001-6839-8551", "linkedin": "simone-fioravanti-5ab957124;micheleflammini/;;", "or_profile": "~Simone_Fioravanti2;~Michele_Flammini1;bojana.kodric@unive.it;~Giovanna_Varricchio1", "aff": "Gran Sasso Science Institute (GSSI);Gran Sasso Science Institute;;Johann Wolfgang Goethe Universit\u00e4t Frankfurt am Main", "aff_domain": "gssi.it;gssi.it;;uni-frankfurt.de", "position": "PhD student;Full Professor;;Postdoc", "bibtex": "@inproceedings{\nfioravanti2023varepsilonfractional,\ntitle={\\${\\textbackslash}varepsilon\\$-fractional core stability in Hedonic Games.},\nauthor={Simone Fioravanti and Michele Flammini and Bojana Kodric and Giovanna Varricchio},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GqtpYUCwnu}\n}", "github": "", "project": "", "reviewers": "UFzk;eoec;psiE;ot5e;1m1t", "pdf_size": 517423, "rating": "5;5;6;7;7", "confidence": "3;2;4;3;3", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "3;3;3;2;4", "wc_summary": "180;91;98;133;128", "wc_strengths": "39;44;181;37;77", "wc_weaknesses": "61;14;115;213;110", "wc_questions": "34;22;107;26;62", "wc_limitations": "1;6;7;2;10", "wc_review": "315;177;508;411;387", "wc_reply_reviewers": "12;11;8;159;5", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 126.0, 31.55312979721663 ], "wc_strengths_avg": [ 75.6, 54.66113793180672 ], "wc_weaknesses_avg": [ 102.6, 66.29811460365974 ], "wc_questions_avg": [ 50.2, 31.650592411517355 ], "wc_limitations_avg": [ 5.2, 3.3105890714493698 ], "wc_review_avg": [ 359.6, 110.22449818438731 ], "wc_reply_reviewers_avg": [ 39.0, 60.049979184009715 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.35355339059327373, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "gssi.it;gssi.it;;uni-frankfurt.de", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Gran Sasso Science Institute;Johann Wolfgang Goethe University Frankfurt am Main", "aff_unique_dep": ";", "aff_unique_url": "https://www.gssi.it;https://www.uni-frankfurt.de", "aff_unique_abbr": "GSSI;JWGU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Frankfurt am 
Main", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Italy;Germany" }, { "title": "Score-based Generative Modeling through Stochastic Evolution Equations in Hilbert Spaces", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72191", "id": "GrElRvXnEj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/76c6f9f2475b275b92d03a83ea270af4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GrElRvXnEj", "openreview": "https://openreview.net/forum?id=GrElRvXnEj", "poster": "/media/PosterPDFs/NeurIPS%202023/72191.png?t=1700165447.1630623", "slides": "https://nips.cc/virtual/2023/poster/72191", "video": "https://nips.cc/virtual/2023/poster/72191", "author_site": "Sungbin Lim, EUN BI YOON, Taehyun Byun, Taewon Kang, Seungwoo Kim, Kyungjae Lee, Sungjoon Choi", "tldr": "", "abstract": "Continuous-time score-based generative models consist of a pair of stochastic differential equations (SDEs)\u2014a forward SDE that smoothly transitions data into a noise space and a reverse SDE that incrementally eliminates noise from a Gaussian prior distribution to generate data distribution samples\u2014are intrinsically connected by the time-reversal theory on diffusion processes. In this paper, we investigate the use of stochastic evolution equations in Hilbert spaces, which expand the applicability of SDEs in two aspects: sample space and evolution operator, so they enable encompassing recent variations of diffusion models, such as generating functional data or replacing drift coefficients with image transformation. To this end, we derive a generalized time-reversal formula to build a bridge between probabilistic diffusion models and stochastic evolution equations and propose a score-based generative model called Hilbert Diffusion Model (HDM). Combining with Fourier neural operator, we verify the superiority of HDM for sampling functions from functional datasets with a power of kernel two-sample test of 4.2 on Quadratic, 0.2 on Melbourne, and 3.6 on Gridwatch, which outperforms existing diffusion models formulated in function spaces. Furthermore, the proposed method shows its strength in motion synthesis tasks by utilizing the Wiener process with values in Hilbert space. 
Finally, our empirical results on image datasets also validate a connection between HDM and diffusion models using heat dissipation, revealing the potential for exploring evolution operators and sample spaces.", "keywords": "Generative Model;Score-based Method;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/d89004f04ac2c1e2f4279ccdeb4387bd246bc95d.pdf", "author": "Sungbin Lim;Eunbi Yoon;Taehyun Byun;Taewon Kang;Seungwoo Kim;Kyungjae Lee;Sungjoon Choi", "authorids": "~Sungbin_Lim1;~Eunbi_Yoon1;~Taehyun_Byun1;~Taewon_Kang2;~Seungwoo_Kim1;~Kyungjae_Lee1;~Sungjoon_Choi4", "gender": "M;F;M;M;;M;M", "homepage": "https://www.sungbin-lim.net;https://www.notion.so/a40c00e4fc73410191966a7078c46ec3?v=dd6e536bca534182b442d1a4a87a1e4a;;https://itsc.kr;;https://sites.google.com/view/kyungjaelee;https://sites.google.com/view/sungjoon-choi", "dblp": "206/6907;;313/9966;;;13/7265-1;81/618", "google_scholar": "https://scholar.google.com/citations?hl=ko;;https://scholar.google.com/citations?hl=ko;3wfiEzsAAAAJ;;https://scholar.google.co.kr/citations?user=OZZJagIAAAAJ;T3-0OQ8AAAAJ", "orcid": "0000-0003-2684-2022;;;;0009-0000-9620-5710;0000-0003-0147-2715;", "linkedin": "sungbin-lim-43b739b5/;;;taewon-kang/;seungwoo-kim-baaa6b195;;", "or_profile": "~Sungbin_Lim1;~Eunbi_Yoon1;~Taehyun_Byun1;~Taewon_Kang2;~Seungwoo_Kim1;~Kyungjae_Lee1;~Sungjoon_Choi4", "aff": "Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Korea University;KAIST AI;Ulsan National Institute of Science and Technology;ChungAng University;Korea University", "aff_domain": "unist.ac.kr;unist.ac.kr;korea.ac.kr;kaist.ac.kr;unist.ac.kr;cau.ac.kr;korea.ac.kr", "position": "Assistant Professor;MS student;PhD student;Intern;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlim2023scorebased,\ntitle={Score-based Generative Modeling through Stochastic Evolution Equations in Hilbert Spaces},\nauthor={Sungbin Lim and Eunbi Yoon and Taehyun Byun and Taewon Kang and Seungwoo Kim and Kyungjae Lee and Sungjoon Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GrElRvXnEj}\n}", "github": "", "project": "", "reviewers": "8erp;gdSN;yicF;sBaK", "pdf_size": 35159389, "rating": "7;7;7;7", "confidence": "3;4;4;3", "soundness": "3;4;4;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "106;82;52;119", "wc_strengths": "80;134;116;79", "wc_weaknesses": "496;280;60;66", "wc_questions": "382;2;128;45", "wc_limitations": "4;16;11;6", "wc_review": "1068;514;367;315", "wc_reply_reviewers": "517;50;59;19", "wc_reply_authors": "762;0;33;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.75, 25.518375732009275 ], "wc_strengths_avg": [ 102.25, 23.62599204266352 ], "wc_weaknesses_avg": [ 225.5, 179.56266315690465 ], "wc_questions_avg": [ 139.25, 147.2877710470221 ], "wc_limitations_avg": [ 9.25, 4.656984002549289 ], "wc_review_avg": [ 566.0, 298.877065028416 ], "wc_reply_reviewers_avg": [ 161.25, 205.92762684982313 ], "wc_reply_authors_avg": [ 198.75, 325.47148492609915 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=1102177630928225105&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "unist.ac.kr;unist.ac.kr;korea.ac.kr;kaist.ac.kr;unist.ac.kr;cau.ac.kr;korea.ac.kr", "author_num": 7, "aff_unique_index": "0;0;1;2;0;3;1", "aff_unique_norm": "Ulsan National Institute of Science and Technology;Korea University;Korea Advanced Institute of Science and Technology;Chungang University", "aff_unique_dep": ";;KAIST AI;", "aff_unique_url": "https://www.unist.ac.kr;https://www.korea.ac.kr;https://www.kaist.edu;http://www.cau.ac.kr", "aff_unique_abbr": "UNIST;KU;KAIST;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Demographic Parity Constrained Minimax Optimal Regression under Linear Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72190", "id": "GrFsx4mBWF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b4acad19cc425a7352a71d4e4468393-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GrFsx4mBWF", "openreview": "https://openreview.net/forum?id=GrFsx4mBWF", "poster": "/media/PosterPDFs/NeurIPS%202023/72190.png?t=1701931008.3134224", "slides": "https://nips.cc/virtual/2023/poster/72190", "video": "https://nips.cc/virtual/2023/poster/72190", "author_site": "Kazuto Fukuchi, Jun Sakuma", "tldr": "", "abstract": "We explore the minimax optimal error associated with a demographic parity-constrained regression problem within the context of a linear model. Our proposed model encompasses a broader range of discriminatory bias sources compared to the model presented by Chzhen and Schreuder. Our analysis reveals that the minimax optimal error for the demographic parity-constrained regression problem under our model is characterized by $\\Theta(\\frac{dM}{n})$, where $n$ denotes the sample size, $d$ represents the dimensionality, and $M$ signifies the number of demographic groups arising from sensitive attributes. 
Moreover, we demonstrate that the minimax error increases in conjunction with a larger bias present in the model.", "keywords": "demographic parity;regression;minimax optimal", "primary_area": "", "supplementary_material": "/attachment/276f7f11c51763802d5bcc9d531626e2c7aebcbc.pdf", "author": "Kazuto Fukuchi;Jun Sakuma", "authorids": "~Kazuto_Fukuchi2;~Jun_Sakuma1", "gender": "M;M", "homepage": "https://kfukuchi.me/;https://sites.google.com/view/junsakuma/english", "dblp": "133/7753;43/5716.html", "google_scholar": "https://scholar.google.co.jp/citations?user=496_ICsAAAAJ;v5emswQAAAAJ", "orcid": "0000-0003-3895-219X;", "linkedin": ";", "or_profile": "~Kazuto_Fukuchi2;~Jun_Sakuma1", "aff": "RIKEN;University of Tsukuba", "aff_domain": "riken.jp;tsukuba.ac.jp", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nfukuchi2023demographic,\ntitle={Demographic Parity Constrained Minimax Optimal Regression under Linear Model},\nauthor={Kazuto Fukuchi and Jun Sakuma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GrFsx4mBWF}\n}", "github": "", "project": "", "reviewers": "6KUa;1oyt;TKh2;okbq", "pdf_size": 507473, "rating": "6;7;7;7", "confidence": "3;3;3;4", "soundness": "4;3;4;4", "novelty": "4;3;3;4", "presentation": "3;3;4;4", "wc_summary": "37;55;62;65", "wc_strengths": "109;86;89;102", "wc_weaknesses": "56;54;9;36", "wc_questions": "13;99;44;89", "wc_limitations": "1;20;10;3", "wc_review": "216;314;214;295", "wc_reply_reviewers": "30;8;4;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 54.75, 10.871407452579449 ], "wc_strengths_avg": [ 96.5, 9.394147114027968 ], "wc_weaknesses_avg": [ 38.75, 18.859679212542297 ], "wc_questions_avg": [ 61.25, 34.71581051912803 ], "wc_limitations_avg": [ 8.5, 7.433034373659253 ], "wc_review_avg": [ 259.75, 45.256905550424015 ], "wc_reply_reviewers_avg": [ 10.5, 11.6081867662439 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5836788993178518091&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "riken.jp;tsukuba.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "RIKEN;University of Tsukuba", "aff_unique_dep": ";", "aff_unique_url": "https://www.riken.jp;https://www.tsukuba.ac.jp", "aff_unique_abbr": "RIKEN;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "STXD: Structural and Temporal Cross-Modal Distillation for Multi-View 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72189", "id": "Grz2ijKrWI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d8c01de2dc698c54201c1c7d0b86974-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Grz2ijKrWI", "openreview": "https://openreview.net/forum?id=Grz2ijKrWI", "poster": "/media/PosterPDFs/NeurIPS%202023/72189.png?t=1699018665.0646977", "slides": "https://nips.cc/virtual/2023/poster/72189", 
"video": "https://nips.cc/virtual/2023/poster/72189", "author_site": "Sujin Jang, Dae Ung Jo, Sung Ju Hwang, Dongwook Lee, Daehyun Ji", "tldr": "", "abstract": "3D object detection (3DOD) from multi-view images is an economically appealing alternative to expensive LiDAR-based detectors, but also an extremely challenging task due to the absence of precise spatial cues. Recent studies have leveraged the teacher-student paradigm for cross-modal distillation, where a strong LiDAR-modality teacher transfers useful knowledge to a multi-view-based image-modality student. However, prior approaches have only focused on minimizing global distances between cross-modal features, which may lead to suboptimal knowledge distillation results. Based on these insights, we propose a novel structural and temporal cross-modal knowledge distillation (STXD) framework for multi-view 3DOD. First, STXD reduces redundancy of the feature components of the student by regularizing the cross-correlation of cross-modal features, while maximizing their similarities. Second, to effectively transfer temporal knowledge, STXD encodes temporal relations of features across a sequence of frames via similarity maps. Lastly, STXD also adopts a response distillation method to further enhance the quality of knowledge distillation at the output-level. Our extensive experiments demonstrate that STXD significantly improves the NDS and mAP of the based student detectors by 2.8%~4.5% on the nuScenes testing dataset.", "keywords": "knowledge distillation;cross-modal learning;3d object detection", "primary_area": "", "supplementary_material": "/attachment/a034887fec29e4d96fa9911af545d219d186a955.zip", "author": "Sujin Jang;Dae Ung Jo;Sung Ju Hwang;Dongwook Lee;Daehyun Ji", "authorids": "~Sujin_Jang2;~Dae_Ung_Jo2;~Sung_Ju_Hwang1;~Dongwook_Lee4;~Daehyun_Ji1", "gender": "M;;;M;", "homepage": "https://sujinjang.github.io/;;;;", "dblp": "146/6241;;;;274/9684", "google_scholar": "https://scholar.google.co.kr/citations?hl=en;;;;https://scholar.google.co.kr/citations?user=WEZZefcAAAAJ", "orcid": "0000-0002-2723-5606;;;;", "linkedin": "sujin-jang-7996b354;;;dongwook-lee-4903644b;captainzone/", "or_profile": "~Sujin_Jang2;~Dae_Ung_Jo2;~Sung_Ju_Hwang1;~Dongwook_Lee4;~Daehyun_Ji1", "aff": "Samsung Advanced Institute of Technology (SAIT);;;Samsung;Samsung", "aff_domain": "samsung.com;;;samsung.com;samsung.com", "position": "Researcher;;;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\njang2023stxd,\ntitle={{STXD}: Structural and Temporal Cross-Modal Distillation for Multi-View 3D Object Detection},\nauthor={Sujin Jang and Dae Ung Jo and Sung Ju Hwang and Dongwook Lee and Daehyun Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Grz2ijKrWI}\n}", "github": "", "project": "", "reviewers": "1M3u;U3eQ;X2eU;XsGg;4Udp", "pdf_size": 7939124, "rating": "4;5;5;5;7", "confidence": "5;5;4;5;4", "soundness": "2;3;3;2;4", "novelty": "1;3;3;3;4", "presentation": "3;3;3;4;4", "wc_summary": "77;90;52;79;59", "wc_strengths": "49;63;60;49;59", "wc_weaknesses": "237;73;72;221;125", "wc_questions": "62;77;6;77;38", "wc_limitations": "10;1;9;35;1", "wc_review": "435;304;199;461;282", "wc_reply_reviewers": "274;47;25;151;46", "wc_reply_authors": "1351;0;0;466;0", "reply_reviewers": "2;1;1;2;1", "reply_authors": "3;1;1;3;1", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], 
"novelty_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 71.4, 13.893883546366725 ], "wc_strengths_avg": [ 56.0, 5.865151319446072 ], "wc_weaknesses_avg": [ 145.6, 70.92418487370863 ], "wc_questions_avg": [ 52.0, 27.062889720057612 ], "wc_limitations_avg": [ 11.2, 12.496399481450647 ], "wc_review_avg": [ 336.2, 98.11707292821164 ], "wc_reply_reviewers_avg": [ 108.6, 93.65596617407778 ], "wc_reply_authors_avg": [ 363.4, 525.7488373738928 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666666, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5250063733892774069&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 4, "email": "samsung.com;;;samsung.com;samsung.com", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung Advanced Institute of Technology", "aff_unique_url": "https://www.sait.samsung.com", "aff_unique_abbr": "SAIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "REFINE: A Fine-Grained Medication Recommendation System Using Deep Learning and Personalized Drug Interaction Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72188", "id": "GsCTjmYe5v", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b7439a4ab0b8e4bcb4e2412c6a10a58-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GsCTjmYe5v", "openreview": "https://openreview.net/forum?id=GsCTjmYe5v", "poster": "/media/PosterPDFs/NeurIPS%202023/72188.png?t=1702109862.044264", "slides": "https://nips.cc/virtual/2023/poster/72188", "video": "https://nips.cc/virtual/2023/poster/72188", "author_site": "Suman Bhoi, Mong Li Lee, Wynne Hsu, Ngiap Chuan Tan", "tldr": "", "abstract": "Patients with co-morbidities often require multiple medications to manage their conditions. However, existing medication recommendation systems only offer class-level medications and regard all interactions among drugs to have the same level of severity. This limits their ability to provide personalized and safe recommendations tailored to individual needs. In this work, we introduce a deep learning-based fine-grained medication recommendation system called REFINE, which is designed to improve treatment outcomes and minimize adverse drug interactions. In order to better characterize patients\u2019 health conditions, we model the trend in medication dosage titrations and lab test responses, and adapt the vision transformer to obtain effective patient representations. We also model drug interaction severity levels as weighted graphs to learn safe drug combinations and design a balanced loss function to avoid overly conservative recommendations and miss medications that might be needed for certain conditions. 
Extensive experiments on two real-world datasets show that REFINE outperforms state-of-the-art techniques.", "keywords": "Fine-grained Medication recommendation;Drug Interaction Severity", "primary_area": "", "supplementary_material": "/attachment/6c755fb4936dbb8c97f2b26441aadd94e34b2118.pdf", "author": "Suman Bhoi;Mong-Li Lee;Wynne Hsu;Ngiap Chuan Tan", "authorids": "~Suman_Bhoi1;~Mong-Li_Lee1;~Wynne_Hsu1;~Ngiap_Chuan_Tan1", "gender": "F;F;F;M", "homepage": ";https://www.comp.nus.edu.sg/~leeml/;http://www.comp.nus.edu.sg/~whsu/;", "dblp": "202/5448;l/MongLiLee;h/WynneHsu;", "google_scholar": ";https://scholar.google.com.tw/citations?user=_xFTK8wAAAAJ;https://scholar.google.com.tw/citations?user=ljyBjv8AAAAJ;", "orcid": "0000-0003-0460-9182;0000-0002-9636-388X;0000-0002-4142-8893;0000-0002-5946-1149", "linkedin": ";;;", "or_profile": "~Suman_Bhoi1;~Mong-Li_Lee1;~Wynne_Hsu1;~Ngiap_Chuan_Tan1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;", "aff_domain": "nus.edu;nus.edu.sg;nus.edu.sg;", "position": "PhD student;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nbhoi2023refine,\ntitle={{REFINE}: A Fine-Grained Medication Recommendation System Using Deep Learning and Personalized Drug Interaction Modeling},\nauthor={Suman Bhoi and Mong-Li Lee and Wynne Hsu and Ngiap Chuan Tan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GsCTjmYe5v}\n}", "github": "", "project": "", "reviewers": "Eh21;RMGb;ebob;nz6X", "pdf_size": 619679, "rating": "5;6;6;6", "confidence": "5;5;4;3", "soundness": "3;2;3;2", "novelty": "2;1;3;2", "presentation": "3;1;4;3", "wc_summary": "40;61;30;169", "wc_strengths": "101;33;42;97", "wc_weaknesses": "226;55;95;208", "wc_questions": "3;468;82;408", "wc_limitations": "1;19;1;61", "wc_review": "371;636;250;943", "wc_reply_reviewers": "0;326;49;296", "wc_reply_authors": "36;241;204;113", "reply_reviewers": "0;3;2;3", "reply_authors": "2;3;3;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 75.0, 55.41209254305417 ], "wc_strengths_avg": [ 68.25, 30.946526460977815 ], "wc_weaknesses_avg": [ 146.0, 72.67392930067838 ], "wc_questions_avg": [ 240.25, 200.83622058782126 ], "wc_limitations_avg": [ 20.5, 24.510201957552287 ], "wc_review_avg": [ 550.0, 266.40476722461256 ], "wc_reply_reviewers_avg": [ 167.75, 144.6830587871296 ], "wc_reply_authors_avg": [ 148.5, 79.92652876235775 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9843254255198039719&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nus.edu;nus.edu.sg;nus.edu.sg;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Robust covariance estimation with missing values and cell-wise contamination", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72187", "id": "GtYlxtwO74", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e444859b2a22df6b56af9381ad1e9480-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GtYlxtwO74", "openreview": "https://openreview.net/forum?id=GtYlxtwO74", "poster": "/media/PosterPDFs/NeurIPS%202023/72187.png?t=1701855647.6420069", "slides": "https://nips.cc/virtual/2023/poster/72187", "video": "https://nips.cc/virtual/2023/poster/72187", "author_site": "Gr\u00e9goire Pacreau, Karim Lounici", "tldr": "", "abstract": "Large datasets are often affected by cell-wise outliers in the form of missing or erroneous data. However, discarding any samples containing outliers may result in a dataset that is too small to accurately estimate the covariance matrix. Moreover, the robust procedures designed to address this problem require the invertibility of the covariance operator and thus are not effective on high-dimensional data. In this paper, we propose an unbiased estimator for the covariance in the presence of missing values that does not require any imputation step and still achieves near minimax statistical accuracy with the operator norm. We also advocate for its use in combination with cell-wise outlier detection methods to tackle cell-wise contamination in a high-dimensional and low-rank setting, where state-of-the-art methods may suffer from numerical instability and long computation times. To complement our theoretical findings, we conducted an experimental study which demonstrates the superiority of our approach over the state of the art both in low and high dimension settings.", "keywords": "robust statistics;missing values;cell-wise contamination", "primary_area": "", "supplementary_material": "/attachment/68fbeb00136bb77e56f1c1c0767a4f9d787952af.zip", "author": "gregoire pacreau;Karim Lounici", "authorids": "~gregoire_pacreau1;~Karim_Lounici1", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": "l7hggicAAAAJ;", "orcid": ";", "linkedin": "gr%C3%A9goire-pacreau-3aa053147/;", "or_profile": "~gregoire_pacreau1;~Karim_Lounici1", "aff": "\u00c9cole Polytechnique;", "aff_domain": "polytechnique.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\npacreau2023robust,\ntitle={Robust covariance estimation with missing values and cell-wise contamination},\nauthor={gregoire pacreau and Karim Lounici},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GtYlxtwO74}\n}", "github": "", "project": "", "reviewers": "PVnK;zw68;LnKP;vhsW", "pdf_size": 1607564, "rating": "6;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "68;119;99;88", "wc_strengths": "61;28;80;90", "wc_weaknesses": "626;108;8;113", "wc_questions": "104;57;19;180", "wc_limitations": "158;13;1;1", "wc_review": "1017;325;207;472", "wc_reply_reviewers": "946;17;0;16", "wc_reply_authors": "1887;0;0;21", "reply_reviewers": "2;1;0;1", "reply_authors": "5;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.5, 18.445866745696716 ], "wc_strengths_avg": [ 64.75, 23.636571240347024 ], "wc_weaknesses_avg": [ 213.75, 241.66958331573298 ], "wc_questions_avg": [ 90.0, 60.054142238483436 ], "wc_limitations_avg": [ 43.25, 66.43182595714195 ], "wc_review_avg": [ 505.25, 
310.0148181942276 ], "wc_reply_reviewers_avg": [ 244.75, 404.9230636800033 ], "wc_reply_authors_avg": [ 477.0, 814.1090221831472 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16443328615019582184&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "polytechnique.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Ecole Polytechnique", "aff_unique_dep": "", "aff_unique_url": "https://www.polytechnique.edu", "aff_unique_abbr": "X", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Joint Data-Task Generation for Auxiliary Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72186", "id": "GtgFo5lmOB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a91fb5a4c03e0b6d889e1c52f775480-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GtgFo5lmOB", "openreview": "https://openreview.net/forum?id=GtgFo5lmOB", "poster": "/media/PosterPDFs/NeurIPS%202023/72186.png?t=1701930344.059107", "slides": "https://nips.cc/virtual/2023/poster/72186", "video": "https://nips.cc/virtual/2023/poster/72186", "author_site": "Hong Chen, Xin Wang, Yuwei Zhou, Yijian Qin, Chaoyu Guan, Wenwu Zhu", "tldr": "", "abstract": "Current auxiliary learning methods mainly adopt the methodology of reweighing losses for the manually collected auxiliary data and tasks. However, these methods heavily rely on domain knowledge during data collection, which may be hardly available in reality. Therefore, current methods will become less effective and even do harm to the primary task when unhelpful auxiliary data and tasks are employed. To tackle the problem, we propose a joint data-task generation framework for auxiliary learning (DTG-AuxL), which can bring benefits to the primary task by generating the new auxiliary data and task in a joint manner. The proposed DTG-AuxL framework contains a joint generator and a bi-level optimization strategy. Specifically, the joint generator contains a feature generator and a label generator, which are designed to be applicable and expressive for various auxiliary learning scenarios. The bi-level optimization strategy optimizes the joint generator and the task learning model, where the joint generator is effectively optimized in the upper level via the implicit gradient from the primary loss and the explicit gradient of our proposed instance regularization, while the task learning model is optimized in the lower level by the generated data and task. 
Extensive experiments show that our proposed DTG-AuxL framework consistently outperforms existing methods in various auxiliary learning scenarios, particularly when the manually collected auxiliary data and tasks are unhelpful.", "keywords": "auxiliary learning;data-task joint generation", "primary_area": "", "supplementary_material": "/attachment/eafc7d4bf6bd1201bd30c125c3e15a27a5c89815.pdf", "author": "Hong Chen;Xin Wang;Yuwei Zhou;Yijian Qin;Chaoyu Guan;Wenwu Zhu", "authorids": "~Hong_Chen9;~Xin_Wang17;~Yuwei_Zhou1;~Yijian_Qin2;~Chaoyu_Guan1;~Wenwu_Zhu1", "gender": "M;M;M;M;M;M", "homepage": "https://forchchch.github.io/;http://mn.cs.tsinghua.edu.cn/xinwang/;;http://www.cs.tsinghua.edu.cn/;;http://media.cs.tsinghua.edu.cn/en/zww", "dblp": "52/4150-11;10/5630-19;124/2955;290/1902;242/3864;97/6308-1.html", "google_scholar": ";YPOBHYUAAAAJ;Ed748H0AAAAJ;bSKCQwkAAAAJ;;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ", "orcid": "0000-0002-0943-2286;0000-0002-0351-2939;0000-0001-9582-7331;;;0000-0003-2236-9290", "linkedin": ";;;;;", "or_profile": "~Hong_Chen9;~Xin_Wang17;~Yuwei_Zhou1;~Yijian_Qin2;~Chaoyu_Guan1;~Wenwu_Zhu1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;PhD student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nchen2023joint,\ntitle={Joint Data-Task Generation for Auxiliary Learning},\nauthor={Hong Chen and Xin Wang and Yuwei Zhou and Yijian Qin and Chaoyu Guan and Wenwu Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GtgFo5lmOB}\n}", "github": "", "project": "", "reviewers": "Gctb;81dw;iWiK;HDHq;SrwV", "pdf_size": 6098136, "rating": "5;6;6;7;7", "confidence": "2;3;4;5;5", "soundness": "2;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "3;2;3;4;4", "wc_summary": "121;104;102;87;74", "wc_strengths": "207;59;61;75;73", "wc_weaknesses": "137;236;265;72;85", "wc_questions": "5;131;2;2;29", "wc_limitations": "2;26;1;12;1", "wc_review": "472;556;431;248;262", "wc_reply_reviewers": "0;46;0;23;0", "wc_reply_authors": "0;24;0;29;0", "reply_reviewers": "0;1;0;1;0", "reply_authors": "1;2;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 97.6, 15.982490419205636 ], "wc_strengths_avg": [ 95.0, 56.356011214421486 ], "wc_weaknesses_avg": [ 159.0, 78.35049457406124 ], "wc_questions_avg": [ 33.8, 49.64433502425025 ], "wc_limitations_avg": [ 8.4, 9.728309205612248 ], "wc_review_avg": [ 393.8, 120.36344960161287 ], "wc_reply_reviewers_avg": [ 13.8, 18.400000000000002 ], "wc_reply_authors_avg": [ 10.6, 13.078226179417452 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9625334218796219, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1523932487322638895&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", 
"aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Robustifying Generalizable Implicit Shape Networks with a Tunable Non-Parametric Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72185", "id": "Gtse4R6iS4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/525c95ffca1f57a10e3527d3584f3cf1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Gtse4R6iS4", "openreview": "https://openreview.net/forum?id=Gtse4R6iS4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72185", "video": "https://nips.cc/virtual/2023/poster/72185", "author_site": "Amine Ouasfi, Adnane Boukhayma", "tldr": "", "abstract": "Feedforward generalizable models for implicit shape reconstruction from unoriented point cloud present multiple advantages, including high performance and inference speed. However, they still suffer from generalization issues, ranging from underfitting the input point cloud, to misrepresenting samples outside of the training data distribution, or with toplogies unseen at training. We propose here an efficient mechanism to remedy some of these limitations at test time. We combine the inter-shape data prior of the network with an intra-shape regularization prior of a Nystr\u00f6m Kernel Ridge Regression, that we further adapt by fitting its hyperprameters to the current shape. The resulting shape function defined in a shape specific Reproducing Kernel Hilbert Space benefits from desirable stability and efficiency properties and grants a shape adaptive expressiveness-robustness trade-off. 
We demonstrate the improvement obtained through our method with respect to baselines and the state-of-the-art using synthetic and real data.", "keywords": "implicit neural representations;3D reconstruction from unoriented point could;kernel ridge regression", "primary_area": "", "supplementary_material": "/attachment/06aafc0b571ad86e220e4d0b309a29a5e6c75894.pdf", "author": "Amine Ouasfi;Adnane Boukhayma", "authorids": "~Amine_Ouasfi1;~Adnane_Boukhayma2", "gender": ";M", "homepage": ";https://boukhayma.github.io/", "dblp": "324/2085;172/2146", "google_scholar": "IdcK7TcAAAAJ;", "orcid": ";", "linkedin": "amine-ouasfi/;", "or_profile": "~Amine_Ouasfi1;~Adnane_Boukhayma2", "aff": "INRIA;INRIA", "aff_domain": "inria.fr;inria.fr", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nouasfi2023robustifying,\ntitle={Robustifying Generalizable Implicit Shape Networks with a Tunable Non-Parametric Model},\nauthor={Amine Ouasfi and Adnane Boukhayma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Gtse4R6iS4}\n}", "github": "", "project": "", "reviewers": "q2Vh;7xQW;6hDU;LSLk;Pm8Q;JKo3", "pdf_size": 3945457, "rating": "5;5;5;5;6;6", "confidence": "2;3;4;4;4;2", "soundness": "4;3;3;3;3;3", "novelty": "4;2;2;2;3;2", "presentation": "3;2;2;2;3;4", "wc_summary": "98;87;38;138;80;81", "wc_strengths": "59;56;21;70;63;95", "wc_weaknesses": "41;269;193;264;196;114", "wc_questions": "62;118;42;90;2;8", "wc_limitations": "50;28;6;9;1;6", "wc_review": "310;558;300;571;342;304", "wc_reply_reviewers": "16;295;23;166;17;19", "wc_reply_authors": "0;341;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;2;1;1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.7637626158259734 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 87.0, 29.46183972531247 ], "wc_strengths_avg": [ 60.666666666666664, 21.868292622475632 ], "wc_weaknesses_avg": [ 179.5, 80.70264349243256 ], "wc_questions_avg": [ 53.666666666666664, 41.671999658710355 ], "wc_limitations_avg": [ 16.666666666666668, 17.18203195847983 ], "wc_review_avg": [ 397.5, 118.91698785287156 ], "wc_reply_reviewers_avg": [ 89.33333333333333, 106.56244908763854 ], "wc_reply_authors_avg": [ 56.833333333333336, 127.08319672123807 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.13130643285972252, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5441082861742097466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "inria.fr;inria.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Unified Segment-to-Segment Framework for Simultaneous Sequence Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72184", "id": "GuErIOGLie", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8df705957a5262de3cb37ba9f1fb96f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GuErIOGLie", 
"openreview": "https://openreview.net/forum?id=GuErIOGLie", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72184", "video": "https://nips.cc/virtual/2023/poster/72184", "author_site": "Shaolei Zhang, Yang Feng", "tldr": "", "abstract": "Simultaneous sequence generation is a pivotal task for real-time scenarios, such as streaming speech recognition, simultaneous machine translation and simultaneous speech translation, where the target sequence is generated while receiving the source sequence. The crux of achieving high-quality generation with low latency lies in identifying the optimal moments for generating, accomplished by learning a mapping between the source and target sequences. However, existing methods often rely on task-specific heuristics for different sequence types, limiting the model\u2019s capacity to adaptively learn the source-target mapping and hindering the exploration of multi-task learning for various simultaneous tasks. In this paper, we propose a unified segment-to-segment framework (Seg2Seg) for simultaneous sequence generation, which learns the mapping in an adaptive and unified manner. During the process of simultaneous generation, the model alternates between waiting for a source segment and generating a target segment, making the segment serve as the natural bridge between the source and target. To accomplish this, Seg2Seg introduces a latent segment as the pivot between source to target and explores all potential source-target mappings via the proposed expectation training, thereby learning the optimal moments for generating. Experiments on multiple simultaneous generation tasks demonstrate that Seg2Seg achieves state-of-the-art performance and exhibits better generality across various tasks.", "keywords": "Machine Translation;Speech Translation;Speech Recognition;Simultaneous Generation;Simultaneous Translation", "primary_area": "", "supplementary_material": "", "author": "Shaolei Zhang;Yang Feng", "authorids": "~Shaolei_Zhang1;~Yang_Feng4", "gender": "M;", "homepage": "https://zhangshaolei1998.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": ";07/6095-4.html", "google_scholar": "https://scholar.google.com.hk/citations?user=gWwAWo4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7254-9380;", "linkedin": ";", "or_profile": "~Shaolei_Zhang1;~Yang_Feng4", "aff": "Key Laboratory of Intelligent Information Processing Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023unified,\ntitle={Unified Segment-to-Segment Framework for Simultaneous Sequence Generation},\nauthor={Shaolei Zhang and Yang Feng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GuErIOGLie}\n}", "github": "", "project": "", "reviewers": "ooDH;CEsh;D731;ZDUt", "pdf_size": 4792037, "rating": "5;6;6;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;2;4", "presentation": "2;3;3;4", "wc_summary": "57;91;131;85", "wc_strengths": "39;51;23;46", "wc_weaknesses": "134;40;37;12", "wc_questions": "64;79;80;133", "wc_limitations": "1;4;1;1", "wc_review": "295;265;272;277", "wc_reply_reviewers": "30;77;0;83", "wc_reply_authors": "8;29;0;68", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 
0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.0, 26.419689627245813 ], "wc_strengths_avg": [ 39.75, 10.568230693924125 ], "wc_weaknesses_avg": [ 55.75, 46.46705822407956 ], "wc_questions_avg": [ 89.0, 26.182054923172092 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 277.25, 11.098986440211556 ], "wc_reply_reviewers_avg": [ 47.5, 34.2527371169079 ], "wc_reply_authors_avg": [ 26.25, 26.328454189336675 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13106856596260013466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ict.ac.cn;ict.ac.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Distributional Learning of Variational AutoEncoder: Application to Synthetic Data Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72183", "id": "GxL6PrmEUw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b456a00e145ad56f6f251f79f8c8a7de-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=GxL6PrmEUw", "openreview": "https://openreview.net/forum?id=GxL6PrmEUw", "poster": "/media/PosterPDFs/NeurIPS%202023/72183.png?t=1701000821.3586118", "slides": "https://nips.cc/virtual/2023/poster/72183", "video": "https://nips.cc/virtual/2023/poster/72183", "author_site": "Seunghwan An, Jong-June Jeon", "tldr": "", "abstract": "The Gaussianity assumption has been consistently criticized as a main limitation of the Variational Autoencoder (VAE) despite its efficiency in computational modeling. In this paper, we propose a new approach that expands the model capacity (i.e., expressive power of distributional family) without sacrificing the computational advantages of the VAE framework. Our VAE model's decoder is composed of an infinite mixture of asymmetric Laplace distribution, which possesses general distribution fitting capabilities for continuous variables. Our model is represented by a special form of a nonparametric M-estimator for estimating general quantile functions, and we theoretically establish the relevance between the proposed model and quantile estimation. 
We apply the proposed model to synthetic data generation, and particularly, our model demonstrates superiority in easily adjusting the level of data privacy.", "keywords": "Variational AutoEncoder;distributional learning;synthetic data generation;CRPS;asymmetric Laplace distribution", "primary_area": "", "supplementary_material": "/attachment/83f65a05ba212bd200f6c6ef0d9d3d7d6e71373a.zip", "author": "Seunghwan An;Jong-June Jeon", "authorids": "~Seunghwan_An2;~Jong-June_Jeon1", "gender": "M;M", "homepage": "https://sites.google.com/view/an-seunghwan;http://ranking.uos.ac.kr/index_en.html", "dblp": "293/9384;203/0387", "google_scholar": "6eDsapQAAAAJ;https://scholar.google.co.kr/citations?user=A-E3uEMAAAAJ", "orcid": "0000-0002-1891-1174;0000-0002-1423-4292", "linkedin": "seunghwan-an-baa727264;jong-june-jeon-9789b4b3/", "or_profile": "~Seunghwan_An2;~Jong-June_Jeon1", "aff": "University of Seoul;University of Seoul", "aff_domain": "uos.ac.kr;uos.ac.kr", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nan2023distributional,\ntitle={Distributional Learning of Variational AutoEncoder: Application to Synthetic Data Generation},\nauthor={Seunghwan An and Jong-June Jeon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GxL6PrmEUw}\n}", "github": "", "project": "", "reviewers": "4gp6;B22e;a4Hg", "pdf_size": 817061, "rating": "6;6;6", "confidence": "3;2;2", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "46;64;66", "wc_strengths": "31;29;62", "wc_weaknesses": "22;33;54", "wc_questions": "130;15;8", "wc_limitations": "9;15;12", "wc_review": "238;156;202", "wc_reply_reviewers": "13;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.666666666666664, 8.993825042154695 ], "wc_strengths_avg": [ 40.666666666666664, 15.107025591499546 ], "wc_weaknesses_avg": [ 36.333333333333336, 13.27487183449325 ], "wc_questions_avg": [ 51.0, 55.93448548674303 ], "wc_limitations_avg": [ 12.0, 2.449489742783178 ], "wc_review_avg": [ 198.66666666666666, 33.55923452974192 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 6.128258770283412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=462192602587102827&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "uos.ac.kr;uos.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Seoul", "aff_unique_dep": "", "aff_unique_url": "http://www.useoul.edu", "aff_unique_abbr": "UOS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Functional Renyi Differential Privacy for Generative Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72182", "id": "GzlDKZlwie", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f9ee101e35b890d9eae79ee27bcd69a-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=GzlDKZlwie", "openreview": "https://openreview.net/forum?id=GzlDKZlwie", "poster": "/media/PosterPDFs/NeurIPS%202023/72182.png?t=1701716344.824196", "slides": "https://nips.cc/virtual/2023/poster/72182", "video": "https://nips.cc/virtual/2023/poster/72182", "author_site": "Dihong Jiang, Sun Sun, Yaoliang Yu", "tldr": "", "abstract": "Differential privacy (DP) has emerged as a rigorous notion to quantify data privacy. Subsequently, Renyi differential privacy (RDP) becomes an alternative to the ordinary DP notion in both theoretical and empirical studies, for its convenient compositional rules and flexibility. However, most mechanisms with DP (RDP) guarantees are essentially based on randomizing a fixed, finite-dimensional vector output. In this work, following Hall et al. (2013) we further extend RDP to functional outputs, where the output space can be infinite-dimensional, and develop all necessary tools, *e.g.*, (subsampled) Gaussian mechanism, composition, and post-processing rules, to facilitate its practical adoption. As an illustration, we apply functional RDP (f-RDP) to functions in the reproducing kernel Hilbert space (RKHS) to develop a differentially private generative model (DPGM), where training can be interpreted as iteratively releasing loss functions (in an RKHS) with DP (RDP) guarantees. Empirically, the new training paradigm achieves a significant improvement in privacy-utility trade-off compared to existing alternatives, especially when $\\epsilon=0.2$. Our code is available at https://github.com/dihjiang/DP-kernel.", "keywords": "Renyi differential privacy;RKHS;MMD;Gaussian process;generative model", "primary_area": "", "supplementary_material": "", "author": "Dihong Jiang;Sun Sun;Yaoliang Yu", "authorids": "~Dihong_Jiang1;~Sun_Sun1;~Yaoliang_Yu1", "gender": "M;F;M", "homepage": "https://dihjiang.github.io/;;https://cs.uwaterloo.ca/~y328yu/", "dblp": "234/8064;;90/4989", "google_scholar": "Cen4GYkAAAAJ;2X_jP6kAAAAJ;https://scholar.google.ca/citations?user=zbXIQMsAAAAJ", "orcid": ";;0000-0002-3823-0720", "linkedin": ";;", "or_profile": "~Dihong_Jiang1;~Sun_Sun1;~Yaoliang_Yu1", "aff": "University of Waterloo;National Research Council Canada;University of Waterloo", "aff_domain": "uwaterloo.ca;nrc-cnrc.gc.ca;uwaterloo.ca", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\njiang2023functional,\ntitle={Functional Renyi Differential Privacy for Generative Modeling},\nauthor={Dihong Jiang and Sun Sun and Yaoliang Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=GzlDKZlwie}\n}", "github": "", "project": "", "reviewers": "i7xA;iWpL;1DXa;JRvJ", "pdf_size": 6164719, "rating": "5;5;7;7", "confidence": "4;4;3;3", "soundness": "2;2;4;3", "novelty": "2;2;3;4", "presentation": "2;2;3;3", "wc_summary": "52;44;132;49", "wc_strengths": "38;31;159;59", "wc_weaknesses": "98;89;175;41", "wc_questions": "79;179;655;178", "wc_limitations": "1;2;204;5", "wc_review": "268;345;1325;332", "wc_reply_reviewers": "14;53;267;28", "wc_reply_authors": "0;85;140;5", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.25, 36.34126442489309 ], "wc_strengths_avg": [ 71.75, 51.416801728617855 ], "wc_weaknesses_avg": [ 100.75, 48.03319164910864 ], 
"wc_questions_avg": [ 272.75, 224.39961564138207 ], "wc_limitations_avg": [ 53.0, 87.19231617522269 ], "wc_review_avg": [ 567.5, 438.31295896881716 ], "wc_reply_reviewers_avg": [ 90.5, 102.8554811373706 ], "wc_reply_authors_avg": [ 57.5, 58.36308764964376 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6703189217116160486&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uwaterloo.ca;nrc-cnrc.gc.ca;uwaterloo.ca", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Waterloo;National Research Council Canada", "aff_unique_dep": ";", "aff_unique_url": "https://uwaterloo.ca;https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "UW;NRC-CNRC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Training Fully Connected Neural Networks is $\\exists\\mathbb{R}$-Complete", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72181", "id": "H15KtcyHvn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/71c31ebf577ffdad5f4a74156daad518-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H15KtcyHvn", "openreview": "https://openreview.net/forum?id=H15KtcyHvn", "poster": "/media/PosterPDFs/NeurIPS%202023/72181.png?t=1699620024.1074107", "slides": "https://nips.cc/virtual/2023/poster/72181", "video": "https://nips.cc/virtual/2023/poster/72181", "author_site": "Daniel Bertschinger, Christoph Hertrich, Paul Jungeblut, Tillmann Miltzow, Simon Weber", "tldr": "", "abstract": "We consider the algorithmic problem of finding the optimal weights and biases for a two-layer fully connected neural network to fit a given set of data points, also known as empirical risk minimization. We show that the problem is $\\exists\\mathbb{R}$-complete. This complexity class can be defined as the set of algorithmic problems that are polynomial-time equivalent to finding real roots of a multivariate polynomial with integer coefficients. Furthermore, we show that arbitrary algebraic numbers are required as weights to be able to train some instances to optimality, even if all data points are rational. Our result already applies to fully connected instances with two inputs, two outputs, and one hidden layer of ReLU neurons. Thereby, we strengthen a result by Abrahamsen, Kleist and Miltzow [NeurIPS 2021]. 
A consequence of this is that a combinatorial search algorithm like the one by Arora, Basu, Mianjy and Mukherjee [ICLR 2018] is impossible for networks with more than one output dimension, unless $\\text{NP} = \\exists\\mathbb{R}$.", "keywords": "Neural Network Training;Computational Complexity;Existential Theory of the Reals;Algebraic Universality;Empirical Risk Minimization", "primary_area": "", "supplementary_material": "/attachment/e6c587a239a3cd8b97fb869836518ca256cbdc41.pdf", "author": "Daniel Bertschinger;Christoph Hertrich;Paul Jungeblut;Tillmann Miltzow;Simon Weber", "authorids": "~Daniel_Bertschinger1;~Christoph_Hertrich1;~Paul_Jungeblut1;~Tillmann_Miltzow1;~Simon_Weber2", "gender": ";;;;M", "homepage": ";https://christophhertrich.gitlab.io;https://i11www.iti.kit.edu/en/members/paul_jungeblut/index;https://sites.google.com/view/miltzow/home;https://people.inf.ethz.ch/siweber/", "dblp": "258/5126;234/8939;229/4297;37/8210.html;31/9828", "google_scholar": "https://scholar.google.ch/citations?user=aD2Xm5wAAAAJ;bbMbGU4AAAAJ;https://scholar.google.de/citations?user=e4QMTOQAAAAJ;y8ae9ZYAAAAJ;aJCaNXAAAAAJ", "orcid": ";0000-0001-5646-8567;0000-0001-8241-2102;;0000-0003-1901-3621", "linkedin": "dbertschinger/;;;;", "or_profile": "~Daniel_Bertschinger1;~Christoph_Hertrich1;~Paul_Jungeblut1;~Tillmann_Miltzow1;~Simon_Weber2", "aff": ";London School of Economics and Political Science;Karlsruher Institut f\u00fcr Technologie;Utrecht University;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": ";lse.ac.uk;kit.edu;uu.nl;inf.ethz.ch", "position": ";Postdoc;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbertschinger2023training,\ntitle={Training Fully Connected Neural Networks is \\${\\textbackslash}exists{\\textbackslash}mathbb\\{R\\}\\$-Complete},\nauthor={Daniel Bertschinger and Christoph Hertrich and Paul Jungeblut and Tillmann Miltzow and Simon Weber},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H15KtcyHvn}\n}", "github": "", "project": "", "reviewers": "2pEa;64NS;QSnu;LVwc", "pdf_size": 425898, "rating": "5;6;6;7", "confidence": "2;4;4;3", "soundness": "3;4;3;3", "novelty": "3;2;3;4", "presentation": "2;4;3;4", "wc_summary": "55;88;29;117", "wc_strengths": "41;106;24;46", "wc_weaknesses": "112;160;328;156", "wc_questions": "2;234;27;78", "wc_limitations": "1;1;38;14", "wc_review": "211;589;446;411", "wc_reply_reviewers": "11;28;19;63", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 72.25, 33.23683950077083 ], "wc_strengths_avg": [ 54.25, 30.97075233183721 ], "wc_weaknesses_avg": [ 189.0, 82.43178998420451 ], "wc_questions_avg": [ 85.25, 90.14259536978065 ], "wc_limitations_avg": [ 13.5, 15.107944929738128 ], "wc_review_avg": [ 414.25, 134.9692094516375 ], "wc_reply_reviewers_avg": [ 30.25, 19.84155991851447 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": ";lse.ac.uk;kit.edu;uu.nl;inf.ethz.ch", "author_num": 5, "aff_unique_index": "0;1;2;3", 
"aff_unique_norm": "London School of Economics and Political Science;Karlsruher Institut f\u00fcr Technologie;Utrecht University;ETH Zurich", "aff_unique_dep": ";;;Department of Computer Science", "aff_unique_url": "https://www.lse.ac.uk;https://www.kit.edu;https://www.uu.nl;https://www.ethz.ch", "aff_unique_abbr": "LSE;KIT;UU;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "United Kingdom;Germany;Netherlands;Switzerland" }, { "title": "CBD: A Certified Backdoor Detector Based on Local Dominant Probability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72180", "id": "H1CQZqpgdQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0fbf046448d7eea18b982001320b9a10-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H1CQZqpgdQ", "openreview": "https://openreview.net/forum?id=H1CQZqpgdQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72180.png?t=1701802431.7941375", "slides": "https://nips.cc/virtual/2023/poster/72180", "video": "https://nips.cc/virtual/2023/poster/72180", "author_site": "Zhen Xiang, Zidi Xiong, Bo Li", "tldr": "", "abstract": "Backdoor attack is a common threat to deep neural networks. During testing, samples embedded with a backdoor trigger will be misclassified as an adversarial target by a backdoored model, while samples without the backdoor trigger will be correctly classified. In this paper, we present the first certified backdoor detector (CBD), which is based on a novel, adjustable conformal prediction scheme based on our proposed statistic local dominant probability. For any classifier under inspection, CBD provides 1) a detection inference, 2) the condition under which the attacks are guaranteed to be detectable for the same classification domain, and 3) a probabilistic upper bound for the false positive rate. Our theoretical results show that attacks with triggers that are more resilient to test-time noise and have smaller perturbation magnitudes are more likely to be detected with guarantees. Moreover, we conduct extensive experiments on four benchmark datasets considering various backdoor types, such as BadNet, CB, and Blend. CBD achieves comparable or even higher detection accuracy than state-of-the-art detectors, and it in addition provides detection certification. 
Notably, for backdoor attacks with random perturbation triggers bounded by $\\ell_2\\leq0.75$, which achieve more than a 90\\% attack success rate, CBD achieves 100\\% (98\\%), 100\\% (84\\%), 98\\% (98\\%), and 72\\% (40\\%) empirical (certified) detection true positive rates on the four benchmark datasets GTSRB, SVHN, CIFAR-10, and TinyImageNet, respectively, with low false positive rates.", "keywords": "backdoor;Trojan;certification;adversarial learning;deep neural network;conformal prediction", "primary_area": "", "supplementary_material": "/attachment/10e25bbafee9367b503071fb4e9e60e595a6edde.pdf", "author": "Zhen Xiang;Zidi Xiong;Bo Li", "authorids": "~Zhen_Xiang1;~Zidi_Xiong2;~Bo_Li19", "gender": "M;M;F", "homepage": "https://zhenxianglance.github.io/;https://polaris-73.github.io/;http://boli.cs.illinois.edu/", "dblp": "20/2799.html;314/6808;50/3402-26", "google_scholar": "https://scholar.google.com/citations?hl=en;XL6QafwAAAAJ;K8vJkTcAAAAJ", "orcid": ";;", "linkedin": ";https://www.linkedin.com/public-profile/settings;", "or_profile": "~Zhen_Xiang1;~Zidi_Xiong2;~Bo_Li19", "aff": "University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois at Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;cs.illinois.edu;illinois.edu", "position": "Postdoc;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nxiang2023cbd,\ntitle={{CBD}: A Certified Backdoor Detector Based on Local Dominant Probability},\nauthor={Zhen Xiang and Zidi Xiong and Bo Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H1CQZqpgdQ}\n}", "github": "", "project": "", "reviewers": "MHNa;XxVd;xfbM;fRFm;jLvZ", "pdf_size": 735467, "rating": "5;6;6;7;7", "confidence": "3;3;4;4;3", "soundness": "3;3;3;4;2", "novelty": "3;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "121;131;40;201;70", "wc_strengths": "49;50;28;55;44", "wc_weaknesses": "473;12;142;247;23", "wc_questions": "2;121;18;15;285", "wc_limitations": "1;51;15;15;1", "wc_review": "646;365;243;533;423", "wc_reply_reviewers": "68;63;56;58;20", "wc_reply_authors": "49;38;54;29;26", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 112.6, 55.33389557947281 ], "wc_strengths_avg": [ 45.2, 9.282241108697834 ], "wc_weaknesses_avg": [ 179.4, 170.09009377385854 ], "wc_questions_avg": [ 88.2, 107.25931195005869 ], "wc_limitations_avg": [ 16.6, 18.30409790183608 ], "wc_review_avg": [ 442.0, 138.41098222323257 ], "wc_reply_reviewers_avg": [ 53.0, 17.017637908946117 ], "wc_reply_authors_avg": [ 39.2, 10.906878563548783 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18012920847091864400&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 9, "email": "illinois.edu;cs.illinois.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign",
"aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Accelerated Training via Incrementally Growing Neural Networks using Variance Transfer and Learning Rate Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72179", "id": "H1a7bVVnPK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/359ffa88712bd688963a0ca641d8330b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H1a7bVVnPK", "openreview": "https://openreview.net/forum?id=H1a7bVVnPK", "poster": "/media/PosterPDFs/NeurIPS%202023/72179.png?t=1702000357.1399505", "slides": "https://nips.cc/virtual/2023/poster/72179", "video": "https://nips.cc/virtual/2023/poster/72179", "author_site": "Xin Yuan, Pedro Savarese, Michael Maire", "tldr": "", "abstract": "We develop an approach to efficiently grow neural networks, within which parameterization and optimization strategies are designed by considering their effects on the training dynamics. Unlike existing growing methods, which follow simple replication heuristics or utilize auxiliary gradient-based local optimization, we craft a parameterization scheme which dynamically stabilizes weight, activation, and gradient scaling as the architecture evolves, and maintains the inference functionality of the network. To address the optimization difficulty resulting from imbalanced training effort distributed to subnetworks fading in at different growth phases, we propose a learning rate adaption mechanism that rebalances the gradient contribution of these separate subcomponents. Experiments show that our method achieves comparable or better accuracy than training large fixed-size models, while saving a substantial portion of the original training computation budget. 
We demonstrate that these gains translate into real wall-clock training speedups.", "keywords": "network growing;efficient network training", "primary_area": "", "supplementary_material": "", "author": "Xin Yuan;Pedro Henrique Pamplona Savarese;Michael Maire", "authorids": "~Xin_Yuan5;~Pedro_Henrique_Pamplona_Savarese1;~Michael_Maire1", "gender": "M;M;M", "homepage": ";https://ttic.uchicago.edu/~savarese/;http://people.cs.uchicago.edu/~mmaire/", "dblp": "78/713-6;;73/1498.html", "google_scholar": "EiD_2e0AAAAJ;;HXowq5YAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xin_Yuan5;~Pedro_Henrique_Pamplona_Savarese1;~Michael_Maire1", "aff": "University of Chicago;Toyota Technological Institute at Chicago;University of Chicago", "aff_domain": "uchicago.edu;ttic.edu;uchicago.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyuan2023accelerated,\ntitle={Accelerated Training via Incrementally Growing Neural Networks using Variance Transfer and Learning Rate Adaptation},\nauthor={Xin Yuan and Pedro Henrique Pamplona Savarese and Michael Maire},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H1a7bVVnPK}\n}", "github": "", "project": "", "reviewers": "j2wb;iBKk;GgUs;Tyrx;8dnv;NBgL", "pdf_size": 3394210, "rating": "5;5;6;6;7;7", "confidence": "4;4;2;3;4;4", "soundness": "3;2;3;3;3;3", "novelty": "3;2;2;3;3;3", "presentation": "3;2;3;3;3;3", "wc_summary": "36;45;126;92;49;46", "wc_strengths": "22;28;155;85;105;91", "wc_weaknesses": "37;300;159;33;54;383", "wc_questions": "14;31;19;197;221;197", "wc_limitations": "60;22;44;39;16;22", "wc_review": "169;426;503;446;445;739", "wc_reply_reviewers": "11;8;0;0;12;26", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;0;0;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 65.66666666666667, 32.417416443771224 ], "wc_strengths_avg": [ 81.0, 45.566800780100124 ], "wc_weaknesses_avg": [ 161.0, 136.5149564455607 ], "wc_questions_avg": [ 113.16666666666667, 92.31904221532822 ], "wc_limitations_avg": [ 33.833333333333336, 15.34510417762689 ], "wc_review_avg": [ 454.6666666666667, 166.22140522675036 ], "wc_reply_reviewers_avg": [ 9.5, 8.789197915623474 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16784179330898474094&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "uchicago.edu;ttic.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UChicago;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Tree-Based Diffusion Schr\u00f6dinger Bridge with Applications to Wasserstein Barycenters", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72178", "id": "H2SuXHbFIn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad08767706825033b99122332293033d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H2SuXHbFIn", "openreview": "https://openreview.net/forum?id=H2SuXHbFIn", "poster": "/media/PosterPDFs/NeurIPS%202023/72178.png?t=1701781851.1518102", "slides": "https://nips.cc/virtual/2023/poster/72178", "video": "https://nips.cc/virtual/2023/poster/72178", "author_site": "Maxence Noble, Valentin De Bortoli, Arnaud Doucet, Alain Durmus", "tldr": "", "abstract": "Multi-marginal Optimal Transport (mOT), a generalization of OT, aims at minimizing the integral of a cost function with respect to a distribution with some prescribed marginals. In this paper, we consider an entropic version of mOT\n with a tree-structured quadratic cost, i.e., a function that can be written as\n a sum of pairwise cost functions between the nodes of a tree. To address this\n problem, we develop Tree-based Diffusion Schr\\\"odinger Bridge (TreeDSB), an\n extension of the Diffusion Schr\\\"odinger Bridge (DSB) algorithm. TreeDSB\n corresponds to a dynamic and continuous state-space counterpart of the\n multimarginal Sinkhorn algorithm. A notable use case of our methodology is to\n compute Wasserstein barycenters which can be recast as the solution of a mOT\n problem on a star-shaped tree. We demonstrate that our methodology can be applied in high-dimensional settings such as image interpolation and\n Bayesian fusion.", "keywords": "Schr\u00f6dinger bridge;optimal transport;diffusion model;Wasserstein barycenter", "primary_area": "", "supplementary_material": "/attachment/b978ff4245ec616f5708ee95e9df6a74d4f0a316.pdf", "author": "Maxence Noble;Valentin De Bortoli;Arnaud Doucet;Alain Durmus", "authorids": "~Maxence_Noble1;~Valentin_De_Bortoli1;~Arnaud_Doucet2;~Alain_Durmus1", "gender": "M;;;M", "homepage": "https://maxencenoble.github.io/;https://vdeborto.github.io/;https://www.stats.ox.ac.uk/~doucet/;", "dblp": "306/7678;224/9338;68/1628;01/11275", "google_scholar": "4eGHx3gAAAAJ;;W4SZGV8AAAAJ;", "orcid": ";;0000-0002-7662-419X;", "linkedin": "maxence-noble-393588172/;;;", "or_profile": "~Maxence_Noble1;~Valentin_De_Bortoli1;~Arnaud_Doucet2;~Alain_Durmus1", "aff": "\u00c9cole Polytechnique;University of Oxford;University of Oxford;\u00c9cole Polytechnique", "aff_domain": "polytechnique.fr;ox.ac.uk;ox.ac.uk;polytechnique.fr", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nnoble2023treebased,\ntitle={Tree-Based Diffusion Schr\\\"odinger Bridge with Applications to Wasserstein Barycenters},\nauthor={Maxence Noble and Valentin De Bortoli and Arnaud Doucet and Alain Durmus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H2SuXHbFIn}\n}", "github": "", "project": "", "reviewers": "GsNC;2tnG;fo2c;XAZX;BvM6", "pdf_size": 6688106, "rating": "4;5;7;7;7", "confidence": "2;3;3;3;3", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;2;3;3", "wc_summary": "57;356;133;60;64", "wc_strengths": "32;69;57;36;42", "wc_weaknesses": "124;333;17;127;30", "wc_questions": "484;126;649;123;37", "wc_limitations": "23;18;1;1;5", "wc_review": "720;902;857;347;178", "wc_reply_reviewers": "11;22;43;11;7", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 2.8, 
0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 134.0, 114.53383779477574 ], "wc_strengths_avg": [ 47.2, 13.81882773609976 ], "wc_weaknesses_avg": [ 126.2, 113.09358956191991 ], "wc_questions_avg": [ 283.8, 238.7964823861524 ], "wc_limitations_avg": [ 9.6, 9.156418513807678 ], "wc_review_avg": [ 600.8, 287.6604943331635 ], "wc_reply_reviewers_avg": [ 18.8, 13.090454537562858 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7905694150420949, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11134740102946098816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "polytechnique.fr;ox.ac.uk;ox.ac.uk;polytechnique.fr", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Ecole Polytechnique;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.ox.ac.uk", "aff_unique_abbr": "X;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "France;United Kingdom" }, { "title": "Lo-Hi: Practical ML Drug Discovery Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73641", "id": "H2Yb28qGLV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cb82f1f97ad0ca1d92df852a44a3bd73-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=H2Yb28qGLV", "openreview": "https://openreview.net/forum?id=H2Yb28qGLV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73641", "video": "https://nips.cc/virtual/2023/poster/73641", "tldr": "", "abstract": "Finding new drugs is getting harder and harder. One of the hopes of drug discovery is to use machine learning models to predict molecular properties. That is why models for molecular property prediction are being developed and tested on benchmarks such as MoleculeNet. However, existing benchmarks are unrealistic and differ too much from how the models are applied in practice. We have created a new practical \\emph{Lo-Hi} benchmark consisting of two tasks: Lead Optimization (Lo) and Hit Identification (Hi), corresponding to the real drug discovery process. For the Hi task, we designed a novel molecular splitting algorithm that solves the Balanced Vertex Minimum $k$-Cut problem. We tested state-of-the-art and classic ML models, revealing which works better under practical settings.
We analyzed modern benchmarks and showed that they are unrealistic and overoptimistic.\n\nReview: https://openreview.net/forum?id=H2Yb28qGLV\n\nLo-Hi benchmark: https://github.com/SteshinSS/lohi_neurips2023\n\nLo-Hi splitter library: https://github.com/SteshinSS/lohi_splitter", "keywords": "Molecular Property Prediction;Benchmark;Hit Identification;Lead Optimization;ML Drug Discovery", "primary_area": "", "supplementary_material": "/attachment/1792178062507c2bcb12135d316262947d168c33.zip", "author": "Simon Steshin", "authorids": "~Simon_Steshin1", "gender": "M", "homepage": "https://twitter.com/ZdarovaAll", "dblp": "", "google_scholar": "BWfi-TMAAAAJ", "orcid": "0000-0002-4896-6379", "linkedin": "simon-steshin-506ab1197/", "or_profile": "~Simon_Steshin1", "aff": "Gero PTE", "aff_domain": "gero.ai", "position": "Researcher", "bibtex": "@inproceedings{\nsteshin2023lohi,\ntitle={Lo-Hi: Practical {ML} Drug Discovery Benchmark},\nauthor={Simon Steshin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=H2Yb28qGLV}\n}", "github": "", "project": "", "reviewers": "jiMQ;H78M;ZG59;igzy", "pdf_size": 907137, "rating": "3;4;7;9", "confidence": "4;3;4;3", "wc_summary_and_contributions": "146;66;79;306", "wc_strengths": "39;56;192;81", "wc_improvement": "202;287;216;151", "wc_limitations": "2;149;23;92", "wc_correctness": "337;71;9;34", "wc_clarity": "23;12;6;44", "wc_relation_to_prior_work": "13;20;31;54", "wc_documentation": "10;22;15;36", "wc_additional_feedback": "1;1;1;1", "wc_review": "773;684;572;799", "wc_reply_reviewers": "0;392;97;18", "wc_reply_authors": "1349;1017;315;733", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.75, 2.384848003542364 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 149.25, 95.45515963005876 ], "wc_strengths_avg": [ 92.0, 59.63639828158639 ], "wc_improvement_avg": [ 214.0, 48.595267259271246 ], "wc_limitations_avg": [ 66.5, 58.11411188343155 ], "wc_correctness_avg": [ 112.75, 131.3361621945761 ], "wc_clarity_avg": [ 21.25, 14.48059045757458 ], "wc_relation_to_prior_work_avg": [ 29.5, 15.532224567009067 ], "wc_documentation_avg": [ 20.75, 9.781998773256925 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 707.0, 88.84537129192493 ], "wc_reply_reviewers_avg": [ 126.75, 157.42835672139884 ], "wc_reply_authors_avg": [ 853.5, 379.7219377386563 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3144854510165755, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4750517376753284455&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "gero.ai", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Gero PTE", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "Grassmann Manifold Flows for Stable Shape Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72177", "id": "H2udtfMbl4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e50e253e21cbcdcd200394f61d73acc8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H2udtfMbl4", "openreview": "https://openreview.net/forum?id=H2udtfMbl4", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72177.png?t=1702056220.3354926", "slides": "https://nips.cc/virtual/2023/poster/72177", "video": "https://nips.cc/virtual/2023/poster/72177", "author_site": "Ryoma Yataka, Kazuki Hirashima, Masashi Shiraishi", "tldr": "", "abstract": "Recently, studies on machine learning have focused on methods that use symmetry implicit in a specific manifold as an inductive bias.\nGrassmann manifolds provide the ability to handle fundamental shapes represented as shape spaces, enabling stable shape analysis. \nIn this paper, we present a novel approach in which we establish the theoretical foundations for learning distributions on the Grassmann manifold via continuous normalization flows, with the explicit goal of generating stable shapes.\nOur approach facilitates more robust generation by effectively eliminating the influence of extraneous transformations, such as rotations and inversions, through learning and generating within a Grassmann manifold designed to accommodate the essential shape information of the object.\nThe experimental results indicated that the proposed method could generate high-quality samples by capturing the data structure.\nFurthermore, the proposed method significantly outperformed state-of-the-art methods in terms of the log-likelihood or evidence lower bound.\nThe results obtained are expected to stimulate further research in this field, leading to advances for stable shape generation and analysis.", "keywords": "Generative Models;Geometric Deep Learning;Normalizing Flows;Shape Analysis;Grassmann Manifold", "primary_area": "", "supplementary_material": "/attachment/5e42ae9f3615b65e511fa5d262916b91cd076012.pdf", "author": "Ryoma Yataka;Kazuki Hirashima;Masashi Shiraishi", "authorids": "~Ryoma_Yataka1;~Kazuki_Hirashima1;~Masashi_Shiraishi1", "gender": "M;;M", "homepage": ";;", "dblp": "199/9334;294/2318;40/1353", "google_scholar": "MA56tMUAAAAJ;https://scholar.google.co.jp/citations?user=gHD7ArkAAAAJ;", "orcid": "0009-0004-7311-6431;;", "linkedin": "ryoma-yataka-92724328a/;;", "or_profile": "~Ryoma_Yataka1;~Kazuki_Hirashima1;~Masashi_Shiraishi1", "aff": "Mitsubishi Electric Research Labs;Mitsubishi Electric Corporation;", "aff_domain": "merl.com;co.jp;", "position": "Visiting Resarcher;Researcher;", "bibtex": "@inproceedings{\nyataka2023grassmann,\ntitle={Grassmann Manifold Flows for Stable Shape Generation},\nauthor={Ryoma Yataka and Kazuki Hirashima and Masashi Shiraishi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H2udtfMbl4}\n}", "github": "", "project": "", "reviewers": "5FfM;gPKP;D7Hq;SWNf;ioxc", "pdf_size": 7443095, "rating": "4;4;6;6;8", "confidence": "3;4;3;3;3", "soundness": "2;2;4;3;4", "novelty": "2;2;3;4;4", "presentation": "2;2;3;4;4", "wc_summary": "51;112;118;90;96", "wc_strengths": "40;56;78;162;79", "wc_weaknesses": "158;132;73;133;60", "wc_questions": "225;32;215;54;40", "wc_limitations": "161;13;15;1;73", "wc_review": "635;345;499;440;348", "wc_reply_reviewers": "0;0;94;0;0", "wc_reply_authors": "0;0;37;0;0", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 93.4, 23.52530552405218 ], "wc_strengths_avg": [ 83.0, 42.09513035969837 ], "wc_weaknesses_avg": [ 111.2, 
37.891423831785474 ], "wc_questions_avg": [ 113.2, 87.54290376723861 ], "wc_limitations_avg": [ 52.6, 59.68450385150236 ], "wc_review_avg": [ 453.4, 107.7935062979213 ], "wc_reply_reviewers_avg": [ 18.8, 37.6 ], "wc_reply_authors_avg": [ 7.4, 14.8 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5145426109097907714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "merl.com;co.jp;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Mitsubishi Electric Research Laboratories;Mitsubishi Electric Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.merl.com;https://www.mitsubishielectric.com", "aff_unique_abbr": "MERL;MEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan" }, { "title": "On the Overlooked Structure of Stochastic Gradients", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72176", "id": "H4GsteoL0M", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0b2eda0386f477ab14d7e181e16c899-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H4GsteoL0M", "openreview": "https://openreview.net/forum?id=H4GsteoL0M", "poster": "/media/PosterPDFs/NeurIPS%202023/72176.png?t=1697961781.4571252", "slides": "https://nips.cc/virtual/2023/poster/72176", "video": "https://nips.cc/virtual/2023/poster/72176", "author_site": "Zeke Xie, Qian-Yuan Tang, Mingming Sun, Ping Li", "tldr": "", "abstract": "Stochastic gradients closely relate to both optimization and generalization of deep neural networks (DNNs). Some works attempted to explain the success of stochastic optimization for deep learning by the arguably heavy-tail properties of gradient noise, while other works presented theoretical and empirical evidence against the heavy-tail hypothesis on gradient noise. Unfortunately, formal statistical tests for analyzing the structure and heavy tails of stochastic gradients in deep learning are still under-explored. In this paper, we mainly make two contributions. First, we conduct formal statistical tests on the distribution of stochastic gradients and gradient noise across both parameters and iterations. Our statistical tests reveal that dimension-wise gradients usually exhibit power-law heavy tails, while iteration-wise gradients and stochastic gradient noise caused by minibatch training usually do not exhibit power-law heavy tails. Second, we further discover that the covariance spectra of stochastic gradients have the power-law structures overlooked by previous studies and present their theoretical implications for training of DNNs. While previous studies believed that the anisotropic structure of stochastic gradients matters to deep learning, they did not expect that the gradient covariance could have such an elegant mathematical structure.
Our work challenges the existing belief and provides novel insights on the structure of stochastic gradients in deep learning.", "keywords": "Gradient Noise;SGD;Deep Learning;Heavy Tails", "primary_area": "", "supplementary_material": "/attachment/6753caa268118d361fcdff9817ce213376f2d016.pdf", "author": "Zeke Xie;Qian-Yuan Tang;Mingming Sun;Ping Li", "authorids": "~Zeke_Xie1;~Qian-Yuan_Tang1;~Mingming_Sun1;~Ping_Li3", "gender": "M;;M;M", "homepage": "https://sites.google.com/view/zeke-xie;;;http://www.stat.rutgers.edu/home/pingli/", "dblp": "210/1039;;87/8665-1.html;62/5860-1", "google_scholar": "https://scholar.google.co.jp/citations?user=ysXmZCMAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zeke_Xie1;~Qian-Yuan_Tang1;~Mingming_Sun1;~Ping_Li3", "aff": "Baidu;;Baidu;LinkedIn", "aff_domain": "baidu.com;;baidu.com;linkedin.com", "position": "Researcher;;Principal Researcher;Engineer", "bibtex": "@inproceedings{\nxie2023on,\ntitle={On the Overlooked Structure of Stochastic Gradients},\nauthor={Zeke Xie and Qian-Yuan Tang and Mingming Sun and Ping Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H4GsteoL0M}\n}", "github": "", "project": "", "reviewers": "8wue;AgKN;tTSf;HTRt;jrig", "pdf_size": 1458029, "rating": "5;6;6;6;7", "confidence": "4;3;3;4;4", "soundness": "3;3;3;3;2", "novelty": "3;2;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "42;95;107;55;68", "wc_strengths": "5;75;50;66;49", "wc_weaknesses": "221;1;201;46;680", "wc_questions": "1;74;47;28;28", "wc_limitations": "1;1;10;22;32", "wc_review": "270;246;415;217;857", "wc_reply_reviewers": "0;9;26;0;158", "wc_reply_authors": "45;0;0;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.4, 24.286621831782202 ], "wc_strengths_avg": [ 49.0, 24.091492274244864 ], "wc_weaknesses_avg": [ 229.8, 240.71510131273442 ], "wc_questions_avg": [ 35.6, 24.154502685834792 ], "wc_limitations_avg": [ 13.2, 12.155657119218196 ], "wc_review_avg": [ 401.0, 237.9806714840514 ], "wc_reply_reviewers_avg": [ 38.6, 60.45031017290152 ], "wc_reply_authors_avg": [ 9.0, 18.0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12660976260603781601&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "baidu.com;;baidu.com;linkedin.com", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Baidu;LinkedIn Corporation", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.linkedin.com", "aff_unique_abbr": "Baidu;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Facilitating Graph Neural Networks with Random Walk on Simplicial Complexes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72175", "id": "H57w5EOj6O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/345208bdbbb6104616311dfc1d093fe7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H57w5EOj6O", 
"openreview": "https://openreview.net/forum?id=H57w5EOj6O", "poster": "/media/PosterPDFs/NeurIPS%202023/72175.png?t=1699804198.1994004", "slides": "https://nips.cc/virtual/2023/poster/72175", "video": "https://nips.cc/virtual/2023/poster/72175", "author_site": "Cai Zhou, Xiyuan Wang, Muhan Zhang", "tldr": "", "abstract": "Node-level random walk has been widely used to improve Graph Neural Networks. However, there is limited attention to random walk on edge and, more generally, on $k$-simplices. This paper systematically analyzes how random walk on different orders of simplicial complexes (SC) facilitates GNNs in their theoretical expressivity. First, on $0$-simplices or node level, we establish a connection between existing positional encoding (PE) and structure encoding (SE) methods through the bridge of random walk. Second, on $1$-simplices or edge level, we bridge edge-level random walk and Hodge $1$-Laplacians and design corresponding edge PE respectively. In spatial domain, we directly make use of edge level random walk to construct EdgeRWSE. Based on spectral analysis of Hodge $1$-Laplcians, we propose Hodge1Lap, a permutation equivariant and expressive edge-level positional encoding. Third, we generalize our theory to random walk on higher-order simplices and propose the general principle to design PE on simplices based on random walk and Hodge Laplacians. Inter-level random walk is also introduced to unify a wide range of simplicial networks. Extensive experiments verify the effectiveness of our random walk-based methods.", "keywords": "random walk on simplicials;Hodge Laplacian;graph neural networks;edge-level positional encoding", "primary_area": "", "supplementary_material": "", "author": "Cai Zhou;Xiyuan Wang;Muhan Zhang", "authorids": "~Cai_Zhou2;~Xiyuan_Wang1;~Muhan_Zhang1", "gender": "M;;M", "homepage": "http://homepage.zhouc.ai/;;https://muhanzhang.github.io/", "dblp": "235/4068;95/8542;157/5518", "google_scholar": "3Pd27KUAAAAJ;;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";;0000-0002-7680-6401", "linkedin": ";%E5%B8%8C%E5%85%83-%E7%8E%8B-969660221/;jerry-muhan-zhang-a33a1777/", "or_profile": "~Cai_Zhou2;~Xiyuan_Wang1;~Muhan_Zhang1", "aff": "Tsinghua University;Peking University;Peking University", "aff_domain": "tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023facilitating,\ntitle={Facilitating Graph Neural Networks with Random Walk on Simplicial Complexes},\nauthor={Cai Zhou and Xiyuan Wang and Muhan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H57w5EOj6O}\n}", "github": "", "project": "", "reviewers": "j6x1;1k26;kVcw;Djvd;r8fz", "pdf_size": 755385, "rating": "5;5;6;6;7", "confidence": "2;2;3;3;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "1;2;3;2;2", "wc_summary": "72;128;134;298;64", "wc_strengths": "74;30;60;170;38", "wc_weaknesses": "224;119;146;357;88", "wc_questions": "38;206;60;157;205", "wc_limitations": "38;31;14;137;11", "wc_review": "446;514;414;1119;406", "wc_reply_reviewers": "34;20;57;159;792", "wc_reply_authors": "68;176;63;223;1106", "reply_reviewers": "1;1;1;2;3", "reply_authors": "2;4;3;4;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], 
"wc_summary_avg": [ 139.2, 84.30990451898283 ], "wc_strengths_avg": [ 74.4, 50.285584415416714 ], "wc_weaknesses_avg": [ 186.8, 96.30659375141455 ], "wc_questions_avg": [ 133.2, 71.33414329758226 ], "wc_limitations_avg": [ 46.2, 46.51623372544256 ], "wc_review_avg": [ 579.8, 272.2736858383491 ], "wc_reply_reviewers_avg": [ 212.4, 293.86432243469096 ], "wc_reply_authors_avg": [ 327.2, 394.26762484383624 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 3.2, 0.7483314773547882 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8728715609439696, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10969585141481732711&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "THU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Failure-Aware Gaussian Process Optimization with Regret Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72174", "id": "H5pwAeYAun", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ccf72339d1f650cb898c55dccbc5cda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H5pwAeYAun", "openreview": "https://openreview.net/forum?id=H5pwAeYAun", "poster": "/media/PosterPDFs/NeurIPS%202023/72174.png?t=1701876056.6085045", "slides": "https://nips.cc/virtual/2023/poster/72174", "video": "https://nips.cc/virtual/2023/poster/72174", "author_site": "Shogo Iwazaki, Shion Takeno, Tomohiko Tanabe, Mitsuru Irie", "tldr": "", "abstract": "Real-world optimization problems often require black-box optimization with observation failure, where we can obtain the objective function value if we succeed, otherwise, we can only obtain a fact of failure. Moreover, this failure region can be complex by several latent constraints, whose number is also unknown. For this problem, we propose a failure-aware Gaussian process upper confidence bound (F-GP-UCB), which only requires a mild assumption for the observation failure that an optimal solution lies on an interior of a feasible region. Furthermore, we show that the number of successful observations grows linearly, by which we provide the first regret upper bounds and the convergence of F-GP-UCB. 
We demonstrate the effectiveness of F-GP-UCB on several benchmark functions, including a simulation function motivated by material synthesis experiments.", "keywords": "Gaussian process optimization;regret analysis;black-box optimization;Bayesian optimization", "primary_area": "", "supplementary_material": "/attachment/6a921b73e99dc716aa44d3dfb5d9dd6f8e6574d7.pdf", "author": "Shogo Iwazaki;Shion Takeno;Tomohiko Tanabe;Mitsuru Irie", "authorids": "~Shogo_Iwazaki1;~Shion_Takeno1;~Tomohiko_Tanabe1;irie@mi-6.co.jp", "gender": "M;M;M;", "homepage": ";https://takeno1995.github.io/myhomepage/;;", "dblp": "251/9091;;;", "google_scholar": ";https://scholar.google.co.jp/citations?user=oGaC1SgAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0003-1990-191X;", "linkedin": "shogo-iwazaki-0692a1185/;;;", "or_profile": "~Shogo_Iwazaki1;~Shion_Takeno1;~Tomohiko_Tanabe1;irie@mi-6.co.jp", "aff": "LY Corporation;Nagoya Institute of Technology;MI-6 Ltd.;", "aff_domain": "lycorp.co.jp;nitech.ac.jp;mi-6.co.jp;", "position": "Researcher;PhD student;Researcher;", "bibtex": "@inproceedings{\niwazaki2023failureaware,\ntitle={Failure-Aware Gaussian Process Optimization with Regret Bounds},\nauthor={Shogo Iwazaki and Shion Takeno and Tomohiko Tanabe and Mitsuru Irie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H5pwAeYAun}\n}", "github": "", "project": "", "reviewers": "Mr4k;ven9;GLK5;Yqeo", "pdf_size": 722113, "rating": "5;6;6;7", "confidence": "3;4;5;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;2", "wc_summary": "84;57;45;101", "wc_strengths": "45;21;29;242", "wc_weaknesses": "179;61;59;758", "wc_questions": "136;25;54;300", "wc_limitations": "64;7;1;29", "wc_review": "508;171;188;1430", "wc_reply_reviewers": "72;10;0;51", "wc_reply_authors": "164;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.75, 22.01561945528674 ], "wc_strengths_avg": [ 84.25, 91.48599619613923 ], "wc_weaknesses_avg": [ 264.25, 289.1776054607272 ], "wc_questions_avg": [ 128.75, 106.9237461932568 ], "wc_limitations_avg": [ 25.25, 24.681724007856502 ], "wc_review_avg": [ 574.25, 511.9806514898781 ], "wc_reply_reviewers_avg": [ 33.25, 29.422567868899545 ], "wc_reply_authors_avg": [ 41.0, 71.01408311032397 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15757563507840893345&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "lycorp.co.jp;nitech.ac.jp;mi-6.co.jp;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "LY Corporation;Nagoya Institute of Technology;MI-6", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.nitech.ac.jp;https://www.mi6.gov.uk", "aff_unique_abbr": ";NIT;MI-6", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";Japan;United Kingdom" }, { "title": "Training Transformers with 4-bit Integers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72173", "id": "H9hWlfMT6O", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/99fc8bc48b917c301a80cb74d91c0c06-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=H9hWlfMT6O", "openreview": "https://openreview.net/forum?id=H9hWlfMT6O", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72173", "video": "https://nips.cc/virtual/2023/poster/72173", "author_site": "Haocheng Xi, ChangHao Li, Jianfei Chen, Jun Zhu", "tldr": "", "abstract": "Quantizing the activation, weight, and gradient to 4-bit is promising to accelerate neural network training. However, existing 4-bit training methods require custom numerical formats which are not supported by contemporary hardware. In this work, we propose a training method for transformers with all matrix multiplications implemented with the INT4 arithmetic. \nTraining with an ultra-low INT4 precision is challenging. To achieve this, we carefully analyze the specific structures of activation and gradients in transformers to propose dedicated quantizers for them. For forward propagation, we identify the challenge of outliers and propose a Hadamard quantizer to suppress the outliers. For backpropagation, we leverage the structural sparsity of gradients by proposing bit splitting and leverage score sampling techniques to quantize gradients accurately. Our algorithm achieves competitive accuracy on a wide range of tasks including natural language understanding, machine translation, and image classification. Unlike previous 4-bit training methods, our algorithm can be implemented on the current generation of GPUs. Our prototypical linear operator implementation is up to 2.2 times faster than the FP16 counterparts and speeds up the training by 17.8\\% on average for sufficiently large models. Our code is available at https://github.com/xijiu9/Train\\_Transformers\\_with\\_INT4.", "keywords": "neural network quantization;transformer;matrix multiplication;randomized numerical linear algebra", "primary_area": "", "supplementary_material": "/attachment/c8b3aca4259e51bf70f548ffceee83e338e96fbb.zip", "author": "Haocheng Xi;ChangHao Li;Jianfei Chen;Jun Zhu", "authorids": "~Haocheng_Xi1;~ChangHao_Li2;~Jianfei_Chen1;~Jun_Zhu2", "gender": "M;M;M;M", "homepage": "https://haochengxi.github.io/;http://ml.cs.tsinghua.edu.cn/~jianfei;http://ml.cs.tsinghua.edu.cn/~jun;https://lichangh20.github.io/", "dblp": "349/7931;48/6809-1;50/2644-1;", "google_scholar": "klZ2MMcAAAAJ;di5RZ1MAAAAJ;axsP38wAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "haocheng-xi-412511323/;;;", "or_profile": "~Haocheng_Xi1;~Jianfei_Chen1;~Jun_Zhu2;~ChangHao_Li3", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Assistant Professor;Professor;Undergrad student", "bibtex": "@inproceedings{\nxi2023training,\ntitle={Training Transformers with 4-bit Integers},\nauthor={Haocheng Xi and ChangHao Li and Jianfei Chen and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=H9hWlfMT6O}\n}", "github": "", "project": "", "reviewers": "XvWB;7XeV;au4S;jhrg", "pdf_size": 3929363, "rating": "4;5;6;6", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "62;205;53;56", "wc_strengths": "37;98;39;69", "wc_weaknesses": "168;304;65;76", "wc_questions": "129;158;27;4", "wc_limitations": 
"1;73;18;1", "wc_review": "397;838;202;206", "wc_reply_reviewers": "101;24;0;7", "wc_reply_authors": "717;0;124;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 64.16774890862231 ], "wc_strengths_avg": [ 60.75, 24.963723680572976 ], "wc_weaknesses_avg": [ 153.25, 95.7845890527281 ], "wc_questions_avg": [ 79.5, 65.32419153728578 ], "wc_limitations_avg": [ 23.25, 29.549746191803408 ], "wc_review_avg": [ 410.75, 258.9549912629606 ], "wc_reply_reviewers_avg": [ 33.0, 40.21815510438041 ], "wc_reply_authors_avg": [ 210.25, 296.9194966653419 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11490923679986115581&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "On quantum backpropagation, information reuse, and cheating measurement collapse", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72172", "id": "HF6bnhfSqH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c3caae2f725c8e2a55ecd600563d172-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HF6bnhfSqH", "openreview": "https://openreview.net/forum?id=HF6bnhfSqH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72172", "video": "https://nips.cc/virtual/2023/poster/72172", "author_site": "Amira Abbas, Robbie King, Hsin-Yuan Huang, William J. Huggins, Ramis Movassagh, Dar Gilboa, Jarrod McClean", "tldr": "", "abstract": "The success of modern deep learning hinges on the ability to train neural networks at scale. Through clever reuse of intermediate information, backpropagation facilitates training through gradient computation at a total cost roughly proportional to running the function, rather than incurring an additional factor proportional to the number of parameters -- which can now be in the trillions. Naively, one expects that quantum measurement collapse entirely rules out the reuse of quantum information as in backpropagation. But recent developments in shadow tomography, which assumes access to multiple copies of a quantum state, have challenged that notion. Here, we investigate whether parameterized quantum models can train as efficiently as classical neural networks. We show that achieving backpropagation scaling is impossible without access to multiple copies of a state. With this added ability, we introduce an algorithm with foundations in shadow tomography that matches backpropagation scaling in quantum resources while reducing classical auxiliary computational costs to open problems in shadow tomography. 
These results highlight the nuance of reusing quantum information for practical purposes and clarify the unique difficulties in training large quantum models, which could alter the course of quantum machine learning.", "keywords": "Backpropagation;quantum computing;shadow tomography;gentle measurement", "primary_area": "", "supplementary_material": "/attachment/af59182f9361b6de148b85153773b5f2b8c86dba.pdf", "author": "Amira Abbas;Robbie King;Hsin-Yuan Huang;William J. Huggins;Ramis Movassagh;Dar Gilboa;Jarrod Ryan McClean", "authorids": "~Amira_Abbas1;~Robbie_King1;~Hsin-Yuan_Huang1;whuggins@google.com;movassagh@google.com;darg@google.com;~Jarrod_Ryan_McClean1", "gender": "F;M;M;;;;M", "homepage": ";https://www.robbieking.net/;https://hsinyuan-huang.github.io/;;;;https://jarrodmcclean.com", "dblp": "268/8404;;97/2540;;;;217/3528", "google_scholar": "https://scholar.google.co.uk/citations?user=-v3wO_UAAAAJ;03PDREcAAAAJ;2y5YF-gAAAAJ;;;;yVy2ZIwAAAAJ", "orcid": "0000-0003-3383-2287;;;;;;", "linkedin": "amira-abbas/;;;;;;", "or_profile": "~Amira_Abbas1;~Robbie_King1;~Hsin-Yuan_Huang1;whuggins@google.com;movassagh@google.com;darg@google.com;~Jarrod_Ryan_McClean1", "aff": "University of KwaZulu-Natal;California Institute of Technology;California Institute of Technology;;;;Research, Google", "aff_domain": "ukzn.ac.za;caltech.edu;caltech.edu;;;;research.google.com", "position": "PhD student;PhD student;PhD student;;;;Researcher", "bibtex": "@inproceedings{\nabbas2023on,\ntitle={On quantum backpropagation, information reuse, and cheating measurement collapse},\nauthor={Amira Abbas and Robbie King and Hsin-Yuan Huang and William J. Huggins and Ramis Movassagh and Dar Gilboa and Jarrod Ryan McClean},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HF6bnhfSqH}\n}", "github": "", "project": "", "reviewers": "efhf;vTRX;mok7;Hoi6", "pdf_size": 575354, "rating": "5;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "2;3;2;3", "wc_summary": "86;74;145;36", "wc_strengths": "35;76;40;46", "wc_weaknesses": "62;73;149;38", "wc_questions": "134;71;102;35", "wc_limitations": "0;1;24;16", "wc_review": "317;295;460;171", "wc_reply_reviewers": "10;16;11;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.25, 39.124001584705006 ], "wc_strengths_avg": [ 49.25, 15.927570436196476 ], "wc_weaknesses_avg": [ 80.5, 41.52408939398912 ], "wc_questions_avg": [ 85.5, 36.69127961791467 ], "wc_limitations_avg": [ 10.25, 10.158124826955023 ], "wc_review_avg": [ 310.75, 102.58258867858619 ], "wc_reply_reviewers_avg": [ 9.25, 5.80409338312195 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2244867526743060454&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ukzn.ac.za;caltech.edu;caltech.edu;;;;research.google.com", "author_num": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of KwaZulu-Natal;California Institute of Technology;Google", 
"aff_unique_dep": ";;Google Research", "aff_unique_url": "https://ukzn.ac.za;https://www.caltech.edu;https://research.google", "aff_unique_abbr": "UKZN;Caltech;Google", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Pasadena;Mountain View", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "South Africa;United States" }, { "title": "ResMem: Learn what you can and memorize the rest", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72171", "id": "HFQFAyNucq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf0857cb9a41c73639f028a80301cdf0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HFQFAyNucq", "openreview": "https://openreview.net/forum?id=HFQFAyNucq", "poster": "/media/PosterPDFs/NeurIPS%202023/72171.png?t=1702145908.7032263", "slides": "https://nips.cc/virtual/2023/poster/72171", "video": "https://nips.cc/virtual/2023/poster/72171", "author_site": "Zitong Yang, MICHAL LUKASIK, Vaishnavh Nagarajan, Zonglin Li, Ankit Rawat, Manzil Zaheer, Aditya Menon, Sanjiv Kumar", "tldr": "", "abstract": "The impressive generalization performance of modern neural networks is attributed in part to their ability to implicitly memorize complex training patterns.\nInspired by this, we explore a novel mechanism to improve model generalization via explicit memorization.\nSpecifically, we propose the residual-memorization (ResMem) algorithm, a new method that augments an existing prediction model (e.g., a neural network) by fitting the model's residuals with a nearest-neighbor based regressor.\nThe final prediction is then the sum of the original model and the fitted residual regressor.\nBy construction, ResMem can explicitly memorize the training labels.\nWe start by formulating a stylized linear regression problem and rigorously show that ResMem results in a more favorable test risk over a base linear neural network.\nThen, we empirically show that ResMem consistently improves the test set generalization of the original prediction model across standard vision and natural language processing benchmarks.", "keywords": "deep learning;generalization;memorization;deep learning theory;boosting;nearest neighbor", "primary_area": "", "supplementary_material": "/attachment/be7ec4e26cd8bdc14ec61e65df1c83e134adec1c.pdf", "author": "Zitong Yang;Michal Lukasik;Vaishnavh Nagarajan;Zonglin Li;Ankit Singh Rawat;Manzil Zaheer;Aditya Krishna Menon;Sanjiv Kumar", "authorids": "~Zitong_Yang1;~Michal_Lukasik1;~Vaishnavh_Nagarajan3;~Zonglin_Li2;~Ankit_Singh_Rawat1;~Manzil_Zaheer1;~Aditya_Krishna_Menon1;~Sanjiv_Kumar1", "gender": ";;M;M;M;;M;M", "homepage": "https://zitongyang.github.io/;https://mlukasik.github.io/;;https://ankitsrawat.github.io/home/;https://www.aclweb.org/anthology/people/m/manzil-zaheer/;http://www.sanjivk.com/;https://akmenon.github.io/;https://vaishnavh.github.io/", "dblp": "242/7793.html;72/11338;142/9188;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;40/10701;;89/3514;161/0079", "google_scholar": "ZqGaKSgAAAAJ;https://scholar.google.co.uk/citations?user=cLZLZCQAAAAJ;;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;A33FhJMAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.nl/citations?user=LrsjJfwAAAAJ", "orcid": ";;;;;;;", "linkedin": "zitongyang/;;lizonglin;;;;;", "or_profile": "~Zitong_Yang1;~Michal_Lukasik1;~Zonglin_Li2;~Ankit_Singh_Rawat1;~Manzil_Zaheer1;~Sanjiv_Kumar1;~Aditya_Menon1;~Vaishnavh_Nagarajan1", "aff": "Stanford University;Google Research;Google;Google;Google 
DeepMind;Google;Google;Google", "aff_domain": "stanford.edu;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com", "position": "PhD student;Research Scientist;Researcher;Research Scientist;Researcher;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nyang2023resmem,\ntitle={ResMem: Learn what you can and memorize the rest},\nauthor={Zitong Yang and Michal Lukasik and Vaishnavh Nagarajan and Zonglin Li and Ankit Singh Rawat and Manzil Zaheer and Aditya Krishna Menon and Sanjiv Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HFQFAyNucq}\n}", "github": "", "project": "", "reviewers": "diiC;Kq4J;M8UD;1sN3", "pdf_size": 1237830, "rating": "5;6;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "60;65;163;78", "wc_strengths": "21;59;94;31", "wc_weaknesses": "194;129;280;135", "wc_questions": "24;161;106;218", "wc_limitations": "28;5;22;3", "wc_review": "327;419;665;465", "wc_reply_reviewers": "0;132;96;15", "wc_reply_authors": "0;416;65;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.5, 41.80011961705373 ], "wc_strengths_avg": [ 51.25, 28.340562803162538 ], "wc_weaknesses_avg": [ 184.5, 60.70625997374571 ], "wc_questions_avg": [ 127.25, 71.56596607326698 ], "wc_limitations_avg": [ 14.5, 10.735455276791944 ], "wc_review_avg": [ 469.0, 123.58802530989804 ], "wc_reply_reviewers_avg": [ 60.75, 55.00624964492671 ], "wc_reply_authors_avg": [ 120.25, 172.80100549475978 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14911861094894489048&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "stanford.edu;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;1;1;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.stanford.edu;https://research.google", "aff_unique_abbr": "Stanford;Google Research", "aff_campus_unique_index": "0;1;1;1;1;1;1", "aff_campus_unique": "Stanford;Mountain View;", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Aligning Language Models with Human Preferences via a Bayesian Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72170", "id": "HGFcM3UU50", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/99b419554537c66bf27e5eb7a74c7de4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HGFcM3UU50", "openreview": "https://openreview.net/forum?id=HGFcM3UU50", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72170", "video": "https://nips.cc/virtual/2023/poster/72170", "author_site": "Jiashuo WANG, Haozhao Wang, Shichao Sun, Wenjie Li", "tldr": "", "abstract": "In the quest to advance human-centric natural language generation (NLG) systems, ensuring alignment between NLG models and human preferences is 
crucial. For this alignment, current popular methods leverage a reinforcement learning (RL) approach with a reward model trained on feedback from humans. However, inherent disagreements due to the subjective nature of human preferences pose a significant challenge for training the reward model, resulting in a deterioration of the NLG performance. To tackle this issue, previous approaches typically rely on majority voting or averaging to consolidate multiple inconsistent preferences into a merged one. Although straightforward to understand and execute, such methods suffer from an inability to capture the nuanced degrees of disagreement among humans and may only represent a specialized subset of individuals, thereby lacking the ability to quantitatively disclose the universality of human preferences. To address this challenge, this paper proposes a novel approach, which employs a Bayesian framework to account for the distribution of disagreements among human preferences when training a preference model, which we name $\\textbf{d-PM}$. \nBesides, considering the RL strategy's inefficient and complex training process, we further propose utilizing a contrastive learning strategy to train the NLG model with the preference scores derived from the d-PM model. Extensive experiments on two human-centric NLG tasks, i.e., emotional support conversation and integrity ``Rule-of-Thumb'' generation, show that our method consistently exceeds previous SOTA models in both automatic and human evaluations.", "keywords": "Aligned Models; Human-centric NLG", "primary_area": "", "supplementary_material": "/attachment/16bc8d446e85d367ebbeefdcfc399e0ef9dcbf54.zip", "author": "Jiashuo WANG;Haozhao Wang;Shichao Sun;Wenjie Li", "authorids": "~Jiashuo_WANG1;~Haozhao_Wang1;~Shichao_Sun1;~Wenjie_Li1", "gender": "F;M;M;F", "homepage": "http://www4.comp.polyu.edu.hk/~csjwang/;https://wanghaozhao.mysxl.cn/;https://shichaosun.github.io;https://web.comp.polyu.edu.hk/cswjli/", "dblp": "204/7570;224/4500.html;;33/3999-2.html", "google_scholar": "uklMlHkAAAAJ;https://scholar.google.com.hk/citations?user=yFrOuMEAAAAJ;https://scholar.google.com/citations?hl=en;Rx5swD4AAAAJ", "orcid": "0000-0002-8254-8138;0000-0002-7591-5315;;0000-0002-7360-8864", "linkedin": ";;;", "or_profile": "~Jiashuo_WANG1;~Haozhao_Wang1;~Shichao_Sun1;~Wenjie_Li1", "aff": "The Hong Kong Polytechnic University, Hong Kong Polytechnic University;Huazhong University of Science and Technology;The Hong Kong Polytechnic University;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University", "aff_domain": "comp.polyu.edu.hk;hust.edu.cn;polyu.edu.hk;comp.polyu.edu.hk", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023aligning,\ntitle={Aligning Language Models with Human Preferences via a Bayesian Approach},\nauthor={Jiashuo WANG and Haozhao Wang and Shichao Sun and Wenjie Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HGFcM3UU50}\n}", "github": "", "project": "", "reviewers": "mWz9;VnTR;otcY;Ru2e", "pdf_size": 3730679, "rating": "4;4;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "50;74;129;59", "wc_strengths": "83;57;242;40", "wc_weaknesses": "202;781;271;163", "wc_questions": "35;112;92;150", "wc_limitations": "1;117;2;1", "wc_review": "371;1141;736;413", "wc_reply_reviewers": "0;612;72;0", "wc_reply_authors":
"268;1436;759;0", "reply_reviewers": "0;3;1;0", "reply_authors": "2;5;3;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 30.667572450391308 ], "wc_strengths_avg": [ 105.5, 80.28231436624134 ], "wc_weaknesses_avg": [ 354.25, 249.40065657491763 ], "wc_questions_avg": [ 97.25, 41.541395017500314 ], "wc_limitations_avg": [ 30.25, 50.08679965819338 ], "wc_review_avg": [ 665.25, 308.8514003529853 ], "wc_reply_reviewers_avg": [ 171.0, 256.30255558616653 ], "wc_reply_authors_avg": [ 615.75, 546.2162460967268 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14556288232479508404&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "comp.polyu.edu.hk;hust.edu.cn;polyu.edu.hk;comp.polyu.edu.hk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Hong Kong Polytechnic University;Huazhong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;http://www.hust.edu.cn", "aff_unique_abbr": "PolyU;HUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Embedding Space Interpolation Beyond Mini-Batch, Beyond Pairs and Beyond Examples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72169", "id": "HKueO74ZTB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c3532dd633e600e9f6db57aa7ae0c858-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HKueO74ZTB", "openreview": "https://openreview.net/forum?id=HKueO74ZTB", "poster": "/media/PosterPDFs/NeurIPS%202023/72169.png?t=1701380284.669163", "slides": "https://nips.cc/virtual/2023/poster/72169", "video": "https://nips.cc/virtual/2023/poster/72169", "author_site": "Shashanka Venkataramanan, Ewa Kijak, laurent amsaleg, Yannis Avrithis", "tldr": "", "abstract": "Mixup refers to interpolation-based data augmentation, originally motivated as a way to go beyond empirical risk minimization (ERM). Its extensions mostly focus on the definition of interpolation and the space (input or feature) where it takes place, while the augmentation process itself is less studied. In most methods, the number of generated examples is limited to the mini-batch size and the number of examples being interpolated is limited to two (pairs), in the input space.\n\nWe make progress in this direction by introducing MultiMix, which generates an arbitrarily large number of interpolated examples beyond the mini-batch size and interpolates the entire mini-batch in the embedding space. Effectively, we sample on the entire convex hull of the mini-batch rather than along linear segments between pairs of examples.\n\nOn sequence data, we further extend to Dense MultiMix. We densely interpolate features and target labels at each spatial location and also apply the loss densely. To mitigate the lack of dense labels, we inherit labels from examples and weight interpolation factors by attention as a measure of confidence.\n\nOverall, we increase the number of loss terms per mini-batch by orders of magnitude at little additional cost. 
This is only possible because of interpolating in the embedding space. We empirically show that our solutions yield significant improvement over state-of-the-art mixup methods on four different benchmarks, despite interpolation being only linear. By analyzing the embedding space, we show that the classes are more tightly clustered and uniformly spread over the embedding space, thereby explaining the improved behavior.", "keywords": "Interpolation based data augmentation;mixup;dense interpolation;robustness;representation learning", "primary_area": "", "supplementary_material": "/attachment/994a2cb2f6174f7d3a8758a60e2aa21e64a7d986.pdf", "author": "Shashanka Venkataramanan;Ewa Kijak;laurent amsaleg;Yannis Avrithis", "authorids": "~Shashanka_Venkataramanan2;~Ewa_Kijak1;~laurent_amsaleg1;~Yannis_Avrithis2", "gender": "M;;;", "homepage": "https://shashankvkt.github.io/;;;https://avrithis.net/", "dblp": "218/8893;;a/LAmsaleg;a/YSAvrithis", "google_scholar": "CbfH47IAAAAJ;;;AF2SxG0AAAAJ", "orcid": ";;;0000-0001-7476-4482", "linkedin": "shashank-venkataramanan-1b2b9993/;;;yannisavrithis/", "or_profile": "~Shashanka_Venkataramanan2;~Ewa_Kijak1;~laurent_amsaleg1;~Yannis_Avrithis2", "aff": "INRIA;;IRISA;IARAI", "aff_domain": "inria.fr;;irisa.fr;iarai.ac.at", "position": "PhD student;;researcher;Principal Investigator", "bibtex": "@inproceedings{\nvenkataramanan2023embedding,\ntitle={Embedding Space Interpolation Beyond Mini-Batch, Beyond Pairs and Beyond Examples},\nauthor={Shashanka Venkataramanan and Ewa Kijak and laurent amsaleg and Yannis Avrithis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HKueO74ZTB}\n}", "github": "", "project": "", "reviewers": "Hzfq;6vc1;SS7F;Xd45", "pdf_size": 1764779, "rating": "4;5;6;6", "confidence": "4;4;3;3", "soundness": "2;3;4;3", "novelty": "2;2;3;2", "presentation": "2;3;4;4", "wc_summary": "53;60;254;103", "wc_strengths": "23;142;199;162", "wc_weaknesses": "93;434;57;501", "wc_questions": "89;311;200;5", "wc_limitations": "12;92;1;16", "wc_review": "270;1039;711;787", "wc_reply_reviewers": "0;205;555;43", "wc_reply_authors": "0;42;1093;96", "reply_reviewers": "0;1;4;1", "reply_authors": "1;2;5;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 117.5, 81.1002466087496 ], "wc_strengths_avg": [ 131.5, 65.8957510011078 ], "wc_weaknesses_avg": [ 271.25, 198.0837890893649 ], "wc_questions_avg": [ 151.25, 115.28307551414474 ], "wc_limitations_avg": [ 30.25, 36.07197665778797 ], "wc_review_avg": [ 701.75, 277.26284911614107 ], "wc_reply_reviewers_avg": [ 200.75, 218.34419502244614 ], "wc_reply_authors_avg": [ 307.75, 454.63962376810053 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9845995386316806506&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "inria.fr;;irisa.fr;iarai.ac.at", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "INRIA;Institut de Recherche en Informatique et Automatique;Institute of Advanced Research in Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.inria.fr;https://www.irisa.fr;https://www.iarai.ac.at", "aff_unique_abbr": "INRIA;IRISA;IARAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;Austria" }, { "title": "Unifying GANs and Score-Based Diffusion as Generative Particle Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72168", "id": "HMhEFKDQ6J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bbc461518c59a2a8d64e70e2c38c4a0e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HMhEFKDQ6J", "openreview": "https://openreview.net/forum?id=HMhEFKDQ6J", "poster": "/media/PosterPDFs/NeurIPS%202023/72168.png?t=1701726786.350733", "slides": "https://nips.cc/virtual/2023/poster/72168", "video": "https://nips.cc/virtual/2023/poster/72168", "author_site": "Jean-Yves Franceschi, Mike Gartrell, Ludovic Dos Santos, Thibaut Issenhuth, Emmanuel de B\u00e9zenac, Mickael Chen, Alain Rakotomamonjy", "tldr": "", "abstract": "Particle-based deep generative models, such as gradient flows and score-based diffusion models, have recently gained traction thanks to their striking performance. Their principle of displacing particle distributions using differential equations is conventionally seen as opposed to the previously widespread generative adversarial networks (GANs), which involve training a pushforward generator network. In this paper we challenge this interpretation, and propose a novel framework that unifies particle and adversarial generative models by framing generator training as a generalization of particle models. This suggests that a generator is an optional addition to any such generative model. Consequently, integrating a generator into a score-based diffusion model and training a GAN without a generator naturally emerge from our framework. 
We empirically test the viability of these original models as proofs of concepts of potential applications of our framework.", "keywords": "deep learning;generative models;GANs;generative adversarial networks;diffusion;score-based;gradient flows", "primary_area": "", "supplementary_material": "/attachment/09e59cc4051853971622281d46d5f8fe18636222.zip", "author": "Jean-Yves Franceschi;Mike Gartrell;Ludovic Dos Santos;Thibaut Issenhuth;Emmanuel de Bezenac;Mickael Chen;Alain Rakotomamonjy", "authorids": "~Jean-Yves_Franceschi1;~Mike_Gartrell1;~Ludovic_Dos_Santos1;~Thibaut_Issenhuth1;~Emmanuel_de_Bezenac2;~Mickael_Chen1;~Alain_Rakotomamonjy1", "gender": "M;M;;M;M;M;", "homepage": "http://jyfranceschi.fr;https://cgartrel.github.io;;https://www.linkedin.com/in/thibautissenhuth;;https://sites.google.com/view/mickaelchen/home;", "dblp": "215/4886;75/3021;;225/5545;;190/7274;", "google_scholar": "https://scholar.google.fr/citations?user=IL2OzksAAAAJ;NX6eiWYAAAAJ;;Y63igKQAAAAJ;https://scholar.google.fr/citations?user=KvZw5gYAAAAJ;https://scholar.google.fr/citations?user=QnRpMJAAAAAJ;", "orcid": ";;;;;;", "linkedin": ";mikegartrell/;;thibautissenhuth;;mickael-chen-ml/;", "or_profile": "~Jean-Yves_Franceschi1;~Mike_Gartrell1;~Ludovic_Dos_Santos1;~Thibaut_Issenhuth1;~Emmanuel_de_Bezenac2;~Mickael_Chen1;~Alain_Rakotomamonjy1", "aff": "Criteo;Criteo AI Lab;;Criteo;ETHZ - ETH Zurich;Valeo;", "aff_domain": "criteo.com;criteo.com;;criteo.com;ethz.ch;valeo.com;", "position": "Researcher;Senior Researcher;;PhD student;Postdoc;Researcher;", "bibtex": "@inproceedings{\nfranceschi2023unifying,\ntitle={Unifying {GAN}s and Score-Based Diffusion as Generative Particle Models},\nauthor={Jean-Yves Franceschi and Mike Gartrell and Ludovic Dos Santos and Thibaut Issenhuth and Emmanuel de Bezenac and Mickael Chen and Alain Rakotomamonjy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HMhEFKDQ6J}\n}", "github": "", "project": "", "reviewers": "6P1y;kQU7;iGU3;XiV8;HYQ5;AQyb", "pdf_size": 2629010, "rating": "5;5;6;6;7;7", "confidence": "5;2;3;4;2;3", "soundness": "3;3;3;3;3;3", "novelty": "4;3;3;2;3;3", "presentation": "3;2;3;3;3;3", "wc_summary": "156;107;212;58;75;113", "wc_strengths": "82;80;159;22;24;57", "wc_weaknesses": "289;61;130;325;59;77", "wc_questions": "695;43;215;80;14;114", "wc_limitations": "22;17;34;39;29;8", "wc_review": "1244;308;750;524;201;369", "wc_reply_reviewers": "0;0;15;79;9;23", "wc_reply_authors": "774;0;0;0;0;0", "reply_reviewers": "0;0;1;1;1;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.1666666666666665, 1.0671873729054748 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 120.16666666666667, 51.39201191711499 ], "wc_strengths_avg": [ 70.66666666666667, 46.100132561872556 ], "wc_weaknesses_avg": [ 156.83333333333334, 109.23738167658338 ], "wc_questions_avg": [ 193.5, 233.10852265271927 ], "wc_limitations_avg": [ 24.833333333333332, 10.446955962809879 ], "wc_review_avg": [ 566.0, 349.73847372000694 ], "wc_reply_reviewers_avg": [ 21.0, 27.172289806590342 ], "wc_reply_authors_avg": [ 129.0, 288.45276909747287 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.382546027838003, 
"gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8838356454952299715&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 31, "email": "criteo.com;criteo.com;;criteo.com;ethz.ch;valeo.com;", "author_num": 7, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Criteo;ETH Zurich;Valeo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.criteo.com;https://www.ethz.ch;https://www.valeo.com", "aff_unique_abbr": "Criteo;ETHZ;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "France;Switzerland" }, { "title": "A Simple Yet Effective Strategy to Robustify the Meta Learning Paradigm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72167", "id": "HMqGYxnlpv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a28bea6298d106eed091ac403d8c22b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HMqGYxnlpv", "openreview": "https://openreview.net/forum?id=HMqGYxnlpv", "poster": "/media/PosterPDFs/NeurIPS%202023/72167.png?t=1697294637.1144814", "slides": "https://nips.cc/virtual/2023/poster/72167", "video": "https://nips.cc/virtual/2023/poster/72167", "author_site": "Qi Wang, Yiqin Lv, yanghe feng, Zheng Xie, Jincai Huang", "tldr": "", "abstract": "Meta learning is a promising paradigm to enable skill transfer across tasks.\nMost previous methods employ the empirical risk minimization principle in optimization.\nHowever, the resulting worst fast adaptation to a subset of tasks can be catastrophic in risk-sensitive scenarios.\nTo robustify fast adaptation, this paper optimizes meta learning pipelines from a distributionally robust perspective and meta trains models with the measure of tail task risk.\nWe take the two-stage strategy as heuristics to solve the robust meta learning problem, controlling the worst fast adaptation cases at a certain probabilistic level. 
\nExperimental results show that our simple method can improve the robustness of meta learning to task distributions and reduce the conditional expectation of the worst fast adaptation risk.", "keywords": "meta learning;robust fast adaptation;model agnostic meta learning", "primary_area": "", "supplementary_material": "/attachment/3dbc765b55517b74bcaf0638a79ced3e3447503f.pdf", "author": "Cheems Wang;Yiqin Lv;Yanghe Feng;Zheng Xie;Jincai Huang", "authorids": "~Cheems_Wang1;~Yiqin_Lv1;~Yanghe_Feng2;~Zheng_Xie4;~Jincai_Huang1", "gender": "F;;M;M;", "homepage": "https://dblp.org/pid/291/3737;;;https://sites.google.com/view/albert-q-wang-at-ai-community/home;https://scholar.google.com/citations?hl=en&user=loLhupYAAAAJ", "dblp": "291/3737;;;375/3186;06/8481.html", "google_scholar": ";8SuC0QoAAAAJ;;Mvbvv3IAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-1181-0212;;0000-0003-2937-3065;0000-0001-6135-6965;0000-0003-1608-8695", "linkedin": ";;;qi-cheems-wang-518a421a1/;", "or_profile": "~Yiqin_Lv1;~Zheng_Xie4;~Jincai_Huang1;~Qi_Wang11;~yanghe_feng1", "aff": "National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;Tsinghua University;National University of Defense Technology", "aff_domain": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;cs.tsinghua.edu.cn;nudt.edu.cn", "position": "PhD student;Full Professor;Full Professor;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Simple Yet Effective Strategy to Robustify the Meta Learning Paradigm},\nauthor={Cheems Wang and Yiqin Lv and Yanghe Feng and Zheng Xie and Jincai Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HMqGYxnlpv}\n}", "github": "", "project": "", "reviewers": "Wn6T;NA6N;vUMr;YmWX;ANpj", "pdf_size": 2004886, "rating": "5;5;6;6;7", "confidence": "4;4;4;3;3", "soundness": "3;2;2;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "73;58;36;52;50", "wc_strengths": "65;46;45;42;56", "wc_weaknesses": "271;340;138;154;96", "wc_questions": "136;40;7;2;208", "wc_limitations": "36;1;1;1;53", "wc_review": "581;485;227;251;463", "wc_reply_reviewers": "42;74;39;21;145", "wc_reply_authors": "14;39;16;52;69", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;3;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.8, 12.006664815842909 ], "wc_strengths_avg": [ 50.8, 8.51821577561874 ], "wc_weaknesses_avg": [ 199.8, 91.01296610923083 ], "wc_questions_avg": [ 78.6, 80.65134840782267 ], "wc_limitations_avg": [ 18.4, 21.978170988505845 ], "wc_review_avg": [ 401.4, 138.61832490691842 ], "wc_reply_reviewers_avg": [ 64.2, 43.860688549086866 ], "wc_reply_authors_avg": [ 38.0, 21.06181378704123 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14750861309191595242&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;cs.tsinghua.edu.cn;nudt.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "National University of Defense Technology;Tsinghua 
University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NUDT;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Heat Diffusion Perspective on Geodesic Preserving Dimensionality Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72166", "id": "HNd4qTJxkW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/16063a1c0f0cddd4894585cf44cebb2c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HNd4qTJxkW", "openreview": "https://openreview.net/forum?id=HNd4qTJxkW", "poster": "/media/PosterPDFs/NeurIPS%202023/72166.png?t=1702085834.7723494", "slides": "https://nips.cc/virtual/2023/poster/72166", "video": "https://nips.cc/virtual/2023/poster/72166", "author_site": "Guillaume Huguet, Alexander Tong, Edward De Brouwer, Yanlei Zhang, Guy Wolf, Ian Adelstein, Smita Krishnaswamy", "tldr": "", "abstract": "Diffusion-based manifold learning methods have proven useful in representation learning and dimensionality reduction of modern high dimensional, high throughput, noisy datasets. Such datasets are especially present in fields like biology and physics. While it is thought that these methods preserve underlying manifold structure of data by learning a proxy for geodesic distances, no specific theoretical links have been established. Here, we establish such a link via results in Riemannian geometry explicitly connecting heat diffusion to manifold distances. In this process, we also formulate a more general heat kernel based manifold embedding method that we call heat geodesic embeddings. This novel perspective makes clearer the choices available in manifold learning and denoising. Results show that our method outperforms existing state of the art in preserving ground truth manifold distances, and preserving cluster structure in toy datasets. We also showcase our method on single cell RNA-sequencing datasets with both continuum and cluster structure, where our method enables interpolation of withheld timepoints of data. 
Finally, we show that parameters of our more general method can be configured to give results similar to PHATE (a state-of-the-art diffusion based manifold learning method) as well as SNE (an attraction/repulsion neighborhood based method that forms the basis of t-SNE).", "keywords": "manifold learning;heat diffusion;geodesic;metric preserving;dimensionality reduction;embedding", "primary_area": "", "supplementary_material": "/attachment/0ecef5d5535d5b1138558a230b75f5627f5b34b1.pdf", "author": "Guillaume Huguet;Alexander Tong;Edward De Brouwer;Yanlei Zhang;Guy Wolf;Ian Adelstein;Smita Krishnaswamy", "authorids": "~Guillaume_Huguet1;~Alexander_Tong1;~Edward_De_Brouwer1;~Yanlei_Zhang1;~Guy_Wolf1;~Ian_Adelstein1;~Smita_Krishnaswamy1", "gender": "M;;M;M;M;;F", "homepage": "https://mila.quebec/personne/guillaume-huguet/;https://alextong.net;https://edwarddebrouwer.xyz;https://sites.google.com/view/yanleizhang/home;http://guywolf.org;https://sites.google.com/view/adelstein;http://www.krishnaswamylab.org", "dblp": "286/5365;153/9296;;335/2128;120/1308;;74/2457", "google_scholar": "L8kYu9IAAAAJ;CS80pt4AAAAJ;-Pm4XtAAAAAJ;https://scholar.google.com/citations?hl=en;g0k3SjcAAAAJ;;l2Pr9m8AAAAJ", "orcid": ";0000-0002-2031-4096;;;0000-0002-6740-059X;;", "linkedin": ";atong01/;edwarddebrouwer/;;;;", "or_profile": "~Guillaume_Huguet1;~Alexander_Tong1;~Edward_De_Brouwer1;~Yanlei_Zhang1;~Guy_Wolf1;~Ian_Adelstein1;~Smita_Krishnaswamy1", "aff": "University of Montreal;Universit\u00e9 de Montr\u00e9al;Yale University;Montreal Institute for Learning Algorithm;University of Montreal;Yale University;Yale University", "aff_domain": "umontreal.ca;umontreal.ca;yale.edu;mila.umontreal.ca;umontreal.ca;yale.edu;yale.edu", "position": "PhD student;Postdoc;Postdoc;Postdoc;Associate Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhuguet2023a,\ntitle={A Heat Diffusion Perspective on Geodesic Preserving Dimensionality Reduction},\nauthor={Guillaume Huguet and Alexander Tong and Edward De Brouwer and Yanlei Zhang and Guy Wolf and Ian Adelstein and Smita Krishnaswamy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HNd4qTJxkW}\n}", "github": "", "project": "", "reviewers": "QP8B;Yhnd;7Dv3;B5m7;rWBe", "pdf_size": 12429000, "rating": "4;5;5;6;8", "confidence": "3;3;3;4;4", "soundness": "3;2;3;3;4", "novelty": "2;3;3;2;3", "presentation": "2;2;2;3;4", "wc_summary": "41;164;155;63;48", "wc_strengths": "44;34;40;179;82", "wc_weaknesses": "248;960;18;54;27", "wc_questions": "163;2;94;4;4", "wc_limitations": "9;2;2;1;5", "wc_review": "505;1162;309;301;166", "wc_reply_reviewers": "6;21;0;6;15", "wc_reply_authors": "36;100;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 94.2, 53.86427387424804 ], "wc_strengths_avg": [ 75.8, 54.27485605692566 ], "wc_weaknesses_avg": [ 261.4, 359.28350922356566 ], "wc_questions_avg": [ 53.4, 65.08947687606654 ], "wc_limitations_avg": [ 3.8, 2.9257477676655586 ], "wc_review_avg": [ 488.6, 353.62613025623546 ], "wc_reply_reviewers_avg": [ 9.6, 7.445804187594515 ], "wc_reply_authors_avg": [ 27.2, 38.97896868825546 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 
0 ], "corr_rating_confidence": 0.8427009716003845, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10141239651962205029&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 9, "email": "umontreal.ca;umontreal.ca;yale.edu;mila.umontreal.ca;umontreal.ca;yale.edu;yale.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;0;2;2", "aff_unique_norm": "University of Montreal;Universit\u00e9 de Montr\u00e9al;Yale University;Montreal Institute for Learning Algorithm", "aff_unique_dep": ";;;", "aff_unique_url": "https://wwwumontreal.ca;https://www.umontreal.ca;https://www.yale.edu;https://mila.quebec", "aff_unique_abbr": "UM;UdeM;Yale;MILA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Learning Dense Flow Field for Highly-accurate Cross-view Camera Localization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72165", "id": "HPrd17Qvbp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df5f94d6ac6e13d830d70536cde9f0d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HPrd17Qvbp", "openreview": "https://openreview.net/forum?id=HPrd17Qvbp", "poster": "/media/PosterPDFs/NeurIPS%202023/72165.png?t=1699500093.001155", "slides": "https://nips.cc/virtual/2023/poster/72165", "video": "https://nips.cc/virtual/2023/poster/72165", "author_site": "Zhenbo Song, ze xianghui, Jianfeng Lu, Yujiao Shi", "tldr": "", "abstract": "This paper addresses the problem of estimating the 3-DoF camera pose for a ground-level image with respect to a satellite image that encompasses the local surroundings. We propose a novel end-to-end approach that leverages the learning of dense pixel-wise flow fields in pairs of ground and satellite images to calculate the camera pose. Our approach differs from existing methods by constructing the feature metric at the pixel level, enabling full-image supervision for learning distinctive geometric configurations and visual appearances across views. Specifically, our method employs two distinct convolution networks for ground and satellite feature extraction. Then, we project the ground feature map to the bird's eye view (BEV) using a fixed camera height assumption to achieve preliminary geometric alignment. To further establish the content association between the BEV and satellite features, we introduce a residual convolution block to refine the projected BEV feature. Optical flow estimation is performed on the refined BEV feature map and the satellite feature map using flow decoder networks based on RAFT. After obtaining dense flow correspondences, we apply the least square method to filter matching inliers and regress the ground camera pose. Extensive experiments demonstrate significant improvements compared to state-of-the-art methods. 
Notably, our approach reduces the median localization error by 89\\%, 19\\%, 80\\%, and 35\\% on the KITTI, Ford multi-AV, VIGOR, and Oxford RobotCar datasets, respectively.", "keywords": "Optical flow;correspondence learning;cross-view;camera localization", "primary_area": "", "supplementary_material": "/attachment/cff0df98b344ac3349079735c19e1a27415bc579.zip", "author": "Zhenbo Song;Xianghui Ze;Jianfeng Lu;Yujiao Shi", "authorids": "~Zhenbo_Song1;~Xianghui_Ze2;~Jianfeng_Lu3;~Yujiao_Shi1", "gender": "M;;M;F", "homepage": ";https://github.com/zexianghui;;https://shiyujiao.github.io/", "dblp": "267/1972;;82/6187-3;159/2546", "google_scholar": "9KXd7qQAAAAJ;;;rVsRpZEAAAAJ", "orcid": "0000-0002-5020-4277;;0000-0002-9190-507X;0000-0001-6028-9051", "linkedin": ";;;yujiao-shi-053a12198/", "or_profile": "~Zhenbo_Song1;~Xianghui_Ze2;~Jianfeng_Lu3;~Yujiao_Shi1", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing university of Science & Technology;Australian National University", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;anu.edu.au", "position": "Postdoc;MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nsong2023learning,\ntitle={Learning Dense Flow Field for Highly-accurate Cross-view Camera Localization},\nauthor={Zhenbo Song and Xianghui Ze and Jianfeng Lu and Yujiao Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HPrd17Qvbp}\n}", "github": "", "project": "", "reviewers": "1HCJ;63Cb;NkYA;Bgje;pNph", "pdf_size": 4549975, "rating": "3;4;5;5;8", "confidence": "5;4;3;4;3", "soundness": "2;2;3;3;4", "novelty": "2;1;2;2;4", "presentation": "2;3;3;3;4", "wc_summary": "65;68;135;89;80", "wc_strengths": "30;43;55;147;102", "wc_weaknesses": "326;100;184;359;49", "wc_questions": "3;127;75;4;50", "wc_limitations": "11;27;1;72;8", "wc_review": "435;365;450;671;289", "wc_reply_reviewers": "61;112;5;58;13", "wc_reply_authors": "220;307;0;25;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 5.0, 1.6733200530681511 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.2, 0.9797958971132712 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 87.4, 25.30296425322535 ], "wc_strengths_avg": [ 75.4, 43.26938871766043 ], "wc_weaknesses_avg": [ 203.6, 121.77783049471691 ], "wc_questions_avg": [ 51.8, 46.61072837877563 ], "wc_limitations_avg": [ 23.8, 25.560907652115954 ], "wc_review_avg": [ 442.0, 127.97812313047882 ], "wc_reply_reviewers_avg": [ 49.8, 38.529988320787226 ], "wc_reply_authors_avg": [ 110.4, 128.32240646122563 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7985957062499248, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11032307129000603108&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "njust.edu.cn;njust.edu.cn;njust.edu.cn;anu.edu.au", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Nanjing University of Science and Technology;Australian National University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nust.edu.cn/;https://www.anu.edu.au", "aff_unique_abbr": "NUST;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;Australia" }, { "title": 
"Direct Preference Optimization: Your Language Model is Secretly a Reward Model", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72164", "id": "HPuSIXJaa9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HPuSIXJaa9", "openreview": "https://openreview.net/forum?id=HPuSIXJaa9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72164", "video": "https://nips.cc/virtual/2023/poster/72164", "author_site": "Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D Manning, Stefano Ermon, Chelsea Finn", "tldr": "", "abstract": "While large-scale unsupervised language models (LMs) learn broad world knowledge and some reasoning skills, achieving precise control of their behavior is difficult due to the completely unsupervised nature of their training. Existing methods for gaining such steerability collect human labels of the relative quality of model generations and fine-tune the unsupervised LM to align with these preferences, often with reinforcement learning from human feedback (RLHF). However, RLHF is a complex and often unstable procedure, first fitting a reward model that reflects the human preferences, and then fine-tuning the large unsupervised LM using reinforcement learning to maximize this estimated reward without drifting too far from the original model. In this paper, we leverage a mapping between reward functions and optimal policies to show that this constrained reward maximization problem can be optimized exactly with a single stage of policy training, essentially solving a classification problem on the human preference data. The resulting algorithm, which we call Direct Preference Optimization (DPO), is stable, performant, and computationally lightweight, eliminating the need for fitting a reward model, sampling from the LM during fine-tuning, or performing significant hyperparameter tuning. Our experiments show that DPO can fine-tune LMs to align with human preferences as well as or better than existing methods. 
Notably, fine-tuning with DPO exceeds RLHF's ability to control sentiment of generations and improves response quality in summarization and single-turn dialogue while being substantially simpler to implement and train.", "keywords": "reinforcement learning from human feedback;language models;RLHF;preferences", "primary_area": "", "supplementary_material": "/attachment/140240afc2922cc7a2c9479ced99e819c7191f9f.pdf", "author": "Rafael Rafailov;Archit Sharma;Eric Mitchell;Christopher D Manning;Stefano Ermon;Chelsea Finn", "authorids": "~Rafael_Rafailov1;~Archit_Sharma1;~Eric_Mitchell1;~Christopher_D_Manning1;~Stefano_Ermon1;~Chelsea_Finn1", "gender": "M;M;M;M;M;F", "homepage": "https://rmrafailov.github.io/;;https://ericmitchell.ai;https://nlp.stanford.edu/~manning/;http://cs.stanford.edu/~ermon/;https://ai.stanford.edu/~cbfinn/", "dblp": "272/5358;220/3163.html;238/0419;m/ChristopherDManning;47/8135;131/1783", "google_scholar": "TwABcRgAAAAJ;_0IIzxgAAAAJ;q77J4fgAAAAJ;1zmDOdwAAAAJ;;vfPE6hgAAAAJ", "orcid": ";;0000-0002-7487-1744;0000-0001-6155-649X;;", "linkedin": ";;;christopher-manning-011575/;;", "or_profile": "~Rafael_Rafailov1;~Archit_Sharma1;~Eric_Mitchell1;~Christopher_D_Manning1;~Stefano_Ermon1;~Chelsea_Finn1", "aff": "Stanford University;Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;Google", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;google.com", "position": "PhD student;Graduate Student;PhD student;Full Professor;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nrafailov2023direct,\ntitle={Direct Preference Optimization: Your Language Model is Secretly a Reward Model},\nauthor={Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D Manning and Stefano Ermon and Chelsea Finn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HPuSIXJaa9}\n}", "github": "", "project": "", "reviewers": "cpnK;eRhN;GfFq;FksS", "pdf_size": 929308, "rating": "7;8;8;8", "confidence": "5;3;3;4", "soundness": "3;4;3;4", "novelty": "4;4;4;4", "presentation": "3;4;2;3", "wc_summary": "88;119;157;191", "wc_strengths": "41;56;142;119", "wc_weaknesses": "129;25;75;587", "wc_questions": "330;1;44;83", "wc_limitations": "62;1;8;20", "wc_review": "650;202;426;1000", "wc_reply_reviewers": "41;16;20;120", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 138.75, 38.8225127986327 ], "wc_strengths_avg": [ 89.5, 42.13371571556442 ], "wc_weaknesses_avg": [ 204.0, 224.1628872048181 ], "wc_questions_avg": [ 114.5, 127.75464766496755 ], "wc_limitations_avg": [ 22.75, 23.657715443381257 ], "wc_review_avg": [ 569.5, 294.72826467782147 ], "wc_reply_reviewers_avg": [ 49.25, 41.936708263763386 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3284, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16165520781869849938&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 15, "email": "stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;google.com", "author_num": 6, 
"aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;0;0;0;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Guiding The Last Layer in Federated Learning with Pre-Trained Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72163", "id": "HRGd5dcVfw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dcc0ac74ac8b95dc1939804acce0317d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HRGd5dcVfw", "openreview": "https://openreview.net/forum?id=HRGd5dcVfw", "poster": "/media/PosterPDFs/NeurIPS%202023/72163.png?t=1700797794.621318", "slides": "https://nips.cc/virtual/2023/poster/72163", "video": "https://nips.cc/virtual/2023/poster/72163", "author_site": "Gwen Legate, Nicolas Bernier, Lucas Page-Caccia, Edouard Oyallon, Eugene Belilovsky", "tldr": "", "abstract": "Federated Learning (FL) is an emerging paradigm that allows a model to be trained across a number of participants without sharing data. Recent works have begun to consider the effects of using pre-trained models as an initialization point for existing FL algorithms; however, these approaches ignore the vast body of efficient transfer learning literature from the centralized learning setting. Here we revisit the problem of FL from a pre-trained model considered in prior work and expand it to a set of computer vision transfer learning problems. We first observe that simply fitting a linear classification head can be efficient in many cases. We then show that in the FL setting, fitting a classifier using the Nearest Class Means (NCM) can be done exactly and orders of magnitude more efficiently than existing proposals, while obtaining strong performance. Finally, we demonstrate that using a two-stage approach of obtaining the classifier and then fine-tuning the model can yield rapid convergence and improved generalization in the federated setting. 
We demonstrate the potential our method has to reduce communication and compute costs while achieving better model performance.", "keywords": "federated learning; transfer learning; nearest mean classifier; continual learning;", "primary_area": "", "supplementary_material": "/attachment/672fe4c2c0c251ff15f1ea0139311b7c505fb7aa.zip", "author": "Gwen Legate;Nicolas Bernier;Lucas Caccia;Edouard Oyallon;Eugene Belilovsky", "authorids": "~Gwen_Legate1;nbernier99@gmail.com;~Lucas_Caccia1;~Edouard_Oyallon1;~Eugene_Belilovsky1", "gender": ";;M;;M", "homepage": ";;https://www.cs.mcgill.ca/~lpagec/;;http://eugenium.github.io", "dblp": "344/5422;;;;42/11445", "google_scholar": "hwERHFYAAAAJ;;fuvIITUAAAAJ;;https://scholar.google.fr/citations?user=CffJDoEAAAAJ", "orcid": ";;;;", "linkedin": "https://linkedin.com/in/gwen-legate-3038a452;;;;", "or_profile": "~Gwen_Legate1;nbernier99@gmail.com;~Lucas_Caccia1;~Edouard_Oyallon1;~Eugene_Belilovsky1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;;McGill University;;Concordia University, Montreal", "aff_domain": "mila.umontreal.ca;;mcgill.ca;;concordia.ca", "position": "MS student;;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nlegate2023guiding,\ntitle={Guiding The Last Layer in Federated Learning with Pre-Trained Models},\nauthor={Gwen Legate and Nicolas Bernier and Lucas Caccia and Edouard Oyallon and Eugene Belilovsky},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HRGd5dcVfw}\n}", "github": "", "project": "", "reviewers": "63Tk;8wWr;tHeD;1Br4", "pdf_size": 3052239, "rating": "4;5;6;7", "confidence": "3;3;5;4", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "2;2;4;3", "wc_summary": "53;42;95;110", "wc_strengths": "15;66;55;42", "wc_weaknesses": "63;143;86;127", "wc_questions": "47;84;115;75", "wc_limitations": "3;36;1;19", "wc_review": "181;371;352;373", "wc_reply_reviewers": "0;231;105;160", "wc_reply_authors": "0;736;0;502", "reply_reviewers": "0;3;1;3", "reply_authors": "1;4;1;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.0, 28.275431031197385 ], "wc_strengths_avg": [ 44.5, 19.03286631067428 ], "wc_weaknesses_avg": [ 104.75, 31.830606340439072 ], "wc_questions_avg": [ 80.25, 24.262883175748097 ], "wc_limitations_avg": [ 14.75, 14.113380176272443 ], "wc_review_avg": [ 319.25, 80.23831690657525 ], "wc_reply_reviewers_avg": [ 124.0, 84.38305517104723 ], "wc_reply_authors_avg": [ 309.5, 320.36658689694843 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9311703340510091193&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mila.umontreal.ca;;mcgill.ca;;concordia.ca", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Montreal;McGill University;Concordia University", "aff_unique_dep": "Montreal Institute for Learning Algorithms;;", "aff_unique_url": "https://www.mila.quebec;https://www.mcgill.ca;https://www.concordia.ca", "aff_unique_abbr": "MILA;McGill;Concordia", "aff_campus_unique_index": "0;0", "aff_campus_unique": 
"Montreal;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Detecting hidden confounding in observational data using multiple environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72162", "id": "HUuEMMM8Ik", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/89e541b817ea043a971840a926e12b37-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HUuEMMM8Ik", "openreview": "https://openreview.net/forum?id=HUuEMMM8Ik", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72162", "video": "https://nips.cc/virtual/2023/poster/72162", "author_site": "Rickard Karlsson, Jesse Krijthe", "tldr": "", "abstract": "A common assumption in causal inference from observational data is that there is no hidden confounding. Yet it is, in general, impossible to verify the presence of hidden confounding factors from a single dataset. Under the assumption of independent causal mechanisms underlying the data-generating process, we demonstrate a way to detect unobserved confounders when having multiple observational datasets coming from different environments. We present a theory for testable conditional independencies that are only absent when there is hidden confounding and examine cases where we violate its assumptions: degenerate & dependent mechanisms, and faithfulness violations. Additionally, we propose a procedure to test these independencies and study its empirical finite-sample behavior using simulation studies and semi-synthetic data based on a real-world dataset. In most cases, the proposed procedure correctly predicts the presence of hidden confounding, particularly when the confounding bias is large.", "keywords": "causal inference;hidden confounding;multiple environments;independent causal mechansisms;independence testing", "primary_area": "", "supplementary_material": "/attachment/f237a26cb4ecfbb59acbeefcb794780b3c455cae.zip", "author": "Rickard Karlsson;JH Krijthe", "authorids": "~Rickard_Karlsson1;~JH_Krijthe1", "gender": ";M", "homepage": ";https://www.jessekrijthe.com", "dblp": ";126/0900", "google_scholar": ";https://scholar.google.nl/citations?user=Rm7OUa0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Rickard_Karlsson1;~JH_Krijthe1", "aff": ";Delft University of Technology", "aff_domain": ";tudelft.nl", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nkarlsson2023detecting,\ntitle={Detecting hidden confounding in observational data using multiple environments},\nauthor={Rickard Karlsson and JH Krijthe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HUuEMMM8Ik}\n}", "github": "", "project": "", "reviewers": "ZLeH;g1qg;7Ey5;1FZJ", "pdf_size": 1820597, "rating": "4;5;6;7", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "66;25;113;98", "wc_strengths": "45;80;62;42", "wc_weaknesses": "20;202;222;127", "wc_questions": "237;13;95;265", "wc_limitations": "8;8;122;23", "wc_review": "376;328;614;555", "wc_reply_reviewers": "154;24;0;12", "wc_reply_authors": "372;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.5, 33.737960815674676 ], "wc_strengths_avg": [ 57.25, 
15.188400179084036 ], "wc_weaknesses_avg": [ 142.75, 79.22554827831739 ], "wc_questions_avg": [ 152.5, 103.15401107082555 ], "wc_limitations_avg": [ 40.25, 47.59398596461532 ], "wc_review_avg": [ 468.25, 119.3196861377032 ], "wc_reply_reviewers_avg": [ 47.5, 62.07052440571128 ], "wc_reply_authors_avg": [ 93.0, 161.0807251039056 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=373801258286322776&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": ";tudelft.nl", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "title": "Online RL in Linearly $q^\\pi$-Realizable MDPs Is as Easy as in Linear MDPs If You Learn What to Ignore", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72161", "id": "HV85SiyrsV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b973a107336177a274069cefb011244c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HV85SiyrsV", "openreview": "https://openreview.net/forum?id=HV85SiyrsV", "poster": "/media/PosterPDFs/NeurIPS%202023/72161.png?t=1702352820.398029", "slides": "https://nips.cc/virtual/2023/poster/72161", "video": "https://nips.cc/virtual/2023/poster/72161", "author_site": "Gellert Weisz, Andr\u00e1s Gy\u00f6rgy, Csaba Szepesvari", "tldr": "", "abstract": "We consider online reinforcement learning (RL) in episodic Markov decision processes (MDPs) under the linear $q^\\pi$-realizability assumption, where it is assumed that the action-values of all policies can be expressed as linear functions of state-action features. This class is known to be more general than linear MDPs, where the transition kernel and the reward function are assumed to be linear functions of the feature vectors. As our first contribution, we show that the difference between the two classes is the presence of states in linearly $q^\\pi$-realizable MDPs where for any policy, all the actions have approximately equal values, and skipping over these states by following an arbitrarily fixed policy in those states transforms the problem to a linear MDP. Based on this observation, we derive a novel (computationally inefficient) learning algorithm for linearly $q^\\pi$-realizable MDPs that simultaneously learns what states should be skipped over and runs another learning algorithm on the linear MDP hidden in the problem. The method returns an $\\epsilon$-optimal policy after $\\text{polylog}(H, d)/\\epsilon^2$ interactions with the MDP, where $H$ is the time horizon and $d$ is the dimension of the feature vectors, giving the first polynomial-sample-complexity online RL algorithm for this setting. 
The results are proved for the misspecified case, where the sample complexity is shown to degrade gracefully with the misspecification error.", "keywords": "Reinforcement learning;linear function approximation;online learning", "primary_area": "", "supplementary_material": "", "author": "Gell\u00e9rt Weisz;Andr\u00e1s Gy\u00f6rgy;Csaba Szepesvari", "authorids": "~Gell\u00e9rt_Weisz2;~Andr\u00e1s_Gy\u00f6rgy2;~Csaba_Szepesvari1", "gender": "M;;", "homepage": "https://sites.ualberta.ca/~szepesva/;http://www.cs.bme.hu/~gya;", "dblp": "http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;72/251-1;215/3618.html", "google_scholar": "https://scholar.google.ca/citations?user=zvC19mQAAAAJ;https://scholar.google.com/citations?hl=en;8u-RYZcAAAAJ", "orcid": ";0000-0003-0586-4337;", "linkedin": "csaba-szepesvari-09376b1?trk=hp-identity-name;;", "or_profile": "~Csaba_Szepesvari1;~Andras_Gyorgy1;~Gellert_Weisz1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "google.com;deepmind.com;deepmind.com", "position": "Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nweisz2023online,\ntitle={Online {RL} in Linearly \\$q{\\textasciicircum}{\\textbackslash}pi\\$-Realizable {MDP}s Is as Easy as in Linear {MDP}s If You Learn What to Ignore},\nauthor={Gell{\\'e}rt Weisz and Andr{\\'a}s Gy{\\\"o}rgy and Csaba Szepesvari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HV85SiyrsV}\n}", "github": "", "project": "", "reviewers": "ap44;dNHS;PFYK;Wpjh", "pdf_size": 289248, "rating": "4;5;8;8", "confidence": "3;3;3;4", "soundness": "3;3;4;3", "novelty": "4;3;4;4", "presentation": "1;3;3;2", "wc_summary": "144;135;139;105", "wc_strengths": "54;43;134;78", "wc_weaknesses": "656;90;85;58", "wc_questions": "31;46;201;97", "wc_limitations": "5;9;32;11", "wc_review": "890;323;591;349", "wc_reply_reviewers": "87;0;137;30", "wc_reply_authors": "0;0;324;10", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 130.75, 15.20485119953497 ], "wc_strengths_avg": [ 77.25, 35.12388788275011 ], "wc_weaknesses_avg": [ 222.25, 250.7213343535009 ], "wc_questions_avg": [ 93.75, 66.57843119209103 ], "wc_limitations_avg": [ 14.25, 10.473180032826706 ], "wc_review_avg": [ 538.25, 228.3959007950887 ], "wc_reply_reviewers_avg": [ 63.5, 52.69962049199216 ], "wc_reply_authors_avg": [ 83.5, 138.91274239608114 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5659164584181102, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5019214450749808662&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "google.com;deepmind.com;deepmind.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "On the Convergence and Sample Complexity Analysis of Deep Q-Networks with $\\epsilon$-Greedy Exploration", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/72160", "id": "HWGWeaN76q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a91de02871011d0090e662ffd6f2328-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HWGWeaN76q", "openreview": "https://openreview.net/forum?id=HWGWeaN76q", "poster": "/media/PosterPDFs/NeurIPS%202023/72160.png?t=1699283786.137444", "slides": "https://nips.cc/virtual/2023/poster/72160", "video": "https://nips.cc/virtual/2023/poster/72160", "author_site": "Shuai Zhang, Hongkang Li, Meng Wang, Miao Liu, Pin-Yu Chen, Songtao Lu, Songtao Lu, Sijia Liu, Keerthiram Murugesan, Subhajit Chaudhury", "tldr": "", "abstract": "This paper provides a theoretical understanding of deep Q-Network (DQN) with the $\\varepsilon$-greedy exploration in deep reinforcement learning.\nDespite the tremendous empirical achievement of the DQN, its theoretical characterization remains underexplored.\nFirst, the exploration strategy is either impractical or ignored in the existing analysis. \nSecond, in contrast to conventional Q-learning algorithms, the DQN employs the target network and experience replay to acquire an unbiased estimation of the mean-square Bellman error (MSBE) utilized in training the Q-network. However,\nthe existing theoretical analysis of DQNs lacks convergence analysis or bypasses the technical challenges by deploying a significantly overparameterized neural network, which is not computationally efficient. \nThis paper provides the first theoretical convergence and sample complexity analysis of the\n practical setting of DQNs with $\\epsilon$-greedy policy. We prove an iterative procedure with decaying $\\epsilon$ converges to the optimal Q-value function geometrically. Moreover, a higher level of $\\epsilon$ values enlarges the region of convergence but slows down the convergence, while the opposite holds for a lower level of $\\epsilon$ values. 
Experiments justify our established theoretical insights on DQNs.", "keywords": "Reinforcement learning;Deep Q Network;Convergence analysis;Sample complexity;Generalization analysis", "primary_area": "", "supplementary_material": "/attachment/3a800e26e9961782de96d33cc4037ecdc5dea54e.pdf", "author": "Shuai Zhang;Hongkang Li;Meng Wang;Miao Liu;Pin-Yu Chen;Songtao Lu;Sijia Liu;Keerthiram Murugesan;Subhajit Chaudhury", "authorids": "~Shuai_Zhang6;~Hongkang_Li1;~Meng_Wang4;~Miao_Liu1;~Pin-Yu_Chen1;~Songtao_Lu1;~Sijia_Liu1;~Keerthiram_Murugesan1;~Subhajit_Chaudhury1", "gender": "M;;F;M;M;M;M;M;M", "homepage": "https://inchs708.github.io/shuaizhang.github.io/index.html;https://lohek330.github.io/lihongkang.github.io/;https://www.ecse.rpi.edu/~wang/index.html;https://sites.google.com/view/miaoliuhome;http://www.pinyuchen.com;https://songtaogithub.github.io/;https://lsjxjtu.github.io/;https://keerthi166.github.io;https://subhajitchaudhury.github.io/", "dblp": "71/208-15;318/8643;93/6765-3;;39/8969;05/2887;128/6972-1;178/2877;http://dblp2.uni-trier.de/pers/hd/c/Chaudhury:Subhajit", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=DVlDPjMAAAAJ;;7QHvAEYAAAAJ;jxwlCUUAAAAJ;LRsjX7kAAAAJ;C7dO_UgAAAAJ;-698GEMAAAAJ;https://scholar.google.co.jp/citations?user=EBTpFrQAAAAJ", "orcid": "0000-0001-8280-6988;;;;0000-0003-1039-8369;;;0000-0001-6847-522X;", "linkedin": ";hongkang-li-b7a341173/;;miao-liu-3273a32b;pin-yu-chen-940062a2;;;https://linkedin.com/in/keerthiram;subhajit-chaudhury-24955455/", "or_profile": "~Shuai_Zhang6;~Hongkang_Li1;~Meng_Wang4;~Miao_Liu1;~Pin-Yu_Chen1;~Songtao_Lu1;~Sijia_Liu1;~Keerthiram_Murugesan1;~Subhajit_Chaudhury1", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;International Business Machines;International Business Machines;IBM Thomas J. 
Watson Research Center;Michigan State University;International Business Machines;International Business Machines", "aff_domain": "rpi.edu;rpi.edu;rpi.edu;ibm.com;ibm.com;ibm.com;msu.edu;ibm.com;ibm.com", "position": "Postdoc;PhD student;Associate Professor;Research Staff Member;Principal Researcher;Researcher;Assistant Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nzhang2023on,\ntitle={On the Convergence and Sample Complexity Analysis of Deep Q-Networks with \\${\\textbackslash}epsilon\\$-Greedy Exploration},\nauthor={Shuai Zhang and Hongkang Li and Meng Wang and Miao Liu and Pin-Yu Chen and Songtao Lu and Sijia Liu and Keerthiram Murugesan and Subhajit Chaudhury},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HWGWeaN76q}\n}", "github": "", "project": "", "reviewers": "2co6;fZ7E;5oZJ;KLjC", "pdf_size": 618700, "rating": "4;5;7;7", "confidence": "3;3;4;3", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "18;34;78;71", "wc_strengths": "11;11;76;117", "wc_weaknesses": "66;198;69;23", "wc_questions": "40;231;243;49", "wc_limitations": "3;17;56;0", "wc_review": "138;491;522;260", "wc_reply_reviewers": "0;515;73;92", "wc_reply_authors": "652;1579;89;35", "reply_reviewers": "0;4;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 50.25, 25.02373872945448 ], "wc_strengths_avg": [ 53.75, 45.14075209829805 ], "wc_weaknesses_avg": [ 89.0, 65.50954128979991 ], "wc_questions_avg": [ 140.75, 96.39599317399038 ], "wc_limitations_avg": [ 19.0, 22.304708023195463 ], "wc_review_avg": [ 352.75, 160.0615116135044 ], "wc_reply_reviewers_avg": [ 170.0, 202.12496134817195 ], "wc_reply_authors_avg": [ 588.75, 620.6820341366423 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14232362771121716867&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "rpi.edu;rpi.edu;rpi.edu;ibm.com;ibm.com;ibm.com;msu.edu;ibm.com;ibm.com", "author_num": 9, "aff_unique_index": "0;0;0;1;1;2;3;1;1", "aff_unique_norm": "Rensselaer Polytechnic Institute;International Business Machines Corporation;IBM;Michigan State University", "aff_unique_dep": ";;Research;", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com;https://www.ibm.com/research;https://www.msu.edu", "aff_unique_abbr": "RPI;IBM;IBM;MSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Yorktown Heights", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Video Prediction Models as Rewards for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72159", "id": "HWNl9PAYIP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9042abf40782fbce28901c1c9c0e8d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HWNl9PAYIP", "openreview": "https://openreview.net/forum?id=HWNl9PAYIP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72159", "video": "https://nips.cc/virtual/2023/poster/72159", "author_site": "Alejandro 
Escontrela, Ademi Adeniji, Wilson Yan, Ajay Jain, Xue Bin Peng, Ken Goldberg, Youngwoon Lee, Danijar Hafner, Pieter Abbeel", "tldr": "", "abstract": "Specifying reward signals that allow agents to learn complex behaviors is a long-standing challenge in reinforcement learning.\nA promising approach is to extract preferences for behaviors from unlabeled videos, which are widely available on the internet. We present Video Prediction Rewards (VIPER), an algorithm that leverages pretrained video prediction models as action-free reward signals for reinforcement learning. Specifically, we first train an autoregressive transformer on expert videos and then use the video prediction likelihoods as reward signals for a reinforcement learning agent. VIPER enables expert-level control without programmatic task rewards across a wide range of DMC, Atari, and RLBench tasks. Moreover, generalization of the video prediction model allows us to derive rewards for an out-of-distribution environment where no expert data is available, enabling cross-embodiment generalization for tabletop manipulation. We see our work as a starting point for scalable reward specification from unlabeled videos that will benefit from the rapid advances in generative modeling. Source code and datasets are available on the project website: https://ViperRL.com", "keywords": "Reinforcement learning;generative modeling;learning from demonstrations;video prediction;unsupervised reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Alejandro Escontrela;Ademi Adeniji;Wilson Yan;Ajay Jain;Xue Bin Peng;Ken Goldberg;Youngwoon Lee;Danijar Hafner;Pieter Abbeel", "authorids": "~Alejandro_Escontrela1;~Ademi_Adeniji1;~Wilson_Yan1;~Ajay_Jain1;~Xue_Bin_Peng1;~Ken_Goldberg1;~Youngwoon_Lee1;~Danijar_Hafner1;~Pieter_Abbeel2", "gender": "M;M;M;M;M;M;M;;M", "homepage": "https://www.escontrela.me;;https://wilson1yan.github.io/;https://ajayj.com;https://xbpeng.github.io;http://goldberg.berkeley.edu/;https://youngwoon.github.io;https://danijar.com;https://people.eecs.berkeley.edu/~pabbeel/", "dblp": ";;;;;g/KennethYGoldberg;117/4767;184/8088;", "google_scholar": "53OxjmYAAAAJ;KFnmktMAAAAJ;tR2Qw0YAAAAJ;Ih7iLuUAAAAJ;https://scholar.google.ca/citations?user=FwxfQosAAAAJ;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ;CDPa3AgAAAAJ;VINmGpYAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ", "orcid": ";;;;;0000-0001-6747-9499;0000-0001-9918-1056;0000-0002-9534-7271;", "linkedin": "alejandro-escontrela/;ademi-adeniji/;;ajay-jain;;goldbergken/;;;", "or_profile": "~Alejandro_Escontrela1;~Ademi_Adeniji1;~Wilson_Yan1;~Ajay_Jain1;~Xue_Bin_Peng1;~Ken_Goldberg1;~Youngwoon_Lee1;~Danijar_Hafner1;~Pieter_Abbeel2", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Simon Fraser University;University of California, Berkeley;University of California, Berkeley;University of Toronto;Covariant", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;sfu.ca;berkeley.edu;berkeley.edu;cs.toronto;covariant.ai", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Postdoc;PhD student;Founder", "bibtex": "@inproceedings{\nescontrela2023video,\ntitle={Video Prediction Models as Rewards for Reinforcement Learning},\nauthor={Alejandro Escontrela and Ademi Adeniji and Wilson Yan and Ajay Jain and Xue Bin Peng and Ken Goldberg and Youngwoon Lee and Danijar Hafner and Pieter 
Abbeel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HWNl9PAYIP}\n}", "github": "", "project": "", "reviewers": "Zre9;SQjv;BsvD;1FB4;qQw6;d6Ah", "pdf_size": 2104705, "rating": "3;5;5;6;6;7", "confidence": "4;5;3;4;3;4", "soundness": "2;3;2;3;3;3", "novelty": "2;2;2;2;3;3", "presentation": "4;2;3;3;3;3", "wc_summary": "55;87;59;67;68;53", "wc_strengths": "18;80;49;72;41;53", "wc_weaknesses": "113;93;87;122;17;107", "wc_questions": "201;91;193;59;3;49", "wc_limitations": "9;29;1;1;1;35", "wc_review": "396;380;389;321;130;297", "wc_reply_reviewers": "242;121;189;47;12;56", "wc_reply_authors": "752;163;222;201;305;25", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 64.83333333333333, 11.378586125798854 ], "wc_strengths_avg": [ 52.166666666666664, 20.292992769809867 ], "wc_weaknesses_avg": [ 89.83333333333333, 34.61414290270508 ], "wc_questions_avg": [ 99.33333333333333, 73.73300180757295 ], "wc_limitations_avg": [ 12.666666666666666, 14.06729856400613 ], "wc_review_avg": [ 318.8333333333333, 91.97720128138036 ], "wc_reply_reviewers_avg": [ 111.16666666666667, 81.92154105538351 ], "wc_reply_authors_avg": [ 278.0, 227.9268888627813 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.1296407447104329, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7868652087288841956&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;sfu.ca;berkeley.edu;berkeley.edu;cs.toronto;covariant.ai", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;0;2;3", "aff_unique_norm": "University of California, Berkeley;Simon Fraser University;University of Toronto;Covariant", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.berkeley.edu;https://www.sfu.ca;https://www.utoronto.ca;", "aff_unique_abbr": "UC Berkeley;SFU;U of T;", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;1;0;0;1", "aff_country_unique": "United States;Canada;" }, { "title": "Perception Test: A Diagnostic Benchmark for Multimodal Video Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73640", "id": "HYEGXFnPoq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8540fba4abdc7f9f7a7b1cc6cd60e409-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=HYEGXFnPoq", "openreview": "https://openreview.net/forum?id=HYEGXFnPoq", "poster": "/media/PosterPDFs/NeurIPS%202023/73640.png?t=1701986755.2035153", "slides": "https://nips.cc/virtual/2023/poster/73640", "video": "https://nips.cc/virtual/2023/poster/73640", "author_site": "Viorica Patraucean, Lucas Smaira, Ankush Gupta, Adria Recasens, Larisa Markeeva, Dylan Banarse, Skanda Koppula, joseph heyward, Mateusz Malinowski, Yi Yang, Carl Doersch, Tatiana Matejovicova, Yury Sulsky, Antoine Miech, Alexandre Fr\u00e9chette, Hanna Klimczak, Raphael Koster, Junlin Zhang, Stephanie Winkler, Yusuf Aytar, Simon Osindero, Dima Damen, Andrew Zisserman, 
Joao Carreira", "tldr": "", "abstract": "We propose a novel multimodal video benchmark - the Perception Test - to evaluate the perception and reasoning skills of pre-trained multimodal models (e.g. Flamingo, BEiT-3, or GPT-4). Compared to existing benchmarks that focus on computational tasks (e.g. classification, detection or tracking), the Perception Test focuses on skills (Memory, Abstraction, Physics, Semantics) and types of reasoning (descriptive, explanatory, predictive, counterfactual) across video, audio, and text modalities, to provide a comprehensive and efficient evaluation tool. The benchmark probes pre-trained models for their transfer capabilities, in a zero-shot / few-shot or limited finetuning regime. For these purposes, the Perception Test introduces 11.6k real-world videos, 23s average length, designed to show perceptually interesting situations, filmed by around 100 participants worldwide. The videos are densely annotated with six types of labels (multiple-choice and grounded video question-answers, object and point tracks, temporal action and sound segments), enabling both language and non-language evaluations. The fine-tuning and validation splits of the benchmark are publicly available (CC-BY license), in addition to a challenge server with a held-out test split. Human baseline results compared to state-of-the-art video QA models show a substantial gap in performance (91.4% vs 46.2%), suggesting that there is significant room for improvement in multimodal video understanding.\nDataset, baselines code, and challenge server are available at https://github.com/deepmind/perception_test", "keywords": "evaluation;multimodal video models;perception;reasoning;diagnostic;memory;abstraction;physics;semantics", "primary_area": "", "supplementary_material": "/attachment/bd527818afd83dcd8ad60d75e2b18df30bf4d5da.pdf", "author": "Viorica Patraucean;Lucas Smaira;Ankush Gupta;Adria Recasens Continente;Larisa Markeeva;Dylan Sunil Banarse;Skanda Koppula;Joseph Heyward;Mateusz Malinowski;Yi Yang;Carl Doersch;Tatiana Matejovicova;Yury Sulsky;Antoine Miech;Alexandre Fr\u00e9chette;Hanna Klimczak;Raphael Koster;Junlin Zhang;Stephanie Winkler;Yusuf Aytar;Simon Osindero;Dima Damen;Andrew Zisserman;Joao Carreira", "authorids": "~Viorica_Patraucean1;~Lucas_Smaira1;~Ankush_Gupta1;~Adria_Recasens_Continente1;~Larisa_Markeeva1;~Dylan_Sunil_Banarse1;~Skanda_Koppula1;~Joseph_Heyward2;~Mateusz_Malinowski1;~Yi_Yang10;~Carl_Doersch1;~Tatiana_Matejovicova1;~Yury_Sulsky1;~Antoine_Miech1;~Alexandre_Fr\u00e9chette1;~Hanna_Klimczak1;~Raphael_Koster1;~Junlin_Zhang2;~Stephanie_Winkler1;~Yusuf_Aytar1;~Simon_Osindero1;~Dima_Damen1;~Andrew_Zisserman1;~Joao_Carreira1", "gender": "F;M;M;M;F;M;;M;;M;M;F;M;M;M;F;M;M;F;M;Non-Binary;F;;M", "homepage": ";;http://www.ankushgupta.org;https://www.csail.mit.edu/person/adria-recasens-continente;https://github.com/rerrayne;https://2ne1.com;;https://uk.linkedin.com/in/joe-heyward-71623595;http://mateuszmalinowski.com/;https://yangyi02.github.io/;;;http://github.com/ysulsky;http://www.di.ens.fr/~miech/;;https://pl.linkedin.com/in/hklimczak;;;https://www.linkedin.com/mwlite/in/stephanie-manuela-winkler-576371b9;;;http://dimadamen.github.io/;;", "dblp": "21/8618;;46/879-1;http://dblp.uni-trier.de/pers/hd/r/Recasens:Adri=agrave=;;;;;http://dblp.uni-trier.de/pers/hd/m/Malinowski:Mateusz;33/4854-7;12/8654;;;202/1721;126/5045;;;;;41/5577;05/5467;95/3618;;61/5621-1", "google_scholar": 
"https://scholar.google.fr/citations?user=hWzXZUMAAAAJ;https://scholar.google.com/citations?hl=en;23LELwEAAAAJ;https://scholar.google.es/citations?user=e0nmxyIAAAAJ;https://scholar.google.com/citations?hl=en;UPcOdkQAAAAJ;;;https://scholar.google.de/citations?user=IqJ3zskAAAAJ;-BO7TXUAAAAJ;SBTxvCoAAAAJ;;;https://scholar.google.fr/citations?user=9tfacCoAAAAJ;;;;;;0ncQNL8AAAAJ;Jq8ZS5kAAAAJ;https://scholar.google.co.uk/citations?user=OxL9Wn8AAAAJ;;https://scholar.google.pt/citations?user=IUZ-7_cAAAAJ", "orcid": ";;;;;;;;;;;;;;;;;;;;;0000-0001-8804-6238;;", "linkedin": ";lsmaira/;;adri%C3%A0-recasens-continente-29b67421/;https://ru.linkedin.com/in/rerrayne;;;https://linkedin.com/in/joe-heyward-71623595;;;;tatiana-matejovicova-08672484/;;;;;;junlin-zhang-aca-78242a82;;;;dimadamen;;jo%C3%A3o-carreira-56238a7/", "or_profile": "~Viorica_Patraucean1;~Lucas_Smaira1;~Ankush_Gupta1;~Adria_Recasens_Continente1;~Larisa_Markeeva1;~Dylan_Sunil_Banarse1;~Skanda_Koppula1;~Joseph_Heyward2;~Mateusz_Malinowski1;~Yi_Yang10;~Carl_Doersch1;~Tatiana_Matejovicova1;~Yury_Sulsky1;~Antoine_Miech1;~Alexandre_Fr\u00e9chette1;~Hanna_Klimczak1;~Raphael_Koster1;~Junlin_Zhang2;~Stephanie_Winkler1;~Yusuf_Aytar1;~Simon_Osindero1;~Dima_Damen1;~Andrew_Zisserman1;~Joao_Carreira1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;;Imagination Technologies;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;;Google DeepMind;Google DeepMind;Google DeepMind;;;Google DeepMind;Google DeepMind;Google;University of Bristol;;Google DeepMind", "aff_domain": "google.com;deepmind.com;google.com;google.com;deepmind.com;deepmind.com;;imagination.com;deepmind.com;deepmind.com;google.com;google.com;;deepmind.com;deepmind.com;deepmind.com;;;deepmind.com;google.com;google.com;bristol.ac.uk;;google.com", "position": "Research scientist;Researcher;Research Scientist;Research Scientist;Research Engineer;Researcher;;Researcher;Research Scientist;Researcher;Research Scientist;Researcher;;Researcher;Researcher;Researcher;;;Program Associate;Research Scientist;Scientist;Full Professor;;Research Scientist", "bibtex": "@inproceedings{\npatraucean2023perception,\ntitle={Perception Test: A Diagnostic Benchmark for Multimodal Video Models},\nauthor={Viorica Patraucean and Lucas Smaira and Ankush Gupta and Adria Recasens Continente and Larisa Markeeva and Dylan Sunil Banarse and Skanda Koppula and Joseph Heyward and Mateusz Malinowski and Yi Yang and Carl Doersch and Tatiana Matejovicova and Yury Sulsky and Antoine Miech and Alexandre Fr{\\'e}chette and Hanna Klimczak and Raphael Koster and Junlin Zhang and Stephanie Winkler and Yusuf Aytar and Simon Osindero and Dima Damen and Andrew Zisserman and Joao Carreira},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=HYEGXFnPoq}\n}", "github": "", "project": "", "reviewers": "mVd6;nYjf;t6Pk;zp8s;7rcX", "pdf_size": 2836536, "rating": "5;6;7;8;9", "confidence": "5;4;4;3;4", "wc_summary_and_contributions": "126;88;275;118;65", "wc_strengths": "149;209;139;197;193", "wc_improvement": "183;108;149;224;158", "wc_limitations": "101;24;37;9;33", "wc_correctness": "8;74;6;40;13", "wc_clarity": "193;64;15;32;12", "wc_relation_to_prior_work": "97;129;35;11;30", "wc_documentation": "29;140;13;12;12", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "887;837;670;644;517", "wc_reply_reviewers": "28;0;7;22;0", "wc_reply_authors": 
"672;308;194;372;552", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 134.4, 73.58967318856634 ], "wc_strengths_avg": [ 177.4, 27.954248335449837 ], "wc_improvement_avg": [ 164.4, 38.35935348777401 ], "wc_limitations_avg": [ 40.8, 31.6 ], "wc_correctness_avg": [ 28.2, 25.956887332652194 ], "wc_clarity_avg": [ 63.2, 67.47858919687044 ], "wc_relation_to_prior_work_avg": [ 60.4, 44.84462063614765 ], "wc_documentation_avg": [ 41.2, 49.82128059373825 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 711.0, 134.65362973199052 ], "wc_reply_reviewers_avg": [ 11.4, 11.551623262554923 ], "wc_reply_authors_avg": [ 419.6, 171.37747810024513 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 24, 0 ], "corr_rating_confidence": -0.6708203932499368, "gs_citation": 144, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7672040760335962624&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "google.com;deepmind.com;google.com;google.com;deepmind.com;deepmind.com;;imagination.com;deepmind.com;deepmind.com;google.com;google.com;;deepmind.com;deepmind.com;deepmind.com;;;deepmind.com;google.com;google.com;bristol.ac.uk;;google.com", "author_num": 24, "aff_unique_index": "0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;2;0", "aff_unique_norm": "Google;Imagination Technologies;University of Bristol", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.imgtec.com;https://www.bristol.ac.uk", "aff_unique_abbr": "DeepMind;IMG;Bristol", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Alignment with human representations supports robust few-shot learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72158", "id": "HYGnmSLBCf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8ddc03b001d4c4b44b29bc1167e7fdd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HYGnmSLBCf", "openreview": "https://openreview.net/forum?id=HYGnmSLBCf", "poster": "/media/PosterPDFs/NeurIPS%202023/72158.png?t=1701746920.3345912", "slides": "https://nips.cc/virtual/2023/poster/72158", "video": "https://nips.cc/virtual/2023/poster/72158", "author_site": "Ilia Sucholutsky, Tom Griffiths", "tldr": "", "abstract": "Should we care whether AI systems have representations of the world that are similar to those of humans? We provide an information-theoretic analysis that suggests that there should be a U-shaped relationship between the degree of representational alignment with humans and performance on few-shot learning tasks. We confirm this prediction empirically, finding such a relationship in an analysis of the performance of 491 computer vision models. We also show that highly-aligned models are more robust to both natural adversarial attacks and domain shifts. 
Our results suggest that human-alignment is often a sufficient, but not necessary, condition for models to make effective use of limited data, be robust, and generalize well.", "keywords": "representation learning;supervised learning;human alignment;few-shot learning", "primary_area": "", "supplementary_material": "/attachment/1d45a56584da02312f8987fa3e2f55a31b4e43b9.zip", "author": "Ilia Sucholutsky;Thomas L. Griffiths", "authorids": "~Ilia_Sucholutsky1;~Thomas_L._Griffiths1", "gender": "M;", "homepage": "https://ilia10000.github.io/;http://cocosci.princeton.edu/tom/", "dblp": "239/5108;34/4472", "google_scholar": "https://scholar.google.ca/citations?user=6MfHyuMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-4121-7479;", "linkedin": "iliasu/;", "or_profile": "~Ilia_Sucholutsky1;~Thomas_L._Griffiths1", "aff": "Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu", "position": "Postdoc;Professor", "bibtex": "@inproceedings{\nsucholutsky2023alignment,\ntitle={Alignment with human representations supports robust few-shot learning},\nauthor={Ilia Sucholutsky and Thomas L. Griffiths},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HYGnmSLBCf}\n}", "github": "", "project": "", "reviewers": "225s;wjPt;kVdE;CS1b", "pdf_size": 1347224, "rating": "6;6;7;7", "confidence": "3;4;4;3", "soundness": "3;4;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "59;56;87;55", "wc_strengths": "179;130;60;71", "wc_weaknesses": "324;217;307;38", "wc_questions": "94;108;28;44", "wc_limitations": "40;9;19;51", "wc_review": "696;520;501;259", "wc_reply_reviewers": "85;33;66;37", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 13.216939887886303 ], "wc_strengths_avg": [ 110.0, 47.91137651956996 ], "wc_weaknesses_avg": [ 221.5, 113.47797143058207 ], "wc_questions_avg": [ 68.5, 33.35790760824186 ], "wc_limitations_avg": [ 29.75, 16.60383991732033 ], "wc_review_avg": [ 494.0, 155.5265250688769 ], "wc_reply_reviewers_avg": [ 55.25, 21.3819433167334 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=853666499927405335&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "princeton.edu;princeton.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "SLIBO-Net: Floorplan Reconstruction via Slicing Box Representation with Local Geometry Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72157", "id": "HYo2Ao3hP8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/987bed997ab668f91c822a09bce3ea12-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HYo2Ao3hP8", "openreview": 
"https://openreview.net/forum?id=HYo2Ao3hP8", "poster": "/media/PosterPDFs/NeurIPS%202023/72157.png?t=1701671321.7365174", "slides": "https://nips.cc/virtual/2023/poster/72157", "video": "https://nips.cc/virtual/2023/poster/72157", "author_site": "Jheng-Wei Su, Kuei-Yu Tung, Chi-Han Peng, Peter Wonka, Hung-Kuo (James) Chu", "tldr": "", "abstract": "This paper focuses on improving the reconstruction of 2D floorplans from unstructured 3D point clouds. We identify opportunities for enhancement over the existing methods in three main areas: semantic quality, efficient representation, and local geometric details. To address these, we presents SLIBO-Net, an innovative approach to reconstructing 2D floorplans from unstructured 3D point clouds. We propose a novel transformer-based architecture that employs an efficient floorplan representation, providing improved room shape supervision and allowing for manageable token numbers. By incorporating geometric priors as a regularization mechanism and post-processing step, we enhance the capture of local geometric details. We also propose a scale-independent evaluation metric, correcting the discrepancy in error treatment between varying floorplan sizes. Our approach notably achieves a new state-of-the-art on the Structure3D dataset. The resultant floorplans exhibit enhanced semantic plausibility, substantially improving the overall quality and realism of the reconstructions. Our code and dataset are available online.", "keywords": "deep learning;layout reconstruction", "primary_area": "", "supplementary_material": "/attachment/edfc385df6ee9e17af8f0464a0030819c15694f1.pdf", "author": "Jheng-Wei Su;Kuei-Yu Tung;Chi-Han Peng;Peter Wonka;Hung-Kuo Chu", "authorids": "~Jheng-Wei_Su1;~Kuei-Yu_Tung1;~Chi-Han_Peng1;~Peter_Wonka1;~Hung-Kuo_Chu2", "gender": "M;M;M;M;M", "homepage": "https://deepai.org/profile/jheng-wei-su;;http://www.pengchihan.co;http://peterwonka.net;http://cgv.cs.nthu.edu.tw/hkchu/", "dblp": "250/9523;;20/7697;98/5522;67/2946", "google_scholar": ";;jp3YHjcAAAAJ;https://scholar.google.com.tw/citations?user=0EKXSXgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-0627-9746;0000-0001-7153-4411", "linkedin": ";%E5%A5%8E%E4%BD%91-%E8%91%A3-6a94a8255/;;;", "or_profile": "~Jheng-Wei_Su1;~Kuei-Yu_Tung1;~Chi-Han_Peng1;~Peter_Wonka1;~Hung-Kuo_Chu2", "aff": "National Tsing Hua University;Department of Computer Science, National Tsing Hua University, National Tsinghua University;National Yang Ming Chiao Tung University;KAUST;National Tsinghua University", "aff_domain": "nthu.edu.tw;cs.nthu.edu.tw;nctu.edu.tw;kaust.edu.sa;nthu.edu.tw", "position": "PhD student;MS student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsu2023slibonet,\ntitle={{SLIBO}-Net: Floorplan Reconstruction via Slicing Box Representation with Local Geometry Regularization},\nauthor={Jheng-Wei Su and Kuei-Yu Tung and Chi-Han Peng and Peter Wonka and Hung-Kuo Chu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HYo2Ao3hP8}\n}", "github": "", "project": "", "reviewers": "EcBt;84jG;mWNJ;qV7s", "pdf_size": 4209473, "rating": "5;6;6;7", "confidence": "4;5;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "58;56;84;80", "wc_strengths": "107;239;79;62", "wc_weaknesses": "173;75;107;157", "wc_questions": "3;211;173;78", "wc_limitations": "43;11;7;27", "wc_review": "384;592;450;404", "wc_reply_reviewers": 
"10;13;21;76", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.5, 12.599603168354152 ], "wc_strengths_avg": [ 121.75, 69.57504940709708 ], "wc_weaknesses_avg": [ 128.0, 39.102429592034305 ], "wc_questions_avg": [ 116.25, 81.3737519105516 ], "wc_limitations_avg": [ 22.0, 14.247806848775006 ], "wc_review_avg": [ 457.5, 81.2573073637073 ], "wc_reply_reviewers_avg": [ 30.0, 26.860752037126584 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3371998169566137261&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nthu.edu.tw;cs.nthu.edu.tw;nctu.edu.tw;kaust.edu.sa;nthu.edu.tw", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "National Tsing Hua University;National Tsinghua University;National Yang Ming Chiao Tung University;King Abdullah University of Science and Technology;Tsinghua University", "aff_unique_dep": ";Department of Computer Science;;;", "aff_unique_url": "https://www.nthu.edu.tw;https://www.tsinghua.edu.cn;https://www.nycu.edu.tw;https://www.kaust.edu.sa;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NTHU;THU;NYCU;KAUST;THU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Saudi Arabia" }, { "title": "ID and OOD Performance Are Sometimes Inversely Correlated on Real-world Datasets", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72156", "id": "HZQZli6amV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e304d374c85e385eb217ed4a025b6b63-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HZQZli6amV", "openreview": "https://openreview.net/forum?id=HZQZli6amV", "poster": "/media/PosterPDFs/NeurIPS%202023/72156.png?t=1701808402.482206", "slides": "https://nips.cc/virtual/2023/poster/72156", "video": "https://nips.cc/virtual/2023/poster/72156", "author_site": "Damien Teney, Yong Lin, Seong Joon Oh, Ehsan Abbasnejad", "tldr": "", "abstract": "Several studies have compared the in-distribution (ID) and out-of-distribution (OOD) performance of models in computer vision and NLP. They report a frequent positive correlation and some surprisingly never even observe an inverse correlation indicative of a necessary trade-off. The possibility of inverse patterns is important to determine whether ID performance can serve as a proxy for OOD generalization capabilities.\n\nThis paper shows that inverse correlations between ID and OOD performance do happen with multiple real-world datasets, not only in artificial worst-case settings. We explain theoretically how these cases arise and how past studies missed them because of improper methodologies that examined a biased selection of models.\n\nOur observations lead to recommendations that contradict those found in much of the current literature.\n- High OOD performance sometimes requires trading off ID performance.\n- Focusing on ID performance alone may not lead to optimal OOD performance. 
It may produce diminishing (eventually negative) returns in OOD performance.\n- In these cases, studies on OOD generalization that use ID performance for model selection (a common recommended practice) will necessarily miss the best-performing models, making these studies blind to a whole range of phenomena.", "keywords": "Generalisation; machine learning", "primary_area": "", "supplementary_material": "/attachment/c64b202c4fa03cd25d98dcb4585a142b56a1b7c4.pdf", "author": "Damien Teney;LIN Yong;Seong Joon Oh;Ehsan Abbasnejad", "authorids": "~Damien_Teney1;~LIN_Yong1;~Seong_Joon_Oh1;~Ehsan_Abbasnejad3", "gender": "M;;M;", "homepage": "https://www.damienteney.info;;https://seongjoonoh.com;", "dblp": "62/10068;;168/8835;", "google_scholar": "https://scholar.google.com.au/citations?user=iS_jP_3dpD8J;;https://scholar.google.de/citations?user=kmXOOdsAAAAJ;", "orcid": ";;0000-0002-8985-7689;", "linkedin": ";;seong-joon-oh-32113479/;", "or_profile": "~Damien_Teney1;~LIN_Yong1;~Seong_Joon_Oh1;~Ehsan_Abbasnejad3", "aff": "Idiap Research Institute;;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;", "aff_domain": "idiap.ch;;uni-tuebingen.de;", "position": "Researcher;;Associate Professor;", "bibtex": "@inproceedings{\nteney2023id,\ntitle={{ID} and {OOD} Performance Are Sometimes Inversely Correlated on Real-world Datasets},\nauthor={Damien Teney and LIN Yong and Seong Joon Oh and Ehsan Abbasnejad},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HZQZli6amV}\n}", "github": "", "project": "", "reviewers": "p2Jz;yjiA;Q2JA;3Cei;zdij", "pdf_size": 5045591, "rating": "5;6;6;7;8", "confidence": "4;4;3;4;4", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "78;58;35;54;156", "wc_strengths": "258;239;30;43;205", "wc_weaknesses": "174;170;41;158;162", "wc_questions": "115;14;91;12;162", "wc_limitations": "5;22;1;7;6", "wc_review": "630;503;198;274;691", "wc_reply_reviewers": "253;65;186;234;112", "wc_reply_authors": "64;0;0;113;46", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 76.2, 42.17297712991105 ], "wc_strengths_avg": [ 155.0, 98.31988608618299 ], "wc_weaknesses_avg": [ 141.0, 50.3189825016365 ], "wc_questions_avg": [ 78.8, 58.382874201258716 ], "wc_limitations_avg": [ 8.2, 7.194442299441979 ], "wc_review_avg": [ 459.2, 193.57003900397396 ], "wc_reply_reviewers_avg": [ 170.0, 71.596089278675 ], "wc_reply_authors_avg": [ 44.6, 42.50929310162662 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.1961161351381841, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17899019854638596593&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "idiap.ch;;uni-tuebingen.de;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Idiap Research Institute;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.idiap.ch;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Idiap;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1", "aff_country_unique": 
"Switzerland;Germany" }, { "title": "RealTime QA: What's the Answer Right Now?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73639", "id": "HfKOIPCvsv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9941624ef7f867a502732b5154d30cb7-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=HfKOIPCvsv", "openreview": "https://openreview.net/forum?id=HfKOIPCvsv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73639", "video": "https://nips.cc/virtual/2023/poster/73639", "author_site": "Jungo Kasai, Keisuke Sakaguchi, yoichi takahashi, Ronan Le Bras, Akari Asai, Xinyan Yu, Dragomir Radev, Noah Smith, Yejin Choi, Kentaro Inui", "tldr": "", "abstract": "We introduce RealTime QA, a dynamic question answering (QA) platform that announces questions and evaluates systems on a regular basis (weekly in this version). RealTime QA inquires about the current world, and QA systems need to answer questions about novel events or information. It therefore challenges static, conventional assumptions in open-domain QA datasets and pursues instantaneous applications. We build strong baseline models upon large pretrained language models, including GPT-3 and T5. Our benchmark is an ongoing effort, and this paper presents real-time evaluation results over the past year. Our experimental results show that GPT-3 can often properly update its generation results, based on newly-retrieved documents, highlighting the importance of up-to-date information retrieval. Nonetheless, we find that GPT-3 tends to return outdated answers when retrieved documents do not provide sufficient information to find an answer. This suggests an important avenue for future research: can an open-domain QA system identify such unanswerable cases and communicate with the user or even the retrieval module to modify the retrieval results? We hope that RealTime QA will spur progress in instantaneous applications of question answering and beyond.", "keywords": "question answering;time sensitivity;information retrieval;large language models", "primary_area": "", "supplementary_material": "/attachment/049adf066c5052cf21dc9e36c6af624aaabb0f0f.pdf", "author": "Jungo Kasai;Keisuke Sakaguchi;yoichi takahashi;Ronan Le Bras;Akari Asai;Xinyan Velocity Yu;Dragomir Radev;Noah A. 
Smith;Yejin Choi;Kentaro Inui", "authorids": "~Jungo_Kasai1;~Keisuke_Sakaguchi2;~yoichi_takahashi1;~Ronan_Le_Bras1;~Akari_Asai2;~Xinyan_Velocity_Yu1;~Dragomir_Radev2;~Noah_A._Smith2;~Yejin_Choi1;~Kentaro_Inui1", "gender": "M;;;M;F;F;M;;M;F", "homepage": "https://homes.cs.washington.edu/~jkasai/;https://keisuke-sakaguchi.github.io/;https://nouai.blog.fc2.com/;https://rlebras.github.io/index.html;https://akariasai.github.io/;https://yejinc.github.io/;http://www.cl.ecei.tohoku.ac.jp/~inui/;http://www.cs.yale.edu/~radev;https://homes.cs.washington.edu/~nasmith/;https://velocitycavalry.github.io", "dblp": "205/9020;127/0185.html;;;;89/579-1;90/3315;r/DragomirRRadev;90/5204.html;165/9117-1", "google_scholar": "nHCLoIwAAAAJ;6CRBF-MAAAAJ;;8dXLDSsAAAAJ;gqB4u_wAAAAJ;vhP-tlcAAAAJ;https://scholar.google.co.jp/citations?user=38_o3-kAAAAJ;vIqWvgwAAAAJ;https://scholar.google.com/citations?hl=en;PoZv5KkAAAAJ", "orcid": ";;;;;;0000-0001-6510-604X;0000-0002-0213-7487;0000-0002-2310-6380;", "linkedin": ";;;;;;kentaro-inui-52401a31/;dragomir-radev/;;", "or_profile": "~Jungo_Kasai1;~Keisuke_Sakaguchi2;~yoichi_takahashi1;~Ronan_Le_Bras1;~Akari_Asai2;~Yejin_Choi1;~Kentaro_Inui1;~Dragomir_Radkov_Radev1;~Noah_Smith1;~Xinyan_Yu2", "aff": "Paul G. Allen School of Computer Science & Engineering, University of Washington;Tohoku University;Tohoku University;Allen Institute for Artificial Intelligence;Paul G. Allen School of Computer Science & Engineering, University of Washington;Department of Computer Science, University of Washington;Tohoku University;Yale University;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;tohoku.ac.jp;tohoku.ac.jp;allenai.org;cs.washington.edu;cs.washington.edu;tohoku.ac.jp;yale.edu;allenai.org;cs.washington.edu", "position": "PhD student;Associate Professor;Researcher;Researcher;PhD student;Full Professor;Full Professor;Full Professor;Senior Director of NLP Research;MS student", "bibtex": "@inproceedings{\nkasai2023realtime,\ntitle={RealTime {QA}: What's the Answer Right Now?},\nauthor={Jungo Kasai and Keisuke Sakaguchi and yoichi takahashi and Ronan Le Bras and Akari Asai and Xinyan Velocity Yu and Dragomir Radev and Noah A. 
Smith and Yejin Choi and Kentaro Inui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=HfKOIPCvsv}\n}", "github": "", "project": "", "reviewers": "4qFr;SxTx;bnpu;nqj1;CU6H", "pdf_size": 2340348, "rating": "5;5;7;7;9", "confidence": "4;4;4;4;4", "wc_summary_and_contributions": "78;72;103;113;193", "wc_strengths": "66;36;39;119;61", "wc_improvement": "88;52;127;174;223", "wc_limitations": "75;12;9;15;25", "wc_correctness": "20;1;14;35;18", "wc_clarity": "10;1;6;11;39", "wc_relation_to_prior_work": "52;1;72;24;35", "wc_documentation": "27;1;4;18;24", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "417;177;375;510;619", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "344;225;235;225;490", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 111.8, 43.35619909540041 ], "wc_strengths_avg": [ 64.2, 29.822139426942528 ], "wc_improvement_avg": [ 132.8, 60.66761904014365 ], "wc_limitations_avg": [ 27.2, 24.498163196452097 ], "wc_correctness_avg": [ 17.6, 10.9288608738514 ], "wc_clarity_avg": [ 13.4, 13.275541420220874 ], "wc_relation_to_prior_work_avg": [ 36.8, 24.16112580158466 ], "wc_documentation_avg": [ 14.8, 10.49571341072154 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 419.6, 147.5013220279737 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 303.8, 103.3816231251957 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4643234751473764591&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": "cs.washington.edu;tohoku.ac.jp;tohoku.ac.jp;allenai.org;cs.washington.edu;cs.washington.edu;tohoku.ac.jp;yale.edu;allenai.org;cs.washington.edu", "author_num": 10, "aff_unique_index": "0;1;1;2;0;0;1;3;2;0", "aff_unique_norm": "University of Washington;Tohoku University;Allen Institute for Artificial Intelligence;Yale University", "aff_unique_dep": "Paul G. Allen School of Computer Science & Engineering;;;", "aff_unique_url": "https://www.washington.edu;https://www.tohoku.ac.jp;https://allenai.org;https://www.yale.edu", "aff_unique_abbr": "UW;Tohoku U;AI2;Yale", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;1;1;0;0;0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Diffusion-Based Probabilistic Uncertainty Estimation for Active Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72155", "id": "HffQOS3mk8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/374050dc3f211267bd6bf0ea24eae184-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HffQOS3mk8", "openreview": "https://openreview.net/forum?id=HffQOS3mk8", "poster": "/media/PosterPDFs/NeurIPS%202023/72155.png?t=1698223652.166943", "slides": "https://nips.cc/virtual/2023/poster/72155", "video": "https://nips.cc/virtual/2023/poster/72155", "author_site": "Zhekai Du, Jingjing Li", "tldr": "", "abstract": "Active Domain Adaptation (ADA) has emerged as an attractive technique for assisting domain adaptation by actively annotating a small subset of target samples. 
Most ADA methods focus on measuring the target representativeness beyond traditional active learning criteria to handle the domain shift problem, while leaving the uncertainty estimation to be performed by an uncalibrated deterministic model. In this work, we introduce a probabilistic framework that captures both data-level and prediction-level uncertainties beyond a point estimate. Specifically, we use variational inference to approximate the joint posterior distribution of latent representation and model prediction. The variational objective of labeled data can be formulated by a variational autoencoder and a latent diffusion classifier, and the objective of unlabeled data can be implemented in a knowledge distillation framework. We utilize adversarial learning to ensure an invariant latent space. The resulting diffusion classifier enables efficient sampling of all possible predictions for each individual to recover the predictive distribution. We then leverage a t-test-based criterion upon the sampling and select informative unlabeled target samples based on the p-value, which encodes both prediction variability and cross-category ambiguity. Experiments on both ADA and Source-Free ADA settings show that our method provides more calibrated predictions than previous ADA methods and achieves favorable performance on three domain adaptation datasets.", "keywords": "diffusion-based models;active learning;domain adaptation;source-free domain adaptation;uncertainty estimation", "primary_area": "", "supplementary_material": "/attachment/681eb174791f1ba724168aa12ed557c9c7ed9047.zip", "author": "Zhekai Du;Jingjing Li", "authorids": "~Zhekai_Du1;~Jingjing_Li1", "gender": "M;", "homepage": ";https://lijin118.github.io/", "dblp": "259/2953;65/4699-1", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.ca/citations?view_op=list_works", "orcid": ";", "linkedin": ";", "or_profile": "~Zhekai_Du1;~Jingjing_Li1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ndu2023diffusionbased,\ntitle={Diffusion-Based Probabilistic Uncertainty Estimation for Active Domain Adaptation},\nauthor={Zhekai Du and Jingjing Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HffQOS3mk8}\n}", "github": "", "project": "", "reviewers": "1fQe;3siL;fVMQ;Grmw;rDAp", "pdf_size": 3221960, "rating": "5;5;6;7;8", "confidence": "4;3;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "71;59;179;61;66", "wc_strengths": "48;61;44;155;157", "wc_weaknesses": "133;59;144;116;127", "wc_questions": "115;233;140;59;95", "wc_limitations": "15;22;13;12;21", "wc_review": "382;434;520;403;466", "wc_reply_reviewers": "48;38;32;0;38", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.2, 46.08861030666905 ], "wc_strengths_avg": [ 93.0, 51.749396131742444 ], "wc_weaknesses_avg": [ 115.8, 29.808723555362114 ], "wc_questions_avg": [ 128.4, 58.6296853138408 ], "wc_limitations_avg": [ 16.6, 4.127953488110059 ], "wc_review_avg": [ 441.0, 
48.662100242385755 ], "wc_reply_reviewers_avg": [ 31.2, 16.424372134118247 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4900980294098034, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17213170279606785515&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uestc.edu.cn;uestc.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Benchmarking Large Language Models on CMExam - A comprehensive Chinese Medical Exam Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73638", "id": "HhcQ0zeqZp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a48ad12d588c597f4725a8b84af647b5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=HhcQ0zeqZp", "openreview": "https://openreview.net/forum?id=HhcQ0zeqZp", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73638", "video": "https://nips.cc/virtual/2023/poster/73638", "author_site": "Junling Liu, Peilin Zhou, Yining Hua, Dading Chong, Zhongyu Tian, Andrew Liu, Helin Wang, Chenyu You, Zhenhua Guo, LEI ZHU, Michael Lingzhi Li", "tldr": "", "abstract": "Recent advancements in large language models (LLMs) have transformed the field of question answering (QA). However, evaluating LLMs in the medical field is challenging due to the lack of standardized and comprehensive datasets. To address this gap, we introduce CMExam, sourced from the Chinese National Medical Licensing Examination. CMExam consists of 60K+ multiple-choice questions for standardized and objective evaluations, as well as solution explanations for model reasoning evaluation in an open-ended manner. For in-depth analyses of LLMs, we invited medical professionals to label five additional question-wise annotations, including disease groups, clinical departments, medical disciplines, areas of competency, and question difficulty levels. Alongside the dataset, we further conducted thorough experiments with representative LLMs and QA algorithms on CMExam. The results show that GPT-4 had the best accuracy of 61.6% and a weighted F1 score of 0.617. These results highlight a great disparity when compared to human accuracy, which stood at 71.6%. For explanation tasks, while LLMs could generate relevant reasoning and demonstrate improved performance after finetuning, they fall short of a desired standard, indicating ample room for improvement. To the best of our knowledge, CMExam is the first Chinese medical exam dataset to provide comprehensive medical annotations. 
The experiments and findings of LLM evaluation also provide valuable insights into the challenges and potential solutions in developing Chinese medical QA systems and LLM evaluation pipelines.", "keywords": "large language model;chinese medical licensing exam;multi-choice;question answering", "primary_area": "", "supplementary_material": "", "author": "Junling Liu;Peilin Zhou;Yining Hua;Dading Chong;Zhongyu Tian;Andrew Liu;Helin Wang;Chenyu You;Zhenhua Guo;Zhu Lei;Michael Lingzhi Li", "authorids": "~Junling_Liu1;~Peilin_Zhou1;~Yining_Hua1;~Dading_Chong1;~Zhongyu_Tian1;~Andrew_Liu5;~Helin_Wang1;~Chenyu_You1;~Zhenhua_Guo3;~Zhu_Lei1;~Michael_Lingzhi_Li1", "gender": "M;M;Not Specified;M;M;;M;M;;M;", "homepage": ";https://palin2018.github.io;https://ningkko.wordpress.com/about-me/;;;https://andrewliu.com;;https://chenyuyou.me/;;https://scholar.google.com/citations?hl=zh-CN&tzom=-480&user=E2ARG28AAAAJ;", "dblp": "16/870.html;164/9272;;232/0256;;;;191/9432;;;", "google_scholar": "https://scholar.google.com.hk/citations?user=rXS0vhsAAAAJ;3dx8O1AAAAAJ;1uaV0JEAAAAJ;tPSb8YoAAAAJ;;;I_V0zBMAAAAJ;hy_wB7cAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;;0000-0002-6538-3749;;;0000-0001-8365-7822;;;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAADAu538BuKRWcX4dZ9EESgLTtH6p9BaroRc;;;;;;;chenyu-you-b07475a4/;;%E7%A3%8A-%E6%9C%B1-231b6192/;", "or_profile": "~Junling_Liu1;~Peilin_Zhou1;~Yining_Hua1;~Dading_Chong1;~Zhongyu_Tian1;~Andrew_Liu5;~Helin_Wang1;~Chenyu_You1;~Zhenhua_Guo3;~Zhu_Lei1;~Michael_Lingzhi_Li1", "aff": "Alibaba Group;Hong Kong University of Science and Technology (Guangzhou);Harvard University;Zoom;;Amazon;Johns Hopkins University;Yale University;;Alibaba Group;", "aff_domain": "alibaba-inc.com;hkust-gz.edu.cn;harvard.edu;zoom.com;;amazon.com;jh.edu;yale.edu;;alibaba-inc.com;", "position": "Researcher;PhD student;MS student;Undergrad student;;Intern;PhD student;PhD student;;Researcher;", "bibtex": "@inproceedings{\nliu2023benchmarking,\ntitle={Benchmarking Large Language Models on {CME}xam - A comprehensive Chinese Medical Exam Dataset},\nauthor={Junling Liu and Peilin Zhou and Yining Hua and Dading Chong and Zhongyu Tian and Andrew Liu and Helin Wang and Chenyu You and Zhenhua Guo and Zhu Lei and Michael Lingzhi Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=HhcQ0zeqZp}\n}", "github": "", "project": "", "reviewers": "989i;Vnjt;V544", "pdf_size": 2467444, "rating": "5;7;8", "confidence": "4;3;4", "wc_summary_and_contributions": "74;95;75", "wc_strengths": "37;52;121", "wc_improvement": "65;173;86", "wc_limitations": "12;13;19", "wc_correctness": "178;15;19", "wc_clarity": "11;12;9", "wc_relation_to_prior_work": "224;39;34", "wc_documentation": "44;45;51", "wc_additional_feedback": "1;1;1", "wc_review": "646;445;415", "wc_reply_reviewers": "0;568;75", "wc_reply_authors": "1852;2869;481", "reply_reviewers": "0;3;1", "reply_authors": "4;9;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 81.33333333333333, 9.672412085697939 ], "wc_strengths_avg": [ 70.0, 36.578682316343766 ], "wc_improvement_avg": [ 108.0, 46.75467891024384 ], "wc_limitations_avg": [ 14.666666666666666, 3.0912061651652345 ], "wc_correctness_avg": [ 70.66666666666667, 75.91369368492677 ], "wc_clarity_avg": [ 10.666666666666666, 1.247219128924647 ], 
"wc_relation_to_prior_work_avg": [ 99.0, 88.41191473249896 ], "wc_documentation_avg": [ 46.666666666666664, 3.0912061651652345 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 502.0, 102.55730105653132 ], "wc_reply_reviewers_avg": [ 214.33333333333334, 251.94752540073807 ], "wc_reply_authors_avg": [ 1734.0, 978.461036526238 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 5.0, 2.943920288775949 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10665326654616555377&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "alibaba-inc.com;hkust-gz.edu.cn;harvard.edu;zoom.com;;amazon.com;jh.edu;yale.edu;;alibaba-inc.com;", "author_num": 11, "aff_unique_index": "0;1;2;3;4;5;6;0", "aff_unique_norm": "Alibaba Group;Hong Kong University of Science and Technology;Harvard University;Zoom Video Communications Inc.;Amazon;Johns Hopkins University;Yale University", "aff_unique_dep": ";;;;Amazon.com, Inc.;;", "aff_unique_url": "https://www.alibaba.com;https://www.ust.hk;https://www.harvard.edu;https://zoom.us;https://www.amazon.com;https://www.jhu.edu;https://www.yale.edu", "aff_unique_abbr": "Alibaba;HKUST;Harvard;Zoom;Amazon;JHU;Yale", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;1;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Learning in the Presence of Low-dimensional Structure: A Spiked Random Matrix Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72154", "id": "HlIAoCHDWW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38a1671ab0747b6ffe4d1c6ef117a3a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HlIAoCHDWW", "openreview": "https://openreview.net/forum?id=HlIAoCHDWW", "poster": "/media/PosterPDFs/NeurIPS%202023/72154.png?t=1701924045.4517636", "slides": "https://nips.cc/virtual/2023/poster/72154", "video": "https://nips.cc/virtual/2023/poster/72154", "author_site": "Jimmy Ba, Murat Erdogdu, Taiji Suzuki, Zhichao Wang, Denny Wu", "tldr": "", "abstract": "We consider the learning of a single-index target function $f_*: \\mathbb{R}^d\\to\\mathbb{R}$ under spiked covariance data: \n$$f_*(\\boldsymbol{x}) = \\textstyle\\sigma_*(\\frac{1}{\\sqrt{1+\\theta}}\\langle\\boldsymbol{x},\\boldsymbol{\\mu}\\rangle), ~~ \\boldsymbol{x}\\overset{\\small\\mathrm{i.i.d.}}{\\sim}\\mathcal{N}(0,\\boldsymbol{I_d} + \\theta\\boldsymbol{\\mu}\\boldsymbol{\\mu}^\\top), ~~ \\theta\\asymp d^{\\beta} \\text{ for } \\beta\\in[0,1), $$ \nwhere the link function $\\sigma_*:\\mathbb{R}\\to\\mathbb{R}$ is a degree-$p$ polynomial with information exponent $k$ (defined as the lowest degree in the Hermite expansion of $\\sigma_*$), and it depends on the projection of input $\\boldsymbol{x}$ onto the spike (signal) direction $\\boldsymbol{\\mu}\\in\\mathbb{R}^d$. \nIn the proportional asymptotic limit where the number of training examples $n$ and the dimensionality $d$ jointly diverge: $n,d\\to\\infty, n/d\\to\\psi\\in(0,\\infty)$, we ask the following question: how large should the spike magnitude $\\theta$ (i.e., the strength of the low-dimensional component) be, in order for $(i)$ kernel methods, $(ii)$ neural networks optimized by gradient descent, to learn $f_*$? 
\n\nWe show that for kernel ridge regression, $\\beta\\ge 1-\\frac{1}{p}$ is both sufficient and necessary. Whereas for two-layer neural networks trained with gradient descent, $\\beta>1-\\frac{1}{k}$ suffices. Our results demonstrate that both kernel methods and neural networks benefit from low-dimensional structures in the data. Further, since $k\\le p$ by definition, neural networks can adapt to such structures more effectively.", "keywords": "random matrix theory;high-dimensional statistics;neural network;kernel method;feature learning", "primary_area": "", "supplementary_material": "", "author": "Jimmy Ba;Murat A Erdogdu;Taiji Suzuki;Zhichao Wang;Denny Wu", "authorids": "~Jimmy_Ba1;~Murat_A_Erdogdu1;~Taiji_Suzuki1;~Zhichao_Wang3;~Denny_Wu2", "gender": "M;M;M;M;M", "homepage": "http://jimmylba.github.io;http://www.cs.toronto.edu/~erdogdu/;http://ibis.t.u-tokyo.ac.jp/suzuki/;https://mathweb.ucsd.edu/~zhw036/;https://dennywu1.github.io/", "dblp": "https://dblp.org/pers/b/Ba:Jimmy.html;139/1292;08/312;02/10606;", "google_scholar": "https://scholar.google.ca/citations?user=ymzxRhAAAAAJ;Lqc4cdAAAAAJ;x8osrBsAAAAJ;IjXnDdoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-3886-5053;", "linkedin": ";;;;", "or_profile": "~Jimmy_Ba1;~Murat_A_Erdogdu1;~Taiji_Suzuki1;~Zhichao_Wang3;~Denny_Wu2", "aff": "Department of Computer Science, University of Toronto;Vector Institute;The University of Tokyo;University of California, San Diego;University of Toronto", "aff_domain": "cs.toronto.edu;vectorinstitute.ai;tokyo.ac.jp;ucsd.edu;toronto.edu", "position": "Assistant Professor;Faculty;Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nba2023learning,\ntitle={Learning in the Presence of Low-dimensional Structure: A Spiked Random Matrix Perspective},\nauthor={Jimmy Ba and Murat A Erdogdu and Taiji Suzuki and Zhichao Wang and Denny Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HlIAoCHDWW}\n}", "github": "", "project": "", "reviewers": "sXLf;qX6D;b5dX;GruC", "pdf_size": 818507, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;2", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "118;192;65;201", "wc_strengths": "67;35;35;78", "wc_weaknesses": "96;239;35;399", "wc_questions": "50;297;24;66", "wc_limitations": "24;1;6;74", "wc_review": "355;764;165;818", "wc_reply_reviewers": "0;175;10;0", "wc_reply_authors": "38;158;0;0", "reply_reviewers": "0;2;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 144.0, 55.83457710057451 ], "wc_strengths_avg": [ 53.75, 19.149086140074676 ], "wc_weaknesses_avg": [ 192.25, 140.46596562868885 ], "wc_questions_avg": [ 109.25, 109.42891528293607 ], "wc_limitations_avg": [ 26.25, 28.86498744153546 ], "wc_review_avg": [ 525.5, 274.53096364526897 ], "wc_reply_reviewers_avg": [ 46.25, 74.44586959664048 ], "wc_reply_authors_avg": [ 49.0, 64.81512169239521 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13995438957340840452&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
"cs.toronto.edu;vectorinstitute.ai;tokyo.ac.jp;ucsd.edu;toronto.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Toronto;Vector Institute;University of Tokyo;University of California, San Diego", "aff_unique_dep": "Department of Computer Science;;;", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/;https://www.u-tokyo.ac.jp;https://www.ucsd.edu", "aff_unique_abbr": "U of T;Vector Institute;UTokyo;UCSD", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Toronto;;San Diego", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Canada;Japan;United States" }, { "title": "DVSOD: RGB-D Video Salient Object Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73637", "id": "Hm1Ih3uLII", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b88e65f737256d437e56764d39ba06d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Hm1Ih3uLII", "openreview": "https://openreview.net/forum?id=Hm1Ih3uLII", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73637", "video": "https://nips.cc/virtual/2023/poster/73637", "author_site": "Jingjing Li, Wei Ji, Size Wang, Wenbo Li, Li cheng", "tldr": "", "abstract": "Salient object detection (SOD) aims to identify standout elements in a scene, with recent advancements primarily focused on integrating depth data (RGB-D) or temporal data from videos to enhance SOD in complex scenes. However, the unison of two types of crucial information remains largely underexplored due to data constraints. To bridge this gap, we in this work introduce the DViSal dataset, fueling further research in the emerging field of RGB-D video salient object detection (DVSOD). Our dataset features 237 diverse RGB-D videos alongside comprehensive annotations, including object and instance-level markings, as well as bounding boxes and scribbles. These resources enable a broad scope for potential research directions. We also conduct benchmarking experiments using various SOD models, affirming the efficacy of multimodal video input for salient object detection. Lastly, we highlight some intriguing findings and promising future research avenues. 
To foster growth in this field, our dataset and benchmark results are publicly accessible at: https://dvsod.github.io/.", "keywords": "Salient Object Detection;RGB-D Video Object Detection;Benchmark Dataset", "primary_area": "", "supplementary_material": "/attachment/3bd13d6f112fd3d4796304c98e1b13f471db0a38.pdf", "author": "Jingjing Li;Wei Ji;Size Wang;Wenbo Li;Li Cheng", "authorids": "~Jingjing_Li5;~Wei_Ji2;~Size_Wang1;~Wenbo_Li5;~Li_Cheng1", "gender": "F;;;M;Not Specified", "homepage": ";;https://wanggsz.github.io;https://www.albany.edu/~wl523363/main.html;https://www.ece.ualberta.ca/~lcheng5/", "dblp": ";;;;13/4938-1", "google_scholar": "1QYsOAUAAAAJ;;;gIAOq9YAAAAJ;https://scholar.google.ca/citations?user=9IRFiEQAAAAJ", "orcid": ";;;;0000-0003-3261-3533", "linkedin": ";;;wenbo-li-2003a829/;", "or_profile": "~Jingjing_Li5;~Wei_Ji2;~Size_Wang1;~Wenbo_Li5;~Li_Cheng1", "aff": "University of Alberta;;Beihang University;Samsung Research America AI Center;University of Alberta", "aff_domain": "ualberta.ca;;buaa.edu.cn;samsung.com;ualberta.ca", "position": "PhD student;;Undergrad student;Senior researcher;Full Professor", "bibtex": "@inproceedings{\nli2023dvsod,\ntitle={{DVSOD}: {RGB}-D Video Salient Object Detection},\nauthor={Jingjing Li and Wei Ji and Size Wang and Wenbo Li and Li Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Hm1Ih3uLII}\n}", "github": "", "project": "", "reviewers": "to2U;qgpw;4mrx;Vq9N", "pdf_size": 4737441, "rating": "6;6;7;7", "confidence": "4;4;4;3", "wc_summary_and_contributions": "50;104;71;132", "wc_strengths": "51;68;53;76", "wc_improvement": "55;155;19;259", "wc_limitations": "21;112;81;13", "wc_correctness": "12;97;17;21", "wc_clarity": "10;11;10;4", "wc_relation_to_prior_work": "41;64;15;13", "wc_documentation": "22;26;19;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "263;638;286;528", "wc_reply_reviewers": "0;0;22;0", "wc_reply_authors": "641;913;651;655", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 89.25, 31.299960063872287 ], "wc_strengths_avg": [ 62.0, 10.41633332799983 ], "wc_improvement_avg": [ 122.0, 93.4826187052973 ], "wc_limitations_avg": [ 56.75, 41.33022501753408 ], "wc_correctness_avg": [ 36.75, 34.93118234471888 ], "wc_clarity_avg": [ 8.75, 2.7726341266023544 ], "wc_relation_to_prior_work_avg": [ 33.25, 20.90902915010642 ], "wc_documentation_avg": [ 19.0, 6.284902544988268 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 428.75, 159.28492552655445 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 715.0, 114.42901729893515 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3512252740407501759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ualberta.ca;;buaa.edu.cn;samsung.com;ualberta.ca", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Alberta;Beihang University;Samsung", "aff_unique_dep": ";;AI Center", "aff_unique_url": "https://www.ualberta.ca;http://www.buaa.edu.cn/;https://www.samsung.com/us/research/", "aff_unique_abbr": "UAlberta;BUAA;SRA", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";America", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Canada;China;United States" }, { "title": "Ensemble-based Deep Reinforcement Learning for Vehicle Routing Problems under Distribution Shift", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72153", "id": "HoBbZ1vPAh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a68120d2eb2f53f7d9e71547591aef11-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HoBbZ1vPAh", "openreview": "https://openreview.net/forum?id=HoBbZ1vPAh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72153", "video": "https://nips.cc/virtual/2023/poster/72153", "author_site": "YUAN JIANG, Zhiguang Cao, Yaoxin Wu, Wen Song, Jie Zhang", "tldr": "", "abstract": "While performing favourably on the independent and identically distributed (i.i.d.) instances, most of the existing neural methods for vehicle routing problems (VRPs) struggle to generalize in the presence of a distribution shift. To tackle this issue, we propose an ensemble-based deep reinforcement learning method for VRPs, which learns a group of diverse sub-policies to cope with various instance distributions. In particular, to prevent convergence of the parameters to the same one, we enforce diversity across sub-policies by leveraging Bootstrap with random initialization. Moreover, we also explicitly pursue inequality between sub-policies by exploiting regularization terms during training to further enhance diversity. Experimental results show that our method is able to outperform the state-of-the-art neural baselines on randomly generated instances of various distributions, and also generalizes favourably on the benchmark instances from TSPLib and CVRPLib, which confirmed the effectiveness of the whole method and the respective designs.", "keywords": "Vehicle Routing Problem;Distribution shift;Deep Reinforcement Learning;Ensemble Learning", "primary_area": "", "supplementary_material": "", "author": "Yuan Jiang;Zhiguang Cao;Yaoxin Wu;Wen Song;Jie Zhang", "authorids": "~Yuan_Jiang7;~Zhiguang_Cao1;~Yaoxin_Wu2;~Wen_Song1;~Jie_Zhang9", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=oFg-ifMAAAAJ&hl=en;https://zhiguangcaosg.github.io/;https://songwenas12.github.io/;https://personal.ntu.edu.sg/zhangj/;https://research.tue.nl/en/persons/yaoxin-wu", "dblp": "02/393-7;178/8621;50/5489;84/6889-2;192/4964", "google_scholar": "oFg-ifMAAAAJ;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;s8Nz-xoAAAAJ;IFV_RdMAAAAJ;0qRnmK8AAAAJ", "orcid": "0000-0003-4629-9901;0000-0002-4499-759X;0000-0001-7624-1861;;0000-0002-3625-6599", "linkedin": ";;;;", "or_profile": "~Yuan_Jiang7;~Zhiguang_Cao1;~Wen_Song1;~Jie_Zhang9;~YAOXIN_WU1", "aff": "Nanyang Technological University;Institute for Infocomm Research, A*STAR;Shandong University;Nanyang Technological University;Eindhoven University of Technology", "aff_domain": "scse.ntu.edu.sg;i2r.a-star.edu.sg;sdu.edu.cn;ntu.edu.sg;tue.nl", "position": "Researcher;Scientist ;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njiang2023ensemblebased,\ntitle={Ensemble-based Deep Reinforcement Learning for Vehicle Routing Problems under Distribution Shift},\nauthor={Yuan Jiang and Zhiguang Cao and Yaoxin Wu and Wen Song and Jie Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HoBbZ1vPAh}\n}", "github": "", "project": 
"", "reviewers": "WvQ5;L9hy;PAi5;eih2", "pdf_size": 644255, "rating": "5;5;5;7", "confidence": "4;4;4;4", "soundness": "3;3;2;4", "novelty": "3;2;2;4", "presentation": "3;3;3;3", "wc_summary": "46;114;170;52", "wc_strengths": "38;158;51;24", "wc_weaknesses": "129;419;378;31", "wc_questions": "62;83;9;151", "wc_limitations": "7;45;8;7", "wc_review": "282;819;616;265", "wc_reply_reviewers": "0;49;61;34", "wc_reply_authors": "0;15;460;11", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;4;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.5, 50.584088407324295 ], "wc_strengths_avg": [ 67.75, 52.97346033628538 ], "wc_weaknesses_avg": [ 239.25, 163.6190315947384 ], "wc_questions_avg": [ 76.25, 50.88897228280406 ], "wc_limitations_avg": [ 16.75, 16.315253599009733 ], "wc_review_avg": [ 495.5, 233.3907667411031 ], "wc_reply_reviewers_avg": [ 36.0, 22.880122377295102 ], "wc_reply_authors_avg": [ 121.5, 195.5102299113783 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14309904541664799666&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "scse.ntu.edu.sg;i2r.a-star.edu.sg;sdu.edu.cn;ntu.edu.sg;tue.nl", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Nanyang Technological University;Institute for Infocomm Research;Shandong University;Eindhoven University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.i2r.a-star.edu.sg;http://www.sdu.edu.cn;https://www.tue.nl", "aff_unique_abbr": "NTU;I2R;SDU;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "Singapore;China;Netherlands" }, { "title": "Construction of Hierarchical Neural Architecture Search Spaces based on Context-free Grammars", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72152", "id": "Hpt1i5j6wh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4869f3f967dfe954439408dd92c50ee1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Hpt1i5j6wh", "openreview": "https://openreview.net/forum?id=Hpt1i5j6wh", "poster": "/media/PosterPDFs/NeurIPS%202023/72152.png?t=1701274223.5962589", "slides": "https://nips.cc/virtual/2023/poster/72152", "video": "https://nips.cc/virtual/2023/poster/72152", "author_site": "Simon Schrodi, Danny Stoll, Binxin Ru, Rhea Sukthanker, Thomas Brox, Frank Hutter", "tldr": "", "abstract": "The discovery of neural architectures from simple building blocks is a long-standing goal of Neural Architecture Search (NAS). Hierarchical search spaces are a promising step towards this goal but lack a unifying search space design framework and typically only search over some limited aspect of architectures. In this work, we introduce a unifying search space design framework based on context-free grammars that can naturally and compactly generate expressive hierarchical search spaces that are 100s of orders of magnitude larger than common spaces from the literature. By enhancing and using their properties, we effectively enable search over the complete architecture and can foster regularity. 
Further, we propose an efficient hierarchical kernel design for a Bayesian Optimization search strategy to efficiently search over such huge spaces. We demonstrate the versatility of our search space design framework and show that our search strategy can be superior to existing NAS approaches. Code is available at https://github.com/automl/hierarchical_nas_construction.", "keywords": "Neural Architecture Search;Hierarchical Search Space;Context-free Grammars;Bayesian Optimization", "primary_area": "", "supplementary_material": "", "author": "Simon Schrodi;Danny Stoll;Binxin Ru;Rhea Sanjay Sukthanker;Thomas Brox;Frank Hutter", "authorids": "~Simon_Schrodi1;~Danny_Stoll1;~Binxin_Ru1;~Rhea_Sanjay_Sukthanker3;~Thomas_Brox1;~Frank_Hutter1", "gender": "M;M;M;F;M;M", "homepage": "https://lmb.informatik.uni-freiburg.de/people/schrodi/;https://ml.informatik.uni-freiburg.de/profile/stoll/;;https://rheasukthanker.github.io/;https://lmb.informatik.uni-freiburg.de/people/brox/index.en.html;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": "289/1328;232/3297;;277/5077;97/4586;89/5383", "google_scholar": "https://scholar.google.de/citations?user=yC-y0PEAAAAJ;;https://scholar.google.co.uk/citations?user=4piw-XMAAAAJ;OsamqmMAAAAJ;https://scholar.google.com/citations?hl=de;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": "0009-0003-7006-953X;;;;0000-0002-6282-8861;0000-0002-2037-3694", "linkedin": "simon-schrodi-7b55161bb/;Danny-Stoll-AI/;;rhea-sukthanker-006502116/;;frank-hutter-9190b24b/", "or_profile": "~Simon_Schrodi1;~Danny_Stoll1;~Binxin_Ru1;~Rhea_Sanjay_Sukthanker3;~Thomas_Brox1;~Frank_Hutter1", "aff": "University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;University of Freiburg;;University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "cs.uni-freiburg.de;uni-freiburg.de;;cs.uni-freiburg.de;uni-freiburg.de;uni-freiburg.de", "position": "PhD student;PhD student;;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nschrodi2023construction,\ntitle={Construction of Hierarchical Neural Architecture Search Spaces based on Context-free Grammars},\nauthor={Simon Schrodi and Danny Stoll and Binxin Ru and Rhea Sanjay Sukthanker and Thomas Brox and Frank Hutter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Hpt1i5j6wh}\n}", "github": "", "project": "", "reviewers": "sktw;Q2bD;gqtA;mjYa", "pdf_size": 1387580, "rating": "6;7;7;8", "confidence": "3;3;2;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "48;126;22;76", "wc_strengths": "129;45;14;173", "wc_weaknesses": "80;137;37;251", "wc_questions": "2;49;1;81", "wc_limitations": "1;40;29;13", "wc_review": "260;397;103;594", "wc_reply_reviewers": "0;42;0;33", "wc_reply_authors": "0;35;0;30", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 38.54867053479276 ], "wc_strengths_avg": [ 90.25, 63.66072179923819 ], "wc_weaknesses_avg": [ 126.25, 80.28503907951966 ], "wc_questions_avg": [ 33.25, 33.70738049745189 ], "wc_limitations_avg": [ 20.75, 14.905955185763842 ], "wc_review_avg": [ 338.5, 180.50277006184697 ], "wc_reply_reviewers_avg": [ 18.75, 
19.01808349965895 ], "wc_reply_authors_avg": [ 16.25, 16.345871038277526 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17489882721230853543&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "cs.uni-freiburg.de;uni-freiburg.de;;cs.uni-freiburg.de;uni-freiburg.de;uni-freiburg.de", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-freiburg.de;https://www.uni-freiburg.de", "aff_unique_abbr": "UoF;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Freiburg;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Assessor360: Multi-sequence Network for Blind Omnidirectional Image Quality Assessment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72151", "id": "HrL1oblm1a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ccf4a7323b9ee3e54bf77f0e876b3f8b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HrL1oblm1a", "openreview": "https://openreview.net/forum?id=HrL1oblm1a", "poster": "/media/PosterPDFs/NeurIPS%202023/72151.png?t=1697189995.4533422", "slides": "https://nips.cc/virtual/2023/poster/72151", "video": "https://nips.cc/virtual/2023/poster/72151", "author_site": "Tianhe Wu, Shuwei Shi, Haoming Cai, Mingdeng Cao, Jing Xiao, Yinqiang Zheng, Yujiu Yang", "tldr": "", "abstract": "Blind Omnidirectional Image Quality Assessment (BOIQA) aims to objectively assess the human perceptual quality of omnidirectional images (ODIs) without relying on pristine-quality image information. It is becoming more significant with the increasing advancement of virtual reality (VR) technology. However, the quality assessment of ODIs is severely hampered by the fact that the existing BOIQA pipeline lacks the modeling of the observer's browsing process. To tackle this issue, we propose a novel multi-sequence network for BOIQA called Assessor360, which is derived from the realistic multi-assessor ODI quality assessment procedure. Specifically, we propose a generalized Recursive Probability Sampling (RPS) method for the BOIQA task, combining content and details information to generate multiple pseudo viewport sequences from a given starting point. Additionally, we design a Multi-scale Feature Aggregation (MFA) module with a Distortion-aware Block (DAB) to fuse distorted and semantic features of each viewport. We also devise Temporal Modeling Module (TMM) to learn the viewport transition in the temporal domain. Extensive experimental results demonstrate that Assessor360 outperforms state-of-the-art methods on multiple OIQA datasets. 
The code and models are available at https://github.com/TianheWu/Assessor360.", "keywords": "Blind omnidirectional image quality assessment;Multi-sequence network;Viewport sequence", "primary_area": "", "supplementary_material": "/attachment/fe1ad886242f7512d9775604e93a4822f6196a64.pdf", "author": "Tianhe Wu;Shuwei Shi;Haoming Cai;Mingdeng Cao;Jing Xiao;Yinqiang Zheng;Yujiu Yang", "authorids": "~Tianhe_Wu1;~Shuwei_Shi1;~Haoming_Cai2;~Mingdeng_Cao1;~Jing_Xiao3;~Yinqiang_Zheng1;~Yujiu_Yang2", "gender": "M;M;M;M;M;;M", "homepage": "https://tianhewu.github.io/tianhe-page/;https://shuweis.github.io/;https://www.haomingcai.com;https://github.com/ljzycmd;http://www.cs.cmu.edu/~jxiao/;;https://sites.google.com/view/iigroup-thu", "dblp": "318/9291;283/1279;271/0165;290/8525;67/4008-6.html;79/5068;30/3847", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;mePn76IAAAAJ;EcS0L5sAAAAJ;mcBd8KUAAAAJ;https://scholar.google.co.jp/citations?user=5cIOWEAAAAAJ;4gH3sxsAAAAJ", "orcid": ";;;;0000-0001-9615-4749;;0000-0002-6427-1024", "linkedin": ";;;;jing-xiao-8653051/;;", "or_profile": "~Tianhe_Wu1;~Shuwei_Shi1;~Haoming_Cai2;~Mingdeng_Cao1;~Jing_Xiao3;~Yinqiang_Zheng1;~Yujiu_Yang2", "aff": "Tsinghua University;Tsinghua University;University of Maryland, College Park;The University of Tokyo ;Pingan Group;The University of Tokyo;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;umd.edu;u-tokyo.ac.jp;pingan.com.cn;u-tokyo.ac.jp;tsinghua.edu.cn", "position": "MS student;MS student;PhD student;PhD student;Chief Scientist;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023assessor,\ntitle={Assessor360: Multi-sequence Network for Blind Omnidirectional Image Quality Assessment},\nauthor={Tianhe Wu and Shuwei Shi and Haoming Cai and Mingdeng Cao and Jing Xiao and Yinqiang Zheng and Yujiu Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HrL1oblm1a}\n}", "github": "", "project": "", "reviewers": "7uSg;rvUB;hPRU;BfkF", "pdf_size": 1042583, "rating": "5;6;6;7", "confidence": "4;3;5;5", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "82;67;10;63", "wc_strengths": "10;70;24;39", "wc_weaknesses": "184;113;94;42", "wc_questions": "4;135;26;79", "wc_limitations": "1;30;1;2", "wc_review": "281;415;155;225", "wc_reply_reviewers": "7;15;33;24", "wc_reply_authors": "187;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.5, 27.207535720825582 ], "wc_strengths_avg": [ 35.75, 22.275266552838374 ], "wc_weaknesses_avg": [ 108.25, 50.87423218093812 ], "wc_questions_avg": [ 61.0, 50.68037095365423 ], "wc_limitations_avg": [ 8.5, 12.419742348374221 ], "wc_review_avg": [ 269.0, 95.38343671728336 ], "wc_reply_reviewers_avg": [ 19.75, 9.730750228014282 ], "wc_reply_authors_avg": [ 46.75, 80.97337525384502 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12999102642974523481&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 9, "email": 
"mails.tsinghua.edu.cn;tsinghua.edu.cn;umd.edu;u-tokyo.ac.jp;pingan.com.cn;u-tokyo.ac.jp;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;3;2;0", "aff_unique_norm": "Tsinghua University;University of Maryland;University of Tokyo;Ping An Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www/umd.edu;https://www.u-tokyo.ac.jp;https://www.pingan.com.cn", "aff_unique_abbr": "THU;UMD;UTokyo;Ping An", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;1;2;0;2;0", "aff_country_unique": "China;United States;Japan" }, { "title": "Causal Component Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72150", "id": "HszLRiHyfO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67089958e98b243d5cc1881ad60418b8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HszLRiHyfO", "openreview": "https://openreview.net/forum?id=HszLRiHyfO", "poster": "/media/PosterPDFs/NeurIPS%202023/72150.png?t=1701854999.8530877", "slides": "https://nips.cc/virtual/2023/poster/72150", "video": "https://nips.cc/virtual/2023/poster/72150", "author_site": "Liang Wendong, Armin Keki\u0107, Julius von K\u00fcgelgen, Simon Buchholz, Michel Besserve, Luigi Gresele, Bernhard Sch\u00f6lkopf", "tldr": "", "abstract": "Independent Component Analysis (ICA) aims to recover independent latent variables from observed mixtures thereof. Causal Representation Learning (CRL) aims instead to infer causally related (thus often statistically _dependent_) latent variables, together with the unknown graph encoding their causal relationships. We introduce an intermediate problem termed _Causal Component Analysis (CauCA)_. CauCA can be viewed as a generalization of ICA, modelling the causal dependence among the latent components, and as a special case of CRL. In contrast to CRL, it presupposes knowledge of the causal graph, focusing solely on learning the unmixing function and the causal mechanisms. Any impossibility results regarding the recovery of the ground truth in CauCA also apply for CRL, while possibility results may serve as a stepping stone for extensions to CRL. We characterize CauCA identifiability from multiple datasets generated through different types of interventions on the latent causal variables. As a corollary, this interventional perspective also leads to new identifiability results for nonlinear ICA\u2014a special case of CauCA with an empty graph\u2014requiring strictly fewer datasets than previous results. 
We introduce a likelihood-based approach using normalizing flows to estimate both the unmixing function and the causal mechanisms, and demonstrate its effectiveness through extensive synthetic experiments in the CauCA and ICA setting.", "keywords": "Causality;independent component analysis;causal inference;interventions;latent variable models;identifiability", "primary_area": "", "supplementary_material": "", "author": "Wendong Liang;Armin Keki\u0107;Julius von K\u00fcgelgen;Simon Buchholz;Michel Besserve;Luigi Gresele;Bernhard Sch\u00f6lkopf", "authorids": "~Wendong_Liang1;~Armin_Keki\u01071;~Julius_von_K\u00fcgelgen2;~Simon_Buchholz1;~Michel_Besserve1;~Luigi_Gresele1;~Bernhard_Sch\u00f6lkopf1", "gender": "M;;;;M;M;", "homepage": "https://wendongl.github.io/;https://arminkekic.com/;;https://www.is.mpg.de/person/sbuchholz;https://computational-homeostasis.com;https://lgresele.github.io/;", "dblp": "317/6907;330/4165;;207/9068;71/511;211/6114;", "google_scholar": "bGVdtT0AAAAJ;b7GNNQ8AAAAJ;;;https://scholar.google.de/citations?user=Nbq6kI0AAAAJ;JdZ8DWwAAAAJ;", "orcid": "0000-0002-8984-8619;0000-0002-1940-2523;;;;;", "linkedin": "wendong-liang/;arminkekic/;;;;;", "or_profile": "~Wendong_Liang1;~Armin_Keki\u01071;~Julius_von_K\u00fcgelgen2;~Simon_Buchholz1;~Michel_Besserve1;~Luigi_Gresele1;~Bernhard_Sch\u00f6lkopf1", "aff": "Ecole Normale Superieure Paris-Saclay;Max Planck Institute for Intelligent Systems, Max-Planck Institute;;Max-Planck Institute;MPI for Intelligent Systems;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;", "aff_domain": "ens-paris-saclay.fr;tue.mpg.de;;mpg.de;tuebingen.mpg.de;is.mpg.de;", "position": "MS student;PhD student;;Postdoc;Senior research scientist;PhD student;", "bibtex": "@inproceedings{\nliang2023causal,\ntitle={Causal Component Analysis},\nauthor={Wendong Liang and Armin Keki{\\'c} and Julius von K{\\\"u}gelgen and Simon Buchholz and Michel Besserve and Luigi Gresele and Bernhard Sch{\\\"o}lkopf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HszLRiHyfO}\n}", "github": "", "project": "", "reviewers": "xr1H;8r7j;nsXw;xM6s", "pdf_size": 8202788, "rating": "4;5;7;7", "confidence": "4;3;2;1", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "66;100;115;106", "wc_strengths": "40;125;44;123", "wc_weaknesses": "354;89;148;1", "wc_questions": "57;108;155;1", "wc_limitations": "13;33;30;17", "wc_review": "530;455;492;248", "wc_reply_reviewers": "362;105;31;19", "wc_reply_authors": "1310;141;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 96.75, 18.538810641462412 ], "wc_strengths_avg": [ 83.0, 41.030476477857285 ], "wc_weaknesses_avg": [ 148.0, 129.92882667060454 ], "wc_questions_avg": [ 80.25, 57.39936846342475 ], "wc_limitations_avg": [ 23.25, 8.437268515343103 ], "wc_review_avg": [ 431.25, 109.07193727077556 ], "wc_reply_reviewers_avg": [ 129.25, 138.3552944415211 ], "wc_reply_authors_avg": [ 362.75, 549.916073142075 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9467292624062574, "gs_citation": 45, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1664971109598634525&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ens-paris-saclay.fr;tue.mpg.de;;mpg.de;tuebingen.mpg.de;is.mpg.de;", "author_num": 7, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Ecole Normale Superieure Paris-Saclay;Max Planck Institute for Intelligent Systems;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems;;Intelligent Systems", "aff_unique_url": "https://www.ensparis-saclay.fr;https://www.mpi-is.mpg.de;https://www.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "ENS Paris-Saclay;MPI-IS;MPG;MPI-IS", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris-Saclay;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "France;Germany" }, { "title": "Addressing the speed-accuracy simulation trade-off for adaptive spiking neurons", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72149", "id": "Ht79ZTVMsn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9f253c2758a323f9d2095f91de9a974-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ht79ZTVMsn", "openreview": "https://openreview.net/forum?id=Ht79ZTVMsn", "poster": "/media/PosterPDFs/NeurIPS%202023/72149.png?t=1699961897.4462075", "slides": "https://nips.cc/virtual/2023/poster/72149", "video": "https://nips.cc/virtual/2023/poster/72149", "author_site": "Luke Taylor, Andrew King, Nicol S Harper", "tldr": "", "abstract": "The adaptive leaky integrate-and-fire (ALIF) model is fundamental within computational neuroscience and has been instrumental in studying our brains $\\textit{in silico}$. Due to the sequential nature of simulating these neural models, a commonly faced issue is the speed-accuracy trade-off: either accurately simulate a neuron using a small discretisation time-step (DT), which is slow, or more quickly simulate a neuron using a larger DT and incur a loss in simulation accuracy. Here we provide a solution to this dilemma, by algorithmically reinterpreting the ALIF model, reducing the sequential simulation complexity and permitting a more efficient parallelisation on GPUs. We computationally validate our implementation to obtain over a $50\\times$ training speedup using small DTs on synthetic benchmarks. We also obtained a comparable performance to the standard ALIF implementation on different supervised classification tasks - yet in a fraction of the training time. 
Lastly, we showcase how our model makes it possible to quickly and accurately fit real electrophysiological recordings of cortical neurons, where very fine sub-millisecond DTs are crucial for capturing exact spike timing.", "keywords": "spiking neural network;surrogate gradient descent;adaptive leaky integrate and fire neuron;speed-accuracy trade-off;electrophysiological recordings", "primary_area": "", "supplementary_material": "/attachment/8b111ae29458fa10ae3e5ca8bdde3ef101345902.pdf", "author": "Luke Taylor;Andrew J King;Nicol Spencer Harper", "authorids": "~Luke_Taylor1;~Andrew_J_King1;~Nicol_Spencer_Harper1", "gender": ";M;", "homepage": ";;https://www.dpag.ox.ac.uk/team/nicol-harper", "dblp": "205/2581;;", "google_scholar": "https://scholar.google.co.za/citations?user=C3DoHSkAAAAJ;;https://scholar.google.co.uk/citations?user=GUALUxwAAAAJ", "orcid": ";0000-0001-5180-7179;", "linkedin": "luke-t-7963078a/;;", "or_profile": "~Luke_Taylor1;~Andrew_J_King1;~Nicol_Spencer_Harper1", "aff": "University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;oxford.ac.uk;ox.ac.uk", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\ntaylor2023addressing,\ntitle={Addressing the speed-accuracy simulation trade-off for adaptive spiking neurons},\nauthor={Luke Taylor and Andrew J King and Nicol Spencer Harper},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ht79ZTVMsn}\n}", "github": "", "project": "", "reviewers": "Fo7F;9z9h;x3Zh;gA8o", "pdf_size": 891589, "rating": "5;5;7;7", "confidence": "3;4;3;4", "soundness": "2;3;2;4", "novelty": "2;2;3;4", "presentation": "2;4;3;3", "wc_summary": "98;253;51;89", "wc_strengths": "101;54;54;27", "wc_weaknesses": "111;251;113;206", "wc_questions": "51;139;2;0", "wc_limitations": "23;57;5;1", "wc_review": "384;754;225;323", "wc_reply_reviewers": "53;345;21;14", "wc_reply_authors": "0;413;0;0", "reply_reviewers": "1;3;1;1", "reply_authors": "1;4;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.75, 77.24109981091672 ], "wc_strengths_avg": [ 59.0, 26.636441203734407 ], "wc_weaknesses_avg": [ 170.25, 60.38780920020199 ], "wc_questions_avg": [ 48.0, 56.36931789546508 ], "wc_limitations_avg": [ 21.5, 22.107690969434145 ], "wc_review_avg": [ 421.5, 200.1730501341277 ], "wc_reply_reviewers_avg": [ 108.25, 137.47613429246547 ], "wc_reply_authors_avg": [ 103.25, 178.83424588148657 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2706822071046173348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ox.ac.uk;oxford.ac.uk;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Understanding and Mitigating Copying in Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72148", "id": "HtMXRGbUMt", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9521b6e7f33e039e7d92e23f5e37bbf4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HtMXRGbUMt", "openreview": "https://openreview.net/forum?id=HtMXRGbUMt", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72148", "video": "https://nips.cc/virtual/2023/poster/72148", "author_site": "Gowthami Somepalli, Vasu Singla, Micah Goldblum, Jonas Geiping, Tom Goldstein", "tldr": "", "abstract": "Images generated by diffusion models like Stable Diffusion are increasingly widespread. Recent works and even lawsuits have shown that these models are prone to replicating their training data, unbeknownst to the user. In this paper, we first analyze this memorization problem in text-to-image diffusion models. While it is widely believed that duplicated images in the training set are responsible for content replication at inference time, we observe that the text conditioning of the model plays a similarly important role. In fact, we see in our experiments that data replication often does not happen for unconditional models, while it is common in the text-conditional case. Motivated by our findings, we then propose several techniques for reducing data replication at both training and inference time by randomizing and augmenting image captions in the training set. Code is available at https://github.com/somepago/DCR.", "keywords": "diffusion models;memorization;data replication;model safety", "primary_area": "", "supplementary_material": "/attachment/7ebde2b6bb4e02a4b9b10fc8e23d78f6bec6d513.pdf", "author": "Gowthami Somepalli;Vasu Singla;Micah Goldblum;Jonas Geiping;Tom Goldstein", "authorids": "~Gowthami_Somepalli1;~Vasu_Singla1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1", "gender": "F;M;;M;M", "homepage": "https://somepago.github.io/;https://www.cs.umd.edu/people/vsingla;;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/", "dblp": "286/5012;270/9234;241/7231;190/7229;25/8184", "google_scholar": "T2ezBDsAAAAJ;geHpT2IAAAAJ;pGDKzuUAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Gowthami_Somepalli1;~Vasu_Singla1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;Cruise LLC;New York University;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;getcruise.com;nyu.edu;umd.edu;umd.edu", "position": "PhD student;Intern;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nsomepalli2023understanding,\ntitle={Understanding and Mitigating Copying in Diffusion Models},\nauthor={Gowthami Somepalli and Vasu Singla and Micah Goldblum and Jonas Geiping and Tom Goldstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HtMXRGbUMt}\n}", "github": "", "project": "", "reviewers": "4RBY;rsTQ;MEzn;qP9c;ciAp;R9Bm", "pdf_size": 15678059, "rating": "4;5;6;6;6;7", "confidence": "4;5;1;3;3;5", "soundness": "2;2;2;3;2;3", "novelty": "3;4;3;3;3;4", "presentation": "3;2;3;3;3;4", "wc_summary": "87;207;118;53;46;138", "wc_strengths": "105;178;35;25;48;93", "wc_weaknesses": "559;358;53;80;63;351", "wc_questions": "38;70;48;83;450;22", "wc_limitations": "56;1;36;1;7;84", "wc_review": "845;814;290;242;614;688", "wc_reply_reviewers": "0;319;11;9;63;8", "wc_reply_authors": "0;521;0;0;17;0", "reply_reviewers": "0;2;1;1;1;1", "reply_authors": "1;3;1;1;2;1", "rating_avg": [ 
5.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 3.5, 1.3844373104863459 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 108.16666666666667, 54.94062451612852 ], "wc_strengths_avg": [ 80.66666666666667, 52.39168721170267 ], "wc_weaknesses_avg": [ 244.0, 191.40184603777118 ], "wc_questions_avg": [ 118.5, 149.59250204026492 ], "wc_limitations_avg": [ 30.833333333333332, 31.184486884061798 ], "wc_review_avg": [ 582.1666666666666, 236.68433032675023 ], "wc_reply_reviewers_avg": [ 68.33333333333333, 114.00243662113347 ], "wc_reply_authors_avg": [ 89.66666666666667, 192.9979850211453 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1276884796138123, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4302269978158026077&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "umd.edu;getcruise.com;nyu.edu;umd.edu;umd.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Maryland;Cruise LLC;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umd.edu;https://www.cruisellc.com;https://www.nyu.edu", "aff_unique_abbr": "UMD;Cruise;NYU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Chameleon: Plug-and-Play Compositional Reasoning with Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72147", "id": "HtqnVSCj3q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/871ed095b734818cfba48db6aeb25a62-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HtqnVSCj3q", "openreview": "https://openreview.net/forum?id=HtqnVSCj3q", "poster": "/media/PosterPDFs/NeurIPS%202023/72147.png?t=1699614582.8695493", "slides": "https://nips.cc/virtual/2023/poster/72147", "video": "https://nips.cc/virtual/2023/poster/72147", "author_site": "Pan Lu, Baolin Peng, Hao Cheng, Michel Galley, Kai-Wei Chang, Ying Nian Wu, Song-Chun Zhu, Jianfeng Gao", "tldr": "", "abstract": "Large language models (LLMs) have achieved remarkable progress in solving various natural language processing tasks due to emergent reasoning abilities. However, LLMs have inherent limitations as they are incapable of accessing up-to-date information (stored on the Web or in task-specific knowledge bases), using external tools, and performing precise mathematical and logical reasoning. In this paper, we present Chameleon, an AI system that mitigates these limitations by augmenting LLMs with plug-and-play modules for compositional reasoning. Chameleon synthesizes programs by composing various tools (e.g., LLMs, off-the-shelf vision models, web search engines, Python functions, and heuristic-based modules) for accomplishing complex reasoning tasks. At the heart of Chameleon is an LLM-based planner that assembles a sequence of tools to execute to generate the final response. We showcase the effectiveness of Chameleon on two multi-modal knowledge-intensive reasoning tasks: ScienceQA and TabMWP. Chameleon, powered by GPT-4, achieves an 86.54% overall accuracy on ScienceQA, improving the best published few-shot result by 11.37%.
On TabMWP, GPT-4-powered Chameleon improves the accuracy by 17.0%, lifting the state of the art to 98.78%. Our analysis also shows that the GPT-4-powered planner exhibits more consistent and rational tool selection via inferring potential constraints from instructions, compared to a ChatGPT-powered planner.", "keywords": "large language models;compositional reasoning;tool use;multi-modal reasoning;mathematical reasoning", "primary_area": "", "supplementary_material": "", "author": "Pan Lu;Baolin Peng;Hao Cheng;Michel Galley;Kai-Wei Chang;Ying Nian Wu;Song-Chun Zhu;Jianfeng Gao", "authorids": "~Pan_Lu2;~Baolin_Peng2;~Hao_Cheng4;~Michel_Galley1;~Kai-Wei_Chang1;~Ying_Nian_Wu1;~Song-Chun_Zhu1;~Jianfeng_Gao1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://sites.google.com/site/hcheng2site/Home;http://research.microsoft.com/~mgalley;http://kwchang.net;https://zhusongchun.net/;https://www.microsoft.com/en-us/research/people/jfgao/;https://lupantech.github.io/;http://www.stat.ucla.edu/~ywu/", "dblp": "144/2759;09/5158-2;05/3289;18/2428;10/10313;92/5339;;18/568.html", "google_scholar": "u1CNjgwAAAAJ;https://scholar.google.com/citations?hl=en;rs1M7CAAAAAJ;fqDBtzYAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;https://scholar.google.com/citations?hl=en;IyucsdQAAAAJ;7k_1QFIAAAAJ", "orcid": ";0000-0001-7988-3149;0000-0002-3310-1831;0000-0001-5365-0072;;;;", "linkedin": ";;michelgalley;kai-wei-chang-41239040;;;pan-lu-9308909a/;", "or_profile": "~Baolin_Peng2;~Hao_Cheng4;~Michel_Galley1;~Kai-Wei_Chang1;~Song-Chun_Zhu1;~Jianfeng_Gao1;~Pan_Lu1;~Yingnian_Wu1", "aff": "Tencent AI Lab;Microsoft Research;Microsoft;Amazon;Peking University;Microsoft Research;University of California, Los Angeles;UCLA", "aff_domain": "tencent.com;microsoft.com;microsoft.com;amazon.com;pku.edu.cn;microsoft.com;ucla.edu;stat.ucla.edu", "position": "Researcher;Researcher;Researcher;Researcher;Full Professor;Principal Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nlu2023chameleon,\ntitle={Chameleon: Plug-and-Play Compositional Reasoning with Large Language Models},\nauthor={Pan Lu and Baolin Peng and Hao Cheng and Michel Galley and Kai-Wei Chang and Ying Nian Wu and Song-Chun Zhu and Jianfeng Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HtqnVSCj3q}\n}", "github": "", "project": "", "reviewers": "uov2;yhWh;GU39;1xpZ", "pdf_size": 2039005, "rating": "4;5;7;7", "confidence": "4;4;4;4", "soundness": "4;3;4;3", "novelty": "3;2;3;3", "presentation": "2;3;4;3", "wc_summary": "122;248;78;43", "wc_strengths": "220;150;108;55", "wc_weaknesses": "320;379;98;236", "wc_questions": "1076;123;7;42", "wc_limitations": "64;1;13;8", "wc_review": "1802;901;304;384", "wc_reply_reviewers": "1077;450;19;242", "wc_reply_authors": "2147;1362;661;908", "reply_reviewers": "2;1;1;2", "reply_authors": "7;4;3;4", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.75, 77.54152113545362 ], "wc_strengths_avg": [ 133.25, 60.34639591558057 ], "wc_weaknesses_avg": [ 258.25, 105.55656066772923 ], "wc_questions_avg": [ 312.0, 443.0976190412221 ], "wc_limitations_avg": [ 21.5, 24.904818810824544 ], "wc_review_avg": [ 847.75, 596.6901939030002 ], "wc_reply_reviewers_avg": [ 447.0, 394.37228604454447 ], "wc_reply_authors_avg": [ 1269.5, 565.578685949179 ], 
"reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.5, 1.5 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 456, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8452068999722017704&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "tencent.com;microsoft.com;microsoft.com;amazon.com;pku.edu.cn;microsoft.com;ucla.edu;stat.ucla.edu", "author_num": 8, "aff_unique_index": "0;1;1;2;3;1;4;4", "aff_unique_norm": "Tencent;Microsoft;Amazon;Peking University;University of California, Los Angeles", "aff_unique_dep": "Tencent AI Lab;Microsoft Research;Amazon.com, Inc.;;", "aff_unique_url": "https://ai.tencent.com;https://www.microsoft.com/en-us/research;https://www.amazon.com;http://www.pku.edu.cn;https://www.ucla.edu", "aff_unique_abbr": "Tencent AI Lab;MSR;Amazon;Peking U;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1;1;0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Bullying10K: A Large-Scale Neuromorphic Dataset towards Privacy-Preserving Bullying Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73636", "id": "HuG4eOFLO9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05ffe69463062b7f9fb506c8351ffdd7-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=HuG4eOFLO9", "openreview": "https://openreview.net/forum?id=HuG4eOFLO9", "poster": "/media/PosterPDFs/NeurIPS%202023/73636.png?t=1699332393.4148417", "slides": "https://nips.cc/virtual/2023/poster/73636", "video": "https://nips.cc/virtual/2023/poster/73636", "author_site": "Yiting Dong, Yang Li, Dongcheng Zhao, Guobin Shen, Yi Zeng", "tldr": "", "abstract": "The prevalence of violence in daily life poses significant threats to individuals' physical and mental well-being. Using surveillance cameras in public spaces has proven effective in proactively deterring and preventing such incidents. However, concerns regarding privacy invasion have emerged due to their widespread deployment.To address the problem, we leverage Dynamic Vision Sensors (DVS) cameras to detect violent incidents and preserve privacy since it captures pixel brightness variations instead of static imagery. We introduce the Bullying10K dataset, encompassing various actions, complex movements, and occlusions from real-life scenarios. It provides three benchmarks for evaluating different tasks: action recognition, temporal action localization, and pose estimation. With 10,000 event segments, totaling 12 billion events and 255 GB of data, Bullying10K contributes significantly by balancing violence detection and personal privacy persevering. And it also poses a challenge to the neuromorphic dataset. It will serve as a valuable resource for training and developing privacy-protecting video systems. 
The Bullying10K opens new possibilities for innovative approaches in these domains.", "keywords": "violence detection;privacy protection;event-based dataset;Dynamic Vision Sensors (DVS);bullying", "primary_area": "", "supplementary_material": "/attachment/4eac646a35ead65e2065995d9483eeca3a28fdb7.pdf", "author": "Yiting Dong;Yang Li;Dongcheng Zhao;Guobin Shen;Yi Zeng", "authorids": "~Yiting_Dong1;~Yang_Li44;~Dongcheng_Zhao2;~Guobin_Shen1;~Yi_Zeng1", "gender": "M;M;M;;M", "homepage": ";;;;https://bii.ia.ac.cn/~yizeng", "dblp": "176/1090;37/4190-141;177/8581;;75/148-1", "google_scholar": ";3QpRLTgAAAAJ;2E9Drq8AAAAJ;;", "orcid": "0000-0002-8405-3139;0000-0003-0161-9801;;;0000-0002-9595-9091", "linkedin": ";;;;", "or_profile": "~Yiting_Dong1;~Yang_Li44;~Dongcheng_Zhao2;~Guobin_Shen1;~Yi_Zeng1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;;ia.ac.cn", "position": "PhD student;PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\ndong2023bullyingk,\ntitle={Bullying10K: A Large-Scale Neuromorphic Dataset towards Privacy-Preserving Bullying Recognition},\nauthor={Yiting Dong and Yang Li and Dongcheng Zhao and Guobin Shen and Yi Zeng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=HuG4eOFLO9}\n}", "github": "", "project": "", "reviewers": "MGZy;4C71;dVaU;W6Gq", "pdf_size": 11484202, "rating": "5;5;6;7", "confidence": "4;3;4;4", "wc_summary_and_contributions": "36;95;142;34", "wc_strengths": "51;71;13;103", "wc_improvement": "28;64;315;137", "wc_limitations": "234;36;300;14", "wc_correctness": "7;74;17;1", "wc_clarity": "1;57;12;6", "wc_relation_to_prior_work": "13;1;5;4", "wc_documentation": "1;40;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "372;439;806;301", "wc_reply_reviewers": "0;0;45;10", "wc_reply_authors": "1070;1615;1157;660", "reply_reviewers": "0;0;2;1", "reply_authors": "3;4;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 76.75, 44.94093345715017 ], "wc_strengths_avg": [ 59.5, 32.630507198019465 ], "wc_improvement_avg": [ 136.0, 110.55541596864443 ], "wc_limitations_avg": [ 146.0, 123.47469376354006 ], "wc_correctness_avg": [ 24.75, 29.0032325784558 ], "wc_clarity_avg": [ 19.0, 22.282279955157193 ], "wc_relation_to_prior_work_avg": [ 5.75, 4.437059837324712 ], "wc_documentation_avg": [ 10.75, 16.887495373796554 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 479.5, 194.7183863943002 ], "wc_reply_reviewers_avg": [ 13.75, 18.498310733685926 ], "wc_reply_authors_avg": [ 1125.5, 339.25396092013426 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=465740969246357473&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "HvWfTrjwWa", "title": "Generalized Balancing Weights via Deep Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Estimating causal effects from observational data is a central problem in many domains.\nA general approach is to balance covariates with weights such that the distribution of the data mimics randomization.\nWe present generalized balancing weights, *Neural Balancing Weights* (NBW), to estimate the causal effects of an arbitrary mixture of discrete and continuous interventions.\nThe weights were obtained through direct estimation of the density ratio between the source and balanced distributions by optimizing the variational representation of $f$-divergence.\nFor this, we selected $\\alpha$-divergence as it presents efficient optimization because it has an estimator whose sample complexity is independent of its ground truth value and unbiased mini-batch gradients; moreover, it is advantageous for the vanishing-gradient problem.\nIn addition, we provide the following two methods for estimating the balancing weights: improving the generalization performance of the balancing weights and checking the balance of the distribution changed by the weights.\nFinally, we discuss the sample size requirements for the weights as a general problem of a curse of dimensionality when balancing multidimensional data.\nOur study provides a basic approach for estimating the balancing weights of multidimensional data using variational $f$-divergences.", "keywords": "causal inference;density ratio estimation", "primary_area": "", "supplementary_material": "/attachment/15ff24dfa51a6127261b17f4c3654aabc290c390.zip", "author": "Yoshiaki Kitazawa", "authorids": "~Yoshiaki_Kitazawa1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "-htCt0UAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Yoshiaki_Kitazawa1", "aff": "NTT DATA Mathematical Systems Inc. 
", "aff_domain": "msi.co.jp", "position": "Researcher", "bibtex": "@misc{\nkitazawa2023generalized,\ntitle={Generalized Balancing Weights via Deep Neural Networks},\nauthor={Yoshiaki Kitazawa},\nyear={2023},\nurl={https://openreview.net/forum?id=HvWfTrjwWa}\n}", "github": "", "project": "", "reviewers": "bV7F;LPz4;L7FM;vQ5H", "site": "https://openreview.net/forum?id=HvWfTrjwWa", "pdf_size": 598521, "rating": "4;4;4;4", "confidence": "2;4;2;1", "soundness": "2;3;4;2", "novelty": "3;2;4;2", "presentation": "1;3;2;2", "wc_summary": "83;81;119;89", "wc_strengths": "41;56;77;18", "wc_weaknesses": "183;164;132;219", "wc_questions": "310;302;76;3", "wc_limitations": "7;87;6;3", "wc_review": "624;690;410;332", "wc_reply_reviewers": "807;204;133;0", "wc_reply_authors": "825;235;27;0", "reply_reviewers": "7;1;1;0", "reply_authors": "6;2;2;1", "rating_avg": [ 4.0, 0.0 ], "confidence_avg": [ 2.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.0, 15.297058540778355 ], "wc_strengths_avg": [ 48.0, 21.529050141610984 ], "wc_weaknesses_avg": [ 174.5, 31.5 ], "wc_questions_avg": [ 172.75, 135.7559851350945 ], "wc_limitations_avg": [ 25.75, 35.39332564199075 ], "wc_review_avg": [ 514.0, 147.49237268414933 ], "wc_reply_reviewers_avg": [ 286.0, 309.58439883172406 ], "wc_reply_authors_avg": [ 271.75, 332.1094510850301 ], "reply_reviewers_avg": [ 2.25, 2.7726341266023544 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1813846077417198260&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "NTT DATA Mathematical Systems Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt-data.com/", "aff_unique_abbr": "NTT DATA MS", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Towards a Comprehensive Benchmark for High-Level Synthesis Targeted to FPGAs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73635", "id": "HvcLKgtbco", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8dfc3a2720a4112243a285b98e0d4415-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=HvcLKgtbco", "openreview": "https://openreview.net/forum?id=HvcLKgtbco", "poster": "/media/PosterPDFs/NeurIPS%202023/73635.png?t=1701493043.4241729", "slides": "https://nips.cc/virtual/2023/poster/73635", "video": "https://nips.cc/virtual/2023/poster/73635", "author_site": "Yunsheng Bai, Atefeh Sohrabizadeh, Zongyue Qin, Ziniu Hu, Yizhou Sun, Jason Cong", "tldr": "", "abstract": "High-level synthesis (HLS) aims to raise the abstraction layer in hardware design, enabling the design of domain-specific accelerators (DSAs) like field-programmable gate arrays (FPGAs) using C/C++ instead of hardware description languages (HDLs). Compiler directives in the form of pragmas play a crucial role in modifying the microarchitecture within the HLS framework. However, the space of possible microarchitectures grows exponentially with the number of pragmas. Moreover, the evaluation of each candidate design using the HLS tool consumes significant time, ranging from minutes to hours, leading to a time-consuming optimization process. 
To accelerate this process, machine learning models have been used to predict design quality in milliseconds. However, existing open-source datasets for training such models are limited in terms of design complexity and available optimizations. In this paper, we present HLSyn, the first benchmark that addresses these limitations. It contains more complex programs with a wider range of optimization pragmas, making it a comprehensive dataset for training and evaluating design quality prediction models. The HLSyn benchmark consists of 42 unique programs/kernels, resulting in over 42,000 labeled designs. We conduct an extensive comparison of state-of-the-art baselines to assess their effectiveness in predicting design quality. As an ongoing project, we anticipate expanding the HLSyn benchmark in terms of both quantity and variety of programs to further support the development of this field.", "keywords": "integrated circuit design;FPGAs;hardware design automation;hardware design optimization;representation learning;source code;assembly code;multi-modality;graph neural network;transformer;pretraining;high level synthesis;benchmark", "primary_area": "", "supplementary_material": "/attachment/c522a3b37cf1414ebf6cd44dec9e597df55d9c32.zip", "author": "Yunsheng Bai;Atefeh Sohrabizadeh;Zongyue Qin;Ziniu Hu;Yizhou Sun;Jason Cong", "authorids": "~Yunsheng_Bai1;~Atefeh_Sohrabizadeh1;~Zongyue_Qin1;~Ziniu_Hu1;~Yizhou_Sun1;~Jason_Cong1", "gender": "M;;M;M;F;M", "homepage": "https://yunshengb.com/;https://web.cs.ucla.edu/~atefehsz/;https://web.cs.ucla.edu/~qinzongyue/;http://acbull.github.io;http://web.cs.ucla.edu/~yzsun/;https://cadlab.cs.ucla.edu/~cong/", "dblp": "225/5377.html;259/3786;253/0403;180/5436;37/3868;c/JasonCong", "google_scholar": ";;8QDAVAgAAAAJ;x6ct1CsAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;0000-0003-2887-6963", "linkedin": ";;;;;", "or_profile": "~Yunsheng_Bai1;~Atefeh_Sohrabizadeh1;~Zongyue_Qin1;~Ziniu_Hu1;~Yizhou_Sun1;~Jason_Cong1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;ucla.edu;ucla.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbai2023towards,\ntitle={Towards a Comprehensive Benchmark for High-Level Synthesis Targeted to {FPGA}s},\nauthor={Yunsheng Bai and Atefeh Sohrabizadeh and Zongyue Qin and Ziniu Hu and Yizhou Sun and Jason Cong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=HvcLKgtbco}\n}", "github": "", "project": "", "reviewers": "mcz1;C7Q3;qZxJ;455o;JA6Z", "pdf_size": 3854489, "rating": "5;6;6;7;7", "confidence": "5;4;3;3;3", "wc_summary_and_contributions": "249;55;36;27;63", "wc_strengths": "38;81;53;63;45", "wc_improvement": "225;169;69;253;37", "wc_limitations": "169;122;13;50;17", "wc_correctness": "11;1;20;9;31", "wc_clarity": "7;11;8;14;24", "wc_relation_to_prior_work": "2;6;11;23;5", "wc_documentation": "24;16;12;7;29", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "726;462;223;447;252", "wc_reply_reviewers": "134;0;0;5;0", "wc_reply_authors": "433;70;54;229;18", "reply_reviewers": "1;0;0;1;0", "reply_authors": "1;1;1;1;1", 
"rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 86.0, 82.51060537894507 ], "wc_strengths_avg": [ 56.0, 15.019986684414869 ], "wc_improvement_avg": [ 150.6, 84.76225575101219 ], "wc_limitations_avg": [ 74.2, 61.440703120976735 ], "wc_correctness_avg": [ 14.4, 10.26839812239475 ], "wc_clarity_avg": [ 12.8, 6.11228271597445 ], "wc_relation_to_prior_work_avg": [ 9.4, 7.391887445030531 ], "wc_documentation_avg": [ 17.6, 7.964923100695951 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 422.0, 180.6333302577351 ], "wc_reply_reviewers_avg": [ 27.8, 53.13529900169943 ], "wc_reply_authors_avg": [ 160.8, 154.1342272177079 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8685990362153793, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17371717371650791628&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cs.ucla.edu;ucla.edu;ucla.edu;ucla.edu;ucla.edu;cs.ucla.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Synthetic-to-Real Pose Estimation with Geometric Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72146", "id": "HvhagNdf5z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a8223b0ad64007423ffb308b0dd92298-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HvhagNdf5z", "openreview": "https://openreview.net/forum?id=HvhagNdf5z", "poster": "/media/PosterPDFs/NeurIPS%202023/72146.png?t=1701871570.35783", "slides": "https://nips.cc/virtual/2023/poster/72146", "video": "https://nips.cc/virtual/2023/poster/72146", "author_site": "Qiuxia Lin, Kerui Gu, Linlin Yang, Angela Yao", "tldr": "", "abstract": "Pose estimation is remarkably successful under supervised learning, but obtaining annotations, especially for new deployments, is costly and time-consuming. This work tackles adapting models trained on synthetic data to real-world target domains with only unlabelled data. A common approach is model fine-tuning with pseudo-labels from the target domain; yet many pseudo-labelling strategies cannot provide sufficient high-quality pose labels. This work proposes a reconstruction-based strategy as a complement to pseudo-labelling for synthetic-to-real domain adaptation. We generate the driving image by geometrically transforming a base image according to the predicted keypoints and enforce a reconstruction loss to refine the predictions. It provides a novel solution to effectively correct confident yet inaccurate keypoint locations through image reconstruction in domain adaptation. Our approach outperforms the previous state-of-the-arts by 8% for PCK on four large-scale hand and human real-world datasets. 
In particular, we excel on endpoints such as fingertips and head, with 7.2% and 29.9% improvements in PCK.", "keywords": "pose estimation;domain adaptation", "primary_area": "", "supplementary_material": "/attachment/818b41313470bb743c428946da43793ba135584d.pdf", "author": "Qiuxia Lin;Kerui Gu;Linlin Yang;Angela Yao", "authorids": "~Qiuxia_Lin1;~Kerui_Gu1;~Linlin_Yang1;~Angela_Yao1", "gender": "F;M;M;", "homepage": ";https://www.comp.nus.edu.sg/~keruigu/;https://www.mu4yang.com;http://www.angelayao.com", "dblp": "243/1756;315/5511;;64/8484", "google_scholar": ";if-RXSEAAAAJ;https://scholar.google.com.hk/citations?user=gI55gF0AAAAJ;https://scholar.google.ch/citations?user=-LJCZMMAAAAJ", "orcid": ";;0000-0001-6752-0252;", "linkedin": ";;;", "or_profile": "~Qiuxia_Lin1;~Kerui_Gu1;~Linlin_Yang1;~Angela_Yao1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;nus.edu;nus.edu.sg", "position": "PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nlin2023synthetictoreal,\ntitle={Synthetic-to-Real Pose Estimation with Geometric Reconstruction},\nauthor={Qiuxia Lin and Kerui Gu and Linlin Yang and Angela Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HvhagNdf5z}\n}", "github": "", "project": "", "reviewers": "mnmU;RVKB;kmmR;QXSS", "pdf_size": 565531, "rating": "5;5;5;5", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "64;85;123;106", "wc_strengths": "59;38;16;143", "wc_weaknesses": "183;87;196;119", "wc_questions": "121;6;9;62", "wc_limitations": "44;8;55;6", "wc_review": "471;224;399;436", "wc_reply_reviewers": "22;15;0;60", "wc_reply_authors": "0;0;59;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.5, 22.1641602593015 ], "wc_strengths_avg": [ 64.0, 48.07806152498247 ], "wc_weaknesses_avg": [ 146.25, 44.94093345715017 ], "wc_questions_avg": [ 49.5, 46.90682253148256 ], "wc_limitations_avg": [ 28.25, 21.614520582238228 ], "wc_review_avg": [ 382.5, 94.9855252130555 ], "wc_reply_reviewers_avg": [ 24.25, 22.117583502724706 ], "wc_reply_authors_avg": [ 14.75, 25.54774941164094 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11596267058488085935&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "u.nus.edu;nus.edu.sg;nus.edu;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "HwWkIwzzKF", "title": "Contextual Bandits with Knapsacks beyond Worst Cases via Re-Solving", "track": "main", "status": "Reject", "tldr": "", "abstract": "Contextual Bandits with Knapsacks (CBwK) is a fundamental and essential framework for modeling a dynamic decision-making scenario 
with resource constraints. \nUnder this framework, an agent selects an action in each round upon observing a request, leading to a reward and resource consumption that are further associated with an unknown external factor. \nThe agent's goal is to maximize the total reward under the initial inventory. \nWhile previous research has already established an $\\widetilde{O}(\\sqrt{T})$ worst-case regret for this problem, this work offers two results that go beyond the worst-case perspective: one locating the worst cases, and another establishing logarithmic regret rates.\nWe start by demonstrating that the unique-optimality and degeneracy of the fluid LP problem, which is both succinct and easily verifiable, is a sufficient condition for the existence of an $\\Omega(\\sqrt{T})$ regret lower bound. \nTo supplement this worst-case location result, we merge the re-solving heuristic with distribution estimation techniques and propose an algorithm that achieves an $\\widetilde{O}(1)$ regret as long as the fluid LP has a unique and non-degenerate solution. \nThis condition is mild as it is satisfied for most problem instances. \nFurthermore, we prove our algorithm maintains a near-optimal $\\widetilde{O}(\\sqrt{T})$ regret even in the worst cases, and extend these results to the setting where the request and external factor are continuous. \nRegarding information, our regret results are obtained under two feedback models: one where the algorithm accesses the external factor at the end of each round, and one where it accesses the external factor at the end of a round only when a non-null action is executed. ", "keywords": "Contextual bandits with knapsacks;Re-solving;Regularity", "primary_area": "", "supplementary_material": "/attachment/faadf1d251c6d36fb16835ab0604882a1b749fe6.pdf", "author": "Rui Ai;Zhaohua Chen;Xiaotie Deng;Yuqi Pan;Chang Wang;Mingwei Yang", "authorids": "~Rui_Ai1;~Zhaohua_Chen1;~Xiaotie_Deng1;~Yuqi_Pan1;~Chang_Wang4;~Mingwei_Yang1", "gender": "M;M;M;F;;M", "homepage": "https://air-8.github.io/;https://daleczh.github.io/;https://cfcs.pku.edu.cn/english/people/faculty/xiaotiedeng/index.htm;;;https://mingwei-yang.netlify.app/", "dblp": "184/2621-2;121/7325-1;d/XiaotieDeng;52/4131.html;;193/9236-2", "google_scholar": "IoU4kj8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=OBUwP_oAAAAJ;Kg3H9PsAAAAJ;;nVfIRLMAAAAJ", "orcid": ";0000-0002-8895-5236;0000-0002-5282-6467;;;", "linkedin": ";;;;;", "or_profile": "~Rui_Ai1;~Zhaohua_Chen1;~Xiaotie_Deng1;~Yuqi_Pan1;~Chang_Wang4;~Mingwei_Yang1", "aff": "Peking University;Peking University;Peking University;Peking University;;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;;pku.edu.cn", "position": "Undergrad student;PhD student;Full Professor;Undergrad student;;Undergrad student", "bibtex": "@misc{\nai2023contextual,\ntitle={Contextual Bandits with Knapsacks beyond Worst Cases via Re-Solving},\nauthor={Rui Ai and Zhaohua Chen and Xiaotie Deng and Yuqi Pan and Chang Wang and Mingwei Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=HwWkIwzzKF}\n}", "github": "", "project": "", "reviewers": "VG5f;Wp2p;CJMH;uK79", "site": "https://openreview.net/forum?id=HwWkIwzzKF", "pdf_size": 330260, "rating": "4;4;6;6", "confidence": "4;4;4;2", "soundness": "3;3;2;2", "novelty": "2;3;3;2", "presentation": "3;2;3;3", "wc_summary": "49;66;55;56", "wc_strengths": "23;93;77;20", "wc_weaknesses": "212;327;129;142", "wc_questions": "26;37;209;11", "wc_limitations": "1;9;55;1", "wc_review": "311;532;525;230",
"wc_reply_reviewers": "96;0;0;30", "wc_reply_authors": "36;0;0;3", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.5, 6.103277807866851 ], "wc_strengths_avg": [ 53.25, 32.26743714644843 ], "wc_weaknesses_avg": [ 202.5, 78.50636916836748 ], "wc_questions_avg": [ 70.75, 80.35040447938019 ], "wc_limitations_avg": [ 16.5, 22.46664193866097 ], "wc_review_avg": [ 399.5, 132.16372422113415 ], "wc_reply_reviewers_avg": [ 31.5, 39.20140303611594 ], "wc_reply_authors_avg": [ 9.75, 15.20485119953497 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FR8_Ll7PtgkJ:scholar.google.com/&scioq=Contextual+Bandits+with+Knapsacks+beyond+Worst+Cases+via+Re-Solving&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Future-Dependent Value-Based Off-Policy Evaluation in POMDPs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72145", "id": "HwhRehMr4a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3380e8116452e0efbf36f35d95e88c94-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=HwhRehMr4a", "openreview": "https://openreview.net/forum?id=HwhRehMr4a", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72145", "video": "https://nips.cc/virtual/2023/poster/72145", "author_site": "Masatoshi Uehara, Haruka Kiyohara, Andrew Bennett, Victor Chernozhukov, Nan Jiang, Nathan Kallus, Chengchun Shi, Wen Sun", "tldr": "", "abstract": "We study off-policy evaluation (OPE) for partially observable MDPs (POMDPs) with general function approximation. Existing methods such as sequential importance sampling estimators and fitted-Q evaluation suffer from the curse of horizon in POMDPs. To circumvent this problem, we develop a novel model-free OPE method by introducing future-dependent value functions that take future proxies as inputs. Future-dependent value functions play similar roles as classical value functions in fully-observable MDPs. We derive a new off-policy Bellman equation for future-dependent value functions as conditional moment equations that use history proxies as instrumental variables. We further propose a minimax learning method to learn future-dependent value functions using the new Bellman equation. We obtain the PAC result, which implies our OPE estimator is close to the true policy value as long as futures and histories contain sufficient information about latent states, and the Bellman completeness. 
Our code is available at https://github.com/aiueola/neurips2023-future-dependent-ope", "keywords": "Reinforcement learning theory;POMDP;PAC RL;Off-policy evaluation;Offline reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/fa610c0bebf7174ea70e7f28fd83f65b9d349ae1.zip", "author": "Masatoshi Uehara;Haruka Kiyohara;Andrew Bennett;Victor Chernozhukov;Nan Jiang;Nathan Kallus;Chengchun Shi;Wen Sun", "authorids": "~Masatoshi_Uehara1;~Haruka_Kiyohara1;~Andrew_Bennett5;~Victor_Chernozhukov1;~Nan_Jiang2;~Nathan_Kallus1;~Chengchun_Shi1;~Wen_Sun1", "gender": "M;;Not Specified;M;;M;;M", "homepage": "https://www.masatoshiuehara.com/;https://sites.google.com/view/harukakiyohara;https://www.victorchernozhukov.com/;http://nanjiang.cs.illinois.edu;http://nathankallus.com/;https://callmespring.github.io/;https://wensun.github.io;https://awbennett.net/", "dblp": "225/6517;294/5273;;06/4489-8;142/2900;;;57/6380", "google_scholar": "https://scholar.google.co.jp/citations?user=xuLKJboAAAAJ;wkZLqMMAAAAJ;6VW1kJgAAAAJ;nUlanA8AAAAJ;K2WfIlsAAAAJ;dDGy3N0AAAAJ;iOLC30YAAAAJ;", "orcid": "0000-0001-9017-3105;0009-0000-6378-4365;;;0000-0003-1672-0507;;;", "linkedin": ";haruka-kiyohara-0b2384191;;nan-jiang-28139937/;;;;", "or_profile": "~Masatoshi_Uehara1;~Haruka_Kiyohara1;~Victor_Chernozhukov1;~Nan_Jiang2;~Nathan_Kallus1;~Chengchun_Shi1;~Wen_Sun1;~Andrew_Bennett4", "aff": "Cornell University;Tokyo Institute of Technology, Tokyo Institute of Technology;Massachusetts Institute of Technology;University of Illinois, Urbana Champaign;Cornell University;London School of Economics and Political Science, University of London;Cornell University;Cornell University", "aff_domain": "cornell.edu;titech.ac.jp;mit.edu;illinois.edu;cornell.edu;lse.ac.uk;cornell.edu;cornell.edu", "position": "PhD student;Undergrad student;Full Professor;Assistant Professor;Associate Professor;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nuehara2023futuredependent,\ntitle={Future-Dependent Value-Based Off-Policy Evaluation in {POMDP}s},\nauthor={Masatoshi Uehara and Haruka Kiyohara and Andrew Bennett and Victor Chernozhukov and Nan Jiang and Nathan Kallus and Chengchun Shi and Wen Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=HwhRehMr4a}\n}", "github": "", "project": "", "reviewers": "MVwZ;4aE2;RwWH;489Z;W56S", "pdf_size": 584588, "rating": "3;7;7;7;7", "confidence": "3;3;4;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;4", "presentation": "2;4;4;3;3", "wc_summary": "53;85;92;78;68", "wc_strengths": "26;96;41;88;150", "wc_weaknesses": "130;92;13;40;254", "wc_questions": "30;129;287;141;22", "wc_limitations": "1;8;66;1;24", "wc_review": "240;410;499;348;518", "wc_reply_reviewers": "61;29;0;0;22", "wc_reply_authors": "464;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 1.6000000000000003 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 75.2, 13.644046320648432 ], "wc_strengths_avg": [ 80.2, 43.947241096569414 ], "wc_weaknesses_avg": [ 105.8, 84.47579534991073 ], "wc_questions_avg": [ 121.8, 96.01958133630868 ], "wc_limitations_avg": [ 20.0, 24.48673110074107 ], "wc_review_avg": [ 403.0, 102.06272581114027 ], "wc_reply_reviewers_avg": [ 22.4, 22.526428922490133 ], "wc_reply_authors_avg": [ 92.8, 185.6 ],
"reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6116674728740207232&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 11, "email": "cornell.edu;titech.ac.jp;mit.edu;illinois.edu;cornell.edu;lse.ac.uk;cornell.edu;cornell.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;0;4;0;0", "aff_unique_norm": "Cornell University;Tokyo Institute of Technology;Massachusetts Institute of Technology;University of Illinois Urbana-Champaign;London School of Economics and Political Science", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cornell.edu;https://www.titech.ac.jp;https://web.mit.edu;https://illinois.edu;https://www.lse.ac.uk", "aff_unique_abbr": "Cornell;Titech;MIT;UIUC;LSE", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Tokyo;Urbana-Champaign;London", "aff_country_unique_index": "0;1;0;0;0;2;0;0", "aff_country_unique": "United States;Japan;United Kingdom" }, { "title": "GeoCLIP: Clip-Inspired Alignment between Locations and Images for Effective Worldwide Geo-localization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72144", "id": "I18BXotQ7j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b57aaddf85ab01a2445a79c9edc1f4b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I18BXotQ7j", "openreview": "https://openreview.net/forum?id=I18BXotQ7j", "poster": "/media/PosterPDFs/NeurIPS%202023/72144.png?t=1699563561.4654796", "slides": "https://nips.cc/virtual/2023/poster/72144", "video": "https://nips.cc/virtual/2023/poster/72144", "author_site": "Vicente Vivanco Cepeda, Gaurav Kumar Nayak, Mubarak Shah", "tldr": "", "abstract": "Worldwide Geo-localization aims to pinpoint the precise location of images taken anywhere on Earth. This task has considerable challenges due to the immense variation in geographic landscapes. The image-to-image retrieval-based approaches fail to solve this problem on a global scale as it is not feasible to construct a large gallery of images covering the entire world. Instead, existing approaches divide the globe into discrete geographic cells, transforming the problem into a classification task. However, their performance is limited by the predefined classes and often results in inaccurate localizations when an image's location significantly deviates from its class center. To overcome these limitations, we propose GeoCLIP, a novel CLIP-inspired Image-to-GPS retrieval approach that enforces alignment between the image and its corresponding GPS locations. GeoCLIP's location encoder models the Earth as a continuous function by employing positional encoding through random Fourier features and constructing a hierarchical representation that captures information at varying resolutions to yield a semantically rich high-dimensional feature suitable to use even beyond geo-localization. To the best of our knowledge, this is the first work employing GPS encoding for geo-localization. We demonstrate the efficacy of our method via extensive experiments and ablations on benchmark datasets. We achieve competitive performance with just 20% of training data, highlighting its effectiveness even in limited-data settings. Furthermore, we qualitatively demonstrate geo-localization using a text query by leveraging the CLIP backbone of our image encoder. 
The project webpage is available at: https://vicentevivan.github.io/GeoCLIP", "keywords": "Geo-localization;Image-to-GPS retrieval;CLIP;Random Fourier Features", "primary_area": "", "supplementary_material": "/attachment/32c21bd1bc6a1832dc5d9220d1cec3dbc2932f84.pdf", "author": "Vicente Vivanco Cepeda;Gaurav Kumar Nayak;Mubarak Shah", "authorids": "~Vicente_Vivanco_Cepeda1;~Gaurav_Kumar_Nayak2;~Mubarak_Shah3", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/gauravnayak;https://www.crcv.ucf.edu/person/mubarak-shah/", "dblp": "342/2897;241/6244;s/MubarakShah", "google_scholar": ";https://scholar.google.co.in/citations?user=cLCeKTkAAAAJ;https://scholar.google.com.tw/citations?user=p8gsO3gAAAAJ", "orcid": ";0000-0002-6406-6178;0000-0002-8216-1128", "linkedin": "vicente-vivanco-cepeda-461315182/;;mubarak-shah-b6aa68213/", "or_profile": "~Vicente_Vivanco_Cepeda1;~Gaurav_Kumar_Nayak2;~Mubarak_Shah3", "aff": "University of Central Florida;University of Central Florida;University of Central Florida", "aff_domain": "ucf.edu;ucf.edu;ucf.edu", "position": "Undergrad student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ncepeda2023geoclip,\ntitle={Geo{CLIP}: Clip-Inspired Alignment between Locations and Images for Effective Worldwide Geo-localization},\nauthor={Vicente Vivanco Cepeda and Gaurav Kumar Nayak and Mubarak Shah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I18BXotQ7j}\n}", "github": "", "project": "", "reviewers": "AH57;SMLq;AAT4;vH7S;UZhc", "pdf_size": 3248321, "rating": "5;6;6;6;7", "confidence": "4;4;4;4;5", "soundness": "3;4;4;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "71;94;158;69;60", "wc_strengths": "71;243;159;100;30", "wc_weaknesses": "108;377;186;22;157", "wc_questions": "23;80;72;176;4", "wc_limitations": "8;5;71;11;1", "wc_review": "281;799;646;378;252", "wc_reply_reviewers": "0;70;95;38;75", "wc_reply_authors": "0;49;83;0;65", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 90.4, 35.61235740582193 ], "wc_strengths_avg": [ 120.6, 74.22829649129771 ], "wc_weaknesses_avg": [ 170.0, 117.49212739583874 ], "wc_questions_avg": [ 71.0, 59.833101206606365 ], "wc_limitations_avg": [ 19.2, 26.11053427258814 ], "wc_review_avg": [ 471.2, 214.9040716226661 ], "wc_reply_reviewers_avg": [ 55.6, 33.27822110630314 ], "wc_reply_authors_avg": [ 39.4, 33.921084888310986 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7905694150420948, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14554956059543872026&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 5, "email": "ucf.edu;ucf.edu;ucf.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Central Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ucf.edu", "aff_unique_abbr": "UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Randomized and Deterministic Maximin-share Approximations for Fractionally Subadditive Valuations", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/72143", "id": "I3k2NHt1zu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7ed46bd87cd51d4c031b96d9b1a8eb6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I3k2NHt1zu", "openreview": "https://openreview.net/forum?id=I3k2NHt1zu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72143", "video": "https://nips.cc/virtual/2023/poster/72143", "author_site": "Hannaneh Akrami, Kurt Mehlhorn, Masoud Seddighin, Golnoosh Shahkarami", "tldr": "", "abstract": "We consider the problem of guaranteeing maximin-share ($\\MMS$) when allocating a set of indivisible items to a set of agents with fractionally subadditive ($\\XOS$) valuations. \nFor $\\XOS$ valuations, it has been previously shown that for some instances no allocation can guarantee a fraction better than $1/2$ of maximin-share to all the agents. Also, a deterministic allocation exists that guarantees $0.219225$ of the maximin-share of each agent. \nOur results involve both deterministic and randomized allocations. On the deterministic side, we improve the best approximation guarantee for fractionally subadditive valuations to $3/13 = 0.230769$. We develop new ideas on allocating large items in our allocation algorithm which might be of independent interest. Furthermore, we investigate randomized algorithms and the Best-of-both-worlds fairness guarantees. We propose a randomized allocation that is $1/4$-$\\MMS$ ex-ante and $1/8$-$\\MMS$ ex-post for $\\XOS$ valuations. Moreover, we prove an upper bound of $3/4$ on the ex-ante guarantee for this class of valuations.", "keywords": "Algorithmic game theory;Fairness;Randomized;Allocation;Maximin-share;Fractionally Subadditive", "primary_area": "", "supplementary_material": "/attachment/4706c1c4f0d3d95db3734740735e25b90160e852.pdf", "author": "Hannaneh Akrami;Kurt Mehlhorn;Masoud Seddighin;Golnoosh Shahkarami", "authorids": "hakrami@mpi-inf.mpg.de;~Kurt_Mehlhorn1;m.seddighin@teias.institute;~Golnoosh_Shahkarami1", "gender": ";M;;F", "homepage": ";https://www.mpi-inf.mpg.de/~mehlhorn/;;https://www.mpi-inf.mpg.de/departments/algorithms-complexity/people/current-members/golnoosh-shahkarami", "dblp": ";m/KurtMehlhorn;;228/7906.html", "google_scholar": ";https://scholar.google.com.tw/citations?user=28CWXPUAAAAJ;;w2vrFwUAAAAJ", "orcid": ";0000-0003-4020-4334;;0000-0002-6169-7337", "linkedin": ";;;golnoosh-shahkarami-435b25138/?originalSubdomain=de", "or_profile": "hakrami@mpi-inf.mpg.de;~Kurt_Mehlhorn1;m.seddighin@teias.institute;~Golnoosh_Shahkarami1", "aff": ";;;Saarland Informatics Campus, Max-Planck Institute", "aff_domain": ";;;mpi-inf.mpg.de", "position": ";;;PhD student", "bibtex": "@inproceedings{\nakrami2023randomized,\ntitle={Randomized and Deterministic Maximin-share Approximations for Fractionally Subadditive Valuations},\nauthor={Hannaneh Akrami and Kurt Mehlhorn and Masoud Seddighin and Golnoosh Shahkarami},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I3k2NHt1zu}\n}", "github": "", "project": "", "reviewers": "Y5Uh;U5pm;TogA;DbzA", "pdf_size": 308469, "rating": "6;7;7;7", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "2;3;3;4", "wc_summary": "100;178;116;153", "wc_strengths": "17;62;43;57", "wc_weaknesses": "28;54;26;46", "wc_questions": "443;6;28;26", "wc_limitations": "1;12;13;1", "wc_review": "589;312;226;283", "wc_reply_reviewers": "25;0;6;10", 
"wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;0;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 136.75, 30.605350839354873 ], "wc_strengths_avg": [ 44.75, 17.469616481193857 ], "wc_weaknesses_avg": [ 38.5, 11.863810517704673 ], "wc_questions_avg": [ 125.75, 183.3662659815049 ], "wc_limitations_avg": [ 6.75, 5.7608593109014565 ], "wc_review_avg": [ 352.5, 140.00446421453853 ], "wc_reply_reviewers_avg": [ 10.25, 9.229707470987366 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4375338115863820494&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";;;mpi-inf.mpg.de", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Max-Planck Institute", "aff_unique_dep": "Informatics", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Provably Bounding Neural Network Preimages", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72142", "id": "I50HbChk3U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe061ec0ae03c5cf5b5323a2b9121bfd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I50HbChk3U", "openreview": "https://openreview.net/forum?id=I50HbChk3U", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72142", "video": "https://nips.cc/virtual/2023/poster/72142", "author_site": "Suhas Kotha, Christopher Brix, J. Zico Kolter, Krishnamurthy Dvijotham, Huan Zhang", "tldr": "", "abstract": "Most work on the formal verification of neural networks has focused on bounding the set of outputs that correspond to a given set of inputs (for example, bounded perturbations of a nominal input). However, many use cases of neural network verification require solving the inverse problem, or over-approximating the set of inputs that lead to certain outputs. We present the INVPROP algorithm for verifying properties over the preimage of a linearly constrained output set, which can be combined with branch-and-bound to increase precision. Contrary to other approaches, our efficient algorithm is GPU-accelerated and does not require a linear programming solver. We demonstrate our algorithm for identifying safe control regions for a dynamical system via backward reachability analysis, verifying adversarial robustness, and detecting out-of-distribution inputs to a neural network. Our results show that in certain settings, we find over-approximations over $2500\\times$ tighter than prior work while being $2.5\\times$ faster. By strengthening robustness verification with output constraints, we consistently verify more properties than the previous state-of-the-art on multiple benchmarks, including a large model with 167k neurons in VNN-COMP 2023. 
Our algorithm has been incorporated into the $\\alpha,\\beta$-CROWN verifier, available at https://abcrown.org.", "keywords": "Trustworthy ML;Formal Verification;Safe Control;OOD Detection", "primary_area": "", "supplementary_material": "", "author": "Suhas Kotha;Christopher Brix;J Zico Kolter;Krishnamurthy Dj Dvijotham;Huan Zhang", "authorids": "~Suhas_Kotha1;~Christopher_Brix1;~J_Zico_Kolter1;~Krishnamurthy_Dj_Dvijotham1;~Huan_Zhang1", "gender": "M;M;M;M;M", "homepage": "https://www.andrew.cmu.edu/user/suhask/;https://christopher-brix.de/;http://huan-zhang.com;http://www.zicokolter.com;http://dvij.github.io", "dblp": "312/5932.html;228/5443;23/1797-1.html;67/2526;16/8758", "google_scholar": ";https://scholar.google.com/citations?hl=en;LTa3GzEAAAAJ;UXh1I6UAAAAJ;BUtloecAAAAJ", "orcid": ";;;;", "linkedin": ";christopher-brix;;;", "or_profile": "~Suhas_Kotha1;~Christopher_Brix1;~Huan_Zhang1;~Zico_Kolter1;~Krishnamurthy_Dvijotham2", "aff": "Carnegie Mellon University;RWTH Aachen University, Rheinisch Westf\u00e4lische Technische Hochschule Aachen;Carnegie Mellon University;Carnegie Mellon University;Google Brain", "aff_domain": "cmu.edu;cs.rwth-aachen.de;cmu.edu;cmu.edu;google.com", "position": "Undergrad student;PhD student;Postdoc;Full Professor;research scientist ", "bibtex": "@inproceedings{\nkotha2023provably,\ntitle={Provably Bounding Neural Network Preimages},\nauthor={Suhas Kotha and Christopher Brix and J Zico Kolter and Krishnamurthy Dj Dvijotham and Huan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I50HbChk3U}\n}", "github": "", "project": "", "reviewers": "XnFH;K762;PXif;bSxe", "pdf_size": 1630050, "rating": "7;7;7;7", "confidence": "3;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "4;3;2;3", "wc_summary": "31;49;248;83", "wc_strengths": "73;41;41;70", "wc_weaknesses": "179;311;85;64", "wc_questions": "153;155;118;27", "wc_limitations": "6;106;40;2", "wc_review": "442;662;532;246", "wc_reply_reviewers": "112;19;47;24", "wc_reply_authors": "265;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 102.75, 85.9138376514517 ], "wc_strengths_avg": [ 56.25, 15.286840746210448 ], "wc_weaknesses_avg": [ 159.75, 97.47147018486999 ], "wc_questions_avg": [ 113.25, 51.924825469133744 ], "wc_limitations_avg": [ 38.5, 41.674332628129754 ], "wc_review_avg": [ 470.5, 151.38279294556565 ], "wc_reply_reviewers_avg": [ 50.5, 37.04389288398291 ], "wc_reply_authors_avg": [ 66.25, 114.74836600143811 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5390269553383458687&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "cmu.edu;cs.rwth-aachen.de;cmu.edu;cmu.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Carnegie Mellon University;RWTH Aachen University;Google", "aff_unique_dep": ";;Google Brain", "aff_unique_url": "https://www.cmu.edu;https://www.rwth-aachen.de;https://brain.google.com", "aff_unique_abbr": "CMU;RWTH;Google Brain", "aff_campus_unique_index": "1;2", 
"aff_campus_unique": ";Aachen;Mountain View", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "On Robust Streaming for Learning with Experts: Algorithms and Lower Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72141", "id": "I5SM5y57k2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb71332951af4ae27fbd457daadc5341-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I5SM5y57k2", "openreview": "https://openreview.net/forum?id=I5SM5y57k2", "poster": "/media/PosterPDFs/NeurIPS%202023/72141.png?t=1701476479.6239977", "slides": "https://nips.cc/virtual/2023/poster/72141", "video": "https://nips.cc/virtual/2023/poster/72141", "author_site": "David Woodruff, Fred Zhang, Samson Zhou", "tldr": "", "abstract": "In the online learning with experts problem, an algorithm makes predictions about an outcome on each of $T$ days, given a set of $n$ experts who make predictions on each day. The algorithm is given feedback on the outcomes of each day, including the cost of its prediction and the cost of the expert predictions, and the goal is to make a prediction with the minimum cost, compared to the best expert in hindsight. However, often the predictions made by experts or algorithms at some time influence future outcomes, so that the input is adaptively generated. \n\nIn this paper, we study robust algorithms for the experts problem under memory constraints. We first give a randomized algorithm that is robust to adaptive inputs that uses $\\widetilde{O}\\left(\\frac{n}{R\\sqrt{T}}\\right)$ space for $M=O\\left(\\frac{R^2 T}{\\log^2 n}\\right)$, thereby showing a smooth space-regret trade-off. We then show a space lower bound of $\\widetilde{\\Omega}\\left(\\frac{nM}{RT}\\right)$ for any randomized algorithm that achieves regret $R$ with probability $1-2^{-\\Omega(T)}$, when the best expert makes $M$ mistakes. Our result implies that the natural deterministic algorithm, which iterates through pools of experts until each expert in the pool has erred, is optimal up to polylogarithmic factors. 
Finally, we empirically demonstrate the benefit of using robust procedures against a white-box adversary that has access to the internal state of the algorithm.", "keywords": "online learning;memory efficiency;sub-linear algorithm;communication lower bound", "primary_area": "", "supplementary_material": "/attachment/d5359bcaf6a6b642625beddeb36580a8342a47ff.zip", "author": "David Woodruff;Fred Zhang;Samson Zhou", "authorids": "~David_Woodruff1;~Fred_Zhang1;~Samson_Zhou1", "gender": "M;M;", "homepage": "http://www.cs.cmu.edu/~dwoodruf/;http://fredzhang.me/;https://samsonzhou.github.io/", "dblp": "w/DPWoodruff;232/9071;179/2683", "google_scholar": "https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;guJ_kBQAAAAJ;NpjsgocAAAAJ", "orcid": ";;", "linkedin": ";fred-zhang-0/;", "or_profile": "~David_Woodruff1;~Fred_Zhang1;~Samson_Zhou1", "aff": "Carnegie Mellon University;University of California, Berkeley;University of California, Berkeley", "aff_domain": "cmu.edu;berkeley.edu;berkeley.edu", "position": "Full Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nwoodruff2023on,\ntitle={On Robust Streaming for Learning with Experts: Algorithms and Lower Bounds},\nauthor={David Woodruff and Fred Zhang and Samson Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I5SM5y57k2}\n}", "github": "", "project": "", "reviewers": "757a;BvCn;Tz12;gy2q", "pdf_size": 509006, "rating": "5;6;6;6", "confidence": "2;3;2;3", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "2;4;3;2", "wc_summary": "53;217;105;235", "wc_strengths": "63;67;35;109", "wc_weaknesses": "453;113;39;245", "wc_questions": "11;40;68;254", "wc_limitations": "37;27;0;7", "wc_review": "617;464;247;850", "wc_reply_reviewers": "57;69;18;20", "wc_reply_authors": "40;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 152.5, 76.03124357788711 ], "wc_strengths_avg": [ 68.5, 26.433879775772606 ], "wc_weaknesses_avg": [ 212.5, 157.24105697940345 ], "wc_questions_avg": [ 93.25, 94.97203535778308 ], "wc_limitations_avg": [ 17.75, 14.889173919328098 ], "wc_review_avg": [ 544.5, 219.98465855600023 ], "wc_reply_reviewers_avg": [ 41.0, 22.416511771459895 ], "wc_reply_authors_avg": [ 10.0, 17.320508075688775 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8348872786020807422&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cmu.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu", "aff_unique_abbr": "CMU;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Deductive Verification of Chain-of-Thought Reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72140", "id": "I5rsM4CY2z", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/72393bd47a35f5b3bee4c609e7bba733-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I5rsM4CY2z", "openreview": "https://openreview.net/forum?id=I5rsM4CY2z", "poster": "/media/PosterPDFs/NeurIPS%202023/72140.png?t=1701834534.4943933", "slides": "https://nips.cc/virtual/2023/poster/72140", "video": "https://nips.cc/virtual/2023/poster/72140", "author_site": "Zhan Ling, Yunhao Fang, Xuanlin Li, Zhiao Huang, Mingu Lee, Roland Memisevic, Hao Su", "tldr": "", "abstract": "Large Language Models (LLMs) significantly benefit from Chain-of-thought (CoT) prompting in performing various reasoning tasks. While CoT allows models to produce more comprehensive reasoning processes, its emphasis on intermediate reasoning steps can inadvertently introduce hallucinations and accumulated errors, thereby limiting models\u2019 ability to solve complex reasoning tasks. Inspired by how humans engage in careful and meticulous deductive logical reasoning processes to solve tasks, we seek to enable language models to perform explicit and rigorous deductive reasoning, and also ensure the trustworthiness of their reasoning process through self-verification. However, directly verifying the validity of an entire deductive reasoning process is challenging, even with advanced models like ChatGPT. In light of this, we propose to decompose a reasoning verification process into a series of step-by-step subprocesses, each only receiving their necessary context and premises. To facilitate this procedure, we propose Natural Program, a natural language-based deductive reasoning format. Our approach enables models to generate precise reasoning steps where subsequent steps are more rigorously grounded on prior steps. It also empowers language models to carry out reasoning self-verification in a step-by-step manner. By integrating this verification process into each deductive reasoning stage, we significantly enhance the rigor and trustfulness of generated reasoning steps. 
Along this process, we also improve the answer correctness on complex reasoning tasks.", "keywords": "Chain-of-thought;Large language model;Reasoning", "primary_area": "", "supplementary_material": "/attachment/e78ccd09640820cd430002b7e09158744191ba43.pdf", "author": "Zhan Ling;Yunhao Fang;Xuanlin Li;Zhiao Huang;Mingu Lee;Roland Memisevic;Hao Su", "authorids": "~Zhan_Ling2;~Yunhao_Fang1;~Xuanlin_Li1;~Zhiao_Huang1;~Mingu_Lee1;~Roland_Memisevic4;~Hao_Su1", "gender": "M;M;;M;M;M;M", "homepage": ";https://seerkfang.github.io/;https://xuanlinli17.github.io/;;;http://ai.ucsd.edu/~haosu;", "dblp": "254/1980;;251/3029;172/1410;;09/4945-1;98/4508", "google_scholar": "vsRxnYAAAAAJ;;7vyVxxQAAAAJ;;;1P8Zu04AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;", "linkedin": "zhan-ling-069a59149/;yunhao-fang-8b318221a/;xuanlin-li-4684b8145/;;mingu-lee-0aa28aa5/;;", "or_profile": "~Zhan_Ling2;~Yunhao_Fang1;~Xuanlin_Li1;~Zhiao_Huang1;~Mingu_Lee1;~Hao_Su1;~Roland_Memisevic1", "aff": "Qualcomm Inc, QualComm;University of California, San Diego;University of California, San Diego;University of California, San Diego, University of California, San Diego;Qualcomm Inc, QualComm;University of California, San Diego;Qualcomm Inc, Qualcomm", "aff_domain": "qti.qualcomm.com;ucsd.edu;ucsd.edu;eng.ucsd.edu;qti.qualcomm.com;ucsd.edu;qti.qualcomm.com", "position": "Intern;MS student;PhD student;PhD student;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nling2023deductive,\ntitle={Deductive Verification of Chain-of-Thought Reasoning},\nauthor={Zhan Ling and Yunhao Fang and Xuanlin Li and Zhiao Huang and Mingu Lee and Roland Memisevic and Hao Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I5rsM4CY2z}\n}", "github": "", "project": "", "reviewers": "TPbw;kmTd;bmW6;BLD6;LzaJ", "pdf_size": 438425, "rating": "3;5;6;6;6", "confidence": "4;3;4;4;4", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "190;78;100;87;83", "wc_strengths": "109;29;56;52;43", "wc_weaknesses": "370;171;171;240;25", "wc_questions": "296;14;118;119;205", "wc_limitations": "9;13;9;41;31", "wc_review": "974;305;454;539;387", "wc_reply_reviewers": "487;34;6;114;156", "wc_reply_authors": "726;86;0;26;311", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 107.6, 41.84065009055189 ], "wc_strengths_avg": [ 57.8, 27.227926839919338 ], "wc_weaknesses_avg": [ 195.4, 111.98321302766767 ], "wc_questions_avg": [ 150.4, 94.67967046837457 ], "wc_limitations_avg": [ 20.6, 13.047605144240073 ], "wc_review_avg": [ 531.8, 234.1156978931571 ], "wc_reply_reviewers_avg": [ 159.4, 172.42459221352388 ], "wc_reply_authors_avg": [ 229.8, 271.2300868266646 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.08574929257125442, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3205644836692572249&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "qti.qualcomm.com;ucsd.edu;ucsd.edu;eng.ucsd.edu;qti.qualcomm.com;ucsd.edu;qti.qualcomm.com", "author_num": 7, "aff_unique_index": "0;1;1;1;0;1;0", "aff_unique_norm": 
"Qualcomm Incorporated;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.qualcomm.com;https://www.ucsd.edu", "aff_unique_abbr": "Qualcomm;UCSD", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Concept Erasure via Kernelized Rate-Distortion Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72139", "id": "I6aOjhpcNQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86bd650f85480c595ecab29081a3774e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I6aOjhpcNQ", "openreview": "https://openreview.net/forum?id=I6aOjhpcNQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72139.png?t=1701375377.5991461", "slides": "https://nips.cc/virtual/2023/poster/72139", "video": "https://nips.cc/virtual/2023/poster/72139", "author_site": "Somnath Basu Roy Chowdhury, Nicholas Monath, Kumar Avinava Dubey, Amr Ahmed, Snigdha Chaturvedi", "tldr": "", "abstract": "Distributed representations provide a vector space that captures meaningful relationships between data instances. The distributed nature of these representations, however, entangles together multiple attributes or concepts of data instances (e.g., the topic or sentiment of a text, characteristics of the author (age, gender, etc), etc). Recent work has proposed the task of concept erasure, in which rather than making a concept predictable, the goal is to remove an attribute from distributed representations while retaining other information from the original representation space as much as possible. In this paper, we propose a new distance metric learning-based objective, the Kernelized Rate-Distortion Maximizer (KRaM), for performing concept erasure. KRaM fits a transformation of representations to match a specified distance measure (defined by a labeled concept to erase) using a modified rate-distortion function. Specifically, KRaM's objective function aims to make instances with similar concept labels dissimilar in the learned representation space while retaining other information. We find that optimizing KRaM effectively erases various types of concepts\u2014categorical, continuous, and vector-valued variables\u2014from data representations across diverse domains. We also provide a theoretical analysis of several properties of KRaM's objective. To assess the quality of the learned representations, we propose an alignment score to evaluate their similarity with the original representation space. 
Additionally, we conduct experiments to showcase KRaM's efficacy in various settings, from erasing binary gender variables in word embeddings to vector-valued variables in GPT-3 representations.", "keywords": "Concept Erasure;Representation Learning;Rate distortion;Fairness;Debiasing", "primary_area": "", "supplementary_material": "/attachment/20f735f1313529090053ebf57333ac178854d7c3.zip", "author": "Somnath Basu Roy Chowdhury;Nicholas Monath;Kumar Avinava Dubey;Amr Ahmed;Snigdha Chaturvedi", "authorids": "~Somnath_Basu_Roy_Chowdhury3;~Nicholas_Monath1;~Kumar_Avinava_Dubey1;~Amr_Ahmed1;~Snigdha_Chaturvedi2", "gender": ";M;M;F;M", "homepage": "https://www.cs.unc.edu/~somnath/;https://nmonath.github.io/;https://research.google/people/AmrAhmed/;https://sites.google.com/site/snigdhac/;https://sites.google.com/site/kumaravinavadubey/", "dblp": "190/7535;131/4309;49/2951;77/8700;10/7789", "google_scholar": "https://scholar.google.co.in/citations?user=xGbyrIUAAAAJ;PTfhfCQAAAAJ;ivUi2T0AAAAJ;gZD3EesAAAAJ;tBbUAfsAAAAJ", "orcid": ";0000-0002-5135-2423;;;", "linkedin": ";nicholas-monath-8627581aa/;amr-ahmed-b998965/;;", "or_profile": "~Somnath_Basu_Roy_Chowdhury3;~Nicholas_Monath1;~Amr_Ahmed1;~Snigdha_Chaturvedi2;~Kumar_A_Dubey1", "aff": "Department of Computer Science, University of North Carolina, Chapel Hill;Google;;Department of Computer Science, University of North Carolina, Chapel Hill;Google Research", "aff_domain": "cs.unc.edu;google.com;;cs.unc.edu;google.com", "position": "PhD student;Researcher;;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nchowdhury2023robust,\ntitle={Robust Concept Erasure via Kernelized Rate-Distortion Maximization},\nauthor={Somnath Basu Roy Chowdhury and Nicholas Monath and Kumar Avinava Dubey and Amr Ahmed and Snigdha Chaturvedi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I6aOjhpcNQ}\n}", "github": "", "project": "", "reviewers": "iqZj;x4Ej;GgzL;yWDJ", "pdf_size": 1088535, "rating": "5;6;7;7", "confidence": "5;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "55;75;83;210", "wc_strengths": "16;88;44;160", "wc_weaknesses": "223;132;84;170", "wc_questions": "2;23;168;41", "wc_limitations": "1;38;16;14", "wc_review": "297;356;395;595", "wc_reply_reviewers": "0;64;330;14", "wc_reply_authors": "24;24;683;24", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;4;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 105.75, 61.04660105198323 ], "wc_strengths_avg": [ 77.0, 54.35991169970753 ], "wc_weaknesses_avg": [ 152.25, 50.962608842169765 ], "wc_questions_avg": [ 58.5, 64.70896383036897 ], "wc_limitations_avg": [ 17.25, 13.292385038058445 ], "wc_review_avg": [ 410.75, 111.95171950443637 ], "wc_reply_reviewers_avg": [ 102.0, 133.7684566704722 ], "wc_reply_authors_avg": [ 188.75, 285.35537054697255 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2207612749661107476&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.unc.edu;google.com;;cs.unc.edu;google.com", "author_num": 5, 
"aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of North Carolina;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.unc.edu;https://www.google.com", "aff_unique_abbr": "UNC;Google", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Chapel Hill;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "State Regularized Policy Optimization on Data with Dynamics Shift", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72138", "id": "I8t9RKDnz2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67dd6a41bf9539cffc0fc0165e4d0616-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I8t9RKDnz2", "openreview": "https://openreview.net/forum?id=I8t9RKDnz2", "poster": "/media/PosterPDFs/NeurIPS%202023/72138.png?t=1701757161.9313312", "slides": "https://nips.cc/virtual/2023/poster/72138", "video": "https://nips.cc/virtual/2023/poster/72138", "author_site": "Zhenghai Xue, Qingpeng Cai, Shuchang Liu, Dong Zheng, Peng Jiang, Kun Gai, Bo An", "tldr": "", "abstract": "In many real-world scenarios, Reinforcement Learning (RL) algorithms are trained on data with dynamics shift, i.e., with different underlying environment dynamics. A majority of current methods address such issue by training context encoders to identify environment parameters. Data with dynamics shift are separated according to their environment parameters to train the corresponding policy.\nHowever, these methods can be sample inefficient as data are used \\textit{ad hoc}, and policies trained for one dynamics cannot benefit from data collected in all other environments with different dynamics. In this paper, we find that in many environments with similar structures and different dynamics, optimal policies have similar stationary state distributions. We exploit such property and learn the stationary state distribution from data with dynamics shift for efficient data reuse. Such distribution is used to regularize the policy trained in a new environment, leading to the SRPO (\\textbf{S}tate \\textbf{R}egularized \\textbf{P}olicy \\textbf{O}ptimization) algorithm. \nTo conduct theoretical analyses, the intuition of similar environment structures is characterized by the notion of homomorphous MDPs. We then demonstrate a lower-bound performance guarantee on policies regularized by the stationary state distribution. 
In practice, SRPO can be an add-on module to context-based algorithms in both online and offline RL settings.\nExperimental results show that SRPO can make several context-based algorithms far more data efficient and significantly improve their overall performance.", "keywords": "Reinforcement Learning;Dynamics Shift;Stationary State Distribution;Offline RL;Off-Policy RL", "primary_area": "", "supplementary_material": "/attachment/97867a3fde0a22d6f6cf08ce41d3c44a1d74b0b4.pdf", "author": "Zhenghai Xue;Qingpeng Cai;Shuchang Liu;Dong Zheng;Peng Jiang;Kun Gai;Bo An", "authorids": "~Zhenghai_Xue1;~Qingpeng_Cai2;~Shuchang_Liu1;~Dong_Zheng1;~Peng_Jiang6;~Kun_Gai1;~Bo_An2", "gender": ";M;M;M;M;M;M", "homepage": ";https://qingpengcai.github.io/;;https://scholar.google.com/citations?user=KI7sbM4AAAAJ;;;https://personal.ntu.edu.sg/boan/", "dblp": ";183/0940-1;335/1645;;;59/2902;42/6178-1.html", "google_scholar": ";uU6s1tYAAAAJ;kivnB4QAAAAJ;KI7sbM4AAAAJ;https://scholar.google.com/citations?hl=en;PXO4ygEAAAAJ;PEEpuNwAAAAJ", "orcid": ";0000-0001-6451-9299;0000-0002-1440-911X;0000-0003-0424-9658;0000-0002-9266-0780;;0000-0002-7064-7438", "linkedin": ";;;;;;", "or_profile": "~Zhenghai_Xue1;~Qingpeng_Cai2;~Shuchang_Liu1;~Dong_Zheng1;~Peng_Jiang6;~Kun_Gai1;~Bo_An2", "aff": ";Kuaishou;Kuaishou;Kuaishou Technology;Kuaishou Technology;Kuaishou Technology;Nanyang Technological University", "aff_domain": ";kuaishou.com;kuaishou.com;kuaishou.com;kuaishou.com;kuaishou.com;ntu.edu.sg", "position": ";Senior Staff Algorithm Engineer;Researcher;Researcher;Vice President;Instructor;Full Professor", "bibtex": "@inproceedings{\nxue2023state,\ntitle={State Regularized Policy Optimization on Data with Dynamics Shift},\nauthor={Zhenghai Xue and Qingpeng Cai and Shuchang Liu and Dong Zheng and Peng Jiang and Kun Gai and Bo An},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I8t9RKDnz2}\n}", "github": "", "project": "", "reviewers": "PXH4;ZrWb;mfzT;6EPu", "pdf_size": 1011727, "rating": "4;6;6;7", "confidence": "3;3;4;5", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "100;82;160;122", "wc_strengths": "37;83;36;120", "wc_weaknesses": "212;54;5;8", "wc_questions": "30;1;351;315", "wc_limitations": "29;1;20;12", "wc_review": "408;221;572;577", "wc_reply_reviewers": "0;12;91;357", "wc_reply_authors": "54;24;80;609", "reply_reviewers": "0;1;1;2", "reply_authors": "2;2;3;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 116.0, 29.086079144497972 ], "wc_strengths_avg": [ 69.0, 35.035696082709705 ], "wc_weaknesses_avg": [ 69.75, 84.39305362409871 ], "wc_questions_avg": [ 174.25, 159.58912086981368 ], "wc_limitations_avg": [ 15.5, 10.307764064044152 ], "wc_review_avg": [ 444.5, 145.85695046860127 ], "wc_reply_reviewers_avg": [ 115.0, 144.02603931234103 ], "wc_reply_authors_avg": [ 191.75, 241.713027162377 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2488504250363669903&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
";kuaishou.com;kuaishou.com;kuaishou.com;kuaishou.com;kuaishou.com;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Kuaishou Technology;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kuaishou.com;https://www.ntu.edu.sg", "aff_unique_abbr": "Kuaishou;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Formulating Discrete Probability Flow Through Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72137", "id": "I9GNrInbdf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6e706454d72c18582b9c1ff70b11f7d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I9GNrInbdf", "openreview": "https://openreview.net/forum?id=I9GNrInbdf", "poster": "/media/PosterPDFs/NeurIPS%202023/72137.png?t=1700623153.339128", "slides": "https://nips.cc/virtual/2023/poster/72137", "video": "https://nips.cc/virtual/2023/poster/72137", "author_site": "Pengze Zhang, Hubery Yin, Chen Li, Xiaohua Xie", "tldr": "", "abstract": "Continuous diffusion models are commonly acknowledged to display a deterministic probability flow, whereas discrete diffusion models do not. In this paper, we aim to establish the fundamental theory for the probability flow of discrete diffusion models. Specifically, we first prove that the continuous probability flow is the Monge optimal transport map under certain conditions, and also present an equivalent evidence for discrete cases. In view of these findings, we are then able to define the discrete probability flow in line with the principles of optimal transport. Finally, drawing upon our newly established definitions, we propose a novel sampling method that surpasses previous discrete diffusion models in its ability to generate more certain outcomes. Extensive experiments on the synthetic toy dataset and the CIFAR-10 dataset have validated the effectiveness of our proposed discrete probability flow. 
Code is released at: https://github.com/PangzeCheung/Discrete-Probability-Flow.", "keywords": "Discrete Probability Flow;Optimal Transport", "primary_area": "", "supplementary_material": "/attachment/729c7ef9edbf309cbd472c56e17f4cf17001f701.pdf", "author": "Pengze Zhang;Hubery Yin;Chen Li;Xiaohua Xie", "authorids": "~Pengze_Zhang1;~Hubery_Yin1;~Chen_Li11;~Xiaohua_Xie1", "gender": "M;;;M", "homepage": "https://github.com/PangzeCheung;;;https://cse.sysu.edu.cn/content/2478", "dblp": "315/5033;;;22/5763", "google_scholar": "sWE5xtEAAAAJ;;;5YZ3kvoAAAAJ", "orcid": ";;;0000-0002-0310-4679", "linkedin": ";;;", "or_profile": "~Pengze_Zhang1;~Hubery_Yin1;~Chen_Li11;~Xiaohua_Xie1", "aff": "SUN YAT-SEN UNIVERSITY;;;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;;;sysu.edu.cn", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nzhang2023formulating,\ntitle={Formulating Discrete Probability Flow Through Optimal Transport},\nauthor={Pengze Zhang and Hubery Yin and Chen Li and Xiaohua Xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I9GNrInbdf}\n}", "github": "", "project": "", "reviewers": "WcKm;v2T5;wEPe;Rnpe", "pdf_size": 5380290, "rating": "7;7;7;8", "confidence": "4;3;2;4", "soundness": "3;3;4;4", "novelty": "4;3;4;4", "presentation": "3;2;4;4", "wc_summary": "73;91;284;64", "wc_strengths": "250;105;37;66", "wc_weaknesses": "236;315;5;151", "wc_questions": "105;106;47;1", "wc_limitations": "17;40;17;1", "wc_review": "681;657;390;283", "wc_reply_reviewers": "38;116;62;49", "wc_reply_authors": "23;365;32;30", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 128.0, 90.58973451776973 ], "wc_strengths_avg": [ 114.5, 81.86727062752244 ], "wc_weaknesses_avg": [ 176.75, 114.87465995597114 ], "wc_questions_avg": [ 64.75, 43.87695864574025 ], "wc_limitations_avg": [ 18.75, 13.899190623917638 ], "wc_review_avg": [ 502.75, 170.71083006066135 ], "wc_reply_reviewers_avg": [ 66.25, 29.953088321573787 ], "wc_reply_authors_avg": [ 112.5, 145.81923741399828 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4625462728039080615&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sysu.edu.cn;;;sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Evaluating and Inducing Personality in Pre-trained Language Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72136", "id": "I9xE1Jsjfx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/21f7b745f73ce0d1f9bcea7f40b1388e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=I9xE1Jsjfx", "openreview": "https://openreview.net/forum?id=I9xE1Jsjfx", "poster": "/media/PosterPDFs/NeurIPS%202023/72136.png?t=1701403065.9372535", "slides": 
"https://nips.cc/virtual/2023/poster/72136", "video": "https://nips.cc/virtual/2023/poster/72136", "author_site": "Guangyuan Jiang, Manjie Xu, Song-Chun Zhu, Wenjuan Han, Chi Zhang, Yixin Zhu", "tldr": "", "abstract": "Standardized and quantified evaluation of machine behaviors is a crux of understanding LLMs. In this study, we draw inspiration from psychometric studies by leveraging human personality theory as a tool for studying machine behaviors. Originating as a philosophical quest for human behaviors, the study of personality delves into how individuals differ in thinking, feeling, and behaving. Toward building and understanding human-like social machines, we are motivated to ask: Can we assess machine behaviors by leveraging human psychometric tests in a **principled** and **quantitative** manner? If so, can we induce a specific personality in LLMs? To answer these questions, we introduce the Machine Personality Inventory (MPI) tool for studying machine behaviors; MPI follows standardized\npersonality tests, built upon the Big Five Personality Factors (Big Five) theory and personality assessment inventories. By systematically evaluating LLMs with MPI, we provide the first piece of evidence demonstrating the efficacy of MPI in studying LLMs behaviors. We further devise a Personality Prompting (P$^2$) method to induce LLMs with specific personalities in a **controllable** way, capable of producing diverse and verifiable behaviors. We hope this work sheds light on future studies by adopting personality as the essential indicator for various downstream tasks, and could further motivate research into equally intriguing human-like machine behaviors.", "keywords": "machine personality;machine behavior;personality trait theory;psychometric;large language models;prompt", "primary_area": "", "supplementary_material": "/attachment/d2d76d2f0518e389831da55c9003258e3b8f6be2.zip", "author": "Guangyuan Jiang;Manjie Xu;Song-Chun Zhu;Wenjuan Han;Chi Zhang;Yixin Zhu", "authorids": "~Guangyuan_Jiang1;~Manjie_Xu1;~Song-Chun_Zhu1;~Wenjuan_Han1;~Chi_Zhang12;~Yixin_Zhu1", "gender": "M;M;M;F;;M", "homepage": "https://jiang.gy/;https://mjtsu.github.io;https://zhusongchun.net/;https://scholar.google.com/citations?user=rfVLLfAAAAAJ;;https://yzhu.io/", "dblp": "322/5214;322/5851;10/10313;188/9071;;91/1103-1.html", "google_scholar": "3L79mEAAAAAJ;j-WwUGEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;rfVLLfAAAAAJ;;qG9l6JEAAAAJ", "orcid": ";;;0000-0002-2327-0842;;0000-0001-7024-1545", "linkedin": ";;;;;", "or_profile": "~Guangyuan_Jiang1;~Manjie_Xu1;~Song-Chun_Zhu1;~Wenjuan_Han1;~Chi_Zhang12;~Yixin_Zhu1", "aff": "Peking University;Tencent AI Lab;Peking University;Beijing Jiaotong University;;Peking University", "aff_domain": "pku.edu.cn;tencent.com;pku.edu.cn;bjtu.edu.cn;;pku.edu.cn", "position": "Undergrad student;Intern;Full Professor;Associate Professor;;Assistant Professor", "bibtex": "@inproceedings{\njiang2023evaluating,\ntitle={Evaluating and Inducing Personality in Pre-trained Language Models},\nauthor={Guangyuan Jiang and Manjie Xu and Song-Chun Zhu and Wenjuan Han and Chi Zhang and Yixin Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=I9xE1Jsjfx}\n}", "github": "", "project": "", "reviewers": "6q42;nVLW;U4fj;ptR4;oK1V", "pdf_size": 4386627, "rating": "5;5;7;7;8", "confidence": "3;4;3;4;4", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;4", "presentation": "3;2;4;4;4", "wc_summary": 
"176;114;151;110;113", "wc_strengths": "62;182;160;71;145", "wc_weaknesses": "114;251;203;112;56", "wc_questions": "26;200;1;182;1", "wc_limitations": "51;189;1;12;2", "wc_review": "429;936;516;487;317", "wc_reply_reviewers": "39;22;58;0;23", "wc_reply_authors": "35;31;31;0;30", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 132.8, 26.31653472628948 ], "wc_strengths_avg": [ 124.0, 48.485049242008614 ], "wc_weaknesses_avg": [ 147.2, 70.0668252456182 ], "wc_questions_avg": [ 82.0, 89.6459703500386 ], "wc_limitations_avg": [ 51.0, 71.36665888214188 ], "wc_review_avg": [ 537.0, 210.8108156618156 ], "wc_reply_reviewers_avg": [ 28.4, 19.31424344881259 ], "wc_reply_authors_avg": [ 25.4, 12.815615474880634 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2721655269759087, "gs_citation": 143, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12841151047820386225&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "pku.edu.cn;tencent.com;pku.edu.cn;bjtu.edu.cn;;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Peking University;Tencent;Beijing Jiao Tong University", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "http://www.pku.edu.cn;https://ai.tencent.com;http://www.njtu.edu.cn/en", "aff_unique_abbr": "Peking U;Tencent AI Lab;BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Estimating Causal Effects Identifiable from a Combination of Observations and Experiments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72135", "id": "IEJzoOBM0z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/917d55788726131e3bb21bf39d477f58-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IEJzoOBM0z", "openreview": "https://openreview.net/forum?id=IEJzoOBM0z", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72135", "video": "https://nips.cc/virtual/2023/poster/72135", "author_site": "Yonghan Jung, Ivan Diaz, Jin Tian, Elias Bareinboim", "tldr": "", "abstract": "Learning cause and effect relations is arguably one of the central challenges found throughout the data sciences.\nFormally, determining whether a collection of observational and interventional distributions can be combined to learn a target causal relation is known as the problem of generalized identification (or g-identification) [Lee et al., 2019]. \nAlthough g-identification has been well understood and solved in theory, it turns out to be challenging to apply these results in practice, in particular when considering the estimation of the target distribution from finite samples. \nIn this paper, we develop a new, general estimator that exhibits multiply robustness properties for g-identifiable causal functionals. \nSpecifically, we show that any g-identifiable causal effect can be expressed as a function of generalized multi-outcome sequential back-door adjustments that are amenable to estimation. \nWe then construct a corresponding estimator for the g-identification expression that exhibits robustness properties to bias. 
We analyze the asymptotic convergence properties of the estimator. Finally, we illustrate the use of the proposed estimator in experimental studies. Simulation results corroborate the theory.", "keywords": "Causal Effect Estimation;Causal Effect Identification;Data Fusion;Double Machine Learning;Doubly Robust Estimator", "primary_area": "", "supplementary_material": "", "author": "Yonghan Jung;Ivan Diaz;Jin Tian;Elias Bareinboim", "authorids": "~Yonghan_Jung1;~Ivan_Diaz1;~Jin_Tian1;~Elias_Bareinboim2", "gender": ";M;M;M", "homepage": "https://sites.google.com/view/yonghanjung;https://www.idiaz.xyz;https://mbzuai.ac.ae/study/faculty/jin-tian/;https://causalai.net", "dblp": "201/0684.html;;04/4658-1;85/9005", "google_scholar": "D9ATOa4AAAAJ;;T0crkfoAAAAJ;r5U-D7YAAAAJ", "orcid": ";;0000-0001-5313-1600;", "linkedin": "yhansjung/;;;", "or_profile": "~Yonghan_Jung1;~Ivan_Diaz1;~Jin_Tian1;~Elias_Bareinboim2", "aff": "Purdue University;New York University;Iowa State University;Columbia University", "aff_domain": "purdue.edu;nyu.edu;iastate.edu;columbia.edu", "position": "PhD student;Associate Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\njung2023estimating,\ntitle={Estimating Causal Effects Identifiable from a Combination of Observations and Experiments},\nauthor={Yonghan Jung and Ivan Diaz and Jin Tian and Elias Bareinboim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IEJzoOBM0z}\n}", "github": "", "project": "", "reviewers": "t8rU;8jnJ;sTQB;PJpp;T9Rz", "pdf_size": 1008921, "rating": "5;6;6;7;7", "confidence": "4;3;2;3;1", "soundness": "3;4;3;3;3", "novelty": "2;4;3;3;3", "presentation": "3;1;3;2;2", "wc_summary": "98;17;65;48;74", "wc_strengths": "103;26;12;28;56", "wc_weaknesses": "253;88;111;42;44", "wc_questions": "152;17;5;30;119", "wc_limitations": "1;10;5;13;53", "wc_review": "607;158;198;161;346", "wc_reply_reviewers": "32;74;16;5;66", "wc_reply_authors": "74;29;0;0;42", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 60.4, 27.045147439050872 ], "wc_strengths_avg": [ 45.0, 32.323366161339074 ], "wc_weaknesses_avg": [ 107.6, 77.31131870560739 ], "wc_questions_avg": [ 64.6, 59.351832322178566 ], "wc_limitations_avg": [ 16.4, 18.757398540309367 ], "wc_review_avg": [ 294.0, 170.92337464489754 ], "wc_reply_reviewers_avg": [ 38.6, 27.155846515989882 ], "wc_reply_authors_avg": [ 29.0, 27.84241368847177 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6813851438692469, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1458845073610135344&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "purdue.edu;nyu.edu;iastate.edu;columbia.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Purdue University;New York University;Iowa State University;Columbia University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.purdue.edu;https://www.nyu.edu;https://www.iastate.edu;https://www.columbia.edu", "aff_unique_abbr": "Purdue;NYU;ISU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "SHAP-IQ: Unified Approximation of any-order Shapley Interactions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72134", "id": "IEMLNF4gK4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/264f2e10479c9370972847e96107db7f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IEMLNF4gK4", "openreview": "https://openreview.net/forum?id=IEMLNF4gK4", "poster": "/media/PosterPDFs/NeurIPS%202023/72134.png?t=1700836414.175761", "slides": "https://nips.cc/virtual/2023/poster/72134", "video": "https://nips.cc/virtual/2023/poster/72134", "author_site": "Fabian Fumagalli, Maximilian Muschalik, Patrick Kolpaczki, Eyke H\u00fcllermeier, Barbara Hammer", "tldr": "", "abstract": "Predominately in explainable artificial intelligence (XAI) research, the Shapley value (SV) is applied to determine feature attributions for any black box model. Shapley interaction indices extend the SV to define any-order feature interactions. Defining a unique Shapley interaction index is an open research question and, so far, three definitions have been proposed, which differ by their choice of axioms. Moreover, each definition requires a specific approximation technique. Here, we propose SHAPley Interaction Quantification (SHAP-IQ), an efficient sampling-based approximator to compute Shapley interactions for arbitrary cardinal interaction indices (CII), i.e. interaction indices that satisfy the linearity, symmetry and dummy axiom. SHAP-IQ is based on a novel representation and, in contrast to existing methods, we provide theoretical guarantees for its approximation quality, as well as estimates for the variance of the point estimates. For the special case of SV, our approach reveals a novel representation of the SV and corresponds to Unbiased KernelSHAP with a greatly simplified calculation. 
We illustrate the computational efficiency and effectiveness by explaining language, image classification and high-dimensional synthetic models.", "keywords": "Explainable Artificial Intelligence;Feature Interaction;Shapley Interaction;Shapley Value", "primary_area": "", "supplementary_material": "/attachment/ba3c406620f3b9268f1344f9c95ef8c730a82fcd.zip", "author": "Fabian Fumagalli;Maximilian Muschalik;Patrick Kolpaczki;Eyke H\u00fcllermeier;Barbara Hammer", "authorids": "~Fabian_Fumagalli1;~Maximilian_Muschalik1;~Patrick_Kolpaczki1;~Eyke_H\u00fcllermeier1;~Barbara_Hammer4", "gender": "M;M;M;M;F", "homepage": "https://hammer-lab.techfak.uni-bielefeld.de/people/316634936/;https://maxmuschalik.com/;https://www.kiml.ifi.lmu.de/people/employees/kolpaczki/index.html;https://cs.uni-paderborn.de/index.php?id=60202;https://www.techfak.uni-bielefeld.de/~bhammer/", "dblp": "329/4508;329/4090;304/9952;h/EykeHullermeier;h/BarbaraHammer", "google_scholar": "anUMB08AAAAJ;https://scholar.google.de/citations?user=jJBCW74AAAAJ;PVwqZS8AAAAJ;https://scholar.google.de/citations?user=usVJeNN3xFAC;1d3OxaUAAAAJ", "orcid": "0000-0003-3955-3510;0000-0002-6921-0204;;0000-0002-9944-4108;0000-0002-2615-8151", "linkedin": "fabian-fumagalli/;maximilian-muschalik/;;;", "or_profile": "~Fabian_Fumagalli1;~Maximilian_Muschalik1;~Patrick_Kolpaczki1;~Eyke_H\u00fcllermeier1;~Barbara_Hammer4", "aff": "Universit\u00e4t Bielefeld;Institute of Computer Science, Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Universit\u00e4t Paderborn;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Universit\u00e4t Bielefeld", "aff_domain": "uni-bielefeld.de;ifi.lmu.de;uni-paderborn.de;lmu.de;uni-bielefeld.de", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfumagalli2023shapiq,\ntitle={{SHAP}-{IQ}: Unified Approximation of any-order Shapley Interactions},\nauthor={Fabian Fumagalli and Maximilian Muschalik and Patrick Kolpaczki and Eyke H{\\\"u}llermeier and Barbara Hammer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IEMLNF4gK4}\n}", "github": "", "project": "", "reviewers": "JZ6i;wrD8;VdQo;xK2B;cGA2", "pdf_size": 1453607, "rating": "5;6;7;7;8", "confidence": "3;4;3;4;4", "soundness": "3;3;4;3;3", "novelty": "3;3;3;3;3", "presentation": "4;3;4;3;4", "wc_summary": "75;186;50;250;89", "wc_strengths": "45;96;33;241;39", "wc_weaknesses": "101;109;68;25;18", "wc_questions": "48;3;5;10;67", "wc_limitations": "86;8;13;7;20", "wc_review": "355;402;169;533;233", "wc_reply_reviewers": "0;54;0;33;18", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 130.0, 75.68619424967805 ], "wc_strengths_avg": [ 90.8, 78.36938177630343 ], "wc_weaknesses_avg": [ 64.2, 37.541443765523994 ], "wc_questions_avg": [ 26.6, 26.035360569809665 ], "wc_limitations_avg": [ 26.8, 29.955967685921948 ], "wc_review_avg": [ 338.4, 128.05873652351877 ], "wc_reply_reviewers_avg": [ 21.0, 20.610676844781203 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4803844614152616, "gs_citation": 42, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1476441530829741681&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "uni-bielefeld.de;ifi.lmu.de;uni-paderborn.de;lmu.de;uni-bielefeld.de", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Universit\u00e4t Bielefeld;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Paderborn", "aff_unique_dep": ";Institute of Computer Science;", "aff_unique_url": "https://www.uni-bielefeld.de/;https://www.uni-muenchen.de;https://www.uni-paderborn.de", "aff_unique_abbr": "Uni Bielefeld;LMU M\u00fcnchen;UPB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Connecting Multi-modal Contrastive Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72133", "id": "IGTbT9P1ti", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46362971bfc3a97e6a271f2eb90fba17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IGTbT9P1ti", "openreview": "https://openreview.net/forum?id=IGTbT9P1ti", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72133", "video": "https://nips.cc/virtual/2023/poster/72133", "author_site": "Zehan Wang, Yang Zhao, Xize \u6210, Haifeng Huang, Jiageng Liu, Aoxiong Yin, Li Tang, Linjun Li, Yongqi Wang, Ziang Zhang, Zhou Zhao", "tldr": "", "abstract": "Multi-modal Contrastive Representation (MCR) learning aims to encode different modalities into a semantically aligned shared space. This paradigm shows remarkable generalization ability on numerous downstream tasks across various modalities. However, the reliance on massive high-quality data pairs limits its further development on more modalities. This paper proposes a novel training-efficient method for learning MCR without paired data called Connecting Multi-modal Contrastive Representations (C-MCR). Specifically, given two existing MCRs pre-trained on $(\\mathcal{A}$, $\\mathcal{B})$ and $(\\mathcal{B}$, $\\mathcal{C})$ modality pairs, we project them to a new space and use the data from the overlapping modality $\\mathcal{B}$ to aligning the two MCRs in the new space. Meanwhile, since the modality pairs $(\\mathcal{A}$, $\\mathcal{B})$ and $(\\mathcal{B}$, $\\mathcal{C})$ are already aligned within each MCR, the connection learned by overlapping modality can also be transferred to non-overlapping modality pair $(\\mathcal{A}$, $\\mathcal{C})$. To unleash the potential of C-MCR, we further introduce a semantic-enhanced inter- and intra-MCR connection method. We first enhance the semantic consistency and completion of embeddings across different modalities for more robust alignment. Then we utilize the inter-MCR alignment to establish the connection, and employ the intra-MCR alignment to better maintain the connection for inputs from non-overlapping modalities. To demonstrate the effectiveness of C-MCR, we take the field of audio-visual and 3D-language learning as examples. Specifically, we connect CLIP and CLAP via texts to derive audio-visual representations, and integrate CLIP and ULIP via images for 3D-language representations. Remarkably, without using any paired data, C-MCR for audio-visual achieves state-of-the-art performance on audio-image retrieval, audio-visual source localization, and counterfactual audio-image recognition tasks. Furthermore, C-MCR for 3D-language also attains advanced zero-shot 3D point cloud classification accuracy on ModelNet40. 
Our project page is available at \\url{https://c-mcr.github.io/C-MCR/}", "keywords": "multi-modal;representation learning;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/dbd440a6b9f82d0a38ccf95bf27be7b20d97ed3e.zip", "author": "Zehan Wang;Yang Zhao;Xize Cheng;Haifeng Huang;Jiageng Liu;Aoxiong Yin;Li Tang;Linjun Li;Yongqi Wang;Ziang Zhang;Zhou Zhao", "authorids": "~Zehan_Wang2;~Yang_Zhao14;~Xize_Cheng1;~Haifeng_Huang3;~Jiageng_Liu1;~Aoxiong_Yin1;~Li_Tang3;~Linjun_Li2;~Yongqi_Wang1;~Ziang_Zhang1;~Zhou_Zhao2", "gender": "M;M;M;M;M;;M;;M;M;M", "homepage": "https://github.com/12zehan17;;https://exgc.github.io/;https://zzzzchs.github.io/;https://jiagengliu02.github.io;;;;;;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "126/7826-1;50/2082-22;334/2167;;;;;;;;75/7785", "google_scholar": "euXK0lkAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;oUm2gZUAAAAJ;;;https://scholar.google.com.hk/citations?hl=en;;9_79D6IAAAAJ;DptGMnYAAAAJ;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": "0009-0007-7509-7563;;0000-0001-9708-3225;;;;;;0000-0003-4695-3440;;0000-0001-6121-0384", "linkedin": ";;;haifeng-huang-784b2b249/;;;;;;;", "or_profile": "~Zehan_Wang2;~Yang_Zhao14;~Xize_Cheng1;~Haifeng_Huang3;~Jiageng_Liu1;~Aoxiong_Yin1;~Li_Tang3;~Linjun_Li2;~Yongqi_Wang1;~Ziang_Zhang1;~Zhou_Zhao2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;PhD student;MS student;Undergrad student;;Undergrad student;;MS student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nwang2023connecting,\ntitle={Connecting Multi-modal Contrastive Representations},\nauthor={Zehan Wang and Yang Zhao and Xize Cheng and Haifeng Huang and Jiageng Liu and Aoxiong Yin and Li Tang and Linjun Li and Yongqi Wang and Ziang Zhang and Zhou Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IGTbT9P1ti}\n}", "github": "", "project": "", "reviewers": "3nbF;MbUd;XBwt;5S9J;Szdi", "pdf_size": 8275318, "rating": "5;5;6;6;7", "confidence": "5;4;3;4;4", "soundness": "3;3;4;3;4", "novelty": "2;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "80;88;160;116;150", "wc_strengths": "47;73;91;63;93", "wc_weaknesses": "104;222;150;128;123", "wc_questions": "270;4;90;77;78", "wc_limitations": "1;1;8;40;32", "wc_review": "502;388;499;424;476", "wc_reply_reviewers": "304;36;106;68;29", "wc_reply_authors": "1266;0;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.8, 32.03997503120126 ], "wc_strengths_avg": [ 73.4, 17.315888657530692 ], "wc_weaknesses_avg": [ 145.4, 41.00536550257783 ], "wc_questions_avg": [ 103.8, 88.49497160856089 ], "wc_limitations_avg": [ 16.4, 16.40243884305014 ], "wc_review_avg": [ 457.8, 44.71420355994278 ], "wc_reply_reviewers_avg": [ 108.6, 101.43293350781097 ], "wc_reply_authors_avg": [ 253.2, 506.4 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 1.6000000000000003 ], "replies_avg": [ 23, 0 ], 
"authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9748219438255061926&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Leveraging sparse and shared feature activations for disentangled representation learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72132", "id": "IHR83ufYPy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57fabaa549352c52d5d312171b16970e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IHR83ufYPy", "openreview": "https://openreview.net/forum?id=IHR83ufYPy", "poster": "/media/PosterPDFs/NeurIPS%202023/72132.png?t=1697452221.9609766", "slides": "https://nips.cc/virtual/2023/poster/72132", "video": "https://nips.cc/virtual/2023/poster/72132", "author_site": "Marco Fumero, Florian Wenzel, Luca Zancato, Alessandro Achille, Emanuele Rodol\u00e0, Stefano Soatto, Bernhard Sch\u00f6lkopf, Francesco Locatello", "tldr": "", "abstract": "Recovering the latent factors of variation of high dimensional data has so far focused on simple synthetic settings. Mostly building on unsupervised and weakly-supervised objectives, prior work missed out on the positive implications for representation learning on real world data. In this work, we propose to leverage knowledge extracted from a diversified set of supervised tasks to learn a common disentangled representation. Assuming each supervised task only depends on an unknown subset of the factors of variation, we disentangle the feature space of a supervised multi-task model, with features activating sparsely across different tasks and information being shared as appropriate. 
Importantly, we never directly observe the factors of variation but establish that access to multiple tasks is sufficient for identifiability under sufficiency and minimality assumptions.\nWe validate our approach on six real world distribution shift benchmarks, and different data modalities (images, text), demonstrating how disentangled representations can be transferred to real settings.", "keywords": "disentanglement;OOD generalization;multitask learning", "primary_area": "", "supplementary_material": "/attachment/c262ccc26bb622bd2daa91207159a6dc5f5d99c1.pdf", "author": "Marco Fumero;Florian Wenzel;Luca Zancato;Alessandro Achille;Emanuele Rodol\u00e0;Stefano Soatto;Bernhard Sch\u00f6lkopf;Francesco Locatello", "authorids": "~Marco_Fumero1;~Florian_Wenzel1;~Luca_Zancato1;~Alessandro_Achille1;~Emanuele_Rodol\u00e01;~Stefano_Soatto3;~Bernhard_Sch\u00f6lkopf1;~Francesco_Locatello1", "gender": ";M;M;M;M;;;M", "homepage": ";;;;;;;https://twitter.com/FrancescoLocat8", "dblp": "273/9625;04/9709;274/1481;190/7328;54/8401;;;195/6074", "google_scholar": "VYEljYEAAAAJ;;Z2Mhh2UAAAAJ;;-EH4wBYAAAAJ;;;", "orcid": "0000-0001-5614-5004;;;;0000-0003-0091-7241;;;", "linkedin": ";;;;;;;", "or_profile": "~Marco_Fumero1;~Florian_Wenzel1;~Luca_Zancato1;~Alessandro_Achille1;~Emanuele_Rodol\u00e01;~Stefano_Soatto3;~Bernhard_Sch\u00f6lkopf1;~Francesco_Locatello1", "aff": "Sapienza University of Rome;Amazon;Amazon Web Services;California Institute of Technology;Sapienza University of Rome;;;Amazon", "aff_domain": "uniroma1.it;amazon.com;amazon.it;caltech.edu;uniroma1.it;;;amazon.com", "position": "PhD student;Researcher;Applied Scientist;Postdoc;Full Professor;;;Senior Applied Scientist", "bibtex": "@inproceedings{\nfumero2023leveraging,\ntitle={Leveraging sparse and shared feature activations for disentangled representation learning},\nauthor={Marco Fumero and Florian Wenzel and Luca Zancato and Alessandro Achille and Emanuele Rodol{\\`a} and Stefano Soatto and Bernhard Sch{\\"o}lkopf and Francesco Locatello},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IHR83ufYPy}\n}", "github": "", "project": "", "reviewers": "S5sy;vgpp;BjS9;Aoq2", "pdf_size": 1466102, "rating": "7;7;7;8", "confidence": "4;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "98;97;187;72", "wc_strengths": "52;107;54;109", "wc_weaknesses": "201;106;427;62", "wc_questions": "367;218;30;28", "wc_limitations": "3;15;1;11", "wc_review": "721;543;699;282", "wc_reply_reviewers": "330;56;28;0", "wc_reply_authors": "652;259;50;35", "reply_reviewers": "2;2;1;0", "reply_authors": "3;3;2;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 113.5, 43.69496538504179 ], "wc_strengths_avg": [ 80.5, 27.518175811634027 ], "wc_weaknesses_avg": [ 199.0, 140.89535123629878 ], "wc_questions_avg": [ 160.75, 141.89322570158168 ], "wc_limitations_avg": [ 7.5, 5.722761571129799 ], "wc_review_avg": [ 561.25, 175.22039692912466 ], "wc_reply_reviewers_avg": [ 103.5, 132.2601602902401 ], "wc_reply_authors_avg": [ 249.0, 248.95079835180283 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, 
"gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14366466390273331048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "uniroma1.it;amazon.com;amazon.it;caltech.edu;uniroma1.it;;;amazon.com", "author_num": 8, "aff_unique_index": "0;1;1;2;0;1", "aff_unique_norm": "Sapienza University of Rome;Amazon;California Institute of Technology", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.uniroma1.it;https://www.amazon.com;https://www.caltech.edu", "aff_unique_abbr": "Sapienza;Amazon;Caltech", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Rome;;Pasadena", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "Italy;United States" }, { "title": "Goal-Conditioned Predictive Coding for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72131", "id": "IJblKO45YU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51053d7b8473df7d5a2165b2a8ee9629-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IJblKO45YU", "openreview": "https://openreview.net/forum?id=IJblKO45YU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72131", "video": "https://nips.cc/virtual/2023/poster/72131", "author_site": "Zilai Zeng, Ce Zhang, Shijie Wang, Chen Sun", "tldr": "", "abstract": "Recent work has demonstrated the effectiveness of formulating decision making as supervised learning on offline-collected trajectories. Powerful sequence models, such as GPT or BERT, are often employed to encode the trajectories. However, the benefits of performing sequence modeling on trajectory data remain unclear. In this work, we investigate whether sequence modeling has the ability to condense trajectories into useful representations that enhance policy learning. We adopt a two-stage framework that first leverages sequence models to encode trajectory-level representations, and then learns a goal-conditioned policy employing the encoded representations as its input. This formulation allows us to consider many existing supervised offline RL methods as specific instances of our framework. Within this framework, we introduce Goal-Conditioned Predictive Coding (GCPC), a sequence modeling objective that yields powerful trajectory representations and leads to performant policies. Through extensive empirical evaluations on AntMaze, FrankaKitchen and Locomotion environments, we observe that sequence modeling can have a significant impact on challenging decision making tasks. 
Furthermore, we demonstrate that GCPC learns a goal-conditioned latent representation encoding the future trajectory, which enables competitive performance on all three benchmarks.", "keywords": "reinforcement learning;offline RL;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Zilai Zeng;Ce Zhang;Shijie Wang;Chen Sun", "authorids": "~Zilai_Zeng1;~Ce_Zhang7;~Shijie_Wang2;~Chen_Sun1", "gender": "M;M;M;M", "homepage": "https://zilaiz.github.io;https://ceezh.github.io/;https://wang-sj16.github.io/;https://chensun.me", "dblp": "306/6661;97/919-10;;01/6072-2", "google_scholar": "nyqMsxQAAAAJ;zGA2ReUAAAAJ;https://scholar.google.com/citations?hl=en;vQa7heEAAAAJ", "orcid": ";;;", "linkedin": ";;shijie-wang-a38413132/;", "or_profile": "~Zilai_Zeng1;~Ce_Zhang7;~Shijie_Wang2;~Chen_Sun1", "aff": "Brown University;Brown University;Brown University;Google", "aff_domain": "brown.edu;brown.edu;brown.edu;google.com", "position": "MS student;MS student;PhD student;Research Scientist", "bibtex": "@inproceedings{\nzeng2023goalconditioned,\ntitle={Goal-Conditioned Predictive Coding for Offline Reinforcement Learning},\nauthor={Zilai Zeng and Ce Zhang and Shijie Wang and Chen Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IJblKO45YU}\n}", "github": "", "project": "", "reviewers": "tnif;eFoi;EZxU;3wNA", "pdf_size": 1686344, "rating": "3;5;6;8", "confidence": "4;4;4;5", "soundness": "2;2;4;4", "novelty": "2;3;4;4", "presentation": "2;2;2;4", "wc_summary": "184;118;86;88", "wc_strengths": "152;102;179;49", "wc_weaknesses": "61;749;348;183", "wc_questions": "40;51;218;7", "wc_limitations": "1;30;61;9", "wc_review": "438;1050;892;336", "wc_reply_reviewers": "302;178;943;256", "wc_reply_authors": "0;437;1703;232", "reply_reviewers": "1;1;5;1", "reply_authors": "1;2;7;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 119.0, 39.61060464067672 ], "wc_strengths_avg": [ 120.5, 49.671420354163416 ], "wc_weaknesses_avg": [ 335.25, 259.6847848835199 ], "wc_questions_avg": [ 79.0, 81.86879747498432 ], "wc_limitations_avg": [ 25.25, 23.19886850689059 ], "wc_review_avg": [ 679.0, 299.47453981933086 ], "wc_reply_reviewers_avg": [ 419.75, 305.3329125724903 ], "wc_reply_authors_avg": [ 593.0, 659.2431266232512 ], "reply_reviewers_avg": [ 2.0, 1.7320508075688772 ], "reply_authors_avg": [ 3.0, 2.345207879911715 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8006407690254357, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7684338365067727276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "brown.edu;brown.edu;brown.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Brown University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.brown.edu;https://www.google.com", "aff_unique_abbr": "Brown;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "QuACK: Accelerating Gradient-Based Quantum Optimization with Koopman Operator Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72130", "id": "IKQOS8rqwr", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/5159aaee380391c366b27994ed225e4f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IKQOS8rqwr", "openreview": "https://openreview.net/forum?id=IKQOS8rqwr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72130", "video": "https://nips.cc/virtual/2023/poster/72130", "author_site": "Di Luo, Jiayu Shen, Rumen Dangovski, Marin Soljacic", "tldr": "", "abstract": "Quantum optimization, a key application of quantum computing, has traditionally been stymied by the linearly increasing complexity of gradient calculations with an increasing number of parameters. This work bridges the gap between Koopman operator theory, which has found utility in applications because it allows for a linear representation of nonlinear dynamical systems, and natural gradient methods in quantum optimization, leading to a significant acceleration of gradient-based quantum optimization. We present Quantum-circuit Alternating Controlled Koopman learning (QuACK), a novel framework that leverages an alternating algorithm for efficient prediction of gradient dynamics on quantum computers. We demonstrate QuACK's remarkable ability to accelerate gradient-based optimization across a range of applications in quantum optimization and machine learning. In fact, our empirical studies, spanning quantum chemistry, quantum condensed matter, quantum machine learning, and noisy environments, have shown accelerations of more than 200x speedup in the overparameterized regime, 10x speedup in the smooth regime, and 3x speedup in the non-smooth regime. With QuACK, we offer a robust advancement that harnesses the advantage of gradient-based quantum optimization for practical benefits.", "keywords": "Koopman operator;quantum optimization;machine learning", "primary_area": "", "supplementary_material": "", "author": "Di Luo;Jiayu Shen;Rumen Dangovski;Marin Soljacic", "authorids": "~Di_Luo1;~Jiayu_Shen1;~Rumen_Dangovski1;~Marin_Soljacic1", "gender": "M;M;M;", "homepage": ";;http://super-ms.mit.edu/rumen.html;https://www.rle.mit.edu/marin/", "dblp": ";;207/8546;131/2044", "google_scholar": "OxZytTQAAAAJ;;;", "orcid": ";0000-0002-2949-4038;;", "linkedin": ";;;", "or_profile": "~Di_Luo1;~Jiayu_Shen1;~Rumen_Dangovski1;~Marin_Soljacic1", "aff": "Massachusetts Institute of Technology;University of Illinois, Urbana Champaign;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;illinois.edu;mit.edu;", "position": "Postdoc;PhD student;PhD student;", "bibtex": "@inproceedings{\nluo2023quack,\ntitle={Qu{ACK}: Accelerating Gradient-Based Quantum Optimization with Koopman Operator Learning},\nauthor={Di Luo and Jiayu Shen and Rumen Dangovski and Marin Soljacic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IKQOS8rqwr}\n}", "github": "", "project": "", "reviewers": "sYsu;kkUu;ePBi;M6jg", "pdf_size": 1969406, "rating": "6;6;7;7", "confidence": "3;5;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "66;75;67;99", "wc_strengths": "68;76;83;156", "wc_weaknesses": "57;284;686;20", "wc_questions": "30;22;70;28", "wc_limitations": "1;8;47;6", "wc_review": "222;465;953;309", "wc_reply_reviewers": "21;14;67;6", "wc_reply_authors": "23;19;53;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 
0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.75, 13.311179511974137 ], "wc_strengths_avg": [ 95.75, 35.187888541371734 ], "wc_weaknesses_avg": [ 261.75, 264.97582436894123 ], "wc_questions_avg": [ 37.5, 18.993419913222578 ], "wc_limitations_avg": [ 15.5, 18.364367672206956 ], "wc_review_avg": [ 487.25, 282.643215910094 ], "wc_reply_reviewers_avg": [ 27.0, 23.695991222145572 ], "wc_reply_authors_avg": [ 28.5, 14.239030865898142 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1871740046804276785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;illinois.edu;mit.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://illinois.edu", "aff_unique_abbr": "MIT;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Label Position Bias in Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72129", "id": "IKjOMA8olL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4798eef078de031518beaf54f4b5fb5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IKjOMA8olL", "openreview": "https://openreview.net/forum?id=IKjOMA8olL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72129", "video": "https://nips.cc/virtual/2023/poster/72129", "author_site": "Haoyu Han, Xiaorui Liu, Feng Shi, MohamadAli Torkamani, Charu Aggarwal, Jiliang Tang", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as a powerful tool for semi-supervised node classification tasks. However, recent studies have revealed various biases in GNNs stemming from both node features and graph topology. In this work, we uncover a new bias - label position bias, which indicates that the node closer to the labeled nodes tends to perform better. We introduce a new metric, the Label Proximity Score, to quantify this bias, and find that it is closely related to performance disparities. To address the label position bias, we propose a novel optimization framework for learning a label position unbiased graph structure, which can be applied to existing GNNs. Extensive experiments demonstrate that our proposed method not only outperforms backbone methods but also significantly mitigates the issue of label position bias in GNNs.", "keywords": "Graph Neural Networks;Label Position Bias;Graph Structure Learning", "primary_area": "", "supplementary_material": "", "author": "Haoyu Han;Xiaorui Liu;Feng Shi;MohamadAli Torkamani;Charu C. 
Aggarwal;Jiliang Tang", "authorids": "~Haoyu_Han1;~Xiaorui_Liu1;~Feng_Shi5;~MohamadAli_Torkamani1;~Charu_C._Aggarwal2;~Jiliang_Tang1", "gender": "M;M;M;M;M;M", "homepage": "https://cse.msu.edu/~hanhaoy1/;https://sites.google.com/ncsu.edu/xiaorui/;https://www.billshi.net;;http://www.charuaggarwal.net;https://www.cse.msu.edu/~tangjili/", "dblp": "257/5633-1;172/0995;181/2781;137/3244;a/CharuCAggarwal;64/10812", "google_scholar": ";NhvN1KoAAAAJ;o249gxgAAAAJ;UtE9noAAAAAJ;x_wsduUAAAAJ;WtzKMWAAAAAJ", "orcid": "0000-0002-2529-6042;0000-0001-8217-5688;;;0000-0003-2579-7581;0000-0001-7125-3898", "linkedin": ";;;ali-torkamani-8474587/;;", "or_profile": "~Haoyu_Han1;~Xiaorui_Liu1;~Feng_Shi5;~MohamadAli_Torkamani1;~Charu_C._Aggarwal2;~Jiliang_Tang1", "aff": "Michigan State University;North Carolina State University;TigerGraph;AWS AI;International Business Machines;Michigan State University", "aff_domain": "msu.edu;ncsu.edu;tigergraph.com;amazon.com;ibm.com;msu.edu", "position": "PhD student;Assistant Professor;Researcher;Researcher;Distinguished Research Staff Member;Full Professor", "bibtex": "@inproceedings{\nhan2023towards,\ntitle={Towards Label Position Bias in Graph Neural Networks},\nauthor={Haoyu Han and Xiaorui Liu and Feng Shi and MohamadAli Torkamani and Charu C. Aggarwal and Jiliang Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IKjOMA8olL}\n}", "github": "", "project": "", "reviewers": "tP9u;Lj4a;VK13;fFnJ", "pdf_size": 2699387, "rating": "5;5;5;6", "confidence": "3;5;4;5", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "113;93;77;45", "wc_strengths": "134;99;118;20", "wc_weaknesses": "33;140;270;72", "wc_questions": "73;108;4;151", "wc_limitations": "1;60;16;1", "wc_review": "354;500;485;289", "wc_reply_reviewers": "0;43;69;33", "wc_reply_authors": "48;76;214;416", "reply_reviewers": "0;1;2;1", "reply_authors": "2;3;3;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 24.879710609249457 ], "wc_strengths_avg": [ 92.75, 43.79140897482062 ], "wc_weaknesses_avg": [ 128.75, 90.09266063337235 ], "wc_questions_avg": [ 84.0, 53.819141576208736 ], "wc_limitations_avg": [ 19.5, 24.171263930543642 ], "wc_review_avg": [ 407.0, 88.69329174182228 ], "wc_reply_reviewers_avg": [ 36.25, 24.71209218176397 ], "wc_reply_authors_avg": [ 188.5, 145.60477327340612 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5110648884411727502&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "msu.edu;ncsu.edu;tigergraph.com;amazon.com;ibm.com;msu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Michigan State University;North Carolina State University;TigerGraph;Amazon;International Business Machines Corporation", "aff_unique_dep": ";;;AWS AI;", "aff_unique_url": "https://www.msu.edu;https://www.ncsu.edu;https://www.tigergraph.com;https://aws.amazon.com;https://www.ibm.com", "aff_unique_abbr": "MSU;NCSU;TigerGraph;AWS;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" 
}, { "title": "Initialization Matters: Privacy-Utility Analysis of Overparameterized Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72128", "id": "IKvxmnHjkL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1165af8b913fb836c6280b42d6e0084f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IKvxmnHjkL", "openreview": "https://openreview.net/forum?id=IKvxmnHjkL", "poster": "/media/PosterPDFs/NeurIPS%202023/72128.png?t=1701525411.6685774", "slides": "https://nips.cc/virtual/2023/poster/72128", "video": "https://nips.cc/virtual/2023/poster/72128", "author_site": "Jiayuan Ye, Zhenyu Zhu, Fanghui Liu, Reza Shokri, Volkan Cevher", "tldr": "", "abstract": "We analytically investigate how over-parameterization of models in randomized machine learning algorithms impacts the information leakage about their training data. Specifically, we prove a privacy bound for the KL divergence between model distributions on worst-case neighboring datasets, and explore its dependence on the initialization, width, and depth of fully connected neural networks. We find that this KL privacy bound is largely determined by the expected squared gradient norm relative to model parameters during training. Notably, for the special setting of linearized network, our analysis indicates that the squared gradient norm (and therefore the escalation of privacy loss) is tied directly to the per-layer variance of the initialization distribution. By using this analysis, we demonstrate that privacy bound improves with increasing depth under certain initializations (LeCun and Xavier), while degrades with increasing depth under other initializations (He and NTK). Our work reveals a complex interplay between privacy and depth that depends on the chosen initialization distribution. 
We further prove excess empirical risk bounds under a fixed KL privacy budget, and show that the interplay between privacy utility trade-off and depth is similarly affected by the initialization.", "keywords": "overparameterized neural network;privacy", "primary_area": "", "supplementary_material": "/attachment/17c8ff5de7363afab183bb1beb2166f6ba67bbe1.pdf", "author": "Jiayuan Ye;Zhenyu Zhu;Fanghui Liu;Reza Shokri;Volkan Cevher", "authorids": "~Jiayuan_Ye1;~Zhenyu_Zhu1;~Fanghui_Liu1;~Reza_Shokri1;~Volkan_Cevher1", "gender": ";M;M;;M", "homepage": ";https://zhuzhenyu1997.github.io/;http://www.lfhsgre.org;;http://lions.epfl.ch", "dblp": ";;119/1038;;70/5301", "google_scholar": ";rft3OB4AAAAJ;AKxBgssAAAAJ;;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;0000-0003-4133-7921;;", "linkedin": ";zhenyu-zhu-045471139/;;;", "or_profile": "~Jiayuan_Ye1;~Zhenyu_Zhu1;~Fanghui_Liu1;~Reza_Shokri1;~Volkan_Cevher1", "aff": ";Swiss Federal Institute of Technology Lausanne;\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL);;Amazon Development Center Germany", "aff_domain": ";epfl.ch;epfl.ch;;amazon.de", "position": ";PhD student;Postdoc;;Amazon Scholar", "bibtex": "@inproceedings{\nye2023initialization,\ntitle={Initialization Matters: Privacy-Utility Analysis of Overparameterized Neural Networks},\nauthor={Jiayuan Ye and Zhenyu Zhu and Fanghui Liu and Reza Shokri and Volkan Cevher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IKvxmnHjkL}\n}", "github": "", "project": "", "reviewers": "R6b6;Ej4s;LMCn;NB2b", "pdf_size": 554482, "rating": "5;5;6;7", "confidence": "4;3;2;4", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "3;2;2;3", "wc_summary": "35;82;43;80", "wc_strengths": "44;48;30;49", "wc_weaknesses": "207;271;85;70", "wc_questions": "149;57;22;145", "wc_limitations": "4;114;36;2", "wc_review": "439;572;216;346", "wc_reply_reviewers": "379;26;20;18", "wc_reply_authors": "1355;56;22;27", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.0, 21.20141504711419 ], "wc_strengths_avg": [ 42.75, 7.595228765481656 ], "wc_weaknesses_avg": [ 158.25, 84.0278971532669 ], "wc_questions_avg": [ 93.25, 55.17415608779168 ], "wc_limitations_avg": [ 39.0, 45.35416188179427 ], "wc_review_avg": [ 393.25, 130.09107386750253 ], "wc_reply_reviewers_avg": [ 110.75, 154.90218687933364 ], "wc_reply_authors_avg": [ 365.0, 571.724146770101 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13239795406650048469&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 10, "email": ";epfl.ch;epfl.ch;;amazon.de", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;Amazon", "aff_unique_dep": ";;Development Center", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "EPFL;EPFL;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": 
"EmbodiedGPT: Vision-Language Pre-Training via Embodied Chain of Thought", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72127", "id": "IL5zJqfxAa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ec43957eda1126ad4887995d05fae3b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IL5zJqfxAa", "openreview": "https://openreview.net/forum?id=IL5zJqfxAa", "poster": "/media/PosterPDFs/NeurIPS%202023/72127.png?t=1701680694.4746222", "slides": "https://nips.cc/virtual/2023/poster/72127", "video": "https://nips.cc/virtual/2023/poster/72127", "author_site": "Yao Mu, Qinglong Zhang, Mengkang Hu, Wenhai Wang, Mingyu Ding, Jun Jin, Bin Wang, Jifeng Dai, Yu Qiao, Ping Luo", "tldr": "", "abstract": "Embodied AI is a crucial frontier in robotics, capable of planning and executing action sequences for robots to accomplish long-horizon tasks in physical environments.\nIn this work, we introduce EmbodiedGPT, an end-to-end multi-modal foundation model for embodied AI, empowering embodied agents with multi-modal understanding and execution capabilities. To achieve this, we have made the following efforts: (i) We craft a large-scale embodied planning dataset, termed EgoCOT. The dataset consists of carefully selected videos from the Ego4D dataset, along with corresponding high-quality language instructions. Specifically, we generate a sequence of sub-goals with the \"Chain of Thoughts\" mode for effective embodied planning.\n(ii) We introduce an efficient training approach to EmbodiedGPT for high-quality plan generation, by adapting a 7B large language model (LLM) to the EgoCOT dataset via prefix tuning. (iii) We introduce a paradigm for extracting task-related features from LLM-generated planning queries to form a closed loop between high-level planning and low-level control.\nExtensive experiments show the effectiveness of EmbodiedGPT on embodied tasks, including embodied planning, embodied control, visual captioning, and visual question answering.\nNotably, EmbodiedGPT significantly enhances the success rate of the embodied control task by extracting more effective features. 
It has achieved a remarkable 1.6 times increase in success rate on the Franka Kitchen benchmark and a 1.3 times increase on the Meta-World benchmark, compared to the BLIP-2 baseline fine-tuned with the Ego4D dataset.", "keywords": "Embodied AI;Multi-modal Foundation Model;Embodied Control", "primary_area": "", "supplementary_material": "/attachment/f2e0bf80a50e171403948a0ceb7398d88660ed64.pdf", "author": "Yao Mu;Qinglong Zhang;Mengkang Hu;Wenhai Wang;Mingyu Ding;Jun Jin;Bin Wang;Jifeng Dai;Yu Qiao;Ping Luo", "authorids": "~Yao_Mu1;~Qinglong_Zhang1;~Mengkang_Hu1;~Wenhai_Wang2;~Mingyu_Ding1;~Jun_Jin1;~Bin_Wang12;~Jifeng_Dai1;~Yu_Qiao1;~Ping_Luo2", "gender": "M;M;M;;M;;M;M;;", "homepage": "https://yaomarkmu.github.io/;;https://aaron617.github.io/;;https://dingmyu.github.io/;;http://binwang.top;https://jifengdai.org/;;", "dblp": "260/0674;165/0559;321/0644;;188/5243;78/8436.html;13/1898-34;14/9399;;", "google_scholar": ";LYR7l98AAAAJ;FhVRimUAAAAJ;;w4yTWwoAAAAJ;a6grwUcAAAAJ;KWZG_YsAAAAJ;SH_-B_AAAAAJ;;", "orcid": ";;0009-0009-3779-3378;;0000-0001-6556-8359;0000-0003-4413-8565;0000-0002-0267-3749;;;", "linkedin": ";;;;dingmyu/;;;;;", "or_profile": "~Yao_Mu1;~Qinglong_Zhang1;~Mengkang_Hu1;~Wenhai_Wang2;~Mingyu_Ding1;~Jun_Jin1;~Bin_Wang12;~Jifeng_Dai1;~Yu_Qiao1;~Ping_Luo2", "aff": "The University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Harbin Institute of Technology;;University of California, Berkeley;Huawei Technologies Ltd. Canada;Huawei Noah's Ark Lab;Tsinghua University;;", "aff_domain": "hku.hk;shlab.org.cn;hit.edu.cn;;berkeley.edu;huawei.com;huawei.com;tsinghua.edu.cn;;", "position": "PhD student;Researcher;Undergrad student;;Postdoc;Researcher;Senior Researcher;Associate Professor;;", "bibtex": "@inproceedings{\nmu2023embodiedgpt,\ntitle={Embodied{GPT}: Vision-Language Pre-Training via Embodied Chain of Thought},\nauthor={Yao Mu and Qinglong Zhang and Mengkang Hu and Wenhai Wang and Mingyu Ding and Jun Jin and Bin Wang and Jifeng Dai and Yu Qiao and Ping Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IL5zJqfxAa}\n}", "github": "", "project": "", "reviewers": "5cDX;BD8Q;eM5s;P2YP", "pdf_size": 1318296, "rating": "3;7;8;8", "confidence": "5;4;4;5", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "2;2;3;4", "wc_summary": "52;213;252;76", "wc_strengths": "29;87;176;122", "wc_weaknesses": "352;462;92;260", "wc_questions": "3;384;25;2", "wc_limitations": "3;27;6;13", "wc_review": "439;1173;551;473", "wc_reply_reviewers": "441;71;12;0", "wc_reply_authors": "632;185;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "4;3;1;1", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 148.25, 85.79153513022132 ], "wc_strengths_avg": [ 103.5, 53.434539391670626 ], "wc_weaknesses_avg": [ 291.5, 135.5756246528114 ], "wc_questions_avg": [ 103.5, 162.2074289297503 ], "wc_limitations_avg": [ 12.25, 9.256754290786809 ], "wc_review_avg": [ 659.0, 299.52295404526177 ], "wc_reply_reviewers_avg": [ 131.0, 180.98480599210532 ], "wc_reply_authors_avg": [ 204.25, 258.2521781127896 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.48507125007266594, "gs_citation": 246, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=15654824552713443796&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "hku.hk;shlab.org.cn;hit.edu.cn;;berkeley.edu;huawei.com;huawei.com;tsinghua.edu.cn;;", "author_num": 10, "aff_unique_index": "0;1;2;3;4;4;5", "aff_unique_norm": "University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Harbin Institute of Technology;University of California, Berkeley;Huawei;Tsinghua University", "aff_unique_dep": ";;;;Huawei Technologies;", "aff_unique_url": "https://www.hku.hk;http://www.shailab.org/;http://www.hit.edu.cn/;https://www.berkeley.edu;https://www.huawei.com/ca-en/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HKU;Shanghai AI Lab;HIT;UC Berkeley;Huawei;THU", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Hong Kong SAR;;Harbin;Berkeley", "aff_country_unique_index": "0;0;0;1;2;0;0", "aff_country_unique": "China;United States;Canada" }, { "id": "IL7F4soYyg", "title": "CellPLM: Pre-training of Cell Language Model Beyond Single Cells", "track": "main", "status": "Reject", "tldr": "", "abstract": "The current state-of-the-art single-cell pre-trained models are greatly inspired by the success of large language models. They trained transformers by treating genes as tokens and cells as sentences. However, three fundamental differences between single-cell data and natural language data are overlooked: (1) scRNA-seq data are presented as bag-of-genes instead of sequences of RNAs; (2) Cell-cell relations are more intricate and important than inter-sentence relations; and (3) The quantity of single-cell data is considerably inferior to text data, and they are very noisy. In light of these characteristics, we propose a new pre-trained model \\method, which takes cells as tokens and tissues as sentences. In addition, we leverage spatially-resolved transcriptomic data in pre-training to facilitate learning cell-cell relationships and introduce a Gaussian mixture prior distribution as an additional inductive bias to overcome data limitation. 
CellPLM is the first single-cell pre-trained transformer that encodes cell-cell relations and it achieves state-of-the-art performance in various downstream tasks.\n", "keywords": "single-cell analysis;pretrained models;AI for science", "primary_area": "", "supplementary_material": "/attachment/61c3f4f7c0f9c9105a337389aa03c71e4459dc8e.zip", "author": "Hongzhi Wen;Wenzhuo Tang;Jiayuan Ding;Wei Jin;Yuying Xie;Jiliang Tang", "authorids": "~Hongzhi_Wen1;~Wenzhuo_Tang1;~Jiayuan_Ding1;~Wei_Jin4;~Yuying_Xie1;~Jiliang_Tang1", "gender": "M;M;M;;M;M", "homepage": "https://www.cse.msu.edu/~wenhongz/;;;http://www.cs.emory.edu/~wjin30/;https://cmse.msu.edu/directory/faculty/yuying-xie/;https://www.cse.msu.edu/~tangjili/", "dblp": "179/0477;;197/1055;66/2173-9;24/2813-1;64/10812", "google_scholar": ";;7lwkXGEAAAAJ;eWow24EAAAAJ;https://scholar.google.com/citations?hl=en;WtzKMWAAAAAJ", "orcid": "0000-0003-0775-8538;;;;0000-0002-1049-2219;0000-0001-7125-3898", "linkedin": ";wenzhuo-tang-66b757207;jiayuand/;;yuying-xie-b754bb17/;", "or_profile": "~Hongzhi_Wen1;~Wenzhuo_Tang1;~Jiayuan_Ding1;~Wei_Jin4;~Yuying_Xie1;~Jiliang_Tang1", "aff": "Michigan State University;Michigan State University;Michigan State University;Michigan State University;Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu;msu.edu;msu.edu;msu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nwen2023cellplm,\ntitle={Cell{PLM}: Pre-training of Cell Language Model Beyond Single Cells},\nauthor={Hongzhi Wen and Wenzhuo Tang and Jiayuan Ding and Wei Jin and Yuying Xie and Jiliang Tang},\nyear={2023},\nurl={https://openreview.net/forum?id=IL7F4soYyg}\n}", "github": "", "project": "", "reviewers": "neja;L5kJ;hJQZ;bpVn;HcmX", "site": "https://openreview.net/forum?id=IL7F4soYyg", "pdf_size": 635480, "rating": "4;5;5;6;6", "confidence": "4;4;4;3;3", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "114;43;92;146;100", "wc_strengths": "23;77;47;126;116", "wc_weaknesses": "23;144;73;105;93", "wc_questions": "153;149;298;104;17", "wc_limitations": "1;28;8;1;19", "wc_review": "314;441;518;482;345", "wc_reply_reviewers": "98;154;0;18;7", "wc_reply_authors": "1467;500;0;31;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;2;1;2;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 99.0, 33.52610922848042 ], "wc_strengths_avg": [ 77.8, 39.331412382471086 ], "wc_weaknesses_avg": [ 87.6, 39.74720115932693 ], "wc_questions_avg": [ 144.2, 91.16007898197543 ], "wc_limitations_avg": [ 11.4, 10.594338110519224 ], "wc_review_avg": [ 420.0, 78.4219357067906 ], "wc_reply_reviewers_avg": [ 55.4, 60.57590279971071 ], "wc_reply_authors_avg": [ 399.6, 566.5066989895178 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15887212670928548591&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Michigan State University", "aff_unique_dep": "", "aff_unique_url": "https://www.msu.edu", "aff_unique_abbr": "MSU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "\u201cWhy Not Looking backward?\u201d A Robust Two-Step Method to Automatically Terminate Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72126", "id": "IMiGRqltQQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/870c1e0589822bf37590b84984c345c4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IMiGRqltQQ", "openreview": "https://openreview.net/forum?id=IMiGRqltQQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72126.png?t=1697455569.9236522", "slides": "https://nips.cc/virtual/2023/poster/72126", "video": "https://nips.cc/virtual/2023/poster/72126", "author_site": "Shuang Li, Ke Li, Wei Li", "tldr": "", "abstract": "Bayesian Optimization (BO) is a powerful method for tackling expensive black-box optimization problems. As a sequential model-based optimization strategy, BO iteratively explores promising solutions until a predetermined budget, either iterations or time, is exhausted. The decision on when to terminate BO significantly influences both the quality of solutions and its computational efficiency. In this paper, we propose a simple, yet theoretically grounded, two-step method for automatically terminating BO. Our core concept is to proactively identify if the search is within a convex region by examining previously observed samples. BO is halted once the local regret within this convex region falls below a predetermined threshold. To enhance numerical stability, we propose an approximation method for calculating the termination indicator by solving a bilevel optimization problem. We conduct extensive empirical studies on diverse benchmark problems, including synthetic functions, reinforcement learning, and hyperparameter optimization. Experimental results demonstrate that our proposed method saves up to $\\approx 80\\%$ computational budget yet is with an order of magnitude smaller performance degradation, comparing against the other peer methods. 
In addition, our proposed termination method is robust in terms of the setting of its termination criterion.", "keywords": "Bayesian Optimization;Termination Criterion;Looking Backward", "primary_area": "", "supplementary_material": "/attachment/77453766296b7af556bf9c8e332352ebf3058b93.pdf", "author": "Shuang Li;Ke Li;Wei Li", "authorids": "~Shuang_Li12;~Ke_Li5;~Wei_Li72", "gender": "M;M;M", "homepage": ";https://colalab.ai/;http://homepage.hit.edu.cn/liwei", "dblp": ";75/6627-1.html;", "google_scholar": ";https://scholar.google.co.uk/citations?user=lUFU8KsAAAAJ;", "orcid": "0000-0001-6261-177X;0000-0001-7200-4244;", "linkedin": ";ke-li-29423226/;", "or_profile": "~Shuang_Li12;~Ke_Li5;~Wei_Li72", "aff": "Harbin Institute of Technology;University of Exeter;Harbin Institute of Technology", "aff_domain": "hit.edu;exeter.ac.uk;hit.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2023why,\ntitle={{\\textquotedblleft}Why Not Looking backward?{\\textquotedblright} A Robust Two-Step Method to Automatically Terminate Bayesian Optimization},\nauthor={Shuang Li and Ke Li and Wei Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IMiGRqltQQ}\n}", "github": "", "project": "", "reviewers": "ffwe;pzre;TmDT", "pdf_size": 3769475, "rating": "4;6;7", "confidence": "5;3;4", "soundness": "2;2;3", "novelty": "2;2;4", "presentation": "2;1;2", "wc_summary": "49;81;511", "wc_strengths": "16;23;229", "wc_weaknesses": "196;76;309", "wc_questions": "134;50;360", "wc_limitations": "1;16;26", "wc_review": "396;246;1435", "wc_reply_reviewers": "201;22;197", "wc_reply_authors": "1084;9;156", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 213.66666666666666, 210.65189821651794 ], "wc_strengths_avg": [ 89.33333333333333, 98.80058479359096 ], "wc_weaknesses_avg": [ 193.66666666666666, 95.13615973376731 ], "wc_questions_avg": [ 181.33333333333334, 130.90794051121912 ], "wc_limitations_avg": [ 14.333333333333334, 10.274023338281626 ], "wc_review_avg": [ 692.3333333333334, 528.7030252314515 ], "wc_reply_reviewers_avg": [ 140.0, 83.45457846437586 ], "wc_reply_authors_avg": [ 416.3333333333333, 475.91058917499294 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6546536707079772, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10892931021741323833&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "hit.edu;exeter.ac.uk;hit.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harbin Institute of Technology;University of Exeter", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.exeter.ac.uk", "aff_unique_abbr": "HIT;Exeter", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Task-aware world model learning with meta weighting via bi-level optimization", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72125", "id": "IN3hQx1BrC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a995960dd0193654d6b18eca4ac5b936-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IN3hQx1BrC", "openreview": "https://openreview.net/forum?id=IN3hQx1BrC", "poster": "/media/PosterPDFs/NeurIPS%202023/72125.png?t=1697429213.7325683", "slides": "https://nips.cc/virtual/2023/poster/72125", "video": "https://nips.cc/virtual/2023/poster/72125", "author_site": "Huining Yuan, Hongkun Dou, Xingyu Jiang, Yue Deng", "tldr": "", "abstract": "Aligning the world model with the environment for the agent\u2019s specific task is crucial in model-based reinforcement learning. While value-equivalent models may achieve better task awareness than maximum-likelihood models, they sacrifice a large amount of semantic information and face implementation issues. To combine the benefits of both types of models, we propose Task-aware Environment Modeling Pipeline with bi-level Optimization (TEMPO), a bi-level model learning framework that introduces an additional level of optimization on top of a maximum-likelihood model by incorporating a meta weighter network that weights each training sample. The meta weighter in the upper level learns to generate novel sample weights by minimizing a proposed task-aware model loss. The model in the lower level focuses on important samples while maintaining rich semantic information in state representations. We evaluate TEMPO on a variety of continuous and discrete control tasks from the DeepMind Control Suite and Atari video games. Our results demonstrate that TEMPO achieves state-of-the-art performance regarding asymptotic performance, training stability, and convergence speed.", "keywords": "Model-based reinforcement learning;world model;generative model;meta-learning;bi-level optimization", "primary_area": "", "supplementary_material": "/attachment/d44a72e70ebfc2273a621b8dd8766e3bc914eca2.pdf", "author": "Huining Yuan;Hongkun Dou;Xingyu Jiang;Yue Deng", "authorids": "~Huining_Yuan1;~Hongkun_Dou3;~Xingyu_Jiang2;~Yue_Deng4", "gender": "M;M;M;M", "homepage": ";https://orcid.org/0009-0006-0282-622X;;", "dblp": "266/7202-2;;35/8109-1;285/8223", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;pSNEkEwAAAAJ", "orcid": "0000-0002-3438-3535;0009-0006-0282-622X;;0000-0001-6185-5369", "linkedin": ";;;", "or_profile": "~Huining_Yuan1;~Xingyu_Jiang2;~Yue_Deng4;~hongkun_dou1", "aff": "Beihang University;Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "MS student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nyuan2023taskaware,\ntitle={Task-aware world model learning with meta weighting via bi-level optimization},\nauthor={Huining Yuan and Hongkun Dou and Xingyu Jiang and Yue Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IN3hQx1BrC}\n}", "github": "", "project": "", "reviewers": "kk1X;YURA;RGcg;9zSq", "pdf_size": 3834601, "rating": "5;7;7;7", "confidence": "4;5;4;3", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;2;4;3", "wc_summary": "69;109;102;60", "wc_strengths": "24;111;91;169", "wc_weaknesses": "172;352;177;149", "wc_questions": "1;119;22;156", "wc_limitations": "1;21;7;54", "wc_review": "267;712;399;588", "wc_reply_reviewers": "41;163;99;130", "wc_reply_authors": "9;23;32;69", "reply_reviewers": 
"1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 85.0, 20.89258241577618 ], "wc_strengths_avg": [ 98.75, 51.799493240764434 ], "wc_weaknesses_avg": [ 212.5, 81.22961282685029 ], "wc_questions_avg": [ 74.5, 64.7707495710834 ], "wc_limitations_avg": [ 20.75, 20.522853115490545 ], "wc_review_avg": [ 491.5, 170.94516664708598 ], "wc_reply_reviewers_avg": [ 108.25, 44.94093345715017 ], "wc_reply_authors_avg": [ 33.25, 22.20782519743885 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16246744923201911549&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "TopoSRL: Topology preserving self-supervised Simplicial Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72124", "id": "INS3ltgjg7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/caba69fbc9fa0b06241b98a44cab8b31-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=INS3ltgjg7", "openreview": "https://openreview.net/forum?id=INS3ltgjg7", "poster": "/media/PosterPDFs/NeurIPS%202023/72124.png?t=1702217478.4667015", "slides": "https://nips.cc/virtual/2023/poster/72124", "video": "https://nips.cc/virtual/2023/poster/72124", "author_site": "Hiren Madhu, Sundeep Prabhakar Chepuri", "tldr": "", "abstract": "In this paper, we introduce $\\texttt{TopoSRL}$, a novel self-supervised learning (SSL) method for simplicial complexes to effectively capture higher-order interactions and preserve topology in the learned representations. $\\texttt{TopoSRL}$ addresses the limitations of existing graph-based SSL methods that typically concentrate on pairwise relationships, neglecting long-range dependencies crucial to capture topological information. We propose a new simplicial augmentation technique that generates two views of the simplicial complex that enriches the representations while being efficient. Next, we propose a new simplicial contrastive loss function that contrasts the generated simplices to preserve local and global information present in the simplicial complexes. 
Extensive experimental results demonstrate the superior performance of $\\texttt{TopoSRL}$ compared to state-of-the-art graph SSL techniques and supervised simplicial neural models across various datasets corroborating the efficacy of $\\texttt{TopoSRL}$ in processing simplicial complex data in a self-supervised setting.", "keywords": "Simplicial representation learning;Self-supervised learning;Message passing simplicial networks", "primary_area": "", "supplementary_material": "/attachment/f23b5f99b3f4947ea5c273baa4bb2fdaec216db2.pdf", "author": "Hiren Madhu;Sundeep Prabhakar Chepuri", "authorids": "~Hiren_Madhu1;~Sundeep_Prabhakar_Chepuri1", "gender": "M;M", "homepage": "http://hirenmadhu.github.io;https://ece.iisc.ac.in/~spchepuri/", "dblp": ";72/10237.html", "google_scholar": "Bt8Q-x0AAAAJ;Gu8FjdwAAAAJ", "orcid": "0000-0002-6701-6782;", "linkedin": "hiren-madhu/;", "or_profile": "~Hiren_Madhu1;~Sundeep_Prabhakar_Chepuri1", "aff": "Indian Institute of Science, Bengaluru;Indian Institute of Science", "aff_domain": "iisc.ac.in;iisc.ac.in", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nmadhu2023toposrl,\ntitle={Topo{SRL}: Topology preserving self-supervised Simplicial Representation Learning},\nauthor={Hiren Madhu and Sundeep Prabhakar Chepuri},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=INS3ltgjg7}\n}", "github": "", "project": "", "reviewers": "Lthj;T7H9;3cmn;VjqH;RqW1", "pdf_size": 966778, "rating": "4;4;5;6;6", "confidence": "4;3;3;3;4", "soundness": "2;2;3;2;3", "novelty": "3;2;3;2;2", "presentation": "1;1;3;3;4", "wc_summary": "83;29;60;65;231", "wc_strengths": "46;11;46;82;115", "wc_weaknesses": "325;51;170;71;400", "wc_questions": "62;42;11;72;43", "wc_limitations": "79;1;14;5;1", "wc_review": "595;134;301;295;790", "wc_reply_reviewers": "67;69;224;21;133", "wc_reply_authors": "161;268;144;17;33", "reply_reviewers": "1;1;3;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 1.2 ], "wc_summary_avg": [ 93.6, 70.86776418090244 ], "wc_strengths_avg": [ 60.0, 35.50211261319529 ], "wc_weaknesses_avg": [ 203.4, 138.06462255045642 ], "wc_questions_avg": [ 46.0, 20.890189084831185 ], "wc_limitations_avg": [ 20.0, 29.879759035172956 ], "wc_review_avg": [ 423.0, 236.2464814552801 ], "wc_reply_reviewers_avg": [ 102.8, 70.30903213670346 ], "wc_reply_authors_avg": [ 124.6, 91.9056037464528 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10119903110821269252&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "iisc.ac.in;iisc.ac.in", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.iisc.ac.in", "aff_unique_abbr": "IISc", "aff_campus_unique_index": "0", "aff_campus_unique": "Bengaluru;", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "DSR: Dynamical Surface Representation as Implicit Neural Networks for Protein", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72123", "id": "IOSaJ7ukgf", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d025936bae21d2c2d4cc74779aa77c7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IOSaJ7ukgf", "openreview": "https://openreview.net/forum?id=IOSaJ7ukgf", "poster": "/media/PosterPDFs/NeurIPS%202023/72123.png?t=1699801550.11479", "slides": "https://nips.cc/virtual/2023/poster/72123", "video": "https://nips.cc/virtual/2023/poster/72123", "author_site": "Daiwen Sun, He Huang, Yao Li, Xinqi Gong, Qiwei Ye", "tldr": "", "abstract": "We propose a novel neural network-based approach to modeling protein dynamics using an implicit representation of a protein\u2019s surface in 3D and time. Our method utilizes the zero-level set of signed distance functions (SDFs) to represent protein surfaces, enabling temporally and spatially continuous representations of protein dynamics. Our experimental results demonstrate that our model accurately captures protein dynamic trajectories and can interpolate and extrapolate in 3D and time. Importantly, this is the first study to introduce this method and successfully model large-scale protein dynamics. This approach offers a promising alternative to current methods, overcoming the limitations of first-principles-based and deep learning methods, and provides a more scalable and efficient approach to modeling protein dynamics. Additionally, our surface representation approach simplifies calculations and allows identifying movement trends and amplitudes of protein domains, making it a useful tool for protein dynamics research. Codes are available at https://github.com/Sundw-818/DSR, and we have a project webpage that shows some video results, https://sundw-818.github.io/DSR/.", "keywords": "Protein molecular dynamics;Protein surface representation;Implicit neural representation;Signed distance function;Continuous time modeling", "primary_area": "", "supplementary_material": "/attachment/d063f01e61013a281a83d8dd177a7d9bc9142250.pdf", "author": "Daiwen Sun;He Huang;Yao Li;Xinqi Gong;Qiwei Ye", "authorids": "~Daiwen_Sun1;~He_Huang6;~Yao_Li11;~Xinqi_Gong1;~Qiwei_Ye1", "gender": "F;M;F;M;M", "homepage": "https://sundw-818.github.io/;;;https://www.researchgate.net/profile/Xinqi-Gong;", "dblp": ";;;200/7127;50/995", "google_scholar": "qIYH94UAAAAJ;vV9bZ4AAAAAJ;;JDAZPZ0AAAAJ;RJ6SuR8AAAAJ", "orcid": ";;0009-0003-1450-5325;0000-0003-2802-6176;0000-0003-4264-5846", "linkedin": ";;;;qiwei-ye-15282964/", "or_profile": "~Daiwen_Sun1;~He_Huang6;~Yao_Li11;~Xinqi_Gong1;~Qiwei_Ye1", "aff": "Renmin University of China;Renmin University of China;;Renmin University of China;Beijing Academy of Artificial Intelligence", "aff_domain": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn;baai.ac.cn", "position": "PhD student;PhD student;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nsun2023dsr,\ntitle={{DSR}: Dynamical Surface Representation as Implicit Neural Networks for Protein},\nauthor={Daiwen Sun and He Huang and Yao Li and Xinqi Gong and Qiwei Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IOSaJ7ukgf}\n}", "github": "", "project": "", "reviewers": "Dvrt;MFaL;EXzZ;ZJGr;2SKi", "pdf_size": 6021312, "rating": "5;5;5;5;7", "confidence": "4;3;3;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;2;4", "presentation": "3;3;3;2;2", "wc_summary": "115;119;66;37;19", "wc_strengths": "118;69;46;51;18", "wc_weaknesses": "199;183;16;230;98", "wc_questions": "81;36;61;36;56", "wc_limitations": "7;12;54;13;12", "wc_review": 
"520;419;243;367;203", "wc_reply_reviewers": "86;101;0;326;13", "wc_reply_authors": "0;473;0;704;18", "reply_reviewers": "1;2;0;2;1", "reply_authors": "1;3;1;3;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 71.2, 40.31079259950119 ], "wc_strengths_avg": [ 60.4, 33.1215941645326 ], "wc_weaknesses_avg": [ 145.2, 78.01897205167471 ], "wc_questions_avg": [ 54.0, 16.911534525287763 ], "wc_limitations_avg": [ 19.6, 17.32743489383238 ], "wc_review_avg": [ 350.4, 115.7645887134749 ], "wc_reply_reviewers_avg": [ 105.2, 117.22354712258114 ], "wc_reply_authors_avg": [ 239.0, 294.6401194678009 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6908397655972375493&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn;baai.ac.cn", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Renmin University of China;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.baaic.cn", "aff_unique_abbr": "RUC;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "HQA-Attack: Toward High Quality Black-Box Hard-Label Adversarial Attack on Text", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72122", "id": "IOuuLBrGJR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a124b5e7385d35e5c8ad05d192106e19-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IOuuLBrGJR", "openreview": "https://openreview.net/forum?id=IOuuLBrGJR", "poster": "/media/PosterPDFs/NeurIPS%202023/72122.png?t=1699598918.3944104", "slides": "https://nips.cc/virtual/2023/poster/72122", "video": "https://nips.cc/virtual/2023/poster/72122", "author_site": "Han Liu, Zhi Xu, Xiaotong Zhang, Feng Zhang, Fenglong Ma, Hongyang Chen, Hong Yu, Xianchao Zhang", "tldr": "", "abstract": "Black-box hard-label adversarial attack on text is a practical and challenging task, as the text data space is inherently discrete and non-differentiable, and only the predicted label is accessible. Research on this problem is still in the embryonic stage and only a few methods are available. Nevertheless, existing methods rely on the complex heuristic algorithm or unreliable gradient estimation strategy, which probably fall into the local optimum and inevitably consume numerous queries, thus are difficult to craft satisfactory adversarial examples with high semantic similarity and low perturbation rate in a limited query budget. To alleviate above issues, we propose a simple yet effective framework to generate high quality textual adversarial examples under the black-box hard-label attack scenarios, named HQA-Attack. Specifically, after initializing an adversarial example randomly, HQA-attack first constantly substitutes original words back as many as possible, thus shrinking the perturbation rate. 
Then it leverages the synonym set of the remaining changed words to further optimize the adversarial example in a direction that can improve the semantic similarity and satisfy the adversarial condition simultaneously. In addition, during the optimization procedure, it searches for a transition synonym word for each changed word, thus avoiding traversing the whole synonym set and reducing the query number to some extent. Extensive experimental results on five text classification datasets, three natural language inference datasets and two real-world APIs have shown that the proposed HQA-Attack method outperforms other strong baselines significantly.", "keywords": "High-quality adversarial example;Black-box hard-label textual adversarial attack", "primary_area": "", "supplementary_material": "/attachment/e7a545cc8d3c4f0e216f8e4eacc4c2d69d8f1f07.zip", "author": "Han Liu;Zhi Xu;Xiaotong Zhang;Feng Zhang;Fenglong Ma;Hongyang Chen;Hong Yu;Xianchao Zhang", "authorids": "~Han_Liu3;~Zhi_Xu4;~Xiaotong_Zhang1;~Feng_Zhang5;~Fenglong_Ma1;~Hongyang_Chen2;~Hong_Yu2;~Xianchao_Zhang1", "gender": "M;M;F;;M;M;F;M", "homepage": "http://faculty.dlut.edu.cn/liuhan/zh_CN/index.htm;https://www.linkedin.com/in/zhi-xu-3b5630264/;http://faculty.dlut.edu.cn/zhangxiaotong/zh_CN/index.htm;;https://fenglong-ma.github.io/;https://www.linkedin.com/in/hongyangchen/;http://faculty.dlut.edu.cn/2003011105/zh_CN/index.htm;", "dblp": "35/2899-8;50/3857-8;31/2303-3;;85/10856;13/3715;55/6749-5;40/4372", "google_scholar": "https://scholar.google.com.hk/citations?user=idpbcG0AAAAJ;ZxF34DEAAAAJ;wV8Y2gQAAAAJ;;DLJIxNMAAAAJ;https://scholar.google.ca/citations?user=s-HDT8UAAAAJ;;", "orcid": "0000-0001-6921-2050;0009-0005-5459-9447;0000-0002-5013-8476;;0000-0002-4999-0303;0000-0002-7626-0162;0000-0003-4807-1812;", "linkedin": ";zhi-xu-3b5630264/;;;fenglong-ma-69805832/;hongyangchen/;;", "or_profile": "~Han_Liu3;~Zhi_Xu4;~Xiaotong_Zhang1;~Feng_Zhang5;~Fenglong_Ma1;~Hongyang_Chen2;~Hong_Yu2;~Xianchao_Zhang1", "aff": "Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;;Pennsylvania State University;Zhejiang Lab, China;Dalian University of Technology;Dalian University of Technology", "aff_domain": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;;psu.edu;zhejianglab.com;dlut.edu.cn;dlut.edu.cn", "position": "Associate Professor;PhD student;Associate Professor;;Assistant Professor;Senior Research Expert;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023hqaattack,\ntitle={{HQA}-Attack: Toward High Quality Black-Box Hard-Label Adversarial Attack on Text},\nauthor={Han Liu and Zhi Xu and Xiaotong Zhang and Feng Zhang and Fenglong Ma and Hongyang Chen and Hong Yu and Xianchao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IOuuLBrGJR}\n}", "github": "", "project": "", "reviewers": "RbiV;gEht;BWcn;pVxP;27hb", "pdf_size": 517348, "rating": "5;5;6;6;6", "confidence": "3;4;3;5;4", "soundness": "3;3;2;2;3", "novelty": "3;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "128;80;51;70;78", "wc_strengths": "39;93;29;64;117", "wc_weaknesses": "52;108;120;340;643", "wc_questions": "236;15;15;9;16", "wc_limitations": "11;14;16;15;1", "wc_review": "466;310;231;498;855", "wc_reply_reviewers": "52;0;0;1142;126", "wc_reply_authors": "39;31;31;1818;60", "reply_reviewers": "1;0;0;5;1", "reply_authors": "2;2;2;7;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], 
"soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 81.4, 25.452701231892853 ], "wc_strengths_avg": [ 68.4, 32.87308929808697 ], "wc_weaknesses_avg": [ 252.6, 218.53292658087017 ], "wc_questions_avg": [ 58.2, 88.9345826998699 ], "wc_limitations_avg": [ 11.4, 5.4626001134990645 ], "wc_review_avg": [ 472.0, 215.2793533992519 ], "wc_reply_reviewers_avg": [ 264.0, 441.42134067124573 ], "wc_reply_authors_avg": [ 395.8, 711.1791335521593 ], "reply_reviewers_avg": [ 1.4, 1.8547236990991407 ], "reply_authors_avg": [ 3.0, 2.0 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.32732683535398854, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5432597451195742891&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;;psu.edu;zhejianglab.com;dlut.edu.cn;dlut.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Dalian University of Technology;Pennsylvania State University;Zhejiang Lab", "aff_unique_dep": ";;", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.psu.edu;http://www.zhejianglab.com", "aff_unique_abbr": "DUT;PSU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Towards Characterizing the First-order Query Complexity of Learning (Approximate) Nash Equilibria in Zero-sum Matrix Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72121", "id": "IPNg84RF1k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2af57f909a99113db071672da236a5f2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IPNg84RF1k", "openreview": "https://openreview.net/forum?id=IPNg84RF1k", "poster": "/media/PosterPDFs/NeurIPS%202023/72121.png?t=1702132059.1211689", "slides": "https://nips.cc/virtual/2023/poster/72121", "video": "https://nips.cc/virtual/2023/poster/72121", "author_site": "Hedi Hadiji, Sarah Sachs, Tim van Erven, Wouter Koolen", "tldr": "", "abstract": "In the first-order query model for zero-sum $K\\times K$ matrix games, players observe the expected pay-offs for all their possible actions under the randomized action played by their opponent. This classical model has received renewed interest after the discovery by Rakhlin and Sridharan that $\\epsilon$-approximate Nash equilibria can be computed efficiently from $O(\\frac{\\ln K}{\\epsilon})$ instead of $O(\\frac{\\ln K}{\\epsilon^2})$ queries. Surprisingly, the optimal number of such queries, as a function of both $\\epsilon$ and $K$, is not known. We make progress on this question on two fronts. First, we fully characterise the query complexity of learning exact equilibria ($\\epsilon=0$), by showing that they require a number of queries that is linear in $K$, which means that it is essentially as hard as querying the whole matrix, which can also be done with $K$ queries. Second, for $\\epsilon > 0$, the current query complexity upper bound stands at $O(\\min(\\frac{\\ln(K)}{\\epsilon} , K))$. We argue that, unfortunately, obtaining a matching lower bound is not possible with existing techniques: we prove that no lower bound can be derived by constructing hard matrices whose entries take values in a known countable set, because such matrices can be fully identified by a single query. 
This rules out, for instance, reducing to an optimization problem over the hypercube by encoding it as a binary payoff matrix. We then introduce a new technique for lower bounds, which allows us to obtain lower bounds of order $\\tilde\\Omega(\\log(\\frac{1}{K\\epsilon}))$ for any $\\epsilon \\leq 1 / (cK^4)$, where $c$ is a constant independent of $K$. We further discuss possible future directions to improve on our techniques in order to close the gap with the upper bounds.", "keywords": "game theory;minimax optimization;lower bounds", "primary_area": "", "supplementary_material": "/attachment/9b9e8addc8e3b1e575401b7021b280622840318a.pdf", "author": "Hedi Hadiji;Sarah Sachs;Tim van Erven;Wouter M Koolen", "authorids": "~Hedi_Hadiji1;~Sarah_Sachs1;~Tim_van_Erven1;~Wouter_M_Koolen1", "gender": "M;F;M;M", "homepage": "https://hedi-hadiji.github.io/;https://www.uva.nl/en/profile/s/a/s.c.sachs/s.c.sachs.html?cb;http://www.timvanerven.nl;http://wouterkoolen.info/", "dblp": ";;82/1868;08/2694", "google_scholar": ";;https://scholar.google.nl/citations?user=kdxqEMQAAAAJ;34JTfUcAAAAJ", "orcid": ";;;0000-0002-1053-6701", "linkedin": ";;;", "or_profile": "~Hedi_Hadiji1;~Sarah_Sachs1;~Tim_van_Erven1;~Wouter_M_Koolen1", "aff": "CentraleSupelec;University of Amsterdam;University of Amsterdam;Centrum voor Wiskunde en Informatica", "aff_domain": "centralesupelec.fr;uva.nl;uva.nl;cwi.nl", "position": "Assistant Professor;PhD student;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\nhadiji2023towards,\ntitle={Towards Characterizing the First-order Query Complexity of Learning (Approximate) Nash Equilibria in Zero-sum Matrix Games},\nauthor={Hedi Hadiji and Sarah Sachs and Tim van Erven and Wouter M Koolen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IPNg84RF1k}\n}", "github": "", "project": "", "reviewers": "3esb;VSPH;EExk;Wvc4;nqee", "pdf_size": 370136, "rating": "5;5;6;7;7", "confidence": "5;3;2;4;3", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "273;164;108;172;130", "wc_strengths": "1;123;84;102;33", "wc_weaknesses": "174;87;54;30;24", "wc_questions": "418;4;30;36;19", "wc_limitations": "1;52;12;1;6", "wc_review": "867;430;288;341;212", "wc_reply_reviewers": "41;31;31;41;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 169.4, 56.729533754473955 ], "wc_strengths_avg": [ 68.6, 45.053745682240454 ], "wc_weaknesses_avg": [ 73.8, 54.781018610463974 ], "wc_questions_avg": [ 101.4, 158.67400543252194 ], "wc_limitations_avg": [ 14.4, 19.23122460999299 ], "wc_review_avg": [ 427.6, 230.8892375144411 ], "wc_reply_reviewers_avg": [ 28.8, 15.07846145997661 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2192645048267573, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7739637578963845042&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 18, "email": "centralesupelec.fr;uva.nl;uva.nl;cwi.nl", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "CentraleSup\u00e9lec;University of 
Amsterdam;Centrum voor Wiskunde en Informatica", "aff_unique_dep": ";;", "aff_unique_url": "https://www.centralesupelec.fr;https://www.uva.nl;https://www.cwi.nl/", "aff_unique_abbr": "CS;UvA;CWI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "France;Netherlands" }, { "id": "IQ6GI7fM2z", "title": "Gradient strikes back: How filtering out high frequencies improves explanations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent years have witnessed an explosion in the development of novel prediction-based attribution methods, which have slowly been supplanting older gradient-based methods to explain the decisions of deep neural networks. However, it is still not clear why prediction-based methods outperform gradient-based ones.\nHere, we start with an empirical observation: these two approaches yield attribution maps with very different power spectra, with gradient-based methods revealing more high-frequency content than prediction-based methods.\nThis observation raises multiple questions: What is the source of this high-frequency information, and does it truly reflect decisions made by the system? Lastly, why would the absence of high-frequency information in prediction-based methods yield better explainability scores along multiple metrics? We analyze the gradient of three representative visual classification models and observe that it contains noisy information emanating from high frequencies. Furthermore, our analysis reveals that the operations used in Convolutional Neural Networks (CNNs) for downsampling appear to be a significant source of this high-frequency content -- suggesting aliasing as a possible underlying basis. We then apply an optimal low-pass filter for attribution maps and demonstrate that it improves gradient-based attribution methods. We show that (i) removing high-frequency noise yields significant improvements in the explainability scores obtained with gradient-based methods across multiple models -- leading to (ii) a novel ranking of state-of-the-art methods with gradient-based methods at the top.
We believe that our results will spur renewed interest in simpler and computationally more efficient gradient-based methods for explainability.", "keywords": "Fourier;Explainability;Attribution methods;Deep Learning;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/f9567ae23d50568242c08862e0a52e3260c5ae68.pdf", "author": "Sabine Muzellec;L\u00e9o And\u00e9ol;Thomas FEL;Rufin VanRullen;Thomas Serre", "authorids": "~Sabine_Muzellec1;~L\u00e9o_And\u00e9ol1;~Thomas_FEL1;~Rufin_VanRullen1;~Thomas_Serre1", "gender": ";;M;M;M", "homepage": ";;https://thomasfel.me;https://rufinv.github.io;https://serre-lab.clps.brown.edu/", "dblp": ";;274/2390;83/2121;", "google_scholar": ";;1m5Mlx4AAAAJ;1pwyaYgAAAAJ;kZlPW4wAAAAJ", "orcid": ";;;0000-0002-3611-7716;", "linkedin": ";;;;", "or_profile": "~Sabine_Muzellec1;~L\u00e9o_And\u00e9ol1;~Thomas_FEL1;~Rufin_VanRullen1;~Thomas_Serre1", "aff": ";;Brown University;CNRS;Universit\u00e9 de Toulouse", "aff_domain": ";;brown.edu;cnrs.fr;univ-toulouse.fr", "position": ";;PhD student;Research Director;Full Professor", "bibtex": "@misc{\nmuzellec2023gradient,\ntitle={Gradient strikes back: How filtering out high frequencies improves explanations},\nauthor={Sabine Muzellec and L{\\'e}o And{\\'e}ol and Thomas FEL and Rufin VanRullen and Thomas Serre},\nyear={2023},\nurl={https://openreview.net/forum?id=IQ6GI7fM2z}\n}", "github": "", "project": "", "reviewers": "vA9f;mo1q;QucS;bkGG", "site": "https://openreview.net/forum?id=IQ6GI7fM2z", "pdf_size": 9173720, "rating": "3;4;5;5", "confidence": "4;3;4;4", "soundness": "2;3;3;2", "novelty": "2;3;2;2", "presentation": "2;3;3;3", "wc_summary": "114;84;74;150", "wc_strengths": "89;61;46;134", "wc_weaknesses": "311;378;238;400", "wc_questions": "43;15;36;5", "wc_limitations": "1;46;1;9", "wc_review": "558;584;395;698", "wc_reply_reviewers": "272;76;85;41", "wc_reply_authors": "462;59;21;33", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.5, 29.609964538985857 ], "wc_strengths_avg": [ 82.5, 33.5 ], "wc_weaknesses_avg": [ 331.75, 63.278649637930805 ], "wc_questions_avg": [ 24.75, 15.368392889303683 ], "wc_limitations_avg": [ 14.25, 18.619546181365433 ], "wc_review_avg": [ 558.75, 108.2159299733639 ], "wc_reply_reviewers_avg": [ 118.5, 90.1346215391178 ], "wc_reply_authors_avg": [ 143.75, 184.25440971656553 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13080528532802559128&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Brown University;Centre National de la Recherche Scientifique;Universit\u00e9 de Toulouse", "aff_unique_dep": ";;", "aff_unique_url": "https://www.brown.edu;https://www.cnrs.fr;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;CNRS;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;France" }, { "title": "Mutual-Information Regularized Multi-Agent Policy Iteration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72120", "id": "IQRc3FrYOG", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/0799492e7be38b66d10ead5e8809616d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IQRc3FrYOG", "openreview": "https://openreview.net/forum?id=IQRc3FrYOG", "poster": "/media/PosterPDFs/NeurIPS%202023/72120.png?t=1700498893.893116", "slides": "https://nips.cc/virtual/2023/poster/72120", "video": "https://nips.cc/virtual/2023/poster/72120", "author_site": "Wang, Deheng Ye, Zongqing Lu", "tldr": "", "abstract": "Despite the success of cooperative multi-agent reinforcement learning algorithms, most of them focus on a single team composition, which prevents them from being used in more realistic scenarios where dynamic team composition is possible. While some studies attempt to solve this problem via multi-task learning in a fixed set of team compositions, there is still a risk of overfitting to the training set, which may lead to catastrophic performance when facing dramatically varying team compositions during execution. To address this problem, we propose to use mutual information (MI) as an augmented reward to prevent individual policies from relying too much on team-related information and encourage agents to learn policies that are robust in different team compositions. Optimizing this MI-augmented objective in an off-policy manner can be intractable due to the existence of dynamic marginal distribution. To alleviate this problem, we first propose a multi-agent policy iteration algorithm with a fixed marginal distribution and prove its convergence and optimality. Then, we propose to employ the Blahut\u2013Arimoto algorithm and an imaginary team composition distribution for optimization with approximate marginal distribution as the practical implementation. Empirically, our method demonstrates strong zero-shot generalization to dynamic team compositions in complex cooperative tasks.", "keywords": "Multi-Agent Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/e9e74025724bd11d209a1803f73fb8ad7e91e0e3.zip", "author": "Jiangxing Wang;Deheng Ye;Zongqing Lu", "authorids": "~Jiangxing_Wang2;~Deheng_Ye1;~Zongqing_Lu2", "gender": "M;M;", "homepage": "https://github.com/RetiaAdolf;http://yedeheng.github.io/;", "dblp": ";159/9503;", "google_scholar": ";jz5XKuQAAAAJ;", "orcid": ";0000-0002-1754-1837;", "linkedin": ";;", "or_profile": "~Jiangxing_Wang2;~Deheng_Ye1;~Zongqing_Lu2", "aff": "Peking University;Tencent;", "aff_domain": "pku.edu.cn;tencent.com;", "position": "PhD student;Team Manager;", "bibtex": "@inproceedings{\nwang2023mutualinformation,\ntitle={Mutual-Information Regularized Multi-Agent Policy Iteration},\nauthor={Jiangxing Wang and Deheng Ye and Zongqing Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IQRc3FrYOG}\n}", "github": "", "project": "", "reviewers": "KNmz;b1Qn;KG7B;NEb7", "pdf_size": 1105307, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "3;4;4;2", "novelty": "3;3;3;2", "presentation": "2;4;4;2", "wc_summary": "576;40;60;53", "wc_strengths": "2;25;27;42", "wc_weaknesses": "2;74;43;210", "wc_questions": "2;6;65;72", "wc_limitations": "20;5;5;2", "wc_review": "602;150;200;379", "wc_reply_reviewers": "17;34;20;76", "wc_reply_authors": "27;88;15;106", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 
0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 182.25, 227.44491091251086 ], "wc_strengths_avg": [ 24.0, 14.300349646075091 ], "wc_weaknesses_avg": [ 82.25, 78.05246632874582 ], "wc_questions_avg": [ 36.25, 32.37572393013012 ], "wc_limitations_avg": [ 8.0, 7.035623639735144 ], "wc_review_avg": [ 332.75, 177.23906877435348 ], "wc_reply_reviewers_avg": [ 36.75, 23.551804601770964 ], "wc_reply_authors_avg": [ 59.0, 38.76209488662861 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9334079447785242770&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;tencent.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "http://www.pku.edu.cn;https://www.tencent.com", "aff_unique_abbr": "Peking U;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Coordinating Distributed Example Orders for Provably Accelerated Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72119", "id": "ISRyILhAyS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/af9ac087ed9123957bb3a45dca56b9d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ISRyILhAyS", "openreview": "https://openreview.net/forum?id=ISRyILhAyS", "poster": "/media/PosterPDFs/NeurIPS%202023/72119.png?t=1701641937.8607829", "slides": "https://nips.cc/virtual/2023/poster/72119", "video": "https://nips.cc/virtual/2023/poster/72119", "author_site": "A. Feder Cooper, Wentao Guo, Duc Khiem Pham, Tiancheng Yuan, Charlie Ruan, Yucheng Lu, Christopher De Sa", "tldr": "", "abstract": "Recent research on online Gradient Balancing (GraB) has revealed that there exist permutation-based example orderings for SGD that are guaranteed to outperform random reshuffling (RR). Whereas RR arbitrarily permutes training examples, GraB leverages stale gradients from prior epochs to order examples -- achieving a provably faster convergence rate than RR. However, GraB is limited by design: while it demonstrates an impressive ability to scale up training on centralized data, it does not naturally extend to modern distributed ML workloads. We therefore propose Coordinated Distributed GraB (CD-GraB), which uses insights from prior work on kernel thinning to translate the benefits of provably faster permutation-based example ordering to distributed settings. With negligible overhead, CD-GraB exhibits a linear speedup in convergence rate over centralized GraB and outperforms distributed RR on a variety of benchmark tasks.", "keywords": "permuted example ordering;distributed training;scalable training;herding", "primary_area": "", "supplementary_material": "/attachment/07cc16bb94204620cce679f5b06f77551df1beb0.pdf", "author": "A. Feder Cooper;Wentao Guo;Khiem Pham;Tiancheng Yuan;Charlie F.
Ruan;Yucheng Lu;Christopher De Sa", "authorids": "~A._Feder_Cooper1;~Wentao_Guo1;~Khiem_Pham3;~Tiancheng_Yuan1;~Charlie_F._Ruan1;~Yucheng_Lu1;~Christopher_De_Sa2", "gender": ";M;M;M;M;M;M", "homepage": "https://afedercooper.info;http://wentaoguo.me/;;https://www.yucheng-lu.me/;https://www.charlieruan.com/;http://cs.cornell.edu/~cdesa;https://drproduck.github.io/", "dblp": "260/0514;;;;334/4470.html;154/6336;", "google_scholar": "https://scholar.google.ch/citations?hl=en;7uHQMsYAAAAJ;;FsBgPhQAAAAJ;ZYVCncwAAAAJ;;NSkcWG0AAAAJ", "orcid": "0000-0002-4892-681X;;;;0009-0000-5369-4593;;", "linkedin": ";wentao-guo-11b03217b/;tcyuan22/;;charlie-ruan/;;khiem-duc-pham/", "or_profile": "~A._Feder_Cooper1;~Wentao_Guo1;~Tiancheng_Yuan1;~Yucheng_Lu1;~Charlie_Fang_Ruan1;~Christopher_De_Sa1;~Khiem_Pham2", "aff": "Cornell University;Department of Computer Science, Cornell University;Cornell University;Cornell University;Cornell University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cs.cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;MS student;PhD student;PhD student;Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ncooper2023cdgrab,\ntitle={Coordinating Distributed Example Orders for Provably Accelerated Training},\nauthor={A. Feder Cooper and Wentao Guo and Khiem Pham and Tiancheng Yuan and Charlie F. Ruan and Yucheng Lu and Christopher De Sa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ISRyILhAyS}\n}", "github": "", "project": "", "reviewers": "xnQq;AQrA;dDcg;T8gT;ZrZn", "pdf_size": 1188659, "rating": "5;5;5;6;7", "confidence": "4;4;3;2;4", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;4", "presentation": "3;3;3;3;4", "wc_summary": "102;76;103;51;53", "wc_strengths": "63;70;61;72;54", "wc_weaknesses": "238;151;309;65;79", "wc_questions": "8;67;47;23;18", "wc_limitations": "8;1;69;1;6", "wc_review": "419;365;589;212;210", "wc_reply_reviewers": "178;0;829;0;63", "wc_reply_authors": "0;0;844;0;15", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 77.0, 22.60088493842664 ], "wc_strengths_avg": [ 64.0, 6.48074069840786 ], "wc_weaknesses_avg": [ 168.4, 93.38008352962638 ], "wc_questions_avg": [ 32.6, 21.453204888780604 ], "wc_limitations_avg": [ 17.0, 26.145745351777602 ], "wc_review_avg": [ 359.0, 141.66580391894158 ], "wc_reply_reviewers_avg": [ 214.0, 314.29731147434273 ], "wc_reply_authors_avg": [ 171.8, 336.15020452172865 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.06250000000000006, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14836715344867898911&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cornell.edu;cs.cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { 
"title": "Implicit Bias of Gradient Descent for Logistic Regression at the Edge of Stability", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72118", "id": "IT9mWLYNpQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb189151ced0ff808abafd16a51fec92-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IT9mWLYNpQ", "openreview": "https://openreview.net/forum?id=IT9mWLYNpQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72118.png?t=1699131547.1982517", "slides": "https://nips.cc/virtual/2023/poster/72118", "video": "https://nips.cc/virtual/2023/poster/72118", "author_site": "Jingfeng Wu, Vladimir Braverman, Jason Lee", "tldr": "", "abstract": "Recent research has observed that in machine learning optimization, gradient descent (GD) often operates at the edge of stability (EoS) [Cohen et al., 2021], where the stepsizes are set to be large, resulting in non-monotonic losses induced by the GD iterates. This paper studies the convergence and implicit bias of constant-stepsize GD for logistic regression on linearly separable data in the EoS regime. Despite the presence of local oscillations, we prove that the logistic loss can be minimized by GD with any constant stepsize over a long time scale. Furthermore, we prove that with any constant stepsize, the GD iterates tend to infinity when projected to a max-margin direction (the hard-margin SVM direction) and converge to a fixed vector that minimizes a strongly convex potential when projected to the orthogonal complement of the max-margin direction. In contrast, we also show that in the EoS regime, GD iterates may diverge catastrophically under the exponential loss, highlighting the superiority of the logistic loss. These theoretical findings are in line with numerical simulations and complement existing theories on the convergence and implicit bias of GD for logistic regression, which are only applicable when the stepsizes are sufficiently small.", "keywords": "gd; implicit bias; edge of stability", "primary_area": "", "supplementary_material": "/attachment/055c83481e1e11e1182e9a5be1447a06da1a8f80.zip", "author": "Jingfeng Wu;Vladimir Braverman;Jason D. Lee", "authorids": "~Jingfeng_Wu1;~Vladimir_Braverman1;~Jason_D._Lee1", "gender": "M;Unspecified;M", "homepage": "https://uuujf.github.io;http://www.cs.jhu.edu/~vova/;https://jasondlee88.github.io/", "dblp": ";14/4758;88/3262", "google_scholar": "z-KILD8AAAAJ;https://scholar.google.com.tw/citations?user=DTthB48AAAAJ;GR_DsT0AAAAJ", "orcid": "0009-0009-3414-4487;;", "linkedin": "jingfeng-wu-79205b184/;;", "or_profile": "~Jingfeng_Wu1;~Vladimir_Braverman1;~Jason_D._Lee1", "aff": "Johns Hopkins University;Department of Computer Science, Whiting School of Engineering;Princeton University", "aff_domain": "jhu.edu;cs.jhu.edu;princeton.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2023implicit,\ntitle={Implicit Bias of Gradient Descent for Logistic Regression at the Edge of Stability},\nauthor={Jingfeng Wu and Vladimir Braverman and Jason D. 
Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IT9mWLYNpQ}\n}", "github": "", "project": "", "reviewers": "pcDT;rZLX;PfDy;iDYE;QkkA", "pdf_size": 575746, "rating": "5;6;7;7;8", "confidence": "3;4;3;4;3", "soundness": "3;3;4;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;4;3;4", "wc_summary": "65;44;97;60;90", "wc_strengths": "109;94;73;88;80", "wc_weaknesses": "51;61;30;19;243", "wc_questions": "48;41;118;40;5", "wc_limitations": "26;1;1;0;3", "wc_review": "299;241;319;207;421", "wc_reply_reviewers": "10;0;0;18;17", "wc_reply_authors": "23;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 71.2, 19.610201426808445 ], "wc_strengths_avg": [ 88.8, 12.351518125315609 ], "wc_weaknesses_avg": [ 80.8, 82.44852939864967 ], "wc_questions_avg": [ 50.4, 36.96809435175148 ], "wc_limitations_avg": [ 6.2, 9.947864092356712 ], "wc_review_avg": [ 297.4, 73.61956261755431 ], "wc_reply_reviewers_avg": [ 9.0, 7.848566748139434 ], "wc_reply_authors_avg": [ 4.6, 9.199999999999998 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.08006407690254361, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3228566761711850884&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "jhu.edu;cs.jhu.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Johns Hopkins University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.princeton.edu", "aff_unique_abbr": "JHU;Princeton", "aff_campus_unique_index": "1", "aff_campus_unique": ";Baltimore", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Are Emergent Abilities of Large Language Models a Mirage?", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72117", "id": "ITw9edRDlD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/adc98a266f45005c403b8311ca7e8bd7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ITw9edRDlD", "openreview": "https://openreview.net/forum?id=ITw9edRDlD", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72117", "video": "https://nips.cc/virtual/2023/poster/72117", "author_site": "Rylan Schaeffer, Brando Miranda, Sanmi Koyejo", "tldr": "", "abstract": "Recent work claims that large language models display \\textit{emergent abilities}, abilities not present in smaller-scale models that are present in larger-scale models.\nWhat makes emergent abilities intriguing is two-fold: their \\textit{sharpness}, transitioning seemingly instantaneously from not present to present, and their \\textit{unpredictability}, appearing at seemingly unforeseeable model scales.\nHere, we present an alternative explanation for emergent abilities: that for a particular task and model family, when analyzing fixed model outputs, emergent abilities appear due to the researcher\u2019s choice of metric rather than due to fundamental changes in model behavior with scale.
Specifically, nonlinear or discontinuous metrics produce apparent emergent abilities, whereas linear or continuous metrics produce smooth, continuous, predictable changes in model performance.\nWe present our alternative explanation in a simple mathematical model, then test it in three complementary ways: we (1) make, test and confirm three predictions on the effect of metric choice using the InstructGPT/GPT-3 family on tasks with claimed emergent abilities, (2) make, test and confirm two predictions about metric choices in a meta-analysis of emergent abilities on BIG-Bench; and (3) show how to choose metrics to produce never-before-seen seemingly emergent abilities in multiple vision tasks across diverse deep networks.\nVia all three analyses, we provide evidence that alleged emergent abilities evaporate with different metrics or with better statistics, and may not be a fundamental property of scaling AI models.", "keywords": "large language models;foundation models;natural language processing;language modeling;emergent abilities", "primary_area": "", "supplementary_material": "/attachment/3867bc492a264c38b861a89807bd05eb6d7c07da.pdf", "author": "Rylan Schaeffer;Brando Miranda;Sanmi Koyejo", "authorids": "~Rylan_Schaeffer2;~Brando_Miranda1;~Sanmi_Koyejo1", "gender": "M;M;M", "homepage": "https://rylanschaeffer.github.io;https://cbmm.mit.edu/about/people/miranda;https://cs.stanford.edu/~sanmi/", "dblp": "280/1341;;14/8885", "google_scholar": "6tMEGz8AAAAJ;_NQJoBkAAAAJ;EaaOeJwAAAAJ", "orcid": ";;0000-0002-4023-419X", "linkedin": "rylanschaeffer/;brando-miranda-40821046/;sanmi-koyejo-984754/", "or_profile": "~Rylan_Schaeffer2;~Brando_Miranda1;~Oluwasanmi_O_Koyejo1", "aff": "Massachusetts Institute of Technology;Stanford University;Google", "aff_domain": "mit.edu;stanford.edu;google.com", "position": "Researcher;PhD student;Research Scientist", "bibtex": "@inproceedings{\nschaeffer2023are,\ntitle={Are Emergent Abilities of Large Language Models a Mirage?},\nauthor={Rylan Schaeffer and Brando Miranda and Sanmi Koyejo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ITw9edRDlD}\n}", "github": "", "project": "", "reviewers": "ccWP;Cfbn;eK8N;GXCv", "pdf_size": 1896474, "rating": "7;7;8;9", "confidence": "4;4;3;4", "soundness": "4;2;3;4", "novelty": "3;4;4;4", "presentation": "4;3;4;4", "wc_summary": "74;133;111;26", "wc_strengths": "63;35;88;16", "wc_weaknesses": "242;1069;238;11", "wc_questions": "2;7;48;61", "wc_limitations": "6;7;4;1", "wc_review": "387;1251;489;115", "wc_reply_reviewers": "673;248;30;11", "wc_reply_authors": "811;243;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 7.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.0, 40.55243519198323 ], "wc_strengths_avg": [ 50.5, 27.35415873317986 ], "wc_weaknesses_avg": [ 390.0, 403.0167490316004 ], "wc_questions_avg": [ 29.5, 25.480384612481814 ], "wc_limitations_avg": [ 4.5, 2.29128784747792 ], "wc_review_avg": [ 560.5, 421.4483954175173 ], "wc_reply_reviewers_avg": [ 240.5, 266.50187616600374 ], "wc_reply_authors_avg": [ 263.5, 331.3008451543702 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 
-0.17407765595569782, "gs_citation": 513, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=159530739906694638&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "mit.edu;stanford.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;Stanford University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://web.mit.edu;https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "MIT;Stanford;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fine-grained Late-interaction Multi-modal Retrieval for Retrieval Augmented Visual Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72116", "id": "IWWWulAX7g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/47393e8594c82ce8fd83adc672cf9872-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IWWWulAX7g", "openreview": "https://openreview.net/forum?id=IWWWulAX7g", "poster": "/media/PosterPDFs/NeurIPS%202023/72116.png?t=1701377609.5499158", "slides": "https://nips.cc/virtual/2023/poster/72116", "video": "https://nips.cc/virtual/2023/poster/72116", "author_site": "Weizhe Lin, Jinghong Chen, Jingbiao Mei, Alexandru Coca, Bill Byrne", "tldr": "", "abstract": "Knowledge-based Visual Question Answering (KB-VQA) requires VQA systems to utilize knowledge from external knowledge bases to answer visually-grounded questions. Retrieval-Augmented Visual Question Answering (RA-VQA), a strong framework to tackle KB-VQA, first retrieves related documents with Dense Passage Retrieval (DPR) and then uses them to answer questions. This paper proposes Fine-grained Late-interaction Multi-modal Retrieval (FLMR) which significantly improves knowledge retrieval in RA-VQA. FLMR addresses two major limitations in RA-VQA's retriever: (1) the image representations obtained via image-to-text transforms can be incomplete and inaccurate and (2) similarity scores between queries and documents are computed with one-dimensional embeddings, which can be insensitive to finer-grained similarities.\nFLMR overcomes these limitations by obtaining image representations that complement those from the image-to-text transform using a vision model aligned with an existing text-based retriever through a simple alignment network. FLMR also encodes images and questions using multi-dimensional embeddings to capture finer-grained similarities between queries and documents. \nFLMR significantly improves the original RA-VQA retriever's PRRecall@5 by approximately 8\\%.
Finally, we equipped RA-VQA with two state-of-the-art large multi-modal/language models to achieve $\\sim62$% VQA score in the OK-VQA dataset.", "keywords": "knowledge-based visual question answering;knowledge retrieval;multi-modality;vision-and-language", "primary_area": "", "supplementary_material": "/attachment/442b60a75aa20278ee8d7684640435f124830e20.pdf", "author": "Weizhe Lin;Jinghong Chen;Jingbiao Mei;Alexandru Coca;Bill Byrne", "authorids": "~Weizhe_Lin1;~Jinghong_Chen2;jm2245@cam.ac.uk;~Alexandru_Coca1;~Bill_Byrne1", "gender": "M;M;;M;M", "homepage": "https://linweizhedragon.github.io/;https://www.jinghong-chen.net/;;;https://sites.google.com/view/bill-byrne/", "dblp": "254/9170;21/1754;;270/3329;b/WilliamJByrne", "google_scholar": "4hMhIecAAAAJ;pYOXaKEAAAAJ;;https://scholar.google.co.uk/citations?user=WqcTDlkAAAAJ;BVUcMU4AAAAJ", "orcid": ";;;;", "linkedin": ";jinghong-chen/;;alexandru-coca-21256255/;wjbyrne/", "or_profile": "~Weizhe_Lin1;~Jinghong_Chen2;jm2245@cam.ac.uk;~Alexandru_Coca1;~Bill_Byrne1", "aff": "University of Cambridge;University of Cambridge;;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;;cam.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nlin2023finegrained,\ntitle={Fine-grained Late-interaction Multi-modal Retrieval for Retrieval Augmented Visual Question Answering},\nauthor={Weizhe Lin and Jinghong Chen and Jingbiao Mei and Alexandru Coca and Bill Byrne},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IWWWulAX7g}\n}", "github": "", "project": "", "reviewers": "jgsy;XziC;ujV3;Bs2i;d3hH;shyj;xLzi", "pdf_size": 2707672, "rating": "4;5;5;5;5;6;7", "confidence": "4;4;4;3;3;4;3", "soundness": "2;3;3;2;3;4;4", "novelty": "2;2;3;2;3;3;4", "presentation": "3;3;4;3;4;3;4", "wc_summary": "77;36;38;65;62;73;71", "wc_strengths": "39;63;34;78;41;106;66", "wc_weaknesses": "151;217;74;25;30;186;94", "wc_questions": "35;3;5;27;27;6;1", "wc_limitations": "1;3;1;1;1;6;1", "wc_review": "303;322;152;196;161;377;233", "wc_reply_reviewers": "35;24;11;28;14;0;16", "wc_reply_authors": "0;0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;0;1", "reply_authors": "1;1;1;1;1;1;1", "rating_avg": [ 5.285714285714286, 0.880630571852711 ], "confidence_avg": [ 3.5714285714285716, 0.49487165930539345 ], "soundness_avg": [ 3.0, 0.7559289460184544 ], "novelty_avg": [ 2.7142857142857144, 0.6998542122237652 ], "presentation_avg": [ 3.4285714285714284, 0.4948716593053935 ], "wc_summary_avg": [ 60.285714285714285, 15.433861526936576 ], "wc_strengths_avg": [ 61.0, 23.77273348066766 ], "wc_weaknesses_avg": [ 111.0, 69.77105417004964 ], "wc_questions_avg": [ 14.857142857142858, 13.141304256085863 ], "wc_limitations_avg": [ 2.0, 1.7728105208558367 ], "wc_review_avg": [ 249.14285714285714, 80.04182580095872 ], "wc_reply_reviewers_avg": [ 18.285714285714285, 10.806271799472823 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8571428571428571, 0.3499271061118826 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.37463432463267765, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18433954440474573665&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;;cam.ac.uk;cam.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": 
"", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Optimization or Architecture: How to Hack Kalman Filtering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72115", "id": "IXWaWPkGke", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9dfcc83c01e94d02c751c47517855c9f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IXWaWPkGke", "openreview": "https://openreview.net/forum?id=IXWaWPkGke", "poster": "/media/PosterPDFs/NeurIPS%202023/72115.png?t=1696840602.746336", "slides": "https://nips.cc/virtual/2023/poster/72115", "video": "https://nips.cc/virtual/2023/poster/72115", "author_site": "Ido Greenberg, Netanel Yannay, Shie Mannor", "tldr": "", "abstract": "In non-linear filtering, it is traditional to compare non-linear architectures such as neural networks to the standard linear Kalman Filter (KF). We observe that this mixes the evaluation of two separate components: the non-linear architecture, and the parameters optimization method. In particular, the non-linear model is often optimized, whereas the reference KF model is not. We argue that both should be optimized similarly, and to that end present the Optimized KF (OKF). We demonstrate that the KF may become competitive to neural models \u2013 if optimized using OKF. This implies that experimental conclusions of certain previous studies were derived from a flawed process. The advantage of OKF over the standard KF is further studied theoretically and empirically, in a variety of problems. Conveniently, OKF can replace the KF in real-world systems by merely updating the parameters.", "keywords": "non-linear filtering;Kalman filter;noise estimation;optimization;Cholesky parameterization", "primary_area": "", "supplementary_material": "/attachment/2a7362915603600a6b07d891f9aa22ea120cf4ae.pdf", "author": "Ido Greenberg;Netanel Yannay;Shie Mannor", "authorids": "~Ido_Greenberg1;~Netanel_Yannay1;~Shie_Mannor2", "gender": "M;M;M", "homepage": "https://idogreenberg.neocities.org/;https://www.linkedin.com/in/nati-yannay-9693b524/;https://shie.net.technion.ac.il", "dblp": ";;20/1669", "google_scholar": "LnwyFkkAAAAJ;;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ", "orcid": ";;", "linkedin": "ido-greenberg-87245852/;;", "or_profile": "~Ido_Greenberg1;~Netanel_Yannay1;~Shie_Mannor2", "aff": "Technion, Technion;;Technion - Israel Institute of Technology, Technion", "aff_domain": "technion.ac.il;;technion.il", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\ngreenberg2023optimization,\ntitle={Optimization or Architecture: How to Hack Kalman Filtering},\nauthor={Ido Greenberg and Netanel Yannay and Shie Mannor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IXWaWPkGke}\n}", "github": "", "project": "", "reviewers": "mJXo;hyJH;GLsN;JSXK", "pdf_size": 2308736, "rating": "5;6;6;6", "confidence": "3;2;3;3", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "2;4;2;3", "wc_summary": "70;114;220;144", "wc_strengths": "27;97;22;196", "wc_weaknesses": "147;134;31;285", "wc_questions": "5;29;20;233", "wc_limitations": "1;51;37;19", "wc_review": "250;425;330;877", "wc_reply_reviewers": "614;0;0;79", "wc_reply_authors": "673;0;0;143", "reply_reviewers": "4;0;0;1", 
"reply_authors": "4;1;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 137.0, 54.67174773134658 ], "wc_strengths_avg": [ 85.5, 70.35090617753264 ], "wc_weaknesses_avg": [ 149.25, 90.34482552974464 ], "wc_questions_avg": [ 71.75, 93.49164401164417 ], "wc_limitations_avg": [ 27.0, 18.81488772222678 ], "wc_review_avg": [ 470.5, 242.73081798568555 ], "wc_reply_reviewers_avg": [ 173.25, 256.5028021289436 ], "wc_reply_authors_avg": [ 204.0, 276.99909747145387 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15011376722214862899&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "technion.ac.il;;technion.il", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Intriguing Properties of Quantization at Scale", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72114", "id": "IYe8j7Gy8f", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c0ff499edc529c7d8c9f05c7c0ccb82-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IYe8j7Gy8f", "openreview": "https://openreview.net/forum?id=IYe8j7Gy8f", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72114", "video": "https://nips.cc/virtual/2023/poster/72114", "author_site": "Arash Ahmadian, Saurabh Dash, Hongyu Chen, Bharat Venkitesh, Zhen Stephen Gou, Phil Blunsom, Ahmet \u00dcst\u00fcn, Sara Hooker", "tldr": "", "abstract": "Emergent properties have been widely adopted as a term to describe behavior not present in smaller models but observed in larger models (Wei et al., 2022a). Recent work suggests that the trade-off incurred by quantization is also an emergent property, with sharp drops in performance in models over 6B parameters. In this work, we ask _are quantization cliffs in performance solely a factor of scale?_ Against a backdrop of increased research focus on why certain emergent properties surface at scale, this work provides a useful counter-example. We posit that it is possible to optimize for a quantization friendly training recipe that suppresses large activation magnitude outliers. Here, we find that outlier dimensions are not an inherent product of scale, but rather sensitive to the optimization conditions present during pre-training. This both opens up directions for more efficient quantization, and poses the question of whether other emergent properties are inherent or can be altered and conditioned by optimization and architecture design choices. 
We successfully quantize models ranging in size from 410M to 52B with minimal degradation in performance.", "keywords": "Quantization;optimization;language modelling;efficiency", "primary_area": "", "supplementary_material": "/attachment/4e4056e83c0b63ccfefff29719c2fc6fb94731ec.pdf", "author": "Arash Ahmadian;Saurabh Dash;Hongyu Chen;Bharat Venkitesh;Zhen Stephen Gou;Phil Blunsom;Ahmet \u00dcst\u00fcn;Sara Hooker", "authorids": "~Arash_Ahmadian1;~Saurabh_Dash1;~Hongyu_Chen1;~Bharat_Venkitesh1;~Zhen_Stephen_Gou1;~Phil_Blunsom1;~Ahmet_\u00dcst\u00fcn1;~Sara_Hooker2", "gender": "M;M;F;M;M;;M;", "homepage": "https://twitter.com/aahmadian_;https://saurabhdash.com;;;https://www.linkedin.com/in/zhen-stephen-gou/;;https://ahmetustun.github.io/;https://www.sarahooker.me/", "dblp": "330/4756;190/7336;28/3046;194/3553;239/9671;96/4705;186/0896;210/2611", "google_scholar": "https://scholar.google.com/citations?hl=en;bboszRcAAAAJ;;r6fDYb0AAAAJ;;https://scholar.google.co.uk/citations?user=eJwbbXEAAAAJ;fvotcRIAAAAJ;2xy6h3sAAAAJ", "orcid": "0000-0003-3855-970X;;0009-0002-4064-0793;;;;;", "linkedin": "arash-ahmadian/;;hyu-chen/;bharat-venkitesh-92350671/;;;ahmet-%C3%BCst%C3%BCn/;", "or_profile": "~Arash_Ahmadian1;~Saurabh_Dash1;~Hongyu_Chen1;~Bharat_Venkitesh1;~Zhen_Stephen_Gou1;~Phil_Blunsom1;~Ahmet_\u00dcst\u00fcn1;~Sara_Hooker1", "aff": "Cohere;Georgia Institute of Technology;Cohere;Cohere;;Department of Computer Science, University of Oxford;University of Groningen;Cohere For AI", "aff_domain": "cohere.om;gatech.edu;cohere.ai;cohere.ai;;cs.ox.ac.uk;rug.nl;cohere.com", "position": "Research Scholar;PhD student;Researcher;Member of Technical Staff;;Associate Professor;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nahmadian2023intriguing,\ntitle={Intriguing Properties of Quantization at Scale},\nauthor={Arash Ahmadian and Saurabh Dash and Hongyu Chen and Bharat Venkitesh and Zhen Stephen Gou and Phil Blunsom and Ahmet {\\\"U}st{\\\"u}n and Sara Hooker},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IYe8j7Gy8f}\n}", "github": "", "project": "", "reviewers": "TcCw;egL3;vYyu;xK34", "pdf_size": 423383, "rating": "5;5;6;7", "confidence": "3;3;4;1", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "123;222;106;177", "wc_strengths": "108;92;18;152", "wc_weaknesses": "278;483;148;21", "wc_questions": "3;88;44;1", "wc_limitations": "1;26;1;2", "wc_review": "513;911;317;353", "wc_reply_reviewers": "324;0;0;0", "wc_reply_authors": "144;69;108;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 157.0, 45.77663159298639 ], "wc_strengths_avg": [ 92.5, 48.29855070289377 ], "wc_weaknesses_avg": [ 232.5, 170.801785704951 ], "wc_questions_avg": [ 34.0, 35.58791929854849 ], "wc_limitations_avg": [ 7.5, 10.688779163215974 ], "wc_review_avg": [ 523.5, 235.57323701982787 ], "wc_reply_reviewers_avg": [ 81.0, 140.29611541307906 ], "wc_reply_authors_avg": [ 80.25, 53.38714732967102 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6225430174794673, "gs_citation": 38, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15147397015620839494&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cohere.om;gatech.edu;cohere.ai;cohere.ai;;cs.ox.ac.uk;rug.nl;cohere.com", "author_num": 8, "aff_unique_index": "0;1;0;0;2;3;0", "aff_unique_norm": "Cohere;Georgia Institute of Technology;University of Oxford;University of Groningen", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://cohere.ai;https://www.gatech.edu;https://www.ox.ac.uk;https://www.rug.nl", "aff_unique_abbr": ";Georgia Tech;Oxford;RUG", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0;1;2;0", "aff_country_unique": "United States;United Kingdom;Netherlands" }, { "id": "IYnsTEVTIb", "title": "Det-CGD: Compressed Gradient Descent with Matrix Stepsizes for Non-Convex Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper introduces a new method for minimizing matrix-smooth non-convex objectives through the use of novel Compressed Gradient Descent (CGD) algorithms enhanced with a matrix-valued stepsize. \nThe proposed algorithms are theoretically analyzed first in the single-node and subsequently in the distributed settings. \nOur theoretical results reveal that the matrix stepsize in CGD can capture the objective's structure and lead to faster convergence compared to a scalar stepsize. \nAs a byproduct of our general results, we emphasize the importance of selecting the compression mechanism and the matrix stepsize in a layer-wise manner, taking advantage of model structure. \nMoreover, we provide theoretical guarantees for free compression, by designing specific layer-wise compressors for the non-convex matrix smooth objectives. Our findings are supported with empirical evidence.\n", "keywords": "Optimization;First-order optimization;Non-convex optimization;Distributed optimization", "primary_area": "", "supplementary_material": "/attachment/2311fcbe3defd3991517ed469a3a7c3e87dbf1c6.pdf", "author": "Hanmin Li;Avetik Karagulyan;Peter Richt\u00e1rik", "authorids": "~Hanmin_Li1;~Avetik_Karagulyan1;~Peter_Richt\u00e1rik1", "gender": "M;M;M", "homepage": "https://cemse.kaust.edu.sa/ai/people/person/hanmin-li;https://avetx.github.io/;https://richtarik.org", "dblp": "340/3604;207/8322;62/8001", "google_scholar": "https://scholar.google.com/citations?hl=en;1-_KDtoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2587-640X;;0000-0003-4380-5848", "linkedin": "hanmin-li-034b8b245/;;richtarik/", "or_profile": "~Hanmin_Li1;~Avetik_Karagulyan1;~Peter_Richtarik1", "aff": "King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "MS student;Postdoc;Full Professor", "bibtex": "@misc{\nli2023detcgd,\ntitle={Det-{CGD}: Compressed Gradient Descent with Matrix Stepsizes for Non-Convex Optimization},\nauthor={Hanmin Li and Avetik Karagulyan and Peter Richt{\\'a}rik},\nyear={2023},\nurl={https://openreview.net/forum?id=IYnsTEVTIb}\n}", "github": "", "project": "", "reviewers": "DJA3;DW9L;i86q;WhEe;72cD", "site": "https://openreview.net/forum?id=IYnsTEVTIb", "pdf_size": 544583, "rating": "5;5;6;7;7", "confidence": "2;2;1;5;3", "soundness": "3;2;3;4;4", "novelty": "2;2;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "14;147;48;140;78", "wc_strengths": "26;58;6;83;83", "wc_weaknesses": "107;127;10;311;73", 
"wc_questions": "4;2;52;47;95", "wc_limitations": "1;9;7;1;16", "wc_review": "152;343;123;582;345", "wc_reply_reviewers": "79;0;15;27;36", "wc_reply_authors": "5;0;5;5;5", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.6, 1.3564659966250538 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 85.4, 51.62789943431749 ], "wc_strengths_avg": [ 51.2, 30.81168609472711 ], "wc_weaknesses_avg": [ 125.6, 100.84760780504415 ], "wc_questions_avg": [ 40.0, 34.51955967274206 ], "wc_limitations_avg": [ 6.8, 5.6 ], "wc_review_avg": [ 309.0, 165.06120077110793 ], "wc_reply_reviewers_avg": [ 31.4, 26.687824939473806 ], "wc_reply_authors_avg": [ 4.0, 2.0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.659380473395787, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13265996746913950779&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Efficient Test-Time Adaptation for Super-Resolution with Second-Order Degradation and Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72113", "id": "IZRlMABK4l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec3d49763c653ad7c8d587f52220c129-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IZRlMABK4l", "openreview": "https://openreview.net/forum?id=IZRlMABK4l", "poster": "/media/PosterPDFs/NeurIPS%202023/72113.png?t=1702001134.847528", "slides": "https://nips.cc/virtual/2023/poster/72113", "video": "https://nips.cc/virtual/2023/poster/72113", "author_site": "Zeshuai Deng, Zhuokun Chen, Shuaicheng Niu, Thomas Li, Bohan Zhuang, Mingkui Tan", "tldr": "", "abstract": "Image super-resolution (SR) aims to learn a mapping from low-resolution (LR) to high-resolution (HR) using paired HR-LR training images. Conventional SR methods typically gather the paired training data by synthesizing LR images from HR images using a predetermined degradation model, e.g., Bicubic down-sampling. However, the realistic degradation type of test images may mismatch with the training-time degradation type due to the dynamic changes of the real-world scenarios, resulting in inferior-quality SR images. To address this, existing methods attempt to estimate the degradation model and train an image-specific model, which, however, is quite time-consuming and impracticable to handle rapidly changing domain shifts. Moreover, these methods largely concentrate on the estimation of one degradation type (e.g., blur degradation), overlooking other degradation types like noise and JPEG in real-world test-time scenarios, thus limiting their practicality. To tackle these problems, we present an efficient test-time adaptation framework for SR, named SRTTA, which is able to quickly adapt SR models to test domains with different/unknown degradation types. 
Specifically, we design a second-order degradation scheme to construct paired data based on the degradation type of the test image, which is predicted by a pre-trained degradation classifier. Then, we adapt the SR model by implementing feature-level reconstruction learning from the initial test image to its second-order degraded counterparts, which helps the SR model generate plausible HR images. Extensive experiments are conducted on newly synthesized corrupted DIV2K datasets with 8 different degradations and several real-world datasets, demonstrating that our SRTTA framework achieves an impressive improvement over existing methods with satisfying speed. The source code is available at https://github.com/DengZeshuai/SRTTA.", "keywords": "Image Super-resolution;Test-time Adaptation;Self-supervised Learning;Second-Order Degradation", "primary_area": "", "supplementary_material": "/attachment/24d0297360080836571125a5fd839f8fc3b23c90.pdf", "author": "Zeshuai Deng;Zhuokun Chen;Shuaicheng Niu;Thomas H. Li;Bohan Zhuang;Mingkui Tan", "authorids": "~Zeshuai_Deng1;~Zhuokun_Chen1;~Shuaicheng_Niu1;~Thomas_H._Li3;~Bohan_Zhuang1;~Mingkui_Tan2", "gender": "M;M;M;M;M;M", "homepage": "https://dengzeshuai.github.io/;https://github.com/Caesarhhh;https://niushuaicheng.cn/;http://pku.edu.cn;https://bohanzhuang.github.io/;https://tanmingkui.github.io/", "dblp": "260/6986;;254/1388;213/4037;145/1096;49/2007", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ;https://scholar.google.com.sg/citations?user=EVsoTGkAAAAJ", "orcid": "0000-0003-4521-4195;;0000-0001-8212-1831;;;0000-0001-8856-756X", "linkedin": ";;;;bohan-zhuang/;", "or_profile": "~Zeshuai_Deng1;~Zhuokun_Chen1;~Shuaicheng_Niu1;~Thomas_H._Li3;~Bohan_Zhuang1;~Mingkui_Tan1", "aff": "South China University of Technology;;South China University of Technology;AIIT, Peking University;Monash University;South China University of Technology", "aff_domain": "scut.edu.cn;;scut.edu.cn;aiit.org.cn;monash.edu;scut.edu.cn", "position": "PhD student;;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndeng2023efficient,\ntitle={Efficient Test-Time Adaptation for Super-Resolution with Second-Order Degradation and Reconstruction},\nauthor={Zeshuai Deng and Zhuokun Chen and Shuaicheng Niu and Thomas H. 
Li and Bohan Zhuang and Mingkui Tan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IZRlMABK4l}\n}", "github": "", "project": "", "reviewers": "aSmb;tgyp;QTZE;G7Pf;pkZJ", "pdf_size": 2536392, "rating": "4;5;7;7;7", "confidence": "4;4;4;5;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "69;86;124;104;51", "wc_strengths": "21;22;62;117;45", "wc_weaknesses": "97;23;112;169;191", "wc_questions": "52;206;130;4;11", "wc_limitations": "1;4;13;10;2", "wc_review": "240;341;441;404;300", "wc_reply_reviewers": "90;24;134;32;203", "wc_reply_authors": "520;82;201;86;725", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;3;3;3;4", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.8, 25.607811308270765 ], "wc_strengths_avg": [ 53.4, 35.285124344403265 ], "wc_weaknesses_avg": [ 118.4, 59.04439008068421 ], "wc_questions_avg": [ 80.6, 77.06516722878112 ], "wc_limitations_avg": [ 6.0, 4.69041575982343 ], "wc_review_avg": [ 345.2, 71.78133462119523 ], "wc_reply_reviewers_avg": [ 96.6, 66.6441295239123 ], "wc_reply_authors_avg": [ 322.8, 256.7281831042319 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.4, 0.4898979485566356 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6276384964075052952&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "scut.edu.cn;;scut.edu.cn;aiit.org.cn;monash.edu;scut.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "South China University of Technology;Peking University;Monash University", "aff_unique_dep": ";AIIT;", "aff_unique_url": "https://www.scut.edu.cn;http://www.pku.edu.cn;https://www.monash.edu", "aff_unique_abbr": "SCUT;PKU;Monash", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "ResoNet: Noise-Trained Physics-Informed MRI Off-Resonance Correction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72112", "id": "Ia4dmqst0Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e0bd92a1d3600d4288df51ac5e6be5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ia4dmqst0Z", "openreview": "https://openreview.net/forum?id=Ia4dmqst0Z", "poster": "/media/PosterPDFs/NeurIPS%202023/72112.png?t=1701814531.9716585", "slides": "https://nips.cc/virtual/2023/poster/72112", "video": "https://nips.cc/virtual/2023/poster/72112", "author_site": "Alfredo De Goyeneche Macaya, Shreya Ramachandran, Ke Wang, Ekin Karasan, Joseph Y. Cheng, Stella X. Yu, Michael Lustig", "tldr": "", "abstract": "Magnetic Resonance Imaging (MRI) is a powerful medical imaging modality that offers diagnostic information without harmful ionizing radiation. Unlike optical imaging, MRI sequentially samples the spatial Fourier domain (k-space) of the image. \nMeasurements are collected in multiple shots, or readouts, and in each shot, data along a smooth trajectory is sampled.\nConventional MRI data acquisition relies on sampling k-space row-by-row in short intervals, which is slow and inefficient. 
More efficient, non-Cartesian sampling trajectories (e.g., Spirals) use longer data readout intervals, but are more susceptible to magnetic field inhomogeneities, leading to off-resonance artifacts. Spiral trajectories cause off-resonance blurring in the image, and the mathematics of this blurring resembles that of optical blurring, where magnetic field variation corresponds to depth and readout duration to aperture size. Off-resonance blurring is a system issue with a physics-based, accurate forward model. We present a physics-informed deep learning framework for off-resonance correction in MRI, which is trained exclusively on synthetic, noise-like data with representative marginal statistics. Our approach allows for fat/water separation and is compatible with parallel imaging acceleration. Through end-to-end training using synthetic randomized data (i.e., noise-like images, coil sensitivities, field maps), we train the network to reverse off-resonance effects across diverse anatomies and contrasts without retraining. We demonstrate the effectiveness of our approach through results on phantom and in-vivo data. This work has the potential to facilitate the clinical adoption of non-Cartesian sampling trajectories, enabling efficient, rapid, and motion-robust MRI scans. Code is publicly available at: https://github.com/mikgroup/ResoNet.", "keywords": "Inverse problem;MRI;Medical Imaging;Computational Imaging;Deep Learning;Off-Resonance", "primary_area": "", "supplementary_material": "/attachment/2d28364dd6f3f3a192b8d9315f2d43e8967e185d.pdf", "author": "Alfredo De Goyeneche;Shreya Ramachandran;Ke Wang;Ekin Karasan;Joseph Yitan Cheng;Stella X. Yu;Michael Lustig", "authorids": "~Alfredo_De_Goyeneche1;~Shreya_Ramachandran1;~Ke_Wang8;~Ekin_Karasan1;~Joseph_Yitan_Cheng1;~Stella_X._Yu2;~Michael_Lustig2", "gender": ";;M;F;M;;", "homepage": ";;https://people.eecs.berkeley.edu/~kewang/;https://scholar.google.com/citations?user=6Rasfn4AAAAJ&hl=en&oi=aoscholar.google.com/citations?user=6Rasfn4AAAAJ&hl=en&oi=ao;;;https://people.eecs.berkeley.edu/~mlustig/", "dblp": ";;;229/0897;202/2088;;", "google_scholar": ";;Iz3m3v4AAAAJ;6Rasfn4AAAAJ;kq0bsOwAAAAJ;;", "orcid": ";;;0000-0001-5662-8145;;;", "linkedin": ";;;ekin-karasan;;;", "or_profile": "~Alfredo_De_Goyeneche1;~Shreya_Ramachandran1;~Ke_Wang8;~Ekin_Karasan1;~Joseph_Yitan_Cheng1;~Stella_X._Yu2;~Michael_Lustig2", "aff": ";;Electrical Engineering and Computer Sciences, University of California, Berkeley;University of California, Berkeley;Humane;;University of California, Berkeley", "aff_domain": ";;berkeley.edu;berkeley.edu;humane.com;;berkeley.edu", "position": ";;PhD student;PhD student;Computer Vision Engineer;;Full Professor", "bibtex": "@inproceedings{\ngoyeneche2023resonet,\ntitle={ResoNet: a Physics-Informed {DL} Framework for Off-Resonance Correction in {MRI} Trained with Noise},\nauthor={Alfredo De Goyeneche and Shreya Ramachandran and Ke Wang and Ekin Karasan and Joseph Yitan Cheng and Stella X. 
Yu and Michael Lustig},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ia4dmqst0Z}\n}", "github": "", "project": "", "reviewers": "tQmb;h1kS;XeBV;9HEj", "pdf_size": 15537990, "rating": "4;5;7;9", "confidence": "4;4;3;5", "soundness": "2;2;4;4", "novelty": "2;3;3;3", "presentation": "1;3;3;4", "wc_summary": "48;74;53;160", "wc_strengths": "83;87;49;48", "wc_weaknesses": "200;59;23;118", "wc_questions": "20;1;23;4", "wc_limitations": "6;13;4;1", "wc_review": "357;234;152;331", "wc_reply_reviewers": "127;0;16;254", "wc_reply_authors": "370;0;0;306", "reply_reviewers": "2;0;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 83.75, 45.090880452703516 ], "wc_strengths_avg": [ 66.75, 18.30812661087966 ], "wc_weaknesses_avg": [ 100.0, 66.95894264398147 ], "wc_questions_avg": [ 12.0, 9.617692030835672 ], "wc_limitations_avg": [ 6.0, 4.415880433163924 ], "wc_review_avg": [ 268.5, 81.3956387038028 ], "wc_reply_reviewers_avg": [ 99.25, 101.85620992359769 ], "wc_reply_authors_avg": [ 169.0, 170.5080643254154 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3682298471593293, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9025334923497122462&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": ";;berkeley.edu;berkeley.edu;humane.com;;berkeley.edu", "author_num": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Humane", "aff_unique_dep": "Electrical Engineering and Computer Sciences;", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "Can Language Models Teach? Teacher Explanations Improve Student Performance via Personalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72111", "id": "IacxcFpvWQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c6afe9a5d1e1068796d32613ddca1ab7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IacxcFpvWQ", "openreview": "https://openreview.net/forum?id=IacxcFpvWQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72111.png?t=1701902519.8028624", "slides": "https://nips.cc/virtual/2023/poster/72111", "video": "https://nips.cc/virtual/2023/poster/72111", "author_site": "Swarnadeep Saha, Peter Hase, Mohit Bansal", "tldr": "", "abstract": "A hallmark property of explainable AI models is the ability to teach other agents, communicating knowledge of how to perform a task. While Large Language Models (LLMs) perform complex reasoning by generating explanations for their predictions, it is unclear whether they also make good teachers for weaker agents. To address this, we consider a student-teacher framework between two LLM agents and study if, when, and how the teacher should intervene with natural language explanations to improve the student\u2019s performance. 
Since communication is expensive, we define a budget such that the teacher only communicates explanations for a fraction of the data, after which the student should perform well on its own. We decompose the teaching problem along four axes: (1) if the teacher\u2019s test-time intervention improves student predictions, (2) when it is worth explaining a data point, (3) how the teacher should personalize explanations to better teach the student, and (4) if teacher explanations also improve student performance on future unexplained data. We first show that teacher LLMs can indeed intervene on student reasoning to improve their performance. Next, inspired by the Theory of Mind abilities of effective teachers, we propose building two few-shot mental models of the student. The first model defines an Intervention Function that simulates the utility of an intervention, allowing the teacher to intervene when this utility is the highest and improving student performance at lower budgets. The second model enables the teacher to personalize explanations for a particular student and outperform unpersonalized teachers. We also demonstrate that in multi-turn interactions, teacher explanations generalize and learning from explained data improves student performance on future unexplained data. Finally, we also verify that misaligned teachers can lower student performance to random chance by intentionally misleading them.", "keywords": "Language Models;Reasoning;Explanations", "primary_area": "", "supplementary_material": "/attachment/062065cbe7db53a1ab4a20d5a9cf4f62a4deae09.zip", "author": "Swarnadeep Saha;Peter Hase;Mohit Bansal", "authorids": "~Swarnadeep_Saha2;~Peter_Hase1;~Mohit_Bansal2", "gender": ";;M", "homepage": ";;https://www.cs.unc.edu/~mbansal/", "dblp": ";;32/5243.html", "google_scholar": ";;DN8QtscAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Swarnadeep_Saha2;~Peter_Hase1;~Mohit_Bansal2", "aff": ";;University of North Carolina at Chapel Hill", "aff_domain": ";;unc.edu", "position": ";;Full Professor", "bibtex": "@inproceedings{\nsaha2023can,\ntitle={Can Language Models Teach? 
Teacher Explanations Improve Student Performance via Personalization},\nauthor={Swarnadeep Saha and Peter Hase and Mohit Bansal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IacxcFpvWQ}\n}", "github": "", "project": "", "reviewers": "R7Nj;fVpe;S1Bk;UsZZ", "pdf_size": 594956, "rating": "4;5;6;7", "confidence": "3;4;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;2;4", "wc_summary": "367;192;211;120", "wc_strengths": "20;129;77;51", "wc_weaknesses": "44;296;596;493", "wc_questions": "9;137;412;399", "wc_limitations": "5;104;126;16", "wc_review": "445;858;1422;1079", "wc_reply_reviewers": "23;337;665;288", "wc_reply_authors": "36;786;679;120", "reply_reviewers": "1;2;2;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 222.5, 90.06802984411283 ], "wc_strengths_avg": [ 69.25, 39.96482828688246 ], "wc_weaknesses_avg": [ 357.25, 210.5390403226917 ], "wc_questions_avg": [ 239.25, 172.36063210605838 ], "wc_limitations_avg": [ 62.75, 52.96874078171011 ], "wc_review_avg": [ 951.0, 354.580738337547 ], "wc_reply_reviewers_avg": [ 328.25, 228.18564262459634 ], "wc_reply_authors_avg": [ 405.25, 330.76530576830453 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.1348399724926484, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12862675643486918721&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";;unc.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "IaoovD6nDx", "title": "Toward Causal-Aware RL: State-Wise Action-Refined Temporal Difference", "track": "main", "status": "Reject", "tldr": "", "abstract": "Although it is well known that exploration plays a key role in Reinforcement Learning (RL), prevailing exploration strategies for continuous control tasks in RL are mainly based on naive isotropic Gaussian noise regardless of the causality relationship between the action space and the task, and consider all dimensions of actions equally important. In this work, we propose to conduct interventions on the primal action space to discover the causal relationship between the action space and the task reward. We propose the method of State-Wise Action Refined (SWAR), which addresses the issue of action space redundancy and promotes causality discovery in RL. We formulate causality discovery in RL tasks as a state-dependent action space selection problem and propose two practical algorithms as solutions. The first approach, TD-SWAR, detects task-related actions during temporal difference learning, while the second approach, Dyn-SWAR, reveals important actions through dynamic model prediction. Empirically, both methods provide approaches to understand the decisions made by RL agents and improve learning efficiency in action-redundant tasks. 
", "keywords": "Action Space Refinery", "primary_area": "", "supplementary_material": "/attachment/43bccca83109b1cf4685ac9627d3b46c51d567e7.pdf", "author": "Hao Sun", "authorids": "~Hao_Sun1", "gender": "M", "homepage": "https://holarissun.github.io", "dblp": "SunLLZL19", "google_scholar": "7ZNoHJkAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Hao_Sun1", "aff": "University of Cambridge", "aff_domain": "cam.ac.uk", "position": "PhD student", "bibtex": "@misc{\nsun2023toward,\ntitle={Toward Causal-Aware {RL}: State-Wise Action-Refined Temporal Difference},\nauthor={Hao Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=IaoovD6nDx}\n}", "github": "", "project": "", "reviewers": "48ca;oAvw;zze8;dv5c", "site": "https://openreview.net/forum?id=IaoovD6nDx", "pdf_size": 1561491, "rating": "2;5;5;6", "confidence": "4;3;3;3", "soundness": "1;2;2;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "71;96;127;93", "wc_strengths": "79;27;125;97", "wc_weaknesses": "134;65;585;127", "wc_questions": "46;2;78;103", "wc_limitations": "10;61;54;16", "wc_review": "340;251;969;436", "wc_reply_reviewers": "698;23;254;24", "wc_reply_authors": "2542;514;789;455", "reply_reviewers": "4;1;3;1", "reply_authors": "7;3;4;3", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.75, 19.954636052807377 ], "wc_strengths_avg": [ 82.0, 35.73513677041127 ], "wc_weaknesses_avg": [ 227.75, 207.99924879672042 ], "wc_questions_avg": [ 57.25, 37.75827723824274 ], "wc_limitations_avg": [ 35.25, 22.487496525847426 ], "wc_review_avg": [ 499.0, 279.1298980761466 ], "wc_reply_reviewers_avg": [ 249.75, 275.3746311845011 ], "wc_reply_authors_avg": [ 1075.0, 856.3010568719392 ], "reply_reviewers_avg": [ 2.25, 1.299038105676658 ], "reply_authors_avg": [ 4.25, 1.6393596310755 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15576634271505193170&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Stochastic Approximation Approaches to Group Distributionally Robust Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72110", "id": "IcIQbCWoFj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4b6ad6b48850c0c331d1259fc66a69c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IcIQbCWoFj", "openreview": "https://openreview.net/forum?id=IcIQbCWoFj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72110", "video": "https://nips.cc/virtual/2023/poster/72110", "author_site": "Lijun Zhang, Peng Zhao, Zhen-Hua Zhuang, Tianbao Yang, Zhi-Hua Zhou", "tldr": "", "abstract": "This paper investigates group distributionally robust optimization (GDRO), with the purpose to learn a model that performs well over $m$ different distributions. 
First, we formulate GDRO as a stochastic convex-concave saddle-point problem, and demonstrate that stochastic mirror descent (SMD), using $m$ samples in each iteration, achieves an $O(m (\\log m)/\\epsilon^2)$ sample complexity for finding an $\\epsilon$-optimal solution, which matches the $\\Omega(m/\\epsilon^2)$ lower bound up to a logarithmic factor. Then, we make use of techniques from online learning to reduce the number of samples required in each round from $m$ to $1$, keeping the same sample complexity. Specifically, we cast GDRO as a two-player game where one player simply performs SMD and the other executes an online algorithm for non-oblivious multi-armed bandits. Next, we consider a more practical scenario where the number of samples that can be drawn from each distribution is different, and propose a novel formulation of weighted GDRO, which allows us to derive distribution-dependent convergence rates. Denote by $n_i$ the sample budget for the $i$-th distribution, and assume $n_1 \\geq n_2 \\geq \\cdots \\geq n_m$. In the first approach, we incorporate non-uniform sampling into SMD such that the sample budget is satisfied in expectation, and prove that the excess risk of the $i$-th distribution decreases at an $O(\\sqrt{n_1 \\log m}/n_i)$ rate. In the second approach, we use mini-batches to meet the budget exactly and also reduce the variance in stochastic gradients, and then leverage the stochastic mirror-prox algorithm, which can exploit small variances, to optimize a carefully designed weighted GDRO problem. Under appropriate conditions, it attains an $O((\\log m)/\\sqrt{n_i})$ convergence rate, which almost matches the optimal $O(\\sqrt{1/n_i})$ rate of only learning from the $i$-th distribution with $n_i$ samples.", "keywords": "Group distributionally robust optimization;Stochastic mirror descent;Non-oblivious online learning;Sample complexity;Stochastic mirror-prox algorithm;Mini-batch", "primary_area": "", "supplementary_material": "", "author": "Lijun Zhang;Peng Zhao;Zhenhua Zhuang;Tianbao Yang;Zhi-Hua Zhou", "authorids": "~Lijun_Zhang1;~Peng_Zhao1;~Zhenhua_Zhuang1;~Tianbao_Yang1;~Zhi-Hua_Zhou2", "gender": ";;M;M;", "homepage": ";;http://www.lamda.nju.edu.cn/zhuangzh/;https://people.tamu.edu/~tianbao-yang/publications.html;", "dblp": ";;;56/7047;", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Lijun_Zhang1;~Peng_Zhao1;~Zhenhua_Zhuang1;~Tianbao_Yang1;~Zhi-Hua_Zhou2", "aff": ";;Nanjing University;Texas A&M University - College Station;", "aff_domain": ";;nju.edu;tamu.edu;", "position": ";;MS student;Associate Professor;", "bibtex": "@inproceedings{\nzhang2023stochastic,\ntitle={Stochastic Approximation Approaches to Group Distributionally Robust Optimization},\nauthor={Lijun Zhang and Peng Zhao and Zhenhua Zhuang and Tianbao Yang and Zhi-Hua Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IcIQbCWoFj}\n}", "github": "", "project": "", "reviewers": "Gpdz;EKF4;41n7;JWj5;evdu", "pdf_size": 888014, "rating": "4;5;6;7;8", "confidence": "4;3;2;4;4", "soundness": "3;3;3;3;4", "novelty": "2;2;2;4;4", "presentation": "2;3;3;4;4", "wc_summary": "76;94;50;91;126", "wc_strengths": "41;37;29;75;144", "wc_weaknesses": "316;142;37;14;28", "wc_questions": "82;29;146;1;69", "wc_limitations": "12;9;10;66;24", "wc_review": "527;311;272;247;391", "wc_reply_reviewers": "74;69;13;26;39", "wc_reply_authors": 
"42;55;23;23;23", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 87.4, 24.8 ], "wc_strengths_avg": [ 65.2, 42.418863728298994 ], "wc_weaknesses_avg": [ 107.4, 113.7499010988581 ], "wc_questions_avg": [ 65.4, 49.51201874292745 ], "wc_limitations_avg": [ 24.2, 21.581473536345936 ], "wc_review_avg": [ 349.6, 101.21580904186855 ], "wc_reply_reviewers_avg": [ 44.2, 23.81092186371624 ], "wc_reply_authors_avg": [ 33.2, 13.151425778218877 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1767766952966369, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6152725146266403167&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 10, "email": ";;nju.edu;tamu.edu;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.tamu.edu", "aff_unique_abbr": "Nanjing U;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "NIS3D: A Completely Annotated Benchmark for Dense 3D Nuclei Image Segmentation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73634", "id": "Icxwnu9hcO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Icxwnu9hcO", "openreview": "https://openreview.net/forum?id=Icxwnu9hcO", "poster": "/media/PosterPDFs/NeurIPS%202023/73634.png?t=1701376465.612191", "slides": "https://nips.cc/virtual/2023/poster/73634", "video": "https://nips.cc/virtual/2023/poster/73634", "author_site": "Wei Zheng, Cheng Peng, Zeyuan Hou, Boyu Lyu, Mengfan Wang, Xuelong Mi, Shuoxuan Qiao, Yinan Wan, Guoqiang Yu", "tldr": "", "abstract": "3D segmentation of nuclei images is a fundamental task for many biological studies. Despite the rapid advances of large-volume 3D imaging acquisition methods and the emergence of sophisticated algorithms to segment the nuclei in recent years, a benchmark with all cells completely annotated is still missing, making it hard to accurately assess and further improve the performance of the algorithms. The existing nuclei segmentation benchmarks either worked on 2D only or annotated a small number of 3D cells, perhaps due to the high cost of 3D annotation for large-scale data. To fulfill the critical need, we constructed NIS3D, a 3D, high cell density, large-volume, and completely annotated Nuclei Image Segmentation benchmark, assisted by our newly designed semi-automatic annotation software. NIS3D provides more than 22,000 cells across multiple most-used species in this area. Each cell is labeled by three independent annotators, so we can measure the variability of each annotation. A confidence score is computed for each cell, allowing more nuanced testing and performance comparison. A comprehensive review on the methods of segmenting 3D dense nuclei was conducted. The benchmark was used to evaluate the performance of several selected state-of-the-art segmentation algorithms. 
The best of current methods is still far away from human-level accuracy, corroborating the necessity of generating such a benchmark. The testing results also demonstrated the strength and weakness of each method and pointed out the directions of further methodological development. The dataset can be downloaded here: https://github.com/yu-lab-vt/NIS3D.", "keywords": "benchmark;image segmentation;embryonic nuclei image;3D;fluorescent", "primary_area": "", "supplementary_material": "/attachment/d71b4807edcfe49f77d6683a861354be917a5ea9.pdf", "author": "Wei Zheng;James Cheng Peng;Zeyuan Hou;Boyu Lyu;Mengfan Wang;Xuelong Mi;Shuoxuan Qiao;Yinan Wan;Guoqiang Yu", "authorids": "~Wei_Zheng6;~James_Cheng_Peng1;~Zeyuan_Hou1;~Boyu_Lyu1;~Mengfan_Wang1;~Xuelong_Mi1;~Shuoxuan_Qiao2;~Yinan_Wan1;~Guoqiang_Yu1", "gender": "M;M;M;M;;M;M;;M", "homepage": ";https://sites.google.com/view/jameschengpeng/home;;;;;https://github.com/kevinsxqiao;;https://ece.vt.edu/people/profile/yu", "dblp": ";;;;188/7548;195/7884;;;28/816", "google_scholar": ";OWLTjJkAAAAJ;;cDVPkN4AAAAJ;;dZt3dT8AAAAJ;;gGeZcZcAAAAJ;8vXwC1QAAAAJ", "orcid": ";0000-0002-8020-0509;;;;;;;", "linkedin": "wei-zheng-b20809226/;jc-peng/;stanhou/;;mengfan-wang-29735314a/;;;;", "or_profile": "~Wei_Zheng6;~James_Cheng_Peng1;~Zeyuan_Hou1;~Boyu_Lyu1;~Mengfan_Wang1;~Xuelong_Mi1;~Shuoxuan_Qiao2;~Yinan_Wan1;~Guoqiang_Yu1", "aff": "Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;;Virginia Tech;Virginia Tech;Virginia Polytechnic Institute and State University;University of Basel;Virginia Tech", "aff_domain": "vt.edu;vt.edu;vt.edu;;vt.edu;vt.edu;vt.edu;unibas.ch;vt.edu", "position": "PhD student;PhD student;MS student;;PhD student;PhD student;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzheng2023nisd,\ntitle={{NIS}3D: A Completely Annotated Benchmark for Dense 3D Nuclei Image Segmentation},\nauthor={Wei Zheng and James Cheng Peng and Zeyuan Hou and Boyu Lyu and Mengfan Wang and Xuelong Mi and Shuoxuan Qiao and Yinan Wan and Guoqiang Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Icxwnu9hcO}\n}", "github": "", "project": "", "reviewers": "W2ro;Rsqm;d7Xp;bNXG;x8hy", "pdf_size": 21429348, "rating": "6;6;8;8;8", "confidence": "3;5;3;3;5", "wc_summary_and_contributions": "68;51;108;24;23", "wc_strengths": "85;38;64;80;39", "wc_improvement": "92;282;78;92;167", "wc_limitations": "4;24;38;131;1", "wc_correctness": "35;27;16;11;1", "wc_clarity": "6;9;5;5;25", "wc_relation_to_prior_work": "14;4;13;12;75", "wc_documentation": "12;3;21;45;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "317;439;344;401;339", "wc_reply_reviewers": "0;146;0;9;24", "wc_reply_authors": "282;898;123;376;363", "reply_reviewers": "0;2;0;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "wc_summary_and_contributions_avg": [ 54.8, 31.555665101531293 ], "wc_strengths_avg": [ 61.2, 19.792928029980807 ], "wc_improvement_avg": [ 142.2, 76.57780357257579 ], "wc_limitations_avg": [ 39.6, 47.65962651972841 ], "wc_correctness_avg": [ 18.0, 11.933147112141038 ], "wc_clarity_avg": [ 10.0, 7.64198926981712 ], "wc_relation_to_prior_work_avg": [ 23.6, 25.943014474035202 ], "wc_documentation_avg": [ 17.6, 14.961283367412035 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 
368.0, 45.05108211796915 ], "wc_reply_reviewers_avg": [ 35.8, 55.79390647732062 ], "wc_reply_authors_avg": [ 408.4, 260.86364254146264 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14804203309851785049&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "vt.edu;vt.edu;vt.edu;;vt.edu;vt.edu;vt.edu;unibas.ch;vt.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Virginia Tech;University of Basel", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://www.unibas.ch", "aff_unique_abbr": "VT;UniBas", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Online Performative Gradient Descent for Learning Nash Equilibria in Decision-Dependent Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72109", "id": "IdF7VT6eEs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/95a704bd2fdf8ef8242b4adcc7ce3c93-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IdF7VT6eEs", "openreview": "https://openreview.net/forum?id=IdF7VT6eEs", "poster": "/media/PosterPDFs/NeurIPS%202023/72109.png?t=1701577621.556501", "slides": "https://nips.cc/virtual/2023/poster/72109", "video": "https://nips.cc/virtual/2023/poster/72109", "author_site": "Zihan Zhu, Ethan Fang, Zhuoran Yang", "tldr": "", "abstract": "We study the multi-agent game within the innovative framework of decision-dependent games, which establishes a feedback mechanism whereby population data reacts to agents\u2019 actions and further characterizes the strategic interactions between agents. We focus on finding the Nash equilibrium of decision-dependent games in the bandit feedback setting. However, since agents are strategically coupled, traditional gradient-based methods are infeasible without the gradient oracle. To overcome this challenge, we model the strategic interactions by a general parametric model and propose a novel online algorithm, Online Performative Gradient Descent (OPGD), which leverages the ideas of online stochastic approximation and projected gradient descent to learn the Nash equilibrium in the context of function approximation for the unknown gradient. In particular, under mild assumptions on the function classes defined in the parametric model, we prove that OPGD can find the Nash equilibrium efficiently for strongly monotone decision-dependent games. 
Synthetic numerical experiments validate our theory.", "keywords": "Performative Prediction;Nash Equilibrium;Reproducing Kernel Hilbert Space;Online Learning;Stochastic Gradient Methods", "primary_area": "", "supplementary_material": "", "author": "Zihan Zhu;Ethan X Fang;Zhuoran Yang", "authorids": "~Zihan_Zhu2;~Ethan_X_Fang1;~Zhuoran_Yang1", "gender": "M;M;M", "homepage": ";https://ethanfangduke.github.io/homepage/;https://zhuoranyang.github.io/", "dblp": ";;", "google_scholar": ";uglffdcAAAAJ;", "orcid": ";;", "linkedin": "zihan-zhu-48950b233;;", "or_profile": "~Zihan_Zhu2;~Ethan_X_Fang1;~Zhuoran_Yang1", "aff": "Duke University;Duke University;Yale University", "aff_domain": "duke.edu;duke.edu;yale.edu", "position": "MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023online,\ntitle={Online Performative Gradient Descent for Learning Nash Equilibria in Decision-Dependent Games},\nauthor={Zihan Zhu and Ethan X Fang and Zhuoran Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IdF7VT6eEs}\n}", "github": "", "project": "", "reviewers": "QMrd;AyUq;wDhV;sLZ8;jNzm", "pdf_size": 714010, "rating": "5;5;6;7;8", "confidence": "3;4;3;2;3", "soundness": "3;4;3;3;3", "novelty": "2;2;2;2;3", "presentation": "4;4;3;3;3", "wc_summary": "73;126;60;79;117", "wc_strengths": "32;15;41;39;61", "wc_weaknesses": "254;57;69;24;34", "wc_questions": "18;9;47;341;42", "wc_limitations": "1;2;1;20;6", "wc_review": "378;209;218;503;260", "wc_reply_reviewers": "12;37;262;88;30", "wc_reply_authors": "66;62;915;405;67", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;3;4;3;3", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.0, 25.80697580112788 ], "wc_strengths_avg": [ 37.6, 14.853955702101715 ], "wc_weaknesses_avg": [ 87.6, 84.72213406188492 ], "wc_questions_avg": [ 91.4, 125.60987222348409 ], "wc_limitations_avg": [ 6.0, 7.238784428341543 ], "wc_review_avg": [ 313.6, 112.22584372594397 ], "wc_reply_reviewers_avg": [ 85.8, 91.6436577183604 ], "wc_reply_authors_avg": [ 303.0, 333.13480754793545 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.2, 0.39999999999999997 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5423261445466404, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14148044569094037744&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "duke.edu;duke.edu;yale.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Duke University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.yale.edu", "aff_unique_abbr": "Duke;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "IfRHdBy3wb", "title": "GOAt: Explaining Graph Neural Networks via Graph Output Attribution", "track": "main", "status": "Reject", "tldr": "", "abstract": "Understanding the decision-making process of Graph Neural Networks (GNNs) is crucial to their interpretability. 
Present methods for explaining GNNs typically rely on training auxiliary models, and may struggle with issues such as overfitting to noise, insufficient discriminability, and inconsistent explanations across data samples of the same class. This paper introduces Graph Output Attribution (GOAt), a novel method to attribute graph outputs to input graph features, creating GNN explanations that are faithful, discriminative, as well as stable across similar samples. By expanding the GNN as a sum of scalar products involving node features, edge features and activation patterns, we propose an efficient analytical method to compute contribution of each node or edge feature to each scalar product and aggregate the contributions from all scalar products in the expansion form to derive the importance of each node and edge. Through extensive experiments on synthetic and real data, we show that our method has consistently outperformed various state-of-the-art GNN explainers in terms of fidelity, discriminability, and stability.", "keywords": "Graph Neural Networks;explainability;interpretability;local-level explanation;instance-level explanation", "primary_area": "", "supplementary_material": "/attachment/f379d727425c28639e828da137ab7e90ca64dfeb.zip", "author": "Shengyao Lu;Keith G. Mills;Jiao He;Bang Liu;Di Niu", "authorids": "~Shengyao_Lu1;~Keith_G._Mills1;~Jiao_He1;~Bang_Liu1;~Di_Niu1", "gender": "F;M;M;M;M", "homepage": "https://sluxsr.github.io/;https://kgmills.github.io/;https://github.com/JonHe878;http://www-labs.iro.umontreal.ca/~liubang/;https://www.ualberta.ca/~dniu", "dblp": "320/4184;299/5864;;;82/4953", "google_scholar": "https://scholar.google.ca/citations?user=MSsab9EAAAAJ;CBOD_ngAAAAJ;;lmfAnP4AAAAJ;https://scholar.google.ca/citations?user=3kC5OogAAAAJ", "orcid": ";0000-0001-6054-1798;;0000-0002-9483-8984;0000-0002-5250-7327", "linkedin": ";kgmills/;;bang-liu-12b66789/?originalSubdomain=ca;", "or_profile": "~Shengyao_Lu1;~Keith_G._Mills1;~Jiao_He1;~Bang_Liu1;~Di_Niu1", "aff": "University of Alberta;Huawei Technologies Ltd.;huawei;University of Montreal;University of Alberta", "aff_domain": "ualberta.ca;huawei.com;huawei.com;umontreal.ca;ualberta.ca", "position": "PhD student;Research Intern;Chief engineer;Assistant Professor;Associate Professor", "bibtex": "@misc{\nlu2023goat,\ntitle={{GOA}t: Explaining Graph Neural Networks via Graph Output Attribution},\nauthor={Shengyao Lu and Keith G. 
Mills and Jiao He and Bang Liu and Di Niu},\nyear={2023},\nurl={https://openreview.net/forum?id=IfRHdBy3wb}\n}", "github": "", "project": "", "reviewers": "b4jf;v7M6;33zp;x34t;LXep", "site": "https://openreview.net/forum?id=IfRHdBy3wb", "pdf_size": 1671483, "rating": "5;5;5;6;6", "confidence": "3;4;3;5;4", "soundness": "2;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;2;2;2", "wc_summary": "93;172;121;106;56", "wc_strengths": "34;85;26;68;25", "wc_weaknesses": "45;105;171;105;95", "wc_questions": "316;33;75;52;307", "wc_limitations": "34;46;4;53;14", "wc_review": "522;441;397;384;497", "wc_reply_reviewers": "104;31;24;22;185", "wc_reply_authors": "213;0;0;19;112", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 109.6, 37.90831043452082 ], "wc_strengths_avg": [ 47.6, 24.40163928919531 ], "wc_weaknesses_avg": [ 104.2, 40.13178291578883 ], "wc_questions_avg": [ 156.6, 127.20471689367497 ], "wc_limitations_avg": [ 30.2, 18.616122045152157 ], "wc_review_avg": [ 448.2, 54.0792011775322 ], "wc_reply_reviewers_avg": [ 73.2, 63.672285964931405 ], "wc_reply_authors_avg": [ 68.8, 83.19471137037497 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7637626158259732, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13711532416544159943&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University of Alberta;Huawei;University of Montreal", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com;https://www.umontreal.ca", "aff_unique_abbr": "UAlberta;Huawei;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Canada;China" }, { "title": "Conditional independence testing under misspecified inductive biases", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72108", "id": "Ifq8GMdqJK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6f2b16abf590e80c9df30bb5f8e2b7d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ifq8GMdqJK", "openreview": "https://openreview.net/forum?id=Ifq8GMdqJK", "poster": "/media/PosterPDFs/NeurIPS%202023/72108.png?t=1701664715.8686047", "slides": "https://nips.cc/virtual/2023/poster/72108", "video": "https://nips.cc/virtual/2023/poster/72108", "author_site": "Felipe Maia Polo, Yuekai Sun, Moulinath Banerjee", "tldr": "", "abstract": "Conditional independence (CI) testing is a fundamental and challenging task in modern statistics and machine learning. Many modern methods for CI testing rely on powerful supervised learning methods to learn regression functions or Bayes predictors as an intermediate step; we refer to this class of tests as regression-based tests. 
Although these methods are guaranteed to control Type-I error when the supervised learning methods accurately estimate the regression functions or Bayes predictors of interest, their behavior is less understood when they fail due to misspecified inductive biases; in other words, when the employed models are not flexible enough or when the training algorithm does not induce the desired predictors. Then, we study the performance of regression-based CI tests under misspecified inductive biases. Namely, we propose new approximations or upper bounds for the testing errors of three regression-based tests that depend on misspecification errors. Moreover, we introduce the Rao-Blackwellized Predictor Test (RBPT), a regression-based CI test robust against misspecified inductive biases. Finally, we conduct experiments with artificial and real data, showcasing the usefulness of our theory and methods.", "keywords": "conditional independence;hypothesis testing;misspecification", "primary_area": "", "supplementary_material": "", "author": "Felipe Maia Polo;Yuekai Sun;Moulinath Banerjee", "authorids": "~Felipe_Maia_Polo1;~Yuekai_Sun1;~Moulinath_Banerjee1", "gender": "M;;M", "homepage": "https://felipemaiapolo.github.io/;https://yuekai.github.io/;https://lsa.umich.edu/stats/people/faculty/moulib.html", "dblp": "261/9581;;", "google_scholar": "CJbgmnkAAAAJ;6T1XtW8AAAAJ;", "orcid": "0000-0002-4950-2795;;", "linkedin": ";;", "or_profile": "~Felipe_Maia_Polo1;~Yuekai_Sun1;~Moulinath_Banerjee1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;Assistant \u2192 Associate Professor of Statistics;Full Professor", "bibtex": "@inproceedings{\npolo2023conditional,\ntitle={Conditional independence testing under misspecified inductive biases},\nauthor={Felipe Maia Polo and Yuekai Sun and Moulinath Banerjee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ifq8GMdqJK}\n}", "github": "", "project": "", "reviewers": "t2AA;hFX5;8LZA;zPGB;Hib1", "pdf_size": 1751849, "rating": "6;7;7;7;8", "confidence": "1;3;2;3;4", "soundness": "2;4;4;4;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;1;3", "wc_summary": "142;24;72;267;47", "wc_strengths": "11;28;23;146;164", "wc_weaknesses": "71;23;63;372;192", "wc_questions": "71;132;30;464;1", "wc_limitations": "1;1;1;323;26", "wc_review": "296;208;189;1572;430", "wc_reply_reviewers": "10;8;3;55;15", "wc_reply_authors": "0;0;0;17;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 110.4, 87.7282166694388 ], "wc_strengths_avg": [ 74.4, 66.28604679719557 ], "wc_weaknesses_avg": [ 144.2, 127.13677674064259 ], "wc_questions_avg": [ 139.6, 168.06022729962018 ], "wc_limitations_avg": [ 70.4, 126.67059643026869 ], "wc_review_avg": [ 539.0, 523.465376123388 ], "wc_reply_reviewers_avg": [ 18.2, 18.79787222001469 ], "wc_reply_authors_avg": [ 3.4, 6.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9302605094190632, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7388378265175788208&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umich.edu;umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Model-Free Exploration in Low-Rank MDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72107", "id": "IgDa5Ynm9l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d2dc4d6c7b102d05f111c02a32e7c6bc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IgDa5Ynm9l", "openreview": "https://openreview.net/forum?id=IgDa5Ynm9l", "poster": "/media/PosterPDFs/NeurIPS%202023/72107.png?t=1701458976.1582446", "slides": "https://nips.cc/virtual/2023/poster/72107", "video": "https://nips.cc/virtual/2023/poster/72107", "author_site": "Zak Mhammedi, Adam Block, Dylan J Foster, Alexander Rakhlin", "tldr": "", "abstract": "A major challenge in reinforcement learning is to develop practical, sample-efficient algorithms for exploration in high-dimensional domains where generalization and function approximation is required. Low-Rank Markov Decision Processes---where transition probabilities admit a low-rank factorization based on an unknown feature embedding---offer a simple, yet expressive framework for RL with function approximation, yet existing algorithms either (1) are computationally intractable, or (2) require restrictive statistical assumptions such as latent variable structure or access to model-based function approximation. In this work, we propose the first provably sample-efficient algorithm for exploration in Low-Rank MDPs that is both computationally efficient and model-free, allowing for general function approximation while requiring no structural assumptions beyond a reachability condition that we show is substantially weaker than that assumed in prior work. Our algorithm, SpanRL, uses the notion of a barycentric spanner for the feature embedding as an efficiently computable basis for exploration, performing efficient spanner computation by interleaving representation learning and policy optimization subroutines. 
Our analysis---which is appealingly simple and modular---carefully combines several techniques, including a new approach to error-tolerant barycentric spanner computation, and a new analysis of a certain minimax representation learning objective found in prior work.", "keywords": "Reinforcement learning;Representation Learning;Low-rank MDPs;Model-Free Learning", "primary_area": "", "supplementary_material": "", "author": "Zakaria Mhammedi;Adam Block;Dylan J Foster;Alexander Rakhlin", "authorids": "~Zakaria_Mhammedi1;~Adam_Block1;~Dylan_J_Foster1;~Alexander_Rakhlin1", "gender": "M;;;M", "homepage": ";https://abblock.github.io/index.html;http://dylanfoster.net;http://www.mit.edu/~rakhlin/", "dblp": "192/1360;258/1018;167/4271;59/407", "google_scholar": ";;RqwU8xsAAAAJ;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ", "orcid": ";0000-0003-1677-2665;;", "linkedin": ";;;", "or_profile": "~Zakaria_Mhammedi1;~Adam_Block1;~Dylan_J_Foster1;~Alexander_Rakhlin1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Microsoft Research;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;microsoft.com;mit.edu", "position": "Postdoc;PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nmhammedi2023efficient,\ntitle={Efficient Model-Free Exploration in Low-Rank {MDP}s},\nauthor={Zakaria Mhammedi and Adam Block and Dylan J Foster and Alexander Rakhlin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IgDa5Ynm9l}\n}", "github": "", "project": "", "reviewers": "qxYM;4TUV;VYmd;y7X6", "pdf_size": 608743, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "32;61;72;46", "wc_strengths": "32;67;93;55", "wc_weaknesses": "104;199;76;198", "wc_questions": "2;3;38;86", "wc_limitations": "1;8;1;15", "wc_review": "171;338;280;400", "wc_reply_reviewers": "14;60;0;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.75, 15.122417134836613 ], "wc_strengths_avg": [ 61.75, 21.992896580487074 ], "wc_weaknesses_avg": [ 144.25, 55.14696274501434 ], "wc_questions_avg": [ 32.25, 34.2518247689083 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 297.25, 84.34267899468216 ], "wc_reply_reviewers_avg": [ 24.0, 22.22611077089287 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4131103082159641278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;microsoft.com;mit.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MIT;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bayes beats Cross Validation: Efficient and Accurate Ridge Regression via 
Expectation Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72106", "id": "Ih2yL7o2Gq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3eec5006051d9544e717067de3220198-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ih2yL7o2Gq", "openreview": "https://openreview.net/forum?id=Ih2yL7o2Gq", "poster": "/media/PosterPDFs/NeurIPS%202023/72106.png?t=1701874977.660313", "slides": "https://nips.cc/virtual/2023/poster/72106", "video": "https://nips.cc/virtual/2023/poster/72106", "author_site": "Shu Yu Tew, Mario Boley, Daniel Schmidt", "tldr": "", "abstract": "We present a novel method for tuning the regularization hyper-parameter, $\\lambda$, of a ridge regression that is faster to compute than leave-one-out cross-validation (LOOCV) while yielding estimates of the regression parameters of equal, or particularly in the setting of sparse covariates, superior quality to those obtained by minimising the LOOCV risk. The LOOCV risk can suffer from multiple and bad local minima for finite $n$ and thus requires the specification of a set of candidate $\\lambda$, which can fail to provide good solutions. In contrast, we show that the proposed method is guaranteed to find a unique optimal solution for large enough $n$, under relatively mild conditions, without requiring the specification of any difficult to determine hyper-parameters. This is based on a Bayesian formulation of ridge regression that we prove to have a unimodal posterior for large enough $n$, allowing for both the optimal $\\lambda$ and the regression coefficients to be jointly learned within an iterative expectation maximization (EM) procedure. Importantly, we show that by utilizing an appropriate preprocessing step, a single iteration of the main EM loop can be implemented in $O(\\min(n, p))$ operations, for input data with $n$ rows and $p$ columns. In contrast, evaluating a single value of $\\lambda$ using fast LOOCV costs $O(n \\min(n, p))$ operations when using the same preprocessing. This advantage amounts to an asymptotic improvement of a factor of $l$ for $l$ candidate values for $\\lambda$ (in the regime $q, p \\in O(\\sqrt{n})$ where $q$ is the number of regression targets).", "keywords": "Ridge Regression;Cross validation;Expectation Maximisation;Bayesian methods", "primary_area": "", "supplementary_material": "/attachment/941b07bff650dd6675fb26787d7a77581286c3a9.zip", "author": "Shu Tew;Mario Boley;Daniel F. Schmidt", "authorids": "~Shu_Tew1;~Mario_Boley2;~Daniel_F._Schmidt1", "gender": ";M;M", "homepage": "https://www.linkedin.com/in/shu-yu-tew-82202517b;https://marioboley.github.io/;https://github.com/dfschmidt80", "dblp": "332/5922;41/5449;48/4653", "google_scholar": ";https://scholar.google.de/citations?hl=en;https://scholar.google.com.au/citations?user=z2YfSogAAAAJ", "orcid": ";0000-0002-0704-4968;0000-0002-1788-2375", "linkedin": ";;", "or_profile": "~Shu_Tew1;~Mario_Boley2;~Daniel_F._Schmidt1", "aff": "Monash University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;monash.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ntew2023bayes,\ntitle={Bayes beats Cross Validation: Efficient and Accurate Ridge Regression via Expectation Maximization},\nauthor={Shu Tew and Mario Boley and Daniel F. 
Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ih2yL7o2Gq}\n}", "github": "", "project": "", "reviewers": "3fTV;nvLc;fBDz;iDip;Ahob", "pdf_size": 474399, "rating": "5;5;6;7;7", "confidence": "3;2;4;4;4", "soundness": "4;3;4;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "75;63;95;81;164", "wc_strengths": "170;17;55;121;45", "wc_weaknesses": "58;35;1028;313;45", "wc_questions": "16;2;46;495;5", "wc_limitations": "18;1;55;24;12", "wc_review": "337;118;1279;1034;271", "wc_reply_reviewers": "138;14;622;154;8", "wc_reply_authors": "0;0;503;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 95.6, 35.71890255872932 ], "wc_strengths_avg": [ 81.6, 55.83403979652556 ], "wc_weaknesses_avg": [ 295.8, 380.4940998228488 ], "wc_questions_avg": [ 112.8, 191.7325220196094 ], "wc_limitations_avg": [ 22.0, 18.16590212458495 ], "wc_review_avg": [ 607.8, 460.1797040287631 ], "wc_reply_reviewers_avg": [ 187.2, 225.69217974932138 ], "wc_reply_authors_avg": [ 100.6, 201.19999999999996 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8385254915624212, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13251731157228057453&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "monash.edu;monash.edu;monash.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Calibration by Distribution Matching: Trainable Kernel Calibration Metrics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72105", "id": "IhxD94i5ra", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/52493d82db00e73abb2858a5a5f28717-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IhxD94i5ra", "openreview": "https://openreview.net/forum?id=IhxD94i5ra", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72105", "video": "https://nips.cc/virtual/2023/poster/72105", "author_site": "Charlie Marx, Sofian Zalouk, Stefano Ermon", "tldr": "", "abstract": "Calibration ensures that probabilistic forecasts meaningfully capture uncertainty by requiring that predicted probabilities align with empirical frequencies. However, many existing calibration methods are specialized for post-hoc recalibration, which can worsen the sharpness of forecasts. Drawing on the insight that calibration can be viewed as a distribution matching task, we introduce kernel-based calibration metrics that unify and generalize popular forms of calibration for both classification and regression. These metrics admit differentiable sample estimates, making it easy to incorporate a calibration objective into empirical risk minimization. Furthermore, we provide intuitive mechanisms to tailor calibration metrics to a decision task, and enforce accurate loss estimation and no regret decisions. 
Our empirical evaluation demonstrates that employing these metrics as regularizers enhances calibration, sharpness, and decision-making across a range of regression and classification tasks, outperforming methods relying solely on post-hoc recalibration.", "keywords": "Uncertainty Quantification;Calibration;Decision Making;Probabilistic Forecasting", "primary_area": "", "supplementary_material": "/attachment/901aa2707746a1ce0fc6f624a052b26cc85429ed.pdf", "author": "Charles Thomas Marx;Sofian Zalouk;Stefano Ermon", "authorids": "~Charles_Thomas_Marx1;~Sofian_Zalouk1;~Stefano_Ermon1", "gender": "M;M;M", "homepage": "https://charliemarx.github.io/;https://github.com/szalouk;http://cs.stanford.edu/~ermon/", "dblp": ";;47/8135", "google_scholar": "LCiFW3IAAAAJ;mU5D8d4AAAAJ;", "orcid": ";;", "linkedin": "charlie-marx-9b63b3163/;;", "or_profile": "~Charles_Thomas_Marx1;~Sofian_Zalouk1;~Stefano_Ermon1", "aff": "Stanford University;Computer Science Department, Stanford University;Stanford University", "aff_domain": "stanford.edu;cs.stanford.edu;stanford.edu", "position": "PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nmarx2023calibration,\ntitle={Calibration by Distribution Matching: Trainable Kernel Calibration Metrics},\nauthor={Charles Thomas Marx and Sofian Zalouk and Stefano Ermon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IhxD94i5ra}\n}", "github": "", "project": "", "reviewers": "hWT9;UW9V;1Qie;cmLZ", "pdf_size": 1762488, "rating": "5;6;6;6", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "100;92;87;58", "wc_strengths": "24;34;131;63", "wc_weaknesses": "108;132;181;118", "wc_questions": "121;5;88;127", "wc_limitations": "5;1;23;6", "wc_review": "358;264;510;372", "wc_reply_reviewers": "0;19;280;20", "wc_reply_authors": "0;0;1045;0", "reply_reviewers": "0;1;3;1", "reply_authors": "1;1;4;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 15.848895860595462 ], "wc_strengths_avg": [ 63.0, 41.79114738793373 ], "wc_weaknesses_avg": [ 134.75, 28.030117730755254 ], "wc_questions_avg": [ 85.25, 48.65375114007141 ], "wc_limitations_avg": [ 8.75, 8.437268515343103 ], "wc_review_avg": [ 376.0, 87.80660567406076 ], "wc_reply_reviewers_avg": [ 79.75, 115.88868581531159 ], "wc_reply_authors_avg": [ 261.25, 452.4982734773692 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4751572950584256804&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 9, "email": "stanford.edu;cs.stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Benchmarking Foundation Models with Language-Model-as-an-Examiner", "status": "Poster", "track": "Datasets & Benchmarks", "site": 
"https://nips.cc/virtual/2023/poster/73633", "id": "IiRHQ7gvnq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f64e55d03e2fe61aa4114e49cb654acb-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=IiRHQ7gvnq", "openreview": "https://openreview.net/forum?id=IiRHQ7gvnq", "poster": "/media/PosterPDFs/NeurIPS%202023/73633.png?t=1702024197.983692", "slides": "https://nips.cc/virtual/2023/poster/73633", "video": "https://nips.cc/virtual/2023/poster/73633", "author_site": "Yushi Bai, Jiahao Ying, Yixin Cao, Xin Lv, Yuze He, Xiaozhi Wang, Jifan Yu, Kaisheng Zeng, Yijia Xiao, Haozhe Lyu, Jiayin Zhang, Juanzi Li, Lei Hou", "tldr": "", "abstract": "Numerous benchmarks have been established to assess the performance of foundation models on open-ended question answering, which serves as a comprehensive test of a model's ability to understand and generate language in a manner similar to humans.\nMost of these works focus on proposing new datasets, however, we see two main issues within previous benchmarking pipelines, namely testing leakage and evaluation automation. In this paper, we propose a novel benchmarking framework, Language-Model-as-an-Examiner, where the LM serves as a knowledgeable examiner that formulates questions based on its knowledge and evaluates responses in a reference-free manner. Our framework allows for effortless extensibility as various LMs can be adopted as the examiner, and the questions can be constantly updated given more diverse trigger topics. For a more comprehensive and equitable evaluation, we devise three strategies: (1) We instruct the LM examiner to generate questions across a multitude of domains to probe for a broad acquisition, and raise follow-up questions to engage in a more in-depth assessment. (2) Upon evaluation, the examiner combines both scoring and ranking measurements, providing a reliable result as it aligns closely with human annotations. (3) We additionally propose a decentralized Peer-examination method to address the biases in a single examiner. 
Our data and benchmarking results are available at: http://lmexam.xlore.cn.", "keywords": "Question answering;Automatic evaluation;Decentralized evaluation", "primary_area": "", "supplementary_material": "/attachment/5cb79e17739d001dee729517ed95d98e436a2643.zip", "author": "Yushi Bai;Jiahao Ying;Yixin Cao;Xin Lv;Yuze He;Xiaozhi Wang;Jifan Yu;Kaisheng Zeng;Yijia Xiao;Haozhe Lyu;Jiayin Zhang;Juanzi Li;Lei Hou", "authorids": "~Yushi_Bai1;~Jiahao_Ying1;~Yixin_Cao2;~Xin_Lv1;~Yuze_He1;~Xiaozhi_Wang1;~Jifan_Yu2;~Kaisheng_Zeng1;~Yijia_Xiao1;~Haozhe_Lyu1;~Jiayin_Zhang2;~Juanzi_Li1;~Lei_Hou2", "gender": "M;M;M;M;M;M;M;M;M;M;M;;M", "homepage": "https://bys0318.github.io/;;https://sites.google.com/view/yixin-homepage;https://davidlvxin.github.io;;https://bakser.github.io/;https://yujifan0326.github.io/;https://github.com/alpc43;https://yijia-xiao.com;https://github.com/HaozheLyu;https://github.com/zhangjiayin20;;https://www.cs.tsinghua.edu.cn/csen/info/1305/4466.htm", "dblp": "302/4421;303/6904;20/8038-2;;;03/2015;239/6130.html;199/8788.html;238/7281.html;;;;32/5685-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;JOEJg9UAAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;rJzgbYQAAAAJ;bYeKwD8AAAAJ;DjpXXZkAAAAJ;https://scholar.google.com.tw/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;xLwcZvYAAAAJ;;;;YnIq4hsAAAAJ", "orcid": ";;;;;0000-0002-5727-143X;0000-0003-3430-4048;0000-0002-8104-9652;;;;;0000-0002-8907-3526", "linkedin": ";jiahao-ying-89b456265/;;;;xiaozhiwang098/?locale=en_US;;https://cn.linkedin.com/in/%E5%BC%80%E8%83%9C-%E6%9B%BE-496566107;yijia-xiao/;;;;", "or_profile": "~Yushi_Bai1;~Jiahao_Ying1;~Yixin_Cao2;~Xin_Lv1;~Yuze_He1;~Xiaozhi_Wang1;~Jifan_Yu2;~Kaisheng_Zeng1;~Yijia_Xiao1;~Haozhe_Lyu1;~Jiayin_Zhang2;~Juanzi_Li1;~Lei_Hou2", "aff": "Tsinghua University;Singapore Management University;Singapore Management University;Tsinghua University;Tsinghua University;Department of Computer Science and Technology, Tsinghua University;;Tsinghua University;University of California, Los Angeles;Beijing University of Posts and Telecommunications;Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;smu.edu.sg;smu.edu.sg;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;ucla.edu;bupt.edu.cn;mail.tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;PhD student;Assistant Professor;PhD student;PhD student;PhD student;;PhD student;PhD student;Undergrad student;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\nbai2023benchmarking,\ntitle={Benchmarking Foundation Models with Language-Model-as-an-Examiner},\nauthor={Yushi Bai and Jiahao Ying and Yixin Cao and Xin Lv and Yuze He and Xiaozhi Wang and Jifan Yu and Kaisheng Zeng and Yijia Xiao and Haozhe Lyu and Jiayin Zhang and Juanzi Li and Lei Hou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=IiRHQ7gvnq}\n}", "github": "", "project": "", "reviewers": "JGap;zXrq;6wH3", "pdf_size": 7981086, "rating": "5;7;8", "confidence": "4;3;4", "wc_summary_and_contributions": "60;91;224", "wc_strengths": "49;79;110", "wc_improvement": "109;131;103", "wc_limitations": "6;45;81", "wc_correctness": "6;42;136", "wc_clarity": "59;12;55", "wc_relation_to_prior_work": "2;51;10", "wc_documentation": "2;13;47", "wc_additional_feedback": "1;1;1", "wc_review": "294;465;767", "wc_reply_reviewers": "0;15;66", "wc_reply_authors": "472;399;355", 
"reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 125.0, 71.13836283375284 ], "wc_strengths_avg": [ 79.33333333333333, 24.904261125803796 ], "wc_improvement_avg": [ 114.33333333333333, 12.036980056845191 ], "wc_limitations_avg": [ 44.0, 30.62678566222711 ], "wc_correctness_avg": [ 61.333333333333336, 54.80470377217228 ], "wc_clarity_avg": [ 42.0, 21.275964529643932 ], "wc_relation_to_prior_work_avg": [ 21.0, 21.463146709340332 ], "wc_documentation_avg": [ 20.666666666666668, 19.154343864744856 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 508.6666666666667, 195.5544823202873 ], "wc_reply_reviewers_avg": [ 27.0, 28.24889378365107 ], "wc_reply_authors_avg": [ 408.6666666666667, 48.25165512417395 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 141, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2159235206091373030&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;smu.edu.sg;smu.edu.sg;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;ucla.edu;bupt.edu.cn;mail.tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 13, "aff_unique_index": "0;1;1;0;0;0;0;2;3;0;0", "aff_unique_norm": "Tsinghua University;Singapore Management University;University of California, Los Angeles;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.smu.edu.sg;https://www.ucla.edu;http://www.bupt.edu.cn/", "aff_unique_abbr": "THU;SMU;UCLA;BUPT", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Beijing", "aff_country_unique_index": "0;1;1;0;0;0;0;2;0;0;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "On the Adversarial Robustness of Out-of-distribution Generalization Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72104", "id": "IiwTFcGGTq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9888cc7baa04c2e44e8115588133515-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IiwTFcGGTq", "openreview": "https://openreview.net/forum?id=IiwTFcGGTq", "poster": "/media/PosterPDFs/NeurIPS%202023/72104.png?t=1695784184.1592264", "slides": "https://nips.cc/virtual/2023/poster/72104", "video": "https://nips.cc/virtual/2023/poster/72104", "author_site": "Xin Zou, Weiwei Liu", "tldr": "", "abstract": "Out-of-distribution (OOD) generalization has attracted increasing research attention in recent years, due to its promising experimental results in real-world applications. Interestingly, we find that existing OOD generalization methods are vulnerable to adversarial attacks. This motivates us to study OOD adversarial robustness. We first present theoretical analyses of OOD adversarial robustness in two different complementary settings. Motivated by the theoretical results, we design two algorithms to improve the OOD adversarial robustness. 
Finally, we conduct experiments to validate the effectiveness of our proposed algorithms.", "keywords": "Adversarial Robustness;Out-of-distribution Generalization", "primary_area": "", "supplementary_material": "/attachment/58c7bc5be4d0c4bbd4e197f3cd5460f7c932283b.zip", "author": "Xin Zou;Weiwei Liu", "authorids": "~Xin_Zou3;~Weiwei_Liu1", "gender": "M;M", "homepage": "https://zouxinn.github.io/;https://sites.google.com/site/weiweiliuhomepage/", "dblp": "18/6081-2;54/6677-3.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";", "linkedin": ";weiwei-liu-4a7849134/", "or_profile": "~Xin_Zou3;~Weiwei_Liu1", "aff": "Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzou2023on,\ntitle={On the Adversarial Robustness of Out-of-distribution Generalization Models},\nauthor={Xin Zou and Weiwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IiwTFcGGTq}\n}", "github": "", "project": "", "reviewers": "rjrX;YSCG;sycY;sfFB", "pdf_size": 585593, "rating": "3;6;7;7", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "1;2;3;4", "wc_summary": "67;45;52;71", "wc_strengths": "38;49;208;223", "wc_weaknesses": "382;484;48;63", "wc_questions": "2;1;22;37", "wc_limitations": "2;1;1;1", "wc_review": "491;580;331;395", "wc_reply_reviewers": "30;566;41;57", "wc_reply_authors": "5;251;17;18", "reply_reviewers": "1;3;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 58.75, 10.638961415476606 ], "wc_strengths_avg": [ 129.5, 86.2510869496727 ], "wc_weaknesses_avg": [ 244.25, 192.23732077825053 ], "wc_questions_avg": [ 15.5, 14.974979131871937 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 449.25, 94.55785266174354 ], "wc_reply_reviewers_avg": [ 173.5, 226.81324917208872 ], "wc_reply_authors_avg": [ 72.75, 103.03973748025564 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9684959969581861, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=68624853909245452&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "whu.edu.cn;whu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "StableFDG: Style and Attention Based Learning for Federated Domain Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72103", "id": "IjZa2fQ8tL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dae8bdacd265399b193e6b43d44a80f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IjZa2fQ8tL", "openreview": "https://openreview.net/forum?id=IjZa2fQ8tL", "poster": "/media/PosterPDFs/NeurIPS%202023/72103.png?t=1701747959.0396547", "slides": "https://nips.cc/virtual/2023/poster/72103", 
"video": "https://nips.cc/virtual/2023/poster/72103", "author_site": "Jungwuk Park, Dong-Jun Han, Jinho Kim, Shiqiang Wang, Christopher Brinton, Jaekyun Moon", "tldr": "", "abstract": "Traditional federated learning (FL) algorithms operate under the assumption that the data distributions at training (source domains) and testing (target domain) are the same. The fact that domain shifts often occur in practice necessitates equipping FL methods with a domain generalization (DG) capability. However, existing DG algorithms face fundamental challenges in FL setups due to the lack of samples/domains in each client\u2019s local dataset. In this paper, we propose StableFDG, a style and attention based learning strategy for accomplishing federated domain generalization, introducing two key contributions. The first is style-based learning, which enables each client to explore novel styles beyond the original source domains in its local dataset, improving domain diversity based on the proposed style sharing, shifting, and exploration strategies. Our second contribution is an attention-based feature highlighter, which captures the similarities between the features of data samples in the same class, and emphasizes the important/common characteristics to better learn the domain-invariant characteristics of each class in data-poor FL scenarios. Experimental results show that StableFDG outperforms existing baselines on various DG benchmark datasets, demonstrating its efficacy.", "keywords": "Federated Learning;Domain Generalization", "primary_area": "", "supplementary_material": "", "author": "Jungwuk Park;Dong-Jun Han;Jinho Kim;Shiqiang Wang;Christopher Brinton;Jaekyun Moon", "authorids": "~Jungwuk_Park1;~Dong-Jun_Han1;~Jinho_Kim2;~Shiqiang_Wang1;~Christopher_Brinton1;~Jaekyun_Moon2", "gender": "M;M;M;M;;M", "homepage": ";https://sites.google.com/view/djhan930/home?authuser=0;;https://shiqiang.wang;https://www.cbrinton.net/;http://comstolab.kaist.ac.kr/people.html", "dblp": "307/4735;201/0078;;87/5094-1;;78/2744", "google_scholar": "ek4xQy0AAAAJ;https://scholar.google.co.kr/citations?user=-YR-GxUAAAAJ;sjYVSDgAAAAJ;kA_vmOcAAAAJ;vWmHA5MAAAAJ;", "orcid": ";;;;;", "linkedin": "jungwuk-park-458b25199;;;;;", "or_profile": "~Jungwuk_Park1;~Dong-Jun_Han1;~Jinho_Kim2;~Shiqiang_Wang1;~Christopher_Brinton1;~Jaekyun_Moon2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;IBM, International Business Machines;Purdue University;KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;ee.kaist.ac.kr;us.ibm.com;purdue.edu;kaist.edu", "position": "PhD student;Postdoc;MS student;Research Staff Member;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npark2023stablefdg,\ntitle={Stable{FDG}: Style and Attention Based Learning for Federated Domain Generalization},\nauthor={Jungwuk Park and Dong-Jun Han and Jinho Kim and Shiqiang Wang and Christopher Brinton and Jaekyun Moon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IjZa2fQ8tL}\n}", "github": "", "project": "", "reviewers": "wPUB;fAoj;hWZZ;i1eF", "pdf_size": 1003511, "rating": "5;5;6;6", "confidence": "3;4;3;4", "soundness": "2;3;4;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "85;39;59;111", "wc_strengths": "33;32;33;41", "wc_weaknesses": "89;195;11;25", "wc_questions": "34;3;33;107", "wc_limitations": "52;12;8;39", "wc_review": "293;281;144;323", "wc_reply_reviewers": "0;51;12;46", 
"wc_reply_authors": "73;102;28;93", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.5, 27.1062723368596 ], "wc_strengths_avg": [ 34.75, 3.6314597615834874 ], "wc_weaknesses_avg": [ 80.0, 72.61542535852834 ], "wc_questions_avg": [ 44.25, 38.310409812477864 ], "wc_limitations_avg": [ 27.75, 18.38987493160299 ], "wc_review_avg": [ 260.25, 68.83812533763539 ], "wc_reply_reviewers_avg": [ 27.25, 21.741377601246892 ], "wc_reply_authors_avg": [ 74.0, 28.556960622587273 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7999087842675515355&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "kaist.ac.kr;kaist.ac.kr;ee.kaist.ac.kr;us.ibm.com;purdue.edu;kaist.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;International Business Machines;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.ibm.com;https://www.purdue.edu", "aff_unique_abbr": "KAIST;IBM;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Perturbation Towards Easy Samples Improves Targeted Adversarial Transferability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72102", "id": "IkD1EWFF8c", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/028fcbcf85435d39a40c4d61b42c99a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IkD1EWFF8c", "openreview": "https://openreview.net/forum?id=IkD1EWFF8c", "poster": "/media/PosterPDFs/NeurIPS%202023/72102.png?t=1698489674.7788167", "slides": "https://nips.cc/virtual/2023/poster/72102", "video": "https://nips.cc/virtual/2023/poster/72102", "author_site": "Junqi Gao, Biqing Qi, Yao Li, Zhichang Guo, Dong Li, Yuming Xing, Dazhi Zhang", "tldr": "", "abstract": "The transferability of adversarial perturbations provides an effective shortcut for black-box attacks. Targeted perturbations have greater practicality but are more difficult to transfer between models. In this paper, we experimentally and theoretically demonstrated that neural networks trained on the same dataset have more consistent performance in High-Sample-Density-Regions (HSDR) of each class instead of low sample density regions. Therefore, in the target setting, adding perturbations towards HSDR of the target class is more effective in improving transferability. However, density estimation is challenging in high-dimensional scenarios. Further theoretical and experimental verification demonstrates that easy samples with low loss are more likely to be located in HSDR. Perturbations towards such easy samples in the target class can avoid density estimation for HSDR location. Based on the above facts, we verified that adding perturbations to easy samples in the target class improves targeted adversarial transferability of existing attack methods. 
A generative targeted attack strategy named Easy Sample Matching Attack (ESMA) is proposed, which has a higher success rate for targeted attacks and outperforms the SOTA generative method. Moreover, ESMA requires only $5\\%$ of the storage space and much less computation time compared to the current SOTA, as ESMA attacks all classes with only one model instead of separate models for each class. Our code is available at https://github.com/gjq100/ESMA", "keywords": "Adversarial Attacks; Generative Attack; Transferable Targeted Attack", "primary_area": "", "supplementary_material": "", "author": "Junqi Gao;Biqing Qi;Yao Li;Zhichang Guo;Dong Li;Yuming Xing;Dazhi Zhang", "authorids": "~Junqi_Gao1;~Biqing_Qi1;~Yao_Li7;mathgzc@gmail.com;arvinlee826@gmail.com;xyuming@hit.edu.cn;~Dazhi_Zhang2", "gender": "M;M;M;;;;", "homepage": ";https://biqing-qi.github.io/;;;;;http://homepage.hit.edu.cn/zhangdazhi", "dblp": "81/9266.html;233/4949.html;;;;;", "google_scholar": ";;;;;;", "orcid": "0009-0007-1644-5812;0000-0002-4072-0577;0000-0002-1754-4528;;;;", "linkedin": ";;;;;;", "or_profile": "~Junqi_Gao1;~Biqing_Qi1;~Yao_Li7;mathgzc@gmail.com;arvinlee826@gmail.com;xyuming@hit.edu.cn;~Dazhi_Zhang2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;;;;", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;;;;", "position": "PhD student;PhD student;Assistant Professor;;;;", "bibtex": "@inproceedings{\ngao2023perturbation,\ntitle={Perturbation Towards Easy Samples Improves Targeted Adversarial Transferability},\nauthor={Junqi Gao and Biqing Qi and Yao Li and Zhichang Guo and Dong Li and Yuming Xing and Dazhi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IkD1EWFF8c}\n}", "github": "", "project": "", "reviewers": "YLC1;YEMS;YJBk;tYob", "pdf_size": 4060251, "rating": "4;6;6;7", "confidence": "5;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "53;53;54;67", "wc_strengths": "28;54;28;143", "wc_weaknesses": "198;125;353;55", "wc_questions": "7;4;32;58", "wc_limitations": "1;15;6;11", "wc_review": "287;251;473;334", "wc_reply_reviewers": "96;192;169;14", "wc_reply_authors": "533;822;481;21", "reply_reviewers": "1;5;2;1", "reply_authors": "2;6;3;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.75, 5.931905258852336 ], "wc_strengths_avg": [ 63.25, 47.251322732808234 ], "wc_weaknesses_avg": [ 182.75, 110.53591045447628 ], "wc_questions_avg": [ 25.25, 21.810261346439663 ], "wc_limitations_avg": [ 8.25, 5.261891294962297 ], "wc_review_avg": [ 336.25, 84.25964336501787 ], "wc_reply_reviewers_avg": [ 117.75, 69.60019755719088 ], "wc_reply_authors_avg": [ 464.25, 286.9942290360557 ], "reply_reviewers_avg": [ 2.25, 1.6393596310755 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6914325062283550794&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;;;;", "author_num": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": 
"HIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "IklhryC2up", "title": "Computational Complexity of Detecting Proximity to Losslessly Compressible Neural Network Parameters", "track": "main", "status": "Reject", "tldr": "", "abstract": "To better understand complexity in neural networks, we theoretically investigate the idealised phenomenon of lossless network compressibility, whereby an identical function can be implemented with a smaller network. We give an efficient formal algorithm for optimal lossless compression in the setting of single-hidden-layer hyperbolic tangent networks. To measure lossless compressibility, we define the rank of a parameter as the minimum number of hidden units required to implement the same function. Losslessly compressible parameters are atypical, but their existence has implications for nearby parameters. We define the proximate rank of a parameter as the rank of the most compressible parameter within a small $L^\\infty$ neighbourhood. Unfortunately, detecting nearby losslessly compressible parameters is not so easy: we show that bounding the proximate rank is an NP-complete problem, using a reduction from Boolean satisfiability via a novel abstract clustering problem involving covering points with small squares. These results underscore the computational complexity of measuring neural network complexity, laying a foundation for future theoretical and empirical work in this direction.\n", "keywords": "theory;neural network theory;structural redundancy;compressibility;lossless compressibility;computational complexity;NP-completeness", "primary_area": "", "supplementary_material": "/attachment/9eab8ffb907a464471edda7081cbb026d54b5b7b.zip", "author": "Matthew Farrugia-Roberts", "authorids": "~Matthew_Farrugia-Roberts1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nfarrugia-roberts2023computational,\ntitle={Computational Complexity of Detecting Proximity to Losslessly Compressible Neural Network Parameters},\nauthor={Matthew Farrugia-Roberts},\nyear={2023},\nurl={https://openreview.net/forum?id=IklhryC2up}\n}", "github": "", "project": "", "reviewers": "aLjx;t3vj;ubUt;Coti", "site": "https://openreview.net/forum?id=IklhryC2up", "pdf_size": 252791, "rating": "4;6;6;7", "confidence": "2;3;1;2", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "4;4;3;4", "wc_summary": "110;109;71;110", "wc_strengths": "44;76;158;99", "wc_weaknesses": "81;195;67;36", "wc_questions": "52;90;5;60", "wc_limitations": "46;10;7;6", "wc_review": "333;480;308;311", "wc_reply_reviewers": "37;232;0;30", "wc_reply_authors": "0;14;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 16.748134224444225 ], "wc_strengths_avg": [ 94.25, 41.66758332325022 ], "wc_weaknesses_avg": [ 94.75, 60.12642929694063 ], "wc_questions_avg": [ 51.75, 30.48257699079919 ], "wc_limitations_avg": [ 17.25, 16.663958113245485 ], "wc_review_avg": [ 358.0, 71.09500685702196 ], "wc_reply_reviewers_avg": [ 74.75, 91.84599882411862 ], "wc_reply_authors_avg": [ 3.5, 6.06217782649107 ], "reply_reviewers_avg": [ 0.75, 
0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7o0RlY6ozYAJ:scholar.google.com/&scioq=Computational+Complexity+of+Detecting+Proximity+to+Losslessly+Compressible+Neural+Network+Parameters&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "title": "Improved Bayes Risk Can Yield Reduced Social Welfare Under Competition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72101", "id": "IltQ87ZdT6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d3602fc92fb8b9e0d55356c9e8815e2b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IltQ87ZdT6", "openreview": "https://openreview.net/forum?id=IltQ87ZdT6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72101", "video": "https://nips.cc/virtual/2023/poster/72101", "author_site": "Meena Jagadeesan, Michael Jordan, Jacob Steinhardt, Nika Haghtalab", "tldr": "", "abstract": "As the scale of machine learning models increases, trends such as scaling laws anticipate consistent downstream improvements in predictive accuracy. However, these trends take the perspective of a single model-provider in isolation, while in reality providers often compete with each other for users. In this work, we demonstrate that competition can fundamentally alter the behavior of these scaling trends, even causing overall predictive accuracy across users to be non-monotonic or decreasing with scale. We define a model of competition for classification tasks, and use data representations as a lens for studying the impact of increases in scale. We find many settings where improving data representation quality (as measured by Bayes risk) decreases the overall predictive accuracy across users (i.e., social welfare) for a marketplace of competing model-providers. Our examples range from closed-form formulas in simple settings to simulations with pretrained representations on CIFAR-10. 
At a conceptual level, our work suggests that favorable scaling trends for individual model-providers need not translate to downstream improvements in social welfare in marketplaces with multiple model providers.", "keywords": "competition;equilibria;inverse scaling;digital marketplaces", "primary_area": "", "supplementary_material": "/attachment/c8ad8ccf55eb5dab126d4357504b3cae0c2b49f1.pdf", "author": "Meena Jagadeesan;Michael Jordan;Jacob Steinhardt;Nika Haghtalab", "authorids": "~Meena_Jagadeesan1;~Michael_Jordan1;~Jacob_Steinhardt1;~Nika_Haghtalab2", "gender": "F;M;;F", "homepage": "https://mjagadeesan.github.io;http://www.cs.berkeley.edu/~jordan/;;https://people.eecs.berkeley.edu/~nika/", "dblp": "205/2407;j/MichaelIJordan;35/10625;", "google_scholar": "XW62DrcAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;;", "orcid": ";0000-0001-8935-817X;;", "linkedin": ";;;", "or_profile": "~Meena_Jagadeesan1;~Michael_Jordan1;~Jacob_Steinhardt1;~Nika_Haghtalab2", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njagadeesan2023improved,\ntitle={Improved Bayes Risk Can Yield Reduced Social Welfare Under Competition},\nauthor={Meena Jagadeesan and Michael Jordan and Jacob Steinhardt and Nika Haghtalab},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IltQ87ZdT6}\n}", "github": "", "project": "", "reviewers": "4Cpq;tyS5;YGiJ;5U8U;Gqjx", "pdf_size": 1929313, "rating": "5;6;6;6;7", "confidence": "3;4;2;3;2", "soundness": "3;3;3;2;4", "novelty": "3;2;3;3;3", "presentation": "3;3;2;3;4", "wc_summary": "131;90;85;127;87", "wc_strengths": "89;70;70;89;133", "wc_weaknesses": "812;95;44;159;142", "wc_questions": "56;55;25;2;54", "wc_limitations": "151;35;1;8;51", "wc_review": "1239;345;225;385;467", "wc_reply_reviewers": "174;65;0;39;64", "wc_reply_authors": "618;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 104.0, 20.513410247932935 ], "wc_strengths_avg": [ 90.2, 23.025203582161875 ], "wc_weaknesses_avg": [ 250.4, 283.63681002295874 ], "wc_questions_avg": [ 38.4, 21.601851772475435 ], "wc_limitations_avg": [ 49.2, 54.01629383806334 ], "wc_review_avg": [ 532.2, 361.9118124626495 ], "wc_reply_reviewers_avg": [ 68.4, 57.83632076818165 ], "wc_reply_authors_avg": [ 123.6, 247.20000000000002 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16932595656030058060&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Utilitarian Algorithm Configuration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72100", "id": "InB9Loet1u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d98d9cef0c189f1db95f1d94652f7051-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=InB9Loet1u", "openreview": "https://openreview.net/forum?id=InB9Loet1u", "poster": "/media/PosterPDFs/NeurIPS%202023/72100.png?t=1702038857.3527193", "slides": "https://nips.cc/virtual/2023/poster/72100", "video": "https://nips.cc/virtual/2023/poster/72100", "author_site": "Devon Graham, Kevin Leyton-Brown, Tim Roughgarden", "tldr": "", "abstract": "We present the first nontrivial procedure for configuring heuristic algorithms to maximize the utility provided to their end users while also offering theoretical guarantees about performance. Existing procedures seek configurations that minimize expected runtime. However, very recent theoretical work argues that expected runtime minimization fails to capture algorithm designers' preferences. Here we show that the utilitarian objective also confers significant algorithmic benefits. Intuitively, this is because mean runtime is dominated by extremely long runs even when they are incredibly rare; indeed, even when an algorithm never gives rise to such long runs, configuration procedures that provably minimize mean runtime must perform a huge number of experiments to demonstrate this fact. In contrast, utility is bounded and monotonically decreasing in runtime, allowing for meaningful empirical bounds on a configuration's performance. This paper builds on this idea to describe effective and theoretically sound configuration procedures. We prove upper bounds on the runtime of these procedures that are similar to theoretical lower bounds, while also demonstrating their performance empirically.", "keywords": "algorithm configuration;algorithm selection;data-driven algorithm design;utility of runtime", "primary_area": "", "supplementary_material": "/attachment/4d3ce0820e1f87392442e62285224bc01531045b.pdf", "author": "Devon R. Graham;Kevin Leyton-Brown;Tim Roughgarden", "authorids": "~Devon_R._Graham1;~Kevin_Leyton-Brown1;~Tim_Roughgarden1", "gender": "M;Not Specified;", "homepage": ";http://cs.ubc.ca/~kevinlb;https://timroughgarden.org", "dblp": "217/3515;81/1149;r/TimRoughgarden", "google_scholar": ";_4dnp0IAAAAJ;0lcJYs8AAAAJ", "orcid": ";0000-0002-7644-5327;", "linkedin": ";kevinleytonbrown/;", "or_profile": "~Devon_R._Graham1;~Kevin_Leyton-Brown1;~Tim_Roughgarden1", "aff": ", University of British Columbia;University of British Columbia;a16z Crypto", "aff_domain": "cs.ubc.ca;ubc.ca;a16z.com", "position": "PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ngraham2023utilitarian,\ntitle={Utilitarian Algorithm Configuration},\nauthor={Devon R. 
Graham and Kevin Leyton-Brown and Tim Roughgarden},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=InB9Loet1u}\n}", "github": "", "project": "", "reviewers": "Qzq9;Azei;3kwZ;Sepd", "pdf_size": 718039, "rating": "2;5;7;7", "confidence": "5;2;4;3", "soundness": "1;3;4;3", "novelty": "1;2;3;4", "presentation": "4;1;4;3", "wc_summary": "38;197;163;88", "wc_strengths": "18;46;18;202", "wc_weaknesses": "512;78;12;299", "wc_questions": "8;126;2;91", "wc_limitations": "5;15;11;8", "wc_review": "581;462;206;688", "wc_reply_reviewers": "2460;44;11;25", "wc_reply_authors": "2109;0;0;0", "reply_reviewers": "21;1;1;1", "reply_authors": "10;1;1;1", "rating_avg": [ 5.25, 2.0463381929681126 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 121.5, 62.28362545645525 ], "wc_strengths_avg": [ 71.0, 76.49182962905255 ], "wc_weaknesses_avg": [ 225.25, 196.73761079163282 ], "wc_questions_avg": [ 56.75, 53.25117369598533 ], "wc_limitations_avg": [ 9.75, 3.6996621467371855 ], "wc_review_avg": [ 484.25, 179.43853404439082 ], "wc_reply_reviewers_avg": [ 635.0, 1053.7293295718782 ], "wc_reply_authors_avg": [ 527.25, 913.2237882906905 ], "reply_reviewers_avg": [ 6.0, 8.660254037844387 ], "reply_authors_avg": [ 3.25, 3.897114317029974 ], "replies_avg": [ 51, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4917225282373378, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13526841251739769273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "cs.ubc.ca;ubc.ca;a16z.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of British Columbia;Andreessen Horowitz", "aff_unique_dep": ";Crypto", "aff_unique_url": "https://www.ubc.ca;https://a16z.com/", "aff_unique_abbr": "UBC;a16z", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "SLM: A Smoothed First-Order Lagrangian Method for Structured Constrained Nonconvex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72099", "id": "IobxuwPnWt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe90657b12193c7b52a3418bdc351807-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IobxuwPnWt", "openreview": "https://openreview.net/forum?id=IobxuwPnWt", "poster": "/media/PosterPDFs/NeurIPS%202023/72099.png?t=1699128882.5647678", "slides": "https://nips.cc/virtual/2023/poster/72099", "video": "https://nips.cc/virtual/2023/poster/72099", "author_site": "Songtao Lu", "tldr": "", "abstract": "Functional constrained optimization (FCO) has emerged as a powerful tool for solving various machine learning problems. However, with the rapid increase in applications of neural networks in recent years, it has become apparent that both the objective and constraints often involve nonconvex functions, which poses significant challenges in obtaining high-quality solutions. In this work, we focus on a class of nonconvex FCO problems with nonconvex constraints, where the two optimization variables are nonlinearly coupled in the inequality constraint. 
Leveraging the primal-dual optimization framework, we propose a smoothed first-order Lagrangian method (SLM) for solving this class of problems. We establish the theoretical convergence guarantees of SLM to the Karush-Kuhn-Tucker (KKT) solutions through quantifying dual error bounds. By establishing connections between this structured FCO and equilibrium-constrained nonconvex problems (also known as bilevel optimization), we apply the proposed SLM to tackle bilevel optimization oriented problems where the lower-level problem is nonconvex. Numerical results obtained from both toy examples and hyper-data cleaning problems demonstrate the superiority of SLM compared to benchmark methods.", "keywords": "Functional constrained optimization;bilevel optimization;primal dual method;Lagrangian method", "primary_area": "", "supplementary_material": "/attachment/3e29e4b50b49913cd014fdb87bf91ee5241de516.pdf", "author": "Songtao Lu", "authorids": "~Songtao_Lu1", "gender": "M", "homepage": "https://songtaogithub.github.io/", "dblp": "05/2887", "google_scholar": "LRsjX7kAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Songtao_Lu1", "aff": "IBM Thomas J. Watson Research Center", "aff_domain": "ibm.com", "position": "Researcher", "bibtex": "@inproceedings{\nlu2023slm,\ntitle={{SLM}: A Smoothed First-Order Lagrangian Method for Structured Constrained Nonconvex Optimization},\nauthor={Songtao Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IobxuwPnWt}\n}", "github": "", "project": "", "reviewers": "VNHZ;Ggqj;aS1n;7siX", "pdf_size": 570430, "rating": "4;4;6;6", "confidence": "3;3;4;2", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "137;68;69;52", "wc_strengths": "30;32;79;16", "wc_weaknesses": "188;7;102;8", "wc_questions": "128;130;251;134", "wc_limitations": "1;18;14;3", "wc_review": "484;255;515;213", "wc_reply_reviewers": "82;0;20;0", "wc_reply_authors": "830;0;59;0", "reply_reviewers": "2;0;1;0", "reply_authors": "3;1;2;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 32.745228660065884 ], "wc_strengths_avg": [ 39.25, 23.763154251908563 ], "wc_weaknesses_avg": [ 76.25, 75.17438060935388 ], "wc_questions_avg": [ 160.75, 52.15062319857741 ], "wc_limitations_avg": [ 9.0, 7.176350047203662 ], "wc_review_avg": [ 366.75, 134.02681634658043 ], "wc_reply_reviewers_avg": [ 25.5, 33.62662635472075 ], "wc_reply_authors_avg": [ 222.25, 351.71037445602883 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8456357548885710716&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ibm.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "IBM", "aff_unique_dep": "Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "0", "aff_campus_unique": "Yorktown Heights", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Skill-it! 
A data-driven skills framework for understanding and training language models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72098", "id": "IoizwO1NLf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/70b8505ac79e3e131756f793cd80eb8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IoizwO1NLf", "openreview": "https://openreview.net/forum?id=IoizwO1NLf", "poster": "/media/PosterPDFs/NeurIPS%202023/72098.png?t=1702269868.2727485", "slides": "https://nips.cc/virtual/2023/poster/72098", "video": "https://nips.cc/virtual/2023/poster/72098", "author_site": "Mayee Chen, Nicholas Roberts, Kush Bhatia, Jue WANG, Ce Zhang, Frederic Sala, Christopher R\u00e9", "tldr": "", "abstract": "The quality of training data impacts the performance of pre-trained large language models (LMs). Given a fixed budget of tokens, we study how to best select data that leads to good downstream model performance across tasks. We develop a new framework based on a simple hypothesis: just as humans acquire interdependent skills in a deliberate order, language models also follow a natural order when learning a set of skills from their training data. If such an order exists, it can be utilized for improved understanding of LMs and for data-efficient training. Using this intuition, our framework formalizes the notion of a skill and of an ordered set of skills in terms of the associated data. First, using both synthetic and real data, we demonstrate that these ordered skill sets exist, and that their existence enables more advanced skills to be learned with less data when we train on their prerequisite skills. Second, using our proposed framework, we introduce an online data sampling algorithm, Skill-It, over mixtures of skills for both continual pre-training and fine-tuning regimes, where the objective is to efficiently learn multiple skills in the former and an individual skill in the latter. On the LEGO synthetic in the continual pre-training setting, Skill-It obtains 37.5 points higher accuracy than random sampling. On the Natural Instructions dataset in the fine-tuning setting, Skill-It reduces the validation loss on the target skill by 13.6% versus training on data associated with the target skill itself. \nWe apply our skills framework on the RedPajama dataset to continually pre-train a 3B-parameter LM, achieving higher accuracy on the LM Evaluation Harness with 1B tokens than the baseline approach of sampling uniformly over data sources with 3B tokens.", "keywords": "language models;data selection", "primary_area": "", "supplementary_material": "", "author": "Mayee F Chen;Nicholas Roberts;Kush Bhatia;Jue WANG;Ce Zhang;Frederic Sala;Christopher Re", "authorids": "~Mayee_F_Chen1;~Nicholas_Roberts2;~Kush_Bhatia3;~Jue_WANG1;~Ce_Zhang1;~Frederic_Sala1;~Christopher_Re1", "gender": ";;;M;;M;", "homepage": ";;;https://juewang.me/about/;;https://pages.cs.wisc.edu/~fredsala/;", "dblp": ";;;69/393-19;97/919;133/3602;", "google_scholar": ";;;PykI8xcAAAAJ;;9KhIkNkAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Mayee_F_Chen1;~Nicholas_Roberts2;~Kush_Bhatia3;~Jue_WANG1;~Ce_Zhang1;~Frederic_Sala1;~Christopher_Re1", "aff": ";;;Zhejiang University;University of Chicago;University of Wisconsin, Madison;", "aff_domain": ";;;zju.edu.cn;uchicago.edu;wisc.edu;", "position": ";;;PhD student;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nchen2023skillit,\ntitle={Skill-it! 
A data-driven skills framework for understanding and training language models},\nauthor={Mayee F Chen and Nicholas Roberts and Kush Bhatia and Jue WANG and Ce Zhang and Frederic Sala and Christopher Re},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IoizwO1NLf}\n}", "github": "", "project": "", "reviewers": "j55A;XvRR;paFF;6a64", "pdf_size": 1956665, "rating": "4;7;7;8", "confidence": "4;3;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "324;119;110;78", "wc_strengths": "132;305;106;84", "wc_weaknesses": "436;128;200;42", "wc_questions": "2;69;149;1", "wc_limitations": "11;7;2;15", "wc_review": "905;628;567;220", "wc_reply_reviewers": "0;0;38;0", "wc_reply_authors": "0;0;27;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 157.75, 97.18635449485694 ], "wc_strengths_avg": [ 156.75, 87.26217680071933 ], "wc_weaknesses_avg": [ 201.5, 146.48805412046403 ], "wc_questions_avg": [ 55.25, 60.73868207328835 ], "wc_limitations_avg": [ 8.75, 4.815340071064556 ], "wc_review_avg": [ 580.0, 243.77140931618703 ], "wc_reply_reviewers_avg": [ 9.5, 16.454482671904334 ], "wc_reply_authors_avg": [ 6.75, 11.691342951089922 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.23570226039551584, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16273034720809068269&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";;;zju.edu.cn;uchicago.edu;wisc.edu;", "author_num": 7, "aff_unique_index": "0;1;2", "aff_unique_norm": "Zhejiang University;University of Chicago;University of Wisconsin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.uchicago.edu;https://www.wisc.edu", "aff_unique_abbr": "ZJU;UChicago;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Improving the Privacy and Practicality of Objective Perturbation for Differentially Private Linear Learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72097", "id": "IpUJd3KG3c", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ceda49041816da6d5a34eb3b612607f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IpUJd3KG3c", "openreview": "https://openreview.net/forum?id=IpUJd3KG3c", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72097", "video": "https://nips.cc/virtual/2023/poster/72097", "author_site": "Rachel Redberg, Antti Koskela, Yu-Xiang Wang", "tldr": "", "abstract": "In the arena of privacy-preserving machine learning, differentially private stochastic gradient descent (DP-SGD) has outstripped the objective perturbation mechanism in popularity and interest. Though unrivaled in versatility, DP-SGD requires a non-trivial privacy overhead (for privately tuning the model\u2019s hyperparameters) and a computational complexity which might be extravagant for simple models such as linear and logistic regression. 
This paper revamps the objective perturbation mechanism with tighter privacy analyses and new computational tools that boost it to perform competitively with DP-SGD on unconstrained convex generalized linear problems.", "keywords": "differential privacy;empirical risk minimization;objective perturbation", "primary_area": "", "supplementary_material": "/attachment/4bc515846a126d7058e891682b127a1ceb4687e2.pdf", "author": "Rachel Emily Redberg;Antti Koskela;Yu-Xiang Wang", "authorids": "~Rachel_Emily_Redberg1;~Antti_Koskela1;~Yu-Xiang_Wang1", "gender": "F;M;", "homepage": ";;http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": "259/2266;124/9273;62/1637-3.html", "google_scholar": ";https://scholar.google.fi/citations?hl=fi;HGNZ1fkAAAAJ", "orcid": "0000-0001-5592-7186;;", "linkedin": "rachel-redberg-08026a45;;", "or_profile": "~Rachel_Emily_Redberg1;~Antti_Koskela1;~Yu-Xiang_Wang1", "aff": "UC Santa Barbara;Nokia Bell Labs;UC Santa Barbara", "aff_domain": "ucsb.edu;nokia-bell-labs.com;ucsb.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nredberg2023improving,\ntitle={Improving the Privacy and Practicality of Objective Perturbation for Differentially Private Linear Learners},\nauthor={Rachel Emily Redberg and Antti Koskela and Yu-Xiang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IpUJd3KG3c}\n}", "github": "", "project": "", "reviewers": "NU3P;RDzn;udpp;eK3K;MKLH", "pdf_size": 676027, "rating": "3;5;6;7;7", "confidence": "4;2;4;5;4", "soundness": "3;3;4;4;4", "novelty": "2;3;4;3;3", "presentation": "1;3;2;4;4", "wc_summary": "48;22;50;31;140", "wc_strengths": "19;77;73;126;41", "wc_weaknesses": "431;138;281;119;118", "wc_questions": "70;4;240;6;118", "wc_limitations": "1;1;44;1;6", "wc_review": "569;242;688;283;423", "wc_reply_reviewers": "373;18;210;9;87", "wc_reply_authors": "1589;0;970;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "4;1;3;1;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 58.2, 42.220374228564104 ], "wc_strengths_avg": [ 67.2, 36.322995471188776 ], "wc_weaknesses_avg": [ 217.4, 122.91232647704622 ], "wc_questions_avg": [ 87.6, 87.30085910230208 ], "wc_limitations_avg": [ 10.6, 16.81190054693401 ], "wc_review_avg": [ 441.0, 168.6902486808292 ], "wc_reply_reviewers_avg": [ 139.4, 137.14313690447656 ], "wc_reply_authors_avg": [ 511.8, 656.6772114212583 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3546040716334876, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15810625668639730616&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ucsb.edu;nokia-bell-labs.com;ucsb.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Santa Barbara;Nokia Bell Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.nokialabs.com", "aff_unique_abbr": "UCSB;Nokia Bell Labs", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "GEO-Bench: Toward Foundation Models for Earth Monitoring", 
"status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73632", "id": "IptxZvA3at", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a0644215d9cff6646fa334dfa5d29c5a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=IptxZvA3at", "openreview": "https://openreview.net/forum?id=IptxZvA3at", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73632", "video": "https://nips.cc/virtual/2023/poster/73632", "author_site": "Alexandre Lacoste, Nils Lehmann, Pau Rodriguez, Evan Sherwin, Hannah Kerner, Bj\u00f6rn L\u00fctjens, Jeremy Irvin, David Dao, Hamed Alemohammad, Alexandre Drouin, Mehmet Gunturkun, Gabriel Huang, David Vazquez, Dava Newman, Yoshua Bengio, Stefano Ermon, Xiaoxiang Zhu", "tldr": "", "abstract": "Recent progress in self-supervision has shown that pre-training large neural networks on vast amounts of unsupervised data can lead to substantial increases in generalization to downstream tasks. \nSuch models, recently coined foundation models, have been transformational to the field of natural language processing.\nVariants have also been proposed for image data, but their applicability to remote sensing tasks is limited.\nTo stimulate the development of foundation models for Earth monitoring, we propose a benchmark comprised of six classification and six segmentation tasks, which were carefully curated and adapted to be both relevant to the field and well-suited for model evaluation. We accompany this benchmark with a robust methodology for evaluating models and reporting aggregated results to enable a reliable assessment of progress. Finally, we report results for 20 baselines to gain information about the performance of existing models.\nWe believe that this benchmark will be a driver of progress across a variety of Earth monitoring tasks.", "keywords": "computer vision;earth monitoring;earth observation;dataset;benchmark", "primary_area": "", "supplementary_material": "/attachment/9b9f3c34c54de139ecd2067b036cc4416492759a.pdf", "author": "Alexandre Lacoste;Nils Lehmann;Pau Rodriguez;Evan David Sherwin;Hannah Kerner;Bj\u00f6rn L\u00fctjens;Jeremy Andrew Irvin;David Dao;Hamed Alemohammad;Alexandre Drouin;Mehmet Gunturkun;Gabriel Huang;David Vazquez;Dava Newman;Yoshua Bengio;Stefano Ermon;Xiao Xiang Zhu", "authorids": "~Alexandre_Lacoste1;~Nils_Lehmann1;~Pau_Rodriguez2;~Evan_David_Sherwin1;~Hannah_Kerner1;~Bj\u00f6rn_L\u00fctjens1;~Jeremy_Andrew_Irvin1;~David_Dao1;~Hamed_Alemohammad1;~Alexandre_Drouin2;~Mehmet_Gunturkun1;~Gabriel_Huang1;~David_Vazquez1;~Dava_Newman1;~Yoshua_Bengio1;~Stefano_Ermon1;~Xiao_Xiang_Zhu1", "gender": "M;;M;F;M;;M;M;M;M;M;M;F;M;M;;F", "homepage": ";https://nilsleh.info/;https://www.evansherwin.com/;https://hannah-rae.github.io/;https://blutjens.github.io/;https://jirvin16.github.io;https://daviddao.org;https://hamedalemo.github.io/;https://alexdrouin.com;;;http://www.david-vazquez.com;https://davanewman.com;http://yoshuabengio.org;http://cs.stanford.edu/~ermon/;https://prlz77.github.io;https://www.sipeo.bgu.tum.de/", "dblp": "59/6239.html;;;218/2646;;209/9633;;230/4508;117/3861;;;94/8653;;56/953;47/8135;190/7735;35/8954", "google_scholar": 
";dWXUzLoAAAAJ;LPDyDHYAAAAJ;g5CD7dQAAAAJ;AayqHVcAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ca/citations?user=XHeNA_8AAAAJ;ysq7m-YAAAAJ;https://scholar.google.ca/citations?user=LR6aJcEAAAAJ;;https://scholar.google.ca/citations?hl=en;1jHvtfsAAAAJ;;kukA0LcAAAAJ;;https://scholar.google.es/citations?user=IwBx73wAAAAJ;https://scholar.google.de/citations?user=CNakdIgAAAAJ", "orcid": ";;0000-0003-2180-4297;0000-0002-3259-7759;0000-0002-1616-4830;;;0000-0001-5662-3643;0000-0001-7718-0319;;;0000-0002-2845-8158;0000-0001-6190-348X;;;0000-0002-1689-8084;0000-0001-5530-3613", "linkedin": ";;evansherwin/;hannahkerner/;bjorn-lutjens/;;;hamedalemo/;drouinalexandre/;mehmetgunturkun/;;https://www.linkedin.com/company/david-vazquez/;;yoshuabengio/?originalSubdomain=ca;;;xiaoxiang-zhu-90b473228/", "or_profile": "~Alexandre_Lacoste1;~Nils_Lehmann1;~Evan_David_Sherwin1;~Hannah_Kerner1;~Bj\u00f6rn_L\u00fctjens1;~Jeremy_Andrew_Irvin1;~David_Dao1;~Hamed_Alemohammad1;~Alexandre_Drouin2;~Mehmet_Gunturkun1;~Gabriel_Huang1;~David_Vazquez1;~Dava_Newman1;~Yoshua_Bengio1;~Stefano_Ermon1;~Pau_Rodriguez_Lopez1;~Xiaoxiang_Zhu1", "aff": "ServiceNow;Technische Universit\u00e4t M\u00fcnchen;Stanford University;Arizona State University;Massachusetts Institute of Technology;Stanford University;ETHZ - ETH Zurich;Clark University;ServiceNow Research ;;ServiceNow Inc;ServiceNow research;Massachusetts Institute of Technology;University of Montreal;Stanford University;Apple;Technical University Munich", "aff_domain": "servicenow.com;tum.de;stanford.edu;asu.edu;mit.edu;stanford.edu;ethz.ch;clarku.edu;servicenow.com;;servicenow.com;servicenow.com;mit.edu;umontreal.ca;stanford.edu;apple.com;tum.de", "position": "Research Scientist;PhD student;Postdoc;Assistant Professor;PhD student;PhD student;PhD student;Associate Professor;Research Scientist;;Researcher;Researcher;Full Professor;Full Professor;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nlacoste2023geobench,\ntitle={{GEO}-Bench: Toward Foundation Models for Earth Monitoring},\nauthor={Alexandre Lacoste and Nils Lehmann and Pau Rodriguez and Evan David Sherwin and Hannah Kerner and Bj{\\\"o}rn L{\\\"u}tjens and Jeremy Andrew Irvin and David Dao and Hamed Alemohammad and Alexandre Drouin and Mehmet Gunturkun and Gabriel Huang and David Vazquez and Dava Newman and Yoshua Bengio and Stefano Ermon and Xiao Xiang Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=IptxZvA3at}\n}", "github": "", "project": "", "reviewers": "xuaP;2NRk;4soD;HGjk;PQwP", "pdf_size": 6348938, "rating": "7;7;8;9;10", "confidence": "4;3;4;3;3", "wc_summary_and_contributions": "61;165;43;60;110", "wc_strengths": "157;68;74;22;123", "wc_improvement": "58;68;154;129;37", "wc_limitations": "14;14;17;8;32", "wc_correctness": "22;27;14;1;33", "wc_clarity": "4;7;24;1;16", "wc_relation_to_prior_work": "12;8;20;10;32", "wc_documentation": "29;16;14;23;51", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "358;374;361;255;435", "wc_reply_reviewers": "88;0;0;0;31", "wc_reply_authors": "377;329;266;226;113", "reply_reviewers": "1;0;0;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 8.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 87.8, 44.61120935370392 ], "wc_strengths_avg": [ 88.8, 46.76494413553811 ], "wc_improvement_avg": [ 89.2, 44.56635502259524 ], 
"wc_limitations_avg": [ 17.0, 8.049844718999243 ], "wc_correctness_avg": [ 19.4, 11.11035552986492 ], "wc_clarity_avg": [ 10.4, 8.452218643646177 ], "wc_relation_to_prior_work_avg": [ 16.4, 8.8 ], "wc_documentation_avg": [ 26.6, 13.305637902783918 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 356.6, 57.953774682931574 ], "wc_reply_reviewers_avg": [ 23.8, 34.271854341427165 ], "wc_reply_authors_avg": [ 262.2, 90.803964671153 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -0.4900980294098034, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5787031216169949864&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "servicenow.com;tum.de;stanford.edu;asu.edu;mit.edu;stanford.edu;ethz.ch;clarku.edu;servicenow.com;;servicenow.com;servicenow.com;mit.edu;umontreal.ca;stanford.edu;apple.com;tum.de", "author_num": 17, "aff_unique_index": "0;1;2;3;4;2;5;6;0;0;0;4;7;2;8;9", "aff_unique_norm": "ServiceNow;Technische Universit\u00e4t M\u00fcnchen;Stanford University;Arizona State University;Massachusetts Institute of Technology;ETH Zurich;Clark University;University of Montreal;Apple;Technical University of Munich", "aff_unique_dep": ";;;;;;;;Apple Inc.;", "aff_unique_url": "https://www.servicenow.com;https://www.tum.de;https://www.stanford.edu;https://www.asu.edu;https://web.mit.edu;https://www.ethz.ch;https://www.clarku.edu;https://wwwumontreal.ca;https://www.apple.com;https://www.tum.de", "aff_unique_abbr": "ServiceNow;TUM;Stanford;ASU;MIT;ETHZ;Clark U;UM;Apple;TUM", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0;0;0;0;2;0;0;0;0;0;3;0;0;1", "aff_country_unique": "United States;Germany;Switzerland;Canada" }, { "title": "Emergent and Predictable Memorization in Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72096", "id": "Iq0DvhB4Kf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/59404fb89d6194641c69ae99ecdf8f6d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Iq0DvhB4Kf", "openreview": "https://openreview.net/forum?id=Iq0DvhB4Kf", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72096", "video": "https://nips.cc/virtual/2023/poster/72096", "author_site": "Stella Biderman, USVSN PRASHANTH, Lintang Sutawika, Lintang Sutawika, Hailey Schoelkopf, Quentin Anthony, Shivanshu Purohit, Edward Raff", "tldr": "", "abstract": "Memorization, or the tendency of large language models (LLMs) to output entire sequences from their training data verbatim, is a key concern for deploying language models. In particular, it is vital to minimize a model's memorization of sensitive datapoints such as those containing personal identifiable information (PII). The prevalence of such undesirable memorization can pose issues for model trainers, and may even require discarding an otherwise functional model. We therefore seek to predict which sequences will be memorized before a large model's full train-time by extrapolating the memorization behavior of lower-compute trial runs. We measure memorization in the Pythia model suite and plot scaling laws for forecasting memorization, allowing us to provide equi-compute recommendations to maximize the reliability (recall) of such predictions. 
We additionally provide further novel discoveries on the distribution of memorization scores across models and data. We release all code and data necessary to reproduce the results in this paper at https://github.com/EleutherAI/pythia.", "keywords": "large language model;emergent properties;memorization", "primary_area": "", "supplementary_material": "", "author": "Stella Biderman;USVSN Sai Prashanth;Lintang Sutawika;Hailey Schoelkopf;Quentin Gregory Anthony;Shivanshu Purohit;Edward Raff", "authorids": "~Stella_Biderman1;~USVSN_Sai_Prashanth1;~Lintang_Sutawika1;~Hailey_Schoelkopf1;~Quentin_Gregory_Anthony1;~Shivanshu_Purohit1;~Edward_Raff1", "gender": "F;M;M;F;M;M;M", "homepage": "http://www.stellabiderman.com;;https://lintang.sutawika.com;;https://quentin-anthony.github.io/;;http://www.edwardraff.com/", "dblp": "239/5641;;304/3270.html;;;318/2975;204/3369", "google_scholar": "bO7H0DAAAAAJ;hh_x9HgAAAAJ;pVgdC6wAAAAJ;XLahYIYAAAAJ;https://scholar.google.com/citations?hl=en;PbFnD-0AAAAJ;debM2bUAAAAJ", "orcid": "0000-0001-8228-1042;;;;0000-0002-6823-9080;;0000-0002-9900-1972", "linkedin": "stellabiderman;usvsnsp;;;quentin-anthony;https://linkedin.com/in/shivanshu-purohit;edward-raff-09992040/", "or_profile": "~Stella_Biderman1;~USVSN_Sai_Prashanth1;~Lintang_Sutawika1;~Hailey_Schoelkopf1;~Quentin_Gregory_Anthony1;~Shivanshu_Purohit1;~Edward_Raff1", "aff": "Booz Allen Hamilton;Matrusri Engineering College;EleutherAI;Yale University;Ohio State University, Columbus;;Syracuse University", "aff_domain": "boozallen.com;matrusri.edu.in;eleuther.ai;yale.edu;osu.edu;;syr.edu", "position": "Industry researcher;Undergrad student;Researcher;Undergrad student;PhD student;;MBA student", "bibtex": "@inproceedings{\nbiderman2023emergent,\ntitle={Emergent and Predictable Memorization in Large Language Models},\nauthor={Stella Biderman and USVSN Sai Prashanth and Lintang Sutawika and Hailey Schoelkopf and Quentin Gregory Anthony and Shivanshu Purohit and Edward Raff},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Iq0DvhB4Kf}\n}", "github": "", "project": "", "reviewers": "S833;5bNj;kL1W;c2gw;bdyA", "pdf_size": 1191233, "rating": "5;6;6;6;8", "confidence": "4;3;4;4;4", "soundness": "3;2;3;3;3", "novelty": "3;3;3;2;3", "presentation": "3;3;3;3;4", "wc_summary": "48;60;90;64;85", "wc_strengths": "52;19;50;37;98", "wc_weaknesses": "35;33;80;66;133", "wc_questions": "13;68;2;23;110", "wc_limitations": "1;10;1;1;15", "wc_review": "149;190;223;191;441", "wc_reply_reviewers": "47;9;0;53;31", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 69.4, 15.768322675541619 ], "wc_strengths_avg": [ 51.2, 26.19465594353169 ], "wc_weaknesses_avg": [ 69.4, 36.543672502910816 ], "wc_questions_avg": [ 43.2, 40.23630201695976 ], "wc_limitations_avg": [ 5.6, 5.851495535331117 ], "wc_review_avg": [ 238.8, 103.79287066075396 ], "wc_reply_reviewers_avg": [ 28.0, 20.688160865577203 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.10206207261596578, "gs_citation": 175, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1285792547068778435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "boozallen.com;matrusri.edu.in;eleuther.ai;yale.edu;osu.edu;;syr.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Booz Allen Hamilton;Matrusri Engineering College;EleutherAI;Yale University;Ohio State University;Syracuse University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.boozallen.com;https://www.matrusri.ac.in;https://www.eleuther.ai;https://www.yale.edu;https://www.osu.edu;https://www.syracuse.edu", "aff_unique_abbr": "BAH;;EleutherAI;Yale;OSU;Syracuse", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;India" }, { "title": "Debiasing Pretrained Generative Models by Uniformly Sampling Semantic Attributes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72095", "id": "Iq7v0sZw2H", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8d7060b2ee6ff728692398783e3d59d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Iq7v0sZw2H", "openreview": "https://openreview.net/forum?id=Iq7v0sZw2H", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72095", "video": "https://nips.cc/virtual/2023/poster/72095", "author_site": "Walter Gerych, Walter Gerych, Kevin Hickey, Luke Buquicchio, Kavin Chandrasekaran, Abdulaziz Alajaji, Elke A. Rundensteiner, Emmanuel Agu", "tldr": "", "abstract": "Generative models are being increasingly used in science and industry applications. Unfortunately, they often perpetuate the biases present in their training sets, such as societal biases causing certain groups to be underrepresented in the data. For instance, image generators may overwhelmingly produce images of white people due to few non-white samples in their training data. It is imperative to debias generative models so they synthesize an equal number of instances for each group, while not requiring retraining of the model to avoid prohibitive expense. We thus propose a *distribution mapping module* that produces samples from a *fair noise distribution*, such that the pretrained generative model produces *semantically uniform* outputs - an equal number of instances for each group - when conditioned on these samples. This does *not* involve retraining the generator, nor does it require *any* real training data. 
Experiments on debiasing generators trained on popular real-world datasets show that our method outperforms existing approaches.", "keywords": "generative models;generative modeling;bias;GANs;debiasing", "primary_area": "", "supplementary_material": "/attachment/b99d0ed3ec35999c064c32160b4835918c82ab2d.zip", "author": "Walter Gerych;Kevin Hickey;Luke Buquicchio;Kavin Chandrasekaran;Abdulaziz Alajaji;Elke Rundensteiner;Emmanuel Agu", "authorids": "~Walter_Gerych2;~Kevin_Hickey1;~Luke_Buquicchio1;~Kavin_Chandrasekaran1;~Abdulaziz_Alajaji1;~Elke_Rundensteiner2;~Emmanuel_Agu1", "gender": "M;M;;M;M;F;M", "homepage": "https://waltergerych.github.io/;;;https://kavincsekaran.github.io/;;https://www.wpi.edu/people/faculty/rundenst;https://www.wpi.edu/people/faculty/emmanuel", "dblp": "237/9060;263/2975;248/3289;248/3538.html;74/10202;r/EARundensteiner;23/3629", "google_scholar": "https://scholar.google.com/citations?hl=en;;D3BKoHMAAAAJ;wziaKmQAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=Ke6ex0wAAAAJ", "orcid": ";;0000-0002-9639-8660;;0000-0001-6725-4054;0000-0001-5375-9254;", "linkedin": "walter-gerych-84165112b/;kevin-hickey-0a7968b0/;luke-buquicchio-a55774141/;;;elke-rundensteiner-4a2825/;", "or_profile": "~Walter_Gerych2;~Kevin_Hickey1;~Luke_Buquicchio1;~Kavin_Chandrasekaran1;~Abdulaziz_Alajaji1;~Elke_Rundensteiner2;~Emmanuel_Agu1", "aff": "Worcester Polytechnic Institute;Worcester Polytechnic Institute;Worcester Polytechnic Institute;Worcester Polytechnic Institute;King Saud University;Worcester Polytechnic Institute;Worcester Polytechnic Institute", "aff_domain": "wpi.edu;wpi.edu;wpi.edu;wpi.edu;ksu.edu.sa;wpi.edu;wpi.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngerych2023debiasing,\ntitle={Debiasing Pretrained Generative Models by Uniformly Sampling Semantic Attributes},\nauthor={Walter Gerych and Kevin Hickey and Luke Buquicchio and Kavin Chandrasekaran and Abdulaziz Alajaji and Elke Rundensteiner and Emmanuel Agu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Iq7v0sZw2H}\n}", "github": "", "project": "", "reviewers": "CzLf;K9u1;h8Jf;yDHD", "pdf_size": 1427993, "rating": "5;6;7;7", "confidence": "4;3;2;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "43;75;102;154", "wc_strengths": "28;57;105;84", "wc_weaknesses": "84;96;88;175", "wc_questions": "14;115;6;55", "wc_limitations": "93;6;8;6", "wc_review": "262;349;309;474", "wc_reply_reviewers": "112;51;0;271", "wc_reply_authors": "643;17;0;292", "reply_reviewers": "2;2;0;2", "reply_authors": "3;2;1;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 93.5, 40.697051490249265 ], "wc_strengths_avg": [ 68.5, 28.91798748184251 ], "wc_weaknesses_avg": [ 110.75, 37.34551512564795 ], "wc_questions_avg": [ 47.5, 43.176961449365564 ], "wc_limitations_avg": [ 28.25, 37.392345473371954 ], "wc_review_avg": [ 348.5, 78.72896544474594 ], "wc_reply_reviewers_avg": [ 108.5, 101.85406226557681 ], "wc_reply_authors_avg": [ 238.0, 260.9722207438945 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], 
"corr_rating_confidence": -0.4545454545454545, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11795406129361029223&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "wpi.edu;wpi.edu;wpi.edu;wpi.edu;ksu.edu.sa;wpi.edu;wpi.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Worcester Polytechnic Institute;King Saud University", "aff_unique_dep": ";", "aff_unique_url": "https://www.wpi.edu;https://www.ksu.edu.sa", "aff_unique_abbr": "WPI;KSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;Saudi Arabia" }, { "title": "Why Did This Model Forecast This Future? Information-Theoretic Saliency for Counterfactual Explanations of Probabilistic Regression Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72094", "id": "IrEYkhuxup", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/694ec0018b9fd0ebe863ec29fa5a89b9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IrEYkhuxup", "openreview": "https://openreview.net/forum?id=IrEYkhuxup", "poster": "/media/PosterPDFs/NeurIPS%202023/72094.png?t=1702357131.0036604", "slides": "https://nips.cc/virtual/2023/poster/72094", "video": "https://nips.cc/virtual/2023/poster/72094", "author_site": "Chirag Raman, Alec Nonnemaker, Amelia Villegas-Morcillo, Hayley Hung, Marco Loog", "tldr": "", "abstract": "We propose a post hoc saliency-based explanation framework for counterfactual reasoning in probabilistic multivariate time-series forecasting (regression) settings. Building upon Miller's framework of explanations derived from research in multiple social science disciplines, we establish a conceptual link between counterfactual reasoning and saliency-based explanation techniques. To address the lack of a principled notion of saliency, we leverage a unifying definition of information-theoretic saliency grounded in preattentive human visual cognition and extend it to forecasting settings. Specifically, we obtain a closed-form expression for commonly used density functions to identify which observed timesteps appear salient to an underlying model in making its probabilistic forecasts. We empirically validate our framework in a principled manner using synthetic data to establish ground-truth saliency that is unavailable for real-world data. 
Finally, using real-world data and forecasting models, we demonstrate how our framework can assist domain experts in forming new data-driven hypotheses about the causal relationships between features in the wild.", "keywords": "Probabilistic Forecasting;Saliency;Explainability;XAI;Probabilistic Regression", "primary_area": "", "supplementary_material": "/attachment/4dbccd8023f620b08b9728fce593b2eb2f7641b4.zip", "author": "Chirag Raman;Alec Nonnemaker;Amelia Villegas-Morcillo;Hayley Hung;Marco Loog", "authorids": "~Chirag_Raman2;~Alec_Nonnemaker1;~Amelia_Villegas-Morcillo1;~Hayley_Hung2;~Marco_Loog1", "gender": "M;M;;F;", "homepage": "http://chiragraman.com;;https://amelvim.github.io/;http://homepage.tudelft.nl/3e2t5/;", "dblp": "195/8280;;;13/4646.html;", "google_scholar": "TeoDF6MAAAAJ;;6rBKoy0AAAAJ;ka-LsrYAAAAJ;", "orcid": ";;0000-0002-3286-049X;0000-0001-9574-5395;", "linkedin": ";alec-nonnemaker-3b4b2a1ab/;amelvim;hayley-hung-2b89591/;", "or_profile": "~Chirag_Raman2;~Alec_Nonnemaker1;~Amelia_Villegas-Morcillo1;~Hayley_Hung2;~Marco_Loog1", "aff": "Delft University of Technology;Delft University of Technology;Delft University of Technology;Delft University of Technology;", "aff_domain": "tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl;", "position": "Assistant Professor;MS student;Postdoc;Associate Professor;", "bibtex": "@inproceedings{\nraman2023why,\ntitle={Why Did This Model Forecast This Future? Information-Theoretic Saliency for Counterfactual Explanations of Probabilistic Regression Models},\nauthor={Chirag Raman and Alec Nonnemaker and Amelia Villegas-Morcillo and Hayley Hung and Marco Loog},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IrEYkhuxup}\n}", "github": "", "project": "", "reviewers": "f9yh;N3J3;L4MH;82xZ;VPQB", "pdf_size": 22830666, "rating": "5;6;6;7;7", "confidence": "3;3;2;3;4", "soundness": "2;3;2;3;3", "novelty": "3;2;2;3;3", "presentation": "2;3;2;4;4", "wc_summary": "63;70;61;54;93", "wc_strengths": "38;56;30;65;53", "wc_weaknesses": "161;105;101;84;36", "wc_questions": "45;3;443;26;84", "wc_limitations": "52;5;16;8;4", "wc_review": "359;239;651;237;270", "wc_reply_reviewers": "63;3;23;11;5", "wc_reply_authors": "98;44;10;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 68.2, 13.407460609675494 ], "wc_strengths_avg": [ 48.4, 12.658593918757328 ], "wc_weaknesses_avg": [ 97.4, 40.16266923400386 ], "wc_questions_avg": [ 120.2, 163.5694347975807 ], "wc_limitations_avg": [ 17.0, 18.0 ], "wc_review_avg": [ 351.2, 156.31046030256582 ], "wc_reply_reviewers_avg": [ 21.0, 22.126906697502932 ], "wc_reply_authors_avg": [ 30.4, 37.467852887508776 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7805850579884562823&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU 
Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Model-free Posterior Sampling via Learning Rate Randomization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72093", "id": "IrjXmIKFyx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e985dfca10e1167c0836a70880ef0858-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IrjXmIKFyx", "openreview": "https://openreview.net/forum?id=IrjXmIKFyx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72093", "video": "https://nips.cc/virtual/2023/poster/72093", "author_site": "Daniil Tiapkin, Denis Belomestny, Daniele Calandriello, Eric Moulines, Remi Munos, Alexey Naumov, Pierre Perrault, Michal Valko, Pierre M\u00e9nard", "tldr": "", "abstract": "In this paper, we introduce Randomized Q-learning (RandQL), a novel randomized model-free algorithm for regret minimization in episodic Markov Decision Processes (MDPs). To the best of our knowledge, RandQL is the first tractable model-free posterior sampling-based algorithm. We analyze the performance of RandQL in both tabular and non-tabular metric space settings. In tabular MDPs, RandQL achieves a regret bound of order $\\widetilde{\\mathcal{O}}(\\sqrt{H^{5}SAT})$, where $H$ is the planning horizon, $S$ is the number of states, $A$ is the number of actions, and $T$ is the number of episodes. For a metric state-action space, RandQL enjoys a regret bound of order $\\widetilde{\\mathcal{O}}(H^{5/2} T^{(d_z+1)/(d_z+2)})$, where $d_z$ denotes the zooming dimension. Notably, RandQL achieves optimistic exploration without using bonuses, relying instead on a novel idea of learning rate randomization. 
Our empirical study shows that RandQL outperforms existing approaches on baseline exploration environments.", "keywords": "reinforcement learning;exploration;q-learning", "primary_area": "", "supplementary_material": "/attachment/2f6c624f0ba350a339babebef0332fa43234c912.zip", "author": "Daniil Tiapkin;Denis Belomestny;Daniele Calandriello;Eric Moulines;Remi Munos;Alexey Naumov;pierre perrault;Michal Valko;Pierre MENARD", "authorids": "~Daniil_Tiapkin1;~Denis_Belomestny1;~Daniele_Calandriello1;~Eric_Moulines1;~Remi_Munos1;~Alexey_Naumov1;~pierre_perrault2;~Michal_Valko1;~Pierre_MENARD1", "gender": "M;M;M;M;M;M;M;M;Not Specified", "homepage": "https://d-tiapkin.github.io/;https://denbel.github.io;;;http://researchers.lille.inria.fr/~munos/;https://www.hse.ru/en/staff/anaumov;;https://misovalko.github.io/research.html;https://menardprr.github.io/", "dblp": "267/5445;;129/1542;54/2358;69/6815;196/2848;222/3254;03/5455;176/5039", "google_scholar": "https://scholar.google.ru/citations?user=AB23PXQAAAAJ;https://scholar.google.de/citations?user=WFjIBlcAAAAJ;;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ;https://scholar.google.com/citations?hl=en;5723KoYAAAAJ;https://scholar.google.fr/citations?user=KIIpLJsAAAAJ;jrazNCQAAAAJ;KXimUncAAAAJ", "orcid": "0000-0002-8832-7926;0000-0002-9482-6430;;0000-0002-2058-0693;;;;;", "linkedin": "daniil-tiapkin-049714240/;;;;;;;michalvalko/;", "or_profile": "~Daniil_Tiapkin1;~Denis_Belomestny1;~Daniele_Calandriello1;~Eric_Moulines1;~Remi_Munos1;~Alexey_Naumov1;~pierre_perrault2;~Michal_Valko1;~Pierre_MENARD1", "aff": "HSE University;Duisburg-Essen University;Google DeepMind;Ecole polytechnique;Google DeepMind;Higher School of Economics;IDEMIA;Google DeepMind;Ecole Normale Sup\u00e9rieure de Lyon", "aff_domain": "hse.ru;uni-due.de;deepmind.com;polytechnique.edu;google.com;hse.ru;idemia.com;deepmind.com;ens-lyon.fr", "position": "MS student;Full Professor;Researcher;Full Professor;Research scientist;Full Professor;Researcher;Senior Staff Research Scientist;Postdoc", "bibtex": "@inproceedings{\ntiapkin2023modelfree,\ntitle={Model-free Posterior Sampling via Learning Rate Randomization},\nauthor={Daniil Tiapkin and Denis Belomestny and Daniele Calandriello and Eric Moulines and Remi Munos and Alexey Naumov and pierre perrault and Michal Valko and Pierre MENARD},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IrjXmIKFyx}\n}", "github": "", "project": "", "reviewers": "xW74;eHBt;XC7g;4Am8;axbs;PYAU", "pdf_size": 10648100, "rating": "5;5;6;6;6;6", "confidence": "2;3;3;4;4;3", "soundness": "3;3;3;4;4;3", "novelty": "2;2;4;2;3;3", "presentation": "3;3;3;3;4;3", "wc_summary": "45;50;45;83;222;126", "wc_strengths": "34;19;49;220;57;54", "wc_weaknesses": "82;40;199;321;31;53", "wc_questions": "66;115;42;23;79;25", "wc_limitations": "1;100;6;1;1;1", "wc_review": "228;324;341;648;390;259", "wc_reply_reviewers": "74;72;16;98;10;18", "wc_reply_authors": "263;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 95.16666666666667, 63.58568147695587 ], "wc_strengths_avg": [ 72.16666666666667, 67.37313180258795 ], "wc_weaknesses_avg": [ 121.0, 
105.58566821938162 ], "wc_questions_avg": [ 58.333333333333336, 32.4636548911069 ], "wc_limitations_avg": [ 18.333333333333332, 36.56804919902376 ], "wc_review_avg": [ 365.0, 137.22001797599842 ], "wc_reply_reviewers_avg": [ 48.0, 34.448028487370166 ], "wc_reply_authors_avg": [ 43.833333333333336, 98.01431301374078 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6859943405700354, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6556097373797972288&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hse.ru;uni-due.de;deepmind.com;polytechnique.edu;google.com;hse.ru;idemia.com;deepmind.com;ens-lyon.fr", "author_num": 9, "aff_unique_index": "0;1;2;3;2;0;4;2;5", "aff_unique_norm": "Higher School of Economics;University of Duisburg-Essen;Google;Ecole Polytechnique;IDEMIA;Ecole Normale Sup\u00e9rieure de Lyon", "aff_unique_dep": ";;Google DeepMind;;;", "aff_unique_url": "https://hse.ru;https://www.uni-due.de;https://deepmind.com;https://www.polytechnique.edu;https://www.idemia.com;https://www.ens-lyon.fr", "aff_unique_abbr": "HSE;UDE;DeepMind;X;IDEMIA;ENS de Lyon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;2;0;3;2;3", "aff_country_unique": "Russian Federation;Germany;United Kingdom;France" }, { "title": "Hidden Poison: Machine Unlearning Enables Camouflaged Poisoning Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72092", "id": "Isy7gl1Hqc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b4add8b0aa8749d80a34ca5d941c355-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Isy7gl1Hqc", "openreview": "https://openreview.net/forum?id=Isy7gl1Hqc", "poster": "/media/PosterPDFs/NeurIPS%202023/72092.png?t=1702002831.7744172", "slides": "https://nips.cc/virtual/2023/poster/72092", "video": "https://nips.cc/virtual/2023/poster/72092", "author_site": "Jimmy Di, Jack Douglas, Jayadev Acharya, Gautam Kamath, Ayush Sekhari", "tldr": "", "abstract": "We introduce camouflaged data poisoning attacks, a new attack vector that arises in the context of machine unlearning and other settings when model retraining may be induced. An adversary first adds a few carefully crafted points to the training dataset such that the impact on the model's predictions is minimal. The adversary subsequently triggers a request to remove a subset of the introduced points at which point the attack is unleashed and the model's predictions are negatively affected. In particular, we consider clean-label targeted attacks (in which the goal is to cause the model to misclassify a specific test point) on datasets including CIFAR-10, Imagenette, and Imagewoof. This attack is realized by constructing camouflage datapoints that mask the effect of a poisoned dataset. We demonstrate the efficacy of our attack when unlearning is performed via retraining from scratch, the idealized setting of machine unlearning which other efficient methods attempt to emulate, as well as against the approximate unlearning approach of Graves et al. (2021).", "keywords": "Machine unlearning;new attack vector;Camouflaging poisoning attacks", "primary_area": "", "supplementary_material": "/attachment/da09e6bbb240b5cef2c35d494d61f724168ea03a.zip", "author": "Jimmy Z. 
Di;Jack Douglas;Jayadev Acharya;Gautam Kamath;Ayush Sekhari", "authorids": "~Jimmy_Z._Di1;~Jack_Douglas2;~Jayadev_Acharya2;~Gautam_Kamath1;~Ayush_Sekhari1", "gender": "M;M;M;M;M", "homepage": ";;https://people.ece.cornell.edu/acharya/;http://www.gautamkamath.com/;https://ayush.sekhari.com/", "dblp": ";;74/5865;73/11140;203/8152", "google_scholar": ";;70vJVxcAAAAJ;MK6zHkYAAAAJ;jH9i188AAAAJ", "orcid": ";;;;", "linkedin": "jimmy-di-0319/;jack-douglas-910896150/;;;", "or_profile": "~Jimmy_Z._Di1;~Jack_Douglas2;~Jayadev_Acharya2;~Gautam_Kamath1;~Ayush_Sekhari1", "aff": "University of Waterloo;University of Waterloo;Cornell University;University of Waterloo;Massachusetts Institute of Technology", "aff_domain": "uwaterloo.ca;uwaterloo.ca;cornell.edu;uwaterloo.ca;mit.edu", "position": "MS student;Undergrad student;Assistant Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\ndi2023hidden,\ntitle={Hidden Poison: Machine Unlearning Enables Camouflaged Poisoning Attacks},\nauthor={Jimmy Z. Di and Jack Douglas and Jayadev Acharya and Gautam Kamath and Ayush Sekhari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Isy7gl1Hqc}\n}", "github": "", "project": "", "reviewers": "dUYW;VjL7;zrPH;eD7Z;oSYx", "pdf_size": 44523657, "rating": "4;5;6;6;7", "confidence": "4;3;5;4;4", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;4", "presentation": "3;3;3;2;4", "wc_summary": "74;70;86;130;106", "wc_strengths": "51;9;43;112;7", "wc_weaknesses": "276;13;35;351;38", "wc_questions": "2;280;488;54;4", "wc_limitations": "2;11;1;34;3", "wc_review": "405;383;653;681;158", "wc_reply_reviewers": "0;209;115;274;0", "wc_reply_authors": "92;163;20;265;0", "reply_reviewers": "0;1;1;2;0", "reply_authors": "3;3;2;3;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 93.2, 22.25668438918969 ], "wc_strengths_avg": [ 44.4, 38.12400818382034 ], "wc_weaknesses_avg": [ 142.6, 141.80352604924886 ], "wc_questions_avg": [ 165.6, 190.97811392931914 ], "wc_limitations_avg": [ 10.2, 12.416118556135006 ], "wc_review_avg": [ 456.0, 192.9600995024619 ], "wc_reply_reviewers_avg": [ 119.6, 109.96472161561633 ], "wc_reply_authors_avg": [ 108.0, 97.30159299826494 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.31008683647302115, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4576916714254473890&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uwaterloo.ca;uwaterloo.ca;cornell.edu;uwaterloo.ca;mit.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Waterloo;Cornell University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://uwaterloo.ca;https://www.cornell.edu;https://web.mit.edu", "aff_unique_abbr": "UW;Cornell;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Accelerating Exploration with Unlabeled Prior Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72091", "id": "Itorzn4Kwf", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/d53d51e88d92d3723755f6d425bc513b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Itorzn4Kwf", "openreview": "https://openreview.net/forum?id=Itorzn4Kwf", "poster": "/media/PosterPDFs/NeurIPS%202023/72091.png?t=1699459865.220939", "slides": "https://nips.cc/virtual/2023/poster/72091", "video": "https://nips.cc/virtual/2023/poster/72091", "author_site": "Qiyang Li, Jason Zhang, Dibya Ghosh, Amy Zhang, Sergey Levine", "tldr": "", "abstract": "Learning to solve tasks from a sparse reward signal is a major challenge for standard reinforcement learning (RL) algorithms. However, in the real world, agents rarely need to solve sparse reward tasks entirely from scratch. More often, we might possess prior experience to draw on that provides considerable guidance about which actions and outcomes are possible in the world, which we can use to explore more effectively for new tasks. In this work, we study how prior data without reward labels may be used to guide and accelerate exploration for an agent solving a new sparse reward task. We propose a simple approach that learns a reward model from online experience, labels the unlabeled prior data with optimistic rewards, and then uses it concurrently alongside the online data for downstream policy and critic optimization. This general formula leads to rapid exploration in several challenging sparse-reward domains where tabula rasa exploration is insufficient, including the AntMaze domain, Adroit hand manipulation domain, and a visual simulated robotic manipulation domain. Our results highlight the ease of incorporating unlabeled prior data into existing online RL algorithms, and the (perhaps surprising) effectiveness of doing so.", "keywords": "Reinforcement Learning;Exploration", "primary_area": "", "supplementary_material": "", "author": "Qiyang Li;Jason Zhang;Dibya Ghosh;Amy Zhang;Sergey Levine", "authorids": "~Qiyang_Li1;jason.z@berkeley.edu;~Dibya_Ghosh1;~Amy_Zhang1;~Sergey_Levine1", "gender": "M;;M;;M", "homepage": "https://colinqiyangli.github.io/;;https://dibyaghosh.com;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";;210/2547;;80/7594", "google_scholar": "qlwwdfEAAAAJ;;znnl0kwAAAAJ;;8R35rCwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Qiyang_Li1;jason.z@berkeley.edu;~Dibya_Ghosh1;~Amy_Zhang1;~Sergey_Levine1", "aff": "University of California, Berkeley;;University of California, Berkeley;;Google", "aff_domain": "berkeley.edu;;berkeley.edu;;google.com", "position": "PhD student;;PhD student;;Research Scientist", "bibtex": "@inproceedings{\nli2023accelerating,\ntitle={Accelerating Exploration with Unlabeled Prior Data},\nauthor={Qiyang Li and Jason Zhang and Dibya Ghosh and Amy Zhang and Sergey Levine},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Itorzn4Kwf}\n}", "github": "", "project": "", "reviewers": "egZc;vsqs;hFD7;Mcz4", "pdf_size": 2776028, "rating": "5;6;6;6", "confidence": "3;4;4;4", "soundness": "4;4;3;3", "novelty": "2;4;3;3", "presentation": "3;4;4;2", "wc_summary": "45;70;74;111", "wc_strengths": "50;82;73;104", "wc_weaknesses": "289;175;167;210", "wc_questions": "2;41;54;89", "wc_limitations": "34;1;62;39", "wc_review": "420;369;430;553", "wc_reply_reviewers": "19;0;144;160", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 
0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 75.0, 23.569047498785352 ], "wc_strengths_avg": [ 77.25, 19.356846334049358 ], "wc_weaknesses_avg": [ 210.25, 48.25647624930772 ], "wc_questions_avg": [ 46.5, 31.11671576500322 ], "wc_limitations_avg": [ 34.0, 21.783020910791965 ], "wc_review_avg": [ 443.0, 67.59067983087608 ], "wc_reply_reviewers_avg": [ 80.75, 71.7891879045863 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8167818740610926032&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;;berkeley.edu;;google.com", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "IvEWhB1P90", "title": "It begins with a boundary: A geometric view on probabilistically robust learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Although deep neural networks have achieved super-human performance on many classification tasks, they often exhibit a worrying lack of robustness towards adversarially generated examples. Thus, considerable effort has been invested into reformulating Empirical Risk Minimization (ERM) into an adversarially robust framework. Recently, attention has shifted towards approaches which interpolate between the robustness offered by adversarial training and the higher clean accuracy and faster training times of ERM. In this paper, we take a fresh and geometric view on one such method---Probabilistically Robust Learning (PRL) (Robey et al., ICML, 2022). We propose a geometric framework for understanding PRL, which allows us to identify a subtle flaw in its original formulation and to introduce a family of probabilistic nonlocal perimeter functionals to address this. 
We prove existence of solutions using novel relaxation methods and study properties as well as local limits of the introduced perimeters.", "keywords": "probabilistically robust learning;adversarial attacks;existence of solutions;nonlocal perimeters;regularization", "primary_area": "", "supplementary_material": "/attachment/8c4827d1722cd04b4df0ed71cf1a895564c30e84.zip", "author": "Leon Bungert;Nicolas Garcia Trillos;Matt Jacobs;Daniel McKenzie;Djordje Nikolic;Qingsong Wang", "authorids": "~Leon_Bungert1;~Nicolas_Garcia_Trillos1;~Matt_Jacobs1;~Daniel_McKenzie1;~Djordje_Nikolic1;~Qingsong_Wang1", "gender": "M;;M;;M;", "homepage": "https://sites.google.com/view/leon-bungert;https://www.nicolasgarciat.com/;https://www.math.purdue.edu/~jacob225/;http://danielmckenzie.github.io/;https://nikolic-djordje.github.io;", "dblp": "198/2233;154/6448;198/6693;86/7257;;", "google_scholar": "ToD4fG0AAAAJ;iZJlAscAAAAJ;_zMasWkAAAAJ;kP12IskAAAAJ;;", "orcid": "0000-0002-6554-9892;0000-0002-7711-5901;;;;", "linkedin": ";;;;;", "or_profile": "~Leon_Bungert1;~Nicolas_Garcia_Trillos1;~Matt_Jacobs1;~Daniel_McKenzie1;~Djordje_Nikolic1;~Qingsong_Wang1", "aff": "University of Bonn;University of Wisconsin, Madison;Purdue University;Colorado School of Mines;University of California, Santa Barbara;", "aff_domain": "uni-bonn.de;wisc.edu;purdue.edu;mines.edu;ucsb.edu;", "position": "Postdoc;Assistant Professor;Assistant Professor;Assistant Professor;PhD student;", "bibtex": "@misc{\nbungert2023it,\ntitle={It begins with a boundary: A geometric view on probabilistically robust learning},\nauthor={Leon Bungert and Nicolas Garcia Trillos and Matt Jacobs and Daniel McKenzie and Djordje Nikolic and Qingsong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=IvEWhB1P90}\n}", "github": "", "project": "", "reviewers": "CsNz;u3Qk;LMsa;j7Q6", "site": "https://openreview.net/forum?id=IvEWhB1P90", "pdf_size": 625164, "rating": "3;5;7;7", "confidence": "5;2;1;4", "soundness": "3;3;4;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "90;170;211;95", "wc_strengths": "225;98;98;21", "wc_weaknesses": "2856;127;87;9", "wc_questions": "2;198;72;66", "wc_limitations": "6;162;11;12", "wc_review": "3179;755;479;203", "wc_reply_reviewers": "652;223;11;10", "wc_reply_authors": "284;204;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.0, 1.5811388300841898 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 141.5, 51.12973694436536 ], "wc_strengths_avg": [ 110.5, 73.20006830597906 ], "wc_weaknesses_avg": [ 769.75, 1205.244243919049 ], "wc_questions_avg": [ 84.5, 71.04048141728771 ], "wc_limitations_avg": [ 47.75, 66.00142043926024 ], "wc_review_avg": [ 1154.0, 1185.311351502212 ], "wc_reply_reviewers_avg": [ 224.0, 261.892153376156 ], "wc_reply_authors_avg": [ 122.0, 125.23577763562615 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5720775535473555, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8019481231767571821&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Bonn;University of Wisconsin;Purdue University;Colorado School of Mines;University of California, Santa Barbara", "aff_unique_dep": ";;;;", "aff_unique_url": 
"https://www.uni-bonn.de/;https://www.wisc.edu;https://www.purdue.edu;https://www.mines.edu;https://www.ucsb.edu", "aff_unique_abbr": "UBonn;UW;Purdue;CSM;UCSB", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Madison;Santa Barbara", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Conformal Meta-learners for Predictive Inference of Individual Treatment Effects", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72090", "id": "IwnINorSZ5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/94ab02a30b0e4a692a42ccd0b4c55399-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IwnINorSZ5", "openreview": "https://openreview.net/forum?id=IwnINorSZ5", "poster": "/media/PosterPDFs/NeurIPS%202023/72090.png?t=1702156423.06683", "slides": "https://nips.cc/virtual/2023/poster/72090", "video": "https://nips.cc/virtual/2023/poster/72090", "author_site": "Ahmed Alaa, Zaid Ahmad, Mark van der Laan", "tldr": "", "abstract": "We investigate the problem of machine learning-based (ML) predictive inference on individual treatment effects (ITEs). Previous work has focused primarily on developing ML-based \u201cmeta-learners\u201d that can provide point estimates of the conditional average treatment effect (CATE)\u2014these are model-agnostic approaches for combining intermediate nuisance estimates to produce estimates of CATE. In this paper, we develop conformal meta-learners, a general framework for issuing predictive intervals for ITEs by applying the standard conformal prediction (CP) procedure on top of CATE meta-learners. We focus on a broad class of meta-learners based on two-stage pseudo-outcome regression and develop a stochastic ordering framework to study their validity. We show that inference with conformal meta-learners is marginally valid if their (pseudo-outcome) conformity scores stochastically dominate \u201coracle\u201d conformity scores evaluated on the unobserved ITEs. Additionally, we prove that commonly used CATE meta-learners, such as the doubly-robust learner, satisfy a model- and distribution-free stochastic (or convex) dominance condition, making their conformal inferences valid for practically-relevant levels of target coverage. Whereas existing procedures conduct inference on nuisance parameters (i.e., potential outcomes) via weighted CP, conformal meta-learners enable direct inference on the target parameter (ITE). 
Numerical experiments show that conformal meta-learners provide valid intervals with competitive efficiency while retaining the favorable point estimation properties of CATE meta-learners.", "keywords": "Heterogeneous treatment effects;conformal prediction", "primary_area": "", "supplementary_material": "", "author": "Ahmed Alaa;Zaid Ahmad;Mark van der Laan", "authorids": "~Ahmed_Alaa1;zaidahmad@berkeley.edu;laan@berkeley.edu", "gender": "M;;", "homepage": "https://alaalab.berkeley.edu/;;", "dblp": "140/7324;;", "google_scholar": "https://scholar.google.com.eg/citations?user=_pv1sEcAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ahmed_Alaa1;zaidahmad@berkeley.edu;laan@berkeley.edu", "aff": "University of California, Berkeley;;", "aff_domain": "berkeley.edu;;", "position": "Assistant Professor;;", "bibtex": "@inproceedings{\nalaa2023conformal,\ntitle={Conformal Meta-learners for Predictive Inference of Individual Treatment Effects},\nauthor={Ahmed Alaa and Zaid Ahmad and Mark van der Laan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IwnINorSZ5}\n}", "github": "", "project": "", "reviewers": "SDYU;V6dY;vDHA;855y", "pdf_size": 3966067, "rating": "6;7;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;4;2", "wc_summary": "108;38;116;121", "wc_strengths": "106;27;30;142", "wc_weaknesses": "277;90;6;63", "wc_questions": "403;48;93;143", "wc_limitations": "1;2;38;13", "wc_review": "895;205;283;482", "wc_reply_reviewers": "66;14;0;19", "wc_reply_authors": "45;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 95.75, 33.662850443775554 ], "wc_strengths_avg": [ 76.25, 49.428610136235875 ], "wc_weaknesses_avg": [ 109.0, 101.62430811572594 ], "wc_questions_avg": [ 171.75, 137.6760236933069 ], "wc_limitations_avg": [ 13.5, 14.908051515875574 ], "wc_review_avg": [ 466.25, 267.35124368515665 ], "wc_reply_reviewers_avg": [ 24.75, 24.81305100143874 ], "wc_reply_authors_avg": [ 11.25, 19.48557158514987 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6980119330924294559&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "berkeley.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Perceptual adjustment queries and an inverted measurement paradigm for low-rank metric learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72089", "id": "IwyymRXfzL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a07c3a67cfe50d3236b71fb674c7f30-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IwyymRXfzL", "openreview": "https://openreview.net/forum?id=IwyymRXfzL", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72089.png?t=1702234654.698198", "slides": "https://nips.cc/virtual/2023/poster/72089", "video": "https://nips.cc/virtual/2023/poster/72089", "author_site": "Austin Xu, Andrew McRae, Jingyan Wang, Mark Davenport, Ashwin Pananjady", "tldr": "", "abstract": "We introduce a new type of query mechanism for collecting human feedback, called the perceptual adjustment query (PAQ). Being both informative and cognitively lightweight, the PAQ adopts an inverted measurement scheme, and combines advantages from both cardinal and ordinal queries. We showcase the PAQ in the metric learning problem, where we collect PAQ measurements to learn an unknown Mahalanobis distance. This gives rise to a high-dimensional, low-rank matrix estimation problem to which standard matrix estimators cannot be applied. Consequently, we develop a two-stage estimator for metric learning from PAQs, and provide sample complexity guarantees for this estimator. We present numerical simulations demonstrating the performance of the estimator and its notable properties.", "keywords": "human querying;high dimensional low rank matrix estimation;metric learning", "primary_area": "", "supplementary_material": "/attachment/9df5b18f1acfa1103a049ce7ab901032300a6ecb.zip", "author": "Austin Xu;Andrew McRae;Jingyan Wang;Mark A. Davenport;Ashwin Pananjady", "authorids": "~Austin_Xu1;~Andrew_McRae1;~Jingyan_Wang1;~Mark_A._Davenport1;~Ashwin_Pananjady1", "gender": "M;;;;M", "homepage": "https://austinxu87.github.io;https://admcrae.github.io/;https://jingyanw.github.io/;;https://sites.gatech.edu/ashwin-pananjady/", "dblp": "274/3031;245/0016;59/8206-1;;132/9037", "google_scholar": "https://scholar.google.com/citations?hl=en;GT_Ml_cAAAAJ;7-2bbBgAAAAJ;;kAOvHSoAAAAJ", "orcid": ";;0000-0002-2052-1108;;0000-0003-0824-9815", "linkedin": ";;;;", "or_profile": "~Austin_Xu1;~Andrew_McRae1;~Jingyan_Wang1;~Mark_A._Davenport1;~Ashwin_Pananjady1", "aff": "Duolingo;EPFL - EPF Lausanne;Georgia Institute of Technology;;Georgia Institute of Technology", "aff_domain": "duolingo.com;epfl.ch;gatech.edu;;gatech.edu", "position": "Intern;Postdoc;Postdoc;;Assistant Professor", "bibtex": "@inproceedings{\nxu2023perceptual,\ntitle={Perceptual adjustment queries and an inverted measurement paradigm for low-rank metric learning},\nauthor={Austin Xu and Andrew McRae and Jingyan Wang and Mark A. 
Davenport and Ashwin Pananjady},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IwyymRXfzL}\n}", "github": "", "project": "", "reviewers": "eVNV;H5Pr;sAC6;Ehde", "pdf_size": 6543143, "rating": "4;5;6;6", "confidence": "4;3;3;3", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "161;86;41;61", "wc_strengths": "27;149;89;28", "wc_weaknesses": "224;189;185;34", "wc_questions": "68;206;3;28", "wc_limitations": "37;16;1;2", "wc_review": "517;646;319;153", "wc_reply_reviewers": "0;120;18;0", "wc_reply_authors": "0;760;0;0", "reply_reviewers": "0;2;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 45.46633369868303 ], "wc_strengths_avg": [ 73.25, 50.43002577829998 ], "wc_weaknesses_avg": [ 158.0, 73.18128175975056 ], "wc_questions_avg": [ 76.25, 78.416755224888 ], "wc_limitations_avg": [ 14.0, 14.543039572248986 ], "wc_review_avg": [ 408.75, 188.0616587717975 ], "wc_reply_reviewers_avg": [ 34.5, 49.907414278842374 ], "wc_reply_authors_avg": [ 190.0, 329.0896534380867 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4799169933508232678&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "duolingo.com;epfl.ch;gatech.edu;;gatech.edu", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Duolingo;EPFL;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.duolingo.com;https://www.epfl.ch;https://www.gatech.edu", "aff_unique_abbr": "Duolingo;EPFL;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Switzerland" }, { "title": "A Unified Approach to Count-Based Weakly Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72088", "id": "IyAHCbMq3a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/79a0c8e7ae8e403e39341ea6b0ba4c21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IyAHCbMq3a", "openreview": "https://openreview.net/forum?id=IyAHCbMq3a", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72088", "video": "https://nips.cc/virtual/2023/poster/72088", "author_site": "Vinay Shukla, Zhe Zeng, Kareem Ahmed, Guy Van den Broeck", "tldr": "", "abstract": "High-quality labels are often very scarce, whereas unlabeled data with inferred weak labels occurs more naturally. In many cases, these weak labels dictate the frequency of each respective class over a set of instances. In this paper, we develop a unified approach to learning from such weakly-labeled data, which we call *count-based weakly-supervised learning*. At the heart of our approach is the ability to compute the probability of exactly $k$ out of $n$ outputs being set to true. This computation is differentiable, exact, and efficient. 
Building upon the previous computation, we derive a *count loss* penalizing the model for deviations in its distribution from an arithmetic constraint defined over label counts.", "keywords": "weakly supervised learning;constraint;label proportion;learning from positive and unlabeled data;multiple instance learning", "primary_area": "", "supplementary_material": "/attachment/c7679676478de0ada4c9b9968a61a57ce7194d43.pdf", "author": "Vinay Shukla;Zhe Zeng;Kareem Ahmed;Guy Van den Broeck", "authorids": "~Vinay_Shukla1;~Zhe_Zeng1;~Kareem_Ahmed2;~Guy_Van_den_Broeck1", "gender": "M;F;M;M", "homepage": ";https://zzeng.me/;http://kareemahmed.com;http://web.cs.ucla.edu/~guyvdb/", "dblp": "336/5291;27/10464;188/6144;96/7521.html", "google_scholar": "https://scholar.google.com.au/citations?user=U-dBkj4AAAAJ;PyK6cB0AAAAJ;hkM0hbIAAAAJ;d0KQ9z0AAAAJ", "orcid": ";;;0000-0003-3434-2503", "linkedin": "vinay-shukla-4ba406149/;;kareem-yousrii/;guyvdb", "or_profile": "~Vinay_Shukla1;~Zhe_Zeng1;~Kareem_Ahmed2;~Guy_Van_den_Broek1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;cs.ucla.edu;ucla.edu", "position": "Undergrad student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nshukla2023a,\ntitle={A Unified Approach to Count-Based Weakly Supervised Learning},\nauthor={Vinay Shukla and Zhe Zeng and Kareem Ahmed and Guy Van den Broeck},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IyAHCbMq3a}\n}", "github": "", "project": "", "reviewers": "5jbX;jVGG;rn5b;YHmN;Q9rM", "pdf_size": 908146, "rating": "5;5;6;7;7", "confidence": "4;4;3;4;3", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "98;170;51;108;40", "wc_strengths": "63;52;83;49;82", "wc_weaknesses": "288;63;109;103;89", "wc_questions": "233;99;19;3;74", "wc_limitations": "23;31;31;17;7", "wc_review": "705;415;293;280;292", "wc_reply_reviewers": "307;96;18;0;0", "wc_reply_authors": "314;0;0;0;0", "reply_reviewers": "2;1;1;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 93.4, 46.370680391816556 ], "wc_strengths_avg": [ 65.8, 14.4138821973818 ], "wc_weaknesses_avg": [ 130.4, 80.37810647184966 ], "wc_questions_avg": [ 85.6, 81.6078427603622 ], "wc_limitations_avg": [ 21.8, 9.086253353280437 ], "wc_review_avg": [ 397.0, 161.68982651979067 ], "wc_reply_reviewers_avg": [ 84.2, 116.91090624916052 ], "wc_reply_authors_avg": [ 62.8, 125.6 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.45643546458763845, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=767779362873126003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ucla.edu;cs.ucla.edu;cs.ucla.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Assumption violations in causal discovery and the robustness of score matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72087", "id": "IyTArtpuCK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/93ed74938a54a73b5e4c52bbaf42ca8e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IyTArtpuCK", "openreview": "https://openreview.net/forum?id=IyTArtpuCK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72087", "video": "https://nips.cc/virtual/2023/poster/72087", "author_site": "Francesco Montagna, Atalanti Mastakouri, Elias Eulig, Nicoletta Noceti, Lorenzo Rosasco, Dominik Janzing, Bryon Aragam, Francesco Locatello", "tldr": "", "abstract": "When domain knowledge is limited and experimentation is restricted by ethical, financial, or time constraints, practitioners turn to observational causal discovery methods to recover the causal structure, exploiting the statistical properties of their data. Because causal discovery without further assumptions is an ill-posed problem, each algorithm comes with its own set of usually untestable assumptions, some of which are hard to meet in real datasets. Motivated by these considerations, this paper extensively benchmarks the empirical performance of recent causal discovery methods on observational _iid_ data generated under different background conditions, allowing for violations of the critical assumptions required by each selected approach. \nOur experimental findings show that score matching-based methods demonstrate surprising performance in the false positive and false negative rate of the inferred graph in these challenging scenarios, and we provide theoretical insights into their performance. This work is also the first effort to benchmark the stability of causal discovery algorithms with respect to the values of their hyperparameters. Finally, we hope this paper will set a new standard for the evaluation of causal discovery methods and can serve as an accessible entry point for practitioners interested in the field, highlighting the empirical implications of different algorithm choices.", "keywords": "Causal discovery; empirical study; robust inference; benchmark", "primary_area": "", "supplementary_material": "/attachment/d84eada9859ad58f8c9d5d7ce2a2c003947a8547.zip", "author": "Francesco Montagna;Atalanti A. 
Mastakouri;Elias Eulig;Nicoletta Noceti;Lorenzo Rosasco;Dominik Janzing;Bryon Aragam;Francesco Locatello", "authorids": "~Francesco_Montagna2;~Atalanti_A._Mastakouri1;~Elias_Eulig1;~Nicoletta_Noceti1;~Lorenzo_Rosasco1;~Dominik_Janzing3;~Bryon_Aragam1;~Francesco_Locatello1", "gender": ";;;F;;;;M", "homepage": "https://www.francescomontagna.com/;;https://www.eeulig.com/;https://ml.unige.it;;;http://bryonaragam.com/;https://twitter.com/FrancescoLocat8", "dblp": ";200/8042;299/7602;13/3585;;;140/7564;195/6074", "google_scholar": "StwghVgAAAAJ;https://scholar.google.de/citations?user=iMlGLH8AAAAJ;https://scholar.google.de/citations?user=1b4dCgUAAAAJ;7i3HX4wAAAAJ;;;u-W3_9QAAAAJ;", "orcid": ";;0000-0001-6269-6320;0000-0002-6482-4768;;;;", "linkedin": "francesco-montagna/;;https://linkedin.com/in/eeulig;nicoletta-noceti-494a43156/;;;;", "or_profile": "~Francesco_Montagna2;~Atalanti_A._Mastakouri1;~Elias_Eulig1;~Nicoletta_Noceti1;~Lorenzo_Rosasco1;~Dominik_Janzing3;~Bryon_Aragam1;~Francesco_Locatello1", "aff": "Amazon Development Center Germany;Amazon Development Center Germany;Deutsches Krebsforschungszentrum;Universit\u00e0 degli Studi di Genova;;;Booth School of Business;Amazon", "aff_domain": "amazon.de;amazon.de;dkfz.de;unige.it;;;chicagobooth.edu;amazon.com", "position": "Intern;Researcher;PhD student;Associate Professor;;;Assistant Professor;Senior Applied Scientist", "bibtex": "@inproceedings{\nmontagna2023assumption,\ntitle={Assumption violations in causal discovery and the robustness of score matching},\nauthor={Francesco Montagna and Atalanti A. Mastakouri and Elias Eulig and Nicoletta Noceti and Lorenzo Rosasco and Dominik Janzing and Bryon Aragam and Francesco Locatello},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IyTArtpuCK}\n}", "github": "", "project": "", "reviewers": "QK5D;eJF4;JGo2", "pdf_size": 7640984, "rating": "3;6;8", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "49;71;82", "wc_strengths": "48;153;68", "wc_weaknesses": "352;698;265", "wc_questions": "273;105;118", "wc_limitations": "1;1;74", "wc_review": "723;1028;607", "wc_reply_reviewers": "0;101;108", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 67.33333333333333, 13.719410418171117 ], "wc_strengths_avg": [ 89.66666666666667, 45.52166761249221 ], "wc_weaknesses_avg": [ 438.3333333333333, 187.01574502936614 ], "wc_questions_avg": [ 165.33333333333334, 76.31659205062961 ], "wc_limitations_avg": [ 25.333333333333332, 34.41253001774532 ], "wc_review_avg": [ 786.0, 177.55187035530398 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 49.34459330959056 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15180072608289584800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "amazon.de;amazon.de;dkfz.de;unige.it;;;chicagobooth.edu;amazon.com", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0", 
"aff_unique_norm": "Amazon;Deutsches Krebsforschungszentrum;Universit\u00e0 degli Studi di Genova;University of Chicago Booth School of Business", "aff_unique_dep": "Development Center;;;Booth School of Business", "aff_unique_url": "https://www.amazon.de;https://www.dkfz.de;https://www.unige.it;https://www.chicagobooth.edu", "aff_unique_abbr": "Amazon;DKFZ;UniGe;Booth", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;1;2;2", "aff_country_unique": "Germany;Italy;United States" }, { "title": "Block Coordinate Plug-and-Play Methods for Blind Inverse Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72086", "id": "IyWpP2e0bF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f810c2ba07bae78dfe9d25c5d40c5536-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IyWpP2e0bF", "openreview": "https://openreview.net/forum?id=IyWpP2e0bF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72086", "video": "https://nips.cc/virtual/2023/poster/72086", "author_site": "Weijie Gan, shirin shoushtari, Yuyang Hu, Jiaming Liu, Hongyu An, Ulugbek Kamilov", "tldr": "", "abstract": "Plug-and-play (PnP) prior is a well-known class of methods for solving imaging inverse problems by computing fixed-points of operators combining physical measurement models and learned image denoisers. While PnP methods have been extensively used for image recovery with known measurement operators, there is little work on PnP for solving blind inverse problems. We address this gap by presenting a new block-coordinate PnP (BC-PnP) method that efficiently solves this joint estimation problem by introducing learned denoisers as priors on both the unknown image and the unknown measurement operator. We present a new convergence theory for BC-PnP compatible with blind inverse problems by considering nonconvex data-fidelity terms and expansive denoisers. Our theory analyzes the convergence of BC-PnP to a stationary point of an implicit function associated with an approximate minimum mean-squared error (MMSE) denoiser. We numerically validate our method on two blind inverse problems: automatic coil sensitivity estimation in magnetic resonance imaging (MRI) and blind image deblurring. 
Our results show that BC-PnP provides an efficient and principled framework for using denoisers as PnP priors for jointly estimating measurement operators and images.", "keywords": "inverse problems;plug-and-play priors;computational imaging;nonconvex optimization", "primary_area": "", "supplementary_material": "/attachment/87919b113ca27a91e4c2f463c7f5c1d7fe2f91b4.zip", "author": "Weijie Gan;Shirin Shoushtari;Yuyang Hu;Jiaming Liu;Hongyu An;Ulugbek Kamilov", "authorids": "~Weijie_Gan1;~Shirin_Shoushtari1;~Yuyang_Hu1;~Jiaming_Liu3;~Hongyu_An3;~Ulugbek_Kamilov1", "gender": "M;F;M;M;F;Not Specified", "homepage": "https://wjgancn.github.io;;https://hu-yuyang.github.io/;https://jiamingliu-jeremy.github.io/;;https://ukmlv.github.io", "dblp": "275/3691;321/1728;;33/5934-1;;73/9223", "google_scholar": "Ib20Ge0AAAAJ;https://scholar.google.com/citations?view_op=list_works;FvRrgTsAAAAJ;KEucBooAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=3qYUSDwAAAAJ", "orcid": ";;;0000-0002-1042-4443;0000-0001-6459-2269;0000-0001-6770-3278", "linkedin": ";;;;hongyu-an-29a54624/;", "or_profile": "~Weijie_Gan1;~Shirin_Shoushtari1;~Yuyang_Hu1;~Jiaming_Liu3;~Hongyu_An3;~Ulugbek_Kamilov1", "aff": "Washington University, Saint Louis;Washington University, Saint Louis;Washington University in Saint Louis;Mitsubishi Electric Research Labs;Washington University, Saint Louis;Washington University, St. Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;merl.com;wustl.edu;wustl.edu", "position": "PhD student;PhD student;PhD student;Intern;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngan2023block,\ntitle={Block Coordinate Plug-and-Play Methods for Blind Inverse Problems},\nauthor={Weijie Gan and Shirin Shoushtari and Yuyang Hu and Jiaming Liu and Hongyu An and Ulugbek Kamilov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IyWpP2e0bF}\n}", "github": "", "project": "", "reviewers": "8yHJ;7kUv;NG5f;tTXT", "pdf_size": 3080564, "rating": "4;6;7;7", "confidence": "4;4;3;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "48;70;65;67", "wc_strengths": "56;40;40;53", "wc_weaknesses": "228;126;76;65", "wc_questions": "3;28;27;34", "wc_limitations": "1;37;14;2", "wc_review": "336;301;222;221", "wc_reply_reviewers": "132;50;0;0", "wc_reply_authors": "175;101;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.5, 8.558621384311845 ], "wc_strengths_avg": [ 47.25, 7.327175444876422 ], "wc_weaknesses_avg": [ 123.75, 64.42970976187927 ], "wc_questions_avg": [ 23.0, 11.853269591129697 ], "wc_limitations_avg": [ 13.5, 14.5 ], "wc_review_avg": [ 270.0, 50.05496978322932 ], "wc_reply_reviewers_avg": [ 45.5, 53.95136698916905 ], "wc_reply_authors_avg": [ 69.0, 73.79363116150336 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10769384878946636811&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "wustl.edu;wustl.edu;wustl.edu;merl.com;wustl.edu;wustl.edu", "author_num": 6, "aff_unique_index": 
"0;0;0;1;0;0", "aff_unique_norm": "Washington University in St. Louis;Mitsubishi Electric Research Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;https://www.merl.com", "aff_unique_abbr": "WUSTL;MERL", "aff_campus_unique_index": "0;0;1;0;1", "aff_campus_unique": "Saint Louis;St. Louis;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Conditional Adapters: Parameter-efficient Transfer Learning with Fast Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72085", "id": "IyYyKov0Aj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/19d7204af519eae9993f7f72377a0ec0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IyYyKov0Aj", "openreview": "https://openreview.net/forum?id=IyYyKov0Aj", "poster": "/media/PosterPDFs/NeurIPS%202023/72085.png?t=1699592425.8037589", "slides": "https://nips.cc/virtual/2023/poster/72085", "video": "https://nips.cc/virtual/2023/poster/72085", "author_site": "Tao Lei, Junwen Bai, Siddhartha Brahma, Joshua Ainslie, Kenton Lee, Yanqi Zhou, Nan Du, Vincent Zhao, Yuexin Wu, Bo Li, Yu Zhang, Ming-Wei Chang", "tldr": "", "abstract": "We propose Conditional Adapter (CoDA), a parameter-efficient transfer learning method that also improves inference efficiency. CoDA generalizes beyond standard adapter approaches to enable a new way of balancing speed and accuracy using conditional computation.\nStarting with an existing dense pretrained model, CoDA adds sparse activation together with a small number of new parameters and a light-weight training phase.\nOur experiments demonstrate that the CoDA approach provides an unexpectedly efficient way to transfer knowledge.\nAcross a variety of language, vision, and speech tasks, CoDA achieves a 2x to 8x inference speed-up compared to the state-of-the-art Adapter approaches with moderate to no accuracy loss and the same parameter efficiency.", "keywords": "conditional computation;inference efficiency;parameter efficiency;large models", "primary_area": "", "supplementary_material": "", "author": "Tao Lei;Junwen Bai;Siddhartha Brahma;Joshua Ainslie;Kenton Lee;Yanqi Zhou;Nan Du;Vincent Y Zhao;Yuexin Wu;Bo Li;Yu Zhang;Ming-Wei Chang", "authorids": "~Tao_Lei1;~Junwen_Bai1;~Siddhartha_Brahma1;~Joshua_Ainslie1;~Kenton_Lee1;~Yanqi_Zhou1;~Nan_Du1;~Vincent_Y_Zhao1;~Yuexin_Wu1;~Bo_Li1;~Yu_Zhang2;~Ming-Wei_Chang3", "gender": "M;M;M;;M;F;M;M;M;;M;", "homepage": ";http://www.cs.cornell.edu/~junwen/;;;https://kentonl.com/;https://zhouyanqi.github.io/;;https://foo.bar;https://crickwu.github.io;;;https://mingweichang.org/", "dblp": ";188/6479;;263/3363;121/7560;;;301/7889;09/1661;50/3402-28;50/671-33;69/4618", "google_scholar": "g2uay50AAAAJ;JD7wLV4AAAAJ;OZj382cAAAAJ;;qXwJkr8AAAAJ;ZKEDQXYAAAAJ;v474hP4AAAAJ;;sd0nprMAAAAJ;iRhp1PAAAAAJ;;GiCqMFkAAAAJ", "orcid": ";0000-0001-7939-4927;;;;;;;;0000-0002-6711-3603;;", "linkedin": ";junwen-bai-7ba354155/;sidbrahma;;;;dunangatech/;;;;;ming-wei-chang-4962497/", "or_profile": "~Tao_Lei1;~Junwen_Bai1;~Siddhartha_Brahma1;~Joshua_Ainslie1;~Kenton_Lee1;~Yanqi_Zhou1;~Nan_Du1;~Vincent_Y_Zhao1;~Yuexin_Wu1;~Bo_Li1;~Yu_Zhang2;~Ming-Wei_Chang2", "aff": "Google;Google;Research, Google;Google;Google Research;Google Brain;Google Brain;Google;Google;Google;Google;Google Deepmind", "aff_domain": "google.com;google.com;research.google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "Research 
scientist;Researcher;Researcher;Software Engineer;Research Scientist;Research Scientist;Research Scientist;Researcher;Software Engineer;Research Scientist;Research Scientist;Research scientist", "bibtex": "@inproceedings{\nlei2023conditional,\ntitle={Conditional Adapters: Parameter-efficient Transfer Learning with Fast Inference},\nauthor={Tao Lei and Junwen Bai and Siddhartha Brahma and Joshua Ainslie and Kenton Lee and Yanqi Zhou and Nan Du and Vincent Y Zhao and Yuexin Wu and Bo Li and Yu Zhang and Ming-Wei Chang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IyYyKov0Aj}\n}", "github": "", "project": "", "reviewers": "2t3i;XqSH;yqit;TS35", "pdf_size": 3589285, "rating": "5;5;5;8", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "2;3;3;4", "wc_summary": "75;46;70;92", "wc_strengths": "61;40;60;79", "wc_weaknesses": "97;211;75;27", "wc_questions": "92;2;2;150", "wc_limitations": "1;2;1;9", "wc_review": "326;301;208;357", "wc_reply_reviewers": "79;36;0;31", "wc_reply_authors": "439;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.75, 16.452583383772897 ], "wc_strengths_avg": [ 60.0, 13.80217374184226 ], "wc_weaknesses_avg": [ 102.5, 67.56293362488044 ], "wc_questions_avg": [ 61.5, 62.93448974926229 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 298.0, 55.619241274940094 ], "wc_reply_reviewers_avg": [ 36.5, 28.146935890075138 ], "wc_reply_authors_avg": [ 109.75, 190.09257613068428 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10160757146653208318&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com;research.google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 12, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;1", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google;DeepMind", "aff_unique_url": "https://www.google.com;https://deepmind.com", "aff_unique_abbr": "Google;DeepMind", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Swap Agnostic Learning, or Characterizing Omniprediction via Multicalibration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72084", "id": "IzlRh5qwmG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d693203215325902ff9dbdd067a50ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=IzlRh5qwmG", "openreview": "https://openreview.net/forum?id=IzlRh5qwmG", "poster": "/media/PosterPDFs/NeurIPS%202023/72084.png?t=1701447510.8928378", "slides": "https://nips.cc/virtual/2023/poster/72084", "video": "https://nips.cc/virtual/2023/poster/72084", "author_site": "Parikshit Gopalan, Michael Kim, Omer Reingold", "tldr": "", "abstract": "We introduce and study the notion of Swap Agnostic Learning.\nThe problem can 
be phrased as a game between a *predictor* and an *adversary*: first, the predictor selects a hypothesis $h$; then, the adversary plays in response, and for each level set of the predictor, selects a loss-minimizing hypothesis $c_v \\in \\mathcal{C}$; the predictor wins if $h$ competes with the adaptive adversary's loss.\nDespite the strength of the adversary, our main result demonstrates the feasibility of Swap Agnostic Learning for any convex loss.\nSomewhat surprisingly, the result follows by proving an *equivalence* between Swap Agnostic Learning and swap variants of the recent notions Omniprediction (ITCS'22) and Multicalibration (ICML'18).\nBeyond this equivalence, we establish further connections to the literature on Outcome Indistinguishability (STOC'20, ITCS'23), revealing a unified notion of OI that captures all existing notions of omniprediction and multicalibration.", "keywords": "Agnostic Learning;Omniprediction;Multicalibration", "primary_area": "", "supplementary_material": "/attachment/8a41dee6536ebe687ec26df286c6a95ea23faff2.pdf", "author": "Parikshit Gopalan;Michael P. Kim;Omer Reingold", "authorids": "~Parikshit_Gopalan1;~Michael_P._Kim2;~Omer_Reingold3", "gender": "M;M;M", "homepage": "https://parikg.github.io/;https://omereingold.wordpress.com/;https://cs.stanford.edu/~mpkim/", "dblp": "16/1585;r/OmerReingold.html;165/2964", "google_scholar": "fb2-dasAAAAJ;TD9RhcgAAAAJ;2sFj-kcAAAAJ", "orcid": ";;", "linkedin": ";omer-reingold-9616262/;", "or_profile": "~Parikshit_Gopalan1;~Omer_Reingold3;~Michael_P_Kim1", "aff": "Apple;Stanford University;", "aff_domain": "apple.com;stanford.edu;", "position": "Principal Researcher;Full Professor;", "bibtex": "@inproceedings{\ngopalan2023swap,\ntitle={Swap Agnostic Learning, or Characterizing Omniprediction via Multicalibration},\nauthor={Parikshit Gopalan and Michael P. 
Kim and Omer Reingold},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=IzlRh5qwmG}\n}", "github": "", "project": "", "reviewers": "HdZq;7mxb;Xn6q;7A6v;if87;XpDf", "pdf_size": 485761, "rating": "5;5;7;7;8;8", "confidence": "2;2;4;4;4;3", "soundness": "3;2;4;4;4;3", "novelty": "3;2;3;3;3;3", "presentation": "2;3;3;4;4;3", "wc_summary": "79;59;69;328;209;83", "wc_strengths": "53;32;45;62;99;63", "wc_weaknesses": "43;127;72;103;46;81", "wc_questions": "1;10;227;106;229;104", "wc_limitations": "1;12;2;1;1;1", "wc_review": "177;240;415;600;584;332", "wc_reply_reviewers": "0;21;9;1;37;6", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 3.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 137.83333333333334, 98.86761631371294 ], "wc_strengths_avg": [ 59.0, 20.760539492026695 ], "wc_weaknesses_avg": [ 78.66666666666667, 29.769484749021476 ], "wc_questions_avg": [ 112.83333333333333, 91.0447081871807 ], "wc_limitations_avg": [ 3.0, 4.041451884327381 ], "wc_review_avg": [ 391.3333333333333, 159.99131920895653 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 13.008544200725238 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7940666671767443, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9557310149409040756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "apple.com;stanford.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Apple;Stanford University", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.stanford.edu", "aff_unique_abbr": "Apple;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Smoothed Online Learning for Prediction in Piecewise Affine Systems", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72083", "id": "Izt7rDD7jN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/82096f4f6f897529ecd3eabea603e9cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Izt7rDD7jN", "openreview": "https://openreview.net/forum?id=Izt7rDD7jN", "poster": "/media/PosterPDFs/NeurIPS%202023/72083.png?t=1701382372.1804664", "slides": "https://nips.cc/virtual/2023/poster/72083", "video": "https://nips.cc/virtual/2023/poster/72083", "author_site": "Adam Block, Max Simchowitz, Russ Tedrake", "tldr": "", "abstract": "The problem of piecewise affine (PWA) regression and planning is of foundational importance to the study of online learning, control, and robotics, where it provides a theoretically and empirically tractable setting to study systems undergoing sharp changes in the dynamics. Unfortunately, due to the discontinuities that arise when crossing into different ``pieces,'' learning in general sequential settings is impossible and practical algorithms are forced to resort to heuristic approaches. 
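For readers new to the setting, a toy piecewise affine system illustrates the discontinuity that makes learning hard: the dynamics matrices switch abruptly when the state crosses a piece boundary. The regions and matrices below are invented for illustration only.

```python
import numpy as np

def pwa_step(x, pieces):
    """One step of a piecewise affine system x' = A_i x + b_i, where
    piece i is the first region containing the current state."""
    for in_region, (A, b) in pieces:
        if in_region(x):
            return A @ x + b
    raise ValueError("state lies in no piece")

# Two toy pieces, split by the sign of the first coordinate.
pieces = [
    (lambda x: x[0] >= 0, (np.array([[0.9, 0.1], [0.0, 0.8]]), np.zeros(2))),
    (lambda x: x[0] < 0,  (np.array([[0.5, -0.2], [0.1, 0.7]]), np.ones(2))),
]

x = np.array([1.0, -1.0])
for _ in range(10):
    x = pwa_step(x, pieces)   # dynamics jump when x[0] changes sign
```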
This paper builds on the recently developed smoothed online learning framework and provides the first algorithms for prediction and simulation in PWA systems whose regret is polynomial in all relevant problem parameters under a weak smoothness assumption; moreover, our algorithms are efficient in the number of calls to an optimization oracle. We further apply our results to the problems of one-step prediction and multi-step simulation regret in piecewise affine dynamical systems, where the learner is tasked with simulating trajectories and regret is measured in terms of the Wasserstein distance between simulated and true data. Along the way, we develop several technical tools of more general interest.", "keywords": "Smoothed Online Learning;Piecewise Affine Prediction;Learning Dynamics", "primary_area": "", "supplementary_material": "/attachment/68d9c9ae14c9310a95a97ff8ad82941210cbd106.pdf", "author": "Adam Block;Max Simchowitz;Russ Tedrake", "authorids": "~Adam_Block1;~Max_Simchowitz1;~Russ_Tedrake1", "gender": ";M;M", "homepage": "https://abblock.github.io/index.html;;http://people.csail.mit.edu/russt", "dblp": "258/1018;176/5165;73/1296", "google_scholar": ";;nxNkEiYAAAAJ", "orcid": "0000-0003-1677-2665;;", "linkedin": ";;", "or_profile": "~Adam_Block1;~Max_Simchowitz1;~Russ_Tedrake1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nblock2023smoothed,\ntitle={Smoothed Online Learning for Prediction in Piecewise Affine Systems},\nauthor={Adam Block and Max Simchowitz and Russ Tedrake},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Izt7rDD7jN}\n}", "github": "", "project": "", "reviewers": "7mVW;Zpmv;MA9v;QtL6", "pdf_size": 700758, "rating": "4;7;8;8", "confidence": "2;4;3;3", "soundness": "2;3;4;4", "novelty": "2;3;4;4", "presentation": "1;3;3;3", "wc_summary": "55;85;37;80", "wc_strengths": "25;166;73;63", "wc_weaknesses": "187;358;171;41", "wc_questions": "8;191;8;28", "wc_limitations": "22;7;8;39", "wc_review": "297;807;297;251", "wc_reply_reviewers": "0;42;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.6393596310755 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 64.25, 19.40843888621648 ], "wc_strengths_avg": [ 81.75, 51.83326634507997 ], "wc_weaknesses_avg": [ 189.25, 112.68623474053963 ], "wc_questions_avg": [ 58.75, 76.78989191293344 ], "wc_limitations_avg": [ 19.0, 12.98075498574717 ], "wc_review_avg": [ 413.0, 228.24986308867744 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6469966392206306, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4047657924756871714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": 
"MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SUBP: Soft Uniform Block Pruning for 1$\\times$N Sparse CNNs Multithreading Acceleration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72082", "id": "J0Pvvxspmz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a36c3dbe676fa8445715a31a90c66ab3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J0Pvvxspmz", "openreview": "https://openreview.net/forum?id=J0Pvvxspmz", "poster": "/media/PosterPDFs/NeurIPS%202023/72082.png?t=1699239001.6394668", "slides": "https://nips.cc/virtual/2023/poster/72082", "video": "https://nips.cc/virtual/2023/poster/72082", "author_site": "JINGYANG XIANG, Siqi Li, Jun Chen, Guang Dai, Shipeng Bai, Yukai Ma, Yong Liu", "tldr": "", "abstract": "The study of sparsity in Convolutional Neural Networks (CNNs) has become widespread to compress and accelerate models in environments with limited resources. By constraining N consecutive weights along the output channel to be group-wise non-zero, the recent network with 1$\\times$N sparsity has received tremendous popularity for its three outstanding advantages: 1) A large amount of storage space saving by a \\emph{Block Sparse Row} matrix. 2) Excellent performance at a high sparsity. 3) Significant speedups on CPUs with Advanced Vector Extensions. Recent work requires selecting and fine-tuning 1$\\times$N sparse weights based on dense pre-trained weights, leading to the problems such as expensive training cost and memory access, sub-optimal model quality, as well as unbalanced workload across threads (different sparsity across output channels). To overcome them, this paper proposes a novel \\emph{\\textbf{S}oft \\textbf{U}niform \\textbf{B}lock \\textbf{P}runing} (SUBP) approach to train a uniform 1$\\times$N sparse structured network from scratch. Specifically, our approach tends to repeatedly allow pruned blocks to regrow to the network based on block angular redundancy and importance sampling in a uniform manner throughout the training process. It not only makes the model less dependent on pre-training, reduces the model redundancy and the risk of pruning the important blocks permanently but also achieves balanced workload. Empirically, on ImageNet, comprehensive experiments across various CNN architectures show that our SUBP consistently outperforms existing 1$\\times$N and structured sparsity methods based on pre-trained models or training from scratch. 
Source codes and models are available at \\url{https://github.com/JingyangXiang/SUBP}.", "keywords": "Soft Uniform Block Pruning;Block Angular Redundancy;Hardware Acceleration", "primary_area": "", "supplementary_material": "/attachment/305bbe1d58071045c9c07edbc38df08489000b4c.pdf", "author": "Jingyang Xiang;Siqi Li;Jun Chen;Guang Dai;Shipeng Bai;Yukai Ma;Yong Liu", "authorids": "~Jingyang_Xiang2;~Siqi_Li5;~Jun_Chen9;~Guang_Dai1;~Shipeng_Bai1;~Yukai_Ma1;~Yong_Liu11", "gender": ";F;M;M;M;;M", "homepage": ";https://april.zju.edu.cn/team/siqi-li/;;;https://april.zju.edu.cn/team/shipeng-bai/;https://april.zju.edu.cn/team/yukai-ma/;https://person.zju.edu.cn/en/yongliu", "dblp": ";;;;;;29/4867-7", "google_scholar": ";;YKc2O78AAAAJ;;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=qYcgBbEAAAAJ", "orcid": ";;0000-0001-6568-8801;0000-0002-3529-9087;;0000-0001-8135-9012;0000-0003-4822-8939", "linkedin": ";;;;;;", "or_profile": "~Jingyang_Xiang2;~Siqi_Li5;~Jun_Chen9;~Guang_Dai1;~Shipeng_Bai1;~Yukai_Ma1;~Yong_Liu11", "aff": ";Zhejiang University;Zhejiang University;SGIT AI;zhejiang university;Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn;sgcc.com.cn;zju.edu;zju.edu.cn;zju.edu.cn", "position": ";PhD student;PhD student;Principal Researcher;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nxiang2023subp,\ntitle={{SUBP}: Soft Uniform Block Pruning for 1\\${\\textbackslash}times\\$N Sparse {CNN}s Multithreading Acceleration},\nauthor={Jingyang Xiang and Siqi Li and Jun Chen and Guang Dai and Shipeng Bai and Yukai Ma and Yong Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J0Pvvxspmz}\n}", "github": "", "project": "", "reviewers": "nWst;cAVo;Ah3e;Njjh", "pdf_size": 1541843, "rating": "3;5;6;6", "confidence": "5;5;5;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "1;3;3;2", "wc_summary": "52;91;70;37", "wc_strengths": "20;52;64;34", "wc_weaknesses": "331;198;44;15", "wc_questions": "4;98;29;105", "wc_limitations": "18;22;42;29", "wc_review": "425;461;249;220", "wc_reply_reviewers": "276;0;36;0", "wc_reply_authors": "657;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 62.5, 20.180436070610565 ], "wc_strengths_avg": [ 42.5, 16.815171720800237 ], "wc_weaknesses_avg": [ 147.0, 126.97440687004605 ], "wc_questions_avg": [ 59.0, 43.47988040461933 ], "wc_limitations_avg": [ 27.75, 9.12071817347735 ], "wc_review_avg": [ 338.75, 105.5233978793329 ], "wc_reply_reviewers_avg": [ 78.0, 115.25623627379127 ], "wc_reply_authors_avg": [ 164.25, 284.4893451431881 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12996027343112151140&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";zju.edu.cn;zju.edu.cn;sgcc.com.cn;zju.edu;zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Zhejiang University;SGIT AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;", 
"aff_unique_abbr": "ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "A graphon-signal analysis of graph neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72081", "id": "J0RD92Tmfc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cb7943be26bb34f036c7e4068c490903-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J0RD92Tmfc", "openreview": "https://openreview.net/forum?id=J0RD92Tmfc", "poster": "/media/PosterPDFs/NeurIPS%202023/72081.png?t=1701794186.975845", "slides": "https://nips.cc/virtual/2023/poster/72081", "video": "https://nips.cc/virtual/2023/poster/72081", "tldr": "", "abstract": "We present an approach for analyzing message passing graph neural networks (MPNNs) based on an extension of graphon analysis to a so called graphon-signal analysis. A MPNN is a function that takes a graph and a signal on the graph (a graph-signal) and returns some value. Since the input space of MPNNs is non-Euclidean, i.e., graphs can be of any size and topology, properties such as generalization are less well understood for MPNNs than for Euclidean neural networks. We claim that one important missing ingredient in past work is a meaningful notion of graph-signal similarity measure, that endows the space of inputs to MPNNs with a regular structure. We present such a similarity measure, called the graphon-signal cut distance, which makes the space of all graph-signals a dense subset of a compact metric space -- the graphon-signal space. Informally, two deterministic graph-signals are close in cut-distance if they ``look like'' they were sampled from the same random graph-signal model. Hence, our cut distance is a natural notion of graph-signal similarity, which allows comparing any pair of graph-signals of any size and topology. We prove that MPNNs are Lipschitz continuous functions over the graphon-signal metric space. We then give two applications of this result: 1) a generalization bound for MPNNs, and, 2) the stability of MPNNs to subsampling of graph-signals. 
Our results apply to any regular enough MPNN on any distribution of graph-signals, making the analysis rather universal.", "keywords": "graph neural network;graphon;generalization;stability;sampling;Szemer\u00e9di regularity lemma", "primary_area": "", "supplementary_material": "/attachment/355d2efd39298712e001fb4d9e727c4d07c24229.pdf", "author": "Ron Levie", "authorids": "~Ron_Levie1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nlevie2023a,\ntitle={A graphon-signal analysis of graph neural networks},\nauthor={Ron Levie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J0RD92Tmfc}\n}", "github": "", "project": "", "reviewers": "9vxX;3Bfr;KJfc;Leoh", "pdf_size": 662619, "rating": "5;6;7;7", "confidence": "3;2;3;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "78;40;98;126", "wc_strengths": "35;25;60;45", "wc_weaknesses": "230;100;56;55", "wc_questions": "42;8;68;64", "wc_limitations": "8;10;42;15", "wc_review": "393;183;324;305", "wc_reply_reviewers": "126;0;35;73", "wc_reply_authors": "347;0;0;174", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.5, 31.316928329579195 ], "wc_strengths_avg": [ 41.25, 12.93010054098575 ], "wc_weaknesses_avg": [ 110.25, 71.48557546806208 ], "wc_questions_avg": [ 45.5, 23.806511714234826 ], "wc_limitations_avg": [ 18.75, 13.663363421939708 ], "wc_review_avg": [ 301.25, 75.71781494470109 ], "wc_reply_reviewers_avg": [ 58.5, 46.74665763453041 ], "wc_reply_authors_avg": [ 130.25, 143.89644714168588 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10784707527826339889&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "", "author_num": 1 }, { "title": "Quantifying & Modeling Multimodal Interactions: An Information Decomposition Framework", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72080", "id": "J1gBijopla", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/575286a73f238b6516ce0467d67eadb2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J1gBijopla", "openreview": "https://openreview.net/forum?id=J1gBijopla", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72080", "video": "https://nips.cc/virtual/2023/poster/72080", "author_site": "Paul Pu Liang, Yun Cheng, Xiang Fan, Chun Kai Ling, Suzanne Nie, Richard Chen, Zihao Deng, Nicholas Allen, Randy Auerbach, Faisal Mahmood, Russ Salakhutdinov, Louis-Philippe Morency", "tldr": "", "abstract": "The recent explosion of interest in multimodal applications has resulted in a wide selection of datasets and methods for representing and integrating information from different modalities. Despite these empirical advances, there remain fundamental research questions: How can we quantify the interactions that are necessary to solve a multimodal task? Subsequently, what are the most suitable multimodal models to capture these interactions? 
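One standard way to formalize such interaction questions, and the one this record builds on, is partial information decomposition (PID). A worked toy case of the standard identity (the paper's scalable estimators are not reproduced): with two modalities $X_1, X_2$ and label $Y$,

```latex
I(Y; X_1, X_2) = R + U_1 + U_2 + S, \qquad I(Y; X_i) = R + U_i .
```

For independent uniform bits with $Y = X_1 \oplus X_2$, each $I(Y; X_i) = 0$ while $I(Y; X_1, X_2) = 1$ bit, so nonnegativity forces $R = U_1 = U_2 = 0$ and $S = 1$: the task is purely synergistic.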
To answer these questions, we propose an information-theoretic approach to quantify the degree of redundancy, uniqueness, and synergy relating input modalities with an output task. We term these three measures as the PID statistics of a multimodal distribution (or PID for short), and introduce two new estimators for these PID statistics that scale to high-dimensional distributions. To validate PID estimation, we conduct extensive experiments on both synthetic datasets where the PID is known and on large-scale multimodal benchmarks where PID estimations are compared with human annotations. Finally, we demonstrate their usefulness in (1) quantifying interactions within multimodal datasets, (2) quantifying interactions captured by multimodal models, (3) principled approaches for model selection, and (4) three real-world case studies engaging with domain experts in pathology, mood prediction, and robotic perception where our framework helps to recommend strong multimodal models for each application.", "keywords": "multimodal learning;feature interactions;partial information decomposition;information theory;quantification;model selection", "primary_area": "", "supplementary_material": "", "author": "Paul Pu Liang;Yun Cheng;Xiang Fan;Chun Kai Ling;Suzanne Nie;Richard J. Chen;Zihao Deng;Nicholas Allen;Randy Auerbach;Faisal Mahmood;Ruslan Salakhutdinov;Louis-Philippe Morency", "authorids": "~Paul_Pu_Liang1;~Yun_Cheng2;~Xiang_Fan1;~Chun_Kai_Ling2;~Suzanne_Nie1;~Richard_J._Chen1;~Zihao_Deng2;~Nicholas_Allen1;~Randy_Auerbach1;~Faisal_Mahmood1;~Ruslan_Salakhutdinov1;~Louis-Philippe_Morency1", "gender": "M;F;;M;;M;M;;M;M;M;M", "homepage": "https://pliang279.github.io/;https://kapikantzari.github.io;https://xiangfan.io/;https://lingchunkai.github.io/;;http://richarizardd.me;;https://psychology.uoregon.edu/profile/nallen3;https://www.auerbachlab.com/;http://www.mahmoodlab.org;https://www.cs.cmu.edu/~morency/;https://www.cs.cmu.edu/~rsalakhu/", "dblp": "207/9749;;;172/1134;338/7155;244/1941;;;;;31/739;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;m4xFU6EAAAAJ;foMuvWEAAAAJ;;yhGqdMgAAAAJ;;ksVPCBsAAAAJ;iEsTa0cAAAAJ;9MsdbKoAAAAJ;https://scholar.google.com.tw/citations?user=APgaFK0AAAAJ;", "orcid": ";0009-0009-8916-6667;0009-0002-7221-3710;;0000-0002-8423-7464;0000-0003-0389-1331;;0000-0002-1086-6639;;0000-0001-7587-1562;0000-0001-6376-7696;", "linkedin": ";cheng-yun/;;;;richardchen95;https://www.linkedin.cn/in/zihao-deng-32b1401b5;nick-allen-42256218a/;;;morency?challengeId=AQELGK_OvMa0vwAAAY72L-VV4X9hW8juuY80VHVeeSGHZ1PJHeeEa5LTFoeTmDGU0t1OL07MXJTYC9EAi6qgPDd2z9ztnbdFYA&submissionId=09a0ff34-04ac-c717-bef7-8c9c8811b463&challengeSource=AgFhxWkU3q7v4wAAAY72L-1xRE0eG-BnZUNE9e3eAG95pgOCZ9u1nxEg-1dK2Dw&challegeType=AgHMzV0lqKgEFwAAAY72L-11X6DHMd3V_A3Iur8XZeyYF2-oBzoufs8&memberId=AgH4yz7pZ_riCgAAAY72L-146jmR2pdr3dmhy2icxBtEQzQ&recognizeDevice=AgFDCNyrhKiFSAAAAY72L-16m7z2EH2t0ueWmMKjyk1_ZJAkfFVe;", "or_profile": "~Paul_Pu_Liang1;~Yun_Cheng2;~Xiang_Fan1;~Chun_Kai_Ling2;~Suzanne_Nie1;~Richard_J._Chen1;~Zihao_Deng2;~Nicholas_Allen1;~Randy_Auerbach1;~Faisal_Mahmood1;~Louis-Philippe_Morency1;~Russ_Salakhutdinov1", "aff": "Carnegie Mellon University;Princeton University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Machine Learning Department, School of Computer Science;Harvard University;University of Pennsylvania;University of Oregon;;Harvard University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon 
University", "aff_domain": "cs.cmu.edu;princeton.edu;cmu.edu;cs.cmu.edu;mld.cs.cmu.edu;harvard.edu;seas.upenn.edu;uoregon.edu;;harvard.edu;cmu.edu;cs.cmu.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;MS student;PhD student;MS student;Full Professor;;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliang2023quantifying,\ntitle={Quantifying \\& Modeling Multimodal Interactions: An Information Decomposition Framework},\nauthor={Paul Pu Liang and Yun Cheng and Xiang Fan and Chun Kai Ling and Suzanne Nie and Richard J. Chen and Zihao Deng and Nicholas Allen and Randy Auerbach and Faisal Mahmood and Ruslan Salakhutdinov and Louis-Philippe Morency},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J1gBijopla}\n}", "github": "", "project": "", "reviewers": "xXH1;td1f;oDWq;uMC4;4r5p", "pdf_size": 3391102, "rating": "6;6;6;7;7", "confidence": "2;3;4;4;3", "soundness": "2;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;4;3;4;3", "wc_summary": "84;144;167;189;59", "wc_strengths": "71;81;178;112;26", "wc_weaknesses": "117;313;177;246;28", "wc_questions": "11;99;92;355;23", "wc_limitations": "7;6;1;38;41", "wc_review": "290;643;615;940;177", "wc_reply_reviewers": "46;63;44;109;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 128.6, 49.38258802452541 ], "wc_strengths_avg": [ 93.6, 50.400793644544926 ], "wc_weaknesses_avg": [ 176.2, 99.05029025702045 ], "wc_questions_avg": [ 116.0, 124.62744481052317 ], "wc_limitations_avg": [ 18.6, 17.21162397916013 ], "wc_review_avg": [ 533.0, 272.1242363333336 ], "wc_reply_reviewers_avg": [ 52.4, 35.13744441475504 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.32732683535398854, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3692342246022861234&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.cmu.edu;princeton.edu;cmu.edu;cs.cmu.edu;mld.cs.cmu.edu;harvard.edu;seas.upenn.edu;uoregon.edu;;harvard.edu;cmu.edu;cs.cmu.edu", "author_num": 12, "aff_unique_index": "0;1;0;0;0;2;3;4;2;0;0", "aff_unique_norm": "Carnegie Mellon University;Princeton University;Harvard University;University of Pennsylvania;University of Oregon", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cmu.edu;https://www.princeton.edu;https://www.harvard.edu;https://www.upenn.edu;https://www.uoregon.edu", "aff_unique_abbr": "CMU;Princeton;Harvard;UPenn;UO", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DesCo: Learning Object Recognition with Rich Language Descriptions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72079", "id": "J2Cso0wWZX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/761c3284ee4859bff3c7e5d9299a45ee-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J2Cso0wWZX", "openreview": "https://openreview.net/forum?id=J2Cso0wWZX", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72079", "video": "https://nips.cc/virtual/2023/poster/72079", "author_site": "Liunian Li, Zi-Yi Dou, Nanyun Peng, Kai-Wei Chang", "tldr": "", "abstract": "Recent development in vision-language approaches has instigated a paradigm shift in learning visual recognition models from language supervision. These approaches align objects with language queries (e.g. \"a photo of a cat\") and thus improve the models' adaptability to novel objects and domains. Recent studies have attempted to query these models with complex language expressions that include specifications of fine-grained details, such as colors, shapes, and relations. However, simply incorporating language descriptions into queries does not guarantee accurate interpretation by the models. In fact, our experiments show that GLIP, a state-of-the-art vision-language model for object detection, often disregards contextual information in the language descriptions and instead relies heavily on detecting objects solely by their names. To tackle the challenge, we propose a new description-conditioned (DesCo) paradigm of learning object recognition models with rich language descriptions consisting of two innovations: 1) we employ a large language model as a commonsense knowledge engine to generate rich language descriptions of objects; 2) we design context-sensitive queries to improve the model's ability in deciphering intricate nuances embedded within descriptions and enforce the model to focus on context rather than object names alone. On two novel object detection benchmarks, LVIS and OminiLabel, under the zero-shot detection setting, our approach achieves 34.8 APr minival (+9.1) and 29.3 AP (+3.6), respectively, surpassing the prior state-of-the-art models, GLIP and FIBER, by a large margin.", "keywords": "Vision language;fine-grained recognition;object detection", "primary_area": "", "supplementary_material": "", "author": "Liunian Harold Li;Zi-Yi Dou;Nanyun Peng;Kai-Wei Chang", "authorids": "~Liunian_Harold_Li1;~Zi-Yi_Dou1;~Nanyun_Peng1;~Kai-Wei_Chang1", "gender": "M;;F;M", "homepage": ";https://zdou0830.github.io/;https://violetpeng.github.io/;http://kwchang.net", "dblp": "236/6323;205/8985;117/4036;18/2428", "google_scholar": "ntbhn9UAAAAJ;RWogNsEAAAAJ;XxRXvX0AAAAJ;fqDBtzYAAAAJ", "orcid": ";;;0000-0001-5365-0072", "linkedin": ";;;kai-wei-chang-41239040", "or_profile": "~Liunian_Harold_Li1;~Zi-Yi_Dou1;~Nanyun_Peng1;~Kai-Wei_Chang1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Amazon", "aff_domain": "cs.ucla.edu;ucla.edu;ucla.edu;amazon.com", "position": "PhD student;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nli2023desco,\ntitle={DesCo: Learning Object Recognition with Rich Language Descriptions},\nauthor={Liunian Harold Li and Zi-Yi Dou and Nanyun Peng and Kai-Wei Chang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J2Cso0wWZX}\n}", "github": "", "project": "", "reviewers": "9ZAR;xbf9;hkVU;Wg2t;BSUS", "pdf_size": 5522653, "rating": "4;5;6;7;8", "confidence": "3;5;5;3;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "72;282;120;100;53", "wc_strengths": "32;84;82;138;30", "wc_weaknesses": "152;298;228;14;71", "wc_questions": "97;108;10;77;148", "wc_limitations": "12;6;14;7;1", "wc_review": "365;778;454;336;303", "wc_reply_reviewers": "0;41;0;160;43", 
"wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 125.4, 81.59803919212764 ], "wc_strengths_avg": [ 73.2, 39.891853805006356 ], "wc_weaknesses_avg": [ 152.6, 102.65982661197124 ], "wc_questions_avg": [ 88.0, 45.35636669752109 ], "wc_limitations_avg": [ 8.0, 4.604345773288535 ], "wc_review_avg": [ 447.2, 172.85300113101886 ], "wc_reply_reviewers_avg": [ 48.8, 58.69037399778604 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8415527594779391178&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "cs.ucla.edu;ucla.edu;ucla.edu;amazon.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, Los Angeles;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stability-penalty-adaptive follow-the-regularized-leader: Sparsity, game-dependency, and best-of-both-worlds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72078", "id": "J3taqrzyyA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9408564a4229f4a933ac9bd09a29ee96-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J3taqrzyyA", "openreview": "https://openreview.net/forum?id=J3taqrzyyA", "poster": "/media/PosterPDFs/NeurIPS%202023/72078.png?t=1701956846.5642602", "slides": "https://nips.cc/virtual/2023/poster/72078", "video": "https://nips.cc/virtual/2023/poster/72078", "author_site": "Taira Tsuchiya, Shinji Ito, Junya Honda", "tldr": "", "abstract": "Adaptivity to the difficulties of a problem is a key property in sequential decision-making problems to broaden the applicability of algorithms. Follow-the-regularized-leader (FTRL) has recently emerged as one of the most promising approaches for obtaining various types of adaptivity in bandit problems. Aiming to further generalize this adaptivity, we develop a generic adaptive learning rate, called stability-penalty-adaptive (SPA) learning rate for FTRL. This learning rate yields a regret bound jointly depending on stability and penalty of the algorithm, into which the regret of FTRL is typically decomposed. With this result, we establish several algorithms with three types of adaptivity: sparsity, game-dependency, and best-of-both-worlds (BOBW). Despite the fact that sparsity appears frequently in real problems, existing sparse multi-armed bandit algorithms with $k$-arms assume that the sparsity level $s \\leq k$ is known in advance, which is often not the case in real-world scenarios. To address this issue, we first establish $s$-agnostic algorithms with regret bounds of $\\tilde{O}(\\sqrt{sT})$ in the adversarial regime for $T$ rounds, which matches the existing lower bound up to a logarithmic factor. 
Meanwhile, BOBW algorithms aim to achieve a near-optimal regret in both the stochastic and adversarial regimes. Leveraging the SPA learning rate and the technique for $s$-agnostic algorithms combined with a new analysis to bound the variation in FTRL output in response to changes in a regularizer, we establish the first BOBW algorithm with a sparsity-dependent bound. Additionally, we explore partial monitoring and demonstrate that the proposed SPA learning rate framework allows us to achieve a game-dependent bound and the BOBW simultaneously.", "keywords": "follow-the-regularized-leader;adaptive learning rate;multi-armed bandits;partial monitoring;data-dependent bound;sparsity;game-dependency;best-of-both-worlds", "primary_area": "", "supplementary_material": "/attachment/ad2f71f7e9fb460b203d5bb1f4826ac4b1542ca4.pdf", "author": "Taira Tsuchiya;Shinji Ito;Junya Honda", "authorids": "~Taira_Tsuchiya1;~Shinji_Ito1;~Junya_Honda1", "gender": "M;M;M", "homepage": "https://tsuchhiii.github.io/;https://researchmap.jp/shinji_ito?lang=en;http://stat.sys.i.kyoto-u.ac.jp/honda/index.html", "dblp": "226/5536;49/852;56/9070", "google_scholar": "https://scholar.google.co.jp/citations?view_op=list_works;https://scholar.google.co.jp/citations?user=GX0V06wAAAAJ;https://scholar.google.co.jp/citations?user=Aw8OrxQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Taira_Tsuchiya1;~Shinji_Ito1;~Junya_Honda1", "aff": "Kyoto University;NEC;Kyoto University", "aff_domain": "kyoto-u.ac.jp;nec.com;kyoto-u.ac.jp", "position": "PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\ntsuchiya2023stabilitypenaltyadaptive,\ntitle={Stability-penalty-adaptive follow-the-regularized-leader: Sparsity, game-dependency, and best-of-both-worlds},\nauthor={Taira Tsuchiya and Shinji Ito and Junya Honda},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J3taqrzyyA}\n}", "github": "", "project": "", "reviewers": "WVsf;x1zD;TasV;Utqx;1eun", "pdf_size": 537308, "rating": "6;6;6;7;7", "confidence": "1;1;1;3;3", "soundness": "3;3;3;4;3", "novelty": "3;3;3;3;2", "presentation": "4;3;2;3;2", "wc_summary": "84;50;95;80;104", "wc_strengths": "30;51;58;28;45", "wc_weaknesses": "70;39;63;46;44", "wc_questions": "1;34;30;1;3", "wc_limitations": "1;7;1;1;5", "wc_review": "186;181;247;156;201", "wc_reply_reviewers": "22;79;26;10;0", "wc_reply_authors": "0;466;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 1.8, 0.9797958971132713 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 82.6, 18.347751905887545 ], "wc_strengths_avg": [ 42.4, 11.706408501329518 ], "wc_weaknesses_avg": [ 52.4, 11.943198901466893 ], "wc_questions_avg": [ 13.8, 14.931845163944072 ], "wc_limitations_avg": [ 3.0, 2.5298221281347035 ], "wc_review_avg": [ 194.2, 30.115776596329038 ], "wc_reply_reviewers_avg": [ 27.4, 27.375901811629873 ], "wc_reply_authors_avg": [ 93.2, 186.39999999999998 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6915612400401896019&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": 
"kyoto-u.ac.jp;nec.com;kyoto-u.ac.jp", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Kyoto University;NEC Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.nec.com", "aff_unique_abbr": "Kyoto U;NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Kernel Quadrature with Randomly Pivoted Cholesky", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72077", "id": "J66ptjMkAG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf7ba4b2d14e0f6a0e8247af77745094-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J66ptjMkAG", "openreview": "https://openreview.net/forum?id=J66ptjMkAG", "poster": "/media/PosterPDFs/NeurIPS%202023/72077.png?t=1701217835.3285584", "slides": "https://nips.cc/virtual/2023/poster/72077", "video": "https://nips.cc/virtual/2023/poster/72077", "author_site": "Ethan Epperly, Elvira Moreno", "tldr": "", "abstract": "This paper presents new quadrature rules for functions in a reproducing kernel Hilbert space using nodes drawn by a sampling algorithm known as randomly pivoted Cholesky. The resulting computational procedure compares favorably to previous kernel quadrature methods, which either achieve low accuracy or require solving a computationally challenging sampling problem. Theoretical and numerical results show that randomly pivoted Cholesky is fast and achieves comparable quadrature error rates to more computationally expensive quadrature schemes based on continuous volume sampling, thinning, and recombination. Randomly pivoted Cholesky is easily adapted to complicated geometries with arbitrary kernels, unlocking new potential for kernel quadrature.", "keywords": "kernel quadrature;Nystr\u00f6m approximation;reproducing kernel Hilbert space;randomly pivoted Cholesky", "primary_area": "", "supplementary_material": "/attachment/2805a46cc40c89664f8406042347a1f36c8b4404.zip", "author": "Ethan Nicholas Epperly;Elvira Moreno Ferreira", "authorids": "~Ethan_Nicholas_Epperly1;~Elvira_Moreno_Ferreira1", "gender": "M;F", "homepage": "https://www.ethanepperly.com;", "dblp": "254/1116;345/0474", "google_scholar": "Z4fYpcgAAAAJ;", "orcid": "0000-0003-0712-8296;0000-0002-0477-7744", "linkedin": ";elvira-moreno-677b0a15a/", "or_profile": "~Ethan_Nicholas_Epperly1;~Elvira_Moreno_Ferreira1", "aff": "California Institute of Technology;California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\nepperly2023kernel,\ntitle={Kernel Quadrature with Randomly Pivoted Cholesky},\nauthor={Ethan Nicholas Epperly and Elvira Moreno Ferreira},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J66ptjMkAG}\n}", "github": "", "project": "", "reviewers": "dsgV;PPmk;yiWH;4xjP;D4rs", "pdf_size": 1137295, "rating": "4;6;6;7;8", "confidence": "5;3;3;2;4", "soundness": "3;3;3;4;4", "novelty": "2;3;2;3;3", "presentation": "4;3;3;3;4", "wc_summary": "217;76;53;92;86", "wc_strengths": "61;65;24;37;73", "wc_weaknesses": "325;35;95;153;76", "wc_questions": "85;88;27;101;99", "wc_limitations": "1;52;7;19;5", "wc_review": "689;316;206;402;339", "wc_reply_reviewers": "10;73;10;27;6", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.32664991614216 ], 
"confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 104.8, 57.652059807087554 ], "wc_strengths_avg": [ 52.0, 18.439088914585774 ], "wc_weaknesses_avg": [ 136.8, 101.47787936294293 ], "wc_questions_avg": [ 80.0, 27.202941017470888 ], "wc_limitations_avg": [ 16.8, 18.594622878671135 ], "wc_review_avg": [ 390.4, 162.1586877105263 ], "wc_reply_reviewers_avg": [ 25.2, 24.97518768698245 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5026155365907202, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3238207201809236216&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "caltech.edu;caltech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "California Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.caltech.edu", "aff_unique_abbr": "Caltech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Glance and Focus: Memory Prompting for Multi-Event Video Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72076", "id": "J6Niv3yrMq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6baec7c4ba0a8734ccbd528a8090cb1f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J6Niv3yrMq", "openreview": "https://openreview.net/forum?id=J6Niv3yrMq", "poster": "/media/PosterPDFs/NeurIPS%202023/72076.png?t=1702050530.830262", "slides": "https://nips.cc/virtual/2023/poster/72076", "video": "https://nips.cc/virtual/2023/poster/72076", "author_site": "Ziyi Bai, Ruiping Wang, Xilin Chen", "tldr": "", "abstract": "Video Question Answering (VideoQA) has emerged as a vital tool to evaluate agents\u2019 ability to understand human daily behaviors. Despite the recent success of large vision language models in many multi-modal tasks, complex situation reasoning over videos involving multiple human-object interaction events still remains challenging. In contrast, humans can easily tackle it by using a series of episode memories as anchors to quickly locate question-related key moments for reasoning. To mimic this effective reasoning strategy, we propose the Glance- Focus model. One simple way is to apply an action detection model to predict a set of actions as key memories. However, these actions within a closed set vocabulary are hard to generalize to various video domains. Instead of that, we train an Encoder-Decoder to generate a set of dynamic event memories at the glancing stage. Apart from using supervised bipartite matching to obtain the event memories, we further design an unsupervised memory generation method to get rid of dependence on event annotations. Next, at the focusing stage, these event memories act as a bridge to establish the correlation between the questions with high-level event concepts and low-level lengthy video content. Given the question, the model first focuses on the generated key event memory, then focuses on the most relevant moment for reasoning through our designed multi-level cross- attention mechanism. We conduct extensive experiments on four Multi-Event VideoQA benchmarks including STAR, EgoTaskQA, AGQA, and NExT-QA. 
Our proposed model achieves state-of-the-art results, surpassing current large models in various challenging reasoning tasks. The code and models are available at https://github.com/ByZ0e/Glance-Focus.", "keywords": "Video Question Answering; Multi-Event Reasoning; Spatial-Temporal Reasoning", "primary_area": "", "supplementary_material": "/attachment/3f2dc639bc18506c3386cb14b0cd6dab22b22d98.pdf", "author": "Ziyi Bai;Ruiping Wang;Xilin CHEN", "authorids": "~Ziyi_Bai1;~Ruiping_Wang1;~Xilin_CHEN2", "gender": "F;M;M", "homepage": ";https://rpwang.net/;http://vipl.ict.ac.cn/people/_xlchen/", "dblp": "311/2526;60/1529-1;c/XilinChen", "google_scholar": "jRe11usAAAAJ;duIUwpwAAAAJ;vVx2v20AAAAJ", "orcid": "0009-0004-5917-5400;0000-0003-1830-2595;0000-0003-3024-4404", "linkedin": ";;", "or_profile": "~Ziyi_Bai1;~Ruiping_Wang1;~Xilin_Chen4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbai2023glance,\ntitle={Glance and Focus: Memory Prompting for Multi-Event Video Question Answering},\nauthor={Ziyi Bai and Ruiping Wang and Xilin CHEN},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J6Niv3yrMq}\n}", "github": "", "project": "", "reviewers": "yVqG;xLg6;16Tf;veHn;sLiX;izc9", "pdf_size": 1248414, "rating": "3;6;6;6;7;7", "confidence": "4;3;4;4;4;5", "soundness": "2;2;3;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "2;2;3;3;2;3", "wc_summary": "198;25;187;98;154;113", "wc_strengths": "74;37;61;92;123;208", "wc_weaknesses": "256;168;294;216;129;82", "wc_questions": "35;52;5;42;4;48", "wc_limitations": "9;1;12;164;21;11", "wc_review": "572;283;559;612;431;462", "wc_reply_reviewers": "64;13;16;726;13;104", "wc_reply_authors": "357;30;43;864;37;68", "reply_reviewers": "1;1;1;2;1;1", "reply_authors": "2;2;2;3;2;2", "rating_avg": [ 5.833333333333333, 1.3437096247164253 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 129.16666666666666, 58.8541039822675 ], "wc_strengths_avg": [ 99.16666666666667, 55.4058260073393 ], "wc_weaknesses_avg": [ 190.83333333333334, 72.70125323694374 ], "wc_questions_avg": [ 31.0, 19.4593593591019 ], "wc_limitations_avg": [ 36.333333333333336, 57.3924114224946 ], "wc_review_avg": [ 486.5, 110.61155756369524 ], "wc_reply_reviewers_avg": [ 156.0, 257.08818201802535 ], "wc_reply_authors_avg": [ 233.16666666666666, 304.54524385640235 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.21483446221182986, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2633548321297512289&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Unlocking Feature Visualization for Deep Network with MAgnitude Constrained Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72075", "id": "J7VoDuzuKs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/76d2f8e328e1081c22a77ca0fa330ca5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J7VoDuzuKs", "openreview": "https://openreview.net/forum?id=J7VoDuzuKs", "poster": "/media/PosterPDFs/NeurIPS%202023/72075.png?t=1701676704.616556", "slides": "https://nips.cc/virtual/2023/poster/72075", "video": "https://nips.cc/virtual/2023/poster/72075", "author_site": "Thomas FEL, Thibaut Boissin, Victor Boutin, Agustin PICARD, Paul Novello, Julien Colin, Drew Linsley, Tom ROUSSEAU, Remi Cadene, Lore Goetschalckx, Laurent Gardes, Thomas Serre", "tldr": "", "abstract": "Feature visualization has gained significant popularity as an explainability method, particularly after the influential work by Olah et al. in 2017. Despite its success, its widespread adoption has been limited due to issues in scaling to deeper neural networks and the reliance on tricks to generate interpretable images. Here, we describe MACO, a simple approach to address these shortcomings. It consists in optimizing solely an image's phase spectrum while keeping its magnitude constant to ensure that the generated explanations lie in the space of natural images. Our approach yields significantly better results -- both qualitatively and quantitatively -- unlocking efficient and interpretable feature visualizations for state-of-the-art neural networks. We also show that our approach exhibits an attribution mechanism allowing to augment feature visualizations with spatial importance. Furthermore, we enable quantitative evaluation of feature visualizations by introducing 3 metrics: transferability, plausibility, and alignment with natural images. We validate our method on various applications and we introduce a website featuring MACO visualizations for all classes of the ImageNet dataset, which will be made available upon acceptance. 
\nOverall, our study unlocks feature visualizations for the largest, state-of-the-art classification networks without resorting to any parametric prior image model, effectively advancing a field that has been stagnating since 2017 (Olah et al, 2017).", "keywords": "explainable AI;feature visualization;interpretability;optimization", "primary_area": "", "supplementary_material": "/attachment/2dd6b93146a14b108ad4fadec2ab559ebe23e4eb.pdf", "author": "Thomas FEL;Thibaut Boissin;Victor Boutin;Agustin Martin Picard;Paul Novello;Julien Colin;Drew Linsley;Tom ROUSSEAU;Remi Cadene;Lore Goetschalckx;Laurent Gardes;Thomas Serre", "authorids": "~Thomas_FEL1;~Thibaut_Boissin1;~Victor_Boutin2;~Agustin_Martin_Picard1;~Paul_Novello1;~Julien_Colin2;~Drew_Linsley1;~Tom_ROUSSEAU1;~Remi_Cadene1;~Lore_Goetschalckx1;l.gardes@sncf.fr;~Thomas_Serre1", "gender": "M;M;M;M;M;M;;M;M;F;;M", "homepage": "https://thomasfel.me;;;;;;;https://github.com/Tom-Rousseau;http://remicadene.com;https://loregoetschalckx.github.io/;;https://serre-lab.clps.brown.edu/", "dblp": "274/2390;;228/3333;;283/7771;308/6238;194/2308;349/3907;;249/8615;;", "google_scholar": "1m5Mlx4AAAAJ;zC-MstIAAAAJ;Z-YF5FsAAAAJ;ABDSUgEAAAAJ;https://scholar.google.fr/citations?user=uaJK95oAAAAJ;https://scholar.google.com/citations?hl=fr;cXZlAuQAAAAJ;;2n5nHU4AAAAJ;9nZ0bZkAAAAJ;;kZlPW4wAAAAJ", "orcid": ";;0000-0003-3372-5940;;0000-0002-1053-8694;0000-0003-0279-7095;;;;0000-0002-9638-7881;;", "linkedin": ";;;;paul-novello-a036b1a1/;;;tom-r-611100b4/;;lore-goetschalckx/;;", "or_profile": "~Thomas_FEL1;~Thibaut_Boissin1;~Victor_Boutin2;~Agustin_Martin_Picard1;~Paul_Novello1;~Julien_Colin2;~Drew_Linsley1;~Tom_ROUSSEAU1;~Remi_Cadene1;~Lore_Goetschalckx1;l.gardes@sncf.fr;~Thomas_Serre1", "aff": "Brown University;IRT Saint exup\u00e9ry;Brown University;Scalian;IRT Saint Exupery;Universidad de Alicante;Brown University;Soci\u00e9t\u00e9 Nationale des Chemins de fer Fran\u00e7ais;;Brown University;;Universit\u00e9 de Toulouse", "aff_domain": "brown.edu;irt-saintexupery.com;brown.edu;scalian.com;irt-saintexupery.com;ua.es;brown.edu;sncf.fr;;brown.edu;;univ-toulouse.fr", "position": "PhD student;Researcher;Postdoc;Researcher;Researcher;PhD student;Assistant Professor;Researcher;;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nfel2023unlocking,\ntitle={Unlocking Feature Visualization for Deep Network with {MA}gnitude Constrained Optimization},\nauthor={Thomas FEL and Thibaut Boissin and Victor Boutin and Agustin Martin Picard and Paul Novello and Julien Colin and Drew Linsley and Tom ROUSSEAU and Remi Cadene and Lore Goetschalckx and Laurent Gardes and Thomas Serre},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J7VoDuzuKs}\n}", "github": "", "project": "", "reviewers": "UD3m;qNv3;6q7R;Gqq4;mBBq", "pdf_size": 8588706, "rating": "4;4;4;5;7", "confidence": "3;5;4;4;5", "soundness": "3;2;2;3;3", "novelty": "2;2;2;3;4", "presentation": "3;3;3;3;3", "wc_summary": "81;44;93;108;114", "wc_strengths": "80;18;39;80;168", "wc_weaknesses": "183;176;300;316;255", "wc_questions": "20;154;60;9;171", "wc_limitations": "1;15;47;1;52", "wc_review": "365;407;539;514;760", "wc_reply_reviewers": "62;441;120;0;258", "wc_reply_authors": "150;297;192;0;91", "reply_reviewers": "1;1;2;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 
3.0, 0.0 ], "wc_summary_avg": [ 88.0, 24.843510218968657 ], "wc_strengths_avg": [ 77.0, 51.42761903880054 ], "wc_weaknesses_avg": [ 246.0, 57.9068217052188 ], "wc_questions_avg": [ 82.8, 67.46673254278734 ], "wc_limitations_avg": [ 23.2, 22.130521909796887 ], "wc_review_avg": [ 517.0, 137.6560932178449 ], "wc_reply_reviewers_avg": [ 176.2, 157.51622138687813 ], "wc_reply_authors_avg": [ 146.0, 99.27134531172628 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.5041841733655162, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11940152377448489317&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 26, "email": "brown.edu;irt-saintexupery.com;brown.edu;scalian.com;irt-saintexupery.com;ua.es;brown.edu;sncf.fr;;brown.edu;;univ-toulouse.fr", "author_num": 12, "aff_unique_index": "0;1;0;2;3;4;0;5;0;6", "aff_unique_norm": "Brown University;IRT Saint Exup\u00e9ry;Scalian;IRT Saint Exupery;Universidad de Alicante;Soci\u00e9t\u00e9 Nationale des Chemins de fer Fran\u00e7ais;Universit\u00e9 de Toulouse", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.brown.edu;;https://www.scalian.com;;https://www.ua.es;https://www.sncf.fr;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;;;;UA;SNCF;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;2;0;1;0;1", "aff_country_unique": "United States;France;Spain" }, { "title": "LLM-Pruner: On the Structural Pruning of Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72074", "id": "J8Ajf9WfXP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/44956951349095f74492a5471128a7e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J8Ajf9WfXP", "openreview": "https://openreview.net/forum?id=J8Ajf9WfXP", "poster": "/media/PosterPDFs/NeurIPS%202023/72074.png?t=1701504890.7937734", "slides": "https://nips.cc/virtual/2023/poster/72074", "video": "https://nips.cc/virtual/2023/poster/72074", "author_site": "Xinyin Ma, Gongfan Fang, Xinchao Wang", "tldr": "", "abstract": "Large language models (LLMs) have shown remarkable capabilities in language understanding and generation. However, such impressive capability typically comes with a substantial model size, which presents significant challenges in the deployment, inference, and training stages. With LLM being a general-purpose task solver, we explore its compression in a task-agnostic manner, which aims to preserve the multi-task solving and language generation ability of the original LLM. One challenge to achieving this is the enormous size of the training corpus of LLM, which makes both data transfer and model post-training over-burdensome. Thus, we tackle the compression of LLMs within the bound of two constraints: being task-agnostic and minimizing the reliance on the original training dataset. Our method, named LLM-Pruner, adopts structural pruning that selectively removes non-critical coupled structures based on gradient information, maximally preserving the majority of the LLM's functionality. To this end, the performance of pruned models can be efficiently recovered through tuning techniques such as LoRA in merely 3 hours, requiring only 50K data. 
We validate the LLM-Pruner on three LLMs, including LLaMA, Vicuna, and ChatGLM, and demonstrate that the compressed models still exhibit satisfactory capabilities in zero-shot classification and generation. The code will be made public.", "keywords": "model compression;structural pruning;large language model", "primary_area": "", "supplementary_material": "", "author": "Xinyin Ma;Gongfan Fang;Xinchao Wang", "authorids": "~Xinyin_Ma1;~Gongfan_Fang2;~Xinchao_Wang1", "gender": "F;M;M", "homepage": "https://horseee.github.io;https://fangggf.github.io/;https://sites.google.com/site/sitexinchaowang/", "dblp": "267/2244;243/5768;", "google_scholar": "jFUKS0oAAAAJ;489YZ_kAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xinyin_Ma1;~Gongfan_Fang2;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;nus.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nma2023llmpruner,\ntitle={{LLM}-Pruner: On the Structural Pruning of Large Language Models},\nauthor={Xinyin Ma and Gongfan Fang and Xinchao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J8Ajf9WfXP}\n}", "github": "", "project": "", "reviewers": "mPK3;3DoF;ahc6;AFsw;VdKx", "pdf_size": 1386468, "rating": "4;4;6;7;7", "confidence": "4;4;4;3;3", "soundness": "3;2;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "104;59;70;75;40", "wc_strengths": "76;20;70;50;35", "wc_weaknesses": "161;84;40;169;23", "wc_questions": "25;30;96;23;1", "wc_limitations": "30;1;1;0;1", "wc_review": "396;194;277;317;100", "wc_reply_reviewers": "0;306;13;15;22", "wc_reply_authors": "320;802;33;35;29", "reply_reviewers": "0;2;1;1;1", "reply_authors": "3;4;2;2;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 69.6, 20.981896959045432 ], "wc_strengths_avg": [ 50.2, 20.97999046710937 ], "wc_weaknesses_avg": [ 95.4, 60.268067830319566 ], "wc_questions_avg": [ 35.0, 32.08114711166046 ], "wc_limitations_avg": [ 6.6, 11.706408501329518 ], "wc_review_avg": [ 256.8, 101.92036106686436 ], "wc_reply_reviewers_avg": [ 71.2, 117.61530512650128 ], "wc_reply_authors_avg": [ 243.8, 300.52181285224543 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8427009716003845, "gs_citation": 621, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=412717662846503802&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "u.nus.edu;u.nus.edu;nus.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Make Pre-trained Model Reversible: From Parameter to Memory Efficient Fine-Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72073", "id": "J8McuwS3zY", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3151e460c41ba67dc55412861184ef35-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=J8McuwS3zY", "openreview": "https://openreview.net/forum?id=J8McuwS3zY", "poster": "/media/PosterPDFs/NeurIPS%202023/72073.png?t=1701618693.6560912", "slides": "https://nips.cc/virtual/2023/poster/72073", "video": "https://nips.cc/virtual/2023/poster/72073", "author_site": "Baohao Liao, Shaomu Tan, Christof Monz", "tldr": "", "abstract": "Parameter-efficient fine-tuning (PEFT) of pre-trained language models (PLMs) has emerged as a highly successful approach, with training only a small number of parameters without sacrificing performance and becoming the de-facto learning paradigm with the increasing size of PLMs. However, existing PEFT methods are not memory-efficient, because they still require caching most of the intermediate activations for the gradient calculation, akin to fine-tuning. One effective way to reduce the activation memory is to apply a reversible model, so the intermediate activations are not necessary to be cached and can be recomputed. Nevertheless, modifying a PLM to its reversible variant is not straightforward, since the reversible model has a distinct architecture from the currently released PLMs. In this paper, we first investigate what is a key factor for the success of existing PEFT methods, and realize that it's essential to preserve the PLM's starting point when initializing a PEFT method. With this finding, we propose memory-efficient fine-tuning (MEFT) that inserts adapters into a PLM, preserving the PLM's starting point and making it reversible without additional pre-training. We evaluate MEFT on the GLUE benchmark and five question-answering tasks with various backbones, BERT, RoBERTa, BART and OPT. MEFT significantly reduces the activation memory up to 84% of full fine-tuning with a negligible amount of trainable parameters. Moreover, MEFT achieves the same score on GLUE and a comparable score on the question-answering tasks as full fine-tuning. 
A similar finding is also observed for the image classification task.", "keywords": "large language model;parameter-efficient learning;memory-efficient learning;reversible neural network", "primary_area": "", "supplementary_material": "/attachment/e20a67c71635e2cd332fc702c21731466d40bf4d.pdf", "author": "Baohao Liao;Shaomu Tan;Christof Monz", "authorids": "~Baohao_Liao1;~Shaomu_Tan1;c.monz@uva.nl", "gender": "M;Non-Binary;", "homepage": "https://baohaoliao.github.io/;https://smu-tan.github.io/;", "dblp": "234/4096;336/3005;", "google_scholar": "Fbys5c8AAAAJ;KJRzX-gAAAAJ;", "orcid": "0000-0001-8335-4573;;", "linkedin": "baohaoliao;shaomutan/;", "or_profile": "~Baohao_Liao1;~Shaomu_Tan1;c.monz@uva.nl", "aff": "University of Amsterdam;University of Amsterdam;", "aff_domain": "uva.nl;uva.nl;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nliao2023make,\ntitle={Make Pre-trained Model Reversible: From Parameter to Memory Efficient Fine-Tuning},\nauthor={Baohao Liao and Shaomu Tan and Christof Monz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=J8McuwS3zY}\n}", "github": "", "project": "", "reviewers": "dsz7;X2di;mzDQ", "pdf_size": 811160, "rating": "4;6;6", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "84;250;61", "wc_strengths": "34;82;27", "wc_weaknesses": "230;306;60", "wc_questions": "22;295;6", "wc_limitations": "48;29;1", "wc_review": "418;962;155", "wc_reply_reviewers": "213;22;0", "wc_reply_authors": "582;48;27", "reply_reviewers": "1;1;0", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 131.66666666666666, 84.19949854693647 ], "wc_strengths_avg": [ 47.666666666666664, 24.44494948973214 ], "wc_weaknesses_avg": [ 198.66666666666666, 102.84400268799776 ], "wc_questions_avg": [ 107.66666666666667, 132.6256217914003 ], "wc_limitations_avg": [ 26.0, 19.30457631409368 ], "wc_review_avg": [ 511.6666666666667, 336.04794631454337 ], "wc_reply_reviewers_avg": [ 78.33333333333333, 95.64633930382398 ], "wc_reply_authors_avg": [ 219.0, 256.8228961755552 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18115159696274827348&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uva.nl;uva.nl;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "Grounded Decoding: Guiding Text Generation with Grounded Models for Embodied Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72072", "id": "JCCi58IUsh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bb3cfcb0284642a973dd631ec9184f2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JCCi58IUsh", "openreview": "https://openreview.net/forum?id=JCCi58IUsh", 
"poster": "", "slides": "https://nips.cc/virtual/2023/poster/72072", "video": "https://nips.cc/virtual/2023/poster/72072", "author_site": "Wenlong Huang, Fei Xia, Dhruv Shah, Danny Driess, Andy Zeng, Yao Lu, Pete Florence, Igor Mordatch, Sergey Levine, Karol Hausman, brian ichter", "tldr": "", "abstract": "Recent progress in large language models (LLMs) has demonstrated the ability to learn and leverage Internet-scale knowledge through pre-training with autoregressive models. Unfortunately, applying such models to settings with embodied agents, such as robots, is challenging due to their lack of experience with the physical world, inability to parse non-language observations, and ignorance of rewards or safety constraints that robots may require. On the other hand, language-conditioned robotic policies that learn from interaction data can provide the necessary grounding that allows the agent to be correctly situated in the real world, but such policies are limited by the lack of high-level semantic understanding due to the limited breadth of the interaction data available for training them. Thus, if we want to make use of the semantic knowledge in a language model while still situating it in an embodied setting, we must construct an action sequence that is both likely according to the language model and also realizable according to grounded models of the environment. We frame this as a problem similar to probabilistic filtering: decode a sequence that both has high probability under the language model and high probability under a set of grounded model objectives. We demonstrate how such grounded models can be obtained across three simulation and real-world domains, and that the proposed decoding strategy is able to solve complex, long-horizon embodiment tasks in a robotic setting by leveraging the knowledge of both models.", "keywords": "robotics;language models;embodied agents", "primary_area": "", "supplementary_material": "", "author": "Wenlong Huang;Fei Xia;Dhruv Shah;Danny Driess;Andy Zeng;Yao Lu;Pete Florence;Igor Mordatch;Sergey Levine;Karol Hausman;brian ichter", "authorids": "~Wenlong_Huang1;~Fei_Xia1;~Dhruv_Shah1;~Danny_Driess1;~Andy_Zeng3;~Yao_Lu13;~Pete_Florence1;~Igor_Mordatch5;~Sergey_Levine1;~Karol_Hausman2;~brian_ichter1", "gender": "M;M;M;;;;;M;;M;M", "homepage": "https://wenlong.page;;http://cs.berkeley.edu/~shah;https://dannydriess.github.io/;;http://www.peteflorence.com/;;https://people.eecs.berkeley.edu/~svlevine/;;http://andyzeng.github.io/;https://karolhausman.github.io/", "dblp": "82/2872;;;;26/5662-6;;;80/7594;;http://dblp.uni-trier.de/pers/hd/z/Zeng:Andy;135/8164", "google_scholar": "hYVMrzsAAAAJ;pqP5_PgAAAAJ;;https://scholar.google.de/citations?user=wxnzyjwAAAAJ;OI7zFmwAAAAJ;;Vzr1RukAAAAJ;8R35rCwAAAAJ;-w5DuHgAAAAJ;q7nFtUcAAAAJ;yy0UFOwAAAAJ", "orcid": ";0000-0003-4343-1444;;;;;;;;;", "linkedin": ";;;;;;;;;;karolhausman/", "or_profile": "~Wenlong_Huang1;~Fei_Xia1;~Dhruv_Shah1;~Danny_Driess1;~Yao_Lu13;~Pete_Florence1;~Igor_Mordatch5;~Sergey_Levine1;~brian_ichter1;~Andy_Zeng1;~Karol_Hausman1", "aff": "Stanford University;Google;UC Berkeley;Technische Universit\u00e4t Berlin;Google;Google;Research, Google;Google;Google;Google;Google Brain", "aff_domain": "stanford.edu;google.com;berkeley.edu;tu-berlin.de;google.com;google.com;research.google.com;google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;PhD student;PhD student;Researcher;Research Scientist;Researcher;Research Scientist;Research Scientist;Research Scientist;Research Scientist", 
"bibtex": "@inproceedings{\nhuang2023grounded,\ntitle={Grounded Decoding: Guiding Text Generation with Grounded Models for Embodied Agents},\nauthor={Wenlong Huang and Fei Xia and Dhruv Shah and Danny Driess and Andy Zeng and Yao Lu and Pete Florence and Igor Mordatch and Sergey Levine and Karol Hausman and brian ichter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JCCi58IUsh}\n}", "github": "", "project": "", "reviewers": "XxHZ;TJzA;WRL4;VNBy;pMcP", "pdf_size": 2943626, "rating": "5;6;6;6;6", "confidence": "4;3;4;4;4", "soundness": "3;3;3;4;3", "novelty": "2;2;2;4;2", "presentation": "4;2;3;4;4", "wc_summary": "167;90;78;77;54", "wc_strengths": "211;73;35;123;100", "wc_weaknesses": "152;128;75;198;119", "wc_questions": "25;36;76;46;94", "wc_limitations": "8;7;20;12;29", "wc_review": "563;334;284;456;396", "wc_reply_reviewers": "20;15;11;11;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 93.2, 38.69573619922485 ], "wc_strengths_avg": [ 108.4, 59.0782531901545 ], "wc_weaknesses_avg": [ 134.4, 40.40099008687782 ], "wc_questions_avg": [ 55.4, 25.70291812226775 ], "wc_limitations_avg": [ 15.2, 8.280096617793781 ], "wc_review_avg": [ 406.6, 97.27815787729534 ], "wc_reply_reviewers_avg": [ 11.4, 6.590902821313632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 142, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16775018457982535023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stanford.edu;google.com;berkeley.edu;tu-berlin.de;google.com;google.com;research.google.com;google.com;google.com;google.com;google.com", "author_num": 11, "aff_unique_index": "0;1;2;3;1;1;1;1;1;1;1", "aff_unique_norm": "Stanford University;Google;University of California, Berkeley;Technische Universit\u00e4t Berlin", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.stanford.edu;https://www.google.com;https://www.berkeley.edu;https://www.tu-berlin.de", "aff_unique_abbr": "Stanford;Google;UC Berkeley;TU Berlin", "aff_campus_unique_index": "0;1;2;1;1;1;1;1;1;1", "aff_campus_unique": "Stanford;Mountain View;Berkeley;", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Deep Non-line-of-sight Imaging from Under-scanning Measurements", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72071", "id": "JCN9YsZiwB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b91cc0a242e6518ee731f74e82b2eebd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JCN9YsZiwB", "openreview": "https://openreview.net/forum?id=JCN9YsZiwB", "poster": "/media/PosterPDFs/NeurIPS%202023/72071.png?t=1698668729.2963157", "slides": "https://nips.cc/virtual/2023/poster/72071", "video": "https://nips.cc/virtual/2023/poster/72071", "author_site": "Yue Li, Yueyi Zhang, Juntian Ye, Feihu Xu, Zhiwei Xiong", "tldr": "", "abstract": "Active confocal non-line-of-sight (NLOS) imaging has successfully enabled seeing around corners relying on high-quality transient 
measurements. However, acquiring spatial-dense transient measurement is time-consuming, raising the question of how to reconstruct satisfactory results from under-scanning measurements (USM). The existing solutions, involving the traditional algorithms, however, are hindered by unsatisfactory results or long computing times. To this end, we propose the first deep-learning-based approach to NLOS imaging from USM. Our proposed end-to-end network is composed of two main components: the transient recovery network (TRN) and the volume reconstruction network (VRN). Specifically, TRN takes the under-scanning measurements as input, utilizes a multiple kernel feature extraction module and a multiple feature fusion module, and outputs sufficient-scanning measurements at the high-spatial resolution. Afterwards, VRN incorporates the linear physics prior of the light-path transport model and reconstructs the hidden volume representation. Besides, we introduce regularized constraints that enhance the perception of more local details while suppressing smoothing effects. The proposed method achieves superior performance on both synthetic data and public real-world data, as demonstrated by extensive experimental results with different under-scanning grids. Moreover, the proposed method delivers impressive robustness at an extremely low scanning grid (i.e., 8$\\times$8) and offers high-speed inference (i.e., 50 times faster than the existing iterative solution).", "keywords": "Non-line-of-sight imaging;Transient Recovery;Volume Reconstruction", "primary_area": "", "supplementary_material": "/attachment/040a41954217c4335ebadf2126e3cb7e0fcfe135.pdf", "author": "Yue Li;Yueyi Zhang;Juntian Ye;Feihu Xu;Zhiwei Xiong", "authorids": "~Yue_Li11;~Yueyi_Zhang2;~Juntian_Ye1;~Feihu_Xu1;~Zhiwei_Xiong1", "gender": ";;M;M;M", "homepage": ";;http://quantum.ustc.edu.cn/web/node/659;https://quantum.ustc.edu.cn/web/en/node/475;", "dblp": ";;;;54/6827", "google_scholar": ";LatWlFAAAAAJ;;;Snl0HPEAAAAJ", "orcid": ";;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/ACoAADB0h1IBzCDl1QrA9Xrx5XQzxhJsOcO73uI;;", "or_profile": "~Yue_Li11;~Yueyi_Zhang2;~Juntian_Ye1;~Feihu_Xu1;~Zhiwei_Xiong1", "aff": ";University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;USTC", "aff_domain": ";ustc.edu.cn;ustc.edu.cn;ustc.edu;ustc.edu.cn", "position": ";Associate Researcher;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nli2023deep,\ntitle={Deep Non-line-of-sight Imaging from Under-scanning Measurements},\nauthor={Yue Li and Yueyi Zhang and Juntian Ye and Feihu Xu and Zhiwei Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JCN9YsZiwB}\n}", "github": "", "project": "", "reviewers": "VzCp;9P8E;iKMu;r4bJ", "pdf_size": 1960997, "rating": "5;5;5;5", "confidence": "4;4;3;2", "soundness": "3;3;2;3", "novelty": "2;2;3;2", "presentation": "3;3;2;3", "wc_summary": "237;52;68;74", "wc_strengths": "89;67;23;68", "wc_weaknesses": "56;170;49;61", "wc_questions": "117;4;21;5", "wc_limitations": "1;4;50;1", "wc_review": "500;297;211;209", "wc_reply_reviewers": "53;16;0;32", "wc_reply_authors": "0;0;0;213", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 
0.4330127018922193 ], "wc_summary_avg": [ 107.75, 75.05456348550699 ], "wc_strengths_avg": [ 61.75, 24.0351305384431 ], "wc_weaknesses_avg": [ 84.0, 49.83472684785179 ], "wc_questions_avg": [ 36.75, 46.82080200082011 ], "wc_limitations_avg": [ 14.0, 20.820662813657012 ], "wc_review_avg": [ 304.25, 118.46808641992999 ], "wc_reply_reviewers_avg": [ 25.25, 19.613452016409553 ], "wc_reply_authors_avg": [ 53.25, 92.23170550304272 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14663896331733099559&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";ustc.edu.cn;ustc.edu.cn;ustc.edu;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Improving Few-Shot Generalization by Exploring and Exploiting Auxiliary Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72070", "id": "JDnLXc4NOn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e3c5399729e06d2f0c22d04416904ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JDnLXc4NOn", "openreview": "https://openreview.net/forum?id=JDnLXc4NOn", "poster": "/media/PosterPDFs/NeurIPS%202023/72070.png?t=1699139173.4481535", "slides": "https://nips.cc/virtual/2023/poster/72070", "video": "https://nips.cc/virtual/2023/poster/72070", "author_site": "Alon Albalak, Colin Raffel, William Yang Wang", "tldr": "", "abstract": "Few-shot learning is valuable in many real-world applications, but learning a generalizable model without overfitting to the few labeled datapoints is challenging.\nIn this work, we focus on Few-shot Learning with Auxiliary Data (FLAD), a training paradigm that assumes access to auxiliary data during few-shot learning in hopes of improving generalization.\nPrevious works have proposed automated methods for mixing auxiliary and target data, but these methods typically scale linearly (or worse) with the number of auxiliary datasets, limiting their practicality.\nIn this work we relate FLAD to the explore-exploit dilemma that is central to the multi-armed bandit setting and derive algorithms whose computational complexity is independent of the number of auxiliary datasets, allowing us to scale to 100x more auxiliary datasets than prior methods.\nWe propose two algorithms -- EXP3-FLAD and UCB1-FLAD -- and compare them with prior FLAD methods that either explore or exploit, finding that the combination of exploration and exploitation is crucial.\nThrough extensive experimentation we find that our methods outperform all pre-existing FLAD methods by 4% and lead to the first 3 billion parameter language models that outperform the 175 billion parameter GPT-3.\nOverall, our work suggests that the discovery of better, more efficient mixing strategies for FLAD may provide a viable path towards substantially improving generalization in few-shot learning.", "keywords": "Few-shot learning;natural language processing;few shot learning;NLP;multi-armed bandit;multi armed bandit", "primary_area": "", "supplementary_material": 
"/attachment/c932e1be0038668837124e8c64d3a4da3759b339.zip", "author": "Alon Albalak;Colin Raffel;William Yang Wang", "authorids": "~Alon_Albalak1;~Colin_Raffel1;~William_Yang_Wang2", "gender": ";;M", "homepage": "https://alon-albalak.github.io/;http://colinraffel.com;https://www.cs.ucsb.edu/~william/", "dblp": "283/4427;149/0082;08/9282", "google_scholar": "F6J_7d8AAAAJ;I66ZBYwAAAAJ;gf8Ms_8AAAAJ", "orcid": "0000-0003-0809-1704;;", "linkedin": "alonalbalak;;", "or_profile": "~Alon_Albalak1;~Colin_Raffel1;~William_Wang1", "aff": "University of California, Santa Barbara;University of North Carolina, Chapel Hill;UC Santa Barbara", "aff_domain": "ucsb.edu;unc.edu;ucsb.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nalbalak2023improving,\ntitle={Improving Few-Shot Generalization by Exploring and Exploiting Auxiliary Data},\nauthor={Alon Albalak and Colin Raffel and William Yang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JDnLXc4NOn}\n}", "github": "", "project": "", "reviewers": "EHmY;jiND;5GTn;yskH", "pdf_size": 1991903, "rating": "5;6;7;7", "confidence": "4;3;2;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "47;74;120;183", "wc_strengths": "55;13;165;80", "wc_weaknesses": "79;20;125;17", "wc_questions": "7;137;125;69", "wc_limitations": "1;58;23;5", "wc_review": "189;302;558;354", "wc_reply_reviewers": "0;30;9;23", "wc_reply_authors": "134;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 106.0, 51.55094567512802 ], "wc_strengths_avg": [ 78.25, 55.51294893986447 ], "wc_weaknesses_avg": [ 60.25, 44.81838350498599 ], "wc_questions_avg": [ 84.5, 51.58245825859795 ], "wc_limitations_avg": [ 21.75, 22.509720122649238 ], "wc_review_avg": [ 350.75, 133.69999065071022 ], "wc_reply_reviewers_avg": [ 15.5, 11.715374513859981 ], "wc_reply_authors_avg": [ 33.5, 58.023702053557386 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=617016302851502674&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ucsb.edu;unc.edu;ucsb.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Santa Barbara;University of North Carolina", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.unc.edu", "aff_unique_abbr": "UCSB;UNC", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Santa Barbara;Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Examples Might be Avoidable: The Role of Data Concentration in Adversarial Robustness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72069", "id": "JDoA6admhv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/92d21245424f3898b7110f555a00e829-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JDoA6admhv", "openreview": "https://openreview.net/forum?id=JDoA6admhv", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72069.png?t=1702165190.095367", "slides": "https://nips.cc/virtual/2023/poster/72069", "video": "https://nips.cc/virtual/2023/poster/72069", "author_site": "Ambar Pal, Jeremias Sulam, Rene Vidal", "tldr": "", "abstract": "The susceptibility of modern machine learning classifiers to adversarial examples has motivated theoretical results suggesting that these might be unavoidable. However, these results can be too general to be applicable to natural data distributions. Indeed, humans are quite robust for tasks involving vision. This apparent conflict motivates a deeper dive into the question: Are adversarial examples truly unavoidable? \nIn this work, we theoretically demonstrate that a key property of the data distribution -- concentration on small-volume subsets of the input space -- determines whether a robust classifier exists. We further demonstrate that, for a data distribution concentrated on a union of low-dimensional linear subspaces, utilizing structure in data naturally leads to classifiers that enjoy data-dependent polyhedral robustness guarantees, improving upon methods for provable certification in certain regimes.", "keywords": "Adversarial Robustness;Geometry in Data;Low Dimensional Modeling", "primary_area": "", "supplementary_material": "", "author": "Ambar Pal;Jeremias Sulam;Rene Vidal", "authorids": "~Ambar_Pal1;~Jeremias_Sulam1;~Rene_Vidal1", "gender": "M;M;", "homepage": "http://www.cis.jhu.edu/~ambar/;;http://www.vision.jhu.edu", "dblp": "170/0102;156/3028;v/ReneVidal", "google_scholar": "gO1731YAAAAJ;1awx1aIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;rene-vidal-74844928/", "or_profile": "~Ambar_Pal1;~Jeremias_Sulam1;~Rene_Vidal1", "aff": "Johns Hopkins University;Johns Hopkins University;Amazon", "aff_domain": "jhu.edu;jhu.edu;amazon.com", "position": "PhD student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\npal2023adversarial,\ntitle={Adversarial Examples Might be Avoidable: The Role of Data Concentration in Adversarial Robustness},\nauthor={Ambar Pal and Jeremias Sulam and Rene Vidal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JDoA6admhv}\n}", "github": "", "project": "", "reviewers": "Ui3o;kZpV;8n5k;eEv4", "pdf_size": 1696530, "rating": "5;6;7;8", "confidence": "4;3;2;4", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "153;88;111;64", "wc_strengths": "80;29;224;77", "wc_weaknesses": "268;137;570;100", "wc_questions": "126;4;114;239", "wc_limitations": "1;10;14;34", "wc_review": "628;268;1033;514", "wc_reply_reviewers": "0;0;48;79", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.0, 32.810059433045836 ], "wc_strengths_avg": [ 102.5, 73.00856114182774 ], "wc_weaknesses_avg": [ 268.75, 184.78822338017108 ], "wc_questions_avg": [ 120.75, 83.1966796212445 ], "wc_limitations_avg": [ 14.75, 12.07010770457331 ], "wc_review_avg": [ 610.75, 276.3289479949576 ], "wc_reply_reviewers_avg": [ 31.75, 33.58850249713434 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 
-0.1348399724926484, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6001039242596674213&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "jhu.edu;jhu.edu;amazon.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Johns Hopkins University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.jhu.edu;https://www.amazon.com", "aff_unique_abbr": "JHU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Partial Multi-Label Learning with Probabilistic Graphical Disambiguation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72068", "id": "JDw50IX4TY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/04e05ba5cbc36044f6499d1edf15247e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JDw50IX4TY", "openreview": "https://openreview.net/forum?id=JDw50IX4TY", "poster": "/media/PosterPDFs/NeurIPS%202023/72068.png?t=1700727788.8420572", "slides": "https://nips.cc/virtual/2023/poster/72068", "video": "https://nips.cc/virtual/2023/poster/72068", "author_site": "Jun-Yi Hang, Min-Ling Zhang", "tldr": "", "abstract": "In partial multi-label learning (PML), each training example is associated with a set of candidate labels, among which only some labels are valid. As a common strategy to tackle PML problem, disambiguation aims to recover the ground-truth labeling information from such inaccurate annotations. However, existing approaches mainly rely on heuristics or ad-hoc rules to disambiguate candidate labels, which may not be universal enough in complicated real-world scenarios. To provide a principled way for disambiguation, we make a first attempt to explore the probabilistic graphical model for PML problem, where a directed graph is tailored to infer latent ground-truth labeling information from the generative process of partial multi-label data. Under the framework of stochastic gradient variational Bayes, a unified variational lower bound is derived for this graphical model, which is further relaxed probabilistically so that the desired prediction model can be induced with simultaneously identified ground-truth labeling information. 
Comprehensive experiments on multiple synthetic and real-world data sets show that our approach outperforms the state-of-the-art counterparts.", "keywords": "Machine learning;multi-label learning;partial multi-label learning;label disambiguation", "primary_area": "", "supplementary_material": "/attachment/1ba87dadeb445bde855d9a41928a33f7a9041099.pdf", "author": "Jun-Yi Hang;Min-Ling Zhang", "authorids": "~Jun-Yi_Hang1;~Min-Ling_Zhang2", "gender": "M;M", "homepage": ";http://palm.seu.edu.cn/zhangml/", "dblp": "https://dblp.uni-trier.de/pid/299/4577;84/271.html", "google_scholar": "https://scholar.google.com.hk/citations?user=s-4VLP0AAAAJ;uFHCIM0AAAAJ", "orcid": ";0000-0003-1880-5918", "linkedin": ";", "or_profile": "~Jun-Yi_Hang1;~Min-Ling_Zhang2", "aff": "Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nhang2023partial,\ntitle={Partial Multi-Label Learning with Probabilistic Graphical Disambiguation},\nauthor={Jun-Yi Hang and Min-Ling Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JDw50IX4TY}\n}", "github": "", "project": "", "reviewers": "65gX;GJDR;uhwx;mCzx;fZq4", "pdf_size": 1611817, "rating": "2;5;5;6;7", "confidence": "5;3;5;2;3", "soundness": "2;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "85;80;60;53;70", "wc_strengths": "18;85;36;49;96", "wc_weaknesses": "237;121;203;126;97", "wc_questions": "86;117;7;86;11", "wc_limitations": "1;1;14;5;32", "wc_review": "427;404;320;319;306", "wc_reply_reviewers": "510;0;21;42;11", "wc_reply_authors": "131;0;27;73;21", "reply_reviewers": "2;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.0, 1.6733200530681511 ], "confidence_avg": [ 3.6, 1.2 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.6, 11.943198901466893 ], "wc_strengths_avg": [ 56.8, 29.430596324233733 ], "wc_weaknesses_avg": [ 156.8, 53.61492329566461 ], "wc_questions_avg": [ 61.4, 44.27459768309589 ], "wc_limitations_avg": [ 10.6, 11.706408501329518 ], "wc_review_avg": [ 355.2, 50.013598150902915 ], "wc_reply_reviewers_avg": [ 116.8, 197.08617404577114 ], "wc_reply_authors_avg": [ 50.4, 46.81709089638099 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6972166887783962, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17640154727088028701&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "seu.edu.cn;seu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "JG4BshgCas", "title": "Generalizing to Unseen Domains for Regression", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the context of classification, domain generalization (DG) aims to predict the labels of unseen target-domain data using only labeled source-domain data, where the source and target domains usually share the same label set. 
However, in the context of regression, DG is not well studied in the literature, and the main reason is that the ranges of response variables in two domains are often different, even disjoint under some extreme conditions. In this paper, we systematically investigate domain generalization in the regression setting and propose a weighted meta-learning strategy to obtain optimal initialization across domains to tackle the challenge. Unlike classification, the labels (responding values) in regression naturally have ordinal relatedness. The relatedness brings a core challenge in meta-learning for regression: the hard meta-tasks with less ordinal relatedness are under-sampled from training domains. To further address the hard meta-tasks, we adopt the feature discrepancy to calculate the discrepancy between any two domains and take the discrepancy as the importance of meta-tasks in the meta-learning framework. Extensive regression experiments on the standard benchmark DomainBed demonstrate the superiority of the proposed method.", "keywords": "domain generalization;regression;meta-learning", "primary_area": "", "supplementary_material": "/attachment/bfce6933fbe15151c712075385d0a27300185f51.zip", "author": "Ning Ma;Feng Liu;Haishuai Wang;Xiang Zhang;Hongyang Chen;Bo Han;Jiajun Bu", "authorids": "~Ning_Ma1;~Feng_Liu2;~Haishuai_Wang2;~Xiang_Zhang10;~Hongyang_Chen2;~Bo_Han1;~Jiajun_Bu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://ningma-ai.github.io/;https://fengliu90.github.io/index.html;https://www.linkedin.com/in/haishuai-wang-b5241775/;http://xiangzhang.info/;https://www.linkedin.com/in/hongyangchen/;https://person.zju.edu.cn/bjj;https://bhanml.github.io/", "dblp": "60/3634/;77/1318-3;163/0767;https://dblp.uni-trier.de/pers/hd/z/Zhang_0012:Xiang;13/3715;50/3147;241/0472-3", "google_scholar": "ZjX-TDIAAAAJ;https://scholar.google.com/citations?hl=en;;0hCzMi4AAAAJ;https://scholar.google.ca/citations?user=s-HDT8UAAAAJ;OgZP2okAAAAJ;nTNjqHwAAAAJ", "orcid": ";0000-0002-5005-9129;0000-0003-1617-0920;;0000-0002-7626-0162;0000-0002-1097-2044;", "linkedin": ";alexfengliu;;;hongyangchen/;;", "or_profile": "~Ning_Ma1;~Feng_Liu2;~Haishuai_Wang2;~Xiang_Zhang10;~Hongyang_Chen2;~Jiajun_Bu1;~bo_han2", "aff": "Zhejiang University;University of Melbourne;Zhejiang University;University of North Carolina at Charlotte;Zhejiang Lab, China;Zhejiang University;RIKEN", "aff_domain": "zju.edu.cn;unimelb.edu.au;zju.edu.cn;uncc.edu;zhejianglab.com;zju.edu.cn;riken.jp", "position": "PhD student;Assistant Professor;Research Professor;Assistant Professor;Senior Research Expert;Full Professor;Adjunct Scientist", "bibtex": "@misc{\nma2023generalizing,\ntitle={Generalizing to Unseen Domains for Regression},\nauthor={Ning Ma and Feng Liu and Haishuai Wang and Xiang Zhang and Hongyang Chen and Bo Han and Jiajun Bu},\nyear={2023},\nurl={https://openreview.net/forum?id=JG4BshgCas}\n}", "github": "", "project": "", "reviewers": "phkn;2irV;tR9s;JfrS;ajvr", "site": "https://openreview.net/forum?id=JG4BshgCas", "pdf_size": 1849058, "rating": "3;5;6;7;7", "confidence": "5;4;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "76;112;36;147;197", "wc_strengths": "34;50;28;263;156", "wc_weaknesses": "153;228;20;234;35", "wc_questions": "24;11;6;48;109", "wc_limitations": "1;8;1;5;3", "wc_review": "288;409;91;697;500", "wc_reply_reviewers": "0;0;0;62;30", "wc_reply_authors": "43;265;0;30;81", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.6, 
1.4966629547095764 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 113.6, 55.69416486491202 ], "wc_strengths_avg": [ 106.2, 91.16227289838709 ], "wc_weaknesses_avg": [ 134.0, 91.64496712858814 ], "wc_questions_avg": [ 39.6, 37.62233379257592 ], "wc_limitations_avg": [ 3.6, 2.65329983228432 ], "wc_review_avg": [ 397.0, 203.11080719646603 ], "wc_reply_reviewers_avg": [ 18.4, 24.703036250631218 ], "wc_reply_authors_avg": [ 83.8, 94.25794396229954 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8685990362153793, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_D0tCPIcth4J:scholar.google.com/&scioq=Generalizing+to+Unseen+Domains+for+Regression&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3;0;4", "aff_unique_norm": "Zhejiang University;University of Melbourne;University of North Carolina at Charlotte;Zhejiang Lab;RIKEN", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.unimelb.edu.au;https://www.uncc.edu;http://www.zhejianglab.com;https://www.riken.jp", "aff_unique_abbr": "ZJU;UniMelb;UNCC;;RIKEN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlotte", "aff_country_unique_index": "0;1;0;2;0;0;3", "aff_country_unique": "China;Australia;United States;Japan" }, { "title": "GeoDE: a Geographically Diverse Evaluation Dataset for Object Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73631", "id": "JGVSxwKHbq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d08b6801f24dda81199079a3371d77f9-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=JGVSxwKHbq", "openreview": "https://openreview.net/forum?id=JGVSxwKHbq", "poster": "/media/PosterPDFs/NeurIPS%202023/73631.png?t=1701378138.4452274", "slides": "https://nips.cc/virtual/2023/poster/73631", "video": "https://nips.cc/virtual/2023/poster/73631", "author_site": "Vikram V. Ramaswamy, Sing Yu Lin, Dora Zhao, Aaron Adcock, Laurens van der Maaten, Deepti Ghadiyaram, Olga Russakovsky", "tldr": "", "abstract": "Current dataset collection methods typically scrape large amounts of data from the web. While this technique is extremely scalable, data collected in this way tends to reinforce stereotypical biases, can contain personally identifiable information, and typically originates from Europe and North America. In this work, we rethink the dataset collection paradigm and introduce GeoDE, a geographically diverse dataset with 61,940 images from 40 classes and 6 world regions, and no personally identifiable information, collected by soliciting images from people across the world. We analyse GeoDE to understand differences in images collected in this manner compared to web-scraping. Despite the smaller size of this dataset, we demonstrate its use as both an evaluation and training dataset, allowing us to highlight shortcomings in current models, as well as demonstrate improved performance even when training on this small dataset. 
We release the full dataset and code at https://geodiverse-data-collection.cs.princeton.edu/", "keywords": "Crowdsourcing;geodiversity", "primary_area": "", "supplementary_material": "/attachment/43c6af12593136adc081332d3220c64ae9561625.pdf", "author": "Vikram V. Ramaswamy;Sing Yu Lin;Dora Zhao;Aaron Bryan Adcock;Laurens van der Maaten;Deepti Ghadiyaram;Olga Russakovsky", "authorids": "~Vikram_V._Ramaswamy1;~Sing_Yu_Lin1;~Dora_Zhao1;~Aaron_Bryan_Adcock1;~Laurens_van_der_Maaten3;~Deepti_Ghadiyaram2;~Olga_Russakovsky1", "gender": "Trans Male;;F;M;;F;F", "homepage": ";;https://dorazhao99.github.io;;;https://deeptigp.github.io/;http://cs.princeton.edu/~olgarus", "dblp": "280/0025;;295/8515;133/2099;;158/9332;52/6883", "google_scholar": "OoHs7BgAAAAJ;;I-OInyYAAAAJ;oa78zHUAAAAJ;;NyKCrmoAAAAJ;TB5OwW8AAAAJ", "orcid": ";;;;;;0000-0001-5272-3241", "linkedin": ";;;aaron-adcock-79855383/;;;", "or_profile": "~Vikram_V._Ramaswamy1;~Sing_Yu_Lin1;~Dora_Zhao1;~Aaron_Bryan_Adcock1;~Laurens_van_der_Maaten3;~Deepti_Ghadiyaram2;~Olga_Russakovsky1", "aff": "Princeton University;;Sony AI;Facebook / Meta;;Runway;Princeton University", "aff_domain": "princeton.edu;;sony.com;meta.com;;runway.com;princeton.edu", "position": "PhD student;;AI Engineer;Researcher;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nramaswamy2023geode,\ntitle={Geo{DE}: a Geographically Diverse Evaluation Dataset for Object Recognition},\nauthor={Vikram V. Ramaswamy and Sing Yu Lin and Dora Zhao and Aaron Bryan Adcock and Laurens van der Maaten and Deepti Ghadiyaram and Olga Russakovsky},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=JGVSxwKHbq}\n}", "github": "", "project": "", "reviewers": "1Lpg;8Bhq;TgPt", "pdf_size": 19257286, "rating": "6;7;7", "confidence": "4;3;4", "wc_summary_and_contributions": "30;131;38", "wc_strengths": "19;95;65", "wc_improvement": "81;129;43", "wc_limitations": "25;8;12", "wc_correctness": "2;20;1", "wc_clarity": "1;20;3", "wc_relation_to_prior_work": "1;9;12", "wc_documentation": "15;18;1", "wc_additional_feedback": "1;1;1", "wc_review": "175;431;176", "wc_reply_reviewers": "41;20;0", "wc_reply_authors": "194;168;83", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 66.33333333333333, 45.84272631023983 ], "wc_strengths_avg": [ 59.666666666666664, 31.255221785949445 ], "wc_improvement_avg": [ 84.33333333333333, 35.188381921057726 ], "wc_limitations_avg": [ 15.0, 7.2571803523590805 ], "wc_correctness_avg": [ 7.666666666666667, 8.73053390247253 ], "wc_clarity_avg": [ 8.0, 8.524474568362947 ], "wc_relation_to_prior_work_avg": [ 7.333333333333333, 4.642796092394707 ], "wc_documentation_avg": [ 11.333333333333334, 7.408703590297622 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 260.6666666666667, 120.44454694542584 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 16.73983937265296 ], "wc_reply_authors_avg": [ 148.33333333333334, 47.401359567937384 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5982885881847581082&as_sdt=80005&sciodt=0,11&hl=en", 
"gs_version_total": 9, "email": "princeton.edu;;sony.com;meta.com;;runway.com;princeton.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Princeton University;Sony;Meta;Runway", "aff_unique_dep": ";Sony AI;Meta Platforms, Inc.;", "aff_unique_url": "https://www.princeton.edu;https://www.sony.com;https://meta.com;https://www.runwayml.com", "aff_unique_abbr": "Princeton;Sony AI;Meta;Runway", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "DatasetDM: Synthesizing Data with Perception Annotations Using Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72067", "id": "JIKM2vS8XU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab6e7ad2354f350b451b5a8e14d04f51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JIKM2vS8XU", "openreview": "https://openreview.net/forum?id=JIKM2vS8XU", "poster": "/media/PosterPDFs/NeurIPS%202023/72067.png?t=1698936431.5827792", "slides": "https://nips.cc/virtual/2023/poster/72067", "video": "https://nips.cc/virtual/2023/poster/72067", "author_site": "Weijia Wu, Yuzhong Zhao, Hao Chen, Yuchao Gu, Rui Zhao, Yefei He, Hong Zhou, Mike Zheng Shou, Chunhua Shen", "tldr": "", "abstract": "Current deep networks are very data-hungry and benefit from training on large-scale datasets, which are often time-consuming to collect and annotate. By contrast, synthetic data can be generated infinitely using generative models such as DALL-E and diffusion models, with minimal effort and cost. In this paper, we present DatasetDM, a generic dataset generation model that can produce diverse synthetic\nimages and the corresponding high-quality perception annotations (e.g., segmentation masks, and depth). Our method builds upon the pre-trained diffusion model and extends text-guided image synthesis to perception data generation. We show that the rich latent code of the diffusion model can be effectively decoded as accurate perception annotations using a decoder module. Training the decoder only needs less than 1% (around 100 images) of manually labeled images, enabling the generation of an infinitely large annotated dataset. Then these synthetic data can be used for training various perception models on downstream tasks. To showcase the power of the proposed approach, we generate datasets with rich dense pixel-wise labels for a wide range of downstream tasks, including semantic15\nsegmentation, instance segmentation, and depth estimation. 
Notably, it achieves 1) state-of-the-art results on semantic segmentation and instance segmentation; 2) significantly better efficiency and robustness in domain generalization than real data; 3) state-of-the-art results in the zero-shot segmentation setting; and 4) flexibility for efficient application and novel task composition (e.g., image editing).", "keywords": "Diffusion Model; Text-guided dataset generation", "primary_area": "", "supplementary_material": "/attachment/240bda0f9c182ef967bd6fbc318817cfbea9c7c8.pdf", "author": "Weijia Wu;Yuzhong Zhao;Hao Chen;Yuchao Gu;Rui Zhao;Yefei He;Hong Zhou;Mike Zheng Shou;Chunhua Shen", "authorids": "~Weijia_Wu2;~Yuzhong_Zhao1;~Hao_Chen17;~Yuchao_Gu1;~Rui_Zhao12;~Yefei_He1;~Hong_Zhou3;~Mike_Zheng_Shou1;~Chunhua_Shen2", "gender": "M;M;;M;M;M;M;;", "homepage": "https://weijiawu.github.io/;https://callsys.github.io/zhaoyuzhong.github.io-main/;;https://ycgu.site/;;https://hexy.tech/;https://person.zju.edu.cn/zhouhong;;", "dblp": "87/7695-1;42/8750;;266/4395;26/2578-19;92/6254;45/3426;;", "google_scholar": "NgjTRe4AAAAJ;tStQNm4AAAAJ;;YpfrXyQAAAAJ;https://scholar.google.com.hk/citations?user=wYs7vogAAAAJ;CTEQwwwAAAAJ;;;", "orcid": "0000-0003-3912-7212;0000-0002-2425-6786;;;0000-0003-4271-0206;0000-0002-2171-4518;;;", "linkedin": "%E5%A8%81%E4%BD%B3-%E5%90%B4-07a852280/;;;;;;;;", "or_profile": "~Weijia_Wu2;~Yuzhong_Zhao1;~Hao_Chen17;~Yuchao_Gu1;~Rui_Zhao12;~Yefei_He1;~Hong_Zhou3;~Mike_Zheng_Shou1;~Chunhua_Shen2", "aff": "Zhejiang University;University of Chinese Academy of Sciences;;National University of Singapore;National University of Singapore;Zhejiang University;Zhejiang University;;", "aff_domain": "zju.edu.cn;ucas.ac.cn;;u.nus.edu;u.nus.edu;zju.edu.cn;zju.edu.cn;;", "position": "PhD student;MS student;;PhD student;PhD student;PhD student;Full Professor;;", "bibtex": "@inproceedings{\nwu2023datasetdm,\ntitle={Dataset{DM}: Synthesizing Data with Perception Annotations Using Diffusion Models},\nauthor={Weijia Wu and Yuzhong Zhao and Hao Chen and Yuchao Gu and Rui Zhao and Yefei He and Hong Zhou and Mike Zheng Shou and Chunhua Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JIKM2vS8XU}\n}", "github": "", "project": "", "reviewers": "DG4j;rde2;aPc6;PHiv;21oc", "pdf_size": 8795306, "rating": "4;4;5;5;5", "confidence": "5;4;4;4;5", "soundness": "3;3;2;3;2", "novelty": "1;2;2;3;2", "presentation": "3;3;3;4;3", "wc_summary": "88;44;154;54;155", "wc_strengths": "30;31;61;29;125", "wc_weaknesses": "198;375;191;92;965", "wc_questions": "104;43;203;8;121", "wc_limitations": "8;5;20;9;14", "wc_review": "428;498;629;192;1380", "wc_reply_reviewers": "360;502;26;23;0", "wc_reply_authors": "2975;1742;226;83;0", "reply_reviewers": "3;2;1;1;0", "reply_authors": "9;4;3;3;1", "rating_avg": [ 4.6, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 99.0, 47.60672221441001 ], "wc_strengths_avg": [ 55.2, 36.912870384189844 ], "wc_weaknesses_avg": [ 364.2, 313.9397394405493 ], "wc_questions_avg": [ 95.8, 67.36586672789122 ], "wc_limitations_avg": [ 11.2, 5.2687759489277965 ], "wc_review_avg": [ 625.4, 403.1047506542189 ], "wc_reply_reviewers_avg": [ 182.2, 208.24255088718058 ], "wc_reply_authors_avg": [ 1005.2, 1173.9692329869638 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 4.0,
2.6832815729997477 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 120, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13551616491363406887&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;ucas.ac.cn;;u.nus.edu;u.nus.edu;zju.edu.cn;zju.edu.cn;;", "author_num": 9, "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "Zhejiang University;University of Chinese Academy of Sciences;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;http://www.ucas.ac.cn;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;UCAS;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Simple, Scalable and Effective Clustering via One-Dimensional Projections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72066", "id": "JIYdbHDonF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cbaffeeda13dbd8bf9489feb3f198ff4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JIYdbHDonF", "openreview": "https://openreview.net/forum?id=JIYdbHDonF", "poster": "/media/PosterPDFs/NeurIPS%202023/72066.png?t=1701890249.5917609", "slides": "https://nips.cc/virtual/2023/poster/72066", "video": "https://nips.cc/virtual/2023/poster/72066", "author_site": "Moses Charikar, Monika Henzinger, Lunjia Hu, Maximilian V\u00f6tsch, Erik Waingarten", "tldr": "", "abstract": "Clustering is a fundamental problem in unsupervised machine learning with many applications in data analysis. Popular clustering algorithms such as Lloyd's algorithm and $k$-means++ can take $\\Omega(ndk)$ time when clustering $n$ points in a $d$-dimensional space (represented by an $n\\times d$ matrix $X$) into $k$ clusters. On massive datasets with moderate to large $k$, the multiplicative $k$ factor can become very expensive. We introduce a simple randomized clustering algorithm that provably runs in expected time $O(\\mathsf{nnz}(X) + n\\log n)$ for arbitrary $k$. Here $\\mathsf{nnz}(X)$ is the total number of non-zero entries in the input dataset $X$, which is upper bounded by $nd$ and can be significantly smaller for sparse datasets. We prove that our algorithm achieves approximation ratio $\\widetilde{O}(k^4)$ on any input dataset for the $k$-means objective, and our experiments show that the quality of the clusters found by our algorithm is usually much better than this worst-case bound. We use our algorithm for $k$-means clustering and for coreset construction; our experiments show that it gives a new tradeoff between running time and cluster quality compared to previous state-of-the-art methods for these tasks. Our theoretical analysis is based on novel results of independent interest. 
We show that the approximation ratio achieved after a random one-dimensional projection can be lifted to the original points and that $k$-means++ seeding can be implemented in expected time $O(n\\log n)$ in one dimension.", "keywords": "clustering;k-means;random projection;massive datasets", "primary_area": "", "supplementary_material": "/attachment/13129883ff686cb54818b6b14423ff0c930f4f96.zip", "author": "Moses Charikar;Monika Henzinger;Lunjia Hu;Maximilian V\u00f6tsch;Erik Waingarten", "authorids": "~Moses_Charikar1;~Monika_Henzinger1;~Lunjia_Hu1;~Maximilian_V\u00f6tsch1;~Erik_Waingarten1", "gender": "M;;M;Not Specified;M", "homepage": "https://profiles.stanford.edu/moses-charikar;;https://lunjiahu.com;;https://sites.google.com/site/erikwaing/home", "dblp": "https://dblp.uni-trier.de/pers/hd/c/Charikar:Moses;;195/6273;335/2264;", "google_scholar": "zX3ba1kAAAAJ;NXbggxYAAAAJ;ss7CIgcAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Moses_Charikar1;~Monika_Henzinger1;~Lunjia_Hu1;~Maximilian_V\u00f6tsch1;~Erik_Waingarten1", "aff": "Stanford University;Universit\u00e4t Vienna;Stanford University;Universit\u00e4t Vienna;, University of Pennsylvania", "aff_domain": "stanford.edu;univie.ac.at;stanford.edu;univie.ac.at;cis.upenn.edu", "position": "Full Professor;Full Professor;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncharikar2023simple,\ntitle={Simple, Scalable and Effective Clustering via One-Dimensional Projections},\nauthor={Moses Charikar and Monika Henzinger and Lunjia Hu and Maximilian V{\\\"o}tsch and Erik Waingarten},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JIYdbHDonF}\n}", "github": "", "project": "", "reviewers": "7JVT;fbAv;t2Mh;sFfD;UQjJ", "pdf_size": 1445159, "rating": "4;5;6;6;7", "confidence": "4;4;2;3;4", "soundness": "2;3;2;3;4", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "107;60;105;64;189", "wc_strengths": "74;52;81;58;163", "wc_weaknesses": "136;99;70;155;332", "wc_questions": "2;28;207;9;114", "wc_limitations": "1;4;12;1;12", "wc_review": "320;243;475;287;810", "wc_reply_reviewers": "5;80;0;5;41", "wc_reply_authors": "0;387;0;0;0", "reply_reviewers": "1;2;0;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 105.0, 46.40258613482658 ], "wc_strengths_avg": [ 85.6, 40.092892138133415 ], "wc_weaknesses_avg": [ 158.4, 91.64409419051508 ], "wc_questions_avg": [ 72.0, 78.4780224011793 ], "wc_limitations_avg": [ 6.0, 5.019960159204453 ], "wc_review_avg": [ 427.0, 206.83229921847314 ], "wc_reply_reviewers_avg": [ 26.2, 30.65550521521379 ], "wc_reply_authors_avg": [ 77.4, 154.8 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2941742027072762, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14951749087423777830&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "stanford.edu;univie.ac.at;stanford.edu;univie.ac.at;cis.upenn.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Stanford University;University of Vienna;University of Pennsylvania", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.stanford.edu;https://univie.ac.at;https://www.upenn.edu", "aff_unique_abbr": "Stanford;UV;UPenn", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "United States;Austria" }, { "title": "Global Identifiability of $\\ell_1$-based Dictionary Learning via Matrix Volume Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72065", "id": "JK2oPrP8B3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/717b9fd2ede6b8a9971a296d5179df89-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JK2oPrP8B3", "openreview": "https://openreview.net/forum?id=JK2oPrP8B3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72065", "video": "https://nips.cc/virtual/2023/poster/72065", "author_site": "Jingzhou Hu, Kejun Huang", "tldr": "", "abstract": "We propose a novel formulation for dictionary learning that minimizes the determinant of the dictionary matrix, also known as its volume, subject to the constraint that each row of the sparse coefficient matrix has unit $\\ell_1$ norm. The main motivation for the proposed formulation is that it provides global identifiability guarantee of the groundtruth dictionary and sparse coefficient matrices, up to the inherent and inconsequential permutation and scaling ambiguity, if a set of vectors obtained from the coefficient matrix lies inside the $\\ell_\\infty$ norm ball but contains the $\\ell_2$ norm ball in their convex hull. Unlike existing work on identifiability of dictionary learning, our result is global, meaning that a globally optimal solution to our proposed formulation has to be a permuted and rescaled version of the groundtruth factors. Another major improvement in our result is that there is no additional assumption on the dictionary matrix other than it is nonsingular, unlike most other work that require the atoms of the dictionary to be mutually incoherent. We also provide a probabilistic analysis and show that if the sparse coefficient matrix is generated from the widely adopted Bernoulli-Gaussian model, then it is globally identifiable if the sample size is bigger than a constant times $k\\log k$, where $k$ is the number atoms in the dictionary, with overwhelming probability. The bound is essentially the same as those local identifiability results, but we show that it is also global. Finally, we propose algorithms to solve the new proposed formulation, specifically one based on the linearized-ADMM with efficient per-iteration updates. 
The proposed algorithms exhibit surprisingly effective performance in correctly and efficiently recovering the dictionary, as demonstrated in the numerical experiments.", "keywords": "dictionary learning;matrix volume;nonconvex optimization", "primary_area": "", "supplementary_material": "/attachment/1ebbab90b0d7fbb134044c8242cfcc84e1938b27.pdf", "author": "Jingzhou Hu;Kejun Huang", "authorids": "~Jingzhou_Hu1;~Kejun_Huang1", "gender": ";M", "homepage": ";https://www.cise.ufl.edu/~kejun/", "dblp": "359/7735;140/8874", "google_scholar": "T6FpaqMAAAAJ;-RIDViAAAAAJ", "orcid": ";", "linkedin": "jingzhouhu/;", "or_profile": "~Jingzhou_Hu1;~Kejun_Huang1", "aff": "University of Florida;University of Florida", "aff_domain": "ufl.edu;ufl.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhu2023global,\ntitle={Global Identifiability of \\${\\textbackslash}ell\\_1\\$-based Dictionary Learning via Matrix Volume Optimization},\nauthor={Jingzhou Hu and Kejun Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JK2oPrP8B3}\n}", "github": "", "project": "", "reviewers": "cSox;Fbew;Jcds", "pdf_size": 805529, "rating": "5;6;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "88;84;48", "wc_strengths": "77;19;29", "wc_weaknesses": "88;86;176", "wc_questions": "10;101;139", "wc_limitations": "1;1;1", "wc_review": "264;291;393", "wc_reply_reviewers": "39;8;292", "wc_reply_authors": "0;0;245", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.33333333333333, 17.987650084309387 ], "wc_strengths_avg": [ 41.666666666666664, 25.315783394730033 ], "wc_weaknesses_avg": [ 116.66666666666667, 41.96294661828324 ], "wc_questions_avg": [ 83.33333333333333, 54.12536887223547 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 316.0, 55.5517776493246 ], "wc_reply_reviewers_avg": [ 113.0, 127.20324943438617 ], "wc_reply_authors_avg": [ 81.66666666666667, 115.49410759380275 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3367298756930985601&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ufl.edu;ufl.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ufl.edu", "aff_unique_abbr": "UF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Vocabulary-free Image Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72064", "id": "JKhyQHpx7B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/619cbddb92b8c6fecaf2b86463153be9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JKhyQHpx7B", "openreview": "https://openreview.net/forum?id=JKhyQHpx7B", "poster": "/media/PosterPDFs/NeurIPS%202023/72064.png?t=1701785540.7330818", "slides": "https://nips.cc/virtual/2023/poster/72064", "video": 
"https://nips.cc/virtual/2023/poster/72064", "author_site": "Alessandro Conti, Enrico Fini, Massimiliano Mancini, Paolo Rota, Yiming Wang, Elisa Ricci", "tldr": "", "abstract": "Recent advances in large vision-language models have revolutionized the image classification paradigm. Despite showing impressive zero-shot capabilities, a pre-defined set of categories, a.k.a. the vocabulary, is assumed at test time for composing the textual prompts. However, such assumption can be impractical when the semantic context is unknown and evolving. We thus formalize a novel task, termed as Vocabulary-free Image Classification (VIC), where we aim to assign to an input image a class that resides in an unconstrained language-induced semantic space, without the prerequisite of a known vocabulary. VIC is a challenging task as the semantic space is extremely large, containing millions of concepts, with hard-to-discriminate fine-grained categories. In this work, we first empirically verify that representing this semantic space by means of an external vision-language database is the most effective way to obtain semantically relevant content for classifying the image. We then propose Category Search from External Databases (CaSED), a method that exploits a pre-trained vision-language model and an external vision-language database to address VIC in a training-free manner. CaSED first extracts a set of candidate categories from captions retrieved from the database based on their semantic similarity to the image, and then assigns to the image the best matching candidate category according to the same vision-language model. Experiments on benchmark datasets validate that CaSED outperforms other complex vision-language frameworks, while being efficient with much fewer parameters, paving the way for future research in this direction.", "keywords": "language and vision;zero-shot classification;image classification", "primary_area": "", "supplementary_material": "", "author": "Alessandro Conti;Enrico Fini;Massimiliano Mancini;Paolo Rota;Yiming Wang;Elisa Ricci", "authorids": "~Alessandro_Conti1;~Enrico_Fini1;~Massimiliano_Mancini1;~Paolo_Rota1;~Yiming_Wang2;~Elisa_Ricci1", "gender": ";M;M;M;F;F", "homepage": "https://alessandroconti.me;;https://mancinimassimiliano.github.io/;;https://www.yimingwang.it/;http://elisaricci.eu/", "dblp": "52/8670;252/5371;192/2058;119/1606;71/3182-2;88/397", "google_scholar": "https://scholar.google.com/citations?hl=en;OQMtSKIAAAAJ;https://scholar.google.it/citations?user=bqTPA8kAAAAJ;https://scholar.google.it/citations?user=K1goGQ4AAAAJ;https://scholar.google.co.uk/citations?user=KBZ3zrEAAAAJ;https://scholar.google.it/citations?user=xf1T870AAAAJ", "orcid": "0000-0002-3044-1320;;0000-0001-8595-9955;0000-0003-0663-5659;0000-0002-5932-4371;", "linkedin": "altndrr/;ef21;;;yiming-wang-8b878685/;", "or_profile": "~Alessandro_Conti1;~Enrico_Fini1;~Massimiliano_Mancini1;~Paolo_Rota1;~Yiming_Wang2;~Elisa_Ricci1", "aff": "University of Trento;Amazon;University of Tuebingen;University of Trento;Fondazione Bruno Kessler;University of Trento", "aff_domain": "unitn.it;amazon.com;uni-tuebingen.de;unitn.it;fbk.eu;unitn.it", "position": "PhD student;Intern;Postdoc;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nconti2023vocabularyfree,\ntitle={Vocabulary-free Image Classification},\nauthor={Alessandro Conti and Enrico Fini and Massimiliano Mancini and Paolo Rota and Yiming Wang and Elisa Ricci},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JKhyQHpx7B}\n}", "github": "", "project": "", "reviewers": "xx1r;3fhc;m2jc;panm", "pdf_size": 5768507, "rating": "3;5;7;7", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;3;2;4", "wc_summary": "75;163;99;96", "wc_strengths": "102;31;66;122", "wc_weaknesses": "283;205;401;292", "wc_questions": "8;152;45;26", "wc_limitations": "1;38;12;16", "wc_review": "469;589;623;552", "wc_reply_reviewers": "697;60;0;14", "wc_reply_authors": "1439;345;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 108.25, 32.93459427410637 ], "wc_strengths_avg": [ 80.25, 34.802119188348286 ], "wc_weaknesses_avg": [ 295.25, 69.80105658226098 ], "wc_questions_avg": [ 57.75, 55.96594946929785 ], "wc_limitations_avg": [ 16.75, 13.442005058770064 ], "wc_review_avg": [ 558.25, 57.32091677564134 ], "wc_reply_reviewers_avg": [ 192.75, 291.9737787884385 ], "wc_reply_authors_avg": [ 446.0, 590.356248378892 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16636573754884269538&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "unitn.it;amazon.com;uni-tuebingen.de;unitn.it;fbk.eu;unitn.it", "author_num": 6, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "University of Trento;Amazon;University of Tuebingen;Fondazione Bruno Kessler", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.unitn.it;https://www.amazon.com;https://www.uni-tuebingen.de/;https://www.fbk.eu", "aff_unique_abbr": "UniTN;Amazon;Uni T\u00fcbingen;FBK", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "Italy;United States;Germany" }, { "title": "Generalised f-Mean Aggregation for Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72063", "id": "JMrIeKjTAe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c78ae0c1140902bf3a430b1725bcc4e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JMrIeKjTAe", "openreview": "https://openreview.net/forum?id=JMrIeKjTAe", "poster": "/media/PosterPDFs/NeurIPS%202023/72063.png?t=1701670820.1315205", "slides": "https://nips.cc/virtual/2023/poster/72063", "video": "https://nips.cc/virtual/2023/poster/72063", "author_site": "Ryan Kortvelesy, Steven Morad, Amanda Prorok", "tldr": "", "abstract": "Graph Neural Network (GNN) architectures are defined by their implementations of update and aggregation modules. While many works focus on new ways to parametrise the update modules, the aggregation modules receive comparatively little attention. Because it is difficult to parametrise aggregation functions, most current methods select a ``standard aggregator'' such as mean, sum, or max. While this selection is often made without any reasoning, it has been shown that the choice of aggregator has a significant impact on performance, and that the best choice of aggregator is problem-dependent.
Since aggregation is a lossy operation, it is crucial to select the most appropriate aggregator in order to minimise information loss. In this paper, we present GenAgg, a generalised aggregation operator, which parametrises a function space that includes all standard aggregators. In our experiments, we show that GenAgg is able to represent the standard aggregators with much higher accuracy than baseline methods. We also show that using GenAgg as a drop-in replacement for an existing aggregator in a GNN often leads to a significant boost in performance across various tasks.", "keywords": "Aggregation;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/f9d4181314ef9e518d235526d13be4780901a6a5.pdf", "author": "Ryan Kortvelesy;Steven Morad;Amanda Prorok", "authorids": "~Ryan_Kortvelesy1;~Steven_Morad1;~Amanda_Prorok1", "gender": "M;M;", "homepage": ";http://www.dangersteve.com/home;", "dblp": "289/0863;247/9311;", "google_scholar": "fMxXjiIAAAAJ;KvCgriAAAAAJ;", "orcid": "0000-0001-6654-0796;0000-0002-8413-2953;", "linkedin": ";;", "or_profile": "~Ryan_Kortvelesy1;~Steven_Morad1;~Amanda_Prorok1", "aff": "University of Cambridge;University of Cambridge;", "aff_domain": "cam.ac.uk;cam.ac.uk;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nkortvelesy2023generalised,\ntitle={Generalised f-Mean Aggregation for Graph Neural Networks},\nauthor={Ryan Kortvelesy and Steven Morad and Amanda Prorok},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JMrIeKjTAe}\n}", "github": "", "project": "", "reviewers": "uMEa;3j4u;PwWR;rgaT", "pdf_size": 9901420, "rating": "5;5;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "43;65;214;160", "wc_strengths": "59;38;88;171", "wc_weaknesses": "221;112;109;121", "wc_questions": "2;26;67;198", "wc_limitations": "25;28;8;31", "wc_review": "350;269;486;681", "wc_reply_reviewers": "469;305;19;317", "wc_reply_authors": "964;381;0;410", "reply_reviewers": "4;3;1;2", "reply_authors": "4;4;1;4", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 120.5, 69.62219473702334 ], "wc_strengths_avg": [ 89.0, 50.5618433208284 ], "wc_weaknesses_avg": [ 140.75, 46.54231945230061 ], "wc_questions_avg": [ 73.25, 75.68148716826329 ], "wc_limitations_avg": [ 23.0, 8.916277250063503 ], "wc_review_avg": [ 446.5, 156.02003076528348 ], "wc_reply_reviewers_avg": [ 277.5, 162.64301399076444 ], "wc_reply_authors_avg": [ 438.75, 343.7116342226431 ], "reply_reviewers_avg": [ 2.5, 1.118033988749895 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4059463129821412628&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "On Slicing Optimality for Mutual Information", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/72062", "id": "JMuKfZx2xU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/749b64078a64fa5734a49fb40bc9fd65-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JMuKfZx2xU", "openreview": "https://openreview.net/forum?id=JMuKfZx2xU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72062", "video": "https://nips.cc/virtual/2023/poster/72062", "author_site": "Ammar Fayad, Majd Ibrahim", "tldr": "", "abstract": "Measuring dependence between two random variables is of great importance in various domains but is difficult to compute in today's complex environments with high-dimensional data. Recently, slicing methods have shown to be a scalable approach to measuring mutual information (MI) between high-dimensional variables by projecting these variables into one-dimensional spaces. Unfortunately, these methods use uniform distributions of slicing directions, which generally discard informative features between variables and thus lead to inaccurate quantification of dependence. In this paper, we propose a principled framework that searches for an \\textit{optimal} distribution of slices for MI. Importantly, we answer theoretical questions about finding the optimal slicing distribution in the context of MI and develop corresponding theoretical analyses. We also develop a practical algorithm, connecting our theoretical results with modern machine learning frameworks. Through comprehensive experiments in benchmark domains, we demonstrate significant gains in our information measure than state-of-the-art baselines.", "keywords": "Mutual information;Information Theory", "primary_area": "", "supplementary_material": "/attachment/3d7027975c56990a4684efd55b0d2053aface9c1.pdf", "author": "Ammar Fayad;Majd Ibrahim", "authorids": "~Ammar_Fayad1;~Majd_Ibrahim1", "gender": ";M", "homepage": ";", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Ammar_Fayad1;~Majd_Ibrahim1", "aff": "Massachusetts Institute of Technology;Higher Institute for Applied Sciences and Technology", "aff_domain": "mit.edu;hiast.edu.sy", "position": "Undergrad student;Undergrad student", "bibtex": "@inproceedings{\nfayad2023on,\ntitle={On Slicing Optimality for Mutual Information},\nauthor={Ammar Fayad and Majd Ibrahim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JMuKfZx2xU}\n}", "github": "", "project": "", "reviewers": "8cdY;k46z;w6uv;8Sok", "pdf_size": 857876, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;4;4", "wc_summary": "273;89;87;53", "wc_strengths": "121;55;63;23", "wc_weaknesses": "403;30;81;74", "wc_questions": "279;44;112;99", "wc_limitations": "7;4;4;12", "wc_review": "1083;222;347;261", "wc_reply_reviewers": "45;50;0;19", "wc_reply_authors": "0;137;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 125.5, 86.35247535537125 ], "wc_strengths_avg": [ 65.5, 35.36594407053204 ], "wc_weaknesses_avg": [ 147.0, 149.08889965386425 ], "wc_questions_avg": [ 133.5, 87.79664002682563 ], "wc_limitations_avg": [ 6.75, 
3.2691742076555053 ], "wc_review_avg": [ 478.25, 352.06915158815036 ], "wc_reply_reviewers_avg": [ 28.5, 20.22992832414391 ], "wc_reply_authors_avg": [ 34.25, 59.322740159234044 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13544526788919638481&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "mit.edu;hiast.edu.sy", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Higher Institute for Applied Sciences and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;", "aff_unique_abbr": "MIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "United States;" }, { "title": "Towards Combinatorial Generalization for Catalysts: A Kohn-Sham Charge-Density Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72061", "id": "JOHp5SmckS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/be82bb4bf8333107b0fe430e1017831a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JOHp5SmckS", "openreview": "https://openreview.net/forum?id=JOHp5SmckS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72061", "video": "https://nips.cc/virtual/2023/poster/72061", "author_site": "Phillip Pope, David Jacobs", "tldr": "", "abstract": "The Kohn-Sham equations underlie many important applications such as the discovery of new catalysts. Recent machine learning work on catalyst modeling has focused on prediction of the energy, but has so far not yet demonstrated significant out-of-distribution generalization. Here we investigate another approach based on the pointwise learning of the Kohn-Sham charge-density. On a new dataset of bulk catalysts with charge densities, we show density models can generalize to new structures with combinations of elements not seen at train time, a form of combinatorial generalization. We show that over 80% of binary and ternary test cases achieve faster convergence than standard baselines in Density Functional Theory, amounting to an average reduction of 13% in the number of iterations required to reach convergence, which may be of independent interest. 
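The slicing approach described in the abstract above, estimating MI between high-dimensional variables via one-dimensional projections with the distribution over slicing directions as the key design choice, can be illustrated with a small sketch. This is a minimal illustration under stated assumptions, not the authors' algorithm: it uses a crude histogram-based 1-D MI estimate, and a softmax reweighting of slices stands in for the paper's optimal slicing distribution; all function names are hypothetical.

```python
import numpy as np

def mi_1d(u, v, bins=16):
    """Crude plug-in MI estimate between two 1-D samples via a joint histogram."""
    pxy, _, _ = np.histogram2d(u, v, bins=bins)
    pxy /= pxy.sum()                                    # joint probabilities
    px = pxy.sum(axis=1, keepdims=True)                 # marginals
    py = pxy.sum(axis=0, keepdims=True)
    nz = pxy > 0
    return float((pxy[nz] * np.log(pxy[nz] / (px * py)[nz])).sum())

def sliced_mi(X, Y, n_slices=200, temp=None, seed=0):
    """MI over random 1-D slices; `temp` switches on a softmax reweighting
    toward informative directions (a crude stand-in for optimising the
    slicing distribution, which the paper does in a principled way)."""
    rng = np.random.default_rng(seed)
    vals = []
    for _ in range(n_slices):
        a = rng.normal(size=X.shape[1]); a /= np.linalg.norm(a)
        b = rng.normal(size=Y.shape[1]); b /= np.linalg.norm(b)
        vals.append(mi_1d(X @ a, Y @ b))                # MI of the projected pair
    vals = np.array(vals)
    if temp is None:
        return float(vals.mean())                       # uniform slicing distribution
    w = np.exp(vals / temp); w /= w.sum()
    return float(w @ vals)                              # informative slices dominate
```

On data where dependence is confined to a few directions, the reweighted estimate is noticeably larger than the uniform average, which is exactly the failure mode of uniform slicing that the paper targets.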
Our results suggest that density learning is a viable alternative, trading greater inference costs for a step towards combinatorial generalization, a key property for applications.", "keywords": "graph neural networks;equivariance;materials science;chemistry;density functional theory;combinatorial generalization;catalysts", "primary_area": "", "supplementary_material": "/attachment/e32149647c9588c0bdff3efc29ce42e72a9a4aa4.pdf", "author": "Phil Pope;David Jacobs", "authorids": "~Phil_Pope1;~David_Jacobs1", "gender": ";M", "homepage": "https://ppope.github.io/;http://www.cs.umd.edu/~djacobs", "dblp": "254/1952;j/DavidWJacobs.html", "google_scholar": "w_Y1qcwAAAAJ;WH2KmRgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Phil_Pope1;~David_W._Jacobs1", "aff": "University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\npope2023towards,\ntitle={Towards Combinatorial Generalization for Catalysts: A Kohn-Sham Charge-Density Approach},\nauthor={Phil Pope and David Jacobs},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JOHp5SmckS}\n}", "github": "", "project": "", "reviewers": "XGHX;Fg4b;iUWH;7uBr", "pdf_size": 1075535, "rating": "5;5;5;7", "confidence": "2;3;5;4", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "2;3;4;3", "wc_summary": "136;64;122;181", "wc_strengths": "42;42;64;137", "wc_weaknesses": "59;76;127;216", "wc_questions": "33;121;696;113", "wc_limitations": "73;33;75;37", "wc_review": "343;336;1084;684", "wc_reply_reviewers": "16;98;11;14", "wc_reply_authors": "0;56;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 125.75, 41.78740839056665 ], "wc_strengths_avg": [ 71.25, 39.00881310678396 ], "wc_weaknesses_avg": [ 119.5, 61.075772610749674 ], "wc_questions_avg": [ 240.75, 265.081473324712 ], "wc_limitations_avg": [ 54.5, 19.56399754651385 ], "wc_review_avg": [ 611.75, 306.7999144393623 ], "wc_reply_reviewers_avg": [ 34.75, 36.56073713698891 ], "wc_reply_authors_avg": [ 14.0, 24.24871130596428 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10281884151614911888&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "umd.edu;umd.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "MMD-Fuse: Learning and Combining Kernels for Two-Sample Testing Without Data Splitting", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72060", "id": "JOkgEY9os2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/edd00cead3425393baf13004de993017-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JOkgEY9os2", "openreview": "https://openreview.net/forum?id=JOkgEY9os2", 
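The pointwise charge-density learning described in the preceding abstract can be made concrete with a small regression sketch. This is a hypothetical setup, not the paper's model: the descriptor dimension and architecture are invented, and each training row is assumed to be a feature vector describing one real-space grid point's local atomic environment, with the Kohn-Sham density at that point as the target.

```python
import torch
from torch import nn

# Hypothetical pointwise regressor: local-environment descriptor -> charge density.
model = nn.Sequential(
    nn.Linear(64, 256), nn.SiLU(),
    nn.Linear(256, 256), nn.SiLU(),
    nn.Linear(256, 1),
)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

def train_step(feats, density):
    """feats: (n_points, 64) descriptors; density: (n_points, 1) reference values."""
    opt.zero_grad()
    pred = nn.functional.softplus(model(feats))  # charge density is non-negative
    loss = nn.functional.mse_loss(pred, density)
    loss.backward()
    opt.step()
    return loss.item()
```

Because the model is queried point by point, it can be evaluated on grids of structures whose element combinations never co-occurred in training, which is the combinatorial-generalization setting above, and the predicted density can seed the DFT self-consistency loop, which is where the reported reduction in iterations comes from.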
"poster": "/media/PosterPDFs/NeurIPS%202023/72060.png?t=1702227920.0569577", "slides": "https://nips.cc/virtual/2023/poster/72060", "video": "https://nips.cc/virtual/2023/poster/72060", "author_site": "Felix Biggs, Antonin Schrab, Arthur Gretton", "tldr": "", "abstract": "We propose novel statistics which maximise the power of a two-sample test based on the Maximum Mean Discrepancy (MMD), by\nadapting over the set of kernels used in defining it.\nFor finite sets, this reduces to combining (normalised) MMD values under each of these kernels via a weighted soft maximum.\nExponential concentration bounds are proved for our proposed statistics under the null and alternative.\nWe further show how these kernels can be chosen in a data-dependent but permutation-independent way, in a well-calibrated test, avoiding data splitting.\nThis technique applies more broadly to general permutation-based MMD testing, and includes the use of deep kernels with features learnt using unsupervised models such as auto-encoders.\nWe highlight the applicability of our MMD-Fuse tests on both synthetic low-dimensional and real-world high-dimensional data, and compare its performance in terms of power against current state-of-the-art kernel tests.", "keywords": "Testing;MMD;Kernel Methods;Two-sample testing", "primary_area": "", "supplementary_material": "", "author": "Felix Biggs;Antonin Schrab;Arthur Gretton", "authorids": "~Felix_Biggs1;~Antonin_Schrab1;~Arthur_Gretton1", "gender": ";;M", "homepage": "https://www.felixbiggs.com;;http://www.gatsby.ucl.ac.uk/~gretton/", "dblp": "267/9447;;56/2574", "google_scholar": "EqNqaqoAAAAJ;;OUv7J6QAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Felix_Biggs1;~Antonin_Schrab1;~Arthur_Gretton1", "aff": "University College London;;University College London", "aff_domain": "ucl.ac.uk;;ucl.ac.uk", "position": "PhD student;;Professor", "bibtex": "@inproceedings{\nbiggs2023mmdfuse,\ntitle={{MMD}-Fuse: Learning and Combining Kernels for Two-Sample Testing Without Data Splitting},\nauthor={Felix Biggs and Antonin Schrab and Arthur Gretton},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JOkgEY9os2}\n}", "github": "", "project": "", "reviewers": "fFeS;xB7K;1tja;jkkz", "pdf_size": 2538613, "rating": "7;7;7;8", "confidence": "4;5;5;2", "soundness": "3;3;4;4", "novelty": "3;2;4;4", "presentation": "4;3;2;4", "wc_summary": "225;446;74;164", "wc_strengths": "107;1;69;86", "wc_weaknesses": "194;1;179;91", "wc_questions": "277;1;17;57", "wc_limitations": "2;1;1;1", "wc_review": "805;450;340;399", "wc_reply_reviewers": "445;463;19;11", "wc_reply_authors": "1125;1718;33;17", "reply_reviewers": "3;4;1;1", "reply_authors": "3;5;2;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 227.25, 137.24316922892737 ], "wc_strengths_avg": [ 65.75, 39.73270063813936 ], "wc_weaknesses_avg": [ 116.25, 77.30257110859897 ], "wc_questions_avg": [ 88.0, 111.00900864344298 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 498.5, 181.1884378209603 ], "wc_reply_reviewers_avg": [ 234.5, 219.61045057100537 ], "wc_reply_authors_avg": [ 723.25, 729.0687124681733 ], "reply_reviewers_avg": [ 2.25, 1.299038105676658 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": -0.9428090415820632, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13314556436894446163&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "ucl.ac.uk;;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Latent exploration for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72059", "id": "JSVXZKqfLU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b0ca717599b7ba84d5e4f4c8b1ef6657-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JSVXZKqfLU", "openreview": "https://openreview.net/forum?id=JSVXZKqfLU", "poster": "/media/PosterPDFs/NeurIPS%202023/72059.png?t=1702265264.5035763", "slides": "https://nips.cc/virtual/2023/poster/72059", "video": "https://nips.cc/virtual/2023/poster/72059", "author_site": "Alberto Silvio Chiappa, Alessandro Marin Vargas, Ann Huang, Alexander Mathis", "tldr": "", "abstract": "In Reinforcement Learning, agents learn policies by exploring and interacting with the environment. Due to the curse of dimensionality, learning policies that map high-dimensional sensory input to motor output is particularly challenging. During training, state of the art methods (SAC, PPO, etc.) explore the environment by perturbing the actuation with independent Gaussian noise. While this unstructured exploration has proven successful in numerous tasks, it can be suboptimal for overactuated systems. When multiple actuators, such as motors or muscles, drive behavior, uncorrelated perturbations risk diminishing each other's effect, or modifying the behavior in a task-irrelevant way. While solutions to introduce time correlation across action perturbations exist, introducing correlation across actuators has been largely ignored. Here, we propose LATent TIme-Correlated Exploration (Lattice), a method to inject temporally-correlated noise into the latent state of the policy network, which can be seamlessly integrated with on- and off-policy algorithms. We demonstrate that the noisy actions generated by perturbing the network's activations can be modeled as a multivariate Gaussian distribution with a full covariance matrix. In the PyBullet locomotion tasks, Lattice-SAC achieves state of the art results, and reaches 18\\% higher reward than unstructured exploration in the Humanoid environment. In the musculoskeletal control environments of MyoSuite, Lattice-PPO achieves higher reward in most reaching and object manipulation tasks, while also finding more energy-efficient policies with reductions of 20-60\\%. Overall, we demonstrate the effectiveness of structured action noise in time and actuator space for complex motor control tasks. 
The code is available at: https://github.com/amathislab/lattice.", "keywords": "Reinforcement learning;efficient exploration;curse of dimensionality;motor control;musculoskeletal control", "primary_area": "", "supplementary_material": "", "author": "Alberto Silvio Chiappa;Alessandro Marin Vargas;Ann Huang;Alexander Mathis", "authorids": "~Alberto_Silvio_Chiappa1;~Alessandro_Marin_Vargas1;~Ann_Huang1;~Alexander_Mathis1", "gender": "M;M;F;M", "homepage": ";;;", "dblp": "269/4002;261/9159;;117/7258", "google_scholar": "Cv5lSo0AAAAJ;https://scholar.google.it/citations?user=IoHdcnUAAAAJ;https://scholar.google.ca/citations?user=zPJUEzsAAAAJ;https://scholar.google.ch/citations?user=Y1xCzE0AAAAJ", "orcid": "0009-0001-2764-6552;;;0000-0002-3777-2202", "linkedin": "albertochiappa/;alessandro-marin-vargas-594914170/;;", "or_profile": "~Alberto_Silvio_Chiappa1;~Alessandro_Marin_Vargas1;~Ann_Huang1;~Alexander_Mathis1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;McGill University, McGill University;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;mail.mcgill.ca;epfl.ch", "position": "PhD student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nchiappa2023latent,\ntitle={Latent exploration for Reinforcement Learning},\nauthor={Alberto Silvio Chiappa and Alessandro Marin Vargas and Ann Huang and Alexander Mathis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JSVXZKqfLU}\n}", "github": "", "project": "", "reviewers": "p1VW;Q9SL;9Q2U;dZAS;mQQQ", "pdf_size": 5900234, "rating": "5;6;6;7;7", "confidence": "4;4;3;4;2", "soundness": "3;4;4;3;4", "novelty": "3;3;3;3;3", "presentation": "3;4;3;2;4", "wc_summary": "75;55;113;108;18", "wc_strengths": "33;47;33;69;50", "wc_weaknesses": "91;50;45;137;87", "wc_questions": "113;183;8;290;3", "wc_limitations": "28;1;9;9;1", "wc_review": "340;336;208;613;159", "wc_reply_reviewers": "12;64;25;32;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 73.8, 35.141997666609676 ], "wc_strengths_avg": [ 46.4, 13.29059818066892 ], "wc_weaknesses_avg": [ 82.0, 33.23853185686757 ], "wc_questions_avg": [ 119.4, 108.76506792164477 ], "wc_limitations_avg": [ 9.6, 9.871170143402452 ], "wc_review_avg": [ 331.2, 157.69768546177207 ], "wc_reply_reviewers_avg": [ 29.0, 19.120669444347392 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4677071733467426, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15660652832651413693&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch;mail.mcgill.ca;epfl.ch", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "EPFL;McGill University", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.mcgill.ca", "aff_unique_abbr": "EPFL;McGill", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;Canada" }, { "title": "Randomized Sparse Neural Galerkin Schemes for Solving Evolution Equations with Deep Networks", "status": "Spotlight", "track": "main", 
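The core mechanism in the Lattice abstract above, injecting noise into the policy network's latent state so that exploration is correlated across actuators, fits in a few lines. A minimal sketch, assuming a simple MLP policy; the released implementation (linked in the abstract) integrates this with SAC/PPO and also correlates the noise over time, which is omitted here.

```python
import torch
from torch import nn

class LatentNoisePolicy(nn.Module):
    """Sketch: perturb the latent state instead of perturbing each action dimension.

    Because the action is a linear map (self.head, weight W) of the perturbed
    latent, the induced action noise is Gaussian with full covariance
    sigma^2 * W W^T, i.e. correlated across actuators, unlike independent
    per-actuator Gaussian noise.
    """

    def __init__(self, obs_dim, act_dim, hidden=64, sigma=0.1):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(obs_dim, hidden), nn.Tanh())
        self.head = nn.Linear(hidden, act_dim)
        self.sigma = sigma

    def forward(self, obs, explore=True):
        latent = self.encoder(obs)
        if explore:                                   # latent-space exploration
            latent = latent + self.sigma * torch.randn_like(latent)
        return self.head(latent)
```

For an overactuated system, say dozens of muscles, perturbations drawn this way move the limb coherently, instead of independent actuator noise partially cancelling out.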
"site": "https://nips.cc/virtual/2023/poster/72058", "id": "JTKd7zYROf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0cb310ed8121549488fea8e8c2056096-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JTKd7zYROf", "openreview": "https://openreview.net/forum?id=JTKd7zYROf", "poster": "/media/PosterPDFs/NeurIPS%202023/72058.png?t=1702074742.946345", "slides": "https://nips.cc/virtual/2023/poster/72058", "video": "https://nips.cc/virtual/2023/poster/72058", "author_site": "Jules Berman, Benjamin Peherstorfer", "tldr": "", "abstract": "Training neural networks sequentially in time to approximate solution fields of time-dependent partial differential equations can be beneficial for preserving causality and other physics properties; however, the sequential-in-time training is numerically challenging because training errors quickly accumulate and amplify over time. This work introduces Neural Galerkin schemes that update randomized sparse subsets of network parameters at each time step. The randomization avoids overfitting locally in time and so helps prevent the error from accumulating quickly over the sequential-in-time training, which is motivated by dropout that addresses a similar issue of overfitting due to neuron co-adaptation. The sparsity of the update reduces the computational costs of training without losing expressiveness because many of the network parameters are redundant locally at each time step. In numerical experiments with a wide range of evolution equations, the proposed scheme with randomized sparse updates is up to two orders of magnitude more accurate at a fixed computational budget and up to two orders of magnitude faster at a fixed accuracy than schemes with dense updates.", "keywords": "numerical methods;deep networks;evolution equations;scientific computing;partial differential equations;model reduction", "primary_area": "", "supplementary_material": "/attachment/700b00b835ce5c2dcd5b58ddf002f78a649d111f.zip", "author": "Jules Berman;Benjamin Peherstorfer", "authorids": "~Jules_Berman1;~Benjamin_Peherstorfer2", "gender": "M;", "homepage": ";https://cims.nyu.edu/~pehersto/", "dblp": "308/1410;96/8557", "google_scholar": "g44S1mAAAAAJ;C81WhlkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jules_Berman1;~Benjamin_Peherstorfer2", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nberman2023randomized,\ntitle={Randomized Sparse Neural Galerkin Schemes for Solving Evolution Equations with Deep Networks},\nauthor={Jules Berman and Benjamin Peherstorfer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JTKd7zYROf}\n}", "github": "", "project": "", "reviewers": "YG46;mkqi;8WGb;LKD5;Gbtz", "pdf_size": 1212905, "rating": "6;7;7;7;8", "confidence": "5;4;2;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;2;3;4;3", "wc_summary": "131;51;105;298;80", "wc_strengths": "89;85;62;82;118", "wc_weaknesses": "846;96;159;62;22", "wc_questions": "118;97;50;442;28", "wc_limitations": "111;19;10;23;6", "wc_review": "1295;348;386;907;254", "wc_reply_reviewers": "506;49;102;323;0", "wc_reply_authors": "588;55;108;447;0", "reply_reviewers": "2;1;1;2;0", "reply_authors": "3;2;2;3;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], 
"presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 133.0, 86.65564032421663 ], "wc_strengths_avg": [ 87.2, 17.993332098307974 ], "wc_weaknesses_avg": [ 237.0, 307.79733592089457 ], "wc_questions_avg": [ 147.0, 150.9542977195416 ], "wc_limitations_avg": [ 33.8, 39.076335549792795 ], "wc_review_avg": [ 638.0, 399.7674323903837 ], "wc_reply_reviewers_avg": [ 196.0, 190.3102729754755 ], "wc_reply_authors_avg": [ 239.6, 233.75251870300775 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6201736729460422, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11181481170761046526&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "nyu.edu;nyu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "MEGABYTE: Predicting Million-byte Sequences with Multiscale Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72057", "id": "JTmO2V9Xpz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f8f78f8043f35890181a824e53a57134-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JTmO2V9Xpz", "openreview": "https://openreview.net/forum?id=JTmO2V9Xpz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72057", "video": "https://nips.cc/virtual/2023/poster/72057", "author_site": "LILI YU, Daniel Simig, Colin Flaherty, Armen Aghajanyan, Luke Zettlemoyer, Mike Lewis", "tldr": "", "abstract": "Autoregressive transformers are spectacular models for short sequences but scale poorly to long sequences such as high-resolution images, podcasts, code, or books. We proposed Megabyte, a multi-scale decoder architecture that enables end-to-end differentiable modeling of sequences of over one million bytes. Megabyte segments sequences into patches and uses a local submodel within patches and a global model between patches. This enables sub-quadratic self-attention, much larger feedforward layers for the same compute, and improved parallelism during decoding---unlocking better performance at reduced cost for both training and generation. Extensive experiments show that Megabyte allows byte-level models to perform competitively with subword models on long context language modeling, achieve state-of-the-art density estimation on ImageNet, and model audio from raw files. 
Together, these results establish the viability of tokenization-free autoregressive sequence modeling at scale.", "keywords": "byte level language model;model architecture;efficient pretraining", "primary_area": "", "supplementary_material": "/attachment/e44ab1c47f36b0c1c360dc5a23a406777487b0d2.pdf", "author": "LILI YU;Daniel Simig;Colin Flaherty;Armen Aghajanyan;Luke Zettlemoyer;Mike Lewis", "authorids": "~LILI_YU1;~Daniel_Simig1;~Colin_Flaherty1;~Armen_Aghajanyan1;~Luke_Zettlemoyer1;~Mike_Lewis1", "gender": "F;M;M;;M;M", "homepage": "https://scholar.google.com/citations?hl=en&user=wY932-AAAAAJ&view_op=list_works&authuser=1&sortby=pubdate;;https://www.linkedin.com/in/flahertycolin/;;https://www.cs.washington.edu/people/faculty/lsz/;", "dblp": ";;;;21/6793;19/6214", "google_scholar": "https://scholar.google.com/citations?hl=en;;xM5HqQQAAAAJ;;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;SnQnQicAAAAJ", "orcid": ";;0000-0003-3021-1450;;;", "linkedin": "lili-yu-6771961a/;daniel-simig-206a42b8;flahertycolin/;;luke-zettlemoyer-a0109b226/;", "or_profile": "~LILI_YU1;~Daniel_Simig1;~Colin_Flaherty1;~Armen_Aghajanyan1;~Luke_Zettlemoyer1;~Mike_Lewis1", "aff": "Meta Facebook;Meta Facebook;FAIR (Meta AI);;Meta;Facebook AI Research", "aff_domain": "fb.com;fb.com;meta.com;;meta.com;fb.com", "position": "Researcher;Research Engineer;Research Engineer;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nyu2023megabyte,\ntitle={{MEGABYTE}: Predicting Million-byte Sequences with Multiscale Transformers},\nauthor={LILI YU and Daniel Simig and Colin Flaherty and Armen Aghajanyan and Luke Zettlemoyer and Mike Lewis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JTmO2V9Xpz}\n}", "github": "", "project": "", "reviewers": "bsD9;W4cB;RJHZ;tnBu", "pdf_size": 942738, "rating": "6;7;7;8", "confidence": "4;4;5;4", "soundness": "3;2;3;4", "novelty": "3;4;3;4", "presentation": "2;4;3;4", "wc_summary": "32;90;145;108", "wc_strengths": "48;80;13;97", "wc_weaknesses": "157;219;43;43", "wc_questions": "30;3;443;41", "wc_limitations": "9;19;1;42", "wc_review": "276;411;645;331", "wc_reply_reviewers": "6;8;0;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 93.75, 40.794454279963105 ], "wc_strengths_avg": [ 59.5, 32.09750769140807 ], "wc_weaknesses_avg": [ 115.5, 75.7413361382013 ], "wc_questions_avg": [ 129.25, 181.67054659465305 ], "wc_limitations_avg": [ 17.75, 15.384651442265437 ], "wc_review_avg": [ 415.75, 140.7930662355217 ], "wc_reply_reviewers_avg": [ 7.25, 5.356071321407137 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11105671153049163129&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "fb.com;fb.com;meta.com;;meta.com;fb.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", 
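The patch-based, multiscale decoding in the Megabyte abstract can be illustrated at the level of tensor shapes. In this deliberately tiny sketch, GRUs stand in for the paper's transformer blocks, and the causal masking and offsetting required for a real autoregressive model are omitted, so it shows the global/local data flow rather than a trainable language model; every hyperparameter is invented.

```python
import torch
from torch import nn

class TinyMultiscaleSketch(nn.Module):
    """Shape-level sketch of multiscale byte decoding: a sequence of T bytes is
    grouped into T//P patches; a global model contextualizes patch embeddings,
    and a local model predicts the bytes inside each patch conditioned on its
    patch's global representation."""

    def __init__(self, patch=8, d_local=128, d_global=512, vocab=256):
        super().__init__()
        self.patch = patch
        self.byte_embed = nn.Embedding(vocab, d_local)
        self.to_global = nn.Linear(patch * d_local, d_global)
        self.global_model = nn.GRU(d_global, d_global, batch_first=True)
        self.to_local = nn.Linear(d_global, d_local)
        self.local_model = nn.GRU(d_local, d_local, batch_first=True)
        self.logits = nn.Linear(d_local, vocab)

    def forward(self, bytes_):                        # bytes_: (B, T), T % patch == 0
        B, T = bytes_.shape
        n = T // self.patch
        h = self.byte_embed(bytes_)                   # (B, T, d_local)
        g = self.to_global(h.view(B, n, -1))          # one embedding per patch
        g, _ = self.global_model(g)                   # global context across patches
        cond = self.to_local(g)[:, :, None, :]        # (B, n, 1, d_local)
        h = h.view(B, n, self.patch, -1) + cond       # condition local on global
        out, _ = self.local_model(h.reshape(B * n, self.patch, -1))
        return self.logits(out).view(B, T, -1)        # per-byte logits
```

The cost structure is visible in the shapes: the global model operates over T/P patch positions rather than T byte positions, which is the source of the sub-quadratic self-attention claim in the abstract.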
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion-Based Adversarial Sample Generation for Improved Stealthiness and Controllability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72056", "id": "JTwxylP6U9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/088463cd3126aef2002ffc69da42ec59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JTwxylP6U9", "openreview": "https://openreview.net/forum?id=JTwxylP6U9", "poster": "/media/PosterPDFs/NeurIPS%202023/72056.png?t=1699480815.535691", "slides": "https://nips.cc/virtual/2023/poster/72056", "video": "https://nips.cc/virtual/2023/poster/72056", "author_site": "Haotian Xue, Alexandre Araujo, Bin Hu, Yongxin Chen", "tldr": "", "abstract": "Neural networks are known to be susceptible to adversarial samples: small variations of natural examples crafted to deliberately\nmislead the models. While they can be easily generated using gradient-based techniques in digital and physical scenarios, they often differ greatly from the actual data distribution of natural images, resulting in a trade-off between strength and stealthiness. In this paper, we propose a novel framework dubbed Diffusion-Based Projected Gradient Descent (Diff-PGD) for generating realistic adversarial samples. By exploiting a gradient guided by a diffusion model, Diff-PGD ensures that adversarial samples remain close to the original data distribution while maintaining their effectiveness. Moreover, our framework can be easily customized for specific tasks such as digital attacks, physical-world attacks, and style-based attacks. Compared with existing methods for generating natural-style adversarial samples, our framework enables the separation of optimizing adversarial loss from other surrogate losses (e.g. content/smoothness/style loss), making it more stable and controllable. 
Finally, we demonstrate that the samples generated using Diff-PGD have better transferability and anti-purification power than traditional gradient-based methods.", "keywords": "Robustness;Adversarial Samples;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/463e649dde369cee72601015f6c570b5f2a0b7af.pdf", "author": "Haotian Xue;Alexandre Araujo;Bin Hu;Yongxin Chen", "authorids": "~Haotian_Xue1;~Alexandre_Araujo3;~Bin_Hu2;~Yongxin_Chen1", "gender": "M;M;M;M", "homepage": ";https://yongxin.ae.gatech.edu/;https://alexandrearaujo.com/;https://xavihart.github.io", "dblp": ";;228/6599;", "google_scholar": ";X8BYiV4AAAAJ;https://scholar.google.fr/citations?user=wsu61VYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;haotian-xue-gatech/", "or_profile": "~Bin_Hu2;~Yongxin_Chen1;~Alexandre_ARAUJO1;~Xue_Haotian1", "aff": "University of Illinois, Urbana Champaign;Georgia Institute of Technology;New York University;Georgia Institute of Technology", "aff_domain": "illinois.edu;gatech.edu;nyu.edu;gatech.edu", "position": "Assistant Professor;Assistant Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nxue2023diffusionbased,\ntitle={Diffusion-Based Adversarial Sample Generation for Improved Stealthiness and Controllability},\nauthor={Haotian Xue and Alexandre Araujo and Bin Hu and Yongxin Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JTwxylP6U9}\n}", "github": "", "project": "", "reviewers": "B84v;x4rZ;PJE2;PvL7", "pdf_size": 12724555, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "68;27;35;103", "wc_strengths": "42;34;33;112", "wc_weaknesses": "171;72;32;50", "wc_questions": "16;24;30;8", "wc_limitations": "94;9;29;9", "wc_review": "391;166;159;282", "wc_reply_reviewers": "422;0;14;0", "wc_reply_authors": "770;0;0;0", "reply_reviewers": "2;0;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.25, 30.06139550985616 ], "wc_strengths_avg": [ 55.25, 32.94977238161138 ], "wc_weaknesses_avg": [ 81.25, 53.71859547679928 ], "wc_questions_avg": [ 19.5, 8.2915619758885 ], "wc_limitations_avg": [ 35.25, 34.888214342382156 ], "wc_review_avg": [ 249.5, 95.1853455107455 ], "wc_reply_reviewers_avg": [ 109.0, 180.8009955724802 ], "wc_reply_authors_avg": [ 192.5, 333.41978045700887 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7377341919898782261&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "illinois.edu;gatech.edu;nyu.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Georgia Institute of Technology;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.gatech.edu;https://www.nyu.edu", "aff_unique_abbr": "UIUC;Georgia Tech;NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DIFUSCO: 
Graph-based Diffusion Solvers for Combinatorial Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72055", "id": "JV8Ff0lgVV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ba520d93c3df592c83a611961314c98-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JV8Ff0lgVV", "openreview": "https://openreview.net/forum?id=JV8Ff0lgVV", "poster": "/media/PosterPDFs/NeurIPS%202023/72055.png?t=1701988567.0039449", "slides": "https://nips.cc/virtual/2023/poster/72055", "video": "https://nips.cc/virtual/2023/poster/72055", "author_site": "Zhiqing Sun, Yiming Yang", "tldr": "", "abstract": "Neural network-based Combinatorial Optimization (CO) methods have shown promising results in solving various NP-complete (NPC) problems without relying on hand-crafted domain knowledge. This paper broadens the current scope of neural solvers for NPC problems by introducing a new graph-based diffusion framework, namely DIFUSCO. It formulates NPC problems in a discrete {0, 1}-vector space and uses graph-based denoising diffusion models to generate high-quality solutions. Specifically, we explore diffusion models with Gaussian and Bernoulli noise, respectively, and also introduce an effective inference schedule to improve the generation quality. We evaluate our methods on two well-studied combinatorial optimization problems: Traveling Salesman Problem (TSP) and Maximal Independent Set (MIS). Experimental results show that DIFUSCO strongly outperforms the previous state-of-the-art neural solvers, narrowing the performance gap between ground-truth and neural solvers from 1.76% to 0.46% on TSP-500, from 2.46% to 1.17% on TSP-1000, and from 3.19% to 2.58% on TSP-10000. For the MIS problem, DIFUSCO outperforms the previous state-of-the-art neural solver on the challenging SATLIB benchmark. 
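A toy sketch of the Bernoulli-noise variant on a {0, 1} solution vector (one indicator per decision variable, e.g. per candidate edge); the `denoiser` is a made-up stand-in for the paper's graph network, and the inference schedule is simplified:

```python
import numpy as np

rng = np.random.default_rng(0)
n, T = 20, 40                                   # problem variables, diffusion steps

def denoiser(x_t, t):                           # stand-in: predicted P(x_0 = 1)
    target = (np.arange(n) % 2).astype(float)   # pretend the optimum alternates
    return 0.75 * target + 0.25 * x_t

x = rng.integers(0, 2, size=n).astype(float)    # start from pure Bernoulli noise
for t in reversed(range(1, T + 1)):
    p0 = denoiser(x, t)
    p = (t / T) * 0.5 + (1 - t / T) * p0        # trust the denoiser more as t -> 0
    x = (rng.random(n) < p).astype(float)       # resample the {0, 1} vector

solution = (denoiser(x, 0) > 0.5).astype(int)   # final decode; a real solver would
print(solution)                                 # post-process into a feasible tour/set
```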
Our code is available at [this url](https://github.com/Edward-Sun/DIFUSCO).", "keywords": "neural-symbolic reasoning;combinatorial optimization;diffusion models", "primary_area": "", "supplementary_material": "", "author": "Zhiqing Sun;Yiming Yang", "authorids": "~Zhiqing_Sun1;~Yiming_Yang1", "gender": "M;F", "homepage": "https://www.cs.cmu.edu/~zhiqings/;http://www.cs.cmu.edu/~yiming/", "dblp": "211/7692;25/1666", "google_scholar": "https://scholar.google.com/citations?hl=en;MlZq4XwAAAAJ", "orcid": ";0000-0001-8322-607X", "linkedin": "zhiqing-sun-5781b3100/;yiming-yang-24100924/", "or_profile": "~Zhiqing_Sun1;~Yiming_Yang1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsun2023difusco,\ntitle={{DIFUSCO}: Graph-based Diffusion Solvers for Combinatorial Optimization},\nauthor={Zhiqing Sun and Yiming Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JV8Ff0lgVV}\n}", "github": "", "project": "", "reviewers": "NgPq;6WFF;zZSi;b1y4;WFYx", "pdf_size": 2793213, "rating": "6;6;7;7;8", "confidence": "3;5;4;3;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "120;133;83;49;152", "wc_strengths": "100;78;76;75;200", "wc_weaknesses": "188;226;83;127;81", "wc_questions": "3;54;78;8;164", "wc_limitations": "1;27;28;6;11", "wc_review": "412;518;348;265;608", "wc_reply_reviewers": "35;26;78;80;66", "wc_reply_authors": "59;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 107.4, 36.903116399567125 ], "wc_strengths_avg": [ 105.8, 47.99333287030606 ], "wc_weaknesses_avg": [ 141.0, 57.609027070416666 ], "wc_questions_avg": [ 61.4, 58.51358816548511 ], "wc_limitations_avg": [ 14.6, 11.001818031580054 ], "wc_review_avg": [ 430.2, 121.40906061740203 ], "wc_reply_reviewers_avg": [ 57.0, 22.342784070030305 ], "wc_reply_authors_avg": [ 11.8, 23.599999999999994 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.07142857142857144, "gs_citation": 158, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4271601981686232164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.cmu.edu;cs.cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "EgoSchema: A Diagnostic Benchmark for Very Long-form Video Language Understanding", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73630", "id": "JVlWseddak", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/90ce332aff156b910b002ce4e6880dec-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=JVlWseddak", "openreview": "https://openreview.net/forum?id=JVlWseddak", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/73630", "video": "https://nips.cc/virtual/2023/poster/73630", "author_site": "Karttikeya Mangalam, Raiymbek Akshulakov, Jitendra Malik", "tldr": "", "abstract": "We introduce EgoSchema, a very long-form video question-answering dataset, and benchmark to evaluate long video understanding capabilities of modern vision and language systems. Derived from Ego4D, EgoSchema consists of over 5000 human curated multiple choice question answer pairs, spanning over 250 hours of real video data, covering a very broad range of natural human activity and behavior. For each question, EgoSchema requires the correct answer to be selected between five given options based on a three-minute-long video clip. While some prior works have proposed video datasets with long clip lengths, we posit that merely the length of the video clip does not truly capture the temporal difficulty of the video task that is being considered. To remedy this, we introduce temporal certificate sets, a general notion for capturing the intrinsic temporal understanding length associated with a broad range of video understanding tasks & datasets. Based on this metric, we find EgoSchema to have intrinsic temporal lengths over 5.7x longer than the second closest dataset and 10x to 100x longer than any other video understanding dataset. Further, our evaluation of several current state-of-the-art video and language models shows them to be severely lacking in long-term video understanding capabilities. Even models with several billions of parameters achieve QA accuracy less than 33% (random is 20%) on the EgoSchema multi-choice question answering task, while humans achieve about 76% accuracy. We posit that EgoSchema, with its long intrinsic temporal structures and diverse complexity, would serve as a valuable evaluation probe for developing effective long-term video understanding systems in the future. 
Data and Zero-shot model evaluation code will all be open-sourced under the Ego4D license at http://egoschema.github.io.", "keywords": "video understanding;long-term understanding;video question answering;vision and language", "primary_area": "", "supplementary_material": "/attachment/4f7967948446bdb3464bb63620cb89d247e2251d.zip", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nmangalam2023egoschema,\ntitle={EgoSchema: A Diagnostic Benchmark for Very Long-form Video Language Understanding},\nauthor={Karttikeya Mangalam and Raiymbek Akshulakov and Jitendra Malik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=JVlWseddak}\n}", "github": "", "project": "", "reviewers": "uDzm;bgjf;GM5p;vggB;ZNHy", "pdf_size": 2985224, "rating": "7;7;7;8;8", "confidence": "2;4;3;5;4", "wc_summary_and_contributions": "88;96;241;95;56", "wc_strengths": "69;189;148;91;56", "wc_improvement": "25;141;147;225;73", "wc_limitations": "32;46;6;43;2", "wc_correctness": "25;17;13;17;17", "wc_clarity": "1;6;25;9;9", "wc_relation_to_prior_work": "9;33;7;9;11", "wc_documentation": "19;27;12;30;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "269;556;600;520;226", "wc_reply_reviewers": "0;142;58;26;21", "wc_reply_authors": "213;382;379;238;356", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "wc_summary_and_contributions_avg": [ 115.2, 64.57058153679584 ], "wc_strengths_avg": [ 110.6, 50.281606975115665 ], "wc_improvement_avg": [ 122.2, 68.41754161032097 ], "wc_limitations_avg": [ 25.8, 18.443427013437606 ], "wc_correctness_avg": [ 17.8, 3.919183588453085 ], "wc_clarity_avg": [ 10.0, 8.049844718999243 ], "wc_relation_to_prior_work_avg": [ 13.8, 9.682974749528164 ], "wc_documentation_avg": [ 17.8, 10.49571341072154 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 434.2, 155.12884967020156 ], "wc_reply_reviewers_avg": [ 49.4, 49.886270656363955 ], "wc_reply_authors_avg": [ 313.6, 72.92352158254565 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.7205766921228919, "gs_citation": 261, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4345797088852263491&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "ImageReward: Learning and Evaluating Human Preferences for Text-to-Image Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72054", "id": "JVzeOYEx6d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/33646ef0ed554145eab65f6250fab0c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JVzeOYEx6d", "openreview": "https://openreview.net/forum?id=JVzeOYEx6d", "poster": "/media/PosterPDFs/NeurIPS%202023/72054.png?t=1701697068.1728945", "slides": "https://nips.cc/virtual/2023/poster/72054", "video": "https://nips.cc/virtual/2023/poster/72054", "author_site": "Jiazheng Xu, Xiao Liu, Yuchen Wu, Yuxuan Tong, Qinkai Li, Ming Ding, Jie Tang, Yuxiao Dong", "tldr": "", "abstract": "We present a comprehensive solution to learn and improve text-to-image models from human 
preference feedback.\nTo begin with, we build ImageReward---the first general-purpose text-to-image human preference reward model---to effectively encode human preferences.\nIts training is based on our systematic annotation pipeline including rating and ranking, which collects 137k expert comparisons to date.\nIn human evaluation, ImageReward outperforms existing scoring models and metrics, making it a promising automatic metric for evaluating text-to-image synthesis.\nOn top of it, we propose Reward Feedback Learning (ReFL), a direct tuning algorithm to optimize diffusion models against a scorer.\nBoth automatic and human evaluation support ReFL's advantages over compared methods.\nAll code and datasets are provided at \\url{https://github.com/THUDM/ImageReward}.", "keywords": "Generative Models;Text-to-Image;Learning from Human Feedback;Multimodality;Evaluation", "primary_area": "", "supplementary_material": "", "author": "Jiazheng Xu;Xiao Liu;Yuchen Wu;Yuxuan Tong;Qinkai Li;Ming Ding;Jie Tang;Yuxiao Dong", "authorids": "~Jiazheng_Xu1;~Xiao_Liu15;~Yuchen_Wu5;~Yuxuan_Tong2;~Qinkai_Li1;~Ming_Ding1;~Jie_Tang1;~Yuxiao_Dong1", "gender": "M;M;M;;;M;;M", "homepage": "https://github.com/xujz18;https://github.com/xiao9905;https://github.com/wuyuchen2003;;https://github.com/clear-train;;;https://keg.cs.tsinghua.edu.cn/yuxiao/", "dblp": "313/9484;82/1364-36;26/317;;344/5448;48/3462-4;;17/9267", "google_scholar": "7--T2_4AAAAJ;VKI8EhUAAAAJ;;;;Va50YzkAAAAJ;;https://scholar.google.com.hk/citations?hl=en", "orcid": ";0000-0002-9226-4569;;;;;;0000-0002-6092-2002", "linkedin": ";;;;;;;", "or_profile": "~Jiazheng_Xu1;~Xiao_Liu15;~Yuchen_Wu5;~Yuxuan_Tong2;~Qinkai_Li1;~Ming_Ding1;~Jie_Tang1;~Yuxiao_Dong1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;;Beijing University of Posts and Telecommunications;Tsinghua University;;Tsinghua University", "aff_domain": "cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;bupt.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;PhD student;Undergrad student;;Undergrad student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nxu2023imagereward,\ntitle={ImageReward: Learning and Evaluating Human Preferences for Text-to-Image Generation},\nauthor={Jiazheng Xu and Xiao Liu and Yuchen Wu and Yuxuan Tong and Qinkai Li and Ming Ding and Jie Tang and Yuxiao Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JVzeOYEx6d}\n}", "github": "", "project": "", "reviewers": "6uei;JPbK;7vWQ;kP49;5G85", "pdf_size": 17676705, "rating": "6;6;6;7;8", "confidence": "4;5;3;4;5", "soundness": "3;3;3;3;4", "novelty": "3;3;3;4;4", "presentation": "2;3;2;4;4", "wc_summary": "105;56;117;64;55", "wc_strengths": "60;44;80;72;54", "wc_weaknesses": "82;28;309;99;74", "wc_questions": "32;118;11;4;20", "wc_limitations": "43;29;1;22;7", "wc_review": "322;275;518;261;210", "wc_reply_reviewers": "21;15;20;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 79.4, 26.26480534860291 ], "wc_strengths_avg": [ 62.0, 12.77497553813705 ], "wc_weaknesses_avg": [ 118.4, 98.1562020455152 ], "wc_questions_avg": [ 37.0, 41.569219381653056 ], "wc_limitations_avg": [ 20.4, 15.12084653714864 ], 
"wc_review_avg": [ 317.2, 106.55965465409506 ], "wc_reply_reviewers_avg": [ 11.2, 9.368030742904295 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4677071733467427, "gs_citation": 536, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11223291249077460856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;bupt.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Tsinghua University;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "THU;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Parameterizing Non-Parametric Meta-Reinforcement Learning Tasks via Subtask Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72053", "id": "JX6UloWrmE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86c1fd74fa25bd6be0072937803e0bd1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JX6UloWrmE", "openreview": "https://openreview.net/forum?id=JX6UloWrmE", "poster": "/media/PosterPDFs/NeurIPS%202023/72053.png?t=1701654663.1074936", "slides": "https://nips.cc/virtual/2023/poster/72053", "video": "https://nips.cc/virtual/2023/poster/72053", "author_site": "Suyoung Lee, Myungsik Cho, Youngchul Sung", "tldr": "", "abstract": "Meta-reinforcement learning (meta-RL) techniques have demonstrated remarkable success in generalizing deep reinforcement learning across a range of tasks. Nevertheless, these methods often struggle to generalize beyond tasks with parametric variations. To overcome this challenge, we propose Subtask Decomposition and Virtual Training (SDVT), a novel meta-RL approach that decomposes each non-parametric task into a collection of elementary subtasks and parameterizes the task based on its decomposition. We employ a Gaussian mixture VAE to meta-learn the decomposition process, enabling the agent to reuse policies acquired from common subtasks. Additionally, we propose a virtual training procedure, specifically designed for non-parametric task variability, which generates hypothetical subtask compositions, thereby enhancing generalization to previously unseen subtask compositions. 
Our method significantly improves performance on the Meta-World ML-10 and ML-45 benchmarks, surpassing current state-of-the-art techniques.", "keywords": "Deep reinforcement learning;Meta-reinforcement learning;Subtask decomposition", "primary_area": "", "supplementary_material": "/attachment/222b4dd26954281034b235f66cd0b0f16ad5ea1d.zip", "author": "Suyoung Lee;Myungsik Cho;Youngchul Sung", "authorids": "~Suyoung_Lee4;~Myungsik_Cho1;~Youngchul_Sung1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/youngchulsung;https://suyoung-lee.github.io/", "dblp": "233/3959;17/6798;31/4163", "google_scholar": "https://scholar.google.com/citations?hl=en;-9D2k3UAAAAJ;CWbdBy8AAAAJ", "orcid": ";0000-0003-4536-6690;", "linkedin": ";;", "or_profile": "~Myungsik_Cho1;~Youngchul_Sung1;~Su_Young_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nlee2023parameterizing,\ntitle={Parameterizing Non-Parametric Meta-Reinforcement Learning Tasks via Subtask Decomposition},\nauthor={Suyoung Lee and Myungsik Cho and Youngchul Sung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JX6UloWrmE}\n}", "github": "", "project": "", "reviewers": "4fbE;PDTa;mp13;juhC", "pdf_size": 1443378, "rating": "5;6;6;7", "confidence": "5;4;4;3", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "107;54;139;68", "wc_strengths": "74;37;127;66", "wc_weaknesses": "208;35;738;160", "wc_questions": "491;88;113;25", "wc_limitations": "8;26;14;3", "wc_review": "888;240;1131;322", "wc_reply_reviewers": "56;20;33;58", "wc_reply_authors": "435;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 33.36914742692717 ], "wc_strengths_avg": [ 76.0, 32.50384592629001 ], "wc_weaknesses_avg": [ 285.25, 268.915762832899 ], "wc_questions_avg": [ 179.25, 182.82283090467666 ], "wc_limitations_avg": [ 12.75, 8.584142356694699 ], "wc_review_avg": [ 645.25, 375.3660713223826 ], "wc_reply_reviewers_avg": [ 41.75, 15.943258763502524 ], "wc_reply_authors_avg": [ 108.75, 188.3605253231154 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10936261001879326450&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Switching Temporary Teachers for Semi-Supervised Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72052", "id": "JXvszuOqY3", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/7eeb42802d3750ca59e8a0523068e9e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JXvszuOqY3", "openreview": "https://openreview.net/forum?id=JXvszuOqY3", "poster": "/media/PosterPDFs/NeurIPS%202023/72052.png?t=1699608479.2531822", "slides": "https://nips.cc/virtual/2023/poster/72052", "video": "https://nips.cc/virtual/2023/poster/72052", "author_site": "Jaemin Na, Jung-Woo Ha, Hyung Jin Chang, Dongyoon Han, Wonjun Hwang", "tldr": "", "abstract": "The teacher-student framework, prevalent in semi-supervised semantic segmentation, mainly employs the exponential moving average (EMA) to update a single teacher's weights based on the student's. However, EMA updates raise a problem in that the weights of the teacher and student are getting coupled, causing a potential performance bottleneck. Furthermore, this problem may become more severe when training with more complicated labels such as segmentation masks but with few annotated data. This paper introduces Dual Teacher, a simple yet effective approach that employs dual temporary teachers aiming to alleviate the coupling problem for the student. The temporary teachers work in shifts and are progressively improved, so consistently prevent the teacher and student from becoming excessively close. Specifically, the temporary teachers periodically take turns generating pseudo-labels to train a student model and maintain the distinct characteristics of the student model for each epoch. Consequently, Dual Teacher achieves competitive performance on the PASCAL VOC, Cityscapes, and ADE20K benchmarks with remarkably shorter training times than state-of-the-art methods. Moreover, we demonstrate that our approach is model-agnostic and compatible with both CNN- and Transformer-based models. 
Code is available at https://github.com/naver-ai/dual-teacher.", "keywords": "Semi-supervised Learning;Semantic Segmentation", "primary_area": "", "supplementary_material": "/attachment/d2f51b44c18e6bfeb0180d6ade64c7da16c4ca8c.pdf", "author": "Jaemin Na;Jung-Woo Ha;Hyung Jin Chang;Dongyoon Han;Wonjun Hwang", "authorids": "~Jaemin_Na1;~Jung-Woo_Ha1;~Hyung_Jin_Chang2;~Dongyoon_Han1;~Wonjun_Hwang1", "gender": "M;M;M;M;M", "homepage": "https://najaemin92.github.io;https://aidljwha.wordpress.com/;https://www.cs.bham.ac.uk/~changhj;https://dongyoonhan.github.io/;https://sites.google.com/view/siglearnlab", "dblp": "258/8825;66/867-1;96/3551;151/8876;23/798", "google_scholar": "C1Jl61oAAAAJ;https://scholar.google.co.kr/citations?user=eGj3ay4AAAAJ;https://scholar.google.com/citations?hl=en;jcP7m1QAAAAJ;-I8AfBAAAAAJ", "orcid": "0000-0002-8604-2839;0000-0002-7400-7681;;0000-0002-9130-8195;0000-0001-8895-0411", "linkedin": ";jung-woo-ha-b2782862?trk=hp-identity-name;;https://linkedin.com/in/dongyoon-han-04961a120/en;", "or_profile": "~Jaemin_Na1;~Jung-Woo_Ha1;~Hyung_Jin_Chang2;~Dongyoon_Han1;~Wonjun_Hwang1", "aff": "Ajou University;NAVER AI Lab;University of Birmingham;NAVER;NAVER", "aff_domain": "ajou.ac.kr;navercorp.com;bham.ac.uk;navercorp.com;navercorp.com", "position": "PhD student;Head (Executive Director);Associate Professor;Research Scientist;Advisory Committee", "bibtex": "@inproceedings{\nna2023switching,\ntitle={Switching Temporary Teachers for Semi-Supervised Semantic Segmentation},\nauthor={Jaemin Na and Jung-Woo Ha and Hyung Jin Chang and Dongyoon Han and Wonjun Hwang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JXvszuOqY3}\n}", "github": "", "project": "", "reviewers": "gkhQ;vns8;WnDT;3Udz", "pdf_size": 788520, "rating": "3;4;6;6", "confidence": "5;5;5;3", "soundness": "2;2;3;3", "novelty": "1;1;3;3", "presentation": "2;3;3;3", "wc_summary": "29;112;76;113", "wc_strengths": "21;32;70;111", "wc_weaknesses": "162;328;142;32", "wc_questions": "34;43;99;46", "wc_limitations": "5;2;12;1", "wc_review": "251;517;399;303", "wc_reply_reviewers": "206;167;180;10", "wc_reply_authors": "733;923;994;0", "reply_reviewers": "1;2;2;1", "reply_authors": "2;3;3;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 1.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 34.29650127928503 ], "wc_strengths_avg": [ 58.5, 35.344730866141845 ], "wc_weaknesses_avg": [ 166.0, 105.82060290888538 ], "wc_questions_avg": [ 55.5, 25.5 ], "wc_limitations_avg": [ 5.0, 4.301162633521313 ], "wc_review_avg": [ 367.5, 101.33484099755621 ], "wc_reply_reviewers_avg": [ 140.75, 76.78338036320099 ], "wc_reply_authors_avg": [ 662.5, 394.2172624327859 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13586789179061765702&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ajou.ac.kr;navercorp.com;bham.ac.uk;navercorp.com;navercorp.com", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Ajou University;NAVER Corporation;University of Birmingham", "aff_unique_dep": ";NAVER AI Lab;", "aff_unique_url": "https://www.ajou.ac.kr;https://www.naver.com;https://www.birmingham.ac.uk", 
"aff_unique_abbr": "Ajou;NAVER;Birmingham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "South Korea;United Kingdom" }, { "title": "Joint Attribute and Model Generalization Learning for Privacy-Preserving Action Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72051", "id": "JYUN0vYjh9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b762632135b16f1225672f9fe2a9740b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JYUN0vYjh9", "openreview": "https://openreview.net/forum?id=JYUN0vYjh9", "poster": "/media/PosterPDFs/NeurIPS%202023/72051.png?t=1701502852.679678", "slides": "https://nips.cc/virtual/2023/poster/72051", "video": "https://nips.cc/virtual/2023/poster/72051", "author_site": "Duo Peng, Li Xu, Qiuhong Ke, Ping Hu, Ping Hu, Jun Liu", "tldr": "", "abstract": "Privacy-Preserving Action Recognition (PPAR) aims to transform raw videos into anonymous ones to prevent privacy leakage while maintaining action clues, which is an increasingly important problem in intelligent vision applications. Despite recent efforts in this task, it is still challenging to deal with novel privacy attributes and novel privacy attack models that are unavailable during the training phase. In this paper, from the perspective of meta-learning (learning to learn), we propose a novel Meta Privacy-Preserving Action Recognition (MPPAR) framework to improve both generalization abilities above (i.e., generalize to *novel privacy attributes* and *novel privacy attack models*) in a unified manner. Concretely, we simulate train/test task shifts by constructing disjoint support/query sets w.r.t. privacy attributes or attack models. Then, a virtual training and testing scheme is applied based on support/query sets to provide feedback to optimize the model's learning toward better generalization. 
Extensive experiments demonstrate the effectiveness and generalization of the proposed framework compared to state-of-the-art methods.", "keywords": "Privacy Preservation;Action Recognition;Meta-Learning", "primary_area": "", "supplementary_material": "/attachment/6951906c0b22b9ce498f63bcb8a042e8039c6d4f.pdf", "author": "Duo Peng;Li Xu;Qiuhong Ke;Ping Hu;Jun Liu", "authorids": "~Duo_Peng1;~Li_Xu7;~Qiuhong_Ke6;~Ping_Hu3;~Jun_Liu8", "gender": "M;F;M;M;M", "homepage": ";https://research.monash.edu/en/persons/qiuhong-ke;http://feinanshan.github.io;;", "dblp": "85/2168;151/3574;53/5490-1;95/3736-36;175/3967", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;84qxdhsAAAAJ;ddrD2TgAAAAJ;Q5Ild8UAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;0000-0003-3281-0772", "linkedin": ";;;;", "or_profile": "~Li_Xu7;~Qiuhong_Ke6;~Ping_Hu3;~Jun_Liu8;~Peng_Duo1", "aff": "Singapore University of Technology and Design;Monash University;Boston University, Boston University;Singapore University of Technology and Design;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;monash.edu;bu.edu;sutd.edu.sg;mymail.sutd.edu.sg", "position": "PhD student;Lecturer;Postdoc;Assistant Professor;PhD student", "bibtex": "@inproceedings{\npeng2023joint,\ntitle={Joint Attribute and Model Generalization Learning for Privacy-Preserving Action Recognition},\nauthor={Duo Peng and Li Xu and Qiuhong Ke and Ping Hu and Jun Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JYUN0vYjh9}\n}", "github": "", "project": "", "reviewers": "4pCF;hKip;oDYm;EKAb", "pdf_size": 545707, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "2;2;2;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "74;54;74;133", "wc_strengths": "42;42;41;102", "wc_weaknesses": "186;146;199;74", "wc_questions": "2;2;97;126", "wc_limitations": "11;1;11;30", "wc_review": "391;166;159;282", "wc_reply_reviewers": "45;12;167;66", "wc_reply_authors": "68;23;957;70", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;4;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.75, 29.583568074186047 ], "wc_strengths_avg": [ 56.75, 26.12828926661675 ], "wc_weaknesses_avg": [ 151.25, 48.68970630431036 ], "wc_questions_avg": [ 56.75, 55.70177286227073 ], "wc_limitations_avg": [ 13.25, 10.497023387608508 ], "wc_review_avg": [ 361.75, 86.75648390754434 ], "wc_reply_reviewers_avg": [ 72.5, 57.85542325486868 ], "wc_reply_authors_avg": [ 279.5, 391.60598820753495 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9117480607396828373&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sutd.edu.sg;monash.edu;bu.edu;sutd.edu.sg;mymail.sutd.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Singapore University of Technology and Design;Monash University;Boston University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sutd.edu.sg;https://www.monash.edu;https://www.bu.edu", "aff_unique_abbr": "SUTD;Monash;BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", 
"aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Singapore;Australia;United States" }, { "title": "Scissorhands: Exploiting the Persistence of Importance Hypothesis for LLM KV Cache Compression at Test Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72050", "id": "JZfg6wGi6g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a452a7c6c463e4ae8fbdc614c6e983e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JZfg6wGi6g", "openreview": "https://openreview.net/forum?id=JZfg6wGi6g", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72050", "video": "https://nips.cc/virtual/2023/poster/72050", "author_site": "Zichang Liu, Aditya Desai, Fangshuo Liao, Weitao Wang, Victor Xie, Zhaozhuo Xu, Anastasios Kyrillidis, Anshumali Shrivastava", "tldr": "", "abstract": "Large language models(LLMs) have sparked a new wave of exciting AI applications. Hosting these models at scale requires significant memory resources. One crucial memory bottleneck for the deployment stems from the context window. It is commonly recognized that model weights are memory hungry; however, the size of key-value embedding stored during the generation process (KV cache) can easily surpass the model size. The enormous size of the KV cache puts constraints on the inference batch size, which is crucial for high throughput inference workload. Inspired by an interesting observation of the attention scores, we hypothesize the persistence of importance: only pivotal tokens, which had a substantial influence at one step, will significantly influence future generations. Based on our empirical verification and theoretical analysis around this hypothesis, we propose scissorhands, a system that maintains the memory usage of the KV cache at a fixed budget without finetuning the model. In essence, Scissorhands manages the KV cache by storing the pivotal tokens with a higher probability. We validate that scissorhands reduces the inference memory usage of the KV cache by up to 5$\\times$ without compromising model quality. 
We further demonstrate that Scissorhands can be combined with 4-bit quantization, traditionally used to compress model weights, to achieve up to 20$\\times$ compression.", "keywords": "Large language model; KV Cache Compression", "primary_area": "", "supplementary_material": "/attachment/23b721958b5b0609724f41bd8eaec4e19f6df17a.pdf", "author": "Zichang Liu;Aditya Desai;Fangshuo Liao;Weitao Wang;Victor Xie;Zhaozhuo Xu;Anastasios Kyrillidis;Anshumali Shrivastava", "authorids": "~Zichang_Liu1;~Aditya_Desai1;~Fangshuo_Liao1;wtwang@rice.edu;vyx2@rice.edu;~Zhaozhuo_Xu1;~Anastasios_Kyrillidis2;~Anshumali_Shrivastava1", "gender": "F;;M;;;M;M;M", "homepage": ";;https://jasperliao.github.io/;;;https://ottovonxu.github.io/;http://akyrillidis.github.io;https://www.cs.rice.edu/~as143/", "dblp": "227/4714;;308/2837;;;195/4352;53/9879;63/9828", "google_scholar": ";;WIwcFN8AAAAJ;;;7tDlVAsAAAAJ;TEGzkZMAAAAJ;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ", "orcid": "0009-0004-1098-2869;;;;;;;", "linkedin": "zichang-liu/;;fangshuo-liao-698043141/;;;;;", "or_profile": "~Zichang_Liu1;~Aditya_Desai1;~Fangshuo_Liao1;wtwang@rice.edu;vyx2@rice.edu;~Zhaozhuo_Xu1;~Anastasios_Kyrillidis2;~Anshumali_Shrivastava1", "aff": "Rice University;;Rice University;;;Rice University;Rice University;ThirdAI Corp.", "aff_domain": "rice.edu;;rice.edu;;;rice.edu;rice.edu;thirdai.com", "position": "PhD student;;PhD student;;;PhD student;Assistant Professor;CEO", "bibtex": "@inproceedings{\nliu2023scissorhands,\ntitle={Scissorhands: Exploiting the Persistence of Importance Hypothesis for {LLM} {KV} Cache Compression at Test Time},\nauthor={Zichang Liu and Aditya Desai and Fangshuo Liao and Weitao Wang and Victor Xie and Zhaozhuo Xu and Anastasios Kyrillidis and Anshumali Shrivastava},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JZfg6wGi6g}\n}", "github": "", "project": "", "reviewers": "UGwp;g2hZ;h9xt;H8Ve;AWSm", "pdf_size": 1037905, "rating": "3;5;6;7;7", "confidence": "5;3;3;4;5", "soundness": "1;3;2;3;4", "novelty": "1;3;3;3;4", "presentation": "2;3;3;2;4", "wc_summary": "54;121;38;99;91", "wc_strengths": "15;41;56;99;39", "wc_weaknesses": "151;13;161;340;86", "wc_questions": "47;222;164;125;10", "wc_limitations": "45;6;6;34;3", "wc_review": "312;403;425;697;229", "wc_reply_reviewers": "0;322;315;155;0", "wc_reply_authors": "0;356;248;28;0", "reply_reviewers": "0;2;2;2;0", "reply_authors": "1;2;3;2;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "novelty_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 80.6, 30.33545780106178 ], "wc_strengths_avg": [ 50.0, 27.799280566230486 ], "wc_weaknesses_avg": [ 150.2, 108.70768142132367 ], "wc_questions_avg": [ 113.6, 76.92749833447076 ], "wc_limitations_avg": [ 18.8, 17.29045979724079 ], "wc_review_avg": [ 413.2, 158.08655856839948 ], "wc_reply_reviewers_avg": [ 158.4, 142.4648728634536 ], "wc_reply_authors_avg": [ 126.4, 147.74247865796755 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.1494035761667992, "gs_citation": 200, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6205159589585211087&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": 
"rice.edu;;rice.edu;;;rice.edu;rice.edu;thirdai.com", "author_num": 8, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Rice University;ThirdAI Corp.", "aff_unique_dep": ";", "aff_unique_url": "https://www.rice.edu;", "aff_unique_abbr": "Rice;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Functional Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72049", "id": "JdhyIa0azI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4757db82a02eea015670ecca605d5cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JdhyIa0azI", "openreview": "https://openreview.net/forum?id=JdhyIa0azI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72049", "video": "https://nips.cc/virtual/2023/poster/72049", "author_site": "Allan Zhou, Kaien Yang, Yiding Jiang, Kaylee Burns, Winnie Xu, Samuel Sokota, J. Zico Kolter, Chelsea Finn", "tldr": "", "abstract": "The recent success of neural networks as implicit representation of data has driven growing interest in neural functionals: models that can process other neural networks as input by operating directly over their weight spaces. Nevertheless, constructing expressive and efficient neural functional architectures that can handle high-dimensional weight-space objects remains challenging. This paper uses the attention mechanism to define a novel set of permutation equivariant weight-space layers and composes them into deep equivariant models called neural functional Transformers (NFTs). NFTs respect weight-space permutation symmetries while incorporating the advantages of attention, which have exhibited remarkable success across multiple domains. In experiments processing the weights of feedforward MLPs and CNNs, we find that NFTs match or exceed the performance of prior weight-space methods. We also leverage NFTs to develop Inr2Array, a novel method for computing permutation invariant latent representations from the weights of implicit neural representations (INRs). Our proposed method improves INR classification accuracy by up to $+17\\\\%$ over existing methods. 
We provide an implementation of our layers at https://github.com/AllanYangZhou/nfn.", "keywords": "equivariance;permutation;implicit neural representation;generalization;transformers;attention", "primary_area": "", "supplementary_material": "", "author": "Allan Zhou;Kaien Yang;Yiding Jiang;Kaylee Burns;Winnie Xu;Samuel Sokota;J Zico Kolter;Chelsea Finn", "authorids": "~Allan_Zhou1;kaieny@stanford.edu;~Yiding_Jiang2;~Kaylee_Burns2;~Winnie_Xu1;~Samuel_Sokota1;~J_Zico_Kolter1;~Chelsea_Finn1", "gender": ";;M;F;F;M;;F", "homepage": "http://bland.website;;https://yidingjiang.github.io/;https://kayburns.github.io;https://winniexu.ca;https://ssokota.github.io/;;https://ai.stanford.edu/~cbfinn/", "dblp": "195/6907;;;217/3002;285/6560;243/5881;;131/1783", "google_scholar": ";;x9qzWg8AAAAJ;N_rVVG8AAAAJ;k4l-zNYAAAAJ;;;vfPE6hgAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;https://linkedin.com/in/winnie-xu;samuel-sokota-87a153149/;;", "or_profile": "~Allan_Zhou1;kaieny@stanford.edu;~Yiding_Jiang2;~Kaylee_Burns2;~Winnie_Xu1;~Samuel_Sokota1;~J_Zico_Kolter1;~Chelsea_Finn1", "aff": "Google Deepmind;;Carnegie Mellon University;Stanford University;;Carnegie Mellon University;;Google", "aff_domain": "google.com;;andrew.cmu.edu;stanford.edu;;cmu.edu;;google.com", "position": "Intern;;PhD student;PhD student;;PhD student;;Research Scientist", "bibtex": "@inproceedings{\nzhou2023neural,\ntitle={Neural Functional Transformers},\nauthor={Allan Zhou and Kaien Yang and Yiding Jiang and Kaylee Burns and Winnie Xu and Samuel Sokota and J Zico Kolter and Chelsea Finn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JdhyIa0azI}\n}", "github": "", "project": "", "reviewers": "73nt;5PwT;zT4L;e4bE;BGFR", "pdf_size": 2924528, "rating": "6;6;6;6;6", "confidence": "4;3;4;3;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;2;4;3;3", "wc_summary": "59;117;85;59;269", "wc_strengths": "52;53;54;41;71", "wc_weaknesses": "481;556;161;82;34", "wc_questions": "77;6;195;2;41", "wc_limitations": "9;28;2;8;18", "wc_review": "678;760;497;192;433", "wc_reply_reviewers": "242;161;281;77;18", "wc_reply_authors": "484;281;287;0;0", "reply_reviewers": "2;2;2;1;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 117.8, 78.55291210388066 ], "wc_strengths_avg": [ 54.2, 9.620810776644554 ], "wc_weaknesses_avg": [ 262.8, 213.99943925160176 ], "wc_questions_avg": [ 64.2, 70.80508456318657 ], "wc_limitations_avg": [ 13.0, 9.077444574328174 ], "wc_review_avg": [ 512.0, 198.96029754702317 ], "wc_reply_reviewers_avg": [ 155.8, 98.31663134993997 ], "wc_reply_authors_avg": [ 210.4, 186.67897578463408 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7567635659418703487&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;;andrew.cmu.edu;stanford.edu;;cmu.edu;;google.com", "author_num": 8, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "DeepMind;Carnegie Mellon University;Stanford University;Google", "aff_unique_dep": "DeepMind;;;Google", "aff_unique_url": 
"https://deepmind.com;https://www.cmu.edu;https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "DeepMind;CMU;Stanford;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Minimax-Optimal Location Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72048", "id": "JeKXmYb4kd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/02a589ef9a4f6f1e2dcc1cfb3b978a51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JeKXmYb4kd", "openreview": "https://openreview.net/forum?id=JeKXmYb4kd", "poster": "/media/PosterPDFs/NeurIPS%202023/72048.png?t=1702277143.221933", "slides": "https://nips.cc/virtual/2023/poster/72048", "video": "https://nips.cc/virtual/2023/poster/72048", "author_site": "Shivam Gupta, Jasper Lee, Eric Price, Paul Valiant", "tldr": "", "abstract": "Location estimation is one of the most basic questions in parametric statistics. \nSuppose we have a known distribution density $f$, and we get $n$ i.i.d. samples from $f(x-\\mu)$ for some unknown shift $\\mu$.\nThe task is to estimate $\\mu$ to high accuracy with high probability.\nThe maximum likelihood estimator (MLE) is known to be asymptotically optimal as $n \\to \\infty$, but what is possible for finite $n$?\nIn this paper, we give two location estimators that are optimal under different criteria: 1) an estimator that has minimax-optimal estimation error subject to succeeding with probability $1-\\delta$ and 2) a confidence interval estimator which, subject to its output interval containing $\\mu$ with probability at least $1-\\delta$, has the minimum expected squared interval width among all shift-invariant estimators.\nThe latter construction can be generalized to minimizing the expectation of any loss function on the interval width.", "keywords": "location estimation;minimax estimation", "primary_area": "", "supplementary_material": "", "author": "Shivam Gupta;Jasper C.H. Lee;Eric Price;Paul Valiant", "authorids": "~Shivam_Gupta1;~Jasper_C.H._Lee1;~Eric_Price1;~Paul_Valiant1", "gender": "M;M;;M", "homepage": "https://shivamgupta2.github.io/;https://jasperchlee.github.io/;;https://www.cs.purdue.edu/homes/pvaliant/", "dblp": "29/8830-2;150/4950;;", "google_scholar": "HsbPV-EAAAAJ;z0Y4snAAAAAJ;;abUcBIkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shivam_Gupta1;~Jasper_C.H._Lee1;~Eric_Price1;~Paul_Valiant1", "aff": "University of Texas, Austin;University of Wisconsin - Madison;;Purdue University", "aff_domain": "utexas.edu;wisc.edu;;purdue.edu", "position": "PhD student;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\ngupta2023minimaxoptimal,\ntitle={Minimax-Optimal Location Estimation},\nauthor={Shivam Gupta and Jasper C.H. 
Lee and Eric Price and Paul Valiant},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JeKXmYb4kd}\n}", "github": "", "project": "", "reviewers": "b2QA;j3jn;ng2F;cZWD", "pdf_size": 548983, "rating": "5;6;6;8", "confidence": "2;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "134;146;53;191", "wc_strengths": "15;3;38;31", "wc_weaknesses": "35;3;330;1", "wc_questions": "2;3;2;1", "wc_limitations": "2;3;1;1", "wc_review": "188;158;424;225", "wc_reply_reviewers": "105;12;25;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.0, 49.794578018093496 ], "wc_strengths_avg": [ 21.75, 13.663363421939708 ], "wc_weaknesses_avg": [ 92.25, 137.9263843504933 ], "wc_questions_avg": [ 2.0, 0.7071067811865476 ], "wc_limitations_avg": [ 1.75, 0.82915619758885 ], "wc_review_avg": [ 248.75, 103.92635613741108 ], "wc_reply_reviewers_avg": [ 35.5, 41.08831950810352 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6515847209058535147&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "utexas.edu;wisc.edu;;purdue.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;University of Wisconsin-Madison;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.wisc.edu;https://www.purdue.edu", "aff_unique_abbr": "UT Austin;UW-Madison;Purdue", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Austin;Madison;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Module-wise Adaptive Distillation for Multimodality Foundation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72047", "id": "JhQP33aMx2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc9544b26ad3579477e567588db18cfc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JhQP33aMx2", "openreview": "https://openreview.net/forum?id=JhQP33aMx2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72047", "video": "https://nips.cc/virtual/2023/poster/72047", "author_site": "Chen Liang, Jiahui Yu, Ming-Hsuan Yang, Matthew Brown, Yin Cui, Tuo Zhao, Boqing Gong, Tianyi Zhou", "tldr": "", "abstract": "Pre-trained multimodal foundation models have demonstrated remarkable generalizability but pose challenges for deployment due to their large sizes. One effective approach to reducing their sizes is layerwise distillation, wherein small student models are trained to match the hidden representations of large teacher models at each layer. 
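For readers new to the setup, a one-screen picture of that layerwise objective (MSE matching is one common choice; projection layers for width mismatches and the task loss itself are omitted, and all arrays below are toy stand-ins for real activations):

```python
import numpy as np

rng = np.random.default_rng(0)
teacher_hidden = [rng.normal(size=(4, 32)) for _ in range(6)]   # per-layer states
student_hidden = [h + 0.1 * rng.normal(size=h.shape) for h in teacher_hidden]

# layerwise distillation: match the student's hidden representation to the
# teacher's at every layer, then sum the per-layer matching losses
layerwise_loss = sum(np.mean((s - t) ** 2)
                     for s, t in zip(student_hidden, teacher_hidden))
print(round(float(layerwise_loss), 4))
```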
Motivated by our observation that certain architecture components, referred to as modules, contribute more significantly to the student's performance than others, we propose to track the contributions of individual modules by recording the loss decrement after distilling each module and choose the module with a greater contribution to distill more frequently. Such an approach can be naturally formulated as a multi-armed bandit (MAB) problem, where modules and loss decrements are considered as arms and rewards, respectively. We then develop a modified Thompson sampling algorithm named OPTIMA to address the nonstationarity of module contributions resulting from model updating. Specifically, we leverage the observed contributions in recent history to estimate the changing contribution of each module and select modules based on these estimations to maximize the cumulative contribution. We evaluate the effectiveness of OPTIMA through distillation experiments on various multimodal understanding and image captioning tasks, using the CoCa-Large model \\citep{yu2022coca} as the teacher model.", "keywords": "Multimodality foundation models;knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/3846b4a58288ddad8118b3dae8e9d9100a9f1056.zip", "author": "Chen Liang;Jiahui Yu;Ming-Hsuan Yang;Matthew Brown;Yin Cui;Tuo Zhao;Boqing Gong;Tianyi Zhou", "authorids": "~Chen_Liang3;~Jiahui_Yu1;~Ming-Hsuan_Yang1;~Matthew_Brown1;~Yin_Cui1;~Tuo_Zhao1;~Boqing_Gong1;~Tianyi_Zhou1", "gender": "F;M;M;M;M;M;M;M", "homepage": "https://cliang1453.github.io/;http://jiahuiyu.com/;https://faculty.ucmerced.edu/mhyang/;http://matthewalunbrown.com;https://ycui.me/;http://www2.isye.gatech.edu/~tzhao80;http://boqinggong.info;https://tianyizhou.github.io/", "dblp": "35/3221-6;185/1060;79/3711.html;21/3987;47/8023.html;;29/7457;88/8205-1", "google_scholar": "https://scholar.google.com/citations?hl=en;-CLCMk4AAAAJ;p9-ohHsAAAAJ;WZfM0qsAAAAJ;iP5m52IAAAAJ;EJXN6tYAAAAJ;lv9ZeVUAAAAJ;OKvgizMAAAAJ", "orcid": ";;0000-0003-4848-2304;;0000-0003-2882-2033;;;0000-0001-5348-0632", "linkedin": ";jiahuiyuu/;minghsuanyang/;matthewalunbrown;;;boqing-gong-46aa5821/;tianyizhou", "or_profile": "~Chen_Liang3;~Jiahui_Yu1;~Ming-Hsuan_Yang1;~Matthew_Brown1;~Yin_Cui1;~Tuo_Zhao1;~Boqing_Gong1;~Tianyi_Zhou1", "aff": "Georgia Institute of Technology;Google Brain;University of California at Merced;Google;Google;Georgia Institute of Technology;Google;University of Maryland, College Park", "aff_domain": "gatech.edu;google.com;umcerced.edu;google.com;google.com;gatech.edu;google.com;umd.edu", "position": "PhD student;Research Scientist;Professor;Research Scientist;Research Scientist;Associate Professor;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nliang2023modulewise,\ntitle={Module-wise Adaptive Distillation for Multimodality Foundation Models},\nauthor={Chen Liang and Jiahui Yu and Ming-Hsuan Yang and Matthew Brown and Yin Cui and Tuo Zhao and Boqing Gong and Tianyi Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JhQP33aMx2}\n}", "github": "", "project": "", "reviewers": "fQyt;8iuR;1KcH;3XdA;zaxt", "pdf_size": 771358, "rating": "5;5;5;6;6", "confidence": "4;4;3;3;2", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "64;68;42;83;31", "wc_strengths": "59;72;31;104;20", "wc_weaknesses": "183;142;121;107;20", "wc_questions": "8;5;122;9;1", "wc_limitations": "10;1;7;6;1", 
"wc_review": "324;288;323;309;73", "wc_reply_reviewers": "37;77;19;57;16", "wc_reply_authors": "26;110;124;95;95", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;3;3;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 57.6, 18.68261223705079 ], "wc_strengths_avg": [ 57.2, 29.942611776530118 ], "wc_weaknesses_avg": [ 114.6, 53.80929287771769 ], "wc_questions_avg": [ 29.0, 46.58325879540846 ], "wc_limitations_avg": [ 5.0, 3.521363372331802 ], "wc_review_avg": [ 263.4, 96.0845461039391 ], "wc_reply_reviewers_avg": [ 41.2, 23.137847782367313 ], "wc_reply_authors_avg": [ 90.0, 33.769809001532714 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7637626158259732, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12079860399289446820&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "gatech.edu;google.com;umcerced.edu;google.com;google.com;gatech.edu;google.com;umd.edu", "author_num": 8, "aff_unique_index": "0;1;2;1;1;0;1;3", "aff_unique_norm": "Georgia Institute of Technology;Google;University of California, Merced;University of Maryland", "aff_unique_dep": ";Google Brain;;", "aff_unique_url": "https://www.gatech.edu;https://brain.google.com;https://www.ucmerced.edu;https://www/umd.edu", "aff_unique_abbr": "Georgia Tech;Google Brain;UC Merced;UMD", "aff_campus_unique_index": "1;2;1;1;1;3", "aff_campus_unique": ";Mountain View;Merced;College Park", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Symmetry-Aware Generation of Periodic Materials", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72046", "id": "Jkc74vn1aZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a73474c359ed523e6cd3174ed29a4d56-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Jkc74vn1aZ", "openreview": "https://openreview.net/forum?id=Jkc74vn1aZ", "poster": "/media/PosterPDFs/NeurIPS%202023/72046.png?t=1701747405.794368", "slides": "https://nips.cc/virtual/2023/poster/72046", "video": "https://nips.cc/virtual/2023/poster/72046", "author_site": "Youzhi Luo, Chengkai Liu, Shuiwang Ji", "tldr": "", "abstract": "We consider the problem of generating periodic materials with deep models. While symmetry-aware molecule generation has been studied extensively, periodic materials possess different symmetries, which have not been completely captured by existing methods.\nIn this work, we propose SyMat, a novel material generation approach that can capture physical symmetries of periodic material structures. SyMat generates atom types and lattices of materials through generating atom type sets, lattice lengths and lattice angles with a variational auto-encoder model. In addition, SyMat employs a score-based diffusion model to generate atom coordinates of materials, in which a novel symmetry-aware probabilistic model is used in the coordinate diffusion process. We show that SyMat is theoretically invariant to all symmetry transformations on materials and demonstrate that SyMat achieves promising performance on random generation and property optimization tasks. 
Our code is publicly available as part of the AIRS library (https://github.com/divelab/AIRS).", "keywords": "material generation;symmetries;variational auto-encoder;score-based diffusion model", "primary_area": "", "supplementary_material": "", "author": "Youzhi Luo;Chengkai Liu;Shuiwang Ji", "authorids": "~Youzhi_Luo1;~Chengkai_Liu1;~Shuiwang_Ji1", "gender": "M;Not Specified;M", "homepage": "https://lyzustc.github.io/;https://chengkai-liu.github.io/;http://people.tamu.edu/~sji", "dblp": "280/0590;271/6853.html;84/6405", "google_scholar": "3lqQFIoAAAAJ;Keab81kAAAAJ;BZGj6sAAAAAJ", "orcid": "0000-0002-3763-0239;0009-0002-5033-1847;0000-0002-4205-4563", "linkedin": "youzhi-luo-139981172/;chengkai-liu-0502a1198/;shuiwang-ji-9a040715/", "or_profile": "~Youzhi_Luo1;~Chengkai_Liu1;~Shuiwang_Ji1", "aff": "Texas A&M University;Texas A&M University - College Station;Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nluo2023towards,\ntitle={Towards Symmetry-Aware Generation of Periodic Materials},\nauthor={Youzhi Luo and Chengkai Liu and Shuiwang Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Jkc74vn1aZ}\n}", "github": "", "project": "", "reviewers": "sa6H;ukbg;QiTe;oTTM", "pdf_size": 744719, "rating": "5;7;7;7", "confidence": "3;3;3;1", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "76;62;82;103", "wc_strengths": "16;26;70;90", "wc_weaknesses": "49;7;131;99", "wc_questions": "215;48;100;18", "wc_limitations": "24;20;21;15", "wc_review": "380;163;404;325", "wc_reply_reviewers": "0;0;20;57", "wc_reply_authors": "0;0;27;35", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.75, 14.7542366796795 ], "wc_strengths_avg": [ 50.5, 30.540956108150905 ], "wc_weaknesses_avg": [ 71.5, 47.33656092282159 ], "wc_questions_avg": [ 95.25, 75.10451051701223 ], "wc_limitations_avg": [ 20.0, 3.24037034920393 ], "wc_review_avg": [ 318.0, 93.96009791395494 ], "wc_reply_reviewers_avg": [ 19.25, 23.27418097377435 ], "wc_reply_authors_avg": [ 15.5, 15.75595125658873 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18117661604674704071&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "tamu.edu;tamu.edu;tamu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Single-Index Models beyond Gaussian Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72045", "id": "JkmvrheMe7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2063a00c435aafbcc58c16ce1e522139-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JkmvrheMe7", "openreview": "https://openreview.net/forum?id=JkmvrheMe7", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72045", "video": "https://nips.cc/virtual/2023/poster/72045", "author_site": "Aaron Zweig, Loucas PILLAUD-VIVIEN, Joan Bruna", "tldr": "", "abstract": "Sparse high-dimensional functions have arisen as a rich framework to study the behavior of gradient-descent methods using shallow neural networks, and showcasing its ability to perform feature learning beyond linear models. \nAmongst those functions, the simplest are single-index models $f(x) = \\phi( x \\cdot \\theta^*)$, where the labels are generated by an arbitrary non-linear link function $\\phi$ of an unknown one-dimensional projection $\\theta^*$ of the input data. By focusing on Gaussian data, several recent works have built a remarkable picture, where the so-called information exponent (related to the regularity of the link function) controls the required sample complexity. In essence, these tools exploit the stability and spherical symmetry of Gaussian distributions.\n\nIn this work, we explore extensions of this picture beyond the Gaussian setting, where both stability or symmetry might be violated. Focusing on the planted setting where $\\phi$ is known, our main results establish that Stochastic Gradient Descent recovers the unknown direction $\\theta^*$ with constant probability in the high-dimensional regime, under mild assumptions that significantly extend ~[Yehudai and Shamir,20].", "keywords": "gradient descent;shallow neural networks", "primary_area": "", "supplementary_material": "/attachment/66e278c8a702069d911bebd3069bd945700af20b.pdf", "author": "Aaron Zweig;Loucas Pillaud-Vivien;Joan Bruna", "authorids": "~Aaron_Zweig2;~Loucas_Pillaud-Vivien1;~Joan_Bruna1", "gender": "M;M;M", "homepage": ";https://thebiglouloup.github.io/loucaspillaudvivien/;http://cims.nyu.edu/~bruna", "dblp": "180/8473;211/7988;44/8776", "google_scholar": ";https://scholar.google.com/citations?hl=en;L4bNmsMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Aaron_Zweig2;~Loucas_Pillaud-Vivien1;~Joan_Bruna1", "aff": "New York University;Flatiron Institute;New York University", "aff_domain": "nyu.edu;flatironinstitute.org;nyu.edu", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nzweig2023on,\ntitle={On Single-Index Models beyond Gaussian Data},\nauthor={Aaron Zweig and Loucas Pillaud-Vivien and Joan Bruna},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JkmvrheMe7}\n}", "github": "", "project": "", "reviewers": "kUyA;CJfX;PMRM;tgdy;VAjP", "pdf_size": 425825, "rating": "5;6;6;6;6", "confidence": "4;4;4;3;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;2;2", "presentation": "2;4;3;3;3", "wc_summary": "134;143;62;86;271", "wc_strengths": "107;174;65;92;132", "wc_weaknesses": "117;290;155;148;262", "wc_questions": "264;110;132;206;314", "wc_limitations": "15;5;17;4;1", "wc_review": "637;722;431;536;980", "wc_reply_reviewers": "192;282;267;0;29", "wc_reply_authors": "252;1135;95;0;0", "reply_reviewers": "1;3;1;0;1", "reply_authors": "2;4;2;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 139.2, 72.39171223282399 ], "wc_strengths_avg": [ 114.0, 37.03511846882631 ], "wc_weaknesses_avg": [ 194.4, 68.41812625320865 ], "wc_questions_avg": [ 205.2, 77.09319036075755 ], "wc_limitations_avg": [ 8.4, 
6.374950980203691 ], "wc_review_avg": [ 661.2, 186.85973349012355 ], "wc_reply_reviewers_avg": [ 154.0, 118.26918449029739 ], "wc_reply_authors_avg": [ 296.4, 429.299242952978 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3242005615254144694&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nyu.edu;flatironinstitute.org;nyu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "New York University;Flatiron Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://flatironinstitute.org", "aff_unique_abbr": "NYU;Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ALGO: Synthesizing Algorithmic Programs with Generated Oracle Verifiers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72044", "id": "JolrEmMim6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abe1eb21ceb046209c96a0f5e7544ccc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JolrEmMim6", "openreview": "https://openreview.net/forum?id=JolrEmMim6", "poster": "/media/PosterPDFs/NeurIPS%202023/72044.png?t=1701377425.7068577", "slides": "https://nips.cc/virtual/2023/poster/72044", "video": "https://nips.cc/virtual/2023/poster/72044", "author_site": "Kexun Zhang, Danqing Wang, Jingtao Xia, William Yang Wang, Lei Li", "tldr": "", "abstract": "Large language models (LLMs) excel at implementing code from functionality descriptions but struggle with algorithmic problems that require not only implementation but also identification of the suitable algorithm. Moreover, LLM-generated programs lack guaranteed correctness and require human verification. To address these challenges, we propose ALGO, a framework that synthesizes Algorithmic programs with LLM-Generated Oracles to guide the generation and verify their correctness. ALGO first generates a reference oracle by prompting an LLM to exhaustively enumerate all the combinations of relevant variables. This oracle is then utilized to guide an arbitrary search strategy in exploring the algorithm space and to verify the synthesized algorithms. Our study shows that the LLM-generated\noracles are correct for 88% of the cases. With the oracles as verifiers, ALGO can be integrated with any existing code generation model in a model-agnostic manner to enhance its performance. Experiments show that when equipped with ALGO, we achieve an 8\u00d7 better one-submission pass rate over the Codex model and a 2.6\u00d7 better one-submission pass rate over CodeT, the current state-of-the-art model on CodeContests. We also achieve a 1.3\u00d7 better pass rate over the ChatGPT Code Interpreter on unseen problems.
The problem set we used for testing, the prompts we used, the verifier and solution programs, and the test cases generated by ALGO\nare available at https://github.com/zkx06111/ALGO.", "keywords": "Large Language Models;Code Generation;Code Intelligence;Automatic Verification", "primary_area": "", "supplementary_material": "/attachment/c334ce10e8056d25544caaf020ef5d3b99563e83.zip", "author": "Kexun Zhang;Danqing Wang;Jingtao Xia;William Yang Wang;Lei Li", "authorids": "~Kexun_Zhang1;~Danqing_Wang1;~Jingtao_Xia1;~William_Yang_Wang2;~Lei_Li11", "gender": "M;F;M;M;M", "homepage": "https://zkx06111.github.io;;;https://www.cs.cmu.edu/~leili;https://www.cs.ucsb.edu/~william/", "dblp": "295/8815;226/6524.html;136/5165.html;13/7007-5.html;08/9282", "google_scholar": ";https://scholar.google.com/citations?hl=en-US;;BYXqAlwAAAAJ;gf8Ms_8AAAAJ", "orcid": ";;;0000-0003-3095-9776;", "linkedin": ";;;;", "or_profile": "~Kexun_Zhang1;~Danqing_Wang1;~Jingtao_Xia1;~Lei_Li11;~William_Wang1", "aff": "University of California, Santa Barbara;University of California, Santa Barbara;University of California, Santa Barbara;Computer Science Department, UC Santa Barbara;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu;ucsb.edu;cs.ucsb.edu;ucsb.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023algo,\ntitle={{ALGO}: Synthesizing Algorithmic Programs with Generated Oracle Verifiers},\nauthor={Kexun Zhang and Danqing Wang and Jingtao Xia and William Yang Wang and Lei Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JolrEmMim6}\n}", "github": "", "project": "", "reviewers": "oqZ5;MLQT;UrDq;MVRV", "pdf_size": 1457531, "rating": "5;5;6;7", "confidence": "4;5;4;4", "soundness": "3;2;3;3", "novelty": "3;3;2;4", "presentation": "2;3;3;3", "wc_summary": "67;122;119;117", "wc_strengths": "19;56;107;79", "wc_weaknesses": "297;365;96;216", "wc_questions": "10;56;998;9", "wc_limitations": "39;1;102;12", "wc_review": "432;600;1422;433", "wc_reply_reviewers": "34;40;400;92", "wc_reply_authors": "0;0;1133;70", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;5;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 106.25, 22.730761095924613 ], "wc_strengths_avg": [ 65.25, 32.23643125409511 ], "wc_weaknesses_avg": [ 243.5, 100.17110361775995 ], "wc_questions_avg": [ 268.25, 421.7489626543259 ], "wc_limitations_avg": [ 38.5, 39.18226639693013 ], "wc_review_avg": [ 721.75, 410.03193473191817 ], "wc_reply_reviewers_avg": [ 141.5, 150.9395574393936 ], "wc_reply_authors_avg": [ 300.75, 481.3488210227589 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6906804221247861062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "ucsb.edu;ucsb.edu;ucsb.edu;cs.ucsb.edu;ucsb.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": 
"Santa Barbara", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Attentive Transfer Entropy to Exploit Transient Emergence of Coupling Effect", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72043", "id": "JpU5YmMKx7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/00bb4e415ef117f2dee2fc3b778d806d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JpU5YmMKx7", "openreview": "https://openreview.net/forum?id=JpU5YmMKx7", "poster": "/media/PosterPDFs/NeurIPS%202023/72043.png?t=1702054400.7147803", "slides": "https://nips.cc/virtual/2023/poster/72043", "video": "https://nips.cc/virtual/2023/poster/72043", "author_site": "Xiaolei Ru, XINYA ZHANG, Zijia Liu, Jack Murdoch Moore, Gang Yan", "tldr": "", "abstract": "We consider the problem of reconstructing coupled networks (e.g., biological neural networks) connecting large numbers of variables (e.g.,nerve cells), of which state evolution is governed by dissipative dynamics consisting of strong self-drive (dominants the evolution) and weak coupling-drive. The core difficulty is sparseness of coupling effect that emerges (the coupling force is significant) only momentarily and otherwise remains quiescent in time series (e.g., neuronal activity sequence). Here we learn the idea from attention mechanism to guide the classifier to make inference focusing on the critical regions of time series data where coupling effect may manifest. Specifically, attention coefficients are assigned autonomously by artificial neural networks trained to maximise the Attentive Transfer Entropy (ATEn), which is a novel generalization of the iconic transfer entropy metric. Our results show that, without any prior knowledge of dynamics, ATEn explicitly identifies areas where the strength of coupling-drive is distinctly greater than zero. 
This innovation substantially improves reconstruction performance for both synthetic and real directed coupling networks using data generated by neuronal models widely used in neuroscience.", "keywords": "Directed coupled network reconstruction; Neuronal dynamics; Mutual information estimator; Attention mechanism; Transfer entropy.", "primary_area": "", "supplementary_material": "/attachment/f5e8b45764858a46e8e8da367ec62b456790a338.zip", "author": "Xiaolei Ru;Xin-Ya Zhang;Zijia Liu;Jack Murdoch Moore;Gang Yan", "authorids": "~Xiaolei_Ru1;~Xin-Ya_Zhang1;~Zijia_Liu1;~Jack_Murdoch_Moore1;~Gang_Yan2", "gender": "M;F;M;M;M", "homepage": ";https://xinyacheung.github.io/;https://www.linkedin.com/in/zijia-liu-771137334/;https://jackmurdochmoore.github.io/JackMurdochMoore/;", "dblp": ";;137/1847;225/1858;", "google_scholar": ";C9GFvD0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;AFDBPpYAAAAJ;a7-tzj8AAAAJ", "orcid": "0000-0003-0572-9768;0000-0002-7826-3076;0000-0001-6798-8816;0000-0003-1552-3755;0000-0001-6196-2615", "linkedin": ";xin-ya-zhang-6638171b5;zijia-liu-771137334/;;", "or_profile": "~Xiaolei_Ru1;~Xin-Ya_Zhang1;~Zijia_Liu1;~Jack_Murdoch_Moore1;~Gang_Yan2", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "position": "Postdoc;PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nru2023attentive,\ntitle={Attentive Transfer Entropy to Exploit Transient Emergence of Coupling Effect},\nauthor={Xiaolei Ru and Xin-Ya Zhang and Zijia Liu and Jack Murdoch Moore and Gang Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JpU5YmMKx7}\n}", "github": "", "project": "", "reviewers": "wNf4;xCSe;JWtX;f3Wr", "pdf_size": 9557998, "rating": "6;7;7;8", "confidence": "3;3;3;4", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "45;75;76;40", "wc_strengths": "23;21;92;50", "wc_weaknesses": "167;163;86;57", "wc_questions": "399;25;53;102", "wc_limitations": "9;1;13;33", "wc_review": "643;285;320;282", "wc_reply_reviewers": "112;66;0;7", "wc_reply_authors": "144;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.0, 16.59819267269783 ], "wc_strengths_avg": [ 46.5, 28.6574597618142 ], "wc_weaknesses_avg": [ 118.25, 47.88201645712093 ], "wc_questions_avg": [ 144.75, 149.3559088218474 ], "wc_limitations_avg": [ 14.0, 11.789826122551595 ], "wc_review_avg": [ 382.5, 151.1398359136333 ], "wc_reply_reviewers_avg": [ 46.25, 45.805976684271236 ], "wc_reply_authors_avg": [ 36.0, 62.353829072479584 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6795343138942976696&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", 
"aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "LithoBench: Benchmarking AI Computational Lithography for Semiconductor Manufacturing", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73629", "id": "JqWtIIaS8n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/604b9fa9e1c16284e6517d923cf9ff20-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=JqWtIIaS8n", "openreview": "https://openreview.net/forum?id=JqWtIIaS8n", "poster": "/media/PosterPDFs/NeurIPS%202023/73629.png?t=1697555418.8319793", "slides": "https://nips.cc/virtual/2023/poster/73629", "video": "https://nips.cc/virtual/2023/poster/73629", "author_site": "Su Zheng, Haoyu Yang, Binwu Zhu, Bei Yu, Martin Wong", "tldr": "", "abstract": "Computational lithography provides algorithmic and mathematical support for resolution enhancement in optical lithography, which is the critical step in semiconductor manufacturing. \nThe time-consuming lithography simulation and mask optimization processes limit the practical application of inverse lithography technology (ILT), a promising solution to the challenges of advanced-node lithography. \nAlthough various machine learning methods for ILT have shown promise for reducing the computational burden, this field is in lack of a dataset that can train the models thoroughly and evaluate the performance comprehensively. \nTo boost the development of AI-driven computational lithography, we present the LithoBench dataset, a collection of circuit layout tiles for deep-learning-based lithography simulation and mask optimization. \nLithoBench consists of more than 120k tiles that are cropped from real circuit designs or synthesized according to the layout topologies of famous ILT testcases. \nThe ground truths are generated by a famous lithography model in academia and an advanced ILT method. \nBased on the data, we provide a framework to design and evaluate deep neural networks (DNNs) with the data. \nThe framework is used to benchmark state-of-the-art models on lithography simulation and mask optimization. \nWe hope LithoBench can promote the research and development of computational lithography. \nLithoBench is available at https://anonymous.4open.science/r/lithobench-APPL.", "keywords": "Computational Lithography;Inverse Lithography Technology;Semiconductor Manufacturing;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/1e598eac13fbde55538ab4ec8e2a892b21fba75e.pdf", "author": "Su Zheng;Haoyu Yang;Binwu Zhu;Bei Yu;Martin D.F. 
Wong", "authorids": "~Su_Zheng2;~Haoyu_Yang4;bwzhu@cse.cuhk.edu.hk;~Bei_Yu2;mdfwong@cuhk.edu.hk", "gender": "Non-Binary;M;;M;", "homepage": "https://shelljane.github.io/;https://phdyang007.github.io/;;http://www.cse.cuhk.edu.hk/~byu/index.html;", "dblp": ";;;28/4556-1.html;", "google_scholar": "imuy2mcAAAAJ;https://scholar.google.com.hk/citations?user=aTJ0RJUAAAAJ;;tGneTm4AAAAJ;", "orcid": ";;;0000-0001-6406-4810;", "linkedin": ";;;yubei/;", "or_profile": "~Su_Zheng2;~Haoyu_Yang4;bwzhu@cse.cuhk.edu.hk;~Bei_Yu2;mdfwong@cuhk.edu.hk", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;NVIDIA;;Department of Computer Science and Engineering, The Chinese University of Hong Kong;", "aff_domain": "cse.cuhk.edu.hk;nvidia.com;;cse.cuhk.edu.hk;", "position": "PhD student;Researcher;;Associate Professor;", "bibtex": "@inproceedings{\nzheng2023lithobench,\ntitle={LithoBench: Benchmarking {AI} Computational Lithography for Semiconductor Manufacturing},\nauthor={Su Zheng and Haoyu Yang and Binwu Zhu and Bei Yu and Martin D.F. Wong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=JqWtIIaS8n}\n}", "github": "", "project": "", "reviewers": "zijQ;uJf3;hRPs;URA3;EsWu", "pdf_size": 660246, "rating": "6;6;7;7;8", "confidence": "3;3;2;3;4", "wc_summary_and_contributions": "36;41;60;116;102", "wc_strengths": "61;35;51;84;18", "wc_improvement": "145;84;119;71;19", "wc_limitations": "157;37;13;51;9", "wc_correctness": "4;6;11;74;3", "wc_clarity": "8;125;1;7;3", "wc_relation_to_prior_work": "9;36;10;105;30", "wc_documentation": "4;6;1;48;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "425;371;267;557;190", "wc_reply_reviewers": "20;77;0;20;87", "wc_reply_authors": "501;862;307;301;202", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 71.0, 32.34810659064917 ], "wc_strengths_avg": [ 49.8, 22.480213522117623 ], "wc_improvement_avg": [ 87.6, 43.070175295672996 ], "wc_limitations_avg": [ 53.4, 54.057746900883686 ], "wc_correctness_avg": [ 19.6, 27.339348931530903 ], "wc_clarity_avg": [ 28.8, 48.16803919613087 ], "wc_relation_to_prior_work_avg": [ 38.0, 35.16248000354924 ], "wc_documentation_avg": [ 12.8, 17.679366504487657 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 362.0, 127.03070494963019 ], "wc_reply_reviewers_avg": [ 40.8, 34.568193473191506 ], "wc_reply_authors_avg": [ 434.6, 234.6542989165125 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14418867076489199475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cse.cuhk.edu.hk;nvidia.com;;cse.cuhk.edu.hk;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;NVIDIA", "aff_unique_dep": "Department of Computer Science and Engineering;NVIDIA Corporation", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.nvidia.com", "aff_unique_abbr": "CUHK;NVIDIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "SugarCrepe: Fixing Hackable Benchmarks for 
Vision-Language Compositionality", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73628", "id": "Jsc7WSCZd4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/63461de0b4cb760fc498e85b18a7fe81-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Jsc7WSCZd4", "openreview": "https://openreview.net/forum?id=Jsc7WSCZd4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73628", "video": "https://nips.cc/virtual/2023/poster/73628", "author_site": "Cheng-Yu Hsieh, Jieyu Zhang, Zixian Ma, Aniruddha Kembhavi, Ranjay Krishna", "tldr": "", "abstract": "In the last year alone, a surge of new benchmarks to measure $\\textit{compositional}$ understanding of vision-language models has permeated the machine learning ecosystem.\nGiven an image, these benchmarks probe a model's ability to identify its associated caption amongst a set of compositional distractors.\nSurprisingly, we find significant biases in $\\textit{all}$ these benchmarks, rendering them hackable. This hackability is so dire that blind models with no access to the image outperform state-of-the-art vision-language models.\nTo remedy this rampant vulnerability, we introduce $\\textit{SugarCrepe}$, a new benchmark for vision-language compositionality evaluation.\nWe employ large language models, instead of rule-based templates used in previous benchmarks, to generate fluent and sensical hard negatives, and utilize an adversarial refinement mechanism to maximally reduce biases. We re-evaluate state-of-the-art models and recently proposed compositionality inducing strategies, and find that their improvements were hugely overestimated, suggesting that more innovation is needed in this important direction.\nWe release $\\textit{SugarCrepe}$ and the code for evaluation at: https://github.com/RAIVNLab/sugar-crepe.", "keywords": "vision-language models;contrastive training;CLIP;compositional understanding;compositionality;dataset artifacts", "primary_area": "", "supplementary_material": "/attachment/d49bde2d343c5d8a14e2567a139be80e665da1cc.pdf", "author": "Cheng-Yu Hsieh;Jieyu Zhang;Zixian Ma;Aniruddha Kembhavi;Ranjay Krishna", "authorids": "~Cheng-Yu_Hsieh1;~Jieyu_Zhang1;~Zixian_Ma1;~Aniruddha_Kembhavi1;~Ranjay_Krishna1", "gender": "M;M;F;M;M", "homepage": "https://chengyuhsieh.github.io/;https://jieyuz2.github.io/;https://zixianma.github.io/;https://anikem.github.io/;http://ranjaykrishna.com", "dblp": "40/4421;;311/3682;81/7583;167/3785", "google_scholar": "WXX6ZwwAAAAJ;T_INUHUAAAAJ;0E-IY2IAAAAJ;JnUevM0AAAAJ;IcqahyAAAAAJ", "orcid": ";0000-0002-1846-2436;;;0000-0001-8784-2531", "linkedin": ";jieyu-zhang-3baaa8154/;zixian-ma/;;ranjay-krishna-1a344444/", "or_profile": "~Cheng-Yu_Hsieh1;~Jieyu_Zhang1;~Zixian_Ma1;~Aniruddha_Kembhavi1;~Ranjay_Krishna1", "aff": "Google;University of Washington;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;University of Washington", "aff_domain": "google.com;cs.washington.edu;cs.washington.edu;allenai.org;cs.washington.edu", "position": "Intern;PhD student;PhD student;Research Manager;Assistant Professor", "bibtex": "@inproceedings{\nhsieh2023sugarcrepe,\ntitle={SugarCrepe: Fixing Hackable Benchmarks for Vision-Language Compositionality},\nauthor={Cheng-Yu Hsieh and Jieyu Zhang and Zixian Ma and Aniruddha Kembhavi and Ranjay Krishna},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks
Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Jsc7WSCZd4}\n}", "github": "", "project": "", "reviewers": "iLag;bMrd;6iRd;eSt3", "pdf_size": 1744479, "rating": "6;7;7;9", "confidence": "4;4;3;4", "wc_summary_and_contributions": "101;61;84;109", "wc_strengths": "91;62;82;99", "wc_improvement": "157;141;52;153", "wc_limitations": "15;15;48;14", "wc_correctness": "21;62;1;32", "wc_clarity": "9;10;206;8", "wc_relation_to_prior_work": "12;9;14;57", "wc_documentation": "8;10;8;22", "wc_additional_feedback": "1;1;1;1", "wc_review": "415;371;496;495", "wc_reply_reviewers": "15;223;0;23", "wc_reply_authors": "377;1240;613;571", "reply_reviewers": "1;2;0;1", "reply_authors": "1;3;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 88.75, 18.38987493160299 ], "wc_strengths_avg": [ 83.5, 13.793114224133722 ], "wc_improvement_avg": [ 125.75, 42.98473566279081 ], "wc_limitations_avg": [ 23.0, 14.439529078193651 ], "wc_correctness_avg": [ 29.0, 22.056745000112777 ], "wc_clarity_avg": [ 58.25, 85.30643293445108 ], "wc_relation_to_prior_work_avg": [ 23.0, 19.710403344427025 ], "wc_documentation_avg": [ 12.0, 5.830951894845301 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 444.25, 53.560129760858494 ], "wc_reply_reviewers_avg": [ 65.25, 91.45046473364692 ], "wc_reply_authors_avg": [ 700.25, 324.0905544751343 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=282094575079215248&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;cs.washington.edu;cs.washington.edu;allenai.org;cs.washington.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Google;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Google;UW;AI2", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "T2T: From Distribution Learning in Training to Gradient Search in Testing for Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72042", "id": "JtF0ugNMv2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c93b3cd3bc60c0fe7b0c2d74a2da966-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JtF0ugNMv2", "openreview": "https://openreview.net/forum?id=JtF0ugNMv2", "poster": "/media/PosterPDFs/NeurIPS%202023/72042.png?t=1701506552.389186", "slides": "https://nips.cc/virtual/2023/poster/72042", "video": "https://nips.cc/virtual/2023/poster/72042", "author_site": "Yang Li, Jinpei Guo, Runzhong Wang, Junchi Yan", "tldr": "", "abstract": "Extensive experiments have gradually revealed the potential performance bottleneck of modeling Combinatorial Optimization (CO) solving as neural solution prediction tasks. The neural networks, in their pursuit of minimizing the average objective score across the distribution of historical problem instances, diverge from the core target of CO: seeking optimal solutions for every test instance.
This calls for an effective search on each problem instance, while the model should serve to provide supporting knowledge that benefits the search. To this end, we propose the T2T (Training to Testing) framework, which first leverages generative modeling to estimate the high-quality solution distribution for each instance during training, and then conducts a gradient-based search within the solution space during testing. The proposed neural search paradigm consistently leverages generative modeling, specifically diffusion, for graduated solution improvement. It disrupts the local structure of the given solution by introducing noise and reconstructs a lower-cost solution guided by the optimization objective. Experimental results on the Traveling Salesman Problem (TSP) and Maximal Independent Set (MIS) show the significant superiority of T2T, demonstrating an average performance gain of 49.15% for TSP solving and 17.27% for MIS solving compared to the previous state-of-the-art.", "keywords": "Machine Learning;Combinatorial Optimization;Generative Modeling;Diffusion Model", "primary_area": "", "supplementary_material": "", "author": "Yang Li;Jinpei Guo;Runzhong Wang;Junchi Yan", "authorids": "~Yang_Li32;~Jinpei_Guo1;~Runzhong_Wang1;~Junchi_Yan2", "gender": "M;M;M;M", "homepage": "https://yangco-le.github.io;https://jp-guo.github.io/;http://runzhong.wang;http://thinklab.sjtu.edu.cn/", "dblp": ";;239/4351;60/7949.html", "google_scholar": "ecE0xDIAAAAJ;;uoM0g3cAAAAJ;ga230VoAAAAJ", "orcid": "0000-0002-5249-3471;;0000-0002-9566-738X;0000-0001-9639-7679", "linkedin": ";;;", "or_profile": "~Yang_Li32;~Jinpei_Guo1;~Runzhong_Wang1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023from,\ntitle={From Distribution Learning in Training to Gradient Search in Testing for Combinatorial Optimization},\nauthor={Yang Li and Jinpei Guo and Runzhong Wang and Junchi Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JtF0ugNMv2}\n}", "github": "", "project": "", "reviewers": "gU16;uaDN;uUyD;p83a", "pdf_size": 3691396, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "72;97;79;76", "wc_strengths": "126;31;48;69", "wc_weaknesses": "387;178;661;26", "wc_questions": "127;177;12;70", "wc_limitations": "9;37;18;22", "wc_review": "721;520;818;263", "wc_reply_reviewers": "113;23;14;21", "wc_reply_authors": "561;31;30;20", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 9.565563234854496 ], "wc_strengths_avg": [ 68.5, 35.82247897619594 ], "wc_weaknesses_avg": [ 313.0, 238.31386866903068 ], "wc_questions_avg": [ 96.5, 61.751518199960074 ], "wc_limitations_avg": [ 21.5, 10.111874208078342 ], "wc_review_avg": [ 580.5, 212.49294105922672 ], "wc_reply_reviewers_avg": [ 42.75, 40.69628361410904 ], "wc_reply_authors_avg": [ 160.5, 231.26878302096893 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ],
"replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7835787749123335010&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Penalising the biases in norm regularisation enforces sparsity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72041", "id": "JtIqG47DAQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b444ad72520a5f5c467343be88e352ed-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JtIqG47DAQ", "openreview": "https://openreview.net/forum?id=JtIqG47DAQ", "poster": "/media/PosterPDFs/NeurIPS%202023/72041.png?t=1699350244.5279856", "slides": "https://nips.cc/virtual/2023/poster/72041", "video": "https://nips.cc/virtual/2023/poster/72041", "author_site": "Etienne Boursier, Nicolas Flammarion", "tldr": "", "abstract": "Controlling the parameters' norm often yields good generalisation when training neural networks. Beyond simple intuitions, the relation between regularising parameters' norm and obtained estimators remains theoretically misunderstood. \nFor one hidden ReLU layer networks with unidimensional data, this work shows the parameters' norm required to represent a function is given by the total variation of its second derivative, weighted by a $\\sqrt{1+x^2}$ factor. Notably, this weighting factor disappears when the norm of bias terms is not regularised. The presence of this additional weighting factor is of utmost significance as it is shown to enforce the uniqueness and sparsity (in the number of kinks) of the minimal norm interpolator. 
Conversely, omitting the bias' norm allows for non-sparse solutions.\nPenalising the bias terms in the regularisation, either explicitly or implicitly, thus leads to sparse estimators.", "keywords": "Neural networks;Min norm interpolators;Sparsity;Representational cost", "primary_area": "", "supplementary_material": "/attachment/6b8d2fb96ad0317c1cb155bbf93915f13f83ac22.zip", "author": "Etienne Boursier;Nicolas Flammarion", "authorids": "~Etienne_Boursier1;~Nicolas_Flammarion1", "gender": "M;M", "homepage": "https://eboursier.github.io/;", "dblp": "203/8633;164/7417", "google_scholar": "https://scholar.google.fr/citations?user=-9todDUAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Etienne_Boursier1;~Nicolas_Flammarion1", "aff": "Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nboursier2023penalising,\ntitle={Penalising the biases in norm regularisation enforces sparsity},\nauthor={Etienne Boursier and Nicolas Flammarion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JtIqG47DAQ}\n}", "github": "", "project": "", "reviewers": "fxbg;ZvaF;3DUe;brcW", "pdf_size": 748984, "rating": "5;5;6;7", "confidence": "3;3;3;4", "soundness": "3;4;3;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "136;131;262;79", "wc_strengths": "72;96;86;41", "wc_weaknesses": "151;216;245;64", "wc_questions": "242;100;144;46", "wc_limitations": "111;24;26;4", "wc_review": "712;567;763;234", "wc_reply_reviewers": "16;10;13;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 152.0, 67.31641701695062 ], "wc_strengths_avg": [ 73.75, 20.740961887048538 ], "wc_weaknesses_avg": [ 169.0, 69.52337736330134 ], "wc_questions_avg": [ 133.0, 71.8679344353238 ], "wc_limitations_avg": [ 41.25, 41.17872630376029 ], "wc_review_avg": [ 569.0, 206.3455839120382 ], "wc_reply_reviewers_avg": [ 15.25, 4.437059837324712 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13400372448264586705&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Train Hard, Fight Easy: Robust Meta Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72040", "id": "JvOZ4IIjwP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d74e6bfe9ce029526e69db14d2c281ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JvOZ4IIjwP", "openreview": "https://openreview.net/forum?id=JvOZ4IIjwP", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72040.png?t=1696840929.708914", "slides": "https://nips.cc/virtual/2023/poster/72040", "video": "https://nips.cc/virtual/2023/poster/72040", "author_site": "Ido Greenberg, Shie Mannor, Gal Chechik, Eli Meirom", "tldr": "", "abstract": "A major challenge of reinforcement learning (RL) in real-world applications is the variation between environments, tasks or clients. Meta-RL (MRL) addresses this issue by learning a meta-policy that adapts to new tasks. Standard MRL methods optimize the average return over tasks, but often suffer from poor results in tasks of high risk or difficulty. This limits system reliability since test tasks are not known in advance. In this work, we define a robust MRL objective with a controlled robustness level. Optimization of analogous robust objectives in RL is known to lead to both **biased gradients** and **data inefficiency**. We prove that the gradient bias disappears in our proposed MRL framework. The data inefficiency is addressed via the novel Robust Meta RL algorithm (RoML). RoML is a meta-algorithm that generates a robust version of any given MRL algorithm, by identifying and over-sampling harder tasks throughout training. We demonstrate that RoML achieves robust returns on multiple navigation and continuous control benchmarks.", "keywords": "meta reinforcement learning;robust reinforcement learning;safe reinforcement learning;risk sensitive reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/16bdbc6df277637b1aa6259e9576c5250eb93016.pdf", "author": "Ido Greenberg;Shie Mannor;Gal Chechik;Eli Meirom", "authorids": "~Ido_Greenberg1;~Shie_Mannor2;~Gal_Chechik1;~Eli_Meirom2", "gender": "M;M;;", "homepage": "https://idogreenberg.neocities.org/;https://shie.net.technion.ac.il;https://chechiklab.biu.ac.il/~gal/;", "dblp": ";20/1669;c/GalChechik;132/8961", "google_scholar": "LnwyFkkAAAAJ;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ;Wk2gAZUAAAAJ;ZYEgD7wAAAAJ", "orcid": ";;0000-0001-9164-5303;", "linkedin": "ido-greenberg-87245852/;;;", "or_profile": "~Ido_Greenberg1;~Shie_Mannor2;~Gal_Chechik1;~Eli_Meirom2", "aff": "Technion, Technion;Technion - Israel Institute of Technology, Technion;NVIDIA;", "aff_domain": "technion.ac.il;technion.il;nvidia.com;", "position": "PhD student;Full Professor;Principal Researcher;", "bibtex": "@inproceedings{\ngreenberg2023train,\ntitle={Train Hard, Fight Easy: Robust Meta Reinforcement Learning},\nauthor={Ido Greenberg and Shie Mannor and Gal Chechik and Eli Meirom},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JvOZ4IIjwP}\n}", "github": "", "project": "", "reviewers": "FerX;vMWn;CM4Y;ib31", "pdf_size": 5314266, "rating": "6;6;6;6", "confidence": "3;4;4;3", "soundness": "4;2;3;3", "novelty": "4;3;3;3", "presentation": "4;3;3;3", "wc_summary": "126;134;85;133", "wc_strengths": "46;50;65;57", "wc_weaknesses": "17;298;60;40", "wc_questions": "1;10;43;47", "wc_limitations": "1;42;24;9", "wc_review": "191;534;277;286", "wc_reply_reviewers": "10;21;13;98", "wc_reply_authors": "0;0;0;300", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 119.5, 20.155644370746373 ], "wc_strengths_avg": [ 54.5, 7.22841614740048 ], "wc_weaknesses_avg": [ 103.75, 
113.17768110365223 ], "wc_questions_avg": [ 25.25, 20.054612935681405 ], "wc_limitations_avg": [ 19.0, 15.636495771111889 ], "wc_review_avg": [ 322.0, 127.89253301111836 ], "wc_reply_reviewers_avg": [ 35.5, 36.30771267926417 ], "wc_reply_authors_avg": [ 75.0, 129.9038105676658 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13526858463205047047&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "technion.ac.il;technion.il;nvidia.com;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.nvidia.com", "aff_unique_abbr": "Technion;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "DASpeech: Directed Acyclic Transformer for Fast and High-quality Speech-to-Speech Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72039", "id": "JvYSSPtQyk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5b1c0d4866f72393c522c8a00eed4eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JvYSSPtQyk", "openreview": "https://openreview.net/forum?id=JvYSSPtQyk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72039", "video": "https://nips.cc/virtual/2023/poster/72039", "author_site": "Qingkai Fang, Yan Zhou, Yang Feng", "tldr": "", "abstract": "Direct speech-to-speech translation (S2ST) translates speech from one language into another using a single model. However, due to the presence of linguistic and acoustic diversity, the target speech follows a complex multimodal distribution, posing challenges to achieving both high-quality translations and fast decoding speeds for S2ST models. In this paper, we propose DASpeech, a non-autoregressive direct S2ST model which realizes both fast and high-quality S2ST. To better capture the complex distribution of the target speech, DASpeech adopts the two-pass architecture to decompose the generation process into two steps, where a linguistic decoder first generates the target text, and an acoustic decoder then generates the target speech based on the hidden states of the linguistic decoder. Specifically, we use the decoder of DA-Transformer as the linguistic decoder, and use FastSpeech 2 as the acoustic decoder. DA-Transformer models translations with a directed acyclic graph (DAG). To consider all potential paths in the DAG during training, we calculate the expected hidden states for each target token via dynamic programming, and feed them into the acoustic decoder to predict the target mel-spectrogram. During inference, we select the most probable path and take hidden states on that path as input to the acoustic decoder. Experiments on the CVSS Fr$\\rightarrow$En benchmark demonstrate that DASpeech can achieve comparable or even better performance than the state-of-the-art S2ST model Translatotron 2, while preserving up to 18.53$\\times$ speedup compared to the autoregressive baseline. 
Compared with the previous non-autoregressive S2ST model, DASpeech does not rely on knowledge distillation or iterative decoding, achieving significant improvements in both translation quality and decoding speed. Furthermore, DASpeech shows the ability to preserve the source speaker's voice during translation.", "keywords": "speech-to-speech translation;non-autoregressive translation;speech translation;directed acyclic transformer", "primary_area": "", "supplementary_material": "/attachment/78e7f270d6c3fc6256159d2f0332b53dfac8c420.zip", "author": "Qingkai Fang;Yan Zhou;Yang Feng", "authorids": "~Qingkai_Fang1;~Yan_Zhou9;~Yang_Feng4", "gender": "M;M;", "homepage": "https://fangqingkai.github.io/;;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "301/3107;;07/6095-4.html", "google_scholar": "n2lRntoAAAAJ;n2TrZ-IAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-8575-591X;;", "linkedin": ";;", "or_profile": "~Qingkai_Fang1;~Yan_Zhou9;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Tsinghua University;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;mail.tsinghua.edu.cn;ict.ac.cn", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nfang2023daspeech,\ntitle={{DAS}peech: Directed Acyclic Transformer for Fast and High-quality Speech-to-Speech Translation},\nauthor={Qingkai Fang and Yan Zhou and Yang Feng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JvYSSPtQyk}\n}", "github": "", "project": "", "reviewers": "PNQE;RzGP;47bw;nvMe;WJ3s", "pdf_size": 2988227, "rating": "5;5;5;7;8", "confidence": "5;4;4;4;5", "soundness": "3;2;2;4;4", "novelty": "3;2;2;3;4", "presentation": "3;3;2;3;4", "wc_summary": "147;95;77;118;46", "wc_strengths": "364;42;25;44;65", "wc_weaknesses": "1;173;62;37;61", "wc_questions": "47;1;15;105;62", "wc_limitations": "1;2;1;18;1", "wc_review": "560;313;180;322;235", "wc_reply_reviewers": "0;51;0;88;35", "wc_reply_authors": "0;735;0;17;13", "reply_reviewers": "0;2;0;1;1", "reply_authors": "1;3;1;2;2", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 96.6, 34.482459309045815 ], "wc_strengths_avg": [ 108.0, 128.6281462200245 ], "wc_weaknesses_avg": [ 66.8, 57.53746605473689 ], "wc_questions_avg": [ 46.0, 36.67151483099655 ], "wc_limitations_avg": [ 4.6, 6.7111846942250075 ], "wc_review_avg": [ 322.0, 129.96768829212897 ], "wc_reply_reviewers_avg": [ 34.8, 33.2108416033078 ], "wc_reply_authors_avg": [ 153.0, 291.08005771608606 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3227486121839514, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=379287828693459478&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ict.ac.cn;mail.tsinghua.edu.cn;ict.ac.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Tsinghua University", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CAS;THU", "aff_campus_unique_index": "",
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "CODA: Generalizing to Open and Unseen Domains with Compaction and Disambiguation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72038", "id": "Jw0KRTjsGA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29f3514801f3f327d808799f5ac122ba-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Jw0KRTjsGA", "openreview": "https://openreview.net/forum?id=Jw0KRTjsGA", "poster": "/media/PosterPDFs/NeurIPS%202023/72038.png?t=1702236248.7892795", "slides": "https://nips.cc/virtual/2023/poster/72038", "video": "https://nips.cc/virtual/2023/poster/72038", "author_site": "Chaoqi Chen, Luyao Tang, Yue Huang, Xiaoguang Han, Yizhou Yu", "tldr": "", "abstract": "The generalization capability of machine learning systems degenerates notably when the test distribution drifts from the training distribution. Recently, Domain Generalization (DG) has been gaining momentum in enabling machine learning models to generalize to unseen domains. However, most DG methods assume that training and test data share an identical label space, ignoring the potential unseen categories in many real-world applications. In this paper, we delve into a more general but difficult problem termed Open Test-Time DG (OTDG), where both domain shift and open class may occur on the unseen test data. We propose Compaction and Disambiguation (CODA), a novel two-stage framework for learning compact representations and adapting to open classes in the wild. To meaningfully regularize the model's decision boundary, CODA introduces virtual unknown classes and optimizes a new training objective to insert unknowns into the latent space by compacting the embedding space of source known classes. To adapt target samples to the source model, we then disambiguate the decision boundaries between known and unknown classes with a test-time training objective, mitigating the adaptivity gap and catastrophic forgetting challenges. 
Experiments reveal that CODA can significantly outperform the previous best method on standard DG datasets and harmonize the classification accuracy between known and unknown classes.", "keywords": "Domain generalization;domain shift;open class;source compaction;target disambiguation", "primary_area": "", "supplementary_material": "/attachment/4fd417fc2d1bba0f91f6ad0765313e57e7703ffe.pdf", "author": "Chaoqi Chen;Luyao Tang;Yue Huang;Xiaoguang Han;Yizhou Yu", "authorids": "~Chaoqi_Chen2;~Luyao_Tang1;~Yue_Huang1;~Xiaoguang_Han2;~Yizhou_Yu1", "gender": "M;M;F;M;M", "homepage": ";https://lytang63.github.io/;https://huangyue05.github.io/;https://gaplab.cuhk.edu.cn/;", "dblp": "230/4601;163/8474;48/2209-1;60/8294;90/6896.html", "google_scholar": "https://scholar.google.com.hk/citations?user=MmUZ_AQAAAAJ;kKaYkMcAAAAJ;smxgn4YAAAAJ;;e38fTZQAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Chaoqi_Chen2;~Luyao_Tang1;~Yue_Huang1;~Xiaoguang_Han2;~Yizhou_Yu1", "aff": "The University of Hong Kong;Xiamen University;Xiamen University;The Chinese University of Hong Kong, Shenzhen;The University of Hong Kong", "aff_domain": "hku.hk;xmu.edu.cn;xmu.edu.cn;cuhk.edu.cn;hku.hk", "position": "PhD student;MS student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023coda,\ntitle={{CODA}: Generalizing to Open and Unseen Domains with Compaction and Disambiguation},\nauthor={Chaoqi Chen and Luyao Tang and Yue Huang and Xiaoguang Han and Yizhou Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Jw0KRTjsGA}\n}", "github": "", "project": "", "reviewers": "nkce;bs2E;shJ8;BmaN", "pdf_size": 1649644, "rating": "6;7;8;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;4;3", "presentation": "3;1;3;4", "wc_summary": "105;192;105;129", "wc_strengths": "99;139;116;55", "wc_weaknesses": "200;322;237;26", "wc_questions": "28;180;2;15", "wc_limitations": "87;3;7;62", "wc_review": "519;836;467;287", "wc_reply_reviewers": "61;116;40;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 132.75, 35.58352849282937 ], "wc_strengths_avg": [ 102.25, 30.75203245315665 ], "wc_weaknesses_avg": [ 196.25, 107.78769642217983 ], "wc_questions_avg": [ 56.25, 72.03601529790498 ], "wc_limitations_avg": [ 39.75, 35.88436288970448 ], "wc_review_avg": [ 527.25, 197.95501382890004 ], "wc_reply_reviewers_avg": [ 59.75, 35.28721439841915 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1473676182220928288&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hku.hk;xmu.edu.cn;xmu.edu.cn;cuhk.edu.cn;hku.hk", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University of Hong Kong;Xiamen University;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;https://www.xmu.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "HKU;XMU;CUHK", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Hong Kong SAR;;Shenzhen", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Provably (More) Sample-Efficient Offline RL with Options", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72037", "id": "JwNXeBdkeo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d91b532a76ea98ac1ef5226b862bfc49-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JwNXeBdkeo", "openreview": "https://openreview.net/forum?id=JwNXeBdkeo", "poster": "/media/PosterPDFs/NeurIPS%202023/72037.png?t=1701176099.8165545", "slides": "https://nips.cc/virtual/2023/poster/72037", "video": "https://nips.cc/virtual/2023/poster/72037", "author_site": "Xiaoyan Hu, Ho-fung Leung", "tldr": "", "abstract": "The options framework yields empirical success in long-horizon planning problems of reinforcement learning (RL). Recent works show that options help improve the sample efficiency in online RL. However, these results are no longer applicable to scenarios where exploring the environment online is risky, e.g., automated driving and healthcare. In this paper, we provide the first analysis of the sample complexity for offline RL with options, where the agent learns from a dataset without further interaction with the environment. We derive a novel information-theoretic lower bound, which generalizes the one for offline learning with actions. We propose the PEssimistic Value Iteration for Learning with Options (PEVIO) algorithm and establish near-optimal suboptimality bounds for two popular data-collection procedures, where the first one collects state-option transitions and the second one collects state-action transitions. We show that compared to offline RL with actions, using options not only enjoys a faster finite-time convergence rate (to the optimal value) but also attains a better performance when either the options are carefully designed or the offline data is limited. 
Based on these results, we analyze the pros and cons of the data-collection procedures.", "keywords": "Learning with Options;Offline RL;Provably Efficient RL", "primary_area": "", "supplementary_material": "/attachment/1187f0895bf6bdb234913fc3d701fda250c7a3de.pdf", "author": "Xiaoyan Hu;Ho-fung Leung", "authorids": "~Xiaoyan_Hu2;~Ho-fung_Leung1", "gender": "M;M", "homepage": "https://yannxiaoyanhu.github.io;http://www.cse.cuhk.edu.hk/~lhf/", "dblp": ";l/HofungLeung", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=JDErdKcAAAAJ", "orcid": "0000-0002-5766-1059;0000-0003-4914-2934", "linkedin": "xiaoyan-hu-9a26661b9/;ho-fung-leung-1a73135/", "or_profile": "~Xiaoyan_Hu2;~Ho-fung_Leung1", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nhu2023provably,\ntitle={Provably (More) Sample-Efficient Offline {RL} with Options},\nauthor={Xiaoyan Hu and Ho-fung Leung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JwNXeBdkeo}\n}", "github": "", "project": "", "reviewers": "FY62;UEUz;V1tj;BJAs", "pdf_size": 494820, "rating": "6;6;7;7", "confidence": "2;2;3;3", "soundness": "3;3;2;3", "novelty": "2;3;3;2", "presentation": "3;2;3;2", "wc_summary": "96;63;173;101", "wc_strengths": "52;15;105;84", "wc_weaknesses": "87;27;403;67", "wc_questions": "170;186;71;48", "wc_limitations": "11;8;58;35", "wc_review": "416;299;810;335", "wc_reply_reviewers": "26;35;625;0", "wc_reply_authors": "8;8;624;0", "reply_reviewers": "1;1;3;0", "reply_authors": "2;2;4;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 108.25, 40.13337140086788 ], "wc_strengths_avg": [ 64.0, 34.007352146263905 ], "wc_weaknesses_avg": [ 146.0, 149.9433226255841 ], "wc_questions_avg": [ 118.75, 60.072352209647995 ], "wc_limitations_avg": [ 28.0, 20.23610634484806 ], "wc_review_avg": [ 465.0, 203.6430701006052 ], "wc_reply_reviewers_avg": [ 171.5, 262.14356753504364 ], "wc_reply_authors_avg": [ 160.0, 267.9104327942456 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TgWZYBy1A0wJ:scholar.google.com/&scioq=Provably+(More)+Sample-Efficient+Offline+RL+with+Options&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "cse.cuhk.edu.hk;cuhk.edu.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Opening the Vocabulary of Egocentric Actions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72036", "id": "JzQ7QClAdF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/690e82a09bcb3f101831962bf3cb54ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JzQ7QClAdF", "openreview": "https://openreview.net/forum?id=JzQ7QClAdF", "poster": 
"/media/PosterPDFs/NeurIPS%202023/72036.png?t=1701413934.432982", "slides": "https://nips.cc/virtual/2023/poster/72036", "video": "https://nips.cc/virtual/2023/poster/72036", "author_site": "Dibyadip Chatterjee, Fadime Sener, Shugao Ma, Angela Yao", "tldr": "", "abstract": "Human actions in egocentric videos often feature hand-object interactions composed of a verb (performed by the hand) applied to an object. Despite their extensive scaling up, egocentric datasets still face two limitations \u2014 sparsity of action compositions and a closed set of interacting objects. This paper proposes a novel open vocabulary action recognition task. Given a set of verbs and objects observed during training, the goal is to generalize the verbs to an open vocabulary of actions with seen and novel objects. To this end, we decouple the verb and object predictions via an object-agnostic _verb encoder_ and a prompt-based _object encoder_. The prompting leverages CLIP representations to predict an open vocabulary of interacting objects. We create open vocabulary benchmarks on the EPIC-KITCHENS-100 and Assembly101 datasets; whereas closed-action methods fail to generalize, our proposed method is effective. In addition, our object encoder significantly outperforms existing open-vocabulary visual recognition methods in recognizing novel interacting objects.", "keywords": "video understanding;egocentric videos;open vocabulary", "primary_area": "", "supplementary_material": "/attachment/3b5372fb52c1f1226dcd4f2277d7b6cdea8a6c8e.pdf", "author": "Dibyadip Chatterjee;Fadime Sener;Shugao Ma;Angela Yao", "authorids": "~Dibyadip_Chatterjee1;~Fadime_Sener1;~Shugao_Ma3;~Angela_Yao1", "gender": "M;F;M;", "homepage": "https://dibschat.github.io/;https://fadimesener.github.io/;https://shugaoma.github.io/;http://www.angelayao.com", "dblp": "268/3564;119/1497;70/418;64/8484", "google_scholar": "6AxH8lcAAAAJ;-juoweoAAAAJ;SUd2LJUAAAAJ;https://scholar.google.ch/citations?user=-LJCZMMAAAAJ", "orcid": ";0000-0001-5004-6005;;", "linkedin": ";;;", "or_profile": "~Dibyadip_Chatterjee1;~Fadime_Sener1;~Shugao_Ma3;~Angela_Yao1", "aff": "National University of Singapore;Meta;Meta;National University of Singapore", "aff_domain": "nus.edu.sg;meta.com;meta.com;nus.edu.sg", "position": "Researcher;Researcher;Research Scientist Manager;Associate Professor", "bibtex": "@inproceedings{\nchatterjee2023opening,\ntitle={Opening the Vocabulary of Egocentric Actions},\nauthor={Dibyadip Chatterjee and Fadime Sener and Shugao Ma and Angela Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JzQ7QClAdF}\n}", "github": "", "project": "", "reviewers": "TsuW;FCUw;DGnw;KvE6;jmBw", "pdf_size": 2975571, "rating": "4;5;5;5;5", "confidence": "5;3;3;4;3", "soundness": "3;2;3;2;3", "novelty": "3;2;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "82;99;80;109;177", "wc_strengths": "43;106;47;103;169", "wc_weaknesses": "148;325;197;303;351", "wc_questions": "105;207;21;140;91", "wc_limitations": "30;12;13;26;4", "wc_review": "408;749;358;681;792", "wc_reply_reviewers": "35;25;25;23;0", "wc_reply_authors": "30;48;46;46;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 4.8, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 109.4, 35.47731669672891 ], "wc_strengths_avg": [ 93.6, 
46.17185289762584 ], "wc_weaknesses_avg": [ 264.8, 78.4255060551094 ], "wc_questions_avg": [ 112.8, 60.95375296074886 ], "wc_limitations_avg": [ 17.0, 9.591663046625438 ], "wc_review_avg": [ 597.6, 179.45762731073873 ], "wc_reply_reviewers_avg": [ 21.6, 11.586198686368192 ], "wc_reply_authors_avg": [ 34.0, 18.19890106572372 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8750000000000001, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13150083855644819796&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "nus.edu.sg;meta.com;meta.com;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "National University of Singapore;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.nus.edu.sg;https://meta.com", "aff_unique_abbr": "NUS;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Block Low-Rank Preconditioner with Shared Basis for Stochastic Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72035", "id": "JzQlGqBm8d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/389cfad711d2b1e2128e931feee80230-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=JzQlGqBm8d", "openreview": "https://openreview.net/forum?id=JzQlGqBm8d", "poster": "/media/PosterPDFs/NeurIPS%202023/72035.png?t=1701068549.390607", "slides": "https://nips.cc/virtual/2023/poster/72035", "video": "https://nips.cc/virtual/2023/poster/72035", "author_site": "Jui-Nan Yen, Sai Surya Duvvuri, Inderjit Dhillon, Cho-Jui Hsieh", "tldr": "", "abstract": "Adaptive methods with non-diagonal preconditioning have shown state-of-the-art results on various tasks. However, their computational complexity and memory requirements make it challenging to scale these methods to modern neural network architectures. To address this challenge, some previous works have adopted block-diagonal preconditioners. However, the memory cost of storing the block-diagonal matrix remains substantial, leading to the use of smaller block sizes and ultimately resulting in suboptimal performance. To reduce the time and memory complexity without sacrificing performance, we propose approximating each diagonal block of the second moment matrix by low-rank matrices and enforcing the same basis for the blocks within each layer. We provide theoretical justification for such sharing and design an algorithm to efficiently maintain this shared-basis block low-rank approximation during training. 
Our results on a deep autoencoder and a transformer benchmark demonstrate that the proposed method outperforms first-order methods with slightly more time and memory usage, while also achieving competitive or superior performance compared to other second-order methods with less time and memory usage.", "keywords": "Second Order Optimization;Optimization for deep networks", "primary_area": "", "supplementary_material": "/attachment/f5666e2f32b18ae4b9a0be04daf6c9f4cb5eb9f0.pdf", "author": "Jui-Nan Yen;Sai Surya Duvvuri;Inderjit S Dhillon;Cho-Jui Hsieh", "authorids": "~Jui-Nan_Yen1;~Sai_Surya_Duvvuri1;~Inderjit_S_Dhillon1;~Cho-Jui_Hsieh1", "gender": "M;M;M;M", "homepage": ";;http://www.cs.utexas.edu/users/inderjit/;http://web.cs.ucla.edu/~chohsieh/index.html", "dblp": "312/3782.html;277/6122;d/InderjitSDhillon;14/2770", "google_scholar": ";UL3980gAAAAJ;xBv5ZfkAAAAJ;Wy89g4IAAAAJ", "orcid": "0000-0002-4068-6348;;;", "linkedin": ";sai-surya-duvvuri-79903511b/;inderjit-dhillon-a20888b0/;", "or_profile": "~Jui-Nan_Yen1;~Sai_Surya_Duvvuri1;~Inderjit_S_Dhillon1;~Cho-Jui_Hsieh1", "aff": "University of California, Los Angeles;University of Texas at Austin;University of Texas, Austin;Amazon", "aff_domain": "cs.ucla.edu;cs.utexas.edu;utexas.edu;amazon.com", "position": "PhD student;PhD student;Full Professor;visiting scholar", "bibtex": "@inproceedings{\nyen2023block,\ntitle={Block Low-Rank Preconditioner with Shared Basis for Stochastic Optimization},\nauthor={Jui-Nan Yen and Sai Surya Duvvuri and Inderjit S Dhillon and Cho-Jui Hsieh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=JzQlGqBm8d}\n}", "github": "", "project": "", "reviewers": "ssTg;Z6xV;BzFF;9J53;auuW;Yba5", "pdf_size": 1052548, "rating": "5;5;6;6;6;7", "confidence": "3;2;3;3;5;3", "soundness": "2;2;4;3;3;4", "novelty": "2;2;3;3;3;3", "presentation": "3;2;3;3;3;3", "wc_summary": "49;100;322;115;78;75", "wc_strengths": "58;23;71;66;49;100", "wc_weaknesses": "87;33;78;175;112;78", "wc_questions": "24;15;534;147;508;112", "wc_limitations": "10;32;36;129;35;7", "wc_review": "228;203;1041;632;782;372", "wc_reply_reviewers": "0;98;35;285;760;0", "wc_reply_authors": "0;296;0;428;889;0", "reply_reviewers": "0;1;1;2;3;0", "reply_authors": "1;2;1;3;3;1", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 3.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 123.16666666666667, 91.28602789523099 ], "wc_strengths_avg": [ 61.166666666666664, 23.24805846134731 ], "wc_weaknesses_avg": [ 93.83333333333333, 43.14091896203521 ], "wc_questions_avg": [ 223.33333333333334, 215.6027726063734 ], "wc_limitations_avg": [ 41.5, 40.82789079375356 ], "wc_review_avg": [ 543.0, 304.7162614630207 ], "wc_reply_reviewers_avg": [ 196.33333333333334, 270.30024458409616 ], "wc_reply_authors_avg": [ 268.8333333333333, 323.3916184580066 ], "reply_reviewers_avg": [ 1.1666666666666667, 1.0671873729054748 ], "reply_authors_avg": [ 1.8333333333333333, 0.8975274678557508 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3152641443777314, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7183356626987037755&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.ucla.edu;cs.utexas.edu;utexas.edu;amazon.com", "author_num": 4, 
"aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of California, Los Angeles;University of Texas at Austin;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.utexas.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;UT Austin;Amazon", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Los Angeles;Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "K10zWxlEGI", "title": "Expressive dynamics models with nonlinear injective readouts enable reliable recovery of latent features from neural activity", "track": "main", "status": "Reject", "tldr": "", "abstract": "The advent of large-scale neural recordings has enabled new approaches that aim to discover the computational mechanisms of neural circuits by understanding the rules that govern how their state evolves over time. While these \\textit{neural dynamics} cannot be directly measured, they can typically be approximated by low-dimensional models in a latent space. How these models represent the mapping from latent space to neural space can affect the interpretability of the latent representation. Typical choices for this mapping (e.g., linear layer or MLP) lack the property of injectivity, meaning that changes in latent state may have no effect on neural activity. During training, non-injective readouts incentivize the invention of dynamics that misrepresent the underlying system and the computation it performs. Combining our injective Flow readout with prior work on interpretable latent dynamics models, we created the Ordinary Differential equations autoencoder with Injective Nonlinear readout (ODIN), which learns to capture latent dynamical systems that are nonlinearly embedded into observed neural firing rates via an approximately injective nonlinear mapping. We show that ODIN can recover nonlinearly embedded systems from simulated neural activity, even when the nature of the system and embedding are unknown. Additionally, we show that ODIN enables the unsupervised recovery of underlying dynamical features (e.g., fixed-points) and embedding geometry. When applied to biological neural recordings, ODIN can reconstruct neural activity with comparable accuracy to previous state-of-the-art methods while using substantially fewer latent dimensions. 
Overall, ODIN's accuracy in recovering ground-truth latent features and ability to accurately reconstruct neural activity with low dimensionality make it a promising method for distilling interpretable dynamics that can help explain neural computation.", "keywords": "computational neuroscience;systems neuroscience;neural population dynamics;interpretability;neuroscience;latent dynamics;neural manifolds;neural ODEs;RNNs;sequential autoencoders;invertible neural networks", "primary_area": "", "supplementary_material": "/attachment/06b7e6c9f7694222e85091ed1307bbda2eb81bec.pdf", "author": "Christopher Versteeg;Andrew R Sedler;Jonathan David McCart;Chethan Pandarinath", "authorids": "~Christopher_Versteeg1;~Andrew_R_Sedler1;jmccart6@gatech.edu;~Chethan_Pandarinath1", "gender": "M;;;M", "homepage": ";;;http://snel.gatech.edu", "dblp": ";;;", "google_scholar": "YBB0YAQAAAAJ;;;M3-z9G4AAAAJ", "orcid": ";;;0000-0003-1241-1432", "linkedin": ";;;", "or_profile": "~Christopher_Versteeg1;~Andrew_R_Sedler1;jmccart6@gatech.edu;~Chethan_Pandarinath1", "aff": "Emory University;;;Georgia Institute of Technology", "aff_domain": "emory.edu;;;gatech.edu", "position": "Postdoc;;;Assistant Professor", "bibtex": "@misc{\nversteeg2023expressive,\ntitle={Expressive dynamics models with nonlinear injective readouts enable reliable recovery of latent features from neural activity},\nauthor={Christopher Versteeg and Andrew R Sedler and Jonathan David McCart and Chethan Pandarinath},\nyear={2023},\nurl={https://openreview.net/forum?id=K10zWxlEGI}\n}", "github": "", "project": "", "reviewers": "dR2R;x81c;UThX;GbAv", "site": "https://openreview.net/forum?id=K10zWxlEGI", "pdf_size": 1628402, "rating": "5;5;5;7", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;3;4;3", "wc_summary": "64;57;91;95", "wc_strengths": "85;37;57;113", "wc_weaknesses": "296;101;198;141", "wc_questions": "67;4;24;32", "wc_limitations": "13;1;1;42", "wc_review": "525;200;371;423", "wc_reply_reviewers": "264;0;192;0", "wc_reply_authors": "454;0;0;0", "reply_reviewers": "2;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 16.498105951896417 ], "wc_strengths_avg": [ 73.0, 28.705400188814647 ], "wc_weaknesses_avg": [ 184.0, 73.2768722039908 ], "wc_questions_avg": [ 31.75, 22.76373211931646 ], "wc_limitations_avg": [ 14.25, 16.753730927766508 ], "wc_review_avg": [ 379.75, 117.6379509342117 ], "wc_reply_reviewers_avg": [ 114.0, 116.8075340035907 ], "wc_reply_authors_avg": [ 113.5, 196.58776665906757 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11943399780450523974&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1", "aff_unique_norm": "Emory University;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.emory.edu;https://www.gatech.edu", "aff_unique_abbr": "Emory;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Mask-aware CLIP Representations for Zero-Shot Segmentation", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72034", "id": "K1Uzj8tuwd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ffe484a646db13891bb6435ca39d667-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K1Uzj8tuwd", "openreview": "https://openreview.net/forum?id=K1Uzj8tuwd", "poster": "/media/PosterPDFs/NeurIPS%202023/72034.png?t=1702110286.9029052", "slides": "https://nips.cc/virtual/2023/poster/72034", "video": "https://nips.cc/virtual/2023/poster/72034", "author_site": "Siyu Jiao, Yunchao Wei, Yaowei Wang, Yao Zhao, Humphrey Shi", "tldr": "", "abstract": "Recently, pre-trained vision-language models have been increasingly used to tackle the challenging zero-shot segmentation task. Typical solutions follow the paradigm of first generating mask proposals and then adopting CLIP to classify them. To maintain the CLIP's zero-shot transferability, previous practices favour to freeze CLIP during training. However, in the paper, we reveal that CLIP is insensitive to different mask proposals and tends to produce similar predictions for various mask proposals of the same image. This insensitivity results in numerous false positives when classifying mask proposals. This issue mainly relates to the fact that CLIP is trained with image-level supervision. To alleviate this issue, we propose a simple yet effective method, named Mask-aware Fine-tuning (MAFT). Specifically, Image-Proposals CLIP Encoder (IP-CLIP Encoder) is proposed to handle arbitrary numbers of image and mask proposals simultaneously. Then, *mask-aware loss* and *self-distillation loss* are designed to fine-tune IP-CLIP Encoder, ensuring CLIP is responsive to different mask proposals while not sacrificing transferability. In this way, mask-aware representations can be easily learned to make the true positives stand out. Notably, our solution can seamlessly plug into most existing methods without introducing any new parameters during the fine-tuning process. We conduct extensive experiments on the popular zero-shot benchmarks. With MAFT, the performance of the state-of-the-art methods is promoted by a large margin: 50.4\\% (+ 8.2\\%) on COCO, 81.8\\% (+ 3.2\\%) on Pascal-VOC, and 8.7\\% (+4.3\\%) on ADE20K in terms of mIoU for unseen classes. Codes will be provided for reproducibility. 
Code is available at https://github.com/jiaosiyu1999/MAFT.git .", "keywords": "Zero-Shot Segmentation;Open-Vocabulary Segmentation;Fine-tuning", "primary_area": "", "supplementary_material": "/attachment/60dcb6c2fd8e8bf61048d151e1db1be857492a97.pdf", "author": "Siyu Jiao;Yunchao Wei;Yaowei Wang;Yao Zhao;Humphrey Shi", "authorids": "~Siyu_Jiao1;~Yunchao_Wei1;~Yaowei_Wang1;~Yao_Zhao1;~Humphrey_Shi1", "gender": "M;M;M;M;M", "homepage": "https://github.com/jiaosiyu1999;https://weiyc.github.io/;https://dblp.org/pid/68/2992.html;http://mepro.bjtu.edu.cn;https://www.humphreyshi.com", "dblp": "337/2521;118/5394;68/2992-1;45/2091-1.html;176/5516", "google_scholar": ";https://scholar.google.com.sg/citations?user=qL9Csv0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;474TbQYAAAAJ;WBvt5A8AAAAJ", "orcid": ";;0000-0002-6110-4036;;0000-0002-2922-5663", "linkedin": ";;yaowei-wang-971ab310/;;humphreyshi", "or_profile": "~Siyu_Jiao1;~Yunchao_Wei1;~Yaowei_Wang1;~Yao_Zhao1;~Honghui_Shi1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Pengcheng Laboratory;Beijing Jiaotong University;University of Oregon", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;pcl.ac.cn;bjtu.edu.cn;uoregon.edu", "position": "MS student;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njiao2023learning,\ntitle={Learning Mask-aware {CLIP} Representations for Zero-Shot Segmentation},\nauthor={Siyu Jiao and Yunchao Wei and Yaowei Wang and Yao Zhao and Humphrey Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K1Uzj8tuwd}\n}", "github": "", "project": "", "reviewers": "uMA5;fJQP;q7Qn;XVP7", "pdf_size": 0, "rating": "4;5;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "81;61;93;43", "wc_strengths": "44;35;68;93", "wc_weaknesses": "135;61;61;52", "wc_questions": "3;237;16;2", "wc_limitations": "8;1;15;1", "wc_review": "271;395;253;191", "wc_reply_reviewers": "0;42;0;0", "wc_reply_authors": "0;536;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.5, 19.09842925478428 ], "wc_strengths_avg": [ 60.0, 22.54994456755936 ], "wc_weaknesses_avg": [ 77.25, 33.54381463101655 ], "wc_questions_avg": [ 64.5, 99.74592723515082 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 277.5, 74.04559406203721 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 134.0, 232.09480821422954 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4976258469767746707&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "bjtu.edu.cn;bjtu.edu.cn;pcl.ac.cn;bjtu.edu.cn;uoregon.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Beijing Jiao Tong University;Pengcheng Laboratory;University of Oregon", "aff_unique_dep": ";;", "aff_unique_url": "http://www.njtu.edu.cn/en;;https://www.uoregon.edu", "aff_unique_abbr": "BJTU;;UO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": 
"China;United States" }, { "title": "Controlling Text-to-Image Diffusion by Orthogonal Finetuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72033", "id": "K30wTdIIYc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/faacb7a4827b4d51e201666b93ab5fa7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K30wTdIIYc", "openreview": "https://openreview.net/forum?id=K30wTdIIYc", "poster": "/media/PosterPDFs/NeurIPS%202023/72033.png?t=1701834220.6207519", "slides": "https://nips.cc/virtual/2023/poster/72033", "video": "https://nips.cc/virtual/2023/poster/72033", "author_site": "Zeju Qiu, Weiyang Liu, Haiwen Feng, Yuxuan Xue, Yao Feng, Zhen Liu, Dan Zhang, Adrian Weller, Bernhard Sch\u00f6lkopf", "tldr": "", "abstract": "Large text-to-image diffusion models have impressive capabilities in generating photorealistic images from text prompts. How to effectively guide or control these powerful models to perform different downstream tasks becomes an important open problem. To tackle this challenge, we introduce a principled finetuning method -- Orthogonal Finetuning (OFT), for adapting text-to-image diffusion models to downstream tasks. Unlike existing methods, OFT can provably preserve hyperspherical energy which characterizes the pairwise neuron relationship on the unit hypersphere. We find that this property is crucial for preserving the semantic generation ability of text-to-image diffusion models. To improve finetuning stability, we further propose Constrained Orthogonal Finetuning (COFT) which imposes an additional radius constraint to the hypersphere. Specifically, we consider two important finetuning text-to-image tasks: subject-driven generation where the goal is to generate subject-specific images given a few images of a subject and a text prompt, and controllable generation where the goal is to enable the model to take in additional control signals. 
We empirically show that our OFT framework outperforms existing methods in generation quality and convergence speed.", "keywords": "Text-to-image;diffusion models;finetuning;generative models;orthogonality", "primary_area": "", "supplementary_material": "", "author": "Zeju Qiu;Weiyang Liu;Haiwen Feng;Yuxuan Xue;Yao Feng;Zhen Liu;Dan Zhang;Adrian Weller;Bernhard Sch\u00f6lkopf", "authorids": "~Zeju_Qiu1;~Weiyang_Liu1;~Haiwen_Feng1;~Yuxuan_Xue1;~Yao_Feng3;~Zhen_Liu6;~Dan_Zhang1;~Adrian_Weller1;~Bernhard_Sch\u00f6lkopf1", "gender": "M;M;M;;F;M;;M;", "homepage": ";http://wyliu.com/;https://ps.is.mpg.de/person/hfeng;http://yuxuan-xue.com;https://ps.is.tuebingen.mpg.de/person/yfeng;;;http://mlg.eng.cam.ac.uk/adrian/;", "dblp": "276/4222;137/1532;119/9168;254/6994;05/9861;77/35-19;21/802-17;73/8324;", "google_scholar": "7y5RN9wAAAAJ;DMjROf0AAAAJ;g5co-iIAAAAJ;5SKNmhcAAAAJ;wNQQhSIAAAAJ;I1IiJCAAAAAJ;https://scholar.google.de/citations?user=yazO-mMAAAAJ;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;", "orcid": ";;;;0000-0002-9481-9783;;0000-0003-0930-9162;;", "linkedin": "zeju-qiu-729b8018a/;;;;;;;;", "or_profile": "~Zeju_Qiu1;~Weiyang_Liu1;~Haiwen_Feng1;~Yuxuan_Xue1;~Yao_Feng3;~Zhen_Liu6;~Dan_Zhang1;~Adrian_Weller1;~Bernhard_Sch\u00f6lkopf1", "aff": "Technische Universit\u00e4t M\u00fcnchen;University of Cambridge;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;ETHZ - ETH Zurich;University of Montreal;Robert Bosch GmbH, Bosch;University of Cambridge;", "aff_domain": "tum.de;cam.ac.uk;tuebingen.mpg.de;uni-tuebingen.de;ethz.ch;umontreal.ca;de.bosch.com;cam.ac.uk;", "position": "MS student;Researcher;PhD student;PhD student;PhD student;PhD student;Research Scientist;Principal Researcher;", "bibtex": "@inproceedings{\nqiu2023controlling,\ntitle={Controlling Text-to-Image Diffusion by Orthogonal Finetuning},\nauthor={Zeju Qiu and Weiyang Liu and Haiwen Feng and Yuxuan Xue and Yao Feng and Zhen Liu and Dan Zhang and Adrian Weller and Bernhard Sch{\\\"o}lkopf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K30wTdIIYc}\n}", "github": "", "project": "", "reviewers": "jibc;q6NS;YBiq;styH;i8ZS", "pdf_size": 22932587, "rating": "6;6;6;6;7", "confidence": "4;3;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "50;87;18;65;85", "wc_strengths": "101;105;65;40;53", "wc_weaknesses": "167;137;184;25;95", "wc_questions": "101;1;107;12;3", "wc_limitations": "8;38;1;12;5", "wc_review": "427;368;375;154;241", "wc_reply_reviewers": "47;20;0;38;14", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 61.0, 25.44798616786798 ], "wc_strengths_avg": [ 72.8, 25.926048676958086 ], "wc_weaknesses_avg": [ 121.6, 56.97578432983613 ], "wc_questions_avg": [ 44.8, 48.51556451284474 ], "wc_limitations_avg": [ 12.8, 13.105723940324701 ], "wc_review_avg": [ 313.0, 100.32945728947207 ], "wc_reply_reviewers_avg": [ 23.8, 16.83330033000065 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.2500000000000001, 
"gs_citation": 123, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7541867207410028432&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "tum.de;cam.ac.uk;tuebingen.mpg.de;uni-tuebingen.de;ethz.ch;umontreal.ca;de.bosch.com;cam.ac.uk;", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;6;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;University of Cambridge;Max Planck Institute for Intelligent Systems;Eberhard Karls University of T\u00fcbingen;ETH Zurich;University of Montreal;Robert Bosch GmbH", "aff_unique_dep": ";;Intelligent Systems;;;;", "aff_unique_url": "https://www.tum.de;https://www.cam.ac.uk;https://www.mpi-is.mpg.de;https://www.uni-tuebingen.de/;https://www.ethz.ch;https://wwwumontreal.ca;https://www.bosch.com", "aff_unique_abbr": "TUM;Cambridge;MPI-IS;Uni T\u00fcbingen;ETHZ;UM;Bosch", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Cambridge;T\u00fcbingen", "aff_country_unique_index": "0;1;0;0;2;3;0;1", "aff_country_unique": "Germany;United Kingdom;Switzerland;Canada" }, { "id": "K3BMejPSyQ", "title": "Two-timescale Derivative Free Optimization for Performative Prediction with Markovian Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper studies the performative prediction problem where the learner aims to minimize the expected loss with a decision-dependent data distribution. Such setting is motivated when outcomes can be affected by the prediction model. We consider a state-dependent setting where the data distribution evolves according to an underlying controlled Markov chain. We focus on derivative free optimization (DFO) where the learner is given access to a loss function evaluation oracle with the above Markov chain data. We propose a two-timescale DFO($\\lambda$) algorithm that features (i) a novel forgetting factor $\\lambda$ to utilize every observed sample as opposed to the common sample burn-in approach, and (ii) a two-timescale diminishing step size to balance the rates of DFO updates and bias reduction. Under a general non-convex optimization setting, we show that DFO($\\lambda$) requires at most ${\\cal O}(d^2/\\epsilon^3)$ samples (up to a log factor) to attain a near-stationary solution with expected squared gradient norm less than $\\epsilon$. 
Numerical experiments verify our analysis.", "keywords": "Performative Prediction;Derivative Free Optimization;Markovian data", "primary_area": "", "supplementary_material": "/attachment/0401044f96e02262fadcdeba22df69542f21e054.pdf", "author": "Haitong LIU;Qiang LI;Hoi To Wai", "authorids": "antonyhtliu@link.cuhk.edu.hk;~Qiang_LI7;~Hoi_To_Wai1", "gender": ";M;M", "homepage": ";;http://www1.se.cuhk.edu.hk/~htwai/", "dblp": ";;29/9875", "google_scholar": ";NjVNiJ8AAAAJ;https://scholar.google.com.hk/citations?user=5-J7LeMAAAAJ", "orcid": ";0009-0006-1024-1344;", "linkedin": ";;", "or_profile": "antonyhtliu@link.cuhk.edu.hk;~Qiang_LI7;~Hoi_To_Wai1", "aff": ";Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": ";se.cuhk.edu.hk;cuhk.edu.hk", "position": ";PhD student;Assistant Professor", "bibtex": "@misc{\nliu2023twotimescale,\ntitle={Two-timescale Derivative Free Optimization for Performative Prediction with Markovian Data},\nauthor={Haitong LIU and Qiang LI and Hoi To Wai},\nyear={2023},\nurl={https://openreview.net/forum?id=K3BMejPSyQ}\n}", "github": "", "project": "", "reviewers": "3uod;bs2h;pcvM;NH9K;vEJR", "site": "https://openreview.net/forum?id=K3BMejPSyQ", "pdf_size": 1769693, "rating": "5;5;5;6;7", "confidence": "2;4;3;2;4", "soundness": "3;4;4;3;4", "novelty": "3;1;2;3;3", "presentation": "3;2;3;3;4", "wc_summary": "48;48;195;58;53", "wc_strengths": "38;28;51;44;80", "wc_weaknesses": "70;256;422;20;157", "wc_questions": "152;83;45;81;74", "wc_limitations": "7;1;1;15;1", "wc_review": "315;416;714;218;365", "wc_reply_reviewers": "21;11;28;26;14", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 80.4, 57.41985719243823 ], "wc_strengths_avg": [ 48.2, 17.6 ], "wc_weaknesses_avg": [ 185.0, 143.1390931926006 ], "wc_questions_avg": [ 87.0, 35.24202037341219 ], "wc_limitations_avg": [ 5.0, 5.513619500836088 ], "wc_review_avg": [ 405.6, 167.49280581565287 ], "wc_reply_reviewers_avg": [ 20.0, 6.603029607687671 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2795084971874737, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=368747994511129964&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Fast Optimal Locally Private Mean Estimation via Random Projections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72032", "id": "K3JgUvDSYX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34822dab66c13f0100017b8ea373038a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K3JgUvDSYX", "openreview": "https://openreview.net/forum?id=K3JgUvDSYX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72032", "video": "https://nips.cc/virtual/2023/poster/72032", "author_site": "Hilal Asi, Vitaly Feldman, Jelani Nelson, Huy Nguyen, Kunal 
Talwar", "tldr": "", "abstract": "We study the problem of locally private mean estimation of high-dimensional vectors in the Euclidean ball. Existing algorithms for this problem either incur sub-optimal error or have high communication and/or run-time complexity. We propose a new algorithmic framework, namely ProjUnit, for private mean estimation that yields algorithms that are computationally efficient, have low communication complexity, and incur optimal error up to a $1+o(1)$-factor. Our framework is deceptively simple: each randomizer projects its input to a random low-dimensional subspace and then runs an optimal algorithm such a PrivUnitG in the lower dimensional space. We analyze the error of the algorithm in terms of properties of the random projection ensemble, and study two instantiations. We conduct several experiments for private mean estimation and private federated learning which demonstrate that our algorithms obtain nearly the same utility as optimal algorithms while having significantly lower communication and computational cost.", "keywords": "Differential Privacy;mean estimation;private federated learning;communication complexity", "primary_area": "", "supplementary_material": "/attachment/ae94c2efa0084af6cb36c7feada97fb6d5d260b5.pdf", "author": "Hilal Asi;Vitaly Feldman;Jelani Nelson;Huy Nguyen;Kunal Talwar", "authorids": "~Hilal_Asi1;~Vitaly_Feldman1;~Jelani_Nelson2;~Huy_Nguyen1;~Kunal_Talwar1", "gender": "M;M;M;M;M", "homepage": "http://web.stanford.edu/~asi/;https://vtaly.net;https://www.khoury.northeastern.edu/~hlnguyen/;http://www.kunaltalwar.org;http://people.eecs.berkeley.edu/~minilek", "dblp": ";67/1162;62/3796;06/3696;68/3296.html", "google_scholar": "QGcz9-kAAAAJ;GqZBmfgAAAAJ;https://scholar.google.com.tw/citations?user=MDCu0WEAAAAJ;XD_01h8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;kunal-talwar-128a6159;minilek/", "or_profile": "~Hilal_Asi1;~Vitaly_Feldman1;~Huy_Nguyen1;~Kunal_Talwar1;~Jelani_Nelson1", "aff": "Apple;Apple AI Research;Northeastern University;Apple;University of California, Berkeley", "aff_domain": "apple.com;apple.com;northeastern.edu;apple.com;berkeley.edu", "position": "Researcher;Research Scientist;Associate Professor;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nasi2023fast,\ntitle={Fast Optimal Locally Private Mean Estimation via Random Projections},\nauthor={Hilal Asi and Vitaly Feldman and Jelani Nelson and Huy Nguyen and Kunal Talwar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K3JgUvDSYX}\n}", "github": "", "project": "", "reviewers": "rS8z;kcoe;r6Zt;Dc2U", "pdf_size": 420165, "rating": "6;6;7;7", "confidence": "3;5;4;4", "soundness": "4;3;4;4", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "79;35;42;304", "wc_strengths": "23;54;118;99", "wc_weaknesses": "179;123;133;3", "wc_questions": "13;69;39;2", "wc_limitations": "1;22;96;5", "wc_review": "295;303;428;413", "wc_reply_reviewers": "82;18;22;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 115.0, 110.39248162805292 ], "wc_strengths_avg": [ 73.5, 37.286056375004314 ], "wc_weaknesses_avg": [ 109.5, 65.01346014480386 ], "wc_questions_avg": [ 30.75, 25.849323008543184 ], 
"wc_limitations_avg": [ 31.0, 38.34709897762802 ], "wc_review_avg": [ 359.75, 61.04660105198323 ], "wc_reply_reviewers_avg": [ 30.5, 30.866648668101305 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5713022239774987504&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "apple.com;apple.com;northeastern.edu;apple.com;berkeley.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Apple;Northeastern University;University of California, Berkeley", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://www.northeastern.edu;https://www.berkeley.edu", "aff_unique_abbr": "Apple;NEU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MAG-GNN: Reinforcement Learning Boosted Graph Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72031", "id": "K4FK7I8Jnl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2788b4cdf421e03650868cc4184bfed8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K4FK7I8Jnl", "openreview": "https://openreview.net/forum?id=K4FK7I8Jnl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72031", "video": "https://nips.cc/virtual/2023/poster/72031", "author_site": "Lecheng Kong, Jiarui Feng, Hao Liu, Dacheng Tao, Yixin Chen, Muhan Zhang", "tldr": "", "abstract": "While Graph Neural Networks (GNNs) recently became powerful tools in graph learning tasks, considerable efforts have been spent on improving GNNs' structural encoding ability. A particular line of work proposed subgraph GNNs that use subgraph information to improve GNNs' expressivity and achieved great success. However, such effectivity sacrifices the efficiency of GNNs by enumerating all possible subgraphs. In this paper, we analyze the necessity of complete subgraph enumeration and show that a model can achieve a comparable level of expressivity by considering a small subset of the subgraphs. We then formulate the identification of the optimal subset as a combinatorial optimization problem and propose Magnetic Graph Neural Network (MAG-GNN), a reinforcement learning (RL) boosted GNN, to solve the problem. Starting with a candidate subgraph set, MAG-GNN employs an RL agent to iteratively update the subgraphs to locate the most expressive set for prediction. This reduces the exponential complexity of subgraph enumeration to the constant complexity of a subgraph search algorithm while keeping good expressivity. We conduct extensive experiments on many datasets, showing that MAG-GNN achieves competitive performance to state-of-the-art methods and even outperforms many subgraph GNNs. 
We also demonstrate that MAG-GNN effectively reduces the running time of subgraph GNNs.", "keywords": "Graph Neural Network; Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Lecheng Kong;Jiarui Feng;Hao Liu;Dacheng Tao;Yixin Chen;Muhan Zhang", "authorids": "~Lecheng_Kong1;~Jiarui_Feng1;~Hao_Liu25;~Dacheng_Tao1;~Yixin_Chen1;~Muhan_Zhang1", "gender": "M;M;F;;M;M", "homepage": "https://LechengKong.github.io/;https://jiaruifeng.github.io/;https://haoliu-cola.github.io/;;https://www.cse.wustl.edu/~yixin.chen/;https://muhanzhang.github.io/", "dblp": "319/5576;77/8797;09/3214-57;;59/983;157/5518", "google_scholar": "yk3-_EgAAAAJ;6CSGUR8AAAAJ;;;NByrsK0AAAAJ;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": "0000-0001-9427-8799;0000-0002-3409-6819;;;;0000-0002-7680-6401", "linkedin": ";;;;;jerry-muhan-zhang-a33a1777/", "or_profile": "~Lecheng_Kong1;~Jiarui_Feng1;~Hao_Liu25;~Dacheng_Tao1;~Yixin_Chen1;~Muhan_Zhang1", "aff": "Washington University, Saint Louis;Washington University, Saint Louis;Washington University in St. Louis;;Washington University, Saint Louis;Peking University", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;;wustl.edu;pku.edu.cn", "position": "PhD student;PhD student;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkong2023maggnn,\ntitle={{MAG}-{GNN}: Reinforcement Learning Boosted Graph Neural Network},\nauthor={Lecheng Kong and Jiarui Feng and Hao Liu and Dacheng Tao and Yixin Chen and Muhan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K4FK7I8Jnl}\n}", "github": "", "project": "", "reviewers": "9Mgd;FiDZ;jrFK;SjU6;kfK6;egpV", "pdf_size": 597684, "rating": "4;4;6;6;7;7", "confidence": "2;3;4;2;2;3", "soundness": "3;2;2;3;3;3", "novelty": "3;2;2;2;3;3", "presentation": "1;3;3;3;3;3", "wc_summary": "33;47;168;52;30;41", "wc_strengths": "21;21;64;27;37;58", "wc_weaknesses": "17;109;502;120;1;86", "wc_questions": "259;28;254;22;1;99", "wc_limitations": "10;10;1;1;1;5", "wc_review": "340;215;989;222;70;289", "wc_reply_reviewers": "0;0;243;0;0;67", "wc_reply_authors": "115;94;685;88;0;100", "reply_reviewers": "0;0;2;0;0;1", "reply_authors": "2;2;4;2;1;3", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 61.833333333333336, 48.07430591167062 ], "wc_strengths_avg": [ 38.0, 17.204650534085253 ], "wc_weaknesses_avg": [ 139.16666666666666, 168.20960799616122 ], "wc_questions_avg": [ 110.5, 107.55270026053894 ], "wc_limitations_avg": [ 4.666666666666667, 4.0276819911981905 ], "wc_review_avg": [ 354.1666666666667, 295.81718265772787 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 88.99563035465405 ], "wc_reply_authors_avg": [ 180.33333333333334, 228.73176915816092 ], "reply_reviewers_avg": [ 0.5, 0.7637626158259734 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.059761430466719695, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3491286374994265301&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "wustl.edu;wustl.edu;wustl.edu;;wustl.edu;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": 
"Washington University in St. Louis;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;http://www.pku.edu.cn", "aff_unique_abbr": "WUSTL;Peking U", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Saint Louis;St. Louis;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Invariant Learning via Probability of Sufficient and Necessary Causes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72030", "id": "K5e5tFZuur", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc657b7fd7b9aaa462f2ef9f0362b273-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K5e5tFZuur", "openreview": "https://openreview.net/forum?id=K5e5tFZuur", "poster": "/media/PosterPDFs/NeurIPS%202023/72030.png?t=1701739919.0612195", "slides": "https://nips.cc/virtual/2023/poster/72030", "video": "https://nips.cc/virtual/2023/poster/72030", "author_site": "Mengyue Yang, Zhen Fang, Yonggang Zhang, Yali Du, Furui Liu, Jean-Francois Ton, Jianhong Wang, Jun Wang", "tldr": "", "abstract": "Out-of-distribution (OOD) generalization is indispensable for learning models in the wild, where testing distribution typically unknown and different from the training. Recent methods derived from causality have shown great potential in achieving OOD generalization. \nHowever, existing methods mainly focus on the invariance property of causes, while largely overlooking the property of sufficiency and necessity conditions. Namely, a necessary but insufficient cause (feature) is invariant to distribution shift, yet it may not have required accuracy. By contrast, a sufficient yet unnecessary cause (feature) tends to fit specific data well but may have a risk of adapting to a new domain. \nTo capture the information of sufficient and necessary causes, we employ a classical concept, the probability of sufficiency and necessary causes (PNS), which indicates the probability of whether one is the necessary and sufficient cause. \nTo associate PNS with OOD generalization, we propose PNS risk and formulate an algorithm to learn representation with a high PNS value. We theoretically analyze and prove the generalizability of the PNS risk. Experiments on both synthetic and real-world benchmarks demonstrate the effectiveness of the proposed method. 
The detailed implementation can be found at the GitHub repository: https://github.com/ymy4323460/CaSN.", "keywords": "OOD Generalization;Invariant Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Mengyue Yang;Zhen Fang;Yonggang Zhang;Yali Du;Furui Liu;Jean-Francois Ton;Jianhong Wang;Jun Wang", "authorids": "~Mengyue_Yang1;~Zhen_Fang2;~Yonggang_Zhang1;~Yali_Du1;~Furui_Liu1;~Jean-Francois_Ton2;~Jianhong_Wang1;~Jun_Wang2", "gender": "F;M;M;;M;Not Specified;M;M", "homepage": "https://ymy4323460.github.io/;https://fang-zhen.github.io/index.html;https://yonggangzhangben.github.io/index.html;;;https://savior287.github.io/JFT-webpage/;https://hsvgbkhgbv.github.io/;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": "262/3824.html;;27/6859-3;;116/7289;;;w/JunWang12", "google_scholar": "kJJkqdcAAAAJ;OzD6WJcAAAAJ;XSbEr98AAAAJ;;https://scholar.google.com.hk/citations?user=DJY8NXMAAAAJ;WWVOu4kAAAAJ;K1FKF3IAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": ";0000-0003-0602-6255;0000-0002-4080-7592;;;;;", "linkedin": ";;;;;;jianhong-wang-45995b100/;", "or_profile": "~Mengyue_Yang1;~Zhen_Fang2;~Yonggang_Zhang1;~Yali_Du1;~Furui_Liu1;~Jean-Francois_Ton2;~Jianhong_Wang1;~Jun_Wang2", "aff": "University College London;University of Technology Sydney;Hong Kong Baptist University;;Zhejiang Lab & UCAS & Zhejiang University;Bytedance;Imperial College London;University College London", "aff_domain": "ucl.ac.uk;uts.edu.au;hkbu.edu.hk;;zhejianglab.com;bytedance.com;ic.ac.uk;ucl.ac.uk", "position": "PhD student;Postdoc;Postdoc;;Associate Professor;Researcher;PhD student;Professor", "bibtex": "@inproceedings{\nyang2023invariant,\ntitle={Invariant Learning via Probability of Sufficient and Necessary Causes},\nauthor={Mengyue Yang and Yonggang Zhang and Zhen Fang and Yali Du and Furui Liu and Jean-Francois Ton and Jianhong Wang and Jun Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K5e5tFZuur}\n}", "github": "", "project": "", "reviewers": "qVZ2;CQCE;pPMP;aG69;jeo8", "pdf_size": 789960, "rating": "5;7;7;7;8", "confidence": "3;1;4;4;4", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;3;2;3", "wc_summary": "88;48;55;90;59", "wc_strengths": "57;37;90;42;44", "wc_weaknesses": "545;16;54;286;53", "wc_questions": "80;31;6;4;1", "wc_limitations": "148;11;1;4;1", "wc_review": "918;143;206;426;158", "wc_reply_reviewers": "64;0;20;30;33", "wc_reply_authors": "31;0;23;58;33", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;3;2", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 68.0, 17.515707236649053 ], "wc_strengths_avg": [ 54.0, 19.172897537930986 ], "wc_weaknesses_avg": [ 190.8, 201.38460715754815 ], "wc_questions_avg": [ 24.4, 29.789931184881915 ], "wc_limitations_avg": [ 33.0, 57.61597000832321 ], "wc_review_avg": [ 370.2, 292.1673493051542 ], "wc_reply_reviewers_avg": [ 29.4, 20.8 ], "wc_reply_authors_avg": [ 29.0, 18.64403389827427 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.2100420126042015, "gs_citation": 19, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10505097570797699904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ucl.ac.uk;uts.edu.au;hkbu.edu.hk;;zhejianglab.com;bytedance.com;ic.ac.uk;ucl.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;0", "aff_unique_norm": "University College London;University of Technology Sydney;Hong Kong Baptist University;Zhejiang University;ByteDance;Imperial College London", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.uts.edu.au;https://www.hkbu.edu.hk;http://www.zju.edu.cn;https://www.bytedance.com;https://www.imperial.ac.uk", "aff_unique_abbr": "UCL;UTS;HKBU;ZJU;Bytedance;ICL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;2;2;0;0", "aff_country_unique": "United Kingdom;Australia;China" }, { "title": "Neural Modulation for Flash Memory: An Unsupervised Learning Framework for Improved Reliability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72029", "id": "K7u3RkoBP9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da7e0d7210b99ebc91c4a5f911962d6c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K7u3RkoBP9", "openreview": "https://openreview.net/forum?id=K7u3RkoBP9", "poster": "/media/PosterPDFs/NeurIPS%202023/72029.png?t=1699521356.3353195", "slides": "https://nips.cc/virtual/2023/poster/72029", "video": "https://nips.cc/virtual/2023/poster/72029", "author_site": "Jonathan Zedaka, Elisha Halperin, Evgeny Blaichman, Amit Berman", "tldr": "", "abstract": "Recent years have witnessed a significant increase in the storage density of NAND flash memory, making it a critical component in modern electronic devices. However, with the rise in storage capacity comes an increased likelihood of errors in data storage and retrieval. The growing number of errors poses ongoing challenges for system designers and engineers, in terms of the characterization, modeling, and optimization of NAND-based systems. We present a novel approach for modeling and preventing errors by utilizing the capabilities of generative and unsupervised machine learning methods. As part of our research, we constructed and trained a neural modulator that translates information bits into programming operations on each memory cell in NAND devices. Our modulator, tailored explicitly for flash memory channels, provides a smart writing scheme that reduces programming errors as well as compensates for data degradation over time. Specifically, the modulator is based on an auto-encoder architecture with an additional channel model embedded between the encoder and the decoder. A conditional generative adversarial network (cGAN) was used to construct the channel model. 
Optimized for the end-of-life work-point, the learned memory system outperforms the prior art by up to 56\\% in raw bit error rate (RBER) and extends the lifetime of the flash memory block by up to 25\\%.", "keywords": "WGAN;GAN;Autoencoder;Unsupervised Learning;Generative models;Flash Memory;NAND;Modulation;Reliability;Flash;Communication system", "primary_area": "", "supplementary_material": "/attachment/a8732b8aec9ba856c61a55a5af7fdeb70256d5cd.pdf", "author": "Jonathan Zedaka;Elisha Halperin;Evgeny Blaichman;Amit Berman", "authorids": "~Jonathan_Zedaka1;~Elisha_Halperin1;~Evgeny_Blaichman1;amit.berman@samsung.com", "gender": "M;;M;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "jonathan-zedaka-4b8916275/;elisha-halperin-251223173/;evgeny-blaichman-350bb12a/;", "or_profile": "~Jonathan_Zedaka1;~Elisha_Halperin1;~Evgeny_Blaichman1;amit.berman@samsung.com", "aff": "Samsung;Samsung;Samsung;", "aff_domain": "samsung.com;samsung.com;samsung.com;", "position": "Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\nzedaka2023neural,\ntitle={Neural Modulation for Flash Memory: An Unsupervised Learning Framework for Improved Reliability},\nauthor={Jonathan Zedaka and Elisha Halperin and Evgeny Blaichman and Amit Berman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K7u3RkoBP9}\n}", "github": "", "project": "", "reviewers": "EdAW;KCDD;zVdi;8xzT;yAhR", "pdf_size": 636265, "rating": "6;6;7;7;7", "confidence": "4;2;5;4;3", "soundness": "3;3;2;3;3", "novelty": "3;3;3;2;4", "presentation": "3;3;3;3;3", "wc_summary": "70;13;10;220;85", "wc_strengths": "236;67;4;45;105", "wc_weaknesses": "10;3;1;23;81", "wc_questions": "23;62;26;198;129", "wc_limitations": "18;11;1;1;1", "wc_review": "357;156;42;487;401", "wc_reply_reviewers": "9;0;0;15;19", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.6, 76.30622517200021 ], "wc_strengths_avg": [ 91.4, 79.34885002317299 ], "wc_weaknesses_avg": [ 23.6, 29.71598896217321 ], "wc_questions_avg": [ 87.6, 67.11959475443814 ], "wc_limitations_avg": [ 6.4, 6.974238309665077 ], "wc_review_avg": [ 288.6, 164.35887563499577 ], "wc_reply_reviewers_avg": [ 8.6, 7.709734106958553 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4803844614152613, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rVZCOr3qDVMJ:scholar.google.com/&scioq=Neural+Modulation+for+Flash+Memory:+An+Unsupervised+Learning+Framework+for+Improved+Reliability&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "samsung.com;samsung.com;samsung.com;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Regularization properties of adversarially-trained linear regression", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/72028", "id": "K8gLHZIgVW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4aa13186c795a52ba88f5b822f4b77eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K8gLHZIgVW", "openreview": "https://openreview.net/forum?id=K8gLHZIgVW", "poster": "/media/PosterPDFs/NeurIPS%202023/72028.png?t=1699555253.8887694", "slides": "https://nips.cc/virtual/2023/poster/72028", "video": "https://nips.cc/virtual/2023/poster/72028", "author_site": "Antonio Ribeiro, Dave Zachariah, Francis Bach, Thomas Sch\u00f6n", "tldr": "", "abstract": "State-of-the-art machine learning models can be vulnerable to very small input perturbations that are adversarially constructed. Adversarial training is an effective approach to defend against it. Formulated as a min-max problem, it searches for the best solution when the training data were corrupted by the worst-case attacks. Linear models are among the simple models where vulnerabilities can be observed and are the focus of our study. In this case, adversarial training leads to a convex optimization problem which can be formulated as the minimization of a finite sum. We provide a comparative analysis between the solution of adversarial training in linear regression and other regularization methods. Our main findings are that: (A) Adversarial training yields the minimum-norm interpolating solution in the overparameterized regime (more parameters than data), as long as the maximum disturbance radius is smaller than a threshold. And, conversely, the minimum-norm interpolator is the solution to adversarial training with a given radius. (B) Adversarial training can be equivalent to parameter shrinking methods (ridge regression and Lasso). This happens in the underparametrized region, for an appropriate choice of adversarial radius and zero-mean symmetrically distributed covariates. (C) For $\\ell_\\infty$-adversarial training---as in square-root Lasso---the choice of adversarial radius for optimal bounds does not depend on the additive noise variance. We confirm our theoretical findings with numerical examples.", "keywords": "adversarial training; regularization; linear models", "primary_area": "", "supplementary_material": "/attachment/4865be2bd7fbc66848b12bfdd1e71946955f7e0d.pdf", "author": "Antonio H. Ribeiro;Dave Zachariah;Francis Bach;Thomas B. Sch\u00f6n", "authorids": "~Antonio_H._Ribeiro1;~Dave_Zachariah1;~Francis_Bach1;~Thomas_B._Sch\u00f6n1", "gender": "M;;M;M", "homepage": "https://antonior92.github.io/;;http://www.di.ens.fr/~fbach;http://user.it.uu.se/~thosc112/index.html", "dblp": "202/1699.html;84/2663;b/FrancisRBach;85/4891", "google_scholar": "https://scholar.google.com.br/citations?user=5t_sZdMAAAAJ;;https://scholar.google.fr/citations?user=6PJWcFEAAAAJ;https://scholar.google.se/citations?user=FUqUC2oAAAAJ", "orcid": "0000-0003-3632-8529;;;0000-0001-5183-234X", "linkedin": ";;;thomas-sch%C3%B6n-2b587b1/", "or_profile": "~Antonio_H._Ribeiro1;~Dave_Zachariah1;~Francis_Bach1;~Thomas_B._Sch\u00f6n1", "aff": "Uppsala University;Uppsala University;Ecole Normale Superieure;Uppsala University", "aff_domain": "uu.se;it.uu.se;ens.fr;uu.se", "position": "Postdoc;Associate Professor;Faculty;Full Professor", "bibtex": "@inproceedings{\nribeiro2023regularization,\ntitle={Regularization properties of adversarially-trained linear regression},\nauthor={Antonio H. Ribeiro and Dave Zachariah and Francis Bach and Thomas B. 
Sch{\\\"o}n},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K8gLHZIgVW}\n}", "github": "", "project": "", "reviewers": "Zg7q;6WVQ;CYXN;uRwm", "pdf_size": 943020, "rating": "6;6;6;8", "confidence": "4;3;4;4", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "27;107;63;109", "wc_strengths": "27;32;53;142", "wc_weaknesses": "107;261;135;238", "wc_questions": "11;127;110;3", "wc_limitations": "1;1;7;31", "wc_review": "173;528;368;523", "wc_reply_reviewers": "4;154;65;100", "wc_reply_authors": "0;41;67;19", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.5, 33.98161267509239 ], "wc_strengths_avg": [ 63.5, 46.360004314063644 ], "wc_weaknesses_avg": [ 185.25, 65.51478840689329 ], "wc_questions_avg": [ 62.75, 56.1443452183744 ], "wc_limitations_avg": [ 10.0, 12.36931687685298 ], "wc_review_avg": [ 398.0, 144.95689014324225 ], "wc_reply_reviewers_avg": [ 80.75, 54.48566325190508 ], "wc_reply_authors_avg": [ 31.75, 24.993749218554626 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10059032470185692931&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "uu.se;it.uu.se;ens.fr;uu.se", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Uppsala University;Ecole Normale Superieure", "aff_unique_dep": ";", "aff_unique_url": "https://www.uu.se;https://www.ens.fr", "aff_unique_abbr": "UU;ENS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Sweden;France" }, { "title": "Corruption-Robust Offline Reinforcement Learning with General Function Approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72027", "id": "K9M7XNS9BX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/71b52a5b3fe2e9303433a174b60e160d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K9M7XNS9BX", "openreview": "https://openreview.net/forum?id=K9M7XNS9BX", "poster": "/media/PosterPDFs/NeurIPS%202023/72027.png?t=1701915477.0627415", "slides": "https://nips.cc/virtual/2023/poster/72027", "video": "https://nips.cc/virtual/2023/poster/72027", "author_site": "Chenlu Ye, Rui Yang, Quanquan Gu, Tong Zhang", "tldr": "", "abstract": "We investigate the problem of corruption robustness in offline reinforcement learning (RL) with general function approximation, where an adversary can corrupt each sample in the offline dataset, and the corruption level $\\zeta\\geq0$ quantifies the cumulative corruption amount over $n$ episodes and $H$ steps. Our goal is to find a policy that is robust to such corruption and minimizes the suboptimality gap with respect to the optimal policy for the uncorrupted Markov decision processes (MDPs). 
Drawing inspiration from the uncertainty-weighting technique from the robust online RL setting \\citep{he2022nearly,ye2022corruptionrobust}, we design a new uncertainty weight iteration procedure to efficiently compute on batched samples and propose a corruption-robust algorithm for offline RL. Notably, under the assumption of single policy coverage and the knowledge of $\\zeta$, our proposed algorithm achieves a suboptimality bound that is worsened by an additive factor of $\\mathcal O(\\zeta \\cdot (\\text{CC}(\\lambda,\\hat{\\mathcal F},\\mathcal Z_n^H))^{1/2} (C(\\hat{\\mathcal F},\\mu))^{-1/2} n^{-1})$ due to the corruption. Here $\\text{CC}(\\lambda,\\hat{\\mathcal F},\\mathcal Z_n^H)$ is the coverage coefficient that depends on the regularization parameter $\\lambda$, the confidence set $\\hat{\\mathcal F}$, and the dataset $\\mathcal Z_n^H$, and $C(\\hat{\\mathcal F},\\mu)$ is a coefficient that depends on $\\hat{\\mathcal F}$ and the underlying data distribution $\\mu$. When specialized to linear MDPs, the corruption-dependent error term reduces to $\\mathcal O(\\zeta d n^{-1})$ with $d$ being the dimension of the feature map, which matches the existing lower bound for corrupted linear MDPs. This suggests that our analysis is tight in terms of the corruption-dependent term.", "keywords": "offline RL; adversarial corruption; general function approximation", "primary_area": "", "supplementary_material": "/attachment/d731fe3108741811662970d879c60ab378ff1e29.pdf", "author": "Chenlu Ye;Rui Yang;Quanquan Gu;Tong Zhang", "authorids": "~Chenlu_Ye1;~Rui_Yang8;~Quanquan_Gu1;~Tong_Zhang2", "gender": "F;M;M;M", "homepage": "https://chenluye99.github.io/;https://yangrui2015.github.io;http://web.cs.ucla.edu/~qgu/;http://tongzhang-ml.org", "dblp": "336/2092;92/1942-10;50/4597;07/4227-1", "google_scholar": "c8yK5XsAAAAJ;QHSUy3MAAAAJ;GU9HgNAAAAAJ;LurWtuYAAAAJ", "orcid": ";0000-0003-3525-1726;;0000-0002-5511-2558", "linkedin": "https://www.linkedin.cn/incareer/in/chenlu-ye-9b015b184;;;", "or_profile": "~Chenlu_Ye1;~Rui_Yang8;~Quanquan_Gu1;~Tong_Zhang2", "aff": ";Hong Kong University of Science and Technology;University of California, Los Angeles;Hong Kong University of Science and Technology", "aff_domain": ";ust.hk;cs.ucla.edu;ust.hk", "position": ";PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nye2023corruptionrobust,\ntitle={Corruption-Robust Offline Reinforcement Learning with General Function Approximation},\nauthor={Chenlu Ye and Rui Yang and Quanquan Gu and Tong Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K9M7XNS9BX}\n}", "github": "", "project": "", "reviewers": "wcW2;GirP;H58a;p9Rd", "pdf_size": 956225, "rating": "6;6;6;7", "confidence": "3;3;2;4", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "4;4;3;3", "wc_summary": "89;124;65;130", "wc_strengths": "103;58;34;142", "wc_weaknesses": "107;76;22;153", "wc_questions": "19;5;70;85", "wc_limitations": "3;14;1;1", "wc_review": "321;277;192;511", "wc_reply_reviewers": "0;17;18;64", "wc_reply_authors": "0;42;26;25", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 102.0, 26.485845276298054 ], "wc_strengths_avg": [ 84.25, 41.53537648800116 ], "wc_weaknesses_avg": [ 
89.5, 47.6366455578056 ], "wc_questions_avg": [ 44.75, 33.54381463101655 ], "wc_limitations_avg": [ 4.75, 5.402545696243577 ], "wc_review_avg": [ 325.25, 116.83829637580308 ], "wc_reply_reviewers_avg": [ 24.75, 23.763154251908563 ], "wc_reply_authors_avg": [ 23.25, 15.022899187573616 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16784631177798528115&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";ust.hk;cs.ucla.edu;ust.hk", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.ucla.edu", "aff_unique_abbr": "HKUST;UCLA", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Hong Kong SAR;Los Angeles", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "A Recurrent Neural Circuit Mechanism of Temporal-scaling Equivariant Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72026", "id": "K9dmkfZcMu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c5e44243e16c9d61d3897ba1095f5f6c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K9dmkfZcMu", "openreview": "https://openreview.net/forum?id=K9dmkfZcMu", "poster": "/media/PosterPDFs/NeurIPS%202023/72026.png?t=1701415356.2891662", "slides": "https://nips.cc/virtual/2023/poster/72026", "video": "https://nips.cc/virtual/2023/poster/72026", "author_site": "Junfeng Zuo, Xiao Liu, Ying Nian Wu, Si Wu, Wenhao Zhang", "tldr": "", "abstract": "Time perception is critical in our daily life. An important feature of time perception is temporal scaling (TS): the ability to generate temporal sequences (e.g., motor actions) at different speeds. However, the mathematical principle underlying temporal scaling in recurrent circuits in the brain remains largely unknown. To shed light on this question, the present study investigates temporal scaling from the Lie group point of view. \nWe propose a canonical nonlinear recurrent circuit dynamics, modeled as a continuous attractor network, whose neuronal population responses embed a temporal sequence that is TS equivariant. Furthermore, we find that the TS group operators can be explicitly represented by a control input fed into the recurrent circuit, where the input gain determines the temporal scaling factor (group parameter), and the spatial offset between the control input and the network state gives rise to the generator. The neuronal responses in the recurrent circuit are also consistent with experimental findings. We illustrate that the recurrent circuit can drive a feedforward circuit to generate complex temporal sequences with different time scales, even in the case of negative time scaling (''time reversal''). 
Our work analytically links, for the first time, the abstract temporal scaling group to concrete neural circuit dynamics.", "keywords": "temporal-scaling group;equivariant representation;disentangled representation;motor timing;continuous attractor networks", "primary_area": "", "supplementary_material": "/attachment/1799efc12c0efe56adec80d10a18ac218e6dcb37.zip", "author": "Junfeng Zuo;Xiao Liu;Ying Nian Wu;Si Wu;Wenhao Zhang", "authorids": "~Junfeng_Zuo1;~Xiao_Liu11;~Ying_Nian_Wu1;~Si_Wu1;~Wenhao_Zhang3", "gender": ";F;M;M;M", "homepage": "https://www.researchgate.net/profile/Junfeng-Zuo;;https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm;https://www.zhang-cnl.org/;http://www.stat.ucla.edu/~ywu/", "dblp": "346/0879;82/1364;25/437-1;57/7458-2;18/568.html", "google_scholar": ";;;TqGPd9QAAAAJ;7k_1QFIAAAAJ", "orcid": ";;;0000-0001-7641-5024;", "linkedin": ";;;;", "or_profile": "~Junfeng_Zuo1;~Xiao_Liu11;~Si_Wu1;~Wenhao_Zhang3;~Yingnian_Wu1", "aff": "Peking University;Peking University;Peking University;University of Texas Southwestern Medical Center;UCLA", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;utsouthwestern.edu;stat.ucla.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzuo2023a,\ntitle={A Recurrent Neural Circuit Mechanism of Temporal-scaling Equivariant Representation},\nauthor={Junfeng Zuo and Xiao Liu and Ying Nian Wu and Si Wu and Wenhao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K9dmkfZcMu}\n}", "github": "", "project": "", "reviewers": "aiKz;tVWd;NfgN;ZBA5", "pdf_size": 3213244, "rating": "4;6;7;7", "confidence": "2;3;2;4", "soundness": "3;3;4;4", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "87;104;100;89", "wc_strengths": "126;174;42;53", "wc_weaknesses": "82;193;110;155", "wc_questions": "195;4;1;90", "wc_limitations": "9;9;1;1", "wc_review": "499;484;254;388", "wc_reply_reviewers": "71;16;14;9", "wc_reply_authors": "153;39;13;20", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 7.176350047203662 ], "wc_strengths_avg": [ 98.75, 54.12658773652741 ], "wc_weaknesses_avg": [ 135.0, 42.420513905420805 ], "wc_questions_avg": [ 72.5, 79.24171881023278 ], "wc_limitations_avg": [ 5.0, 4.0 ], "wc_review_avg": [ 406.25, 97.67388340800215 ], "wc_reply_reviewers_avg": [ 27.5, 25.243811122728676 ], "wc_reply_authors_avg": [ 56.25, 56.66292879828927 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4923659639173309, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12094618934244655331&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;utsouthwestern.edu;stat.ucla.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Peking University;University of Texas Southwestern Medical Center;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.utsouthwestern.edu;https://www.ucla.edu", "aff_unique_abbr": "Peking U;UT Southwestern;UCLA", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Graph Mixture of Experts: Learning on Large-Scale Graphs with Explicit Diversity Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72025", "id": "K9xHDD6mic", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f4064d145bad5e361206c3303bda7b8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=K9xHDD6mic", "openreview": "https://openreview.net/forum?id=K9xHDD6mic", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72025", "video": "https://nips.cc/virtual/2023/poster/72025", "author_site": "Haotao Wang, Ziyu Jiang, Yuning You, Yan Han, Gaowen Liu, Jayanth Srinivasa, Ramana Kompella, Zhangyang \"Atlas\" Wang", "tldr": "", "abstract": "Graph neural networks (GNNs) have found extensive applications in learning from graph data. However, real-world graphs often possess diverse structures and comprise nodes and edges of varying types. To bolster the generalization capacity of GNNs, it has become customary to augment training graph structures through techniques like graph augmentations and large-scale pre-training on a wider array of graphs. Balancing this diversity while avoiding increased computational costs and the notorious trainability issues of GNNs is crucial. This study introduces the concept of Mixture-of-Experts (MoE) to GNNs, with the aim of augmenting their capacity to adapt to a diverse range of training graph structures, without incurring explosive computational overhead. The proposed Graph Mixture of Experts (GMoE) model empowers individual nodes in the graph to dynamically and adaptively select more general information aggregation experts. These experts are trained to capture distinct subgroups of graph structures and to incorporate information with varying hop sizes, where those with larger hop sizes specialize in gathering information over longer distances. The effectiveness of GMoE is validated through a series of experiments on a diverse set of tasks, including graph, node, and link prediction, using the OGB benchmark. Notably, it enhances ROC-AUC by $1.81\\%$ in ogbg-molhiv and by $1.40\\%$ in ogbg-molbbbp, when compared to the non-MoE baselines. 
Our code is publicly available at https://github.com/VITA-Group/Graph-Mixture-of-Experts.", "keywords": "graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Haotao Wang;Ziyu Jiang;Yuning You;Yan Han;Gaowen Liu;Jayanth Srinivasa;Ramana Rao Kompella;Zhangyang Wang", "authorids": "~Haotao_Wang1;~Ziyu_Jiang1;~Yuning_You1;~Yan_Han2;~Gaowen_Liu4;~Jayanth_Srinivasa1;~Ramana_Rao_Kompella1;~Zhangyang_Wang1", "gender": ";M;M;M;F;M;M;M", "homepage": ";https://geekjzy.github.io/;https://yyou1996.github.io/;https://yannhan.github.io;;;https://linkedin.com/en/rkompella;https://vita-group.github.io", "dblp": "236/5090;232/9728;240/8556;79/4311-1.html;136/1007;285/5006;98/2327;119/4026", "google_scholar": "aMIJhlEAAAAJ;t5KUxs4AAAAJ;Pv-V2igAAAAJ;swtJHJEAAAAJ;NIv_aeQAAAAJ;HtNfeKYAAAAJ;uf9RZboAAAAJ;pxFyKAIAAAAJ", "orcid": ";;;0000-0001-7164-2295;0009-0000-9194-1233;;;", "linkedin": ";;;;;;;", "or_profile": "~Haotao_Wang1;~Ziyu_Jiang1;~Yuning_You1;~Yan_Han2;~Gaowen_Liu4;~Jayanth_Srinivasa1;~Ramana_Rao_Kompella1;~Zhangyang_Wang1", "aff": "University of Texas, Austin;Texas A&M;Texas A&M University;University of Texas, Austin;Cisco Systems;Cisco;Cisco;University of Texas, Austin", "aff_domain": "utexas.edu;tamu.edu;tamu.edu;utexas.edu;cisco.com;cisco.com;cisco.com;utexas.edu", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwang2023graph,\ntitle={Graph Mixture of Experts: Learning on Large-Scale Graphs with Explicit Diversity Modeling},\nauthor={Haotao Wang and Ziyu Jiang and Yuning You and Yan Han and Gaowen Liu and Jayanth Srinivasa and Ramana Rao Kompella and Zhangyang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=K9xHDD6mic}\n}", "github": "", "project": "", "reviewers": "Deyj;BUC6;NqGe;7Ua4;k4PS", "pdf_size": 382627, "rating": "4;4;6;6;7", "confidence": "4;4;4;4;4", "soundness": "2;2;3;3;3", "novelty": "1;2;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "73;97;52;83;121", "wc_strengths": "12;42;80;102;120", "wc_weaknesses": "271;84;120;148;201", "wc_questions": "4;60;4;130;201", "wc_limitations": "4;9;1;104;1", "wc_review": "364;292;257;567;644", "wc_reply_reviewers": "326;0;43;16;130", "wc_reply_authors": "490;389;18;123;188", "reply_reviewers": "2;0;1;1;2", "reply_authors": "3;4;2;3;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 85.2, 23.137847782367313 ], "wc_strengths_avg": [ 71.2, 39.407613477601004 ], "wc_weaknesses_avg": [ 164.8, 65.43210221290464 ], "wc_questions_avg": [ 79.8, 76.27948610209694 ], "wc_limitations_avg": [ 23.8, 40.20646714149355 ], "wc_review_avg": [ 424.8, 153.4658268149623 ], "wc_reply_reviewers_avg": [ 103.0, 120.1965057728385 ], "wc_reply_authors_avg": [ 241.6, 173.41580089484347 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4454953732400047823&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "utexas.edu;tamu.edu;tamu.edu;utexas.edu;cisco.com;cisco.com;cisco.com;utexas.edu", "author_num": 8, "aff_unique_index": "0;1;1;0;2;2;2;0", "aff_unique_norm": 
"University of Texas at Austin;Texas A&M University;Cisco Systems", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.tamu.edu;https://www.cisco.com", "aff_unique_abbr": "UT Austin;TAMU;Cisco", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DropCompute: simple and more robust distributed synchronous training via compute variance reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72024", "id": "KAWaeKOEkx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/972cd27c994a806e187ef1c2f5254059-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KAWaeKOEkx", "openreview": "https://openreview.net/forum?id=KAWaeKOEkx", "poster": "/media/PosterPDFs/NeurIPS%202023/72024.png?t=1701695395.9364588", "slides": "https://nips.cc/virtual/2023/poster/72024", "video": "https://nips.cc/virtual/2023/poster/72024", "author_site": "Niv Giladi, Shahar Gottlieb, moran shkolnik, Asaf Karnieli, Ron Banner, Elad Hoffer, Kfir Y. Levy, Daniel Soudry", "tldr": "", "abstract": "Background: Distributed training is essential for large scale training of deep neural networks (DNNs). The dominant methods for large scale DNN training are synchronous (e.g. All-Reduce), but these require waiting for all workers in each step. Thus, these methods are limited by the delays caused by straggling workers.\nResults: We study a typical scenario in which workers are straggling due to variability in compute time. We find an analytical relation between compute time properties and scalability limitations, caused by such straggling workers. With these findings, we propose a simple yet effective decentralized method to reduce the variation among workers and thus improve the robustness of synchronous training. This method can be integrated with the widely used All-Reduce. 
Our findings are validated on large-scale training tasks using 200 Gaudi Accelerators.", "keywords": "distributed optimization;large-scale parallel SGD;synchronous training", "primary_area": "", "supplementary_material": "/attachment/e36600c86c0dff50468bd0fd231f992e7fdf596f.pdf", "author": "Niv Giladi;Shahar Gottlieb;Moran Shkolnik;Asaf Karnieli;Ron Banner;Elad Hoffer;Kfir Yehuda Levy;Daniel Soudry", "authorids": "~Niv_Giladi1;~Shahar_Gottlieb1;~Moran_Shkolnik1;~Asaf_Karnieli1;~Ron_Banner1;~Elad_Hoffer1;~Kfir_Yehuda_Levy1;~Daniel_Soudry1", "gender": ";M;F;;M;M;M;M", "homepage": ";https://github.com/ShaharGottlieb;;;;http://www.deeplearning.co.il;http://kfiryehud.wixsite.com/kfir-y-levy;https://soudry.github.io/", "dblp": ";;249/2235;;03/5857;156/0135;83/11388;126/1779", "google_scholar": ";;8x9rOboAAAAJ;;;https://scholar.google.co.il/citations?user=iEfTH7AAAAAJ;;https://scholar.google.co.il/citations?user=AEBWEm8AAAAJ", "orcid": ";;;;;;;0000-0001-9368-6352", "linkedin": ";;moran-shkolnik-b8b76132/;;https://il.linkedin.com/in/ron-banner-69403a51;;;daniel-soudry-2aa3a88/", "or_profile": "~Niv_Giladi1;~Shahar_Gottlieb1;~Moran_Shkolnik1;~Asaf_Karnieli1;~Ron_Banner1;~Elad_Hoffer1;~Kfir_Yehuda_Levy1;~Daniel_Soudry1", "aff": ";Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;;;Intel;Habana Labs (Intel);Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion", "aff_domain": ";campus.technion.ac.il;;;intel.com;habana.ai;technion.ac.il;technion.ac.il", "position": ";MS student;;;Researcher;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ngiladi2023dropcompute,\ntitle={DropCompute: simple and more robust distributed synchronous training via compute variance reduction},\nauthor={Niv Giladi and Shahar Gottlieb and Moran Shkolnik and Asaf Karnieli and Ron Banner and Elad Hoffer and Kfir Yehuda Levy and Daniel Soudry},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KAWaeKOEkx}\n}", "github": "", "project": "", "reviewers": "PA5g;btsj;5jtg;Zc33", "pdf_size": 1058574, "rating": "6;6;6;6", "confidence": "2;4;4;3", "soundness": "2;3;3;3", "novelty": "3;2;2;2", "presentation": "3;3;3;3", "wc_summary": "50;49;56;200", "wc_strengths": "79;44;53;186", "wc_weaknesses": "40;285;197;65", "wc_questions": "22;144;120;5", "wc_limitations": "1;94;3;10", "wc_review": "192;616;429;466", "wc_reply_reviewers": "0;409;244;45", "wc_reply_authors": "0;916;28;0", "reply_reviewers": "0;3;1;1", "reply_authors": "1;3;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.75, 64.2859821422991 ], "wc_strengths_avg": [ 90.5, 56.61492736019362 ], "wc_weaknesses_avg": [ 146.75, 99.64530846959128 ], "wc_questions_avg": [ 72.75, 60.15552759306496 ], "wc_limitations_avg": [ 27.0, 38.82653731663435 ], "wc_review_avg": [ 425.75, 152.0400851749301 ], "wc_reply_reviewers_avg": [ 174.5, 163.5856045011296 ], "wc_reply_authors_avg": [ 236.0, 392.7645605194033 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:ziILJiDBYQIJ:scholar.google.com/&scioq=DropCompute:+simple+and+more+robust+distributed+synchronous+training+via+compute+variance+reduction&hl=en&as_sdt=0,14", "gs_version_total": 7, "email": ";campus.technion.ac.il;;;intel.com;habana.ai;technion.ac.il;technion.ac.il", "author_num": 8, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology;Intel;Habana Labs", "aff_unique_dep": ";Intel Corporation;", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.intel.com;https://www.habana.ai", "aff_unique_abbr": "Technion;Intel;Habana Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Israel;United States" }, { "title": "PromptIR: Prompting for All-in-One Image Restoration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72023", "id": "KAlSIL4tXU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e187897ed7780a579a0d76fd4a35d107-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KAlSIL4tXU", "openreview": "https://openreview.net/forum?id=KAlSIL4tXU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72023", "video": "https://nips.cc/virtual/2023/poster/72023", "author_site": "Vaishnav Potlapalli, Syed Waqas Zamir, Salman Khan, Fahad Shahbaz Khan", "tldr": "", "abstract": "Image restoration involves recovering a high-quality clean image from its degraded version. Deep learning-based methods have significantly improved image restoration performance, however, they have limited generalization ability to different degradation types and levels. This restricts their real-world application since it requires training individual models for each specific degradation and knowing the input degradation type to apply the relevant model. We present a prompt-based learning approach, PromptIR, for All-In-One image restoration that can effectively restore images from various types and levels of degradation. In particular, our method uses prompts to encode degradation-specific information, which is then used to dynamically guide the restoration network. This allows our method to generalize to different degradation types and levels, while still achieving state-of-the-art results on image denoising, deraining, and dehazing. Overall, PromptIR offers a generic and efficient plugin module with few lightweight prompts that can be used to restore images of various types and levels of degradation with no prior information on the corruptions present in the image. 
Our code and pre-trained models are available here: https://github.com/va1shn9v/PromptIR", "keywords": "Image Restoration", "primary_area": "", "supplementary_material": "/attachment/c43951b4a7178b4a07a8e9e79dc5ec9c1cf6ecbc.pdf", "author": "Vaishnav Potlapalli;Syed Waqas Zamir;Salman Khan;Fahad Khan", "authorids": "~Vaishnav_Potlapalli2;~Syed_Waqas_Zamir2;~Salman_Khan4;~Fahad_Khan1", "gender": "M;M;M;M", "homepage": "https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home;;https://www.vaishnavrao.com/", "dblp": "32/11535-1;05/8618;140/7811;317/5236", "google_scholar": "https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ;POoai-QAAAAJ;UOB0uKgAAAAJ", "orcid": "0000-0002-9502-1749;;;", "linkedin": ";;;", "or_profile": "~Salman_Khan4;~Fahad_Khan1;~Syed_Waqas_Zamir1;~Vaishnav_Rao_Potlapalli1", "aff": "Australian National University;Link\u00f6ping University;Inception Institute of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "anu.edu.au;liu.se;inceptioniai.org;mbzuai.ac.ae", "position": "Lecturer;Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\npotlapalli2023promptir,\ntitle={Prompt{IR}: Prompting for All-in-One Image Restoration},\nauthor={Vaishnav Potlapalli and Syed Waqas Zamir and Salman Khan and Fahad Khan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KAlSIL4tXU}\n}", "github": "", "project": "", "reviewers": "3Jtv;4MQM;vXsu;UJGQ;Jp7P", "pdf_size": 36470786, "rating": "4;5;5;5;5", "confidence": "5;4;5;5;5", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;3;3;2;3", "wc_summary": "59;35;54;51;71", "wc_strengths": "36;57;27;44;25", "wc_weaknesses": "253;72;8;210;119", "wc_questions": "2;53;182;129;3", "wc_limitations": "2;6;35;33;8", "wc_review": "352;223;306;467;226", "wc_reply_reviewers": "429;0;32;22;0", "wc_reply_authors": "913;0;0;0;0", "reply_reviewers": "2;0;1;1;0", "reply_authors": "4;1;1;1;1", "rating_avg": [ 4.8, 0.39999999999999997 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 54.0, 11.696153213770756 ], "wc_strengths_avg": [ 37.8, 11.754148203932091 ], "wc_weaknesses_avg": [ 132.4, 89.29636050814166 ], "wc_questions_avg": [ 73.8, 71.21067335729947 ], "wc_limitations_avg": [ 16.8, 14.190137420053409 ], "wc_review_avg": [ 314.8, 90.48624204816996 ], "wc_reply_reviewers_avg": [ 96.6, 166.66805332756485 ], "wc_reply_authors_avg": [ 182.6, 365.2 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 223, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15705145715302105565&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "anu.edu.au;liu.se;inceptioniai.org;mbzuai.ac.ae", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Australian National University;Link\u00f6ping University;Inception Institute of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.anu.edu.au;https://www.liu.se;https://www.inceptioniai.org;https://mbzuai.ac.ae", "aff_unique_abbr": "ANU;LiU;;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": 
"", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "Australia;Sweden;United Arab Emirates" }, { "title": "LIMA: Less Is More for Alignment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72022", "id": "KBMOKmX2he", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac662d74829e4407ce1d126477f4a03a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KBMOKmX2he", "openreview": "https://openreview.net/forum?id=KBMOKmX2he", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72022", "video": "https://nips.cc/virtual/2023/poster/72022", "author_site": "Chunting Zhou, Pengfei Liu, Puxin Xu, Srinivasan Iyer, Jiao Sun, Yuning Mao, Xuezhe Ma, Avia Efrat, Ping Yu, LILI YU, Susan Zhang, Gargi Ghosh, Mike Lewis, Luke Zettlemoyer, Omer Levy", "tldr": "", "abstract": "Large language models are trained in two stages: (1) unsupervised pretraining from raw text, to learn general-purpose representations, and (2) large scale instruction tuning and reinforcement learning, to better align to end tasks and user preferences. \nWe measure the relative importance of these two stages by training LIMA, a 65B parameter LLaMa language model fine-tuned with the standard supervised loss on only 1,000 carefully curated prompts and responses, without any reinforcement learning or human preference modeling.\nLIMA demonstrates remarkably strong performance, learning to follow specific response formats from only a handful of examples in the training data, including complex queries that range from planning trip itineraries to speculating about alternate history.\nMoreover, the model tends to generalize well to unseen tasks that did not appear in the training data.\nIn a controlled human study, responses from LIMA are either equivalent or strictly preferred to GPT-4 in 43\\% of cases; this statistic is as high as 58\\% when compared to Bard and 65\\% versus DaVinci003, which was trained with human feedback.\nTaken together, these results strongly suggest that almost all knowledge in large language models is learned during pretraining, and only limited instruction tuning data is necessary to teach models to produce high quality output.", "keywords": "large language models;supervised instruction fine-tuning;chat assistant", "primary_area": "", "supplementary_material": "/attachment/82b6d96ccc8c33cfc020effe9bc4e7427880c463.pdf", "author": "Chunting Zhou;Pengfei Liu;Puxin Xu;Srini Iyer;Jiao Sun;Yuning Mao;Xuezhe Ma;Avia Efrat;Ping Yu;LILI YU;Susan Zhang;Gargi Ghosh;Mike Lewis;Luke Zettlemoyer;Omer Levy", "authorids": "~Chunting_Zhou1;~Pengfei_Liu1;~Puxin_Xu1;~Srini_Iyer1;~Jiao_Sun1;~Yuning_Mao1;~Xuezhe_Ma1;~Avia_Efrat1;~Ping_Yu2;~LILI_YU1;~Susan_Zhang2;~Gargi_Ghosh3;~Mike_Lewis1;~Luke_Zettlemoyer1;~Omer_Levy1", "gender": "F;M;M;M;;;M;F;F;F;;F;M;M;M", "homepage": "https://violet-zct.github.io/;http://pfliu.com/;https://github.com/jacobyxu;http://sriniiyer.github.io;https://sunjiao123sun.github.io/;https://morningmoni.github.io/;https://xuezhemax.github.io/;;https://yuping1.wixsite.com/mysite;https://scholar.google.com/citations?hl=en&user=wY932-AAAAAJ&view_op=list_works&authuser=1&sortby=pubdate;;https://www.linkedin.com/in/gargi-ghosh-5b1087b;;https://www.cs.washington.edu/people/faculty/lsz/;", "dblp": "161/2679;34/3381-3;345/2002;78/4928.html;;178/3692;127/0230;;;;;;19/6214;21/6793;117/4866", "google_scholar": 
"mR5W7EgAAAAJ;oIz_CYEAAAAJ;;jNjde2wAAAAJ;;steJe6IAAAAJ;6_MQLIcAAAAJ;4QZmEqsAAAAJ;-V7TJhwAAAAJ;https://scholar.google.com/citations?hl=en;;k5akwCcAAAAJ;SnQnQicAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;PZVd2h8AAAAJ", "orcid": ";;;;;;;0000-0003-4114-3836;;;;;;;0000-0001-7300-8191", "linkedin": ";;puxin-xu-jacob/;;;morningmoni/;xuezhe-ma-b5354731;;ping-yu-05ba8212b/;lili-yu-6771961a/;suchenzang;gargi-ghosh-5b1087b;;luke-zettlemoyer-a0109b226/;", "or_profile": "~Chunting_Zhou1;~Pengfei_Liu1;~Puxin_Xu1;~Srini_Iyer1;~Jiao_Sun1;~Yuning_Mao1;~Xuezhe_Ma1;~Avia_Efrat1;~Ping_Yu2;~LILI_YU1;~Susan_Zhang2;~Gargi_Ghosh3;~Mike_Lewis1;~Luke_Zettlemoyer1;~Omer_Levy1", "aff": "Meta AI;Carnegie Mellon University;Facebook AI Research;Meta Facebook;University of Southern California;Meta;USC/ISI;Tel Aviv University;Meta Facebook;Meta Facebook;Meta AI;Meta AI;Facebook AI Research;Meta;Tel Aviv University", "aff_domain": "meta.com;cmu.edu;meta.com;meta.com;usc.edu;meta.com;isi.edu;tau.ac.il;fb.com;fb.com;meta.com;meta.com;fb.com;meta.com;tau.ac.il", "position": "Researcher;Postdoc;Research Data Engineer;Principal Researcher;PhD student;Researcher;Assistant Professor;PhD student;Researcher;Researcher;Researcher;Researcher;Research Scientist;Researcher;Senior Lecturer", "bibtex": "@inproceedings{\nzhou2023lima,\ntitle={{LIMA}: Less Is More for Alignment},\nauthor={Chunting Zhou and Pengfei Liu and Puxin Xu and Srini Iyer and Jiao Sun and Yuning Mao and Xuezhe Ma and Avia Efrat and Ping Yu and LILI YU and Susan Zhang and Gargi Ghosh and Mike Lewis and Luke Zettlemoyer and Omer Levy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KBMOKmX2he}\n}", "github": "", "project": "", "reviewers": "fPTm;xtqr;w2T8;s3dY;a3cj", "pdf_size": 427754, "rating": "5;5;6;7;7", "confidence": "4;5;4;3;4", "soundness": "2;4;3;3;3", "novelty": "3;4;3;3;3", "presentation": "4;4;3;2;4", "wc_summary": "67;89;143;187;115", "wc_strengths": "39;119;27;94;103", "wc_weaknesses": "36;259;174;80;376", "wc_questions": "160;52;36;115;79", "wc_limitations": "1;50;35;8;1", "wc_review": "303;569;415;484;674", "wc_reply_reviewers": "9;324;15;9;30", "wc_reply_authors": "0;198;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 120.2, 41.98285364288616 ], "wc_strengths_avg": [ 76.4, 36.52725010180756 ], "wc_weaknesses_avg": [ 185.0, 122.7224510837361 ], "wc_questions_avg": [ 88.4, 44.706151701974974 ], "wc_limitations_avg": [ 19.0, 19.929877069364977 ], "wc_review_avg": [ 489.0, 127.0606154557737 ], "wc_reply_reviewers_avg": [ 77.4, 123.53881980980714 ], "wc_reply_authors_avg": [ 39.6, 79.2 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 1106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=593239175873479851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;cmu.edu;meta.com;meta.com;usc.edu;meta.com;isi.edu;tau.ac.il;fb.com;fb.com;meta.com;meta.com;fb.com;meta.com;tau.ac.il", "author_num": 15, "aff_unique_index": "0;1;0;0;2;0;2;3;0;0;0;0;0;0;3", "aff_unique_norm": "Meta;Carnegie Mellon University;University of 
Southern California;Tel Aviv University", "aff_unique_dep": "Meta AI;;;", "aff_unique_url": "https://meta.com;https://www.cmu.edu;https://www.usc.edu;https://www.tau.ac.il", "aff_unique_abbr": "Meta;CMU;USC;TAU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;ISI", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0;0;0;0;0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Towards Free Data Selection with General-Purpose Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72021", "id": "KBXcDAaZE7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/047682108c3b053c61ad2da5a6057b4e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KBXcDAaZE7", "openreview": "https://openreview.net/forum?id=KBXcDAaZE7", "poster": "/media/PosterPDFs/NeurIPS%202023/72021.png?t=1698462372.381346", "slides": "https://nips.cc/virtual/2023/poster/72021", "video": "https://nips.cc/virtual/2023/poster/72021", "author_site": "Yichen Xie, Mingyu Ding, Masayoshi TOMIZUKA, Wei Zhan", "tldr": "", "abstract": "A desirable data selection algorithm can efficiently choose the most informative samples to maximize the utility of limited annotation budgets. However, current approaches, represented by active learning methods, typically follow a cumbersome pipeline that repeatedly iterates time-consuming model training and batch data selection. In this paper, we challenge this status quo by designing a distinct data selection pipeline that utilizes existing general-purpose models to select data from various datasets with a single-pass inference without the need for additional training or supervision. A novel free data selection (FreeSel) method is proposed following this new pipeline. Specifically, we define semantic patterns extracted from intermediate features of the general-purpose model to capture subtle local information in each image. We then enable the selection of all data samples in a single pass through distance-based sampling at the fine-grained semantic pattern level. FreeSel bypasses the heavy batch selection process, achieving a significant improvement in efficiency and being 530x faster than existing active learning methods. 
Extensive experiments verify the effectiveness of FreeSel on various computer vision tasks.", "keywords": "data selection;unsupervised learning", "primary_area": "", "supplementary_material": "/attachment/5b185b55b78e6d7a5cd8ba9095cb8c2757c0a4a3.pdf", "author": "Yichen Xie;Mingyu Ding;Masayoshi Tomizuka;Wei Zhan", "authorids": "~Yichen_Xie1;~Mingyu_Ding1;~Masayoshi_Tomizuka1;~Wei_Zhan2", "gender": "M;M;M;", "homepage": ";https://dingmyu.github.io/;https://me.berkeley.edu/people/masayoshi-tomizuka/;", "dblp": ";188/5243;10/4434;", "google_scholar": "SdX6DaEAAAAJ;w4yTWwoAAAAJ;;", "orcid": ";0000-0001-6556-8359;;", "linkedin": ";dingmyu/;;", "or_profile": "~Yichen_Xie1;~Mingyu_Ding1;~Masayoshi_Tomizuka1;~Wei_Zhan2", "aff": "GM Cruise LLC;University of California, Berkeley;University of California, Berkeley;", "aff_domain": "getcruise.com;berkeley.edu;berkeley.edu;", "position": "Intern;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nxie2023towards,\ntitle={Towards Free Data Selection with General-Purpose Models},\nauthor={Yichen Xie and Mingyu Ding and Masayoshi Tomizuka and Wei Zhan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KBXcDAaZE7}\n}", "github": "", "project": "", "reviewers": "yLeE;xhjx;7NQc;SbPr", "pdf_size": 2387578, "rating": "3;3;6;6", "confidence": "4;5;4;5", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "62;94;173;101", "wc_strengths": "25;49;86;178", "wc_weaknesses": "93;198;324;126", "wc_questions": "14;61;76;10", "wc_limitations": "1;24;6;57", "wc_review": "195;426;665;472", "wc_reply_reviewers": "18;94;167;0", "wc_reply_authors": "51;97;45;51", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.5, 40.574006457336694 ], "wc_strengths_avg": [ 84.5, 58.19149422381247 ], "wc_weaknesses_avg": [ 185.25, 88.64923857541022 ], "wc_questions_avg": [ 40.25, 28.77824699317177 ], "wc_limitations_avg": [ 22.0, 21.94310825749169 ], "wc_review_avg": [ 439.5, 167.23411733255867 ], "wc_reply_reviewers_avg": [ 69.75, 66.31129240182248 ], "wc_reply_authors_avg": [ 61.0, 20.92844953645635 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15399517658826684343&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "getcruise.com;berkeley.edu;berkeley.edu;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "GM Cruise LLC;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.gmcruise.com;https://www.berkeley.edu", "aff_unique_abbr": "GM Cruise;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The probability flow ODE is provably fast", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72020", "id": "KD6MFeWSAd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d84a27ff694345aacc21c72097a69ea2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KD6MFeWSAd", "openreview": 
"https://openreview.net/forum?id=KD6MFeWSAd", "poster": "/media/PosterPDFs/NeurIPS%202023/72020.png?t=1700845184.048251", "slides": "https://nips.cc/virtual/2023/poster/72020", "video": "https://nips.cc/virtual/2023/poster/72020", "author_site": "Sitan Chen, Sinho Chewi, Holden Lee, Yuanzhi Li, Jianfeng Lu, Adil Salim", "tldr": "", "abstract": "We provide the first polynomial-time convergence guarantees for the probabilistic flow ODE implementation (together with a corrector step) of score-based generative modeling. Our analysis is carried out in the wake of recent results obtaining such guarantees for the SDE-based implementation (i.e., denoising diffusion probabilistic modeling or DDPM), but requires the development of novel techniques for studying deterministic dynamics without contractivity. Through the use of a specially chosen corrector step based on the underdamped Langevin diffusion, we obtain better dimension dependence than prior works on DDPM ($O(\\sqrt d)$ vs. $O(d)$, assuming smoothness of the data distribution), highlighting potential advantages of the ODE framework.", "keywords": "DDIM;deterministic samplers;diffusion models;predictor-corrector;probability flow ODE;score-based generative modeling", "primary_area": "", "supplementary_material": "/attachment/6beba36b6609ca846fc97843b0df7e1aa9a15d43.zip", "author": "Sitan Chen;Sinho Chewi;Holden Lee;Yuanzhi Li;Jianfeng Lu;Adil Salim", "authorids": "~Sitan_Chen1;~Sinho_Chewi1;~Holden_Lee1;~Yuanzhi_Li1;~Jianfeng_Lu1;~Adil_Salim2", "gender": "M;M;M;M;M;M", "homepage": "https://sitanchen.com;https://chewisinho.github.io/;http://holdenlee.github.io;;https://services.math.duke.edu/~jianfeng/;https://adil-salim.github.io", "dblp": "141/7670;200/8964;150/3407;73/3628;82/6187-1.html;192/3273", "google_scholar": "YnJVsp4AAAAJ;u_fAQO4AAAAJ;hR9rFHgAAAAJ;;ej9SRrAAAAAJ;", "orcid": ";0000-0003-2701-0703;;;0000-0001-6255-5165;", "linkedin": ";chewisinho/;;;;", "or_profile": "~Sitan_Chen1;~Sinho_Chewi1;~Holden_Lee1;~Yuanzhi_Li1;~Jianfeng_Lu1;~Adil_Salim2", "aff": "University of California, Berkeley;Massachusetts Institute of Technology;Johns Hopkins University;Carnegie Mellon University;Duke University;Microsoft", "aff_domain": "berkeley.edu;mit.edu;jh.edu;andrew.cmu.edu;duke.edu;microsoft.com", "position": "Postdoc;PhD student;Assistant Professor;Assistant Professor;Professor;Researcher", "bibtex": "@inproceedings{\nchen2023the,\ntitle={The probability flow {ODE} is provably fast},\nauthor={Sitan Chen and Sinho Chewi and Holden Lee and Yuanzhi Li and Jianfeng Lu and Adil Salim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KD6MFeWSAd}\n}", "github": "", "project": "", "reviewers": "L7v8;bw63;rLPm;Q98a;tQnV", "pdf_size": 1074119, "rating": "4;5;5;6;8", "confidence": "4;3;3;4;3", "soundness": "2;3;2;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;2;3;4", "wc_summary": "38;52;203;38;136", "wc_strengths": "12;98;24;37;182", "wc_weaknesses": "79;188;360;148;212", "wc_questions": "4;38;28;146;192", "wc_limitations": "6;43;295;1;30", "wc_review": "139;419;910;370;752", "wc_reply_reviewers": "57;389;0;167;143", "wc_reply_authors": "224;566;101;414;102", "reply_reviewers": "1;2;0;2;1", "reply_authors": "3;5;2;3;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 
93.4, 65.84709560793095 ], "wc_strengths_avg": [ 70.6, 63.07963221199058 ], "wc_weaknesses_avg": [ 197.4, 92.9507396420276 ], "wc_questions_avg": [ 81.6, 73.66301650081945 ], "wc_limitations_avg": [ 75.0, 111.07294900199598 ], "wc_review_avg": [ 518.0, 277.04367886670866 ], "wc_reply_reviewers_avg": [ 151.2, 133.1170913143763 ], "wc_reply_authors_avg": [ 281.4, 182.53942040008783 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3611575592573077, "gs_citation": 114, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12971302857963068754&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;mit.edu;jh.edu;andrew.cmu.edu;duke.edu;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;Johns Hopkins University;Carnegie Mellon University;Duke University;Microsoft", "aff_unique_dep": ";;;;;Microsoft Corporation", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://www.jhu.edu;https://www.cmu.edu;https://www.duke.edu;https://www.microsoft.com", "aff_unique_abbr": "UC Berkeley;MIT;JHU;CMU;Duke;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Generalization Error of Stochastic Mirror Descent for Quadratically-Bounded Losses: an Improved Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72019", "id": "KF4LCXz8Np", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/daca83eba0a30a5ff2a3b9c53ff5a976-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KF4LCXz8Np", "openreview": "https://openreview.net/forum?id=KF4LCXz8Np", "poster": "/media/PosterPDFs/NeurIPS%202023/72019.png?t=1702330908.4937172", "slides": "https://nips.cc/virtual/2023/poster/72019", "video": "https://nips.cc/virtual/2023/poster/72019", "author_site": "Ta Duy Nguyen, Alina Ene, Huy Nguyen", "tldr": "", "abstract": "In this work, we revisit the generalization error of stochastic mirror descent for quadratically bounded losses studied in Telgarsky (2022). Quadratically bounded losses are a broad class of loss functions, capturing both Lipschitz and smooth functions, for both regression and classification problems. We study the high probability generalization for this class of losses on linear predictors in both realizable and non-realizable cases when the data are sampled IID or from a Markov chain. The prior work relies on an intricate coupling argument between the iterates of the original problem and those projected onto a bounded domain. This approach enables blackbox application of concentration inequalities, but also leads to suboptimal guarantees due in part to the use of a union bound across all iterations. In this work, we depart significantly from the prior work of Telgarsky (2022), and introduce a novel approach for establishing high probability generalization guarantees. In contrast to the prior work, our work directly analyzes the moment generating function of a novel supermartingale sequence and leverages the structure of stochastic mirror descent. As a result, we obtain improved bounds in all aforementioned settings. 
Specifically, in the realizable case and non-realizable case with light-tailed sub-Gaussian data, we improve the bounds by a $\\log T$ factor, matching the correct rates of $1/T$ and $1/\\sqrt{T}$, respectively. In the more challenging case of heavy-tailed polynomial data, we improve the existing bound by a $\\mathrm{poly}\\ T$ factor.", "keywords": "high probability;generalization;convex optimization;nonconvex optimization", "primary_area": "", "supplementary_material": "/attachment/ac437d40bcfa8d31ca4e8faf35d17c893ee2fa88.pdf", "author": "Ta Duy Nguyen;Alina Ene;Huy Nguyen", "authorids": "~Ta_Duy_Nguyen1;~Alina_Ene1;~Huy_Nguyen1", "gender": ";;M", "homepage": "https://nguyentaduy.github.io/;;https://www.khoury.northeastern.edu/~hlnguyen/", "dblp": ";;62/3796", "google_scholar": ";;https://scholar.google.com.tw/citations?user=MDCu0WEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ta_Duy_Nguyen1;~Alina_Ene1;~Huy_Nguyen1", "aff": "Boston University;;Northeastern University", "aff_domain": "bu.edu;;northeastern.edu", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nnguyen2023on,\ntitle={On the Generalization Error of Stochastic Mirror Descent for Quadratically-Bounded Losses: an Improved Analysis},\nauthor={Ta Duy Nguyen and Alina Ene and Huy Nguyen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KF4LCXz8Np}\n}", "github": "", "project": "", "reviewers": "H14F;sY4Q;TJAZ;p99H", "pdf_size": 345367, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "75;78;63;79", "wc_strengths": "75;91;57;146", "wc_weaknesses": "1;94;87;223", "wc_questions": "36;54;2;34", "wc_limitations": "1;18;1;3", "wc_review": "188;335;210;485", "wc_reply_reviewers": "0;204;34;18", "wc_reply_authors": "77;415;0;0", "reply_reviewers": "0;3;1;1", "reply_authors": "2;4;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.75, 6.378675411086537 ], "wc_strengths_avg": [ 92.25, 33.281939546847326 ], "wc_weaknesses_avg": [ 101.25, 79.26025170285544 ], "wc_questions_avg": [ 31.5, 18.728320800328042 ], "wc_limitations_avg": [ 5.75, 7.119515432949071 ], "wc_review_avg": [ 304.5, 118.33532862167579 ], "wc_reply_reviewers_avg": [ 64.0, 81.71903083125741 ], "wc_reply_authors_avg": [ 123.0, 171.49198231987407 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eRL_xeICrucJ:scholar.google.com/&scioq=On+the+Generalization+Error+of+Stochastic+Mirror+Descent+for+Quadratically-Bounded+Losses:+an+Improved+Analysis&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "bu.edu;;northeastern.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Boston University;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bu.edu;https://www.northeastern.edu", "aff_unique_abbr": "BU;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Multi-Step Generalized Policy Improvement by Leveraging Approximate Models", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/72018", "id": "KFj0Q1EXvU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/77c7faab15002432ba1151e8d5cc389a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KFj0Q1EXvU", "openreview": "https://openreview.net/forum?id=KFj0Q1EXvU", "poster": "/media/PosterPDFs/NeurIPS%202023/72018.png?t=1699156402.4181478", "slides": "https://nips.cc/virtual/2023/poster/72018", "video": "https://nips.cc/virtual/2023/poster/72018", "author_site": "Lucas N. Alegre, Ana Bazzan, Ann Nowe, Bruno C. da Silva", "tldr": "", "abstract": "We introduce a principled method for performing zero-shot transfer in reinforcement learning (RL) by exploiting approximate models of the environment. Zero-shot transfer in RL has been investigated by leveraging methods rooted in generalized policy improvement (GPI) and successor features (SFs). Although computationally efficient, these methods are model-free: they analyze a library of policies---each solving a particular task---and identify which action the agent should take. We investigate the more general setting where, in addition to a library of policies, the agent has access to an approximate environment model. Even though model-based RL algorithms can identify near-optimal policies, they are typically computationally intensive. We introduce $h$-GPI, a multi-step extension of GPI that interpolates between these extremes---standard model-free GPI and fully model-based planning---as a function of a parameter, $h$, regulating the amount of time the agent has to reason. We prove that $h$-GPI's performance lower bound is strictly better than GPI's, and show that $h$-GPI generally outperforms GPI as $h$ increases. Furthermore, we prove that as $h$ increases, $h$-GPI's performance becomes arbitrarily less susceptible to sub-optimality in the agent's policy library. Finally, we introduce novel bounds characterizing the gains achievable by $h$-GPI as a function of approximation errors in both the agent's policy library and its (possibly learned) model. These bounds strictly generalize those known in the literature. We evaluate $h$-GPI on challenging tabular and continuous-state problems under value function approximation and show that it consistently outperforms GPI and state-of-the-art competing methods under various levels of approximation errors.", "keywords": "generalized policy improvement;successor features;transfer learning;model-based reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/6a4415c8e8fd715dc441d0b009f05ceebf64a8e6.zip", "author": "Lucas Nunes Alegre;Ana L. C. 
Bazzan;Ann Nowe;Bruno Castro da Silva", "authorids": "~Lucas_Nunes_Alegre1;~Ana_L._C._Bazzan2;~Ann_Nowe1;~Bruno_Castro_da_Silva1", "gender": "M;;F;M", "homepage": "http://www.inf.ufrgs.br/~lnalegre;http://www.inf.ufrgs.br/~bazzan;https://ai.vub.ac.be/team/ann-nowe/?utm_source=www.google.com&utm_medium=organic&utm_campaign=Google&referrer-analytics=1;https://people.cs.umass.edu/~bsilva/", "dblp": "250/5118;b/AnaLCBazzan;95/232.html;75/3139", "google_scholar": "https://scholar.google.com/citations?hl=en;K6Z40w0AAAAJ;https://scholar.google.be/citations?user=LH5QKbgAAAAJ;eskJDVUAAAAJ", "orcid": "0000-0001-5465-4390;0000-0002-2803-9607;;", "linkedin": "lucas-alegre-b80628127;;;", "or_profile": "~Lucas_Nunes_Alegre1;~Ana_L._C._Bazzan2;~Ann_Nowe1;~Bruno_Castro_da_Silva1", "aff": "Vrije Universiteit Brussel;UFRGS;Vrije Universiteit Brussel;University of Massachusetts, Amherst", "aff_domain": "vub.be;inf.ufrgs.br;vub.be;umass.edu", "position": "PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nalegre2023multistep,\ntitle={Multi-Step Generalized Policy Improvement by Leveraging Approximate Models},\nauthor={Lucas Nunes Alegre and Ana L. C. Bazzan and Ann Nowe and Bruno Castro da Silva},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KFj0Q1EXvU}\n}", "github": "", "project": "", "reviewers": "Y3y3;CAn3;MMCf;obSw", "pdf_size": 1565290, "rating": "5;6;6;7", "confidence": "4;3;2;4", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;2;4", "wc_summary": "29;49;48;105", "wc_strengths": "39;58;41;95", "wc_weaknesses": "159;292;105;113", "wc_questions": "4;79;92;157", "wc_limitations": "4;14;21;34", "wc_review": "235;492;307;504", "wc_reply_reviewers": "15;86;107;22", "wc_reply_authors": "0;0;339;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.75, 28.41984341969533 ], "wc_strengths_avg": [ 58.25, 22.465250944514285 ], "wc_weaknesses_avg": [ 167.25, 74.91453463781244 ], "wc_questions_avg": [ 83.0, 54.34611301647985 ], "wc_limitations_avg": [ 18.25, 10.917302780449024 ], "wc_review_avg": [ 384.5, 116.39695013186557 ], "wc_reply_reviewers_avg": [ 57.5, 39.777506206397604 ], "wc_reply_authors_avg": [ 84.75, 146.79130594146235 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10773159877128998949&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "vub.be;inf.ufrgs.br;vub.be;umass.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Vrije Universiteit Brussel;Universidade Federal do Rio Grande do Sul;University of Massachusetts Amherst", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vub.be;https://www.ufrgs.br;https://www.umass.edu", "aff_unique_abbr": "VUB;UFRGS;UMass Amherst", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Brussels;;Amherst", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Belgium;Brazil;United States" }, { "title": "MeCo: Zero-Shot NAS with One Data and Single Forward Pass via Minimum Eigenvalue of Correlation", "status": 
"Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72017", "id": "KFm2lZiI7n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bfa815ac6f08f4ada34fe22be054f2b9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KFm2lZiI7n", "openreview": "https://openreview.net/forum?id=KFm2lZiI7n", "poster": "/media/PosterPDFs/NeurIPS%202023/72017.png?t=1701509109.381477", "slides": "https://nips.cc/virtual/2023/poster/72017", "video": "https://nips.cc/virtual/2023/poster/72017", "author_site": "Tangyu Jiang, Haodi Wang, Rongfang Bie", "tldr": "", "abstract": "Neural Architecture Search (NAS) is a promising paradigm in automatic architecture engineering. Zero-shot NAS can evaluate the network without training via some specific metrics called zero-cost proxies. Though effective, the existing zero-cost proxies either invoke at least one backpropagation or depend highly on the data and labels. To alleviate the above issues, in this paper, we first reveal how the Pearson correlation matrix of the feature maps impacts the convergence rate and the generalization capacity of an over-parameterized neural network. Enlightened by the theoretical analysis, we propose a novel zero-cost proxy called $\\mathsf{MeCo}$, which requires only one random data for a single forward pass. We further propose an optimization approach $\\mathsf{MeCo_{opt}}$ to improve the performance of our method. We design comprehensive experiments and extensively evaluate $\\mathsf{MeCo}$ on multiple popular benchmarks. $\\mathsf{MeCo}$ achieves the highest correlation with the ground truth (e.g., 0.89 on NATS-Bench-TSS with CIFAR-10) among all the state-of-the-art proxies, which is also fully independent of the data and labels. Moreover, we integrate $\\mathsf{MeCo}$ with the existing generation method to comprise a complete NAS. The experimental results illustrate that $\\mathsf{MeCo}$-based NAS can select the architecture with the highest accuracy and a low search cost. For instance, the best network searched by $\\mathsf{MeCo}$-based NAS achieves 97.31% on CIFAR-10, which is 0.04% higher than the baselines under the same settings. 
Our code is available at https://github.com/HamsterMimi/MeCo", "keywords": "Neural Architecture Search;Zero-Cost Proxy;Evaluation Strategy;Feature Map", "primary_area": "", "supplementary_material": "/attachment/b4b4d5fa909f7fa62e5e8eb9b2753f3029992e14.pdf", "author": "Tangyu Jiang;Haodi Wang;Rongfang Bie", "authorids": "~Tangyu_Jiang1;whd@mail.bnu.edu.cn;rfbie@bnu.edu.cn", "gender": "M;;", "homepage": "https://github.com/HamsterMimi;;", "dblp": "369/7675;;", "google_scholar": ";;", "orcid": "0000-0003-2578-0595;;", "linkedin": ";;", "or_profile": "~Tangyu_Jiang1;whd@mail.bnu.edu.cn;rfbie@bnu.edu.cn", "aff": "Beijing Normal University;;", "aff_domain": "bnu.edu.cn;;", "position": "PhD student;;", "bibtex": "@inproceedings{\njiang2023meco,\ntitle={MeCo: Zero-Shot {NAS} with One Data and Single Forward Pass via Minimum Eigenvalue of Correlation},\nauthor={Tangyu Jiang and Haodi Wang and Rongfang Bie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KFm2lZiI7n}\n}", "github": "", "project": "", "reviewers": "1Ted;5B3N;gqC1;4Zmk;BGPn", "pdf_size": 4163999, "rating": "5;6;6;7;7", "confidence": "4;4;5;5;4", "soundness": "3;3;3;2;4", "novelty": "3;3;3;2;3", "presentation": "3;3;2;3;3", "wc_summary": "53;126;50;91;63", "wc_strengths": "36;69;22;51;47", "wc_weaknesses": "58;141;111;1186;315", "wc_questions": "166;60;105;3;109", "wc_limitations": "6;101;24;3;22", "wc_review": "319;497;312;1334;556", "wc_reply_reviewers": "16;23;85;1028;0", "wc_reply_authors": "57;26;391;1459;0", "reply_reviewers": "1;1;2;3;0", "reply_authors": "3;2;4;5;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 76.6, 28.62586243242289 ], "wc_strengths_avg": [ 45.0, 15.658863304850707 ], "wc_weaknesses_avg": [ 362.2, 420.8189159246528 ], "wc_questions_avg": [ 88.6, 54.4448344657232 ], "wc_limitations_avg": [ 31.2, 35.88537306480176 ], "wc_review_avg": [ 603.6, 377.6615415951166 ], "wc_reply_reviewers_avg": [ 230.4, 399.8432692943574 ], "wc_reply_authors_avg": [ 386.6, 554.6510975379027 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.32732683535398843, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17416607415815470529&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "bnu.edu.cn;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Beijing Normal University", "aff_unique_dep": "", "aff_unique_url": "https://www.bnu.edu.cn", "aff_unique_abbr": "BNU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "SEGA: Instructing Text-to-Image Models using Semantic Guidance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72016", "id": "KIPAIy329j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ff83037e8d97b2171b2d3e96cb8e677-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KIPAIy329j", "openreview": "https://openreview.net/forum?id=KIPAIy329j", "poster": "/media/PosterPDFs/NeurIPS%202023/72016.png?t=1700229376.9499166", "slides": "https://nips.cc/virtual/2023/poster/72016", "video": "https://nips.cc/virtual/2023/poster/72016", "author_site": "Manuel Brack, 
Felix Friedrich, Dominik Hintersdorf, Lukas Struppek, Patrick Schramowski, Kristian Kersting", "tldr": "", "abstract": "Text-to-image diffusion models have recently received a lot of interest for their astonishing ability to produce high-fidelity images from text only. However, achieving one-shot generation that aligns with the user\u2019s intent is nearly impossible, yet small changes to the input prompt often result in very different images. This leaves the user with little semantic control. To put the user in control, we show how to interact with the diffusion process to flexibly steer it along semantic directions. This semantic guidance (SEGA) generalizes to any generative architecture using classifier-free guidance. More importantly, it allows for subtle and extensive edits, composition and style changes, and optimizing the overall artistic conception. We demonstrate SEGA\u2019s effectiveness on both latent and pixel-based diffusion models such as Stable Diffusion, Paella, and DeepFloyd-IF using a variety of tasks, thus providing strong evidence for its versatility and flexibility.", "keywords": "diffusion;text-to-image;generation;semantics", "primary_area": "", "supplementary_material": "", "author": "Manuel Brack;Felix Friedrich;Dominik Hintersdorf;Lukas Struppek;Patrick Schramowski;Kristian Kersting", "authorids": "~Manuel_Brack1;~Felix_Friedrich1;~Dominik_Hintersdorf1;~Lukas_Struppek1;~Patrick_Schramowski1;~Kristian_Kersting1", "gender": "M;;M;M;M;M", "homepage": ";https://ml-research.github.io/people/ffriedrich/;https://d0mih.github.io/;https://lukasstruppek.github.io/;https://ml-research.github.io/people/pschramowski/index.html;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "326/8265;18/4626;306/1325;306/1485;217/1650;40/3793", "google_scholar": "kJ9Abf8AAAAJ;RfM9ud0AAAAJ;DKITUfsAAAAJ;tU8K5qsAAAAJ;GD481RkAAAAJ;QY-earAAAAAJ", "orcid": ";0000-0001-8387-793X;0000-0003-4976-6894;0000-0003-0626-3672;0000-0003-1231-7120;0000-0002-2873-9152", "linkedin": ";;;lukas-struppek/;;", "or_profile": "~Manuel_Brack1;~Felix_Friedrich1;~Dominik_Hintersdorf1;~Lukas_Struppek1;~Patrick_Schramowski1;~Kristian_Kersting1", "aff": "Adobe Systems;TU Darmstadt;CS Department, TU Darmstadt, Technische Universit\u00e4t Darmstadt;Technische Universit\u00e4t Darmstadt;TU Darmstadt;TU Darmstadt", "aff_domain": "adobe.com;tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "Intern;PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbrack2023sega,\ntitle={{SEGA}: Instructing Text-to-Image Models using Semantic Guidance},\nauthor={Manuel Brack and Felix Friedrich and Dominik Hintersdorf and Lukas Struppek and Patrick Schramowski and Kristian Kersting},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KIPAIy329j}\n}", "github": "", "project": "", "reviewers": "Lm7m;2nZZ;ZrN7;i1VN;q8hy", "pdf_size": 0, "rating": "3;5;6;6;7", "confidence": "5;5;5;3;4", "soundness": "2;3;4;3;3", "novelty": "2;3;3;4;3", "presentation": "3;3;3;3;4", "wc_summary": "76;64;71;45;145", "wc_strengths": "49;39;34;28;26", "wc_weaknesses": "109;196;113;96;49", "wc_questions": "11;2;58;29;5", "wc_limitations": "12;2;5;9;1", "wc_review": "257;303;281;207;226", "wc_reply_reviewers": "0;0;0;70;8", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.4, 0.8 ], 
"soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 80.2, 34.06699282296575 ], "wc_strengths_avg": [ 35.2, 8.28009661779378 ], "wc_weaknesses_avg": [ 112.6, 47.51673389449237 ], "wc_questions_avg": [ 21.0, 20.73644135332772 ], "wc_limitations_avg": [ 5.8, 4.166533331199932 ], "wc_review_avg": [ 254.8, 35.0108554594143 ], "wc_reply_reviewers_avg": [ 15.6, 27.375901811629877 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.51604684654214, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18084161850289417848&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "adobe.com;tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Adobe;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.tu-darmstadt.de", "aff_unique_abbr": "Adobe;TU Darmstadt", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;Germany" }, { "title": "SE(3) Equivariant Augmented Coupling Flows", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72015", "id": "KKxO6wwx8p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa55eb802a531c8087e225ecf2dcfbca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KKxO6wwx8p", "openreview": "https://openreview.net/forum?id=KKxO6wwx8p", "poster": "/media/PosterPDFs/NeurIPS%202023/72015.png?t=1701268685.7829988", "slides": "https://nips.cc/virtual/2023/poster/72015", "video": "https://nips.cc/virtual/2023/poster/72015", "author_site": "Laurence Midgley, Vincent Stimper, Javier Antor\u00e1n, Emile Mathieu, Bernhard Sch\u00f6lkopf, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "tldr": "", "abstract": "Coupling normalizing flows allow for fast sampling and density evaluation, making them the tool of choice for probabilistic modeling of physical systems. \nHowever, the standard coupling architecture precludes endowing flows that operate on the Cartesian coordinates of atoms with the SE(3) and permutation invariances of physical systems. \nThis work proposes a coupling flow that preserves SE(3) and permutation equivariance by performing coordinate splits along additional augmented dimensions. 
At each layer, the flow maps atoms' positions into learned SE(3) invariant bases, where we apply standard flow transformations, such as monotonic rational-quadratic splines, before returning to the original basis.\nCrucially, our flow preserves fast sampling and density evaluation, and may be used to produce unbiased estimates of expectations with respect to the target distribution via importance sampling.\nWhen trained on the DW4, LJ13, and QM9-positional datasets, our flow is competitive with equivariant continuous normalizing flows and diffusion models, while allowing sampling more than an order of magnitude faster.\nMoreover, to the best of our knowledge, we are the first to learn the full Boltzmann distribution of alanine dipeptide by only modeling the Cartesian positions of its atoms.\nLastly, we demonstrate that our flow can be trained to approximately sample from the Boltzmann distribution of the DW4 and LJ13 particle systems using only their energy functions.", "keywords": "Boltzmann generator;normalizing flow;diffusion;molecular dynamics", "primary_area": "", "supplementary_material": "", "author": "Laurence Illing Midgley;Vincent Stimper;Javier Antoran;Emile Mathieu;Bernhard Sch\u00f6lkopf;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~Laurence_Illing_Midgley1;~Vincent_Stimper1;~Javier_Antoran1;~Emile_Mathieu1;~Bernhard_Sch\u00f6lkopf1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": "M;M;Unspecified;M;;", "homepage": "https://lollcat.github.io/laurence-midgley;https://is.mpg.de/person/vstimper;https://javierantoran.github.io/about/;http://emilemathieu.fr;;", "dblp": "275/3152;253/8559;234/8818.html;223/6084.html;;", "google_scholar": "vMd1gwMAAAAJ;https://scholar.google.de/citations?user=vIiExQQAAAAJ;_b-Cs2cAAAAJ;g9BjTqgAAAAJ;;", "orcid": "0000-0002-3147-6066;0000-0002-4965-4297;0000-0003-2877-2689;;;", "linkedin": "laurencemidgley/;vincent-stimper-502a9412a/;javier-antoran/;;;", "or_profile": "~Laurence_Illing_Midgley1;~Vincent_Stimper1;~Javier_Antoran1;~Emile_Mathieu1;~Bernhard_Sch\u00f6lkopf1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge;;", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;;", "position": "PhD student;PhD student;PhD student;Postdoc;;", "bibtex": "@inproceedings{\nmidgley2023se,\ntitle={{SE}(3) Equivariant Augmented Coupling Flows},\nauthor={Laurence Illing Midgley and Vincent Stimper and Javier Antoran and Emile Mathieu and Bernhard Sch{\\\"o}lkopf and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KKxO6wwx8p}\n}", "github": "", "project": "", "reviewers": "goAi;Xvtr;JYCH;2cbB;mxfv", "pdf_size": 2153652, "rating": "5;6;6;7;7", "confidence": "3;3;4;3;2", "soundness": "3;4;3;3;3", "novelty": "2;3;2;2;3", "presentation": "1;4;3;4;2", "wc_summary": "35;192;281;68;226", "wc_strengths": "48;33;87;55;52", "wc_weaknesses": "274;29;333;123;32", "wc_questions": "6;64;139;59;1", "wc_limitations": "1;16;1;1;12", "wc_review": "364;334;841;306;323", "wc_reply_reviewers": "59;212;211;0;0", "wc_reply_authors": "526;298;357;0;0", "reply_reviewers": "2;2;1;0;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": 
[ 160.4, 93.92465065146637 ], "wc_strengths_avg": [ 55.0, 17.69745744450315 ], "wc_weaknesses_avg": [ 158.2, 124.75800575514182 ], "wc_questions_avg": [ 53.8, 49.92554456388033 ], "wc_limitations_avg": [ 6.2, 6.493073232299171 ], "wc_review_avg": [ 433.6, 204.57428968470109 ], "wc_reply_reviewers_avg": [ 96.4, 96.41701094723898 ], "wc_reply_authors_avg": [ 236.2, 206.87039420854788 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4620278861999064339&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;;", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "First Order Methods with Markovian Noise: from Acceleration to Variational Inequalities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72014", "id": "KMeFZopsqP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c3e38ce55a0fa44bc325bc6fdb7f4e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KMeFZopsqP", "openreview": "https://openreview.net/forum?id=KMeFZopsqP", "poster": "/media/PosterPDFs/NeurIPS%202023/72014.png?t=1703170195.936126", "slides": "https://nips.cc/virtual/2023/poster/72014", "video": "https://nips.cc/virtual/2023/poster/72014", "author_site": "Aleksandr Beznosikov, Sergey Samsonov, Marina Sheshukova, Alexander Gasnikov, Alexey Naumov, Eric Moulines", "tldr": "", "abstract": "This paper delves into stochastic optimization problems that involve Markovian noise. We present a unified approach for the theoretical analysis of first-order gradient methods for stochastic optimization and variational inequalities. Our approach covers scenarios for both non-convex and strongly convex minimization problems. To achieve an optimal (linear) dependence on the mixing time of the underlying noise sequence, we use the randomized batching scheme, which is based on the multilevel Monte Carlo method. Moreover, our technique allows us to eliminate the limiting assumptions of previous research on Markov noise, such as the need for a bounded domain and uniformly bounded stochastic gradients. Our extension to variational inequalities under Markovian noise is original. 
Additionally, we provide lower bounds that match the oracle complexity of our method in the case of strongly convex optimization problems.", "keywords": "convex optimization;stochastic optimization;Markovian noise;acceleration;variational inequalities;lower bounds", "primary_area": "", "supplementary_material": "/attachment/28eb08f2a717ee53c429d32326ed868af8648db0.pdf", "author": "Aleksandr Beznosikov;Sergey Samsonov;Marina Sheshukova;Alexander Gasnikov;Alexey Naumov;Eric Moulines", "authorids": "~Aleksandr_Beznosikov1;~Sergey_Samsonov1;~Marina_Sheshukova1;~Alexander_Gasnikov1;~Alexey_Naumov1;~Eric_Moulines1", "gender": ";M;F;;M;M", "homepage": ";https://www.hse.ru/org/persons/219484540;https://www.hse.ru/org/persons/305137706;;https://www.hse.ru/en/staff/anaumov;", "dblp": ";23/8962;348/5731.html;;196/2848;54/2358", "google_scholar": ";https://scholar.google.ru/citations?user=8BwDmyMAAAAJ;;;5723KoYAAAAJ;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ", "orcid": ";;;;;0000-0002-2058-0693", "linkedin": ";;;;;", "or_profile": "~Aleksandr_Beznosikov1;~Sergey_Samsonov1;~Marina_Sheshukova1;~Alexander_Gasnikov1;~Alexey_Naumov1;~Eric_Moulines1", "aff": ";Higher School of Economics;Higher School of Economics;;Higher School of Economics;Ecole polytechnique", "aff_domain": ";hse.ru;hse.ru;;hse.ru;polytechnique.edu", "position": ";PhD student;Undergrad student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbeznosikov2023first,\ntitle={First Order Methods with Markovian Noise: from Acceleration to Variational Inequalities},\nauthor={Aleksandr Beznosikov and Sergey Samsonov and Marina Sheshukova and Alexander Gasnikov and Alexey Naumov and Eric Moulines},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KMeFZopsqP}\n}", "github": "", "project": "", "reviewers": "RVr5;9UP4;8XQi;11kd", "pdf_size": 938816, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "4;3;4;3", "novelty": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "327;109;189;37", "wc_strengths": "74;46;129;71", "wc_weaknesses": "55;174;337;8", "wc_questions": "15;130;158;20", "wc_limitations": "33;1;4;1", "wc_review": "504;460;817;137", "wc_reply_reviewers": "99;13;22;57", "wc_reply_authors": "25;13;12;25", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 165.5, 107.63247651150651 ], "wc_strengths_avg": [ 80.0, 30.306764921383476 ], "wc_weaknesses_avg": [ 143.5, 127.04821919255697 ], "wc_questions_avg": [ 80.75, 64.04441817988513 ], "wc_limitations_avg": [ 9.75, 13.47915056670857 ], "wc_review_avg": [ 479.5, 240.93204436106046 ], "wc_reply_reviewers_avg": [ 47.75, 33.84800584968042 ], "wc_reply_authors_avg": [ 18.75, 6.2599920127744575 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4912720156272161750&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": ";hse.ru;hse.ru;;hse.ru;polytechnique.edu", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Higher School of Economics;Ecole Polytechnique", "aff_unique_dep": ";", "aff_unique_url": "https://www.hse.ru;https://www.polytechnique.edu", "aff_unique_abbr": "HSE;X", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Russian Federation;France" }, { "title": "One-Pass Distribution Sketch for Measuring Data Heterogeneity in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72013", "id": "KMxRQO7P98", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32c2f3e0a44d55820da7fbcee0a1d95c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KMxRQO7P98", "openreview": "https://openreview.net/forum?id=KMxRQO7P98", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72013", "video": "https://nips.cc/virtual/2023/poster/72013", "author_site": "Zichang Liu, Zhaozhuo Xu, Benjamin Coleman, Anshumali Shrivastava", "tldr": "", "abstract": "Federated learning (FL) is a machine learning paradigm where multiple client devices train models collaboratively without data exchange. Data heterogeneity problem is naturally inherited in FL since data in different clients follow diverse distributions. To mitigate the negative influence of data heterogeneity, we need to start by measuring it across clients. However, the efficient measurement between distributions is a challenging problem, especially in high dimensionality. In this paper, we propose a one-pass distribution sketch to represent the client data distribution. Our sketching algorithm only requires a single pass of the client data, which is efficient in terms of time and memory. Moreover, we show in both theory and practice that the distance between two distribution sketches represents the divergence between their corresponding distributions. Furthermore, we demonstrate with extensive experiments that our distribution sketch improves the client selection in the FL training. 
We also showcase that our distribution sketch is an efficient solution to the cold start problem in FL for new clients with unlabeled data.", "keywords": "Distribution sketch;federated learning", "primary_area": "", "supplementary_material": "/attachment/b3c8bc324d935690244b7a79a00382d9a163648f.pdf", "author": "Zichang Liu;Zhaozhuo Xu;Benjamin Coleman;Anshumali Shrivastava", "authorids": "~Zichang_Liu1;~Zhaozhuo_Xu1;~Benjamin_Coleman1;~Anshumali_Shrivastava1", "gender": "F;M;M;M", "homepage": ";https://ottovonxu.github.io/;https://randorithms.com/research;https://www.cs.rice.edu/~as143/", "dblp": "227/4714;195/4352;217/2220;63/9828", "google_scholar": ";7tDlVAsAAAAJ;fInuVkEAAAAJ;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ", "orcid": "0009-0004-1098-2869;;;", "linkedin": "zichang-liu/;;;", "or_profile": "~Zichang_Liu1;~Zhaozhuo_Xu1;~Benjamin_Coleman1;~Anshumali_Shrivastava1", "aff": "Rice University;Rice University;Google DeepMind;ThirdAI Corp.", "aff_domain": "rice.edu;rice.edu;google.com;thirdai.com", "position": "PhD student;PhD student;Researcher;CEO", "bibtex": "@inproceedings{\nliu2023onepass,\ntitle={One-Pass Distribution Sketch for Measuring Data Heterogeneity in Federated Learning},\nauthor={Zichang Liu and Zhaozhuo Xu and Benjamin Coleman and Anshumali Shrivastava},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KMxRQO7P98}\n}", "github": "", "project": "", "reviewers": "Jfur;iXQ3;c2Su;QJNz", "pdf_size": 475043, "rating": "3;6;7;7", "confidence": "3;3;4;4", "soundness": "2;2;3;3", "novelty": "1;2;3;3", "presentation": "3;2;3;3", "wc_summary": "123;179;76;105", "wc_strengths": "40;126;54;64", "wc_weaknesses": "216;870;63;126", "wc_questions": "70;396;79;23", "wc_limitations": "7;20;9;23", "wc_review": "456;1591;281;341", "wc_reply_reviewers": "382;321;125;0", "wc_reply_authors": "547;655;308;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.75, 37.5790832778023 ], "wc_strengths_avg": [ 71.0, 32.87856444554719 ], "wc_weaknesses_avg": [ 318.75, 322.8756533094436 ], "wc_questions_avg": [ 142.0, 148.18063301254992 ], "wc_limitations_avg": [ 14.75, 6.869315832017043 ], "wc_review_avg": [ 667.25, 537.0215894170364 ], "wc_reply_reviewers_avg": [ 207.0, 152.63846173229078 ], "wc_reply_authors_avg": [ 377.5, 251.53180713380962 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7624928516630233, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=662685786698597459&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "rice.edu;rice.edu;google.com;thirdai.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Rice University;Google;ThirdAI Corp.", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.rice.edu;https://deepmind.com;", "aff_unique_abbr": "Rice;DeepMind;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Fast Attention Requires Bounded Entries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72012", 
"id": "KOVWXcrFIK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c72861451d6fa9dfa64831102b9bb71a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KOVWXcrFIK", "openreview": "https://openreview.net/forum?id=KOVWXcrFIK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72012", "video": "https://nips.cc/virtual/2023/poster/72012", "author_site": "Josh Alman, Zhao Song", "tldr": "", "abstract": "In modern machine learning, inner product attention computation is a fundamental task for training large language models such as Transformer, GPT-1, BERT, GPT-2, GPT-3 and ChatGPT. Formally, in this problem, one is given as input three matrices $Q, K, V \\in [-B,B]^{n \\times d}$, and the goal is to construct the matrix $\\mathrm{Att}(Q,K,V) := \\mathrm{diag}(A {\\bf 1}_n)^{-1} A V \\in \\mathbb{R}^{n \\times d}$, where $A = \\exp(QK^\\top/d)$ is the `attention matrix', and $\\exp$ is applied entry-wise. Straightforward methods for this problem explicitly compute the $n \\times n$ attention matrix $A$, and hence require time $\\Omega(n^2)$ even when $d = n^{o(1)}$ is small. \n\n\nIn this paper, we investigate whether faster algorithms are possible by \\emph{implicitly} making use of the matrix $A$. We present two results, showing that there is a sharp transition at $B = \\Theta(\\sqrt{\\log n})$.\n\n$\\bullet$ If $d = O(\\log n)$ and $B = o(\\sqrt{\\log n})$, there is an $n^{1+o(1)}$ time algorithm to approximate $\\mathrm{Att}(Q,K,V)$ up to $1/\\mathrm{poly}(n)$ additive error.\n\n$\\bullet$ If $d = O(\\log n)$ and $B = \\Theta (\\sqrt{\\log n})$, assuming the Strong Exponential Time Hypothesis from fine-grained complexity theory, it is impossible to approximate $\\mathrm{Att}(Q,K,V)$ up to $1/\\mathrm{poly}(n)$ additive error in truly subquadratic time $n^{2 - \\Omega(1)}$.\n\n\nThis gives a theoretical explanation for the phenomenon observed in practice that attention computation is much more efficient when the input matrices have smaller entries.", "keywords": "fast attention computation;algorithm;hardness", "primary_area": "", "supplementary_material": "", "author": "Josh Alman;Zhao Song", "authorids": "~Josh_Alman1;~Zhao_Song3", "gender": "M;M", "homepage": "http://joshalman.com;https://www.youtube.com/@zhaosong2031", "dblp": "166/1624;76/4051-2", "google_scholar": "yyDMlesAAAAJ;yDZct7UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Josh_Alman1;~Zhao_Song3", "aff": "Columbia University;Adobe", "aff_domain": "columbia.edu;adobe.com", "position": "Assistant Professor;Researcher", "bibtex": "@inproceedings{\nalman2023fast,\ntitle={Fast Attention Requires Bounded Entries},\nauthor={Josh Alman and Zhao Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KOVWXcrFIK}\n}", "github": "", "project": "", "reviewers": "Ze7L;LkJK;ioBT;ZPux", "pdf_size": 356030, "rating": "3;6;6;7", "confidence": "4;3;2;4", "soundness": "2;2;4;3", "novelty": "2;2;3;4", "presentation": "3;3;4;3", "wc_summary": "124;91;275;113", "wc_strengths": "61;33;144;73", "wc_weaknesses": "155;43;118;55", "wc_questions": "54;27;21;59", "wc_limitations": "5;22;28;9", "wc_review": "399;216;586;309", "wc_reply_reviewers": "0;22;20;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], 
"presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 150.75, 72.71304903523163 ], "wc_strengths_avg": [ 77.75, 40.91072597742553 ], "wc_weaknesses_avg": [ 92.75, 45.86052223863134 ], "wc_questions_avg": [ 40.25, 16.48294573187693 ], "wc_limitations_avg": [ 16.0, 9.354143466934854 ], "wc_review_avg": [ 377.5, 136.66473575871723 ], "wc_reply_reviewers_avg": [ 10.5, 10.523782589924593 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3015113445777637, "gs_citation": 110, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5183513021099098486&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "columbia.edu;adobe.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;Adobe", "aff_unique_dep": ";Adobe Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.adobe.com", "aff_unique_abbr": "Columbia;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Fairness Continual Learning Approach to Semantic Scene Understanding in Open-World Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72011", "id": "KQ25VgEvOJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce3cf998b7f59271e80ce03fb74a7115-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KQ25VgEvOJ", "openreview": "https://openreview.net/forum?id=KQ25VgEvOJ", "poster": "/media/PosterPDFs/NeurIPS%202023/72011.png?t=1699739922.68301", "slides": "https://nips.cc/virtual/2023/poster/72011", "video": "https://nips.cc/virtual/2023/poster/72011", "author_site": "Thanh-Dat Truong, Hoang-Quan Nguyen, Bhiksha Raj, Khoa Luu", "tldr": "", "abstract": "Continual semantic segmentation aims to learn new classes while maintaining the information from the previous classes. Although prior studies have shown impressive progress in recent years, the fairness concern in the continual semantic segmentation needs to be better addressed. Meanwhile, fairness is one of the most vital factors in deploying the deep learning model, especially in human-related or safety applications. In this paper, we present a novel Fairness Continual Learning approach to the semantic segmentation problem.\nIn particular, under the fairness objective, a new fairness continual learning framework is proposed based on class distributions.\nThen, a novel Prototypical Contrastive Clustering loss is proposed to address the significant challenges in continual learning, i.e., catastrophic forgetting and background shift. Our proposed loss has also been proven as a novel, generalized learning paradigm of knowledge distillation commonly used in continual learning. Moreover, the proposed Conditional Structural Consistency loss further regularized the structural constraint of the predicted segmentation. 
Our proposed approach has achieved State-of-the-Art performance on three standard scene understanding benchmarks, i.e., ADE20K, Cityscapes, and Pascal VOC, and promoted the fairness of the segmentation model.", "keywords": "Fairness Continual Learning; Semantic Segmentation; Contrastive Clustering;", "primary_area": "", "supplementary_material": "/attachment/8d52bf595e6e7ecd81dd7c81d123a284b7f32d3c.pdf", "author": "Thanh-Dat Truong;Hoang-Quan Nguyen;Bhiksha Raj;Khoa Luu", "authorids": "~Thanh-Dat_Truong1;~Hoang-Quan_Nguyen1;~Bhiksha_Raj1;~Khoa_Luu2", "gender": "M;M;M;M", "homepage": "https://truongthanhdat.github.io/;https://nhquanqt.github.io/;https://www.cs.cmu.edu/directory/bhikshar/;https://uark-cviu.github.io", "dblp": "213/5771;302/3196;60/3996;43/8092", "google_scholar": "qrmxykkAAAAJ;https://scholar.google.com/citations?hl=en;;JPAl8-gAAAAJ", "orcid": ";;;0000-0003-2104-0901", "linkedin": "%08truongthanhdat/;hoangquan-nguyen-hn016/;;khoa-luu-90900215/", "or_profile": "~Thanh-Dat_Truong1;~Hoang-Quan_Nguyen1;~Bhiksha_Raj1;~Khoa_Luu2", "aff": "University of Arkansas, Fayetteville;University of Arkansas - Fayetteville;Mohamed bin Zayed University of Artificial Intelligence;University of Arkansas, Fayetteville", "aff_domain": "uark.edu;uark.edu;mbzuai.ac.ae;uark.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ntruong2023fairness,\ntitle={Fairness Continual Learning Approach to Semantic Scene Understanding in Open-World Environments},\nauthor={Thanh-Dat Truong and Hoang-Quan Nguyen and Bhiksha Raj and Khoa Luu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KQ25VgEvOJ}\n}", "github": "", "project": "", "reviewers": "ReY5;9fXu;2Nqo;jVAT", "pdf_size": 5083164, "rating": "5;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;4", "wc_summary": "98;74;53;123", "wc_strengths": "160;24;50;56", "wc_weaknesses": "274;152;128;86", "wc_questions": "138;250;42;31", "wc_limitations": "1;14;26;19", "wc_review": "671;514;299;315", "wc_reply_reviewers": "17;10;10;32", "wc_reply_authors": "30;18;20;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 87.0, 26.182054923172092 ], "wc_strengths_avg": [ 72.5, 51.9302416709185 ], "wc_weaknesses_avg": [ 160.0, 69.92853494818836 ], "wc_questions_avg": [ 115.25, 88.23087611488396 ], "wc_limitations_avg": [ 15.0, 9.137833441248533 ], "wc_review_avg": [ 449.75, 153.2667201319321 ], "wc_reply_reviewers_avg": [ 17.25, 8.98262211161084 ], "wc_reply_authors_avg": [ 25.0, 6.082762530298219 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16005177263220922299&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uark.edu;uark.edu;mbzuai.ac.ae;uark.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Arkansas;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.uark.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "UARK;MBZUAI", "aff_campus_unique_index": "0;0;0", 
"aff_campus_unique": "Fayetteville;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "SOC: Semantic-Assisted Object Cluster for Referring Video Object Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72010", "id": "KQyXyIAfK8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/542c14ff4622e45384df40dc97b9cf90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KQyXyIAfK8", "openreview": "https://openreview.net/forum?id=KQyXyIAfK8", "poster": "/media/PosterPDFs/NeurIPS%202023/72010.png?t=1698584540.429101", "slides": "https://nips.cc/virtual/2023/poster/72010", "video": "https://nips.cc/virtual/2023/poster/72010", "author_site": "Zhuoyan Luo, Yicheng Xiao, Yong Liu, Shuyan Li, Yitong Wang, Yansong Tang, Xiu Li, Yujiu Yang", "tldr": "", "abstract": "This paper studies referring video object segmentation (RVOS) by boosting video-level visual-linguistic alignment. Recent approaches model the RVOS task as a sequence prediction problem and perform multi-modal interaction as well as segmentation for each frame separately. However, the lack of a global view of video content leads to difficulties in effectively utilizing inter-frame relationships and understanding textual descriptions of object temporal variations. To address this issue, we propose Semantic-assisted Object Cluster (SOC), which aggregates video content and textual guidance for unified temporal modeling and cross-modal alignment. By associating a group of frame-level object embeddings with language tokens, SOC facilitates joint space learning across modalities and time steps. Moreover, we present multi-modal contrastive supervision to help construct well-aligned joint space at the video level. We conduct extensive experiments on popular RVOS benchmarks, and our method outperforms state-of-the-art competitors on all benchmarks by a remarkable margin. Besides, the emphasis on temporal coherence enhances the segmentation stability and adaptability of our method in processing text expressions with temporal variations. 
Code is available at https://github.com/RobertLuo1/NeurIPS2023_SOC.", "keywords": "Referring Video Object Segmentation;Video-Level Multi-Modal Understanding;Object Cluster;Visual-Linguistic Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/e9d1949e34a120f73bbee5cab90968dda2cdca60.zip", "author": "Zhuoyan Luo;Yicheng Xiao;Yong Liu;Shuyan Li;Yitong Wang;Yansong Tang;Xiu Li;Yujiu Yang", "authorids": "~Zhuoyan_Luo1;~Yicheng_Xiao1;~Yong_Liu16;~Shuyan_Li3;~Yitong_Wang1;~Yansong_Tang1;~Xiu_Li1;~Yujiu_Yang2", "gender": "M;M;M;F;M;M;F;M", "homepage": "https://robertluo1.github.io/;;https://yongliu20.github.io/;http://Lily1994.github.io;;https://andytang15.github.io/;https://thusigsiclab.github.io/thu.github.io/introduction.html;https://sites.google.com/view/iigroup-thu", "dblp": ";322/9380;;12/3189;;214/9568;13/1206-1;30/3847", "google_scholar": ";oakZP0cAAAAJ;i9keb3IAAAAJ;https://scholar.google.com.sg/citations?hl=zh-CN;NfFTKfYAAAAJ;TIbistUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4gH3sxsAAAAJ", "orcid": ";;;;;;0000-0003-0403-1923;0000-0002-6427-1024", "linkedin": ";;;;;;;", "or_profile": "~Zhuoyan_Luo1;~Yicheng_Xiao1;~Yong_Liu16;~Shuyan_Li3;~Yitong_Wang1;~Yansong_Tang1;~Xiu_Li1;~Yujiu_Yang2", "aff": "Southeast University;Xi'an University of Electronic Science and Technology;Tsinghua University;University of Cambridge;ByteDance Inc;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "seu.edu.cn;xidian.edu.cn;tsinghua.edu.cn;cam.ac.uk;bytedance.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;PhD student;Postdoc;Researcher;Assistant Professor;Professor;Associate Professor", "bibtex": "@inproceedings{\nluo2023soc,\ntitle={{SOC}: Semantic-Assisted Object Cluster for Referring Video Object Segmentation},\nauthor={Zhuoyan Luo and Yicheng Xiao and Yong Liu and Shuyan Li and Yitong Wang and Yansong Tang and Xiu Li and Yujiu Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KQyXyIAfK8}\n}", "github": "", "project": "", "reviewers": "iFku;d9RC;ihRG;NrNc;8Kjg;9DMq", "pdf_size": 2313866, "rating": "4;5;6;6;6;7", "confidence": "2;4;4;4;4;2", "soundness": "3;3;3;3;2;3", "novelty": "3;2;3;3;2;3", "presentation": "3;3;3;2;3;3", "wc_summary": "44;30;63;53;92;36", "wc_strengths": "28;49;34;129;161;26", "wc_weaknesses": "120;48;123;96;250;1", "wc_questions": "22;14;93;88;114;16", "wc_limitations": "22;3;10;46;42;14", "wc_review": "236;144;323;412;659;93", "wc_reply_reviewers": "91;12;41;16;27;0", "wc_reply_authors": "1176;63;0;0;47;0", "reply_reviewers": "1;1;1;1;2;0", "reply_authors": "4;2;1;1;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 53.0, 20.493901531919196 ], "wc_strengths_avg": [ 71.16666666666667, 53.526992152454156 ], "wc_weaknesses_avg": [ 106.33333333333333, 77.14632561279608 ], "wc_questions_avg": [ 57.833333333333336, 41.345764259742765 ], "wc_limitations_avg": [ 22.833333333333332, 16.025153838748494 ], "wc_review_avg": [ 311.1666666666667, 188.1873327889585 ], "wc_reply_reviewers_avg": [ 31.166666666666668, 29.616530669355733 ], "wc_reply_authors_avg": [ 214.33333333333334, 430.79796760069434 ], 
"reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.8333333333333333, 1.0671873729054748 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.12499999999999992, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4438966549936530075&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "seu.edu.cn;xidian.edu.cn;tsinghua.edu.cn;cam.ac.uk;bytedance.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;2;2;2", "aff_unique_norm": "Southeast University;Xi'an University of Electronic Science and Technology;Tsinghua University;University of Cambridge;ByteDance", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.seu.edu.cn/;http://www.xidian.edu.cn/;https://www.tsinghua.edu.cn;https://www.cam.ac.uk;https://www.bytedance.com", "aff_unique_abbr": "SEU;Xidian University;THU;Cambridge;ByteDance", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Xi'an;Cambridge", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "PUG: Photorealistic and Semantically Controllable Synthetic Data for Representation Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73627", "id": "KRBoWULo2w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8d352fd0f07fde4a74f9476603b3773b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=KRBoWULo2w", "openreview": "https://openreview.net/forum?id=KRBoWULo2w", "poster": "/media/PosterPDFs/NeurIPS%202023/73627.png?t=1701374634.9143786", "slides": "https://nips.cc/virtual/2023/poster/73627", "video": "https://nips.cc/virtual/2023/poster/73627", "author_site": "Florian Bordes, Shashank Shekhar, Mark Ibrahim, Diane Bouchacourt, Pascal Vincent, Ari Morcos", "tldr": "", "abstract": "Synthetic image datasets offer unmatched advantages for designing and evaluating deep neural networks: they make it possible to (i) render as many data samples as needed, (ii) precisely control each scene and yield granular ground truth labels (and captions), (iii) precisely control distribution shifts between training and testing to isolate variables of interest for sound experimentation.\nDespite such promise, the use of synthetic image data is still limited -- and often played down -- mainly due to their lack of realism. Most works therefore rely on datasets of real images, which have often been scraped from public images on the internet, and may have issues with regards to privacy, bias, and copyright, while offering little control over how objects precisely appear.\nIn this work, we present a path to democratize the use of photorealistic synthetic data: we develop a new generation of interactive environments for representation learning research, that offer both controllability and realism. \nWe use the Unreal Engine, a powerful game engine well known in the entertainment industry, to produce PUG (Photorealistic Unreal Graphics) environments and datasets for representation learning. Using PUG for evaluation and fine-tuning, we demonstrate the potential of PUG to both enable more rigorous evaluations and to improve model training.", "keywords": "Synthetic images;Unreal Engine;Representation learning", "primary_area": "", "supplementary_material": "", "author": "Florian Bordes;Shashank Shekhar;Mark Ibrahim;Diane Bouchacourt;Pascal Vincent;Ari S. 
Morcos", "authorids": "~Florian_Bordes1;~Shashank_Shekhar2;~Mark_Ibrahim1;~Diane_Bouchacourt3;~Pascal_Vincent1;~Ari_S._Morcos1", "gender": "M;M;;M;M;F", "homepage": ";http://shashankshekhar.com;https://markibrahim.me/;http://www.iro.umontreal.ca/~vincentp;http://www.arimorcos.com;https://dianebouchacourt.github.io/", "dblp": "194/9862;18/6368-6;180/5660;43/861;217/3720;176/1498", "google_scholar": "OADfWhUAAAAJ;https://scholar.google.fr/citations?hl=en;AqYyoCMAAAAJ;WBCKQMsAAAAJ;v-A_7UsAAAAJ;", "orcid": ";;;;;", "linkedin": "florianbordes;;;;;", "or_profile": "~Florian_Bordes1;~Shashank_Shekhar2;~Mark_Ibrahim1;~Pascal_Vincent1;~Ari_Morcos1;~Diane_Nicole_Bouchacourt1", "aff": "University of Montreal;Meta Facebook;Facebook AI Research (FAIR) Meta;Facebook A.I. Research;Meta AI (FAIR);Meta AI Research", "aff_domain": "umontreal.ca;meta.com;ai.facebook.com;fb.com;meta.com;meta.com", "position": "PhD student;Researcher;Researcher;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbordes2023pug,\ntitle={{PUG}: Photorealistic and Semantically Controllable Synthetic Data for Representation Learning},\nauthor={Florian Bordes and Shashank Shekhar and Mark Ibrahim and Diane Bouchacourt and Pascal Vincent and Ari S. Morcos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=KRBoWULo2w}\n}", "github": "", "project": "", "reviewers": "b1EE;hTqw;bwEW;PGeG;7VBW", "pdf_size": 44148298, "rating": "5;6;7;7;8", "confidence": "4;4;3;3;4", "wc_summary_and_contributions": "108;25;114;94;48", "wc_strengths": "25;28;152;76;28", "wc_improvement": "213;71;363;128;53", "wc_limitations": "7;6;10;81;9", "wc_correctness": "14;1;55;9;4", "wc_clarity": "9;1;43;189;1", "wc_relation_to_prior_work": "12;1;115;190;1", "wc_documentation": "9;11;174;71;9", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "398;145;1027;839;154", "wc_reply_reviewers": "0;0;29;106;7", "wc_reply_authors": "331;270;1412;838;158", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 77.8, 35.10213668710212 ], "wc_strengths_avg": [ 61.8, 48.942415142696014 ], "wc_improvement_avg": [ 165.6, 113.36242763808474 ], "wc_limitations_avg": [ 22.6, 29.234226516191598 ], "wc_correctness_avg": [ 16.6, 19.703806738800502 ], "wc_clarity_avg": [ 48.6, 71.8932542037151 ], "wc_relation_to_prior_work_avg": [ 63.8, 76.31356366990077 ], "wc_documentation_avg": [ 54.8, 64.16354104941529 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 512.6, 360.000333333179 ], "wc_reply_reviewers_avg": [ 28.4, 40.232325311868316 ], "wc_reply_authors_avg": [ 601.8, 467.4605437895267 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.32025630761017426, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15426478654422257644&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umontreal.ca;meta.com;ai.facebook.com;fb.com;meta.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Montreal;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://wwwumontreal.ca;https://meta.com", "aff_unique_abbr": "UM;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Canada;United States" }, { "title": "DAW: Exploring the Better Weighting Function for Semi-supervised Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72009", "id": "KRlG7NJUCD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c28ef8449dc21c90696c80ce47b3b5cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KRlG7NJUCD", "openreview": "https://openreview.net/forum?id=KRlG7NJUCD", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72009", "video": "https://nips.cc/virtual/2023/poster/72009", "author_site": "Rui Sun, Huayu Mai, Tianzhu Zhang, Feng Wu", "tldr": "", "abstract": "The critical challenge of semi-supervised semantic segmentation lies in how to fully exploit a large volume of unlabeled data to improve the model\u2019s generalization performance for robust segmentation. Existing methods tend to employ certain criteria (weighting function) to select pixel-level pseudo labels. However, the trade-off exists between inaccurate yet utilized pseudo-labels, and correct yet discarded pseudo-labels in these methods when handling pseudo-labels without thoughtful consideration of the weighting function, hindering the generalization ability of the model. In this paper, we systematically analyze the trade-off in previous methods when dealing with pseudo-labels. We formally define the trade-off between inaccurate yet utilized pseudo-labels, and correct yet discarded pseudo-labels by explicitly modeling the confidence distribution of correct and inaccurate pseudo-labels, equipped with a unified weighting function. To this end, we propose Distribution-Aware Weighting (DAW) to strive to minimize the negative equivalence impact raised by the trade-off. We find an interesting fact that the optimal solution for the weighting function is a hard step function, with the jump point located at the intersection of the two confidence distributions. Besides, we devise distribution alignment to mitigate the issue of the discrepancy between the prediction distributions of labeled and unlabeled data. 
Extensive experimental results on multiple benchmarks including mitochondria segmentation demonstrate that DAW performs favorably against state-of-the-art methods.", "keywords": "semi-supervised semantic segmentation", "primary_area": "", "supplementary_material": "/attachment/9562d02549aeb0494f4ea88df3c8790e935b8a59.pdf", "author": "Rui Sun;Huayu Mai;Tianzhu Zhang;Feng Wu", "authorids": "~Rui_Sun5;~Huayu_Mai1;~Tianzhu_Zhang1;~Feng_Wu1", "gender": "M;M;M;M", "homepage": ";https://;https://scholar.google.com/citations?user=9sCGe-gAAAAJ&hl=zh-CN;", "dblp": "01/3595-6;354/1017.html;;25/3972-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=HSbNHT4AAAAJ;9sCGe-gAAAAJ;5bInRDEAAAAJ", "orcid": "0000-0002-8009-4240;;;", "linkedin": ";;;", "or_profile": "~Rui_Sun5;~Huayu_Mai1;~Tianzhu_Zhang1;~Feng_Wu1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023daw,\ntitle={{DAW}: Exploring the Better Weighting Function for Semi-supervised Semantic Segmentation},\nauthor={Rui Sun and Huayu Mai and Tianzhu Zhang and Feng Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KRlG7NJUCD}\n}", "github": "", "project": "", "reviewers": "SG4E;mWrq;ahfa;UScw;Sjij", "pdf_size": 3958267, "rating": "4;5;5;7;7", "confidence": "4;4;1;4;5", "soundness": "3;2;3;4;3", "novelty": "2;2;2;4;3", "presentation": "3;2;2;3;3", "wc_summary": "76;117;64;85;73", "wc_strengths": "71;88;25;52;123", "wc_weaknesses": "139;93;143;105;32", "wc_questions": "28;5;5;12;137", "wc_limitations": "13;7;1;9;10", "wc_review": "327;310;238;263;375", "wc_reply_reviewers": "0;0;0;123;13", "wc_reply_authors": "0;118;100;26;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;2;2;2;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.6, 1.3564659966250536 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 83.0, 18.275666882497067 ], "wc_strengths_avg": [ 71.8, 33.065994616826515 ], "wc_weaknesses_avg": [ 102.4, 40.09788024322482 ], "wc_questions_avg": [ 37.4, 50.503861238523136 ], "wc_limitations_avg": [ 8.0, 4.0 ], "wc_review_avg": [ 302.6, 48.23525681490666 ], "wc_reply_reviewers_avg": [ 27.2, 48.163886886338396 ], "wc_reply_authors_avg": [ 48.8, 50.38412448380938 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3931785497463924, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6850553582898069960&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Aligning Optimization Trajectories with Diffusion Models for Constrained Design Generation", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/72008", "id": "KTR33hMnMX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a2fe4bb50fc6f3564cee1551d6309fea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KTR33hMnMX", "openreview": "https://openreview.net/forum?id=KTR33hMnMX", "poster": "/media/PosterPDFs/NeurIPS%202023/72008.png?t=1699450255.8640113", "slides": "https://nips.cc/virtual/2023/poster/72008", "video": "https://nips.cc/virtual/2023/poster/72008", "author_site": "Giorgio Giannone, Akash Srivastava, Ole Winther, Faez Ahmed", "tldr": "", "abstract": "Generative models have significantly influenced both vision and language domains, ushering in innovative multimodal applications. Although these achievements have motivated exploration in scientific and engineering fields, challenges emerge, particularly in constrained settings with limited data where precision is crucial. Traditional engineering optimization methods rooted in physics often surpass generative models in these contexts. To address these challenges, we introduce Diffusion Optimization Models (DOM) and Trajectory Alignment (TA), a learning framework that demonstrates the efficacy of aligning the sampling trajectory of diffusion models with the trajectory derived from physics-based iterative optimization methods. This alignment ensures that the sampling process remains grounded in the underlying physical principles. This alignment eliminates the need for costly preprocessing, external surrogate models, or extra labeled data, generating feasible and high-performance designs efficiently. We apply our framework to structural topology optimization, a fundamental problem in mechanical design, evaluating its performance on in- and out-of-distribution configurations. Our results demonstrate that TA outperforms state-of-the-art deep generative models on in-distribution configurations and halves the inference computational cost. When coupled with a few steps of optimization, it also improves manufacturability for out-of-distribution conditions. 
\nDOM's efficiency and performance improvements significantly expedite design processes and steer them toward optimal and manufacturable outcomes, highlighting the potential of generative models in data-driven design.", "keywords": "diffusion models;engineering design;generative optimization;trajectory matching", "primary_area": "", "supplementary_material": "", "author": "Giorgio Giannone;Akash Srivastava;Ole Winther;Faez Ahmed", "authorids": "~Giorgio_Giannone1;~Akash_Srivastava1;~Ole_Winther1;~Faez_Ahmed1", "gender": ";M;M;", "homepage": ";http://akashgit.github.io;https://olewinther.github.io/;https://decode.mit.edu", "dblp": ";24/9528;36/1568;45/10603", "google_scholar": ";https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ;7VAwhzUAAAAJ;5iElzo8AAAAJ", "orcid": ";;0000-0002-1966-3205;", "linkedin": ";https://uk.linkedin.com/in/akash-srivastava-aa97361b;owinther/;", "or_profile": "~Giorgio_Giannone1;~Akash_Srivastava1;~Ole_Winther1;~Faez_Ahmed1", "aff": ";MIT-IBM Watson AI Research Lab;Technical University of Denmark;Massachusetts Institute of Technology", "aff_domain": ";ibm.com;dtu.dk;mit.edu", "position": ";Research Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngiannone2023aligning,\ntitle={Aligning Optimization Trajectories with Diffusion Models for Constrained Design Generation},\nauthor={Giorgio Giannone and Akash Srivastava and Ole Winther and Faez Ahmed},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KTR33hMnMX}\n}", "github": "", "project": "", "reviewers": "hnNz;dj1b;q5Ex;Db6Y", "pdf_size": 5594808, "rating": "5;6;6;6", "confidence": "3;3;2;3", "soundness": "3;2;2;3", "novelty": "2;3;2;3", "presentation": "2;2;2;3", "wc_summary": "31;124;40;128", "wc_strengths": "36;45;34;107", "wc_weaknesses": "416;497;23;191", "wc_questions": "54;123;254;46", "wc_limitations": "62;2;16;9", "wc_review": "599;791;367;481", "wc_reply_reviewers": "229;615;27;0", "wc_reply_authors": "671;3495;19;0", "reply_reviewers": "1;3;1;0", "reply_authors": "2;8;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 45.38378014224906 ], "wc_strengths_avg": [ 55.5, 30.02082610455615 ], "wc_weaknesses_avg": [ 281.75, 186.77710646650462 ], "wc_questions_avg": [ 119.25, 83.35878777909382 ], "wc_limitations_avg": [ 22.25, 23.47738273317535 ], "wc_review_avg": [ 559.5, 156.82075755460437 ], "wc_reply_reviewers_avg": [ 217.75, 245.83264124196364 ], "wc_reply_authors_avg": [ 1046.25, 1439.363639772799 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.25, 2.7726341266023544 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10823370957662254268&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";ibm.com;dtu.dk;mit.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Technical University of Denmark", "aff_unique_dep": "MIT-IBM Watson AI Research Lab;", "aff_unique_url": "https://www.mitibmwatsonailab.org;https://www.dtu.dk", "aff_unique_abbr": "MIT-IBM AI Lab;DTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United 
States;Denmark" }, { "title": "Conformal Prediction for Time Series with Modern Hopfield Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72007", "id": "KTRwpWCMsC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aef75887979ae1287b5deb54a1e3cbda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KTRwpWCMsC", "openreview": "https://openreview.net/forum?id=KTRwpWCMsC", "poster": "/media/PosterPDFs/NeurIPS%202023/72007.png?t=1700757273.3490593", "slides": "https://nips.cc/virtual/2023/poster/72007", "video": "https://nips.cc/virtual/2023/poster/72007", "author_site": "Andreas Auer, Martin Gauch, Daniel Klotz, Sepp Hochreiter", "tldr": "", "abstract": "To quantify uncertainty, conformal prediction methods are gaining continuously more interest and have already been successfully applied to various domains. However, they are difficult to apply to time series as the autocorrelative structure of time series violates basic assumptions required by conformal prediction. We propose HopCPT, a novel conformal prediction approach for time series that not only copes with temporal structures but leverages them. We show that our approach is theoretically well justified for time series where temporal dependencies are present. In experiments, we demonstrate that our new approach outperforms state-of-the-art conformal prediction methods on multiple real-world time series datasets from four different domains.", "keywords": "time series;uncertainty;prediction interval;conformal prediction;modern hopfield networks", "primary_area": "", "supplementary_material": "/attachment/e7f7306b2eca7785c638ee55d3651f7c74fade9f.zip", "author": "Andreas Auer;Martin Gauch;Daniel Klotz;Sepp Hochreiter", "authorids": "~Andreas_Auer2;~Martin_Gauch1;~Daniel_Klotz1;~Sepp_Hochreiter1", "gender": "M;;;M", "homepage": "https://apointa.github.io/;https://gauchm.github.io/;;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/", "dblp": ";235/0335;;h/SeppHochreiter.html", "google_scholar": "Rg_Ooc8AAAAJ;;;https://scholar.google.at/citations?user=tvUH3WMAAAAJ", "orcid": ";;;0000-0001-7449-2528", "linkedin": "andreas-auer-cs/;;;https://linkedin.com/in/sepp-hochreiter-41514846", "or_profile": "~Andreas_Auer2;~Martin_Gauch1;~Daniel_Klotz1;~Sepp_Hochreiter1", "aff": "Johannes Kepler Universit\u00e4t Linz;Google Research;;Johannes Kepler University Linz", "aff_domain": "jku.at;research.google.com;;jku.at", "position": "PhD student;Intern;;Full Professor", "bibtex": "@inproceedings{\nauer2023conformal,\ntitle={Conformal Prediction for Time Series with Modern Hopfield Networks},\nauthor={Andreas Auer and Martin Gauch and Daniel Klotz and Sepp Hochreiter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KTRwpWCMsC}\n}", "github": "", "project": "", "reviewers": "YCj6;Fgat;yfWn;tZC3", "pdf_size": 1188372, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "110;50;75;87", "wc_strengths": "34;29;40;86", "wc_weaknesses": "94;177;51;61", "wc_questions": "57;115;122;100", "wc_limitations": "29;32;46;25", "wc_review": "324;403;334;359", "wc_reply_reviewers": "34;171;22;26", "wc_reply_authors": "0;160;0;0", "reply_reviewers": "1;3;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 
0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 21.639085008382402 ], "wc_strengths_avg": [ 47.25, 22.708753818736948 ], "wc_weaknesses_avg": [ 95.75, 49.53471005264894 ], "wc_questions_avg": [ 98.5, 25.243811122728676 ], "wc_limitations_avg": [ 33.0, 7.905694150420948 ], "wc_review_avg": [ 355.0, 30.504098085339287 ], "wc_reply_reviewers_avg": [ 63.25, 62.35934172199062 ], "wc_reply_authors_avg": [ 40.0, 69.2820323027551 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3781050686778834037&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "jku.at;research.google.com;;jku.at", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Johannes Kepler University Linz;Google;Johannes Kepler University", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://www.jku.at;https://research.google;https://www.jku.at", "aff_unique_abbr": "JKU;Google Research;JKU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Linz;Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Austria;United States" }, { "title": "On the Constrained Time-Series Generation Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72006", "id": "KTZttLZekH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bfb6a69c0d9e2bc596e1cd31f16fcdde-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KTZttLZekH", "openreview": "https://openreview.net/forum?id=KTZttLZekH", "poster": "/media/PosterPDFs/NeurIPS%202023/72006.png?t=1700599688.5482626", "slides": "https://nips.cc/virtual/2023/poster/72006", "video": "https://nips.cc/virtual/2023/poster/72006", "author_site": "Andrea Coletta, Sriram Gopalakrishnan, Daniel Borrajo, Svitlana Vyetrenko", "tldr": "", "abstract": "Synthetic time series are often used in practical applications to augment the historical time series dataset, \namplify the occurrence of rare events and also create counterfactual scenarios.\nDistributional-similarity (which we refer to as realism) as well as the satisfaction of certain numerical constraints are common requirements for counterfactual time series generation. For instance, the US Federal Reserve publishes synthetic market stress scenarios given by the constrained time series for financial institutions to assess their performance in hypothetical recessions.\nExisting approaches for generating constrained time series usually penalize training loss to enforce constraints, and reject non-conforming samples. However, these approaches would require re-training if we change constraints, and rejection sampling can be computationally expensive, or impractical for complex constraints.\nIn this paper, we propose a novel set of methods to tackle the constrained time series generation problem and provide efficient sampling while ensuring the realism of generated time series. \nIn particular, we frame the problem using a constrained optimization framework and then we propose a set of generative methods including 'GuidedDiffTime', a guided diffusion model. \nWe empirically evaluate our work on several datasets for financial and energy data, where incorporating constraints is critical. 
We show that our approaches outperform existing work both qualitatively and quantitatively, and that 'GuidedDiffTime' does not require re-training for new constraints, resulting in a significant carbon footprint reduction, up to 92% w.r.t. existing deep learning methods.", "keywords": "time-series;generative models;constrained optimization;machine learning", "primary_area": "", "supplementary_material": "/attachment/868d2bfc30a9390746df4289cfed15e5cbb799ec.zip", "author": "Andrea Coletta;Sriram Gopalakrishnan;Daniel Borrajo;Svitlana Vyetrenko", "authorids": "~Andrea_Coletta1;~Sriram_Gopalakrishnan1;~Daniel_Borrajo1;~Svitlana_Vyetrenko1", "gender": "Not Specified;M;M;", "homepage": ";;http://www.plg.inf.uc3m.es/~dborrajo/index.php;", "dblp": "252/1425;207/9272;05/2730;26/8396.html", "google_scholar": "https://scholar.google.it/citations?user=O50E4VMAAAAJ;So86Wl4AAAAJ;https://scholar.google.es/citations?user=gWi0D8IAAAAJ;", "orcid": "0000-0003-1401-1715;;0000-0001-5282-0463;", "linkedin": ";sriram-gopalakrishnan-7a07b727;https://linkedin.com/in/daniel-borrajo-2a63864;", "or_profile": "~Andrea_Coletta1;~Sriram_Gopalakrishnan1;~Daniel_Borrajo1;~Svitlana_Vyetrenko1", "aff": "J.P. Morgan Chase;J.P. Morgan Chase;J.P. Morgan Chase;J.P. Morgan Chase", "aff_domain": "jpmorgan.com;jpmchase.com;jpmorgan.com;jpmorgan.com", "position": "Researcher;Researcher;Principal Researcher;AI Research Director", "bibtex": "@inproceedings{\ncoletta2023on,\ntitle={On the Constrained Time-Series Generation Problem},\nauthor={Andrea Coletta and Sriram Gopalakrishnan and Daniel Borrajo and Svitlana Vyetrenko},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KTZttLZekH}\n}", "github": "", "project": "", "reviewers": "DGpF;U4ao;DrNe;5edF;5wsv", "pdf_size": 6022949, "rating": "5;6;7;8;8", "confidence": "2;4;3;3;3", "soundness": "3;3;3;3;4", "novelty": "3;3;2;3;4", "presentation": "2;3;4;4;4", "wc_summary": "56;67;136;57;88", "wc_strengths": "39;35;36;57;63", "wc_weaknesses": "155;263;157;3;10", "wc_questions": "46;5;75;52;1", "wc_limitations": "7;5;4;2;22", "wc_review": "303;375;408;171;184", "wc_reply_reviewers": "18;0;29;29;15", "wc_reply_authors": "0;169;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 80.8, 29.902508255997525 ], "wc_strengths_avg": [ 46.0, 11.661903789690601 ], "wc_weaknesses_avg": [ 117.6, 98.79595133405012 ], "wc_questions_avg": [ 35.8, 28.505438077672128 ], "wc_limitations_avg": [ 8.0, 7.183313998427188 ], "wc_review_avg": [ 288.2, 96.6424337441892 ], "wc_reply_reviewers_avg": [ 18.2, 10.721940122944167 ], "wc_reply_authors_avg": [ 33.8, 67.6 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2711630722733202, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6874488516805872883&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "jpmorgan.com;jpmchase.com;jpmorgan.com;jpmorgan.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "JPMorgan Chase & Co.", "aff_unique_dep": "", "aff_unique_url": "https://www.jpmorganchase.com", "aff_unique_abbr": "JPM", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reinforcement Learning with Fast and Forgetful Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72005", "id": "KTfAtro6vP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e3bf2f0f10774c474de22a12cb060e2c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KTfAtro6vP", "openreview": "https://openreview.net/forum?id=KTfAtro6vP", "poster": "/media/PosterPDFs/NeurIPS%202023/72005.png?t=1701379780.5918562", "slides": "https://nips.cc/virtual/2023/poster/72005", "video": "https://nips.cc/virtual/2023/poster/72005", "author_site": "Steven Morad, Ryan Kortvelesy, Stephan Liwicki, Amanda Prorok", "tldr": "", "abstract": "Nearly all real world tasks are inherently partially observable, necessitating the use of memory in Reinforcement Learning (RL). Most model-free approaches summarize the trajectory into a latent Markov state using memory models borrowed from Supervised Learning (SL), even though RL tends to exhibit different training and efficiency characteristics. Addressing this discrepancy, we introduce Fast and Forgetful Memory, an algorithm-agnostic memory model designed specifically for RL. Our approach constrains the model search space via strong structural priors inspired by computational psychology. It is a drop-in replacement for recurrent neural networks (RNNs) in recurrent RL algorithms, achieving greater reward than RNNs across various recurrent benchmarks and algorithms _without changing any hyperparameters_. Moreover, Fast and Forgetful Memory exhibits training speeds two orders of magnitude faster than RNNs, attributed to its logarithmic time and linear space complexity. 
Our implementation is available at https://github.com/proroklab/ffm.", "keywords": "reinforcement learning;partially observable;POMDP;memory;rnn;transformer", "primary_area": "", "supplementary_material": "/attachment/b31ff9f4908111c3ef7d3c5dd62b9b204c0aa0e9.zip", "author": "Steven Morad;Ryan Kortvelesy;Stephan Liwicki;Amanda Prorok", "authorids": "~Steven_Morad1;~Ryan_Kortvelesy1;~Stephan_Liwicki3;~Amanda_Prorok1", "gender": "M;M;;", "homepage": "http://www.dangersteve.com/home;;;", "dblp": "247/9311;289/0863;;", "google_scholar": "KvCgriAAAAAJ;fMxXjiIAAAAJ;;", "orcid": "0000-0002-8413-2953;0000-0001-6654-0796;;", "linkedin": ";;;", "or_profile": "~Steven_Morad1;~Ryan_Kortvelesy1;~Stephan_Liwicki3;~Amanda_Prorok1", "aff": "University of Cambridge;University of Cambridge;;", "aff_domain": "cam.ac.uk;cam.ac.uk;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nmorad2023reinforcement,\ntitle={Reinforcement Learning with Fast and Forgetful Memory},\nauthor={Steven Morad and Ryan Kortvelesy and Stephan Liwicki and Amanda Prorok},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KTfAtro6vP}\n}", "github": "", "project": "", "reviewers": "AuBN;ARGZ;Zkgc;qKYK", "pdf_size": 2562953, "rating": "4;5;7;8", "confidence": "4;2;3;4", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "61;117;158;33", "wc_strengths": "17;51;110;74", "wc_weaknesses": "391;448;15;96", "wc_questions": "124;2;74;20", "wc_limitations": "27;1;52;18", "wc_review": "620;619;409;241", "wc_reply_reviewers": "0;190;45;40", "wc_reply_authors": "0;647;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.25, 48.53542520674976 ], "wc_strengths_avg": [ 63.0, 33.87476937190864 ], "wc_weaknesses_avg": [ 237.5, 185.3382043724391 ], "wc_questions_avg": [ 55.0, 47.843494855622744 ], "wc_limitations_avg": [ 24.5, 18.418740456393863 ], "wc_review_avg": [ 472.25, 158.77873755638694 ], "wc_reply_reviewers_avg": [ 68.75, 72.14352015254038 ], "wc_reply_authors_avg": [ 161.75, 280.1592181242659 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19069251784911848, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1444222480776439587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cam.ac.uk;cam.ac.uk;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Trading-off price for data quality to achieve fair online allocation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72004", "id": "KUBFYAPdqN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e0af0d1bc0ec2a90fc294be2e00447e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KUBFYAPdqN", "openreview": "https://openreview.net/forum?id=KUBFYAPdqN", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/72004", "video": "https://nips.cc/virtual/2023/poster/72004", "author_site": "Mathieu Molina, Nicolas Gast, Patrick Loiseau, Vianney Perchet", "tldr": "", "abstract": "We consider the problem of online allocation subject to a long-term fairness penalty. Contrary to existing works, however, we do not assume that the decision-maker observes the protected attributes---which is often unrealistic in practice. Instead they can purchase data that help estimate them from sources of different quality; and hence reduce the fairness penalty at some cost. We model this problem as a multi-armed bandit problem where each arm corresponds to the choice of a data source, coupled with the fair online allocation problem. We propose an algorithm that jointly solves both problems and show that it has a regret bounded by $\\mathcal{O}(\\sqrt{T})$. A key difficulty is that the rewards received by selecting a source are correlated by the fairness penalty, which leads to a need for randomization (despite a stochastic setting). Our algorithm takes into account contextual information available before the source selection, and can adapt to many different fairness notions.", "keywords": "Fairness;Online allocation;Bandits algorithms", "primary_area": "", "supplementary_material": "/attachment/793a9fbce099c18725f0983263bf54a3b8b1374c.zip", "author": "Mathieu Molina;Nicolas Gast;Patrick Loiseau;Vianney Perchet", "authorids": "~Mathieu_Molina1;~Nicolas_Gast1;~Patrick_Loiseau1;~Vianney_Perchet3", "gender": "M;M;;M", "homepage": ";http://polaris.imag.fr/nicolas.gast/;https://patrickloiseau.github.io/;", "dblp": "311/6041;64/4367;10/7062;83/7398", "google_scholar": "oTs2F_IAAAAJ;https://scholar.google.fr/citations?user=KbEN-HoAAAAJ;https://scholar.google.fr/citations?user=q98gB0AAAAAJ;", "orcid": ";0000-0001-6884-8698;;", "linkedin": ";;;", "or_profile": "~Mathieu_Molina1;~Nicolas_Gast1;~Patrick_Loiseau1;~Vianney_Perchet1", "aff": "INRIA - CREST ;INRIA;Inria;", "aff_domain": "inria.fr;inria.fr;inria.fr;", "position": "PhD student;Assistant Professor;Research scientist;", "bibtex": "@inproceedings{\nmolina2023tradingoff,\ntitle={Trading-off price for data quality to achieve fair online allocation},\nauthor={Mathieu Molina and Nicolas Gast and Patrick Loiseau and Vianney Perchet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KUBFYAPdqN}\n}", "github": "", "project": "", "reviewers": "uY3q;2Yz9;Gv3t;aPcw", "pdf_size": 600759, "rating": "5;5;6;7", "confidence": "2;2;3;3", "soundness": "4;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;2;4", "wc_summary": "67;111;94;71", "wc_strengths": "16;112;89;87", "wc_weaknesses": "94;199;163;20", "wc_questions": "9;2;171;1", "wc_limitations": "3;1;9;1", "wc_review": "189;425;526;180", "wc_reply_reviewers": "75;14;51;0", "wc_reply_authors": "44;0;27;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 85.75, 17.851820635442202 ], "wc_strengths_avg": [ 76.0, 36.00694377477767 ], "wc_weaknesses_avg": [ 119.0, 68.48722508614289 ], "wc_questions_avg": [ 45.75, 72.37877796702567 ], "wc_limitations_avg": [ 3.5, 3.278719262151 ], "wc_review_avg": [ 330.0, 149.85159325145662 ], "wc_reply_reviewers_avg": [ 35.0, 29.67322024991558 ], 
"wc_reply_authors_avg": [ 17.75, 18.73999733191016 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5194734293740550913&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "inria.fr;inria.fr;inria.fr;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "CREST", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Faster Relative Entropy Coding with Greedy Rejection Coding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72003", "id": "KXbAgvLi2l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9e720fce64f91114c49cfd640d821da3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KXbAgvLi2l", "openreview": "https://openreview.net/forum?id=KXbAgvLi2l", "poster": "/media/PosterPDFs/NeurIPS%202023/72003.png?t=1701889302.8184652", "slides": "https://nips.cc/virtual/2023/poster/72003", "video": "https://nips.cc/virtual/2023/poster/72003", "author_site": "Gergely Flamich, Stratis Markou, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "tldr": "", "abstract": "Relative entropy coding (REC) algorithms encode a sample from a target distribution $Q$ using a proposal distribution $P$ using as few bits as possible. Unlike entropy coding, REC does not assume discrete distributions and require quantisation.\nAs such, it can be naturally integrated into communication pipelines such as learnt compression and differentially private federated learning. Unfortunately, despite their practical benefits, REC algorithms have not seen widespread application, due to their prohibitively slow runtimes or restrictive assumptions. In this paper, we make progress towards addressing these issues. We introduce Greedy Rejection Coding (GRC), which generalises the rejection sampling-based algorithm of Harsha et al. (2007) to arbitrary probability spaces and partitioning schemes. We first show that GRC terminates almost surely and returns unbiased samples from $Q$, and then focus on two variants of GRC, namely GRCS and GRCD. We show that for continuous $Q$ and $P$ over $\\mathbb{R}$ with unimodal $dQ/dP$, the expected runtime of GRCS is upper bounded by $\\beta D_{KL}(Q||P) + \\mathcal{O}(1)$ where $\\beta \\approx 4.82$, and its expected codelength is optimal. This makes GRCS the first REC algorithm with guaranteed optimal runtime for this class of distributions, up to the multiplicative constant $\\beta$. This significantly improves upon the previous state-of-the-art method, A* coding (Flamich et al., 2022). Under the same assumptions, we experimentally observe and conjecture that the expected runtime and codelength of GRCD are upper bounded by $D_{KL}(Q||P) + \\mathcal{O}(1)$. 
Finally, we evaluate GRC in a compression pipeline with variational autoencoders on MNIST, and show that a modified training objective and a codelength-compression method can further improve compression efficiency.", "keywords": "Compression;Learnt Compression;Relative Entropy Coding;Information Theory", "primary_area": "", "supplementary_material": "/attachment/5dc6db09c0078e63a5a1b06e17d448234707d3e6.pdf", "author": "Gergely Flamich;Stratis Markou;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~Gergely_Flamich1;~Stratis_Markou1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": "M;M;", "homepage": "https://gergely-flamich.github.io/;;http://jmhl.org", "dblp": "187/9709;300/3941;40/6058", "google_scholar": "4Iw9TH8AAAAJ;;BEBccCQAAAAJ", "orcid": "0009-0009-9831-7455;;0000-0001-7610-949X", "linkedin": "gergely-flamich-142773102;stratos-m-85884b94/;", "or_profile": "~Gergely_Flamich1;~Stratis_Markou1;~Jose_Miguel_Hernandez_Lobato1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nflamich2023faster,\ntitle={Faster Relative Entropy Coding with Greedy Rejection Coding},\nauthor={Gergely Flamich and Stratis Markou and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KXbAgvLi2l}\n}", "github": "", "project": "", "reviewers": "BHnG;ULav;tpKF;t3AN", "pdf_size": 951150, "rating": "5;5;6;6", "confidence": "1;1;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "77;128;64;59", "wc_strengths": "38;138;36;83", "wc_weaknesses": "24;221;85;38", "wc_questions": "4;4;93;81", "wc_limitations": "6;8;1;1", "wc_review": "149;499;279;262", "wc_reply_reviewers": "0;0;16;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 27.358728040608906 ], "wc_strengths_avg": [ 73.75, 41.583500333665995 ], "wc_weaknesses_avg": [ 92.0, 77.82994282408282 ], "wc_questions_avg": [ 45.5, 41.716303767232304 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 297.25, 126.74457582082162 ], "wc_reply_reviewers_avg": [ 6.25, 6.722164829874376 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8860084342181984046&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "ReContrast: Domain-Specific Anomaly Detection via Contrastive Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72002", "id": "KYxD9YCQBH", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/228b9279ecf9bbafe582406850c57115-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KYxD9YCQBH", "openreview": "https://openreview.net/forum?id=KYxD9YCQBH", "poster": "/media/PosterPDFs/NeurIPS%202023/72002.png?t=1701761416.3066676", "slides": "https://nips.cc/virtual/2023/poster/72002", "video": "https://nips.cc/virtual/2023/poster/72002", "author_site": "Jia Guo, shuai lu, Lize Jia, Weihang Zhang, Huiqi Li", "tldr": "", "abstract": "Most advanced unsupervised anomaly detection (UAD) methods rely on modeling feature representations of frozen encoder networks pre-trained on large-scale datasets, e.g. ImageNet. However, the features extracted from the encoders that are borrowed from natural image domains coincide little with the features required in the target UAD domain, such as industrial inspection and medical imaging. In this paper, we propose a novel epistemic UAD method, namely ReContrast, which optimizes the entire network to reduce biases towards the pre-trained image domain and orients the network in the target domain. We start with a feature reconstruction approach that detects anomalies from errors. Essentially, the elements of contrastive learning are elegantly embedded in feature reconstruction to prevent the network from training instability, pattern collapse, and identical shortcut, while simultaneously optimizing both the encoder and decoder on the target domain. To demonstrate our transfer ability on various image domains, we conduct extensive experiments across two popular industrial defect detection benchmarks and three medical image UAD tasks, which shows our superiority over current state-of-the-art methods.", "keywords": "Unsupervised Anomaly Detection;Contrastive Learning;Medical Anomaly Detection;Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/0becdafbac28a736de71a87617d454bf8ed23b1a.pdf", "author": "Jia Guo;shuai lu;LIze JIa;Weihang Zhang;Huiqi Li", "authorids": "~Jia_Guo6;~shuai_lu2;~LIze_JIa1;~Weihang_Zhang2;~Huiqi_Li1", "gender": "M;M;M;M;F", "homepage": ";https://lushuai.com.cn;https://www.huiqililab.net/content/partners/postgraduates.html;;", "dblp": ";62/2062-3;;;40/4823", "google_scholar": "nTQvKUAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=en", "orcid": ";0000-0002-3532-7498;;0000-0002-6633-1801;0000-0002-8720-3374", "linkedin": ";;;;", "or_profile": "~Jia_Guo6;~shuai_lu2;~LIze_JIa1;~Weihang_Zhang2;~Huiqi_Li1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "MS student;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023recontrast,\ntitle={ReContrast: Domain-Specific Anomaly Detection via Contrastive Reconstruction},\nauthor={Jia Guo and shuai lu and LIze JIa and Weihang Zhang and Huiqi Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KYxD9YCQBH}\n}", "github": "", "project": "", "reviewers": "e2Vq;Jpdb;S3Cv;xS4v;dCnY", "pdf_size": 27584165, "rating": "4;4;6;7;7", "confidence": "5;5;4;5;5", "soundness": "3;3;3;3;2", "novelty": "2;2;2;3;2", "presentation": "2;3;3;3;3", "wc_summary": "51;40;134;94;97", "wc_strengths": 
"20;48;66;70;54", "wc_weaknesses": "101;147;253;116;180", "wc_questions": "262;9;99;172;64", "wc_limitations": "1;8;1;14;14", "wc_review": "435;252;553;466;409", "wc_reply_reviewers": "29;22;12;34;17", "wc_reply_authors": "0;246;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.2, 34.03174988154444 ], "wc_strengths_avg": [ 51.6, 17.681628884240276 ], "wc_weaknesses_avg": [ 159.4, 54.098428812674406 ], "wc_questions_avg": [ 121.2, 88.04407986912011 ], "wc_limitations_avg": [ 7.6, 5.817215828899595 ], "wc_review_avg": [ 423.0, 98.31581764904364 ], "wc_reply_reviewers_avg": [ 22.8, 7.934733769950949 ], "wc_reply_authors_avg": [ 49.2, 98.4 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.14744195615489716, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6654098473605066617&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MLFMF: Data Sets for Machine Learning for Mathematical Formalization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73626", "id": "KZjSvE2mJz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9efe8db7fab57e19eed25718abedbbd2-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=KZjSvE2mJz", "openreview": "https://openreview.net/forum?id=KZjSvE2mJz", "poster": "/media/PosterPDFs/NeurIPS%202023/73626.png?t=1702122052.4101396", "slides": "https://nips.cc/virtual/2023/poster/73626", "video": "https://nips.cc/virtual/2023/poster/73626", "author_site": "Andrej Bauer, Matej Petkovi\u0107, Ljupco Todorovski", "tldr": "", "abstract": "We introduce MLFMF, a collection of data sets for benchmarking recommendation systems used to support formalization of mathematics with proof assistants. These systems help humans identify which previous entries (theorems, constructions, datatypes, and postulates) are relevant in proving a new theorem or carrying out a new construction. Each data set is derived from a library of formalized mathematics written in proof assistants Agda or Lean. The collection includes the largest Lean 4 library Mathlib, and some of the largest Agda libraries: the standard library, the library of univalent mathematics Agda-unimath, and the TypeTopology library. Each data set represents the corresponding library in two ways: as a heterogeneous network, and as a list of s-expressions representing the syntax trees of all the entries in the library. 
The network contains the (modular) structure of the library and the references between entries, while the s-expressions give complete and easily parsed information about every entry.\nWe report baseline results using standard graph and word embeddings, tree ensembles, and instance-based learning algorithms. The MLFMF data sets provide solid benchmarking support for further investigation of the numerous machine learning approaches to formalized mathematics. The methodology used to extract the networks and the s-expressions readily applies to other libraries, and is applicable to other proof assistants. With more than $250\\,000$ entries in total, this is currently the largest collection of formalized mathematical knowledge in machine learnable format.", "keywords": "recommendation system;formalized mathematics;proof assistant;Lean;Agda;link prediction;graph learning", "primary_area": "", "supplementary_material": "/attachment/7d06311ab086905e96b5120a6a68b11c33df8a67.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nbauer2023mlfmf,\ntitle={{MLFMF}: Data Sets for Machine Learning for Mathematical Formalization},\nauthor={Andrej Bauer and Matej Petkovi{\\'c} and Ljupco Todorovski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=KZjSvE2mJz}\n}", "github": "", "project": "", "reviewers": "XBPs;yGPU;zApR;F1US;7Q4p", "pdf_size": 291605, "rating": "5;5;6;8;8", "confidence": "4;2;3;5;3", "wc_summary_and_contributions": "108;48;133;50;125", "wc_strengths": "38;34;91;152;68", "wc_improvement": "99;28;402;79;23", "wc_limitations": "175;117;24;27;14", "wc_correctness": "1;5;73;16;8", "wc_clarity": "153;20;100;6;5", "wc_relation_to_prior_work": "33;11;79;9;15", "wc_documentation": "45;18;58;7;13", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "653;282;961;347;272", "wc_reply_reviewers": "0;0;417;0;0", "wc_reply_authors": "160;151;637;55;46", "reply_reviewers": "0;0;2;0;0", "reply_authors": "1;1;3;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "wc_summary_and_contributions_avg": [ 92.8, 36.66824239038463 ], "wc_strengths_avg": [ 76.6, 43.060887125093 ], "wc_improvement_avg": [ 126.2, 140.94452809527584 ], "wc_limitations_avg": [ 71.4, 63.75766620571993 ], "wc_correctness_avg": [ 20.6, 26.657831869827675 ], "wc_clarity_avg": [ 56.8, 59.56307581043813 ], "wc_relation_to_prior_work_avg": [ 29.4, 26.211447880649402 ], "wc_documentation_avg": [ 28.2, 19.772708463940898 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 503.0, 267.8813170043779 ], "wc_reply_reviewers_avg": [ 83.4, 166.8 ], "wc_reply_authors_avg": [ 209.8, 218.73765108000956 ], "reply_reviewers_avg": [ 0.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.4626519455729922, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4684739231904714086&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "Prediction and Control in Continual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72001", "id": "KakzVASqul", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c94bbbef466ab1b2cfa100e41413b3a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KakzVASqul", "openreview": "https://openreview.net/forum?id=KakzVASqul", "poster": "/media/PosterPDFs/NeurIPS%202023/72001.png?t=1702434285.3709335", "slides": "https://nips.cc/virtual/2023/poster/72001", "video": "https://nips.cc/virtual/2023/poster/72001", "author_site": "Nishanth Anand, Doina Precup", "tldr": "", "abstract": "Temporal difference (TD) learning is often used to update the estimate of the value function which is used by RL agents to extract useful policies. In this paper, we focus on value function estimation in continual reinforcement learning. We propose to decompose the value function into two components which update at different timescales: a _permanent_ value function, which holds general knowledge that persists over time, and a _transient_ value function, which allows quick adaptation to new situations. We establish theoretical results showing that our approach is well suited for continual learning and draw connections to the complementary learning systems (CLS) theory from neuroscience. Empirically, this approach improves performance significantly on both prediction and control problems.", "keywords": "reinforcement learning;continual reinforcement learning;lifelong learning;never-ending learning;prediction;control;multi-task learning;complementary learning systems", "primary_area": "", "supplementary_material": "/attachment/6dd83987e62e8d7bc5117802863e2cbd3777178f.pdf", "author": "Nishanth Anand;Doina Precup", "authorids": "~Nishanth_Anand1;~Doina_Precup1", "gender": "M;F", "homepage": "https://itsnva7.com;http://cs.mcgill.ca/~dprecup/", "dblp": "241/7250;p/DoinaPrecup", "google_scholar": "pRNasKQAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Nishanth_Anand1;~Doina_Precup1", "aff": "McGill University;McGill University", "aff_domain": "mcgill.ca;mcgill.ca", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nanand2023prediction,\ntitle={Prediction and Control in Continual Reinforcement Learning},\nauthor={Nishanth Anand and Doina Precup},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KakzVASqul}\n}", "github": "", "project": "", "reviewers": "mFKc;27Kd;mEyq;zZwF", "pdf_size": 5421155, "rating": "4;6;7;7", "confidence": "3;4;3;3", "soundness": "2;3;4;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "106;53;64;82", "wc_strengths": "57;36;61;76", "wc_weaknesses": "72;45;30;206", "wc_questions": "18;168;101;264", "wc_limitations": "9;3;1;43", "wc_review": "262;305;257;671", "wc_reply_reviewers": "0;13;33;16", "wc_reply_authors": "0;0;137;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.25, 20.054612935681405 ], "wc_strengths_avg": [ 57.5, 14.291605927956452 ], "wc_weaknesses_avg": [ 88.25, 69.62892717829278 ], "wc_questions_avg": [ 137.75, 90.20081762378875 ], "wc_limitations_avg": [ 14.0, 17.0 ], "wc_review_avg": [ 373.75, 172.62875629512018 ], "wc_reply_reviewers_avg": [ 15.5, 11.757976016304847 ], "wc_reply_authors_avg": [ 34.25, 59.322740159234044 ], "reply_reviewers_avg": [ 
0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16795090028742018532&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mcgill.ca;mcgill.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Blockwise Parallel Transformers for Large Context Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/72000", "id": "KbqQMoqfLQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1bfd87d2d92f0556819467dc08034f76-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KbqQMoqfLQ", "openreview": "https://openreview.net/forum?id=KbqQMoqfLQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/72000", "video": "https://nips.cc/virtual/2023/poster/72000", "author_site": "Hao Liu, Pieter Abbeel", "tldr": "", "abstract": "Transformers have emerged as the cornerstone of state-of-the-art natural language processing models, showcasing exceptional performance across a wide range of AI applications. However, the memory demands posed by the self-attention mechanism and the large feedforward network in Transformers limit their ability to handle long sequences, thereby creating challenges for tasks involving multiple long sequences or long-term dependencies. We present a distinct approach, Blockwise Parallel Transformer (BPT), that leverages blockwise computation of self-attention and feedforward network fusion to minimize memory costs. By processing longer input sequences while maintaining memory efficiency, BPT enables training sequences 32 times longer than vanilla Transformers and up to 4 times longer than previous memory-efficient methods. 
Extensive experiments on language modeling and reinforcement learning tasks demonstrate the effectiveness of BPT in reducing memory requirements and improving performance.", "keywords": "Language Model;Long Context Modeling;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Hao Liu;Pieter Abbeel", "authorids": "~Hao_Liu1;~Pieter_Abbeel2", "gender": "M;M", "homepage": "https://people.eecs.berkeley.edu/~pabbeel/;https://haoliu.ai", "dblp": ";09/3214-55", "google_scholar": "https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;wtK4Yh4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Pieter_Abbeel2;~Hao_Liu10", "aff": "Covariant;University of California, Berkeley", "aff_domain": "covariant.ai;berkeley.edu", "position": "Founder;PhD student", "bibtex": "@inproceedings{\nliu2023blockwise,\ntitle={Blockwise Parallel Transformers for Large Context Models},\nauthor={Hao Liu and Pieter Abbeel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KbqQMoqfLQ}\n}", "github": "", "project": "", "reviewers": "t1yD;KeWq;DQeN;wLgp", "pdf_size": 739249, "rating": "6;7;7;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;4;2;3", "wc_summary": "73;131;106;285", "wc_strengths": "31;86;63;206", "wc_weaknesses": "19;25;42;103", "wc_questions": "96;64;25;38", "wc_limitations": "1;7;73;66", "wc_review": "220;313;309;698", "wc_reply_reviewers": "16;0;0;0", "wc_reply_authors": "29;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 148.75, 81.30920919551487 ], "wc_strengths_avg": [ 96.5, 66.16834590648311 ], "wc_weaknesses_avg": [ 47.25, 33.27442711753277 ], "wc_questions_avg": [ 55.75, 27.151197027018902 ], "wc_limitations_avg": [ 36.75, 32.91181398829302 ], "wc_review_avg": [ 385.0, 184.4952573916197 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 7.25, 12.55736835487436 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13105565405453272569&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "covariant.ai;berkeley.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Covariant;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": ";https://www.berkeley.edu", "aff_unique_abbr": ";UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "Layer-Neighbor Sampling --- Defusing Neighborhood Explosion in GNNs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71999", "id": "Kd5W4JRsfV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51f9036d5e7ae822da8f6d4adda1fb39-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Kd5W4JRsfV", "openreview": "https://openreview.net/forum?id=Kd5W4JRsfV", "poster": "/media/PosterPDFs/NeurIPS%202023/71999.png?t=1700099359.1271658", "slides": "https://nips.cc/virtual/2023/poster/71999", "video": 
"https://nips.cc/virtual/2023/poster/71999", "author_site": "Muhammed Fatih Balin, \u00dcmit \u00c7ataly\u00fcrek", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have received significant attention recently, but training them at a large scale remains a challenge.\nMini-batch training coupled with sampling is used to alleviate this challenge.\nHowever, existing approaches either suffer from the neighborhood explosion phenomenon or have suboptimal performance. \nTo address these issues, we propose a new sampling algorithm called LAyer-neighBOR sampling (LABOR). \nIt is designed to be a direct replacement for Neighbor Sampling (NS) with the same fanout hyperparameter while sampling up to 7 times fewer vertices, without sacrificing quality.\nBy design, the variance of the estimator of each vertex matches NS from the point of view of a single vertex.\nMoreover, under the same vertex sampling budget constraints, LABOR converges \nfaster than existing layer sampling approaches and can use up to 112 times larger batch sizes compared to NS.", "keywords": "Graph Neural Networks;Graph Sampling;GNN;Layer Sampling;Minibatch Training", "primary_area": "", "supplementary_material": "", "author": "Muhammed Fatih Balin;Umit Catalyurek", "authorids": "~Muhammed_Fatih_Balin1;~Umit_Catalyurek1", "gender": "M;M", "homepage": "http://mfbal.in;https://www.cc.gatech.edu/~umit/", "dblp": "234/8533;https://dblp.uni-trier.de/pid/c/UmitVCatalyurek.html", "google_scholar": "https://scholar.google.com.tr/citations?user=xfzbywYAAAAJ;OLDMURQAAAAJ", "orcid": "0000-0001-9935-2687;", "linkedin": "mfbalin/;catalyurek/", "or_profile": "~Muhammed_Fatih_Balin1;~Umit_Catalyurek1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nbalin2023layerneighbor,\ntitle={Layer-Neighbor Sampling --- Defusing Neighborhood Explosion in {GNN}s},\nauthor={Muhammed Fatih Balin and Umit Catalyurek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Kd5W4JRsfV}\n}", "github": "", "project": "", "reviewers": "QdGo;99pb;rBrD;qFRg;gLfM", "pdf_size": 1354442, "rating": "4;5;6;6;6", "confidence": "2;4;3;5;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "35;90;117;81;157", "wc_strengths": "26;96;55;42;51", "wc_weaknesses": "56;75;13;144;61", "wc_questions": "35;69;18;75;28", "wc_limitations": "10;1;310;7;4", "wc_review": "162;331;513;349;301", "wc_reply_reviewers": "48;103;9;460;20", "wc_reply_authors": "0;0;0;1525;0", "reply_reviewers": "1;1;1;3;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 96.0, 40.3583944180142 ], "wc_strengths_avg": [ 54.0, 23.246505113672463 ], "wc_weaknesses_avg": [ 69.8, 42.48952812164428 ], "wc_questions_avg": [ 45.0, 22.777181564012697 ], "wc_limitations_avg": [ 66.4, 121.83694021108705 ], "wc_review_avg": [ 331.2, 112.16844476054752 ], "wc_reply_reviewers_avg": [ 128.0, 169.15909671075926 ], "wc_reply_authors_avg": [ 305.0, 610.0 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6864064729836441, "gs_citation": 23, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=16031659804437972824&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "KexMPvrFgJ", "title": "Learning Depth-regularized Radiance Fields from Asynchronous RGB-D Sequences", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently it is shown that learning radiance fields with depth rendering and depth supervision can effectively promote the view synthesis quality and convergence. But this paradigm requires input RGB-D sequences to be synchronized, hindering its usage in the UAV city modeling scenario. To this end, we propose to jointly learn large-scale depth-regularized radiance fields and calibrate the mismatch between RGB-D frames. Although this joint learning problem can be simply addressed by adding new variables, we exploit the prior that RGB-D frames are actually sampled from the same physical trajectory. As such, we propose a novel time-pose function, which is an implicit network that maps timestamps to SE(3) elements. Our algorithm is designed in an alternative way consisting of three steps: (1) time-pose function fitting; (2) radiance field bootstrapping; (3) joint pose error compensation and radiance field refinement. In order to systematically evaluate under this new problem setting, we propose a large synthetic dataset with diverse controlled mismatch and ground truth. Through extensive experiments, we demonstrate that our method outperforms strong baselines. We also show qualitatively improved results on a real-world asynchronous RGB-D sequence captured by drones. 
Codes, data, and models will be made publicly available.", "keywords": "Neural Radiance Fields", "primary_area": "", "supplementary_material": "/attachment/83f66bd75a230b2fdd81c2647095f31cb5942590.zip", "author": "Zirui Wu;Yuantao Chen;Runyi Yang;Zhenxin Zhu;Chao Hou;Yongliang Shi;Hao Zhao;Guyue Zhou", "authorids": "~Zirui_Wu2;~Yuantao_Chen2;~Runyi_Yang1;~Zhenxin_Zhu1;~Chao_Hou3;~Yongliang_Shi1;~Hao_Zhao1;~Guyue_Zhou2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://wuzirui.github.io/;https://runyiyang.github.io/;;;https://sites.google.com/view/fromandto;https://air.tsinghua.edu.cn/en/info/1046/1196.htm;;https://tao-11-chen.github.io/", "dblp": ";330/3750;329/6347;;08/3737-2.html;133/4199;;", "google_scholar": "Vi7WbO8AAAAJ;l5PcSqwAAAAJ;NkJw6owAAAAJ;;ygQznUQAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;rVSzbhUAAAAJ", "orcid": "0000-0001-7481-0768;0009-0005-5967-6982;;0000-0003-3086-729X;;;;", "linkedin": ";runyiyang/;;;;;;", "or_profile": "~Zirui_Wu2;~Runyi_Yang1;~Zhenxin_Zhu1;~Yongliang_Shi1;~Hao_Zhao1;~Guyue_Zhou2;~CHAO_HOU2;~yuantao_Chen1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Jingwei Hirain Technologies Co., Inc.;Tsinghua University;Peking University;Tsinghua University;University of Hong Kong;Xi'an University of Architecture and Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;hirain.com;tsinghua.edu.cn;pku.edu.cn;tsinghua.edu.cn;hku.hk;xauat.edu.cn", "position": "Undergrad student;Undergrad student;Intern;Postdoc;Postdoc;Associate Professor;MS student;Undergrad student", "bibtex": "@misc{\nwu2023learning,\ntitle={Learning Depth-regularized Radiance Fields from Asynchronous {RGB}-D Sequences},\nauthor={Zirui Wu and Yuantao Chen and Runyi Yang and Zhenxin Zhu and Chao Hou and Yongliang Shi and Hao Zhao and Guyue Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=KexMPvrFgJ}\n}", "github": "", "project": "", "reviewers": "ScBX;GDqd;9aLD;2eof;JhZu", "site": "https://openreview.net/forum?id=KexMPvrFgJ", "pdf_size": 2280946, "rating": "3;5;6;6;7", "confidence": "4;4;4;4;4", "soundness": "1;2;4;3;3", "novelty": "2;2;4;3;3", "presentation": "2;2;4;3;4", "wc_summary": "38;74;120;90;72", "wc_strengths": "13;98;113;93;102", "wc_weaknesses": "207;247;163;215;31", "wc_questions": "3;136;10;61;4", "wc_limitations": "1;66;1;5;1", "wc_review": "262;621;407;464;210", "wc_reply_reviewers": "0;51;31;38;49", "wc_reply_authors": "0;33;0;0;7", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 78.8, 26.67133292507144 ], "wc_strengths_avg": [ 83.8, 36.007776937767204 ], "wc_weaknesses_avg": [ 172.6, 75.70891625165426 ], "wc_questions_avg": [ 42.8, 51.34744394806815 ], "wc_limitations_avg": [ 14.8, 25.64683216305671 ], "wc_review_avg": [ 392.8, 146.88144879459762 ], "wc_reply_reviewers_avg": [ 33.8, 18.410866356584094 ], "wc_reply_authors_avg": [ 8.0, 12.790621564255586 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UmRnXyUofdYJ:scholar.google.com/&scioq=Learning+Depth-regularized+Radiance+Fields+from+Asynchronous+RGB-D+Sequences&hl=en&as_sdt=0,33", "gs_version_total": 0, 
"aff_unique_index": "0;0;1;2;3;2;4;5", "aff_unique_norm": "Beijing Institute of Technology;Beijing Jingwei Hirain Technologies Co., Inc.;Tsinghua University;Peking University;University of Hong Kong;Xi'an University of Architecture and Technology", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.bit.edu.cn/;;https://www.tsinghua.edu.cn;http://www.pku.edu.cn;https://www.hku.hk;http://www.xauat.edu.cn", "aff_unique_abbr": "BIT;;THU;Peking U;HKU;XAUAT", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Wasserstein distributional robustness of neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71998", "id": "KfOUAlraMP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/53be3798fcc46e68ca0819c29a004652-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KfOUAlraMP", "openreview": "https://openreview.net/forum?id=KfOUAlraMP", "poster": "/media/PosterPDFs/NeurIPS%202023/71998.png?t=1699488760.7723272", "slides": "https://nips.cc/virtual/2023/poster/71998", "video": "https://nips.cc/virtual/2023/poster/71998", "author_site": "Xingjian Bai, Guangyi He, Yifan Jiang, Jan Obloj", "tldr": "", "abstract": "Deep neural networks are known to be vulnerable to adversarial attacks (AA). For an image recognition task, this means that a small perturbation of the original can result in the image being misclassified. Design of such attacks as well as methods of adversarial training against them are subject of intense research. We re-cast the problem using techniques of Wasserstein distributionally robust optimization (DRO) and obtain novel contributions leveraging recent insights from DRO sensitivity analysis. We consider a set of distributional threat models. Unlike the traditional pointwise attacks, which assume a uniform bound on perturbation of each input data point, distributional threat models allow attackers to perturb inputs in a non-uniform way. We link these more general attacks with questions of out-of-sample performance and Knightian uncertainty. To evaluate the distributional robustness of neural networks, we propose a first-order AA algorithm and its multistep version. Our attack algorithms include Fast Gradient Sign Method (FGSM) and Projected Gradient Descent (PGD) as special cases. Furthermore, we provide a new asymptotic estimate of the adversarial accuracy against distributional threat models. The bound is fast to compute and first-order accurate, offering new insights even for the pointwise AA. It also naturally yields out-of-sample performance guarantees. We conduct numerical experiments on CIFAR-10, CIFAR-100, ImageNet datasets using DNNs on RobustBench to illustrate our theoretical results. 
Our code is available at https://github.com/JanObloj/W-DRO-Adversarial-Methods.", "keywords": "adversarial attack;adversarial robustness of DNN;adversarial training;Wasserstein distance;distributionally robust optimization;sensitivity analysis;asymptotic bounds", "primary_area": "", "supplementary_material": "/attachment/7b4ad040e119ee58b357fabfb75bb21a6032336d.zip", "author": "Xingjian Bai;Guangyi He;Yifan Jiang;Jan Obloj", "authorids": "~Xingjian_Bai1;~Guangyi_He1;~Yifan_Jiang5;~Jan_Obloj1", "gender": "M;M;M;", "homepage": "https://xingjianbai.com/;https://github.com/Guangyi-Mira;https://yifanjiang233.github.io/;http://www.maths.ox.ac.uk/people/jan.obloj", "dblp": "188/9534;341/6801;;02/9640", "google_scholar": ";;q9POUTYAAAAJ;eWreIb0AAAAJ", "orcid": ";;;0000-0002-5686-5498", "linkedin": ";;;", "or_profile": "~Xingjian_Bai1;~Guangyi_He1;~Yifan_Jiang5;~Jan_Obloj1", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "Undergrad student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbai2023wasserstein,\ntitle={Wasserstein distributional robustness of neural networks},\nauthor={Xingjian Bai and Guangyi He and Yifan Jiang and Jan Obloj},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KfOUAlraMP}\n}", "github": "", "project": "", "reviewers": "5QWe;fHjF;1Lzk;Q9Xf", "pdf_size": 829492, "rating": "4;6;7;8", "confidence": "4;4;4;3", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "74;106;97;122", "wc_strengths": "71;135;196;82", "wc_weaknesses": "179;165;68;36", "wc_questions": "105;42;83;185", "wc_limitations": "65;39;4;6", "wc_review": "494;487;448;431", "wc_reply_reviewers": "0;50;10;16", "wc_reply_authors": "0;56;11;22", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.75, 17.354754391808605 ], "wc_strengths_avg": [ 121.0, 49.60342730094363 ], "wc_weaknesses_avg": [ 112.0, 61.25765258316711 ], "wc_questions_avg": [ 103.75, 52.07386580617959 ], "wc_limitations_avg": [ 28.5, 25.243811122728676 ], "wc_review_avg": [ 465.0, 26.315394733881533 ], "wc_reply_reviewers_avg": [ 19.0, 18.788294228055936 ], "wc_reply_authors_avg": [ 22.25, 20.980645843252777 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6831300510639732, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15650352752304211376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Towards Optimal Effective Resistance Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71997", "id": "KffE8iXAw7", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b8e2046160a568145af6d42eeef199f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KffE8iXAw7", "openreview": "https://openreview.net/forum?id=KffE8iXAw7", "poster": "/media/PosterPDFs/NeurIPS%202023/71997.png?t=1701830134.9469612", "slides": "https://nips.cc/virtual/2023/poster/71997", "video": "https://nips.cc/virtual/2023/poster/71997", "author_site": "Rajat Vadiraj Dwaraknath, Ishani Karmarkar, Aaron Sidford", "tldr": "", "abstract": "We provide new algorithms and conditional hardness for the problem of estimating effective resistances in $n$-node $m$-edge undirected, expander graphs. We provide an $\\widetilde{O}(m\\epsilon^{-1})$-time algorithm that produces with high probability, an $\\widetilde{O}(n\\epsilon^{-1})$-bit sketch from which the effective resistance between any pair of nodes can be estimated, to $(1 \\pm \\epsilon)$-multiplicative accuracy, in $\\widetilde{O}(1)$-time. Consequently, we obtain an $\\widetilde{O}(m\\epsilon^{-1})$-time algorithm for estimating the effective resistance of all edges in such graphs, improving (for sparse graphs) on the previous fastest runtimes of $\\widetilde{O}(m\\epsilon^{-3/2})$ [Chu et. al. 2018] and $\\widetilde{O}(n^2\\epsilon^{-1})$ [Jambulapati, Sidford, 2018] for general graphs and $\\widetilde{O}(m + n\\epsilon^{-2})$ for expanders [Li, Sachdeva 2022]. \nWe complement this result by showing a conditional lower bound that a broad set of algorithms for computing such estimates of the effective resistances between all pairs of nodes require $\\widetilde{\\Omega}(n^2 \\epsilon^{-1/2})$-time, improving upon the previous best such lower bound of $\\widetilde{\\Omega}(n^2 \\epsilon^{-1/13})$ [Musco et. al. 2017]. Further, we leverage the tools underlying these results to obtain improved algorithms and conditional hardness for more general problems of sketching the pseudoinverse of positive semidefinite matrices and estimating functions of their eigenvalues.", "keywords": "effective resistances;spectral sketch;fine-grained complexity;triangle detection;numerical linear algebra", "primary_area": "", "supplementary_material": "/attachment/874e1ee40a792835f338f00bb8d43b5489bddf8e.pdf", "author": "Rajat Vadiraj Dwaraknath;Ishani Karmarkar;Aaron Sidford", "authorids": "~Rajat_Vadiraj_Dwaraknath1;~Ishani_Karmarkar1;~Aaron_Sidford1", "gender": "M;F;", "homepage": "https://eigentales.com;https://ishanikarmarkar.github.io/;", "dblp": "289/1785;350/0555;", "google_scholar": ";https://scholar.google.ch/citations?user=yg8x6wsAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Rajat_Vadiraj_Dwaraknath1;~Ishani_Karmarkar1;~Aaron_Sidford1", "aff": "Stanford University;Stanford University;", "aff_domain": "stanford.edu;stanford.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\ndwaraknath2023towards,\ntitle={Towards Optimal Effective Resistance Estimation},\nauthor={Rajat Vadiraj Dwaraknath and Ishani Karmarkar and Aaron Sidford},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KffE8iXAw7}\n}", "github": "", "project": "", "reviewers": "UUSU;AnJp;Craf;T2X2;9w9Q;btut", "pdf_size": 481377, "rating": "6;6;6;6;7;8", "confidence": "3;2;4;3;4;4", "soundness": "4;3;4;4;3;4", "novelty": "2;3;3;3;4;4", "presentation": "3;3;4;3;3;4", "wc_summary": "142;322;306;90;47;71", "wc_strengths": "56;48;245;82;87;58", "wc_weaknesses": "100;234;152;77;32;15", "wc_questions": 
"26;217;186;24;37;27", "wc_limitations": "6;24;10;4;1;12", "wc_review": "330;845;899;277;204;183", "wc_reply_reviewers": "176;55;13;11;6;36", "wc_reply_authors": "150;27;0;0;0;46", "reply_reviewers": "2;1;1;1;1;1", "reply_authors": "2;2;1;1;1;2", "rating_avg": [ 6.5, 0.7637626158259734 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999298 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 163.0, 110.61946784660766 ], "wc_strengths_avg": [ 96.0, 68.10041605355042 ], "wc_weaknesses_avg": [ 101.66666666666667, 74.18595254868373 ], "wc_questions_avg": [ 86.16666666666667, 82.14502757657067 ], "wc_limitations_avg": [ 9.5, 7.433034373659253 ], "wc_review_avg": [ 456.3333333333333, 298.1943810484847 ], "wc_reply_reviewers_avg": [ 49.5, 59.05011995471869 ], "wc_reply_authors_avg": [ 37.166666666666664, 53.317966536202 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5855400437691199, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2788492781449046083&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Demystifying Oversmoothing in Attention-Based Graph Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71996", "id": "Kg65qieiuB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e4cdfdd909ea4e34bfc85a12774cba0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Kg65qieiuB", "openreview": "https://openreview.net/forum?id=Kg65qieiuB", "poster": "/media/PosterPDFs/NeurIPS%202023/71996.png?t=1701919194.8628604", "slides": "https://nips.cc/virtual/2023/poster/71996", "video": "https://nips.cc/virtual/2023/poster/71996", "author_site": "Xinyi Wu, Amir Ajorlou, Zihui Wu, Ali Jadbabaie", "tldr": "", "abstract": "Oversmoothing in Graph Neural Networks (GNNs) refers to the phenomenon where increasing network depth leads to homogeneous node representations. While previous work has established that Graph Convolutional Networks (GCNs) exponentially lose expressive power, it remains controversial whether the graph attention mechanism can mitigate oversmoothing. In this work, we provide a definitive answer to this question through a rigorous mathematical analysis, by viewing attention-based GNNs as nonlinear time-varying dynamical systems and incorporating tools and techniques from the theory of products of inhomogeneous matrices and the joint spectral radius. We establish that, contrary to popular belief, the graph attention mechanism cannot prevent oversmoothing and loses expressive power exponentially. The proposed framework extends the existing results on oversmoothing for symmetric GCNs to a significantly broader class of GNN models, including random walk GCNs, Graph Attention Networks (GATs) and (graph) transformers. 
In particular, our analysis accounts for asymmetric, state-dependent and time-varying aggregation operators and a wide range of common nonlinear activation functions, such as ReLU, LeakyReLU, GELU and SiLU.", "keywords": "graph neural networks;attention mechanisms;oversmoothing;dynamical systems;theory", "primary_area": "", "supplementary_material": "/attachment/64a46171b5327e0f42b6e4233c23c2981b22996d.pdf", "author": "Xinyi Wu;Amir Ajorlou;Zihui Wu;Ali Jadbabaie", "authorids": "~Xinyi_Wu3;~Amir_Ajorlou1;~Zihui_Wu2;~Ali_Jadbabaie1", "gender": "F;M;M;M", "homepage": "https://xinyiwu98.github.io;http://www.mit.edu/~ajorlou/;https://zihuiwu.github.io/;http://www.mit.edu/~jadbabai/www", "dblp": "98/7827;;;83/3158", "google_scholar": ";_2r1jtYAAAAJ;SU7yjxAAAAAJ;ZBc_WwYAAAAJ", "orcid": ";;0000-0002-7622-3548;", "linkedin": ";;zihui-ray-wu/;", "or_profile": "~Xinyi_Wu3;~Amir_Ajorlou1;~Zihui_Wu2;~Ali_Jadbabaie1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Department of Computing + Mathematical Sciences, California Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;cms.caltech.edu;mit.edu", "position": "PhD student;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2023demystifying,\ntitle={Demystifying Oversmoothing in Attention-Based Graph Neural Networks},\nauthor={Xinyi Wu and Amir Ajorlou and Zihui Wu and Ali Jadbabaie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Kg65qieiuB}\n}", "github": "", "project": "", "reviewers": "Ac2q;UfCo;DswA;QRK7", "pdf_size": 563430, "rating": "6;7;7;8", "confidence": "5;4;3;4", "soundness": "3;4;4;3", "novelty": "3;4;3;3", "presentation": "3;3;3;4", "wc_summary": "40;43;79;124", "wc_strengths": "35;27;74;132", "wc_weaknesses": "127;18;49;79", "wc_questions": "79;27;47;91", "wc_limitations": "1;1;10;1", "wc_review": "282;116;259;427", "wc_reply_reviewers": "24;0;0;8", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 33.974254958718376 ], "wc_strengths_avg": [ 67.0, 41.527099585692234 ], "wc_weaknesses_avg": [ 68.25, 40.195615432531945 ], "wc_questions_avg": [ 61.0, 25.37715508089904 ], "wc_limitations_avg": [ 3.25, 3.897114317029974 ], "wc_review_avg": [ 271.0, 110.2565190816398 ], "wc_reply_reviewers_avg": [ 8.0, 9.797958971132712 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18248665846921657884&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;cms.caltech.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;;California Institute of Technology", "aff_unique_dep": ";;Mathematical Sciences", "aff_unique_url": "https://web.mit.edu;;https://www.caltech.edu", "aff_unique_abbr": "MIT;;Caltech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "VoxDet: Voxel Learning for Novel 
Instance Detection", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71995", "id": "KgqucdSwIe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/21f1c5bbf2519321c1bee9bfa9edcd46-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KgqucdSwIe", "openreview": "https://openreview.net/forum?id=KgqucdSwIe", "poster": "/media/PosterPDFs/NeurIPS%202023/71995.png?t=1701895735.2480078", "slides": "https://nips.cc/virtual/2023/poster/71995", "video": "https://nips.cc/virtual/2023/poster/71995", "author_site": "Bowen Li, Jiashun Wang, Yaoyu Hu, Chen Wang, Sebastian Scherer", "tldr": "", "abstract": "Detecting unseen instances based on multi-view templates is a challenging problem due to its open-world nature. Traditional methodologies, which primarily rely on $2 \\mathrm{D}$ representations and matching techniques, are often inadequate in handling pose variations and occlusions. To solve this, we introduce VoxDet, a pioneer 3D geometry-aware framework that fully utilizes the strong 3D voxel representation and reliable voxel matching mechanism. VoxDet first ingeniously proposes template voxel aggregation (TVA) module, effectively transforming multi-view 2D images into 3D voxel features. By leveraging associated camera poses, these features are aggregated into a compact 3D template voxel. In novel instance detection, this voxel representation demonstrates heightened resilience to occlusion and pose variations. We also discover that a $3 \\mathrm{D}$ reconstruction objective helps to pre-train the 2D-3D mapping in TVA. Second, to quickly align with the template voxel, VoxDet incorporates a Query Voxel Matching (QVM) module. The 2D queries are first converted into their voxel representation with the learned 2D-3D mapping. We find that since the 3D voxel representations encode the geometry, we can first estimate the relative rotation and then compare the aligned voxels, leading to improved accuracy and efficiency. In addition to method, we also introduce the first instance detection benchmark, RoboTools, where 20 unique instances are video-recorded with camera extrinsic. RoboTools also provides 24 challenging cluttered scenarios with more than $9 \\mathrm{k}$ box annotations. Exhaustive experiments are conducted on the demanding LineMod-Occlusion, YCB-video, and RoboTools benchmarks, where VoxDet outperforms various $2 \\mathrm{D}$ baselines remarkably with faster speed. 
To the best of our knowledge, VoxDet is the first to incorporate implicit 3D knowledge for 2D novel instance detection tasks.", "keywords": "Unseen object detection;instance perception;voxel representation", "primary_area": "", "supplementary_material": "/attachment/75fa65594031b88f23df056580581553ebb72955.zip", "author": "Bowen Li;Jiashun Wang;Yaoyu Hu;Chen Wang;Sebastian Scherer", "authorids": "~Bowen_Li7;~Jiashun_Wang1;~Yaoyu_Hu1;~Chen_Wang2;~Sebastian_Scherer1", "gender": "M;M;M;M;M", "homepage": "https://jaraxxus-me.github.io/;https://jiashunwang.github.io/;https://huyaoyu.com;https://sairlab.org/chenw/;https://theairlab.org", "dblp": "75/10470-7;260/6495;223/3385;82/4206-33;253/5743", "google_scholar": "XIAMHVMAAAAJ;gdO9Gb0AAAAJ;;vZfmKl4AAAAJ;gxoPfIYAAAAJ", "orcid": ";;;0000-0002-4630-0805;0000-0002-8373-4688", "linkedin": ";;;wang-chen/;sebastian-scherer-a026961a/", "or_profile": "~Bowen_Li7;~Jiashun_Wang1;~Yaoyu_Hu1;~Chen_Wang2;~Sebastian_Scherer1", "aff": "School of Computer Science, Carnegie Mellon University;Boston Dynamics AI Institute;Carnegie Mellon University;University at Buffalo;Near Earth Autonomy Inc.", "aff_domain": "cs.cmu.edu;theaiinstitute.com;andrew.cmu.edu;buffalo.edu;nearearth.aero", "position": "PhD student;Intern;Researcher;Assistant Professor;Senior Scientist", "bibtex": "@inproceedings{\nli2023voxdet,\ntitle={VoxDet: Voxel Learning for Novel Instance Detection},\nauthor={Bowen Li and Jiashun Wang and Yaoyu Hu and Chen Wang and Sebastian Scherer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KgqucdSwIe}\n}", "github": "", "project": "", "reviewers": "6hBZ;NFHY;dtGe;W6QG;7Hv8", "pdf_size": 16654043, "rating": "6;7;7;7;7", "confidence": "3;3;4;3;3", "soundness": "3;4;4;3;3", "novelty": "3;3;3;3;3", "presentation": "3;4;4;3;3", "wc_summary": "110;127;141;105;75", "wc_strengths": "45;122;112;118;53", "wc_weaknesses": "96;9;209;182;76", "wc_questions": "6;177;18;2;51", "wc_limitations": "1;1;9;61;28", "wc_review": "258;436;489;468;283", "wc_reply_reviewers": "55;13;35;48;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 111.6, 22.303362975121036 ], "wc_strengths_avg": [ 90.0, 33.72239611889997 ], "wc_weaknesses_avg": [ 114.4, 72.72028602804035 ], "wc_questions_avg": [ 50.8, 65.40764481312563 ], "wc_limitations_avg": [ 20.0, 22.7508241608958 ], "wc_review_avg": [ 386.8, 96.77065670956253 ], "wc_reply_reviewers_avg": [ 30.2, 20.79807683416907 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4004908199636603279&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.cmu.edu;theaiinstitute.com;andrew.cmu.edu;buffalo.edu;nearearth.aero", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Carnegie Mellon University;Boston Dynamics AI Institute;University at Buffalo;Near Earth Autonomy", "aff_unique_dep": "School of Computer Science;AI Institute;;", "aff_unique_url": 
"https://www.cmu.edu;https://www.bostondynamics.com/;https://www.buffalo.edu;https://www.nearearthautonomy.com", "aff_unique_abbr": "CMU;BD AI;UB;NEA", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Label Shift: Optimal Dynamic Regret meets Practical Algorithms", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71994", "id": "Ki6DqBXss4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf42f133f355e0e07a8957b508b26a1b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ki6DqBXss4", "openreview": "https://openreview.net/forum?id=Ki6DqBXss4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71994", "video": "https://nips.cc/virtual/2023/poster/71994", "author_site": "Dheeraj Baby, Saurabh Garg, Tzu-Ching Yen, Sivaraman Balakrishnan, Zachary Lipton, Yu-Xiang Wang", "tldr": "", "abstract": "This paper focuses on supervised and unsupervised online label shift,\nwhere the class marginals $Q(y)$ varies\nbut the class-conditionals $Q(x|y)$ remain invariant. In the unsupervised setting, our goal is to adapt a learner, trained on some offline labeled data, to changing label distributions given unlabeled online data. In the supervised setting, we must both learn a classifier and adapt to the dynamically evolving class marginals given only labeled online data. We develop novel algorithms that reduce the adaptation problem to online regression and guarantee optimal dynamic regret without any prior knowledge of the extent of drift in the label distribution. Our solution is based on bootstrapping the estimates of *online regression oracles* that track the drifting proportions. Experiments across numerous simulated and real-world online label shift scenarios demonstrate the superior performance of our proposed approaches, often achieving 1-3% improvement in accuracy while being sample and computationally efficient. 
Code is publicly available at https://github.com/Anon-djiwh/OnlineLabelShift", "keywords": "online learning;label shift;distribution shift;unsupervised domain adaptation", "primary_area": "", "supplementary_material": "", "author": "Dheeraj Baby;Saurabh Garg;Tzu-Ching Yen;Sivaraman Balakrishnan;Zachary Chase Lipton;Yu-Xiang Wang", "authorids": "~Dheeraj_Baby1;~Saurabh_Garg3;~Tzu-Ching_Yen1;~Sivaraman_Balakrishnan1;~Zachary_Chase_Lipton1;~Yu-Xiang_Wang1", "gender": ";M;;M;Unspecified;", "homepage": "https://dheeraj-b.github.io/home/;http://saurabhgarg1996.github.io/;;http://www.stat.cmu.edu/~siva/;http://zacklipton.com;http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";80/208;348/6007;52/10671;;62/1637-3.html", "google_scholar": "L3YF8nIAAAAJ;SAnJ1hIAAAAJ;OK6RSVkAAAAJ;o7yFQXUAAAAJ;MN9Kfg8AAAAJ;HGNZ1fkAAAAJ", "orcid": ";;;;;", "linkedin": ";saurabh-garg-b680b5b8/;;;;", "or_profile": "~Dheeraj_Baby1;~Saurabh_Garg3;~Tzu-Ching_Yen1;~Sivaraman_Balakrishnan1;~Zachary_Chase_Lipton1;~Yu-Xiang_Wang1", "aff": "University of California, Santa Barbara;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu;ucsb.edu", "position": "PhD student;PhD student;MS student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nbaby2023online,\ntitle={Online Label Shift: Optimal Dynamic Regret meets Practical Algorithms},\nauthor={Dheeraj Baby and Saurabh Garg and Tzu-Ching Yen and Sivaraman Balakrishnan and Zachary Chase Lipton and Yu-Xiang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ki6DqBXss4}\n}", "github": "", "project": "", "reviewers": "hAtb;Cpae;vA3S;Gcxc", "pdf_size": 806586, "rating": "7;7;8;8", "confidence": "3;4;3;3", "soundness": "4;3;4;3", "novelty": "4;3;4;3", "presentation": "4;2;4;3", "wc_summary": "128;69;234;33", "wc_strengths": "140;78;170;71", "wc_weaknesses": "88;665;277;90", "wc_questions": "74;19;25;37", "wc_limitations": "9;43;12;1", "wc_review": "439;874;718;232", "wc_reply_reviewers": "34;139;10;0", "wc_reply_authors": "0;197;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 116.0, 76.10190536379493 ], "wc_strengths_avg": [ 114.75, 41.69757187175291 ], "wc_weaknesses_avg": [ 280.0, 235.158457215555 ], "wc_questions_avg": [ 38.75, 21.358546298847212 ], "wc_limitations_avg": [ 16.25, 15.958931668504631 ], "wc_review_avg": [ 565.75, 247.8168426479524 ], "wc_reply_reviewers_avg": [ 45.75, 55.23755515951082 ], "wc_reply_authors_avg": [ 49.25, 85.3035022727672 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4055933680026109574&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cs.ucsb.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu;ucsb.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "University of California, Santa Barbara;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.cmu.edu", 
"aff_unique_abbr": "UCSB;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Transferable Adversarial Robustness for Categorical Data via Universal Robust Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71993", "id": "Kig2YJVYfq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/22a25fc3da528794d52664dacc7bd470-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Kig2YJVYfq", "openreview": "https://openreview.net/forum?id=Kig2YJVYfq", "poster": "/media/PosterPDFs/NeurIPS%202023/71993.png?t=1701723372.2395353", "slides": "https://nips.cc/virtual/2023/poster/71993", "video": "https://nips.cc/virtual/2023/poster/71993", "author_site": "Klim Kireev, Maksym Andriushchenko, Carmela Troncoso, Nicolas Flammarion", "tldr": "", "abstract": "Research on adversarial robustness is primarily focused on image and text data. Yet, many scenarios in which lack of robustness can result in serious risks, such as fraud detection, medical diagnosis, or recommender systems often do not rely on images or text but instead on tabular data. Adversarial robustness in tabular data poses two serious challenges. First, tabular datasets often contain categorical features, and therefore cannot be tackled directly with existing optimization procedures. Second, in the tabular domain, algorithms that are not based on deep networks are widely used and offer great performance, but algorithms to enhance robustness are tailored to neural networks (e.g. adversarial training).\n\nIn this paper, we tackle both challenges. We present a method that allows us to train adversarially robust deep networks for tabular data and to transfer this robustness to other classifiers via universal robust embeddings tailored to categorical data. These embeddings, created using a bilevel alternating minimization framework, can be transferred to boosted trees or random forests making them robust without the need for adversarial training while preserving their high accuracy on tabular data. 
We show that our methods outperform existing techniques within a practical threat model suitable for tabular data.", "keywords": "Tabular data;Categorical data;Robust ML;Adversarial Robustness", "primary_area": "", "supplementary_material": "/attachment/73dc4b3ada037147435c2908e6d058008206c8a3.zip", "author": "Klim Kireev;Maksym Andriushchenko;Carmela Troncoso;Nicolas Flammarion", "authorids": "~Klim_Kireev1;~Maksym_Andriushchenko1;~Carmela_Troncoso1;~Nicolas_Flammarion1", "gender": ";M;F;M", "homepage": ";https://www.andriushchenko.me/;http://carmelatroncoso.com/;", "dblp": ";200/8865;01/4825;164/7417", "google_scholar": ";ZNtuJYoAAAAJ;sMkt3SgAAAAJ;", "orcid": ";;0000-0002-2374-2248;", "linkedin": ";;carmela-troncoso-b497975/?originalSubdomain=ch;", "or_profile": "~Klim_Kireev1;~Maksym_Andriushchenko1;~Carmela_Troncoso1;~Nicolas_Flammarion1", "aff": ";Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": ";epfl.ch;epfl.ch;epfl.ch", "position": ";PhD Student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkireev2023transferable,\ntitle={Transferable Adversarial Robustness for Categorical Data via Universal Robust Embeddings},\nauthor={Klim Kireev and Maksym Andriushchenko and Carmela Troncoso and Nicolas Flammarion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Kig2YJVYfq}\n}", "github": "", "project": "", "reviewers": "EHdN;9uXH;9Bah;a4Uk;saBk", "pdf_size": 290935, "rating": "5;5;6;6;6", "confidence": "3;3;4;3;4", "soundness": "3;3;3;2;3", "novelty": "3;3;3;2;3", "presentation": "2;3;3;3;4", "wc_summary": "29;84;29;60;42", "wc_strengths": "8;112;48;46;79", "wc_weaknesses": "63;160;124;33;279", "wc_questions": "39;64;32;52;1", "wc_limitations": "1;37;4;9;16", "wc_review": "140;457;237;200;417", "wc_reply_reviewers": "0;56;43;0;142", "wc_reply_authors": "0;0;20;0;66", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 48.8, 20.951372270092477 ], "wc_strengths_avg": [ 58.6, 34.92620792470892 ], "wc_weaknesses_avg": [ 131.8, 86.04510445109588 ], "wc_questions_avg": [ 37.6, 21.341040274550814 ], "wc_limitations_avg": [ 13.4, 12.84678948220138 ], "wc_review_avg": [ 290.2, 124.44018643509017 ], "wc_reply_reviewers_avg": [ 48.2, 52.02460956124515 ], "wc_reply_authors_avg": [ 17.2, 25.599999999999998 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12641420857458671199&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";epfl.ch;epfl.ch;epfl.ch", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "An Information Theory Perspective on Variance-Invariance-Covariance 
Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71992", "id": "KipjqOPaZ0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b1d4c03391b0aa6ddde0b807a78c950-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KipjqOPaZ0", "openreview": "https://openreview.net/forum?id=KipjqOPaZ0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71992", "video": "https://nips.cc/virtual/2023/poster/71992", "author_site": "Ravid Shwartz-Ziv, Randall Balestriero, Kenji Kawaguchi, Tim G. J. Rudner, Yann LeCun", "tldr": "", "abstract": "Variance-Invariance-Covariance Regularization (VICReg) is a self-supervised learning (SSL) method that has shown promising results on a variety of tasks. However, the fundamental mechanisms underlying VICReg remain unexplored. In this paper, we present an information-theoretic perspective on the VICReg objective. We begin by deriving information-theoretic quantities for deterministic networks as an alternative to unrealistic stochastic network assumptions. We then relate the optimization of the VICReg objective to mutual information optimization, highlighting underlying assumptions and facilitating a constructive comparison with other SSL algorithms and derive a generalization bound for VICReg, revealing its inherent advantages for downstream tasks. Building on these results, we introduce a family of SSL methods derived from information-theoretic principles that outperform existing SSL techniques.", "keywords": "Self-Supervised Learning;Generalization Bounds;Information-Theory;Deep Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Ravid Shwartz-Ziv;Randall Balestriero;Kenji Kawaguchi;Tim G. J. Rudner;Yann LeCun", "authorids": "~Ravid_Shwartz-Ziv2;~Randall_Balestriero1;~Kenji_Kawaguchi1;~Tim_G._J._Rudner2;~Yann_LeCun1", "gender": "M;;M;M;Not Specified", "homepage": "https://randallbalestriero.github.io/;https://ml.comp.nus.edu.sg/#members;http://yann.lecun.com;https://www.ravid-shwartz-ziv.com/;https://timrudner.com", "dblp": "175/5364;;l/YannLeCun;;230/3480", "google_scholar": "S1x_xqcAAAAJ;aLl3rYoAAAAJ;WLN3QrAAAAAJ;https://scholar.google.co.il/citations?user=SqsLFwMAAAAJ;https://scholar.google.de/citations?user=MbBntPgAAAAJ", "orcid": ";;;;", "linkedin": "randallbalestriero/;;;;trudner", "or_profile": "~Randall_Balestriero1;~Kenji_Kawaguchi1;~Yann_LeCun1;~ravid_ziv1;~Tim_Georg_Johann_Rudner1", "aff": "Meta Facebook;National University of Singapore;New York University;New York University;Yale University", "aff_domain": "facebook.com;nus.edu;nyu.edu;nyu.edu;yale.edu", "position": "Postdoc;Presidential Young Professor;Full Professor;Postdoc;Visiting Fellow", "bibtex": "@inproceedings{\nshwartz-ziv2023an,\ntitle={An Information Theory Perspective on Variance-Invariance-Covariance Regularization},\nauthor={Ravid Shwartz-Ziv and Randall Balestriero and Kenji Kawaguchi and Tim G. J. 
Rudner and Yann LeCun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KipjqOPaZ0}\n}", "github": "", "project": "", "reviewers": "2fL7;tvXs;sVT5;VQBf", "pdf_size": 718607, "rating": "5;5;7;7", "confidence": "3;2;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;2", "wc_summary": "149;64;112;311", "wc_strengths": "15;43;57;131", "wc_weaknesses": "235;58;112;406", "wc_questions": "679;52;8;115", "wc_limitations": "75;46;28;7", "wc_review": "1153;263;317;970", "wc_reply_reviewers": "541;0;66;0", "wc_reply_authors": "1193;166;831;65", "reply_reviewers": "3;0;2;0", "reply_authors": "4;2;4;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 159.0, 92.78739138482125 ], "wc_strengths_avg": [ 61.5, 42.88064831599448 ], "wc_weaknesses_avg": [ 202.75, 133.7336438597259 ], "wc_questions_avg": [ 213.5, 271.43369356069263 ], "wc_limitations_avg": [ 39.0, 24.9499498997493 ], "wc_review_avg": [ 675.75, 391.60399321253095 ], "wc_reply_reviewers_avg": [ 151.75, 226.3430747780899 ], "wc_reply_authors_avg": [ 563.75, 467.5293439988553 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3168582331525749817&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "facebook.com;nus.edu;nyu.edu;nyu.edu;yale.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Meta;National University of Singapore;New York University;Yale University", "aff_unique_dep": "Meta Platforms, Inc.;;;", "aff_unique_url": "https://meta.com;https://www.nus.edu.sg;https://www.nyu.edu;https://www.yale.edu", "aff_unique_abbr": "Meta;NUS;NYU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Singapore" }, { "title": "Generalizing Importance Weighting to A Universal Solver for Distribution Shift Problems", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71991", "id": "KmdlUP23qh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c2092ec0b1370cce3fb5965ab255fae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KmdlUP23qh", "openreview": "https://openreview.net/forum?id=KmdlUP23qh", "poster": "/media/PosterPDFs/NeurIPS%202023/71991.png?t=1701914003.6103637", "slides": "https://nips.cc/virtual/2023/poster/71991", "video": "https://nips.cc/virtual/2023/poster/71991", "author_site": "Tongtong Fang, Nan Lu, Gang Niu, Masashi Sugiyama", "tldr": "", "abstract": "Distribution shift (DS) may have two levels: the distribution itself changes, and the support (i.e., the set where the probability density is non-zero) also changes. When considering the support change between the training and test distributions, there can be four cases: (i) they exactly match; (ii) the training support is wider (and thus covers the test support); (iii) the test support is wider; (iv) they partially overlap. Existing methods are good at cases (i) and (ii), while cases (iii) and (iv) are more common nowadays but still under-explored. 
In this paper, we generalize importance weighting (IW), a golden solver for cases (i) and (ii), to a universal solver for all cases. Specifically, we first investigate why IW might fail in cases (iii) and (iv); based on the findings, we propose generalized IW (GIW) that could handle cases (iii) and (iv) and would reduce to IW in cases (i) and (ii). In GIW, the test support is split into an in-training (IT) part and an out-of-training (OOT) part, and the expected risk is decomposed into a weighted classification term over the IT part and a standard classification term over the OOT part, which guarantees the risk consistency of GIW. Then, the implementation of GIW consists of three components: (a) the split of validation data is carried out by the one-class support vector machine, (b) the first term of the empirical risk can be handled by any IW algorithm given training data and IT validation data, and (c) the second term just involves OOT validation data. Experiments demonstrate that GIW is a universal solver for DS problems, outperforming IW methods in cases (iii) and (iv).", "keywords": "importance weighting;distribution shift;deep learning", "primary_area": "", "supplementary_material": "", "author": "Tongtong Fang;Nan Lu;Gang Niu;Masashi Sugiyama", "authorids": "~Tongtong_Fang1;~Nan_Lu1;~Gang_Niu1;~Masashi_Sugiyama1", "gender": "F;F;M;M", "homepage": "https://tongtongfang.github.io;;https://niug1984.github.io;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "232/4810;;26/3367-1;35/1228", "google_scholar": "XZIPnxIAAAAJ;https://scholar.google.co.jp/citations?user=KQUQlG4AAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": ";;;0000-0001-6658-6743", "linkedin": ";;;", "or_profile": "~Tongtong_Fang1;~Nan_Lu1;~Gang_Niu1;~Masashi_Sugiyama1", "aff": "The University of Tokyo;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;RIKEN;The University of Tokyo", "aff_domain": "ms.k.u-tokyo.ac.jp;uni-tuebingen.de;riken.jp;u-tokyo.ac.jp", "position": "PhD student;Postdoc;Research Scientist (tenured);Full Professor", "bibtex": "@inproceedings{\nfang2023generalizing,\ntitle={Generalizing Importance Weighting to A Universal Solver for Distribution Shift Problems},\nauthor={Tongtong Fang and Nan Lu and Gang Niu and Masashi Sugiyama},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KmdlUP23qh}\n}", "github": "", "project": "", "reviewers": "uQtu;UAYX;jhs4;G7Ea", "pdf_size": 3230619, "rating": "6;6;7;8", "confidence": "2;3;4;5", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "52;89;77;73", "wc_strengths": "53;49;75;288", "wc_weaknesses": "91;78;228;41", "wc_questions": "62;57;127;392", "wc_limitations": "6;37;22;27", "wc_review": "264;310;529;821", "wc_reply_reviewers": "19;112;7;27", "wc_reply_authors": "80;937;78;78", "reply_reviewers": "1;1;1;1", "reply_authors": "3;5;3;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 13.348689074212494 ], "wc_strengths_avg": [ 116.25, 99.65283488190389 ], "wc_weaknesses_avg": [ 109.5, 70.83254901526557 ], "wc_questions_avg": [ 159.5, 137.04470073665746 ], "wc_limitations_avg": [ 23.0, 11.20267825120404 ], "wc_review_avg": [ 481.0, 220.3599328371653 ], 
"wc_reply_reviewers_avg": [ 41.25, 41.46308599224134 ], "wc_reply_authors_avg": [ 293.25, 371.67013264452663 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4958787314032857713&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "ms.k.u-tokyo.ac.jp;uni-tuebingen.de;riken.jp;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Tokyo;Eberhard Karls University of T\u00fcbingen;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.uni-tuebingen.de/;https://www.riken.jp", "aff_unique_abbr": "UTokyo;Uni T\u00fcbingen;RIKEN", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Japan;Germany" }, { "title": "The Drunkard\u2019s Odometry: Estimating Camera Motion in Deforming Scenes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73625", "id": "Kn6VRkYqYk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98c9b79e9c686aadd4d81e34a7773dd1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Kn6VRkYqYk", "openreview": "https://openreview.net/forum?id=Kn6VRkYqYk", "poster": "/media/PosterPDFs/NeurIPS%202023/73625.png?t=1701450500.4852993", "slides": "https://nips.cc/virtual/2023/poster/73625", "video": "https://nips.cc/virtual/2023/poster/73625", "author_site": "David Recasens Lafuente, Martin R. Oswald, Marc Pollefeys, Javier Civera", "tldr": "", "abstract": "Estimating camera motion in deformable scenes poses a complex and open research challenge. Most existing non-rigid structure from motion techniques assume to observe also static scene parts besides deforming scene parts in order to establish an anchoring reference. However, this assumption does not hold true in certain relevant application cases such as endoscopies. Deformable odometry and SLAM pipelines, which tackle the most challenging scenario of exploratory trajectories, suffer from a lack of robustness and proper quantitative evaluation methodologies. To tackle this issue with a common benchmark, we introduce the Drunkard's Dataset, a challenging collection of synthetic data targeting visual navigation and reconstruction in deformable environments. This dataset is the first large set of exploratory camera trajectories with ground truth inside 3D scenes where every surface exhibits non-rigid deformations over time. Simulations in realistic 3D buildings lets us obtain a vast amount of data and ground truth labels, including camera poses, RGB images and depth, optical flow and normal maps at high resolution and quality. We further present a novel deformable odometry method, dubbed the Drunkard\u2019s Odometry, which decomposes optical flow estimates into rigid-body camera motion and non-rigid scene deformations. In order to validate our data, our work contains an evaluation of several baselines as well as a novel tracking error metric which does not require ground truth data. 
Dataset and code: https://davidrecasens.github.io/TheDrunkard'sOdometry/", "keywords": "odometry;non-rigid dataset;RGB-D tracking;endoscopy", "primary_area": "", "supplementary_material": "/attachment/b27e9dacfed7042606fae9e151c65bb97ebba6da.zip", "author": "David Recasens;Martin R. Oswald;Marc Pollefeys;Javier Civera", "authorids": "~David_Recasens1;~Martin_R._Oswald1;~Marc_Pollefeys2;~Javier_Civera1", "gender": "M;;M;M", "homepage": "https://davidrecasens.github.io/;;;http://webdiis.unizar.es/~jcivera/", "dblp": ";37/7272;p/MarcPollefeys;53/826", "google_scholar": "https://scholar.google.es/citations?user=Q1ocp7wAAAAJ;https://scholar.google.ch/citations?user=biytQP8AAAAJ;YYH0BjEAAAAJ;https://scholar.google.es/citations?user=j_sMzokAAAAJ", "orcid": "0000-0002-5637-2845;0000-0002-1183-9958;;0000-0003-1368-1151", "linkedin": "david-recasens-lafuente/;martin-r-oswald-167461122/;marc-pollefeys-30a7075/;jcivera/", "or_profile": "~David_Recasens1;~Martin_R._Oswald1;~Marc_Pollefeys2;~Javier_Civera1", "aff": "Universidad de Zaragoza;University of Amsterdam;Swiss Federal Institute of Technology;Universidad de Zaragoza", "aff_domain": "unizar.es;uva.nl;ethz.ch;unizar.es", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nrecasens2023the,\ntitle={The Drunkard{\\textquoteright}s Odometry: Estimating Camera Motion in Deforming Scenes},\nauthor={David Recasens and Martin R. Oswald and Marc Pollefeys and Javier Civera},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Kn6VRkYqYk}\n}", "github": "", "project": "", "reviewers": "s3zk;yRQV;etHK;8ABf", "pdf_size": 10475270, "rating": "6;6;6;7", "confidence": "3;3;3;5", "wc_summary_and_contributions": "37;57;78;231", "wc_strengths": "39;54;81;261", "wc_improvement": "368;81;212;341", "wc_limitations": "28;1;8;24", "wc_correctness": "6;1;8;17", "wc_clarity": "17;1;10;14", "wc_relation_to_prior_work": "13;12;8;1", "wc_documentation": "28;1;5;78", "wc_additional_feedback": "1;1;1;1", "wc_review": "537;209;411;968", "wc_reply_reviewers": "70;0;206;0", "wc_reply_authors": "1692;520;1782;1264", "reply_reviewers": "1;0;1;0", "reply_authors": "4;1;4;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 100.75, 76.58451214181625 ], "wc_strengths_avg": [ 108.75, 89.18064532172886 ], "wc_improvement_avg": [ 250.5, 114.24644414597769 ], "wc_limitations_avg": [ 15.25, 11.121488209767612 ], "wc_correctness_avg": [ 8.0, 5.787918451395113 ], "wc_clarity_avg": [ 10.5, 6.020797289396148 ], "wc_relation_to_prior_work_avg": [ 8.5, 4.716990566028302 ], "wc_documentation_avg": [ 28.0, 30.651264247988205 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 531.25, 277.97875368452173 ], "wc_reply_reviewers_avg": [ 69.0, 84.10112960002381 ], "wc_reply_authors_avg": [ 1314.5, 498.70908353467956 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17748307358652349078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "unizar.es;uva.nl;ethz.ch;unizar.es", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Universidad de Zaragoza;University of Amsterdam;Swiss Federal Institute of Technology", 
"aff_unique_dep": ";;", "aff_unique_url": "https://www.unizar.es;https://www.uva.nl;https://www.ethz.ch", "aff_unique_abbr": "UNIZAR;UvA;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Spain;Netherlands;Switzerland" }, { "title": "Disentangling Voice and Content with Self-Supervision for Speaker Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71990", "id": "KoFYzuwjCA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d276b0a087efdd2404f3295b26c24c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KoFYzuwjCA", "openreview": "https://openreview.net/forum?id=KoFYzuwjCA", "poster": "/media/PosterPDFs/NeurIPS%202023/71990.png?t=1698740366.3137062", "slides": "https://nips.cc/virtual/2023/poster/71990", "video": "https://nips.cc/virtual/2023/poster/71990", "author_site": "TIANCHI LIU, Kong Aik Lee, Qiongqiong Wang, Haizhou Li", "tldr": "", "abstract": "For speaker recognition, it is difficult to extract an accurate speaker representation from speech because of its mixture of speaker traits and content. This paper proposes a disentanglement framework that simultaneously models speaker traits and content variability in speech. It is realized with the use of three Gaussian inference layers, each consisting of a learnable transition model that extracts distinct speech components. Notably, a strengthened transition model is specifically designed to model complex speech dynamics. We also propose a self-supervision method to dynamically disentangle content without the use of labels other than speaker identities. The efficacy of the proposed framework is validated via experiments conducted on the VoxCeleb and SITW datasets with 9.56\\% and 8.24\\% average reductions in EER and minDCF, respectively. 
Since it requires neither additional model training nor additional data, it is readily applicable in practice.", "keywords": "speaker recognition;disentanglement learning;self-supervision", "primary_area": "", "supplementary_material": "/attachment/d72eacdc32e0237cffdf9ede64ed7c371a809326.pdf", "author": "Tianchi Liu;Kong Aik Lee;Qiongqiong Wang;Haizhou Li", "authorids": "~Tianchi_Liu3;~Kong_Aik_Lee1;~Qiongqiong_Wang1;~Haizhou_Li3", "gender": "M;M;F;M", "homepage": "https://liu-tianchi.github.io;https://sites.google.com/view/kongaiklee;;https://colips.org/~eleliha/", "dblp": "263/4983;35/4621;81/10013;36/4118", "google_scholar": "1W24GsQAAAAJ;SZegiA4AAAAJ;https://scholar.google.com.sg/citations?user=Ff5izecAAAAJ;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ", "orcid": "0000-0003-3472-0703;0000-0001-9133-3000;0000-0002-9903-0618;0000-0001-9158-9401", "linkedin": ";;qiongqiong-wang-69959110b/?originalSubdomain=jp;haizhou-li-4ba74b6/", "or_profile": "~Tianchi_Liu3;~Kong_Aik_Lee1;~Qiongqiong_Wang1;~Haizhou_Li3", "aff": "Institute for Infocomm Research (I2R), A*STAR;A*STAR;Institute for Infocomm Research, A*STAR;National University of Singapore", "aff_domain": "i2r.a-star.edu.sg;a-star.edu.sg;i2r.a-star.edu.sg;nus.edu.sg", "position": "Researcher;Principal Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nliu2023disentangling,\ntitle={Disentangling Voice and Content with Self-Supervision for Speaker Recognition},\nauthor={Tianchi Liu and Kong Aik Lee and Qiongqiong Wang and Haizhou Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KoFYzuwjCA}\n}", "github": "", "project": "", "reviewers": "krxU;48tf;6npJ;P6Zh;ffvX", "pdf_size": 3421572, "rating": "6;6;6;7;7", "confidence": "4;4;3;1;3", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;2;2;2", "wc_summary": "63;82;142;116;199", "wc_strengths": "57;41;98;41;45", "wc_weaknesses": "177;20;237;26;46", "wc_questions": "147;17;207;27;27", "wc_limitations": "45;8;9;16;56", "wc_review": "489;168;693;226;373", "wc_reply_reviewers": "20;25;77;28;95", "wc_reply_authors": "57;46;42;68;524", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 120.4, 47.818824745072945 ], "wc_strengths_avg": [ 56.4, 21.61110825478416 ], "wc_weaknesses_avg": [ 101.2, 88.8625905541809 ], "wc_questions_avg": [ 85.0, 77.56287771866127 ], "wc_limitations_avg": [ 26.8, 19.853463173965395 ], "wc_review_avg": [ 389.8, 188.72138193644088 ], "wc_reply_reviewers_avg": [ 49.0, 30.848014522818158 ], "wc_reply_authors_avg": [ 147.4, 188.5180097497319 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7453559924999299, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13096567116135994346&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "i2r.a-star.edu.sg;a-star.edu.sg;i2r.a-star.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Institute for Infocomm Research;Agency for Science, Technology and Research;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url":
"https://www.i2r.a-star.edu.sg;https://www.a-star.edu.sg;https://www.nus.edu.sg", "aff_unique_abbr": "I2R;A*STAR;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Distributed Personalized Empirical Risk Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71989", "id": "KoQgA0coZ9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dfee09496a5a8b0b01d9d4c589758832-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KoQgA0coZ9", "openreview": "https://openreview.net/forum?id=KoQgA0coZ9", "poster": "/media/PosterPDFs/NeurIPS%202023/71989.png?t=1702062979.3883872", "slides": "https://nips.cc/virtual/2023/poster/71989", "video": "https://nips.cc/virtual/2023/poster/71989", "author_site": "Yuyang Deng, Mohammad Mahdi Kamani, Pouria Mahdavinia, Mehrdad Mahdavi", "tldr": "", "abstract": "This paper advocates a new paradigm Personalized Empirical Risk Minimization (PERM) to facilitate learning from heterogeneous data sources without imposing stringent constraints on computational resources shared by participating devices. In PERM, we aim at learning a distinct model for each client by personalizing the aggregation of local empirical losses by effectively estimating the statistical discrepancy among data distributions, which entails optimal statistical accuracy for all local distributions and overcomes the data heterogeneity issue. To learn personalized models at scale, we propose a distributed algorithm that replaces the standard model averaging with model shuffling to simultaneously optimize \nPERM objectives for all devices. This also allows to learn distinct model architectures (e.g., neural networks with different number of parameters) for different clients, thus confining to underlying memory and compute resources of individual clients. 
We rigorously analyze the convergence of the proposed algorithm and conduct experiments that corroborate the effectiveness of the proposed paradigm.", "keywords": "distributed learning;heterogeneous data;heterogeneous system;convergence analysis", "primary_area": "", "supplementary_material": "/attachment/1f5e9a0893240c041b8dd7d9c8901f702222753c.pdf", "author": "Yuyang Deng;Mohammad Mahdi Kamani;Pouria Mahdavinia;Mehrdad Mahdavi", "authorids": "~Yuyang_Deng3;~Mohammad_Mahdi_Kamani2;~Pouria_Mahdavinia1;~Mehrdad_Mahdavi2", "gender": "M;;M;M", "homepage": "https://sites.psu.edu/yuyangdeng/;https://mmkamani.com;;http://www.cse.psu.edu/~mzm616/", "dblp": "261/9253;194/7523.html;331/5410;88/4321", "google_scholar": "bfV3XWUAAAAJ;jUXXvNIAAAAJ;https://scholar.google.com/citations?hl=en;HzxnwocAAAAJ", "orcid": ";0000-0003-3930-4151;;", "linkedin": ";mm-kamani7/;pouria-mahdavinia-486b59212/;", "or_profile": "~Yuyang_Deng3;~Mohammad_Mahdi_Kamani2;~Pouria_Mahdavinia1;~Mehrdad_Mahdavi2", "aff": "Pennsylvania State University;Wyze Labs;Pennsylvania State University;Toyota Technological Institute at Chicago", "aff_domain": "psu.edu;wyze.com;psu.edu;ttic.edu", "position": "PhD student;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\ndeng2023distributed,\ntitle={Distributed Personalized Empirical Risk Minimization},\nauthor={Yuyang Deng and Mohammad Mahdi Kamani and Pouria Mahdavinia and Mehrdad Mahdavi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KoQgA0coZ9}\n}", "github": "", "project": "", "reviewers": "XAoP;x8t7;DEoF;S5TK", "pdf_size": 919880, "rating": "5;5;6;7", "confidence": "3;1;4;3", "soundness": "2;2;4;4", "novelty": "2;2;3;4", "presentation": "3;3;4;3", "wc_summary": "96;25;83;81", "wc_strengths": "50;17;96;86", "wc_weaknesses": "61;58;170;185", "wc_questions": "83;2;48;43", "wc_limitations": "2;2;62;14", "wc_review": "292;104;459;409", "wc_reply_reviewers": "7;17;61;0", "wc_reply_authors": "0;33;25;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 27.316432783216772 ], "wc_strengths_avg": [ 62.25, 31.227992250543423 ], "wc_weaknesses_avg": [ 118.5, 59.247362810508285 ], "wc_questions_avg": [ 44.0, 28.731515797117282 ], "wc_limitations_avg": [ 20.0, 24.73863375370596 ], "wc_review_avg": [ 316.0, 136.5814775143394 ], "wc_reply_reviewers_avg": [ 21.25, 23.731571797923543 ], "wc_reply_authors_avg": [ 14.5, 14.773286702694158 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.48420012470625223, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11841329475788447157&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "psu.edu;wyze.com;psu.edu;ttic.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Pennsylvania State University;Wyze Labs;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.psu.edu;https://wyze.com;https://www.tti-chicago.org", "aff_unique_abbr": "PSU;Wyze;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, {
"title": "StyleDrop: Text-to-Image Synthesis of Any Style", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71988", "id": "KoaFh16uOc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d33b177b69425e7685b0b1c05bd2a5e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KoaFh16uOc", "openreview": "https://openreview.net/forum?id=KoaFh16uOc", "poster": "/media/PosterPDFs/NeurIPS%202023/71988.png?t=1701966033.0632505", "slides": "https://nips.cc/virtual/2023/poster/71988", "video": "https://nips.cc/virtual/2023/poster/71988", "author_site": "Kihyuk Sohn, Lu Jiang, Lu Jiang, Jarred Barber, Kimin Lee, Nataniel Ruiz, Dilip Krishnan, Huiwen Chang, Yuanzhen Li, Irfan Essa, Michael Rubinstein, Yuan Hao, Glenn Entis, Irina Blok, Daniel Castro Chin", "tldr": "", "abstract": "Pre-trained large text-to-image models synthesize impressive images with an appropriate use of text prompts. However, ambiguities inherent in natural language, and out-of-distribution effects make it hard to synthesize arbitrary image styles, leveraging a specific design pattern, texture or material. In this paper, we introduce *StyleDrop*, a method that enables the synthesis of images that faithfully follow a specific style using a text-to-image model. StyleDrop is extremely versatile and captures nuances and details of a user-provided style, such as color schemes, shading, design patterns, and local and global effects. StyleDrop works by efficiently learning a new style by fine-tuning very few trainable parameters (less than 1\\% of total model parameters), and improving the quality via iterative training with either human or automated feedback. Better yet, StyleDrop is able to deliver impressive results even when the user supplies only a *single* image specifying the desired style. An extensive study shows that, for the task of style tuning text-to-image models, StyleDrop on Muse convincingly outperforms other methods, including DreamBooth and textual inversion on Imagen or Stable Diffusion. 
More results are available at our project website: [https://styledrop.github.io](https://styledrop.github.io).", "keywords": "text-to-image synthesis;fine-tuning;stylization", "primary_area": "", "supplementary_material": "", "author": "Kihyuk Sohn;Lu Jiang;Jarred Barber;Kimin Lee;Nataniel Ruiz;Dilip Krishnan;Huiwen Chang;Yuanzhen Li;Irfan Essa;Michael Rubinstein;Yuan Hao;Glenn Entis;Irina Blok;Daniel Castro Chin", "authorids": "~Kihyuk_Sohn1;~Lu_Jiang1;~Jarred_Barber1;~Kimin_Lee1;~Nataniel_Ruiz1;~Dilip_Krishnan1;~Huiwen_Chang2;~Yuanzhen_Li1;~Irfan_Essa1;~Michael_Rubinstein1;~Yuan_Hao1;glennentis@google.com;irinablok@google.com;dcastro@google.com", "gender": "M;M;M;M;M;M;F;F;M;M;;;;", "homepage": "https://sites.google.com/site/kihyuksml/;http://www.lujiang.info/;;https://sites.google.com/view/kiminlee;https://natanielruiz.github.io/;http://dilipkay.wordpress.com;;http://people.csail.mit.edu/yzli/;http://www.irfanessa.com/;http://people.csail.mit.edu/mrub/;;;;", "dblp": "53/10771;22/752-4;;183/6849;205/3222;08/2316;131/4389;97/371;e/IrfanAEssa;16/1356;42/10216;;;", "google_scholar": "VxpypngAAAAJ;jIKjjSYAAAAJ;UbjqML8AAAAJ;92M8xv4AAAAJ;https://scholar.google.fr/citations?user=CiOmcSIAAAAJ;_MEuWIMAAAAJ;eZQNcvcAAAAJ;k1eaag4AAAAJ;https://scholar.google.com.tw/citations?user=XM97iScAAAAJ;ttBdcmsAAAAJ;;;;", "orcid": ";0000-0003-0286-8439;;;;;;0000-0002-9831-8249;0000-0002-6236-2969;;;;;", "linkedin": ";roadjiang/;jarred-barber-77947458/;;nataniel-ruiz/;;;yuanzhen-yz-li-5561655/;irfanessa/;;;;;", "or_profile": "~Kihyuk_Sohn1;~Lu_Jiang1;~Jarred_Barber1;~Kimin_Lee1;~Nataniel_Ruiz1;~Dilip_Krishnan1;~Huiwen_Chang2;~Yuanzhen_Li1;~Irfan_Essa1;~Michael_Rubinstein1;~Yuan_Hao1;glennentis@google.com;irinablok@google.com;dcastro@google.com", "aff": "Google;Google Research;Google;Google;Boston University;Google;Research, Google;Google;Georgia Institute of Technology;Google;Google;;;", "aff_domain": "google.com;google.com;google.com;google.com;bu.edu;google.com;research.google.com;google.com;gatech.edu;google.com;google.com;;;", "position": "Research Scientist;Researcher;Researcher;Researcher;PhD student;Research Scientist;Researcher;Software Engineer;Full Professor;Research Scientist;Researcher;;;", "bibtex": "@inproceedings{\nsohn2023styledrop,\ntitle={StyleDrop: Text-to-Image Synthesis of Any Style},\nauthor={Kihyuk Sohn and Lu Jiang and Jarred Barber and Kimin Lee and Nataniel Ruiz and Dilip Krishnan and Huiwen Chang and Yuanzhen Li and Irfan Essa and Michael Rubinstein and Yuan Hao and Glenn Entis and Irina Blok and Daniel Castro Chin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KoaFh16uOc}\n}", "github": "", "project": "", "reviewers": "9biG;vFSY;xBBj;bhPK;oHZZ", "pdf_size": 0, "rating": "4;6;6;6;7", "confidence": "4;4;4;4;3", "soundness": "2;3;4;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "90;73;94;78;184", "wc_strengths": "59;20;65;72;83", "wc_weaknesses": "150;108;118;340;237", "wc_questions": "69;9;4;5;17", "wc_limitations": "62;14;1;23;12", "wc_review": "430;224;282;518;533", "wc_reply_reviewers": "81;13;12;0;136", "wc_reply_authors": "80;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 103.8, 40.82352263095384 
], "wc_strengths_avg": [ 59.8, 21.442014830700963 ], "wc_weaknesses_avg": [ 190.6, 87.4244816970624 ], "wc_questions_avg": [ 20.8, 24.530796970339143 ], "wc_limitations_avg": [ 22.4, 21.00095235935742 ], "wc_review_avg": [ 397.4, 124.40192924549041 ], "wc_reply_reviewers_avg": [ 48.4, 52.263180155822894 ], "wc_reply_authors_avg": [ 16.0, 32.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7307067407906424436&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "google.com;google.com;google.com;google.com;bu.edu;google.com;research.google.com;google.com;gatech.edu;google.com;google.com;;;", "author_num": 14, "aff_unique_index": "0;0;0;0;1;0;0;0;2;0;0", "aff_unique_norm": "Google;Boston University;Georgia Institute of Technology", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.bu.edu;https://www.gatech.edu", "aff_unique_abbr": "Google;BU;Georgia Tech", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Time-Independent Information-Theoretic Generalization Bounds for SGLD", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71987", "id": "Ks0RSFNxPO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/19dbb86f771ddbf9986cf0c9b1c61c17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ks0RSFNxPO", "openreview": "https://openreview.net/forum?id=Ks0RSFNxPO", "poster": "/media/PosterPDFs/NeurIPS%202023/71987.png?t=1699796328.9882026", "slides": "https://nips.cc/virtual/2023/poster/71987", "video": "https://nips.cc/virtual/2023/poster/71987", "author_site": "Futoshi Futami, Masahiro Fujisawa", "tldr": "", "abstract": "We provide novel information-theoretic generalization bounds for stochastic gradient Langevin dynamics (SGLD) under the assumptions of smoothness and dissipativity, which are widely used in sampling and non-convex optimization studies.\nOur bounds are time-independent and decay to zero as the sample size increases, regardless of the number of iterations and whether the step size is fixed.\nUnlike previous studies, we derive the generalization error bounds by focusing on the time evolution of the Kullback--Leibler divergence, which is related to the stability of datasets and is the upper bound of the mutual information between output parameters and an input dataset.\nAdditionally, we establish the first information-theoretic generalization bound when the training and test loss are the same by showing that a loss function of SGLD is sub-exponential.\nThis bound is also time-independent and removes the problematic step size dependence in existing work, leading to an improved excess risk bound by combining our analysis with the existing non-convex optimization error bounds.", "keywords": "SGLD;Langevin dynamics;Generalization;Information theoretic analysis", "primary_area": "", "supplementary_material": "/attachment/9a5374e1f935313b3ba12a4637f857a6e415dea3.pdf", "author": "Futoshi Futami;Masahiro Fujisawa", "authorids": "~Futoshi_Futami1;~Masahiro_Fujisawa1", "gender": "M;M", "homepage": ";https://msfuji0211.github.io/", "dblp": "209/4960;236/6307", "google_scholar": 
"https://scholar.google.co.jp/citations?user=WTOG0mMAAAAJ;gS24jX8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Futoshi_Futami1;~Masahiro_Fujisawa1", "aff": "Osaka University;The University of Tokyo", "aff_domain": "osaka-u.ac.jp;ms.k.u-tokyo.ac.jp", "position": "Lecturer;PhD student", "bibtex": "@inproceedings{\nfutami2023timeindependent,\ntitle={Time-Independent Information-Theoretic Generalization Bounds for {SGLD}},\nauthor={Futoshi Futami and Masahiro Fujisawa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ks0RSFNxPO}\n}", "github": "", "project": "", "reviewers": "sqKQ;oJ4H;7RWp;h7LH", "pdf_size": 357174, "rating": "5;6;6;7", "confidence": "2;2;3;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;1;3;3", "wc_summary": "39;51;128;69", "wc_strengths": "43;34;78;20", "wc_weaknesses": "69;211;110;10", "wc_questions": "3;32;233;244", "wc_limitations": "1;1;7;17", "wc_review": "155;329;556;360", "wc_reply_reviewers": "0;142;0;0", "wc_reply_authors": "0;219;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 71.75, 34.18607172519241 ], "wc_strengths_avg": [ 43.75, 21.405314760591587 ], "wc_weaknesses_avg": [ 100.0, 73.28369532167439 ], "wc_questions_avg": [ 128.0, 111.04278454721856 ], "wc_limitations_avg": [ 6.5, 6.5383484153110105 ], "wc_review_avg": [ 350.0, 142.30425151765493 ], "wc_reply_reviewers_avg": [ 35.5, 61.48780366869514 ], "wc_reply_authors_avg": [ 54.75, 94.82978171439603 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13681389791679650900&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "osaka-u.ac.jp;ms.k.u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Osaka University;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.osaka-u.ac.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Osaka U;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Circuit as Set of Points", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71986", "id": "KsICioDlYs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6697bb267dc517379bc8aa326e844f8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KsICioDlYs", "openreview": "https://openreview.net/forum?id=KsICioDlYs", "poster": "/media/PosterPDFs/NeurIPS%202023/71986.png?t=1701958619.2970448", "slides": "https://nips.cc/virtual/2023/poster/71986", "video": "https://nips.cc/virtual/2023/poster/71986", "author_site": "Jialv Zou, Xinggang Wang, Jiahao Guo, Wenyu Liu, Qian Zhang, Chang Huang", "tldr": "", "abstract": "As the size of circuit designs continues to grow rapidly, artificial intelligence technologies are being extensively used in Electronic Design Automation (EDA) to assist with circuit design.\nPlacement and routing are the most time-consuming parts of the physical design process, and how to quickly 
evaluate the placement has become a hot research topic. \nPrior works either transformed circuit designs into images using hand-crafted methods and then used Convolutional Neural Networks (CNN) to extract features, which are limited by the quality of the hand-crafted methods and could not achieve end-to-end training, or treated the circuit design as a graph structure and used Graph Neural Networks (GNN) to extract features, which require time-consuming preprocessing.\nIn our work, we propose a novel perspective for circuit design by treating circuit components as point clouds and using Transformer-based point cloud perception methods to extract features from the circuit. This approach enables direct feature extraction from raw data without any preprocessing, allows for end-to-end training, and results in high performance.\nExperimental results show that our method achieves state-of-the-art performance in congestion prediction tasks on both the CircuitNet and ISPD2015 datasets, as well as in design rule check (DRC) violation prediction tasks on the CircuitNet dataset.\nOur method establishes a bridge between the relatively mature point cloud perception methods and the fast-developing EDA algorithms, enabling us to leverage more collective intelligence to solve this task. To facilitate the research of open EDA design, source codes and pre-trained models are released at https://github.com/hustvl/circuitformer.", "keywords": "EDA;Circuit Design;Congestion prediction;DRC violation prediction", "primary_area": "", "supplementary_material": "", "author": "Jialv Zou;Xinggang Wang;JiaHao Guo;Wenyu Liu;Qian Zhang;Chang Huang", "authorids": "~Jialv_Zou2;~Xinggang_Wang1;~JiaHao_Guo1;~Wenyu_Liu3;~Qian_Zhang7;~Chang_Huang4", "gender": "M;M;;M;M;M", "homepage": "https://jialv-zou.netlify.app/;https://xwcv.github.io/index.htm;https://github.com/gjhhust/;http://eic.hust.edu.cn/professor/liuwenyu/;;", "dblp": "359/5972;95/3056;;42/4110-1.html;04/2024-9;", "google_scholar": "do_ngRUAAAAJ;qNCTLV0AAAAJ;;D7jDk7gAAAAJ;pCY-bikAAAAJ;IyyEKyIAAAAJ", "orcid": ";0000-0001-6732-7823;;0000-0002-4582-7488;;", "linkedin": ";;;;;", "or_profile": "~Jialv_Zou2;~Xinggang_Wang1;~JiaHao_Guo1;~Wenyu_Liu3;~Qian_Zhang7;~Chang_Huang4", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Horizon Robotics;", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;horizon.cc;", "position": "Undergrad student;Full Professor;Undergrad student;Full Professor;Researcher;", "bibtex": "@inproceedings{\nzou2023circuit,\ntitle={Circuit as Set of Points},\nauthor={Jialv Zou and Xinggang Wang and JiaHao Guo and Wenyu Liu and Qian Zhang and Chang Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KsICioDlYs}\n}", "github": "", "project": "", "reviewers": "YScf;DbXo;rfmi;SgH5", "pdf_size": 1442852, "rating": "4;6;6;7", "confidence": "4;2;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "97;76;60;164", "wc_strengths": "37;43;154;124", "wc_weaknesses": "81;19;1;326", "wc_questions": "131;37;1;52", "wc_limitations": "69;4;32;6", "wc_review": "415;179;248;672", "wc_reply_reviewers": "617;10;0;8", "wc_reply_authors": "1455;0;0;12", "reply_reviewers": "2;1;0;1", "reply_authors": "4;1;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 
], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 39.619281921811755 ], "wc_strengths_avg": [ 89.5, 50.66803726216361 ], "wc_weaknesses_avg": [ 106.75, 130.0161047716782 ], "wc_questions_avg": [ 55.25, 47.49934210070704 ], "wc_limitations_avg": [ 27.75, 26.252380844411046 ], "wc_review_avg": [ 378.5, 189.93748971701189 ], "wc_reply_reviewers_avg": [ 158.75, 264.59721748348 ], "wc_reply_authors_avg": [ 366.75, 628.3205292683027 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.48420012470625223, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14496081265995202871&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;horizon.cc;", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Huazhong University of Science and Technology;Horizon Robotics", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;https://www.horizon-robotics.com/", "aff_unique_abbr": "HUST;Horizon Robotics", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "IDRNet: Intervention-Driven Relation Network for Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71985", "id": "KtHquQuyA5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a216c27f2f3160b1785c057fa510fdf1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KtHquQuyA5", "openreview": "https://openreview.net/forum?id=KtHquQuyA5", "poster": "/media/PosterPDFs/NeurIPS%202023/71985.png?t=1702196908.0682015", "slides": "https://nips.cc/virtual/2023/poster/71985", "video": "https://nips.cc/virtual/2023/poster/71985", "author_site": "Zhenchao Jin, Xiaowei Hu, Lingting Zhu, Luchuan Song, Li Yuan, Lequan Yu", "tldr": "", "abstract": "Co-occurrent visual patterns suggest that pixel relation modeling facilitates dense prediction tasks, which inspires the development of numerous context modeling paradigms, \\emph{e.g.}, multi-scale-driven and similarity-driven context schemes. Despite the impressive results, these existing paradigms often suffer from inadequate or ineffective contextual information aggregation due to reliance on large amounts of predetermined priors. To alleviate the issues, we propose a novel \\textbf{I}ntervention-\\textbf{D}riven \\textbf{R}elation \\textbf{Net}work (\\textbf{IDRNet}), which leverages a deletion diagnostics procedure to guide the modeling of contextual relations among different pixels. Specifically, we first group pixel-level representations into semantic-level representations with the guidance of pseudo labels and further improve the distinguishability of the grouped representations with a feature enhancement module. Next, a deletion diagnostics procedure is conducted to model relations of these semantic-level representations via perceiving the network outputs and the extracted relations are utilized to guide the semantic-level representations to interact with each other. Finally, the interacted representations are utilized to augment original pixel-level representations for final predictions. Extensive experiments are conducted to validate the effectiveness of IDRNet quantitatively and qualitatively. 
Notably, our intervention-driven context scheme brings consistent performance improvements to state-of-the-art segmentation frameworks and achieves competitive results on popular benchmark datasets, including ADE20K, COCO-Stuff, PASCAL-Context, LIP, and Cityscapes.", "keywords": "semantic segmentation;relation modeling;object detection", "primary_area": "", "supplementary_material": "", "author": "Zhenchao Jin;Xiaowei Hu;Lingting Zhu;Luchuan Song;Li Yuan;Lequan Yu", "authorids": "~Zhenchao_Jin1;~Xiaowei_Hu3;~Lingting_Zhu1;~Luchuan_Song1;~Li_Yuan2;~Lequan_Yu1", "gender": ";M;M;;;M", "homepage": ";https://xw-hu.github.io/;;;;https://yulequan.github.io/", "dblp": ";151/5859-1;285/9359;;;165/8092", "google_scholar": ";tUb4J0kAAAAJ;TPD_P98AAAAJ;;;https://scholar.google.com.hk/citations?user=llXf3wUAAAAJ", "orcid": ";0000-0002-5708-7018;;;;0000-0002-9315-6527", "linkedin": ";;;;;", "or_profile": "~Zhenchao_Jin1;~Xiaowei_Hu3;~Lingting_Zhu1;~Luchuan_Song1;~Li_Yuan2;~Lequan_Yu1", "aff": ";Shanghai Artificial Intelligence Laboratory;The University of Hong Kong;;;The University of Hong Kong", "aff_domain": ";pjlab.org.cn;hku.hk;;;hku.hk", "position": ";Researcher;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\njin2023idrnet,\ntitle={{IDRN}et: Intervention-Driven Relation Network for Semantic Segmentation},\nauthor={Zhenchao Jin and Xiaowei Hu and Lingting Zhu and Luchuan Song and Li Yuan and Lequan Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KtHquQuyA5}\n}", "github": "", "project": "", "reviewers": "PhSv;dJpB;3dVM;q3X9", "pdf_size": 1099339, "rating": "4;5;6;6", "confidence": "3;4;3;5", "soundness": "3;3;3;4", "novelty": "3;1;3;4", "presentation": "2;3;3;2", "wc_summary": "58;58;70;83", "wc_strengths": "48;124;64;127", "wc_weaknesses": "54;151;64;116", "wc_questions": "110;43;2;31", "wc_limitations": "27;9;9;3", "wc_review": "297;385;209;360", "wc_reply_reviewers": "0;86;0;48", "wc_reply_authors": "0;222;0;0", "reply_reviewers": "0;2;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.25, 10.328964130056798 ], "wc_strengths_avg": [ 90.75, 35.223394214640926 ], "wc_weaknesses_avg": [ 96.25, 39.410499869958514 ], "wc_questions_avg": [ 46.5, 39.57587649060978 ], "wc_limitations_avg": [ 12.0, 9.0 ], "wc_review_avg": [ 312.75, 67.942530862487 ], "wc_reply_reviewers_avg": [ 33.5, 36.093628246547894 ], "wc_reply_authors_avg": [ 55.5, 96.12881982007269 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12372272389424731563&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";pjlab.org.cn;hku.hk;;;hku.hk", "author_num": 6, "aff_unique_index": "0;1;1", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.shailab.org/;https://www.hku.hk", "aff_unique_abbr": "Shanghai AI Lab;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Describe, Explain, Plan and 
Select: Interactive Planning with LLMs Enables Open-World Multi-Task Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71984", "id": "KtvPdGb31Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b8dfb8c0c12e6fafc6c256cb08a5ca7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KtvPdGb31Z", "openreview": "https://openreview.net/forum?id=KtvPdGb31Z", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71984", "video": "https://nips.cc/virtual/2023/poster/71984", "author_site": "Zihao Wang, Shaofei Cai, Guanzhou Chen, Anji Liu, Xiaojian (Shawn) Ma, Yitao Liang", "tldr": "", "abstract": "In this paper, we study the problem of planning in Minecraft, a popular, democratized yet challenging open-ended environment for developing multi-task embodied agents. We've found two primary challenges of empowering such agents with planning: 1) planning in an open-ended world like Minecraft requires precise and multi-step reasoning due to the long-term nature of the tasks, and 2) as vanilla planners do not consider the achievability of the current agent when ordering parallel sub-goals within a complicated plan, the resulting plan could be inefficient. To this end, we propose ``$\\underline{D}$escribe, $\\underline{E}$xplain, $\\underline{P}$lan and $\\underline{S}$elect'' ($\\textbf{DEPS}$), an interactive planning approach based on Large Language Models (LLMs). Our approach helps with better error correction from the feedback during the long-haul planning, while also bringing the sense of proximity via goal $\\textbf{Selector}$, a learnable module that ranks parallel sub-goals based on the estimated steps of completion and improves the original plan accordingly. Our experiments mark the milestone of the first zero-shot multi-task agent that can robustly accomplish 70+ Minecraft tasks and nearly double the overall performances. Further testing reveals our method's general effectiveness in popularly adopted non-open-ended domains as well (i.e., ALFWorld and tabletop manipulation). 
The ablation and exploratory studies detail how our design beats the counterparts and provide a promising update on the $\\texttt{ObtainDiamond}$ grand challenge with our approach.", "keywords": "open-ended learning;multi task;large language models;zero-shot planning", "primary_area": "", "supplementary_material": "", "author": "Zihao Wang;Shaofei Cai;Guanzhou Chen;Anji Liu;Xiaojian Ma;Yitao Liang", "authorids": "~Zihao_Wang23;~Shaofei_Cai2;~Guanzhou_Chen1;~Anji_Liu1;~Xiaojian_Ma1;~Yitao_Liang1", "gender": "M;M;M;M;;M", "homepage": "https://zhwang4ai.github.io/;https://phython96.github.io/;https://gzchen4ai.github.io/;https://liuanji.github.io/;;https://web.cs.ucla.edu/~yliang/", "dblp": ";276/3245;379/9947;227/8622;;173/4969", "google_scholar": "I0D-EgQAAAAJ;MZXDSSUAAAAJ;pRJXjSUAAAAJ;k_4zYecAAAAJ;;KVzR1XEAAAAJ", "orcid": "0000-0001-8396-3707;;;;;", "linkedin": ";;;anji-liu-7610b7190/;;", "or_profile": "~Zihao_Wang23;~Shaofei_Cai2;~Guanzhou_Chen1;~Anji_Liu1;~Xiaojian_Ma1;~Yitao_Liang1", "aff": "Peking University;Peking University;Beijing University of Posts and Telecommunications;University of California, Los Angeles;;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;bupt.edu.cn;ucla.edu;;pku.edu.cn", "position": "PhD student;PhD student;Undergrad student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nwang2023describe,\ntitle={Describe, Explain, Plan and Select: Interactive Planning with {LLM}s Enables Open-World Multi-Task Agents},\nauthor={Zihao Wang and Shaofei Cai and Guanzhou Chen and Anji Liu and Xiaojian Ma and Yitao Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KtvPdGb31Z}\n}", "github": "", "project": "", "reviewers": "W5LM;LCb8;xCHc;Qeax;FjP3", "pdf_size": 8057558, "rating": "5;5;6;6;9", "confidence": "5;3;5;3;5", "soundness": "3;3;3;4;3", "novelty": "3;3;2;3;3", "presentation": "3;3;4;4;4", "wc_summary": "40;181;59;38;108", "wc_strengths": "116;203;72;42;122", "wc_weaknesses": "110;247;667;44;159", "wc_questions": "98;101;3;6;486", "wc_limitations": "17;1;3;1;122", "wc_review": "381;733;804;131;997", "wc_reply_reviewers": "251;0;73;25;282", "wc_reply_authors": "2025;40;270;40;42", "reply_reviewers": "2;0;2;1;1", "reply_authors": "6;2;2;2;2", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 4.2, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 85.2, 54.138341311865105 ], "wc_strengths_avg": [ 111.0, 54.5747194220914 ], "wc_weaknesses_avg": [ 245.4, 220.95664733155235 ], "wc_questions_avg": [ 138.8, 178.72817349259742 ], "wc_limitations_avg": [ 28.8, 46.98254995208327 ], "wc_review_avg": [ 609.2, 311.2596343890418 ], "wc_reply_reviewers_avg": [ 126.2, 117.3429162753338 ], "wc_reply_authors_avg": [ 483.4, 775.9009215099567 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.8, 1.6000000000000003 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.38888888888888884, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7787033183657350443&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;pku.edu.cn;bupt.edu.cn;ucla.edu;;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Peking University;Beijing University of Posts and Telecommunications;University of California, Los Angeles", 
"aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.bupt.edu.cn/;https://www.ucla.edu", "aff_unique_abbr": "Peking U;BUPT;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Los Angeles", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Tester-Learners for Halfspaces: Universal Algorithms", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71983", "id": "Kv8GJkV19S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/204d9a9a4816a45909010587ffc3204b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Kv8GJkV19S", "openreview": "https://openreview.net/forum?id=Kv8GJkV19S", "poster": "/media/PosterPDFs/NeurIPS%202023/71983.png?t=1702058508.898668", "slides": "https://nips.cc/virtual/2023/poster/71983", "video": "https://nips.cc/virtual/2023/poster/71983", "author_site": "Aravind Gollakota, Adam Klivans, Konstantinos Stavropoulos, Arsen Vasilyan", "tldr": "", "abstract": "We give the first tester-learner for halfspaces that succeeds universally over a wide class of structured distributions. Our universal tester-learner runs in fully polynomial time and has the following guarantee: the learner achieves error $O(\\mathrm{opt}) + \\epsilon$ on any labeled distribution that the tester accepts, and moreover, the tester accepts whenever the marginal is any distribution that satisfies a Poincare inequality. In contrast to prior work on testable learning, our tester is not tailored to any single target distribution but rather succeeds for an entire target class of distributions. The class of Poincare distributions includes all strongly log-concave distributions, and, assuming the Kannan--Lovasz--Simonovits (KLS) conjecture, includes all log-concave distributions. 
In the special case where the label noise is known to be Massart, our tester-learner achieves error $\\mathrm{opt} + \\epsilon$ while accepting all log-concave distributions unconditionally (without assuming KLS).\nOur tests rely on checking hypercontractivity of the unknown distribution using a sum-of-squares (SOS) program, and crucially make use of the fact that Poincare distributions are certifiably hypercontractive in the SOS framework.", "keywords": "testable learning;pac learning;agnostic learning;Massart label noise;adversarial label noise;distribution testing", "primary_area": "", "supplementary_material": "", "author": "Aravind Gollakota;Adam Klivans;Konstantinos Stavropoulos;Arsen Vasilyan", "authorids": "~Aravind_Gollakota1;~Adam_Klivans1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "gender": "M;M;;", "homepage": "https://aravind-pg.github.io;http://www.cs.utexas.edu/~klivans;;", "dblp": "264/1576;k/AdamRKlivans;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aravind_Gollakota1;~Adam_Klivans1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "aff": "University of Texas, Austin;University of Texas, Austin;;", "aff_domain": "utexas.edu;cs.utexas.edu;;", "position": "PhD student;Professor;;", "bibtex": "@inproceedings{\ngollakota2023testerlearners,\ntitle={Tester-Learners for Halfspaces: Universal Algorithms},\nauthor={Aravind Gollakota and Adam Klivans and Konstantinos Stavropoulos and Arsen Vasilyan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Kv8GJkV19S}\n}", "github": "", "project": "", "reviewers": "EFhK;eYN2;9R84;AEBD", "pdf_size": 545974, "rating": "6;7;8;8", "confidence": "3;3;3;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;4;3", "wc_summary": "184;242;121;489", "wc_strengths": "76;168;112;29", "wc_weaknesses": "44;95;66;12", "wc_questions": "68;42;23;1", "wc_limitations": "1;4;2;26", "wc_review": "373;551;324;557", "wc_reply_reviewers": "22;21;15;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;0", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 259.0, 139.515232143304 ], "wc_strengths_avg": [ 96.25, 50.81522901650646 ], "wc_weaknesses_avg": [ 54.25, 30.367540236245674 ], "wc_questions_avg": [ 33.5, 24.642443060703215 ], "wc_limitations_avg": [ 8.25, 10.304731922762475 ], "wc_review_avg": [ 451.25, 104.22181873293135 ], "wc_reply_reviewers_avg": [ 14.5, 8.789197915623474 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10435515245412997225&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "utexas.edu;cs.utexas.edu;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Spatial-frequency channels, shape bias, and adversarial robustness", 
"status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71982", "id": "KvPwXVcslY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0cdc1e85736d9c01d366cbf9b4b81672-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=KvPwXVcslY", "openreview": "https://openreview.net/forum?id=KvPwXVcslY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71982", "video": "https://nips.cc/virtual/2023/poster/71982", "author_site": "Ajay Subramanian, Elena Sizikova, Najib Majaj, Denis Pelli", "tldr": "", "abstract": "What spatial frequency information do humans and neural networks use to recognize objects? In neuroscience, critical band masking is an established tool that can reveal the frequency-selective filters used for object recognition. Critical band masking measures the sensitivity of recognition performance to noise added at each spatial frequency. Existing critical band masking studies show that humans recognize periodic patterns (gratings) and letters by means of a spatial-frequency filter (or \"channel\") that has a frequency bandwidth of one octave (doubling of frequency). Here, we introduce critical band masking as a task for network-human comparison and test 14 humans and 76 neural networks on 16-way ImageNet categorization in the presence of narrowband noise. We find that humans recognize objects in natural images using the same one-octave-wide channel that they use for letters and gratings, making it a canonical feature of human object recognition. Unlike humans, the neural network channel is very broad, 2-4 times wider than the human channel. This means that the network channel extends to frequencies higher and lower than those that humans are sensitive to. Thus, noise at those frequencies will impair network performance and spare human performance. Adversarial and augmented-image training are commonly used to increase network robustness and shape bias. Does this training align network and human object recognition channels? Three network channel properties (bandwidth, center frequency, peak noise sensitivity) correlate strongly with shape bias (51% variance explained) and robustness of adversarially-trained networks (66% variance explained). Adversarial training increases robustness but expands the channel bandwidth even further beyond the human bandwidth. Thus, critical band masking reveals that the network channel is more than twice as wide as the human channel, and that adversarial training only makes it worse. Networks with narrower channels might be more robust.", "keywords": "object recognition;critical band masking;spatial-frequency channels;shape bias;adversarial robustness", "primary_area": "", "supplementary_material": "/attachment/ede11ef43c7ed0ec1a0ea216abbc9b2e579911da.pdf", "author": "Ajay Subramanian;Elena Sizikova;Najib J. Majaj;Denis G. 
Pelli", "authorids": "~Ajay_Subramanian1;~Elena_Sizikova1;~Najib_J._Majaj1;denis.pelli@nyu.edu", "gender": "M;F;M;", "homepage": "https://ajaysubramanian.com;https://elenasizikova.github.io;https://scholar.google.com/citations?user=N0xjM6EAAAAJ&hl=en&oi=ao;", "dblp": ";123/6103;;", "google_scholar": "6cyu_EgAAAAJ;https://scholar.google.com/citations?hl=en;N0xjM6EAAAAJ;", "orcid": "0000-0003-1017-9000;;;", "linkedin": ";;;", "or_profile": "~Ajay_Subramanian1;~Elena_Sizikova1;~Najib_J._Majaj1;denis.pelli@nyu.edu", "aff": "New York University;Food and Drug Administration;New York University;", "aff_domain": "nyu.edu;fda.hhs.gov;nyu.edu;", "position": "PhD student;Researcher;Assistnat Research Professor;", "bibtex": "@inproceedings{\nsubramanian2023spatialfrequency,\ntitle={Spatial-frequency channels, shape bias, and adversarial robustness},\nauthor={Ajay Subramanian and Elena Sizikova and Najib J. Majaj and Denis G. Pelli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=KvPwXVcslY}\n}", "github": "", "project": "", "reviewers": "QrRK;TCTa;vBv1;9K3N;kLx7", "pdf_size": 4115262, "rating": "4;7;7;7;10", "confidence": "3;5;5;4;5", "soundness": "3;2;3;3;4", "novelty": "2;4;4;2;4", "presentation": "3;4;3;3;4", "wc_summary": "118;87;80;124;105", "wc_strengths": "180;92;143;54;93", "wc_weaknesses": "414;237;427;307;27", "wc_questions": "156;351;51;139;115", "wc_limitations": "7;37;39;47;44", "wc_review": "875;804;740;671;384", "wc_reply_reviewers": "59;126;86;111;13", "wc_reply_authors": "0;0;248;0;0", "reply_reviewers": "1;1;2;1;1", "reply_authors": "1;1;3;1;1", "rating_avg": [ 7.0, 1.8973665961010275 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.9797958971132712 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 102.8, 17.057549648176316 ], "wc_strengths_avg": [ 112.4, 44.0663136647485 ], "wc_weaknesses_avg": [ 282.4, 145.68678732129416 ], "wc_questions_avg": [ 162.4, 100.81190405899493 ], "wc_limitations_avg": [ 34.8, 14.344336861632886 ], "wc_review_avg": [ 694.8, 169.47141351862265 ], "wc_reply_reviewers_avg": [ 79.0, 40.094887454636904 ], "wc_reply_authors_avg": [ 49.6, 99.20000000000002 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7905694150420949, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3746877679956067940&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "nyu.edu;fda.hhs.gov;nyu.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "New York University;Food and Drug Administration", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.fda.gov", "aff_unique_abbr": "NYU;FDA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Descriptive Image Captioning via Semipermeable Maximum Likelihood Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71981", "id": "Kvaa3DhvlZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa1cfe4e956d85e016b1f8f49b189a0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Kvaa3DhvlZ", "openreview": "https://openreview.net/forum?id=Kvaa3DhvlZ", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71981.png?t=1698241724.2550154", "slides": "https://nips.cc/virtual/2023/poster/71981", "video": "https://nips.cc/virtual/2023/poster/71981", "author_site": "Zihao Yue, Anwen Hu, Liang Zhang, Qin Jin", "tldr": "", "abstract": "Image captioning aims to describe visual content in natural language. As 'a picture is worth a thousand words', there could be various correct descriptions for an image. However, with maximum likelihood estimation as the training objective, the captioning model is penalized whenever its prediction mismatches with the label. For instance, when the model predicts a word expressing richer semantics than the label, it will be penalized and optimized to prefer more concise expressions, referred to as *conciseness optimization*. In contrast, predictions that are more concise than labels lead to *richness optimization*. Such conflicting optimization directions could eventually result in the model generating general descriptions. In this work, we introduce Semipermeable MaxImum Likelihood Estimation (SMILE), which allows richness optimization while blocking conciseness optimization, thus encouraging the model to generate longer captions with more details. Extensive experiments on two mainstream image captioning datasets MSCOCO and Flickr30K demonstrate that SMILE significantly enhances the descriptiveness of generated captions. We further provide in-depth investigations to facilitate a better understanding of how SMILE works.", "keywords": "Image Captioning;Learning Objective;Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Zihao Yue;Anwen Hu;Liang Zhang;Qin Jin", "authorids": "~Zihao_Yue1;~Anwen_Hu1;~Liang_Zhang10;~Qin_Jin1", "gender": "M;M;M;F", "homepage": "https://yuezih.github.io/;;https://github.com/zhangliang-04;https://www.jin-qin.com/index.html", "dblp": "339/2864;249/1182.html;;47/2670", "google_scholar": "JyOerJAAAAAJ;FqvDzH8AAAAJ;https://scholar.google.com/citations?;8UkYbCMAAAAJ", "orcid": "0000-0002-3470-5442;;;0000-0001-6486-6020", "linkedin": ";;;qinjin/", "or_profile": "~Zihao_Yue1;~Anwen_Hu1;~Liang_Zhang10;~Qin_Jin1", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nyue2023learning,\ntitle={Learning Descriptive Image Captioning via Semipermeable Maximum Likelihood Estimation},\nauthor={Zihao Yue and Anwen Hu and Liang Zhang and Qin Jin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Kvaa3DhvlZ}\n}", "github": "", "project": "", "reviewers": "XLEL;5fp8;nk4n;cENC", "pdf_size": 1945619, "rating": "5;5;6;6", "confidence": "4;3;5;4", "soundness": "2;2;2;3", "novelty": "3;2;2;3", "presentation": "3;2;3;4", "wc_summary": "99;94;344;70", "wc_strengths": "69;104;22;105", "wc_weaknesses": "253;97;26;50", "wc_questions": "2;33;194;4", "wc_limitations": "18;31;7;1", "wc_review": "441;359;593;230", "wc_reply_reviewers": "77;18;0;0", "wc_reply_authors": "579;13;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 151.75, 111.53558849084897 ], "wc_strengths_avg": [ 75.0, 
33.860005906674026 ], "wc_weaknesses_avg": [ 106.5, 88.35298523536146 ], "wc_questions_avg": [ 58.25, 79.32961300800604 ], "wc_limitations_avg": [ 14.25, 11.431863365173676 ], "wc_review_avg": [ 405.75, 131.69923120504538 ], "wc_reply_reviewers_avg": [ 23.75, 31.60992723813201 ], "wc_reply_authors_avg": [ 148.0, 248.89455598706854 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=985112522184689056&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Truly Scale-Equivariant Deep Nets with Fourier Layers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71980", "id": "L0QwnevT0F", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1343edb2739a61a6e20bd8764e814b50-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=L0QwnevT0F", "openreview": "https://openreview.net/forum?id=L0QwnevT0F", "poster": "/media/PosterPDFs/NeurIPS%202023/71980.png?t=1701401513.3584476", "slides": "https://nips.cc/virtual/2023/poster/71980", "video": "https://nips.cc/virtual/2023/poster/71980", "author_site": "Md Ashiqur Rahman, Raymond A. Yeh", "tldr": "", "abstract": "In computer vision, models must be able to adapt to changes in image resolution to effectively carry out tasks such as image segmentation; this is known as scale-equivariance. Recent works have made progress in developing scale-equivariant convolutional neural networks, e.g., through weight-sharing and kernel resizing. However, these networks are not truly scale-equivariant in practice. Specifically, they do not consider anti-aliasing as they formulate the down-scaling operation in the continuous domain. To address this shortcoming, we directly formulate down-scaling in the discrete domain with consideration of anti-aliasing. We then propose a novel architecture based on Fourier layers to achieve truly scale-equivariant deep nets, i.e., absolute zero equivariance-error. Following prior works, we test this model on the MNIST-scale and STL-10 datasets. Our proposed model achieves competitive classification performance while maintaining zero equivariance-error.", "keywords": "Scale Equivariance;Fourier Neural Network", "primary_area": "", "supplementary_material": "/attachment/d960f21ca7b8aaed208493bfa158b149fca0a102.pdf", "author": "Md Ashiqur Rahman;Raymond A. Yeh", "authorids": "~Md_Ashiqur_Rahman2;~Raymond_A._Yeh1", "gender": "M;", "homepage": "https://ashiq24.github.io/;", "dblp": "271/3154;", "google_scholar": "isCWj28AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Md_Ashiqur_Rahman2;~Raymond_A._Yeh1", "aff": "NVIDIA;", "aff_domain": "nvidia.com;", "position": "Intern;", "bibtex": "@inproceedings{\nrahman2023truly,\ntitle={Truly Scale-Equivariant Deep Nets with Fourier Layers},\nauthor={Md Ashiqur Rahman and Raymond A.
Yeh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=L0QwnevT0F}\n}", "github": "", "project": "", "reviewers": "5utz;JFCH;JmYY;qJeg", "pdf_size": 1695545, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "43;95;73;46", "wc_strengths": "48;53;142;34", "wc_weaknesses": "112;181;512;210", "wc_questions": "6;5;33;172", "wc_limitations": "2;3;39;286", "wc_review": "211;337;799;748", "wc_reply_reviewers": "49;15;171;199", "wc_reply_authors": "45;0;85;46", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 21.25294097295713 ], "wc_strengths_avg": [ 69.25, 42.57566793369189 ], "wc_weaknesses_avg": [ 253.75, 153.29118533040312 ], "wc_questions_avg": [ 54.0, 69.04708538381617 ], "wc_limitations_avg": [ 82.5, 118.43247020982041 ], "wc_review_avg": [ 523.75, 254.33184523374183 ], "wc_reply_reviewers_avg": [ 108.5, 78.06887984337934 ], "wc_reply_authors_avg": [ 44.0, 30.091527046662154 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9185315404782411867&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "nvidia.com;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "PrObeD: Proactive Object Detection Wrapper", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71979", "id": "L74NTrzH1O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f5846131aa6a72d1df3bd6d43a4a960b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=L74NTrzH1O", "openreview": "https://openreview.net/forum?id=L74NTrzH1O", "poster": "/media/PosterPDFs/NeurIPS%202023/71979.png?t=1702149552.5850523", "slides": "https://nips.cc/virtual/2023/poster/71979", "video": "https://nips.cc/virtual/2023/poster/71979", "author_site": "Vishal Asnani, Abhinav Kumar, Suya You, Xiaoming Liu", "tldr": "", "abstract": "Previous research in $2D$ object detection focuses on various tasks, including detecting objects in generic and camouflaged images. These works are regarded as passive works for object detection as they take the input image as is. However, convergence to global minima is not guaranteed to be optimal in neural networks; therefore, we argue that the trained weights in the object detector are not optimal. To rectify this problem, we propose a wrapper based on proactive schemes, PrObeD, which enhances the performance of these object detectors by learning a signal. PrObeD consists of an encoder-decoder architecture, where the encoder network generates an image-dependent signal termed templates to encrypt the input images, and the decoder recovers this template from the encrypted images. We propose that learning the optimum template results in an object detector with an improved detection performance. 
The template acts as a mask to the input images to highlight semantics useful for the object detector. Finetuning the object detector with these encrypted images enhances the detection performance for both generic and camouflaged images. Our experiments on MS-COCO, CAMO, COD$10$K, and NC$4$K datasets show improvement over different detectors after applying PrObeD. Our models/codes are available at https://github.com/vishal3477/Proactive-Object-Detection.", "keywords": "Object detection;proactive;Camouflage;2D", "primary_area": "", "supplementary_material": "/attachment/70405060f366e578d8085882a3950242dd744224.pdf", "author": "Vishal Asnani;Abhinav Kumar;Suya You;Xiaoming Liu", "authorids": "~Vishal_Asnani1;~Abhinav_Kumar1;~Suya_You3;~Xiaoming_Liu2", "gender": "M;M;;M", "homepage": "https://vishal3477.github.io/;https://sites.google.com/view/abhinavkumar;;http://www.cse.msu.edu/~liuxm/", "dblp": "295/8698;115/6458-4;;l/XiaomingLiu0002", "google_scholar": "OA4lkcwAAAAJ;https://scholar.google.co.in/citations?hl=en;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "vishal-asnani/;abhinav1kumar;;xiaoming-liu-5a7807b/", "or_profile": "~Vishal_Asnani1;~Abhinav_Kumar1;~Suya_You3;~Xiaoming_Liu2", "aff": "Adobe Systems;Michigan State University;;Michigan State University", "aff_domain": "adobe.com;msu.edu;;msu.edu", "position": "Intern;PhD student;;Professor", "bibtex": "@inproceedings{\nasnani2023probed,\ntitle={PrObeD: Proactive Object Detection Wrapper},\nauthor={Vishal Asnani and Abhinav Kumar and Suya You and Xiaoming Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=L74NTrzH1O}\n}", "github": "", "project": "", "reviewers": "NMUy;rENj;rzBs;V1FT;93HY", "pdf_size": 4307441, "rating": "4;4;5;5;6", "confidence": "3;3;3;4;4", "soundness": "2;3;3;3;2", "novelty": "2;2;3;3;3", "presentation": "3;3;1;3;3", "wc_summary": "51;60;116;97;61", "wc_strengths": "39;35;11;60;66", "wc_weaknesses": "135;141;441;169;110", "wc_questions": "3;14;3;282;2", "wc_limitations": "3;12;1;34;1", "wc_review": "231;262;572;642;240", "wc_reply_reviewers": "11;30;71;18;180", "wc_reply_authors": "11;11;22;10;192", "reply_reviewers": "1;1;1;1;3", "reply_authors": "2;2;2;2;3", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 77.0, 25.067907770693587 ], "wc_strengths_avg": [ 42.2, 19.58979326077741 ], "wc_weaknesses_avg": [ 199.2, 122.34770124526246 ], "wc_questions_avg": [ 60.8, 110.6876686898771 ], "wc_limitations_avg": [ 10.2, 12.576167937809991 ], "wc_review_avg": [ 389.4, 179.3271870074362 ], "wc_reply_reviewers_avg": [ 62.0, 62.555575291096154 ], "wc_reply_authors_avg": [ 49.2, 71.53572534055974 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=966196888949648255&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "adobe.com;msu.edu;;msu.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Adobe;Michigan State University", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.msu.edu", "aff_unique_abbr": "Adobe;MSU", "aff_campus_unique_index":
"", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Batched Algorithm for Contextual Linear Bandits with Large Action Space via Soft Elimination", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71978", "id": "L7Whl9pXd0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b1bdb0f22c9748203c62f29aa297ac57-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=L7Whl9pXd0", "openreview": "https://openreview.net/forum?id=L7Whl9pXd0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71978", "video": "https://nips.cc/virtual/2023/poster/71978", "author_site": "Osama Hanna, Lin Yang, Christina Fragouli", "tldr": "", "abstract": "In this paper, we provide the first efficient batched algorithm for contextual linear bandits with large action spaces. Unlike existing batched algorithms that rely on action elimination, which are not implementable for large action sets, our algorithm only uses a linear optimization oracle over the action set to design the policy. The proposed algorithm achieves a regret upper bound $\\tilde{O}(\\sqrt{T})$ with high probability, and uses $O(\\log\\log T)$ batches, matching the lower bound on the number of batches (Gao et al., 2019). When specialized to linear bandits, our algorithm can achieve a high probability gap-dependent regret bound of $\\tilde{O}(1/\\Delta_{\\min})$ with the optimal $\\log T$ number of batches, where $\\Delta_{\\min}$ is the minimum reward gap between a suboptimal arm and the optimal. Our result is achieved via a novel soft elimination approach, that entails $\\text{``}$shaping$\\text{\"}$ the action sets at each batch so that we can efficiently identify (near) optimal actions.", "keywords": "efficient bandit algorithms;contextual linear bandits", "primary_area": "", "supplementary_material": "/attachment/91d701b946b8aa4877fe18d074affe93bdff4adf.pdf", "author": "Osama Hanna;Lin Yang;Christina Fragouli", "authorids": "~Osama_Hanna1;~Lin_Yang12;~Christina_Fragouli1", "gender": "M;F;M", "homepage": "https://www.arni.ee.ucla.edu/people/osama-hanna/;https://www.arni.ee.ucla.edu;http://www.drlinyang.net", "dblp": ";87/5736;166/6264", "google_scholar": ";sJIAF-gAAAAJ;umivlPQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Osama_Hanna1;~Christina_Fragouli1;~lin_Yang1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhanna2023efficient,\ntitle={Efficient Batched Algorithm for Contextual Linear Bandits with Large Action Space via Soft Elimination},\nauthor={Osama Hanna and Lin Yang and Christina Fragouli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=L7Whl9pXd0}\n}", "github": "", "project": "", "reviewers": "6gSX;azE5;vkke;DZuy", "pdf_size": 688461, "rating": "6;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "2;3;3;4", "wc_summary": "86;48;86;159", "wc_strengths": "60;47;103;32", "wc_weaknesses": "134;140;308;20", "wc_questions": "81;38;4;61", "wc_limitations": "1;8;5;41", "wc_review": "362;281;506;313", "wc_reply_reviewers": "27;13;15;38", "wc_reply_authors": "31;21;29;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 
0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.75, 40.20805267604985 ], "wc_strengths_avg": [ 60.5, 26.462237244798484 ], "wc_weaknesses_avg": [ 150.5, 102.73631295700659 ], "wc_questions_avg": [ 46.0, 28.626910416599273 ], "wc_limitations_avg": [ 13.75, 15.927570436196476 ], "wc_review_avg": [ 365.5, 86.09442490661053 ], "wc_reply_reviewers_avg": [ 23.25, 10.059199769365355 ], "wc_reply_authors_avg": [ 20.25, 12.275483697190918 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=811370357108730051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucla.edu;ucla.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Consensus and Subjectivity of Skin Tone Annotation for ML Fairness", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73624", "id": "L9I9FhHfS3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/60d25b3210c92f5ba2002a8e1f1adf1c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=L9I9FhHfS3", "openreview": "https://openreview.net/forum?id=L9I9FhHfS3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73624", "video": "https://nips.cc/virtual/2023/poster/73624", "author_site": "Candice Schumann, Femi Olanubi, Auriel Wright, Ellis Monk, Courtney Heldreth, Susanna Ricco", "tldr": "", "abstract": "Understanding different human attributes and how they affect model behavior may become a standard need for all model creation and usage, from traditional computer vision tasks to the newest multimodal generative AI systems. In computer vision specifically, we have relied on datasets augmented with perceived attribute signals (eg, gender presentation, skin tone, and age) and benchmarks enabled by these datasets. Typically labels for these tasks come from human annotators. However, annotating attribute signals, especially skin tone, is a difficult and subjective task. Perceived skin tone is affected by technical factors, like lighting conditions, and social factors that shape an annotator's lived experience.\nThis paper examines the subjectivity of skin tone annotation through a series of annotation experiments using the Monk Skin Tone (MST) scale, a small pool of professional photographers, and a much larger pool of trained crowdsourced annotators. Along with this study we release the Monk Skin Tone Examples (MST-E) dataset, containing 1515 images and 31 videos spread across the full MST scale. MST-E is designed to help train human annotators to annotate MST effectively.Our study shows that annotators can reliably annotate skin tone in a way that aligns with an expert in the MST scale, even under challenging environmental conditions. 
We also find evidence that annotators from different geographic regions rely on different mental models of MST categories resulting in annotations that systematically vary across regions. Given this, we advise practitioners to use a diverse set of annotators and a higher replication count for each image when annotating skin tone for fairness research.", "keywords": "Fairness;Skin Tone;MST;Annotation;Dataset", "primary_area": "", "supplementary_material": "", "author": "Candice Schumann;Gbolahan Oluwafemi Olanubi;Auriel Wright;Ellis Monk;Courtney Heldreth;Susanna Ricco", "authorids": "~Candice_Schumann1;~Gbolahan_Oluwafemi_Olanubi1;~Auriel_Wright1;~Ellis_Monk1;~Courtney_Heldreth1;~Susanna_Ricco1", "gender": "Non-Binary;M;F;M;F;F", "homepage": "https://candiceschumann.com;;https://www.Wrightauriel.com;https://www.ellismonk.com;;", "dblp": "206/6260;;;;;66/812", "google_scholar": "IkuWPgoAAAAJ;;;;https://scholar.google.com/citations?hl=en;ci3hussAAAAJ", "orcid": ";;;;;", "linkedin": ";gbolahan-femi-olanubi-phd-796b555a/;aurielw;;courtney-heldreth-phd-3962b329/;", "or_profile": "~Candice_Schumann1;~Gbolahan_Oluwafemi_Olanubi1;~Auriel_Wright1;~Ellis_Monk1;~Courtney_Heldreth1;~Susanna_Ricco1", "aff": "Google;Google;;Harvard University;Research, Google;Google", "aff_domain": "google.com;google.com;;fas.harvard.edu;research.google.com;google.com", "position": "Researcher;Researcher;;Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nschumann2023consensus,\ntitle={Consensus and Subjectivity of Skin Tone Annotation for {ML} Fairness},\nauthor={Candice Schumann and Gbolahan Oluwafemi Olanubi and Auriel Wright and Ellis Monk and Courtney Heldreth and Susanna Ricco},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=L9I9FhHfS3}\n}", "github": "", "project": "", "reviewers": "ph2V;jgJ9;8mCy;JyxF", "pdf_size": 14290189, "rating": "6;6;6;7", "confidence": "4;4;3;5", "wc_summary_and_contributions": "31;57;208;128", "wc_strengths": "29;22;79;85", "wc_improvement": "146;28;148;216", "wc_limitations": "157;33;35;61", "wc_correctness": "1;5;13;73", "wc_clarity": "1;5;6;18", "wc_relation_to_prior_work": "1;24;8;42", "wc_documentation": "1;5;6;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "368;180;504;641", "wc_reply_reviewers": "17;11;95;0", "wc_reply_authors": "340;27;302;274", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 106.0, 68.76408946535975 ], "wc_strengths_avg": [ 53.75, 28.43743131859838 ], "wc_improvement_avg": [ 134.5, 67.63689821391871 ], "wc_limitations_avg": [ 71.5, 50.584088407324295 ], "wc_correctness_avg": [ 23.0, 29.189039038652847 ], "wc_clarity_avg": [ 7.5, 6.34428877022476 ], "wc_relation_to_prior_work_avg": [ 18.75, 15.801503093060482 ], "wc_documentation_avg": [ 7.25, 5.931905258852336 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 423.25, 170.4103503311932 ], "wc_reply_reviewers_avg": [ 30.75, 37.59238619720754 ], "wc_reply_authors_avg": [ 235.75, 122.77698277771775 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 29, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12383081053513753980&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "google.com;google.com;;fas.harvard.edu;research.google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Google;Harvard University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.harvard.edu", "aff_unique_abbr": "Google;Harvard", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Field Discovery in Interacting Dynamical Systems with Neural Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71977", "id": "L9ZTvJ5jVx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6521bd47ebaa28228cd6c74cb85afb65-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=L9ZTvJ5jVx", "openreview": "https://openreview.net/forum?id=L9ZTvJ5jVx", "poster": "/media/PosterPDFs/NeurIPS%202023/71977.png?t=1701952678.431985", "slides": "https://nips.cc/virtual/2023/poster/71977", "video": "https://nips.cc/virtual/2023/poster/71977", "author_site": "Miltiadis (Miltos) Kofinas, Erik Bekkers, Naveen Nagaraja, Efstratios Gavves", "tldr": "", "abstract": "Systems of interacting objects often evolve under the influence of underlying field effects that govern their dynamics, yet previous works have abstracted away from such effects, and assume that systems evolve in a vacuum. In this work, we focus on discovering these fields, and infer them from the observed dynamics alone, without directly observing them. We theorize the presence of latent force fields, and propose neural fields to learn them. Since the observed dynamics constitute the net effect of local object interactions and global field effects, recently popularized equivariant networks are inapplicable, as they fail to capture global information. To address this, we propose to disentangle local object interactions --which are SE(3) equivariant and depend on relative states-- from external global field effects --which depend on absolute states. We model the interactions with equivariant graph networks, and combine them with neural fields in a novel graph network that integrates field forces. 
Our experiments show that we can accurately discover the underlying fields in charged particles settings, traffic scenes, and gravitational n-body problems, and effectively use them to learn the system and forecast future trajectories.", "keywords": "Graph Neural Networks;Neural Fields;Field Discovery;Equivariance;Interacting Dynamical Systems;Geometric Graphs", "primary_area": "", "supplementary_material": "/attachment/a69e4cb1d8f90d649297361a257524f4f6c61e2a.zip", "author": "Miltiadis Kofinas;Erik J Bekkers;Naveen Shankar Nagaraja;Efstratios Gavves", "authorids": "~Miltiadis_Kofinas2;~Erik_J_Bekkers1;~Naveen_Shankar_Nagaraja1;~Efstratios_Gavves1", "gender": "M;;M;M", "homepage": "https://mkofinas.github.io;https://erikbekkers.bitbucket.io/;https://menaveenshankar.github.io;https://www.egavves.com", "dblp": "305/0160;43/5596;117/5906;03/8693", "google_scholar": "Ur5BV8MAAAAJ;https://scholar.google.nl/citations?user=yeWrfR4AAAAJ;TP-_rb0AAAAJ;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ", "orcid": "0000-0002-3392-4037;;;", "linkedin": "miltiadiskofinas/;;;", "or_profile": "~Miltiadis_Kofinas2;~Erik_J_Bekkers1;~Naveen_Shankar_Nagaraja1;~Efstratios_Gavves1", "aff": "University of Amsterdam;University of Amsterdam;BMW Group;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;bmw.de;uva.nl", "position": "PhD student;Assistant Professor;Software Engineer;Associate Professor", "bibtex": "@inproceedings{\nkofinas2023latent,\ntitle={Latent Field Discovery in Interacting Dynamical Systems with Neural Fields},\nauthor={Miltiadis Kofinas and Erik J Bekkers and Naveen Shankar Nagaraja and Efstratios Gavves},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=L9ZTvJ5jVx}\n}", "github": "", "project": "", "reviewers": "HXpt;DhsC;8BnH;hUPP", "pdf_size": 21283249, "rating": "6;6;6;6", "confidence": "4;5;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "143;105;100;79", "wc_strengths": "137;57;41;72", "wc_weaknesses": "236;49;12;415", "wc_questions": "74;181;23;4", "wc_limitations": "11;10;13;1", "wc_review": "601;402;189;571", "wc_reply_reviewers": "16;37;13;501", "wc_reply_authors": "0;0;0;958", "reply_reviewers": "1;1;1;3", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 106.75, 23.09085316743407 ], "wc_strengths_avg": [ 76.75, 36.47173563185608 ], "wc_weaknesses_avg": [ 178.0, 161.03571032538093 ], "wc_questions_avg": [ 70.5, 68.74045388270287 ], "wc_limitations_avg": [ 8.75, 4.602988159880492 ], "wc_review_avg": [ 440.75, 163.95483371953387 ], "wc_reply_reviewers_avg": [ 141.75, 207.6190923301612 ], "wc_reply_authors_avg": [ 239.5, 414.8261684127461 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1572559459855984809&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uva.nl;uva.nl;bmw.de;uva.nl", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Amsterdam;BMW Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.bmwgroup.com", "aff_unique_abbr": "UvA;BMW", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Netherlands;Germany" }, { "title": "GradOrth: A Simple yet Efficient Out-of-Distribution Detection with Orthogonal Projection of Gradients", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71976", "id": "L9nTuSbAws", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/77cf940349218069bbc230fc2c9c8a21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=L9nTuSbAws", "openreview": "https://openreview.net/forum?id=L9nTuSbAws", "poster": "/media/PosterPDFs/NeurIPS%202023/71976.png?t=1702088393.6295724", "slides": "https://nips.cc/virtual/2023/poster/71976", "video": "https://nips.cc/virtual/2023/poster/71976", "author_site": "Sima Behpour, Thang Long Doan, Xin Li, Wenbin He, Liang Gou, Liu Ren", "tldr": "", "abstract": "Detecting out-of-distribution (OOD) data is crucial for ensuring the safe deployment of machine learning models in real-world applications. However, existing OOD detection approaches primarily rely on the feature maps or the full gradient space information to derive OOD scores neglecting the role of \\textbf{most important parameters} of the pre-trained network over In-Distribution data. In this study, we propose a novel approach called GradOrth to facilitate OOD detection based on one intriguing observation that the important features to identify OOD data lie in the lower-rank subspace of in-distribution (ID) data.\nIn particular, we identify OOD data by computing the norm of gradient projection on \\textit{the subspaces considered \\textbf{important} for the in-distribution data}. A large orthogonal projection value (i.e. a small projection value) indicates the sample as OOD as it captures a weak correlation of the in-distribution (ID) data. 
This simple yet effective method exhibits outstanding performance, showcasing a notable reduction in the average false positive rate at a 95\\% true positive rate (FPR95) of up to 8\\% when compared to the current state-of-the-art methods.", "keywords": "out-of-distribution detection;OOD;uncertainty estimation;gradient projection", "primary_area": "", "supplementary_material": "", "author": "Sima Behpour;Thang Doan;Xin Li;Wenbin He;Liang Gou;Liu Ren", "authorids": "~Sima_Behpour1;~Thang_Doan1;~Xin_Li26;~Wenbin_He1;~Liang_Gou2;~Liu_Ren1", "gender": ";;M;M;M;M", "homepage": ";;https://www.xinliaiblog.com/;https://hewenbin.github.io/;;https://sites.google.com/site/liurenshomepage/", "dblp": ";;;;43/7218;65/4250", "google_scholar": ";;;BQG5angAAAAJ;x3VK0fAAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Sima_Behpour1;~Thang_Doan1;~Xin_Li26;~Wenbin_He1;~Liang_Gou2;~Liu_Ren1", "aff": ";;Bosch Reserach;Bosch;Bosch Research North America, Bosch Center for Artificial Intelligence (BCAI);Bosch Research", "aff_domain": ";;us.bosch.com;bosch.com;bosch.com;us.bosch.com", "position": ";;Researcher;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbehpour2023gradorth,\ntitle={GradOrth: A Simple yet Efficient Out-of-Distribution Detection with Orthogonal Projection of Gradients},\nauthor={Sima Behpour and Thang Doan and Xin Li and Wenbin He and Liang Gou and Liu Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=L9nTuSbAws}\n}", "github": "", "project": "", "reviewers": "fiAZ;MCXS;6mNB;ne6G;81Lc", "pdf_size": 850165, "rating": "5;6;6;6;7", "confidence": "5;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "1;3;4;2;4", "wc_summary": "78;83;51;79;138", "wc_strengths": "30;50;65;39;85", "wc_weaknesses": "377;256;133;81;236", "wc_questions": "30;268;46;152;57", "wc_limitations": "52;6;18;19;31", "wc_review": "567;663;313;370;547", "wc_reply_reviewers": "112;27;43;0;28", "wc_reply_authors": "28;21;25;0;29", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 85.8, 28.463309716194285 ], "wc_strengths_avg": [ 53.8, 19.48743184721886 ], "wc_weaknesses_avg": [ 216.6, 102.9885430521279 ], "wc_questions_avg": [ 110.6, 89.47759496097333 ], "wc_limitations_avg": [ 25.2, 15.561490931141527 ], "wc_review_avg": [ 492.0, 130.2428500916653 ], "wc_reply_reviewers_avg": [ 42.0, 37.64571688784795 ], "wc_reply_authors_avg": [ 20.6, 10.669582934679312 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7905694150420949, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4930120505321407056&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";;us.bosch.com;bosch.com;bosch.com;us.bosch.com", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Bosch Research;Robert Bosch GmbH;Bosch Research North America", "aff_unique_dep": ";;Bosch Center for Artificial Intelligence (BCAI)", "aff_unique_url": "https://research.bosch.com;https://www.bosch.com;https://research.bosch.com/", "aff_unique_abbr": "Bosch;Bosch;Bosch", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Explaining the Uncertain: Stochastic Shapley Values for Gaussian Process Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71975", "id": "LAGxc2ybuH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f0b1220028dfa2ee82ca0a0e0fc52d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LAGxc2ybuH", "openreview": "https://openreview.net/forum?id=LAGxc2ybuH", "poster": "/media/PosterPDFs/NeurIPS%202023/71975.png?t=1700004529.7343266", "slides": "https://nips.cc/virtual/2023/poster/71975", "video": "https://nips.cc/virtual/2023/poster/71975", "author_site": "Siu Lun Chau, Krikamol Muandet, Dino Sejdinovic", "tldr": "", "abstract": "We present a novel approach for explaining Gaussian processes (GPs) that can utilize the full analytical covariance structure present in GPs. Our method is based on the popular solution concept of Shapley values extended to stochastic cooperative games, resulting in explanations that are random variables. The GP explanations generated using our approach satisfy similar favorable axioms to standard Shapley values and possess a tractable covariance function across features and data observations. This covariance allows for quantifying explanation uncertainties and studying the statistical dependencies between explanations. We further extend our framework to the problem of predictive explanation, and propose a Shapley prior over the explanation function to predict Shapley values for new data based on previously computed ones. Our extensive illustrations demonstrate the effectiveness of the proposed approach.", "keywords": "Gaussian Processes;Shapley values;Uncertainty Modelling", "primary_area": "", "supplementary_material": "/attachment/e70cbce4d1920fdd780d8eb0828e40b90df837b5.zip", "author": "Siu Lun Chau;Krikamol Muandet;Dino Sejdinovic", "authorids": "~Siu_Lun_Chau1;~Krikamol_Muandet1;~Dino_Sejdinovic1", "gender": "M;M;M", "homepage": "https://chau999.github.io/;http://krikamol.org;https://sejdino.github.io/", "dblp": "264/9823;34/1240;31/1783", "google_scholar": "e7ZBlIsAAAAJ;E2z5uYsAAAAJ;v8Dg1lIAAAAJ", "orcid": ";0000-0002-4182-5282;0000-0001-5547-9213", "linkedin": ";krikamol-muandet/;https://linkedin.com/in/dinosejdinovic", "or_profile": "~Siu_Lun_Chau1;~Krikamol_Muandet1;~Dino_Sejdinovic1", "aff": "University of Oxford;CISPA Helmholtz Center for Information Security;University of Adelaide", "aff_domain": "ox.ac.uk;cispa.saarland;adelaide.edu.au", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchau2023explaining,\ntitle={Explaining the Uncertain: Stochastic Shapley Values for Gaussian Process Models},\nauthor={Siu Lun Chau and Krikamol Muandet and Dino Sejdinovic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LAGxc2ybuH}\n}", "github": "", "project": "", "reviewers": "hgou;uH3p;1zjr;FXFV", "pdf_size": 924656, "rating": "6;7;7;8", "confidence": "3;4;3;3", "soundness": "4;4;4;4", "novelty": "3;3;4;4", "presentation": "3;4;4;3", "wc_summary": "98;86;85;71", "wc_strengths": "287;114;38;151", "wc_weaknesses": "314;396;15;38", "wc_questions": "167;43;10;9", "wc_limitations": "2;48;44;10", "wc_review": "868;687;192;279", "wc_reply_reviewers": "28;68;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": 
"1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 85.0, 9.565563234854496 ], "wc_strengths_avg": [ 147.5, 90.25657870759339 ], "wc_weaknesses_avg": [ 190.75, 166.9870878241788 ], "wc_questions_avg": [ 57.25, 64.82428171603601 ], "wc_limitations_avg": [ 26.0, 20.248456731316587 ], "wc_review_avg": [ 506.5, 280.14683649829067 ], "wc_reply_reviewers_avg": [ 24.0, 27.85677655436824 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16328917015665338705&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "ox.ac.uk;cispa.saarland;adelaide.edu.au", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Oxford;CISPA Helmholtz Center for Information Security;University of Adelaide", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.cispa.de/;https://www.adelaide.edu.au", "aff_unique_abbr": "Oxford;CISPA;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;Germany;Australia" }, { "title": "Trial matching: capturing variability with data-constrained spiking neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71974", "id": "LAbxkhkjbD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec702dd6e83b2113a43614685a7e2ac6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LAbxkhkjbD", "openreview": "https://openreview.net/forum?id=LAbxkhkjbD", "poster": "/media/PosterPDFs/NeurIPS%202023/71974.png?t=1701339327.1803112", "slides": "https://nips.cc/virtual/2023/poster/71974", "video": "https://nips.cc/virtual/2023/poster/71974", "author_site": "Christos Sourmpis, Carl Petersen, Wulfram Gerstner, Guillaume Bellec", "tldr": "", "abstract": "Simultaneous behavioral and electrophysiological recordings call for new methods to reveal the interactions between neural activity and behavior. A milestone would be an interpretable model of the co-variability of spiking activity and behavior across trials. Here, we model a mouse cortical sensory-motor pathway in a tactile detection task reported by licking with a large recurrent spiking neural network (RSNN), fitted to the recordings via gradient-based optimization. We focus specifically on the difficulty to match the trial-to-trial variability in the data. Our solution relies on optimal transport to define a distance between the distributions of generated and recorded trials. The technique is applied to artificial data and neural recordings covering six cortical areas. We find that the resulting RSNN can generate realistic cortical activity and predict jaw movements across the main modes of trial-to-trial variability. 
Our analysis also identifies an unexpected mode of variability in the data corresponding to task-irrelevant movements of the mouse.", "keywords": "neuroscience;spiking networks;data-constrained modeling;electrophysiological recordings;optimal transport;trial variability;RNN;interpretable machine learning", "primary_area": "", "supplementary_material": "/attachment/8309ad5d1d885e6b58254c69a70421a761eeb68b.pdf", "author": "Christos Sourmpis;Carl C. H. Petersen;Wulfram Gerstner;Guillaume Bellec", "authorids": "~Christos_Sourmpis2;~Carl_C._H._Petersen1;~Wulfram_Gerstner1;~Guillaume_Bellec1", "gender": "M;;M;M", "homepage": "https://www.epfl.ch/labs/lsens/;https://lcnwww.epfl.ch/gerstner/;https://guillaumebellec.github.io;", "dblp": ";g/WGerstner;;369/8359", "google_scholar": "rej0aokAAAAJ;https://scholar.google.ch/citations?user=vSd2RnEAAAAJ;fSXUVvAAAAAJ;AfK1-sEAAAAJ", "orcid": "0000-0003-3344-4495;0000-0002-4344-2189;0000-0001-7568-4994;0009-0007-0519-1116", "linkedin": ";;;christos-sourmpis-00a282134/", "or_profile": "~Carl_C._H._Petersen1;~Wulfram_Gerstner1;~Guillaume_Bellec1;~CHRISTOS_SOURMPIS1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "position": "Full Professor;Full Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nsourmpis2023trial,\ntitle={Trial matching: capturing variability with data-constrained spiking neural networks},\nauthor={Christos Sourmpis and Carl C. H. Petersen and Wulfram Gerstner and Guillaume Bellec},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LAbxkhkjbD}\n}", "github": "", "project": "", "reviewers": "Rsk5;Myz7;Ec9F;Cbej", "pdf_size": 2168961, "rating": "5;6;6;7", "confidence": "2;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "109;45;82;148", "wc_strengths": "15;176;55;21", "wc_weaknesses": "40;111;34;90", "wc_questions": "121;110;44;87", "wc_limitations": "20;3;1;1", "wc_review": "305;445;216;347", "wc_reply_reviewers": "77;99;25;171", "wc_reply_authors": "0;0;0;254", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.0, 37.6497011940334 ], "wc_strengths_avg": [ 66.75, 64.89366301881871 ], "wc_weaknesses_avg": [ 68.75, 32.67548775458447 ], "wc_questions_avg": [ 90.5, 29.516944286290883 ], "wc_limitations_avg": [ 6.25, 7.980444849756184 ], "wc_review_avg": [ 328.25, 82.34493002000791 ], "wc_reply_reviewers_avg": [ 93.0, 52.44044240850758 ], "wc_reply_authors_avg": [ 63.5, 109.9852262806237 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=377723973703051301&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0;0", 
"aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "SNAP: Self-Supervised Neural Maps for Visual Positioning and Semantic Understanding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71973", "id": "LCHmP68Gtj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/182c433412b33c14e32a7c4fc2c3e290-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LCHmP68Gtj", "openreview": "https://openreview.net/forum?id=LCHmP68Gtj", "poster": "/media/PosterPDFs/NeurIPS%202023/71973.png?t=1701898967.8204563", "slides": "https://nips.cc/virtual/2023/poster/71973", "video": "https://nips.cc/virtual/2023/poster/71973", "author_site": "Paul-Edouard Sarlin, Eduard Trulls, Marc Pollefeys, Jan Hosang, Simon Lynen", "tldr": "", "abstract": "Semantic 2D maps are commonly used by humans and machines for navigation purposes, whether it's walking or driving. However, these maps have limitations: they lack detail, often contain inaccuracies, and are difficult to create and maintain, especially in an automated fashion. Can we use _raw imagery_ to automatically create _better maps_ that can be easily interpreted by both humans and machines? We introduce SNAP, a deep network that learns rich 2D _neural_ maps from ground-level and overhead images. We train our model to align neural maps estimated from different inputs, supervised only with camera poses over tens of millions of StreetView images. SNAP can resolve the location of challenging image queries beyond the reach of traditional methods, outperforming the state of the art in localization by a large margin. Moreover, our neural maps encode not only geometry and appearance but also high-level semantics, discovered without explicit supervision. 
This enables effective pre-training for data-efficient semantic scene understanding, with the potential to unlock cost-efficient creation of more detailed maps.", "keywords": "neural maps;visual positioning;semantic mapping", "primary_area": "", "supplementary_material": "", "author": "Paul-Edouard Sarlin;Eduard Trulls;Marc Pollefeys;Jan Hosang;Simon Lynen", "authorids": "~Paul-Edouard_Sarlin1;~Eduard_Trulls4;~Marc_Pollefeys2;~Jan_Hosang2;~Simon_Lynen1", "gender": "M;;M;;M", "homepage": "https://psarlin.com/;;;https://janhosang.com/;", "dblp": "227/3472;09/7743;p/MarcPollefeys;20/451;116/6515", "google_scholar": "2fAUgfAAAAAJ;OKZC1CYAAAAJ;YYH0BjEAAAAJ;qsB2vcgAAAAJ;RQip5VgAAAAJ", "orcid": ";;;;0000-0002-6421-541X", "linkedin": ";;marc-pollefeys-30a7075/;;simon-lynen-708b236/", "or_profile": "~Paul-Edouard_Sarlin1;~Eduard_Trulls4;~Marc_Pollefeys2;~Jan_Hosang2;~Simon_Lynen1", "aff": "Google;Google;Swiss Federal Institute of Technology;Google;Google Zurich", "aff_domain": "google.com;google.com;ethz.ch;google.com;google.com", "position": "Intern;Researcher;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nsarlin2023snap,\ntitle={{SNAP}: Self-Supervised Neural Maps for Visual Positioning and Semantic Understanding},\nauthor={Paul-Edouard Sarlin and Eduard Trulls and Marc Pollefeys and Jan Hosang and Simon Lynen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LCHmP68Gtj}\n}", "github": "", "project": "", "reviewers": "PKpW;dKqM;qSVT;Lqgd;sYmT", "pdf_size": 14016159, "rating": "4;5;6;6;7", "confidence": "4;4;4;4;3", "soundness": "2;3;4;3;3", "novelty": "2;3;4;3;3", "presentation": "2;4;4;3;3", "wc_summary": "70;63;84;135;100", "wc_strengths": "57;136;84;38;95", "wc_weaknesses": "118;562;154;118;219", "wc_questions": "67;58;270;73;5", "wc_limitations": "18;4;2;13;5", "wc_review": "330;823;594;377;424", "wc_reply_reviewers": "382;12;869;0;166", "wc_reply_authors": "887;0;1489;0;352", "reply_reviewers": "2;1;2;0;1", "reply_authors": "3;1;3;1;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 90.4, 25.648391762447794 ], "wc_strengths_avg": [ 82.0, 33.61547262794322 ], "wc_weaknesses_avg": [ 234.2, 168.00047618980133 ], "wc_questions_avg": [ 94.6, 90.95185539613801 ], "wc_limitations_avg": [ 8.4, 6.086049621881176 ], "wc_review_avg": [ 509.6, 180.2826669427763 ], "wc_reply_reviewers_avg": [ 285.8, 322.6071294934444 ], "wc_reply_authors_avg": [ 545.6, 572.607579411939 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6864064729836443, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13003963979665051242&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 7, "email": "google.com;google.com;ethz.ch;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Google;Swiss Federal Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ethz.ch", "aff_unique_abbr": "Google;ETH Zurich", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Mountain View;;Zurich", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "United 
States;Switzerland" }, { "title": "Cascading Bandits: Optimizing Recommendation Frequency in Delayed Feedback Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71972", "id": "LClyG4vZmS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f95606d8e870020085990d9650b4f2a1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LClyG4vZmS", "openreview": "https://openreview.net/forum?id=LClyG4vZmS", "poster": "/media/PosterPDFs/NeurIPS%202023/71972.png?t=1702057147.6473362", "slides": "https://nips.cc/virtual/2023/poster/71972", "video": "https://nips.cc/virtual/2023/poster/71972", "author_site": "Dairui Wang, Junyu Cao, Yan Zhang, Wei Qi", "tldr": "", "abstract": "Delayed feedback is a critical problem in dynamic recommender systems. In practice, the feedback result often depends on the frequency of recommendation. Most existing online learning literature fails to consider optimization of the recommendation frequency, and regards the reward from each successfully recommended message to be equal. In this paper, we consider a novel cascading bandits setting, where individual messages from a selected list are sent to a user periodically. Whenever a user does not like a message, she may abandon the system with a probability positively correlated with the recommendation frequency. A learning agent needs to learn both the underlying message attraction probabilities and users' abandonment probabilities through the randomly delayed feedback. We first show a dynamic programming solution to finding the optimal message sequence in deterministic scenarios, in which the reward is allowed to vary with different messages. Then we propose a polynomial time UCB-based offline learning algorithm, and discuss its performance by characterizing its regret bound. For the online setting, we propose a learning algorithm which allows adaptive content for a given user. 
Numerical experiments on the AmEx dataset confirm the effectiveness of our algorithms.", "keywords": "delayed feedback;recommender system;frequency control", "primary_area": "", "supplementary_material": "/attachment/e729beaaf11b8eba3d6c01218cb4024adce58eda.pdf", "author": "Dairui Wang;Junyu Cao;Yan Zhang;Wei Qi", "authorids": "~Dairui_Wang1;~Junyu_Cao1;yan.zhang13@mail.mcgill.ca;~Wei_Qi2", "gender": "M;F;;M", "homepage": "https://github.com/diaryw;https://junyucao.com/;;https://wei-qi-home.github.io/", "dblp": ";198/0859;;", "google_scholar": ";rWNjzJsAAAAJ;;KbKisy0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Dairui_Wang1;~Junyu_Cao1;yan.zhang13@mail.mcgill.ca;~Wei_Qi2", "aff": "Department of Industrial Engineering, Tsinghua University;University of Texas, Austin;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;utexas.edu;;tsinghua.edu.cn", "position": "Undergrad student;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\nwang2023cascading,\ntitle={Cascading Bandits: Optimizing Recommendation Frequency in Delayed Feedback Environments},\nauthor={Dairui Wang and Junyu Cao and Yan Zhang and Wei Qi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LClyG4vZmS}\n}", "github": "", "project": "", "reviewers": "MzDh;TbT8;GPGp;RjMs", "pdf_size": 3516633, "rating": "4;6;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;4;4;3", "wc_summary": "101;131;152;89", "wc_strengths": "63;122;105;66", "wc_weaknesses": "170;188;115;74", "wc_questions": "58;54;17;151", "wc_limitations": "1;1;12;46", "wc_review": "393;496;401;426", "wc_reply_reviewers": "0;18;12;27", "wc_reply_authors": "155;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 118.25, 24.772716847370617 ], "wc_strengths_avg": [ 89.0, 25.248762345905195 ], "wc_weaknesses_avg": [ 136.75, 45.1185937280851 ], "wc_questions_avg": [ 70.0, 49.42165517260627 ], "wc_limitations_avg": [ 15.0, 18.452642087245934 ], "wc_review_avg": [ 429.0, 40.55243519198323 ], "wc_reply_reviewers_avg": [ 14.25, 9.807522622966516 ], "wc_reply_authors_avg": [ 38.75, 67.11696879329399 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14157427221797580954&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;utexas.edu;;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;University of Texas at Austin", "aff_unique_dep": "Department of Industrial Engineering;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.utexas.edu", "aff_unique_abbr": "Tsinghua;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Normalization-Equivariant Neural Networks with Application to Image Denoising", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71971", "id": "LCnjG1IEfm", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/12143893d9d37c3569dda800b95cabd9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LCnjG1IEfm", "openreview": "https://openreview.net/forum?id=LCnjG1IEfm", "poster": "/media/PosterPDFs/NeurIPS%202023/71971.png?t=1699883483.0485926", "slides": "https://nips.cc/virtual/2023/poster/71971", "video": "https://nips.cc/virtual/2023/poster/71971", "author_site": "S\u00e9bastien Herbreteau, Emmanuel Moebel, Charles Kervrann", "tldr": "", "abstract": "In many information processing systems, it may be desirable to ensure that any change of the input, whether by shifting or scaling, results in a corresponding change in the system response. While deep neural networks are gradually replacing all traditional automatic processing methods, they surprisingly do not guarantee such normalization-equivariance (scale + shift) property, which can be detrimental in many applications. To address this issue, we propose a methodology for adapting existing neural networks so that normalization-equivariance holds by design. Our main claim is that not only ordinary convolutional layers, but also all activation functions, including the ReLU (rectified linear unit), which are applied element-wise to the pre-activated neurons, should be completely removed from neural networks and replaced by better conditioned alternatives. To this end, we introduce affine-constrained convolutions and channel-wise sort pooling layers as surrogates and show that these two architectural modifications do preserve normalization-equivariance without loss of performance. Experimental results in image denoising show that normalization-equivariant neural networks, in addition to their better conditioning, also provide much better generalization across noise levels.", "keywords": "equivariance;normalization;image denoising;activation functions;ReLU;interpretability;robustness;deep learning;analysis of neural networks", "primary_area": "", "supplementary_material": "/attachment/a760aebb4af4a000f9eddd97f023de3cbb9dfe2c.pdf", "author": "S\u00e9bastien Herbreteau;Emmanuel Moebel;Charles Kervrann", "authorids": "~S\u00e9bastien_Herbreteau1;~Emmanuel_Moebel1;~Charles_Kervrann3", "gender": "M;;M", "homepage": "https://sherbret.github.io/;;https://team.inria.fr/serpico/team-members/charles-kervrann-2/", "dblp": "298/5035;166/4827;78/4106", "google_scholar": "49pGY58AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8322-3474;0000-0001-6264-3245;", "linkedin": ";;", "or_profile": "~S\u00e9bastien_Herbreteau1;~Emmanuel_Moebel1;~Charles_Kervrann3", "aff": "INRIA;INRIA;INRIA", "aff_domain": "inria.fr;inria.fr;inria.fr", "position": "PhD student;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nherbreteau2023normalizationequivariant,\ntitle={Normalization-Equivariant Neural Networks with Application to Image Denoising},\nauthor={S{\\'e}bastien Herbreteau and Emmanuel Moebel and Charles Kervrann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LCnjG1IEfm}\n}", "github": "", "project": "", "reviewers": "RNG3;atAH;6aGn;dbvT", "pdf_size": 1887476, "rating": "5;5;6;8", "confidence": "3;4;4;5", "soundness": "3;4;4;4", "novelty": "2;3;2;3", "presentation": "3;4;3;4", "wc_summary": "50;45;39;78", "wc_strengths": "52;32;197;72", "wc_weaknesses": "239;125;213;92", "wc_questions": "67;101;75;75", "wc_limitations": "102;1;52;8", "wc_review": "510;304;576;325", "wc_reply_reviewers": 
"75;7;100;25", "wc_reply_authors": "168;0;6;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 53.0, 14.949916387726054 ], "wc_strengths_avg": [ 88.25, 64.35982830928 ], "wc_weaknesses_avg": [ 167.25, 60.59857671595926 ], "wc_questions_avg": [ 79.5, 12.835497652993435 ], "wc_limitations_avg": [ 40.75, 40.40652793794587 ], "wc_review_avg": [ 428.75, 116.84471532765185 ], "wc_reply_reviewers_avg": [ 51.75, 37.37228250990298 ], "wc_reply_authors_avg": [ 47.75, 69.69352552425512 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14726881548625535537&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "inria.fr;inria.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Hardness of Low Rank Approximation of Entrywise Transformed Matrix Products", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71970", "id": "LCwToX315b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4d92f656cc99f60fe1bfc98386aee34-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LCwToX315b", "openreview": "https://openreview.net/forum?id=LCwToX315b", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71970", "video": "https://nips.cc/virtual/2023/poster/71970", "author_site": "Tamas Sarlos, Xingyou Song, David Woodruff, Richard Zhang", "tldr": "", "abstract": "Inspired by fast algorithms in natural language processing, we study low rank approximation in the entrywise transformed setting where we want to find a good rank $k$ approximation to $f(U \\cdot V)$, where $U, V^\\top \\in \\mathbb{R}^{n \\times r}$ are given, $r = O(\\log(n))$, and $f(x)$ is a general scalar function. Previous work in sublinear low rank approximation has shown that if both (1) $U = V^\\top$ and (2) $f(x)$ is a PSD kernel function, then there is an $O(nk^{\\omega-1})$ time constant relative error approximation algorithm, where $\\omega \\approx 2.376$ is the exponent of matrix multiplication. We give the first conditional time hardness results for this problem, demonstrating that both conditions (1) and (2) are in fact necessary for getting better than $n^{2-o(1)}$ time for a relative error low rank approximation for a wide class of functions. We give novel reductions from the Strong Exponential Time Hypothesis (SETH) that rely on lower bounding the leverage scores of flat sparse vectors and hold even when the rank of the transformed matrix $f(UV)$ and the target rank are $n^{o(1)}$, and when $U = V^\\top$. Furthermore, even when $f(x) = x^p$ is a simple polynomial, we give runtime lower bounds in the case when $U \\neq V^\\top$ of the form $\\Omega(\\min(n^{2-o(1)}, \\Omega(2^p)))$. 
Lastly, we demonstrate that our lower bounds are tight by giving an $O(n \\cdot \\text{poly}(k, 2^p, 1/\\epsilon))$ time relative error approximation algorithm and a fast $O(n \\cdot \\text{poly}(k, p, 1/\\epsilon))$ additive error approximation using fast tensor-based sketching. Additionally, since our low rank algorithms rely on matrix-vector product subroutines, our lower bounds extend to show that computing $f(UV)W$, for even a small matrix $W$, requires $\\Omega(n^{2-o(1)})$ time.", "keywords": "Low rank approximation;kernel methods;fine-grained complexity", "primary_area": "", "supplementary_material": "/attachment/d47a9eea494f2792f3b7534c2e0c8b5331a00f05.pdf", "author": "Tamas Sarlos;Xingyou Song;David Woodruff;Qiuyi Zhang", "authorids": "~Tamas_Sarlos1;~Xingyou_Song1;~David_Woodruff1;~Qiuyi_Zhang1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/site/stamas/;https://xingyousong.github.io/;http://www.cs.cmu.edu/~dwoodruf/;https://qiuyiz.github.io", "dblp": "48/959;211/7623;w/DPWoodruff;133/8559", "google_scholar": "c4YtO-MAAAAJ;GnpHmO8AAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;mE11hO8AAAAJ", "orcid": ";;;", "linkedin": ";xingyou-song-355629a1/;;", "or_profile": "~Tamas_Sarlos1;~Xingyou_Song1;~David_Woodruff1;~Qiuyi_Zhang1", "aff": "Google Research;Google DeepMind;Carnegie Mellon University;Google", "aff_domain": "google.com;google.com;cmu.edu;google.com", "position": "Staff Research Scientist;Senior Research Scientist;Full Professor;Researcher", "bibtex": "@inproceedings{\nsarlos2023hardness,\ntitle={Hardness of Low Rank Approximation of Entrywise Transformed Matrix Products},\nauthor={Tamas Sarlos and Xingyou Song and David Woodruff and Qiuyi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LCwToX315b}\n}", "github": "", "project": "", "reviewers": "ExZq;GCRp;9ux2;26kb", "pdf_size": 311720, "rating": "6;6;6;7", "confidence": "2;3;1;4", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "93;319;108;146", "wc_strengths": "14;95;5;142", "wc_weaknesses": "24;239;8;65", "wc_questions": "2;44;1;123", "wc_limitations": "2;14;1;1", "wc_review": "135;711;123;477", "wc_reply_reviewers": "45;40;21;13", "wc_reply_authors": "58;571;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 166.5, 90.14016862642315 ], "wc_strengths_avg": [ 64.0, 57.06575155029503 ], "wc_weaknesses_avg": [ 84.0, 91.87219383469625 ], "wc_questions_avg": [ 42.5, 49.61098668641856 ], "wc_limitations_avg": [ 4.5, 5.5 ], "wc_review_avg": [ 361.5, 246.81724007856502 ], "wc_reply_reviewers_avg": [ 29.75, 13.179055353097201 ], "wc_reply_authors_avg": [ 157.25, 240.0493438857936 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7106430972927006461&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com;cmu.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;Carnegie Mellon University", "aff_unique_dep": "Google Research;", "aff_unique_url": 
"https://research.google;https://www.cmu.edu", "aff_unique_abbr": "Google Research;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Certified Robustness via Dynamic Margin Maximization and Improved Lipschitz Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71969", "id": "LDhhi8HBO3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c7ca1889f01a9b767c631686fb5fd24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LDhhi8HBO3", "openreview": "https://openreview.net/forum?id=LDhhi8HBO3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71969", "video": "https://nips.cc/virtual/2023/poster/71969", "author_site": "Mahyar Fazlyab, Taha Entesari, Aniket Roy, Rama Chellappa", "tldr": "", "abstract": "To improve the robustness of deep classifiers against adversarial perturbations, many approaches have been proposed, such as designing new architectures with better robustness properties (e.g., Lipschitz-capped networks), or modifying the training process itself (e.g., min-max optimization, constrained learning, or regularization). These approaches, however, might not be effective at increasing the margin in the input (feature) space. In this paper, we propose a differentiable regularizer that is a lower bound on the distance of the data points to the classification boundary. The proposed regularizer requires knowledge of the model's Lipschitz constant along certain directions. To this end, we develop a scalable method for calculating guaranteed differentiable upper bounds on the Lipschitz constant of neural networks accurately and efficiently. The relative accuracy of the bounds prevents excessive regularization and allows for more direct manipulation of the decision boundary. Furthermore, our Lipschitz bounding algorithm exploits the monotonicity and Lipschitz continuity of the activation layers, and the resulting bounds can be used to design new layers with controllable bounds on their Lipschitz constant. 
Experiments on the MNIST, CIFAR-10, and Tiny-ImageNet data sets verify that our proposed algorithm obtains competitively improved results compared to the state-of-the-art.", "keywords": "Deep Learning;Adversarial Robustness;Certified Radius;Lipschitz Constants", "primary_area": "", "supplementary_material": "/attachment/09f89e73e9b3f4ba68cbebc989733e93a479ee5b.pdf", "author": "Mahyar Fazlyab;Taha Entesari;Aniket Roy;Rama Chellappa", "authorids": "~Mahyar_Fazlyab1;~Taha_Entesari1;~Aniket_Roy1;~Rama_Chellappa1", "gender": "M;M;;", "homepage": "https://www.ece.jhu.edu/mahyarfazlyab/;;;", "dblp": "147/4846;332/2244;173/0075;", "google_scholar": "Y3bmjJwAAAAJ;5F1qfQ0AAAAJ;https://scholar.google.co.in/citations?user=9y2gsDwAAAAJ;", "orcid": ";;;", "linkedin": ";tahaentesari/;;", "or_profile": "~Mahyar_Fazlyab1;~Taha_Entesari1;~Aniket_Roy1;~Rama_Chellappa1", "aff": "Johns Hopkins University;Whiting School of Engineering, Johns Hopkins University;Johns Hopkins University;", "aff_domain": "jhu.edu;engineering.jhu.edu;jhu.edu;", "position": "Assistant Professor;MS student;PhD student;", "bibtex": "@inproceedings{\nfazlyab2023certified,\ntitle={Certified Robustness via Dynamic Margin Maximization and Improved Lipschitz Regularization},\nauthor={Mahyar Fazlyab and Taha Entesari and Aniket Roy and Rama Chellappa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LDhhi8HBO3}\n}", "github": "", "project": "", "reviewers": "XLJR;U11m;9vGY;4zXU", "pdf_size": 1886986, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "4;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "42;197;56;138", "wc_strengths": "35;21;48;83", "wc_weaknesses": "46;36;262;158", "wc_questions": "2;767;91;492", "wc_limitations": "14;3;1;70", "wc_review": "139;1024;458;941", "wc_reply_reviewers": "15;16;14;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 108.25, 63.00942389833445 ], "wc_strengths_avg": [ 46.75, 23.004075725836064 ], "wc_weaknesses_avg": [ 125.5, 92.22120146690781 ], "wc_questions_avg": [ 338.0, 308.8939947619571 ], "wc_limitations_avg": [ 22.0, 28.151376520518493 ], "wc_review_avg": [ 640.5, 361.3104620682883 ], "wc_reply_reviewers_avg": [ 20.25, 9.12071817347735 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17784091784413396389&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "jhu.edu;engineering.jhu.edu;jhu.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Baltimore", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Degraded Polygons Raise Fundamental Questions of Neural Network Perception", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73623", "id": "LE4AN1FGjJ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1ec408df112bc9b186d7b8fe0ada902a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=LE4AN1FGjJ", "openreview": "https://openreview.net/forum?id=LE4AN1FGjJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73623", "video": "https://nips.cc/virtual/2023/poster/73623", "author_site": "Leonard Tang, Dan Ley", "tldr": "", "abstract": "It is well-known that modern computer vision systems often exhibit behaviors misaligned with those of humans: from adversarial attacks to image corruptions, deep\nlearning vision models suffer in a variety of settings that humans capably handle. In\nlight of these phenomena, here we introduce another, orthogonal perspective studying the human-machine vision gap. We revisit the task of recovering images under\ndegradation, first introduced over 30 years ago in the Recognition-by-Components\ntheory of human vision. Specifically, we study the performance and behavior of\nneural networks on the seemingly simple task of classifying regular polygons at\nvarying orders of degradation along their perimeters. To this end, we implement the\nAutomated Shape Recoverability Test\nfor rapidly generating large-scale datasets\nof perimeter-degraded regular polygons, modernizing the historically manual creation of image recoverability experiments. We then investigate the capacity of\nneural networks to recognize and recover such degraded shapes when initialized\nwith different priors. Ultimately, we find that neural networks\u2019 behavior on this\nsimple task conflicts with human behavior, raising a fundamental question of the\nrobustness and learning capabilities of modern computer vision models", "keywords": "Geometry;Cognitive Science;Psychology;Vision;Robustness;Safety", "primary_area": "", "supplementary_material": "/attachment/134f1a0105514cb03992226e8e09acce49ec565d.pdf", "author": "Leonard Tang;Dan Ley", "authorids": "~Leonard_Tang1;~Dan_Ley1", "gender": "M;M", "homepage": "http://leonardtang.me/;https://www.dan-ley.com/", "dblp": "306/7940;290/1369", "google_scholar": "18ZQFjEAAAAJ;zQ_f9AEAAAAJ", "orcid": ";", "linkedin": "leonard-tang/;dan-ley/", "or_profile": "~Leonard_Tang1;~Dan_Ley1", "aff": "Harvard University;Harvard University, Harvard University", "aff_domain": "harvard.edu;g.harvard.edu", "position": "Undergrad student;PhD student", "bibtex": "@inproceedings{\ntang2023degraded,\ntitle={Degraded Polygons Raise Fundamental Questions of Neural Network Perception},\nauthor={Leonard Tang and Dan Ley},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=LE4AN1FGjJ}\n}", "github": "", "project": "", "reviewers": "Q8xH;d9g1;fh2w;5Kr2", "pdf_size": 929970, "rating": "4;6;7;9", "confidence": "3;3;4;3", "wc_summary_and_contributions": "70;116;104;85", "wc_strengths": "48;32;56;48", "wc_improvement": "373;38;142;84", "wc_limitations": "1;100;55;13", "wc_correctness": "1;32;73;8", "wc_clarity": "4;1;73;51", "wc_relation_to_prior_work": "1;19;27;12", "wc_documentation": "1;2;18;13", "wc_additional_feedback": "1;1;1;1", "wc_review": "500;341;549;315", "wc_reply_reviewers": "0;48;132;0", "wc_reply_authors": "764;476;590;580", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 93.75, 17.612140698961042 ], "wc_strengths_avg": [ 46.0, 
8.717797887081348 ], "wc_improvement_avg": [ 159.25, 128.7931966370895 ], "wc_limitations_avg": [ 42.25, 38.90613704802881 ], "wc_correctness_avg": [ 28.5, 28.146935890075138 ], "wc_clarity_avg": [ 32.25, 30.768287245149022 ], "wc_relation_to_prior_work_avg": [ 14.75, 9.54921462739214 ], "wc_documentation_avg": [ 8.5, 7.22841614740048 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 426.25, 100.18826029031545 ], "wc_reply_reviewers_avg": [ 45.0, 53.91660226683428 ], "wc_reply_authors_avg": [ 602.5, 103.3767381957856 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.16012815380508713, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QrPBIfwT4b4J:scholar.google.com/&scioq=Degraded+Polygons+Raise+Fundamental+Questions+of+Neural+Network+Perception&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "harvard.edu;g.harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Handling Data Heterogeneity via Architectural Design for Federated Visual Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71968", "id": "LGKxz9clGG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ccd06ff26fd6a7829293ce90e0e7f7d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LGKxz9clGG", "openreview": "https://openreview.net/forum?id=LGKxz9clGG", "poster": "/media/PosterPDFs/NeurIPS%202023/71968.png?t=1701500401.460032", "slides": "https://nips.cc/virtual/2023/poster/71968", "video": "https://nips.cc/virtual/2023/poster/71968", "author_site": "Sara Pieri, Jose Restom, Samuel Horv\u00e1th, Hisham Cholakkal", "tldr": "", "abstract": "Federated Learning (FL) is a promising research paradigm that enables the collaborative training of machine learning models among various parties without the need for sensitive information exchange. Nonetheless, retaining data in individual clients introduces fundamental challenges to achieving performance on par with centrally trained models. Our study provides an extensive review of federated learning applied to visual recognition. It underscores the critical role of thoughtful architectural design choices in achieving optimal performance, a factor often neglected in the FL literature. Many existing FL solutions are tested on shallow or simple networks, which may not accurately reflect real-world applications. This practice restricts the transferability of research findings to large-scale visual recognition models. Through an in-depth analysis of diverse cutting-edge architectures such as convolutional neural networks, transformers, and MLP-mixers, we experimentally demonstrate that architectural choices can substantially enhance FL systems' performance, particularly when handling heterogeneous data. We study visual recognition models from five different architectural families on four challenging FL datasets. We also re-investigate the inferior performance of convolution-based architectures in the FL setting and analyze the influence of normalization layers on the FL performance. 
Our findings emphasize the importance of architectural design for computer vision tasks in practical scenarios, effectively narrowing the performance gap between federated and centralized learning.", "keywords": "Computer Vision;Federated Learning;Image Classification;Neural Network Architectures;Transformer;CNN;Data Hetereogenity;non-IID", "primary_area": "", "supplementary_material": "", "author": "Sara Pieri;Jose Renato Restom;Samuel Horv\u00e1th;Hisham Cholakkal", "authorids": "~Sara_Pieri2;~Jose_Renato_Restom1;~Samuel_Horv\u00e1th1;~Hisham_Cholakkal2", "gender": "F;M;M;M", "homepage": ";;https://sites.google.com/view/samuelhorvath;https://mbzuai.ac.ae/pages/hisham-cholakkal/", "dblp": "359/3913;;234/8604;129/2046", "google_scholar": "jLNKLsgAAAAJ;;k252J7kAAAAJ;bZ3YBRcAAAAJ", "orcid": "0009-0009-5461-2719;;0000-0003-0619-9260;", "linkedin": "sara-pieri-94908a202/;jrestom;samuel-horvath/;", "or_profile": "~Sara_Pieri2;~Jose_Renato_Restom1;~Samuel_Horv\u00e1th1;~Hisham_Cholakkal2", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;MBZUAI;MBZUAI", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae", "position": "MS student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npieri2023handling,\ntitle={Handling Data Heterogeneity via Architectural Design for Federated Visual Recognition},\nauthor={Sara Pieri and Jose Renato Restom and Samuel Horv{\\'a}th and Hisham Cholakkal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LGKxz9clGG}\n}", "github": "", "project": "", "reviewers": "6dpE;nuGC;TNv8;JN2W;vstz", "pdf_size": 798455, "rating": "5;5;6;6;7", "confidence": "4;4;4;4;3", "soundness": "3;3;3;4;3", "novelty": "2;2;3;3;3", "presentation": "3;3;4;4;2", "wc_summary": "62;49;27;107;119", "wc_strengths": "53;46;139;105;39", "wc_weaknesses": "75;154;78;66;194", "wc_questions": "45;5;8;56;55", "wc_limitations": "20;6;1;7;9", "wc_review": "255;260;253;341;416", "wc_reply_reviewers": "103;26;18;0;36", "wc_reply_authors": "307;0;0;0;0", "reply_reviewers": "2;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 72.8, 34.884953776664226 ], "wc_strengths_avg": [ 76.4, 39.005640617736304 ], "wc_weaknesses_avg": [ 113.4, 51.2234321380362 ], "wc_questions_avg": [ 33.8, 22.639787984873003 ], "wc_limitations_avg": [ 8.6, 6.2801273872430325 ], "wc_review_avg": [ 305.0, 64.5693425706039 ], "wc_reply_reviewers_avg": [ 36.6, 35.234074416677956 ], "wc_reply_authors_avg": [ 61.4, 122.80000000000001 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8017837257372731, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3579979026290901510&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": 
"", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Arab Emirates" }, { "title": "Dynamic Tensor Decomposition via Neural Diffusion-Reaction Processes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71967", "id": "LGqIAn2OaZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4958a8ad01f524de2ec5274678ffa5a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LGqIAn2OaZ", "openreview": "https://openreview.net/forum?id=LGqIAn2OaZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71967.png?t=1699740313.2308989", "slides": "https://nips.cc/virtual/2023/poster/71967", "video": "https://nips.cc/virtual/2023/poster/71967", "author_site": "Zheng Wang, Shikai Fang, Shibo Li, Shandian Zhe", "tldr": "", "abstract": "Tensor decomposition is an important tool for multiway data analysis. In practice, the data is often sparse yet associated with rich temporal information. Existing methods, however, often under-use the time information and ignore the structural knowledge within the sparsely observed tensor entries. To overcome these limitations and to better capture the underlying temporal structure, we propose Dynamic EMbedIngs fOr dynamic Tensor dEcomposition (DEMOTE). We develop a neural diffusion-reaction process to estimate dynamic embeddings for the entities in each tensor mode. Specifically, based on the observed tensor entries, we build a multi-partite graph to encode the correlation between the entities. We construct a graph diffusion process to co-evolve the embedding trajectories of the correlated entities and use a neural network to construct a reaction process for each individual entity. In this way, our model can capture both the commonalities and personalities during the evolution of the embeddings for different entities. We then use a neural network to model the entry value as a nonlinear function of the embedding trajectories. For model estimation, we combine ODE solvers to develop a stochastic mini-batch learning algorithm. We propose a stratified sampling method to balance the cost of processing each mini-batch so as to improve the overall efficiency. We show the advantage of our approach in both simulation studies and real-world applications. 
The code is available at https://github.com/wzhut/Dynamic-Tensor-Decomposition-via-Neural-Diffusion-Reaction-Processes.", "keywords": "Tensor Decomposition;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/b7622a2daef46baae4bd9f3e86f442b61cc054f5.pdf", "author": "Zheng Wang;Shikai Fang;Shibo Li;Shandian Zhe", "authorids": "~Zheng_Wang2;~Shikai_Fang2;~Shibo_Li1;~Shandian_Zhe1", "gender": "M;;;", "homepage": ";;https://imshibo.com/;", "dblp": ";;;", "google_scholar": ";;thvPDwgAAAAJ;", "orcid": ";;0009-0009-1076-282X;", "linkedin": ";;;", "or_profile": "~Zheng_Wang2;~Shikai_Fang2;~Shibo_Li1;~Shandian_Zhe1", "aff": "University of Utah;;University of Utah;", "aff_domain": "utah.edu;;utah.edu;", "position": "PhD student;;PhD student;", "bibtex": "@inproceedings{\nwang2023dynamic,\ntitle={Dynamic Tensor Decomposition via Neural Diffusion-Reaction Processes},\nauthor={Zheng Wang and Shikai Fang and Shibo Li and Shandian Zhe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LGqIAn2OaZ}\n}", "github": "", "project": "", "reviewers": "wffj;3waT;gF6D;9boz", "pdf_size": 684841, "rating": "6;6;6;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "3;2;2;2", "presentation": "2;3;2;3", "wc_summary": "121;63;78;105", "wc_strengths": "48;55;41;44", "wc_weaknesses": "58;110;127;97", "wc_questions": "52;280;9;18", "wc_limitations": "27;8;16;1", "wc_review": "306;516;271;265", "wc_reply_reviewers": "74;19;29;31", "wc_reply_authors": "10;40;39;37", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 91.75, 22.620510604316607 ], "wc_strengths_avg": [ 47.0, 5.244044240850758 ], "wc_weaknesses_avg": [ 98.0, 25.42636427018224 ], "wc_questions_avg": [ 89.75, 111.00534897021855 ], "wc_limitations_avg": [ 13.0, 9.669539802906858 ], "wc_review_avg": [ 339.5, 103.098254107429 ], "wc_reply_reviewers_avg": [ 38.25, 21.134982848348848 ], "wc_reply_authors_avg": [ 31.5, 12.459935794377111 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2345220102144371382&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 10, "email": "utah.edu;;utah.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "Utah", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Variational Weighting for Kernel Density Ratios", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71966", "id": "LIsJHQHi4z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ff54b4ec4f70b3ae12c8621ca8a49f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LIsJHQHi4z", "openreview": "https://openreview.net/forum?id=LIsJHQHi4z", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71966", "video": "https://nips.cc/virtual/2023/poster/71966", "author_site": "Sangwoong Yoon, Frank Park, Gunsu YUN, Iljung Kim, Yung-Kyun Noh", "tldr": "", 
"abstract": "Kernel density estimation (KDE) is integral to a range of generative and discriminative tasks in machine learning. Drawing upon tools from the multidimensional calculus of variations, we derive an optimal weight function that reduces bias in standard kernel density estimates for density ratios, leading to improved estimates of prediction posteriors and information-theoretic measures. In the process, we shed light on some fundamental aspects of density estimation, particularly from the perspective of algorithms that employ KDEs as their main building blocks.", "keywords": "Kernel Density Estimation;KL-divergence;Density Ratio", "primary_area": "", "supplementary_material": "", "author": "Sangwoong Yoon;Frank C. Park;Gunsu S YUN;Iljung Kim;Yung-Kyun Noh", "authorids": "~Sangwoong_Yoon1;~Frank_C._Park1;~Gunsu_S_YUN1;~Iljung_Kim1;~Yung-Kyun_Noh1", "gender": "M;M;M;M;M", "homepage": "https://swyoon.github.io/;http://robotics.snu.ac.kr;https://sites.google.com/site/p4postech/;http://aais.hanyang.ac.kr;https://github.com/Delta-Life", "dblp": "237/1318;p/FrankChongwooPark;;54/6443;", "google_scholar": "https://scholar.google.co.kr/citations?user=cH2rjfIAAAAJ;u-h3PJIAAAAJ;https://scholar.google.co.kr/citations?user=m6m68D8AAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-7251-3230;0000-0002-0293-6975;0000-0002-1880-5865;;", "linkedin": ";;;;", "or_profile": "~Sangwoong_Yoon1;~Frank_C._Park1;~Gunsu_S_YUN1;~Yung-Kyun_Noh1;~Iljung_Kim2", "aff": "Seoul National University;Seoul National University;POSTECH;Korea Institute for Advanced Study;Hanyang University", "aff_domain": "snu.ac.kr;snu.ac.kr;postech.ac.kr;kias.re.kr;hanyang.ac.kr", "position": "PhD student;Full Professor;Associate Professor;Affiliate Professor;Undergrad student", "bibtex": "@inproceedings{\nyoon2023variational,\ntitle={Variational Weighting for Kernel Density Ratios},\nauthor={Sangwoong Yoon and Frank C. 
Park and Gunsu S YUN and Iljung Kim and Yung-Kyun Noh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LIsJHQHi4z}\n}", "github": "", "project": "", "reviewers": "wV5E;rQ6e;QwPp;oTo7", "pdf_size": 1087124, "rating": "5;6;6;7", "confidence": "4;2;2;3", "soundness": "2;2;3;4", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "71;17;45;182", "wc_strengths": "25;53;88;112", "wc_weaknesses": "13;105;58;220", "wc_questions": "519;11;2;24", "wc_limitations": "4;1;4;12", "wc_review": "632;187;197;550", "wc_reply_reviewers": "29;11;0;59", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.75, 62.595427149273455 ], "wc_strengths_avg": [ 69.5, 33.17001658124397 ], "wc_weaknesses_avg": [ 99.0, 77.06166362076542 ], "wc_questions_avg": [ 139.0, 219.5324577368914 ], "wc_limitations_avg": [ 5.25, 4.085033659592048 ], "wc_review_avg": [ 391.5, 201.62651115366748 ], "wc_reply_reviewers_avg": [ 24.75, 22.320114247019436 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11344686753990195560&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;snu.ac.kr;postech.ac.kr;kias.re.kr;hanyang.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Seoul National University;Pohang University of Science and Technology;Korea Institute for Advanced Study;Hanyang University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.snu.ac.kr;https://www.postech.ac.kr;http://www.kias.re.kr;https://www.hanyang.ac.kr", "aff_unique_abbr": "SNU;POSTECH;KIAS;HYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "LJ4CYEagg3", "title": "Spatially-Guided Temporal Attention (SGuTA) and Shifted-Cube Attention (SCubA) for Video Frame Interpolation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, methods based on convolutional kernels have achieved state-of-the-art performance in the video frame interpolation task. However, due to the inherent limitations of their convolutional kernel size, it seems that their performance has reached a plateau. On the other hand, Transformers are gradually replacing convolutional neural networks as a new backbone structure in image tasks, thanks to their ability to establish global correlations. However, in video tasks, the computational complexity and memory requirements of Transformers become even more challenging. To address this issue, we employ two different Transformers, SGuTA and SCubA, in the VFI task. SGuTA utilizes the spatial information of each video frame to guide the generation of the temporal vector at each pixel position. Meanwhile, SCubA introduces local attention into the VFI task, which can be viewed as a counterpart of 3D convolution in local attention Transformers. 
Additionally, we analyze and compare different embedding strategies and propose a more balanced embedding strategy in terms of parameter count, computational complexity, and memory requirements. Extensive quantitative and qualitative experiments demonstrate that our models exhibit high proficiency in handling large motions and providing precise motion estimation, resulting in new state-of-the-art results in various benchmark tests. The source code can be obtained at https://github.com/esthen-bit/SGuTA-SCubA.", "keywords": "Video frame interpolation;Transformer", "primary_area": "", "supplementary_material": "/attachment/443017bad15eaec277b898855f8018a7e719f4ab.zip", "author": "Xin Zhang;Feng Huang;Yixuan Xu;Xianyu Wu", "authorids": "~Xin_Zhang31;~Feng_Huang3;~Yixuan_Xu2;~Xianyu_Wu1", "gender": "M;M;M;M", "homepage": "https://github.com/esthen-bit;;https://github.com/xuyixuan1999;", "dblp": ";;;144/9350.html", "google_scholar": ";;;", "orcid": "0009-0003-9611-9494;0000-0003-4652-4312;0000-0001-5057-289X;0000-0001-6005-7058", "linkedin": ";;;", "or_profile": "~Xin_Zhang31;~Feng_Huang3;~Yixuan_Xu2;~Xianyu_Wu1", "aff": "Fuzhou University;Fuzhou University;Fuzhou University;Fuzhou University", "aff_domain": "fzu.edu.cn;fzu.edu.cn;fzu.edu.cn;fzu.edu.cn", "position": "MS student;Full Professor;MS student;Associate Professor", "bibtex": "@misc{\nzhang2023spatiallyguided,\ntitle={Spatially-Guided Temporal Attention ({SG}u{TA}) and Shifted-Cube Attention ({SC}ubA) for Video Frame Interpolation},\nauthor={Xin Zhang and Feng Huang and Yixuan Xu and Xianyu Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=LJ4CYEagg3}\n}", "github": "", "project": "", "reviewers": "RdUT;BBtn;E6Ri;Dfnf;kW9p", "site": "https://openreview.net/forum?id=LJ4CYEagg3", "pdf_size": 5890212, "rating": "4;4;4;5;5", "confidence": "5;5;5;4;2", "soundness": "2;2;4;2;3", "novelty": "1;2;2;3;3", "presentation": "2;2;3;3;2", "wc_summary": "35;87;75;53;86", "wc_strengths": "11;67;74;35;116", "wc_weaknesses": "136;488;311;113;70", "wc_questions": "87;12;25;89;4", "wc_limitations": "15;10;40;28;11", "wc_review": "284;664;525;318;287", "wc_reply_reviewers": "48;0;250;23;27", "wc_reply_authors": "228;0;776;21;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;1", "rating_avg": [ 4.4, 0.48989794855663565 ], "confidence_avg": [ 4.2, 1.16619037896906 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 67.2, 20.22275945562326 ], "wc_strengths_avg": [ 60.6, 35.79161913074065 ], "wc_weaknesses_avg": [ 223.6, 155.59254480854793 ], "wc_questions_avg": [ 43.4, 37.032958293930555 ], "wc_limitations_avg": [ 20.8, 11.548160026601643 ], "wc_review_avg": [ 415.6, 153.00535938325822 ], "wc_reply_reviewers_avg": [ 69.6, 91.47808480723677 ], "wc_reply_authors_avg": [ 205.0, 298.15298086720514 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8401680504168059, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j1lNO86NnvcJ:scholar.google.com/&scioq=Spatially-Guided+Temporal+Attention+(SGuTA)+and+Shifted-Cube+Attention+(SCubA)+for+Video+Frame+Interpolation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fuzhou University", "aff_unique_dep": "", "aff_unique_url": "https://www.fznu.edu.cn", "aff_unique_abbr": "FZU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "LJhfKeqZdu", "title": "RL4CO: an Extensive Reinforcement Learning for Combinatorial Optimization Benchmark", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "We introduce RL4CO, an extensive reinforcement learning (RL) for combinatorial optimization (CO) benchmark. RL4CO employs state-of-the-art software libraries as well as best practices in implementation, such as modularity and configuration management, to be efficient and easily modifiable by researchers for adaptations of neural network architecture, environments, and RL algorithms. Contrary to the existing focus on specific tasks like the traveling salesman problem (TSP) for performance assessment, we underline the importance of scalability and generalization capabilities for diverse CO tasks. We also systematically benchmark zero-shot generalization, sample efficiency, and adaptability to changes in data distributions of various models. Our experiments show that some recent state-of-the-art methods fall behind their predecessors when evaluated using these metrics, suggesting the necessity for a more balanced view of the performance of neural CO (NCO) solvers. We hope RL4CO will encourage the exploration of novel solutions to complex real-world tasks, allowing the NCO community to compare with existing methods through a standardized interface that decouples the science from software engineering. We make our library publicly available at https://github.com/kaist-silab/rl4co.", "keywords": "Benchmark;Reinforcement Learning;Neural Combinatorial Optimization;Combinatorial Optimization;TSP;CVRP", "primary_area": "", "supplementary_material": "/attachment/c62dec1040a95b049dba80d8ca682d1ef41b7e22.pdf", "author": "Federico Berto;Chuanbo Hua;Junyoung Park;Minsu Kim;Hyeonah Kim;Jiwoo Son;Haeyeon Kim;Joungho Kim;Jinkyoo Park", "authorids": "~Federico_Berto1;~Chuanbo_Hua1;~Junyoung_Park1;~Minsu_Kim2;~Hyeonah_Kim1;~Jiwoo_Son2;~Haeyeon_Kim1;~Joungho_Kim1;~Jinkyoo_Park1", "gender": "M;M;;M;F;;F;M;M", "homepage": "https://fedebotu.github.io/;https://github.com/cbhua;;https://minsuukim.github.io/;;;http://sites.google.com/view/haeyeon-rachel-kim;;http://silab.kaist.ac.kr/", "dblp": "317/1711;326/5321;;;;348/9675;;;156/7535", "google_scholar": "https://scholar.google.com/citations?hl=en;fjKA5gYAAAAJ;;https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;;zHyj8zAAAAAJ;rP_9IY8AAAAJ;;sH2a0nkAAAAJ", "orcid": "0000-0002-7438-8365;0000-0001-7700-792X;;;0000-0002-0629-1879;0009-0008-1032-6318;;;0000-0003-2620-1479", "linkedin": "federicoberto/;;;;hyeonahkimm/;jiwoo-son-303b31284/;;joungho-kim-3280b1a4/;", "or_profile": "~Federico_Berto1;~Chuanbo_Hua1;~Junyoung_Park1;~Minsu_Kim2;~Hyeonah_Kim1;~Jiwoo_Son2;~Haeyeon_Kim1;~Joungho_Kim1;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;;kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;;PhD student;PhD student;MS student;PhD student;Full Professor;Associate Professor", "bibtex": 
"@misc{\nberto2023rlco,\ntitle={{RL}4{CO}: an Extensive Reinforcement Learning for Combinatorial Optimization Benchmark},\nauthor={Federico Berto and Chuanbo Hua and Junyoung Park and Minsu Kim and Hyeonah Kim and Jiwoo Son and Haeyeon Kim and Joungho Kim and Jinkyoo Park},\nyear={2023},\nurl={https://openreview.net/forum?id=LJhfKeqZdu}\n}", "github": "", "project": "", "reviewers": "AoJ6;ajbT;aCTo;Yy73;mnwK", "site": "https://openreview.net/forum?id=LJhfKeqZdu", "pdf_size": 1238689, "rating": "3;4;6;7;8", "confidence": "3;4;3;4;5", "wc_summary_and_contributions": "49;116;58;79;117", "wc_strengths": "5;23;58;36;104", "wc_improvement": "30;99;386;75;92", "wc_limitations": "119;123;12;35;28", "wc_correctness": "1;10;6;21;1", "wc_clarity": "1;26;37;64;1", "wc_relation_to_prior_work": "1;6;179;35;1", "wc_documentation": "1;6;12;21;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "208;410;749;367;346", "wc_reply_reviewers": "70;0;76;38;27", "wc_reply_authors": "1346;1057;2690;981;483", "reply_reviewers": "2;0;1;1;1", "reply_authors": "5;4;6;4;2", "rating_avg": [ 5.6, 1.8547236990991407 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 83.8, 28.421118908304788 ], "wc_strengths_avg": [ 45.2, 34.1021993425644 ], "wc_improvement_avg": [ 136.4, 127.09146312793791 ], "wc_limitations_avg": [ 63.4, 47.63444132138006 ], "wc_correctness_avg": [ 7.8, 7.413501197140255 ], "wc_clarity_avg": [ 25.8, 23.726778120933318 ], "wc_relation_to_prior_work_avg": [ 44.4, 68.47948597937926 ], "wc_documentation_avg": [ 8.2, 7.573638491504595 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 416.0, 179.71644332113854 ], "wc_reply_reviewers_avg": [ 42.2, 28.08843178249722 ], "wc_reply_authors_avg": [ 1311.4, 743.2180299212339 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 4.2, 1.32664991614216 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6628489803598702, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12686896239119847413&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Disentanglement via Latent Quantization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71965", "id": "LLETO26Ga2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e63972d4d9d81b31459d787466ce271-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LLETO26Ga2", "openreview": "https://openreview.net/forum?id=LLETO26Ga2", "poster": "/media/PosterPDFs/NeurIPS%202023/71965.png?t=1702335031.6490028", "slides": "https://nips.cc/virtual/2023/poster/71965", "video": "https://nips.cc/virtual/2023/poster/71965", "author_site": "Kyle Hsu, William Dorrell, James Whittington, Jiajun Wu, Chelsea Finn", "tldr": "", "abstract": "In disentangled representation learning, a model is asked to tease apart a dataset's underlying sources of variation and represent them independently of one another. Since the model is provided with no ground truth information about these sources, inductive biases take a paramount role in enabling disentanglement. 
In this work, we construct an inductive bias towards encoding to and decoding from an organized latent space. Concretely, we do this by (i) quantizing the latent space into discrete code vectors with a separate learnable scalar codebook per dimension and (ii) applying strong model regularization via an unusually high weight decay. Intuitively, the latent space design forces the encoder to combinatorially construct codes from a small number of distinct scalar values, which in turn enables the decoder to assign a consistent meaning to each value. Regularization then serves to drive the model towards this parsimonious strategy. We demonstrate the broad applicability of this approach by adding it to both basic data-reconstructing (vanilla autoencoder) and latent-reconstructing (InfoGAN) generative models. For reliable evaluation, we also propose InfoMEC, a new set of metrics for disentanglement that is cohesively grounded in information theory and fixes well-established shortcomings in previous metrics. Together with regularization, latent quantization dramatically improves the modularity and explicitness of learned representations on a representative suite of benchmark datasets. In particular, our quantized-latent autoencoder (QLAE) consistently outperforms strong methods from prior work in these key disentanglement properties without compromising data reconstruction.", "keywords": "disentanglement;unsupervised learning;quantization", "primary_area": "", "supplementary_material": "/attachment/481bbd1c3be8cb3c48ee57ac79e2349de1d92cd2.pdf", "author": "Kyle Hsu;Will Dorrell;James C. R. Whittington;Jiajun Wu;Chelsea Finn", "authorids": "~Kyle_Hsu1;~Will_Dorrell1;~James_C._R._Whittington1;~Jiajun_Wu1;~Chelsea_Finn1", "gender": "M;M;;M;F", "homepage": "https://www.kylehsu.org;http://www.williamdorrell.co.uk/;http://www.jcrwhittington.com;https://jiajunwu.com;https://ai.stanford.edu/~cbfinn/", "dblp": "217/3841;;198/7308;117/4768;131/1783", "google_scholar": "KCdL5B0AAAAJ;GyVPmtYAAAAJ;https://scholar.google.co.uk/citations?user=zUu0JKYAAAAJ;2efgcS0AAAAJ;vfPE6hgAAAAJ", "orcid": ";;0000-0001-5680-5586;0000-0002-4176-343X;", "linkedin": ";;;jiajunwu/;", "or_profile": "~Kyle_Hsu1;~Will_Dorrell1;~James_C._R._Whittington1;~Jiajun_Wu1;~Chelsea_Finn1", "aff": "Stanford University;University College London, University of London;University of Oxford;Stanford University;Google", "aff_domain": "cs.stanford.edu;ucl.ac.uk;oxford.ac.uk;stanford.edu;google.com", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nhsu2023disentanglement,\ntitle={Disentanglement via Latent Quantization},\nauthor={Kyle Hsu and Will Dorrell and James C. R. 
Whittington and Jiajun Wu and Chelsea Finn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LLETO26Ga2}\n}", "github": "", "project": "", "reviewers": "Mf8i;hQMG;sKhi;XVcp;iB4p", "pdf_size": 24196742, "rating": "5;5;6;7;7", "confidence": "2;4;3;5;3", "soundness": "2;2;3;3;3", "novelty": "2;2;2;2;3", "presentation": "2;3;3;2;3", "wc_summary": "60;132;239;87;50", "wc_strengths": "8;52;158;107;43", "wc_weaknesses": "71;174;90;171;202", "wc_questions": "103;54;134;285;6", "wc_limitations": "18;109;203;591;8", "wc_review": "260;521;824;1241;309", "wc_reply_reviewers": "48;45;22;181;38", "wc_reply_authors": "391;339;0;255;0", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 113.6, 68.83196931659009 ], "wc_strengths_avg": [ 73.6, 52.81136241378365 ], "wc_weaknesses_avg": [ 141.6, 51.3988326715695 ], "wc_questions_avg": [ 116.4, 94.86537829998888 ], "wc_limitations_avg": [ 185.8, 214.5464052367226 ], "wc_review_avg": [ 631.0, 364.0807602716738 ], "wc_reply_reviewers_avg": [ 66.8, 57.80449809487147 ], "wc_reply_authors_avg": [ 197.0, 166.60252098932958 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4385290096535146, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7697913713697774189&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "cs.stanford.edu;ucl.ac.uk;oxford.ac.uk;stanford.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Stanford University;University College London;University of Oxford;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.stanford.edu;https://www.ucl.ac.uk;https://www.ox.ac.uk;https://www.google.com", "aff_unique_abbr": "Stanford;UCL;Oxford;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Stanford;;Mountain View", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "An Inverse Scaling Law for CLIP Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71964", "id": "LMU2RNwdh2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/996e2b446391fcb8bf32a3d1645cc799-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LMU2RNwdh2", "openreview": "https://openreview.net/forum?id=LMU2RNwdh2", "poster": "/media/PosterPDFs/NeurIPS%202023/71964.png?t=1702172238.4955647", "slides": "https://nips.cc/virtual/2023/poster/71964", "video": "https://nips.cc/virtual/2023/poster/71964", "author_site": "Xianhang Li, Zeyu Wang, Cihang Xie", "tldr": "", "abstract": "CLIP, one of the pioneering foundation models that connect images and text, has enabled many recent breakthroughs in computer vision. However, its associated training cost is prohibitively high, imposing a significant barrier to its widespread exploration. In this paper, we present a surprising finding that there exists an inverse scaling law for CLIP training, whereby the larger the image/text encoders used, the shorter the sequence length of image/text tokens that can be applied in training. 
Moreover, we showcase that the strategy for reducing image/text token length plays a crucial role in determining the quality of this scaling law.\n\n\nAs a result of this finding, we are able to successfully train CLIP even with limited computational resources. For example, using 8 A100 GPUs, our CLIP models achieve zero-shot top-1 ImageNet-1k accuracies of 63.2% in ~2 days, 67.8% in ~3 days, and 69.3% in ~4 days. Our method also works well when scaling up --- with G/14, we register a new record of 83.0% ImageNet-1k zero-shot accuracy, and meanwhile accelerate the training by ~33x compared to its OpenCLIP counterpart.\nBy reducing the computation barrier associated with CLIP, we hope to inspire more research in this field, particularly from academics. \nOur code is available at https://github.com/UCSC-VLAA/CLIPA.", "keywords": "CLIP;inverse scaling;efficient training", "primary_area": "", "supplementary_material": "", "author": "Xianhang Li;Zeyu Wang;Cihang Xie", "authorids": "~Xianhang_Li1;~Zeyu_Wang2;~Cihang_Xie3", "gender": "M;;", "homepage": "https://xhl-video.github.io/xianhangli/;;", "dblp": "268/5945;;", "google_scholar": "YKpFz4YAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xianhang_Li1;~Zeyu_Wang2;~Cihang_Xie3", "aff": "University of California, Santa Cruz;;", "aff_domain": "ucsc.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nli2023an,\ntitle={An Inverse Scaling Law for {CLIP} Training},\nauthor={Xianhang Li and Zeyu Wang and Cihang Xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LMU2RNwdh2}\n}", "github": "", "project": "", "reviewers": "KzVG;UF9M;Mhr1;Depv;TeGF", "pdf_size": 2529991, "rating": "3;4;6;7;8", "confidence": "4;4;4;4;5", "soundness": "2;3;3;4;4", "novelty": "3;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "212;89;75;71;214", "wc_strengths": "48;85;165;68;150", "wc_weaknesses": "524;293;325;119;428", "wc_questions": "38;47;2;35;72", "wc_limitations": "31;3;30;10;108", "wc_review": "853;517;597;303;972", "wc_reply_reviewers": "225;240;115;4;31", "wc_reply_authors": "451;463;444;16;17", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.6, 1.8547236990991407 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 132.2, 66.24620743861493 ], "wc_strengths_avg": [ 103.2, 46.10162686934161 ], "wc_weaknesses_avg": [ 337.8, 136.27237430968904 ], "wc_questions_avg": [ 38.8, 22.533530571128885 ], "wc_limitations_avg": [ 36.4, 37.44115382837446 ], "wc_review_avg": [ 648.4, 239.0762221551947 ], "wc_reply_reviewers_avg": [ 123.0, 96.72848598008758 ], "wc_reply_authors_avg": [ 278.2, 213.76379487649447 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6469966392206306, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15113030324862139526&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "ucsc.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of California, Santa Cruz", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsc.edu", "aff_unique_abbr": "UCSC", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Cruz", "aff_country_unique_index": "0", "aff_country_unique": "United States" 
}, { "title": "Asymptotically Optimal Quantile Pure Exploration for Infinite-Armed Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71963", "id": "LROEcjVkv5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3b3a83a5d86e1d424daefed43d998079-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LROEcjVkv5", "openreview": "https://openreview.net/forum?id=LROEcjVkv5", "poster": "/media/PosterPDFs/NeurIPS%202023/71963.png?t=1702317653.5659876", "slides": "https://nips.cc/virtual/2023/poster/71963", "video": "https://nips.cc/virtual/2023/poster/71963", "author_site": "Evelyn Xiao-Yue Gong, Mark Sellke", "tldr": "", "abstract": "We study pure exploration with infinitely many bandit arms generated \\iid from an unknown distribution. Our goal is to efficiently select a single high quality arm whose average reward is, with probability $1-\\delta$, within $\\varepsilon$ of being with the top $\\eta$-fraction of arms; this is a natural adaptation of the classical PAC guarantee for infinite action sets. We consider both the fixed confidence and fixed budget settings, aiming respectively for optimal \\emph{expected} and \\emph{fixed} sample complexity.\n\n\nFor fixed confidence, we give an algorithm with expected sample complexity $O\\left(\\frac{\\log (1/\\eta)\\log (1/\\delta)}{\\eta\\varepsilon^2}\\right)$. This is optimal except for the $\\log (1/\\eta)$ factor, and the $\\delta$-dependence closes a quadratic gap in the literature. For fixed budget, we show the asymptotically optimal sample complexity as $\\delta\\to 0$ is $c^{-1}\\log(1/\\delta)\\big(\\log\\log(1/\\delta)\\big)^2$ to leading order; equivalently, the optimal failure probability with exactly $N$ samples decays as \n$\\exp\\big(-(1\\pm o(1))\\frac{cN}{\\log^2 N}\\big)$.\nThe value of $c$ depends explicitly on the problem parameters (including the unknown arm distribution) through a certain Fisher information distance. 
Even the strictly super-linear dependence on $\\log(1/\\delta)$ was not known and resolves a question of Grossman-Moshkovitz (FOCS 2015).", "keywords": "pure exploration;multi-armed bandits;Fisher information", "primary_area": "", "supplementary_material": "/attachment/9c4c513903ecff05dd9538bc3511119cfa3f077d.pdf", "author": "Xiao-Yue Gong;Mark Sellke", "authorids": "~Xiao-Yue_Gong1;~Mark_Sellke1", "gender": "F;M", "homepage": "https://evelyngongcmu.github.io/;https://msellke.com/", "dblp": "222/1960;207/8338", "google_scholar": "uHDiAKQAAAAJ;lXCP2cMAAAAJ", "orcid": "0000-0002-4647-3941;0000-0001-9166-8185", "linkedin": ";mark-sellke-a40b19100/", "or_profile": "~Xiao-Yue_Gong1;~Mark_Sellke1", "aff": "Massachusetts Institute of Technology;Institute for Advanced Study, Princeton", "aff_domain": "mit.edu;ias.edu", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\ngong2023asymptotically,\ntitle={Asymptotically Optimal Quantile Pure Exploration for Infinite-Armed Bandits},\nauthor={Xiao-Yue Gong and Mark Sellke},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LROEcjVkv5}\n}", "github": "", "project": "", "reviewers": "326s;xhAx;ccFe;o2AR", "pdf_size": 480397, "rating": "6;6;7;8", "confidence": "4;1;3;2", "soundness": "3;3;2;4", "novelty": "3;4;3;4", "presentation": "2;3;2;3", "wc_summary": "108;326;164;58", "wc_strengths": "21;183;105;38", "wc_weaknesses": "329;265;102;107", "wc_questions": "5;19;277;79", "wc_limitations": "1;14;9;15", "wc_review": "464;807;657;297", "wc_reply_reviewers": "11;27;17;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 164.0, 100.7670581092849 ], "wc_strengths_avg": [ 86.75, 63.82936236560726 ], "wc_weaknesses_avg": [ 200.75, 98.88977449665865 ], "wc_questions_avg": [ 95.0, 108.6922260329597 ], "wc_limitations_avg": [ 9.75, 5.539629951540085 ], "wc_review_avg": [ 556.25, 192.83850108316025 ], "wc_reply_reviewers_avg": [ 17.0, 6.164414002968976 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6854551567493878448&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mit.edu;ias.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Institute for Advanced Study", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://ias.edu", "aff_unique_abbr": "MIT;IAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Princeton", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Three Towers: Flexible Contrastive Learning with Pretrained Image Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71962", "id": "LSYQB4CwD3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/63d4316315900a62e610e5c17bab900a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LSYQB4CwD3", "openreview": "https://openreview.net/forum?id=LSYQB4CwD3", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71962.png?t=1702302471.0026839", "slides": "https://nips.cc/virtual/2023/poster/71962", "video": "https://nips.cc/virtual/2023/poster/71962", "author_site": "Jannik Kossen, Mark Collier, Basil Mustafa, Xiao Wang, Xiaohua Zhai, Lucas Beyer, Andreas Steiner, Andreas Steiner, Jesse Berent, Rodolphe Jenatton, Effrosyni Kokiopoulou", "tldr": "", "abstract": "We introduce Three Towers (3T), a flexible method to improve the contrastive learning of vision-language models by incorporating pretrained image classifiers. While contrastive models are usually trained from scratch, LiT (Zhai et al., 2022) has recently shown performance gains from using pretrained classifier embeddings. However, LiT directly replaces the image tower with the frozen embeddings, excluding any potential benefits from training the image tower contrastively. With 3T, we propose a more flexible strategy that allows the image tower to benefit from both pretrained embeddings and contrastive training. To achieve this, we introduce a third tower that contains the frozen pretrained embeddings, and we encourage alignment between this third tower and the main image-text towers. Empirically, 3T consistently improves over LiT and the CLIP-style from-scratch baseline for retrieval tasks. For classification, 3T reliably improves over the from-scratch baseline, and while it underperforms relative to LiT for JFT-pretrained models, it outperforms LiT for ImageNet-21k and Places365 pretraining.", "keywords": "three towers;contrastive learning;transformers;vision transformers;pretrained models;representation learning;finetuning;CLIP;ALIGN;classification;zero-shot;few-shot;retrieval", "primary_area": "", "supplementary_material": "/attachment/6cb9d7743ccfb8c052da4112ec42b45080648ef7.pdf", "author": "Jannik Kossen;Mark Collier;Basil Mustafa;Xiao Wang;Xiaohua Zhai;Lucas Beyer;Andreas Peter Steiner;Jesse Berent;Rodolphe Jenatton;Effrosyni Kokiopoulou", "authorids": "~Jannik_Kossen2;~Mark_Collier1;~Basil_Mustafa1;~Xiao_Wang5;~Xiaohua_Zhai2;~Lucas_Beyer1;~Andreas_Peter_Steiner1;~Jesse_Berent1;~Rodolphe_Jenatton3;~Effrosyni_Kokiopoulou1", "gender": "Unspecified;M;M;M;;;M;M;M;F", "homepage": "https://jlko.eu;;https://www.basilmustafa.com/;;;http://lucasb.eyer.be;;;http://rodolphejenatton.com/;", "dblp": "250/2339;;;49/67-38;66/636;126/4720;s/AndreasSteiner;81/397.html;68/8398;05/960", "google_scholar": "i1FIOV0AAAAJ;U4rBrcgAAAAJ;https://scholar.google.co.uk/citations?user=LuxZAJwAAAAJ;ukyXqzMAAAAJ;;p2gwhK4AAAAJ;;;QIR6rygAAAAJ;9om-fCsAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";mark-collier-aa446032/;basil-mustafa/;;;;andreas-steiner-1859223b/;https://ch.linkedin.com/in/jesse-berent-a1b6875;;", "or_profile": "~Jannik_Kossen2;~Mark_Collier1;~Basil_Mustafa1;~Xiao_Wang5;~Xiaohua_Zhai2;~Lucas_Beyer1;~Andreas_Peter_Steiner1;~Jesse_Berent1;~Rodolphe_Jenatton3;~Effrosyni_Kokiopoulou1", "aff": "University of Oxford;Google;Google;Google Brain;Google Brain;Google Brain;Google DeepMind;Google;Google;Google DeepMind", "aff_domain": "oxford.ac.uk;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Research Software Engineer;Researcher;Researcher;Researcher;Research Engineer;Researcher;Senior research scientist;Researcher", "bibtex": "@inproceedings{\nkossen2023three,\ntitle={Three Towers: Flexible Contrastive Learning with Pretrained Image Models},\nauthor={Jannik Kossen and Mark Collier and Basil Mustafa and Xiao Wang and Xiaohua Zhai and Lucas 
Beyer and Andreas Peter Steiner and Jesse Berent and Rodolphe Jenatton and Effrosyni Kokiopoulou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LSYQB4CwD3}\n}", "github": "", "project": "", "reviewers": "Y95u;GeUR;Wqsw;ni71", "pdf_size": 856305, "rating": "5;5;6;6", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "4;3;3;4", "wc_summary": "148;54;87;64", "wc_strengths": "33;47;117;109", "wc_weaknesses": "383;255;136;100", "wc_questions": "4;5;59;85", "wc_limitations": "1;1;1;5", "wc_review": "569;362;400;363", "wc_reply_reviewers": "68;21;96;47", "wc_reply_authors": "1270;92;58;43", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 88.25, 36.51284020724764 ], "wc_strengths_avg": [ 76.5, 36.94252292413175 ], "wc_weaknesses_avg": [ 218.5, 110.95156600967829 ], "wc_questions_avg": [ 38.25, 34.98124497498624 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 423.5, 85.38881659795972 ], "wc_reply_reviewers_avg": [ 58.0, 27.54087870784082 ], "wc_reply_authors_avg": [ 365.75, 522.3707375992649 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14367770520310598200&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "oxford.ac.uk;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;1;1;1", "aff_unique_norm": "University of Oxford;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ox.ac.uk;https://www.google.com", "aff_unique_abbr": "Oxford;Google", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Sample Efficient Reinforcement Learning in Mixed Systems through Augmented Samples and Its Applications to Queueing Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71961", "id": "LTbIUkN95h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0663a39baab211328fc865f91abc75ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LTbIUkN95h", "openreview": "https://openreview.net/forum?id=LTbIUkN95h", "poster": "/media/PosterPDFs/NeurIPS%202023/71961.png?t=1702138749.5148547", "slides": "https://nips.cc/virtual/2023/poster/71961", "video": "https://nips.cc/virtual/2023/poster/71961", "author_site": "Honghao Wei, Xin Liu, Weina Wang, Lei Ying", "tldr": "", "abstract": "This paper considers a class of reinforcement learning problems, which involve systems with two types of states: stochastic and pseudo-stochastic. In such systems, stochastic states follow a stochastic transition kernel while the transitions of pseudo-stochastic states are deterministic {\\em given} the stochastic states/transitions. We refer to such systems as mixed systems, which are widely used in various applications, including manufacturing systems, communication networks, and queueing networks. 
We propose a sample-efficient RL method that accelerates learning by generating augmented data samples. The proposed algorithm is data-driven (model-free), but it learns the policy from both real and augmented samples. This method significantly improves learning by reducing the sample complexity such that the dataset only needs to have sufficient coverage of the stochastic states. We analyze the sample complexity of the proposed method under Fitted Q Iteration (FQI) and demonstrate that the optimality gap decreases as $O\\left(\\sqrt{\\frac{1}{n}}+\\sqrt{\\frac{1}{m}}\\right),$ where $n$ represents the number of real samples, and $m$ is the number of augmented samples per real sample. It is important to note that without augmented samples, the optimality gap is $O(1)$ due to the insufficient data coverage of the pseudo-stochastic states. Our experimental results on multiple queueing network applications confirm that the proposed method indeed significantly accelerates both deep Q-learning and deep policy gradient.", "keywords": "Reinforcement Learning;Mixed Systems;Queueing Network;Sample Efficient", "primary_area": "", "supplementary_material": "/attachment/26a5b58edf7d428193249415efa4ea74e8c1fadf.zip", "author": "Honghao Wei;Xin Liu;Weina Wang;Lei Ying", "authorids": "~Honghao_Wei2;~Xin_Liu14;~Weina_Wang1;~Lei_Ying1", "gender": "M;;;M", "homepage": "https://honghaow.me;;https://www.cs.cmu.edu/~weinaw/;http://leiying.engin.umich.edu/", "dblp": ";76/1820-49;88/2200;27/4818", "google_scholar": ";y0U4EF4AAAAJ;mQnBkmoAAAAJ;7f3HKI8AAAAJ", "orcid": "0000-0002-1131-326X;;0000-0001-6808-0156;", "linkedin": "honghao-wei-19565b155/;;;", "or_profile": "~Honghao_Wei2;~Xin_Liu14;~Weina_Wang1;~Lei_Ying1", "aff": "University of Michigan;ShanghaiTech University;Carnegie Mellon University;University of Michigan, Ann Arbor", "aff_domain": "umich.edu;shanghaitech.edu.cm;csd.cs.cmu.edu;umich.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nwei2023sample,\ntitle={Sample Efficient Reinforcement Learning in Mixed Systems through Augmented Samples and Its Applications to Queueing Networks},\nauthor={Honghao Wei and Xin Liu and Weina Wang and Lei Ying},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LTbIUkN95h}\n}", "github": "", "project": "", "reviewers": "ciJb;oHp5;d68Z", "pdf_size": 4380312, "rating": "7;7;7", "confidence": "3;4;3", "soundness": "3;3;4", "novelty": "4;2;2", "presentation": "3;4;4", "wc_summary": "196;214;107", "wc_strengths": "92;39;59", "wc_weaknesses": "197;149;79", "wc_questions": "156;132;104", "wc_limitations": "1;22;139", "wc_review": "642;556;488", "wc_reply_reviewers": "13;133;34", "wc_reply_authors": "40;406;14", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 172.33333333333334, 46.77843757782235 ], "wc_strengths_avg": [ 63.333333333333336, 21.853044537445015 ], "wc_weaknesses_avg": [ 141.66666666666666, 48.45157949495099 ], "wc_questions_avg": [ 130.66666666666666, 21.24983660067897 ], "wc_limitations_avg": [ 54.0, 60.71243694664216 ], "wc_review_avg": [ 562.0, 63.01322612489117 ], "wc_reply_reviewers_avg": [ 60.0, 
52.32590180780452 ], "wc_reply_authors_avg": [ 153.33333333333334, 178.97734183099516 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4340106062335109743&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "umich.edu;shanghaitech.edu.cm;csd.cs.cmu.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Michigan;ShanghaiTech University;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umich.edu;https://www.shanghaitech.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "UM;ShanghaiTech;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Feature-Learning Networks Are Consistent Across Widths At Realistic Scales", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71960", "id": "LTdfYIvbHc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/03600ae6c3392fd65ad7c3a90c6f7ce8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LTdfYIvbHc", "openreview": "https://openreview.net/forum?id=LTdfYIvbHc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71960", "video": "https://nips.cc/virtual/2023/poster/71960", "author_site": "Nikhil Vyas, Alexander Atanasov, Blake Bordelon, Depen Morwani, Sabarish Sainathan, Cengiz Pehlevan", "tldr": "", "abstract": "We study the effect of width on the dynamics of feature-learning neural networks across a variety of architectures and datasets. Early in training, wide neural networks trained on online data not only have identical loss curves but also agree in their point-wise test predictions throughout training. For simple tasks such as CIFAR-5m this holds throughout training for networks of realistic widths. We also show that structural properties of the models, including internal representations, preactivation distributions, edge of stability phenomena, and large learning rate effects, are consistent across large widths. This motivates the hypothesis that phenomena seen in realistic models can be captured by infinite-width, feature-learning limits. For harder tasks (such as ImageNet and language modeling), and later training times, finite-width deviations grow systematically. Two distinct effects cause these deviations across widths. First, the network output has an initialization-dependent variance scaling inversely with width, which can be removed by ensembling networks. We observe, however, that ensembles of narrower networks perform worse than a single wide network. We call this the bias of narrower width. 
We conclude with a spectral perspective on the origin of this finite-width bias.", "keywords": "mean-field;muP;feature learning;infinite width;deep ensembles", "primary_area": "", "supplementary_material": "/attachment/5852d0ea32017c935c57e3d4da7acc2248ecc16e.zip", "author": "Nikhil Vyas;Alexander Atanasov;Blake Bordelon;Depen Morwani;Sabarish Sainathan;Cengiz Pehlevan", "authorids": "~Nikhil_Vyas1;~Alexander_Atanasov1;~Blake_Bordelon1;~Depen_Morwani1;~Sabarish_Sainathan1;~Cengiz_Pehlevan2", "gender": "M;M;M;M;M;", "homepage": "https://nikhilvyas.github.io/;http://abatanasov.com/;https://blakebordelon.github.io/;;https://pehlevan.seas.harvard.edu/people/sabarish-sainathan;https://pehlevan.seas.harvard.edu/", "dblp": "176/1074;305/3785.html;228/6993;277/5200;;145/3480", "google_scholar": ";abMQRYIAAAAJ;yeQ8_pgAAAAJ;vOngxFUAAAAJ;;veDLTPEAAAAJ", "orcid": ";0000-0002-3338-0324;0000-0003-0455-9445;;;0000-0001-9767-6063", "linkedin": ";alexatanasov/;;depen-morwani-070298122/;;", "or_profile": "~Nikhil_Vyas1;~Alexander_Atanasov1;~Blake_Bordelon1;~Depen_Morwani1;~Sabarish_Sainathan1;~Cengiz_Pehlevan2", "aff": "Harvard University;Harvard University;Harvard University;Harvard University, Harvard University;Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;harvard.edu;harvard.edu;g.harvard.edu;harvard.edu;seas.harvard.edu", "position": "Postdoc;PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nvyas2023featurelearning,\ntitle={Feature-Learning Networks Are Consistent Across Widths At Realistic Scales},\nauthor={Nikhil Vyas and Alexander Atanasov and Blake Bordelon and Depen Morwani and Sabarish Sainathan and Cengiz Pehlevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LTdfYIvbHc}\n}", "github": "", "project": "", "reviewers": "g9eH;j9zd;8Tjm;gHF9;ddkm", "pdf_size": 2482463, "rating": "5;6;6;6;6", "confidence": "3;3;3;4;3", "soundness": "3;2;3;3;2", "novelty": "3;2;3;3;3", "presentation": "3;3;2;3;2", "wc_summary": "116;94;358;182;102", "wc_strengths": "47;61;374;103;88", "wc_weaknesses": "126;332;691;593;330", "wc_questions": "40;77;620;186;5", "wc_limitations": "1;9;1;45;1", "wc_review": "330;573;2044;1109;526", "wc_reply_reviewers": "19;376;0;16;190", "wc_reply_authors": "0;356;0;34;245", "reply_reviewers": "1;2;0;1;1", "reply_authors": "1;2;1;2;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 170.4, 98.79595133405012 ], "wc_strengths_avg": [ 134.6, 121.30391584775819 ], "wc_weaknesses_avg": [ 414.4, 202.72799510674395 ], "wc_questions_avg": [ 185.6, 225.52747061056664 ], "wc_limitations_avg": [ 11.4, 17.083325203250098 ], "wc_review_avg": [ 916.4, 620.1253421688231 ], "wc_reply_reviewers_avg": [ 120.2, 145.50106528819643 ], "wc_reply_authors_avg": [ 127.0, 146.47320574084532 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16922979100666830075&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": 
"harvard.edu;harvard.edu;harvard.edu;g.harvard.edu;harvard.edu;seas.harvard.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Visual Prior via Generative Pre-Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71959", "id": "LUT4b9gOtS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df4f6e43446b1ee29c5a33d32c279f83-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LUT4b9gOtS", "openreview": "https://openreview.net/forum?id=LUT4b9gOtS", "poster": "/media/PosterPDFs/NeurIPS%202023/71959.png?t=1699588912.7927902", "slides": "https://nips.cc/virtual/2023/poster/71959", "video": "https://nips.cc/virtual/2023/poster/71959", "author_site": "Jinheng Xie, Kai Ye, Yudong Li, Yuexiang Li, Kevin Qinghong Lin, Yefeng Zheng, Linlin Shen, Mike Zheng Shou", "tldr": "", "abstract": "Various stuff and things in visual data possess specific traits, which can be learned by deep neural networks and are implicitly represented as the visual prior, e.g., object location and shape, in the model. Such prior potentially impacts many vision tasks. For example, in conditional image synthesis, spatial conditions failing to adhere to the prior can result in visually inaccurate synthetic results. This work aims to explicitly learn the visual prior and enable the customization of sampling. Inspired by advances in language modeling, we propose to learn Visual prior via Generative Pre-Training, dubbed VisorGPT. By discretizing visual locations, e.g., bounding boxes, human pose, and instance masks, into sequences, VisorGPT can model visual prior through likelihood maximization. Besides, prompt engineering is investigated to unify various visual locations and enable customized sampling of sequential outputs from the learned prior. Experimental results demonstrate the effectiveness of VisorGPT in modeling visual prior and extrapolating to novel scenes, potentially motivating that discrete visual locations can be integrated into the learning paradigm of current language models to further perceive visual world. 
Code is available at https://sierkinhane.github.io/visor-gpt.", "keywords": "Visual Prior;Generative Pre-Training;Conditional Image Synthesis", "primary_area": "", "supplementary_material": "/attachment/f20e4df516a7c01b7749fc4ba6ff6bf357ee2f50.zip", "author": "Jinheng Xie;Kai Ye;Yudong Li;Yuexiang Li;Kevin Qinghong Lin;Yefeng Zheng;Linlin Shen;Mike Zheng Shou", "authorids": "~Jinheng_Xie1;~Kai_Ye3;~Yudong_Li1;~Yuexiang_Li1;~Kevin_Qinghong_Lin1;~Yefeng_Zheng2;~Linlin_Shen1;~Mike_Zheng_Shou1", "gender": "M;M;M;M;M;M;M;", "homepage": "https://sierkinhane.github.io/;https://szukevin.site/;https://scholar.google.com/citations?user=j4EmuqkAAAAJ&hl=zh-CN;https://yuexiangli.github.io;https://en.westlake.edu.cn/faculty/yefeng-zheng.html;https://csse.szu.edu.cn/pages/user/index?id=594;https://qinghonglin.github.io/;http://www.columbia.edu/~zs2262/", "dblp": "273/4278;85/1383-4;;165/6204;44/6510;88/5607;287/4900;284/0807", "google_scholar": "smbRMokAAAAJ;;j4EmuqkAAAAJ;WsKu4EMAAAAJ;vAIECxgAAAAJ;https://scholar.google.com.hk/citations?user=AZ_y9HgAAAAJ;;h1-3lSoAAAAJ", "orcid": ";;0000-0001-6779-8836;;0000-0003-2195-2847;0000-0003-1420-0815;0009-0008-6779-3435;", "linkedin": ";;;;yefeng-zheng-bb45641/?originalSubdomain=cn;;;", "or_profile": "~Jinheng_Xie1;~Kai_Ye3;~Yudong_Li1;~Yuexiang_Li1;~Yefeng_Zheng2;~Linlin_Shen1;~Qinghong_Lin1;~Zheng_Shou1", "aff": "National University of Singapore;Shenzhen University;Shenzhen University;Tencent Jarvis Lab;Tencent Jarvis Lab;Shenzhen University;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;szu.edu.cn;szu.edu;tencent.com;tencent.com;szu.edu.cn;u.nus.edu;nus.edu.sg", "position": "PhD student;MS student;PhD student;Researcher;Director;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxie2023learning,\ntitle={Learning Visual Prior via Generative Pre-Training},\nauthor={Jinheng Xie and Kai Ye and Yudong Li and Yuexiang Li and Kevin Qinghong Lin and Yefeng Zheng and Linlin Shen and Mike Zheng Shou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LUT4b9gOtS}\n}", "github": "", "project": "", "reviewers": "CQCW;xtC6;KtmJ;6eEZ", "pdf_size": 43515850, "rating": "6;6;6;7", "confidence": "3;4;3;4", "soundness": "3;2;3;4", "novelty": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "63;81;106;142", "wc_strengths": "45;65;170;90", "wc_weaknesses": "144;534;544;134", "wc_questions": "54;67;2;82", "wc_limitations": "10;8;1;13", "wc_review": "316;755;823;461", "wc_reply_reviewers": "212;49;186;23", "wc_reply_authors": "410;757;130;22", "reply_reviewers": "3;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 98.0, 29.63950067055786 ], "wc_strengths_avg": [ 92.5, 47.5 ], "wc_weaknesses_avg": [ 339.0, 200.06249023742558 ], "wc_questions_avg": [ 51.25, 30.11125204969066 ], "wc_limitations_avg": [ 8.0, 4.415880433163924 ], "wc_review_avg": [ 588.75, 208.10138754943466 ], "wc_reply_reviewers_avg": [ 117.5, 82.53029746705145 ], "wc_reply_authors_avg": [ 329.75, 284.42606684338904 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=7201592203615755583&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nus.edu;szu.edu.cn;szu.edu;tencent.com;tencent.com;szu.edu.cn;u.nus.edu;nus.edu.sg", "author_num": 8, "aff_unique_index": "0;1;1;2;2;1;0;0", "aff_unique_norm": "National University of Singapore;Shenzhen University;Tencent", "aff_unique_dep": ";;Jarvis Lab", "aff_unique_url": "https://www.nus.edu.sg;https://www.szu.edu.cn;https://www.tencent.com", "aff_unique_abbr": "NUS;SZU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Stochastic Optimal Control for Collective Variable Free Sampling of Molecular Transition Paths", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71958", "id": "LUVqEs90mq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb7f55f36c53247a704792a721272706-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LUVqEs90mq", "openreview": "https://openreview.net/forum?id=LUVqEs90mq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71958", "video": "https://nips.cc/virtual/2023/poster/71958", "author_site": "Lars Holdijk, Yuanqi Du, Ferry Hooft, Priyank Jaini, Berend Ensing, Max Welling", "tldr": "", "abstract": "We consider the problem of sampling transition paths between two given metastable states of a molecular system, eg. a folded and unfolded protein or products and reactants of a chemical reaction. Due to the existence of high energy barriers separating the states, these transition paths are unlikely to be sampled with standard Molecular Dynamics (MD) simulation. Traditional methods to augment MD with a bias potential to increase the probability of the transition rely on a dimensionality reduction step based on Collective Variables (CVs). Unfortunately, selecting appropriate CVs requires chemical intuition and traditional methods are therefore not always applicable to larger systems. Additionally, when incorrect CVs are used, the bias potential might not be minimal and bias the system along dimensions irrelevant to the transition. Showing a formal relation between the problem of sampling molecular transition paths, the Schrodinger bridge problem and stochastic optimal control with neural network policies, we propose a machine learning method for sampling said transitions. Unlike previous non-machine learning approaches our method, named PIPS, does not depend on CVs. 
We show that our method successful generates low energy transitions for Alanine Dipeptide as well as the larger Polyproline and Chignolin proteins.", "keywords": "Transition Path Sampling;Stochastic Optimal Control", "primary_area": "", "supplementary_material": "/attachment/0980e5292b239549c3b4c86a1d9c9fef94ff064e.pdf", "author": "Lars Holdijk;Yuanqi Du;Ferry Hooft;Priyank Jaini;Bernd Ensing;Max Welling", "authorids": "~Lars_Holdijk1;~Yuanqi_Du1;~Ferry_Hooft1;~Priyank_Jaini1;~Bernd_Ensing1;~Max_Welling1", "gender": "M;M;M;M;;M", "homepage": "https://www.larsholdijk.com/;https://yuanqidu.github.io/;https://www.compchem.nl/group_members/ferry-hooft/;https://priyankjaini.github.io/;https://www.compchem.nl/staff_members/dr-ir-b-bernd-ensing;https://staff.fnwi.uva.nl/m.welling/", "dblp": "232/1764;266/2837;;184/4579;;16/2286", "google_scholar": "h_Y2MjoAAAAJ;fAc_zZMAAAAJ;;https://scholar.google.ca/citations?user=keg9BGEAAAAJ;https://scholar.google.nl/citations?user=BfwtSk8AAAAJ;https://scholar.google.nl/citations?user=8200InoAAAAJ", "orcid": ";;;;my-orcid?orcid=0000-0002-4913-3571;0000-0003-1484-2121", "linkedin": "larsholdijk/;;;;;", "or_profile": "~Lars_Holdijk1;~Yuanqi_Du1;~Ferry_Hooft1;~Priyank_Jaini1;~Bernd_Ensing1;~Max_Welling1", "aff": "University of Oxford;Cornell University;University of Amsterdam;Google;University of Amsterdam;University of Amsterdam", "aff_domain": "ox.ac.uk;cornell.edu;uva.nl;google.com;uva.nl;uva.nl", "position": "PhD student;PhD student;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nholdijk2023stochastic,\ntitle={Stochastic Optimal Control for Collective Variable Free Sampling of Molecular Transition Paths},\nauthor={Lars Holdijk and Yuanqi Du and Ferry Hooft and Priyank Jaini and Bernd Ensing and Max Welling},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LUVqEs90mq}\n}", "github": "", "project": "", "reviewers": "isij;vdgz;VgsC;KPqs", "pdf_size": 6487934, "rating": "3;5;7;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "2;3;3;3", "wc_summary": "203;98;84;133", "wc_strengths": "190;143;60;75", "wc_weaknesses": "548;157;185;98", "wc_questions": "95;152;40;200", "wc_limitations": "1;74;2;10", "wc_review": "1037;624;371;516", "wc_reply_reviewers": "472;0;10;24", "wc_reply_authors": "219;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 129.5, 46.03531253288067 ], "wc_strengths_avg": [ 117.0, 52.48333068699051 ], "wc_weaknesses_avg": [ 247.0, 176.59699884199617 ], "wc_questions_avg": [ 121.75, 60.07651371376338 ], "wc_limitations_avg": [ 21.75, 30.367540236245674 ], "wc_review_avg": [ 637.0, 247.77308166949857 ], "wc_reply_reviewers_avg": [ 126.5, 199.65658015702863 ], "wc_reply_authors_avg": [ 54.75, 94.82978171439603 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8921521379332419412&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;cornell.edu;uva.nl;google.com;uva.nl;uva.nl", 
"author_num": 6, "aff_unique_index": "0;1;2;3;2;2", "aff_unique_norm": "University of Oxford;Cornell University;University of Amsterdam;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.ox.ac.uk;https://www.cornell.edu;https://www.uva.nl;https://www.google.com", "aff_unique_abbr": "Oxford;Cornell;UvA;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;1;2;2", "aff_country_unique": "United Kingdom;United States;Netherlands" }, { "title": "Exploring Question Decomposition for Zero-Shot VQA", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71957", "id": "LV83JEihHu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b14cf0a01f7a8b9cd3e365e40f910272-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LV83JEihHu", "openreview": "https://openreview.net/forum?id=LV83JEihHu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71957", "video": "https://nips.cc/virtual/2023/poster/71957", "author_site": "Zaid Khan, Vijay Kumar B G, Samuel Schulter, Manmohan Chandraker, Yun Fu", "tldr": "", "abstract": "Visual question answering (VQA) has traditionally been treated as a single-step task where each question receives the same amount of effort, unlike natural human question-answering strategies. We explore a question decomposition strategy for VQA to overcome this limitation. We probe the ability of recently developed large vision-language models to use human-written decompositions and produce their own decompositions of visual questions, finding they are capable of learning both tasks from demonstrations alone.\nHowever, we show that naive application of model-written decompositions can hurt performance.\nWe introduce a model-driven selective decomposition approach for second-guessing predictions and correcting errors, and validate its effectiveness on eight VQA tasks across three domains, showing consistent improvements in accuracy, including improvements of >20% on medical VQA datasets and boosting the zero-shot performance of BLIP-2 above chance on a VQA reformulation of the challenging Winoground task. 
Project Site: https://zaidkhan.me/decomposition-0shot-vqa/", "keywords": "visual question answering;in-context learning;vision-language", "primary_area": "", "supplementary_material": "/attachment/fc51a284ffb032465ab8d9dd3c1713b4203db3a4.pdf", "author": "Zaid Khan;Vijay Kumar b g;Samuel Schulter;Manmohan Chandraker;Yun Fu", "authorids": "~Zaid_Khan1;~Vijay_Kumar_b_g1;~Samuel_Schulter1;~Manmohan_Chandraker3;~Yun_Fu1", "gender": "Not Specified;;;;M", "homepage": "https://zaidkhan.me;;https://samschulter.github.io;;http://www1.ece.neu.edu/~yunfu/", "dblp": "259/1127-1;;27/9990;;00/5815-1", "google_scholar": "uXXocfgAAAAJ;;VQ6dsFEAAAAJ;;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": ";;;;0000-0002-5098-2853", "linkedin": "https://linkedin.com/in/khan-zaid;;;;furaymond/", "or_profile": "~Zaid_Khan1;~Vijay_Kumar_b_g1;~Samuel_Schulter1;~Manmohan_Chandraker3;~Yun_Fu1", "aff": ";;NEC-Labs;;Northeastern University", "aff_domain": ";;nec-labs.com;;northeastern.edu", "position": ";;Researcher;;Full Professor", "bibtex": "@inproceedings{\nkhan2023exploring,\ntitle={Exploring Question Decomposition for Zero-Shot {VQA}},\nauthor={Zaid Khan and Vijay Kumar b g and Samuel Schulter and Manmohan Chandraker and Yun Fu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LV83JEihHu}\n}", "github": "", "project": "", "reviewers": "TpMW;zebn;37W7;F8RC;yYni", "pdf_size": 1532349, "rating": "5;5;6;6;6", "confidence": "4;5;4;4;5", "soundness": "3;3;3;2;3", "novelty": "3;3;3;2;3", "presentation": "3;4;3;2;3", "wc_summary": "73;101;23;83;112", "wc_strengths": "52;94;29;44;116", "wc_weaknesses": "233;199;331;105;468", "wc_questions": "32;25;4;314;125", "wc_limitations": "1;6;49;18;268", "wc_review": "391;425;436;564;1089", "wc_reply_reviewers": "19;0;0;0;87", "wc_reply_authors": "0;0;0;0;12", "reply_reviewers": "1;0;0;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 78.4, 30.85190431723786 ], "wc_strengths_avg": [ 67.0, 32.643529220965064 ], "wc_weaknesses_avg": [ 267.2, 123.70998342898604 ], "wc_questions_avg": [ 100.0, 114.79198578298052 ], "wc_limitations_avg": [ 68.4, 101.18616506222577 ], "wc_review_avg": [ 581.0, 260.696758706356 ], "wc_reply_reviewers_avg": [ 21.2, 33.71290554075694 ], "wc_reply_authors_avg": [ 2.4, 4.8 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1621498382422868717&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";;nec-labs.com;;northeastern.edu", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "NEC Laboratories;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nec-labs.com;https://www.northeastern.edu", "aff_unique_abbr": "NEC-Labs;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Models and Semi-Supervised Learners Benefit Mutually with Few Labels", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71956", "id": "LVHEcVgEGm", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8735753cc18f6baa92d1f069fd8b14a0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LVHEcVgEGm", "openreview": "https://openreview.net/forum?id=LVHEcVgEGm", "poster": "/media/PosterPDFs/NeurIPS%202023/71956.png?t=1701430810.9280019", "slides": "https://nips.cc/virtual/2023/poster/71956", "video": "https://nips.cc/virtual/2023/poster/71956", "author_site": "Zebin You, Yong Zhong, Fan Bao, Jiacheng Sun, Chongxuan LI, Jun Zhu", "tldr": "", "abstract": "In an effort to further advance semi-supervised generative and classification tasks, we propose a simple yet effective training strategy called *dual pseudo training* (DPT), built upon strong semi-supervised learners and diffusion models. DPT operates in three stages: training a classifier on partially labeled data to predict pseudo-labels; training a conditional generative model using these pseudo-labels to generate pseudo images; and retraining the classifier with a mix of real and pseudo images. Empirically, DPT consistently achieves SOTA performance of semi-supervised generation and classification across various settings. In particular, with one or two labels per class, DPT achieves a Fr\u00e9chet Inception Distance (FID) score of 3.08 or 2.52 on ImageNet $256\\times256$. Besides, DPT outperforms competitive semi-supervised baselines substantially on ImageNet classification tasks, *achieving top-1 accuracies of 59.0 (+2.8), 69.5 (+3.0), and 74.4 (+2.0)* with one, two, or five labels per class, respectively. Notably, our results demonstrate that diffusion can generate realistic images with only a few labels (e.g., $<0.1$%) and generative augmentation remains viable for semi-supervised classification. Our code is available at *https://github.com/ML-GSAI/DPT*.", "keywords": "diffusion models;semi-supervised generation;semi-supervised diffusion models;semi-supervised classification;image generation.", "primary_area": "", "supplementary_material": "/attachment/d58442eb7dd2a35be91e7a42b8eba267436b1540.zip", "author": "Zebin You;Yong Zhong;Fan Bao;Jiacheng Sun;Chongxuan Li;Jun Zhu", "authorids": "~Zebin_You1;~Yong_Zhong2;~Fan_Bao1;~Jiacheng_Sun1;~Chongxuan_Li1;~Jun_Zhu2", "gender": "M;;M;M;M;M", "homepage": "https://yyyouy.github.io/;;https://baofff.github.io/;;http://ml.cs.tsinghua.edu.cn/~chongxuan;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "340/8587;;71/3877;165/5350;161/9965;50/2644-1", "google_scholar": "SO4cpVQAAAAJ;;;;UKMcQn4AAAAJ;axsP38wAAAAJ", "orcid": ";;;;0000-0002-0912-9076;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131;;", "or_profile": "~Zebin_You1;~Yong_Zhong2;~Fan_Bao1;~Jiacheng_Sun1;~Chongxuan_Li1;~Jun_Zhu2", "aff": "Renmin University of China;;Tsinghua University;Huawei Noah's Ark Lab;Renmin University of China;Tsinghua University", "aff_domain": "ruc.edu.cn;;tsinghua.edu.cn;huawei.com;ruc.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;;PhD student;Senior Researcher;Assistant Professor;Professor", "bibtex": "@inproceedings{\nyou2023diffusion,\ntitle={Diffusion Models and Semi-Supervised Learners Benefit Mutually with Few Labels},\nauthor={Zebin You and Yong Zhong and Fan Bao and Jiacheng Sun and Chongxuan Li and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LVHEcVgEGm}\n}", "github": "", "project": "", "reviewers": "hJXD;wQJF;JKn2;2c3F;kGQr", "pdf_size": 2200263, "rating": "6;6;7;7;7", "confidence": 
"4;4;4;3;4", "soundness": "3;2;4;4;3", "novelty": "3;2;4;3;2", "presentation": "2;3;3;4;3", "wc_summary": "82;117;85;101;102", "wc_strengths": "68;46;83;51;61", "wc_weaknesses": "433;138;197;16;190", "wc_questions": "159;90;79;8;49", "wc_limitations": "1;1;42;24;38", "wc_review": "743;392;486;200;440", "wc_reply_reviewers": "37;64;19;0;19", "wc_reply_authors": "29;155;28;0;23", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;3;2;1;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 97.4, 12.7216351150314 ], "wc_strengths_avg": [ 61.8, 13.075167302944921 ], "wc_weaknesses_avg": [ 194.8, 135.61917268587064 ], "wc_questions_avg": [ 77.0, 49.88386512691253 ], "wc_limitations_avg": [ 21.2, 17.54308980767071 ], "wc_review_avg": [ 452.2, 174.97702706355483 ], "wc_reply_reviewers_avg": [ 27.8, 21.55365398256175 ], "wc_reply_authors_avg": [ 47.0, 55.01636120282765 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13925627359911888018&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "ruc.edu.cn;;tsinghua.edu.cn;huawei.com;ruc.edu.cn;mail.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Renmin University of China;Tsinghua University;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "http://www.ruc.edu.cn;https://www.tsinghua.edu.cn;https://www.huawei.com", "aff_unique_abbr": "RUC;THU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Large Language Models can Implement Policy Iteration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71955", "id": "LWxjWoBTsr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/60dc7fa827f5f761ad481e2ad40b5573-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LWxjWoBTsr", "openreview": "https://openreview.net/forum?id=LWxjWoBTsr", "poster": "/media/PosterPDFs/NeurIPS%202023/71955.png?t=1701899124.6212904", "slides": "https://nips.cc/virtual/2023/poster/71955", "video": "https://nips.cc/virtual/2023/poster/71955", "author_site": "Ethan Brooks, Logan Walls, Richard L Lewis, Satinder Singh", "tldr": "", "abstract": "In this work, we demonstrate a method for implementing policy iteration using a large language model. While the application of foundation models to RL has received considerable attention, most approaches rely on either (1) the curation of expert demonstrations (either through manual design or task-specific pretraining) or (2) adaptation to the task of interest using gradient methods (either fine-tuning or training of adapter layers). Both of these techniques have drawbacks. Collecting demonstrations is labor-intensive, and algorithms that rely on them do not outperform the experts from which the demonstrations were derived. All gradient techniques are inherently slow, sacrificing the \u201cfew-shot\u201d quality that makes in-context learning attractive to begin with. 
Our method demonstrates that a large language model can be used to implement policy iteration using the machinery of in-context learning, enabling it to learn to perform RL tasks without expert demonstrations or gradients. Our approach iteratively updates the contents of the prompt from which it derives its policy through trial-and-error interaction with an RL environment. In order to eliminate the role of in-weights learning (on which approaches like Decision Transformer rely heavily), we demonstrate our method using Codex (M. Chen et al. 2021b), a language model with no prior knowledge of the domains on which we evaluate it.", "keywords": "Reinforcement Learning;In-Context Learning;Foundation Models", "primary_area": "", "supplementary_material": "", "author": "Ethan Brooks;Logan A Walls;Richard Lewis;Satinder Singh", "authorids": "~Ethan_Brooks1;~Logan_A_Walls1;~Richard_Lewis1;~Satinder_Singh2", "gender": "M;;M;", "homepage": "https://ethanabrooks.github.io/;https://github.com/LoganWalls;;", "dblp": ";217/9343;12/590;", "google_scholar": "MxDHjTUAAAAJ;;;", "orcid": ";0000-0002-5678-441X;;", "linkedin": ";;;", "or_profile": "~Ethan_Brooks1;~Logan_A_Walls1;~Richard_Lewis1;~Satinder_Baveja2", "aff": "University of Michigan;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;Google DeepMind", "aff_domain": "umich.edu;umich.edu;umich.edu;google.com", "position": "PhD student;PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nbrooks2023large,\ntitle={Large Language Models can Implement Policy Iteration},\nauthor={Ethan Brooks and Logan A Walls and Richard Lewis and Satinder Singh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LWxjWoBTsr}\n}", "github": "", "project": "", "reviewers": "WHe7;Yb53;6gCR;EN6r;RKry", "pdf_size": 1604726, "rating": "5;5;5;6;7", "confidence": "1;3;4;4;3", "soundness": "4;2;3;2;3", "novelty": "2;2;2;2;4", "presentation": "3;3;2;2;4", "wc_summary": "71;106;102;63;49", "wc_strengths": "41;100;80;80;107", "wc_weaknesses": "35;376;83;327;190", "wc_questions": "1;129;65;3;71", "wc_limitations": "6;108;94;18;45", "wc_review": "154;819;424;491;462", "wc_reply_reviewers": "22;243;0;72;155", "wc_reply_authors": "31;419;179;25;20", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;5;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 78.2, 22.247696509976038 ], "wc_strengths_avg": [ 81.6, 22.966061917533878 ], "wc_weaknesses_avg": [ 202.2, 132.73643056825054 ], "wc_questions_avg": [ 53.8, 47.84307682413413 ], "wc_limitations_avg": [ 54.2, 40.489010854798615 ], "wc_review_avg": [ 470.0, 211.79140681340212 ], "wc_reply_reviewers_avg": [ 98.4, 89.81002171250155 ], "wc_reply_authors_avg": [ 134.8, 154.09918883628168 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.22821773229381925, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17638058676156577176&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "umich.edu;umich.edu;umich.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Michigan;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": 
"https://www.umich.edu;https://deepmind.com", "aff_unique_abbr": "UM;DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "High-dimensional Contextual Bandit Problem without Sparsity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71954", "id": "LZ4WgwmrUJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b35a0a20d617dc68ae98a7a57df2f51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LZ4WgwmrUJ", "openreview": "https://openreview.net/forum?id=LZ4WgwmrUJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71954.png?t=1701808462.627125", "slides": "https://nips.cc/virtual/2023/poster/71954", "video": "https://nips.cc/virtual/2023/poster/71954", "author_site": "Junpei Komiyama, Masaaki Imaizumi", "tldr": "", "abstract": "In this research, we investigate the high-dimensional linear contextual bandit problem where the number of features $p$ is greater than the budget $T$, or it may even be infinite. Differing from the majority of previous works in this field, we do not impose sparsity on the regression coefficients. Instead, we rely on recent findings on overparameterized models, which enables us to analyze the performance of the minimum-norm interpolating estimator when data distributions have small effective ranks. We propose an explore-then-commit (EtC) algorithm to address this problem and examine its performance. Through our analysis, we derive the optimal rate of the ETC algorithm in terms of $T$ and show that this rate can be achieved by balancing exploration and exploitation. Moreover, we introduce an adaptive explore-then-commit (AEtC) algorithm that adaptively finds the optimal balance. 
We assess the performance of the proposed algorithms through a series of simulations.", "keywords": "multi-armed bandits;linear bandits;contextual bandits;overparameterized models;high-dimensional models;online learning", "primary_area": "", "supplementary_material": "/attachment/75172b0c290fdcceef4dde676e7f7803a3c22e07.pdf", "author": "Junpei Komiyama;Masaaki Imaizumi", "authorids": "~Junpei_Komiyama1;~Masaaki_Imaizumi1", "gender": "M;M", "homepage": "https://sites.google.com/view/junpeikomiyama/home;https://sites.google.com/view/mimaizumi/home", "dblp": "137/4226;", "google_scholar": "https://scholar.google.co.jp/citations?user=1uFfImMAAAAJ;https://scholar.google.co.jp/citations?user=6c0Ljd4AAAAJ", "orcid": ";", "linkedin": ";masaaki-imaizumi-38600b157/", "or_profile": "~Junpei_Komiyama1;~Masaaki_Imaizumi1", "aff": "RIKEN;The University of Tokyo", "aff_domain": "riken.jp;u-tokyo.ac.jp", "position": "Researcher;Associate Professor", "bibtex": "@inproceedings{\nkomiyama2023highdimensional,\ntitle={High-dimensional Contextual Bandit Problem without Sparsity},\nauthor={Junpei Komiyama and Masaaki Imaizumi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LZ4WgwmrUJ}\n}", "github": "", "project": "", "reviewers": "57ot;dtiS;C3HD;pLAf;h4Ff", "pdf_size": 742158, "rating": "5;5;6;6;6", "confidence": "3;2;4;3;3", "soundness": "2;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;2;3;2;3", "wc_summary": "90;76;102;126;72", "wc_strengths": "46;63;67;64;63", "wc_weaknesses": "257;134;142;196;81", "wc_questions": "83;285;98;28;26", "wc_limitations": "4;20;2;1;1", "wc_review": "480;578;411;415;243", "wc_reply_reviewers": "80;14;0;10;12", "wc_reply_authors": "55;133;0;0;0", "reply_reviewers": "2;1;0;1;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 93.2, 19.5386795869117 ], "wc_strengths_avg": [ 60.6, 7.445804187594514 ], "wc_weaknesses_avg": [ 162.0, 59.876539646175274 ], "wc_questions_avg": [ 104.0, 94.97157469474749 ], "wc_limitations_avg": [ 5.6, 7.28285658241325 ], "wc_review_avg": [ 425.4, 109.41041997908609 ], "wc_reply_reviewers_avg": [ 23.2, 28.80555501982213 ], "wc_reply_authors_avg": [ 37.6, 52.2402143946596 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5623094507626106939&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "riken.jp;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "RIKEN;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.riken.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "RIKEN;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Echoes Beyond Points: Unleashing the Power of Raw Radar Data in Multi-modality Fusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71953", "id": "LZzsn51DPr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a8f7f12b29d9b8c227785f6b529f63b7-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=LZzsn51DPr", "openreview": "https://openreview.net/forum?id=LZzsn51DPr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71953", "video": "https://nips.cc/virtual/2023/poster/71953", "author_site": "Yang Liu, Feng Wang, Naiyan Wang, ZHAO-XIANG ZHANG", "tldr": "", "abstract": "Radar is ubiquitous in autonomous driving systems due to its low cost and good adaptability to bad weather. Nevertheless, the radar detection performance is usually inferior because its point cloud is sparse and not accurate due to the poor azimuth and elevation resolution. Moreover, point cloud generation algorithms already drop weak signals to reduce the false targets which may be suboptimal for the use of deep fusion. In this paper, we propose a novel method named EchoFusion to skip the existing radar signal processing pipeline and then incorporate the radar raw data with other sensors. Specifically, we first generate the Bird's Eye View (BEV) queries and then take corresponding spectrum features from radar to fuse with other sensors. By this approach, our method could utilize both rich and lossless distance and speed clues from radar echoes and rich semantic clues from images, making our method surpass all existing methods on the RADIal dataset, and approach the performance of LiDAR. The code will be released on https://github.com/tusen-ai/EchoFusion.", "keywords": "4D Radar; Transformer; Multi-modality", "primary_area": "", "supplementary_material": "/attachment/5d1d81f1080741e38be2527f3a61987410666c65.pdf", "author": "Yang Liu;Feng Wang;Naiyan Wang;Zhaoxiang Zhang", "authorids": "~Yang_Liu101;~Feng_Wang1;~Naiyan_Wang1;~Zhaoxiang_Zhang3", "gender": "M;M;M;M", "homepage": "http://happynear.wang/;http://winsty.net;http://zhaoxiangzhang.net;https://github.com/DekuLiuTesla", "dblp": "90/4225-15;31/9922;55/2285-1.html;51/3710-347", "google_scholar": "GKGSZUoAAAAJ;yAWtq6QAAAAJ;qxWfV6cAAAAJ;atD1_iMAAAAJ", "orcid": ";;;0009-0005-7193-0795", "linkedin": ";;;", "or_profile": "~Feng_Wang1;~Naiyan_Wang1;~Zhaoxiang_Zhang3;~Jimmy_Liu1", "aff": "TuSimple;Tusimple;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "tusimple.com;tusimple.ai;ia.ac.cn;ia.ac.cn", "position": "Researcher;Chief Scientist;Full Professor;PhD student", "bibtex": "@inproceedings{\nliu2023echoes,\ntitle={Echoes Beyond Points: Unleashing the Power of Raw Radar Data in Multi-modality Fusion},\nauthor={Yang Liu and Feng Wang and Naiyan Wang and Zhaoxiang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LZzsn51DPr}\n}", "github": "", "project": "", "reviewers": "w8tA;eUpu;K4kM;mPFG;jS6D", "pdf_size": 3736579, "rating": "5;5;5;6;8", "confidence": "3;5;4;2;3", "soundness": "3;3;3;4;4", "novelty": "2;2;3;4;3", "presentation": "3;3;3;2;4", "wc_summary": "61;115;57;81;117", "wc_strengths": "49;61;22;46;298", "wc_weaknesses": "74;202;24;142;108", "wc_questions": "36;26;9;180;2", "wc_limitations": "28;15;7;1;3", "wc_review": "248;419;119;450;528", "wc_reply_reviewers": "103;33;30;233;0", "wc_reply_authors": "0;0;0;65;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 86.2, 25.662423891752706 ], 
"wc_strengths_avg": [ 95.2, 102.18688761284395 ], "wc_weaknesses_avg": [ 110.0, 60.305886943150085 ], "wc_questions_avg": [ 50.6, 65.81063743803125 ], "wc_limitations_avg": [ 10.8, 9.846826900072937 ], "wc_review_avg": [ 352.8, 148.4390784126606 ], "wc_reply_reviewers_avg": [ 79.8, 83.72192066597611 ], "wc_reply_authors_avg": [ 13.0, 26.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4372373160976031, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3836838244887894923&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "tusimple.com;tusimple.ai;ia.ac.cn;ia.ac.cn", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "TuSimple;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "https://www.tusimple.com;http://www.ia.cas.cn", "aff_unique_abbr": "TuSimple;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;China" }, { "title": "DataPerf: Benchmarks for Data-Centric AI Development", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73622", "id": "LaFKTgrZMG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/112db88215e25b3ae2750e9eefcded94-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=LaFKTgrZMG", "openreview": "https://openreview.net/forum?id=LaFKTgrZMG", "poster": "/media/PosterPDFs/NeurIPS%202023/73622.png?t=1701969628.9415615", "slides": "https://nips.cc/virtual/2023/poster/73622", "video": "https://nips.cc/virtual/2023/poster/73622", "author_site": "Mark Mazumder, Colby Banbury, Xiaozhe Yao, Bojan Karla\u0161, William Gaviria Rojas, Sudnya Diamos, Greg Diamos, Lynn He, Alicia Parrish, Hannah Rose Kirk, Jessica Quaye, Charvi Rastogi, Douwe Kiela, David Jurado, David Kanter, Rafael Mosquera, Will Cukierski, Juan Ciro, Lora Aroyo, Bilge Acun, Lingjiao Chen, Mehul Raje, Max Bartolo, Evan Sabri Eyuboglu, Amirata Ghorbani, Emmett Goodman, Addison Howard, Oana Inel, Tariq Kane, Christine R. Kirkpatrick, D. Sculley, Tzu-Sheng Kuo, Jonas Mueller, Tristan Thrush, Joaquin Vanschoren, Margaret Warren, Adina Williams, Serena Yeung, Newsha Ardalani, Praveen Paritosh, Ce Zhang, James Zou, Carole-Jean Wu, Cody Coleman, Andrew Ng, Peter Mattson, Vijay Janapa Reddi", "tldr": "", "abstract": "Machine learning research has long focused on models rather than datasets, and prominent datasets are used for common ML tasks without regard to the breadth, difficulty, and faithfulness of the underlying problems. Neglecting the fundamental importance of data has given rise to inaccuracy, bias, and fragility in real-world applications, and research is hindered by saturation across existing dataset benchmarks. In response, we present DataPerf, a community-led benchmark suite for evaluating ML datasets and data-centric algorithms. We aim to foster innovation in data-centric AI through competition, comparability, and reproducibility. We enable the ML community to iterate on datasets, instead of just architectures, and we provide an open, online platform with multiple rounds of challenges to support this iterative development. 
The first iteration of DataPerf contains five benchmarks covering a wide spectrum of data-centric techniques, tasks, and modalities in vision, speech, acquisition, debugging, and diffusion prompting, and we support hosting new contributed benchmarks from the community. The benchmarks, online evaluation platform, and baseline implementations are open source, and the MLCommons Association will maintain DataPerf to ensure long-term benefits to academia and industry.", "keywords": "datasets;datacentric;vision;speech;acquisition;debugging;prompting", "primary_area": "", "supplementary_material": "", "author": "Mark Mazumder;Colby Banbury;Xiaozhe Yao;Bojan Karla\u0161;William A Gaviria Rojas;Sudnya Diamos;Greg Diamos;Lynn He;Alicia Parrish;Hannah Rose Kirk;Jessica Quaye;Charvi Rastogi;Douwe Kiela;David Jurado;David Kanter;Rafael Mosquera;Will Cukierski;Juan Ciro;Lora Aroyo;Bilge Acun;Lingjiao Chen;Mehul Smriti Raje;Max Bartolo;Sabri Eyuboglu;Amirata Ghorbani;Emmett Daniel Goodman;Addison Howard;Oana Inel;Tariq Kane;Christine Kirkpatrick;D. Sculley;Tzu-Sheng Kuo;Jonas Mueller;Tristan Thrush;Joaquin Vanschoren;Margaret Warren;Adina Williams;Serena Yeung;Newsha Ardalani;Praveen Paritosh;Ce Zhang;James Y. Zou;Carole-Jean Wu;Cody Coleman;Andrew Ng;Peter Mattson;Vijay Janapa Reddi", "authorids": "~Mark_Mazumder1;~Colby_Banbury1;~Xiaozhe_Yao1;~Bojan_Karla\u01611;~William_A_Gaviria_Rojas1;~Sudnya_Diamos1;gregory.diamos@gmail.com;lynnhe@berkeley.edu;~Alicia_Parrish1;~Hannah_Rose_Kirk1;jquaye@g.harvard.edu;~Charvi_Rastogi1;~Douwe_Kiela1;~David_Jurado1;~David_Kanter2;~Rafael_Mosquera1;wcukierski@google.com;juanciro@mlcommons.org;~Lora_Aroyo1;acun@meta.com;~Lingjiao_Chen1;mehul@coactive.ai;~Max_Bartolo1;~Sabri_Eyuboglu1;~Amirata_Ghorbani2;~Emmett_Daniel_Goodman1;addison@kaggle.com;~Oana_Inel2;tariqkane@google.com;~Christine_Kirkpatrick1;~D._Sculley1;~Tzu-Sheng_Kuo1;jonas@cleanlab.ai;~Tristan_Thrush1;~Joaquin_Vanschoren1;~Margaret_Warren1;~Adina_Williams1;~Serena_Yeung1;~Newsha_Ardalani1;~Praveen_Paritosh2;~Ce_Zhang1;~James_Y._Zou1;~Carole-Jean_Wu2;~Cody_Coleman1;ng@deeplearning.ai;~Peter_Mattson1;~Vijay_Janapa_Reddi1", "gender": ";M;M;;M;Not Specified;;;;F;;F;M;M;Not Specified;M;;;F;;;;;;M;M;;F;;F;;;;;M;F;F;F;F;;;M;F;M;;M;M", "homepage": "https://markmaz.com;https://www.colbybanbury.com/;https://about.yao.sh;https://bojan.ninja/;;;;;;https://www.hannahrosekirk.com/;;https://sites.google.com/view/charvirastogi/home;https://douwekiela.github.io;;https://www.realworldtech.com;;;;http://lora-aroyo.org;;;;https://maxbartolo.com;http://www.sabrieyuboglu.com/;http://web.stanford.edu/~amiratag;https://egoodman92.github.io/;;https://oana-inel.github.io;;;http://www.eecs.tufts.edu/~dsculley;;;http://www.tristanthrush.com/;http://www.win.tue.nl/~jvanscho/;https://www.ihmc.us/groups/margaret-warren/;http://www.adinawilliams.com;http://ai.stanford.edu/~syyeung/;;;;;;http://www.codycoleman.com/;;;https://scholar.harvard.edu/vijay-janapa-reddi", "dblp": ";249/5648;212/8935;185/0781;;;;;;284/9434;;209/9629.html;136/9140;;;;;;42/6100;;131/6638.html;;227/3290;298/7563;https://dblp.org/pers/hd/g/Ghorbani:Amirata;;;138/0264;;;https://dblp.uni-trier.de/pers/hd/s/Sculley:D=;;;259/1796;85/5045;;199/2104;147/5023;53/7913.html;;97/919;72/8399;26/9655;https://dblp.uni-trier.de/pers/hd/c/Coleman:Cody;;;88/2610", "google_scholar": 
";zsyfpmUAAAAJ;;Uv7RWgkAAAAJ;1_rJ_PwAAAAJ;;;;;Fha8ldEAAAAJ;;OvNdXjsAAAAJ;Q0piorUAAAAJ;;jLyty0sAAAAJ;XC9DJhUAAAAJ;;;https://scholar.google.nl/citations?user=FXGgl5IAAAAJ;;;;jPSWYn4AAAAJ;;BtgIFycAAAAJ;Fy1QH_AAAAAJ;;https://scholar.google.nl/citations?user=mEi2gvgAAAAJ;;;l_O64B8AAAAJ;i305250AAAAJ;;qDDmq54AAAAJ;HhDsD9UAAAAJ;https://scholar.google.com/citations?hl=en;MUtbKt0AAAAJ;Tw2m5kUAAAAJ;w-y4MOcAAAAJ;;;;S1szbyAAAAAJ;https://scholar.google.com/citations?hl=en;;kkR3UOkAAAAJ;https://scholar.google.com/citations?view_op=search_authors", "orcid": ";;;0000-0002-6462-3579;;;;;;0000-0002-7419-5993;;;;;;0009-0009-0812-6330;;;0000-0001-9402-1133;;;;0009-0007-3301-7895;;;;;0000-0003-4691-6586;;0000-0002-4451-8042;;0000-0002-1504-7640;;;0000-0001-7044-9805;0000-0002-6680-2431;0000-0001-5281-3343;0000-0003-0529-0628;;;;;;;;;0000-0002-5259-7721", "linkedin": ";colby-banbury-267956135/;;bojankarlas;williamgaviria/;sudnya/;;;;hannah-rose-kirk;;;;david-fernando-jurado-blanco/;kanterd/;rafael-mosquera/;;;laroyo/;;;;maxbartolo/;;amirata-ghorbani-68438765;emmett-goodman/;;oana-inel-7110995a/;;;d-sculley-90467310/;;;;;margaret-warrenis/;;;;;;;;;;;vijay-janapa-reddi-63a6a173/", "or_profile": "~Mark_Mazumder1;~Colby_Banbury1;~Xiaozhe_Yao1;~Bojan_Karla\u01611;~William_A_Gaviria_Rojas1;~Sudnya_Diamos1;gregory.diamos@gmail.com;lynnhe@berkeley.edu;~Alicia_Parrish1;~Hannah_Rose_Kirk1;jquaye@g.harvard.edu;~Charvi_Rastogi1;~Douwe_Kiela1;~David_Jurado1;~David_Kanter2;~Rafael_Mosquera1;wcukierski@google.com;juanciro@mlcommons.org;~Lora_Aroyo1;acun@meta.com;~Lingjiao_Chen1;mehul@coactive.ai;~Max_Bartolo1;~Sabri_Eyuboglu1;~Amirata_Ghorbani2;~Emmett_Daniel_Goodman1;addison@kaggle.com;~Oana_Inel2;tariqkane@google.com;~Christine_Kirkpatrick1;~D._Sculley1;~Tzu-Sheng_Kuo1;jonas@cleanlab.ai;~Tristan_Thrush1;~Joaquin_Vanschoren1;~Margaret_Warren1;~Adina_Williams1;~Serena_Yeung1;~Newsha_Ardalani1;~Praveen_Paritosh2;~Ce_Zhang1;~James_Y._Zou1;~Carole-Jean_Wu2;~Cody_Coleman1;ng@deeplearning.ai;~Peter_Mattson1;~Vijay_Janapa_Reddi1", "aff": "Harvard University;Harvard University;Department of Computer Science, ETHZ - ETH Zurich;Harvard Medical School;Coactive Systems Inc.;;;;;Alan Turing Institute;;Carnegie Mellon University;Stanford University;MLCommons;Real World Insights;Universidad de Los Andes;;;Google;;Stanford University;;University College London;Stanford University;Stanford University;;;University of Zurich;;San Diego Supercomputer Center, UC San Diego;;Carnegie Mellon University;;Hugging Face;Eindhoven University of Technology;The Institute for Human & Machine Cognition;FAIR (Meta Platforms Inc.);Stanford University;Meta AI;Google;University of Chicago;;Meta;Stanford University;;Google;Harvard University", "aff_domain": "harvard.edu;harvard.edu;inf.ethz.ch;hms.harvard.edu;coactive.ai;;;;;turing.ac.uk;;cmu.edu;stanford.edu;mlcommons.org;realworldtech.com;uniandes.edu.co;;;google.com;;stanford.edu;;ucl.ac.uk;stanford.edu;stanford.edu;;;uzh.ch;;sdsc.edu;;cmu.edu;;huggingface.co;tue.nl;ihmc.us;facebook.com;stanford.edu;meta.com;research.google.com;uchicago.edu;;meta.com;stanford.edu;;google.com;harvard.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Researcher;;;;;Researcher;;PhD student;Adjunct Professor;Machine Learning Engineer;President;MS student;;;Researcher;;PhD student;;PhD student;PhD student;PhD student;;;Postdoc;;Researcher;;PhD student;;Researcher;Associate Professor;Researcher;Research Scientist;Assistant Professor;Researcher;Senior Research Scientist;Associate Professor;;Researcher;PhD student;;Google 
Engineer;Associate Professor", "bibtex": "@inproceedings{\nmazumder2023dataperf,\ntitle={DataPerf: Benchmarks for Data-Centric {AI} Development},\nauthor={Mark Mazumder and Colby Banbury and Xiaozhe Yao and Bojan Karla{\\v{s}} and William A Gaviria Rojas and Sudnya Diamos and Greg Diamos and Lynn He and Alicia Parrish and Hannah Rose Kirk and Jessica Quaye and Charvi Rastogi and Douwe Kiela and David Jurado and David Kanter and Rafael Mosquera and Will Cukierski and Juan Ciro and Lora Aroyo and Bilge Acun and Lingjiao Chen and Mehul Smriti Raje and Max Bartolo and Sabri Eyuboglu and Amirata Ghorbani and Emmett Daniel Goodman and Addison Howard and Oana Inel and Tariq Kane and Christine Kirkpatrick and D. Sculley and Tzu-Sheng Kuo and Jonas Mueller and Tristan Thrush and Joaquin Vanschoren and Margaret Warren and Adina Williams and Serena Yeung and Newsha Ardalani and Praveen Paritosh and Ce Zhang and James Y. Zou and Carole-Jean Wu and Cody Coleman and Andrew Ng and Peter Mattson and Vijay Janapa Reddi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=LaFKTgrZMG}\n}", "github": "", "project": "", "reviewers": "RDrL;qPz4;sXMf;KK87", "pdf_size": 1920856, "rating": "6;6;8;8", "confidence": "5;5;4;4", "wc_summary_and_contributions": "55;39;54;168", "wc_strengths": "43;59;54;84", "wc_improvement": "200;518;158;323", "wc_limitations": "1;13;14;1", "wc_correctness": "1;6;17;1", "wc_clarity": "1;58;1;55", "wc_relation_to_prior_work": "18;15;1;3", "wc_documentation": "25;25;21;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "345;734;321;638", "wc_reply_reviewers": "56;498;0;0", "wc_reply_authors": "442;1504;282;562", "reply_reviewers": "1;2;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 79.0, 51.77354536826699 ], "wc_strengths_avg": [ 60.0, 15.016657417681207 ], "wc_improvement_avg": [ 299.75, 139.83628820874787 ], "wc_limitations_avg": [ 7.25, 6.2599920127744575 ], "wc_correctness_avg": [ 6.25, 6.53356717268599 ], "wc_clarity_avg": [ 28.75, 27.770262872360426 ], "wc_relation_to_prior_work_avg": [ 9.25, 7.361215932167728 ], "wc_documentation_avg": [ 18.25, 9.522998477370455 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 509.5, 179.93401568352772 ], "wc_reply_reviewers_avg": [ 138.5, 208.8127151300897 ], "wc_reply_authors_avg": [ 697.5, 476.1100187981765 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 47, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 147, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15223786853090276431&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "harvard.edu;harvard.edu;inf.ethz.ch;hms.harvard.edu;coactive.ai;;;;;turing.ac.uk;;cmu.edu;stanford.edu;mlcommons.org;realworldtech.com;uniandes.edu.co;;;google.com;;stanford.edu;;ucl.ac.uk;stanford.edu;stanford.edu;;;uzh.ch;;sdsc.edu;;cmu.edu;;huggingface.co;tue.nl;ihmc.us;facebook.com;stanford.edu;meta.com;research.google.com;uchicago.edu;;meta.com;stanford.edu;;google.com;harvard.edu", "author_num": 47, "aff_unique_index": "0;0;1;0;2;3;4;5;6;7;8;9;5;10;5;5;11;12;4;13;14;15;16;5;16;9;17;16;5;9;0", "aff_unique_norm": "Harvard University;ETH Zurich;Coactive Systems;Alan Turing Institute;Carnegie Mellon University;Stanford University;MLCommons;Real World Insights;Universidad de los 
Andes;Google;University College London;University of Zurich;University of California, San Diego;Hugging Face;Eindhoven University of Technology;Institute for Human & Machine Cognition;Meta;University of Chicago", "aff_unique_dep": ";Department of Computer Science;;;;;;;;Google;;;San Diego Supercomputer Center;;;;FAIR;", "aff_unique_url": "https://www.harvard.edu;https://www.ethz.ch;;https://www.turing.ac.uk;https://www.cmu.edu;https://www.stanford.edu;https://mlcommons.org;;https://uniandes.edu.co;https://www.google.com;https://www.ucl.ac.uk;https://www.unizh.ch;https://ucsd.edu;https://huggingface.co;https://www.tue.nl;https://www.ihmc.us;https://www.meta.com;https://www.uchicago.edu", "aff_unique_abbr": "Harvard;ETHZ;CSI;ATI;CMU;Stanford;MLCommons;;Uniandes;Google;UCL;UZH;UCSD;Hugging Face;TU/e;IHMC;Meta;UChicago", "aff_campus_unique_index": "1;2;3;4;3;3;3;5;3;4;3;4", "aff_campus_unique": ";Zurich;Boston;Stanford;Mountain View;San Diego", "aff_country_unique_index": "0;0;1;0;0;2;0;0;0;4;0;0;2;0;0;1;0;0;0;5;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;Switzerland;United Kingdom;;Colombia;Netherlands" }, { "title": "Residual Q-Learning: Offline and Online Policy Customization without Value", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71952", "id": "LaNeRwDrTk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c2e4cebba2fdb3dac7d2022421062765-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LaNeRwDrTk", "openreview": "https://openreview.net/forum?id=LaNeRwDrTk", "poster": "/media/PosterPDFs/NeurIPS%202023/71952.png?t=1701586393.7720833", "slides": "https://nips.cc/virtual/2023/poster/71952", "video": "https://nips.cc/virtual/2023/poster/71952", "author_site": "Chenran Li, Chen Tang, Haruki Nishimura, Jean Mercat, Masayoshi TOMIZUKA, Wei Zhan", "tldr": "", "abstract": "Imitation Learning (IL) is a widely used framework for learning imitative behavior from demonstrations. It is especially appealing for solving complex real-world tasks where handcrafting a reward function is difficult, or when the goal is to mimic human expert behavior. However, the learned imitative policy can only follow the behavior in the demonstration. When applying the imitative policy, we may need to customize the policy behavior to meet different requirements coming from diverse downstream tasks. Meanwhile, we still want the customized policy to maintain its imitative nature. To this end, we formulate a new problem setting called policy customization. It defines the learning task as training a policy that inherits the characteristics of the prior policy while satisfying some additional requirements imposed by a target downstream task. We propose a novel and principled approach to interpret and determine the trade-off between the two task objectives. Specifically, we formulate the customization problem as a Markov Decision Process (MDP) with a reward function that combines 1) the inherent reward of the demonstration; and 2) the add-on reward specified by the downstream task. We propose a novel framework, Residual Q-learning, which can solve the formulated MDP by leveraging the prior policy without knowing the inherent reward or value function of the prior policy. We derive a family of residual Q-learning algorithms that can realize offline and online policy customization, and show that the proposed algorithms can effectively accomplish policy customization tasks in various environments. 
Demo videos and code are available on our website: https://sites.google.com/view/residualq-learning.", "keywords": "reinforcement learning;imitation learning", "primary_area": "", "supplementary_material": "/attachment/c8806d09f31f0c8e2db0fd1391e1fd4e14828fab.pdf", "author": "Chenran Li;Chen Tang;Haruki Nishimura;Jean Mercat;Masayoshi Tomizuka;Wei Zhan", "authorids": "~Chenran_Li1;~Chen_Tang2;~Haruki_Nishimura2;~Jean_Mercat1;~Masayoshi_Tomizuka1;~Wei_Zhan2", "gender": ";M;;M;M;", "homepage": ";https://chentangmark.github.io;;http://jean-mercat.netlify.app;https://me.berkeley.edu/people/masayoshi-tomizuka/;", "dblp": ";71/7642;;248/2886;10/4434;", "google_scholar": ";x78TL58AAAAJ;;https://scholar.google.com/citations?hl=fr;;", "orcid": ";;;0000-0002-4012-9082;;", "linkedin": ";chen-tang-08377b5b/;;;;", "or_profile": "~Chenran_Li1;~Chen_Tang2;~Haruki_Nishimura2;~Jean_Mercat1;~Masayoshi_Tomizuka1;~Wei_Zhan2", "aff": ";University of California, Berkeley;;Toyota Research Institute;University of California, Berkeley;", "aff_domain": ";berkeley.edu;;tri.global;berkeley.edu;", "position": ";Postdoc;;Researcher;Full Professor;", "bibtex": "@inproceedings{\nli2023residual,\ntitle={Residual Q-Learning: Offline and Online Policy Customization without Value},\nauthor={Chenran Li and Chen Tang and Haruki Nishimura and Jean Mercat and Masayoshi Tomizuka and Wei Zhan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LaNeRwDrTk}\n}", "github": "", "project": "", "reviewers": "sftZ;2HY5;6WQ5;QjWD;ePZC", "pdf_size": 1952922, "rating": "4;4;5;7;8", "confidence": "5;4;3;4;4", "soundness": "2;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;2;4;4", "wc_summary": "135;92;73;184;126", "wc_strengths": "124;60;45;132;116", "wc_weaknesses": "258;99;108;145;27", "wc_questions": "5;80;70;145;81", "wc_limitations": "5;123;29;16;1", "wc_review": "527;454;325;622;351", "wc_reply_reviewers": "27;146;21;246;14", "wc_reply_authors": "42;656;28;440;0", "reply_reviewers": "1;1;1;3;1", "reply_authors": "2;3;2;3;1", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 122.0, 38.28837943815329 ], "wc_strengths_avg": [ 95.4, 35.70770225035489 ], "wc_weaknesses_avg": [ 127.4, 75.66928042475361 ], "wc_questions_avg": [ 76.2, 44.4495219321873 ], "wc_limitations_avg": [ 34.8, 45.15927368769343 ], "wc_review_avg": [ 455.8, 110.26041900881748 ], "wc_reply_reviewers_avg": [ 90.8, 91.62401431939118 ], "wc_reply_authors_avg": [ 233.2, 266.2978783242555 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.19462473604038075, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3623565348316490294&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";berkeley.edu;;tri.global;berkeley.edu;", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;Toyota Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.tri.global", "aff_unique_abbr": "UC Berkeley;TRI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": 
"ODE-based Recurrent Model-free Reinforcement Learning for POMDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71950", "id": "LdvVd0bNyO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf70320e93c08b39b1b29a348097a376-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LdvVd0bNyO", "openreview": "https://openreview.net/forum?id=LdvVd0bNyO", "poster": "/media/PosterPDFs/NeurIPS%202023/71950.png?t=1701834573.893612", "slides": "https://nips.cc/virtual/2023/poster/71950", "video": "https://nips.cc/virtual/2023/poster/71950", "author_site": "Xuanle Zhao, Duzhen Zhang, Han Liyuan, Tielin Zhang, Bo Xu", "tldr": "", "abstract": "Neural ordinary differential equations (ODEs) are widely recognized as the standard for modeling physical mechanisms, which help to perform approximate inference in unknown physical or biological environments. In partially observable (PO) environments, how to infer unseen information from raw observations puzzled the agents. By using a recurrent policy with a compact context, context-based reinforcement learning provides a flexible way to extract unobservable information from historical transitions. To help the agent extract more dynamics-related information, we present a novel ODE-based recurrent model combines with model-free reinforcement learning (RL) framework to solve partially observable Markov decision processes (POMDPs). We experimentally demonstrate the efficacy of our methods across various PO continuous control and meta-RL tasks. Furthermore, our experiments illustrate that our method is robust against irregular observations, owing to the ability of ODEs to model irregularly-sampled time series.", "keywords": "neural ode;POMDPs;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/bf0151df8bdb16d6c8c9b19238427ff676fcf6bd.pdf", "author": "Xuanle Zhao;Duzhen Zhang;Liyuan Han;Tielin Zhang;Bo XU", "authorids": "~Xuanle_Zhao1;~Duzhen_Zhang1;~Liyuan_Han2;~Tielin_Zhang1;~Bo_XU10", "gender": "M;M;M;M;M", "homepage": ";https://bladedancer957.github.io/;;http://140.143.150.151/publication.html;", "dblp": "357/5625;235/0398.html;221/5564;145/7761;", "google_scholar": ";o0jlAfwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;87bZY8YAAAAJ;", "orcid": ";0000-0002-4280-431X;0000-0003-3535-4258;0000-0002-5111-9891;", "linkedin": ";;;;%E6%B3%A2-%E5%BE%90-74210b115/?midToken=AQH1EMB1ZoboJA&midSig=2Q5MzMXmNEH9M1&trk=eml-email_pymk_02-header-22-profile&trkEmail=eml-email_pymk_02-header-22-profile-null-7ydrhe~kpggjoav~k9-null-neptune/profile~vanity.view", "or_profile": "~Xuanle_Zhao1;~Duzhen_Zhang1;~Liyuan_Han2;~Tielin_Zhang1;~Bo_XU10", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2023odebased,\ntitle={{ODE}-based Recurrent Model-free Reinforcement Learning for {POMDP}s},\nauthor={Xuanle Zhao and Duzhen Zhang and Liyuan Han and Tielin Zhang and Bo XU},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LdvVd0bNyO}\n}", "github": "", "project": "", "reviewers": "kjqZ;BSsg;6E9a;FbJ4", 
"pdf_size": 4792839, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;1;3", "wc_summary": "100;77;144;46", "wc_strengths": "27;159;44;122", "wc_weaknesses": "161;96;137;522", "wc_questions": "74;201;170;60", "wc_limitations": "10;63;41;50", "wc_review": "372;596;536;800", "wc_reply_reviewers": "219;34;176;37", "wc_reply_authors": "505;15;358;215", "reply_reviewers": "2;1;3;1", "reply_authors": "3;2;4;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 91.75, 35.73776014245996 ], "wc_strengths_avg": [ 88.0, 54.43803817185186 ], "wc_weaknesses_avg": [ 229.0, 170.75274521951323 ], "wc_questions_avg": [ 126.25, 60.45814668016214 ], "wc_limitations_avg": [ 41.0, 19.53202498462461 ], "wc_review_avg": [ 576.0, 153.12739794040777 ], "wc_reply_reviewers_avg": [ 116.5, 82.42117446384758 ], "wc_reply_authors_avg": [ 273.25, 180.95355066977822 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14289851336785891393&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FLAIR : a Country-Scale Land Cover Semantic Segmentation Dataset From Multi-Source Optical Imagery", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73621", "id": "LegGqdch92", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/353ca88f722cdd0c481b999428ae113a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=LegGqdch92", "openreview": "https://openreview.net/forum?id=LegGqdch92", "poster": "/media/PosterPDFs/NeurIPS%202023/73621.png?t=1699528363.252194", "slides": "https://nips.cc/virtual/2023/poster/73621", "video": "https://nips.cc/virtual/2023/poster/73621", "author_site": "Anatol Garioud, Nicolas Gonthier, Loic Landrieu, Apolline De Wit, Marion Valette, Marc Poup\u00e9e, Sebastien Giordano, boris Wattrelos", "tldr": "", "abstract": "We introduce the French Land cover from Aerospace ImageRy (FLAIR), an extensive dataset from the French National Institute of Geographical and Forest Information (IGN) that provides a unique and rich resource for large-scale geospatial analysis. FLAIR contains high-resolution aerial imagery with a ground sample distance of 20 cm and over 20 billion individually labeled pixels for precise land-cover classification. The dataset also integrates temporal and spectral data from optical satellite time series. \n\nFLAIR thus combines data with varying spatial, spectral, and temporal resolutions across over 817 km\u00b2 of acquisitions representing the full landscape diversity of France. 
This diversity makes FLAIR a valuable resource for the development and evaluation of novel methods for large-scale land-cover semantic segmentation and raises significant challenges in terms of computer vision, data fusion, and geospatial analysis. We also provide powerful uni- and multi-sensor baseline models that can be employed to assess algorithms' performance and for downstream applications.", "keywords": "Semantic segmentation;Satellite time series;Aerial imagery;Land cover;Geospatial Data;Multimodal learning", "primary_area": "", "supplementary_material": "/attachment/f88ca68157a3cf7d392dd20eb03aa18ea8816450.pdf", "author": "Anatol Garioud;Nicolas Gonthier;Loic Landrieu;Apolline De Wit;Marion Valette;Marc Poup\u00e9e;Sebastien Giordano;Boris Wattrelos", "authorids": "~Anatol_Garioud1;~Nicolas_Gonthier1;~Loic_Landrieu1;~Apolline_De_Wit1;~Marion_Valette2;~Marc_Poup\u00e9e1;~Sebastien_Giordano1;~Boris_Wattrelos2", "gender": ";;M;F;;M;M;M", "homepage": ";https://ngonthier.github.io/;https://loiclandrieu.com/;https://ignf.github.io/FLAIR/;;;;", "dblp": ";228/6976;165/7958;;;;;", "google_scholar": ";y6a-lk0AAAAJ;B9VnFRcAAAAJ;;;;https://scholar.google.fr/citations?user=IOPxYVgAAAAJ;", "orcid": ";0000-0002-9236-5394;0000-0002-7738-8141;;;;;", "linkedin": ";;;apolline-de-wit/fr?;marion-valette11/;marc-poup%C3%A9e-4a414628;;wattrelos-boris-920091112", "or_profile": "~Anatol_Garioud1;~Nicolas_Gonthier1;~Loic_Landrieu1;~Apolline_De_Wit1;~Marion_Valette2;~Marc_Poup\u00e9e1;~Sebastien_Giordano1;~Boris_Wattrelos2", "aff": ";IGN;IGN;Institut national de l'information g\u00e9ographique et foresti\u00e8re;Institut national de l'information g\u00e9ographique et foresti\u00e8re;Ecole Nationale des Sciences G\u00e9ographiques;;Institut national de l'information g\u00e9ographique et foresti\u00e8re", "aff_domain": ";ign.fr;ign.fr;ign.fr;ign.fr;ensg.ign.fr;;ign.fr", "position": ";Researcher;Assistant Professor;Engineer;Engineer;Lecturer;;Researcher", "bibtex": "@inproceedings{\ngarioud2023flair,\ntitle={{FLAIR} : a Country-Scale Land Cover Semantic Segmentation Dataset From Multi-Source Optical Imagery},\nauthor={Anatol Garioud and Nicolas Gonthier and Loic Landrieu and Apolline De Wit and Marion Valette and Marc Poup{\\'e}e and Sebastien Giordano and Boris Wattrelos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=LegGqdch92}\n}", "github": "", "project": "", "reviewers": "uZv1;j7rF;pqJp;g3Jt;GDnf", "pdf_size": 27999607, "rating": "1;5;7;7;9", "confidence": "5;4;4;4;4", "wc_summary_and_contributions": "41;91;44;74;90", "wc_strengths": "136;43;37;25;29", "wc_improvement": "239;315;60;55;211", "wc_limitations": "81;21;20;16;102", "wc_correctness": "97;26;14;18;146", "wc_clarity": "1;1;9;15;244", "wc_relation_to_prior_work": "18;1;9;19;37", "wc_documentation": "63;1;14;59;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "677;500;208;282;865", "wc_reply_reviewers": "0;464;0;0;143", "wc_reply_authors": "1132;1939;247;293;1382", "reply_reviewers": "0;2;0;0;1", "reply_authors": "4;4;1;1;3", "rating_avg": [ 5.8, 2.7129319932501077 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 68.0, 21.69792616818483 ], "wc_strengths_avg": [ 54.0, 41.47288270665544 ], "wc_improvement_avg": [ 176.0, 102.5787502360991 ], "wc_limitations_avg": [ 48.0, 36.17181222996714 ], "wc_correctness_avg": [ 60.2, 52.53722489816149 ], "wc_clarity_avg": [ 54.0, 95.14620328736191 ], 
"wc_relation_to_prior_work_avg": [ 16.8, 12.039933554633928 ], "wc_documentation_avg": [ 28.4, 26.9785099662676 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 506.4, 243.77743948117924 ], "wc_reply_reviewers_avg": [ 121.4, 180.03066405476596 ], "wc_reply_authors_avg": [ 998.6, 649.9103322766917 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.6, 1.3564659966250538 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8846517369293828, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12754102141094281521&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "email": ";ign.fr;ign.fr;ign.fr;ign.fr;ensg.ign.fr;;ign.fr", "author_num": 8, "aff_unique_index": "0;0;1;1;2;1", "aff_unique_norm": "IGN Entertainment;Institut national de l'information g\u00e9ographique et foresti\u00e8re;Ecole Nationale des Sciences G\u00e9ographiques", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ign.com;https://www.ign.fr;https://www.ensg.eu", "aff_unique_abbr": "IGN;IGN;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "United States;France" }, { "title": "On Dynamic Programming Decompositions of Static Risk Measures in Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71949", "id": "LelK6Mfoey", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a264726ebd222124514a32bf0143b83d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LelK6Mfoey", "openreview": "https://openreview.net/forum?id=LelK6Mfoey", "poster": "/media/PosterPDFs/NeurIPS%202023/71949.png?t=1702501410.835331", "slides": "https://nips.cc/virtual/2023/poster/71949", "video": "https://nips.cc/virtual/2023/poster/71949", "author_site": "Jia Lin Hau, Erick Delage, Mohammad Ghavamzadeh, Marek Petrik", "tldr": "", "abstract": "Optimizing static risk-averse objectives in Markov decision processes is difficult because they do not admit standard dynamic programming equations common in Reinforcement Learning (RL) algorithms. Dynamic programming decompositions that augment the state space with discrete risk levels have recently gained popularity in the RL community. Prior work has shown that these decompositions are optimal when the risk level is discretized sufficiently. However, we show that these popular decompositions for Conditional-Value-at-Risk (CVaR) and Entropic-Value-at-Risk (EVaR) are inherently suboptimal regardless of the discretization level. In particular, we show that a saddle point property assumed to hold in prior literature may be violated. However, a decomposition does hold for Value-at-Risk and our proof demonstrates how this risk measure differs from CVaR and EVaR. 
Our findings are significant because risk-averse algorithms are used in high-stakes environments, making their correctness much more critical.", "keywords": "reinforcement learning;markov decision processes;monetary risk measures", "primary_area": "", "supplementary_material": "/attachment/9c8b3ff96cd184360361f4ca06be91e5f39fdbb3.pdf", "author": "Jia Lin Hau;Erick Delage;Mohammad Ghavamzadeh;Marek Petrik", "authorids": "~Jia_Lin_Hau1;~Erick_Delage2;~Mohammad_Ghavamzadeh2;~Marek_Petrik2", "gender": "M;M;;", "homepage": ";http://web.hec.ca/pages/erick.delage/;;", "dblp": "329/5798;26/1546;;", "google_scholar": "ygX6pZ0AAAAJ;https://scholar.google.ca/citations?user=ciH2ROgAAAAJ;;", "orcid": ";0000-0002-6740-3600;;", "linkedin": "jia-lin-hau-b61730129/;erick-delage-2105361/;;", "or_profile": "~Jia_Lin_Hau1;~Erick_Delage2;~Mohammad_Ghavamzadeh2;~Marek_Petrik2", "aff": "University of New Hampshire;Computer Science Department;;", "aff_domain": "unh.edu;cs.stanford.edu;;", "position": "PhD student;Researcher;;", "bibtex": "@inproceedings{\nhau2023on,\ntitle={On Dynamic Programming Decompositions of Static Risk Measures in Markov Decision Processes},\nauthor={Jia Lin Hau and Erick Delage and Mohammad Ghavamzadeh and Marek Petrik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LelK6Mfoey}\n}", "github": "", "project": "", "reviewers": "bRij;tkg9;eWnC;Gp5S;Vzn6", "pdf_size": 942851, "rating": "3;6;6;7;7", "confidence": "3;4;1;3;3", "soundness": "2;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "2;3;2;4;3", "wc_summary": "31;35;109;106;197", "wc_strengths": "17;17;73;163;152", "wc_weaknesses": "225;177;38;216;460", "wc_questions": "47;21;1;40;199", "wc_limitations": "1;11;1;8;103", "wc_review": "321;261;222;533;1111", "wc_reply_reviewers": "0;34;16;35;44", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 95.6, 60.688054837834436 ], "wc_strengths_avg": [ 84.4, 63.18734050424975 ], "wc_weaknesses_avg": [ 223.2, 136.06086873160848 ], "wc_questions_avg": [ 61.6, 70.53963424912268 ], "wc_limitations_avg": [ 24.8, 39.29580130242925 ], "wc_review_avg": [ 489.6, 328.72943281671627 ], "wc_reply_reviewers_avg": [ 25.8, 15.778466338652816 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.0277777777777778, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3795656407320923705&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "unh.edu;cs.stanford.edu;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of New Hampshire;Computer Science Department", "aff_unique_dep": ";Computer Science", "aff_unique_url": "https://www.unh.edu;", "aff_unique_abbr": "UNH;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "United States;" }, { "title": "Hybrid Policy Optimization from Imperfect Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71948", "id": "LftAvFt54C", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f0a30c7b46be23a83317c5cb721fc43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LftAvFt54C", "openreview": "https://openreview.net/forum?id=LftAvFt54C", "poster": "/media/PosterPDFs/NeurIPS%202023/71948.png?t=1697439729.1462476", "slides": "https://nips.cc/virtual/2023/poster/71948", "video": "https://nips.cc/virtual/2023/poster/71948", "author_site": "Hanlin Yang, Chao Yu, peng sun, Siji Chen", "tldr": "", "abstract": "Exploration is one of the main challenges in Reinforcement Learning (RL), especially in environments with sparse rewards. Learning from Demonstrations (LfD) is a promising approach to solving this problem by leveraging expert demonstrations. However, expert demonstrations of high quality are usually costly or even impossible to collect in real-world applications. In this work, we propose a novel RL algorithm called HYbrid Policy Optimization (HYPO), which uses a small number of imperfect demonstrations to accelerate an agent's online learning process. The key idea is to train an offline guider policy using imitation learning in order to instruct an online agent policy to explore efficiently. Through mutual update of the guider policy and the agent policy, the agent can leverage suboptimal demonstrations for efficient exploration while avoiding the conservative policy caused by imperfect demonstrations. Empirical results show that HYPO significantly outperforms several baselines in various challenging tasks, such as MuJoCo with sparse rewards, Google Research Football, and the AirSim drone simulation.", "keywords": "reinforcement learning;sparse reward;exploration;learning from demonstrations", "primary_area": "", "supplementary_material": "/attachment/46398fd874febd616d871786d4aaeaa1ada698cf.zip", "author": "Hanlin Yang;Chao Yu;peng sun;Siji Chen", "authorids": "~Hanlin_Yang1;~Chao_Yu2;~peng_sun1;~Siji_Chen1", "gender": "M;M;M;M", "homepage": "https://joenghl.github.io;https://cse.sysu.edu.cn/teacher/YuChao;http://pengsun.github.io;https://chensiji.github.io/", "dblp": ";36/6789-4;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hanlin_Yang1;~Chao_Yu2;~peng_sun1;~Siji_Chen1", "aff": "Sun Yat-sen University;SUN YAT-SEN UNIVERSITY;ByteDance;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;mail.sysu.edu.cn;bytedance.com;sysu.edu.cn", "position": "MS student;Associate Professor;Researcher;MS student", "bibtex": "@inproceedings{\nyang2023hybrid,\ntitle={Hybrid Policy Optimization from Imperfect Demonstrations},\nauthor={Hanlin Yang and Chao Yu and peng sun and Siji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LftAvFt54C}\n}", "github": "", "project": "", "reviewers": "5BQ6;74Fn;8Gvz;2APk;8vGE", "pdf_size": 2780550, "rating": "5;5;6;7;7", "confidence": "4;4;3;4;4", "soundness": "3;2;3;3;3", "novelty": "3;3;3;2;3", "presentation": "2;3;2;3;3", "wc_summary": "65;118;91;85;142", "wc_strengths": "29;86;86;26;50", "wc_weaknesses": "77;206;84;385;173", "wc_questions": "5;144;285;2;24", "wc_limitations": "1;1;1;15;1", "wc_review": "177;555;547;513;390", "wc_reply_reviewers": "27;0;94;133;11", "wc_reply_authors": "0;0;0;123;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], 
"presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 100.2, 26.90278795961489 ], "wc_strengths_avg": [ 55.4, 26.31805463935357 ], "wc_weaknesses_avg": [ 185.0, 111.75866856758807 ], "wc_questions_avg": [ 92.0, 109.76884803986968 ], "wc_limitations_avg": [ 3.8, 5.6 ], "wc_review_avg": [ 436.4, 142.55328828196141 ], "wc_reply_reviewers_avg": [ 53.0, 51.633322573702344 ], "wc_reply_authors_avg": [ 24.6, 49.2 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3769890295712761895&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sysu.edu.cn;mail.sysu.edu.cn;bytedance.com;sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Sun Yat-sen University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn/;https://www.bytedance.com", "aff_unique_abbr": "SYSU;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Deep Stochastic Processes via Functional Markov Transition Operators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71947", "id": "Lg1ODJGGiI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7749f9c0d5ff109231be21e910a3ced2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Lg1ODJGGiI", "openreview": "https://openreview.net/forum?id=Lg1ODJGGiI", "poster": "/media/PosterPDFs/NeurIPS%202023/71947.png?t=1701879730.7449641", "slides": "https://nips.cc/virtual/2023/poster/71947", "video": "https://nips.cc/virtual/2023/poster/71947", "author_site": "Jin Xu, Emilien Dupont, Kaspar M\u00e4rtens, Thomas Rainforth, Yee Whye Teh", "tldr": "", "abstract": "We introduce Markov Neural Processes (MNPs), a new class of Stochastic Processes (SPs) which are constructed by stacking sequences of neural parameterised Markov transition operators in function space. We prove that these Markov transition operators can preserve the exchangeability and consistency of SPs. Therefore, the proposed iterative construction adds substantial flexibility and expressivity to the original framework of Neural Processes (NPs) without compromising consistency or adding restrictions. 
Our experiments demonstrate clear advantages of MNPs over baseline models on a variety of tasks.", "keywords": "Neural Processes;Bayesian Nonparametric Models", "primary_area": "", "supplementary_material": "/attachment/56e45763114ab231c2c541b8f83e803edcfb4f56.pdf", "author": "Jin Xu;Emilien Dupont;Kaspar M\u00e4rtens;Tom Rainforth;Yee Whye Teh", "authorids": "~Jin_Xu7;~Emilien_Dupont3;~Kaspar_M\u00e4rtens1;~Tom_Rainforth1;~Yee_Whye_Teh2", "gender": "M;;M;M;M", "homepage": "https://jinxu06.github.io/;https://emiliendupont.github.io/;https://kaspar.website/;http://www.robots.ox.ac.uk/~twgr;http://csml.stats.ox.ac.uk/people/teh/", "dblp": "97/3265-11;;185/8583;166/1198;88/2483", "google_scholar": "b5JQt5QAAAAJ;;HQsaFH4RsGkC;https://scholar.google.co.uk/citations?user=ieLRNKMAAAAJ;https://scholar.google.co.uk/citations?user=y-nUzMwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jin_Xu7;~Emilien_Dupont3;~Kaspar_M\u00e4rtens1;~Tom_Rainforth1;~Yee_Whye_Teh1", "aff": "University of Oxford;Google DeepMind;Alan Turing Institute / University of Oxford;;University of Oxford", "aff_domain": "ox.ac.uk;google.com;turing.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;Researcher;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nxu2023deep,\ntitle={Deep Stochastic Processes via Functional Markov Transition Operators},\nauthor={Jin Xu and Emilien Dupont and Kaspar M{\\\"a}rtens and Tom Rainforth and Yee Whye Teh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Lg1ODJGGiI}\n}", "github": "", "project": "", "reviewers": "UK93;wA6N;kRke;nP7k;ipFc", "pdf_size": 1130011, "rating": "6;6;7;8;8", "confidence": "3;3;3;3;4", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "1;2;3;3;4", "wc_summary": "33;44;36;113;101", "wc_strengths": "41;42;18;55;76", "wc_weaknesses": "180;104;7;43;39", "wc_questions": "1;21;194;26;65", "wc_limitations": "2;5;6;1;19", "wc_review": "257;216;261;238;300", "wc_reply_reviewers": "49;65;53;0;10", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 65.4, 34.366262525913406 ], "wc_strengths_avg": [ 46.4, 19.001052602421794 ], "wc_weaknesses_avg": [ 74.6, 61.33384057761263 ], "wc_questions_avg": [ 61.4, 69.46826613641656 ], "wc_limitations_avg": [ 6.6, 6.468384651518492 ], "wc_review_avg": [ 254.4, 27.832355272236665 ], "wc_reply_reviewers_avg": [ 35.4, 25.570295266187287 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5590169943749475, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16147689366854355646&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ox.ac.uk;google.com;turing.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { 
"title": "AlberDICE: Addressing Out-Of-Distribution Joint Actions in Offline Multi-Agent RL via Alternating Stationary Distribution Correction Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71946", "id": "LhVJdq4cZm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5b6eb1dbabff82838d5e99f62de37c8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LhVJdq4cZm", "openreview": "https://openreview.net/forum?id=LhVJdq4cZm", "poster": "/media/PosterPDFs/NeurIPS%202023/71946.png?t=1702382910.1781363", "slides": "https://nips.cc/virtual/2023/poster/71946", "video": "https://nips.cc/virtual/2023/poster/71946", "author_site": "Daiki E. Matsunaga, Jongmin Lee, Jaeseok Yoon, Stefanos Leonardos, Pieter Abbeel, Kee-Eung Kim", "tldr": "", "abstract": "One of the main challenges in offline Reinforcement Learning (RL) is the distribution shift that arises from the learned policy deviating from the data collection policy. This is often addressed by avoiding out-of-distribution (OOD) actions during policy improvement as their presence can lead to substantial performance degradation. This challenge is amplified in the offline Multi-Agent RL (MARL) setting since the joint action space grows exponentially with the number of agents.\nTo avoid this curse of dimensionality, existing MARL methods adopt either value decomposition methods or fully decentralized training of individual agents. However, even when combined with standard conservatism principles, these methods can still result in the selection of OOD joint actions in offline MARL. To this end, we introduce AlberDICE,\nan offline MARL algorithm that alternatively performs centralized training of individual agents based on stationary distribution optimization. AlberDICE circumvents the exponential complexity of MARL by computing the best response of one agent at a time while effectively avoiding OOD joint action selection. Theoretically, we show that the alternating optimization procedure converges to Nash policies. In the experiments, we demonstrate that AlberDICE significantly outperforms baseline algorithms on a standard suite of MARL benchmarks.", "keywords": "Offline Reinforcement Learning;Multi-Agent Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Daiki E. 
Matsunaga;Jongmin Lee;Jaeseok Yoon;Stefanos Leonardos;Pieter Abbeel;Kee-Eung Kim", "authorids": "~Daiki_E._Matsunaga1;~Jongmin_Lee1;~Jaeseok_Yoon1;~Stefanos_Leonardos1;~Pieter_Abbeel2;~Kee-Eung_Kim2", "gender": "M;M;M;M;M;M", "homepage": "https://sites.google.com/view/daikieddymatsunaga;https://www.jmlee.kr;https://yjaeseok.tistory.com/61;https://stefanosleonardos.com/;https://people.eecs.berkeley.edu/~pabbeel/;http://ailab.kaist.ac.kr", "dblp": ";68/222-4.html;161/3799;192/1237;;35/6703", "google_scholar": "ecKHIXEAAAAJ;https://scholar.google.co.kr/citations?user=rFcK8EEAAAAJ;https://scholar.google.co.kr/citations?user=oIPnG_QAAAAJ;PtiGrVsAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;https://scholar.google.com/citations?hl=ko", "orcid": ";;;;;", "linkedin": ";jmlee123/;https://linkedin.com/in/yjaeseok;stefanos-leonardos/;;", "or_profile": "~Daiki_E._Matsunaga1;~Jongmin_Lee1;~Jaeseok_Yoon1;~Stefanos_Leonardos1;~Pieter_Abbeel2;~Kee-Eung_Kim2", "aff": "Korea Advanced Institute of Science & Technology;University of California, Berkeley;Korea Advanced Institute of Science & Technology;King's College London, University of London;Covariant;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;berkeley.edu;kaist.ac.kr;kcl.ac.uk;covariant.ai;kaist.ac.kr", "position": "PhD student;Postdoc;MS student;Lecturer;Founder;Full Professor", "bibtex": "@inproceedings{\nmatsunaga2023alberdice,\ntitle={Alber{DICE}: Addressing Out-Of-Distribution Joint Actions in Offline Multi-Agent {RL} via Alternating Stationary Distribution Correction Estimation},\nauthor={Daiki E. Matsunaga and Jongmin Lee and Jaeseok Yoon and Stefanos Leonardos and Pieter Abbeel and Kee-Eung Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LhVJdq4cZm}\n}", "github": "", "project": "", "reviewers": "5Yz6;aFwQ;82R1;s9Ry;gucm", "pdf_size": 1731002, "rating": "5;5;5;5;7", "confidence": "4;3;4;3;3", "soundness": "2;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;2;3", "wc_summary": "81;91;59;66;153", "wc_strengths": "121;43;40;25;54", "wc_weaknesses": "151;134;84;73;178", "wc_questions": "5;23;4;210;76", "wc_limitations": "1;20;24;8;26", "wc_review": "359;311;211;382;487", "wc_reply_reviewers": "27;0;0;55;35", "wc_reply_authors": "45;0;0;47;48", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 90.0, 33.430524973443056 ], "wc_strengths_avg": [ 56.6, 33.50582038989644 ], "wc_weaknesses_avg": [ 124.0, 39.86477141537375 ], "wc_questions_avg": [ 63.6, 77.74471043099975 ], "wc_limitations_avg": [ 15.8, 9.682974749528164 ], "wc_review_avg": [ 350.0, 90.23968085049947 ], "wc_reply_reviewers_avg": [ 23.4, 21.17167919651155 ], "wc_reply_authors_avg": [ 28.0, 22.882307575941724 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13072075911855003360&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;berkeley.edu;kaist.ac.kr;kcl.ac.uk;covariant.ai;kaist.ac.kr", "author_num": 6, "aff_unique_index": 
"0;1;0;2;3;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of California, Berkeley;King's College London;Covariant", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.berkeley.edu;https://www.kcl.ac.uk;", "aff_unique_abbr": "KAIST;UC Berkeley;KCL;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "South Korea;United States;United Kingdom;" }, { "title": "When Can We Track Significant Preference Shifts in Dueling Bandits?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71945", "id": "LjWJLkSpjh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/78ccee9dfbcf84840165ab4093715969-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LjWJLkSpjh", "openreview": "https://openreview.net/forum?id=LjWJLkSpjh", "poster": "/media/PosterPDFs/NeurIPS%202023/71945.png?t=1701661731.9848156", "slides": "https://nips.cc/virtual/2023/poster/71945", "video": "https://nips.cc/virtual/2023/poster/71945", "author_site": "Joe Suk, Arpit Agarwal, Arpit Agarwal", "tldr": "", "abstract": "The $K$-armed dueling bandits problem, where the feedback is in the form of noisy pairwise preferences, has been widely studied due its applications in information retrieval, recommendation systems, etc. Motivated by concerns that user preferences/tastes can evolve over time, we consider the problem of _dueling bandits with distribution shifts_. Specifically, we study the recent notion of _significant shifts_ (Suk and Kpotufe, 2022), and ask whether one can design an _adaptive_ algorithm for the dueling problem with $O(\\sqrt{K\\tilde{L}T})$ dynamic regret,\nwhere $\\tilde{L}$ is the (unknown) number of significant shifts in preferences. We show that the answer to this question depends on the properties of underlying preference distributions. Firstly, we give an impossibility result that rules out any algorithm with $O(\\sqrt{K\\tilde{L}T})$ dynamic regret under the well-studied Condorcet and SST classes of preference distributions. Secondly, we show that $\\text{SST}\\cap \\text{STI}$ is the largest amongst popular classes of preference distributions where it is possible to design such an algorithm. 
Overall, our results provide an almost complete resolution of the above question for the hierarchy of distribution classes.", "keywords": "non-stationary;multi-armed bandits;dueling bandits;preference-based learning", "primary_area": "", "supplementary_material": "/attachment/5c0ae149913821ec2093cce5901655db09b8fccc.pdf", "author": "Joe Suk;Arpit Agarwal", "authorids": "~Joe_Suk1;~Arpit_Agarwal2", "gender": "Not Specified;", "homepage": "https://www.columbia.edu/~js5338/;", "dblp": "271/0068;", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": ";", "or_profile": "~Joe_Suk1;~Arpit_Agarwal2", "aff": "Columbia University;", "aff_domain": "columbia.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nsuk2023when,\ntitle={When Can We Track Significant Preference Shifts in Dueling Bandits?},\nauthor={Joe Suk and Arpit Agarwal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LjWJLkSpjh}\n}", "github": "", "project": "", "reviewers": "VW3L;6oeV;FDu2;W1nd", "pdf_size": 590729, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "4;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "64;101;128;71", "wc_strengths": "103;69;111;54", "wc_weaknesses": "58;246;149;75", "wc_questions": "97;36;4;2", "wc_limitations": "6;9;23;8", "wc_review": "328;461;415;210", "wc_reply_reviewers": "9;15;31;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.0, 25.485289874749316 ], "wc_strengths_avg": [ 84.25, 23.53056522908024 ], "wc_weaknesses_avg": [ 132.0, 74.17883795261287 ], "wc_questions_avg": [ 34.75, 38.38863764188565 ], "wc_limitations_avg": [ 11.5, 6.726812023536855 ], "wc_review_avg": [ 353.5, 95.63080047767038 ], "wc_reply_reviewers_avg": [ 16.0, 9.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18107183438707491432&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "columbia.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "LkTbLQDo4I", "title": "Optimizing protein fitness using Bi-level Gibbs sampling with Graph-based Smoothing", "track": "main", "status": "Reject", "tldr": "", "abstract": "The ability to design novel proteins with higher fitness on a given task would be revolutionary for many fields of medicine. However, brute-force search through the combinatorially large space of sequences is infeasible. Prior methods constrain search to a small mutational radius from a reference sequence, but such heuristics drastically limit the design space. Our work seeks to remove the restriction on mutational distance while enabling efficient exploration. 
We propose $\\textbf{Bi}$-level $\\textbf{G}$ibbs sampling with $\\textbf{G}$raph-based $\\textbf{S}$moothing (BiGGS) which uses the gradients of a trained fitness predictor to sample many mutations towards higher fitness. Bi-level Gibbs first samples sequence locations then sequence edits. We introduce graph-based smoothing to remove noisy gradients that lead to false positives. Our method is state-of-the-art in discovering high-fitness proteins with up to 8 mutations from the training set.\nWe study the GFP and AAV design problems, ablations, and baselines to elucidate the results.", "keywords": "protein design;discrete optimization;Gibbs sampling;protein engineering", "primary_area": "", "supplementary_material": "/attachment/c2c7439f0bd39f856b79673c398a893cf870b132.zip", "author": "Andrew Kirjner;Jason Yim;Raman Samusevich;Tommi S. Jaakkola;Regina Barzilay;Ila R Fiete", "authorids": "kirjner@mit.edu;~Jason_Yim1;raman.samusevich@uochb.cas.cz;~Tommi_S._Jaakkola1;~Regina_Barzilay1;~Ila_R_Fiete1", "gender": ";;;;female;F", "homepage": ";http://people.csail.mit.edu/jyim/;;;https://www.regina.csail.mit.edu/;https://fietelab.mit.edu/", "dblp": ";278/7337;;;b/ReginaBarzilay;", "google_scholar": ";8wDe9NAAAAAJ;;;;uE-CihIAAAAJ", "orcid": ";0000-0003-0575-7400;;;;0000-0003-4738-2539", "linkedin": ";;;;;", "or_profile": "kirjner@mit.edu;~Jason_Yim1;raman.samusevich@uochb.cas.cz;~Tommi_S._Jaakkola1;~Regina_Barzilay1;~Ila_R_Fiete1", "aff": ";Massachusetts Institute of Technology;;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": ";mit.edu;;;mit.edu;mit.edu", "position": ";PhD student;;;Professor;Professor", "bibtex": "@misc{\nkirjner2023optimizing,\ntitle={Optimizing protein fitness using Bi-level Gibbs sampling with Graph-based Smoothing},\nauthor={Andrew Kirjner and Jason Yim and Raman Samusevich and Tommi S. 
Jaakkola and Regina Barzilay and Ila R Fiete},\nyear={2023},\nurl={https://openreview.net/forum?id=LkTbLQDo4I}\n}", "github": "", "project": "", "reviewers": "FEBk;DKAZ;bM4W;XaWK;edmB;65UE", "site": "https://openreview.net/forum?id=LkTbLQDo4I", "pdf_size": 1987699, "rating": "4;4;5;5;5;6", "confidence": "4;3;2;4;3;1", "soundness": "3;3;3;3;3;3", "novelty": "2;3;2;3;3;3", "presentation": "3;3;3;3;2;3", "wc_summary": "85;87;60;40;32;59", "wc_strengths": "104;136;61;19;36;151", "wc_weaknesses": "164;120;217;71;50;149", "wc_questions": "4;42;70;145;33;28", "wc_limitations": "75;17;1;13;1;46", "wc_review": "432;402;409;288;152;433", "wc_reply_reviewers": "0;0;103;102;62;136", "wc_reply_authors": "0;0;500;372;327;214", "reply_reviewers": "0;0;1;1;1;1", "reply_authors": "1;1;2;2;2;2", "rating_avg": [ 4.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 2.8333333333333335, 1.0671873729054748 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 60.5, 20.56493779875511 ], "wc_strengths_avg": [ 84.5, 49.41238036497871 ], "wc_weaknesses_avg": [ 128.5, 56.352314356495896 ], "wc_questions_avg": [ 53.666666666666664, 45.27201735681276 ], "wc_limitations_avg": [ 25.5, 26.769074196418025 ], "wc_review_avg": [ 352.6666666666667, 102.30944346550919 ], "wc_reply_reviewers_avg": [ 67.16666666666667, 52.097398100950194 ], "wc_reply_authors_avg": [ 235.5, 186.34533354321846 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.47140452079103173 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7196763181246416, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CHhOYEVT8noJ:scholar.google.com/&scioq=Optimizing+protein+fitness+using+Bi-level+Gibbs+sampling+with+Graph-based+Smoothing&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CS-Isolate: Extracting Hard Confident Examples by Content and Style Isolation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71944", "id": "Lkc0KjsDFv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6d67c380f8bde2adc4247d0036c0c73-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Lkc0KjsDFv", "openreview": "https://openreview.net/forum?id=Lkc0KjsDFv", "poster": "/media/PosterPDFs/NeurIPS%202023/71944.png?t=1699449408.2068598", "slides": "https://nips.cc/virtual/2023/poster/71944", "video": "https://nips.cc/virtual/2023/poster/71944", "author_site": "Yexiong Lin, Yu Yao, Xiaolong Shi, Mingming Gong, Xu Shen, Dong Xu, Tongliang Liu", "tldr": "", "abstract": "Label noise widely exists in large-scale image datasets. To mitigate the side effects of label noise, state-of-the-art methods focus on selecting confident examples by leveraging semi-supervised learning. 
Existing research shows that the ability to extract hard confident examples, which are close to the decision boundary, significantly influences the generalization ability of the learned classifier.\nIn this paper, we find that a key reason for some hard examples being close to the decision boundary is due to the entanglement of style factors with content factors. The hard examples become more discriminative when we focus solely on content factors, such as semantic information, while ignoring style factors. Nonetheless, given only noisy data, content factors are not directly observed and have to be inferred.\nTo tackle the problem of inferring content factors for classification when learning with noisy labels, our objective is to ensure that the content factors of all examples in the same underlying clean class remain unchanged as their style information changes.\nTo achieve this, we utilize different data augmentation techniques to alter the styles while regularizing content factors based on some confident examples. By training existing methods with our inferred content factors, we demonstrate the effectiveness of CS-Isolate in learning hard examples on benchmark datasets. The implementation is available at https://github.com/tmllab/2023_NeurIPS_CS-isolate.", "keywords": "learning with label errors", "primary_area": "", "supplementary_material": "/attachment/157555d3a9aefa66db57c6a095912e1ebfc2813d.pdf", "author": "Yexiong Lin;Yu Yao;Xiaolong Shi;Mingming Gong;Xu Shen;Dong Xu;Tongliang Liu", "authorids": "~Yexiong_Lin1;~Yu_Yao3;~Xiaolong_Shi1;~Mingming_Gong1;~Xu_Shen1;~Dong_Xu2;~Tongliang_Liu1", "gender": "M;M;;M;M;Unspecified;M", "homepage": "https://yexionglin.github.io/;https://a5507203.github.io/;;https://mingming-gong.github.io/;;https://www.cs.hku.hk/people/academic-staff/dongxu;https://tongliang-liu.github.io/", "dblp": "287/6488;230/9625;;98/8479;09/10130-1.html;09/3493-1;150/6667", "google_scholar": "OfsQPbwAAAAJ;OkcaMKAAAAAJ;;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;38jwGs8AAAAJ;7Hdu5k4AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;;0000-0001-7147-5589;;;", "linkedin": ";yu-yao-150377134/;;;;;", "or_profile": "~Yexiong_Lin1;~Yu_Yao3;~Xiaolong_Shi1;~Mingming_Gong1;~Xu_Shen1;~Dong_Xu2;~Tongliang_Liu1", "aff": "University of Sydney;University of Sydney;;University of Melbourne;Alibaba Group;University of Hong Kong;University of Sydney", "aff_domain": "usyd.edu.au;uni.sydney.edu.au;;unimelb.edu.au;alibaba-inc.com;hku.hk;sydney.edu.au", "position": "PhD student;PhD student;;Assistant Professor;Researcher;Full Professor;Lecturer", "bibtex": "@inproceedings{\nlin2023csisolate,\ntitle={{CS}-Isolate: Extracting Hard Confident Examples by Content and Style Isolation},\nauthor={Yexiong Lin and Yu Yao and Xiaolong Shi and Mingming Gong and Xu Shen and Dong Xu and Tongliang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Lkc0KjsDFv}\n}", "github": "", "project": "", "reviewers": "25vx;koUE;NWTZ;fCED;MtkC", "pdf_size": 3415919, "rating": "3;5;6;6;7", "confidence": "4;2;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "65;142;103;55;103", "wc_strengths": "39;74;35;90;257", "wc_weaknesses": "389;221;31;93;201", "wc_questions": "128;109;28;121;98", "wc_limitations": "13;11;27;2;135", "wc_review": "634;557;224;361;794", "wc_reply_reviewers": "234;0;0;9;0", "wc_reply_authors": "410;0;0;17;0", "reply_reviewers":
"1;0;0;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 93.6, 31.071530377501524 ], "wc_strengths_avg": [ 99.0, 81.68965662799667 ], "wc_weaknesses_avg": [ 187.0, 122.74200584966827 ], "wc_questions_avg": [ 96.8, 35.89651793698102 ], "wc_limitations_avg": [ 37.6, 49.35422980859898 ], "wc_review_avg": [ 514.0, 201.0761049950988 ], "wc_reply_reviewers_avg": [ 48.6, 92.7655108324209 ], "wc_reply_authors_avg": [ 85.4, 162.43349408296308 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.1474419561548971, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3376992566090101433&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "usyd.edu.au;uni.sydney.edu.au;;unimelb.edu.au;alibaba-inc.com;hku.hk;sydney.edu.au", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "University of Sydney;University of Melbourne;Alibaba Group;University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sydney.edu.au;https://www.unimelb.edu.au;https://www.alibaba.com;https://www.hku.hk", "aff_unique_abbr": "USYD;UniMelb;Alibaba;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;1;0", "aff_country_unique": "Australia;China" }, { "title": "Training shallow ReLU networks on noisy data using hinge loss: when do we overfit and is it benign?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71943", "id": "LlERoXEKjh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e73c39cc428c7d264d9820319f31e79-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LlERoXEKjh", "openreview": "https://openreview.net/forum?id=LlERoXEKjh", "poster": "/media/PosterPDFs/NeurIPS%202023/71943.png?t=1701378369.428139", "slides": "https://nips.cc/virtual/2023/poster/71943", "video": "https://nips.cc/virtual/2023/poster/71943", "author_site": "Erin George, Michael Murray, William Swartworth, Deanna Needell", "tldr": "", "abstract": "We study benign overfitting in two-layer ReLU networks trained using gradient descent and hinge loss on noisy data for binary classification. In particular, we consider linearly separable data for which a relatively small proportion of labels are corrupted or flipped. We identify conditions on the margin of the clean data that give rise to three distinct training outcomes: benign overfitting, in which zero loss is achieved and with high probability test data is classified correctly; overfitting, in which zero loss is achieved but test data is misclassified with probability lower bounded by a constant; and non-overfitting, in which clean points, but not corrupt points, achieve zero loss and again with high probability test data is classified correctly. Our analysis provides a fine-grained description of the dynamics of neurons throughout training and reveals two distinct phases: in the first phase clean points achieve close to zero loss, in the second phase clean points oscillate on the boundary of zero loss while corrupt points either converge towards zero loss or are eventually zeroed by the network. 
We prove these results using a combinatorial approach that involves bounding the number of clean versus corrupt updates during these phases of training.", "keywords": "benign overfitting;neural networks;relu;hinge loss", "primary_area": "", "supplementary_material": "/attachment/b96401e1b456d7b1e1e906aa9268e231ee565bce.pdf", "author": "Erin George;Michael Murray;William Joseph Swartworth;Deanna Needell", "authorids": "~Erin_George1;~Michael_Murray3;~William_Joseph_Swartworth1;~Deanna_Needell2", "gender": "Non-Binary;M;;Not Specified", "homepage": "http://egeo.cc;https://www.math.ucla.edu/people/visiting/mmurray;https://www.math.ucla.edu/~wswartworth/;https://www.math.ucla.edu/~deanna/index.html", "dblp": "310/1223;;;03/2691", "google_scholar": ";wplO7UoAAAAJ;;", "orcid": "0000-0001-6792-9058;;;0000-0002-8058-8638", "linkedin": ";;;", "or_profile": "~Erin_George1;~Michael_Murray3;~William_Joseph_Swartworth1;~Deanna_Needell2", "aff": "Los Alamos National Laboratory;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "lanl.gov;ucla.edu;ucla.edu;ucla.edu", "position": "Intern;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\ngeorge2023training,\ntitle={Training shallow Re{LU} networks on noisy data using hinge loss: when do we overfit and is it benign?},\nauthor={Erin George and Michael Murray and William Joseph Swartworth and Deanna Needell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LlERoXEKjh}\n}", "github": "", "project": "", "reviewers": "dAPs;NX3e;8QyX;ZHgu", "pdf_size": 765440, "rating": "6;7;7;8", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;3", "wc_summary": "87;133;56;77", "wc_strengths": "38;99;90;142", "wc_weaknesses": "186;369;314;29", "wc_questions": "267;4;59;24", "wc_limitations": "1;5;11;3", "wc_review": "579;610;530;275", "wc_reply_reviewers": "144;4;5;0", "wc_reply_authors": "184;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.25, 28.154706533721853 ], "wc_strengths_avg": [ 92.25, 36.9754986443726 ], "wc_weaknesses_avg": [ 224.5, 130.9513268355842 ], "wc_questions_avg": [ 88.5, 104.92020777714843 ], "wc_limitations_avg": [ 5.0, 3.7416573867739413 ], "wc_review_avg": [ 498.5, 132.1523741746625 ], "wc_reply_reviewers_avg": [ 38.25, 61.08344702126755 ], "wc_reply_authors_avg": [ 46.0, 79.67433714816836 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7045344133755178287&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "lanl.gov;ucla.edu;ucla.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Los Alamos National Laboratory;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.lanl.gov;https://www.ucla.edu", "aff_unique_abbr": "LANL;UCLA", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { 
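[Editor's illustration of the training setup described in the preceding abstract; a minimal sketch under assumed hyperparameters (width, learning rate, fixed +/-1 output layer), not the authors' exact construction. Benign overfitting corresponds to the train error on the corrupted labels reaching zero while the test error stays small.]

```python
# Hedged sketch: a 2-layer ReLU network trained with full-batch gradient
# descent on hinge loss over linearly separable data with flipped labels.
import torch

torch.manual_seed(0)
n, d, width, noise = 200, 50, 512, 0.1

w_star = torch.randn(d)
w_star /= w_star.norm()                            # ground-truth separating direction
X = torch.randn(n, d)
y = torch.sign(X @ w_star)
flip = torch.rand(n) < noise
y[flip] *= -1                                      # corrupt a small fraction of labels

W = (0.1 * torch.randn(width, d)).requires_grad_() # trained first layer
a = torch.sign(torch.randn(width))                 # fixed +/-1 second layer (common in theory)

def net(Z):
    return torch.relu(Z @ W.T) @ a / width**0.5

opt = torch.optim.SGD([W], lr=1.0)
for _ in range(3000):
    loss = torch.relu(1 - y * net(X)).mean()       # hinge loss
    opt.zero_grad(); loss.backward(); opt.step()

with torch.no_grad():
    X_te = torch.randn(4000, d)
    y_te = torch.sign(X_te @ w_star)
    print("train err:", (torch.sign(net(X)) != y).float().mean().item())
    print("test err:", (torch.sign(net(X_te)) != y_te).float().mean().item())
```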
"title": "Non-autoregressive Machine Translation with Probabilistic Context-free Grammar", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71942", "id": "LloZFVwWvj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/11c7f1dd168439884b6dfb43a7891432-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LloZFVwWvj", "openreview": "https://openreview.net/forum?id=LloZFVwWvj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71942", "video": "https://nips.cc/virtual/2023/poster/71942", "author_site": "Shangtong Gui, Chenze Shao, Zhengrui Ma, xishan zhang, Yunji Chen, Yang Feng", "tldr": "", "abstract": "Non-autoregressive Transformer(NAT) significantly accelerates the inference of neural machine translation. However, conventional NAT models suffer from limited expression power and performance degradation compared to autoregressive (AT) models due to the assumption of conditional independence among target tokens. To address these limitations, we propose a novel approach called PCFG-NAT, which leverages a specially designed Probabilistic Context-Free Grammar (PCFG) to enhance the ability of NAT models to capture complex dependencies among output tokens. Experimental results on major machine translation benchmarks demonstrate that PCFG-NAT further narrows the gap in translation quality between NAT and AT models. Moreover, PCFG-NAT facilitates a deeper understanding of the generated sentences, addressing the lack of satisfactory explainability in neural machine translation. Code is publicly available at https://github.com/ictnlp/PCFG-NAT.", "keywords": "Machine translation;Non-autoregressive generation;Probabilistic Context-free Grammar", "primary_area": "", "supplementary_material": "", "author": "Shangtong Gui;Chenze Shao;Zhengrui Ma;Xishan Zhang;Yunji Chen;Yang Feng", "authorids": "~Shangtong_Gui1;~Chenze_Shao1;~Zhengrui_Ma1;~Xishan_Zhang1;~Yunji_Chen1;~Yang_Feng4", "gender": "M;M;M;;M;", "homepage": ";;http://nlp.ict.ac.cn/~mazhengrui;;;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "342/3895.html;227/3123;276/3133;133/6391;48/474;07/6095-4.html", "google_scholar": "OZ0ZTxUAAAAJ;LH_rZf8AAAAJ;dUgq6tEAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": "%E5%B0%9A%E5%BD%A4-%E6%A1%82-9598a6199/;;;;;", "or_profile": "~Shangtong_Gui1;~Chenze_Shao1;~Zhengrui_Ma1;~Xishan_Zhang1;~Yunji_Chen1;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;, Cambricon Techonologies;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;cambricon.com;ict.ac.cn;ict.ac.cn", "position": "MS student;PhD student;PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngui2023nonautoregressive,\ntitle={Non-autoregressive Machine Translation with Probabilistic Context-free Grammar},\nauthor={Shangtong Gui and Chenze Shao and Zhengrui Ma and Xishan Zhang and Yunji Chen and Yang Feng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LloZFVwWvj}\n}", "github": "", "project": "", "reviewers": "sYwF;FGhz;8Jr4;8FYH;EKPB", "pdf_size": 931469, "rating": "5;5;6;6;6", "confidence": "4;4;4;3;5", "soundness": "3;2;3;3;3", "novelty": 
"2;2;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "60;85;73;72;86", "wc_strengths": "53;22;75;28;18", "wc_weaknesses": "190;35;105;125;201", "wc_questions": "26;24;85;195;1", "wc_limitations": "1;1;6;19;2", "wc_review": "330;167;344;439;308", "wc_reply_reviewers": "34;0;8;18;10", "wc_reply_authors": "65;0;26;49;26", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 75.2, 9.579144011862438 ], "wc_strengths_avg": [ 39.2, 21.646246787838304 ], "wc_weaknesses_avg": [ 131.2, 60.512478052051385 ], "wc_questions_avg": [ 66.2, 70.12959432365199 ], "wc_limitations_avg": [ 5.8, 6.8527366796047255 ], "wc_review_avg": [ 317.6, 87.59132377125032 ], "wc_reply_reviewers_avg": [ 14.0, 11.523888232710346 ], "wc_reply_authors_avg": [ 33.2, 22.21170862405682 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6415085105248760514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;cambricon.com;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Cambricon Technologies", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;https://www.cambricon.com", "aff_unique_abbr": "CAS;Cambricon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "What functions can Graph Neural Networks compute on random graphs? The role of Positional Encoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71941", "id": "LmmjiTwYm0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/271ec4d1a9ff5e6b81a6e21d38b1ba96-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LmmjiTwYm0", "openreview": "https://openreview.net/forum?id=LmmjiTwYm0", "poster": "/media/PosterPDFs/NeurIPS%202023/71941.png?t=1701685716.5016067", "slides": "https://nips.cc/virtual/2023/poster/71941", "video": "https://nips.cc/virtual/2023/poster/71941", "author_site": "Nicolas Keriven, Samuel Vaiter", "tldr": "", "abstract": "We aim to deepen the theoretical understanding of Graph Neural Networks (GNNs) on large graphs, with a focus on their expressive power.\nExisting analyses relate this notion to the graph isomorphism problem, which is mostly relevant for graphs of small sizes, or studied graph classification or regression tasks, while prediction tasks on \\emph{nodes} are far more relevant on large graphs. Recently, several works showed that, on very general random graphs models, GNNs converge to certains functions as the number of nodes grows.\nIn this paper, we provide a more complete and intuitive description of the function space generated by equivariant GNNs for node-tasks, through general notions of convergence that encompass several previous examples. We emphasize the role of input node features, and study the impact of \\emph{node Positional Encodings} (PEs), a recent line of work that has been shown to yield state-of-the-art results in practice. 
Through the study of several examples of PEs on large random graphs, we extend previously known universality results to significantly more general models. Our theoretical results hint at some normalization tricks, which are shown numerically to have a positive impact on GNN generalization on synthetic and real data. Our proofs contain new concentration inequalities of independent interest.", "keywords": "graph neural network; random graph; positional encoding", "primary_area": "", "supplementary_material": "", "author": "Nicolas Keriven;Samuel Vaiter", "authorids": "~Nicolas_Keriven1;~Samuel_Vaiter1", "gender": ";M", "homepage": "https://nkeriven.github.io/;https://samuelvaiter.com", "dblp": "142/4193;51/10261.html", "google_scholar": ";HkXkm7IAAAAJ", "orcid": ";0000-0002-4077-708X", "linkedin": ";", "or_profile": "~Nicolas_Keriven1;~Samuel_Vaiter1", "aff": "CNRS;CNRS", "aff_domain": "cnrs.fr;cnrs.fr", "position": "Assistant Professor;Researcher", "bibtex": "@inproceedings{\nkeriven2023what,\ntitle={What functions can Graph Neural Networks compute on random graphs? The role of Positional Encoding},\nauthor={Nicolas Keriven and Samuel Vaiter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LmmjiTwYm0}\n}", "github": "", "project": "", "reviewers": "gCL6;rtyh;2UtV;Jsrn;2T44;mk3D", "pdf_size": 574040, "rating": "5;6;6;7;7;7", "confidence": "4;1;3;3;3;4", "soundness": "3;4;4;4;4;3", "novelty": "2;4;2;3;3;3", "presentation": "4;4;2;3;4;3", "wc_summary": "91;135;46;44;91;75", "wc_strengths": "40;60;93;57;204;54", "wc_weaknesses": "203;1;279;322;127;175", "wc_questions": "54;1;84;224;38;313", "wc_limitations": "24;1;2;33;1;1", "wc_review": "412;198;504;680;461;618", "wc_reply_reviewers": "35;0;59;24;31;196", "wc_reply_authors": "0;0;0;0;0;255", "reply_reviewers": "1;0;1;1;1;2", "reply_authors": "1;1;1;1;1;2", "rating_avg": [ 6.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 80.33333333333333, 30.93900810016737 ], "wc_strengths_avg": [ 84.66666666666667, 55.706572522658604 ], "wc_weaknesses_avg": [ 184.5, 104.28127029017882 ], "wc_questions_avg": [ 119.0, 111.50186844473355 ], "wc_limitations_avg": [ 10.333333333333334, 13.110640292864757 ], "wc_review_avg": [ 478.8333333333333, 154.98431820312948 ], "wc_reply_reviewers_avg": [ 57.5, 64.3188671127428 ], "wc_reply_authors_avg": [ 42.5, 95.03288904374106 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2553545878944344952&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "cnrs.fr;cnrs.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Centre National de la Recherche Scientifique", "aff_unique_dep": "", "aff_unique_url": "https://www.cnrs.fr", "aff_unique_abbr": "CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Individualized Dosing Dynamics via Neural Eigen Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71940", "id": "Lmxo0RVNx2",
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/afc9f18089928eca34c347fee4757f72-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Lmxo0RVNx2", "openreview": "https://openreview.net/forum?id=Lmxo0RVNx2", "poster": "/media/PosterPDFs/NeurIPS%202023/71940.png?t=1701962535.4731553", "slides": "https://nips.cc/virtual/2023/poster/71940", "video": "https://nips.cc/virtual/2023/poster/71940", "author_site": "Stav Belogolovsky, Ido Greenberg, Danny Eytan, Shie Mannor", "tldr": "", "abstract": "Dosing models often use differential equations to model biological dynamics. Neural differential equations in particular can learn to predict the derivative of a process, which permits predictions at irregular points of time. However, this temporal flexibility often comes with a high sensitivity to noise, whereas medical problems often present high noise and limited data. Moreover, medical dosing models must generalize reliably over individual patients and changing treatment policies. To address these challenges, we introduce the Neural Eigen Stochastic Differential Equation algorithm (NESDE). NESDE provides individualized modeling (using a hypernetwork over patient-level parameters); generalization to new treatment policies (using decoupled control); tunable expressiveness according to the noise level (using piecewise linearity); and fast, continuous, closed-form prediction (using spectral representation). We demonstrate the robustness of NESDE in both synthetic and real medical problems, and use the learned dynamics to publish simulated medical gym environments.", "keywords": "personalized medicine;dosing dynamics;sequential prediction;stochastic differential equations;Kalman filter;recurrent neural networks;medical drug control", "primary_area": "", "supplementary_material": "/attachment/97425f7bd567e183bdde8beee818479c541bf9b3.pdf", "author": "Stav Belogolovsky;Ido Greenberg;Danny Eytan;Shie Mannor", "authorids": "~Stav_Belogolovsky1;~Ido_Greenberg1;~Danny_Eytan1;~Shie_Mannor2", "gender": ";M;M;M", "homepage": ";https://idogreenberg.neocities.org/;;https://shie.net.technion.ac.il", "dblp": "241/9591;;10/11026.html;20/1669", "google_scholar": "https://scholar.google.co.il/citations?user=Rsw1cgcAAAAJ;LnwyFkkAAAAJ;;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ", "orcid": ";;0000-0001-7684-1429;", "linkedin": ";ido-greenberg-87245852/;;", "or_profile": "~Stav_Belogolovsky1;~Ido_Greenberg1;~Danny_Eytan1;~Shie_Mannor2", "aff": "Technion - Israel Institute of Technology, Technion;Technion, Technion;Technion, Technion;Technion - Israel Institute of Technology, Technion", "aff_domain": "technion.ac.il;technion.ac.il;technion.ac.il;technion.il", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbelogolovsky2023individualized,\ntitle={Individualized Dosing Dynamics via Neural Eigen Decomposition},\nauthor={Stav Belogolovsky and Ido Greenberg and Danny Eytan and Shie Mannor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Lmxo0RVNx2}\n}", "github": "", "project": "", "reviewers": "a6Bv;hUXX;uGSy;vmHv", "pdf_size": 2040701, "rating": "6;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "3;4;3;4", "wc_summary": "18;25;80;27", "wc_strengths": "34;42;93;101", "wc_weaknesses": "43;11;107;73", "wc_questions": "19;30;1;54", "wc_limitations": "16;2;87;22", "wc_review": "130;110;368;277", 
"wc_reply_reviewers": "14;0;19;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 37.5, 24.763884994079586 ], "wc_strengths_avg": [ 67.5, 29.769951293208393 ], "wc_weaknesses_avg": [ 58.5, 35.563323804166565 ], "wc_questions_avg": [ 26.0, 19.196353820452465 ], "wc_limitations_avg": [ 31.75, 32.713720363174836 ], "wc_review_avg": [ 221.25, 106.47388177388856 ], "wc_reply_reviewers_avg": [ 8.25, 8.437268515343103 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8516263021071570147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "technion.ac.il;technion.ac.il;technion.ac.il;technion.il", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "From Tempered to Benign Overfitting in ReLU Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71939", "id": "LnZuxp3Tx7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b52e8c6c1a798fed53ac2e6a5e23ddc8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LnZuxp3Tx7", "openreview": "https://openreview.net/forum?id=LnZuxp3Tx7", "poster": "/media/PosterPDFs/NeurIPS%202023/71939.png?t=1701075206.462396", "slides": "https://nips.cc/virtual/2023/poster/71939", "video": "https://nips.cc/virtual/2023/poster/71939", "author_site": "Guy Kornowski, Gilad Yehudai, Ohad Shamir", "tldr": "", "abstract": "Overparameterized neural networks (NNs) are observed to generalize well even when trained to perfectly fit noisy data. This phenomenon motivated a large body of work on \"benign overfitting\", where interpolating predictors achieve near-optimal performance. Recently, it was conjectured and empirically observed that the behavior of NNs is often better described as \"tempered overfitting\", where the performance is non-optimal yet also non-trivial, and degrades as a function of the noise level. However, a theoretical justification of this claim for non-linear NNs has been lacking so far. In this work, we provide several results that aim at bridging these complementing views. We study a simple classification setting with 2-layer ReLU NNs, and prove that under various assumptions, the type of overfitting transitions from tempered in the extreme case of one-dimensional data, to benign in high dimensions. Thus, we show that the input dimension has a crucial role on the overfitting profile in this setting, which we also validate empirically for intermediate dimensions. 
Overall, our results shed light on the intricate connections between the dimension, sample size, architecture and training algorithm on the one hand, and the type of resulting overfitting on the other hand.", "keywords": "benign overfitting;implicit bias;interpolating predictors;neural networks;theory", "primary_area": "", "supplementary_material": "/attachment/45de866b62513abd1c6299b144510e7a43749e8b.pdf", "author": "Guy Kornowski;Gilad Yehudai;Ohad Shamir", "authorids": "~Guy_Kornowski1;~Gilad_Yehudai2;~Ohad_Shamir1", "gender": ";M;", "homepage": ";;http://www.wisdom.weizmann.ac.il/~shamiro/", "dblp": "276/7550;239/4344;12/5897", "google_scholar": ";opVT1qkAAAAJ;all0DHsAAAAJ", "orcid": "0000-0001-8058-2909;;", "linkedin": ";;", "or_profile": "~Guy_Kornowski1;~Gilad_Yehudai2;~Ohad_Shamir1", "aff": "Weizmann Institute of Science;Weizmann Institute of Science;Weizmann Institute", "aff_domain": "weizmann.ac.il;weizmann.ac.il;weizmann.ac.il", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nkornowski2023from,\ntitle={From Tempered to Benign Overfitting in Re{LU} Neural Networks},\nauthor={Guy Kornowski and Gilad Yehudai and Ohad Shamir},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LnZuxp3Tx7}\n}", "github": "", "project": "", "reviewers": "s2UC;2xri;dEM1;M9si", "pdf_size": 626161, "rating": "6;6;7;8", "confidence": "4;4;4;3", "soundness": "3;4;4;4", "novelty": "3;3;4;3", "presentation": "4;4;4;4", "wc_summary": "33;107;447;103", "wc_strengths": "190;98;335;63", "wc_weaknesses": "365;138;375;106", "wc_questions": "127;49;266;81", "wc_limitations": "1;3;1;39", "wc_review": "716;395;1424;392", "wc_reply_reviewers": "0;0;0;34", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 172.5, 161.19165611159903 ], "wc_strengths_avg": [ 171.5, 105.17723137637728 ], "wc_weaknesses_avg": [ 246.0, 124.56524394870344 ], "wc_questions_avg": [ 130.75, 82.86246134408512 ], "wc_limitations_avg": [ 11.0, 16.186414056238647 ], "wc_review_avg": [ 731.75, 420.7994623333067 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11595062782447879118&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "weizmann.ac.il;weizmann.ac.il;weizmann.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Flow Factorized Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71938", "id": "LnySNEJAQt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bfc2c20fa2f56a18397eafe1be8a50a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LnySNEJAQt", 
"openreview": "https://openreview.net/forum?id=LnySNEJAQt", "poster": "/media/PosterPDFs/NeurIPS%202023/71938.png?t=1697981840.8030636", "slides": "https://nips.cc/virtual/2023/poster/71938", "video": "https://nips.cc/virtual/2023/poster/71938", "author_site": "Yue Song, Andy Keller, Nicu Sebe, Max Welling", "tldr": "", "abstract": "A prominent goal of representation learning research is to achieve representations which are factorized in a useful manner with respect to the ground truth factors of variation. The fields of disentangled and equivariant representation learning have approached this ideal from a range of complimentary perspectives; however, to date, most approaches have proven to either be ill-specified or insufficiently flexible to effectively separate all realistic factors of interest in a learned latent space. In this work, we propose an alternative viewpoint on such structured representation learning which we call Flow Factorized Representation Learning, and demonstrate it to learn both more efficient and more usefully structured representations than existing frameworks. Specifically, we introduce a generative model which specifies a distinct set of latent probability paths that define different input transformations. Each latent flow is generated by the gradient field of a learned potential following dynamic optimal transport. Our novel setup brings new understandings to both \\textit{disentanglement} and \\textit{equivariance}. We show that our model achieves higher likelihoods on standard representation learning benchmarks while simultaneously being closer to approximately equivariant models. Furthermore, we demonstrate that the transformations learned by our model are flexibly composable and can also extrapolate to new data, implying a degree of robustness and generalizability approaching the ultimate goal of usefully factorized representation learning.", "keywords": "Generative modelling;latent disentanglement;variational autoencoders", "primary_area": "", "supplementary_material": "/attachment/f3ee61d0145e0570fd3d61b7b69444a1007e7c9d.pdf", "author": "Yue Song;T. Anderson Keller;Nicu Sebe;Max Welling", "authorids": "~Yue_Song1;~T._Anderson_Keller1;~Nicu_Sebe1;~Max_Welling1", "gender": "M;M;M;M", "homepage": "https://kingjamessong.github.io/;http://disi.unitn.it/~sebe/;https://staff.fnwi.uva.nl/m.welling/;https://akandykeller.github.io", "dblp": "11/1346;20/3519;16/2286;183/9966.html", "google_scholar": "Uza2i10AAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;https://scholar.google.nl/citations?user=8200InoAAAAJ;Tb86kC0AAAAJ", "orcid": ";0000-0002-6597-7248;0000-0003-1484-2121;", "linkedin": ";;;thomas-andy-keller-63abb88b/", "or_profile": "~Yue_Song1;~Nicu_Sebe1;~Max_Welling1;~Thomas_Anderson_Keller1", "aff": "University of Trento, Italy;University of Trento;University of Amsterdam;University of Amsterdam", "aff_domain": "unitn.it;unitn.it;uva.nl;uva.nl", "position": "PhD student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nsong2023flow,\ntitle={Flow Factorized Representation Learning},\nauthor={Yue Song and T. 
Anderson Keller and Nicu Sebe and Max Welling},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LnySNEJAQt}\n}", "github": "", "project": "", "reviewers": "znKv;wDok;xnNs;ww47;tD2D", "pdf_size": 14660256, "rating": "5;6;6;6;6", "confidence": "3;3;2;3;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "76;120;75;88;154", "wc_strengths": "80;17;23;153;158", "wc_weaknesses": "61;117;136;78;26", "wc_questions": "7;66;26;130;48", "wc_limitations": "8;8;1;7;33", "wc_review": "232;328;261;456;419", "wc_reply_reviewers": "20;48;61;87;106", "wc_reply_authors": "17;40;36;77;92", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.6, 30.42104534693047 ], "wc_strengths_avg": [ 86.2, 60.72692977584162 ], "wc_weaknesses_avg": [ 83.6, 39.29681920970195 ], "wc_questions_avg": [ 55.4, 42.28285704632552 ], "wc_limitations_avg": [ 11.4, 11.11035552986492 ], "wc_review_avg": [ 339.2, 86.88244932090716 ], "wc_reply_reviewers_avg": [ 64.4, 29.977324763894458 ], "wc_reply_authors_avg": [ 52.4, 27.7459907013608 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16609561862610903179&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "unitn.it;unitn.it;uva.nl;uva.nl", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Trento;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.unitn.it;https://www.uva.nl", "aff_unique_abbr": "UniTN;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Italy;Netherlands" }, { "title": "Compositional Generalization from First Principles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71937", "id": "LqOQ1uJmSx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15f6a10899f557ce53fe39939af6f930-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LqOQ1uJmSx", "openreview": "https://openreview.net/forum?id=LqOQ1uJmSx", "poster": "/media/PosterPDFs/NeurIPS%202023/71937.png?t=1702477097.2997365", "slides": "https://nips.cc/virtual/2023/poster/71937", "video": "https://nips.cc/virtual/2023/poster/71937", "author_site": "Thadd\u00e4us Wiedemer, Prasanna Mayilvahanan, Matthias Bethge, Wieland Brendel", "tldr": "", "abstract": "Leveraging the compositional nature of our world to expedite learning and facilitate generalization is a hallmark of human perception. In machine learning, on the other hand, achieving compositional generalization has proven to be an elusive goal, even for models with explicit compositional priors. To get a better handle on compositional generalization, we here approach it from the bottom up: Inspired by identifiable representation learning, we investigate compositionality as a property of the data-generating process rather than the data itself. 
This reformulation enables us to derive mild conditions on only the support of the training distribution and the model architecture, which are sufficient for compositional generalization. We further demonstrate how our theoretical framework applies to real-world scenarios and validate our findings empirically. Our results set the stage for a principled theoretical study of compositional generalization.", "keywords": "compositional generalization;compositionality;generalization;combinatorial generalization;out-of-distribution;out-of-domain;identifiability;disentanglement;object-centric learning;DSprites", "primary_area": "", "supplementary_material": "/attachment/788b335b3d6e718db9c98845793d5b79f1413158.zip", "author": "Thadd\u00e4us Wiedemer;Prasanna Mayilvahanan;Matthias Bethge;Wieland Brendel", "authorids": "~Thadd\u00e4us_Wiedemer1;~Prasanna_Mayilvahanan2;~Matthias_Bethge1;~Wieland_Brendel1", "gender": "M;M;M;M", "homepage": ";;https://bethgelab.org;", "dblp": "327/3433;313/4018;77/3005;37/11107", "google_scholar": "aeCiRSYAAAAJ;3xq1YcYAAAAJ;https://scholar.google.com/citations?hl=en;v-JL-hsAAAAJ", "orcid": "0009-0003-6280-0804;;;", "linkedin": "thaddaeuswiedemer/;;;", "or_profile": "~Thadd\u00e4us_Wiedemer1;~Prasanna_Mayilvahanan2;~Matthias_Bethge1;~Wieland_Brendel1", "aff": "Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Tuebingen;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "is.tuebingen.mpg.de;tuebingen.mpg.de;uni-tuebingen.de;is.mpg.de", "position": "PhD student;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nwiedemer2023compositional,\ntitle={Compositional Generalization from First Principles},\nauthor={Thadd{\\\"a}us Wiedemer and Prasanna Mayilvahanan and Matthias Bethge and Wieland Brendel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LqOQ1uJmSx}\n}", "github": "", "project": "", "reviewers": "1tYF;SW1j;5Ysh;yWdt;zapL", "pdf_size": 724449, "rating": "5;5;6;6;7", "confidence": "3;3;4;4;2", "soundness": "3;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;3;2;2", "wc_summary": "47;53;182;115;75", "wc_strengths": "75;66;4;40;142", "wc_weaknesses": "152;312;206;300;131", "wc_questions": "43;3;127;19;95", "wc_limitations": "9;1;153;22;10", "wc_review": "326;435;672;496;453", "wc_reply_reviewers": "200;266;94;198;19", "wc_reply_authors": "538;223;473;603;0", "reply_reviewers": "2;2;1;3;1", "reply_authors": "3;2;2;3;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 94.4, 49.870231601627836 ], "wc_strengths_avg": [ 65.4, 45.55260695064553 ], "wc_weaknesses_avg": [ 220.2, 74.30316278598104 ], "wc_questions_avg": [ 57.4, 46.68875667652759 ], "wc_limitations_avg": [ 39.0, 57.39337940912697 ], "wc_review_avg": [ 476.4, 112.71663586179282 ], "wc_reply_reviewers_avg": [ 155.4, 87.64838846208183 ], "wc_reply_authors_avg": [ 367.4, 224.3199500713211 ], "reply_reviewers_avg": [ 1.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2857142857142857, "gs_citation": 46, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1699243656868653987&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "is.tuebingen.mpg.de;tuebingen.mpg.de;uni-tuebingen.de;is.mpg.de", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;University of Tuebingen;Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.uni-tuebingen.de/;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;Uni T\u00fcbingen;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "StEik: Stabilizing the Optimization of Neural Signed Distance Functions and Finer Shape Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71936", "id": "Lqv7VS1iBF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d6336c1c2987e9d1d9894edd593478d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Lqv7VS1iBF", "openreview": "https://openreview.net/forum?id=Lqv7VS1iBF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71936", "video": "https://nips.cc/virtual/2023/poster/71936", "author_site": "Huizong Yang, Yuxin Sun, Ganesh Sundaramoorthi, Anthony Yezzi", "tldr": "", "abstract": "We present new insights and a novel paradigm for learning implicit neural representations (INR) of shapes. In particular, we shed light on the popular eikonal loss used for imposing a signed distance function constraint in INR. We show analytically that as the representation power of the network increases, the optimization approaches a partial differential equation (PDE) in the continuum limit that is unstable. We show that this instability can manifest in existing network optimization, leading to irregularities in the reconstructed surface and/or convergence to sub-optimal local minima, and thus fails to capture fine geometric and topological structure. We show analytically how other terms added to the loss, currently used in the literature for other purposes, can actually eliminate these instabilities. However, such terms can over-regularize the surface, preventing the representation of fine shape detail. Based on a similar PDE theory for the continuum limit, we introduce a new regularization term that still counteracts the eikonal instability but without over-regularizing. Furthermore, since stability is now guaranteed in the continuum limit, this stabilization also allows for considering new network structures that are able to represent finer shape detail. We introduce such a structure based on quadratic layers. 
Experiments on multiple benchmark data sets show that our new regularization and network are able to capture more precise shape details and more accurate topology than existing state-of-the-art.", "keywords": "Implicit Neural Representation;Surface Reconstruction", "primary_area": "", "supplementary_material": "/attachment/5cab3b56ed4804bf583579000f1a3cc9a5120b97.zip", "author": "Huizong Yang;Yuxin Sun;Ganesh Sundaramoorthi;Anthony Yezzi", "authorids": "~Huizong_Yang1;~Yuxin_Sun1;~Ganesh_Sundaramoorthi1;~Anthony_Yezzi1", "gender": "M;M;;M", "homepage": ";;;https://www.ece.gatech.edu/faculty-staff-directory/anthony-joseph-yezzi", "dblp": "300/7844;158/7549;;y/AJYezzi", "google_scholar": ";;;", "orcid": ";0000-0002-9180-8050;;", "linkedin": "huizong-yang-2178a818b/;yuxin-sun-972960140/;;", "or_profile": "~Huizong_Yang1;~Yuxin_Sun1;~Ganesh_Sundaramoorthi1;~Anthony_Yezzi1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;;gatech.edu", "position": "PhD student;PhD student;;Professor", "bibtex": "@inproceedings{\nyang2023stabilizing,\ntitle={Stabilizing the Optimization of Neural Signed Distance Functions and Finer Shape Representation},\nauthor={Huizong Yang and Yuxin Sun and Ganesh Sundaramoorthi and Anthony Yezzi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Lqv7VS1iBF}\n}", "github": "", "project": "", "reviewers": "urXf;Bytg;bsBS;biQ8;Nfwa", "pdf_size": 4699526, "rating": "5;6;7;7;8", "confidence": "3;4;3;5;3", "soundness": "3;4;3;4;4", "novelty": "3;4;3;4;3", "presentation": "3;3;3;3;4", "wc_summary": "61;132;92;183;113", "wc_strengths": "29;78;144;53;93", "wc_weaknesses": "52;96;136;185;65", "wc_questions": "2;48;86;96;7", "wc_limitations": "1;14;6;55;22", "wc_review": "145;368;464;572;300", "wc_reply_reviewers": "7;13;25;138;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 116.2, 40.87737760669097 ], "wc_strengths_avg": [ 79.4, 38.979994869163335 ], "wc_weaknesses_avg": [ 106.8, 48.6514131346665 ], "wc_questions_avg": [ 47.8, 38.84533434017526 ], "wc_limitations_avg": [ 19.6, 19.085072700935672 ], "wc_review_avg": [ 369.8, 145.03020375080496 ], "wc_reply_reviewers_avg": [ 41.2, 48.844242240002046 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.04902903378454606, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17434416016977143343&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "gatech.edu;gatech.edu;;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bridging RL Theory and Practice with the Effective Horizon", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71935", "id": "Lr2swAfwff", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b8be628bf719550b560de8bec9456e0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Lr2swAfwff", "openreview": "https://openreview.net/forum?id=Lr2swAfwff", "poster": "/media/PosterPDFs/NeurIPS%202023/71935.png?t=1702357287.9645624", "slides": "https://nips.cc/virtual/2023/poster/71935", "video": "https://nips.cc/virtual/2023/poster/71935", "author_site": "Cassidy Laidlaw, Stuart J Russell, Anca Dragan", "tldr": "", "abstract": "Deep reinforcement learning (RL) works impressively in some environments and fails catastrophically in others. Ideally, RL theory should be able to provide an understanding of why this is, i.e. bounds predictive of practical performance. Unfortunately, current theory does not quite have this ability. We compare standard deep RL algorithms to prior sample complexity bounds by introducing a new dataset, BRIDGE. It consists of 155 MDPs from common deep RL benchmarks, along with their corresponding tabular representations, which enables us to exactly compute instance-dependent bounds. We find that prior bounds do not correlate well with when deep RL succeeds vs. fails, but discover a surprising property that does. When actions with the highest Q-values under the *random* policy also have the highest Q-values under the *optimal* policy\u2014i.e., when it is optimal to act greedily with respect to the random's policy Q function\u2014deep RL tends to succeed; when they don't, deep RL tends to fail. We generalize this property into a new complexity measure of an MDP that we call the *effective horizon*, which roughly corresponds to how many steps of lookahead search would be needed in that MDP in order to identify the next optimal action, when leaf nodes are evaluated with random rollouts. Using BRIDGE, we show that the effective horizon-based bounds are more closely reflective of the empirical performance of PPO and DQN than prior sample complexity bounds across four metrics. We also show that, unlike existing bounds, the effective horizon can predict the effects of using reward shaping or a pre-trained exploration policy. 
Our code and data are available at https://github.com/cassidylaidlaw/effective-horizon.", "keywords": "reinforcement learning;RL theory;theory of reinforcement learning;instance-dependent bounds;empirical validation of theory", "primary_area": "", "supplementary_material": "", "author": "Cassidy Laidlaw;Stuart Russell;Anca Dragan", "authorids": "~Cassidy_Laidlaw1;~Stuart_Russell1;~Anca_Dragan1", "gender": "M;M;F", "homepage": "https://cassidylaidlaw.com;https://people.eecs.berkeley.edu/~russell/;http://www.ancadragan.com/", "dblp": "241/5375;;", "google_scholar": "DzeJ67UAAAAJ;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Cassidy_Laidlaw1;~Stuart_Russell1;~Anca_Dragan1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nlaidlaw2023bridging,\ntitle={Bridging {RL} Theory and Practice with the Effective Horizon},\nauthor={Cassidy Laidlaw and Stuart Russell and Anca Dragan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Lr2swAfwff}\n}", "github": "", "project": "", "reviewers": "iUUZ;GiSj;rChU;KSUv", "pdf_size": 1623459, "rating": "7;7;7;8", "confidence": "3;3;3;3", "soundness": "3;4;4;4", "novelty": "3;4;3;4", "presentation": "2;4;4;4", "wc_summary": "112;58;158;131", "wc_strengths": "114;228;126;92", "wc_weaknesses": "154;204;60;589", "wc_questions": "149;80;132;55", "wc_limitations": "5;10;42;1", "wc_review": "534;580;518;868", "wc_reply_reviewers": "16;57;16;522", "wc_reply_authors": "0;25;0;584", "reply_reviewers": "1;1;1;2", "reply_authors": "1;2;1;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 114.75, 36.615399765672365 ], "wc_strengths_avg": [ 140.0, 52.24940191045253 ], "wc_weaknesses_avg": [ 251.75, 201.4576568413323 ], "wc_questions_avg": [ 104.0, 38.03288051147323 ], "wc_limitations_avg": [ 14.5, 16.194134740701646 ], "wc_review_avg": [ 625.0, 142.13022197970423 ], "wc_reply_reviewers_avg": [ 152.75, 213.8426699702377 ], "wc_reply_authors_avg": [ 152.25, 249.47983385436186 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10345001677706430856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Prompt-augmented Temporal Point Process for Streaming Event Sequence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71934", "id": "LswqtKU9op", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c129892b4f9c8326aba665425a470c5-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=LswqtKU9op", "openreview": "https://openreview.net/forum?id=LswqtKU9op", "poster": "/media/PosterPDFs/NeurIPS%202023/71934.png?t=1697901989.33718", "slides": "https://nips.cc/virtual/2023/poster/71934", "video": "https://nips.cc/virtual/2023/poster/71934", "author_site": "Siqiao Xue, Yan Wang, Zhixuan Chu, Xiaoming Shi, Caigao JIANG, Hongyan Hao, Gangwei Jiang, Xiaoyun Feng, James Zhang, Jun Zhou", "tldr": "", "abstract": "Neural Temporal Point Processes (TPPs) are the prevalent paradigm for modeling continuous-time event sequences, such as user activities on the web and financial transactions. In real world applications, the event data typically comes in a streaming manner, where the distribution of the patterns may shift over time. Under the privacy and memory constraints commonly seen in real scenarios, how to continuously monitor a TPP to learn the streaming event sequence is an important yet under-investigated problem. In this work, we approach this problem by adopting Continual Learning (CL), which aims to enable a model to continuously learn a sequence of tasks without catastrophic forgetting. While CL for event sequence is less well studied, we present a simple yet effective framework, PromptTPP, by integrating the base TPP with a continuous-time retrieval prompt pool. In our proposed framework, prompts are small learnable parameters, maintained in a memory space and jointly optimized with the base TPP so that the model is properly instructed to learn event streams arriving sequentially without buffering past examples or task-specific attributes. We formalize a novel and realistic experimental setup for modeling event streams, where PromptTPP consistently sets state-of-the-art performance across two real user behavior datasets.", "keywords": "prompt;point process;event sequence;continual learning.", "primary_area": "", "supplementary_material": "/attachment/e70cb2250f194ba188e97a055939feace3affa11.zip", "author": "Siqiao Xue;Yan Wang;Zhixuan Chu;Xiaoming Shi;Caigao JIANG;Hongyan Hao;Gangwei Jiang;Xiaoyun Feng;James Y. 
Zhang;JUN ZHOU", "authorids": "~Siqiao_Xue1;~Yan_Wang34;~Zhixuan_Chu1;~Xiaoming_Shi2;~Caigao_JIANG2;~Hongyan_Hao1;~Gangwei_Jiang1;~Xiaoyun_Feng1;~James_Y._Zhang1;~JUN_ZHOU6", "gender": "M;;M;M;M;M;M;M;M;F", "homepage": "https://www.antgroup.com/en;https://ai.nju.edu.cn/main.htm;;;;;https://gangwJiang.github.io;https://scholar.google.com/citations?user=Ywakh_sAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en;http://home.ustc.edu.cn/~xy2012/", "dblp": "302/7766;;258/1233;65/9789-1;292/3817;264/1941;286/8533;151/3086;99/3847-11;13/5914", "google_scholar": "pZqTpoEAAAAJ;https://scholar.google.com/citations?view_op=list_works;a4IuTngAAAAJ;0WMTWacAAAAJ;;;https://scholar.google.com.hk/citations?hl=zh-CN;Ywakh_sAAAAJ;mCVvloEAAAAJ;8Dbd1S8AAAAJ", "orcid": ";0009-0006-2938-357X;;0000-0003-0764-8961;;0000-0002-0867-7628;;0000-0001-6519-676X;0000-0001-6033-6102;0000-0002-4131-0625", "linkedin": ";;;;caigao-jiang-309710194;;;jamesymzhang/;;", "or_profile": "~Siqiao_Xue1;~Yan_Wang34;~Zhixuan_Chu1;~Xiaoming_Shi2;~Caigao_JIANG2;~Hongyan_Hao1;~Gangwei_Jiang1;~James_Y._Zhang1;~JUN_ZHOU6;~Trudy_Fung2", "aff": "Alibaba;Alibaba Group;Ant Group;Ant Group;Alibaba Group;;University of Science and Technology of China;Ant Group;Ant Group;Ant Group", "aff_domain": "alibaba-inc.com;antgroup.com;antgroup.com;antgroup.com;alibaba-inc.com;;ustc.edu.cn;alipay.com;antgroup.com;antgroup.com", "position": "researcher;Researcher;Researcher;Researcher;Researcher;;PhD student;managing director;Researcher;Researcher", "bibtex": "@inproceedings{\nxue2023promptaugmented,\ntitle={Prompt-augmented Temporal Point Process for Streaming Event Sequence},\nauthor={Siqiao Xue and Yan Wang and Zhixuan Chu and Xiaoming Shi and Caigao JIANG and Hongyan Hao and Gangwei Jiang and Xiaoyun Feng and James Y. 
Zhang and JUN ZHOU},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LswqtKU9op}\n}", "github": "", "project": "", "reviewers": "GihP;nDRf;26jC;n7nj", "pdf_size": 969315, "rating": "6;6;6;7", "confidence": "4;2;3;2", "soundness": "3;3;2;4", "novelty": "3;3;2;3", "presentation": "2;3;2;4", "wc_summary": "45;77;74;65", "wc_strengths": "20;28;81;50", "wc_weaknesses": "239;46;69;71", "wc_questions": "2;41;98;122", "wc_limitations": "2;12;4;1", "wc_review": "308;204;326;309", "wc_reply_reviewers": "10;20;20;12", "wc_reply_authors": "31;20;0;29", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 65.25, 12.497499749949988 ], "wc_strengths_avg": [ 44.75, 23.636571240347024 ], "wc_weaknesses_avg": [ 106.25, 77.27022388993059 ], "wc_questions_avg": [ 65.75, 47.11886564848521 ], "wc_limitations_avg": [ 4.75, 4.322904116447646 ], "wc_review_avg": [ 286.75, 48.30825498814877 ], "wc_reply_reviewers_avg": [ 15.5, 4.55521678957215 ], "wc_reply_authors_avg": [ 20.0, 12.267844146385297 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15956341351246096801&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "alibaba-inc.com;antgroup.com;antgroup.com;antgroup.com;alibaba-inc.com;;ustc.edu.cn;alipay.com;antgroup.com;antgroup.com", "author_num": 10, "aff_unique_index": "0;1;2;2;1;3;2;2;2", "aff_unique_norm": "Alibaba Group Holding Limited;Alibaba Group;Ant Group;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.alibaba.com;https://www.alibaba.com;https://www.antgroup.com;http://www.ustc.edu.cn", "aff_unique_abbr": "Alibaba;Alibaba;Ant Group;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Sharp Spectral Rates for Koopman Operator Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71933", "id": "Lt3jqxsbVO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/663bce02a0050c4a11f1eb8a7f1429d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Lt3jqxsbVO", "openreview": "https://openreview.net/forum?id=Lt3jqxsbVO", "poster": "/media/PosterPDFs/NeurIPS%202023/71933.png?t=1702311428.8308697", "slides": "https://nips.cc/virtual/2023/poster/71933", "video": "https://nips.cc/virtual/2023/poster/71933", "author_site": "Vladimir Kostic, Karim Lounici, Pietro Novelli, Massimiliano Pontil", "tldr": "", "abstract": "Non-linear dynamical systems can be handily described by the associated Koopman operator, whose action evolves every observable of the system forward in time. Learning the Koopman operator and its spectral decomposition from data is enabled by a number of algorithms. In this work we present for the first time non-asymptotic learning bounds for the Koopman eigenvalues and eigenfunctions. 
\nWe focus on time-reversal-invariant stochastic dynamical systems, including the important example of Langevin dynamics. \nWe analyze two popular estimators: Extended Dynamic Mode Decomposition (EDMD) and Reduced Rank Regression (RRR). Our results critically hinge on novel minimax estimation bounds for the operator norm error, which may be of independent interest. Our spectral learning bounds are driven by the simultaneous control of the operator norm error and a novel metric distortion functional of the estimated eigenfunctions. The bounds indicate that both EDMD and RRR have similar variance, but EDMD suffers from a larger bias, which might be detrimental to its learning rate. Our results shed new light on the emergence of spurious eigenvalues, an issue which is well known empirically. Numerical experiments illustrate the implications of the bounds in practice.", "keywords": "Statistical Learning Theory;Dynamical Systems", "primary_area": "", "supplementary_material": "/attachment/f3b34e6f994fb66dfbdca4dc48fe4ef1c805e5d1.zip", "author": "Vladimir R Kostic;Karim Lounici;Pietro Novelli;Massimiliano Pontil", "authorids": "~Vladimir_R_Kostic1;~Karim_Lounici1;~Pietro_Novelli1;~Massimiliano_Pontil4", "gender": "M;;M;Not Specified", "homepage": "https://vladi-iit.github.io/;;;https://www.iit.it/web/computational-statistics-and-machine-learning", "dblp": "94/879;;318/3513;", "google_scholar": "66gV7SAAAAAJ;;;lcOacs8AAAAJ", "orcid": ";;0000-0003-1623-5659;0000-0001-9415-098X", "linkedin": "vladimir-kostic-77500652/;;;", "or_profile": "~Vladimir_R_Kostic1;~Karim_Lounici1;~Pietro_Novelli1;~Massimiliano_Pontil4", "aff": "University of Novi Sad;;Istituto Italiano di Tecnologia;University College London, University of London", "aff_domain": "uns.ac.rs;;iit.it;ucl.ac.uk", "position": "Associate Professor;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkostic2023sharp,\ntitle={Sharp Spectral Rates for Koopman Operator Learning},\nauthor={Vladimir R Kostic and Karim Lounici and Pietro Novelli and Massimiliano Pontil},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Lt3jqxsbVO}\n}", "github": "", "project": "", "reviewers": "nHa6;ZSn2;GGn4;1FxQ;B3rh", "pdf_size": 1543798, "rating": "6;7;7;7;9", "confidence": "2;3;3;4;3", "soundness": "3;3;3;4;4", "novelty": "3;3;3;4;4", "presentation": "3;4;4;4;4", "wc_summary": "57;83;157;69;102", "wc_strengths": "40;100;38;54;48", "wc_weaknesses": "188;288;100;36;13", "wc_questions": "201;34;80;114;134", "wc_limitations": "46;16;5;15;5", "wc_review": "532;521;380;288;302", "wc_reply_reviewers": "43;61;16;15;49", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 93.6, 35.063371201297805 ], "wc_strengths_avg": [ 56.0, 22.733235581412515 ], "wc_weaknesses_avg": [ 125.0, 101.61495952860484 ], "wc_questions_avg": [ 112.6, 55.70493694458329 ], "wc_limitations_avg": [ 17.4, 15.054567413247051 ], "wc_review_avg": [ 404.6, 104.4099612106048 ], "wc_reply_reviewers_avg": [ 36.8, 18.334666618185345 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3227486121839514,
"gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6629215814676430655&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "uns.ac.rs;;iit.it;ucl.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Novi Sad;Istituto Italiano di Tecnologia;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uns.ac.rs;https://www.iit.it;https://www.ucl.ac.uk", "aff_unique_abbr": "UNS;IIT;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Serbia;Italy;United Kingdom" }, { "title": "CHAMMI: A benchmark for channel-adaptive models in microscopy imaging", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73620", "id": "Luc1bZLeMY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ecca655ac67685fdc2155da0eceda6b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Luc1bZLeMY", "openreview": "https://openreview.net/forum?id=Luc1bZLeMY", "poster": "/media/PosterPDFs/NeurIPS%202023/73620.png?t=1702066329.5384727", "slides": "https://nips.cc/virtual/2023/poster/73620", "video": "https://nips.cc/virtual/2023/poster/73620", "author_site": "Zitong Sam Chen, Chau Pham, Siqi Wang, Michael Doron, Nikita Moshkov, Bryan Plummer, Juan C. Caicedo", "tldr": "", "abstract": "Most neural networks assume that input images have a fixed number of channels (three for RGB images). However, there are many settings where the number of channels may vary, such as microscopy images where the number of channels changes depending on instruments and experimental goals. Yet, there has not been a systemic attempt to create and evaluate neural networks that are invariant to the number and type of channels. As a result, trained models remain specific to individual studies and are hardly reusable for other microscopy settings. In this paper, we present a benchmark for investigating channel-adaptive models in microscopy imaging, which consists of 1) a dataset of varied-channel single-cell images, and 2) a biologically relevant evaluation framework. In addition, we adapted several existing techniques to create channel-adaptive models and compared their performance on this benchmark to fixed-channel, baseline models. We find that channel-adaptive models can generalize better to out-of-domain tasks and can be computationally efficient. We contribute a curated dataset and an evaluation API to facilitate objective comparisons in future research and applications.", "keywords": "Deep Learning;Transfer Learning;Bioinformatics and Systems Biology;Computer Vision;Benchmarks", "primary_area": "", "supplementary_material": "/attachment/256fe05138015ef5b83e4b8d3e6b0fd23910ca57.pdf", "author": "Zitong Chen;Chau Pham;Siqi Wang;Michael Doron;Nikita Moshkov;Bryan A. 
Plummer;Juan C Caicedo", "authorids": "~Zitong_Chen1;~Chau_Pham1;~Siqi_Wang2;~Michael_Doron1;~Nikita_Moshkov1;~Bryan_A._Plummer1;~Juan_C_Caicedo1", "gender": ";M;F;M;Not Specified;;M", "homepage": "https://github.com/Zitong-Chen-16;http://mchaupham.com;https://cs-people.bu.edu/siqiwang/;;;https://morgridge.org/research/labs/caicedo/;http://bryanplummer.com/", "dblp": ";259/7023-1;;;;71/5164;163/2330", "google_scholar": ";fu-qT-wAAAAJ;;X0u_7RcAAAAJ;https://scholar.google.hu/citations?user=AHlTUiwAAAAJ;U50zLvkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;", "linkedin": ";chau-pham-bu;siqi-wang-bab3b9128/;michael-doron-757b1324/;;jccaicedo;", "or_profile": "~Zitong_Chen1;~Chau_Pham1;~Siqi_Wang2;~Michael_Doron1;~Nikita_Moshkov1;~Juan_C_Caicedo1;~Bryan_Allen_Plummer1", "aff": "Broad Institute;Boston University;Boston University;Broad Institute;Broad Institute;Broad Institute of MIT and Harvard;Boston University", "aff_domain": "broadinstitute.org;bu.edu;bu.edu;broadinstitute.org;broadinstitute.org;broadinstitute.org;bu.edu", "position": "Researcher;PhD student;PhD student;Postdoc;Visiting Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023chammi,\ntitle={{CHAMMI}: A benchmark for channel-adaptive models in microscopy imaging},\nauthor={Zitong Chen and Chau Pham and Siqi Wang and Michael Doron and Nikita Moshkov and Bryan A. Plummer and Juan C Caicedo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Luc1bZLeMY}\n}", "github": "", "project": "", "reviewers": "jtSk;NWwY;aPrM;AS4C;8Gi5", "pdf_size": 2101576, "rating": "7;7;7;7;9", "confidence": "3;4;3;4;5", "wc_summary_and_contributions": "168;124;94;116;91", "wc_strengths": "40;150;194;105;127", "wc_improvement": "133;234;117;116;163", "wc_limitations": "11;55;7;13;24", "wc_correctness": "34;57;10;38;77", "wc_clarity": "5;5;6;82;24", "wc_relation_to_prior_work": "10;22;10;6;122", "wc_documentation": "25;12;12;44;71", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "427;660;451;521;700", "wc_reply_reviewers": "0;66;29;125;28", "wc_reply_authors": "618;1599;445;1548;1505", "reply_reviewers": "0;1;1;2;1", "reply_authors": "1;3;1;3;3", "rating_avg": [ 7.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 118.6, 27.72435752186153 ], "wc_strengths_avg": [ 123.2, 50.987841687994596 ], "wc_improvement_avg": [ 152.6, 44.10260763265592 ], "wc_limitations_avg": [ 22.0, 17.435595774162696 ], "wc_correctness_avg": [ 43.2, 22.569005294872877 ], "wc_clarity_avg": [ 24.4, 29.695790947540022 ], "wc_relation_to_prior_work_avg": [ 34.0, 44.32606456702422 ], "wc_documentation_avg": [ 32.8, 22.408926792686884 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 551.8, 109.86792070481721 ], "wc_reply_reviewers_avg": [ 49.6, 43.13977283203981 ], "wc_reply_authors_avg": [ 1143.0, 503.1568343965925 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8017837257372732, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9870226274162085717&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "broadinstitute.org;bu.edu;bu.edu;broadinstitute.org;broadinstitute.org;broadinstitute.org;bu.edu", "author_num": 7, "aff_unique_index": "0;1;1;0;0;0;1", 
"aff_unique_norm": "Broad Institute;Boston University", "aff_unique_dep": ";", "aff_unique_url": "https://www.broadinstitute.org;https://www.bu.edu", "aff_unique_abbr": "Broad;BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffused Redundancy in Pre-trained Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71932", "id": "LyAuNoZkGP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c86142265c5e2c900613dd1d031cb90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LyAuNoZkGP", "openreview": "https://openreview.net/forum?id=LyAuNoZkGP", "poster": "/media/PosterPDFs/NeurIPS%202023/71932.png?t=1702025576.0762124", "slides": "https://nips.cc/virtual/2023/poster/71932", "video": "https://nips.cc/virtual/2023/poster/71932", "author_site": "Vedant Nanda, Till Speicher, John Dickerson, Krishna Gummadi, Soheil Feizi, Adrian Weller", "tldr": "", "abstract": "Representations learned by pre-training a neural network on a large dataset are increasingly used successfully to perform a variety of downstream tasks. In this work, we take a closer look at how features are encoded in such pre-trained representations. We find that learned representations in a given layer exhibit a degree of diffuse redundancy, ie, any randomly chosen subset of neurons in the layer that is larger than a threshold size shares a large degree of similarity with the full layer and is able to perform similarly as the whole layer on a variety of downstream tasks. For example, a linear probe trained on $20\\%$ of randomly picked neurons from the penultimate layer of a ResNet50 pre-trained on ImageNet1k achieves an accuracy within $5\\%$ of a linear probe trained on the full layer of neurons for downstream CIFAR10 classification. We conduct experiments on different neural architectures (including CNNs and Transformers) pre-trained on both ImageNet1k and ImageNet21k and evaluate a variety of downstream tasks taken from the VTAB benchmark. We find that the loss \\& dataset used during pre-training largely govern the degree of diffuse redundancy and the \"critical mass\" of neurons needed often depends on the downstream task, suggesting that there is a task-inherent redundancy-performance Pareto frontier. Our findings shed light on the nature of representations learned by pre-trained deep neural networks and suggest that entire layers might not be necessary to perform many downstream tasks. We investigate the potential for exploiting this redundancy to achieve efficient generalization for downstream tasks and also draw caution to certain possible unintended consequences. Our code is available at \\url{https://github.com/nvedant07/diffused-redundancy}.", "keywords": "representation learning;redundancy;transfer learning;fairness", "primary_area": "", "supplementary_material": "/attachment/2324c918a673e5847bd093edf2764b7b053b2b64.pdf", "author": "Vedant Nanda;Till Speicher;John P Dickerson;Krishna P. 
Gummadi;Soheil Feizi;Adrian Weller", "authorids": "~Vedant_Nanda2;~Till_Speicher1;~John_P_Dickerson1;~Krishna_P._Gummadi1;~Soheil_Feizi2;~Adrian_Weller1", "gender": "M;M;M;M;M;M", "homepage": "https://tillspeicher.com/;https://jpdickerson.com/;https://www.mpi-sws.org/~gummadi/;https://www.cs.umd.edu/~sfeizi/;http://mlg.eng.cam.ac.uk/adrian/;", "dblp": "144/7849;75/8479;g/PKrishnaGummadi;57/2132;73/8324;201/5458", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=QgDpfCQAAAAJ;https://scholar.google.com.tw/citations?user=Bz3APTsAAAAJ;lptAmrMAAAAJ;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;9GfPrCsAAAAJ", "orcid": "0009-0000-1172-2525;0000-0003-2231-680X;;;;", "linkedin": ";john-dickerson-83a74a7/;;;;", "or_profile": "~Till_Speicher1;~John_P_Dickerson1;~Krishna_P._Gummadi1;~Soheil_Feizi2;~Adrian_Weller1;~Vedant_Nanda1", "aff": "MPI-SWS;Optimized Markets, Inc;MPI-SWS;University of Maryland, College Park;University of Cambridge;Amazon", "aff_domain": "mpi-sws.org;optimizedmarkets.com;mpi-sws.org;umd.edu;cam.ac.uk;amazon.com", "position": "PhD student;Consultant;Full Professor;Associate Professor;Principal Researcher;Intern", "bibtex": "@inproceedings{\nnanda2023diffused,\ntitle={Diffused Redundancy in Pre-trained Representations},\nauthor={Vedant Nanda and Till Speicher and John P Dickerson and Krishna P. Gummadi and Soheil Feizi and Adrian Weller},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LyAuNoZkGP}\n}", "github": "", "project": "", "reviewers": "qvdG;1gmQ;yCaj;MR5K;USaP", "pdf_size": 1979874, "rating": "5;6;6;6;7", "confidence": "3;3;4;4;4", "soundness": "2;3;2;3;4", "novelty": "2;3;2;3;3", "presentation": "2;4;3;3;4", "wc_summary": "124;97;100;104;100", "wc_strengths": "40;99;66;65;48", "wc_weaknesses": "84;399;350;161;161", "wc_questions": "61;112;127;63;209", "wc_limitations": "1;29;56;1;15", "wc_review": "310;736;699;394;533", "wc_reply_reviewers": "21;85;58;319;135", "wc_reply_authors": "0;49;0;627;457", "reply_reviewers": "1;1;1;5;1", "reply_authors": "1;2;1;4;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 105.0, 9.757048734120374 ], "wc_strengths_avg": [ 63.6, 20.303694245136768 ], "wc_weaknesses_avg": [ 231.0, 121.48580163953316 ], "wc_questions_avg": [ 114.4, 54.05034689990435 ], "wc_limitations_avg": [ 20.4, 20.60679499582601 ], "wc_review_avg": [ 534.4, 166.01518002881545 ], "wc_reply_reviewers_avg": [ 123.6, 104.52865635795766 ], "wc_reply_authors_avg": [ 226.6, 263.6820813024654 ], "reply_reviewers_avg": [ 1.8, 1.6 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12146086054151783720&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "mpi-sws.org;optimizedmarkets.com;mpi-sws.org;umd.edu;cam.ac.uk;amazon.com", "author_num": 6, "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "Max Planck Institute for Software Systems;Optimized Markets, Inc;University of Maryland;University of Cambridge;Amazon", "aff_unique_dep": ";;;;Amazon.com, Inc.", "aff_unique_url": 
"https://www.mpi-sws.org;;https://www/umd.edu;https://www.cam.ac.uk;https://www.amazon.com", "aff_unique_abbr": "MPI-SWS;;UMD;Cambridge;Amazon", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";College Park;Cambridge", "aff_country_unique_index": "0;1;0;1;2;1", "aff_country_unique": "Germany;United States;United Kingdom" }, { "title": "Transformers learn to implement preconditioned gradient descent for in-context learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71931", "id": "LziniAXEI9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8ed3d610ea4b68e7afb30ea7d01422c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=LziniAXEI9", "openreview": "https://openreview.net/forum?id=LziniAXEI9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71931", "video": "https://nips.cc/virtual/2023/poster/71931", "author_site": "Kwangjun Ahn, Xiang Cheng, Hadi Daneshmand, Suvrit Sra", "tldr": "", "abstract": "Several recent works demonstrate that transformers can implement algorithms like gradient descent. By a careful construction of weights, these works show that multiple layers of transformers are expressive enough to simulate iterations of gradient descent. Going beyond the question of expressivity, we ask: \\emph{Can transformers learn to implement such algorithms by training over random problem instances?} To our knowledge, we make the first theoretical progress on this question via an analysis of the loss landscape for linear transformers trained over random instances of linear regression. For a single attention layer, we prove the global minimum of the training objective implements a single iteration of preconditioned gradient descent. Notably, the preconditioning matrix not only adapts to the input distribution but also to the variance induced by data inadequacy. For a transformer with $L$ attention layers, we prove certain critical points of the training objective implement $L$ iterations of preconditioned gradient descent. 
Our results call for future theoretical studies on learning algorithms by training transformers.", "keywords": "Transformers;In-context learning;adaptive gradient methods", "primary_area": "", "supplementary_material": "/attachment/8d99cff7b7e2899a4a951eb1ef0816aa505f21b0.pdf", "author": "Kwangjun Ahn;Xiang Cheng;Hadi Daneshmand;Suvrit Sra", "authorids": "~Kwangjun_Ahn2;~Xiang_Cheng1;~Hadi_Daneshmand1;~Suvrit_Sra1", "gender": ";M;;M", "homepage": "http://kjahn.mit.edu/;https://sites.google.com/berkeley.edu/xiangcheng/home;https://optml.mit.edu;https://scholar.google.com/citations?user=roFM8XsAAAAJ&hl=en", "dblp": ";29/1059-6;90/930;146/0473", "google_scholar": "z94iNtgAAAAJ;-WJinlEAAAAJ;eyCw9goAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kwangjun_Ahn2;~Xiang_Cheng1;~Suvrit_Sra1;~SEYEDMOHAMMADHADI_DANESHMAND1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Boston University", "aff_domain": "mit.edu;mit.edu;mit.edu;bu.edu", "position": "PhD student;Postdoc;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nahn2023transformers,\ntitle={Transformers learn to implement preconditioned gradient descent for in-context learning},\nauthor={Kwangjun Ahn and Xiang Cheng and Hadi Daneshmand and Suvrit Sra},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=LziniAXEI9}\n}", "github": "", "project": "", "reviewers": "iy8f;vRHd;k8UL;m6sU", "pdf_size": 1179604, "rating": "6;7;7;7", "confidence": "2;4;4;4", "soundness": "3;4;4;4", "novelty": "2;3;4;4", "presentation": "3;3;4;3", "wc_summary": "53;203;184;68", "wc_strengths": "24;99;122;115", "wc_weaknesses": "127;170;216;54", "wc_questions": "53;418;5;55", "wc_limitations": "28;17;38;1", "wc_review": "285;907;565;293", "wc_reply_reviewers": "39;46;41;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 127.0, 67.04848991588104 ], "wc_strengths_avg": [ 90.0, 39.00640972968417 ], "wc_weaknesses_avg": [ 141.75, 59.642162100312895 ], "wc_questions_avg": [ 132.75, 165.90113772967322 ], "wc_limitations_avg": [ 21.0, 13.729530217745982 ], "wc_review_avg": [ 512.5, 254.12742866522692 ], "wc_reply_reviewers_avg": [ 35.25, 11.96609794377432 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 240, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1947267552973315838&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;mit.edu;bu.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Boston University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.bu.edu", "aff_unique_abbr": "MIT;BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "M03sZkmJXN", "title": "Learning Complete Protein Representation by Deep Coupling of Sequence and Structure", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning effective representations 
is crucial for understanding proteins and their biological functions. Recent advancements in language models and graph neural networks have enabled protein models to leverage primary or tertiary structure information to learn representations. However, the lack of practical methods to deeply co-model the relationships between protein sequences and structures has led to suboptimal embeddings. In this work, we propose CoupleNet, a network that couples protein sequence and structure to obtain informative protein representations. CoupleNet incorporates multiple levels of features in proteins, including the residue identities and positions for sequences, as well as geometric representations for tertiary structures. We construct two types of graphs to model the extracted sequential features and structural geometries, achieving completeness on these graphs, respectively, and perform convolution on nodes and edges simultaneously to obtain superior embeddings. Experimental results on a range of tasks, such as protein fold classification and function prediction, demonstrate that our proposed model outperforms the state-of-the-art methods by large margins. ", "keywords": "Bioinformatics;Protein Representation Learning", "primary_area": "", "supplementary_material": "/attachment/b4830c08fb5a86668debca28eaf9f1786eb097e2.zip", "author": "Bozhen Hu;Cheng Tan;Jun Xia;Jiangbin Zheng;Yufei Huang;Lirong Wu;Yue Liu;Yongjie Xu;Stan Z. Li", "authorids": "~Bozhen_Hu1;~Cheng_Tan1;~Jun_Xia1;~Jiangbin_Zheng3;~Yufei_Huang4;~Lirong_Wu1;~Yue_Liu10;~Yongjie_Xu2;~Stan_Z._Li2", "gender": "M;M;M;M;M;;M;M;M", "homepage": ";https://chengtan9907.github.io/;http://junxia97.github.io/;;https://2021.igem.org/Team:ZJU-China;;https://yueliu1999.github.io/;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "279/8665;70/1533-12.html;;;68/1946-2;15/10330;74/1932-8;123/9257.html;l/StanZLi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;6kTV6aMAAAAJ;aPKKpSYAAAAJ;;qmTjdwIAAAAJ;Tk7TrCoAAAAJ;5tfpu3MAAAAJ;https://scholar.google.com.hk/citations?user=ciG27FYAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-3428-0114;;;0000-0003-3305-0103;0009-0007-8184-4529;;;0000-0002-6045-1626;", "linkedin": ";;;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Bozhen_Hu1;~Cheng_Tan1;~Jun_Xia1;~Jiangbin_Zheng3;~Yufei_Huang4;~Lirong_Wu1;~Yue_Liu10;~Yongjie_Xu2;~Stan_Z._Li1", "aff": "Westlake University;Zhejiang University & Westlake University;Westlake University, China;Westlake University;Zhejiang University;Westlake University;National University of Defense Technology;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;zju.edu.cn;westlake.edu.cn;nudt.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;MS student;PhD student;Chair Professor", "bibtex": "@misc{\nhu2023learning,\ntitle={Learning Complete Protein Representation by Deep Coupling of Sequence and Structure},\nauthor={Bozhen Hu and Cheng Tan and Jun Xia and Jiangbin Zheng and Yufei Huang and Lirong Wu and Yue Liu and Yongjie Xu and Stan Z. 
Li},\nyear={2023},\nurl={https://openreview.net/forum?id=M03sZkmJXN}\n}", "github": "", "project": "", "reviewers": "rGTX;7zKz;4x6Z;S326", "site": "https://openreview.net/forum?id=M03sZkmJXN", "pdf_size": 2301651, "rating": "4;4;6;6", "confidence": "3;3;4;3", "soundness": "3;3;2;3", "novelty": "2;2;3;2", "presentation": "3;3;2;3", "wc_summary": "68;35;253;104", "wc_strengths": "129;72;51;86", "wc_weaknesses": "204;98;348;267", "wc_questions": "67;119;377;159", "wc_limitations": "1;12;94;1", "wc_review": "469;336;1123;617", "wc_reply_reviewers": "171;338;828;0", "wc_reply_authors": "309;620;1950;0", "reply_reviewers": "1;2;2;0", "reply_authors": "3;3;4;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 115.0, 83.3276664739869 ], "wc_strengths_avg": [ 84.5, 28.552583070538468 ], "wc_weaknesses_avg": [ 229.25, 91.36568009925827 ], "wc_questions_avg": [ 180.5, 118.04554205898671 ], "wc_limitations_avg": [ 27.0, 38.942264957241505 ], "wc_review_avg": [ 636.25, 298.08503400875395 ], "wc_reply_reviewers_avg": [ 334.25, 309.10222823525555 ], "wc_reply_authors_avg": [ 719.75, 743.3405595687618 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3953751725974470454&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0;1;0;2;0;0", "aff_unique_norm": "Westlake University;Zhejiang University;National University of Defense Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.westlake.edu.cn;http://www.zju.edu.cn;http://www.nudt.edu.cn/", "aff_unique_abbr": "WU;ZJU;NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "M1dTz6QmuM", "title": "The RL Perceptron: Generalisation Dynamics of Policy Learning in High Dimensions", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement learning (RL) algorithms have proven transformative in a range of domains. To tackle real-world domains, these systems often use neural networks to learn policies directly from pixels or other high-dimensional sensory input. By contrast, much theory of RL has focused on discrete state spaces or worst-case analysis, and fundamental questions remain about the dynamics of policy learning in high-dimensional settings. Here, we propose a solvable high-dimensional model of RL that can capture a variety of learning protocols, and derive its typical dynamics as a set of closed-form ordinary differential equations (ODEs). We derive optimal schedules for the learning rates and task difficulty\u2014analogous to annealing schemes and curricula during training in RL, and show that the model exhibits rich behaviour, including delayed learning under sparse rewards; a variety of learning regimes depending on reward baselines; and a speed-accuracy trade-off driven by reward stringency. Experiments on a variant of the Procgen game \u201cBossfight\u201d also show such a speed-accuracy trade-off in practice. 
Together, these results take a step towards closing the gap between theory and practice in high-dimensional RL.", "keywords": "Statistical physics of learning;generalisation models;reinforcement learning;Reinforce Policy gradient", "primary_area": "", "supplementary_material": "/attachment/09e1ad84abf25b7bb6f15f5668e516a0828afde5.zip", "author": "Nishil Patel;Sebastian Lee;Stefano Sarao Mannelli;Sebastian Goldt;Andrew M Saxe", "authorids": "~Nishil_Patel1;~Sebastian_Lee1;~Stefano_Sarao_Mannelli1;~Sebastian_Goldt1;~Andrew_M_Saxe1", "gender": "M;;M;M;M", "homepage": ";https://seblee97.github.io/;https://stefsmlab.github.io/;https://datascience.sissa.it/research-unit/12/theory-of-neural-networks;https://www.saxelab.org", "dblp": ";;232/3343;234/8941;39/6894", "google_scholar": ";Vl9WQ0EAAAAJ;https://scholar.google.it/citations?user=Kq272_MAAAAJ;R06wsMkAAAAJ;h0Al1fcAAAAJ", "orcid": ";;;;0000-0002-9831-8812", "linkedin": "nishil-patel-6764021b0/;;;;", "or_profile": "~Nishil_Patel1;~Sebastian_Lee1;~Stefano_Sarao_Mannelli1;~Sebastian_Goldt1;~Andrew_M_Saxe1", "aff": "University College London, University of London;Imperial College London, Imperial College London;University College London;SISSA;University College London, University of London", "aff_domain": "ucl.ac.uk;imperial.ac.uk;ucl.ac.uk;sissa.it;ucl.ac.uk", "position": "Intern;PhD student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@misc{\npatel2023the,\ntitle={The {RL} Perceptron: Generalisation Dynamics of Policy Learning in High Dimensions},\nauthor={Nishil Patel and Sebastian Lee and Stefano Sarao Mannelli and Sebastian Goldt and Andrew M Saxe},\nyear={2023},\nurl={https://openreview.net/forum?id=M1dTz6QmuM}\n}", "github": "", "project": "", "reviewers": "n4bo;PTjw;9GSU;nMd7;vS6U;bMDj", "site": "https://openreview.net/forum?id=M1dTz6QmuM", "pdf_size": 12414285, "rating": "5;6;6;6;6;7", "confidence": "3;4;4;1;2;3", "soundness": "2;3;3;2;2;4", "novelty": "2;3;3;2;3;4", "presentation": "2;3;3;2;1;3", "wc_summary": "198;119;274;122;87;238", "wc_strengths": "282;56;221;20;77;107", "wc_weaknesses": "505;106;353;21;172;14", "wc_questions": "976;27;244;5;12;29", "wc_limitations": "98;72;163;43;10;5", "wc_review": "2059;380;1255;211;358;393", "wc_reply_reviewers": "588;43;345;0;323;0", "wc_reply_authors": "1398;189;484;0;0;0", "reply_reviewers": "4;1;1;0;1;0", "reply_authors": "5;2;2;1;1;1", "rating_avg": [ 6.0, 0.5773502691896257 ], "confidence_avg": [ 2.8333333333333335, 1.0671873729054748 ], "soundness_avg": [ 2.6666666666666665, 0.7453559924999298 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999299 ], "wc_summary_avg": [ 173.0, 68.26907547833548 ], "wc_strengths_avg": [ 127.16666666666667, 93.3191953577731 ], "wc_weaknesses_avg": [ 195.16666666666666, 179.14185874762924 ], "wc_questions_avg": [ 215.5, 350.0498773984454 ], "wc_limitations_avg": [ 65.16666666666667, 54.54483375067605 ], "wc_review_avg": [ 776.0, 667.4558662063982 ], "wc_reply_reviewers_avg": [ 216.5, 219.73980825816093 ], "wc_reply_authors_avg": [ 345.1666666666667, 501.58761835684186 ], "reply_reviewers_avg": [ 1.1666666666666667, 1.3437096247164249 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14712874366052554209&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": 
"University College London;Imperial College London;Scuola Internazionale Superiore di Studi Avanzati", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.imperial.ac.uk;https://www.sissa.it", "aff_unique_abbr": "UCL;ICL;SISSA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United Kingdom;Italy" }, { "title": "H-nobs: Achieving Certified Fairness and Robustness in Distributed Learning on Heterogeneous Datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71930", "id": "M4h1UAxI3b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ad5d39b10e37915d7dfda2893d8e924-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=M4h1UAxI3b", "openreview": "https://openreview.net/forum?id=M4h1UAxI3b", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71930", "video": "https://nips.cc/virtual/2023/poster/71930", "author_site": "Guanqiang Zhou, Ping Xu, Ping Xu, Yue Wang, Zhi Tian", "tldr": "", "abstract": "Fairness and robustness are two important goals in the design of modern distributed learning systems. Despite a few prior works attempting to achieve both fairness and robustness, some key aspects of this direction remain underexplored. In this paper, we try to answer three largely unnoticed and unaddressed questions that are of paramount significance to this topic: (i) What makes jointly satisfying fairness and robustness difficult? (ii) Is it possible to establish theoretical guarantee for the dual property of fairness and robustness? (iii) How much does fairness have to sacrifice at the expense of robustness being incorporated into the system? To address these questions, we first identify data heterogeneity as the key difficulty of combining fairness and robustness. Accordingly, we propose a fair and robust framework called H-nobs which can offer certified fairness and robustness through the adoption of two key components, a fairness-promoting objective function and a simple robust aggregation scheme called norm-based screening (NBS). We explain in detail why NBS is the suitable scheme in our algorithm in contrast to other robust aggregation measures. In addition, we derive three convergence theorems for H-nobs in cases of the learning model being nonconvex, convex, and strongly convex respectively, which provide theoretical guarantees for both fairness and robustness. 
Further, we empirically investigate the influence of the robust mechanism (NBS) on the fairness performance of H-nobs, the very first attempt of such exploration.", "keywords": "distributed learning;federated learning;fairness;robustness;Byzantine attack;norm-based screening;q-FFL;optimization;convergence analysis", "primary_area": "", "supplementary_material": "", "author": "Guanqiang Zhou;Ping Xu;Yue Wang;Zhi Tian", "authorids": "~Guanqiang_Zhou1;~Ping_Xu3;~Yue_Wang22;~Zhi_Tian1", "gender": "M;F;M;", "homepage": "http://mason.gmu.edu/~gzhou4/;https://sites.google.com/view/ping-xu-utrgv;http://mason.gmu.edu/~ywang56/;", "dblp": ";75/6813;;", "google_scholar": ";jipCnYIAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Guanqiang_Zhou1;~Ping_Xu3;~Yue_Wang22;~Zhi_Tian1", "aff": "George Mason University;George Mason University;George Mason University;", "aff_domain": "gmu.edu;gmu.edu;gmu.edu;", "position": "PhD student;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\nzhou2023hnobs,\ntitle={H-nobs: Achieving Certified Fairness and Robustness in Distributed Learning on Heterogeneous Datasets},\nauthor={Guanqiang Zhou and Ping Xu and Yue Wang and Zhi Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=M4h1UAxI3b}\n}", "github": "", "project": "", "reviewers": "9BqC;hrXW;waox;fgN6", "pdf_size": 289837, "rating": "4;5;6;6", "confidence": "3;4;3;3", "soundness": "3;1;4;3", "novelty": "2;2;3;2", "presentation": "2;2;4;3", "wc_summary": "50;47;81;111", "wc_strengths": "59;46;58;60", "wc_weaknesses": "127;142;78;52", "wc_questions": "7;129;13;64", "wc_limitations": "7;74;39;23", "wc_review": "250;438;269;310", "wc_reply_reviewers": "0;595;21;76", "wc_reply_authors": "0;794;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 72.25, 26.03243169586737 ], "wc_strengths_avg": [ 55.75, 5.673402858955108 ], "wc_weaknesses_avg": [ 99.75, 36.334384541367974 ], "wc_questions_avg": [ 53.25, 49.02231634674151 ], "wc_limitations_avg": [ 35.75, 24.81305100143874 ], "wc_review_avg": [ 316.75, 73.28497458551787 ], "wc_reply_reviewers_avg": [ 173.0, 245.21725061667257 ], "wc_reply_authors_avg": [ 198.5, 343.81208530242213 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7503341773301709669&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "gmu.edu;gmu.edu;gmu.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "George Mason University", "aff_unique_dep": "", "aff_unique_url": "https://www.gmu.edu", "aff_unique_abbr": "GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Language Models can Solve Computer Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71929", "id": "M6OmjAZ4CX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7cc1005ec73cfbaac9fa21192b622507-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=M6OmjAZ4CX", "openreview": "https://openreview.net/forum?id=M6OmjAZ4CX", "poster": "/media/PosterPDFs/NeurIPS%202023/71929.png?t=1699651158.6059968", "slides": "https://nips.cc/virtual/2023/poster/71929", "video": "https://nips.cc/virtual/2023/poster/71929", "author_site": "Geunwoo Kim, Pierre Baldi, Stephen McAleer", "tldr": "", "abstract": "Agents capable of carrying out general tasks on a computer can improve efficiency and productivity by automating repetitive tasks and assisting in complex problem-solving. Ideally, such agents should be able to solve new computer tasks presented to them through natural language commands. However, previous approaches to this problem require large amounts of expert demonstrations and task-specific reward functions, both of which are impractical for new tasks. In this work, we show that a pre-trained large language model (LLM) agent can execute computer tasks guided by natural language using a simple prompting scheme where the agent \\textbf{R}ecursively \\textbf{C}riticizes and \\textbf{I}mproves its output (RCI). The RCI approach significantly outperforms existing LLM methods for automating computer tasks and surpasses supervised learning (SL) and reinforcement learning (RL) approaches on the MiniWoB++ benchmark. \nWe compare multiple LLMs and find that RCI with the InstructGPT-3+RLHF LLM is state-of-the-art on MiniWoB++, using only a handful of demonstrations per task rather than tens of thousands, and without a task-specific reward function. Furthermore, we demonstrate RCI prompting's effectiveness in enhancing LLMs' reasoning abilities on a suite of natural language reasoning tasks, outperforming chain of thought (CoT) prompting with external feedback. We find that RCI combined with CoT performs better than either separately. 
Our code can be found here: https://github.com/posgnu/rci-agent.", "keywords": "Large Language models;Web Navigation;Foundation Models;Decision Making", "primary_area": "", "supplementary_material": "", "author": "Geunwoo Kim;Pierre Baldi;Stephen Marcus McAleer", "authorids": "~Geunwoo_Kim1;~Pierre_Baldi1;~Stephen_Marcus_McAleer1", "gender": ";;M", "homepage": "https://posgnu.github.io/;;https://www.andrew.cmu.edu/user/smcaleer/", "dblp": "325/0894;;", "google_scholar": "BXN3bgQAAAAJ;;iEFL4-YAAAAJ", "orcid": ";;", "linkedin": ";;stephen-mcaleer/", "or_profile": "~Geunwoo_Kim1;~Pierre_Baldi1;~Stephen_Marcus_McAleer1", "aff": "University of California, Irvine;;Carnegie Mellon University", "aff_domain": "uci.edu;;cmu.edu", "position": "PhD student;;Postdoc", "bibtex": "@inproceedings{\nkim2023language,\ntitle={Language Models can Solve Computer Tasks},\nauthor={Geunwoo Kim and Pierre Baldi and Stephen Marcus McAleer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=M6OmjAZ4CX}\n}", "github": "", "project": "", "reviewers": "s5S9;qKku;6D7X;kDG8", "pdf_size": 1710399, "rating": "6;6;6;6", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "73;120;102;174", "wc_strengths": "73;88;47;102", "wc_weaknesses": "57;164;154;139", "wc_questions": "17;209;23;134", "wc_limitations": "10;7;19;26", "wc_review": "230;588;345;575", "wc_reply_reviewers": "21;45;62;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 117.25, 36.80607966083864 ], "wc_strengths_avg": [ 77.5, 20.377683872314833 ], "wc_weaknesses_avg": [ 128.5, 42.228544848242166 ], "wc_questions_avg": [ 95.75, 80.28503907951966 ], "wc_limitations_avg": [ 15.5, 7.5 ], "wc_review_avg": [ 434.5, 152.58849891128753 ], "wc_reply_reviewers_avg": [ 34.5, 20.303940504246953 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 374, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2552584892380879541&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uci.edu;;cmu.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Irvine;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uci.edu;https://www.cmu.edu", "aff_unique_abbr": "UCI;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Multiply Robust Federated Estimation of Targeted Average Treatment Effects", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71928", "id": "M6UccKMFGl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/def4492b32f0248a0e4d92cc46bbdaad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=M6UccKMFGl", "openreview": "https://openreview.net/forum?id=M6UccKMFGl", "poster": "/media/PosterPDFs/NeurIPS%202023/71928.png?t=1701325619.139919", "slides": "https://nips.cc/virtual/2023/poster/71928", "video": "https://nips.cc/virtual/2023/poster/71928", "author_site": "Larry Han, Zhu Shen, Jose 
Zubizarreta", "tldr": "", "abstract": "Federated or multi-site studies have distinct advantages over single-site studies, including increased generalizability, the ability to study underrepresented populations, and the opportunity to study rare exposures and outcomes. However, these studies are complicated by the need to preserve the privacy of each individual's data, heterogeneity in their covariate distributions, and different data structures between sites. We propose a novel federated approach to derive valid causal inferences for a target population using multi-site data. We adjust for covariate shift and accommodate covariate mismatch between sites by developing a multiply-robust and privacy-preserving nuisance function estimation approach. Our methodology incorporates transfer learning to estimate ensemble weights to combine information from source sites. We show that these learned weights are efficient and optimal under different scenarios. We showcase the finite sample advantages of our approach in terms of efficiency and robustness compared to existing state-of-the-art approaches. We apply our approach to study the treatment effect of percutaneous coronary intervention (PCI) on the duration of hospitalization for patients experiencing acute myocardial infarction (AMI) with data from the Centers for Medicare \\& Medicaid Services (CMS).", "keywords": "Causal inference;Covariate mismatch;Federated learning;Multiple robustness;Transportation", "primary_area": "", "supplementary_material": "", "author": "Larry Han;Zhu Shen;Jose R Zubizarreta", "authorids": "~Larry_Han1;~Zhu_Shen1;~Jose_R_Zubizarreta1", "gender": "M;F;", "homepage": "https://larrylehan.github.io/;;https://hcp.hms.harvard.edu/people/jose-r-zubizarreta", "dblp": ";249/0687;", "google_scholar": "iZnTmxMAAAAJ;FXxEo5gAAAAJ;", "orcid": "0000-0002-0577-9661;0000-0003-4564-5438;", "linkedin": ";;", "or_profile": "~Larry_Han1;~Zhu_Shen1;~Jose_R_Zubizarreta1", "aff": "Harvard University;Harvard University, Harvard University;Harvard University", "aff_domain": "g.harvard.edu;g.harvard.edu;harvard.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhan2023multiply,\ntitle={Multiply Robust Federated Estimation of Targeted Average Treatment Effects},\nauthor={Larry Han and Zhu Shen and Jose R Zubizarreta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=M6UccKMFGl}\n}", "github": "", "project": "", "reviewers": "LaCg;hesp;7WXj;TqVy;27aW", "pdf_size": 1215100, "rating": "4;5;5;6;7", "confidence": "4;4;3;4;3", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;2;2;4;4", "wc_summary": "57;72;151;43;65", "wc_strengths": "66;175;51;34;108", "wc_weaknesses": "380;147;566;218;70", "wc_questions": "155;156;203;5;21", "wc_limitations": "1;48;44;1;17", "wc_review": "659;598;1015;301;281", "wc_reply_reviewers": "48;308;242;15;37", "wc_reply_authors": "203;1292;234;668;19", "reply_reviewers": "1;2;2;1;1", "reply_authors": "2;3;3;3;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 77.6, 37.94522367834982 ], "wc_strengths_avg": [ 86.8, 50.45948870133347 ], "wc_weaknesses_avg": [ 276.2, 177.40169108551362 ], "wc_questions_avg": [ 108.0, 79.64420882901656 ], "wc_limitations_avg": [ 22.2, 20.33125672456083 
], "wc_review_avg": [ 570.8, 269.2986446308262 ], "wc_reply_reviewers_avg": [ 130.0, 120.68637039864942 ], "wc_reply_authors_avg": [ 483.2, 457.03452823610604 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4803844614152616, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1358371690463322733&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "g.harvard.edu;g.harvard.edu;harvard.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Enhancing Minority Classes by Mixing: An Adaptative Optimal Transport Approach for Long-tailed Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71927", "id": "M7FQpIdo0X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bdabb5d4262bcfb6a1d529d690a6c82b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=M7FQpIdo0X", "openreview": "https://openreview.net/forum?id=M7FQpIdo0X", "poster": "/media/PosterPDFs/NeurIPS%202023/71927.png?t=1698501204.6261795", "slides": "https://nips.cc/virtual/2023/poster/71927", "video": "https://nips.cc/virtual/2023/poster/71927", "author_site": "Jintong Gao, He Zhao, Zhuo Li, Dandan Guo", "tldr": "", "abstract": "Real-world data usually confronts severe class-imbalance problems, where several majority classes have a significantly larger presence in the training set than minority classes. One effective solution is using mixup-based methods to generate synthetic samples to enhance the presence of minority classes. Previous approaches mix the background images from the majority classes and foreground images from the\nminority classes in a random manner, which ignores the sample-level semantic similarity, possibly resulting in less reasonable or less useful images. In this work, we propose an adaptive image-mixing method based on optimal transport (OT) to incorporate both class-level and sample-level information, which is able to generate semantically reasonable and meaningful mixed images for minority classes. Due to\nits flexibility, our method can be combined with existing long-tailed classification methods to enhance their performance and it can also serve as a general data augmentation method for balanced datasets. Extensive experiments indicate that our method achieves effective performance for long-tailed classification tasks. 
The code is available at https://github.com/JintongGao/Enhancing-Minority-Classes-by-Mixing.", "keywords": "Long-tailed Classification;Optimal Transport;Image-mixing;Semantic Similarity", "primary_area": "", "supplementary_material": "/attachment/545c84c09c8d3debd8e713162704cb8c1713b4cc.pdf", "author": "Jintong Gao;He Zhao;Zhuo Li;Dan dan Guo", "authorids": "~Jintong_Gao2;~He_Zhao1;~Zhuo_Li5;~Dan_dan_Guo1", "gender": "F;;M;F", "homepage": "https://jintonggao.github.io/gaojt.github.io/;;;https://github.com/Dan123dan", "dblp": "369/7719;;;121/1618", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=QLOY4JkAAAAJ", "orcid": "0000-0002-6832-2618;;0009-0000-6451-4877;", "linkedin": ";;;", "or_profile": "~Jintong_Gao2;~He_Zhao1;~Zhuo_Li5;~Dan_dan_Guo1", "aff": "Jilin University;;The Chinese University of Hong Kong, Shenzhen;Jilin University", "aff_domain": "jlu.edu.cn;;link.cuhk.edu.cn;jlu.edu.cn", "position": "MS student;;PhD student;Lecturer", "bibtex": "@inproceedings{\ngao2023enhancing,\ntitle={Enhancing Minority Classes by Mixing: An Adaptative Optimal Transport Approach for Long-tailed Classification},\nauthor={Jintong Gao and He Zhao and Zhuo Li and Dan dan Guo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=M7FQpIdo0X}\n}", "github": "", "project": "", "reviewers": "mXEg;HKTJ;YYXw;ARR2", "pdf_size": 13310086, "rating": "5;5;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "76;95;80;92", "wc_strengths": "61;98;37;38", "wc_weaknesses": "81;210;184;9", "wc_questions": "5;102;41;42", "wc_limitations": "0;2;38;1", "wc_review": "223;507;380;182", "wc_reply_reviewers": "18;111;158;0", "wc_reply_authors": "58;106;29;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.75, 7.949056547792323 ], "wc_strengths_avg": [ 58.5, 24.743686063317245 ], "wc_weaknesses_avg": [ 121.0, 80.67527502277262 ], "wc_questions_avg": [ 47.5, 34.81738071710737 ], "wc_limitations_avg": [ 10.25, 16.037066439969625 ], "wc_review_avg": [ 323.0, 129.40826866935512 ], "wc_reply_reviewers_avg": [ 71.75, 65.22413280374067 ], "wc_reply_authors_avg": [ 48.25, 39.14316670889058 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11975055707218946288&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "jlu.edu.cn;;link.cuhk.edu.cn;jlu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Jilin University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.jlu.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "JLU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Feature Dropout: Revisiting the Role of Augmentations in Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71926", "id": "M7hijAPA4B", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c290d4373c495b2cad0625d6288260f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=M7hijAPA4B", "openreview": "https://openreview.net/forum?id=M7hijAPA4B", "poster": "/media/PosterPDFs/NeurIPS%202023/71926.png?t=1702317451.8773649", "slides": "https://nips.cc/virtual/2023/poster/71926", "video": "https://nips.cc/virtual/2023/poster/71926", "author_site": "Alex Tamkin, Margalit Glasgow, Xiluo He, Noah Goodman", "tldr": "", "abstract": "What role do augmentations play in contrastive learning? Recent work suggests that good augmentations are label-preserving with respect to a specific downstream task. We complicate this picture by showing that label-destroying augmentations can be useful in the foundation model setting, where the goal is to learn diverse, general-purpose representations for multiple downstream tasks. We perform contrastive learning experiments on a range of image and audio datasets with multiple downstream tasks (e.g. for digits superimposed on photographs, predicting the class of one vs. the other). We find that Viewmaker Networks, a recently proposed model for learning augmentations for contrastive learning, produce label-destroying augmentations that stochastically destroy features needed for different downstream tasks. These augmentations are interpretable (e.g. altering shapes, digits, or letters added to images) and surprisingly often result in better performance compared to expert-designed augmentations, despite not preserving label information. To support our empirical results, we theoretically analyze a simple contrastive learning setting with a linear model. In this setting, label-destroying augmentations are crucial for preventing one set of features from suppressing the learning of features useful for another downstream task. 
Our results highlight the need for analyzing the interaction between multiple downstream tasks when trying to explain the success of foundation models.", "keywords": "self-supervised learning;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/4aebea6129c7a9e56a46f11be31898d468999a66.zip", "author": "Alex Tamkin;Margalit Glasgow;Xiluo He;Noah Goodman", "authorids": "~Alex_Tamkin1;~Margalit_Glasgow1;~Xiluo_He1;~Noah_Goodman1", "gender": ";F;M;", "homepage": ";https://margalitglasgow.github.io/;;https://cocolab.stanford.edu/", "dblp": ";268/0063;;96/1216", "google_scholar": ";ErDOPbEAAAAJ;;OUpIbcQAAAAJ", "orcid": ";;;", "linkedin": ";;xiluohe/;", "or_profile": "~Alex_Tamkin1;~Margalit_Glasgow1;~Xiluo_He1;~Noah_Goodman1", "aff": ";Stanford University;Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu;stanford.edu", "position": ";PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\ntamkin2023feature,\ntitle={Feature Dropout: Revisiting the Role of Augmentations in Contrastive Learning},\nauthor={Alex Tamkin and Margalit Glasgow and Xiluo He and Noah Goodman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=M7hijAPA4B}\n}", "github": "", "project": "", "reviewers": "1FJs;jcen;eJFh", "pdf_size": 2323035, "rating": "3;6;6", "confidence": "5;3;4", "soundness": "3;2;3", "novelty": "1;3;2", "presentation": "3;2;2", "wc_summary": "103;152;134", "wc_strengths": "70;76;51", "wc_weaknesses": "342;110;1044", "wc_questions": "95;220;71", "wc_limitations": "4;23;18", "wc_review": "614;581;1318", "wc_reply_reviewers": "0;52;123", "wc_reply_authors": "0;6;46", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 129.66666666666666, 20.237478982214054 ], "wc_strengths_avg": [ 65.66666666666667, 10.656244908763854 ], "wc_weaknesses_avg": [ 498.6666666666667, 397.0703827898637 ], "wc_questions_avg": [ 128.66666666666666, 65.32142748661337 ], "wc_limitations_avg": [ 15.0, 8.04155872120988 ], "wc_review_avg": [ 837.6666666666666, 339.9140414215858 ], "wc_reply_reviewers_avg": [ 58.333333333333336, 50.41384289612879 ], "wc_reply_authors_avg": [ 17.333333333333332, 20.417857108151406 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4532310730447598&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Geometric Algebra Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71925", "id": "M7r2CO4tJC", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f6dd92b03ff9be7468a6104611c9187-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=M7r2CO4tJC", "openreview": "https://openreview.net/forum?id=M7r2CO4tJC", "poster": "/media/PosterPDFs/NeurIPS%202023/71925.png?t=1701730524.7205517", "slides": "https://nips.cc/virtual/2023/poster/71925", "video": "https://nips.cc/virtual/2023/poster/71925", "author_site": "Johann Brehmer, Pim de Haan, S\u00f6nke Behrends, Taco Cohen", "tldr": "", "abstract": "Problems involving geometric data arise in physics, chemistry, robotics, computer vision, and many other fields. Such data can take numerous forms, for instance points, direction vectors, translations, or rotations, but to date there is no single architecture that can be applied to such a wide variety of geometric types while respecting their symmetries. In this paper we introduce the Geometric Algebra Transformer (GATr), a general-purpose architecture for geometric data. GATr represents inputs, outputs, and hidden states in the projective geometric (or Clifford) algebra, which offers an efficient 16-dimensional vector-space representation of common geometric objects as well as operators acting on them. GATr is equivariant with respect to E(3), the symmetry group of 3D Euclidean space. As a Transformer, GATr is versatile, efficient, and scalable. We demonstrate GATr in problems from n-body modeling to wall-shear-stress estimation on large arterial meshes to robotic motion planning. GATr consistently outperforms both non-geometric and equivariant baselines in terms of error, data efficiency, and scalability.", "keywords": "Geometry;geometric algebra;equivariance;transformer", "primary_area": "", "supplementary_material": "", "author": "Johann Brehmer;Pim De Haan;S\u00f6nke Behrends;Taco Cohen", "authorids": "~Johann_Brehmer1;~Pim_De_Haan1;sbehrend@qti.qualcomm.com;~Taco_Cohen1", "gender": "M;M;;M", "homepage": "https://johannbrehmer.github.io;https://pimdehaan.com;;http://www.ta.co.nl", "dblp": "220/5763;;;142/2903", "google_scholar": "ZdUMvCsAAAAJ;AZeK-REAAAAJ;;a3q4YxEAAAAJ", "orcid": "0000-0003-3344-4209;;;", "linkedin": "johannbrehmer;https://nl.linkedin.com/in/pim-de-haan;;", "or_profile": "~Johann_Brehmer1;~Pim_De_Haan1;sbehrend@qti.qualcomm.com;~Taco_Cohen1", "aff": "Qualcomm AI Research;Qualcomm;;Qualcomm Inc, QualComm", "aff_domain": "qualcomm.com;qualcomm.com;;qti.qualcomm.com", "position": "Researcher;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nbrehmer2023geometric,\ntitle={Geometric Algebra Transformer},\nauthor={Johann Brehmer and Pim De Haan and S{\\\"o}nke Behrends and Taco Cohen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=M7r2CO4tJC}\n}", "github": "", "project": "", "reviewers": "qqQC;wqKZ;8oHp;yyFc", "pdf_size": 918766, "rating": "6;6;6;8", "confidence": "2;2;3;3", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "2;3;3;3", "wc_summary": "50;112;193;71", "wc_strengths": "24;73;159;166", "wc_weaknesses": "33;115;332;119", "wc_questions": "35;89;41;77", "wc_limitations": "25;177;1;8", "wc_review": "167;566;726;441", "wc_reply_reviewers": "0;156;75;0", "wc_reply_authors": "0;271;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 
0.4330127018922193 ], "wc_summary_avg": [ 106.5, 54.69232121605372 ], "wc_strengths_avg": [ 105.5, 59.62591718372138 ], "wc_weaknesses_avg": [ 149.75, 110.67830636579149 ], "wc_questions_avg": [ 60.5, 22.994564575133836 ], "wc_limitations_avg": [ 52.75, 72.2647043860279 ], "wc_review_avg": [ 475.0, 204.5128357829894 ], "wc_reply_reviewers_avg": [ 57.75, 64.46074386787667 ], "wc_reply_authors_avg": [ 67.75, 117.34644221279143 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7123316817051507058&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "qualcomm.com;qualcomm.com;;qti.qualcomm.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Qualcomm;Qualcomm Incorporated", "aff_unique_dep": "Qualcomm AI Research;", "aff_unique_url": "https://www.qualcomm.com/research;https://www.qualcomm.com", "aff_unique_abbr": "QAI;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Rank-1 Lattice Targeted Sampling for Black-box Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71924", "id": "M8CYKLHoEN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/218d0323ce235090b43a1166159ee328-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=M8CYKLHoEN", "openreview": "https://openreview.net/forum?id=M8CYKLHoEN", "poster": "/media/PosterPDFs/NeurIPS%202023/71924.png?t=1701144652.2631862", "slides": "https://nips.cc/virtual/2023/poster/71924", "video": "https://nips.cc/virtual/2023/poster/71924", "author_site": "Yueming LYU", "tldr": "", "abstract": "Black-box optimization has gained great attention for its success in recent applications. However, scaling up to high-dimensional problems with good query efficiency remains challenging. This paper proposes a novel Rank-1 Lattice Targeted Sampling (RLTS) technique to address this issue. Our RLTS benefits from random rank-1 lattice Quasi-Monte Carlo, which enables us to perform fast local exact Gaussian processes (GP) training and inference with $O(n \\log n)$ complexity w.r.t. $n$ batch samples. Furthermore, we developed a fast coordinate searching method with $O(n \\log n)$ time complexity for fast targeted sampling. The fast computation enables us to plug our RLTS into the sampling phase of stochastic optimization methods. This improves the query efficiency while scaling up to higher dimensional problems than Bayesian optimization. Moreover, to construct rank-1 lattices efficiently, we proposed a closed-form construction. 
Extensive experiments on challenging benchmark test functions and black-box prompt fine-tuning for large language models demonstrate the query efficiency of our RLTS technique.", "keywords": "Black-box Optimization;Derivative-free Optimization;Kernel methods", "primary_area": "", "supplementary_material": "/attachment/f922245d1e9788ce729213fe6f526884f9a26051.pdf", "author": "Yueming Lyu", "authorids": "~Yueming_Lyu1", "gender": "M", "homepage": "https://yueminglyu.github.io/", "dblp": "", "google_scholar": "uQXB6-oAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Yueming_Lyu1", "aff": "Agency for Science, Technology and Research (A*STAR)", "aff_domain": "astar.edu.sg", "position": "Researcher", "bibtex": "@inproceedings{\nlyu2023fast,\ntitle={Fast Rank-1 Lattice Targeted Sampling for Black-box Optimization},\nauthor={Yueming Lyu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=M8CYKLHoEN}\n}", "github": "", "project": "", "reviewers": "RbGC;AfkS;RzD5;8XeQ", "pdf_size": 1510338, "rating": "5;5;6;6", "confidence": "4;3;3;1", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;2;2;3", "wc_summary": "50;75;78;91", "wc_strengths": "38;62;94;79", "wc_weaknesses": "133;99;441;74", "wc_questions": "3;145;291;4", "wc_limitations": "1;6;1;34", "wc_review": "225;387;905;282", "wc_reply_reviewers": "0;0;207;7", "wc_reply_authors": "0;0;564;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.5, 14.84082207965583 ], "wc_strengths_avg": [ 68.25, 20.813156896540228 ], "wc_weaknesses_avg": [ 186.75, 148.27740050324593 ], "wc_questions_avg": [ 110.75, 119.02599505990277 ], "wc_limitations_avg": [ 10.5, 13.720422734012244 ], "wc_review_avg": [ 449.75, 269.1852289781146 ], "wc_reply_reviewers_avg": [ 53.5, 88.66932953394877 ], "wc_reply_authors_avg": [ 141.0, 244.2191638672117 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11253143286959784178&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "astar.edu.sg", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Agency for Science, Technology and Research", "aff_unique_dep": "", "aff_unique_url": "https://www.a-star.edu.sg", "aff_unique_abbr": "A*STAR", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "Testing the General Deductive Reasoning Capacity of Large Language Models Using OOD Examples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71923", "id": "MCVfX7HgPO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/09425891e393e64b0535194a81ba15b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MCVfX7HgPO", "openreview": "https://openreview.net/forum?id=MCVfX7HgPO", "poster": "/media/PosterPDFs/NeurIPS%202023/71923.png?t=1702192637.0153663", "slides": "https://nips.cc/virtual/2023/poster/71923", "video": "https://nips.cc/virtual/2023/poster/71923", "author_site": "Abulhair Saparov, Richard Yuanzhe Pang, Vishakh Padmakumar, Nitish Joshi, Mehran Kazemi, Najoung Kim, He He", "tldr": "", 
"abstract": "Given the intractably large size of the space of proofs, any model that is capable of general deductive reasoning must generalize to proofs of greater complexity. Recent studies have shown that large language models (LLMs) possess some abstract deductive reasoning ability given chain-of-thought prompts. However, they have primarily been tested on proofs using modus ponens or of a specific size, and from the same distribution as the in-context examples. To measure the general deductive reasoning ability of LLMs, we test on a broad set of deduction rules and measure their ability to generalize to more complex proofs from simpler demonstrations from multiple angles: depth-, width-, and compositional generalization. To facilitate systematic exploration, we construct a new synthetic and programmable reasoning dataset that enables control over deduction rules and proof complexity. Our experiments on four LLMs of various sizes and training objectives show that they are able to generalize to compositional proofs. However, they have difficulty generalizing to longer proofs, and they require explicit demonstrations to produce hypothetical subproofs, specifically in proof by cases and proof by contradiction.", "keywords": "large language models;reasoning;out-of-distribution generalization;chain-of-thought;in-context learning", "primary_area": "", "supplementary_material": "/attachment/53b51538e1463a08b318cb87b69414a9d2a4c5c1.zip", "author": "Abulhair Saparov;Richard Yuanzhe Pang;Vishakh Padmakumar;Nitish Joshi;Mehran Kazemi;Najoung Kim;He He", "authorids": "~Abulhair_Saparov1;~Richard_Yuanzhe_Pang1;~Vishakh_Padmakumar1;~Nitish_Joshi1;~Mehran_Kazemi1;najoung@bu.edu;~He_He2", "gender": "M;M;;M;;;", "homepage": "http://asaparov.org;https://yzpang.me;https://vishakhpk.github.io/;https://joshinh.github.io;;;", "dblp": "117/6287;250/9059;285/5184;242/7973;;;", "google_scholar": "TVNS71sAAAAJ;https://scholar.google.com/citations?hl=en;OeBKZ8AAAAAJ;;;;", "orcid": ";;0000-0002-3396-3589;;;;", "linkedin": ";yuanzhe-richard-pang/;;;;;", "or_profile": "~Abulhair_Saparov1;~Richard_Yuanzhe_Pang1;~Vishakh_Padmakumar1;~Nitish_Joshi1;~Mehran_Kazemi1;najoung@bu.edu;~He_He2", "aff": ";New York University;New York University;New York University;;;", "aff_domain": ";nyu.edu;nyu.edu;nyu.edu;;;", "position": ";PhD student;PhD student;PhD student;;;", "bibtex": "@inproceedings{\nsaparov2023testing,\ntitle={Testing the General Deductive Reasoning Capacity of Large Language Models Using {OOD} Examples},\nauthor={Abulhair Saparov and Richard Yuanzhe Pang and Vishakh Padmakumar and Nitish Joshi and Mehran Kazemi and Najoung Kim and He He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MCVfX7HgPO}\n}", "github": "", "project": "", "reviewers": "bcxG;w9uz;CjCV;ReKG", "pdf_size": 668100, "rating": "6;6;7;7", "confidence": "5;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "48;47;46;66", "wc_strengths": "26;34;153;94", "wc_weaknesses": "341;273;143;173", "wc_questions": "8;5;21;85", "wc_limitations": "17;1;19;2", "wc_review": "440;360;382;420", "wc_reply_reviewers": "49;57;0;57", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 
51.75, 8.257572282456872 ], "wc_strengths_avg": [ 76.75, 51.27072751580574 ], "wc_weaknesses_avg": [ 232.5, 78.99841770567306 ], "wc_questions_avg": [ 29.75, 32.460552983583014 ], "wc_limitations_avg": [ 9.75, 8.287792227125388 ], "wc_review_avg": [ 400.5, 31.316928329579195 ], "wc_reply_reviewers_avg": [ 40.75, 23.75263143316967 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14248154820969156702&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";nyu.edu;nyu.edu;nyu.edu;;;", "author_num": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Attacks on Online Learning to Rank with Click Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71922", "id": "MCj7DLkYqS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/820e42f39773c6cbbd875553db45658f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MCj7DLkYqS", "openreview": "https://openreview.net/forum?id=MCj7DLkYqS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71922", "video": "https://nips.cc/virtual/2023/poster/71922", "author_site": "Jinhang Zuo, Zhiyao Zhang, Zhiyong Wang, Shuai Li, Mohammad Hajiesmaili, Adam Wierman", "tldr": "", "abstract": "Online learning to rank (OLTR) is a sequential decision-making problem where a learning agent selects an ordered list of items and receives feedback through user clicks. Although potential attacks against OLTR algorithms may cause serious losses in real-world applications, there is limited knowledge about adversarial attacks on OLTR. This paper studies attack strategies against multiple variants of OLTR. Our first result provides an attack strategy against the UCB algorithm on classical stochastic bandits with binary feedback, which solves the key issues caused by bounded and discrete feedback that previous works cannot handle. Building on this result, we design attack algorithms against UCB-based OLTR algorithms in position-based and cascade models. Finally, we propose a general attack strategy against any algorithm under the general click model. Each attack algorithm manipulates the learning agent into choosing the target attack item $T-o(T)$ times, incurring a cumulative cost of $o(T)$. 
Experiments on synthetic and real data further validate the effectiveness of our proposed attack algorithms.", "keywords": "online learning to rank;adversarial attack;click model", "primary_area": "", "supplementary_material": "/attachment/f2c3b84f7d75842f80f5450769a979c3cc3bb5c0.zip", "author": "Jinhang Zuo;Zhiyao Zhang;Zhiyong Wang;Shuai Li;Mohammad Hajiesmaili;Adam Wierman", "authorids": "~Jinhang_Zuo1;~Zhiyao_Zhang2;~Zhiyong_Wang9;~Shuai_Li3;~Mohammad_Hajiesmaili1;~Adam_Wierman1", "gender": "M;M;M;F;M;M", "homepage": "https://jhzuo.github.io;https://scholar.google.com/citations?user=uiGWW7kAAAAJ&hl=en;https://zhiyongwangwzy.github.io/;http://shuaili8.github.io;https://groups.cs.umass.edu/hajiesmaili/;https://adamwierman.com/", "dblp": "179/8179;;;57/2281-10;49/7911;56/4447", "google_scholar": "W3YHD10AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ;XCGuYKIAAAAJ;4OvOdSgAAAAJ", "orcid": "0000-0002-9557-3551;;;;;0000-0002-5923-0199", "linkedin": ";zhiyao-zhang-b09392335/;zhiyong-wang-a44aaa1a3/;;;adam-wierman-a529474/", "or_profile": "~Jinhang_Zuo1;~Zhiyao_Zhang2;~Zhiyong_Wang9;~Shuai_Li3;~Mohammad_Hajiesmaili1;~Adam_Wierman1", "aff": "California Institute of Technology;Southeast University;Department of Computer Science and Engineering, The Chinese University of Hong Kong;John Hopcroft Center, Shanghai Jiao Tong University;College of Information and Computer Science, University of Massachusetts, Amherst;California Institute of Technology", "aff_domain": "caltech.edu;seu.edu.cn;cse.cuhk.edu.hk;sjtu.edu.cn;cics.umass.edu;caltech.edu", "position": "Postdoc;Undergrad student;PhD student;Assistant Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nzuo2023adversarial,\ntitle={Adversarial Attacks on Online Learning to Rank with Click Feedback},\nauthor={Jinhang Zuo and Zhiyao Zhang and Zhiyong Wang and Shuai Li and Mohammad Hajiesmaili and Adam Wierman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MCj7DLkYqS}\n}", "github": "", "project": "", "reviewers": "carj;i6fi;7TEH;E7da", "pdf_size": 3215268, "rating": "5;5;5;6", "confidence": "4;4;3;5", "soundness": "3;2;4;4", "novelty": "3;2;3;4", "presentation": "3;3;4;4", "wc_summary": "102;92;83;161", "wc_strengths": "80;95;57;143", "wc_weaknesses": "124;282;47;114", "wc_questions": "4;7;130;42", "wc_limitations": "1;8;1;1", "wc_review": "311;484;318;461", "wc_reply_reviewers": "10;19;264;25", "wc_reply_authors": "0;0;405;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 109.5, 30.483602149352365 ], "wc_strengths_avg": [ 93.75, 31.491070162825526 ], "wc_weaknesses_avg": [ 141.75, 86.21593530200784 ], "wc_questions_avg": [ 45.75, 50.88405938995041 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 393.5, 79.45596264598397 ], "wc_reply_reviewers_avg": [ 79.5, 106.65481705014547 ], "wc_reply_authors_avg": [ 101.25, 175.37014426634883 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12389069930274721489&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "caltech.edu;seu.edu.cn;cse.cuhk.edu.hk;sjtu.edu.cn;cics.umass.edu;caltech.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "California Institute of Technology;Southeast University;Chinese University of Hong Kong;Shanghai Jiao Tong University;University of Massachusetts Amherst", "aff_unique_dep": ";;Department of Computer Science and Engineering;John Hopcroft Center;College of Information and Computer Science", "aff_unique_url": "https://www.caltech.edu;https://www.seu.edu.cn/;https://www.cuhk.edu.hk;https://www.sjtu.edu.cn;https://www.umass.edu", "aff_unique_abbr": "Caltech;SEU;CUHK;SJTU;UMass Amherst", "aff_campus_unique_index": "0;2;3;4;0", "aff_campus_unique": "Pasadena;;Hong Kong SAR;Shanghai;Amherst", "aff_country_unique_index": "0;1;1;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Nash Regret Guarantees for Linear Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71921", "id": "MCkUS1P3Sh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/69bf9fd8d3b7b792b6c8c19149024d22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MCkUS1P3Sh", "openreview": "https://openreview.net/forum?id=MCkUS1P3Sh", "poster": "/media/PosterPDFs/NeurIPS%202023/71921.png?t=1701971731.8136826", "slides": "https://nips.cc/virtual/2023/poster/71921", "video": "https://nips.cc/virtual/2023/poster/71921", "author_site": "Ayush Sawarni, Ayush Sawarni, Soumyabrata Pal, Siddharth Barman", "tldr": "", "abstract": "We obtain essentially tight upper bounds for a strengthened notion of regret in the stochastic linear bandits framework. The strengthening---referred to as Nash regret---is defined as the difference between the (a priori unknown) optimum and the geometric mean of expected rewards accumulated by the linear bandit algorithm. Since the geometric mean corresponds to the well-studied Nash social welfare (NSW) function, this formulation quantifies the performance of a bandit algorithm as the collective welfare it generates across rounds. NSW is known to satisfy fairness axioms and, hence, an upper bound on Nash regret provides a principled fairness guarantee. \n\nWe consider the stochastic linear bandits problem over a horizon of $\\mathsf{T}$ rounds and with a set of arms ${\\cal X}$ in ambient dimension $d$. Furthermore, we focus on settings in which the stochastic reward---associated with each arm in ${\\cal X}$---is a non-negative, sub-Poisson random variable. For this setting, we develop an algorithm that achieves a Nash regret of $O\\left( \\sqrt{\\frac{d}{\\mathsf{T}}} \\log(\\mathsf{T} |{\\cal X}|)\\right)$. In addition, addressing linear bandit instances in which the set of arms ${\\cal X}$ is not necessarily finite, we obtain a Nash regret upper bound of $O\\left( \\frac{d^\\frac{5}{4}}{\\sqrt{\\mathsf{T}}} \\log(\\mathsf{T})\\right)$. Since bounded random variables are sub-Poisson, these results hold for bounded, non-negative rewards. 
Our linear bandit algorithm is built upon the successive elimination method with novel technical insights, including tailored concentration bounds and the use of sampling via John ellipsoid in conjunction with the Kiefer\u2013Wolfowitz optimal design.", "keywords": "Sub-Poisson Distribution;Nash Social Welfare;Fairness Quantification;John Ellipsoid;Kiefer-Wolfowitz Optimal Design;Algorithmic Game Theory;Online Learning", "primary_area": "", "supplementary_material": "/attachment/286fa48c0a8a39a5c2042458abd46344848fc483.pdf", "author": "Ayush Sawarni;Soumyabrata Pal;Siddharth Barman", "authorids": "~Ayush_Sawarni1;~Soumyabrata_Pal1;~Siddharth_Barman1", "gender": "M;M;M", "homepage": "https://sawarniayush.github.io/;https://soumyabratap.github.io/;http://www.csa.iisc.ac.in/~barman/", "dblp": ";206/6371;63/478.html", "google_scholar": "U8TSPdAAAAAJ;J4UxoTEAAAAJ;https://scholar.google.co.in/citations?user=HcGQSKIAAAAJ", "orcid": ";;", "linkedin": "ayush-sawarni;;", "or_profile": "~Ayush_Sawarni1;~Soumyabrata_Pal1;~Siddharth_Barman1", "aff": "Indian Institute of Science, Bangalore;Google;Indian Institute of Science", "aff_domain": "iisc.ac.in;google.com;iisc.ac.in", "position": "MS student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nsawarni2023nash,\ntitle={Nash Regret Guarantees for Linear Bandits},\nauthor={Ayush Sawarni and Soumyabrata Pal and Siddharth Barman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MCkUS1P3Sh}\n}", "github": "", "project": "", "reviewers": "K5vq;zbVi;XR4M;4BdU;Cioh", "pdf_size": 1145742, "rating": "6;6;6;6;7", "confidence": "3;3;3;1;3", "soundness": "3;3;3;3;4", "novelty": "3;3;2;2;3", "presentation": "3;3;2;2;4", "wc_summary": "263;41;73;132;78", "wc_strengths": "32;69;41;61;96", "wc_weaknesses": "55;119;29;66;125", "wc_questions": "63;67;187;9;66", "wc_limitations": "119;1;22;10;1", "wc_review": "532;297;352;278;366", "wc_reply_reviewers": "247;55;19;19;18", "wc_reply_authors": "1071;19;0;0;0", "reply_reviewers": "5;1;1;1;1", "reply_authors": "7;2;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 117.4, 78.45151368839227 ], "wc_strengths_avg": [ 59.8, 22.4624130493587 ], "wc_weaknesses_avg": [ 78.8, 37.311660375812814 ], "wc_questions_avg": [ 78.4, 58.534092629851195 ], "wc_limitations_avg": [ 30.6, 44.86691431333338 ], "wc_review_avg": [ 365.0, 89.72402130979195 ], "wc_reply_reviewers_avg": [ 71.6, 88.82251966703039 ], "wc_reply_authors_avg": [ 218.0, 426.563477105108 ], "reply_reviewers_avg": [ 1.8, 1.6000000000000003 ], "reply_authors_avg": [ 2.4, 2.33238075793812 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.25, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=725682908640247068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "iisc.ac.in;google.com;iisc.ac.in", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Science;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.iisc.ac.in;https://www.google.com", "aff_unique_abbr": "IISc;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Bangalore;Mountain View;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "India;United States" }, { "title": "Imbalanced Mixed Linear 
Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71920", "id": "MDxZYFR5Me", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aad615d33ba5071045656ba24d800c7b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MDxZYFR5Me", "openreview": "https://openreview.net/forum?id=MDxZYFR5Me", "poster": "/media/PosterPDFs/NeurIPS%202023/71920.png?t=1697985602.2404146", "slides": "https://nips.cc/virtual/2023/poster/71920", "video": "https://nips.cc/virtual/2023/poster/71920", "author_site": "Pini Zilber, Boaz Nadler", "tldr": "", "abstract": "We consider the problem of mixed linear regression (MLR), where each observed sample belongs to one of $K$ unknown linear models. \nIn practical applications, the mixture of the $K$ models may be imbalanced with a significantly different number of samples from each model. Unfortunately, most MLR methods do not perform well in such settings. Motivated by this practical challenge, in this work we propose Mix-IRLS, a novel, simple and fast algorithm for MLR with excellent performance on both balanced and imbalanced mixtures.\nIn contrast to popular approaches that recover the $K$ models simultaneously, Mix-IRLS does it sequentially using tools from robust regression. Empirically, beyond imbalanced mixtures, Mix-IRLS succeeds in a broad range of additional settings where other methods fail, including small sample sizes, presence of outliers, and an unknown number of models $K$. Furthermore, Mix-IRLS outperforms competing methods on several real-world datasets, in some cases by a large margin. We complement our empirical results by deriving a recovery guarantee for Mix-IRLS, which highlights its advantage on imbalanced mixtures.", "keywords": "Mixture regression model;Mixture of linear models;Iteratively reweighted least squares", "primary_area": "", "supplementary_material": "/attachment/cb9973e5436f7c9928245b24786b43f8a656ba3d.pdf", "author": "Pini Zilber;Boaz Nadler", "authorids": "~Pini_Zilber1;~Boaz_Nadler2", "gender": ";M", "homepage": ";https://www.weizmann.ac.il/math/Nadler/home", "dblp": ";53/4192", "google_scholar": ";N3Jj5_cAAAAJ", "orcid": ";0000-0002-9777-4576", "linkedin": ";", "or_profile": "~Pini_Zilber1;~Boaz_Nadler2", "aff": ";Weizmann Institute of Science", "aff_domain": ";weizmann.ac.il", "position": ";Full Professor", "bibtex": "@inproceedings{\nzilber2023imbalanced,\ntitle={Imbalanced Mixed Linear Regression},\nauthor={Pini Zilber and Boaz Nadler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MDxZYFR5Me}\n}", "github": "", "project": "", "reviewers": "16iH;Yapb;LCfo;2aVS;1Ro7", "pdf_size": 1032808, "rating": "5;6;6;7;7", "confidence": "4;3;3;4;4", "soundness": "2;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "57;124;113;126;112", "wc_strengths": "23;80;46;98;163", "wc_weaknesses": "475;197;264;75;75", "wc_questions": "102;6;420;122;96", "wc_limitations": "13;12;22;13;16", "wc_review": "670;419;865;434;462", "wc_reply_reviewers": "45;33;82;31;32", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 106.4, 25.33456137374397 ], "wc_strengths_avg": [ 82.0, 
48.16222586218374 ], "wc_weaknesses_avg": [ 217.2, 147.98702645840277 ], "wc_questions_avg": [ 149.2, 141.16430143630507 ], "wc_limitations_avg": [ 15.2, 3.655133376499413 ], "wc_review_avg": [ 570.0, 173.19699766450918 ], "wc_reply_reviewers_avg": [ 44.6, 19.37627415165258 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8404912844347030655&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": ";weizmann.ac.il", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "NeuroGraph: Benchmarks for Graph Machine Learning in Brain Connectomics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73619", "id": "MEa0cQeURw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/14f656f21d09a4114666f60a45aab1aa-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=MEa0cQeURw", "openreview": "https://openreview.net/forum?id=MEa0cQeURw", "poster": "/media/PosterPDFs/NeurIPS%202023/73619.png?t=1701794983.8850112", "slides": "https://nips.cc/virtual/2023/poster/73619", "video": "https://nips.cc/virtual/2023/poster/73619", "author_site": "Anwar Said, Roza Bayrak, Tyler Derr, Mudassir Shabbir, Daniel Moyer, Catie Chang, Xenofon Koutsoukos", "tldr": "", "abstract": "Machine learning provides a valuable tool for analyzing high-dimensional functional neuroimaging data, and is proving effective in predicting various neurological conditions, psychiatric disorders, and cognitive patterns. In functional magnetic resonance imaging (MRI) research, interactions between brain regions are commonly modeled using graph-based representations. The potency of graph machine learning methods has been established across myriad domains, marking a transformative step in data interpretation and predictive modeling. Yet, despite their promise, the transposition of these techniques to the neuroimaging domain has been challenging due to the expansive number of potential preprocessing pipelines and the large parameter search space for graph-based dataset construction. In this paper, we introduce NeuroGraph, a collection of graph-based neuroimaging datasets, and demonstrated its utility for predicting multiple categories of behavioral and cognitive traits. We delve deeply into the dataset generation search space by crafting 35 datasets that encompass static and dynamic brain connectivity, running in excess of 15 baseline methods for benchmarking. Additionally, we provide generic frameworks for learning on both static and dynamic graphs. Our extensive experiments lead to several key observations. Notably, using correlation vectors as node features, incorporating larger number of regions of interest, and employing sparser graphs lead to improved performance. 
To foster further advancements in graph-based data driven neuroimaging analysis, we offer a comprehensive open-source Python package that includes the benchmark datasets, baseline implementations, model training, and standard evaluation.", "keywords": "Graph Neural Networks;Benchmarking;Datasets;Neuroimaging;fMRI", "primary_area": "", "supplementary_material": "/attachment/107fcb8c246c110147c555263c137206260af2a3.pdf", "author": "Anwar Said;Roza G Bayrak;Tyler Derr;Mudassir Shabbir;Daniel Moyer;Catie Chang;Xenofon D. Koutsoukos", "authorids": "~Anwar_Said1;~Roza_G_Bayrak2;~Tyler_Derr1;~Mudassir_Shabbir1;~Daniel_Moyer3;~Catie_Chang1;~Xenofon_D._Koutsoukos1", "gender": "M;F;;;F;M;M", "homepage": "https://anwar-said.github.io/Representation-Learning-Blogs/index.html;;http://www.tylerderr.com;;https://www.cchanglab.net/;http://engineering.vanderbilt.edu/bio/xenofon-koutsoukos;https://dcmoyer.github.io", "dblp": "212/4751;;207/7927.html;78/7323;43/8258.html;11/5453;187/6201", "google_scholar": "eSEelAgAAAAJ;QHN1CZsAAAAJ;et6IhFcAAAAJ;https://scholar.google.com.pk/citations?user=bRKvwRYAAAAJ;4ndpsi4AAAAJ;https://scholar.google.com.tw/citations?user=NHZdlVkAAAAJ;sKmoxSMAAAAJ", "orcid": "0000-0002-6715-0068;;;;0000-0003-1541-9579;;", "linkedin": "anwar-said-9bb596b4;;tylersnetwork/;;;;", "or_profile": "~Anwar_Said1;~Roza_G_Bayrak2;~Tyler_Derr1;~Mudassir_Shabbir1;~Catie_Chang1;~Xenofon_D._Koutsoukos1;~Daniel_Moyer2", "aff": "Vanderbilt University;Vanderbilt University;Vanderbilt University;ITU of Punjab Lahore, Pakistan;Vanderbilt University;Vanderbilt University;Vanderbilt University", "aff_domain": "vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;itu.edu.pk;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu", "position": "Postdoc;PhD student;Assistant Professor;Associate Professor;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsaid2023neurograph,\ntitle={NeuroGraph: Benchmarks for Graph Machine Learning in Brain Connectomics},\nauthor={Anwar Said and Roza G Bayrak and Tyler Derr and Mudassir Shabbir and Daniel Moyer and Catie Chang and Xenofon D. 
Koutsoukos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=MEa0cQeURw}\n}", "github": "", "project": "", "reviewers": "4Rsw;g2aw;awZz;Zh62;vR2R", "pdf_size": 2309694, "rating": "6;6;7;7;7", "confidence": "3;3;3;3;3", "wc_summary_and_contributions": "100;72;106;93;43", "wc_strengths": "21;114;83;67;46", "wc_improvement": "14;503;21;101;90", "wc_limitations": "148;34;97;4;5", "wc_correctness": "14;35;99;7;29", "wc_clarity": "41;45;46;1;19", "wc_relation_to_prior_work": "12;98;26;1;6", "wc_documentation": "11;60;7;4;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "362;962;486;279;240", "wc_reply_reviewers": "22;280;0;13;0", "wc_reply_authors": "704;1306;1084;545;566", "reply_reviewers": "1;2;0;1;0", "reply_authors": "2;3;2;2;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 82.8, 22.973027662891976 ], "wc_strengths_avg": [ 66.2, 31.68217164273939 ], "wc_improvement_avg": [ 145.8, 182.02131743287654 ], "wc_limitations_avg": [ 57.6, 56.42907052220514 ], "wc_correctness_avg": [ 36.8, 32.6827171453048 ], "wc_clarity_avg": [ 30.4, 17.681628884240276 ], "wc_relation_to_prior_work_avg": [ 28.6, 35.696498427716975 ], "wc_documentation_avg": [ 16.6, 21.950854197502203 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 465.8, 262.0216784924484 ], "wc_reply_reviewers_avg": [ 63.0, 108.81911596773794 ], "wc_reply_authors_avg": [ 841.0, 302.41825341734915 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17816634547381751374&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "email": "vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;itu.edu.pk;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Vanderbilt University;ITU of Punjab", "aff_unique_dep": ";", "aff_unique_url": "https://www.vanderbilt.edu;", "aff_unique_abbr": "Vanderbilt;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lahore", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Pakistan" }, { "title": "Random Cuts are Optimal for Explainable k-Medians", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71919", "id": "MFWgLCWgUB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d3408794e41dd23e34634344d662f5e9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MFWgLCWgUB", "openreview": "https://openreview.net/forum?id=MFWgLCWgUB", "poster": "/media/PosterPDFs/NeurIPS%202023/71919.png?t=1699826855.0179963", "slides": "https://nips.cc/virtual/2023/poster/71919", "video": "https://nips.cc/virtual/2023/poster/71919", "author_site": "Konstantin Makarychev, Liren Shan", "tldr": "", "abstract": "We show that the RandomCoordinateCut algorithm gives the optimal competitive ratio for explainable $k$-medians in $\\ell_1$. The problem of explainable $k$-medians was introduced by Dasgupta, Frost, Moshkovitz, and Rashtchian in 2020. Several groups of authors independently proposed a simple polynomial-time randomized algorithm for the problem and showed that this algorithm is $O(\\log k \\log\\log k)$ competitive. 
We provide a tight analysis of the algorithm and prove that its competitive ratio is upper bounded by $2\\ln k+2$. This bound matches the $\\Omega(\\log k)$ lower bound by Dasgupta et al (2020).", "keywords": "Clustering;k-medians;Decision Tree;Explainability", "primary_area": "", "supplementary_material": "/attachment/d8398adbdd7a2d983c92c37af1cd805c47bed1a1.pdf", "author": "Konstantin Makarychev;Liren Shan", "authorids": "~Konstantin_Makarychev1;~Liren_Shan1", "gender": "M;M", "homepage": "http://konstantin.makarychev.net/;https://lirenshan.github.io/", "dblp": "37/1011;191/8146", "google_scholar": "https://scholar.google.com.tw/citations?user=-E3hYj8AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-9587-3677;", "linkedin": "konstantin-makarychev-143b3a132/;", "or_profile": "~Konstantin_Makarychev1;~Liren_Shan1", "aff": "Northwestern University;Northwestern University, Northwestern University", "aff_domain": "northwestern.edu;u.northwestern.edu", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nmakarychev2023random,\ntitle={Random Cuts are Optimal for Explainable k-Medians},\nauthor={Konstantin Makarychev and Liren Shan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MFWgLCWgUB}\n}", "github": "", "project": "", "reviewers": "vAmy;uidA;9ZE5;jrDD", "pdf_size": 541171, "rating": "8;8;8;8", "confidence": "4;4;4;3", "soundness": "4;4;4;4", "novelty": "4;4;4;4", "presentation": "3;4;4;4", "wc_summary": "272;94;86;395", "wc_strengths": "48;70;27;83", "wc_weaknesses": "192;5;108;27", "wc_questions": "174;39;32;14", "wc_limitations": "40;1;9;2", "wc_review": "726;209;262;521", "wc_reply_reviewers": "20;0;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;0;0;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 211.75, 129.31429735338625 ], "wc_strengths_avg": [ 57.0, 21.365860619221497 ], "wc_weaknesses_avg": [ 83.0, 73.69871097922947 ], "wc_questions_avg": [ 64.75, 63.73136982679723 ], "wc_limitations_avg": [ 13.0, 15.890248582070704 ], "wc_review_avg": [ 429.5, 207.94290081654628 ], "wc_reply_reviewers_avg": [ 7.75, 8.37779804005802 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8401963285247007832&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "northwestern.edu;u.northwestern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Should Under-parameterized Student Networks Copy or Average Teacher Weights?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71918", "id": "MG0mYskXN2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f5ccb3ab757131a93586ef61ec701533-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MG0mYskXN2", "openreview": "https://openreview.net/forum?id=MG0mYskXN2", 
"poster": "/media/PosterPDFs/NeurIPS%202023/71918.png?t=1702573840.5225787", "slides": "https://nips.cc/virtual/2023/poster/71918", "video": "https://nips.cc/virtual/2023/poster/71918", "author_site": "Berfin Simsek, Amire Bendjeddou, Wulfram Gerstner, Johanni Brea", "tldr": "", "abstract": "Any continuous function $f^*$ can be approximated arbitrarily well by a neural network with sufficiently many neurons $k$. We consider the case when $f^*$ itself is a neural network with one hidden layer and $k$ neurons. Approximating $f^*$ with a neural network with $n< k$ neurons can thus be seen as fitting an under-parameterized \"student\" network with $n$ neurons to a \"teacher\" network with $k$ neurons. As the student has fewer neurons than the teacher, it is unclear, whether each of the $n$ student neurons should copy one of the teacher neurons or rather average a group of teacher neurons. For shallow neural networks with erf activation function and for the standard Gaussian input distribution, we prove that \"copy-average\" configurations are critical points if the teacher's incoming vectors are orthonormal and its outgoing weights are unitary. Moreover, the optimum among such configurations is reached when $n-1$ student neurons each copy one teacher neuron and the $n$-th student neuron averages the remaining $k-n+1$ teacher neurons. For the student network with $n=1$ neuron, we provide additionally a closed-form solution of the non-trivial critical point(s) for commonly used activation functions through solving an equivalent constrained optimization problem. Empirically, we find for the erf activation function that gradient flow converges either to the optimal copy-average critical point or to another point where each student neuron approximately copies a different teacher neuron. 
Finally, we find similar results for the ReLU activation function, suggesting that the optimal solution of underparameterized networks has a universal structure.", "keywords": "shallow neural networks;non-convex optimization;approximation error;loss landscape", "primary_area": "", "supplementary_material": "/attachment/be42cf653c91b7c98badb338191747eaaa3b50e2.pdf", "author": "Berfin Simsek;Amire Bendjeddou;Wulfram Gerstner;Johanni Brea", "authorids": "~Berfin_Simsek1;amire.bendjeddou@epfl.ch;~Wulfram_Gerstner1;~Johanni_Brea1", "gender": "F;;;", "homepage": "https://www.bsimsek.com/;;https://lcnwww.epfl.ch/gerstner/;", "dblp": "244/2455;;g/WGerstner;", "google_scholar": "Ysi38KIAAAAJ;;https://scholar.google.ch/citations?user=vSd2RnEAAAAJ;", "orcid": ";;0000-0002-4344-2189;", "linkedin": ";;;", "or_profile": "~Berfin_Simsek1;amire.bendjeddou@epfl.ch;~Wulfram_Gerstner1;~Johanni_Brea1", "aff": "EPFL;;EPFL - EPF Lausanne;", "aff_domain": "epfl.ch;;epfl.ch;", "position": "PhD student;;Full Professor;", "bibtex": "@inproceedings{\nsimsek2023should,\ntitle={Should Under-parameterized Student Networks Copy or Average Teacher Weights?},\nauthor={Berfin Simsek and Amire Bendjeddou and Wulfram Gerstner and Johanni Brea},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MG0mYskXN2}\n}", "github": "", "project": "", "reviewers": "Zuqd;ThaK;6p1W;Cg9m", "pdf_size": 2772624, "rating": "4;6;6;7", "confidence": "2;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;2", "wc_summary": "61;197;192;139", "wc_strengths": "27;40;110;81", "wc_weaknesses": "38;177;471;51", "wc_questions": "97;71;33;137", "wc_limitations": "5;1;27;1", "wc_review": "228;486;833;409", "wc_reply_reviewers": "54;119;0;16", "wc_reply_authors": "102;257;28;28", "reply_reviewers": "1;1;0;1", "reply_authors": "3;4;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 147.25, 54.73744148204225 ], "wc_strengths_avg": [ 64.5, 32.973474187595095 ], "wc_weaknesses_avg": [ 184.25, 174.22883659142076 ], "wc_questions_avg": [ 84.5, 37.90448522272793 ], "wc_limitations_avg": [ 8.5, 10.805091392487155 ], "wc_review_avg": [ 489.0, 219.58255850590683 ], "wc_reply_reviewers_avg": [ 47.25, 45.833257575694965 ], "wc_reply_authors_avg": [ 103.75, 93.49431800917101 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6811607799467533039&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "epfl.ch;;epfl.ch;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Learning Efficient Surrogate Dynamic Models with Graph Spline Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71917", "id": "MGPST5I9DO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4c3a66ed818455b8bbe591b6a5d0f56-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=MGPST5I9DO", "openreview": "https://openreview.net/forum?id=MGPST5I9DO", "poster": "/media/PosterPDFs/NeurIPS%202023/71917.png?t=1697527207.088251", "slides": "https://nips.cc/virtual/2023/poster/71917", "video": "https://nips.cc/virtual/2023/poster/71917", "author_site": "Chuanbo Hua, Federico Berto, Michael Poli, Stefano Massaroli, Jinkyoo Park", "tldr": "", "abstract": "While complex simulations of physical systems have been widely used in engineering and scientific computing, lowering their often prohibitive computational requirements has only recently been tackled by deep learning approaches. In this paper, we present GraphSplineNets, a novel deep-learning method to speed up the forecasting of physical systems by reducing the grid size and number of iteration steps of deep surrogate models. Our method uses two differentiable orthogonal spline collocation methods to efficiently predict response at any location in time and space. Additionally, we introduce an adaptive collocation strategy in space to prioritize sampling from the most important regions. GraphSplineNets improve the accuracy-speedup tradeoff in forecasting various dynamical systems with increasing complexity, including the heat equation, damped wave propagation, Navier-Stokes equations, and real-world ocean currents in both regular and irregular domains.", "keywords": "Graph;Spline Collocation Method;Graph Neural Networks;Simulation;Partial Differential Equations;PDEs;Physics;Scientific Computing;Surrogate Models;Weather Forecasting", "primary_area": "", "supplementary_material": "", "author": "Chuanbo Hua;Federico Berto;Michael Poli;Stefano Massaroli;Jinkyoo Park", "authorids": "~Chuanbo_Hua1;~Federico_Berto1;~Michael_Poli1;~Stefano_Massaroli1;~Jinkyoo_Park1", "gender": "M;M;M;;M", "homepage": "https://github.com/cbhua;https://fedebotu.github.io/;;;http://silab.kaist.ac.kr/", "dblp": "326/5321;317/1711;;;156/7535", "google_scholar": "fjKA5gYAAAAJ;https://scholar.google.com/citations?hl=en;RgIBwboAAAAJ;IwCfl4UAAAAJ;sH2a0nkAAAAJ", "orcid": "0000-0001-7700-792X;0000-0002-7438-8365;;;0000-0003-2620-1479", "linkedin": ";federicoberto/;;;", "or_profile": "~Chuanbo_Hua1;~Federico_Berto1;~Michael_Poli1;~Stefano_Massaroli1;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Stanford University;MILA;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;stanford.edu;mila.quebec;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nhua2023learning,\ntitle={Learning Efficient Surrogate Dynamic Models with Graph Spline Networks},\nauthor={Chuanbo Hua and Federico Berto and Michael Poli and Stefano Massaroli and Jinkyoo Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MGPST5I9DO}\n}", "github": "", "project": "", "reviewers": "B9pj;Rs2G;jrMD;3ehZ", "pdf_size": 6740626, "rating": "6;7;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "1;3;3;3", "wc_summary": "55;109;59;61", "wc_strengths": "84;49;176;86", "wc_weaknesses": "230;123;184;4", "wc_questions": "90;27;216;64", "wc_limitations": "9;9;37;89", "wc_review": "468;317;672;304", "wc_reply_reviewers": "26;305;288;27", "wc_reply_authors": "44;849;853;44", "reply_reviewers": "1;2;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 6.75, 
0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 71.0, 22.045407685048602 ], "wc_strengths_avg": [ 98.75, 46.96474741761101 ], "wc_weaknesses_avg": [ 135.25, 84.75073745991831 ], "wc_questions_avg": [ 99.25, 71.02596356262968 ], "wc_limitations_avg": [ 36.0, 32.66496594212215 ], "wc_review_avg": [ 440.25, 148.5199902370048 ], "wc_reply_reviewers_avg": [ 161.5, 135.13419256428034 ], "wc_reply_authors_avg": [ 447.5, 403.50247830713505 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6011203994488473480&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;kaist.ac.kr;stanford.edu;mila.quebec;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Stanford University;Mila", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.stanford.edu;https://mila.quebec", "aff_unique_abbr": "KAIST;Stanford;MILA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "South Korea;United States;Canada" }, { "title": "Debiasing Conditional Stochastic Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71916", "id": "MH7E7AME1r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f944a7bcfe9e76b34490ebe4e29196d9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MH7E7AME1r", "openreview": "https://openreview.net/forum?id=MH7E7AME1r", "poster": "/media/PosterPDFs/NeurIPS%202023/71916.png?t=1702359549.942462", "slides": "https://nips.cc/virtual/2023/poster/71916", "video": "https://nips.cc/virtual/2023/poster/71916", "author_site": "Lie He, Shiva Kasiviswanathan", "tldr": "", "abstract": "In this paper, we study the conditional stochastic optimization (CSO) problem which covers a variety of applications including portfolio selection, reinforcement learning, robust learning, causal inference, etc. The sample-averaged gradient of the CSO objective is biased due to its nested structure, and therefore requires a high sample complexity for convergence. We introduce a general stochastic extrapolation technique that effectively reduces the bias. We show that for nonconvex smooth objectives, combining this extrapolation with variance reduction techniques can achieve a significantly better sample complexity than the existing bounds. Additionally, we develop new algorithms for the finite-sum variant of the CSO problem that also significantly improve upon existing results. 
Finally, we believe that our debiasing technique has the potential to be a useful tool for addressing similar challenges in other stochastic optimization problems.", "keywords": "Optimization;Bilevel Optimization;Stochastic Optimization", "primary_area": "", "supplementary_material": "", "author": "Lie He;Shiva Kasiviswanathan", "authorids": "~Lie_He1;~Shiva_Kasiviswanathan1", "gender": "M;M", "homepage": "https://liehe.github.io/;http://www.shivakasiviswanathan.com", "dblp": "225/5245;67/1300", "google_scholar": "rIAYxaMAAAAJ;XnHdkZUAAAAJ", "orcid": ";", "linkedin": ";kasivisw/", "or_profile": "~Lie_He1;~Shiva_Kasiviswanathan1", "aff": "EPFL - EPF Lausanne;Amazon", "aff_domain": "epfl.ch;amazon.com", "position": "PhD student;Research Scientist", "bibtex": "@inproceedings{\nhe2023debiasing,\ntitle={Debiasing Conditional Stochastic Optimization},\nauthor={Lie He and Shiva Kasiviswanathan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MH7E7AME1r}\n}", "github": "", "project": "", "reviewers": "B49C;STJD;me2Y;eDyu;VftD", "pdf_size": 1005742, "rating": "6;6;6;6;6", "confidence": "4;3;2;3;3", "soundness": "3;4;3;3;2", "novelty": "2;3;3;3;2", "presentation": "2;3;3;3;3", "wc_summary": "67;31;196;108;94", "wc_strengths": "41;93;62;46;107", "wc_weaknesses": "230;147;229;206;179", "wc_questions": "94;1;65;32;8", "wc_limitations": "1;1;41;35;8", "wc_review": "433;273;593;427;396", "wc_reply_reviewers": "21;19;23;51;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 99.2, 55.06868438595569 ], "wc_strengths_avg": [ 69.8, 25.995384205662358 ], "wc_weaknesses_avg": [ 198.2, 31.656910777901242 ], "wc_questions_avg": [ 40.0, 35.07135583350036 ], "wc_limitations_avg": [ 17.2, 17.278888853164144 ], "wc_review_avg": [ 424.4, 102.21076264268847 ], "wc_reply_reviewers_avg": [ 27.4, 11.892854997854805 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13889230143612625318&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "epfl.ch;amazon.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "EPFL;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.epfl.ch;https://www.amazon.com", "aff_unique_abbr": "EPFL;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Sequential Preference Ranking for Efficient Reinforcement Learning from Human Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71915", "id": "MIYBTjCVjR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/99766cda865be123d55a1d9666c7b9fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MIYBTjCVjR", "openreview": "https://openreview.net/forum?id=MIYBTjCVjR", "poster": "/media/PosterPDFs/NeurIPS%202023/71915.png?t=1702164350.694726", "slides": "https://nips.cc/virtual/2023/poster/71915", "video": 
"https://nips.cc/virtual/2023/poster/71915", "author_site": "Minyoung Hwang, Gunmin Lee, Hogun Kee, Chan Woo Kim, Kyungjae Lee, Songhwai Oh", "tldr": "", "abstract": "Reinforcement learning from human feedback (RLHF) alleviates the problem of designing a task-specific reward function in reinforcement learning by learning it from human preference. However, existing RLHF models are considered inefficient as they produce only a single preference data from each human feedback. To tackle this problem, we propose a novel RLHF framework called SeqRank, that uses sequential preference ranking to enhance the feedback efficiency. Our method samples trajectories in a sequential manner by iteratively selecting a defender from the set of previously chosen trajectories $\\mathcal{K}$ and a challenger from the set of unchosen trajectories $\\mathcal{U}\\setminus\\mathcal{K}$, where $\\mathcal{U}$ is the replay buffer. We propose two trajectory comparison methods with different defender sampling strategies: (1) sequential pairwise comparison that selects the most recent trajectory and (2) root pairwise comparison that selects the most preferred trajectory from $\\mathcal{K}$. We construct a data structure and rank trajectories by preference to augment additional queries. The proposed method results in at least 39.2% higher average feedback efficiency than the baseline and also achieves a balance between feedback efficiency and data dependency. We examine the convergence of the empirical risk and the generalization bound of the reward model with Rademacher complexity. While both trajectory comparison methods outperform conventional pairwise comparison, root pairwise comparison improves the average reward in locomotion tasks and the average success rate in manipulation tasks by 29.0% and 25.0%, respectively. 
The source code and the videos are provided in the supplementary material.", "keywords": "Reinforcement Learning; Reinforcement Learning from Human Feedback; Preference-based Reinforcement Learning; Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/8bcd064973417dd39fe5638d7734f6ada66922f4.zip", "author": "Minyoung Hwang;Gunmin Lee;Hogun Kee;Chan Woo Kim;Kyungjae Lee;Songhwai Oh", "authorids": "~Minyoung_Hwang1;~Gunmin_Lee1;~Hogun_Kee1;~Chan_Woo_Kim2;~Kyungjae_Lee1;~Songhwai_Oh1", "gender": "F;M;M;M;M;", "homepage": "https://minyoung1005.github.io/;;https://github.com/hogunkee/;https://sites.google.com/view/railab/members;https://sites.google.com/view/kyungjaelee;https://rllab.snu.ac.kr/", "dblp": "299/4318;257/4087;274/9254;;13/7265-1;17/3173", "google_scholar": "-KZH9WUAAAAJ;https://scholar.google.com/citations?hl=ko;https://scholar.google.com/citations?hl=ko;;https://scholar.google.co.kr/citations?user=OZZJagIAAAAJ;VEzNY_oAAAAJ", "orcid": ";;;;0000-0003-0147-2715;0000-0002-9781-2018", "linkedin": "minyoung-hwang-81480819b/;;;;;", "or_profile": "~Minyoung_Hwang1;~Gunmin_Lee1;~Hogun_Kee1;~Chan_Woo_Kim2;~Kyungjae_Lee1;~Songhwai_Oh1", "aff": ";Seoul National University;Seoul National University;Chung-Ang University;ChungAng University;Seoul National University", "aff_domain": ";snu.ac.kr;snu.ac.kr;cau.ac.kr;cau.ac.kr;snu.ac.kr", "position": ";PhD student;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhwang2023sequential,\ntitle={Sequential Preference Ranking for Efficient Reinforcement Learning from Human Feedback},\nauthor={Minyoung Hwang and Gunmin Lee and Hogun Kee and Chan Woo Kim and Kyungjae Lee and Songhwai Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MIYBTjCVjR}\n}", "github": "", "project": "", "reviewers": "f3sA;tocg;Mtqb;sxrB", "pdf_size": 2155529, "rating": "6;6;7;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;2", "wc_summary": "30;77;55;165", "wc_strengths": "22;42;79;53", "wc_weaknesses": "23;221;107;298", "wc_questions": "172;72;141;267", "wc_limitations": "1;9;2;40", "wc_review": "248;421;384;823", "wc_reply_reviewers": "16;93;25;195", "wc_reply_authors": "30;824;40;340", "reply_reviewers": "1;1;1;2", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 50.85948780709456 ], "wc_strengths_avg": [ 49.0, 20.579115627256677 ], "wc_weaknesses_avg": [ 162.25, 105.26484455885544 ], "wc_questions_avg": [ 163.0, 70.11062686925571 ], "wc_limitations_avg": [ 13.0, 15.890248582070704 ], "wc_review_avg": [ 469.0, 214.29302368486006 ], "wc_reply_reviewers_avg": [ 82.25, 71.5799378317696 ], "wc_reply_authors_avg": [ 308.5, 322.64027956843825 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13560771950272233357&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";snu.ac.kr;snu.ac.kr;cau.ac.kr;cau.ac.kr;snu.ac.kr", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Seoul National University;Chung-Ang 
University;Chungang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;http://www.cau.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr": "SNU;CAU;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "MJJQRUFzeX", "title": "Unified Convergence Theory of Stochastic and Variance-Reduced Cubic Newton Methods", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study stochastic Cubic Newton methods for solving general possibly non-convex minimization problems. We propose a new framework, which we call the {\\em helper framework}, that provides a unified view of the stochastic and variance-reduced second-order algorithms equipped with global complexity guarantees. It can also be applied to learning with auxiliary information. Our helper framework offers the algorithm designer high flexibility for constructing and analysis of the stochastic Cubic Newton methods, allowing arbitrary size batches, and the use of noisy and possibly biased estimates of the gradients and Hessians, incorporating both the variance reduction and the lazy Hessian updates. We recover the best-known complexities for the stochastic and variance-reduced Cubic Newton, under weak assumptions on the noise and avoiding artificial logarithms. A direct consequence of our theory is the new lazy stochastic second-order method, which significantly improves the arithmetic complexity for large dimension problems. We also establish complexity bounds for the classes of gradient-dominated objectives, that include convex and strongly convex problems. For Auxiliary Learning, we show that using a helper (auxiliary function) can outperform training alone if a given similarity measure is small.", "keywords": "Cubic Newton;auxiliary information;variance reduction;gradient-dominated functions;non-convex optimization", "primary_area": "", "supplementary_material": "/attachment/7e6dde3f9d39bdbc76d61e7734f30c09bd23676a.pdf", "author": "El Mahdi Chayti;Martin Jaggi;Nikita Doikov", "authorids": "~El_Mahdi_Chayti2;~Martin_Jaggi1;~Nikita_Doikov1", "gender": "M;;M", "homepage": "https://mlo.epfl.ch;https://doikov.com;https://people.epfl.ch/el-mahdi.chayti", "dblp": "17/4402;222/9897;", "google_scholar": "https://scholar.google.ch/citations?user=r1TJBr8AAAAJ;YNBhhjUAAAAJ;ZurKqVkAAAAJ", "orcid": "0000-0003-1579-5558;;", "linkedin": ";;el-mahdi-chayti-66434a326//", "or_profile": "~Martin_Jaggi1;~Nikita_Doikov1;~EL_MAHDI_CHAYTI1", "aff": "EPFL;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "position": "Associate Professor;Postdoc;PhD student", "bibtex": "@misc{\nchayti2023unified,\ntitle={Unified Convergence Theory of Stochastic and Variance-Reduced Cubic Newton Methods},\nauthor={El Mahdi Chayti and Martin Jaggi and Nikita Doikov},\nyear={2023},\nurl={https://openreview.net/forum?id=MJJQRUFzeX}\n}", "github": "", "project": "", "reviewers": "zomT;bq1t;XQuJ;fBtY", "site": "https://openreview.net/forum?id=MJJQRUFzeX", "pdf_size": 298175, "rating": "3;5;6;8", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "79;141;67;46", "wc_strengths": "9;52;83;46", "wc_weaknesses": "78;94;92;6", "wc_questions": "27;366;70;130", "wc_limitations": "1;1;13;1", "wc_review": "194;654;325;229", "wc_reply_reviewers": "0;83;0;0", "wc_reply_authors": "0;357;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", 
"rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 35.3721288587498 ], "wc_strengths_avg": [ 47.5, 26.291633650269812 ], "wc_weaknesses_avg": [ 67.5, 36.038174204584784 ], "wc_questions_avg": [ 148.25, 130.93199570769553 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 350.5, 181.67071860924645 ], "wc_reply_reviewers_avg": [ 20.75, 35.94005425705421 ], "wc_reply_authors_avg": [ 89.25, 154.5855345755223 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8006407690254357, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11848376710466739843&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Bridging the Domain Gap: Self-Supervised 3D Scene Understanding with Foundation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71914", "id": "MJbDy2155j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa5b423e24b442180bcd4e13ae75a27f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MJbDy2155j", "openreview": "https://openreview.net/forum?id=MJbDy2155j", "poster": "/media/PosterPDFs/NeurIPS%202023/71914.png?t=1699127119.180831", "slides": "https://nips.cc/virtual/2023/poster/71914", "video": "https://nips.cc/virtual/2023/poster/71914", "author_site": "Zhimin Chen, Longlong Jing, Yingwei Li, Bing Li", "tldr": "", "abstract": "Foundation models have achieved remarkable results in 2D and language tasks like image segmentation, object detection, and visual-language understanding. However, their potential to enrich 3D scene representation learning is largely untapped due to the existence of the domain gap. In this work, we propose an innovative methodology called Bridge3D to address this gap by pre-training 3D models using features, semantic masks, and captions sourced from foundation models. Specifically, our method employs semantic masks from foundation models to guide the masking and reconstruction process for the masked autoencoder, enabling more focused attention on foreground representations. Moreover, we bridge the 3D-text gap at the scene level using image captioning foundation models, thereby facilitating scene-level knowledge distillation. We further extend this bridging effort by introducing an innovative object-level knowledge distillation method that harnesses highly accurate object-level masks and semantic text data from foundation models. Our methodology significantly surpasses the performance of existing state-of-the-art methods in 3D object detection and semantic segmentation tasks. For instance, on the ScanNet dataset, Bridge3D improves the baseline by a notable margin of 6.3%. 
Code will be available at: https://github.com/Zhimin-C/Bridge3D", "keywords": "3D self-supervised learning;Multi-modal Representation Learning;Masked autoencoders;Knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/daf324d2d9b8b7f35ccefd140694fa5348dbbe8c.pdf", "author": "Zhimin Chen;Longlong Jing;Yingwei Li;Bing Li", "authorids": "~Zhimin_Chen1;~Longlong_Jing1;~Yingwei_Li4;~Bing_Li4", "gender": "M;M;M;M", "homepage": "https://zhiminc.website;https://longlong-jing.github.io/;http://yingwei.li/;http://cecas.clemson.edu/bingli", "dblp": ";214/9050;;13/2692-8", "google_scholar": "OIYNwLkAAAAJ;lhdhi5wAAAAJ;phWmJeIAAAAJ;yysOczkAAAAJ", "orcid": ";;;0000-0003-4987-6129", "linkedin": ";;;", "or_profile": "~Zhimin_Chen1;~Longlong_Jing1;~Yingwei_Li4;~Bing_Li4", "aff": "Clemson University;Waymo LLC;Waymo LLC;Clemson University", "aff_domain": "clemson.edu;waymo.com;waymo.com;clemson.edu", "position": "PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023bridging,\ntitle={Bridging the Domain Gap: Self-Supervised 3D Scene Understanding with Foundation Models},\nauthor={Zhimin Chen and Longlong Jing and Yingwei Li and Bing Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MJbDy2155j}\n}", "github": "", "project": "", "reviewers": "MtnQ;V9nH;hzsQ;E1Dc;XLcq", "pdf_size": 1299185, "rating": "5;5;6;6;6", "confidence": "4;4;4;4;4", "soundness": "2;3;2;2;3", "novelty": "1;3;2;3;3", "presentation": "2;2;3;2;3", "wc_summary": "82;30;163;49;107", "wc_strengths": "29;36;42;35;51", "wc_weaknesses": "448;18;125;167;59", "wc_questions": "37;157;2;35;8", "wc_limitations": "61;5;2;2;20", "wc_review": "657;246;334;288;245", "wc_reply_reviewers": "129;20;19;17;11", "wc_reply_authors": "446;181;212;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;3;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 86.2, 46.67076172508865 ], "wc_strengths_avg": [ 38.6, 7.445804187594514 ], "wc_weaknesses_avg": [ 163.4, 151.34411121678968 ], "wc_questions_avg": [ 47.8, 56.36807607147861 ], "wc_limitations_avg": [ 18.0, 22.512218904408336 ], "wc_review_avg": [ 354.0, 154.99032227852163 ], "wc_reply_reviewers_avg": [ 39.2, 45.00844365227484 ], "wc_reply_authors_avg": [ 167.8, 164.8252407855059 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12072256408208612293&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "clemson.edu;waymo.com;waymo.com;clemson.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Clemson University;Waymo", "aff_unique_dep": ";", "aff_unique_url": "https://www.clemson.edu;https://www.waymo.com", "aff_unique_abbr": "Clemson;Waymo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diff-Instruct: A Universal Approach for Transferring Knowledge From Pre-trained Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71913", "id": "MLIs5iRq4w", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f115f619b62833aadc5acb058975b0e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MLIs5iRq4w", "openreview": "https://openreview.net/forum?id=MLIs5iRq4w", "poster": "/media/PosterPDFs/NeurIPS%202023/71913.png?t=1702429939.2113304", "slides": "https://nips.cc/virtual/2023/poster/71913", "video": "https://nips.cc/virtual/2023/poster/71913", "author_site": "Weijian Luo, Tianyang Hu, Shifeng Zhang, Jiacheng Sun, Zhenguo Li, Zhihua Zhang", "tldr": "", "abstract": "Due to the ease of training, ability to scale, and high sample quality, diffusion models (DMs) have become the preferred option for generative modeling, with numerous pre-trained models available for a wide variety of datasets. Containing intricate information about data distributions, pre-trained DMs are valuable assets for downstream applications. In this work, we consider learning from pre-trained DMs and transferring their knowledge to other generative models in a data-free fashion. Specifically, we propose a general framework called Diff-Instruct to instruct the training of arbitrary generative models as long as the generated samples are differentiable with respect to the model parameters. Our proposed Diff-Instruct is built on a rigorous mathematical foundation where the instruction process directly corresponds to minimizing a novel divergence we call Integral Kullback-Leibler (IKL) divergence. IKL is tailored for DMs by calculating the integral of the KL divergence along a diffusion process, which we show to be more robust in comparing distributions with misaligned supports. We also reveal non-trivial connections of our method to existing works such as DreamFusion \\citep{poole2022dreamfusion}, and generative adversarial training. To demonstrate the effectiveness and universality of Diff-Instruct, we consider two scenarios: distilling pre-trained diffusion models and refining existing GAN models. The experiments on distilling pre-trained diffusion models show that Diff-Instruct results in state-of-the-art single-step diffusion-based models. The experiments on refining GAN models show that the Diff-Instruct can consistently improve the pre-trained generators of GAN models across various settings. 
Our official code is released through \\url{https://github.com/pkulwj1994/diff_instruct}.", "keywords": "diffusion model;data-free distillation;implicit generator;knowledge transfer", "primary_area": "", "supplementary_material": "", "author": "Weijian Luo;Tianyang Hu;Shifeng Zhang;Jiacheng Sun;Zhenguo Li;Zhihua Zhang", "authorids": "~Weijian_Luo1;~Tianyang_Hu1;~Shifeng_Zhang5;~Jiacheng_Sun1;~Zhenguo_Li1;~Zhihua_Zhang1", "gender": ";M;M;M;M;M", "homepage": ";https://hu-tianyang.github.io/;https://github.com/zsffq999;;http://www.ee.columbia.edu/~zgli/;http://www.math.pku.edu.cn/teachers/zhzhang/", "dblp": ";170/2551;;165/5350;23/6479;52/5331", "google_scholar": ";mlA_3r0AAAAJ;;;XboZC1AAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131;;", "or_profile": "~Weijian_Luo1;~Tianyang_Hu1;~Shifeng_Zhang5;~Jiacheng_Sun1;~Zhenguo_Li1;~Zhihua_Zhang1", "aff": ";Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;Peking University", "aff_domain": ";huawei.com;huawei.com;huawei.com;huawei.com;pku.edu.cn", "position": ";Researcher;Researcher;Senior Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nluo2023diffinstruct,\ntitle={Diff-Instruct: A Universal Approach for Transferring Knowledge From Pre-trained Diffusion Models},\nauthor={Weijian Luo and Tianyang Hu and Shifeng Zhang and Jiacheng Sun and Zhenguo Li and Zhihua Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MLIs5iRq4w}\n}", "github": "", "project": "", "reviewers": "Ln4e;kX6X;6Ze7;62Pb;AdFD", "pdf_size": 20411245, "rating": "4;5;6;6;8", "confidence": "4;2;3;4;5", "soundness": "3;2;3;4;3", "novelty": "3;2;2;3;3", "presentation": "3;1;2;2;3", "wc_summary": "133;122;32;77;73", "wc_strengths": "54;110;30;114;104", "wc_weaknesses": "87;111;125;196;112", "wc_questions": "77;185;4;2;58", "wc_limitations": "38;18;7;4;1", "wc_review": "389;546;198;393;348", "wc_reply_reviewers": "0;14;145;0;190", "wc_reply_authors": "44;16;906;0;753", "reply_reviewers": "0;1;2;0;2", "reply_authors": "2;2;4;1;3", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 87.4, 36.499863013441576 ], "wc_strengths_avg": [ 82.4, 33.997646977401246 ], "wc_weaknesses_avg": [ 126.2, 36.994053576216814 ], "wc_questions_avg": [ 65.2, 66.75747149196111 ], "wc_limitations_avg": [ 13.6, 13.484806264830057 ], "wc_review_avg": [ 374.8, 111.1924457865731 ], "wc_reply_reviewers_avg": [ 69.8, 81.19211784403706 ], "wc_reply_authors_avg": [ 343.8, 399.761128675613 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5321811563901743, "gs_citation": 102, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13284596545879041604&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";huawei.com;huawei.com;huawei.com;huawei.com;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Huawei;Peking University", "aff_unique_dep": "Noah's Ark Lab;", "aff_unique_url": "https://www.huawei.com;http://www.pku.edu.cn", "aff_unique_abbr": "Huawei;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0", "aff_country_unique": "China" }, { "title": "LOVM: Language-Only Vision Model Selection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73618", "id": "MLLp6AHQFs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/68c33c4e6fc97f7b31c964dc83303a28-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=MLLp6AHQFs", "openreview": "https://openreview.net/forum?id=MLLp6AHQFs", "poster": "/media/PosterPDFs/NeurIPS%202023/73618.png?t=1699383784.516107", "slides": "https://nips.cc/virtual/2023/poster/73618", "video": "https://nips.cc/virtual/2023/poster/73618", "author_site": "Orr Zohar, Shih-Cheng Huang, Kuan-Chieh Wang, Serena Yeung", "tldr": "", "abstract": "Pre-trained multi-modal vision-language models (VLMs) are becoming increasingly popular due to their exceptional performance on downstream vision applications, particularly in the few- and zero-shot settings. However, selecting the best-performing VLM for some downstream applications is non-trivial, as it is dataset and task-dependent. Meanwhile, the exhaustive evaluation of all available VLMs on a novel application is not only time and computationally demanding but also necessitates the collection of a labeled dataset for evaluation. As the number of open-source VLM variants increases, there is a need for an efficient model selection strategy that does not require access to a curated evaluation dataset. This paper proposes a novel task and benchmark for efficiently evaluating VLMs' zero-shot performance on downstream applications without access to the downstream task dataset. Specifically, we introduce a new task LOVM: **L**anguage-**O**nly **V**ision **M**odel Selection , where methods are expected to perform both model selection and performance prediction based solely on a text description of the desired downstream application. 
We then introduce an extensive LOVM benchmark consisting of ground-truth evaluations of 35 pre-trained VLMs and 23 datasets, where methods are expected to rank the pre-trained VLMs and predict their zero-shot performance.", "keywords": "Multi-modal models;Language-Vision Models;Foundation Models;Transferability;Model Selection", "primary_area": "", "supplementary_material": "/attachment/5aaad05c03f3314948d82efb64ddaaad4982d8d4.pdf", "author": "Orr Zohar;Shih-Cheng Huang;Kuan-Chieh Wang;Serena Yeung", "authorids": "~Orr_Zohar1;~Shih-Cheng_Huang1;~Kuan-Chieh_Wang1;~Serena_Yeung1", "gender": "M;;;F", "homepage": "https://orrzohar.github.io/;https://www.linkedin.com/in/mschuang/;https://wangkua1.github.io;http://ai.stanford.edu/~syyeung/", "dblp": "335/1624;;13/7562;147/5023", "google_scholar": "Jjw4rL0AAAAJ;;https://scholar.google.ca/citations?user=LgMuT6IAAAAJ;Tw2m5kUAAAAJ", "orcid": ";;;0000-0003-0529-0628", "linkedin": "orr-zohar/;;;", "or_profile": "~Orr_Zohar1;~Shih-Cheng_Huang1;~Kuan-Chieh_Wang1;~Serena_Yeung1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nzohar2023lovm,\ntitle={{LOVM}: Language-Only Vision Model Selection},\nauthor={Orr Zohar and Shih-Cheng Huang and Kuan-Chieh Wang and Serena Yeung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=MLLp6AHQFs}\n}", "github": "", "project": "", "reviewers": "U8rx;BH74;nEyK;i9wv", "pdf_size": 989982, "rating": "6;7;7;7", "confidence": "3;4;3;4", "wc_summary_and_contributions": "123;63;71;106", "wc_strengths": "37;94;57;30", "wc_improvement": "78;92;126;109", "wc_limitations": "15;32;10;4", "wc_correctness": "1;5;29;1", "wc_clarity": "8;4;1;1", "wc_relation_to_prior_work": "11;5;1;1", "wc_documentation": "1;14;9;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "275;310;305;254", "wc_reply_reviewers": "0;0;19;0", "wc_reply_authors": "430;479;926;858", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;3;3", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 90.75, 24.661457783350926 ], "wc_strengths_avg": [ 54.5, 24.86463351831271 ], "wc_improvement_avg": [ 101.25, 18.019087102292392 ], "wc_limitations_avg": [ 15.25, 10.425329730996522 ], "wc_correctness_avg": [ 9.0, 11.661903789690601 ], "wc_clarity_avg": [ 3.5, 2.8722813232690143 ], "wc_relation_to_prior_work_avg": [ 4.5, 4.092676385936225 ], "wc_documentation_avg": [ 6.25, 5.539629951540085 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 286.0, 22.814469093099667 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 673.25, 220.7480181111486 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12924418866199117938&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford",
"aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Enhancing Adversarial Robustness via Score-Based Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71912", "id": "MOAHXRzHhm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a2e707354da36956945dbb288efe82b3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MOAHXRzHhm", "openreview": "https://openreview.net/forum?id=MOAHXRzHhm", "poster": "/media/PosterPDFs/NeurIPS%202023/71912.png?t=1699859967.3339806", "slides": "https://nips.cc/virtual/2023/poster/71912", "video": "https://nips.cc/virtual/2023/poster/71912", "author_site": "Boya Zhang, Weijian Luo, Zhihua Zhang", "tldr": "", "abstract": "Adversarial attacks have the potential to mislead deep neural network classifiers by introducing slight perturbations. Developing algorithms that can mitigate the effects of these attacks is crucial for ensuring the safe use of artificial intelligence. Recent studies have suggested that score-based diffusion models are effective in adversarial defenses. However, existing diffusion-based defenses rely on the sequential simulation of the reversed stochastic differential equations of diffusion models, which are computationally inefficient and yield suboptimal results. In this paper, we introduce a novel adversarial defense scheme named ScoreOpt, which optimizes adversarial samples at test-time, towards original clean data in the direction guided by score-based priors. We conduct comprehensive experiments on multiple datasets, including CIFAR10, CIFAR100 and ImageNet. Our experimental results demonstrate that our approach outperforms existing adversarial defenses in terms of both robustness performance and inference speed.", "keywords": "Adversarial Defense;Adversarial Attack;Score-based Models;Diffusion Models", "primary_area": "", "supplementary_material": "", "author": "Boya Zhang;Weijian Luo;Zhihua Zhang", "authorids": "~Boya_Zhang1;~Weijian_Luo1;~Zhihua_Zhang1", "gender": "F;;M", "homepage": ";;http://www.math.pku.edu.cn/teachers/zhzhang/", "dblp": ";;52/5331", "google_scholar": ";;", "orcid": ";;", "linkedin": "%E5%8D%9A%E9%9B%85-%E5%BC%A0-790ab7239/;;", "or_profile": "~Boya_Zhang1;~Weijian_Luo1;~Zhihua_Zhang1", "aff": "Peking University;;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nzhang2023enhancing,\ntitle={Enhancing Adversarial Robustness via Score-Based Optimization},\nauthor={Boya Zhang and Weijian Luo and Zhihua Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MOAHXRzHhm}\n}", "github": "", "project": "", "reviewers": "wKkn;85mv;US8D;epqL", "pdf_size": 2147914, "rating": "4;5;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "43;60;53;199", "wc_strengths": "25;56;74;165", "wc_weaknesses": "82;98;174;187", "wc_questions": "109;60;162;128", "wc_limitations": "7;20;12;100", "wc_review": "266;294;475;779", "wc_reply_reviewers": "278;18;317;187", "wc_reply_authors": "1460;0;1716;551", "reply_reviewers": "1;1;3;1", "reply_authors": "4;1;5;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 
0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 63.938935712130835 ], "wc_strengths_avg": [ 80.0, 52.11045960265559 ], "wc_weaknesses_avg": [ 135.25, 45.833257575694965 ], "wc_questions_avg": [ 114.75, 36.873940662749895 ], "wc_limitations_avg": [ 34.75, 37.95638944894522 ], "wc_review_avg": [ 453.5, 204.3336731916695 ], "wc_reply_reviewers_avg": [ 200.0, 115.18029345335077 ], "wc_reply_authors_avg": [ 931.75, 690.5115404538869 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9687957078038336108&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "On the Exploration of Local Significant Differences For Two-Sample Test", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71911", "id": "MRiitgpcUy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/10fc83943b4540a9524af6fc67a23fef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MRiitgpcUy", "openreview": "https://openreview.net/forum?id=MRiitgpcUy", "poster": "/media/PosterPDFs/NeurIPS%202023/71911.png?t=1701857306.9455476", "slides": "https://nips.cc/virtual/2023/poster/71911", "video": "https://nips.cc/virtual/2023/poster/71911", "author_site": "Zhijian Zhou, Jie Ni, Jia-He Yao, Wei Gao", "tldr": "", "abstract": "Recent years have witnessed increasing attentions on two-sample test with diverse real applications, while this work takes one more step on the exploration of local significant differences for two-sample test. We propose the ME$_\\text{MaBiD}$, an effective test for two-sample testing, and the basic idea is to exploit local information by multiple Mahalanobis kernels and introduce bi-directional hypothesis for testing. On the exploration of local significant differences, we first partition the embedding space into several rectangle regions via a new splitting criterion, which is relevant to test power and data correlation. We then explore local significant differences based on our bi-directional masked $p$-value together with the ME$_\\text{MaBiD}$ test. Theoretically, we present the asymptotic distribution and lower bounds of test power for our ME$_\\text{MaBiD}$ test, and control the familywise error rate on the exploration of local significant differences. 
We finally conduct extensive experiments to validate the effectiveness of our proposed methods on two-sample testing and the exploration of local significant differences.", "keywords": "two-sample test;local significant difference;directional information", "primary_area": "", "supplementary_material": "/attachment/ceda82fb54341898de84e97b017d1f5c6f859218.zip", "author": "Zhijian Zhou;Jie Ni;Jia-He Yao;Wei Gao", "authorids": "~Zhijian_Zhou1;~Jie_Ni3;~Jia-He_Yao1;~Wei_Gao7", "gender": "M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/zhouzj/;http://www.lamda.nju.edu.cn/nij/;https://www.lamda.nju.edu.cn/yaojh/;http://www.lamda.nju.edu.cn/gaow/", "dblp": ";;;28/2073-8", "google_scholar": ";;;pBzGACcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhijian_Zhou1;~Jie_Ni3;~Jia-He_Yao1;~Wei_Gao7", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nzhou2023on,\ntitle={On the Exploration of Local Significant Differences For Two-Sample Test},\nauthor={Zhijian Zhou and Jie Ni and Jia-He Yao and Wei Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MRiitgpcUy}\n}", "github": "", "project": "", "reviewers": "CMa2;wXPw;DcFi;42S6", "pdf_size": 0, "rating": "2;5;6;7", "confidence": "3;3;4;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "87;66;108;151", "wc_strengths": "84;71;232;106", "wc_weaknesses": "156;181;194;83", "wc_questions": "9;66;54;4", "wc_limitations": "1;48;1;1", "wc_review": "337;432;589;345", "wc_reply_reviewers": "407;38;287;15", "wc_reply_authors": "1037;126;378;0", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;2;1", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.0, 31.44041984452498 ], "wc_strengths_avg": [ 123.25, 64.02099265084851 ], "wc_weaknesses_avg": [ 153.5, 42.933087473416116 ], "wc_questions_avg": [ 33.25, 27.141987768032024 ], "wc_limitations_avg": [ 12.75, 20.351596988934308 ], "wc_review_avg": [ 425.75, 101.34933398893158 ], "wc_reply_reviewers_avg": [ 186.75, 165.970441645493 ], "wc_reply_authors_avg": [ 385.25, 400.1433336943151 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8058229640253803, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3440255489843266498&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "HEDNet: A Hierarchical Encoder-Decoder Network for 3D Object Detection in Point Clouds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71910", "id": "MUwr2YVJfN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a64e641fa00a7eb9500cb7e1835d0495-Abstract-Conference.html", "pdf":
"https://openreview.net/pdf?id=MUwr2YVJfN", "openreview": "https://openreview.net/forum?id=MUwr2YVJfN", "poster": "/media/PosterPDFs/NeurIPS%202023/71910.png?t=1701941104.1385596", "slides": "https://nips.cc/virtual/2023/poster/71910", "video": "https://nips.cc/virtual/2023/poster/71910", "author_site": "Gang Zhang, Chen Junnan, Guohuan Gao, Jianmin Li, Xiaolin Hu", "tldr": "", "abstract": "3D object detection in point clouds is important for autonomous driving systems. A primary challenge in 3D object detection stems from the sparse distribution of points within the 3D scene. Existing high-performance methods typically employ 3D sparse convolutional neural networks with small kernels to extract features. To reduce computational costs, these methods resort to submanifold sparse convolutions, which prevent the information exchange among spatially disconnected features. Some recent approaches have attempted to address this problem by introducing large-kernel convolutions or self-attention mechanisms, but they either achieve limited accuracy improvements or incur excessive computational costs. We propose HEDNet, a hierarchical encoder-decoder network for 3D object detection, which leverages encoder-decoder blocks to capture long-range dependencies among features in the spatial space, particularly for large and distant objects. We conducted extensive experiments on the Waymo Open and nuScenes datasets. HEDNet achieved superior detection accuracy on both datasets than previous state-of-the-art methods with competitive efficiency. The code is available at https://github.com/zhanggang001/HEDNet.", "keywords": "3D object detection; encoder-decoder structure", "primary_area": "", "supplementary_material": "/attachment/5141cf8248333909cffc04a40d5fa42b82af9859.pdf", "author": "Gang Zhang;Chen Junnan;Guohuan Gao;Jianmin Li;Xiaolin Hu", "authorids": "~Gang_Zhang3;~Chen_Junnan1;~Guohuan_Gao1;~Jianmin_Li1;~Xiaolin_Hu1", "gender": "Not Specified;M;M;M;M", "homepage": ";http://biic.aia.hust.edu.cn/info/1055/1100.htm;https://github.com/Rrrengar;;http://www.xlhu.cn/", "dblp": ";;;71/5930-1;60/6028-1", "google_scholar": "0QvcF6sAAAAJ;;UESvJ3GUbNQC;PeF1aPkAAAAJ;PksdgoUAAAAJ", "orcid": ";;0009-0008-5831-0198;;0000-0002-4907-7354", "linkedin": ";;;;", "or_profile": "~Gang_Zhang3;~Chen_Junnan1;~Guohuan_Gao1;~Jianmin_Li1;~Xiaolin_Hu1", "aff": "Tsinghua University;Huazhong University of Science and Technology;Beijing Institute of Technology;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;hust.edu.cn;bit.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;MS student;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023hednet,\ntitle={{HEDN}et: A Hierarchical Encoder-Decoder Network for 3D Object Detection in Point Clouds},\nauthor={Gang Zhang and Chen Junnan and Guohuan Gao and Jianmin Li and Xiaolin Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MUwr2YVJfN}\n}", "github": "", "project": "", "reviewers": "pAfm;k2sW;Nfby;vKEU;zRCr", "pdf_size": 1030302, "rating": "6;6;7;7;7", "confidence": "5;2;5;5;5", "soundness": "3;3;2;4;4", "novelty": "2;3;4;3;4", "presentation": "3;3;3;4;4", "wc_summary": "70;60;40;66;70", "wc_strengths": "94;52;25;87;46", "wc_weaknesses": "66;46;76;194;155", "wc_questions": "69;3;51;37;38", "wc_limitations": "1;1;5;58;4", "wc_review": "300;162;197;442;313", "wc_reply_reviewers": "22;10;29;45;67", "wc_reply_authors": 
"17;0;25;19;27", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.4, 1.2 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 61.2, 11.214276615100948 ], "wc_strengths_avg": [ 60.8, 25.949181104612915 ], "wc_weaknesses_avg": [ 107.4, 56.98280442379086 ], "wc_questions_avg": [ 39.6, 21.648094604375693 ], "wc_limitations_avg": [ 13.8, 22.157617200412144 ], "wc_review_avg": [ 282.8, 98.49548212989264 ], "wc_reply_reviewers_avg": [ 34.6, 19.764614845728715 ], "wc_reply_authors_avg": [ 17.6, 9.54148835350125 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6123724356957944, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1825542260676991431&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;hust.edu.cn;bit.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Tsinghua University;Huazhong University of Science and Technology;Beijing Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hust.edu.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "THU;HUST;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Improvements on Uncertainty Quantification for Node Classification via Distance Based Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71909", "id": "MUzdCW2hC6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad84864002a72c344c2227d7eb8842b1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MUzdCW2hC6", "openreview": "https://openreview.net/forum?id=MUzdCW2hC6", "poster": "/media/PosterPDFs/NeurIPS%202023/71909.png?t=1702266175.3741667", "slides": "https://nips.cc/virtual/2023/poster/71909", "video": "https://nips.cc/virtual/2023/poster/71909", "author_site": "Russell Hart, Linlin Yu, Yifei Lou, Feng Chen", "tldr": "", "abstract": "Deep neural networks have achieved significant success in the last decades, but they are not well-calibrated and often produce unreliable predictions. A large number of literature relies on uncertainty quantification to evaluate the reliability of a learning model, which is particularly important for applications of out-of-distribution (OOD) detection and misclassification detection. We are interested in uncertainty quantification for interdependent node-level classification. We start our analysis based on graph posterior networks (GPNs) that optimize the uncertainty cross-entropy (UCE)-based loss function. We describe the theoretical limitations of the widely-used UCE loss. To alleviate the identified drawbacks, we propose a distance-based regularization that encourages clustered OOD nodes to remain clustered in the latent space. 
We conduct extensive comparison experiments on eight standard datasets and demonstrate that the proposed regularization outperforms the state-of-the-art in both OOD detection and misclassification detection.", "keywords": "Uncertainty Quantification;Graph Posterior Network;Bayesian", "primary_area": "", "supplementary_material": "/attachment/0386785833ff9ca830bf42b2575d3c98831be57a.pdf", "author": "Russell Alan Hart;Linlin Yu;Yifei Lou;Feng Chen", "authorids": "~Russell_Alan_Hart1;~Linlin_Yu1;~Yifei_Lou2;~Feng_Chen7", "gender": "M;F;F;M", "homepage": ";;https://sites.google.com/site/louyifei/;https://personal.utdallas.edu/~fxc190007/", "dblp": ";204/9716;;21/3047-1", "google_scholar": ";https://scholar.google.com/citations?hl=en;iCiUflEAAAAJ;KOQ-SSYAAAAJ", "orcid": ";0009-0001-5690-9905;0000-0003-1973-5704;", "linkedin": "hartrussell/;linlin-yu-723884249/;;", "or_profile": "~Russell_Alan_Hart1;~Linlin_Yu1;~Yifei_Lou2;~Feng_Chen7", "aff": "The University of Texas at Dallas;The University of Texas at Dallas;University of Texas at Dallas;University of Texas, Dallas", "aff_domain": "cs.utdallas.edu;cs.utdallas.edu;utdallas.edu;utdallas.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhart2023improvements,\ntitle={Improvements on Uncertainty Quantification for Node Classification via Distance Based Regularization},\nauthor={Russell Alan Hart and Linlin Yu and Yifei Lou and Feng Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MUzdCW2hC6}\n}", "github": "", "project": "", "reviewers": "nRfd;XBDF;dzMP;Spkg", "pdf_size": 32328329, "rating": "4;6;7;8", "confidence": "2;1;3;3", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "1;3;3;3", "wc_summary": "34;60;50;94", "wc_strengths": "28;71;41;135", "wc_weaknesses": "164;30;16;147", "wc_questions": "35;10;33;29", "wc_limitations": "9;1;6;1", "wc_review": "270;172;146;406", "wc_reply_reviewers": "42;0;0;60", "wc_reply_authors": "44;0;0;39", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 59.5, 21.97157254271983 ], "wc_strengths_avg": [ 68.75, 41.30602256330183 ], "wc_weaknesses_avg": [ 89.25, 66.70597799298051 ], "wc_questions_avg": [ 26.75, 9.908960591303208 ], "wc_limitations_avg": [ 4.25, 3.418698582794336 ], "wc_review_avg": [ 248.5, 102.01347950148549 ], "wc_reply_reviewers_avg": [ 25.5, 26.28212320190285 ], "wc_reply_authors_avg": [ 20.75, 20.825165065372232 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.560611910581388, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3052188519348011442&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "cs.utdallas.edu;cs.utdallas.edu;utdallas.edu;utdallas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Dallas", "aff_unique_dep": "", "aff_unique_url": "https://www.utdallas.edu", "aff_unique_abbr": "UT Dallas", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Dallas", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Tanimoto Random Features 
for Scalable Molecular Machine Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71908", "id": "MV0INFAKGq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a69d44b3386e50c06f7107ef4f29302-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MV0INFAKGq", "openreview": "https://openreview.net/forum?id=MV0INFAKGq", "poster": "/media/PosterPDFs/NeurIPS%202023/71908.png?t=1701863278.7290266", "slides": "https://nips.cc/virtual/2023/poster/71908", "video": "https://nips.cc/virtual/2023/poster/71908", "author_site": "Austin Tripp, Sergio Bacallado, Sukriti Singh, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "tldr": "", "abstract": "The Tanimoto coefficient is commonly used to measure the similarity between molecules represented as discrete fingerprints,\neither as a distance metric or a positive definite kernel. While many kernel methods can be accelerated using random feature approximations, at present there is a lack of such approximations for the Tanimoto kernel. In this paper we propose two kinds of novel random features to allow this kernel to scale to large datasets, and in the process discover a novel extension of the kernel to real-valued vectors. We theoretically characterize these random features, and provide error bounds on the spectral norm of the Gram matrix. Experimentally, we show that these random features are effective at approximating the Tanimoto coefficient of real-world datasets\nand are useful for molecular property prediction and optimization tasks. Future updates to this work will be available at http://arxiv.org/abs/2306.14809.", "keywords": "Tanimoto;Kernel;MinMax;Gaussian process;molecule;chemistry;random features", "primary_area": "", "supplementary_material": "", "author": "Austin Tripp;Sergio Bacallado;Sukriti Singh;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~Austin_Tripp1;~Sergio_Bacallado1;~Sukriti_Singh1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": "M;M;;", "homepage": "https://www.austintripp.ca/;http://www.statslab.cam.ac.uk/~sb2116;;", "dblp": "267/5455;;;", "google_scholar": "WAvRaxMAAAAJ;;;", "orcid": "0000-0002-0138-7740;0000-0002-7193-6450;;", "linkedin": ";;;", "or_profile": "~Austin_Tripp1;~Sergio_Bacallado1;~Sukriti_Singh1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "aff": "University of Cambridge;University of Cambridge;;", "aff_domain": "cam.ac.uk;cam.ac.uk;;", "position": "PhD student;Associate Professor;;", "bibtex": "@inproceedings{\ntripp2023tanimoto,\ntitle={Tanimoto Random Features for Scalable Molecular Machine Learning},\nauthor={Austin Tripp and Sergio Bacallado and Sukriti Singh and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MV0INFAKGq}\n}", "github": "", "project": "", "reviewers": "stq5;pMsa;jyA1;A9mq;b6mP", "pdf_size": 590464, "rating": "5;7;7;7;8", "confidence": "3;3;2;3;2", "soundness": "3;3;4;4;4", "novelty": "2;3;3;3;3", "presentation": "2;3;4;4;4", "wc_summary": "28;73;93;41;59", "wc_strengths": "59;37;204;271;59", "wc_weaknesses": "154;22;367;60;38", "wc_questions": "1066;42;131;76;42", "wc_limitations": "2;9;113;10;1", "wc_review": "1309;183;908;458;199", "wc_reply_reviewers": "213;36;236;17;32", "wc_reply_authors": "3783;20;320;0;0", "reply_reviewers": "2;1;2;1;1", "reply_authors": "6;2;2;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.6, 
0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 58.8, 22.96432015105172 ], "wc_strengths_avg": [ 126.0, 93.81684283752038 ], "wc_weaknesses_avg": [ 128.2, 127.87243643569164 ], "wc_questions_avg": [ 271.4, 398.63045543460424 ], "wc_limitations_avg": [ 27.0, 43.15089802078283 ], "wc_review_avg": [ 611.4, 436.29100380365395 ], "wc_reply_reviewers_avg": [ 106.8, 96.58447080146993 ], "wc_reply_authors_avg": [ 824.6, 1484.1875353202506 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5833333333333334, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15225941881016386917&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Restless Bandits with Average Reward: Breaking the Uniform Global Attractor Assumption", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71907", "id": "MWQjqtV1z4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a0babff3ddd4ba12062219ec161ce86-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MWQjqtV1z4", "openreview": "https://openreview.net/forum?id=MWQjqtV1z4", "poster": "/media/PosterPDFs/NeurIPS%202023/71907.png?t=1702171312.32789", "slides": "https://nips.cc/virtual/2023/poster/71907", "video": "https://nips.cc/virtual/2023/poster/71907", "author_site": "Yige Hong, Qiaomin Xie, Yudong Chen, Weina Wang", "tldr": "", "abstract": "We study the infinite-horizon restless bandit problem with the average reward criterion, in both discrete-time and continuous-time settings.\nA fundamental goal is to efficiently compute policies that achieve a diminishing optimality gap as the number of arms, $N$, grows large. \nExisting results on asymptotic optimality all rely on the uniform global attractor property (UGAP), a complex and challenging-to-verify assumption. \nIn this paper, we propose a general, simulation-based framework, Follow-the-Virtual-Advice, that converts any single-armed policy into a policy for the original $N$-armed problem. \nThis is done by simulating the single-armed policy on each arm and carefully steering the real state towards the simulated state. \nOur framework can be instantiated to produce a policy with an $O(1/\\sqrt{N})$ optimality gap. \nIn the discrete-time setting, our result holds under a simpler synchronization assumption, which covers some problem instances that violate UGAP. \nMore notably, in the continuous-time setting, we do not require \\emph{any} additional assumptions beyond the standard unichain condition. 
\nIn both settings, our work is the first asymptotic optimality result that does not require UGAP.", "keywords": "restless bandits;average reward MDP;simulation-based method;asymptotic optimality", "primary_area": "", "supplementary_material": "", "author": "Yige Hong;Qiaomin Xie;Yudong Chen;Weina Wang", "authorids": "~Yige_Hong1;~Qiaomin_Xie1;~Yudong_Chen1;~Weina_Wang1", "gender": "M;F;M;", "homepage": "https://www.cs.cmu.edu/~yigeh/;https://qiaominxie.github.io/;https://pages.cs.wisc.edu/~yudongchen/;https://www.cs.cmu.edu/~weinaw/", "dblp": "301/8925;37/10269;15/1975-1;88/2200", "google_scholar": "D-5fj0kAAAAJ;RVNcy4EAAAAJ;ze5rCdwAAAAJ;mQnBkmoAAAAJ", "orcid": "0000-0001-8534-1063;;0000-0002-6416-5635;0000-0001-6808-0156", "linkedin": "yige-hong-0491a0254;;;", "or_profile": "~Yige_Hong1;~Qiaomin_Xie1;~Yudong_Chen1;~Weina_Wang1", "aff": "Carnegie Mellon University;University of Wisconsin - Madison;Department of Computer Sciences, University of Wisconsin - Madison;Carnegie Mellon University", "aff_domain": "cmu.edu;wisc.edu;cs.wisc.edu;csd.cs.cmu.edu", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhong2023restless,\ntitle={Restless Bandits with Average Reward: Breaking the Uniform Global Attractor Assumption},\nauthor={Yige Hong and Qiaomin Xie and Yudong Chen and Weina Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MWQjqtV1z4}\n}", "github": "", "project": "", "reviewers": "TTwr;s6TA;TkxD;JXov;S8QK;k12y", "pdf_size": 2610707, "rating": "4;6;6;6;7;8", "confidence": "3;3;4;2;4;2", "soundness": "2;3;3;3;3;3", "novelty": "2;4;3;3;4;3", "presentation": "2;3;3;4;3;3", "wc_summary": "47;98;74;125;34;134", "wc_strengths": "47;83;107;63;84;82", "wc_weaknesses": "379;156;148;80;117;211", "wc_questions": "4;101;156;85;28;5", "wc_limitations": "4;1;12;15;6;4", "wc_review": "481;439;497;368;269;436", "wc_reply_reviewers": "0;0;15;11;0;10", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;1;1;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.166666666666667, 1.2133516482134197 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 3.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 85.33333333333333, 37.272271492655534 ], "wc_strengths_avg": [ 77.66666666666667, 18.72312889331149 ], "wc_weaknesses_avg": [ 181.83333333333334, 96.70473388389813 ], "wc_questions_avg": [ 63.166666666666664, 55.74769551789164 ], "wc_limitations_avg": [ 7.0, 4.898979485566356 ], "wc_review_avg": [ 415.0, 77.05193054038295 ], "wc_reply_reviewers_avg": [ 6.0, 6.191391873668904 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1682316462276133, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11570516665693599755&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cmu.edu;wisc.edu;cs.wisc.edu;csd.cs.cmu.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.wisc.edu", "aff_unique_abbr": "CMU;UW-Madison", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Madison", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MCUFormer: Deploying Vision Tranformers on Microcontrollers with Limited Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71906", "id": "MWp3SwoHmH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1ae4999aefb509d75d8608e07280922c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MWp3SwoHmH", "openreview": "https://openreview.net/forum?id=MWp3SwoHmH", "poster": "/media/PosterPDFs/NeurIPS%202023/71906.png?t=1697367783.974651", "slides": "https://nips.cc/virtual/2023/poster/71906", "video": "https://nips.cc/virtual/2023/poster/71906", "author_site": "Yinan Liang, Ziwei Wang, Xiuwei Xu, Yansong Tang, Jie Zhou, Jiwen Lu", "tldr": "", "abstract": "Due to the high price and heavy energy consumption of GPUs, deploying deep models on IoT devices such as microcontrollers makes significant contributions for ecological AI. Conventional methods successfully enable convolutional neural network inference of high resolution images on microcontrollers, while the framework for vision transformers that achieve the state-of-the-art performance in many vision applications still remains unexplored. In this paper, we propose a hardware-algorithm co-optimizations method called MCUFormer to deploy vision transformers on microcontrollers with extremely limited memory, where we jointly design transformer architecture and construct the inference operator library to fit the memory resource constraint. More specifically, we generalize the one-shot network architecture search (NAS) to discover the optimal architecture with highest task performance given the memory budget from the microcontrollers, where we enlarge the existing search space of vision transformers by considering the low-rank decomposition dimensions and patch resolution for memory reduction. For the construction of the inference operator library of vision transformers, we schedule the memory buffer during inference through operator integration, patch embedding decomposition, and token overwriting, allowing the memory buffer to be fully utilized to adapt to the forward pass of the vision transformer. Experimental results demonstrate that our MCUFormer achieves 73.62\\% top-1 accuracy on ImageNet for image classification with 320KB memory on STM32F746 microcontroller. 
Code is available at https://github.com/liangyn22/MCUFormer.", "keywords": "Vision transformer;microcontroller;network architecture search", "primary_area": "", "supplementary_material": "/attachment/2f6e30affbfa43265f85885fddc9c8fe61d59c74.pdf", "author": "Yinan Liang;Ziwei Wang;Xiuwei Xu;Yansong Tang;Jie Zhou;Jiwen Lu", "authorids": "~Yinan_Liang1;~Ziwei_Wang2;~Xiuwei_Xu1;~Yansong_Tang1;~Jie_Zhou3;~Jiwen_Lu1", "gender": "M;M;M;M;M;M", "homepage": "http://ivg.au.tsinghua.edu.cn/index.php;https://ziweiwangthu.github.io/;https://xuxw98.github.io/;https://andytang15.github.io/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/", "dblp": "359/0769;136/5574-1;315/9374;214/9568;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen", "google_scholar": "Zpxs0Z4AAAAJ;cMTW09EAAAAJ;4G627acAAAAJ;TIbistUAAAAJ;;TN8uDQoAAAAJ", "orcid": ";0000-0001-9225-8495;;;;0000-0002-6121-5529", "linkedin": ";;;;;", "or_profile": "~Yinan_Liang1;~Ziwei_Wang2;~Xiuwei_Xu1;~Yansong_Tang1;~Jie_Zhou3;~Jiwen_Lu1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliang2023mcuformer,\ntitle={{MCUF}ormer: Deploying Vision Transformers on Microcontrollers with Limited Memory},\nauthor={Yinan Liang and Ziwei Wang and Xiuwei Xu and Yansong Tang and Jie Zhou and Jiwen Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MWp3SwoHmH}\n}", "github": "", "project": "", "reviewers": "9JB9;77fa;km2A;67yL;N3hf", "pdf_size": 2858776, "rating": "5;5;5;6;6", "confidence": "4;4;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "95;57;63;101;97", "wc_strengths": "56;31;36;130;82", "wc_weaknesses": "183;173;36;114;116", "wc_questions": "70;109;2;93;98", "wc_limitations": "1;47;6;1;1", "wc_review": "405;417;143;439;394", "wc_reply_reviewers": "22;151;0;14;10", "wc_reply_authors": "33;766;0;32;28", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;3;1;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 82.6, 18.650469162999627 ], "wc_strengths_avg": [ 67.0, 36.254654873546926 ], "wc_weaknesses_avg": [ 124.4, 52.515140673904696 ], "wc_questions_avg": [ 74.4, 38.369779775234576 ], "wc_limitations_avg": [ 11.2, 18.00444389588304 ], "wc_review_avg": [ 359.6, 109.3244711855493 ], "wc_reply_reviewers_avg": [ 39.4, 56.24802218745118 ], "wc_reply_authors_avg": [ 171.8, 297.3472044597023 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.16666666666666669, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1377999143857560019&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm":
"Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Explainable and Efficient Randomized Voting Rules", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71905", "id": "MWxsYPVmLS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/47eb2874a790d5b1f554b9bb93b3de9d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MWxsYPVmLS", "openreview": "https://openreview.net/forum?id=MWxsYPVmLS", "poster": "/media/PosterPDFs/NeurIPS%202023/71905.png?t=1702681851.1848645", "slides": "https://nips.cc/virtual/2023/poster/71905", "video": "https://nips.cc/virtual/2023/poster/71905", "author_site": "Soroush Ebadian, Aris Filos-Ratsikas, Mohamad Latifian, Nisarg Shah", "tldr": "", "abstract": "With a rapid growth in the deployment of AI tools for making critical decisions (or aiding humans in doing so), there is a growing demand to be able to explain to the stakeholders how these tools arrive at a decision. Consequently, voting is frequently used to make such decisions due to its inherent explainability. Recent work suggests that using randomized (as opposed to deterministic) voting rules can lead to significant efficiency gains measured via the distortion framework. However, rules that use intricate randomization can often become too complex to explain to the stakeholders; losing explainability can eliminate the key advantage of voting over black-box AI tools, which may outweigh the efficiency gains.\n\nWe study the efficiency gains which can be unlocked by using voting rules that add a simple randomization step to a deterministic rule, thereby retaining explainability. 
We focus on two such families of rules, randomized positional scoring rules and random committee member rules, and show, theoretically and empirically, that they indeed achieve explainability and efficiency simultaneously to some extent.", "keywords": "explainability;efficiency;voting;distortion;randomized decision-making", "primary_area": "", "supplementary_material": "/attachment/bf79cba1a96376ff7f141a61db2135c84564f6de.zip", "author": "Soroush Ebadian;Aris Filos-Ratsikas;Mohamad Latifian;Nisarg Shah", "authorids": "~Soroush_Ebadian1;~Aris_Filos-Ratsikas1;~Mohamad_Latifian2;~Nisarg_Shah1", "gender": "M;M;;M", "homepage": "https://ebadian.org/;https://arisfilosratsikas.com;;https://www.cs.toronto.edu/~nisarg/", "dblp": "242/8319.html;https://dblp.uni-trier.de/pers/hd/f/Filos=Ratsikas:Aris;;95/9508-1", "google_scholar": "tN4kqvYAAAAJ;https://scholar.google.ch/citations?user=iaxTRPoAAAAJ;;https://scholar.google.ca/citations?user=klcw_tAAAAAJ", "orcid": ";;;0000-0002-0946-3402", "linkedin": ";;;", "or_profile": "~Soroush_Ebadian1;~Aris_Filos-Ratsikas1;~Mohamad_Latifian2;~Nisarg_Shah1", "aff": "University of Toronto;University of Edinburgh, University of Edinburgh;;University of Toronto", "aff_domain": "cs.toronto.edu;ed.ac.uk;;utoronto.ca", "position": "PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nebadian2023explainable,\ntitle={Explainable and Efficient Randomized Voting Rules},\nauthor={Soroush Ebadian and Aris Filos-Ratsikas and Mohamad Latifian and Nisarg Shah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MWxsYPVmLS}\n}", "github": "", "project": "", "reviewers": "c5N6;uYtf;VN7K;Ypnk", "pdf_size": 1280881, "rating": "6;6;6;7", "confidence": "1;4;3;4", "soundness": "3;4;4;4", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "43;206;239;89", "wc_strengths": "28;132;72;66", "wc_weaknesses": "155;438;177;268", "wc_questions": "3;134;51;157", "wc_limitations": "2;58;1;37", "wc_review": "231;968;540;617", "wc_reply_reviewers": "12;31;21;188", "wc_reply_authors": "0;0;0;28", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 144.25, 80.76934752738813 ], "wc_strengths_avg": [ 74.5, 37.239092362730865 ], "wc_weaknesses_avg": [ 259.5, 111.42374073777994 ], "wc_questions_avg": [ 86.25, 62.166610813201004 ], "wc_limitations_avg": [ 24.5, 24.171263930543642 ], "wc_review_avg": [ 589.0, 262.1974446862517 ], "wc_reply_reviewers_avg": [ 63.0, 72.48103200148299 ], "wc_reply_authors_avg": [ 7.0, 12.12435565298214 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3718245715147024799&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.toronto.edu;ed.ac.uk;;utoronto.ca", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Toronto;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.ed.ac.uk", "aff_unique_abbr": "U of T;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", 
"aff_country_unique": "Canada;United Kingdom" }, { "title": "Efficient Training of Energy-Based Models Using Jarzynski Equality", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71904", "id": "MXxZ0Z5MNz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4ddb865e0a8ca3cca43fd7387b4b0da-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MXxZ0Z5MNz", "openreview": "https://openreview.net/forum?id=MXxZ0Z5MNz", "poster": "/media/PosterPDFs/NeurIPS%202023/71904.png?t=1699886732.9185996", "slides": "https://nips.cc/virtual/2023/poster/71904", "video": "https://nips.cc/virtual/2023/poster/71904", "author_site": "Davide Carbone, Mengjian Hua, Simon Coste, Eric Vanden-Eijnden", "tldr": "", "abstract": "Energy-based models (EBMs) are generative models inspired by statistical physics with a wide range of applications in unsupervised learning. Their performance is well measured by the cross-entropy (CE) of the model distribution relative to the data distribution. Using the CE as the objective for training is however challenging because the computation of its gradient with respect to the model parameters requires sampling the model distribution. Here we show how results for nonequilibrium thermodynamics based on Jarzynski equality together with tools from sequential Monte-Carlo sampling can be used to perform this computation efficiently and avoid the uncontrolled approximations made using the standard contrastive divergence algorithm. Specifically, we introduce a modification of the unadjusted Langevin algorithm (ULA) in which each walker acquires a weight that enables the estimation of the gradient of the cross-entropy at any step during GD, thereby bypassing sampling biases induced by slow mixing of ULA. We illustrate these results with numerical experiments on Gaussian mixture distributions as well as the MNIST and CIFAR-10 datasets. 
We show that the proposed approach outperforms methods based on the contrastive divergence algorithm in all the considered situations.", "keywords": "Energy-Based Model;contrastive learning;generative models;Jarzynski identity;ULA", "primary_area": "", "supplementary_material": "/attachment/39aeab640ca333cbff798f013aa3a104f6a6c355.pdf", "author": "Davide Carbone;Mengjian Hua;Simon Coste;Eric Vanden-Eijnden", "authorids": "~Davide_Carbone1;~Mengjian_Hua1;~Simon_Coste1;~Eric_Vanden-Eijnden1", "gender": "M;M;M;M", "homepage": ";;https://scoste.fr;https://wp.nyu.edu/courantinstituteofmathematicalsciences-eve2/", "dblp": ";;;88/7927", "google_scholar": "phYtcDcAAAAJ;llRFiBEAAAAJ;;A5Gx65gAAAAJ", "orcid": "0000-0003-2859-6603;0000-0003-0203-1356;;", "linkedin": "davide-carbone-4070a6209/;;;", "or_profile": "~Davide_Carbone1;~Mengjian_Hua1;~Simon_Coste1;~Eric_Vanden-Eijnden1", "aff": "New York University;Courant Institute of Mathematical Sciences, New York University;LPSM;New York University", "aff_domain": "nyu.edu;nyu.edu;u-paris.fr;nyu.edu", "position": "Intern;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ncarbone2023efficient,\ntitle={Efficient Training of Energy-Based Models Using Jarzynski Equality},\nauthor={Davide Carbone and Mengjian Hua and Simon Coste and Eric Vanden-Eijnden},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MXxZ0Z5MNz}\n}", "github": "", "project": "", "reviewers": "nHa5;1pft;2q1e;ripa;d8UC;YzoY", "pdf_size": 4552519, "rating": "4;5;6;6;7;7", "confidence": "4;4;3;5;4;2", "soundness": "3;3;3;2;3;3", "novelty": "3;2;3;2;3;3", "presentation": "3;3;3;3;3;3", "wc_summary": "43;193;16;228;318;81", "wc_strengths": "36;117;24;168;203;105", "wc_weaknesses": "210;173;45;559;179;120", "wc_questions": "201;31;193;7;179;72", "wc_limitations": "7;11;28;88;34;1", "wc_review": "497;525;306;1050;913;379", "wc_reply_reviewers": "230;211;14;98;59;0", "wc_reply_authors": "349;349;0;583;519;0", "reply_reviewers": "1;1;1;2;1;0", "reply_authors": "2;2;1;3;3;1", "rating_avg": [ 5.833333333333333, 1.0671873729054746 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 146.5, 108.2046671821507 ], "wc_strengths_avg": [ 108.83333333333333, 64.48621730433739 ], "wc_weaknesses_avg": [ 214.33333333333334, 162.9638678426056 ], "wc_questions_avg": [ 113.83333333333333, 79.7253967621917 ], "wc_limitations_avg": [ 28.166666666666668, 29.139987798365475 ], "wc_review_avg": [ 611.6666666666666, 274.2150413250318 ], "wc_reply_reviewers_avg": [ 102.0, 89.70135636284066 ], "wc_reply_authors_avg": [ 300.0, 228.3462283463425 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.38651034126196304, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17118842328298492415&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "nyu.edu;nyu.edu;u-paris.fr;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "New York University;Laboratoire de Probabilit\u00e9s, Statistique et Mod\u00e9lisation", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.lpsm.paris", "aff_unique_abbr": "NYU;LPSM",
"aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Architecture Matters: Uncovering Implicit Mechanisms in Graph Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71903", "id": "MYfqIVcQrp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5acf5a0ee5c17d372bfe7fdaeffd6e33-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MYfqIVcQrp", "openreview": "https://openreview.net/forum?id=MYfqIVcQrp", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71903", "video": "https://nips.cc/virtual/2023/poster/71903", "author_site": "Xiaojun Guo, Yifei Wang, Zeming Wei, Yisen Wang", "tldr": "", "abstract": "With the prosperity of contrastive learning for visual representation learning (VCL), it is also adapted to the graph domain and yields promising performance. However, through a systematic study of various graph contrastive learning (GCL) methods, we observe that some common phenomena among existing GCL methods that are quite different from the original VCL methods, including 1) positive samples are not a must for GCL; 2) negative samples are not necessary for graph classification, neither for node classification when adopting specific normalization modules; 3) data augmentations have much less influence on GCL, as simple domain-agnostic augmentations (e.g., Gaussian noise) can also attain fairly good performance. By uncovering how the implicit inductive bias of GNNs works in contrastive learning, we theoretically provide insights into the above intriguing properties of GCL. Rather than directly porting existing VCL methods to GCL, we advocate for more attention toward the unique architecture of graph learning and consider its implicit influence when designing GCL methods. 
Code is available at https://github.com/PKU-ML/ArchitectureMattersGCL.", "keywords": "graph contrastive learning", "primary_area": "", "supplementary_material": "/attachment/d2cd4bdecc4cd17ce495367108a295ad07558a36.pdf", "author": "Xiaojun Guo;Yifei Wang;Zeming Wei;Yisen Wang", "authorids": "~Xiaojun_Guo1;~Yifei_Wang1;~Zeming_Wei1;~Yisen_Wang1", "gender": "F;M;M;M", "homepage": "https://zero-lab-pku.github.io/personwise/guoxiaojun/;https://yifeiwang77.com;https://weizeming.github.io;https://yisenwang.github.io/", "dblp": ";00/555-1;276/6608;172/1346-1", "google_scholar": ";-CLy6YsAAAAJ;Kyn1zdQAAAAJ;uMWPDboAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiaojun_Guo1;~Yifei_Wang1;~Zeming_Wei1;~Yisen_Wang1", "aff": "Peking University;Peking University;University of California, Berkeley;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;berkeley.edu;pku.edu.cn", "position": "PhD student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nguo2023architecture,\ntitle={Architecture Matters: Uncovering Implicit Mechanisms in Graph Contrastive Learning},\nauthor={Xiaojun Guo and Yifei Wang and Zeming Wei and Yisen Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MYfqIVcQrp}\n}", "github": "", "project": "", "reviewers": "4ia5;nRNc;sd4m;RuBb", "pdf_size": 1464227, "rating": "4;5;7;7", "confidence": "4;5;4;4", "soundness": "2;3;4;4", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "69;145;120;81", "wc_strengths": "35;41;37;20", "wc_weaknesses": "146;224;640;131", "wc_questions": "6;11;2;55", "wc_limitations": "6;2;2;20", "wc_review": "262;423;801;307", "wc_reply_reviewers": "104;1663;50;15", "wc_reply_authors": "919;3744;0;0", "reply_reviewers": "1;4;1;1", "reply_authors": "4;7;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.75, 30.375771595138122 ], "wc_strengths_avg": [ 33.25, 7.949056547792323 ], "wc_weaknesses_avg": [ 285.25, 207.83572238669655 ], "wc_questions_avg": [ 18.5, 21.313141485947114 ], "wc_limitations_avg": [ 7.5, 7.399324293474371 ], "wc_review_avg": [ 448.25, 211.96152363105904 ], "wc_reply_reviewers_avg": [ 458.0, 696.4291062269009 ], "wc_reply_authors_avg": [ 1165.75, 1535.1062463230355 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.25, 2.48746859276655 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3213070113013102632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;berkeley.edu;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Peking University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "Peking U;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Benchmarking and Analyzing 3D-aware Image Synthesis with a Modularized Codebase", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73617", "id": "MZopld6S22", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1585da86b5a3c4fb15520a2b3682051f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=MZopld6S22", "openreview": "https://openreview.net/forum?id=MZopld6S22", "poster": "/media/PosterPDFs/NeurIPS%202023/73617.png?t=1699423865.0622678", "slides": "https://nips.cc/virtual/2023/poster/73617", "video": "https://nips.cc/virtual/2023/poster/73617", "author_site": "Qiuyu Wang, Zifan Shi, Kecheng Zheng, Yinghao Xu, Sida Peng, Yujun Shen", "tldr": "", "abstract": "Despite the rapid advance of 3D-aware image synthesis, existing studies usually adopt a mixture of techniques and tricks, leaving it unclear how each part contributes to the final performance in terms of generality. Following the most popular and effective paradigm in this field, which incorporates a neural radiance field (NeRF) into the generator of a generative adversarial network (GAN), we build\na well-structured codebase through modularizing the generation process. Such a design allows researchers to develop and replace each module independently, and hence offers an opportunity to fairly compare various approaches and recognize their contributions from the module perspective. The reproduction of a range of cutting-edge algorithms demonstrates the availability of our modularized codebase. We also perform a variety of in-depth analyses, such as the comparison across different types of point feature, the necessity of the tailing upsampler in the generator, the reliance on the camera pose prior, etc., which deepen our understanding of existing methods and point out some further directions of the research work. Code and models will be made publicly available to facilitate the development and evaluation of this field.", "keywords": "3D-aware image synthesis", "primary_area": "", "supplementary_material": "/attachment/6db434a66ba5030117ee041f39236a26bae53cc9.zip", "author": "Qiuyu Wang;Zifan Shi;Kecheng Zheng;Yinghao Xu;Sida Peng;Yujun Shen", "authorids": "~Qiuyu_Wang1;~Zifan_Shi2;~Kecheng_Zheng2;~Yinghao_Xu1;~Sida_Peng1;~Yujun_Shen1", "gender": "M;;M;M;M;", "homepage": "https://github.com/qiuyu96;;https://zkcys001.github.io/;https://justimyhxu.github.io/;http://pengsida.net/;", "dblp": "37/9650;;228/1362;232/2482;232/3246;", "google_scholar": "VRsy9v8AAAAJ;;hMDQifQAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Qiuyu_Wang1;~Zifan_Shi2;~Kecheng_Zheng2;~Yinghao_Xu1;~Sida_Peng1;~Yujun_Shen1", "aff": "Ant Group;;Zhejiang University;Chinese University of Hong Kong;Zhejiang University;", "aff_domain": "antgroup.com;;zju.edu.cn;ie.cuhk.edu.hk;zju.edu.cn;", "position": "Researcher;;Postdoc;PhD student;PhD student;", "bibtex": "@inproceedings{\nwang2023benchmarking,\ntitle={Benchmarking and Analyzing 3D-aware Image Synthesis with a Modularized Codebase},\nauthor={Qiuyu Wang and Zifan Shi and Kecheng Zheng and Yinghao Xu and Sida Peng and Yujun Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=MZopld6S22}\n}", "github": "", "project": "", "reviewers": "DaES;M7ju;783m;ggV7", "pdf_size": 9191850, "rating": "7;7;7;7", "confidence": "3;5;5;3", "wc_summary_and_contributions": "40;52;42;54", "wc_strengths": "88;45;44;100", "wc_improvement": "120;128;136;43", "wc_limitations": "22;2;8;69", "wc_correctness": "1;8;1;34", "wc_clarity": "1;1;1;3", "wc_relation_to_prior_work": "1;5;1;4", 
"wc_documentation": "1;1;71;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "275;243;305;311", "wc_reply_reviewers": "0;40;145;5", "wc_reply_authors": "201;527;854;496", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;3;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 1.0 ], "wc_summary_and_contributions_avg": [ 47.0, 6.082762530298219 ], "wc_strengths_avg": [ 69.25, 25.113492389550284 ], "wc_improvement_avg": [ 106.75, 37.238253181372514 ], "wc_limitations_avg": [ 25.25, 26.280934153869037 ], "wc_correctness_avg": [ 11.0, 13.583077707206124 ], "wc_clarity_avg": [ 1.5, 0.8660254037844386 ], "wc_relation_to_prior_work_avg": [ 2.75, 1.7853571071357126 ], "wc_documentation_avg": [ 19.0, 30.033314835362415 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 283.5, 27.069355367278327 ], "wc_reply_reviewers_avg": [ 47.5, 58.36308764964376 ], "wc_reply_authors_avg": [ 519.5, 231.26878302096893 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7106712372134420165&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "antgroup.com;;zju.edu.cn;ie.cuhk.edu.hk;zju.edu.cn;", "author_num": 6, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Ant Group;Zhejiang University;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.antgroup.com;https://www.zju.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "Ant Group;ZJU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Approximate inference of marginals using the IBIA framework", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71902", "id": "MamHShmHiX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5beb17e56bbb8fd562efeefab79425f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MamHShmHiX", "openreview": "https://openreview.net/forum?id=MamHShmHiX", "poster": "/media/PosterPDFs/NeurIPS%202023/71902.png?t=1700990952.4786246", "slides": "https://nips.cc/virtual/2023/poster/71902", "video": "https://nips.cc/virtual/2023/poster/71902", "author_site": "Shivani Bathla, Vinita Vasudevan", "tldr": "", "abstract": "Exact inference of marginals in probabilistic graphical models (PGM) is known to be intractable, necessitating the use of approximate methods. Most of the existing variational techniques perform iterative message passing in loopy graphs which is slow to converge for many benchmarks. In this paper, we propose a new algorithm for marginal inference that is based on the incremental build-infer-approximate (IBIA) paradigm. Our algorithm converts the PGM into a sequence of linked clique tree forests (SLCTF) with bounded clique sizes, and then uses a heuristic belief update algorithm to infer the marginals. For the special case of Bayesian networks, we show that if the incremental build step in IBIA uses the topological order of variables then (a) the prior marginals are consistent in all CTFs in the SLCTF and (b) the posterior marginals are consistent once all evidence variables are added to the SLCTF. In our approach, the belief propagation step is non-iterative and the accuracy-complexity trade-off is controlled using user-defined clique size bounds. 
Results for several benchmark sets from recent UAI competitions show that our method gives accuracy that is better than or comparable to existing variational and sampling-based methods, with smaller runtimes.", "keywords": "Bayesian inference;posterior marginals;probabilistic graphical models", "primary_area": "", "supplementary_material": "/attachment/79dfbc81658f371e1420b301ab1fe3eaed1c3480.pdf", "author": "Shivani Bathla;Vinita Vasudevan", "authorids": "~Shivani_Bathla1;~Vinita_Vasudevan1", "gender": "F;", "homepage": ";https://www.ee.iitm.ac.in/~vinita", "dblp": "235/6437;69/2594", "google_scholar": "gHsMrPIAAAAJ;", "orcid": "0000-0002-1109-0836;0000-0001-7039-3821", "linkedin": "shivani-bathla-aa945514/?originalSubdomain=in;", "or_profile": "~Shivani_Bathla1;~Vinita_Vasudevan1", "aff": "Indian Institute of Technology Madras, Indian Institute of Technology, Madras;Indian Institute of Technology, Madras", "aff_domain": "ee.iitm.ac.in;iitm.ac.in", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nbathla2023approximate,\ntitle={Approximate inference of marginals using the {IBIA} framework},\nauthor={Shivani Bathla and Vinita Vasudevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MamHShmHiX}\n}", "github": "", "project": "", "reviewers": "QKYy;SJmy;vgXS", "pdf_size": 311498, "rating": "4;6;7", "confidence": "5;4;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "128;94;346", "wc_strengths": "49;63;52", "wc_weaknesses": "106;192;172", "wc_questions": "130;29;224", "wc_limitations": "218;3;10", "wc_review": "631;381;804", "wc_reply_reviewers": "0;0;64", "wc_reply_authors": "0;0;19", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 189.33333333333334, 111.64626679333658 ], "wc_strengths_avg": [ 54.666666666666664, 6.018490028422596 ], "wc_weaknesses_avg": [ 156.66666666666666, 36.745370078721784 ], "wc_questions_avg": [ 127.66666666666667, 79.62551238279238 ], "wc_limitations_avg": [ 77.0, 99.74300309629075 ], "wc_review_avg": [ 605.3333333333334, 173.6401131331378 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 30.169889330626027 ], "wc_reply_authors_avg": [ 6.333333333333333, 8.956685895029603 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tq95E0sCzkIJ:scholar.google.com/&scioq=Approximate+inference+of+marginals+using+the+IBIA+framework&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "ee.iitm.ac.in;iitm.ac.in", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Madras", "aff_unique_dep": "", "aff_unique_url": "https://www.iitm.ac.in", "aff_unique_abbr": "IIT Madras", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madras", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "Energy Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71901", "id": "MbwVNEx9KS", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/57a9b97477b67936298489e3c1417b0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MbwVNEx9KS", "openreview": "https://openreview.net/forum?id=MbwVNEx9KS", "poster": "/media/PosterPDFs/NeurIPS%202023/71901.png?t=1701394775.596156", "slides": "https://nips.cc/virtual/2023/poster/71901", "video": "https://nips.cc/virtual/2023/poster/71901", "author_site": "Benjamin Hoover, Yuchen Liang, Bao Pham, Rameswar Panda, Hendrik Strobelt, Duen Horng Chau, Mohammed Zaki, Dmitry Krotov", "tldr": "", "abstract": "Our work combines aspects of three promising paradigms in machine learning, namely, attention mechanism, energy-based models, and associative memory. Attention is the power-house driving modern deep learning successes, but it lacks clear theoretical foundations. Energy-based models allow a principled approach to discriminative and generative tasks, but the design of the energy functional is not straightforward. At the same time, Dense Associative Memory models or Modern Hopfield Networks have a well-established theoretical foundation, and allow an intuitive design of the energy function. We propose a novel architecture, called the Energy Transformer (or ET for short), that uses a sequence of attention layers that are purposely designed to minimize a specifically engineered energy function, which is responsible for representing the relationships between the tokens. In this work, we introduce the theoretical foundations of ET, explore its empirical capabilities using the image completion task, and obtain strong quantitative results on the graph anomaly detection and graph classification tasks.", "keywords": "Hopfield Network;Dense Associative Memory;Energy-based models;Attention Mechanism", "primary_area": "", "supplementary_material": "", "author": "Benjamin Hoover;Yuchen Liang;Bao Pham;Rameswar Panda;Hendrik Strobelt;Duen Horng Chau;Mohammed J Zaki;Dmitry Krotov", "authorids": "~Benjamin_Hoover1;~Yuchen_Liang2;~Bao_Pham1;~Rameswar_Panda1;~Hendrik_Strobelt1;~Duen_Horng_Chau1;~Mohammed_J_Zaki1;~Dmitry_Krotov2", "gender": "M;;M;M;M;M;;Not Specified", "homepage": "https://bhoov.com;;;https://rpand002.github.io/;http://hendrik.strobelt.com;http://www.cs.rpi.edu/~zaki;https://mitibmwatsonailab.mit.edu/people/dmitry-krotov/;https://faculty.cc.gatech.edu/~dchau", "dblp": "250/9412;31/8891;;126/0986;67/7527;z/MohammedJaveedZaki.html;182/2341;10/2670", "google_scholar": "n10P0tYAAAAJ;;;_ySuu6gAAAAJ;H4vEe_oAAAAJ;https://scholar.google.com/scholar?q=zaki,+mj;WeD9ll0AAAAJ;https://scholar.google.com.tw/citations?user=YON32W4AAAAJ", "orcid": "0000-0001-5218-3185;;0000-0001-8962-9961;;;0000-0003-4711-0234;;0000-0001-9824-3323", "linkedin": "benhoov/;yuchen-liang-42471430/;;;;mohammed-j-zaki/;krotovdmitry;polochau", "or_profile": "~Benjamin_Hoover1;~Yuchen_Liang2;~Bao_Pham1;~Rameswar_Panda1;~Hendrik_Strobelt1;~Mohammed_J_Zaki1;~Dmitry_Krotov2;~Duen_Chau1", "aff": "International Business Machines;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;MIT-IBM Watson AI Lab;International Business Machines;Rensselaer Polytechnic Institute;Massachusetts Institute of Technology;", "aff_domain": "research.ibm.com;rpi.edu;rpi.edu;ibm.com;ibm.com;rpi.edu;mit.edu;", "position": "AI Research Engineer;PhD student;PhD student;Research Scientist;Principal Researcher;Professor;Researcher;", "bibtex": "@inproceedings{\nhoover2023energy,\ntitle={Energy Transformer},\nauthor={Benjamin Hoover and Yuchen Liang and Bao Pham and Rameswar Panda and 
Hendrik Strobelt and Duen Horng Chau and Mohammed J Zaki and Dmitry Krotov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MbwVNEx9KS}\n}", "github": "", "project": "", "reviewers": "bQx9;qJXt;q5cg;zsPk", "pdf_size": 18478065, "rating": "4;6;7;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;4;3", "wc_summary": "59;84;99;164", "wc_strengths": "37;55;76;31", "wc_weaknesses": "160;111;75;25", "wc_questions": "66;184;54;242", "wc_limitations": "11;6;1;23", "wc_review": "333;440;305;485", "wc_reply_reviewers": "8;12;5;5", "wc_reply_authors": "37;16;10;10", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.5, 38.81043674065006 ], "wc_strengths_avg": [ 49.75, 17.541023345289748 ], "wc_weaknesses_avg": [ 92.75, 49.3982540177282 ], "wc_questions_avg": [ 136.5, 79.31424840468452 ], "wc_limitations_avg": [ 10.25, 8.166241485530538 ], "wc_review_avg": [ 390.75, 74.15650679475132 ], "wc_reply_reviewers_avg": [ 7.5, 2.8722813232690143 ], "wc_reply_authors_avg": [ 18.25, 11.098986440211556 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3803905964826469995&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 8, "email": "research.ibm.com;rpi.edu;rpi.edu;ibm.com;ibm.com;rpi.edu;mit.edu;", "author_num": 8, "aff_unique_index": "0;1;1;2;0;1;2", "aff_unique_norm": "International Business Machines Corporation;Rensselaer Polytechnic Institute;Massachusetts Institute of Technology", "aff_unique_dep": ";;IBM Watson AI Lab", "aff_unique_url": "https://www.ibm.com;https://www.rpi.edu;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "IBM;RPI;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Alexa Arena: A User-Centric Interactive Platform for Embodied AI", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73616", "id": "McAS4XoZJP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d0758f0b95e19abc68c1c8070d36510-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=McAS4XoZJP", "openreview": "https://openreview.net/forum?id=McAS4XoZJP", "poster": "/media/PosterPDFs/NeurIPS%202023/73616.png?t=1701469617.339547", "slides": "https://nips.cc/virtual/2023/poster/73616", "video": "https://nips.cc/virtual/2023/poster/73616", "author_site": "Qiaozi Gao, Govind Thattai, Suhaila Shakiah, Xiaofeng Gao, Shreyas Pansare, Vasu Sharma, Gaurav Sukhatme, Hangjie Shi, Bofei Yang, Desheng Zhang, Lucy Hu, Karthika Arumugam, Shui Hu, Matthew Wen, Dinakar Guthy, Shunan Chung, Rohan Khanna, Osman Ipek, Leslie Ball, Kate Bland, Heather Rocker, Michael Johnston, Reza Ghanadan, Dilek Hakkani-Tur, Prem Natarajan", "tldr": "", "abstract": "We introduce Alexa Arena, a user-centric simulation platform to facilitate research in building assistive conversational embodied agents. 
Alexa Arena features multi-room layouts and an abundance of interactable objects. With user-friendly graphics and control mechanisms, the platform supports the development of gamified robotic tasks readily accessible to general human users, allowing high-efficiency data collection and EAI system evaluation. Along with the platform, we introduce a dialog-enabled task completion benchmark with online human evaluations.", "keywords": "Human-robot interaction;Embodied AI;Natural Language Processing", "primary_area": "", "supplementary_material": "/attachment/af57d87f612b33607d608239213bf24aa7ab73dd.zip", "author": "Qiaozi Gao;Govind Thattai;Suhaila Shakiah;Xiaofeng Gao;Shreyas Pansare;Vasu Sharma;Gaurav S. Sukhatme;Hangjie Shi;Bofei Yang;Desheng Zhang;Lucy Hu;Karthika Arumugam;Shui Hu;Matthew Wen;Dinakar Venkateswar Guthy;Shunan Cadence Chung;Rohan Khanna;Osman Ipek;Leslie Ball;Kate Bland;Heather Rocker;Michael Johnston;Reza Ghanadan;Dilek Hakkani-Tur;Prem Natarajan", "authorids": "~Qiaozi_Gao1;~Govind_Thattai1;~Suhaila_Shakiah1;~Xiaofeng_Gao1;~Shreyas_Pansare1;~Vasu_Sharma1;~Gaurav_S._Sukhatme1;~Hangjie_Shi1;bofeiy@amazon.com;~Desheng_Zhang3;~Lucy_Hu1;~Karthika_Arumugam1;shui1234@gmail.com;~Matthew_Wen1;~Dinakar_Venkateswar_Guthy1;~Shunan_Cadence_Chung1;~Rohan_Khanna1;~Osman_Ipek1;~Leslie_Ball1;kateblan@amazon.com;~Heather_Rocker1;~Michael_Johnston1;~Reza_Ghanadan1;~Dilek_Hakkani-Tur1;~Prem_Natarajan1", "gender": "M;M;F;M;;M;M;M;;M;F;F;;M;M;F;M;M;;;F;M;M;;", "homepage": ";;https://www.linkedin.com/in/suhailashakiah/;https://xfgao.github.io/;;http://vasusharma.github.io;http://www-robotics.usc.edu/~gaurav/;;;https://www.linkedin.com/in/desheng-z-3a6084a1?trk=people-guest_people_search-card;;https://linkedin.com/in/karthikaarumugam;;https://www.matthewwen.com;;https://www.linkedin.com/in/cadence-chung/;;;https://www.linkedin.com/in/leslie-ball-222b8073;;;;;;", "dblp": "173/1986;279/2880;271/2364;95/6947-2;;165/0762;s/GauravSSukhatme;;;;;;;;;;;;;;;77/2529;50/5680;;", "google_scholar": "Ub3LlsgAAAAJ;ZiagaFYAAAAJ;IP6H8LYAAAAJ;AjTfCjEAAAAJ;;PLUB4dIAAAAJ;https://scholar.google.com.tw/citations?user=lRUi-A8AAAAJ;7r5shcMAAAAJ;;;;;;;;;;;;;;;00ncu3cAAAAJ;;", "orcid": ";;;0000-0003-3331-9846;;;0000-0003-2408-474X;;;;;;;;;;;;;;;;;;", "linkedin": ";govind-thattai-aaa5326/;suhailashakiah/;;shreyas-pansare;vasu-sharma-6b460592?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;gaurav-sukhatme-9b6420b/;hangjie-shi-7565903b;;;lucyhu1;;;;dinakar-guthy-43934510a;;rohan-khanna-83443948;https://linkedin.com/in/uyguripek/;;;hsrocker/;mjrjohnston/;reza-ghanadan-ph-d-mba-820756;;", "or_profile": "~Qiaozi_Gao1;~Govind_Thattai1;~Suhaila_Shakiah1;~Xiaofeng_Gao1;~Shreyas_Pansare1;~Vasu_Sharma1;~Gaurav_S._Sukhatme1;~Hangjie_Shi1;bofeiy@amazon.com;~Desheng_Zhang3;~Lucy_Hu1;~Karthika_Arumugam1;shui1234@gmail.com;~Matthew_Wen1;~Dinakar_Venkateswar_Guthy1;~Shunan_Cadence_Chung1;~Rohan_Khanna1;~Osman_Ipek1;~Leslie_Ball1;kateblan@amazon.com;~Heather_Rocker1;~Michael_Johnston1;~Reza_Ghanadan1;~Dilek_Hakkani-Tur1;~Prem_Natarajan1", "aff": "Amazon;Amazon;Amazon;Amazon;;Meta Facebook;University of Southern California;;;;Amazon;;;;;Amazon;;;Amazon;;;Amazon;Amazon;;", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;;fb.com;usc.edu;;;;amazon.com;;;;;amazon.com;;;amazon.com;;;amazon.com;amazon.com;;", "position": "Scientist;Principal Scientist;Researcher;Scientist;;Researcher;Full Professor;;;;Researcher;;;;;Researcher;;;Sr. 
Technical Program Manager;;;Principal Researcher;Principal Researcher;;", "bibtex": "@inproceedings{\ngao2023alexa,\ntitle={Alexa Arena: A User-Centric Interactive Platform for Embodied {AI}},\nauthor={Qiaozi Gao and Govind Thattai and Suhaila Shakiah and Xiaofeng Gao and Shreyas Pansare and Vasu Sharma and Gaurav S. Sukhatme and Hangjie Shi and Bofei Yang and Desheng Zhang and Lucy Hu and Karthika Arumugam and Shui Hu and Matthew Wen and Dinakar Venkateswar Guthy and Shunan Cadence Chung and Rohan Khanna and Osman Ipek and Leslie Ball and Kate Bland and Heather Rocker and Michael Johnston and Reza Ghanadan and Dilek Hakkani-Tur and Prem Natarajan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=McAS4XoZJP}\n}", "github": "", "project": "", "reviewers": "WUL3;Yxqr;PEw7;A3RV;zBVW", "pdf_size": 25911872, "rating": "1;6;7;7;8", "confidence": "5;3;4;5;4", "wc_summary_and_contributions": "12;186;402;62;148", "wc_strengths": "7;53;78;184;58", "wc_improvement": "3;166;65;366;85", "wc_limitations": "1189;2;18;1;17", "wc_correctness": "10;2;10;60;11", "wc_clarity": "13;46;8;9;7", "wc_relation_to_prior_work": "1;11;32;1;24", "wc_documentation": "1;4;15;68;40", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1237;471;629;752;391", "wc_reply_reviewers": "567;17;0;0;8", "wc_reply_authors": "930;301;344;783;219", "reply_reviewers": "1;1;0;0;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.8, 2.4819347291981715 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 162.0, 134.81246233193727 ], "wc_strengths_avg": [ 76.0, 58.79115579744967 ], "wc_improvement_avg": [ 137.0, 125.79825118021316 ], "wc_limitations_avg": [ 245.4, 471.854469089782 ], "wc_correctness_avg": [ 18.6, 20.953281365934075 ], "wc_clarity_avg": [ 16.6, 14.840485167271318 ], "wc_relation_to_prior_work_avg": [ 13.8, 12.416118556135006 ], "wc_documentation_avg": [ 25.6, 25.25549445170298 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 696.0, 297.9785227159837 ], "wc_reply_reviewers_avg": [ 118.4, 224.38770019767128 ], "wc_reply_authors_avg": [ 515.4, 285.202103779057 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 25, 0 ], "corr_rating_confidence": -0.40919396764125054, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7099213706211054350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "amazon.com;amazon.com;amazon.com;amazon.com;;fb.com;usc.edu;;;;amazon.com;;;;;amazon.com;;;amazon.com;;;amazon.com;amazon.com;;", "author_num": 25, "aff_unique_index": "0;0;0;0;1;2;0;0;0;0;0", "aff_unique_norm": "Amazon;Meta;University of Southern California", "aff_unique_dep": "Amazon.com, Inc.;Meta Platforms, Inc.;", "aff_unique_url": "https://www.amazon.com;https://meta.com;https://www.usc.edu", "aff_unique_abbr": "Amazon;Meta;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimization of Inter-group criteria for clustering with minimum size constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71900", "id": "MdJX5wwKwx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/20f814ecdaa8c76131e21683447e347b-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=MdJX5wwKwx", "openreview": "https://openreview.net/forum?id=MdJX5wwKwx", "poster": "/media/PosterPDFs/NeurIPS%202023/71900.png?t=1701385853.1620111", "slides": "https://nips.cc/virtual/2023/poster/71900", "video": "https://nips.cc/virtual/2023/poster/71900", "author_site": "Eduardo Laber, Lucas Murtinho", "tldr": "", "abstract": "Internal measures that are used to assess the quality of a clustering usually take into account intra-group and/or inter-group criteria.\nThere are many papers in the literature that propose algorithms with provable approximation guarantees for optimizing the former. However, the optimization of inter-group criteria is much less understood.\n\nHere, we contribute to the state-of-the-art of this literature by devising algorithms with provable guarantees for the maximization of two natural inter-group criteria, namely the minimum spacing and the minimum spanning tree spacing. The former is the minimum distance between points in different groups while the latter captures separability through the cost of the minimum spanning tree that connects all groups. We obtain results for both the unrestricted case, in which no constraint on the clusters is imposed, and for the constrained case where each group is required to have a minimum number of points. Our constraint is motivated by the fact that the popular Single-Linkage, which optimizes both criteria in the unrestricted case, produces clustering with many tiny groups.\n\nTo complement our work, we present an empirical study with 10 real datasets that provides evidence that our methods work very well in practical settings.", "keywords": "Single Link;clustering;approximation algorithms;complexity;inter-group criterion", "primary_area": "", "supplementary_material": "", "author": "Eduardo Sany Laber;Lucas Murtinho", "authorids": "~Eduardo_Sany_Laber1;~Lucas_Murtinho1", "gender": "M;M", "homepage": "http://www-di.inf.puc-rio.br/~laber/;", "dblp": "49/5557;228/6913", "google_scholar": "https://scholar.google.com.br/citations?hl=pt-BR;QCL9P24AAAAJ", "orcid": "0000-0002-9025-8333;", "linkedin": ";lucas-murtinho/?locale=en_US", "or_profile": "~Eduardo_Sany_Laber1;~Lucas_Murtinho1", "aff": "Pontificia Universidade Catolica, Rio de Janeiro, Brazil;Pontificia Universidade Catolica, Rio de Janeiro, Brazil", "aff_domain": "puc-rio.br;puc-rio.br", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nlaber2023optimization,\ntitle={Optimization of Inter-group criteria for clustering with minimum size constraints},\nauthor={Eduardo Sany Laber and Lucas Murtinho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MdJX5wwKwx}\n}", "github": "", "project": "", "reviewers": "FsJN;FmRy;Z252;GUvh", "pdf_size": 604344, "rating": "3;5;7;8", "confidence": "4;4;4;4", "soundness": "3;2;4;4", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "99;272;238;222", "wc_strengths": "23;29;66;57", "wc_weaknesses": "107;262;113;119", "wc_questions": "61;37;8;92", "wc_limitations": "4;3;2;1", "wc_review": "294;603;427;491", "wc_reply_reviewers": "144;207;0;243", "wc_reply_authors": "156;150;0;32", "reply_reviewers": "1;2;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 207.75, 65.33136689217515 ], 
"wc_strengths_avg": [ 43.75, 18.15729880791744 ], "wc_weaknesses_avg": [ 150.25, 64.65823613430852 ], "wc_questions_avg": [ 49.5, 30.890937182286976 ], "wc_limitations_avg": [ 2.5, 1.118033988749895 ], "wc_review_avg": [ 453.75, 111.69014056755412 ], "wc_reply_reviewers_avg": [ 148.5, 92.76987657639737 ], "wc_reply_authors_avg": [ 84.5, 69.46042038456144 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16145167798770594039&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "puc-rio.br;puc-rio.br", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Pontifical Catholic University of Rio de Janeiro", "aff_unique_dep": "", "aff_unique_url": "http://www.puc-rio.br/", "aff_unique_abbr": "PUC-Rio", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Rio de Janeiro", "aff_country_unique_index": "0;0", "aff_country_unique": "Brazil" }, { "title": "AircraftVerse: A Large-Scale Multimodal Dataset of Aerial Vehicle Designs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73615", "id": "MfhJWSp3Ea", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b94879b177d9780c17f5a78f62a6a8a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=MfhJWSp3Ea", "openreview": "https://openreview.net/forum?id=MfhJWSp3Ea", "poster": "/media/PosterPDFs/NeurIPS%202023/73615.png?t=1701455331.5922475", "slides": "https://nips.cc/virtual/2023/poster/73615", "video": "https://nips.cc/virtual/2023/poster/73615", "author_site": "Adam Cobb, Anirban Roy, Daniel Elenius, Frederick Heim, Brian Swenson, Sydney Whittington, James Walker, Theodore Bapty, Joseph Hite, Karthik Ramani, Christopher McComb, Susmit Jha", "tldr": "", "abstract": "We present AircraftVerse, a publicly available aerial vehicle design dataset. Aircraft design encompasses different physics domains and, hence, multiple modalities of representation. The evaluation of these designs requires the use of scientific analytical and simulation models ranging from computer-aided design tools for structural and manufacturing analysis, computational fluid dynamics tools for drag and lift computation, battery models for energy estimation, and simulation models for flight control and dynamics. AircraftVerse contains $27{,}714$ diverse air vehicle designs - the largest corpus of designs with this level of complexity. Each design comprises the following artifacts: a symbolic design tree describing topology, propulsion subsystem, battery subsystem, and other design details; a STandard for the Exchange of Product (STEP) model data; a 3D CAD design using a stereolithography (STL) file format; a 3D point cloud for the shape of the design; and evaluation results from high fidelity state-of-the-art physics models that characterize performance metrics such as maximum flight distance and hover-time. We also present baseline surrogate models that use different modalities of design representation to predict design performance metrics, which we provide as part of our dataset release. Finally, we discuss the potential impact of this dataset on the use of learning in aircraft design, and more generally, in the emerging field of deep learning for scientific design. 
AircraftVerse is accompanied by a datasheet as suggested in the recent literature, and it is released under Creative Commons Attribution-ShareAlike (CC BY-SA) license. The dataset with baseline models are hosted at http://doi.org/10.5281/zenodo.6525446, code at https://github.com/SRI-CSL/AircraftVerse, and the dataset description at https://uavdesignverse.onrender.com/.", "keywords": "CAD; Multimodal; Multi-Physics; Aircrafts", "primary_area": "", "supplementary_material": "/attachment/b76ae960f3a8b2737964953d209f4e2282d0b595.pdf", "author": "Adam D. Cobb;Anirban Roy;Daniel Elenius;Frederick Michael Heim;Brian Swenson;Sydney Whittington;James D Walker;Theodore Bapty;Joseph Hite;Karthik Ramani;Christopher McComb;Susmit Jha", "authorids": "~Adam_D._Cobb1;~Anirban_Roy3;~Daniel_Elenius1;~Frederick_Michael_Heim1;~Brian_Swenson1;~Sydney_Whittington1;~James_D_Walker1;~Theodore_Bapty1;~Joseph_Hite1;~Karthik_Ramani1;~Christopher_McComb2;~Susmit_Jha1", "gender": "M;M;M;M;M;Non-Binary;;M;M;M;Not Specified;", "homepage": ";;http://www.csl.sri.com/people/elenius/;;;;;http://engineering.vanderbilt.edu/bio/theodore-bapty;;https://engineering.purdue.edu/~ramani/;https://cmccomb.com/;http://susmitjha.github.io/", "dblp": "206/6601;;;;;;;;;01/6965.html;;", "google_scholar": "XW1fyPcAAAAJ;N9eSuR4AAAAJ;;25XaiW0AAAAJ;;;;;;wQ6njfUAAAAJ;0P9w_S0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0000-6889-0204;;;0009-0009-2342-3018;0000-0002-2935-4721;0000-0002-3093-8978;;;0000-0001-8639-5135;;0000-0001-5983-9095", "linkedin": ";anirbanroylinkedin/;;;;;james-walker-37a59482/;;joseph-hite-40bb4aa3/;karthikramani1/;;susmitjha/", "or_profile": "~Adam_D._Cobb1;~Anirban_Roy3;~Daniel_Elenius1;~Frederick_Michael_Heim1;~Brian_Swenson1;~Sydney_Whittington1;~James_D_Walker1;~Theodore_Bapty1;~Joseph_Hite1;~Karthik_Ramani1;~Christopher_McComb2;~Susmit_Jha1", "aff": "SRI International;SRI International;SRI International;;Southwest Research Institute;Southwest Research Institute;Southwest Research Institute;Vanderbilt University;Vanderbilt University;Purdue University;Carnegie Mellon University;SRI International", "aff_domain": "sri.com;sri.com;sri.com;;swri.edu;swri.edu;swri.edu;vanderbilt.edu;vanderbilt.edu;purdue.edu;cmu.edu;sri.com", "position": "Researcher;Sr Scientist;Senior Software Engineer;;Researcher;Researcher;Researcher;Full Professor;Researcher;Full Professor;Associate Professor;Principal Scientist", "bibtex": "@inproceedings{\ncobb2023aircraftverse,\ntitle={AircraftVerse: A Large-Scale Multimodal Dataset of Aerial Vehicle Designs},\nauthor={Adam D. 
Cobb and Anirban Roy and Daniel Elenius and Frederick Michael Heim and Brian Swenson and Sydney Whittington and James D Walker and Theodore Bapty and Joseph Hite and Karthik Ramani and Christopher McComb and Susmit Jha},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=MfhJWSp3Ea}\n}", "github": "", "project": "", "reviewers": "TaAN;v2Uf;vKs1;prSM;HgCT;29GD", "pdf_size": 3086614, "rating": "5;5;6;7;7;9", "confidence": "4;3;2;2;4;2", "wc_summary_and_contributions": "61;40;52;196;93;288", "wc_strengths": "59;61;28;193;25;117", "wc_improvement": "280;123;37;143;76;279", "wc_limitations": "99;21;25;22;132;17", "wc_correctness": "27;1;8;1;28;198", "wc_clarity": "43;1;15;4;5;9", "wc_relation_to_prior_work": "1;1;7;1;11;13", "wc_documentation": "15;8;17;11;1;3", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "586;257;190;572;372;925", "wc_reply_reviewers": "0;20;135;10;0;135", "wc_reply_authors": "284;271;642;122;155;270", "reply_reviewers": "0;1;1;1;0;1", "reply_authors": "1;1;2;1;1;2", "rating_avg": [ 6.5, 1.3844373104863459 ], "confidence_avg": [ 2.8333333333333335, 0.8975274678557507 ], "wc_summary_and_contributions_avg": [ 121.66666666666667, 90.56980119713684 ], "wc_strengths_avg": [ 80.5, 58.690572780756995 ], "wc_improvement_avg": [ 156.33333333333334, 93.38391486522482 ], "wc_limitations_avg": [ 52.666666666666664, 45.49969474867081 ], "wc_correctness_avg": [ 43.833333333333336, 69.82696868370813 ], "wc_clarity_avg": [ 12.833333333333334, 14.19409110236447 ], "wc_relation_to_prior_work_avg": [ 5.666666666666667, 4.988876515698588 ], "wc_documentation_avg": [ 9.166666666666666, 5.842849380986034 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 483.6666666666667, 246.01400140823063 ], "wc_reply_reviewers_avg": [ 50.0, 60.484157705413516 ], "wc_reply_authors_avg": [ 290.6666666666667, 168.89214178153924 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.46945692571039066, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7353853026426915408&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "sri.com;sri.com;sri.com;;swri.edu;swri.edu;swri.edu;vanderbilt.edu;vanderbilt.edu;purdue.edu;cmu.edu;sri.com", "author_num": 12, "aff_unique_index": "0;0;0;1;1;1;2;2;3;4;0", "aff_unique_norm": "SRI International;Southwest Research Institute;Vanderbilt University;Purdue University;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sri.com;https://www.swri.org;https://www.vanderbilt.edu;https://www.purdue.edu;https://www.cmu.edu", "aff_unique_abbr": "SRI;SWRI;Vanderbilt;Purdue;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Protein Design with Guided Discrete Diffusion", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71899", "id": "MfiK69Ga6p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29591f355702c3f4436991335784b503-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MfiK69Ga6p", "openreview": "https://openreview.net/forum?id=MfiK69Ga6p", "poster": "/media/PosterPDFs/NeurIPS%202023/71899.png?t=1702441871.207499", "slides": 
"https://nips.cc/virtual/2023/poster/71899", "video": "https://nips.cc/virtual/2023/poster/71899", "author_site": "Nate Gruver, Samuel Stanton, Nathan Frey, Tim G. J. Rudner, Isidro Hotzel, Julien Lafrance-Vanasse, Arvind Rajpal, Kyunghyun Cho, Andrew Wilson", "tldr": "", "abstract": "A popular approach to protein design is to combine a generative model with a discriminative model for conditional sampling. The generative model samples plausible sequences while the discriminative model guides a search for sequences with high fitness. Given its broad success in conditional sampling, classifier-guided diffusion modeling is a promising foundation for protein design, leading many to develop guided diffusion models for structure with inverse folding to recover sequences. In this work, we propose diffusioN Optimized Sampling (NOS), a guidance method for discrete diffusion models that follows gradients in the hidden states of the denoising network. NOS makes it possible to perform design directly in sequence space, circumventing significant limitations of structure-based methods, including scarce data and challenging inverse design. Moreover, we use NOS to generalize LaMBO, a Bayesian optimization procedure for sequence design that facilitates multiple objectives and edit-based constraints. The resulting method, LaMBO-2, enables discrete diffusions and stronger performance with limited edits through a novel application of saliency maps. We apply LaMBO-2 to a real-world protein design task, optimizing antibodies for higher expression yield and binding affinity to several therapeutic targets under locality and developability constraints, attaining a 99\\% expression rate and 40\\% binding rate in exploratory in vitro experiments.", "keywords": "protein design;diffusion model;classifier guidance", "primary_area": "", "supplementary_material": "/attachment/457dcc4b441b662916ea1f43ca2f5372d408cbfc.pdf", "author": "Nate Gruver;Samuel Don Stanton;Nathan C. Frey;Tim G. J. 
Rudner;Isidro Hotzel;Julien Lafrance-Vanasse;Arvind Rajpal;Kyunghyun Cho;Andrew Gordon Wilson", "authorids": "~Nate_Gruver1;~Samuel_Don_Stanton1;~Nathan_C._Frey1;~Tim_G._J._Rudner2;~Isidro_Hotzel1;~Julien_Lafrance-Vanasse1;~Arvind_Rajpal1;~Kyunghyun_Cho1;~Andrew_Gordon_Wilson1", "gender": "M;M;;;;M;M;M;Not Specified", "homepage": "https://ngruver.github.io/;https://samuelstanton.github.io/;https://ncfrey.github.io/;;;;;http://kyunghyuncho.me;https://cims.nyu.edu/~andrewgw", "dblp": "223/5568;264/1895;306/1335;;;;;41/9736;65/10453", "google_scholar": "R5QNdhcAAAAJ;https://scholar.google.com/citations?hl=en;IMUja60AAAAJ;;;;https://scholar.google.com/citations?view_op=search_authors;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;0000-0001-5291-6131;;;0000-0001-8807-6277;;;", "linkedin": ";samuel-stanton-06004997/;ncfrey;;;julienlv/;;;", "or_profile": "~Nate_Gruver1;~Samuel_Don_Stanton1;~Nathan_C._Frey1;~Tim_G._J._Rudner2;~Isidro_Hotzel1;~Julien_Lafrance-Vanasse1;~Arvind_Rajpal1;~Kyunghyun_Cho1;~Andrew_Gordon_Wilson1", "aff": "New York University;Genentech;Prescient Design, Genentech;;;Genentech;;New York University;New York University", "aff_domain": "nyu.edu;gene.com;gene.com;;;gene.com;;nyu.edu;nyu.edu", "position": "PhD student;Researcher;Researcher;;;Researcher;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ngruver2023protein,\ntitle={Protein Design with Guided Discrete Diffusion},\nauthor={Nate Gruver and Samuel Don Stanton and Nathan C. Frey and Tim G. J. Rudner and Isidro Hotzel and Julien Lafrance-Vanasse and Arvind Rajpal and Kyunghyun Cho and Andrew Gordon Wilson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MfiK69Ga6p}\n}", "github": "", "project": "", "reviewers": "EUPz;gzgm;N1y4;UJds;j6TU", "pdf_size": 8106222, "rating": "5;5;6;6;7", "confidence": "2;3;5;3;3", "soundness": "3;3;3;2;4", "novelty": "3;2;3;3;2", "presentation": "3;3;3;1;3", "wc_summary": "79;37;26;101;40", "wc_strengths": "60;22;44;173;112", "wc_weaknesses": "87;109;45;666;16", "wc_questions": "63;119;206;306;15", "wc_limitations": "11;1;55;55;8", "wc_review": "300;288;376;1301;191", "wc_reply_reviewers": "289;56;452;20;12", "wc_reply_authors": "286;129;965;32;0", "reply_reviewers": "1;1;3;1;1", "reply_authors": "2;2;5;2;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 56.6, 28.527881099023112 ], "wc_strengths_avg": [ 82.2, 54.23799406320259 ], "wc_weaknesses_avg": [ 184.6, 242.8584773072581 ], "wc_questions_avg": [ 141.8, 103.81791752871948 ], "wc_limitations_avg": [ 26.0, 23.89979079406345 ], "wc_review_avg": [ 491.2, 409.14906818908923 ], "wc_reply_reviewers_avg": [ 165.8, 175.5316495678201 ], "wc_reply_authors_avg": [ 282.4, 355.51011237375513 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.4, 1.3564659966250536 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16845395525109634055&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "nyu.edu;gene.com;gene.com;;;gene.com;;nyu.edu;nyu.edu", "author_num": 9, "aff_unique_index": "0;1;1;1;0;0", "aff_unique_norm": "New York 
University;Genentech", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.genentech.com", "aff_unique_abbr": "NYU;Genentech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimize Planning Heuristics to Rank, not to Estimate Cost-to-Goal", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71898", "id": "Mgy6sgslPY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/50ea4dbd1cff6bd3daef939eff10c092-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Mgy6sgslPY", "openreview": "https://openreview.net/forum?id=Mgy6sgslPY", "poster": "/media/PosterPDFs/NeurIPS%202023/71898.png?t=1698923184.3896174", "slides": "https://nips.cc/virtual/2023/poster/71898", "video": "https://nips.cc/virtual/2023/poster/71898", "author_site": "Leah Chrestien, Stefan Edelkamp, Antonin Komenda, Tomas Pevny", "tldr": "", "abstract": "In imitation learning for planning, parameters of heuristic functions are optimized against a set of solved problem instances. This work revisits the necessary and sufficient conditions of strictly optimally efficient heuristics for forward search algorithms, mainly A* and greedy best-first search, which expand only states on the returned optimal path. It then proposes a family of loss functions based on ranking tailored for a given variant of the forward search algorithm. Furthermore, from a learning theory point of view, it discusses why optimizing cost-to-goal h* is unnecessarily difficult. The experimental comparison on a diverse set of problems unequivocally supports the derived theory.", "keywords": "Learning heuristic functions;deep learning;Imitation learning;planning;A*;best first search", "primary_area": "", "supplementary_material": "/attachment/009b2607ee85d7f9f092c2e38acc0c90553b2b53.pdf", "author": "Leah Chrestien;Stefan Edelkamp;Antonin Komenda;Tom\u00e1\u0161 Pevn\u00fd", "authorids": "~Leah_Chrestien1;~Stefan_Edelkamp1;~Antonin_Komenda1;~Tom\u00e1\u0161_Pevn\u00fd1", "gender": "F;M;M;M", "homepage": ";http://agents.fel.cvut.cz/~komenda/;https://cs.felk.cvut.cz/en/people/pevnytom;https://www.aic.fel.cvut.cz/members/stefan-edelkamp", "dblp": ";https://dblp.uni-trier.de/pid/96/5384;20/1317;98/3919", "google_scholar": "TW855V8AAAAJ;https://scholar.google.cz/citations?user=dzh73HkAAAAJ;MnXqDssAAAAJ;https://scholar.google.de/citations?user=TKVCz1MAAAAJ", "orcid": ";0000-0002-6947-308X;0000-0002-5768-9713;0000-0001-8435-5025", "linkedin": "leah-chrestien/;akomenda/;;", "or_profile": "~Leah_Chrestien1;~Antonin_Komenda1;~Tom\u00e1\u0161_Pevn\u00fd1;~Stefan_Edelkamp2", "aff": "Czech Technical University in Prague, Czech Technical University of Prague;Czech Technical University in Prague;Czech Technical University in Prague;AIC FEL CTU Prague", "aff_domain": "fel.cvut.cz;cvut.cz;cvut.cz;aic.fel.cvut.cz", "position": "PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchrestien2023optimize,\ntitle={Optimize Planning Heuristics to Rank, not to Estimate Cost-to-Goal},\nauthor={Leah Chrestien and Stefan Edelkamp and Antonin Komenda and Tom{\\'a}{\\v{s}} Pevn{\\'y}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Mgy6sgslPY}\n}", "github": "", "project": "", "reviewers": "7xum;ahr3;458p;jMYD", "pdf_size": 359719, "rating": "5;6;6;7", "confidence": "3;4;4;4",
"soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "24;63;50;137", "wc_strengths": "20;25;57;40", "wc_weaknesses": "236;66;37;38", "wc_questions": "9;1003;203;105", "wc_limitations": "20;21;8;68", "wc_review": "309;1178;355;388", "wc_reply_reviewers": "548;530;91;106", "wc_reply_authors": "389;1111;0;7", "reply_reviewers": "2;3;1;1", "reply_authors": "3;5;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 41.96724913548659 ], "wc_strengths_avg": [ 35.5, 14.430869689661812 ], "wc_weaknesses_avg": [ 94.25, 82.66309636107276 ], "wc_questions_avg": [ 330.0, 394.56431668360483 ], "wc_limitations_avg": [ 29.25, 22.949673200287624 ], "wc_review_avg": [ 557.5, 359.3428029055264 ], "wc_reply_reviewers_avg": [ 318.75, 220.40573381833786 ], "wc_reply_authors_avg": [ 376.75, 452.1970671068091 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9425485284545047165&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "fel.cvut.cz;cvut.cz;cvut.cz;aic.fel.cvut.cz", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Czech Technical University in Prague;Czech Technical University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ctu.cz;https://www.ctu.cz", "aff_unique_abbr": "CTU;CTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Prague", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Czech Republic" }, { "title": "Delayed Algorithms for Distributed Stochastic Weakly Convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71897", "id": "MirclT6zpv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/470e23d14e330ab0daa5387916b95f9c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MirclT6zpv", "openreview": "https://openreview.net/forum?id=MirclT6zpv", "poster": "/media/PosterPDFs/NeurIPS%202023/71897.png?t=1702450132.9290636", "slides": "https://nips.cc/virtual/2023/poster/71897", "video": "https://nips.cc/virtual/2023/poster/71897", "author_site": "Wenzhi Gao, Qi Deng", "tldr": "", "abstract": "This paper studies delayed stochastic algorithms for weakly convex optimization in a distributed network with workers connected to a master node. Recently, Xu et al. (2022) showed that an inertial stochastic subgradient method converges at a rate of $\\mathcal{O}(\\tau_{\\text{max}}/\\sqrt{K})$ which depends on the maximum information delay $\\tau_{\\text{max}}$. \nIn this work, we show that the delayed stochastic subgradient method ($\\texttt{DSGD}$) obtains a tighter convergence rate which depends on the expected delay $\\bar{\\tau}$. Furthermore, for an important class of composition weakly convex problems, we develop a new delayed stochastic prox-linear ($\\texttt{DSPL}$) method in which the delays only affect the high-order term in the rate and hence, are negligible after a certain number of $\\texttt{DSPL}$ iterations. In addition, we demonstrate the robustness of our proposed algorithms against arbitrary delays.
By incorporating a simple safeguarding step in both methods, we achieve convergence rates that depend solely on the number of workers, eliminating the effect of delays. Our numerical experiments further confirm the empirical superiority of our proposed methods.", "keywords": "Stochastic optimization;Distributed optimization;Prox-linear method;Stochastic gradient method", "primary_area": "", "supplementary_material": "/attachment/a0c38b8bdfc12b7b7b7e6a3865437130f7a76673.zip", "author": "Wenzhi Gao;Qi Deng", "authorids": "~Wenzhi_Gao1;~Qi_Deng1", "gender": "M;M", "homepage": "https://github.com/Gwzwpxz;http://sime.shufe.edu.cn/teacher/show/225", "dblp": ";", "google_scholar": "4lDkX_YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Wenzhi_Gao1;~Qi_Deng1", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics", "aff_domain": "shufe.edu.cn;sufe.edu.cn", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\ngao2023delayed,\ntitle={Delayed Algorithms for Distributed Stochastic Weakly Convex Optimization},\nauthor={Wenzhi Gao and Qi Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MirclT6zpv}\n}", "github": "", "project": "", "reviewers": "93YJ;bGwb;SDCr;6vL5;Ld2g", "pdf_size": 2101325, "rating": "4;5;7;8;8", "confidence": "3;5;3;3;2", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;2;3;4;4", "wc_summary": "94;61;34;122;97", "wc_strengths": "85;17;40;69;53", "wc_weaknesses": "267;25;20;39;13", "wc_questions": "35;35;1;52;132", "wc_limitations": "29;1;1;13;1", "wc_review": "510;139;96;295;296", "wc_reply_reviewers": "0;9;3;21;10", "wc_reply_authors": "57;11;6;11;6", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 1.624807680927192 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 81.6, 30.70244289954791 ], "wc_strengths_avg": [ 52.8, 23.42989543297195 ], "wc_weaknesses_avg": [ 72.8, 97.47286801977255 ], "wc_questions_avg": [ 51.0, 43.7584277596899 ], "wc_limitations_avg": [ 9.0, 11.027239001672177 ], "wc_review_avg": [ 267.2, 145.8072700519422 ], "wc_reply_reviewers_avg": [ 8.6, 7.227724399837061 ], "wc_reply_authors_avg": [ 18.2, 19.52844079797463 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5527707983925667, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6059571504948732693&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "shufe.edu.cn;sufe.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai University of Finance and Economics", "aff_unique_dep": "", "aff_unique_url": "http://www.sufe.edu.cn", "aff_unique_abbr": "SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Diverse Conventions for Human-AI Collaboration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71896", "id": "MljeRycu9s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4818263715b25dc137d393af8af6d2fc-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=MljeRycu9s", "openreview": "https://openreview.net/forum?id=MljeRycu9s", "poster": "/media/PosterPDFs/NeurIPS%202023/71896.png?t=1699925358.4517965", "slides": "https://nips.cc/virtual/2023/poster/71896", "video": "https://nips.cc/virtual/2023/poster/71896", "author_site": "Bidipta Sarkar, Andy Shih, Dorsa Sadigh", "tldr": "", "abstract": "Conventions are crucial for strong performance in cooperative multi-agent games, because they allow players to coordinate on a shared strategy without explicit communication. Unfortunately, standard multi-agent reinforcement learning techniques, such as self-play, converge to conventions that are arbitrary and non-diverse, leading to poor generalization when interacting with new partners. In this work, we present a technique for generating diverse conventions by (1) maximizing their rewards during self-play, while (2) minimizing their rewards when playing with previously discovered conventions (cross-play), stimulating conventions to be semantically different. To ensure that learned policies act in good faith despite the adversarial optimization of cross-play, we introduce mixed-play, where an initial state is randomly generated by sampling self-play and cross-play transitions and the player learns to maximize the self-play reward from this initial state. We analyze the benefits of our technique on various multi-agent collaborative games, including Overcooked, and find that our technique can adapt to the conventions of humans, surpassing human-level performance when paired with real users.", "keywords": "Multi-Agent RL;Multi-Agent Coordination;Human-AI Coordination", "primary_area": "", "supplementary_material": "/attachment/3c11935a9afa79bbe76166806c835798152b460f.zip", "author": "Bidipta Sarkar;Andy Shih;Dorsa Sadigh", "authorids": "~Bidipta_Sarkar1;~Andy_Shih1;~Dorsa_Sadigh1", "gender": "M;;F", "homepage": "https://github.com/bsarkar321;https://cs.stanford.edu/~andyshih/;https://dorsa.fyi/", "dblp": "260/0072;https://dblp.uni-trier.de/pers/hd/s/Shih:Andy;117/3174", "google_scholar": "wr9RgmcAAAAJ;G85kxUUAAAAJ;ZaJEZpYAAAAJ", "orcid": "0000-0002-0584-3504;;", "linkedin": ";;", "or_profile": "~Bidipta_Sarkar1;~Andy_Shih1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;cs.stanford.edu;stanford.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsarkar2023diverse,\ntitle={Diverse Conventions for Human-{AI} Collaboration},\nauthor={Bidipta Sarkar and Andy Shih and Dorsa Sadigh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MljeRycu9s}\n}", "github": "", "project": "", "reviewers": "qsFc;Dzgz;ghEw;9YDu", "pdf_size": 1524680, "rating": "6;7;7;7", "confidence": "3;3;5;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "63;85;38;23", "wc_strengths": "70;164;93;102", "wc_weaknesses": "386;150;132;236", "wc_questions": "72;114;140;52", "wc_limitations": "47;47;1;28", "wc_review": "638;560;404;441", "wc_reply_reviewers": "17;93;17;43", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 52.25, 23.699947257325277 ], 
"wc_strengths_avg": [ 107.25, 34.78056210011563 ], "wc_weaknesses_avg": [ 226.0, 100.38924245156949 ], "wc_questions_avg": [ 94.5, 34.50724561595724 ], "wc_limitations_avg": [ 30.75, 18.846418757949746 ], "wc_review_avg": [ 510.75, 93.3792669707789 ], "wc_reply_reviewers_avg": [ 42.5, 31.028212968200407 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9677124559181212500&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "stanford.edu;cs.stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Birder: Communication-Efficient 1-bit Adaptive Optimizer for Practical Distributed DNN Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71895", "id": "Mlo2kM11ZB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7c72fcd7b6bffc3864c7152ab5a2dd83-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Mlo2kM11ZB", "openreview": "https://openreview.net/forum?id=Mlo2kM11ZB", "poster": "/media/PosterPDFs/NeurIPS%202023/71895.png?t=1701860379.2162807", "slides": "https://nips.cc/virtual/2023/poster/71895", "video": "https://nips.cc/virtual/2023/poster/71895", "author_site": "Hanyang Peng, Shuang Qin, Yue Yu, Jin Wang, Hui Wang, Ge Li", "tldr": "", "abstract": "Various gradient compression algorithms have been proposed to alleviate the communication bottleneck in distributed learning, and they have demonstrated effectiveness in terms of high compression ratios and theoretical low communication complexity. However, when it comes to practically training modern deep neural networks (DNNs), these algorithms have yet to match the inference performance of uncompressed SGD-momentum (SGDM) and adaptive optimizers (e.g.,Adam). More importantly, recent studies suggest that these algorithms actually offer no speed advantages over SGDM/Adam when used with common distributed DNN training frameworks ( e.g., DistributedDataParallel (DDP)) in the typical settings, due to heavy compression/decompression computation or incompatibility with the efficient All-Reduce or the requirement of uncompressed warmup at the early stage. For these reasons, we propose a novel 1-bit adaptive optimizer, dubbed *Bi*nary *r*andomization a*d*aptive optimiz*er* (**Birder**). The quantization of Birder can be easily and lightly computed, and it does not require warmup with its uncompressed version in the beginning. Also, we devise Hierarchical-1-bit-All-Reduce to further lower the communication volume. We theoretically prove that it promises the same convergence rate as the Adam. Extensive experiments, conducted on 8 to 64 GPUs (1 to 8 nodes) using DDP, demonstrate that Birder achieves comparable inference performance to uncompressed SGDM/Adam, with up to ${2.5 \\times}$ speedup for training ResNet-50 and ${6.3\\times}$ speedup for training BERT-Base. 
Code is publicly available at https://openi.pcl.ac.cn/c2net_optim/Birder.", "keywords": "optimizer;1-bit optimizer;distributed learning;optimization for deep networks;communication efficiency", "primary_area": "", "supplementary_material": "/attachment/2ed65ac1ac85903c25fb385dd0530e7c3dc444a1.zip", "author": "Hanyang Peng;Shuang Qin;Yue Yu;Jin Wang;Hui Wang;Ge Li", "authorids": "~Hanyang_Peng1;~Shuang_Qin1;~Yue_Yu8;~Jin_Wang14;~Hui_Wang13;~Ge_Li2", "gender": "M;;M;M;M;M", "homepage": ";;http://yuyue.github.io/;https://fourwinds021.github.io/;https://openi.pcl.ac.cn;https://dblp.org/pid/24/712-2.html", "dblp": "162/0123;;55/2008-1;;39/721-73;24/712-2.html", "google_scholar": ";;VnqWgEwAAAAJ;;;", "orcid": ";;0000-0002-9865-2212;;;", "linkedin": ";;;;;", "or_profile": "~Hanyang_Peng1;~Shuang_Qin1;~Yue_Yu8;~Jin_Wang14;~Hui_Wang13;~Ge_Li2", "aff": "Pengcheng Loboratory;Peng Cheng Laboratory;National University of Defense Technology;Tianjin University;Cloud Computing;Peking University Shenzhen Graduate School", "aff_domain": "pcl.ac.cn;pcl.ac.cn;nudt.edu.cn;tju.edu.cn;pcl.ac.cn;pku.edu.cn", "position": "Assistant Professor;Engineer;Associate Professor;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\npeng2023birder,\ntitle={Birder: Communication-Efficient 1-bit Adaptive Optimizer for Practical Distributed {DNN} Training},\nauthor={Hanyang Peng and Shuang Qin and Yue Yu and Jin Wang and Hui Wang and Ge Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Mlo2kM11ZB}\n}", "github": "", "project": "", "reviewers": "5r9C;yyED;jDqW;sXik;gDkh", "pdf_size": 1345250, "rating": "4;5;5;5;7", "confidence": "4;3;3;2;3", "soundness": "3;2;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;2;3;3;2", "wc_summary": "72;91;74;76;159", "wc_strengths": "42;48;70;36;157", "wc_weaknesses": "26;191;40;48;271", "wc_questions": "351;51;61;12;4", "wc_limitations": "2;34;15;10;5", "wc_review": "493;415;260;182;596", "wc_reply_reviewers": "32;519;0;30;28", "wc_reply_authors": "553;1180;126;70;550", "reply_reviewers": "1;2;0;1;1", "reply_authors": "4;5;3;2;4", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 94.4, 32.98848283871206 ], "wc_strengths_avg": [ 70.6, 44.701677820860375 ], "wc_weaknesses_avg": [ 115.2, 98.1293024534466 ], "wc_questions_avg": [ 95.8, 129.4564019274443 ], "wc_limitations_avg": [ 13.2, 11.303096920755834 ], "wc_review_avg": [ 389.2, 150.8845916586581 ], "wc_reply_reviewers_avg": [ 121.8, 198.94361010095298 ], "wc_reply_authors_avg": [ 495.8, 398.0946621094034 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.6, 1.019803902718557 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3227486121839514, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15823502697732285786&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "pcl.ac.cn;pcl.ac.cn;nudt.edu.cn;tju.edu.cn;pcl.ac.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "Pengcheng Laboratory;National University of Defense Technology;Tianjin University;Cloud Computing;Peking University", "aff_unique_dep": ";;;;", "aff_unique_url": ";http://www.nudt.edu.cn/;http://www.tju.edu.cn;;http://www.pku.edu.cn", 
"aff_unique_abbr": ";NUDT;TJU;;PKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Minimum norm interpolation by perceptra: Explicit regularization and implicit bias", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71894", "id": "MlrFYNo1yc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b8612e11c75456c90963fd408d75c4d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MlrFYNo1yc", "openreview": "https://openreview.net/forum?id=MlrFYNo1yc", "poster": "/media/PosterPDFs/NeurIPS%202023/71894.png?t=1701581918.2988791", "slides": "https://nips.cc/virtual/2023/poster/71894", "video": "https://nips.cc/virtual/2023/poster/71894", "author_site": "Jiyoung Park, Ian Pelakh, Stephan Wojtowytsch", "tldr": "", "abstract": "We investigate how shallow ReLU networks interpolate between known regions. Our analysis shows that empirical risk minimizers converge to a minimum norm interpolant as the number of data points and parameters tends to infinity when a weight decay regularizer is penalized with a coefficient which vanishes at a precise rate as the network width and the number of data points grow. With and without explicit regularization, we numerically study the implicit bias of common optimization algorithms towards known minimum norm interpolants.", "keywords": "Artificial neural network;interpolation;explicit regularization;implicit bias;weight decay;Barron class", "primary_area": "", "supplementary_material": "/attachment/bf760e6b8154718e4d2544df9689110067de49e4.zip", "author": "Jiyoung Park;Ian Pelakh;Stephan Wojtowytsch", "authorids": "~Jiyoung_Park3;ispelakh@iastate.edu;~Stephan_Wojtowytsch1", "gender": "M;;", "homepage": "https://wldyddl5510.github.io/;;http://www.swojtowytsch.com", "dblp": ";;252/1157", "google_scholar": "https://scholar.google.com/citations?hl=en;;vnluGycAAAAJ", "orcid": "0009-0007-7283-2315;;0000-0003-3766-5332", "linkedin": ";;", "or_profile": "~Jiyoung_Park3;ispelakh@iastate.edu;~Stephan_Wojtowytsch1", "aff": "Texas A&M University - College Station;;Texas A&M", "aff_domain": "tamu.edu;;tamu.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\npark2023minimum,\ntitle={Minimum norm interpolation by perceptra: Explicit regularization and implicit bias},\nauthor={Jiyoung Park and Ian Pelakh and Stephan Wojtowytsch},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MlrFYNo1yc}\n}", "github": "", "project": "", "reviewers": "hUXj;5DHL;CjUq;Gs5Q;etEC;jgQF", "pdf_size": 3438191, "rating": "4;5;5;6;6;6", "confidence": "2;1;2;4;4;4", "soundness": "3;3;3;3;3;3", "novelty": "2;3;3;3;3;2", "presentation": "3;3;3;3;3;1", "wc_summary": "71;92;104;317;229;134", "wc_strengths": "6;51;48;68;44;91", "wc_weaknesses": "17;68;183;324;21;1174", "wc_questions": "17;1;190;42;1;17", "wc_limitations": "1;1;1;42;1;1", "wc_review": "112;213;526;793;296;1417", "wc_reply_reviewers": "0;0;14;20;0;419", "wc_reply_authors": "0;0;0;0;0;261", "reply_reviewers": "0;0;1;1;0;2", "reply_authors": "1;1;1;1;1;2", "rating_avg": [ 5.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 2.8333333333333335, 1.2133516482134197 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 157.83333333333334, 
87.3105886412919 ], "wc_strengths_avg": [ 51.333333333333336, 25.7272013419433 ], "wc_weaknesses_avg": [ 297.8333333333333, 406.17459984045723 ], "wc_questions_avg": [ 44.666666666666664, 66.4245603841096 ], "wc_limitations_avg": [ 7.833333333333333, 15.27979784624856 ], "wc_review_avg": [ 559.5, 443.6029568581947 ], "wc_reply_reviewers_avg": [ 75.5, 153.815636396304 ], "wc_reply_authors_avg": [ 43.5, 97.26895702124085 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7985836518841367, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9706803867939334448&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tamu.edu;;tamu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Trade-off Between Efficiency and Consistency for Removal-based Explanations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71893", "id": "MmCtXvW6GO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51484744337f4bf5fea0e4dd92ddab0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MmCtXvW6GO", "openreview": "https://openreview.net/forum?id=MmCtXvW6GO", "poster": "/media/PosterPDFs/NeurIPS%202023/71893.png?t=1699102467.7535045", "slides": "https://nips.cc/virtual/2023/poster/71893", "video": "https://nips.cc/virtual/2023/poster/71893", "author_site": "Yifan Zhang, Haowei He, Zhiquan Tan, Yang Yuan", "tldr": "", "abstract": "In the current landscape of explanation methodologies, most predominant approaches, such as SHAP and LIME, employ removal-based techniques to evaluate the impact of individual features by simulating various scenarios with specific features omitted. Nonetheless, these methods primarily emphasize efficiency in the original context, often resulting in general inconsistencies. In this paper, we demonstrate that such inconsistency is an inherent aspect of these approaches by establishing the Impossible Trinity Theorem, which posits that interpretability, efficiency, and consistency cannot hold simultaneously. Recognizing that the attainment of an ideal explanation remains elusive, we propose the utilization of interpretation error as a metric to gauge inefficiencies and inconsistencies. To this end, we present two novel algorithms founded on the standard polynomial basis, aimed at minimizing interpretation error. 
Our empirical findings indicate that the proposed methods achieve a substantial reduction in interpretation error, up to 31.8 times lower when compared to alternative techniques.", "keywords": "AI interpretability;explainable AI;deep learning theory", "primary_area": "", "supplementary_material": "/attachment/27f3e5b090d007a710c5fb567857d3f83302af75.zip", "author": "Yifan Zhang;Haowei He;Zhiquan Tan;Yang Yuan", "authorids": "~Yifan_Zhang16;~Haowei_He1;~Zhiquan_Tan1;~Yang_Yuan4", "gender": ";M;M;M", "homepage": ";https://962086838.github.io/;;http://people.iiis.tsinghua.edu.cn/~yuanyang/index.html", "dblp": ";;326/0177;", "google_scholar": ";IcNEbaMAAAAJ;;", "orcid": ";;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/ACoAAC1A8_QBFX8OlchWmVI_pNXN4zm_t6vPKCs;", "or_profile": "~Yifan_Zhang16;~Haowei_He1;~Zhiquan_Tan1;~Yang_Yuan4", "aff": ";Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": ";PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023tradeoff,\ntitle={Trade-off Between Efficiency and Consistency for Removal-based Explanations},\nauthor={Yifan Zhang and Haowei He and Zhiquan Tan and Yang Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MmCtXvW6GO}\n}", "github": "", "project": "", "reviewers": "JLEA;ngxu;PY9W;XVSh;k1pA", "pdf_size": 1872245, "rating": "5;5;6;6;6", "confidence": "2;5;4;4;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;2;3;3", "wc_summary": "70;87;115;81;54", "wc_strengths": "27;47;98;38;94", "wc_weaknesses": "69;85;737;191;50", "wc_questions": "83;58;144;49;34", "wc_limitations": "6;76;11;10;18", "wc_review": "255;353;1105;369;250", "wc_reply_reviewers": "42;7;35;52;8", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 81.4, 20.204949888579286 ], "wc_strengths_avg": [ 60.8, 29.457766378325427 ], "wc_weaknesses_avg": [ 226.4, 259.9427629306113 ], "wc_questions_avg": [ 73.6, 38.62952238897085 ], "wc_limitations_avg": [ 24.2, 26.187019685332654 ], "wc_review_avg": [ 466.4, 323.00934970988067 ], "wc_reply_reviewers_avg": [ 28.8, 18.214280112043955 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.25, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17927788396421437981&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": ";tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "XES3G5M: A Knowledge Tracing Benchmark Dataset with Auxiliary Information", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73614", "id": "Mn9oHNdYCE", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/67fc628f17c2ad53621fb961c6bafcaf-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Mn9oHNdYCE", "openreview": "https://openreview.net/forum?id=Mn9oHNdYCE", "poster": "/media/PosterPDFs/NeurIPS%202023/73614.png?t=1702226103.223901", "slides": "https://nips.cc/virtual/2023/poster/73614", "video": "https://nips.cc/virtual/2023/poster/73614", "author_site": "Zitao Liu, Qiongqiong Liu, Teng Guo, Jiahao Chen, Shuyan Huang, Xiangyu Zhao, Jiliang Tang, Weiqi Luo, Jian Weng", "tldr": "", "abstract": "Knowledge tracing (KT) is a task that predicts students' future performance based on their historical learning interactions. With the rapid development of deep learning techniques, existing KT approaches follow a data-driven paradigm that uses massive problem-solving records to model students' learning processes. However, although the educational contexts contain various factors that may have an influence on student learning outcomes, existing public KT datasets mainly consist of anonymized ID-like features, which may hinder the research advances towards this field. Therefore, in this work, we present, \\emph{XES3G5M}, a large-scale dataset with rich auxiliary information about questions and their associated knowledge components (KCs)\\footnote{\\label{ft:kc}A KC is a generalization of everyday terms like concept, principle, fact, or skill.}. The XES3G5M dataset is collected from a real-world online math learning platform, which contains 7,652 questions, and 865 KCs with 5,549,635 interactions from 18,066 students. To the best of our knowledge, the XES3G5M dataset not only has the largest number of KCs in math domain but contains the richest contextual information including tree structured KC relations, question types, textual contents and analysis and student response timestamps. Furthermore, we build a comprehensive benchmark on 19 state-of-the-art deep learning based knowledge tracing (DLKT) models. Extensive experiments demonstrate the effectiveness of leveraging the auxiliary information in our XES3G5M with DLKT models. 
We hope the proposed dataset can effectively facilitate the KT research work.", "keywords": "knowledge tracing;benchmark;online education", "primary_area": "", "supplementary_material": "/attachment/7f709458345b16d702bcf8b29b015ab251be9f5c.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nliu2023xesgm,\ntitle={{XES}3G5M: A Knowledge Tracing Benchmark Dataset with Auxiliary Information},\nauthor={Zitao Liu and Qiongqiong Liu and Teng Guo and Jiahao Chen and Shuyan Huang and Xiangyu Zhao and Jiliang Tang and Weiqi Luo and Jian Weng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Mn9oHNdYCE}\n}", "github": "", "project": "", "reviewers": "wWbZ;PA9g;vgkD;adSw;7CjA", "pdf_size": 1297519, "rating": "5;6;6;7;7", "confidence": "3;3;2;4;3", "wc_summary_and_contributions": "95;101;94;58;104", "wc_strengths": "112;36;73;56;79", "wc_improvement": "92;80;34;54;269", "wc_limitations": "1;98;46;32;7", "wc_correctness": "35;8;16;26;10", "wc_clarity": "28;4;48;72;1", "wc_relation_to_prior_work": "50;17;15;34;42", "wc_documentation": "35;50;29;5;3", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "449;395;356;338;516", "wc_reply_reviewers": "0;0;0;20;28", "wc_reply_authors": "375;421;418;412;684", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 90.4, 16.620469307453384 ], "wc_strengths_avg": [ 71.2, 25.293477420078087 ], "wc_improvement_avg": [ 105.8, 84.05807516235427 ], "wc_limitations_avg": [ 36.8, 34.70677167355097 ], "wc_correctness_avg": [ 19.0, 10.158740079360236 ], "wc_clarity_avg": [ 30.6, 26.85963514271927 ], "wc_relation_to_prior_work_avg": [ 31.6, 13.720058308913996 ], "wc_documentation_avg": [ 24.4, 18.017769007288333 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 410.8, 64.92888417337849 ], "wc_reply_reviewers_avg": [ 9.6, 12.026637102698327 ], "wc_reply_authors_avg": [ 462.0, 112.22299229658778 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2200766434053047205&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "", "author_num": 1 }, { "title": "Task-Robust Pre-Training for Worst-Case Downstream Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71892", "id": "Mr4OpbZEiB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e4322fddd833f83c855660ac65e428d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Mr4OpbZEiB", "openreview": "https://openreview.net/forum?id=Mr4OpbZEiB", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71892", "video": "https://nips.cc/virtual/2023/poster/71892", "author_site": "Jianghui Wang, Yang Chen, Xingyu Xie, Cong Fang, Zhouchen Lin", "tldr": "", "abstract": "Pre-training has achieved remarkable success when transferred to downstream tasks. In machine learning, we care about not only the good performance of a model but also its behavior under reasonable shifts of condition. 
The same philosophy holds when pre-training a foundation model. However, the foundation model may not uniformly behave well for a series of related downstream tasks. This happens, for example, when conducting mask recovery regression where the recovery ability or the training instances diverge like pattern features are extracted dominantly on pre-training, but semantic features are also required on a downstream task. This paper considers pre-training a model that guarantees a uniformly good performance over the downstream tasks. We call this goal as *downstream-task robustness*.\nOur method first separates the upstream task into several representative ones and applies a simple minimax loss for pre-training. We then design an efficient algorithm to solve the minimax loss\nand prove its convergence in the convex setting. In the experiments, we show both on large-scale natural language processing and computer vision datasets our method increases the metrics on worse-case downstream tasks. Additionally, some theoretical explanations for why our loss is beneficial are provided. Specifically, we show fewer samples are inherently required for the most challenging downstream task in some cases.", "keywords": "Pre-training;Robustness;Multi-task learning", "primary_area": "", "supplementary_material": "", "author": "Jianghui Wang;Yang Chen;Xingyu Xie;Cong Fang;Zhouchen Lin", "authorids": "~Jianghui_Wang1;~Yang_Chen17;~Xingyu_Xie1;~Cong_Fang1;~Zhouchen_Lin1", "gender": ";M;M;M;M", "homepage": "https://jianghui-wang.github.io/;https://zero-lab-pku.github.io/personwise/chenyang/;;https://congfang-ml.github.io/;https://zhouchenlin.github.io", "dblp": ";;174/9633;140/6568;l/ZhouchenLin", "google_scholar": ";;BpFCmZMAAAAJ;N2M9RPoAAAAJ;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ", "orcid": ";;;;0000-0003-1493-7569", "linkedin": ";;;;", "or_profile": "~Jianghui_Wang1;~Yang_Chen17;~Xingyu_Xie1;~Cong_Fang1;~Zhouchen_Lin1", "aff": "Beijing Institute for General Artificial Intelligence;Peking University;Peking University;Peking University;Peking University", "aff_domain": "bigai.ai;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Intern;PhD student;PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nwang2023taskrobust,\ntitle={Task-Robust Pre-Training for Worst-Case Downstream Adaptation},\nauthor={Jianghui Wang and Yang Chen and Xingyu Xie and Cong Fang and Zhouchen Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Mr4OpbZEiB}\n}", "github": "", "project": "", "reviewers": "hh6P;UtAg;Jr2o;sSwq", "pdf_size": 3234984, "rating": "4;6;6;6", "confidence": "3;3;3;3", "soundness": "3;4;3;3", "novelty": "2;3;2;3", "presentation": "2;4;3;4", "wc_summary": "191;69;33;73", "wc_strengths": "54;66;23;88", "wc_weaknesses": "247;53;69;160", "wc_questions": "182;62;37;44", "wc_limitations": "30;26;5;1", "wc_review": "704;276;167;366", "wc_reply_reviewers": "463;18;0;15", "wc_reply_authors": "502;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 91.5, 59.52100469582146 ], "wc_strengths_avg": [ 57.75, 23.47738273317535 ], "wc_weaknesses_avg": [ 132.25, 77.81187248742958 ], "wc_questions_avg": [ 81.25, 58.87858269353976 ], "wc_limitations_avg": [ 15.5, 
12.658988901172163 ], "wc_review_avg": [ 378.25, 200.83871016315555 ], "wc_reply_reviewers_avg": [ 124.0, 195.8404963229005 ], "wc_reply_authors_avg": [ 125.5, 217.3723763498941 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nlnegqStDcoJ:scholar.google.com/&scioq=Task-Robust+Pre-Training+for+Worst-Case+Downstream+Adaptation&hl=en&as_sdt=0,44", "gs_version_total": 7, "email": "bigai.ai;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Beijing Institute for General Artificial Intelligence;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bigaiai.org/;http://www.pku.edu.cn", "aff_unique_abbr": "BIGAI;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Convolutional Neural Operators for robust and accurate learning of PDEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71891", "id": "MtekhXRP4h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3c1951b34f7f55ffaecada7fde6bd5a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MtekhXRP4h", "openreview": "https://openreview.net/forum?id=MtekhXRP4h", "poster": "/media/PosterPDFs/NeurIPS%202023/71891.png?t=1702226680.4792414", "slides": "https://nips.cc/virtual/2023/poster/71891", "video": "https://nips.cc/virtual/2023/poster/71891", "author_site": "Bogdan Raonic, Roberto Molinaro, Tim De Ryck, Tobias Rohner, Francesca Bartolucci, Rima Alaifari, Siddhartha Mishra, Emmanuel de B\u00e9zenac", "tldr": "", "abstract": "Although very successfully used in conventional machine learning, convolution-based neural network architectures -- believed to be inconsistent in function space -- have been largely ignored in the context of learning solution operators of PDEs. Here, we present novel adaptations for convolutional neural networks to demonstrate that they are indeed able to process functions as inputs and outputs. The resulting architecture, termed convolutional neural operators (CNOs), is designed specifically to preserve its underlying continuous nature, even when implemented in a discretized form on a computer. We prove a universality theorem to show that CNOs can approximate operators arising in PDEs to desired accuracy.
CNOs are tested on a novel suite of benchmarks, encompassing a diverse set of PDEs with multi-scale solutions and are observed to significantly outperform baselines, paving the way for an alternative framework for robust and accurate operator learning.", "keywords": "PDEs;Neural Operators;Scientific Machine Learning;Convolutional Neural Networks", "primary_area": "", "supplementary_material": "/attachment/e9eb8e9ce0d1df3e3659d343619c43954ca92099.pdf", "author": "Bogdan Raonic;Roberto Molinaro;Tim De Ryck;Tobias Rohner;Francesca Bartolucci;Rima Alaifari;Siddhartha Mishra;Emmanuel de Bezenac", "authorids": "~Bogdan_Raonic1;~Roberto_Molinaro1;~Tim_De_Ryck1;~Tobias_Rohner1;~Francesca_Bartolucci1;~Rima_Alaifari1;~Siddhartha_Mishra1;~Emmanuel_de_Bezenac2", "gender": "M;M;M;M;F;F;M;M", "homepage": "https://www.linkedin.com/in/bogdan-raoni%C4%87-210066167;;https://people.math.ethz.ch/~deryckt/;;https://sites.google.com/view/bartoluccifrancesca;http://www.alaifari.com/;http://www.sam.math.ethz.ch/;", "dblp": "339/6810;249/2799;255/5932;339/6659;255/8922;159/8644;07/2856.html;", "google_scholar": "DN9CCpkAAAAJ;2ohT8yYAAAAJ;o95Uj80AAAAJ;;NUUWKPwAAAAJ;WiX5uI4AAAAJ;FmEqyNcAAAAJ;https://scholar.google.fr/citations?user=KvZw5gYAAAAJ", "orcid": ";;0000-0001-6860-1345;;0000-0001-8748-413X;0000-0003-1608-8580;;", "linkedin": "bogdan-raoni%C4%87-210066167;;;tobias-rohner-502a27214/;;rima-alaifari-6b9b39153/?originalSubdomain=ch;;", "or_profile": "~Bogdan_Raonic1;~Roberto_Molinaro1;~Tim_De_Ryck1;~Tobias_Rohner1;~Francesca_Bartolucci1;~Rima_Alaifari1;~Siddhartha_Mishra1;~Emmanuel_de_Bezenac2", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nraonic2023convolutional,\ntitle={Convolutional Neural Operators for robust and accurate learning of {PDE}s},\nauthor={Bogdan Raonic and Roberto Molinaro and Tim De Ryck and Tobias Rohner and Francesca Bartolucci and Rima Alaifari and Siddhartha Mishra and Emmanuel de Bezenac},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MtekhXRP4h}\n}", "github": "", "project": "", "reviewers": "MYhB;KkQX;uN3N;ZTbu", "pdf_size": 1188253, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;4;3;3", "wc_summary": "131;74;98;67", "wc_strengths": "61;65;73;29", "wc_weaknesses": "281;309;217;77", "wc_questions": "16;147;87;4", "wc_limitations": "2;29;32;17", "wc_review": "491;624;507;194", "wc_reply_reviewers": "45;0;0;24", "wc_reply_authors": "332;38;0;13", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.5, 25.024987512484397 ], "wc_strengths_avg": [ 57.0, 16.73320053068151 ], "wc_weaknesses_avg": [ 221.0, 89.57678270623477 ], "wc_questions_avg": [ 63.5, 57.708318291213445 ], "wc_limitations_avg": [ 20.0, 11.811011811017716 ], "wc_review_avg": [ 454.0, 158.64898360846817 ], "wc_reply_reviewers_avg": [ 17.25, 18.779976038323372 ], "wc_reply_authors_avg": [ 95.75, 
137.08095236027506 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6927620143470800274&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "An Efficient Dataset Condensation Plugin and Its Application to Continual Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71890", "id": "Murj6wcjRw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d5f34e7e70d80f5037ab16a48e2d186e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Murj6wcjRw", "openreview": "https://openreview.net/forum?id=Murj6wcjRw", "poster": "/media/PosterPDFs/NeurIPS%202023/71890.png?t=1700465997.516629", "slides": "https://nips.cc/virtual/2023/poster/71890", "video": "https://nips.cc/virtual/2023/poster/71890", "author_site": "Enneng Yang, Li Shen, Zhenyi Wang, Tongliang Liu, Guibing Guo", "tldr": "", "abstract": "Dataset condensation (DC) distills a large real-world dataset into a small synthetic dataset, with the goal of training a network from scratch on the latter that performs similarly to the former. State-of-the-art (SOTA) DC methods have achieved satisfactory results through techniques such as accuracy, gradient, training trajectory, or distribution matching. However, these works all perform matching in the high-dimensional pixel space, ignoring that natural images are usually locally connected and have lower intrinsic dimensions, resulting in low condensation efficiency. In this work, we propose a simple-yet-efficient dataset condensation plugin that matches the raw and synthetic datasets in a low-dimensional manifold. Specifically, our plugin condenses raw images into two low-rank matrices instead of parameterized image matrices. Our plugin can be easily incorporated into existing DC methods, thereby containing richer raw dataset information at limited storage costs to improve the downstream applications' performance. We verify on multiple public datasets that when the proposed plugin is combined with SOTA DC methods, the performance of the network trained on synthetic data is significantly improved compared to traditional DC methods. 
Moreover, when applying the DC methods as a plugin to continual learning tasks, we observed that our approach effectively mitigates catastrophic forgetting of old tasks under limited memory buffer constraints and avoids the problem of raw data privacy leakage.", "keywords": "Data Condensation;Continual Learning;Few-shot Learning", "primary_area": "", "supplementary_material": "", "author": "Enneng Yang;Li Shen;Zhenyi Wang;Tongliang Liu;Guibing Guo", "authorids": "~Enneng_Yang1;~Li_Shen1;~Zhenyi_Wang1;~Tongliang_Liu1;~Guibing_Guo1", "gender": "M;M;M;;M", "homepage": ";https://sites.google.com/site/mathshenli/home;https://tongliang-liu.github.io/;;https://joey-wang123.github.io/", "dblp": "246/2889;91/3680-8;150/6667;;10/10222-1", "google_scholar": ";yVhgENIAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;;F4uLsroAAAAJ", "orcid": "0000-0001-5419-5286;;;;", "linkedin": ";;;;", "or_profile": "~Enneng_Yang1;~Li_Shen1;~Tongliang_Liu1;~Guibing_Guo1;~Zhenyi_Wang8", "aff": "Northeastern University;JD Explore Academy;University of Sydney;;State University of New York, Buffalo", "aff_domain": "neu.edu.cn;jd.com;sydney.edu.au;;buffalo.edu", "position": "PhD student;Researcher;Lecturer;;PhD student", "bibtex": "@inproceedings{\nyang2023an,\ntitle={An Efficient Dataset Condensation Plugin and Its Application to Continual Learning},\nauthor={Enneng Yang and Li Shen and Zhenyi Wang and Tongliang Liu and Guibing Guo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Murj6wcjRw}\n}", "github": "", "project": "", "reviewers": "FpMf;Lynm;Lbw7;JHfi;VZYQ", "pdf_size": 3655518, "rating": "4;5;6;6;7", "confidence": "4;3;4;4;4", "soundness": "2;3;3;3;4", "novelty": "1;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "76;50;116;44;117", "wc_strengths": "47;27;112;22;131", "wc_weaknesses": "184;115;564;171;108", "wc_questions": "12;4;9;5;24", "wc_limitations": "1;12;1;8;58", "wc_review": "320;208;802;250;438", "wc_reply_reviewers": "204;43;33;16;100", "wc_reply_authors": "1214;152;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "6;4;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 80.6, 31.225630498037987 ], "wc_strengths_avg": [ 67.8, 45.0395381859095 ], "wc_weaknesses_avg": [ 228.4, 170.44013611822774 ], "wc_questions_avg": [ 10.8, 7.19444229944198 ], "wc_limitations_avg": [ 16.0, 21.419617176784463 ], "wc_review_avg": [ 403.6, 213.89118728923825 ], "wc_reply_reviewers_avg": [ 79.2, 68.47890186035404 ], "wc_reply_authors_avg": [ 273.2, 474.06936201361924 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 2.0591260281974 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.29417420270727607, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11177734727097854123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "neu.edu.cn;jd.com;sydney.edu.au;;buffalo.edu", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Northeastern University;JD;University of Sydney;State University of New York at Buffalo", "aff_unique_dep": ";JD Explore Academy;;", "aff_unique_url": "https://www.northeastern.edu;;https://www.sydney.edu.au;https://www.buffalo.edu", "aff_unique_abbr": "NEU;;USYD;SUNY 
Buffalo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;2;0", "aff_country_unique": "United States;;Australia" }, { "title": "Near-Linear Time Algorithm for the Chamfer Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71889", "id": "Mv96iC6TMX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d2fe3a5711a6d488da9e9a78b84ee24c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Mv96iC6TMX", "openreview": "https://openreview.net/forum?id=Mv96iC6TMX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71889", "video": "https://nips.cc/virtual/2023/poster/71889", "author_site": "Ainesh Bakshi, Piotr Indyk, Rajesh Jayaram, Sandeep Silwal, Erik Waingarten", "tldr": "", "abstract": "For any two point sets $A,B \\subset \\mathbb{R}^d$ of size up to $n$, the Chamfer distance from $A$ to $B$ is defined as $\\texttt{CH}(A,B)=\\sum_{a \\in A} \\min_{b \\in B} d_X(a,b)$, where $d_X$ is the underlying distance measure (e.g., the Euclidean or Manhattan distance). The Chamfer distance is a popular measure of dissimilarity between point clouds, used in many machine learning, computer vision, and graphics applications, and admits a straightforward $O(d n^2)$-time brute force algorithm. Further, Chamfer distance is often used as a proxy for the more computationally demanding Earth-Mover (Optimal Transport) Distance. However, the \\emph{quadratic} dependence on $n$ in the running time makes the naive approach intractable for large datasets.\n\nWe overcome this bottleneck and present the first $(1+\\epsilon)$-approximate algorithm for estimating Chamfer distance with a near-linear running time. Specifically, our algorithm runs in time $O(nd \\log (n)/\\epsilon^2)$ and is implementable. Our experiments demonstrate that it is both accurate and fast on large high-dimensional datasets. We believe that our algorithm will open new avenues for analyzing large high-dimensional point clouds. 
We also give evidence that if the goal is to report a $(1+\\epsilon)$-approximate mapping from $A$ to $B$ (as opposed to just its value), then any sub-quadratic time algorithm is unlikely to exist.", "keywords": "chamfer distance;earth mover distance;high dimensional data analysis;nearest neighbor search;high dimensional data;high-dimensional geometry;sublinear algorithms;point clouds;theory", "primary_area": "", "supplementary_material": "/attachment/391563ba22453453e46f16b898fcaa3eb8b4ecdb.zip", "author": "Ainesh Bakshi;Piotr Indyk;Rajesh Jayaram;Sandeep Silwal;Erik Waingarten", "authorids": "~Ainesh_Bakshi1;~Piotr_Indyk1;~Rajesh_Jayaram1;~Sandeep_Silwal1;~Erik_Waingarten1", "gender": "M;;;M;M", "homepage": "http://aineshbakshi.com/;https://people.csail.mit.edu/indyk/;http://rajeshjayaram.com/;https://sandeepsilwal.com;https://sites.google.com/site/erikwaing/home", "dblp": "132/1905;i/PiotrIndyk;202/9970.html;225/4637;", "google_scholar": ";oOwNKsAAAAAJ;Cerc8UYAAAAJ;MnDnUvcAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ainesh_Bakshi1;~Piotr_Indyk1;~Rajesh_Jayaram1;~Sandeep_Silwal1;~Erik_Waingarten1", "aff": "School of Computer Science, Carnegie Mellon University;Massachusetts Institute of Technology;Google;Massachusetts Institute of Technology;, University of Pennsylvania", "aff_domain": "cs.cmu.edu;mit.edu;google.com;mit.edu;cis.upenn.edu", "position": "PhD student;Full Professor;Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbakshi2023nearlinear,\ntitle={Near-Linear Time Algorithm for the Chamfer Distance},\nauthor={Ainesh Bakshi and Piotr Indyk and Rajesh Jayaram and Sandeep Silwal and Erik Waingarten},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Mv96iC6TMX}\n}", "github": "", "project": "", "reviewers": "5NRb;qK6W;3E3v;cYq9;WXaW", "pdf_size": 950203, "rating": "5;6;7;7;7", "confidence": "3;4;4;3;5", "soundness": "3;4;3;3;4", "novelty": "3;3;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "137;115;154;131;303", "wc_strengths": "36;103;78;98;20", "wc_weaknesses": "82;9;66;71;72", "wc_questions": "66;146;5;83;6", "wc_limitations": "2;3;5;1;1", "wc_review": "323;376;308;384;402", "wc_reply_reviewers": "76;0;10;9;60", "wc_reply_authors": "0;0;0;0;114", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 168.0, 68.64400920692205 ], "wc_strengths_avg": [ 67.0, 33.310658954755006 ], "wc_weaknesses_avg": [ 60.0, 26.023066690918657 ], "wc_questions_avg": [ 61.2, 52.71584202116097 ], "wc_limitations_avg": [ 2.4, 1.4966629547095764 ], "wc_review_avg": [ 358.6, 36.49438312946254 ], "wc_reply_reviewers_avg": [ 31.0, 30.828558188796308 ], "wc_reply_authors_avg": [ 22.8, 45.6 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4677071733467428, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8495662249684502142&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.cmu.edu;mit.edu;google.com;mit.edu;cis.upenn.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Carnegie Mellon University;Massachusetts Institute of 
Technology;Google;University of Pennsylvania", "aff_unique_dep": "School of Computer Science;;Google;", "aff_unique_url": "https://www.cmu.edu;https://web.mit.edu;https://www.google.com;https://www.upenn.edu", "aff_unique_abbr": "CMU;MIT;Google;UPenn", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Pittsburgh;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mitigating the Popularity Bias of Graph Collaborative Filtering: A Dimensional Collapse Perspective", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71888", "id": "MvCq52yt9Y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d5753be6f71fbfefaf47aa27ec41279c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MvCq52yt9Y", "openreview": "https://openreview.net/forum?id=MvCq52yt9Y", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71888", "video": "https://nips.cc/virtual/2023/poster/71888", "author_site": "Yifei Zhang, Hao Zhu, Yankai Chen, Zixing Song, Piotr Koniusz, Irwin King", "tldr": "", "abstract": "Graph-based Collaborative Filtering (GCF) is widely used in personalized recommendation systems. However, GCF suffers from a fundamental problem where features tend to occupy the embedding space inefficiently (by spanning only a low-dimensional subspace). Such an effect is characterized in GCF by the embedding space being dominated by a few popular items, with the user embeddings highly concentrated around them. This enhances the so-called Matthew effect of the popularity bias, where popular items are highly recommended whereas the remaining items are ignored. In this paper, we analyze the above effect in GCF and reveal that the simplified graph convolution operation (typically used in GCF) shrinks the singular space of the feature matrix. As typical approaches (i.e., optimizing the uniformity term) fail to prevent the embedding space degradation, we propose a decorrelation-enhanced GCF objective that promotes feature diversity by leveraging the so-called principle of redundancy reduction in embeddings. However, unlike conventional methods that use the Euclidean geometry to relax hard constraints for decorrelation, we exploit non-Euclidean geometry. Such a choice helps maintain the range space of the matrix and obtain a small condition number, which prevents the embedding space degradation. 
Our method outperforms contrastive-based GCF models on several benchmark datasets and improves the performance for unpopular items.", "keywords": "Graph;Collaborative Filtering;Recommendation", "primary_area": "", "supplementary_material": "/attachment/fd6925ebec0942ee43e3c07c6d485007c9c1595f.pdf", "author": "Yifei Zhang;Hao Zhu;Yankai Chen;Zixing Song;Piotr Koniusz;Irwin King", "authorids": "~Yifei_Zhang6;~Hao_Zhu2;~Yankai_Chen2;~Zixing_Song2;~Piotr_Koniusz1;~Irwin_King1", "gender": "M;;M;;;M", "homepage": "https://yifeiacc.github.io/;;https://yankai-chen.github.io/;;https://www.koniusz.com;https://www.cse.cuhk.edu.hk/irwin.king/", "dblp": "55/5266-1.html;;96/5327-1;;25/8616;k/IrwinKing", "google_scholar": "DmwXESQAAAAJ;;https://scholar.google.com.hk/citations?user=5ZOi7UAAAAAJ;;https://scholar.google.co.uk/citations?user=wZ7-1tUAAAAJ;MXvC7tkAAAAJ", "orcid": "0000-0003-4185-8663;;0000-0001-5741-2047;;0000-0002-6340-5289;0000-0001-8106-6447", "linkedin": ";;;;;irwinking/", "or_profile": "~Yifei_Zhang6;~Hao_Zhu2;~Yankai_Chen2;~Zixing_Song2;~Piotr_Koniusz1;~Irwin_King1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;;Department of Computer Science and Engineering, The Chinese University of Hong Kong;;Data61, CSIRO;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;;cse.cuhk.edu.hk;;data61.csiro.au;cuhk.edu.hk", "position": "PhD student;;PhD student;;senior research scientist;Full Professor", "bibtex": "@inproceedings{\nzhang2023mitigating,\ntitle={Mitigating the Popularity Bias of Graph Collaborative Filtering: A Dimensional Collapse Perspective},\nauthor={Yifei Zhang and Hao Zhu and Yankai Chen and Zixing Song and Piotr Koniusz and Irwin King},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MvCq52yt9Y}\n}", "github": "", "project": "", "reviewers": "VmvT;pKCn;CD6b;bxqe;Qs55", "pdf_size": 1750526, "rating": "5;5;6;7;8", "confidence": "3;4;3;2;3", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "95;37;31;112;74", "wc_strengths": "134;41;38;32;53", "wc_weaknesses": "383;97;153;12;12", "wc_questions": "82;35;80;13;17", "wc_limitations": "60;1;1;12;9", "wc_review": "754;211;303;181;165", "wc_reply_reviewers": "18;0;29;63;21", "wc_reply_authors": "15;0;12;15;12", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.8, 31.669543728951954 ], "wc_strengths_avg": [ 59.6, 37.82380202993877 ], "wc_weaknesses_avg": [ 131.4, 136.7224926630582 ], "wc_questions_avg": [ 45.4, 30.003999733368886 ], "wc_limitations_avg": [ 16.6, 22.132329294495868 ], "wc_review_avg": [ 322.8, 220.83151948940622 ], "wc_reply_reviewers_avg": [ 26.2, 20.701690752206687 ], "wc_reply_authors_avg": [ 10.8, 5.564171097297422 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5423261445466404, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7533496559593259044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cse.cuhk.edu.hk;;cse.cuhk.edu.hk;;data61.csiro.au;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;0;1;0", 
"aff_unique_norm": "Chinese University of Hong Kong;CSIRO", "aff_unique_dep": "Department of Computer Science and Engineering;Data61", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.csiro.au", "aff_unique_abbr": "CUHK;CSIRO", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "State Sequences Prediction via Fourier Transform for Representation Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71887", "id": "MvoMDD6emT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d5b94ca503b33d07f9bef8ed8ee4678b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MvoMDD6emT", "openreview": "https://openreview.net/forum?id=MvoMDD6emT", "poster": "/media/PosterPDFs/NeurIPS%202023/71887.png?t=1701438977.1152344", "slides": "https://nips.cc/virtual/2023/poster/71887", "video": "https://nips.cc/virtual/2023/poster/71887", "author_site": "Mingxuan Ye, Yufei Kuang, Jie Wang, Yang Rui, Wengang Zhou, Houqiang Li, Feng Wu", "tldr": "", "abstract": "While deep reinforcement learning (RL) has been demonstrated effective in solving complex control tasks, sample efficiency remains a key challenge due to the large amounts of data required for remarkable performance. Existing research explores the application of representation learning for data-efficient RL, e.g., learning predictive representations by predicting long-term future states. However, many existing methods do not fully exploit the structural information inherent in sequential state signals, which can potentially improve the quality of long-term decision-making but is difficult to discern in the time domain. To tackle this problem, we propose State Sequences Prediction via Fourier Transform (SPF), a novel method that exploits the frequency domain of state sequences to extract the underlying patterns in time series data for learning expressive representations efficiently. Specifically, we theoretically analyze the existence of structural information in state sequences, which is closely related to policy performance and signal regularity, and then propose to predict the Fourier transform of infinite-step future state sequences to extract such information. One of the appealing features of SPF is that it is simple to implement while not requiring storage of infinite-step future states as prediction targets. 
Experiments demonstrate that the proposed method outperforms several state-of-the-art algorithms in terms of both sample efficiency and performance.", "keywords": "Reinforcement learning;Representation learning;State sequences prediction;Fourier transform", "primary_area": "", "supplementary_material": "/attachment/a007c0dc31255cd357625ab314b59de7dfa16ad5.zip", "author": "Mingxuan Ye;Yufei Kuang;Jie Wang;Rui Yang;Wengang Zhou;Houqiang Li;Feng Wu", "authorids": "~Mingxuan_Ye1;~Yufei_Kuang1;~Jie_Wang1;~Rui_Yang9;~Wengang_Zhou1;~Houqiang_Li1;~Feng_Wu1", "gender": "F;M;M;M;M;M;M", "homepage": "https://miralab.ai/people/mingxuan-ye/;https://miralab.ai/people/yufei-kuang/;http://staff.ustc.edu.cn/~jwangx;http://staff.ustc.edu.cn/~zhwg/index.html;https://staff.ustc.edu.cn/~lihq/;;https://www.researchgate.net/profile/Rui_Yang161", "dblp": "320/7470;280/1134;29/5259-5;22/4544-1;59/7017.html;25/3972-1;", "google_scholar": "oyWd5QMAAAAJ;STN3F_oAAAAJ;OugG4dUAAAAJ;8s1JF8YAAAAJ;7sFMIKoAAAAJ;5bInRDEAAAAJ;8cwrNo0AAAAJ", "orcid": ";;;0000-0003-1690-9836;0000-0003-2188-3028;;0009-0004-5137-9302", "linkedin": ";;;;;;", "or_profile": "~Mingxuan_Ye1;~Yufei_Kuang1;~Jie_Wang1;~Wengang_Zhou1;~Houqiang_Li1;~Feng_Wu1;~Yang_Rui1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor;Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nye2023state,\ntitle={State Sequences Prediction via Fourier Transform for Representation Learning},\nauthor={Mingxuan Ye and Yufei Kuang and Jie Wang and Rui Yang and Wengang Zhou and Houqiang Li and Feng Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MvoMDD6emT}\n}", "github": "", "project": "", "reviewers": "i4e2;KaZ7;1q5C;ov12", "pdf_size": 8965598, "rating": "5;5;6;8", "confidence": "4;3;4;3", "soundness": "2;1;3;4", "novelty": "2;2;3;4", "presentation": "2;2;3;4", "wc_summary": "60;67;77;239", "wc_strengths": "53;9;70;29", "wc_weaknesses": "147;88;120;73", "wc_questions": "43;50;86;201", "wc_limitations": "14;1;32;8", "wc_review": "317;215;385;550", "wc_reply_reviewers": "85;0;199;191", "wc_reply_authors": "537;0;524;245", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 110.75, 74.29123434160991 ], "wc_strengths_avg": [ 40.25, 23.1880896151451 ], "wc_weaknesses_avg": [ 107.0, 28.661821295933027 ], "wc_questions_avg": [ 95.0, 63.33640343435993 ], "wc_limitations_avg": [ 13.75, 11.497282287566918 ], "wc_review_avg": [ 366.75, 121.87775637908666 ], "wc_reply_reviewers_avg": [ 118.75, 82.00724053399188 ], "wc_reply_authors_avg": [ 326.5, 221.6760023096772 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 11, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16707777966402408652&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mitigating Test-Time Bias for Fair Image Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71886", "id": "Mxhb2lCOKL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e24570da4fa1c005b189104250993aee-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Mxhb2lCOKL", "openreview": "https://openreview.net/forum?id=Mxhb2lCOKL", "poster": "/media/PosterPDFs/NeurIPS%202023/71886.png?t=1702958143.1108043", "slides": "https://nips.cc/virtual/2023/poster/71886", "video": "https://nips.cc/virtual/2023/poster/71886", "author_site": "Fanjie Kong, Shuai Yuan, Weituo Hao, Ricardo Henao", "tldr": "", "abstract": "We address the challenge of generating fair and unbiased image retrieval results given neutral textual queries (with no explicit gender or race connotations), while maintaining the utility (performance) of the underlying vision-language (VL) model. Previous methods aim to disentangle learned representations of images and text queries from gender and racial characteristics. However, we show these are inadequate at alleviating bias for the desired equal representation result, as there usually exists test-time bias in the target retrieval set. So motivated, we introduce a straightforward technique, Post-hoc Bias Mitigation (PBM), that post-processes the outputs from the pre-trained vision-language model. We evaluate our algorithm on real-world image search datasets, Occupation 1 and 2, as well as two large-scale image-text datasets, MS-COCO and Flickr30k. Our approach achieves the lowest bias, compared with various existing bias-mitigation methods, in text-based image retrieval result while maintaining satisfactory retrieval performance. 
The source code is publicly available at \\url{https://github.com/timqqt/Fair_Text_based_Image_Retrieval}.", "keywords": "Vision-language;Fairness;Text-based Image Retrieval;Deep Learning;Application", "primary_area": "", "supplementary_material": "/attachment/b8da1af60e1b108c88338fe791ac09a021458381.zip", "author": "Fanjie Kong;Shuai Yuan;Weituo Hao;Ricardo Henao", "authorids": "~Fanjie_Kong1;~Shuai_Yuan3;weituohao@tiktok.com;~Ricardo_Henao1", "gender": "M;M;;M", "homepage": "https://github.com/timqqt;https://shuaiyuan1996.github.io/home/;;http://rhenaog.github.io", "dblp": "197/2743;19/1243;;27/3207", "google_scholar": ";uViWK0EAAAAJ;;p_mm4-YAAAAJ", "orcid": ";0000-0003-4039-0464;;0000-0003-4980-845X", "linkedin": ";;;", "or_profile": "~Fanjie_Kong1;~Shuai_Yuan3;weituohao@tiktok.com;~Ricardo_Henao1", "aff": "Duke University;Duke University;;King Abdullah University of Science and Technology", "aff_domain": "duke.edu;cs.duke.edu;;kaust.edu.sa", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nkong2023mitigating,\ntitle={Mitigating Test-Time Bias for Fair Image Retrieval},\nauthor={Fanjie Kong and Shuai Yuan and Weituo Hao and Ricardo Henao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Mxhb2lCOKL}\n}", "github": "", "project": "", "reviewers": "PMeK;VHZT;TTMH;KCNe", "pdf_size": 836082, "rating": "6;6;6;6", "confidence": "4;3;4;4", "soundness": "3;3;1;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "135;125;161;107", "wc_strengths": "60;27;61;138", "wc_weaknesses": "300;82;213;277", "wc_questions": "364;134;193;32", "wc_limitations": "237;12;68;96", "wc_review": "1096;380;696;650", "wc_reply_reviewers": "226;51;138;91", "wc_reply_authors": "736;32;37;44", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 132.0, 19.519221295943137 ], "wc_strengths_avg": [ 71.5, 40.75843470988551 ], "wc_weaknesses_avg": [ 218.0, 84.74373133158582 ], "wc_questions_avg": [ 180.75, 120.46031504192574 ], "wc_limitations_avg": [ 103.25, 82.93182441017441 ], "wc_review_avg": [ 705.5, 255.73961366984193 ], "wc_reply_reviewers_avg": [ 126.5, 65.17860078277225 ], "wc_reply_authors_avg": [ 212.25, 302.4172407452988 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8145292127373911666&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "duke.edu;cs.duke.edu;;kaust.edu.sa", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Duke University;King Abdullah University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.kaust.edu.sa", "aff_unique_abbr": "Duke;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Saudi Arabia" }, { "title": "CORL: Research-oriented Deep Offline Reinforcement Learning Library", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73613", "id": "MzZcXPeqcU", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/62d2cec62b7fd46dd35fa8f2d4aeb52d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=MzZcXPeqcU", "openreview": "https://openreview.net/forum?id=MzZcXPeqcU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73613", "video": "https://nips.cc/virtual/2023/poster/73613", "author_site": "Denis Tarasov, Alexander Nikulin, Dmitry Akimov, Vladislav Kurenkov, Sergey Kolesnikov", "tldr": "", "abstract": "CORL is an open-source library that provides thoroughly benchmarked single-file implementations of both deep offline and offline-to-online reinforcement learning algorithms. It emphasizes a simple developing experience with a straightforward codebase and a modern analysis tracking tool. In CORL, we isolate methods implementation into separate single files, making performance-relevant details easier to recognize. Additionally, an experiment tracking feature is available to help log metrics, hyperparameters, dependencies, and more to the cloud. Finally, we have ensured the reliability of the implementations by benchmarking commonly employed D4RL datasets providing a transparent source of results that can be reused for robust evaluation tools such as performance profiles, probability of improvement, or expected online performance.", "keywords": "Offline Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/cc51a40541d49aad5104af5eabbe124e3969e8ff.pdf", "author": "Denis Tarasov;Alexander Nikulin;Dmitry Akimov;Vladislav Kurenkov;Sergey Kolesnikov", "authorids": "~Denis_Tarasov1;~Alexander_Nikulin1;~Dmitry_Akimov2;~Vladislav_Kurenkov1;~Sergey_Kolesnikov1", "gender": ";M;;M;M", "homepage": "https://dt6a.github.io/;https://howuhh.github.io/;;https://vkurenkov.me;https://scitator.com", "dblp": "255/7697;314/6349;;251/9126;191/1945", "google_scholar": "LQcCkD8AAAAJ;yACvnqUAAAAJ;l7lXoM4AAAAJ;w09vtVsAAAAJ;iukbpVEAAAAJ", "orcid": "0000-0001-9744-5265;;;0000-0003-4078-1086;", "linkedin": "tarasovdeal/;;;;scitator/", "or_profile": "~Denis_Tarasov1;~Alexander_Nikulin1;~Dmitry_Akimov2;~Vladislav_Kurenkov1;~Sergey_Kolesnikov1", "aff": "Jacobs University Bremen;Higher School of Economics, Higher School of Economics;Tinkoff;Tinkoff;Tinkoff", "aff_domain": "jacobs-university.de;edu.hse.ru;tinkoff.ai;tinkoff.ai;tinkoff.ru", "position": "Undergrad student;MS student;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ntarasov2023corl,\ntitle={{CORL}: Research-oriented Deep Offline Reinforcement Learning Library},\nauthor={Denis Tarasov and Alexander Nikulin and Dmitry Akimov and Vladislav Kurenkov and Sergey Kolesnikov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=MzZcXPeqcU}\n}", "github": "", "project": "", "reviewers": "KZpC;ueDk;JMRf;aaf5;BnL5", "pdf_size": 3897760, "rating": "6;7;7;7;8", "confidence": "3;4;4;4;4", "wc_summary_and_contributions": "92;77;109;61;100", "wc_strengths": "48;51;64;120;82", "wc_improvement": "148;51;115;85;90", "wc_limitations": "9;1;267;1;1", "wc_correctness": "11;1;14;7;1", "wc_clarity": "11;31;50;5;1", "wc_relation_to_prior_work": "13;1;72;1;1", "wc_documentation": "27;1;96;8;8", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "360;215;788;289;285", "wc_reply_reviewers": "23;9;28;0;21", "wc_reply_authors": "456;307;724;408;449", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 
0.6324555320336759 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 87.8, 17.03408347989407 ], "wc_strengths_avg": [ 73.0, 26.38181191654584 ], "wc_improvement_avg": [ 97.8, 32.344396732664535 ], "wc_limitations_avg": [ 55.8, 105.64544476691836 ], "wc_correctness_avg": [ 6.8, 5.230678732248808 ], "wc_clarity_avg": [ 19.6, 18.369540005128055 ], "wc_relation_to_prior_work_avg": [ 17.6, 27.594202289611488 ], "wc_documentation_avg": [ 28.0, 35.08275929855005 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 387.4, 205.48537660865313 ], "wc_reply_reviewers_avg": [ 16.2, 10.225458424931373 ], "wc_reply_authors_avg": [ 468.8, 138.24528925066488 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7905694150420949, "gs_citation": 102, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3611783950490269193&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "jacobs-university.de;edu.hse.ru;tinkoff.ai;tinkoff.ai;tinkoff.ru", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Jacobs University;Higher School of Economics;Tinkoff Bank", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jacobs-university.de;https://www.hse.ru;https://www.tinkoff.ru", "aff_unique_abbr": "JUB;HSE;Tinkoff", "aff_campus_unique_index": "0", "aff_campus_unique": "Bremen;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Germany;Russian Federation" }, { "title": "A Holistic Approach to Unifying Automatic Concept Extraction and Concept Importance Estimation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71885", "id": "MziFFGjpkb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abf3682c9cf9245a0294a4bebe4544ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=MziFFGjpkb", "openreview": "https://openreview.net/forum?id=MziFFGjpkb", "poster": "/media/PosterPDFs/NeurIPS%202023/71885.png?t=1701676548.035762", "slides": "https://nips.cc/virtual/2023/poster/71885", "video": "https://nips.cc/virtual/2023/poster/71885", "author_site": "Thomas FEL, Victor Boutin, Louis B\u00e9thune, Louis B\u00e9thune, Remi Cadene, Mazda Moayeri, L\u00e9o And\u00e9ol, Mathieu Chalvidal, Thomas Serre", "tldr": "", "abstract": "In recent years, concept-based approaches have emerged as some of the most promising explainability methods to help us interpret the decisions of Artificial Neural Networks (ANNs). These methods seek to discover intelligible visual ``concepts'' buried within the complex patterns of ANN activations in two key steps: (1) concept extraction followed by (2) importance estimation. While these two steps are shared across methods, they all differ in their specific implementations. 
Here, we introduce a unifying theoretical framework that recasts the first step -- the concept extraction problem -- as a special case of **dictionary learning**, and we formalize the second step -- concept importance estimation -- as a more general form of **attribution method**.\nThis framework offers several advantages as it allows us: (i) to propose new evaluation metrics for comparing different concept extraction approaches; (ii) to leverage modern attribution methods and evaluation metrics to extend and systematically evaluate state-of-the-art concept-based approaches and importance estimation techniques; (iii) to derive theoretical guarantees regarding the optimality of such methods. \n\nWe further leverage our framework to try to tackle a crucial question in explainability: how to *efficiently* identify clusters of data points that are classified based on a similar shared strategy.\nTo illustrate these findings and to highlight the main strategies of a model, we introduce a visual representation called the strategic cluster graph. Finally, we present Lens, a dedicated website that offers a complete compilation of these visualizations for all classes of the ImageNet dataset.", "keywords": "Explainable AI;Concept-based explainability;Interpretability;Concept extraction;Concept importance;Attribution methods", "primary_area": "", "supplementary_material": "/attachment/ed855997c1ffe9b478e9cbff5e8860f495c0bc6d.pdf", "author": "Thomas FEL;Victor Boutin;Louis B\u00e9thune;Remi Cadene;Mazda Moayeri;L\u00e9o And\u00e9ol;Mathieu Chalvidal;Thomas Serre", "authorids": "~Thomas_FEL1;~Victor_Boutin2;~Louis_B\u00e9thune1;~Remi_Cadene1;~Mazda_Moayeri1;~L\u00e9o_And\u00e9ol1;~Mathieu_Chalvidal1;~Thomas_Serre1", "gender": "M;M;M;M;;M;M;M", "homepage": "https://thomasfel.me;;https://louis-bethune.fr/;http://remicadene.com;https://www.cs.umd.edu/people/mmoayeri;;https://serre-lab.clps.brown.edu/;https://leo.andeol.eu", "dblp": "274/2390;228/3333;270/0797;;261/8493;258/0419;;248/3518", "google_scholar": "1m5Mlx4AAAAJ;Z-YF5FsAAAAJ;1zvpCDcAAAAJ;2n5nHU4AAAAJ;4f4m6O0AAAAJ;LB9Moj8AAAAJ;kZlPW4wAAAAJ;SoOpehAAAAAJ", "orcid": ";0000-0003-3372-5940;0000-0003-1498-8251;;;;;0000-0002-8704-4748", "linkedin": ";;;;;;;", "or_profile": "~Thomas_FEL1;~Victor_Boutin2;~Louis_B\u00e9thune1;~Remi_Cadene1;~Mazda_Moayeri1;~Mathieu_Chalvidal1;~Thomas_Serre1;~Leo_Andeol1", "aff": "Brown University;Brown University;Institut de Recherche en Informatique de Toulouse;;University of Maryland, College Park;Brown University;Universit\u00e9 de Toulouse;Institut de Math\u00e9matique de Toulouse", "aff_domain": "brown.edu;brown.edu;irit.fr;;umd.edu;brown.edu;univ-toulouse.fr;math.univ-toulouse.fr", "position": "PhD student;Postdoc;PhD student;;PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nfel2023a,\ntitle={A Holistic Approach to Unifying Automatic Concept Extraction and Concept Importance Estimation},\nauthor={Thomas FEL and Victor Boutin and Louis B{\\'e}thune and Remi Cadene and Mazda Moayeri and L{\\'e}o And{\\'e}ol and Mathieu Chalvidal and Thomas Serre},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=MziFFGjpkb}\n}", "github": "", "project": "", "reviewers": "hQqh;d9UA;Y6po;xA3z", "pdf_size": 6714058, "rating": "5;7;7;7", "confidence": "4;3;3;5", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "83;168;149;117", "wc_strengths": "85;234;179;45", "wc_weaknesses": "240;320;182;120", 
"wc_questions": "496;772;19;32", "wc_limitations": "45;76;26;6", "wc_review": "949;1570;555;320", "wc_reply_reviewers": "363;337;0;35", "wc_reply_authors": "0;855;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 129.25, 32.3293597214668 ], "wc_strengths_avg": [ 135.75, 74.72407577213652 ], "wc_weaknesses_avg": [ 215.5, 73.76143979071992 ], "wc_questions_avg": [ 329.75, 319.54841182518805 ], "wc_limitations_avg": [ 38.25, 25.791229129298976 ], "wc_review_avg": [ 848.5, 473.3172825917093 ], "wc_reply_reviewers_avg": [ 183.75, 166.96313215797073 ], "wc_reply_authors_avg": [ 213.75, 370.2258601178475 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11294492153770817871&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "brown.edu;brown.edu;irit.fr;;umd.edu;brown.edu;univ-toulouse.fr;math.univ-toulouse.fr", "author_num": 8, "aff_unique_index": "0;0;1;2;0;3;4", "aff_unique_norm": "Brown University;Institut de Recherche en Informatique de Toulouse;University of Maryland;Universit\u00e9 de Toulouse;Institut de Math\u00e9matique de Toulouse", "aff_unique_dep": ";Informatique;;;Math\u00e9matique", "aff_unique_url": "https://www.brown.edu;https://www.irit.fr;https://www/umd.edu;https://www.univ-toulouse.fr;https://www.imtoulouse.fr", "aff_unique_abbr": "Brown;IRIT;UMD;UT;IMT", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;1;0;0;1;1", "aff_country_unique": "United States;France" }, { "title": "Theoretical Analysis of the Inductive Biases in Deep Convolutional Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71884", "id": "N0KwVdaaaJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb1bad7a84ef68a64f1afd6577725d45-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N0KwVdaaaJ", "openreview": "https://openreview.net/forum?id=N0KwVdaaaJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71884", "video": "https://nips.cc/virtual/2023/poster/71884", "author_site": "Zihao Wang, Lei Wu", "tldr": "", "abstract": "In this paper, we provide a theoretical analysis of the inductive biases in convolutional neural networks (CNNs). We start by examining the universality of CNNs, i.e., the ability to approximate any continuous functions. We prove that a depth of $\\mathcal{O}(\\log d)$ suffices for deep CNNs to achieve this universality, where $d$ in the input dimension. Additionally, we establish that learning sparse functions with CNNs requires only $\\widetilde{\\mathcal{O}}(\\log^2d)$ samples, indicating that deep CNNs can efficiently capture {\\em long-range} sparse correlations. These results are made possible through a novel combination of the multichanneling and downsampling when increasing the network depth. \n\nWe also delve into the distinct roles of weight sharing and locality in CNNs. 
To this end, we compare the performance of CNNs, locally-connected networks (LCNs), and fully-connected networks (FCNs) on a simple regression task, where LCNs can be viewed as CNNs without weight sharing. On the one hand, we prove that LCNs require ${\\Omega}(d)$ samples while CNNs need only $\\widetilde{\\mathcal{O}}(\\log^2d)$ samples, highlighting the critical role of weight sharing. On the other hand, we prove that FCNs require $\\Omega(d^2)$ samples, whereas LCNs need only $\\widetilde{\\mathcal{O}}(d)$ samples, underscoring the importance of locality. These provable separations quantify the difference between the two biases, and the major observation behind our proof is that weight sharing and locality break different symmetries in the learning process.", "keywords": "Convolutional neural network;Inductive bias;Universality;Sparse function;Equivariance group", "primary_area": "", "supplementary_material": "", "author": "Zihao Wang;Lei Wu", "authorids": "~Zihao_Wang25;~Lei_Wu1", "gender": "Not Specified;M", "homepage": ";https://leiwu0.github.io/", "dblp": ";", "google_scholar": "GMvmr8QAAAAJ;CMweeYcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zihao_Wang25;~Lei_Wu1", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;math.pku.edu.cn", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023theoretical,\ntitle={Theoretical Analysis of the Inductive Biases in Deep Convolutional Networks},\nauthor={Zihao Wang and Lei Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N0KwVdaaaJ}\n}", "github": "", "project": "", "reviewers": "mzq4;o2NN;eXvS;35ph", "pdf_size": 669921, "rating": "5;6;7;7", "confidence": "3;3;5;4", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "2;4;3;3", "wc_summary": "51;49;60;81", "wc_strengths": "81;105;121;91", "wc_weaknesses": "150;104;98;381", "wc_questions": "90;35;26;267", "wc_limitations": "1;18;9;15", "wc_review": "373;311;314;835", "wc_reply_reviewers": "440;0;0;451", "wc_reply_authors": "1208;78;0;764", "reply_reviewers": "2;0;0;3", "reply_authors": "4;2;1;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 60.25, 12.676257334087218 ], "wc_strengths_avg": [ 99.5, 15.058220346375597 ], "wc_weaknesses_avg": [ 183.25, 115.92966617738533 ], "wc_questions_avg": [ 104.5, 96.96519994307236 ], "wc_limitations_avg": [ 10.75, 6.49519052838329 ], "wc_review_avg": [ 458.25, 218.91707905049344 ], "wc_reply_reviewers_avg": [ 222.75, 222.78394803037315 ], "wc_reply_authors_avg": [ 512.5, 499.6045936538214 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4421882854386005288&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;math.pku.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Active Learning-Based 
Species Range Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71883", "id": "N0m9c0FqUV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/82eec786fdfbbfa53450c5feb7d1ac92-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N0m9c0FqUV", "openreview": "https://openreview.net/forum?id=N0m9c0FqUV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71883", "video": "https://nips.cc/virtual/2023/poster/71883", "author_site": "Christian Lange, Elijah Cole, Grant Van Horn, Grant Horn, Oisin Mac Aodha", "tldr": "", "abstract": "We propose a new active learning approach for efficiently estimating the geographic range of a species from a limited number of on the ground observations. We model the range of an unmapped species of interest as the weighted combination of estimated ranges obtained from a set of different species. We show that it is possible to generate this candidate set of ranges by using models that have been trained on large weakly supervised community collected observation data. From this, we develop a new active querying approach that sequentially selects geographic locations to visit that best reduce our uncertainty over an unmapped species\u2019 range. We conduct a detailed evaluation of our approach and compare it to existing active learning methods using an evaluation dataset containing expert-derived ranges for one thousand species. Our results demonstrate that our method outperforms alternative active learning methods and approaches the performance of end-to-end trained models, even when only using a fraction of the data. This highlights the utility of active learning via transfer learned spatial representations for species range estimation. It also emphasizes the value of leveraging emerging large-scale crowdsourced datasets, not only for modeling a species' range, but also for actively discovering them.", "keywords": "species range estimation;active learning;implicit networks", "primary_area": "", "supplementary_material": "", "author": "Christian Lange;Elijah Cole;Grant Van Horn;Oisin Mac Aodha", "authorids": "~Christian_Lange1;~Elijah_Cole1;~Grant_Van_Horn1;~Oisin_Mac_Aodha4", "gender": "M;M;M;M", "homepage": "https://chris-lange.github.io/;https://elijahcole.me/;https://gvh.codes/;https://homepages.inf.ed.ac.uk/omacaod/", "dblp": ";195/2520;144/8033;90/8653", "google_scholar": "ibwKxpwAAAAJ;-atuVWQAAAAJ;PxYY_nsAAAAJ;IfZBjkUAAAAJ", "orcid": "0009-0008-3907-5057;0000-0001-6623-0966;0000-0003-2953-9651;0000-0002-5787-5073", "linkedin": "christian-lange-38a24a2a9/;elicole/;;oisin-mac-aodha-406273273/", "or_profile": "~Christian_Lange1;~Elijah_Cole1;~Grant_Van_Horn1;~Oisin_Mac_Aodha2", "aff": "University of Edinburgh, University of Edinburgh;California Institute of Technology;Cornell University;University of Edinburgh, University of Edinburgh", "aff_domain": "ed.ac.uk;caltech.edu;cornell.edu;ed.ac.uk", "position": "Researcher;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nlange2023active,\ntitle={Active Learning-Based Species Range Estimation},\nauthor={Christian Lange and Elijah Cole and Grant Van Horn and Oisin Mac Aodha},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N0m9c0FqUV}\n}", "github": "", "project": "", "reviewers": "B3F8;RtuW;yFB1;WCfR;yX8t", "pdf_size": 12202826, "rating": "4;6;6;6;7", "confidence": "4;4;3;4;3", "soundness": "4;4;3;4;3", "novelty": 
"2;3;2;3;3", "presentation": "4;4;3;4;4", "wc_summary": "132;97;58;49;774", "wc_strengths": "102;189;56;31;147", "wc_weaknesses": "184;118;170;85;87", "wc_questions": "158;96;55;36;77", "wc_limitations": "13;6;73;53;15", "wc_review": "589;506;412;254;1100", "wc_reply_reviewers": "231;29;23;20;29", "wc_reply_authors": "434;0;0;37;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 222.0, 277.58025866404836 ], "wc_strengths_avg": [ 105.0, 57.803114102961615 ], "wc_weaknesses_avg": [ 128.8, 41.296004649360455 ], "wc_questions_avg": [ 84.4, 41.98380640199266 ], "wc_limitations_avg": [ 32.0, 26.260236099471765 ], "wc_review_avg": [ 572.2, 286.40977636945286 ], "wc_reply_reviewers_avg": [ 66.4, 82.37378223682582 ], "wc_reply_authors_avg": [ 94.2, 170.50325510089243 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5833333333333334, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5002597035014818654&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ed.ac.uk;caltech.edu;cornell.edu;ed.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Edinburgh;California Institute of Technology;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ed.ac.uk;https://www.caltech.edu;https://www.cornell.edu", "aff_unique_abbr": "Edinburgh;Caltech;Cornell", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Proportional Response: Contextual Bandits for Simple and Cumulative Regret Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71882", "id": "N1feehMSG9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6058d0c628a03fd95dfe5c72cbdf9e64-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N1feehMSG9", "openreview": "https://openreview.net/forum?id=N1feehMSG9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71882", "video": "https://nips.cc/virtual/2023/poster/71882", "author_site": "Sanath Kumar Krishnamurthy, Ruohan Zhan, Susan Athey, Emma Brunskill", "tldr": "", "abstract": "In many applications, e.g. in healthcare and e-commerce, the goal of a contextual bandit may be to learn an optimal treatment assignment policy at the end of the experiment. That is, to minimize simple regret. However, this objective remains understudied. We propose a new family of computationally efficient bandit algorithms for the stochastic contextual bandit setting, where a tuning parameter determines the weight placed on cumulative regret minimization (where we establish near-optimal minimax guarantees) versus simple regret minimization (where we establish state-of-the-art guarantees). Our algorithms work with any function class, are robust to model misspecification, and can be used in continuous arm settings. This flexibility comes from constructing and relying on \u201cconformal arm sets\" (CASs). 
CASs provide a set of arms for every context, encompassing the context-specific optimal arm with a certain probability across the context distribution. Our positive results on simple and cumulative regret guarantees are contrasted with a negative result, which shows that no algorithm can achieve instance-dependent simple regret guarantees while simultaneously achieving minimax optimal cumulative regret guarantees.", "keywords": "Contextual Bandits; Adaptive Experimentation; Simple Regret; Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/9f234d894e232011a6a00ff0983954b8a5f43b52.pdf", "author": "Sanath Kumar Krishnamurthy;Ruohan Zhan;Susan Athey;Emma Brunskill", "authorids": "~Sanath_Kumar_Krishnamurthy1;~Ruohan_Zhan1;~Susan_Athey1;~Emma_Brunskill2", "gender": ";F;F;", "homepage": "https://sites.google.com/view/sanath-kumar/home;https://ruohanzhan.github.io;https://athey.people.stanford.edu/;", "dblp": ";;59/6032;", "google_scholar": "lw7Zo2gAAAAJ;;UdaJi94AAAAJ;", "orcid": ";;0000-0001-6934-562X;", "linkedin": ";;;", "or_profile": "~Sanath_Kumar_Krishnamurthy1;~Ruohan_Zhan1;~Susan_Athey1;~Emma_Brunskill2", "aff": "Stanford University;Hong Kong University of Science and Technology;Stanford University;", "aff_domain": "stanford.edu;ust.hk;stanford.edu;", "position": "PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nkrishnamurthy2023proportional,\ntitle={Proportional Response: Contextual Bandits for Simple and Cumulative Regret Minimization},\nauthor={Sanath Kumar Krishnamurthy and Ruohan Zhan and Susan Athey and Emma Brunskill},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N1feehMSG9}\n}", "github": "", "project": "", "reviewers": "G9eA;VVeg;FhEq;X2Xo", "pdf_size": 384287, "rating": "5;6;7;8", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;4", "presentation": "2;1;3;3", "wc_summary": "86;59;57;47", "wc_strengths": "60;47;76;86", "wc_weaknesses": "88;267;98;138", "wc_questions": "130;47;4;3", "wc_limitations": "16;1;5;1", "wc_review": "380;421;240;275", "wc_reply_reviewers": "306;27;33;10", "wc_reply_authors": "178;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 62.25, 14.446020213193666 ], "wc_strengths_avg": [ 67.25, 14.922717580923388 ], "wc_weaknesses_avg": [ 147.75, 71.34554996634338 ], "wc_questions_avg": [ 46.0, 51.647846034466916 ], "wc_limitations_avg": [ 5.75, 6.139014578904337 ], "wc_review_avg": [ 329.0, 73.99662154450026 ], "wc_reply_reviewers_avg": [ 94.0, 122.68863028007118 ], "wc_reply_authors_avg": [ 44.5, 77.07626093681505 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17804232389296442587&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;ust.hk;stanford.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.ust.hk", "aff_unique_abbr": "Stanford;HKUST", 
"aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Hong Kong SAR", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Neural Multi-Objective Combinatorial Optimization with Diversity Enhancement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71881", "id": "N4JkStI1fe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7b5ae891000049b91b3b62de596b1560-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N4JkStI1fe", "openreview": "https://openreview.net/forum?id=N4JkStI1fe", "poster": "/media/PosterPDFs/NeurIPS%202023/71881.png?t=1699954674.004879", "slides": "https://nips.cc/virtual/2023/poster/71881", "video": "https://nips.cc/virtual/2023/poster/71881", "author_site": "Jinbiao Chen, Zizhen Zhang, Zhiguang Cao, Yaoxin Wu, Yining Ma, Te Ye, Jiahai Wang", "tldr": "", "abstract": "Most of existing neural methods for multi-objective combinatorial optimization (MOCO) problems solely rely on decomposition, which often leads to repetitive solutions for the respective subproblems, thus a limited Pareto set. Beyond decomposition, we propose a novel neural heuristic with diversity enhancement (NHDE) to produce more Pareto solutions from two perspectives. On the one hand, to hinder duplicated solutions for different subproblems, we propose an indicator-enhanced deep reinforcement learning method to guide the model, and design a heterogeneous graph attention mechanism to capture the relations between the instance graph and the Pareto front graph. On the other hand, to excavate more solutions in the neighborhood of each subproblem, we present a multiple Pareto optima strategy to sample and preserve desirable solutions. Experimental results on classic MOCO problems show that our NHDE is able to generate a Pareto front with higher diversity, thereby achieving superior overall performance. 
Moreover, our NHDE is generic and can be applied to different neural methods for MOCO.", "keywords": "neural heuristic;diversity enhancement;deep reinforcement learning;multi-objective combinatorial optimization", "primary_area": "", "supplementary_material": "/attachment/b4d394abab050a5baeb2c3e6413dced977437980.pdf", "author": "Jinbiao Chen;Zizhen Zhang;Zhiguang Cao;Yaoxin Wu;Yining Ma;Te Ye;Jiahai Wang", "authorids": "~Jinbiao_Chen1;~Zizhen_Zhang1;~Zhiguang_Cao1;~Yaoxin_Wu2;~Yining_Ma1;~Te_Ye1;~Jiahai_Wang1", "gender": "M;;M;M;M;M;M", "homepage": ";;https://zhiguangcaosg.github.io/;https://yining043.github.io/;;;https://research.tue.nl/en/persons/yaoxin-wu", "dblp": ";45/9055;178/8621;160/6245-1;;00/2989;192/4964", "google_scholar": ";;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;4_VyBTsAAAAJ;;;0qRnmK8AAAAJ", "orcid": "0000-0001-7417-0430;;0000-0002-4499-759X;0000-0002-6639-8547;0000-0001-8152-9931;;0000-0002-3625-6599", "linkedin": ";;;yiningma/;;;", "or_profile": "~Jinbiao_Chen1;~Zizhen_Zhang1;~Zhiguang_Cao1;~Yining_Ma1;~Te_Ye1;~Jiahai_Wang1;~YAOXIN_WU1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Institute for Infocomm Research, A*STAR;National University of Singapore;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Eindhoven University of Technology", "aff_domain": "sysu.edu.cn;sysu.edu.cn;i2r.a-star.edu.sg;u.nus.edu;sysu.edu.cn;sysu.edu.cn;tue.nl", "position": "PhD student;Associate Professor;Scientist ;PhD student;MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023neural,\ntitle={Neural Multi-Objective Combinatorial Optimization with Diversity Enhancement},\nauthor={Jinbiao Chen and Zizhen Zhang and Zhiguang Cao and Yaoxin Wu and Yining Ma and Te Ye and Jiahai Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N4JkStI1fe}\n}", "github": "", "project": "", "reviewers": "ETpH;DbfB;TPc1;fppw", "pdf_size": 608691, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;2;2;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "65;111;130;93", "wc_strengths": "34;129;71;63", "wc_weaknesses": "58;394;324;13", "wc_questions": "294;39;26;73", "wc_limitations": "14;15;12;1", "wc_review": "465;688;563;243", "wc_reply_reviewers": "16;95;26;14", "wc_reply_authors": "14;66;14;14", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.75, 23.951774464536026 ], "wc_strengths_avg": [ 74.25, 34.47734763580284 ], "wc_weaknesses_avg": [ 197.25, 164.4040373591841 ], "wc_questions_avg": [ 108.0, 108.74971264329851 ], "wc_limitations_avg": [ 10.5, 5.5901699437494745 ], "wc_review_avg": [ 489.75, 162.91619778278647 ], "wc_reply_reviewers_avg": [ 37.75, 33.36446462930284 ], "wc_reply_authors_avg": [ 27.0, 22.516660498395403 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=161400816181701682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "sysu.edu.cn;sysu.edu.cn;i2r.a-star.edu.sg;u.nus.edu;sysu.edu.cn;sysu.edu.cn;tue.nl", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0;3", "aff_unique_norm": "Sun Yat-sen University;Institute for Infocomm Research;National 
University of Singapore;Eindhoven University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.i2r.a-star.edu.sg;https://www.nus.edu.sg;https://www.tue.nl", "aff_unique_abbr": "SYSU;I2R;NUS;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0;2", "aff_country_unique": "China;Singapore;Netherlands" }, { "title": "PackQViT: Faster Sub-8-bit Vision Transformers via Full and Packed Quantization on the Mobile", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71880", "id": "N56hAiQvot", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c92edb990a05f2269f0cc3afbb4c952-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N56hAiQvot", "openreview": "https://openreview.net/forum?id=N56hAiQvot", "poster": "/media/PosterPDFs/NeurIPS%202023/71880.png?t=1701737890.9432602", "slides": "https://nips.cc/virtual/2023/poster/71880", "video": "https://nips.cc/virtual/2023/poster/71880", "author_site": "Peiyan Dong, LEI LU, Chao Wu, Cheng Lyu, Geng Yuan, Hao Tang, Yanzhi Wang", "tldr": "", "abstract": "While Vision Transformers (ViTs) have undoubtedly made impressive strides in computer vision (CV), their intricate network structures necessitate substantial computation and memory resources. \nA decision-making process for CV tasks typically entails performing computations with low latency, which is a tricky problem for ViT models.\nModel quantization is a widely used technique to optimize the hardware efficiency of deep neural networks.\nFull quantization under sub-8-bit precision, in particular, is a promising solution to reduce inference latency significantly. \nUnfortunately, current commodity hardware, such as CPUs and GPUs, still struggles to efficiently execute these sub-8-bit quantized networks, as their SIMD instructions only support a granularity of 8 bits or wider.\nAlso, there is a scarcity of literature that presents a full quantization paradigm for ViTs.\nIn this paper, we propose an activation-aware fully sub-8-bit quantization-aware training (QAT) framework called PackQViT for efficient yet accurate ViT acceleration on mobile devices to facilitate real-time AI-powered decision-making.\nSpecifically, in revisiting data activation within the ViT dataflow, two characteristics are relevant to quantization strategy and precision: the long-tailed distribution and systematic channel-wise outliers.\nIn response, we employ either log2 quantization or clipping to address the long-tailed distribution and incorporate outlier-aware training for residual link quantization to regulate the various channel-wise outliers more consistently.\nNotably, due to the systematic fixed pattern, the outlier-aware training approach can predict the channel indices and regularized scales of outliers in advance, thus avoiding the runtime data-adaptive selection during inference.\nFurthermore, we employ Int-$2^{n}$-Softmax, Int-LayerNorm, and Integer GELU to enable integer-only computation flow. Finally, we develop a SIMD-based 4-bit packed multiplier to achieve end-to-end ViT acceleration on mobile phones.\nCompared to prior studies on ViT quantization using 8-bit precision, PackQViT surpasses other works by an improved accuracy ranging from 0.4\\% to 17.9\\% for various widely used ViTs on the ImageNet dataset; under 4-bit precision, PackQViT demonstrates 0.4\\%$\\sim$2.8\\% higher accuracy. 
Compared to the baseline multiplier, our implementations on the Realme GT Android smartphone with Snapdragon 870 SoC CPU achieve a 2.6x$\\sim$3.7x speedup under the 8-bit scenario and a 3.8x$\\sim$5.9x speedup under the 4-bit scenario, which ensures practical real-time performance.", "keywords": "Vision Transformers;Quantization;Real-time on mobile;Sub-8-bit", "primary_area": "", "supplementary_material": "/attachment/65b89f0cd9928efa8c83caeee3311b8daf8e6bfc.pdf", "author": "Peiyan Dong;LEI LU;Chao Wu;Cheng Lyu;Geng Yuan;Hao Tang;Yanzhi Wang", "authorids": "~Peiyan_Dong1;~LEI_LU2;~Chao_Wu4;~Cheng_Lyu2;~Geng_Yuan1;~Hao_Tang6;~Yanzhi_Wang3", "gender": "F;M;M;F;M;M;M", "homepage": "https://peiyanflying.github.io/Peggy_Peiyan.github.io/;;;;;https://ha0tang.github.io/;https://web.northeastern.edu/yanzhiwang/", "dblp": "254/1329;;45/3158-6.html;;205/3007;07/5751-5;", "google_scholar": "OGU3CVoAAAAJ;;;;tBIAgtgAAAAJ;9zJkeEMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-0482-8994;;;0000-0001-9844-992X;0000-0002-2077-1246;", "linkedin": ";;;cheng-lyu/;;hao-tang-887475138/;", "or_profile": "~Peiyan_Dong1;~LEI_LU2;~Chao_Wu4;~Cheng_Lyu2;~Geng_Yuan1;~Hao_Tang6;~Yanzhi_Wang3", "aff": "Northeastern University;Northeastern University;Northeastern University;COCOPIE.INC;Northeastern University;ETH Zurich;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;neu.edu;cocopie.ai;northeastern.edu;vision.ee.ethz.ch;northeastern.edu", "position": "PhD student;PhD student;Postdoc;Researcher;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ndong2023packqvit,\ntitle={Pack{QV}iT: Faster Sub-8-bit Vision Transformers via Full and Packed Quantization on the Mobile},\nauthor={Peiyan Dong and LEI LU and Chao Wu and Cheng Lyu and Geng Yuan and Hao Tang and Yanzhi Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N56hAiQvot}\n}", "github": "", "project": "", "reviewers": "ysuz;ktdW;7eJH;R5sB;yEDd;QVMx", "pdf_size": 2312015, "rating": "4;5;5;6;6;7", "confidence": "4;4;5;4;4;5", "soundness": "2;2;3;3;4;3", "novelty": "2;2;3;3;3;3", "presentation": "3;2;3;3;3;2", "wc_summary": "56;85;100;36;70;102", "wc_strengths": "55;50;81;18;66;148", "wc_weaknesses": "226;208;226;26;136;56", "wc_questions": "142;73;157;9;10;76", "wc_limitations": "19;10;3;11;1;53", "wc_review": "498;426;567;100;283;435", "wc_reply_reviewers": "135;552;18;0;22;22", "wc_reply_authors": "440;700;0;0;0;0", "reply_reviewers": "1;2;1;0;1;1", "reply_authors": "3;2;1;1;1;1", "rating_avg": [ 5.5, 0.9574271077563381 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 74.83333333333333, 23.667253513850923 ], "wc_strengths_avg": [ 69.66666666666667, 39.89430480100581 ], "wc_weaknesses_avg": [ 146.33333333333334, 80.87370126039463 ], "wc_questions_avg": [ 77.83333333333333, 57.37425864928936 ], "wc_limitations_avg": [ 16.166666666666668, 17.477763650485212 ], "wc_review_avg": [ 384.8333333333333, 153.67868716542606 ], "wc_reply_reviewers_avg": [ 124.83333333333333, 196.0955691040015 ], "wc_reply_authors_avg": [ 190.0, 278.98626011567904 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 
0.36927447293799814, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16269259409678911007&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "northeastern.edu;northeastern.edu;neu.edu;cocopie.ai;northeastern.edu;vision.ee.ethz.ch;northeastern.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "Northeastern University;COCOPIE.INC;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;;https://www.ethz.ch", "aff_unique_abbr": "NEU;;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;2;0", "aff_country_unique": "United States;;Switzerland" }, { "title": "Understanding and Improving Ensemble Adversarial Defense", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71879", "id": "N5uUTWLz0E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b589d92785e39486e978fa273d0dc343-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N5uUTWLz0E", "openreview": "https://openreview.net/forum?id=N5uUTWLz0E", "poster": "/media/PosterPDFs/NeurIPS%202023/71879.png?t=1699291553.424924", "slides": "https://nips.cc/virtual/2023/poster/71879", "video": "https://nips.cc/virtual/2023/poster/71879", "author_site": "Yian Deng, Tingting Mu", "tldr": "", "abstract": "The ensemble strategy has become popular in adversarial defense, which trains multiple base classifiers to defend against adversarial attacks in a cooperative manner. Despite the empirical success, theoretical explanations of why an ensemble of adversarially trained classifiers is more robust than a single one remain unclear. To fill this gap, we develop a new error theory dedicated to understanding ensemble adversarial defense, demonstrating a provable 0-1 loss reduction on challenging sample sets in adversarial defense scenarios. Guided by this theory, we propose an effective approach to improve ensemble adversarial defense, named interactive global adversarial training (iGAT). The proposal includes (1) a probabilistic distributing rule that selectively allocates to different base classifiers adversarial examples that are globally challenging to the ensemble, and (2) a regularization term to rescue the severest weaknesses of the base classifiers. 
When tested with various existing ensemble adversarial defense techniques, iGAT is capable of boosting their performance by up to 17\\%, evaluated on the CIFAR10 and CIFAR100 datasets under both white-box and black-box attacks.", "keywords": "adversarial defense;ensemble diversity;robustness;curvature", "primary_area": "", "supplementary_material": "/attachment/c241092dcc17560b2bf981d52353eec8f1ee4c6f.pdf", "author": "Yian Deng;Tingting Mu", "authorids": "~Yian_Deng1;~Tingting_Mu1", "gender": "M;F", "homepage": "https://research.manchester.ac.uk/en/persons/yian.deng;https://personalpages.manchester.ac.uk/staff/tingting.mu/Site/About_Me.html", "dblp": "190/8626.html;89/4352", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.uk/citations?user=dOG10IUAAAAJ", "orcid": "0000-0002-5825-7197;", "linkedin": ";", "or_profile": "~Yian_Deng1;~Tingting_Mu1", "aff": "University of Manchester;University of Manchester", "aff_domain": "cs.manchester.ac.uk;manchester.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ndeng2023understanding,\ntitle={Understanding and Improving Ensemble Adversarial Defense},\nauthor={Yian Deng and Tingting Mu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N5uUTWLz0E}\n}", "github": "", "project": "", "reviewers": "d7H2;XCW5;Xydx;hPTz;cK43", "pdf_size": 329881, "rating": "5;5;5;6;7", "confidence": "4;2;4;3;2", "soundness": "2;3;3;2;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "117;47;193;110;94", "wc_strengths": "88;80;152;75;66", "wc_weaknesses": "141;28;192;53;87", "wc_questions": "6;52;13;227;84", "wc_limitations": "20;15;1;4;14", "wc_review": "372;222;551;469;345", "wc_reply_reviewers": "146;11;0;12;26", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 112.2, 47.199152534764856 ], "wc_strengths_avg": [ 92.2, 30.740201690945362 ], "wc_weaknesses_avg": [ 100.2, 59.50932700005941 ], "wc_questions_avg": [ 76.4, 80.38557084452408 ], "wc_limitations_avg": [ 10.8, 7.138627319029899 ], "wc_review_avg": [ 391.8, 111.9989285663037 ], "wc_reply_reviewers_avg": [ 39.0, 54.13316912947181 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5590169943749475, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13052406050369981000&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cs.manchester.ac.uk;manchester.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Manchester", "aff_unique_dep": "", "aff_unique_url": "https://www.manchester.ac.uk", "aff_unique_abbr": "UoM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Label Robust and Differentially Private Linear Regression: Computational and Statistical Efficiency", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71878", "id": "N6FhEMnxCU", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/47e74fca60b4af4846b7abab188b85f2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N6FhEMnxCU", "openreview": "https://openreview.net/forum?id=N6FhEMnxCU", "poster": "/media/PosterPDFs/NeurIPS%202023/71878.png?t=1702333824.4736092", "slides": "https://nips.cc/virtual/2023/poster/71878", "video": "https://nips.cc/virtual/2023/poster/71878", "author_site": "Xiyang Liu, Prateek Jain, Weihao Kong, Sewoong Oh, Arun Suggala", "tldr": "", "abstract": "We study the canonical problem of linear regression under $(\\varepsilon,\\delta)$-differential privacy when the datapoints are sampled i.i.d.~from a distribution and a fraction of response variables are adversarially corrupted. We provide the first provably efficient -- both computationally and statistically -- method for this problem, assuming standard assumptions on the data distribution. Our algorithm is a variant of the popular differentially private stochastic gradient descent (DP-SGD) algorithm with two key innovations: a full-batch gradient descent to improve sample complexity and a novel adaptive clipping to guarantee robustness. Our method requires only linear time in input size, and still matches the information theoretical optimal sample complexity up to a data distribution dependent condition number factor. Interestingly, the same algorithm, when applied to a setting where there is no adversarial corruption, still improves upon the existing state-of-the-art and achieves a near optimal sample complexity.", "keywords": "Differential Privacy; Private Estimation", "primary_area": "", "supplementary_material": "/attachment/e7a644b735f236707aed43a498b61194059db9cc.pdf", "author": "Xiyang Liu;Prateek Jain;Weihao Kong;Sewoong Oh;Arun Suggala", "authorids": "~Xiyang_Liu1;~Prateek_Jain1;~Weihao_Kong1;~Sewoong_Oh1;~Arun_Suggala1", "gender": ";M;;M;M", "homepage": "https://xiyangl3.github.io/;http://prateekjain.org;https://weihaokong.github.io/;https://homes.cs.washington.edu/~sewoong/;", "dblp": ";https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html;117/4343;80/4366;164/7327", "google_scholar": "7yobGX4AAAAJ;qYhRbJoAAAAJ;loxOHhoAAAAJ;55TAOdgAAAAJ;CKgmfDMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xiyang_Liu1;~Prateek_Jain1;~Weihao_Kong1;~Sewoong_Oh1;~Arun_Suggala1", "aff": "University of Washington;Google;Google;University of Washington;Google", "aff_domain": "cs.washington.edu;google.com;google.com;uw.edu;google.com", "position": "PhD student;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nliu2023label,\ntitle={Label Robust and Differentially Private Linear Regression: Computational and Statistical Efficiency},\nauthor={Xiyang Liu and Prateek Jain and Weihao Kong and Sewoong Oh and Arun Suggala},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N6FhEMnxCU}\n}", "github": "", "project": "", "reviewers": "yhsn;1PSa;kzNP;xavj", "pdf_size": 478539, "rating": "5;5;6;6", "confidence": "2;3;2;3", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "204;78;73;76", "wc_strengths": "67;30;28;60", "wc_weaknesses": "25;44;154;30", "wc_questions": "34;160;6;29", "wc_limitations": "15;5;1;4", "wc_review": "345;317;262;199", "wc_reply_reviewers": "20;41;11;19", "wc_reply_authors": "11;0;11;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 0.5 ], 
"soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.75, 55.59844871936626 ], "wc_strengths_avg": [ 46.25, 17.440971876589906 ], "wc_weaknesses_avg": [ 63.25, 52.855345046645944 ], "wc_questions_avg": [ 57.25, 60.2551864987571 ], "wc_limitations_avg": [ 6.25, 5.261891294962297 ], "wc_review_avg": [ 280.75, 55.84968665981932 ], "wc_reply_reviewers_avg": [ 22.75, 11.098986440211556 ], "wc_reply_authors_avg": [ 5.5, 5.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7481562709300567395&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.washington.edu;google.com;google.com;uw.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Washington;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Learning under Adversarial Nonlinear Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71877", "id": "N6YNe4KxDc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a6f2763089c0bd8f56006c42f09ee24c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N6YNe4KxDc", "openreview": "https://openreview.net/forum?id=N6YNe4KxDc", "poster": "/media/PosterPDFs/NeurIPS%202023/71877.png?t=1697203272.6752408", "slides": "https://nips.cc/virtual/2023/poster/71877", "video": "https://nips.cc/virtual/2023/poster/71877", "author_site": "Pavel Kolev, Georg Martius, Michael Muehlebach", "tldr": "", "abstract": "In many applications, learning systems are required to process continuous non-stationary data streams.\nWe study this problem in an online learning framework and propose an algorithm that can deal with adversarial time-varying and nonlinear constraints.\nAs we show in our work, the algorithm called Constraint Violation Velocity Projection (CVV-Pro) achieves $\\sqrt{T}$ regret and converges to the feasible set at a rate of $1/\\sqrt{T}$, despite the fact that the feasible set is slowly time-varying and a priori unknown to the learner. \nCVV-Pro only relies on local sparse linear approximations of the feasible set and therefore avoids optimizing over the entire set at each iteration, which is in sharp contrast to projected gradients or Frank-Wolfe methods. 
\nWe also empirically evaluate our algorithm on two-player games, where the players are subjected to a shared constraint.", "keywords": "online learning;online convex optimization;constrained optimization;adversarial nonlinear constraints;constraint violation oracle", "primary_area": "", "supplementary_material": "/attachment/9cef40c8491f5fa8f65259dd83ca1104b60033ce.zip", "author": "Pavel Kolev;Georg Martius;Michael Muehlebach", "authorids": "~Pavel_Kolev1;~Georg_Martius1;~Michael_Muehlebach1", "gender": "M;M;", "homepage": "http://pavelkolev.github.io/;https://uni-tuebingen.de/de/264672;https://sites.google.com/view/mmuehlebach/", "dblp": "153/5818.html;47/2706;142/1129", "google_scholar": "https://scholar.google.de/citations?user=m1j0aaoAAAAJ;https://scholar.google.de/citations?user=b-JF-UIAAAAJ;uTfYBAsAAAAJ", "orcid": ";;", "linkedin": "pavel-kolev-72495b1a/;;", "or_profile": "~Pavel_Kolev1;~Georg_Martius1;~Michael_Muehlebach1", "aff": ";Max Planck Institute for Intelligent Systems;Max-Planck Institute", "aff_domain": ";tuebingen.mpg.de;mpg.de", "position": ";Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nkolev2023online,\ntitle={Online Learning under Adversarial Nonlinear Constraints},\nauthor={Pavel Kolev and Georg Martius and Michael Muehlebach},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N6YNe4KxDc}\n}", "github": "", "project": "", "reviewers": "pGFm;7SGB;dZ67;9WyZ;TfpV", "pdf_size": 474082, "rating": "5;5;5;6;7", "confidence": "2;2;2;2;4", "soundness": "2;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "1;2;3;3;4", "wc_summary": "30;107;193;91;80", "wc_strengths": "27;66;162;49;29", "wc_weaknesses": "209;164;572;53;65", "wc_questions": "15;50;215;68;1", "wc_limitations": "6;14;53;17;1", "wc_review": "287;401;1195;278;176", "wc_reply_reviewers": "36;57;54;29;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 100.2, 53.063735262418156 ], "wc_strengths_avg": [ 66.6, 49.785941790830876 ], "wc_weaknesses_avg": [ 212.6, 189.10378103041728 ], "wc_questions_avg": [ 69.8, 76.43925692993096 ], "wc_limitations_avg": [ 18.2, 18.301912468373352 ], "wc_review_avg": [ 467.4, 370.713150562534 ], "wc_reply_reviewers_avg": [ 37.4, 16.906803364326443 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.875, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17690451220581976166&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";tuebingen.mpg.de;mpg.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": "Intelligent Systems;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpg.de", "aff_unique_abbr": "MPI-IS;MPG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "SLaM: Student-Label Mixing for Distillation with Unlabeled Examples", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71876", "id": "N7tw0QXx3z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d56b84c063265da949fe0feb815dcce8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=N7tw0QXx3z", "openreview": "https://openreview.net/forum?id=N7tw0QXx3z", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71876", "video": "https://nips.cc/virtual/2023/poster/71876", "author_site": "Vasilis Kontonis, Fotis Iliopoulos, Khoa Trinh, Cenk Baykal, Gaurav Menghani, Erik Vee", "tldr": "", "abstract": "Knowledge distillation with unlabeled examples is a powerful training paradigm for generating compact and lightweight student models in applications where the amount of labeled data is limited but one has access to a large pool of unlabeled data. In this setting, a large teacher model generates \"soft\" pseudo-labels for the unlabeled dataset which are then used for training the student model. Despite its success in a wide variety of applications, a shortcoming of this approach is that the teacher's pseudo-labels are often noisy, leading to impaired student performance. In this paper, we present a principled method for knowledge distillation with unlabeled examples that we call Student-Label Mixing (SLaM) and we show that it consistently improves over prior approaches by evaluating it on several standard benchmarks. \nFinally, we show that SLaM comes with theoretical guarantees; along the way we give an algorithm improving the best-known sample complexity for learning halfspaces with margin under random classification noise, \nand provide the first convergence analysis for so-called ``forward loss-adjustment\" methods.", "keywords": "Distillation;teacher;student", "primary_area": "", "supplementary_material": "/attachment/b7602c9a91b829ff561aa45ea165eb7e608f7a28.zip", "author": "Vasilis Kontonis;Fotis Iliopoulos;Khoa Trinh;Cenk Baykal;Gaurav Menghani;Erik Vee", "authorids": "~Vasilis_Kontonis1;~Fotis_Iliopoulos1;~Khoa_Trinh2;~Cenk_Baykal1;~Gaurav_Menghani1;~Erik_Vee1", "gender": "M;M;M;M;M;", "homepage": "http://vkonton.github.io/;http://www.filiop.org/;;https://people.csail.mit.edu/baykal/;http://gaurav.ai;", "dblp": "203/8777;147/4790;47/9680;151/9349;137/0537.html;", "google_scholar": "7_44KWAAAAAJ;v3e5F-AAAAAJ;pVTeodYAAAAJ;lRxoOlwAAAAJ;XvncD4IAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Vasilis_Kontonis1;~Fotis_Iliopoulos1;~Khoa_Trinh2;~Cenk_Baykal1;~Gaurav_Menghani1;~Erik_Vee1", "aff": ", University of Texas at Austin;Google;;Google;Google Research;", "aff_domain": "cs.utexas.edu;google.com;;google.com;google.com;", "position": "Postdoc;Researcher;;Research Scientist;Software Engineer;", "bibtex": "@inproceedings{\nkontonis2023slam,\ntitle={{SL}aM: Student-Label Mixing for Distillation with Unlabeled Examples},\nauthor={Vasilis Kontonis and Fotis Iliopoulos and Khoa Trinh and Cenk Baykal and Gaurav Menghani and Erik Vee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=N7tw0QXx3z}\n}", "github": "", "project": "", "reviewers": "7BQA;Un32;E5Wk;PQzZ", "pdf_size": 6055999, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "65;103;66;69", "wc_strengths": "37;52;69;38", "wc_weaknesses": "426;41;171;218", "wc_questions": "3;150;74;1", "wc_limitations": "2;6;1;13", "wc_review": "533;352;381;339", "wc_reply_reviewers": "310;108;9;32", "wc_reply_authors": 
"576;374;8;26", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 15.801503093060482 ], "wc_strengths_avg": [ 49.0, 12.98075498574717 ], "wc_weaknesses_avg": [ 214.0, 138.50812250550507 ], "wc_questions_avg": [ 57.0, 61.216827751852676 ], "wc_limitations_avg": [ 5.5, 4.716990566028302 ], "wc_review_avg": [ 401.25, 77.57053242050102 ], "wc_reply_reviewers_avg": [ 114.75, 118.53137770227764 ], "wc_reply_authors_avg": [ 246.0, 239.96249706985463 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=807242703163069443&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cs.utexas.edu;google.com;;google.com;google.com;", "author_num": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Texas at Austin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "UT Austin;Google", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Austin;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Alternation makes the adversary weaker in two-player games", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71875", "id": "NBMIsOS6B7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3acb49252187efa352a1ae0e4b066ced-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NBMIsOS6B7", "openreview": "https://openreview.net/forum?id=NBMIsOS6B7", "poster": "/media/PosterPDFs/NeurIPS%202023/71875.png?t=1702237614.1854591", "slides": "https://nips.cc/virtual/2023/poster/71875", "video": "https://nips.cc/virtual/2023/poster/71875", "author_site": "Volkan Cevher, Ashok Cutkosky, Ali Kavis, Georgios Piliouras, Stratis Skoulakis, Luca Viano", "tldr": "", "abstract": "Motivated by alternating game-play in two-player games, we study an altenating variant of the \\textit{Online Linear Optimization} (OLO). In alternating OLO, a \\textit{learner} at each round $t \\in [n]$ selects a vector $x^t$ and then an \\textit{adversary} selects a cost-vector $c^t \\in [-1,1]^n$. The learner then experiences cost $(c^t + c^{t-1})^\\top x^t$ instead of $(c^t)^\\top x^t$ as in standard OLO. We establish that under this small twist, the $\\Omega(\\sqrt{T})$ lower bound on the regret is no longer valid. More precisely, we present two online learning algorithms for alternating OLO that respectively admit $\\mathcal{O}((\\log n)^{4/3} T^{1/3})$ regret for the $n$-dimensional simplex and $\\mathcal{O}(\\rho \\log T)$ regret for the ball of radius $\\rho>0$. 
Our results imply that in alternating game-play, an agent can always guarantee $\\tilde{\\mathcal{O}}((\\log n)^{4/3} T^{1/3})$ regret regardless of the strategies of the other agent, while the regret bound improves to $\\mathcal{O}(\\log T)$ when the agent admits only two actions.", "keywords": "Online Learning;Regret Minimization;Game Theory", "primary_area": "", "supplementary_material": "/attachment/37b6e7258c24dc2353198c98cde9e01080bd5d98.pdf", "author": "Volkan Cevher;Ashok Cutkosky;Ali Kavis;Georgios Piliouras;Stratis Skoulakis;Luca Viano", "authorids": "~Volkan_Cevher1;~Ashok_Cutkosky1;~Ali_Kavis1;~Georgios_Piliouras1;~Stratis_Skoulakis2;~Luca_Viano1", "gender": "M;;;;M;", "homepage": "http://lions.epfl.ch;http://www.cs.stanford.edu/~ashokc;https://alikavis.github.io;;http://www.corelab.ntua.gr/~sskoul/;https://scholar.google.com/citations?hl=en&user=e9Bpg5gAAAAJ", "dblp": "70/5301;191/6725;231/7697;62/1236;183/0979.html;268/8179", "google_scholar": "https://scholar.google.ch/citations?user=hlWhzU8AAAAJ;h4AbGp0AAAAJ;sPrPq6oAAAAJ;;Juo2Tk8AAAAJ;E_dAUKEAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Volkan_Cevher1;~Ashok_Cutkosky1;~Ali_Kavis1;~Georgios_Piliouras1;~Stratis_Skoulakis2;~Luca_Viano1", "aff": "Amazon Development Center Germany;Boston University;Swiss Federal Institute of Technology Lausanne;Singapore University of Technology and Design;EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "amazon.de;bu.edu;epfl.ch;sutd.edu.sg;epfl.ch;epfl.ch", "position": "Amazon Scholar;Assistant Professor;PhD student;Associate Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\ncevher2023alternation,\ntitle={Alternation makes the adversary weaker in two-player games},\nauthor={Volkan Cevher and Ashok Cutkosky and Ali Kavis and Georgios Piliouras and Stratis Skoulakis and Luca Viano},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NBMIsOS6B7}\n}", "github": "", "project": "", "reviewers": "5x6H;gHiT;waxV;EHgn", "pdf_size": 360430, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "2;4;4;3", "novelty": "3;3;4;3", "presentation": "2;4;4;3", "wc_summary": "58;108;48;78", "wc_strengths": "56;93;48;78", "wc_weaknesses": "458;133;18;239", "wc_questions": "94;72;271;30", "wc_limitations": "55;1;1;5", "wc_review": "721;407;386;430", "wc_reply_reviewers": "55;17;22;30", "wc_reply_authors": "33;28;28;35", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 73.0, 22.9128784747792 ], "wc_strengths_avg": [ 68.75, 17.795715776557007 ], "wc_weaknesses_avg": [ 212.0, 162.11261517846168 ], "wc_questions_avg": [ 116.75, 91.97655951382396 ], "wc_limitations_avg": [ 15.5, 22.863726730347352 ], "wc_review_avg": [ 486.0, 136.566833455272 ], "wc_reply_reviewers_avg": [ 31.0, 14.611639196202457 ], "wc_reply_authors_avg": [ 31.0, 3.082207001484488 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1363660897068624402&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "amazon.de;bu.edu;epfl.ch;sutd.edu.sg;epfl.ch;epfl.ch", "author_num": 6, 
"aff_unique_index": "0;1;2;3;4;4", "aff_unique_norm": "Amazon;Boston University;Swiss Federal Institute of Technology Lausanne;Singapore University of Technology and Design;EPFL", "aff_unique_dep": "Development Center;;;;", "aff_unique_url": "https://www.amazon.de;https://www.bu.edu;https://www.epfl.ch;https://www.sutd.edu.sg;https://www.epfl.ch", "aff_unique_abbr": "Amazon;BU;EPFL;SUTD;EPFL", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;2;3;2;2", "aff_country_unique": "Germany;United States;Switzerland;Singapore" }, { "title": "Frequency Domain-Based Dataset Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71874", "id": "NEawU0TgKG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ddbbcd937d63d5c6b935c07b1a8222ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NEawU0TgKG", "openreview": "https://openreview.net/forum?id=NEawU0TgKG", "poster": "/media/PosterPDFs/NeurIPS%202023/71874.png?t=1701323753.803621", "slides": "https://nips.cc/virtual/2023/poster/71874", "video": "https://nips.cc/virtual/2023/poster/71874", "author_site": "Donghyeok Shin, Seungjae Shin, Il-chul Moon", "tldr": "", "abstract": "This paper presents FreD, a novel parameterization method for dataset distillation, which utilizes the frequency domain to distill a small-sized synthetic dataset from a large-sized original dataset. Unlike conventional approaches that focus on the spatial domain, FreD employs frequency-based transforms to optimize the frequency representations of each data instance. By leveraging the concentration of spatial domain information on specific frequency components, FreD intelligently selects a subset of frequency dimensions for optimization, leading to a significant reduction in the required budget for synthesizing an instance. Through the selection of frequency dimensions based on the explained variance, FreD demonstrates both theoretical and empirical evidence of its ability to operate efficiently within a limited budget, while better preserving the information of the original dataset compared to conventional parameterization methods. Furthermore, Based on the orthogonal compatibility of FreD with existing methods, we confirm that FreD consistently improves the performances of existing distillation methods over the evaluation scenarios with different benchmark datasets. 
We release the code at https://github.com/sdh0818/FreD.", "keywords": "Dataset distillation;Frequency domain;Dataset condensation", "primary_area": "", "supplementary_material": "/attachment/46b238e55754f860bf6d0d3dee178e6adc81713d.pdf", "author": "DongHyeok Shin;Seungjae Shin;Il-chul Moon", "authorids": "~DongHyeok_Shin1;~Seungjae_Shin1;~Il-chul_Moon1", "gender": ";M;", "homepage": ";https://sites.google.com/view/seungjae-shin;", "dblp": ";29/551;", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": ";seungjae-shin-hoodie/;", "or_profile": "~DongHyeok_Shin1;~Seungjae_Shin1;~Il-chul_Moon1", "aff": ";Korea Advanced Institute of Science & Technology;", "aff_domain": ";kaist.ac.kr;", "position": ";PhD student;", "bibtex": "@inproceedings{\nshin2023frequency,\ntitle={Frequency Domain-Based Dataset Distillation},\nauthor={DongHyeok Shin and Seungjae Shin and Il-chul Moon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NEawU0TgKG}\n}", "github": "", "project": "", "reviewers": "ajn2;Xfn3;5mRL;tks8", "pdf_size": 1309659, "rating": "4;5;6;8", "confidence": "4;5;4;5", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "140;52;98;76", "wc_strengths": "88;30;154;32", "wc_weaknesses": "148;119;165;123", "wc_questions": "58;2;38;23", "wc_limitations": "25;2;76;1", "wc_review": "459;205;531;255", "wc_reply_reviewers": "41;31;0;15", "wc_reply_authors": "309;43;0;35", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 32.38440982942255 ], "wc_strengths_avg": [ 76.0, 50.695167422546305 ], "wc_weaknesses_avg": [ 138.75, 18.793283374652763 ], "wc_questions_avg": [ 30.25, 20.498475553074673 ], "wc_limitations_avg": [ 26.0, 30.422031490352513 ], "wc_review_avg": [ 362.5, 136.07626538085177 ], "wc_reply_reviewers_avg": [ 21.75, 15.610493265749165 ], "wc_reply_authors_avg": [ 96.75, 123.60496551514426 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.50709255283711, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10083146020053336396&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";kaist.ac.kr;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Monte Carlo Tree Search with Boltzmann Exploration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71873", "id": "NG4DaApavi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f670ef96387d9a5a8a51e2ed80cb148d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NG4DaApavi", "openreview": "https://openreview.net/forum?id=NG4DaApavi", "poster": "/media/PosterPDFs/NeurIPS%202023/71873.png?t=1702041264.953116", "slides": "https://nips.cc/virtual/2023/poster/71873", "video": "https://nips.cc/virtual/2023/poster/71873", "author_site": "Michael Painter, Mohamed Baioumy, Nick 
Hawes, Bruno Lacerda", "tldr": "", "abstract": "Monte-Carlo Tree Search (MCTS) methods, such as Upper Confidence Bound applied to Trees (UCT), are instrumental to automated planning techniques. However, UCT can be slow to explore an optimal action when it initially appears inferior to other actions. Maximum ENtropy Tree-Search (MENTS) incorporates the maximum entropy principle into an MCTS approach, utilising Boltzmann policies to sample actions, naturally encouraging more exploration. In this paper, we highlight a major limitation of MENTS: optimal actions for the maximum entropy objective do not necessarily correspond to optimal actions for the original objective. We introduce two algorithms, Boltzmann Tree Search (BTS) and Decaying ENtropy Tree-Search (DENTS), that address these limitations and preserve the benefits of Boltzmann policies, such as allowing actions to be sampled faster by using the Alias method. Our empirical analysis shows that our algorithms show consistent high performance across several benchmark domains, including the game of Go.", "keywords": "Monte Carlo Tree Search;Planning;Entropy;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/69705bf872aa4918064a05c039f07a2d76f77926.zip", "author": "Michael Painter;Mohamed Baioumy;Nick Hawes;Bruno Lacerda", "authorids": "~Michael_Painter1;~Mohamed_Baioumy1;~Nick_Hawes1;~Bruno_Lacerda1", "gender": "M;;M;M", "homepage": ";;https://www.robots.ox.ac.uk/~nickh/;https://bfalacerda.github.io/", "dblp": ";;35/1190;87/10333", "google_scholar": "Io6qV-AAAAAJ;;bRsi4zoAAAAJ;https://scholar.google.co.uk/citations?user=k9XjG_MAAAAJ", "orcid": ";;0000-0002-7556-6098;0000-0003-0862-331X", "linkedin": ";;;", "or_profile": "~Michael_Painter1;~Mohamed_Baioumy1;~Nick_Hawes1;~Bruno_Lacerda1", "aff": "University of Oxford;;University of Oxford;University of Oxford", "aff_domain": "oxford.ac.uk;;ox.ac.uk;ox.ac.uk", "position": "PhD student;;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\npainter2023monte,\ntitle={Monte Carlo Tree Search with Boltzmann Exploration},\nauthor={Michael Painter and Mohamed Baioumy and Nick Hawes and Bruno Lacerda},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NG4DaApavi}\n}", "github": "", "project": "", "reviewers": "yYGy;gH3c;f8iz;nux6", "pdf_size": 585296, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "74;197;255;50", "wc_strengths": "61;151;96;35", "wc_weaknesses": "131;126;242;141", "wc_questions": "174;93;217;70", "wc_limitations": "8;16;93;7", "wc_review": "448;583;903;303", "wc_reply_reviewers": "138;184;29;64", "wc_reply_authors": "235;696;0;768", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;1;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 144.0, 84.94998528546076 ], "wc_strengths_avg": [ 85.75, 43.447525821385966 ], "wc_weaknesses_avg": [ 160.0, 47.64976390287784 ], "wc_questions_avg": [ 138.5, 59.550398823181695 ], "wc_limitations_avg": [ 31.0, 35.96526101670889 ], "wc_review_avg": [ 559.25, 221.7931186939757 ], "wc_reply_reviewers_avg": [ 103.75, 60.78805392509288 ], "wc_reply_authors_avg": [ 424.75, 319.30187519023434 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 
0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5858253472231049451&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "oxford.ac.uk;;ox.ac.uk;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Zero-sum Polymatrix Markov Games: Equilibrium Collapse and Efficient Computation of Nash Equilibria", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71872", "id": "NGiq8qCQNk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bcdcd565f83a8a6681a8269d325a5304-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NGiq8qCQNk", "openreview": "https://openreview.net/forum?id=NGiq8qCQNk", "poster": "/media/PosterPDFs/NeurIPS%202023/71872.png?t=1702404764.797683", "slides": "https://nips.cc/virtual/2023/poster/71872", "video": "https://nips.cc/virtual/2023/poster/71872", "author_site": "Fivos Kalogiannis, Ioannis Panageas", "tldr": "", "abstract": "The works of (Daskalakis et al., 2009, 2022; Jin et al., 2022; Deng et al., 2023) indicate that computing Nash equilibria in multi-player Markov games is a computationally hard task. This fact raises the question of whether or not computational intractability can be circumvented if one focuses on specific classes of Markov games. One such example is two-player zero-sum Markov games, in which efficient ways to compute a Nash equilibrium are known. Inspired by zero-sum polymatrix normal-form games (Cai et al., 2016), we define a class of zero-sum multi-agent Markov games in which there are only pairwise interactions described by a graph that changes per state.\nFor this class of Markov games, we show that an $\\epsilon$-approximate Nash equilibrium can be found efficiently. To do so, we generalize the techniques of (Cai et al., 2016), by showing that the set of coarse-correlated equilibria collapses to the set of Nash equilibria. 
Afterwards, it is possible to use any algorithm in the literature that computes approximate coarse-correlated equilibria over Markovian policies to get an approximate Nash equilibrium.", "keywords": "network games;Nash equilibrium;equilibrium;game theory;learning", "primary_area": "", "supplementary_material": "/attachment/85a7e090064bdd9aef6f35162297b4983c4394a3.pdf", "author": "Fivos Kalogiannis;Ioannis Panageas", "authorids": "~Fivos_Kalogiannis1;~Ioannis_Panageas1", "gender": "M;M", "homepage": "https://fivoskal.github.io/;https://panageas.github.io", "dblp": "305/7347;139/3829", "google_scholar": "FVEj9MIAAAAJ;5NiFWuwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Fivos_Kalogiannis1;~Ioannis_Panageas1", "aff": "University of California, Irvine;Donald Bren School of Information and Computer Sciences, University of California, Irvine", "aff_domain": "uci.edu;ics.uci.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkalogiannis2023zerosum,\ntitle={Zero-sum Polymatrix Markov Games: Equilibrium Collapse and Efficient Computation of Nash Equilibria},\nauthor={Fivos Kalogiannis and Ioannis Panageas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NGiq8qCQNk}\n}", "github": "", "project": "", "reviewers": "zxYQ;u7Ge;5Tpt;PoXL", "pdf_size": 398360, "rating": "6;7;7;7", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "3;3;4;2", "presentation": "3;3;2;3", "wc_summary": "122;109;142;107", "wc_strengths": "82;14;27;38", "wc_weaknesses": "106;215;9;191", "wc_questions": "77;221;447;244", "wc_limitations": "1;51;1;1", "wc_review": "388;610;626;581", "wc_reply_reviewers": "23;14;37;67", "wc_reply_authors": "0;0;0;40", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.0, 13.946325680981353 ], "wc_strengths_avg": [ 40.25, 25.557533136044253 ], "wc_weaknesses_avg": [ 130.25, 80.87451700010331 ], "wc_questions_avg": [ 247.25, 131.89460754708662 ], "wc_limitations_avg": [ 13.5, 21.650635094610966 ], "wc_review_avg": [ 551.25, 95.62263068960192 ], "wc_reply_reviewers_avg": [ 35.25, 20.07952937695503 ], "wc_reply_authors_avg": [ 10.0, 17.320508075688775 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11804505965866412110&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "uci.edu;ics.uci.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Boundary Guided Learning-Free Semantic Control with Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71871", "id": "NIrTSCiIZ7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f737da5ea0e122870fad209509f87d5b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NIrTSCiIZ7", "openreview": 
"https://openreview.net/forum?id=NIrTSCiIZ7", "poster": "/media/PosterPDFs/NeurIPS%202023/71871.png?t=1698534189.8221128", "slides": "https://nips.cc/virtual/2023/poster/71871", "video": "https://nips.cc/virtual/2023/poster/71871", "author_site": "Ye Zhu, Yu Wu, Zhiwei Deng, Olga Russakovsky, Yan Yan", "tldr": "", "abstract": "Applying pre-trained generative denoising diffusion models (DDMs) for downstream tasks such as image semantic editing usually requires either fine-tuning DDMs or learning auxiliary editing networks in the existing literature. In this work, we present our BoundaryDiffusion method for efficient, effective and light-weight semantic control with frozen pre-trained DDMs, without learning any extra networks. As one of the first learning-free diffusion editing works, we start by seeking a more comprehensive understanding of the intermediate high-dimensional latent spaces by theoretically and empirically analyzing their probabilistic and geometric behaviors in the Markov chain. We then propose to further explore the critical step in the denoising trajectory that characterizes the convergence of a pre-trained DDM and introduce an automatic search method. Last but not least, in contrast to the conventional understanding that DDMs have relatively poor semantic behaviors (in generic latent spaces), we prove that the critical latent space we found already forms semantic subspace boundaries at the generic level in unconditional DDMs, which allows us to do controllable manipulation by guiding the denoising trajectory towards the targeted boundary via a single-step operation. We conduct extensive experiments on multiple DPMs architectures (DDPM, iDDPM) and datasets (CelebA, CelebA-HQ, LSUN-church, LSUN-bedroom, AFHQ-dog) with different resolutions (64, 256), achieving superior or state-of-the-art performance in various task scenarios (image semantic editing, text-based editing, unconditional semantic control) to demonstrate the effectiveness.", "keywords": "Diffusion probabilistic models;learning-free applications;high-dimensional semantic boundary;markov mixing", "primary_area": "", "supplementary_material": "/attachment/445570c47b1566e4d8d9cc297d65b4a48ec1b881.zip", "author": "Ye Zhu;Yu Wu;Zhiwei Deng;Olga Russakovsky;Yan Yan", "authorids": "~Ye_Zhu3;~Yu_Wu3;~Zhiwei_Deng3;~Olga_Russakovsky1;~Yan_Yan6", "gender": "F;M;M;F;M", "homepage": "https://l-yezhu.github.io/;https://yu-wu.net;http://www.zhiweideng.com;http://cs.princeton.edu/~olgarus;", "dblp": ";22/0-11;160/3578;52/6883;13/3953-2", "google_scholar": "uk5WuyIAAAAJ;23SZHUwAAAAJ;tWBPUHwAAAAJ;TB5OwW8AAAAJ;", "orcid": ";;;0000-0001-5272-3241;", "linkedin": ";;;;", "or_profile": "~Ye_Zhu3;~Yu_Wu3;~Zhiwei_Deng3;~Olga_Russakovsky1;~Yan_Yan6", "aff": "Illinois Institute of Technology;Wuhan University;Google Deepmind;Princeton University;", "aff_domain": "iit.edu;whu.edu.cn;google.com;princeton.edu;", "position": "PhD student;Full Professor;Research Scientist;Assistant Professor;", "bibtex": "@inproceedings{\nzhu2023boundary,\ntitle={Boundary Guided Learning-Free Semantic Control with Diffusion Models},\nauthor={Ye Zhu and Yu Wu and Zhiwei Deng and Olga Russakovsky and Yan Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NIrTSCiIZ7}\n}", "github": "", "project": "", "reviewers": "TWGA;5XEa;sSmU;EhMb", "pdf_size": 4209945, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;2;3", 
"wc_summary": "74;61;129;120", "wc_strengths": "39;35;39;135", "wc_weaknesses": "39;231;373;99", "wc_questions": "202;1;94;68", "wc_limitations": "13;7;3;24", "wc_review": "367;335;638;446", "wc_reply_reviewers": "915;28;253;29", "wc_reply_authors": "2574;0;473;43", "reply_reviewers": "6;1;3;1", "reply_authors": "8;1;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.0, 29.04307146291521 ], "wc_strengths_avg": [ 62.0, 42.17819341792628 ], "wc_weaknesses_avg": [ 185.5, 128.61862229086424 ], "wc_questions_avg": [ 91.25, 72.38568573965436 ], "wc_limitations_avg": [ 11.75, 7.917543811056558 ], "wc_review_avg": [ 446.5, 117.71257366993554 ], "wc_reply_reviewers_avg": [ 306.25, 363.2157588816873 ], "wc_reply_authors_avg": [ 772.5, 1056.4124431300495 ], "reply_reviewers_avg": [ 2.75, 2.0463381929681126 ], "reply_authors_avg": [ 3.5, 2.692582403567252 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11054957325010394034&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "iit.edu;whu.edu.cn;google.com;princeton.edu;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Illinois Institute of Technology;Wuhan University;DeepMind;Princeton University", "aff_unique_dep": ";;DeepMind;", "aff_unique_url": "https://www.iit.edu;http://www.whu.edu.cn/;https://deepmind.com;https://www.princeton.edu", "aff_unique_abbr": "IIT;WHU;DeepMind;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "Variational Gaussian processes for linear inverse problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71870", "id": "NJK3aSB0z4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5c25c15b5b2fd386ab188a918e54c7d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NJK3aSB0z4", "openreview": "https://openreview.net/forum?id=NJK3aSB0z4", "poster": "/media/PosterPDFs/NeurIPS%202023/71870.png?t=1702307775.0102081", "slides": "https://nips.cc/virtual/2023/poster/71870", "video": "https://nips.cc/virtual/2023/poster/71870", "author_site": "Thibault RANDRIANARISOA, Botond Szabo", "tldr": "", "abstract": "By now Bayesian methods are routinely used in practice for solving inverse problems. In inverse problems the parameter or signal of interest is observed only indirectly, as an image of a given map, and the observations are typically further corrupted with noise. Bayes offers a natural way to regularize these problems via the prior distribution and provides a probabilistic solution, quantifying the remaining uncertainty in the problem. However, the computational costs of standard, sampling based Bayesian approaches can be overly large in such complex models. Therefore, in practice variational Bayes is becoming increasingly popular. Nevertheless, the theoretical understanding of these methods is still relatively limited, especially in context of inverse problems.\n\nIn our analysis we investigate variational Bayesian methods for Gaussian process priors to solve linear inverse problems. 
We consider both mildly and severely ill-posed inverse problems and work with the popular inducing variable variational Bayes approach proposed by Titsias [Titsias, 2009]. We derive posterior contraction rates for the variational posterior in general settings and show that the minimax estimation rate can be attained by correctly tuned procedures. As specific examples we consider a collection of inverse problems including the heat equation, Volterra operator and Radon transform, and inducing variable methods based on population and empirical spectral features.", "keywords": "Linear inverse problems;Gaussian processes;Variational inference;Inducing variables;Asymptotics;Contraction rates", "primary_area": "", "supplementary_material": "/attachment/cf28322956e1ee81214bde8b651a7bd1ad0ca7d3.pdf", "author": "Thibault Christophe RANDRIANARISOA;Botond Szabo", "authorids": "~Thibault_Christophe_RANDRIANARISOA1;~Botond_Szabo2", "gender": "M;", "homepage": "https://thibaultrandrianarisoa.netlify.app;https://botondszabo.com/", "dblp": ";", "google_scholar": "329uecAAAAAJ;", "orcid": ";", "linkedin": "thrandrianarisoa/;", "or_profile": "~Thibault_Christophe_RANDRIANARISOA1;~Botond_Szabo2", "aff": "Bocconi University;Bocconi University", "aff_domain": "unibocconi.it;unibocconi.it", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nrandrianarisoa2023variational,\ntitle={Variational Gaussian processes for linear inverse problems},\nauthor={Thibault Christophe RANDRIANARISOA and Botond Szabo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NJK3aSB0z4}\n}", "github": "", "project": "", "reviewers": "Sbdt;VbCA;KBMK;rih9;TgVX", "pdf_size": 416480, "rating": "5;5;6;6;7", "confidence": "3;2;2;3;3", "soundness": "3;4;3;3;3", "novelty": "3;3;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "60;83;96;57;125", "wc_strengths": "31;28;86;86;136", "wc_weaknesses": "38;46;149;29;551", "wc_questions": "14;12;130;249;10", "wc_limitations": "1;5;23;63;33", "wc_review": "144;174;484;484;855", "wc_reply_reviewers": "13;0;28;98;11", "wc_reply_authors": "0;0;11;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 84.2, 25.023189245178163 ], "wc_strengths_avg": [ 73.4, 40.237296131822774 ], "wc_weaknesses_avg": [ 162.6, 199.00211054157188 ], "wc_questions_avg": [ 83.0, 94.75864076695065 ], "wc_limitations_avg": [ 25.0, 22.3069495897579 ], "wc_review_avg": [ 428.2, 258.369038392761 ], "wc_reply_reviewers_avg": [ 30.0, 35.151102400920514 ], "wc_reply_authors_avg": [ 2.2, 4.4 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9738490639286842528&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "unibocconi.it;unibocconi.it", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Bocconi University", "aff_unique_dep": "", "aff_unique_url": "https://www.bocconi.edu", "aff_unique_abbr": "Bocconi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Italy" }, { 
"title": "Robust low-rank training via approximate orthonormal constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71869", "id": "NJPSvv0u3R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d073692637b4fb8c4eb4b81f0fa2df7b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NJPSvv0u3R", "openreview": "https://openreview.net/forum?id=NJPSvv0u3R", "poster": "/media/PosterPDFs/NeurIPS%202023/71869.png?t=1701545406.5095847", "slides": "https://nips.cc/virtual/2023/poster/71869", "video": "https://nips.cc/virtual/2023/poster/71869", "author_site": "Dayana Savostianova, Emanuele Zangrando, Gianluca Ceruti, Francesco Tudisco, Francesco Tudisco", "tldr": "", "abstract": "With the growth of model and data sizes, a broad effort has been made to design pruning techniques that reduce the resource demand of deep learning pipelines, while retaining model performance. In order to reduce both inference and training costs, a prominent line of work uses low-rank matrix factorizations to represent the network weights. Although able to retain accuracy, we observe that low-rank methods tend to compromise model robustness against adversarial perturbations. By modeling robustness in terms of the condition number of the neural network, we argue that this loss of robustness is due to the exploding singular values of the low-rank weight matrices. Thus, we introduce a robust low-rank training algorithm that maintains the network's weights on the low-rank matrix manifold while simultaneously enforcing approximate orthonormal constraints. The resulting model \nreduces both training and inference costs while ensuring well-conditioning and thus better adversarial robustness, without compromising model accuracy. 
This is shown by extensive numerical evidence and by our main approximation theorem that shows the computed robust low-rank network well-approximates the ideal full model, provided a highly performing low-rank sub-network exists.", "keywords": "low-rank neural networks;Stiefel manifold;orthogonal neural networks;pruning;adversarial robustness;neural network condition number;neural network singular values", "primary_area": "", "supplementary_material": "", "author": "Dayana Savostianova;Emanuele Zangrando;Gianluca Ceruti;Francesco Tudisco", "authorids": "~Dayana_Savostianova1;~Emanuele_Zangrando1;~Gianluca_Ceruti1;~Francesco_Tudisco1", "gender": "F;M;M;M", "homepage": ";;;https://ftudisco.gitlab.io/", "dblp": ";321/1701;;136/5777", "google_scholar": "IIXvow8AAAAJ;https://scholar.google.it/citations?hl=it;eyptuo8AAAAJ;uND_5REAAAAJ", "orcid": "0000-0001-6271-9190;;;0000-0002-8150-4475", "linkedin": ";;;", "or_profile": "~Dayana_Savostianova1;~Emanuele_Zangrando1;~Gianluca_Ceruti1;~Francesco_Tudisco1", "aff": "Gran Sasso Science Institute;Gran Sasso Science Institute;EPFL - EPF Lausanne;Gran Sasso Science Institute", "aff_domain": "gssi.it;gssi.it;epfl.ch;gssi.it", "position": "PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nsavostianova2023robust,\ntitle={Robust low-rank training via approximate orthonormal constraints},\nauthor={Dayana Savostianova and Emanuele Zangrando and Gianluca Ceruti and Francesco Tudisco},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NJPSvv0u3R}\n}", "github": "", "project": "", "reviewers": "sVC2;8Zii;t9ct;UZkq;J9p9", "pdf_size": 932589, "rating": "5;6;6;7;7", "confidence": "5;4;4;4;4", "soundness": "2;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;4;3", "wc_summary": "47;37;75;39;58", "wc_strengths": "47;32;78;108;63", "wc_weaknesses": "210;24;213;75;59", "wc_questions": "2;155;175;335;29", "wc_limitations": "1;1;127;7;20", "wc_review": "307;249;668;564;229", "wc_reply_reviewers": "16;11;192;37;0", "wc_reply_authors": "0;0;30;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 51.2, 14.005713120009277 ], "wc_strengths_avg": [ 65.6, 26.203816515919964 ], "wc_weaknesses_avg": [ 116.2, 79.54721868173644 ], "wc_questions_avg": [ 139.2, 119.02671968932019 ], "wc_limitations_avg": [ 31.2, 48.4 ], "wc_review_avg": [ 403.4, 178.52349985366072 ], "wc_reply_reviewers_avg": [ 51.2, 71.41820496204032 ], "wc_reply_authors_avg": [ 6.0, 12.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8017837257372733, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12837520152557322105&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "gssi.it;gssi.it;epfl.ch;gssi.it", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Gran Sasso Science Institute;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.gssi.it;https://www.epfl.ch", "aff_unique_abbr": ";EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Italy;Switzerland" }, { "title": "Latent 
Diffusion for Language Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71868", "id": "NKdtztladR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2a2bd5d5051ff6af52e1ef60aefd255-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NKdtztladR", "openreview": "https://openreview.net/forum?id=NKdtztladR", "poster": "/media/PosterPDFs/NeurIPS%202023/71868.png?t=1702305940.627381", "slides": "https://nips.cc/virtual/2023/poster/71868", "video": "https://nips.cc/virtual/2023/poster/71868", "author_site": "Justin Lovelace, Varsha Kishore, Chao Wan, Eliot Shekhtman, Kilian Weinberger", "tldr": "", "abstract": "Diffusion models have achieved great success in modeling continuous data modalities such as images, audio, and video, but have seen limited use in discrete domains such as language. Recent attempts to adapt diffusion to language have presented diffusion as an alternative to existing pretrained language models. We view diffusion and existing language models as complementary. We demonstrate that encoder-decoder language models can be utilized to efficiently learn high-quality language autoencoders. We then demonstrate that continuous diffusion models can be learned in the latent space of the language autoencoder, enabling us to sample continuous latent representations that can be decoded into natural language with the pretrained decoder. We validate the effectiveness of our approach for unconditional, class-conditional, and sequence-to-sequence language generation. We demonstrate across multiple diverse data sets that our latent language diffusion models are significantly more effective than previous diffusion language models. Our code is available at \\url{https://github.com/justinlovelace/latent-diffusion-for-language}.", "keywords": "diffusion;language generation", "primary_area": "", "supplementary_material": "", "author": "Justin Lovelace;Varsha Kishore;Chao Wan;Eliot Seo Shekhtman;Kilian Q Weinberger", "authorids": "~Justin_Lovelace1;~Varsha_Kishore1;~Chao_Wan1;~Eliot_Seo_Shekhtman1;~Kilian_Q_Weinberger1", "gender": "M;F;;M;M", "homepage": "https://justinlovelace.github.io/;;;;http://www.cs.cornell.edu/~kilian/", "dblp": "251/9496;239/5696;;;88/4801", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;jsxk8vsAAAAJ", "orcid": ";;0009-0004-9562-6328;;0009-0008-9313-7239", "linkedin": ";;;eliot-shekhtman/;", "or_profile": "~Justin_Lovelace1;~Varsha_Kishore1;~Chao_Wan1;~Eliot_Seo_Shekhtman1;~Kilian_Q_Weinberger1", "aff": "Cornell University;Cornell University;Cornell University;;ASAPP Inc.", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;;asapp.com", "position": "PhD student;PhD student;PhD student;;Principal Researcher", "bibtex": "@inproceedings{\nlovelace2023latent,\ntitle={Latent Diffusion for Language Generation},\nauthor={Justin Lovelace and Varsha Kishore and Chao Wan and Eliot Seo Shekhtman and Kilian Q Weinberger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NKdtztladR}\n}", "github": "", "project": "", "reviewers": "T1gz;LWig;SqZp;TpnG;5zaG", "pdf_size": 857732, "rating": "3;4;5;6;6", "confidence": "4;5;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "43;63;94;118;52", "wc_strengths": "63;23;60;39;75", "wc_weaknesses": "131;164;220;163;38", "wc_questions": "24;19;16;70;115", "wc_limitations": "10;5;1;8;4", "wc_review": 
"271;274;391;398;284", "wc_reply_reviewers": "258;62;57;23;13", "wc_reply_authors": "1105;287;166;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;4;2;1;1", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 74.0, 27.93564031841762 ], "wc_strengths_avg": [ 52.0, 18.568791021496256 ], "wc_weaknesses_avg": [ 143.2, 59.89791315229605 ], "wc_questions_avg": [ 48.8, 38.49883115108821 ], "wc_limitations_avg": [ 5.6, 3.1368774282716245 ], "wc_review_avg": [ 323.6, 58.09165172380624 ], "wc_reply_reviewers_avg": [ 82.6, 89.71198359193714 ], "wc_reply_authors_avg": [ 311.6, 411.2121593532954 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3429971702850177, "gs_citation": 94, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9207206647173322214&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "cornell.edu;cornell.edu;cornell.edu;;asapp.com", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Cornell University;ASAPP Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.asapp.com", "aff_unique_abbr": "Cornell;ASAPP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding the Limitations of Deep Models for Molecular property prediction: Insights and Solutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71867", "id": "NLFqlDeuzt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cc83e97320000f4e08cb9e293b12cf7e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NLFqlDeuzt", "openreview": "https://openreview.net/forum?id=NLFqlDeuzt", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71867", "video": "https://nips.cc/virtual/2023/poster/71867", "author_site": "Jun Xia, Lecheng Zhang, Xiao Zhu, Yue Liu, Zhangyang Gao, Bozhen Hu, Cheng Tan, Jiangbin Zheng, Siyuan Li, Stan Z. Li", "tldr": "", "abstract": "Molecular Property Prediction (MPP) is a crucial task in the AI-driven Drug Discovery (AIDD) pipeline, which has recently gained considerable attention thanks to advancements in deep learning. However, recent research has revealed that deep models struggle to beat traditional non-deep ones on MPP. In this study, we benchmark 12 representative models (3 non-deep models and 9 deep models) on 15 molecule datasets. Through the most comprehensive study to date, we make the following key observations: \\textbf{(\\romannumeral 1)} Deep models are generally unable to outperform non-deep ones; \\textbf{(\\romannumeral 2)} The failure of deep models on MPP cannot be solely attributed to the small size of molecular datasets; \\textbf{(\\romannumeral 3)} In particular, some traditional models including XGB and RF that use molecular fingerprints as inputs tend to perform better than other competitors. Furthermore, we conduct extensive empirical investigations into the unique patterns of molecule data and inductive biases of various models underlying these phenomena. These findings stimulate us to develop a simple-yet-effective feature mapping method for molecule data prior to feeding them into deep models. 
Empirically, deep models equipped with this mapping method can beat non-deep ones on most MoleculeNet datasets. Notably, the effectiveness is further corroborated by extensive experiments on a cutting-edge dataset related to COVID-19 and on activity cliff datasets.", "keywords": "Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Jun Xia;Lecheng Zhang;Xiao Zhu;Yue Liu;Zhangyang Gao;Bozhen Hu;Cheng Tan;Jiangbin Zheng;Siyuan Li;Stan Z. Li", "authorids": "~Jun_Xia1;~Lecheng_Zhang1;~Xiao_Zhu4;~Yue_Liu10;~Zhangyang_Gao1;~Bozhen_Hu1;~Cheng_Tan1;~Jiangbin_Zheng3;~Siyuan_Li6;~Stan_Z._Li2", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "http://junxia97.github.io/;https://westlake.edu.cn/;https://github.com/HexagonStar;https://yueliu1999.github.io/;;;https://chengtan9907.github.io/;;https://lupin1998.github.io/;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": ";;;74/1932-8;275/3266;279/8665;70/1533-12.html;;63/9705-2;l/StanZLi", "google_scholar": "aPKKpSYAAAAJ;;X8b1RoYAAAAJ;5tfpu3MAAAAJ;4SclT-QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;6kTV6aMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;0000-0003-1026-6083;0000-0002-3428-0114;;0000-0003-3305-0103;0000-0001-6806-2468;", "linkedin": ";;;;;;;;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Jun_Xia1;~Lecheng_Zhang1;~Xiao_Zhu4;~Yue_Liu10;~Zhangyang_Gao1;~Bozhen_Hu1;~Cheng_Tan1;~Jiangbin_Zheng3;~Siyuan_Li6;~Stan_Z._Li1", "aff": "Westlake University, China;Westlake University;Westlake University;National University of Defense Technology;Westlake University, China;Westlake University;Zhejiang University & Westlake University;Westlake University;Alibaba Group;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu;westlake.edu.cn;nudt.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;alibaba-inc.com;westlake.edu.cn", "position": "PhD student;Undergrad student;Intern;MS student;PhD student;PhD student;PhD student;PhD student;Intern;Chair Professor", "bibtex": "@inproceedings{\nxia2023understanding,\ntitle={Understanding the Limitations of Deep Models for Molecular property prediction: Insights and Solutions},\nauthor={Jun Xia and Lecheng Zhang and Xiao Zhu and Yue Liu and Zhangyang Gao and Bozhen Hu and Cheng Tan and Jiangbin Zheng and Siyuan Li and Stan Z. 
Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NLFqlDeuzt}\n}", "github": "", "project": "", "reviewers": "ah6B;zBbT;rZmN;SHsm;1yt1", "pdf_size": 500334, "rating": "3;3;4;5;7", "confidence": "4;4;4;5;4", "soundness": "1;2;3;3;3", "novelty": "2;1;2;2;4", "presentation": "2;3;3;3;3", "wc_summary": "178;142;71;102;93", "wc_strengths": "308;33;77;93;160", "wc_weaknesses": "562;87;344;396;216", "wc_questions": "21;137;8;76;56", "wc_limitations": "16;51;1;7;6", "wc_review": "1085;450;501;674;531", "wc_reply_reviewers": "356;37;167;312;0", "wc_reply_authors": "2438;1985;1631;1185;1017", "reply_reviewers": "1;1;2;1;0", "reply_authors": "6;5;5;4;3", "rating_avg": [ 4.4, 1.4966629547095764 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.2, 0.9797958971132712 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 117.2, 38.11246515249309 ], "wc_strengths_avg": [ 134.2, 96.00291662236101 ], "wc_weaknesses_avg": [ 321.0, 161.25507743944064 ], "wc_questions_avg": [ 59.6, 45.66223822810266 ], "wc_limitations_avg": [ 16.2, 18.059900331950892 ], "wc_review_avg": [ 648.2, 230.7148889863851 ], "wc_reply_reviewers_avg": [ 174.4, 142.31036504766615 ], "wc_reply_authors_avg": [ 1651.2, 519.6992976712592 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 4.6, 1.0198039027185568 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.20044593143431827, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13358498626187258377&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "westlake.edu.cn;westlake.edu;westlake.edu.cn;nudt.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;alibaba-inc.com;westlake.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;1;0;0;2;0;3;0", "aff_unique_norm": "Westlake University;National University of Defense Technology;Zhejiang University;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.westlake.edu.cn;http://www.nudt.edu.cn/;http://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "WU;NUDT;ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Policy Gradient for Rectangular Robust Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71866", "id": "NLpXRrjpa6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ba8aee784ffe0813890288b334444eda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NLpXRrjpa6", "openreview": "https://openreview.net/forum?id=NLpXRrjpa6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71866", "video": "https://nips.cc/virtual/2023/poster/71866", "author_site": "Navdeep Kumar, Esther Derman, Matthieu Geist, Kfir Y. Levy, Shie Mannor", "tldr": "", "abstract": "Policy gradient methods have become a standard for training reinforcement learning agents in a scalable and efficient manner. However, they do not account for transition uncertainty, whereas learning robust policies can be computationally expensive. In this paper, we introduce robust policy gradient (RPG), a policy-based method that efficiently solves rectangular robust Markov decision processes (MDPs). We provide a closed-form expression for the worst occupation measure. 
Incidentally, we find that the worst kernel is a rank-one perturbation of the nominal. Combining the worst occupation measure with a robust Q-value estimation yields an explicit form of the robust gradient. Our resulting RPG can be estimated from data with the same time complexity as its non-robust equivalent. Hence, it relieves the computational burden of convex optimization problems required for training robust policies by current policy gradient approaches.", "keywords": "robust Markov decision process;policy gradient", "primary_area": "", "supplementary_material": "/attachment/374fa1e354a83a1adabc2a898293c5a0f5cb3d15.pdf", "author": "Navdeep Kumar;Esther Derman;Matthieu Geist;Kfir Yehuda Levy;Shie Mannor", "authorids": "~Navdeep_Kumar1;~Esther_Derman1;~Matthieu_Geist1;~Kfir_Yehuda_Levy1;~Shie_Mannor2", "gender": "M;;M;M;M", "homepage": ";;;http://kfiryehud.wixsite.com/kfir-y-levy;https://shie.net.technion.ac.il", "dblp": ";;38/6508;83/11388;20/1669", "google_scholar": ";;ectPLEUAAAAJ;;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ", "orcid": ";;;;", "linkedin": "navdeepsjb/;;;;", "or_profile": "~Navdeep_Kumar1;~Esther_Derman1;~Matthieu_Geist1;~Kfir_Yehuda_Levy1;~Shie_Mannor2", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;;Google;Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion", "aff_domain": "campus.technion.ac.il;;google.com;technion.ac.il;technion.il", "position": "PhD student;;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkumar2023policy,\ntitle={Policy Gradient for Rectangular Robust Markov Decision Processes},\nauthor={Navdeep Kumar and Esther Derman and Matthieu Geist and Kfir Yehuda Levy and Shie Mannor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NLpXRrjpa6}\n}", "github": "", "project": "", "reviewers": "bn4R;rkBw;qyVV;2g5m;c24r", "pdf_size": 436002, "rating": "5;6;6;6;7", "confidence": "4;3;3;3;4", "soundness": "3;4;3;3;3", "novelty": "3;4;3;3;3", "presentation": "4;4;3;4;3", "wc_summary": "69;59;60;53;160", "wc_strengths": "30;89;58;69;190", "wc_weaknesses": "113;215;72;80;172", "wc_questions": "95;27;111;25;91", "wc_limitations": "23;30;3;1;13", "wc_review": "330;420;304;228;626", "wc_reply_reviewers": "75;20;8;20;37", "wc_reply_authors": "217;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.2, 40.226359517112655 ], "wc_strengths_avg": [ 87.2, 54.82116379647553 ], "wc_weaknesses_avg": [ 130.4, 55.029446662673244 ], "wc_questions_avg": [ 69.8, 36.38900932974131 ], "wc_limitations_avg": [ 14.0, 11.20714058089752 ], "wc_review_avg": [ 381.6, 136.735657383142 ], "wc_reply_reviewers_avg": [ 32.0, 23.400854685246006 ], "wc_reply_authors_avg": [ 43.4, 86.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15118591071664536762&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "campus.technion.ac.il;;google.com;technion.ac.il;technion.il", "author_num": 5, 
"aff_unique_index": "0;1;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.google.com", "aff_unique_abbr": "Technion;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Israel;United States" }, { "title": "Extracting Reward Functions from Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71865", "id": "NN60HKTur2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d23562fcedc078e27a3be813ff6feb5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NN60HKTur2", "openreview": "https://openreview.net/forum?id=NN60HKTur2", "poster": "/media/PosterPDFs/NeurIPS%202023/71865.png?t=1702163036.859294", "slides": "https://nips.cc/virtual/2023/poster/71865", "video": "https://nips.cc/virtual/2023/poster/71865", "author_site": "Felipe Nuti, Tim Franzmeyer, Jo\u00e3o Henriques", "tldr": "", "abstract": "Diffusion models have achieved remarkable results in image generation, and have similarly been used to learn high-performing policies in sequential decision-making tasks. \nDecision-making diffusion models can be trained on lower-quality data, and then be steered with a reward function to generate near-optimal trajectories.\nWe consider the problem of extracting a reward function by comparing a decision-making diffusion model that models low-reward behavior and one that models high-reward behavior; a setting related to inverse reinforcement learning. \nWe first define the notion of a \\emph{relative reward function of two diffusion models} and show conditions under which it exists and is unique. \nWe then devise a practical learning algorithm for extracting it by aligning the gradients of a reward function -- parametrized by a neural network -- to the difference in outputs of both diffusion models.\nOur method finds correct reward functions in navigation environments, and we demonstrate that steering the base model with the learned reward functions results in significantly increased performance in standard locomotion benchmarks.\nFinally, we demonstrate that our approach generalizes beyond sequential decision-making by learning a reward-like function from two large-scale image generation diffusion models. The extracted reward function successfully assigns lower rewards to harmful images.", "keywords": "Diffusion models;sequential decision making;inverse reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Felipe Pinto Coelho Nuti;Tim Franzmeyer;Joao F. Henriques", "authorids": "~Felipe_Pinto_Coelho_Nuti1;~Tim_Franzmeyer1;~Joao_F._Henriques1", "gender": "M;;M", "homepage": "https://felipenuti.github.io/;https://www.robots.ox.ac.uk/~frtim/;http://www.robots.ox.ac.uk/~joao/", "dblp": ";298/1117;31/8617.html", "google_scholar": "BLMdYoEAAAAJ;Jvv1rkkAAAAJ;aCQjyp0AAAAJ", "orcid": ";;", "linkedin": "felipe-nuti/;tim-franzmeyer-370257110/;", "or_profile": "~Felipe_Pinto_Coelho_Nuti1;~Tim_Franzmeyer1;~Joao_F._Henriques1", "aff": "Citadel Securities;University of Oxford;University of Oxford", "aff_domain": "citadelsecurities.com;ox.ac.uk;ox.ac.uk", "position": "Intern;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nnuti2023extracting,\ntitle={Extracting Reward Functions from Diffusion Models},\nauthor={Felipe Pinto Coelho Nuti and Tim Franzmeyer and Joao F. 
Henriques},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NN60HKTur2}\n}", "github": "", "project": "", "reviewers": "TdMi;CPFx;rnSj;qcPp;rtGf;NVyM", "pdf_size": 17005053, "rating": "4;4;5;5;7;7", "confidence": "2;4;3;4;3;3", "soundness": "2;3;3;3;4;4", "novelty": "2;2;3;2;3;2", "presentation": "2;2;3;4;3;4", "wc_summary": "65;172;108;251;119;118", "wc_strengths": "24;39;90;117;74;58", "wc_weaknesses": "56;614;76;271;163;99", "wc_questions": "2;74;16;276;20;78", "wc_limitations": "1;35;1;1;1;7", "wc_review": "148;934;291;916;377;360", "wc_reply_reviewers": "0;465;9;633;14;15", "wc_reply_authors": "34;945;34;744;0;0", "reply_reviewers": "0;1;1;2;1;1", "reply_authors": "2;4;2;4;1;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 138.83333333333334, 59.03506490966949 ], "wc_strengths_avg": [ 67.0, 31.069813860616115 ], "wc_weaknesses_avg": [ 213.16666666666666, 192.82065645452914 ], "wc_questions_avg": [ 77.66666666666667, 93.27676857372126 ], "wc_limitations_avg": [ 7.666666666666667, 12.418624006798105 ], "wc_review_avg": [ 504.3333333333333, 306.48907901515116 ], "wc_reply_reviewers_avg": [ 189.33333333333334, 258.950874792026 ], "wc_reply_authors_avg": [ 292.8333333333333, 394.623202843196 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.06482037235521647, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4907181018284852788&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "citadelsecurities.com;ox.ac.uk;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Citadel Securities;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.citadel.com;https://www.ox.ac.uk", "aff_unique_abbr": "Citadel;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "To Stay or Not to Stay in the Pre-train Basin: Insights on Ensembling in Transfer Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71864", "id": "NNooZoQpP4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/336572db3e99930814d6b328d4220cb6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NNooZoQpP4", "openreview": "https://openreview.net/forum?id=NNooZoQpP4", "poster": "/media/PosterPDFs/NeurIPS%202023/71864.png?t=1701547241.4671352", "slides": "https://nips.cc/virtual/2023/poster/71864", "video": "https://nips.cc/virtual/2023/poster/71864", "author_site": "Ildus Sadrtdinov, Dmitrii Pozdeev, Dmitry Vetrov, Ekaterina Lobacheva", "tldr": "", "abstract": "Transfer learning and ensembling are two popular techniques for improving the performance and robustness of neural networks. \nDue to the high cost of pre-training, ensembles of models fine-tuned from a single pre-trained checkpoint are often used in practice. \nSuch models end up in the same basin of the loss landscape, which we call the pre-train basin, and thus have limited diversity. 
\nIn this work, we show that ensembles trained from a single pre-trained checkpoint may be improved by better exploring the pre-train basin; however, leaving the basin results in losing the benefits of transfer learning and in degradation of the ensemble quality. \nBased on the analysis of existing exploration methods, we propose a more effective modification of Snapshot Ensembles (SSE) for the transfer learning setup, StarSSE, which results in stronger ensembles and uniform model soups.", "keywords": "ensembles;transfer learning;loss landscape basins;model soups", "primary_area": "", "supplementary_material": "", "author": "Ildus Sadrtdinov;Dmitrii Pozdeev;Dmitry P. Vetrov;Ekaterina Lobacheva", "authorids": "~Ildus_Sadrtdinov1;~Dmitrii_Pozdeev2;~Dmitry_P._Vetrov1;~Ekaterina_Lobacheva1", "gender": "Not Specified;M;M;", "homepage": ";;https://constructor.university/faculty-member/dmitry-vetrov;https://tipt0p.github.io/", "dblp": "298/1173;342/2878;89/3348;176/1464", "google_scholar": "XhqNegUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ru/citations?user=7HU0UoUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0000-1295-6091;;;", "linkedin": ";;;ekaterina-lobacheva-164412a8/", "or_profile": "~Ildus_Sadrtdinov1;~Dmitrii_Pozdeev2;~Dmitry_P._Vetrov1;~Ekaterina_Lobacheva1", "aff": "HSE University;Higher School of Economics, Higher School of Economics;National Research University Higher School of Economics;", "aff_domain": "hse.ru;edu.hse.ru;hse.ru;", "position": "MS student;Undergrad student;Full Professor;", "bibtex": "@inproceedings{\nsadrtdinov2023to,\ntitle={To Stay or Not to Stay in the Pre-train Basin: Insights on Ensembling in Transfer Learning},\nauthor={Ildus Sadrtdinov and Dmitrii Pozdeev and Dmitry P. 
Vetrov and Ekaterina Lobacheva},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NNooZoQpP4}\n}", "github": "", "project": "", "reviewers": "rBpj;1VUP;a9o6;XbMH", "pdf_size": 1500696, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "2;3;3;2", "novelty": "3;3;2;2", "presentation": "3;3;2;3", "wc_summary": "165;82;63;85", "wc_strengths": "92;73;107;85", "wc_weaknesses": "173;147;164;151", "wc_questions": "42;71;128;123", "wc_limitations": "6;18;5;19", "wc_review": "478;391;467;463", "wc_reply_reviewers": "29;21;405;110", "wc_reply_authors": "0;0;419;0", "reply_reviewers": "1;1;3;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.75, 39.168705620686524 ], "wc_strengths_avg": [ 89.25, 12.295832627357937 ], "wc_weaknesses_avg": [ 158.75, 10.353139620424328 ], "wc_questions_avg": [ 91.0, 36.0347054934545 ], "wc_limitations_avg": [ 12.0, 6.519202405202649 ], "wc_review_avg": [ 449.75, 34.361133566865924 ], "wc_reply_reviewers_avg": [ 141.25, 156.2055936898548 ], "wc_reply_authors_avg": [ 104.75, 181.43232209283988 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13892023169797654578&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "hse.ru;edu.hse.ru;hse.ru;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Higher School of Economics;National Research University Higher School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://hse.ru;https://hse.ru", "aff_unique_abbr": "HSE;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Efficient Low-rank Backpropagation for Vision Transformer Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71863", "id": "NNtsO5L27J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f75a57e9c71e8369da0150ea769d5a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NNtsO5L27J", "openreview": "https://openreview.net/forum?id=NNtsO5L27J", "poster": "/media/PosterPDFs/NeurIPS%202023/71863.png?t=1697429607.4683466", "slides": "https://nips.cc/virtual/2023/poster/71863", "video": "https://nips.cc/virtual/2023/poster/71863", "author_site": "Yuedong Yang, Hung-Yueh Chiang, Guihong Li, Diana Marculescu, Radu Marculescu", "tldr": "", "abstract": "The increasing scale of vision transformers (ViT) has made the efficient fine-tuning of these large models for specific needs a significant challenge in various applications. This issue originates from the computationally demanding matrix multiplications required during the backpropagation process through linear layers in ViT.\nIn this paper, we tackle this problem by proposing a new Low-rank BackPropagation via Walsh-Hadamard Transformation (LBP-WHT) method. Intuitively, LBP-WHT projects the gradient into a low-rank space and carries out backpropagation. 
This approach substantially reduces the computation needed for adapting ViT, as matrix multiplication in the low-rank space is far less resource-intensive. We conduct extensive experiments with different models (ViT, hybrid convolution-ViT model) on multiple datasets to demonstrate the effectiveness of our method. For instance, when adapting an EfficientFormer-L1 model on CIFAR100, our LBP-WHT achieves 10.4\\% higher accuracy than the state-of-the-art baseline, while requiring 9 MFLOPs less computation.\nAs the first work to accelerate ViT adaptation with low-rank backpropagation, our LBP-WHT method is complementary to many prior efforts and can be combined with them for better performance.", "keywords": "Low-rank backpropagation;model adaptation;transfer learning;vision transformer;Edge AI", "primary_area": "", "supplementary_material": "/attachment/8554b46f583dc18605f228bb375eafd5011d3ced.pdf", "author": "Yuedong Yang;Hung-Yueh Chiang;Guihong Li;Diana Marculescu;Radu Marculescu", "authorids": "~Yuedong_Yang2;~Hung-Yueh_Chiang1;~Guihong_Li1;~Diana_Marculescu4;~Radu_Marculescu2", "gender": "M;M;Unspecified;;M", "homepage": "https://github.com/AlbertYoung0112;https://hychiang.info/;https://liguihong.github.io/;;https://radum.ece.utexas.edu/", "dblp": ";209/4093;143/6649.html;;88/3494", "google_scholar": ";2uTRM0MAAAAJ;;;ZCmYP5cAAAAJ", "orcid": ";;0000-0001-8537-8632;;0000-0003-1826-7646", "linkedin": ";hung-yueh-chiang-25898ba3?original_referer=https%3A%2F%2Fhychiang.info%2F;;;", "or_profile": "~Yuedong_Yang2;~Hung-Yueh_Chiang1;~Guihong_Li1;~Diana_Marculescu4;~Radu_Marculescu2", "aff": "University of Texas, Austin;The University of Texas at Austin;University of Texas, Austin;;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;;utexas.edu", "position": "PhD student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nyang2023efficient,\ntitle={Efficient Low-rank Backpropagation for Vision Transformer Adaptation},\nauthor={Yuedong Yang and Hung-Yueh Chiang and Guihong Li and Diana Marculescu and Radu Marculescu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NNtsO5L27J}\n}", "github": "", "project": "", "reviewers": "Hdqi;EswK;F5LH;idrd;j7ix", "pdf_size": 503836, "rating": "4;5;5;6;6", "confidence": "4;5;4;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;2;2", "wc_summary": "70;97;53;46;95", "wc_strengths": "47;113;19;20;115", "wc_weaknesses": "200;206;59;24;174", "wc_questions": "32;31;38;79;6", "wc_limitations": "1;6;7;26;6", "wc_review": "350;453;176;195;396", "wc_reply_reviewers": "0;119;21;0;60", "wc_reply_authors": "0;563;0;0;152", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 72.2, 20.951372270092477 ], "wc_strengths_avg": [ 62.8, 42.9995348812054 ], "wc_weaknesses_avg": [ 132.6, 75.96736141264878 ], "wc_questions_avg": [ 37.2, 23.608473055240147 ], "wc_limitations_avg": [ 9.2, 8.657944328765346 ], "wc_review_avg": [ 314.0, 110.04181023592805 ], "wc_reply_reviewers_avg": [ 40.0, 45.17078701993137 ], "wc_reply_authors_avg": [ 143.0, 218.09539197332896 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], 
"replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.13363062095621225, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10525236148296018943&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "utexas.edu;utexas.edu;utexas.edu;;utexas.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fairly Recommending with Social Attributes: A Flexible and Controllable Optimization Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71862", "id": "NP5xb00Y6a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/436d042b2dd81214d23ae43eb196b146-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NP5xb00Y6a", "openreview": "https://openreview.net/forum?id=NP5xb00Y6a", "poster": "/media/PosterPDFs/NeurIPS%202023/71862.png?t=1699274583.1414325", "slides": "https://nips.cc/virtual/2023/poster/71862", "video": "https://nips.cc/virtual/2023/poster/71862", "author_site": "Jinqiu Jin, Haoxuan Li, Fuli Feng, Sihao Ding, Peng Wu, Xiangnan He", "tldr": "", "abstract": "Item-side group fairness (IGF) requires a recommendation model to treat different item groups similarly, and has a crucial impact on information diffusion, consumption activity, and market equilibrium. Previous IGF notions only focus on the direct utility of the item exposures, i.e., the exposure numbers across different item groups. Nevertheless, the item exposures also facilitate utility gained from the neighboring users via social influence, called social utility, such as information sharing on the social media. To fill this gap, this paper introduces two social attribute-aware IGF metrics, which require similar user social attributes on the exposed items across the different item groups. In light of the trade-off between the direct utility and social utility, we formulate a new multi-objective optimization problem for training recommender models with flexible trade-off while ensuring controllable accuracy. To solve this problem, we develop a gradient-based optimization algorithm and theoretically show that the proposed algorithm can find Pareto optimal solutions with varying trade-off and guaranteed accuracy. 
Extensive experiments on two real-world datasets validate the effectiveness of our approach.", "keywords": "Recommender System;Fairness", "primary_area": "", "supplementary_material": "", "author": "Jinqiu Jin;Haoxuan Li;Fuli Feng;Sihao Ding;Peng Wu;Xiangnan He", "authorids": "~Jinqiu_Jin1;~Haoxuan_Li6;~Fuli_Feng1;~Sihao_Ding2;~Peng_Wu5;~Xiangnan_He1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/mitao-cat;https://haoxuanli-pku.github.io/;https://fulifeng.github.io/;;https://pengwu.site/;http://staff.ustc.edu.cn/~hexn", "dblp": ";145/4965-1.html;183/9198;https://dblp.uni-trier.de/pid/133/4721-3;15/6146-12;59/1007", "google_scholar": ";gtDqiucAAAAJ;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ", "orcid": ";0000-0003-3620-3769;0000-0002-5828-9842;0000-0003-1796-8504;0000-0001-7154-8880;0000-0001-8472-7992", "linkedin": ";;;;;", "or_profile": "~Jinqiu_Jin1;~Haoxuan_Li6;~Fuli_Feng1;~Sihao_Ding2;~Peng_Wu5;~Xiangnan_He1", "aff": "University of Science and Technology of China;Peking University;University of Science and Technology of China;University of Science and Technology of China;Beijing Technology and Business University;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;pku.edu.cn;ustc.edu.cn;ustc.edu.cn;btbu.edu.cn;ustc.edu.cn", "position": "Undergrad student;PhD student;Full Professor;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\njin2023fairly,\ntitle={Fairly Recommending with Social Attributes: A Flexible and Controllable Optimization Approach},\nauthor={Jinqiu Jin and Haoxuan Li and Fuli Feng and Sihao Ding and Peng Wu and Xiangnan He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NP5xb00Y6a}\n}", "github": "", "project": "", "reviewers": "pts6;cJkK;ZQVW;PYwN", "pdf_size": 461595, "rating": "2;5;6;7", "confidence": "5;4;3;3", "soundness": "3;2;2;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "72;83;132;107", "wc_strengths": "69;110;110;33", "wc_weaknesses": "81;374;95;33", "wc_questions": "13;71;46;1", "wc_limitations": "1;1;52;6", "wc_review": "236;639;435;180", "wc_reply_reviewers": "43;0;13;17", "wc_reply_authors": "0;436;34;29", "reply_reviewers": "1;0;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.5, 23.11384866265244 ], "wc_strengths_avg": [ 80.5, 32.12864765283469 ], "wc_weaknesses_avg": [ 145.75, 133.7710263846398 ], "wc_questions_avg": [ 32.75, 27.55335732719336 ], "wc_limitations_avg": [ 15.0, 21.459263733874934 ], "wc_review_avg": [ 372.5, 180.70486988457174 ], "wc_reply_reviewers_avg": [ 18.25, 15.610493265749165 ], "wc_reply_authors_avg": [ 124.75, 180.16849752384573 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9669875568304565, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15360898283725287478&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;pku.edu.cn;ustc.edu.cn;ustc.edu.cn;btbu.edu.cn;ustc.edu.cn", "author_num": 6, 
"aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "University of Science and Technology of China;Peking University;Beijing Technology and Business University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.pku.edu.cn;http://www.btbu.edu.cn", "aff_unique_abbr": "USTC;Peking U;BTBU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Geometric Analysis of Matrix Sensing over Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71861", "id": "NRnm5xO8Hz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/be7b70477c8fca697f14b1dbb1c086d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NRnm5xO8Hz", "openreview": "https://openreview.net/forum?id=NRnm5xO8Hz", "poster": "/media/PosterPDFs/NeurIPS%202023/71861.png?t=1701387965.6951494", "slides": "https://nips.cc/virtual/2023/poster/71861", "video": "https://nips.cc/virtual/2023/poster/71861", "author_site": "Haixiang Zhang, Ying Chen, Javad Lavaei", "tldr": "", "abstract": "In this work, we consider the problem of matrix sensing over graphs (MSoG). As a general case of matrix completion and matrix sensing problems, the MSoG problem has not been analyzed in the literature and the existing results cannot be directly applied to the MSoG problem. This work provides the first theoretical results on the optimization landscape of the MSoG problem. More specifically, we propose a new condition, named the $\\Omega$-RIP condition, to characterize the optimization complexity of the problem. In addition, with an improved regularizer of the incoherence, we prove that the strict saddle property holds for the MSoG problem with high probability under the incoherence condition and the $\\Omega$-RIP condition, which guarantees the polynomial-time global convergence of saddle-avoiding methods. Compared with state-of-the-art results, the bounds in this work are tight up to a constant. 
Besides the theoretical guarantees, we numerically illustrate the close relation between the $\\Omega$-RIP condition and the optimization complexity.", "keywords": "Low-rank matrix optimization;non-convex optimization", "primary_area": "", "supplementary_material": "/attachment/907bf01e3d404ae85d677939f93bff6262757ed9.pdf", "author": "Haixiang Zhang;Ying Chen;Javad Lavaei", "authorids": "~Haixiang_Zhang1;ying-chen@berkeley.edu;~Javad_Lavaei1", "gender": "M;;", "homepage": "https://math.berkeley.edu/~haixiang;;", "dblp": "80/2587;;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Haixiang_Zhang1;ying-chen@berkeley.edu;~Javad_Lavaei1", "aff": "University of California, Berkeley;;", "aff_domain": "berkeley.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nzhang2023geometric,\ntitle={Geometric Analysis of Matrix Sensing over Graphs},\nauthor={Haixiang Zhang and Ying Chen and Javad Lavaei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NRnm5xO8Hz}\n}", "github": "", "project": "", "reviewers": "do7X;S3ob;jSiy;xgrK", "pdf_size": 533750, "rating": "5;6;6;6", "confidence": "3;3;4;3", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "80;38;116;74", "wc_strengths": "30;33;57;69", "wc_weaknesses": "66;42;111;135", "wc_questions": "54;1;53;62", "wc_limitations": "1;1;5;1", "wc_review": "231;115;342;341", "wc_reply_reviewers": "0;0;11;0", "wc_reply_authors": "83;0;83;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.0, 27.65863337187866 ], "wc_strengths_avg": [ 47.25, 16.345871038277526 ], "wc_weaknesses_avg": [ 88.5, 36.52738698565776 ], "wc_questions_avg": [ 42.5, 24.212600025606502 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 257.25, 93.70265471159288 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 41.5, 41.5 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=722849404403914482&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "First- and Second-Order Bounds for Adversarial Linear Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71860", "id": "NTSbj2otOA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c2201e444d2b22a10ca50116a522b9a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NTSbj2otOA", "openreview": "https://openreview.net/forum?id=NTSbj2otOA", "poster": "/media/PosterPDFs/NeurIPS%202023/71860.png?t=1702225792.4304624", "slides": "https://nips.cc/virtual/2023/poster/71860", "video": 
"https://nips.cc/virtual/2023/poster/71860", "author_site": "Julia Olkhovskaya, Jack Mayo, Tim van Erven, Gergely Neu, Chen-Yu Wei", "tldr": "", "abstract": "We consider the adversarial linear contextual bandit setting, which\nallows for the loss functions associated with each of $K$ arms to change\nover time without restriction. Assuming the $d$-dimensional contexts are\ndrawn from a fixed known distribution, the worst-case expected regret\nover the course of $T$ rounds is known to scale as $\\tilde O(\\sqrt{Kd\nT})$. Under the additional assumption that the density of the contexts\nis log-concave, we obtain a second-order bound of order $\\tilde\nO(K\\sqrt{d V_T})$ in terms of the cumulative second moment of the\nlearner's losses $V_T$, and a closely related first-order bound of order\n$\\tilde O(K\\sqrt{d L_T^*})$ in terms of the cumulative loss of the best\npolicy $L_T^*$. Since $V_T$ or $L_T^*$ may be significantly smaller than\n$T$, these improve over the worst-case regret whenever the environment\nis relatively benign. Our results are obtained using a truncated version\nof the continuous exponential weights algorithm over the probability\nsimplex, which we analyse by exploiting a novel connection to the linear\nbandit setting without contexts.", "keywords": "contextual bandits;bandits;sequential learning;regret bounds", "primary_area": "", "supplementary_material": "/attachment/f4241fe87328b981af2c48a90a5aa58d89911d53.pdf", "author": "Julia Olkhovskaya;Jack Mayo;Tim van Erven;Gergely Neu;Chen-Yu Wei", "authorids": "~Julia_Olkhovskaya1;~Jack_Mayo1;~Tim_van_Erven1;~Gergely_Neu1;~Chen-Yu_Wei1", "gender": "F;M;M;M;M", "homepage": "https://sites.google.com/view/julia-olkhovskaya/home;;http://www.timvanerven.nl;http://cs.bme.hu/~gergo;https://bahh723.github.io/", "dblp": ";;82/1868;83/7606;183/1729", "google_scholar": "https://scholar.google.com/citations?hl=en;Qwhl1AEAAAAJ;https://scholar.google.nl/citations?user=kdxqEMQAAAAJ;https://scholar.google.ch/citations?user=uz27G84AAAAJ;2L2cR-kAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Julia_Olkhovskaya1;~Jack_Mayo1;~Tim_van_Erven1;~Gergely_Neu1;~Chen-Yu_Wei1", "aff": "Vrije Universiteit Amsterdam;University of Amsterdam;University of Amsterdam;Universitat Pompeu Fabra;Massachusetts Institute of Technology", "aff_domain": "vu.nl;uva.nl;uva.nl;upf.edu;mit.edu", "position": "Postdoc;PhD student;Associate Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nolkhovskaya2023first,\ntitle={First- and Second-Order Bounds for Adversarial Linear Contextual Bandits},\nauthor={Julia Olkhovskaya and Jack Mayo and Tim van Erven and Gergely Neu and Chen-Yu Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NTSbj2otOA}\n}", "github": "", "project": "", "reviewers": "zjgE;QEEf;pxmm;hPEo", "pdf_size": 420901, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "1;2;3;2", "presentation": "3;3;3;3", "wc_summary": "191;77;120;51", "wc_strengths": "54;79;42;24", "wc_weaknesses": "467;230;107;69", "wc_questions": "10;14;359;55", "wc_limitations": "47;8;1;6", "wc_review": "769;408;629;205", "wc_reply_reviewers": "16;30;17;0", "wc_reply_authors": "5;5;6;4", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 109.75, 
52.98761647781489 ], "wc_strengths_avg": [ 49.75, 19.97967717456916 ], "wc_weaknesses_avg": [ 218.25, 155.45638455849922 ], "wc_questions_avg": [ 109.5, 145.1215008191412 ], "wc_limitations_avg": [ 15.5, 18.364367672206956 ], "wc_review_avg": [ 502.75, 214.74447024312408 ], "wc_reply_reviewers_avg": [ 15.75, 10.638961415476606 ], "wc_reply_authors_avg": [ 5.0, 0.7071067811865476 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6987210836455214734&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 10, "email": "vu.nl;uva.nl;uva.nl;upf.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Vrije Universiteit Amsterdam;University of Amsterdam;Universitat Pompeu Fabra;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.vu.nl;https://www.uva.nl;https://www.upf.edu/;https://web.mit.edu", "aff_unique_abbr": "VU Amsterdam;UvA;UPF;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "Netherlands;Spain;United States" }, { "title": "SceneScape: Text-Driven Consistent Scene Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71859", "id": "NU2kGsA4TT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d62a85ebfed2f680eb5544beae93191-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NU2kGsA4TT", "openreview": "https://openreview.net/forum?id=NU2kGsA4TT", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71859", "video": "https://nips.cc/virtual/2023/poster/71859", "author_site": "Rafail Fridman, Amit Abecasis, Yoni Kasten, Tali Dekel", "tldr": "", "abstract": "We present a method for text-driven perpetual view generation -- synthesizing long-term videos of various scenes solely given an input text prompt describing the scene and camera poses. We introduce a novel framework that generates such videos in an online fashion by combining the generative power of a pre-trained text-to-image model with the geometric priors learned by a pre-trained monocular depth prediction model. To tackle the pivotal challenge of achieving 3D consistency, i.e., synthesizing videos that depict geometrically-plausible scenes, we deploy online test-time training to encourage the predicted depth map of the current frame to be geometrically consistent with the synthesized scene. The depth maps are used to construct a \\emph{unified} mesh representation of the scene, which is progressively constructed along the video generation process.
In contrast to previous works, which are applicable only to limited domains, our method generates diverse scenes, such as walkthroughs in spaceships, caves, or ice castles.", "keywords": "Computer Vision;Image & Video Editing;Video Generation;Perpetual View Generation;Texture Synthesis & Inpainting", "primary_area": "", "supplementary_material": "/attachment/0418c03ee48cb421f16e51b5dd1b8f8929cb2bec.zip", "author": "Rafail Fridman;Amit Abecasis;Yoni Kasten;Tali Dekel", "authorids": "~Rafail_Fridman1;~Amit_Abecasis1;~Yoni_Kasten1;~Tali_Dekel1", "gender": ";;;F", "homepage": ";;https://ykasten.github.io/;https://www.weizmann.ac.il/math/dekel/home", "dblp": ";;183/6527;", "google_scholar": "qBPARd8AAAAJ;;https://scholar.google.co.il/citations?user=kc4-e8oAAAAJ;https://scholar.google.co.il/citations?user=T0-Wo0EAAAAJ", "orcid": ";;;", "linkedin": "rafail-fridman/;;yoni-kasten-788a87b3;", "or_profile": "~Rafail_Fridman1;~Amit_Abecasis1;~Yoni_Kasten1;~Tali_Dekel1", "aff": "Weizmann Institute of Science;;NVIDIA;Google", "aff_domain": "weizmann.ac.il;;nvidia.com;google.com", "position": "MS student;;Researcher;Researcher", "bibtex": "@inproceedings{\nfridman2023scenescape,\ntitle={SceneScape: Text-Driven Consistent Scene Generation},\nauthor={Rafail Fridman and Amit Abecasis and Yoni Kasten and Tali Dekel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NU2kGsA4TT}\n}", "github": "", "project": "", "reviewers": "Zj8B;GkJU;ooiE;R7aK;9ntp", "pdf_size": 32358017, "rating": "3;4;5;7;8", "confidence": "4;4;4;4;4", "soundness": "2;2;3;4;3", "novelty": "2;2;2;3;4", "presentation": "3;3;4;3;4", "wc_summary": "66;51;105;75;39", "wc_strengths": "147;67;94;115;56", "wc_weaknesses": "621;374;211;128;24", "wc_questions": "126;188;18;88;22", "wc_limitations": "1;7;16;13;10", "wc_review": "961;687;444;419;151", "wc_reply_reviewers": "141;142;32;0;0", "wc_reply_authors": "135;184;0;0;0", "reply_reviewers": "1;2;1;0;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.4, 1.8547236990991407 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 67.2, 22.577865266672134 ], "wc_strengths_avg": [ 95.8, 32.88403868140287 ], "wc_weaknesses_avg": [ 271.6, 208.88523164647137 ], "wc_questions_avg": [ 88.4, 64.34158841682415 ], "wc_limitations_avg": [ 9.4, 5.1613951602255765 ], "wc_review_avg": [ 532.4, 273.4005120697472 ], "wc_reply_reviewers_avg": [ 63.0, 65.15212966588275 ], "wc_reply_authors_avg": [ 63.8, 79.66027868392126 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1240105595595616019&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "weizmann.ac.il;;nvidia.com;google.com", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Weizmann Institute of Science;NVIDIA;Google", "aff_unique_dep": ";NVIDIA Corporation;Google", "aff_unique_url": "https://www.weizmann.org.il;https://www.nvidia.com;https://www.google.com", "aff_unique_abbr": "Weizmann;NVIDIA;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Prefix-Tree Decoding for Predicting Mass Spectra 
from Molecules", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71858", "id": "NWEbeI2HNQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97d596ca21d0751ba2c633bad696cf7f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NWEbeI2HNQ", "openreview": "https://openreview.net/forum?id=NWEbeI2HNQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71858.png?t=1698071210.084188", "slides": "https://nips.cc/virtual/2023/poster/71858", "video": "https://nips.cc/virtual/2023/poster/71858", "author_site": "Samuel Goldman, John Bradshaw, Jiayi Xin, Connor Coley", "tldr": "", "abstract": "Computational predictions of mass spectra from molecules have enabled the discovery of clinically relevant metabolites. However, such predictive tools are still limited as they occupy one of two extremes, either operating (a) by fragmenting molecules combinatorially with overly rigid constraints on potential rearrangements and poor time complexity or (b) by decoding lossy and nonphysical discretized spectra vectors. In this work, we use a new intermediate strategy for predicting mass spectra from molecules by treating mass spectra as sets of molecular formulae, which are themselves multisets of atoms. After first encoding an input molecular graph, we decode a set of molecular subformulae, each of which specify a predicted peak in the mass spectrum, the intensities of which are predicted by a second model. Our key insight is to overcome the combinatorial possibilities for molecular subformulae by decoding the formula set using a prefix tree structure, atom-type by atom-type, representing a general method for ordered multiset decoding. We show promising empirical results on mass spectra prediction tasks.", "keywords": "molecules;prefix tree;mass spectra;mass spectrum prediction;metabolomics;GNNs;chemistry;biology", "primary_area": "", "supplementary_material": "/attachment/b62b754ce7c54890e4b459685a4fd0c5acd30b4e.pdf", "author": "Samuel Goldman;John Bradshaw;Jiayi Xin;Connor W. Coley", "authorids": "~Samuel_Goldman1;~John_Bradshaw1;~Jiayi_Xin1;~Connor_W._Coley1", "gender": "M;;F;M", "homepage": "https://samgoldman97.github.io/;https://john-bradshaw.com/;;https://coley.mit.edu", "dblp": "267/5461.html;58/1303;128/0543;206/6284", "google_scholar": "bOEEpOAAAAAJ;CnPDIr4AAAAJ;ltQ26LQAAAAJ;l015S80AAAAJ", "orcid": "0000-0002-3928-6873;;0000-0003-3693-3809;0000-0002-8271-8723", "linkedin": "samuel-goldman-918697105/;bradshaw-john/;jiayi-xin-978511334/;", "or_profile": "~Samuel_Goldman1;~John_Bradshaw1;~Jiayi_Xin1;~Connor_Coley1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Hong Kong;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;hku.hk;mit.edu", "position": "PhD student;Postdoc;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ngoldman2023prefixtree,\ntitle={Prefix-Tree Decoding for Predicting Mass Spectra from Molecules},\nauthor={Samuel Goldman and John Bradshaw and Jiayi Xin and Connor W. 
Coley},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NWEbeI2HNQ}\n}", "github": "", "project": "", "reviewers": "188s;ZFnW;8EJR;sZMH", "pdf_size": 1226939, "rating": "7;7;8;8", "confidence": "5;5;4;4", "soundness": "4;3;4;3", "novelty": "3;2;4;4", "presentation": "4;3;4;3", "wc_summary": "180;50;76;252", "wc_strengths": "114;79;44;279", "wc_weaknesses": "309;242;454;349", "wc_questions": "77;201;31;174", "wc_limitations": "28;65;1;99", "wc_review": "708;637;606;1153", "wc_reply_reviewers": "29;83;26;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 139.5, 81.14647250497092 ], "wc_strengths_avg": [ 129.0, 90.06941767325911 ], "wc_weaknesses_avg": [ 338.5, 76.86514164431104 ], "wc_questions_avg": [ 120.75, 69.36272413912245 ], "wc_limitations_avg": [ 48.25, 37.076778446893144 ], "wc_review_avg": [ 776.0, 220.7793015660662 ], "wc_reply_reviewers_avg": [ 34.5, 30.18691769624716 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3644837882610890185&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;hku.hk;mit.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.hku.hk", "aff_unique_abbr": "MIT;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Moment Matching Denoising Gibbs Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71857", "id": "NWrN6cMG2x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4a4a3c197deac042461c677219efd36c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NWrN6cMG2x", "openreview": "https://openreview.net/forum?id=NWrN6cMG2x", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71857", "video": "https://nips.cc/virtual/2023/poster/71857", "author_site": "Mingtian Zhang, Alex Hawkins-Hooker, Brooks Paige, David Barber", "tldr": "", "abstract": "Energy-Based Models (EBMs) offer a versatile framework for modelling complex data distributions. However, training and sampling from EBMs continue to pose significant challenges. The widely-used Denoising Score Matching (DSM) method for scalable EBM training suffers from inconsistency issues, causing the energy model to learn a noisy data distribution. In this work, we propose an efficient sampling framework: (pseudo)-Gibbs sampling with moment matching, which enables effective sampling from the underlying clean model when given a noisy model that has been well-trained via DSM. 
We explore the benefits of our approach compared to related methods and demonstrate how to scale the method to high-dimensional datasets.", "keywords": "denoising score-matching;gibbs sampling;diffusion model", "primary_area": "", "supplementary_material": "/attachment/141f4b39e13289a5b640c173d5d16ce785ea3ff4.pdf", "author": "Mingtian Zhang;Alex Hawkins-Hooker;Brooks Paige;David Barber", "authorids": "~Mingtian_Zhang1;~Alex_Hawkins-Hooker1;~Brooks_Paige1;~David_Barber2", "gender": "M;;M;M", "homepage": "http://tomo.wiki;;https://tbrx.github.io;http://www.cs.ucl.ac.uk/staff/D.Barber/", "dblp": "230/8340;;https://dblp.uni-trier.de/pers/p/Paige:Brooks;", "google_scholar": ";k-wS8fsAAAAJ;JrFJmx0AAAAJ;https://scholar.google.com.tw/citations?user=Nej1FcgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mingtian_Zhang1;~Alex_Hawkins-Hooker1;~Brooks_Paige1;~David_Barber1", "aff": "University College London;University College London, University of London;University College London;University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023moment,\ntitle={Moment Matching Denoising Gibbs Sampling},\nauthor={Mingtian Zhang and Alex Hawkins-Hooker and Brooks Paige and David Barber},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NWrN6cMG2x}\n}", "github": "", "project": "", "reviewers": "afYY;DnqP;oYaT;1SZs;ddyX", "pdf_size": 5097431, "rating": "5;5;5;6;6", "confidence": "1;4;3;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;2;2", "presentation": "3;3;2;3;2", "wc_summary": "55;121;26;126;96", "wc_strengths": "25;9;43;46;72", "wc_weaknesses": "7;66;130;178;74", "wc_questions": "1;21;72;26;5", "wc_limitations": "1;1;1;45;24", "wc_review": "89;218;272;421;271", "wc_reply_reviewers": "0;38;13;0;69", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 84.8, 38.67505656104461 ], "wc_strengths_avg": [ 39.0, 21.213203435596427 ], "wc_weaknesses_avg": [ 91.0, 58.412327466040935 ], "wc_questions_avg": [ 25.0, 25.306125740618615 ], "wc_limitations_avg": [ 14.4, 17.704236781064584 ], "wc_review_avg": [ 254.2, 106.7921345418285 ], "wc_reply_reviewers_avg": [ 24.0, 26.43482551483932 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4065295139813470505&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Intra-Modal Proxy Learning for Zero-Shot Visual Categorization with CLIP", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71856", "id": "NXLjaYdgaL", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/50a057e9fe79ffa3f4120fb6fb88071a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NXLjaYdgaL", "openreview": "https://openreview.net/forum?id=NXLjaYdgaL", "poster": "/media/PosterPDFs/NeurIPS%202023/71856.png?t=1701457396.2090225", "slides": "https://nips.cc/virtual/2023/poster/71856", "video": "https://nips.cc/virtual/2023/poster/71856", "author_site": "Qi Qian, Yuanhong Xu, Juhua Hu", "tldr": "", "abstract": "Vision-language pre-training methods, e.g., CLIP, demonstrate an impressive zero-shot performance on visual categorizations with the class proxy from the text embedding of the class name. However, the modality gap between the text and vision space can result in a sub-optimal performance. We theoretically show that the gap cannot be reduced sufficiently by minimizing the contrastive loss in CLIP and the optimal proxy for vision tasks may reside only in the vision space. Therefore, given unlabeled target vision data, we propose to learn the vision proxy directly with the help from the text proxy for zero-shot transfer. Moreover, according to our theoretical analysis, strategies are developed to further refine the pseudo label obtained by the text proxy to facilitate the intra-modal proxy learning (InMaP) for vision. Experiments on extensive downstream tasks confirm the effectiveness and efficiency of our proposal. Concretely, InMaP can obtain the vision proxy within one minute on a single GPU while improving the zero-shot accuracy from $77.02\\%$ to $80.21\\%$ on ImageNet with ViT-L/14@336 pre-trained by CLIP.", "keywords": "zero-shot; clip; proxy learning", "primary_area": "", "supplementary_material": "", "author": "Qi Qian;Yuanhong Xu;Juhua Hu", "authorids": "~Qi_Qian1;~Yuanhong_Xu1;~Juhua_Hu1", "gender": ";M;F", "homepage": "http://qi-qian.com;;http://faculty.washington.edu/juhuah/", "dblp": "05/2084-1;223/4687;147/2228", "google_scholar": "Rp_40_gAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";%E6%B8%8A%E9%B8%BF-%E5%BE%90-37a542113/;", "or_profile": "~Qi_Qian1;~Yuanhong_Xu1;~Juhua_Hu1", "aff": "Alibaba Group;Alibaba Group;University of Washington", "aff_domain": "alibaba-inc.com;alibaba-inc.com;uw.edu", "position": "Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nqian2023intramodal,\ntitle={Intra-Modal Proxy Learning for Zero-Shot Visual Categorization with {CLIP}},\nauthor={Qi Qian and Yuanhong Xu and Juhua Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NXLjaYdgaL}\n}", "github": "", "project": "", "reviewers": "DJum;fSSP;eKBJ;dkv6", "pdf_size": 324596, "rating": "5;5;6;7", "confidence": "3;3;2;4", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "2;2;3;4", "wc_summary": "68;78;154;38", "wc_strengths": "24;49;78;53", "wc_weaknesses": "143;79;8;10", "wc_questions": "4;61;3;53", "wc_limitations": "5;25;10;18", "wc_review": "244;292;253;172", "wc_reply_reviewers": "16;186;0;0", "wc_reply_authors": "0;324;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 84.5, 42.74049601958311 ], "wc_strengths_avg": [ 51.0, 19.144189719076646 ], "wc_weaknesses_avg": [ 60.0, 55.79874550561151 ], "wc_questions_avg": [ 30.25, 
26.901440481877547 ], "wc_limitations_avg": [ 14.5, 7.632168761236874 ], "wc_review_avg": [ 240.25, 43.338060639581 ], "wc_reply_reviewers_avg": [ 50.5, 78.50318464877715 ], "wc_reply_authors_avg": [ 81.0, 140.29611541307906 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15330915144451204534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "alibaba-inc.com;alibaba-inc.com;uw.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Alibaba Group;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.washington.edu", "aff_unique_abbr": "Alibaba;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Disambiguated Attention Embedding for Multi-Instance Partial-Label Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71855", "id": "NYwbmCrrni", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b1917a4bcfab403c3cdd6c6bbaf9fda0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NYwbmCrrni", "openreview": "https://openreview.net/forum?id=NYwbmCrrni", "poster": "/media/PosterPDFs/NeurIPS%202023/71855.png?t=1699705358.5710697", "slides": "https://nips.cc/virtual/2023/poster/71855", "video": "https://nips.cc/virtual/2023/poster/71855", "author_site": "Wei Tang, Weijia Zhang, Min-Ling Zhang", "tldr": "", "abstract": "In many real-world tasks, the concerned objects can be represented as a multi-instance bag associated with a candidate label set, which consists of one ground-truth label and several false positive labels. Multi-instance partial-label learning (MIPL) is a learning paradigm to deal with such tasks and has achieved favorable performance. The existing MIPL approach follows the instance-space paradigm by assigning augmented candidate label sets of bags to each instance and aggregating bag-level labels from instance-level labels. However, this scheme may be suboptimal as global bag-level information is ignored and the predicted labels of bags are sensitive to predictions of negative instances. In this paper, we study an alternative scheme where a multi-instance bag is embedded into a single vector representation. Accordingly, an intuitive algorithm named DEMIPL, i.e., Disambiguated attention Embedding for Multi-Instance Partial-Label learning, is proposed. DEMIPL employs a disambiguation attention mechanism to aggregate a multi-instance bag into a single vector representation, followed by a momentum-based disambiguation strategy to identify the ground-truth label from the candidate label set. Furthermore, we introduce a real-world MIPL dataset for colorectal cancer classification.
Experimental results on benchmark and real-world datasets validate the superiority of DEMIPL against the compared MIPL and partial-label learning approaches.", "keywords": "Machine Learning;Multi-Instance Partial-Label Learning;Multi-Instance Learning;Partial-Label Learning", "primary_area": "", "supplementary_material": "", "author": "Wei Tang;Weijia Zhang;Min-Ling Zhang", "authorids": "~Wei_Tang16;~Weijia_Zhang2;~Min-Ling_Zhang2", "gender": ";;M", "homepage": "https://tangw-seu.github.io/;https://www.weijiazhangxh.com/;http://palm.seu.edu.cn/zhangml/", "dblp": "58/1874-17;158/5387-1;84/271.html", "google_scholar": "BLvzAjgAAAAJ;https://scholar.google.com.au/citations?user=7jmAPvAAAAAJ;uFHCIM0AAAAJ", "orcid": "0000-0001-9080-9281;0000-0001-8103-5325;0000-0003-1880-5918", "linkedin": ";weijia-zhang-86152337/;", "or_profile": "~Wei_Tang16;~Weijia_Zhang2;~Min-Ling_Zhang2", "aff": "Southeast University;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ntang2023disambiguated,\ntitle={Disambiguated Attention Embedding for Multi-Instance Partial-Label Learning},\nauthor={Wei Tang and Weijia Zhang and Min-Ling Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NYwbmCrrni}\n}", "github": "", "project": "", "reviewers": "7xX7;pq8x;suFM;EhJn;hzaM", "pdf_size": 403914, "rating": "5;5;7;7;8", "confidence": "3;3;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;4", "presentation": "3;3;3;3;4", "wc_summary": "114;121;64;71;153", "wc_strengths": "19;47;58;107;127", "wc_weaknesses": "117;60;63;180;21", "wc_questions": "175;16;38;3;109", "wc_limitations": "1;3;3;3;8", "wc_review": "426;247;226;364;418", "wc_reply_reviewers": "0;142;16;12;14", "wc_reply_authors": "0;65;18;6;7", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 104.6, 33.097431924546655 ], "wc_strengths_avg": [ 71.6, 39.69685126052191 ], "wc_weaknesses_avg": [ 88.2, 55.14127310826256 ], "wc_questions_avg": [ 68.2, 64.74380279223642 ], "wc_limitations_avg": [ 3.6, 2.3323807579381204 ], "wc_review_avg": [ 336.2, 84.41421681209866 ], "wc_reply_reviewers_avg": [ 36.8, 52.893855975907066 ], "wc_reply_authors_avg": [ 19.2, 23.625410049351522 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.748455199183749, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9857106407337198684&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Geometric Neural Diffusion Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71854", "id": "NaYAsbv2jF", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a797c2d2e0c1fdabf4d1ab8cd0b465c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NaYAsbv2jF", "openreview": "https://openreview.net/forum?id=NaYAsbv2jF", "poster": "/media/PosterPDFs/NeurIPS%202023/71854.png?t=1701706433.13777", "slides": "https://nips.cc/virtual/2023/poster/71854", "video": "https://nips.cc/virtual/2023/poster/71854", "author_site": "Emile Mathieu, Vincent Dutordoir, Michael Hutchinson, Valentin De Bortoli, Yee Whye Teh, Richard Turner", "tldr": "", "abstract": "Denoising diffusion models have proven to be a flexible and effective paradigm for generative modelling.\nTheir recent extension to infinite dimensional Euclidean spaces has allowed for the modelling of stochastic processes.\nHowever, many problems in the natural sciences incorporate symmetries and involve data living in non-Euclidean spaces.\nIn this work, we extend the framework of diffusion models to incorporate a series of geometric priors in infinite-dimension modelling.\nWe do so by a) constructing a noising process which admits, as limiting distribution, a geometric Gaussian process that transforms under the symmetry group of interest, and b) approximating the score with a neural network that is equivariant w.r.t. this group.\nWe show that with these conditions, the generative functional model admits the same symmetry.\nWe demonstrate scalability and capacity of the model, using a novel Langevin-based conditional sampler, to fit complex scalar and vector fields, with Euclidean and spherical codomain, on synthetic and real-world weather data.", "keywords": "diffusion model;functional space;stochastic process;time-series;neural processes;Gaussian processes;random fields;invariance;equivariance;symmetries;stationarity", "primary_area": "", "supplementary_material": "/attachment/8c2a1df126084f6660c2f01269884b38a4fe86f7.pdf", "author": "Emile Mathieu;Vincent Dutordoir;Michael John Hutchinson;Valentin De Bortoli;Yee Whye Teh;Richard E Turner", "authorids": "~Emile_Mathieu1;~Vincent_Dutordoir1;~Michael_John_Hutchinson1;~Valentin_De_Bortoli1;~Yee_Whye_Teh2;~Richard_E_Turner1", "gender": "M;M;M;;M;M", "homepage": "http://emilemathieu.fr;;https://mjhutchinson.github.io;https://vdeborto.github.io/;https://rich-turner-group.github.io/;http://csml.stats.ox.ac.uk/people/teh/", "dblp": "223/6084.html;212/5487;352/6313.html;224/9338;40/5352;88/2483", "google_scholar": "g9BjTqgAAAAJ;;ot1m2GUAAAAJ;;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;https://scholar.google.co.uk/citations?user=y-nUzMwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Emile_Mathieu1;~Vincent_Dutordoir1;~Michael_John_Hutchinson1;~Valentin_De_Bortoli1;~Richard_E_Turner1;~Yee_Whye_Teh1", "aff": "University of Cambridge;University of Cambridge;University of Oxford;University of Oxford;Microsoft Research;University of Oxford", "aff_domain": "cam.ac.uk;cam.ac.uk;ox.ac.uk;ox.ac.uk;research.microsoft.com;ox.ac.uk", "position": "Postdoc;PhD student;PhD student;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nmathieu2023geometric,\ntitle={Geometric Neural Diffusion Processes},\nauthor={Emile Mathieu and Vincent Dutordoir and Michael John Hutchinson and Valentin De Bortoli and Yee Whye Teh and Richard E Turner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NaYAsbv2jF}\n}", "github": "", "project": "", "reviewers": "fmuk;uLvV;zVLR;NoKK", "pdf_size": 2348428, 
"rating": "6;6;7;7", "confidence": "4;3;3;4", "soundness": "3;2;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "51;142;190;84", "wc_strengths": "50;104;45;111", "wc_weaknesses": "79;986;26;109", "wc_questions": "1;143;58;106", "wc_limitations": "1;40;1;8", "wc_review": "182;1415;320;418", "wc_reply_reviewers": "0;65;11;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.75, 53.38246434925986 ], "wc_strengths_avg": [ 77.5, 30.153772566629204 ], "wc_weaknesses_avg": [ 300.0, 397.1756538359319 ], "wc_questions_avg": [ 77.0, 53.23063027994314 ], "wc_limitations_avg": [ 12.5, 16.132265804901678 ], "wc_review_avg": [ 583.75, 487.19009380322996 ], "wc_reply_reviewers_avg": [ 23.75, 24.752525123712125 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14888687461111131430&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;ox.ac.uk;ox.ac.uk;research.microsoft.com;ox.ac.uk", "author_num": 6, "aff_unique_index": "0;0;1;1;2;1", "aff_unique_norm": "University of Cambridge;University of Oxford;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.cam.ac.uk;https://www.ox.ac.uk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Cambridge;Oxford;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Provably Robust Temporal Difference Learning for Heavy-Tailed Rewards", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71853", "id": "NapL36HSBT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/516ca2e9e7bffbb4027a25d9f8838bc9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NapL36HSBT", "openreview": "https://openreview.net/forum?id=NapL36HSBT", "poster": "/media/PosterPDFs/NeurIPS%202023/71853.png?t=1702059986.2035277", "slides": "https://nips.cc/virtual/2023/poster/71853", "video": "https://nips.cc/virtual/2023/poster/71853", "author_site": "Semih Cayci, Atilla Eryilmaz", "tldr": "", "abstract": "In a broad class of reinforcement learning applications, stochastic rewards have heavy-tailed distributions, which lead to infinite second-order moments for stochastic (semi)gradients in policy evaluation and direct policy optimization. In such instances, the existing RL methods may fail miserably due to frequent statistical outliers. In this work, we establish that temporal difference (TD) learning with a dynamic gradient clipping mechanism, and correspondingly operated natural actor-critic (NAC), can be provably robustified against heavy-tailed reward distributions. It is shown in the framework of linear function approximation that a favorable tradeoff between bias and variability of the stochastic gradients can be achieved with this dynamic gradient clipping mechanism. 
In particular, we prove that robust versions of TD learning achieve sample complexities of order $\\mathcal{O}(\\varepsilon^{-\\frac{1}{p}})$ and $\\mathcal{O}(\\varepsilon^{-1-\\frac{1}{p}})$ with and without the full-rank assumption on the feature matrix, respectively, under heavy-tailed rewards with finite moments of order $(1+p)$ for some $p\\in(0,1]$, both in expectation and with high probability. We show that a robust variant of NAC based on Robust TD learning achieves $\\tilde{\\mathcal{O}}(\\varepsilon^{-4-\\frac{2}{p}})$ sample complexity. We corroborate our theoretical results with numerical experiments.", "keywords": "temporal difference learning;natural actor-critic;reinforcement learning;policy evaluation;policy gradient;markov decision processes", "primary_area": "", "supplementary_material": "/attachment/ba8cac9f32a490c5d8488f74e78be53e0f1ae7ec.pdf", "author": "Semih Cayci;Atilla Eryilmaz", "authorids": "~Semih_Cayci1;~Atilla_Eryilmaz1", "gender": "M;M", "homepage": "https://www.semihcayci.com;https://www.atillaeryilmaz.com", "dblp": ";56/5751", "google_scholar": "pt_pXpcAAAAJ;kVX9Zc4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Semih_Cayci1;~Atilla_Eryilmaz1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;Ohio State University", "aff_domain": "rwth-aachen.de;osu.edu", "position": "Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncayci2023provably,\ntitle={Provably Robust Temporal Difference Learning for Heavy-Tailed Rewards},\nauthor={Semih Cayci and Atilla Eryilmaz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NapL36HSBT}\n}", "github": "", "project": "", "reviewers": "GiX9;z9uh;pbb6;jt3m;db5F;ZkyR", "pdf_size": 932606, "rating": "4;5;5;6;7;7", "confidence": "2;2;3;3;2;3", "soundness": "2;3;3;3;3;3", "novelty": "2;2;2;3;3;2", "presentation": "2;2;3;3;3;4", "wc_summary": "31;68;39;26;68;166", "wc_strengths": "12;24;36;57;118;90", "wc_weaknesses": "47;141;159;60;84;206", "wc_questions": "2;40;180;42;99;55", "wc_limitations": "1;25;7;1;14;29", "wc_review": "93;298;421;186;383;546", "wc_reply_reviewers": "113;11;227;20;16;253", "wc_reply_authors": "190;12;1179;12;24;277", "reply_reviewers": "1;1;2;1;1;2", "reply_authors": "2;2;3;2;2;2", "rating_avg": [ 5.666666666666667, 1.1055415967851332 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 66.33333333333333, 47.541794478355804 ], "wc_strengths_avg": [ 56.166666666666664, 37.32924084711907 ], "wc_weaknesses_avg": [ 116.16666666666667, 57.00121831056674 ], "wc_questions_avg": [ 69.66666666666667, 56.96977756280566 ], "wc_limitations_avg": [ 12.833333333333334, 10.991158062531698 ], "wc_review_avg": [ 321.1666666666667, 150.11486342871655 ], "wc_reply_reviewers_avg": [ 106.66666666666667, 100.67880721493586 ], "wc_reply_authors_avg": [ 282.3333333333333, 413.4128955683679 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17181027285544742307&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "rwth-aachen.de;osu.edu", "author_num": 2, 
"aff_unique_index": "0;1", "aff_unique_norm": "RWTH Aachen University;Ohio State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rwth-aachen.de;https://www.osu.edu", "aff_unique_abbr": "RWTH;OSU", "aff_campus_unique_index": "0", "aff_campus_unique": "Aachen;", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United States" }, { "id": "NbkjMn7X8H", "title": "Robust Nonparametric Regression under Poisoning Attack", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper studies robust nonparametric regression, in which an adversarial attacker can modify the values of up to $q$ samples from a training dataset of size $N$. Our initial solution is an M-estimator based on Huber loss minimization. Compared with simple kernel regression, i.e. the Nadaraya-Watson estimator, this method can significantly weaken the impact of malicious samples on the regression performance. We provide the convergence rate as well as the corresponding minimax lower bound. The result shows that, with proper bandwidth selection, $\\ell_\\infty$ error is minimax optimal. The $\\ell_2$ error is optimal if $q\\lesssim \\sqrt{N/\\ln^2 N}$, but is suboptimal with larger $q$. The reason is that this estimator is vulnerable if there are many attacked samples concentrating in a small region. To address this issue, we propose a correction method by projecting the initial estimate to the space of Lipschitz functions. The final estimate is nearly minimax optimal for arbitrary $q$, up to a $\\ln N$ factor.", "keywords": "Poisoning attack;adversarial machine learning", "primary_area": "", "supplementary_material": "/attachment/a711cabf212491754516636757f322e6ee672331.zip", "author": "Puning Zhao;Zhiguo Wan", "authorids": "~Puning_Zhao1;wanzhiguo@zhejianglab.com", "gender": "M;", "homepage": "https://scst.sysu.edu.cn/members/members01/1417942.htm;", "dblp": "216/2680;", "google_scholar": "1jc7kasAAAAJ;", "orcid": "0009-0002-3264-3417;", "linkedin": ";", "or_profile": "~Puning_Zhao1;wanzhiguo@zhejianglab.com", "aff": "Zhejiang Lab;", "aff_domain": "zhejianglab.com;", "position": "Researcher;", "bibtex": "@misc{\nzhao2023robust,\ntitle={Robust Nonparametric Regression under Poisoning Attack},\nauthor={Puning Zhao and Zhiguo Wan},\nyear={2023},\nurl={https://openreview.net/forum?id=NbkjMn7X8H}\n}", "github": "", "project": "", "reviewers": "i98Z;pyjJ;QLoZ;yg2j", "site": "https://openreview.net/forum?id=NbkjMn7X8H", "pdf_size": 878103, "rating": "5;5;6;6", "confidence": "3;3;3;2", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "126;157;195;24", "wc_strengths": "80;17;53;18", "wc_weaknesses": "189;418;56;35", "wc_questions": "44;63;59;9", "wc_limitations": "12;1;10;4", "wc_review": "451;656;373;90", "wc_reply_reviewers": "220;240;0;11", "wc_reply_authors": "589;309;0;0", "reply_reviewers": "2;2;0;1", "reply_authors": "3;3;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 125.5, 63.49212549600147 ], "wc_strengths_avg": [ 42.0, 26.296387584609413 ], "wc_weaknesses_avg": [ 174.5, 152.48360567615129 ], "wc_questions_avg": [ 43.75, 21.276454121869087 ], "wc_limitations_avg": [ 6.75, 4.437059837324712 ], "wc_review_avg": [ 392.5, 202.94149403214712 ], "wc_reply_reviewers_avg": [ 117.75, 112.5397152120086 ], "wc_reply_authors_avg": [ 224.5, 245.35739238914323 ], "reply_reviewers_avg": [ 1.25, 
0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13517056404023305206&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0", "aff_unique_norm": "Zhejiang Lab", "aff_unique_dep": "", "aff_unique_url": "http://www.zhejianglab.com", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Minimum Description Length and Generalization Guarantees for Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71852", "id": "Ncb0MvVqRV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/054e9f9a286671ababa3213d6e59c1c2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ncb0MvVqRV", "openreview": "https://openreview.net/forum?id=Ncb0MvVqRV", "poster": "/media/PosterPDFs/NeurIPS%202023/71852.png?t=1701951910.4853387", "slides": "https://nips.cc/virtual/2023/poster/71852", "video": "https://nips.cc/virtual/2023/poster/71852", "author_site": "Milad Sefidgaran, Abdellatif Zaidi, Piotr Krasnowski", "tldr": "", "abstract": "A major challenge in designing efficient statistical supervised learning algorithms is finding representations that perform well not only on available training samples but also on unseen data. While the study of representation learning has spurred much interest, most existing such approaches are heuristic; and very little is known about theoretical generalization guarantees. For example, the information bottleneck method seeks a good generalization by finding a minimal description of the input that is maximally informative about the label variable, where minimality and informativeness are both measured by Shannon\u2019s mutual information. \n\nIn this paper, we establish a compressibility framework that allows us to derive upper bounds on the generalization error of a representation learning algorithm in terms of the ``Minimum Description Length'' (MDL) of the labels or the latent variables (representations). Rather than the mutual information between the encoder\u2019s input and the representation, which is often believed to reflect the algorithm\u2019s generalization capability in the related literature but in fact, falls short of doing so, our new bounds involve the \"multi-letter\" relative entropy between the distribution of the representations (or labels) of the training and test sets and a fixed prior. In particular, these new bounds reflect the structure of the encoder and are not vacuous for deterministic algorithms. Our compressibility approach, which is information-theoretic in nature, builds upon that of Blum-Langford for PAC-MDL bounds and introduces two essential ingredients: block-coding and lossy-compression. The latter allows our approach to subsume the so-called geometrical compressibility as a special case. To the best knowledge of the authors, the established generalization bounds are the first of their kind for Information Bottleneck type encoders and representation learning. Finally, we partly exploit the theoretical results by introducing a new data-dependent prior. 
Numerical simulations illustrate the advantages of such well-chosen priors over classical priors used in IB.", "keywords": "Information Bottleneck;Representation Learning;Generalization Error;Minimum Description Length", "primary_area": "", "supplementary_material": "/attachment/cdfce50a7967b21a4fc876a3eaf6549aaf874d0e.pdf", "author": "Milad Sefidgaran;Abdellatif Zaidi;Piotr Krasnowski", "authorids": "~Milad_Sefidgaran1;~Abdellatif_Zaidi1;~Piotr_Krasnowski1", "gender": "M;M;", "homepage": ";http://www-syscom.univ-mlv.fr/~zaidi/;", "dblp": "56/9885.html;07/3113;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;0000-0002-4564-7946", "linkedin": "milad-sefidgaran;;", "or_profile": "~Milad_Sefidgaran1;~Abdellatif_Zaidi1;~Piotr_Krasnowski1", "aff": "Huawei Technologies Ltd. (Paris Research Center);Universit\u00e9 Gustave Eiffel;Huawei Technologies Ltd.", "aff_domain": "huawei.com;univ-eiffel.fr;huawei.com", "position": "Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nsefidgaran2023minimum,\ntitle={Minimum Description Length and Generalization Guarantees for Representation Learning},\nauthor={Milad Sefidgaran and Abdellatif Zaidi and Piotr Krasnowski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ncb0MvVqRV}\n}", "github": "", "project": "", "reviewers": "2eZy;Lcin;D6jh;FoJr", "pdf_size": 1036036, "rating": "6;7;7;7", "confidence": "4;4;2;3", "soundness": "3;3;4;4", "novelty": "2;3;4;3", "presentation": "2;3;3;3", "wc_summary": "283;200;111;123", "wc_strengths": "130;59;26;138", "wc_weaknesses": "1074;29;180;55", "wc_questions": "585;490;69;52", "wc_limitations": "5;13;2;12", "wc_review": "2077;791;388;380", "wc_reply_reviewers": "30;27;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 179.25, 68.95061638593234 ], "wc_strengths_avg": [ 88.25, 47.2989164780759 ], "wc_weaknesses_avg": [ 334.5, 430.7496372604393 ], "wc_questions_avg": [ 299.0, 240.92841260424225 ], "wc_limitations_avg": [ 8.0, 4.636809247747852 ], "wc_review_avg": [ 909.0, 694.51961815344 ], "wc_reply_reviewers_avg": [ 17.5, 11.968709203585824 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17585580125497530670&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "huawei.com;univ-eiffel.fr;huawei.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Huawei;Universit\u00e9 Gustave Eiffel", "aff_unique_dep": "Huawei Technologies Ltd.;", "aff_unique_url": "https://www.huawei.com;https://www.univ-gustave-eiffel.fr", "aff_unique_abbr": "Huawei;UGE", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris Research Center;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;France" }, { "title": "Reward-agnostic Fine-tuning: Provable Statistical Benefits of Hybrid Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71851", "id": "Nd3FennRJZ", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/ade04fd4f26263f86b47ffb535c4cafb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Nd3FennRJZ", "openreview": "https://openreview.net/forum?id=Nd3FennRJZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71851.png?t=1701889575.7487514", "slides": "https://nips.cc/virtual/2023/poster/71851", "video": "https://nips.cc/virtual/2023/poster/71851", "author_site": "Gen Li, Wenhao Zhan, Jason Lee, Yuejie Chi, Yuxin Chen", "tldr": "", "abstract": "This paper studies tabular reinforcement learning (RL) in the hybrid setting, which assumes access to both an offline dataset and online interactions with the unknown environment. A central question boils down to how to efficiently utilize online data to strengthen and complement the offline dataset and enable effective policy fine-tuning. Leveraging recent advances in reward-agnostic exploration and offline RL, we design a three-stage hybrid RL algorithm that beats the best of both worlds --- pure offline RL and pure online RL --- in terms of sample complexities. The proposed algorithm does not require any reward information during data collection. Our theory is developed based on a new notion called **single-policy partial concentrability**, which captures the trade-off between distribution mismatch and miscoverage and guides the interplay between offline and online data.", "keywords": "reward-agnostic reinforcement learning;policy finetuning;offline reinforcement learning;online reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Gen Li;Wenhao Zhan;Jason D. Lee;Yuejie Chi;Yuxin Chen", "authorids": "~Gen_Li2;~Wenhao_Zhan1;~Jason_D._Lee1;~Yuejie_Chi1;~Yuxin_Chen5", "gender": "M;M;M;;M", "homepage": ";;https://jasondlee88.github.io/;;https://yuxinchen2020.github.io/", "dblp": "28/538-5.html;275/3558;88/3262;;11/5123-2", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;GR_DsT0AAAAJ;;RtNVud4AAAAJ", "orcid": "0000-0002-3078-9191;;;;0000-0001-9256-5815", "linkedin": ";;;;", "or_profile": "~Gen_Li2;~Wenhao_Zhan1;~Jason_D._Lee1;~Yuejie_Chi1;~Yuxin_Chen5", "aff": "The Wharton School, University of Pennsylvania;Princeton University;Princeton University;;University of Pennsylvania", "aff_domain": "wharton.upenn.edu;princeton.edu;princeton.edu;;upenn.edu", "position": "Postdoc;PhD student;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\nli2023rewardagnostic,\ntitle={Reward-agnostic Fine-tuning: Provable Statistical Benefits of Hybrid Reinforcement Learning},\nauthor={Gen Li and Wenhao Zhan and Jason D. 
Lee and Yuejie Chi and Yuxin Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Nd3FennRJZ}\n}", "github": "", "project": "", "reviewers": "Lcd6;sEvV;GVvQ;Ukmm;Vg7V;R4EB", "pdf_size": 438348, "rating": "4;6;6;6;7;7", "confidence": "3;2;2;3;3;3", "soundness": "3;3;3;2;3;3", "novelty": "2;3;3;3;3;3", "presentation": "2;3;3;3;4;3", "wc_summary": "59;135;78;55;112;96", "wc_strengths": "47;56;67;34;92;87", "wc_weaknesses": "164;66;38;22;100;56", "wc_questions": "49;20;58;187;49;4", "wc_limitations": "2;1;1;5;35;8", "wc_review": "321;278;242;303;388;251", "wc_reply_reviewers": "49;9;12;11;6;12", "wc_reply_authors": "63;0;0;18;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;1;1;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 89.16666666666667, 28.492201662131265 ], "wc_strengths_avg": [ 63.833333333333336, 20.715667715255737 ], "wc_weaknesses_avg": [ 74.33333333333333, 46.83896763261215 ], "wc_questions_avg": [ 61.166666666666664, 59.30547098614839 ], "wc_limitations_avg": [ 8.666666666666666, 12.036980056845191 ], "wc_review_avg": [ 297.1666666666667, 48.98100538871052 ], "wc_reply_reviewers_avg": [ 16.5, 14.68275632615802 ], "wc_reply_authors_avg": [ 13.5, 23.092206477510977 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12677135397223204613&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 12, "email": "wharton.upenn.edu;princeton.edu;princeton.edu;;upenn.edu", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Pennsylvania;Princeton University", "aff_unique_dep": "The Wharton School;", "aff_unique_url": "https://www.wharton.upenn.edu;https://www.princeton.edu", "aff_unique_abbr": "UPenn Wharton;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GNeSF: Generalizable Neural Semantic Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71850", "id": "NemifGnD2E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72d32f4fe0b7af03732bd227bf1c4a5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NemifGnD2E", "openreview": "https://openreview.net/forum?id=NemifGnD2E", "poster": "/media/PosterPDFs/NeurIPS%202023/71850.png?t=1701920000.368756", "slides": "https://nips.cc/virtual/2023/poster/71850", "video": "https://nips.cc/virtual/2023/poster/71850", "author_site": "Hanlin Chen, Chen Li, Mengqi Guo, Zhiwen Yan, Gim Hee Lee", "tldr": "", "abstract": "3D scene segmentation based on neural implicit representation has emerged recently with the advantage of training only on 2D supervision. However, existing approaches still require expensive per-scene optimization that prohibits generalization to novel scenes during inference. To circumvent this problem, we introduce a \\textit{generalizable} 3D segmentation framework based on implicit representation.
Specifically, our framework takes in multi-view image features and semantic maps as the inputs instead of only spatial information to avoid overfitting to scene-specific geometric and semantic information. We propose a novel soft voting mechanism to aggregate the 2D semantic information from different views for each 3D point. In addition to the image features, view difference information is also encoded in our framework to predict the voting scores. Intuitively, this allows the semantic information from nearby views to contribute more compared to distant ones. Furthermore, a visibility module is also designed to detect and filter out detrimental information from occluded views. Due to the generalizability of our proposed method, we can synthesize semantic maps or conduct 3D semantic segmentation for novel scenes with solely 2D semantic supervision. Experimental results show that our approach achieves comparable performance with scene-specific approaches. More importantly, our approach can even outperform existing strong supervision-based approaches with only 2D annotations.", "keywords": "NeRF; Semantic Segmentation; 3D vision; Scene understanding; Generalizable", "primary_area": "", "supplementary_material": "/attachment/f9c9f5b7c06b592b6dc8ade4e865bdf28346d115.pdf", "author": "Hanlin Chen;Chen Li;Mengqi Guo;Zhiwen Yan;Gim Hee Lee", "authorids": "~Hanlin_Chen2;~Chen_Li13;~Mengqi_Guo1;~Zhiwen_Yan1;~Gim_Hee_Lee1", "gender": "M;F;M;M;", "homepage": "https://hlinchen.github.io/;https://chaneyddtt.github.io/;https://dreamguo.github.io/;;https://www.comp.nus.edu.sg/~leegh/", "dblp": ";164/3294-38;176/3481;;49/9455", "google_scholar": "fBpYOzAAAAAJ;6_rJ2pcAAAAJ;Qa4BlOoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": "0000-0002-3323-8213;0009-0000-6807-3490;0000-0003-1907-9196;;0000-0002-1583-0475", "linkedin": ";;mengqi-guo-956065228/;;", "or_profile": "~Hanlin_Chen2;~Chen_Li13;~Mengqi_Guo1;~Zhiwen_Yan1;~Gim_Hee_Lee1", "aff": "National University of Singapore; National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;nus.edu.sg;nus.edu.sg;u.nus.edu;nus.edu.sg", "position": "PhD student;Postdoc;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2023gnesf,\ntitle={{GN}e{SF}: Generalizable Neural Semantic Fields},\nauthor={Hanlin Chen and Chen Li and Mengqi Guo and Zhiwen Yan and Gim Hee Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NemifGnD2E}\n}", "github": "", "project": "", "reviewers": "1Moh;G1Te;kUSs;xsYz;jaaU", "pdf_size": 3758010, "rating": "4;5;5;6;7", "confidence": "5;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;2", "presentation": "2;2;3;3;3", "wc_summary": "92;74;56;71;59", "wc_strengths": "101;32;47;90;62", "wc_weaknesses": "256;65;390;143;615", "wc_questions": "74;31;41;54;57", "wc_limitations": "6;1;18;32;50", "wc_review": "529;203;552;390;843", "wc_reply_reviewers": "146;16;248;19;655", "wc_reply_authors": "492;0;453;0;514", "reply_reviewers": "2;1;2;1;2", "reply_authors": "4;1;2;1;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 70.4, 12.784365451597509 ], "wc_strengths_avg": [ 66.4, 25.819372571772536 
], "wc_weaknesses_avg": [ 293.8, 194.3927982205102 ], "wc_questions_avg": [ 51.4, 14.65059725744995 ], "wc_limitations_avg": [ 21.4, 17.861690849412884 ], "wc_review_avg": [ 503.4, 210.44961392219278 ], "wc_reply_reviewers_avg": [ 216.8, 235.56349462512225 ], "wc_reply_authors_avg": [ 291.8, 239.053466822801 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6864064729836443, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18274102726404635611&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "nus.edu;nus.edu.sg;nus.edu.sg;u.nus.edu;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Bucks for Buckets (B4B): Active Defenses Against Stealing Encoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71849", "id": "NfpYgGZC3B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad1efab57a04d93f097e7fbb2d4fc054-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NfpYgGZC3B", "openreview": "https://openreview.net/forum?id=NfpYgGZC3B", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71849", "video": "https://nips.cc/virtual/2023/poster/71849", "author_site": "Jan Dubi\u0144ski, Stanis\u0142aw Pawlak, Franziska Boenisch, Tomasz Trzcinski, Adam Dziedzic", "tldr": "", "abstract": "Machine Learning as a Service (MLaaS) APIs provide ready-to-use and high-utility encoders that generate vector representations for given inputs. Since these encoders are very costly to train, they become lucrative targets for model stealing attacks during which an adversary leverages query access to the API to replicate the encoder locally at a fraction of the original training costs. We propose *Bucks for Buckets (B4B)*, the first *active defense* that prevents stealing while the attack is happening without degrading representation quality for legitimate API users. Our defense relies on the observation that the representations returned to adversaries who try to steal the encoder's functionality cover a significantly larger fraction of the embedding space than representations of legitimate users who utilize the encoder to solve a particular downstream task. B4B leverages this to adaptively adjust the utility of the returned representations according to a user's coverage of the embedding space. To prevent adaptive adversaries from eluding our defense by simply creating multiple user accounts (sybils), B4B also individually transforms each user's representations. This prevents the adversary from directly aggregating representations over multiple accounts to create their stolen encoder copy. 
Our active defense opens a new path towards securely sharing and democratizing encoders over public APIs.", "keywords": "model stealing;model defenses;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/0f7444dcce3e6546e6b42e5132306400b1250022.zip", "author": "Jan Dubi\u0144ski;Stanis\u0142aw Pawlak;Franziska Boenisch;Tomasz Trzcinski;Adam Dziedzic", "authorids": "~Jan_Dubi\u0144ski1;~Stanis\u0142aw_Pawlak1;~Franziska_Boenisch2;~Tomasz_Trzcinski2;~Adam_Dziedzic1", "gender": ";M;;M;", "homepage": ";;;https://cvlab.ii.pw.edu.pl/ttrzcins/;", "dblp": ";321/3654;;05/11408;", "google_scholar": ";;;https://scholar.google.pl/citations?user=bJMRBFoAAAAJ;", "orcid": ";0000-0001-7511-9995;;;", "linkedin": ";;;;", "or_profile": "~Jan_Dubi\u0144ski1;~Stanis\u0142aw_Pawlak1;~Franziska_Boenisch2;~Tomasz_Trzcinski2;~Adam_Dziedzic1", "aff": ";Warsaw University of Technology;;;", "aff_domain": ";pw.edu.pl;;;", "position": ";PhD student;;;", "bibtex": "@inproceedings{\ndubi{\\'n}ski2023bucks,\ntitle={Bucks for Buckets (B4B): Active Defenses Against Stealing Encoders},\nauthor={Jan Dubi{\\'n}ski and Stanis{\\l}aw Pawlak and Franziska Boenisch and Tomasz Trzcinski and Adam Dziedzic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NfpYgGZC3B}\n}", "github": "", "project": "", "reviewers": "pRsJ;weVe;N8vt;FtSs", "pdf_size": 1024520, "rating": "5;5;5;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "76;63;44;135", "wc_strengths": "7;37;66;53", "wc_weaknesses": "19;93;298;234", "wc_questions": "2;3;23;181", "wc_limitations": "568;3;21;15", "wc_review": "672;199;452;618", "wc_reply_reviewers": "0;10;0;0", "wc_reply_authors": "437;219;0;104", "reply_reviewers": "0;1;0;0", "reply_authors": "3;3;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.5, 34.00367627183861 ], "wc_strengths_avg": [ 40.75, 22.02697210240209 ], "wc_weaknesses_avg": [ 161.0, 110.55089325735908 ], "wc_questions_avg": [ 52.25, 74.80432808334021 ], "wc_limitations_avg": [ 151.75, 240.4094164129184 ], "wc_review_avg": [ 485.25, 184.0806548771489 ], "wc_reply_reviewers_avg": [ 2.5, 4.330127018922194 ], "wc_reply_authors_avg": [ 190.0, 162.28524270555226 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5007931711664149679&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";pw.edu.pl;;;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Warsaw University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.pw.edu.pl", "aff_unique_abbr": "WUT", "aff_country_unique_index": "0", "aff_country_unique": "Poland" }, { "title": "Improving neural network representations using human similarity judgments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71848", "id": "Nh5dp6Uuvx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9febda1c8344cc5f2d51713964864e93-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Nh5dp6Uuvx", "openreview": 
"https://openreview.net/forum?id=Nh5dp6Uuvx", "poster": "/media/PosterPDFs/NeurIPS%202023/71848.png?t=1701516797.7315514", "slides": "https://nips.cc/virtual/2023/poster/71848", "video": "https://nips.cc/virtual/2023/poster/71848", "author_site": "Lukas Muttenthaler, Lorenz Linhardt, Lorenz Linhardt, Jonas Dippel, Robert Vandermeulen, Katherine Hermann, Andrew Lampinen, Simon Kornblith", "tldr": "", "abstract": "Deep neural networks have reached human-level performance on many computer vision tasks. However, the objectives used to train these networks enforce only that similar images are embedded at similar locations in the representation space, and do not directly constrain the global structure of the resulting space. Here, we explore the impact of supervising this global structure by linearly aligning it with human similarity judgments. We find that a naive approach leads to large changes in local representational structure that harm downstream performance. Thus, we propose a novel method that aligns the global structure of representations while preserving their local structure. This global-local transform considerably improves accuracy across a variety of few-shot learning and anomaly detection tasks. Our results indicate that human visual representations are globally organized in a way that facilitates learning from few examples, and incorporating this global structure into neural network representations improves performance on downstream tasks.", "keywords": "representational alignment; human similarity judgments; neural networks; representation learning; few-shot learning; anomaly detection", "primary_area": "", "supplementary_material": "/attachment/62cb487c67f0e2a2831728474f0c4f943417433e.pdf", "author": "Lukas Muttenthaler;Lorenz Linhardt;Jonas Dippel;Robert A. Vandermeulen;Katherine Hermann;Andrew Kyle Lampinen;Simon Kornblith", "authorids": "~Lukas_Muttenthaler1;~Lorenz_Linhardt2;~Jonas_Dippel1;~Robert_A._Vandermeulen2;~Katherine_Hermann1;~Andrew_Kyle_Lampinen1;~Simon_Kornblith1", "gender": "M;;M;F;M;M;M", "homepage": "https://lukasmut.github.io/;;;;https://github.com/google/BIG-bench;;https://www.user.tu-berlin.de/rvdm/", "dblp": "245/4369;210/5418;249/3158;254/1923;https://dblp.uni-trier.de/pers/hd/l/Lampinen:Andrew_K=;220/4059;137/3375", "google_scholar": "https://scholar.google.com/citations?hl=en;579iMjgAAAAJ;ZLQCgRoAAAAJ;owcAYmEAAAAJ;_N44XxAAAAAJ;1O3RPmsAAAAJ;eSjfzOUAAAAJ", "orcid": "0000-0002-0804-4687;0000-0002-5533-5524;0000-0002-0552-8977;;;;0000-0001-6863-7006", "linkedin": "lukas-muttenthaler/;;jdippel/;;;;", "or_profile": "~Lukas_Muttenthaler1;~Lorenz_Linhardt2;~Jonas_Dippel1;~Katherine_Hermann1;~Andrew_Kyle_Lampinen1;~Simon_Kornblith1;~Robert_Vandermeulen1", "aff": "TU Berlin;TU Berlin;Technische Universit\u00e4t Berlin;Google;Google DeepMind;Google;Berlin Institute for the Foundations of Learning and Data", "aff_domain": "tu-berlin.de;tu-berlin.de;tu-berlin.de;google.com;google.com;google.com;tu-berlin.de", "position": "PhD student;PhD student;PhD student;Researcher;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nmuttenthaler2023improving,\ntitle={Improving neural network representations using human similarity judgments},\nauthor={Lukas Muttenthaler and Lorenz Linhardt and Jonas Dippel and Robert A. 
Vandermeulen and Katherine Hermann and Andrew Kyle Lampinen and Simon Kornblith},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Nh5dp6Uuvx}\n}", "github": "", "project": "", "reviewers": "dfyY;eWR8;BF6t;2yQv", "pdf_size": 14288834, "rating": "6;6;7;7", "confidence": "4;4;3;5", "soundness": "2;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "64;96;232;105", "wc_strengths": "54;72;31;78", "wc_weaknesses": "48;101;8;141", "wc_questions": "3;75;109;180", "wc_limitations": "6;23;12;98", "wc_review": "175;367;392;602", "wc_reply_reviewers": "7;19;22;0", "wc_reply_authors": "0;0;21;21", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 124.25, 64.04832160174067 ], "wc_strengths_avg": [ 58.75, 18.2944663764757 ], "wc_weaknesses_avg": [ 74.5, 50.61867244406949 ], "wc_questions_avg": [ 91.75, 63.72352391385775 ], "wc_limitations_avg": [ 34.75, 37.022797031018605 ], "wc_review_avg": [ 384.0, 151.2927625499647 ], "wc_reply_reviewers_avg": [ 12.0, 8.916277250063503 ], "wc_reply_authors_avg": [ 10.5, 10.5 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14119108002399360647&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tu-berlin.de;tu-berlin.de;tu-berlin.de;google.com;google.com;google.com;tu-berlin.de", "author_num": 7, "aff_unique_index": "0;0;0;1;1;1;2", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Google;Berlin Institute for the Foundations of Learning and Data", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.tu-berlin.de;https://www.google.com;https://www.bifold.berlin", "aff_unique_abbr": "TU Berlin;Google;BIFOLD", "aff_campus_unique_index": "0;0;2;2", "aff_campus_unique": "Berlin;;Mountain View", "aff_country_unique_index": "0;0;0;1;2;1;0", "aff_country_unique": "Germany;United States;United Kingdom" }, { "title": "Lexinvariant Language Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71847", "id": "NiQTy0NW1L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b734e95f0788a030a69caa987516186-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NiQTy0NW1L", "openreview": "https://openreview.net/forum?id=NiQTy0NW1L", "poster": "/media/PosterPDFs/NeurIPS%202023/71847.png?t=1701715591.0025246", "slides": "https://nips.cc/virtual/2023/poster/71847", "video": "https://nips.cc/virtual/2023/poster/71847", "author_site": "Qian Huang, Eric Zelikman, Sarah Chen, Yuhuai Wu, Gregory Valiant, Percy Liang", "tldr": "", "abstract": "Token embeddings, a mapping from discrete lexical symbols to continuous vectors, are at the heart of any language model (LM). However, lexical symbol meanings can also be determined and even redefined by their structural role in a long context. In this paper, we ask: is it possible for a language model to be performant without \emph{any} fixed token embeddings? Such a language model would have to rely entirely on the co-occurrence and repetition of tokens in the context rather than the \textit{a priori} identity of any token.
To answer this, we study \textit{lexinvariant} language models that are invariant to lexical symbols and therefore do not need fixed token embeddings in practice. First, we prove that we can construct a lexinvariant LM to converge to the true language model at a uniform rate that is polynomial in terms of the context length, with a constant factor that is sublinear in the vocabulary size. Second, to build a lexinvariant LM, we simply encode tokens using random Gaussian vectors, such that each token maps to the same representation within each sequence but different representations across sequences. Empirically, we demonstrate that it can indeed attain perplexity comparable to that of a standard language model, given a sufficiently long context. We further explore two properties of the lexinvariant language models: First, given text generated from a substitution cipher of English, it implicitly implements Bayesian in-context deciphering and infers the mapping to the underlying real tokens with high accuracy. Second, it has on average 4X better accuracy on synthetic in-context reasoning tasks. Finally, we discuss regularizing standard language models towards lexinvariance and potential practical applications.", "keywords": "Large Language Model;in-context learning;pretraining", "primary_area": "", "supplementary_material": "/attachment/4b00044e1bd6b1dc0fe524d521a338377cef8b00.zip", "author": "Qian Huang;Eric Zelikman;Sarah Li Chen;Yuhuai Wu;Gregory Valiant;Percy Liang", "authorids": "~Qian_Huang2;~Eric_Zelikman1;~Sarah_Li_Chen1;~Yuhuai_Wu1;~Gregory_Valiant1;~Percy_Liang1", "gender": "F;M;;M;Unspecified;", "homepage": "https://q-hwang.github.io/;https://zelikman.me;;http://www.cs.toronto.edu/~ywu/;https://theory.stanford.edu/~valiant/;https://cs.stanford.edu/~pliang/", "dblp": "07/4378.html;217/2378;;;80/6006;04/1701", "google_scholar": "L3hkmG0AAAAJ;V5B8dSUAAAAJ;;https://scholar.google.ca/citations?user=bOQGfFIAAAAJ;https://scholar.google.com.tw/citations?user=CgItEbQAAAAJ;pouyVyUAAAAJ", "orcid": ";;;;;", "linkedin": "qian-huang-b20315149/;ericzelikman/;sarah-chen1/;;;", "or_profile": "~Qian_Huang2;~Eric_Zelikman1;~Sarah_Li_Chen1;~Yuhuai_Wu1;~Gregory_Valiant1;~Percy_Liang1", "aff": "Stanford University;Google;Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University", "aff_domain": "stanford.edu;google.com;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu", "position": "PhD student;Research Intern;Undergrad student;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2023lexinvariant,\ntitle={Lexinvariant Language Models},\nauthor={Qian Huang and Eric Zelikman and Sarah Li Chen and Yuhuai Wu and Gregory Valiant and Percy Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NiQTy0NW1L}\n}", "github": "", "project": "", "reviewers": "Kmx7;gQ1h;1RPb;xQSK;zdSX", "pdf_size": 1552313, "rating": "3;6;7;7;8", "confidence": "4;4;4;2;4", "soundness": "1;3;3;4;3", "novelty": "2;3;3;3;4", "presentation": "2;3;3;2;3", "wc_summary": "70;120;120;80;215", "wc_strengths": "69;94;127;79;155", "wc_weaknesses": "583;63;170;64;344", "wc_questions": "25;35;2;65;79", "wc_limitations": "2;12;2;7;1", "wc_review": "749;324;421;295;794", "wc_reply_reviewers": "0;31;6;20;68", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.7204650534085253 ], "confidence_avg": [ 3.6, 0.8 ],
"soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 121.0, 51.22499389946279 ], "wc_strengths_avg": [ 104.8, 31.864714026647093 ], "wc_weaknesses_avg": [ 244.8, 197.76491094225992 ], "wc_questions_avg": [ 41.2, 27.686819969075536 ], "wc_limitations_avg": [ 4.8, 4.166533331199932 ], "wc_review_avg": [ 516.6, 212.74454164560836 ], "wc_reply_reviewers_avg": [ 25.0, 24.066574330386118 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.23249527748763857, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=758476090315859046&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stanford.edu;google.com;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;1;0;0;0;0", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Generic Semi-Supervised Framework for Volumetric Medical Image Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71846", "id": "NibgkUin5n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05dc08730e32441edff52b0fa6caab5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NibgkUin5n", "openreview": "https://openreview.net/forum?id=NibgkUin5n", "poster": "/media/PosterPDFs/NeurIPS%202023/71846.png?t=1698237945.858637", "slides": "https://nips.cc/virtual/2023/poster/71846", "video": "https://nips.cc/virtual/2023/poster/71846", "author_site": "Haonan Wang, Xiaomeng Li", "tldr": "", "abstract": "Volume-wise labeling in 3D medical images is a time-consuming task that requires expertise. As a result, there is growing interest in using semi-supervised learning (SSL) techniques to train models with limited labeled data. However, the challenges and practical applications extend beyond SSL to settings such as unsupervised domain adaptation (UDA) and semi-supervised domain generalization (SemiDG). This work aims to develop a generic SSL framework that can handle all three settings. We identify two main obstacles to achieving this goal in the existing SSL framework: 1) the weakness of capturing distribution-invariant features; and 2) the tendency for unlabeled data to be overwhelmed by labeled data, leading to over-fitting to the labeled data during training. To address these issues, we propose an Aggregating & Decoupling framework. The aggregating part consists of a Diffusion encoder that constructs a \"common knowledge set\" by extracting distribution-invariant features from aggregated information from multiple distributions/domains. The decoupling part consists of three decoders that decouple the training process with labeled and unlabeled data, thus avoiding over-fitting to labeled data, specific domains and classes. We evaluate our proposed framework on four benchmark datasets for SSL, Class-imbalanced SSL, UDA and SemiDG. 
The results showcase notable improvements compared to state-of-the-art methods across all four settings, indicating the potential of our framework to tackle more challenging SSL scenarios. Code and models are available at: https://github.com/xmed-lab/GenericSSL.", "keywords": "Volumetric Medical Image Segmentation;Semi-supervised Learning;Unsupervised Domain Adaptation;Semi-supervised Domain Generalization", "primary_area": "", "supplementary_material": "/attachment/3c134cb7b9e15bb3ec0ad91c45b917f37553e7b8.pdf", "author": "Haonan Wang;Xiaomeng Li", "authorids": "~Haonan_Wang5;~Xiaomeng_Li1", "gender": "M;F", "homepage": "https://mcgregorwwww.github.io/;https://xmengli.github.io/", "dblp": ";02/9850-1", "google_scholar": "KDNRnW0AAAAJ;uVTzPpoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Haonan_Wang5;~Xiaomeng_Li1", "aff": "University of Hong Kong;Hong Kong University of Science and Technology", "aff_domain": "hku.hk;ust.hk", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023towards,\ntitle={Towards Generic Semi-Supervised Framework for Volumetric Medical Image Segmentation},\nauthor={Haonan Wang and Xiaomeng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NibgkUin5n}\n}", "github": "", "project": "", "reviewers": "HPgc;GWTw;yd6u;suEd;GWMf", "pdf_size": 1941573, "rating": "4;4;5;5;7", "confidence": "5;5;3;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;2;3", "presentation": "2;3;3;3;3", "wc_summary": "41;68;27;138;143", "wc_strengths": "40;41;20;150;31", "wc_weaknesses": "164;130;8;150;163", "wc_questions": "4;2;69;462;1", "wc_limitations": "4;1;10;20;3", "wc_review": "253;242;134;920;341", "wc_reply_reviewers": "441;44;0;61;45", "wc_reply_authors": "1176;353;0;289;0", "reply_reviewers": "3;1;0;1;1", "reply_authors": "3;2;1;2;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.4, 48.47514827207855 ], "wc_strengths_avg": [ 56.4, 47.407172453121476 ], "wc_weaknesses_avg": [ 123.0, 58.794557571258245 ], "wc_questions_avg": [ 107.6, 179.0738395187862 ], "wc_limitations_avg": [ 7.6, 6.887670143089026 ], "wc_review_avg": [ 378.0, 278.85121480818407 ], "wc_reply_reviewers_avg": [ 118.2, 162.66948084997384 ], "wc_reply_authors_avg": [ 363.6, 431.29600044516997 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.48795003647426666, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7461062866729094565&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hku.hk;ust.hk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Hong Kong;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.ust.hk", "aff_unique_abbr": "HKU;HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Policy Optimization in a Noisy Neighborhood: On Return Landscapes in Continuous Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71845", "id": "Nn0daSf6CW", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/6191ab7080c840f67eaf5dff7d5edfcb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Nn0daSf6CW", "openreview": "https://openreview.net/forum?id=Nn0daSf6CW", "poster": "/media/PosterPDFs/NeurIPS%202023/71845.png?t=1701470784.8509417", "slides": "https://nips.cc/virtual/2023/poster/71845", "video": "https://nips.cc/virtual/2023/poster/71845", "author_site": "Nate Rahn, Pierluca D'Oro, Harley Wiltzer, Pierre-Luc Bacon, Marc Bellemare", "tldr": "", "abstract": "Deep reinforcement learning agents for continuous control are known to exhibit significant instability in their performance over time. In this work, we provide a fresh perspective on these behaviors by studying the return landscape: the mapping between a policy and a return. We find that popular algorithms traverse noisy neighborhoods of this landscape, in which a single update to the policy parameters leads to a wide range of returns. By taking a distributional view of these returns, we map the landscape, characterizing failure-prone regions of policy space and revealing a hidden dimension of policy quality. We show that the landscape exhibits surprising structure by finding simple paths in parameter space which improve the stability of a policy. To conclude, we develop a distribution-aware procedure which finds such paths, navigating away from noisy neighborhoods in order to improve the robustness of a policy. Taken together, our results provide new insight into the optimization, evaluation, and design of agents.", "keywords": "deep reinforcement learning;continuous control;return landscape;stability", "primary_area": "", "supplementary_material": "/attachment/aa9a0270665e95ed618faaeb29956e74cd4cd777.pdf", "author": "Nathan Rahn;Pierluca D'Oro;Harley Wiltzer;Pierre-Luc Bacon;Marc G Bellemare", "authorids": "~Nathan_Rahn1;~Pierluca_D'Oro1;~Harley_Wiltzer1;~Pierre-Luc_Bacon1;~Marc_G_Bellemare1", "gender": ";M;M;;M", "homepage": ";https://proceduralia.github.io;https://harwiltz.github.io/about;;http://www.marcgbellemare.info", "dblp": ";248/8326;321/0992;;38/4525", "google_scholar": ";https://scholar.google.it/citations?user=AuVp7pkAAAAJ;;;https://scholar.google.co.uk/citations?user=uyYPun0AAAAJ", "orcid": ";;;;", "linkedin": ";;harley-wiltzer-4998547a;;", "or_profile": "~Nathan_Rahn1;~Pierluca_D'Oro1;~Harley_Wiltzer1;~Pierre-Luc_Bacon1;~Marc_G_Bellemare1", "aff": ";Universit\u00e9 de Montr\u00e9al;Mila;;Google", "aff_domain": ";umontreal.ca;mila.quebec;;google.com", "position": ";PhD student;PhD student;;Research Scientist", "bibtex": "@inproceedings{\nrahn2023policy,\ntitle={Policy Optimization in a Noisy Neighborhood: On Return Landscapes in Continuous Control},\nauthor={Nathan Rahn and Pierluca D'Oro and Harley Wiltzer and Pierre-Luc Bacon and Marc G Bellemare},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Nn0daSf6CW}\n}", "github": "", "project": "", "reviewers": "ErLG;QuAp;Lizq;Voa2;ZEGo", "pdf_size": 8752478, "rating": "4;5;6;7;7", "confidence": "4;3;3;3;4", "soundness": "2;3;2;2;3", "novelty": "2;2;2;3;4", "presentation": "1;3;3;4;3", "wc_summary": "81;121;87;114;34", "wc_strengths": "226;113;68;109;41", "wc_weaknesses": "805;100;246;202;130", "wc_questions": "23;148;121;158;2", "wc_limitations": "247;6;23;1;2", "wc_review": "1382;488;545;584;209", "wc_reply_reviewers": "1423;68;142;42;37", "wc_reply_authors": "928;0;533;0;0", "reply_reviewers": "2;1;2;1;1", 
"reply_authors": "3;1;2;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 87.4, 30.754511864115155 ], "wc_strengths_avg": [ 111.4, 63.215820804605556 ], "wc_weaknesses_avg": [ 296.6, 259.37895057232384 ], "wc_questions_avg": [ 90.4, 65.08640411022873 ], "wc_limitations_avg": [ 55.8, 95.92788958379101 ], "wc_review_avg": [ 641.6, 392.8300395845511 ], "wc_reply_reviewers_avg": [ 342.4, 541.6015509578973 ], "wc_reply_authors_avg": [ 292.2, 379.0432165334185 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.21004201260420152, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9464427197098666341&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";umontreal.ca;mila.quebec;;google.com", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Mila;Google", "aff_unique_dep": ";Quebec Artificial Intelligence Institute;Google", "aff_unique_url": "https://www.umontreal.ca;https://mila.quebec;https://www.google.com", "aff_unique_abbr": "UdeM;Mila;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Mix-of-Show: Decentralized Low-Rank Adaptation for Multi-Concept Customization of Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71844", "id": "NnIaEaBfXD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3340ee1e4a8bad8d32c35721712b4d0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NnIaEaBfXD", "openreview": "https://openreview.net/forum?id=NnIaEaBfXD", "poster": "/media/PosterPDFs/NeurIPS%202023/71844.png?t=1699513573.6154013", "slides": "https://nips.cc/virtual/2023/poster/71844", "video": "https://nips.cc/virtual/2023/poster/71844", "author_site": "Yuchao Gu, Xintao Wang, Jay Zhangjie Wu, Yujun Shi, Yunpeng Chen, Zihan Fan, WUYOU XIAO, Rui Zhao, Shuning Chang, Weijia Wu, Yixiao Ge, Ying Shan, Mike Zheng Shou", "tldr": "", "abstract": "Public large-scale text-to-image diffusion models, such as Stable Diffusion, have gained significant attention from the community. These models can be easily customized for new concepts using low-rank adaptations (LoRAs). However, the utilization of multiple-concept LoRAs to jointly support multiple customized concepts presents a challenge. We refer to this scenario as decentralized multi-concept customization, which involves single-client concept tuning and center-node concept fusion. In this paper, we propose a new framework called Mix-of-Show that addresses the challenges of decentralized multi-concept customization, including concept conflicts resulting from existing single-client LoRA tuning and identity loss during model fusion. Mix-of-Show adopts an embedding-decomposed LoRA (ED-LoRA) for single-client tuning and gradient fusion for the center node to preserve the in-domain essence of single concepts and support theoretically limitless concept fusion. 
Additionally, we introduce regionally controllable sampling, which extends spatially controllable sampling (e.g., ControlNet and T2I-Adapter) to address attribute binding and missing object problems in multi-concept sampling. Extensive experiments demonstrate that Mix-of-Show is capable of composing multiple customized concepts with high fidelity, including characters, objects, and scenes.", "keywords": "Text-to-Image Diffusion Models;Concept Customization", "primary_area": "", "supplementary_material": "/attachment/327eded97d188f17e797604b9b12bd4f6eefaa9f.pdf", "author": "Yuchao Gu;Xintao Wang;Jay Zhangjie Wu;Yujun Shi;Yunpeng Chen;Zihan Fan;WUYOU XIAO;Rui Zhao;Shuning Chang;Weijia Wu;Yixiao Ge;Ying Shan;Mike Zheng Shou", "authorids": "~Yuchao_Gu1;~Xintao_Wang1;~Jay_Zhangjie_Wu1;~Yujun_Shi1;~Yunpeng_Chen1;~Zihan_Fan1;~WUYOU_XIAO1;~Rui_Zhao12;~Shuning_Chang1;~Weijia_Wu2;~Yixiao_Ge2;~Ying_Shan2;~Mike_Zheng_Shou1", "gender": "M;;M;M;;F;;M;M;M;F;M;", "homepage": "https://ycgu.site/;;https://zhangjiewu.github.io/;https://yujun-shi.github.io/;;;;;https://www.ece.nus.edu.sg/lv/people_student.html;https://weijiawu.github.io/;https://geyixiao.com/;;", "dblp": "266/4395;;322/0749;146/4499;;;;26/2578-19;;87/7695-1;228/6649;68/5910;", "google_scholar": "YpfrXyQAAAAJ;;WVp4yjoAAAAJ;Okeolr8AAAAJ;;;;https://scholar.google.com.hk/citations?user=wYs7vogAAAAJ;;NgjTRe4AAAAJ;TtU74NAAAAAJ;4oXBp9UAAAAJ;", "orcid": ";;;;;;;0000-0003-4271-0206;;0000-0003-3912-7212;;0000-0001-7673-8325;", "linkedin": ";;;;;fan-fano-zihan/;%E6%97%A0%E5%BF%A7-%E8%82%96-47720825a;;;%E5%A8%81%E4%BD%B3-%E5%90%B4-07a852280/;;YingShanProfile/;", "or_profile": "~Yuchao_Gu1;~Xintao_Wang1;~Jay_Zhangjie_Wu1;~Yujun_Shi1;~Yunpeng_Chen1;~Zihan_Fan1;~WUYOU_XIAO1;~Rui_Zhao12;~Shuning_Chang1;~Weijia_Wu2;~Yixiao_Ge2;~Ying_Shan2;~Mike_Zheng_Shou1", "aff": "National University of Singapore;;National University of Singapore;National University of Singapore;;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;Zhejiang University;Tencent;Tencent PCG ARC Lab;", "aff_domain": "u.nus.edu;;u.nus.edu;u.nus.edu;;nus.edu;nus.edu;u.nus.edu;u.nus.edu;zju.edu.cn;tencent.com;arc.tencent.com;", "position": "PhD student;;PhD student;PhD student;;MS student;MS student;PhD student;PhD student;PhD student;Researcher;Director;", "bibtex": "@inproceedings{\ngu2023mixofshow,\ntitle={Mix-of-Show: Decentralized Low-Rank Adaptation for Multi-Concept Customization of Diffusion Models},\nauthor={Yuchao Gu and Xintao Wang and Jay Zhangjie Wu and Yujun Shi and Yunpeng Chen and Zihan Fan and WUYOU XIAO and Rui Zhao and Shuning Chang and Weijia Wu and Yixiao Ge and Ying Shan and Mike Zheng Shou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NnIaEaBfXD}\n}", "github": "", "project": "", "reviewers": "Khyz;B3ui;NQJc;TCkG;acdR", "pdf_size": 36298753, "rating": "4;5;6;6;6", "confidence": "4;4;4;2;3", "soundness": "1;3;3;3;3", "novelty": "3;2;3;3;2", "presentation": "1;2;3;3;3", "wc_summary": "62;65;57;88;79", "wc_strengths": "38;69;50;67;80", "wc_weaknesses": "92;354;86;49;28", "wc_questions": "9;141;17;6;1", "wc_limitations": "11;1;8;20;6", "wc_review": "212;630;218;230;194", "wc_reply_reviewers": "197;50;88;33;0", "wc_reply_authors": "251;672;474;0;499", "reply_reviewers": "1;2;2;1;0", "reply_authors": "3;4;3;1;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 
0.8000000000000002 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 70.2, 11.513470371699404 ], "wc_strengths_avg": [ 60.8, 14.905032707109369 ], "wc_weaknesses_avg": [ 121.8, 118.47767722233584 ], "wc_questions_avg": [ 34.8, 53.353162980277006 ], "wc_limitations_avg": [ 9.2, 6.305553108173779 ], "wc_review_avg": [ 296.8, 167.00467059336995 ], "wc_reply_reviewers_avg": [ 73.6, 67.90758425978647 ], "wc_reply_authors_avg": [ 379.2, 232.07705616885093 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.6, 1.019803902718557 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.5625, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=489962155836734973&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "u.nus.edu;;u.nus.edu;u.nus.edu;;nus.edu;nus.edu;u.nus.edu;u.nus.edu;zju.edu.cn;tencent.com;arc.tencent.com;", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;0;1;2;2", "aff_unique_norm": "National University of Singapore;Zhejiang University;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": "https://www.nus.edu.sg;https://www.zju.edu.cn;https://www.tencent.com", "aff_unique_abbr": "NUS;ZJU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Understanding Diffusion Objectives as the ELBO with Simple Data Augmentation", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71843", "id": "NnMEadcdyD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce79fbf9baef726645bc2337abb0ade2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NnMEadcdyD", "openreview": "https://openreview.net/forum?id=NnMEadcdyD", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71843", "video": "https://nips.cc/virtual/2023/poster/71843", "author_site": "Diederik Kingma, Ruiqi Gao", "tldr": "", "abstract": "To achieve the highest perceptual quality, state-of-the-art diffusion models are optimized with objectives that typically look very different from the maximum likelihood and the Evidence Lower Bound (ELBO) objectives. In this work, we reveal that diffusion model objectives are actually closely related to the ELBO.\n\nSpecifically, we show that all commonly used diffusion model objectives equate to a weighted integral of ELBOs over different noise levels, where the weighting depends on the specific objective used. Under the condition of monotonic weighting, the connection is even closer: the diffusion objective then equals the ELBO, combined with simple data augmentation, namely Gaussian noise perturbation. We show that this condition holds for a number of state-of-the-art diffusion models. 
\n\nIn experiments, we explore new monotonic weightings and demonstrate their effectiveness, achieving state-of-the-art FID scores on the high-resolution ImageNet benchmark.", "keywords": "Diffusion Model;Evidence Lower Bound;Maximum Likelihood", "primary_area": "", "supplementary_material": "", "author": "Diederik P Kingma;Ruiqi Gao", "authorids": "~Diederik_P_Kingma1;~Ruiqi_Gao1", "gender": "M;F", "homepage": "http://www.dpkingma.com;http://www.stat.ucla.edu/~ruiqigao/", "dblp": "http://dblp.uni-trier.de/pers/hd/k/Kingma:Diederik_P=;206/7084", "google_scholar": "https://scholar.google.nl/citations?user=yyIoQu4AAAAJ;VdlgOXoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Diederik_P_Kingma1;~Ruiqi_Gao1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Research Scientist;Researcher", "bibtex": "@inproceedings{\nkingma2023understanding,\ntitle={Understanding Diffusion Objectives as the {ELBO} with Simple Data Augmentation},\nauthor={Diederik P Kingma and Ruiqi Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NnMEadcdyD}\n}", "github": "", "project": "", "reviewers": "Qo9S;kL4L;bjDS;eoU3;DESf", "pdf_size": 4259371, "rating": "6;6;8;8;9", "confidence": "4;2;3;3;4", "soundness": "3;3;3;4;4", "novelty": "3;3;4;4;4", "presentation": "2;3;4;4;4", "wc_summary": "98;67;262;71;134", "wc_strengths": "63;33;171;55;87", "wc_weaknesses": "454;16;684;1;97", "wc_questions": "136;1;201;1;99", "wc_limitations": "16;1;25;11;1", "wc_review": "767;118;1343;139;418", "wc_reply_reviewers": "216;0;39;0;0", "wc_reply_authors": "126;72;72;72;72", "reply_reviewers": "1;0;1;0;0", "reply_authors": "3;2;2;2;2", "rating_avg": [ 7.4, 1.2 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 126.4, 71.90159942588203 ], "wc_strengths_avg": [ 81.8, 47.8263525684324 ], "wc_weaknesses_avg": [ 250.4, 272.0798412231233 ], "wc_questions_avg": [ 87.6, 77.88607064167508 ], "wc_limitations_avg": [ 10.8, 9.173875952943773 ], "wc_review_avg": [ 557.0, 457.96550088407315 ], "wc_reply_reviewers_avg": [ 51.0, 83.87133002403145 ], "wc_reply_authors_avg": [ 82.8, 21.599999999999998 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3563483225498991, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9793755809795447904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Human spatiotemporal pattern learning as probabilistic program synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71842", "id": "NnXznLurw5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa5c083f9d387c49514eb5c4dc2dc16b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NnXznLurw5", "openreview": "https://openreview.net/forum?id=NnXznLurw5", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71842.png?t=1702393261.4659593", "slides": "https://nips.cc/virtual/2023/poster/71842", "video": "https://nips.cc/virtual/2023/poster/71842", "author_site": "Tracey Mills, Josh Tenenbaum, Samuel Cheyette", "tldr": "", "abstract": "People are adept at learning a wide variety of structured patterns from small amounts of data, presenting a conundrum from the standpoint of the bias-variance tradeoff: what kinds of representations and algorithms support the joint flexibility and data-paucity of human learning? One possibility is that people \"learn by programming\": inducing probabilistic models to fit observed data. Here, we experimentally test human learning in the domain of structured 2-dimensional patterns, using a task in which participants repeatedly predicted where a dot would move based on its previous trajectory. We evaluate human performance against standard parametric and non-parametric time-series models, as well as two Bayesian program synthesis models whose hypotheses vary in their degree of structure: a compositional Gaussian Process model and a structured \"Language of Thought\" (LoT) model. We find that signatures of human pattern learning are best explained by the LoT model, supporting the idea that the flexibility and data-efficiency of human structure learning can be understood as probabilistic inference over an expressive space of programs.", "keywords": "pattern learning; probabilistic programs; program synthesis; gaussian process; human learning", "primary_area": "", "supplementary_material": "/attachment/ce57eb5e237ef12a98aa8c1bb515dff354e257f0.pdf", "author": "Tracey Mills;Joshua B. Tenenbaum;Samuel J Cheyette", "authorids": "~Tracey_Mills1;~Joshua_B._Tenenbaum1;~Samuel_J_Cheyette1", "gender": "F;;Not Specified", "homepage": ";;http://colala.berkeley.edu/people/SamCheyette/", "dblp": ";t/JoshuaBTenenbaum;", "google_scholar": ";;", "orcid": ";;", "linkedin": "tracey-mills-7229441a7/;;", "or_profile": "~Tracey_Mills1;~Joshua_B._Tenenbaum1;~Samuel_J_Cheyette1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "Researcher;Professor;Postdoc", "bibtex": "@inproceedings{\nmills2023human,\ntitle={Human spatiotemporal pattern learning as probabilistic program synthesis},\nauthor={Tracey Mills and Joshua B. 
Tenenbaum and Samuel J Cheyette},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NnXznLurw5}\n}", "github": "", "project": "", "reviewers": "zTCY;y3JJ;XJBu;igwV;iZWm", "pdf_size": 2902400, "rating": "5;6;6;7;7", "confidence": "2;4;4;4;3", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "2;2;4;4;3", "wc_summary": "98;132;68;70;192", "wc_strengths": "63;44;36;45;169", "wc_weaknesses": "419;435;13;35;400", "wc_questions": "186;37;51;54;426", "wc_limitations": "135;89;8;81;28", "wc_review": "901;737;176;285;1215", "wc_reply_reviewers": "313;0;17;35;80", "wc_reply_authors": "338;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 112.0, 46.25148646259923 ], "wc_strengths_avg": [ 71.4, 49.59274140436279 ], "wc_weaknesses_avg": [ 260.4, 193.46276127461843 ], "wc_questions_avg": [ 150.8, 147.82070220371705 ], "wc_limitations_avg": [ 68.2, 45.38457887873369 ], "wc_review_avg": [ 662.8, 386.49108657251077 ], "wc_reply_reviewers_avg": [ 89.0, 115.13296660817873 ], "wc_reply_authors_avg": [ 67.6, 135.2 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17848300995603648758&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "IPMix: Label-Preserving Data Augmentation Method for Training Robust Classifiers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71841", "id": "No52399wXA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c917d8b9e01427f3184d80ade22f4d1f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=No52399wXA", "openreview": "https://openreview.net/forum?id=No52399wXA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71841", "video": "https://nips.cc/virtual/2023/poster/71841", "author_site": "Zhenglin Huang, Xiaoan Bao, Na Zhang, Qingqi Zhang, Xiao Tu, Biao Wu, Xi Yang", "tldr": "", "abstract": "Data augmentation has been proven effective for training high-accuracy convolutional neural network classifiers by preventing overfitting. However, building deep neural networks in real-world scenarios requires not only high accuracy on clean data but also robustness when data distributions shift. While prior methods have proposed that there is a trade-off between accuracy and robustness, we propose IPMix, a simple data augmentation approach to improve robustness without hurting clean accuracy. IPMix integrates three levels of data augmentation (image-level, patch-level, and pixel-level) into a coherent and label-preserving technique to increase the diversity of training data with limited computational overhead. 
To further improve the robustness, IPMix introduces structural complexity at different levels to generate more diverse images and adopts the random mixing method for multi-scale information fusion. Experiments demonstrate that IPMix outperforms state-of-the-art methods in corruption robustness on CIFAR-C and ImageNet-C. In addition, we show that IPMix also significantly improves other safety measures, including robustness to adversarial perturbations, calibration, prediction consistency, and anomaly detection, achieving state-of-the-art or comparable results on several benchmarks, including ImageNet-R, ImageNet-A, and ImageNet-O.", "keywords": "data augmentation;robustness;safety", "primary_area": "", "supplementary_material": "/attachment/08619af5bc86f67fb4c7ad6e41399b07f7618abd.pdf", "author": "Zhenglin Huang;Xiaoan Bao;Na Zhang;Qingqi Zhang;Xiao mei Tu;Biao Wu;Xi Yang", "authorids": "~Zhenglin_Huang2;~Xiaoan_Bao1;~Na_Zhang4;~Qingqi_Zhang1;~Xiao_mei_Tu1;~Biao_Wu2;~Xi_Yang13", "gender": ";M;;;F;M;M", "homepage": ";https://yjsxt.zstu.edu.cn/open/dsfc/search/F56F7CC7AB543059E0530100007F4C41;;;;https://math.sci.zstu.edu.cn/;", "dblp": ";;;;;;", "google_scholar": ";;;;;;ztJiAk8AAAAJ", "orcid": ";0000-0001-8305-0369;0000-0001-5131-6417;;0000-0002-6841-2509;;", "linkedin": ";;;;;;", "or_profile": "~Zhenglin_Huang2;~Xiaoan_Bao1;~Na_Zhang4;~Qingqi_Zhang1;~Xiao_mei_Tu1;~Biao_Wu2;~Xi_Yang13", "aff": ";Zhejiang Sci-Tech University;Zhejiang Sci-Tech University;;Zhejiang Guangsha Vocational and Technical University of construction;Zhejiang Sci-Tech University;University of Science and Technology of China", "aff_domain": ";zstu.edu.cn;zstu.edu.cn;;zjgsdx.edu.cn;zstu.edu.cn;ustc.edu.cn", "position": ";Full Professor;Associate Professor;;Lecturer;Lecturer;PhD student", "bibtex": "@inproceedings{\nhuang2023ipmix,\ntitle={{IPM}ix: Label-Preserving Data Augmentation Method for Training Robust Classifiers},\nauthor={Zhenglin Huang and Xiaoan Bao and Na Zhang and Qingqi Zhang and Xiao mei Tu and Biao Wu and Xi Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=No52399wXA}\n}", "github": "", "project": "", "reviewers": "kzv7;jACH;Bz96;FKxz;jXDs", "pdf_size": 1373109, "rating": "4;5;6;6;7", "confidence": "4;5;3;4;4", "soundness": "3;2;2;2;3", "novelty": "2;2;2;2;3", "presentation": "3;3;2;3;3", "wc_summary": "64;66;63;54;208", "wc_strengths": "58;52;50;57;58", "wc_weaknesses": "239;291;418;144;165", "wc_questions": "65;3;30;162;344", "wc_limitations": "1;13;11;10;1", "wc_review": "427;425;572;427;776", "wc_reply_reviewers": "115;64;91;55;55", "wc_reply_authors": "912;54;987;58;52", "reply_reviewers": "1;1;2;1;1", "reply_authors": "4;2;4;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 91.0, 58.64469285451157 ], "wc_strengths_avg": [ 55.0, 3.3466401061363023 ], "wc_weaknesses_avg": [ 251.4, 98.46542540404728 ], "wc_questions_avg": [ 120.8, 123.89576263940586 ], "wc_limitations_avg": [ 7.2, 5.1536394906900505 ], "wc_review_avg": [ 525.4, 137.417029512357 ], "wc_reply_reviewers_avg": [ 76.0, 23.5457002444183 ], "wc_reply_authors_avg": [ 412.6, 439.02236845062913 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence":
-0.31008683647302115, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4050152173130277303&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": ";zstu.edu.cn;zstu.edu.cn;;zjgsdx.edu.cn;zstu.edu.cn;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Zhejiang Sci-Tech University;Zhejiang Guangsha Vocational and Technical University;University of Science and Technology of China", "aff_unique_dep": ";Department of Construction;", "aff_unique_url": "https://www.zstu.edu.cn;;http://www.ustc.edu.cn", "aff_unique_abbr": "ZSTU;;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Benchmarking Encoder-Decoder Architectures for Biplanar X-ray to 3D Bone Shape Reconstruction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73612", "id": "NoE8g3LRAM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/412732f172bdd5ad0efde2fafa110700-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=NoE8g3LRAM", "openreview": "https://openreview.net/forum?id=NoE8g3LRAM", "poster": "/media/PosterPDFs/NeurIPS%202023/73612.png?t=1701598864.7162852", "slides": "https://nips.cc/virtual/2023/poster/73612", "video": "https://nips.cc/virtual/2023/poster/73612", "author_site": "Mahesh Shakya, Bishesh Khanal", "tldr": "", "abstract": "Various deep learning models have been proposed for 3D bone shape reconstruction from two orthogonal (biplanar) X-ray images.\nHowever, it is unclear how these models compare against each other since they are evaluated on different anatomies, cohorts, and (often privately held) datasets.\nMoreover, the impact of the commonly optimized image-based segmentation metrics such as the Dice score on the estimation of clinical parameters relevant in 2D-3D bone shape reconstruction is not well known.\nTo move closer toward clinical translation, we propose a benchmarking framework that evaluates tasks relevant to real-world clinical scenarios, including reconstruction of fractured bones, bones with implants, robustness to population shift, and error in estimating clinical parameters.\nOur open-source platform provides reference implementations of 8 models (many of whose implementations were not publicly available), APIs to easily collect and preprocess 6 public datasets, and the implementation of automatic clinical parameter and landmark extraction methods.
\nWe present an extensive evaluation of 8 2D-3D models on equal footing using 6 public datasets comprising images for four different anatomies.\nOur results show that attention-based methods that capture global spatial relationships tend to perform better across all anatomies and datasets; performance on clinically relevant subgroups may be overestimated without disaggregated reporting; ribs are substantially more difficult to reconstruct compared to the femur, hip and spine; and improvements in Dice score do not always bring corresponding improvements in the automatic estimation of clinically relevant parameters.", "keywords": "Benchmark;Reproducibility;2D-3D Reconstruction;X-ray Imaging;Encoder-Decoder Architecture;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/1b09c4bfa64b7f8b7d45624d362943590f60fb24.pdf", "author": "Mahesh Shakya;Bishesh Khanal", "authorids": "~Mahesh_Shakya1;~Bishesh_Khanal4", "gender": ";M", "homepage": "https://www.naamii.org.np/teams/mahesh-shakya/;https://bishesh.github.io/", "dblp": "358/2809;18/10556", "google_scholar": "dzcUdHQAAAAJ;https://scholar.google.co.uk/citations?user=ZfaUCG5h3xsC", "orcid": ";0000-0002-2775-4748", "linkedin": ";", "or_profile": "~Mahesh_Shakya1;~Bishesh_Khanal4", "aff": "NAAMII,Nepal;NepAl Applied Mathematics and Informatics Institute for Research (NAAMII)", "aff_domain": "naamii.org.np;naamii.org.np", "position": "Researcher;Research scientist", "bibtex": "@inproceedings{\nshakya2023benchmarking,\ntitle={Benchmarking Encoder-Decoder Architectures for Biplanar X-ray to 3D Bone Shape Reconstruction},\nauthor={Mahesh Shakya and Bishesh Khanal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=NoE8g3LRAM}\n}", "github": "", "project": "", "reviewers": "HcgH;EdSm;KPqo;x1j9;bLVp", "pdf_size": 2656382, "rating": "6;6;6;6;8", "confidence": "3;5;3;3;4", "wc_summary_and_contributions": "56;42;72;78;75", "wc_strengths": "74;56;95;97;40", "wc_improvement": "116;222;53;51;223", "wc_limitations": "91;167;10;48;6", "wc_correctness": "39;10;22;26;1", "wc_clarity": "6;20;9;40;35", "wc_relation_to_prior_work": "12;64;17;26;1", "wc_documentation": "23;28;11;21;27", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "418;610;290;388;409", "wc_reply_reviewers": "0;83;0;0;27", "wc_reply_authors": "554;1971;494;273;385", "reply_reviewers": "0;1;0;0;1", "reply_authors": "1;5;1;2;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 64.6, 13.61763562443936 ], "wc_strengths_avg": [ 72.4, 22.07804339156892 ], "wc_improvement_avg": [ 133.0, 76.72548468403441 ], "wc_limitations_avg": [ 64.4, 59.788293168479065 ], "wc_correctness_avg": [ 19.6, 13.124023773218335 ], "wc_clarity_avg": [ 22.0, 13.579396157414365 ], "wc_relation_to_prior_work_avg": [ 24.0, 21.568495543268657 ], "wc_documentation_avg": [ 22.0, 6.06630035524124 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 423.0, 104.02307436333537 ], "wc_reply_reviewers_avg": [ 22.0, 32.24282866002919 ], "wc_reply_authors_avg": [ 735.4, 625.2169543446498 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.24999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4131734010284899180&as_sdt=5,33&sciodt=0,33&hl=en",
"gs_version_total": 7, "email": "naamii.org.np;naamii.org.np", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "NAAMII;Nepal Applied Mathematics and Informatics Institute for Research", "aff_unique_dep": ";Applied Mathematics and Informatics", "aff_unique_url": ";", "aff_unique_abbr": ";NAAMII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Nepal" }, { "title": "Distributionally Robust Skeleton Learning of Discrete Bayesian Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71840", "id": "NpyZkaEEun", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c80addda8bcd95339921cba7581ac7bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NpyZkaEEun", "openreview": "https://openreview.net/forum?id=NpyZkaEEun", "poster": "/media/PosterPDFs/NeurIPS%202023/71840.png?t=1700367702.038013", "slides": "https://nips.cc/virtual/2023/poster/71840", "video": "https://nips.cc/virtual/2023/poster/71840", "author_site": "Yeshu Li, Brian Ziebart", "tldr": "", "abstract": "We consider the problem of learning the exact skeleton of general discrete Bayesian networks from potentially corrupted data. Building on distributionally robust optimization and a regression approach, we propose to optimize the most adverse risk over a family of distributions within bounded Wasserstein distance or KL divergence to the empirical distribution. The worst-case risk accounts for the effect of outliers. The proposed approach applies for general categorical random variables without assuming faithfulness, an ordinal relationship or a specific form of conditional distribution. We present efficient algorithms and show the proposed methods are closely related to the standard regularized regression approach. Under mild assumptions, we derive non-asymptotic guarantees for successful structure learning with logarithmic sample complexities for bounded-degree graphs. 
Numerical study on synthetic and real datasets validates the effectiveness of our method.", "keywords": "structure learning;Bayesian network;robustness", "primary_area": "", "supplementary_material": "/attachment/f8e8a83718f9a2fada6425cf54d47095e74170f0.pdf", "author": "Yeshu Li;Brian D Ziebart", "authorids": "~Yeshu_Li1;~Brian_D_Ziebart1", "gender": "M;M", "homepage": ";https://www.cs.uic.edu/Ziebart/", "dblp": "204/1493;39/10481", "google_scholar": "PYXmSwkAAAAJ;https://scholar.google.com.tw/citations?user=_JjIgGcAAAAJ", "orcid": "0000-0001-5075-1062;", "linkedin": "yeshu-li-a49a98111/;", "or_profile": "~Yeshu_Li1;~Brian_D_Ziebart1", "aff": "University of Illinois, Chicago;University of Illinois, Chicago", "aff_domain": "uic.edu;uic.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023distributionally,\ntitle={Distributionally Robust Skeleton Learning of Discrete Bayesian Networks},\nauthor={Yeshu Li and Brian D Ziebart},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NpyZkaEEun}\n}", "github": "", "project": "", "reviewers": "YEGV;onGN;AT8n;Np2S;Hyho", "pdf_size": 642953, "rating": "4;5;7;8;8", "confidence": "3;1;2;3;4", "soundness": "2;3;4;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;4;4", "wc_summary": "244;43;62;16;75", "wc_strengths": "36;4;167;23;0", "wc_weaknesses": "198;11;138;6;0", "wc_questions": "1;392;8;92;89", "wc_limitations": "1;1;115;1;0", "wc_review": "480;451;490;138;164", "wc_reply_reviewers": "43;110;60;0;14", "wc_reply_authors": "69;0;0;0;0", "reply_reviewers": "2;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 1.624807680927192 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 88.0, 80.48602363143554 ], "wc_strengths_avg": [ 46.0, 61.886993787063204 ], "wc_weaknesses_avg": [ 70.6, 81.83300067821051 ], "wc_questions_avg": [ 116.4, 143.08682678709457 ], "wc_limitations_avg": [ 23.6, 45.7016411083891 ], "wc_review_avg": [ 344.6, 158.80503770346834 ], "wc_reply_reviewers_avg": [ 45.4, 38.57252908482926 ], "wc_reply_authors_avg": [ 13.8, 27.6 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.45866432210600416, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9270epG72FoJ:scholar.google.com/&scioq=Distributionally+Robust+Skeleton+Learning+of+Discrete+Bayesian+Networks&hl=en&as_sdt=0,5", "gs_version_total": 8, "email": "uic.edu;uic.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On the Convergence to a Global Solution of Shuffling-Type Gradient Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71839", "id": "Nr1XSeDzpn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eeb57fdf745eb31a3c7ef22c59a4661d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Nr1XSeDzpn", "openreview": "https://openreview.net/forum?id=Nr1XSeDzpn", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71839.png?t=1699544783.8909001", "slides": "https://nips.cc/virtual/2023/poster/71839", "video": "https://nips.cc/virtual/2023/poster/71839", "author_site": "Lam Nguyen, Trang H. Tran", "tldr": "", "abstract": "Stochastic gradient descent (SGD) algorithm is the method of choice in many machine learning tasks thanks to its scalability and efficiency in dealing with large-scale problems. In this paper, we focus on the shuffling version of SGD which matches the mainstream practical heuristics. We show the convergence to a global solution of shuffling SGD for a class of non-convex functions under over-parameterized settings. Our analysis employs more relaxed non-convex assumptions than previous literature. Nevertheless, we maintain the desired computational complexity as shuffling SGD has achieved in the general convex setting.", "keywords": "stochastic gradient;shuffling type gradient method;global convergence", "primary_area": "", "supplementary_material": "/attachment/c8ccecf57f2819d6fa04eed96858a0569ad04c50.pdf", "author": "Lam M. Nguyen;Trang H. Tran", "authorids": "~Lam_M._Nguyen1;~Trang_H._Tran1", "gender": "M;F", "homepage": "https://lamnguyen-mltd.github.io/;https://htt-trangtran.github.io/", "dblp": "181/1428;279/4007", "google_scholar": "DeFL5Q8AAAAJ;EWGuYl4AAAAJ", "orcid": ";0000-0002-9551-4738", "linkedin": "lam-m-nguyen-71b54750/;trang-tran-313b49195/", "or_profile": "~Lam_M_Nguyen1;~Trang_H_Tran1", "aff": "IBM Research, Thomas J. Watson Research Center;Cornell University", "aff_domain": "ibm.com;cornell.edu", "position": "Staff Research Scientist;PhD student", "bibtex": "@inproceedings{\nnguyen2023on,\ntitle={On the Convergence to a Global Solution of Shuffling-Type Gradient Algorithms},\nauthor={Lam M. Nguyen and Trang H. 
Tran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Nr1XSeDzpn}\n}", "github": "", "project": "", "reviewers": "JrBJ;MeiV;7CVX;VXbB", "pdf_size": 502620, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "37;56;54;55", "wc_strengths": "30;83;103;53", "wc_weaknesses": "105;55;155;176", "wc_questions": "26;76;216;86", "wc_limitations": "1;1;7;1", "wc_review": "199;271;535;371", "wc_reply_reviewers": "6;8;19;116", "wc_reply_authors": "356;63;103;782", "reply_reviewers": "1;1;1;2", "reply_authors": "4;2;3;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 50.5, 7.826237921249264 ], "wc_strengths_avg": [ 67.25, 27.913930214142187 ], "wc_weaknesses_avg": [ 122.75, 46.852828089668186 ], "wc_questions_avg": [ 101.0, 70.178344238091 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 344.0, 126.05950975630518 ], "wc_reply_reviewers_avg": [ 37.25, 45.73497020880193 ], "wc_reply_authors_avg": [ 326.0, 286.24028367789185 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7858081784069991478&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "ibm.com;cornell.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "IBM;Cornell University", "aff_unique_dep": "IBM Research;", "aff_unique_url": "https://www.ibm.com/research;https://www.cornell.edu", "aff_unique_abbr": "IBM;Cornell", "aff_campus_unique_index": "0", "aff_campus_unique": "Yorktown Heights;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "NrG1fURihk", "title": "A representation-learning game for classes of prediction tasks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a formulation for learning dimensionality-reducing representations of unlabeled feature vectors, when prior knowledge of future prediction tasks is available. The formulation is based on a three-player game, in which the first player chooses a representation, the second player then adversarially chooses a prediction task, and the third player predicts the response based on the represented features. The first and third players aim to minimize, and the second player to maximize, the regret: the minimal prediction loss using the representation compared to the same loss using the original features. Our first contribution is theoretical and addresses the mean squared error loss function, and the case in which the representation, the response to predict and the predictors are all linear functions. We establish the optimal representation in pure strategies, which shows the effectiveness of the prior knowledge, and the optimal regret in mixed strategies, which shows the usefulness of randomizing the representation. We prove that optimal randomization requires a precisely characterized finite number of representations, which is smaller than the dimension of the feature vector, and potentially much smaller. 
Our second contribution is an efficient gradient-based iterative algorithm that approximates the optimal mixed representation for a general loss function, and general classes of representations, response functions and predictors.", "keywords": "representation learning;dimensionality-reduction;regret;minimax solution;mixed strategies;multiplicative weights update", "primary_area": "", "supplementary_material": "/attachment/8d4b574c401e63cc82cf71f57273e18308aae625.pdf", "author": "Neria Uzan;Nir Weinberger", "authorids": "~Neria_Uzan1;~Nir_Weinberger1", "gender": "M;M", "homepage": ";https://sites.google.com/view/nir-weinberger/home", "dblp": ";82/11151.html", "google_scholar": ";zRkNfH8AAAAJ", "orcid": ";", "linkedin": "neria-uzan-369803107/;", "or_profile": "~Neria_Uzan1;~Nir_Weinberger1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Electrical Engineering Department, Technion \u2013 Israel Institute of Technology, Technion - Israel Institute of Technology", "aff_domain": "campus.technion.ac.il;ee.technion.ac.il", "position": "MS student;Assistant Professor", "bibtex": "@misc{\nuzan2023a,\ntitle={A representation-learning game for classes of prediction tasks},\nauthor={Neria Uzan and Nir Weinberger},\nyear={2023},\nurl={https://openreview.net/forum?id=NrG1fURihk}\n}", "github": "", "project": "", "reviewers": "xuhY;rwLL;WSXs;3ghR;fmep", "site": "https://openreview.net/forum?id=NrG1fURihk", "pdf_size": 458950, "rating": "4;4;6;6;6", "confidence": "3;3;4;3;3", "soundness": "2;3;2;3;3", "novelty": "2;2;2;2;3", "presentation": "3;3;3;3;2", "wc_summary": "61;63;92;108;103", "wc_strengths": "22;35;74;76;122", "wc_weaknesses": "234;76;121;240;194", "wc_questions": "2;10;87;15;256", "wc_limitations": "1;1;40;5;1", "wc_review": "320;185;414;444;676", "wc_reply_reviewers": "84;0;74;166;319", "wc_reply_authors": "361;0;0;0;28", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 85.4, 19.80504986108341 ], "wc_strengths_avg": [ 65.8, 35.204545161101 ], "wc_weaknesses_avg": [ 173.0, 64.44222218390672 ], "wc_questions_avg": [ 74.0, 95.97291284523982 ], "wc_limitations_avg": [ 9.6, 15.278743403827423 ], "wc_review_avg": [ 407.8, 161.63464975060268 ], "wc_reply_reviewers_avg": [ 128.6, 108.77426166147946 ], "wc_reply_authors_avg": [ 77.8, 142.01464713190677 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3MeR4ktXjMcJ:scholar.google.com/&scioq=A+representation-learning+game+for+classes+of+prediction+tasks&hl=en&as_sdt=0,5", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;Technion \u2013 Israel Institute of Technology", "aff_unique_dep": ";Electrical Engineering Department", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.technion.ac.il", "aff_unique_abbr": "Technion;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "On the Complexity of Differentially Private Best-Arm Identification with Fixed 
Confidence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71838", "id": "NsPbMwyxRl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e14de1a0ebc31d9b989f5f5528c125bb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NsPbMwyxRl", "openreview": "https://openreview.net/forum?id=NsPbMwyxRl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71838", "video": "https://nips.cc/virtual/2023/poster/71838", "author_site": "Achraf Azize, Marc Jourdan, Aymen Al Marjani, Debabrota Basu", "tldr": "", "abstract": "Best Arm Identification (BAI) problems are progressively used for data-sensitive applications, such as designing adaptive clinical trials, tuning hyper-parameters, and conducting user studies to name a few. Motivated by the data privacy concerns invoked by these applications, we study the problem of BAI with fixed confidence under $\\epsilon$-global Differential Privacy (DP). First, to quantify the cost of privacy, we derive a lower bound on the sample complexity of any $\\delta$-correct BAI algorithm satisfying $\\epsilon$-global DP. Our lower bound suggests the existence of two privacy regimes depending on the privacy budget $\\epsilon$. In the high-privacy regime (small $\\epsilon$), the hardness depends on a coupled effect of privacy and a novel information-theoretic quantity, called the Total Variation Characteristic Time. In the low-privacy regime (large $\\epsilon$), the sample complexity lower bound reduces to the classical non-private lower bound. Second, we propose AdaP-TT, an $\\epsilon$-global DP variant of the Top Two algorithm. AdaP-TT runs in *arm-dependent adaptive episodes* and adds *Laplace noise* to ensure a good privacy-utility trade-off. We derive an asymptotic upper bound on the sample complexity of AdaP-TT that matches with the lower bound up to multiplicative constants in the high-privacy regime. 
Finally, we provide an experimental analysis of AdaP-TT that validates our theoretical results.", "keywords": "Differential Privacy;Multi-armed Bandits;Best Arm Identification;Fixed Confidence", "primary_area": "", "supplementary_material": "/attachment/482757400f9db06b3fb174bb3ae86a4fb890e9c0.zip", "author": "Achraf Azize;Marc Jourdan;Aymen Al Marjani;Debabrota Basu", "authorids": "~Achraf_Azize1;~Marc_Jourdan1;~Aymen_Al_Marjani1;~Debabrota_Basu1", "gender": "M;M;;", "homepage": "https://achraf-azize.github.io/;https://marcjourdan.github.io;;https://debabrota-basu.github.io/", "dblp": "287/4270;228/8157;;126/2209", "google_scholar": "9RKFStAAAAAJ;BOXGjhgAAAAJ;;https://scholar.google.co.in/citations?user=e26Maa4AAAAJ", "orcid": ";0000-0002-2449-4549;;", "linkedin": "achraf-azize/;marc-jourdan/;;", "or_profile": "~Achraf_Azize1;~Marc_Jourdan1;~Aymen_Al_Marjani1;~Debabrota_Basu1", "aff": "INRIA;INRIA;;INRIA", "aff_domain": "inria.fr;inria.fr;;inria.fr", "position": "PhD student;PhD student;;Faculty", "bibtex": "@inproceedings{\nazize2023on,\ntitle={On the Complexity of Differentially Private Best-Arm Identification with Fixed Confidence},\nauthor={Achraf Azize and Marc Jourdan and Aymen Al Marjani and Debabrota Basu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NsPbMwyxRl}\n}", "github": "", "project": "", "reviewers": "RUVA;Z6yo;rwuX;NTgq;jHc4;Qtw8", "pdf_size": 720369, "rating": "5;5;6;7;8;8", "confidence": "2;3;3;4;1;3", "soundness": "3;3;3;2;4;3", "novelty": "3;3;2;2;4;4", "presentation": "1;1;3;3;4;3", "wc_summary": "246;103;84;66;49;86", "wc_strengths": "89;87;59;108;34;107", "wc_weaknesses": "360;71;138;271;13;121", "wc_questions": "118;5;3;9;1;44", "wc_limitations": "18;5;1;1;23;6", "wc_review": "831;271;285;455;120;364", "wc_reply_reviewers": "51;12;34;53;0;251", "wc_reply_authors": "0;0;0;0;0;337", "reply_reviewers": "1;1;1;1;0;2", "reply_authors": "1;1;1;1;1;2", "rating_avg": [ 6.5, 1.2583057392117916 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 105.66666666666667, 64.9888879390589 ], "wc_strengths_avg": [ 80.66666666666667, 26.449112566503164 ], "wc_weaknesses_avg": [ 162.33333333333334, 118.19710468347165 ], "wc_questions_avg": [ 30.0, 41.98412398355677 ], "wc_limitations_avg": [ 9.0, 8.465616732800196 ], "wc_review_avg": [ 387.6666666666667, 222.64895139109808 ], "wc_reply_reviewers_avg": [ 66.83333333333333, 84.56834842632057 ], "wc_reply_authors_avg": [ 56.166666666666664, 125.59248473623818 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.14048787173725413, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15447581044252628911&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 11, "email": "inria.fr;inria.fr;;inria.fr", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Reverse Engineering Self-Supervised Learning", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71837", "id": "NsVEjx6YPd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b63ad8c24354b0e5bcb7aea16490beab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NsVEjx6YPd", "openreview": "https://openreview.net/forum?id=NsVEjx6YPd", "poster": "/media/PosterPDFs/NeurIPS%202023/71837.png?t=1699549957.7507021", "slides": "https://nips.cc/virtual/2023/poster/71837", "video": "https://nips.cc/virtual/2023/poster/71837", "author_site": "Ido Ben-Shaul, Ravid Shwartz-Ziv, Tomer Galanti, Shai Dekel, Yann LeCun", "tldr": "", "abstract": "Understanding the learned representation and underlying mechanisms of Self-Supervised Learning (SSL) often poses a challenge. In this paper, we \u2018reverse engineer\u2019 SSL, conducting an in-depth empirical analysis of its learned internal representations, encompassing diverse models, architectures, and hyperparameters. Our study reveals an intriguing process within the SSL training: an inherent facilitation of semantic label-based clustering, which is surprisingly driven by the regularization component of the SSL objective. This clustering not only enhances downstream classification, but also compresses the information. We further illustrate that the alignment of the SSL-trained representation is more pronounced with semantic classes rather than random functions. Remarkably, the learned representations align with semantic classes across various hierarchical levels, with this alignment intensifying when going deeper into the network. This \u2018reverse engineering\u2019 approach provides valuable insights into the inner mechanism of SSL and their influences on the performance across different class sets.", "keywords": "Self-Supervised Learning;Deep Learning;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/f06415eef460bfafb6efb568769bebae9df0befb.zip", "author": "Ido Ben-Shaul;Ravid Shwartz-Ziv;Tomer Galanti;Shai Dekel;Yann LeCun", "authorids": "~Ido_Ben-Shaul1;~Ravid_Shwartz-Ziv2;~Tomer_Galanti1;~Shai_Dekel1;~Yann_LeCun1", "gender": "M;M;;M;M", "homepage": "https://www.idobenshaul.com;https://tomergalanti.github.io;https://www.shaidekel.com/;http://yann.lecun.com;https://www.ravid-shwartz-ziv.com/", "dblp": "270/8226;198/1490;72/6758;l/YannLeCun;", "google_scholar": "ArjvABYAAAAJ;;;WLN3QrAAAAAJ;https://scholar.google.co.il/citations?user=SqsLFwMAAAAJ", "orcid": ";;;;", "linkedin": "ido-ben-shaul-482449147/;tomer-galanti-5880b1104/;;;", "or_profile": "~Ido_Ben-Shaul1;~Tomer_Galanti1;~Shai_Dekel1;~Yann_LeCun1;~ravid_ziv1", "aff": "eBay;Massachusetts Institute of Technology;Tel Aviv University;New York University;New York University", "aff_domain": "ebay.com;mit.edu;tau.ac.il;nyu.edu;nyu.edu", "position": "Researcher;Postdoc;Associate Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nben-shaul2023reverse,\ntitle={Reverse Engineering Self-Supervised Learning},\nauthor={Ido Ben-Shaul and Ravid Shwartz-Ziv and Tomer Galanti and Shai Dekel and Yann LeCun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NsVEjx6YPd}\n}", "github": "", "project": "", "reviewers": "YeWL;7nAJ;b2bW;MBnG", "pdf_size": 3345396, "rating": "5;5;6;7", "confidence": "3;3;5;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "137;47;105;48", "wc_strengths": "79;47;44;56", "wc_weaknesses": "169;181;310;79", "wc_questions": "507;27;299;54", 
"wc_limitations": "4;14;2;9", "wc_review": "896;316;760;246", "wc_reply_reviewers": "78;45;453;0", "wc_reply_authors": "574;194;311;138", "reply_reviewers": "1;1;2;0", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.25, 38.45370593323874 ], "wc_strengths_avg": [ 56.5, 13.720422734012244 ], "wc_weaknesses_avg": [ 184.75, 82.36010866918522 ], "wc_questions_avg": [ 221.75, 195.83331560283608 ], "wc_limitations_avg": [ 7.25, 4.656984002549289 ], "wc_review_avg": [ 554.5, 278.795175711489 ], "wc_reply_reviewers_avg": [ 144.0, 180.5366998701372 ], "wc_reply_authors_avg": [ 304.25, 167.78315618678772 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2293431688127428271&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ebay.com;mit.edu;tau.ac.il;nyu.edu;nyu.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "eBay Inc.;Massachusetts Institute of Technology;Tel Aviv University;New York University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ebay.com;https://web.mit.edu;https://www.tau.ac.il;https://www.nyu.edu", "aff_unique_abbr": "eBay;MIT;TAU;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Blocked Collaborative Bandits: Online Collaborative Filtering with Per-Item Budget Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71836", "id": "Ntd6X7uWYF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4feccf7f781e1844f3a5d70eb779147a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ntd6X7uWYF", "openreview": "https://openreview.net/forum?id=Ntd6X7uWYF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71836", "video": "https://nips.cc/virtual/2023/poster/71836", "author_site": "Soumyabrata Pal, Arun Suggala, Karthikeyan Shanmugam, Prateek Jain", "tldr": "", "abstract": "We consider the problem of \\emph{blocked} collaborative bandits where there are multiple users, each with an associated multi-armed bandit problem. These users are grouped into \\emph{latent} clusters such that the mean reward vectors of users within the same cluster are identical. Our goal is to design algorithms that maximize the cumulative reward accrued by all the users over time, under the \\emph{constraint} that no arm of a user is pulled more than $\\mathsf{B}$ times. \nThis problem has been originally considered by \\cite{Bresler:2014}, and designing regret-optimal algorithms for it has since remained an open problem.\nIn this work, we propose an algorithm called B-LATTICE (Blocked Latent bAndiTs via maTrIx ComplEtion) that collaborates across users, while simultaneously satisfying the budget constraints, to maximize their cumulative rewards. 
Theoretically, under certain reasonable assumptions on the latent structure, with $\\mathsf{M}$ users, $\\mathsf{N}$ arms, $\\mathsf{T}$ rounds per user, and $\\mathsf{C}=O(1)$ latent clusters, B-LATTICE achieves a per-user regret of $\\widetilde{O}(\\sqrt{\\mathsf{T}(1 + \\mathsf{N}\\mathsf{M}^{-1})})$ under a budget constraint of $\\mathsf{B}=\\Theta(\\log \\mathsf{T})$. These are the first sub-linear regret bounds for this problem, and match the minimax regret bounds when $\\mathsf{B}=\\mathsf{T}$. Empirically, we demonstrate that our algorithm has superior performance over baselines even when $\\mathsf{B}=1$. B-LATTICE is a phased algorithm where in each phase it clusters users into groups and collaborates across users within a group to quickly learn their reward models.", "keywords": "Blocked Bandits;Collaborative Filtering;Clustering", "primary_area": "", "supplementary_material": "/attachment/c6a502e3deb945624d4c8aab809f7f95874947ad.pdf", "author": "Soumyabrata Pal;Arun Suggala;Karthikeyan Shanmugam;Prateek Jain", "authorids": "~Soumyabrata_Pal1;~Arun_Suggala1;~Karthikeyan_Shanmugam1;~Prateek_Jain1", "gender": "M;M;M;M", "homepage": "https://soumyabratap.github.io/;;https://sites.google.com/corp/view/karthikeyan-shanmugam/;http://prateekjain.org", "dblp": "206/6371;164/7327;;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "J4UxoTEAAAAJ;CKgmfDMAAAAJ;https://scholar.google.ca/citations?user=m4DyPcUAAAAJ;qYhRbJoAAAAJ", "orcid": ";;0009-0008-2879-5868;", "linkedin": ";;;", "or_profile": "~Soumyabrata_Pal1;~Arun_Suggala1;~Karthikeyan_Shanmugam1;~Prateek_Jain1", "aff": "Google;Google;Google Research;Google", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Postdoc;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\npal2023blocked,\ntitle={Blocked Collaborative Bandits: Online Collaborative Filtering with Per-Item Budget Constraints},\nauthor={Soumyabrata Pal and Arun Suggala and Karthikeyan Shanmugam and Prateek Jain},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ntd6X7uWYF}\n}", "github": "", "project": "", "reviewers": "tct3;2VtF;HmVS;7P26;tR2a", "pdf_size": 878871, "rating": "4;5;6;7;7", "confidence": "3;2;3;4;3", "soundness": "3;3;2;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;2", "wc_summary": "87;59;261;78;95", "wc_strengths": "37;40;32;98;57", "wc_weaknesses": "67;97;88;450;208", "wc_questions": "38;21;182;24;2", "wc_limitations": "5;1;2;4;1", "wc_review": "234;218;565;654;363", "wc_reply_reviewers": "237;0;63;174;20", "wc_reply_authors": "1308;0;290;205;24", "reply_reviewers": "2;0;1;1;1", "reply_authors": "3;1;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 116.0, 73.48469228349535 ], "wc_strengths_avg": [ 52.8, 24.111408088288826 ], "wc_weaknesses_avg": [ 182.0, 142.67865993203048 ], "wc_questions_avg": [ 53.4, 65.31646040624062 ], "wc_limitations_avg": [ 2.6, 1.624807680927192 ], "wc_review_avg": [ 406.8, 175.24771039873815 ], "wc_reply_reviewers_avg": [ 98.8, 91.65893300709975 ], "wc_reply_authors_avg": [ 365.4, 483.7311650080032 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 
0.5423261445466404, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12329723255474534904&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "google.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Label-Only Model Inversion Attacks via Knowledge Transfer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71835", "id": "NuoIThPPag", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9827e811c5a205c1313fb950c072c7d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NuoIThPPag", "openreview": "https://openreview.net/forum?id=NuoIThPPag", "poster": "/media/PosterPDFs/NeurIPS%202023/71835.png?t=1701907869.3250089", "slides": "https://nips.cc/virtual/2023/poster/71835", "video": "https://nips.cc/virtual/2023/poster/71835", "author_site": "Bao-Ngoc Nguyen, Keshigeyan Chandrasegaran, Milad Abdollahzadeh, Ngai-Man (Man) Cheung", "tldr": "", "abstract": "In a model inversion (MI) attack, an adversary abuses access to a machine learning (ML) model to infer and reconstruct private training data. Remarkable progress has been made in the white-box and black-box setups, where the adversary has access to the complete model or the model's soft output respectively. However, there is very limited study in the most challenging but practically important setup: Label-only MI attacks, where the adversary only has access to the model's predicted label (hard label) without confidence scores nor any other model information. \n\nIn this work, we propose LOKT, a novel approach for label-only MI attacks. Our idea is based on transfer of knowledge from the opaque target model to surrogate models. Subsequently, using these surrogate models, our approach can harness advanced white-box attacks. \nWe propose knowledge transfer based on generative modelling, and introduce a new model, Target model-assisted ACGAN (T-ACGAN), for effective knowledge transfer. Our method casts the challenging label-only MI into the more tractable white-box setup. We provide analysis to support that surrogate models based on our approach serve as effective proxies for the target model for MI. Our experiments show that our method significantly outperforms existing SOTA Label-only MI attack by more than 15% across all MI benchmarks. Furthermore, our method compares favorably in terms of query budget. Our study highlights rising privacy threats for ML models even when minimal information (i.e., hard labels) is exposed. Our study highlights rising privacy threats for ML models even when minimal information (i.e., hard labels) is exposed. 
Our code, demo, models and reconstructed data are available at our project page:\nhttps://ngoc-nguyen-0.github.io/lokt/", "keywords": "Model Inversion attacks;Generative models;Surrogate models;Knowledge transfer", "primary_area": "", "supplementary_material": "/attachment/7d0b4d7db4c974581cd82f190c9d102774cdefe1.pdf", "author": "Ngoc-Bao Nguyen;Keshigeyan Chandrasegaran;Milad Abdollahzadeh;Ngai-man Cheung", "authorids": "~Ngoc-Bao_Nguyen1;~Keshigeyan_Chandrasegaran1;~Milad_Abdollahzadeh1;~Ngai-man_Cheung1", "gender": "F;M;M;M", "homepage": ";https://keshik6.github.io/;;https://sites.google.com/site/mancheung0407/", "dblp": "151/8719;289/0842;211/7797;82/3605", "google_scholar": "zQPES6kAAAAJ;vh2Ywj8AAAAJ;SYDsMNAAAAAJ;https://scholar.google.com.sg/citations?hl=en", "orcid": "0000-0002-4689-5861;;0000-0003-4011-4670;0000-0003-0135-3791", "linkedin": ";keshigeyan-chandrasegaran/;milad-abdollahzadeh-b0764361/;", "or_profile": "~Ngoc-Bao_Nguyen1;~Keshigeyan_Chandrasegaran1;~Milad_Abdollahzadeh1;~Ngai-man_Cheung1", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Singapore University of Technology and Design;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg;sutd.edu.sg", "position": "PhD student;Researcher;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nnguyen2023labelonly,\ntitle={Label-Only Model Inversion Attacks via Knowledge Transfer},\nauthor={Ngoc-Bao Nguyen and Keshigeyan Chandrasegaran and Milad Abdollahzadeh and Ngai-man Cheung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NuoIThPPag}\n}", "github": "", "project": "", "reviewers": "txcd;Um8m;5AoF;VBSE", "pdf_size": 2239194, "rating": "5;5;6;6", "confidence": "4;3;5;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;2;3", "wc_summary": "121;62;115;84", "wc_strengths": "33;53;77;17", "wc_weaknesses": "351;111;249;18", "wc_questions": "70;64;102;158", "wc_limitations": "17;6;66;1", "wc_review": "592;296;609;278", "wc_reply_reviewers": "178;26;176;26", "wc_reply_authors": "433;1470;764;77", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 95.5, 23.90083680543424 ], "wc_strengths_avg": [ 45.0, 22.44994432064365 ], "wc_weaknesses_avg": [ 182.25, 127.46249448367155 ], "wc_questions_avg": [ 98.5, 37.265936188428164 ], "wc_limitations_avg": [ 22.5, 25.773047937719745 ], "wc_review_avg": [ 443.75, 156.99422760088984 ], "wc_reply_reviewers_avg": [ 101.5, 75.5033111856692 ], "wc_reply_authors_avg": [ 686.0, 513.719281320061 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11204729720595250373&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg;sutd.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Singapore University of Technology and Design", "aff_unique_dep": "", "aff_unique_url": "https://www.sutd.edu.sg", "aff_unique_abbr": "SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Sheaf Hypergraph Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71834", "id": "NvcVXzJvhX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/27f243af2887d7f248f518d9b967a882-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NvcVXzJvhX", "openreview": "https://openreview.net/forum?id=NvcVXzJvhX", "poster": "/media/PosterPDFs/NeurIPS%202023/71834.png?t=1702044728.812009", "slides": "https://nips.cc/virtual/2023/poster/71834", "video": "https://nips.cc/virtual/2023/poster/71834", "author_site": "Iulia Duta, Giulia Cassar\u00e0, Fabrizio Silvestri, Pietro Li\u00f3", "tldr": "", "abstract": "Higher-order relations are widespread in nature, with numerous phenomena involving complex interactions that extend beyond simple pairwise connections. As a result, advancements in higher-order processing can accelerate the growth of various fields requiring structured data. Current approaches typically represent these interactions using hypergraphs.\nWe enhance this representation by introducing cellular sheaves for hypergraphs, a mathematical construction that adds extra structure to the conventional hypergraph while maintaining their local, higher-order connectivity. Drawing inspiration from existing Laplacians in the literature, we develop two unique formulations of sheaf hypergraph Laplacians: linear and non-linear. Our theoretical analysis demonstrates that incorporating sheaves into the hypergraph Laplacian provides a more expressive inductive bias than standard hypergraph diffusion, creating a powerful instrument for effectively modelling complex data structures.\nWe employ these sheaf hypergraph Laplacians to design two categories of models: Sheaf Hypergraph Neural Networks and Sheaf Hypergraph Convolutional Networks. These models generalize classical Hypergraph Networks often found in the literature. 
Through extensive experimentation, we show that this generalization significantly improves performance, achieving top results on multiple benchmark datasets for hypergraph node classification.", "keywords": "hypergraph neural networks;hypergraph;sheaf;higher-order", "primary_area": "", "supplementary_material": "/attachment/8deda041748afb29ca853adf1c291fbf75410db2.pdf", "author": "Iulia Duta;Giulia Cassar\u00e0;Fabrizio Silvestri;Pietro Lio", "authorids": "~Iulia_Duta1;~Giulia_Cassar\u00e01;~Fabrizio_Silvestri2;~Pietro_Lio1", "gender": "F;F;M;M", "homepage": "https://iuliaduta.github.io/;https://giuliacassara.github.io/;https://sites.google.com/diag.uniroma1.it/fabriziosilvestri;https://www.cst.cam.ac.uk/people/pl219", "dblp": "https://dblp.uni-trier.de/pers/hd/d/Duta:Iulia;;s/FabrizioSilvestri;l/PietroLio.html", "google_scholar": "7MRI1DsAAAAJ;SYvKkrEAAAAJ;pi985dQAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ", "orcid": ";;0000-0001-7669-9055;0000-0002-0540-5053", "linkedin": ";giuliacassara/;fabrizio-silvestri-a6b0391/;", "or_profile": "~Iulia_Duta1;~Giulia_Cassar\u00e01;~Fabrizio_Silvestri2;~Pietro_Lio1", "aff": "University of Cambridge;University of Roma \"La Sapienza\";Sapienza University of Rome;University of Cambridge", "aff_domain": "cam.ac.uk;uniroma1.it;uniroma1.it;cam.ac.uk", "position": "PhD;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nduta2023sheaf,\ntitle={Sheaf Hypergraph Networks},\nauthor={Iulia Duta and Giulia Cassar{\\`a} and Fabrizio Silvestri and Pietro Lio},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NvcVXzJvhX}\n}", "github": "", "project": "", "reviewers": "qurz;C7Ao;XTYg;2wJ1", "pdf_size": 916299, "rating": "4;4;7;7", "confidence": "4;3;4;4", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "72;35;94;32", "wc_strengths": "32;38;234;95", "wc_weaknesses": "188;130;135;2", "wc_questions": "53;21;148;167", "wc_limitations": "10;1;6;1", "wc_review": "355;225;617;297", "wc_reply_reviewers": "26;15;15;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.25, 25.96512083545925 ], "wc_strengths_avg": [ 99.75, 81.31535832793212 ], "wc_weaknesses_avg": [ 113.75, 68.40458683450986 ], "wc_questions_avg": [ 97.25, 61.66998864926116 ], "wc_limitations_avg": [ 4.5, 3.774917217635375 ], "wc_review_avg": [ 373.5, 147.93495192144417 ], "wc_reply_reviewers_avg": [ 19.25, 4.602988159880492 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8062834078702698788&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 10, "email": "cam.ac.uk;uniroma1.it;uniroma1.it;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;University of Rome La Sapienza;Sapienza University of Rome", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.uniroma1.it;https://www.uniroma1.it", "aff_unique_abbr": "Cambridge;La Sapienza;Sapienza", "aff_campus_unique_index": "0;1;1;0", 
"aff_campus_unique": "Cambridge;Rome", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;Italy" }, { "title": "Efficient Policy Adaptation with Contrastive Prompt Ensemble for Embodied Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71833", "id": "Ny3GcHLyzj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad72633e034990a97e878fc2fc100afb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ny3GcHLyzj", "openreview": "https://openreview.net/forum?id=Ny3GcHLyzj", "poster": "/media/PosterPDFs/NeurIPS%202023/71833.png?t=1699790330.6703937", "slides": "https://nips.cc/virtual/2023/poster/71833", "video": "https://nips.cc/virtual/2023/poster/71833", "author_site": "wonje choi, Woo Kyung Kim, SeungHyun Kim, Honguk Woo", "tldr": "", "abstract": "For embodied reinforcement learning (RL) agents interacting with the environment, it is desirable to have rapid policy adaptation to unseen visual observations, but achieving zero-shot adaptation capability is considered as a challenging problem in the RL context. To address the problem, we present a novel contrastive prompt ensemble (ConPE) framework which utilizes a pretrained vision-language model and a set of visual prompts, thus enables efficient policy learning and adaptation upon a wide range of environmental and physical changes encountered by embodied agents. Specifically, we devise a guided-attention-based ensemble approach with multiple visual prompts on the vision-language model to construct robust state representations. Each prompt is contrastively learned in terms of an individual domain factors that significantly affects the agent's egocentric perception and observation. For a given task, the attention-based ensemble and policy are jointly learned so that the resulting state representations not only generalize to various domains but are also optimized for learning the task. 
Through experiments, we show that ConPE outperforms other state-of-the-art algorithms for several embodied agent tasks, including navigation in AI2THOR, manipulation in Metaworld, and autonomous driving in CARLA, while also improving the sample efficiency of policy learning and adaptation.", "keywords": "Prompt Learning;Domain Adaptation;Embodied AI", "primary_area": "", "supplementary_material": "/attachment/5413a8e37735c6cfb57284964cfddf9e449d7dc6.zip", "author": "Wonje Choi;Woo Kyung Kim;SeungHyun Kim;Honguk Woo", "authorids": "~Wonje_Choi2;~Woo_Kyung_Kim1;~SeungHyun_Kim4;~Honguk_Woo1", "gender": "M;M;M;M", "homepage": "http://115.145.179.118/students/;;;https://sites.google.com/view/csi-agent-group/about", "dblp": "163/3705-3;306/0140;;63/6072", "google_scholar": "L4d1CjEAAAAJ;OFFacb0AAAAJ;;https://scholar.google.co.kr/citations?user=Gaxjc7UAAAAJ", "orcid": "0000-0001-5138-0101;0000-0001-6214-4171;;0000-0001-6948-3440", "linkedin": ";;andy-kim-54a553217/;", "or_profile": "~Wonje_Choi2;~Woo_Kyung_Kim1;~SeungHyun_Kim4;~Honguk_Woo1", "aff": "Sung Kyun Kwan University;Sungkyunkwan University;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu;skku.edu;skku.edu", "position": "PhD student;PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nchoi2023efficient,\ntitle={Efficient Policy Adaptation with Contrastive Prompt Ensemble for Embodied Agents},\nauthor={Wonje Choi and Woo Kyung Kim and SeungHyun Kim and Honguk Woo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ny3GcHLyzj}\n}", "github": "", "project": "", "reviewers": "aYZP;QsRA;FnQE;gM92;okL6;TuT5", "pdf_size": 854918, "rating": "5;5;6;6;6;6", "confidence": "3;4;4;3;3;4", "soundness": "3;3;3;3;3;3", "novelty": "3;2;3;3;3;3", "presentation": "3;1;2;3;3;3", "wc_summary": "45;89;97;106;59;111", "wc_strengths": "60;86;39;72;66;108", "wc_weaknesses": "68;1108;145;62;221;220", "wc_questions": "25;13;53;10;93;55", "wc_limitations": "1;18;28;1;46;52", "wc_review": "199;1314;362;251;485;546", "wc_reply_reviewers": "0;0;0;0;74;95", "wc_reply_authors": "0;0;0;0;55;70", "reply_reviewers": "0;0;0;0;1;1", "reply_authors": "1;1;1;1;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.5, 0.7637626158259734 ], "wc_summary_avg": [ 84.5, 24.329337571472568 ], "wc_strengths_avg": [ 71.83333333333333, 21.450848209077627 ], "wc_weaknesses_avg": [ 304.0, 365.1260056473655 ], "wc_questions_avg": [ 41.5, 28.992815201931208 ], "wc_limitations_avg": [ 24.333333333333332, 19.90533150244482 ], "wc_review_avg": [ 526.1666666666666, 372.4859803476576 ], "wc_reply_reviewers_avg": [ 28.166666666666668, 40.29233453427862 ], "wc_reply_authors_avg": [ 20.833333333333332, 29.779280642009397 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13253941902009573464&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "skku.edu;skku.edu;skku.edu;skku.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", 
"aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Information Design in Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71832", "id": "NyQwBttTnG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/511d7c4e61878cf08ece6351ea3c529e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=NyQwBttTnG", "openreview": "https://openreview.net/forum?id=NyQwBttTnG", "poster": "/media/PosterPDFs/NeurIPS%202023/71832.png?t=1701404511.6633606", "slides": "https://nips.cc/virtual/2023/poster/71832", "video": "https://nips.cc/virtual/2023/poster/71832", "author_site": "Yue Lin, Wenhao Li, Hongyuan Zha, Baoxiang Wang", "tldr": "", "abstract": "Reinforcement learning (RL) is inspired by the way human infants and animals learn from the environment. The setting is somewhat idealized because, in actual tasks, other agents in the environment have their own goals and behave adaptively to the ego agent. To thrive in those environments, the agent needs to influence other agents so their actions become more helpful and less harmful. Research in computational economics distills two ways to influence others directly: by providing tangible goods (mechanism design) and by providing information (information design). This work investigates information design problems for a group of RL agents. The main challenges are two-fold. One is the information provided will immediately affect the transition of the agent trajectories, which introduces additional non-stationarity. The other is the information can be ignored, so the sender must provide information that the receiver is willing to respect. We formulate the Markov signaling game, and develop the notions of signaling gradient and the extended obedience constraints that address these challenges. Our algorithm is efficient on various mixed-motive tasks and provides further insights into computational economics. 
Our code is publicly available at https://github.com/YueLin301/InformationDesignMARL.", "keywords": "multi-agent reinforcement learning;multi-agent communication;information design;signaling gradient;obedience constraints", "primary_area": "", "supplementary_material": "/attachment/c1fa0d0d6a53cfc74d37ce6687f2979eb3e79ec9.pdf", "author": "Yue Lin;Wenhao Li;Hongyuan Zha;Baoxiang Wang", "authorids": "~Yue_Lin2;~Wenhao_Li2;~Hongyuan_Zha1;~Baoxiang_Wang1", "gender": "M;M;;", "homepage": "https://yuelin301.github.io/about/;https://tomaxent.com;;", "dblp": ";;z/HongyuanZha;", "google_scholar": "fbvQHX4AAAAJ;HAtzuaYAAAAJ;n1DQMIsAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yue_Lin2;~Wenhao_Li2;~Hongyuan_Zha1;~Baoxiang_Wang1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;", "position": "Research Assistant;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nlin2023information,\ntitle={Information Design in Multi-Agent Reinforcement Learning},\nauthor={Yue Lin and Wenhao Li and Hongyuan Zha and Baoxiang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=NyQwBttTnG}\n}", "github": "", "project": "", "reviewers": "YNGH;6N28;5nfz;bi5b;sBPo", "pdf_size": 3634955, "rating": "5;5;6;6;6", "confidence": "3;3;3;1;3", "soundness": "3;2;3;3;3", "novelty": "2;2;3;2;2", "presentation": "3;2;4;3;3", "wc_summary": "62;32;133;85;50", "wc_strengths": "46;33;100;35;46", "wc_weaknesses": "485;155;94;53;140", "wc_questions": "120;2;167;4;43", "wc_limitations": "9;6;40;20;45", "wc_review": "722;228;534;197;324", "wc_reply_reviewers": "148;17;91;0;61", "wc_reply_authors": "663;118;61;0;55", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;3;2;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.4, 34.85168575549826 ], "wc_strengths_avg": [ 52.0, 24.60081299469593 ], "wc_weaknesses_avg": [ 185.4, 154.0319447387457 ], "wc_questions_avg": [ 67.2, 65.69444420953724 ], "wc_limitations_avg": [ 24.0, 15.8871016865884 ], "wc_review_avg": [ 401.0, 199.09997488698988 ], "wc_reply_reviewers_avg": [ 63.4, 53.09839922257544 ], "wc_reply_authors_avg": [ 179.4, 244.67006355498418 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16065100340887283069&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Evaluating the Moral Beliefs Encoded in LLMs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71831", "id": "O06z2G18me", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a2cf225ba392627529efef14dc857e22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=O06z2G18me", "openreview": "https://openreview.net/forum?id=O06z2G18me", "poster": "/media/PosterPDFs/NeurIPS%202023/71831.png?t=1702078298.4448667", "slides": "https://nips.cc/virtual/2023/poster/71831", "video": "https://nips.cc/virtual/2023/poster/71831", "author_site": "Nino Scherrer, Claudia Shi, Amir Feder, David Blei", "tldr": "", "abstract": "This paper presents a case study on the design, administration, post-processing, and evaluation of surveys on large language models (LLMs). It comprises two components:\n(1) A statistical method for eliciting beliefs encoded in LLMs. We introduce statistical measures and evaluation metrics that quantify the probability of an LLM \"making a choice\", the associated uncertainty, and the consistency of that choice.\n(2) We apply this method to study what moral beliefs are encoded in different LLMs, especially in ambiguous cases where the right choice is not obvious.\nWe design a large-scale survey comprising 680 high-ambiguity moral scenarios (e.g., \"Should I tell a white lie?\") and 687 low-ambiguity moral scenarios (e.g., \"Should I stop for a pedestrian on the road?\"). Each scenario includes a description, two possible actions, and auxiliary labels indicating violated rules (e.g., \"do not kill\"). We administer the survey to 28 open- and closed-source LLMs.\nWe find that (a) in unambiguous scenarios, most models ``choose\" actions that align with commonsense. In ambiguous cases, most models express uncertainty.\n(b) Some models are uncertain about choosing the commonsense action because their responses are sensitive to the question-wording.\n(c) Some models reflect clear preferences in ambiguous scenarios. 
Specifically, closed-source models tend to agree with each other.", "keywords": "Language Models;Moral Decision Making;Social Aspects of Machine Learning;Ethics", "primary_area": "", "supplementary_material": "/attachment/c1ac89982f3f4a1a997d590dbd49b98b65f12c64.pdf", "author": "Nino Scherrer;Claudia Shi;Amir Feder;David Blei", "authorids": "~Nino_Scherrer1;~Claudia_Shi1;~Amir_Feder1;~David_Blei2", "gender": "M;;;M", "homepage": "https://ninodimontalcino.github.io/;https://claudiajshi.com/;https://www.amirfeder.com/;http://www.cs.columbia.edu/~blei/", "dblp": "295/0198;;214/3604;86/1910", "google_scholar": "CG9n26kAAAAJ;WHKniLsAAAAJ;ERwoPLIAAAAJ;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ", "orcid": ";;0000-0001-5472-1135;", "linkedin": ";;amir-feder-b65b7035/;", "or_profile": "~Nino_Scherrer1;~Claudia_Shi1;~Amir_Feder1;~David_Blei2", "aff": "FAR AI;Columbia University;Google;Columbia University", "aff_domain": "far.ai;columbia.edu;google.com;columbia.edu", "position": "Researcher;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nscherrer2023evaluating,\ntitle={Evaluating the Moral Beliefs Encoded in {LLM}s},\nauthor={Nino Scherrer and Claudia Shi and Amir Feder and David Blei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=O06z2G18me}\n}", "github": "", "project": "", "reviewers": "wYm3;tn2x;JwcR;53sf;t4xk;L2Nx", "pdf_size": 5381188, "rating": "5;6;7;7;7;8", "confidence": "5;4;5;4;4;4", "soundness": "3;3;3;3;3;4", "novelty": "3;2;3;3;3;3", "presentation": "4;2;4;3;4;4", "wc_summary": "64;103;103;164;103;130", "wc_strengths": "32;37;197;48;89;60", "wc_weaknesses": "208;217;395;153;116;270", "wc_questions": "45;86;78;12;230;92", "wc_limitations": "24;23;27;44;13;77", "wc_review": "373;466;800;421;551;629", "wc_reply_reviewers": "295;41;60;18;0;100", "wc_reply_authors": "325;0;0;0;0;368", "reply_reviewers": "1;1;1;1;0;1", "reply_authors": "2;1;1;1;1;3", "rating_avg": [ 6.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.5, 0.7637626158259734 ], "wc_summary_avg": [ 111.16666666666667, 30.470842164637034 ], "wc_strengths_avg": [ 77.16666666666667, 56.713950860632366 ], "wc_weaknesses_avg": [ 226.5, 89.74918755435431 ], "wc_questions_avg": [ 90.5, 68.15607089614248 ], "wc_limitations_avg": [ 34.666666666666664, 21.044925490219462 ], "wc_review_avg": [ 540.0, 143.27595750857853 ], "wc_reply_reviewers_avg": [ 85.66666666666667, 98.82419856605073 ], "wc_reply_authors_avg": [ 115.5, 163.8126470494062 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5000000000000001, "gs_citation": 142, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4223395938890596669&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "far.ai;columbia.edu;google.com;columbia.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "FAR AI;Columbia University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.far.ai;https://www.columbia.edu;https://www.google.com", "aff_unique_abbr": "FAR AI;Columbia;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "title": "A U-turn on Double Descent: Rethinking Parameter Counting in Statistical Learning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71830", "id": "O0Lz8XZT2b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aec5e2847c5ae90f939ab786774856cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=O0Lz8XZT2b", "openreview": "https://openreview.net/forum?id=O0Lz8XZT2b", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71830", "video": "https://nips.cc/virtual/2023/poster/71830", "author_site": "Alicia Curth, Alan Jeffares, Mihaela van der Schaar", "tldr": "", "abstract": "Conventional statistical wisdom established a well-understood relationship between model complexity and prediction error, typically presented as a _U-shaped curve_ reflecting a transition between under- and overfitting regimes. However, motivated by the success of overparametrized neural networks, recent influential work has suggested this theory to be generally incomplete, introducing an additional regime that exhibits a second descent in test error as the parameter count $p$ grows past sample size $n$ -- a phenomenon dubbed _double descent_. While most attention has naturally been given to the deep-learning setting, double descent was shown to emerge more generally across non-neural models: known cases include _linear regression, trees, and boosting_. In this work, we take a closer look at the evidence surrounding these more classical statistical machine learning methods and challenge the claim that observed cases of double descent truly extend the limits of a traditional U-shaped complexity-generalization curve therein. We show that once careful consideration is given to _what is being plotted_ on the x-axes of their double descent plots, it becomes apparent that there are implicitly multiple, distinct complexity axes along which the parameter count grows. We demonstrate that the second descent appears exactly (and _only_) when and where the transition between these underlying axes occurs, and that its location is thus _not_ inherently tied to the interpolation threshold $p=n$. We then gain further insight by adopting a classical nonparametric statistics perspective. 
We interpret the investigated methods as _smoothers_ and propose a generalized measure for the _effective_ number of parameters they use _on unseen examples_, using which we find that their apparent double descent curves do indeed fold back into more traditional convex shapes -- providing a resolution to the ostensible tension between double descent and traditional statistical intuition.", "keywords": "Double Descent;Statistical Machine Learning;Interpolation Regime;Effective Parameters", "primary_area": "", "supplementary_material": "", "author": "Alicia Curth;Alan Jeffares;Mihaela van der Schaar", "authorids": "~Alicia_Curth1;~Alan_Jeffares1;~Mihaela_van_der_Schaar2", "gender": "F;;F", "homepage": ";https://alanjeffares.com;https://www.vanderschaar-lab.com", "dblp": "261/8064;304/1985;", "google_scholar": "eWRBqsYAAAAJ;e65kJ08AAAAJ;DZ3S--MAAAAJ", "orcid": ";;", "linkedin": ";alanjeffares;", "or_profile": "~Alicia_Curth1;~Alan_Jeffares1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ncurth2023a,\ntitle={A U-turn on Double Descent: Rethinking Parameter Counting in Statistical Learning},\nauthor={Alicia Curth and Alan Jeffares and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=O0Lz8XZT2b}\n}", "github": "", "project": "", "reviewers": "tnvy;4SoX;Uot3;vbD3;WN4r;trNU", "pdf_size": 903567, "rating": "7;7;7;7;7;9", "confidence": "4;2;3;3;4;4", "soundness": "3;3;3;3;4;3", "novelty": "4;3;3;3;3;4", "presentation": "4;3;4;3;3;4", "wc_summary": "273;172;106;72;100;82", "wc_strengths": "151;32;45;43;47;337", "wc_weaknesses": "698;34;57;70;217;1", "wc_questions": "207;54;18;71;154;40", "wc_limitations": "32;2;6;3;1;1", "wc_review": "1361;294;232;259;519;461", "wc_reply_reviewers": "77;12;17;39;49;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 7.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999298 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 134.16666666666666, 69.82458179052863 ], "wc_strengths_avg": [ 109.16666666666667, 109.52384316769668 ], "wc_weaknesses_avg": [ 179.5, 241.61867339535937 ], "wc_questions_avg": [ 90.66666666666667, 67.23507682419613 ], "wc_limitations_avg": [ 7.5, 11.086778913041726 ], "wc_review_avg": [ 521.0, 390.0636700164047 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 25.856441277862572 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4000000000000001, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9364645103589100890&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;1", 
"aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "S-CLIP: Semi-supervised Vision-Language Learning using Few Specialist Captions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71829", "id": "O1lYncfVOO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c06f788963f0ce069f5b2dbf83fe7822-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=O1lYncfVOO", "openreview": "https://openreview.net/forum?id=O1lYncfVOO", "poster": "/media/PosterPDFs/NeurIPS%202023/71829.png?t=1699294193.5049756", "slides": "https://nips.cc/virtual/2023/poster/71829", "video": "https://nips.cc/virtual/2023/poster/71829", "author_site": "Sangwoo Mo, Minkyu Kim, Kyungmin Lee, Jinwoo Shin", "tldr": "", "abstract": "Vision-language models, such as contrastive language-image pre-training (CLIP), have demonstrated impressive results in natural image domains. However, these models often struggle when applied to specialized domains like remote sensing, and adapting to such domains is challenging due to the limited number of image-text pairs available for training. To address this, we propose S-CLIP, a semi-supervised learning method for training CLIP that utilizes additional unpaired images. S-CLIP employs two pseudo-labeling strategies specifically designed for contrastive learning and the language modality. The caption-level pseudo-label is given by a combination of captions of paired images, obtained by solving an optimal transport problem between unpaired and paired images. The keyword-level pseudo-label is given by a keyword in the caption of the nearest paired image, trained through partial label learning that assumes a candidate set of labels for supervision instead of the exact one. By combining these objectives, S-CLIP significantly enhances the training of CLIP using only a few image-text pairs, as demonstrated in various specialist domains, including remote sensing, fashion, scientific figures, and comics. 
For instance, S-CLIP improves CLIP by 10% for zero-shot classification and 4% for image-text retrieval on the remote sensing benchmark, matching the performance of supervised CLIP while using three times fewer image-text pairs.", "keywords": "vision-language model;semi-supervised learning;specialist domain", "primary_area": "", "supplementary_material": "", "author": "Sangwoo Mo;Minkyu Kim;Kyungmin Lee;Jinwoo Shin", "authorids": "~Sangwoo_Mo1;~Minkyu_Kim2;~Kyungmin_Lee1;~Jinwoo_Shin1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/sangwoomo;https://github.com/kimmk135;https://kyungmnlee.github.io/;https://sites.google.com/site/mijirim/", "dblp": "198/0432;83/6739-4;57/5118;31/7062", "google_scholar": "https://scholar.google.co.kr/citations?user=Sq9y3NMAAAAJ;f-kVmJwAAAAJ;6dpime0AAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;", "linkedin": ";kimmk135/;;", "or_profile": "~Sangwoo_Mo1;~Minkyu_Kim2;~Kyungmin_Lee1;~Jinwoo_Shin1", "aff": "KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmo2023sclip,\ntitle={S-{CLIP}: Semi-supervised Vision-Language Learning using Few Specialist Captions},\nauthor={Sangwoo Mo and Minkyu Kim and Kyungmin Lee and Jinwoo Shin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=O1lYncfVOO}\n}", "github": "", "project": "", "reviewers": "Q2NM;XrtM;7J1f;CjR3;EbWV", "pdf_size": 1686143, "rating": "5;6;6;6;6", "confidence": "4;4;3;4;3", "soundness": "3;3;3;4;3", "novelty": "3;3;3;4;3", "presentation": "4;3;3;4;3", "wc_summary": "243;87;88;72;60", "wc_strengths": "246;55;177;30;46", "wc_weaknesses": "188;100;136;26;56", "wc_questions": "30;65;6;103;165", "wc_limitations": "5;5;8;1;3", "wc_review": "712;312;415;232;330", "wc_reply_reviewers": "15;13;21;21;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 110.0, 67.29933134883288 ], "wc_strengths_avg": [ 110.8, 85.44331454244971 ], "wc_weaknesses_avg": [ 101.2, 57.3494550976729 ], "wc_questions_avg": [ 73.8, 56.147662462474784 ], "wc_limitations_avg": [ 4.4, 2.33238075793812 ], "wc_review_avg": [ 400.2, 166.39519223823748 ], "wc_reply_reviewers_avg": [ 18.6, 3.8781438859330635 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4023118564831966436&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { 
"title": "Training biologically plausible recurrent neural networks on cognitive tasks with long-term dependencies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71828", "id": "O453PHSthc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65ccdfe02045fa0b823c5fa7ffd56b66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=O453PHSthc", "openreview": "https://openreview.net/forum?id=O453PHSthc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71828", "video": "https://nips.cc/virtual/2023/poster/71828", "author_site": "Wayne Soo, Vishwa Goudar, Xiao-Jing Wang", "tldr": "", "abstract": "Training recurrent neural networks (RNNs) has become a go-to approach for generating and evaluating mechanistic neural hypotheses for cognition. The ease and efficiency of training RNNs with backpropagation through time and the availability of robustly supported deep learning libraries has made RNN modeling more approachable and accessible to neuroscience. Yet, a major technical hindrance remains. Cognitive processes such as working memory and decision making involve neural population dynamics over a long period of time within a behavioral trial and across trials. It is difficult to train RNNs to accomplish tasks where neural representations and dynamics have long temporal dependencies without gating mechanisms such as LSTMs or GRUs which currently lack experimental support and prohibit direct comparison between RNNs and biological neural circuits. We tackled this problem based on the idea of specialized skip-connections through time to support the emergence of task-relevant dynamics, and subsequently reinstitute biological plausibility by reverting to the original architecture. We show that this approach enables RNNs to successfully learn cognitive tasks that prove impractical if not impossible to learn using conventional methods. Over numerous tasks considered here, we achieve less training steps and shorter wall-clock times, particularly in tasks that require learning long-term dependencies via temporal integration over long timescales or maintaining a memory of past events in hidden-states. 
Our methods expand the range of experimental tasks that biologically plausible RNN models can learn, thereby supporting the development of theory for the emergent neural mechanisms of computations involving long-term dependencies.", "keywords": "neuroscience;recurrent neural network;neural circuits;cortical circuits;cognitive tasks;working memory", "primary_area": "", "supplementary_material": "/attachment/f8596aaf7de436a2cc463ce4e35affedf6785774.pdf", "author": "Wayne WM Soo;Vishwa Goudar;Xiao-Jing Wang", "authorids": "~Wayne_WM_Soo1;~Vishwa_Goudar1;~Xiao-Jing_Wang1", "gender": "M;;M", "homepage": ";;http://www.cns.nyu.edu/wanglab/", "dblp": ";;", "google_scholar": ";https://scholar.google.com/citations?hl=en;cv-YgL0AAAAJ", "orcid": "0000-0002-0621-1955;;", "linkedin": "wayne-soo-8bb097147/;vgoudar/;", "or_profile": "~Wayne_WM_Soo1;~Vishwa_Goudar1;~Xiao-Jing_Wang1", "aff": "University of Cambridge;University of California, Los Angeles;New York University", "aff_domain": "cam.ac.uk;ucla.edu;nyu.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nsoo2023training,\ntitle={Training biologically plausible recurrent neural networks on cognitive tasks with long-term dependencies},\nauthor={Wayne WM Soo and Vishwa Goudar and Xiao-Jing Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=O453PHSthc}\n}", "github": "", "project": "", "reviewers": "fBiB;CT76;nbN6;cfQC;T2Cs", "pdf_size": 619817, "rating": "4;5;6;7;7", "confidence": "3;3;3;2;4", "soundness": "3;3;2;3;4", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;2", "wc_summary": "111;23;71;28;142", "wc_strengths": "89;37;16;46;131", "wc_weaknesses": "222;39;286;45;266", "wc_questions": "54;20;273;19;7", "wc_limitations": "9;1;1;20;8", "wc_review": "485;120;647;158;554", "wc_reply_reviewers": "0;24;153;13;73", "wc_reply_authors": "0;67;375;0;0", "reply_reviewers": "0;1;2;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 75.0, 46.29038777111292 ], "wc_strengths_avg": [ 63.8, 41.16017492674199 ], "wc_weaknesses_avg": [ 171.6, 107.84173589107327 ], "wc_questions_avg": [ 74.6, 100.42828286892095 ], "wc_limitations_avg": [ 7.8, 6.968500556073738 ], "wc_review_avg": [ 392.8, 213.84798339007082 ], "wc_reply_reviewers_avg": [ 52.6, 55.94497296451219 ], "wc_reply_authors_avg": [ 88.4, 145.6304913127742 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15545149384957795989&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "cam.ac.uk;ucla.edu;nyu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu;https://www.nyu.edu", "aff_unique_abbr": "Cambridge;UCLA;NYU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Los Angeles;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Scalable Primal-Dual Actor-Critic Method for Safe 
Multi-Agent RL with General Utilities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71827", "id": "O63qgtebjH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72a1ec14aed36985ffba175e0bba3fec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=O63qgtebjH", "openreview": "https://openreview.net/forum?id=O63qgtebjH", "poster": "/media/PosterPDFs/NeurIPS%202023/71827.png?t=1701892225.3891819", "slides": "https://nips.cc/virtual/2023/poster/71827", "video": "https://nips.cc/virtual/2023/poster/71827", "author_site": "Donghao Ying, Yunkai Zhang, Yuhao Ding, Alec Koppel, Javad Lavaei", "tldr": "", "abstract": "We investigate safe multi-agent reinforcement learning, where agents seek to collectively maximize an aggregate sum of local objectives while satisfying their own safety constraints. The objective and constraints are described by general utilities, i.e., nonlinear functions of the long-term state-action occupancy measure, which encompass broader decision-making goals such as risk, exploration, or imitations. The exponential growth of the state-action space size with the number of agents presents challenges for global observability, further exacerbated by the global coupling arising from agents' safety constraints. To tackle this issue, we propose a primal-dual method utilizing shadow reward and $\\kappa$-hop neighbor truncation under a form of correlation decay property, where $\\kappa$ is the communication radius. In the exact setting, our algorithm converges to a first-order stationary point (FOSP) at the rate of $\\mathcal{O}\\left(T^{-2/3}\\right)$. In the sample-based setting, we demonstrate that, with high probability, our algorithm requires $\\widetilde{\\mathcal{O}}\\left(\\epsilon^{-3.5}\\right)$ samples to achieve an $\\epsilon$-FOSP with an approximation error of $\\mathcal{O}(\\phi_0^{2\\kappa})$, where $\\phi_0\\in (0,1)$. Finally, we demonstrate the effectiveness of our model through extensive numerical experiments.", "keywords": "Reinforcement Learning Theory;Safe reinforcement learning;Multi-agent reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/49f654938bcc9fab04b441f4d018631e04bd275b.pdf", "author": "Donghao Ying;Yunkai Zhang;Yuhao Ding;Alec Koppel;Javad Lavaei", "authorids": "~Donghao_Ying1;~Yunkai_Zhang2;~Yuhao_Ding2;~Alec_Koppel1;~Javad_Lavaei1", "gender": "M;;M;M;", "homepage": "https://sites.google.com/view/donghao-ying;;https://yuhaod.github.io/homepage/;http://koppel.netlify.app/;", "dblp": ";;218/2837;149/0076;", "google_scholar": "NzMQHG4AAAAJ;J48boCIAAAAJ;Q65PtLgAAAAJ;8ClxyjIAAAAJ;", "orcid": ";;;0000-0003-2447-2873;", "linkedin": "donghao-ying-2507071a7/;zhang-yunkai;;alec-koppel-9860b697/;", "or_profile": "~Donghao_Ying1;~Yunkai_Zhang2;~Yuhao_Ding2;~Alec_Koppel1;~Javad_Lavaei1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;J.P. 
Morgan Chase;", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;jpmorgan.com;", "position": "PhD student;PhD student;PhD student;Research Team Lead;", "bibtex": "@inproceedings{\nying2023scalable,\ntitle={Scalable Primal-Dual Actor-Critic Method for Safe Multi-Agent {RL} with General Utilities},\nauthor={Donghao Ying and Yunkai Zhang and Yuhao Ding and Alec Koppel and Javad Lavaei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=O63qgtebjH}\n}", "github": "", "project": "", "reviewers": "bKHc;Vbe7;pEsM;fkzS", "pdf_size": 767188, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "3;4;4;3", "novelty": "1;4;4;3", "presentation": "2;4;4;3", "wc_summary": "90;97;69;86", "wc_strengths": "38;58;136;65", "wc_weaknesses": "189;16;58;160", "wc_questions": "66;278;59;296", "wc_limitations": "4;47;6;40", "wc_review": "387;496;328;647", "wc_reply_reviewers": "0;0;28;25", "wc_reply_authors": "0;0;8;12", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 85.5, 10.307764064044152 ], "wc_strengths_avg": [ 74.25, 37.002533697032156 ], "wc_weaknesses_avg": [ 105.75, 71.0787415476667 ], "wc_questions_avg": [ 174.75, 112.45749196918807 ], "wc_limitations_avg": [ 24.25, 19.421315609401955 ], "wc_review_avg": [ 464.5, 121.38471897236488 ], "wc_reply_reviewers_avg": [ 13.25, 13.292385038058445 ], "wc_reply_authors_avg": [ 5.0, 5.196152422706632 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13213094340088629703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;berkeley.edu;berkeley.edu;jpmorgan.com;", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, Berkeley;JPMorgan Chase & Co.", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.jpmorganchase.com", "aff_unique_abbr": "UC Berkeley;JPM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "OAOt75zsdP", "title": "Geometry-Calibrated DRO: Combating Over-Pessimism with Free Energy Implications", "track": "main", "status": "Reject", "tldr": "", "abstract": "Machine learning algorithms minimizing average risk are susceptible to distributional shifts. Distributionally Robust Optimization (DRO) addresses this issue by optimizing the worst-case risk within an uncertainty set. However, DRO suffers from over-pessimism, leading to low-confidence predictions, poor parameter estimations as well as poor generalization. In this work, we conduct a theoretical analysis of a probable root cause of over-pessimism: excessive focus on noisy samples. To alleviate the impact of noise, we incorporate data geometry into calibration terms in DRO, resulting in our novel Geometry-Calibrated DRO (GCDRO) for regression. We establish that our risk objective aligns with the Helmholtz free energy in statistical physics, and this free-energy-based risk can extend to standard DRO methods. 
Leveraging gradient flow in Wasserstein space, we develop an approximate minimax optimization algorithm with a bounded error ratio and standard convergence rate and elucidate how our approach mitigates noisy sample effects. Comprehensive experiments confirm GCDRO's superiority over conventional DRO methods.", "keywords": "Distributionally Robust Optimization;Free Energy;Over-pessimism;Calibration term", "primary_area": "", "supplementary_material": "/attachment/90acbf78acdb26940db1b63f871e61c5c9f01307.pdf", "author": "Jiashuo Liu;Jiayun Wu;Tianyu Wang;Hao Zou;Bo Li;Peng Cui", "authorids": "~Jiashuo_Liu1;~Jiayun_Wu1;~Tianyu_Wang6;~Hao_Zou1;~Bo_Li29;~Peng_Cui1", "gender": "M;M;M;M;M;M", "homepage": "https://ljsthu.github.io;https://ic-hub.github.io;https://wangtianyu61.github.io;https://scholar.google.com/citations?user=f5cbI4cAAAAJ&hl=en;http://www.sem.tsinghua.edu.cn/en/libo;http://pengcui.thumedialab.com/", "dblp": "180/2823;00/9456;;13/4741-1;50/3402-64;31/891-1", "google_scholar": "b7bpt5MAAAAJ;https://scholar.google.com/citations?hl=en;mKT6mKEAAAAJ;f5cbI4cAAAAJ;GaJXFWMAAAAJ;https://scholar.google.com.tw/citations?user=G8x97ZgAAAAJ", "orcid": ";0009-0007-7131-7290;0009-0000-2095-431X;0000-0002-6000-6936;0000-0001-5599-8857;0000-0003-2957-8511", "linkedin": "jiashuo-liu-244a6b1a4;jiayun-wu-4aa86323a/;;;;", "or_profile": "~Jiashuo_Liu1;~Jiayun_Wu1;~Tianyu_Wang6;~Hao_Zou1;~Bo_Li29;~Peng_Cui1", "aff": "Stanford University;Tsinghua University;Columbia University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "stanford.edu;mails.tsinghua.edu.cn;columbia.edu;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Researcher;MS student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nliu2023geometrycalibrated,\ntitle={Geometry-Calibrated {DRO}: Combating Over-Pessimism with Free Energy Implications},\nauthor={Jiashuo Liu and Jiayun Wu and Tianyu Wang and Hao Zou and Bo Li and Peng Cui},\nyear={2023},\nurl={https://openreview.net/forum?id=OAOt75zsdP}\n}", "github": "", "project": "", "reviewers": "aeCE;rKdP;DBQo;WEic", "site": "https://openreview.net/forum?id=OAOt75zsdP", "pdf_size": 5171206, "rating": "5;6;7;8", "confidence": "5;3;1;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "124;164;269;71", "wc_strengths": "61;97;257;133", "wc_weaknesses": "96;161;105;97", "wc_questions": "2;14;1;38", "wc_limitations": "1;40;2;10", "wc_review": "284;476;634;349", "wc_reply_reviewers": "0;19;0;70", "wc_reply_authors": "0;0;0;26", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 157.0, 72.59132179537717 ], "wc_strengths_avg": [ 137.0, 73.81056834898374 ], "wc_weaknesses_avg": [ 114.75, 26.929305598176867 ], "wc_questions_avg": [ 13.75, 14.905955185763842 ], "wc_limitations_avg": [ 13.25, 15.833114033569013 ], "wc_review_avg": [ 435.75, 133.67568028628094 ], "wc_reply_reviewers_avg": [ 22.25, 28.63891583143468 ], "wc_reply_authors_avg": [ 6.5, 11.258330249197702 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10152050426102300533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Stanford University;Tsinghua University;Columbia University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.tsinghua.edu.cn;https://www.columbia.edu", "aff_unique_abbr": "Stanford;THU;Columbia", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Evaluating and Improving Tool-Augmented Computation-Intensive Math Reasoning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73611", "id": "OB10WTlwmX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4a47dd69242d5af908cdd5d51c971cbf-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=OB10WTlwmX", "openreview": "https://openreview.net/forum?id=OB10WTlwmX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73611", "video": "https://nips.cc/virtual/2023/poster/73611", "author_site": "Beichen Zhang, Kun Zhou, Xilin Wei, Xin Zhao, Jing Sha, Shijin Wang, Ji-Rong Wen", "tldr": "", "abstract": "Chain-of-thought prompting (CoT) and tool augmentation have been validated in recent work as effective practices for improving large language models (LLMs) to perform step-by-step reasoning on complex math-related tasks.\nHowever, most existing math reasoning datasets may not be able to fully evaluate and analyze the ability of LLMs in manipulating tools and performing reasoning, as they often only require very few invocations of tools or miss annotations for evaluating intermediate reasoning steps, thus supporting only outcome evaluation.\nTo address the issue, we construct **CARP**, a new Chinese dataset consisting of 4,886 computation-intensive algebra problems with formulated annotations on intermediate steps, facilitating the evaluation of the intermediate reasoning process.\nIn CARP, we test four LLMs with CoT prompting, and find that they are all prone to make mistakes at the early steps of the solution, leading to incorrect answers.\nBased on this finding, we propose a new approach that can facilitate the deliberation on reasoning steps with tool interfaces, namely **DELI**.\nIn DELI, we first initialize a step-by-step solution based on retrieved exemplars, then iterate two deliberation procedures that check and refine the intermediate steps of the generated solution, from both tool manipulation and natural language reasoning perspectives, until solutions converge or the maximum iteration is achieved.\nExperimental results on CARP and six other datasets show that the proposed DELI mostly outperforms competitive baselines, and can further boost the performance of existing CoT methods.\nOur data and code are available at https://github.com/RUCAIBox/CARP.", "keywords": "Math Problem Solving", "primary_area": "", "supplementary_material": "", "author": "Beichen Zhang;Kun Zhou;Xilin Wei;Xin Zhao;Jing Sha;Shijin Wang;Ji-Rong Wen", "authorids": "~Beichen_Zhang1;~Kun_Zhou2;~Xilin_Wei1;~Xin_Zhao10;~Jing_Sha1;~Shijin_Wang1;~Ji-Rong_Wen1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/ToheartZhang;https://lancelot39.github.io/;https://github.com/Wiselnn570;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;;;https://gsai.ruc.edu.cn/english/jrwen", "dblp": 
"71/9257;48/3927-2.html;11/1871.html;https://dblp.uni-trier.de/pid/52/8700.html;96/5272;74/5750-1.html;w/JRWen", "google_scholar": ";bmRJVjwAAAAJ;zxtbqQwAAAAJ;JNhNacoAAAAJ;;;tbxCHJgAAAAJ", "orcid": ";;;0000-0002-8333-6196;;0000-0002-9202-7678;0000-0002-9777-9676", "linkedin": ";;;;jing-sha-52482737/;;", "or_profile": "~Beichen_Zhang1;~Kun_Zhou2;~Xilin_Wei1;~Xin_Zhao10;~Jing_Sha1;~Shijin_Wang1;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;;Renmin University of China;iFLYTEK Research;State Key Laboratory of Cognitive Intelligence;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn;iflytek.com;iflytek.com;ruc.edu.cn", "position": "MS student;PhD student;;Full Professor;Researcher;Vice Dean;Full Professor", "bibtex": "@inproceedings{\nzhang2023evaluating,\ntitle={Evaluating and Improving Tool-Augmented Computation-Intensive Math Reasoning},\nauthor={Beichen Zhang and Kun Zhou and Xilin Wei and Xin Zhao and Jing Sha and Shijin Wang and Ji-Rong Wen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=OB10WTlwmX}\n}", "github": "", "project": "", "reviewers": "73Fj;mfSt;snqK;mtC2;fi3X", "pdf_size": 1725983, "rating": "4;7;7;7;7", "confidence": "5;3;4;3;4", "wc_summary_and_contributions": "75;117;80;92;73", "wc_strengths": "121;76;109;87;39", "wc_improvement": "281;20;118;256;184", "wc_limitations": "9;7;14;100;7", "wc_correctness": "1;11;17;77;1", "wc_clarity": "1;4;5;61;1", "wc_relation_to_prior_work": "1;1;10;15;15", "wc_documentation": "1;1;10;8;3", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "491;238;364;697;324", "wc_reply_reviewers": "423;0;0;36;0", "wc_reply_authors": "3836;945;763;2265;1766", "reply_reviewers": "2;0;0;1;0", "reply_authors": "7;2;2;5;4", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 87.4, 16.206171663906314 ], "wc_strengths_avg": [ 86.4, 28.50684128415493 ], "wc_improvement_avg": [ 171.8, 94.97452289956502 ], "wc_limitations_avg": [ 27.4, 36.39010854614204 ], "wc_correctness_avg": [ 21.4, 28.464714999451513 ], "wc_clarity_avg": [ 14.4, 23.354656923192 ], "wc_relation_to_prior_work_avg": [ 8.4, 6.3118935352238 ], "wc_documentation_avg": [ 4.6, 3.7202150475476548 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 422.8, 159.50347958586985 ], "wc_reply_reviewers_avg": [ 91.8, 166.18591998120658 ], "wc_reply_authors_avg": [ 1915.0, 1104.8027878313849 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 4.0, 1.8973665961010275 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.801783725737273, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=462373400546044315&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn;iflytek.com;iflytek.com;ruc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Renmin University of China;iFLYTEK;State Key Laboratory of Cognitive Intelligence", "aff_unique_dep": ";Research;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.iflytek.com;", "aff_unique_abbr": "RUC;iFLYTEK;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Guide Through the Zoo of Biased SGD", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71826", "id": "OCtv4NyahI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/484d254ff80e99d543159440a06db0de-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OCtv4NyahI", "openreview": "https://openreview.net/forum?id=OCtv4NyahI", "poster": "/media/PosterPDFs/NeurIPS%202023/71826.png?t=1699816941.6934083", "slides": "https://nips.cc/virtual/2023/poster/71826", "video": "https://nips.cc/virtual/2023/poster/71826", "author_site": "Yury Demidovich, Grigory Malinovsky, Igor Sokolov, Peter Richtarik", "tldr": "", "abstract": "Stochastic Gradient Descent (SGD) is arguably the most important single algorithm in modern machine learning. Although SGD with unbiased gradient estimators has been studied extensively over at least half a century, SGD variants relying on biased estimators are rare. Nevertheless, there has been an increased interest in this topic in recent years. However, existing literature on SGD with biased estimators lacks coherence since each new paper relies on a different set of assumptions, without any clear understanding of how they are connected, which may lead to confusion. We address this gap by establishing connections among the existing assumptions, and presenting a comprehensive map of the underlying relationships. Additionally, we introduce a new set of assumptions that is provably weaker than all previous assumptions, and use it to present a thorough analysis of BiasedSGD in both convex and non-convex settings, offering advantages over previous results. We also provide examples where biased estimators outperform their unbiased counterparts or where unbiased versions are simply not available. Finally, we demonstrate the effectiveness of our framework through experimental results that validate our theoretical findings.", "keywords": "Stochastic optimization;biased SGD;Non-convex analysis", "primary_area": "", "supplementary_material": "/attachment/eac91d4a392d0ffc4115ccb30553595bc30503a0.pdf", "author": "Yury Demidovich;Grigory Malinovsky;Igor Sokolov;Peter Richt\u00e1rik", "authorids": "~Yury_Demidovich1;~Grigory_Malinovsky1;~Igor_Sokolov3;~Peter_Richt\u00e1rik1", "gender": "M;M;M;M", "homepage": ";https://grigory-malinovsky.github.io;https://cemse.kaust.edu.sa/people/person/igor-sokolov;https://richtarik.org", "dblp": "326/7284;262/3277.html;202/5678-1;62/8001", "google_scholar": "https://scholar.google.com/citations?hl=ru;4w2W9KQAAAAJ;https://scholar.google.ru/citations?user=OBbPecwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-2338-0187;0000-0003-4380-5848", "linkedin": "yuradem/;;igor-sokolov-7a6b47147/;richtarik/", "or_profile": "~Yury_Demidovich1;~Grigory_Malinovsky1;~Igor_Sokolov3;~Peter_Richtarik1", "aff": "King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ndemidovich2023a,\ntitle={A Guide Through the Zoo of Biased {SGD}},\nauthor={Yury Demidovich and Grigory Malinovsky and Igor Sokolov and Peter Richt{\\'a}rik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OCtv4NyahI}\n}", "github": "", "project": "", "reviewers": "PDsK;R984;84Sm;8vPv", "pdf_size": 495268, 
"rating": "5;6;6;7", "confidence": "2;4;3;4", "soundness": "3;4;3;3", "novelty": "3;3;4;3", "presentation": "3;4;3;2", "wc_summary": "37;57;115;117", "wc_strengths": "28;68;125;127", "wc_weaknesses": "102;507;72;458", "wc_questions": "175;61;9;338", "wc_limitations": "38;54;70;1", "wc_review": "380;747;391;1041", "wc_reply_reviewers": "121;68;26;109", "wc_reply_authors": "0;59;26;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.5, 35.224281397922084 ], "wc_strengths_avg": [ 87.0, 41.49096287144949 ], "wc_weaknesses_avg": [ 284.75, 198.7905618986978 ], "wc_questions_avg": [ 145.75, 126.19305646508448 ], "wc_limitations_avg": [ 40.75, 25.586861863073402 ], "wc_review_avg": [ 639.75, 274.7047278442801 ], "wc_reply_reviewers_avg": [ 81.0, 37.34300469967568 ], "wc_reply_authors_avg": [ 21.25, 24.24226680820092 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=854043202663142787&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Imagine the Unseen World: A Benchmark for Systematic Generalization in Visual World Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73610", "id": "ODB01Fyr4a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58af908d6293810f1a29e69bf723dc48-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ODB01Fyr4a", "openreview": "https://openreview.net/forum?id=ODB01Fyr4a", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73610", "video": "https://nips.cc/virtual/2023/poster/73610", "author_site": "Yeongbin Kim, Gautam Singh, Junyeong Park, Caglar Gulcehre, Sungjin Ahn", "tldr": "", "abstract": "Systematic compositionality, or the ability to adapt to novel situations by creating a mental model of the world using reusable pieces of knowledge, remains a significant challenge in machine learning. While there has been considerable progress in the language domain, efforts towards systematic visual imagination, or envisioning the dynamical implications of a visual observation, are in their infancy. We introduce the Systematic Visual Imagination Benchmark (SVIB), the first benchmark designed to address this problem head-on. SVIB offers a novel framework for a minimal world modeling problem, where models are evaluated based on their ability to generate one-step image-to-image transformations under a latent world dynamics. The framework provides benefits such as the possibility to jointly optimize for systematic perception and imagination, a range of difficulty levels, and the ability to control the fraction of possible factor combinations used during training. 
We provide a comprehensive evaluation of various baseline models on SVIB, offering insight into the current state-of-the-art in systematic visual imagination. We hope that this benchmark will help advance visual systematic compositionality.", "keywords": "Systematic Compositionality;Visual Imagination;Benchmark;World Modeling", "primary_area": "", "supplementary_material": "/attachment/e27b1e05410f359bac5686a9814589c971e39a04.pdf", "author": "Yeongbin Kim;Gautam Singh;Junyeong Park;Caglar Gulcehre;Sungjin Ahn", "authorids": "~Yeongbin_Kim1;~Gautam_Singh3;~Junyeong_Park1;~Caglar_Gulcehre1;~Sungjin_Ahn1", "gender": "M;M;M;M;", "homepage": "https://www.notion.so/Yeongbin-Kim-bc9a65400f10426aa1d7736bba7c2e1a;https://singhgautam.github.io;;http://caglarg.com;", "dblp": "232/6519;35/2642;;125/2132;", "google_scholar": ";lXpFxDwAAAAJ;o9l_sIAAAAAJ;https://scholar.google.ca/citations?user=7hwJ2ckAAAAJ;", "orcid": ";;;;", "linkedin": ";gautam-singh-61302463/;junyeong-park-043766191/;;", "or_profile": "~Yeongbin_Kim1;~Gautam_Singh3;~Junyeong_Park1;~Caglar_Gulcehre1;~Sungjin_Ahn1", "aff": "Korea Advanced Institute of Science & Technology;Rutgers University;Hanyang University;Deepmind;", "aff_domain": "kaist.ac.kr;rutgers.edu;hanyang.ac.kr;google.com;", "position": "MS student;PhD student;Undergrad student;Research Scientist;", "bibtex": "@inproceedings{\nkim2023imagine,\ntitle={Imagine the Unseen World: A Benchmark for Systematic Generalization in Visual World Models},\nauthor={Yeongbin Kim and Gautam Singh and Junyeong Park and Caglar Gulcehre and Sungjin Ahn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ODB01Fyr4a}\n}", "github": "", "project": "", "reviewers": "zB71;A3fU;LyM3;ikkV", "pdf_size": 1604332, "rating": "6;6;6;9", "confidence": "3;3;3;4", "wc_summary_and_contributions": "91;76;27;234", "wc_strengths": "100;63;37;113", "wc_improvement": "347;65;344;127", "wc_limitations": "62;25;12;8", "wc_correctness": "25;25;31;43", "wc_clarity": "18;9;366;5", "wc_relation_to_prior_work": "8;18;27;57", "wc_documentation": "19;15;39;40", "wc_additional_feedback": "1;1;1;1", "wc_review": "671;297;884;628", "wc_reply_reviewers": "27;9;833;0", "wc_reply_authors": "1424;340;3094;446", "reply_reviewers": "1;1;2;0", "reply_authors": "3;2;6;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 107.0, 77.04868590702894 ], "wc_strengths_avg": [ 78.25, 30.06139550985616 ], "wc_improvement_avg": [ 220.75, 126.66565240821997 ], "wc_limitations_avg": [ 26.75, 21.299941314473145 ], "wc_correctness_avg": [ 31.0, 7.3484692283495345 ], "wc_clarity_avg": [ 99.5, 153.93586326779084 ], "wc_relation_to_prior_work_avg": [ 27.5, 18.309833423600555 ], "wc_documentation_avg": [ 28.25, 11.344051304538427 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 620.0, 210.17254815983938 ], "wc_reply_reviewers_avg": [ 217.25, 355.636313528301 ], "wc_reply_authors_avg": [ 1326.0, 1104.7651334107172 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.8708286933869707 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9613997215900943825&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;rutgers.edu;hanyang.ac.kr;google.com;", "author_num": 5, "aff_unique_index": 
"0;1;2;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Rutgers University;Hanyang University;DeepMind", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.rutgers.edu;https://www.hanyang.ac.kr;https://deepmind.com", "aff_unique_abbr": "KAIST;Rutgers;HYU;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "South Korea;United States;United Kingdom" }, { "title": "Experimental Designs for Heteroskedastic Variance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71825", "id": "OFDApY678F", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d01db5cd2555ba11f75da0454d57b903-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OFDApY678F", "openreview": "https://openreview.net/forum?id=OFDApY678F", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71825", "video": "https://nips.cc/virtual/2023/poster/71825", "author_site": "Justin Weltz, Tanner Fiez, Alexander Volfovsky, Eric Laber, Blake Mason, houssam nassif, Lalit Jain", "tldr": "", "abstract": "Most linear experimental design problems assume homogeneous variance, while the presence of heteroskedastic noise is present in many realistic settings. \nLet a learner have access to a finite set of measurement vectors $\\mathcal{X}\\subset \\mathbb{R}^d$ that can be probed to receive noisy linear responses of the form $y=x^{\\top}\\theta^{\\ast}+\\eta$. \nHere $\\theta^{\\ast}\\in \\mathbb{R}^d$ is an unknown parameter vector, and $\\eta$ is independent mean-zero $\\sigma_x^2$-sub-Gaussian noise defined by a flexible heteroskedastic variance model, $\\sigma_x^2 = x^{\\top}\\Sigma^{\\ast}x$. Assuming that $\\Sigma^{\\ast}\\in \\mathbb{R}^{d\\times d}$ is an unknown matrix, we propose, analyze and empirically evaluate a novel design for uniformly bounding estimation error of the variance parameters, $\\sigma_x^2$. 
\nWe demonstrate this method on two adaptive experimental design problems under heteroskedastic noise, fixed-confidence transductive best-arm identification and level-set identification, and prove the first instance-dependent lower bounds in these settings.\nLastly, we construct near-optimal algorithms and empirically demonstrate the large improvements in sample complexity gained from accounting for heteroskedastic variance in these designs.", "keywords": "Heteroskedastic Variance;Linear Bandits;Experimental design", "primary_area": "", "supplementary_material": "/attachment/be25d531e89ede4c31b95c9b5b391e99753f0998.zip", "author": "Justin David Naggar Weltz;Tanner Fiez;Alexander Volfovsky;Eric Laber;Blake Mason;houssam nassif;Lalit K Jain", "authorids": "~Justin_David_Naggar_Weltz1;~Tanner_Fiez1;~Alexander_Volfovsky1;~Eric_Laber1;~Blake_Mason1;~houssam_nassif1;~Lalit_K_Jain1", "gender": "M;;;M;M;M;", "homepage": ";;https://volfovsky.github.io;https://laber-labs.com;https://blakemas.github.io/blakemas/;http://pages.cs.wisc.edu/~hous21/;http://www.lalitjain.com", "dblp": ";195/5645;180/1072.html;95/10964;184/0279;49/7789;178/3228", "google_scholar": "https://scholar.google.com/citations?hl=en;_B6SVAcAAAAJ;9qHFzDoAAAAJ;https://scholar.google.com/citations?hl=en;gLO_20kAAAAJ;https://scholar.google.com/citations?hl=en;hGMSFu4AAAAJ", "orcid": ";;;;;;", "linkedin": "justin-weltz-874849133/;tannerfiez/;;;;houssamnassif;", "or_profile": "~Justin_David_Naggar_Weltz1;~Tanner_Fiez1;~Alexander_Volfovsky1;~Eric_Laber1;~Blake_Mason1;~houssam_nassif1;~Lalit_K_Jain1", "aff": "Duke University;Amazon;Duke University;Duke University;University of Wisconsin, Madison;Meta;University of Washington", "aff_domain": "duke.edu;amazon.com;duke.edu;duke.edu;wisc.edu;meta.com;uw.edu", "position": "PhD student;Researcher;Assistant Professor;Full Professor;Postdoc;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nweltz2023experimental,\ntitle={Experimental Designs for Heteroskedastic Variance},\nauthor={Justin David Naggar Weltz and Tanner Fiez and Alexander Volfovsky and Eric Laber and Blake Mason and houssam nassif and Lalit K Jain},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OFDApY678F}\n}", "github": "", "project": "", "reviewers": "mrcD;jG9W;Jphn;VoEg", "pdf_size": 766226, "rating": "3;6;7;7", "confidence": "4;2;1;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "74;64;118;375", "wc_strengths": "11;17;58;3", "wc_weaknesses": "76;119;97;3", "wc_questions": "35;92;9;3", "wc_limitations": "2;6;1;1", "wc_review": "198;298;283;385", "wc_reply_reviewers": "123;11;140;11", "wc_reply_authors": "0;0;86;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 157.75, 127.06371433261346 ], "wc_strengths_avg": [ 22.25, 21.22940178149163 ], "wc_weaknesses_avg": [ 73.75, 43.58540466715893 ], "wc_questions_avg": [ 34.75, 35.173676236640375 ], "wc_limitations_avg": [ 2.5, 2.0615528128088303 ], "wc_review_avg": [ 291.0, 66.3287268082239 ], "wc_reply_reviewers_avg": [ 71.25, 60.549050364146915 ], "wc_reply_authors_avg": [ 21.5, 37.239092362730865 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 
0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4989221802511868, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14420244488702807467&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "duke.edu;amazon.com;duke.edu;duke.edu;wisc.edu;meta.com;uw.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;3;4", "aff_unique_norm": "Duke University;Amazon;University of Wisconsin;Meta;University of Washington", "aff_unique_dep": ";Amazon.com, Inc.;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.duke.edu;https://www.amazon.com;https://www.wisc.edu;https://meta.com;https://www.washington.edu", "aff_unique_abbr": "Duke;Amazon;UW;Meta;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Jaccard Metric Losses: Optimizing the Jaccard Index with Soft Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71824", "id": "OFMPrCAMKi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee208bfc04b1bf6125a6a34baa1c28d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OFMPrCAMKi", "openreview": "https://openreview.net/forum?id=OFMPrCAMKi", "poster": "/media/PosterPDFs/NeurIPS%202023/71824.png?t=1701773715.9072287", "slides": "https://nips.cc/virtual/2023/poster/71824", "video": "https://nips.cc/virtual/2023/poster/71824", "author_site": "Zifu Wang, Xuefei Ning, Matthew Blaschko", "tldr": "", "abstract": "Intersection over Union (IoU) losses are surrogates that directly optimize the Jaccard index. Leveraging IoU losses as part of the loss function has demonstrated superior performance in semantic segmentation tasks compared to optimizing pixel-wise losses such as the cross-entropy loss alone. However, we identify a lack of flexibility in these losses to support vital training techniques like label smoothing, knowledge distillation, and semi-supervised learning, mainly due to their inability to process soft labels. To address this, we introduce Jaccard Metric Losses (JMLs), which are identical to the soft Jaccard loss in standard settings with hard labels but are fully compatible with soft labels. We apply JMLs to three prominent use cases of soft labels: label smoothing, knowledge distillation and semi-supervised learning, and demonstrate their potential to enhance model accuracy and calibration. Our experiments show consistent improvements over the cross-entropy loss across 4 semantic segmentation datasets (Cityscapes, PASCAL VOC, ADE20K, DeepGlobe Land) and 13 architectures, including classic CNNs and recent vision transformers. Remarkably, our straightforward approach significantly outperforms state-of-the-art knowledge distillation and semi-supervised learning methods. The code is available at \href{https://github.com/zifuwanggg/JDTLosses}{https://github.com/zifuwanggg/JDTLosses}.", "keywords": "Semantic Segmentation", "primary_area": "", "supplementary_material": "", "author": "Zifu Wang;Xuefei Ning;Matthew B. 
Blaschko", "authorids": "~Zifu_Wang1;~Xuefei_Ning1;~Matthew_B._Blaschko1", "gender": "M;Not Specified;M", "homepage": "https://zifuwang.com;https://nics-effalg.com/ningxuefei/;http://homes.esat.kuleuven.be/~mblaschk/", "dblp": ";202/9525;12/5233", "google_scholar": "https://scholar.google.com/citations?hl=en;oVslpJsAAAAJ;EmmO7LcAAAAJ", "orcid": ";;0000-0002-2640-181X", "linkedin": ";;matthew-blaschko-5b7a51b0/", "or_profile": "~Zifu_Wang1;~Xuefei_Ning1;~Matthew_Blaschko1", "aff": "KU Leuven;Huawei Technologies Ltd.;KU Leuven", "aff_domain": "kuleuven.be;huawei.com;esat.kuleuven.be", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwang2023jaccard,\ntitle={Jaccard Metric Losses: Optimizing the Jaccard Index with Soft Labels},\nauthor={Zifu Wang and Xuefei Ning and Matthew B. Blaschko},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OFMPrCAMKi}\n}", "github": "", "project": "", "reviewers": "L5GW;jE79;83cg;KUTw;FApN", "pdf_size": 4175907, "rating": "5;5;6;7;7", "confidence": "3;3;3;5;4", "soundness": "3;3;4;3;3", "novelty": "3;2;4;3;3", "presentation": "3;3;4;3;3", "wc_summary": "55;52;50;64;76", "wc_strengths": "37;33;124;53;174", "wc_weaknesses": "216;46;136;124;32", "wc_questions": "21;6;13;48;62", "wc_limitations": "1;1;6;2;15", "wc_review": "330;138;329;291;359", "wc_reply_reviewers": "0;121;43;34;10", "wc_reply_authors": "51;176;13;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 59.4, 9.58331884056875 ], "wc_strengths_avg": [ 84.2, 55.625174157030735 ], "wc_weaknesses_avg": [ 110.8, 66.76046734408021 ], "wc_questions_avg": [ 30.0, 21.419617176784463 ], "wc_limitations_avg": [ 5.0, 5.329165037789691 ], "wc_review_avg": [ 289.4, 78.72128047739061 ], "wc_reply_reviewers_avg": [ 41.6, 42.645515590739436 ], "wc_reply_authors_avg": [ 48.0, 66.67233309252046 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8385254915624212, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11022165103554569466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kuleuven.be;huawei.com;esat.kuleuven.be", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Katholieke Universiteit Leuven;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.kuleuven.be;https://www.huawei.com", "aff_unique_abbr": "KU Leuven;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Belgium;China" }, { "title": "Inserting Anybody in Diffusion Models via Celeb Basis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71823", "id": "OGQWZ3p0Zn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6d37cc5723e810b793c834bcb6647cf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OGQWZ3p0Zn", "openreview": "https://openreview.net/forum?id=OGQWZ3p0Zn", "poster": "/media/PosterPDFs/NeurIPS%202023/71823.png?t=1701944207.5601892", "slides": "https://nips.cc/virtual/2023/poster/71823", "video": 
"https://nips.cc/virtual/2023/poster/71823", "author_site": "Ge Yuan, Xiaodong Cun, Yong Zhang, Maomao Li, Chenyang Qi, Xintao Wang, Ying Shan, Huicheng Zheng", "tldr": "", "abstract": "Exquisite demand exists for customizing the pretrained large text-to-image model, $e.g.$ Stable Diffusion, to generate innovative concepts, such as the users themselves. However, the newly-added concept from previous customization methods often shows weaker combination abilities than the original ones even given several images during training. We thus propose a new personalization method that allows for the seamless integration of a unique individual into the pre-trained diffusion model using just $one\\ facial\\ photograph$ and only $1024\\ learnable\\ parameters$ under $3\\ minutes$. So we can effortlessly generate stunning images of this person in any pose or position, interacting with anyone and doing anything imaginable from text prompts. To achieve this, we first analyze and build a well-defined celeb basis from the embedding space of the pre-trained large text encoder. Then, given one facial photo as the target identity, we generate its own embedding by optimizing the weight of this basis and locking all other parameters. Empowered by the proposed celeb basis, the new identity in our customized model showcases a better concept combination ability than previous personalization methods. Besides, our model can also learn several new identities at once and interact with each other where the previous customization model fails to. Project page is at: http://celeb-basis.github.io. Code is at: https://github.com/ygtxr1997/CelebBasis.", "keywords": "Text-to-Image Synthesis;Personalized Synthesis;Face Embedding", "primary_area": "", "supplementary_material": "", "author": "Ge Yuan;Xiaodong Cun;Yong Zhang;Maomao Li;Chenyang Qi;Xintao Wang;Ying Shan;Huicheng Zheng", "authorids": "~Ge_Yuan1;~Xiaodong_Cun1;~Yong_Zhang6;~Maomao_Li2;~Chenyang_Qi1;~Xintao_Wang1;~Ying_Shan2;~Huicheng_Zheng1", "gender": "M;M;M;F;M;;M;M", "homepage": "https://github.com/ygtxr1997;https://vinthony.github.io;https://yzhang2016.github.io/yongnorriszhang.github.io/;;https://chenyangqiqi.github.io/;;;https://cse.sysu.edu.cn/teacher/ZhengHuicheng", "dblp": "324/0596.html;210/0897;66/4615-34.html;https://dblp.uni-trier.de/pid/246/5743.html;299/1389;;68/5910;00/3034.html", "google_scholar": "nkTgchcAAAAJ;p42qwXcAAAAJ;a_zSeVEAAAAJ;;qNweIR4AAAAJ;;4oXBp9UAAAAJ;", "orcid": ";0000-0003-3607-2236;;;;;0000-0001-7673-8325;0000-0002-6729-4176", "linkedin": ";;;;chenyang-qi-5196a6137;;YingShanProfile/;", "or_profile": "~Ge_Yuan1;~Xiaodong_Cun1;~Yong_Zhang6;~Maomao_Li2;~Chenyang_Qi1;~Xintao_Wang1;~Ying_Shan2;~Huicheng_Zheng1", "aff": "SUN YAT-SEN UNIVERSITY;;Tencent AI Lab;Tencent AI Lab;Hong Kong University of Science and Technology;;Tencent PCG ARC Lab;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;;tencent.com;tencent.com;ust.hk;;arc.tencent.com;sysu.edu.cn", "position": "MS student;;Researcher;Researcher;PhD student;;Director;Associate Professor", "bibtex": "@inproceedings{\nyuan2023inserting,\ntitle={Inserting Anybody in Diffusion Models via Celeb Basis},\nauthor={Ge Yuan and Xiaodong Cun and Yong Zhang and Maomao Li and Chenyang Qi and Xintao Wang and Ying Shan and Huicheng Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OGQWZ3p0Zn}\n}", "github": "", "project": "", "reviewers": "RQjb;zJJ7;Qr1K;MXrx;ABqC", "pdf_size": 34612206, "rating": "5;5;6;7;7", 
"confidence": "5;4;4;5;2", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "82;161;56;114;54", "wc_strengths": "51;150;75;80;87", "wc_weaknesses": "153;183;200;100;1", "wc_questions": "2;96;64;26;8", "wc_limitations": "1;223;27;3;34", "wc_review": "289;813;422;323;184", "wc_reply_reviewers": "24;18;22;103;0", "wc_reply_authors": "301;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.4, 40.187560264340505 ], "wc_strengths_avg": [ 88.6, 33.00060605504087 ], "wc_weaknesses_avg": [ 127.4, 71.75681152336689 ], "wc_questions_avg": [ 39.2, 35.70098037869548 ], "wc_limitations_avg": [ 57.6, 83.70806412765738 ], "wc_review_avg": [ 406.2, 217.14824429407668 ], "wc_reply_reviewers_avg": [ 33.4, 35.81954773583831 ], "wc_reply_authors_avg": [ 60.2, 120.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4082482904638631, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2056278626886754059&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sysu.edu.cn;;tencent.com;tencent.com;ust.hk;;arc.tencent.com;sysu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "Sun Yat-sen University;Tencent;Hong Kong University of Science and Technology", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "http://www.sysu.edu.cn;https://ai.tencent.com;https://www.ust.hk", "aff_unique_abbr": "SYSU;Tencent AI Lab;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Building the Bridge of Schr\u00f6dinger: A Continuous Entropic Optimal Transport Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73609", "id": "OHimIaixXk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c4688b6a76f25f2311daa0d75a58f1a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=OHimIaixXk", "openreview": "https://openreview.net/forum?id=OHimIaixXk", "poster": "/media/PosterPDFs/NeurIPS%202023/73609.png?t=1701493598.4974518", "slides": "https://nips.cc/virtual/2023/poster/73609", "video": "https://nips.cc/virtual/2023/poster/73609", "author_site": "Nikita Gushchin, Alexander Kolesov, Petr Mokrov, Polina Karpikova, Andrei Spiridonov, Evgeny Burnaev, Alexander Korotin", "tldr": "", "abstract": "Over the last several years, there has been significant progress in developing neural solvers for the Schr\u00f6dinger Bridge (SB) problem and applying them to generative modelling. This new research field is justifiably fruitful as it is interconnected with the practically well-performing diffusion models and theoretically grounded entropic optimal transport (EOT). Still, the area lacks non-trivial tests allowing a researcher to understand how well the methods solve SB or its equivalent continuous EOT problem. We fill this gap and propose a novel way to create pairs of probability distributions for which the ground truth OT solution is known by the construction. 
Our methodology is generic and works for a wide range of OT formulations, in particular, it covers the EOT which is equivalent to SB (the main interest of our study). This development allows us to create continuous benchmark distributions with the known EOT and SB solutions on high-dimensional spaces such as spaces of images. As an illustration, we use these benchmark pairs to test how well existing neural EOT/SB solvers actually compute the EOT solution. Our code for constructing benchmark pairs under different setups is available at: https://github.com/ngushchin/EntropicOTBenchmark", "keywords": "entropic optimal transport;schr\u00f6dinger bridge;benchmark;continuous distributions;neural methods;generative modeling", "primary_area": "", "supplementary_material": "/attachment/d2fdac9f234739941de89984484fb09290c2a1e8.pdf", "author": "Nikita Gushchin;Alexander Kolesov;Petr Mokrov;Polina Karpikova;Andrei Spiridonov;Evgeny Burnaev;Alexander Korotin", "authorids": "~Nikita_Gushchin1;~Alexander_Kolesov1;~Petr_Mokrov1;~Polina_Karpikova1;~Andrei_Spiridonov1;~Evgeny_Burnaev1;~Alexander_Korotin2", "gender": "M;M;M;;M;M;M", "homepage": ";https://github.com/Kolessov;https://github.com/PetrMokrov;;https://github.com/Penchekrak;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "332/1999;287/4380;;;345/4134;144/7845;209/9906", "google_scholar": "UaRTbNoAAAAJ;WyAI_wUAAAAJ;CRsi4IkAAAAJ;;;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": ";;;;;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": "nikita-gushchin-937522145/;;;https://linkedin.com/in/polina-karpikova-381828235;;;", "or_profile": "~Nikita_Gushchin1;~Alexander_Kolesov1;~Petr_Mokrov1;~Polina_Karpikova1;~Andrei_Spiridonov1;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "Skolkovo Institute of Science and Technology;The Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Higher School of Economics, Higher School of Economics;Higher School of Economics;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;skoltech.ru;skolkovotech.ru;edu.hse.ru;hse.ru;skoltech.ru;skoltech.ru", "position": "PhD student;PhD student;PhD student;MS student;MS student;Full Professor;Head of Research Group", "bibtex": "@inproceedings{\ngushchin2023building,\ntitle={Building the Bridge of Schr\\\"odinger: A Continuous Entropic Optimal Transport Benchmark},\nauthor={Nikita Gushchin and Alexander Kolesov and Petr Mokrov and Polina Karpikova and Andrei Spiridonov and Evgeny Burnaev and Alexander Korotin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=OHimIaixXk}\n}", "github": "", "project": "", "reviewers": "p1pQ;XL1t;jvUZ;oGez;UzdZ", "pdf_size": 24147880, "rating": "6;6;6;7;9", "confidence": "4;3;4;1;4", "wc_summary_and_contributions": "69;118;91;72;41", "wc_strengths": "63;69;53;128;27", "wc_improvement": "76;156;57;186;3", "wc_limitations": "7;35;1;2;23", "wc_correctness": "18;14;1;10;1", "wc_clarity": "5;35;1;4;1", "wc_relation_to_prior_work": "14;45;1;4;10", "wc_documentation": "5;15;25;6;9", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "258;488;231;413;116", "wc_reply_reviewers": "20;0;0;33;0", "wc_reply_authors": "354;916;616;663;121", "reply_reviewers": "1;0;0;1;0", "reply_authors": "3;3;2;3;1", "rating_avg": [ 6.8, 
1.16619037896906 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "wc_summary_and_contributions_avg": [ 78.2, 25.51391777050322 ], "wc_strengths_avg": [ 68.0, 33.26259160077579 ], "wc_improvement_avg": [ 95.6, 66.73709613101248 ], "wc_limitations_avg": [ 13.6, 13.290598180668919 ], "wc_correctness_avg": [ 8.8, 6.8527366796047255 ], "wc_clarity_avg": [ 9.2, 12.998461447417537 ], "wc_relation_to_prior_work_avg": [ 14.8, 15.765785740013087 ], "wc_documentation_avg": [ 12.0, 7.37563556583431 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 301.2, 133.03142485894077 ], "wc_reply_reviewers_avg": [ 10.6, 13.61763562443936 ], "wc_reply_authors_avg": [ 534.0, 272.8582049343578 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.02941176470588238, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=822990586466538690&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "skoltech.ru;skoltech.ru;skolkovotech.ru;edu.hse.ru;hse.ru;skoltech.ru;skoltech.ru", "author_num": 7, "aff_unique_index": "0;0;0;1;1;0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Higher School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;https://www.hse.ru", "aff_unique_abbr": "Skoltech;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "RePo: Resilient Model-Based Reinforcement Learning by Regularizing Posterior Predictability", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71822", "id": "OIJ3VXDy6s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6692e1b0e8a31e8de84bd90ad4d8d9e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OIJ3VXDy6s", "openreview": "https://openreview.net/forum?id=OIJ3VXDy6s", "poster": "/media/PosterPDFs/NeurIPS%202023/71822.png?t=1702261134.4694734", "slides": "https://nips.cc/virtual/2023/poster/71822", "video": "https://nips.cc/virtual/2023/poster/71822", "author_site": "Chuning Zhu, Max Simchowitz, Siri Gadipudi, Abhishek Gupta", "tldr": "", "abstract": "Visual model-based RL methods typically encode image observations into low-dimensional representations in a manner that does not eliminate redundant information. This leaves them susceptible to spurious variations -- changes in task-irrelevant components such as background distractors or lighting conditions. In this paper, we propose a visual model-based RL method that learns a latent representation resilient to such spurious variations. Our training objective encourages the representation to be maximally predictive of dynamics and reward, while constraining the information flow from the observation to the latent representation. We demonstrate that this objective significantly bolsters the resilience of visual model-based RL methods to visual distractors, allowing them to operate in dynamic environments. We then show that while the learned encoder is able to operate in dynamic environments, it is not invariant under significant distribution shift. To address this, we propose a simple reward-free alignment procedure that enables test time adaptation of the encoder. This allows for quick adaptation to widely differing environments without having to relearn the dynamics and policy. 
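The training objective described in this abstract, prediction of dynamics and reward under a constraint on information flowing from observation to latent, has the flavor of a variational information bottleneck. A schematic PyTorch sketch under that reading follows; the module interfaces (encoder, dynamics, reward_head) and the weight beta are illustrative assumptions, not RePo's actual implementation.

```python
# Schematic sketch of a bottlenecked latent objective in the spirit
# described above; interfaces and the beta weight are assumptions.
import torch
import torch.nn.functional as F

def bottlenecked_latent_loss(encoder, dynamics, reward_head,
                             obs, action, reward, next_obs, beta=1e-3):
    mu, log_std = encoder(obs)                     # posterior q(z | o)
    z = mu + log_std.exp() * torch.randn_like(mu)  # reparameterized sample
    # Predictive terms: the latent must explain dynamics and reward.
    with torch.no_grad():
        target_next, _ = encoder(next_obs)
    dyn_loss = F.mse_loss(dynamics(z, action), target_next)
    rew_loss = F.mse_loss(reward_head(z), reward)
    # Bottleneck term: KL(q(z|o) || N(0, I)) limits information about o in z.
    kl = (-log_std + 0.5 * (log_std.exp() ** 2 + mu ** 2) - 0.5).sum(-1).mean()
    return dyn_loss + rew_loss + beta * kl
```

Raising beta tightens the bottleneck, which is what discards task-irrelevant variation such as background distractors.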
Our effort is a step towards making model-based RL a practical and useful tool for dynamic, diverse domains and we show its effectiveness in simulation tasks with significant spurious variations.", "keywords": "Model-Based Reinforcement Learning;Deep Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/c71e3e4a8572df12f0e2e0f1438e8e66cd741e56.zip", "author": "Chuning Zhu;Max Simchowitz;Siri Gadipudi;Abhishek Gupta", "authorids": "~Chuning_Zhu1;~Max_Simchowitz1;siri.gadipudi9@gmail.com;~Abhishek_Gupta1", "gender": "M;M;;M", "homepage": "https://homes.cs.washington.edu/~zchuning/;;;https://homes.cs.washington.edu/~abhgupta/", "dblp": "295/9468;176/5165;;18/6404-4", "google_scholar": ";;;1wLVDP4AAAAJ", "orcid": ";;;", "linkedin": "chuning-zhu-39b086167/;;;", "or_profile": "~Chuning_Zhu1;~Max_Simchowitz1;siri.gadipudi9@gmail.com;~Abhishek_Gupta1", "aff": "University of Washington;Massachusetts Institute of Technology;;University of Washington", "aff_domain": "cs.washington.edu;mit.edu;;uw.edu", "position": "PhD student;Postdoc;;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023repo,\ntitle={RePo: Resilient Model-Based Reinforcement Learning by Regularizing Posterior Predictability},\nauthor={Chuning Zhu and Max Simchowitz and Siri Gadipudi and Abhishek Gupta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OIJ3VXDy6s}\n}", "github": "", "project": "", "reviewers": "z3JG;ZoCZ;CzT8;zSkA", "pdf_size": 7338507, "rating": "5;5;7;7", "confidence": "4;4;3;4", "soundness": "2;3;4;3", "novelty": "2;3;4;3", "presentation": "2;3;4;3", "wc_summary": "117;108;55;175", "wc_strengths": "65;59;79;42", "wc_weaknesses": "579;248;38;33", "wc_questions": "43;108;53;68", "wc_limitations": "16;23;22;8", "wc_review": "820;546;247;326", "wc_reply_reviewers": "33;39;0;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 113.75, 42.56392251661024 ], "wc_strengths_avg": [ 61.25, 13.273563952458284 ], "wc_weaknesses_avg": [ 224.5, 222.3044084133286 ], "wc_questions_avg": [ 68.0, 24.748737341529164 ], "wc_limitations_avg": [ 17.25, 5.973901572674261 ], "wc_review_avg": [ 484.75, 222.41332581479915 ], "wc_reply_reviewers_avg": [ 21.5, 15.46770829825802 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=837767217557883322&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.washington.edu;mit.edu;;uw.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Washington;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://web.mit.edu", "aff_unique_abbr": "UW;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LLMScore: Unveiling the Power of Large Language Models in Text-to-Image Synthesis Evaluation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71821", "id": "OJ0c6um1An", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/47f30d67bce3e9824928267e9355420f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OJ0c6um1An", "openreview": "https://openreview.net/forum?id=OJ0c6um1An", "poster": "/media/PosterPDFs/NeurIPS%202023/71821.png?t=1701675341.4331622", "slides": "https://nips.cc/virtual/2023/poster/71821", "video": "https://nips.cc/virtual/2023/poster/71821", "author_site": "Yujie Lu, Xianjun Yang, Xiujun Li, Xin Eric Wang, William Yang Wang", "tldr": "", "abstract": "Existing automatic evaluation on text-to-image synthesis can only provide an image-text matching score, without considering the object-level compositionality, which results in poor correlation with human judgments. In this work, we propose LLMScore, a new framework that offers evaluation scores with multi-granularity compositionality. LLMScore leverages the large language models (LLMs) to evaluate text-to-image models. Initially, it transforms the image into image-level and object-level visual descriptions. Then an evaluation instruction is fed into the LLMs to measure the alignment between the synthesized image and the text, ultimately generating a score accompanied by a rationale. \nOur substantial analysis reveals the highest correlation of LLMScore with human judgments on a wide range of datasets (Attribute Binding Contrast, Concept Conjunction, MSCOCO, DrawBench, PaintSkills). Notably, our LLMScore achieves Kendall's tau correlation with human evaluations that is 58.8% and 31.2% higher than the commonly-used text-image matching metrics CLIP and BLIP, respectively.", "keywords": "Text-to-Image Evaluation;Visio-linguistic Compositionality;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/38033c7286bd489a9fc41cd9b8cdf5d506eb10c4.zip", "author": "Yujie Lu;Xianjun Yang;Xiujun Li;Xin Eric Wang;William Yang Wang", "authorids": "~Yujie_Lu1;~Xianjun_Yang1;~Xiujun_Li1;~Xin_Eric_Wang2;~William_Yang_Wang2", "gender": ";M;M;M;M", "homepage": "https://yujielu10.github.io/;;https://xjli.github.io/;https://eric-xw.github.io;https://www.cs.ucsb.edu/~william/", "dblp": ";37/10237;30/9646.html;10/5630-61;08/9282", "google_scholar": "pcmr6GMAAAAJ;Tunh15sAAAAJ;SW_WaQ0AAAAJ;YjqluE0AAAAJ;gf8Ms_8AAAAJ", "orcid": ";0000-0003-3318-8444;;0000-0003-2605-5504;", "linkedin": ";xianjun-yang-0062aa1a6/;;;", "or_profile": "~Yujie_Lu1;~Xianjun_Yang1;~Xiujun_Li1;~Xin_Eric_Wang2;~William_Wang1", "aff": "UC Santa Barbara;Shanghai Artificial Intelligence Laboratory;University of Washington;University of California, Santa Cruz;UC Santa Barbara", "aff_domain": "ucsb.edu;pjlab.org.cn;washington.edu;ucsc.edu;ucsb.edu", "position": "PhD student;Intern;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlu2023llmscore,\ntitle={{LLMS}core: Unveiling the Power of Large Language Models in Text-to-Image Synthesis Evaluation},\nauthor={Yujie Lu and Xianjun Yang and Xiujun Li and Xin Eric Wang and William Yang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OJ0c6um1An}\n}", "github": "", "project": "", "reviewers": "ZDTU;nuAT;9B3k;mWLn;BWFw", "pdf_size": 9804686, "rating": "6;6;6;7;7", "confidence": "5;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "84;134;100;101;107", "wc_strengths": "69;81;27;213;71", "wc_weaknesses": 
"243;244;59;180;70", "wc_questions": "52;69;40;30;111", "wc_limitations": "82;15;6;31;41", "wc_review": "530;543;232;555;400", "wc_reply_reviewers": "59;5;0;0;6", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.2, 16.28987415543779 ], "wc_strengths_avg": [ 92.2, 63.177211081211865 ], "wc_weaknesses_avg": [ 159.2, 80.7995049489785 ], "wc_questions_avg": [ 60.4, 28.443628460518184 ], "wc_limitations_avg": [ 35.0, 26.465071320516028 ], "wc_review_avg": [ 452.0, 123.35153018913061 ], "wc_reply_reviewers_avg": [ 14.0, 22.63625410707346 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.40824829046386313, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2037633364989029744&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucsb.edu;pjlab.org.cn;washington.edu;ucsc.edu;ucsb.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of California, Santa Barbara;Shanghai Artificial Intelligence Laboratory;University of Washington;University of California, Santa Cruz", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucsb.edu;http://www.shailab.org/;https://www.washington.edu;https://www.ucsc.edu", "aff_unique_abbr": "UCSB;Shanghai AI Lab;UW;UCSC", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Santa Barbara;;Santa Cruz", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Quilt-1M: One Million Image-Text Pairs for Histopathology", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73608", "id": "OL2JQoO0kq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/775ec578876fa6812c062644964b9870-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=OL2JQoO0kq", "openreview": "https://openreview.net/forum?id=OL2JQoO0kq", "poster": "/media/PosterPDFs/NeurIPS%202023/73608.png?t=1699579069.5950267", "slides": "https://nips.cc/virtual/2023/poster/73608", "video": "https://nips.cc/virtual/2023/poster/73608", "author_site": "Wisdom Ikezogwo, Saygin Seyfioglu, Fatemeh Ghezloo, Dylan Geva, Fatwir Sheikh Mohammed, Pavan Kumar Anand, Ranjay Krishna, Linda Shapiro", "tldr": "", "abstract": "Recent accelerations in multi-modal applications have been made possible with the plethora of image and text data available online. However, the scarcity of analogous data in the medical field, specifically in histopathology, has slowed comparable progress. 
\nTo enable similar representation learning for histopathology, we turn to YouTube, an untapped resource of videos, offering $1,087$ hours of valuable educational histopathology videos from expert clinicians.\nFrom YouTube, we curate QUILT: a large-scale vision-language dataset consisting of $802,144$ image and text pairs.\nQUILT was automatically curated using a mixture of models, including large language models, handcrafted algorithms, human knowledge databases, and automatic speech recognition.\nIn comparison, the most comprehensive datasets curated for histopathology amass only around $200$K samples.\nWe combine QUILT with datasets from other sources, including Twitter, research papers, and the internet in general, to create an even larger dataset: QUILT-1M, with $1$M paired image-text samples, marking it as the largest vision-language histopathology dataset to date. \nWe demonstrate the value of QUILT-1M by fine-tuning a pre-trained CLIP model. \nOur model outperforms state-of-the-art models on both zero-shot and linear probing tasks for classifying new histopathology images across $13$ diverse patch-level datasets of $8$ different sub-pathologies and cross-modal retrieval tasks.", "keywords": "Vision-Language;Dataset;Histopathology;Medical;Video;Image-Text", "primary_area": "", "supplementary_material": "/attachment/8a4074f8fc67cf55cf04c9aebe716ba8127ec264.pdf", "author": "Wisdom Oluchi Ikezogwo;Mehmet Saygin Seyfioglu;Fatemeh Ghezloo;Dylan Stefan Chan Geva;Fatwir Sheikh Mohammed;Pavan Kumar Anand;Ranjay Krishna;Linda Shapiro", "authorids": "~Wisdom_Oluchi_Ikezogwo1;~Mehmet_Saygin_Seyfioglu1;~Fatemeh_Ghezloo1;~Dylan_Stefan_Chan_Geva1;~Fatwir_Sheikh_Mohammed1;~Pavan_Kumar_Anand1;~Ranjay_Krishna1;~Linda_Shapiro1", "gender": "M;Not Specified;F;M;M;M;M;F", "homepage": "https://wisdomikezogwo.github.io/;https://mehmetsayginseyfioglu.github.io/;https://fghezloo.github.io/;;;;http://ranjaykrishna.com;http://cs.washington.edu/homes/shapiro/", "dblp": ";153/9328;269/1115;;;;167/3785;s/LindaGShapiro", "google_scholar": "gt5I_iYAAAAJ;65TuoYUAAAAJ;GAkNK7QAAAAJ;;m6FrYWwAAAAJ;;IcqahyAAAAAJ;https://scholar.google.com.tw/citations?user=6pGeV2wAAAAJ", "orcid": ";;0000-0003-3888-2793;;;;0000-0001-8784-2531;", "linkedin": ";;fghezloo/;dylan-geva-08364b123/;;pka2000/;ranjay-krishna-1a344444/;", "or_profile": "~Wisdom_Oluchi_Ikezogwo1;~Mehmet_Saygin_Seyfioglu1;~Fatemeh_Ghezloo1;~Dylan_Stefan_Chan_Geva1;~Fatwir_Sheikh_Mohammed1;~Pavan_Kumar_Anand1;~Ranjay_Krishna1;~Linda_Shapiro1", "aff": "University of Washington;University of Washington;University of Washington;Department of Computer Science;University of Washington;University of Washington;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;uw.edu;uw.edu;cs.washington.edu;uw.edu;uw.edu;cs.washington.edu;washington.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;MS student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nikezogwo2023quiltm,\ntitle={Quilt-1M: One Million Image-Text Pairs for Histopathology},\nauthor={Wisdom Oluchi Ikezogwo and Mehmet Saygin Seyfioglu and Fatemeh Ghezloo and Dylan Stefan Chan Geva and Fatwir Sheikh Mohammed and Pavan Kumar Anand and Ranjay Krishna and Linda Shapiro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=OL2JQoO0kq}\n}", "github": "", "project": "", "reviewers": "MJNv;jVw5;7iJR;rAj5;8biP", "pdf_size": 8551576, 
"rating": "7;8;8;9;10", "confidence": "3;4;5;4;5", "wc_summary_and_contributions": "52;107;136;350;83", "wc_strengths": "114;99;125;175;59", "wc_improvement": "80;54;2;591;104", "wc_limitations": "93;75;75;8;34", "wc_correctness": "3;12;16;24;1", "wc_clarity": "111;1;1;5;5", "wc_relation_to_prior_work": "49;14;5;10;21", "wc_documentation": "19;1;9;154;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "522;364;370;1318;315", "wc_reply_reviewers": "0;21;48;20;24", "wc_reply_authors": "336;379;357;1229;401", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 8.4, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 145.6, 105.86897562553442 ], "wc_strengths_avg": [ 114.4, 37.659527347007426 ], "wc_improvement_avg": [ 166.2, 215.0836116490515 ], "wc_limitations_avg": [ 57.0, 31.22178726466504 ], "wc_correctness_avg": [ 11.2, 8.47112743381895 ], "wc_clarity_avg": [ 24.6, 43.23702117398931 ], "wc_relation_to_prior_work_avg": [ 19.8, 15.509996776273036 ], "wc_documentation_avg": [ 38.0, 58.2889354852188 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 577.8, 376.5540598639191 ], "wc_reply_reviewers_avg": [ 22.6, 15.278743403827423 ], "wc_reply_authors_avg": [ 540.4, 344.98324597000357 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.681385143869247, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15804862380471364886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.washington.edu;uw.edu;uw.edu;cs.washington.edu;uw.edu;uw.edu;cs.washington.edu;washington.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;0;0", "aff_unique_norm": "University of Washington;Unknown Institution", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.washington.edu;", "aff_unique_abbr": "UW;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "An $\\varepsilon$-Best-Arm Identification Algorithm for Fixed-Confidence and Beyond", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71820", "id": "OLk3F64eSg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/358e4a39b8ace4744fbad77e84a7e757-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OLk3F64eSg", "openreview": "https://openreview.net/forum?id=OLk3F64eSg", "poster": "/media/PosterPDFs/NeurIPS%202023/71820.png?t=1696926573.4689958", "slides": "https://nips.cc/virtual/2023/poster/71820", "video": "https://nips.cc/virtual/2023/poster/71820", "author_site": "Marc Jourdan, R\u00e9my Degenne, Emilie Kaufmann", "tldr": "", "abstract": "We propose EB-TC$\\varepsilon$, a novel sampling rule for $\\varepsilon$-best arm identification in stochastic bandits.\n\tIt is the first instance of Top Two algorithm analyzed for approximate best arm identification. 
EB-TC$\\varepsilon$ is an *anytime* sampling rule that can therefore be employed without modification for fixed confidence or fixed budget identification (without prior knowledge of the budget).\n\tWe provide three types of theoretical guarantees for EB-TC$\\varepsilon$.\n\tFirst, we prove bounds on its expected sample complexity in the fixed confidence setting, notably showing its asymptotic optimality in combination with an adaptive tuning of its exploration parameter.\n\tWe complement these findings with upper bounds on its probability of error at any time and for any slack parameter, which further yield upper bounds on its simple regret at any time.\n\tFinally, we show through numerical simulations that EB-TC$\\varepsilon$ performs favorably compared to existing algorithms for different approximate best arm identification tasks.", "keywords": "multi-armed bandits;pure-exploration;epsilon best arm identification;Top Two algorithm;anytime", "primary_area": "", "supplementary_material": "/attachment/50772544152175c1cd72fc1904f3cca5ff45c121.zip", "author": "Marc Jourdan;R\u00e9my Degenne;Emilie Kaufmann", "authorids": "~Marc_Jourdan1;~R\u00e9my_Degenne1;~Emilie_Kaufmann1", "gender": "M;M;F", "homepage": "https://marcjourdan.github.io;https://remydegenne.github.io/;https://emiliekaufmann.github.io/", "dblp": "228/8157;157/1070;67/11350", "google_scholar": "BOXGjhgAAAAJ;https://scholar.google.fr/citations?user=H-uIBOwAAAAJ;9GE1vx4AAAAJ", "orcid": "0000-0002-2449-4549;;", "linkedin": "marc-jourdan/;;", "or_profile": "~Marc_Jourdan1;~R\u00e9my_Degenne1;~Emilie_Kaufmann1", "aff": "INRIA;INRIA;CNRS", "aff_domain": "inria.fr;inria.fr;cnrs.fr", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\njourdan2023an,\ntitle={An \\${\\textbackslash}varepsilon\\$-Best-Arm Identification Algorithm for Fixed-Confidence and Beyond},\nauthor={Marc Jourdan and R{\\'e}my Degenne and Emilie Kaufmann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OLk3F64eSg}\n}", "github": "", "project": "", "reviewers": "B3zV;kKhf;PH2Y;KTru;6z6m", "pdf_size": 1835888, "rating": "5;6;6;7;7", "confidence": "2;3;3;2;3", "soundness": "3;3;3;4;3", "novelty": "3;2;3;4;3", "presentation": "2;3;3;2;3", "wc_summary": "59;75;67;44;116", "wc_strengths": "54;30;70;36;86", "wc_weaknesses": "163;31;22;136;75", "wc_questions": "56;41;39;72;319", "wc_limitations": "1;1;1;8;1", "wc_review": "333;178;199;296;597", "wc_reply_reviewers": "28;75;0;99;29", "wc_reply_authors": "0;0;0;147;0", "reply_reviewers": "1;1;1;2;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 72.2, 24.17767565337909 ], "wc_strengths_avg": [ 55.2, 20.846102753272614 ], "wc_weaknesses_avg": [ 85.4, 55.980710963688196 ], "wc_questions_avg": [ 105.4, 107.45901544309811 ], "wc_limitations_avg": [ 2.4, 2.8 ], "wc_review_avg": [ 320.6, 149.85139305325126 ], "wc_reply_reviewers_avg": [ 46.2, 35.717782685939504 ], "wc_reply_authors_avg": [ 29.4, 58.8 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 16, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17297376800210131884&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "inria.fr;inria.fr;cnrs.fr", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "INRIA;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.cnrs.fr", "aff_unique_abbr": "INRIA;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "EICIL: Joint Excitatory Inhibitory Cycle Iteration Learning for Deep Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71819", "id": "OMDgOjdqoZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65e876f6a98c6799d0b3145966dd73e2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OMDgOjdqoZ", "openreview": "https://openreview.net/forum?id=OMDgOjdqoZ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71819", "video": "https://nips.cc/virtual/2023/poster/71819", "author_site": "Zihang Shao, Xuanye Fang, Yaxin Li, Chaoran Feng, Jiangrong Shen, Qi Xu", "tldr": "", "abstract": "Spiking neural networks (SNNs) have undergone continuous development and extensive study for decades, leading to increased biological plausibility and optimal energy efficiency. However, traditional training methods for deep SNNs have some limitations, as they rely on strategies such as pre-training and fine-tuning, indirect coding and reconstruction, and approximate gradients. These strategies lack a complete training model and require gradient approximation. To overcome these limitations, we propose a novel learning method named Joint Excitatory Inhibitory Cycle Iteration learning for Deep Spiking Neural Networks (EICIL) that integrates both excitatory and inhibitory behaviors inspired by the signal transmission of biological neurons.By organically embedding these two behavior patterns into one framework, the proposed EICIL significantly improves the bio-mimicry and adaptability of spiking neuron models, as well as expands the representation space of spiking neurons. 
Extensive experiments based on EICIL and traditional learning methods demonstrate that EICIL outperforms traditional methods on various datasets, such as CIFAR10 and CIFAR100, revealing the crucial role of the learning approach that integrates both behaviors during training.", "keywords": "spiking neural networks;cycle learning;spike encoding", "primary_area": "", "supplementary_material": "/attachment/8faec87157177ff9d547dd14c37aa1cdca1c014b.zip", "author": "Zihang Shao;Xuanye Fang;Yaxin Li;Chaoran Feng;Jiangrong Shen;Qi Xu", "authorids": "~Zihang_Shao2;~Xuanye_Fang1;~Yaxin_Li4;~Chaoran_Feng1;~Jiangrong_Shen1;~Qi_Xu1", "gender": "M;;M;F;M;M", "homepage": ";;https://www.falcary.com;;https://www.researchgate.net/profile/Qi_Xu43;", "dblp": ";143/0251-3;137/0641-1;208/3564;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;3XK6COkAAAAJ;dGEcAuYAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": "0000-0002-6437-2087;0000-0003-0160-8950;0009-0001-8329-1389;;0000-0001-9245-5544;", "linkedin": ";;;;;", "or_profile": "~Xuanye_Fang1;~Yaxin_Li4;~Chaoran_Feng1;~Jiangrong_Shen1;~Qi_Xu1;~Zhang_Shah1", "aff": "Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;;School of Computer Science and Technology;Dalian University of Technology", "aff_domain": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;;dlut.edu.cn;dlut.edu.cn", "position": "MS student;MS student;Undergrad student;;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nshao2023eicil,\ntitle={{EICIL}: Joint Excitatory Inhibitory Cycle Iteration Learning for Deep Spiking Neural Networks},\nauthor={Zihang Shao and Xuanye Fang and Yaxin Li and Chaoran Feng and Jiangrong Shen and Qi Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OMDgOjdqoZ}\n}", "github": "", "project": "", "reviewers": "imxR;4jP2;5jTx;a2bZ", "pdf_size": 3730341, "rating": "3;6;7;8", "confidence": "3;4;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;2;2", "wc_summary": "94;56;85;52", "wc_strengths": "80;142;98;90", "wc_weaknesses": "219;101;57;109", "wc_questions": "117;126;103;57", "wc_limitations": "31;25;41;8", "wc_review": "541;450;384;316", "wc_reply_reviewers": "117;0;107;77", "wc_reply_authors": "426;0;366;406", "reply_reviewers": "1;0;2;2", "reply_authors": "3;1;3;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 18.08832496390973 ], "wc_strengths_avg": [ 102.5, 23.680160472429236 ], "wc_weaknesses_avg": [ 121.5, 59.6720202439971 ], "wc_questions_avg": [ 100.75, 26.55536668924005 ], "wc_limitations_avg": [ 26.25, 11.986972094736853 ], "wc_review_avg": [ 422.75, 83.10046630434753 ], "wc_reply_reviewers_avg": [ 75.25, 45.871423566311954 ], "wc_reply_authors_avg": [ 299.5, 174.26058074045318 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10102787068350156569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;;dlut.edu.cn;dlut.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": 
"Dalian University of Technology;School of Computer Science and Technology", "aff_unique_dep": ";Computer Science and Technology", "aff_unique_url": "http://www.dlut.edu.cn/;", "aff_unique_abbr": "DUT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "OpenLane-V2: A Topology Reasoning Benchmark for Unified 3D HD Mapping", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73607", "id": "OMOOO3ls6g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c0a4c8c236144f1b99b7e1531debe9c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=OMOOO3ls6g", "openreview": "https://openreview.net/forum?id=OMOOO3ls6g", "poster": "/media/PosterPDFs/NeurIPS%202023/73607.png?t=1701923954.3003056", "slides": "https://nips.cc/virtual/2023/poster/73607", "video": "https://nips.cc/virtual/2023/poster/73607", "author_site": "Huijie Wang, Tianyu Li, Yang Li, Li Chen, Chonghao Sima, Zhenbo Liu, Bangjun Wang, Peijin Jia, Yuting Wang, Shengyin Jiang, Feng Wen, Hang Xu, Ping Luo, Junchi Yan, Wei Zhang, Hongyang Li", "tldr": "", "abstract": "Accurately depicting the complex traffic scene is a vital component for autonomous vehicles to execute correct judgments. However, existing benchmarks tend to oversimplify the scene by solely focusing on lane perception tasks. Observing that human drivers rely on both lanes and traffic signals to operate their vehicles safely, we present OpenLane-V2, the first dataset on topology reasoning for traffic scene structure. The objective of the presented dataset is to advance research in understanding the structure of road scenes by examining the relationship between perceived entities, such as traffic elements and lanes. Leveraging existing datasets, OpenLane-V2 consists of 2,000 annotated road scenes that describe traffic elements and their correlation to the lanes. It comprises three primary sub-tasks, including the 3D lane detection inherited from OpenLane, accompanied by corresponding metrics to evaluate the model\u2019s performance. 
We evaluate various state-of-the-art methods, and present their quantitative and qualitative results on OpenLane-V2 to indicate future avenues for investigating topology reasoning in traffic scenes.", "keywords": "3D Lane Detection;Traffic Element Recognition;Topology Reasoning;Scene Understanding;Autonomous Driving;Dataset and Benchmark", "primary_area": "", "supplementary_material": "/attachment/dd03dcf81dfe664ed6e7f969904d6962a6e58664.pdf", "author": "Huijie Wang;Tianyu Li;Yang Li;Li Chen;Chonghao Sima;Zhenbo Liu;Bangjun Wang;Peijin Jia;Yuting Wang;Shengyin Jiang;Feng Wen;Hang Xu;Ping Luo;Junchi Yan;Wei Zhang;Hongyang Li", "authorids": "~Huijie_Wang1;~Tianyu_Li5;~Yang_Li68;~Li_Chen15;~Chonghao_Sima1;~Zhenbo_Liu1;~Bangjun_Wang1;~Peijin_Jia1;~Yuting_Wang1;~Shengyin_Jiang1;~Feng_Wen1;~Hang_Xu1;~Ping_Luo2;~Junchi_Yan2;~Wei_Zhang45;~Hongyang_Li1", "gender": ";M;;M;;;;F;M;M;M;M;;;M;M", "homepage": ";https://github.com/sephyli;https://github.com/RicardLee;https://ilnehc.github.io/;;;;https://github.com/PeggyPeppa;https://github.com/huangmozhi9527;https://github.com/GG-Bonds;;;;;;https://datascience.hku.hk/people/hongyang-li/", "dblp": ";;;181/2847;317/0445;;;;;;;;;;10/4661-81;95/8433-1", "google_scholar": "Xg4cp-EAAAAJ;X6vTmEMAAAAJ;;ulZxvY0AAAAJ;dgYJ6esAAAAJ;;;;bbLqggoAAAAJ;JDWaIw0AAAAJ;;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;;;;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ", "orcid": "0000-0003-3960-084X;0009-0008-3838-160X;;;;;;;;;;0000-0003-3645-8972;;;;0000-0001-9110-5534", "linkedin": ";sephy-li/;;;;;;;;;%E4%B8%B0-%E6%B8%A9-867b72132/;;;;;hongyangli2020/", "or_profile": "~Huijie_Wang1;~Tianyu_Li5;~Yang_Li68;~Li_Chen15;~Chonghao_Sima1;~Zhenbo_Liu1;~Bangjun_Wang1;~Peijin_Jia1;~Yuting_Wang1;~Shengyin_Jiang1;~Feng_Wen1;~Hang_Xu1;~Ping_Luo2;~Junchi_Yan2;~Wei_Zhang45;~Hongyang_Li1", "aff": "OpenDriveLab;Beihang University;Shanghai AI Lab;Shanghai AI Laboratory;Purdue University;;;Tsinghua University;Tsinghua University;Shanghai AI Laboratory ;Huawei Noah's Ark Lab;Huawei Noah\u2018s Ark Lab;;;Huawei Technologies Ltd;Shanghai AI Lab", "aff_domain": "opendrivelab.com;buaa.edu.cn;pjlab.org.cn;pjlab.org.cn;purdue.edu;;;tsinghua.edu.cn;tsinghua.edu.cn;pjlab.org.cn;huawei.com;huawei.com;;;huawei.com;pjlab.org.cn", "position": "Researcher;MS student;engineer;Researcher;PhD student;;;MS student;MS student;Intern;Principal Researcher;Researcher;;;Researcher;Researcher", "bibtex": "@inproceedings{\nwang2023openlanev,\ntitle={OpenLane-V2: A Topology Reasoning Benchmark for Unified 3D {HD} Mapping},\nauthor={Huijie Wang and Tianyu Li and Yang Li and Li Chen and Chonghao Sima and Zhenbo Liu and Bangjun Wang and Peijin Jia and Yuting Wang and Shengyin Jiang and Feng Wen and Hang Xu and Ping Luo and Junchi Yan and Wei Zhang and Hongyang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=OMOOO3ls6g}\n}", "github": "", "project": "", "reviewers": "wxkw;79Ej;hUvy;bqmb", "pdf_size": 2638653, "rating": "4;6;6;8", "confidence": "5;3;4;4", "wc_summary_and_contributions": "127;72;105;96", "wc_strengths": "50;13;137;127", "wc_improvement": "186;201;216;84", "wc_limitations": "39;7;1;66", "wc_correctness": "1;4;1;69", "wc_clarity": "1;8;1;13", "wc_relation_to_prior_work": "23;76;1;27", "wc_documentation": "1;9;1;77", "wc_additional_feedback": "1;1;1;1", "wc_review": "429;391;464;560", "wc_reply_reviewers": "0;0;45;19", "wc_reply_authors": "499;564;563;334", 
"reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 100.0, 19.710403344427025 ], "wc_strengths_avg": [ 81.75, 52.04505259868608 ], "wc_improvement_avg": [ 171.75, 51.760868423935854 ], "wc_limitations_avg": [ 28.25, 26.14741861063918 ], "wc_correctness_avg": [ 18.75, 29.03769102390891 ], "wc_clarity_avg": [ 5.75, 5.0682837331783235 ], "wc_relation_to_prior_work_avg": [ 31.75, 27.39867697535777 ], "wc_documentation_avg": [ 22.0, 31.921779399024736 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 461.0, 62.717621128355944 ], "wc_reply_reviewers_avg": [ 16.0, 18.452642087245934 ], "wc_reply_authors_avg": [ 490.0, 93.83762571591419 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1737950739419136037&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "opendrivelab.com;buaa.edu.cn;pjlab.org.cn;pjlab.org.cn;purdue.edu;;;tsinghua.edu.cn;tsinghua.edu.cn;pjlab.org.cn;huawei.com;huawei.com;;;huawei.com;pjlab.org.cn", "author_num": 16, "aff_unique_index": "0;1;2;3;4;5;5;3;6;6;6;2", "aff_unique_norm": "OpenDriveLab;Beihang University;Shanghai AI Lab;Shanghai AI Laboratory;Purdue University;Tsinghua University;Huawei", "aff_unique_dep": ";;;;;;Noah's Ark Lab", "aff_unique_url": ";http://www.buaa.edu.cn/;https://www.shanghaiailab.com;https://www.shanghai-ai-lab.com;https://www.purdue.edu;https://www.tsinghua.edu.cn;https://www.huawei.com", "aff_unique_abbr": ";BUAA;SAIL;SAIL;Purdue;THU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;2;1;1;1;1;1;1;1", "aff_country_unique": ";China;United States" }, { "title": "Robust Contrastive Language-Image Pretraining against Data Poisoning and Backdoor Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71818", "id": "ONwL9ucoYG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2232e8fee69b150005ac420bfa83d705-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ONwL9ucoYG", "openreview": "https://openreview.net/forum?id=ONwL9ucoYG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71818", "video": "https://nips.cc/virtual/2023/poster/71818", "author_site": "Wenhan Yang, Jingdong Gao, Baharan Mirzasoleiman", "tldr": "", "abstract": "Contrastive vision-language representation learning has achieved state-of-the-art performance for zero-shot classification, by learning from millions of image-caption pairs crawled from the internet. However, the massive data that powers large multimodal models such as CLIP, makes them extremely vulnerable to various types of targeted data poisoning and backdoor attacks. Despite this vulnerability, robust contrastive vision-language pre-training against such attacks has remained unaddressed. In this work, we propose RoCLIP, the first effective method for robust pre-training multimodal vision-language models against targeted data poisoning and backdoor attacks. 
RoCLIP effectively breaks the association between poisoned image-caption pairs by considering a relatively large and varying pool of random captions, and matching every image with the text that is most similar to it in the pool instead of its own caption, every few epochs. It also leverages image and text augmentations to further strengthen the defense and improve the performance of the model. Our extensive experiments show that RoCLIP renders state-of-the-art targeted data poisoning and backdoor attacks ineffective during pre-training of CLIP models. In particular, RoCLIP decreases the success rate for targeted data poisoning attacks from 93.75% to 12.5% and that of backdoor attacks down to 0%, while improving the model's linear probe performance by 10% and maintaining similar zero-shot performance compared to CLIP. By increasing the frequency of matching, RoCLIP is able to defend against strong attacks, which add up to 1% poisoned examples to the data, and successfully maintain a low attack success rate of 12.5%, while trading off the performance on some tasks.", "keywords": "Contrastive Learning;Adversarial Learning;Model Robustness", "primary_area": "", "supplementary_material": "", "author": "Wenhan Yang;Jingdong Gao;Baharan Mirzasoleiman", "authorids": "~Wenhan_Yang5;~Jingdong_Gao1;~Baharan_Mirzasoleiman1", "gender": "M;;F", "homepage": ";https://github.com/mxuan0;http://web.cs.ucla.edu/~baharan/", "dblp": ";;52/10075", "google_scholar": ";;x63j7HEAAAAJ", "orcid": ";;", "linkedin": "wenhan-yang-6413981b4/;;", "or_profile": "~Wenhan_Yang5;~Jingdong_Gao1;~Baharan_Mirzasoleiman1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nyang2023robust,\ntitle={Robust Contrastive Language-Image Pretraining against Data Poisoning and Backdoor Attacks},\nauthor={Wenhan Yang and Jingdong Gao and Baharan Mirzasoleiman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ONwL9ucoYG}\n}", "github": "", "project": "", "reviewers": "99eL;e25B;cYJD;8GQL", "pdf_size": 4262823, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "119;68;50;121", "wc_strengths": "90;122;33;31", "wc_weaknesses": "66;232;217;182", "wc_questions": "71;4;21;58", "wc_limitations": "7;4;6;11", "wc_review": "353;430;327;403", "wc_reply_reviewers": "25;12;22;122", "wc_reply_authors": "302;105;1166;349", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;3;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.5, 31.164884084494844 ], "wc_strengths_avg": [ 69.0, 38.6975451417787 ], "wc_weaknesses_avg": [ 174.25, 65.07831820199412 ], "wc_questions_avg": [ 38.5, 27.07858932810201 ], "wc_limitations_avg": [ 7.0, 2.5495097567963922 ], "wc_review_avg": [ 378.25, 40.48070528041724 ], "wc_reply_reviewers_avg": [ 45.25, 44.572272771309294 ], "wc_reply_authors_avg": [ 480.5, 406.2219221066239 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11907019299281685364&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "ucla.edu;ucla.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Are aligned neural networks adversarially aligned?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71817", "id": "OQQoD8Vc3B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c1f0b856a35986348ab3414177266f75-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OQQoD8Vc3B", "openreview": "https://openreview.net/forum?id=OQQoD8Vc3B", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71817", "video": "https://nips.cc/virtual/2023/poster/71817", "author_site": "Nicholas Carlini, Milad Nasr, Christopher A. Choquette-Choo, Matthew Jagielski, Irena Gao, Pang Wei Koh, Daphne Ippolito, Florian Tramer, Ludwig Schmidt", "tldr": "", "abstract": "Large language models are now tuned to align with the goals of their creators, namely to be \"helpful and harmless.\" These models should respond helpfully to user questions, but refuse to answer requests that could cause harm. However, adversarial users can construct inputs which circumvent attempts at alignment. In this work, we study adversarial alignment, and ask to what extent these models remain aligned when interacting with an adversarial user who constructs worst-case inputs (adversarial examples). These inputs are designed to cause the model to emit harmful content that would otherwise be prohibited.\n\nWe show that existing NLP-based optimization attacks are insufficiently powerful to reliably attack aligned text models: even when current NLP-based attacks fail, we can find adversarial inputs with brute force. As a result, the failure of current attacks should not be seen as proof that aligned text models remain aligned under adversarial inputs. However the recent trend in large-scale ML models is multimodal models that allow users to provide images that influence the text that is generated. We show these models can be easily attacked, i.e., induced to perform arbitrary un-aligned behavior through adversarial perturbation of the input image. We conjecture that improved NLP attacks may demonstrate this same level of adversarial control over text-only models.", "keywords": "Adversarial examples;large language models;alignment", "primary_area": "", "supplementary_material": "/attachment/22788ed5964ff87adf53c52b0ab931f9848c1a2e.pdf", "author": "Nicholas Carlini;Milad Nasr;Christopher A. 
Choquette-Choo;Matthew Jagielski;Irena Gao;Pang Wei Koh;Daphne Ippolito;Florian Tram\u00e8r;Ludwig Schmidt", "authorids": "~Nicholas_Carlini1;~Milad_Nasr2;~Christopher_A._Choquette-Choo1;~Matthew_Jagielski1;~Irena_Gao1;~Pang_Wei_Koh1;~Daphne_Ippolito1;~Florian_Tram\u00e8r1;~Ludwig_Schmidt1", "gender": ";;M;M;;M;F;M;M", "homepage": "http://nicholas.carlini.com;https://people.cs.umass.edu/~milad/;https://www.christopherchoquette.com;https://jagielski.github.io/;https://i-gao.github.io;http://cs.stanford.edu/~pangwei;http://www.daphnei.com;http://people.csail.mit.edu/ludwigs/;http://floriantramer.com", "dblp": "145/1806;;250/9674;218/5156;193/1492;10/10453;192/2031.html;141/2720;158/7224", "google_scholar": ";k6-nvDAAAAAJ;oDE4I64AAAAJ;_8rw_GMAAAAJ;;Nn990CkAAAAJ;;SWMKy70AAAAJ;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;christopher-choquette-choo/;;;;;ludwig-schmidt-87ba3612/;", "or_profile": "~Nicholas_Carlini1;~Milad_Nasr2;~Christopher_A._Choquette-Choo1;~Matthew_Jagielski1;~Irena_Gao1;~Pang_Wei_Koh1;~Daphne_Ippolito1;~Ludwig_Schmidt1;~Florian_Tramer1", "aff": "Google;Google;Google Research, Brain Team;Google;Stanford University;Google;Carnegie Mellon University;Allen Institute for Artificial Intelligence;ETHZ - ETH Zurich", "aff_domain": "google.com;google.com;google.com;google.com;stanford.edu;google.com;cmu.edu;allenai.org;ethz.ch", "position": "Researcher;Researcher;Researcher;Researcher;Undergrad student;Researcher;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ncarlini2023are,\ntitle={Are aligned neural networks adversarially aligned?},\nauthor={Nicholas Carlini and Milad Nasr and Christopher A. Choquette-Choo and Matthew Jagielski and Irena Gao and Pang Wei Koh and Daphne Ippolito and Florian Tram{\\`e}r and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OQQoD8Vc3B}\n}", "github": "", "project": "", "reviewers": "n3yx;NFJs;5kqR;DTQ2", "pdf_size": 1201470, "rating": "4;5;6;6", "confidence": "5;4;5;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "1;1;3;3", "wc_summary": "38;387;76;142", "wc_strengths": "21;58;93;96", "wc_weaknesses": "208;222;355;229", "wc_questions": "218;3;18;99", "wc_limitations": "6;1;8;1", "wc_review": "491;671;550;567", "wc_reply_reviewers": "0;27;102;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 160.75, 135.82226437517525 ], "wc_strengths_avg": [ 67.0, 30.47129797038518 ], "wc_weaknesses_avg": [ 253.5, 59.08680055646946 ], "wc_questions_avg": [ 84.5, 85.28921385497699 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 569.75, 64.90521935869256 ], "wc_reply_reviewers_avg": [ 32.25, 41.75149697915034 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 312, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3768131676399480172&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "google.com;google.com;google.com;google.com;stanford.edu;google.com;cmu.edu;allenai.org;ethz.ch", "author_num": 9, 
"aff_unique_index": "0;0;0;0;1;0;2;3;4", "aff_unique_norm": "Google;Stanford University;Carnegie Mellon University;Allen Institute for Artificial Intelligence;ETH Zurich", "aff_unique_dep": "Google;;;;", "aff_unique_url": "https://www.google.com;https://www.stanford.edu;https://www.cmu.edu;https://allenai.org;https://www.ethz.ch", "aff_unique_abbr": "Google;Stanford;CMU;AI2;ETHZ", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Mountain View;Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Rigorous Runtime Analysis of MOEA/D for Solving Multi-Objective Minimum Weight Base Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71816", "id": "ORmVvN94B9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72416ded78a439907ff72165ac9c56e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ORmVvN94B9", "openreview": "https://openreview.net/forum?id=ORmVvN94B9", "poster": "/media/PosterPDFs/NeurIPS%202023/71816.png?t=1702188788.0406876", "slides": "https://nips.cc/virtual/2023/poster/71816", "video": "https://nips.cc/virtual/2023/poster/71816", "author_site": "Anh Viet Do, Aneta Neumann, Frank Neumann, Andrew Sutton", "tldr": "", "abstract": "We study the multi-objective minimum weight base problem, an abstraction of classical NP-hard combinatorial problems such as the multi-objective minimum spanning tree problem. We prove some important properties of the convex hull of the non-dominated front, such as its approximation quality and an upper bound on the number of extreme points. Using these properties, we give the first run-time analysis of the MOEA/D algorithm for this problem, an evolutionary algorithm that effectively optimizes by decomposing the objectives into single-objective components. We show that the MOEA/D, given an appropriate decomposition setting, finds all extreme points within expected fixed-parameter polynomial time, in the oracle model. Experiments are conducted on random bi-objective minimum spanning tree instances, and the results agree with our theoretical findings. Furthermore, compared with a previously studied evolutionary algorithm for the problem GSEMO, MOEA/D finds all extreme points much faster across all instances.", "keywords": "minimum weight base problem;multi-objective optimization;approximation;evolutionary algorithm", "primary_area": "", "supplementary_material": "/attachment/3b6569ef43edeeaafbbab3628f69380a15c2773a.zip", "author": "Anh Viet Do;Aneta Neumann;Frank Neumann;Andrew M. 
Sutton", "authorids": "~Anh_Viet_Do1;~Aneta_Neumann1;~Frank_Neumann1;~Andrew_M._Sutton1", "gender": "M;F;M;M", "homepage": ";https://researchers.adelaide.edu.au/profile/aneta.neumann;;https://www.d.umn.edu/~amsutton", "dblp": "263/3148;179/2274;n/FrankNeumann;45/1948", "google_scholar": ";rjWkgoAAAAAJ;;py3kTykAAAAJ", "orcid": "0000-0003-3850-1671;0000-0002-0036-4782;;0000-0003-1295-6715", "linkedin": ";anetaneumann/;;", "or_profile": "~Anh_Viet_Do1;~Aneta_Neumann1;~Frank_Neumann1;~Andrew_M._Sutton1", "aff": "University of Adelaide;The University of Adelaide;University of Adelaide;University of Minnesota Duluth", "aff_domain": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;d.umn.edu", "position": "PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndo2023rigorous,\ntitle={Rigorous Runtime Analysis of {MOEA}/D for Solving Multi-Objective Minimum Weight Base Problems},\nauthor={Anh Viet Do and Aneta Neumann and Frank Neumann and Andrew M. Sutton},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ORmVvN94B9}\n}", "github": "", "project": "", "reviewers": "rEPr;EzWR;8vmn;RGyY;vbRF", "pdf_size": 260239, "rating": "4;6;6;7;7", "confidence": "4;4;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;2;3;2", "wc_summary": "55;121;48;18;79", "wc_strengths": "18;84;47;54;166", "wc_weaknesses": "155;66;51;17;247", "wc_questions": "26;65;96;86;53", "wc_limitations": "8;11;22;1;1", "wc_review": "262;347;264;176;546", "wc_reply_reviewers": "23;14;0;249;20", "wc_reply_authors": "0;0;0;189;0", "reply_reviewers": "1;1;0;4;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 64.2, 34.42905749508691 ], "wc_strengths_avg": [ 73.8, 50.653331578485535 ], "wc_weaknesses_avg": [ 107.2, 83.45154282576208 ], "wc_questions_avg": [ 65.2, 24.766105870725823 ], "wc_limitations_avg": [ 8.6, 7.761443164772902 ], "wc_review_avg": [ 319.0, 125.72668769994699 ], "wc_reply_reviewers_avg": [ 61.2, 94.23247847743367 ], "wc_reply_authors_avg": [ 37.8, 75.6 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250536 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.372677996249965, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8126105769298758083&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;d.umn.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Adelaide;University of Minnesota Duluth", "aff_unique_dep": ";", "aff_unique_url": "https://www.adelaide.edu.au;https://d.umn.edu", "aff_unique_abbr": "Adelaide;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Duluth", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Australia;United States" }, { "title": "QLoRA: Efficient Finetuning of Quantized LLMs", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71815", "id": "OUIFPHEgJU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1feb87871436031bdc0f2beaa62a049b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OUIFPHEgJU", "openreview": 
"https://openreview.net/forum?id=OUIFPHEgJU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71815", "video": "https://nips.cc/virtual/2023/poster/71815", "author_site": "Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, Luke Zettlemoyer", "tldr": "", "abstract": "We present QLoRA, an efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a single 48GB GPU while preserving full 16-bit finetuning task performance. QLoRA backpropagates gradients through a frozen, 4-bit quantized pretrained language model into Low Rank Adapters~(LoRA). Our best model family, which we name Guanaco, outperforms all previous openly released models on the Vicuna benchmark, reaching 99.3% of the performance level of ChatGPT while only requiring 24 hours of finetuning on a single GPU. QLoRA introduces a number of innovations to save memory without sacrificing performance: (a) 4-bit NormalFloat (NF4), a new data type that is information-theoretically optimal for normally distributed weights (b) Double Quantization to reduce the average memory footprint by quantizing the quantization constants, and (c) Paged Optimziers to manage memory spikes. We use QLoRA to finetune more than 1,000 models, providing a detailed analysis of instruction following and chatbot performance across 8 instruction datasets, multiple model types (LLaMA, T5), and model scales that would be infeasible to run with regular finetuning (e.g. 33B and 65B parameter models). Our results show that QLoRA finetuning on a small, high-quality dataset leads to state-of-the-art results, even when using smaller models than the previous SoTA. We provide a detailed analysis of chatbot performance based on both human and GPT-4 evaluations, showing that GPT-4 evaluations are a cheap and reasonable alternative to human evaluation. Furthermore, we find that current chatbot benchmarks are not trustworthy to accurately evaluate the performance levels of chatbots. A lemon-picked analysis demonstrates where Guanaco fails compared to ChatGPT. 
We release all of our models and code, including CUDA kernels for 4-bit training.", "keywords": "finetuning;llama;instructions;quantization", "primary_area": "", "supplementary_material": "/attachment/fd52cb849c593de766b9b189e7de1a4e7eb7df27.pdf", "author": "Tim Dettmers;Artidoro Pagnoni;Ari Holtzman;Luke Zettlemoyer", "authorids": "~Tim_Dettmers2;~Artidoro_Pagnoni1;~Ari_Holtzman1;~Luke_Zettlemoyer1", "gender": "M;M;M;M", "homepage": "https://timdettmers.com/;https://artidoro.github.io/;http://ariholtzman.com;https://www.cs.washington.edu/people/faculty/lsz/", "dblp": "172/1045;223/9977;https://dblp.uni-trier.de/pers/hd/h/Holtzman:Ari;21/6793", "google_scholar": "lHI3w5kAAAAJ;oLXBw0YAAAAJ;https://scholar.google.com/citations?authuser=2;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ", "orcid": ";;;", "linkedin": ";artidoro-pagnoni/;;luke-zettlemoyer-a0109b226/", "or_profile": "~Tim_Dettmers2;~Artidoro_Pagnoni1;~Ari_Holtzman1;~Luke_Zettlemoyer1", "aff": "University of Washington;University of Washington;Department of Computer Science, University of Washington;Meta", "aff_domain": "cs.washington.edu;uw.edu;cs.was;meta.com", "position": "PhD student;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\ndettmers2023qlora,\ntitle={{QL}o{RA}: Efficient Finetuning of Quantized {LLM}s},\nauthor={Tim Dettmers and Artidoro Pagnoni and Ari Holtzman and Luke Zettlemoyer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OUIFPHEgJU}\n}", "github": "", "project": "", "reviewers": "uL2c;pPZQ;enuE;hHMr", "pdf_size": 1110975, "rating": "7;7;8;9", "confidence": "5;3;4;5", "soundness": "3;3;3;4", "novelty": "4;3;4;4", "presentation": "3;2;3;4", "wc_summary": "50;50;63;73", "wc_strengths": "46;55;65;116", "wc_weaknesses": "11;43;10;193", "wc_questions": "2;57;59;48", "wc_limitations": "2;11;1;9", "wc_review": "111;216;198;439", "wc_reply_reviewers": "0;0;0;7", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 59.0, 9.669539802906858 ], "wc_strengths_avg": [ 70.5, 27.115493725912497 ], "wc_weaknesses_avg": [ 64.25, 75.50951926744071 ], "wc_questions_avg": [ 41.5, 23.178653972998518 ], "wc_limitations_avg": [ 5.75, 4.322904116447646 ], "wc_review_avg": [ 241.0, 121.01446194566995 ], "wc_reply_reviewers_avg": [ 1.75, 3.031088913245535 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 2834, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8079217396100949644&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.washington.edu;uw.edu;cs.was;meta.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Washington;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.washington.edu;https://meta.com", "aff_unique_abbr": "UW;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Laughing Hyena Distillery: Extracting Compact Recurrences From Convolutions", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71814", "id": "OWELckerm6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/371355cd42caaf83412c3fbef4688979-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OWELckerm6", "openreview": "https://openreview.net/forum?id=OWELckerm6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71814", "video": "https://nips.cc/virtual/2023/poster/71814", "author_site": "Stefano Massaroli, Michael Poli, Dan Fu, Hermann Kumbong, Rom Parnichkun, David Romero, Aman Timalsina, Quinn McIntyre, Beidi Chen, Atri Rudra, Ce Zhang, Ce Zhang, Christopher R\u00e9, Stefano Ermon, Yoshua Bengio", "tldr": "", "abstract": "Recent advances in attention-free sequence models rely on convolutions as alternatives to the attention operator at the core of Transformers. In particular, long convolution sequence models have achieved state-of-the-art performance in many domains, but incur a significant cost during auto-regressive inference workloads -- naively requiring a full pass (or caching of activations) over the input sequence for each generated token -- similarly to attention-based models. In this paper, we seek to enable $\\mathcal O(1)$ compute and memory cost per token in any pre-trained long convolution architecture to reduce memory footprint and increase throughput during generation. Concretely, our methods consist in extracting low-dimensional linear state-space models from each convolution layer, building upon rational interpolation and model-order reduction techniques. We further introduce architectural improvements to convolution-based layers such as Hyena: by weight-tying the filters across channels into heads, we achieve higher pre-training quality and reduce the number of filters to be distilled. The resulting model achieves 10x higher throughput than Transformers and 1.5x higher than Hyena at 1.3B parameters, without any loss in quality after distillation.", "keywords": "Long convolutions;recurrence;attention;language models;signal processing;throughput;auto-regressive generation", "primary_area": "", "supplementary_material": "", "author": "Stefano Massaroli;Michael Poli;Daniel Y Fu;Hermann Kumbong;Rom Nishijima Parnichkun;David W. 
Romero;Aman Timalsina;Quinn McIntyre;Beidi Chen;Atri Rudra;Ce Zhang;Christopher Re;Stefano Ermon;Yoshua Bengio", "authorids": "~Stefano_Massaroli1;~Michael_Poli1;~Daniel_Y_Fu1;~Hermann_Kumbong1;~Rom_Nishijima_Parnichkun1;~David_W._Romero1;~Aman_Timalsina1;~Quinn_McIntyre1;~Beidi_Chen1;~Atri_Rudra1;~Ce_Zhang1;~Christopher_Re1;~Stefano_Ermon1;~Yoshua_Bengio1", "gender": ";M;;M;M;M;M;M;F;M;;;M;M", "homepage": ";;;https://kumbong.github.io/;https://github.com/ruke1ire;https://davidwromero.xyz/;;;https://www.andrew.cmu.edu/user/beidic/;http://www.cse.buffalo.edu/faculty/atri/;;;http://cs.stanford.edu/~ermon/;http://yoshuabengio.org", "dblp": ";;;359/5994;359/5796;254/1396;;;192/1339;04/4980;97/919;;47/8135;56/953", "google_scholar": "IwCfl4UAAAAJ;RgIBwboAAAAJ;;NnL2qHgAAAAJ;https://scholar.google.com/citations?hl=en;7tdzmVoAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.tw/citations?user=_e5H8IoAAAAJ;;;;kukA0LcAAAAJ", "orcid": ";;;;;;;;;;;;;", "linkedin": ";;;hermannkumbong/;;david-w-romero-05893567/;;quinn-mcintyre-b25a75254;;;;;;yoshuabengio/?originalSubdomain=ca", "or_profile": "~Stefano_Massaroli1;~Michael_Poli1;~Daniel_Y_Fu1;~Hermann_Kumbong1;~Rom_Nishijima_Parnichkun1;~David_W._Romero1;~Aman_Timalsina1;~Quinn_McIntyre1;~Beidi_Chen1;~Atri_Rudra1;~Ce_Zhang1;~Christopher_Re1;~Stefano_Ermon1;~Yoshua_Bengio1", "aff": "MILA;Stanford University;;Stanford University;The University of Tokyo;Vrije Universiteit Amsterdam;Purdue University;Stanford University;Meta Facebook;State University of New York, Buffalo;University of Chicago;;Stanford University;University of Montreal", "aff_domain": "mila.quebec;stanford.edu;;stanford.edu;u-tokyo.ac.jp;vu.nl;purdue.edu;stanford.edu;fb.com;buffalo.edu;uchicago.edu;;stanford.edu;umontreal.ca", "position": "Postdoc;PhD student;;MS student;PhD student;PhD student;MS student;Undergrad student;Researcher;Professor;Associate Professor;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nmassaroli2023laughing,\ntitle={Laughing Hyena Distillery: Extracting Compact Recurrences From Convolutions},\nauthor={Stefano Massaroli and Michael Poli and Daniel Y Fu and Hermann Kumbong and Rom Nishijima Parnichkun and David W. 
Romero and Aman Timalsina and Quinn McIntyre and Beidi Chen and Atri Rudra and Ce Zhang and Christopher Re and Stefano Ermon and Yoshua Bengio},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OWELckerm6}\n}", "github": "", "project": "", "reviewers": "n6Pj;6dqz;wEPT;4kyN", "pdf_size": 11151091, "rating": "6;7;7;7", "confidence": "4;4;2;5", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "92;81;109;48", "wc_strengths": "28;81;35;15", "wc_weaknesses": "111;255;52;15", "wc_questions": "335;121;20;14", "wc_limitations": "1;17;33;5", "wc_review": "567;555;249;97", "wc_reply_reviewers": "0;63;0;30", "wc_reply_authors": "45;11;33;17", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 22.276669409945463 ], "wc_strengths_avg": [ 39.75, 24.873429598670143 ], "wc_weaknesses_avg": [ 108.25, 91.38209616768484 ], "wc_questions_avg": [ 122.5, 129.8431746377144 ], "wc_limitations_avg": [ 14.0, 12.449899597988733 ], "wc_review_avg": [ 367.0, 201.35044077428785 ], "wc_reply_reviewers_avg": [ 23.25, 26.013217794036937 ], "wc_reply_authors_avg": [ 26.5, 13.369741957120938 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8064799336379828276&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "mila.quebec;stanford.edu;;stanford.edu;u-tokyo.ac.jp;vu.nl;purdue.edu;stanford.edu;fb.com;buffalo.edu;uchicago.edu;;stanford.edu;umontreal.ca", "author_num": 14, "aff_unique_index": "0;1;1;2;3;4;1;5;6;7;1;8", "aff_unique_norm": "Mila;Stanford University;University of Tokyo;Vrije Universiteit Amsterdam;Purdue University;Meta;State University of New York at Buffalo;University of Chicago;University of Montreal", "aff_unique_dep": ";;;;;Meta Platforms, Inc.;;;", "aff_unique_url": "https://mila.quebec;https://www.stanford.edu;https://www.u-tokyo.ac.jp;https://www.vu.nl;https://www.purdue.edu;https://meta.com;https://www.buffalo.edu;https://www.uchicago.edu;https://www.umontreal.ca", "aff_unique_abbr": "MILA;Stanford;UTokyo;VU Amsterdam;Purdue;Meta;SUNY Buffalo;UChicago;UM", "aff_campus_unique_index": "1;1;1;2;1", "aff_campus_unique": ";Stanford;Buffalo", "aff_country_unique_index": "0;1;1;2;3;1;1;1;1;1;1;0", "aff_country_unique": "Canada;United States;Japan;Netherlands" }, { "title": "A Dataset for Analyzing Streaming Media Performance over HTTP/3 Browsers", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73606", "id": "OXOLiS0ak6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f5da8ac52cf8857157c63c4803b6690b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=OXOLiS0ak6", "openreview": "https://openreview.net/forum?id=OXOLiS0ak6", "poster": "/media/PosterPDFs/NeurIPS%202023/73606.png?t=1702485552.8500962", "slides": "https://nips.cc/virtual/2023/poster/73606", "video": "https://nips.cc/virtual/2023/poster/73606", "author_site": "Sapna Chaudhary, Mukulika Maity, Sandip Chakraborty, Naval Shukla", "tldr": "", "abstract": "HTTP/3 is a new application
layer protocol supported by most browsers. It uses QUIC as an underlying transport protocol. QUIC provides multiple benefits, like faster connection establishment, reduced latency, and improved connection migration. Hence, most popular browsers like Chrome/Chromium, Microsoft Edge, Apple Safari, and Mozilla Firefox have started supporting it. In this paper, we present an HTTP/3-supported browser dataset collection tool named H3B. It collects the application- and network-level logs during YouTube streaming. We consider YouTube, as it is the most popular video streaming application supporting QUIC. Using this tool, we collected a dataset of over 5936 YouTube sessions covering 5464 hours of streaming over 5 different geographical locations and 5 different bandwidth patterns. We believe our tool, as well as the dataset, could be used in multiple applications such as a better configuration of application/transport protocols based on the network conditions, intelligent integration of network and application, predicting YouTube's QoE, etc. \nWe analyze the dataset and observe that during an HTTP/3 streaming session, not all requests are served by HTTP/3. Instead, whenever the network condition is not favorable, the browser chooses to fall back, and the application requests are transmitted using HTTP/2 over the old-standing transport protocol TCP. We observe that such switching of protocols impacts the performance of video streaming applications.", "keywords": "HTTP/3;QUIC;Measurement tool;Streaming Media Dataset;YouTube", "primary_area": "", "supplementary_material": "/attachment/462a2b648cec0dceb32f1755c5b80e1ee46a0c60.pdf", "author": "Sapna Chaudhary;Mukulika Maity;Sandip Chakraborty;Naval Kumar Shukla", "authorids": "~Sapna_Chaudhary1;~Mukulika_Maity1;~Sandip_Chakraborty1;naval19065@iiitd.ac.in", "gender": "F;F;M;", "homepage": ";https://www.iiitd.ac.in/mukulika;http://cse.iitkgp.ac.in/~sandipc/;", "dblp": ";;28/9571;", "google_scholar": "l9qK0CkAAAAJ;https://scholar.google.com.tw/citations?user=F5sooVMAAAAJ;https://scholar.google.com.tw/citations?user=dEpbTokAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sapna_Chaudhary1;~Mukulika_Maity1;~Sandip_Chakraborty1;naval19065@iiitd.ac.in", "aff": "Indraprastha Institute of Information Technology, Delhi;Indraprastha Institute of Information Technology, Delhi;Indian Institute of Technology Kharagpur;", "aff_domain": "iiitd.ac.in;iiitd.ac.in;iitkgp.ac.in;", "position": "PhD student;Assistant Professor;Associate Professor;", "bibtex": "@inproceedings{\nchaudhary2023a,\ntitle={A Dataset for Analyzing Streaming Media Performance over {HTTP}/3 Browsers},\nauthor={Sapna Chaudhary and Mukulika Maity and Sandip Chakraborty and Naval Kumar Shukla},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=OXOLiS0ak6}\n}", "github": "", "project": "", "reviewers": "5bNA;WFF1;q9Aw;eSYT;dwAt", "pdf_size": 926820, "rating": "3;5;6;7;8", "confidence": "5;3;3;3;4", "wc_summary_and_contributions": "39;74;213;150;27", "wc_strengths": "39;59;81;40;47", "wc_improvement": "22;88;139;42;215", "wc_limitations": "124;58;268;59;31", "wc_correctness": "11;17;14;8;6", "wc_clarity": "20;11;90;2;10", "wc_relation_to_prior_work": "21;32;9;2;12", "wc_documentation": "1;37;52;1;33", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "278;377;867;305;382", "wc_reply_reviewers": "0;16;310;0;375", "wc_reply_authors": "744;711;1080;40;1270", "reply_reviewers": "0;1;1;0;2",
"reply_authors": "1;1;2;1;3", "rating_avg": [ 5.8, 1.7204650534085253 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 100.6, 70.70106081240931 ], "wc_strengths_avg": [ 53.2, 15.625619987699688 ], "wc_improvement_avg": [ 101.2, 69.75786693986564 ], "wc_limitations_avg": [ 108.0, 85.65745735194338 ], "wc_correctness_avg": [ 11.2, 3.9698866482558417 ], "wc_clarity_avg": [ 26.6, 32.20931542271583 ], "wc_relation_to_prior_work_avg": [ 15.2, 10.380751417888783 ], "wc_documentation_avg": [ 24.8, 20.439178065665946 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 441.8, 216.3861363396463 ], "wc_reply_reviewers_avg": [ 140.2, 166.55377509981574 ], "wc_reply_authors_avg": [ 769.0, 420.2741962100457 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4940524646612318, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7MnOdJThsp8J:scholar.google.com/&scioq=A+Dataset+for+Analyzing+Streaming+Media+Performance+over+HTTP/3+Browsers&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "iiitd.ac.in;iiitd.ac.in;iitkgp.ac.in;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Indraprastha Institute of Information Technology;Indian Institute of Technology Kharagpur", "aff_unique_dep": ";", "aff_unique_url": "http://www.iiitd.ac.in;https://www.iitkgp.ac.in", "aff_unique_abbr": "IIIT-D;IIT Kharagpur", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Delhi;Kharagpur", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "title": "Sub-optimality of the Naive Mean Field approximation for proportional high-dimensional Linear Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71813", "id": "OXhymu6MeN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9a7214961b9bd0ee93755bfa0abcea7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OXhymu6MeN", "openreview": "https://openreview.net/forum?id=OXhymu6MeN", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71813", "video": "https://nips.cc/virtual/2023/poster/71813", "tldr": "", "abstract": "The Na\u00efve Mean Field (NMF) approximation is widely employed in modern Machine Learning due to the huge computational gains it bestows on the statistician. Despite its popularity in practice, theoretical guarantees for high-dimensional problems are only available under strong structural assumptions (e.g. sparsity). Moreover, existing theory often does not explain empirical observations noted in the existing literature. \n \nIn this paper, we take a step towards addressing these problems by deriving sharp asymptotic characterizations for the NMF approximation in high-dimensional linear regression. Our results apply to a wide class of natural priors and allow for model mismatch (i.e. the underlying statistical model can be different from the fitted model). We work under an iid Gaussian design and the proportional asymptotic regime, where the number of features and number of observations grow at a proportional rate. 
As a consequence of our asymptotic characterization, we establish two concrete corollaries: (a) we establish the inaccuracy of the NMF approximation for the log-normalizing constant in this regime, and (b) we provide theoretical results backing the empirical observation that the NMF approximation can be overconfident in terms of uncertainty quantification.\n\nOur results utilize recent advances in the theory of Gaussian comparison inequalities. To the best of our knowledge, this is the first application of these ideas to the analysis of Bayesian variational inference problems. Our theoretical results are corroborated by numerical experiments. Lastly, we believe our results can be generalized to non-Gaussian designs and provide empirical evidence to support it.", "keywords": "Variational Bayes; Naive Mean Field; Gaussian comparison inequalities; High-dimensional statistics; Proportional asymptotic.", "primary_area": "", "supplementary_material": "/attachment/e0694baa11be177f2d575c65233a31c4f2e31254.zip", "author": "Jiaze Qiu", "authorids": "~Jiaze_Qiu1", "gender": "", "homepage": "https://www.jiazeqiu.com", "dblp": "", "google_scholar": "", "orcid": "0000-0003-3895-1859", "linkedin": "", "or_profile": "~Jiaze_Qiu1", "aff": "Harvard University, Harvard University", "aff_domain": "g.harvard.edu", "position": "PhD student", "bibtex": "@inproceedings{\nqiu2023suboptimality,\ntitle={Sub-optimality of the Naive Mean Field approximation for proportional high-dimensional Linear Regression},\nauthor={Jiaze Qiu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OXhymu6MeN}\n}", "github": "", "project": "", "reviewers": "Qw44;JMXP;6Ztj;zux4", "pdf_size": 687341, "rating": "5;7;7;7", "confidence": "3;3;3;4", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "2;2;3;3", "wc_summary": "54;48;118;81", "wc_strengths": "40;54;89;38", "wc_weaknesses": "69;116;211;138", "wc_questions": "152;69;57;113", "wc_limitations": "19;5;1;1", "wc_review": "334;292;476;371", "wc_reply_reviewers": "0;12;32;128", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 75.25, 27.63489641739227 ], "wc_strengths_avg": [ 55.25, 20.437404434027332 ], "wc_weaknesses_avg": [ 133.5, 51.21767273119699 ], "wc_questions_avg": [ 97.75, 37.62562291843153 ], "wc_limitations_avg": [ 6.5, 7.399324293474371 ], "wc_review_avg": [ 368.25, 68.19961510155318 ], "wc_reply_reviewers_avg": [ 43.0, 50.38849074937649 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7055544383493532455&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "g.harvard.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Scale Alone Does not Improve Mechanistic Interpretability in Vision Models", "status": "Spotlight", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/71812", "id": "OZ7aImD4uQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b4aadf04d6fde46346db455402860708-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OZ7aImD4uQ", "openreview": "https://openreview.net/forum?id=OZ7aImD4uQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71812.png?t=1701687771.2176948", "slides": "https://nips.cc/virtual/2023/poster/71812", "video": "https://nips.cc/virtual/2023/poster/71812", "author_site": "Roland S. Zimmermann, Thomas Klein, Wieland Brendel", "tldr": "", "abstract": "In light of the recent widespread adoption of AI systems, understanding the internal information processing of neural networks has become increasingly critical. Most recently, machine vision has seen remarkable progress by scaling neural networks to unprecedented levels in dataset and model size. We here ask whether this extraordinary increase in scale also positively impacts the field of mechanistic interpretability. In other words, has our understanding of the inner workings of scaled neural networks improved as well? We use a psychophysical paradigm to quantify one form of mechanistic interpretability for a diverse suite of nine models and find no scaling effect for interpretability - neither for model nor dataset size. Specifically, none of the investigated state-of-the-art models are easier to interpret than the GoogLeNet model from almost a decade ago. Latest-generation vision models appear even less interpretable than older architectures, hinting at a regression rather than improvement, with modern models sacrificing interpretability for accuracy. These results highlight the need for models explicitly designed to be mechanistically interpretable and the need for more helpful interpretability methods to increase our understanding of networks at an atomic level. We release a dataset containing more than 130'000 human responses from our psychophysical evaluation of 767 units across nine models. This dataset facilitates research on automated instead of human-based interpretability evaluations, which can ultimately be leveraged to directly optimize the mechanistic interpretability of models.", "keywords": "feature visualization;interpretability;explainability;deep learning;neural networks;analysis;activation maximization;psychophysics", "primary_area": "", "supplementary_material": "", "author": "Roland S. Zimmermann;Thomas Klein;Wieland Brendel", "authorids": "~Roland_S._Zimmermann1;~Thomas_Klein1;~Wieland_Brendel1", "gender": ";;M", "homepage": ";;", "dblp": ";;37/11107", "google_scholar": ";;v-JL-hsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Roland_S._Zimmermann1;~Thomas_Klein1;~Wieland_Brendel1", "aff": ";;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": ";;is.mpg.de", "position": ";;Principal Researcher", "bibtex": "@inproceedings{\nzimmermann2023scale,\ntitle={Scale Alone Does not Improve Mechanistic Interpretability in Vision Models},\nauthor={Roland S. 
Zimmermann and Thomas Klein and Wieland Brendel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OZ7aImD4uQ}\n}", "github": "", "project": "", "reviewers": "6B8X;q3wq;fVHW;fNyx", "pdf_size": 6126837, "rating": "7;7;7;7", "confidence": "4;3;4;5", "soundness": "4;2;3;3", "novelty": "3;2;3;4", "presentation": "3;3;4;4", "wc_summary": "82;109;179;143", "wc_strengths": "51;63;206;173", "wc_weaknesses": "183;11;146;361", "wc_questions": "60;12;154;108", "wc_limitations": "3;27;11;4", "wc_review": "379;222;696;789", "wc_reply_reviewers": "68;25;11;45", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 128.25, 36.409991760504425 ], "wc_strengths_avg": [ 123.25, 67.40317129037773 ], "wc_weaknesses_avg": [ 175.25, 124.8967073224911 ], "wc_questions_avg": [ 83.5, 52.99764145695542 ], "wc_limitations_avg": [ 11.25, 9.60143218483576 ], "wc_review_avg": [ 521.5, 230.2243471051661 ], "wc_reply_reviewers_avg": [ 37.25, 21.47527648250425 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14117083203045009330&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";;is.mpg.de", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "SyncDiffusion: Coherent Montage via Synchronized Joint Diffusions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71811", "id": "OZEfMD7axv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ee3a664ccfeabc0da16ac6f1f1cfe59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OZEfMD7axv", "openreview": "https://openreview.net/forum?id=OZEfMD7axv", "poster": "/media/PosterPDFs/NeurIPS%202023/71811.png?t=1701876283.64308", "slides": "https://nips.cc/virtual/2023/poster/71811", "video": "https://nips.cc/virtual/2023/poster/71811", "author_site": "Yuseung Lee, Kunho Kim, Hyunjin Kim, Minhyuk Sung", "tldr": "", "abstract": "The remarkable capabilities of pretrained image diffusion models have been utilized not only for generating fixed-size images but also for creating panoramas. However, naive stitching of multiple images often results in visible seams. Recent techniques have attempted to address this issue by performing joint diffusions in multiple windows and averaging latent features in overlapping regions. However, these approaches, which focus on seamless montage generation, often yield incoherent outputs by blending different scenes within a single image. To overcome this limitation, we propose SyncDiffusion, a plug-and-play module that synchronizes multiple diffusions through gradient descent from a perceptual similarity loss. 
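A hedged sketch of what one such synchronization step could look like, assuming for illustration a differentiable `predict_x0` that maps each window's latent to its predicted denoised image, window 0 as the anchor, and the `lpips` package for the perceptual distance; the paper's exact procedure is described next in the abstract.

```python
import torch
import lpips  # pip install lpips; LPIPS perceptual distance (Zhang et al., 2018)

# Hedged sketch of one synchronization step. Assumptions for illustration:
# `predict_x0` maps a window's latent to its predicted denoised image and is
# differentiable; window 0 serves as the anchor; `weight` is a guidance scale.
loss_fn = lpips.LPIPS(net="vgg")

def sync_latents(latents, predict_x0, weight=20.0):
    anchor = predict_x0(latents[0]).detach()
    for z in latents[1:]:
        z.requires_grad_(True)
        loss = loss_fn(predict_x0(z), anchor).mean()
        (grad,) = torch.autograd.grad(loss, z)
        with torch.no_grad():
            z -= weight * grad               # descend the perceptual loss
        z.requires_grad_(False)
    return latents

# Smoke test with a stand-in decoder on random window latents.
latents = [torch.randn(1, 3, 64, 64) for _ in range(3)]
sync_latents(latents, lambda z: torch.tanh(z), weight=1.0)
```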
Specifically, we compute the gradient of the perceptual loss using the predicted denoised images at each denoising step, providing meaningful guidance for achieving coherent montages. Our experimental results demonstrate that our method produces significantly more coherent outputs compared to previous methods (66.35% vs. 33.65% in our user study) while still maintaining fidelity (as assessed by GIQA) and compatibility with the input prompt (as measured by CLIP score). We further demonstrate the versatility of our method across three plug-and-play applications: layout-guided image generation, conditional image generation and 360-degree panorama generation. Our project page is at https://syncdiffusion.github.io.", "keywords": "Diffusion model;Text-to-image generation;Panorama generation", "primary_area": "", "supplementary_material": "/attachment/824f9bfa88bc86af02a6d091d0176319d997dc08.pdf", "author": "Yuseung Lee;Kunho Kim;Hyunjin Kim;Minhyuk Sung", "authorids": "~Yuseung_Lee1;~Kunho_Kim3;~Hyunjin_Kim2;~Minhyuk_Sung1", "gender": "M;M;;M", "homepage": "https://phillipinseoul.github.io/;https://soulmates2.github.io/;;https://mhsung.github.io/", "dblp": "389/9579;116/6701;;171/6792", "google_scholar": "h_Jb0wcAAAAJ;3_0fPnwAAAAJ;https://scholar.google.com/citations?hl=ko;PcIYMp4AAAAJ", "orcid": ";0009-0006-4087-2577;;", "linkedin": "yuseung-lee-6b085223a/;%EA%B1%B4%ED%98%B8-%EA%B9%80-192897184/;;mhsung", "or_profile": "~Yuseung_Lee1;~Kunho_Kim3;~Hyunjin_Kim2;~Minhyuk_Sung1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "Undergrad student;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nlee2023syncdiffusion,\ntitle={SyncDiffusion: Coherent Montage via Synchronized Joint Diffusions},\nauthor={Yuseung Lee and Kunho Kim and Hyunjin Kim and Minhyuk Sung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OZEfMD7axv}\n}", "github": "", "project": "", "reviewers": "DATV;SgT1;AhXa;X4Nm", "pdf_size": 24395021, "rating": "6;6;7;7", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "67;71;71;91", "wc_strengths": "88;60;72;51", "wc_weaknesses": "174;156;79;14", "wc_questions": "101;2;57;1", "wc_limitations": "8;28;6;1", "wc_review": "438;317;285;158", "wc_reply_reviewers": "100;22;88;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 9.38083151964686 ], "wc_strengths_avg": [ 67.75, 13.863170633011771 ], "wc_weaknesses_avg": [ 105.75, 63.86851728355685 ], "wc_questions_avg": [ 40.25, 41.75748435909424 ], "wc_limitations_avg": [ 10.75, 10.280442597476044 ], "wc_review_avg": [ 299.5, 99.6506397370333 ], "wc_reply_reviewers_avg": [ 57.75, 36.49914382557487 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 55, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6108415123452346785&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "The Gain from Ordering in Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71810", "id": "OaUT4hX40s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9b1fe9c41f1eeec3a659154d575a282-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OaUT4hX40s", "openreview": "https://openreview.net/forum?id=OaUT4hX40s", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71810", "video": "https://nips.cc/virtual/2023/poster/71810", "author_site": "Vasilis Kontonis, Mingchen Ma, Christos Tzamos", "tldr": "", "abstract": "We study fixed-design online learning where the learner is allowed to choose the order of the datapoints in order to minimize their regret (aka self-directed online learning). We focus on the fundamental task of online linear regression: the learner is given a dataset $X$ with $n$ examples in $d$ dimensions and at step $t$ they select a point $x_t \\in X$, predict a value $\\widetilde y_t$, and suffer loss $(\\widetilde y_t - w^\\ast \\cdot x_t)^2$. The goal is to design algorithms that order the examples and achieve better \nregret than random- or worst-order online algorithms.\n\nFor an arbitrary dataset $X$, we show that, under the Exponential Time Hypothesis, no efficient algorithm can approximate the optimal (best-order) regret within a factor of $d^{1/\\poly(\\log \\log d)}$.\n\n\nWe then show that, for structured datasets, we can bypass the above hardness result and achieve nearly optimal regret. 
When the examples of $X$ are drawn i.i.d.\\ from the uniform distribution on the sphere, we present an algorithm based on the greedy heuristic of selecting ``easiest'' examples first that achieves a $\\log d$-approximation of the optimal regret.", "keywords": "Online Learning;Self-directed Learning;Hardness of Approximation", "primary_area": "", "supplementary_material": "/attachment/9e55b7ddcef1a6dba1d099840e15b9abd618fe6c.pdf", "author": "Vasilis Kontonis;Mingchen Ma;Christos Tzamos", "authorids": "~Vasilis_Kontonis1;~Mingchen_Ma1;~Christos_Tzamos1", "gender": "M;;", "homepage": "http://vkonton.github.io/;https://mmingchen.github.io/;https://tzamos.com", "dblp": "203/8777;270/6320;79/8819", "google_scholar": "7_44KWAAAAAJ;w84UnLsAAAAJ;wB01auEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Vasilis_Kontonis1;~Mingchen_Ma1;~Christos_Tzamos1", "aff": "University of Texas at Austin;University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "cs.utexas.edu;wisc.edu;wisc.edu", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkontonis2023the,\ntitle={The Gain from Ordering in Online Learning},\nauthor={Vasilis Kontonis and Mingchen Ma and Christos Tzamos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OaUT4hX40s}\n}", "github": "", "project": "", "reviewers": "N13f;Kpn9;eLsL;5ur6", "pdf_size": 368662, "rating": "5;6;6;7", "confidence": "1;2;3;4", "soundness": "3;3;4;3", "novelty": "3;3;2;3", "presentation": "3;3;3;2", "wc_summary": "120;327;141;231", "wc_strengths": "4;139;21;84", "wc_weaknesses": "1;97;71;183", "wc_questions": "1;53;114;58", "wc_limitations": "1;5;1;6", "wc_review": "127;621;348;562", "wc_reply_reviewers": "0;94;19;31", "wc_reply_authors": "0;0;0;13", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 204.75, 81.97674975259754 ], "wc_strengths_avg": [ 62.0, 53.52102390649865 ], "wc_weaknesses_avg": [ 88.0, 65.12296062065974 ], "wc_questions_avg": [ 56.5, 40.00312487793922 ], "wc_limitations_avg": [ 3.25, 2.277608394786075 ], "wc_review_avg": [ 414.5, 194.60023124343917 ], "wc_reply_reviewers_avg": [ 36.0, 35.26329536501091 ], "wc_reply_authors_avg": [ 3.25, 5.629165124598851 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9486832980505138, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13178854356955590650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.utexas.edu;wisc.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UT Austin;UW-Madison;UW", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Austin;Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Aging with GRACE: Lifelong Model Editing with Discrete Key-Value Adaptors", "status": "Poster", "track": "main", "site":
"https://nips.cc/virtual/2023/poster/71809", "id": "Oc1SIKxwdV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/95b6e2ff961580e03c0a662a63a71812-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Oc1SIKxwdV", "openreview": "https://openreview.net/forum?id=Oc1SIKxwdV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71809", "video": "https://nips.cc/virtual/2023/poster/71809", "author_site": "Tom Hartvigsen, Swami Sankaranarayanan, Hamid Palangi, Yoon Kim, Marzyeh Ghassemi", "tldr": "", "abstract": "Deployed language models decay over time due to shifting inputs, changing user needs, or emergent world-knowledge gaps. When such problems are identified, we want to make targeted edits while avoiding expensive retraining. However, current model editors, which modify such behaviors of pre-trained models, degrade model performance quickly across multiple, sequential edits. We propose GRACE, a \\textit{lifelong} model editing method, which implements spot-fixes on streaming errors of a deployed model, ensuring minimal impact on unrelated inputs. GRACE writes new mappings into a pre-trained model's latent space, creating a discrete, local codebook of edits without altering model weights. This is the first method enabling thousands of sequential edits using only streaming errors. Our experiments on T5, BERT, and GPT models show GRACE's state-of-the-art performance in making and retaining edits, while generalizing to unseen inputs. Our code is available at [github.com/thartvigsen/grace](https://www.github.com/thartvigsen/grace}).", "keywords": "Model Editing;Continual Learning;Model Repair", "primary_area": "", "supplementary_material": "", "author": "Thomas Hartvigsen;Swami Sankaranarayanan;Hamid Palangi;Yoon Kim;Marzyeh Ghassemi", "authorids": "~Thomas_Hartvigsen1;~Swami_Sankaranarayanan1;~Hamid_Palangi1;~Yoon_Kim1;~Marzyeh_Ghassemi2", "gender": "M;M;M;;F", "homepage": "https://www.tomhartvigsen.com;https://swamiviv.github.io;https://www.hamidpalangi.com/;https://people.csail.mit.edu/yoonkim/;https://www.healthyml.org/", "dblp": "211/5752;172/9983;01/963;;145/6563", "google_scholar": "rIjeeRsAAAAJ;w3KgvQIAAAAJ;https://scholar.google.ca/citations?user=B1lAghgAAAAJ;n_ts4eYAAAAJ;", "orcid": ";;;;", "linkedin": ";swamiviv/;;;", "or_profile": "~Thomas_Hartvigsen1;~Swami_Sankaranarayanan1;~Hamid_Palangi1;~Yoon_Kim1;~Marzyeh_Ghassemi2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;google.com;mit.edu;mit.edu", "position": "Postdoc;Postdoctoral Associate;Staff Research Scientist;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhartvigsen2023aging,\ntitle={Aging with {GRACE}: Lifelong Model Editing with Discrete Key-Value Adaptors},\nauthor={Thomas Hartvigsen and Swami Sankaranarayanan and Hamid Palangi and Yoon Kim and Marzyeh Ghassemi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Oc1SIKxwdV}\n}", "github": "", "project": "", "reviewers": "2Vyt;2Kqs;wTwG;nd9W;Qd7q", "pdf_size": 1594673, "rating": "4;6;7;7;7", "confidence": "4;4;4;2;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;4;3;4;3", "wc_summary": "66;156;78;209;87", "wc_strengths": "46;248;174;77;110", "wc_weaknesses": "160;659;194;88;315", "wc_questions": "1;30;167;44;136", "wc_limitations": "4;28;12;3;43", 
"wc_review": "277;1121;625;421;691", "wc_reply_reviewers": "0;393;81;35;84", "wc_reply_authors": "47;506;49;0;62", "reply_reviewers": "0;2;1;1;1", "reply_authors": "2;3;2;1;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 119.2, 54.74084398326354 ], "wc_strengths_avg": [ 131.0, 72.27724399837061 ], "wc_weaknesses_avg": [ 283.2, 201.72991845534463 ], "wc_questions_avg": [ 75.6, 64.25760655362134 ], "wc_limitations_avg": [ 18.0, 15.375304875026057 ], "wc_review_avg": [ 627.0, 287.42720817626156 ], "wc_reply_reviewers_avg": [ 118.6, 140.68489613316703 ], "wc_reply_authors_avg": [ 132.8, 187.78221428026671 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5144957554275266, "gs_citation": 142, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18105772126499047143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;google.com;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "MIT;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Red Teaming Deep Neural Networks with Feature Synthesis Tools", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71808", "id": "Od6CHhPM7I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/febe5c5c6973f713cc43bf0f7c90edbe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Od6CHhPM7I", "openreview": "https://openreview.net/forum?id=Od6CHhPM7I", "poster": "/media/PosterPDFs/NeurIPS%202023/71808.png?t=1701884325.7544134", "slides": "https://nips.cc/virtual/2023/poster/71808", "video": "https://nips.cc/virtual/2023/poster/71808", "author_site": "Stephen Casper, Tong Bu, Yuxiao Li, Jiawei Li, Kevin Zhang, Kaivalya Hariharan, Dylan Hadfield-Menell", "tldr": "", "abstract": "Interpretable AI tools are often motivated by the goal of understanding model behavior in out-of-distribution (OOD) contexts. Despite the attention this area of study receives, there are comparatively few cases where these tools have identified previously unknown bugs in models. We argue that this is due, in part, to a common feature of many interpretability methods: they analyze model behavior by using a particular dataset. This only allows for the study of the model in the context of features that the user can sample in advance. To address this, a growing body of research involves interpreting models using feature synthesis methods that do not depend on a dataset. In this paper, we benchmark the usefulness of interpretability tools for model debugging. Our key insight is that we can implant human-interpretable trojans into models and then evaluate these tools based on whether they can help humans discover them. This is analogous to finding OOD bugs, except the ground truth is known, allowing us to know when a user's interpretation is correct. We make four contributions. 
(1) We propose trojan discovery as an evaluation task for interpretability tools and introduce a benchmark with 12 trojans of 3 different types. (2) We demonstrate the difficulty of this benchmark with a preliminary evaluation of 16 state-of-the-art feature attribution/saliency tools. Even under ideal conditions, given direct access to data with the trojan trigger, these methods still often fail to identify bugs. (3) We evaluate 7 feature-synthesis methods on our benchmark. (4) We introduce and evaluate 2 new variants of the best-performing method from the previous evaluation.", "keywords": "interpretability;benchmarking;auditing;diagnostics;debugging;adversarial attacks;feature synthesis", "primary_area": "", "supplementary_material": "/attachment/4acdd9d14a29c75585cc1b65a013872274d49336.zip", "author": "Stephen Casper;Tong Bu;Yuxiao Li;Jiawei Li;Kevin Zhang;Kaivalya Hariharan;Dylan Hadfield-Menell", "authorids": "~Stephen_Casper1;~Tong_Bu1;~Yuxiao_Li2;~Jiawei_Li11;~Kevin_Zhang5;~Kaivalya_Hariharan1;~Dylan_Hadfield-Menell2", "gender": "M;;F;;M;M;M", "homepage": "https://stephencasper.com/;;https://github.com/JadeLilyx;;;;http://people.csail.mit.edu/dhm/", "dblp": "255/5295.html;;78/10768;;;;135/8332", "google_scholar": "N4aglP4AAAAJ;;2CGCf9cAAAAJ;;;;4mVPFQ8AAAAJ", "orcid": "0000-0003-0084-1937;;0000-0002-6496-9991;;0000-0003-3335-4932;;0000-0002-6168-4763", "linkedin": ";;;;kevin-zhang-95706b136/;kaivalya-hariharan-44a698204;", "or_profile": "~Stephen_Casper1;~Tong_Bu1;~Yuxiao_Li2;~Jiawei_Li11;~Kevin_Zhang5;~Kaivalya_Hariharan1;~Dylan_Hadfield-Menell2", "aff": "Massachusetts Institute of Technology;;Department of Electronic Engineering, Tsinghua University;;Peking University;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;tsinghua.edu.cn;;pku.edu.cn;mit.edu;mit.edu", "position": "Graduate Student;;PhD student;;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ncasper2023red,\ntitle={Red Teaming Deep Neural Networks with Feature Synthesis Tools},\nauthor={Stephen Casper and Tong Bu and Yuxiao Li and Jiawei Li and Kevin Zhang and Kaivalya Hariharan and Dylan Hadfield-Menell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Od6CHhPM7I}\n}", "github": "", "project": "", "reviewers": "ChpR;PpyC;EMti;WymJ", "pdf_size": 10131454, "rating": "4;6;6;7", "confidence": "2;4;3;3", "soundness": "3;3;2;4", "novelty": "3;3;2;4", "presentation": "1;2;2;3", "wc_summary": "108;303;62;110", "wc_strengths": "107;105;18;121", "wc_weaknesses": "171;267;139;99", "wc_questions": "4;154;15;161", "wc_limitations": "8;49;8;18", "wc_review": "398;878;242;509", "wc_reply_reviewers": "0;34;65;210", "wc_reply_authors": "45;34;54;444", "reply_reviewers": "0;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 145.75, 92.7964843084047 ], "wc_strengths_avg": [ 87.75, 40.739262389002576 ], "wc_weaknesses_avg": [ 169.0, 62.0644825967316 ], "wc_questions_avg": [ 83.5, 74.1434420565973 ], "wc_limitations_avg": [ 20.75, 16.813313177360374 ], "wc_review_avg": [ 506.75, 234.38789964501154 ], "wc_reply_reviewers_avg": [ 77.25, 80.01679511202633 ], "wc_reply_authors_avg": [ 144.25, 173.20562202191937 ], "reply_reviewers_avg": [ 1.0, 
0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17762979219147247801&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "mit.edu;;tsinghua.edu.cn;;pku.edu.cn;mit.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Tsinghua University;Peking University", "aff_unique_dep": ";Department of Electronic Engineering;", "aff_unique_url": "https://web.mit.edu;https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "MIT;THU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;China" }, { "id": "OdylEgIR1D", "title": "PufferLib: Making Reinforcement Learning Libraries and Environments Play Nice", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Reinforcement learning (RL) frameworks often falter in complex environments due to inherent simplifying assumptions. This gap necessitates labor-intensive and error-prone intermediate conversion layers, limiting the applicability of RL as a whole. To address this challenge, we introduce PufferLib, a novel middleware solution. PufferLib transforms complex environments into a broadly compatible, vectorized format, eliminating the need for bespoke conversion layers and enabling more rigorous testing. Users interact with PufferLib through concise bindings, significantly reducing the technical overhead. We release PufferLib's complete source code under the MIT license, a pip module, a containerized setup, comprehensive documentation, and example integrations. 
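As an illustration of the kind of one-off conversion layer PufferLib is designed to make unnecessary, consider a hand-written Gymnasium wrapper that flattens structured observations into a single vector for libraries that only accept flat Box spaces (generic code, not PufferLib's API):

```python
import numpy as np
import gymnasium as gym

# Illustration (generic code, not PufferLib's API) of a bespoke conversion
# layer: a hand-written wrapper flattening structured observations into one
# fixed-size vector so RL libraries expecting flat Box spaces can consume
# the environment. PufferLib automates this kind of adaptation.
class FlattenObs(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        size = gym.spaces.flatdim(env.observation_space)
        self.observation_space = gym.spaces.Box(-np.inf, np.inf, (size,), np.float32)

    def observation(self, obs):
        flat = gym.spaces.flatten(self.env.observation_space, obs)
        return np.asarray(flat, dtype=np.float32)

env = FlattenObs(gym.make("Blackjack-v1"))   # Tuple observation -> flat vector
obs, info = env.reset()
```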
We also maintain a community Discord channel to facilitate support and discussion.", "keywords": "reinforcement learning; infrastructure; tooling", "primary_area": "", "supplementary_material": "/attachment/d49832a441f79fddf956deccbbc066b90a36b9db.pdf", "author": "Joseph Suarez", "authorids": "~Joseph_Suarez1", "gender": "M", "homepage": "https://jsuarez5341.github.io", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Joseph_Suarez1", "aff": "Massachusetts Institute of Technology", "aff_domain": "mit.edu", "position": "PhD student", "bibtex": "@misc{\nsuarez2023pufferlib,\ntitle={PufferLib: Making Reinforcement Learning Libraries and Environments Play Nice},\nauthor={Joseph Suarez},\nyear={2023},\nurl={https://openreview.net/forum?id=OdylEgIR1D}\n}", "github": "", "project": "", "reviewers": "C2b5;gJrA;CCab;J5Gf", "site": "https://openreview.net/forum?id=OdylEgIR1D", "pdf_size": 404079, "rating": "6;6;6;7", "confidence": "2;3;3;3", "wc_summary_and_contributions": "306;65;51;238", "wc_strengths": "2;93;96;10", "wc_improvement": "304;46;628;3", "wc_limitations": "281;48;26;2", "wc_correctness": "1;14;176;1", "wc_clarity": "8;57;29;1", "wc_relation_to_prior_work": "1;32;74;1", "wc_documentation": "14;46;42;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "918;402;1123;258", "wc_reply_reviewers": "94;0;64;133", "wc_reply_authors": "593;323;439;293", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 165.0, 109.77932410067025 ], "wc_strengths_avg": [ 50.25, 44.35298749802543 ], "wc_improvement_avg": [ 245.25, 249.16598383406995 ], "wc_limitations_avg": [ 89.25, 111.89587794016364 ], "wc_correctness_avg": [ 48.0, 74.09116006650186 ], "wc_clarity_avg": [ 23.75, 21.787324296480282 ], "wc_relation_to_prior_work_avg": [ 27.0, 29.941609843159736 ], "wc_documentation_avg": [ 25.75, 18.872930350107268 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 675.25, 356.43048059895216 ], "wc_reply_reviewers_avg": [ 72.75, 48.60748399166532 ], "wc_reply_authors_avg": [ 412.0, 117.8685708745126 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5688003771566927357&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Practical Differentially Private Hyperparameter Tuning with Subsampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71807", "id": "OeLInnFKUK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/59b9582cd35f555ea8415030073e7b22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OeLInnFKUK", "openreview": "https://openreview.net/forum?id=OeLInnFKUK", "poster": "/media/PosterPDFs/NeurIPS%202023/71807.png?t=1702477917.5307257", "slides": "https://nips.cc/virtual/2023/poster/71807", "video": "https://nips.cc/virtual/2023/poster/71807", "author_site": "Antti Koskela, Tejas Kulkarni", "tldr": "", "abstract": "Tuning the hyperparameters of differentially private (DP) machine 
learning (ML) algorithms often requires use of sensitive data and this may leak private information via hyperparameter values. Recently, Papernot and Steinke (2022) proposed a certain class of DP hyperparameter tuning algorithms, where the number of random search samples is randomized. Commonly, these algorithms still considerably increase the DP privacy parameter $\\varepsilon$ over non-tuned DP ML model training and can be computationally heavy as evaluating each hyperparameter candidate requires a new training run. We focus on lowering both the DP bounds and the compute cost of these methods by using only a random subset of the sensitive data for the hyperparameter tuning and by appropriately extrapolating the optimal values to a larger dataset. We carry out a R\u00e9nyi differential privacy analysis for the proposed method and experimentally show that it consistently leads to better privacy-utility trade-off than the baseline method by Papernot and Steinke.", "keywords": "differential privacy;hyperparameter tuning;R\u00e9nyi differential privacy;computational efficiency;DP-SGD", "primary_area": "", "supplementary_material": "", "author": "Antti Koskela;Tejas Kulkarni", "authorids": "~Antti_Koskela1;~Tejas_Kulkarni2", "gender": "M;M", "homepage": ";https://tejasvk.github.io/", "dblp": "124/9273;46/10579", "google_scholar": "https://scholar.google.fi/citations?hl=fi;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";tejas-kulkarni-45465332/?originalSubdomain=uk", "or_profile": "~Antti_Koskela1;~Tejas_Kulkarni2", "aff": "Nokia Bell Labs;Nokia Bell Labs", "aff_domain": "nokia-bell-labs.com;nokia-bell-labs.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nkoskela2023practical,\ntitle={Practical Differentially Private Hyperparameter Tuning with Subsampling},\nauthor={Antti Koskela and Tejas Kulkarni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OeLInnFKUK}\n}", "github": "", "project": "", "reviewers": "kuHB;on32;VkdY;zJd6", "pdf_size": 5435744, "rating": "6;6;6;7", "confidence": "4;3;4;4", "soundness": "4;3;3;4", "novelty": "3;4;2;3", "presentation": "3;3;3;3", "wc_summary": "81;111;79;100", "wc_strengths": "59;88;131;10", "wc_weaknesses": "47;988;55;9", "wc_questions": "46;467;32;186", "wc_limitations": "1;8;1;11", "wc_review": "234;1662;298;316", "wc_reply_reviewers": "0;26;26;8", "wc_reply_authors": "16;24;15;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.75, 13.348689074212494 ], "wc_strengths_avg": [ 72.0, 44.017042154147525 ], "wc_weaknesses_avg": [ 274.75, 412.16160362168625 ], "wc_questions_avg": [ 182.75, 174.81043304105165 ], "wc_limitations_avg": [ 5.25, 4.380353866983808 ], "wc_review_avg": [ 627.5, 598.0457758399435 ], "wc_reply_reviewers_avg": [ 15.0, 11.357816691600547 ], "wc_reply_authors_avg": [ 13.75, 8.671072598012312 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10561785191474076674&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": 
"nokia-bell-labs.com;nokia-bell-labs.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Nokia Bell Labs", "aff_unique_dep": "", "aff_unique_url": "https://www.nokialabs.com", "aff_unique_abbr": "Nokia Bell Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "The Transient Nature of Emergent In-Context Learning in Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71806", "id": "Of0GBzow8P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58692a1701314e09cbd7a5f5f3871cc9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Of0GBzow8P", "openreview": "https://openreview.net/forum?id=Of0GBzow8P", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71806", "video": "https://nips.cc/virtual/2023/poster/71806", "author_site": "Aaditya Singh, Stephanie Chan, Ted Moskovitz, Erin Grant, Andrew Saxe, Felix Hill", "tldr": "", "abstract": "Transformer neural networks can exhibit a surprising capacity for in-context learning (ICL) despite not being explicitly trained for it. Prior work has provided a deeper understanding of how ICL emerges in transformers, e.g. through the lens of mechanistic interpretability, Bayesian inference, or by examining the distributional properties of training data. However, in each of these cases, ICL is treated largely as a persistent phenomenon; namely, once ICL emerges, it is assumed to persist asymptotically. Here, we show that the emergence of ICL during transformer training is, in fact, often transient. We train transformers on synthetic data designed so that both ICL and in-weights learning (IWL) strategies can lead to correct predictions. We find that ICL first emerges, then disappears and gives way to IWL, all while the training loss decreases, indicating an asymptotic preference for IWL. The transient nature of ICL is observed in transformers across a range of model sizes and datasets, raising the question of how much to ``overtrain'' transformers when seeking compact, cheaper-to-run models. We find that L2 regularization may offer a path to more persistent ICL that removes the need for early stopping based on ICL-style validation tasks. Finally, we present initial evidence that ICL transience may be caused by competition between ICL and IWL circuits.", "keywords": "in-context learning;transformers;emergence;transience", "primary_area": "", "supplementary_material": "", "author": "Aaditya K Singh;Stephanie C.Y. 
Chan;Ted Moskovitz;Erin Grant;Andrew M Saxe;Felix Hill", "authorids": "~Aaditya_K_Singh1;~Stephanie_C.Y._Chan1;~Ted_Moskovitz1;~Erin_Grant1;~Andrew_M_Saxe1;~Felix_Hill1", "gender": "F;M;F;M;;M", "homepage": "https://scychan.github.io/;https://tedmoskovitz.github.io/;https://eringrant.github.io/;https://www.saxelab.org;https://fh295.github.io/;https://aadityasingh.github.io/", "dblp": "255/7866;;169/3175;39/6894;116/0509;", "google_scholar": "https://scholar.google.com/citations?hl=en;pPVXrTYAAAAJ;OSg3D9MAAAAJ;h0Al1fcAAAAJ;https://scholar.google.co.uk/citations?user=4HLUnhIAAAAJ;9OPKqmMAAAAJ", "orcid": ";;0009-0002-8623-7254;0000-0002-9831-8812;;", "linkedin": "scychan;;eringrant914;;;", "or_profile": "~Stephanie_C.Y._Chan1;~Ted_Moskovitz1;~Erin_Grant1;~Andrew_M_Saxe1;~Felix_Hill1;~Aaditya_Singh1", "aff": "Google DeepMind;Gatsby Computational Neuroscience Unit;University College London;University College London, University of London;Google;Meta Facebook", "aff_domain": "deepmind.com;gatsby.ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;google.com;meta.com", "position": "Research Scientist;PhD student;Postdoc;Associate Professor;Researcher;Intern", "bibtex": "@inproceedings{\nsingh2023the,\ntitle={The Transient Nature of Emergent In-Context Learning in Transformers},\nauthor={Aaditya K Singh and Stephanie C.Y. Chan and Ted Moskovitz and Erin Grant and Andrew M Saxe and Felix Hill},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Of0GBzow8P}\n}", "github": "", "project": "", "reviewers": "oS5Q;wdqh;iyV5;8T18", "pdf_size": 4246630, "rating": "5;6;6;7", "confidence": "2;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "60;104;162;193", "wc_strengths": "33;139;57;702", "wc_weaknesses": "150;115;83;334", "wc_questions": "182;309;159;223", "wc_limitations": "1;52;74;55", "wc_review": "426;719;535;1507", "wc_reply_reviewers": "141;98;22;0", "wc_reply_authors": "216;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 129.75, 51.402212987380224 ], "wc_strengths_avg": [ 232.75, 273.75753414289807 ], "wc_weaknesses_avg": [ 170.5, 97.32548484338518 ], "wc_questions_avg": [ 218.25, 57.18992481198065 ], "wc_limitations_avg": [ 45.5, 27.04163456597992 ], "wc_review_avg": [ 796.75, 423.22238539566877 ], "wc_reply_reviewers_avg": [ 65.25, 56.874313182666214 ], "wc_reply_authors_avg": [ 54.0, 93.53074360871938 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4873105608599094641&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "deepmind.com;gatsby.ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;google.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;2", "aff_unique_norm": "Google;University College London;Meta", "aff_unique_dep": "Google DeepMind;Gatsby Computational Neuroscience Unit;Meta Platforms, Inc.", "aff_unique_url": "https://deepmind.com;https://www.ucl.ac.uk;https://meta.com", "aff_unique_abbr": "DeepMind;UCL;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", 
"aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "EgoDistill: Egocentric Head Motion Distillation for Efficient Video Understanding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71805", "id": "OfjVAKx44G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a412f0037b0df295a39a198666ea6a6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OfjVAKx44G", "openreview": "https://openreview.net/forum?id=OfjVAKx44G", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71805", "video": "https://nips.cc/virtual/2023/poster/71805", "author_site": "Shuhan Tan, Tushar Nagarajan, Kristen Grauman", "tldr": "", "abstract": "Recent advances in egocentric video understanding models are promising, but their heavy computational expense is a barrier for many real-world applications. To address this challenge, we propose EgoDistill, a distillation-based approach that learns to reconstruct heavy ego-centric video clip features by combining the semantics from a sparse set of video frames with head motion from lightweight IMU readings. We further devise a novel IMU-based self-supervised pretraining strategy. Our method leads to significant improvements in efficiency, requiring 200\u00d7 fewer GFLOPs than equivalent video models. We demonstrate its effectiveness on the Ego4D and EPIC- Kitchens datasets, where our method outperforms state-of-the-art efficient video understanding methods.", "keywords": "Egocentric Video; IMU; Efficient Video Understanding", "primary_area": "", "supplementary_material": "/attachment/1dc0b0e0ecd9fef89605ba9120569e91b16d4bc9.pdf", "author": "Shuhan Tan;Tushar Nagarajan;Kristen Grauman", "authorids": "~Shuhan_Tan2;~Tushar_Nagarajan1;~Kristen_Grauman1", "gender": "M;;F", "homepage": "https://ariostgx.github.io/website/;https://tushar-n.github.io/;http://www.cs.utexas.edu/~grauman/", "dblp": ";207/8308;57/4553", "google_scholar": "Ro6enEEAAAAJ;KAKqSwIAAAAJ;Jp6Mz1sAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shuhan_Tan2;~Tushar_Nagarajan1;~Kristen_Grauman1", "aff": "NVIDIA;University of Texas, Austin;University of Texas, Austin", "aff_domain": "nvidia.com;utexas.edu;utexas.edu", "position": "Research Intern;PhD student;Professor", "bibtex": "@inproceedings{\ntan2023egodistill,\ntitle={EgoDistill: Egocentric Head Motion Distillation for Efficient Video Understanding},\nauthor={Shuhan Tan and Tushar Nagarajan and Kristen Grauman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OfjVAKx44G}\n}", "github": "", "project": "", "reviewers": "F46j;gfRs;xst2;M5sy;K65f", "pdf_size": 21199847, "rating": "4;5;5;6;8", "confidence": "3;5;4;4;4", "soundness": "1;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "37;146;46;64;108", "wc_strengths": "22;109;85;65;49", "wc_weaknesses": "117;223;66;97;3", "wc_questions": "5;2;47;3;63", "wc_limitations": "10;1;21;3;46", "wc_review": "191;481;265;232;269", "wc_reply_reviewers": "0;0;0;0;37", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 80.2, 40.98975481751507 ], "wc_strengths_avg": [ 66.0, 
29.785902705810344 ], "wc_weaknesses_avg": [ 101.2, 72.08994382020283 ], "wc_questions_avg": [ 24.0, 25.830214865540704 ], "wc_limitations_avg": [ 16.2, 16.460862674841803 ], "wc_review_avg": [ 287.6, 100.68088199852045 ], "wc_reply_reviewers_avg": [ 7.4, 14.8 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.23312620206007845, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16302804978904242648&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nvidia.com;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "NVIDIA;University of Texas at Austin", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.utexas.edu", "aff_unique_abbr": "NVIDIA;UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Og2HCj3V1I", "title": "Attribute Based Interpretable Evaluation Metrics for Generative Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "While generative models continue to evolve, the field of evaluation metrics has largely remained stagnant. Despite the annual publication of metric papers, the majority of these metrics share a common characteristic: they measure distributional distance using pre-trained embeddings without considering the interpretability of the underlying information. This limits their usefulness and makes it difficult to gain a comprehensive understanding of the data.\nTo address this issue, we propose using a new type of interpretable embedding. We demonstrate how we can transform deeply encoded embeddings into interpretable embeddings by measuring their correspondence with text attributes. With this new type of embedding, we introduce two novel metrics that measure and explain the diversity of the generator: the first metric compares the frequency of appearance of the training set and the attribute, and the second metric evaluates whether the relationships between attributes in the training set are preserved. 
By introducing these new metrics, we hope to enhance the interpretability and usefulness of evaluation metrics in the field of generative models.", "keywords": "evaluation metric for Generative models", "primary_area": "", "supplementary_material": "/attachment/9c0869865d6ffca0f411eae379d7ba7da3025cc5.zip", "author": "Dongkyun Kim;Mingi Kwon;Youngjung Uh", "authorids": "~Dongkyun_Kim2;~Mingi_Kwon1;~Youngjung_Uh2", "gender": "M;M;", "homepage": "https://vilab.yonsei.ac.kr;https://github.com/kwonminki;https://vilab.yonsei.ac.kr/member/professor", "dblp": ";327/3276;57/10511", "google_scholar": ";https://scholar.google.co.kr/citations?user=W8vK8BwAAAAJ;BWBGrEEAAAAJ", "orcid": ";;", "linkedin": ";kwonmingi/;youngjung-uh-78b459b5/", "or_profile": "~Dongkyun_Kim2;~Mingi_Kwon1;~Youngjung_Uh2", "aff": "Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "MS student;PhD student;Associate Professor", "bibtex": "@misc{\nkim2023attribute,\ntitle={Attribute Based Interpretable Evaluation Metrics for Generative Models},\nauthor={Dongkyun Kim and Mingi Kwon and Youngjung Uh},\nyear={2023},\nurl={https://openreview.net/forum?id=Og2HCj3V1I}\n}", "github": "", "project": "", "reviewers": "ch3Y;1cWg;E51D;o9jM;T7Dd", "site": "https://openreview.net/forum?id=Og2HCj3V1I", "pdf_size": 3519261, "rating": "4;5;6;6;6", "confidence": "2;3;4;4;3", "soundness": "1;3;3;3;2", "novelty": "1;2;3;3;2", "presentation": "3;3;3;3;2", "wc_summary": "214;152;52;121;135", "wc_strengths": "19;52;59;52;83", "wc_weaknesses": "409;190;280;43;218", "wc_questions": "221;43;162;339;47", "wc_limitations": "4;2;9;9;55", "wc_review": "867;439;562;564;538", "wc_reply_reviewers": "202;0;40;109;185", "wc_reply_authors": "1044;21;0;82;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "4;2;1;2;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 134.8, 52.18198923000157 ], "wc_strengths_avg": [ 53.0, 20.464603587658374 ], "wc_weaknesses_avg": [ 228.0, 119.35995978551601 ], "wc_questions_avg": [ 162.4, 111.53044427419806 ], "wc_limitations_avg": [ 15.8, 19.79292802998081 ], "wc_review_avg": [ 594.0, 143.9541593702662 ], "wc_reply_reviewers_avg": [ 107.2, 78.80456839549342 ], "wc_reply_authors_avg": [ 229.4, 408.4064641995766 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8685990362153791, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8110686798915227576&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Quantum speedups for stochastic optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71804", "id": "OiatK9W6tR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ed9931d6e1fb6a85efa1b2c014a47e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OiatK9W6tR", "openreview": "https://openreview.net/forum?id=OiatK9W6tR", "poster": "", 
"slides": "https://nips.cc/virtual/2023/poster/71804", "video": "https://nips.cc/virtual/2023/poster/71804", "author_site": "Aaron Sidford, Chenyi Zhang", "tldr": "", "abstract": "We consider the problem of minimizing a continuous function given given access to a natural quantum generalization of a stochastic gradient oracle. We provide two new methods for the special case of minimizing a Lipschitz convex function. Each method obtains a dimension versus accuracy trade-off which is provably unachievable classically and we prove that one method is asymptotically optimal in low-dimensional settings. Additionally, we provide quantum algorithms for computing a critical point of a smooth non-convex function at rates not known to be achievable classically. To obtain these results we build upon the quantum multivariate mean estimation result of Cornelissen et al. and provide a general quantum variance reduction technique of independent interest.", "keywords": "continuous optimization;quantum algorithms;stochastic optimization;gradient oracle", "primary_area": "", "supplementary_material": "", "author": "Aaron Sidford;Chenyi Zhang", "authorids": "~Aaron_Sidford1;~Chenyi_Zhang2", "gender": ";M", "homepage": ";https://chenyizhang2000.github.io", "dblp": ";", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN", "orcid": ";", "linkedin": ";", "or_profile": "~Aaron_Sidford1;~Chenyi_Zhang2", "aff": ";Stanford University", "aff_domain": ";stanford.edu", "position": ";PhD student", "bibtex": "@inproceedings{\nsidford2023quantum,\ntitle={Quantum speedups for stochastic optimization},\nauthor={Aaron Sidford and Chenyi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OiatK9W6tR}\n}", "github": "", "project": "", "reviewers": "krht;ewVR;PEHT;YvE3;Zhqz", "pdf_size": 523674, "rating": "4;5;7;7;7", "confidence": "3;2;3;3;4", "soundness": "1;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "44;48;138;88;40", "wc_strengths": "19;56;51;127;28", "wc_weaknesses": "74;103;271;123;128", "wc_questions": "94;96;36;85;6", "wc_limitations": "28;11;45;48;19", "wc_review": "259;314;541;471;221", "wc_reply_reviewers": "52;35;11;18;21", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 71.6, 37.40374312819507 ], "wc_strengths_avg": [ 56.2, 37.996841974037785 ], "wc_weaknesses_avg": [ 139.8, 68.29172717101245 ], "wc_questions_avg": [ 63.4, 36.086562596068916 ], "wc_limitations_avg": [ 30.2, 14.386104406683554 ], "wc_review_avg": [ 361.2, 123.8650878980837 ], "wc_reply_reviewers_avg": [ 27.4, 14.568459081179451 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1813497781546928229&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";stanford.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0", 
"aff_campus_unique": "Stanford", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Augmentation-Aware Self-Supervision for Data-Efficient GAN Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71803", "id": "OiivS2mqQf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6464638c2472e4cae607f0c96a6fe774-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OiivS2mqQf", "openreview": "https://openreview.net/forum?id=OiivS2mqQf", "poster": "/media/PosterPDFs/NeurIPS%202023/71803.png?t=1699536240.3679879", "slides": "https://nips.cc/virtual/2023/poster/71803", "video": "https://nips.cc/virtual/2023/poster/71803", "author_site": "Liang Hou, Liang Hou, Qi Cao, Yige Yuan, Songtao Zhao, Chongyang Ma, Siyuan Pan, Pengfei Wan, Zhongyuan Wang, Zhongyuan Wang, Huawei Shen, Xueqi Cheng", "tldr": "", "abstract": "Training generative adversarial networks (GANs) with limited data is challenging because the discriminator is prone to overfitting. Previously proposed differentiable augmentation demonstrates improved data efficiency of training GANs. However, the augmentation implicitly introduces undesired invariance to augmentation for the discriminator since it ignores the change of semantics in the label space caused by data transformation, which may limit the representation learning ability of the discriminator and ultimately affect the generative modeling performance of the generator. To mitigate the negative impact of invariance while inheriting the benefits of data augmentation, we propose a novel augmentation-aware self-supervised discriminator that predicts the augmentation parameter of the augmented data. Particularly, the prediction targets of real data and generated data are required to be distinguished since they are different during training. We further encourage the generator to adversarially learn from the self-supervised discriminator by generating augmentation-predictable real and not fake data. This formulation connects the learning objective of the generator and the arithmetic $-$ harmonic mean divergence under certain assumptions. We compare our method with state-of-the-art (SOTA) methods using the class-conditional BigGAN and unconditional StyleGAN2 architectures on data-limited CIFAR-10, CIFAR-100, FFHQ, LSUN-Cat, and five low-shot datasets. 
Experimental results demonstrate significant improvements of our method over SOTA methods in training data-efficient GANs.", "keywords": "generative adversarial networks;limited data;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Liang Hou;Qi Cao;Yige Yuan;Songtao Zhao;Chongyang Ma;Siyuan Pan;Pengfei Wan;Zhongyuan Wang;Huawei Shen;Xueqi Cheng", "authorids": "~Liang_Hou1;~Qi_Cao1;~Yige_Yuan1;~Songtao_Zhao1;~Chongyang_Ma1;~Siyuan_Pan1;~Pengfei_Wan1;~Zhongyuan_Wang5;~Huawei_Shen1;~Xueqi_Cheng1", "gender": "M;F;Not Specified;M;;M;M;M;M;M", "homepage": "https://liang-hou.github.io/;https://caoqi92.github.io/biography/;https://yuanyige.github.io;https://zhaosongtao.com;;;;;https://www.ict.ac.cn/sourcedb/cn/jssrck/201402/t20140221_4037648.html;https://people.ucas.ac.cn/~cxq?language=en", "dblp": ";40/5905;205/6235;;;250/5783;;84/6394-6.html;;44/912", "google_scholar": "X48pntMAAAAJ;FflWb1gAAAAJ;lf6GtCIAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;P6MraaYAAAAJ;4XVJrRAAAAAJ;;hY8aLqAAAAAJ", "orcid": ";;0000-0001-8856-668X;;;;0000-0001-7225-565X;;0000-0002-1081-8119;", "linkedin": ";;;;;;;;;", "or_profile": "~Liang_Hou1;~Qi_Cao1;~Yige_Yuan1;~Songtao_Zhao1;~Chongyang_Ma1;~Siyuan_Pan1;~Pengfei_Wan1;~Zhongyuan_Wang5;~Huawei_Shen1;~Xueqi_Cheng1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences, China;Institute of Computing Technology, Chinese Academy of Sciences;Kuaishou Technology;;;Kuaishou Technology;Kuaishou Inc.;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;kuaishou.com;;;kuaishou.com;kuaishou.com;ict.ac.cn;ict.ac.cn", "position": "PhD student;Associate Professor;PhD student;Researcher;;;Director;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhou2023augmentationaware,\ntitle={Augmentation-Aware Self-Supervision for Data-Efficient {GAN} Training},\nauthor={Liang Hou and Qi Cao and Yige Yuan and Songtao Zhao and Chongyang Ma and Siyuan Pan and Pengfei Wan and Zhongyuan Wang and Huawei Shen and Xueqi Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OiivS2mqQf}\n}", "github": "", "project": "", "reviewers": "NBv7;nM6o;b4Yw;CpCq;xecU", "pdf_size": 24280993, "rating": "5;5;5;5;5", "confidence": "5;4;5;2;3", "soundness": "3;3;3;2;2", "novelty": "2;3;3;2;2", "presentation": "2;3;4;3;3", "wc_summary": "99;61;63;106;48", "wc_strengths": "30;40;68;36;24", "wc_weaknesses": "170;118;106;98;231", "wc_questions": "5;127;3;41;33", "wc_limitations": "21;6;3;17;21", "wc_review": "325;352;243;298;357", "wc_reply_reviewers": "301;176;0;33;34", "wc_reply_authors": "451;1420;0;0;0", "reply_reviewers": "2;2;0;1;1", "reply_authors": "2;4;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 75.4, 22.826300620118012 ], "wc_strengths_avg": [ 39.6, 15.200000000000001 ], "wc_weaknesses_avg": [ 144.6, 49.95838267998675 ], "wc_questions_avg": [ 41.8, 45.15927368769343 ], "wc_limitations_avg": [ 13.6, 7.631513611335565 ], "wc_review_avg": [ 315.0, 41.727688649145186 ], "wc_reply_reviewers_avg": [ 108.8, 113.69678975239363 ], "wc_reply_authors_avg": [ 374.2, 551.3026029323642 ], 
"reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=33092804416035167&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;kuaishou.com;;;kuaishou.com;kuaishou.com;ict.ac.cn;ict.ac.cn", "author_num": 10, "aff_unique_index": "0;0;0;1;1;2;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Kuaishou Technology;Kuaishou Inc.", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;https://www.kuaishou.com;https://www.kuaishou.com", "aff_unique_abbr": "CAS;Kuaishou;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Transformers are uninterpretable with myopic methods: a case study with bounded Dyck grammars", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71802", "id": "OitmaxSAUu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/79ba1b827d3fc58e129d1cbfc8ff69f2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OitmaxSAUu", "openreview": "https://openreview.net/forum?id=OitmaxSAUu", "poster": "/media/PosterPDFs/NeurIPS%202023/71802.png?t=1701793527.8739405", "slides": "https://nips.cc/virtual/2023/poster/71802", "video": "https://nips.cc/virtual/2023/poster/71802", "author_site": "Kaiyue Wen, Yuchen Li, Bingbin Liu, Andrej Risteski", "tldr": "", "abstract": "Transformer interpretability aims to understand the algorithm implemented by a learned Transformer by examining various aspects of the model, such as the weight matrices or the attention patterns.\nIn this work, through a combination of theoretical results and carefully controlled experiments on synthetic data, we take a critical view\nof methods that exclusively focus on individual parts of the model, rather than consider the network as a whole.\nWe consider a simple synthetic setup of learning a (bounded) Dyck language. Theoretically, we show that the set of models that (exactly or approximately) solve this task satisfy a structural characterization derived from ideas in formal languages (the pumping lemma).\nWe use this characterization to show that the set of optima is qualitatively rich; in particular, the attention pattern of a single layer can be \"nearly randomized\", while preserving the functionality of the network.\nWe also show via extensive experiments that these constructions are not merely a theoretical artifact: even with severe constraints to the architecture of the model, vastly different solutions can be reached via standard training. 
Thus, interpretability claims based on inspecting individual heads or weight matrices in the Transformer can be misleading.", "keywords": "Transformer;Self Attention;Dyck Language;Context Free Grammar;Formal Language;Theory;Interpretability", "primary_area": "", "supplementary_material": "/attachment/1cf257d5664b3613ed0f795a1905eed545057531.zip", "author": "Kaiyue Wen;Yuchen Li;Bingbin Liu;Andrej Risteski", "authorids": "~Kaiyue_Wen1;~Yuchen_Li5;~Bingbin_Liu1;~Andrej_Risteski2", "gender": "M;;F;M", "homepage": "https://whenwen.github.io/;https://yuchenli01.github.io/;https://clarabing.github.io/;", "dblp": "322/0395;;222/1554;63/11143", "google_scholar": ";https://scholar.google.com/citations?hl=en;2ud06rQAAAAJ;", "orcid": "0000-0002-3128-868X;;;", "linkedin": "kaiyue-wen-a3a336192/;yuchenli01/;;", "or_profile": "~Kaiyue_Wen1;~Yuchen_Li5;~Bingbin_Liu1;~Andrej_Risteski2", "aff": "Stanford University;Google;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "stanford.edu;google.com;cmu.edu;cmu.edu", "position": "Intern;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwen2023transformers,\ntitle={Transformers are uninterpretable with myopic methods: a case study with bounded Dyck grammars},\nauthor={Kaiyue Wen and Yuchen Li and Bingbin Liu and Andrej Risteski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OitmaxSAUu}\n}", "github": "", "project": "", "reviewers": "ksf8;46oE;QYzn;ZsLs", "pdf_size": 1929483, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "2;3;2;2", "wc_summary": "82;97;281;263", "wc_strengths": "58;72;201;234", "wc_weaknesses": "85;378;447;128", "wc_questions": "37;4;146;918", "wc_limitations": "10;4;1;1", "wc_review": "272;555;1076;1544", "wc_reply_reviewers": "0;46;48;575", "wc_reply_authors": "0;376;0;738", "reply_reviewers": "0;1;1;4", "reply_authors": "1;3;1;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 180.75, 91.62525579773298 ], "wc_strengths_avg": [ 141.25, 77.29610274781 ], "wc_weaknesses_avg": [ 259.5, 155.67674842441951 ], "wc_questions_avg": [ 276.25, 374.22210984921776 ], "wc_limitations_avg": [ 4.0, 3.6742346141747673 ], "wc_review_avg": [ 861.75, 488.1774139593105 ], "wc_reply_reviewers_avg": [ 167.25, 236.19629019101887 ], "wc_reply_authors_avg": [ 278.5, 306.5008156595998 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13366284831389506957&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "stanford.edu;google.com;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Stanford University;Google;Carnegie Mellon University", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.stanford.edu;https://www.google.com;https://www.cmu.edu", "aff_unique_abbr": "Stanford;Google;CMU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Eliminating Catastrophic Overfitting Via Abnormal Adversarial Examples 
Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71801", "id": "Oj7Mrb4009", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d65befe6b80ecf7f180b4def503d7776-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Oj7Mrb4009", "openreview": "https://openreview.net/forum?id=Oj7Mrb4009", "poster": "/media/PosterPDFs/NeurIPS%202023/71801.png?t=1698137192.7070446", "slides": "https://nips.cc/virtual/2023/poster/71801", "video": "https://nips.cc/virtual/2023/poster/71801", "author_site": "Runqi Lin, Chaojian Yu, Tongliang Liu", "tldr": "", "abstract": "Single-step adversarial training (SSAT) has demonstrated the potential to achieve both efficiency and robustness. However, SSAT suffers from catastrophic overfitting (CO), a phenomenon that leads to a severely distorted classifier, making it vulnerable to multi-step adversarial attacks. In this work, we observe that some adversarial examples generated on the SSAT-trained network exhibit anomalous behaviour, that is, although these training samples are generated by the inner maximization process, their associated loss decreases instead, which we named abnormal adversarial examples (AAEs). Upon further analysis, we discover a close relationship between AAEs and classifier distortion, as both the number and outputs of AAEs undergo a significant variation with the onset of CO. Given this observation, we re-examine the SSAT process and uncover that before the occurrence of CO, the classifier already displayed a slight distortion, indicated by the presence of few AAEs. Furthermore, the classifier directly optimizing these AAEs will accelerate its distortion, and correspondingly, the variation of AAEs will sharply increase as a result. In such a vicious circle, the classifier rapidly becomes highly distorted and manifests as CO within a few iterations. These observations motivate us to eliminate CO by hindering the generation of AAEs. Specifically, we design a novel method, termed Abnormal Adversarial Examples Regularization (AAER), which explicitly regularizes the variation of AAEs to hinder the classifier from becoming distorted. Extensive experiments demonstrate that our method can effectively eliminate CO and further boost adversarial robustness with negligible additional computational overhead. 
Our implementation can be found at https://github.com/tmllab/2023_NeurIPS_AAER.", "keywords": "adversarial training;catastrophic overfitting", "primary_area": "", "supplementary_material": "", "author": "Runqi Lin;Chaojian Yu;Tongliang Liu", "authorids": "~Runqi_Lin1;~Chaojian_Yu1;~Tongliang_Liu1", "gender": "M;M;M", "homepage": "https://runqilin.github.io;;https://tongliang-liu.github.io/", "dblp": "359/1108;223/9872;150/6667", "google_scholar": "Zg7PKbcAAAAJ;b3ltuG8AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": "0009-0000-6607-7754;;", "linkedin": ";;", "or_profile": "~Runqi_Lin1;~Chaojian_Yu1;~Tongliang_Liu1", "aff": "University of Sydney;The University of Sydney;University of Sydney", "aff_domain": "usyd.edu.au;uni.sydney.edu.au;sydney.edu.au", "position": "PhD student;PhD student;Lecturer", "bibtex": "@inproceedings{\nlin2023eliminating,\ntitle={Eliminating Catastrophic Overfitting Via Abnormal Adversarial Examples Regularization},\nauthor={Runqi Lin and Chaojian Yu and Tongliang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Oj7Mrb4009}\n}", "github": "", "project": "", "reviewers": "6vch;EF7n;N8c9;GeQY", "pdf_size": 12150803, "rating": "4;4;4;4", "confidence": "4;5;4;3", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;4;3;4", "wc_summary": "53;55;79;87", "wc_strengths": "28;46;49;65", "wc_weaknesses": "56;115;256;26", "wc_questions": "23;43;74;204", "wc_limitations": "2;9;20;1", "wc_review": "162;268;478;383", "wc_reply_reviewers": "0;0;45;0", "wc_reply_authors": "98;106;430;89", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;4;2", "rating_avg": [ 4.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 68.5, 14.79019945774904 ], "wc_strengths_avg": [ 47.0, 13.133925536563698 ], "wc_weaknesses_avg": [ 113.25, 88.41768771009565 ], "wc_questions_avg": [ 86.0, 70.50886469090251 ], "wc_limitations_avg": [ 8.0, 7.582875444051551 ], "wc_review_avg": [ 322.75, 118.92303183151698 ], "wc_reply_reviewers_avg": [ 11.25, 19.48557158514987 ], "wc_reply_authors_avg": [ 180.75, 144.03016177176224 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1741218577849407485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "usyd.edu.au;uni.sydney.edu.au;sydney.edu.au", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Percentile Criterion Optimization in Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71800", "id": "OjlZqQzw51", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1dec73169509c223220744b2c9b2df37-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OjlZqQzw51", "openreview": "https://openreview.net/forum?id=OjlZqQzw51", "poster": "/media/PosterPDFs/NeurIPS%202023/71800.png?t=1701811438.67052", "slides": 
"https://nips.cc/virtual/2023/poster/71800", "video": "https://nips.cc/virtual/2023/poster/71800", "author_site": "Cyrus Cousins, Elita Lobo, Marek Petrik, Yair Zick", "tldr": "", "abstract": "In reinforcement learning, robust policies for high-stakes decision-making problems with limited data are usually computed by optimizing the percentile criterion. The percentile criterion is optimized by constructing an uncertainty set that contains the true model with high probability and optimizing the policy for the worst model in the set. Since the percentile criterion is non-convex, constructing these sets itself is challenging. Existing works use Bayesian credible regions as uncertainty sets, but they are often unnecessarily large and result in learning overly conservative policies. To overcome these shortcomings, we propose a novel Value-at-Risk based dynamic programming algorithm to optimize the percentile criterion without explicitly constructing any uncertainty sets. Our theoretical and empirical results show that our algorithm implicitly constructs much smaller uncertainty sets and learns less-conservative robust policies.", "keywords": "Reinforcement Learning;Bayesian Uncertainty;Robustness", "primary_area": "", "supplementary_material": "/attachment/637a6969023dd6e9182fabdd607465be2a69ab63.zip", "author": "Cyrus Cousins;Elita Lobo;Marek Petrik;Yair Zick", "authorids": "~Cyrus_Cousins1;~Elita_Lobo1;~Marek_Petrik2;~Yair_Zick1", "gender": ";F;;M", "homepage": "https://www.cyruscousins.online/;https://elitalobo.github.io;;https://people.cs.umass.edu/~yzick/", "dblp": "202/6684;;;90/9924", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.tw/citations?user=m0PW6DQAAAAJ", "orcid": "0000-0002-1691-0282;;;0000-0002-0635-6230", "linkedin": ";;;", "or_profile": "~Cyrus_Cousins1;~Elita_Lobo1;~Marek_Petrik2;~Yair_Zick1", "aff": "University of Massachusetts Amherst;University of New Hampshire;;University of Massachusetts, Amherst", "aff_domain": "umass.edu;wildcats.unh.edu;;umass.edu", "position": "Postdoc;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\ncousins2023percentile,\ntitle={Percentile Criterion Optimization in Offline Reinforcement Learning},\nauthor={Cyrus Cousins and Elita Lobo and Marek Petrik and Yair Zick},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OjlZqQzw51}\n}", "github": "", "project": "", "reviewers": "sypg;8VhD;CU8M;CS3B;JNMN", "pdf_size": 807246, "rating": "5;5;5;6;6", "confidence": "3;2;4;3;2", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "4;3;3;3;2", "wc_summary": "86;99;101;109;95", "wc_strengths": "84;48;64;66;78", "wc_weaknesses": "90;130;170;83;291", "wc_questions": "142;121;31;56;89", "wc_limitations": "6;5;7;63;8", "wc_review": "408;403;373;377;561", "wc_reply_reviewers": "0;0;530;11;332", "wc_reply_authors": "0;0;1715;0;771", "reply_reviewers": "0;0;2;1;1", "reply_authors": "1;1;4;1;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 98.0, 7.536577472566709 ], "wc_strengths_avg": [ 68.0, 12.457929201917949 ], "wc_weaknesses_avg": [ 152.8, 75.80343000155072 ], "wc_questions_avg": [ 87.8, 40.691030952778775 ], "wc_limitations_avg": [ 17.8, 22.62211307548435 ], "wc_review_avg": [ 424.4, 69.67811708133335 ], 
"wc_reply_reviewers_avg": [ 174.6, 218.54939945010145 ], "wc_reply_authors_avg": [ 497.2, 678.1779707421939 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.32732683535398854, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15796668566142605138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "umass.edu;wildcats.unh.edu;;umass.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Massachusetts Amherst;University of New Hampshire", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.unh.edu", "aff_unique_abbr": "UMass Amherst;UNH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "KD-Zero: Evolving Knowledge Distiller for Any Teacher-Student Pairs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71799", "id": "OlMKa5YZ8e", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbc8ce0fdfcd55172d73fb05dbae07fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OlMKa5YZ8e", "openreview": "https://openreview.net/forum?id=OlMKa5YZ8e", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71799", "video": "https://nips.cc/virtual/2023/poster/71799", "author_site": "Lujun Li, Peijie Dong, Anggeng Li, Zimian Wei, Ya Yang", "tldr": "", "abstract": "Knowledge distillation (KD) has emerged as an effective technique for compressing models that can enhance the lightweight model. Conventional KD methods propose various designs to allow student model to imitate the teacher better. However, these handcrafted KD designs heavily rely on expert knowledge and may be sub-optimal for various teacher-student pairs. In this paper, we present a novel framework, KD-Zero, which utilizes evolutionary search to automatically discover promising distiller from scratch for any teacher-student architectures. Specifically, we first decompose the generalized distiller into knowledge transformations, distance functions, and loss weights. Then, we construct our distiller search space by selecting advanced operations for these three components. With sharpness and represent gap as fitting objectives, we evolve candidate populations and generate better distillers by crossover and mutation. To ensure efficient searching, we employ the loss-rejection protocol, search space shrinkage, and proxy settings during the search process. In this manner, the discovered distiller can address the capacity gap and cross-architecture challenges for any teacher-student pairs in the final distillation stage. Comprehensive experiments reveal that KD-Zero consistently outperforms other state-of-the-art methods across diverse architectures on classification, detection, and segmentation tasks. Noticeably, we provide some practical insights in designing the distiller by analyzing the distiller discovered. 
Code is available in the supplementary materials.", "keywords": "Knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/1a14b811f0bda0da0d792ed24770ab50118347f2.pdf", "author": "Lujun Li;Peijie Dong;Anggeng Li;Zimian Wei;Yang Ya", "authorids": "~Lujun_Li1;~Peijie_Dong1;~Anggeng_Li1;~Zimian_Wei1;~Yang_Ya1", "gender": ";M;M;F;F", "homepage": ";https://pprp.github.io;https://github.com/AgL2;;https://github.com/yyya9/Miracle.github.io", "dblp": ";315/4734;354/8829.html;221/0636;77/10618", "google_scholar": ";TqS6s4gAAAAJ;;;", "orcid": ";0000-0003-1952-4544;;;", "linkedin": ";;;;", "or_profile": "~Lujun_Li1;~Peijie_Dong1;~Anggeng_Li1;~Zimian_Wei1;~Yang_Ya1", "aff": ";National University of Defense Technology;Huawei Technologies Ltd.;National University of Defense Technology;City University of Hong Kong", "aff_domain": ";nudt.edu.cn;huawei.com;nudt.edu.cn;cityu.edu", "position": ";MS student;Intern;PhD student;PhD student", "bibtex": "@inproceedings{\nli2023kdzero,\ntitle={{KD}-Zero: Evolving Knowledge Distiller for Any Teacher-Student Pairs},\nauthor={Lujun Li and Peijie Dong and Anggeng Li and Zimian Wei and Yang Ya},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OlMKa5YZ8e}\n}", "github": "", "project": "", "reviewers": "BLS2;AvFD;WY3i;3PTD", "pdf_size": 12633663, "rating": "3;6;7;7", "confidence": "5;4;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "36;54;118;53", "wc_strengths": "8;36;55;17", "wc_weaknesses": "272;68;120;102", "wc_questions": "2;27;4;54", "wc_limitations": "5;22;1;1", "wc_review": "323;207;298;227", "wc_reply_reviewers": "0;18;66;14", "wc_reply_authors": "1012;125;400;195", "reply_reviewers": "0;1;2;1", "reply_authors": "4;3;4;3", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.25, 31.283981524096323 ], "wc_strengths_avg": [ 29.0, 18.096961070853858 ], "wc_weaknesses_avg": [ 140.5, 78.18407766291037 ], "wc_questions_avg": [ 21.75, 21.05201890555868 ], "wc_limitations_avg": [ 7.25, 8.671072598012312 ], "wc_review_avg": [ 263.75, 48.100805606559234 ], "wc_reply_reviewers_avg": [ 24.5, 24.8746859276655 ], "wc_reply_authors_avg": [ 433.0, 349.2270035378135 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.45749571099781405, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10316656086392497721&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";nudt.edu.cn;huawei.com;nudt.edu.cn;cityu.edu", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "National University of Defense Technology;Huawei;City University of Hong Kong", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.huawei.com;https://www.cityu.edu.hk", "aff_unique_abbr": "NUDT;Huawei;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Federated Multi-Objective Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71798", "id": "OlSTwlz96r", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/7cb2c2a8d35576c00078b6591ec26a7d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OlSTwlz96r", "openreview": "https://openreview.net/forum?id=OlSTwlz96r", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71798", "video": "https://nips.cc/virtual/2023/poster/71798", "author_site": "Haibo Yang, Zhuqing Liu, Jia Liu, Chaosheng Dong, Michinari Momma", "tldr": "", "abstract": "In recent years, multi-objective optimization (MOO) emerges as a foundational problem underpinning many multi-agent multi-task learning applications. However, existing algorithms in MOO literature remain limited to centralized learning settings, which do not satisfy the distributed nature and data privacy needs of such multi-agent multi-task learning applications. This motivates us to propose a new federated multi-objective learning (FMOL) framework with multiple clients distributively and collaboratively solving an MOO problem while keeping their training data private. Notably, our FMOL framework allows a different set of objective functions across different clients to support a wide range of applications, which advances and generalizes the MOO formulation to the federated learning paradigm for the first time. For this FMOL framework, we propose two new federated multi-objective optimization (FMOO) algorithms called federated multi-gradient descent averaging (FMGDA) and federated stochastic multi-gradient descent averaging (FSMGDA). Both algorithms allow local updates to significantly reduce communication costs, while achieving the {\\em same} convergence rates as those of their algorithmic counterparts in the single-objective federated learning. Our extensive experiments also corroborate the efficacy of our proposed FMOO algorithms.", "keywords": "Multi-Objective Learning;Federated Learning", "primary_area": "", "supplementary_material": "/attachment/715578b53073b1fb6a6cee36f97543a3d85eb4f4.pdf", "author": "Haibo Yang;Zhuqing Liu;Jia Liu;Chaosheng Dong;Michinari Momma", "authorids": "~Haibo_Yang1;~Zhuqing_Liu2;~Jia_Liu1;~Chaosheng_Dong1;~Michinari_Momma2", "gender": "M;F;M;M;", "homepage": "https://haibo-yang-osu.github.io/homepage/;https://github.com/Zhuqing-Liu;https://kevinliu-osu.github.io/index.html;https://chaoshengdong.github.io/;", "dblp": "43/7829-1;195/1161;;225/6556;34/6761.html", "google_scholar": "eyy22VoAAAAJ;;Ofx3dScAAAAJ;nPratvEAAAAJ;UDoWGDUAAAAJ", "orcid": "0000-0002-3245-2728;0000-0003-0146-5101;;0000-0003-4491-0594;", "linkedin": ";;;chaosheng-dong/;", "or_profile": "~Haibo_Yang1;~Zhuqing_Liu2;~Jia_Liu1;~Chaosheng_Dong1;~Michinari_Momma2", "aff": "Ohio State University;Ohio State University;The Ohio State University;Amazon;Amazon", "aff_domain": "osu.edu;osu.edu;osu.edu;amazon.com;amazon.com", "position": "PhD student;PhD student;Assistant Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nyang2023federated,\ntitle={Federated Multi-Objective Learning},\nauthor={Haibo Yang and Zhuqing Liu and Jia Liu and Chaosheng Dong and Michinari Momma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OlSTwlz96r}\n}", "github": "", "project": "", "reviewers": "gLj4;TvWz;8EfV;nQxG;kXdB", "pdf_size": 1954176, "rating": "3;4;6;7;7", "confidence": "3;3;3;3;3", "soundness": "3;3;3;4;3", "novelty": "2;2;3;4;3", "presentation": "3;3;3;4;4", "wc_summary": "37;112;174;93;34", "wc_strengths": "47;20;104;57;41", "wc_weaknesses": 
"171;113;123;12;33", "wc_questions": "32;58;35;1;108", "wc_limitations": "1;4;18;4;10", "wc_review": "288;307;454;167;226", "wc_reply_reviewers": "0;186;81;5;10", "wc_reply_authors": "43;1476;198;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;5;2;1;1", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 90.0, 51.94997593839674 ], "wc_strengths_avg": [ 53.8, 27.86682615584344 ], "wc_weaknesses_avg": [ 90.4, 59.179726258238134 ], "wc_questions_avg": [ 46.8, 35.57189902155914 ], "wc_limitations_avg": [ 7.4, 6.053098380168622 ], "wc_review_avg": [ 288.4, 96.3340023044823 ], "wc_reply_reviewers_avg": [ 56.4, 71.2421223715296 ], "wc_reply_authors_avg": [ 343.4, 570.9660585358818 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8254080459370900525&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "osu.edu;osu.edu;osu.edu;amazon.com;amazon.com", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Ohio State University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.osu.edu;https://www.amazon.com", "aff_unique_abbr": "OSU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MAViL: Masked Audio-Video Learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71797", "id": "OmTMaTbjac", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/40b60852a4abdaa696b5a1a78da34635-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OmTMaTbjac", "openreview": "https://openreview.net/forum?id=OmTMaTbjac", "poster": "/media/PosterPDFs/NeurIPS%202023/71797.png?t=1702394687.102992", "slides": "https://nips.cc/virtual/2023/poster/71797", "video": "https://nips.cc/virtual/2023/poster/71797", "author_site": "Po-Yao Huang, Vasu Sharma, Hu Xu, Chaitanya Ryali, Chaitanya Ryali, haoqi fan, Yanghao Li, Shang-Wen Li, Gargi Ghosh, Jitendra Malik, Christoph Feichtenhofer", "tldr": "", "abstract": "We present Masked Audio-Video Learners (MAViL) to learn audio-visual representations with three complementary forms of self-supervision: (1) reconstructing masked raw audio and video inputs, (2) intra-modal and inter-modal contrastive learning with masking, and (3) self-training to predict aligned and contextualized audio-video representations learned from the first two objectives. Empirically, MAViL achieves state-of-the-art audio-video classification performance on AudioSet (53.3 mAP) and VGGSound (67.1\\% accuracy), surpassing recent self-supervised models and supervised models that utilize external labeled data. Notably, pre-training with MAViL not only enhances performance in multimodal classification and retrieval tasks, but it also improves the representations of each modality in isolation, without relying on information from the other modality during uni-modal fine-tuning or inference. 
The code and models are available at https://github.com/facebookresearch/MAViL.", "keywords": "self-supervised learning;audio representation learning;audio classification", "primary_area": "", "supplementary_material": "/attachment/849a9ffeee80d81d03bf83450bf85426c787b5a1.pdf", "author": "Po-Yao Huang;Vasu Sharma;Hu Xu;Chaitanya Ryali;Haoqi Fan;Yanghao Li;Shang-Wen Li;Gargi Ghosh;Jitendra Malik;Christoph Feichtenhofer", "authorids": "~Po-Yao_Huang2;~Vasu_Sharma1;~Hu_Xu1;~Chaitanya_Ryali1;~Haoqi_Fan2;~Yanghao_Li1;~Shang-Wen_Li1;~Gargi_Ghosh3;~Jitendra_Malik2;~Christoph_Feichtenhofer4", "gender": "M;M;;M;M;M;F;M;M;M", "homepage": "http://vasusharma.github.io;https://howardhsu.github.io/;;https://haoqifan.github.io/;https://lyttonhao.github.io/;https://swdanielli.github.io/;https://www.linkedin.com/in/gargi-ghosh-5b1087b;https://people.eecs.berkeley.edu/~malik/;http://feichtenhofer.github.io/;https://berniebear.github.io/", "dblp": "165/0762;;231/7672;137/5747;159/3873;35/9232-1.html;;58/2944;127/1937;154/3943-1", "google_scholar": "PLUB4dIAAAAJ;SaH2yWMAAAAJ;;76B8lrgAAAAJ;-VgS8AIAAAAJ;wFI97HUAAAAJ;k5akwCcAAAAJ;oY9R5YQAAAAJ;UxuqG1EAAAAJ;E8K25LIAAAAJ", "orcid": ";;;;;;;0000-0003-3695-1580;;", "linkedin": "vasu-sharma-6b460592?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;;;;shang-wen-daniel-li-0109b579/;gargi-ghosh-5b1087b;;christoph-feichtenhofer-549433a1;", "or_profile": "~Vasu_Sharma1;~Hu_Xu1;~Chaitanya_Ryali1;~Haoqi_Fan2;~Yanghao_Li1;~Shang-Wen_Li1;~Gargi_Ghosh3;~Jitendra_Malik2;~Christoph_Feichtenhofer4;~Po-Yao_Huang1", "aff": "Meta Facebook;FAIR, AMI Foundation;University of California, San Diego;Facebook AI Research;Meta;Meta Facebook;Meta AI;University of California, Berkeley;Meta FAIR;Meta", "aff_domain": "fb.com;meta.com;ucsd.edu;fb.com;meta.com;fb.com;meta.com;berkeley.edu;meta.com;meta.com", "position": "Researcher;Research Scientist;PhD student;Researcher;Researcher;Research Manager;Researcher;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nhuang2023mavil,\ntitle={{MAV}iL: Masked Audio-Video Learners},\nauthor={Po-Yao Huang and Vasu Sharma and Hu Xu and Chaitanya Ryali and Haoqi Fan and Yanghao Li and Shang-Wen Li and Gargi Ghosh and Jitendra Malik and Christoph Feichtenhofer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OmTMaTbjac}\n}", "github": "", "project": "", "reviewers": "dLj4;z1cD;RTTA;kuAy", "pdf_size": 8375130, "rating": "4;4;7;8", "confidence": "5;5;4;4", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "wc_summary": "74;87;61;79", "wc_strengths": "96;62;88;78", "wc_weaknesses": "206;131;101;38", "wc_questions": "258;584;88;33", "wc_limitations": "3;107;39;10", "wc_review": "637;971;377;238", "wc_reply_reviewers": "51;292;49;0", "wc_reply_authors": "163;644;56;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 75.25, 9.443913383762052 ], "wc_strengths_avg": [ 81.0, 12.68857754044952 ], "wc_weaknesses_avg": [ 119.0, 60.41109169680681 ], "wc_questions_avg": [ 240.75, 214.83176557483299 ], "wc_limitations_avg": [ 39.75, 41.105808591973954 ], "wc_review_avg": [ 555.75, 279.2627570944611 ], "wc_reply_reviewers_avg": [ 98.0, 113.852975367357 ], "wc_reply_authors_avg": [ 215.75, 
254.09090400878188 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.9801960588196068, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13837764868772904396&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "fb.com;meta.com;ucsd.edu;fb.com;meta.com;fb.com;meta.com;berkeley.edu;meta.com;meta.com", "author_num": 10, "aff_unique_index": "0;1;2;0;0;0;0;3;0;0", "aff_unique_norm": "Meta;FAIR;University of California, San Diego;University of California, Berkeley", "aff_unique_dep": "Meta Platforms, Inc.;AMI Foundation;;", "aff_unique_url": "https://meta.com;https://www.fair.iai.uni-sb.de/;https://www.ucsd.edu;https://www.berkeley.edu", "aff_unique_abbr": "Meta;FAIR;UCSD;UC Berkeley", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San Diego;Berkeley", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;France" }, { "title": "Reading Relevant Feature from Global Representation Memory for Visual Object Tracking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71796", "id": "On0IDMYKw2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2349293cb1bf2ce36d5c566f660f957e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=On0IDMYKw2", "openreview": "https://openreview.net/forum?id=On0IDMYKw2", "poster": "/media/PosterPDFs/NeurIPS%202023/71796.png?t=1699690601.3794389", "slides": "https://nips.cc/virtual/2023/poster/71796", "video": "https://nips.cc/virtual/2023/poster/71796", "author_site": "Xinyu Zhou, Pinxue Guo, Lingyi Hong, Jinglun Li, Wei Zhang, Weifeng Ge, Wenqiang Zhang", "tldr": "", "abstract": "Reference features from a template or historical frames are crucial for visual object tracking. Prior works utilize all features from a fixed template or memory for visual object tracking. However, due to the dynamic nature of videos, the required reference historical information for different search regions at different time steps is also inconsistent. Therefore, using all features in the template and memory can lead to redundancy and impair tracking performance. To alleviate this issue, we propose a novel tracking paradigm, consisting of a relevance attention mechanism and a global representation memory, which can adaptively assist the search region in selecting the most relevant historical information from reference features. Specifically, the proposed relevance attention mechanism in this work differs from previous approaches in that it can dynamically choose and build the optimal global representation memory for the current frame by accessing cross-frame information globally. Moreover, it can flexibly read the relevant historical information from the constructed memory to reduce redundancy and counteract the negative effects of harmful information.
Extensive experiments validate the effectiveness of the proposed method, achieving competitive performance on five challenging datasets with 71 FPS.", "keywords": "object tracking;global representation memory;transformer", "primary_area": "", "supplementary_material": "", "author": "Xinyu Zhou;Pinxue Guo;Lingyi Hong;Jinglun Li;Wei Zhang;Weifeng Ge;Wenqiang Zhang", "authorids": "~Xinyu_Zhou5;~Pinxue_Guo1;~Lingyi_Hong1;~Jinglun_Li1;~Wei_Zhang8;~Weifeng_Ge2;~Wenqiang_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://www.researchgate.net/profile/Xinyu-Zhou-21;;https://lingyihongfd.github.io/;http://www.fudanroilab.com/2020/05/01/JinglunLi.html;http://homepage.fudan.edu.cn/weizh/;http://www.weifengge.net/;https://www.fudanroilab.com/2021/07/01/WenqiangZhang.html", "dblp": ";333/7534;311/7466;;;155/3277.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=Zdm-YgkAAAAJ;d_7fUjoAAAAJ;wHh_m_IAAAAJ;;;wFs402oAAAAJ;vL-VEJYAAAAJ", "orcid": ";;;0009-0001-4930-6284;0000-0002-2358-8543;0000-0002-6258-6225;0000-0002-3339-8751", "linkedin": ";;;;;;", "or_profile": "~Xinyu_Zhou5;~Pinxue_Guo1;~Lingyi_Hong1;~Jinglun_Li1;~Wei_Zhang8;~Weifeng_Ge2;~Wenqiang_Zhang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fdu.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023reading,\ntitle={Reading Relevant Feature from Global Representation Memory for Visual Object Tracking},\nauthor={Xinyu Zhou and Pinxue Guo and Lingyi Hong and Jinglun Li and Wei Zhang and Weifeng Ge and Wenqiang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=On0IDMYKw2}\n}", "github": "", "project": "", "reviewers": "vhAu;nyT7;UhjK;jJzj", "pdf_size": 1074523, "rating": "4;5;6;7", "confidence": "5;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "68;68;45;111", "wc_strengths": "46;73;66;46", "wc_weaknesses": "381;148;110;87", "wc_questions": "6;2;61;24", "wc_limitations": "1;2;1;6", "wc_review": "502;293;283;274", "wc_reply_reviewers": "0;73;13;74", "wc_reply_authors": "655;1202;68;308", "reply_reviewers": "0;1;1;2", "reply_authors": "3;4;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.0, 23.86419912756345 ], "wc_strengths_avg": [ 57.75, 12.007809958522827 ], "wc_weaknesses_avg": [ 181.5, 117.22307793263236 ], "wc_questions_avg": [ 23.25, 23.31710745354149 ], "wc_limitations_avg": [ 2.5, 2.0615528128088303 ], "wc_review_avg": [ 338.0, 94.9236535327207 ], "wc_reply_reviewers_avg": [ 40.0, 33.81567683782183 ], "wc_reply_authors_avg": [ 558.25, 426.24662755264114 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9486832980505139, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12035207664337861672&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;fdu.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 7, 
"aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Semantic Image Synthesis with Unconditional Generator", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71795", "id": "OoPLRGBKjM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/683464f40aa1a6b7c939c3e9cd64b1fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OoPLRGBKjM", "openreview": "https://openreview.net/forum?id=OoPLRGBKjM", "poster": "/media/PosterPDFs/NeurIPS%202023/71795.png?t=1701975860.0117285", "slides": "https://nips.cc/virtual/2023/poster/71795", "video": "https://nips.cc/virtual/2023/poster/71795", "author_site": "JungWoo Chae, Hyunin Cho, Sooyeon Go, Kyungmook Choi, Youngjung Uh", "tldr": "", "abstract": "Semantic image synthesis (SIS) aims to generate realistic images according to semantic masks given by a user. Although recent methods produce high quality results with fine spatial control, SIS requires expensive pixel-level annotation of the training images. On the other hand, manipulating intermediate feature maps in a pretrained unconditional generator such as StyleGAN supports coarse spatial control without heavy annotation. In this paper, we introduce a new approach, for reflecting user's detailed guiding masks on a pretrained unconditional generator. \bOur method converts a user's guiding mask to a proxy mask through a semantic mapper. Then the proxy mask conditions the resulting image through a rearranging network based on cross-attention mechanism. The proxy mask is simple clustering of intermediate feature maps in the generator. The semantic mapper and the rearranging network are easy to train (less than half an hour). Our method is useful for many tasks: semantic image synthesis, spatially editing real images, and unaligned local transplantation. 
Last but not least, it is generally applicable to various datasets such as human faces, animal faces, and churches.", "keywords": "Generative model", "primary_area": "", "supplementary_material": "/attachment/be6f354697f77ed0286e77c5f90e622cb31bd588.pdf", "author": "JungWoo Chae;Hyunin Cho;Sooyeon Go;Kyungmook Choi;Youngjung Uh", "authorids": "~JungWoo_Chae2;~Hyunin_Cho1;~Sooyeon_Go1;~Kyungmook_Choi1;~Youngjung_Uh2", "gender": "M;;F;M;", "homepage": "https://github.com/JungWoo-Chae;https://github.com/hhyunn2;;https://chkmook.github.io;https://vilab.yonsei.ac.kr/member/professor", "dblp": ";369/7100;;;57/10511", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;aMWDMqsAAAAJ;https://scholar.google.com/citations?view_op=list_works;fnxwuWAAAAAJ;BWBGrEEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;youngjung-uh-78b459b5/", "or_profile": "~JungWoo_Chae2;~Hyunin_Cho1;~Sooyeon_Go1;~Kyungmook_Choi1;~Youngjung_Uh2", "aff": "Yonsei University;Yonsei University;Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "MS student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchae2023semantic,\ntitle={Semantic Image Synthesis with Unconditional Generator},\nauthor={JungWoo Chae and Hyunin Cho and Sooyeon Go and Kyungmook Choi and Youngjung Uh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OoPLRGBKjM}\n}", "github": "", "project": "", "reviewers": "pArX;4iJ7;iJE3;Lq4s;swNj", "pdf_size": 29278379, "rating": "4;5;5;5;6", "confidence": "4;3;5;5;5", "soundness": "3;3;3;3;4", "novelty": "3;3;2;3;3", "presentation": "3;2;3;2;4", "wc_summary": "42;55;60;110;106", "wc_strengths": "23;67;26;71;208", "wc_weaknesses": "157;41;320;97;169", "wc_questions": "10;42;7;91;6", "wc_limitations": "2;9;5;35;29", "wc_review": "234;214;418;404;518", "wc_reply_reviewers": "131;15;10;0;42", "wc_reply_authors": "250;29;20;57;20", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 74.6, 27.9256154811313 ], "wc_strengths_avg": [ 79.0, 67.5188862467384 ], "wc_weaknesses_avg": [ 156.8, 93.56153055609981 ], "wc_questions_avg": [ 31.2, 32.74996183203883 ], "wc_limitations_avg": [ 16.0, 13.386560424545209 ], "wc_review_avg": [ 357.6, 116.1268272192089 ], "wc_reply_reviewers_avg": [ 39.6, 47.768608939344254 ], "wc_reply_authors_avg": [ 75.2, 88.44749855140054 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3952847075210474, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2492895322324306289&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Modulated Neural ODEs", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/71794", "id": "Op9z2QfXbC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8bc74514d554a90c996576f6c373f5f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Op9z2QfXbC", "openreview": "https://openreview.net/forum?id=Op9z2QfXbC", "poster": "/media/PosterPDFs/NeurIPS%202023/71794.png?t=1701422230.20261", "slides": "https://nips.cc/virtual/2023/poster/71794", "video": "https://nips.cc/virtual/2023/poster/71794", "author_site": "Ilze Amanda Auzina, \u00c7a\u011fatay Y\u0131ld\u0131z, Sara Magliacane, Matthias Bethge, Efstratios Gavves", "tldr": "", "abstract": "Neural ordinary differential equations (NODEs) have been proven useful for learning non-linear dynamics of arbitrary trajectories. However, current NODE methods capture variations across trajectories only via the initial state value or by auto-regressive encoder updates. In this work, we introduce Modulated Neural ODEs (MoNODEs), a novel framework that sets apart dynamics states from underlying static factors of variation and improves the existing NODE methods. In particular, we introduce *time-invariant modulator variables* that are learned from the data. We incorporate our proposed framework into four existing NODE variants. We test MoNODE on oscillating systems, videos and human walking trajectories, where each trajectory has trajectory-specific modulation. Our framework consistently improves the existing model ability to generalize to new dynamic parameterizations and to perform far-horizon forecasting. In addition, we verify that the proposed modulator variables are informative of the true unknown factors of variation as measured by $R^2$ scores.", "keywords": "Neural ODEs;Modulator Variables;Dynamical Systems;Disentanglment", "primary_area": "", "supplementary_material": "", "author": "Ilze Amanda Auzina;Cagatay Yildiz;Sara Magliacane;Matthias Bethge;Efstratios Gavves", "authorids": "~Ilze_Amanda_Auzina1;~Cagatay_Yildiz1;~Sara_Magliacane1;~Matthias_Bethge1;~Efstratios_Gavves1", "gender": "F;M;F;M;M", "homepage": "https://ilzeamandaa.github.io/;http://cagatayyildiz.github.io/;http://saramagliacane.github.io;https://bethgelab.org;https://www.egavves.com", "dblp": "277/1377.html;202/7085;120/5256;77/3005;03/8693", "google_scholar": "AJIXYb0AAAAJ;dNloPBUAAAAJ;https://scholar.google.nl/citations?user=H3j_zQ4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ", "orcid": ";0000-0002-7808-502X;;;", "linkedin": "ilze-amanda-auzina;;magliacane/;;", "or_profile": "~Ilze_Amanda_Auzina1;~Cagatay_Yildiz1;~Sara_Magliacane1;~Matthias_Bethge1;~Efstratios_Gavves1", "aff": "University of Amsterdam, University of Amsterdam;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;MIT-IBM Watson AI Lab;University of Tuebingen;University of Amsterdam", "aff_domain": "ivi.uva.nl;uni-tuebingen.de;mit.edu;uni-tuebingen.de;uva.nl", "position": "PhD student;Postdoc;Research Scientist;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nauzina2023modulated,\ntitle={Modulated Neural {ODE}s},\nauthor={Ilze Amanda Auzina and Cagatay Yildiz and Sara Magliacane and Matthias Bethge and Efstratios Gavves},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Op9z2QfXbC}\n}", "github": "", "project": "", "reviewers": "Y7gY;dvMp;Znd9;m2DA;VxYT;8J7y", "pdf_size": 7791466, "rating": "4;4;6;6;6;6", "confidence": "5;4;3;3;3;5", "soundness": 
"3;2;3;3;3;3", "novelty": "2;1;3;3;2;2", "presentation": "2;4;3;3;3;3", "wc_summary": "66;63;198;101;45;76", "wc_strengths": "39;22;67;46;16;68", "wc_weaknesses": "122;68;127;1;57;31", "wc_questions": "145;21;188;26;2;87", "wc_limitations": "7;1;48;19;34;4", "wc_review": "379;175;628;193;154;266", "wc_reply_reviewers": "22;31;189;11;11;15", "wc_reply_authors": "57;80;0;0;0;0", "reply_reviewers": "1;2;1;1;1;1", "reply_authors": "2;2;1;1;1;1", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.8333333333333335, 0.8975274678557507 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 91.5, 50.493398908504204 ], "wc_strengths_avg": [ 43.0, 19.983326383095815 ], "wc_weaknesses_avg": [ 67.66666666666667, 45.41536695387978 ], "wc_questions_avg": [ 78.16666666666667, 68.79781165770383 ], "wc_limitations_avg": [ 18.833333333333332, 17.121299275723466 ], "wc_review_avg": [ 299.1666666666667, 165.00850146448684 ], "wc_reply_reviewers_avg": [ 46.5, 64.10863176411323 ], "wc_reply_authors_avg": [ 22.833333333333332, 32.96673407475414 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 1.3333333333333333, 0.47140452079103173 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5252257314388902, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6898899188363550217&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ivi.uva.nl;uni-tuebingen.de;mit.edu;uni-tuebingen.de;uva.nl", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Amsterdam;Eberhard Karls University of T\u00fcbingen;Massachusetts Institute of Technology;University of Tuebingen", "aff_unique_dep": ";;IBM Watson AI Lab;", "aff_unique_url": "https://www.uva.nl;https://www.uni-tuebingen.de/;https://www.mitibmwatsonailab.org;https://www.uni-tuebingen.de/", "aff_unique_abbr": "UvA;Uni T\u00fcbingen;MIT-IBM AI Lab;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;2;1;0", "aff_country_unique": "Netherlands;Germany;United States" }, { "title": "Weakly Supervised 3D Open-vocabulary Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71793", "id": "Orp1K2dZvY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a76b693f36916a5ed84d6e5b39a0dc03-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Orp1K2dZvY", "openreview": "https://openreview.net/forum?id=Orp1K2dZvY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71793", "video": "https://nips.cc/virtual/2023/poster/71793", "author_site": "Kunhao Liu, Fangneng Zhan, Jiahui Zhang, MUYU XU, Yingchen Yu, Abdulmotaleb El Saddik, Christian Theobalt, Eric Xing, Shijian Lu", "tldr": "", "abstract": "Open-vocabulary segmentation of 3D scenes is a fundamental function of human perception and thus a crucial objective in computer vision research. However, this task is heavily impeded by the lack of large-scale and diverse 3D open-vocabulary segmentation datasets for training robust and generalizable models. Distilling knowledge from pre-trained 2D open-vocabulary segmentation models helps but it compromises the open-vocabulary feature as the 2D models are mostly finetuned with close-vocabulary datasets. 
We tackle the challenges in 3D open-vocabulary segmentation by exploiting pre-trained foundation models CLIP and DINO in a weakly supervised manner. Specifically, given only the open-vocabulary text descriptions of the objects in a scene, we distill the open-vocabulary multimodal knowledge and object reasoning capability of CLIP and DINO into a neural radiance field (NeRF), which effectively lifts 2D features into view-consistent 3D segmentation. A notable aspect of our approach is that it does not require any manual segmentation annotations for either the foundation models or the distillation process. Extensive experiments show that our method even outperforms fully supervised models trained with segmentation annotations in certain scenes, suggesting that 3D open-vocabulary segmentation can be effectively learned from 2D images and text-image pairs. Code is available at https://github.com/Kunhao-Liu/3D-OVS.", "keywords": "3D;open-vocabulary segmentation;neural radiance field", "primary_area": "", "supplementary_material": "/attachment/45b7e3e5fc27e08c24162f82bdd1327621513913.pdf", "author": "Kunhao Liu;Fangneng Zhan;Jiahui Zhang;MUYU XU;Yingchen Yu;Abdulmotaleb El Saddik;Christian Theobalt;Eric Xing;Shijian Lu", "authorids": "~Kunhao_Liu1;~Fangneng_Zhan1;~Jiahui_Zhang4;~MUYU_XU1;~Yingchen_Yu1;~Abdulmotaleb_El_Saddik1;~Christian_Theobalt2;~Eric_Xing1;~Shijian_Lu1", "gender": "M;M;M;Not Specified;M;;M;M;M", "homepage": "https://kunhao-liu.github.io/;https://fnzhan.com/;https://github.com/jhzhang99;;https://yingchen001.github.io/;;https://www.mpi-inf.mpg.de/~theobalt/;http://www.cs.cmu.edu/~epxing/;https://personal.ntu.edu.sg/shijian.lu/", "dblp": "342/9226;223/4246;;;281/8186;;55/3346;36/3855;42/2718", "google_scholar": "fAc8WqwAAAAJ;https://scholar.google.com.sg/citations?user=8zbcfzAAAAAJ;DXpYbWkAAAAJ;;0cet0X8AAAAJ;;https://scholar.google.com.tw/citations?user=eIWg8NMAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;https://scholar.google.com.sg/scholar?hl=en", "orcid": "0000-0003-1757-0547;0000-0003-1502-6847;;;0000-0002-7893-0764;;;;", "linkedin": ";;;muyu-xu-427525268;;;;;", "or_profile": "~Kunhao_Liu1;~Fangneng_Zhan1;~Jiahui_Zhang4;~MUYU_XU1;~Yingchen_Yu1;~Abdulmotaleb_El_Saddik1;~Christian_Theobalt2;~Eric_Xing1;~Shijian_Lu1", "aff": "Nanyang Technological University;Max Planck Institute for Informatics;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;;Max-Planck-Institute for Informatics, Saarland Informatics Campus;School of Computer Science, Carnegie Mellon University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;mpi-inf.mpg.de;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;;mpi-inf.mpg.de;cs.cmu.edu;ntu.edu.sg", "position": "PhD student;Postdoc;PhD student;PhD student;PhD student;;Director;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023weakly,\ntitle={Weakly Supervised 3D Open-vocabulary Segmentation},\nauthor={Kunhao Liu and Fangneng Zhan and Jiahui Zhang and MUYU XU and Yingchen Yu and Abdulmotaleb El Saddik and Christian Theobalt and Eric Xing and Shijian Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Orp1K2dZvY}\n}", "github": "", "project": "", "reviewers": "kCD4;t9cD;1C3c;L8we;1xbk", "pdf_size": 11882184, "rating": "2;4;5;6;6", "confidence": "4;4;4;4;3", "soundness": "1;2;4;3;2", "novelty": "2;2;3;3;2", "presentation": "3;3;3;3;3", "wc_summary": "116;49;62;87;46", "wc_strengths": 
"38;58;60;102;16", "wc_weaknesses": "461;119;266;587;96", "wc_questions": "341;2;44;98;36", "wc_limitations": "33;20;12;78;14", "wc_review": "989;248;444;952;208", "wc_reply_reviewers": "697;41;248;333;16", "wc_reply_authors": "841;117;0;401;8", "reply_reviewers": "2;1;1;2;1", "reply_authors": "4;2;1;4;2", "rating_avg": [ 4.6, 1.4966629547095764 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 1.019803902718557 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.0, 26.328691574022436 ], "wc_strengths_avg": [ 54.8, 28.47033543883879 ], "wc_weaknesses_avg": [ 305.8, 191.65322851441871 ], "wc_questions_avg": [ 104.2, 122.33789273973947 ], "wc_limitations_avg": [ 31.4, 24.42621542523524 ], "wc_review_avg": [ 568.2, 338.2522135921656 ], "wc_reply_reviewers_avg": [ 267.0, 246.4524294869093 ], "wc_reply_authors_avg": [ 273.4, 318.77804190376725 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.4677071733467426, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1777555363195906986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ntu.edu.sg;mpi-inf.mpg.de;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;;mpi-inf.mpg.de;cs.cmu.edu;ntu.edu.sg", "author_num": 9, "aff_unique_index": "0;1;0;0;0;2;3;0", "aff_unique_norm": "Nanyang Technological University;Max Planck Institute for Informatics;Max-Planck-Institute for Informatics;Carnegie Mellon University", "aff_unique_dep": ";;;School of Computer Science", "aff_unique_url": "https://www.ntu.edu.sg;https://mpi-inf.mpg.de;https://mpi-inf.mpg.de;https://www.cmu.edu", "aff_unique_abbr": "NTU;MPII;MPII;CMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Saarland;Pittsburgh", "aff_country_unique_index": "0;1;0;0;0;1;2;0", "aff_country_unique": "Singapore;Germany;United States" }, { "title": "Learning to Augment Distributions for Out-of-distribution Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71792", "id": "OtU6VvXJue", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e812af67a942c21dd0104bd929f99da1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OtU6VvXJue", "openreview": "https://openreview.net/forum?id=OtU6VvXJue", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71792", "video": "https://nips.cc/virtual/2023/poster/71792", "author_site": "Qizhou Wang, Zhen Fang, Yonggang Zhang, Feng Liu, Yixuan Li, Bo Han", "tldr": "", "abstract": "Open-world classification systems should discern out-of-distribution (OOD) data whose labels deviate from those of in-distribution (ID) cases, motivating recent studies in OOD detection. Advanced works, despite their promising progress, may still fail in the open world, owing to the lacking knowledge about unseen OOD data in advance. Although one can access auxiliary OOD data (distinct from unseen ones) for model training, it remains to analyze how such auxiliary data will work in the open world. To this end, we delve into such a problem from a learning theory perspective, finding that the distribution discrepancy between the auxiliary and the unseen real OOD data is the key to affect the open-world detection performance. 
Accordingly, we propose Distributional-Augmented OOD Learning (DAOL), alleviating the OOD distribution discrepancy by crafting an OOD distribution set that contains all distributions in a Wasserstein ball centered on the auxiliary OOD distribution. We justify that the predictor trained over the worst OOD data in the ball can shrink the OOD distribution discrepancy, thus improving the open-world detection performance given only the auxiliary OOD data. We conduct extensive evaluations across representative OOD detection setups, demonstrating the superiority of our DAOL over its advanced counterparts.", "keywords": "OOD Detection", "primary_area": "", "supplementary_material": "/attachment/a560b861e45d43f58d6f8a9363fefa7a9b20d9c5.pdf", "author": "Qizhou Wang;Zhen Fang;Yonggang Zhang;Feng Liu;Yixuan Li;Bo Han", "authorids": "~Qizhou_Wang1;~Zhen_Fang2;~Yonggang_Zhang1;~Feng_Liu2;~Yixuan_Li1;~Bo_Han1", "gender": ";M;M;M;F;", "homepage": ";https://fang-zhen.github.io/index.html;https://yonggangzhangben.github.io/index.html;https://fengliu90.github.io/index.html;http://pages.cs.wisc.edu/~sharonli/;", "dblp": ";;27/6859-3;77/1318-3;144/6087-1;", "google_scholar": ";OzD6WJcAAAAJ;XSbEr98AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0003-0602-6255;0000-0002-4080-7592;0000-0002-5005-9129;;", "linkedin": ";;;alexfengliu;liyixuan;", "or_profile": "~Qizhou_Wang1;~Zhen_Fang2;~Yonggang_Zhang1;~Feng_Liu2;~Yixuan_Li1;~Bo_Han1", "aff": ";University of Technology Sydney;Hong Kong Baptist University;University of Melbourne;Cornell University;", "aff_domain": ";uts.edu.au;hkbu.edu.hk;unimelb.edu.au;cornell.edu;", "position": ";Postdoc;Postdoc;Assistant Professor;Graduate Student;", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning to Augment Distributions for Out-of-distribution Detection},\nauthor={Qizhou Wang and Zhen Fang and Yonggang Zhang and Feng Liu and Yixuan Li and Bo Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OtU6VvXJue}\n}", "github": "", "project": "", "reviewers": "xghg;3p7h;UEQu;REz1", "pdf_size": 871692, "rating": "6;7;7;7", "confidence": "3;3;4;2", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "3;4;3;3", "wc_summary": "42;72;72;83", "wc_strengths": "90;80;112;105", "wc_weaknesses": "72;27;167;47", "wc_questions": "6;72;3;130", "wc_limitations": "1;35;1;19", "wc_review": "211;286;355;384", "wc_reply_reviewers": "0;14;0;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.25, 15.2540978100968 ], "wc_strengths_avg": [ 96.75, 12.517487767120047 ], "wc_weaknesses_avg": [ 78.25, 53.6627198341642 ], "wc_questions_avg": [ 52.75, 52.437462753264484 ], "wc_limitations_avg": [ 14.0, 14.177446878757825 ], "wc_review_avg": [ 309.0, 66.84683986547158 ], "wc_reply_reviewers_avg": [ 9.0, 9.433981132056603 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8937627517499700660&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 7, "email": ";uts.edu.au;hkbu.edu.hk;unimelb.edu.au;cornell.edu;", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Technology Sydney;Hong Kong Baptist University;University of Melbourne;Cornell University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uts.edu.au;https://www.hkbu.edu.hk;https://www.unimelb.edu.au;https://www.cornell.edu", "aff_unique_abbr": "UTS;HKBU;UniMelb;Cornell", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Australia;China;United States" }, { "title": "Learning Multi-agent Behaviors from Distributed and Streaming Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71791", "id": "Ou1VRZ4j4y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a7affe50ab177b9a7f0a05f07a9ca205-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ou1VRZ4j4y", "openreview": "https://openreview.net/forum?id=Ou1VRZ4j4y", "poster": "/media/PosterPDFs/NeurIPS%202023/71791.png?t=1698084372.8528981", "slides": "https://nips.cc/virtual/2023/poster/71791", "video": "https://nips.cc/virtual/2023/poster/71791", "author_site": "Shicheng Liu, Minghui Zhu", "tldr": "", "abstract": "This paper considers the problem of inferring the behaviors of multiple interacting experts by estimating their reward functions and constraints where the distributed demonstrated trajectories are sequentially revealed to a group of learners. We formulate the problem as a distributed online bi-level optimization problem where the outer-level problem is to estimate the reward functions and the inner-level problem is to learn the constraints and corresponding policies. We propose a novel ``multi-agent behavior inference from distributed and streaming demonstrations\" (MA-BIRDS) algorithm that allows the learners to solve the outer-level and inner-level problems in a single loop through intermittent communications. 
We formally guarantee that the distributed learners achieve consensus on reward functions, constraints, and policies, the average local regret (over $N$ online iterations) decreases at the rate of $O(1/N^{1-\\eta_1}+1/N^{1-\\eta_2}+1/N)$, and the cumulative constraint violation increases sub-linearly at the rate of $O(N^{\\eta_2}+1)$ where $\\eta_1,\\eta_2\\in (1/2,1)$.", "keywords": "inverse reinforcement learning; distributed online bi-level optimization", "primary_area": "", "supplementary_material": "/attachment/73a66f15a07211148cca03cbb3d006bf3c53db04.zip", "author": "Shicheng Liu;Minghui Zhu", "authorids": "~Shicheng_Liu1;~Minghui_Zhu1", "gender": ";", "homepage": ";", "dblp": "231/6618;", "google_scholar": ";", "orcid": ";", "linkedin": "shicheng-liu-404a21239/;", "or_profile": "~Shicheng_Liu1;~Minghui_Zhu1", "aff": "Pennsylvania State University;", "aff_domain": "psu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nliu2023learning,\ntitle={Learning Multi-agent Behaviors from Distributed and Streaming Demonstrations},\nauthor={Shicheng Liu and Minghui Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ou1VRZ4j4y}\n}", "github": "", "project": "", "reviewers": "QLML;grYB;9ZLE;UrMu", "pdf_size": 650447, "rating": "5;5;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;2", "wc_summary": "112;74;180;124", "wc_strengths": "66;18;57;35", "wc_weaknesses": "118;17;58;122", "wc_questions": "1;190;53;47", "wc_limitations": "1;22;37;2", "wc_review": "298;321;385;330", "wc_reply_reviewers": "13;0;172;0", "wc_reply_authors": "0;0;747;0", "reply_reviewers": "1;0;3;0", "reply_authors": "1;1;3;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 122.5, 37.9835490706174 ], "wc_strengths_avg": [ 44.0, 18.774983355518586 ], "wc_weaknesses_avg": [ 78.75, 43.74571407578119 ], "wc_questions_avg": [ 72.75, 70.62002194845311 ], "wc_limitations_avg": [ 15.5, 14.974979131871937 ], "wc_review_avg": [ 333.5, 31.941352507368876 ], "wc_reply_reviewers_avg": [ 46.25, 72.79551840601178 ], "wc_reply_authors_avg": [ 186.75, 323.4604883134878 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12712981484838269333&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "psu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Importance-aware Co-teaching for Offline Model-based Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71790", "id": "OvPnc5kVsb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ae8b0b5838ba510daff1198474e7b984-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OvPnc5kVsb", "openreview": "https://openreview.net/forum?id=OvPnc5kVsb", "poster": "/media/PosterPDFs/NeurIPS%202023/71790.png?t=1698982895.2218013", "slides": "https://nips.cc/virtual/2023/poster/71790", "video": 
"https://nips.cc/virtual/2023/poster/71790", "author_site": "Ye Yuan, Can (Sam) Chen, Zixuan Liu, Willie Neiswanger, Xue (Steve) Liu", "tldr": "", "abstract": "Offline model-based optimization aims to find a design that maximizes a property of interest using only an offline dataset, with applications in robot, protein, and molecule design, among others. A prevalent approach is gradient ascent, where a proxy model is trained on the offline dataset and then used to optimize the design. This method suffers from an out-of-distribution issue, where the proxy is not accurate for unseen designs. To mitigate this issue, we explore using a pseudo-labeler to generate valuable data for fine-tuning the proxy. \nSpecifically, we propose $\\textit{\\textbf{I}mportance-aware \\textbf{C}o-\\textbf{T}eaching for Offline Model-based Optimization}~(\\textbf{ICT})$. This method maintains three symmetric proxies with their mean ensemble as the final proxy, and comprises two steps. The first step is $\\textit{pseudo-label-driven co-teaching}$. In this step, one proxy is iteratively selected as the pseudo-labeler for designs near the current optimization point, generating pseudo-labeled data. Subsequently, a co-teaching process identifies small-loss samples as valuable data and exchanges them between the other two proxies for fine-tuning, promoting knowledge transfer. This procedure is repeated three times, with a different proxy chosen as the pseudo-labeler each time, ultimately enhancing the ensemble performance.\nTo further improve accuracy of pseudo-labels, we perform a secondary step of $\\textit{meta-learning-based sample reweighting}$,\nwhich assigns importance weights to samples in the pseudo-labeled dataset and updates them via meta-learning. ICT achieves state-of-the-art results across multiple design-bench tasks, achieving the best mean rank $3.1$ and median rank $2$ among $15$ methods.\nOur source code can be accessed here.", "keywords": "offline model-based optimization;co-teaching;meta-learning;sample reweighting", "primary_area": "", "supplementary_material": "/attachment/9f0ce0b071f376d7e31b978cb8a04d300ec0e6d6.pdf", "author": "Ye Yuan;Can Chen;Zixuan Liu;Willie Neiswanger;Xue Liu", "authorids": "~Ye_Yuan17;~Can_Chen3;~Zixuan_Liu1;~Willie_Neiswanger2;~Xue_Liu1", "gender": "M;Not Specified;M;M;M", "homepage": "https://github.com/StevenYuan666;;;https://willieneis.github.io/;http://www.cs.mcgill.ca/~xueliu/", "dblp": ";370/4546.html;;120/7593.html;l/XueLiu", "google_scholar": ";;yjKOHbEAAAAJ;QwKHApEAAAAJ;https://scholar.google.com.tw/citations?user=rfLIRakAAAAJ", "orcid": "0009-0001-3288-247X;;;;", "linkedin": ";can-chen-018851202/;;;", "or_profile": "~Ye_Yuan17;~Can_Chen3;~Zixuan_Liu1;~Willie_Neiswanger2;~Xue_Liu1", "aff": "Mila - Quebec Artificial Intelligence Institute;Mila - Quebec AI Institute;University of Washington;Stanford University;McGill University", "aff_domain": "mila.quebec;mila.quebec;uw.edu;stanford.edu;mcgill.ca", "position": "PhD student;PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyuan2023importanceaware,\ntitle={Importance-aware Co-teaching for Offline Model-based Optimization},\nauthor={Ye Yuan and Can Chen and Zixuan Liu and Willie Neiswanger and Xue Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OvPnc5kVsb}\n}", "github": "", "project": "", "reviewers": "P8Ru;VPeC;j29o;DRKm", "pdf_size": 3644171, "rating": "5;6;6;7", "confidence": "3;3;4;4", 
"soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "167;62;76;91", "wc_strengths": "29;48;57;83", "wc_weaknesses": "196;129;111;215", "wc_questions": "4;17;160;179", "wc_limitations": "1;1;31;11", "wc_review": "397;257;435;579", "wc_reply_reviewers": "57;59;36;15", "wc_reply_authors": "33;188;38;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 40.577087130546964 ], "wc_strengths_avg": [ 54.25, 19.434183800715687 ], "wc_weaknesses_avg": [ 162.75, 43.739998856881556 ], "wc_questions_avg": [ 90.0, 79.9155804583812 ], "wc_limitations_avg": [ 11.0, 12.24744871391589 ], "wc_review_avg": [ 417.0, 114.63856244737195 ], "wc_reply_reviewers_avg": [ 41.75, 17.879807045938723 ], "wc_reply_authors_avg": [ 72.25, 66.88936761548878 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1681007967493437131&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mila.quebec;mila.quebec;uw.edu;stanford.edu;mcgill.ca", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Quebec Artificial Intelligence Institute;Quebec AI Institute;University of Washington;Stanford University;McGill University", "aff_unique_dep": "Artificial Intelligence;AI Institute;;;", "aff_unique_url": "https://mila.quebec;https://mila.quebec;https://www.washington.edu;https://www.stanford.edu;https://www.mcgill.ca", "aff_unique_abbr": "Mila;Mila;UW;Stanford;McGill", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Towards Unbounded Machine Unlearning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71789", "id": "OveBaTtUAT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/062d711fb777322e2152435459e6e9d9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OveBaTtUAT", "openreview": "https://openreview.net/forum?id=OveBaTtUAT", "poster": "/media/PosterPDFs/NeurIPS%202023/71789.png?t=1701773952.690563", "slides": "https://nips.cc/virtual/2023/poster/71789", "video": "https://nips.cc/virtual/2023/poster/71789", "author_site": "Meghdad Kurmanji, Peter Triantafillou, Jamie Hayes, Eleni Triantafillou", "tldr": "", "abstract": "Deep machine unlearning is the problem of 'removing' from a trained neural network a subset of its training set. This problem is very timely and has many applications, including the key tasks of removing biases (RB), resolving confusion (RC) (caused by mislabelled data in trained models), as well as allowing users to exercise their 'right to be forgotten' to protect User Privacy (UP). This paper is the first, to our knowledge, to study unlearning for different applications (RB, RC, UP), with the view that each has its own desiderata, definitions for 'forgetting' and associated metrics for forget quality. For UP, we propose a novel adaptation of a strong Membership Inference Attack for unlearning. 
We also propose SCRUB, a novel unlearning algorithm, which is the only method that is consistently a top performer for forget quality across the different application-dependent metrics for RB, RC, and UP. At the same time, SCRUB is also consistently a top performer on metrics that measure model utility (i.e. accuracy on retained data and generalization), and is more efficient than previous work. The above are substantiated through a comprehensive empirical evaluation against previous state-of-the-art.", "keywords": "machine unlearning;deep learning", "primary_area": "", "supplementary_material": "/attachment/ad6abb7807c176a4ee8e0a7d68120cb6d998ea8d.zip", "author": "Meghdad Kurmanji;Peter Triantafillou;Jamie Hayes;Eleni Triantafillou", "authorids": "~Meghdad_Kurmanji1;~Peter_Triantafillou1;~Jamie_Hayes2;~Eleni_Triantafillou1", "gender": "M;;M;F", "homepage": ";https://warwick.ac.uk/fac/sci/dcs/people/peter_triantafillou/;;http://www.cs.toronto.edu/~eleni/", "dblp": ";t/PeterTriantafillou;;183/8430", "google_scholar": "7t9HbecAAAAJ;;https://scholar.google.com/citations?hl=en;Y5x2ZgQAAAAJ", "orcid": "0009-0007-5089-7759;;;", "linkedin": "meghdad-kurmanji-948380113/;;;", "or_profile": "~Meghdad_Kurmanji1;~Peter_Triantafillou1;~Jamie_Hayes2;~Eleni_Triantafillou1", "aff": "University of Warwick;University of Warwick;Google DeepMind;Google", "aff_domain": "warwick.ac.uk;warwick.ac.uk;google.com;google.com", "position": "PhD student;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nkurmanji2023towards,\ntitle={Towards Unbounded Machine Unlearning},\nauthor={Meghdad Kurmanji and Peter Triantafillou and Jamie Hayes and Eleni Triantafillou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OveBaTtUAT}\n}", "github": "", "project": "", "reviewers": "YAUt;LTMH;Xo5D;QAcd", "pdf_size": 1474472, "rating": "5;5;6;8", "confidence": "4;3;5;3", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "3;1;2;4", "wc_summary": "49;115;90;48", "wc_strengths": "244;59;66;48", "wc_weaknesses": "350;364;736;45", "wc_questions": "109;1;184;30", "wc_limitations": "97;2;107;1", "wc_review": "849;541;1183;172", "wc_reply_reviewers": "61;36;178;0", "wc_reply_authors": "70;0;547;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;1;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 75.5, 28.412145290350743 ], "wc_strengths_avg": [ 104.25, 80.93940634820594 ], "wc_weaknesses_avg": [ 373.75, 244.92894377757807 ], "wc_questions_avg": [ 81.0, 71.40378141247143 ], "wc_limitations_avg": [ 51.75, 50.37546525839737 ], "wc_review_avg": [ 686.25, 373.76421377654657 ], "wc_reply_reviewers_avg": [ 68.75, 66.69848199172152 ], "wc_reply_authors_avg": [ 154.25, 228.5479982410697 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.24618298195866545, "gs_citation": 201, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=441748673262234548&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "warwick.ac.uk;warwick.ac.uk;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Warwick;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": 
"https://www.warwick.ac.uk;https://deepmind.com", "aff_unique_abbr": "Warwick;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Jigsaw: Learning to Assemble Multiple Fractured Objects", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71788", "id": "OwpaO4w6K7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/30ae2af8612ac74357363e8ae877d80c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OwpaO4w6K7", "openreview": "https://openreview.net/forum?id=OwpaO4w6K7", "poster": "/media/PosterPDFs/NeurIPS%202023/71788.png?t=1701424422.137735", "slides": "https://nips.cc/virtual/2023/poster/71788", "video": "https://nips.cc/virtual/2023/poster/71788", "author_site": "Jiaxin Lu, Yifan Sun, Qixing Huang", "tldr": "", "abstract": "Automated assembly of 3D fractures is essential in orthopedics, archaeology, and our daily life. This paper presents Jigsaw, a novel framework for assembling physically broken 3D objects from multiple pieces. Our approach leverages hierarchical features of global and local geometry to match and align the fracture surfaces. Our framework consists of four components: (1) front-end point feature extractor with attention layers, (2) surface segmentation to separate fracture and original parts, (3) multi-parts matching to find correspondences among fracture surface points, and (4) robust global alignment to recover the global poses of the pieces. We show how to jointly learn segmentation and matching and seamlessly integrate feature matching and rigidity constraints. We evaluate Jigsaw on the Breaking Bad dataset and achieve superior performance compared to state-of-the-art methods. Our method also generalizes well to diverse fracture modes, objects, and unseen instances. To the best of our knowledge, this is the first learning-based method designed specifically for 3D fracture assembly over multiple pieces. 
Our code is available at https://jiaxin-lu.github.io/Jigsaw/.", "keywords": "Shape Matching; Reassembly; Shape Segmentation;", "primary_area": "", "supplementary_material": "/attachment/1138dc0ea7eafe489b3042c001b5c6297fd0c952.pdf", "author": "Jiaxin Lu;Yifan Sun;Qixing Huang", "authorids": "~Jiaxin_Lu1;~Yifan_Sun4;~Qixing_Huang1", "gender": "F;M;M", "homepage": "https://jiaxin-lu.github.io/;https://yifansun12.wixsite.com/mysite;https://www.cs.utexas.edu/~huangqx/", "dblp": ";99/10261-7;82/241", "google_scholar": "VWTpWhEAAAAJ;;https://scholar.google.com.tw/citations?user=pamL_rIAAAAJ", "orcid": "0009-0004-4485-9615;;", "linkedin": "jiaxin-lu-9a422127a/;;", "or_profile": "~Jiaxin_Lu1;~Yifan_Sun4;~Qixing_Huang1", "aff": "University of Texas at Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlu2023jigsaw,\ntitle={Jigsaw: Learning to Assemble Multiple Fractured Objects},\nauthor={Jiaxin Lu and Yifan Sun and Qixing Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OwpaO4w6K7}\n}", "github": "", "project": "", "reviewers": "jmgU;Pfa7;qeDQ;LoVE;nZZN", "pdf_size": 3471841, "rating": "4;5;6;6;7", "confidence": "3;5;3;4;5", "soundness": "2;4;3;3;3", "novelty": "2;4;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "47;96;93;32;91", "wc_strengths": "9;62;93;101;71", "wc_weaknesses": "355;266;146;58;71", "wc_questions": "86;31;82;20;350", "wc_limitations": "8;10;17;16;4", "wc_review": "505;465;431;227;587", "wc_reply_reviewers": "442;297;154;0;32", "wc_reply_authors": "954;218;232;0;0", "reply_reviewers": "2;1;2;0;1", "reply_authors": "4;2;2;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 71.8, 26.843248685656512 ], "wc_strengths_avg": [ 67.2, 32.362941769870055 ], "wc_weaknesses_avg": [ 179.2, 114.83797281387372 ], "wc_questions_avg": [ 113.8, 121.01966782304436 ], "wc_limitations_avg": [ 11.0, 4.898979485566356 ], "wc_review_avg": [ 443.0, 119.88661309754313 ], "wc_reply_reviewers_avg": [ 185.0, 165.64298958905565 ], "wc_reply_authors_avg": [ 280.8, 351.34621102268915 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.43852900965351466, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7574573881595072574&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "utexas.edu;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CAPP-130: A Corpus of Chinese Application Privacy Policy Summarization and Interpretation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73605", "id": "OyTIV57Prb", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/92225ec7e87b97a9e007ca6ab7944b14-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=OyTIV57Prb", "openreview": "https://openreview.net/forum?id=OyTIV57Prb", "poster": "/media/PosterPDFs/NeurIPS%202023/73605.png?t=1699519324.753303", "slides": "https://nips.cc/virtual/2023/poster/73605", "video": "https://nips.cc/virtual/2023/poster/73605", "author_site": "pengyun zhu, Long Wen, Jinfei Liu, Feng Xue, Jian Lou, Zhibo Wang, Kui Ren", "tldr": "", "abstract": "A privacy policy serves as an online internet protocol crafted by service providers, which details how service providers collect, process, store, manage, and use personal information when users engage with applications. However, these privacy policies are often filled with technobabble and legalese, making them \"incomprehensible''. As a result, users often agree to all terms unknowingly, even some terms may conflict with the law, thereby posing a considerable risk to personal privacy information. One potential solution to alleviate this challenge is to automatically summarize privacy policies using NLP techniques. However, existing techniques primarily focus on extracting key sentences, resulting in comparatively shorter agreements, but failing to address the poor readability caused by the \"incomprehensible'' of technobabble and legalese. Moreover, research on Chinese application privacy policy summarization is currently almost nonexistent, and there is a lack of a high-quality corpus suitable for addressing readability issues. To tackle these challenges, we introduce a fine-grained CAPP-130 corpus and a TCSI-pp framework. CAPP-130 contains 130 Chinese privacy policies from popular applications that have been carefully annotated and interpreted by legal experts, resulting in 52,489 annotations and 20,555 rewritten sentences. TCSI-pp first extracts sentences related to the topic specified by users and then uses a generative model to rewrite the sentences into comprehensible summarization. Built upon TSCI-pp, we construct a summarization tool TSCI-pp-zh by selecting RoBERTa from six classification models for sentence extraction and selecting mT5 from five generative models for sentence rewriting. Experimental results show that TCSI-pp-zh outperforms GPT-4 and other baselines in Chinese application privacy policy summarization, demonstrating exceptional readability and reliability. 
Our data, annotation guidelines, benchmark models, and source code are publicly available at https://github.com/EnlightenedAI/CAPP-130.", "keywords": "Privacy Policy;CAPP-130 Corpus;TCSI-pp framework;Summarization;Interpretation", "primary_area": "", "supplementary_material": "/attachment/cb799bd595c82bc40c4afdee1e41be56a0e76d64.pdf", "author": "Pengyun Zhu;Long Wen;Jinfei Liu;Feng Xue;Jian Lou;Zhibo Wang;Kui Ren", "authorids": "~Pengyun_Zhu2;~Long_Wen2;~Jinfei_Liu1;~Feng_Xue4;~Jian_Lou2;~Zhibo_Wang1;~Kui_Ren4", "gender": "M;M;M;;M;M;M", "homepage": ";https://person.zju.edu.cn/jinfeiliu#947644;https://scholar.google.com/citations?user=QZyNoQcAAAAJ;https://sites.google.com/view/jianlou;https://person.zju.edu.cn/en/zhibowang;;", "dblp": ";89/9935;;05/4625-1;31/5772-1.html;20/6179-1.html;346/2216", "google_scholar": ";;;;0ox7zDkAAAAJ;https://scholar.google.com/citations?view_op=list_works;N-QtEdEAAAAJ", "orcid": "0009-0003-4966-6516;;;0000-0002-4110-2068;0000-0002-5804-3279;0000-0003-3441-6277;0000-0002-2637-2908", "linkedin": ";;;;;;", "or_profile": "~Long_Wen2;~Jinfei_Liu1;~Feng_Xue4;~Jian_Lou2;~Zhibo_Wang1;~Kui_Ren4;~Pengyun_Z1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;www.hoiying.net;Zhejiang University;Zhejiang University;Jiaxing Reserch Institute, Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;hoiying.net;zju.edu.cn;zju.edu.cn;jri.zju.edu.cn", "position": "MS student;Full Professor;Researcher;Researcher;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhu2023capp,\ntitle={{CAPP}-130: A Corpus of Chinese Application Privacy Policy Summarization and Interpretation},\nauthor={Pengyun Zhu and Long Wen and Jinfei Liu and Feng Xue and Jian Lou and Zhibo Wang and Kui Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=OyTIV57Prb}\n}", "github": "", "project": "", "reviewers": "b2mX;X4HM;wFwd;qTDr;nRQo", "pdf_size": 6372166, "rating": "3;6;6;6;8", "confidence": "5;4;4;5;4", "wc_summary_and_contributions": "84;74;25;152;78", "wc_strengths": "31;45;7;169;131", "wc_improvement": "386;41;25;193;194", "wc_limitations": "54;78;9;1;1", "wc_correctness": "20;12;1;1;18", "wc_clarity": "73;19;6;1;7", "wc_relation_to_prior_work": "6;20;1;1;70", "wc_documentation": "20;44;4;1;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "675;334;79;520;505", "wc_reply_reviewers": "0;0;0;0;64", "wc_reply_authors": "1172;806;233;886;650", "reply_reviewers": "0;0;0;0;1", "reply_authors": "3;2;1;2;1", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "wc_summary_and_contributions_avg": [ 82.6, 40.57388322554301 ], "wc_strengths_avg": [ 76.6, 62.32046212922365 ], "wc_improvement_avg": [ 167.8, 130.69261647086265 ], "wc_limitations_avg": [ 28.6, 31.601265797432863 ], "wc_correctness_avg": [ 10.4, 8.114185110040317 ], "wc_clarity_avg": [ 21.2, 26.56614386771253 ], "wc_relation_to_prior_work_avg": [ 19.6, 26.14268540146555 ], "wc_documentation_avg": [ 14.8, 16.01749044014074 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 422.6, 202.9252078968998 ], "wc_reply_reviewers_avg": [ 12.8, 25.6 ], "wc_reply_authors_avg": [ 749.4, 308.8699402661256 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6634034720037773, "gs_citation": 3, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10633697050923594743&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;hoiying.net;zju.edu.cn;zju.edu.cn;jri.zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Zhejiang University;Hoiying Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.hoiying.net", "aff_unique_abbr": "ZJU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jiaxing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VCC: Scaling Transformers to 128K Tokens or More by Prioritizing Important Tokens", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71787", "id": "Ozc8XVzwd4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4054556fcaa934b0bf76da52cf4f92cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ozc8XVzwd4", "openreview": "https://openreview.net/forum?id=Ozc8XVzwd4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71787", "video": "https://nips.cc/virtual/2023/poster/71787", "author_site": "Zhanpeng Zeng, Cole Hawkins, Mingyi Hong, Aston Zhang, Nikolaos Pappas, Vikas Singh, Shuai Zheng", "tldr": "", "abstract": "Transformers are central in modern natural language processing and computer vision applications. Despite recent works devoted to reducing the quadratic cost of such models with respect to sequence length, dealing with ultra long sequences (e.g., $>$16K tokens) remains challenging. Applications such as answering questions based on a book or summarizing a scientific article are inefficient or infeasible. Here, we propose to significantly improve the efficiency of Transformers for ultra long sequences, by compressing the sequence into a much smaller representation at each layer. Specifically, by exploiting the fact that in many tasks, only a small subset of special tokens, which we call VIP-tokens, are most relevant to the final prediction, we propose a VIP-token centric compression (VCC) scheme which selectively compresses the sequence based on their impact on approximating the representation of the VIP-tokens. Compared with competitive baselines, our algorithm is not only efficient (achieving more than $3\\times$ compute efficiency gain compared to baselines on 4K and 16K lengths), but also offers competitive/better performance on a large number of tasks. Further, we show that our algorithm scales to 128K tokens (or more) while consistently offering accuracy improvement. 
Code is available at https://github.com/mlpen/VCC.", "keywords": "efficient;transformer;roberta;T5;language modeling;question answering;summarization", "primary_area": "", "supplementary_material": "", "author": "Zhanpeng Zeng;Cole Hawkins;Mingyi Hong;Aston Zhang;Nikolaos Pappas;Vikas Singh;Shuai Zheng", "authorids": "~Zhanpeng_Zeng1;~Cole_Hawkins1;~Mingyi_Hong1;~Aston_Zhang2;~Nikolaos_Pappas1;~Vikas_Singh1;~Shuai_Zheng1", "gender": "M;;M;;M;M;", "homepage": ";;http://people.ece.umn.edu/~mhong/mingyi.html;;http://nik0spapp.github.io/;http://vsingh-www.cs.wisc.edu/;http://www.cse.ust.hk/~szhengac/", "dblp": "284/9150;;57/8053;;36/8968-2.html;;13/8659-4", "google_scholar": "P9ctuRUAAAAJ;;qRnP-p0AAAAJ;;https://scholar.google.ch/citations?user=daiFj_cAAAAJ;d32BmwcAAAAJ;82FZpFYAAAAJ", "orcid": ";;;;0000-0002-2004-8111;;", "linkedin": ";;;;nik0spapp/;;", "or_profile": "~Zhanpeng_Zeng1;~Cole_Hawkins1;~Mingyi_Hong1;~Aston_Zhang2;~Nikolaos_Pappas1;~Vikas_Singh1;~Shuai_Zheng1", "aff": "University of Wisconsin, Madison;;University of Minnesota, Minneapolis;;AWS AI Labs;University of Wisconsin, Madison;Amazon Web Services", "aff_domain": "wisc.edu;;umn.edu;;amazon.com;wisc.edu;amazon.com", "position": "PhD student;;Associate Professor;;Researcher;Professor;Senior Applied Scientist", "bibtex": "@inproceedings{\nzeng2023vcc,\ntitle={{VCC}: Scaling Transformers to 128K Tokens or More by Prioritizing Important Tokens},\nauthor={Zhanpeng Zeng and Cole Hawkins and Mingyi Hong and Aston Zhang and Nikolaos Pappas and Vikas Singh and Shuai Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ozc8XVzwd4}\n}", "github": "", "project": "", "reviewers": "JBnL;xiLt;z4Ur;2SnC;FRn4", "pdf_size": 1633821, "rating": "5;6;6;6;6", "confidence": "5;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;3;2;3", "presentation": "3;4;4;3;3", "wc_summary": "56;67;51;59;161", "wc_strengths": "62;44;60;66;57", "wc_weaknesses": "163;247;142;91;141", "wc_questions": "123;43;2;39;77", "wc_limitations": "47;28;2;19;9", "wc_review": "451;429;257;274;445", "wc_reply_reviewers": "46;14;17;0;0", "wc_reply_authors": "68;12;45;39;39", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;2;3;2;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 78.8, 41.42656152759965 ], "wc_strengths_avg": [ 57.8, 7.493997598078077 ], "wc_weaknesses_avg": [ 156.8, 50.936823615141144 ], "wc_questions_avg": [ 56.8, 40.7450610503899 ], "wc_limitations_avg": [ 21.0, 15.709869509324385 ], "wc_review_avg": [ 371.2, 86.76957992292 ], "wc_reply_reviewers_avg": [ 15.4, 16.823792675850473 ], "wc_reply_authors_avg": [ 40.6, 17.85049018934774 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13804543679326838512&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "wisc.edu;;umn.edu;;amazon.com;wisc.edu;amazon.com", "author_num": 7, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of Wisconsin;University of Minnesota;Amazon", "aff_unique_dep": ";;AWS AI Labs", "aff_unique_url": 
"https://www.wisc.edu;https://www.minnesota.edu;https://aws.amazon.com", "aff_unique_abbr": "UW;UMN;AWS", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Madison;Minneapolis;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "STARSS23: An Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73604", "id": "OzcPJz7rgg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6c9671ed3b3106b71cafda3ba225c1a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=OzcPJz7rgg", "openreview": "https://openreview.net/forum?id=OzcPJz7rgg", "poster": "/media/PosterPDFs/NeurIPS%202023/73604.png?t=1701396057.608898", "slides": "https://nips.cc/virtual/2023/poster/73604", "video": "https://nips.cc/virtual/2023/poster/73604", "author_site": "Kazuki Shimada, Archontis Politis, Parthasaarathy Sudarsanam, Daniel A. Krause, Kengo Uchida, Sharath Adavanne, Aapo Hakala, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, Tuomas Virtanen, Yuki Mitsufuji", "tldr": "", "abstract": "While direction of arrival (DOA) of sound events is generally estimated from multichannel audio data recorded in a microphone array, sound events usually derive from visually perceptible source objects, e.g., sounds of footsteps come from the feet of a walker. This paper proposes an audio-visual sound event localization and detection (SELD) task, which uses multichannel audio and video information to estimate the temporal activation and DOA of target sound events. Audio-visual SELD systems can detect and localize sound events using signals from a microphone array and audio-visual correspondence. We also introduce an audio-visual dataset, Sony-TAu Realistic Spatial Soundscapes 2023 (STARSS23), which consists of multichannel audio data recorded with a microphone array, video data, and spatiotemporal annotation of sound events. Sound scenes in STARSS23 are recorded with instructions, which guide recording participants to ensure adequate activity and occurrences of sound events. STARSS23 also serves human-annotated temporal activation labels and human-confirmed DOA labels, which are based on tracking results of a motion capture system. Our benchmark results demonstrate the benefits of using visual object positions in audio-visual SELD tasks. 
The data is available at https://zenodo.org/record/7880637.", "keywords": "audio-visual dataset;sound event localization and detection;audio-visual sound source localization;direction of arrival estimation", "primary_area": "", "supplementary_material": "", "author": "Kazuki Shimada;Archontis Politis;Parthasaarathy Sudarsanam;Daniel Aleksander Krause;Kengo Uchida;Sharath Adavanne;Aapo Hakala;Yuichiro Koyama;Naoya Takahashi;Shusuke Takahashi;Tuomas Virtanen;Yuki Mitsufuji", "authorids": "~Kazuki_Shimada1;~Archontis_Politis1;~Parthasaarathy_Sudarsanam1;~Daniel_Aleksander_Krause1;~Kengo_Uchida1;~Sharath_Adavanne1;~Aapo_Hakala1;~Yuichiro_Koyama1;~Naoya_Takahashi1;~Shusuke_Takahashi1;~Tuomas_Virtanen2;~Yuki_Mitsufuji1", "gender": "M;M;M;M;;M;M;;M;M;;M", "homepage": ";;;;;https://www.aane.in/;https://www.linkedin.com/in/aapo-hakala-261046117/;;;;https://homepages.tuni.fi/tuomas.virtanen/;https://www.yukimitsufuji.com/", "dblp": "212/6224;;;;;198/1324.html;;;19/8442;;;136/5043", "google_scholar": "https://scholar.google.co.jp/citations?user=-t9IslAAAAAJ;DuCqB3sAAAAJ;yxZ1qAIAAAAJ;pSLng-8AAAAJ;https://scholar.google.co.jp/citations?user=zAXF4EYAAAAJ;xCEvnG8AAAAJ;;;https://scholar.google.co.jp/citations?user=JbtYJMoAAAAJ;https://scholar.google.co.jp/citations?user=_mhxayYAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-5389-2346;0000-0002-0595-2356;;;;0000-0002-5001-6911;;;;;;0000-0002-6806-6140", "linkedin": "kazuki-shimada-9a02a51a3/;;;daniel~krause/;;sharathadavanne/;;yuichiro-koyama/;naoyatakahashi/;;;mittu1204", "or_profile": "~Kazuki_Shimada1;~Archontis_Politis1;~Parthasaarathy_Sudarsanam1;~Daniel_Aleksander_Krause1;~Kengo_Uchida1;~Sharath_Adavanne1;~Aapo_Hakala1;~Yuichiro_Koyama1;~Naoya_Takahashi1;~Shusuke_Takahashi1;~Tuomas_Virtanen2;~Yuki_Mitsufuji1", "aff": "Sony AI;Tampere University;Tampere University;Tampere University;Sony Group Corporation;;Tampere University;Sony Group Corporation ;Sony Group Corporation;Sony Group Corporation;Tampere University;Tokyo Institute of Technology, Tokyo Institute of Technology", "aff_domain": "sony.com;tuni.fi;tuni.fi;tuni.fi;sony.com;;tuni.fi;sony.com;sony.com;sony.com;tuni.fi;titech.ac.jp", "position": "Researcher;Assistant Professor;PhD student;PhD student;Research Engineer;;MS student;Researcher;Researcher;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nshimada2023starss,\ntitle={{STARSS}23: An Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events},\nauthor={Kazuki Shimada and Archontis Politis and Parthasaarathy Sudarsanam and Daniel Aleksander Krause and Kengo Uchida and Sharath Adavanne and Aapo Hakala and Yuichiro Koyama and Naoya Takahashi and Shusuke Takahashi and Tuomas Virtanen and Yuki Mitsufuji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=OzcPJz7rgg}\n}", "github": "", "project": "", "reviewers": "pjsy;zAcc;7Cu6;CWXv", "pdf_size": 2179701, "rating": "5;5;6;7", "confidence": "5;4;3;2", "wc_summary_and_contributions": "80;117;85;47", "wc_strengths": "17;96;72;14", "wc_improvement": "2;169;47;18", "wc_limitations": "177;1;63;15", "wc_correctness": "9;8;65;7", "wc_clarity": "3;7;37;8", "wc_relation_to_prior_work": "2;9;29;4", "wc_documentation": "2;7;34;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "293;415;433;129", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "903;896;1137;201", "reply_reviewers": "0;0;0;0", 
"reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 82.25, 24.81305100143874 ], "wc_strengths_avg": [ 49.75, 35.301380992816696 ], "wc_improvement_avg": [ 59.0, 65.52480446365331 ], "wc_limitations_avg": [ 64.0, 69.17369442208505 ], "wc_correctness_avg": [ 22.25, 24.691850882426778 ], "wc_clarity_avg": [ 13.75, 13.5531361684298 ], "wc_relation_to_prior_work_avg": [ 11.0, 10.700467279516348 ], "wc_documentation_avg": [ 14.5, 12.175795661885921 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 317.5, 121.42796218334556 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 784.25, 350.4292902997693 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.9438798074485388, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2283371755090142533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "sony.com;tuni.fi;tuni.fi;tuni.fi;sony.com;;tuni.fi;sony.com;sony.com;sony.com;tuni.fi;titech.ac.jp", "author_num": 12, "aff_unique_index": "0;1;1;1;2;1;2;2;2;1;3", "aff_unique_norm": "Sony;Tampere University;Sony Group Corporation;Tokyo Institute of Technology", "aff_unique_dep": "Sony AI;;;", "aff_unique_url": "https://www.sony.com;https://www.tuni.fi;https://www.sony.com;https://www.titech.ac.jp", "aff_unique_abbr": "Sony AI;Tuni;Sony;Titech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;1;1;1;0;1;0;0;0;1;0", "aff_country_unique": "Japan;Finland" }, { "title": "Achieving $\\mathcal{O}(\\epsilon^{-1.5})$ Complexity in Hessian/Jacobian-free Stochastic Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71786", "id": "OzjBohmLvE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7c3a8d20ceadb7c519e9ac1bb77a15ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OzjBohmLvE", "openreview": "https://openreview.net/forum?id=OzjBohmLvE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71786", "video": "https://nips.cc/virtual/2023/poster/71786", "author_site": "Yifan Yang, Peiyao Xiao, Kaiyi Ji", "tldr": "", "abstract": "In this paper, we revisit the bilevel optimization problem, in which the upper-level objective function is generally nonconvex and the lower-level objective function is strongly convex. Although this type of problem has been studied extensively, it still remains an open question how to achieve an $\\mathcal{O}(\\epsilon^{-1.5})$ sample complexity in Hessian/Jacobian-free stochastic bilevel optimization without any second-order derivative computation. To fill this gap, we propose a novel Hessian/Jacobian-free bilevel optimizer named FdeHBO, which features a simple fully single-loop structure, a projection-aided finite-difference Hessian/Jacobian-vector approximation, and momentum-based updates. Theoretically, we show that FdeHBO requires $\\mathcal{O}(\\epsilon^{-1.5})$ iterations (each using $\\mathcal{O}(1)$ samples and only first-order gradient information) to find an $\\epsilon$-accurate stationary point. 
As far as we know, this is the first Hessian/Jacobian-free method with an $\\mathcal{O}(\\epsilon^{-1.5})$ sample complexity for nonconvex-strongly-convex stochastic bilevel optimization.", "keywords": "Stochastic bilevel optimization;Hessian-free algorithms;near-optimal complexity", "primary_area": "", "supplementary_material": "/attachment/3197d9c0ea63df3162f1afb879deee12ee74ae53.zip", "author": "Yifan Yang;Peiyao Xiao;Kaiyi Ji", "authorids": "~Yifan_Yang13;~Peiyao_Xiao1;~Kaiyi_Ji1", "gender": ";M;M", "homepage": ";https://xiaopeiyao.github.io/index.html;https://cse.buffalo.edu/~kaiyiji/", "dblp": ";;205/3164", "google_scholar": ";_gf0LboAAAAJ;E0A3lSIAAAAJ", "orcid": ";;", "linkedin": ";xiao-peiyao-915430266;", "or_profile": "~Yifan_Yang13;~Peiyao_Xiao1;~Kaiyi_Ji1", "aff": ";State University of New York at Buffalo;State University of New York at Buffalo", "aff_domain": ";buffalo.edu;buffalo.edu", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2023achieving,\ntitle={Achieving \\${\\textbackslash}mathcal\\{O\\}({\\textbackslash}epsilon{\\textasciicircum}\\{-1.5\\})\\$ Complexity in Hessian/Jacobian-free Stochastic Bilevel Optimization},\nauthor={Yifan Yang and Peiyao Xiao and Kaiyi Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OzjBohmLvE}\n}", "github": "", "project": "", "reviewers": "GD5q;ySP3;Xzpd;HtEu", "pdf_size": 1674659, "rating": "5;5;6;7", "confidence": "4;4;2;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "31;56;77;122", "wc_strengths": "17;30;53;129", "wc_weaknesses": "88;98;52;136", "wc_questions": "103;4;32;13", "wc_limitations": "1;34;14;1", "wc_review": "240;222;228;401", "wc_reply_reviewers": "526;37;7;5", "wc_reply_authors": "619;69;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "5;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 33.39535895899309 ], "wc_strengths_avg": [ 57.25, 43.384184906484066 ], "wc_weaknesses_avg": [ 93.5, 29.912372022292047 ], "wc_questions_avg": [ 38.0, 38.86515148561755 ], "wc_limitations_avg": [ 12.5, 13.5 ], "wc_review_avg": [ 272.75, 74.32824160438615 ], "wc_reply_reviewers_avg": [ 143.75, 221.05584701608777 ], "wc_reply_authors_avg": [ 172.0, 259.6083588792934 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": ";buffalo.edu;buffalo.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Rethinking the Backward Propagation for Adversarial Transferability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71785", "id": "OzpTd2EsH1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05fe0c633ae41756540dba2a99a36306-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=OzpTd2EsH1", 
"openreview": "https://openreview.net/forum?id=OzpTd2EsH1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71785", "video": "https://nips.cc/virtual/2023/poster/71785", "author_site": "Wang Xiaosen, Kangheng Tong, Kun He", "tldr": "", "abstract": "Transfer-based attacks generate adversarial examples on the surrogate model, which can mislead other black-box models without access, making it promising to attack real-world applications. Recently, several works have been proposed to boost adversarial transferability, in which the surrogate model is usually overlooked. In this work, we identify that non-linear layers (e.g., ReLU, max-pooling, etc.) truncate the gradient during backward propagation, making the gradient w.r.t. input image imprecise to the loss function. We hypothesize and empirically validate that such truncation undermines the transferability of adversarial examples. Based on these findings, we propose a novel method called Backward Propagation Attack (BPA) to increase the relevance between the gradient w.r.t. input image and loss function so as to generate adversarial examples with higher transferability. Specifically, BPA adopts a non-monotonic function as the derivative of ReLU and incorporates softmax with temperature to smooth the derivative of max-pooling, thereby mitigating the information loss during the backward propagation of gradients. Empirical results on the ImageNet dataset demonstrate that not only does our method substantially boost the adversarial transferability, but it is also general to existing transfer-based attacks. Code is available at https://github.com/Trustworthy-AI-Group/RPA.", "keywords": "Adversarial examples;Convolutional neural networks;Adversarial transferability;Backward propagation", "primary_area": "", "supplementary_material": "", "author": "Xiaosen Wang;Kangheng Tong;Kun He", "authorids": "~Xiaosen_Wang1;~Kangheng_Tong1;~Kun_He1", "gender": "M;M;F", "homepage": "https://xiaosen-wang.github.io/;;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm", "dblp": "241/6284;;59/1028-1", "google_scholar": "sVeDOcsAAAAJ;GXdMG4EAAAAJ;YTQnGJsAAAAJ", "orcid": ";;0000-0001-7627-4604", "linkedin": ";;", "or_profile": "~Xiaosen_Wang1;~Kangheng_Tong1;~Kun_He1", "aff": "Huawei Technologies Ltd.;Huazhong University of Science and Technology;Huazhong University of Sceince and Technology", "aff_domain": "huawei.com;hust.edu.cn;hust.edu.cn", "position": "Researcher;MS student;Full Professor", "bibtex": "@inproceedings{\nwang2023rethinking,\ntitle={Rethinking the Backward Propagation for Adversarial Transferability},\nauthor={Xiaosen Wang and Kangheng Tong and Kun He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=OzpTd2EsH1}\n}", "github": "", "project": "", "reviewers": "5iKz;YaZN;g444;gcCC;NESJ", "pdf_size": 509291, "rating": "5;6;6;7;7", "confidence": "4;4;3;5;3", "soundness": "3;2;2;4;3", "novelty": "2;2;2;4;3", "presentation": "3;3;3;4;3", "wc_summary": "148;97;59;65;46", "wc_strengths": "115;109;50;99;31", "wc_weaknesses": "134;116;153;100;103", "wc_questions": "385;145;19;7;2", "wc_limitations": "24;1;6;1;2", "wc_review": "806;468;287;272;184", "wc_reply_reviewers": "20;19;19;0;0", "wc_reply_authors": "0;0;54;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], 
"presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 83.0, 36.578682316343766 ], "wc_strengths_avg": [ 80.8, 33.83725757209056 ], "wc_weaknesses_avg": [ 121.2, 19.913814300630605 ], "wc_questions_avg": [ 111.6, 146.554563217936 ], "wc_limitations_avg": [ 6.8, 8.795453370918407 ], "wc_review_avg": [ 403.4, 221.45482609326896 ], "wc_reply_reviewers_avg": [ 11.6, 9.478396488858229 ], "wc_reply_authors_avg": [ 10.8, 21.6 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.07142857142857145, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17150583087164616751&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "huawei.com;hust.edu.cn;hust.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Huawei;Huazhong University of Science and Technology", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;http://www.hust.edu.cn", "aff_unique_abbr": "Huawei;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Differentially Private Image Classification by Learning Priors from Random Processes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71784", "id": "P0Avuii9iI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7058bc192a37f5e5a57398887b05f6f6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=P0Avuii9iI", "openreview": "https://openreview.net/forum?id=P0Avuii9iI", "poster": "/media/PosterPDFs/NeurIPS%202023/71784.png?t=1701041453.6476429", "slides": "https://nips.cc/virtual/2023/poster/71784", "video": "https://nips.cc/virtual/2023/poster/71784", "author_site": "Xinyu Tang, Ashwinee Panda, Vikash Sehwag, Prateek Mittal", "tldr": "", "abstract": "In privacy-preserving machine learning, differentially private stochastic gradient descent (DP-SGD) performs worse than SGD due to per-sample gradient clipping and noise addition.\nA recent focus in private learning research is improving the performance of DP-SGD on private data by incorporating priors that are learned on real-world public data.\nIn this work, we explore how we can improve the privacy-utility tradeoff of DP-SGD by learning priors from images generated by random processes and transferring these priors to private data. \nWe propose DP-RandP, a three-phase approach. \nWe attain new state-of-the-art accuracy when training from scratch on CIFAR10, CIFAR100, MedMNIST and ImageNet for a range of privacy budgets $\\\\varepsilon \\\\in [1, 8]$. 
In particular, we improve the previous best reported accuracy on CIFAR10 from $60.6\\%$ to $72.3\\%$ for $\\varepsilon=1$.", "keywords": "Differential privacy;image classification;deep learning", "primary_area": "", "supplementary_material": "", "author": "Xinyu Tang;Ashwinee Panda;Vikash Sehwag;Prateek Mittal", "authorids": "~Xinyu_Tang1;~Ashwinee_Panda1;~Vikash_Sehwag1;~Prateek_Mittal1", "gender": ";M;M;", "homepage": ";https://kiddyboots216.github.io/;https://vsehwag.github.io/;http://www.princeton.edu/~pmittal/", "dblp": "65/5518;270/1582.html;187/5613;", "google_scholar": "uwcdL7gAAAAJ;FM7JCgQAAAAJ;JAkeEG8AAAAJ;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ", "orcid": ";;;0000-0002-4057-0118", "linkedin": ";https://linkedin.com/in/ashwineepanda;;", "or_profile": "~Xinyu_Tang1;~Ashwinee_Panda1;~Vikash_Sehwag1;~Prateek_Mittal1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntang2023differentially,\ntitle={Differentially Private Image Classification by Learning Priors from Random Processes},\nauthor={Xinyu Tang and Ashwinee Panda and Vikash Sehwag and Prateek Mittal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=P0Avuii9iI}\n}", "github": "", "project": "", "reviewers": "mcxP;BUrr;dEgE;t5fK", "pdf_size": 1533329, "rating": "6;6;7;8", "confidence": "4;4;4;4", "soundness": "4;3;2;3", "novelty": "3;3;3;4", "presentation": "3;3;4;3", "wc_summary": "94;57;88;150", "wc_strengths": "32;89;75;60", "wc_weaknesses": "88;205;119;349", "wc_questions": "79;91;75;2", "wc_limitations": "1;2;4;2", "wc_review": "294;444;361;563", "wc_reply_reviewers": "78;144;150;13", "wc_reply_authors": "278;100;210;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.25, 33.53636086399358 ], "wc_strengths_avg": [ 64.0, 21.130546609115438 ], "wc_weaknesses_avg": [ 190.25, 101.18145828164367 ], "wc_questions_avg": [ 61.75, 34.99553542953729 ], "wc_limitations_avg": [ 2.25, 1.0897247358851685 ], "wc_review_avg": [ 415.5, 100.3755448304018 ], "wc_reply_reviewers_avg": [ 96.25, 55.75112106496155 ], "wc_reply_authors_avg": [ 147.0, 106.00471687618433 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12631959318250592666&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hierarchically Gated Recurrent Neural Network for Sequence Modeling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71783", "id":
"P1TCHxJwLB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/694be3548697e9cc8999d45e8d16fe1e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=P1TCHxJwLB", "openreview": "https://openreview.net/forum?id=P1TCHxJwLB", "poster": "/media/PosterPDFs/NeurIPS%202023/71783.png?t=1701938122.570914", "slides": "https://nips.cc/virtual/2023/poster/71783", "video": "https://nips.cc/virtual/2023/poster/71783", "author_site": "Zhen Qin, Songlin Yang, Yiran Zhong", "tldr": "", "abstract": "Transformers have surpassed RNNs in popularity due to their superior abilities in parallel training and long-term dependency modeling.\nRecently, there has been a renewed interest in using linear RNNs for efficient sequence modeling.\nThese linear RNNs often employ gating mechanisms in the output of the linear recurrence layer while ignoring the significance of using forget gates within the recurrence. In this paper, we propose a gated linear RNN model dubbed Hierarchically Gated Recurrent Neural Network (HGRN), which includes forget gates that are lower bounded by a learnable value. The lower bound increases monotonically when moving up layers. This allows the upper layers to model long-term dependencies and the lower layers to model more local, short-term dependencies. Experiments on language modeling, image classification, and long-range arena benchmarks showcase the efficiency and effectiveness of our proposed model. The source code is available at https://github.com/OpenNLPLab/HGRN.", "keywords": "RNN;Sequence Modeling;NLP", "primary_area": "", "supplementary_material": "/attachment/58f3f3c16b8907a09b3306617c6850e78de0e99e.pdf", "author": "Zhen Qin;Songlin Yang;Yiran Zhong", "authorids": "~Zhen_Qin6;~Songlin_Yang1;~Yiran_Zhong1", "gender": ";F;M", "homepage": "https://github.com/Doraemonzzz;https://sustcsonglin.github.io;", "dblp": ";;158/9624", "google_scholar": "https://scholar.google.com.sg/citations?user=IcBRtycAAAAJ;1chlis0AAAAJ;https://scholar.google.com.sg/citations?user=E9NVOBUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhen_Qin6;~Songlin_Yang1;~Yiran_Zhong1", "aff": "Sensetime;ShanghaiTech University;Shanghai AI Lab", "aff_domain": "sensetime.com;shanghaitech.edu.cn;pjlab.org.cn", "position": "Researcher;MS student;PI", "bibtex": "@inproceedings{\nqin2023hierarchically,\ntitle={Hierarchically Gated Recurrent Neural Network for Sequence Modeling},\nauthor={Zhen Qin and Songlin Yang and Yiran Zhong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=P1TCHxJwLB}\n}", "github": "", "project": "", "reviewers": "pKf6;QJYn;rGhK;TZWK", "pdf_size": 694779, "rating": "4;6;6;8", "confidence": "4;5;4;5", "soundness": "2;2;4;4", "novelty": "2;3;4;4", "presentation": "3;1;2;3", "wc_summary": "33;86;92;165", "wc_strengths": "54;115;36;115", "wc_weaknesses": "78;695;282;27", "wc_questions": "1;1197;321;54", "wc_limitations": "1;3;36;27", "wc_review": "167;2096;767;388", "wc_reply_reviewers": "42;1396;246;0", "wc_reply_authors": "314;987;454;0", "reply_reviewers": "1;5;3;0", "reply_authors": "4;6;4;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 94.0, 46.98403984333403 ], "wc_strengths_avg": [ 80.0, 35.57386681259152 ], "wc_weaknesses_avg": [ 270.5, 263.0023764151191 ], "wc_questions_avg": [ 393.25, 
479.63130371150714 ], "wc_limitations_avg": [ 16.75, 15.105876340020794 ], "wc_review_avg": [ 854.5, 748.2073576222035 ], "wc_reply_reviewers_avg": [ 421.0, 570.5549929673738 ], "wc_reply_authors_avg": [ 438.75, 356.67728761444846 ], "reply_reviewers_avg": [ 2.25, 1.920286436967152 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 93, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5022220585250974071&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "sensetime.com;shanghaitech.edu.cn;pjlab.org.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "SenseTime;ShanghaiTech University;Shanghai AI Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sensetime.com;https://www.shanghaitech.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "SenseTime;ShanghaiTech;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "EvoFed: Leveraging Evolutionary Strategies for Communication-Efficient Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71782", "id": "P3Z59Okb5I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c48fe446e651cd49fb58a6833e015103-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=P3Z59Okb5I", "openreview": "https://openreview.net/forum?id=P3Z59Okb5I", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71782", "video": "https://nips.cc/virtual/2023/poster/71782", "author_site": "Mohammad Mahdi Rahimi, Hasnain Irshad Bhatti, Younghyun Park, Humaira Kousar, Do-Yeon Kim, Jaekyun Moon", "tldr": "", "abstract": "Federated Learning (FL) is a decentralized machine learning paradigm that enables collaborative model training across dispersed nodes without having to force individual nodes to share data.\nHowever, its broad adoption is hindered by the high communication costs of transmitting a large number of model parameters. \nThis paper presents EvoFed, a novel approach that integrates Evolutionary Strategies (ES) with FL to address these challenges.\nEvoFed employs a concept of `fitness-based information sharing\u2019, deviating significantly from the conventional model-based FL. \nRather than exchanging the actual updated model parameters, each node transmits a distance-based similarity measure between the locally updated model and each member of the noise-perturbed model population. Each node, as well as the server, generates an identical population set of perturbed models in a completely synchronized fashion using the same random seeds. \nWith properly chosen noise variance and population size, perturbed models can be combined to closely reflect the actual model updated using the local dataset, allowing the transmitted similarity measures (or fitness values) to carry nearly the complete information about the model parameters.\nAs the population size is typically much smaller than the number of model parameters, the savings in communication load is large. The server aggregates these fitness values and is able to update the global model. This global fitness vector is then disseminated back to the nodes, each of which applies the same update to be synchronized to the global model. 
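The exchange described above can be pictured end to end: both sides regenerate the same perturbation population from a shared seed, the client sends one fitness value per population member instead of the full parameter vector, and the server rebuilds an approximate update from those fitnesses. A toy numpy sketch; the softmax weighting is an illustrative choice, not the paper's exact combination rule.

```python
import numpy as np

def population(seed, pop_size, dim, sigma=0.1):
    rng = np.random.default_rng(seed)         # identical on client and server
    return sigma * rng.normal(size=(pop_size, dim))

def client_fitness(local_update, perts):
    """Distance-based similarity to each perturbed model (higher = closer)."""
    return -np.linalg.norm(perts - local_update, axis=1)

def server_reconstruct(fitness, perts):
    w = np.exp(fitness - fitness.max())
    w /= w.sum()
    return w @ perts                           # fitness-weighted combination

dim, pop, seed = 1000, 128, 42
local_update = 0.1 * np.random.default_rng(7).normal(size=dim)
fit = client_fitness(local_update, population(seed, pop, dim))  # 128 floats sent,
approx = server_reconstruct(fit, population(seed, pop, dim))    # not 1000 params
print(np.linalg.norm(approx - local_update) / np.linalg.norm(local_update))
```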
Our analysis shows that EvoFed converges, and our experimental results validate that at the cost of increased local processing loads, EvoFed achieves performance comparable to FedAvg while reducing overall communication requirements drastically in various practical settings.", "keywords": "evolutionary strategies;federated learning;gradient compression;distributed learning", "primary_area": "", "supplementary_material": "/attachment/0285776f32da74647013c0014775e95ce3ca1564.pdf", "author": "Mohammad Mahdi Rahimi;Hasnain Irshad Bhatti;Younghyun Park;Humaira Kousar;Do-Yeon Kim;Jaekyun Moon", "authorids": "~Mohammad_Mahdi_Rahimi1;~Hasnain_Irshad_Bhatti1;~Younghyun_Park1;~Humaira_Kousar1;~Do-Yeon_Kim1;~Jaekyun_Moon2", "gender": "M;M;M;F;;M", "homepage": "https://mahi97.github.io;https://hasnainirshad.github.io/;https://github.com/MoonLab-YH;;;http://comstolab.kaist.ac.kr/people.html", "dblp": "229/4393;326/1184;137/2568;;;78/2744", "google_scholar": "O-KLBfAAAAAJ;https://scholar.google.co.kr/citations?user=aP3vFRcAAAAJ;;m0X7B_IAAAAJ;;", "orcid": "0000-0002-6614-4512;;;;;", "linkedin": ";hasnainirshad/;;humaira-kousar-32b775136/;;", "or_profile": "~Mohammad_Mahdi_Rahimi1;~Hasnain_Irshad_Bhatti1;~Younghyun_Park1;~Humaira_Kousar1;~Do-Yeon_Kim1;~Jaekyun_Moon2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;;KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.edu;;kaist.edu", "position": "PhD student;PhD student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nrahimi2023evofed,\ntitle={EvoFed: Leveraging Evolutionary Strategies for Communication-Efficient Federated Learning},\nauthor={Mohammad Mahdi Rahimi and Hasnain Irshad Bhatti and Younghyun Park and Humaira Kousar and Do-Yeon Kim and Jaekyun Moon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=P3Z59Okb5I}\n}", "github": "", "project": "", "reviewers": "91Ta;rYCA;cJ4W;3tMK", "pdf_size": 1632507, "rating": "6;6;6;6", "confidence": "4;4;3;3", "soundness": "3;2;2;3", "novelty": "3;2;2;3", "presentation": "3;2;2;2", "wc_summary": "70;150;310;136", "wc_strengths": "136;51;70;37", "wc_weaknesses": "100;31;143;151", "wc_questions": "26;471;34;6", "wc_limitations": "7;7;5;21", "wc_review": "339;710;562;351", "wc_reply_reviewers": "0;34;11;23", "wc_reply_authors": "0;11;0;18", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 166.5, 88.1858832240172 ], "wc_strengths_avg": [ 73.5, 37.937448517263256 ], "wc_weaknesses_avg": [ 106.25, 47.57822506147114 ], "wc_questions_avg": [ 134.25, 194.689977913605 ], "wc_limitations_avg": [ 10.0, 6.4031242374328485 ], "wc_review_avg": [ 490.5, 154.68112360595265 ], "wc_reply_reviewers_avg": [ 17.0, 12.747548783981962 ], "wc_reply_authors_avg": [ 7.25, 7.660776723022281 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8070365814965612972&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": 
"kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.edu;;kaist.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Window-Based Distribution Shift Detection for Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71781", "id": "P3n4wFJGs5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4791edcba96fbd82a8962b0f790b52c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=P3n4wFJGs5", "openreview": "https://openreview.net/forum?id=P3n4wFJGs5", "poster": "/media/PosterPDFs/NeurIPS%202023/71781.png?t=1701538468.8277175", "slides": "https://nips.cc/virtual/2023/poster/71781", "video": "https://nips.cc/virtual/2023/poster/71781", "author_site": "Guy Bar-Shalom, Guy Bar Shalom, Yonatan Geifman, Ran El-Yaniv", "tldr": "", "abstract": "To deploy and operate deep neural models in production, the quality of their predictions, which might be contaminated benignly or manipulated maliciously by input distributional deviations, must be monitored and assessed. Specifically, we study the case of monitoring the healthy operation of a deep neural network (DNN) receiving a stream of data, with the aim of detecting input distributional deviations over which the quality of the network's predictions is potentially damaged. Using selective prediction principles, we propose a distribution deviation detection method for DNNs. The proposed method is derived from a tight coverage generalization bound computed over a sample of instances drawn from the true underlying distribution. Based on this bound, our detector continuously monitors the operation of the network over a test window and fires off an alarm whenever a deviation is detected. Our novel detection method performs on-par or better than the state-of-the-art, while consuming substantially lower computation time (five orders of magnitude reduction) and space complexity. Unlike previous methods, which require at least linear dependence on the size of the source distribution for each detection, rendering them inapplicable to ``Google-Scale'' datasets, our approach eliminates this dependence, making it suitable for real-world applications. 
Code is available at [https://github.com/BarSGuy/Window-Based-Distribution-Shift-Detection](https://github.com/BarSGuy/Window-Based-Distribution-Shift-Detection).", "keywords": "Distribution shift detection;Window-based detection", "primary_area": "", "supplementary_material": "/attachment/9ada405f1d989a69444fcf1de9a6ecdf8b61368d.zip", "author": "Guy Bar-Shalom;Yonatan Geifman;Ran El-Yaniv", "authorids": "~Guy_Bar-Shalom1;~Yonatan_Geifman1;~Ran_El-Yaniv1", "gender": "M;;M", "homepage": "https://barsguy.github.io/;;http://www.cs.technion.ac.il/~rani/", "dblp": "321/1651;200/9012;04/1896", "google_scholar": "9Zvzm5MAAAAJ;;https://scholar.google.com.tw/citations?user=D9eVSd8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Guy_Bar-Shalom1;~Yonatan_Geifman1;~Ran_El-Yaniv1", "aff": "Technion, Technion;;Deci", "aff_domain": "technion.ac.il;;deci.ai", "position": "PhD student;;Chief Scientist", "bibtex": "@inproceedings{\nbar-shalom2023windowbased,\ntitle={Window-Based Distribution Shift Detection for Deep Neural Networks},\nauthor={Guy Bar-Shalom and Yonatan Geifman and Ran El-Yaniv},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=P3n4wFJGs5}\n}", "github": "", "project": "", "reviewers": "BTJN;HyPc;M7ue;7FwS;MZpo;UhJJ;a2HS", "pdf_size": 1012599, "rating": "5;5;6;6;6;7;7", "confidence": "3;3;2;3;3;3;2", "soundness": "3;2;3;3;3;3;3", "novelty": "2;3;2;3;3;3;3", "presentation": "3;2;2;3;2;4;3", "wc_summary": "60;75;77;111;93;111;37", "wc_strengths": "81;104;40;73;139;60;41", "wc_weaknesses": "152;289;132;206;93;178;138", "wc_questions": "68;173;8;20;93;184;27", "wc_limitations": "11;3;18;9;66;70;4", "wc_review": "372;644;275;419;484;603;247", "wc_reply_reviewers": "14;844;52;0;113;199;144", "wc_reply_authors": "0;1286;0;0;0;0;0", "reply_reviewers": "1;3;1;0;1;1;1", "reply_authors": "1;3;1;1;1;1;1", "rating_avg": [ 6.0, 0.7559289460184544 ], "confidence_avg": [ 2.7142857142857144, 0.4517539514526256 ], "soundness_avg": [ 2.857142857142857, 0.34992710611188266 ], "novelty_avg": [ 2.7142857142857144, 0.4517539514526256 ], "presentation_avg": [ 2.7142857142857144, 0.6998542122237652 ], "wc_summary_avg": [ 80.57142857142857, 24.99061048163686 ], "wc_strengths_avg": [ 76.85714285714286, 32.86086935389686 ], "wc_weaknesses_avg": [ 169.71428571428572, 58.84778011522436 ], "wc_questions_avg": [ 81.85714285714286, 66.91877501105046 ], "wc_limitations_avg": [ 25.857142857142858, 27.063040539919125 ], "wc_review_avg": [ 434.85714285714283, 141.15586984548938 ], "wc_reply_reviewers_avg": [ 195.14285714285714, 273.01932352932124 ], "wc_reply_authors_avg": [ 183.71428571428572, 450.00625845988105 ], "reply_reviewers_avg": [ 1.1428571428571428, 0.8329931278350428 ], "reply_authors_avg": [ 1.2857142857142858, 0.6998542122237652 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.41833001326703784, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13486218862402292723&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "technion.ac.il;;deci.ai", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;Deci", "aff_unique_dep": ";", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.deci.ai", "aff_unique_abbr": "Technion;Deci", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Temporal Robustness against Data 
poisoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71780", "id": "P5vzRpoOj2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/94bcb01789fccf15afe2764d8fe0f40e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=P5vzRpoOj2", "openreview": "https://openreview.net/forum?id=P5vzRpoOj2", "poster": "/media/PosterPDFs/NeurIPS%202023/71780.png?t=1702182983.0098763", "slides": "https://nips.cc/virtual/2023/poster/71780", "video": "https://nips.cc/virtual/2023/poster/71780", "author_site": "Wenxiao Wang, Soheil Feizi", "tldr": "", "abstract": "Data poisoning considers cases when an adversary manipulates the behavior of machine learning algorithms through malicious training data. Existing threat models of data poisoning center around a single metric, the number of poisoned samples. In consequence, if attackers can poison more samples than expected with affordable overhead, as in many practical scenarios, they may be able to render existing defenses ineffective in a short time. To address this issue, we leverage timestamps denoting the birth dates of data, which are often available but neglected in the past. Benefiting from these timestamps, we propose a temporal threat model of data poisoning with two novel metrics, earliness and duration, which respectively measure how long an attack started in advance and how long an attack lasted. Using these metrics, we define the notions of temporal robustness against data poisoning, providing a meaningful sense of protection even with unbounded amounts of poisoned samples when the attacks are temporally bounded. We present a benchmark with an evaluation protocol simulating continuous data collection and periodic deployments of updated models, thus enabling empirical evaluation of temporal robustness. 
Lastly, we develop and empirically verify a baseline defense, namely temporal aggregation, offering provable temporal robustness and highlighting the potential of our temporal threat model for data poisoning.", "keywords": "Robustness;Data Poisoning;Security;Machine Learning;Backdoor;Adversarial", "primary_area": "", "supplementary_material": "/attachment/89837f380ee75286a1b661517bce087743eef851.zip", "author": "Wenxiao Wang;Soheil Feizi", "authorids": "~Wenxiao_Wang1;~Soheil_Feizi2", "gender": "M;M", "homepage": "https://wangwenxiao.github.io;https://www.cs.umd.edu/~sfeizi/", "dblp": "243/5853-2;57/2132", "google_scholar": "hn0u5VgAAAAJ;lptAmrMAAAAJ", "orcid": ";", "linkedin": "wenxiaowang/;", "or_profile": "~Wenxiao_Wang1;~Soheil_Feizi2", "aff": "Sony AI;University of Maryland, College Park", "aff_domain": "sony.com;umd.edu", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nwang2023temporal,\ntitle={Temporal Robustness against Data poisoning},\nauthor={Wenxiao Wang and Soheil Feizi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=P5vzRpoOj2}\n}", "github": "", "project": "", "reviewers": "exCr;TaU1;NyPv;EzrD", "pdf_size": 1673616, "rating": "5;5;5;6", "confidence": "5;4;4;5", "soundness": "3;3;3;2", "novelty": "2;2;3;3", "presentation": "3;3;4;2", "wc_summary": "117;223;77;71", "wc_strengths": "20;61;87;38", "wc_weaknesses": "41;48;288;295", "wc_questions": "407;8;46;8", "wc_limitations": "1;400;9;4", "wc_review": "586;740;507;416", "wc_reply_reviewers": "683;40;0;317", "wc_reply_authors": "1006;0;48;679", "reply_reviewers": "3;2;0;2", "reply_authors": "4;1;2;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.0, 60.93439094632849 ], "wc_strengths_avg": [ 51.5, 25.124689052802225 ], "wc_weaknesses_avg": [ 168.0, 123.54958518748657 ], "wc_questions_avg": [ 117.25, 168.00502224636026 ], "wc_limitations_avg": [ 103.5, 171.2082065790072 ], "wc_review_avg": [ 562.25, 118.95456065237684 ], "wc_reply_reviewers_avg": [ 260.0, 273.0283868025448 ], "wc_reply_authors_avg": [ 433.25, 425.6050839686951 ], "reply_reviewers_avg": [ 1.75, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12337083443628703550&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sony.com;umd.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Sony;University of Maryland", "aff_unique_dep": "Sony AI;", "aff_unique_url": "https://www.sony.com;https://www.umd.edu", "aff_unique_abbr": "Sony AI;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1", "aff_country_unique": "Japan;United States" }, { "title": "Cause-Effect Inference in Location-Scale Noise Models: Maximum Likelihood vs. 
Independence Testing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71779", "id": "P9I2VQv1uC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/11715d433f6f8b9106baae0df023deb3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=P9I2VQv1uC", "openreview": "https://openreview.net/forum?id=P9I2VQv1uC", "poster": "/media/PosterPDFs/NeurIPS%202023/71779.png?t=1701378973.7817104", "slides": "https://nips.cc/virtual/2023/poster/71779", "video": "https://nips.cc/virtual/2023/poster/71779", "author_site": "Xiangyu Sun, Oliver Schulte", "tldr": "", "abstract": "A fundamental problem of causal discovery is cause-effect inference, to learn the correct causal direction between two random variables. Significant progress has been made through modelling the effect as a function of its cause and a noise term, which allows us to leverage assumptions about the generating function class. The recently introduced heteroscedastic location-scale noise functional models (LSNMs) combine expressive power with identifiability guarantees. LSNM model selection based on maximizing likelihood achieves state-of-the-art accuracy, when the noise distributions are correctly specified. However, through an extensive empirical evaluation, we demonstrate that the accuracy deteriorates sharply when the form of the noise distribution is misspecified by the user. Our analysis shows that the failure occurs mainly when the conditional variance in the anti-causal direction is smaller than that in the causal direction. As an alternative, we find that causal model selection through residual independence testing is much more robust to noise misspecification and misleading conditional variance.", "keywords": "Causal Discovery;Cause-Effect Inference;Location-Scale Noise Models", "primary_area": "", "supplementary_material": "", "author": "Xiangyu Sun;Oliver Schulte", "authorids": "~Xiangyu_Sun1;~Oliver_Schulte1", "gender": ";M", "homepage": ";http://www.cs.sfu.ca/~oschulte/", "dblp": ";s/OliverSchulte", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Xiangyu_Sun1;~Oliver_Schulte1", "aff": ";Simon Fraser University", "aff_domain": ";sfu.ca", "position": ";Full Professor", "bibtex": "@inproceedings{\nsun2023causeeffect,\ntitle={Cause-Effect Inference in Location-Scale Noise Models: Maximum Likelihood vs. 
Independence Testing},\nauthor={Xiangyu Sun and Oliver Schulte},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=P9I2VQv1uC}\n}", "github": "", "project": "", "reviewers": "bEFo;ttx5;GXXz;yfAc", "pdf_size": 1997968, "rating": "3;6;7;8", "confidence": "4;4;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;3;4", "wc_summary": "108;78;32;123", "wc_strengths": "103;35;52;61", "wc_weaknesses": "280;97;31;46", "wc_questions": "131;251;94;55", "wc_limitations": "50;39;48;1", "wc_review": "672;500;257;286", "wc_reply_reviewers": "273;366;16;14", "wc_reply_authors": "849;394;13;12", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 85.25, 34.751798514609284 ], "wc_strengths_avg": [ 62.75, 25.043711785596 ], "wc_weaknesses_avg": [ 113.5, 99.19299370419263 ], "wc_questions_avg": [ 132.75, 73.37020853180124 ], "wc_limitations_avg": [ 34.5, 19.78004044485248 ], "wc_review_avg": [ 428.75, 168.91029423927958 ], "wc_reply_reviewers_avg": [ 167.25, 155.76163680444552 ], "wc_reply_authors_avg": [ 317.0, 344.38132934292474 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3086066999241838, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11356540570852811183&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";sfu.ca", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Type-to-Track: Retrieve Any Object via Prompt-based Tracking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71778", "id": "PARMyW6xX0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/098491b37deebbe6c007e69815729e09-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PARMyW6xX0", "openreview": "https://openreview.net/forum?id=PARMyW6xX0", "poster": "/media/PosterPDFs/NeurIPS%202023/71778.png?t=1701217962.7829685", "slides": "https://nips.cc/virtual/2023/poster/71778", "video": "https://nips.cc/virtual/2023/poster/71778", "author_site": "Pha Nguyen, Kha Gia Quach, Kris Kitani, Khoa Luu", "tldr": "", "abstract": "One of the recent trends in vision problems is to use natural language captions to describe the objects of interest. This approach can overcome some limitations of traditional methods that rely on bounding boxes or category annotations. This paper introduces a novel paradigm for Multiple Object Tracking called Type-to-Track, which allows users to track objects in videos by typing natural language descriptions. We present a new dataset for that Grounded Multiple Object Tracking task, called GroOT, that contains videos with various types of objects and their corresponding textual captions describing their appearance and action in detail. Additionally, we introduce two new evaluation protocols and formulate evaluation metrics specifically for this task. 
We develop a new efficient method, a transformer-based eMbed-ENcoDE-extRact framework (MENDER), built on third-order tensor decomposition. Experiments in five scenarios show that our MENDER approach outperforms an alternative two-stage design in both accuracy and efficiency, with up to 14.7\\% higher accuracy and a $4\\times$ speedup.", "keywords": "Grounded Object Tracking;Multiple Object Tracking;Vision Language", "primary_area": "", "supplementary_material": "/attachment/bc536b9c565c8142b9cc28a391f5158432e2e040.pdf", "author": "Pha Nguyen;Kha Gia Quach;Kris M. Kitani;Khoa Luu", "authorids": "~Pha_Nguyen1;~Kha_Gia_Quach1;~Kris_M._Kitani1;~Khoa_Luu2", "gender": ";;M;M", "homepage": ";;http://www.cs.cmu.edu/~kkitani/;https://uark-cviu.github.io", "dblp": ";;42/163;43/8092", "google_scholar": ";;yv3sH74AAAAJ;JPAl8-gAAAAJ", "orcid": ";;0000-0002-9389-4060;0000-0003-2104-0901", "linkedin": ";;;khoa-luu-90900215/", "or_profile": "~Pha_Nguyen1;~Kha_Gia_Quach1;~Kris_M._Kitani1;~Khoa_Luu2", "aff": ";;Carnegie Mellon University;University of Arkansas, Fayetteville", "aff_domain": ";;cmu.edu;uark.edu", "position": ";;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023typetotrack,\ntitle={Type-to-Track: Retrieve Any Object via Prompt-based Tracking},\nauthor={Pha Nguyen and Kha Gia Quach and Kris M. Kitani and Khoa Luu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PARMyW6xX0}\n}", "github": "", "project": "", "reviewers": "j89C;HDeT;RiBQ;9rR3;h9ub", "pdf_size": 7253864, "rating": "5;5;6;7;7", "confidence": "5;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "3;2;4;3;3", "wc_summary": "59;29;43;123;66", "wc_strengths": "44;42;366;104;87", "wc_weaknesses": "184;133;245;365;131", "wc_questions": "4;103;99;172;17", "wc_limitations": "10;1;10;8;11", "wc_review": "301;308;763;772;312", "wc_reply_reviewers": "81;53;75;373;239", "wc_reply_authors": "81;25;92;84;97", "reply_reviewers": "1;1;1;2;2", "reply_authors": "2;2;3;3;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 64.0, 32.17452408350433 ], "wc_strengths_avg": [ 128.6, 121.1207661798752 ], "wc_weaknesses_avg": [ 211.6, 87.27336363404358 ], "wc_questions_avg": [ 79.0, 61.796440026914176 ], "wc_limitations_avg": [ 8.0, 3.63318042491699 ], "wc_review_avg": [ 491.2, 225.64343553491648 ], "wc_reply_reviewers_avg": [ 164.2, 123.64206403971102 ], "wc_reply_authors_avg": [ 75.8, 26.026140705068048 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5590169943749476, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1989966085901478459&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;cmu.edu;uark.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;University of Arkansas", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.uark.edu", "aff_unique_abbr": "CMU;UARK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Fayetteville", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Effective Robustness against Natural Distribution Shifts for 
Models with Different Training Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71777", "id": "PAYXfIUKWY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9000ecb86d45c442a1d38fae68dd8fb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PAYXfIUKWY", "openreview": "https://openreview.net/forum?id=PAYXfIUKWY", "poster": "/media/PosterPDFs/NeurIPS%202023/71777.png?t=1702153591.9513986", "slides": "https://nips.cc/virtual/2023/poster/71777", "video": "https://nips.cc/virtual/2023/poster/71777", "author_site": "Zhouxing Shi, Nicholas Carlini, Ananth Balashankar, Ludwig Schmidt, Cho-Jui Hsieh, Alex Beutel, Yao Qin", "tldr": "", "abstract": "``Effective robustness'' measures the extra out-of-distribution (OOD) robustness beyond what can be predicted from the in-distribution (ID) performance. Existing effective robustness evaluations typically use a single test set such as ImageNet to evaluate the ID accuracy. This becomes problematic when evaluating models trained on different data distributions, e.g., comparing models trained on ImageNet vs. zero-shot language-image pre-trained models trained on LAION. In this paper, we propose a new evaluation metric to evaluate and compare the effective robustness of models trained on different data. To do this, we control for the accuracy on multiple ID test sets that cover the training distributions for all the evaluated models. Our new evaluation metric provides a better estimate of effective robustness when there are models with different training data. It may also explain the surprising effective robustness gains of zero-shot CLIP-like models exhibited in prior works that used ImageNet as the only ID test set, while the gains diminish under our new evaluation. 
Additional artifacts including interactive visualizations are provided at https://shizhouxing.github.io/effective-robustness.", "keywords": "Effective robustness;natural distribution shifts;out-of-distribution robustness", "primary_area": "", "supplementary_material": "", "author": "Zhouxing Shi;Nicholas Carlini;Ananth Balashankar;Ludwig Schmidt;Cho-Jui Hsieh;Alex Beutel;Yao Qin", "authorids": "~Zhouxing_Shi1;~Nicholas_Carlini1;~Ananth_Balashankar1;~Ludwig_Schmidt1;~Cho-Jui_Hsieh1;~Alex_Beutel1;~Yao_Qin1", "gender": ";;M;M;M;;", "homepage": "https://shizhouxing.github.io;http://nicholas.carlini.com;https://ananthbalashankar.github.io/;http://people.csail.mit.edu/ludwigs/;http://web.cs.ucla.edu/~chohsieh/index.html;;https://yaoqin1.github.io", "dblp": "232/2169;145/1806;141/2020;141/2720;14/2770;;66/10420-1", "google_scholar": "YFIr4PwAAAAJ;;dr5VLwEAAAAJ;SWMKy70AAAAJ;Wy89g4IAAAAJ;;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;;;;", "linkedin": ";;;ludwig-schmidt-87ba3612/;;;", "or_profile": "~Zhouxing_Shi1;~Nicholas_Carlini1;~Ananth_Balashankar1;~Ludwig_Schmidt1;~Cho-Jui_Hsieh1;~Alex_Beutel1;~Yao_Qin1", "aff": "University of California, Los Angeles;Google;Google;Allen Institute for Artificial Intelligence;Amazon;;Google", "aff_domain": "ucla.edu;google.com;google.com;allenai.org;amazon.com;;google.com", "position": "PhD student;Researcher;Researcher;Researcher;visiting scholar;;Researcher", "bibtex": "@inproceedings{\nshi2023effective,\ntitle={Effective Robustness against Natural Distribution Shifts for Models with Different Training Data},\nauthor={Zhouxing Shi and Nicholas Carlini and Ananth Balashankar and Ludwig Schmidt and Cho-Jui Hsieh and Alex Beutel and Yao Qin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PAYXfIUKWY}\n}", "github": "", "project": "", "reviewers": "sN9Q;mbLQ;Kcyx;MjfW", "pdf_size": 2754954, "rating": "5;5;6;7", "confidence": "4;3;3;5", "soundness": "3;2;2;4", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "79;41;192;80", "wc_strengths": "103;25;107;190", "wc_weaknesses": "201;344;308;540", "wc_questions": "65;2;290;469", "wc_limitations": "53;1;5;1", "wc_review": "501;413;902;1280", "wc_reply_reviewers": "147;565;1802;45", "wc_reply_authors": "168;467;1376;13", "reply_reviewers": "2;1;5;1", "reply_authors": "2;2;7;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 56.50221234606659 ], "wc_strengths_avg": [ 106.25, 58.36683561749771 ], "wc_weaknesses_avg": [ 348.25, 122.56503375759335 ], "wc_questions_avg": [ 206.5, 185.55390052488792 ], "wc_limitations_avg": [ 15.0, 22.0 ], "wc_review_avg": [ 774.0, 345.4236529249264 ], "wc_reply_reviewers_avg": [ 639.75, 698.7386403370004 ], "wc_reply_authors_avg": [ 506.0, 528.1368193943687 ], "reply_reviewers_avg": [ 2.25, 1.6393596310755 ], "reply_authors_avg": [ 3.25, 2.165063509461097 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1506600567066871549&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "ucla.edu;google.com;google.com;allenai.org;amazon.com;;google.com", "author_num": 7, "aff_unique_index": "0;1;1;2;3;1", "aff_unique_norm": "University 
of California, Los Angeles;Google;Allen Institute for Artificial Intelligence;Amazon", "aff_unique_dep": ";Google;;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.google.com;https://allenai.org;https://www.amazon.com", "aff_unique_abbr": "UCLA;Google;AI2;Amazon", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Los Angeles;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ATMAN: Understanding Transformer Predictions Through Memory Efficient Attention Manipulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71776", "id": "PBpEb86bj7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c83bc020a020cdeb966ed10804619664-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PBpEb86bj7", "openreview": "https://openreview.net/forum?id=PBpEb86bj7", "poster": "/media/PosterPDFs/NeurIPS%202023/71776.png?t=1699537238.3615713", "slides": "https://nips.cc/virtual/2023/poster/71776", "video": "https://nips.cc/virtual/2023/poster/71776", "author_site": "Bj\u00f6rn Deiseroth, Mayukh Deb, Samuel Weinbach, Manuel Brack, Patrick Schramowski, Kristian Kersting", "tldr": "", "abstract": "Generative transformer models have become increasingly complex, with large numbers of parameters and the ability to process multiple input modalities. Current methods for explaining their predictions are resource-intensive. Most crucially, they require prohibitively large amounts of additional memory, since they rely on backpropagation which allocates almost twice as much GPU memory as the forward pass. \nThis makes it difficult, if not impossible, to use explanations in production. \nWe present AtMan that provides explanations of generative transformer models at almost no extra cost. Specifically, AtMan is a modality-agnostic perturbation method that manipulates the attention mechanisms of transformers to produce relevance maps for the input with respect to the output prediction. Instead of using backpropagation, AtMan applies a parallelizable token-based search method relying on cosine similarity neighborhood in the embedding space. \nOur exhaustive experiments on text and image-text benchmarks demonstrate that AtMan outperforms current state-of-the-art gradient-based methods on several metrics while being computationally efficient. 
As such, AtMan is suitable for use in large model inference deployments.", "keywords": "explainability;attention manipulation;perturbation;large language model;multi-modality;generative decoder;efficiency;transformer", "primary_area": "", "supplementary_material": "/attachment/32aa1413f18d3d5a24da31bd0f2c95f8f168df6a.pdf", "author": "Bj\u00f6rn Deiseroth;Mayukh Deb;Samuel Weinbach;Manuel Brack;Patrick Schramowski;Kristian Kersting", "authorids": "~Bj\u00f6rn_Deiseroth1;~Mayukh_Deb2;~Samuel_Weinbach1;~Manuel_Brack1;~Patrick_Schramowski1;~Kristian_Kersting1", "gender": "M;M;M;M;M;M", "homepage": "https://mayukhdeb.github.io/;https://aleph-alpha.com;;https://ml-research.github.io/people/pschramowski/index.html;http://www.ml.informatik.tu-darmstadt.de/;", "dblp": ";278/8408;326/8265;217/1650;40/3793;28/10842", "google_scholar": ";;kJ9Abf8AAAAJ;GD481RkAAAAJ;QY-earAAAAAJ;https://scholar.google.de/citations?user=OjuwG6YAAAAJ", "orcid": "0000-0003-2826-2857;0000-0001-9481-5363;;0000-0003-1231-7120;0000-0002-2873-9152;", "linkedin": "mayukhdeb/;samuel-weinbach;;;;bj%C3%B6rn-deiseroth-03b4a7147/", "or_profile": "~Mayukh_Deb2;~Samuel_Weinbach1;~Manuel_Brack1;~Patrick_Schramowski1;~Kristian_Kersting1;~Bjoern_Deiseroth1", "aff": "Amrita Vishwa Vidyapeetham;Aleph Alpha GmbH;Adobe Systems;TU Darmstadt;TU Darmstadt;Technische Universit\u00e4t Darmstadt", "aff_domain": "amrita.edu;aleph-alpha.com;adobe.com;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "Undergrad student;Researcher;Intern;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\ndeiseroth2023atman,\ntitle={{ATMAN}: Understanding Transformer Predictions Through Memory Efficient Attention Manipulation},\nauthor={Bj{\\\"o}rn Deiseroth and Mayukh Deb and Samuel Weinbach and Manuel Brack and Patrick Schramowski and Kristian Kersting},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PBpEb86bj7}\n}", "github": "", "project": "", "reviewers": "HHf8;wnDz;FKKY;sfjw;YuQG", "pdf_size": 19089676, "rating": "3;5;6;6;7", "confidence": "4;4;3;4;2", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;4;3;2;4", "wc_summary": "47;48;76;62;69", "wc_strengths": "52;79;96;61;38", "wc_weaknesses": "251;310;133;105;1", "wc_questions": "79;210;5;15;1", "wc_limitations": "1;60;31;26;1", "wc_review": "430;707;341;269;110", "wc_reply_reviewers": "27;128;94;0;0", "wc_reply_authors": "0;187;150;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 60.4, 11.42978564978364 ], "wc_strengths_avg": [ 65.2, 20.350921355064003 ], "wc_weaknesses_avg": [ 160.0, 109.37641427657061 ], "wc_questions_avg": [ 62.0, 79.2111103318215 ], "wc_limitations_avg": [ 23.8, 21.939917957914062 ], "wc_review_avg": [ 371.4, 197.88946409548944 ], "wc_reply_reviewers_avg": [ 49.8, 52.055355151991805 ], "wc_reply_authors_avg": [ 67.4, 83.37289727483386 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7003492917357614, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13926363923554718796&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": 
"amrita.edu;aleph-alpha.com;adobe.com;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "author_num": 6, "aff_unique_index": "0;1;2;3;3;3", "aff_unique_norm": "Amrita Vishwa Vidyapeetham;Aleph Alpha;Adobe;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";;Adobe Systems Incorporated;", "aff_unique_url": "https://www.amrita.edu;https://www.aleph-alpha.com;https://www.adobe.com;https://www.tu-darmstadt.de", "aff_unique_abbr": "Amrita;Aleph Alpha;Adobe;TU Darmstadt", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;2;1;1;1", "aff_country_unique": "India;Germany;United States" }, { "title": "On the Need for a Language Describing Distribution Shifts: Illustrations on Tabular Datasets", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73603", "id": "PF0lxayYST", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a134eaebd55b7406ff29cd75d5f1a622-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=PF0lxayYST", "openreview": "https://openreview.net/forum?id=PF0lxayYST", "poster": "/media/PosterPDFs/NeurIPS%202023/73603.png?t=1701636498.9203691", "slides": "https://nips.cc/virtual/2023/poster/73603", "video": "https://nips.cc/virtual/2023/poster/73603", "author_site": "Jiashuo Liu, Tianyu Wang, Peng Cui, Hongseok Namkoong", "tldr": "", "abstract": "Different distribution shifts require different algorithmic and operational\n interventions. Methodological research must be grounded by the specific\n shifts they address. Although nascent benchmarks provide a promising\n empirical foundation, they \\emph{implicitly} focus on covariate\n shifts, and the validity of empirical findings depends on the type of shift, \n e.g., previous observations on algorithmic performance can fail to be valid when\n the $Y|X$ distribution changes. We conduct a thorough investigation of\n natural shifts in 5 tabular datasets over 86,000 model configurations, and\n find that $Y|X$-shifts are most prevalent. To encourage researchers to\n develop a refined language for distribution shifts, we build\n ``WhyShift``, an empirical testbed of curated real-world shifts where\n we characterize the type of shift we benchmark performance over. Since\n $Y|X$-shifts are prevalent in tabular settings, we \\emph{identify covariate\n regions} that suffer the biggest $Y|X$-shifts and discuss implications for\n algorithmic and data-based interventions. 
Our testbed highlights the\n importance of future research that builds an understanding of why\n distributions differ.", "keywords": "distribution shift types;natural distribution shifts;tabular datasets", "primary_area": "", "supplementary_material": "/attachment/a0ecbadeaf74eaa089a86bbdf7dbce05faf40bf6.pdf", "author": "Jiashuo Liu;Tianyu Wang;Peng Cui;Hongseok Namkoong", "authorids": "~Jiashuo_Liu1;~Tianyu_Wang6;~Peng_Cui1;~Hongseok_Namkoong2", "gender": "M;M;M;M", "homepage": "https://ljsthu.github.io;https://wangtianyu61.github.io;http://pengcui.thumedialab.com/;https://hsnamkoong.github.io", "dblp": "180/2823;;31/891-1;191/6680", "google_scholar": "b7bpt5MAAAAJ;mKT6mKEAAAAJ;https://scholar.google.com.tw/citations?user=G8x97ZgAAAAJ;dyXX1EgAAAAJ", "orcid": ";0009-0000-2095-431X;0000-0003-2957-8511;", "linkedin": "jiashuo-liu-244a6b1a4;;;", "or_profile": "~Jiashuo_Liu1;~Tianyu_Wang6;~Peng_Cui1;~Hongseok_Namkoong2", "aff": "Stanford University;Columbia University;Tsinghua University;Columbia University", "aff_domain": "stanford.edu;columbia.edu;tsinghua.edu.cn;columbia.edu", "position": "Researcher;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023on,\ntitle={On the Need for a Language Describing Distribution Shifts: Illustrations on Tabular Datasets},\nauthor={Jiashuo Liu and Tianyu Wang and Peng Cui and Hongseok Namkoong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=PF0lxayYST}\n}", "github": "", "project": "", "reviewers": "Tsg7;wGZu;np4K;AmNg", "pdf_size": 2259754, "rating": "5;6;7;8", "confidence": "4;2;3;3", "wc_summary_and_contributions": "41;58;76;172", "wc_strengths": "15;96;192;138", "wc_improvement": "38;61;89;421", "wc_limitations": "73;88;98;58", "wc_correctness": "86;14;116;376", "wc_clarity": "1;7;8;567", "wc_relation_to_prior_work": "44;12;130;249", "wc_documentation": "1;17;46;170", "wc_additional_feedback": "1;1;1;1", "wc_review": "300;354;756;2152", "wc_reply_reviewers": "0;13;23;742", "wc_reply_authors": "830;551;576;1770", "reply_reviewers": "0;1;1;3", "reply_authors": "2;1;2;6", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 86.75, 50.751231512151506 ], "wc_strengths_avg": [ 110.25, 64.66983454439945 ], "wc_improvement_avg": [ 152.25, 156.2103949806158 ], "wc_limitations_avg": [ 79.25, 15.155444566227676 ], "wc_correctness_avg": [ 148.0, 136.75525584049777 ], "wc_clarity_avg": [ 145.75, 243.22353401757815 ], "wc_relation_to_prior_work_avg": [ 108.75, 91.75340593133315 ], "wc_documentation_avg": [ 58.5, 66.36452365533863 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 890.5, 749.3322026978421 ], "wc_reply_reviewers_avg": [ 194.5, 316.2044433590395 ], "wc_reply_authors_avg": [ 931.75, 496.1211419603079 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3162277660168379, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11260288087838883453&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "stanford.edu;columbia.edu;tsinghua.edu.cn;columbia.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Stanford University;Columbia University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.stanford.edu;https://www.columbia.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Stanford;Columbia;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Minigrid & Miniworld: Modular & Customizable Reinforcement Learning Environments for Goal-Oriented Tasks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73602", "id": "PFfmfspm28", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8916198466e8ef218a2185a491b49fa-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=PFfmfspm28", "openreview": "https://openreview.net/forum?id=PFfmfspm28", "poster": "/media/PosterPDFs/NeurIPS%202023/73602.png?t=1698672444.8311317", "slides": "https://nips.cc/virtual/2023/poster/73602", "video": "https://nips.cc/virtual/2023/poster/73602", "author_site": "Maxime Chevalier-Boisvert, Bolun Dai, Mark Towers, Rodrigo Perez-Vicente, Lucas Willems, Salem Lahlou, Suman Pal, Pablo Samuel Castro, J Terry", "tldr": "", "abstract": "We present the Minigrid and Miniworld libraries which provide a suite of goal-oriented 2D and 3D environments. The libraries were explicitly created with a minimalistic design paradigm to allow users to rapidly develop new environments for a wide range of research-specific needs. As a result, both have received widescale adoption by the RL community, facilitating research in a wide range of areas. In this paper, we outline the design philosophy, environment details, and their world generation API. We also showcase the additional capabilities brought by the unified API between Minigrid and Miniworld through case studies on transfer learning (for both RL agents and humans) between the different observation spaces. 
The source code of Minigrid and Miniworld can be found at https://github.com/Farama-Foundation/Minigrid and https://github.com/Farama-Foundation/Miniworld along with their documentation at https://minigrid.farama.org/ and https://miniworld.farama.org/.", "keywords": "Reinforcement Learning;Simulation Environment", "primary_area": "", "supplementary_material": "/attachment/abb514c57843d598a28cb81e19cee692f3f2eb52.pdf", "author": "Maxime Chevalier-Boisvert;Bolun Dai;Mark Towers;Rodrigo De Lazcano Perez-Vicente;Lucas Willems;Salem Lahlou;Suman Pal;Pablo Samuel Castro;J K Terry", "authorids": "~Maxime_Chevalier-Boisvert1;~Bolun_Dai1;~Mark_Towers1;~Rodrigo_De_Lazcano_Perez-Vicente2;~Lucas_Willems1;~Salem_Lahlou3;suman7495@gmail.com;~Pablo_Samuel_Castro1;~J_K_Terry1", "gender": "F;;M;M;M;M;;M;F", "homepage": ";https://bolundai0216.github.io;;;http://www.lucaswillems.com;https://la7.lu;;https://psc-g.github.io/;", "dblp": "27/7915.html;;;;;228/8314;;05/5455;", "google_scholar": "lSg6I8gAAAAJ;;mBjei5sAAAAJ;https://scholar.google.com/citations?hl=en;;xLSkCrIAAAAJ;;https://scholar.google.ca/citations?user=jn5r6TsAAAAJ;QcDnpLgAAAAJ", "orcid": ";;0000-0002-2609-2041;;;;;;", "linkedin": "maximecb;;markttowers/;;;;;pablo-samuel-castro-2113641b/;", "or_profile": "~Maxime_Chevalier-Boisvert1;~Bolun_Dai1;~Mark_Towers1;~Rodrigo_De_Lazcano_Perez-Vicente2;~Lucas_Willems1;~Salem_Lahlou3;suman7495@gmail.com;~Pablo_Samuel_Castro1;~J_K_Terry1", "aff": "Shopify;New York University;University of Southampton;Farama Foundation;;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;;Google;University of Maryland, College Park", "aff_domain": "shopify.com;nyu.edu;soton.ac.uk;farama.org;;mila.umontreal.ca;;google.com;umd.edu", "position": "Researcher;PhD student;PhD student;Researcher;;PhD student;;Researcher;PhD student", "bibtex": "@inproceedings{\nchevalier-boisvert2023minigrid,\ntitle={Minigrid \\& Miniworld: Modular \\& Customizable Reinforcement Learning Environments for Goal-Oriented Tasks},\nauthor={Maxime Chevalier-Boisvert and Bolun Dai and Mark Towers and Rodrigo De Lazcano Perez-Vicente and Lucas Willems and Salem Lahlou and Suman Pal and Pablo Samuel Castro and J K Terry},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=PFfmfspm28}\n}", "github": "", "project": "", "reviewers": "2jWC;FdB5;Semn;UmzS;RPEh", "pdf_size": 4283417, "rating": "4;6;7;7;8", "confidence": "4;3;5;4;4", "wc_summary_and_contributions": "87;107;42;64;193", "wc_strengths": "71;60;45;77;64", "wc_improvement": "116;81;241;50;68", "wc_limitations": "17;63;2;20;21", "wc_correctness": "14;14;21;1;33", "wc_clarity": "11;10;1;4;6", "wc_relation_to_prior_work": "15;96;2;1;1", "wc_documentation": "4;6;1;1;23", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "336;438;356;219;410", "wc_reply_reviewers": "335;138;31;14;0", "wc_reply_authors": "1121;1116;529;160;636", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;2;1;2;2", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 98.6, 51.99461510579725 ], "wc_strengths_avg": [ 63.4, 10.89219904335208 ], "wc_improvement_avg": [ 111.2, 68.4029239141135 ], "wc_limitations_avg": [ 24.6, 20.382345301755635 ], "wc_correctness_avg": [ 16.6, 10.44222198576529 ], "wc_clarity_avg": [ 6.4, 3.7202150475476548 ], "wc_relation_to_prior_work_avg": [ 23.0, 36.88360069190642 ], 
"wc_documentation_avg": [ 7.0, 8.221921916437786 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 351.8, 75.79023683826301 ], "wc_reply_reviewers_avg": [ 103.6, 125.5079280364392 ], "wc_reply_authors_avg": [ 712.4, 367.27787845172486 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.23312620206007845, "gs_citation": 244, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17469216374499512843&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "shopify.com;nyu.edu;soton.ac.uk;farama.org;;mila.umontreal.ca;;google.com;umd.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Shopify Inc.;New York University;University of Southampton;Farama Foundation;University of Montreal;Google;University of Maryland", "aff_unique_dep": ";;;;Montreal Institute for Learning Algorithms;Google;", "aff_unique_url": "https://www.shopify.com;https://www.nyu.edu;https://www.southampton.ac.uk;https://www.faramafoundation.org;https://www.mila.quebec;https://www.google.com;https://www/umd.edu", "aff_unique_abbr": "Shopify;NYU;Southampton;;MILA;Google;UMD", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Montreal;Mountain View;College Park", "aff_country_unique_index": "0;1;2;1;0;1;1", "aff_country_unique": "Canada;United States;United Kingdom" }, { "title": "An Iterative Self-Learning Framework for Medical Domain Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71775", "id": "PHKkBbuJWM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac0035c349f3fe8af6a93fe44697b5bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PHKkBbuJWM", "openreview": "https://openreview.net/forum?id=PHKkBbuJWM", "poster": "/media/PosterPDFs/NeurIPS%202023/71775.png?t=1702204492.0872388", "slides": "https://nips.cc/virtual/2023/poster/71775", "video": "https://nips.cc/virtual/2023/poster/71775", "author_site": "Zhenbang Wu, Huaxiu Yao, David Liebovitz, Jimeng Sun", "tldr": "", "abstract": "Deep learning models have been widely used to assist doctors with clinical decision-making. However, these models often encounter a significant performance drop when applied to data that differs from the distribution they were trained on. This challenge is known as the domain shift problem. Existing domain generalization algorithms attempt to address this problem by assuming the availability of domain IDs and training a single model to handle all domains. However, in healthcare settings, patients can be classified into numerous latent domains, where the actual domain categorizations are unknown. Furthermore, each patient domain exhibits distinct clinical characteristics, making it sub-optimal to train a single model for all domains. To overcome these limitations, we propose SLGD, a self-learning framework that iteratively discovers decoupled domains and trains personalized classifiers for each decoupled domain. We evaluate the generalizability of SLGD across spatial and temporal data distribution shifts on two real-world public EHR datasets: eICU and MIMIC-IV. 
Our results show that SLGD achieves up to 11% improvement in the AUPRC score over the best baseline.", "keywords": "healthcare;clinical predictive model;domain generalization", "primary_area": "", "supplementary_material": "/attachment/800cee1dcbe981fef2ed1c79dd646a132f276a9d.pdf", "author": "Zhenbang Wu;Huaxiu Yao;David Liebovitz;Jimeng Sun", "authorids": "~Zhenbang_Wu1;~Huaxiu_Yao1;~David_Liebovitz1;~Jimeng_Sun3", "gender": "M;M;M;", "homepage": ";http://huaxiuyao.mystrikingly.com;;http://sunlab.org", "dblp": "315/0212;197/1635;;", "google_scholar": "N8p-spIAAAAJ;A20BZnQAAAAJ;;9jmmp5sAAAAJ", "orcid": ";;0000-0002-2518-5940;0000-0003-1512-6426", "linkedin": ";huaxiuyao/;;jimengsun/", "or_profile": "~Zhenbang_Wu1;~Huaxiu_Yao1;~David_Liebovitz1;~Jimeng_Sun3", "aff": "University of Illinois Urbana Champaign;Computer Science Department, Stanford University;Northwestern University;Georgia Institute of Technology", "aff_domain": "illinois.edu;cs.stanford.edu;northwestern.edu;gatech.edu", "position": "PhD student;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023an,\ntitle={An Iterative Self-Learning Framework for Medical Domain Generalization},\nauthor={Zhenbang Wu and Huaxiu Yao and David Liebovitz and Jimeng Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PHKkBbuJWM}\n}", "github": "", "project": "", "reviewers": "Cuot;36fo;cG9z;1qdB;RZ7L", "pdf_size": 779234, "rating": "3;6;6;7;7", "confidence": "4;3;3;3;3", "soundness": "2;3;3;2;3", "novelty": "2;3;2;4;3", "presentation": "3;2;2;4;4", "wc_summary": "89;84;200;118;87", "wc_strengths": "88;246;115;99;96", "wc_weaknesses": "314;344;384;222;69", "wc_questions": "3;145;116;188;46", "wc_limitations": "65;35;37;54;89", "wc_review": "559;854;852;681;387", "wc_reply_reviewers": "214;23;20;65;13", "wc_reply_authors": "777;42;64;55;47", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 115.6, 43.939048692478536 ], "wc_strengths_avg": [ 128.8, 59.25335433542982 ], "wc_weaknesses_avg": [ 266.6, 112.29888690454595 ], "wc_questions_avg": [ 99.6, 66.85686202627222 ], "wc_limitations_avg": [ 56.0, 19.879637823662684 ], "wc_review_avg": [ 666.6, 178.57950610302404 ], "wc_reply_reviewers_avg": [ 67.0, 75.72846228466547 ], "wc_reply_authors_avg": [ 197.0, 290.0958462301727 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9525793444156804, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2503882993176759445&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "illinois.edu;cs.stanford.edu;northwestern.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;Stanford University;Northwestern University;Georgia Institute of Technology", "aff_unique_dep": ";Computer Science Department;;", "aff_unique_url": "https://illinois.edu;https://www.stanford.edu;https://www.northwestern.edu;https://www.gatech.edu", "aff_unique_abbr": "UIUC;Stanford;NU;Georgia Tech", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Stanford;", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Contrastive Moments: Unsupervised Halfspace Learning in Polynomial Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71774", "id": "PHbqznMa1i", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5a71ba556c84fef542aaace56b6cfe9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PHbqznMa1i", "openreview": "https://openreview.net/forum?id=PHbqznMa1i", "poster": "/media/PosterPDFs/NeurIPS%202023/71774.png?t=1702063828.2858639", "slides": "https://nips.cc/virtual/2023/poster/71774", "video": "https://nips.cc/virtual/2023/poster/71774", "author_site": "Xinyuan Cao, Santosh Vempala", "tldr": "", "abstract": "We give a polynomial-time algorithm for learning high-dimensional halfspaces with margins in $d$-dimensional space to within desired Total Variation (TV) distance when the ambient distribution is an unknown affine transformation of the $d$-fold product of an (unknown) symmetric one-dimensional logconcave distribution, and the halfspace is introduced by deleting at least an $\\epsilon$ fraction of the data in one of the component distributions. Notably, our algorithm does not need labels and establishes the unique (and efficient) identifiability of the hidden halfspace under this distributional assumption. The sample and time complexity of the algorithm are polynomial in the dimension and $1/\\epsilon$. The algorithm uses only the first two moments of *suitable re-weightings* of the empirical distribution, which we call *contrastive moments*; its analysis uses classical facts about generalized Dirichlet polynomials and relies crucially on a new monotonicity property of the moment ratio of truncations of logconcave distributions. Such algorithms, based only on first and second moments were suggested in earlier work, but hitherto eluded rigorous guarantees.\n\nPrior work addressed the special case when the underlying distribution is Gaussian via Non-Gaussian Component Analysis. We improve on this by providing polytime guarantees based on TV distance, in place of existing moment-bound guarantees that can be super-polynomial. 
Our work is also the first to go beyond Gaussians in this setting.", "keywords": "Unsupervised Learning;Learning Halfspaces;Non-Gaussian Component analysis", "primary_area": "", "supplementary_material": "", "author": "Xinyuan Cao;Santosh Vempala", "authorids": "~Xinyuan_Cao1;~Santosh_Vempala1", "gender": "F;M", "homepage": "https://github.com/youki-cao;http://www.cc.gatech.edu/~vempala/", "dblp": "271/2539;v/SantoshVempala", "google_scholar": "XRgHwgkAAAAJ;https://scholar.google.com.tw/citations?user=hRggMmIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xinyuan_Cao1;~Santosh_Vempala1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\ncao2023contrastive,\ntitle={Contrastive Moments: Unsupervised Halfspace Learning in Polynomial Time},\nauthor={Xinyuan Cao and Santosh Vempala},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PHbqznMa1i}\n}", "github": "", "project": "", "reviewers": "YLvd;6stc;efHe;Ua7t;rhCN", "pdf_size": 835229, "rating": "5;6;6;7;8", "confidence": "3;4;1;4;3", "soundness": "3;3;2;4;4", "novelty": "2;3;2;3;4", "presentation": "2;3;2;3;4", "wc_summary": "145;66;49;91;64", "wc_strengths": "38;43;50;84;61", "wc_weaknesses": "294;65;46;219;74", "wc_questions": "157;78;37;39;133", "wc_limitations": "1;2;4;9;21", "wc_review": "635;254;186;442;353", "wc_reply_reviewers": "26;0;12;17;69", "wc_reply_authors": "0;0;0;0;35", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 83.0, 33.80532502432124 ], "wc_strengths_avg": [ 55.2, 16.33891061240008 ], "wc_weaknesses_avg": [ 139.6, 98.7655810492704 ], "wc_questions_avg": [ 88.8, 48.754076752616285 ], "wc_limitations_avg": [ 7.4, 7.337574531137657 ], "wc_review_avg": [ 374.0, 156.7992346920099 ], "wc_reply_reviewers_avg": [ 24.8, 23.64233491007181 ], "wc_reply_authors_avg": [ 7.0, 14.0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.17902871850985824, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2345916740257331017&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Domain Watermark: Effective and Harmless Dataset Copyright Protection is Closed at Hand", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71773", "id": "PIDNxRRJ8w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa6287ca31ae1474ea802342d0c8ba63-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PIDNxRRJ8w", "openreview": "https://openreview.net/forum?id=PIDNxRRJ8w", "poster": "/media/PosterPDFs/NeurIPS%202023/71773.png?t=1697946797.0449502", "slides": "https://nips.cc/virtual/2023/poster/71773", 
"video": "https://nips.cc/virtual/2023/poster/71773", "author_site": "Junfeng Guo, Yiming Li, Lixu Wang, Shu-Tao Xia, Heng Huang, Cong Liu, Bo Li", "tldr": "", "abstract": "The prosperity of deep neural networks (DNNs) is largely benefited from open-source datasets, based on which users can evaluate and improve their methods. In this paper, we revisit backdoor-based dataset ownership verification (DOV), which is currently the only feasible approach to protect the copyright of open-source datasets. We reveal that these methods are fundamentally harmful given that they could introduce malicious misclassification behaviors to watermarked DNNs by the adversaries. In this paper, we design DOV from another perspective by making watermarked models (trained on the protected dataset) correctly classify some `hard' samples that will be misclassified by the benign model. Our method is inspired by the generalization property of DNNs, where we find a \\emph{hardly-generalized domain} for the original dataset (as its \\emph{domain watermark}). It can be easily learned with the protected dataset containing modified samples. Specifically, we formulate the domain generation as a bi-level optimization and propose to optimize a set of visually-indistinguishable clean-label modified data with similar effects to domain-watermarked samples from the hardly-generalized domain to ensure watermark stealthiness. We also design a hypothesis-test-guided ownership verification via our domain watermark and provide the theoretical analyses of our method. Extensive experiments on three benchmark datasets are conducted, which verify the effectiveness of our method and its resistance to potential adaptive methods.", "keywords": "Ownership Verification;Dataset Protection;Copyright Protection;Backdoor Attack;AI Security", "primary_area": "", "supplementary_material": "/attachment/5f0c79a445e96b0d7fc3b94c85fbf57ca7338f9b.zip", "author": "Junfeng Guo;Yiming Li;Lixu Wang;Shu-Tao Xia;Heng Huang;Cong Liu;Bo Li", "authorids": "~Junfeng_Guo2;~Yiming_Li1;~Lixu_Wang1;~Shu-Tao_Xia1;~Heng_Huang1;~Cong_Liu2;~Bo_Li19", "gender": "M;M;;M;M;;F", "homepage": "https://junfenggo.github.io/;http://liyiming.tech;;https://www.sigs.tsinghua.edu.cn/xst/list.htm;https://www.cs.umd.edu/~heng/;https://intra.ece.ucr.edu/~cong/;http://boli.cs.illinois.edu/", "dblp": ";l/YimingLi-4;;03/6195;03/281;https://dblp.uni-trier.de/pers/l/Liu_0005:Cong.html;50/3402-26", "google_scholar": "TqblqYcAAAAJ;mSW7kU8AAAAJ;;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ;4OqLaDwAAAAJ;vpc4bggAAAAJ;K8vJkTcAAAAJ", "orcid": ";0000-0002-2258-265X;;0000-0002-8639-982X;;;", "linkedin": ";yiming-li-thu/;;;;;", "or_profile": "~Junfeng_Guo2;~Yiming_Li1;~Lixu_Wang1;~Shu-Tao_Xia1;~Heng_Huang1;~Cong_Liu2;~Bo_Li19", "aff": "University of Texas, Dallas;Tsinghua University;;Shenzhen International Graduate School, Tsinghua University;University of Pittsburgh;University of California, Riverside;University of Illinois, Urbana Champaign", "aff_domain": "utdallas.edu;mails.tsinghua.edu.cn;;sz.tsinghua.edu.cn;pitt.edu;ucr.edu;illinois.edu", "position": "PhD student;PhD student;;Full Professor;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nguo2023domain,\ntitle={Domain Watermark: Effective and Harmless Dataset Copyright Protection is Closed at Hand},\nauthor={Junfeng Guo and Yiming Li and Lixu Wang and Shu-Tao Xia and Heng Huang and Cong Liu and Bo Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PIDNxRRJ8w}\n}", "github": "", "project": "", "reviewers": "xWCD;FQjV;Tjx6;DDiA;a3iY", "pdf_size": 9039224, "rating": "4;5;5;6;6", "confidence": "2;4;2;2;2", "soundness": "2;3;3;2;3", "novelty": "3;3;3;2;3", "presentation": "3;2;3;3;3", "wc_summary": "83;101;43;128;46", "wc_strengths": "23;54;49;60;33", "wc_weaknesses": "208;128;64;271;132", "wc_questions": "3;136;26;10;19", "wc_limitations": "3;6;2;17;5", "wc_review": "320;425;184;486;235", "wc_reply_reviewers": "352;0;0;0;0", "wc_reply_authors": "525;153;154;148;198", "reply_reviewers": "2;0;0;0;0", "reply_authors": "4;3;3;3;4", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 2.4, 0.8000000000000002 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 80.2, 32.49246066397557 ], "wc_strengths_avg": [ 43.8, 13.731715115017497 ], "wc_weaknesses_avg": [ 160.6, 71.62010890804342 ], "wc_questions_avg": [ 38.8, 49.223571589229486 ], "wc_limitations_avg": [ 6.6, 5.388877434122992 ], "wc_review_avg": [ 330.0, 112.96194049324754 ], "wc_reply_reviewers_avg": [ 70.4, 140.8 ], "wc_reply_authors_avg": [ 235.6, 145.82263198831652 ], "reply_reviewers_avg": [ 0.4, 0.8000000000000002 ], "reply_authors_avg": [ 3.4, 0.4898979485566356 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.13363062095621214, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6388529652070253878&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "utdallas.edu;mails.tsinghua.edu.cn;;sz.tsinghua.edu.cn;pitt.edu;ucr.edu;illinois.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;3;4", "aff_unique_norm": "University of Texas at Dallas;Tsinghua University;University of Pittsburgh;University of California, Riverside;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.utdallas.edu;https://www.tsinghua.edu.cn;https://www.pitt.edu;https://www.ucr.edu;https://illinois.edu", "aff_unique_abbr": "UT Dallas;THU;Pitt;UCR;UIUC", "aff_campus_unique_index": "0;2;3;4", "aff_campus_unique": "Dallas;;Shenzhen;Riverside;Urbana-Champaign", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "User-Level Differential Privacy With Few Examples Per User", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71772", "id": "PITeSdYQkv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d57795f0e263aa69577f1bbceade46b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PITeSdYQkv", "openreview": "https://openreview.net/forum?id=PITeSdYQkv", "poster": "/media/PosterPDFs/NeurIPS%202023/71772.png?t=1702024816.4261847", "slides": "https://nips.cc/virtual/2023/poster/71772", "video": "https://nips.cc/virtual/2023/poster/71772", "author_site": "Badih Ghazi, Pritish Kamath, Ravi Kumar, Pasin Manurangsi, Raghu Meka, Chiyuan Zhang", "tldr": "", "abstract": "Previous work on user-level differential privacy (DP) [Ghazi et al. NeurIPS 2021, Bun et al. STOC 2023] obtained generic algorithms that work for various learning tasks. However, their focus was on the *example-rich* regime, where the users have so many examples that each user could themselves solve the problem. 
In this work we consider the *example-scarce* regime, where each user has only a few examples, and obtain the following results:\n* For approximate-DP, we give a generic transformation of any item-level DP algorithm to a user-level DP algorithm. Roughly speaking, the latter gives a (multiplicative) savings of $O_{\\varepsilon,\\delta}(\\sqrt{m})$ in terms of the number of users required for achieving the same utility, where $m$ is the number of examples per user. This algorithm, while recovering most known bounds for specific problems, also gives new bounds, e.g., for PAC learning. \n* For pure-DP, we present a simple technique for adapting the exponential mechanism [McSherry & Talwar, FOCS 2007] to the user-level setting. This gives new bounds for a variety of tasks, such as private PAC learning, hypothesis selection, and distribution learning. For some of these problems, we show that our bounds are near-optimal.", "keywords": "differential privacy;user-level privacy;PAC learning", "primary_area": "", "supplementary_material": "", "author": "Badih Ghazi;Pritish Kamath;Ravi Kumar;Pasin Manurangsi;Raghu Meka;Chiyuan Zhang", "authorids": "~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Raghu_Meka1;~Chiyuan_Zhang1", "gender": ";M;M;M;M;M", "homepage": "https://sites.google.com/view/badihghazi/home;https://pritishkamath.github.io/;https://sites.google.com/site/ravik53/;https://pasin30055.github.io/;http://raghumeka.org;http://pluskid.org", "dblp": "125/2134;https://dblp.org/pers/k/Kamath:Pritish.html;k/RaviKumar.html;133/2059;76/1906;21/8315", "google_scholar": "GBJLTN8AAAAJ;1JFARhUAAAAJ;J_XhIsgAAAAJ;35hM-PkAAAAJ;xuDZ9-sAAAAJ;l_G2vr0AAAAJ", "orcid": ";;0000-0002-2203-2586;;;", "linkedin": "badih-ghazi-608379132/;;ravi-kumar-a3a9631;;;", "or_profile": "~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Raghu_Meka1;~Chiyuan_Zhang1", "aff": "Google;Google Research;Google;Google;University of California, Los Angeles;Google", "aff_domain": "google.com;google.com;google.com;google.com;ucla.edu;google.com", "position": "Researcher;Research Scientist;Research Scientist;Research Scientist;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nghazi2023userlevel,\ntitle={User-Level Differential Privacy With Few Examples Per User},\nauthor={Badih Ghazi and Pritish Kamath and Ravi Kumar and Pasin Manurangsi and Raghu Meka and Chiyuan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PITeSdYQkv}\n}", "github": "", "project": "", "reviewers": "iTdY;pUZs;KRgS;Hupm", "pdf_size": 566912, "rating": "7;7;8;8", "confidence": "3;4;4;3", "soundness": "3;4;4;4", "novelty": "4;3;4;4", "presentation": "4;3;3;4", "wc_summary": "52;205;66;115", "wc_strengths": "71;82;52;115", "wc_weaknesses": "117;143;80;37", "wc_questions": "43;206;132;133", "wc_limitations": "7;18;10;14", "wc_review": "290;654;340;414", "wc_reply_reviewers": "0;261;5;7", "wc_reply_authors": "0;98;40;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 109.5, 59.89365575751742 ], "wc_strengths_avg": [ 80.0, 22.880122377295102 ], "wc_weaknesses_avg": [ 94.25, 39.921015768639954 ], "wc_questions_avg": [ 128.5, 57.768936289324216 ], "wc_limitations_avg": [ 12.25, 4.14578098794425 ], "wc_review_avg": [ 424.5, 
139.6522466700769 ], "wc_reply_reviewers_avg": [ 68.25, 111.31346504354269 ], "wc_reply_authors_avg": [ 34.5, 40.13415004706092 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9591800176378517251&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;google.com;google.com;google.com;ucla.edu;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Google;University of California, Los Angeles", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ucla.edu", "aff_unique_abbr": "Google;UCLA", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dynamics Generalisation in Reinforcement Learning via Adaptive Context-Aware Policies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71771", "id": "PJhjkSFlbG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e7b768198d24d883d69704eee57efb0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PJhjkSFlbG", "openreview": "https://openreview.net/forum?id=PJhjkSFlbG", "poster": "/media/PosterPDFs/NeurIPS%202023/71771.png?t=1701441003.1248507", "slides": "https://nips.cc/virtual/2023/poster/71771", "video": "https://nips.cc/virtual/2023/poster/71771", "author_site": "Michael Beukman, Devon Jarvis, Richard Klein, Steven James, Benjamin Rosman", "tldr": "", "abstract": "While reinforcement learning has achieved remarkable successes in several domains, its real-world application is limited due to many methods failing to generalise to unfamiliar conditions. In this work, we consider the problem of generalising to new transition dynamics, corresponding to cases in which the environment's response to the agent's actions differs. For example, the gravitational force exerted on a robot depends on its mass and changes the robot's mobility. Consequently, in such cases, it is necessary to condition an agent's actions on extrinsic state information and pertinent contextual information reflecting how the environment responds. While the need for context-sensitive policies has been established, the manner in which context is incorporated architecturally has received less attention. Thus, in this work, we present an investigation into how context information should be incorporated into behaviour learning to improve generalisation. To this end, we introduce a neural network architecture, the Decision Adapter, which generates the weights of an adapter module and conditions the behaviour of an agent on the context information. We show that the Decision Adapter is a useful generalisation of a previously proposed architecture and empirically demonstrate that it results in superior generalisation performance compared to previous approaches in several environments. 
Beyond this, the Decision Adapter is more robust to irrelevant distractor variables than several alternative methods.", "keywords": "Deep Reinforcement Learning;Contextual Markov Decision Process;Neural Network Architecture", "primary_area": "", "supplementary_material": "", "author": "Michael Beukman;Devon Jarvis;Richard Klein;Steven James;Benjamin Rosman", "authorids": "~Michael_Beukman1;~Devon_Jarvis1;~Richard_Klein1;~Steven_James1;~Benjamin_Rosman1", "gender": ";M;M;M;M", "homepage": ";https://jarvisdevon.github.io/;https://www.wits.ac.za/staff/academic-a-z-listing/k/richardkleinwitsacza/;;http://www.raillab.org", "dblp": ";320/3650;26/8293;195/8202;45/4591", "google_scholar": ";https://scholar.google.co.za/citations?user=MJjN5nEAAAAJ;https://scholar.google.co.za/citations?user=QZ_MjosAAAAJ;;https://scholar.google.co.za/citations?user=pWJ0SocAAAAJ", "orcid": ";0000-0003-2362-7538;0000-0003-0783-2072;;", "linkedin": ";devon-jarvis-6b059a139;;;", "or_profile": "~Michael_Beukman1;~Devon_Jarvis1;~Richard_Klein1;~Steven_James1;~Benjamin_Rosman1", "aff": ";University College London, University of London;University of the Witwatersrand;University of the Witwatersrand;University of the Witwatersrand", "aff_domain": ";ucl.ac.uk;wits.ac.za;wits.ac.za;wits.ac.za", "position": ";Researcher;Associate Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nbeukman2023dynamics,\ntitle={Dynamics Generalisation in Reinforcement Learning via Adaptive Context-Aware Policies},\nauthor={Michael Beukman and Devon Jarvis and Richard Klein and Steven James and Benjamin Rosman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PJhjkSFlbG}\n}", "github": "", "project": "", "reviewers": "DXBg;sPQr;8dZQ;rS79", "pdf_size": 1089767, "rating": "6;6;7;8", "confidence": "4;4;3;5", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "82;103;80;36", "wc_strengths": "35;59;121;125", "wc_weaknesses": "200;283;194;435", "wc_questions": "76;306;20;125", "wc_limitations": "14;30;27;14", "wc_review": "407;781;442;735", "wc_reply_reviewers": "184;1296;30;183", "wc_reply_authors": "559;1782;20;776", "reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.25, 24.38621536852326 ], "wc_strengths_avg": [ 85.0, 38.961519477556315 ], "wc_weaknesses_avg": [ 278.0, 97.22911086706492 ], "wc_questions_avg": [ 131.75, 107.24358955201005 ], "wc_limitations_avg": [ 21.25, 7.327175444876422 ], "wc_review_avg": [ 591.25, 167.99758182783467 ], "wc_reply_reviewers_avg": [ 423.25, 507.7644015682864 ], "wc_reply_authors_avg": [ 784.25, 638.4333853269267 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7983697562743797874&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "email": ";ucl.ac.uk;wits.ac.za;wits.ac.za;wits.ac.za", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University College London;University of the Witwatersrand", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.wits.ac.za", "aff_unique_abbr": "UCL;Wits", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;South Africa" }, { "id": "PL4WWjvm9D", "title": "Decoupling Quantile Representations from Loss Function", "track": "main", "status": "Reject", "tldr": "", "abstract": "The simultaneous quantile regression (SQR) technique has been used to estimate 2 uncertainties for deep learning models, but its application is limited by the requirement that the solution at the median quantile $(\\tau = 0.5)$ must minimize the mean absolute error (MAE). In this article, we address this limitation by demonstrating a duality between quantiles and estimated probabilities in the case of simultaneous\nbinary quantile regression (SBQR). This allows us to decouple the construction of quantile representations from the loss function, enabling us to assign an arbitrary classifier $f(x)$ at the median quantile and generate the full spectrum of SBQR quantile representations at different $\\tau $values. We validate our approach through two applications: (i) detecting out-of-distribution samples, where we show that\nquantile representations outperform standard probability outputs, and (ii) calibrating models, where we demonstrate the robustness of quantile representations to distortions. We conclude with a discussion of several hypotheses arising from these findings.", "keywords": "Quantiles;Duality;Calibration;OOD Detection;simultaneous binary quantile regression (SBQR);invariant to distortion", "primary_area": "", "supplementary_material": "/attachment/5f9d0c9c1dd897194680d2513cc0238a309271b8.zip", "author": "Aditya Challa;Soma S Dhavala;Snehanshu Saha", "authorids": "~Aditya_Challa1;~Soma_S_Dhavala1;~Snehanshu_Saha1", "gender": ";M;Not Specified", "homepage": ";https://www.linkedin.com/in/somasdhavala/;https://www.bits-pilani.ac.in/goa/snehanshus/profile", "dblp": ";;130/3938", "google_scholar": ";Rkh1zb8AAAAJ;C-Qm2LcAAAAJ", "orcid": ";;0000-0002-8458-604X", "linkedin": ";somasdhavala/;snehanshusaha/", "or_profile": "~Aditya_Challa1;~Soma_S_Dhavala1;~Snehanshu_Saha1", "aff": ";Wadhwani Institute for AI;Birla Institute of Technology and Science, Dhirubhai Ambani Institute Of Information and Communication Technology", "aff_domain": ";wadhwaniai.org;bits-pilani.ac.in", "position": ";Principal Researcher;Full Professor", "bibtex": "@misc{\nchalla2023decoupling,\ntitle={Decoupling Quantile Representations from Loss Function},\nauthor={Aditya Challa and Soma S Dhavala and Snehanshu Saha},\nyear={2023},\nurl={https://openreview.net/forum?id=PL4WWjvm9D}\n}", "github": "", "project": "", "reviewers": "4AGX;zXud;d48X;dSRG;EyEA", "site": "https://openreview.net/forum?id=PL4WWjvm9D", "pdf_size": 905121, "rating": "3;3;4;5;6", "confidence": "3;3;3;3;1", "soundness": "2;2;2;2;3", "novelty": "2;3;2;2;3", "presentation": "3;2;2;2;2", "wc_summary": "57;141;38;94;31", "wc_strengths": "42;38;13;59;43", "wc_weaknesses": "360;236;221;229;21", "wc_questions": "42;59;53;127;1", "wc_limitations": "25;16;1;1;17", "wc_review": "526;490;326;510;113", "wc_reply_reviewers": "154;361;307;79;0", "wc_reply_authors": "171;255;362;7;0", "reply_reviewers": "2;1;5;1;0", "reply_authors": "2;2;4;2;1", "rating_avg": [ 4.2, 1.16619037896906 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 72.2, 40.75978410148905 ], "wc_strengths_avg": [ 39.0, 14.845874847916509 ], 
"wc_weaknesses_avg": [ 213.4, 108.92309213385379 ], "wc_questions_avg": [ 56.4, 40.69201395851525 ], "wc_limitations_avg": [ 12.0, 9.50789145920377 ], "wc_review_avg": [ 393.0, 157.2742827038165 ], "wc_reply_reviewers_avg": [ 180.2, 135.76951056846306 ], "wc_reply_authors_avg": [ 159.0, 140.6797782198991 ], "reply_reviewers_avg": [ 1.8, 1.7204650534085255 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7717436331412899, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sKjU8a-s2LUJ:scholar.google.com/&scioq=Decoupling+Quantile+Representations+from+Loss+Function&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Wadhwani Institute for AI;Birla Institute of Technology and Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.wadhwaniai.com;https://www.bits-pilani.ac.in", "aff_unique_abbr": ";BITS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;India" }, { "title": "How Re-sampling Helps for Long-Tail Learning?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71770", "id": "PLzCXefcpE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eeffa70bcbbd43f6bd067edebc6595e8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PLzCXefcpE", "openreview": "https://openreview.net/forum?id=PLzCXefcpE", "poster": "/media/PosterPDFs/NeurIPS%202023/71770.png?t=1697534648.0892172", "slides": "https://nips.cc/virtual/2023/poster/71770", "video": "https://nips.cc/virtual/2023/poster/71770", "author_site": "Jiang-Xin Shi, Tong Wei, Yuke Xiang, Yu-Feng Li", "tldr": "", "abstract": "Long-tail learning has received significant attention in recent years due to the challenge it poses with extremely imbalanced datasets. In these datasets, only a few classes (known as the head classes) have an adequate number of training samples, while the rest of the classes (known as the tail classes) are infrequent in the training data. Re-sampling is a classical and widely used approach for addressing class imbalance issues. Unfortunately, recent studies claim that re-sampling brings negligible performance improvements in modern long-tail learning tasks. This paper aims to investigate this phenomenon systematically. Our research shows that re-sampling can considerably improve generalization when the training images do not contain semantically irrelevant contexts. In other scenarios, however, it can learn unexpected spurious correlations between irrelevant contexts and target labels. We design experiments on two homogeneous datasets, one containing irrelevant context and the other not, to confirm our findings. To prevent the learning of spurious correlations, we propose a new context shift augmentation module that generates diverse training images for the tail class by maintaining a context bank extracted from the head-class images. Experiments demonstrate that our proposed module can boost the generalization and outperform other approaches, including class-balanced re-sampling, decoupled classifier re-training, and data augmentation methods. 
The source code is available at https://www.lamda.nju.edu.cn/code_CSA.ashx.", "keywords": "long-tail learning;class-imbalanced learning;re-sampling", "primary_area": "", "supplementary_material": "/attachment/79bd75811069430d26405abe9fb98132338618c0.zip", "author": "Jiang-Xin Shi;Tong Wei;Yuke Xiang;Yu-Feng Li", "authorids": "~Jiang-Xin_Shi1;~Tong_Wei1;~Yuke_Xiang1;~Yu-Feng_Li1", "gender": ";M;F;M", "homepage": "http://www.lamda.nju.edu.cn/shijx;https://palm.seu.edu.cn/weit/;;https://cs.nju.edu.cn/liyf/index.htm", "dblp": "299/5485.html;49/933-1;;57/413", "google_scholar": "KEgtGncAAAAJ;EFCZuW4AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-0318-0911;0000-0002-2766-8209;;0000-0002-2220-5248", "linkedin": ";;ykxiang/;", "or_profile": "~Jiang-Xin_Shi1;~Tong_Wei1;~Yuke_Xiang1;~Yu-feng_Li2", "aff": "Nanjing University;Southeast University;Huawei Technologies Ltd.;Nanjing University", "aff_domain": "nju.edu.cn;seu.edu.cn;huawei.com;nju.edu.cn", "position": "PhD student;Associate Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshi2023how,\ntitle={How Re-sampling Helps for Long-Tail Learning?},\nauthor={Jiang-Xin Shi and Tong Wei and Yuke Xiang and Yu-Feng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PLzCXefcpE}\n}", "github": "", "project": "", "reviewers": "J7TL;qgru;jD4D;gT6X;gDP8", "pdf_size": 2301873, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;4", "soundness": "1;3;4;3;3", "novelty": "1;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "82;70;51;81;95", "wc_strengths": "57;48;35;189;73", "wc_weaknesses": "222;68;114;228;58", "wc_questions": "121;437;73;90;28", "wc_limitations": "38;1;1;1;6", "wc_review": "520;624;274;589;260", "wc_reply_reviewers": "247;35;0;36;14", "wc_reply_authors": "410;0;62;0;0", "reply_reviewers": "2;1;0;1;1", "reply_authors": "4;1;2;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 75.8, 14.715977711317723 ], "wc_strengths_avg": [ 80.4, 55.69057370866276 ], "wc_weaknesses_avg": [ 138.0, 73.52822587278983 ], "wc_questions_avg": [ 149.8, 146.7125079875605 ], "wc_limitations_avg": [ 9.4, 14.430523206037956 ], "wc_review_avg": [ 453.4, 155.8943231808009 ], "wc_reply_reviewers_avg": [ 66.4, 91.30301199850967 ], "wc_reply_authors_avg": [ 94.4, 159.61654049627816 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8056845144745796766&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "email": "nju.edu.cn;seu.edu.cn;huawei.com;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Nanjing University;Southeast University;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.nju.edu.cn;https://www.seu.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "Nanjing U;SEU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Fair Adaptive Experiments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71769", "id": "PMvudWa53L", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d007df4ae13adf9001f8969555b11bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PMvudWa53L", "openreview": "https://openreview.net/forum?id=PMvudWa53L", "poster": "/media/PosterPDFs/NeurIPS%202023/71769.png?t=1702068936.0075784", "slides": "https://nips.cc/virtual/2023/poster/71769", "video": "https://nips.cc/virtual/2023/poster/71769", "author_site": "Waverly Wei, Xinwei Ma, Jingshen Wang", "tldr": "", "abstract": "Randomized experiments have been the gold standard for assessing the effectiveness of a treatment, policy, or intervention, spanning various fields, including social sciences, biomedical studies, and e-commerce. The classical complete randomization approach assigns treatments based on a pre-specified probability and may lead to inefficient use of data. Adaptive experiments improve upon complete randomization by sequentially learning and updating treatment assignment probabilities using accrued evidence during the experiment. Hence, they can help achieve efficient data use and higher estimation efficiency. However, their application can also raise fairness and equity concerns, as assignment probabilities may vary drastically across groups of participants. Furthermore, when treatment is expected to be extremely beneficial to certain groups of participants, it is more appropriate to expose many of these participants to favorable treatment. In response to these challenges, we propose a fair adaptive experiment strategy that simultaneously enhances data use efficiency, achieves an ``envy-free'' treatment assignment guarantee, and improves the overall welfare of participants. An important feature of our proposed strategy is that we do not impose parametric modeling assumptions on the outcome variables, making it more versatile and applicable to a wider array of applications. Through our theoretical investigation, we characterize the convergence rate of the estimated treatment effects and the associated standard deviations at the group level and further prove that our adaptive treatment assignment algorithm, despite not having a closed-form expression, approaches the optimal allocation rule asymptotically. Our proof strategy takes into account the fact that the allocation decisions in our design depend on sequentially accumulated data, which poses a significant challenge in characterizing the properties and conducting statistical inference of our method. 
We further provide simulation evidence and two synthetic data studies to showcase the performance of our fair adaptive experiment strategy.", "keywords": "Adaptive Randomized Experiment; Adaptive Design; Causal Inference", "primary_area": "", "supplementary_material": "/attachment/2dc60347d5de66c7e2896e0cd396fe112ea2c8e3.zip", "author": "Waverly Wei;Xinwei Ma;Jingshen Wang", "authorids": "~Waverly_Wei1;x1ma@ucsd.edu;~Jingshen_Wang1", "gender": ";;", "homepage": ";;https://sites.google.com/berkeley.edu/jingshenwang/", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Waverly_Wei1;x1ma@ucsd.edu;~Jingshen_Wang1", "aff": ";;University of California, Berkeley", "aff_domain": ";;berkeley.edu", "position": ";;Assistant Professor", "bibtex": "@inproceedings{\nwei2023fair,\ntitle={Fair Adaptive Experiments},\nauthor={Waverly Wei and Xinwei Ma and Jingshen Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PMvudWa53L}\n}", "github": "", "project": "", "reviewers": "62S1;gk4J;fjNx;ZNjb", "pdf_size": 1527811, "rating": "2;6;6;6", "confidence": "3;4;4;3", "soundness": "2;3;3;4", "novelty": "1;3;3;3", "presentation": "2;3;3;4", "wc_summary": "191;58;87;192", "wc_strengths": "36;42;119;58", "wc_weaknesses": "507;609;147;133", "wc_questions": "344;101;49;138", "wc_limitations": "19;3;80;35", "wc_review": "1097;813;482;556", "wc_reply_reviewers": "430;20;0;0", "wc_reply_authors": "263;14;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.7320508075688772 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 132.0, 60.377976117124035 ], "wc_strengths_avg": [ 63.75, 32.89661836724255 ], "wc_weaknesses_avg": [ 349.0, 212.14617602021488 ], "wc_questions_avg": [ 158.0, 111.94418251968256 ], "wc_limitations_avg": [ 34.25, 28.734778579275673 ], "wc_review_avg": [ 737.0, 241.43425606156222 ], "wc_reply_reviewers_avg": [ 112.5, 183.4904629674251 ], "wc_reply_authors_avg": [ 69.25, 112.00753322879672 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10983524693893611634&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";;berkeley.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Efficient Beam Tree Recursion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71768", "id": "PR5znB6BZ2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5cf93940e37f7a7877cd57b6dba6b7ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PR5znB6BZ2", "openreview": "https://openreview.net/forum?id=PR5znB6BZ2", "poster": "/media/PosterPDFs/NeurIPS%202023/71768.png?t=1702065260.0390134", "slides": "https://nips.cc/virtual/2023/poster/71768", "video": "https://nips.cc/virtual/2023/poster/71768", "author_site": "Jishnu Ray Chowdhury, Cornelia Caragea", "tldr": 
"", "abstract": "Beam Tree Recursive Neural Network (BT-RvNN) was recently proposed as an extension of Gumbel Tree RvNN and it was shown to achieve state-of-the-art length generalization performance in ListOps while maintaining comparable performance on other tasks. However, although better than previous approaches in terms of memory usage, BT-RvNN can be still exorbitantly expensive. In this paper, we identify the main bottleneck in BT-RvNN's memory usage to be the entanglement of the scorer function and the recursive cell function. We propose strategies to remove this bottleneck and further simplify its memory usage. Overall, our strategies not only reduce the memory usage of BT-RvNN by $10-16$ times but also create a new state-of-the-art in ListOps while maintaining similar performance in other tasks. In addition, we also propose a strategy to utilize the induced latent-tree node representations produced by BT-RvNN to turn BT-RvNN from a sentence encoder of the form $f:\\mathbb{R}^{n \\times d} \\rightarrow \\mathbb{R}^{d}$ into a token contextualizer of the form $f:\\mathbb{R}^{n \\times d} \\rightarrow \\mathbb{R}^{n \\times d}$. Thus, our proposals not only open up a path for further scalability of RvNNs but also standardize a way to use BT-RvNNs as another building block in the deep learning toolkit that can be easily stacked or interfaced with other popular models such as Transformers and Structured State Space models. Our code is available at the link: https://github.com/JRC1995/BeamRecursionFamily.", "keywords": "Recursive Models;Recursive Neural Networks;RvNNs;Length Generalization;Structured Encoding;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/00103441b51d77ea1e2b260e49cd66cfbe5613ac.zip", "author": "Jishnu Ray Chowdhury;Cornelia Caragea", "authorids": "~Jishnu_Ray_Chowdhury2;~Cornelia_Caragea2", "gender": ";", "homepage": ";https://www.cs.uic.edu/~cornelia/", "dblp": ";69/6680.html", "google_scholar": ";vkX6VV4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jishnu_Ray_Chowdhury2;~Cornelia_Caragea2", "aff": ";University of Illinois at Chicago", "aff_domain": ";uic.edu", "position": ";Full Professor", "bibtex": "@inproceedings{\nchowdhury2023efficient,\ntitle={Efficient Beam Tree Recursion},\nauthor={Jishnu Ray Chowdhury and Cornelia Caragea},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PR5znB6BZ2}\n}", "github": "", "project": "", "reviewers": "S18L;oTCB;x8Vp;XtZN", "pdf_size": 1122603, "rating": "5;5;5;7", "confidence": "3;3;4;3", "soundness": "4;2;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;2", "wc_summary": "138;94;105;115", "wc_strengths": "31;59;105;28", "wc_weaknesses": "132;108;13;29", "wc_questions": "13;94;19;52", "wc_limitations": "6;7;1;4", "wc_review": "320;362;243;228", "wc_reply_reviewers": "57;132;0;0", "wc_reply_authors": "14;435;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 113.0, 16.232683080747925 ], "wc_strengths_avg": [ 55.75, 30.898017735770686 ], "wc_weaknesses_avg": [ 70.5, 50.53958844312051 ], "wc_questions_avg": [ 44.5, 32.20636583037583 ], "wc_limitations_avg": [ 4.5, 2.29128784747792 ], "wc_review_avg": [ 288.25, 55.056221265175836 ], "wc_reply_reviewers_avg": 
[ 47.25, 54.181985013471035 ], "wc_reply_authors_avg": [ 112.25, 186.42743226252944 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18031906425547082412&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": ";uic.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Policy Space Diversity for Non-Transitive Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71767", "id": "PRgvdEbhdH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d61819e9b4a607b8448de762235148c4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PRgvdEbhdH", "openreview": "https://openreview.net/forum?id=PRgvdEbhdH", "poster": "/media/PosterPDFs/NeurIPS%202023/71767.png?t=1702350771.2422507", "slides": "https://nips.cc/virtual/2023/poster/71767", "video": "https://nips.cc/virtual/2023/poster/71767", "author_site": "Jian Yao, Weiming Liu, Haobo Fu, Yaodong Yang, Stephen McAleer, Qiang Fu, Wei Yang", "tldr": "", "abstract": "Policy-Space Response Oracles (PSRO) is an influential algorithm framework for approximating a Nash Equilibrium (NE) in multi-agent non-transitive games. Many previous studies have been trying to promote policy diversity in PSRO. A major weakness with existing diversity metrics is that a more diverse (according to their diversity metrics) population does not necessarily mean (as we proved in the paper) a better approximation to a NE. To alleviate this problem, we propose a new diversity metric, the improvement of which guarantees a better approximation to a NE. Meanwhile, we develop a practical and well-justified method to optimize our diversity metric using only state-action samples. By incorporating our diversity regularization into the best response solving of PSRO, we obtain a new PSRO variant, \\textit{Policy Space Diversity} PSRO (PSD-PSRO). We present the convergence property of PSD-PSRO. 
Empirically, extensive experiments on single-state games, Leduc, and Goofspiel demonstrate that PSD-PSRO is more effective in producing significantly less exploitable policies than state-of-the-art PSRO variants.", "keywords": "Policy Diversity;Policy-Space Response Oracles;Nash Equilibrium;Multi-agent Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Jian Yao;Weiming Liu;Haobo Fu;Yaodong Yang;Stephen Marcus McAleer;QIANG FU;Yang Wei", "authorids": "~Jian_Yao6;~Weiming_Liu3;~Haobo_Fu2;~Yaodong_Yang1;~Stephen_Marcus_McAleer1;~QIANG_FU8;~Yang_Wei2", "gender": "M;M;M;M;M;M;M", "homepage": ";;;https://www.yangyaodong.com;https://www.andrew.cmu.edu/user/smcaleer/;;", "dblp": "40/4105-4;00/105-4.html;85/8571;170/1496-1;;;03/1094-32.html", "google_scholar": "yMwUW7YAAAAJ;fIPGDMMAAAAJ;LFdJXNcAAAAJ;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;iEFL4-YAAAAJ;gANaxT0AAAAJ;", "orcid": ";;;0000-0001-8132-5613;;;", "linkedin": ";;haobo-fu-382b0784/;yaodong-yang;stephen-mcaleer/;;", "or_profile": "~Jian_Yao6;~Weiming_Liu3;~Haobo_Fu2;~Yaodong_Yang1;~Stephen_Marcus_McAleer1;~QIANG_FU8;~Yang_Wei2", "aff": "Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Peking University;Carnegie Mellon University;Tencent AI Lab;Tencent AI Lab", "aff_domain": "tencent.com;tencent.com;tencent.com;pku.edu.cn;cmu.edu;tencent.com;tencent.com", "position": "Researcher;Researcher;Principal Researcher;Assistant Professor;Postdoc;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nyao2023policy,\ntitle={Policy Space Diversity for Non-Transitive Games},\nauthor={Jian Yao and Weiming Liu and Haobo Fu and Yaodong Yang and Stephen Marcus McAleer and QIANG FU and Yang Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PRgvdEbhdH}\n}", "github": "", "project": "", "reviewers": "YW2F;jhEH;Mw94;6jtZ", "pdf_size": 5392040, "rating": "5;5;7;7", "confidence": "4;3;4;3", "soundness": "1;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;4;4", "wc_summary": "59;93;69;105", "wc_strengths": "22;128;96;80", "wc_weaknesses": "145;269;168;41", "wc_questions": "4;293;417;169", "wc_limitations": "4;14;99;15", "wc_review": "234;797;849;410", "wc_reply_reviewers": "202;127;524;0", "wc_reply_authors": "480;298;771;0", "reply_reviewers": "2;2;2;0", "reply_authors": "3;3;4;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.5, 18.350749303502567 ], "wc_strengths_avg": [ 81.5, 38.4545185901475 ], "wc_weaknesses_avg": [ 155.75, 81.02275914827882 ], "wc_questions_avg": [ 220.75, 152.80113710309882 ], "wc_limitations_avg": [ 33.0, 38.34709897762802 ], "wc_review_avg": [ 572.5, 258.76678689507276 ], "wc_reply_reviewers_avg": [ 213.25, 193.39515893630843 ], "wc_reply_authors_avg": [ 387.25, 280.0869284704304 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11636158791034136644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tencent.com;tencent.com;tencent.com;pku.edu.cn;cmu.edu;tencent.com;tencent.com", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Tencent;Peking University;Carnegie Mellon University", 
"aff_unique_dep": "Tencent AI Lab;;", "aff_unique_url": "https://ai.tencent.com;http://www.pku.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "Tencent AI Lab;Peking U;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Beyond Exponential Graph: Communication-Efficient Topologies for Decentralized Learning via Finite-time Convergence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71766", "id": "PSngfm5B9q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f201b3f3d0f08c6ab46c36b9052c1b64-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PSngfm5B9q", "openreview": "https://openreview.net/forum?id=PSngfm5B9q", "poster": "/media/PosterPDFs/NeurIPS%202023/71766.png?t=1697184951.0925944", "slides": "https://nips.cc/virtual/2023/poster/71766", "video": "https://nips.cc/virtual/2023/poster/71766", "author_site": "Yuki Takezawa, Ryoma Sato, Han Bao, Kenta Niwa, Makoto Yamada", "tldr": "", "abstract": "Decentralized learning has recently been attracting increasing attention for its applications in parallel computation and privacy preservation. Many recent studies stated that the underlying network topology with a faster consensus rate (a.k.a. spectral gap) leads to a better convergence rate and accuracy for decentralized learning. However, a topology with a fast consensus rate, e.g., the exponential graph, generally has a large maximum degree, which incurs significant communication costs. Thus, seeking topologies with both a fast consensus rate and small maximum degree is important. In this study, we propose a novel topology combining both a fast consensus rate and small maximum degree called the Base-$\\left(k+1\\right)$ Graph. Unlike the existing topologies, the Base-$\\left(k+1\\right)$ Graph enables all nodes to reach the exact consensus after a finite number of iterations for any number of nodes and maximum degree $k$. Thanks to this favorable property, the Base-$\\left(k+1\\right)$ Graph endows Decentralized SGD (DSGD) with both a faster convergence rate and more communication efficiency than the exponential graph. We conducted experiments with various topologies, demonstrating that the Base-$\\left(k+1\\right)$ Graph enables various decentralized learning methods to achieve higher accuracy with better communication efficiency than the existing topologies. 
Our code is available at https://github.com/yukiTakezawa/BaseGraph.", "keywords": "decentralized learning;distributed optimization;network topology;consensus rate", "primary_area": "", "supplementary_material": "/attachment/c4e1999a50103c3a6f77e983a1498e356e6020f1.pdf", "author": "Yuki Takezawa;Ryoma Sato;Han Bao;Kenta Niwa;Makoto Yamada", "authorids": "~Yuki_Takezawa1;~Ryoma_Sato1;~Han_Bao2;~Kenta_Niwa1;~Makoto_Yamada3", "gender": "M;M;M;M;M", "homepage": "https://yukitakezawa.github.io/;https://joisino.net/en/;https://hermite.jp/;http://www.kecl.ntt.co.jp/icl/ls/members/niwa/index.html;https://groups.oist.jp/mlds", "dblp": "284/1294;227/2014;120/1444-2;64/1008.html;56/4937", "google_scholar": "eaKQb8IAAAAJ;https://scholar.google.co.jp/citations?user=S4kMic4AAAAJ;MqMzjeMAAAAJ;Btla06EAAAAJ;1cKNu1gAAAAJ", "orcid": "0000-0002-8532-2775;;0000-0002-4473-2604;0000-0002-6911-0238;", "linkedin": ";;;;", "or_profile": "~Yuki_Takezawa1;~Ryoma_Sato1;~Han_Bao2;~Kenta_Niwa1;~Makoto_Yamada3", "aff": "Kyoto University;Kyoto University;Kyoto University, Kyoto University;NTT Corporation;Kyoto University", "aff_domain": "kyoto-u.ac.jp;kyoto-u.ac.jp;i.kyoto-u.ac.jp;ntt.co.jp;kyoto-u.ac.jp", "position": "MS student;PhD student;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\ntakezawa2023beyond,\ntitle={Beyond Exponential Graph: Communication-Efficient Topologies for Decentralized Learning via Finite-time Convergence},\nauthor={Yuki Takezawa and Ryoma Sato and Han Bao and Kenta Niwa and Makoto Yamada},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PSngfm5B9q}\n}", "github": "", "project": "", "reviewers": "BhdZ;UfzT;hhaz;pdDT", "pdf_size": 1189639, "rating": "5;6;6;7", "confidence": "5;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "wc_summary": "72;213;152;68", "wc_strengths": "101;43;48;32", "wc_weaknesses": "187;97;279;127", "wc_questions": "100;69;70;210", "wc_limitations": "115;26;1;1", "wc_review": "575;448;550;438", "wc_reply_reviewers": "96;4;0;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.25, 60.259335376354755 ], "wc_strengths_avg": [ 56.0, 26.61766330841233 ], "wc_weaknesses_avg": [ 172.5, 69.50359702921857 ], "wc_questions_avg": [ 112.25, 57.79435526069999 ], "wc_limitations_avg": [ 35.75, 46.87949978402073 ], "wc_review_avg": [ 502.75, 60.50361559444196 ], "wc_reply_reviewers_avg": [ 29.0, 39.1279950930277 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15127192605051981102&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "kyoto-u.ac.jp;kyoto-u.ac.jp;i.kyoto-u.ac.jp;ntt.co.jp;kyoto-u.ac.jp", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Kyoto University;NTT Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "Kyoto U;NTT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kyoto", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Simplicity Bias in 1-Hidden Layer Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71765", "id": "PTvxck0QDE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/196c4e02b7464c554f0f5646af5d502e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PTvxck0QDE", "openreview": "https://openreview.net/forum?id=PTvxck0QDE", "poster": "/media/PosterPDFs/NeurIPS%202023/71765.png?t=1700763423.419189", "slides": "https://nips.cc/virtual/2023/poster/71765", "video": "https://nips.cc/virtual/2023/poster/71765", "author_site": "Depen Morwani, Jatin Batra, Prateek Jain, Praneeth Netrapalli", "tldr": "", "abstract": "Recent works have demonstrated that neural networks exhibit extreme *simplicity bias* (SB). That is, they learn *only the simplest* features to solve a task at hand, even in the presence of other, more robust but more complex features. Due to the lack of a general and rigorous definition of *features*, these works showcase SB on *semi-synthetic* datasets such as Color-MNIST , MNIST-CIFAR where\n defining features is relatively easier. \n\nIn this work, we rigorously define as well as thoroughly establish SB for *one hidden layer* neural networks in the infinite width regime. More concretely, (i) we define SB as the network essentially being a function of a low dimensional projection of the inputs \n(ii) theoretically, we show that when the data is linearly separable, the network primarily depends on only the linearly separable ($1$-dimensional) subspace even in the presence of an arbitrarily large number of other, more complex features which could have led to a significantly more robust classifier, (iii) empirically, we show that models trained on *real* datasets such as Imagenet and Waterbirds-Landbirds indeed depend on a low dimensional projection of the inputs, thereby demonstrating SB on these datasets, iv) finally, we present a natural ensemble approach that encourages diversity in models by training successive models on features not used by earlier models, and demonstrate that it yields models that are significantly more robust to Gaussian noise.", "keywords": "Simplicity Bias;Gradient Descent;Implicit Bias;Neural Networks", "primary_area": "", "supplementary_material": "/attachment/828947f71b4574356860c81bb26730fab3d34286.zip", "author": "Depen Morwani;jatin batra;Prateek Jain;Praneeth Netrapalli", "authorids": "~Depen_Morwani1;~jatin_batra1;~Prateek_Jain1;~Praneeth_Netrapalli1", "gender": "M;;M;M", "homepage": ";;http://prateekjain.org;http://praneethnetrapalli.org/", "dblp": "277/5200;157/6041;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html;http://dblp.uni-trier.de/pers/hd/n/Netrapalli:Praneeth", "google_scholar": "vOngxFUAAAAJ;;qYhRbJoAAAAJ;https://scholar.google.co.in/citations?user=mim8FQkAAAAJ", "orcid": ";;;", "linkedin": "depen-morwani-070298122/;;;", "or_profile": "~Depen_Morwani1;~jatin_batra1;~Prateek_Jain1;~Praneeth_Netrapalli1", "aff": "Harvard University, Harvard University;Tata institute of fundamental research, Mumbai;Google;Google", "aff_domain": "g.harvard.edu;tifr.res.in;google.com;google.com", "position": "PhD student;Assistant Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nmorwani2023simplicity,\ntitle={Simplicity Bias in 1-Hidden Layer Neural Networks},\nauthor={Depen Morwani and jatin batra and Prateek Jain and Praneeth Netrapalli},\nbooktitle={Thirty-seventh 
Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PTvxck0QDE}\n}", "github": "", "project": "", "reviewers": "W8La;gAuS;aqCb;PpnG", "pdf_size": 1867586, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "3;3;3;2", "presentation": "2;3;2;1", "wc_summary": "107;292;172;91", "wc_strengths": "72;71;69;62", "wc_weaknesses": "813;114;110;174", "wc_questions": "459;205;697;64", "wc_limitations": "12;46;13;7", "wc_review": "1463;728;1061;398", "wc_reply_reviewers": "553;5;5;100", "wc_reply_authors": "691;0;0;76", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 165.5, 79.08381629638266 ], "wc_strengths_avg": [ 68.5, 3.905124837953327 ], "wc_weaknesses_avg": [ 302.75, 295.68173345676934 ], "wc_questions_avg": [ 356.25, 242.3606558416609 ], "wc_limitations_avg": [ 19.5, 15.46770829825802 ], "wc_review_avg": [ 912.5, 394.9218277077123 ], "wc_reply_reviewers_avg": [ 165.75, 226.91779899338 ], "wc_reply_authors_avg": [ 191.75, 289.90720498118014 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9122465623094215232&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "g.harvard.edu;tifr.res.in;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Harvard University;Tata Institute of Fundamental Research;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.harvard.edu;https://www.tifr.res.in;https://www.google.com", "aff_unique_abbr": "Harvard;TIFR;Google", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Mumbai;Mountain View", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;India" }, { "title": "Universality laws for Gaussian mixtures in generalized linear models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71764", "id": "PU3deePP2S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abccb8a90b30d45b948360ba41f5a20f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PU3deePP2S", "openreview": "https://openreview.net/forum?id=PU3deePP2S", "poster": "/media/PosterPDFs/NeurIPS%202023/71764.png?t=1701959701.477459", "slides": "https://nips.cc/virtual/2023/poster/71764", "video": "https://nips.cc/virtual/2023/poster/71764", "author_site": "Yatin Dandi, Ludovic Stephan, Florent Krzakala, Bruno Loureiro, Lenka Zdeborov\u00e1", "tldr": "", "abstract": "A recent line of work in high-dimensional statistics working under the Gaussian mixture hypothesis has led to a number of results in the context of empirical risk minimization, Bayesian uncertainty quantification, separation of kernel methods and neural networks, ensembling and fluctuation of random features. We provide rigorous proofs for the applicability of these results to a general class of datasets $\\{(\\mathbf{x}_i, y_i)\\}_{i=1,\\dots,n}$ containing independent samples from a mixture distribution $\\sum_{c\\in\\mathcal{C}} \\rho_{c}P_{c}^{\\mathbf{x}}$.
Specifically, we consider the hypothesis class of generalized linear models $\\hat{y} = F(\\mathbf{\\Theta}^{\\top}\\mathbf{x})$ and investigate the asymptotic joint statistics of a family of generalized linear estimators $(\\mathbf{\\Theta}^{(1)}, \\dots, \\mathbf{\\Theta}^{(M)})$, obtained either from (a) minimizing an empirical risk $\\hat{R}_n^{(m)}(\\mathbf{\\Theta}^{(m)};\\mathbf{X},\\mathbf{y})$ or (b) sampling from the associated Gibbs measure $\\exp(-\\beta n \\hat{R}_n^{(m)}(\\mathbf{\\Theta}^{(m)};\\mathbf{X},\\mathbf{y}))$. Our main contribution is to characterize under which conditions the asymptotic joint statistics of this family depends (in a weak sense) only on the means and covariances of the class conditional features distribution $P_{c}^{\\mathbf{x}}$. This allows us to prove the universality of different quantities of interest, including training and generalization errors, as well as the geometrical properties and correlations of the estimators.", "keywords": "theoretical analysis;high-dimensional statistics;Universality;weak convergence;mixture models;sampling;statistical physics", "primary_area": "", "supplementary_material": "/attachment/43803b99e34ac063156679431240d367dc36f5bd.pdf", "author": "Yatin Dandi;Ludovic Stephan;Florent Krzakala;Bruno Loureiro;Lenka Zdeborova", "authorids": "~Yatin_Dandi1;~Ludovic_Stephan2;~Florent_Krzakala1;~Bruno_Loureiro1;~Lenka_Zdeborova1", "gender": "M;;M;F;M", "homepage": "https://yatindandi.github.io/;http://Krzakala.org;https://brloureiro.github.io/;http://artax.karlin.mff.cuni.cz/~zdebl9am/;", "dblp": "255/6032;25/1282;207/1834;27/6064.html;230/4096", "google_scholar": "UiEzYkMAAAAJ;https://scholar.google.fr/citations?user=3jDeUlMAAAAJ;DXl3ir8AAAAJ;https://scholar.google.fr/citations?user=gkCjy_UAAAAJ;mEd3WCsAAAAJ", "orcid": ";0000-0003-2313-2578;0000-0002-6327-4688;;0000-0001-5612-3577", "linkedin": ";;bruno-loureiro-43183b14a/;;", "or_profile": "~Yatin_Dandi1;~Florent_Krzakala1;~Bruno_Loureiro1;~Lenka_Zdeborova1;~Ludovic_STEPHAN1", "aff": "EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Ecole Normale Sup\u00e9rieure, Ecole Normale Sup\u00e9rieure de Paris;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;di.ens.fr;epfl.ch;epfl.ch", "position": "PhD student;Full Professor;Researcher;Associate Professor;Postdoc", "bibtex": "@inproceedings{\ndandi2023universality,\ntitle={Universality laws for Gaussian mixtures in generalized linear models},\nauthor={Yatin Dandi and Ludovic Stephan and Florent Krzakala and Bruno Loureiro and Lenka Zdeborova},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PU3deePP2S}\n}", "github": "", "project": "", "reviewers": "KxLS;Wbxr;tZtP;JkJQ", "pdf_size": 545402, "rating": "3;6;7;7", "confidence": "4;4;3;3", "soundness": "2;3;4;3", "novelty": "2;3;4;3", "presentation": "1;4;3;3", "wc_summary": "24;153;165;102", "wc_strengths": "8;42;253;105", "wc_weaknesses": "222;50;63;282", "wc_questions": "17;193;32;57", "wc_limitations": "1;14;18;55", "wc_review": "272;452;531;601", "wc_reply_reviewers": "104;16;68;91", "wc_reply_authors": "55;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 111.0, 55.520266569965244 ],
"wc_strengths_avg": [ 102.0, 93.86959039007255 ], "wc_weaknesses_avg": [ 154.25, 100.13085188891584 ], "wc_questions_avg": [ 74.75, 69.75089605159205 ], "wc_limitations_avg": [ 22.0, 20.062402647738878 ], "wc_review_avg": [ 464.0, 122.7456720214607 ], "wc_reply_reviewers_avg": [ 69.75, 33.60338524613257 ], "wc_reply_authors_avg": [ 13.75, 23.81569860407206 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7624928516630233, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16384753056203605942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "epfl.ch;epfl.ch;di.ens.fr;epfl.ch;epfl.ch", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne;Ecole Normale Sup\u00e9rieure de Paris", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.ens.psl.eu", "aff_unique_abbr": "EPFL;EPFL;ENS Paris", "aff_campus_unique_index": "0;0;1;0;0", "aff_campus_unique": "Lausanne;Paris", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Switzerland;France" }, { "title": "RaLEs: a Benchmark for Radiology Language Evaluations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73601", "id": "PWLGrvoqiR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb5683d06bdef51ed4dff644908eef4b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=PWLGrvoqiR", "openreview": "https://openreview.net/forum?id=PWLGrvoqiR", "poster": "/media/PosterPDFs/NeurIPS%202023/73601.png?t=1701667131.3019683", "slides": "https://nips.cc/virtual/2023/poster/73601", "video": "https://nips.cc/virtual/2023/poster/73601", "author_site": "Juanma Zambrano Chaves, Nandita Bhaskhar, Maayane Attias, Jean-Benoit Delbrouck, Daniel Rubin, Andreas Loening, Curtis Langlotz, Akshay Chaudhari", "tldr": "", "abstract": "The radiology report is the main form of communication between radiologists and other clinicians. Prior work in natural language processing in radiology reports has shown the value of developing methods tailored for individual tasks such as identifying reports with critical results or disease detection. Meanwhile, English and biomedical natural language understanding benchmarks such as the General Language Understanding and Evaluation as well as Biomedical Language Understanding and Reasoning Benchmark have motivated the development of models that can be easily adapted to address many tasks in those domains. Here, we characterize the radiology report as a distinct domain and introduce RaLEs, the Radiology Language Evaluations, as a benchmark for natural language understanding and generation in radiology. RaLEs is comprised of seven natural language understanding and generation evaluations including the extraction of anatomical and disease entities and their relations, procedure selection, and report summarization. We characterize the performance of models designed for the general, biomedical, clinical and radiology domains across these tasks. We find that advances in the general and biomedical domains do not necessarily translate to radiology, and that improved models from the general domain can perform comparably to smaller clinical-specific models. 
The limited performance of existing pre-trained models on RaLEs highlights the opportunity to improve domain-specific self-supervised models for natural language processing in radiology. We propose RaLEs as a benchmark to promote and track the development of such domain-specific radiology language models.", "keywords": "radiology;benchmark;natural language understanding;natural language generation", "primary_area": "", "supplementary_material": "", "author": "Juan Manuel Zambrano Chaves;Nandita Bhaskhar;Maayane Attias;Jean-Benoit Delbrouck;Daniel Rubin;Andreas Markus Loening;Curtis Langlotz;Akshay S Chaudhari", "authorids": "~Juan_Manuel_Zambrano_Chaves1;~Nandita_Bhaskhar1;~Maayane_Attias1;~Jean-Benoit_Delbrouck1;~Daniel_Rubin1;~Andreas_Markus_Loening1;~Curtis_Langlotz1;~Akshay_S_Chaudhari1", "gender": "M;F;F;;;M;M;", "homepage": "https://jmzam.github.io;https://web.stanford.edu/~nanbhas/;;;http://rubin.web.stanford.edu;https://profiles.stanford.edu/andreas-loening;https://profiles.stanford.edu/curtis-langlotz;", "dblp": ";;;;;;12/1751;", "google_scholar": "ngMOlmYAAAAJ;https://scholar.google.com/scholar?hl=en;;;;;WQkBYwQAAAAJ;", "orcid": "0000-0002-7274-8072;;;;;0000-0003-3316-7467;0000-0002-8972-8051;", "linkedin": "juanmzambrano/;nanditabhaskhar/;maayaneattias;;;;langlotz/;", "or_profile": "~Juan_Manuel_Zambrano_Chaves1;~Nandita_Bhaskhar1;~Maayane_Attias1;~Jean-Benoit_Delbrouck1;~Daniel_Rubin1;~Andreas_Markus_Loening1;~Curtis_Langlotz1;~Akshay_S_Chaudhari1", "aff": "Stanford University;Stanford University;Stanford University;;Stanford University;Stanford University;Stanford University;", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu;", "position": "PhD student;PhD student;MS student;;Full Professor;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nchaves2023rales,\ntitle={Ra{LE}s: a Benchmark for Radiology Language Evaluations},\nauthor={Juan Manuel Zambrano Chaves and Nandita Bhaskhar and Maayane Attias and Jean-Benoit Delbrouck and Daniel Rubin and Andreas Markus Loening and Curtis Langlotz and Akshay S Chaudhari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=PWLGrvoqiR}\n}", "github": "", "project": "", "reviewers": "mpiG;SKzi;JDoA;w7FQ;cMGK", "pdf_size": 1475118, "rating": "5;6;6;6;8", "confidence": "4;3;3;4;3", "wc_summary_and_contributions": "88;997;91;49;145", "wc_strengths": "135;61;85;74;46", "wc_improvement": "245;48;171;28;89", "wc_limitations": "42;39;48;3;27", "wc_correctness": "3;1;46;1;21", "wc_clarity": "11;72;33;1;5", "wc_relation_to_prior_work": "57;38;1;1;22", "wc_documentation": "57;145;1;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "639;1402;477;159;357", "wc_reply_reviewers": "29;0;62;0;0", "wc_reply_authors": "1296;1747;729;414;340", "reply_reviewers": "1;0;1;0;0", "reply_authors": "4;4;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 274.0, 362.7891949879434 ], "wc_strengths_avg": [ 80.2, 30.34073169849402 ], "wc_improvement_avg": [ 116.2, 80.94788446895942 ], "wc_limitations_avg": [ 31.8, 15.942396306703706 ], "wc_correctness_avg": [ 14.4, 17.499714283381884 ], "wc_clarity_avg": [ 24.4, 26.241951146970763 ], "wc_relation_to_prior_work_avg": [ 23.8, 21.664717861075413 ], "wc_documentation_avg": [ 41.0, 56.34181395730883 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 
606.8, 427.3529688676563 ], "wc_reply_reviewers_avg": [ 18.2, 24.612192100664252 ], "wc_reply_authors_avg": [ 905.2, 539.0968002130971 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5833333333333334, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9090425336916134435&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu;", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PXUHrqIL9O", "title": "Selective Mixup Helps with Distribution Shifts, But Not (Only) because of Mixup", "track": "main", "status": "Reject", "tldr": "", "abstract": "Mixup is a highly successful technique to improve generalization of neural networks by augmenting the training data with combinations of random pairs. Selective mixup is a family of methods that apply mixup to specific pairs, e.g. only combining examples across classes or domains. These methods have claimed remarkable improvements on benchmarks with distribution shifts, but their mechanisms and limitations remain poorly understood.\n \nWe examine an overlooked aspect of selective mixup that explains its success in a completely new light. We find that the non-random selection of pairs affects the training distribution and improves generalization by means completely unrelated to the mixing. For example, in binary classification, mixup across classes implicitly resamples the data for a uniform class distribution - a classical solution to label shift.\nWe show empirically that this implicit resampling explains much of the improvements in prior work. Theoretically, these results rely on a \"regression toward the mean\", an accidental property that we identify in several datasets.\n\nTakeaways:\nWe have found a new equivalence between two successful methods: selective mixup and resampling.
We identify limits of the former, confirm the effectiveness of the latter, and find better combinations of their respective benefits.", "keywords": "mixup;distribution shifts;OOD generalization;weighted training", "primary_area": "", "supplementary_material": "/attachment/be5ed055a001aff57d030e7ba7cdaed39fa001bb.pdf", "author": "Damien Teney;Jindong Wang;Ehsan Abbasnejad", "authorids": "~Damien_Teney1;~Jindong_Wang1;~Ehsan_Abbasnejad3", "gender": "M;M;M", "homepage": "https://www.damienteney.info;https://ehsanabb.github.io/;https://jd92.wang/", "dblp": "62/10068;30/11191;19/2969-1", "google_scholar": "https://scholar.google.com.au/citations?user=iS_jP_3dpD8J;https://scholar.google.com/citations?hl=en;hBZ_tKsAAAAJ", "orcid": ";;0000-0002-4833-0880", "linkedin": ";;jindong-wang/", "or_profile": "~Damien_Teney1;~Ehsan_M_Abbasnejad1;~Jindong_Wang4", "aff": "Idiap Research Institute;University of Adelaide;Microsoft Research", "aff_domain": "idiap.ch;adelaide.edu.au;microsoft.com", "position": "Researcher;Assistant Professor;Researcher", "bibtex": "@misc{\nteney2023selective,\ntitle={Selective Mixup Helps with Distribution Shifts, But Not (Only) because of Mixup},\nauthor={Damien Teney and Jindong Wang and Ehsan Abbasnejad},\nyear={2023},\nurl={https://openreview.net/forum?id=PXUHrqIL9O}\n}", "github": "", "project": "", "reviewers": "SGQh;xtzm;h74Y;hGwy", "site": "https://openreview.net/forum?id=PXUHrqIL9O", "pdf_size": 773041, "rating": "4;4;5;6", "confidence": "3;4;3;4", "soundness": "3;3;2;2", "novelty": "3;2;2;3", "presentation": "4;3;3;1", "wc_summary": "32;49;141;55", "wc_strengths": "29;13;117;111", "wc_weaknesses": "206;28;166;50", "wc_questions": "52;51;509;141", "wc_limitations": "1;14;145;2", "wc_review": "320;155;1078;359", "wc_reply_reviewers": "135;39;6;329", "wc_reply_authors": "190;276;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 69.25, 42.2751404492049 ], "wc_strengths_avg": [ 67.5, 46.89083066016212 ], "wc_weaknesses_avg": [ 112.5, 75.25124583686306 ], "wc_questions_avg": [ 188.25, 188.75562905513573 ], "wc_limitations_avg": [ 40.5, 60.549566472436446 ], "wc_review_avg": [ 478.0, 354.7724622909732 ], "wc_reply_reviewers_avg": [ 127.25, 125.75049701691043 ], "wc_reply_authors_avg": [ 116.5, 120.40245014118275 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10502332873890205900&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2", "aff_unique_norm": "Idiap Research Institute;University of Adelaide;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.idiap.ch;https://www.adelaide.edu.au;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Idiap;Adelaide;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Switzerland;Australia;United States" }, { "title": "SHOT: Suppressing the Hessian along the Optimization Trajectory for Gradient-Based Meta-Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71763", "id": "PXsqbAjpQd", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c1cdf3236050ad902c6581458e55f0c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PXsqbAjpQd", "openreview": "https://openreview.net/forum?id=PXsqbAjpQd", "poster": "/media/PosterPDFs/NeurIPS%202023/71763.png?t=1698704471.7361066", "slides": "https://nips.cc/virtual/2023/poster/71763", "video": "https://nips.cc/virtual/2023/poster/71763", "author_site": "JunHoo Lee, Jayeon Yoo, Nojun Kwak", "tldr": "", "abstract": "In this paper, we hypothesize that gradient-based meta-learning (GBML) implicitly suppresses the Hessian along the optimization\n trajectory in the inner loop. Based on this hypothesis, we introduce an algorithm called\n SHOT (Suppressing the Hessian along the Optimization Trajectory) that minimizes the distance between the parameters of the target and reference models to suppress the Hessian in the inner loop. Despite dealing with\n high-order terms, SHOT does not increase the computational complexity of the baseline model much.\n It is agnostic to both the algorithm and architecture used in GBML, making it highly\n versatile and applicable to any GBML baseline. To validate the effectiveness of SHOT,\n we conduct empirical tests on standard few-shot learning tasks and qualitatively\n analyze its dynamics. We confirm our hypothesis empirically and demonstrate that SHOT\n outperforms the corresponding baseline.", "keywords": "meta learning;Hessian;Gradient-Based meta learning;Feature Reuse;Implicit Prior", "primary_area": "", "supplementary_material": "/attachment/f221aa138e0082ae6849f5e5d81af21d0e57c8c6.zip", "author": "JunHoo Lee;Jayeon Yoo;Nojun Kwak", "authorids": "~JunHoo_Lee1;~Jayeon_Yoo1;~Nojun_Kwak1", "gender": "M;F;M", "homepage": "https://junhoo-lee.com;;http://mipal.snu.ac.kr", "dblp": "376/0719;281/8521;49/2806", "google_scholar": "https://scholar.google.com/citations?hl=ko;JAeV59wAAAAJ;h_8-1M0AAAAJ", "orcid": ";;0000-0002-1792-0327", "linkedin": ";;", "or_profile": "~JunHoo_Lee1;~Jayeon_Yoo1;~Nojun_Kwak1", "aff": "Seoul National University;NAVER;Seoul National University", "aff_domain": "snu.ac.kr;navercorp.com;snu.ac.kr", "position": "PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\nlee2023shot,\ntitle={{SHOT}: Suppressing the Hessian along the Optimization Trajectory for Gradient-Based Meta-Learning},\nauthor={JunHoo Lee and Jayeon Yoo and Nojun Kwak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PXsqbAjpQd}\n}", "github": "", "project": "", "reviewers": "d83w;Xqme;eLqV;GyHa;Tzy2", "pdf_size": 1849064, "rating": "5;5;6;6;6", "confidence": "3;2;4;4;3", "soundness": "3;2;2;3;3", "novelty": "3;3;3;3;3", "presentation": "3;1;2;2;3", "wc_summary": "80;45;43;84;70", "wc_strengths": "72;74;61;10;27", "wc_weaknesses": "65;81;497;204;71", "wc_questions": "22;220;525;85;9", "wc_limitations": "34;1;21;1;3", "wc_review": "273;421;1147;384;180", "wc_reply_reviewers": "112;30;149;14;84", "wc_reply_authors": "302;0;78;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 64.4, 17.281203661782357 ], "wc_strengths_avg": [ 48.8, 25.701361831622854 ], "wc_weaknesses_avg": [ 183.6, 164.8679471577177 ], "wc_questions_avg": [ 172.2, 191.59895615582042 ], 
"wc_limitations_avg": [ 12.0, 13.326664999166145 ], "wc_review_avg": [ 481.0, 343.6364357864282 ], "wc_reply_reviewers_avg": [ 77.8, 50.264898288965036 ], "wc_reply_authors_avg": [ 76.0, 116.96837179340405 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9644352445577886736&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;navercorp.com;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Eliciting User Preferences for Personalized Multi-Objective Decision Making through Comparative Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71762", "id": "PYASzxr2OP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/286e7ab0ce6a68282394c92361c27b57-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PYASzxr2OP", "openreview": "https://openreview.net/forum?id=PYASzxr2OP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71762", "video": "https://nips.cc/virtual/2023/poster/71762", "author_site": "Han Shao, Lee Cohen, Avrim Blum, Yishay Mansour, Aadirupa Saha, Matthew Walter", "tldr": "", "abstract": "In this work, we propose a multi-objective decision making framework that accommodates different user preferences over objectives, where preferences are learned via policy comparisons. Our model consists of a known Markov decision process with a vector-valued reward function, with each user having an unknown preference vector that expresses the relative importance of each objective. The goal is to efficiently compute a near-optimal policy for a given user. We consider two user feedback models. We first address the case where a user is provided with two policies and returns their preferred policy as feedback. We then move to a different user feedback model, where a user is instead provided with two small weighted sets of representative trajectories and selects the preferred one. 
In both cases, we suggest an algorithm that finds a nearly optimal policy for the user using a number of comparison queries that scales quasilinearly in the number of objectives.", "keywords": "preference learning;algorithms;linear model;Markov decision processes;learning theory;multi-objective decision making;preference elicitation", "primary_area": "", "supplementary_material": "/attachment/c1b0d54226eff41f547cfe97861d7281f91707a2.pdf", "author": "Han Shao;Lee Cohen;Avrim Blum;Yishay Mansour;Aadirupa Saha;Matthew Walter", "authorids": "~Han_Shao4;~Lee_Cohen1;~Avrim_Blum1;~Yishay_Mansour2;~Aadirupa_Saha1;~Matthew_Walter1", "gender": "F;F;M;;M;M", "homepage": "https://sites.google.com/view/hanshao/;https://sites.google.com/view/leecohen;https://home.ttic.edu/~avrim/;http://aadirupa.github.io/;http://ttic.edu/walter;https://www.cs.tau.ac.il/~mansour/", "dblp": ";162/2494.html;b/AvrimBlum;;50/7734;m/YishayMansour", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=Jlv4MR4AAAAJ;https://scholar.google.co.in/citations?user=7a49tQYAAAAJ;RAiewnEAAAAJ;OEJUgwkAAAAJ", "orcid": "0009-0005-9206-1357;;;0000-0003-4965-6417;0000-0003-1425-6050;0000-0001-6891-2645", "linkedin": ";;;aadirupa-saha;;", "or_profile": "~Han_Shao4;~Lee_Cohen1;~Avrim_Blum1;~Aadirupa_Saha1;~Matthew_Walter1;~Yishay_Mansour1", "aff": "Toyota Technological Institute at Chicago;Toyota Technological Institute at Chicago;Toyota Technological Institute at Chicago;Apple;Toyota Technological Institute at Chicago;School of Computer Science, Tel Aviv University", "aff_domain": "ttic.edu;ttic.edu;ttic.edu;apple.com;ttic.edu;cs.tau.ac.il", "position": "PhD student;Researcher;Full Professor;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nshao2023eliciting,\ntitle={Eliciting User Preferences for Personalized Multi-Objective Decision Making through Comparative Feedback},\nauthor={Han Shao and Lee Cohen and Avrim Blum and Yishay Mansour and Aadirupa Saha and Matthew Walter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PYASzxr2OP}\n}", "github": "", "project": "", "reviewers": "BDkG;pTmr;wfQV;5457;Gnoo", "pdf_size": 508419, "rating": "3;5;5;7;7", "confidence": "4;4;3;2;4", "soundness": "2;4;3;3;4", "novelty": "2;2;3;3;3", "presentation": "1;2;4;3;3", "wc_summary": "103;59;29;45;204", "wc_strengths": "22;41;50;73;64", "wc_weaknesses": "143;105;49;71;128", "wc_questions": "3;122;63;35;152", "wc_limitations": "112;7;3;7;20", "wc_review": "383;334;194;231;568", "wc_reply_reviewers": "0;87;0;33;254", "wc_reply_authors": "0;258;0;0;44", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 5.4, 1.4966629547095764 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 88.0, 63.01111013146809 ], "wc_strengths_avg": [ 50.0, 17.832554500127006 ], "wc_weaknesses_avg": [ 99.2, 34.91933561796387 ], "wc_questions_avg": [ 75.0, 54.89262245511686 ], "wc_limitations_avg": [ 29.8, 41.49891564848412 ], "wc_review_avg": [ 342.0, 131.94392748436738 ], "wc_reply_reviewers_avg": [ 74.8, 95.07765247417501 ], "wc_reply_authors_avg": [ 60.4, 100.25886494470203 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 
-0.4677071733467426, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11937072759443678069&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ttic.edu;ttic.edu;ttic.edu;apple.com;ttic.edu;cs.tau.ac.il", "author_num": 6, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Toyota Technological Institute at Chicago;Apple;Tel Aviv University", "aff_unique_dep": ";Apple Inc.;School of Computer Science", "aff_unique_url": "https://www.tti-chicago.org;https://www.apple.com;https://www.tau.ac.il", "aff_unique_abbr": "TTI Chicago;Apple;TAU", "aff_campus_unique_index": "0;0;0;0;2", "aff_campus_unique": "Chicago;;Tel Aviv", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Feature Learning for Interpretable, Performant Decision Trees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71761", "id": "PYEgC56flW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1b4076ae067dd23bad5ac2693547a01-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PYEgC56flW", "openreview": "https://openreview.net/forum?id=PYEgC56flW", "poster": "/media/PosterPDFs/NeurIPS%202023/71761.png?t=1702416292.4433289", "slides": "https://nips.cc/virtual/2023/poster/71761", "video": "https://nips.cc/virtual/2023/poster/71761", "author_site": "Jack Good, Torin Kovach, Kyle Miller, Artur Dubrawski", "tldr": "", "abstract": "Decision trees are valued for the high interpretability arising from their hierarchical partitioning structure built on simple decision rules. However, in practice, this is not realized because axis-aligned partitioning of realistic data results in deep trees, and because ensemble methods are used to mitigate overfitting. Even then, model complexity and performance remain sensitive to transformation of the input, and extensive expert crafting of features from the raw data is common. We propose the first system to alternate sparse feature learning with differentiable decision tree construction to produce small, interpretable trees with good performance. 
We benchmark against conventional tree-based models and demonstrate several notions of interpretation of a model and its predictions.", "keywords": "explainability;interpretability;decision tree;feature learning", "primary_area": "", "supplementary_material": "/attachment/922d7d33fe9d2669662f94958050777d44cf6ad0.pdf", "author": "Jack Henry Good;Torin Kovach;Kyle Miller;Artur Dubrawski", "authorids": "~Jack_Henry_Good1;tkovach@andrew.cmu.edu;~Kyle_Miller1;~Artur_Dubrawski2", "gender": "M;;;M", "homepage": "https://www.ri.cmu.edu/ri-people/jack-henry-good/;;;https://www.autonlab.org", "dblp": "221/2759.html;;92/11514;76/48", "google_scholar": ";;;O3gezzcAAAAJ", "orcid": "0000-0003-1886-9217;;;0000-0002-2372-0831", "linkedin": ";;;artur-dubrawski-33a2a87/", "or_profile": "~Jack_Henry_Good1;tkovach@andrew.cmu.edu;~Kyle_Miller1;~Artur_Dubrawski2", "aff": "Carnegie Mellon University;;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;;andrew;cmu.edu", "position": "PhD student;;Project scientist;Research Professor", "bibtex": "@inproceedings{\ngood2023feature,\ntitle={Feature Learning for Interpretable, Performant Decision Trees},\nauthor={Jack Henry Good and Torin Kovach and Kyle Miller and Artur Dubrawski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PYEgC56flW}\n}", "github": "", "project": "", "reviewers": "js5S;ejNS;vnHK;b4Qh", "pdf_size": 840493, "rating": "5;5;5;6", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;2;2", "presentation": "2;3;3;2", "wc_summary": "137;108;84;80", "wc_strengths": "164;81;43;109", "wc_weaknesses": "278;147;134;75", "wc_questions": "102;233;121;71", "wc_limitations": "41;1;1;7", "wc_review": "722;570;383;342", "wc_reply_reviewers": "93;15;106;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 102.25, 22.741756748325315 ], "wc_strengths_avg": [ 99.25, 44.115615149286995 ], "wc_weaknesses_avg": [ 158.5, 74.13669806512831 ], "wc_questions_avg": [ 131.75, 61.120270778196 ], "wc_limitations_avg": [ 12.5, 16.635804759614125 ], "wc_review_avg": [ 504.25, 152.28653092115533 ], "wc_reply_reviewers_avg": [ 56.25, 43.51651985166093 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9041474277610503714&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": "cmu.edu;;andrew;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ALIM: Adjusting Label Importance Mechanism for Noisy Partial Label Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71760", "id": "PYSfn5xXEe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7988e9b3876ad689e921ce05d711442f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PYSfn5xXEe", 
"openreview": "https://openreview.net/forum?id=PYSfn5xXEe", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71760", "video": "https://nips.cc/virtual/2023/poster/71760", "author_site": "Mingyu Xu, Zheng Lian, Lei Feng, Bin Liu, Jianhua Tao", "tldr": "", "abstract": "Noisy partial label learning (noisy PLL) is an important branch of weakly supervised learning. Unlike PLL where the ground-truth label must conceal in the candidate label set, noisy PLL relaxes this constraint and allows the ground-truth label may not be in the candidate label set. To address this challenging problem, most of the existing works attempt to detect noisy samples and estimate the ground-truth label for each noisy sample. However, detection errors are unavoidable. These errors can accumulate during training and continuously affect model optimization. To this end, we propose a novel framework for noisy PLL with theoretical interpretations, called ``Adjusting Label Importance Mechanism (ALIM)''. It aims to reduce the negative impact of detection errors by trading off the initial candidate set and model outputs. ALIM is a plug-in strategy that can be integrated with existing PLL approaches. Experimental results on multiple benchmark datasets demonstrate that our method can achieve state-of-the-art performance on noisy PLL. Our code is available at: https://github.com/zeroQiaoba/ALIM.", "keywords": "Partial label learning; Noisy label learning", "primary_area": "", "supplementary_material": "/attachment/eaead0cb916ac0f1806b588ea4fb4b559923994f.zip", "author": "Mingyu Xu;Zheng Lian;Lei Feng;Bin Liu;Jianhua Tao", "authorids": "~Mingyu_Xu1;~Zheng_Lian3;~Lei_Feng1;~Bin_Liu13;~Jianhua_Tao1", "gender": ";M;M;M;", "homepage": ";https://zeroqiaoba.github.io/Homepage/;https://lfeng1995.github.io/;https://people.ucas.ac.cn/~bin.liu;", "dblp": ";;76/847-6;35/837-41;", "google_scholar": ";S34nWz0AAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;;", "orcid": ";0000-0001-9477-0599;0000-0003-2839-5799;;", "linkedin": ";;;;", "or_profile": "~Mingyu_Xu1;~Zheng_Lian3;~Lei_Feng1;~Bin_Liu13;~Jianhua_Tao1", "aff": ";Institute of Automation, Chinese Academy of Sciences;Nanyang Technological University;Institute of automation, Chinese academy of science;", "aff_domain": ";ia.ac.cn;ntu.edu.sg;nlpr.ia.ac.cn;", "position": ";Assistant Professor;Visiting Professor;Associate Professor;", "bibtex": "@inproceedings{\nxu2023alim,\ntitle={{ALIM}: Adjusting Label Importance Mechanism for Noisy Partial Label Learning},\nauthor={Mingyu Xu and Zheng Lian and Lei Feng and Bin Liu and Jianhua Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PYSfn5xXEe}\n}", "github": "", "project": "", "reviewers": "q7G6;7LXj;BGEz;7tpZ", "pdf_size": 882005, "rating": "5;5;7;7", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "74;49;74;155", "wc_strengths": "28;54;36;102", "wc_weaknesses": "199;115;36;17", "wc_questions": "63;156;19;135", "wc_limitations": "1;13;7;1", "wc_review": "365;387;172;410", "wc_reply_reviewers": "19;17;10;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 88.0, 40.006249511795026 ], "wc_strengths_avg": [ 55.0, 28.722813232690143 ], 
"wc_weaknesses_avg": [ 91.75, 72.00477414727443 ], "wc_questions_avg": [ 93.25, 55.01988276977696 ], "wc_limitations_avg": [ 5.5, 4.9749371855331 ], "wc_review_avg": [ 333.5, 94.58990432387591 ], "wc_reply_reviewers_avg": [ 17.5, 5.024937810560445 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5667813512887869513&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": ";ia.ac.cn;ntu.edu.sg;nlpr.ia.ac.cn;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Nanyang Technological University", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "CAS;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Performance Bounds for Policy-Based Average Reward Reinforcement Learning Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71759", "id": "PaSpImjKm2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3da8e709fa1a7d9e23bee89d3c25b5b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PaSpImjKm2", "openreview": "https://openreview.net/forum?id=PaSpImjKm2", "poster": "/media/PosterPDFs/NeurIPS%202023/71759.png?t=1702071838.1008337", "slides": "https://nips.cc/virtual/2023/poster/71759", "video": "https://nips.cc/virtual/2023/poster/71759", "author_site": "Yashaswini Murthy, Mehrdad Moharrami, R. Srikant", "tldr": "", "abstract": "Many policy-based reinforcement learning (RL) algorithms can be viewed as instantiations of approximate policy iteration (PI), i.e., where policy improvement and policy evaluation are both performed approximately. In applications where the average reward objective is the meaningful performance metric, often discounted reward formulations are used with the discount factor being close to $1,$ which is equivalent to making the expected horizon very large. However, the corresponding theoretical bounds for error performance scale with the square of the horizon. Thus, even after dividing the total reward by the length of the horizon, the corresponding performance bounds for average reward problems go to infinity. Therefore, an open problem has been to obtain meaningful performance bounds for approximate PI and RL algorithms for the average-reward setting. In this paper, we solve this open problem by obtaining the first non-trivial finite time error bounds for average-reward MDPs which go to zero in the limit as policy evaluation and policy improvement errors go to zero.", "keywords": "Average Reward MDPs;Reinforcement Learning Theory;Approximate Policy Iteration;Policy Based Methods;Performance Bounds", "primary_area": "", "supplementary_material": "/attachment/c7740a8a671d371cf3356f0d26f4938b9ca44020.pdf", "author": "Yashaswini Murthy;Mehrdad Moharrami;R. 
Srikant", "authorids": "~Yashaswini_Murthy1;~Mehrdad_Moharrami1;~R._Srikant1", "gender": "F;M;", "homepage": "https://yashaswinimurthy.web.illinois.edu/home.html;https://sites.google.com/view/moharrami;", "dblp": "230/4629;145/5522.html;s/RSrikant", "google_scholar": ";6irb5qMAAAAJ;", "orcid": "0000-0002-8788-6873;0000-0003-3907-8406;", "linkedin": "yashaswini-murthy-74b475b9/;mmoharrami/;", "or_profile": "~Yashaswini_Murthy1;~Mehrdad_Moharrami1;~R._Srikant1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;illinois.edu;illinois.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nmurthy2023performance,\ntitle={Performance Bounds for Policy-Based Average Reward Reinforcement Learning Algorithms},\nauthor={Yashaswini Murthy and Mehrdad Moharrami and R. Srikant},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PaSpImjKm2}\n}", "github": "", "project": "", "reviewers": "qfjY;KNuh;z18A;WTrY;M1AZ;h7bf", "pdf_size": 281398, "rating": "3;4;6;6;7;7", "confidence": "4;3;4;4;5;1", "soundness": "3;3;3;3;3;4", "novelty": "2;2;3;3;3;4", "presentation": "2;3;4;4;3;4", "wc_summary": "17;65;81;142;41;67", "wc_strengths": "20;49;211;129;41;44", "wc_weaknesses": "49;265;131;106;126;58", "wc_questions": "2;39;50;7;797;20", "wc_limitations": "11;1;13;7;7;7", "wc_review": "99;419;486;391;1012;196", "wc_reply_reviewers": "0;0;166;22;943;36", "wc_reply_authors": "0;0;174;0;1778;0", "reply_reviewers": "0;0;1;1;4;1", "reply_authors": "1;1;2;1;6;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 1.2583057392117916 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 68.83333333333333, 38.68857138168267 ], "wc_strengths_avg": [ 82.33333333333333, 66.9668242905064 ], "wc_weaknesses_avg": [ 122.5, 70.97593488875883 ], "wc_questions_avg": [ 152.5, 288.71713377167856 ], "wc_limitations_avg": [ 7.666666666666667, 3.7712361663282534 ], "wc_review_avg": [ 433.8333333333333, 290.8982735520367 ], "wc_reply_reviewers_avg": [ 194.5, 339.51030912182915 ], "wc_reply_authors_avg": [ 325.3333333333333, 652.7517819882906 ], "reply_reviewers_avg": [ 1.1666666666666667, 1.3437096247164249 ], "reply_authors_avg": [ 2.0, 1.8257418583505538 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6016559912446393936&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "uiuc.edu;illinois.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Interpretable and Explainable Logical Policies via Neurally Guided Symbolic Abstraction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71758", "id": "PbMBfRpVgU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f42f06a54ce3b709ad78d34c73e4363-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=PbMBfRpVgU", "openreview": "https://openreview.net/forum?id=PbMBfRpVgU", "poster": "/media/PosterPDFs/NeurIPS%202023/71758.png?t=1701641947.3855615", "slides": "https://nips.cc/virtual/2023/poster/71758", "video": "https://nips.cc/virtual/2023/poster/71758", "author_site": "Quentin Delfosse, Hikaru Shindo, Devendra Dhami, Kristian Kersting", "tldr": "", "abstract": "The limited priors required by neural networks make them the dominating choice to encode and learn policies using reinforcement learning (RL). However, they are also black-boxes, making it hard to understand the agent's behavior, especially when working on the image level. Therefore, neuro-symbolic RL aims at creating policies that are interpretable in the first place.\nUnfortunately, interpretability is not explainability. To achieve both, we introduce Neurally gUided Differentiable loGic policiEs (NUDGE). NUDGE exploits trained neural network-based agents to guide the search of candidate-weighted logic rules, then uses differentiable logic to train the logic agents. Our experimental evaluation demonstrates that NUDGE agents can induce interpretable and explainable policies while outperforming purely neural ones and showing good flexibility to environments of different initial states and problem sizes.", "keywords": "Reinforcement Learning;First-Order-Logic;Symbolic Abstraction;Interpretable Reinforcement Learning;Logic Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/c2d8b633cc8b93afb75d18cea14acb3b98be7721.zip", "author": "Quentin Delfosse;Hikaru Shindo;Devendra Singh Dhami;Kristian Kersting", "authorids": "~Quentin_Delfosse1;~Hikaru_Shindo1;~Devendra_Singh_Dhami1;~Kristian_Kersting1", "gender": "M;M;M;M", "homepage": "https://quentindelfosse.me/;https://www.hikarushindo.com/;https://sites.google.com/view/devendradhami;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "286/1466.html;227/1466;201/2130;40/3793", "google_scholar": "k1E0FgIAAAAJ;Ws03zBoAAAAJ;aVlaHfkAAAAJ;QY-earAAAAAJ", "orcid": ";;;0000-0002-2873-9152", "linkedin": "quentin-delfosse-70b377150/;hkrsnd;;", "or_profile": "~Quentin_Delfosse1;~Hikaru_Shindo1;~Devendra_Singh_Dhami1;~Kristian_Kersting1", "aff": "CS Department, TU Darmstadt, TU Darmstadt;TU Darmstadt;CS Department, TU Darmstadt, TU Darmstadt;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;Postdoctoral researcher;Full Professor", "bibtex": "@inproceedings{\ndelfosse2023interpretable,\ntitle={Interpretable and Explainable Logical Policies via Neurally Guided Symbolic Abstraction},\nauthor={Quentin Delfosse and Hikaru Shindo and Devendra Singh Dhami and Kristian Kersting},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PbMBfRpVgU}\n}", "github": "", "project": "", "reviewers": "AhLx;AnR8;Jkdb;NLZb;hGCz", "pdf_size": 3607684, "rating": "2;5;6;6;7", "confidence": "4;2;3;4;4", "soundness": "2;2;4;3;2", "novelty": "2;2;3;2;3", "presentation": "1;3;3;3;3", "wc_summary": "73;94;43;116;55", "wc_strengths": "40;36;51;113;59", "wc_weaknesses": "276;34;230;194;17", "wc_questions": "41;131;112;76;464", "wc_limitations": "1;7;6;28;28", "wc_review": "431;302;442;527;623", "wc_reply_reviewers": "1619;43;29;209;422", "wc_reply_authors": "2587;122;0;138;697", "reply_reviewers": "4;1;1;2;3", "reply_authors": "7;3;1;2;4", "rating_avg": [ 5.2, 1.7204650534085253 ], "confidence_avg": [ 
3.4, 0.8 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 76.2, 26.316534726289476 ], "wc_strengths_avg": [ 59.8, 27.80935094532053 ], "wc_weaknesses_avg": [ 150.2, 105.22053031609373 ], "wc_questions_avg": [ 164.8, 152.74999181669372 ], "wc_limitations_avg": [ 14.0, 11.610340218959994 ], "wc_review_avg": [ 465.0, 106.82883505870501 ], "wc_reply_reviewers_avg": [ 464.4, 594.5215218980721 ], "wc_reply_authors_avg": [ 708.8, 969.5688526350256 ], "reply_reviewers_avg": [ 2.2, 1.16619037896906 ], "reply_authors_avg": [ 3.4, 2.0591260281974 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.05812381937190963, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2137527699725984140&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cs.tu-darmstadt.de;tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Revisiting Adversarial Training for ImageNet: Architectures, Training and Generalization across Threat Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71757", "id": "Pbpk9jUzAi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d3b007613940def7a5ec9d6d635937b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pbpk9jUzAi", "openreview": "https://openreview.net/forum?id=Pbpk9jUzAi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71757", "video": "https://nips.cc/virtual/2023/poster/71757", "author_site": "Naman Deep Singh, Francesco Croce, Matthias Hein", "tldr": "", "abstract": "While adversarial training has been extensively studied for ResNet architectures and low resolution datasets like CIFAR-10, much less is known for ImageNet. Given the recent debate about whether transformers are more robust than convnets, we revisit adversarial training on ImageNet comparing ViTs and ConvNeXts. Extensive experiments show that minor changes in architecture, most notably replacing PatchStem with ConvStem, and training scheme have a significant impact on the achieved robustness. These changes not only increase robustness in the seen $\\ell_\\infty$-threat model, but even more so improve generalization to unseen $\\ell_1/\\ell_2$-attacks. 
\nOur modified ConvNeXt, ConvNeXt + ConvStem, yields the most robust $\\ell_\\infty$-models across different ranges of model parameters and FLOPs, while our ViT + ConvStem yields the best generalization to unseen threat models.", "keywords": "adversarial robustness;deep learning;vision transformers;convnext", "primary_area": "", "supplementary_material": "", "author": "Naman Deep Singh;Francesco Croce;Matthias Hein", "authorids": "~Naman_Deep_Singh1;~Francesco_Croce1;~Matthias_Hein2", "gender": "M;M;M", "homepage": "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/maschinelles-lernen/team/;;https://uni-tuebingen.de/de/164260", "dblp": "230/3694.html;52/4288;97/1213-1", "google_scholar": "https://scholar.google.de/citations?user=zfObWM0AAAAJ;https://scholar.google.de/citations?view_op=list_works;0ZAb3tsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Naman_Deep_Singh1;~Francesco_Croce1;~Matthias_Hein2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsingh2023revisiting,\ntitle={Revisiting Adversarial Training for ImageNet: Architectures, Training and Generalization across Threat Models},\nauthor={Naman Deep Singh and Francesco Croce and Matthias Hein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pbpk9jUzAi}\n}", "github": "", "project": "", "reviewers": "qHdB;bR1u;LmXJ;zQGv;vpT1", "pdf_size": 715067, "rating": "5;5;5;6;6", "confidence": "4;4;5;3;5", "soundness": "2;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "188;26;64;51;68", "wc_strengths": "69;9;115;44;64", "wc_weaknesses": "337;91;65;37;80", "wc_questions": "304;47;97;57;2", "wc_limitations": "34;26;1;23;14", "wc_review": "932;199;342;212;228", "wc_reply_reviewers": "852;0;11;169;16", "wc_reply_authors": "1409;33;33;731;0", "reply_reviewers": "2;0;1;2;1", "reply_authors": "3;2;2;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 79.4, 56.24802218745118 ], "wc_strengths_avg": [ 60.2, 34.57976286789717 ], "wc_weaknesses_avg": [ 122.0, 109.01742979909221 ], "wc_questions_avg": [ 101.4, 105.71395366743218 ], "wc_limitations_avg": [ 19.6, 11.288932633336067 ], "wc_review_avg": [ 382.6, 279.35754867194834 ], "wc_reply_reviewers_avg": [ 209.6, 327.1639344426583 ], "wc_reply_authors_avg": [ 441.2, 556.5128569943375 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1380258507456300626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen;University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", 
"aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "On the Statistical Consistency of Risk-Sensitive Bayesian Decision-Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71756", "id": "PcKHQFsvel", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a6df53f082619d02b9fad64a022e5de3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PcKHQFsvel", "openreview": "https://openreview.net/forum?id=PcKHQFsvel", "poster": "/media/PosterPDFs/NeurIPS%202023/71756.png?t=1701884303.5085847", "slides": "https://nips.cc/virtual/2023/poster/71756", "video": "https://nips.cc/virtual/2023/poster/71756", "author_site": "Prateek Jaiswal, Harsha Honnappa, Vinayak Rao", "tldr": "", "abstract": "We study data-driven decision-making problems in the Bayesian framework, where the expectation in the Bayes risk is replaced by a risk-sensitive entropic risk measure with respect to the posterior distribution. We focus on problems where calculating the posterior distribution is intractable, a typical situation in modern applications with large datasets and complex data generating models. We leverage a dual representation of the entropic risk measure to introduce a novel risk-sensitive variational Bayesian (RSVB) framework for jointly computing a risk-sensitive posterior approximation and the corresponding decision rule. Our general framework includes \\textit{loss-calibrated} VB (Lacoste-Julien et al. [2011] ) as a special case. We also study the impact of these computational approximations on the predictive performance of the inferred decision rules. We compute the convergence rates of the RSVB approximate posterior and the corresponding optimal value. 
We illustrate our theoretical findings in parametric and nonparametric settings with the help of three examples.", "keywords": "Variational Bayes;Loss Calibration;Bayesian Statistics;Variational Inference;Statistical Theory", "primary_area": "", "supplementary_material": "/attachment/cc98e5096aade3b5ec0fccff94df7ff7747d478a.pdf", "author": "Prateek Jaiswal;Harsha Honnappa;Vinayak Rao", "authorids": "~Prateek_Jaiswal1;~Harsha_Honnappa1;~Vinayak_Rao1", "gender": "M;;M", "homepage": ";;https://varao.github.io/", "dblp": "235/4678;;59/4025", "google_scholar": ";;IQibv4UAAAAJ", "orcid": "0000-0002-7637-2754;;", "linkedin": ";;", "or_profile": "~Prateek_Jaiswal1;~Harsha_Honnappa1;~Vinayak_Rao1", "aff": "Texas A&M University - College Station;;Purdue University", "aff_domain": "tamu.edu;;purdue.edu", "position": "Postdoc;;Associate Professor", "bibtex": "@inproceedings{\njaiswal2023on,\ntitle={On the Statistical Consistency of Risk-Sensitive Bayesian Decision-Making},\nauthor={Prateek Jaiswal and Harsha Honnappa and Vinayak Rao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PcKHQFsvel}\n}", "github": "", "project": "", "reviewers": "FLVD;j4gK;r5cb;3wrT", "pdf_size": 763413, "rating": "6;7;7;7", "confidence": "2;5;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "91;101;178;53", "wc_strengths": "158;176;60;66", "wc_weaknesses": "76;128;92;24", "wc_questions": "50;1;83;106", "wc_limitations": "10;3;52;31", "wc_review": "385;409;465;280", "wc_reply_reviewers": "161;130;102;20", "wc_reply_authors": "183;31;58;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.75, 45.394795957246025 ], "wc_strengths_avg": [ 115.0, 52.43090691567332 ], "wc_weaknesses_avg": [ 80.0, 37.416573867739416 ], "wc_questions_avg": [ 60.0, 39.45250308915773 ], "wc_limitations_avg": [ 24.0, 19.170289512680814 ], "wc_review_avg": [ 384.75, 67.08343685292219 ], "wc_reply_reviewers_avg": [ 103.25, 52.399308201540215 ], "wc_reply_authors_avg": [ 68.0, 69.49460410708159 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8203724671187439409&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tamu.edu;;purdue.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Texas A&M University;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.purdue.edu", "aff_unique_abbr": "TAMU;Purdue", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Responsible AI (RAI) Games and Ensembles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71755", "id": "PcNpL9Q39p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6057bf047bcc5f86ebf4e8db6e24a1f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PcNpL9Q39p", "openreview": "https://openreview.net/forum?id=PcNpL9Q39p", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71755.png?t=1699809288.2838898", "slides": "https://nips.cc/virtual/2023/poster/71755", "video": "https://nips.cc/virtual/2023/poster/71755", "author_site": "Yash Gupta, Runtian Zhai, Arun Suggala, Pradeep Ravikumar", "tldr": "", "abstract": "Several recent works have studied the societal effects of AI; these include issues such as fairness, robustness, and safety. In many of these objectives, a learner seeks to minimize its worst-case loss over a set of predefined distributions (known as uncertainty sets), with usual examples being perturbed versions of the empirical distribution. In other words, the aforementioned problems can be written as min-max problems over these uncertainty sets. In this work, we provide a general framework for studying these problems, which we refer to as Responsible AI (RAI) games. We provide two classes of algorithms for solving these games: (a) game-play based algorithms, and (b) greedy stagewise estimation algorithms. The former class is motivated by online learning and game theory, whereas the latter class is motivated by the classical statistical literature on boosting, and regression. We empirically demonstrate the applicability and competitive performance of our techniques for solving several RAI problems, particularly around subpopulation shift.", "keywords": "Responsible AI;fairness;DRO;robustness", "primary_area": "", "supplementary_material": "/attachment/351a04ac0c9ac290c880816f5cafa109570bd9bc.zip", "author": "Yash Gupta;Runtian Zhai;Arun Suggala;Pradeep Kumar Ravikumar", "authorids": "~Yash_Gupta1;~Runtian_Zhai1;~Arun_Suggala1;~Pradeep_Kumar_Ravikumar1", "gender": "M;M;M;M", "homepage": ";http://www.runtianzhai.com;;http://www.cs.cmu.edu/~pradeepr/", "dblp": ";242/8411;164/7327;94/3594", "google_scholar": "VLGA0VkAAAAJ;EXd0ES8AAAAJ;CKgmfDMAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ", "orcid": ";0000-0003-3332-3466;;", "linkedin": ";;;", "or_profile": "~Yash_Gupta1;~Runtian_Zhai1;~Arun_Suggala1;~Pradeep_Kumar_Ravikumar1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Google;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;google.com;cmu.edu", "position": "MS student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\ngupta2023responsible,\ntitle={Responsible {AI} ({RAI}) Games and Ensembles},\nauthor={Yash Gupta and Runtian Zhai and Arun Suggala and Pradeep Kumar Ravikumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PcNpL9Q39p}\n}", "github": "", "project": "", "reviewers": "KQkZ;yXJG;TUyo;qrVx", "pdf_size": 640939, "rating": "5;5;6;6", "confidence": "2;3;3;3", "soundness": "2;3;4;3", "novelty": "2;2;2;3", "presentation": "1;2;4;3", "wc_summary": "159;62;96;167", "wc_strengths": "26;46;40;77", "wc_weaknesses": "71;31;243;101", "wc_questions": "38;400;37;40", "wc_limitations": "16;30;19;49", "wc_review": "310;569;435;434", "wc_reply_reviewers": "21;17;158;26", "wc_reply_authors": "0;0;530;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 121.0, 43.777848279695064 ], "wc_strengths_avg": [ 47.25, 18.64638034579366 ], "wc_weaknesses_avg": [ 111.5, 79.8795968943259 ], "wc_questions_avg": [ 128.75, 
156.60998531383623 ], "wc_limitations_avg": [ 28.5, 12.932517156377562 ], "wc_review_avg": [ 437.0, 91.6051308606674 ], "wc_reply_reviewers_avg": [ 55.5, 59.26423879541523 ], "wc_reply_authors_avg": [ 132.5, 229.49673200287623 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15023956080184791817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.cmu.edu;cmu.edu;google.com;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": "School of Computer Science;Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Pittsburgh;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "An Information-Theoretic Evaluation of Generative Models in Learning Multi-modal Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71754", "id": "PdZhf6PiAb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1f5c5cd01b864d53cc5fa0a3472e152e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PdZhf6PiAb", "openreview": "https://openreview.net/forum?id=PdZhf6PiAb", "poster": "/media/PosterPDFs/NeurIPS%202023/71754.png?t=1701594524.1105015", "slides": "https://nips.cc/virtual/2023/poster/71754", "video": "https://nips.cc/virtual/2023/poster/71754", "author_site": "Mohammad Jalali, Cheuk Ting Li, Farzan Farnia", "tldr": "", "abstract": "The evaluation of generative models has received significant attention in the machine learning community. When applied to a multi-modal distribution which is common among image datasets, an intuitive evaluation criterion is the number of modes captured by the generative model. While several scores have been proposed to evaluate the quality and diversity of a model's generated data, the correspondence between existing scores and the number of modes in the distribution is unclear. In this work, we propose an information-theoretic diversity evaluation method for multi-modal underlying distributions. We utilize the R\\'enyi Kernel Entropy (RKE) as an evaluation score based on quantum information theory to measure the number of modes in generated samples. To interpret the proposed evaluation method, we show that the RKE score can output the number of modes of a mixture of sub-Gaussian components. We also prove estimation error bounds for estimating the RKE score from limited data, suggesting a fast convergence of the empirical RKE score to the score for the underlying data distribution. Utilizing the RKE score, we conduct an extensive evaluation of state-of-the-art generative models over standard image datasets. The numerical results indicate that while the recent algorithms for training generative models manage to improve the mode-based diversity over the earlier architectures, they remain incapable of capturing the full diversity of real data. 
Our empirical results provide a ranking of widely-used generative models based on the RKE score of their generated samples.", "keywords": "Generative Models; Evaluation in Learning; Information Measures", "primary_area": "", "supplementary_material": "/attachment/a69ba0c4e42a2c429a2e8d3cab8c5fff1731195f.pdf", "author": "Mohammad Jalali;Cheuk Ting Li;Farzan Farnia", "authorids": "~Mohammad_Jalali1;~Cheuk_Ting_Li1;~Farzan_Farnia1", "gender": "M;M;M", "homepage": "https://mjalali.github.io/;https://www.ie.cuhk.edu.hk/people/ctli.shtml;https://www.cse.cuhk.edu.hk/~farnia/", "dblp": ";120/7097;132/7757", "google_scholar": "NxaTDyUAAAAJ;;GYPCqcYAAAAJ", "orcid": "0000-0003-1203-6812;;0000-0002-6049-9232", "linkedin": "mjalali/;;farzan-farnia-00798335", "or_profile": "~Mohammad_Jalali1;~Cheuk_Ting_Li1;~Farzan_Farnia1", "aff": "Isfahan University of Technology;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "iut.ac.ir;cuhk.edu.hk;cuhk.edu.hk", "position": "Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njalali2023an,\ntitle={An Information-Theoretic Evaluation of Generative Models in Learning Multi-modal Distributions},\nauthor={Mohammad Jalali and Cheuk Ting Li and Farzan Farnia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PdZhf6PiAb}\n}", "github": "", "project": "", "reviewers": "uN6a;3kP8;YzmH;5vQM", "pdf_size": 11436738, "rating": "5;6;6;8", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "77;79;74;55", "wc_strengths": "37;74;49;89", "wc_weaknesses": "182;134;183;27", "wc_questions": "66;99;7;68", "wc_limitations": "10;19;7;8", "wc_review": "372;405;320;247", "wc_reply_reviewers": "102;31;18;38", "wc_reply_authors": "54;38;39;38", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 71.25, 9.54921462739214 ], "wc_strengths_avg": [ 62.25, 20.41292482717751 ], "wc_weaknesses_avg": [ 131.5, 63.5 ], "wc_questions_avg": [ 60.0, 33.279122584587476 ], "wc_limitations_avg": [ 11.0, 4.743416490252569 ], "wc_review_avg": [ 336.0, 59.65316420777694 ], "wc_reply_reviewers_avg": [ 47.25, 32.41431011143072 ], "wc_reply_authors_avg": [ 42.25, 6.796138609534093 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7437995483828785344&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "iut.ac.ir;cuhk.edu.hk;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Isfahan University of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.iut.ac.ir;https://www.cuhk.edu.hk", "aff_unique_abbr": "IUT;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Iran;China" }, { "title": "Learning Transformer Programs", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71753", "id": "Pe9WxkN8Ff", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/995f693b73050f90977ed2828202645c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pe9WxkN8Ff", "openreview": "https://openreview.net/forum?id=Pe9WxkN8Ff", "poster": "/media/PosterPDFs/NeurIPS%202023/71753.png?t=1702060658.9265637", "slides": "https://nips.cc/virtual/2023/poster/71753", "video": "https://nips.cc/virtual/2023/poster/71753", "author_site": "Dan Friedman, Alexander Wettig, Danqi Chen", "tldr": "", "abstract": "Recent research in mechanistic interpretability has attempted to reverse-engineer Transformer models by carefully inspecting network weights and activations. However, these approaches require considerable manual effort and still fall short of providing complete, faithful descriptions of the underlying algorithms. In this work, we introduce a procedure for training Transformers that are mechanistically interpretable by design. We build on RASP [Weiss et al., 2021], a programming language that can be compiled into Transformer weights. Instead of compiling human-written programs into Transformers, we design a modified Transformer that can be trained using gradient-based optimization and then automatically converted into a discrete, human-readable program. We refer to these models as Transformer Programs. To validate our approach, we learn Transformer Programs for a variety of problems, including an in-context learning task, a suite of algorithmic problems (e.g. sorting, recognizing Dyck languages), and NLP tasks including named entity recognition and text classification. The Transformer Programs can automatically find reasonable solutions, performing on par with standard Transformers of comparable size; and, more importantly, they are easy to interpret. To demonstrate these advantages, we convert Transformers into Python programs and use off-the-shelf code analysis tools to debug model errors and identify the \u201ccircuits\u201d used to solve different sub-problems. 
We hope that Transformer Programs open a new path toward the goal of intrinsically interpretable machine learning.", "keywords": "mechanistic interpretability;transformers", "primary_area": "", "supplementary_material": "/attachment/fce5c2f04d332b4ee67bbd01bdfcc9d75ac2b4cc.pdf", "author": "Dan Friedman;Alexander Wettig;Danqi Chen", "authorids": "~Dan_Friedman2;~Alexander_Wettig1;~Danqi_Chen1", "gender": ";;F", "homepage": "http://danfriedman0.github.io/;https://www.cs.princeton.edu/~awettig/;https://www.cs.princeton.edu/~danqic/", "dblp": "205/9386;302/0235;87/7949", "google_scholar": "1UMQ_KwAAAAJ;N_jSE08AAAAJ;sVR8ktkAAAAJ", "orcid": ";;", "linkedin": ";alexander-wettig/;", "or_profile": "~Dan_Friedman2;~Alexander_Wettig1;~Danqi_Chen1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;cs.princeton.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfriedman2023learning,\ntitle={Learning Transformer Programs},\nauthor={Dan Friedman and Alexander Wettig and Danqi Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pe9WxkN8Ff}\n}", "github": "", "project": "", "reviewers": "TaJd;HW9X;vQnY;Kcxc", "pdf_size": 1700214, "rating": "6;7;7;7", "confidence": "2;3;5;3", "soundness": "2;4;4;3", "novelty": "3;3;3;3", "presentation": "4;3;4;3", "wc_summary": "190;96;98;147", "wc_strengths": "126;53;54;221", "wc_weaknesses": "332;87;59;188", "wc_questions": "197;8;165;421", "wc_limitations": "16;48;3;35", "wc_review": "861;292;379;1012", "wc_reply_reviewers": "24;31;0;261", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 132.75, 38.854697270729055 ], "wc_strengths_avg": [ 113.5, 68.76227163205125 ], "wc_weaknesses_avg": [ 166.5, 106.92170032318042 ], "wc_questions_avg": [ 197.75, 147.4099301268405 ], "wc_limitations_avg": [ 25.5, 17.269916039170543 ], "wc_review_avg": [ 636.0, 306.75152811355315 ], "wc_reply_reviewers_avg": [ 79.0, 105.7047775646872 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15238644730674936888&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "princeton.edu;princeton.edu;cs.princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Behavior Alignment via Reward Function Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71752", "id": "PfpAQuyZCB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5357781c204d4412e44ed9cbcdb08d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PfpAQuyZCB", "openreview": "https://openreview.net/forum?id=PfpAQuyZCB", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71752.png?t=1702390385.7340965", "slides": "https://nips.cc/virtual/2023/poster/71752", "video": "https://nips.cc/virtual/2023/poster/71752", "author_site": "Dhawal Gupta, Yash Chandak, Scott Jordan, Philip Thomas, Bruno C. da Silva", "tldr": "", "abstract": "Designing reward functions for efficiently guiding reinforcement learning (RL) agents toward specific behaviors is a complex task.\nThis is challenging since it requires the identification of reward structures that are not sparse and that avoid inadvertently inducing undesirable behaviors. Naively modifying the reward structure to offer denser and more frequent feedback can lead to unintended outcomes and promote behaviors that are not aligned with the designer's intended goal. Although potential-based reward shaping is often suggested as a remedy, we systematically investigate settings where deploying it often significantly impairs performance. To address these issues, we introduce a new framework that uses a bi-level objective to learn \\emph{behavior alignment reward functions}. These functions integrate auxiliary rewards reflecting a designer's heuristics and domain knowledge with the environment's primary rewards. Our approach automatically determines the most effective way to blend these types of feedback, thereby enhancing robustness against heuristic reward misspecification. Remarkably, it can also adapt an agent's policy optimization process to mitigate suboptimalities resulting from limitations and biases inherent in the underlying RL algorithms. We evaluate our method's efficacy on a diverse set of tasks, from small-scale experiments to high-dimensional control challenges. We investigate heuristic auxiliary rewards of varying quality---some of which are beneficial and others detrimental to the learning process. Our results show that our framework offers a robust and principled way to integrate designer-specified heuristics. It not only addresses key shortcomings of existing approaches but also consistently leads to high-performing solutions, even when given misaligned or poorly-specified auxiliary reward functions.", "keywords": "Reinforcement Learning;Behavior Alignment;Implicit Gradient;Bi-level Optimization", "primary_area": "", "supplementary_material": "/attachment/020a46d06da4bdfece3a0304c02491988e9053cc.zip", "author": "Dhawal Gupta;Yash Chandak;Scott M. Jordan;Philip S. 
Thomas;Bruno Castro da Silva", "authorids": "~Dhawal_Gupta1;~Yash_Chandak1;~Scott_M._Jordan1;~Philip_S._Thomas1;~Bruno_Castro_da_Silva1", "gender": "M;;M;M;M", "homepage": "https://dhawgupta.github.io/;https://yashchandak.github.io/;https://scottjordan.github.io/scottjordan/;http://psthomas.com;https://people.cs.umass.edu/~bsilva/", "dblp": "231/0618;168/8450;222/1982;46/11107;75/3139", "google_scholar": "n1Lsp_8AAAAJ;AsgUcSEAAAAJ;qg8AOdgAAAAJ;e8Gzgo4AAAAJ;eskJDVUAAAAJ", "orcid": ";;0000-0003-4567-8627;;", "linkedin": "dhawgupta/;;;;", "or_profile": "~Dhawal_Gupta1;~Yash_Chandak1;~Scott_M._Jordan1;~Philip_S._Thomas1;~Bruno_Castro_da_Silva1", "aff": "Department of Computer Science, University of Massachusetts at Amherst;Computer Science Department, Stanford University;University of Alberta;College of Information and Computer Science, University of Massachusetts, Amherst;University of Massachusetts, Amherst", "aff_domain": "cs.umass.edu;cs.stanford.edu;ualberta.ca;cs.umass.edu;umass.edu", "position": "PhD student;Postdoc;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ngupta2023behavior,\ntitle={Behavior Alignment via Reward Function Optimization},\nauthor={Dhawal Gupta and Yash Chandak and Scott M. Jordan and Philip S. Thomas and Bruno Castro da Silva},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PfpAQuyZCB}\n}", "github": "", "project": "", "reviewers": "tPX3;GcHf;BidQ;8Eto", "pdf_size": 6679099, "rating": "6;6;8;8", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;3;4;4", "wc_summary": "225;72;83;150", "wc_strengths": "95;79;0;124", "wc_weaknesses": "211;192;0;20", "wc_questions": "6;3;155;1", "wc_limitations": "6;3;0;10", "wc_review": "543;349;238;305", "wc_reply_reviewers": "17;17;17;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 132.5, 61.18210522693707 ], "wc_strengths_avg": [ 74.5, 45.93745748297352 ], "wc_weaknesses_avg": [ 105.75, 96.24545443811878 ], "wc_questions_avg": [ 41.25, 65.69769782876718 ], "wc_limitations_avg": [ 4.75, 3.6996621467371855 ], "wc_review_avg": [ 358.75, 113.48210211306451 ], "wc_reply_reviewers_avg": [ 12.75, 7.361215932167728 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13753570202018247830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.umass.edu;cs.stanford.edu;ualberta.ca;cs.umass.edu;umass.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Massachusetts Amherst;Stanford University;University of Alberta", "aff_unique_dep": "Department of Computer Science;Computer Science Department;", "aff_unique_url": "https://www.umass.edu;https://www.stanford.edu;https://www.ualberta.ca", "aff_unique_abbr": "UMass Amherst;Stanford;UAlberta", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Amherst;Stanford;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Overcoming Recency Bias of Normalization 
Statistics in Continual Learning: Balance and Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71751", "id": "Ph65E1bE6A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/50ca96a1a9ebe0b5e5688a504feb6107-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ph65E1bE6A", "openreview": "https://openreview.net/forum?id=Ph65E1bE6A", "poster": "/media/PosterPDFs/NeurIPS%202023/71751.png?t=1698300964.1226895", "slides": "https://nips.cc/virtual/2023/poster/71751", "video": "https://nips.cc/virtual/2023/poster/71751", "author_site": "Yilin Lyu, Liyuan Wang, Xingxing Zhang, Zicheng Sun, Hang Su, Jun Zhu, Liping Jing", "tldr": "", "abstract": "Continual learning entails learning a sequence of tasks and balancing their knowledge appropriately. With limited access to old training samples, much of the current work in deep neural networks has focused on overcoming catastrophic forgetting of old tasks in gradient-based optimization. However, the normalization layers provide an exception, as they are updated interdependently by the gradient and statistics of currently observed training samples, which require specialized strategies to mitigate recency bias. In this work, we focus on the most popular Batch Normalization (BN) and provide an in-depth theoretical analysis of its sub-optimality in continual learning. Our analysis demonstrates the dilemma between balance and adaptation of BN statistics for incremental tasks, which potentially affects training stability and generalization. Targeting these particular challenges, we propose Adaptive Balance of BN (AdaB$^2$N), which appropriately incorporates a Bayesian-based strategy to adapt task-wise contributions and a modified momentum to balance BN statistics, corresponding to the training and testing stages. By implementing BN in a continual learning fashion, our approach achieves significant performance gains across a wide range of benchmarks, particularly for the challenging yet realistic online scenarios (e.g., up to 7.68\%, 6.86\% and 4.26\% on Split CIFAR-10, Split CIFAR-100 and Split Mini-ImageNet, respectively). 
Our code is available at https://github.com/lvyilin/AdaB2N.", "keywords": "Continual Learning;Batch Normalization;Recency Bias;Catastrophic Forgetting", "primary_area": "", "supplementary_material": "/attachment/693f822f69eadf028c39cb5ea27df5771a7c3b78.zip", "author": "Yilin Lyu;Liyuan Wang;Xingxing Zhang;Zicheng Sun;Hang Su;Jun Zhu;Liping Jing", "authorids": "~Yilin_Lyu1;~Liyuan_Wang1;~Xingxing_Zhang3;~Zicheng_Sun1;~Hang_Su3;~Jun_Zhu2;~Liping_Jing3", "gender": "M;M;F;M;M;M;F", "homepage": ";https://lywang3081.github.io/;https://indussky8.github.io/;;http://ml.cs.tsinghua.edu.cn/~jun;;", "dblp": "235/0438;121/6094;;;50/2644-1;26/5371-6;", "google_scholar": "9bPZ_08AAAAJ;UAgdoY4AAAAJ;https://scholar.google.com.hk/citations?user=RKjiLyAAAAAJ;https://scholar.google.cz/citations?user=lmF94L0AAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ;zStEDu4AAAAJ", "orcid": "0000-0002-0318-7988;;0000-0002-2909-1589;;;;", "linkedin": ";;;;;;", "or_profile": "~Yilin_Lyu1;~Liyuan_Wang1;~Xingxing_Zhang3;~Zicheng_Sun1;~Jun_Zhu2;~Hang_Su2;~liping_jing1", "aff": "Beijing Jiaotong University;Tsinghua University;Tsinghua University;Beijing Jiaotong University;Tsinghua University;Tsinghua University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;bjtu.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;bjtu.edu.cn", "position": "PhD student;PhD student;Researcher;Undergrad student;Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlyu2023overcoming,\ntitle={Overcoming Recency Bias of Normalization Statistics in Continual Learning: Balance and Adaptation},\nauthor={Yilin Lyu and Liyuan Wang and Xingxing Zhang and Zicheng Sun and Hang Su and Jun Zhu and Liping Jing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ph65E1bE6A}\n}", "github": "", "project": "", "reviewers": "rCcF;SBWS;24XQ;Qyzj", "pdf_size": 2282517, "rating": "5;5;7;7", "confidence": "5;4;4;4", "soundness": "2;2;4;4", "novelty": "2;2;3;4", "presentation": "3;2;3;4", "wc_summary": "92;61;146;40", "wc_strengths": "66;45;146;35", "wc_weaknesses": "305;264;353;129", "wc_questions": "79;136;150;17", "wc_limitations": "79;3;16;1", "wc_review": "621;509;811;222", "wc_reply_reviewers": "150;31;281;20", "wc_reply_authors": "60;18;413;13", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.75, 39.90848907187542 ], "wc_strengths_avg": [ 73.0, 43.60619222083029 ], "wc_weaknesses_avg": [ 262.75, 83.3977667566704 ], "wc_questions_avg": [ 95.5, 52.547597471245055 ], "wc_limitations_avg": [ 24.75, 31.846310618343217 ], "wc_review_avg": [ 540.75, 213.35694856273136 ], "wc_reply_reviewers_avg": [ 120.5, 105.76034228386366 ], "wc_reply_authors_avg": [ 126.0, 166.70182962403263 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12510358745169623131&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "bjtu.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;bjtu.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;bjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;0;1;1;0", 
"aff_unique_norm": "Beijing Jiao Tong University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.tsinghua.edu.cn", "aff_unique_abbr": "BJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Provable Guarantees for Generative Behavior Cloning: Bridging Low-Level Stability and High-Level Behavior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71750", "id": "PhFVF0gwid", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97c903fbf21a7d863af2015d8803ca8f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PhFVF0gwid", "openreview": "https://openreview.net/forum?id=PhFVF0gwid", "poster": "/media/PosterPDFs/NeurIPS%202023/71750.png?t=1701377594.9207177", "slides": "https://nips.cc/virtual/2023/poster/71750", "video": "https://nips.cc/virtual/2023/poster/71750", "author_site": "Adam Block, Ali Jadbabaie, Daniel Pfrommer, Max Simchowitz, Russ Tedrake", "tldr": "", "abstract": "We propose a theoretical framework for studying behavior cloning of complex expert demonstrations using generative modeling.\nOur framework invokes low-level controllers - either learned or implicit in position-command control - to stabilize imitation around expert demonstrations. We show that with (a) a suitable low-level stability guarantee and (b) a powerful enough generative model as our imitation learner, pure supervised behavior cloning can generate trajectories matching the per-time step distribution of essentially arbitrary expert trajectories in an optimal transport cost. Our analysis relies on a stochastic continuity property of the learned policy we call \"total variation continuity\" (TVC). We then show that TVC can be ensured with minimal degradation of accuracy by combining a popular data-augmentation regimen with a novel algorithmic trick: adding augmentation noise at execution time. We instantiate our guarantees for policies parameterized by diffusion models and prove that if the learner accurately estimates the score of the (noise-augmented) expert policy, then the distribution of imitator trajectories is close to the demonstrator distribution in a natural optimal transport distance. Our analysis constructs intricate couplings between noise-augmented trajectories, a technique that may be of independent interest. 
We conclude by empirically validating our algorithmic recommendations, and discussing implications for future research directions for better behavior cloning with generative modeling.", "keywords": "Imitation Learning;Control;Diffusion Models;Optimal Transport", "primary_area": "", "supplementary_material": "/attachment/a0521ff7e6a8820a430f598160507817046ab189.zip", "author": "Adam Block;Ali Jadbabaie;Daniel Pfrommer;Max Simchowitz;Russ Tedrake", "authorids": "~Adam_Block1;~Ali_Jadbabaie1;~Daniel_Pfrommer1;~Max_Simchowitz1;~Russ_Tedrake1", "gender": ";M;M;M;M", "homepage": "https://abblock.github.io/index.html;http://www.mit.edu/~jadbabai/www;https://dan.pfrommer.us/;;http://people.csail.mit.edu/russt", "dblp": "258/1018;83/3158;;176/5165;73/1296", "google_scholar": ";ZBc_WwYAAAAJ;RrVLV3sAAAAJ;;nxNkEiYAAAAJ", "orcid": "0000-0003-1677-2665;;;;", "linkedin": ";;;;", "or_profile": "~Adam_Block1;~Ali_Jadbabaie1;~Daniel_Pfrommer1;~Max_Simchowitz1;~Russ_Tedrake1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Full Professor;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nblock2023provable,\ntitle={Provable Guarantees for Generative Behavior Cloning: Bridging Low-Level Stability and High-Level Behavior},\nauthor={Adam Block and Ali Jadbabaie and Daniel Pfrommer and Max Simchowitz and Russ Tedrake},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PhFVF0gwid}\n}", "github": "", "project": "", "reviewers": "rprV;dAio;VsMF;NhZ6;RM4v", "pdf_size": 1201295, "rating": "3;6;6;6;7", "confidence": "3;3;1;3;3", "soundness": "2;3;2;3;3", "novelty": "2;3;2;2;3", "presentation": "3;2;2;3;3", "wc_summary": "84;170;132;118;64", "wc_strengths": "129;234;55;153;64", "wc_weaknesses": "121;104;149;71;81", "wc_questions": "46;95;80;50;112", "wc_limitations": "90;8;1;53;25", "wc_review": "470;611;417;445;346", "wc_reply_reviewers": "0;11;29;11;8", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 113.6, 37.08153179144572 ], "wc_strengths_avg": [ 127.0, 65.24109134586882 ], "wc_weaknesses_avg": [ 105.2, 28.017137612539937 ], "wc_questions_avg": [ 76.6, 25.48411269791436 ], "wc_limitations_avg": [ 35.4, 32.659454986266994 ], "wc_review_avg": [ 457.8, 87.11693291203497 ], "wc_reply_reviewers_avg": [ 11.8, 9.495261976375375 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1474419561548971, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13587587407958806748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Fine-grained View-Invariant Representations from Unpaired Ego-Exo Videos via Temporal Alignment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71749", "id": "Pj6X6GqNy8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a845fdc3f87751710218718adb634fe7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pj6X6GqNy8", "openreview": "https://openreview.net/forum?id=Pj6X6GqNy8", "poster": "/media/PosterPDFs/NeurIPS%202023/71749.png?t=1702161496.5533705", "slides": "https://nips.cc/virtual/2023/poster/71749", "video": "https://nips.cc/virtual/2023/poster/71749", "author_site": "Zihui (Sherry) Xue, Kristen Grauman", "tldr": "", "abstract": "The egocentric and exocentric viewpoints of a human activity look dramatically different, yet invariant representations to link them are essential for many potential applications in robotics and augmented reality. Prior work is limited to learning view-invariant features from paired synchronized viewpoints. We relax that strong data assumption and propose to learn fine-grained action features that are invariant to the viewpoints by aligning egocentric and exocentric videos in time, even when not captured simultaneously or in the same environment. To this end, we propose AE2, a self-supervised embedding approach with two key designs: (1) an object-centric encoder that explicitly focuses on regions corresponding to hands and active objects; (2) a contrastive-based alignment objective that leverages temporally reversed frames as negative samples. For evaluation, we establish a benchmark for fine-grained video understanding in the ego-exo context, comprising four datasets---including an ego tennis forehand dataset we collected, along with dense per-frame labels we annotated for each dataset. 
On the four datasets, our AE2 method strongly outperforms prior work in a variety of fine-grained downstream tasks, both in regular and cross-view settings.", "keywords": "fine-grained video understanding;egocentric video;self-supervised learning;temporal alignment", "primary_area": "", "supplementary_material": "/attachment/9522a2e3745c925297c6377339f5499327e21558.zip", "author": "Zihui Xue;Kristen Grauman", "authorids": "~Zihui_Xue1;~Kristen_Grauman1", "gender": "F;F", "homepage": "https://zihuixue.github.io;http://www.cs.utexas.edu/~grauman/", "dblp": "256/9549;57/4553", "google_scholar": "JCV9BQ0AAAAJ;Jp6Mz1sAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zihui_Xue1;~Kristen_Grauman1", "aff": "University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nxue2023learning,\ntitle={Learning Fine-grained View-Invariant Representations from Unpaired Ego-Exo Videos via Temporal Alignment},\nauthor={Zihui Xue and Kristen Grauman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pj6X6GqNy8}\n}", "github": "", "project": "", "reviewers": "G2AN;CYjc;ghSs;YpGy", "pdf_size": 25477151, "rating": "5;5;6;6", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "83;65;86;127", "wc_strengths": "36;61;61;112", "wc_weaknesses": "111;189;81;133", "wc_questions": "26;2;2;69", "wc_limitations": "4;1;5;7", "wc_review": "260;318;235;448", "wc_reply_reviewers": "59;0;13;32", "wc_reply_authors": "355;0;0;36", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.25, 22.68672519338126 ], "wc_strengths_avg": [ 67.5, 27.64507189355817 ], "wc_weaknesses_avg": [ 128.5, 39.506328606946 ], "wc_questions_avg": [ 24.75, 27.36215452043205 ], "wc_limitations_avg": [ 4.25, 2.165063509461097 ], "wc_review_avg": [ 315.25, 82.34493002000791 ], "wc_reply_reviewers_avg": [ 26.0, 22.192341021172147 ], "wc_reply_authors_avg": [ 97.75, 149.24874371330569 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15806772155474050677&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Implicit Bias of (Stochastic) Gradient Descent for Rank-1 Linear Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71748", "id": "PjBEUTVzoe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b5b528767aa35f5b1a60fe0aaeca0563-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PjBEUTVzoe", "openreview": "https://openreview.net/forum?id=PjBEUTVzoe", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71748.png?t=1702263164.0516286", "slides": "https://nips.cc/virtual/2023/poster/71748", "video": "https://nips.cc/virtual/2023/poster/71748", "author_site": "Bochen Lyu, Zhanxing Zhu", "tldr": "", "abstract": "Studying the implicit bias of gradient descent (GD) and stochastic gradient descent (SGD) is critical to unveil the underlying mechanism of deep learning. Unfortunately, even for standard linear networks in regression setting, a comprehensive characterization of the implicit bias is still an open problem. This paper proposes to investigate a new proxy model of standard linear network, rank-1 linear network, where each weight matrix is parameterized as a rank-1 form. For over-parameterized regression problem, we precisely analyze the implicit bias of GD and SGD---by identifying a \u201cpotential\u201d function such that GD converges to its minimizer constrained by zero training error (i.e., interpolation solution), and further characterizing the role of the noise introduced by SGD in perturbing the form of this potential. Our results explicitly connect the depth of the network and the initialization with the implicit bias of GD and SGD. Furthermore, we emphasize a new implicit bias of SGD jointly induced by stochasticity and over-parameterization, which can reduce the dependence of the SGD's solution on the initialization. Our findings regarding the implicit bias are different from that of a recently popular model, the diagonal linear network. We highlight that the induced bias of our rank-1 model is more consistent with standard linear network while the diagonal one is not. This suggests that the proposed rank-1 linear network might be a plausible proxy for standard linear net.", "keywords": "implicit bias;gradient descent;stochastic gradient descent;linear networks", "primary_area": "", "supplementary_material": "/attachment/7b7a06ea986cc9bc3ef4ff3dca407929d2efeb0c.pdf", "author": "Bochen Lyu;Zhanxing Zhu", "authorids": "~Bochen_Lyu1;~Zhanxing_Zhu1", "gender": ";M", "homepage": ";https://zhanxingzhu.github.io/", "dblp": ";87/7756.html", "google_scholar": ";a2sHceIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Bochen_Lyu1;~Zhanxing_Zhu1", "aff": ";University of Southampton", "aff_domain": ";soton.ac.uk", "position": ";Associate Professor", "bibtex": "@inproceedings{\nlyu2023implicit,\ntitle={Implicit Bias of (Stochastic) Gradient Descent for Rank-1 Linear Neural Network},\nauthor={Bochen Lyu and Zhanxing Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PjBEUTVzoe}\n}", "github": "", "project": "", "reviewers": "zKfq;hVR8;B6bN;dEGz", "pdf_size": 1059680, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "3;4;3;4", "novelty": "2;3;3;3", "presentation": "3;4;4;4", "wc_summary": "121;133;71;189", "wc_strengths": "54;135;86;652", "wc_weaknesses": "647;302;144;209", "wc_questions": "154;275;4;41", "wc_limitations": "86;3;7;63", "wc_review": "1062;848;312;1154", "wc_reply_reviewers": "222;20;0;26", "wc_reply_authors": "469;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.5, 41.961291686505554 ], "wc_strengths_avg": [ 231.75, 244.34031083716008 ], "wc_weaknesses_avg": [ 325.5, 193.9258879056636 ], 
"wc_questions_avg": [ 118.5, 105.91151967562358 ], "wc_limitations_avg": [ 39.75, 35.716767770894386 ], "wc_review_avg": [ 844.0, 326.5976117487695 ], "wc_reply_reviewers_avg": [ 67.0, 90.00555538409837 ], "wc_reply_authors_avg": [ 117.25, 203.08295718745086 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16717555065674155255&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": ";soton.ac.uk", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Southampton", "aff_unique_dep": "", "aff_unique_url": "https://www.southampton.ac.uk", "aff_unique_abbr": "Southampton", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "One Less Reason for Filter Pruning: Gaining Free Adversarial Robustness with Structured Grouped Kernel Pruning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71747", "id": "Pjky9XG8zP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c3aba4234afd1c8116d879ba183f4835-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pjky9XG8zP", "openreview": "https://openreview.net/forum?id=Pjky9XG8zP", "poster": "/media/PosterPDFs/NeurIPS%202023/71747.png?t=1702220372.7991016", "slides": "https://nips.cc/virtual/2023/poster/71747", "video": "https://nips.cc/virtual/2023/poster/71747", "author_site": "Shaochen (Henry) Zhong, Zaichuan You, Jiamu Zhang, Sebastian Zhao, Zachary LeClaire, Zirui Liu, Daochen Zha, Vipin Chaudhary, Shuai Xu, Xia Hu", "tldr": "", "abstract": "Densely structured pruning methods utilizing simple pruning heuristics can deliver immediate compression and acceleration benefits with acceptable benign performances. However, empirical findings indicate such naively pruned networks are extremely fragile under simple adversarial attacks. Naturally, we would be interested in knowing if such a phenomenon also holds for carefully designed modern structured pruning methods. If so, then to what extent is the severity? And what kind of remedies are available? Unfortunately, both the questions and the solution remain largely unaddressed: no prior art is able to provide a thorough investigation on the adversarial performance of modern structured pruning methods (spoiler: it is not good), yet the few works that attempt to provide mitigation often do so at various extra costs with only to-be-desired performance.\n\nIn this work, we answer both questions by fairly and comprehensively investigating the adversarial performance of 10+ popular structured pruning methods. Solution-wise, we take advantage of *Grouped Kernel Pruning (GKP)*'s recent success in pushing densely structured pruning freedom to a more fine-grained level. By mixing up kernel smoothness \u2014 a classic robustness-related kernel-level metric \u2014 into a modified GKP procedure, we present a one-shot-post-train-weight-dependent GKP method capable of advancing SOTA performance on both the benign and adversarial scale, while requiring no extra (in fact, often less) cost than a standard pruning procedure. 
Please refer to our [GitHub repository](https://github.com/henryzhongsc/adv_robust_gkp) for code implementation, tool sharing, and model checkpoints.", "keywords": "pruning;structured pruning;adversarial robustness;grouped kernel pruning;CNN;one-shot", "primary_area": "", "supplementary_material": "", "author": "Shaochen Zhong;Zaichuan You;Jiamu Zhang;Sebastian Zhao;Zachary LeClaire;Zirui Liu;Daochen Zha;Vipin Chaudhary;Shuai Xu;Xia Hu", "authorids": "~Shaochen_Zhong1;zxy456@case.edu;~Jiamu_Zhang1;~Sebastian_Zhao1;~Zachary_LeClaire1;~Zirui_Liu1;~Daochen_Zha1;~Vipin_Chaudhary2;~Shuai_Xu2;~Xia_Hu4", "gender": "M;;M;M;M;M;;M;M;", "homepage": "https://openreview.net/profile?id=~Shaochen_Zhong1;;;;;https://zirui-ray-liu.github.io/;http://dczha.com/;https://engineering.case.edu/profiles/vxc204;https://engineering.case.edu/profiles/sxx214;", "dblp": "326/7286.html;;;;;196/8629-1.html;167/0903;c/VipinChaudhary.html;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;eQpW5EIAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;jK0NgMcAAAAJ;vJbjqpIAAAAJ;wu-vtI4AAAAJ;", "orcid": ";;;;;;0000-0002-6677-7504;0000-0001-9672-6225;;", "linkedin": "shaochen-henry-zhong-96a941249/;;jiamu-zhang-morris;https://linkedin.com/in/sebbyzhao;zachary-leclaire-18b69b162/;;daochen-zha;vipin-chaudhary-379529/;;", "or_profile": "~Shaochen_Zhong1;zxy456@case.edu;~Jiamu_Zhang1;~Sebastian_Zhao1;~Zachary_LeClaire1;~Zirui_Liu1;~Daochen_Zha1;~Vipin_Chaudhary2;~Shuai_Xu2;~Xia_Hu4", "aff": "Rice University;;Case Western Reserve University;UC Berkeley, University of California, Berkeley;Case Western Reserve University;Rice University;Rice University;Case Western Reserve University;Case Western Reserve University;", "aff_domain": "rice.edu;;case.edu;cs.berkeley.edu;case.edu;rice.edu;rice.edu;case.edu;case.edu;", "position": "PhD student;;Undergrad student;Undergrad student;Undergrad student;PhD student;PhD student;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nzhong2023one,\ntitle={One Less Reason for Filter Pruning: Gaining Free Adversarial Robustness with Structured Grouped Kernel Pruning},\nauthor={Shaochen Zhong and Zaichuan You and Jiamu Zhang and Sebastian Zhao and Zachary LeClaire and Zirui Liu and Daochen Zha and Vipin Chaudhary and Shuai Xu and Xia Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pjky9XG8zP}\n}", "github": "", "project": "", "reviewers": "rEnW;sejQ;xyz8;Tjyo", "pdf_size": 1072824, "rating": "6;6;7;7", "confidence": "3;4;4;3", "soundness": "2;2;2;3", "novelty": "3;2;1;3", "presentation": "2;3;2;2", "wc_summary": "92;55;79;106", "wc_strengths": "23;48;14;44", "wc_weaknesses": "129;66;291;55", "wc_questions": "102;47;30;10", "wc_limitations": "29;1;15;5", "wc_review": "375;217;429;220", "wc_reply_reviewers": "157;111;191;13", "wc_reply_authors": "1298;1113;1310;0", "reply_reviewers": "2;2;2;1", "reply_authors": "4;4;4;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 18.774983355518586 ], "wc_strengths_avg": [ 32.25, 14.184057952504283 ], "wc_weaknesses_avg": [ 135.25, 94.25066312764065 ], "wc_questions_avg": [ 47.25, 34.2153109002388 ], "wc_limitations_avg": [ 12.5, 10.805091392487155 ], "wc_review_avg": [ 310.25, 93.72132894917785 ], "wc_reply_reviewers_avg": [ 118.0, 66.94027188471824 ], 
"wc_reply_authors_avg": [ 930.25, 542.7275444456454 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6166069045070138592&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "rice.edu;;case.edu;cs.berkeley.edu;case.edu;rice.edu;rice.edu;case.edu;case.edu;", "author_num": 10, "aff_unique_index": "0;1;2;1;0;0;1;1", "aff_unique_norm": "Rice University;Case Western Reserve University;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.case.edu;https://www.berkeley.edu", "aff_unique_abbr": "Rice;CWRU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "EHRXQA: A Multi-Modal Question Answering Dataset for Electronic Health Records with Chest X-ray Images", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73600", "id": "Pk2x7FPuZ4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c007ebef1d11fd48da6ce4f54687db6-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Pk2x7FPuZ4", "openreview": "https://openreview.net/forum?id=Pk2x7FPuZ4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73600", "video": "https://nips.cc/virtual/2023/poster/73600", "author_site": "Seongsu Bae, Daeun Kyung, Jaehee Ryu, Eunbyeol Cho, Gyubok Lee, Sunjun Kweon, Jungwoo Oh, Lei Ji, Eric Chang, Tackeun Kim, Edward Choi", "tldr": "", "abstract": "Electronic Health Records (EHRs), which contain patients' medical histories in various multi-modal formats, often overlook the potential for joint reasoning across imaging and table modalities underexplored in current EHR Question Answering (QA) systems. In this paper, we introduce EHRXQA, a novel multi-modal question answering dataset combining structured EHRs and chest X-ray images. To develop our dataset, we first construct two uni-modal resources: 1) The MIMIC- CXR-VQA dataset, our newly created medical visual question answering (VQA) benchmark, specifically designed to augment the imaging modality in EHR QA, and 2) EHRSQL (MIMIC-IV), a refashioned version of a previously established table-based EHR QA dataset. By integrating these two uni-modal resources, we successfully construct a multi-modal EHR QA dataset that necessitates both uni-modal and cross-modal reasoning. To address the unique challenges of multi-modal questions within EHRs, we propose a NeuralSQL-based strategy equipped with an external VQA API. This pioneering endeavor enhances engagement with multi-modal EHR sources and we believe that our dataset can catalyze advances in real-world medical scenarios such as clinical decision-making and research. 
EHRXQA is available at https://github.com/baeseongsu/ehrxqa.", "keywords": "healthcare;multi-modal question-answering;semantic parsing;visual question answering;electronic health records;chest x-ray", "primary_area": "", "supplementary_material": "/attachment/3fb78045056d413126ad17363bd2f9272a741338.pdf", "author": "Seongsu Bae;Daeun Kyung;Jaehee Ryu;Eunbyeol Cho;Gyubok Lee;Sunjun Kweon;Jungwoo Oh;Lei Ji;Eric I-Chao Chang;Tackeun Kim;Edward Choi", "authorids": "~Seongsu_Bae1;~Daeun_Kyung1;~Jaehee_Ryu1;~Eunbyeol_Cho1;~Gyubok_Lee1;~Sunjun_Kweon1;~Jungwoo_Oh1;~Lei_Ji1;~Eric_I-Chao_Chang1;~Tackeun_Kim1;~Edward_Choi1", "gender": "M;;;Not Specified;M;M;M;F;M;M;M", "homepage": ";https://dek924.github.io/;https://github.com/unnjena;;https://sites.google.com/view/gyuboklee;;;;;;http://mp2893.com", "dblp": "307/5358;321/9474;;;249/4944;;18/9560;42/2721-1;117/6621;;41/3886", "google_scholar": "hJKVzt4AAAAJ;WCMzXVoAAAAJ;;;UYzauyYAAAAJ;mKFQKpwAAAAJ;YlZmoPQAAAAJ;;FcQTsdkAAAAJ;https://scholar.google.co.kr/citations?hl=ko;GUlGIPkAAAAJ", "orcid": ";;;;;;0000-0002-4804-6150;;;my-orcid?orcid=0000-0002-4375-8095;", "linkedin": "seongsu-bae-17297b180/;;;eunbyeol-cho;gyubok-lee-104915229;;;;ericichaochang;;", "or_profile": "~Seongsu_Bae1;~Daeun_Kyung1;~Jaehee_Ryu1;~Eunbyeol_Cho1;~Gyubok_Lee1;~Sunjun_Kweon1;~Jungwoo_Oh1;~Lei_Ji1;~Eric_I-Chao_Chang1;~Tackeun_Kim1;~Edward_Choi1", "aff": "Research, Microsoft (Asia);LG Corporation;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Microsoft Research;Microsoft Research;Seoul National University Bundang Hospital;Korea Advanced Institute of Science & Technology", "aff_domain": "research.microsoft.com;lgresearch.ai;kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;research.microsoft.com;research.microsoft.com;snubh.org;kaist.ac.kr", "position": "Intern;Intern;MS student;MS student;PhD student;PhD student;PhD student;Researcher;Principal Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbae2023ehrxqa,\ntitle={{EHRXQA}: A Multi-Modal Question Answering Dataset for Electronic Health Records with Chest X-ray Images},\nauthor={Seongsu Bae and Daeun Kyung and Jaehee Ryu and Eunbyeol Cho and Gyubok Lee and Sunjun Kweon and Jungwoo Oh and Lei Ji and Eric I-Chao Chang and Tackeun Kim and Edward Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Pk2x7FPuZ4}\n}", "github": "", "project": "", "reviewers": "NgV3;obQA;cGyp", "pdf_size": 1163357, "rating": "7;7;9", "confidence": "3;4;3", "wc_summary_and_contributions": "52;87;36", "wc_strengths": "44;31;79", "wc_improvement": "44;140;20", "wc_limitations": "50;42;9", "wc_correctness": "20;9;7", "wc_clarity": "48;71;5", "wc_relation_to_prior_work": "18;16;9", "wc_documentation": "5;11;7", "wc_additional_feedback": "1;1;1", "wc_review": "282;408;173", "wc_reply_reviewers": "21;26;6", "wc_reply_authors": "1518;2728;91", "reply_reviewers": "1;1;1", "reply_authors": "4;6;1", "rating_avg": [ 7.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 58.333333333333336, 21.29684379328438 ], "wc_strengths_avg": [ 51.333333333333336, 20.270394394014364 ], "wc_improvement_avg": [ 68.0, 51.84592558726288 ], 
"wc_limitations_avg": [ 33.666666666666664, 17.745108872274887 ], "wc_correctness_avg": [ 12.0, 5.715476066494082 ], "wc_clarity_avg": [ 41.333333333333336, 27.353650985238193 ], "wc_relation_to_prior_work_avg": [ 14.333333333333334, 3.858612300930075 ], "wc_documentation_avg": [ 7.666666666666667, 2.494438257849294 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 287.6666666666667, 96.02198822260566 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 8.498365855987975 ], "wc_reply_authors_avg": [ 1445.6666666666667, 1077.7650743810339 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.6666666666666665, 2.0548046676563256 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4868484612018132790&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "research.microsoft.com;lgresearch.ai;kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;research.microsoft.com;research.microsoft.com;snubh.org;kaist.ac.kr", "author_num": 11, "aff_unique_index": "0;1;2;2;2;2;2;0;0;3;2", "aff_unique_norm": "Microsoft;LG;Korea Advanced Institute of Science and Technology;Seoul National University", "aff_unique_dep": "Research;LG Corporation;;Hospital", "aff_unique_url": "https://www.microsoft.com;https://www.lg.com;https://www.kaist.ac.kr;https://www.snuh.org", "aff_unique_abbr": "MS;LG;KAIST;SNUH", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Asia;;Bundang", "aff_country_unique_index": "0;1;1;1;1;1;1;2;2;1;1", "aff_country_unique": "China;South Korea;United States" }, { "title": "ViCA-NeRF: View-Consistency-Aware 3D Editing of Neural Radiance Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71746", "id": "Pk49a9snPe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c1e2faff6f588870935f114ebe04a3e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pk49a9snPe", "openreview": "https://openreview.net/forum?id=Pk49a9snPe", "poster": "/media/PosterPDFs/NeurIPS%202023/71746.png?t=1701891465.5990956", "slides": "https://nips.cc/virtual/2023/poster/71746", "video": "https://nips.cc/virtual/2023/poster/71746", "author_site": "Jiahua Dong, Yu-Xiong Wang", "tldr": "", "abstract": "We introduce ViCA-NeRF, the *first* view-consistency-aware method for 3D editing with text instructions. In addition to the implicit neural radiance field (NeRF) modeling, our key insight is to exploit two sources of regularization that *explicitly* propagate the editing information across different views, thus ensuring multi-view consistency. For *geometric regularization*, we leverage the depth information derived from NeRF to establish image correspondences between different views. For *learned regularization*, we align the latent codes in the 2D diffusion model between edited and unedited images, enabling us to edit key views and propagate the update throughout the entire scene. Incorporating these two strategies, our ViCA-NeRF operates in two stages. In the initial stage, we blend edits from different views to create a preliminary 3D edit. This is followed by a second stage of NeRF training, dedicated to further refining the scene's appearance. Experimental results demonstrate that ViCA-NeRF provides more flexible, efficient (3 times faster) editing with higher levels of consistency and details, compared with the state of the art. 
Our code is available at: https://github.com/Dongjiahua/VICA-NeRF", "keywords": "neural radiance field;diffusion model;editing", "primary_area": "", "supplementary_material": "/attachment/0ee12d66bac2c5bcd678a8e18429191b3de0c509.zip", "author": "Jiahua Dong;Yu-Xiong Wang", "authorids": "~Jiahua_Dong3;~Yu-Xiong_Wang1", "gender": "M;", "homepage": ";https://yxw.cs.illinois.edu/", "dblp": "247/5746-2;35/10700", "google_scholar": ";T_Q-xDkAAAAJ", "orcid": ";", "linkedin": "jiahua-dong-190431268/;", "or_profile": "~Jiahua_Dong3;~Yu-Xiong_Wang1", "aff": "University of Illinois Urbana-Champaign;Department of Computer Science, University of Illinois Urbana-Champaign", "aff_domain": "illinois.edu;cs.illinois.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\ndong2023vicanerf,\ntitle={Vi{CA}-Ne{RF}: View-Consistency-Aware 3D Editing of Neural Radiance Fields},\nauthor={Jiahua Dong and Yu-Xiong Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pk49a9snPe}\n}", "github": "", "project": "", "reviewers": "6ncD;x1ev;CQtY;iE4L;EWGc", "pdf_size": 6747423, "rating": "4;4;5;6;6", "confidence": "4;4;4;3;4", "soundness": "4;3;3;2;3", "novelty": "2;2;2;2;3", "presentation": "2;3;2;3;3", "wc_summary": "95;88;100;85;75", "wc_strengths": "86;48;64;157;109", "wc_weaknesses": "131;80;222;181;34", "wc_questions": "121;116;25;58;124", "wc_limitations": "11;10;1;4;28", "wc_review": "444;342;412;485;370", "wc_reply_reviewers": "279;48;80;0;320", "wc_reply_authors": "1552;66;304;0;548", "reply_reviewers": "1;1;1;0;1", "reply_authors": "4;2;3;1;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 88.6, 8.593020423576334 ], "wc_strengths_avg": [ 92.8, 38.122958961759515 ], "wc_weaknesses_avg": [ 129.6, 67.49992592588528 ], "wc_questions_avg": [ 88.8, 40.08690559272441 ], "wc_limitations_avg": [ 10.8, 9.368030742904295 ], "wc_review_avg": [ 410.6, 51.00431354307202 ], "wc_reply_reviewers_avg": [ 145.4, 129.02650890417829 ], "wc_reply_authors_avg": [ 494.0, 563.1340870520982 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5590169943749475, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15327951982315915675&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "illinois.edu;cs.illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimal Preconditioning and Fisher Adaptive Langevin Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71745", "id": "Pk9CdOZYRA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5da6d5818a156791090c875abeca3cf8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pk9CdOZYRA", "openreview": "https://openreview.net/forum?id=Pk9CdOZYRA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71745", "video": 
"https://nips.cc/virtual/2023/poster/71745", "tldr": "", "abstract": "We define an optimal preconditioning for the Langevin diffusion by analytically optimizing the expected squared jumped distance. This yields as the optimal preconditioning an inverse Fisher information covariance matrix, where the covariance matrix is computed as the outer product of log target gradients averaged under the target. We apply this result to the Metropolis adjusted Langevin algorithm (MALA) and derive a computationally efficient adaptive MCMC scheme that learns the preconditioning from the history of gradients produced as the algorithm runs. We show in several experiments that the proposed algorithm is very robust in high dimensions and significantly outperforms other methods, including a closely related adaptive MALA scheme that learns the preconditioning with standard adaptive MCMC as well as the position-dependent Riemannian manifold MALA sampler.", "keywords": "MCMC;Langevin diffusion;preconditioning;Fisher information;adaptive MCMC;score function", "primary_area": "", "supplementary_material": "/attachment/2563e5e4e7a6f321988cc4d29db5ebf77902e5d3.pdf", "author": "Michalis Titsias", "authorids": "~Michalis_Titsias1", "gender": "M", "homepage": "https://mtitsias.github.io/", "dblp": "19/5385", "google_scholar": "https://scholar.google.gr/citations?user=B-SbkAwAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Michalis_Titsias1", "aff": "Google DeepMind", "aff_domain": "google.com", "position": "Research Scientist", "bibtex": "@inproceedings{\ntitsias2023optimal,\ntitle={Optimal Preconditioning and Fisher Adaptive Langevin Sampling},\nauthor={Michalis Titsias},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pk9CdOZYRA}\n}", "github": "", "project": "", "reviewers": "Zcpk;K8aZ;NzaV;bdsU", "pdf_size": 2102467, "rating": "6;7;7;8", "confidence": "5;5;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;4;3;4", "wc_summary": "77;90;126;83", "wc_strengths": "77;54;78;8", "wc_weaknesses": "24;152;75;9", "wc_questions": "137;48;122;56", "wc_limitations": "1;4;18;63", "wc_review": "316;348;419;219", "wc_reply_reviewers": "22;10;11;0", "wc_reply_authors": "0;271;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 94.0, 19.03943276465977 ], "wc_strengths_avg": [ 54.25, 28.375825979167548 ], "wc_weaknesses_avg": [ 65.0, 55.87038571551122 ], "wc_questions_avg": [ 90.75, 39.213358693180055 ], "wc_limitations_avg": [ 21.5, 24.804233509624925 ], "wc_review_avg": [ 325.5, 71.90445048813042 ], "wc_reply_reviewers_avg": [ 10.75, 7.790218225441442 ], "wc_reply_authors_avg": [ 67.75, 117.34644221279143 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17288484491989804685&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_country_unique_index": "0", 
"aff_country_unique": "United Kingdom" }, { "title": "Truncating Trajectories in Monte Carlo Policy Evaluation: an Adaptive Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71744", "id": "PkKpTK7hJ6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28312c9491d60ed0c77f7fff4ad86dd1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PkKpTK7hJ6", "openreview": "https://openreview.net/forum?id=PkKpTK7hJ6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71744", "video": "https://nips.cc/virtual/2023/poster/71744", "author_site": "Riccardo Poiani, Nicole Nobili, Alberto Maria Metelli, Marcello Restelli", "tldr": "", "abstract": "Policy evaluation via Monte Carlo (MC) simulation is at the core of many MC Reinforcement Learning (RL) algorithms (e.g., policy gradient methods). In this context, the designer of the learning system specifies an interaction budget that the agent usually spends by collecting trajectories of *fixed length* within a simulator. However, is this data collection strategy the best option? To answer this question, in this paper, we consider as quality index the variance of an unbiased policy return estimator that uses trajectories of different lengths, i.e., *truncated*. We first derive a closed-form expression of this variance that clearly shows the sub-optimality of the fixed-length trajectory schedule. Furthermore, it suggests that adaptive data collection strategies that spend the available budget sequentially might be able to allocate a larger portion of transitions in timesteps in which more accurate sampling is required to reduce the variance of the final estimate. Building on these findings, we present an *adaptive* algorithm called **R**obust and **I**terative **D**ata collection strategy **O**ptimization (RIDO). The main intuition behind RIDO is to split the available interaction budget into mini-batches. At each round, the agent determines the most convenient schedule of trajectories that minimizes an empirical and robust estimate of the estimator's variance. After discussing the theoretical properties of our method, we conclude by assessing its performance across multiple domains. 
Our results show that RIDO can adapt its trajectory schedule toward timesteps where more sampling is required to increase the quality of the final estimation.", "keywords": "Reinforcement Learning;Policy Evaluation;Budget Optimization;Monte Carlo", "primary_area": "", "supplementary_material": "/attachment/0505a4cd5ba6cfb2b6ab0b1af1219e3baa652e72.pdf", "author": "Riccardo Poiani;Nicole Nobili;Alberto Maria Metelli;Marcello Restelli", "authorids": "~Riccardo_Poiani3;~Nicole_Nobili1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "gender": "M;F;M;M", "homepage": ";;https://albertometelli.github.io/;http://home.deib.polimi.it/restelli/", "dblp": "268/8198;;209/4941;64/1011", "google_scholar": "WQWOAkkAAAAJ;;R31IsPwAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";;0000-0002-3424-5212;0000-0002-6322-1076", "linkedin": ";nicole-nobili/;;", "or_profile": "~Riccardo_Poiani3;~Nicole_Nobili1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "aff": "Polytechnic Institute of Milan;Polytechnic Institute of Milan;Politecnico di Milano;Politecnico di Milano", "aff_domain": "polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;Undergrad student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\npoiani2023truncating,\ntitle={Truncating Trajectories in Monte Carlo Policy Evaluation: an Adaptive Approach},\nauthor={Riccardo Poiani and Nicole Nobili and Alberto Maria Metelli and Marcello Restelli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PkKpTK7hJ6}\n}", "github": "", "project": "", "reviewers": "Acm2;Fu4w;scHF;ccYJ;ZDY9", "pdf_size": 491218, "rating": "5;5;5;6;7", "confidence": "3;2;3;3;2", "soundness": "3;2;2;3;3", "novelty": "2;2;2;2;3", "presentation": "2;2;2;2;3", "wc_summary": "219;59;157;150;324", "wc_strengths": "143;67;97;48;84", "wc_weaknesses": "272;90;304;347;201", "wc_questions": "130;46;3;9;2", "wc_limitations": "11;1;3;26;11", "wc_review": "775;263;564;580;622", "wc_reply_reviewers": "155;232;242;149;13", "wc_reply_authors": "183;593;823;338;18", "reply_reviewers": "1;1;2;2;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 181.8, 87.53376491388909 ], "wc_strengths_avg": [ 87.8, 32.13347164562211 ], "wc_weaknesses_avg": [ 242.8, 90.05642675567357 ], "wc_questions_avg": [ 38.0, 48.76474136094644 ], "wc_limitations_avg": [ 10.4, 8.8 ], "wc_review_avg": [ 560.8, 166.54777092474097 ], "wc_reply_reviewers_avg": [ 158.2, 82.03267641616968 ], "wc_reply_authors_avg": [ 391.0, 287.2733889520573 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18079810746441866389&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "polimi.it;polimi.it;polimi.it;polimi.it", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "Italy" }, { "title": "Taking the neural sampling code very seriously: A data-driven approach for evaluating generative models of the visual system", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71743", "id": "Pl416tPkNv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/458d9f2dd5c7565af60143630dc62f10-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pl416tPkNv", "openreview": "https://openreview.net/forum?id=Pl416tPkNv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71743", "video": "https://nips.cc/virtual/2023/poster/71743", "author_site": "Suhas Shrinivasan, Konstantin-Klemens Lurz, Kelli Restivo, George Denfield, Andreas Tolias, Edgar Walker, Fabian Sinz", "tldr": "", "abstract": "Prevailing theories of perception hypothesize that the brain implements perception via Bayesian inference in a generative model of the world.\nOne prominent theory, the Neural Sampling Code (NSC), posits that neuronal responses to a stimulus represent samples from the posterior distribution over latent world state variables that cause the stimulus.\nAlthough theoretically elegant, NSC does not specify the exact form of the generative model or prescribe how to link the theory to recorded neuronal activity.\nPrevious works assume simple generative models and test their qualitative agreement with neurophysiological data.\nCurrently, there is no precise alignment of the normative theory with neuronal recordings, especially in response to natural stimuli, and a quantitative, experimental evaluation of models under NSC has been lacking.\nHere, we propose a novel formalization of NSC, that (a) allows us to directly fit NSC generative models to recorded neuronal activity in response to natural images, (b) formulate richer and more flexible generative models, and (c) employ standard metrics to quantitatively evaluate different generative models under NSC.\nFurthermore, we derive a stimulus-conditioned predictive model of neuronal responses from the trained generative model using our formalization that we compare to neural system identification models.\nWe demonstrate our approach by fitting and comparing classical- and flexible deep learning-based generative models on population recordings from the macaque primary visual cortex (V1) to natural images, and show that the flexible models outperform classical models in both their generative- and predictive-model performance.\nOverall, our work is an important step towards a quantitative evaluation of NSC. \nIt provides a framework that lets us \\textit{learn} the generative model directly from neuronal population recordings, paving the way for an experimentally-informed understanding of probabilistic computational principles underlying perception and behavior.", "keywords": "Neural Sampling Code;Probabilistic Inference;Bayesian Brain;Macaque V1;Natural Images;Population Recordings;Normalizing Flows;Probabilistic Models;Computational Neuroscience;Theoretical Neuroscience", "primary_area": "", "supplementary_material": "/attachment/e7f90eb24c25f57cdb782a359b988e35651d933f.pdf", "author": "Suhas Shrinivasan;Konstantin-Klemens Lurz;Kelli Restivo;George Denfield;Andreas S. Tolias;Edgar Y. Walker;Fabian H. 
Sinz", "authorids": "~Suhas_Shrinivasan1;~Konstantin-Klemens_Lurz1;~Kelli_Restivo1;~George_Denfield1;~Andreas_S._Tolias1;~Edgar_Y._Walker1;~Fabian_H._Sinz1", "gender": "M;;F;;;M;M", "homepage": "https://sinzlab.org/people/suhas_shrinivasan.html;https://sinzlab.org/;;;;https://edgarwalker.com;https://sinzlab.org", "dblp": "234/7660;295/5344;;;32/3057;224/0176;53/5834", "google_scholar": "LQsNQAoAAAAJ;UIKKMbwAAAAJ;;;;;https://scholar.google.com/citations?hl=de", "orcid": ";0000-0003-1652-4274;;0000-0003-3699-3828;;0000-0003-0057-957X;0000-0002-1348-9736", "linkedin": "suhasshrinivasan/;;kellirestivo/;;;edgar-y-walker-379947109/;", "or_profile": "~Suhas_Shrinivasan1;~Konstantin-Klemens_Lurz1;~Kelli_Restivo1;~George_Denfield1;~Andreas_S._Tolias1;~Edgar_Y._Walker1;~Fabian_H._Sinz1", "aff": "Georg-August Universit\u00e4t G\u00f6ttingen;University of Tuebingen;Baylor College of Medicine;;Baylor College of Medicine;University of Washington;Baylor College of Medicine", "aff_domain": "uni-goettingen.de;uni-tuebingen.de;bcm.edu;;bcm.edu;uw.edu;bcm.edu", "position": "PhD student;PhD student;PhD student;;Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshrinivasan2023taking,\ntitle={Taking the neural sampling code very seriously: A data-driven approach for evaluating generative models of the visual system},\nauthor={Suhas Shrinivasan and Konstantin-Klemens Lurz and Kelli Restivo and George Denfield and Andreas S. Tolias and Edgar Y. Walker and Fabian H. Sinz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pl416tPkNv}\n}", "github": "", "project": "", "reviewers": "GaXp;D2be;1S3S;AuQ9", "pdf_size": 8613457, "rating": "2;6;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "1;3;3;4", "presentation": "3;3;3;2", "wc_summary": "31;119;53;73", "wc_strengths": "8;123;143;98", "wc_weaknesses": "69;78;156;57", "wc_questions": "78;9;19;250", "wc_limitations": "1;6;5;24", "wc_review": "187;335;376;502", "wc_reply_reviewers": "585;46;78;139", "wc_reply_authors": "1279;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 32.46536616149585 ], "wc_strengths_avg": [ 93.0, 51.59941860137573 ], "wc_weaknesses_avg": [ 90.0, 38.82653731663435 ], "wc_questions_avg": [ 89.0, 96.62039122255716 ], "wc_limitations_avg": [ 9.0, 8.860022573334675 ], "wc_review_avg": [ 350.0, 112.44331905453521 ], "wc_reply_reviewers_avg": [ 212.0, 217.92774031774843 ], "wc_reply_authors_avg": [ 319.75, 553.8232457201485 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11710811678643778100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uni-goettingen.de;uni-tuebingen.de;bcm.edu;;bcm.edu;uw.edu;bcm.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "Georg-August Universit\u00e4t G\u00f6ttingen;University of Tuebingen;Baylor College of Medicine;University of Washington", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.uni-goettingen.de;https://www.uni-tuebingen.de/;https://www.bcm.edu;https://www.washington.edu", "aff_unique_abbr": "GAU;Uni T\u00fcbingen;BCM;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Neural Processes with Stability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71742", "id": "PmlNxZoXr4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f8cea6a15db693dc525cde5e688410a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PmlNxZoXr4", "openreview": "https://openreview.net/forum?id=PmlNxZoXr4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71742", "video": "https://nips.cc/virtual/2023/poster/71742", "author_site": "Huafeng Liu, Liping Jing, Jian Yu", "tldr": "", "abstract": "Unlike traditional statistical models depending on hand-specified priors, neural processes (NPs) have recently emerged as a class of powerful neural statistical models that combine the strengths of neural networks and stochastic processes. NPs can define a flexible class of stochastic processes well suited for highly non-trivial functions by encoding contextual knowledge into the function space. However, noisy context points introduce challenges to the algorithmic stability that small changes in training data may significantly change the models and yield lower generalization performance. In this paper, we provide theoretical guidelines for deriving stable solutions with high generalization by introducing the notion of algorithmic stability into NPs, which can be flexible to work with various NPs and achieves less biased approximation with theoretical guarantees. 
To illustrate the superiority of the proposed model, we perform experiments on both synthetic and real-world data, and the results demonstrate that our approach not only helps to achieve more accurate performance but also improves model robustness.", "keywords": "Neural processes;stability", "primary_area": "", "supplementary_material": "", "author": "Huafeng Liu;Liping Jing;Jian Yu", "authorids": "~Huafeng_Liu3;~Liping_Jing3;~Jian_Yu1", "gender": "M;M;F", "homepage": "https://faculty.bjtu.edu.cn/10087/;https://faculty.bjtu.edu.cn/6463/;", "dblp": "48/4950-1;52/5812-1;", "google_scholar": "https://scholar.google.com.hk/citations?user=POINLLMAAAAJ;;zStEDu4AAAAJ", "orcid": "0000-0002-7914-6867;;", "linkedin": ";;", "or_profile": "~Huafeng_Liu3;~Jian_Yu1;~liping_jing1", "aff": "The University of Hong Kong;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "hku.hk;bjtu.edu.cn;bjtu.edu.cn", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023neural,\ntitle={Neural Processes with Stability},\nauthor={Huafeng Liu and Liping Jing and Jian Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PmlNxZoXr4}\n}", "github": "", "project": "", "reviewers": "jySn;nxGu;i5nU;Gzsw", "pdf_size": 641330, "rating": "3;4;5;7", "confidence": "4;3;5;4", "soundness": "1;3;2;3", "novelty": "1;2;3;3", "presentation": "3;3;3;4", "wc_summary": "38;83;52;58", "wc_strengths": "29;46;36;21", "wc_weaknesses": "381;249;267;61", "wc_questions": "3;20;298;68", "wc_limitations": "4;17;29;41", "wc_review": "455;415;682;249", "wc_reply_reviewers": "41;131;105;89", "wc_reply_authors": "18;455;460;536", "reply_reviewers": "1;1;2;2", "reply_authors": "2;2;3;3", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.75, 16.284578594486256 ], "wc_strengths_avg": [ 33.0, 9.192388155425117 ], "wc_weaknesses_avg": [ 239.5, 114.81615739955767 ], "wc_questions_avg": [ 97.25, 118.32872643614483 ], "wc_limitations_avg": [ 22.75, 13.754544703478919 ], "wc_review_avg": [ 450.25, 154.49494328294372 ], "wc_reply_reviewers_avg": [ 91.5, 32.78337993557101 ], "wc_reply_authors_avg": [ 367.25, 204.17807791239488 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.23904572186687872, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8ik5DpQe3BwJ:scholar.google.com/&scioq=Neural+Processes+with+Stability&hl=en&as_sdt=0,47", "gs_version_total": 4, "email": "hku.hk;bjtu.edu.cn;bjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Hong Kong;Beijing Jiaotong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.bjtu.edu.cn", "aff_unique_abbr": "HKU;BJTU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Adaptive Principal Component Regression with Applications to Panel Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71741", "id": "PmqBJ02V1p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f37265d7493377170a3b4ba91823119a-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=PmqBJ02V1p", "openreview": "https://openreview.net/forum?id=PmqBJ02V1p", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71741", "video": "https://nips.cc/virtual/2023/poster/71741", "author_site": "Anish Agarwal, Keegan Harris, Justin Whitehouse, Steven Wu", "tldr": "", "abstract": "Principal component regression (PCR) is a popular technique for fixed-design error-in-variables regression, a generalization of the linear regression setting in which the observed covariates are corrupted with random noise. We provide the first time-uniform finite sample guarantees for online (regularized) PCR whenever data is collected adaptively. Since the proof techniques for PCR in the fixed design setting do not readily extend to the online setting, our results rely on adapting tools from modern martingale concentration to the error-in-variables setting. As an application of our bounds, we provide a framework for counterfactual estimation of unit-specific treatment effects in panel data settings when interventions are assigned adaptively. Our framework may be thought of as a generalization of the synthetic interventions framework where data is collected via an adaptive intervention assignment policy.", "keywords": "adaptive data collection;principal component regression;error-in-variables regression;panel data;synthetic controls;synthetic interventions;causal inference", "primary_area": "", "supplementary_material": "/attachment/e96aa69aa6b9a6ec879bf95749dae0b0388f409b.pdf", "author": "Anish Agarwal;Keegan Harris;Justin Whitehouse;Steven Wu", "authorids": "~Anish_Agarwal1;~Keegan_Harris1;~Justin_Whitehouse1;~Steven_Wu1", "gender": ";M;;M", "homepage": "https://sites.google.com/view/anishagarwal;https://keeganharris.github.io/;https://jwhitehouse11.github.io/;https://zstevenwu.com/", "dblp": ";294/5044;218/6673;137/8350", "google_scholar": ";TnvQIrYAAAAJ;https://scholar.google.nl/citations?user=LxpnsSMAAAAJ;MbF6rTEAAAAJ", "orcid": ";;;", "linkedin": ";;;zstevenwu/", "or_profile": "~Anish_Agarwal1;~Keegan_Harris1;~Justin_Whitehouse1;~Zhiwei_Steven_Wu1", "aff": "Columbia University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "columbia.edu;cmu.edu;cs.cmu.edu;cmu.edu", "position": "Assistant Professor;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nagarwal2023adaptive,\ntitle={Adaptive Principal Component Regression with Applications to Panel Data},\nauthor={Anish Agarwal and Keegan Harris and Justin Whitehouse and Steven Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PmqBJ02V1p}\n}", "github": "", "project": "", "reviewers": "Dfi8;1mVx;jhsg;M8r1", "pdf_size": 671187, "rating": "4;6;6;6", "confidence": "4;2;4;3", "soundness": "2;4;4;4", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "58;54;97;56", "wc_strengths": "20;46;70;86", "wc_weaknesses": "43;101;158;82", "wc_questions": "539;34;161;61", "wc_limitations": "1;5;1;10", "wc_review": "661;240;487;295", "wc_reply_reviewers": "417;72;42;0", "wc_reply_authors": "1401;0;0;0", "reply_reviewers": "6;1;1;0", "reply_authors": "7;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.25, 17.80975856096876 ], "wc_strengths_avg": [ 
55.5, 24.95495942693556 ], "wc_weaknesses_avg": [ 96.0, 41.454794656348255 ], "wc_questions_avg": [ 198.75, 202.0598611797999 ], "wc_limitations_avg": [ 4.25, 3.6996621467371855 ], "wc_review_avg": [ 420.75, 166.27744134427857 ], "wc_reply_reviewers_avg": [ 132.75, 166.0924065091478 ], "wc_reply_authors_avg": [ 350.25, 606.6507953509993 ], "reply_reviewers_avg": [ 2.0, 2.345207879911715 ], "reply_authors_avg": [ 2.5, 2.598076211353316 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3455551861079863891&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "columbia.edu;cmu.edu;cs.cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Columbia University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.cmu.edu", "aff_unique_abbr": "Columbia;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Trajectory Alignment: Understanding the Edge of Stability Phenomenon via Bifurcation Theory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71740", "id": "PnJaA0A8Lr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e2a9256bd816ab9e082dfaa22f1f62a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PnJaA0A8Lr", "openreview": "https://openreview.net/forum?id=PnJaA0A8Lr", "poster": "/media/PosterPDFs/NeurIPS%202023/71740.png?t=1701747960.6677878", "slides": "https://nips.cc/virtual/2023/poster/71740", "video": "https://nips.cc/virtual/2023/poster/71740", "author_site": "Minhak Song, Chulhee Yun", "tldr": "", "abstract": "Cohen et al. (2021) empirically study the evolution of the largest eigenvalue of the loss Hessian, also known as sharpness, along the gradient descent (GD) trajectory and observe the Edge of Stability (EoS) phenomenon. The sharpness increases at the early phase of training (referred to as progressive sharpening), and eventually saturates close to the threshold of $2 / \\text{(step size)}$. In this paper, we start by demonstrating through empirical studies that when the EoS phenomenon occurs, different GD trajectories (after a proper reparameterization) align on a specific bifurcation diagram independent of initialization. We then rigorously prove this trajectory alignment phenomenon for a two-layer fully-connected linear network and a single-neuron nonlinear network trained with a single data point. 
Our trajectory alignment analysis establishes both progressive sharpening and EoS phenomena, encompassing and extending recent findings in the literature.", "keywords": "non-convex optimization;trajectory alignment of GD;edge of stability;progressive sharpening;bifurcation theory", "primary_area": "", "supplementary_material": "", "author": "Minhak Song;Chulhee Yun", "authorids": "~Minhak_Song1;~Chulhee_Yun1", "gender": "M;M", "homepage": "https://songminhak.github.io;https://chulheeyun.github.io/", "dblp": ";138/0148.html", "google_scholar": "https://scholar.google.com/citations?hl=en;Ukl64ggAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Minhak_Song1;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nsong2023trajectory,\ntitle={Trajectory Alignment: Understanding the Edge of Stability Phenomenon via Bifurcation Theory},\nauthor={Minhak Song and Chulhee Yun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PnJaA0A8Lr}\n}", "github": "", "project": "", "reviewers": "6YQZ;mAcJ;ft7H;Qm1U", "pdf_size": 34630109, "rating": "3;5;6;7", "confidence": "2;3;3;4", "soundness": "2;2;4;4", "novelty": "2;2;3;3", "presentation": "2;2;4;4", "wc_summary": "82;75;99;183", "wc_strengths": "64;62;99;29", "wc_weaknesses": "104;419;103;165", "wc_questions": "4;59;135;113", "wc_limitations": "1;10;22;9", "wc_review": "255;625;458;499", "wc_reply_reviewers": "0;100;84;39", "wc_reply_authors": "0;72;253;314", "reply_reviewers": "0;1;3;1", "reply_authors": "1;2;4;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 109.75, 43.18202751145434 ], "wc_strengths_avg": [ 63.5, 24.763884994079586 ], "wc_weaknesses_avg": [ 197.75, 130.18328425723482 ], "wc_questions_avg": [ 77.75, 50.77093164400275 ], "wc_limitations_avg": [ 10.5, 7.5 ], "wc_review_avg": [ 459.25, 133.01574154963765 ], "wc_reply_reviewers_avg": [ 55.75, 39.194227891361756 ], "wc_reply_authors_avg": [ 159.75, 128.168590145948 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9561828874675149, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6080852077325468863&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "FABind: Fast and Accurate Protein-Ligand Binding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71739", "id": "PnWakgg1RL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aee1de5f335558b546b7e58c380be087-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PnWakgg1RL", "openreview": "https://openreview.net/forum?id=PnWakgg1RL", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71739.png?t=1701414288.5219138", "slides": "https://nips.cc/virtual/2023/poster/71739", "video": "https://nips.cc/virtual/2023/poster/71739", "author_site": "Qizhi Pei, Kaiyuan Gao, Lijun Wu, Jinhua Zhu, Yingce Xia, Shufang Xie, Tao Qin, Kun He, Tie-Yan Liu, Rui Yan", "tldr": "", "abstract": "Modeling the interaction between proteins and ligands and accurately predicting their binding structures is a critical yet challenging task in drug discovery. Recent advancements in deep learning have shown promise in addressing this challenge, with sampling-based and regression-based methods emerging as two prominent approaches. However, these methods have notable limitations. Sampling-based methods often suffer from low efficiency due to the need for generating multiple candidate structures for selection. On the other hand, regression-based methods offer fast predictions but may experience decreased accuracy. Additionally, the variation in protein sizes often requires external modules for selecting suitable binding pockets, further impacting efficiency. In this work, we propose FABind, an end-to-end model that combines pocket prediction and docking to achieve accurate and fast protein-ligand binding. FABind incorporates a unique ligand-informed pocket prediction module, which is also leveraged for docking pose estimation. The model further enhances the docking process by incrementally integrating the predicted pocket to optimize protein-ligand binding, reducing discrepancies between training and inference. Through extensive experiments on benchmark datasets, our proposed FABind demonstrates strong advantages in terms of effectiveness and efficiency compared to existing methods. Our code is available at https://github.com/QizhiPei/FABind.", "keywords": "protein-ligand docking", "primary_area": "", "supplementary_material": "/attachment/299fa65047dd461c1da04969c5ea8bd5a584a353.zip", "author": "Qizhi Pei;Kaiyuan Gao;Lijun Wu;Jinhua Zhu;Yingce Xia;Shufang Xie;Tao Qin;Kun He;Tie-Yan Liu;Rui Yan", "authorids": "~Qizhi_Pei1;~Kaiyuan_Gao1;~Lijun_Wu1;~Jinhua_Zhu1;~Yingce_Xia1;~Shufang_Xie1;~Tao_Qin1;~Kun_He1;~Tie-Yan_Liu1;~Rui_Yan2", "gender": ";M;M;M;M;M;M;F;M;M", "homepage": "https://qizhipei.github.io/;https://kygao.github.io;https://apeterswu.github.io/;https://github.com/teslacool;https://www.microsoft.com/en-us/research/people/yinxia/;;https://www.microsoft.com/en-us/research/people/taoqin/;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm;http://member.acm.org/~tieyanliu;https://gsai.ruc.edu.cn/english/ruiyan", "dblp": "322/9716;180/6731;68/1284-3;18/1965-1;http://dblp.uni-trier.de/pers/hd/x/Xia:Yingce;https://dblp.uni-trier.de/pid/163/2704-3;14/6841;59/1028-1;l/TieYanLiu;19/2405-1", "google_scholar": "sf3xGU8AAAAJ;Or77MPQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=FvGy0LQAAAAJ;GS5wRxYAAAAJ;;Bl4SRU0AAAAJ;YTQnGJsAAAAJ;Nh832fgAAAAJ;eLw6g-UAAAAJ", "orcid": "0000-0002-7242-422X;0009-0002-8862-8320;0000-0002-3530-590X;0000-0003-2157-9077;;;;0000-0001-7627-4604;0000-0002-0476-8020;0000-0002-3356-6823", "linkedin": "%E5%90%AF%E6%99%BA-%E8%A3%B4-680192218/en?trk=people-guest_people_search-card;;lijun-wu-59340478/;;;;;;;", "or_profile": "~Qizhi_Pei1;~Kaiyuan_Gao1;~Lijun_Wu1;~Jinhua_Zhu1;~Yingce_Xia1;~Shufang_Xie1;~Tao_Qin1;~Kun_He1;~Tie-Yan_Liu1;~Rui_Yan2", "aff": "Microsoft;Huazhong University of Science and Technology;Microsoft Research;University of Science and Technology of China;Microsoft;Renmin 
University of China;;Huazhong University of Science and Technology;Microsoft;Renmin University of China", "aff_domain": "microsoft.com;hust.edu.cn;microsoft.com;ustc.edu.cn;microsoft.com;ruc.edu.cn;;hust.edu.cn;microsoft.com;ruc.edu.cn", "position": "Intern;PhD student;Researcher;PhD student;Researcher;PhD student;;Full Professor;Distinguished Scientist;Associate Professor", "bibtex": "@inproceedings{\npei2023fabind,\ntitle={{FAB}ind: Fast and Accurate Protein-Ligand Binding},\nauthor={Qizhi Pei and Kaiyuan Gao and Lijun Wu and Jinhua Zhu and Yingce Xia and Shufang Xie and Tao Qin and Kun He and Tie-Yan Liu and Rui Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PnWakgg1RL}\n}", "github": "", "project": "", "reviewers": "5BP6;oHVp;Vntv;N7PK;wgwB", "pdf_size": 1147424, "rating": "5;5;6;6;6", "confidence": "3;3;4;4;3", "soundness": "3;2;3;3;2", "novelty": "3;2;3;3;3", "presentation": "3;3;2;3;2", "wc_summary": "68;65;75;103;603", "wc_strengths": "59;23;44;35;59", "wc_weaknesses": "213;65;248;77;235", "wc_questions": "2;38;235;4;164", "wc_limitations": "1;8;22;28;45", "wc_review": "343;199;624;247;1106", "wc_reply_reviewers": "114;34;106;0;207", "wc_reply_authors": "714;443;111;0;336", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;2;2;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 182.8, 210.52923787445772 ], "wc_strengths_avg": [ 44.0, 13.942740046346701 ], "wc_weaknesses_avg": [ 167.6, 79.75362060746835 ], "wc_questions_avg": [ 88.6, 94.16496163648134 ], "wc_limitations_avg": [ 20.8, 15.458331087151679 ], "wc_review_avg": [ 503.8, 335.2010739839597 ], "wc_reply_reviewers_avg": [ 92.2, 71.73952885264859 ], "wc_reply_authors_avg": [ 320.8, 251.67391601038037 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15362592394220999235&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "microsoft.com;hust.edu.cn;microsoft.com;ustc.edu.cn;microsoft.com;ruc.edu.cn;;hust.edu.cn;microsoft.com;ruc.edu.cn", "author_num": 10, "aff_unique_index": "0;1;0;2;0;3;1;0;3", "aff_unique_norm": "Microsoft;Huazhong University of Science and Technology;University of Science and Technology of China;Renmin University of China", "aff_unique_dep": "Microsoft Corporation;;;", "aff_unique_url": "https://www.microsoft.com;http://www.hust.edu.cn;http://www.ustc.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "Microsoft;HUST;USTC;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Goal Driven Discovery of Distributional Differences via Language Descriptions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71738", "id": "PnbCA4ylIc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e810b2c75d69be186cadd2fe3febeab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PnbCA4ylIc", "openreview": "https://openreview.net/forum?id=PnbCA4ylIc", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/71738", "video": "https://nips.cc/virtual/2023/poster/71738", "author_site": "Ruiqi Zhong, Peter Zhang, Steve Li, Jinwoo Ahn, Dan Klein, Jacob Steinhardt", "tldr": "", "abstract": "Exploring large corpora can generate useful discoveries but is time-consuming for humans.\n We formulate a new task, D5, that automatically discovers differences between two large corpora in a goal-driven way. \n The task input is a problem comprising a user-specified research goal (\u201c*comparing the side effects of drug A and drug*\u201d) and a corpus pair (two large collections of patients' self-reported reactions after taking each drug). \n The output is a goal-related description (discovery) of how these corpora differ (patients taking drug A \u201c*mention feelings of paranoia*\u201d more often).\n We build a D5 system, and to quantitatively evaluate its performance, we 1) build a diagnostic benchmark, SynD5, to test whether it can recover known differences between two synthetic corpora, and 2) contribute a meta-dataset, OpenD5, aggregating 675 open-ended problems ranging across business, social sciences, humanities, machine learning, and health.\n With both synthetic and real datasets, we confirm that language models can leverage the user-specified goals to propose more relevant candidate discoveries, and they sometimes produce discoveries previously unknown to the authors, including demographic differences in discussion topics, political stances in speech, insights in commercial reviews, and error patterns in NLP models.\n Finally, we discuss the limitations of the current D5 system, which discovers correlation rather than causation and has the potential to reinforce societal biases when misused; therefore, practitioners should treat the outputs of our system with caution.", "keywords": "large language model;prompting;exploratory text analysis", "primary_area": "", "supplementary_material": "/attachment/6a1ec15a533474cc0287c79ad5c37b32c4a8302a.zip", "author": "Ruiqi Zhong;Peter Zhang;Steve Li;Jinwoo Ahn;Dan Klein;Jacob Steinhardt", "authorids": "~Ruiqi_Zhong1;~Peter_Zhang3;~Steve_Li1;~Jinwoo_Ahn3;~Dan_Klein1;~Jacob_Steinhardt1", "gender": "M;M;M;M;;", "homepage": "https://ruiqi-zhong.github.io;http://peterzha.ng/;http://www.steve-li.com/;https://jwahnn.github.io/;http://people.eecs.berkeley.edu/~klein/;", "dblp": "222/3024;;334/4389;;;35/10625", "google_scholar": "GskOShAAAAAJ;TyjRjlcAAAAJ;;hiLdPNcAAAAJ;;", "orcid": ";0000-0002-8271-0107;;;;", "linkedin": ";pjz/;steveshenli/;jinwoo-ahn-075034243;dan-klein/;", "or_profile": "~Ruiqi_Zhong1;~Peter_Zhang3;~Steve_Li1;~Jinwoo_Ahn3;~Dan_Klein1;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;University of California, Berkeley;Harvard University;Boeing Research & Technology;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;harvard.edu;boeing.com;berkeley.edu;berkeley.edu", "position": "PhD student;Undergrad student;Undergrad student;Research Intern;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhong2023goal,\ntitle={Goal Driven Discovery of Distributional Differences via Language Descriptions},\nauthor={Ruiqi Zhong and Peter Zhang and Steve Li and Jinwoo Ahn and Dan Klein and Jacob Steinhardt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PnbCA4ylIc}\n}", "github": "", "project": "", "reviewers": "EmG8;vHRK;sfxq;cy6L;Eiwe", "pdf_size": 
892540, "rating": "3;6;6;6;7", "confidence": "3;4;3;4;2", "soundness": "2;2;3;2;3", "novelty": "3;2;3;2;3", "presentation": "3;2;3;3;3", "wc_summary": "58;53;51;198;121", "wc_strengths": "39;13;29;183;78", "wc_weaknesses": "115;215;317;1115;126", "wc_questions": "68;114;29;875;1", "wc_limitations": "23;17;24;452;10", "wc_review": "303;412;450;2823;336", "wc_reply_reviewers": "65;22;178;283;34", "wc_reply_authors": "132;0;83;62;0", "reply_reviewers": "1;1;1;2;1", "reply_authors": "3;1;2;2;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 96.2, 57.17831756881274 ], "wc_strengths_avg": [ 68.4, 61.173850622631235 ], "wc_weaknesses_avg": [ 377.6, 375.8007982961186 ], "wc_questions_avg": [ 217.4, 330.9903926098158 ], "wc_limitations_avg": [ 105.2, 173.47207268030206 ], "wc_review_avg": [ 864.8, 980.4981183051807 ], "wc_reply_reviewers_avg": [ 116.4, 99.89314290780925 ], "wc_reply_authors_avg": [ 55.4, 50.6185736661949 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.11821656093586512, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16793703526588871704&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;harvard.edu;boeing.com;berkeley.edu;berkeley.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "University of California, Berkeley;Harvard University;Boeing", "aff_unique_dep": ";;Research & Technology", "aff_unique_url": "https://www.berkeley.edu;https://www.harvard.edu;https://www.boeing.com/research-technology/", "aff_unique_abbr": "UC Berkeley;Harvard;Boeing R&T", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "What Knowledge Gets Distilled in Knowledge Distillation?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71737", "id": "Poj71ASubN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2433fec2144ccf5fea1c9c5ebdbc3924-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Poj71ASubN", "openreview": "https://openreview.net/forum?id=Poj71ASubN", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71737", "video": "https://nips.cc/virtual/2023/poster/71737", "author_site": "Utkarsh Ojha, Yuheng Li, Anirudh Sundara Rajan, Yingyu Liang, Yong Jae Lee", "tldr": "", "abstract": "Knowledge distillation aims to transfer useful information from a teacher network to a student network, with the primary goal of improving the student's performance for the task at hand. Over the years, there has a been a deluge of novel techniques and use cases of knowledge distillation. Yet, despite the various improvements, there seems to be a glaring gap in the community's fundamental understanding of the process. Specifically, what is the knowledge that gets distilled in knowledge distillation? In other words, in what ways does the student become similar to the teacher? Does it start to localize objects in the same way? Does it get fooled by the same adversarial samples? Does its data invariance properties become similar? Our work presents a comprehensive study to try to answer these questions. 
\nWe show that existing methods can indeed indirectly distill these properties beyond improving task performance. We further study why knowledge distillation might work this way, and show that our findings have practical implications as well.", "keywords": "knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/4349e73509c35b22cbf53804752ce42b2427f3b2.zip", "author": "Utkarsh Ojha;Yuheng Li;Anirudh Sundara Rajan;Yingyu Liang;Yong Jae Lee", "authorids": "~Utkarsh_Ojha1;~Yuheng_Li1;~Anirudh_Sundara_Rajan1;~Yingyu_Liang1;~Yong_Jae_Lee2", "gender": "M;M;M;;", "homepage": "https://utkarshojha.github.io/;;https://anisundar18.github.io;;", "dblp": "194/5532;39/3954;369/5939;;", "google_scholar": "QGdSgfoAAAAJ;ZphbAXEAAAAJ;3k0yXxcAAAAJ;;", "orcid": ";;;;", "linkedin": "utkarsh-ojha-16a20b11b/;;anirudhsundar/;;", "or_profile": "~Utkarsh_Ojha1;~Yuheng_Li1;~Anirudh_Sundara_Rajan1;~Yingyu_Liang1;~Yong_Jae_Lee2", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison;Department of Computer Science, University of Wisconsin - Madison;;", "aff_domain": "wisc.edu;wisc.edu;cs.wisc.edu;;", "position": "PhD student;PhD student;MS student;;", "bibtex": "@inproceedings{\nojha2023what,\ntitle={What Knowledge Gets Distilled in Knowledge Distillation?},\nauthor={Utkarsh Ojha and Yuheng Li and Anirudh Sundara Rajan and Yingyu Liang and Yong Jae Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Poj71ASubN}\n}", "github": "", "project": "", "reviewers": "QC8K;r3A2;B1Ad;8Y8m", "pdf_size": 3707705, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "92;102;79;193", "wc_strengths": "82;61;58;106", "wc_weaknesses": "165;157;152;226", "wc_questions": "26;214;2;85", "wc_limitations": "1;101;2;47", "wc_review": "366;635;293;657", "wc_reply_reviewers": "10;64;23;127", "wc_reply_authors": "48;17;44;105", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 116.5, 44.91380634058975 ], "wc_strengths_avg": [ 76.75, 19.253246479490155 ], "wc_weaknesses_avg": [ 175.0, 29.80771712157776 ], "wc_questions_avg": [ 81.75, 82.11082449957496 ], "wc_limitations_avg": [ 37.75, 40.97178907492325 ], "wc_review_avg": [ 487.75, 160.52939761925228 ], "wc_reply_reviewers_avg": [ 56.0, 45.57960070031329 ], "wc_reply_authors_avg": [ 53.5, 32.035136959282696 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15938132398195126726&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "wisc.edu;wisc.edu;cs.wisc.edu;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Spectral Algorithm for List-Decodable Covariance Estimation in Relative Frobenius Norm", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71736", "id": "PpI7XvOXkF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98b2b307aa4aa323df2ba3a83460f25e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PpI7XvOXkF", "openreview": "https://openreview.net/forum?id=PpI7XvOXkF", "poster": "/media/PosterPDFs/NeurIPS%202023/71736.png?t=1702002542.2827203", "slides": "https://nips.cc/virtual/2023/poster/71736", "video": "https://nips.cc/virtual/2023/poster/71736", "author_site": "Ilias Diakonikolas, Daniel Kane, Jasper Lee, Ankit Pensia, Ankit Pensia, Thanasis Pittas", "tldr": "", "abstract": "We study the problem of list-decodable Gaussian covariance estimation. Given a multiset $T$ of $n$ points in $\\mathbb{R}^d$ such that an unknown $\\alpha<1/2$ fraction of points in $T$ are i.i.d. samples from an unknown Gaussian $\\mathcal{N}(\\mu, \\Sigma)$, the goal is to output a list of $O(1/\\alpha)$ hypotheses at least one of which is close to $\\Sigma$ in relative Frobenius norm. Our main result is a $\\mathrm{poly}(d,1/\\alpha)$ sample and time algorithm for this task that guarantees relative Frobenius norm error of $\\mathrm{poly}(1/\\alpha)$. Importantly, our algorithm relies purely on spectral techniques. As a corollary, we obtain an efficient spectral algorithm for robust partial clustering of Gaussian mixture models (GMMs) --- a key ingredient in the recent work of [BakDJKKV22] on robustly learning arbitrary GMMs. Combined with the other components of [BakDJKKV22], our new method yields the first Sum-of-Squares-free algorithm for robustly learning GMMs, resolving an open problem proposed by Vempala and Kothari. At the technical level, we develop a novel multi-filtering method for list-decodable covariance estimation that may be useful in other settings.", "keywords": "robust statistics;covariance estimation;list-decodable learning", "primary_area": "", "supplementary_material": "", "author": "Ilias Diakonikolas;Daniel Kane;Jasper C.H. Lee;Ankit Pensia;Thanasis Pittas", "authorids": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Jasper_C.H._Lee1;~Ankit_Pensia1;~Thanasis_Pittas1", "gender": "M;M;M;M;M", "homepage": "http://www.iliasdiakonikolas.org/;http://cseweb.ucsd.edu/~dakane/;https://jasperchlee.github.io/;https://ankitp.net/;https://thanasispittas.github.io/", "dblp": "d/IliasDiakonikolas;52/6817;150/4950;213/7640;284/9676", "google_scholar": "Vb3FLmkAAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;z0Y4snAAAAAJ;u1Qs7YIAAAAJ;pkIOtwcAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Jasper_C.H._Lee1;~Ankit_Pensia1;~Thanasis_Pittas1", "aff": "University of Wisconsin, Madison;University of California, San Diego;University of Wisconsin - Madison;University of Wisconsin, Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;ucsd.edu;wisc.edu;wisc.edu;wisc.edu", "position": "Associate Professor;Assistant Professor;Postdoc;PhD student;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2023a,\ntitle={A Spectral Algorithm for List-Decodable Covariance Estimation in Relative Frobenius Norm},\nauthor={Ilias Diakonikolas and Daniel Kane and Jasper C.H. 
Lee and Ankit Pensia and Thanasis Pittas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PpI7XvOXkF}\n}", "github": "", "project": "", "reviewers": "WZLc;64Ly;Wync;ymEk", "pdf_size": 507289, "rating": "7;7;8;8", "confidence": "3;4;4;3", "soundness": "3;4;4;4", "novelty": "3;4;4;4", "presentation": "3;3;4;4", "wc_summary": "220;233;77;322", "wc_strengths": "105;34;70;113", "wc_weaknesses": "150;44;37;91", "wc_questions": "39;82;1;63", "wc_limitations": "15;6;1;5", "wc_review": "529;399;186;594", "wc_reply_reviewers": "9;23;0;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 213.0, 87.7866732482784 ], "wc_strengths_avg": [ 80.5, 31.34086788842964 ], "wc_weaknesses_avg": [ 80.5, 45.18019477602991 ], "wc_questions_avg": [ 46.25, 30.243801017729236 ], "wc_limitations_avg": [ 6.75, 5.11737237261468 ], "wc_review_avg": [ 427.0, 155.85089027657173 ], "wc_reply_reviewers_avg": [ 10.25, 8.227241335952167 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1831278244355117686&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "wisc.edu;ucsd.edu;wisc.edu;wisc.edu;wisc.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Wisconsin;University of California, San Diego;University of Wisconsin-Madison", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UCSD;UW-Madison", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Madison;San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Metis: Understanding and Enhancing In-Network Regular Expressions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71735", "id": "Pplq1TRnma", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f54bd48aba0dff7acdac86123188f1b6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pplq1TRnma", "openreview": "https://openreview.net/forum?id=Pplq1TRnma", "poster": "/media/PosterPDFs/NeurIPS%202023/71735.png?t=1701380968.6332808", "slides": "https://nips.cc/virtual/2023/poster/71735", "video": "https://nips.cc/virtual/2023/poster/71735", "author_site": "Zhengxin Zhang, Yucheng Huang, Guanglin Duan, Qing Li, Dan Zhao, Yong Jiang, Lianbo Ma, Xi Xiao, Hengyang Xu", "tldr": "", "abstract": "Regular expressions (REs) offer one-shot solutions for many networking tasks, e.g., network intrusion detection. However, REs purely rely on expert knowledge and cannot utilize labeled data for better accuracy. Today, neural networks (NNs) have shown superior accuracy and flexibility, thanks to their ability to learn from rich labeled data. Nevertheless, NNs are often incompetent in cold-start scenarios and too complex for deployment on network devices. 
In this paper, we propose Metis, a general framework that converts REs to network device affordable models for superior accuracy and throughput by taking advantage of REs' expert knowledge and NNs' learning ability. In Metis, we convert REs to byte-level recurrent neural networks (BRNNs) without training. The BRNNs preserve expert knowledge from REs and offer adequate accuracy in cold-start scenarios. When rich labeled data is available, the performance of BRNNs can be improved by training. Furthermore, we design a semi-supervised knowledge distillation to transform the BRNNs into pooling soft random forests (PSRFs) that can be deployed on network devices. To the best of our knowledge, this is the first method to employ model inference as an alternative to RE matching in network scenarios. We collect network traffic data on our campus for three weeks and evaluate Metis on them. Experimental results show that Metis is more accurate than original REs and other baselines, achieving superior throughput when deployed on network devices.", "keywords": "Network Security; Regular Expression; Knowledge Distillation; Machine Learning; Programmable Switch", "primary_area": "", "supplementary_material": "", "author": "Zhengxin Zhang;Yucheng Huang;Guanglin Duan;Qing Li;Dan Zhao;Yong Jiang;Lianbo Ma;Xi Xiao;Hengyang Xu", "authorids": "~Zhengxin_Zhang2;~Yucheng_Huang3;~Guanglin_Duan1;~Qing_Li15;~Dan_Zhao2;~Yong_Jiang3;~Lianbo_Ma1;~Xi_Xiao1;piresxu@tencent.com", "gender": "M;M;M;M;F;M;M;M;", "homepage": "https://youarespecialtome.github.io/;;;https://smartinternet.group/qing-li/;;;http://faculty.neu.edu.cn/swc/malb/;https://www.sigs.tsinghua.edu.cn/xx_en/main.htm;", "dblp": ";;https://dblp.org/rec/conf/infocom/XieLDDJD22.html;181/2689-6;10/3489-3;74/1552-1.html;144/0830;;", "google_scholar": "dGb05PMAAAAJ;https://scholar.google.nl/citations?user=yFybDEsAAAAJ;;54AuaywAAAAJ;;;https://scholar.google.com.sg/citations?hl=zh-CN;;", "orcid": ";;;0000-0002-6071-473X;0000-0001-9016-5594;;;;", "linkedin": ";;guanglin-duan-08a748212/;;;;;;", "or_profile": "~Zhengxin_Zhang2;~Yucheng_Huang3;~Guanglin_Duan1;~Qing_Li15;~Dan_Zhao2;~Yong_Jiang3;~Lianbo_Ma1;~Xi_Xiao1;piresxu@tencent.com", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Pengcheng Laboratory;Peng Cheng Laboratory;Tsinghua University;Northeastern University;Shenzhen International Graduate School, Tsinghua University;", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;pcl.ac.cn;pcl.ac.cn;tsinghua.edu.cn;neu.edu.cn;tsinghua.edu.cn;", "position": "MS student;MS student;MS student;Associate Professor;Researcher;Full Professor;Full Professor;Associate Professor;", "bibtex": "@inproceedings{\nzhang2023metis,\ntitle={Metis: Understanding and Enhancing In-Network Regular Expressions},\nauthor={Zhengxin Zhang and Yucheng Huang and Guanglin Duan and Qing Li and Dan Zhao and Yong Jiang and Lianbo Ma and Xi Xiao and Hengyang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pplq1TRnma}\n}", "github": "", "project": "", "reviewers": "Uzg2;38Wo;qD4Y;W2tD;wpui;Jimb", "pdf_size": 643662, "rating": "3;4;6;6;6;7", "confidence": "5;2;4;3;3;3", "soundness": "2;2;3;3;3;4", "novelty": "2;2;2;4;3;3", "presentation": "2;2;3;3;3;3", "wc_summary": "89;50;127;102;205;53", "wc_strengths": "109;56;173;85;82;52", "wc_weaknesses": "56;64;49;254;206;115", "wc_questions": "29;52;66;17;177;76", "wc_limitations": "13;23;15;9;1;99", "wc_review": 
"296;245;430;467;671;395", "wc_reply_reviewers": "881;0;0;89;113;0", "wc_reply_authors": "3237;876;262;405;1705;0", "reply_reviewers": "2;0;0;2;1;0", "reply_authors": "7;3;2;2;5;1", "rating_avg": [ 5.333333333333333, 1.3743685418725535 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820635 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 104.33333333333333, 52.40122984137766 ], "wc_strengths_avg": [ 92.83333333333333, 40.60138202354967 ], "wc_weaknesses_avg": [ 124.0, 79.11384202527394 ], "wc_questions_avg": [ 69.5, 52.14323222304757 ], "wc_limitations_avg": [ 26.666666666666668, 33.01346526629534 ], "wc_review_avg": [ 417.3333333333333, 136.56093471007327 ], "wc_reply_reviewers_avg": [ 180.5, 316.58845525382003 ], "wc_reply_authors_avg": [ 1080.8333333333333, 1108.755671412277 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.8975274678557507 ], "reply_authors_avg": [ 3.3333333333333335, 2.0548046676563256 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3429971702850176, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12401913916434328556&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;pcl.ac.cn;pcl.ac.cn;tsinghua.edu.cn;neu.edu.cn;tsinghua.edu.cn;", "author_num": 9, "aff_unique_index": "0;0;0;1;1;0;2;0", "aff_unique_norm": "Tsinghua University;Pengcheng Laboratory;Northeastern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.northeastern.edu", "aff_unique_abbr": "THU;;NEU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "The Shaped Transformer: Attention Models in the Infinite Depth-and-Width Limit", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71734", "id": "PqfPjS9JRX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa31dc84098add7dd2ffdd20646f2043-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PqfPjS9JRX", "openreview": "https://openreview.net/forum?id=PqfPjS9JRX", "poster": "/media/PosterPDFs/NeurIPS%202023/71734.png?t=1697124320.6294754", "slides": "https://nips.cc/virtual/2023/poster/71734", "video": "https://nips.cc/virtual/2023/poster/71734", "author_site": "Lorenzo Noci, Chuning Li, Mufan Li, Bobby He, Thomas Hofmann, Chris Maddison, Dan Roy", "tldr": "", "abstract": "In deep learning theory, the covariance matrix of the representations serves as a\nproxy to examine the network\u2019s trainability. Motivated by the success of Transform-\ners, we study the covariance matrix of a modified Softmax-based attention model\nwith skip connections in the proportional limit of infinite-depth-and-width. We\nshow that at initialization the limiting distribution can be described by a stochastic\ndifferential equation (SDE) indexed by the depth-to-width ratio. To achieve a\nwell-defined stochastic limit, the Transformer\u2019s attention mechanism is modified\nby centering the Softmax output at identity, and scaling the Softmax logits by a\nwidth-dependent temperature parameter. 
We examine the stability of the network\nthrough the corresponding SDE, showing how the scale of both the drift and diffu-\nsion can be elegantly controlled with the aid of residual connections. The existence\nof a stable SDE implies that the covariance structure is well-behaved, even for very\nlarge depth and width, thus preventing the notorious issues of rank degeneracy\nin deep attention models. Finally, we show, through simulations, that the SDE\nprovides a surprisingly good description of the corresponding finite-size model.\nWe coin the name shaped Transformer for these architectural modifications.", "keywords": "Deep Learning Theory;Covariance SDE;Attention Mechanism;Infinite-Depth-and-Width;Scaling Limit", "primary_area": "", "supplementary_material": "", "author": "Lorenzo Noci;Chuning Li;Mufan Bill Li;Bobby He;Thomas Hofmann;Chris J. Maddison;Daniel M. Roy", "authorids": "~Lorenzo_Noci1;~Chuning_Li1;~Mufan_Bill_Li1;~Bobby_He1;~Thomas_Hofmann1;~Chris_J._Maddison1;~Daniel_M._Roy1", "gender": "M;F;;M;M;M;M", "homepage": ";https://www.cs.toronto.edu/~chuning/;http://csml.stats.ox.ac.uk/people/he/;http://www.da.inf.ethz.ch/;http://www.cs.toronto.edu/~cmaddis/;http://danroy.org;https://mufan-li.github.io/", "dblp": "268/6839;;270/3685;h/ThHofmann;139/1388;04/2068;277/1227", "google_scholar": ";;;T3hAyLkAAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ;https://scholar.google.ca/citations?user=vA6ZQ_AAAAAJ;9dSlc_cAAAAJ", "orcid": ";;;;;;", "linkedin": "lorenzo-noci-97aa59130;;;thomas-hofmann-1ab2402/;;;", "or_profile": "~Lorenzo_Noci1;~Chuning_Li1;~Bobby_He1;~Thomas_Hofmann1;~Chris_J_Maddison1;~Daniel_M_Roy1;~Mufan_Li1", "aff": "ETHZ - ETH Zurich;Department of Computer Science;University of Oxford;Swiss Federal Institute of Technology;Google;University of Toronto;University of Toronto", "aff_domain": "ethz.ch;cs.toronto.edu;ox.ac.uk;ethz.ch;google.com;utoronto.ca;utstat.toronto.edu", "position": "PhD student;MS student;PhD student;Full Professor;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nnoci2023the,\ntitle={The Shaped Transformer: Attention Models in the Infinite Depth-and-Width Limit},\nauthor={Lorenzo Noci and Chuning Li and Mufan Bill Li and Bobby He and Thomas Hofmann and Chris J. Maddison and Daniel M. 
Roy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PqfPjS9JRX}\n}", "github": "", "project": "", "reviewers": "h41x;5C73;4kLr;J5pd", "pdf_size": 960252, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "3;4;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "92;110;63;90", "wc_strengths": "65;45;81;105", "wc_weaknesses": "171;200;48;56", "wc_questions": "124;22;3;128", "wc_limitations": "52;4;18;11", "wc_review": "504;381;213;390", "wc_reply_reviewers": "148;45;10;26", "wc_reply_authors": "164;15;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 16.78354849249705 ], "wc_strengths_avg": [ 74.0, 21.97726097583591 ], "wc_weaknesses_avg": [ 118.75, 67.5920668421968 ], "wc_questions_avg": [ 69.25, 57.16369039871376 ], "wc_limitations_avg": [ 21.25, 18.430613120566555 ], "wc_review_avg": [ 372.0, 103.81473883798967 ], "wc_reply_reviewers_avg": [ 57.25, 53.839460435632155 ], "wc_reply_authors_avg": [ 44.75, 69.12081813751918 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13665086996817617663&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "ethz.ch;cs.toronto.edu;ox.ac.uk;ethz.ch;google.com;utoronto.ca;utstat.toronto.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;5", "aff_unique_norm": "ETH Zurich;Unknown Institution;University of Oxford;Swiss Federal Institute of Technology;Google;University of Toronto", "aff_unique_dep": ";Department of Computer Science;;;Google;", "aff_unique_url": "https://www.ethz.ch;;https://www.ox.ac.uk;https://www.ethz.ch;https://www.google.com;https://www.utoronto.ca", "aff_unique_abbr": "ETHZ;;Oxford;ETH Zurich;Google;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;2;0;3;4;4", "aff_country_unique": "Switzerland;;United Kingdom;United States;Canada" }, { "title": "Contextually Affinitive Neighborhood Refinery for Deep Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71733", "id": "Psj0jHocm1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/123cfe7d8b7702ac97aaf4468fc05fa5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Psj0jHocm1", "openreview": "https://openreview.net/forum?id=Psj0jHocm1", "poster": "/media/PosterPDFs/NeurIPS%202023/71733.png?t=1699514813.668714", "slides": "https://nips.cc/virtual/2023/poster/71733", "video": "https://nips.cc/virtual/2023/poster/71733", "author_site": "Chunlin Yu, Ye Shi, Jingya Wang", "tldr": "", "abstract": "Previous endeavors in self-supervised learning have enlightened the research of deep clustering from an instance discrimination perspective. Built upon this foundation, recent studies further highlight the importance of grouping semantically similar instances. One effective method to achieve this is by promoting the semantic structure preserved by neighborhood consistency. 
However, the samples in the local neighborhood may be limited due to their close proximity to each other, which may not provide substantial and diverse supervision signals. Inspired by the versatile re-ranking methods in the context of image retrieval, we propose to employ an efficient online re-ranking process to mine more informative neighbors in a Contextually Affinitive (ConAff) Neighborhood, and then encourage the cross-view neighborhood consistency. To further mitigate the intrinsic neighborhood noises near cluster boundaries, we propose a progressively relaxed boundary filtering strategy to circumvent the issues brought by noisy neighbors. Our method can be easily integrated into the generic self-supervised frameworks and outperforms the state-of-the-art methods on several popular benchmarks.", "keywords": "Deep Clustering;Self-supervised learning;re-ranking", "primary_area": "", "supplementary_material": "/attachment/9bd6f652741bf330a6dec06eab93f4fdf2f8d24d.pdf", "author": "Chunlin Yu;Ye Shi;Jingya Wang", "authorids": "~Chunlin_Yu1;~Ye_Shi1;~Jingya_Wang3", "gender": ";M;F", "homepage": "https://cly234.github.io/;http://faculty.sist.shanghaitech.edu.cn/faculty/shiye;https://faculty.sist.shanghaitech.edu.cn/faculty/wangjingya/", "dblp": "242/1326;34/11191-1;", "google_scholar": ";gMqbZPUAAAAJ;https://scholar.google.com.au/citations?user=vmvJV_IAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chunlin_Yu1;~Ye_Shi1;~Jingya_Wang3", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyu2023contextually,\ntitle={Contextually Affinitive Neighborhood Refinery for Deep Clustering},\nauthor={Chunlin Yu and Ye Shi and Jingya Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Psj0jHocm1}\n}", "github": "", "project": "", "reviewers": "tqX4;7yhr;Prtf;vt5R;cuTM", "pdf_size": 626317, "rating": "4;6;6;7;7", "confidence": "4;5;3;4;5", "soundness": "3;3;2;4;4", "novelty": "2;2;3;3;4", "presentation": "3;3;2;4;3", "wc_summary": "149;117;47;100;80", "wc_strengths": "113;70;18;50;131", "wc_weaknesses": "309;372;53;383;70", "wc_questions": "135;17;2;33;36", "wc_limitations": "97;25;13;14;33", "wc_review": "803;601;133;580;350", "wc_reply_reviewers": "172;0;0;127;0", "wc_reply_authors": "862;0;0;0;0", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 98.6, 34.319673658122106 ], "wc_strengths_avg": [ 76.4, 41.15628749049165 ], "wc_weaknesses_avg": [ 237.4, 145.92408985496536 ], "wc_questions_avg": [ 44.6, 46.81281875725921 ], "wc_limitations_avg": [ 36.4, 31.187176852033275 ], "wc_review_avg": [ 493.4, 230.38281185887112 ], "wc_reply_reviewers_avg": [ 59.8, 74.6093827879577 ], "wc_reply_authors_avg": [ 172.4, 344.8 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.24397501823713333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11332589122390480514&as_sdt=5,44&sciodt=0,44&hl=en", 
"gs_version_total": 5, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Interpretable Graph Networks Formulate Universal Algebra Conjectures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71732", "id": "Psnph85KYc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b2011a7d5396faf5899863d896a3c24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Psnph85KYc", "openreview": "https://openreview.net/forum?id=Psnph85KYc", "poster": "/media/PosterPDFs/NeurIPS%202023/71732.png?t=1701769523.544931", "slides": "https://nips.cc/virtual/2023/poster/71732", "video": "https://nips.cc/virtual/2023/poster/71732", "author_site": "Francesco Giannini, Stefano Fioravanti, Oguzhan Keskin, Alisia Lupidi, Alisia Lupidi, Lucie Charlotte Magister, Pietro Li\u00f3, Pietro Barbiero", "tldr": "", "abstract": "The rise of Artificial Intelligence (AI) recently empowered researchers to investigate hard mathematical problems which eluded traditional approaches for decades. Yet, the use of AI in Universal Algebra (UA)---one of the fields laying the foundations of modern mathematics---is still completely unexplored. \nThis work proposes the first use of AI to investigate UA's conjectures with an equivalent equational and topological characterization. While topological representations would enable the analysis of such properties using graph neural networks, the limited transparency and brittle explainability of these models hinder their straightforward use to empirically validate existing conjectures or to formulate new ones. \nTo bridge these gaps, we propose a general algorithm generating AI-ready datasets based on UA's conjectures, and introduce a novel neural layer to build fully interpretable graph networks. 
The results of our experiments demonstrate that interpretable graph networks: (i) enhance interpretability without sacrificing task accuracy, (ii) strongly generalize when predicting universal algebra's properties, (iii) generate simple explanations that empirically validate existing conjectures, and (iv) identify subgraphs suggesting the formulation of novel conjectures.", "keywords": "universal algebra;interpretability;graph neural networks;concept-based models", "primary_area": "", "supplementary_material": "/attachment/684502d7476a0c1f19c548f735444fd347809bde.pdf", "author": "Francesco Giannini;Stefano Fioravanti;Oguzhan Keskin;Alisia Maria Lupidi;Lucie Charlotte Magister;Pietro Lio;Pietro Barbiero", "authorids": "~Francesco_Giannini1;~Stefano_Fioravanti1;~Oguzhan_Keskin1;~Alisia_Maria_Lupidi1;~Lucie_Charlotte_Magister1;~Pietro_Lio1;~Pietro_Barbiero1", "gender": "M;M;;F;F;M;M", "homepage": "https://www.francescogiannini.eu/;https://sailab.diism.unisi.it/people/stefano-fioravanti/;;https://www.linkedin.com/in/alisia-maria-lupidi/;;https://www.cst.cam.ac.uk/people/pl219;http://www.pietrobarbiero.eu/", "dblp": "198/0854;47/3460;332/1039;;298/1032;l/PietroLio.html;238/7860", "google_scholar": "RO8aInMAAAAJ;GOMJJjcAAAAJ;xOl-Y_sAAAAJ;5xzdzq4AAAAJ;do6o-rYAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;https://scholar.google.it/citations?user=4gbToQoAAAAJ", "orcid": "0000-0001-8492-8110;0000-0001-6918-1805;;;0000-0003-3499-5475;0000-0002-0540-5053;0000-0003-3155-2564", "linkedin": "https://www.linkedin.com/search/results/all/?fetchDeterministicClustersOnly=true&heroEntityKey=urn%3Ali%3Afsd_profile%3AACoAAEZY56YBnC1EDCTXy7QNDbkYThgd6vpD6i8&keywords=francesco%20giannini&origin=RICH_QUERY_SUGGESTION&position=0&searchId=95dc79fd-e2ea-4d21-b3dc-7ad787ee929e&sid=JQw&spellCorrectionEnabled=false;;oguzhan-keskin/;;;;", "or_profile": "~Francesco_Giannini1;~Stefano_Fioravanti1;~Oguzhan_Keskin1;~Alisia_Maria_Lupidi1;~Lucie_Charlotte_Magister1;~Pietro_Lio1;~Pietro_Barbiero1", "aff": "CINI;University of Siena;University of Cambridge;University of Cambridge;Google;University of Cambridge;University of Cambridge", "aff_domain": "consorzio-cini.it;unisi.it;cam.ac.uk;cam.ac.uk;google.com;cam.ac.uk;cam.ac.uk", "position": "Researcher;Postdoc;MS student;MS student;Research Intern;Full Professor;PhD student", "bibtex": "@inproceedings{\ngiannini2023interpretable,\ntitle={Interpretable Graph Networks Formulate Universal Algebra Conjectures},\nauthor={Francesco Giannini and Stefano Fioravanti and Oguzhan Keskin and Alisia Maria Lupidi and Lucie Charlotte Magister and Pietro Lio and Pietro Barbiero},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Psnph85KYc}\n}", "github": "", "project": "", "reviewers": "3uvy;LfMh;swnT;AeQ9", "pdf_size": 932230, "rating": "4;5;6;6", "confidence": "3;2;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "2;2;3;3", "wc_summary": "57;136;127;105", "wc_strengths": "84;88;46;58", "wc_weaknesses": "157;109;14;640", "wc_questions": "31;209;1;78", "wc_limitations": "16;25;101;24", "wc_review": "345;567;289;905", "wc_reply_reviewers": "50;144;0;220", "wc_reply_authors": "208;250;0;574", "reply_reviewers": "1;2;0;2", "reply_authors": "2;2;1;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 106.25, 30.589009464185008 ], 
"wc_strengths_avg": [ 69.0, 17.578395831246947 ], "wc_weaknesses_avg": [ 230.0, 242.24264694722933 ], "wc_questions_avg": [ 79.75, 79.50904036648915 ], "wc_limitations_avg": [ 41.5, 34.528973341239094 ], "wc_review_avg": [ 526.5, 241.99741734159065 ], "wc_reply_reviewers_avg": [ 103.5, 84.83366077212511 ], "wc_reply_authors_avg": [ 258.0, 205.53831759552767 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5606622126446046149&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "consorzio-cini.it;unisi.it;cam.ac.uk;cam.ac.uk;google.com;cam.ac.uk;cam.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;2;3;2;2", "aff_unique_norm": "Consorzio Interuniversitario Nazionale per l'Informatica;University of Siena;University of Cambridge;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.cini.it;https://www.unisi.it;https://www.cam.ac.uk;https://www.google.com", "aff_unique_abbr": "CINI;UniSi;Cambridge;Google", "aff_campus_unique_index": "1;1;2;1;1", "aff_campus_unique": ";Cambridge;Mountain View", "aff_country_unique_index": "0;0;1;1;2;1;1", "aff_country_unique": "Italy;United Kingdom;United States" }, { "title": "Understanding Few-Shot Learning: Measuring Task Relatedness and Adaptation Difficulty via Attributes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71731", "id": "Pvgxecj5aS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3df38ca67befaed9c03b95ffee07d9f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pvgxecj5aS", "openreview": "https://openreview.net/forum?id=Pvgxecj5aS", "poster": "/media/PosterPDFs/NeurIPS%202023/71731.png?t=1701420512.658321", "slides": "https://nips.cc/virtual/2023/poster/71731", "video": "https://nips.cc/virtual/2023/poster/71731", "author_site": "Minyang Hu, Hong Chang, Zong Guo, Bingpeng MA, Shiguang Shan, Xilin Chen", "tldr": "", "abstract": "Few-shot learning (FSL) aims to learn novel tasks with very few labeled samples by leveraging experience from \\emph{related} training tasks.\n In this paper, we try to understand FSL by exploring two key questions:\n (1) How to quantify the relationship between \\emph{ training} and \\emph{novel} tasks?\n (2) How does the relationship affect the \\emph{adaptation difficulty} on novel tasks for different models?\n To answer the first question, we propose Task Attribute Distance (TAD) as a metric to quantify the task relatedness via attributes.\n Unlike other metrics, TAD is independent of models, making it applicable to different FSL models.\n To address the second question, we utilize TAD metric to establish a theoretical connection between task relatedness and task adaptation difficulty.\n By deriving the generalization error bound on a novel task, we discover how TAD measures the adaptation difficulty on novel tasks for different models.\n To validate our theoretical results, we conduct experiments on three benchmarks.\n Our experimental results confirm that TAD metric effectively quantifies the task relatedness and reflects the adaptation difficulty on novel tasks for various FSL methods, even if some of them do not learn attributes explicitly or human-annotated attributes are not provided.\n Our code is available at \n 
\\href{https://github.com/hu-my/TaskAttributeDistance}{https://github.com/hu-my/TaskAttributeDistance}.", "keywords": "Few-shot Learning;Meta-Learning;Task Relatedness;Task Adaptation Difficulty", "primary_area": "", "supplementary_material": "/attachment/dc12591d19c4ab9617c2343529a62301958e1957.zip", "author": "Minyang Hu;Hong Chang;Zong Guo;Bingpeng Ma;Shiguang Shan;Xilin CHEN", "authorids": "~Minyang_Hu1;~Hong_Chang1;~Zong_Guo2;~Bingpeng_Ma1;~Shiguang_Shan2;~Xilin_CHEN2", "gender": "F;M;M;M;M;M", "homepage": ";;http://people.ucas.edu.cn/~bpma;http://vipl.ict.ac.cn/people/sgshan/;;http://vipl.ict.ac.cn/people/_xlchen/", "dblp": ";327/8757;62/1822;s/ShiguangShan;325/1940;c/XilinChen", "google_scholar": "LX6MnNsAAAAJ;;;https://scholar.google.com.tw/citations?user=Vkzd7MIAAAAJ;6Saa1ugAAAAJ;vVx2v20AAAAJ", "orcid": ";;0000-0001-8984-205X;0000-0002-8348-392X;;0000-0003-3024-4404", "linkedin": ";;;;;", "or_profile": "~Hong_Chang1;~Zong_Guo2;~Bingpeng_Ma1;~Shiguang_Shan2;~Hu_Minyang1;~Xilin_Chen4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;, Chinese Academy of Sciences;Institute of Computing Technology", "aff_domain": "ict.ac.cn;ict.ac.cn;ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "Full Professor;PhD student;Full Professor;Full Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nhu2023understanding,\ntitle={Understanding Few-Shot Learning: Measuring Task Relatedness and Adaptation Difficulty via Attributes},\nauthor={Minyang Hu and Hong Chang and Zong Guo and Bingpeng Ma and Shiguang Shan and Xilin CHEN},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pvgxecj5aS}\n}", "github": "", "project": "", "reviewers": "aTLp;b4dX;PSzT;mMtZ", "pdf_size": 446316, "rating": "4;6;6;7", "confidence": "3;5;5;3", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "2;4;4;3", "wc_summary": "105;190;97;67", "wc_strengths": "148;93;98;15", "wc_weaknesses": "185;195;148;139", "wc_questions": "81;34;19;83", "wc_limitations": "14;26;7;55", "wc_review": "533;538;369;359", "wc_reply_reviewers": "186;147;63;21", "wc_reply_authors": "443;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 114.75, 45.6966902521397 ], "wc_strengths_avg": [ 88.5, 47.573627147822144 ], "wc_weaknesses_avg": [ 166.75, 23.731571797923543 ], "wc_questions_avg": [ 54.25, 28.261059781968545 ], "wc_limitations_avg": [ 25.5, 18.33712082089225 ], "wc_review_avg": [ 449.75, 85.8410595228181 ], "wc_reply_reviewers_avg": [ 104.25, 65.46516249120596 ], "wc_reply_authors_avg": [ 110.75, 191.82462693825315 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13902239151436690159&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ict.ac.cn;ict.ac.cn;ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Chinese Academy of 
Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "CAS;UCAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Exponential Families from Truncated Samples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71730", "id": "PxcWJqO3qj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d5f304fb4ed0243851e41699dca4287-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PxcWJqO3qj", "openreview": "https://openreview.net/forum?id=PxcWJqO3qj", "poster": "/media/PosterPDFs/NeurIPS%202023/71730.png?t=1699938758.3666668", "slides": "https://nips.cc/virtual/2023/poster/71730", "video": "https://nips.cc/virtual/2023/poster/71730", "author_site": "Jane Lee, Andre Wibisono, Emmanouil Zampetakis", "tldr": "", "abstract": "Missing data problems have many manifestations across many scientific fields. A fundamental type of missing data problem arises when samples are \\textit{truncated}, i.e., samples that lie in a subset of the support are not observed. Statistical estimation from truncated samples is a classical problem in statistics which dates back to Galton, Pearson, and Fisher. A recent line of work provides the first efficient estimation algorithms for the parameters of a Gaussian distribution and for linear regression with Gaussian noise.\n\nIn this paper we generalize these results to log-concave exponential families. We provide an estimation algorithm that shows that \\textit{extrapolation} is possible for a much larger class of distributions while it maintains a polynomial sample and time complexity on average. Our algorithm is based on Projected Stochastic Gradient Descent and is not only applicable in a more general setting but is also simpler and more efficient than recent algorithms. 
Our work also has interesting implications for learning general log-concave distributions and sampling given only access to truncated data.", "keywords": "truncated statistics;robustness;exponential families;extrapolation", "primary_area": "", "supplementary_material": "/attachment/e936f40d0e979f873c82b43fe6f4ca4eb294233f.pdf", "author": "Jane Lee;Andre Wibisono;Manolis Zampetakis", "authorids": "~Jane_Lee1;~Andre_Wibisono1;~Manolis_Zampetakis2", "gender": ";M;M", "homepage": "https://janehjlee.github.io;http://www.cs.yale.edu/homes/wibisono/;https://mzampet.com/", "dblp": "245/7420;64/10962;", "google_scholar": "4PgBYBkAAAAJ;;", "orcid": ";;", "linkedin": "janehjlee/;;", "or_profile": "~Jane_Lee1;~Andre_Wibisono1;~Manolis_Zampetakis2", "aff": "Yale University;Yale University;Yale University", "aff_domain": "yale.edu;yale.edu;yale.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2023learning,\ntitle={Learning Exponential Families from Truncated Samples},\nauthor={Jane Lee and Andre Wibisono and Manolis Zampetakis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PxcWJqO3qj}\n}", "github": "", "project": "", "reviewers": "SUu1;UJWe;P6iL;Dgrb", "pdf_size": 638589, "rating": "6;6;6;7", "confidence": "3;3;3;1", "soundness": "4;3;3;3", "novelty": "2;3;4;3", "presentation": "2;3;3;3", "wc_summary": "186;60;178;86", "wc_strengths": "59;20;180;97", "wc_weaknesses": "454;40;350;285", "wc_questions": "106;38;90;4", "wc_limitations": "62;16;1;7", "wc_review": "867;174;799;479", "wc_reply_reviewers": "223;89;146;151", "wc_reply_authors": "72;66;0;51", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 127.5, 55.34211777660844 ], "wc_strengths_avg": [ 89.0, 59.173473786824445 ], "wc_weaknesses_avg": [ 282.25, 152.29966349273394 ], "wc_questions_avg": [ 59.5, 40.72775466435635 ], "wc_limitations_avg": [ 21.5, 23.984369910422913 ], "wc_review_avg": [ 579.75, 276.2999954759319 ], "wc_reply_reviewers_avg": [ 152.25, 47.557202398795496 ], "wc_reply_authors_avg": [ 47.25, 28.331740151286155 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13732894477373380569&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "yale.edu;yale.edu;yale.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Private estimation algorithms for stochastic block models and mixture models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71729", "id": "Pya0kCEpDk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d702d78b2468d2bc80b22a2fc3e59faf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pya0kCEpDk", "openreview": "https://openreview.net/forum?id=Pya0kCEpDk", 
"poster": "/media/PosterPDFs/NeurIPS%202023/71729.png?t=1701856551.1958435", "slides": "https://nips.cc/virtual/2023/poster/71729", "video": "https://nips.cc/virtual/2023/poster/71729", "author_site": "Hongjie Chen, Vincent Cohen-Addad, Tommaso d\u2019Orsi, Alessandro Epasto, Jacob Imola, David Steurer, Stefan Tiegel", "tldr": "", "abstract": "We introduce general tools for designing efficient private estimation algorithms, in the high-dimensional settings, whose statistical guarantees almost match those of the best known non-private algorithms.\nTo illustrate our techniques, we consider two problems: recovery of stochastic block models and learning mixtures of spherical Gaussians.\n\nFor the former, we present the first efficient $(\\epsilon, \\delta)$-differentially private algorithm for both weak recovery and exact recovery. Previously known algorithms achieving comparable guarantees required quasi-polynomial time. \n\nFor the latter, we design an $(\\epsilon, \\delta)$-differentially private algorithm that recovers the centers of the $k$-mixture when the minimum separation is at least $\tO(k^{1/t}\\sqrt{t})$. For all choices of $t$, this algorithm requires sample complexity $n\\geq k^{O(1)}d^{O(t)}$ and time complexity $(nd)^{O(t)}$. Prior work required either an additional additive $\\Omega(\\sqrt{\\log n})$ term in the minimum separation or an explicit upper bound on the Euclidean norm of the centers.", "keywords": "differential privacy;stochastic block model;Gaussian mixture model;sum of squares", "primary_area": "", "supplementary_material": "", "author": "Hongjie Chen;Vincent Cohen-Addad;Tommaso d'Orsi;Alessandro Epasto;Jacob Imola;David Steurer;Stefan Tiegel", "authorids": "~Hongjie_Chen2;~Vincent_Cohen-Addad1;~Tommaso_d'Orsi1;~Alessandro_Epasto3;~Jacob_Imola1;~David_Steurer1;~Stefan_Tiegel1", "gender": ";;;M;;;", "homepage": "https://chen-hj.github.io;;https://tommasodorsi.github.io;https://epasto.org;https://cseweb.ucsd.edu/~jimola/;;https://stefantiegel.com", "dblp": "80/4761-4;136/5814;275/8135;58/7802;244/2598;;218/5553", "google_scholar": "bPRICwkAAAAJ;;;https://scholar.google.com/citations?hl=en;;;https://scholar.google.ch/citations?user=WvpFkwsAAAAJ", "orcid": ";;;0000-0003-0456-3217;;;", "linkedin": ";;;https://www.linkedin.com/pub/alessandro-epasto/85/649/733/;;;", "or_profile": "~Hongjie_Chen2;~Vincent_Cohen-Addad1;~Tommaso_d'Orsi1;~Alessandro_Epasto3;~Jacob_Imola1;~David_Steurer1;~Stefan_Tiegel1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Google;Swiss Federal Institute of Technology;Google;University of California, San Diego, University of California, San Diego;;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;google.com;ethz.ch;google.com;eng.ucsd.edu;;ethz.ch", "position": "PhD student;Researcher;PhD student;Research Scientist;PhD student;;PhD student", "bibtex": "@inproceedings{\nchen2023private,\ntitle={Private estimation algorithms for stochastic block models and mixture models},\nauthor={Hongjie Chen and Vincent Cohen-Addad and Tommaso d'Orsi and Alessandro Epasto and Jacob Imola and David Steurer and Stefan Tiegel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pya0kCEpDk}\n}", "github": "", "project": "", "reviewers": "NQm5;GVoS;cXQY;SDAx", "pdf_size": 737822, "rating": "7;7;8;8", "confidence": "3;4;4;3", "soundness": "4;4;3;4", "novelty": "4;3;4;4", "presentation": "2;4;3;4", "wc_summary": "88;155;241;91", "wc_strengths": "37;58;76;106", 
"wc_weaknesses": "79;3;14;184", "wc_questions": "387;98;1;69", "wc_limitations": "1;11;20;14", "wc_review": "592;325;352;464", "wc_reply_reviewers": "23;12;10;19", "wc_reply_authors": "401;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 143.75, 62.19877410367506 ], "wc_strengths_avg": [ 69.25, 25.31180554602931 ], "wc_weaknesses_avg": [ 70.0, 71.94094800598613 ], "wc_questions_avg": [ 138.75, 147.58789753905975 ], "wc_limitations_avg": [ 11.5, 6.87386354243376 ], "wc_review_avg": [ 433.25, 105.43570315599929 ], "wc_reply_reviewers_avg": [ 16.0, 5.244044240850758 ], "wc_reply_authors_avg": [ 100.25, 173.63809345877993 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7025358552360495091&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "inf.ethz.ch;google.com;ethz.ch;google.com;eng.ucsd.edu;;ethz.ch", "author_num": 7, "aff_unique_index": "0;1;2;1;3;2", "aff_unique_norm": "ETH Zurich;Google;Swiss Federal Institute of Technology;University of California, San Diego", "aff_unique_dep": "Department of Computer Science;Google;;", "aff_unique_url": "https://www.ethz.ch;https://www.google.com;https://www.ethz.ch;https://www.ucsd.edu", "aff_unique_abbr": "ETHZ;Google;ETH Zurich;UCSD", "aff_campus_unique_index": "0;1;1;3", "aff_campus_unique": "Zurich;Mountain View;;San Diego", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Vulnerabilities in Video Quality Assessment Models: The Challenge of Adversarial Attacks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71728", "id": "Pz8xvVCLNJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a1c716638d9b618a1a40a96f473c8250-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Pz8xvVCLNJ", "openreview": "https://openreview.net/forum?id=Pz8xvVCLNJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71728.png?t=1697640587.9657173", "slides": "https://nips.cc/virtual/2023/poster/71728", "video": "https://nips.cc/virtual/2023/poster/71728", "author_site": "Aoxiang Zhang, Yu Ran, Weixuan Tang, Yuan-Gen Wang", "tldr": "", "abstract": "No-Reference Video Quality Assessment (NR-VQA) plays an essential role in improving the viewing experience of end-users. Driven by deep learning, recent NR-VQA models based on Convolutional Neural Networks (CNNs) and Transformers have achieved outstanding performance. To build a reliable and practical assessment system, it is of great necessity to evaluate their robustness. However, such issue has received little attention in the academic community. In this paper, we make the first attempt to evaluate the robustness of NR-VQA models against\nadversarial attacks, and propose a patch-based random search method for black-box attack. Specifically, considering both the attack effect on quality score and the visual quality of adversarial video, the attack problem is formulated as misleading the estimated quality score under the constraint of just-noticeable difference (JND). 
Built upon such formulation, a novel loss function called Score-Reversed Boundary Loss is designed to push the adversarial video\u2019s estimated quality score far away from its ground-truth score towards a specific boundary, and the JND constraint is modeled as a strict $L_2$ and $L_\\infty$ norm restriction. By this means, both white-box and black-box attacks can be launched in an effective and imperceptible manner. The source code is available at https://github.com/GZHU-DVL/AttackVQA.", "keywords": "video quality assessment;adversarial attack;black-box;just noticeable difference", "primary_area": "", "supplementary_material": "/attachment/8cfebfc442d9b792582e5a2bc13f9ba478395a7c.zip", "author": "Aoxiang Zhang;Yu Ran;Weixuan Tang;Yuan-Gen Wang", "authorids": "~Aoxiang_Zhang1;~Yu_Ran3;~Weixuan_Tang1;~Yuan-Gen_Wang1", "gender": "M;;M;M", "homepage": ";;;http://dvl.gzhu.edu.cn/info/1021/1010.htm", "dblp": "https://dblp.uni-trier.de/pid/321/6799;79/8499;;51/8843", "google_scholar": ";;https://scholar.google.com.hk/citations?user=4DvBvYgAAAAJ;-3obl74AAAAJ", "orcid": ";;0000-0002-6573-8124;", "linkedin": ";;;", "or_profile": "~Aoxiang_Zhang1;~Yu_Ran3;~Weixuan_Tang1;~Yuan-Gen_Wang1", "aff": "Guangzhou University;Guangzhou University;Guangzhou University;Guangzhou University", "aff_domain": "gzhu.edu.cn;gzhu.edu.cn;gzhu.edu.cn;gzhu.edu.cn", "position": "MS student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023vulnerabilities,\ntitle={Vulnerabilities in Video Quality Assessment Models: The Challenge of Adversarial Attacks},\nauthor={Aoxiang Zhang and Yu Ran and Weixuan Tang and Yuan-Gen Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Pz8xvVCLNJ}\n}", "github": "", "project": "", "reviewers": "xq3Y;C8CZ;mjyw;NoLq", "pdf_size": 2325274, "rating": "5;6;7;8", "confidence": "4;5;4;5", "soundness": "3;3;2;3", "novelty": "3;2;2;3", "presentation": "2;2;3;3", "wc_summary": "48;18;51;58", "wc_strengths": "29;30;38;60", "wc_weaknesses": "53;106;143;177", "wc_questions": "71;31;10;39", "wc_limitations": "7;1;13;2", "wc_review": "208;186;255;336", "wc_reply_reviewers": "5;33;29;176", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 43.75, 15.303185942802891 ], "wc_strengths_avg": [ 39.25, 12.47747971346778 ], "wc_weaknesses_avg": [ 119.75, 45.996603135449035 ], "wc_questions_avg": [ 37.75, 21.924586655168667 ], "wc_limitations_avg": [ 5.75, 4.763139720814412 ], "wc_review_avg": [ 246.25, 57.499456519170685 ], "wc_reply_reviewers_avg": [ 60.75, 67.39575283354286 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11563530831049255361&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "gzhu.edu.cn;gzhu.edu.cn;gzhu.edu.cn;gzhu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Guangzhou University", "aff_unique_dep": "", "aff_unique_url": "http://www.gzhu.edu.cn", "aff_unique_abbr": "GU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "China" }, { "title": "Does Visual Pretraining Help End-to-End Reasoning?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71727", "id": "PzYAMXmIT3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43ba0466af2b1ac76aa85d8fbec714e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=PzYAMXmIT3", "openreview": "https://openreview.net/forum?id=PzYAMXmIT3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71727", "video": "https://nips.cc/virtual/2023/poster/71727", "author_site": "Chen Sun, Calvin Luo, Xingyi Zhou, Anurag Arnab, Cordelia Schmid", "tldr": "", "abstract": "We aim to investigate whether end-to-end learning of visual reasoning can be achieved with general-purpose neural networks, with the help of visual pretraining. A positive result would refute the common belief that explicit visual abstraction (e.g. object detection) is essential for compositional generalization on visual reasoning, and confirm the feasibility of a neural network ''generalist'' to solve visual recognition and reasoning tasks. We propose a simple and general self-supervised framework which ''compresses'' each video frame into a small set of tokens with a transformer network, and reconstructs the remaining frames based on the compressed temporal context. To minimize the reconstruction loss, the network must learn a compact representation for each image, as well as capture temporal dynamics and object permanence from temporal context. We perform evaluation on two visual reasoning benchmarks, CATER and ACRE. We observe that pretraining is essential to achieve compositional generalization for end-to-end visual reasoning. Our proposed framework outperforms traditional supervised pretraining, including image classification and explicit object detection, by large margins.", "keywords": "visual reasoning;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Chen Sun;Calvin Luo;Xingyi Zhou;Anurag Arnab;Cordelia Schmid", "authorids": "~Chen_Sun1;~Calvin_Luo2;~Xingyi_Zhou2;~Anurag_Arnab1;~Cordelia_Schmid1", "gender": "M;M;M;;F", "homepage": "https://chensun.me;https://calvinyluo.com/;http://xingyizhou.xyz;;https://cordeliaschmid.github.io/", "dblp": "01/6072-2;;182/2328;;s/CordeliaSchmid", "google_scholar": "vQa7heEAAAAJ;https://scholar.google.com/citations?hl=en;47n-0mwAAAAJ;;IvqCXP4AAAAJ", "orcid": ";;0000-0002-0914-8525;;", "linkedin": ";;xingyi-zhou-21925290/;;cordelia-schmid-47985a9", "or_profile": "~Chen_Sun1;~Calvin_Luo2;~Xingyi_Zhou2;~Anurag_Arnab1;~Cordelia_Schmid1", "aff": "Google;Brown University;Google;;Inria", "aff_domain": "google.com;brown.edu;google.com;;inria.fr", "position": "Research Scientist;PhD student;Researcher;;Researcher", "bibtex": "@inproceedings{\nsun2023does,\ntitle={Does Visual Pretraining Help End-to-End Reasoning?},\nauthor={Chen Sun and Calvin Luo and Xingyi Zhou and Anurag Arnab and Cordelia Schmid},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=PzYAMXmIT3}\n}", "github": "", "project": "", "reviewers": "zViR;PrZv;e8rc;t1ps", "pdf_size": 2515151, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;4;3;3", "wc_summary": "101;39;102;60", "wc_strengths": "89;49;161;31", "wc_weaknesses": "382;133;159;164", "wc_questions": "22;3;39;63", "wc_limitations": "19;1;11;32", "wc_review": 
"613;225;472;350", "wc_reply_reviewers": "287;32;42;44", "wc_reply_authors": "194;0;0;95", "reply_reviewers": "2;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 27.04163456597992 ], "wc_strengths_avg": [ 82.5, 49.947472408521335 ], "wc_weaknesses_avg": [ 209.5, 100.28584147326082 ], "wc_questions_avg": [ 31.75, 22.083647796503186 ], "wc_limitations_avg": [ 15.75, 11.344051304538427 ], "wc_review_avg": [ 415.0, 143.85583060828642 ], "wc_reply_reviewers_avg": [ 101.25, 107.3391238086095 ], "wc_reply_authors_avg": [ 72.25, 80.28192511393831 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9228407957050173818&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "google.com;brown.edu;google.com;;inria.fr", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Google;Brown University;INRIA", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.brown.edu;https://www.inria.fr", "aff_unique_abbr": "Google;Brown;Inria", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;France" }, { "title": "Near-optimal learning with average H\u00f6lder smoothness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71726", "id": "Q0ntwxVtcy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42afce512806ab874b9f99ed9a08055e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q0ntwxVtcy", "openreview": "https://openreview.net/forum?id=Q0ntwxVtcy", "poster": "/media/PosterPDFs/NeurIPS%202023/71726.png?t=1701086394.3598666", "slides": "https://nips.cc/virtual/2023/poster/71726", "video": "https://nips.cc/virtual/2023/poster/71726", "author_site": "Guy Kornowski, Steve Hanneke, Aryeh Kontorovich", "tldr": "", "abstract": "We generalize the notion of average Lipschitz smoothness proposed by Ashlagi et al. (COLT 2021) by extending it to H\u00f6lder smoothness. This measure of the \"effective smoothness\" of a function is sensitive to the underlying distribution and can be dramatically smaller than its classic \"worst-case\" H\u00f6lder constant.\nWe consider both the realizable and the agnostic (noisy) regression settings, proving upper and lower risk bounds in terms of the average H\u00f6lder smoothness; these rates improve upon both previously known rates even in the special case of average Lipschitz smoothness.\nMoreover, our lower bound is tight in the realizable setting up to log factors, thus we establish the minimax rate.\nFrom an algorithmic perspective, since our notion of average smoothness is defined with respect to the unknown underlying distribution, the learner does not have an explicit representation of the function class, hence is unable to execute ERM. 
Nevertheless, we provide distinct learning algorithms that achieve both (nearly) optimal learning rates.\nOur results hold in any totally bounded metric space, and are stated in terms of its intrinsic geometry.\nOverall, our results show that the classic worst-case notion of H\u00f6lder smoothness can be essentially replaced by its average, yielding considerably sharper guarantees.", "keywords": "H\u00f6lder smoothness;average smoothness;bracketing numbers;generalization;risk bounds;metric space", "primary_area": "", "supplementary_material": "/attachment/886dd7b8a16490bef4ece6bf3832450610defdf7.pdf", "author": "Guy Kornowski;Steve Hanneke;Aryeh Kontorovich", "authorids": "~Guy_Kornowski1;~Steve_Hanneke1;~Aryeh_Kontorovich1", "gender": ";M;", "homepage": ";http://www.stevehanneke.com;http://www.cs.bgu.ac.il/~karyeh/", "dblp": "276/7550;40/154;20/10289", "google_scholar": ";fEhNO7YAAAAJ;https://scholar.google.co.il/citations?user=UNVQ5DsAAAAJ", "orcid": "0000-0001-8058-2909;;", "linkedin": ";;prof-aryeh-kontorovich-7b236055/", "or_profile": "~Guy_Kornowski1;~Steve_Hanneke1;~Aryeh_Kontorovich1", "aff": "Weizmann Institute of Science;Purdue University;Ben Gurion University of the Negev", "aff_domain": "weizmann.ac.il;purdue.edu;bgu.ac.il", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nkornowski2023nearoptimal,\ntitle={Near-optimal learning with average H\\\"older smoothness},\nauthor={Guy Kornowski and Steve Hanneke and Aryeh Kontorovich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q0ntwxVtcy}\n}", "github": "", "project": "", "reviewers": "LpFP;uWsQ;VoQT", "pdf_size": 488151, "rating": "6;7;7", "confidence": "2;3;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "63;54;208", "wc_strengths": "133;168;42", "wc_weaknesses": "122;67;58", "wc_questions": "2;165;66", "wc_limitations": "1;10;14", "wc_review": "321;464;388", "wc_reply_reviewers": "9;14;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 108.33333333333333, 70.57068954050415 ], "wc_strengths_avg": [ 114.33333333333333, 53.1057644914582 ], "wc_weaknesses_avg": [ 82.33333333333333, 28.288199345702832 ], "wc_questions_avg": [ 77.66666666666667, 67.05387551978052 ], "wc_limitations_avg": [ 8.333333333333334, 5.436502143433364 ], "wc_review_avg": [ 391.0, 58.418033745297066 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 2.0548046676563256 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13023704677912599908&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "weizmann.ac.il;purdue.edu;bgu.ac.il", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Weizmann Institute of Science;Purdue University;Ben Gurion University of the Negev", "aff_unique_dep": ";;", "aff_unique_url": "https://www.weizmann.org.il;https://www.purdue.edu;https://www.bgu.ac.il", "aff_unique_abbr": "Weizmann;Purdue;BGU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "OneNet: Enhancing Time Series Forecasting Models under Concept Drift by Online Ensembling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71725", "id": "Q25wMXsaeZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dd6a47bc0aad6f34aa5e77706d90cdc4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q25wMXsaeZ", "openreview": "https://openreview.net/forum?id=Q25wMXsaeZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71725.png?t=1696838731.0629554", "slides": "https://nips.cc/virtual/2023/poster/71725", "video": "https://nips.cc/virtual/2023/poster/71725", "author_site": "yifan zhang, Qingsong Wen, xue wang, Weiqi Chen, Liang Sun, Zhang Zhang, Liang Wang, Rong Jin, Tieniu Tan", "tldr": "", "abstract": "Online updating of time series forecasting models aims to address the concept drifting problem by efficiently updating forecasting models based on streaming data. Many algorithms are designed for online time series forecasting, with some exploiting cross-variable dependency while others assume independence among variables. Given every data assumption has its own pros and cons in online time series modeling, we propose **On**line **e**nsembling **Net**work (**OneNet**). It dynamically updates and combines two models, with one focusing on modeling the dependency across the time dimension and the other on cross-variate dependency. Our method incorporates a reinforcement learning-based approach into the traditional online convex programming framework, allowing for the linear combination of the two models with dynamically adjusted weights. OneNet addresses the main shortcoming of classical online learning methods that tend to be slow in adapting to the concept drift. 
Empirical results show that OneNet reduces online forecasting error by more than $\\mathbf{50}\\%$ compared to the State-Of-The-Art (SOTA) method.", "keywords": "Time series forecasting;concept drift;online learning;online convex programming", "primary_area": "", "supplementary_material": "/attachment/a030154c31312f439190f8ea0a01717dcba84c04.pdf", "author": "YiFan Zhang;Qingsong Wen;Xue Wang;Weiqi Chen;Liang Sun;Zhang Zhang;Liang Wang;Rong Jin;Tieniu Tan", "authorids": "~YiFan_Zhang8;~Qingsong_Wen2;~Xue_Wang9;~Weiqi_Chen1;~Liang_Sun2;~Zhang_Zhang1;~Liang_Wang3;~Rong_Jin3;~Tieniu_Tan1", "gender": ";;;M;M;;M;M;", "homepage": ";;;https://github.com/DAMO-DI-ML;https://www.linkedin.com/in/liang-sun-a0a87621/;https://zhangzhang80.github.io/;;https://www.cse.msu.edu/~rongjin/;", "dblp": ";;;;18/5837-1;94/2468-1;56/4499-1;j/RongJin;", "google_scholar": ";;;dMg_soMAAAAJ;D_cOMBgAAAAJ;rnRNwEMAAAAJ;;;", "orcid": ";;;0009-0007-9246-9402;0009-0002-5835-7259;0000-0001-9425-3065;;;", "linkedin": ";;;;;;;;", "or_profile": "~YiFan_Zhang8;~Qingsong_Wen2;~Xue_Wang9;~Weiqi_Chen1;~Liang_Sun2;~Zhang_Zhang1;~Liang_Wang3;~Rong_Jin3;~Tieniu_Tan1", "aff": ";;;Alibaba Group;Alibaba Group;Institute of Automation, Chinese Academy of Sciences;Institute of Automation\uff0cCAS\uff0cChina;Twitter;", "aff_domain": ";;;alibaba-inc.com;alibaba-inc.com;ia.ac.cn;ia.ac.cn;twitter.com;", "position": ";;;Researcher;Staff Software Engineer;Associate Professor;Full Professor;Researcher;", "bibtex": "@inproceedings{\nzhang2023onenet,\ntitle={OneNet: Enhancing Time Series Forecasting Models under Concept Drift by Online Ensembling},\nauthor={YiFan Zhang and Qingsong Wen and Xue Wang and Weiqi Chen and Liang Sun and Zhang Zhang and Liang Wang and Rong Jin and Tieniu Tan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q25wMXsaeZ}\n}", "github": "", "project": "", "reviewers": "UB5d;r9Pc;QKSm;7dRB", "pdf_size": 4934428, "rating": "5;5;7;7", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;4;3", "wc_summary": "42;172;79;54", "wc_strengths": "3;34;126;47", "wc_weaknesses": "3;115;173;15", "wc_questions": "189;2;44;23", "wc_limitations": "1;1;47;4", "wc_review": "238;324;469;143", "wc_reply_reviewers": "0;558;127;60", "wc_reply_authors": "167;1940;57;0", "reply_reviewers": "0;3;1;1", "reply_authors": "2;6;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 86.75, 50.99693618248061 ], "wc_strengths_avg": [ 52.5, 45.34589286804263 ], "wc_weaknesses_avg": [ 76.5, 70.67354526270775 ], "wc_questions_avg": [ 64.5, 73.39788825300084 ], "wc_limitations_avg": [ 13.25, 19.524023663169434 ], "wc_review_avg": [ 293.5, 119.85512087516328 ], "wc_reply_reviewers_avg": [ 186.25, 219.281069634385 ], "wc_reply_authors_avg": [ 541.0, 809.9404299082742 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13201490875722513143&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";;;alibaba-inc.com;alibaba-inc.com;ia.ac.cn;ia.ac.cn;twitter.com;", "author_num": 9, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Alibaba Group;Chinese Academy of 
Sciences;Twitter, Inc.", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.alibaba.com;http://www.ia.cas.cn;https://twitter.com", "aff_unique_abbr": "Alibaba;CAS;Twitter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Approximate Allocation Matching for Structural Causal Bandits with Unobserved Confounders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71724", "id": "Q3CRHnttxW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d938b739ac250e22729cc26e6176f65e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q3CRHnttxW", "openreview": "https://openreview.net/forum?id=Q3CRHnttxW", "poster": "/media/PosterPDFs/NeurIPS%202023/71724.png?t=1702416428.5145257", "slides": "https://nips.cc/virtual/2023/poster/71724", "video": "https://nips.cc/virtual/2023/poster/71724", "author_site": "Lai Wei, Muhammad Qasim Elahi, Mahsa Ghasemi, Murat Kocaoglu", "tldr": "", "abstract": "Structural causal bandit provides a framework for online decision-making problems when causal information is available. It models the stochastic environment with a structural causal model (SCM) that governs the causal relations between random variables. In each round, an agent applies an intervention (or no intervention) by setting certain variables to some constants and receives a stochastic reward from a non-manipulable variable. Though the causal structure is given, the observational and interventional distributions of these random variables are unknown beforehand, and they can only be learned through interactions with the environment. Therefore, to maximize the expected cumulative reward, it is critical to balance the explore-versus-exploit tradeoff. We assume each random variable takes a finite number of distinct values, and consider a semi-Markovian setting, where random variables are affected by unobserved confounders. Using the canonical SCM formulation to discretize the domains of unobserved variables, we efficiently integrate samples to reduce model uncertainty. This gives the decision maker a natural advantage over those in a classical multi-armed bandit setup. We provide a logarithmic asymptotic regret lower bound for the structural causal bandit problem. Inspired by the lower bound, we design an algorithm that can utilize the causal structure to accelerate the learning process and take informative and rewarding interventions. 
We establish that our algorithm achieves a logarithmic regret and demonstrate that it outperforms the existing methods via simulations.", "keywords": "multi-armed bandits;causal Inference;sequential decision-making", "primary_area": "", "supplementary_material": "", "author": "Lai Wei;Muhammad Qasim Elahi;Mahsa Ghasemi;Murat Kocaoglu", "authorids": "~Lai_Wei5;~Muhammad_Qasim_Elahi1;~Mahsa_Ghasemi1;~Murat_Kocaoglu1", "gender": "M;M;F;M", "homepage": ";https://www.linkedin.com/in/qasim-elahi-b59948133/;https://mahsaghasemi.github.io/;https://www.muratkocaoglu.com", "dblp": "36/4168-2;;206/6477;74/11343", "google_scholar": "45PJl9AAAAAJ;M7C8dFAAAAAJ;7KqsRJ8AAAAJ;7N7bzdwAAAAJ", "orcid": ";;;", "linkedin": ";;;mkocaoglu/", "or_profile": "~Lai_Wei5;~Muhammad_Qasim_Elahi1;~Mahsa_Ghasemi1;~Murat_Kocaoglu1", "aff": "Purdue University;Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu;purdue.edu", "position": "Postdoc;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwei2023approximate,\ntitle={Approximate Allocation Matching for Structural Causal Bandits with Unobserved Confounders},\nauthor={Lai Wei and Muhammad Qasim Elahi and Mahsa Ghasemi and Murat Kocaoglu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q3CRHnttxW}\n}", "github": "", "project": "", "reviewers": "BoCS;v6qA;gxS9;EzBh", "pdf_size": 3135117, "rating": "4;5;6;7", "confidence": "4;3;2;3", "soundness": "3;3;3;4", "novelty": "1;3;3;3", "presentation": "2;2;3;2", "wc_summary": "38;33;65;62", "wc_strengths": "10;28;103;112", "wc_weaknesses": "10;127;52;104", "wc_questions": "927;3;2;254", "wc_limitations": "66;2;2;164", "wc_review": "1051;193;224;696", "wc_reply_reviewers": "39;0;0;24", "wc_reply_authors": "96;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 49.5, 14.150971698084906 ], "wc_strengths_avg": [ 63.25, 44.81838350498599 ], "wc_weaknesses_avg": [ 73.25, 45.515793962096275 ], "wc_questions_avg": [ 296.5, 378.2224874329923 ], "wc_limitations_avg": [ 58.5, 66.27782434570405 ], "wc_review_avg": [ 541.0, 355.56926188859467 ], "wc_reply_reviewers_avg": [ 15.75, 16.618889854620253 ], "wc_reply_authors_avg": [ 24.0, 41.569219381653056 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15878152426246557062&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 6, "email": "purdue.edu;purdue.edu;purdue.edu;purdue.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fast and Simple Spectral Clustering in Theory and Practice", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71723", "id": "Q3FXnCPZ1X", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c5b82193c5d8e6aa5806239676ddc97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q3FXnCPZ1X", "openreview": "https://openreview.net/forum?id=Q3FXnCPZ1X", "poster": "/media/PosterPDFs/NeurIPS%202023/71723.png?t=1701565190.3112485", "slides": "https://nips.cc/virtual/2023/poster/71723", "video": "https://nips.cc/virtual/2023/poster/71723", "tldr": "", "abstract": "Spectral clustering is a popular and effective algorithm designed to find $k$ clusters in a graph $G$.\nIn the classical spectral clustering algorithm, the vertices of $G$ are embedded into $\\mathbb{R}^k$ using $k$ eigenvectors of the graph Laplacian matrix.\nHowever, computing this embedding is computationally expensive and dominates the running time of the algorithm.\nIn this paper, we present a simple spectral clustering algorithm based on a vertex embedding with $O(\\log(k))$ vectors computed by the power method.\nThe vertex embedding is computed in nearly-linear time with respect to the size of the graph, and\nthe algorithm provably recovers the ground truth clusters under natural assumptions on the input graph.\nWe evaluate the new algorithm on several synthetic and real-world datasets, finding that it is significantly faster than alternative clustering algorithms,\nwhile producing results with approximately the same clustering accuracy.", "keywords": "spectral clustering;power method;spectral graph theory;graph algorithms", "primary_area": "", "supplementary_material": "/attachment/cd6678b8f3137e11169d088bb0f011864d286ed9.zip", "author": "Peter Macgregor", "authorids": "~Peter_Macgregor1", "gender": "", "homepage": "https://pmacg.io", "dblp": "294/8868", "google_scholar": "https://scholar.google.co.uk/citations?user=t72xITMAAAAJ", "orcid": "0000-0002-1066-8798", "linkedin": "peter-macgregor-4626a993/", "or_profile": "~Peter_Macgregor1", "aff": "University of Edinburgh, University of Edinburgh", "aff_domain": "ed.ac.uk", "position": "Postdoc", "bibtex": "@inproceedings{\nmacgregor2023fast,\ntitle={Fast and Simple Spectral Clustering in Theory and Practice},\nauthor={Peter Macgregor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q3FXnCPZ1X}\n}", "github": "", "project": "", "reviewers": "6nQB;8rfq;2unS;1Mto;6vxh", "pdf_size": 491732, "rating": "4;5;6;6;7", "confidence": "5;3;5;3;2", "soundness": "4;3;3;3;4", "novelty": "3;3;2;3;4", "presentation": "3;3;3;3;4", "wc_summary": "246;36;72;300;87", "wc_strengths": "57;63;28;108;42", "wc_weaknesses": "299;64;136;243;63", "wc_questions": "10;33;83;22;68", "wc_limitations": "9;1;22;37;11", "wc_review": "621;197;341;710;271", "wc_reply_reviewers": "240;86;13;214;18", "wc_reply_authors": "139;0;0;24;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 1.2 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 148.2, 104.64110091163988 ], "wc_strengths_avg": [ 59.6, 27.089481353470024 ], "wc_weaknesses_avg": [ 161.0, 95.29533042075042 ], "wc_questions_avg": [ 43.2, 27.76616646208115 ], "wc_limitations_avg": [ 16.0, 12.457929201917949 ], "wc_review_avg": [ 428.0, 201.17256274154286 ], "wc_reply_reviewers_avg": [ 114.2, 95.99666660879429 ], "wc_reply_authors_avg": [ 32.6, 54.00592560080792 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 
], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.6210344279375829, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17102665120746020306&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ed.ac.uk", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "VanillaNet: the Power of Minimalism in Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71722", "id": "Q5Eb6qIKux", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/16336d94a5ffca8de019087ab7fe403f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q5Eb6qIKux", "openreview": "https://openreview.net/forum?id=Q5Eb6qIKux", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71722", "video": "https://nips.cc/virtual/2023/poster/71722", "author_site": "Hanting Chen, Yunhe Wang, Jianyuan Guo, Dacheng Tao", "tldr": "", "abstract": "At the heart of foundation models is the philosophy of \"more is different\", exemplified by the astonishing success in computer vision and natural language processing. However, the challenges of optimization and inherent complexity of transformer models call for a paradigm shift towards simplicity. In this study, we introduce VanillaNet, a neural network architecture that embraces elegance in design. By avoiding high depth, shortcuts, and intricate operations like self-attention, VanillaNet is refreshingly concise yet remarkably powerful. Each layer is carefully crafted to be compact and straightforward, with nonlinear activation functions pruned after training to restore the original architecture. VanillaNet overcomes the challenges of inherent complexity, making it ideal for resource-constrained environments. Its easy-to-understand and highly simplified architecture opens new possibilities for efficient deployment. Extensive experimentation demonstrates that VanillaNet delivers performance on par with renowned deep neural networks and vision transformers, showcasing the power of minimalism in deep learning. This visionary journey of VanillaNet has significant potential to redefine the landscape and challenge the status quo of foundation model, setting a new path for elegant and effective model design. 
Pre-trained models and codes are available at https://github.com/huawei-noah/VanillaNet and https://gitee.com/mindspore/models/tree/master/research/cv/vanillanet", "keywords": "computer vision;foundation models.", "primary_area": "", "supplementary_material": "/attachment/3ef6884512c7515e0b8a0c1914ec47d033621116.pdf", "author": "Hanting Chen;Yunhe Wang;Jianyuan Guo;Dacheng Tao", "authorids": "~Hanting_Chen1;~Yunhe_Wang1;~Jianyuan_Guo1;~Dacheng_Tao1", "gender": "M;M;M;", "homepage": ";https://www.wangyunhe.site/;https://ggjy.github.io/;", "dblp": "232/2060;63/8217-1;190/0258;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-0142-509X;;", "linkedin": ";;;", "or_profile": "~Hanting_Chen1;~Yunhe_Wang1;~Jianyuan_Guo1;~Dacheng_Tao1", "aff": "Huawei Technologies Ltd.;Huawei Noah's Ark Lab;University of Sydney;", "aff_domain": "huawei.com;huawei.com;usyd.edu.au;", "position": "Researcher;Principal Researcher;PhD student;", "bibtex": "@inproceedings{\nchen2023vanillanet,\ntitle={VanillaNet: the Power of Minimalism in Deep Learning},\nauthor={Hanting Chen and Yunhe Wang and Jianyuan Guo and Dacheng Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q5Eb6qIKux}\n}", "github": "", "project": "", "reviewers": "R2Cr;prVa;jvnA;xYue;qK4N", "pdf_size": 604030, "rating": "5;5;6;7;8", "confidence": "4;4;2;5;4", "soundness": "3;4;3;3;3", "novelty": "3;3;3;4;4", "presentation": "2;3;3;3;3", "wc_summary": "97;81;34;109;136", "wc_strengths": "53;170;75;128;69", "wc_weaknesses": "196;87;47;108;199", "wc_questions": "91;52;23;62;4", "wc_limitations": "29;1;6;1;5", "wc_review": "466;391;185;408;413", "wc_reply_reviewers": "105;0;0;0;26", "wc_reply_authors": "121;0;0;0;0", "reply_reviewers": "1;0;0;0;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 91.4, 33.86207317929603 ], "wc_strengths_avg": [ 99.0, 43.529300476805275 ], "wc_weaknesses_avg": [ 127.4, 60.506528573369664 ], "wc_questions_avg": [ 46.4, 30.36181812737834 ], "wc_limitations_avg": [ 8.4, 10.49952379872535 ], "wc_review_avg": [ 372.6, 97.09912460985423 ], "wc_reply_reviewers_avg": [ 26.2, 40.66644808684427 ], "wc_reply_authors_avg": [ 24.2, 48.39999999999999 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2100420126042015, "gs_citation": 161, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11059239708552408304&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "huawei.com;huawei.com;usyd.edu.au;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Huawei;University of Sydney", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.sydney.edu.au", "aff_unique_abbr": "Huawei;USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Australia" }, { "title": "Refining Diffusion Planner for Reliable Behavior Synthesis by Automatic Detection of Infeasible Plans", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71721", "id": "Q5tuGgqJwt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c5722bad9759216474df8fc46c97af2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q5tuGgqJwt", "openreview": "https://openreview.net/forum?id=Q5tuGgqJwt", "poster": "/media/PosterPDFs/NeurIPS%202023/71721.png?t=1701327629.212098", "slides": "https://nips.cc/virtual/2023/poster/71721", "video": "https://nips.cc/virtual/2023/poster/71721", "author_site": "Kyowoon Lee, Seongun Kim, Jaesik Choi", "tldr": "", "abstract": "Diffusion-based planning has shown promising results in long-horizon, sparse-reward tasks by training trajectory diffusion models and conditioning the sampled trajectories using auxiliary guidance functions. However, due to their nature as generative models, diffusion models are not guaranteed to generate feasible plans, resulting in failed execution and precluding planners from being useful in safety-critical applications. In this work, we propose a novel approach to refine unreliable plans generated by diffusion models by providing refining guidance to error-prone plans. To this end, we suggest a new metric named restoration gap for evaluating the quality of individual plans generated by the diffusion model. A restoration gap is estimated by a gap predictor which produces restoration gap guidance to refine a diffusion planner. We additionally present an attribution map regularizer to prevent adversarial refining guidance that could be generated from the sub-optimal gap predictor, which enables further refinement of infeasible plans. We demonstrate the effectiveness of our approach on three different benchmarks in offline control settings that require long-horizon planning. We also illustrate that our approach presents explainability by presenting the attribution maps of the gap predictor and highlighting error-prone transitions, allowing for a deeper understanding of the generated plans.", "keywords": "Offline Reinforcement Learning;Trajectory Optimization;Diffusion Models;Sequential Decision Making", "primary_area": "", "supplementary_material": "", "author": "Kyowoon Lee;Seongun Kim;Jaesik Choi", "authorids": "~Kyowoon_Lee1;~Seongun_Kim1;~Jaesik_Choi1", "gender": ";;M", "homepage": "https://leekwoon.github.io/;;https://sailab.kaist.ac.kr/jaesik", "dblp": "219/6226;309/2897;13/1402", "google_scholar": "UrtK8HUAAAAJ;https://scholar.google.com/citations?hl=ko;RqMLVzUAAAAJ", "orcid": ";;", "linkedin": ";seongun-kim-663665237/;", "or_profile": "~Kyowoon_Lee1;~Seongun_Kim1;~Jaesik_Choi1", "aff": "Ulsan National Institute of Science and Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "unist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlee2023refining,\ntitle={Refining Diffusion Planner for Reliable Behavior Synthesis by Automatic Detection of Infeasible Plans},\nauthor={Kyowoon Lee and Seongun Kim and Jaesik Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q5tuGgqJwt}\n}", "github": "", "project": "", "reviewers": "9DAN;mq5w;N3Wq;iw4y;pN8B", "pdf_size": 4305987, "rating": "5;7;7;7;8", "confidence": "4;3;3;4;3", "soundness": "2;3;3;3;3", "novelty": "1;3;3;3;3", "presentation": "3;4;3;2;3", "wc_summary": "73;251;96;202;47", "wc_strengths": "61;248;121;52;73", "wc_weaknesses": 
"205;342;107;229;62", "wc_questions": "441;242;74;145;34", "wc_limitations": "61;43;37;1;31", "wc_review": "841;1126;435;629;247", "wc_reply_reviewers": "309;17;19;142;0", "wc_reply_authors": "390;94;101;80;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 133.8, 78.7994923841518 ], "wc_strengths_avg": [ 111.0, 72.517584074485 ], "wc_weaknesses_avg": [ 189.0, 98.09994903158717 ], "wc_questions_avg": [ 187.2, 145.24654901236036 ], "wc_limitations_avg": [ 34.6, 19.571407716360106 ], "wc_review_avg": [ 655.6, 307.23775809623396 ], "wc_reply_reviewers_avg": [ 97.4, 117.35518735871882 ], "wc_reply_authors_avg": [ 133.0, 133.48557974552907 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=722104127281711064&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "unist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Ulsan National Institute of Science and Technology;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.unist.ac.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "UNIST;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Unified 3D Segmenter As Prototypical Classifiers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71720", "id": "Q6zd1hr7sD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/916cb4e1aeafaa0757953c9bacd17337-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q6zd1hr7sD", "openreview": "https://openreview.net/forum?id=Q6zd1hr7sD", "poster": "/media/PosterPDFs/NeurIPS%202023/71720.png?t=1697343899.932113", "slides": "https://nips.cc/virtual/2023/poster/71720", "video": "https://nips.cc/virtual/2023/poster/71720", "author_site": "Zheyun Qin, Zheyun Qin, Cheng Han, Qifan Wang, Xiushan Nie, Yilong Yin, Lu Xiankai", "tldr": "", "abstract": "The task of point cloud segmentation, comprising semantic, instance, and panoptic segmentation, has been mainly tackled by designing task-specific network architectures, which often lack the flexibility to generalize across tasks, thus resulting in a fragmented research landscape. In this paper, we introduce ProtoSEG, a prototype-based model that unifies semantic, instance, and panoptic segmentation tasks. Our approach treats these three homogeneous tasks as a classification problem with different levels of granularity. By leveraging a Transformer architecture, we extract point embeddings to optimize prototype-class distances and dynamically learn class prototypes to accommodate the end tasks. Our prototypical design enjoys simplicity and transparency, powerful representational learning, and ad-hoc explainability. 
Empirical results demonstrate that ProtoSEG outperforms concurrent well-known specialized architectures on 3D point cloud benchmarks, achieving 72.3%, 76.4% and 74.2% mIoU for semantic segmentation on S3DIS, ScanNet V2 and SemanticKITTI, 66.8% mCov and 51.2% mAP for instance segmentation on S3DIS and ScanNet V2, 62.4% PQ for panoptic segmentation on SemanticKITTI, validating the strength of our concept and the effectiveness of our algorithm. The code and models are available at https://github.com/zyqin19/PROTOSEG.", "keywords": "Point Cloud Segmentation;Prototypical Classifier;Unified Framework", "primary_area": "", "supplementary_material": "/attachment/4452f49685358440643aa43b2cc5b98487db15f4.pdf", "author": "Zheyun Qin;Cheng Han;Qifan Wang;Xiushan Nie;Yilong Yin;Xiankai Lu", "authorids": "~Zheyun_Qin1;~Cheng_Han1;~Qifan_Wang2;~Xiushan_Nie1;~Yilong_Yin1;~Xiankai_Lu1", "gender": "M;M;M;M;M;M", "homepage": "https://zyqin19.github.io/;https://chenghan111.github.io/;https://wqfcr.github.io/;http://niexsh.sdufe.edu.cn;https://faculty.sdu.edu.cn/ylyin;https://sites.google.com/site/xiankailu111/", "dblp": "256/8991.html;53/6096-1.html;33/8610;03/8117;;153/2122", "google_scholar": "https://scholar.google.cz/citations?user=Kp6ek-kAAAAJ;VgkEKZwAAAAJ;LrSyLosAAAAJ;;;QS5V5b8AAAAJ", "orcid": "0000-0003-2564-071X;0000-0002-8145-3436;0000-0002-7570-5756;0000-0001-9644-9723;;", "linkedin": ";chenghan-87129219a/;;;;", "or_profile": "~Zheyun_Qin1;~Cheng_Han1;~Qifan_Wang2;~Xiushan_Nie1;~Yilong_Yin1;~Xiankai_Lu1", "aff": "Shandong University;Rochester Institute of Technology;Meta AI;Shandong Jianzhu University;Shandong University;Shandong University", "aff_domain": "sdu.edu.cn;rit.edu;fb.com;sdjzu.edu.cn;sdu.edu.cn;sdu.edu.cn", "position": "PhD student;PhD student;Principal Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqin2023unified,\ntitle={Unified 3D Segmenter As Prototypical Classifiers},\nauthor={Zheyun Qin and Cheng Han and Qifan Wang and Xiushan Nie and Yilong Yin and Xiankai Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q6zd1hr7sD}\n}", "github": "", "project": "", "reviewers": "MoHU;Mj9E;saLq;VQFt", "pdf_size": 1741527, "rating": "4;5;6;6", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "41;55;39;96", "wc_strengths": "46;61;54;72", "wc_weaknesses": "125;75;181;366", "wc_questions": "36;6;5;92", "wc_limitations": "6;10;5;31", "wc_review": "254;207;284;657", "wc_reply_reviewers": "0;0;71;432", "wc_reply_authors": "0;0;88;832", "reply_reviewers": "0;0;1;3", "reply_authors": "1;1;2;4", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.75, 22.92787604642 ], "wc_strengths_avg": [ 58.25, 9.54921462739214 ], "wc_weaknesses_avg": [ 186.75, 110.07355495304037 ], "wc_questions_avg": [ 34.75, 35.32262023123426 ], "wc_limitations_avg": [ 13.0, 10.559356040971437 ], "wc_review_avg": [ 350.5, 179.0733090105837 ], "wc_reply_reviewers_avg": [ 125.75, 179.1736238959295 ], "wc_reply_authors_avg": [ 230.0, 349.4166567294696 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 21, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=4408498548908559837&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sdu.edu.cn;rit.edu;fb.com;sdjzu.edu.cn;sdu.edu.cn;sdu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Shandong University;Rochester Institute of Technology;Meta;Shandong Jianzhu University", "aff_unique_dep": ";;Meta AI;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.rit.edu;https://meta.com;http://www.sdjzu.edu.cn", "aff_unique_abbr": "SDU;RIT;Meta;SDJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "SegRefiner: Towards Model-Agnostic Segmentation Refinement with Discrete Diffusion Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71719", "id": "Q9CNA7B7v2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc0cc55dca3d791c4a0bb2d8ddeefe4f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Q9CNA7B7v2", "openreview": "https://openreview.net/forum?id=Q9CNA7B7v2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71719", "video": "https://nips.cc/virtual/2023/poster/71719", "author_site": "Mengyu Wang, Henghui Ding, Jun Hao Liew, Jiajun Liu, Yao Zhao, Yunchao Wei", "tldr": "", "abstract": "In this paper, we explore a principal way to enhance the quality of object masks produced by different segmentation models. We propose a model-agnostic solution called SegRefiner, which offers a novel perspective on this problem by interpreting segmentation refinement as a data generation process. As a result, the refinement process can be smoothly implemented through a series of denoising diffusion steps. Specifically, SegRefiner takes coarse masks as inputs and refines them using a discrete diffusion process. By predicting the label and corresponding states-transition probabilities for each pixel, SegRefiner progressively refines the noisy masks in a conditional denoising manner. To assess the effectiveness of SegRefiner, we conduct comprehensive experiments on various segmentation tasks, including semantic segmentation, instance segmentation, and dichotomous image segmentation. The results demonstrate the superiority of our SegRefiner from multiple aspects. Firstly, it consistently improves both the segmentation metrics and boundary metrics across different types of coarse masks. Secondly, it outperforms previous model-agnostic refinement methods by a significant margin. Lastly, it exhibits a strong capability to capture extremely fine details when refining high-resolution images. 
The source code and trained models are available at [SegRefiner.git](https://github.com/MengyuWang826/SegRefiner)", "keywords": "Refinement;Segmentation;Discrete Diffusion", "primary_area": "", "supplementary_material": "/attachment/e6266f4a020bcb9fef8d45c399f0847e85d93892.pdf", "author": "Mengyu Wang;Henghui Ding;Jun Hao Liew;Jiajun Liu;Yao Zhao;Yunchao Wei", "authorids": "~Mengyu_Wang5;~Henghui_Ding2;~Jun_Hao_Liew1;~Jiajun_Liu2;~Yao_Zhao1;~Yunchao_Wei1", "gender": "M;;M;M;M;M", "homepage": ";;https://github.com/liu6381810;http://mepro.bjtu.edu.cn;https://weiyc.github.io/;https://henghuiding.github.io/", "dblp": "179/8319-3;;;45/2091-1.html;118/5394;230/1216", "google_scholar": "K5RXxUoAAAAJ;https://scholar.google.com.sg/citations?user=8gm-CYYAAAAJ;;474TbQYAAAAJ;https://scholar.google.com.sg/citations?user=qL9Csv0AAAAJ;WI_flSwAAAAJ", "orcid": ";;;;;0000-0003-4868-6526", "linkedin": ";;;;;", "or_profile": "~Mengyu_Wang5;~Jun_Hao_Liew1;~Jiajun_Liu2;~Yao_Zhao1;~Yunchao_Wei1;~Henghui_Ding1", "aff": "Beijing Jiaotong University;ByteDance;;Beijing Jiaotong University;Beijing Jiaotong University;Nanyang Technological University", "aff_domain": "bjtu.edu.cn;bytedance.com;;bjtu.edu.cn;bjtu.edu.cn;ntu.edu.sg", "position": "Undergrad student;Researcher;;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nwang2023segrefiner,\ntitle={SegRefiner: Towards Model-Agnostic Segmentation Refinement with Discrete Diffusion Process},\nauthor={Mengyu Wang and Henghui Ding and Jun Hao Liew and Jiajun Liu and Yao Zhao and Yunchao Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Q9CNA7B7v2}\n}", "github": "", "project": "", "reviewers": "Xhma;oFWK;cXkh;cPQx;ygQQ", "pdf_size": 47528955, "rating": "6;6;7;7;8", "confidence": "3;4;4;3;4", "soundness": "3;3;3;4;4", "novelty": "3;3;2;3;4", "presentation": "3;3;3;3;3", "wc_summary": "78;167;52;152;71", "wc_strengths": "165;70;21;40;154", "wc_weaknesses": "152;122;121;82;11", "wc_questions": "145;39;3;5;51", "wc_limitations": "22;15;51;9;53", "wc_review": "562;413;248;288;340", "wc_reply_reviewers": "16;28;193;15;0", "wc_reply_authors": "0;0;761;0;0", "reply_reviewers": "1;1;2;1;0", "reply_authors": "1;1;3;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.0, 46.35083602266522 ], "wc_strengths_avg": [ 90.0, 58.96100406200695 ], "wc_weaknesses_avg": [ 97.6, 48.67278500353149 ], "wc_questions_avg": [ 48.6, 51.71305444469511 ], "wc_limitations_avg": [ 30.0, 18.439088914585774 ], "wc_review_avg": [ 370.2, 110.65333253002369 ], "wc_reply_reviewers_avg": [ 50.4, 71.8515135539955 ], "wc_reply_authors_avg": [ 152.2, 304.4 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10820101128826270783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bjtu.edu.cn;bytedance.com;;bjtu.edu.cn;bjtu.edu.cn;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Beijing Jiao Tong University;ByteDance;Nanyang Technological University", "aff_unique_dep": ";;", "aff_unique_url": 
"http://www.njtu.edu.cn/en;https://www.bytedance.com;https://www.ntu.edu.sg", "aff_unique_abbr": "BJTU;ByteDance;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "DiffAttack: Evasion Attacks Against Diffusion-Based Adversarial Purification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71718", "id": "QB7ot7p6j7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ea0b28cbbd0cbc45ec4ac38e92da9cb2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QB7ot7p6j7", "openreview": "https://openreview.net/forum?id=QB7ot7p6j7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71718", "video": "https://nips.cc/virtual/2023/poster/71718", "author_site": "Mintong Kang, Dawn Song, Bo Li", "tldr": "", "abstract": "Diffusion-based purification defenses leverage diffusion models to remove crafted perturbations of adversarial examples and achieve state-of-the-art robustness. Recent studies show that even advanced attacks cannot break such defenses effectively, since the purification process induces an extremely deep computational graph which poses the potential problem of gradient obfuscation, high memory cost, and unbounded randomness. In this paper, we propose a unified framework DiffAttack to perform effective and efficient attacks against diffusion-based purification defenses, including both DDPM and score-based approaches. In particular, we propose a deviated-reconstruction loss at intermediate diffusion steps to induce inaccurate density gradient estimation to tackle the problem of vanishing/exploding gradients. We also provide a segment-wise forwarding-backwarding algorithm, which leads to memory-efficient gradient backpropagation. We validate the attack effectiveness of DiffAttack compared with existing adaptive attacks on CIFAR-10 and ImageNet. We show that DiffAttack decreases the robust accuracy of models compared with SOTA attacks by over 20\\% on CIFAR-10 under $\\ell_\\infty$ attack $(\\epsilon=8/255)$, and over 10\\% on ImageNet under $\\ell_\\infty$ attack $(\\epsilon=4/255)$. 
We conduct a series of ablation studies, and we find 1) DiffAttack with the deviated-reconstruction loss added over uniformly sampled time steps is more effective than that added over only initial/final steps, and 2) diffusion-based purification with a moderate diffusion length is more robust under DiffAttack.", "keywords": "adversarial attack;adversarial purification;adversarial robustness;diffusion model", "primary_area": "", "supplementary_material": "", "author": "Mintong Kang;Dawn Song;Bo Li", "authorids": "~Mintong_Kang1;~Dawn_Song1;~Bo_Li19", "gender": "M;F;F", "homepage": "https://kangmintong.github.io/;;http://boli.cs.illinois.edu/", "dblp": "303/0335.html;s/DXSong;50/3402-26", "google_scholar": "oHXw2SAAAAAJ;;K8vJkTcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mintong_Kang1;~Dawn_Song1;~Bo_Li19", "aff": "University of Illinois, Urbana Champaign;University of California, Berkeley;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;berkeley.edu;illinois.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkang2023diffattack,\ntitle={DiffAttack: Evasion Attacks Against Diffusion-Based Adversarial Purification},\nauthor={Mintong Kang and Dawn Song and Bo Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QB7ot7p6j7}\n}", "github": "", "project": "", "reviewers": "Vu7G;3df8;yE5n;EofG", "pdf_size": 1645773, "rating": "5;5;6;6", "confidence": "2;4;4;3", "soundness": "3;3;2;4", "novelty": "3;2;2;3", "presentation": "3;3;1;3", "wc_summary": "63;128;122;77", "wc_strengths": "53;51;197;22", "wc_weaknesses": "191;337;1393;60", "wc_questions": "3;86;47;26", "wc_limitations": "1;20;1;1", "wc_review": "311;622;1760;186", "wc_reply_reviewers": "30;182;567;14", "wc_reply_authors": "0;414;986;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;4;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 97.5, 28.02231253840411 ], "wc_strengths_avg": [ 80.75, 68.22893447797642 ], "wc_weaknesses_avg": [ 495.25, 527.4961492750444 ], "wc_questions_avg": [ 40.5, 30.532769281544052 ], "wc_limitations_avg": [ 5.75, 8.227241335952167 ], "wc_review_avg": [ 719.75, 621.2166993087034 ], "wc_reply_reviewers_avg": [ 198.25, 222.76487043517432 ], "wc_reply_authors_avg": [ 350.0, 404.2251847671048 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15982365209760644095&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "illinois.edu;berkeley.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.berkeley.edu", "aff_unique_abbr": "UIUC;UC Berkeley", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Urbana-Champaign;Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Nearly Tight Bounds For Differentially Private Multiway Cut", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71717",
"id": "QDByreuQyk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e8f257e054abd24c550d55e57cec274-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QDByreuQyk", "openreview": "https://openreview.net/forum?id=QDByreuQyk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71717", "video": "https://nips.cc/virtual/2023/poster/71717", "author_site": "Mina Dalirrooyfard, Slobodan Mitrovic, Yuriy Nevmyvaka", "tldr": "", "abstract": "Finding min $s$-$t$ cuts in graphs is a basic algorithmic tool, with applications in image segmentation, community detection, reinforcement learning, and data clustering. In this problem, we are given two nodes as terminals and the goal is to remove the smallest number of edges from the graph so that these two terminals are disconnected. We study the complexity of differential privacy for the min $s$-$t$ cut problem and show nearly tight lower and upper bounds where we achieve privacy at no cost for running time efficiency. We also develop a differentially private algorithm for the multiway $k$-cut problem, in which we are given $k$ nodes as terminals that we would like to disconnect.\n As a function of $k$, we obtain privacy guarantees that are exponentially more efficient than applying the advanced composition theorem to known algorithms for multiway $k$-cut.\n Finally, we empirically evaluate the approximation of our differentially private min $s$-$t$ cut algorithm and show that it almost matches the quality of the output of non-private ones.", "keywords": "Differential Privacy;clustering;multiway cut;min cut;graph partitioning", "primary_area": "", "supplementary_material": "/attachment/38ad5946889dc2c42415737fb2f5de8490beb750.pdf", "author": "Mina Dalirrooyfard;Slobodan Mitrovic;Yuriy Nevmyvaka", "authorids": "~Mina_Dalirrooyfard1;~Slobodan_Mitrovic1;~Yuriy_Nevmyvaka1", "gender": "F;;", "homepage": "https://ca.linkedin.com/in/mina-dalirrooyfard-6691a153?original_referer=https%3A%2F%2Fwww.google.com%2F;;", "dblp": "209/5851;;92/1859", "google_scholar": "vatvqfAAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mina_Dalirrooyfard1;~Slobodan_Mitrovic1;~Yuriy_Nevmyvaka1", "aff": "Morgan Stanley;;Morgan Stanley", "aff_domain": "morganstanley.com;;morganstanley.com", "position": "Researcher;;Principal Researcher", "bibtex": "@inproceedings{\ndalirrooyfard2023nearly,\ntitle={Nearly Tight Bounds For Differentially Private Multiway Cut},\nauthor={Mina Dalirrooyfard and Slobodan Mitrovic and Yuriy Nevmyvaka},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QDByreuQyk}\n}", "github": "", "project": "", "reviewers": "PcBW;u9Zr;pWxc", "pdf_size": 422237, "rating": "6;6;8", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "4;4;4", "wc_summary": "95;79;19", "wc_strengths": "86;29;50", "wc_weaknesses": "49;168;4", "wc_questions": "120;54;113", "wc_limitations": "9;1;1", "wc_review": "359;331;187", "wc_reply_reviewers": "22;74;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 64.33333333333333, 32.71425105702746 ], "wc_strengths_avg": [ 55.0, 23.53720459187964 ], "wc_weaknesses_avg": [ 
73.66666666666667, 69.18734630616272 ], "wc_questions_avg": [ 95.66666666666667, 29.6010510323911 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 292.3333333333333, 75.35397947170556 ], "wc_reply_reviewers_avg": [ 32.0, 31.026870075253587 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13393163592472064502&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "morganstanley.com;;morganstanley.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Morgan Stanley", "aff_unique_dep": "", "aff_unique_url": "https://www.morganstanley.com", "aff_unique_abbr": "Morgan Stanley", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "YouTube-ASL: A Large-Scale, Open-Domain American Sign Language-English Parallel Corpus", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73599", "id": "QEDjXv9OyY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5c61452daca5f0c260e683b317d13a3f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=QEDjXv9OyY", "openreview": "https://openreview.net/forum?id=QEDjXv9OyY", "poster": "/media/PosterPDFs/NeurIPS%202023/73599.png?t=1697661937.0858037", "slides": "https://nips.cc/virtual/2023/poster/73599", "video": "https://nips.cc/virtual/2023/poster/73599", "author_site": "Dave Uthus, Garrett Tanzer, Manfred Georg", "tldr": "", "abstract": "Machine learning for sign languages is bottlenecked by data. In this paper, we present YouTube-ASL, a large-scale, open-domain corpus of American Sign Language (ASL) videos and accompanying English captions drawn from YouTube. With ~1000 hours of videos and >2500 unique signers, YouTube-ASL is ~3x as large and has ~10x as many unique signers as the largest prior ASL dataset. 
We train baseline models for ASL to English translation on YouTube-ASL and evaluate them on How2Sign, where we achieve a new fine-tuned state of the art of 12.397 BLEU and, for the first time, nontrivial zero-shot results.", "keywords": "Sign language translation;dataset", "primary_area": "", "supplementary_material": "/attachment/62fc1c35806e7321ab435ab8f12a166a9e5ffd29.pdf", "author": "David Uthus;Garrett Tanzer;Manfred Georg", "authorids": "~David_Uthus1;~Garrett_Tanzer1;~Manfred_Georg2", "gender": ";M;M", "homepage": ";;", "dblp": "09/2971.html;238/9928.html;42/6239", "google_scholar": "9k31iVQAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": ";;", "or_profile": "~David_Uthus1;~Garrett_Tanzer1;~Manfred_Georg2", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Software Engineer;Researcher;Researcher", "bibtex": "@inproceedings{\nuthus2023youtubeasl,\ntitle={YouTube-{ASL}: A Large-Scale, Open-Domain American Sign Language-English Parallel Corpus},\nauthor={David Uthus and Garrett Tanzer and Manfred Georg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=QEDjXv9OyY}\n}", "github": "", "project": "", "reviewers": "3tD6;HbjK;Px9y;f1hs;t1a9", "pdf_size": 283952, "rating": "5;5;6;7;9", "confidence": "5;5;4;3;4", "wc_summary_and_contributions": "61;75;86;28;51", "wc_strengths": "30;45;66;21;82", "wc_improvement": "93;94;119;90;11", "wc_limitations": "111;6;1;12;11", "wc_correctness": "16;4;1;4;8", "wc_clarity": "7;5;1;24;8", "wc_relation_to_prior_work": "216;8;1;1;9", "wc_documentation": "34;4;30;1;22", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "569;242;306;182;203", "wc_reply_reviewers": "0;16;0;0;0", "wc_reply_authors": "996;520;455;285;6", "reply_reviewers": "0;1;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 1.4966629547095764 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 60.2, 20.033971149025845 ], "wc_strengths_avg": [ 48.8, 22.533530571128885 ], "wc_improvement_avg": [ 81.4, 36.707492423209736 ], "wc_limitations_avg": [ 28.2, 41.58557442190741 ], "wc_correctness_avg": [ 6.6, 5.2 ], "wc_clarity_avg": [ 9.0, 7.874007874011811 ], "wc_relation_to_prior_work_avg": [ 47.0, 84.56713309554723 ], "wc_documentation_avg": [ 18.2, 13.422369388450013 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.4, 140.7786915694275 ], "wc_reply_reviewers_avg": [ 3.2, 6.400000000000001 ], "wc_reply_authors_avg": [ 452.4, 324.76859454078993 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6071428571428572, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7650186718440627975&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "QFcE9QGP5I", "title": "Adaptive Quasi-Newton and Anderson Acceleration Framework with Explicit Global Convergence Rates", "track": "main", "status": "Reject", "tldr": "", 
"abstract": "Despite the impressive numerical performance of quasi-Newton and Anderson/nonlinear acceleration methods, their global convergence rates have remained elusive for over 50 years. This paper addresses this long-standing question by introducing a framework that derives novel and adaptive quasi-Newton or nonlinear/Anderson acceleration schemes. Under mild assumptions, the proposed iterative methods exhibit explicit, non-asymptotic convergence rates that blend those of gradient descent and Cubic Regularized Newton's method. Notably, these rates are achieved adaptively, as the method autonomously determines the optimal step size using a simple backtracking strategy. The proposed approach also includes an accelerated version that improves the convergence rate on convex functions. Numerical experiments demonstrate the efficiency of the proposed framework, even compared to a fine-tuned BFGS algorithm with line search.", "keywords": "Quasi Newton;Nonlinear acceleration;Accelerated;Convergence;Rate;Adaptive;Nonasymptotic", "primary_area": "", "supplementary_material": "/attachment/30b18e047683e73c78288d5e630090ebfd18b126.pdf", "author": "Damien Scieur", "authorids": "~Damien_Scieur3", "gender": "M", "homepage": "https://damienscieur.com/", "dblp": "191/6712", "google_scholar": "https://scholar.google.fr/citations?user=hNscQzgAAAAJ", "orcid": "", "linkedin": "damien-scieur-6873ba82/", "or_profile": "~Damien_Scieur3", "aff": "Samsung", "aff_domain": "samsung.com", "position": "Researcher", "bibtex": "@misc{\nscieur2023adaptive,\ntitle={Adaptive Quasi-Newton and Anderson Acceleration Framework with Explicit Global Convergence Rates},\nauthor={Damien Scieur},\nyear={2023},\nurl={https://openreview.net/forum?id=QFcE9QGP5I}\n}", "github": "", "project": "", "reviewers": "4Qfc;SDpo;FnQv;dfNf", "site": "https://openreview.net/forum?id=QFcE9QGP5I", "pdf_size": 367735, "rating": "4;4;6;7", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;2;3", "wc_summary": "88;37;89;66", "wc_strengths": "83;26;85;59", "wc_weaknesses": "634;1058;508;70", "wc_questions": "92;7;196;207", "wc_limitations": "22;7;1;27", "wc_review": "919;1135;879;429", "wc_reply_reviewers": "16;9;82;22", "wc_reply_authors": "110;108;238;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 21.15419580130618 ], "wc_strengths_avg": [ 63.25, 23.81569860407206 ], "wc_weaknesses_avg": [ 567.5, 352.1572802030366 ], "wc_questions_avg": [ 125.5, 81.81839646436491 ], "wc_limitations_avg": [ 14.25, 10.615436872781073 ], "wc_review_avg": [ 840.5, 256.7620493764606 ], "wc_reply_reviewers_avg": [ 32.25, 29.089302157322372 ], "wc_reply_authors_avg": [ 119.75, 76.77361200308346 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:djmN-0ENMDcJ:scholar.google.com/&scioq=Adaptive+Quasi-Newton+and+Anderson+Acceleration+Framework+with+Explicit+Global+Convergence+Rates&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": 
"Samsung", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "CAT-Walk: Inductive Hypergraph Learning via Set Walks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71716", "id": "QG4nJBNEar", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6739d8df16b5bce3587ca5f18662a6aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QG4nJBNEar", "openreview": "https://openreview.net/forum?id=QG4nJBNEar", "poster": "/media/PosterPDFs/NeurIPS%202023/71716.png?t=1702393462.3904421", "slides": "https://nips.cc/virtual/2023/poster/71716", "video": "https://nips.cc/virtual/2023/poster/71716", "author_site": "Ali Behrouz, Farnoosh Hashemi, Sadaf Sadeghian, Margo Seltzer", "tldr": "", "abstract": "Temporal hypergraphs provide a powerful paradigm for modeling time-dependent, higher-order interactions in complex systems. Representation learning for hypergraphs is essential for extracting patterns of the higher-order interactions that are critically important in real-world problems in social network analysis, neuroscience, finance, etc. However, existing methods are typically designed only for specific tasks or static hypergraphs. We present CAT-Walk, an inductive method that learns the underlying dynamic laws that govern the temporal and structural processes underlying a temporal hypergraph. CAT-Walk introduces a temporal, higher-order walk on hypergraphs, SetWalk, that extracts higher-order causal patterns. CAT-Walk uses a novel adaptive and permutation invariant pooling strategy, SetMixer, along with a set-based anonymization process that hides the identity of hyperedges. Finally, we present a simple yet effective neural network model to encode hyperedges. Our evaluation on 10 hypergraph benchmark datasets shows that CAT-Walk attains outstanding performance on temporal hyperedge prediction benchmarks in both inductive and transductive settings. It also shows competitive performance with state-of-the-art methods for node classification. 
(https://github.com/ubc-systopia/CATWalk)", "keywords": "Hypergraph Learning;Temporal Networks;Higher-order Temporal Motifs;Inductive Representation Learning", "primary_area": "", "supplementary_material": "/attachment/b04d42a747286811000db4364482f0f7256decbe.zip", "author": "Ali Behrouz;Farnoosh Hashemi;Sadaf Sadeghian;Margo Seltzer", "authorids": "~Ali_Behrouz1;~Farnoosh_Hashemi1;~Sadaf_Sadeghian1;~Margo_Seltzer1", "gender": "M;F;F;F", "homepage": "https://Abehrouz.github.io;https://farnooshha.github.io/;;https://www.seltzer.com/margo", "dblp": "220/4163;318/9574;277/2578;s/MargoISeltzer", "google_scholar": "UbwVuqIAAAAJ;https://scholar.google.com/citations?hl=en;6xYHgP0AAAAJ;https://scholar.google.com.tw/citations?user=XeyiyUYAAAAJ", "orcid": ";;;", "linkedin": "ali-behrouz-506aa2127;farnoosh-hashemi-a48328123/;sadaf-sadeghian-53b8b4174/;", "or_profile": "~Ali_Behrouz1;~Farnoosh_Hashemi1;~Sadaf_Sadeghian1;~Margo_I._Seltzer1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia", "aff_domain": "cs.ubc.ca;ubc.ca;cs.ubc.ca;ubc.ca", "position": "MS student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nbehrouz2023catwalk,\ntitle={{CAT}-Walk: Inductive Hypergraph Learning via Set Walks},\nauthor={Ali Behrouz and Farnoosh Hashemi and Sadaf Sadeghian and Margo Seltzer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QG4nJBNEar}\n}", "github": "", "project": "", "reviewers": "H3jP;eydA;nNT7;BLYu", "pdf_size": 1880504, "rating": "6;7;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "45;151;57;117", "wc_strengths": "82;128;64;76", "wc_weaknesses": "111;449;165;53", "wc_questions": "13;10;64;31", "wc_limitations": "1;9;17;9", "wc_review": "252;747;367;286", "wc_reply_reviewers": "0;18;8;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.5, 43.41370751271999 ], "wc_strengths_avg": [ 87.5, 24.264171117101856 ], "wc_weaknesses_avg": [ 194.5, 152.17999211460094 ], "wc_questions_avg": [ 29.5, 21.47673159491453 ], "wc_limitations_avg": [ 9.0, 5.656854249492381 ], "wc_review_avg": [ 413.0, 197.30813465237566 ], "wc_reply_reviewers_avg": [ 9.5, 6.5383484153110105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3902504454773115353&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "cs.ubc.ca;ubc.ca;cs.ubc.ca;ubc.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Neural Oscillators are Universal", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71715", "id": "QGQsOZcQ2H", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/923285deb805c3e14e1aeebc9854d644-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QGQsOZcQ2H", "openreview": "https://openreview.net/forum?id=QGQsOZcQ2H", "poster": "/media/PosterPDFs/NeurIPS%202023/71715.png?t=1702335107.0592122", "slides": "https://nips.cc/virtual/2023/poster/71715", "video": "https://nips.cc/virtual/2023/poster/71715", "author_site": "Samuel Lanthaler, T. Konstantin Rusch, Siddhartha Mishra", "tldr": "", "abstract": "Coupled oscillators are being increasingly used as the basis of machine learning (ML) architectures, for instance in sequence modeling, graph representation learning and in physical neural networks that are used in analog ML devices. We introduce an abstract class of *neural oscillators* that encompasses these architectures and prove that neural oscillators are universal, i.e, they can approximate any continuous and casual operator mapping between time-varying functions, to desired accuracy. This universality result provides theoretical justification for the use of oscillator based ML systems. The proof builds on a fundamental result of independent interest, which shows that a combination of forced harmonic oscillators with a nonlinear read-out suffices to approximate the underlying operators.", "keywords": "neural ODE;universal approximation;oscillator", "primary_area": "", "supplementary_material": "/attachment/0455a6141527f1f874a611352106687daa1183fc.pdf", "author": "Samuel Lanthaler;T. Konstantin Rusch;Siddhartha Mishra", "authorids": "~Samuel_Lanthaler1;~T._Konstantin_Rusch1;~Siddhartha_Mishra1", "gender": "M;;M", "homepage": "https://slanthaler.github.io/;https://konstantinrusch.com;http://www.sam.math.ethz.ch/", "dblp": ";266/1519;07/2856.html", "google_scholar": "v-Jv3LoAAAAJ;9LajlSsAAAAJ;FmEqyNcAAAAJ", "orcid": "0000-0003-1911-246X;;", "linkedin": ";;", "or_profile": "~Samuel_Lanthaler1;~T._Konstantin_Rusch1;~Siddhartha_Mishra1", "aff": "California Institute of Technology;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "caltech.edu;ethz.ch;ethz.ch", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nlanthaler2023neural,\ntitle={Neural Oscillators are Universal},\nauthor={Samuel Lanthaler and T. 
Konstantin Rusch and Siddhartha Mishra},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QGQsOZcQ2H}\n}", "github": "", "project": "", "reviewers": "vPvT;QtjX;y7S4;Ad2m", "pdf_size": 504277, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "3;3;4;4", "wc_summary": "57;42;81;190", "wc_strengths": "49;76;60;52", "wc_weaknesses": "20;107;97;161", "wc_questions": "54;41;150;78", "wc_limitations": "1;7;1;67", "wc_review": "181;273;389;548", "wc_reply_reviewers": "13;35;25;17", "wc_reply_authors": "23;14;20;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.5, 57.984911830578824 ], "wc_strengths_avg": [ 59.25, 10.473180032826706 ], "wc_weaknesses_avg": [ 96.25, 50.30593901320201 ], "wc_questions_avg": [ 80.75, 42.12704000995085 ], "wc_limitations_avg": [ 19.0, 27.820855486487112 ], "wc_review_avg": [ 347.75, 137.1083057294488 ], "wc_reply_reviewers_avg": [ 22.5, 8.411301920630361 ], "wc_reply_authors_avg": [ 18.0, 3.6742346141747673 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17120854331681973417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "caltech.edu;ethz.ch;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "California Institute of Technology;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;https://www.ethz.ch", "aff_unique_abbr": "Caltech;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Learning Large-scale Neural Fields via Context Pruned Meta-Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71714", "id": "QGmNMtK3pQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5b5c402bb7bd5e60bede6961d6fe39e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QGmNMtK3pQ", "openreview": "https://openreview.net/forum?id=QGmNMtK3pQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71714", "video": "https://nips.cc/virtual/2023/poster/71714", "author_site": "Jihoon Tack, Subin Kim, Sihyun Yu, Jaeho Lee, Jinwoo Shin, Jonathan Richard Schwarz", "tldr": "", "abstract": "We introduce an efficient optimization-based meta-learning technique for large-scale neural field training by realizing significant memory savings through automated online context point selection. This is achieved by focusing each learning step on the subset of data with the highest expected immediate improvement in model quality, resulting in the almost instantaneous modeling of global structure and subsequent refinement of high-frequency details. We further improve the quality of our meta-learned initialization by introducing a bootstrap correction resulting in the minimization of any error introduced by reduced context sets while simultaneously mitigating the well-known myopia of optimization-based meta-learning. 
Finally, we show how gradient re-scaling at meta-test time allows the learning of extremely high-quality neural fields in significantly shortened optimization procedures. Our framework is model-agnostic, intuitive, straightforward to implement, and shows significant reconstruction improvements for a wide range of signals. We provide an extensive empirical evaluation on nine datasets across multiple modalities, demonstrating state-of-the-art results while providing additional insight through careful analysis of the algorithmic components constituting our method. Code is available at https://github.com/jihoontack/GradNCP", "keywords": "Meta-Learning;Efficient Meta-Learning;Neural Fields;Implicit Neural Representations;Data Pruning", "primary_area": "", "supplementary_material": "/attachment/8bf6f4a4077fc4f8eff28c5d1c7cc05c9a178ade.zip", "author": "Jihoon Tack;Subin Kim;Sihyun Yu;Jaeho Lee;Jinwoo Shin;Jonathan Richard Schwarz", "authorids": "~Jihoon_Tack1;~Subin_Kim2;~Sihyun_Yu2;~Jaeho_Lee3;~Jinwoo_Shin1;~Jonathan_Richard_Schwarz1", "gender": "M;F;M;M;M;M", "homepage": "https://jihoontack.github.io;https://subin-kim-cv.github.io/;https://sihyun-yu.github.io;https://jaeho-lee.github.io;https://sites.google.com/site/mijirim/;https://jonathan-schwarz.github.io", "dblp": "267/5487;183/9520-1.html;287/4627;78/6080-1;31/7062;211/7673", "google_scholar": "eW8-OT4AAAAJ;https://scholar.google.co.kr/citations?user=gdhIzYUAAAAJ;https://scholar.google.com/citations?hl=en;t91zoQMAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;Efs3XxQAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;schwarzjonathan/", "or_profile": "~Jihoon_Tack1;~Subin_Kim2;~Sihyun_Yu2;~Jaeho_Lee3;~Jinwoo_Shin1;~Jonathan_Schwarz1", "aff": "University of Oxford;Korea Advanced Institute of Science & Technology;NVIDIA;Pohang University of Science and Technology;Korea Advanced Institute of Science & Technology;Google DeepMind", "aff_domain": "stats.ox.ac.uk;kaist.ac.kr;nvidia.com;postech.ac.kr;kaist.ac.kr;google.com", "position": "Intern;PhD student;Intern;Assistant Professor;Full Professor;Research Scientist", "bibtex": "@inproceedings{\ntack2023learning,\ntitle={Learning Large-scale Neural Fields via Context Pruned Meta-Learning},\nauthor={Jihoon Tack and Subin Kim and Sihyun Yu and Jaeho Lee and Jinwoo Shin and Jonathan Richard Schwarz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QGmNMtK3pQ}\n}", "github": "", "project": "", "reviewers": "9F3K;EMYi;3CuA;hvG5", "pdf_size": 8605815, "rating": "5;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "66;89;75;79", "wc_strengths": "54;40;131;63", "wc_weaknesses": "427;109;85;250", "wc_questions": "8;6;107;83", "wc_limitations": "12;1;1;8", "wc_review": "567;245;399;483", "wc_reply_reviewers": "273;21;28;29", "wc_reply_authors": "1085;53;54;44", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 8.257572282456872 ], "wc_strengths_avg": [ 72.0, 35.035696082709705 ], "wc_weaknesses_avg": [ 217.75, 136.2669714200767 ], "wc_questions_avg": [ 51.0, 44.81629168059312 ], "wc_limitations_avg": [ 5.5, 4.716990566028302 ], "wc_review_avg": [ 423.5, 118.948518275765 ], "wc_reply_reviewers_avg": [ 87.75,
106.99853970966146 ], "wc_reply_authors_avg": [ 309.0, 448.0407347552229 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14301502140713611898&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "stats.ox.ac.uk;kaist.ac.kr;nvidia.com;postech.ac.kr;kaist.ac.kr;google.com", "author_num": 6, "aff_unique_index": "0;1;2;3;1;4", "aff_unique_norm": "University of Oxford;Korea Advanced Institute of Science and Technology;NVIDIA;Pohang University of Science and Technology;Google", "aff_unique_dep": ";;NVIDIA Corporation;;Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://www.kaist.ac.kr;https://www.nvidia.com;https://www.postech.ac.kr;https://deepmind.com", "aff_unique_abbr": "Oxford;KAIST;NVIDIA;POSTECH;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;1;2;1;1;0", "aff_country_unique": "United Kingdom;South Korea;United States" }, { "title": "RADAR: Robust AI-Text Detection via Adversarial Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71713", "id": "QGrkbaan79", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/30e15e5941ae0cdab7ef58cc8d59a4ca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QGrkbaan79", "openreview": "https://openreview.net/forum?id=QGrkbaan79", "poster": "/media/PosterPDFs/NeurIPS%202023/71713.png?t=1699290320.7839897", "slides": "https://nips.cc/virtual/2023/poster/71713", "video": "https://nips.cc/virtual/2023/poster/71713", "author_site": "Xiaomeng Hu, Pin-Yu Chen, Tsung-Yi Ho", "tldr": "", "abstract": "Recent advances in large language models (LLMs) and the intensifying popularity of ChatGPT-like applications have blurred the boundary of high-quality text generation between humans and machines. However, in addition to the anticipated revolutionary changes to our technology and society, the difficulty of distinguishing LLM-generated texts (AI-text) from human-generated texts poses new challenges of misuse and fairness, such as fake content generation, plagiarism, and false accusations of innocent writers. While existing works show that current AI-text detectors are not robust to LLM-based paraphrasing, this paper aims to bridge this gap by proposing a new framework called RADAR, which jointly trains a $\\underline{r}$obust $\\underline{A}$I-text $\\underline{d}$etector via $\\underline{a}$dversarial lea$\\underline{r}$ning. RADAR is based on adversarial training of a paraphraser and a detector. The paraphraser's goal is to generate realistic content to evade AI-text detection.\nRADAR uses the feedback from the detector to update the paraphraser, and vice versa.\nEvaluated with 8 different LLMs (Pythia, Dolly 2.0, Palmyra, Camel, GPT-J, Dolly 1.0, LLaMA, and Vicuna) across 4 datasets, experimental results show that RADAR significantly outperforms existing AI-text detection methods, especially when paraphrasing is in place. 
We also identify the strong transferability of RADAR from instruction-tuned LLMs to other LLMs, and evaluate the improved capability of RADAR via GPT-3.5-Turbo.", "keywords": "Large Language Models;Text Detection;Adversarial Learning;Paraphrase", "primary_area": "", "supplementary_material": "/attachment/ba50750aecf551ec184c844b5f6d2549f2cce1b3.zip", "author": "Xiaomeng Hu;Pin-Yu Chen;Tsung-Yi Ho", "authorids": "~Xiaomeng_Hu1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "gender": "M;M;M", "homepage": "https://gregxmhu.github.io/;http://www.pinyuchen.com;https://www.cse.cuhk.edu.hk/people/faculty/tsung-yi-ho/", "dblp": "319/7072;39/8969;63/4181.html", "google_scholar": "u6pbsnkAAAAJ;jxwlCUUAAAAJ;TRDUYkAAAAAJ", "orcid": ";0000-0003-1039-8369;0000-0001-7348-5625", "linkedin": "xiaomeng-hu-greg/;pin-yu-chen-940062a2;", "or_profile": "~Xiaomeng_Hu1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "aff": "Northeastern University;International Business Machines;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "neu.edu.cn;ibm.com;cse.cuhk.edu.hk", "position": "Undergrad student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nhu2023radar,\ntitle={{RADAR}: Robust {AI}-Text Detection via Adversarial Learning},\nauthor={Xiaomeng Hu and Pin-Yu Chen and Tsung-Yi Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QGrkbaan79}\n}", "github": "", "project": "", "reviewers": "uRZ4;acyW;M6sT;8fAM", "pdf_size": 1683182, "rating": "5;6;6;6", "confidence": "3;4;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "113;94;101;198", "wc_strengths": "25;64;62;61", "wc_weaknesses": "57;172;170;157", "wc_questions": "63;36;181;60", "wc_limitations": "6;5;106;30", "wc_review": "264;371;620;506", "wc_reply_reviewers": "13;18;182;46", "wc_reply_authors": "23;13;117;36", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.5, 41.8359892915179 ], "wc_strengths_avg": [ 53.0, 16.20185174601965 ], "wc_weaknesses_avg": [ 139.0, 47.69171835864168 ], "wc_questions_avg": [ 85.0, 56.404787030889494 ], "wc_limitations_avg": [ 36.75, 41.215136782497765 ], "wc_review_avg": [ 440.25, 134.62238855405886 ], "wc_reply_reviewers_avg": [ 64.75, 68.85265063888245 ], "wc_reply_authors_avg": [ 47.25, 41.08755894428385 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11493215780708036476&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "neu.edu.cn;ibm.com;cse.cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Northeastern University;International Business Machines Corporation;Chinese University of Hong Kong", "aff_unique_dep": ";;Department of Computer Science and Engineering", "aff_unique_url": "https://www.northeastern.edu;https://www.ibm.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "NEU;IBM;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "title": "Lossy Image 
Compression with Conditional Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71712", "id": "QIBpzaDCAv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ccf6d8b4a1fe9d9c8192f00c713872ea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QIBpzaDCAv", "openreview": "https://openreview.net/forum?id=QIBpzaDCAv", "poster": "/media/PosterPDFs/NeurIPS%202023/71712.png?t=1701470149.1149695", "slides": "https://nips.cc/virtual/2023/poster/71712", "video": "https://nips.cc/virtual/2023/poster/71712", "author_site": "Ruihan Yang, Stephan Mandt", "tldr": "", "abstract": "This paper outlines an end-to-end optimized lossy image compression framework using diffusion generative models. The approach relies on the transform coding paradigm, where an image is mapped into a latent space for entropy coding and, from there, mapped back to the data space for reconstruction. In contrast to VAE-based neural compression, where the (mean) decoder is a deterministic neural network, our decoder is a conditional diffusion model. Our approach thus introduces an additional \"content\" latent variable on which the reverse diffusion process is conditioned and uses this variable to store information about the image. The remaining ``texture'' variables characterizing the diffusion process are synthesized at decoding time. We show that the model's performance can be tuned toward perceptual metrics of interest. Our extensive experiments involving multiple datasets and image quality assessment metrics show that our approach yields stronger reported FID scores than the GAN-based model, while also yielding competitive performance with VAE-based models in several distortion metrics. Furthermore, training the diffusion with $\\mathcal{X}$-parameterization enables high-quality reconstructions in only a handful of decoding steps, greatly affecting the model's practicality. 
Our code is available at: https://github.com/buggyyang/CDC_compression", "keywords": "generative model;diffusion model;image compression;computer vision", "primary_area": "", "supplementary_material": "", "author": "Ruihan Yang;Stephan Mandt", "authorids": "~Ruihan_Yang1;~Stephan_Mandt1", "gender": "M;M", "homepage": ";https://www.stephanmandt.com", "dblp": "225/4834;147/5018", "google_scholar": "mWEXfLwAAAAJ;HOrGe7wAAAAJ", "orcid": ";", "linkedin": ";stephan-mandt-8702795a/", "or_profile": "~Ruihan_Yang1;~Stephan_M_Mandt1", "aff": "Microsoft;University of California, Irvine", "aff_domain": "microsoft.com;uci.edu", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nyang2023lossy,\ntitle={Lossy Image Compression with Conditional Diffusion Models},\nauthor={Ruihan Yang and Stephan Mandt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QIBpzaDCAv}\n}", "github": "", "project": "", "reviewers": "wzSD;QC9o;kiiX;9a7p;ys1i", "pdf_size": 14864140, "rating": "5;5;5;6;6", "confidence": "4;4;5;3;3", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "2;3;4;2;4", "wc_summary": "87;85;93;99;173", "wc_strengths": "32;74;51;104;110", "wc_weaknesses": "274;226;179;218;147", "wc_questions": "8;2;56;43;140", "wc_limitations": "41;6;1;29;2", "wc_review": "442;393;380;493;572", "wc_reply_reviewers": "0;354;24;18;0", "wc_reply_authors": "0;251;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 107.4, 33.163835725078606 ], "wc_strengths_avg": [ 74.2, 29.962643408084006 ], "wc_weaknesses_avg": [ 208.8, 43.217589011882644 ], "wc_questions_avg": [ 49.8, 49.503131214095944 ], "wc_limitations_avg": [ 15.8, 16.216041440499588 ], "wc_review_avg": [ 456.0, 70.3789741329042 ], "wc_reply_reviewers_avg": [ 79.2, 137.73365601769237 ], "wc_reply_authors_avg": [ 50.2, 100.4 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8728715609439693, "gs_citation": 133, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13266797142213475647&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "microsoft.com;uci.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;University of California, Irvine", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.uci.edu", "aff_unique_abbr": "Microsoft;UCI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Causal normalizing flows: from theory to practice", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71711", "id": "QIFoCI7ca1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b8402301e7f06bdc97a31bfaa653dc32-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QIFoCI7ca1", "openreview": "https://openreview.net/forum?id=QIFoCI7ca1", "poster": "/media/PosterPDFs/NeurIPS%202023/71711.png?t=1701945308.5581896", "slides": "https://nips.cc/virtual/2023/poster/71711", "video": "https://nips.cc/virtual/2023/poster/71711", "author_site": 
"Adri\u00e1n Javaloy, Pablo Sanchez-Martin, Isabel Valera", "tldr": "", "abstract": "In this work, we deepen on the use of normalizing flows for causal reasoning. Specifically, we first leverage recent results on non-linear ICA to show that causal models are identifiable from observational data given a causal ordering, and thus can be recovered using autoregressive normalizing flows (NFs). Second, we analyze different design and learning choices for *causal normalizing flows* to capture the underlying causal data-generating process. Third, we describe how to implement the *do-operator* in causal NFs, and thus, how to answer interventional and counterfactual questions. Finally, in our experiments, we validate our design and training choices through a comprehensive ablation study; compare causal NFs to other approaches for approximating causal models; and empirically demonstrate that causal NFs can be used to address real-world problems\u2014where the presence of mixed discrete-continuous data and partial knowledge on the causal graph is the norm. The code for this work can be found at https://github.com/psanch21/causal-flows.", "keywords": "causality;causal inference;normalizing flows;identifiability;interventions;counterfactuals", "primary_area": "", "supplementary_material": "/attachment/9a621aeae59f146973dd162520840e5f8581d9ca.zip", "author": "Adri\u00e1n Javaloy;Pablo Sanchez Martin;Isabel Valera", "authorids": "~Adri\u00e1n_Javaloy1;~Pablo_Sanchez_Martin1;~Isabel_Valera1", "gender": "M;M;F", "homepage": "https://adrianjav.github.io;https://www.is.mpg.de/person/psanchez;https://ivaleram.github.io/", "dblp": "259/2011;;126/1768.html", "google_scholar": "ne3evXwAAAAJ;;https://scholar.google.es/citations?user=cpdQqpsAAAAJ", "orcid": "0000-0002-5184-4460;;", "linkedin": "adrian-javaloy;;", "or_profile": "~Adri\u00e1n_Javaloy1;~Pablo_Sanchez_Martin1;~Isabel_Valera1", "aff": "Saarland University, Saarland University;Max-Planck Institute;Universit\u00e4t des Saarlandes", "aff_domain": "cs.uni-saarland.de;mpg.tuebingen.de;uni-saarland.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\njavaloy2023causal,\ntitle={Causal normalizing flows: from theory to practice},\nauthor={Adri{\\'a}n Javaloy and Pablo Sanchez Martin and Isabel Valera},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QIFoCI7ca1}\n}", "github": "", "project": "", "reviewers": "yYim;p91A;515Z;azJz;1x9i", "pdf_size": 5550437, "rating": "6;7;7;8;8", "confidence": "2;3;3;3;4", "soundness": "2;3;3;3;4", "novelty": "2;3;3;4;4", "presentation": "3;3;3;3;3", "wc_summary": "308;61;73;171;70", "wc_strengths": "47;40;110;76;39", "wc_weaknesses": "86;153;64;101;21", "wc_questions": "6;2;29;2;124", "wc_limitations": "4;1;1;45;27", "wc_review": "451;257;277;395;281", "wc_reply_reviewers": "14;31;23;113;0", "wc_reply_authors": "32;20;24;0;0", "reply_reviewers": "1;1;2;1;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 136.6, 94.61204997250614 ], "wc_strengths_avg": [ 62.4, 27.339348931530907 ], "wc_weaknesses_avg": [ 85.0, 43.40046082704652 ], "wc_questions_avg": [ 32.6, 46.79145221084723 ], "wc_limitations_avg": [ 15.6, 17.636326148038883 ], "wc_review_avg": [ 332.2, 76.65611521594346 ], 
"wc_reply_reviewers_avg": [ 36.2, 39.756257369123674 ], "wc_reply_authors_avg": [ 15.2, 12.998461447417537 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8451542547285165, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12228300008684808168&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.uni-saarland.de;mpg.tuebingen.de;uni-saarland.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Saarland University;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;Universit\u00e4t des Saarlandes", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-saarland.de;https://www.mpg.de;https://www.uni-saarland.de", "aff_unique_abbr": "UdS;MPG;UDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "An Optimal and Scalable Matrix Mechanism for Noisy Marginals under Convex Loss Functions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71710", "id": "QKSejqE8Vp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/414f4c9fe9653e5de98fad6964d50315-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QKSejqE8Vp", "openreview": "https://openreview.net/forum?id=QKSejqE8Vp", "poster": "/media/PosterPDFs/NeurIPS%202023/71710.png?t=1703185124.8049006", "slides": "https://nips.cc/virtual/2023/poster/71710", "video": "https://nips.cc/virtual/2023/poster/71710", "author_site": "Yingtai Xiao, Guanlin He, Danfeng Zhang, Daniel Kifer", "tldr": "", "abstract": "Noisy marginals are a common form of confidentiality-protecting data release and are useful for many downstream tasks such as contingency table analysis, construction of Bayesian networks, and even synthetic data generation. Privacy mechanisms that provide unbiased noisy answers to linear queries (such as marginals) are known as matrix mechanisms.\n\nWe propose ResidualPlanner, a matrix mechanism for marginals with Gaussian noise that is both optimal and scalable. ResidualPlanner can optimize for many loss functions that can be written as a convex function of marginal variances (prior work was restricted to just one predefined objective function). ResidualPlanner can optimize the accuracy of marginals in large scale settings in seconds, even when the previous state of the art (HDMM) runs out of memory. It even runs on datasets with 100 attributes in a couple of minutes. 
Furthermore, ResidualPlanner can efficiently compute variance/covariance values for each marginal (prior methods quickly run out of memory, even for relatively small datasets).", "keywords": "differential privacy;marginals;matrix mechanism;scalability", "primary_area": "", "supplementary_material": "/attachment/34aa3dc085b0473f1876f0d79ace957d1d47bee4.zip", "author": "Yingtai Xiao;Guanlin He;Danfeng Zhang;Daniel Kifer", "authorids": "~Yingtai_Xiao1;~Guanlin_He1;~Danfeng_Zhang1;~Daniel_Kifer1", "gender": "M;;M;M", "homepage": "https://jackyxiao98.github.io/;;http://www.cse.psu.edu/~dbz5017/;http://www.cse.psu.edu/~duk17/", "dblp": "244/1962;;;84/114", "google_scholar": "eCBNsH0AAAAJ;zDoizEsAAAAJ;https://scholar.google.com.tw/citations?user=rUZN-zQAAAAJ;https://scholar.google.com.tw/citations?hl=en", "orcid": "0000-0001-8964-6753;0009-0002-7621-7524;;", "linkedin": "yingtai-xiao-2b795515a/;guanlin-he-b68722a0/;;", "or_profile": "~Yingtai_Xiao1;~Guanlin_He1;~Danfeng_Zhang1;~Daniel_Kifer1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;psu.edu", "position": "PhD student;MS student;Associate Professor;Professor", "bibtex": "@inproceedings{\nxiao2023an,\ntitle={An Optimal and Scalable Matrix Mechanism for Noisy Marginals under Convex Loss Functions},\nauthor={Yingtai Xiao and Guanlin He and Danfeng Zhang and Daniel Kifer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QKSejqE8Vp}\n}", "github": "", "project": "", "reviewers": "rP9C;zbBa;2C53;zYAs", "pdf_size": 513434, "rating": "7;7;7;9", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;2;3;4", "wc_summary": "190;90;61;376", "wc_strengths": "92;228;91;116", "wc_weaknesses": "79;388;217;91", "wc_questions": "219;3;105;16", "wc_limitations": "71;1;53;1", "wc_review": "651;710;527;600", "wc_reply_reviewers": "92;88;15;23", "wc_reply_authors": "715;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 179.25, 123.26267683285155 ], "wc_strengths_avg": [ 131.75, 56.46403722724757 ], "wc_weaknesses_avg": [ 193.75, 124.4977409433601 ], "wc_questions_avg": [ 85.75, 86.36948245763662 ], "wc_limitations_avg": [ 31.5, 31.156861202630793 ], "wc_review_avg": [ 622.0, 67.25696989903723 ], "wc_reply_reviewers_avg": [ 54.5, 35.64056677439347 ], "wc_reply_authors_avg": [ 178.75, 309.6040818529368 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11541342719739829611&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "psu.edu;psu.edu;psu.edu;psu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Where2Explore: Few-shot Affordance Learning for Unseen Novel 
Categories of Articulated Objects", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71709", "id": "QLllDwizVd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0e7e2af2e5ba822c9ad35a37b31b5dd4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QLllDwizVd", "openreview": "https://openreview.net/forum?id=QLllDwizVd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71709", "video": "https://nips.cc/virtual/2023/poster/71709", "author_site": "Chuanruo Ning, Ruihai Wu, Haoran Lu, Kaichun Mo, Hao Dong", "tldr": "", "abstract": "Articulated object manipulation is a fundamental yet challenging task in robotics. Due to significant geometric and semantic variations across object categories, previous manipulation models struggle to generalize to novel categories. Few-shot learning is a promising solution for alleviating this issue by allowing robots to perform a few interactions with unseen objects. However, extant approaches often necessitate costly and inefficient test-time interactions with each unseen instance. Recognizing this limitation, we observe that despite their distinct shapes, different categories often share similar local geometries essential for manipulation, such as pullable handles and graspable edges - a factor typically underutilized in previous few-shot learning works. To harness this commonality, we introduce 'Where2Explore', an affordance learning framework that effectively explores novel categories with minimal interactions on a limited number of instances. Our framework explicitly estimates the geometric similarity across different categories, identifying local areas that differ from shapes in the training categories for efficient exploration while concurrently transferring affordance knowledge to similar parts of the objects. 
Extensive experiments in simulated and real-world environments demonstrate our framework's capacity for efficient few-shot exploration and generalization.", "keywords": "articulated object manipulation;few-shot learning;visual affordance for robotics", "primary_area": "", "supplementary_material": "/attachment/0fb5e86ebb0a3d5850193a3cfe65148451d3aa5b.zip", "author": "Chuanruo Ning;Ruihai Wu;Haoran Lu;Kaichun Mo;Hao Dong", "authorids": "~Chuanruo_Ning1;~Ruihai_Wu1;~Haoran_Lu2;~Kaichun_Mo1;~Hao_Dong3", "gender": "M;M;M;M;M", "homepage": "https://tritiumr.github.io;https://warshallrho.github.io/;https://luhr2003.github.io/;https://cs.stanford.edu/~kaichun/;https://zsdonghao.github.io", "dblp": "342/8955;248/8028.html;;172/1283;14/1525-3.html", "google_scholar": "jnLq85IAAAAJ;https://scholar.google.com/citations?hl=en;wNDTItAAAAAJ;pL7JsOsAAAAJ;xLFL4sMAAAAJ", "orcid": ";;;;0000-0003-2261-9122", "linkedin": ";;;;", "or_profile": "~Chuanruo_Ning1;~Ruihai_Wu1;~Haoran_Lu2;~Kaichun_Mo1;~Hao_Dong3", "aff": "Peking University;Peking University;Peking University;NVIDIA;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;nvidia.com;pku.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nning2023whereexplore,\ntitle={Where2Explore: Few-shot Affordance Learning for Unseen Novel Categories of Articulated Objects},\nauthor={Chuanruo Ning and Ruihai Wu and Haoran Lu and Kaichun Mo and Hao Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QLllDwizVd}\n}", "github": "", "project": "", "reviewers": "8WHp;3SiD;2ur3;WVQD;ztRo", "pdf_size": 3852736, "rating": "5;6;6;6;6", "confidence": "5;4;4;4;2", "soundness": "3;2;3;4;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "95;78;93;52;66", "wc_strengths": "58;62;75;70;44", "wc_weaknesses": "156;81;247;165;252", "wc_questions": "81;52;132;221;29", "wc_limitations": "81;9;5;14;1", "wc_review": "471;282;552;522;392", "wc_reply_reviewers": "0;0;14;11;10", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 76.8, 16.28987415543779 ], "wc_strengths_avg": [ 61.8, 10.703270528207721 ], "wc_weaknesses_avg": [ 180.2, 63.678567823090994 ], "wc_questions_avg": [ 103.0, 68.30226936200583 ], "wc_limitations_avg": [ 22.0, 29.81274895074253 ], "wc_review_avg": [ 443.8, 97.3907593152451 ], "wc_reply_reviewers_avg": [ 7.0, 5.865151319446072 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957948, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16471640380090731089&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;nvidia.com;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Peking University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "http://www.pku.edu.cn;https://www.nvidia.com", "aff_unique_abbr": "Peking U;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Adversarial Self-Training Improves Robustness and Generalization for Gradual Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71708", "id": "QNUs3Ramad", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/75b0edb869e2cd509d64d0e8ff446bc1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QNUs3Ramad", "openreview": "https://openreview.net/forum?id=QNUs3Ramad", "poster": "/media/PosterPDFs/NeurIPS%202023/71708.png?t=1701497578.458058", "slides": "https://nips.cc/virtual/2023/poster/71708", "video": "https://nips.cc/virtual/2023/poster/71708", "author_site": "Lianghe Shi, Weiwei Liu", "tldr": "", "abstract": "Gradual Domain Adaptation (GDA), in which the learner is provided with additional intermediate domains, has been theoretically and empirically studied in many contexts. Despite its vital role in security-critical scenarios, the adversarial robustness of the GDA model remains unexplored. In this paper, we adopt the effective gradual self-training method and replace vanilla self-training with adversarial self-training (AST). AST first predicts labels on the unlabeled data and then adversarially trains the model on the pseudo-labeled distribution. Intriguingly, we find that gradual AST improves not only adversarial accuracy but also clean accuracy on the target domain. We reveal that this is because adversarial training (AT) performs better than standard training when the pseudo-labels contain a portion of incorrect labels. Accordingly, we first present the generalization error bounds for gradual AST in a multiclass classification setting. We then use the optimal value of the Subset Sum Problem to bridge the standard error on a real distribution and the adversarial error on a pseudo-labeled distribution. The result indicates that AT may obtain a tighter bound than standard training on data with incorrect pseudo-labels. 
We further present an example of a conditional Gaussian distribution to provide more insights into why gradual AST can improve the clean accuracy for GDA.", "keywords": "learning theory", "primary_area": "", "supplementary_material": "/attachment/2b2bc7dee7287bdffb38eb963ad46923fe311350.pdf", "author": "Lianghe Shi;Weiwei Liu", "authorids": "~Lianghe_Shi1;~Weiwei_Liu1", "gender": ";M", "homepage": ";https://sites.google.com/site/weiweiliuhomepage/", "dblp": ";54/6677-3.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN", "orcid": ";", "linkedin": ";weiwei-liu-4a7849134/", "or_profile": "~Lianghe_Shi1;~Weiwei_Liu1", "aff": ";Wuhan University", "aff_domain": ";whu.edu.cn", "position": ";Full Professor", "bibtex": "@inproceedings{\nshi2023adversarial,\ntitle={Adversarial Self-Training Improves Robustness and Generalization for Gradual Domain Adaptation},\nauthor={Lianghe Shi and Weiwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QNUs3Ramad}\n}", "github": "", "project": "", "reviewers": "z2iW;oh7v;QQok;gQtX", "pdf_size": 532617, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "3;4;4;3", "wc_summary": "95;94;47;174", "wc_strengths": "58;39;103;148", "wc_weaknesses": "148;62;75;118", "wc_questions": "18;48;41;10", "wc_limitations": "2;6;7;15", "wc_review": "321;249;273;465", "wc_reply_reviewers": "88;18;21;36", "wc_reply_authors": "458;18;22;26", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 102.5, 45.609757727924844 ], "wc_strengths_avg": [ 87.0, 42.19597137168429 ], "wc_weaknesses_avg": [ 100.75, 34.259122872601395 ], "wc_questions_avg": [ 29.25, 15.706288549495072 ], "wc_limitations_avg": [ 7.5, 4.716990566028302 ], "wc_review_avg": [ 327.0, 83.78544026261365 ], "wc_reply_reviewers_avg": [ 40.75, 28.119166061602893 ], "wc_reply_authors_avg": [ 131.0, 188.8147240021286 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12141568719845605012&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";whu.edu.cn", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Fractal Landscapes in Policy Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71707", "id": "QQidjdmyPp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d21f257b5288385cb6cb8e0ff2ce82e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QQidjdmyPp", "openreview": "https://openreview.net/forum?id=QQidjdmyPp", "poster": "/media/PosterPDFs/NeurIPS%202023/71707.png?t=1699222869.195272", "slides": "https://nips.cc/virtual/2023/poster/71707", "video": "https://nips.cc/virtual/2023/poster/71707", "author_site": "Tao Wang, Sylvia Herbert, Sicun Gao", "tldr": "", "abstract": "Policy gradient lies at the core of deep reinforcement learning 
(RL) in continuous domains. Despite much success, it is often observed in practice that RL training with policy gradient can fail for many reasons, even on standard control problems with known solutions. We propose a framework for understanding one inherent limitation of the policy gradient approach: the optimization landscape in the policy space can be extremely non-smooth or fractal for certain classes of MDPs, such that there does not exist a gradient to be estimated in the first place. We draw on techniques from chaos theory and non-smooth analysis, and analyze the maximal Lyapunov exponents and H\\\"older exponents of the policy optimization objectives. Moreover, we develop a practical method that can estimate the local smoothness of the objective function from samples to identify when the training process has encountered fractal landscapes. We show experiments to illustrate how some failure cases of policy optimization can be explained by such fractal landscapes.", "keywords": "Reinforcement learning;policy gradient;non-smooth landscape", "primary_area": "", "supplementary_material": "/attachment/d480f66b65d439cc6fd14a7e992e54bb66e47f30.pdf", "author": "Tao Wang;Sylvia Lee Herbert;Sicun Gao", "authorids": "~Tao_Wang27;~Sylvia_Lee_Herbert1;~Sicun_Gao1", "gender": ";F;M", "homepage": "https://taowang0.github.io/;https://sylviaherbert.com;", "dblp": ";192/3242;22/8296", "google_scholar": ";;", "orcid": ";0000-0002-3863-8945;", "linkedin": ";;", "or_profile": "~Tao_Wang27;~Sylvia_Lee_Herbert1;~Sicun_Gao1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023fractal,\ntitle={Fractal Landscapes in Policy Optimization},\nauthor={Tao Wang and Sylvia Lee Herbert and Sicun Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QQidjdmyPp}\n}", "github": "", "project": "", "reviewers": "Qd1b;dey2;xgpg;bNWE", "pdf_size": 1802034, "rating": "4;4;7;9", "confidence": "3;3;4;4", "soundness": "2;2;4;4", "novelty": "3;3;4;4", "presentation": "2;2;4;3", "wc_summary": "114;125;66;54", "wc_strengths": "60;229;44;85", "wc_weaknesses": "317;1672;31;153", "wc_questions": "502;104;259;60", "wc_limitations": "59;63;1;1", "wc_review": "1052;2193;401;353", "wc_reply_reviewers": "909;0;55;0", "wc_reply_authors": "1027;0;44;0", "reply_reviewers": "2;0;1;0", "reply_authors": "3;1;2;1", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 89.75, 30.30160886817728 ], "wc_strengths_avg": [ 104.5, 73.35018745715651 ], "wc_weaknesses_avg": [ 543.25, 659.5378590346426 ], "wc_questions_avg": [ 231.25, 172.91236942451513 ], "wc_limitations_avg": [ 31.0, 30.033314835362415 ], "wc_review_avg": [ 999.75, 742.1864236834301 ], "wc_reply_reviewers_avg": [ 241.0, 386.32305134433796 ], "wc_reply_authors_avg": [ 267.75, 438.72108166806845 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9428090415820635, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5214000093408971169&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
"ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CADet: Fully Self-Supervised Out-Of-Distribution Detection With Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71706", "id": "QRAS5wSgEy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1700ad4e6252e8f2955909f96367b34d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QRAS5wSgEy", "openreview": "https://openreview.net/forum?id=QRAS5wSgEy", "poster": "/media/PosterPDFs/NeurIPS%202023/71706.png?t=1702011460.667876", "slides": "https://nips.cc/virtual/2023/poster/71706", "video": "https://nips.cc/virtual/2023/poster/71706", "author_site": "Charles Guille-Escuret, Pau Rodriguez, David Vazquez, Ioannis Mitliagkas, Joao Monteiro", "tldr": "", "abstract": "Handling out-of-distribution (OOD) samples has become a major stake in the real-world deployment of machine learning systems. This work explores the use of self-supervised contrastive learning to the simultaneous detection of two types of OOD samples: unseen classes and adversarial perturbations. First, we pair self-supervised contrastive learning with the maximum mean discrepancy (MMD) two-sample test. This approach enables us to robustly test whether two independent sets of samples originate from the same distribution, and we demonstrate its effectiveness by discriminating between CIFAR-10 and CIFAR-10.1 with higher confidence than previous work. Motivated by this success, we introduce CADet (Contrastive Anomaly Detection), a novel method for OOD detection of single samples. CADet draws inspiration from MMD, but leverages the similarity between contrastive transformations of a same sample. CADet outperforms existing adversarial detection methods in identifying adversarially perturbed samples on ImageNet and achieves comparable performance to unseen label detection methods on two challenging benchmarks: ImageNet-O and iNaturalist. 
Significantly, CADet is fully self-supervised and requires neither labels for in-distribution samples nor access to OOD examples.", "keywords": "Contrastive learning;OOD detection;adversarial detection;MMD;ImageNet-O;Anomaly detection;CIFAR-10.1", "primary_area": "", "supplementary_material": "/attachment/d8dc0981bbfc4cf0ecb26b18adc301c003ac3984.pdf", "author": "Charles Guille-Escuret;Pau Rodriguez;David Vazquez;Ioannis Mitliagkas;Joao Monteiro", "authorids": "~Charles_Guille-Escuret1;~Pau_Rodriguez2;~David_Vazquez1;~Ioannis_Mitliagkas1;~Joao_Monteiro1", "gender": "M;M;M;M;", "homepage": ";http://www.david-vazquez.com;http://mitliagkas.github.io/;;https://prlz77.github.io", "dblp": "243/7039;94/8653;83/8757;215/5354-2;190/7735", "google_scholar": "VNgVRmgAAAAJ;1jHvtfsAAAAJ;K757SxgAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.es/citations?user=IwBx73wAAAAJ", "orcid": ";0000-0002-2845-8158;;;0000-0002-1689-8084", "linkedin": ";https://www.linkedin.com/company/david-vazquez/;;joao-monteiro-47180256/;", "or_profile": "~Charles_Guille-Escuret1;~David_Vazquez1;~Ioannis_Mitliagkas1;~Joao_Monteiro1;~Pau_Rodriguez_Lopez1", "aff": "Apple;ServiceNow research;Mila - Quebec AI Institute;ServiceNow Research;Apple", "aff_domain": "apple.com;servicenow.com;mila.quebec;servicenow.com;apple.com", "position": "Intern;Researcher;Principal Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nguille-escuret2023cadet,\ntitle={{CAD}et: Fully Self-Supervised Out-Of-Distribution Detection With Contrastive Learning},\nauthor={Charles Guille-Escuret and Pau Rodriguez and David Vazquez and Ioannis Mitliagkas and Joao Monteiro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QRAS5wSgEy}\n}", "github": "", "project": "", "reviewers": "hQ4Y;1ZdX;tQNn;X9Xp;kyFG", "pdf_size": 456541, "rating": "5;5;5;5;6", "confidence": "5;2;4;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;2;3;4", "wc_summary": "247;65;86;39;79", "wc_strengths": "19;37;50;53;69", "wc_weaknesses": "801;94;188;363;22", "wc_questions": "89;2;16;93;44", "wc_limitations": "1;7;69;9;12", "wc_review": "1157;205;409;557;226", "wc_reply_reviewers": "204;0;0;593;0", "wc_reply_authors": "424;0;0;476;0", "reply_reviewers": "1;0;0;2;0", "reply_authors": "2;1;1;3;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 103.2, 73.67604766815333 ], "wc_strengths_avg": [ 45.6, 16.75231327309754 ], "wc_weaknesses_avg": [ 293.6, 278.2190503901557 ], "wc_questions_avg": [ 48.8, 37.037278517731295 ], "wc_limitations_avg": [ 19.6, 24.96076921891631 ], "wc_review_avg": [ 510.8, 347.76336782358203 ], "wc_reply_reviewers_avg": [ 159.4, 230.74800107476554 ], "wc_reply_authors_avg": [ 180.0, 221.06650583025916 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.29417420270727607, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6910436009874140573&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 7, "email": "apple.com;servicenow.com;mila.quebec;servicenow.com;apple.com", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Apple;ServiceNow;Quebec AI Institute", "aff_unique_dep": "Apple Inc.;research;AI 
Institute", "aff_unique_url": "https://www.apple.com;https://www.servicenow.com;https://mila.quebec", "aff_unique_abbr": "Apple;ServiceNow;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Networks are Slacking Off: Understanding Generalization Problem in Image Deraining", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71705", "id": "QRWA5nTWuM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5aca18e0192b2c1300479e5b700c76a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QRWA5nTWuM", "openreview": "https://openreview.net/forum?id=QRWA5nTWuM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71705", "video": "https://nips.cc/virtual/2023/poster/71705", "author_site": "Jinjin Gu, Xianzheng Ma, Xiangtao Kong, Yu Qiao, Chao Dong", "tldr": "", "abstract": "Deep deraining networks consistently encounter substantial generalization issues when deployed in real-world applications, although they are successful in laboratory benchmarks. A prevailing perspective in deep learning encourages using highly complex data for training, with the expectation that richer image background content will facilitate overcoming the generalization problem. However, through comprehensive and systematic experimentation, we discover that this strategy does not enhance the generalization capability of these networks. On the contrary, it exacerbates the tendency of networks to overfit specific degradations. Our experiments reveal that better generalization in a deraining network can be achieved by simplifying the complexity of the training background images. This is because that the networks are ``slacking off'' during training, that is, learning the least complex elements in the image background and degradation to minimize training loss. When the background images are less complex than the rain streaks, the network will prioritize the background reconstruction, thereby suppressing overfitting the rain patterns and leading to improved generalization performance. 
Our research offers a valuable perspective and methodology for better understanding the generalization problem in low-level vision tasks and displays promising potential for practical application.", "keywords": "Image Deraining;Generalization;Interpretation", "primary_area": "", "supplementary_material": "", "author": "Jinjin Gu;Xianzheng Ma;Xiangtao Kong;Yu Qiao;Chao Dong", "authorids": "~Jinjin_Gu1;~Xianzheng_Ma1;~Xiangtao_Kong1;~Yu_Qiao1;~Chao_Dong4", "gender": "M;;M;;M", "homepage": "http://www.jasongt.com;;https://xiangtaokong.github.io/;;http://xpixel.group/2010/01/20/chaodong.html", "dblp": "209/5709;;274/3262;;16/1278-5", "google_scholar": "uMQ-G-QAAAAJ;;lueNzSgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-4389-6236;;;;", "linkedin": "jinjingu;;;;", "or_profile": "~Jinjin_Gu1;~Xianzheng_Ma1;~Xiangtao_Kong1;~Yu_Qiao1;~Chao_Dong4", "aff": "University of Sydney;;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "sydney.edu.au;;siat.ac.cn;;siat.ac.cn", "position": "PhD student;;MS student;;Full Professor", "bibtex": "@inproceedings{\ngu2023networks,\ntitle={Networks are Slacking Off: Understanding Generalization Problem in Image Deraining},\nauthor={Jinjin Gu and Xianzheng Ma and Xiangtao Kong and Yu Qiao and Chao Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QRWA5nTWuM}\n}", "github": "", "project": "", "reviewers": "UxXt;e5xd;FkGF;w1Er", "pdf_size": 16207289, "rating": "2;3;8;8", "confidence": "5;5;5;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "89;53;58;40", "wc_strengths": "85;32;43;43", "wc_weaknesses": "1110;153;156;52", "wc_questions": "198;4;5;23", "wc_limitations": "1;18;10;6", "wc_review": "1483;260;272;164", "wc_reply_reviewers": "0;66;40;138", "wc_reply_authors": "61;713;68;68", "reply_reviewers": "0;1;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.25, 2.7726341266023544 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.0, 17.98610574860495 ], "wc_strengths_avg": [ 50.75, 20.27775875189366 ], "wc_weaknesses_avg": [ 367.75, 430.5777368838291 ], "wc_questions_avg": [ 57.5, 81.46931937852433 ], "wc_limitations_avg": [ 8.75, 6.219927652312364 ], "wc_review_avg": [ 544.75, 543.3136179960889 ], "wc_reply_reviewers_avg": [ 61.0, 50.28916384272063 ], "wc_reply_authors_avg": [ 227.5, 280.3181228533039 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5726371269248889, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10528057293750624043&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sydney.edu.au;;siat.ac.cn;;siat.ac.cn", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Sydney;Chinese Academy of Sciences", "aff_unique_dep": ";Shenzhen Institutes of Advanced Technology", "aff_unique_url": "https://www.sydney.edu.au;http://www.cas.cn", "aff_unique_abbr": "USYD;CAS", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;1", "aff_country_unique": 
"Australia;China" }, { "id": "QSJKrO1Qpy", "title": "Hodge-Aware Learning on Simplicial Complexes", "track": "main", "status": "Reject", "tldr": "", "abstract": " Neural networks on simplicial complexes (SCs) can learn from data residing on simplices such as nodes, edges, triangles, etc. \n However, existing works often overlook the Hodge theory that decomposes simplicial data into three orthogonal characteristic subspaces, such as the identifiable gradient, curl and harmonic components of edge flows.\n In this paper, we aim to incorporate this data inductive bias into learning on SCs. \n Particularly, we present a general convolutional architecture \n which respects the three key principles of uncoupling the lower and upper simplicial adjacencies, accounting for the inter-simplicial couplings, and performing higher-order convolutions. \n To understand these principles, we first use Dirichlet energy minimizations on SCs to interpret their effects on mitigating the simplicial oversmoothing. \n Then, through the lens of spectral simplicial theory,\n we show the three principles promote the Hodge-aware learning of this architecture, in the sense that the three Hodge subspaces are invariant under its learnable functions and the learning in two nontrivial subspaces are independent and expressive.\n To further investigate the learning ability of this architecture, we also study it is stable against small perturbations on simplicial connections.\n Finally, we experimentally validate the three principles by comparing with methods that either violate or do not respect them.\n Overall, this paper bridges learning on SCs with the Hodge decomposition, highlighting its importance for rational and effective learning from simplicial data.", "keywords": "hodge decomposition;simplicial complexes;spectral simplicial theory;simplicial neural network;stability", "primary_area": "", "supplementary_material": "/attachment/2f34bd8cadecf97dba76f29874c45578aec0d467.zip", "author": "Maosheng Yang;Elvin Isufi", "authorids": "~Maosheng_Yang1;~Elvin_Isufi1", "gender": "M;M", "homepage": "https://cookbook-ms.github.io/;https://sites.google.com/site/elvinisufihp/", "dblp": ";156/9608", "google_scholar": "-ka_yNQAAAAJ;wvywFdwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Maosheng_Yang1;~Elvin_Isufi1", "aff": "Delft University of Technology;Delft University of Technology", "aff_domain": "tudelft.nl;tudelft.nl", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nyang2023hodgeaware,\ntitle={Hodge-Aware Learning on Simplicial Complexes},\nauthor={Maosheng Yang and Elvin Isufi},\nyear={2023},\nurl={https://openreview.net/forum?id=QSJKrO1Qpy}\n}", "github": "", "project": "", "reviewers": "d7bL;shzQ;XgBj;xwZ6;KR9e", "site": "https://openreview.net/forum?id=QSJKrO1Qpy", "pdf_size": 1705676, "rating": "2;4;4;5;7", "confidence": "3;3;3;2;2", "soundness": "2;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "1;3;3;3;2", "wc_summary": "38;116;78;35;141", "wc_strengths": "41;55;95;49;129", "wc_weaknesses": "160;87;262;2;184", "wc_questions": "202;6;142;124;103", "wc_limitations": "29;34;18;1;13", "wc_review": "470;298;595;211;570", "wc_reply_reviewers": "179;29;0;7;33", "wc_reply_authors": "2140;329;246;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "5;2;2;1;1", "rating_avg": [ 4.4, 1.624807680927192 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 81.6, 
41.94567915769156 ], "wc_strengths_avg": [ 73.8, 33.289037234501095 ], "wc_weaknesses_avg": [ 139.0, 88.3945699689749 ], "wc_questions_avg": [ 115.4, 63.879887288566806 ], "wc_limitations_avg": [ 19.0, 11.713240371477058 ], "wc_review_avg": [ 428.8, 150.86603328781464 ], "wc_reply_reviewers_avg": [ 49.6, 65.90781440770131 ], "wc_reply_authors_avg": [ 543.0, 809.2109737268768 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8040302522073698, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11312994242996534337&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "FaceDNeRF: Semantics-Driven Face Reconstruction, Prompt Editing and Relighting with Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71704", "id": "QUkYZNhfc6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ae0cba715b60c4052359b3d52a2cff7f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QUkYZNhfc6", "openreview": "https://openreview.net/forum?id=QUkYZNhfc6", "poster": "/media/PosterPDFs/NeurIPS%202023/71704.png?t=1701709372.8419945", "slides": "https://nips.cc/virtual/2023/poster/71704", "video": "https://nips.cc/virtual/2023/poster/71704", "author_site": "Hao ZHANG, Tianyuan DAI, Yanbo Xu, Yu-Wing Tai, Chi-Keung Tang", "tldr": "", "abstract": "The ability to create high-quality 3D faces from a single image has become increasingly important with wide applications in video conferencing, AR/VR, and advanced video editing in movie industries. In this paper, we propose Face Diffusion NeRF (FaceDNeRF), a new generative method to reconstruct high-quality Face NeRFs from single images, complete with semantic editing and relighting capabilities. FaceDNeRF utilizes high-resolution 3D GAN inversion and an expertly trained 2D latent-diffusion model, allowing users to manipulate and construct Face NeRFs in zero-shot learning without the need for explicit 3D data. \nWith carefully designed illumination and identity preserving loss, as well as multi-modal pre-training, FaceDNeRF offers users unparalleled control over the editing process, enabling them to create and edit face NeRFs using just single-view images, text prompts, and explicit target lighting. The advanced features of FaceDNeRF have been designed to produce more impressive results than existing 2D editing approaches that rely on 2D segmentation maps for editable attributes. Experiments show that our FaceDNeRF achieves exceptionally realistic results and unprecedented flexibility in editing compared with state-of-the-art 3D face reconstruction and editing methods. 
Our code will be available at https://github.com/BillyXYB/FaceDNeRF.", "keywords": "NeRF Editing;NeRF Relighting;Face;Diffusion model;3d synthesis;GAN inversion", "primary_area": "", "supplementary_material": "/attachment/11fa35358fbf30a15c37d5a8bb9cf8106b963944.zip", "author": "Hao ZHANG;Tianyuan DAI;Yanbo Xu;Yu-Wing Tai;Chi-Keung Tang", "authorids": "~Hao_ZHANG50;~Tianyuan_DAI1;~Yanbo_Xu3;~Yu-Wing_Tai2;~Chi-Keung_Tang1", "gender": "M;M;;M;Not Specified", "homepage": "https://zhang1023.github.io/ZHANG_Hao.github.io/;https://rogerdai1217.github.io/;https://yanbo-xu.netlify.app;https://yuwingtai.github.io/;http://www.cse.ust.hk/~cktang/", "dblp": "55/2270-106;334/1090;25/6978;40/566;34/4366", "google_scholar": "TgdWSd4AAAAJ;pUuRNGUAAAAJ;8bX2roMAAAAJ;nFhLmFkAAAAJ;https://scholar.google.com.tw/citations?user=EWfpM74AAAAJ", "orcid": "0009-0009-1097-5665;;;0000-0002-3148-0380;", "linkedin": "hao-zhang-547b16254/;tianyuandai/;;;", "or_profile": "~Hao_ZHANG50;~Tianyuan_DAI1;~Yanbo_Xu3;~Yu-Wing_Tai2;~ChiKeung_Tang1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Kuaishou Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk;kuaishou.com;ust.hk", "position": "Undergrad student;Undergrad student;Undergrad student;Senior Research Director;Professor", "bibtex": "@inproceedings{\nzhang2023facednerf,\ntitle={Face{DN}e{RF}: Semantics-Driven Face Reconstruction, Prompt Editing and Relighting with Diffusion Models},\nauthor={Hao ZHANG and Tianyuan DAI and Yanbo Xu and Yu-Wing Tai and Chi-Keung Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QUkYZNhfc6}\n}", "github": "", "project": "", "reviewers": "4TUt;1wAz;M2ze;JGgm", "pdf_size": 50780767, "rating": "4;5;5;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "2;3;3;3", "wc_summary": "72;57;110;52", "wc_strengths": "30;37;82;156", "wc_weaknesses": "142;174;106;134", "wc_questions": "83;78;58;11", "wc_limitations": "10;15;28;8", "wc_review": "337;361;384;361", "wc_reply_reviewers": "412;257;26;51", "wc_reply_authors": "1002;407;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 22.730761095924613 ], "wc_strengths_avg": [ 76.25, 50.18154541263152 ], "wc_weaknesses_avg": [ 139.0, 24.228082879171435 ], "wc_questions_avg": [ 57.5, 28.429737951659 ], "wc_limitations_avg": [ 15.25, 7.790218225441442 ], "wc_review_avg": [ 360.75, 16.618889854620253 ], "wc_reply_reviewers_avg": [ 186.5, 158.0672325309708 ], "wc_reply_authors_avg": [ 352.25, 410.2842764474408 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=274454995058870220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ust.hk;ust.hk;ust.hk;kuaishou.com;ust.hk", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Kuaishou Technology", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.ust.hk;https://www.kuaishou.com", "aff_unique_abbr": "HKUST;Kuaishou", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Bottleneck Structure in Learned Features: Low-Dimension vs Regularity Tradeoff", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71703", "id": "QVpfk2C3Dm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4a6695df88f2de0d49f875189ea181ef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QVpfk2C3Dm", "openreview": "https://openreview.net/forum?id=QVpfk2C3Dm", "poster": "/media/PosterPDFs/NeurIPS%202023/71703.png?t=1701781292.0781152", "slides": "https://nips.cc/virtual/2023/poster/71703", "video": "https://nips.cc/virtual/2023/poster/71703", "tldr": "", "abstract": "Previous work has shown that DNNs with\nlarge depth $L$ and $L_{2}$-regularization are biased towards learning\nlow-dimensional representations of the inputs, which can be interpreted\nas minimizing a notion of rank $R^{(0)}(f)$ of the learned function\n$f$, conjectured to be the Bottleneck rank. We compute finite depth\ncorrections to this result, revealing a measure $R^{(1)}$ of regularity\nwhich bounds the pseudo-determinant of the Jacobian $\\left\\|Jf(x)\\right\\|\\_\\+$\nand is subadditive under composition and addition. This formalizes\na balance between learning low-dimensional representations and minimizing\ncomplexity/irregularity in the feature maps, allowing the network\nto learn the `right' inner dimension. Finally, we prove the conjectured\nbottleneck structure in the learned features as $L\\to\\infty$: for\nlarge depths, almost all hidden representations are approximately\n$R^{(0)}(f)$-dimensional, and almost all weight matrices $W_{\\ell}$\nhave $R^{(0)}(f)$ singular values close to 1 while the others are\n$O(L^{-\\frac{1}{2}})$. 
Interestingly, the use of large learning rates\nis required to guarantee an order $O(L)$ NTK, which in turn guarantees\ninfinite depth convergence of the representations of almost all layers.", "keywords": "Feature Learning;Symmetry Learning;Theory of Deep Learning;Weight Decay", "primary_area": "", "supplementary_material": "/attachment/cd92cefe91e936b1ec750c726be276dbed396b60.zip", "author": "Arthur Jacot", "authorids": "~Arthur_Jacot1", "gender": "M", "homepage": "", "dblp": "222/2747", "google_scholar": "https://scholar.google.ch/citations?user=G6OhFawAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Arthur_Jacot1", "aff": "NYU, New York University", "aff_domain": "cims.nyu.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\njacot2023bottleneck,\ntitle={Bottleneck Structure in Learned Features: Low-Dimension vs Regularity Tradeoff},\nauthor={Arthur Jacot},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QVpfk2C3Dm}\n}", "github": "", "project": "", "reviewers": "expT;tanV;dQhS;SexW", "pdf_size": 597202, "rating": "5;6;6;8", "confidence": "4;2;3;2", "soundness": "2;3;4;4", "novelty": "3;3;3;4", "presentation": "4;2;3;3", "wc_summary": "41;127;111;238", "wc_strengths": "117;51;58;148", "wc_weaknesses": "289;34;211;77", "wc_questions": "2;24;43;63", "wc_limitations": "2;1;3;34", "wc_review": "451;237;426;560", "wc_reply_reviewers": "50;10;28;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 129.25, 70.62710173863854 ], "wc_strengths_avg": [ 93.5, 40.586327747161356 ], "wc_weaknesses_avg": [ 152.75, 102.22126735665138 ], "wc_questions_avg": [ 33.0, 22.594247055390007 ], "wc_limitations_avg": [ 10.0, 13.874436925511608 ], "wc_review_avg": [ 418.5, 116.27230968721659 ], "wc_reply_reviewers_avg": [ 22.0, 19.026297590440446 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10833742827546544340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cims.nyu.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "New York", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "CoDA: Collaborative Novel Box Discovery and Cross-modal Alignment for Open-vocabulary 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71702", "id": "QW5ouyyIgG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e352b765e625934ce86919995e2371aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QW5ouyyIgG", "openreview": "https://openreview.net/forum?id=QW5ouyyIgG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71702", "video": "https://nips.cc/virtual/2023/poster/71702", "author_site": "Yang Cao, Zeng Yihan, Hang Xu, Dan Xu", "tldr": "", "abstract": 
"Open-vocabulary 3D Object Detection (OV-3DDet) aims to detect objects from an arbitrary list of categories within a 3D scene, which remains seldom explored in the literature. There are primarily two fundamental problems in OV-3DDet, *i.e.*, localizing and classifying novel objects. This paper aims at addressing the two problems simultaneously via a unified framework, under the condition of limited base categories. To localize novel 3D objects, we propose an effective 3D Novel Object Discovery strategy, which utilizes both the 3D box geometry priors and 2D semantic open-vocabulary priors to generate pseudo box labels of the novel objects. To classify novel object boxes, we further develop a cross-modal alignment module based on discovered novel boxes, to align feature spaces between 3D point\ncloud and image/text modalities. Specifically, the alignment process contains a class-agnostic and a class-discriminative alignment, incorporating not only the base objects with annotations but also the increasingly discovered novel objects, resulting in an iteratively enhanced alignment. The novel box discovery and crossmodal alignment are jointly learned to collaboratively benefit each other. The\nnovel object discovery can directly impact the cross-modal alignment, while a better feature alignment can, in turn, boost the localization capability, leading to a unified OV-3DDet framework, named **CoDA**, for simultaneous novel object localization and classification. Extensive experiments on two challenging datasets (*i.e.*, SUN-RGBD and ScanNet) demonstrate the effectiveness of our method and also show a significant mAP improvement upon the best-performing alternative method by 80%. Codes and pre-trained models are released on [the project page](https://yangcaoai.github.io/publications/CoDA.html).", "keywords": "3D vision;open-vocabulary perception;multi-modal learning;point cloud;3D object detection", "primary_area": "", "supplementary_material": "/attachment/99b27cb1458b731cec4c0f82de5732653731d15a.pdf", "author": "Yang Cao;Yihan Zeng;Hang Xu;Dan Xu", "authorids": "~Yang_Cao9;~Yihan_Zeng1;~Hang_Xu1;~Dan_Xu4", "gender": "M;M;M;M", "homepage": "https://yangcaoai.github.io/;;https://www.danxurgb.net;", "dblp": "25/7045-17;;16/3823-2.html;", "google_scholar": "https://scholar.google.com/citations?hl;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;OuSPv-AAAAAJ;YiDxCoAAAAAJ", "orcid": "0000-0003-3830-7094;0000-0003-3645-8972;0000-0003-0136-9603;", "linkedin": "yang-cao-75b864148;;;", "or_profile": "~Yang_Cao9;~Hang_Xu1;~Dan_Xu4;~Zeng_Yihan1", "aff": "Hong Kong University of Science and Technology;Huawei Noah\u2018s Ark Lab;VGG, University of Oxford;Huawei Technologies Ltd.", "aff_domain": "hkust.edu;huawei.com;ox.ac.uk;huawei.com", "position": "PhD student;Researcher;Postdoc;Researcher", "bibtex": "@inproceedings{\ncao2023coda,\ntitle={Co{DA}: Collaborative Novel Box Discovery and Cross-modal Alignment for Open-vocabulary 3D Object Detection},\nauthor={Yang Cao and Yihan Zeng and Hang Xu and Dan Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QW5ouyyIgG}\n}", "github": "", "project": "", "reviewers": "goao;Qxx5;zhog;DwKh", "pdf_size": 1896668, "rating": "5;5;6;6", "confidence": "5;5;3;4", "soundness": "2;3;3;2", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "128;84;125;81", "wc_strengths": "80;126;77;14", "wc_weaknesses": "344;231;115;171", "wc_questions": "124;205;40;19", "wc_limitations": 
"15;24;11;20", "wc_review": "691;670;368;305", "wc_reply_reviewers": "86;0;14;104", "wc_reply_authors": "576;103;33;1222", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;2;5", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.5, 22.051077071199945 ], "wc_strengths_avg": [ 74.25, 39.83952183447989 ], "wc_weaknesses_avg": [ 215.25, 84.90104534103217 ], "wc_questions_avg": [ 97.0, 73.69871097922947 ], "wc_limitations_avg": [ 17.5, 4.924428900898052 ], "wc_review_avg": [ 508.5, 173.5950748149267 ], "wc_reply_reviewers_avg": [ 51.0, 44.73253849269008 ], "wc_reply_authors_avg": [ 483.5, 474.78126542651194 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14324395227725296160&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hkust.edu;huawei.com;ox.ac.uk;huawei.com", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei;University of Oxford", "aff_unique_dep": ";Noah's Ark Lab;VGG", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com;https://www.ox.ac.uk", "aff_unique_abbr": "HKUST;Huawei;Oxford", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;Oxford", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "DiffInfinite: Large Mask-Image Synthesis via Parallel Random Patch Diffusion in Histopathology", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73598", "id": "QXTjde8evS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f64927f5de00c47899e6e58c731966b6-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=QXTjde8evS", "openreview": "https://openreview.net/forum?id=QXTjde8evS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73598", "video": "https://nips.cc/virtual/2023/poster/73598", "author_site": "Marco Aversa, Gabriel Nobis, Miriam H\u00e4gele, Kai Standvoss, Mihaela Chirica, Roderick Murray-Smith, Ahmed Alaa, Lukas Ruff, Daniela Ivanova, Wojciech Samek, Frederick Klauschen, Bruno Sanguinetti, Luis Oala", "tldr": "", "abstract": "We present DiffInfinite, a hierarchical diffusion model that generates arbitrarily large histological images while preserving long-range correlation structural information. Our approach first generates synthetic segmentation masks, subsequently used as conditions for the high-fidelity generative diffusion process. The proposed sampling method can be scaled up to any desired image size while only requiring small patches for fast training. Moreover, it can be parallelized more efficiently than previous large-content generation methods while avoiding tiling artifacts. The training leverages classifier-free guidance to augment a small, sparsely annotated dataset with unlabelled data. Our method alleviates unique challenges in histopathological imaging practice: large-scale information, costly manual annotation, and protective data handling. 
The biological plausibility of DiffInfinite data is evaluated in a survey by ten experienced pathologists as well as in a downstream classification and segmentation task. Samples from the model score strongly on anti-copying metrics, which is relevant for the protection of patient data.", "keywords": "diffusion models;generative models;histopathology;medical imaging", "primary_area": "", "supplementary_material": "/attachment/a67782dcb71853b872ee70da924e5ff7caf16023.pdf", "author": "Marco Aversa;Gabriel Nobis;Miriam H\u00e4gele;Kai Standvoss;Mihaela Chirica;Roderick Murray-Smith;Ahmed Alaa;Lukas Ruff;Daniela Ivanova;Wojciech Samek;Frederick Klauschen;Bruno Sanguinetti;Luis Oala", "authorids": "~Marco_Aversa1;~Gabriel_Nobis1;~Miriam_H\u00e4gele2;~Kai_Standvoss1;~Mihaela_Chirica1;~Roderick_Murray-Smith1;~Ahmed_Alaa1;~Lukas_Ruff1;~Daniela_Ivanova1;~Wojciech_Samek1;~Frederick_Klauschen1;~Bruno_Sanguinetti1;~Luis_Oala1", "gender": "M;M;;M;;M;M;M;F;M;M;Not Specified;Non-Binary", "homepage": "https://marcoaversa.github.io;;;;https://www.med.lmu.de/pathologie/de/das-institut/personen/kontaktseite/chirica.html;http://www.dcs.gla.ac.uk/~rod/;https://alaalab.berkeley.edu/;;https://daniela997.github.io/;http://iphome.hhi.de/samek/;https://www.med.lmu.de/pathologie/de/index.html;;https://luisoala.net/", "dblp": "325/5090;;;;;78/604;140/7324;222/9848;315/4290;79/9736;;;https://dblp.uni-trier.de/pid/261/9215", "google_scholar": "XSd_7RgAAAAJ;6adniB0AAAAJ;;FqlYo_IAAAAJ;;https://scholar.google.co.uk/citations?user=laX7LzQAAAAJ;https://scholar.google.com.eg/citations?user=_pv1sEcAAAAJ;https://scholar.google.de/citations?user=40QzNXMAAAAJ;QM7r-egAAAAJ;7aQwO08AAAAJ;;https://scholar.google.ch/citations?user=m60wnowAAAAJ;v3ybnf0AAAAJ", "orcid": "0000-0002-7724-7488;;;;;;;0000-0002-9707-297X;0000-0002-3710-7413;;;;", "linkedin": "marco-aversa-5bb15b169/;;;;;rodms/;;lukasruff/;daniela-s-ivanova;;;;", "or_profile": "~Marco_Aversa1;~Gabriel_Nobis1;~Miriam_H\u00e4gele2;~Kai_Standvoss1;~Mihaela_Chirica1;~Roderick_Murray-Smith1;~Ahmed_Alaa1;~Lukas_Ruff1;~Daniela_Ivanova1;~Wojciech_Samek1;~Frederick_Klauschen1;~Bruno_Sanguinetti1;~Luis_Oala1", "aff": "University of Glasgow;Fraunhofer HHI;;Aignostics GmbH;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Glasgow;University of California, Berkeley;Aignostics GmbH;University of Glasgow;Fraunhofer HHI;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;;Dotphoton", "aff_domain": "gla.ac.uk;hhi.fraunhofer.de;;aignostics.com;lmu.de;gla.ac.uk;berkeley.edu;aignostics.com;glasgow.ac.uk;hhi.fraunhofer.de;lmu.de;;dotphoton.com", "position": "PhD student;PhD student;;Researcher;Researcher;Professor;Assistant Professor;Principal Researcher;PhD student;Assistant Professor;Full Professor;;PhD student", "bibtex": "@inproceedings{\naversa2023diffinfinite,\ntitle={DiffInfinite: Large Mask-Image Synthesis via Parallel Random Patch Diffusion in Histopathology},\nauthor={Marco Aversa and Gabriel Nobis and Miriam H{\\\"a}gele and Kai Standvoss and Mihaela Chirica and Roderick Murray-Smith and Ahmed Alaa and Lukas Ruff and Daniela Ivanova and Wojciech Samek and Frederick Klauschen and Bruno Sanguinetti and Luis Oala},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=QXTjde8evS}\n}", "github": "", "project": "", "reviewers": "Wamo;kGnb;hrbm;LR7b", "pdf_size": 24007199, "rating": "7;7;8;9", "confidence": "4;3;3;3", "wc_summary_and_contributions": "146;62;180;51", 
"wc_strengths": "134;47;110;48", "wc_improvement": "2;113;388;234", "wc_limitations": "131;21;14;47", "wc_correctness": "12;12;63;18", "wc_clarity": "6;31;4;19", "wc_relation_to_prior_work": "5;11;16;1", "wc_documentation": "14;28;107;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "451;326;883;433", "wc_reply_reviewers": "51;0;22;0", "wc_reply_authors": "921;354;1183;887", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;3;2", "rating_avg": [ 7.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 109.75, 54.728306204376544 ], "wc_strengths_avg": [ 84.75, 38.205856880850085 ], "wc_improvement_avg": [ 184.25, 143.42310657631148 ], "wc_limitations_avg": [ 53.25, 46.54231945230061 ], "wc_correctness_avg": [ 26.25, 21.358546298847212 ], "wc_clarity_avg": [ 15.0, 10.88577052853862 ], "wc_relation_to_prior_work_avg": [ 8.25, 5.717298313014636 ], "wc_documentation_avg": [ 40.75, 38.674119253061214 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 523.25, 213.1271627456247 ], "wc_reply_reviewers_avg": [ 18.25, 20.932928605429293 ], "wc_reply_authors_avg": [ 836.25, 301.06425809119224 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12578195399759715694&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "gla.ac.uk;hhi.fraunhofer.de;;aignostics.com;lmu.de;gla.ac.uk;berkeley.edu;aignostics.com;glasgow.ac.uk;hhi.fraunhofer.de;lmu.de;;dotphoton.com", "author_num": 13, "aff_unique_index": "0;1;2;3;0;4;2;0;1;3;5", "aff_unique_norm": "University of Glasgow;Fraunhofer Heinrich Hertz Institute;Aignostics;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of California, Berkeley;Dotphoton", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.gla.ac.uk;https://www.hhi.fraunhofer.de/;;https://www.lmu.de;https://www.berkeley.edu;", "aff_unique_abbr": "Glasgow;HHI;Aignostics;LMU;UC Berkeley;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;1;0;2;1;0;1;1", "aff_country_unique": "United Kingdom;Germany;United States;" }, { "title": "Counterfactually Fair Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71701", "id": "QZo1cge4Tc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2828ee0c871f78a98ed2a198a166a439-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QZo1cge4Tc", "openreview": "https://openreview.net/forum?id=QZo1cge4Tc", "poster": "/media/PosterPDFs/NeurIPS%202023/71701.png?t=1701380534.749651", "slides": "https://nips.cc/virtual/2023/poster/71701", "video": "https://nips.cc/virtual/2023/poster/71701", "author_site": "Zhiqun Zuo, Mahdi Khalili, Xueru Zhang", "tldr": "", "abstract": "The use of machine learning models in high-stake applications (e.g., healthcare, lending, college admission) has raised growing concerns due to potential biases against protected social groups. Various fairness notions and methods have been proposed to mitigate such biases. 
In this work, we focus on Counterfactual Fairness (CF), a fairness notion that is dependent on an underlying causal graph and first proposed by Kusner $\\textit{et al.}$; it requires that the outcome an individual perceives is the same in the real world as it would be in a \"counterfactual\" world, in which the individual belongs to another social group. \nLearning fair models satisfying CF can be challenging. It was shown in (Kusner $\\textit{et al.}$) that a sufficient condition for satisfying CF is to $\\textbf{not}$ use features that are descendants of sensitive attributes in the causal graph. This implies a simple method that learns CF models only using non-descendants of sensitive attributes while eliminating all descendants. Although several subsequent works proposed methods that use all features for training CF models, there is no theoretical guarantee that they can satisfy CF. In contrast, this work proposes a new algorithm that trains models using all the available features. We theoretically and empirically show that models trained with this method can satisfy CF.", "keywords": "Counterfactual fairness;Representation learning", "primary_area": "", "supplementary_material": "/attachment/a9d1734918c75524f38c958301488868a2811c9f.pdf", "author": "Zhiqun Zuo;Mohammad Mahdi Khalili;Xueru Zhang", "authorids": "~Zhiqun_Zuo1;~Mohammad_Mahdi_Khalili3;~Xueru_Zhang2", "gender": "M;M;F", "homepage": "https://github.com/zuozhiqun;https://Khalilimahdi.github.io;https://xueruzhang.github.io/", "dblp": "258/4850;159/2163.html;", "google_scholar": ";hSgnKecAAAAJ;PNBO_a4AAAAJ", "orcid": ";0000-0002-4223-3254;", "linkedin": ";mohammad-mahdi-khalili-aa4241127;", "or_profile": "~Zhiqun_Zuo1;~Mohammad_Mahdi_Khalili3;~Xueru_Zhang2", "aff": "Ohio State University, Columbus;Yahoo! 
Research;Ohio State University", "aff_domain": "osu.edu;yahooinc.com;osu.edu", "position": "PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nzuo2023counterfactually,\ntitle={Counterfactually Fair Representation},\nauthor={Zhiqun Zuo and Mohammad Mahdi Khalili and Xueru Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QZo1cge4Tc}\n}", "github": "", "project": "", "reviewers": "rDmX;7vQZ;jctR;TTY3", "pdf_size": 516918, "rating": "5;5;6;7", "confidence": "3;4;2;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "4;2;3;4", "wc_summary": "49;141;103;101", "wc_strengths": "73;91;58;168", "wc_weaknesses": "181;516;271;32", "wc_questions": "35;219;58;116", "wc_limitations": "6;53;1;165", "wc_review": "344;1020;491;582", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 98.5, 32.72231654391235 ], "wc_strengths_avg": [ 97.5, 42.34678264047931 ], "wc_weaknesses_avg": [ 250.0, 175.70002845759586 ], "wc_questions_avg": [ 107.0, 71.08093977994382 ], "wc_limitations_avg": [ 56.25, 65.9824787348884 ], "wc_review_avg": [ 609.25, 251.8922140519631 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3838845773927458284&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "osu.edu;yahooinc.com;osu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ohio State University;Yahoo!", "aff_unique_dep": ";Yahoo! Research", "aff_unique_url": "https://www.osu.edu;https://research.yahoo.com", "aff_unique_abbr": "OSU;Yahoo!", "aff_campus_unique_index": "0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Data Analysis in a Balanced Adversarial Model", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71700", "id": "QatZNssk7T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51ba8a68f471d952af625d1faf55e6c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QatZNssk7T", "openreview": "https://openreview.net/forum?id=QatZNssk7T", "poster": "/media/PosterPDFs/NeurIPS%202023/71700.png?t=1701486784.781592", "slides": "https://nips.cc/virtual/2023/poster/71700", "video": "https://nips.cc/virtual/2023/poster/71700", "author_site": "Kobbi Nissim, Uri Stemmer, Eliad Tsfadia", "tldr": "", "abstract": "In adaptive data analysis, a mechanism gets $n$ i.i.d. samples from an unknown distribution $\\cal{D}$, and\nis required to provide accurate estimations to a sequence of adaptively chosen statistical queries with respect to $\\cal{D}$.\nHardt and Ullman (FOCS 2014) and Steinke and Ullman (COLT 2015) showed that in general, it is computationally hard to answer more than $\\Theta(n^2)$ adaptive queries, assuming the existence of one-way functions. 
\n\nHowever, these negative results strongly rely on an adversarial model that significantly advantages the adversarial analyst over the mechanism, as the analyst, who chooses the adaptive queries, also chooses the underlying distribution $\\cal{D}$. \nThis imbalance raises questions with respect to the applicability of the obtained hardness results -- an analyst who has complete knowledge of the underlying distribution $\\cal{D}$ would have little need, if at all, to issue statistical queries to a mechanism which only holds a finite number of samples from $\\cal{D}$.\n\nWe consider more restricted adversaries, called \\emph{balanced}, where each such adversary consists of two separated algorithms: The \\emph{sampler} who is the entity that chooses the distribution and provides the samples to the mechanism, and the \\emph{analyst} who chooses the adaptive queries, but has no prior knowledge of the underlying distribution (and hence has no a priori advantage with respect to the mechanism). \nWe improve the quality of previous lower bounds by revisiting them using an efficient \\emph{balanced} adversary, under standard public-key cryptography assumptions. We show that these stronger hardness assumptions are unavoidable in the sense that any computationally bounded \\emph{balanced} adversary that has the structure of all known attacks, implies the existence of public-key cryptography.", "keywords": "Adaptive Data Analysis;Differential Privacy;Statistical Queries", "primary_area": "", "supplementary_material": "/attachment/493e9ab222eae3538f56d70be0864d89b0adc6e3.pdf", "author": "Kobbi Nissim;Uri Stemmer;Eliad Tsfadia", "authorids": "~Kobbi_Nissim2;~Uri_Stemmer1;~Eliad_Tsfadia1", "gender": "M;;M", "homepage": "http://people.cs.georgetown.edu/~kobbi/;https://www.uri.co.il/;https://sites.google.com/view/eliadtsfadia", "dblp": "65/801;125/8532;146/9658", "google_scholar": "https://scholar.google.com.tw/citations?user=U-RE8IgAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kobbi_Nissim2;~Uri_Stemmer1;~Eliad_Tsfadia1", "aff": "Georgetown University;Tel Aviv University;Georgetown University", "aff_domain": "georgetwon.edu;tau.ac.il;georgetown.edu", "position": "Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nnissim2023adaptive,\ntitle={Adaptive Data Analysis in a Balanced Adversarial Model},\nauthor={Kobbi Nissim and Uri Stemmer and Eliad Tsfadia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QatZNssk7T}\n}", "github": "", "project": "", "reviewers": "Mqg2;Ejfp;vTGp;CEqx", "pdf_size": 363142, "rating": "5;6;6;9", "confidence": "2;2;4;5", "soundness": "3;2;4;4", "novelty": "2;3;3;4", "presentation": "2;2;4;3", "wc_summary": "17;52;370;246", "wc_strengths": "39;32;77;89", "wc_weaknesses": "100;47;480;123", "wc_questions": "11;43;16;39", "wc_limitations": "1;15;13;1", "wc_review": "168;189;956;498", "wc_reply_reviewers": "5;5;58;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 171.25, 144.1377379453417 ], "wc_strengths_avg": [ 59.25, 24.252577182641847 ], "wc_weaknesses_avg": [ 187.5, 171.1088834631329 ], "wc_questions_avg": [ 27.25, 13.935117509371782 ], 
"wc_limitations_avg": [ 7.5, 6.5383484153110105 ], "wc_review_avg": [ 452.75, 318.5728919729361 ], "wc_reply_reviewers_avg": [ 21.25, 21.775846711436962 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8339503888294595, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7902847929708597940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "georgetwon.edu;tau.ac.il;georgetown.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgetown University;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://www.georgetown.edu;https://www.tau.ac.il", "aff_unique_abbr": "GU;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "Adaptive Privacy Composition for Accuracy-first Mechanisms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71699", "id": "QezJbfW01r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/33301bb40020a56ef56b8b5081e5c4d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QezJbfW01r", "openreview": "https://openreview.net/forum?id=QezJbfW01r", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71699", "video": "https://nips.cc/virtual/2023/poster/71699", "author_site": "Ryan Rogers, Gennady Samorodnitsk, Steven Wu, Aaditya Ramdas", "tldr": "", "abstract": "Although there has been work to develop ex-post private mechanisms from Ligett et al. '17 and Whitehouse et al '22 that seeks to provide privacy guarantees subject to a target level of accuracy, there was not a way to use them in conjunction with differentially private mechanisms. Furthermore, there has yet to be work in developing a theory for how these ex-post privacy mechanisms compose, so that we can track the accumulated privacy over several mechanisms. We develop privacy filters that allow an analyst to adaptively switch between differentially private mechanisms and ex-post private mechanisms subject to an overall privacy loss guarantee. \n We show that using a particular ex-post private mechanism --- noise reduction mechanisms --- can substantially outperform baseline approaches that use existing privacy loss composition bounds. 
We use the common task of returning as many counts as possible subject to a relative error guarantee and an overall privacy budget as a motivating example.", "keywords": "differential privacy;brownian motion;composition;martingale", "primary_area": "", "supplementary_material": "/attachment/e09ed1fb373d908d8808d4cd86edfa5da41158e8.zip", "author": "Ryan Rogers;Gennady Samorodnitsky;Steven Wu;Aaditya Ramdas", "authorids": "~Ryan_Rogers1;~Gennady_Samorodnitsky1;~Steven_Wu1;~Aaditya_Ramdas2", "gender": "M;;M;M", "homepage": "https://www.math.upenn.edu/~ryrogers/;https://people.orie.cornell.edu/gennady/;http://stat.cmu.edu/~aramdas;https://zstevenwu.com/", "dblp": "137/8445;;117/3518;137/8350", "google_scholar": "jr7gGB4AAAAJ;;ZvFaPxUAAAAJ;MbF6rTEAAAAJ", "orcid": ";;0000-0003-0497-311X;", "linkedin": "http://linkedin.com/in/rrogers386;;;zstevenwu/", "or_profile": "~Ryan_Rogers1;~Gennady_Samorodnitsky1;~Aaditya_Ramdas2;~Zhiwei_Steven_Wu1", "aff": "LinkedIn;Cornell University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "linkedin.com;cornell.edu;cmu.edu;cmu.edu", "position": "Senior software engineer;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nrogers2023adaptive,\ntitle={Adaptive Privacy Composition for Accuracy-first Mechanisms},\nauthor={Ryan Rogers and Gennady Samorodnitsky and Steven Wu and Aaditya Ramdas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QezJbfW01r}\n}", "github": "", "project": "", "reviewers": "njjx;hK6N;dJHg;xyZp;HnZw", "pdf_size": 678164, "rating": "5;5;6;6;7", "confidence": "3;2;3;5;2", "soundness": "3;3;3;4;3", "novelty": "3;4;3;2;3", "presentation": "2;1;4;3;4", "wc_summary": "134;101;46;294;86", "wc_strengths": "30;36;55;59;30", "wc_weaknesses": "69;40;131;210;63", "wc_questions": "39;120;608;8;14", "wc_limitations": "1;2;98;1;12", "wc_review": "273;299;938;572;205", "wc_reply_reviewers": "37;50;395;12;24", "wc_reply_authors": "0;43;111;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 132.2, 85.69807465748575 ], "wc_strengths_avg": [ 42.0, 12.505998560690786 ], "wc_weaknesses_avg": [ 102.6, 61.57467011685893 ], "wc_questions_avg": [ 157.8, 228.62230862275885 ], "wc_limitations_avg": [ 22.8, 37.828031934003654 ], "wc_review_avg": [ 457.4, 270.8908267180711 ], "wc_reply_reviewers_avg": [ 103.6, 146.25265809550265 ], "wc_reply_authors_avg": [ 30.8, 43.42073237521449 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9517350242248412724&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "linkedin.com;cornell.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "LinkedIn Corporation;Cornell University;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.linkedin.com;https://www.cornell.edu;https://www.cmu.edu", "aff_unique_abbr": "LinkedIn;Cornell;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, 
{ "title": "Ethical Considerations for Responsible Data Curation", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73597", "id": "Qf8uzIT1OK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad3ebc951f43d1e9ed20187a7b5bc4ee-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Qf8uzIT1OK", "openreview": "https://openreview.net/forum?id=Qf8uzIT1OK", "poster": "/media/PosterPDFs/NeurIPS%202023/73597.png?t=1701503946.5888417", "slides": "https://nips.cc/virtual/2023/poster/73597", "video": "https://nips.cc/virtual/2023/poster/73597", "author_site": "Jerone Andrews, Dora Zhao, William Thong, Apostolos Modas, Orestis Papakyriakopoulos, Alice Xiang", "tldr": "", "abstract": "Human-centric computer vision (HCCV) data curation practices often neglect privacy and bias concerns, leading to dataset retractions and unfair models. HCCV datasets constructed through nonconsensual web scraping lack crucial metadata for comprehensive fairness and robustness evaluations. Current remedies are post hoc, lack persuasive justification for adoption, or fail to provide proper contextualization for appropriate application. Our research focuses on proactive, domain-specific recommendations, covering purpose, privacy and consent, and diversity, for curating HCCV evaluation datasets, addressing privacy and bias concerns. We adopt an ante hoc reflective perspective, drawing from current practices, guidelines, dataset withdrawals, and audits, to inform our considerations and recommendations.", "keywords": "human-centric;datasets;computer vision;fairness;algorithmic bias;robustness;responsible AI", "primary_area": "", "supplementary_material": "", "author": "Jerone Andrews;Dora Zhao;William Thong;Apostolos Modas;Orestis Papakyriakopoulos;Alice Xiang", "authorids": "~Jerone_Andrews1;~Dora_Zhao1;~William_Thong1;~Apostolos_Modas1;~Orestis_Papakyriakopoulos1;~Alice_Xiang1", "gender": ";F;;;;", "homepage": ";https://dorazhao99.github.io;;;https://www.civicmachines.com;", "dblp": ";295/8515;;;203/6747;", "google_scholar": ";I-OInyYAAAAJ;;;9z-fD3sAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Jerone_Andrews1;~Dora_Zhao1;~William_Thong1;~Apostolos_Modas1;~Orestis_Papakyriakopoulos1;~Alice_Xiang1", "aff": ";Sony AI;;;Sony AI;", "aff_domain": ";sony.com;;;sony.com;", "position": ";AI Engineer;;;Researcher;", "bibtex": "@inproceedings{\nandrews2023ethical,\ntitle={Ethical Considerations for Responsible Data Curation},\nauthor={Jerone Andrews and Dora Zhao and William Thong and Apostolos Modas and Orestis Papakyriakopoulos and Alice Xiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Qf8uzIT1OK}\n}", "github": "", "project": "", "reviewers": "qLN4;c5ch;1b4S;so9D", "pdf_size": 384042, "rating": "7;9;9;9", "confidence": "4;3;5;4", "wc_summary_and_contributions": "100;42;86;50", "wc_strengths": "135;50;175;60", "wc_improvement": "241;29;106;140", "wc_limitations": "23;35;75;60", "wc_correctness": "272;12;6;18", "wc_clarity": "21;8;3;35", "wc_relation_to_prior_work": "27;17;8;29", "wc_documentation": "23;7;1;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "843;201;461;395", "wc_reply_reviewers": "59;78;0;0", "wc_reply_authors": "1036;932;446;735", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;2", "rating_avg": [ 8.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], 
"wc_summary_and_contributions_avg": [ 69.5, 24.181604578687494 ], "wc_strengths_avg": [ 105.0, 52.08166663999915 ], "wc_improvement_avg": [ 129.0, 76.14788243936925 ], "wc_limitations_avg": [ 48.25, 20.41292482717751 ], "wc_correctness_avg": [ 77.0, 112.66321493726335 ], "wc_clarity_avg": [ 16.75, 12.417225938187643 ], "wc_relation_to_prior_work_avg": [ 20.25, 8.407585860400118 ], "wc_documentation_avg": [ 8.25, 8.814051281902097 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 475.0, 232.96780893505436 ], "wc_reply_reviewers_avg": [ 34.25, 34.90254288730264 ], "wc_reply_authors_avg": [ 787.25, 224.72803007190714 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6174975027305747844&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";sony.com;;;sony.com;", "author_num": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Sony", "aff_unique_dep": "Sony AI", "aff_unique_url": "https://www.sony.com", "aff_unique_abbr": "Sony AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Neural-Logic Human-Object Interaction Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71698", "id": "QjI36zxjbW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42b7c2f6d320d1fe1afa899a6319d6d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QjI36zxjbW", "openreview": "https://openreview.net/forum?id=QjI36zxjbW", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71698", "video": "https://nips.cc/virtual/2023/poster/71698", "author_site": "Liulei Li, Jianan Wei, Wenguan Wang, Yi Yang", "tldr": "", "abstract": "The interaction decoder utilized in prevalent Transformer-based HOI detectors typically accepts pre-composed human-object pairs as inputs. Though achieving remarkable performance, such a paradigm lacks feasibility and cannot explore novel combinations over entities during decoding. We present LogicHOI, a new HOI detector that leverages neural-logic reasoning and Transformer to infer feasible interactions between. entities. Specifically, we modify. self-attention mechanism in the vanilla Transformer, enabling it to reason over the \u27e8 human, action, object \u27e9 triplet and constitute novel interactions. Meanwhile, such a reasoning process is guided by two crucial properties for understanding HOI: affordances (the potential actions an object can facilitate) and proxemics (the spatial relations between humans and objects). We formulate these two properties in first-order logic and ground them into continuous space to constrain the learning process of our approach, leading to improved performance and zero-shot generalization capabilities. 
We evaluate LogicHOI on V-COCO and HICO-DET under both normal and zero-shot setups, achieving significant improvements over existing methods.", "keywords": "Human-Object Interaction;Neuro-Symbolic Computing;Compositional Generalization", "primary_area": "", "supplementary_material": "/attachment/a4a9f1bd77002746c9a25667257afc89f91edc8d.pdf", "author": "Liulei Li;Jianan Wei;Wenguan Wang;Yi Yang", "authorids": "~Liulei_Li1;~Jianan_Wei2;~Wenguan_Wang4;~Yi_Yang22", "gender": "M;M;M;M", "homepage": ";https://github.com/weijianan1;https://sites.google.com/view/wenguanwang/;https://person.zju.edu.cn/yiyang", "dblp": "295/8925;;145/1078;33/4854-1.html", "google_scholar": "eCrBWngAAAAJ;;CqAQQkgAAAAJ;RMSuNFwAAAAJ", "orcid": "0000-0002-4637-0328;;0000-0002-0802-9567;", "linkedin": ";;wenguanwang;", "or_profile": "~Liulei_Li1;~Jianan_Wei2;~Wenguan_Wang4;~Yi_Yang22", "aff": "University of Technology Sydney;Sichuan University;University of Technology Sydney;Zhejiang University", "aff_domain": "uts.edu.au;scu.edu.cn;uts.edu.au;zju.edu.cn", "position": "PhD student;MS student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nli2023neurallogic,\ntitle={Neural-Logic Human-Object Interaction Detection},\nauthor={Liulei Li and Jianan Wei and Wenguan Wang and Yi Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QjI36zxjbW}\n}", "github": "", "project": "", "reviewers": "uwRp;AydW;jaUK;mDiH;gPtK", "pdf_size": 913749, "rating": "4;4;5;5;6", "confidence": "4;4;5;4;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;1;3;3;3", "wc_summary": "110;55;31;68;102", "wc_strengths": "55;41;1;41;120", "wc_weaknesses": "41;212;229;203;225", "wc_questions": "5;63;26;5;70", "wc_limitations": "11;7;1;1;21", "wc_review": "222;378;288;318;538", "wc_reply_reviewers": "0;0;0;21;0", "wc_reply_authors": "61;61;61;60;61", "reply_reviewers": "0;0;0;1;0", "reply_authors": "2;2;2;2;2", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 73.2, 29.403401163810965 ], "wc_strengths_avg": [ 51.6, 38.66574711550263 ], "wc_weaknesses_avg": [ 182.0, 71.10555533852471 ], "wc_questions_avg": [ 33.8, 27.86682615584344 ], "wc_limitations_avg": [ 8.2, 7.4404300950953095 ], "wc_review_avg": [ 348.8, 107.11937266433182 ], "wc_reply_reviewers_avg": [ 4.2, 8.4 ], "wc_reply_authors_avg": [ 60.8, 0.4 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13363062095621217, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5354121877349384395&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uts.edu.au;scu.edu.cn;uts.edu.au;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Technology Sydney;Sichuan University;Zhejiang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uts.edu.au;https://www.scu.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;SCU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Australia;China" }, { "title": "ProPILE: Probing Privacy Leakage in Large Language Models", "status": "Spotlight", "track": "main", "site":
"https://nips.cc/virtual/2023/poster/71697", "id": "QkLpGxUboF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/420678bb4c8251ab30e765bc27c3b047-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QkLpGxUboF", "openreview": "https://openreview.net/forum?id=QkLpGxUboF", "poster": "/media/PosterPDFs/NeurIPS%202023/71697.png?t=1701754095.4490647", "slides": "https://nips.cc/virtual/2023/poster/71697", "video": "https://nips.cc/virtual/2023/poster/71697", "author_site": "Siwon Kim, Sangdoo Yun, Hwaran Lee, Martin Gubri, Sungroh Yoon, Seong Joon Oh", "tldr": "", "abstract": "The rapid advancement and widespread use of large language models (LLMs) have raised significant concerns regarding the potential leakage of personally identifiable information (PII). These models are often trained on vast quantities of web-collected data, which may inadvertently include sensitive personal data. This paper presents ProPILE, a novel probing tool designed to empower data subjects, or the owners of the PII, with awareness of potential PII leakage in LLM-based services. ProPILE lets data subjects formulate prompts based on their own PII to evaluate the level of privacy intrusion in LLMs. We demonstrate its application on the OPT-1.3B model trained on the publicly available Pile dataset. We show how hypothetical data subjects may assess the likelihood of their PII being included in the Pile dataset being revealed. ProPILE can also be leveraged by LLM service providers to effectively evaluate their own levels of PII leakage with more powerful prompts specifically tuned for their in-house models. This tool represents a pioneering step towards empowering the data subjects for their awareness and control over their own data on the web.", "keywords": "Personal identifiable information;Private data leakage;Large language model", "primary_area": "", "supplementary_material": "/attachment/8c87a7dff85bc536642178b01430600270202acb.pdf", "author": "Siwon Kim;Sangdoo Yun;Hwaran Lee;Martin Gubri;Sungroh Yoon;Seong Joon Oh", "authorids": "~Siwon_Kim1;~Sangdoo_Yun1;~Hwaran_Lee1;~Martin_Gubri1;~Sungroh_Yoon1;~Seong_Joon_Oh1", "gender": "F;M;F;M;;M", "homepage": ";https://sangdooyun.github.io/;https://hwaranlee.github.io;https://gubri.eu;http://ailab.snu.ac.kr;https://seongjoonoh.com", "dblp": "130/6584;124/3009.html;127/9475;213/7879;99/1474;168/8835", "google_scholar": "https://scholar.google.co.kr/citations?user=xMRzdH0AAAAJ;o0qtjzYAAAAJ;https://scholar.google.co.kr/citations?user=Jf6padoAAAAJ;Jt4OYwMAAAAJ;Bphl_fIAAAAJ;https://scholar.google.de/citations?user=kmXOOdsAAAAJ", "orcid": ";;0000-0002-3773-4871;0000-0001-6744-6662;0000-0002-2367-197X;0000-0002-8985-7689", "linkedin": ";;hwaranlee/;;;seong-joon-oh-32113479/", "or_profile": "~Siwon_Kim1;~Sangdoo_Yun1;~Hwaran_Lee1;~Martin_Gubri1;~Sungroh_Yoon1;~Seong_Joon_Oh1", "aff": "Seoul National University;NAVER;NAVER AI Lab;University of Luxemburg;Seoul National University;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "snu.ac.kr;navercorp.com;navercorp.com;uni.lu;snu.ac.kr;uni-tuebingen.de", "position": "PhD student;Research Scientist;Lead;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2023propile,\ntitle={Pro{PILE}: Probing Privacy Leakage in Large Language Models},\nauthor={Siwon Kim and Sangdoo Yun and Hwaran Lee and Martin Gubri and Sungroh Yoon and Seong Joon Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QkLpGxUboF}\n}", "github": "", "project": "", "reviewers": "CvUa;EPof;udNB;idGg;9AMv", "pdf_size": 8979889, "rating": "6;7;7;7;7", "confidence": "4;4;3;4;4", "soundness": "3;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "84;68;55;84;91", "wc_strengths": "53;105;70;101;78", "wc_weaknesses": "190;35;65;93;157", "wc_questions": "2;65;37;70;36", "wc_limitations": "7;10;66;1;80", "wc_review": "336;283;293;349;442", "wc_reply_reviewers": "17;9;0;98;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 76.4, 13.093509842666327 ], "wc_strengths_avg": [ 81.4, 19.438106903708498 ], "wc_weaknesses_avg": [ 108.0, 57.494347548259036 ], "wc_questions_avg": [ 42.0, 24.38852189043034 ], "wc_limitations_avg": [ 32.8, 33.246954747766 ], "wc_review_avg": [ 340.6, 56.49283140363917 ], "wc_reply_reviewers_avg": [ 24.8, 37.14512081014141 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 174, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12649216482433071016&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "snu.ac.kr;navercorp.com;navercorp.com;uni.lu;snu.ac.kr;uni-tuebingen.de", "author_num": 6, "aff_unique_index": "0;1;1;2;0;3", "aff_unique_norm": "Seoul National University;NAVER Corporation;University of Luxembourg;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com;https://wwwen.uniluxembourg.lu;https://www.uni-tuebingen.de/", "aff_unique_abbr": "SNU;NAVER;Uni Lu;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0;1;0;2", "aff_country_unique": "South Korea;Luxembourg;Germany" }, { "title": "Weakly-Supervised Concealed Object Segmentation with SAM-based Pseudo Labeling and Multi-scale Feature Grouping", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71696", "id": "QlHosp050r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61aa557643ae8709b6a4f41140b2234a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QlHosp050r", "openreview": "https://openreview.net/forum?id=QlHosp050r", "poster": "/media/PosterPDFs/NeurIPS%202023/71696.png?t=1701524732.1652708", "slides": "https://nips.cc/virtual/2023/poster/71696", "video": "https://nips.cc/virtual/2023/poster/71696", "author_site": "Chunming He, Kai Li, Yachao Zhang, Guoxia Xu, Longxiang Tang, Yulun Zhang, Zhenhua Guo, Xiu Li", "tldr": "", "abstract": "Weakly-Supervised Concealed Object Segmentation (WSCOS) aims to segment objects well blended with surrounding environments using sparsely-annotated data for model training. It remains a challenging task since (1) it is hard to distinguish concealed objects from the background due to the intrinsic similarity and (2) the sparsely-annotated training data only provide weak supervision for model learning. In this paper, we propose a new WSCOS method to address these two challenges. 
To tackle the intrinsic similarity challenge, we design a multi-scale feature grouping module that first groups features at different granularities and then aggregates these grouping results. By grouping similar features together, it encourages segmentation coherence, helping obtain complete segmentation results for both single and multiple-object images. For the weak supervision challenge, we utilize the recently-proposed vision foundation model, ``Segment Anything Model (SAM)'', and use the provided sparse annotations as prompts to generate segmentation masks, which are used to train the model. To alleviate the impact of low-quality segmentation masks, we further propose a series of strategies, including multi-augmentation result ensemble, entropy-based pixel-level weighting, and entropy-based image-level selection. These strategies help provide more reliable supervision to train the segmentation model. We verify the effectiveness of our method on various WSCOS tasks, and experiments demonstrate that our method achieves state-of-the-art performance on these tasks.", "keywords": "Concealed Object Segmentation;Weakly-Supervised Learning;Segment Anything Model", "primary_area": "", "supplementary_material": "/attachment/8c610f1f7213455174e3d4da5574a109b46aa949.pdf", "author": "Chunming He;Kai Li;Yachao Zhang;Guoxia Xu;Longxiang Tang;Yulun Zhang;Zhenhua Guo;Xiu Li", "authorids": "~Chunming_He1;~Kai_Li11;~Yachao_Zhang1;~Guoxia_Xu2;~Longxiang_Tang1;~Yulun_Zhang1;~Zhenhua_Guo3;~Xiu_Li1", "gender": "M;;M;M;;M;F;M", "homepage": "https://chunminghe.github.io;;https://yachao-zhang.github.io/;;https://scholar.google.com/citations?user=3oMQsq8AAAAJ;http://yulunzhang.com/;https://thusigsiclab.github.io/thu.github.io/introduction.html;https://www-en.sz.tsinghua.edu.cn/INFORMATIONSCIENCE/108561.jhtml", "dblp": "251/5104;https://dblp.uni-trier.de/pers/hd/l/Li_0012:Kai;40/10584-1;206/7165;347/9498;166/2763-1.html;13/1206-1;41/294-1", "google_scholar": "https://scholar.google.com/citations?hl=en;YsROc4UAAAAJ;https://scholar.google.de/citations?user=a-I8c8EAAAAJ;https://scholar.google.com.hk/citations?user=tfwlUZkAAAAJ;;ORmLjWoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;dbR6bD0AAAAJ", "orcid": "0000-0001-6479-7109;;0000-0002-6153-5004;0000-0002-0036-8820;0009-0005-2704-3718;0000-0002-2288-5079;0000-0003-0403-1923;0000-0002-8201-0864", "linkedin": "https://www.linkedin.com/feed/?trk=guest_homepage-basic_nav-header-join;;;;;yulun-zhang-1116b5b9/;;zhenhua-guo-71589020/", "or_profile": "~Chunming_He1;~Kai_Li11;~Yachao_Zhang1;~Guoxia_Xu2;~Longxiang_Tang1;~Yulun_Zhang1;~Xiu_Li1;~Zhenhua_Guo1", "aff": "Tsinghua University;NEC-Labs;Tsinghua University;Xidian University;Shanghai Artificial Intelligence Laboratory;Swiss Federal Institute of Technology;Tsinghua University;TianyiJiaotong Technology", "aff_domain": "tsinghua.edu.cn;nec-labs.com;tsinghua.edu.cn;xidian.edu;pjlab.org.cn;ethz.ch;tsinghua.edu.cn;tyjt-ai.com", "position": "MS student;NEC Labs, America;Postdoc;Researcher;Intern;Postdoc;Professor;Principal Researcher", "bibtex": "@inproceedings{\nhe2023weaklysupervised,\ntitle={Weakly-Supervised Concealed Object Segmentation with {SAM}-based Pseudo Labeling and Multi-scale Feature Grouping},\nauthor={Chunming He and Kai Li and Yachao Zhang and Guoxia Xu and Longxiang Tang and Yulun Zhang and Zhenhua Guo and Xiu Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QlHosp050r}\n}", "github": "", "project": "", 
"reviewers": "ka5E;yhjR;76do;DmES", "pdf_size": 1558841, "rating": "5;5;6;7", "confidence": "5;4;5;4", "soundness": "3;2;2;3", "novelty": "3;2;2;2", "presentation": "3;1;3;2", "wc_summary": "70;83;123;113", "wc_strengths": "33;92;45;28", "wc_weaknesses": "161;529;174;357", "wc_questions": "42;57;3;157", "wc_limitations": "1;6;3;11", "wc_review": "307;767;348;666", "wc_reply_reviewers": "37;51;39;320", "wc_reply_authors": "107;56;49;1203", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 97.25, 21.545011023436494 ], "wc_strengths_avg": [ 49.5, 25.30316185775999 ], "wc_weaknesses_avg": [ 305.25, 150.6459010394906 ], "wc_questions_avg": [ 64.75, 56.79073427945795 ], "wc_limitations_avg": [ 5.25, 3.766629793329841 ], "wc_review_avg": [ 522.0, 198.28136574070697 ], "wc_reply_reviewers_avg": [ 111.75, 120.35234729742498 ], "wc_reply_authors_avg": [ 353.75, 490.8255163497513 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3038940552279487688&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "tsinghua.edu.cn;nec-labs.com;tsinghua.edu.cn;xidian.edu;pjlab.org.cn;ethz.ch;tsinghua.edu.cn;tyjt-ai.com", "author_num": 8, "aff_unique_index": "0;1;0;2;3;4;0;5", "aff_unique_norm": "Tsinghua University;NEC Laboratories;Xidian University;Shanghai Artificial Intelligence Laboratory;Swiss Federal Institute of Technology;TianyiJiao Tong Technology", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nec-labs.com;http://www.xidian.edu.cn/;http://www.shailab.org/;https://www.ethz.ch;", "aff_unique_abbr": "THU;NEC-Labs;Xidian;Shanghai AI Lab;ETH Zurich;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2;0;0", "aff_country_unique": "China;United States;Switzerland" }, { "title": "Goal-conditioned Offline Planning from Curious Exploration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71695", "id": "QlbZabgMdK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31ceb5aed43e2ec1b132e389cc1dcb56-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QlbZabgMdK", "openreview": "https://openreview.net/forum?id=QlbZabgMdK", "poster": "/media/PosterPDFs/NeurIPS%202023/71695.png?t=1701424176.6096773", "slides": "https://nips.cc/virtual/2023/poster/71695", "video": "https://nips.cc/virtual/2023/poster/71695", "author_site": "Marco Bagatella, Georg Martius", "tldr": "", "abstract": "Curiosity has established itself as a powerful exploration strategy in deep reinforcement learning. Notably, leveraging expected future novelty as intrinsic motivation has been shown to efficiently generate exploratory trajectories, as well as a robust dynamics model. We consider the challenge of extracting goal-conditioned behavior from the products of such unsupervised exploration techniques, without any additional environment interaction. We find that conventional goal-conditioned reinforcement learning approaches for extracting a value function and policy fall short in this difficult offline setting. 
By analyzing the geometry of optimal goal-conditioned value functions, we relate this issue to a specific class of estimation artifacts in learned values. In order to mitigate their occurrence, we propose to combine model-based planning over learned value landscapes with a graph-based value aggregation scheme. We show how this combination can correct both local and global artifacts, obtaining significant improvements in zero-shot goal-reaching performance across diverse simulated environments.", "keywords": "deep reinforcement learning;unsupervised reinforcement learning;goal-conditioned reinforcement learning;model-based planning", "primary_area": "", "supplementary_material": "", "author": "Marco Bagatella;Georg Martius", "authorids": "~Marco_Bagatella1;~Georg_Martius1", "gender": ";M", "homepage": ";https://uni-tuebingen.de/de/264672", "dblp": ";47/2706", "google_scholar": ";https://scholar.google.de/citations?user=b-JF-UIAAAAJ", "orcid": ";", "linkedin": "marco-bagatella-9b8017197/;", "or_profile": "~Marco_Bagatella1;~Georg_Martius1", "aff": "Max Planck Institute for Intelligent Systems, Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems", "aff_domain": "is.tue.mpg.de;tuebingen.mpg.de", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbagatella2023goalconditioned,\ntitle={Goal-conditioned Offline Planning from Curious Exploration},\nauthor={Marco Bagatella and Georg Martius},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QlbZabgMdK}\n}", "github": "", "project": "", "reviewers": "7YsN;yLbp;nGpW;cdVD", "pdf_size": 2638448, "rating": "5;6;6;6", "confidence": "3;4;4;3", "soundness": "4;4;3;3", "novelty": "3;3;3;3", "presentation": "2;4;4;3", "wc_summary": "79;56;101;89", "wc_strengths": "64;32;175;17", "wc_weaknesses": "486;127;131;138", "wc_questions": "129;13;105;11", "wc_limitations": "38;1;53;11", "wc_review": "796;229;565;266", "wc_reply_reviewers": "103;0;0;0", "wc_reply_authors": "450;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 81.25, 16.528384676065595 ], "wc_strengths_avg": [ 72.0, 61.84254199173899 ], "wc_weaknesses_avg": [ 220.5, 153.33704705647622 ], "wc_questions_avg": [ 64.5, 53.185994397021474 ], "wc_limitations_avg": [ 25.75, 20.753011829611623 ], "wc_review_avg": [ 464.0, 231.76173109467405 ], "wc_reply_reviewers_avg": [ 25.75, 44.60030829489859 ], "wc_reply_authors_avg": [ 112.5, 194.8557158514987 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5710467400218174106&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "is.tue.mpg.de;tuebingen.mpg.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Convergence of Actor-Critic with Multi-Layer Neural Networks", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71694", "id": "QlfGOVD5PO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1dc9fbdb6b4d9955ad377cb983232c9f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QlfGOVD5PO", "openreview": "https://openreview.net/forum?id=QlfGOVD5PO", "poster": "/media/PosterPDFs/NeurIPS%202023/71694.png?t=1701741825.9253373", "slides": "https://nips.cc/virtual/2023/poster/71694", "video": "https://nips.cc/virtual/2023/poster/71694", "author_site": "Haoxing Tian, Alex Olshevsky, Yannis Paschalidis", "tldr": "", "abstract": "The early theory of actor-critic methods considered convergence using linear function approximators for the policy and value functions. Recent work has established convergence using neural network approximators with a single hidden layer. In this work we are taking the natural next step and establish convergence using deep neural networks with an arbitrary number of hidden layers, thus closing a gap between theory and practice. We show that actor-critic updates projected on a ball around the initial condition will converge to a neighborhood where the average of the squared gradients is $\\tilde{O} \\left( 1/\\sqrt{m} \\right) + O \\left( \\epsilon \\right)$, with $m$ being the width of the neural network and $\\epsilon$ the approximation quality of the best critic neural network over the projected set.", "keywords": "Reinforcement Learning;Actor-Critic;gradient splitting;neural network", "primary_area": "", "supplementary_material": "", "author": "Haoxing Tian;Alex Olshevsky;Ioannis Paschalidis", "authorids": "~Haoxing_Tian2;~Alex_Olshevsky1;~Ioannis_Paschalidis1", "gender": "M;M;M", "homepage": "http://sites.bu.edu/paschalidis/;http://sites.bu.edu/aolshevsky/;", "dblp": "44/2060;21/4206;350/3752", "google_scholar": "Es_hZ0QAAAAJ;YKwHoFMAAAAJ;g8jTnD0AAAAJ", "orcid": "0000-0002-3343-2913;;", "linkedin": "yannis-paschalidis-75a921/;alex-olshevsky-43336698/;", "or_profile": "~Ioannis_Paschalidis1;~Alexander_Olshevsky1;~HAOXING_TIAN1", "aff": "Boston University;Boston University;Boston University", "aff_domain": "bu.edu;bu.edu;bu.edu", "position": "Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\ntian2023convergence,\ntitle={Convergence of Actor-Critic with Multi-Layer Neural Networks},\nauthor={Haoxing Tian and Alex Olshevsky and Ioannis Paschalidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QlfGOVD5PO}\n}", "github": "", "project": "", "reviewers": "qbM5;oPsP;ADfc;X8JD;GAW7;7mQ7;FwfS;akFw", "pdf_size": 429086, "rating": "4;4;5;5;6;7;7;7", "confidence": "3;4;1;4;2;3;3;5", "soundness": "2;3;3;3;2;3;4;4", "novelty": "2;2;3;3;3;4;4;4", "presentation": "2;2;3;4;2;4;3;4", "wc_summary": "30;66;14;71;14;67;66;31", "wc_strengths": "4;64;58;64;25;41;49;133", "wc_weaknesses": "160;171;4;152;16;29;258;78", "wc_questions": "29;76;1;49;356;128;3;53", "wc_limitations": "20;132;1;15;34;13;26;1", "wc_review": "243;509;78;351;445;278;402;296", "wc_reply_reviewers": "537;206;81;353;0;0;16;0", "wc_reply_authors": "2086;425;0;942;0;0;0;0", "reply_reviewers": "3;2;1;1;0;0;1;0", "reply_authors": "5;3;1;3;1;1;1;1", "rating_avg": [ 5.625, 1.2183492931011204 ], "confidence_avg": [ 3.125, 1.165922381636102 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.125, 0.7806247497997998 ], "presentation_avg": [ 3.0, 0.8660254037844386 ], "wc_summary_avg": [ 44.875, 
23.411735839104285 ], "wc_strengths_avg": [ 54.75, 35.33323506275642 ], "wc_weaknesses_avg": [ 108.5, 84.79976415061542 ], "wc_questions_avg": [ 86.875, 108.73067357006485 ], "wc_limitations_avg": [ 30.25, 39.8928251694462 ], "wc_review_avg": [ 325.25, 125.32133697020632 ], "wc_reply_reviewers_avg": [ 149.125, 188.5221721045034 ], "wc_reply_authors_avg": [ 431.625, 700.8016726399845 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.12099606769169899, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4246045720925151875&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "bu.edu;bu.edu;bu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bifurcations and loss jumps in RNN training", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71693", "id": "QmPf29EHyI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df334022279996b07e0870a629c18857-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QmPf29EHyI", "openreview": "https://openreview.net/forum?id=QmPf29EHyI", "poster": "/media/PosterPDFs/NeurIPS%202023/71693.png?t=1701415365.1498904", "slides": "https://nips.cc/virtual/2023/poster/71693", "video": "https://nips.cc/virtual/2023/poster/71693", "author_site": "Lukas Eisenmann, Zahra Monfared, Niclas G\u00f6ring, Daniel Durstewitz", "tldr": "", "abstract": "Recurrent neural networks (RNNs) are popular machine learning tools for modeling and forecasting sequential data and for inferring dynamical systems (DS) from observed time series. Concepts from DS theory (DST) have variously been used to further our understanding of both how trained RNNs solve complex tasks and the training process itself. Bifurcations are particularly important phenomena in DS, including RNNs, that refer to topological (qualitative) changes in a system's dynamical behavior as one or more of its parameters are varied. Knowing the bifurcation structure of an RNN will thus allow us to deduce many of its computational and dynamical properties, like its sensitivity to parameter variations or its behavior during training. In particular, bifurcations may account for sudden loss jumps observed in RNN training that could severely impede the training process. Here we first mathematically prove for a particular class of ReLU-based RNNs that certain bifurcations are indeed associated with loss gradients tending toward infinity or zero. We then introduce a novel heuristic algorithm for detecting all fixed points and $k$-cycles in ReLU-based RNNs and their existence and stability regions, hence bifurcation manifolds in parameter space. In contrast to previous numerical algorithms for finding fixed points and common continuation methods, our algorithm provides $\\textit{exact}$ results and returns fixed points and cycles up to high orders with surprisingly good scaling behavior. We exemplify the algorithm on the analysis of the training process of RNNs, and find that the recently introduced technique of generalized teacher forcing completely avoids certain types of bifurcations in training.
Thus, besides facilitating the DST analysis of trained RNNs, our algorithm provides a powerful instrument for analyzing the training process itself.", "keywords": "dynamical systems;bifurcations;Recurrent Neural Networks;attractors;training algorithm;BPTT;exploding and vanishing gradient problem;nonlinear dynamics;time series", "primary_area": "", "supplementary_material": "/attachment/c62b1ea2edcd6bd0062bdcd37b86e68c7837e816.pdf", "author": "Lukas Eisenmann;Zahra Monfared;Niclas Alexander G\u00f6ring;Daniel Durstewitz", "authorids": "~Lukas_Eisenmann1;~Zahra_Monfared1;~Niclas_Alexander_G\u00f6ring1;~Daniel_Durstewitz1", "gender": "M;F;M;", "homepage": "https://www.zi-mannheim.de/forschung/abteilungen-ags-institute/theoret-neurowissenschaften.html;;;https://durstewitzlab.github.io", "dblp": "359/6111;;;98/2120", "google_scholar": ";https://scholar.google.pl/citations?user=OPUIwIoAAAAJ;SpoGWKgAAAAJ;https://scholar.google.de/citations?user=2bcbKU0AAAAJ", "orcid": ";;;0000-0002-9340-3786", "linkedin": "lukas-eisenmann/;;;", "or_profile": "~Lukas_Eisenmann1;~Zahra_Monfared1;~Niclas_Alexander_G\u00f6ring1;~Daniel_Durstewitz1", "aff": "Zentralinstitut f\u00fcr Seelische Gesundheit;ZI Mannheim-Heidelberg University;Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg University", "aff_domain": "zi-mannheim.de;zi-manheim.de;uni-heidelberg.de;uni-heidelberg.de", "position": "PhD student;Postdoc;MS student;Full Professor", "bibtex": "@inproceedings{\neisenmann2023bifurcations,\ntitle={Bifurcations and loss jumps in {RNN} training},\nauthor={Lukas Eisenmann and Zahra Monfared and Niclas Alexander G{\\\"o}ring and Daniel Durstewitz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QmPf29EHyI}\n}", "github": "", "project": "", "reviewers": "v6FA;qxJb;T1NJ;1SGy", "pdf_size": 3901845, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "2;3;4;3", "novelty": "3;3;2;3", "presentation": "2;4;3;3", "wc_summary": "248;58;82;131", "wc_strengths": "61;82;71;69", "wc_weaknesses": "330;88;159;330", "wc_questions": "269;27;50;51", "wc_limitations": "19;25;30;3", "wc_review": "927;280;392;584", "wc_reply_reviewers": "110;74;44;32", "wc_reply_authors": "1606;524;24;35", "reply_reviewers": "1;2;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 129.75, 73.16548024854343 ], "wc_strengths_avg": [ 70.75, 7.495832175282475 ], "wc_weaknesses_avg": [ 226.75, 106.25764678365506 ], "wc_questions_avg": [ 99.25, 98.47429867737064 ], "wc_limitations_avg": [ 19.25, 10.158124826955023 ], "wc_review_avg": [ 545.75, 245.49783603934273 ], "wc_reply_reviewers_avg": [ 65.0, 30.14962686336267 ], "wc_reply_authors_avg": [ 547.25, 643.7551456105031 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4637777666031704323&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "zi-mannheim.de;zi-manheim.de;uni-heidelberg.de;uni-heidelberg.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Zentralinstitut f\u00fcr Seelische Gesundheit;ZI Mannheim-Heidelberg University;Ruprecht-Karls-Universit\u00e4t 
Heidelberg;Heidelberg University", "aff_unique_dep": ";;;", "aff_unique_url": ";;https://www.uni-heidelberg.de/;https://www.uni-heidelberg.de", "aff_unique_abbr": ";;Uni Heidelberg;Uni Heidelberg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Selective Sampling and Imitation Learning via Online Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71692", "id": "QoeOVgayLp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d470d6e007a19ff1666386562c77517c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QoeOVgayLp", "openreview": "https://openreview.net/forum?id=QoeOVgayLp", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71692", "video": "https://nips.cc/virtual/2023/poster/71692", "author_site": "Ayush Sekhari, Karthik Sridharan, Wen Sun, Runzhe Wu", "tldr": "", "abstract": "We consider the problem of Imitation Learning (IL) by actively querying noisy expert for feedback. While imitation learning has been empirically successful, much of prior work assumes access to noiseless expert feedback which is not practical in many applications. In fact, when one only has access to noisy expert feedback, algorithms that rely on purely offline data (non-interactive IL) can be shown to need a prohibitively large number of samples to be successful. In contrast, in this work, we provide an interactive algorithm for IL that uses selective sampling to actively query the noisy expert for feedback. Our contributions are twofold: First, we provide a new selective sampling algorithm that works with general function classes and multiple actions, and obtains the best-known bounds for the regret and the number of queries. Next, we extend this analysis to the problem of IL with noisy expert feedback and provide a new IL algorithm that makes limited queries. \n\n Our algorithm for selective sampling leverages function approximation, and relies on an online regression oracle w.r.t.~the given model class to predict actions, and to decide whether to query the expert for its label. On the theoretical side, the regret bound of our algorithm is upper bounded by the regret of the online regression oracle, while the query complexity additionally depends on the eluder dimension of the model class. We complement this with a lower bound that demonstrates that our results are tight. We extend our selective sampling algorithm for IL with general function approximation and provide bounds on both the regret and the number of queries made to the noisy expert. 
A key novelty here is that our regret and query complexity bounds only depend on the number of times the optimal policy (and not the noisy expert or the learner) goes to states that have a small margin.", "keywords": "Selective Sampling;Imitation Learning;Learning from Expert Feedback;Theory;General purpose algorithms", "primary_area": "", "supplementary_material": "/attachment/a9742207615a84311eae21c67ff61c6bf3b17bfe.pdf", "author": "Ayush Sekhari;Karthik Sridharan;Wen Sun;Runzhe Wu", "authorids": "~Ayush_Sekhari1;~Karthik_Sridharan1;~Wen_Sun1;~Runzhe_Wu1", "gender": "M;M;;M", "homepage": "https://ayush.sekhari.com/;http://www.cs.cornell.edu/~sridharan/;https://wensun.github.io;https://ziqian2000.github.io/", "dblp": "203/8152;s/KarthikSridharan;;294/9552", "google_scholar": "jH9i188AAAAJ;https://scholar.google.com.tw/citations?user=nX9D5AoAAAAJ;iOLC30YAAAAJ;eBtFiuAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ayush_Sekhari1;~Karthik_Sridharan1;~Wen_Sun1;~Runzhe_Wu1", "aff": "Massachusetts Institute of Technology;Cornell University;Cornell University;Cornell University", "aff_domain": "mit.edu;cornell.edu;cornell.edu;cornell.edu", "position": "Postdoc;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsekhari2023selective,\ntitle={Selective Sampling and Imitation Learning via Online Regression},\nauthor={Ayush Sekhari and Karthik Sridharan and Wen Sun and Runzhe Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QoeOVgayLp}\n}", "github": "", "project": "", "reviewers": "A81c;mVoN;hxAC;hUJx", "pdf_size": 867994, "rating": "5;6;6;7", "confidence": "3;3;2;2", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "2;2;4;3", "wc_summary": "186;123;177;171", "wc_strengths": "62;61;30;107", "wc_weaknesses": "138;148;38;206", "wc_questions": "82;546;1;2", "wc_limitations": "1;201;1;52", "wc_review": "469;1079;247;538", "wc_reply_reviewers": "42;20;0;63", "wc_reply_authors": "26;339;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 164.25, 24.40671014290947 ], "wc_strengths_avg": [ 65.0, 27.44995446262161 ], "wc_weaknesses_avg": [ 132.5, 60.42143659331512 ], "wc_questions_avg": [ 157.75, 226.55283600078812 ], "wc_limitations_avg": [ 63.75, 81.93099230449977 ], "wc_review_avg": [ 583.25, 305.7502044153037 ], "wc_reply_reviewers_avg": [ 31.25, 23.594225988576103 ], "wc_reply_authors_avg": [ 91.25, 143.4318217830339 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17134925554770836256&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "mit.edu;cornell.edu;cornell.edu;cornell.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.cornell.edu", "aff_unique_abbr": "MIT;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Advice Querying under Budget
Constraint for Online Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71691", "id": "QpZubU4yD9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eda830e16044587b5082a853c4f25a90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QpZubU4yD9", "openreview": "https://openreview.net/forum?id=QpZubU4yD9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71691", "video": "https://nips.cc/virtual/2023/poster/71691", "author_site": "Ziyad Benomar, Vianney Perchet", "tldr": "", "abstract": "Several problems have been extensively studied in the learning-augmented setting, where the algorithm has access to some, possibly incorrect, predictions. However, it is assumed in most works that the predictions are provided to the algorithm as input, with no constraint on their size. In this paper, we consider algorithms with access to a limited number of predictions, which they can request at any time during their execution. We study three classical problems in competitive analysis: the ski rental problem, the secretary problem, and non-clairvoyant job scheduling. We address the question of when to query predictions and how to use them.", "keywords": "online algorithms;competitive ratio;learning augmented algorithms;scheduling;ski-rental;secretary", "primary_area": "", "supplementary_material": "/attachment/11b9e45a69d311a85797d918c22a766d17a005c9.zip", "author": "Ziyad Benomar;Vianney Perchet", "authorids": "~Ziyad_Benomar1;~Vianney_Perchet3", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ziyad_Benomar1;~Vianney_Perchet3", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nbenomar2023advice,\ntitle={Advice Querying under Budget Constraint for Online Algorithms},\nauthor={Ziyad Benomar and Vianney Perchet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QpZubU4yD9}\n}", "github": "", "project": "", "reviewers": "rY4H;HbqC;gTsH;imY3", "pdf_size": 472039, "rating": "4;5;7;8", "confidence": "4;4;4;4", "soundness": "3;2;4;4", "novelty": "2;2;4;3", "presentation": "3;2;3;4", "wc_summary": "117;279;188;59", "wc_strengths": "3;89;63;55", "wc_weaknesses": "29;202;59;327", "wc_questions": "106;252;141;87", "wc_limitations": "5;30;1;27", "wc_review": "260;852;452;555", "wc_reply_reviewers": "115;37;38;0", "wc_reply_authors": "195;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 160.75, 82.14735236147298 ], "wc_strengths_avg": [ 52.5, 31.22098653149833 ], "wc_weaknesses_avg": [ 154.25, 119.25052410786294 ], "wc_questions_avg": [ 146.5, 63.91596044807588 ], "wc_limitations_avg": [ 15.75, 12.871965661856 ], "wc_review_avg": [ 529.75, 214.06351277132683 ], "wc_reply_reviewers_avg": [ 47.5, 41.871828238088675 ], "wc_reply_authors_avg": [ 48.75, 84.43747686898277 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11541792889256359968&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, 
"email": ";", "author_num": 2 }, { "title": "Towards a Unified Framework of Contrastive Learning for Disentangled Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71690", "id": "QrB38MAAEP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d5470483dd38f71f7bd9e68ce1b94145-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QrB38MAAEP", "openreview": "https://openreview.net/forum?id=QrB38MAAEP", "poster": "/media/PosterPDFs/NeurIPS%202023/71690.png?t=1699708423.7292812", "slides": "https://nips.cc/virtual/2023/poster/71690", "video": "https://nips.cc/virtual/2023/poster/71690", "author_site": "Stefan Matthes, Zhiwei Han, Hao Shen", "tldr": "", "abstract": "Contrastive learning has recently emerged as a promising approach for learning data representations that discover and disentangle the explanatory factors of the data.\nPrevious analyses of such approaches have largely focused on individual contrastive losses, such as noise-contrastive estimation (NCE) and InfoNCE, and rely on specific assumptions about the data generating process.\nThis paper extends the theoretical guarantees for disentanglement to a broader family of contrastive methods, while also relaxing the assumptions about the data distribution.\nSpecifically, we prove identifiability of the true latents for four contrastive losses studied in this paper, without imposing common independence assumptions.\nThe theoretical findings are validated on several benchmark datasets.\nFinally, practical limitations of these methods are also investigated.", "keywords": "Disentanglement;Contrastive Learning;Identifiability;Representation Learning;Nonlinear ICA", "primary_area": "", "supplementary_material": "/attachment/056291751011416f0bcc949f300be3c485c5dcf0.zip", "author": "Stefan Matthes;Zhiwei Han;Hao Shen", "authorids": "~Stefan_Matthes1;~Zhiwei_Han1;~Hao_Shen1", "gender": ";M;M", "homepage": ";https://www.researchgate.net/profile/Zhiwei-Han-5;", "dblp": ";;26/2210-2", "google_scholar": ";;Kce9W-8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Stefan_Matthes1;~Zhiwei_Han1;~Hao_Shen1", "aff": ";Fortiss;Fortiss GmbH", "aff_domain": ";fortiss.org;fortiss.org", "position": ";Researcher;Principal Researcher", "bibtex": "@inproceedings{\nmatthes2023towards,\ntitle={Towards a Unified Framework of Contrastive Learning for Disentangled Representations},\nauthor={Stefan Matthes and Zhiwei Han and Hao Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QrB38MAAEP}\n}", "github": "", "project": "", "reviewers": "6CFv;bVRB;m4mq;CnBd;TFiH;iM6P", "pdf_size": 709144, "rating": "4;5;6;6;7;7", "confidence": "3;2;4;2;3;4", "soundness": "2;3;4;3;3;4", "novelty": "2;2;3;3;3;4", "presentation": "3;3;3;3;3;4", "wc_summary": "30;73;88;71;66;39", "wc_strengths": "56;84;116;67;121;90", "wc_weaknesses": "24;117;491;16;71;92", "wc_questions": "253;8;177;96;89;11", "wc_limitations": "19;12;91;1;4;7", "wc_review": "382;294;963;251;351;239", "wc_reply_reviewers": "11;0;265;0;12;16", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;0;1;0;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 5.833333333333333, 1.0671873729054746 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 
61.166666666666664, 20.177682941529458 ], "wc_strengths_avg": [ 89.0, 23.636130534981113 ], "wc_weaknesses_avg": [ 135.16666666666666, 163.0372316033638 ], "wc_questions_avg": [ 105.66666666666667, 87.24231898695852 ], "wc_limitations_avg": [ 22.333333333333332, 31.24988888869136 ], "wc_review_avg": [ 413.3333333333333, 250.99313846309738 ], "wc_reply_reviewers_avg": [ 50.666666666666664, 96.04107917390813 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.38254602783800307, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16861016986475309016&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";fortiss.org;fortiss.org", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Fortiss;fortiss GmbH", "aff_unique_dep": ";", "aff_unique_url": "https://www.fortiss.org/;https://www.fortiss.org", "aff_unique_abbr": "Fortiss;Fortiss", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Streaming Factor Trajectory Learning for Temporal Tensor Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71689", "id": "Qu6Ln7d9df", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b231d91e700c465dfdd6116d091a4194-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Qu6Ln7d9df", "openreview": "https://openreview.net/forum?id=Qu6Ln7d9df", "poster": "/media/PosterPDFs/NeurIPS%202023/71689.png?t=1701719484.1212032", "slides": "https://nips.cc/virtual/2023/poster/71689", "video": "https://nips.cc/virtual/2023/poster/71689", "author_site": "Shikai Fang, Xin Yu, Shibo Li, Zheng Wang, Mike Kirby, Shandian Zhe", "tldr": "", "abstract": "Practical tensor data often comes with time information. Most existing temporal decomposition approaches estimate a set of fixed factors for the objects in each tensor mode, and hence cannot capture the temporal evolution of the objects' representation. More importantly, we lack an effective approach to capture such evolution from streaming data, which is common in real-world applications. To address these issues, we propose Streaming Factor Trajectory Learning (SFTL) for temporal tensor decomposition. We use Gaussian processes (GPs) to model the trajectory of factors so as to flexibly estimate their temporal evolution. To address the computational challenges in handling streaming data, we convert the GPs into a state-space prior by constructing an equivalent stochastic differential equation (SDE). We develop an efficient online filtering algorithm to estimate a decoupled running posterior of the involved factor states upon receiving new data. The decoupled estimation enables us to conduct standard Rauch-Tung-Striebel smoothing to compute the full posterior of all the trajectories in parallel, without the need for revisiting any previous data. 
We have shown the advantage of SFTL in both synthetic tasks and real-world applications.", "keywords": "Tensor Decomposition;streaming method;Bayesian model", "primary_area": "", "supplementary_material": "/attachment/4cad768ab49049816312a933f50242870a747cc3.pdf", "author": "Shikai Fang;Xin Yu;Shibo Li;Zheng Wang;Robert Kirby;Shandian Zhe", "authorids": "~Shikai_Fang2;~Xin_Yu4;~Shibo_Li1;~Zheng_Wang2;~Robert_Kirby1;~Shandian_Zhe1", "gender": ";F;;M;;", "homepage": ";https://www.cs.utah.edu/~xiyu;https://imshibo.com/;;;", "dblp": ";;;;;", "google_scholar": ";tWAfvQsAAAAJ;thvPDwgAAAAJ;;;", "orcid": ";;0009-0009-1076-282X;;;", "linkedin": ";;;;;", "or_profile": "~Shikai_Fang2;~Xin_Yu4;~Shibo_Li1;~Zheng_Wang2;~Robert_Kirby1;~Shandian_Zhe1", "aff": ";University of Utah;University of Utah;University of Utah;;", "aff_domain": ";cs.utah.edu;utah.edu;utah.edu;;", "position": ";PhD student;PhD student;PhD student;;", "bibtex": "@inproceedings{\nfang2023streaming,\ntitle={Streaming Factor Trajectory Learning for Temporal Tensor Decomposition},\nauthor={Shikai Fang and Xin Yu and Shibo Li and Zheng Wang and Robert Kirby and Shandian Zhe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Qu6Ln7d9df}\n}", "github": "", "project": "", "reviewers": "Dugj;ZhxY;vq55;DKpV", "pdf_size": 1164726, "rating": "6;6;6;6", "confidence": "3;2;3;3", "soundness": "3;2;3;3", "novelty": "3;2;3;2", "presentation": "4;2;2;4", "wc_summary": "60;221;83;89", "wc_strengths": "98;160;46;34", "wc_weaknesses": "82;66;91;223", "wc_questions": "101;68;13;72", "wc_limitations": "20;23;1;67", "wc_review": "361;538;234;485", "wc_reply_reviewers": "145;27;62;89", "wc_reply_authors": "104;32;50;33", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 113.25, 63.14418025439874 ], "wc_strengths_avg": [ 84.5, 49.78704650810289 ], "wc_weaknesses_avg": [ 115.5, 62.70765503509121 ], "wc_questions_avg": [ 63.5, 31.815876539866068 ], "wc_limitations_avg": [ 27.75, 24.180312239505923 ], "wc_review_avg": [ 404.5, 117.54254548885693 ], "wc_reply_reviewers_avg": [ 80.75, 43.1182965804541 ], "wc_reply_authors_avg": [ 54.75, 29.32042803234632 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12057779097243850605&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "email": ";cs.utah.edu;utah.edu;utah.edu;;", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "Utah", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PDE-Refiner: Achieving Accurate Long Rollouts with Neural PDE Solvers", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71688", "id": "Qv6468llWS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d529b943af3dba734f8a7d49efcb6d09-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Qv6468llWS", "openreview": 
"https://openreview.net/forum?id=Qv6468llWS", "poster": "/media/PosterPDFs/NeurIPS%202023/71688.png?t=1701702800.4209385", "slides": "https://nips.cc/virtual/2023/poster/71688", "video": "https://nips.cc/virtual/2023/poster/71688", "author_site": "Phillip Lippe, Bas Veeling, Paris Perdikaris, Richard Turner, Johannes Brandstetter", "tldr": "", "abstract": "Time-dependent partial differential equations (PDEs) are ubiquitous in science and engineering. Recently, mostly due to the high computational cost of traditional solution techniques, deep neural network based surrogates have gained increased interest. The practical utility of such neural PDE solvers relies on their ability to provide accurate, stable predictions over long time horizons, which is a notoriously hard problem. In this work, we present a large-scale analysis of common temporal rollout strategies, identifying the neglect of non-dominant spatial frequency information, often associated with high frequencies in PDE solutions, as the primary pitfall limiting stable, accurate rollout performance. Based on these insights, we draw inspiration from recent advances in diffusion models to introduce PDE-Refiner; a novel model class that enables more accurate modeling of all frequency components via a multistep refinement process. We validate PDE-Refiner on challenging benchmarks of complex fluid dynamics, demonstrating stable and accurate rollouts that consistently outperform state-of-the-art models, including neural, numerical, and hybrid neural-numerical architectures. We further demonstrate that PDE-Refiner greatly enhances data efficiency, since the denoising objective implicitly induces a novel form of spectral data augmentation. Finally, PDE-Refiner's connection to diffusion models enables an accurate and efficient assessment of the model's predictive uncertainty, allowing us to estimate when the surrogate becomes inaccurate.", "keywords": "Neural PDE Solvers;Neural Operators;Temporal Stability;Long-Horizon Modeling;Autoregressive Forecasting", "primary_area": "", "supplementary_material": "/attachment/89b35c8beee094f26f62a73f6c0ad4082b021757.pdf", "author": "Phillip Lippe;Bastiaan S. Veeling;Paris Perdikaris;Richard E Turner;Johannes Brandstetter", "authorids": "~Phillip_Lippe1;~Bastiaan_S._Veeling1;~Paris_Perdikaris1;~Richard_E_Turner1;~Johannes_Brandstetter1", "gender": "M;M;M;M;", "homepage": "https://phlippe.github.io;https://directory.seas.upenn.edu/paris-perdikaris/;https://rich-turner-group.github.io/;;", "dblp": "267/9431;180/9141;40/5352;251/8691;https://dblp.uni-trier.de/pers/hd/v/Veeling:Bastiaan_S=", "google_scholar": "69hFZp4AAAAJ;h_zkt1oAAAAJ;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;KiRvOHcAAAAJ;qStzdQsAAAAJ", "orcid": "0000-0002-3639-6938;0000-0002-2816-3229;;;", "linkedin": "phillip-lippe/;paris-perdikaris-093068102/;;;", "or_profile": "~Phillip_Lippe1;~Paris_Perdikaris1;~Richard_E_Turner1;~Johannes_Brandstetter1;~Bastiaan_Veeling1", "aff": "Google DeepMind;University of Pennsylvania;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "google.com;upenn.edu;research.microsoft.com;microsoft.com;research.microsoft.com", "position": "Intern;Associate Professor;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nlippe2023pderefiner,\ntitle={{PDE}-Refiner: Achieving Accurate Long Rollouts with Neural {PDE} Solvers},\nauthor={Phillip Lippe and Bastiaan S. 
Veeling and Paris Perdikaris and Richard E Turner and Johannes Brandstetter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Qv6468llWS}\n}", "github": "", "project": "", "reviewers": "JgV1;K6r8;YYxA;xf2d", "pdf_size": 2960236, "rating": "6;6;7;8", "confidence": "4;2;3;4", "soundness": "3;2;3;4", "novelty": "3;3;3;3", "presentation": "2;3;4;4", "wc_summary": "53;78;76;157", "wc_strengths": "35;81;74;337", "wc_weaknesses": "207;523;120;286", "wc_questions": "22;198;49;83", "wc_limitations": "4;45;7;195", "wc_review": "321;925;326;1058", "wc_reply_reviewers": "120;236;61;17", "wc_reply_authors": "442;596;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 91.0, 39.350984739901996 ], "wc_strengths_avg": [ 131.75, 119.79018115020946 ], "wc_weaknesses_avg": [ 284.0, 149.95832754468822 ], "wc_questions_avg": [ 88.0, 67.08576600144028 ], "wc_limitations_avg": [ 62.75, 78.04606011836856 ], "wc_review_avg": [ 657.5, 337.2984583421632 ], "wc_reply_reviewers_avg": [ 108.5, 82.1842442320911 ], "wc_reply_authors_avg": [ 259.5, 265.1504290021044 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=998283799574476012&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "google.com;upenn.edu;research.microsoft.com;microsoft.com;research.microsoft.com", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Google;University of Pennsylvania;Microsoft", "aff_unique_dep": "Google DeepMind;;Microsoft Research", "aff_unique_url": "https://deepmind.com;https://www.upenn.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "DeepMind;UPenn;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Sorting with Predictions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71687", "id": "Qv7rWR9JWa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/544696ef4847c903376ed6ec58f3a703-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Qv7rWR9JWa", "openreview": "https://openreview.net/forum?id=Qv7rWR9JWa", "poster": "/media/PosterPDFs/NeurIPS%202023/71687.png?t=1699604394.2305133", "slides": "https://nips.cc/virtual/2023/poster/71687", "video": "https://nips.cc/virtual/2023/poster/71687", "author_site": "Xingjian Bai, Christian Coester", "tldr": "", "abstract": "We explore the fundamental problem of sorting through the lens of learning-augmented algorithms, where algorithms can leverage possibly erroneous predictions to improve their efficiency. We consider two different settings: In the first setting, each item is provided a prediction of its position in the sorted list. In the second setting, we assume there is a ``quick-and-dirty'' way of comparing items, in addition to slow-and-exact comparisons. 
For both settings, we design new and simple algorithms using only $O(\\sum_i \\log \\eta_i)$ exact comparisons, where $\\eta_i$ is a suitably defined prediction error for the $i$th element. In particular, as the quality of predictions deteriorates, the number of comparisons degrades smoothly from $O(n)$ to $O(n\\log n)$. We prove that this comparison complexity is theoretically optimal with respect to the examined error measures. An experimental evaluation against existing adaptive and non-adaptive sorting algorithms demonstrates the potential of applying learning-augmented algorithms in sorting tasks.", "keywords": "sorting;learning-augmented algorithms;algorithms with predictions;adaptive sorting", "primary_area": "", "supplementary_material": "", "author": "Xingjian Bai;Christian Coester", "authorids": "~Xingjian_Bai1;~Christian_Coester1", "gender": "M;M", "homepage": "https://xingjianbai.com/;https://www.cs.ox.ac.uk/people/christian.coester/", "dblp": "188/9534;195/5890", "google_scholar": ";mKI_mvEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xingjian_Bai1;~Christian_Coester1", "aff": "University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk", "position": "Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nbai2023sorting,\ntitle={Sorting with Predictions},\nauthor={Xingjian Bai and Christian Coester},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Qv7rWR9JWa}\n}", "github": "", "project": "", "reviewers": "JwAo;HoTq;gjeT;S6Cj", "pdf_size": 2262959, "rating": "4;6;7;8", "confidence": "4;3;3;5", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "40;211;102;433", "wc_strengths": "29;74;67;156", "wc_weaknesses": "44;157;62;230", "wc_questions": "249;164;122;170", "wc_limitations": "19;1;1;50", "wc_review": "381;607;354;1039", "wc_reply_reviewers": "12;367;0;281", "wc_reply_authors": "195;13;0;101", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;1;3", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 196.5, 149.63706091740775 ], "wc_strengths_avg": [ 81.5, 46.295248136282844 ], "wc_weaknesses_avg": [ 123.25, 75.11116761174732 ], "wc_questions_avg": [ 176.25, 45.89321845327477 ], "wc_limitations_avg": [ 17.75, 20.017180121085985 ], "wc_review_avg": [ 595.25, 274.38875250272196 ], "wc_reply_reviewers_avg": [ 165.0, 161.9367160343818 ], "wc_reply_authors_avg": [ 77.25, 78.30189972152655 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.25482359571881275, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1524256665357123113&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "ox.ac.uk;ox.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "SOAR: Improved Indexing for Approximate Nearest Neighbor Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71686", "id": "QvIvWMaQdX", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0973524e02a712af33325d0688ae6f49-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QvIvWMaQdX", "openreview": "https://openreview.net/forum?id=QvIvWMaQdX", "poster": "/media/PosterPDFs/NeurIPS%202023/71686.png?t=1702438527.505594", "slides": "https://nips.cc/virtual/2023/poster/71686", "video": "https://nips.cc/virtual/2023/poster/71686", "author_site": "Philip Sun, David Simcha, Dave Dopson, Ruiqi Guo, Sanjiv Kumar", "tldr": "", "abstract": "This paper introduces SOAR: **S**pilling with **O**rthogonality-**A**mplified **R**esiduals, a novel data indexing technique for approximate nearest neighbor (ANN) search. SOAR extends upon previous approaches to ANN search, such as spill trees, that utilize multiple redundant representations while partitioning the data to reduce the probability of missing a nearest neighbor during search. Rather than training and computing these redundant representations independently, however, SOAR uses an *orthogonality-amplified residual* loss, which optimizes each representation to compensate for cases where other representations perform poorly. This drastically improves the overall index quality, resulting in state-of-the-art ANN benchmark performance while maintaining fast indexing times and low memory consumption.", "keywords": "ann;quantization;mips;nearest neighbor search;retrieval", "primary_area": "", "supplementary_material": "/attachment/ec69177eb380b4f4540cb1ec559708b431ef2f53.zip", "author": "Philip Sun;David Simcha;Dave Dopson;Ruiqi Guo;Sanjiv Kumar", "authorids": "~Philip_Sun1;~David_Simcha1;ddopson@google.com;~Ruiqi_Guo3;~Sanjiv_Kumar1", "gender": ";M;;M;", "homepage": ";;;http://aqua.cs.uiuc.edu/site/;http://www.sanjivk.com/", "dblp": "280/1666;166/6571;;78/7198;", "google_scholar": "K-GJnwIAAAAJ;;;Cgb68qkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Philip_Sun1;~David_Simcha1;ddopson@google.com;~Ruiqi_Guo3;~Sanjiv_Kumar1", "aff": "Google;Google;;Google;Google", "aff_domain": "google.com;google.com;;google.com;google.com", "position": "Researcher;Researcher;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nsun2023soar,\ntitle={{SOAR}: Improved Indexing for Approximate Nearest Neighbor Search},\nauthor={Philip Sun and David Simcha and Dave Dopson and Ruiqi Guo and Sanjiv Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QvIvWMaQdX}\n}", "github": "", "project": "", "reviewers": "ixrh;J7HP;LtdJ;jSJp", "pdf_size": 4526121, "rating": "4;4;7;8", "confidence": "3;3;5;3", "soundness": "3;2;4;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "67;187;96;87", "wc_strengths": "49;45;276;20", "wc_weaknesses": "75;108;123;49", "wc_questions": "106;85;143;88", "wc_limitations": "5;1;7;7", "wc_review": "302;426;645;251", "wc_reply_reviewers": "0;459;19;96", "wc_reply_authors": "0;378;11;67", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 109.25, 46.09975596464693 ], "wc_strengths_avg": [ 97.5, 103.65447409542918 ], "wc_weaknesses_avg": [ 88.75, 28.77824699317177 ], "wc_questions_avg": [ 105.5, 23.092206477510977 ], "wc_limitations_avg": [ 5.0, 
2.449489742783178 ], "wc_review_avg": [ 406.0, 151.9555856163241 ], "wc_reply_reviewers_avg": [ 143.5, 185.66704069381836 ], "wc_reply_authors_avg": [ 114.0, 154.52346100188151 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4042260417272216, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12930746572529664075&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "google.com;google.com;;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Is Distance Matrix Enough for Geometric Deep Learning?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71685", "id": "QwQ5HhhSNo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/75f1a165c7561e028c41d42fa6286a76-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QwQ5HhhSNo", "openreview": "https://openreview.net/forum?id=QwQ5HhhSNo", "poster": "/media/PosterPDFs/NeurIPS%202023/71685.png?t=1699930567.4893177", "slides": "https://nips.cc/virtual/2023/poster/71685", "video": "https://nips.cc/virtual/2023/poster/71685", "author_site": "Zian Li, Xiyuan Wang, Yinan Huang, Muhan Zhang", "tldr": "", "abstract": "Graph Neural Networks (GNNs) are often used for tasks involving the 3D geometry of a given graph, such as molecular dynamics simulation. While incorporating Euclidean distance into Message Passing Neural Networks (referred to as Vanilla DisGNN) is a straightforward way to learn the geometry, it has been demonstrated that Vanilla DisGNN is geometrically incomplete. In this work, we first construct families of novel and symmetric geometric graphs that Vanilla DisGNN cannot distinguish even when considering all-pair distances, which greatly expands the existing counterexample families. Our counterexamples show the inherent limitation of Vanilla DisGNN to capture symmetric geometric structures. We then propose $k$-DisGNNs, which can effectively exploit the rich geometry contained in the distance matrix. We demonstrate the high expressive power of $k$-DisGNNs from three perspectives: 1. They can learn high-order geometric information that cannot be captured by Vanilla DisGNN. 2. They can unify some existing well-designed geometric models. 3. They are universal function approximators from geometric graphs to scalars (when $k\\geq 2$) and vectors (when $k\\geq 3$). Most importantly, we establish a connection between geometric deep learning (GDL) and traditional graph representation learning (GRL), showing that those highly expressive GNN models originally designed for GRL can also be applied to GDL with impressive performance, and that existing complicated, equivariant models are not the only solution. Experiments verify our theory. 
Our $k$-DisGNNs achieve many new state-of-the-art results on MD17.", "keywords": "geometric deep learning;expressiveness;equivariant neural networks;universality", "primary_area": "", "supplementary_material": "", "author": "Zian Li;Xiyuan Wang;Yinan Huang;Muhan Zhang", "authorids": "~Zian_Li1;~Xiyuan_Wang1;~Yinan_Huang1;~Muhan_Zhang1", "gender": "M;;;M", "homepage": "https://zian-0427.github.io;;;https://muhanzhang.github.io/", "dblp": "88/11091;95/8542;288/1207;157/5518", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";;;0000-0002-7680-6401", "linkedin": ";%E5%B8%8C%E5%85%83-%E7%8E%8B-969660221/;;jerry-muhan-zhang-a33a1777/", "or_profile": "~Zian_Li1;~Xiyuan_Wang1;~Yinan_Huang1;~Muhan_Zhang1", "aff": "Tianjin University;Peking University;Duke University;Peking University", "aff_domain": "tju.edu.cn;pku.edu.cn;duke.edu;pku.edu.cn", "position": "Undergrad student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nli2023is,\ntitle={Is Distance Matrix Enough for Geometric Deep Learning?},\nauthor={Zian Li and Xiyuan Wang and Yinan Huang and Muhan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QwQ5HhhSNo}\n}", "github": "", "project": "", "reviewers": "fGWC;5v5F;w5WJ;UsYi", "pdf_size": 615843, "rating": "5;7;7;7", "confidence": "4;4;3;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "110;86;85;68", "wc_strengths": "51;150;66;117", "wc_weaknesses": "165;36;46;264", "wc_questions": "5;1;149;121", "wc_limitations": "14;1;13;63", "wc_review": "345;274;359;633", "wc_reply_reviewers": "51;0;69;444", "wc_reply_authors": "54;0;29;1462", "reply_reviewers": "1;0;1;3", "reply_authors": "2;1;2;4", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 14.956186011146023 ], "wc_strengths_avg": [ 96.0, 39.62953444086872 ], "wc_weaknesses_avg": [ 127.75, 93.61189828221625 ], "wc_questions_avg": [ 69.0, 66.75327707311455 ], "wc_limitations_avg": [ 22.75, 23.79469478686373 ], "wc_review_avg": [ 402.75, 136.78518742904876 ], "wc_reply_reviewers_avg": [ 141.0, 176.75830956421822 ], "wc_reply_authors_avg": [ 386.25, 621.3784575441927 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5860579681506561332&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tju.edu.cn;pku.edu.cn;duke.edu;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Tianjin University;Peking University;Duke University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;http://www.pku.edu.cn;https://www.duke.edu", "aff_unique_abbr": "TJU;Peking U;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Beyond Myopia: Learning from Positive and Unlabeled Data through Holistic Predictive Trends", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71684", "id": "QwvaqV48fB", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d5c0f9585592bad5251133813893a6c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QwvaqV48fB", "openreview": "https://openreview.net/forum?id=QwvaqV48fB", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71684", "video": "https://nips.cc/virtual/2023/poster/71684", "author_site": "Wang Xinrui, Wenhai Wan, Chuanxing Geng, Shao-Yuan Li, Songcan Chen", "tldr": "", "abstract": "Learning binary classifiers from positive and unlabeled data (PUL) is vital in many real-world applications, especially when verifying negative examples is difficult. Despite the impressive empirical performance of recent PUL methods, challenges like accumulated errors and increased estimation bias persist due to the absence of negative labels. In this paper, we unveil an intriguing yet long-overlooked observation in PUL: \\textit{resampling the positive data in each training iteration to ensure a balanced distribution between positive and unlabeled examples results in strong early-stage performance. Furthermore, predictive trends for positive and negative classes display distinctly different patterns.} Specifically, the scores (output probability) of unlabeled negative examples consistently decrease, while those of unlabeled positive examples show largely chaotic trends. Instead of focusing on classification within individual time frames, we innovatively adopt a holistic approach, interpreting the scores of each example as a temporal point process (TPP). This reformulates the core problem of PUL as recognizing trends in these scores. We then propose a novel TPP-inspired measure for trend detection and prove its asymptotic unbiasedness in predicting changes. Notably, our method accomplishes PUL without requiring additional parameter tuning or prior assumptions, offering an alternative perspective for tackling this problem. 
Extensive experiments verify the superiority of our method, particularly in a highly imbalanced real-world setting, where it achieves improvements of up to $11.3\\%$ in key metrics.", "keywords": "positive and unlabeled learning;machine learning;deep learning;temporal point process;data imbalance", "primary_area": "", "supplementary_material": "/attachment/828c8cafd8d9db9cbf09fcfd0bedfdebd0455d7c.zip", "author": "Wang Xinrui;Wenhai Wan;Chuanxing Geng;Shao-Yuan Li;Songcan Chen", "authorids": "~Wang_Xinrui1;~Wenhai_Wan1;~Chuanxing_Geng1;~Shao-Yuan_Li1;~Songcan_Chen1", "gender": "M;M;M;F;", "homepage": "https://wxr99.github.io/dudusama/;https://openreview.net/;https://faculty.nuaa.edu.cn/gengchuanxing/zh_CN/index.htm;http://parnec.nuaa.edu.cn/lisy;", "dblp": ";342/6476;224/2052;79/1523;", "google_scholar": "3juyXgQAAAAJ;;thqYKQIAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0009-0006-0208-9063;;;0000-0003-0610-8568;", "linkedin": ";;;;", "or_profile": "~Wang_Xinrui1;~Wenhai_Wan1;~Chuanxing_Geng1;~Shao-Yuan_Li1;~Songcan_Chen1", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;", "position": "MS student;MS student;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\nxinrui2023beyond,\ntitle={Beyond Myopia: Learning from Positive and Unlabeled Data through Holistic Predictive Trends},\nauthor={Wang Xinrui and Wenhai Wan and Chuanxing Geng and Shao-Yuan Li and Songcan Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QwvaqV48fB}\n}", "github": "", "project": "", "reviewers": "3dQ9;3ytg;EzoK;vUs8;cdNW", "pdf_size": 425849, "rating": "6;7;7;7;7", "confidence": "4;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "92;61;61;53;83", "wc_strengths": "16;41;109;144;71", "wc_weaknesses": "7;137;35;77;69", "wc_questions": "87;98;89;47;103", "wc_limitations": "1;1;12;1;1", "wc_review": "203;338;306;322;327", "wc_reply_reviewers": "0;0;15;14;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.0, 14.859340496805368 ], "wc_strengths_avg": [ 76.2, 45.96259348644287 ], "wc_weaknesses_avg": [ 65.0, 43.83605821695194 ], "wc_questions_avg": [ 84.8, 19.78282083020518 ], "wc_limitations_avg": [ 3.2, 4.4 ], "wc_review_avg": [ 299.2, 49.19105609762815 ], "wc_reply_reviewers_avg": [ 5.8, 7.11055553385247 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1691538698094489087&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": "", "aff_unique_url": "http://www.nuaa.edu.cn", "aff_unique_abbr": "NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Equal Opportunity of Coverage in Fair Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71683", "id": "QxYzmYmQQe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1849b94ed817ae7043a6b6934ef410c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QxYzmYmQQe", "openreview": "https://openreview.net/forum?id=QxYzmYmQQe", "poster": "/media/PosterPDFs/NeurIPS%202023/71683.png?t=1701633276.3093903", "slides": "https://nips.cc/virtual/2023/poster/71683", "video": "https://nips.cc/virtual/2023/poster/71683", "author_site": "Fangxin Wang, Lu Cheng, Ruocheng Guo, Kay Liu, Philip S Yu", "tldr": "", "abstract": "We study fair machine learning (ML) under predictive uncertainty to enable reliable and trustworthy decision-making. The seminal work of 'equalized coverage' proposed an uncertainty-aware fairness notion. However, it does not guarantee equal coverage rates across more fine-grained groups (e.g., low-income females) conditioning on the true label and is biased in the assessment of uncertainty. To tackle these limitations, we propose a new uncertainty-aware fairness -- Equal Opportunity of Coverage (EOC) -- that aims to achieve two properties: (1) coverage rates for different groups with similar outcomes are close, and (2) the coverage rate for the entire population remains at a predetermined level. Further, the prediction intervals should be narrow to be informative. We propose Binned Fair Quantile Regression (BFQR), a distribution-free post-processing method to improve EOC with reasonable width for any trained ML models. It first calibrates a hold-out set to bound deviation from EOC, then leverages conformal prediction to maintain EOC on a test set, meanwhile optimizing prediction interval width. Experimental results demonstrate the effectiveness of our method in improving EOC.", "keywords": "Equal Opportunity; Fair Machine Learning; Conformal Prediction; Uncertainty Quantification", "primary_area": "", "supplementary_material": "/attachment/1fca7c35fe45c465630693883d031dc771263820.pdf", "author": "Fangxin Wang;Lu Cheng;Ruocheng Guo;Kay Liu;Philip S. Yu", "authorids": "~Fangxin_Wang2;~Lu_Cheng2;~Ruocheng_Guo1;~Kay_Liu1;~Philip_S._Yu1", "gender": "F;F;M;;M", "homepage": "https://fangxin-wang.github.io/;https://lcheng.org/;https://rguo12.github.io;;https://cs.uic.edu/profiles/philip-yu/", "dblp": "142/0351-3.html;17/4969-1;167/4378;;y/PhilipSYu", "google_scholar": "5KRVv-cAAAAJ;9rpkTSkAAAAJ;8Nuj8NwAAAAJ;;D0lL1r0AAAAJ", "orcid": ";0000-0002-2503-2522;;;0000-0002-3491-5968", "linkedin": "fangxin-wang-2000/;;;;", "or_profile": "~Fangxin_Wang2;~Lu_Cheng2;~Ruocheng_Guo1;~Kay_Liu1;~Philip_S._Yu1", "aff": "Columbia University;University of Illinois at Chicago;Bytedance Research;;University of Illinois Chicago", "aff_domain": "columbia.edu;uic.edu;bytedance.com;;uic.edu", "position": "MS student;Assistant Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nwang2023equal,\ntitle={Equal Opportunity of Coverage in Fair Regression},\nauthor={Fangxin Wang and Lu Cheng and Ruocheng Guo and Kay Liu and Philip S. 
Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QxYzmYmQQe}\n}", "github": "", "project": "", "reviewers": "FNuJ;RXWV;219P;1ZNj", "pdf_size": 656940, "rating": "4;4;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "41;128;182;125", "wc_strengths": "10;100;47;32", "wc_weaknesses": "11;323;49;21", "wc_questions": "73;170;92;516", "wc_limitations": "12;1;1;1", "wc_review": "147;722;371;695", "wc_reply_reviewers": "53;103;0;12", "wc_reply_authors": "285;203;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.0, 50.42320894191484 ], "wc_strengths_avg": [ 47.25, 33.17661073708404 ], "wc_weaknesses_avg": [ 101.0, 128.92633555639438 ], "wc_questions_avg": [ 212.75, 178.814673614891 ], "wc_limitations_avg": [ 3.75, 4.763139720814412 ], "wc_review_avg": [ 483.75, 238.48624174153107 ], "wc_reply_reviewers_avg": [ 42.0, 40.329889660151565 ], "wc_reply_authors_avg": [ 122.0, 125.39736839343958 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13864612959575052574&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "columbia.edu;uic.edu;bytedance.com;;uic.edu", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Columbia University;University of Illinois at Chicago;ByteDance", "aff_unique_dep": ";;Bytedance Research", "aff_unique_url": "https://www.columbia.edu;https://www.uic.edu;https://www.bytedance.com", "aff_unique_abbr": "Columbia;UIC;Bytedance", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Emergence of Shape Bias in Convolutional Neural Networks through Activation Sparsity", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71682", "id": "QzcZb3fWmW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e31c16c7b3e0ccee5159ae5443154fac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=QzcZb3fWmW", "openreview": "https://openreview.net/forum?id=QzcZb3fWmW", "poster": "/media/PosterPDFs/NeurIPS%202023/71682.png?t=1702346251.6801043", "slides": "https://nips.cc/virtual/2023/poster/71682", "video": "https://nips.cc/virtual/2023/poster/71682", "author_site": "Tianqin Li, Ziqi Wen, Yangfan Li, Tai Sing Lee", "tldr": "", "abstract": "Current deep-learning models for object recognition are known to be heavily biased toward texture. In contrast, human visual systems are known to be biased toward shape and structure. What could be the design principles in human visual systems that led to this difference? How could we introduce more shape bias into the deep learning models? In this paper, we report that sparse coding, a ubiquitous principle in the brain, can in itself introduce shape bias into the network. 
We found that enforcing the sparse coding constraint using a non-differentiable Top-K operation can lead to the emergence of structural encoding in neurons in convolutional neural networks, resulting in a smooth decomposition of objects into parts and subparts and endowing the networks with shape bias. We demonstrated this emergence of shape bias and its functional benefits for different network structures with various datasets. For object recognition convolutional neural networks, the shape bias leads to greater robustness against style and pattern change distraction. For image synthesis generative adversarial networks, the emergent shape bias leads to more coherent and decomposable structures in the synthesized images. Ablation studies suggest that sparse codes tend to encode structures, whereas the more distributed codes tend to favor texture. Our code is hosted at the GitHub repository: https://topk-shape-bias.github.io/", "keywords": "neuroscience;computer vision;shape & texture bias", "primary_area": "", "supplementary_material": "/attachment/4b4991e7a1e1408e5020d1c22373a1bfbc5db1b8.pdf", "author": "Tianqin Li;Ziqi Wen;Yangfan Li;Tai Sing Lee", "authorids": "~Tianqin_Li2;~Ziqi_Wen2;~Yangfan_Li2;~Tai_Sing_Lee1", "gender": "M;M;M;M", "homepage": "https://github.com/Crazy-Jack;https://github.com/starsky77;;http://www.cnbc.cmu.edu/~tai/", "dblp": "294/5434;328/9856;;21/4105", "google_scholar": "sQjEQEUAAAAJ;CvVgoeQAAAAJ;qh5ePbgAAAAJ;9TAiIIMAAAAJ", "orcid": "0000-0003-2567-8283;;;", "linkedin": "tianqin-li-b16299170/;ziqi-wen-starsky77/;;", "or_profile": "~Tianqin_Li2;~Ziqi_Wen2;~Yangfan_Li2;~Tai_Sing_Lee1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Northwestern University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;andrew.cmu.edu;northwestern.edu;cmu.edu", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023emergence,\ntitle={Emergence of Shape Bias in Convolutional Neural Networks through Activation Sparsity},\nauthor={Tianqin Li and Ziqi Wen and Yangfan Li and Tai Sing Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=QzcZb3fWmW}\n}", "github": "", "project": "", "reviewers": "oHBy;ZUQs;muuS;24RM", "pdf_size": 13343801, "rating": "7;7;7;8", "confidence": "3;3;4;3", "soundness": "3;2;4;3", "novelty": "4;3;3;4", "presentation": "3;3;4;4", "wc_summary": "47;84;90;46", "wc_strengths": "69;140;85;114", "wc_weaknesses": "91;91;279;41", "wc_questions": "33;64;52;46", "wc_limitations": "9;38;12;12", "wc_review": "249;417;518;259", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 66.75, 20.363877332178173 ], "wc_strengths_avg": [ 102.0, 27.230497608380205 ], "wc_weaknesses_avg": [ 125.5, 90.94366388044854 ], "wc_questions_avg": [ 48.75, 11.166355717063647 ], "wc_limitations_avg": [ 17.75, 11.755317945508747 ], "wc_review_avg": [ 360.75, 112.61965858587922 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 15, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=18239639282688699006&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 7, "email": "andrew.cmu.edu;andrew.cmu.edu;northwestern.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.northwestern.edu", "aff_unique_abbr": "CMU;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Two Sides of The Same Coin: Bridging Deep Equilibrium Models and Neural ODEs via Homotopy Continuation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71681", "id": "R2rJq5OHdr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/592da1445a51e54a3987958b5831948f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=R2rJq5OHdr", "openreview": "https://openreview.net/forum?id=R2rJq5OHdr", "poster": "/media/PosterPDFs/NeurIPS%202023/71681.png?t=1699728135.376784", "slides": "https://nips.cc/virtual/2023/poster/71681", "video": "https://nips.cc/virtual/2023/poster/71681", "author_site": "Shutong Ding, Tianyu Cui, Jingya Wang, Ye Shi", "tldr": "", "abstract": "Deep Equilibrium Models (DEQs) and Neural Ordinary Differential Equations (Neural ODEs) are two branches of implicit models that have achieved remarkable success owing to their superior performance and low memory consumption. While both are implicit models, DEQs and Neural ODEs are derived from different mathematical formulations. Inspired by homotopy continuation, we establish a connection between these two models and illustrate that they are actually two sides of the same coin. Homotopy continuation is a classical method of solving nonlinear equations based on a corresponding ODE. Given this connection, we proposed a new implicit model called HomoODE that inherits the property of high accuracy from DEQs and the property of stability from Neural ODEs. Unlike DEQs, which explicitly solve an equilibrium-point-finding problem via Newton's methods in the forward pass, HomoODE solves the equilibrium-point-finding problem implicitly using a modified Neural ODE via homotopy continuation. Further, we developed an acceleration method for HomoODE with a shared learnable initial point. It is worth noting that our model also provides a better understanding of why Augmented Neural ODEs work as long as the augmented part is regarded as the equilibrium point to find. 
Comprehensive experiments with several image classification tasks demonstrate that HomoODE surpasses existing implicit models in terms of both accuracy and memory consumption.", "keywords": "Deep Equilibrium Models;Neural Ordinary Differential Equations;Homotopy Continuation", "primary_area": "", "supplementary_material": "/attachment/06701decfb35470d414ac20c44841fc304136530.zip", "author": "Shutong Ding;Tianyu Cui;Jingya Wang;Ye Shi", "authorids": "~Shutong_Ding1;~Tianyu_Cui2;~Jingya_Wang3;~Ye_Shi1", "gender": "M;;F;M", "homepage": "https://dingsht.tech/;https://github.com/TianyuCuiOvO;https://faculty.sist.shanghaitech.edu.cn/faculty/wangjingya/;http://faculty.sist.shanghaitech.edu.cn/faculty/shiye", "dblp": ";;;34/11191-1", "google_scholar": "https://scholar.google.com.hk/citations?user=qJyqm40AAAAJ;TGB4zWUAAAAJ;https://scholar.google.com.au/citations?user=vmvJV_IAAAAJ;gMqbZPUAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shutong_Ding1;~Tianyu_Cui2;~Jingya_Wang3;~Ye_Shi1", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nding2023two,\ntitle={Two Sides of The Same Coin: Bridging Deep Equilibrium Models and Neural {ODE}s via Homotopy Continuation},\nauthor={Shutong Ding and Tianyu Cui and Jingya Wang and Ye Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R2rJq5OHdr}\n}", "github": "", "project": "", "reviewers": "aBa7;rMvZ;ZDnx;iBob", "pdf_size": 983897, "rating": "5;5;6;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "43;65;164;48", "wc_strengths": "25;56;73;81", "wc_weaknesses": "111;175;20;44", "wc_questions": "5;200;176;3", "wc_limitations": "1;21;8;18", "wc_review": "185;517;441;194", "wc_reply_reviewers": "163;113;29;0", "wc_reply_authors": "105;388;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.0, 49.178247223747206 ], "wc_strengths_avg": [ 58.75, 21.47527648250425 ], "wc_weaknesses_avg": [ 87.5, 60.53304882458838 ], "wc_questions_avg": [ 96.0, 92.39318156660696 ], "wc_limitations_avg": [ 12.0, 7.968688725254614 ], "wc_review_avg": [ 334.25, 147.25721544291133 ], "wc_reply_reviewers_avg": [ 76.25, 65.04373528634406 ], "wc_reply_authors_avg": [ 123.25, 158.7503937002992 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5656292305742182049&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "China" }, { "title": "Scaling MLPs: A Tale of Inductive Bias", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71680", "id": "R45A8eKcax", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf2a5ce85aea9ff40d9bf8b2c2561cae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=R45A8eKcax", "openreview": "https://openreview.net/forum?id=R45A8eKcax", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71680", "video": "https://nips.cc/virtual/2023/poster/71680", "author_site": "Gregor Bachmann, Sotiris Anagnostidis, Thomas Hofmann", "tldr": "", "abstract": "In this work we revisit the most fundamental building block in deep learning, the multi-layer perceptron (MLP), and study the limits of its performance on vision tasks. Empirical insights into MLPs are important for multiple reasons. (1) Given the recent narrative \"less inductive bias is better\", popularized due to transformers eclipsing convolutional models, it is natural to explore the limits of this hypothesis. To that end, MLPs offer an ideal test bed, as they lack any vision-specific inductive bias. (2) MLPs have almost exclusively been the main protagonist in the deep learning theory literature due to their mathematical simplicity, serving as a proxy to explain empirical phenomena observed for more complex architectures. Surprisingly, experimental datapoints for MLPs are very difficult to find in the literature, especially when coupled with large pre-training protocols. This discrepancy between practice and theory is worrying: \\textit{Do MLPs reflect the empirical advances exhibited by practical models?} Or do theorists need to rethink the role of MLPs as a proxy? We provide insights into both these aspects.\nWe show that the performance of MLPs drastically improves with scale (95% on CIFAR10, 82% on CIFAR100, 58% on ImageNet ReaL), highlighting that lack of inductive bias can indeed be compensated. We observe that MLPs mimic the behaviour of their modern counterparts faithfully, with some components in the learning setting however exhibiting stronger or unexpected behaviours. Due to their inherent computational efficiency, large pre-training experiments become more accessible for academic researchers. 
All of our experiments were run on a single GPU.", "keywords": "MLP;scaling-laws;inductive bias;DL theory", "primary_area": "", "supplementary_material": "", "author": "Gregor Bachmann;Sotiris Anagnostidis;Thomas Hofmann", "authorids": "~Gregor_Bachmann1;~Sotiris_Anagnostidis1;~Thomas_Hofmann1", "gender": "M;M;M", "homepage": "http://www.da.inf.ethz.ch/people/GregorBachmann;;http://www.da.inf.ethz.ch/", "dblp": ";286/1763;h/ThHofmann", "google_scholar": "bbGqqloAAAAJ;qjzTKWUAAAAJ;T3hAyLkAAAAJ", "orcid": ";;", "linkedin": ";sotiris-anagnostidis-b064a5129/;thomas-hofmann-1ab2402/", "or_profile": "~Gregor_Bachmann1;~Sotiris_Anagnostidis1;~Thomas_Hofmann1", "aff": "Swiss Federal Institute of Technology;ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbachmann2023scaling,\ntitle={Scaling {MLP}s: A Tale of Inductive Bias},\nauthor={Gregor Bachmann and Sotiris Anagnostidis and Thomas Hofmann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R45A8eKcax}\n}", "github": "", "project": "", "reviewers": "EAzm;bQKr;xaDV;RAGY", "pdf_size": 9304488, "rating": "4;6;6;8", "confidence": "4;3;5;5", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "4;2;3;4", "wc_summary": "84;18;264;232", "wc_strengths": "202;13;348;213", "wc_weaknesses": "379;27;206;170", "wc_questions": "24;239;841;31", "wc_limitations": "24;11;1;53", "wc_review": "713;308;1660;699", "wc_reply_reviewers": "175;0;0;172", "wc_reply_authors": "350;55;66;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 149.5, 101.85651672818976 ], "wc_strengths_avg": [ 194.0, 119.27070050938747 ], "wc_weaknesses_avg": [ 195.5, 125.32457859494282 ], "wc_questions_avg": [ 283.75, 333.1226313236613 ], "wc_limitations_avg": [ 22.25, 19.536824204563032 ], "wc_review_avg": [ 845.0, 497.82878582902373 ], "wc_reply_reviewers_avg": [ 86.75, 86.75648390754434 ], "wc_reply_authors_avg": [ 117.75, 136.4008339417322 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18204899207956332582&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "ethz.ch;inf.ethz.ch;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "R4ivHjNi8V", "title": "Efficient and Learnable Transformed Tensor Nuclear Norm with Exact Recoverable Theory", "track": "main", "status": "NeurIPS 2023 Conference Withdrawn Submission", "tldr": "", "abstract": "The tensor nuclear norm represents the low-rank property of tensor slices under a transformation. Finding a good transformation is crucial for the tensor nuclear norm. 
However, existing transformations are either fixed and not adaptable to the data, leading to ineffective results, or they are nonlinear and non-invertible, which prevents theoretical guarantees for the transformed tensor nuclear norm. Besides, some transformations are too complex and computationally expensive. To address these issues, this paper first proposes a fast data-adaptive and learnable column-orthogonal transformation learning framework with an exact recoverable theoretical guarantee. Extensive experiments have validated the effectiveness of the proposed models and theories. ", "keywords": "Efficient;Learnable Transformed Tensor Nuclear Norm;Exact Recoverable Theory", "primary_area": "", "supplementary_material": "/attachment/f0d794b9b411b8caa022a68a07464a0363dbb891.pdf", "author": "Jiangjun Peng;Hailin Wang;Xiangyong Cao;Hongying Zhang;Xixi Jia;Deyu Meng", "authorids": "~Jiangjun_Peng1;~Hailin_Wang2;~Xiangyong_Cao1;~Hongying_Zhang1;~Xixi_Jia2;~Deyu_Meng1", "gender": "M;M;M;F;M;M", "homepage": "https://teacher.nwpu.edu.cn/pengjj;https://github.com/wanghailin97;http://gr.xjtu.edu.cn/web/caoxiangyong;http://gr.xjtu.edu.cn/web/zhyemily;;http://dymeng.gr.xjtu.edu.cn", "dblp": ";255/2196;175/1407;;216/9686;22/5614", "google_scholar": "3crYjMoAAAAJ;https://scholar.google.com.hk/citations?user=FdYTyjgAAAAJ;IePM9RsAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;an6w-64AAAAJ", "orcid": "0000-0001-9645-5154;0000-0002-7797-2719;;;;0000-0002-1294-8283", "linkedin": ";;;;;", "or_profile": "~Jiangjun_Peng1;~Hailin_Wang2;~Xiangyong_Cao1;~Hongying_Zhang1;~Xixi_Jia2;~Deyu_Meng1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xidian University;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xidian.edu.cn;xjtu.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nanonymous2025efficient,\ntitle={Efficient and Learnable Transformed Tensor Nuclear Norm with Exact Recoverable Theory},\nauthor={Anonymous},\nyear={2025},\nurl={https://openreview.net/forum?id=R4ivHjNi8V}\n}", "github": "", "project": "", "reviewers": "1VQ8;E1Cr;Wzur;kX6Y", "site": "https://openreview.net/forum?id=R4ivHjNi8V", "pdf_size": 2883656, "rating": "3;4;5;6", "confidence": "5;3;4;3", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "84;108;49;63", "wc_strengths": "39;3;65;31", "wc_weaknesses": "221;154;81;82", "wc_questions": "144;3;18;84", "wc_limitations": "6;6;2;6", "wc_review": "494;274;215;266", "wc_reply_reviewers": "457;106;59;23", "wc_reply_authors": "1027;1069;89;18", "reply_reviewers": "3;1;1;1", "reply_authors": "8;8;3;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.0, 22.282279955157193 ], "wc_strengths_avg": [ 34.5, 22.107690969434145 ], "wc_weaknesses_avg": [ 134.5, 58.0538543078752 ], "wc_questions_avg": [ 62.25, 56.179956390157514 ], "wc_limitations_avg": [ 5.0, 1.7320508075688772 ], "wc_review_avg": [ 312.25, 107.34611078190025 ], "wc_reply_reviewers_avg": [ 161.25, 173.26911871421288 ], "wc_reply_authors_avg": [ 550.75, 498.1045949396572 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 5.25, 2.7726341266023544 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 6, 0 ], 
"corr_rating_confidence": -0.674199862463242, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3uUo8QqjkpcJ:scholar.google.com/&scioq=Efficient+and+Learnable+Transformed+Tensor+Nuclear+Norm+with+Exact+Recoverable+Theory&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Xi'an Jiao Tong University;Xidian University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.xidian.edu.cn/", "aff_unique_abbr": "XJTU;Xidian", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SGFormer: Simplifying and Empowering Transformers for Large-Graph Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71679", "id": "R4xpvDTWkV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cc57fac10eacadb3b72a907ac48f9a98-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=R4xpvDTWkV", "openreview": "https://openreview.net/forum?id=R4xpvDTWkV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71679", "video": "https://nips.cc/virtual/2023/poster/71679", "author_site": "Qitian Wu, Wentao Zhao, Chenxiao Yang, Hengrui Zhang, Fan Nie, Haitian Jiang, Yatao Bian, Junchi Yan", "tldr": "", "abstract": "Learning representations on large-sized graphs is a long-standing challenge due to the inter-dependence nature involved in massive data points. Transformers, as an emerging class of foundation encoders for graph-structured data, have shown promising performance on small graphs due to its global attention capable of capturing all-pair influence beyond neighboring nodes. Even so, existing approaches tend to inherit the spirit of Transformers in language and vision tasks, and embrace complicated models by stacking deep multi-head attentions. In this paper, we critically demonstrate that even using a one-layer attention can bring up surprisingly competitive performance across node property prediction benchmarks where node numbers range from thousand-level to billion-level. This encourages us to rethink the design philosophy for Transformers on large graphs, where the global attention is a computation overhead hindering the scalability. We frame the proposed scheme as Simplified Graph Transformers (SGFormer), which is empowered by a simple attention model that can efficiently propagate information among arbitrary nodes in one layer. SGFormer requires none of positional encodings, feature/graph pre-processing or augmented loss. Empirically, SGFormer successfully scales to the web-scale graph ogbn-papers100M and yields up to 141x inference acceleration over SOTA Transformers on medium-sized graphs. 
Beyond current results, we believe the proposed methodology alone opens up a new technical path of independent interest for building Transformers on large graphs.", "keywords": "graph transformers;graph neural networks;graph representation learning;large graphs;efficiency;scalability", "primary_area": "", "supplementary_material": "", "author": "Qitian Wu;Wentao Zhao;Chenxiao Yang;Hengrui Zhang;Fan Nie;Haitian Jiang;Yatao Bian;Junchi Yan", "authorids": "~Qitian_Wu1;~Wentao_Zhao1;~Chenxiao_Yang1;~Hengrui_Zhang1;~Fan_Nie1;~Haitian_Jiang2;~Yatao_Bian1;~Junchi_Yan2", "gender": ";;;M;;;;", "homepage": ";https://github.com/WtaoZhao;;https://hengruizhang98.github.io;;;;", "dblp": ";;;;;;;", "google_scholar": ";;;iwffiD0AAAAJ;;;;", "orcid": ";;;0009-0006-1330-0899;;;;", "linkedin": ";;;;;;;", "or_profile": "~Qitian_Wu1;~Wentao_Zhao1;~Chenxiao_Yang1;~Hengrui_Zhang1;~Fan_Nie1;~Haitian_Jiang2;~Yatao_Bian1;~Junchi_Yan2", "aff": ";Shanghai Jiaotong University;;University of Illinois, Chicago;;;;", "aff_domain": ";sjtu.edu.cn;;uic.edu;;;;", "position": ";MS student;;PhD student;;;;", "bibtex": "@inproceedings{\nwu2023simplifying,\ntitle={Simplifying and Empowering Transformers for Large-Graph Representations},\nauthor={Qitian Wu and Wentao Zhao and Chenxiao Yang and Hengrui Zhang and Fan Nie and Haitian Jiang and Yatao Bian and Junchi Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R4xpvDTWkV}\n}", "github": "", "project": "", "reviewers": "bWXq;moxf;9PQr;J3Wh", "pdf_size": 1055557, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;4;3;4", "wc_summary": "100;56;21;70", "wc_strengths": "32;56;21;38", "wc_weaknesses": "312;29;22;30", "wc_questions": "337;32;28;43", "wc_limitations": "11;13;6;8", "wc_review": "792;186;98;189", "wc_reply_reviewers": "1059;0;0;0", "wc_reply_authors": "1922;0;0;0", "reply_reviewers": "4;0;0;0", "reply_authors": "6;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 61.75, 28.39344114403888 ], "wc_strengths_avg": [ 36.75, 12.676257334087218 ], "wc_weaknesses_avg": [ 98.25, 123.44710405675785 ], "wc_questions_avg": [ 110.0, 131.17354916293147 ], "wc_limitations_avg": [ 9.5, 2.692582403567252 ], "wc_review_avg": [ 316.25, 277.09598968588483 ], "wc_reply_reviewers_avg": [ 264.75, 458.56045130386025 ], "wc_reply_authors_avg": [ 480.5, 832.2504130368455 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 121, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14606299797369748214&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": ";sjtu.edu.cn;;uic.edu;;;;", "author_num": 8, "aff_unique_index": "0;1", "aff_unique_norm": "Shanghai Jiao Tong University;University of Illinois at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.uic.edu", "aff_unique_abbr": "SJTU;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Additive Decoders for Latent Variables Identification and Cartesian-Product Extrapolation", "status": "Oral",
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71678", "id": "R6KJN1AUAR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ef594af0d9a519db8fb292452c461fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=R6KJN1AUAR", "openreview": "https://openreview.net/forum?id=R6KJN1AUAR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71678", "video": "https://nips.cc/virtual/2023/poster/71678", "author_site": "S\u00e9bastien Lachapelle, Divyat Mahajan, Ioannis Mitliagkas, Simon Lacoste-Julien", "tldr": "", "abstract": "We tackle the problems of latent variables identification and \"out-of-support'' image generation in representation learning. We show that both are possible for a class of decoders that we call additive, which are reminiscent of decoders used for object-centric representation learning (OCRL) and well suited for images that can be decomposed as a sum of object-specific images. We provide conditions under which exactly solving the reconstruction problem using an additive decoder is guaranteed to identify the blocks of latent variables up to permutation and block-wise invertible transformations. This guarantee relies only on very weak assumptions about the distribution of the latent factors, which might present statistical dependencies and have an almost arbitrarily shaped support. Our result provides a new setting where nonlinear independent component analysis (ICA) is possible and adds to our theoretical understanding of OCRL methods. We also show theoretically that additive decoders can generate novel images by recombining observed factors of variations in novel ways, an ability we refer to as Cartesian-product extrapolation. We show empirically that additivity is crucial for both identifiability and extrapolation on simulated data.", "keywords": "identifiability;nonlinear ICA;causal representation learning;disentanglement;object-centric representation learning;extrapolation", "primary_area": "", "supplementary_material": "", "author": "Sebastien Lachapelle;Divyat Mahajan;Ioannis Mitliagkas;Simon Lacoste-Julien", "authorids": "~Sebastien_Lachapelle1;~Divyat_Mahajan1;~Ioannis_Mitliagkas1;~Simon_Lacoste-Julien1", "gender": "M;M;M;M", "homepage": "https://slachapelle.github.io/;http://divyat09.github.io/;http://mitliagkas.github.io/;http://www.iro.umontreal.ca/~slacoste/", "dblp": "224/0080;242/8911.html;83/8757;94/446.html", "google_scholar": "uxHoJp8AAAAJ;https://scholar.google.co.in/citations?user=z5bDMO4AAAAJ;K757SxgAAAAJ;oejm5IUAAAAJ", "orcid": ";;;0000-0001-6485-6180", "linkedin": "s%C3%A9bastien-lachapelle-a4321a122/;divyat-mahajan-6221a0a6/;;simon-lacoste-julien-355b9a3", "or_profile": "~Sebastien_Lachapelle1;~Divyat_Mahajan1;~Ioannis_Mitliagkas1;~Simon_Lacoste-Julien1", "aff": "University of Montreal;Montreal Institute of Learning Algorithms;Mila - Quebec AI Institute;Samsung - SAIT AI Lab, Montreal", "aff_domain": "umontreal.ca;mila.quebec;mila.quebec;samsung.com", "position": "PhD student;PhD student;Principal Researcher;VP Lab Director", "bibtex": "@inproceedings{\nlachapelle2023additive,\ntitle={Additive Decoders for Latent Variables Identification and Cartesian-Product Extrapolation},\nauthor={Sebastien Lachapelle and Divyat Mahajan and Ioannis Mitliagkas and Simon Lacoste-Julien},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R6KJN1AUAR}\n}", "github": "", "project": "", "reviewers": "JkNs;2rwH;C5F5;CUR1;7aiT", 
"pdf_size": 4995442, "rating": "6;7;7;7;7", "confidence": "2;4;3;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "54;65;52;110;115", "wc_strengths": "62;36;129;158;64", "wc_weaknesses": "137;667;189;298;222", "wc_questions": "25;168;331;183;50", "wc_limitations": "5;8;77;72;24", "wc_review": "283;944;778;821;475", "wc_reply_reviewers": "45;159;24;651;37", "wc_reply_authors": "691;69;12;1078;10", "reply_reviewers": "1;1;1;2;1", "reply_authors": "3;2;2;3;2", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 79.2, 27.592752671670873 ], "wc_strengths_avg": [ 89.8, 45.871123814443436 ], "wc_weaknesses_avg": [ 302.6, 189.5432404492442 ], "wc_questions_avg": [ 151.4, 109.35190899110998 ], "wc_limitations_avg": [ 37.2, 31.173065296823154 ], "wc_review_avg": [ 660.2, 243.6615685741188 ], "wc_reply_reviewers_avg": [ 183.2, 238.8475664519109 ], "wc_reply_authors_avg": [ 372.0, 436.4974226728034 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.790569415042095, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4885064594910141567&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "umontreal.ca;mila.quebec;mila.quebec;samsung.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Montreal;Montreal Institute of Learning Algorithms;Quebec AI Institute;Samsung", "aff_unique_dep": ";Learning Algorithms;AI Institute;SAIT AI Lab", "aff_unique_url": "https://wwwumontreal.ca;https://mila.quebec;https://mila.quebec;https://www.samsung.com", "aff_unique_abbr": "UM;MILA;Mila;Samsung", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Dynamo-Depth: Fixing Unsupervised Depth Estimation for Dynamical Scenes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71677", "id": "R6qMmdl4qP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac5c594dedf66affb098c39a3bcfdb3d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=R6qMmdl4qP", "openreview": "https://openreview.net/forum?id=R6qMmdl4qP", "poster": "/media/PosterPDFs/NeurIPS%202023/71677.png?t=1701279404.5167465", "slides": "https://nips.cc/virtual/2023/poster/71677", "video": "https://nips.cc/virtual/2023/poster/71677", "author_site": "Yihong Sun, Bharath Hariharan", "tldr": "", "abstract": "Unsupervised monocular depth estimation techniques have demonstrated encouraging results but typically assume that the scene is static. These techniques suffer when trained on dynamical scenes, where apparent object motion can equally be explained by hypothesizing the object's independent motion, or by altering its depth. This ambiguity causes depth estimators to predict erroneous depth for moving objects. To resolve this issue, we introduce Dynamo-Depth, an unifying approach that disambiguates dynamical motion by jointly learning monocular depth, 3D independent flow field, and motion segmentation from unlabeled monocular videos. 
Specifically, our key insight is that a good initial estimate of motion segmentation is sufficient for jointly learning depth and independent motion, despite the fundamental underlying ambiguity. Our proposed method achieves state-of-the-art performance for monocular depth estimation on the Waymo Open and nuScenes datasets, with significant improvements in the depth of moving objects. Code and additional results are available at https://dynamo-depth.github.io.", "keywords": "monocular;depth estimation;dynamical scenes;motion segmentation;self-supervised", "primary_area": "", "supplementary_material": "", "author": "Yihong Sun;Bharath Hariharan", "authorids": "~Yihong_Sun1;~Bharath_Hariharan3", "gender": "M;M", "homepage": "https://yihongsun.github.io;http://home.bharathh.info", "dblp": "266/1357;05/8412", "google_scholar": "JD2rFJEAAAAJ;TpglobcAAAAJ", "orcid": "0000-0003-4533-3055;", "linkedin": "yihongsun/;", "or_profile": "~Yihong_Sun1;~Bharath_Hariharan2", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsun2023dynamodepth,\ntitle={Dynamo-Depth: Fixing Unsupervised Depth Estimation for Dynamical Scenes},\nauthor={Yihong Sun and Bharath Hariharan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R6qMmdl4qP}\n}", "github": "", "project": "", "reviewers": "mnoy;SBjP;9riS;9pRa;EvaZ", "pdf_size": 11782684, "rating": "3;4;6;7;7", "confidence": "5;5;5;4;5", "soundness": "1;3;4;4;4", "novelty": "1;2;3;3;4", "presentation": "2;2;3;3;4", "wc_summary": "78;112;122;130;61", "wc_strengths": "27;60;67;154;71", "wc_weaknesses": "507;125;40;195;85", "wc_questions": "9;150;77;77;1", "wc_limitations": "36;6;5;13;1", "wc_review": "657;453;311;569;219", "wc_reply_reviewers": "440;36;54;0;70", "wc_reply_authors": "403;242;0;0;24", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 1.16619037896906 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 100.6, 26.575176386996944 ], "wc_strengths_avg": [ 75.8, 42.06376112522512 ], "wc_weaknesses_avg": [ 190.4, 166.2908295727699 ], "wc_questions_avg": [ 62.8, 54.26011426453136 ], "wc_limitations_avg": [ 12.2, 12.512393855693642 ], "wc_review_avg": [ 441.8, 160.8507382637705 ], "wc_reply_reviewers_avg": [ 120.0, 161.6861156686003 ], "wc_reply_authors_avg": [ 133.8, 162.5034153487243 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3572164032177325389&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cornell.edu;cornell.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "The Utility of \u201cEven if\u201d Semifactual Explanation to Optimise Positive Outcomes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71676", "id": "R6wXP7txer", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5e146ca55a2b18be41942cfa677123d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=R6wXP7txer", "openreview": "https://openreview.net/forum?id=R6wXP7txer", "poster": "/media/PosterPDFs/NeurIPS%202023/71676.png?t=1702267696.6948225", "slides": "https://nips.cc/virtual/2023/poster/71676", "video": "https://nips.cc/virtual/2023/poster/71676", "author_site": "Eoin Kenny, Weipeng Huang", "tldr": "", "abstract": "When users receive either a positive or negative outcome from an automated system, Explainable AI (XAI) has almost exclusively focused on how to mutate negative outcomes into positive ones by crossing a decision boundary using counterfactuals (e.g., *\"If you earn 2k more, we will accept your loan application\"*). Here, we instead focus on positive outcomes, and take the novel step of using XAI to optimise them (e.g., *\"Even if you wish to half your down-payment, we will still accept your loan application\"*). Explanations such as these that employ \"even if...\" reasoning, and do not cross a decision boundary, are known as semifactuals. To instantiate semifactuals in this context, we introduce the concept of *Gain* (i.e., how much a user stands to benefit from the explanation), and consider the first causal formalisation of semifactuals. Tests on benchmark datasets show our algorithms are better at maximising gain compared to prior work, and that causality is important in the process. Most importantly however, a user study supports our main hypothesis by showing people find semifactual explanations more useful than counterfactuals when they receive the positive outcome of a loan acceptance.", "keywords": "Semifactual Explanation;Counterfactual Explanation;Explainable AI;Recourse;User Study", "primary_area": "", "supplementary_material": "/attachment/ab495fd9c777084e18c1dcdaea21d75eccbd0f8c.zip", "author": "Eoin M. Kenny;Weipeng Fuzzy Huang", "authorids": "~Eoin_M._Kenny1;~Weipeng_Fuzzy_Huang1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Eoin_M._Kenny1;~Weipeng_Fuzzy_Huang1", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nkenny2023the,\ntitle={The Utility of {\\textquotedblleft}Even if{\\textquotedblright} Semifactual Explanation to Optimise Positive Outcomes},\nauthor={Eoin M. 
Kenny and Weipeng Fuzzy Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R6wXP7txer}\n}", "github": "", "project": "", "reviewers": "4usU;LWa1;vFfC;6hWY", "pdf_size": 1790989, "rating": "3;6;7;7", "confidence": "3;2;3;4", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "2;1;3;3", "wc_summary": "94;33;98;80", "wc_strengths": "64;18;150;87", "wc_weaknesses": "368;31;99;41", "wc_questions": "28;311;2;139", "wc_limitations": "43;28;1;7", "wc_review": "597;421;350;354", "wc_reply_reviewers": "321;50;123;39", "wc_reply_authors": "1186;91;2468;0", "reply_reviewers": "1;2;1;1", "reply_authors": "3;2;5;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 76.25, 25.849323008543184 ], "wc_strengths_avg": [ 79.75, 47.56245893559331 ], "wc_weaknesses_avg": [ 134.75, 137.1465912810085 ], "wc_questions_avg": [ 120.0, 121.68607151190312 ], "wc_limitations_avg": [ 19.75, 16.753730927766508 ], "wc_review_avg": [ 430.5, 100.18108603923197 ], "wc_reply_reviewers_avg": [ 133.25, 113.10255302158303 ], "wc_reply_authors_avg": [ 936.25, 999.9555927639987 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2156655464068768, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8420066118276810739&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": ";", "author_num": 2 }, { "id": "R7lDPUgpaA", "title": "Input margins can predict generalization too", "track": "main", "status": "Reject", "tldr": "", "abstract": "Understanding generalization in deep neural networks is an active area of research. A promising avenue of exploration has been that of margin measurements: the shortest distance to the decision boundary for a given sample or its representation internal to the network. While margins have been shown to be correlated with the generalization ability of a model when measured at its hidden representations (hidden margins), no such link between large margins and generalization has been established for input margins. We show that while input margins are not generally predictive of generalization, they can be if the search space is appropriately constrained.\nWe develop such a measure based on input margins, which we refer to as 'constrained margins'. The predictive power of this new measure is demonstrated on the 'Predicting Generalization in Deep Learning' (PGDL) dataset and contrasted with hidden representation margins. 
We find that constrained margins achieve highly competitive scores and outperform other margin measurements in general.", "keywords": "Generalization;margin;decision boundary;predicting generalization", "primary_area": "", "supplementary_material": "/attachment/7bfbaf847d2df2628b016929e7c75881ee0ed779.zip", "author": "Coenraad Mouton;Marthinus Wilhelmus Theunissen;Marelie Hattingh Davel", "authorids": "~Coenraad_Mouton1;~Marthinus_Wilhelmus_Theunissen1;~Marelie_Hattingh_Davel1", "gender": "M;M;F", "homepage": "https://coenraadmouton.com;;http://engineering.nwu.ac.za/must", "dblp": "288/0113;256/9587;79/7158.html", "google_scholar": "CBny1CcAAAAJ;p3bOWQEAAAAJ;Xz-fi1cAAAAJ", "orcid": "0000-0001-8610-2478;0000-0002-7456-7769;0000-0003-3103-5858", "linkedin": "coenraad-mouton-930953105/;tian-theunissen-90640b24a/;marelie-hattingh-davel-5321811/", "or_profile": "~Coenraad_Mouton1;~Marthinus_Wilhelmus_Theunissen1;~Marelie_Hattingh_Davel1", "aff": "North-West University;North-West University;North-West University", "aff_domain": "nwu.ac.za;nwu.ac.za;nwu.ac.za", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nmouton2023input,\ntitle={Input margins can predict generalization too},\nauthor={Coenraad Mouton and Marthinus Wilhelmus Theunissen and Marelie Hattingh Davel},\nyear={2023},\nurl={https://openreview.net/forum?id=R7lDPUgpaA}\n}", "github": "", "project": "", "reviewers": "Uvn7;588w;hMFD;oYf3", "site": "https://openreview.net/forum?id=R7lDPUgpaA", "pdf_size": 1936864, "rating": "4;4;5;7", "confidence": "4;4;4;4", "soundness": "3;2;2;3", "novelty": "1;2;2;3", "presentation": "3;2;3;3", "wc_summary": "53;176;63;16", "wc_strengths": "44;30;79;61", "wc_weaknesses": "173;147;99;183", "wc_questions": "84;8;2;192", "wc_limitations": "10;1;2;6", "wc_review": "364;362;245;458", "wc_reply_reviewers": "100;33;0;723", "wc_reply_authors": "0;0;0;432", "reply_reviewers": "1;1;0;3", "reply_authors": "1;1;1;4", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.0, 59.778758769315374 ], "wc_strengths_avg": [ 53.5, 18.364367672206956 ], "wc_weaknesses_avg": [ 150.5, 32.507691397575435 ], "wc_questions_avg": [ 71.5, 76.71212420471747 ], "wc_limitations_avg": [ 4.75, 3.5619517121937516 ], "wc_review_avg": [ 357.25, 75.52938170010397 ], "wc_reply_reviewers_avg": [ 214.0, 296.0717818367701 ], "wc_reply_authors_avg": [ 108.0, 187.06148721743875 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5495661969045272650&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "North-West University", "aff_unique_dep": "", "aff_unique_url": "https://www.nwu.ac.za", "aff_unique_abbr": "NWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Africa" }, { "title": "Projection-Free Methods for Stochastic Simple Bilevel Optimization with Convex Lower-level Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71675", "id": "R8GF0EsNsI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/136729ae4b0fee25a0d28077442506da-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=R8GF0EsNsI", "openreview": "https://openreview.net/forum?id=R8GF0EsNsI", "poster": "/media/PosterPDFs/NeurIPS%202023/71675.png?t=1702272858.882229", "slides": "https://nips.cc/virtual/2023/poster/71675", "video": "https://nips.cc/virtual/2023/poster/71675", "author_site": "Jincheng Cao, Ruichen Jiang, Nazanin Abolfazli, Erfan Yazdandoost Hamedani, Aryan Mokhtari", "tldr": "", "abstract": "In this paper, we study a class of stochastic bilevel optimization problems, also known as stochastic simple bilevel optimization, where we minimize a smooth stochastic objective function over the optimal solution set of another stochastic convex optimization problem. We introduce novel stochastic bilevel optimization methods that locally approximate the solution set of the lower-level problem via a stochastic cutting plane, and then run a conditional gradient update with variance reduction techniques to control the error induced by using stochastic gradients. For the case that the upper-level function is convex, our method requires $\\mathcal{O}(\\max\\\\{1/\\epsilon_f^{2},1/\\epsilon_g^{2}\\\\}) $ stochastic oracle queries to obtain a solution that is $\\epsilon_f$-optimal for the upper-level and $\\epsilon_g$-optimal for the lower-level. This guarantee improves the previous best-known complexity of $\\mathcal{O}(\\max\\\\{1/\\epsilon_f^{4},1/\\epsilon_g^{4}\\\\})$. Moreover, for the case that the upper-level function is non-convex, our method requires at most $\\mathcal{O}(\\max\\\\{1/\\epsilon_f^{3},1/\\epsilon_g^{3}\\\\}) $ stochastic oracle queries to find an $(\\epsilon_f, \\epsilon_g)$-stationary point. In the finite-sum setting, we show that the number of stochastic oracle calls required by our method are $\\mathcal{O}(\\sqrt{n}/\\epsilon)$ and $\\mathcal{O}(\\sqrt{n}/\\epsilon^{2})$ for the convex and non-convex settings, respectively, where $\\epsilon=\\min \\\\{\\epsilon_f,\\epsilon_g\\\\}$.", "keywords": "Bilevel optimization;stochastic optimization", "primary_area": "", "supplementary_material": "/attachment/84d15e5817f80238fd32d8da4259af101cb0eb47.zip", "author": "Jincheng Cao;Ruichen Jiang;Nazanin Abolfazli;Erfan Yazdandoost Hamedani;Aryan Mokhtari", "authorids": "~Jincheng_Cao1;~Ruichen_Jiang1;~Nazanin_Abolfazli1;~Erfan_Yazdandoost_Hamedani1;~Aryan_Mokhtari3", "gender": "M;;F;M;M", "homepage": "https://www.linkedin.com/in/jc-cao/;https://ruichen-jiang.github.io/;;https://profiles.arizona.edu/person/erfany;https://sites.utexas.edu/mokhtari/", "dblp": ";271/7916;322/4143;191/6717;140/7407", "google_scholar": ";BGFt1UMAAAAJ;-N0detkAAAAJ;imtUGbQAAAAJ;glcep6EAAAAJ", "orcid": ";;;0000-0002-3229-3499;", "linkedin": ";;;;", "or_profile": "~Jincheng_Cao1;~Ruichen_Jiang1;~Nazanin_Abolfazli1;~Erfan_Yazdandoost_Hamedani1;~Aryan_Mokhtari3", "aff": "University of Texas at Austin;University of Texas at Austin;University of Arizona;University of Arizona;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;arizona.edu;arizona.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2023projectionfree,\ntitle={Projection-Free Methods for Stochastic Simple Bilevel Optimization with Convex Lower-level Problem},\nauthor={Jincheng Cao and Ruichen Jiang and Nazanin Abolfazli and Erfan Yazdandoost Hamedani and Aryan Mokhtari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R8GF0EsNsI}\n}", 
"github": "", "project": "", "reviewers": "Xgt8;oeu1;iBKW;FAuV", "pdf_size": 4156594, "rating": "4;6;6;6", "confidence": "4;4;4;2", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;4;3;2", "wc_summary": "148;53;76;40", "wc_strengths": "89;35;38;73", "wc_weaknesses": "141;135;34;229", "wc_questions": "427;281;16;5", "wc_limitations": "1;21;1;13", "wc_review": "806;525;165;360", "wc_reply_reviewers": "6;11;13;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 79.25, 41.73352968537409 ], "wc_strengths_avg": [ 58.75, 22.982330169066845 ], "wc_weaknesses_avg": [ 134.75, 69.05206369110195 ], "wc_questions_avg": [ 182.25, 179.3814023247672 ], "wc_limitations_avg": [ 9.0, 8.48528137423857 ], "wc_review_avg": [ 464.0, 235.0010638273793 ], "wc_reply_reviewers_avg": [ 11.5, 3.640054944640259 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13472106254342239314&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "utexas.edu;utexas.edu;arizona.edu;arizona.edu;utexas.edu", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Texas at Austin;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.arizona.edu", "aff_unique_abbr": "UT Austin;UA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PROTES: Probabilistic Optimization with Tensor Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71674", "id": "R9R7YDOar1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/028957869e560af14243ac37663a471e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=R9R7YDOar1", "openreview": "https://openreview.net/forum?id=R9R7YDOar1", "poster": "/media/PosterPDFs/NeurIPS%202023/71674.png?t=1702229537.8056872", "slides": "https://nips.cc/virtual/2023/poster/71674", "video": "https://nips.cc/virtual/2023/poster/71674", "author_site": "Anastasiia Batsheva, Andrei Chertkov, Gleb Ryzhakov, Ivan Oseledets", "tldr": "", "abstract": "We developed a new method PROTES for black-box optimization, which is based on the probabilistic sampling from a probability density function given in the low-parametric tensor train format. We tested it on complex multidimensional arrays and discretized multivariable functions taken, among others, from real-world applications, including unconstrained binary optimization and optimal control problems, for which the possible number of elements is up to $2^{1000}$. 
In numerical experiments, both on analytic model functions and on complex problems, PROTES outperforms popular discrete optimization methods (Particle Swarm Optimization, Covariance Matrix Adaptation, Differential Evolution, and others).", "keywords": "Tensor Train;Black Box Optimization;Sampling;Optimal Control", "primary_area": "", "supplementary_material": "/attachment/d8c9f81a23b35da1f10811e481810e6a25e9da3e.zip", "author": "Anastasia Batsheva;Andrei Chertkov;Gleb Ryzhakov;Ivan Oseledets", "authorids": "~Anastasia_Batsheva1;~Andrei_Chertkov1;~Gleb_Ryzhakov1;~Ivan_Oseledets1", "gender": "F;M;M;M", "homepage": "https://github.com/anabatsh;;http://oseledets.github.io;", "dblp": "338/9583;285/5751;56/7175;222/9593", "google_scholar": ";Hf_pNoQAAAAJ;https://scholar.google.ru/citations?user=5kMqBQEAAAAJ;ZqmOtcwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Anastasia_Batsheva1;~Andrei_Chertkov1;~Ivan_Oseledets1;~Gleb_Vladimirovich_Ryzhakov1", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;skolkovotech.ru;inm.ras.ru;skoltech.ru", "position": "Intern;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbatsheva2023protes,\ntitle={{PROTES}: Probabilistic Optimization with Tensor Sampling},\nauthor={Anastasia Batsheva and Andrei Chertkov and Gleb Ryzhakov and Ivan Oseledets},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=R9R7YDOar1}\n}", "github": "", "project": "", "reviewers": "7NgC;7wCj;SmA2;U74k;e361", "pdf_size": 1298693, "rating": "4;5;6;6;7", "confidence": "4;4;2;3;3", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "119;130;39;22;77", "wc_strengths": "38;10;76;32;41", "wc_weaknesses": "162;171;56;71;178", "wc_questions": "54;172;16;1;68", "wc_limitations": "35;2;4;21;3", "wc_review": "408;485;191;147;367", "wc_reply_reviewers": "513;33;60;113;21", "wc_reply_authors": "978;30;89;204;39", "reply_reviewers": "1;1;2;1;1", "reply_authors": "3;2;3;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 77.4, 42.52340531989413 ], "wc_strengths_avg": [ 39.4, 21.275337835155522 ], "wc_weaknesses_avg": [ 127.6, 52.79621198533092 ], "wc_questions_avg": [ 62.2, 60.06130201718907 ], "wc_limitations_avg": [ 13.0, 13.038404810405298 ], "wc_review_avg": [ 319.6, 129.4196275686188 ], "wc_reply_reviewers_avg": [ 148.0, 185.2285075251647 ], "wc_reply_authors_avg": [ 268.0, 360.36703511836373 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.681385143869247, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12936041336474586078&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "skoltech.ru;skolkovotech.ru;inm.ras.ru;skoltech.ru", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;", "aff_unique_abbr": "Skoltech;", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation;" }, { "title": "Segment Anything in High Quality", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71673", "id": "RA7ND878XP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f828e38160f31935cfe9f67503ad17c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RA7ND878XP", "openreview": "https://openreview.net/forum?id=RA7ND878XP", "poster": "/media/PosterPDFs/NeurIPS%202023/71673.png?t=1699439034.8406653", "slides": "https://nips.cc/virtual/2023/poster/71673", "video": "https://nips.cc/virtual/2023/poster/71673", "author_site": "Lei Ke, Mingqiao Ye, Martin Danelljan, Yifan liu, Yu-Wing Tai, Chi-Keung Tang, Fisher Yu", "tldr": "", "abstract": "The recent Segment Anything Model (SAM) represents a big leap in scaling up segmentation models, allowing for powerful zero-shot capabilities and flexible prompting. Despite being trained with 1.1 billion masks, SAM's mask prediction quality falls short in many cases, particularly when dealing with objects that have intricate structures. We propose HQ-SAM, equipping SAM with the ability to accurately segment any object, while maintaining SAM's original promptable design, efficiency, and zero-shot generalizability. Our careful design reuses and preserves the pre-trained model weights of SAM, while only introducing minimal additional parameters and computation. We design a learnable High-Quality Output Token, which is injected into SAM's mask decoder and is responsible for predicting the high-quality mask. Instead of only applying it on mask-decoder features, we first fuse them with early and final ViT features for improved mask details. To train our introduced learnable parameters, we compose a dataset of 44K fine-grained masks from several sources. HQ-SAM is only trained on the introduced detaset of 44k masks, which takes only 4 hours on 8 GPUs. We show the efficacy of HQ-SAM in a suite of 10 diverse segmentation datasets across different downstream tasks, where 8 out of them are evaluated in a zero-shot transfer protocol. 
Our code and pretrained models are at https://github.com/SysCV/SAM-HQ.", "keywords": "segment anything;zero-shot segmentation;high-quality segmentation", "primary_area": "", "supplementary_material": "/attachment/fea477613f1b684fa49196c506bcbe22ac1c0db4.pdf", "author": "Lei Ke;Mingqiao Ye;Martin Danelljan;Yifan liu;Yu-Wing Tai;Chi-Keung Tang;Fisher Yu", "authorids": "~Lei_Ke1;~Mingqiao_Ye1;~Martin_Danelljan4;~Yifan_liu3;~Yu-Wing_Tai2;~Chi-Keung_Tang1;~Fisher_Yu2", "gender": "M;M;M;F;M;M;Not Specified", "homepage": "http://www.kelei.site;https://ymq2017.github.io/;https://martin-danelljan.github.io/;https://irfanicmll.github.io/;https://yuwingtai.github.io/;https://www.yf.io/;http://www.cse.ust.hk/~cktang/", "dblp": "26/5225;285/9253;151/8848;23/4955-1;40/566;117/6314;34/4366", "google_scholar": "WseeNrUAAAAJ;3M9N6S0AAAAJ;NCSSpMkAAAAJ;ksQ4JnQAAAAJ;nFhLmFkAAAAJ;-XCiamcAAAAJ;https://scholar.google.com.tw/citations?user=EWfpM74AAAAJ", "orcid": ";;;;0000-0002-3148-0380;;", "linkedin": ";mingqiao-ye-b034411a2/;;;;;", "or_profile": "~Lei_Ke1;~Mingqiao_Ye1;~Martin_Danelljan4;~Yifan_liu3;~Yu-Wing_Tai2;~Fisher_Yu2;~ChiKeung_Tang1", "aff": "Hong Kong University of Science and Technology;ETHZ - ETH Zurich;ETH Zurich;University of Adelaide;Kuaishou Technology;Swiss Federal Institute of Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ethz.ch;vision.ee.ethz.ch;adelaide.edu.au;kuaishou.com;ethz.ch;ust.hk", "position": "PhD student;MS student;Principal Researcher;Assistant Professor;Senior Research Director;Assistant Professor;Professor", "bibtex": "@inproceedings{\nke2023segment,\ntitle={Segment Anything in High Quality},\nauthor={Lei Ke and Mingqiao Ye and Martin Danelljan and Yifan liu and Yu-Wing Tai and Chi-Keung Tang and Fisher Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RA7ND878XP}\n}", "github": "", "project": "", "reviewers": "eEa5;E7oL;2HQS;xJfb", "pdf_size": 13250935, "rating": "5;5;6;7", "confidence": "4;5;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "127;44;86;53", "wc_strengths": "42;35;58;80", "wc_weaknesses": "125;58;67;156", "wc_questions": "38;1;4;27", "wc_limitations": "21;1;1;41", "wc_review": "353;139;216;357", "wc_reply_reviewers": "11;0;20;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 32.57683225852385 ], "wc_strengths_avg": [ 53.75, 17.297037318569906 ], "wc_weaknesses_avg": [ 101.5, 40.635575546557725 ], "wc_questions_avg": [ 17.5, 15.532224567009067 ], "wc_limitations_avg": [ 16.0, 16.583123951777 ], "wc_review_avg": [ 266.25, 92.8422721609074 ], "wc_reply_reviewers_avg": [ 13.25, 8.699856320652657 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 387, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1682375969829449390&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ust.hk;ethz.ch;vision.ee.ethz.ch;adelaide.edu.au;kuaishou.com;ethz.ch;ust.hk", "author_num": 7, "aff_unique_index": 
"0;1;1;2;3;4;0", "aff_unique_norm": "Hong Kong University of Science and Technology;ETH Zurich;University of Adelaide;Kuaishou Technology;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ust.hk;https://www.ethz.ch;https://www.adelaide.edu.au;https://www.kuaishou.com;https://www.ethz.ch", "aff_unique_abbr": "HKUST;ETHZ;Adelaide;Kuaishou;ETH Zurich", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;2;0;1;0", "aff_country_unique": "China;Switzerland;Australia" }, { "title": "Beyond Black-Box Advice: Learning-Augmented Algorithms for MDPs with Q-Value Predictions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71672", "id": "RACcp8Zbr9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e806d3c56ed5f1dab85d601e13cbe38-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RACcp8Zbr9", "openreview": "https://openreview.net/forum?id=RACcp8Zbr9", "poster": "/media/PosterPDFs/NeurIPS%202023/71672.png?t=1702316665.0804873", "slides": "https://nips.cc/virtual/2023/poster/71672", "video": "https://nips.cc/virtual/2023/poster/71672", "author_site": "Tongxin Li, Yiheng Lin, Shaolei Ren, Adam Wierman", "tldr": "", "abstract": "We study the tradeoff between consistency and robustness in the context of a single-trajectory time-varying Markov Decision Process (MDP) with untrusted machine-learned advice. Our work departs from the typical approach of treating advice as coming from black-box sources by instead considering a setting where additional information about how the advice is generated is available. We prove a first-of-its-kind consistency and robustness tradeoff given Q-value advice under a general MDP model that includes both continuous and discrete state/action spaces. 
Our results highlight that utilizing Q-value advice enables dynamic pursuit of the better of machine-learned advice and a robust baseline, thus resulting in near-optimal performance guarantees, which provably improve on what can be obtained solely with black-box advice.", "keywords": "Time-varying MDP;Learning-augmented online algorithm;consistency and robustness tradeoff", "primary_area": "", "supplementary_material": "/attachment/54d49154be4ee427a767deac005e88b3d267ab3b.pdf", "author": "Tongxin Li;Yiheng Lin;Shaolei Ren;Adam Wierman", "authorids": "~Tongxin_Li1;~Yiheng_Lin1;~Shaolei_Ren1;~Adam_Wierman1", "gender": "M;M;;M", "homepage": "https://tongxin.me/;;;https://adamwierman.com/", "dblp": "140/7353;;;56/4447", "google_scholar": "qyNc3CkAAAAJ;S1wSEggAAAAJ;;4OvOdSgAAAAJ", "orcid": ";;;0000-0002-5923-0199", "linkedin": ";;;adam-wierman-a529474/", "or_profile": "~Tongxin_Li1;~Yiheng_Lin1;~Shaolei_Ren1;~Adam_Wierman1", "aff": "The Chinese University of Hong Kong, Shenzhen;California Institute of Technology;;California Institute of Technology", "aff_domain": "cuhk.edu.cn;caltech.edu;;caltech.edu", "position": "Assistant Professor;PhD student;;Professor", "bibtex": "@inproceedings{\nli2023beyond,\ntitle={Beyond Black-Box Advice: Learning-Augmented Algorithms for {MDP}s with Q-Value Predictions},\nauthor={Tongxin Li and Yiheng Lin and Shaolei Ren and Adam Wierman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RACcp8Zbr9}\n}", "github": "", "project": "", "reviewers": "9C69;SGod;qteD;BJJd;BSCA;c87H", "pdf_size": 967470, "rating": "5;6;7;7;7;7", "confidence": "1;2;3;3;3;2", "soundness": "3;3;4;4;3;3", "novelty": "2;2;3;4;3;3", "presentation": "3;2;4;3;3;3", "wc_summary": "99;56;120;154;61;160", "wc_strengths": "38;52;81;122;243;19", "wc_weaknesses": "48;37;313;83;382;44", "wc_questions": "25;303;31;120;141;13", "wc_limitations": "11;2;11;96;69;24", "wc_review": "221;450;556;575;896;260", "wc_reply_reviewers": "51;39;58;33;245;5", "wc_reply_authors": "38;50;51;0;245;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;1;2;1", "rating_avg": [ 6.5, 0.7637626158259734 ], "confidence_avg": [ 2.3333333333333335, 0.7453559924999298 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 108.33333333333333, 40.737642979872504 ], "wc_strengths_avg": [ 92.5, 74.90605226993789 ], "wc_weaknesses_avg": [ 151.16666666666666, 140.99812843517543 ], "wc_questions_avg": [ 105.5, 100.88235722860564 ], "wc_limitations_avg": [ 35.5, 34.73110997362451 ], "wc_review_avg": [ 493.0, 224.80510077249878 ], "wc_reply_reviewers_avg": [ 71.83333333333333, 79.2305005804092 ], "wc_reply_authors_avg": [ 64.0, 83.66002629691195 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8783100656536796, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3536264106544546593&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cuhk.edu.cn;caltech.edu;;caltech.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.caltech.edu", "aff_unique_abbr": "CUHK;Caltech",
"aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Shenzhen;Pasadena", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "How hard are computer vision datasets? Calibrating dataset difficulty to viewing time", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73596", "id": "RADrFxYqIH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2432e9646556f3a98dc78c1f4a10481b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=RADrFxYqIH", "openreview": "https://openreview.net/forum?id=RADrFxYqIH", "poster": "/media/PosterPDFs/NeurIPS%202023/73596.png?t=1702350952.5050826", "slides": "https://nips.cc/virtual/2023/poster/73596", "video": "https://nips.cc/virtual/2023/poster/73596", "author_site": "David Mayo, Jesse Cummings, Xinyu Lin, Dan Gutfreund, Boris Katz, Andrei Barbu", "tldr": "", "abstract": "Humans outperform object recognizers despite the fact that models perform well on current datasets, including those explicitly designed to challenge machines with debiased images or distribution shift. This problem persists, in part, because we have no guidance on the absolute difficulty of an image or dataset making it hard to objectively assess progress toward human-level performance, to cover the range of human abilities, and to increase the challenge posed by a dataset. We develop a dataset difficulty metric MVT, Minimum Viewing Time, that addresses these three problems. Subjects view an image that flashes on screen and then classify the object in the image. Images that require brief flashes to recognize are easy, those which require seconds of viewing are hard. We compute the ImageNet and ObjectNet image difficulty distribution, which we find significantly undersamples hard images. Nearly 90% of current benchmark performance is derived from images that are easy for humans. Rather than hoping that we will make harder datasets, we can for the first time objectively guide dataset difficulty during development. We can also subset recognition performance as a function of difficulty: model performance drops precipitously while human performance remains stable. Difficulty provides a new lens through which to view model performance, one which uncovers new scaling laws: vision-language models stand out as being the most robust and human-like while all other techniques scale poorly. We release tools to automatically compute MVT, along with image sets which are tagged by difficulty. 
Objective image difficulty has practical applications \u2013 one can measure how hard a test set is before deploying a real-world system \u2013 and scientific applications such as discovering the neural correlates of image difficulty and enabling new object recognition techniques that eliminate the benchmark-vs-real-world performance gap.", "keywords": "Deep learning;Representation learning;Cognitive science;Computer vision datasets", "primary_area": "", "supplementary_material": "/attachment/c352a3336c4d6653e21c3ecf7c48d6c73d552325.zip", "author": "David Mayo;Jesse Cummings;Xinyu Lin;Dan Gutfreund;Boris Katz;Andrei Barbu", "authorids": "~David_Mayo1;~Jesse_Cummings1;~Xinyu_Lin1;~Dan_Gutfreund1;~Boris_Katz1;~Andrei_Barbu3", "gender": ";M;M;;M;M", "homepage": "http://david-mayo.com;;https://www.linkedin.com/in/xinyu-l-10035b123/;https://researcher.watson.ibm.com/researcher/view.php?person=us-dgutfre;http://people.csail.mit.edu/boris/boris.html;https://0xab.com", "dblp": "190/8836;;;g/DanGutfreund;k/BorisKatz;58/8365", "google_scholar": "QjVd0f8AAAAJ;FTAdd7EAAAAJ;;fRJbyD8AAAAJ;FdNuUb8AAAAJ;t1rjgHgAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;andrei-barbu-1166131", "or_profile": "~David_Mayo1;~Jesse_Cummings1;~Xinyu_Lin1;~Dan_Gutfreund1;~Boris_Katz1;~Andrei_Barbu3", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;;MIT-IBM Watson AI Lab;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;;mit.edu;mit.edu;mit.edu", "position": "PhD student;MS student;;Principal Researcher;Principal Research Scientist;Researcher", "bibtex": "@inproceedings{\nmayo2023how,\ntitle={How hard are computer vision datasets? Calibrating dataset difficulty to viewing time},\nauthor={David Mayo and Jesse Cummings and Xinyu Lin and Dan Gutfreund and Boris Katz and Andrei Barbu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=RADrFxYqIH}\n}", "github": "", "project": "", "reviewers": "ZsVb;HVkw;NiF3;L4sU;cj8A", "pdf_size": 9730546, "rating": "5;5;7;7;10", "confidence": "4;2;3;3;5", "wc_summary_and_contributions": "55;93;111;97;219", "wc_strengths": "76;84;59;44;328", "wc_improvement": "232;261;175;42;85", "wc_limitations": "6;66;49;35;10", "wc_correctness": "33;63;1;9;38", "wc_clarity": "6;220;7;7;11", "wc_relation_to_prior_work": "30;23;1;6;34", "wc_documentation": "17;32;1;1;40", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "456;843;405;242;766", "wc_reply_reviewers": "203;0;29;100;43", "wc_reply_authors": "2164;1537;662;402;393", "reply_reviewers": "2;0;1;2;1", "reply_authors": "5;2;1;1;1", "rating_avg": [ 6.8, 1.8330302779823362 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "wc_summary_and_contributions_avg": [ 115.0, 55.20869496736904 ], "wc_strengths_avg": [ 118.2, 105.80812823219208 ], "wc_improvement_avg": [ 159.0, 83.84986583173523 ], "wc_limitations_avg": [ 33.2, 22.833309002420126 ], "wc_correctness_avg": [ 28.8, 22.076231562474607 ], "wc_clarity_avg": [ 50.2, 84.91737160322381 ], "wc_relation_to_prior_work_avg": [ 18.8, 13.075167302944921 ], "wc_documentation_avg": [ 18.2, 15.866946776238962 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 542.4, 226.68974392327502 ], "wc_reply_reviewers_avg": [ 75.0, 71.79693586776527 ], "wc_reply_authors_avg": [ 1031.6, 704.0694852072486 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ],
"replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6847367880174606, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13521848372395192699&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Combinatorial Optimization with Heavy Decoder: Toward Large Scale Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71671", "id": "RBI4oAbdpm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c10d0c087c14689628124bbc8fa69f6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RBI4oAbdpm", "openreview": "https://openreview.net/forum?id=RBI4oAbdpm", "poster": "/media/PosterPDFs/NeurIPS%202023/71671.png?t=1699420420.7651088", "slides": "https://nips.cc/virtual/2023/poster/71671", "video": "https://nips.cc/virtual/2023/poster/71671", "author_site": "Fu Luo, Xi Lin, Fei Liu, Qingfu Zhang, Zhenkun Wang", "tldr": "", "abstract": "Neural combinatorial optimization (NCO) is a promising learning-based approach for solving challenging combinatorial optimization problems without specialized algorithm design by experts. However, most constructive NCO methods cannot solve problems with large-scale instance sizes, which significantly diminishes their usefulness for real-world applications. In this work, we propose a novel Light Encoder and Heavy Decoder (LEHD) model with a strong generalization ability to address this critical issue. The LEHD model can learn to dynamically capture the relationships between all available nodes of varying sizes, which is beneficial for model generalization to problems of various scales. Moreover, we develop a data-efficient training scheme and a flexible solution construction mechanism for the proposed LEHD model. By training on small-scale problem instances, the LEHD model can generate nearly optimal solutions for the Travelling Salesman Problem (TSP) and the Capacitated Vehicle Routing Problem (CVRP) with up to 1000 nodes, and also generalizes well to solve real-world TSPLib and CVRPLib problems. 
These results confirm our proposed LEHD model can significantly improve the state-of-the-art performance for constructive NCO.", "keywords": "Neural Combinatorial Optimization;Generalization;Large scale problem;Heavy decoder", "primary_area": "", "supplementary_material": "/attachment/1c7f173607a9a82678512038c6a2392e85861948.pdf", "author": "Fu Luo;Xi Lin;Fei Liu;Qingfu Zhang;Zhenkun Wang", "authorids": "~Fu_Luo1;~Xi_Lin2;~Fei_Liu14;~Qingfu_Zhang1;~Zhenkun_Wang1", "gender": "M;M;M;M;M", "homepage": ";https://xi-l.github.io/;https://www.cs.cityu.edu.hk/~qzhan7/index.html;https://faculty.sustech.edu.cn/wangzk3/en/;https://feiliu36.github.io/", "dblp": "52/9546;43/489-1;98/1240.html;96/9114;64/1350-44", "google_scholar": "i2TLiM8AAAAJ;QB_MUboAAAAJ;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;https://scholar.google.com.sg/citations?user=r9ezy2gAAAAJ;wS0G_qQAAAAJ", "orcid": "0000-0002-3161-6348;;;0000-0003-1152-6780;0000-0001-6719-0409", "linkedin": ";;;;", "or_profile": "~Fu_Luo1;~Xi_Lin2;~Qingfu_Zhang1;~Zhenkun_Wang1;~Fei_LIU13", "aff": "Southern University of Science and Technology;City University of Hong Kong;City University of Hong Kong;Southern University of Science and Technology;City University of Hong Kong", "aff_domain": "sustech.edu;cityu.edu.hk;cityu.edu.hk;sustech.edu.cn;cityu.edu.hk", "position": "Undergrad student;Postdoc;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nluo2023neural,\ntitle={Neural Combinatorial Optimization with Heavy Decoder: Toward Large Scale Generalization},\nauthor={Fu Luo and Xi Lin and Fei Liu and Qingfu Zhang and Zhenkun Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RBI4oAbdpm}\n}", "github": "", "project": "", "reviewers": "6PbV;6SQz;8Nbk;Hd9p", "pdf_size": 1333539, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "110;90;109;54", "wc_strengths": "34;61;57;70", "wc_weaknesses": "155;69;88;26", "wc_questions": "133;73;40;48", "wc_limitations": "1;1;3;1", "wc_review": "433;294;297;199", "wc_reply_reviewers": "362;0;18;126", "wc_reply_authors": "740;53;20;1096", "reply_reviewers": "3;0;1;2", "reply_authors": "4;2;2;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 22.664675157610354 ], "wc_strengths_avg": [ 55.5, 13.275918047351754 ], "wc_weaknesses_avg": [ 84.5, 46.489246068311324 ], "wc_questions_avg": [ 73.5, 36.44516428828384 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 305.75, 83.37078325168835 ], "wc_reply_reviewers_avg": [ 126.5, 144.2523829959145 ], "wc_reply_authors_avg": [ 477.25, 458.51792494950513 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=799809624183148617&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "sustech.edu;cityu.edu.hk;cityu.edu.hk;sustech.edu.cn;cityu.edu.hk", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Southern University of Science and Technology;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.sustech.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "SUSTech;CityU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Public Opinion Field Effect Fusion in Representation Learning for Trending Topics Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71670", "id": "RFE1eI0zNZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32246544c237164c365c0527b677a79a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RFE1eI0zNZ", "openreview": "https://openreview.net/forum?id=RFE1eI0zNZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71670.png?t=1699105020.0417933", "slides": "https://nips.cc/virtual/2023/poster/71670", "video": "https://nips.cc/virtual/2023/poster/71670", "author_site": "Junliang Li, Yang Yajun, Qinghua Hu, Xin Wang, Hong Gao", "tldr": "", "abstract": "Trending topic diffusion and prediction analysis is an important problem and has been well studied in social networks. Representation learning is an effective way to extract node embeddings, which can help for topic propagation analysis by completing downstream tasks such as link prediction and node classification. In real world, there are often several trending topics or opinion leaders in public opinion space at the same time and they can be regarded as different centers of public opinion. A public opinion field will be formed surrounding every center. These public opinion fields compete for public's attention and it will potentially affect the development of public opinion. However, the existing methods do not consider public opinion field effect for trending topics diffusion. In this paper, we introduce three well-known observations about public opinion field effect in media and communication studies, and propose a novel and effective heterogeneous representation learning framework to incorporate public opinion field effect and social circle influence effect. To the best of our knowledge, our work is the first to consider these effects in representation learning for trending topic diffusion. 
Extensive experiments on real-world datasets validate the superiority of our model.", "keywords": "public opinion field effect;heterogeneous networks;representation learning;trending topic diffusion", "primary_area": "", "supplementary_material": "", "author": "Junliang Li;Yajun Yang;Qinghua Hu;Xin Wang;Hong Gao", "authorids": "~Junliang_Li1;~Yajun_Yang1;~Qinghua_Hu1;~Xin_Wang39;~Hong_Gao1", "gender": "M;M;M;M;F", "homepage": "https://github.com/ki-ljl;http://cic.tju.edu.cn/info/1080/1907.htm;http://cic.tju.edu.cn/faculty/huqinghua/index.html;http://www.tjudb.cn/dbgroup/Xin_Wang;http://mypage.zjnu.edu.cn/gh/zh_CN/index.htm", "dblp": ";;;10/5630-30;32/1438-1", "google_scholar": "blxxnUwAAAAJ;;TVSNq_wAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-8170-7926;0000-0002-0824-2931;0000-0001-7765-8095;0000-0001-9651-0651;0000-0002-2000-6683", "linkedin": ";;;;", "or_profile": "~Junliang_Li1;~Yajun_Yang1;~Qinghua_Hu1;~Xin_Wang39;~Hong_Gao1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Zhejiang Normal University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;zjnu.edu.cn", "position": "MS student;Associate Professor;Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023public,\ntitle={Public Opinion Field Effect Fusion in Representation Learning for Trending Topics Diffusion},\nauthor={Junliang Li and Yajun Yang and Qinghua Hu and Xin Wang and Hong Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RFE1eI0zNZ}\n}", "github": "", "project": "", "reviewers": "kWLJ;aU4Y;7Tx8;NVSH;1ymS", "pdf_size": 2031577, "rating": "5;5;5;5;6", "confidence": "4;3;3;2;4", "soundness": "2;3;3;2;3", "novelty": "3;3;2;3;3", "presentation": "2;2;4;3;3", "wc_summary": "50;167;48;62;50", "wc_strengths": "22;51;33;94;88", "wc_weaknesses": "368;145;79;172;16", "wc_questions": "3;200;68;15;1", "wc_limitations": "8;159;79;54;1", "wc_review": "451;722;307;397;156", "wc_reply_reviewers": "17;42;0;24;14", "wc_reply_authors": "36;48;0;31;14", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 75.4, 46.0677761564415 ], "wc_strengths_avg": [ 57.6, 28.862432329933664 ], "wc_weaknesses_avg": [ 156.0, 119.03780911962384 ], "wc_questions_avg": [ 57.4, 75.34613460556552 ], "wc_limitations_avg": [ 60.2, 57.241243871879654 ], "wc_review_avg": [ 406.6, 186.68754645128314 ], "wc_reply_reviewers_avg": [ 19.4, 13.734627770711517 ], "wc_reply_authors_avg": [ 25.8, 16.90443728729235 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16951752767602819901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;zjnu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Tianjin University;Zhejiang Normal University", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;http://www.zjnu.edu.cn", "aff_unique_abbr": "TJU;ZJNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "China" }, { "title": "On-the-Fly Adapting Code Summarization on Trainable Cost-Effective Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71669", "id": "RFgv7cfMUy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b16e6de5fbbdcb2df237aa66b302bc17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RFgv7cfMUy", "openreview": "https://openreview.net/forum?id=RFgv7cfMUy", "poster": "/media/PosterPDFs/NeurIPS%202023/71669.png?t=1699617572.2146065", "slides": "https://nips.cc/virtual/2023/poster/71669", "video": "https://nips.cc/virtual/2023/poster/71669", "author_site": "Yufan Cai, Yun Lin, Chenyan Liu, Jinglian Wu, Yifan Zhang, Yiming Liu, Yeyun Gong, Jin Song Dong", "tldr": "", "abstract": "Deep learning models are emerging to summarize source code to comment,\n facilitating tasks of code documentation and program comprehension.\n Scaled-up large language models trained on large open corpus have achieved good performance in such tasks.\n However, in practice, the subject code in one certain project can be specific,\n which may not align with the overall training corpus.\n Some code samples from other projects may be contradictory and introduce inconsistencies when the models try to fit all the samples.\n\n In this work, we introduce a novel approach, Adacom, to improve the performance of comment generators by on-the-fly model adaptation.\n This research is motivated by the observation that deep comment generators\n often need to strike a balance as they need to fit all the training samples.\n Specifically, for one certain target code $c$,\n some training samples $S_p$ could have made more contributions while other samples $S_o$ could have counter effects.\n However, the traditional fine-tuned models need to fit both $S_p$ and $S_o$ from a global perspective, \n leading to compromised performance for one certain target code $c$.\n In this context, we design Adacom to\n (1) detect whether the model might have a compromised performance on a target code $c$ and\n (2) retrieve a few helpful training samples $S_p$ that have contradictory samples in the training dataset and,\n (3) adapt the model on the fly by re-training the $S_p$ to strengthen the helpful samples and unlearn the harmful samples.\n Our extensive experiments on 7 comment generators and 4 public datasets show that\n (1) can significantly boost the performance of comment generation (BLEU4 score by on average 14.9\\%, METEOR by 12.2\\%, and ROUGE-L by 7.4\\%),\n (2) the adaptation on one code sample is cost-effective and acceptable as an on-the-fly solution, and\n (3) can adapt well on out-of-distribution code samples.", "keywords": "Code Summarization;Adaptation;Language Model", "primary_area": "", "supplementary_material": "", "author": "Yufan Cai;Yun Lin;Chenyan Liu;Jinglian Wu;Yifan Zhang;Yiming Liu;Yeyun Gong;Jin Song Dong", "authorids": "~Yufan_Cai2;~Yun_Lin2;~Chenyan_Liu1;~Jinglian_Wu1;~Yifan_Zhang22;~Yiming_Liu6;~Yeyun_Gong2;~Jin_Song_Dong2", "gender": "M;M;M;F;F;M;M;M", "homepage": "https://caiyufan-nus.github.io/;http://linyun.info/;https://chenyan1999.github.io;https://www.linkedin.com/in/jinglian-wu-859b71110/;https://yvonnefanf.github.io/index.html;https://github.com/SalazarPeverelll;;https://www.comp.nus.edu.sg/~dongjs/", "dblp": "289/6597;77/1513-1;;;;;06/10400.html;", "google_scholar": ";fk93YOIAAAAJ;RRnS4icAAAAJ;;;;piUkwMYAAAAJ;tuLa1AsAAAAJ", "orcid": "0009-0008-7579-0824;;;;0009-0001-3030-9960;;;", 
"linkedin": ";;;;;;;", "or_profile": "~Yufan_Cai2;~Yun_Lin2;~Chenyan_Liu1;~Jinglian_Wu1;~Yifan_Zhang22;~Yiming_Liu6;~Yeyun_Gong2;~Jin_Song_Dong2", "aff": "National University of Singapore;Shanghai Jiaotong University;National University of Singapore;;National University of Singapore;National University of Singapore;Microsoft;National University of Singapore", "aff_domain": "u.nus.edu;sjtu.edu.cn;u.nus.edu;;nus.edu.sg;nus.edu;microsoft.com;nus.edu.sg", "position": "PhD student;Associate Professor;MS student;;Researcher;MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\ncai2023onthefly,\ntitle={On-the-Fly Adapting Code Summarization on Trainable Cost-Effective Language Models},\nauthor={Yufan Cai and Yun Lin and Chenyan Liu and Jinglian Wu and Yifan Zhang and Yiming Liu and Yeyun Gong and Jin Song Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RFgv7cfMUy}\n}", "github": "", "project": "", "reviewers": "Ugvn;EdLg;to4c;4N9q", "pdf_size": 445889, "rating": "5;5;6;6", "confidence": "4;5;3;3", "soundness": "3;2;4;2", "novelty": "2;1;3;3", "presentation": "3;1;3;2", "wc_summary": "153;74;225;78", "wc_strengths": "35;80;91;12", "wc_weaknesses": "346;577;307;32", "wc_questions": "8;93;26;12", "wc_limitations": "6;43;2;13", "wc_review": "548;867;651;147", "wc_reply_reviewers": "136;1300;106;77", "wc_reply_authors": "558;1331;17;17", "reply_reviewers": "2;3;1;1", "reply_authors": "5;4;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 132.5, 61.985885490166226 ], "wc_strengths_avg": [ 54.5, 32.283896914715854 ], "wc_weaknesses_avg": [ 315.5, 193.4922479067314 ], "wc_questions_avg": [ 34.75, 34.28829975370607 ], "wc_limitations_avg": [ 16.0, 16.077935190813527 ], "wc_review_avg": [ 553.25, 261.27607525374384 ], "wc_reply_reviewers_avg": [ 404.75, 517.2936182672274 ], "wc_reply_authors_avg": [ 480.75, 538.2891300221471 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14142065265768915303&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "u.nus.edu;sjtu.edu.cn;u.nus.edu;;nus.edu.sg;nus.edu;microsoft.com;nus.edu.sg", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;0", "aff_unique_norm": "National University of Singapore;Shanghai Jiao Tong University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.nus.edu.sg;https://www.sjtu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "NUS;SJTU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2;0", "aff_country_unique": "Singapore;China;United States" }, { "title": "Learning Rule-Induced Subgraph Representations for Inductive Relation Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71668", "id": "RHDXkRPNQa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b06c8673ebb453e5e468f7743d8f54e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RHDXkRPNQa", "openreview": "https://openreview.net/forum?id=RHDXkRPNQa", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71668.png?t=1698652788.5757823", "slides": "https://nips.cc/virtual/2023/poster/71668", "video": "https://nips.cc/virtual/2023/poster/71668", "author_site": "Tianyu Liu, Qitan Lv, Jie Wang, Shuling Yang, Hanzhu Chen", "tldr": "", "abstract": "Inductive relation prediction (IRP)---where entities can be different during training and inference---has shown great power for completing evolving knowledge graphs. Existing works mainly focus on using graph neural networks (GNNs) to learn the representation of the subgraph induced from the target link, which can be seen as an implicit rule-mining process to measure the plausibility of the target link. However, these methods are not able to differentiate the target link and other links during message passing, hence the final subgraph representation will contain irrelevant rule information to the target link, which reduces the reasoning performance and severely hinders the applications for real-world scenarios. To tackle this problem, we propose a novel $\\textit{single-source edge-wise}$ GNN model to learn the $\\textbf{R}$ule-induc$\\textbf{E}$d $\\textbf{S}$ubgraph represen$\\textbf{T}$ations $(\\textbf{REST}$), which encodes relevant rules and eliminates irrelevant rules within the subgraph. Specifically, we propose a $\\textit{single-source}$ initialization approach to initialize edge features only for the target link, which guarantees the relevance of mined rules and target link. Then we propose several RNN-based functions for $\\textit{edge-wise}$ message passing to model the sequential property of mined rules. REST is a simple and effective approach with theoretical support to learn the $\\textit{rule-induced subgraph representation}$. Moreover, REST does not need node labeling, which significantly accelerates the subgraph preprocessing time by up to $\\textbf{11.66}\\times$. 
Experiments on inductive relation prediction benchmarks demonstrate the effectiveness of our REST.", "keywords": "inductive relation prediction;knowledge graph completion;knowledge graph reasoning", "primary_area": "", "supplementary_material": "/attachment/cc3e817978c9f4f7abe9ec15ca603b44fff95a02.zip", "author": "Tianyu Liu;Qitan Lv;Jie Wang;Shuling Yang;Hanzhu Chen", "authorids": "~Tianyu_Liu6;~Qitan_Lv1;~Jie_Wang1;~Shuling_Yang1;~Hanzhu_Chen1", "gender": "M;M;M;;", "homepage": "https://smart-lty.github.io/;https://scholar.google.com/citations?hl=zh-CN&user=7yDqr3oAAAAJ;http://staff.ustc.edu.cn/~jwangx;https://miralab.ai/people/shuling-yang/;", "dblp": ";357/3270.html;29/5259-5;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;OugG4dUAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tianyu_Liu6;~Qitan_Lv1;~Jie_Wang1;~Shuling_Yang1;~Hanzhu_Chen1", "aff": "University of Science and Technology of China;South China University of Technology;University of Science and Technology of China;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;scut.edu.cn;ustc.edu.cn;ustc.edu.cn;", "position": "PhD student;Undergrad student;Full Professor;MS student;", "bibtex": "@inproceedings{\nliu2023learning,\ntitle={Learning Rule-Induced Subgraph Representations for Inductive Relation Prediction},\nauthor={Tianyu Liu and Qitan Lv and Jie Wang and Shuling Yang and Hanzhu Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RHDXkRPNQa}\n}", "github": "", "project": "", "reviewers": "eBJJ;s9SP;r6aB;UyGM", "pdf_size": 476809, "rating": "4;6;6;7", "confidence": "4;5;3;3", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "81;182;193;71", "wc_strengths": "22;71;27;48", "wc_weaknesses": "425;128;37;38", "wc_questions": "1;54;22;51", "wc_limitations": "1;13;25;4", "wc_review": "530;448;304;212", "wc_reply_reviewers": "168;36;0;63", "wc_reply_authors": "299;37;0;37", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 131.75, 55.99720975191532 ], "wc_strengths_avg": [ 42.0, 19.3778223750761 ], "wc_weaknesses_avg": [ 157.0, 159.0801684686058 ], "wc_questions_avg": [ 32.0, 21.828879952943073 ], "wc_limitations_avg": [ 10.75, 9.33742469849155 ], "wc_review_avg": [ 373.5, 123.44533203001238 ], "wc_reply_reviewers_avg": [ 66.75, 62.58344429639519 ], "wc_reply_authors_avg": [ 93.25, 119.74634649959054 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10710414334173645812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;scut.edu.cn;ustc.edu.cn;ustc.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Science and Technology of China;South China University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.scut.edu.cn", "aff_unique_abbr": "USTC;SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "China" }, { "id": "RI6HFZFu3B", "title": "Deep Graph Neural Networks via Flexible Subgraph Aggregation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph neural networks (GNNs), a type of neural network that can learn from graph-structured data and learn the representation of nodes through aggregating neighborhood information, have shown superior performance in various downstream tasks. However, it is known that the performance of GNNs degrades gradually as the number of layers increases. In this paper, we evaluate the expressive power of GNN from the perspective of subgraph aggregation. We reveal the potential cause of performance degradation for traditional deep GNNs, i.e., aggregated subgraph overlap, and we theoretically illustrate the fact that previous residual-based GNNs exploit the aggregation results of 1 to k hop subgraphs to improve the effectiveness. Further, we find that the utilization of different subgraphs by previous models is often inflexible. Based on this, we propose a sampling-based node-level residual module (SNR) that can achieve a more flexible utilization of different hops of subgraph aggregation by introducing node-level parameters sampled from a learnable distribution. Extensive experiments show that the performance of GNNs with the our proposed SNR module outperform a comprehensive set of baselines.", "keywords": "Graph Neural Networks;Semi-supervised Learning;Deep Models", "primary_area": "", "supplementary_material": "/attachment/97ae8e176270202dc5a9e5adbf2aa4cf9996bb12.zip", "author": "Jingbo Zhou;Yixuan Du;Ruqiong Zhang;Di Jin;Carl Yang;Rui Zhang", "authorids": "zhoujb5520@mails.jlu.edu.cn;duyx5520@mails.jlu.edu.cn;~Ruqiong_Zhang1;~Di_Jin4;~Carl_Yang1;rui@jlu.edu.cn", "gender": ";;M;M;M;", "homepage": ";;https://github.com/dwsjoan;http://cic.tju.edu.cn/faculty/jindi/index.htm;https://cs.emory.edu/~jyang71/;", "dblp": ";;;67/1861-1.html;305/0254;", "google_scholar": ";;;Q8MRRecAAAAJ;mOINlwcAAAAJ;", "orcid": ";;;;0000-0001-9145-4531;", "linkedin": ";;;;;", "or_profile": "zhoujb5520@mails.jlu.edu.cn;duyx5520@mails.jlu.edu.cn;~Ruqiong_Zhang1;~Di_Jin4;~Carl_Yang1;rui@jlu.edu.cn", "aff": ";;Jilin University;Tianjin University;Emory University;", "aff_domain": ";;jlu.edu.cn;tju.edu.cn;emory.edu;", "position": ";;Undergrad student;Full Professor;Assistant Professor;", "bibtex": "@misc{\nzhou2023deep,\ntitle={Deep Graph Neural Networks via Flexible Subgraph Aggregation},\nauthor={Jingbo Zhou and Yixuan Du and Ruqiong Zhang and Di Jin and Carl Yang and Rui Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=RI6HFZFu3B}\n}", "github": "", "project": "", "reviewers": "M5qJ;f2R8;EwhV;y89v", "site": "https://openreview.net/forum?id=RI6HFZFu3B", "pdf_size": 321085, "rating": "4;5;7;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;4;3", "presentation": "2;3;3;3", "wc_summary": "69;83;77;38", "wc_strengths": "28;26;56;51", "wc_weaknesses": "243;202;63;195", "wc_questions": "5;128;31;2", "wc_limitations": "21;1;1;1", "wc_review": "366;440;228;287", "wc_reply_reviewers": "276;18;107;10", "wc_reply_authors": "364;100;265;21", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.75, 17.32591988899868 ], "wc_strengths_avg": [ 40.25, 13.386093530227555 ], "wc_weaknesses_avg": [ 175.75, 
67.62904331720212 ], "wc_questions_avg": [ 41.5, 51.198144497628036 ], "wc_limitations_avg": [ 6.0, 8.660254037844387 ], "wc_review_avg": [ 330.25, 80.07613564602129 ], "wc_reply_reviewers_avg": [ 102.75, 107.02657380295793 ], "wc_reply_authors_avg": [ 187.5, 134.66346943399313 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mPXoCFwYuowJ:scholar.google.com/&scioq=Deep+Graph+Neural+Networks+via+Flexible+Subgraph+Aggregation&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Jilin University;Tianjin University;Emory University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.jlu.edu.cn;http://www.tju.edu.cn;https://www.emory.edu", "aff_unique_abbr": "JLU;TJU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Relax, it doesn\u2019t matter how you get there: A new self-supervised approach for multi-timescale behavior analysis", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71667", "id": "RInTOCEL3l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5aad86aa2a3c00b70c71e19bc4780319-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RInTOCEL3l", "openreview": "https://openreview.net/forum?id=RInTOCEL3l", "poster": "/media/PosterPDFs/NeurIPS%202023/71667.png?t=1702163804.7500849", "slides": "https://nips.cc/virtual/2023/poster/71667", "video": "https://nips.cc/virtual/2023/poster/71667", "author_site": "Mehdi Azabou, Michael Mendelson, Nauman Ahad, Maks Sorokin, Shantanu Thakoor, Carolina Urzay, Eva Dyer", "tldr": "", "abstract": "Unconstrained and natural behavior consists of dynamics that are complex and unpredictable, especially when trying to predict what will happen multiple steps into the future. While some success has been found in building representations of animal behavior under constrained or simplified task-based conditions, many of these models cannot be applied to free and naturalistic settings where behavior becomes increasingly hard to model. In this work, we develop a multi-task representation learning model for animal behavior that combines two novel components: (i) an action-prediction objective that aims to predict the distribution of actions over future timesteps, and (ii) a multi-scale architecture that builds separate latent spaces to accommodate short- and long-term dynamics. After demonstrating the ability of the method to build representations of both local and global dynamics in robots in varying environments and terrains, we apply our method to the MABe 2022 Multi-Agent Behavior challenge, where our model ranks first overall on both mice and fly benchmarks. 
In all of these cases, we show that our model can build representations that capture the many different factors that drive behavior and solve a wide range of downstream tasks.", "keywords": "animal behavior;behavioral neuroscience;self-supervised learning;multi-timescale", "primary_area": "", "supplementary_material": "", "author": "Mehdi Azabou;Michael Jacob Mendelson;Nauman Ahad;Maks Sorokin;Shantanu Thakoor;Carolina Urzay;Eva L Dyer", "authorids": "~Mehdi_Azabou2;~Michael_Jacob_Mendelson1;~Nauman_Ahad1;~Maks_Sorokin1;~Shantanu_Thakoor5;~Carolina_Urzay1;~Eva_L_Dyer1", "gender": "M;M;;F;F;M;M", "homepage": "https://www.mehai.dev;;;;http://dyerlab.gatech.edu;https://initmaks.com/;", "dblp": "281/8371;218/1451;136/6029;;64/8509.html;278/3198.html;218/7437", "google_scholar": "jXxyYCoAAAAJ;;https://scholar.google.com/citations?hl=en;;Sb_jcHcAAAAJ;tFuFXuMAAAAJ;polyCecAAAAJ", "orcid": ";;;;;;", "linkedin": ";mmend/;;carolina-urzay-gutierrez/;;;", "or_profile": "~Mehdi_Azabou2;~Michael_Jacob_Mendelson1;~Nauman_Ahad1;~Carolina_Urzay1;~Eva_Dyer1;~Maksim_Sorokin1;~Shantanu_Thakoor1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Technical University of Denmark;Georgia Institute of Technology;Georgia Institute of Technology;Google", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;dtu.dk;gatech.edu;gatech.edu;google.com", "position": "PhD student;Undergrad student;PhD student;MS student;Associate Professor;PhD student;Research Engineer", "bibtex": "@inproceedings{\nazabou2023relax,\ntitle={Relax, it doesn{\\textquoteright}t matter how you get there: A new self-supervised approach for multi-timescale behavior analysis},\nauthor={Mehdi Azabou and Michael Jacob Mendelson and Nauman Ahad and Maks Sorokin and Shantanu Thakoor and Carolina Urzay and Eva L Dyer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RInTOCEL3l}\n}", "github": "", "project": "", "reviewers": "7X95;Uc6V;hofo;6S6e", "pdf_size": 5067533, "rating": "5;6;7;7", "confidence": "3;5;4;3", "soundness": "2;3;3;4", "novelty": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "126;55;146;65", "wc_strengths": "146;40;107;117", "wc_weaknesses": "311;146;135;72", "wc_questions": "232;13;81;48", "wc_limitations": "17;60;90;58", "wc_review": "832;314;559;360", "wc_reply_reviewers": "338;215;160;151", "wc_reply_authors": "0;200;174;25", "reply_reviewers": "1;2;2;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.0, 38.8136573901507 ], "wc_strengths_avg": [ 102.5, 38.82331773560833 ], "wc_weaknesses_avg": [ 166.0, 88.34874079464856 ], "wc_questions_avg": [ 93.5, 83.5 ], "wc_limitations_avg": [ 56.25, 25.96512083545925 ], "wc_review_avg": [ 516.25, 204.23316944120512 ], "wc_reply_reviewers_avg": [ 216.0, 74.5754651343188 ], "wc_reply_authors_avg": [ 99.75, 88.17702365128912 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5390375207915585840&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "gatech.edu;gatech.edu;gatech.edu;dtu.dk;gatech.edu;gatech.edu;google.com", 
"author_num": 7, "aff_unique_index": "0;0;0;1;0;0;2", "aff_unique_norm": "Georgia Institute of Technology;Technical University of Denmark;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.gatech.edu;https://www.tek.dk;https://www.google.com", "aff_unique_abbr": "Georgia Tech;DTU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Denmark" }, { "title": "Cheaply Estimating Inference Efficiency Metrics for Autoregressive Transformer Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71666", "id": "RJpAz15D0S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1a14493e5f84d6c6129414f0cd1a7c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RJpAz15D0S", "openreview": "https://openreview.net/forum?id=RJpAz15D0S", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71666", "video": "https://nips.cc/virtual/2023/poster/71666", "author_site": "Deepak Narayanan, Keshav Santhanam, Peter Henderson, Peter Henderson, Rishi Bommasani, Tony Lee, Percy Liang", "tldr": "", "abstract": "Large language models (LLMs) are highly capable but also computationally expensive. \nCharacterizing the _fundamental tradeoff_ between inference efficiency and model capabilities is thus important, but requires an efficiency metric that is comparable across models from different providers.\nUnfortunately, raw runtimes measured through black-box APIs do not satisfy this property: model providers can implement software and hardware optimizations orthogonal to the model, and shared infrastructure introduces performance contention.\nWe propose a new metric for inference efficiency called _idealized runtime_, that puts models on equal footing as though they were served on uniform hardware and software without performance contention, and a cost model to efficiently estimate this metric for autoregressive Transformer models.\nWe also propose variants of the idealized runtime that incorporate the number and type of accelerators needed to serve the model.\nUsing these metrics, we compare ten LLMs developed in 2022 to provide the first analysis of inference efficiency-capability tradeoffs; we make several observations from this analysis, including the fact that the superior inference runtime performance of certain APIs is often a byproduct of optimizations within the API rather than the underlying model.\nOur code is open sourced at https://github.com/stanford-crfm/helm-efficiency.", "keywords": "Systems for Machine Learning;Inference efficiency;Transformer models;Text generation APIs;Capability-efficiency tradeoffs", "primary_area": "", "supplementary_material": "", "author": "Deepak Narayanan;Keshav Santhanam;Peter Henderson;Rishi Bommasani;Tony Lee;Percy Liang", "authorids": "~Deepak_Narayanan2;~Keshav_Santhanam1;~Peter_Henderson1;~Rishi_Bommasani1;~Tony_Lee1;~Percy_Liang1", "gender": "M;M;M;M;M;", "homepage": "https://deepakn94.github.io/;https://cs.stanford.edu/~keshav2;http://www.peterhenderson.co/;https://rishibommasani.github.io/;;https://cs.stanford.edu/~pliang/", "dblp": ";221/1812.html;h/PeterHenderson2;245/8673;46/4265;04/1701", "google_scholar": "sTzb6LAAAAAJ;bAyZGdAAAAAJ;dy_JBs0AAAAJ;WMBXw1EAAAAJ;OYNdx48AAAAJ;pouyVyUAAAAJ", "orcid": ";0000-0001-5939-7944;;;;", "linkedin": ";;phende/;;tonyhlee/;", "or_profile": 
"~Deepak_Narayanan2;~Keshav_Santhanam1;~Peter_Henderson1;~Rishi_Bommasani1;~Tony_Lee1;~Percy_Liang1", "aff": "Microsoft Research;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Researcher;PhD student;PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nnarayanan2023cheaply,\ntitle={Cheaply Estimating Inference Efficiency Metrics for Autoregressive Transformer Models},\nauthor={Deepak Narayanan and Keshav Santhanam and Peter Henderson and Rishi Bommasani and Tony Lee and Percy Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RJpAz15D0S}\n}", "github": "", "project": "", "reviewers": "93KK;Jz6a;rBmS;a4zR;x2WX;aZks", "pdf_size": 6130477, "rating": "2;2;4;6;7;8", "confidence": "4;4;4;5;3;4", "soundness": "1;1;3;3;3;4", "novelty": "1;1;3;3;3;4", "presentation": "2;2;3;3;3;4", "wc_summary": "205;100;110;71;78;106", "wc_strengths": "59;61;140;76;132;47", "wc_weaknesses": "423;639;181;54;41;47", "wc_questions": "68;1;91;420;60;9", "wc_limitations": "26;84;1;11;32;13", "wc_review": "781;885;523;632;343;222", "wc_reply_reviewers": "486;0;31;0;15;0", "wc_reply_authors": "1161;0;0;0;0;0", "reply_reviewers": "1;0;1;0;1;0", "reply_authors": "3;1;1;1;1;1", "rating_avg": [ 4.833333333333333, 2.3392781412697 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 111.66666666666667, 44.100894422776605 ], "wc_strengths_avg": [ 85.83333333333333, 36.530428716649666 ], "wc_weaknesses_avg": [ 230.83333333333334, 226.24206554534067 ], "wc_questions_avg": [ 108.16666666666667, 143.05408844986647 ], "wc_limitations_avg": [ 27.833333333333332, 27.076537608950098 ], "wc_review_avg": [ 564.3333333333334, 231.74674874861904 ], "wc_reply_reviewers_avg": [ 88.66666666666667, 178.05024259710765 ], "wc_reply_authors_avg": [ 193.5, 432.67915364620933 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.3333333333333333, 0.74535599249993 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.12340351046845907, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2591627827805498139&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Microsoft;Stanford University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.stanford.edu", "aff_unique_abbr": "MSR;Stanford", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Symbol-LLM: Leverage Language Models for Symbolic System in Visual Human Activity Reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71665", "id": "RJq9bVEf6N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5edb57c05c81d04beb716ef1d542fe9e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RJq9bVEf6N", "openreview": "https://openreview.net/forum?id=RJq9bVEf6N", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71665.png?t=1700826962.8000205", "slides": "https://nips.cc/virtual/2023/poster/71665", "video": "https://nips.cc/virtual/2023/poster/71665", "author_site": "Xiaoqian Wu, Yong-Lu Li, Jianhua Sun, Cewu Lu", "tldr": "", "abstract": "Human reasoning can be understood as a cooperation between the intuitive, associative \"System-1'' and the deliberative, logical \"System-2''. For existing System-1-like methods in visual activity understanding, it is crucial to integrate System-2 processing to improve explainability, generalization, and data efficiency. One possible path of activity reasoning is building a symbolic system composed of symbols and rules, where one rule connects multiple symbols, implying human knowledge and reasoning abilities.\nPrevious methods have made progress, but are defective with limited symbols from handcraft and limited rules from visual-based annotations, failing to cover the complex patterns of activities and lacking compositional generalization. \nTo overcome the defects, we propose a new symbolic system with two ideal important properties: broad-coverage symbols and rational rules. Collecting massive human knowledge via manual annotations is expensive to instantiate this symbolic system. Instead, we leverage the recent advancement of LLMs (Large Language Models) as an approximation of the two ideal properties, i.e., Symbols from Large Language Models (Symbol-LLM). \nThen, given an image, visual contents from the images are extracted and\nchecked as symbols and activity semantics are reasoned out based on rules via fuzzy logic calculation.\nOur method shows superiority in extensive activity understanding tasks. Code and data are available at https://mvig-rhos.com/symbol_llm.", "keywords": "neuro-symbolic;visual reasoning;human activity understanding", "primary_area": "", "supplementary_material": "/attachment/312115080a9e9b8d829c670092e70d309e5612d5.pdf", "author": "Xiaoqian Wu;Yong-Lu Li;Jianhua Sun;Cewu Lu", "authorids": "~Xiaoqian_Wu1;~Yong-Lu_Li1;~Jianhua_Sun1;~Cewu_Lu3", "gender": "F;M;M;M", "homepage": "https://github.com/enlighten0707;https://dirtyharrylyl.github.io/;https://gothicai.github.io/;https://www.mvig.org/", "dblp": ";198/9345;36/1447-3;", "google_scholar": "-PHR96oAAAAJ;https://scholar.google.com.hk/citations?user=UExAaVgAAAAJ;L0hoY3kAAAAJ;https://scholar.google.com.tw/citations?user=QZVQEWAAAAAJ", "orcid": "0000-0003-1566-3811;0000-0003-0478-0692;;", "linkedin": ";%E6%B0%B8%E9%9C%B2-%E6%9D%8E-991b99139/;;", "or_profile": "~Xiaoqian_Wu1;~Yong-Lu_Li1;~Jianhua_Sun1;~Cewu_Lu3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2023symbolllm,\ntitle={Symbol-{LLM}: Leverage Language Models for Symbolic System in Visual Human Activity Reasoning},\nauthor={Xiaoqian Wu and Yong-Lu Li and Jianhua Sun and Cewu Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RJq9bVEf6N}\n}", "github": "", "project": "", "reviewers": "K3QA;sNaH;sPA9;yZAX;SHNq;yAvu;zui3", "pdf_size": 1304229, "rating": "3;4;5;5;5;6;6", "confidence": "3;4;3;2;3;2;3", "soundness": "2;3;2;2;3;3;3", "novelty": "2;2;3;3;2;3;2", "presentation": "2;3;3;3;3;3;3", "wc_summary": "60;132;82;89;141;51;110", "wc_strengths": 
"56;150;129;57;93;69;87", "wc_weaknesses": "67;190;126;365;292;232;248", "wc_questions": "57;25;49;7;105;53;2", "wc_limitations": "40;6;180;16;9;8;1", "wc_review": "280;503;566;534;640;413;448", "wc_reply_reviewers": "0;0;203;221;318;144;23", "wc_reply_authors": "0;0;50;962;424;64;0", "reply_reviewers": "0;0;1;2;2;1;1", "reply_authors": "1;1;2;4;3;2;1", "rating_avg": [ 4.857142857142857, 0.9897433186107869 ], "confidence_avg": [ 2.857142857142857, 0.6388765649999398 ], "soundness_avg": [ 2.5714285714285716, 0.49487165930539345 ], "novelty_avg": [ 2.4285714285714284, 0.49487165930539345 ], "presentation_avg": [ 2.857142857142857, 0.34992710611188266 ], "wc_summary_avg": [ 95.0, 31.793979663721604 ], "wc_strengths_avg": [ 91.57142857142857, 33.39742138402366 ], "wc_weaknesses_avg": [ 217.14285714285714, 92.75532879790877 ], "wc_questions_avg": [ 42.57142857142857, 32.6796447911153 ], "wc_limitations_avg": [ 37.142857142857146, 59.498928133031036 ], "wc_review_avg": [ 483.42857142857144, 108.14785797080529 ], "wc_reply_reviewers_avg": [ 129.85714285714286, 116.12730523183669 ], "wc_reply_authors_avg": [ 214.28571428571428, 336.0245374908183 ], "reply_reviewers_avg": [ 1.0, 0.7559289460184544 ], "reply_authors_avg": [ 2.0, 1.0690449676496976 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4841229182759271, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7834767132790843464&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "RLJ8t01p0u", "title": "Exploring the Promise and Limits of Real-Time Recurrent Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Real-time recurrent learning (RTRL) for sequence-processing recurrent neural networks (RNNs) offers certain conceptual advantages over backpropagation through time (BPTT). RTRL requires neither caching past activations nor truncating context, and enables online learning. However, RTRL's time and space complexity makes it impractical. To overcome this problem, most recent work on RTRL focuses on approximation theories, while experiments are often limited to diagnostic settings. Here we explore the practical promise of RTRL in more realistic settings. We study actor-critic methods that combine RTRL and policy gradients, and test them in several subsets of DMLab-30, ProcGen, and Atari-2600 environments. On DMLab memory tasks, our system is competitive with or outperforms well-known IMPALA and R2D2 baselines trained on 10B frames, while using fewer than 1.2B environmental frames. To scale to such challenging tasks, we focus on certain well-known neural architectures with element-wise recurrence, allowing for tractable RTRL without approximation. 
We also discuss rarely addressed limitations of RTRL in real-world applications, such as its complexity in the multi-layer case.", "keywords": "real-time recurrent learning;online recurrent learning;recurrent neural networks;reinforcement learning;actor-critic;policy gradients", "primary_area": "", "supplementary_material": "/attachment/543ac84e27f1a8399fd1ba9bb19cce3f724fb465.zip", "author": "Kazuki Irie;Anand Gopalakrishnan;J\u00fcrgen Schmidhuber", "authorids": "~Kazuki_Irie1;~Anand_Gopalakrishnan1;~J\u00fcrgen_Schmidhuber1", "gender": ";M;M", "homepage": "https://sites.harvard.edu/kazuki-irie/;https://agopal42.github.io/;http://people.idsia.ch/~juergen/", "dblp": "148/9667;191/1040;s/JurgenSchmidhuber", "google_scholar": "https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;SsbgJ1UAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "orcid": "0000-0003-0923-691X;;", "linkedin": ";;", "or_profile": "~Kazuki_Irie1;~Anand_Gopalakrishnan1;~J\u00fcrgen_Schmidhuber1", "aff": "The Swiss AI Lab IDSIA, Dalle Molle Institute for Artificial Intelligence Research;Dalle Molle Institute for Artificial Intelligence Research;IDSIA", "aff_domain": "idsia.ch;idsia.ch;idsia.ch", "position": "Postdoc;PhD student;Scientific Director", "bibtex": "@misc{\nirie2023exploring,\ntitle={Exploring the Promise and Limits of Real-Time Recurrent Learning},\nauthor={Kazuki Irie and Anand Gopalakrishnan and J{\\\"u}rgen Schmidhuber},\nyear={2023},\nurl={https://openreview.net/forum?id=RLJ8t01p0u}\n}", "github": "", "project": "", "reviewers": "5hxy;4QGx;Bqpj;Y8rh", "site": "https://openreview.net/forum?id=RLJ8t01p0u", "pdf_size": 1630021, "rating": "4;4;7;7", "confidence": "5;4;3;3", "soundness": "3;1;3;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "105;74;141;103", "wc_strengths": "39;91;79;130", "wc_weaknesses": "128;389;149;107", "wc_questions": "84;312;76;5", "wc_limitations": "4;18;43;23", "wc_review": "360;884;488;368", "wc_reply_reviewers": "346;88;43;0", "wc_reply_authors": "1485;395;144;0", "reply_reviewers": "3;1;1;0", "reply_authors": "5;3;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 105.75, 23.763154251908563 ], "wc_strengths_avg": [ 84.75, 32.45285041410076 ], "wc_weaknesses_avg": [ 193.25, 113.98766380622071 ], "wc_questions_avg": [ 119.25, 115.45426583717035 ], "wc_limitations_avg": [ 22.0, 13.982131454109563 ], "wc_review_avg": [ 525.0, 213.37994282499938 ], "wc_reply_reviewers_avg": [ 119.25, 134.5610920734519 ], "wc_reply_authors_avg": [ 506.0, 582.632388389111 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6073559528544751678&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "IDSIA;Dalle Molle Institute for Artificial Intelligence Research;Institute of Digital Technologies", "aff_unique_dep": "Swiss AI Lab;Artificial Intelligence Research;", "aff_unique_url": "https://www.idsia.ch/;http://www.dallemolle.ch/;https://www.idsia.ch", "aff_unique_abbr": "IDSIA;DMI;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Elastic 
Decision Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71664", "id": "RMeQjexaRj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3b3889d313ba9476c12c2d77ea66b24f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RMeQjexaRj", "openreview": "https://openreview.net/forum?id=RMeQjexaRj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71664", "video": "https://nips.cc/virtual/2023/poster/71664", "author_site": "Yueh-Hua Wu, Xiaolong Wang, Masashi Hamaya", "tldr": "", "abstract": "This paper introduces Elastic Decision Transformer (EDT), a significant advancement over the existing Decision Transformer (DT) and its variants. Although DT purports to generate an optimal trajectory, empirical evidence suggests it struggles with trajectory stitching, a process involving the generation of an optimal or near-optimal trajectory from the best parts of a set of sub-optimal trajectories. The proposed EDT differentiates itself by facilitating trajectory stitching during action inference at test time, achieved by adjusting the history length maintained in DT. Further, the EDT optimizes the trajectory by retaining a longer history when the previous trajectory is optimal and a shorter one when it is sub-optimal, enabling it to \"stitch\" with a more optimal trajectory. Extensive experimentation demonstrates EDT's ability to bridge the performance gap between DT-based and Q Learning-based approaches. In particular, the EDT outperforms Q Learning-based methods in a multi-task regime on the D4RL locomotion benchmark and Atari games.", "keywords": "Offline Reinforcement Learning;Trajectory Stitching;Decision Transformer", "primary_area": "", "supplementary_material": "/attachment/166e655d524355b31fd3a9e26f8ec8146fb6b407.zip", "author": "Yueh-Hua Wu;Xiaolong Wang;Masashi Hamaya", "authorids": "~Yueh-Hua_Wu1;~Xiaolong_Wang3;~Masashi_Hamaya1", "gender": ";M;M", "homepage": ";https://xiaolonw.github.io/;https://sites.google.com/view/masashihamaya/home", "dblp": ";91/952-4;164/8431", "google_scholar": ";Y8O9N_0AAAAJ;Khb7qw8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yueh-Hua_Wu1;~Xiaolong_Wang3;~Masashi_Hamaya1", "aff": ";University of California, San Diego;", "aff_domain": ";ucsd.edu;", "position": ";Assistant Professor;", "bibtex": "@inproceedings{\nwu2023elastic,\ntitle={Elastic Decision Transformer},\nauthor={Yueh-Hua Wu and Xiaolong Wang and Masashi Hamaya},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RMeQjexaRj}\n}", "github": "", "project": "", "reviewers": "GPqH;C3CW;nm2U;kCB8;ak2g", "pdf_size": 1952552, "rating": "4;5;6;6;7", "confidence": "3;4;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "1;3;4;4;3", "wc_summary": "80;73;74;115;97", "wc_strengths": "56;68;53;75;107", "wc_weaknesses": "160;172;126;105;179", "wc_questions": "192;140;71;169;194", "wc_limitations": "181;24;2;1;26", "wc_review": "669;477;326;465;603", "wc_reply_reviewers": "104;14;13;13;138", "wc_reply_authors": "141;37;0;0;37", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;1;1;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 87.8, 16.09223415191315 ], "wc_strengths_avg": [ 71.8, 19.3225257148231 ], 
"wc_weaknesses_avg": [ 148.4, 28.330901856453494 ], "wc_questions_avg": [ 153.2, 45.499010978261936 ], "wc_limitations_avg": [ 46.8, 67.92171964843057 ], "wc_review_avg": [ 508.0, 119.06300852909773 ], "wc_reply_reviewers_avg": [ 56.4, 53.83158923903324 ], "wc_reply_authors_avg": [ 43.0, 51.71846865482388 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7844645405527362, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10955964573331130963&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";ucsd.edu;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Towards Consistent Video Editing with Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71663", "id": "RNVwm4BzXO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6c05f8254a00709e16fb0fdaae56cd8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RNVwm4BzXO", "openreview": "https://openreview.net/forum?id=RNVwm4BzXO", "poster": "/media/PosterPDFs/NeurIPS%202023/71663.png?t=1700555981.515566", "slides": "https://nips.cc/virtual/2023/poster/71663", "video": "https://nips.cc/virtual/2023/poster/71663", "author_site": "Zicheng Zhang, Bonan Li, Xuecheng Nie, Congying Han, Tiande Guo, Luoqi Liu", "tldr": "", "abstract": "Existing works have advanced Text-to-Image (TTI) diffusion models for video editing in a one-shot learning manner. Despite their low requirements of data and computation, these methods might produce results of unsatisfied consistency with text prompt as well as temporal sequence, limiting their applications in the real world. In this paper, we propose to address the above issues with a novel EI$^2$ model towards Enhancing vIdeo Editing consIstency of TTI-based frameworks. Specifically, we analyze and find that the inconsistent problem is caused by newly added modules into TTI models for learning temporal information. These modules lead to covariate shift in the feature space, which harms the editing capability. Thus, we design EI$^2$ to tackle the above drawbacks with two classical modules: Shift-restricted Temporal Attention Module (STAM) and Fine-coarse Frame Attention Module (FFAM). First, through theoretical analysis, we demonstrate that covariate shift is highly related to Layer Normalization, thus STAM employs a Instance Centering layer replacing it to preserve the distribution of temporal features. In addition, STAM employs an attention layer with normalized mapping to transform temporal features while constraining the variance shift. As the second part, we incorporate STAM with a novel FFAM, which efficiently leverages fine-coarse spatial information of overall frames to further enhance temporal consistency. 
Extensive experiments demonstrate the superiority of the proposed EI$^2$ model.", "keywords": "diffusion model;video editing;text-to-video diffusion model", "primary_area": "", "supplementary_material": "/attachment/83c680c5a5c46d4e27535995a54b8be909fdbc37.zip", "author": "Zicheng Zhang;Bonan Li;Xuecheng Nie;Congying Han;Tiande Guo;Luoqi Liu", "authorids": "~Zicheng_Zhang3;~Bonan_Li2;~Xuecheng_Nie2;~Congying_Han1;~Tiande_Guo1;~Luoqi_Liu7", "gender": "M;M;F;M;M;M", "homepage": ";https://niexc.github.io/;http://people.ucas.edu.cn/~hancy;https://people.ucas.ac.cn/~tdguo?language=en;;", "dblp": "99/3303;124/9139;07/2808;;29/8842;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=h93ctSsAAAAJ;;;nw4XTwMAAAAJ;H8WNWPsAAAAJ", "orcid": ";;0000-0002-3445-4620;0000-0002-3804-9163;;", "linkedin": ";;;;;", "or_profile": "~Bonan_Li2;~Xuecheng_Nie2;~Congying_Han1;~Tiande_Guo1;~Luoqi_Liu7;~zicheng_zhang1", "aff": "University of Chinese Academy of Sciences;Meitu Inc.;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Meitu inc;JD Explore Academy", "aff_domain": "ucas.ac.cn;meitu.com;ucas.ac.cn;ucas.ac.cn;meitu.com;jd.com", "position": "Postdoc;Researcher;Full Professor;Full Professor;Principal Researcher;Intern", "bibtex": "@inproceedings{\nzhang2023towards,\ntitle={Towards Consistent Video Editing with Text-to-Image Diffusion Models},\nauthor={Zicheng Zhang and Bonan Li and Xuecheng Nie and Congying Han and Tiande Guo and Luoqi Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RNVwm4BzXO}\n}", "github": "", "project": "", "reviewers": "ipyC;8VBg;eBTE;FCWG;k6zc", "pdf_size": 11843224, "rating": "4;5;5;6;7", "confidence": "5;4;5;5;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;2;3;3;4", "wc_summary": "53;89;58;46;91", "wc_strengths": "29;48;87;63;100", "wc_weaknesses": "228;127;268;54;70", "wc_questions": "23;41;175;2;33", "wc_limitations": "15;55;6;11;17", "wc_review": "348;360;594;176;311", "wc_reply_reviewers": "35;332;58;10;17", "wc_reply_authors": "397;298;182;26;26", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 67.4, 18.853116453255147 ], "wc_strengths_avg": [ 65.4, 25.679563859224714 ], "wc_weaknesses_avg": [ 149.4, 85.030817942673 ], "wc_questions_avg": [ 54.8, 61.50252027356278 ], "wc_limitations_avg": [ 20.8, 17.508854902591434 ], "wc_review_avg": [ 357.8, 135.00577765414337 ], "wc_reply_reviewers_avg": [ 90.4, 121.9353927291006 ], "wc_reply_authors_avg": [ 185.8, 147.16032073898182 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4803844614152615, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4914403794571680583&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "ucas.ac.cn;meitu.com;ucas.ac.cn;ucas.ac.cn;meitu.com;jd.com", "author_num": 6, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "University of Chinese Academy of Sciences;Meitu Inc.;Meitu Inc;JD", "aff_unique_dep": ";;;JD Explore Academy", "aff_unique_url": 
"http://www.ucas.ac.cn;https://www.meitu.com;https://www.meitu.com;", "aff_unique_abbr": "UCAS;Meitu;Meitu;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "On Imitation in Mean-field Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71662", "id": "RPFd3D3P3L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f2223201858b6ff4cc1832d8856459b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RPFd3D3P3L", "openreview": "https://openreview.net/forum?id=RPFd3D3P3L", "poster": "/media/PosterPDFs/NeurIPS%202023/71662.png?t=1697456526.6689997", "slides": "https://nips.cc/virtual/2023/poster/71662", "video": "https://nips.cc/virtual/2023/poster/71662", "author_site": "Giorgia Ramponi, Pavel Kolev, Olivier Pietquin, Niao He, Mathieu Lauriere, Matthieu Geist", "tldr": "", "abstract": "We explore the problem of imitation learning (IL) in the context of mean-field games (MFGs), where the goal is to imitate the behavior of a population of agents following a Nash equilibrium policy according to some unknown payoff function. IL in MFGs presents new challenges compared to single-agent IL, particularly when both the reward function and the transition kernel depend on the population distribution. In this paper, departing from the existing literature on IL for MFGs, we introduce a new solution concept called the Nash imitation gap. Then we show that when only the reward depends on the population distribution, IL in MFGs can be reduced to single-agent IL with similar guarantees. However, when the dynamics is population-dependent, we provide a novel upper-bound that suggests IL is harder in this setting. 
To address this issue, we propose a new adversarial formulation where the reinforcement learning problem is replaced by a mean-field control (MFC) problem, suggesting progress in IL within MFGs may have to build upon MFC.", "keywords": "Mean-field games;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/4177a136cd54808854b5dcacd1d93da7956cc619.pdf", "author": "Giorgia Ramponi;Pavel Kolev;Olivier Pietquin;Niao He;Mathieu Lauriere;Matthieu Geist", "authorids": "~Giorgia_Ramponi1;~Pavel_Kolev1;~Olivier_Pietquin1;~Niao_He3;~Mathieu_Lauriere1;~Matthieu_Geist1", "gender": "F;M;M;;M;", "homepage": "https://gioramponi.github.io/;http://pavelkolev.github.io/;http://www.cristal.univ-lille.fr/~pietquin/;https://mlauriere.github.io;;http://people.inf.ethz.ch/niaohe", "dblp": "186/4493;153/5818.html;58/6269;125/7744;38/6508;https://dblp.uni-trier.de/pers/h/He:Niao.html", "google_scholar": "xbIAH5gAAAAJ;https://scholar.google.de/citations?user=m1j0aaoAAAAJ;8K8-LdwAAAAJ;https://scholar.google.fr/citations?user=6uskEdwAAAAJ;ectPLEUAAAAJ;iNcA81MAAAAJ", "orcid": ";;;;;", "linkedin": ";pavel-kolev-72495b1a/;opietquin/;;;", "or_profile": "~Giorgia_Ramponi1;~Pavel_Kolev1;~Olivier_Pietquin1;~Mathieu_Lauriere1;~Matthieu_Geist1;~Niao_He1", "aff": "ETHZ - ETH Zurich;;Google Brain;New York University;Google;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;;google.com;nyu.edu;google.com;ethz.ch", "position": "Postdoc;;Staff Research Scientist;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nramponi2023on,\ntitle={On Imitation in Mean-field Games},\nauthor={Giorgia Ramponi and Pavel Kolev and Olivier Pietquin and Niao He and Mathieu Lauriere and Matthieu Geist},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RPFd3D3P3L}\n}", "github": "", "project": "", "reviewers": "uQRX;4ip4;Qrta;MgoB", "pdf_size": 463947, "rating": "4;5;6;6", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "80;68;62;37", "wc_strengths": "64;99;70;56", "wc_weaknesses": "192;147;24;196", "wc_questions": "8;139;50;32", "wc_limitations": "7;79;1;11", "wc_review": "351;532;207;332", "wc_reply_reviewers": "280;20;11;0", "wc_reply_authors": "817;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 15.690363284513205 ], "wc_strengths_avg": [ 72.25, 16.223054582907622 ], "wc_weaknesses_avg": [ 139.75, 69.54270270847978 ], "wc_questions_avg": [ 57.25, 49.49431785568925 ], "wc_limitations_avg": [ 24.5, 31.666228067138025 ], "wc_review_avg": [ 355.5, 115.94934238709592 ], "wc_reply_reviewers_avg": [ 77.75, 116.98370613038382 ], "wc_reply_authors_avg": [ 204.25, 353.7713774459432 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14325519169403461009&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ethz.ch;;google.com;nyu.edu;google.com;ethz.ch", "author_num": 6, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "ETH Zurich;Google;New York University;Swiss Federal Institute 
of Technology", "aff_unique_dep": ";Google Brain;;", "aff_unique_url": "https://www.ethz.ch;https://brain.google.com;https://www.nyu.edu;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;Google Brain;NYU;ETH Zurich", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "IMPRESS: Evaluating the Resilience of Imperceptible Perturbations Against Unauthorized Data Usage in Diffusion-Based Generative AI", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71661", "id": "RRSltzPc7w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/222dda29587fbc2979ca99fd5ed00735-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RRSltzPc7w", "openreview": "https://openreview.net/forum?id=RRSltzPc7w", "poster": "/media/PosterPDFs/NeurIPS%202023/71661.png?t=1702138911.716086", "slides": "https://nips.cc/virtual/2023/poster/71661", "video": "https://nips.cc/virtual/2023/poster/71661", "author_site": "Bochuan Cao, Changjiang Li, Ting Wang, Jinyuan Jia, Bo Li, Jinghui Chen", "tldr": "", "abstract": "Diffusion-based image generation models, such as Stable Diffusion or DALL\u00b7E 2, are able to learn from given images and generate high-quality samples following the guidance from prompts. For instance, they can be used to create artistic images that mimic the style of an artist based on his/her original artworks or to maliciously edit the original images for fake content. However, such ability also brings serious ethical issues without proper authorization from the owner of the original images. In response, several attempts have been made to protect the original images from such unauthorized data usage by adding imperceptible perturbations, which are designed to mislead the diffusion model and make it unable to properly generate new samples. 
In this work, we introduce a perturbation purification platform, named IMPRESS, to evaluate the effectiveness of imperceptible perturbations as a protective measure.\nIMPRESS is based on the key observation that imperceptible perturbations could lead to a perceptible inconsistency between the original image and the diffusion-reconstructed image. This inconsistency can be used to devise a new optimization strategy for purifying the image, which may weaken the protection of the original image from unauthorized data usage (e.g., style mimicking, malicious editing).\nThe proposed IMPRESS platform offers a comprehensive evaluation of several contemporary protection methods, and can be used as an evaluation platform for future protection methods.", "keywords": "Image Generation Models;Latent Diffusion Models;Image Purifying", "primary_area": "", "supplementary_material": "", "author": "Bochuan Cao;Changjiang Li;Ting Wang;Jinyuan Jia;Bo Li;Jinghui Chen", "authorids": "~Bochuan_Cao1;~Changjiang_Li1;~Ting_Wang1;~Jinyuan_Jia2;~Bo_Li19;~Jinghui_Chen1", "gender": ";M;M;;F;M", "homepage": "https://aaaaaasuka.github.io/;;https://alps-lab.github.io/;https://jinyuan-jia.github.io/;http://boli.cs.illinois.edu/;https://jinghuichen.github.io/", "dblp": "334/3881;;12/2633-6.html;24/5124-1.html;50/3402-26;67/5633", "google_scholar": "eOZCg2IAAAAJ;qBBlW0gAAAAJ;cwcBTegAAAAJ;iyg4ytkAAAAJ;K8vJkTcAAAAJ;mKia7Y4AAAAJ", "orcid": ";;;0000-0002-9785-7769;;", "linkedin": ";;;;;", "or_profile": "~Bochuan_Cao1;~Changjiang_Li1;~Ting_Wang1;~Jinyuan_Jia2;~Bo_Li19;~Jinghui_Chen1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;cs.illinois.edu;illinois.edu;psu.edu", "position": "PhD student;PhD student;Associate Professor;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2023impress,\ntitle={{IMPRESS}: Evaluating the Resilience of Imperceptible Perturbations Against Unauthorized Data Usage in Diffusion-Based Generative {AI}},\nauthor={Bochuan Cao and Changjiang Li and Ting Wang and Jinyuan Jia and Bo Li and Jinghui Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RRSltzPc7w}\n}", "github": "", "project": "", "reviewers": "ULFY;vDR3;UShu;DYEa;EvE8", "pdf_size": 9170796, "rating": "5;5;5;6;7", "confidence": "3;4;3;5;4", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;4;3", "wc_summary": "105;79;94;78;132", "wc_strengths": "89;43;32;44;105", "wc_weaknesses": "149;101;75;281;499", "wc_questions": "43;2;93;2;270", "wc_limitations": "4;1;37;15;1", "wc_review": "390;226;331;420;1007", "wc_reply_reviewers": "42;116;0;20;162", "wc_reply_authors": "25;309;0;23;21", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;3;1;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 97.6, 19.905778055629977 ], "wc_strengths_avg": [ 62.6, 28.848570155208733 ], "wc_weaknesses_avg": [ 221.0, 156.06665242773678 ], "wc_questions_avg": [ 82.0, 99.78577052866807 ], "wc_limitations_avg": [ 11.6, 13.705473359209451 ], "wc_review_avg": [ 474.8, 274.2301223425319 ], "wc_reply_reviewers_avg": [ 68.0, 61.22744482664616 ], "wc_reply_authors_avg":
[ 75.6, 117.04631561907449 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4870912758937880036&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "psu.edu;psu.edu;psu.edu;cs.illinois.edu;illinois.edu;psu.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Pennsylvania State University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://illinois.edu", "aff_unique_abbr": "PSU;UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Spectral Invariant Learning for Dynamic Graphs under Distribution Shifts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71660", "id": "RRUVZygUtr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/154b90fcc9ba3dee96779c05c3108908-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RRUVZygUtr", "openreview": "https://openreview.net/forum?id=RRUVZygUtr", "poster": "/media/PosterPDFs/NeurIPS%202023/71660.png?t=1701924335.3554828", "slides": "https://nips.cc/virtual/2023/poster/71660", "video": "https://nips.cc/virtual/2023/poster/71660", "author_site": "Zeyang Zhang, Xin Wang, Ziwei Zhang, Zhou Qin, Weigao Wen, Hui Xue', Haoyang Li, Wenwu Zhu", "tldr": "", "abstract": "Dynamic graph neural networks (DyGNNs) currently struggle with handling distribution shifts that are inherent in dynamic graphs.\nExisting work on DyGNNs with out-of-distribution settings only focuses on the time domain, failing to handle cases involving distribution shifts in the spectral domain. In this paper, we discover that there exist cases with distribution shifts unobservable in the time domain while observable in the spectral domain, and propose to study distribution shifts on dynamic graphs in the spectral domain for the first time.\nHowever, this investigation poses two key challenges: i) it is non-trivial to capture different graph patterns that are driven by various frequency components entangled in the spectral domain; and ii) it remains unclear how to handle distribution shifts with the discovered spectral patterns. To address these challenges, we propose Spectral Invariant Learning for Dynamic Graphs under Distribution Shifts (SILD), which can handle distribution shifts on dynamic graphs by capturing and utilizing invariant and variant spectral patterns. Specifically, we first design a DyGNN with Fourier transform to obtain the ego-graph trajectory spectrums, allowing the mixed dynamic graph patterns to be transformed into separate frequency components. We then develop a disentangled spectrum mask to filter graph dynamics from various frequency components and discover the invariant and variant spectral patterns. Finally, we propose invariant spectral filtering, which encourages the model to rely on invariant patterns for generalization under distribution shifts. 
Experimental results on synthetic and real-world dynamic graph datasets demonstrate the superiority of our method for both node classification and link prediction tasks under distribution shifts.", "keywords": "Dynamic Graph Neural Networks;Out-of-Distribution Generalization", "primary_area": "", "supplementary_material": "/attachment/80f82c65e73ae8e9b666e9161661eb4c831b1578.pdf", "author": "Zeyang Zhang;Xin Wang;Ziwei Zhang;Zhou Qin;Weigao Wen;Hui Xue';Haoyang Li;Wenwu Zhu", "authorids": "~Zeyang_Zhang1;~Xin_Wang17;~Ziwei_Zhang1;~Zhou_Qin2;~Weigao_Wen1;~Hui_Xue'1;~Haoyang_Li1;~Wenwu_Zhu1", "gender": ";M;;M;M;M;M;M", "homepage": "https://zzythu.com;http://mn.cs.tsinghua.edu.cn/xinwang/;;https://github.com/archwalker;;http://www.alibaba.com;https://haoyang.li;http://media.cs.tsinghua.edu.cn/en/zww", "dblp": "236/0242;10/5630-19;;;;;118/0004-1.html;97/6308-1.html", "google_scholar": "w_njVcAAAAAJ;YPOBHYUAAAAJ;;;;;86RE16gAAAAJ;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ", "orcid": "0000-0003-1329-1313;0000-0002-0351-2939;;;;;0000-0003-3544-5563;0000-0003-2236-9290", "linkedin": "zeyang-zhang-a7a039159;;;;https://www.linkedin.cn/incareer/in/ACoAABlg3QUBY92_T2u0E9MmBcmBoAJzIoMYnjE;;;", "or_profile": "~Zeyang_Zhang1;~Xin_Wang17;~Ziwei_Zhang1;~Zhou_Qin2;~Weigao_Wen1;~Hui_Xue'1;~Haoyang_Li1;~Wenwu_Zhu1", "aff": "Tsinghua University;Tsinghua University;;;;Alibaba Group;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;;;;alibaba-inc.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;;;;Principal Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023spectral,\ntitle={Spectral Invariant Learning for Dynamic Graphs under Distribution Shifts},\nauthor={Zeyang Zhang and Xin Wang and Ziwei Zhang and Zhou Qin and Weigao Wen and Hui Xue' and Haoyang Li and Wenwu Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RRUVZygUtr}\n}", "github": "", "project": "", "reviewers": "QjfT;fiaV;4zF7;vwaL", "pdf_size": 699664, "rating": "3;7;8;8", "confidence": "4;4;4;4", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "62;116;140;100", "wc_strengths": "27;74;60;65", "wc_weaknesses": "39;4;80;84", "wc_questions": "122;116;80;4", "wc_limitations": "380;1;28;1", "wc_review": "630;311;388;254", "wc_reply_reviewers": "1097;63;57;5", "wc_reply_authors": "3020;0;0;0", "reply_reviewers": "4;1;1;1", "reply_authors": "7;1;1;1", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.5, 28.368115905008565 ], "wc_strengths_avg": [ 56.5, 17.755280904564703 ], "wc_weaknesses_avg": [ 51.75, 32.713720363174836 ], "wc_questions_avg": [ 80.5, 46.997340350279394 ], "wc_limitations_avg": [ 102.5, 160.59343074982863 ], "wc_review_avg": [ 395.75, 143.36034144769604 ], "wc_reply_reviewers_avg": [ 305.5, 457.5289608319893 ], "wc_reply_authors_avg": [ 755.0, 1307.6983597145024 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 2.598076211353316 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10809985559721924928&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": 
"tsinghua.edu.cn;cs.tsinghua.edu.cn;;;;alibaba-inc.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tsinghua University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "THU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "How to Turn Your Knowledge Graph Embeddings into Generative Models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71659", "id": "RSGNGiB1q4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4b768188be63b8d2680a46934fd295a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RSGNGiB1q4", "openreview": "https://openreview.net/forum?id=RSGNGiB1q4", "poster": "/media/PosterPDFs/NeurIPS%202023/71659.png?t=1701694894.023519", "slides": "https://nips.cc/virtual/2023/poster/71659", "video": "https://nips.cc/virtual/2023/poster/71659", "author_site": "Lorenzo Loconte, Nicola Di Mauro, Robert Peharz, Antonio Vergari", "tldr": "", "abstract": "Some of the most successful knowledge graph embedding (KGE) models for link prediction \u2013 CP, RESCAL, TuckER, ComplEx \u2013 can be interpreted as energy-based models. Under this perspective they are not amenable for exact maximum-likelihood estimation (MLE), sampling and struggle to integrate logical constraints. This work re-interprets the score functions of these KGEs as circuits \u2013 constrained computational graphs allowing efficient marginalisation. Then, we design two recipes to obtain efficient generative circuit models by either restricting their activations to be non-negative or squaring their outputs. Our interpretation comes with little or no loss of performance for link prediction, while the circuits framework unlocks exact learning by MLE, efficient sampling of new triples, and guarantee that logical constraints are satisfied by design. 
Furthermore, our models scale more gracefully than the original KGEs on graphs with millions of entities.", "keywords": "knowledge graph;knowledge graph embeddings;probabilistic circuits;probabilistic reasoning;tractable inference", "primary_area": "", "supplementary_material": "/attachment/d134048902ac0563f8c793ce2489ed219e19bb88.zip", "author": "Lorenzo Loconte;Nicola Di Mauro;Robert Peharz;Antonio Vergari", "authorids": "~Lorenzo_Loconte1;~Nicola_Di_Mauro1;~Robert_Peharz5;~Antonio_Vergari3", "gender": "M;M;M;M", "homepage": "https://loreloc.github.io/;http://www.di.uniba.it/~ndm/;https://robert-peharz.github.io/;http://nolovedeeplearning.com", "dblp": "336/2917;07/4238;30/9232;http://dblp.uni-trier.de/pers/hd/v/Vergari:Antonio", "google_scholar": ";;https://scholar.google.com/citations?hl=en;YK0NLaUAAAAJ", "orcid": ";;0000-0002-8644-9655;0000-0003-0036-5678", "linkedin": ";;;", "or_profile": "~Lorenzo_Loconte1;~Nicola_Di_Mauro1;~Robert_Peharz5;~antonio_vergari2", "aff": "University of Edinburgh;University of Bari;Technische Universit\u00e4t Graz;University of Edinburgh", "aff_domain": "sms.ed.ac.uk;uniba.it;tugraz.at;ed.ac.uk", "position": "PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nloconte2023how,\ntitle={How to Turn Your Knowledge Graph Embeddings into Generative Models},\nauthor={Lorenzo Loconte and Nicola Di Mauro and Robert Peharz and Antonio Vergari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RSGNGiB1q4}\n}", "github": "", "project": "", "reviewers": "S5Mx;t9NK;P87h;v5Gr", "pdf_size": 2549574, "rating": "7;7;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "3;4;4;4", "wc_summary": "143;103;77;204", "wc_strengths": "51;116;15;74", "wc_weaknesses": "28;35;126;44", "wc_questions": "67;52;321;13", "wc_limitations": "10;9;25;24", "wc_review": "299;315;564;359", "wc_reply_reviewers": "0;15;16;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 131.75, 47.88201645712093 ], "wc_strengths_avg": [ 64.0, 36.65378561622251 ], "wc_weaknesses_avg": [ 58.25, 39.52451770736741 ], "wc_questions_avg": [ 113.25, 121.55322908092569 ], "wc_limitations_avg": [ 17.0, 7.516648189186454 ], "wc_review_avg": [ 384.25, 106.07868541794812 ], "wc_reply_reviewers_avg": [ 7.75, 7.75806032459145 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14485042441895168115&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "sms.ed.ac.uk;uniba.it;tugraz.at;ed.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Edinburgh;University of Bari;Technische Universit\u00e4t Graz", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ed.ac.uk;https://www.uniba.it;https://www.tugraz.at", "aff_unique_abbr": "Edinburgh;UNIBA;TU Graz", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United Kingdom;Italy;Austria" }, { 
"title": "ANPL: Towards Natural Programming with Interactive Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71658", "id": "RTRS3ZTsSj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dba8fa689ede9e56cbcd4f719def38fb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RTRS3ZTsSj", "openreview": "https://openreview.net/forum?id=RTRS3ZTsSj", "poster": "/media/PosterPDFs/NeurIPS%202023/71658.png?t=1701945298.969406", "slides": "https://nips.cc/virtual/2023/poster/71658", "video": "https://nips.cc/virtual/2023/poster/71658", "author_site": "Di Huang, Ziyuan Nan, Xing Hu, Pengwei Jin, Shaohui Peng, Yuanbo Wen, Rui Zhang, Zidong Du, Qi Guo, Yewen Pu, Yunji Chen", "tldr": "", "abstract": "Though LLMs are capable of generating plausible programs, it\u2019s challenging to interact with the LLMs further to revise the program, especially if the user\u2019s specific requirements are different from the initial proposal. In this paper, we introduce ANPL, an interactive programming system that ensures users can always refine the generated code towards their specific programmatic intents via structured\ndecompositions. Borrowing the paradigm of sketching from program synthesis, an ANPL program consists of a set of input-outputs that it must satisfy, a \u201csketch\u201d \u2014 control/data flow expressed in precise code (e.g. Python), and \u201choles\u201d \u2014 sub-modules to be implemented by the LLM specified with natural language. The user revises an ANPL program by either modifying the sketch, changing the language used to describe the holes, or providing additional input-outputs to a particular hole, turning it into a sub-ANPL program that can be solved recursively. This workflow allows the users to offload programming burdens to the LLM as much as possible while retaining the ability to pinpoint and resolve bugs locally, without exposing the rest of the program to the LLM. We deploy ANPL on the Abstraction and Reasoning Corpus (ARC), a set of unique tasks that are challenging for state-of-the-art AI systems, showing it outperforms baseline programming systems that (a) without the ability to decompose tasks interactively and (b) without the guarantee that the modules can be correctly composed together. Additional evaluations on APPS, HumanEval, and real-world programming tasks have validated that the ANPL framework is applicable to multiple programming domains. 
We release the ANPL solutions to the ARC tasks as a dataset, providing insights into how humans decompose novel tasks programmatically.", "keywords": "programming language;large language models;program synthesis;code generation;human-ai interaction", "primary_area": "", "supplementary_material": "/attachment/a2b20f893fd24c4c4668238aee697773ba1002d4.zip", "author": "Di Huang;Ziyuan Nan;Xing Hu;Pengwei Jin;Shaohui Peng;Yuanbo Wen;Rui Zhang;Zidong Du;Qi Guo;Yewen Pu;Yunji Chen", "authorids": "~Di_Huang5;~Ziyuan_Nan1;~Xing_Hu3;~Pengwei_Jin1;~Shaohui_Peng2;~Yuanbo_Wen1;~Rui_Zhang1;~Zidong_Du1;~Qi_Guo4;~Yewen_Pu1;~Yunji_Chen1", "gender": "M;M;F;M;M;F;;M;M;M;M", "homepage": ";;;https://github.com/Pengwei-Jin;;;https://zidongdu.github.io/;http://novel.ict.ac.cn/qguo;http://www.mit.edu/~yewenpu;;", "dblp": ";340/8140;49/10052-1;304/2505;262/3144;60/2536-40;44/11216;67/398-1;53/10322;48/474;246/8768", "google_scholar": ";;Hc3iRxUAAAAJ;;;dse6jAsAAAAJ;https://scholar.google.com.sg/citations?user=8N9ym9YAAAAJ;;LJnNKXMAAAAJ;;", "orcid": "0000-0002-2370-0072;;;0000-0002-8267-9824;0000-0002-7775-2724;;0000-0002-7603-4210;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Di_Huang5;~Ziyuan_Nan1;~Xing_Hu3;~Pengwei_Jin1;~Yuanbo_Wen1;~Rui_Zhang1;~Zidong_Du1;~Qi_Guo4;~Yewen_Pu1;~Yunji_Chen1;~shaohui_peng1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, CAS;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Autodesk;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ucas.edu.cn;ict.ac.cn;ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;autodesk.com;ict.ac.cn;ict.ac.cn", "position": "PhD student;MS student;Associate Professor;PhD student;Postdoc;Assistant Professor;Full Professor;Full Professor;Principal Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nhuang2023anpl,\ntitle={{ANPL}: Towards Natural Programming with Interactive Decomposition},\nauthor={Di Huang and Ziyuan Nan and Xing Hu and Pengwei Jin and Shaohui Peng and Yuanbo Wen and Rui Zhang and Zidong Du and Qi Guo and Yewen Pu and Yunji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RTRS3ZTsSj}\n}", "github": "", "project": "", "reviewers": "tgZy;neh7;NKWp;ecZq", "pdf_size": 3037839, "rating": "5;6;6;7", "confidence": "3;4;3;4", "soundness": "4;2;2;3", "novelty": "2;2;2;2", "presentation": "4;2;3;2", "wc_summary": "82;74;22;161", "wc_strengths": "98;75;29;117", "wc_weaknesses": "165;110;132;459", "wc_questions": "62;23;16;82", "wc_limitations": "1;6;6;15", "wc_review": "408;288;205;834", "wc_reply_reviewers": "71;11;101;321", "wc_reply_authors": "512;17;99;815", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;3;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 84.75, 49.68588833864199 ], "wc_strengths_avg": [ 79.75, 32.85859857023729 ], "wc_weaknesses_avg": [ 216.5, 141.3691974936549 ], "wc_questions_avg": [ 45.75, 27.2981226460722 ], "wc_limitations_avg": [ 7.0, 5.049752469181039 ], 
"wc_review_avg": [ 433.75, 242.09127927292218 ], "wc_reply_reviewers_avg": [ 126.0, 117.15374513859982 ], "wc_reply_authors_avg": [ 360.75, 322.4502868660532 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10986268961630563469&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "ict.ac.cn;ucas.edu.cn;ict.ac.cn;ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;autodesk.com;ict.ac.cn;ict.ac.cn", "author_num": 11, "aff_unique_index": "0;1;0;0;0;0;0;0;2;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Autodesk", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn;https://www.autodesk.com", "aff_unique_abbr": "CAS;UCAS;Autodesk", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Promises and Pitfalls of Threshold-based Auto-labeling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71657", "id": "RUCFAKNDb2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a355051cc32d36e2a971de190701745a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RUCFAKNDb2", "openreview": "https://openreview.net/forum?id=RUCFAKNDb2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71657", "video": "https://nips.cc/virtual/2023/poster/71657", "author_site": "Harit Vishwakarma, Heguang Lin, Frederic Sala, Ramya Korlakai Vinayak", "tldr": "", "abstract": "Creating large-scale high-quality labeled datasets is a major bottleneck in supervised machine learning workflows. Threshold-based auto-labeling (TBAL), where validation data obtained from humans is used to find a confidence threshold above which the data is machine-labeled, reduces reliance on manual annotation. TBAL is emerging as a widely-used solution in practice. Given the long shelf-life and diverse usage of the resulting datasets, understanding when the data obtained by such auto-labeling systems can be relied on is crucial. This is the first work to analyze TBAL systems and derive sample complexity bounds on the amount of human-labeled validation data required for guaranteeing the quality of machine-labeled data. Our results provide two crucial insights. First, reasonable chunks of unlabeled data can be automatically and accurately labeled by seemingly bad models. Second, a hidden downside of TBAL systems is potentially prohibitive validation data usage. Together, these insights describe the promise and pitfalls of using such systems. 
\nWe validate our theoretical guarantees with extensive experiments on synthetic and real datasets.", "keywords": "Auto Labeling;Active Learning;Selective Classification", "primary_area": "", "supplementary_material": "/attachment/2eb55f91bd340a19457e1ecfb3ad74991aeb2524.pdf", "author": "Harit Vishwakarma;Heguang Lin;Frederic Sala;Ramya Korlakai Vinayak", "authorids": "~Harit_Vishwakarma1;~Heguang_Lin1;~Frederic_Sala1;~Ramya_Korlakai_Vinayak1", "gender": "M;M;M;", "homepage": "https://harit7.github.io;https://2454511550lin.github.io/;https://pages.cs.wisc.edu/~fredsala/;https://ramyakv.github.io/", "dblp": "207/7622;;133/3602;148/9626", "google_scholar": "pJF_ZZUAAAAJ;https://scholar.google.com/citations?hl=en;9KhIkNkAAAAJ;", "orcid": ";;;", "linkedin": "harit7;;;", "or_profile": "~Harit_Vishwakarma1;~Heguang_Lin1;~Frederic_Sala1;~Ramya_Korlakai_Vinayak1", "aff": "University of Wisconsin, Madison;University of Wisconsin - Madison;University of Wisconsin, Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu;wisc.edu;wisc.edu", "position": "PhD student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nvishwakarma2023promises,\ntitle={Promises and Pitfalls of Threshold-based Auto-labeling},\nauthor={Harit Vishwakarma and Heguang Lin and Frederic Sala and Ramya Korlakai Vinayak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RUCFAKNDb2}\n}", "github": "", "project": "", "reviewers": "L2k7;zavU;RfNU", "pdf_size": 4702205, "rating": "6;7;7", "confidence": "4;3;4", "soundness": "2;3;4", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "76;68;102", "wc_strengths": "80;78;65", "wc_weaknesses": "154;159;107", "wc_questions": "2;113;138", "wc_limitations": "1;18;33", "wc_review": "313;436;445", "wc_reply_reviewers": "0;0;44", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 14.514360704718161 ], "wc_strengths_avg": [ 74.33333333333333, 6.649979114420002 ], "wc_weaknesses_avg": [ 140.0, 23.423634787681152 ], "wc_questions_avg": [ 84.33333333333333, 59.10630724005312 ], "wc_limitations_avg": [ 17.333333333333332, 13.072447700751718 ], "wc_review_avg": [ 398.0, 60.21627686929839 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 20.741798914805393 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16146107961668442914&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "wisc.edu;wisc.edu;wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UW-Madison", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Federated Spectral Clustering via Secure Similarity Reconstruction", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71656", "id": "RW7rZ8Y3Bp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6cd2650926d332c86a84c48529cc421-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RW7rZ8Y3Bp", "openreview": "https://openreview.net/forum?id=RW7rZ8Y3Bp", "poster": "/media/PosterPDFs/NeurIPS%202023/71656.png?t=1702183089.9786296", "slides": "https://nips.cc/virtual/2023/poster/71656", "video": "https://nips.cc/virtual/2023/poster/71656", "author_site": "Dong Qiao, Chris Ding, Jicong Fan", "tldr": "", "abstract": "Federated learning has a significant advantage in protecting information privacy. Many scholars proposed various secure learning methods within the framework of federated learning but the study on secure federated unsupervised learning especially clustering is limited. We in this work propose a secure kernelized factorization method for federated spectral clustering on distributed dataset. The method is non-trivial because the kernel or similarity matrix for spectral clustering is computed by data pairs, which violates the principle of privacy protection. Our method implicitly constructs an approximation for the kernel matrix on distributed data such that we can perform spectral clustering under the constraint of privacy protection. We provide a convergence guarantee of the optimization algorithm, reconstruction error bounds of the Gaussian kernel matrix, and the sufficient condition of correct clustering of our method. We also present some results of differential privacy. Numerical results on synthetic and real datasets demonstrate that the proposed method is efficient and accurate in comparison to the baselines.", "keywords": "clustering;federated learning;privacy", "primary_area": "", "supplementary_material": "", "author": "Dong Qiao;Chris Ding;Jicong Fan", "authorids": "~Dong_Qiao1;~Chris_Ding1;~Jicong_Fan2", "gender": ";M;M", "homepage": "https://yuanxiqd.github.io;http://ranger.uta.edu/~chqding/;https://jicongfan.github.io/", "dblp": ";https://dblp.uni-trier.de/pers/hd/d/Ding:Chris;139/1570", "google_scholar": ";q7FfnjgAAAAJ;vdJsnhIAAAAJ", "orcid": ";;0000-0001-9665-0355", "linkedin": ";;", "or_profile": "~Dong_Qiao1;~Chris_Ding1;~Jicong_Fan2", "aff": "The Chinese University of Hong Kong, Shenzhen;University of Texas at Arlington;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cse.uta.edu;cuhk.edu.cn", "position": "PhD student;Professor;Research Assistant Professor", "bibtex": "@inproceedings{\nqiao2023federated,\ntitle={Federated Spectral Clustering via Secure Similarity Reconstruction},\nauthor={Dong Qiao and Chris Ding and Jicong Fan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RW7rZ8Y3Bp}\n}", "github": "", "project": "", "reviewers": "EUeG;cNYr;Mxhk;fSDW", "pdf_size": 2827315, "rating": "4;6;7;7", "confidence": "3;3;5;4", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "78;63;54;161", "wc_strengths": "35;38;158;255", "wc_weaknesses": "446;291;60;273", "wc_questions": "8;2;158;77", "wc_limitations": "31;2;1;25", "wc_review": "598;396;431;791", "wc_reply_reviewers": "0;13;62;46", "wc_reply_authors": "70;24;14;38", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 
], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 42.444080859408416 ], "wc_strengths_avg": [ 121.5, 91.66378783358235 ], "wc_weaknesses_avg": [ 267.5, 137.3872264804847 ], "wc_questions_avg": [ 61.25, 63.156056716676034 ], "wc_limitations_avg": [ 14.75, 13.423393758658799 ], "wc_review_avg": [ 554.0, 156.68280058768417 ], "wc_reply_reviewers_avg": [ 30.25, 24.843258642939738 ], "wc_reply_authors_avg": [ 36.5, 21.13646138784825 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7385489458759963, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15968993967582281346&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "cuhk.edu.cn;cse.uta.edu;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;University of Texas at Arlington", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.uta.edu", "aff_unique_abbr": "CUHK;UTA", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Shenzhen;Arlington", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "BanditPAM++: Faster $k$-medoids Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71655", "id": "RWcfpmjlYm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e885e5bc6e13b9dd8f80bc5482b1fa2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RWcfpmjlYm", "openreview": "https://openreview.net/forum?id=RWcfpmjlYm", "poster": "/media/PosterPDFs/NeurIPS%202023/71655.png?t=1702316469.9483883", "slides": "https://nips.cc/virtual/2023/poster/71655", "video": "https://nips.cc/virtual/2023/poster/71655", "author_site": "Mo Tiwari, Ryan Kang, Donghyun Lee, Sebastian Thrun, Ilan Shomorony, Martin Zhang", "tldr": "", "abstract": "Clustering is a fundamental task in data science with wide-ranging applications. In $k$-medoids clustering, cluster centers must be actual datapoints and arbitrary distance metrics may be used; these features allow for greater interpretability of the cluster centers and the clustering of exotic objects in $k$-medoids clustering, respectively. $k$-medoids clustering has recently grown in popularity due to the discovery of more efficient $k$-medoids algorithms. In particular, recent research has proposed BanditPAM, a randomized $k$-medoids algorithm with state-of-the-art complexity and clustering accuracy. In this paper, we present BanditPAM++, which accelerates BanditPAM via two algorithmic improvements, and is $O(k)$ faster than BanditPAM in complexity and substantially faster than BanditPAM in wall-clock runtime. First, we demonstrate that BanditPAM has a special structure that allows the reuse of clustering information $\\textit{within}$ each iteration. Second, we demonstrate that BanditPAM has additional structure that permits the reuse of information $\\textit{across}$ different iterations. These observations inspire our proposed algorithm, BanditPAM++, which returns the same clustering solutions as BanditPAM but often several times faster. For example, on the CIFAR10 dataset, BanditPAM++ returns the same results as BanditPAM but runs over 10$\\times$ faster. 
Finally, we provide a high-performance C++ implementation of BanditPAM++, callable from Python and R, that may be of interest to practitioners at https://github.com/motiwari/BanditPAM. Auxiliary code to reproduce all of our experiments via a one-line script is available at https://github.com/ThrunGroup/BanditPAM_plusplus_experiments.", "keywords": "multi-armed bandits;clustering;k-medoids;best-arm identification", "primary_area": "", "supplementary_material": "/attachment/d034565b12fed223a1ffa227dea48102ac4e3ac9.pdf", "author": "Mo Tiwari;Ryan Kang;Donghyun Lee;Sebastian Thrun;Ilan Shomorony;Martin Jinye Zhang", "authorids": "~Mo_Tiwari1;~Ryan_Kang1;~Donghyun_Lee2;~Sebastian_Thrun1;~Ilan_Shomorony1;~Martin_Jinye_Zhang1", "gender": ";M;M;M;M;M", "homepage": "http://www.motiwari.com/;;;http://robot.cc;http://www.ilanshomorony.com;https://mzhanglab.github.io/", "dblp": "267/5421;https://dblp.org/rec/conf/nips/TiwariKLPSTZ22.html;298/4489;t/SebastianThrun;31/9223;184/9278", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;;fMAg4zEAAAAJ;zjr6n-QAAAAJ", "orcid": ";;;;;0000-0003-0006-2466", "linkedin": "motiwari;ryan-kang-554819221/;donghyun-lee-aa789422a;sebastian-thrun-59a0b273/;;", "or_profile": "~Mo_Tiwari1;~Ryan_Kang1;~Donghyun_Lee2;~Sebastian_Thrun1;~Ilan_Shomorony1;~Martin_J._Zhang1", "aff": ";Stanford University;University College London, University of London;;University of Illinois, Urbana Champaign;Harvard University", "aff_domain": ";stanford.edu;ucl.ac.uk;;illinois.edu;harvard.edu", "position": ";Undergrad student;MS student;;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\ntiwari2023banditpam,\ntitle={Bandit{PAM}++: Faster \\$k\\$-medoids Clustering},\nauthor={Mo Tiwari and Ryan Kang and Donghyun Lee and Sebastian Thrun and Ilan Shomorony and Martin Jinye Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RWcfpmjlYm}\n}", "github": "", "project": "", "reviewers": "hYVb;zP71;mktW;s8tF;oPsi", "pdf_size": 320821, "rating": "4;4;6;6;7", "confidence": "2;4;2;4;2", "soundness": "2;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;2;3;3;2", "wc_summary": "193;63;204;66;50", "wc_strengths": "109;27;14;38;33", "wc_weaknesses": "243;125;52;274;152", "wc_questions": "79;178;66;3;181", "wc_limitations": "13;11;1;33;1", "wc_review": "637;404;337;414;417", "wc_reply_reviewers": "0;10;262;0;30", "wc_reply_authors": "21;89;259;21;157", "reply_reviewers": "0;1;2;0;1", "reply_authors": "2;3;3;2;3", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 115.2, 68.31515205282061 ], "wc_strengths_avg": [ 44.2, 33.37903533657017 ], "wc_weaknesses_avg": [ 169.2, 80.51683053871407 ], "wc_questions_avg": [ 101.4, 68.76219891771932 ], "wc_limitations_avg": [ 11.8, 11.702991070662234 ], "wc_review_avg": [ 441.8, 101.88503324826469 ], "wc_reply_reviewers_avg": [ 60.4, 101.39349091534426 ], "wc_reply_authors_avg": [ 109.4, 90.21219429766688 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18207588810473506386&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 8, "email": ";stanford.edu;ucl.ac.uk;;illinois.edu;harvard.edu", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Stanford University;University College London;University of Illinois Urbana-Champaign;Harvard University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.ucl.ac.uk;https://illinois.edu;https://www.harvard.edu", "aff_unique_abbr": "Stanford;UCL;UIUC;Harvard", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Stanford;;Urbana-Champaign", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Modality-Agnostic Self-Supervised Learning with Meta-Learned Masked Auto-Encoder", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71654", "id": "RZGtK2nDDJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9df55bf67e499635908395931ed6ea9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RZGtK2nDDJ", "openreview": "https://openreview.net/forum?id=RZGtK2nDDJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71654.png?t=1702224658.8696535", "slides": "https://nips.cc/virtual/2023/poster/71654", "video": "https://nips.cc/virtual/2023/poster/71654", "author_site": "Huiwon Jang, Jihoon Tack, Daewon Choi, Jongheon Jeong, Jinwoo Shin", "tldr": "", "abstract": "Despite its practical importance across a wide range of modalities, recent advances in self-supervised learning (SSL) have been primarily focused on a few well-curated domains, e.g., vision and language, often relying on their domain-specific knowledge. For example, Masked Auto-Encoder (MAE) has become one of the popular architectures in these domains, but less has explored its potential in other modalities. In this paper, we develop MAE as a unified, modality-agnostic SSL framework. In turn, we argue meta-learning as a key to interpreting MAE as a modality-agnostic learner, and propose enhancements to MAE from the motivation to jointly improve its SSL across diverse modalities, coined MetaMAE as a result. Our key idea is to view the mask reconstruction of MAE as a meta-learning task: masked tokens are predicted by adapting the Transformer meta-learner through the amortization of unmasked tokens. Based on this novel interpretation, we propose to integrate two advanced meta-learning techniques. First, we adapt the amortized latent of the Transformer encoder using gradient-based meta-learning to enhance the reconstruction. Then, we maximize the alignment between amortized and adapted latents through task contrastive learning which guides the Transformer encoder to better encode the task-specific knowledge. 
Our experiment demonstrates the superiority of MetaMAE in the modality-agnostic SSL benchmark (called DABS), significantly outperforming prior baselines.", "keywords": "Self-Supervised Learning;Modality-Agnostic Self-Supervised Learning;Meta-Learning;Masked Auto-Encoder", "primary_area": "", "supplementary_material": "/attachment/55f4b2f90210b36a817f9731905f0535138a35e2.zip", "author": "Huiwon Jang;Jihoon Tack;Daewon Choi;Jongheon Jeong;Jinwoo Shin", "authorids": "~Huiwon_Jang1;~Jihoon_Tack1;~Daewon_Choi1;~Jongheon_Jeong1;~Jinwoo_Shin1", "gender": "M;M;;M;M", "homepage": "https://huiwon-jang.github.io/;https://jihoontack.github.io;https://ChoiDae1.github.io;https://jh-jeong.github.io;https://sites.google.com/site/mijirim/", "dblp": "332/0647;267/5487;45/7082;241/5923;31/7062", "google_scholar": "https://scholar.google.com/citations?hl=en;eW8-OT4AAAAJ;https://scholar.google.co.kr/citations?user=NlcfkbgAAAAJ;mZB2qfcAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;0000-0002-4058-5774;", "linkedin": "huiwon-jang-5a789b250;;;jongheonj/;", "or_profile": "~Huiwon_Jang1;~Jihoon_Tack1;~Daewon_Choi1;~Jongheon_Jeong1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;University of Oxford;Korea University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;stats.ox.ac.uk;korea.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Intern;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\njang2023modalityagnostic,\ntitle={Modality-Agnostic Self-Supervised Learning with Meta-Learned Masked Auto-Encoder},\nauthor={Huiwon Jang and Jihoon Tack and Daewon Choi and Jongheon Jeong and Jinwoo Shin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RZGtK2nDDJ}\n}", "github": "", "project": "", "reviewers": "Pe9M;9KfC;2wfj;3Z8m;b3vS;GPRA", "pdf_size": 710627, "rating": "5;5;5;6;7;8", "confidence": "3;3;3;5;2;4", "soundness": "3;3;3;3;3;4", "novelty": "3;2;3;2;3;4", "presentation": "3;2;2;3;3;3", "wc_summary": "86;82;65;75;84;45", "wc_strengths": "51;71;47;72;83;27", "wc_weaknesses": "57;82;80;302;76;22", "wc_questions": "1;49;8;14;2;37", "wc_limitations": "1;7;14;50;16;14", "wc_review": "196;291;214;513;261;145", "wc_reply_reviewers": "13;15;0;47;22;21", "wc_reply_authors": "45;44;0;81;58;55", "reply_reviewers": "1;1;0;1;1;1", "reply_authors": "2;2;1;2;2;2", "rating_avg": [ 6.0, 1.1547005383792515 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.83333333333333, 14.276048317218445 ], "wc_strengths_avg": [ 58.5, 18.7949461292125 ], "wc_weaknesses_avg": [ 103.16666666666667, 91.26594229077034 ], "wc_questions_avg": [ 18.5, 18.172781845386247 ], "wc_limitations_avg": [ 17.0, 15.620499351813308 ], "wc_review_avg": [ 270.0, 118.18629362155326 ], "wc_reply_reviewers_avg": [ 19.666666666666668, 14.185281887302141 ], "wc_reply_authors_avg": [ 47.166666666666664, 24.368125811304317 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.8333333333333333, 0.3726779962499649 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.15309310892394862, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12616942737908900034&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "kaist.ac.kr;stats.ox.ac.uk;korea.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Oxford;Korea University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.ox.ac.uk;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;Oxford;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "South Korea;United Kingdom" }, { "title": "ClimateLearn: Benchmarking Machine Learning for Weather and Climate Modeling", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73595", "id": "RZJEkLFlPx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ed73c36e771881b232ef35fa3a1dec14-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=RZJEkLFlPx", "openreview": "https://openreview.net/forum?id=RZJEkLFlPx", "poster": "/media/PosterPDFs/NeurIPS%202023/73595.png?t=1701764254.610469", "slides": "https://nips.cc/virtual/2023/poster/73595", "video": "https://nips.cc/virtual/2023/poster/73595", "author_site": "Tung Nguyen, Jason Jewik, Hritik Bansal, Prakhar Sharma, Aditya Grover", "tldr": "", "abstract": "Modeling weather and climate is an essential endeavor to understand the near- and long-term impacts of climate change, as well as to inform technology and policymaking for adaptation and mitigation efforts. In recent years, there has been a surging interest in applying data-driven methods based on machine learning for solving core problems such as weather forecasting and climate downscaling. Despite promising results, much of this progress has been impaired due to the lack of large-scale, open-source efforts for reproducibility, resulting in the use of inconsistent or underspecified datasets, training setups, and evaluations by both domain scientists and artificial intelligence researchers. We introduce ClimateLearn, an open-source PyTorch library that vastly simplifies the training and evaluation of machine learning models for data-driven climate science. ClimateLearn consists of holistic pipelines for dataset processing (e.g., ERA5, CMIP6, PRISM), implementing state-of-the-art deep learning models (e.g., Transformers, ResNets), and quantitative and qualitative evaluation for standard weather and climate modeling tasks. We supplement these functionalities with extensive documentation, contribution guides, and quickstart tutorials to expand access and promote community growth. We have also performed comprehensive forecasting and downscaling experiments to showcase the capabilities and key features of our library. To our knowledge, ClimateLearn is the first large-scale, open-source effort for bridging research in weather and climate modeling with modern machine learning systems. 
Our library is available publicly at https://github.com/aditya-grover/climate-learn.", "keywords": "weather modeling;climate modeling;machine learning;benchmarking", "primary_area": "", "supplementary_material": "/attachment/7e13c19af26e246897e6fdc828c8155532779b07.pdf", "author": "Tung Nguyen;Jason Kyle Jewik;Hritik Bansal;Prakhar Sharma;Aditya Grover", "authorids": "~Tung_Nguyen2;~Jason_Kyle_Jewik1;~Hritik_Bansal2;~Prakhar_Sharma2;~Aditya_Grover1", "gender": "M;M;M;M;M", "homepage": "https://tung-nd.github.io/;https://jasonjewik.github.io;https://sites.google.com/view/hbansal;;https://aditya-grover.github.io", "dblp": ";;239/5922;;162/5052", "google_scholar": "https://scholar.google.com.vn/citations?user=F9mgq3sAAAAJ;;gAKTYtoAAAAJ;https://scholar.google.co.in/citations?user=WLlTP5gAAAAJ;oOhnPUgAAAAJ", "orcid": ";;;;", "linkedin": "tung-nguyen-40703616b/;jasonjewik;hritik-bansal/;prakhar6sharma/;", "or_profile": "~Tung_Nguyen2;~Jason_Kyle_Jewik1;~Hritik_Bansal2;~Prakhar_Sharma2;~Aditya_Grover1", "aff": "University of California, Los Angeles;;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;;ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023climatelearn,\ntitle={ClimateLearn: Benchmarking Machine Learning for Weather and Climate Modeling},\nauthor={Tung Nguyen and Jason Kyle Jewik and Hritik Bansal and Prakhar Sharma and Aditya Grover},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=RZJEkLFlPx}\n}", "github": "", "project": "", "reviewers": "T3D2;ofXC;qd4b;9UcC;48aK", "pdf_size": 1810823, "rating": "6;6;6;7;8", "confidence": "4;4;3;4;4", "wc_summary_and_contributions": "56;36;75;49;110", "wc_strengths": "65;33;42;52;125", "wc_improvement": "77;292;121;45;299", "wc_limitations": "40;3;8;7;99", "wc_correctness": "1;14;1;9;82", "wc_clarity": "3;3;3;5;134", "wc_relation_to_prior_work": "1;13;1;5;2", "wc_documentation": "1;13;3;5;36", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "245;408;255;178;888", "wc_reply_reviewers": "336;88;0;22;27", "wc_reply_authors": "1125;1613;583;185;661", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 65.2, 25.701361831622854 ], "wc_strengths_avg": [ 63.4, 32.58588651548397 ], "wc_improvement_avg": [ 166.8, 107.84136497652466 ], "wc_limitations_avg": [ 31.4, 36.313083041790875 ], "wc_correctness_avg": [ 21.4, 30.70244289954791 ], "wc_clarity_avg": [ 29.6, 52.20574681009745 ], "wc_relation_to_prior_work_avg": [ 4.4, 4.5431266766402185 ], "wc_documentation_avg": [ 11.6, 12.86234815265082 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 394.8, 257.8397952217617 ], "wc_reply_reviewers_avg": [ 94.6, 124.18309063636642 ], "wc_reply_authors_avg": [ 833.4, 491.05014000608946 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.375, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1658529016136763013&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "cs.ucla.edu;;ucla.edu;ucla.edu;ucla.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, 
Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GenS: Generalizable Neural Surface Reconstruction from Multi-View Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71653", "id": "Rcit6V3vus", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b29ab822442a1616f9bd390fddf6e425-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Rcit6V3vus", "openreview": "https://openreview.net/forum?id=Rcit6V3vus", "poster": "/media/PosterPDFs/NeurIPS%202023/71653.png?t=1701529789.416836", "slides": "https://nips.cc/virtual/2023/poster/71653", "video": "https://nips.cc/virtual/2023/poster/71653", "author_site": "Rui Peng, Xiaodong Gu, Luyang Tang, Shihe Shen, Fanqi Yu, Ronggang Wang", "tldr": "", "abstract": "Combining the signed distance function (SDF) and differentiable volume rendering has emerged as a powerful paradigm for surface reconstruction from multi-view images without 3D supervision. However, current methods are impeded by requiring long-time per-scene optimizations and cannot generalize to new scenes. In this paper, we present GenS, an end-to-end generalizable neural surface reconstruction model. Unlike coordinate-based methods that train a separate network for each scene, we construct a generalized multi-scale volume to directly encode all scenes. Compared with existing solutions, our representation is more powerful, which can recover high-frequency details while maintaining global smoothness. Meanwhile, we introduce a multi-scale feature-metric consistency to impose the multi-view consistency in a more discriminative multi-scale feature space, which is robust to the failures of the photometric consistency. And the learnable feature can be self-enhanced to continuously improve the matching accuracy and mitigate aggregation ambiguity. Furthermore, we design a view contrast loss to force the model to be robust to those regions covered by few viewpoints through distilling the geometric prior from dense input to sparse input. Extensive experiments on popular benchmarks show that our model can generalize well to new scenes and outperform existing state-of-the-art methods even those employing ground-truth depth supervision. 
Code will be available at https://github.com/prstrive/GenS.", "keywords": "Generalizable Neural Surface;Volume Rendering;Signed Distance Function", "primary_area": "", "supplementary_material": "/attachment/1140b66985343a960baa9b31a15f431b2d4bbf47.pdf", "author": "Rui Peng;Xiaodong Gu;Luyang Tang;Shihe Shen;Fanqi Yu;Ronggang Wang", "authorids": "~Rui_Peng1;~Xiaodong_Gu3;~Luyang_Tang1;~Shihe_Shen1;~Fanqi_Yu1;~Ronggang_Wang1", "gender": "M;M;F;Not Specified;M;M", "homepage": "https://prstrive.github.io/;;https://github.com/mush-room;https://github.com/ssh0731;http://www.ece.pku.edu.cn/2014/jsjyy_0415/48.html;https://www.pku.edu.cn", "dblp": ";71/4467-4;303/1242;;;", "google_scholar": "Hfz_H50AAAAJ;aJPO514AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;", "orcid": ";0000-0003-2623-7973;;;0000-0003-0873-0465;", "linkedin": ";;;;;", "or_profile": "~Rui_Peng1;~Xiaodong_Gu3;~Luyang_Tang1;~Shihe_Shen1;~Ronggang_Wang1;~yu_fanqi1", "aff": "Peking University;Alibaba Group;Peking University;Peking University;Peking University Shenzhen Graduate School;Peking University", "aff_domain": "pku.edu.cn;alibaba-inc.com;pku.edu.cn;pku.edu.cn;pkusz.edu.cn;pku.edu.cn", "position": "PhD student;Researcher;PhD student;MS student;Full Professor;MS student", "bibtex": "@inproceedings{\npeng2023gens,\ntitle={GenS: Generalizable Neural Surface Reconstruction from Multi-View Images},\nauthor={Rui Peng and Xiaodong Gu and Luyang Tang and Shihe Shen and Fanqi Yu and Ronggang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Rcit6V3vus}\n}", "github": "", "project": "", "reviewers": "Tu79;XxbL;EdqW;cro4", "pdf_size": 33814415, "rating": "5;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;3;3;2", "wc_summary": "26;80;102;129", "wc_strengths": "34;31;40;31", "wc_weaknesses": "100;206;225;41", "wc_questions": "77;128;6;100", "wc_limitations": "7;11;1;1", "wc_review": "244;456;374;302", "wc_reply_reviewers": "0;132;15;27", "wc_reply_authors": "0;21;18;23", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 37.844253196489426 ], "wc_strengths_avg": [ 34.0, 3.6742346141747673 ], "wc_weaknesses_avg": [ 143.0, 75.73968576644612 ], "wc_questions_avg": [ 77.75, 45.190568706313044 ], "wc_limitations_avg": [ 5.0, 4.242640687119285 ], "wc_review_avg": [ 344.0, 79.38513714795737 ], "wc_reply_reviewers_avg": [ 43.5, 51.98317035349037 ], "wc_reply_authors_avg": [ 15.5, 9.12414379544733 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=901674964088364591&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;alibaba-inc.com;pku.edu.cn;pku.edu.cn;pkusz.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Peking University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Peking U;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": 
"0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Environment-Aware Affordance for 3D Articulated Object Manipulation under Occlusions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71652", "id": "Re2NHYoZ5l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf78fc727cf882df66e6dbc826161e86-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Re2NHYoZ5l", "openreview": "https://openreview.net/forum?id=Re2NHYoZ5l", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71652", "video": "https://nips.cc/virtual/2023/poster/71652", "author_site": "Ruihai Wu, Kai Cheng, Yan Zhao, Chuanruo Ning, Guanqi Zhan, Hao Dong", "tldr": "", "abstract": "Perceiving and manipulating 3D articulated objects in diverse environments is essential for home-assistant robots. Recent studies have shown that point-level affordance provides actionable priors for downstream manipulation tasks. However, existing works primarily focus on single-object scenarios with homogeneous agents, overlooking the realistic constraints imposed by the environment and the agent's morphology, e.g., occlusions and physical limitations. In this paper, we propose an environment-aware affordance framework that incorporates both object-level actionable priors and environment constraints. Unlike object-centric affordance approaches, learning environment-aware affordance faces the challenge of combinatorial explosion due to the complexity of various occlusions, characterized by their quantities, geometries, positions and poses. To address this and enhance data efficiency, we introduce a novel contrastive affordance learning framework capable of training on scenes containing a single occluder and generalizing to scenes with complex occluder combinations. 
Experiments demonstrate the effectiveness of our proposed approach in learning affordance considering environment constraints.", "keywords": "Visual Affordance for Robotics;Articulated Object Manipulation;Occlusion Handling", "primary_area": "", "supplementary_material": "/attachment/f7452b1fe6324d67dc136f175f758c97ed246c74.zip", "author": "Ruihai Wu;Kai Cheng;Yan Zhao;Chuanruo Ning;Guanqi Zhan;Hao Dong", "authorids": "~Ruihai_Wu1;~Kai_Cheng2;~Yan_Zhao5;~Chuanruo_Ning1;~Guanqi_Zhan1;~Hao_Dong3", "gender": "M;M;F;M;;M", "homepage": "https://warshallrho.github.io/;https://chengkaiacademycity.github.io/;https://sxy7147.github.io;https://tritiumr.github.io;https://www.robots.ox.ac.uk/~guanqi/;https://zsdonghao.github.io", "dblp": "248/8028.html;;88/5320-35;342/8955;254/2030;14/1525-3.html", "google_scholar": "https://scholar.google.com/citations?hl=en;uF17d-wAAAAJ;iIs4TDMAAAAJ;jnLq85IAAAAJ;f_m4WJIAAAAJ;xLFL4sMAAAAJ", "orcid": ";0009-0000-4910-1388;;;;0000-0003-2261-9122", "linkedin": ";kai-cheng-939193231/;;;;", "or_profile": "~Ruihai_Wu1;~Kai_Cheng2;~Yan_Zhao5;~Chuanruo_Ning1;~Guanqi_Zhan1;~Hao_Dong3", "aff": "Peking University;Peking University;Peking University;Peking University;University of Oxford;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;ox.ac.uk;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwu2023learning,\ntitle={Learning Environment-Aware Affordance for 3D Articulated Object Manipulation under Occlusions},\nauthor={Ruihai Wu and Kai Cheng and Yan Zhao and Chuanruo Ning and Guanqi Zhan and Hao Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Re2NHYoZ5l}\n}", "github": "", "project": "", "reviewers": "ntD4;gQVm;wNB2;zyYG;eVFQ", "pdf_size": 8274310, "rating": "5;5;6;6;6", "confidence": "4;4;4;3;4", "soundness": "3;3;3;2;3", "novelty": "2;3;3;2;3", "presentation": "3;3;2;3;2", "wc_summary": "79;55;120;69;101", "wc_strengths": "85;67;80;68;89", "wc_weaknesses": "238;69;240;74;205", "wc_questions": "96;106;165;1;160", "wc_limitations": "5;14;22;1;38", "wc_review": "503;311;627;213;593", "wc_reply_reviewers": "21;20;53;23;96", "wc_reply_authors": "47;15;19;19;27", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 84.8, 23.120553626589484 ], "wc_strengths_avg": [ 77.8, 8.885943956609225 ], "wc_weaknesses_avg": [ 165.2, 77.52522170235956 ], "wc_questions_avg": [ 105.6, 59.196621525218816 ], "wc_limitations_avg": [ 16.0, 13.19090595827292 ], "wc_review_avg": [ 449.4, 161.29178528369013 ], "wc_reply_reviewers_avg": [ 42.6, 29.39795911283639 ], "wc_reply_authors_avg": [ 25.4, 11.48216007552586 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8215167892305151785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;ox.ac.uk;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Peking University;University of Oxford", 
"aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.ox.ac.uk", "aff_unique_abbr": "Peking U;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Species196: A One-Million Semi-supervised Dataset for Fine-grained Species Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73594", "id": "Rep7BB4vDa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8cb92f326d01fd7f4371283ee2fa6386-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Rep7BB4vDa", "openreview": "https://openreview.net/forum?id=Rep7BB4vDa", "poster": "/media/PosterPDFs/NeurIPS%202023/73594.png?t=1697478479.1884959", "slides": "https://nips.cc/virtual/2023/poster/73594", "video": "https://nips.cc/virtual/2023/poster/73594", "author_site": "Wei He, Kai Han, Ying Nie, Chengcheng Wang, Yunhe Wang", "tldr": "", "abstract": "The development of foundation vision models has pushed the general visual recognition to a high level, but cannot well address the fine-grained recognition in specialized domain such as invasive species classification. Identifying and managing invasive species has strong social and ecological value. Currently, most invasive species datasets are limited in scale and cover a narrow range of species, which restricts the development of deep-learning based invasion biometrics systems. To fill the gap of this area, we introduced Species196, a large-scale semi-supervised dataset of 196-category invasive species. It collects over 19K images with expert-level accurate annotations (Species196-L), and 1.2M unlabeled images of invasive species (Species196-U). The dataset provides four experimental settings for benchmarking the existing models and algorithms, namely, supervised learning, semi-supervised learning and self-supervised pretraining. To facilitate future research on these four learning paradigms, we conduct an empirical study of the representative methods on the introduced dataset. 
The dataset will be made publicly available at https://species-dataset.github.io/.", "keywords": "Invasion biometrics;Fine-grained dataset;Pre-training", "primary_area": "", "supplementary_material": "/attachment/adc929a177018d51634ee5c0dbbdaaafd5147a46.pdf", "author": "Wei He;Kai Han;Ying Nie;Chengcheng Wang;Yunhe Wang", "authorids": "~Wei_He10;~Kai_Han2;~Ying_Nie1;~Chengcheng_Wang1;~Yunhe_Wang1", "gender": ";M;M;M;M", "homepage": ";https://iamhankai.github.io;;;https://www.wangyunhe.site/", "dblp": ";51/4757-2;;;63/8217-1", "google_scholar": ";vThoBVcAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;OfmE9XUAAAAJ;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";0000-0002-9761-2702;;;0000-0002-0142-509X", "linkedin": ";;;;", "or_profile": "~Wei_He10;~Kai_Han2;~Ying_Nie1;~Chengcheng_Wang1;~Yunhe_Wang1", "aff": ";Institute of Software, Chinese Academy of Sciences;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Huawei Noah's Ark Lab", "aff_domain": ";ios.ac.cn;huawei.com;huawei.com;huawei.com", "position": ";PhD student;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nhe2023species,\ntitle={Species196: A One-Million Semi-supervised Dataset for Fine-grained Species Recognition},\nauthor={Wei He and Kai Han and Ying Nie and Chengcheng Wang and Yunhe Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Rep7BB4vDa}\n}", "github": "", "project": "", "reviewers": "fB1t;3Z3U;QxhT;foZN;H3At", "pdf_size": 43247014, "rating": "6;6;6;7;8", "confidence": "4;4;4;4;3", "wc_summary_and_contributions": "97;59;86;45;87", "wc_strengths": "151;76;94;36;59", "wc_improvement": "124;293;57;156;46", "wc_limitations": "15;13;137;75;10", "wc_correctness": "13;9;33;18;253", "wc_clarity": "1;5;30;14;6", "wc_relation_to_prior_work": "1;14;26;7;15", "wc_documentation": "9;22;44;21;13", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "412;492;508;373;490", "wc_reply_reviewers": "13;0;48;25;106", "wc_reply_authors": "860;1687;766;1677;2345", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;3;2;3;5", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 74.8, 19.518196638009364 ], "wc_strengths_avg": [ 83.2, 38.93276255289368 ], "wc_improvement_avg": [ 135.2, 88.91659012805202 ], "wc_limitations_avg": [ 50.0, 49.775495979447555 ], "wc_correctness_avg": [ 65.2, 94.25157823612292 ], "wc_clarity_avg": [ 11.2, 10.303397497913007 ], "wc_relation_to_prior_work_avg": [ 12.6, 8.404760555780278 ], "wc_documentation_avg": [ 21.8, 12.12270596855339 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 455.0, 52.86965102967864 ], "wc_reply_reviewers_avg": [ 38.4, 37.312732411336476 ], "wc_reply_authors_avg": [ 1467.0, 587.0662654249518 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 3.2, 0.9797958971132712 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8750000000000001, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5628856658850275750&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";ios.ac.cn;huawei.com;huawei.com;huawei.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Chinese Academy of Sciences;Huawei", "aff_unique_dep": "Institute of Software;Noah's Ark Lab", "aff_unique_url": "http://www.ios.ac.cn;https://www.huawei.com", "aff_unique_abbr": 
"CAS;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DeepPCR: Parallelizing Sequential Operations in Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71651", "id": "RgD92idA32", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/948d8ba4e30c8c3a800cf436b31f376e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RgD92idA32", "openreview": "https://openreview.net/forum?id=RgD92idA32", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71651", "video": "https://nips.cc/virtual/2023/poster/71651", "author_site": "Federico Danieli, Miguel Sarabia, Xavier Suau Cuadros, Pau Rodriguez, Luca Zappella", "tldr": "", "abstract": "Parallelization techniques have become ubiquitous for accelerating inference and training of deep neural networks. Despite this, several operations are still performed in a sequential manner. For instance, the forward and backward passes are executed layer-by-layer, and the output of diffusion models is produced by applying a sequence of denoising steps. This sequential approach results in a computational cost proportional to the number of steps involved, presenting a potential bottleneck as the number of steps increases. In this work, we introduce DeepPCR, a novel algorithm which parallelizes typically sequential operations in order to speed up inference and training of neural networks. DeepPCR is based on interpreting a sequence of $L$ steps as the solution of a specific system of equations, which we recover using the Parallel Cyclic Reduction algorithm. This reduces the complexity of computing the sequential operations from $\\mathcal{O}(L)$ to $\\mathcal{O}(\\log_2L)$, thus yielding a speedup for large $L$. To verify the theoretical lower complexity of the algorithm, and to identify regimes for speedup, we test the effectiveness of DeepPCR in parallelizing the forward and backward pass in multi-layer perceptrons, and reach speedups of up to $30\\times$ for the forward and $200\\times$ for the backward pass. 
We additionally showcase the flexibility of DeepPCR by parallelizing training of ResNets with as many as 1024 layers, and generation in diffusion models, enabling up to $7\\times$ faster training and $11\\times$ faster generation, respectively, when compared to the sequential approach.", "keywords": "Acceleration;layer-parallelization;diffusion;Parallel Cyclic Reduction", "primary_area": "", "supplementary_material": "", "author": "Federico Danieli;Miguel Sarabia;Xavier Suau;Pau Rodriguez;Luca Zappella", "authorids": "~Federico_Danieli1;~Miguel_Sarabia1;~Xavier_Suau1;~Pau_Rodriguez2;~Luca_Zappella1", "gender": "M;;M;;M", "homepage": ";;;;http://www.cis.jhu.edu/~luca/", "dblp": "277/1368;;21/8106;;38/2520", "google_scholar": ";;;;bmh6mxAAAAAJ", "orcid": ";;;;", "linkedin": "federico-danieli-4782a0b5/;;;;zappella?trk=people-guest_profile-result-card_result-card_full-click", "or_profile": "~Federico_Danieli1;~Miguel_Sarabia1;~Xavier_Suau1;~Pau_Rodriguez2;~Luca_Zappella1", "aff": "Apple;;Apple;;Apple", "aff_domain": "apple.com;;apple.com;;apple.com", "position": "Researcher;;Research scientist;;Principal Researcher", "bibtex": "@inproceedings{\ndanieli2023deeppcr,\ntitle={Deep{PCR}: Parallelizing Sequential Operations in Neural Networks},\nauthor={Federico Danieli and Miguel Sarabia and Xavier Suau and Pau Rodriguez and Luca Zappella},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RgD92idA32}\n}", "github": "", "project": "", "reviewers": "Cpwv;uCYF;7Bpu", "pdf_size": 3806131, "rating": "5;5;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;4", "wc_summary": "129;105;89", "wc_strengths": "44;85;78", "wc_weaknesses": "66;45;106", "wc_questions": "213;56;90", "wc_limitations": "4;64;7", "wc_review": "456;355;370", "wc_reply_reviewers": "6;0;6", "wc_reply_authors": "61;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 107.66666666666667, 16.438437341250605 ], "wc_strengths_avg": [ 69.0, 17.90716802475106 ], "wc_weaknesses_avg": [ 72.33333333333333, 25.30261295246446 ], "wc_questions_avg": [ 119.66666666666667, 67.44050876307371 ], "wc_limitations_avg": [ 25.0, 27.60434748368452 ], "wc_review_avg": [ 393.6666666666667, 44.4996878890428 ], "wc_reply_reviewers_avg": [ 4.0, 2.8284271247461903 ], "wc_reply_authors_avg": [ 20.333333333333332, 28.755675768252935 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=639651407636698225&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "apple.com;;apple.com;;apple.com", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Reusing Pretrained Models by Multi-linear Operators for Efficient Training", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71650", "id": "RgNXKIrWyU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/09d9a13f7018110cfb439c06b07940a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RgNXKIrWyU", "openreview": "https://openreview.net/forum?id=RgNXKIrWyU", "poster": "/media/PosterPDFs/NeurIPS%202023/71650.png?t=1699603153.0288587", "slides": "https://nips.cc/virtual/2023/poster/71650", "video": "https://nips.cc/virtual/2023/poster/71650", "author_site": "Yu Pan, Ye Yuan, Yichun Yin, Zenglin Xu, Lifeng Shang, Xin Jiang, Qun Liu", "tldr": "", "abstract": "Training large models from scratch usually costs a substantial amount of resources. Towards this problem, recent studies such as bert2BERT and LiGO have reused small pretrained models to initialize a large model (termed the ``target model''), leading to a considerable acceleration in training. Despite the successes of these previous studies, they grew pretrained models by mapping partial weights only, ignoring potential correlations across the entire model. As we show in this paper, there are inter- and intra-interactions among the weights of both the pretrained and the target models. As a result, the partial mapping may not capture the complete information and lead to inadequate growth. In this paper, we propose a method that linearly correlates each weight of the target model to all the weights of the pretrained model to further enhance acceleration ability. We utilize multi-linear operators to reduce computational and spacial complexity, enabling acceptable resource requirements. Experiments demonstrate that our method can save 76\\% computational costs on DeiT-base transferred from DeiT-small, which outperforms bert2BERT by +12\\% and LiGO by +21\\%, respectively.", "keywords": "Model Growth;Efficient Training;Pretrained Model;Multi-linearity", "primary_area": "", "supplementary_material": "/attachment/6a8806e8e6b80679849be50372eac1f1f0f7fe9a.pdf", "author": "Yu Pan;Ye Yuan;Yichun Yin;Zenglin Xu;Lifeng Shang;Xin Jiang;Qun Liu", "authorids": "~Yu_Pan1;~Ye_Yuan12;~Yichun_Yin2;~Zenglin_Xu1;~Lifeng_Shang1;~Xin_Jiang1;~Qun_Liu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://yupan.me;https://github.com/yuanyehome;;https://faculty.fudan.edu.cn/xuzenglin/en/index.htm;;;http://liuquncn.github.io/", "dblp": ";33/6315-16;180/5934;68/1538;70/4288;42/4142-2;75/4402-1", "google_scholar": "NuxEyPAAAAAJ;h8WQaTkAAAAJ;x3Mz21gAAAAJ;gF0H9nEAAAAJ;https://scholar.google.com.hk/citations?user=jMQIjYoAAAAJ;DUfcez0AAAAJ;2HhiGzcAAAAJ", "orcid": "0000-0001-7515-8492;;;0000-0001-5550-6461;;0000-0002-9117-8247;0000-0002-7000-1792", "linkedin": ";%E9%87%8E-%E8%A2%81-0641241a4/;;;;xin-jiang-9577b76/;qunliu/", "or_profile": "~Yu_Pan1;~Ye_Yuan12;~Yichun_Yin2;~Zenglin_Xu1;~Lifeng_Shang1;~Xin_Jiang1;~Qun_Liu1", "aff": "Harbin Institute of Technology, Shenzhen;Peking University;Huawei Noah's Ark Lab;Harbin Institute of Technology Shenzhen;Huawei Technologies Ltd.;Noah\u2019s Ark Lab, Huawei Technologies;Huawei Noah's Ark Lab", "aff_domain": "hit.edu.cn;pku.edu.cn;huawei.com;hit.edu.cn;huawei.com;huawei.com;huawei.com", "position": "PhD Candidate;PhD student;Researcher;Full Professor;Researcher;Principal Researcher;Chief Scientist of Speech and Language Computing", "bibtex": "@inproceedings{\npan2023reusing,\ntitle={Reusing Pretrained Models by Multi-linear Operators for Efficient Training},\nauthor={Yu Pan and Ye Yuan and Yichun Yin and Zenglin Xu and Lifeng Shang and Xin 
Jiang and Qun Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RgNXKIrWyU}\n}", "github": "", "project": "", "reviewers": "9N2L;kqva;cdCX;ShVL;u4ot", "pdf_size": 1610088, "rating": "5;5;6;6;6", "confidence": "4;5;3;2;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;2", "presentation": "3;3;3;3;3", "wc_summary": "70;76;172;53;69", "wc_strengths": "26;35;79;28;65", "wc_weaknesses": "39;255;152;67;79", "wc_questions": "266;4;36;33;31", "wc_limitations": "14;12;1;3;14", "wc_review": "415;382;440;184;258", "wc_reply_reviewers": "18;27;0;0;17", "wc_reply_authors": "32;43;0;0;42", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.0, 42.68489194082609 ], "wc_strengths_avg": [ 46.6, 21.415881957089695 ], "wc_weaknesses_avg": [ 118.4, 77.82955736736525 ], "wc_questions_avg": [ 74.0, 96.68298712803613 ], "wc_limitations_avg": [ 8.8, 5.635601121442148 ], "wc_review_avg": [ 335.8, 98.34714027362463 ], "wc_reply_reviewers_avg": [ 12.4, 10.707007051459337 ], "wc_reply_authors_avg": [ 23.4, 19.489484344127735 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7205766921228919, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5374113563704491386&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hit.edu.cn;pku.edu.cn;huawei.com;hit.edu.cn;huawei.com;huawei.com;huawei.com", "author_num": 7, "aff_unique_index": "0;1;2;0;2;2;2", "aff_unique_norm": "Harbin Institute of Technology;Peking University;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "http://en.hhit.edu.cn/;http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "HIT;Peking U;Huawei", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "WildfireSpreadTS: A dataset of multi-modal time series for wildfire spread prediction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73593", "id": "RgdGkPRQ03", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ebd545176bdaa9cd5d45954947bd74b7-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=RgdGkPRQ03", "openreview": "https://openreview.net/forum?id=RgdGkPRQ03", "poster": "/media/PosterPDFs/NeurIPS%202023/73593.png?t=1699459977.2061107", "slides": "https://nips.cc/virtual/2023/poster/73593", "video": "https://nips.cc/virtual/2023/poster/73593", "author_site": "Sebastian Gerard, Yu Zhao, Josephine Sullivan", "tldr": "", "abstract": "We present a multi-temporal, multi-modal remote-sensing dataset for predicting how active wildfires will spread at a resolution of 24 hours. The dataset consists of 13607 images across 607 fire events in the United States from January 2018 to October 2021. For each fire event, the dataset contains a full time series of daily observations, containing detected active fires and variables related to fuel, topography and weather conditions. 
The dataset is challenging due to: a) its inputs being multi-temporal, b) the high number of 23 multi-modal input channels, c) highly imbalanced labels and d) noisy labels, due to smoke, clouds, and inaccuracies in the active fire detection. The underlying complexity of the physical processes adds to these challenges. Compared to existing public datasets in this area, WildfireSpreadTS allows for multi-temporal modeling of spreading wildfires, due to its time series structure. Furthermore, we provide additional input modalities and a high spatial resolution of 375m for the active fire maps. We publish this dataset to encourage further research on this important task with multi-temporal, noise-resistant or generative methods, uncertainty estimation or advanced optimization techniques that deal with the high-dimensional input space.", "keywords": "remote sensing;satellite images;earth observation;computer vision;multi-temporal;climate change", "primary_area": "", "supplementary_material": "/attachment/b9baa991d6a5b54f00b05f8dfc4defd843ef708a.pdf", "author": "Sebastian Gerard;Yu Zhao;Josephine Sullivan", "authorids": "~Sebastian_Gerard1;~Yu_Zhao12;~Josephine_Sullivan1", "gender": "M;M;F", "homepage": "https://www.kth.se/profile/sgerard?l=en;;", "dblp": ";;32/3671", "google_scholar": ";tIO7m7wAAAAJ;REbc02cAAAAJ", "orcid": "0000-0002-5329-8184;;", "linkedin": ";yu-zhao-81b49113a/;", "or_profile": "~Sebastian_Gerard1;~Yu_Zhao12;~Josephine_Sullivan1", "aff": "KTH Royal Institute of Technology;KTH Royal Institute of Technology;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;kth.se;kth.se", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ngerard2023wildfirespreadts,\ntitle={WildfireSpread{TS}: A dataset of multi-modal time series for wildfire spread prediction},\nauthor={Sebastian Gerard and Yu Zhao and Josephine Sullivan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=RgdGkPRQ03}\n}", "github": "", "project": "", "reviewers": "ANKW;3tFe;KxL4;hHGN;n9fn", "pdf_size": 2305741, "rating": "4;6;7;7;8", "confidence": "4;4;5;3;5", "wc_summary_and_contributions": "101;466;60;92;115", "wc_strengths": "72;354;113;70;71", "wc_improvement": "205;652;142;42;65", "wc_limitations": "5;661;15;13;112", "wc_correctness": "30;549;81;10;33", "wc_clarity": "86;35;121;6;9", "wc_relation_to_prior_work": "123;112;56;16;13", "wc_documentation": "47;110;21;75;60", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "670;2940;610;325;479", "wc_reply_reviewers": "0;234;370;0;71", "wc_reply_authors": "803;795;802;366;449", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 166.8, 150.68828753423406 ], "wc_strengths_avg": [ 136.0, 110.20889256316842 ], "wc_improvement_avg": [ 221.2, 222.9954259620587 ], "wc_limitations_avg": [ 161.2, 252.96513593774142 ], "wc_correctness_avg": [ 140.6, 205.5281975788237 ], "wc_clarity_avg": [ 51.4, 45.098115259952934 ], "wc_relation_to_prior_work_avg": [ 64.0, 46.376718297007606 ], "wc_documentation_avg": [ 62.6, 29.601351320505625 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 1004.8, 974.8385302192358 ], "wc_reply_reviewers_avg": [ 135.0, 145.30794885346089 ], "wc_reply_authors_avg": [ 643.0, 194.0876090841453 ], "reply_reviewers_avg": [ 0.6, 
0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3152441624956403, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15062651558851250535&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": "kth.se;kth.se;kth.se", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "title": "Feature Selection in the Contrastive Analysis Setting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71649", "id": "RhE01dqo8u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d083980ec9f874025550136b776a96a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RhE01dqo8u", "openreview": "https://openreview.net/forum?id=RhE01dqo8u", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71649", "video": "https://nips.cc/virtual/2023/poster/71649", "author_site": "Ethan Weinberger, Ian Covert, Su-In Lee", "tldr": "", "abstract": "Contrastive analysis (CA) refers to the exploration of variations uniquely enriched in a _target_ dataset as compared to a corresponding _background_ dataset generated from sources of variation that are irrelevant to a given task. For example, a biomedical data analyst may wish to find a small set of genes to use as a proxy for variations in genomic data only present among patients with a given disease (target) as opposed to healthy control subjects (background). However, as of yet the problem of feature selection in the CA setting has received little attention from the machine learning community. In this work we present contrastive feature selection (CFS),\na method for performing feature selection in the CA setting. We motivate our approach with a novel information-theoretic analysis of representation learning in the CA setting, and we empirically validate CFS on a semi-synthetic dataset and four real-world biomedical datasets. We find that our method consistently outperforms previously proposed state-of-the-art supervised and fully unsupervised feature selection methods not designed for the CA setting. 
An open-source implementation of our method is available at https://github.com/suinleelab/CFS.", "keywords": "Feature selection;contrastive analysis;computational biology;representation learning;information theory", "primary_area": "", "supplementary_material": "", "author": "Ethan Weinberger;Ian Connick Covert;Su-In Lee", "authorids": "~Ethan_Weinberger2;~Ian_Connick_Covert1;~Su-In_Lee2", "gender": "M;M;F", "homepage": "https://homes.cs.washington.edu/~ewein/;https://iancovert.com;http://suinlee.cs.washington.edu/", "dblp": "217/3451;262/3443;17/1784", "google_scholar": "Jg40o3gAAAAJ;Np8Ek3cAAAAJ;", "orcid": ";;", "linkedin": ";ian-covert/;", "or_profile": "~Ethan_Weinberger2;~Ian_Connick_Covert1;~Su-In_Lee2", "aff": "University of Washington;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;uw.edu;uw.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nweinberger2023feature,\ntitle={Feature Selection in the Contrastive Analysis Setting},\nauthor={Ethan Weinberger and Ian Connick Covert and Su-In Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RhE01dqo8u}\n}", "github": "", "project": "", "reviewers": "nNUw;BGi6;SoSQ;xS67", "pdf_size": 3783550, "rating": "4;5;7;7", "confidence": "2;3;3;4", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;1;3", "wc_summary": "219;56;151;176", "wc_strengths": "20;42;89;64", "wc_weaknesses": "66;41;131;115", "wc_questions": "71;149;60;128", "wc_limitations": "72;12;25;11", "wc_review": "448;300;456;494", "wc_reply_reviewers": "245;0;34;21", "wc_reply_authors": "839;0;39;38", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 150.5, 59.73483071039877 ], "wc_strengths_avg": [ 53.75, 25.616157010761782 ], "wc_weaknesses_avg": [ 88.25, 36.29996556472196 ], "wc_questions_avg": [ 102.0, 37.44996662214801 ], "wc_limitations_avg": [ 30.0, 24.869660230891775 ], "wc_review_avg": [ 424.5, 73.9509972887452 ], "wc_reply_reviewers_avg": [ 75.0, 98.89641045053152 ], "wc_reply_authors_avg": [ 229.0, 352.5343954850363 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12145438801910128468&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "cs.washington.edu;uw.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SimMMDG: A Simple and Effective Framework for Multi-modal Domain Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71648", "id": "RiSMijlsLT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f88bec15cc4cb56b432ee040bb63f94f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RiSMijlsLT", "openreview": 
"https://openreview.net/forum?id=RiSMijlsLT", "poster": "/media/PosterPDFs/NeurIPS%202023/71648.png?t=1699891928.1965616", "slides": "https://nips.cc/virtual/2023/poster/71648", "video": "https://nips.cc/virtual/2023/poster/71648", "author_site": "Hao Dong, Ismail Nejjar, Han Sun, Eleni Chatzi, Olga Fink", "tldr": "", "abstract": "In real-world scenarios, achieving domain generalization (DG) presents significant challenges as models are required to generalize to unknown target distributions. Generalizing to unseen multi-modal distributions poses even greater difficulties due to the distinct properties exhibited by different modalities. To overcome the challenges of achieving domain generalization in multi-modal scenarios, we propose SimMMDG, a simple yet effective multi-modal DG framework. We argue that mapping features from different modalities into the same embedding space impedes model generalization. To address this, we propose splitting the features within each modality into modality-specific and modality-shared components. We employ supervised contrastive learning on the modality-shared features to ensure they possess joint properties and impose distance constraints on modality-specific features to promote diversity. In addition, we introduce a cross-modal translation module to regularize the learned features, which can also be used for missing-modality generalization. We demonstrate that our framework is theoretically well-supported and achieves strong performance in multi-modal DG on the EPIC-Kitchens dataset and the novel Human-Animal-Cartoon (HAC) dataset introduced in this paper. Our source code and HAC dataset are available at https://github.com/donghao51/SimMMDG.", "keywords": "Domain Generalization;Multi-modal Learning;Distribution Shift;Out-of-distribution Generalization", "primary_area": "", "supplementary_material": "", "author": "Hao Dong;Ismail Nejjar;Han Sun;Eleni Chatzi;Olga Fink", "authorids": "~Hao_Dong4;~Ismail_Nejjar1;~Han_Sun6;~Eleni_Chatzi1;~Olga_Fink1", "gender": "M;M;F;F;F", "homepage": "https://sites.google.com/view/dong-hao/;https://people.epfl.ch/ismail.nejjar?lang=en;https://people.epfl.ch/han.sun?lang=en;https://chatzi.ibk.ethz.ch/;", "dblp": ";287/4268;;281/5425;", "google_scholar": "5jcoGEIAAAAJ;UWdDYtAAAAAJ;;2n9Mwt8AAAAJ;eAcIoUgAAAAJ", "orcid": ";;;0000-0002-6870-240X;0000-0002-9546-1488", "linkedin": "hao-dong-276317100/;;%E8%8F%A1-%E5%AD%99-633210214;eleni-chatzi-88065010/;", "or_profile": "~Hao_Dong4;~Ismail_Nejjar1;~Han_Sun6;~Eleni_Chatzi1;~Olga_Fink1", "aff": "ETH Zurich;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology;EPFL - EPF Lausanne", "aff_domain": "ethz.ch;epfl.ch;epfl.ch;ethz.ch;epfl.ch", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndong2023simmmdg,\ntitle={Sim{MMDG}: A Simple and Effective Framework for Multi-modal Domain Generalization},\nauthor={Hao Dong and Ismail Nejjar and Han Sun and Eleni Chatzi and Olga Fink},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RiSMijlsLT}\n}", "github": "", "project": "", "reviewers": "8EtQ;Zavq;c1hR;kbvb;93nE", "pdf_size": 9266887, "rating": "5;5;6;6;6", "confidence": "5;2;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "105;65;127;82;160", "wc_strengths": "46;31;146;101;100", "wc_weaknesses": "146;46;106;368;420", "wc_questions": 
"0;3;26;2;107", "wc_limitations": "0;19;9;12;18", "wc_review": "297;164;414;565;805", "wc_reply_reviewers": "208;25;73;92;168", "wc_reply_authors": "1259;163;167;168;665", "reply_reviewers": "3;1;1;1;2", "reply_authors": "4;3;3;3;3", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.8, 33.46281518342412 ], "wc_strengths_avg": [ 84.8, 41.56633253006573 ], "wc_weaknesses_avg": [ 217.2, 148.73654561001476 ], "wc_questions_avg": [ 27.6, 40.81470323302621 ], "wc_limitations_avg": [ 11.6, 6.887670143089026 ], "wc_review_avg": [ 449.0, 221.69618851031245 ], "wc_reply_reviewers_avg": [ 113.2, 66.08297814112194 ], "wc_reply_authors_avg": [ 484.4, 432.84436001870233 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 3.2, 0.39999999999999997 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.25, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11721115861529432465&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "ethz.ch;epfl.ch;epfl.ch;ethz.ch;epfl.ch", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology Lausanne;EPFL;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ethz.ch;https://www.epfl.ch;https://www.epfl.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;EPFL;EPFL;ETH Zurich", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Active representation learning for general task space with applications in robotics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71647", "id": "RiwPYAMLur", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff4039889b7f89635e9cbd5cefffa0d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RiwPYAMLur", "openreview": "https://openreview.net/forum?id=RiwPYAMLur", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71647", "video": "https://nips.cc/virtual/2023/poster/71647", "author_site": "Yifang Chen, Yingbing Huang, Simon Du, Kevin Jamieson, Guanya Shi", "tldr": "", "abstract": "Representation learning based on multi-task pretraining has become a powerful approach in many domains. In particular, task-aware representation learning aims to learn an optimal representation for a specific target task by sampling data from a set of source tasks, while task-agnostic representation learning seeks to learn a universal representation for a class of tasks. In this paper, we propose a general and versatile algorithmic and theoretic framework for \\emph{active representation learning}, where the learner optimally chooses which source tasks to sample from. This framework, along with a tractable meta algorithm, allows most arbitrary target and source task spaces (from discrete to continuous), covers both task-aware and task-agnostic settings, and is compatible with deep representation learning practices. \nWe provide several instantiations under this framework, from bilinear and feature-based nonlinear to general nonlinear cases. 
In the bilinear case, by leveraging the non-uniform spectrum of the task representation and the calibrated source-target relevance, we prove that the sample complexity to achieve $\\varepsilon$-excess risk on target scales with $(k^*)^2 ||v^*||_2^2 \\varepsilon^{-2}$\n where $k^*$ is the effective dimension of the target and $||v^*||_2^2 \\in (0,1]$ represents the connection between source and target space. Compared to the passive one, this can save up to $\\frac{1}{d_W}$ of sample complexity, where $d_W$ is the task space dimension. \nFinally, we demonstrate different instantiations of our meta algorithm in synthetic datasets and robotics problems, from pendulum simulations to real-world drone flight datasets. On average, our algorithms outperform baselines by 20%-70%.", "keywords": "active learning;representation learning;robotics;theory", "primary_area": "", "supplementary_material": "/attachment/c2c132c9ad34b53b9decde45f9ce95d4687b6048.pdf", "author": "Yifang Chen;Yingbing Huang;Simon Shaolei Du;Kevin Jamieson;Guanya Shi", "authorids": "~Yifang_Chen1;~Yingbing_Huang1;~Simon_Shaolei_Du1;~Kevin_Jamieson1;~Guanya_Shi1", "gender": "F;;M;M;M", "homepage": ";https://wendyh1108.github.io/;http://simonshaoleidu.com;;http://guanyashi.github.io", "dblp": "20/8403-1;;176/5602;85/10260;230/4386", "google_scholar": "LUz2mN4AAAAJ;;OttawxUAAAAJ;;joR1Z4UAAAAJ", "orcid": ";;;;0000-0002-9075-3705", "linkedin": ";;;;guanya-shi-b07b43126/", "or_profile": "~Yifang_Chen1;~Yingbing_Huang1;~Simon_Shaolei_Du1;~Kevin_Jamieson1;~Guanya_Shi1", "aff": "Department of Computer Science, University of Washington;University of Illinois, Urbana Champaign;Meta Facebook;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;uiuc.edu;fb.com;washington.edu;uw.edu", "position": "PhD student;PhD student;Visiting Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nchen2023active,\ntitle={Active representation learning for general task space with applications in robotics},\nauthor={Yifang Chen and Yingbing Huang and Simon Shaolei Du and Kevin Jamieson and Guanya Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RiwPYAMLur}\n}", "github": "", "project": "", "reviewers": "LcWv;7KMj;F2sU;DHY3;hbuX;7vYB", "pdf_size": 484936, "rating": "1;5;5;5;6;7", "confidence": "1;2;4;1;1;4", "soundness": "2;3;3;3;2;4", "novelty": "2;2;3;2;4;3", "presentation": "1;1;3;3;4;3", "wc_summary": "59;144;112;67;79;142", "wc_strengths": "5;55;115;31;52;184", "wc_weaknesses": "8;196;132;69;173;372", "wc_questions": "1;35;4;70;29;188", "wc_limitations": "4;40;4;21;1;13", "wc_review": "77;470;367;258;334;899", "wc_reply_reviewers": "0;71;0;46;0;39", "wc_reply_authors": "0;203;0;0;407;0", "reply_reviewers": "0;1;0;1;0;1", "reply_authors": "0;2;1;1;2;1", "rating_avg": [ 4.833333333333333, 1.863389981249825 ], "confidence_avg": [ 2.1666666666666665, 1.3437096247164249 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999299 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 100.5, 34.28678073738235 ], "wc_strengths_avg": [ 73.66666666666667, 59.4913625850192 ], "wc_weaknesses_avg": [ 158.33333333333334, 114.42707527309939 ], "wc_questions_avg": [ 54.5, 63.908137197073735 ], "wc_limitations_avg": [ 13.833333333333334, 13.508227945301417 ], "wc_review_avg": [ 400.8333333333333, 252.927800413917 ], "wc_reply_reviewers_avg": [ 26.0, 27.75487945088815 
], "wc_reply_authors_avg": [ 101.66666666666667, 155.37124000992662 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.1666666666666667, 0.6871842709362768 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4770421687536971, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15291889220550305041&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "cs.washington.edu;uiuc.edu;fb.com;washington.edu;uw.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Washington;University of Illinois Urbana-Champaign;Meta", "aff_unique_dep": "Department of Computer Science;;Meta Platforms, Inc.", "aff_unique_url": "https://www.washington.edu;https://illinois.edu;https://meta.com", "aff_unique_abbr": "UW;UIUC;Meta", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Seattle;Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Posterior Sampling with Delayed Feedback for Reinforcement Learning with Linear Function Approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71646", "id": "RiyH3z7oIF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15d3d4a4bd808605e3a3c1ea0fd0eba4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RiyH3z7oIF", "openreview": "https://openreview.net/forum?id=RiyH3z7oIF", "poster": "/media/PosterPDFs/NeurIPS%202023/71646.png?t=1702362883.9005537", "slides": "https://nips.cc/virtual/2023/poster/71646", "video": "https://nips.cc/virtual/2023/poster/71646", "author_site": "Nikki Lijing Kuang, Ming Yin, Mengdi Wang, Yu-Xiang Wang, Yian Ma", "tldr": "", "abstract": "Recent studies in reinforcement learning (RL) have made significant progress by leveraging function approximation to alleviate the sample complexity hurdle for better performance. Despite the success, existing provably efficient algorithms typically rely on the accessibility of immediate feedback upon taking actions. The failure to account for the impact of delay in observations can significantly degrade the performance of real-world systems due to the regret blow-up. In this work, we tackle the challenge of delayed feedback in RL with linear function approximation by employing posterior sampling, which has been shown to empirically outperform the popular UCB algorithms in a wide range of regimes. We first introduce \\textit{Delayed-PSVI}, an optimistic value-based algorithm that effectively explores the value function space via noise perturbation with posterior sampling. We provide the first analysis for posterior sampling algorithms with delayed feedback in RL and show our algorithm achieves $\\widetilde{O}(\\sqrt{d^3H^3 T} + d^2H^2 \\mathbb{E}[\\tau])$ worst-case regret in the presence of unknown stochastic delays. Here $\\mathbb{E}[\\tau]$ is the expected delay. To further improve its computational efficiency and to expand its applicability in high-dimensional RL problems, we incorporate a gradient-based approximate sampling scheme via Langevin dynamics for \\textit{Delayed-LPSVI}, which maintains the same order-optimal regret guarantee with $\\widetilde{O}(dHK)$ computational cost. 
Empirical evaluations are performed to demonstrate the statistical and computational efficacy of our algorithms.", "keywords": "Posterior Sampling;Reinforcement Learning Theory;Linear Markov Decision Processes;Delayed Feedback;Langevin Monte Carlo", "primary_area": "", "supplementary_material": "", "author": "Nikki Lijing Kuang;Ming Yin;Mengdi Wang;Yu-Xiang Wang;Yian Ma", "authorids": "~Nikki_Lijing_Kuang1;~Ming_Yin4;~Mengdi_Wang1;~Yu-Xiang_Wang1;~Yian_Ma1", "gender": "M;F;;M;F", "homepage": "https://mingyin0312.github.io;http://mwang.princeton.edu;http://www.cs.ucsb.edu/~yuxiangw/publications.html;https://sites.google.com/view/yianma;", "dblp": "89/453.html;;62/1637-3.html;;229/9146", "google_scholar": "ncBRYIUAAAAJ;;HGNZ1fkAAAAJ;A0TFlacAAAAJ;XYhmg74AAAAJ", "orcid": "0000-0001-6458-0751;;;;", "linkedin": ";;;;", "or_profile": "~Ming_Yin4;~Mengdi_Wang1;~Yu-Xiang_Wang1;~Yian_Ma1;~Nikki_Kuang1", "aff": "UC, Santa Barbara;Princeton University;UC Santa Barbara;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsb.edu;princeton.edu;ucsb.edu;ucsd.edu;ucsd.edu", "position": "PhD student;Full Professor;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nkuang2023posterior,\ntitle={Posterior Sampling with Delayed Feedback for Reinforcement Learning with Linear Function Approximation},\nauthor={Nikki Lijing Kuang and Ming Yin and Mengdi Wang and Yu-Xiang Wang and Yian Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RiyH3z7oIF}\n}", "github": "", "project": "", "reviewers": "TvPe;7RMs;U3pE;p2ng;UyNX;RbB5;8fzn", "pdf_size": 2523926, "rating": "5;5;5;5;5;6;6", "confidence": "3;2;3;3;3;2;3", "soundness": "3;3;2;3;3;3;3", "novelty": "2;2;3;2;2;2;3", "presentation": "1;2;1;3;3;3;3", "wc_summary": "164;63;30;144;79;125;50", "wc_strengths": "41;37;31;63;91;34;34", "wc_weaknesses": "234;99;249;142;210;29;1", "wc_questions": "111;122;62;4;4;132;41", "wc_limitations": "21;18;9;4;6;29;1", "wc_review": "571;339;381;357;390;349;127", "wc_reply_reviewers": "675;16;83;0;13;0;0", "wc_reply_authors": "1489;47;587;663;368;0;0", "reply_reviewers": "3;1;2;0;1;0;0", "reply_authors": "4;2;3;2;3;1;1", "rating_avg": [ 5.285714285714286, 0.45175395145262565 ], "confidence_avg": [ 2.7142857142857144, 0.45175395145262565 ], "soundness_avg": [ 2.857142857142857, 0.3499271061118826 ], "novelty_avg": [ 2.2857142857142856, 0.4517539514526256 ], "presentation_avg": [ 2.2857142857142856, 0.880630571852711 ], "wc_summary_avg": [ 93.57142857142857, 47.174015677086906 ], "wc_strengths_avg": [ 47.285714285714285, 20.429070922964417 ], "wc_weaknesses_avg": [ 137.71428571428572, 91.73520010756782 ], "wc_questions_avg": [ 68.0, 50.452240499591014 ], "wc_limitations_avg": [ 12.571428571428571, 9.514757603055264 ], "wc_review_avg": [ 359.14285714285717, 119.95049999470446 ], "wc_reply_reviewers_avg": [ 112.42857142857143, 231.28761139255485 ], "wc_reply_authors_avg": [ 450.57142857142856, 495.3771599622301 ], "reply_reviewers_avg": [ 1.0, 1.0690449676496976 ], "reply_authors_avg": [ 2.2857142857142856, 1.0301575072754257 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12313626740158855345&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucsb.edu;princeton.edu;ucsb.edu;ucsd.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;2", 
"aff_unique_norm": "University of California, Santa Barbara;Princeton University;University of California, San Diego", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsb.edu;https://www.princeton.edu;https://www.ucsd.edu", "aff_unique_abbr": "UCSB;Princeton;UCSD", "aff_campus_unique_index": "0;0;2;2", "aff_campus_unique": "Santa Barbara;;San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "H2O: Heavy-Hitter Oracle for Efficient Generative Inference of Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71645", "id": "RkRrPp7GKO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ceefa7b15572587b78ecfcebb2827f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RkRrPp7GKO", "openreview": "https://openreview.net/forum?id=RkRrPp7GKO", "poster": "/media/PosterPDFs/NeurIPS%202023/71645.png?t=1701744870.2426875", "slides": "https://nips.cc/virtual/2023/poster/71645", "video": "https://nips.cc/virtual/2023/poster/71645", "author_site": "Zhenyu Zhang, Ying Sheng, Tianyi Zhou, Tianlong Chen, Tianlong Chen, Lianmin Zheng, Ruisi Cai, Zhao Song, Yuandong Tian, Christopher R\u00e9, Clark Barrett, Zhangyang \"Atlas\" Wang, Beidi Chen", "tldr": "", "abstract": "Large Language Models (LLMs), despite their recent impressive accomplishments, are notably cost-prohibitive to deploy, particularly for applications involving long-content generation, such as dialogue systems and story writing. Often, a large amount of transient state information, referred to as the $\\mathsf{KV}$ $\\mathsf{cache}$, is stored in GPU memory in addition to model parameters, scaling linearly with the sequence length and batch size. In this paper, we introduce a novel approach for implementing the $\\mathsf{KV}$ $\\mathsf{cache}$ which significantly reduces its memory footprint. Our approach is based on the noteworthy observation that a small portion of tokens contributes most of the value when computing attention scores. \n We call these tokens Heavy Hitters ($\\mathsf{H_2}$). Through a comprehensive investigation, we find that ($i$) the emergence of $\\mathsf{H_2}$ is natural and strongly correlates with the frequent co-occurrence of tokens in the text, and ($ii$) removing them results in significant performance degradation. Based on these insights, we propose Heavy Hitter Oracle ($\\mathsf{H_2O}$), a $\\mathsf{KV}$ $\\mathsf{cache}$ eviction policy that dynamically retains a balance of recent and $\\mathsf{H_2}$ tokens.\n We formulate the $\\mathsf{KV}$ $\\mathsf{cache}$ eviction as a dynamic submodular problem and prove (under mild assumptions) a theoretical guarantee for our novel eviction algorithm which could help guide future work. We validate the accuracy of our algorithm with OPT, LLaMA, and GPT-NeoX across a wide range of tasks. Our implementation of $\\mathsf{H_2O}$ with 20\\% heavy hitters improves the throughput over three leading inference systems DeepSpeed Zero-Inference, Hugging Face Accelerate, and FlexGen by up to $29\\times$, $29\\times$, and $3\\times$ on OPT-6.7B and OPT-30B. 
With the same batch size, $\\mathsf{H_2O}$ can reduce the latency by up to $1.9\\times$.", "keywords": "Large Language Models; Efficient Generative Inference", "primary_area": "", "supplementary_material": "", "author": "Zhenyu Zhang;Ying Sheng;Tianyi Zhou;Tianlong Chen;Lianmin Zheng;Ruisi Cai;Zhao Song;Yuandong Tian;Christopher Re;Clark Barrett;Zhangyang Wang;Beidi Chen", "authorids": "~Zhenyu_Zhang4;~Ying_Sheng1;~Tianyi_Zhou4;~Tianlong_Chen1;~Lianmin_Zheng2;~Ruisi_Cai1;~Zhao_Song3;~Yuandong_Tian1;~Christopher_Re1;~Clark_Barrett1;~Zhangyang_Wang1;~Beidi_Chen1", "gender": "M;F;;M;M;F;M;M;;M;M;F", "homepage": "https://zhenyu.gallery;https://sites.google.com/view/yingsheng;;https://tianlong-chen.github.io;http://lmzheng.net/;https://cairuisi.github.io;https://www.youtube.com/@zhaosong2031;http://yuandong-tian.com;;http://theory.stanford.edu/~barrett;https://vita-group.github.io;https://www.andrew.cmu.edu/user/beidic/", "dblp": "01/1844-15;262/6232.html;;;211/7027;341/1491;76/4051-2;t/YuandongTian;;b/ClarkWBarrett;119/4026;192/1339", "google_scholar": "ZLyJRxoAAAAJ;xMhGYpgAAAAJ;;LE3ctn0AAAAJ;_7Q8uIYAAAAJ;B0chY1AAAAAJ;yDZct7UAAAAJ;0mgEF28AAAAJ;;https://scholar.google.com.tw/citations?user=BtwmZfQAAAAJ;pxFyKAIAAAAJ;", "orcid": ";0000-0002-1883-2126;;0000-0001-7774-8197;;;;0000-0003-4202-4847;;0000-0002-9522-3084;;", "linkedin": "zhenyu-allen-zhang-a9b1391a3/;;;tianlong-chen-783862167/;;;;yuandongtian;;clark-barrett-a5b157/;;", "or_profile": "~Zhenyu_Zhang4;~Ying_Sheng1;~Tianyi_Zhou4;~Tianlong_Chen1;~Lianmin_Zheng2;~Ruisi_Cai1;~Zhao_Song3;~Yuandong_Tian1;~Christopher_Re1;~Clark_Barrett1;~Zhangyang_Wang1;~Beidi_Chen1", "aff": "University of Texas at Austin;Stanford University;;University of Texas, Austin;University of California, Berkeley;University of Texas at Austin;Adobe;Meta AI (FAIR);;Stanford University;University of Texas, Austin;Meta Facebook", "aff_domain": "utexas.edu;stanford.edu;;utexas.edu;berkeley.edu;utexas.edu;adobe.com;meta.com;;stanford.edu;utexas.edu;fb.com", "position": "PhD student;PhD student;;PhD student;PhD student;PhD student;Researcher;Research Scientist;;Professor (Research);Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023ho,\ntitle={H2O: Heavy-Hitter Oracle for Efficient Generative Inference of Large Language Models},\nauthor={Zhenyu Zhang and Ying Sheng and Tianyi Zhou and Tianlong Chen and Lianmin Zheng and Ruisi Cai and Zhao Song and Yuandong Tian and Christopher Re and Clark Barrett and Zhangyang Wang and Beidi Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RkRrPp7GKO}\n}", "github": "", "project": "", "reviewers": "nR7P;AZfb;N7jZ;CXTK", "pdf_size": 3265278, "rating": "6;7;8;8", "confidence": "4;3;4;4", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "3;2;4;4", "wc_summary": "49;83;69;61", "wc_strengths": "28;58;81;37", "wc_weaknesses": "254;175;73;85", "wc_questions": "14;37;33;2", "wc_limitations": "42;5;1;1", "wc_review": "387;358;257;186", "wc_reply_reviewers": "668;22;14;12", "wc_reply_authors": "1032;20;27;30", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 65.5, 12.359207094308275 ], "wc_strengths_avg": [ 51.0, 20.457272545478784 ], "wc_weaknesses_avg": [ 146.75, 73.40427439870243 ], "wc_questions_avg": [ 
21.5, 14.221462653327892 ], "wc_limitations_avg": [ 12.25, 17.25362280797862 ], "wc_review_avg": [ 297.0, 80.2215681721568 ], "wc_reply_reviewers_avg": [ 179.0, 282.349074728429 ], "wc_reply_authors_avg": [ 277.25, 435.7702232828673 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 420, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18236068363807432702&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "utexas.edu;stanford.edu;;utexas.edu;berkeley.edu;utexas.edu;adobe.com;meta.com;;stanford.edu;utexas.edu;fb.com", "author_num": 12, "aff_unique_index": "0;1;0;2;0;3;4;1;0;4", "aff_unique_norm": "University of Texas at Austin;Stanford University;University of California, Berkeley;Adobe;Meta", "aff_unique_dep": ";;;Adobe Inc.;Facebook AI Research (FAIR)", "aff_unique_url": "https://www.utexas.edu;https://www.stanford.edu;https://www.berkeley.edu;https://www.adobe.com;https://ai.facebook.com", "aff_unique_abbr": "UT Austin;Stanford;UC Berkeley;Adobe;Meta AI", "aff_campus_unique_index": "0;1;0;2;0;1;0", "aff_campus_unique": "Austin;Stanford;Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Agnostic Multi-Group Active Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71644", "id": "RmxP5ZcQhC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/03b1043052700b1a471996b0baf309d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RmxP5ZcQhC", "openreview": "https://openreview.net/forum?id=RmxP5ZcQhC", "poster": "/media/PosterPDFs/NeurIPS%202023/71644.png?t=1702250439.7331822", "slides": "https://nips.cc/virtual/2023/poster/71644", "video": "https://nips.cc/virtual/2023/poster/71644", "author_site": "Nicholas Rittler, Kamalika Chaudhuri", "tldr": "", "abstract": "Inspired by the problem of improving classification accuracy on rare or hard subsets of a population, there has been recent interest in models of learning where the goal is to generalize to a collection of distributions, each representing a ``group''. We consider a variant of this problem from the perspective of active learning, where the learner is endowed with the power to decide which examples are labeled from each distribution in the collection, and the goal is to minimize the number of label queries while maintaining PAC-learning guarantees. Our main challenge is that standard active learning techniques such as disagreement-based active learning do not directly apply to the multi-group learning objective. We modify existing algorithms to provide a consistent active learning algorithm for an agnostic formulation of multi-group learning, which given a collection of $G$ distributions and a hypothesis class $\\mathcal{H}$ with VC-dimension $d$, outputs an $\\epsilon$-optimal hypothesis using $\\tilde{O}\\left( (\\nu^2/\\epsilon^2) G d \\theta_{\\mathcal{G}}^2 \\log^2(1/\\epsilon) + G\\log(1/\\epsilon)/\\epsilon^2 \\right)$ label queries, where $\\theta_{\\mathcal{G}}$ is the worst-case disagreement coefficient over the collection. Roughly speaking, this guarantee improves upon the label complexity of standard multi-group learning in regimes where disagreement-based active learning algorithms may be expected to succeed, and the number of groups is not too large. 
We also consider the special case where each distribution in the collection is individually realizable with respect to $\\mathcal{H}$, and demonstrate $\\tilde{O}\\left( G d \\theta_{\\mathcal{G}} \\log(1/\\epsilon) \\right)$ label queries are sufficient for learning in this case. We further give an approximation result for the full agnostic case inspired by the group realizable strategy.", "keywords": "learning theory;active learning;multi-group learning", "primary_area": "", "supplementary_material": "/attachment/ef0ca56516f4063a3bbba3f2fb85f53fd6f06fad.pdf", "author": "Nicholas Rittler;Kamalika Chaudhuri", "authorids": "~Nicholas_Rittler1;~Kamalika_Chaudhuri1", "gender": "M;F", "homepage": ";http://cseweb.ucsd.edu/users/kamalika", "dblp": ";56/6435", "google_scholar": ";I-DJ7EsAAAAJ", "orcid": ";", "linkedin": "nicholas-r-515909111/;", "or_profile": "~Nicholas_Rittler1;~Kamalika_Chaudhuri1", "aff": "University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nrittler2023agnostic,\ntitle={Agnostic Multi-Group Active Learning},\nauthor={Nicholas Rittler and Kamalika Chaudhuri},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RmxP5ZcQhC}\n}", "github": "", "project": "", "reviewers": "T3gQ;7xXY;eXQR;rZgu", "pdf_size": 342855, "rating": "4;5;6;7", "confidence": "3;4;3;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "88;55;68;229", "wc_strengths": "99;44;74;66", "wc_weaknesses": "367;210;64;46", "wc_questions": "130;31;33;57", "wc_limitations": "31;1;1;47", "wc_review": "715;341;240;445", "wc_reply_reviewers": "97;15;9;16", "wc_reply_authors": "168;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 110.0, 69.70294111441784 ], "wc_strengths_avg": [ 70.75, 19.66437133498043 ], "wc_weaknesses_avg": [ 171.75, 129.43024182933445 ], "wc_questions_avg": [ 62.75, 40.15205474194316 ], "wc_limitations_avg": [ 20.0, 19.82422760159901 ], "wc_review_avg": [ 435.25, 177.0316002865025 ], "wc_reply_reviewers_avg": [ 34.25, 36.32750335489627 ], "wc_reply_authors_avg": [ 42.0, 72.74613391789285 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1980284745530765932&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucsd.edu;ucsd.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Video Domain Adaptation for Action Recognition: A Disentanglement Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71643", "id": "Rp4PA0ez0m", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/39235c56aef13fb05a6adc95eb9d8d66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Rp4PA0ez0m", "openreview": "https://openreview.net/forum?id=Rp4PA0ez0m", "poster": "/media/PosterPDFs/NeurIPS%202023/71643.png?t=1697490824.2899086", "slides": "https://nips.cc/virtual/2023/poster/71643", "video": "https://nips.cc/virtual/2023/poster/71643", "author_site": "Pengfei Wei, Lingdong Kong, Xinghua Qu, Yi Ren, Zhiqiang Xu, Jing Jiang, Xiang Yin", "tldr": "", "abstract": "Unsupervised video domain adaptation is a practical yet challenging task. In this work, for the first time, we tackle it from a disentanglement view. Our key idea is to handle the spatial and temporal domain divergence separately through disentanglement. Specifically, we consider the generation of cross-domain videos from two sets of latent factors, one encoding the static information and another encoding the dynamic information. A Transfer Sequential VAE (TranSVAE) framework is then developed to model such generation. To better serve for adaptation, we propose several objectives to constrain the latent factors. With these constraints, the spatial divergence can be readily removed by disentangling the static domain-specific information out, and the temporal divergence is further reduced from both frame- and video-levels through adversarial learning. Extensive experiments on the UCF-HMDB, Jester, and Epic-Kitchens datasets verify the effectiveness and superiority of TranSVAE compared with several state-of-the-art approaches.", "keywords": "action recognition;unsupervised domain adaptation;video analysis", "primary_area": "", "supplementary_material": "/attachment/5d43ec73fb10fa481d8bc04e329a965b8f20e8d3.zip", "author": "Pengfei Wei;Lingdong Kong;Xinghua Qu;Yi Ren;zhiqiang xu;Jing Jiang;Xiang Yin", "authorids": "~Pengfei_Wei3;~Lingdong_Kong1;~Xinghua_Qu1;~Yi_Ren2;~zhiqiang_xu1;~Jing_Jiang6;~Xiang_Yin2", "gender": ";;M;M;M;F;M", "homepage": ";;https://xinghua-qu.github.io/;https://rayeren.github.io/;https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;https://www.uts.edu.au/staff/jing.jiang;", "dblp": ";;18/1099;75/6568-6;72/51-3.html;68/1974-2;18/1022-6.html", "google_scholar": ";;https://scholar.google.com.sg/citations?user=2PxlmU0AAAAJ;4FA6C0AAAAAJ;;https://scholar.google.com.au/citations?hl=en;e6_J-lEAAAAJ", "orcid": ";;0000-0001-8072-2019;;0000-0002-5693-8933;;", "linkedin": ";;xinghua-qu/;;;;", "or_profile": "~Pengfei_Wei3;~Lingdong_Kong1;~Xinghua_Qu1;~Yi_Ren2;~zhiqiang_xu1;~Jing_Jiang6;~Xiang_Yin2", "aff": ";;Bytedance Seed;ByteDance;Mohamed bin Zayed University of Artificial Intelligence;University of Technology Sydney;ByteDance Inc.", "aff_domain": ";;bytedance.com;bytedance.com;mbzuai.ac.ae;uts.edu.au;bytedance.com", "position": ";;Research Scientist;Researcher;Assistant Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nwei2023unsupervised,\ntitle={Unsupervised Video Domain Adaptation for Action Recognition: A Disentanglement Perspective},\nauthor={Pengfei Wei and Lingdong Kong and Xinghua Qu and Yi Ren and zhiqiang xu and Jing Jiang and Xiang Yin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Rp4PA0ez0m}\n}", "github": "", "project": "", "reviewers": "qCdJ;AobY;cyvj;3nXx;JyS2", "pdf_size": 5988618, "rating": "5;5;5;5;6", "confidence": "4;5;4;4;4", "soundness": "2;2;3;4;3", "novelty": "3;2;3;3;2", "presentation": "2;2;3;3;4", 
"wc_summary": "78;49;108;117;112", "wc_strengths": "44;51;56;49;124", "wc_weaknesses": "438;214;229;154;119", "wc_questions": "24;56;166;71;10", "wc_limitations": "9;2;2;9;14", "wc_review": "593;372;561;400;379", "wc_reply_reviewers": "213;28;69;271;33", "wc_reply_authors": "937;52;51;1024;19", "reply_reviewers": "2;1;1;2;1", "reply_authors": "4;2;2;4;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 92.8, 25.779061270729002 ], "wc_strengths_avg": [ 64.8, 29.848953080468334 ], "wc_weaknesses_avg": [ 230.8, 111.00882847773866 ], "wc_questions_avg": [ 65.4, 54.81459659616223 ], "wc_limitations_avg": [ 7.2, 4.621688003316537 ], "wc_review_avg": [ 461.0, 95.69743988216194 ], "wc_reply_reviewers_avg": [ 122.8, 100.04478996929325 ], "wc_reply_authors_avg": [ 416.6, 461.39640223998276 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6882065303942970650&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";;bytedance.com;bytedance.com;mbzuai.ac.ae;uts.edu.au;bytedance.com", "author_num": 7, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "ByteDance;Mohamed bin Zayed University of Artificial Intelligence;University of Technology Sydney", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bytedance.com;https://mbzuai.ac.ae;https://www.uts.edu.au", "aff_unique_abbr": "Bytedance;MBZUAI;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "China;United Arab Emirates;Australia" }, { "title": "Spectral Co-Distillation for Personalized Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71642", "id": "RqjQL08UFc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b86cf4b15cd83b6520d851eb7298228-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RqjQL08UFc", "openreview": "https://openreview.net/forum?id=RqjQL08UFc", "poster": "/media/PosterPDFs/NeurIPS%202023/71642.png?t=1701507762.1157641", "slides": "https://nips.cc/virtual/2023/poster/71642", "video": "https://nips.cc/virtual/2023/poster/71642", "author_site": "Zihan Chen, Howard Yang, Tony Quek, Kai Fong Ernest Chong", "tldr": "", "abstract": "Personalized federated learning (PFL) has been widely investigated to address the challenge of data heterogeneity, especially when a single generic model is inadequate in satisfying the diverse performance requirements of local clients simultaneously. Existing PFL methods are inherently based on the idea that the relations between the generic global and personalized local models are captured by the similarity of model weights. Such a similarity is primarily based on either partitioning the model architecture into generic versus personalized components or modeling client relationships via model weights. To better capture similar (yet distinct) generic versus personalized model representations, we propose $\\textit{spectral distillation}$, a novel distillation method based on model spectrum information. 
Building upon spectral distillation, we also introduce a co-distillation framework that establishes a two-way bridge between generic and personalized model training. Moreover, to utilize the local idle time in conventional PFL, we propose a wait-free local training protocol. Through extensive experiments on multiple datasets over diverse heterogeneous data settings, we demonstrate the superior performance and efficacy of our proposed spectral co-distillation method, as well as our wait-free training protocol.", "keywords": "Personalized federated learning;spectral bias;co-distillation;communication efficiency", "primary_area": "", "supplementary_material": "", "author": "Zihan Chen;Howard Hao Yang;Tony Quek;Kai Fong Ernest Chong", "authorids": "~Zihan_Chen1;~Howard_Hao_Yang1;~Tony_Quek1;~Kai_Fong_Ernest_Chong1", "gender": "M;M;M;", "homepage": "https://www.linkedin.com/in/zihan-chen-961217144/;https://person.zju.edu.cn/en/howardyang;https://people.sutd.edu.sg/~tonyquek/;", "dblp": "139/3503-1;87/763;65/1128;64/7802", "google_scholar": ";https://scholar.google.com.sg/citations?user=q0z9D9cAAAAJ;https://scholar.google.com.tw/citations?user=0o1tkokAAAAJ;JewaBYEAAAAJ", "orcid": ";;0000-0002-4037-3149;", "linkedin": ";;;", "or_profile": "~Zihan_Chen1;~Howard_Hao_Yang1;~Tony_Quek1;~Kai_Fong_Ernest_Chong1", "aff": "Singapore University of Technology and Design;Zhejiang University;Singapore University of Technology and Design;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;zju.edu.cn;sutd.edu.sg;sutd.edu.sg", "position": "Postdoc;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023spectral,\ntitle={Spectral Co-Distillation for Personalized Federated Learning},\nauthor={Zihan Chen and Howard Hao Yang and Tony Quek and Kai Fong Ernest Chong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RqjQL08UFc}\n}", "github": "", "project": "", "reviewers": "FUrR;UBhD;rUNY;8564;6pBd;jcpB", "pdf_size": 400179, "rating": "4;5;5;5;6;7", "confidence": "3;3;4;4;3;3", "soundness": "3;3;3;2;3;3", "novelty": "2;2;3;2;3;4", "presentation": "3;2;1;2;3;4", "wc_summary": "40;82;96;167;150;67", "wc_strengths": "32;57;74;52;93;59", "wc_weaknesses": "18;136;210;763;79;47", "wc_questions": "48;164;25;47;2;115", "wc_limitations": "1;11;8;1;2;19", "wc_review": "139;450;413;1030;326;307", "wc_reply_reviewers": "0;0;58;25;0;17", "wc_reply_authors": "0;0;147;84;0;38", "reply_reviewers": "0;0;1;1;0;1", "reply_authors": "1;1;2;2;1;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 100.33333333333333, 44.7536466546458 ], "wc_strengths_avg": [ 61.166666666666664, 18.862808792848313 ], "wc_weaknesses_avg": [ 208.83333333333334, 255.561745094127 ], "wc_questions_avg": [ 66.83333333333333, 55.471964891185245 ], "wc_limitations_avg": [ 7.0, 6.557438524302 ], "wc_review_avg": [ 444.1666666666667, 279.9139252619554 ], "wc_reply_reviewers_avg": [ 16.666666666666668, 20.861980304425135 ], "wc_reply_authors_avg": [ 44.833333333333336, 54.84650297775501 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 20,
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=16632380585010775375&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sutd.edu.sg;zju.edu.cn;sutd.edu.sg;sutd.edu.sg", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Singapore University of Technology and Design;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.zju.edu.cn", "aff_unique_abbr": "SUTD;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Cluster-aware Semi-supervised Learning: Relational Knowledge Distillation Provably Learns Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71641", "id": "RrdBNXBUIF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8037f47a6254eb60899a644bd90b4f6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RrdBNXBUIF", "openreview": "https://openreview.net/forum?id=RrdBNXBUIF", "poster": "/media/PosterPDFs/NeurIPS%202023/71641.png?t=1701449673.1976824", "slides": "https://nips.cc/virtual/2023/poster/71641", "video": "https://nips.cc/virtual/2023/poster/71641", "author_site": "Yijun Dong, Kevin Miller, Qi Lei, Rachel Ward", "tldr": "", "abstract": "Despite the empirical success and practical significance of (relational) knowledge distillation that matches (the relations of) features between teacher and student models, the corresponding theoretical interpretations remain limited for various knowledge distillation paradigms. In this work, we take an initial step toward a theoretical understanding of relational knowledge distillation (RKD), with a focus on semi-supervised classification problems. We start by casting RKD as spectral clustering on a population-induced graph unveiled by a teacher model. Via a notion of clustering error that quantifies the discrepancy between the predicted and ground truth clusterings, we illustrate that RKD over the population provably leads to low clustering error. Moreover, we provide a sample complexity bound for RKD with limited unlabeled samples. For semi-supervised learning, we further demonstrate the label efficiency of RKD through a general framework of cluster-aware semi-supervised learning that assumes low clustering errors. 
Finally, by unifying data augmentation consistency regularization into this cluster-aware framework, we show that despite the common effect of learning accurate clusterings, RKD facilitates a \"global\" perspective through spectral clustering, whereas consistency regularization focuses on a \"local\" perspective via expansion.", "keywords": "Relational knowledge distillation;Semi-supervised learning;Spectral clustering;Sample complexity", "primary_area": "", "supplementary_material": "/attachment/e505d80fb47c00506ae0c76050d69d04b652ab80.pdf", "author": "Yijun Dong;Kevin Miller;Qi Lei;Rachel Ward", "authorids": "~Yijun_Dong1;~Kevin_Miller3;~Qi_Lei1;~Rachel_Ward1", "gender": "F;M;F;", "homepage": "https://dyjdongyijun.github.io/;https://millerk22.github.io/;https://cecilialeiqi.github.io/;", "dblp": "200/1432;68/5582;;80/7132", "google_scholar": "l3bmbCkAAAAJ;y6As7c4AAAAJ;kGOgaowAAAAJ;", "orcid": ";0000-0003-4050-1849;;", "linkedin": "yijun-dong-82638513b/;;;", "or_profile": "~Yijun_Dong1;~Kevin_Miller3;~Qi_Lei1;~Rachel_Ward1", "aff": "University of Texas, Austin;University of Texas at Austin;New York University;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;nyu.edu;utexas.edu", "position": "PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndong2023clusteraware,\ntitle={Cluster-aware Semi-supervised Learning: Relational Knowledge Distillation Provably Learns Clustering},\nauthor={Yijun Dong and Kevin Miller and Qi Lei and Rachel Ward},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RrdBNXBUIF}\n}", "github": "", "project": "", "reviewers": "APrr;T9nK;mtgh", "pdf_size": 1547639, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "52;106;78", "wc_strengths": "49;184;21", "wc_weaknesses": "82;231;103", "wc_questions": "65;6;58", "wc_limitations": "30;2;65", "wc_review": "278;529;325", "wc_reply_reviewers": "160;37;0", "wc_reply_authors": "468;122;0", "reply_reviewers": "2;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.66666666666667, 22.050447211388303 ], "wc_strengths_avg": [ 84.66666666666667, 71.16334886879778 ], "wc_weaknesses_avg": [ 138.66666666666666, 65.8499978908293 ], "wc_questions_avg": [ 43.0, 26.318561257535844 ], "wc_limitations_avg": [ 32.333333333333336, 25.772509040103607 ], "wc_review_avg": [ 377.3333333333333, 108.9474899002063 ], "wc_reply_reviewers_avg": [ 65.66666666666667, 68.39265717571993 ], "wc_reply_authors_avg": [ 196.66666666666666, 198.2209765780493 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10416970965672395603&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "utexas.edu;utexas.edu;nyu.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Austin;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.nyu.edu", "aff_unique_abbr": "UT Austin;NYU", "aff_campus_unique_index": "0;0;0", 
"aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Partially-Supervised Reinforcement Learning Framework for Visual Active Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71640", "id": "Rs6pzz21U4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/288b63aa98084366c4536ba0574a0f22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Rs6pzz21U4", "openreview": "https://openreview.net/forum?id=Rs6pzz21U4", "poster": "/media/PosterPDFs/NeurIPS%202023/71640.png?t=1701402029.056565", "slides": "https://nips.cc/virtual/2023/poster/71640", "video": "https://nips.cc/virtual/2023/poster/71640", "author_site": "Anindya Sarkar, Nathan Jacobs, Yevgeniy Vorobeychik", "tldr": "", "abstract": "Visual active search (VAS) has been proposed as a modeling framework in which visual cues are used to guide exploration, with the goal of identifying regions of interest in a large geospatial area. Its potential applications include identifying hot spots of rare wildlife poaching activity, search-and-rescue scenarios, identifying illegal trafficking of weapons, drugs, or people, and many others. State of the art approaches to VAS include applications of deep reinforcement learning (DRL), which yield end-to-end search policies, and traditional active search, which combines predictions with custom algorithmic approaches. While the DRL framework has been shown to greatly outperform traditional active search in such domains, its end-to-end nature does not make full use of supervised information attained either during training, or during actual search, a significant limitation if search tasks differ significantly from those in the training distribution. We propose an approach that combines the strength of both DRL and conventional active search approaches by decomposing the search policy into a prediction module, which produces a geospatial distribution of regions of interest based on task embedding and search history, and a search module, which takes the predictions and search history as input and outputs the search distribution. In addition, we develop a novel meta-learning approach for jointly learning the resulting combined policy that can make effective use of supervised information obtained both at training and decision time. Our extensive experiments demonstrate that the proposed representation and meta-learning frameworks significantly outperform state of the art in visual active search on several problem domains.", "keywords": "Visual Active Search;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/4125702345fdfd86fecae67973f74424368c708f.pdf", "author": "Anindya Sarkar;Nathan Jacobs;Yevgeniy Vorobeychik", "authorids": "~Anindya_Sarkar2;~Nathan_Jacobs1;~Yevgeniy_Vorobeychik1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/anindya-sarkar/home;https://jacobsn.github.io/;http://vorobeychik.com", "dblp": ";82/3140;70/2217", "google_scholar": "2hQyYz0AAAAJ;ZBgGyh8AAAAJ;https://scholar.google.com.tw/citations?user=ptI-HHkAAAAJ", "orcid": ";0000-0002-4242-8967;", "linkedin": ";jacobsn/;", "or_profile": "~Anindya_Sarkar2;~Nathan_Jacobs1;~Yevgeniy_Vorobeychik1", "aff": "Washington University, Saint Louis;Washington University, Saint Louis;Washington University, St. 
Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsarkar2023a,\ntitle={A Partially-Supervised Reinforcement Learning Framework for Visual Active Search},\nauthor={Anindya Sarkar and Nathan Jacobs and Yevgeniy Vorobeychik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Rs6pzz21U4}\n}", "github": "", "project": "", "reviewers": "mjFZ;yVuL;2ixD;6bym", "pdf_size": 24614478, "rating": "5;5;6;6", "confidence": "3;2;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "73;55;70;81", "wc_strengths": "61;33;81;69", "wc_weaknesses": "140;60;43;22", "wc_questions": "67;3;32;6", "wc_limitations": "1;1;91;14", "wc_review": "342;152;317;192", "wc_reply_reviewers": "0;0;0;22", "wc_reply_authors": "0;113;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 9.41740410091868 ], "wc_strengths_avg": [ 61.0, 17.663521732655695 ], "wc_weaknesses_avg": [ 66.25, 44.656326539472545 ], "wc_questions_avg": [ 27.0, 25.700194551792794 ], "wc_limitations_avg": [ 26.75, 37.472489909265434 ], "wc_review_avg": [ 250.75, 80.4965061353597 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 28.25, 48.93043531382078 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16940201448246463062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "wustl.edu;wustl.edu;wustl.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WUSTL", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Saint Louis;St. Louis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dynamic Personalized Federated Learning with Adaptive Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71639", "id": "RteNLuc8D9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e4724af0e2a0d52ce5a0a4e084b87f59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RteNLuc8D9", "openreview": "https://openreview.net/forum?id=RteNLuc8D9", "poster": "/media/PosterPDFs/NeurIPS%202023/71639.png?t=1701605696.9728665", "slides": "https://nips.cc/virtual/2023/poster/71639", "video": "https://nips.cc/virtual/2023/poster/71639", "author_site": "Xiyuan Yang, Wenke Huang, Mang Ye", "tldr": "", "abstract": "Personalized federated learning with differential privacy has been considered a feasible solution to address non-IID distribution of data and privacy leakage risks. However, current personalized federated learning methods suffer from inflexible personalization and convergence difficulties due to two main factors: 1) Firstly, we observe that the prevailing personalization methods mainly achieve this by personalizing a fixed portion of the model, which lacks flexibility. 
2) Secondly, we demonstrate that the default gradient calculation is sensitive to the widely-used clipping operations in differential privacy, resulting in difficulties in convergence. Considering that Fisher information values can serve as an effective measure for estimating the information content of parameters by reflecting the model sensitivity to parameters, we aim to leverage this property to address the aforementioned challenges. In this paper, we propose a novel federated learning method with Dynamic Fisher Personalization and Adaptive Constraint (FedDPA) to handle these challenges. Firstly, by using layer-wise Fisher information to measure the information content of local parameters, we retain local parameters with high Fisher values during the personalization process, which are considered informative, while simultaneously protecting these parameters from noise perturbation. Secondly, we introduce an adaptive approach by applying differentiated constraint strategies to the personalized parameters and shared parameters identified in the previous step for better convergence. Our method boosts performance through flexible personalization while mitigating the slow convergence caused by clipping operations. Experimental results on the CIFAR-10, FEMNIST, and SVHN datasets demonstrate the effectiveness of our approach in achieving better performance and robustness against clipping, under personalized federated learning with differential privacy.", "keywords": "federated learning;differential privacy;personalization", "primary_area": "", "supplementary_material": "/attachment/61dad8ea520a53b8c8af0816d5c6290f788a1cb6.zip", "author": "Xiyuan Yang;Wenke Huang;Mang Ye", "authorids": "~Xiyuan_Yang1;~Wenke_Huang1;~Mang_Ye1", "gender": "M;M;M", "homepage": "https://xiyuanyang45.github.io;https://wenkehuang.github.io/;https://marswhu.github.io/", "dblp": ";330/1664;156/0610", "google_scholar": "dzpchdAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;j-HxRy0AAAAJ", "orcid": ";0000-0003-4819-293X;0000-0003-3989-7655", "linkedin": ";;", "or_profile": "~Xiyuan_Yang1;~Wenke_Huang1;~Mang_Ye1", "aff": "Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "Undergrad student;PhD student;Professor", "bibtex": "@inproceedings{\nyang2023dynamic,\ntitle={Dynamic Personalized Federated Learning with Adaptive Differential Privacy},\nauthor={Xiyuan Yang and Wenke Huang and Mang Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RteNLuc8D9}\n}", "github": "", "project": "", "reviewers": "EfaD;imA4;dw3B;UFEt;aCpX", "pdf_size": 702289, "rating": "5;5;5;8;8", "confidence": "4;4;1;4;5", "soundness": "3;3;2;4;3", "novelty": "2;2;2;4;3", "presentation": "3;2;3;4;4", "wc_summary": "48;102;89;64;99", "wc_strengths": "22;62;42;209;239", "wc_weaknesses": "68;150;194;26;37", "wc_questions": "70;104;96;31;77", "wc_limitations": "1;1;19;19;23", "wc_review": "209;419;440;349;475", "wc_reply_reviewers": "0;147;0;80;49", "wc_reply_authors": "0;454;43;0;0", "reply_reviewers": "0;2;0;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 3.6, 1.3564659966250536 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 80.4, 21.00095235935742 ], "wc_strengths_avg": [ 114.8, 90.5525261933647 ], "wc_weaknesses_avg": [ 95.0, 65.84831053261732 ], "wc_questions_avg": [ 75.6,
25.476263462289754 ], "wc_limitations_avg": [ 12.6, 9.583318840568753 ], "wc_review_avg": [ 378.4, 94.16496163648134 ], "wc_reply_reviewers_avg": [ 55.2, 55.09047104536319 ], "wc_reply_authors_avg": [ 99.4, 178.08043126632415 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5417363388859615, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4586496419112038145&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Diversified Outlier Exposure for Out-of-Distribution Detection via Informative Extrapolation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71638", "id": "RuxBLfiEqI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46d943bc6a15a57c923829efc0db7c7a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=RuxBLfiEqI", "openreview": "https://openreview.net/forum?id=RuxBLfiEqI", "poster": "/media/PosterPDFs/NeurIPS%202023/71638.png?t=1701501524.3354669", "slides": "https://nips.cc/virtual/2023/poster/71638", "video": "https://nips.cc/virtual/2023/poster/71638", "author_site": "Jianing Zhu, Yu Geng, Jiangchao Yao, Tongliang Liu, Gang Niu, Masashi Sugiyama, Bo Han", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is important for deploying reliable machine learning models on real-world applications. Recent advances in outlier exposure have shown promising results on OOD detection via fine-tuning model with informatively sampled auxiliary outliers. However, previous methods assume that the collected outliers can be sufficiently large and representative to cover the boundary between ID and OOD data, which might be impractical and challenging. In this work, we propose a novel framework, namely, Diversified Outlier Exposure (DivOE), for effective OOD detection via informative extrapolation based on the given auxiliary outliers. Specifically, DivOE introduces a new learning objective, which diversifies the auxiliary distribution by explicitly synthesizing more informative outliers for extrapolation during training. It leverages a multi-step optimization method to generate novel outliers beyond the original ones, which is compatible with many variants of outlier exposure. Extensive experiments and analyses have been conducted to characterize and demonstrate the effectiveness of the proposed DivOE. 
The code is publicly available at: https://github.com/tmlr-group/DivOE.", "keywords": "out-of-distribution detection;outlier exposure", "primary_area": "", "supplementary_material": "/attachment/5c28a7040b5cc53675be60dca2d857f996ceac38.zip", "author": "Jianing Zhu;Geng Yu;Jiangchao Yao;Tongliang Liu;Gang Niu;Masashi Sugiyama;Bo Han", "authorids": "~Jianing_Zhu2;~Geng_Yu1;~Jiangchao_Yao1;~Tongliang_Liu1;~Gang_Niu1;~Masashi_Sugiyama1;~Bo_Han1", "gender": "M;M;M;M;M;M;M", "homepage": "https://zfancy.github.io/;https://warriors-30.github.io/;https://sunarker.github.io/;https://tongliang-liu.github.io/;https://niug1984.github.io;http://www.ms.k.u-tokyo.ac.jp/sugi/;https://bhanml.github.io/", "dblp": "129/6807;;166/5900;150/6667;26/3367-1;35/1228;241/0472-3", "google_scholar": "82uNA3MAAAAJ;SNp2hXIAAAAJ;w8oDh9QAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;nTNjqHwAAAAJ", "orcid": ";;;;;0000-0001-6658-6743;", "linkedin": ";;;;;;", "or_profile": "~Jianing_Zhu2;~Geng_Yu1;~Jiangchao_Yao1;~Tongliang_Liu1;~Gang_Niu1;~Masashi_Sugiyama1;~bo_han2", "aff": "Hong Kong Baptist University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;University of Sydney;RIKEN;The University of Tokyo;RIKEN", "aff_domain": "hkbu.edu.hk;sjtu.edu.cn;pjlab.org.cn;sydney.edu.au;riken.jp;u-tokyo.ac.jp;riken.jp", "position": "PhD student;Undergrad student;Researcher;Lecturer;Research Scientist (tenured);Full Professor;Adjunct Scientist", "bibtex": "@inproceedings{\nzhu2023diversified,\ntitle={Diversified Outlier Exposure for Out-of-Distribution Detection via Informative Extrapolation},\nauthor={Jianing Zhu and Geng Yu and Jiangchao Yao and Tongliang Liu and Gang Niu and Masashi Sugiyama and Bo Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=RuxBLfiEqI}\n}", "github": "", "project": "", "reviewers": "Mh63;aWLd;1Wzd;vD9G", "pdf_size": 2186500, "rating": "5;5;6;6", "confidence": "4;3;4;2", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "139;54;120;106", "wc_strengths": "127;134;48;133", "wc_weaknesses": "327;158;156;145", "wc_questions": "5;217;73;110", "wc_limitations": "5;7;18;35", "wc_review": "603;570;415;529", "wc_reply_reviewers": "20;49;11;58", "wc_reply_authors": "61;68;19;612", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.75, 31.554516317002864 ], "wc_strengths_avg": [ 110.5, 36.18355980276125 ], "wc_weaknesses_avg": [ 196.5, 75.50662222613325 ], "wc_questions_avg": [ 101.25, 76.70845781268191 ], "wc_limitations_avg": [ 16.25, 11.903255857117413 ], "wc_review_avg": [ 529.25, 70.98019089858803 ], "wc_reply_reviewers_avg": [ 34.5, 19.525624189766635 ], "wc_reply_authors_avg": [ 190.0, 244.36141266574802 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8520434278624811997&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 10, "email": "hkbu.edu.hk;sjtu.edu.cn;pjlab.org.cn;sydney.edu.au;riken.jp;u-tokyo.ac.jp;riken.jp", 
"author_num": 7, "aff_unique_index": "0;1;2;3;4;5;4", "aff_unique_norm": "Hong Kong Baptist University;Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;University of Sydney;RIKEN;University of Tokyo", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.sydney.edu.au;https://www.riken.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "HKBU;SJTU;Shanghai AI Lab;USYD;RIKEN;UTokyo", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;2;2;2", "aff_country_unique": "China;Australia;Japan" }, { "title": "Similarity, Compression and Local Steps: Three Pillars of Efficient Communications for Distributed Variational Inequalities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71637", "id": "Rvk1wdwz1L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5b4a459db23e6db9be2a128380953d96-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Rvk1wdwz1L", "openreview": "https://openreview.net/forum?id=Rvk1wdwz1L", "poster": "/media/PosterPDFs/NeurIPS%202023/71637.png?t=1702328027.9484842", "slides": "https://nips.cc/virtual/2023/poster/71637", "video": "https://nips.cc/virtual/2023/poster/71637", "author_site": "Aleksandr Beznosikov, Martin Takac, Alexander Gasnikov", "tldr": "", "abstract": "Variational inequalities are a broad and flexible class of problems that includes minimization, saddle point, and fixed point problems as special cases. Therefore, variational inequalities are used in various applications ranging from equilibrium search to adversarial learning. With the increasing size of data and models, today's instances demand parallel and distributed computing for real-world machine learning problems, most of which can be represented as variational inequalities. Meanwhile, most distributed approaches have a significant bottleneck -- the cost of communications. The three main techniques to reduce the total number of communication rounds and the cost of one such round are the similarity of local functions, compression of transmitted information, and local updates. In this paper, we combine all these approaches. Such a triple synergy did not exist before for variational inequalities and saddle problems, nor even for minimization problems. The methods presented in this paper have the best theoretical guarantees of communication complexity and are significantly ahead of other methods for distributed variational inequalities. 
The theoretical results are confirmed by adversarial learning experiments on synthetic and real datasets.", "keywords": "convex optimization;variational inequalities;similarity;local methods;compression;partial participation", "primary_area": "", "supplementary_material": "/attachment/2104fe378d6cb02708157354d413134048b13288.pdf", "author": "Aleksandr Beznosikov;Martin Tak\u00e1\u010d;Alexander Gasnikov", "authorids": "~Aleksandr_Beznosikov1;~Martin_Tak\u00e1\u010d1;~Alexander_Gasnikov1", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Aleksandr_Beznosikov1;~Martin_Tak\u00e1\u010d1;~Alexander_Gasnikov1", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@inproceedings{\nbeznosikov2023similarity,\ntitle={Similarity, Compression and Local Steps: Three Pillars of Efficient Communications for Distributed Variational Inequalities},\nauthor={Aleksandr Beznosikov and Martin Tak{\\'a}{\\v{c}} and Alexander Gasnikov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Rvk1wdwz1L}\n}", "github": "", "project": "", "reviewers": "JXTW;kubg;D8HM;4qia", "pdf_size": 1306349, "rating": "6;6;6;7", "confidence": "3;4;3;3", "soundness": "3;3;2;4", "novelty": "2;3;2;3", "presentation": "1;3;2;4", "wc_summary": "98;32;85;107", "wc_strengths": "25;29;87;87", "wc_weaknesses": "269;29;161;84", "wc_questions": "263;70;4;80", "wc_limitations": "11;1;30;40", "wc_review": "666;161;367;398", "wc_reply_reviewers": "25;14;21;23", "wc_reply_authors": "23;12;11;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 80.5, 29.073183520213263 ], "wc_strengths_avg": [ 57.0, 30.033314835362415 ], "wc_weaknesses_avg": [ 135.75, 90.09266063337235 ], "wc_questions_avg": [ 104.25, 96.19348990446287 ], "wc_limitations_avg": [ 20.5, 15.337861650177967 ], "wc_review_avg": [ 398.0, 179.5508284581277 ], "wc_reply_reviewers_avg": [ 20.75, 4.14578098794425 ], "wc_reply_authors_avg": [ 15.75, 4.763139720814412 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=711115212587956031&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 10, "email": ";;", "author_num": 3 }, { "title": "RL-ViGen: A Reinforcement Learning Benchmark for Visual Generalization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73592", "id": "RwNIqaNOgd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15c9f64ec172b046470d2a4d2b7669fc-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=RwNIqaNOgd", "openreview": "https://openreview.net/forum?id=RwNIqaNOgd", "poster": "/media/PosterPDFs/NeurIPS%202023/73592.png?t=1698742887.7111197", "slides": "https://nips.cc/virtual/2023/poster/73592", "video": "https://nips.cc/virtual/2023/poster/73592", "author_site": "Zhecheng Yuan, Sizhe Yang, Pu Hua, Can Chang, Kaizhe Hu, Huazhe Xu", "tldr": "", "abstract": "Visual Reinforcement Learning (Visual RL), coupled with high-dimensional observations, has consistently confronted the 
long-standing challenge of out-of-distribution generalization. Despite the focus on algorithms aimed at resolving visual generalization problems, we argue that the devil is in the existing benchmarks as they are restricted to isolated tasks and generalization categories, undermining a comprehensive evaluation of agents' visual generalization capabilities. To bridge this gap, we introduce RL-ViGen: a novel **R**einforcement **L**earning Benchmark for **Vi**sual **Gen**eralization, which contains diverse tasks and a wide spectrum of generalization types, thereby facilitating the derivation of more reliable conclusions. Furthermore, RL-ViGen incorporates the latest generalization visual RL algorithms into a unified framework, under which the experimental results indicate that no single existing algorithm has prevailed universally across tasks. Our aspiration is that RL-ViGen will serve as a catalyst in this area, and lay a foundation for the future creation of universal visual generalization RL agents suitable for real-world scenarios. Access to our code and implemented algorithms is provided at https://gemcollector.github.io/RL-ViGen/.", "keywords": "Visual Generalization;Visual RL;Benchmark", "primary_area": "", "supplementary_material": "/attachment/c4b76b35829d0b0656e0cb69ae228c0096886318.pdf", "author": "Zhecheng Yuan;Sizhe Yang;Pu Hua;Can Chang;Kaizhe Hu;Huazhe Xu", "authorids": "~Zhecheng_Yuan1;~Sizhe_Yang3;~Pu_Hua1;~Can_Chang1;~Kaizhe_Hu1;~Huazhe_Xu1", "gender": "M;M;M;M;M;M", "homepage": "http://www.github.com;https://yangsizhe.github.io/;https://piao-0429.github.io/;https://github.com/cititude;https://hukz18.github.io/;http://hxu.rocks", "dblp": "314/5755;351/1712;331/5335;324/0128;330/4940;164/9006", "google_scholar": ";ue3SjGgAAAAJ;https://scholar.google.com/citations?hl=en;;mPpYLhcAAAAJ;t9HPFawAAAAJ", "orcid": ";;0009-0008-1301-7131;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/pu-hua-315462215;;%E5%BC%80%E5%93%B2-%E8%83%A1-40137718a/?miniProfileUrn=urn%3Ali%3Afs_miniProfile%3AACoAACyMbIEBJhMDJ4b7wLQyHotP_JGOnWDoEDU;", "or_profile": "~Zhecheng_Yuan1;~Sizhe_Yang3;~Pu_Hua1;~Can_Chang1;~Kaizhe_Hu1;~Huazhe_Xu1", "aff": "Tsinghua University;Shanghai Qi Zhi Institute;Electronic Engineering, Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;sqz.ac.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Intern;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyuan2023rlvigen,\ntitle={{RL}-ViGen: A Reinforcement Learning Benchmark for Visual Generalization},\nauthor={Zhecheng Yuan and Sizhe Yang and Pu Hua and Can Chang and Kaizhe Hu and Huazhe Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=RwNIqaNOgd}\n}", "github": "", "project": "", "reviewers": "jzXK;WGQU;Jh47;jky4", "pdf_size": 16431488, "rating": "6;7;7;8", "confidence": "4;3;4;5", "wc_summary_and_contributions": "122;117;57;41", "wc_strengths": "76;105;92;44", "wc_improvement": "78;201;31;16", "wc_limitations": "60;237;1;139", "wc_correctness": "68;2;1;1", "wc_clarity": "7;9;1;5", "wc_relation_to_prior_work": "59;57;1;11", "wc_documentation": "9;50;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "480;779;186;259", "wc_reply_reviewers": "53;111;0;20", "wc_reply_authors": "2055;2735;193;1143", "reply_reviewers": "1;2;0;1", "reply_authors": "4;6;1;4",
"rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 84.25, 35.74475485997911 ], "wc_strengths_avg": [ 79.25, 22.796655456447994 ], "wc_improvement_avg": [ 81.5, 72.68596838455136 ], "wc_limitations_avg": [ 109.25, 88.52789108523935 ], "wc_correctness_avg": [ 18.0, 28.8704000665041 ], "wc_clarity_avg": [ 5.5, 2.958039891549808 ], "wc_relation_to_prior_work_avg": [ 32.0, 26.248809496813376 ], "wc_documentation_avg": [ 15.25, 20.327014045353536 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 426.0, 230.76719870900197 ], "wc_reply_reviewers_avg": [ 46.0, 42.02975136733502 ], "wc_reply_authors_avg": [ 1531.5, 957.2067436034913 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5851463656485858603&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;sqz.ac.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Tsinghua University;Shanghai Qi Zhi Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.qz.io", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SwiftSage: A Generative Agent with Fast and Slow Thinking for Complex Interactive Tasks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71636", "id": "Rzk3GP1HN7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b0eea69deea512c9e2c469187643dc2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Rzk3GP1HN7", "openreview": "https://openreview.net/forum?id=Rzk3GP1HN7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71636", "video": "https://nips.cc/virtual/2023/poster/71636", "author_site": "Bill Yuchen Lin, Yicheng Fu, Karina Yang, Faeze Brahman, Shiyu Huang, Chandra Bhagavatula, Prithviraj Ammanabrolu, Yejin Choi, Xiang Ren", "tldr": "", "abstract": "We introduce SwiftSage, a novel agent framework inspired by the dual-process theory of human cognition, designed to excel in action planning for complex interactive reasoning tasks. SwiftSage integrates the strengths of behavior cloning and prompting large language models (LLMs) to enhance task completion performance. The framework comprises two primary modules: the Swift module, representing fast and intuitive thinking, and the Sage module, emulating deliberate thought processes. The Swift module is a small encoder-decoder LM fine-tuned on the oracle agent's action trajectories, while the Sage module employs LLMs such as GPT-4 for subgoal planning and grounding. We develop a heuristic method to harmoniously integrate the two modules, resulting in a more efficient and robust problem-solving process. 
In 30 tasks from the ScienceWorld benchmark, SwiftSage significantly outperforms other methods such as SayCan, ReAct, and Reflexion, demonstrating its effectiveness in solving complex interactive tasks.", "keywords": "interactive reasoning;text game;agents;action planning;large language models", "primary_area": "", "supplementary_material": "", "author": "Bill Yuchen Lin;Yicheng Fu;Karina Yang;Faeze Brahman;Shiyu Huang;Chandra Bhagavatula;Prithviraj Ammanabrolu;Yejin Choi;Xiang Ren", "authorids": "~Bill_Yuchen_Lin1;~Yicheng_Fu1;~Karina_Yang1;~Faeze_Brahman1;~Shiyu_Huang2;~Chandra_Bhagavatula1;~Prithviraj_Ammanabrolu1;~Yejin_Choi1;~Xiang_Ren1", "gender": "M;M;F;F;M;M;F;M;M", "homepage": "http://yuchenlin.xyz/;https://sofyc.github.io/;;https://fabrahman.github.io;https://www.chandrab.page;http://prithvirajva.com;https://yejinc.github.io/;https://shanzhenren.github.io/;http://huangshiyu13.github.io/", "dblp": "190/4518;304/3009;;276/6005;151/3093;202/2351;89/579-1;36/360-1;", "google_scholar": "https://scholar.google.com/citations?hl=en;Phj7N40AAAAJ;;viCG2ikAAAAJ;AsgHp14AAAAJ;2yaiWZ8AAAAJ;vhP-tlcAAAAJ;_moJlrIAAAAJ;PK57vrQAAAAJ", "orcid": ";0009-0003-1349-6412;0009-0008-1300-3119;;;;;;0000-0003-0500-0141", "linkedin": ";;;;;rajammanabrolu/;;xren7;shiyu-huang-841b92106/", "or_profile": "~Bill_Yuchen_Lin1;~Yicheng_Fu1;~Karina_Yang1;~Faeze_Brahman1;~Chandra_Bhagavatula1;~Prithviraj_Ammanabrolu1;~Yejin_Choi1;~Xiang_Ren1;~Shiyu_Huang1", "aff": "Allen Institute for Artificial Intelligence;Tsinghua University;University of Southern California;Allen Institute for AI;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington;University of Southern California;4Paradigm Inc.", "aff_domain": "allenai.org;tsinghua.edu.cn;usc.edu;allenai.org;allenai.org;allenai.org;cs.washington.edu;usc.edu;4paradigm.com", "position": "Researcher;Undergrad student;Undergrad student;Postdoc;Researcher;Researcher;Full Professor;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nlin2023swiftsage,\ntitle={SwiftSage: A Generative Agent with Fast and Slow Thinking for Complex Interactive Tasks},\nauthor={Bill Yuchen Lin and Yicheng Fu and Karina Yang and Faeze Brahman and Shiyu Huang and Chandra Bhagavatula and Prithviraj Ammanabrolu and Yejin Choi and Xiang Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Rzk3GP1HN7}\n}", "github": "", "project": "", "reviewers": "Sgwx;AXAQ;MDXU;CSRK", "pdf_size": 889196, "rating": "6;6;7;8", "confidence": "4;4;4;5", "soundness": "4;3;3;4", "novelty": "4;3;3;4", "presentation": "4;3;2;4", "wc_summary": "63;59;82;82", "wc_strengths": "54;44;81;137", "wc_weaknesses": "85;142;96;97", "wc_questions": "244;4;87;2", "wc_limitations": "7;10;7;4", "wc_review": "453;259;353;322", "wc_reply_reviewers": "29;14;14;30", "wc_reply_authors": "44;71;36;57", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 71.5, 10.594810050208546 ], "wc_strengths_avg": [ 79.0, 36.1178626167164 ], "wc_weaknesses_avg": [ 105.0, 21.874642854227357 ], "wc_questions_avg": [ 84.25, 98.4031884646021 ], "wc_limitations_avg": [ 7.0, 2.1213203435596424 ], "wc_review_avg": [ 346.75, 70.07273007383115 ], 
"wc_reply_reviewers_avg": [ 21.75, 7.75806032459145 ], "wc_reply_authors_avg": [ 52.0, 13.285330255586423 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3844178012869500706&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "allenai.org;tsinghua.edu.cn;usc.edu;allenai.org;allenai.org;allenai.org;cs.washington.edu;usc.edu;4paradigm.com", "author_num": 9, "aff_unique_index": "0;1;2;3;0;0;4;2;5", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Tsinghua University;University of Southern California;Allen Institute for AI;University of Washington;4Paradigm", "aff_unique_dep": ";;;;Department of Computer Science;", "aff_unique_url": "https://allenai.org;https://www.tsinghua.edu.cn;https://www.usc.edu;https://allenai.org;https://www.washington.edu;https://www.4paradigm.com/", "aff_unique_abbr": "AI2;THU;USC;AI2;UW;4Paradigm", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Los Angeles;Seattle", "aff_country_unique_index": "0;1;0;0;0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Dense-Exponential Random Features: Sharp Positive Estimators of the Gaussian Kernel", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71635", "id": "S0xrBMFihS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/02dec8877fb7c6aa9a79f81661baca7c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S0xrBMFihS", "openreview": "https://openreview.net/forum?id=S0xrBMFihS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71635", "video": "https://nips.cc/virtual/2023/poster/71635", "author_site": "Valerii Likhosherstov, Krzysztof M Choromanski, Kumar Avinava Dubey, Frederick Liu, Tamas Sarlos, Adrian Weller", "tldr": "", "abstract": "The problem of efficient approximation of a linear operator induced by the Gaussian or softmax kernel is often addressed using random features (RFs) which yield an unbiased approximation of the operator's result. Such operators emerge in important applications ranging from kernel methods to efficient Transformers. We propose parameterized, positive, non-trigonometric RFs which approximate Gaussian and softmax-kernels. In contrast to traditional RF approximations, parameters of these new methods can be optimized to reduce the variance of the approximation, and the optimum can be expressed in closed form. We show that our methods lead to variance reduction in practice (e^{10}-times smaller variance and beyond) and outperform previous methods in a kernel regression task. Using our proposed mechanism, we also present FAVOR#, a method for self-attention approximation in Transformers. 
We show that FAVOR# outperforms other random feature methods in speech modelling and natural language processing.", "keywords": "Gaussian kernel;softmax kernel", "primary_area": "", "supplementary_material": "/attachment/04d9ffa88532944231a88fcaf1a229d35b5bdc5b.zip", "author": "Valerii Likhosherstov;Krzysztof Marcin Choromanski;Kumar Avinava Dubey;Frederick Liu;Tamas Sarlos;Adrian Weller", "authorids": "~Valerii_Likhosherstov2;~Krzysztof_Marcin_Choromanski1;~Kumar_Avinava_Dubey1;~Frederick_Liu1;~Tamas_Sarlos1;~Adrian_Weller1", "gender": ";;M;M;M;M", "homepage": "https://valerytyumen.github.io/;;https://frederick0329.github.io/;https://sites.google.com/site/stamas/;http://mlg.eng.cam.ac.uk/adrian/;https://sites.google.com/site/kumaravinavadubey/", "dblp": "232/4391.html;78/11411;;48/959;73/8324;10/7789", "google_scholar": "iiVVfxUAAAAJ;;iJbdUkQAAAAJ;c4YtO-MAAAAJ;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;tBbUAfsAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Valerii_Likhosherstov2;~Krzysztof_Marcin_Choromanski1;~Frederick_Liu1;~Tamas_Sarlos1;~Adrian_Weller1;~Kumar_A_Dubey1", "aff": "Waymo;Google Brain Robotics & Columbia University;Google;Google Research;University of Cambridge;Google Research", "aff_domain": "waymo.com;columbia.edu;google.com;google.com;cam.ac.uk;google.com", "position": "Researcher;research scientist & adjunct assistant professor;Software Engineer;Staff Research Scientist;Principal Researcher;Research Scientist", "bibtex": "@inproceedings{\nlikhosherstov2023denseexponential,\ntitle={Dense-Exponential Random Features: Sharp Positive Estimators of the Gaussian Kernel},\nauthor={Valerii Likhosherstov and Krzysztof Marcin Choromanski and Kumar Avinava Dubey and Frederick Liu and Tamas Sarlos and Adrian Weller},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S0xrBMFihS}\n}", "github": "", "project": "", "reviewers": "z18t;r6PB;LgLu;pnU2;PE4H", "pdf_size": 1390768, "rating": "3;4;6;6;7", "confidence": "4;4;2;4;3", "soundness": "4;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;3;4;3", "wc_summary": "109;116;20;187;41", "wc_strengths": "69;71;67;102;42", "wc_weaknesses": "153;278;50;279;38", "wc_questions": "147;156;8;351;36", "wc_limitations": "5;9;1;29;2", "wc_review": "483;630;146;948;159", "wc_reply_reviewers": "22;112;16;0;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 94.6, 59.39898989040134 ], "wc_strengths_avg": [ 70.2, 19.072493282211425 ], "wc_weaknesses_avg": [ 159.6, 104.99257116577343 ], "wc_questions_avg": [ 139.6, 120.8844075966789 ], "wc_limitations_avg": [ 9.2, 10.283968105745954 ], "wc_review_avg": [ 473.2, 301.9598648827357 ], "wc_reply_reviewers_avg": [ 34.6, 39.565641660410364 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5783517448238059, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15962692413457144028&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "waymo.com;columbia.edu;google.com;google.com;cam.ac.uk;google.com", "author_num": 6, "aff_unique_index": 
"0;1;1;1;2;1", "aff_unique_norm": "Waymo;Google;University of Cambridge", "aff_unique_dep": ";Google Brain Robotics;", "aff_unique_url": "https://www.waymo.com;https://ai.google;https://www.cam.ac.uk", "aff_unique_abbr": "Waymo;Google;Cambridge", "aff_campus_unique_index": "1;1;1;2;1", "aff_campus_unique": ";Mountain View;Cambridge", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "ClusterFomer: Clustering As A Universal Visual Learner", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71634", "id": "S1KGaTSOTS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9ef471a579197c4ed99df2aa542ce97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S1KGaTSOTS", "openreview": "https://openreview.net/forum?id=S1KGaTSOTS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71634", "video": "https://nips.cc/virtual/2023/poster/71634", "author_site": "James Liang, Yiming Cui, Qifan Wang, Tong Geng, Wenguan Wang, Dongfang Liu", "tldr": "", "abstract": "This paper presents ClusterFormer, a universal vision model that is based on the Clustering paradigm with TransFormer. It comprises two novel designs: 1) recurrent cross-attention clustering, which reformulates the cross-attention mechanism in Transformer and enables recursive updates of cluster centers to facilitate strong representation learning; and 2) feature dispatching, which uses the updated cluster centers to redistribute image features through similarity-based metrics, resulting in a transparent pipeline. This elegant design streamlines an explainable and transferable workflow, capable of tackling heterogeneous vision tasks (i.e., image classification, object detection, and image segmentation) with varying levels of clustering granularity (i.e., image-, box-, and pixel-level). Empirical results demonstrate that ClusterFormer outperforms various well-known specialized architectures, achieving 83.41% top-1 acc. over ImageNet-1K for image classification, 54.2% and 47.0% mAP over MS COCO for object detection and instance segmentation, 52.4% mIoU over ADE20K for semantic segmentation, and 55.8% PQ over COCO Panoptic for panoptic segmentation. 
This work aims to initiate a paradigm shift in universal visual understanding and to benefit the broader field.", "keywords": "Universal Model;Clustering", "primary_area": "", "supplementary_material": "/attachment/3dd0badc7672156ebe795876de904f98d452769c.pdf", "author": "James Chenhao Liang;Yiming Cui;Qifan Wang;Tong Geng;Wenguan Wang;Dongfang Liu", "authorids": "~James_Chenhao_Liang1;~Yiming_Cui3;~Qifan_Wang2;~Tong_Geng1;~Wenguan_Wang4;~Dongfang_Liu1", "gender": "M;;M;M;M;", "homepage": "https://jamesliang819.github.io/;;https://wqfcr.github.io/;https://tonytgeng.com;https://sites.google.com/view/wenguanwang/;https://www.rit.edu/directory/dxleec-dongfang-liu", "dblp": "323/3403;;33/8610;188/5531;145/1078;", "google_scholar": "cR8m4CcAAAAJ;;LrSyLosAAAAJ;1B_nk28AAAAJ;CqAQQkgAAAAJ;uICY0vEAAAAJ", "orcid": ";;0000-0002-7570-5756;0000-0002-3644-2922;0000-0002-0802-9567;", "linkedin": ";;;;wenguanwang;", "or_profile": "~James_Chenhao_Liang1;~Yiming_Cui3;~Qifan_Wang2;~Tong_Geng1;~Wenguan_Wang4;~Dongfang_Liu1", "aff": "Rochester Institute of Technology;;Meta AI;University of Rochester;University of Technology Sydney;Rochester Institute of Technology", "aff_domain": "rit.edu;;fb.com;rochester.edu;uts.edu.au;rit.edu", "position": "PhD student;;Principal Researcher;Assistant Professor;Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nliang2023clusterfomer,\ntitle={ClusterFomer: Clustering As A Universal Visual Learner},\nauthor={James Chenhao Liang and Yiming Cui and Qifan Wang and Tong Geng and Wenguan Wang and Dongfang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S1KGaTSOTS}\n}", "github": "", "project": "", "reviewers": "KGpU;bUzy;QRN3;R2Rz;RD2x;x7UH", "pdf_size": 2922863, "rating": "4;5;6;6;6;7", "confidence": "5;5;4;4;3;4", "soundness": "3;2;3;3;2;3", "novelty": "2;3;2;3;2;3", "presentation": "4;1;3;3;2;4", "wc_summary": "63;38;67;80;94;73", "wc_strengths": "21;26;36;25;73;125", "wc_weaknesses": "232;83;172;202;116;47", "wc_questions": "9;104;43;18;74;78", "wc_limitations": "58;1;26;11;12;17", "wc_review": "383;252;344;336;369;340", "wc_reply_reviewers": "279;332;264;0;83;100", "wc_reply_authors": "335;432;592;0;44;571", "reply_reviewers": "1;2;2;0;1;1", "reply_authors": "3;3;3;1;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 4.166666666666667, 0.6871842709362768 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.8333333333333335, 1.0671873729054748 ], "wc_summary_avg": [ 69.16666666666667, 17.140757146507724 ], "wc_strengths_avg": [ 51.0, 37.38538038681253 ], "wc_weaknesses_avg": [ 142.0, 65.55150646628954 ], "wc_questions_avg": [ 54.333333333333336, 33.95421754199158 ], "wc_limitations_avg": [ 20.833333333333332, 18.215530614164265 ], "wc_review_avg": [ 337.3333333333333, 41.679997867349066 ], "wc_reply_reviewers_avg": [ 176.33333333333334, 121.16746904823735 ], "wc_reply_authors_avg": [ 329.0, 233.7534313473637 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.6871842709362768 ], "reply_authors_avg": [ 2.3333333333333335, 0.7453559924999298 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6859943405700354, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17447296173094774135&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "rit.edu;;fb.com;rochester.edu;uts.edu.au;rit.edu", "author_num": 6, "aff_unique_index": 
"0;1;2;3;0", "aff_unique_norm": "Rochester Institute of Technology;Meta;University of Rochester;University of Technology Sydney", "aff_unique_dep": ";Meta AI;;", "aff_unique_url": "https://www.rit.edu;https://meta.com;https://www.rochester.edu;https://www.uts.edu.au", "aff_unique_abbr": "RIT;Meta;U of R;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Australia" }, { "title": "Transportability for Bandits with Data from Different Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71633", "id": "S2k5dBb91q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8a8ce53beb3775522305e0a6033d4455-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S2k5dBb91q", "openreview": "https://openreview.net/forum?id=S2k5dBb91q", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71633", "video": "https://nips.cc/virtual/2023/poster/71633", "author_site": "Alexis Bellot, Alan Malek, Silvia Chiappa", "tldr": "", "abstract": "A unifying theme in the design of intelligent agents is to efficiently optimize a policy based on what prior knowledge of the problem is available and what actions can be taken to learn more about it. Bandits are a canonical instance of this task that has been intensely studied in the literature. Most methods, however, typically rely solely on an agent's experimentation in a single environment (or multiple closely related environments). In this paper, we relax this assumption and consider the design of bandit algorithms from a combination of batch data and qualitative assumptions about the relatedness across different environments, represented in the form of causal models. In particular, we show that it is possible to exploit invariances across environments, wherever they may occur in the underlying causal model, to consistently improve learning. 
The resulting bandit algorithm has a sub-linear regret bound with an explicit dependency on a term that captures how informative related environments are for the task at hand; and may have substantially lower regret than experimentation-only bandit instances.", "keywords": "Transportability;transfer learning;bandits", "primary_area": "", "supplementary_material": "", "author": "Alexis Bellot;Alan Malek;Silvia Chiappa", "authorids": "~Alexis_Bellot1;~Alan_Malek1;~Silvia_Chiappa1", "gender": "M;M;F", "homepage": ";http://www.alanmalek.com;https://csilviavr.github.io/", "dblp": "217/4339;https://dblp.uni-trier.de/pers/hd/m/Malek:Alan;", "google_scholar": ";NJqT9ukAAAAJ;https://scholar.google.co.uk/citations?user=GAvF3gUAAAAJ", "orcid": ";;0000-0002-1882-6842", "linkedin": ";;", "or_profile": "~Alexis_Bellot1;~Alan_Malek1;~Silvia_Chiappa1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;deepmind.com;google.com", "position": "Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbellot2023transportability,\ntitle={Transportability for Bandits with Data from Different Environments},\nauthor={Alexis Bellot and Alan Malek and Silvia Chiappa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S2k5dBb91q}\n}", "github": "", "project": "", "reviewers": "eikn;QDCf;KpW6;DogX", "pdf_size": 1693068, "rating": "6;6;6;6", "confidence": "1;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "34;21;86;82", "wc_strengths": "36;7;59;61", "wc_weaknesses": "1;19;102;30", "wc_questions": "1;2;198;72", "wc_limitations": "2;1;1;7", "wc_review": "74;50;446;252", "wc_reply_reviewers": "40;0;221;25", "wc_reply_authors": "778;52;367;52", "reply_reviewers": "1;0;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.75, 28.656369274560934 ], "wc_strengths_avg": [ 40.75, 21.821720830401986 ], "wc_weaknesses_avg": [ 38.0, 38.37316770869979 ], "wc_questions_avg": [ 68.25, 80.25077881242025 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 205.5, 159.27570436196476 ], "wc_reply_reviewers_avg": [ 71.5, 87.48857068211825 ], "wc_reply_authors_avg": [ 312.25, 298.06909853253825 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11362797737656294428&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "deepmind.com;deepmind.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Self-Refine: Iterative Refinement with Self-Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71632", "id": "S37hOerQLB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/91edff07232fb1b55a505a9e9f6c0ff3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S37hOerQLB", "openreview": 
"https://openreview.net/forum?id=S37hOerQLB", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71632", "video": "https://nips.cc/virtual/2023/poster/71632", "author_site": "Aman Madaan, Niket Tandon, Prakhar Gupta, Skyler Hallinan, Luyu Gao, Sarah Wiegreffe, Uri Alon, Nouha Dziri, Shrimai Prabhumoye, Yiming Yang, Shashank Gupta, Bodhisattwa Prasad Majumder, Katherine Hermann, Sean Welleck, Sean Welleck, Amir Yazdanbakhsh, Peter Clark", "tldr": "", "abstract": "Like humans, large language models (LLMs) do not always generate the best output on their first try. Motivated by how humans refine their written text, we introduce Self-Refine, an approach for improving initial outputs from LLMs through iterative feedback and refinement. The main idea is to generate an initial output using an LLMs; then, the same LLMs provides *feedback* for its output and uses it to *refine* itself, iteratively. Self-Refine does not require any supervised training data, additional training, or reinforcement learning, and instead uses a single LLM as the generator, refiner and the feedback provider. We evaluate Self-Refine across 7 diverse tasks, ranging from dialog response generation to mathematical reasoning, using state-of-the-art (GPT-3.5, ChatGPT, and GPT-4) LLMs. Across all evaluated tasks, outputs generated with Self-Refine are preferred by humans and automatic metrics over those generated with the same LLM using conventional one-step generation, improving by $\\sim$20\\% absolute on average in task performance. Our work demonstrates that even state-of-the-art LLMs like GPT-4 can be further improved at test-time using our simple, standalone approach.", "keywords": "LLMs;Iterative Refinement;Feedback-driven Generation", "primary_area": "", "supplementary_material": "/attachment/63bdd65f537aeb57d57478e701272187e6f53e32.pdf", "author": "Aman Madaan;Niket Tandon;Prakhar Gupta;Skyler Hallinan;Luyu Gao;Sarah Wiegreffe;Uri Alon;Nouha Dziri;Shrimai Prabhumoye;Yiming Yang;Shashank Gupta;Bodhisattwa Prasad Majumder;Katherine Hermann;Sean Welleck;Amir Yazdanbakhsh;Peter Clark", "authorids": "~Aman_Madaan1;~Niket_Tandon2;~Prakhar_Gupta1;~Skyler_Hallinan1;~Luyu_Gao1;~Sarah_Wiegreffe1;~Uri_Alon1;~Nouha_Dziri2;~Shrimai_Prabhumoye1;~Yiming_Yang1;~Shashank_Gupta3;~Bodhisattwa_Prasad_Majumder1;~Katherine_Hermann1;~Sean_Welleck1;~Amir_Yazdanbakhsh1;~Peter_Clark1", "gender": ";M;M;M;M;;M;;F;F;M;;F;;M;M", "homepage": "https://madaan.github.io;https://niket.tandon.info;https://prakharguptaz.github.io/;https://skylerhallinan.com/;https://luyug.github.io/;;https://urialon.ml/;;https://www.cs.cmu.edu/~sprabhum/;http://www.cs.cmu.edu/~yiming/;https://shashankgupta.info/;https://www.majumderb.com/;;;https://www.ayazdan.com/;https://allenai.org/team/peterc", "dblp": "138/1043;29/9923;121/0747;256/6863;;;40/2257-2;;203/8169;25/1666;;138/6177;254/1923;;44/8745;34/1184", "google_scholar": "jW9ts2cAAAAJ;9uWuZkUAAAAJ;YuFcRF0AAAAJ;mO_tZ94AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.co.il/citations?user=QBn7vq8AAAAJ;;A6JZ81cAAAAJ;MlZq4XwAAAAJ;U2Gz-NIAAAAJ;cEM1a5gAAAAJ;owcAYmEAAAAJ;;Vdu_sqwAAAAJ;o-5vyEsAAAAJ", "orcid": ";;;;;;;;;0000-0001-8322-607X;0000-0002-3683-3739;;;;0000-0001-8199-7671;", "linkedin": "amnmadaan/;;prakhar-gupta-100/;skyler-hallinan/;;;https://linkedin.com/in/urialon1/;;;yiming-yang-24100924/;shashank-gupta-5182bb28/;;;;ayazdanb/;peter-clark-a8b556/", "or_profile": 
"~Aman_Madaan1;~Niket_Tandon2;~Prakhar_Gupta1;~Skyler_Hallinan1;~Luyu_Gao1;~Sarah_Wiegreffe1;~Uri_Alon1;~Nouha_Dziri2;~Shrimai_Prabhumoye1;~Yiming_Yang1;~Shashank_Gupta3;~Bodhisattwa_Prasad_Majumder1;~Katherine_Hermann1;~Sean_Welleck1;~Amir_Yazdanbakhsh1;~Peter_Clark1", "aff": "Carnegie Mellon University;Allen Institute for Artificial Intelligence;Carnegie Mellon University;University of Washington;Carnegie Mellon University;;Carnegie Mellon University;;NVIDIA;School of Computer Science, Carnegie Mellon University;Allen Institute for Artificial Intelligence;University of California, San Diego;Google;;Google Brain;Allen Institute for Artificial Intelligence", "aff_domain": "cmu.edu;allenai.org;cmu.edu;uw.edu;cmu.edu;;cmu.edu;;nvidia.com;cs.cmu.edu;allenai.org;ucsd.edu;google.com;;google.com;allenai.org", "position": "PhD student;Researcher;PhD student;MS student;PhD student;;Postdoc;;Researcher;Full Professor;Researcher;PhD student;Researcher;;Researcher;Senior Research Manager", "bibtex": "@inproceedings{\nmadaan2023selfrefine,\ntitle={Self-Refine: Iterative Refinement with Self-Feedback},\nauthor={Aman Madaan and Niket Tandon and Prakhar Gupta and Skyler Hallinan and Luyu Gao and Sarah Wiegreffe and Uri Alon and Nouha Dziri and Shrimai Prabhumoye and Yiming Yang and Shashank Gupta and Bodhisattwa Prasad Majumder and Katherine Hermann and Sean Welleck and Amir Yazdanbakhsh and Peter Clark},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S37hOerQLB}\n}", "github": "", "project": "", "reviewers": "YNkB;UJCc;mJq7;artr", "pdf_size": 1865721, "rating": "6;6;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "76;36;145;98", "wc_strengths": "63;21;59;25", "wc_weaknesses": "195;209;50;52", "wc_questions": "90;84;37;1", "wc_limitations": "7;7;7;1", "wc_review": "431;357;298;177", "wc_reply_reviewers": "30;58;6;15", "wc_reply_authors": "20;174;40;23", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 88.75, 39.35336707322513 ], "wc_strengths_avg": [ 42.0, 19.1049731745428 ], "wc_weaknesses_avg": [ 126.5, 75.66538178057387 ], "wc_questions_avg": [ 53.0, 36.36619309193636 ], "wc_limitations_avg": [ 5.5, 2.598076211353316 ], "wc_review_avg": [ 315.75, 92.93916020709462 ], "wc_reply_reviewers_avg": [ 27.25, 19.715159142142372 ], "wc_reply_authors_avg": [ 64.25, 63.82152849940214 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1546, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4842387269707922169&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "cmu.edu;allenai.org;cmu.edu;uw.edu;cmu.edu;;cmu.edu;;nvidia.com;cs.cmu.edu;allenai.org;ucsd.edu;google.com;;google.com;allenai.org", "author_num": 16, "aff_unique_index": "0;1;0;2;0;0;3;0;1;4;5;5;1", "aff_unique_norm": "Carnegie Mellon University;Allen Institute for Artificial Intelligence;University of Washington;NVIDIA;University of California, San Diego;Google", "aff_unique_dep": ";;;NVIDIA Corporation;;Google", "aff_unique_url": 
"https://www.cmu.edu;https://allenai.org;https://www.washington.edu;https://www.nvidia.com;https://www.ucsd.edu;https://www.google.com", "aff_unique_abbr": "CMU;AI2;UW;NVIDIA;UCSD;Google", "aff_campus_unique_index": "1;2;3;3", "aff_campus_unique": ";Pittsburgh;San Diego;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Benefits of Being Distributional: Small-Loss Bounds for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71631", "id": "S3Y0VvegGm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/06fc38f5c21ae66ef955e28b7a78ece5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S3Y0VvegGm", "openreview": "https://openreview.net/forum?id=S3Y0VvegGm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71631", "video": "https://nips.cc/virtual/2023/poster/71631", "author_site": "Kaiwen Wang, Kevin Zhou, Runzhe Wu, Nathan Kallus, Wen Sun", "tldr": "", "abstract": "While distributional reinforcement learning (DistRL) has been empirically effective, the question of when and why it is better than vanilla, non-distributional RL has remained unanswered.\nThis paper explains the benefits of DistRL through the lens of small-loss bounds, which are instance-dependent bounds that scale with optimal achievable cost.\nParticularly, our bounds converge much faster than those from non-distributional approaches if the optimal cost is small.\nAs warmup, we propose a distributional contextual bandit (DistCB) algorithm, which we show enjoys small-loss regret bounds and empirically outperforms the state-of-the-art on three real-world tasks.\nIn online RL, we propose a DistRL algorithm that constructs confidence sets using maximum likelihood estimation. We prove that our algorithm enjoys novel small-loss PAC bounds in low-rank MDPs.\nAs part of our analysis, we introduce the $\\ell_1$ distributional eluder dimension which may be of independent interest. 
\nThen, in offline RL, we show that pessimistic DistRL enjoys small-loss PAC bounds that are novel to the offline setting and are more robust to bad single-policy coverage.", "keywords": "Reinforcement Learning Theory;Distributional Reinforcement Learning;Small-Loss Bounds;First-order regret", "primary_area": "", "supplementary_material": "/attachment/04a11a5168cf288a6f068bae3db1bffe4914f429.zip", "author": "Kaiwen Wang;Kevin Zhou;Runzhe Wu;Nathan Kallus;Wen Sun", "authorids": "~Kaiwen_Wang1;klz23@cornell.edu;~Runzhe_Wu1;~Nathan_Kallus1;~Wen_Sun1", "gender": "M;;M;;", "homepage": "https://kaiwenw.github.io/;;https://ziqian2000.github.io/;http://nathankallus.com/;https://wensun.github.io", "dblp": "220/3822;;294/9552;142/2900;", "google_scholar": "HsMheBUAAAAJ;;eBtFiuAAAAAJ;K2WfIlsAAAAJ;iOLC30YAAAAJ", "orcid": ";;;0000-0003-1672-0507;", "linkedin": "kaiwenw/;;;;", "or_profile": "~Kaiwen_Wang1;klz23@cornell.edu;~Runzhe_Wu1;~Nathan_Kallus1;~Wen_Sun1", "aff": "Department of Computer Science, Cornell University;;Cornell University;Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023the,\ntitle={The Benefits of Being Distributional: Small-Loss Bounds for Reinforcement Learning},\nauthor={Kaiwen Wang and Kevin Zhou and Runzhe Wu and Nathan Kallus and Wen Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S3Y0VvegGm}\n}", "github": "", "project": "", "reviewers": "L483;8JhK;nAEh;9rQf", "pdf_size": 539500, "rating": "6;7;7;7", "confidence": "3;4;3;2", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "147;44;138;125", "wc_strengths": "97;69;114;74", "wc_weaknesses": "40;639;173;84", "wc_questions": "1;2;337;101", "wc_limitations": "9;1;11;1", "wc_review": "294;755;773;385", "wc_reply_reviewers": "0;68;44;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 113.5, 40.88092464707715 ], "wc_strengths_avg": [ 88.5, 18.117670931993437 ], "wc_weaknesses_avg": [ 234.0, 238.68493877913622 ], "wc_questions_avg": [ 110.25, 137.07183335755016 ], "wc_limitations_avg": [ 5.5, 4.55521678957215 ], "wc_review_avg": [ 551.75, 214.76891651260897 ], "wc_reply_reviewers_avg": [ 28.0, 29.257477676655586 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15950936570126204554&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cs.cornell.edu;;cornell.edu;cornell.edu;cornell.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Equivariant Transfer Learning from Pretrained Models", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71630", "id": "S4NN3OOiwP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d02892a0055c94584f6394f8d069c8e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S4NN3OOiwP", "openreview": "https://openreview.net/forum?id=S4NN3OOiwP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71630", "video": "https://nips.cc/virtual/2023/poster/71630", "author_site": "Sourya Basu, Pulkit Katdare, Prasanna Sattigeri, Vijil Chenthamarakshan, Katherine Driggs-Campbell, Payel Das, Lav Varshney", "tldr": "", "abstract": "Efficient transfer learning algorithms are key to the success of foundation models on diverse downstream tasks even with limited data. Recent works of Basu et al. (2023) and Kaba et al. (2022) propose group averaging (equitune) and optimization-based methods, respectively, over features from group-transformed inputs to obtain equivariant outputs from non-equivariant neural networks. While Kaba et al. (2022) are only concerned with training from scratch, we find that equitune performs poorly on equivariant zero-shot tasks despite good finetuning results. We hypothesize that this is because pretrained models provide better quality features for certain transformations than others and simply averaging them is deleterious. Hence, we propose \u03bb-equitune that averages the features using importance weights, \u03bbs. These weights are learned directly from the data using a small neural network, leading to excellent zero-shot and finetuned results that outperform equitune. Further, we prove that \u03bb-equitune is equivariant and a universal approximator of equivariant functions. Additionally, we show that the method of Kaba et al. (2022) used with appropriate loss functions, which we call equizero, also gives excellent zero-shot and finetuned performance. Both equitune and equizero are special cases of \u03bb- equitune. To show the simplicity and generality of our method, we validate on a wide range of diverse applications and models such as 1) image classification using CLIP, 2) deep Q-learning, 3) fairness in natural language generation (NLG), 4) compositional generalization in languages, and 5) image classification using pretrained CNNs such as Resnet and Alexnet.", "keywords": "zero-shot learning;equivariant machine learning;equivariant fine-tuning;pretrained models", "primary_area": "", "supplementary_material": "/attachment/8d64c85f1030406620e5f68dd221d6eed688dfb9.pdf", "author": "Sourya Basu;Pulkit Katdare;Prasanna Sattigeri;Vijil Chenthamarakshan;Katherine Rose Driggs-Campbell;Payel Das;Lav R. 
Varshney", "authorids": "~Sourya_Basu1;~Pulkit_Katdare1;~Prasanna_Sattigeri1;~Vijil_Chenthamarakshan1;~Katherine_Rose_Driggs-Campbell1;~Payel_Das1;~Lav_R._Varshney1", "gender": "M;M;;M;;F;M", "homepage": "https://basusourya.github.io/;https://pulkitkatdare.web.illinois.edu/;;https://researcher.watson.ibm.com/researcher/view.php?person=us-ecvijil;;;http://www.varshney.csl.illinois.edu/", "dblp": "199/8366;198/0632;00/7428;;;56/7926;36/4028", "google_scholar": "https://scholar.google.co.in/citations?user=_kpbz3cAAAAJ;yC1tsz8AAAAJ;m-s38ikAAAAJ;g9hboJ0AAAAJ;;;https://scholar.google.com.tw/citations?user=JIJGu30AAAAJ", "orcid": ";;0000-0003-4435-0486;;;;0000-0003-2798-5308", "linkedin": ";;prasannasattigeri/;;;;", "or_profile": "~Sourya_Basu1;~Pulkit_Katdare1;~Prasanna_Sattigeri1;~Vijil_Chenthamarakshan1;~Katherine_Rose_Driggs-Campbell1;~Payel_Das1;~Lav_R._Varshney1", "aff": "Mitsubishi Electric Research Labs;University of Illinois, Urbana Champaign;IBM Research;International Business Machines;;IBM, International Business Machines;University of Illinois, Urbana Champaign", "aff_domain": "merl.com;illinois.edu;ibm.com;ibm.com;;us.ibm.com;illinois.edu", "position": "Intern;PhD student;Researcher;Senior Technical Staff member;;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nbasu2023efficient,\ntitle={Efficient Equivariant Transfer Learning from Pretrained Models},\nauthor={Sourya Basu and Pulkit Katdare and Prasanna Sattigeri and Vijil Chenthamarakshan and Katherine Rose Driggs-Campbell and Payel Das and Lav R. Varshney},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S4NN3OOiwP}\n}", "github": "", "project": "", "reviewers": "TNLv;vsSr;8HWc;DNsq", "pdf_size": 1658368, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "53;109;66;257", "wc_strengths": "74;30;145;250", "wc_weaknesses": "125;117;86;216", "wc_questions": "6;9;77;38", "wc_limitations": "1;6;1;13", "wc_review": "259;271;375;774", "wc_reply_reviewers": "15;21;27;173", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 121.25, 81.06902922818307 ], "wc_strengths_avg": [ 124.75, 83.14257333039434 ], "wc_weaknesses_avg": [ 136.0, 48.43036237733515 ], "wc_questions_avg": [ 32.5, 28.570089254323307 ], "wc_limitations_avg": [ 5.25, 4.9180788932265 ], "wc_review_avg": [ 419.75, 209.4413700776425 ], "wc_reply_reviewers_avg": [ 59.0, 65.95452979136459 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7960149417983870702&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "merl.com;illinois.edu;ibm.com;ibm.com;;us.ibm.com;illinois.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;4;1", "aff_unique_norm": "Mitsubishi Electric Research Laboratories;University of Illinois Urbana-Champaign;IBM;International Business Machines Corporation;International Business Machines", "aff_unique_dep": ";;IBM Research;;", "aff_unique_url": 
"https://www.merl.com;https://illinois.edu;https://www.ibm.com/research;https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "MERL;UIUC;IBM;IBM;IBM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Clock and the Pizza: Two Stories in Mechanistic Explanation of Neural Networks", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71629", "id": "S5wmbQc1We", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/56cbfbf49937a0873d451343ddc8c57d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S5wmbQc1We", "openreview": "https://openreview.net/forum?id=S5wmbQc1We", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71629", "video": "https://nips.cc/virtual/2023/poster/71629", "author_site": "Ziqian Zhong, Ziming Liu, Max Tegmark, Jacob Andreas", "tldr": "", "abstract": "Do neural networks, trained on well-understood algorithmic tasks, reliably rediscover known algorithms? Several recent studies, on tasks ranging from group operations to in-context linear regression, have suggested that the answer is yes. Using modular addition as a prototypical problem, we show that algorithm discovery in neural networks is sometimes more complex: small changes to model hyperparameters and initializations can induce discovery of qualitatively different algorithms from a fixed training set, and even learning of multiple different solutions in parallel. In modular addition, we specifically show that models learn a known *Clock* algorithm, a previously undescribed, less intuitive, but comprehensible procedure we term the *Pizza* algorithm, and a variety of even more complex procedures. 
Our results show that even simple learning problems can admit a surprising diversity of solutions, motivating the development of new tools for mechanistically characterizing the behavior of neural networks across the algorithmic phase space.", "keywords": "mechanistic interpretability;algorithmic phase transitions;arithmetic learning;neural network;transformer;ensemble", "primary_area": "", "supplementary_material": "/attachment/924775104b53fd9742ebd3872f156cc829bcbfdb.zip", "author": "Ziqian Zhong;Ziming Liu;Max Tegmark;Jacob Andreas", "authorids": "~Ziqian_Zhong1;~Ziming_Liu2;~Max_Tegmark1;~Jacob_Andreas1", "gender": "Not Specified;M;;M", "homepage": "https://fjzzq2002.github.io/;https://kindxiaoming.github.io/;https://space.mit.edu/home/tegmark/;http://web.mit.edu/jda/www", "dblp": "314/7033.html;;25/6578;97/8154", "google_scholar": "iZpSjEYAAAAJ;0b32RKAAAAAJ;eBXEZxgAAAAJ;dnZ8udEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ziqian_Zhong1;~Ziming_Liu2;~Max_Tegmark1;~Jacob_Andreas1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Microsoft", "aff_domain": "mit.edu;mit.edu;mit.edu;microsoft.com", "position": "Undergrad student;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhong2023the,\ntitle={The Clock and the Pizza: Two Stories in Mechanistic Explanation of Neural Networks},\nauthor={Ziqian Zhong and Ziming Liu and Max Tegmark and Jacob Andreas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S5wmbQc1We}\n}", "github": "", "project": "", "reviewers": "DtSC;7M7r;HMiA;wANB;ZmSF", "pdf_size": 10585362, "rating": "7;7;7;7;8", "confidence": "3;4;3;4;4", "soundness": "3;4;4;3;4", "novelty": "3;3;4;3;3", "presentation": "4;2;4;3;4", "wc_summary": "92;163;127;64;92", "wc_strengths": "42;99;67;23;54", "wc_weaknesses": "46;691;52;26;64", "wc_questions": "304;194;31;538;29", "wc_limitations": "5;8;8;24;34", "wc_review": "489;1155;285;675;273", "wc_reply_reviewers": "21;94;12;46;9", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 107.6, 34.15611219093883 ], "wc_strengths_avg": [ 57.0, 25.51078203426935 ], "wc_weaknesses_avg": [ 175.8, 257.8933112742554 ], "wc_questions_avg": [ 219.2, 190.2917759652266 ], "wc_limitations_avg": [ 15.8, 11.285388783732706 ], "wc_review_avg": [ 575.4, 325.2996157390906 ], "wc_reply_reviewers_avg": [ 36.4, 31.601265797432863 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7297086462971835972&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "mit.edu;mit.edu;mit.edu;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "MIT;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "A3FL: Adversarially Adaptive Backdoor Attacks to Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71628", "id": "S6ajVZy6FA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c07d71ff0bc042e4b9acd626a79597fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S6ajVZy6FA", "openreview": "https://openreview.net/forum?id=S6ajVZy6FA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71628", "video": "https://nips.cc/virtual/2023/poster/71628", "author_site": "Hangfan Zhang, Jinyuan Jia, Jinghui Chen, Lu Lin, Dinghao Wu", "tldr": "", "abstract": "Federated Learning (FL) is a distributed machine learning paradigm that allows multiple clients to train a global model collaboratively without sharing their local training data. Due to its distributed nature, many studies have shown that it is vulnerable to backdoor attacks. However, existing studies usually used a predetermined, fixed backdoor trigger or optimized it based solely on the local data and model without considering the global training dynamics. This leads to sub-optimal and less durable attack effectiveness, i.e., their attack success rate is low when the attack budget is limited and decreases quickly if the attacker can no longer perform attacks anymore. To address these limitations, we propose A3FL, a new backdoor attack which adversarially adapts the backdoor trigger to make it less likely to be removed by the global training dynamics. Our key intuition is that the difference between the global model and the local model in FL makes the local-optimized trigger much less effective when transferred to the global model. We solve this by optimizing the trigger to even survive the worst-case scenario where the global model was trained to directly unlearn the trigger. Extensive experiments on benchmark datasets are conducted for twelve existing defenses to comprehensively evaluate the effectiveness of our A3FL. 
Our code is available at https://github.com/hfzhang31/A3FL.", "keywords": "Backdoor Attack;Federated Learning", "primary_area": "", "supplementary_material": "/attachment/4324dc58a01f62c9b43c6e904248b88640f720ed.zip", "author": "Hangfan Zhang;Jinyuan Jia;Jinghui Chen;Lu Lin;Dinghao Wu", "authorids": "~Hangfan_Zhang1;~Jinyuan_Jia2;~Jinghui_Chen1;~Lu_Lin2;~Dinghao_Wu1", "gender": "Not Specified;;M;F;", "homepage": ";https://jinyuan-jia.github.io/;https://jinghuichen.github.io/;https://louise-lulin.github.io;", "dblp": "266/8368.html;24/5124-1.html;67/5633;86/2209-1;", "google_scholar": ";iyg4ytkAAAAJ;mKia7Y4AAAAJ;8N04pBgAAAAJ;", "orcid": ";0000-0002-9785-7769;;0000-0002-2539-3352;", "linkedin": ";;;lulin92/;", "or_profile": "~Hangfan_Zhang1;~Jinyuan_Jia2;~Jinghui_Chen1;~Lu_Lin2;~Dinghao_Wu1", "aff": "Pennsylvania State University;University of Illinois Urbana-Champaign;Pennsylvania State University;Pennsylvania State University;", "aff_domain": "psu.edu;cs.illinois.edu;psu.edu;psu.edu;", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor;", "bibtex": "@inproceedings{\nzhang2023afl,\ntitle={A3{FL}: Adversarially Adaptive Backdoor Attacks to Federated Learning},\nauthor={Hangfan Zhang and Jinyuan Jia and Jinghui Chen and Lu Lin and Dinghao Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S6ajVZy6FA}\n}", "github": "", "project": "", "reviewers": "k1k8;BFfe;i2mP;iLct", "pdf_size": 1324772, "rating": "4;5;7;7", "confidence": "5;4;4;2", "soundness": "2;3;4;3", "novelty": "2;2;4;3", "presentation": "2;3;3;3", "wc_summary": "81;115;116;67", "wc_strengths": "30;44;152;40", "wc_weaknesses": "794;112;49;35", "wc_questions": "7;216;19;6", "wc_limitations": "10;249;33;5", "wc_review": "922;736;369;153", "wc_reply_reviewers": "86;30;77;42", "wc_reply_authors": "629;26;61;19", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 21.33512362279628 ], "wc_strengths_avg": [ 66.5, 49.62610200287748 ], "wc_weaknesses_avg": [ 247.5, 316.85209483290464 ], "wc_questions_avg": [ 62.0, 89.05896922825909 ], "wc_limitations_avg": [ 74.25, 101.44302588152622 ], "wc_review_avg": [ 545.0, 301.35112410608326 ], "wc_reply_reviewers_avg": [ 58.75, 23.35995505132662 ], "wc_reply_authors_avg": [ 183.75, 257.55715385133453 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7505683356701914, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9044957325615864903&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "psu.edu;cs.illinois.edu;psu.edu;psu.edu;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Pennsylvania State University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://illinois.edu", "aff_unique_abbr": "PSU;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Posterior Sampling for Competitive RL: Function Approximation and Partial Observation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71627", "id": "S75ccNdOYG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/545a674417b8c4bcae96eceffad1c4f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S75ccNdOYG", "openreview": "https://openreview.net/forum?id=S75ccNdOYG", "poster": "/media/PosterPDFs/NeurIPS%202023/71627.png?t=1701602365.4494953", "slides": "https://nips.cc/virtual/2023/poster/71627", "video": "https://nips.cc/virtual/2023/poster/71627", "author_site": "Shuang Qiu, Ziyu Dai, Han Zhong, Zhaoran Wang, Zhuoran Yang, Tong Zhang", "tldr": "", "abstract": "This paper investigates posterior sampling algorithms for competitive reinforcement learning (RL) in the context of general function approximations. Focusing on zero-sum Markov games (MGs) under two critical settings, namely self-play and adversarial learning, we first propose the self-play and adversarial generalized eluder coefficient (GEC) as complexity measures for function approximation, capturing the exploration-exploitation trade-off in MGs. Based on self-play GEC, we propose a model-based self-play posterior sampling method to control both players to learn Nash equilibrium, which can successfully handle the partial observability of states. Furthermore, we identify a set of partially observable MG models fitting MG learning with the adversarial policies of the opponent. Incorporating the adversarial GEC, we propose a model-based posterior sampling method for learning adversarial MG with potential partial observability. We further provide low regret bounds for proposed algorithms that can scale sublinearly with the proposed GEC and the number of episodes $T$. To the best of our knowledge, we for the first time develop generic model-based posterior sampling algorithms for competitive RL that can be applied to a majority of tractable zero-sum MG classes in both fully observable and partially observable MGs with self-play and adversarial learning.", "keywords": "Markov game;Partial observation;Function approximation;Posterior sampling;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/f9e7af9bb2d733301cf6b178c76b343bcc495c7c.pdf", "author": "Shuang Qiu;Ziyu Dai;Han Zhong;Zhaoran Wang;Zhuoran Yang;Tong Zhang", "authorids": "~Shuang_Qiu2;~Ziyu_Dai1;~Han_Zhong1;~Zhaoran_Wang1;~Zhuoran_Yang1;~Tong_Zhang2", "gender": "M;M;;Not Specified;M;M", "homepage": "https://shq-ml.github.io/;;https://hanzhong-ml.github.io/;https://zhaoranwang.github.io/;https://zhuoranyang.github.io/;http://tongzhang-ml.org", "dblp": ";;137/8096.html;117/2756;;07/4227-1", "google_scholar": "-Z7fY00AAAAJ;;Bk5q_pAAAAAJ;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ;;LurWtuYAAAAJ", "orcid": ";;;;;0000-0002-5511-2558", "linkedin": ";https://www.linkedin.cn/incareer/in/ACoAADX8U_UBF9LJp3CFnGp6mOTiok_6nF150uM;;;;", "or_profile": "~Shuang_Qiu2;~Ziyu_Dai1;~Han_Zhong1;~Zhaoran_Wang1;~Zhuoran_Yang1;~Tong_Zhang2", "aff": ";Fudan University;Peking University;;Yale University;Hong Kong University of Science and Technology", "aff_domain": ";fudan.edu.cn;stu.pku.edu.cn;;yale.edu;ust.hk", "position": ";Undergrad student;PhD student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nqiu2023posterior,\ntitle={Posterior Sampling for Competitive {RL}: Function Approximation and Partial Observation},\nauthor={Shuang Qiu and Ziyu Dai and Han Zhong and Zhaoran Wang and Zhuoran Yang and Tong Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S75ccNdOYG}\n}", "github": "", "project": "", "reviewers": "qywY;pkbQ;Gu3i;DqQm", "pdf_size": 613928, "rating": "5;5;6;7", "confidence": "2;1;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "62;49;62;68", "wc_strengths": "19;31;50;49", "wc_weaknesses": "193;13;38;98", "wc_questions": "35;28;6;1", "wc_limitations": "9;1;1;1", "wc_review": "318;122;157;217", "wc_reply_reviewers": "23;39;9;16", "wc_reply_authors": "37;222;27;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.25, 6.94172168845741 ], "wc_strengths_avg": [ 37.25, 12.968712349342937 ], "wc_weaknesses_avg": [ 85.5, 69.32712311931024 ], "wc_questions_avg": [ 17.5, 14.326548781894402 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 203.5, 74.32529851941396 ], "wc_reply_reviewers_avg": [ 21.75, 11.121488209767612 ], "wc_reply_authors_avg": [ 78.25, 83.09444927310128 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5204336303934098945&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";fudan.edu.cn;stu.pku.edu.cn;;yale.edu;ust.hk", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Fudan University;Peking University;Yale University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.fudan.edu.cn;http://www.pku.edu.cn;https://www.yale.edu;https://www.ust.hk", "aff_unique_abbr": "Fudan;Peking U;Yale;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Learning Nonparametric Latent Causal Graphs with Unknown Interventions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71626", "id": "S8DFqgmEbe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bdeab378efe6eb289714e2a5abc6ed42-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S8DFqgmEbe", "openreview": "https://openreview.net/forum?id=S8DFqgmEbe", "poster": "/media/PosterPDFs/NeurIPS%202023/71626.png?t=1701408324.1467106", "slides": "https://nips.cc/virtual/2023/poster/71626", "video": "https://nips.cc/virtual/2023/poster/71626", "author_site": "Yibo Jiang, Bryon Aragam", "tldr": "", "abstract": "We establish conditions under which latent causal graphs are nonparametrically identifiable and can be reconstructed from unknown interventions in the latent space. Our primary focus is the identification of the latent structure in measurement models without parametric assumptions such as linearity or Gaussianity. Moreover, we do not assume the number of hidden variables is known, and we show that at most one unknown intervention per hidden variable is needed. This extends a recent line of work on learning causal representations from observations and interventions. The proofs are constructive and introduce two new graphical concepts---_imaginary subsets_ and _isolated edges_---that may be useful in their own right. 
As a matter of independent interest, the proofs also involve a novel characterization of the limits of edge orientations within the equivalence class of DAGs induced by _unknown_ interventions. These are the first results to characterize the conditions under which causal representations are identifiable without making any parametric assumptions in a general setting with unknown interventions and without faithfulness.", "keywords": "graphical models;directed acyclic graphs;causality;identifiability;causal representation learning;unknown interventions", "primary_area": "", "supplementary_material": "", "author": "Yibo Jiang;Bryon Aragam", "authorids": "~Yibo_Jiang2;~Bryon_Aragam1", "gender": "M;", "homepage": ";http://bryonaragam.com/", "dblp": "54/2193;140/7564", "google_scholar": "hvQo2gQAAAAJ;u-W3_9QAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yibo_Jiang2;~Bryon_Aragam1", "aff": "University of Chicago;Booth School of Business", "aff_domain": "uchicago.edu;chicagobooth.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\njiang2023learning,\ntitle={Learning Nonparametric Latent Causal Graphs with Unknown Interventions},\nauthor={Yibo Jiang and Bryon Aragam},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S8DFqgmEbe}\n}", "github": "", "project": "", "reviewers": "bZuh;tfcU;Yp1n;3v5g;ZNv1", "pdf_size": 514110, "rating": "4;5;6;6;6", "confidence": "2;3;4;4;1", "soundness": "3;3;3;2;3", "novelty": "2;2;3;3;3", "presentation": "2;4;3;3;2", "wc_summary": "68;165;69;123;128", "wc_strengths": "68;112;19;55;34", "wc_weaknesses": "416;385;345;99;83", "wc_questions": "121;215;563;32;36", "wc_limitations": "1;29;1;8;7", "wc_review": "674;906;997;317;288", "wc_reply_reviewers": "0;33;443;99;21", "wc_reply_authors": "26;22;713;87;15", "reply_reviewers": "0;1;2;2;1", "reply_authors": "2;2;3;3;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 110.6, 37.31273241133648 ], "wc_strengths_avg": [ 57.6, 32.00374978029918 ], "wc_weaknesses_avg": [ 265.6, 144.41551163223428 ], "wc_questions_avg": [ 193.4, 196.5386475988883 ], "wc_limitations_avg": [ 9.2, 10.322790320451153 ], "wc_review_avg": [ 636.4, 292.41381636304396 ], "wc_reply_reviewers_avg": [ 119.2, 165.24938729084593 ], "wc_reply_authors_avg": [ 172.6, 271.4292541344798 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30012252399939054, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13727669265290582856&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "uchicago.edu;chicagobooth.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Chicago;University of Chicago Booth School of Business", "aff_unique_dep": ";Booth School of Business", "aff_unique_url": "https://www.uchicago.edu;https://www.chicagobooth.edu", "aff_unique_abbr": "UChicago;Booth", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "$p$-Poisson surface reconstruction in curl-free flow from point clouds", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71625", "id": "S8hg5LpFvz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd18189308a4c45c7d71ca83acf3deaa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=S8hg5LpFvz", "openreview": "https://openreview.net/forum?id=S8hg5LpFvz", "poster": "/media/PosterPDFs/NeurIPS%202023/71625.png?t=1702075703.996896", "slides": "https://nips.cc/virtual/2023/poster/71625", "video": "https://nips.cc/virtual/2023/poster/71625", "author_site": "Yesom Park, Taekyung Lee, Jooyoung Hahn, Myungjoo Kang", "tldr": "", "abstract": "The aim of this paper is the reconstruction of a smooth surface from an unorganized point cloud sampled by a closed surface, with the preservation of geometric shapes, without any further information other than the point cloud. Implicit neural representations (INRs) have recently emerged as a promising approach to surface reconstruction. However, the reconstruction quality of existing methods relies on ground truth implicit function values or surface normal vectors. In this paper, we show that proper supervision of partial differential equations and fundamental properties of differential vector fields are sufficient to robustly reconstruct high-quality surfaces. We cast the $p$-Poisson equation to learn a signed distance function (SDF) and the reconstructed surface is implicitly represented by the zero-level set of the SDF. For efficient training, we develop a variable splitting structure by introducing a gradient of the SDF as an auxiliary variable and impose the $p$-Poisson equation directly on the auxiliary variable as a hard constraint. Based on the curl-free property of the gradient field, we impose a curl-free constraint on the auxiliary variable, which leads to a more faithful reconstruction. Experiments on standard benchmark datasets show that the proposed INR provides a superior and robust reconstruction. 
The code is available at https://github.com/Yebbi/PINC.", "keywords": "Surface reconstruction;Signed distance function;Implicit neural representations;Point cloud", "primary_area": "", "supplementary_material": "/attachment/525b1678bce86dc3a24697fe5dd3666366957b68.zip", "author": "Yesom Park;Taekyung Lee;Jooyoung Hahn;Myungjoo Kang", "authorids": "~Yesom_Park1;~Taekyung_Lee2;~Jooyoung_Hahn1;~Myungjoo_Kang1", "gender": "F;;;", "homepage": ";https://www.math.sk/hahn/;http://ncia.snu.ac.kr/;https://github.com/LEETK99", "dblp": "213/0699;43/2681;64/5657.html;", "google_scholar": "https://scholar.google.com/citations?hl=ko;4w2vNhcAAAAJ;;", "orcid": ";0000-0003-4357-1009;;", "linkedin": ";jooyoung-hahn-252b5233/;;", "or_profile": "~Yesom_Park1;~Jooyoung_Hahn1;~Myungjoo_Kang1;~TAEKYUNG_LEE1", "aff": "Seoul National University;Slovak University of Technology in Bratislava;Seoul National University;SNU", "aff_domain": "snu.ac.kr;stuba.sk;snu.ac.kr;gsai.snu.ac.kr", "position": "PhD student;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\npark2023ppoisson,\ntitle={\\$p\\$-Poisson surface reconstruction in curl-free flow from point clouds},\nauthor={Yesom Park and Taekyung Lee and Jooyoung Hahn and Myungjoo Kang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=S8hg5LpFvz}\n}", "github": "", "project": "", "reviewers": "WkvV;ymPA;rqMu;2pnE;FaEi", "pdf_size": 8247320, "rating": "5;6;6;7;7", "confidence": "5;2;4;5;4", "soundness": "1;3;3;2;3", "novelty": "3;3;3;3;3", "presentation": "1;3;2;3;3", "wc_summary": "88;64;71;59;98", "wc_strengths": "28;92;36;13;60", "wc_weaknesses": "71;50;271;47;28", "wc_questions": "90;3;200;261;22", "wc_limitations": "21;7;15;1;5", "wc_review": "298;216;593;381;213", "wc_reply_reviewers": "112;18;253;49;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 76.0, 14.737706741552431 ], "wc_strengths_avg": [ 45.8, 27.657910260900042 ], "wc_weaknesses_avg": [ 93.4, 89.84119322448917 ], "wc_questions_avg": [ 115.2, 100.3581586120431 ], "wc_limitations_avg": [ 9.8, 7.222188034107115 ], "wc_review_avg": [ 340.2, 140.68319018276492 ], "wc_reply_reviewers_avg": [ 86.4, 91.60043668018183 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4896477453074270657&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;stuba.sk;snu.ac.kr;gsai.snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Seoul National University;Slovak University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.stuba.sk", "aff_unique_abbr": "SNU;STU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Bratislava;Seoul", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;Slovakia" }, { "title": "Category-Extensible Out-of-Distribution Detection via Hierarchical Context Descriptions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71624", "id": "SA2KrosYjY", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/695b6f9490d27d852e439e35c56e73e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SA2KrosYjY", "openreview": "https://openreview.net/forum?id=SA2KrosYjY", "poster": "/media/PosterPDFs/NeurIPS%202023/71624.png?t=1699842978.2862012", "slides": "https://nips.cc/virtual/2023/poster/71624", "video": "https://nips.cc/virtual/2023/poster/71624", "author_site": "Kai Liu, Zhihang Fu, Chao Chen, Sheng Jin, Ze Chen, Mingyuan Tao, Rongxin Jiang, Jieping Ye", "tldr": "", "abstract": "The key to OOD detection has two aspects: generalized feature representation and precise category description. Recently, vision-language models such as CLIP provide significant advances in both two issues, but constructing precise category descriptions is still in its infancy due to the absence of unseen categories. This work introduces two hierarchical contexts, namely perceptual context and spurious context, to carefully describe the precise category boundary through automatic prompt tuning. Specifically, perceptual contexts perceive the inter-category difference (e.g., cats vs apples) for current classification tasks, while spurious contexts further identify spurious (similar but exactly not) OOD samples for every single category (e.g., cats vs panthers, apples vs peaches). The two contexts hierarchically construct the precise description for a certain category, which is, first roughly classifying a sample to the predicted category and then delicately identifying whether it is truly an ID sample or actually OOD. Moreover, the precise descriptions for those categories within the vision-language framework present a novel application: CATegory-EXtensible OOD detection (CATEX). One can efficiently extend the set of recognizable categories by simply merging the hierarchical contexts learned under different sub-task settings. And extensive experiments are conducted to demonstrate CATEX\u2019s effectiveness, robustness, and category-extensibility. For instance, CATEX consistently surpasses the rivals by a large margin with several protocols on the challenging ImageNet-1K dataset. 
In addition, we offer new insights on how to efficiently scale up the prompt engineering in vision-language models to recognize thousands of object categories, as well as how to incorporate large language models (like GPT-3) to boost zero-shot applications.", "keywords": "out-of-distribution detection;vision-language models;category-extendable classification", "primary_area": "", "supplementary_material": "/attachment/7cf8988fc364d4941185d7e863b6a3049bee8627.pdf", "author": "Kai Liu;Zhihang Fu;Chao Chen;Sheng Jin;Ze Chen;Mingyuan Tao;Rongxin Jiang;Jieping Ye", "authorids": "~Kai_Liu8;~Zhihang_Fu1;~Chao_Chen19;~Sheng_Jin3;~Ze_Chen3;~Mingyuan_Tao1;~Rongxin_Jiang1;~Jieping_Ye4", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://kail8.github.io/;https://zhihangfu.top/;https://chaochen.cc/;;;;https://person.zju.edu.cn/0008430;http://yelabs.net/", "dblp": ";207/1894;66/3019-26.html;70/6780-2;15/4184-1;289/5997;10/2064-1;03/5454", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;e_e3Ur0AAAAJ;https://scholar.google.com.hk/citations?user=_xDUAtQAAAAJ;https://scholar.google.com/citations?view_op=list_works;hVqKZq0AAAAJ;https://scholar.google.com/citations?hl=en;;T9AzhwcAAAAJ", "orcid": ";;;0000-0001-7254-1664;;;;0000-0001-8662-5818", "linkedin": ";;;;;;;", "or_profile": "~Kai_Liu8;~Zhihang_Fu1;~Chao_Chen19;~Sheng_Jin3;~Ze_Chen3;~Mingyuan_Tao1;~Rongxin_Jiang1;~Jieping_Ye4", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;;Zhejiang University;Alibaba DAMO Academy", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;;zju.edu.cn;alibaba-inc.com", "position": "Intern;Researcher;Researcher;Researcher;Researcher;;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliu2023categoryextensible,\ntitle={Category-Extensible Out-of-Distribution Detection via Hierarchical Context Descriptions},\nauthor={Kai Liu and Zhihang Fu and Chao Chen and Sheng Jin and Ze Chen and Mingyuan Tao and Rongxin Jiang and Jieping Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SA2KrosYjY}\n}", "github": "", "project": "", "reviewers": "194m;R97w;oU43;rNwN;ufZG", "pdf_size": 2693253, "rating": "6;6;6;6;6", "confidence": "3;3;5;4;3", "soundness": "2;2;3;3;2", "novelty": "3;2;3;2;3", "presentation": "3;2;4;2;2", "wc_summary": "78;24;65;49;55", "wc_strengths": "106;35;42;28;91", "wc_weaknesses": "458;106;71;129;116", "wc_questions": "198;6;12;8;45", "wc_limitations": "73;22;8;1;5", "wc_review": "913;193;198;215;312", "wc_reply_reviewers": "118;28;34;0;0", "wc_reply_authors": "473;34;46;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 54.2, 18.015548839821673 ], "wc_strengths_avg": [ 60.4, 31.777979797337654 ], "wc_weaknesses_avg": [ 176.0, 142.30811642348442 ], "wc_questions_avg": [ 53.8, 73.48578093753919 ], "wc_limitations_avg": [ 21.8, 26.55861442169 ], "wc_review_avg": [ 366.2, 276.79552019496265 ], "wc_reply_reviewers_avg": [ 36.0, 43.32204981299938 ], "wc_reply_authors_avg": [ 110.6, 182.1203997359988 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15121906445165100331&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;;zju.edu.cn;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Alibaba Group;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.zju.edu.cn", "aff_unique_abbr": "Alibaba;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Self-Interpretable Graph-Level Anomaly Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71623", "id": "SAzaC8f3cM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c6f06863df46de009a7a41b41c95cad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SAzaC8f3cM", "openreview": "https://openreview.net/forum?id=SAzaC8f3cM", "poster": "/media/PosterPDFs/NeurIPS%202023/71623.png?t=1701389958.9541261", "slides": "https://nips.cc/virtual/2023/poster/71623", "video": "https://nips.cc/virtual/2023/poster/71623", "author_site": "Yixin Liu, Kaize Ding, Qinghua Lu, Fuyi Li, Leo Yu Zhang, Shirui Pan", "tldr": "", "abstract": "Graph-level anomaly detection (GLAD) aims to identify graphs that exhibit notable dissimilarity compared to the majority in a collection. However, current works primarily focus on evaluating graph-level abnormality while failing to provide meaningful explanations for the predictions, which largely limits their reliability and application scope. In this paper, we investigate a new challenging problem, explainable GLAD, where the learning objective is to predict the abnormality of each graph sample with corresponding explanations, i.e., the vital subgraph that leads to the predictions. To address this challenging problem, we propose a Self-Interpretable Graph aNomaly dETection model (SIGNET for short) that detects anomalous graphs as well as generates informative explanations simultaneously. Specifically, we first introduce the multi-view subgraph information bottleneck (MSIB) framework, serving as the design basis of our self-interpretable GLAD approach. This way SIGNET is able to not only measure the abnormality of each graph based on cross-view mutual information but also provide informative graph rationales by extracting bottleneck subgraphs from the input graph and its dual hypergraph in a self-supervised way. 
Extensive experiments on 16 datasets demonstrate the anomaly detection capability and self-interpretability of SIGNET.", "keywords": "Anomaly Detection;Graph Neural Networks;Explanation;Self-Interpretation", "primary_area": "", "supplementary_material": "/attachment/b030ea00ed934526a38e60b2a0d4ec9df10fc07f.pdf", "author": "Yixin Liu;Kaize Ding;Qinghua Lu;Fuyi Li;Leo Yu Zhang;Shirui Pan", "authorids": "~Yixin_Liu3;~Kaize_Ding1;~Qinghua_Lu1;~Fuyi_Li1;~Leo_Yu_Zhang1;~Shirui_Pan1", "gender": "M;M;F;;M;", "homepage": "https://yixinliu233.giuhub.io;https://kaize0409.github.io/;https://people.csiro.au/L/Q/Qinghua-Lu;;https://leozhangcs.github.io/;", "dblp": "140/7348-1;234/6878;19/9978;;117/3526;91/8171", "google_scholar": ";PI3myr8AAAAJ;;;https://scholar.google.com.hk/citations?user=JK21OM0AAAAJ;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ", "orcid": ";;;;0000-0001-9330-2662;0000-0003-0794-527X", "linkedin": ";;;;;", "or_profile": "~Yixin_Liu3;~Kaize_Ding1;~Qinghua_Lu1;~Fuyi_Li1;~Leo_Yu_Zhang1;~Shirui_Pan1", "aff": "Monash University;Arizona State University;China University of Petroleum;;Griffith University;Griffith University", "aff_domain": "monash.edu;asu.edu;upc.edu.cn;;griffith.edu.au;griffith.edu.au", "position": "PhD student;PhD student;Associate Professor;;Researcher;Full Professor", "bibtex": "@inproceedings{\nliu2023towards,\ntitle={Towards Self-Interpretable Graph-Level Anomaly Detection},\nauthor={Yixin Liu and Kaize Ding and Qinghua Lu and Fuyi Li and Leo Yu Zhang and Shirui Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SAzaC8f3cM}\n}", "github": "", "project": "", "reviewers": "QzdR;yuBa;vtQs;6qZe", "pdf_size": 999390, "rating": "5;5;6;6", "confidence": "3;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "102;63;82;62", "wc_strengths": "53;57;107;62", "wc_weaknesses": "132;231;307;158", "wc_questions": "419;83;118;58", "wc_limitations": "1;21;55;19", "wc_review": "707;455;669;359", "wc_reply_reviewers": "11;42;26;644", "wc_reply_authors": "767;76;23;1249", "reply_reviewers": "1;1;1;2", "reply_authors": "3;2;2;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.25, 16.361158271956175 ], "wc_strengths_avg": [ 69.75, 21.741377601246892 ], "wc_weaknesses_avg": [ 207.0, 68.19457456425694 ], "wc_questions_avg": [ 169.5, 145.61679161415418 ], "wc_limitations_avg": [ 24.0, 19.519221295943137 ], "wc_review_avg": [ 547.5, 145.1645617910928 ], "wc_reply_reviewers_avg": [ 180.75, 267.68206421051076 ], "wc_reply_authors_avg": [ 528.75, 508.99134324662145 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12853047871155391162&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "monash.edu;asu.edu;upc.edu.cn;;griffith.edu.au;griffith.edu.au", "author_num": 6, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Monash University;Arizona State University;China University of Petroleum;Griffith University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.monash.edu;https://www.asu.edu;http://www.cup.edu.cn;https://www.griffith.edu.au", 
"aff_unique_abbr": "Monash;ASU;CUP;Griffith", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Australia;United States;China" }, { "title": "Structure Learning with Adaptive Random Neighborhood Informed MCMC", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71622", "id": "SCsJFNcSHQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8027ace571384361920665f1d1b69758-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SCsJFNcSHQ", "openreview": "https://openreview.net/forum?id=SCsJFNcSHQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71622.png?t=1698839052.1187763", "slides": "https://nips.cc/virtual/2023/poster/71622", "video": "https://nips.cc/virtual/2023/poster/71622", "author_site": "Xitong Liang, Alberto Caron, Samuel Livingstone, Jim Griffin", "tldr": "", "abstract": "In this paper, we introduce a novel MCMC sampler, PARNI-DAG, for a fully-Bayesian approach to the problem of structure learning under observational data. Under the assumption of causal sufficiency, the algorithm allows for approximate sampling directly from the posterior distribution on Directed Acyclic Graphs (DAGs). PARNI-DAG performs efficient sampling of DAGs via locally informed, adaptive random neighborhood proposal that results in better mixing properties. In addition, to ensure better scalability with the number of nodes, we couple PARNI-DAG with a pre-tuning procedure of the sampler's parameters that exploits a skeleton graph derived through some constraint-based or scoring-based algorithms. Thanks to these novel features, PARNI-DAG quickly converges to high-probability regions and is less likely to get stuck in local modes in the presence of high correlation between nodes in high-dimensional settings. 
After introducing the technical novelties in PARNI-DAG, we empirically demonstrate its mixing efficiency and accuracy in learning DAG structures on a variety of experiments.", "keywords": "Bayesian Networks;structure MCMC on graphs;Structure Learning;Random neighborhood samplers;Locally informed Metropolis-Hastings schemes", "primary_area": "", "supplementary_material": "/attachment/bd6392f94ea904fb56bafd4f961ded4004cce9b5.zip", "author": "Xitong Liang;Alberto Caron;Samuel Livingstone;Jim Griffin", "authorids": "~Xitong_Liang1;~Alberto_Caron1;~Samuel_Livingstone1;~Jim_Griffin1", "gender": "M;M;M;M", "homepage": "https://xitongliang.github.io/;https://albicaron.github.io/;https://samueljlivingstone.wixsite.com/webpage;", "dblp": ";274/2818;146/5226;", "google_scholar": "https://scholar.google.com/citations?hl=en;Gq_jO8cAAAAJ;HK9uPpIAAAAJ;https://scholar.google.co.uk/citations?user=vPUFsJcAAAAJ", "orcid": "0000-0002-2434-1841;;0000-0002-7277-086X;", "linkedin": "xitong-liang-a59716188/?originalSubdomain=uk;;;", "or_profile": "~Xitong_Liang1;~Alberto_Caron1;~Samuel_Livingstone1;~Jim_Griffin1", "aff": "University College London, University of London;University College London, University of London;University College London, University of London;University College London, University of London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliang2023structure,\ntitle={Structure Learning with Adaptive Random Neighborhood Informed {MCMC}},\nauthor={Xitong Liang and Alberto Caron and Samuel Livingstone and Jim Griffin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SCsJFNcSHQ}\n}", "github": "", "project": "", "reviewers": "1zPb;xD8f;PZBP", "pdf_size": 865606, "rating": "4;5;7", "confidence": "2;4;4", "soundness": "3;3;4", "novelty": "1;2;3", "presentation": "2;3;4", "wc_summary": "87;161;199", "wc_strengths": "29;211;90", "wc_weaknesses": "217;300;61", "wc_questions": "2;9;483", "wc_limitations": "6;25;21", "wc_review": "341;706;854", "wc_reply_reviewers": "157;27;92", "wc_reply_authors": "202;23;74", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 149.0, 46.50448007092076 ], "wc_strengths_avg": [ 110.0, 75.6350888587213 ], "wc_weaknesses_avg": [ 192.66666666666666, 99.07685008225798 ], "wc_questions_avg": [ 164.66666666666666, 225.11379838255635 ], "wc_limitations_avg": [ 17.333333333333332, 8.178562764256865 ], "wc_review_avg": [ 633.6666666666666, 215.58653843771313 ], "wc_reply_reviewers_avg": [ 92.0, 53.07227776030219 ], "wc_reply_authors_avg": [ 99.66666666666667, 75.2964511484808 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184546, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7062346034201995332&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": 
"https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "DISCO-10M: A Large-Scale Music Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73591", "id": "SDJ3kYpJFX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa7ef4c0f4aaabf376088a1a74e09d4c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=SDJ3kYpJFX", "openreview": "https://openreview.net/forum?id=SDJ3kYpJFX", "poster": "/media/PosterPDFs/NeurIPS%202023/73591.png?t=1701518323.979779", "slides": "https://nips.cc/virtual/2023/poster/73591", "video": "https://nips.cc/virtual/2023/poster/73591", "author_site": "Luca Lanzend\u00f6rfer, Florian Gr\u00f6tschla, Emil Funke, Roger Wattenhofer", "tldr": "", "abstract": "Music datasets play a crucial role in advancing research in machine learning for music. However, existing music datasets suffer from limited size, accessibility, and lack of audio resources. To address these shortcomings, we present DISCO-10M, a novel and extensive music dataset that surpasses the largest previously available music dataset by an order of magnitude. To ensure high-quality data, we implement a multi-stage filtering process. This process incorporates similarities based on textual descriptions and audio embeddings. Moreover, we provide precomputed CLAP embeddings alongside DISCO-10M, facilitating direct application on various downstream tasks. These embeddings enable efficient exploration of machine learning applications on the provided data. With DISCO-10M, we aim to democratize and facilitate new research to help advance the development of novel machine learning models for music: https://huggingface.co/DISCOX", "keywords": "music;dataset;machine learning;embedding", "primary_area": "", "supplementary_material": "/attachment/9193e7229de5351c255cfd3a60ac262a633f7492.pdf", "author": "Luca A Lanzend\u00f6rfer;Florian Gr\u00f6tschla;Emil Funke;Roger Wattenhofer", "authorids": "~Luca_A_Lanzend\u00f6rfer1;~Florian_Gr\u00f6tschla1;~Emil_Funke2;~Roger_Wattenhofer1", "gender": "M;M;;Not Specified", "homepage": ";https://disco.ethz.ch/members/fgroetschla;;https://disco.ethz.ch/members/wroger", "dblp": ";334/1811;;w/RogerWattenhofer", "google_scholar": ";;;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": ";;;", "linkedin": "luca-lanzendoerfer/;;emil-funke-762a6b164/?originalSubdomain=ch;roger-wattenhofer-4466731/", "or_profile": "~Luca_A_Lanzend\u00f6rfer1;~Florian_Gr\u00f6tschla1;~Emil_Funke2;~Roger_Wattenhofer1", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nlanzend{\\\"o}rfer2023discom,\ntitle={{DISCO}-10M: A Large-Scale Music Dataset},\nauthor={Luca A Lanzend{\\\"o}rfer and Florian Gr{\\\"o}tschla and Emil Funke and Roger Wattenhofer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=SDJ3kYpJFX}\n}", "github": "", "project": "", "reviewers": "pF6S;fHP3;LrKY;28sH", "pdf_size": 14945232, "rating": "5;5;7;7", "confidence": "3;5;5;5", "wc_summary_and_contributions": "42;61;71;51", "wc_strengths": "47;62;100;23", "wc_improvement": "66;58;78;2", 
"wc_limitations": "34;42;1;36", "wc_correctness": "178;9;1;5", "wc_clarity": "11;6;5;2", "wc_relation_to_prior_work": "8;6;7;3", "wc_documentation": "95;7;1;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "482;252;265;125", "wc_reply_reviewers": "23;116;0;0", "wc_reply_authors": "779;667;463;164", "reply_reviewers": "1;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 56.25, 10.848386976873567 ], "wc_strengths_avg": [ 58.0, 27.955321496988727 ], "wc_improvement_avg": [ 51.0, 29.171904291629644 ], "wc_limitations_avg": [ 28.25, 16.005858302509115 ], "wc_correctness_avg": [ 48.25, 74.96457496711363 ], "wc_clarity_avg": [ 6.0, 3.24037034920393 ], "wc_relation_to_prior_work_avg": [ 6.0, 1.8708286933869707 ], "wc_documentation_avg": [ 26.25, 39.757860857948586 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 281.0, 128.29068555433008 ], "wc_reply_reviewers_avg": [ 34.75, 47.840228887412316 ], "wc_reply_authors_avg": [ 518.25, 233.80694493534617 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3460597260276427910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Nearly Optimal VC-Dimension and Pseudo-Dimension Bounds for Deep Neural Network Derivatives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71621", "id": "SE73LzWNjr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/449a016a6ce6fba3fe50d05482abf836-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SE73LzWNjr", "openreview": "https://openreview.net/forum?id=SE73LzWNjr", "poster": "/media/PosterPDFs/NeurIPS%202023/71621.png?t=1697119507.0072124", "slides": "https://nips.cc/virtual/2023/poster/71621", "video": "https://nips.cc/virtual/2023/poster/71621", "author_site": "Yahong Yang, Haizhao Yang, Yang Xiang", "tldr": "", "abstract": "This paper addresses the problem of nearly optimal Vapnik--Chervonenkis dimension (VC-dimension) and pseudo-dimension estimations of the derivative functions of deep neural networks (DNNs). Two important applications of these estimations include: 1) Establishing a nearly tight approximation result of DNNs in the Sobolev space; 2) Characterizing the generalization error of machine learning methods with loss functions involving function derivatives. 
This theoretical investigation fills the gap in learning error estimation for a wide range of physics-informed machine learning models and applications, including generative models, solving partial differential equations, operator learning, network compression, distillation, regularization, etc.", "keywords": "VC-dimension;pseudo-dimension;Sobolev space;generalization error;nearly optimal approximation", "primary_area": "", "supplementary_material": "/attachment/b789b19454bbe3f7082940e2e8b90a1028f8ec89.pdf", "author": "Yahong Yang;Haizhao Yang;Yang Xiang", "authorids": "~Yahong_Yang1;~Haizhao_Yang1;~Yang_Xiang3", "gender": "M;M;", "homepage": ";https://haizhaoyang.github.io;", "dblp": ";139/1215;", "google_scholar": ";p4mxTIwAAAAJ;", "orcid": "0000-0002-9721-2362;;", "linkedin": ";;", "or_profile": "~Yahong_Yang1;~Haizhao_Yang1;~Yang_Xiang3", "aff": "Hong Kong University of Science and Technology;University of Maryland, College Park;", "aff_domain": "ust.hk;umd.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nyang2023nearly,\ntitle={Nearly Optimal {VC}-Dimension and Pseudo-Dimension Bounds for Deep Neural Network Derivatives},\nauthor={Yahong Yang and Haizhao Yang and Yang Xiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SE73LzWNjr}\n}", "github": "", "project": "", "reviewers": "MY91;dSYn;vGRi;2REw", "pdf_size": 806343, "rating": "6;6;7;7", "confidence": "2;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "26;60;147;93", "wc_strengths": "15;180;121;88", "wc_weaknesses": "122;255;64;53", "wc_questions": "121;9;52;239", "wc_limitations": "7;2;1;2", "wc_review": "291;506;385;475", "wc_reply_reviewers": "12;105;13;114", "wc_reply_authors": "8;40;8;85", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.5, 44.62342434193055 ], "wc_strengths_avg": [ 101.0, 59.594462830031446 ], "wc_weaknesses_avg": [ 123.5, 80.31967380411851 ], "wc_questions_avg": [ 105.25, 86.94358803270083 ], "wc_limitations_avg": [ 3.0, 2.345207879911715 ], "wc_review_avg": [ 414.25, 83.896886116232 ], "wc_reply_reviewers_avg": [ 61.0, 48.605555238058955 ], "wc_reply_authors_avg": [ 35.25, 31.554516317002864 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15885802933162119937&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ust.hk;umd.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Maryland", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.umd.edu", "aff_unique_abbr": "HKUST;UMD", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Hong Kong SAR;College Park", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "SARAMIS: Simulation Assets for Robotic Assisted and Minimally Invasive Surgery", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73590", "id": "SEU9m9NReo", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/52e78a95d8baa6d082fb2d0e9499b661-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=SEU9m9NReo", "openreview": "https://openreview.net/forum?id=SEU9m9NReo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73590", "video": "https://nips.cc/virtual/2023/poster/73590", "author_site": "Nina Montana-Brown, Shaheer U. Saeed, Ahmed Abdulaal, Thomas Dowrick, Yakup Kilic, Sophie Wilkinson, Jack Gao, Meghavi Mashar, Chloe He, Alkisti Stavropoulou, Emma Thomson, Zachary MC Baum, Simone Foti, Brian Davidson, Yipeng Hu, Matthew Clarkson", "tldr": "", "abstract": "Minimally-invasive surgery (MIS) and robot-assisted minimally invasive (RAMIS) surgery offer well-documented benefits to patients such as reduced post-operative pain and shorter hospital stays.\nHowever, the automation of MIS and RAMIS through the use of AI has been slow due to difficulties in data acquisition and curation, partially caused by the ethical considerations of training, testing and deploying AI models in medical environments.\nWe introduce \\texttt{SARAMIS}, the first large-scale dataset of anatomically derived 3D rendering assets of the human abdominal anatomy.\nUsing previously existing, open-source CT datasets of the human anatomy, we derive novel 3D meshes, tetrahedral volumes, textures and diffuse maps for over 104 different anatomical targets in the human body, representing the largest, open-source dataset of 3D rendering assets for synthetic simulation of vision tasks in MIS+RAMIS, increasing the availability of openly available 3D meshes in the literature by three orders of magnitude.\nWe supplement our dataset with a series of GPU-enabled rendering environments, which can be used to generate datasets for realistic MIS/RAMIS tasks.\nFinally, we present an example of the use of \\texttt{SARAMIS} assets for an autonomous navigation task in colonoscopy from CT abdomen-pelvis scans for the first time in the literature.\n\\texttt{SARAMIS} is publically made available at https://github.com/NMontanaBrown/saramis/, with assets released under a CC-BY-NC-SA license.", "keywords": "Minimally-invasive surgery;simulation;reinforcement learning;computer vision;synthetic data", "primary_area": "", "supplementary_material": "/attachment/337a6963d09cdc139c1d94058aacd180c9379c91.pdf", "author": "Nina Montana-Brown;Shaheer U. 
Saeed;Ahmed Abdulaal;Thomas Dowrick;Yakup Kilic;Sophie Wilkinson;Jack Gao;Meghavi Mashar;Chloe He;Alkisti Stavropoulou;Emma L Thomson;Zachary Baum;Simone Foti;Brian Davidson;Yipeng Hu;Matthew John Clarkson", "authorids": "~Nina_Montana-Brown1;~Shaheer_U._Saeed1;~Ahmed_Abdulaal1;~Thomas_Dowrick1;~Yakup_Kilic1;~Sophie_Wilkinson1;~Jack_Gao1;meghavi@hotmail.co.uk;~Chloe_He2;alkisti.stavropoulou.16@ucl.ac.uk;~Emma_L_Thomson1;~Zachary_Baum1;~Simone_Foti1;b.davidson@ucl.ac.uk;~Yipeng_Hu1;~Matthew_John_Clarkson1", "gender": "F;M;M;;M;F;Not Specified;;;;F;M;M;;M;M", "homepage": "https://scholar.google.com/citations?user=lCbkLx4AAAAJ&hl=en;;;https://www.ucl.ac.uk/tdowrick;https://www.linkedin.com/in/yakup-kilic-mrcp-uk-a121a63a;;https://www.linkedin.com/in/jack-gao-488337172/;;;;;https://zacbaum.github.io/;https://www.simofoti.com/;;;https://iris.ucl.ac.uk/iris/browse/profile?upi=MJCLA42", "dblp": "274/3185.html;270/0077;;;;;;;;;;213/3129.html;246/7113;;45/5086.html;79/140", "google_scholar": "lCbkLx4AAAAJ;mnABWkIAAAAJ;;;;;;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=eOYKsdsAAAAJ;BuWPfNsAAAAJ;;https://scholar.google.co.uk/citations?user=_jYXK0IAAAAJ;https://scholar.google.co.uk/citations?user=bE6rIJEAAAAJ", "orcid": ";;0000-0002-3536-4803;;;0009-0003-9993-411X;;;;;;0000-0001-6838-335X;;;;0000-0002-5565-1252", "linkedin": ";;;;yakup-kilic-mrcp-uk-a121a63a;;;;;;;zacharybaum/;simone-foti/;;;matt-clarkson-9b77a51/", "or_profile": "~Nina_Montana-Brown1;~Shaheer_U._Saeed1;~Ahmed_Abdulaal1;~Thomas_Dowrick1;~Yakup_Kilic1;~Sophie_Wilkinson1;~Jack_Gao1;meghavi@hotmail.co.uk;~Chloe_He2;alkisti.stavropoulou.16@ucl.ac.uk;~Emma_L_Thomson1;~Zachary_Baum1;~Simone_Foti1;b.davidson@ucl.ac.uk;~Yipeng_Hu1;~Matthew_John_Clarkson1", "aff": "University College London, University of London;University College London, University of London;University College London, University of London;University College London, University of London;University College London, University of London;University College London, University of London;;;;;University College London, University of London;University College London, University of London;University College London;;University of Oxford;University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;;;;;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;;ox.ac.uk;ucl.ac.uk", "position": "PhD student;Researcher;PhD student;Researcher;Intern;Intern;;;;;PhD student;PhD student;PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\nmontana-brown2023saramis,\ntitle={{SARAMIS}: Simulation Assets for Robotic Assisted and Minimally Invasive Surgery},\nauthor={Nina Montana-Brown and Shaheer U. 
Saeed and Ahmed Abdulaal and Thomas Dowrick and Yakup Kilic and Sophie Wilkinson and Jack Gao and Meghavi Mashar and Chloe He and Alkisti Stavropoulou and Emma L Thomson and Zachary Baum and Simone Foti and Brian Davidson and Yipeng Hu and Matthew John Clarkson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=SEU9m9NReo}\n}", "github": "", "project": "", "reviewers": "PbKg;zAJD;Mims;1urX", "pdf_size": 2582671, "rating": "2;7;7;8", "confidence": "5;4;4;4", "wc_summary_and_contributions": "27;84;91;150", "wc_strengths": "20;65;93;104", "wc_improvement": "290;978;287;137", "wc_limitations": "36;31;50;2", "wc_correctness": "4;45;10;2", "wc_clarity": "16;18;12;2", "wc_relation_to_prior_work": "9;37;5;3", "wc_documentation": "7;63;39;29", "wc_additional_feedback": "1;1;1;1", "wc_review": "410;1322;588;430", "wc_reply_reviewers": "528;0;105;18", "wc_reply_authors": "4365;4990;1314;686", "reply_reviewers": "1;0;1;1", "reply_authors": "9;9;2;1", "rating_avg": [ 6.0, 2.345207879911715 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 88.0, 43.560303029249006 ], "wc_strengths_avg": [ 70.5, 32.438403166617185 ], "wc_improvement_avg": [ 423.0, 326.3456756263211 ], "wc_limitations_avg": [ 29.75, 17.469616481193857 ], "wc_correctness_avg": [ 15.25, 17.426631917843448 ], "wc_clarity_avg": [ 12.0, 6.164414002968976 ], "wc_relation_to_prior_work_avg": [ 13.5, 13.738631664034086 ], "wc_documentation_avg": [ 34.5, 20.11839953873071 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 687.5, 372.76098240025067 ], "wc_reply_reviewers_avg": [ 162.75, 214.58258899547278 ], "wc_reply_authors_avg": [ 2838.75, 1865.2419916729305 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.25, 3.766629793329841 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.9847319278346619, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6885283995543273812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;;;;;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;;ox.ac.uk;ucl.ac.uk", "author_num": 16, "aff_unique_index": "0;0;0;0;0;0;0;0;0;1;0", "aff_unique_norm": "University College London;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "UCL;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Graph Convolutional Kernel Machine versus Graph Convolutional Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71620", "id": "SFfOt1oDsX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ec6c6fc9065aa57785eb05dffe7c3db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SFfOt1oDsX", "openreview": "https://openreview.net/forum?id=SFfOt1oDsX", "poster": "/media/PosterPDFs/NeurIPS%202023/71620.png?t=1700203864.993772", "slides": "https://nips.cc/virtual/2023/poster/71620", "video": "https://nips.cc/virtual/2023/poster/71620", "author_site": "Zhihao Wu, Zhao Zhang, Jicong Fan", "tldr": "", "abstract": "Graph convolutional networks (GCN) with one or two hidden layers have been widely used in handling graph data that are prevalent in various disciplines. 
Many studies showed that the gain of making GCNs deeper is tiny or even negative. This implies that the complexity of graph data is often limited and shallow models are often sufficient to extract expressive features for various tasks such as node classification. Therefore, in this work, we present a framework called graph convolutional kernel machine (GCKM) for graph-based machine learning. GCKMs are built upon kernel functions integrated with graph convolution. An example is the graph convolutional kernel support vector machine (GCKSVM) for node classification, for which we analyze the generalization error bound and discuss the impact of the graph structure. Compared to GCNs, GCKMs require much less effort in architecture design, hyperparameter tuning, and optimization. More importantly, GCKMs are guaranteed to obtain globally optimal solutions and have strong generalization ability and high interpretability. GCKMs are composable, can be extended to large-scale data, and are applicable to various tasks (e.g., node or graph classification, clustering, feature extraction, dimensionality reduction). The numerical results on benchmark datasets show that, besides the aforementioned advantages, GCKMs have at least competitive accuracy compared to GCNs.", "keywords": "graph neural network;kernel method", "primary_area": "", "supplementary_material": "", "author": "Zhihao Wu;Zhao Zhang;Jicong Fan", "authorids": "~Zhihao_Wu4;~Zhao_Zhang3;~Jicong_Fan2", "gender": "M;M;M", "homepage": "https://zhihaowu99.github.io/;http://www.escience.cn/people/cszzhang;https://jicongfan.github.io/", "dblp": "27/8792-3;87/6853-1;139/1570", "google_scholar": "QDlGhPsAAAAJ;h6SCUNwAAAAJ;vdJsnhIAAAAJ", "orcid": "0000-0001-5835-9903;0000-0002-5703-7969;0000-0001-9665-0355", "linkedin": ";;", "or_profile": "~Zhihao_Wu4;~Zhao_Zhang3;~Jicong_Fan2", "aff": "The Chinese University of Hong Kong, Shenzhen;Hefei University of Technology;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;hfut.edu;cuhk.edu.cn", "position": "Intern;Full Professor;Research Assistant Professor", "bibtex": "@inproceedings{\nwu2023graph,\ntitle={Graph Convolutional Kernel Machine versus Graph Convolutional Networks},\nauthor={Zhihao Wu and Zhao Zhang and Jicong Fan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SFfOt1oDsX}\n}", "github": "", "project": "", "reviewers": "YFNo;nd8y;dP1e;PZjL", "pdf_size": 4749178, "rating": "6;6;6;7", "confidence": "4;4;3;5", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "38;43;98;59", "wc_strengths": "40;62;75;96", "wc_weaknesses": "193;233;130;53", "wc_questions": "74;50;39;174", "wc_limitations": "11;5;1;1", "wc_review": "356;393;343;383", "wc_reply_reviewers": "12;132;10;20", "wc_reply_authors": "12;519;7;9", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 23.542514733987108 ], "wc_strengths_avg": [ 68.25, 20.327014045353536 ], "wc_weaknesses_avg": [ 152.25, 68.05650226098899 ], "wc_questions_avg": [ 84.25, 53.340299024283695 ], "wc_limitations_avg": [ 4.5, 4.092676385936225 ], "wc_review_avg": [ 368.75, 20.104414938017968 ], "wc_reply_reviewers_avg": [ 43.5, 51.232314021523564 ], 
"wc_reply_authors_avg": [ 136.75, 220.6993146794978 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8265369257895586725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cuhk.edu.cn;hfut.edu;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Hefei University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.hfut.edu.cn/", "aff_unique_abbr": "CUHK;HUT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Collaborative Learning via Prediction Consensus", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71619", "id": "SGKbHXoLCI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/065e259a1d2d955e63b99aac6a3a3081-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SGKbHXoLCI", "openreview": "https://openreview.net/forum?id=SGKbHXoLCI", "poster": "/media/PosterPDFs/NeurIPS%202023/71619.png?t=1701942260.5290744", "slides": "https://nips.cc/virtual/2023/poster/71619", "video": "https://nips.cc/virtual/2023/poster/71619", "author_site": "Dongyang Fan, Celestine Mendler-D\u00fcnner, Martin Jaggi", "tldr": "", "abstract": "We consider a collaborative learning setting where the goal of each agent is to improve their own model by leveraging the expertise of collaborators, in addition to their own training data. To facilitate the exchange of expertise among agents, we propose a distillation-based method leveraging shared unlabeled auxiliary data, which is pseudo-labeled by the collective. Central to our method is a trust weighting scheme that serves to adaptively weigh the influence of each collaborator on the pseudo-labels until a consensus on how to label the auxiliary data is reached. We demonstrate empirically that our collaboration scheme is able to significantly boost individual models\u2019 performance in the target domain from which the auxiliary data is sampled. At the same time, it can provably mitigate the negative impact of bad models on the collective. 
By design, our method adeptly accommodates heterogeneity in model architectures and substantially reduces communication overhead compared to typical collaborative learning methods.", "keywords": "Collaborative training;decentralized learning;consensus reaching", "primary_area": "", "supplementary_material": "/attachment/bd0b59c00ee940f98c5986c65ed7bba5efb25346.pdf", "author": "Dongyang Fan;Celestine Mendler-D\u00fcnner;Martin Jaggi", "authorids": "~Dongyang_Fan2;~Celestine_Mendler-D\u00fcnner1;~Martin_Jaggi1", "gender": "F;;M", "homepage": ";http://celestine.ai/;https://mlo.epfl.ch", "dblp": ";176/5511;17/4402", "google_scholar": ";UqtDdZUAAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ", "orcid": ";;0000-0003-1579-5558", "linkedin": "fannnndy/;;", "or_profile": "~Dongyang_Fan2;~Celestine_Mendler-D\u00fcnner1;~Martin_Jaggi1", "aff": "EPFL - EPF Lausanne;Max Planck Institute for Intelligent Systems;EPFL", "aff_domain": "epfl.ch;tuebingen.mpg.de;epfl.ch", "position": "PhD student;Group Lead;Associate Professor", "bibtex": "@inproceedings{\nfan2023collaborative,\ntitle={Collaborative Learning via Prediction Consensus},\nauthor={Dongyang Fan and Celestine Mendler-D{\\\"u}nner and Martin Jaggi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SGKbHXoLCI}\n}", "github": "", "project": "", "reviewers": "Bri9;vDF7;TUxA;u6rQ", "pdf_size": 1429668, "rating": "5;5;6;7", "confidence": "3;3;4;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;2;3", "wc_summary": "108;141;93;86", "wc_strengths": "104;73;115;60", "wc_weaknesses": "148;107;121;61", "wc_questions": "3;68;118;29", "wc_limitations": "1;15;80;1", "wc_review": "364;404;527;237", "wc_reply_reviewers": "16;0;47;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.0, 21.17781858454737 ], "wc_strengths_avg": [ 88.0, 22.327113561766108 ], "wc_weaknesses_avg": [ 109.25, 31.514877439076294 ], "wc_questions_avg": [ 54.5, 43.35031718453742 ], "wc_limitations_avg": [ 24.25, 32.69078616368839 ], "wc_review_avg": [ 383.0, 103.50603847119258 ], "wc_reply_reviewers_avg": [ 15.75, 19.188212527486765 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11235251777702992666&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "epfl.ch;tuebingen.mpg.de;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "EPFL;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.epfl.ch;https://www.mpi-is.mpg.de", "aff_unique_abbr": "EPFL;MPI-IS", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "Tracking Most Significant Shifts in Nonparametric Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71618", "id": "SGerL9HMrp", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/13b501c58ae3bfe9635a259f4414e943-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SGerL9HMrp", "openreview": "https://openreview.net/forum?id=SGerL9HMrp", "poster": "/media/PosterPDFs/NeurIPS%202023/71618.png?t=1701661686.421093", "slides": "https://nips.cc/virtual/2023/poster/71618", "video": "https://nips.cc/virtual/2023/poster/71618", "author_site": "Joe Suk, Samory Kpotufe", "tldr": "", "abstract": "We study nonparametric contextual bandits where Lipschitz mean reward functions may change over time.\nWe first establish the minimax dynamic regret rate in this less understood setting in terms of number of changes $L$ and total-variation $V$, both capturing all changes in distribution over context space, and argue that state-of-the-art procedures are suboptimal in this setting.\n\nNext, we tend to the question of an _adaptivity_ for this setting, i.e. achieving the minimax rate without knowledge of $L$ or $V$. Quite importantly, we posit that the bandit problem, viewed locally at a given context $X_t$, should not be affected by reward changes in other parts of context space $\\cal X$. We therefore propose a notion of _change_, which we term _experienced significant shifts_, that better accounts for locality, and thus counts considerably less changes than $L$ and $V$. Furthermore, similar to recent work on non-stationary MAB (Suk & Kpotufe, 2022), _experienced significant shifts_ only count the most _significant_ changes in mean rewards, e.g., severe best-arm changes relevant to observed contexts.\n\nOur main result is to show that this more tolerant notion of change can in fact be adapted to.", "keywords": "multi-armed bandits;non-stationary;contextual bandits;nonparametric;Lipschitz", "primary_area": "", "supplementary_material": "/attachment/06c7794a9438583dd1e14a04960ccfd29129936d.pdf", "author": "Joe Suk;Samory Kpotufe", "authorids": "~Joe_Suk1;~Samory_Kpotufe3", "gender": "Not Specified;M", "homepage": "https://www.columbia.edu/~js5338/;http://www.columbia.edu/~skk2175/", "dblp": "271/0068;", "google_scholar": "https://scholar.google.com/citations?hl=en;9r7_pN8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Joe_Suk1;~Samory_Kpotufe3", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsuk2023tracking,\ntitle={Tracking Most Significant Shifts in Nonparametric Contextual Bandits},\nauthor={Joe Suk and Samory Kpotufe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SGerL9HMrp}\n}", "github": "", "project": "", "reviewers": "PUca;aF2K;o8kG;GBqh", "pdf_size": 578712, "rating": "4;7;7;8", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "3;4;3;4", "presentation": "2;3;3;4", "wc_summary": "93;69;86;59", "wc_strengths": "21;85;39;57", "wc_weaknesses": "149;76;139;42", "wc_questions": "111;112;166;55", "wc_limitations": "1;1;1;1", "wc_review": "375;343;431;214", "wc_reply_reviewers": "0;0;0;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.75, 13.460590625971804 ], "wc_strengths_avg": [ 50.5, 23.637893307145628 ], "wc_weaknesses_avg": [ 101.5, 
44.30857704779064 ], "wc_questions_avg": [ 111.0, 39.24920381358073 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 340.75, 79.6692381035491 ], "wc_reply_reviewers_avg": [ 1.25, 2.165063509461097 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8323743474026866294&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Concept Algebra for (Score-Based) Text-Controlled Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71617", "id": "SGlrCuwdsB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f125214c86439d107ccb58e549e828f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SGlrCuwdsB", "openreview": "https://openreview.net/forum?id=SGlrCuwdsB", "poster": "/media/PosterPDFs/NeurIPS%202023/71617.png?t=1701376145.6539693", "slides": "https://nips.cc/virtual/2023/poster/71617", "video": "https://nips.cc/virtual/2023/poster/71617", "author_site": "Zihao Wang, Lin Gui, Jeffrey Negrea, Victor Veitch", "tldr": "", "abstract": "This paper concerns the structure of learned representations in text-guided generative models, focusing on score-based models. A key property of such models is that they can compose disparate concepts in a 'disentangled' manner. This suggests these models have internal representations that encode concepts in a 'disentangled' manner. Here, we focus on the idea that concepts are encoded as subspaces of some representation space. We formalize what this means, show there's a natural choice for the representation, and develop a simple method for identifying the part of the representation corresponding to a given concept. In particular, this allows us to manipulate the concepts expressed by the model through algebraic manipulation of the representation.
We demonstrate the idea with examples using Stable Diffusion.", "keywords": "disentanglement; representation learning; text-controlled generative models; diffusion models", "primary_area": "", "supplementary_material": "/attachment/7aa56b05e8e715ec766b0214f56c2cedb575ba35.zip", "author": "Zihao Wang;Lin Gui;Jeffrey Negrea;Victor Veitch", "authorids": "~Zihao_Wang8;~Lin_Gui5;~Jeffrey_Negrea1;~Victor_Veitch1", "gender": ";F;M;", "homepage": ";;http://utstat.toronto.edu/~negrea/;http://victorveitch.com", "dblp": "https://dblp.org/rec/journals/corr/abs-2105-13440;;252/5787.html;167/5650", "google_scholar": "jyBHUM8AAAAJ;88eaL8UAAAAJ;https://scholar.google.ca/citations?user=woSzLBMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-4665-3571;", "linkedin": "https://www.linkedin.com/mwlite/in/zihao-wang-2b1645123;;;", "or_profile": "~Zihao_Wang8;~Lin_Gui5;~Jeffrey_Negrea1;~Victor_Veitch1", "aff": "University of Chicago;University of Chicago;University of Waterloo;Google", "aff_domain": "uchicago.edu;uchicago.edu;uwaterloo.ca;google.com", "position": "PhD student;PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nwang2023concept,\ntitle={Concept Algebra for (Score-Based) Text-Controlled Generative Models},\nauthor={Zihao Wang and Lin Gui and Jeffrey Negrea and Victor Veitch},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SGlrCuwdsB}\n}", "github": "", "project": "", "reviewers": "WFH5;mXTP;RGiq;pK6W;1z2h", "pdf_size": 20784844, "rating": "3;6;6;6;9", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;4", "novelty": "1;4;2;3;4", "presentation": "3;4;3;2;3", "wc_summary": "51;116;95;155;73", "wc_strengths": "20;140;107;115;120", "wc_weaknesses": "414;111;145;147;17", "wc_questions": "24;137;20;84;3", "wc_limitations": "14;98;6;40;46", "wc_review": "523;602;373;541;259", "wc_reply_reviewers": "145;261;125;12;25", "wc_reply_authors": "158;597;168;31;16", "reply_reviewers": "1;2;2;1;1", "reply_authors": "3;2;4;2;2", "rating_avg": [ 6.0, 1.8973665961010275 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 1.16619037896906 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 98.0, 35.82178108358098 ], "wc_strengths_avg": [ 100.4, 41.64900959206593 ], "wc_weaknesses_avg": [ 166.8, 132.30933451574762 ], "wc_questions_avg": [ 53.6, 49.890279614369774 ], "wc_limitations_avg": [ 40.8, 32.33821269025238 ], "wc_review_avg": [ 459.6, 125.42982101557828 ], "wc_reply_reviewers_avg": [ 113.6, 90.57063541788806 ], "wc_reply_authors_avg": [ 194.0, 211.01374362822912 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2290205317679939059&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uchicago.edu;uchicago.edu;uwaterloo.ca;google.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Chicago;University of Waterloo;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.uchicago.edu;https://uwaterloo.ca;https://www.google.com", "aff_unique_abbr": "UChicago;UW;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": 
"Contextual Stochastic Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71616", "id": "SHBksHKutP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f77d9409647c096789067c09455858a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SHBksHKutP", "openreview": "https://openreview.net/forum?id=SHBksHKutP", "poster": "/media/PosterPDFs/NeurIPS%202023/71616.png?t=1702407210.5034525", "slides": "https://nips.cc/virtual/2023/poster/71616", "video": "https://nips.cc/virtual/2023/poster/71616", "author_site": "Yifan Hu, Jie Wang, Yao Xie, Andreas Krause, Daniel Kuhn", "tldr": "", "abstract": "We introduce contextual stochastic bilevel optimization (CSBO) -- a stochastic bilevel optimization framework with the lower-level problem minimizing an expectation conditioned on some contextual information and the upper-level decision variable. This framework extends classical stochastic bilevel optimization when the lower-level decision maker responds optimally not only to the decision of the upper-level decision maker but also to some side information and when there are multiple or even infinite many followers. It captures important applications such as meta-learning, personalized federated learning, end-to-end learning, and Wasserstein distributionally robust optimization with side information (WDRO-SI). Due to the presence of contextual information, existing single-loop methods for classical stochastic bilevel optimization are unable to converge. To overcome this challenge, we introduce an efficient double-loop gradient method based on the Multilevel Monte-Carlo (MLMC) technique and establish its sample and computational complexities. When specialized to stochastic nonconvex optimization, our method matches existing lower bounds. For meta-learning, the complexity of our method does not depend on the number of tasks. 
Numerical experiments further validate our theoretical results.", "keywords": "stochastic optimization;bilevel optimization;contextual stochastic optimization;Multilevel Monte Carlo", "primary_area": "", "supplementary_material": "/attachment/c99d5c0bf7ac0b898e14707918736ab77de1393c.zip", "author": "Yifan Hu;Jie Wang;Yao Xie;Andreas Krause;Daniel Kuhn", "authorids": "~Yifan_Hu2;~Jie_Wang12;~Yao_Xie2;~Andreas_Krause1;~Daniel_Kuhn2", "gender": "M;M;F;M;", "homepage": "https://sites.google.com/view/yifan-hu;https://walterbabyrudin.github.io/;http://www2.isye.gatech.edu/~yxie77;https://las.inf.ethz.ch/krausea;https://people.epfl.ch/daniel.kuhn", "dblp": ";29/5259-49;13/4242-2;87/1831-1.html;35/479-1.html", "google_scholar": "rO2s0EEAAAAJ;PXk1frMAAAAJ;qvYp8ZQAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ;RqnXytkAAAAJ", "orcid": ";;;0000-0001-7260-9673;0000-0003-2697-8886", "linkedin": ";;yaoxie/;krausea/;", "or_profile": "~Yifan_Hu2;~Jie_Wang12;~Yao_Xie2;~Andreas_Krause1;~Daniel_D_Kuhn1", "aff": "ETHZ - ETH Zurich;Georgia Institute of Technology;Georgia Institute of Technology;ETH Zurich;Swiss Federal Institute of Technology Lausanne", "aff_domain": "inf.ethz.ch;gatech.edu;gatech.edu;ethz.ch;epfl.ch", "position": "Postdoc;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhu2023contextual,\ntitle={Contextual Stochastic Bilevel Optimization},\nauthor={Yifan Hu and Jie Wang and Yao Xie and Andreas Krause and Daniel Kuhn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SHBksHKutP}\n}", "github": "", "project": "", "reviewers": "DnyS;NrrF;Z7Go;BuTe", "pdf_size": 853342, "rating": "5;5;6;7", "confidence": "4;2;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "3;1;3;4", "wc_summary": "79;45;71;94", "wc_strengths": "56;54;79;72", "wc_weaknesses": "198;208;316;35", "wc_questions": "3;208;5;166", "wc_limitations": "3;1;1;1", "wc_review": "339;516;472;368", "wc_reply_reviewers": "21;15;38;61", "wc_reply_authors": "65;65;65;12", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 72.25, 17.76759691123141 ], "wc_strengths_avg": [ 65.25, 10.568230693924125 ], "wc_weaknesses_avg": [ 189.25, 100.35779740508457 ], "wc_questions_avg": [ 95.5, 92.69978425001862 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 423.75, 72.67865917860621 ], "wc_reply_reviewers_avg": [ 33.75, 17.851820635442202 ], "wc_reply_authors_avg": [ 51.75, 22.949673200287624 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18093075531153945682&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "inf.ethz.ch;gatech.edu;gatech.edu;ethz.ch;epfl.ch", "author_num": 5, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "ETH Zurich;Georgia Institute of Technology;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.gatech.edu;https://www.epfl.ch", "aff_unique_abbr": "ETHZ;Georgia Tech;EPFL", "aff_campus_unique_index": 
"1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Hyperbolic Graph Neural Networks at Scale: A Meta Learning Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71615", "id": "SHVwG9yOEk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b6a8b010e9a266aad40a024c5976d5c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SHVwG9yOEk", "openreview": "https://openreview.net/forum?id=SHVwG9yOEk", "poster": "/media/PosterPDFs/NeurIPS%202023/71615.png?t=1701925789.658464", "slides": "https://nips.cc/virtual/2023/poster/71615", "video": "https://nips.cc/virtual/2023/poster/71615", "author_site": "Nurendra Choudhary, Nikhil Rao, Chandan Reddy", "tldr": "", "abstract": "The progress in hyperbolic neural networks (HNNs) research is hindered by their absence of inductive bias mechanisms, which are essential for generalizing to new tasks and facilitating scalable learning over large datasets. In this paper, we aim to alleviate these issues by learning generalizable inductive biases from the nodes\u2019 local subgraph and transfer them for faster learning over new subgraphs with a disjoint set of nodes, edges, and labels in a few-shot setting. We introduce a novel method, Hyperbolic GRAph Meta Learner (H-GRAM), that, for the tasks of node classification and link prediction, learns transferable information from a set of support local subgraphs in the form of hyperbolic meta gradients and label hyperbolic protonets to enable faster learning over a query set of new tasks dealing with disjoint subgraphs. Furthermore, we show that an extension of our meta-learning framework also mitigates the scalability challenges seen in HNNs faced by existing approaches. Our comparative analysis shows that H-GRAM effectively learns and transfers information in multiple challenging few-shot settings compared to other state-of-the-art baselines. Additionally, we demonstrate that, unlike standard HNNs, our approach is able to scale over large graph datasets and improve performance over its Euclidean counterparts.", "keywords": "meta learning;hyperbolic networks;scalability;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/2e8e95df2cd9689868aa11b91ea228031eb2fd16.pdf", "author": "Nurendra Choudhary;Nikhil Rao;Chandan K. Reddy", "authorids": "~Nurendra_Choudhary1;~Nikhil_Rao1;~Chandan_K._Reddy1", "gender": "M;M;M", "homepage": "http://nurendra.me/;;https://creddy.net/", "dblp": "157/1351;57/9513.html;42/1341", "google_scholar": "https://scholar.google.co.in/citations?user=MWJfTQYAAAAJ;GhqD_rwAAAAJ;LoXnMOIAAAAJ", "orcid": "0000-0002-4471-8968;;", "linkedin": "nurendrachoudhary/;nikhil-rao-012068a1/;", "or_profile": "~Nurendra_Choudhary1;~Nikhil_Rao1;~Chandan_K._Reddy1", "aff": "Virginia Tech;Microsoft;Amazon", "aff_domain": "vt.edu;microsoft.com;amazon.com", "position": "PhD student;Principal Researcher;Amazon Scholar", "bibtex": "@inproceedings{\nchoudhary2023hyperbolic,\ntitle={Hyperbolic Graph Neural Networks at Scale: A Meta Learning Approach},\nauthor={Nurendra Choudhary and Nikhil Rao and Chandan K. 
Reddy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SHVwG9yOEk}\n}", "github": "", "project": "", "reviewers": "Ft7q;Urvd;qW7m;4N27;UWvg", "pdf_size": 3624662, "rating": "5;5;5;5;8", "confidence": "4;3;4;2;4", "soundness": "3;3;3;3;4", "novelty": "3;3;2;2;3", "presentation": "2;2;3;2;3", "wc_summary": "77;81;57;88;69", "wc_strengths": "67;82;43;47;87", "wc_weaknesses": "190;78;216;39;166", "wc_questions": "5;36;82;5;11", "wc_limitations": "30;11;2;6;31", "wc_review": "369;288;400;185;364", "wc_reply_reviewers": "0;0;0;0;42", "wc_reply_authors": "59;86;75;56;42", "reply_reviewers": "0;0;0;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 74.4, 10.650821564555478 ], "wc_strengths_avg": [ 65.2, 17.80337046741431 ], "wc_weaknesses_avg": [ 137.8, 67.78318375526484 ], "wc_questions_avg": [ 27.8, 29.417001886664114 ], "wc_limitations_avg": [ 16.0, 12.181953866272849 ], "wc_review_avg": [ 321.2, 77.4322930049214 ], "wc_reply_reviewers_avg": [ 8.4, 16.8 ], "wc_reply_authors_avg": [ 63.6, 15.344054223053305 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.37499999999999994, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8935231157726623718&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "vt.edu;microsoft.com;amazon.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Virginia Tech;Microsoft;Amazon", "aff_unique_dep": ";Microsoft Corporation;Amazon.com, Inc.", "aff_unique_url": "https://www.vt.edu;https://www.microsoft.com;https://www.amazon.com", "aff_unique_abbr": "VT;Microsoft;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unlocking Deterministic Robustness Certification on ImageNet", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71614", "id": "SHyVaWGTO4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/863da9d40547f1d1b18859519ce2dee4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SHyVaWGTO4", "openreview": "https://openreview.net/forum?id=SHyVaWGTO4", "poster": "/media/PosterPDFs/NeurIPS%202023/71614.png?t=1702078795.7414978", "slides": "https://nips.cc/virtual/2023/poster/71614", "video": "https://nips.cc/virtual/2023/poster/71614", "author_site": "Kai Hu, Andy Zou, Zifan Wang, Klas Leino, Matt Fredrikson", "tldr": "", "abstract": "Despite the promise of Lipschitz-based methods for provably-robust deep learning with deterministic guarantees, current state-of-the-art results are limited to feed-forward Convolutional Networks (ConvNets) on low-dimensional data, such as CIFAR-10. 
\nThis paper investigates strategies for expanding certifiably robust training to larger, deeper models.\nA key challenge in certifying deep networks is efficient calculation of the Lipschitz bound for residual blocks found in ResNet and ViT architectures.\nWe show that fast ways of bounding the Lipschitz constant for conventional ResNets are loose, and show how to address this by designing a new residual block, leading to the *Linear ResNet* (LiResNet) architecture.\nWe then introduce *Efficient Margin MAximization* (EMMA), a loss function that stabilizes robust training by penalizing worst-case adversarial examples from multiple classes simultaneously.\nTogether, these contributions yield new *state-of-the-art* robust accuracy on CIFAR-10/100 and Tiny-ImageNet under $\\ell_2$ perturbations.\nMoreover, for the first time, we are able to scale up fast deterministic robustness guarantees to ImageNet, demonstrating that this approach to robust learning can be applied to real-world applications.", "keywords": "adversarial robustness;ImageNet;Lipschitz-based certification;ResNet;adversarial examples;ML security", "primary_area": "", "supplementary_material": "/attachment/7cf0ef512361bb6ecdbee07297fbe2731f67c82e.zip", "author": "Kai Hu;Andy Zou;Zifan Wang;Klas Leino;Matt Fredrikson", "authorids": "~Kai_Hu2;~Andy_Zou1;~Zifan_Wang1;~Klas_Leino1;~Matt_Fredrikson1", "gender": "M;;M;M;M", "homepage": "https://github.com/hukkai;;https://www.zifanw.net;https://klas.leino.tech;https://cs.cmu.edu/~mfredrik", "dblp": ";274/2362;;;38/2612", "google_scholar": ";;HJOP3wMAAAAJ;;https://scholar.google.com.tw/citations?user=tMYCvLAAAAAJ", "orcid": ";;;;", "linkedin": ";andy-zou-09ba3616a/;zifan-wang-sail/;;", "or_profile": "~Kai_Hu2;~Andy_Zou1;~Zifan_Wang1;~Klas_Leino1;~Matt_Fredrikson1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Roam HQ;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;ro.am;cmu.edu", "position": "PhD student;PhD student;PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nhu2023unlocking,\ntitle={Unlocking Deterministic Robustness Certification on ImageNet},\nauthor={Kai Hu and Andy Zou and Zifan Wang and Klas Leino and Matt Fredrikson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SHyVaWGTO4}\n}", "github": "", "project": "", "reviewers": "diKy;jf6o;YimP;xiEp", "pdf_size": 414507, "rating": "4;5;6;7", "confidence": "3;4;5;4", "soundness": "3;2;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;4", "wc_summary": "69;39;88;53", "wc_strengths": "91;14;26;94", "wc_weaknesses": "60;280;880;86", "wc_questions": "42;80;149;34", "wc_limitations": "5;4;8;1", "wc_review": "267;417;1151;268", "wc_reply_reviewers": "352;16;385;37", "wc_reply_authors": "1190;9;97;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.25, 18.267115262131565 ], "wc_strengths_avg": [ 56.25, 36.51284020724764 ], "wc_weaknesses_avg": [ 326.5, 330.6762011394228 ], "wc_questions_avg": [ 76.25, 45.4553352204117 ], "wc_limitations_avg": [ 4.5, 2.5 ], "wc_review_avg": [ 525.75, 366.1115779376555 ], "wc_reply_reviewers_avg": [ 197.5, 171.5582991288967 ], "wc_reply_authors_avg": [ 324.0, 501.4194850621583 ], 
"reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7222223713198277113&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cmu.edu;andrew.cmu.edu;cmu.edu;ro.am;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Roam", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.roamhq.com", "aff_unique_abbr": "CMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Robustness through Random Weight Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71613", "id": "SIE9N5nnHg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/766f407b7b4a82135da23b32f0cbaff3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SIE9N5nnHg", "openreview": "https://openreview.net/forum?id=SIE9N5nnHg", "poster": "/media/PosterPDFs/NeurIPS%202023/71613.png?t=1699614259.8750648", "slides": "https://nips.cc/virtual/2023/poster/71613", "video": "https://nips.cc/virtual/2023/poster/71613", "author_site": "Yanxiang Ma, Minjing Dong, Chang Xu", "tldr": "", "abstract": "Deep neural networks have been found to be vulnerable in a variety of tasks. Adversarial attacks can manipulate network outputs, resulting in incorrect predictions. Adversarial defense methods aim to improve the adversarial robustness of networks by countering potential attacks. In addition to traditional defense approaches, randomized defense mechanisms have recently received increasing attention from researchers. These methods introduce different types of perturbations during the inference phase to destabilize adversarial attacks.\nAlthough promising empirical results have been demonstrated by these approaches, the defense performance is quite sensitive to the randomness parameters, which are always manually tuned without further analysis. On the contrary, we propose incorporating random weights into the optimization to fully exploit the potential of randomized defense. To perform better optimization of randomness parameters, we conduct a theoretical analysis of the connections between randomness parameters and gradient similarity as well as natural performance. From these two aspects, we suggest imposing theoretically-guided constraints on random weights during optimizations, as these weights play a critical role in balancing natural performance and adversarial robustness. We derive both the upper and lower bounds of random weight parameters by considering prediction bias and gradient similarity. In this study, we introduce the Constrained Trainable Random Weight (CTRW), which adds random weight parameters to the optimization and includes a constraint guided by the upper and lower bounds to achieve better trade-offs between natural and robust accuracy. We evaluate the effectiveness of CTRW on several datasets and benchmark convolutional neural networks. 
Our results indicate that our model achieves a robust accuracy approximately 16% to 17% higher than the baseline model under PGD-20 and 22% to 25% higher on Auto Attack.", "keywords": "Adversarial robustness; Randomized defense; Random parameters optimization", "primary_area": "", "supplementary_material": "/attachment/ab61d14a19aeda003804d8178ed18614811b0110.pdf", "author": "Yanxiang Ma;Minjing Dong;Chang Xu", "authorids": "~Yanxiang_Ma1;~Minjing_Dong1;~Chang_Xu4", "gender": "M;M;", "homepage": ";https://www.cs.cityu.edu.hk/~minjdong/;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": ";246/2900.html;97/2966-2", "google_scholar": ";https://scholar.google.com.au/citations?user=gJJRqlsAAAAJ;N4F_3eoAAAAJ", "orcid": "0000-0002-3109-6944;0009-0003-1717-818X;0000-0002-4756-0609", "linkedin": ";;", "or_profile": "~Yanxiang_Ma1;~Minjing_Dong1;~Charles_Xu1", "aff": "University of Sydney, University of Sydney;University of Sydney;University of Sydney", "aff_domain": "uni.sydney.edu.au;sydney.edu.au;sydney.edu.au", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nma2023adversarial,\ntitle={Adversarial Robustness through Random Weight Sampling},\nauthor={Yanxiang Ma and Minjing Dong and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SIE9N5nnHg}\n}", "github": "", "project": "", "reviewers": "1Bpz;nrzD;8U3B;kSR2", "pdf_size": 461998, "rating": "3;6;7;8", "confidence": "2;2;4;4", "soundness": "2;4;3;3", "novelty": "2;4;3;3", "presentation": "1;4;3;3", "wc_summary": "200;51;108;55", "wc_strengths": "32;43;36;123", "wc_weaknesses": "600;113;57;31", "wc_questions": "47;31;26;4", "wc_limitations": "1;1;11;42", "wc_review": "880;239;238;255", "wc_reply_reviewers": "436;0;42;21", "wc_reply_authors": "1251;32;81;96", "reply_reviewers": "4;0;1;1", "reply_authors": "7;2;3;3", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 103.5, 60.08535595301071 ], "wc_strengths_avg": [ 58.5, 37.446628686705566 ], "wc_weaknesses_avg": [ 200.25, 232.69011044735012 ], "wc_questions_avg": [ 27.0, 15.378556499229699 ], "wc_limitations_avg": [ 13.75, 16.813313177360374 ], "wc_review_avg": [ 403.0, 275.47867431073496 ], "wc_reply_reviewers_avg": [ 124.75, 180.31274913327675 ], "wc_reply_authors_avg": [ 365.0, 512.0795836586341 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.75, 1.920286436967152 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5371632276822533404&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uni.sydney.edu.au;sydney.edu.au;sydney.edu.au", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "SJw4Da8BuR", "title": "Transformer Compression via Subspace Projection", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose TCSP, a novel method for compressing a transformer model by focusing on reducing the hidden size of the 
model. By projecting the whole transformer model into a subspace, we enable matrix operations between the weight matrices in the model and features in a reduced-dimensional space, leading to significant reductions in model parameters and computing resources. To establish this subspace, we decompose the feature matrix, derived from different layers of sampled data instances, into a projection matrix. \nFor evaluation, TCSP is applied to compress T5 and BERT models on the GLUE and SQuAD benchmarks. Experimental results demonstrate that TCSP achieves a compression ratio of 44\\% with at most 1.6\\% degradation in accuracy, surpassing or matching prior compression methods. Furthermore, TCSP exhibits compatibility with other methods targeting filter and attention head size compression.", "keywords": "low-rank factorization;model compression;transformer", "primary_area": "", "supplementary_material": "/attachment/abf92706751f06ffa2b259b8615015527c407cac.zip", "author": "Yuxuan Hu;Jing Zhang", "authorids": "~Yuxuan_Hu2;~Jing_Zhang24", "gender": "M;", "homepage": "https://hyx1999.github.io/;https://xiaojingzi.github.io/", "dblp": ";05/3499-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;T7Wa3GQAAAAJ", "orcid": "0009-0003-1494-7617;", "linkedin": ";", "or_profile": "~Yuxuan_Hu2;~Jing_Zhang24", "aff": "Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nhu2023transformer,\ntitle={Transformer Compression via Subspace Projection},\nauthor={Yuxuan Hu and Jing Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=SJw4Da8BuR}\n}", "github": "", "project": "", "reviewers": "f6zD;Vtyh;HH5t;HaKw", "site": "https://openreview.net/forum?id=SJw4Da8BuR", "pdf_size": 742951, "rating": "3;3;4;7", "confidence": "4;3;5;4", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "60;65;52;117", "wc_strengths": "25;23;32;78", "wc_weaknesses": "161;247;69;18", "wc_questions": "47;4;30;121", "wc_limitations": "3;20;11;1", "wc_review": "296;359;194;335", "wc_reply_reviewers": "292;282;46;0", "wc_reply_authors": "404;212;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 4.25, 1.6393596310755 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.5, 25.53918557824427 ], "wc_strengths_avg": [ 39.5, 22.477766792989023 ], "wc_weaknesses_avg": [ 123.75, 87.69086326408242 ], "wc_questions_avg": [ 50.5, 43.48850422812907 ], "wc_limitations_avg": [ 8.75, 7.495832175282475 ], "wc_review_avg": [ 296.0, 63.03570416835208 ], "wc_reply_reviewers_avg": [ 155.0, 133.04510513355987 ], "wc_reply_authors_avg": [ 154.0, 168.2973558912914 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2156655464068768, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WJtoOVcJVVcJ:scholar.google.com/&scioq=Transformer+Compression+via+Subspace+Projection&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { 
"title": "OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73589", "id": "SKN2hflBIZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e2cfb719f58585f779d0a4f9f07bd618-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=SKN2hflBIZ", "openreview": "https://openreview.net/forum?id=SKN2hflBIZ", "poster": "/media/PosterPDFs/NeurIPS%202023/73589.png?t=1702065517.5960772", "slides": "https://nips.cc/virtual/2023/poster/73589", "video": "https://nips.cc/virtual/2023/poster/73589", "author_site": "Hugo Lauren\u00e7on, Lucile Saulnier, Leo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander Rush, Douwe Kiela, Matthieu Cord, Victor Sanh", "tldr": "", "abstract": "Large multimodal models trained on natural documents, which interleave images and text, outperform models trained on image-text pairs on various multimodal benchmarks. However, the datasets used to train these models have not been released, and the collection process has not been fully specified. We introduce the OBELICS dataset, an open web-scale filtered dataset of interleaved image-text documents comprising 141 million web pages extracted from Common Crawl, 353 million associated images, and 115 billion text tokens. We describe the dataset creation process, present comprehensive filtering rules, and provide an analysis of the dataset's content. To show the viability of OBELICS, we train on the dataset vision and language models of 9 and 80 billion parameters, IDEFICS-9B and IDEFICS, and obtain competitive performance on different multimodal benchmarks. We release our dataset, models and code.", "keywords": "dataset;multimodal;machine learning", "primary_area": "", "supplementary_material": "/attachment/80b67ba98fd6e1bfed5807e5c9933df947b58df7.pdf", "author": "Hugo Lauren\u00e7on;Lucile Saulnier;Leo Tronchon;Stas Bekman;Amanpreet Singh;Anton Lozhkov;Thomas Wang;Siddharth Karamcheti;Alexander M Rush;Douwe Kiela;Matthieu Cord;Victor Sanh", "authorids": "~Hugo_Lauren\u00e7on1;~Lucile_Saulnier1;~Leo_Tronchon1;~Stas_Bekman1;~Amanpreet_Singh1;~Anton_Lozhkov1;~Thomas_Wang1;~Siddharth_Karamcheti1;~Alexander_M_Rush1;~Douwe_Kiela1;~Matthieu_Cord1;~Victor_Sanh1", "gender": "M;;M;;M;;;M;M;M;M;", "homepage": ";;;https://stasosphere.com/machine-learning/;https://apsdehal.in;;;http://siddkaramcheti.com/;http://rush.seas.harvard.edu/;https://douwekiela.github.io;https://cord.isir.upmc.fr/;", "dblp": ";;;;38/8141;;;199/1922;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=;136/9140;68/3117;230/4101", "google_scholar": ";;;;https://scholar.google.com/citations?hl=en;xlMMVCAAAAAJ;;L5v2PHAAAAAJ;LIjnUGgAAAAJ;Q0piorUAAAAJ;SpAotDcAAAAJ;6STg_7IAAAAJ", "orcid": ";;;;;;;;0000-0002-9900-1606;;;", "linkedin": "hugo-lauren%C3%A7on-304891145/;lucile-saulnier/;l%C3%A9o-tronchon-6b2548130;;;anton-lozhkov/;thomas-w-394479109/;;sasha-rush-a69b6917/;;;victor-sanh/", "or_profile": "~Hugo_Lauren\u00e7on1;~Lucile_Saulnier1;~Leo_Tronchon1;~Stas_Bekman1;~Amanpreet_Singh1;~Anton_Lozhkov1;~Thomas_Wang1;~Siddharth_Karamcheti1;~Alexander_M_Rush1;~Douwe_Kiela1;~Matthieu_Cord1;~Victor_Sanh1", "aff": ";Hugging Face;;;Hugging Face;Hugging Face;Hugging Face;Stanford University;School of Engineering and Applied Sciences, Harvard University;Stanford University;Sorbonne Universit\u00e9;Hugging Face", "aff_domain": 
";huggingface.co;;;huggingface.co;huggingface.co;huggingface.co;stanford.edu;seas.harvard.edu;stanford.edu;isir.upmc.fr;huggingface.co", "position": ";Researcher;;;Researcher;Machine Learning Engineer;Researcher;PhD student;Assistant Professor;Adjunct Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nlauren{\\c{c}}on2023obelics,\ntitle={{OBELICS}: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents},\nauthor={Hugo Lauren{\\c{c}}on and Lucile Saulnier and Leo Tronchon and Stas Bekman and Amanpreet Singh and Anton Lozhkov and Thomas Wang and Siddharth Karamcheti and Alexander M Rush and Douwe Kiela and Matthieu Cord and Victor Sanh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=SKN2hflBIZ}\n}", "github": "", "project": "", "reviewers": "jNjy;pbuU;Jcmj;m4Ux", "pdf_size": 1649837, "rating": "6;6;6;7", "confidence": "4;5;4;4", "wc_summary_and_contributions": "84;51;169;71", "wc_strengths": "33;24;102;75", "wc_improvement": "63;139;62;373", "wc_limitations": "1;39;27;1", "wc_correctness": "1;1;15;1", "wc_clarity": "5;1;29;1", "wc_relation_to_prior_work": "17;52;23;1", "wc_documentation": "7;1;34;16", "wc_additional_feedback": "1;1;1;1", "wc_review": "212;309;462;540", "wc_reply_reviewers": "0;0;99;39", "wc_reply_authors": "405;779;1014;1301", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 93.75, 45.007638240636446 ], "wc_strengths_avg": [ 58.5, 31.64253466459348 ], "wc_improvement_avg": [ 159.25, 127.29959740706174 ], "wc_limitations_avg": [ 17.0, 16.55294535724685 ], "wc_correctness_avg": [ 4.5, 6.06217782649107 ], "wc_clarity_avg": [ 9.0, 11.661903789690601 ], "wc_relation_to_prior_work_avg": [ 23.25, 18.444172521422587 ], "wc_documentation_avg": [ 14.5, 12.459935794377111 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 380.75, 128.04955095587022 ], "wc_reply_reviewers_avg": [ 34.5, 40.5 ], "wc_reply_authors_avg": [ 874.75, 328.21972442252763 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 295, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11222715136811886717&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";huggingface.co;;;huggingface.co;huggingface.co;huggingface.co;stanford.edu;seas.harvard.edu;stanford.edu;isir.upmc.fr;huggingface.co", "author_num": 12, "aff_unique_index": "0;0;0;0;1;2;1;3;0", "aff_unique_norm": "Hugging Face;Stanford University;Harvard University;Sorbonne Universit\u00e9", "aff_unique_dep": ";;School of Engineering and Applied Sciences;", "aff_unique_url": "https://huggingface.co;https://www.stanford.edu;https://www.harvard.edu;https://www.sorbonne-universite.fr", "aff_unique_abbr": "Hugging Face;Stanford;Harvard;Sorbonne U", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Stanford;Cambridge", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Distributional Model Equivalence for Risk-Sensitive Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71612", "id": "SLTQluG80x", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b0cd0e8027309ea050951e758b70d60e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SLTQluG80x", "openreview": "https://openreview.net/forum?id=SLTQluG80x", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71612", "video": "https://nips.cc/virtual/2023/poster/71612", "author_site": "Tyler Kastner, Murat Erdogdu, Amir-massoud Farahmand", "tldr": "", "abstract": "We consider the problem of learning models for risk-sensitive reinforcement learning. We theoretically demonstrate that proper value equivalence, a method of learning models which can be used to plan optimally in the risk-neutral setting, is not sufficient to plan optimally in the risk-sensitive setting. We leverage distributional reinforcement learning to introduce two new notions of model equivalence, one which is general and can be used to plan for any risk measure, but is intractable; and a practical variation which allows one to choose which risk measures they may plan optimally for. We demonstrate how our models can be used to augment any model-free risk-sensitive algorithm, and provide both tabular and large-scale experiments to demonstrate our method\u2019s ability.", "keywords": "Reinforcement learning;Risk-Sensitive Reinforcement Learning;Model-Based Reinforcement Learning;Distributional Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/2c3115377bbf1553ac139f4d68a0c63fed21470f.zip", "author": "Tyler Kastner;Murat A Erdogdu;Amir-massoud Farahmand", "authorids": "~Tyler_Kastner1;~Murat_A_Erdogdu1;~Amir-massoud_Farahmand1", "gender": "M;M;M", "homepage": "https://tylerbk6.github.io/;http://www.cs.toronto.edu/~erdogdu/;http://academic.sologen.net/", "dblp": "295/8562;139/1292;17/671", "google_scholar": "EwvaTJQAAAAJ;Lqc4cdAAAAAJ;https://scholar.google.ca/citations?user=G5SAV7gAAAAJ", "orcid": ";;", "linkedin": ";;amir-massoud-farahmand/", "or_profile": "~Tyler_Kastner1;~Murat_A_Erdogdu1;~Amir-massoud_Farahmand1", "aff": "Department of Computer Science, University of Toronto;Vector Institute;Vector Institute", "aff_domain": "cs.toronto.edu;vectorinstitute.ai;vectorinstitute.ai", "position": "PhD student;Faculty;Faculty Member", "bibtex": "@inproceedings{\nkastner2023distributional,\ntitle={Distributional Model Equivalence for Risk-Sensitive Reinforcement Learning},\nauthor={Tyler Kastner and Murat A Erdogdu and Amir-massoud Farahmand},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SLTQluG80x}\n}", "github": "", "project": "", "reviewers": "du98;B3wZ;us8H", "pdf_size": 714192, "rating": "5;7;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "162;129;72", "wc_strengths": "53;50;34", "wc_weaknesses": "85;523;55", "wc_questions": "205;276;15", "wc_limitations": "46;26;1", "wc_review": "551;1004;177", "wc_reply_reviewers": "38;74;11", "wc_reply_authors": "67;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 121.0, 37.17526059088221 ], "wc_strengths_avg": [ 45.666666666666664, 8.339997335464536 ], "wc_weaknesses_avg": [ 221.0, 213.89717155680202 ], "wc_questions_avg": [ 165.33333333333334, 110.1826765976495 ], "wc_limitations_avg": [ 
24.333333333333332, 18.408935028645434 ], "wc_review_avg": [ 577.3333333333334, 338.1344242884609 ], "wc_reply_reviewers_avg": [ 41.0, 25.80697580112788 ], "wc_reply_authors_avg": [ 22.333333333333332, 31.584102892999123 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1180059712246740638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "cs.toronto.edu;vectorinstitute.ai;vectorinstitute.ai", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Toronto;Vector Institute", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;Vector Institute", "aff_campus_unique_index": "0", "aff_campus_unique": "Toronto;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "CoLA: Exploiting Compositional Structure for Automatic and Efficient Numerical Linear Algebra", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71611", "id": "SLtNFERsHo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/88c3c482430a62d35e03926a22e4b67e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SLtNFERsHo", "openreview": "https://openreview.net/forum?id=SLtNFERsHo", "poster": "/media/PosterPDFs/NeurIPS%202023/71611.png?t=1697513510.7142484", "slides": "https://nips.cc/virtual/2023/poster/71611", "video": "https://nips.cc/virtual/2023/poster/71611", "author_site": "Andres Potapczynski, Marc Finzi, Geoff Pleiss, Geoff Pleiss, Andrew Wilson", "tldr": "", "abstract": "Many areas of machine learning and science involve large linear algebra problems, such as eigendecompositions, solving linear systems, computing matrix exponentials, and trace estimation. The matrices involved often have Kronecker, convolutional, block diagonal, sum, or product structure. In this paper, we propose a simple but general framework for large-scale linear algebra problems in machine learning, named CoLA (Compositional Linear Algebra). By combining a linear operator abstraction with compositional dispatch rules, CoLA automatically constructs memory and runtime efficient numerical algorithms. Moreover, CoLA provides memory efficient automatic differentiation, low precision computation, and GPU acceleration in both JAX and PyTorch, while also accommodating new objects, operations, and rules in downstream packages via multiple dispatch. CoLA can accelerate many algebraic operations, while making it easy to prototype matrix structures and algorithms, providing an appealing drop-in tool for virtually any computational effort that requires linear algebra. 
We showcase its efficacy across a broad range of applications, including partial differential equations, Gaussian processes, equivariant model construction, and unsupervised learning.", "keywords": "Machine Learning;Numerical Linear Algebra;partial differential equations;Gaussian processes;equivariance;graph learning;spectral analysis", "primary_area": "", "supplementary_material": "/attachment/221bec62f3e1dffe13bee3b67843fd7c852e2e70.pdf", "author": "Andres Potapczynski;Marc Anton Finzi;Geoff Pleiss;Andrew Gordon Wilson", "authorids": "~Andres_Potapczynski3;~Marc_Anton_Finzi1;~Geoff_Pleiss1;~Andrew_Gordon_Wilson1", "gender": ";M;M;Not Specified", "homepage": "https://andpotap.com/;https://mfinzi.github.io;http://geoffpleiss.com;https://cims.nyu.edu/~andrewgw", "dblp": "255/7271;222/3062;199/1693.html;65/10453", "google_scholar": ";ysMAhlwAAAAJ;XO8T-Y4AAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;0000-0002-7009-0967;", "linkedin": ";;;", "or_profile": "~Andres_Potapczynski3;~Marc_Anton_Finzi1;~Geoff_Pleiss1;~Andrew_Gordon_Wilson1", "aff": "New York University;New York University;Columbia University;New York University", "aff_domain": "nyu.edu;nyu.edu;columbia.edu;nyu.edu", "position": "PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\npotapczynski2023cola,\ntitle={Co{LA}: Exploiting Compositional Structure for Automatic and Efficient Numerical Linear Algebra},\nauthor={Andres Potapczynski and Marc Anton Finzi and Geoff Pleiss and Andrew Gordon Wilson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SLtNFERsHo}\n}", "github": "", "project": "", "reviewers": "ZAWv;qUas;pi1f;V5p6;Wjxi", "pdf_size": 699707, "rating": "5;5;6;7;7", "confidence": "4;3;4;3;3", "soundness": "3;3;3;4;3", "novelty": "1;2;2;2;4", "presentation": "3;3;3;4;3", "wc_summary": "63;56;124;90;218", "wc_strengths": "24;81;177;34;127", "wc_weaknesses": "78;161;189;85;88", "wc_questions": "1;89;74;224;87", "wc_limitations": "1;1;22;16;45", "wc_review": "167;388;586;449;565", "wc_reply_reviewers": "95;349;31;86;50", "wc_reply_authors": "33;392;15;70;11", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.9797958971132712 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 110.2, 58.965752772266036 ], "wc_strengths_avg": [ 88.6, 57.44771535927256 ], "wc_weaknesses_avg": [ 120.2, 45.72701608458615 ], "wc_questions_avg": [ 95.0, 72.13598269934361 ], "wc_limitations_avg": [ 17.0, 16.260381299342274 ], "wc_review_avg": [ 431.0, 150.8973160795115 ], "wc_reply_reviewers_avg": [ 122.2, 115.77460861518816 ], "wc_reply_authors_avg": [ 104.2, 145.40343874888242 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.45643546458763845, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13304208411778858352&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "nyu.edu;nyu.edu;columbia.edu;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "New York University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.columbia.edu", "aff_unique_abbr": "NYU;Columbia", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PLANNER: Generating Diversified Paragraph via Latent Language Diffusion Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71610", "id": "SLwy8UVS8Y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fdba5e0a9b57fce03e89cc0cad0a24e9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SLwy8UVS8Y", "openreview": "https://openreview.net/forum?id=SLwy8UVS8Y", "poster": "/media/PosterPDFs/NeurIPS%202023/71610.png?t=1701418831.4170222", "slides": "https://nips.cc/virtual/2023/poster/71610", "video": "https://nips.cc/virtual/2023/poster/71610", "author_site": "Yizhe Zhang, Jiatao Gu, Zhuofeng Wu, Shuangfei Zhai, Joshua Susskind, Navdeep Jaitly", "tldr": "", "abstract": "Autoregressive models for text sometimes generate repetitive and low-quality output because errors accumulate during the steps of generation. This issue is often attributed to exposure bias -- the difference between how a model is trained, and how it is used during inference. Denoising diffusion models provide an alternative approach in which a model can revisit and revise its output. However, they can be computationally expensive and prior efforts on text have led to models that produce less fluent output compared to autoregressive models, especially for longer text and paragraphs. In this paper, we propose PLANNER, a model that combines latent semantic diffusion with autoregressive generation, to generate fluent text while exercising global control over paragraphs. The model achieves this by combining an autoregressive \"decoding\" module with a \"planning\" module that uses latent diffusion to generate semantic paragraph embeddings in a coarse-to-fine manner. The proposed method is evaluated on various conditional generation tasks, and results on semantic generation, text completion and summarization show its effectiveness in generating high-quality long-form text in an efficient manner.", "keywords": "Text generation;diffusion model;NLP", "primary_area": "", "supplementary_material": "/attachment/8e31e00f183c2c26e0e61870ac8d85de0a37ff35.pdf", "author": "Yizhe Zhang;Jiatao Gu;Zhuofeng Wu;Shuangfei Zhai;Joshua M. 
Susskind;Navdeep Jaitly", "authorids": "~Yizhe_Zhang2;~Jiatao_Gu1;~Zhuofeng_Wu1;~Shuangfei_Zhai3;~Joshua_M._Susskind1;~Navdeep_Jaitly1", "gender": "M;M;;M;M;M", "homepage": "https://dreasysnail.github.io;http://jiataogu.me;https://cserxy.github.io/;http://cs.binghamton.edu/~szhai2;http://www.apple.com;http://www.cs.toronto.edu/~ndjaitly/", "dblp": "132/4966-2.html;164/5848.html;153/7524-1;;132/7797;04/6137", "google_scholar": "WDVMfggAAAAJ;https://scholar.google.com.sg/citations?user=cB1mFBsAAAAJ;bqinFgYAAAAJ;G6vdBYsAAAAJ;Sv2TGqsAAAAJ;kjMNMLkAAAAJ", "orcid": ";;0000-0003-3775-2436;;;", "linkedin": ";jiatao-gu-204b2672/;zhuofeng-wu-914193127/;;joshua-susskind-8ab2ab5/;", "or_profile": "~Yizhe_Zhang2;~Jiatao_Gu1;~Zhuofeng_Wu1;~Shuangfei_Zhai3;~Joshua_M._Susskind1;~Navdeep_Jaitly1", "aff": "Apple;Apple;University of Michigan - Ann Arbor;Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;umich.edu;apple.com;apple.com;apple.com", "position": "Researcher;Researcher;PhD student;Research Scientist;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023planner,\ntitle={{PLANNER}: Generating Diversified Paragraph via Latent Language Diffusion Model},\nauthor={Yizhe Zhang and Jiatao Gu and Zhuofeng Wu and Shuangfei Zhai and Joshua M. Susskind and Navdeep Jaitly},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SLwy8UVS8Y}\n}", "github": "", "project": "", "reviewers": "r7NC;6a1h;H5ac;MZNE", "pdf_size": 2094754, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "3;3;4;3", "wc_summary": "47;126;64;81", "wc_strengths": "61;91;61;146", "wc_weaknesses": "190;276;121;117", "wc_questions": "73;2;4;45", "wc_limitations": "0;2;4;3", "wc_review": "371;497;254;392", "wc_reply_reviewers": "10;34;0;36", "wc_reply_authors": "0;0;0;24", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 29.415132160165456 ], "wc_strengths_avg": [ 89.75, 34.70860844228705 ], "wc_weaknesses_avg": [ 176.0, 64.61810891692824 ], "wc_questions_avg": [ 31.0, 29.706901555025897 ], "wc_limitations_avg": [ 2.25, 1.479019945774904 ], "wc_review_avg": [ 378.5, 86.28586210961794 ], "wc_reply_reviewers_avg": [ 20.0, 15.427248620541512 ], "wc_reply_authors_avg": [ 6.0, 10.392304845413264 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=867632816864144811&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "apple.com;apple.com;umich.edu;apple.com;apple.com;apple.com", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Apple;University of Michigan", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.umich.edu", "aff_unique_abbr": "Apple;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Annealing on Graphs for Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71609", "id": 
"SLx7paoaTU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9c54ac0dd5e942b99b2b51c297544fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SLx7paoaTU", "openreview": "https://openreview.net/forum?id=SLx7paoaTU", "poster": "/media/PosterPDFs/NeurIPS%202023/71609.png?t=1701426287.8022144", "slides": "https://nips.cc/virtual/2023/poster/71609", "video": "https://nips.cc/virtual/2023/poster/71609", "author_site": "Sebastian Sanokowski, Wilhelm Berghammer, Sepp Hochreiter, Sebastian Lehner", "tldr": "", "abstract": "Several recent unsupervised learning methods use probabilistic approaches to solve combinatorial optimization (CO) problems based on the assumption of statistically independent solution variables. We demonstrate that this assumption imposes performance limitations in particular on difficult problem instances. Our results corroborate that an autoregressive approach which captures statistical dependencies among solution variables yields superior performance on many popular CO problems. We introduce Subgraph Tokenization in which the configuration of a set of solution variables is represented by a single token. This tokenization technique alleviates the drawback of the long sequential sampling procedure which is inherent to autoregressive methods without sacrificing expressivity. Importantly, we theoretically motivate an annealed entropy regularization and show empirically that it is essential for efficient and stable learning.", "keywords": "Combinatorial Optimization;Entropy Regularization;Graph Neural Networks;Statistical Mechanics", "primary_area": "", "supplementary_material": "/attachment/f499e2d17e824a927f24e0810d81248572e9f909.zip", "author": "Sebastian Sanokowski;Wilhelm Franz Berghammer;Sepp Hochreiter;Sebastian Lehner", "authorids": "~Sebastian_Sanokowski1;~Wilhelm_Franz_Berghammer1;~Sepp_Hochreiter1;~Sebastian_Lehner1", "gender": "M;M;M;", "homepage": ";;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/;https://www.jku.at/institut-fuer-machine-learning/ueber-uns/team/dr-sebastian-lehner/", "dblp": "277/0779;;h/SeppHochreiter.html;292/2938", "google_scholar": "9A8llhsAAAAJ;;https://scholar.google.at/citations?user=tvUH3WMAAAAJ;gZO5TdUAAAAJ", "orcid": "0000-0001-8065-5805;;0000-0001-7449-2528;", "linkedin": ";wilhelmberghammer/;https://linkedin.com/in/sepp-hochreiter-41514846;", "or_profile": "~Sebastian_Sanokowski1;~Wilhelm_Franz_Berghammer1;~Sepp_Hochreiter1;~Sebastian_Lehner1", "aff": "Johannes Kepler University Linz;Johannes Kepler Universit\u00e4t Linz;Johannes Kepler University Linz;Johannes Kepler University Linz", "aff_domain": "jku.at;jku.at;jku.at;jku.at", "position": "PhD student;Undergrad student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nsanokowski2023variational,\ntitle={Variational Annealing on Graphs for Combinatorial Optimization},\nauthor={Sebastian Sanokowski and Wilhelm Franz Berghammer and Sepp Hochreiter and Sebastian Lehner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SLx7paoaTU}\n}", "github": "", "project": "", "reviewers": "rhwA;59Wh;fy9s;ojQm", "pdf_size": 3614230, "rating": "4;6;6;8", "confidence": "4;3;3;4", "soundness": "3;3;2;4", "novelty": "2;3;3;4", "presentation": "1;3;2;4", "wc_summary": "27;51;178;45", "wc_strengths": "16;96;209;84", "wc_weaknesses": "209;129;275;57", "wc_questions": "104;14;84;2", "wc_limitations": "5;5;21;2", "wc_review": "361;295;767;190", 
"wc_reply_reviewers": "607;0;212;11", "wc_reply_authors": "2365;0;305;0", "reply_reviewers": "3;0;2;1", "reply_authors": "4;1;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 75.25, 59.976557920574265 ], "wc_strengths_avg": [ 101.25, 69.28699372898207 ], "wc_weaknesses_avg": [ 167.5, 82.11424967689834 ], "wc_questions_avg": [ 51.0, 43.78355855797927 ], "wc_limitations_avg": [ 8.25, 7.46240577829965 ], "wc_review_avg": [ 403.25, 218.6851332395506 ], "wc_reply_reviewers_avg": [ 207.5, 245.60588348001764 ], "wc_reply_authors_avg": [ 667.5, 987.9302860020033 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12180517073003690602&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "jku.at;jku.at;jku.at;jku.at", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Johannes Kepler University;Johannes Kepler University Linz", "aff_unique_dep": ";", "aff_unique_url": "https://www.jku.at;https://www.jku.at", "aff_unique_abbr": "JKU;JKU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Linz", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Austria" }, { "title": "RoboDepth: Robust Out-of-Distribution Depth Estimation under Corruptions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73588", "id": "SNznC08OOO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43119db5d59f07cc08fca7ba6820179a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=SNznC08OOO", "openreview": "https://openreview.net/forum?id=SNznC08OOO", "poster": "/media/PosterPDFs/NeurIPS%202023/73588.png?t=1697490981.1947274", "slides": "https://nips.cc/virtual/2023/poster/73588", "video": "https://nips.cc/virtual/2023/poster/73588", "author_site": "Lingdong Kong, Shaoyuan Xie, Hanjiang Hu, Lai Xing Ng, Benoit Cottereau, Wei Tsang Ooi", "tldr": "", "abstract": "Depth estimation from monocular images is pivotal for real-world visual perception systems. While current learning-based depth estimation models train and test on meticulously curated data, they often overlook out-of-distribution (OoD) situations. Yet, in practical settings -- especially safety-critical ones like autonomous driving -- common corruptions can arise. Addressing this oversight, we introduce a comprehensive robustness test suite, RoboDepth, encompassing 18 corruptions spanning three categories: i) weather and lighting conditions; ii) sensor failures and movement; and iii) data processing anomalies. We subsequently benchmark 42 depth estimation models across indoor and outdoor scenes to assess their resilience to these corruptions. Our findings underscore that, in the absence of a dedicated robustness evaluation framework, many leading depth estimation models may be susceptible to typical corruptions. We delve into design considerations for crafting more robust depth estimation models, touching upon pre-training, augmentation, modality, model capacity, and learning paradigms. 
We anticipate our benchmark will establish a foundational platform for advancing robust OoD depth estimation.", "keywords": "benchmark;monocular depth estimation;common corruptions;robustness", "primary_area": "", "supplementary_material": "", "author": "Lingdong Kong;Shaoyuan Xie;Hanjiang Hu;Lai Xing Ng;Benoit R Cottereau;Wei Tsang Ooi", "authorids": "~Lingdong_Kong1;~Shaoyuan_Xie1;~Hanjiang_Hu1;~Lai_Xing_Ng1;~Benoit_R_Cottereau1;~Wei_Tsang_Ooi1", "gender": ";M;M;M;M;", "homepage": ";https://daniel-xsy.github.io/;https://cs.cmu.edu/~hanjianh;;https://ipal.cnrs.fr/benoit-cottereau-personal-page/;", "dblp": ";;249/5764;258/1921;08/4824.html;", "google_scholar": ";s1m55YoAAAAJ;https://scholar.google.com/citations?hl=en;OOqyeJIAAAAJ;9I7uKooAAAAJ;", "orcid": ";;;0000-0002-5457-6289;0000-0002-2624-7680;", "linkedin": ";;hanjiang-hu-54337b196/;;benoit-cottereau-347470270/;", "or_profile": "~Lingdong_Kong1;~Shaoyuan_Xie1;~Hanjiang_Hu1;~Lai_Xing_Ng1;~Benoit_R_Cottereau1;~Wei_Tsang_Ooi1", "aff": ";Huazhong University of Science and Technology;School of Computer Science, Carnegie Mellon University;Institute for Infocomm Research (I2R), A*STAR;CNRS;", "aff_domain": ";hust.edu.cn;cs.cmu.edu;i2r.a-star.edu.sg;cnrs.fr;", "position": ";Undergrad student;MS student;Researcher;Researcher;", "bibtex": "@inproceedings{\nkong2023robodepth,\ntitle={RoboDepth: Robust Out-of-Distribution Depth Estimation under Corruptions},\nauthor={Lingdong Kong and Shaoyuan Xie and Hanjiang Hu and Lai Xing Ng and Benoit R Cottereau and Wei Tsang Ooi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=SNznC08OOO}\n}", "github": "", "project": "", "reviewers": "YXSL;pgHi;ji4t;j7EB;76NB", "pdf_size": 17606339, "rating": "5;6;7;7;9", "confidence": "3;4;4;5;4", "wc_summary_and_contributions": "32;39;71;57;117", "wc_strengths": "5;28;83;75;115", "wc_improvement": "124;14;112;174;56", "wc_limitations": "1;58;13;49;6", "wc_correctness": "1;1;8;25;1", "wc_clarity": "1;7;8;21;2", "wc_relation_to_prior_work": "1;43;10;11;6", "wc_documentation": "1;36;8;31;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "167;227;314;444;308", "wc_reply_reviewers": "0;54;20;103;0", "wc_reply_authors": "745;789;446;2121;607", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;2;4;1", "rating_avg": [ 6.8, 1.32664991614216 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 63.2, 30.175486740067676 ], "wc_strengths_avg": [ 61.2, 39.54946270178648 ], "wc_improvement_avg": [ 96.0, 55.5841704084895 ], "wc_limitations_avg": [ 25.4, 23.431602591372194 ], "wc_correctness_avg": [ 7.2, 9.303762679690406 ], "wc_clarity_avg": [ 7.8, 7.138627319029898 ], "wc_relation_to_prior_work_avg": [ 14.2, 14.824304368165137 ], "wc_documentation_avg": [ 16.0, 14.546477236774544 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 292.0, 93.48154898160385 ], "wc_reply_reviewers_avg": [ 35.4, 39.13872762367218 ], "wc_reply_authors_avg": [ 941.6, 601.747322387063 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.47673129462279606, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15107736511494591041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";hust.edu.cn;cs.cmu.edu;i2r.a-star.edu.sg;cnrs.fr;", "author_num": 6, "aff_unique_index": 
"0;1;2;3", "aff_unique_norm": "Huazhong University of Science and Technology;Carnegie Mellon University;Institute for Infocomm Research;Centre National de la Recherche Scientifique", "aff_unique_dep": ";School of Computer Science;;", "aff_unique_url": "http://www.hust.edu.cn;https://www.cmu.edu;https://www.i2r.a-star.edu.sg;https://www.cnrs.fr", "aff_unique_abbr": "HUST;CMU;I2R;CNRS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "China;United States;Singapore;France" }, { "title": "Cognitive Model Discovery via Disentangled RNNs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71608", "id": "SOEF0i0G1z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c194ced51c857ec2c1928b02250e0ac8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SOEF0i0G1z", "openreview": "https://openreview.net/forum?id=SOEF0i0G1z", "poster": "/media/PosterPDFs/NeurIPS%202023/71608.png?t=1701992146.7568274", "slides": "https://nips.cc/virtual/2023/poster/71608", "video": "https://nips.cc/virtual/2023/poster/71608", "author_site": "Kevin Miller, Maria Eckstein, Matt Botvinick, Zeb Kurth-Nelson", "tldr": "", "abstract": "Computational cognitive models are a fundamental tool in behavioral neuroscience. They embody in software precise hypotheses about the cognitive mechanisms underlying a particular behavior. Constructing these models is typically a difficult iterative process that requires both inspiration from the literature and the creativity of an individual researcher. Here, we adopt an alternative approach to learn parsimonious cognitive models directly from data. We fit behavior data using a recurrent neural network that is penalized for carrying excess information between timesteps, leading to sparse, interpretable representations and dynamics. When fitting synthetic behavioral data from known cognitive models, our method recovers the underlying form of those models. 
When fit to choice data from rats performing a bandit task, our method recovers simple and interpretable models that make testable predictions about neural mechanisms.", "keywords": "Cognitive modeling;neural networks;interpretability;disentangling;neuroscience;rodent behavior", "primary_area": "", "supplementary_material": "", "author": "Kevin J Miller;Maria K Eckstein;Matthew Botvinick;Zeb Kurth-Nelson", "authorids": "~Kevin_J_Miller1;~Maria_K_Eckstein1;~Matthew_Botvinick1;~Zeb_Kurth-Nelson1", "gender": ";F;;M", "homepage": "http://kevinjmiller.com;https://www.mariaeckstein.com;;http://zebk.com", "dblp": ";;98/5712;165/6536", "google_scholar": "qSZJKJIAAAAJ;rVrL-tEAAAAJ;;https://scholar.google.co.uk/citations?user=6-tm9DUAAAAJ", "orcid": ";0000-0002-0330-9367;;", "linkedin": ";maria-eckstein-4087b597/;;", "or_profile": "~Kevin_J_Miller1;~Maria_K_Eckstein1;~Matthew_Botvinick1;~Zeb_Kurth-Nelson1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "google.com;google.com;google.com;deepmind.com", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmiller2023cognitive,\ntitle={Cognitive Model Discovery via Disentangled {RNN}s},\nauthor={Kevin J Miller and Maria K Eckstein and Matthew Botvinick and Zeb Kurth-Nelson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SOEF0i0G1z}\n}", "github": "", "project": "", "reviewers": "u93Y;9K9A;Zh1M;SAvA;M5Mx", "pdf_size": 10341684, "rating": "5;5;6;7;7", "confidence": "4;4;2;4;3", "soundness": "2;2;3;3;4", "novelty": "2;2;2;3;3", "presentation": "3;2;3;2;4", "wc_summary": "64;88;145;114;175", "wc_strengths": "18;72;46;78;88", "wc_weaknesses": "140;352;82;516;148", "wc_questions": "25;7;40;61;36", "wc_limitations": "75;7;4;14;5", "wc_review": "322;526;317;783;452", "wc_reply_reviewers": "16;253;24;156;31", "wc_reply_authors": "0;0;0;963;21", "reply_reviewers": "1;1;1;3;1", "reply_authors": "1;1;1;3;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 117.2, 39.51404813480897 ], "wc_strengths_avg": [ 60.4, 25.342454498331453 ], "wc_weaknesses_avg": [ 247.6, 162.39408856236116 ], "wc_questions_avg": [ 33.8, 17.769637024992942 ], "wc_limitations_avg": [ 21.0, 27.22498852157701 ], "wc_review_avg": [ 480.0, 171.02163605813155 ], "wc_reply_reviewers_avg": [ 96.0, 93.87012304242495 ], "wc_reply_authors_avg": [ 196.8, 383.1863254345071 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2795084971874737, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9134331200102231053&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "google.com;google.com;google.com;deepmind.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Leveraging Locality and Robustness to Achieve Massively Scalable Gaussian Process Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71607", 
"id": "SQP1H9Jy8W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c2b60a3f269c404e9329ee119f2d34a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SQP1H9Jy8W", "openreview": "https://openreview.net/forum?id=SQP1H9Jy8W", "poster": "/media/PosterPDFs/NeurIPS%202023/71607.png?t=1701777040.7828746", "slides": "https://nips.cc/virtual/2023/poster/71607", "video": "https://nips.cc/virtual/2023/poster/71607", "author_site": "Robert Allison, Anthony Stephenson, Samuel F, Edward O Pyzer-Knapp", "tldr": "", "abstract": "The accurate predictions and principled uncertainty measures provided by GP regression incur $O(n^3)$ cost which is prohibitive for modern-day large-scale applications. This has motivated extensive work on computationally efficient approximations. We introduce a new perspective by exploring robustness properties and limiting behaviour of GP nearest-neighbour (GPnn) prediction. We demonstrate through theory and simulation that as the data-size $n$ increases, accuracy of estimated parameters and GP model assumptions become increasingly irrelevant to GPnn predictive accuracy. Consequently, it is sufficient to spend small amounts of work on parameter estimation in order to achieve high MSE accuracy, even in the presence of gross misspecification. In contrast, as $n \\rightarrow \\infty$, uncertainty calibration and NLL are shown to remain sensitive to just one parameter, the additive noise-variance; but we show that this source of inaccuracy can be corrected for, thereby achieving both well-calibrated uncertainty measures and accurate predictions at remarkably low computational cost. We exhibit a very simple GPnn regression algorithm with stand-out performance compared to other state-of-the-art GP approximations as measured on large UCI datasets. 
It operates at a small fraction of those other methods' training costs, for example on a basic laptop taking about 30 seconds to train on a dataset of size $n = 1.6 \\times 10^6$.", "keywords": "Gaussian Processes;Bayesian Inference;Regression;Bayesian Nonparametrics;Kernel Methods", "primary_area": "", "supplementary_material": "/attachment/ba892407e6b75f5e895754be413e32d70a77457f.zip", "author": "Robert F Allison;Anthony Stephenson;Samuel F;Edward Pyzer-Knapp", "authorids": "~Robert_F_Allison1;~Anthony_Stephenson1;~Samuel_F1;~Edward_Pyzer-Knapp1", "gender": ";M;Not Specified;M", "homepage": ";;https://www.turing.ac.uk/;", "dblp": ";;;182/4776", "google_scholar": ";;;efzneU4AAAAJ", "orcid": ";;;", "linkedin": ";anthony-stephenson-691a757a/;;", "or_profile": "~Robert_F_Allison1;~Anthony_Stephenson1;~Samuel_F1;~Edward_Pyzer-Knapp1", "aff": ";University of Bristol;Alan Turing Institute;International Business Machines", "aff_domain": ";bristol.ac.uk;turing.ac.uk;ibm.com", "position": ";PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nallison2023leveraging,\ntitle={Leveraging Locality and Robustness to Achieve Massively Scalable Gaussian Process Regression},\nauthor={Robert F Allison and Anthony Stephenson and Samuel F and Edward Pyzer-Knapp},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SQP1H9Jy8W}\n}", "github": "", "project": "", "reviewers": "N8D3;3iYQ;jFQi;MqBs", "pdf_size": 592698, "rating": "3;7;7;8", "confidence": "3;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "60;42;123;158", "wc_strengths": "59;25;80;73", "wc_weaknesses": "491;56;24;148", "wc_questions": "1;31;116;72", "wc_limitations": "9;4;6;1", "wc_review": "620;158;349;452", "wc_reply_reviewers": "573;53;33;77", "wc_reply_authors": "623;29;10;32", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.75, 46.86349858898714 ], "wc_strengths_avg": [ 59.25, 21.1704392963396 ], "wc_weaknesses_avg": [ 179.75, 185.37580074001028 ], "wc_questions_avg": [ 55.0, 43.30704330706496 ], "wc_limitations_avg": [ 5.0, 2.9154759474226504 ], "wc_review_avg": [ 394.75, 167.45055240279143 ], "wc_reply_reviewers_avg": [ 184.0, 225.12885199369717 ], "wc_reply_authors_avg": [ 173.5, 259.6560224604852 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6509445549041193, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10887290409032428582&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": ";bristol.ac.uk;turing.ac.uk;ibm.com", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Bristol;Alan Turing Institute;International Business Machines Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bristol.ac.uk;https://www.turing.ac.uk;https://www.ibm.com", "aff_unique_abbr": "Bristol;ATI;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "MomentDiff: Generative Video Moment Retrieval from Random to Real", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71606", "id": "SQouRKRIXY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d01bda31bbcd780774ff15b534e03c40-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SQouRKRIXY", "openreview": "https://openreview.net/forum?id=SQouRKRIXY", "poster": "/media/PosterPDFs/NeurIPS%202023/71606.png?t=1698765046.217802", "slides": "https://nips.cc/virtual/2023/poster/71606", "video": "https://nips.cc/virtual/2023/poster/71606", "author_site": "Pandeng Li, Chen-Wei Xie, Hongtao Xie, Liming Zhao, Lei Zhang, Yun Zheng, Deli Zhao, Yongdong Zhang", "tldr": "", "abstract": "Video moment retrieval pursues an efficient and generalized solution to identify the specific temporal segments within an untrimmed video that correspond to a given language description.\nTo achieve this goal, we provide a generative diffusion-based framework called MomentDiff, which simulates a typical human retrieval process from random browsing to gradual localization.\nSpecifically, we first diffuse the real span to random noise, and learn to denoise the random noise to the original span with the guidance of similarity between text and video.\nThis allows the model to learn a mapping from arbitrary random locations to real moments, enabling the ability to locate segments from random initialization.\nOnce trained, MomentDiff could sample random temporal segments as initial guesses and iteratively refine them to generate an accurate temporal boundary.\nDifferent from discriminative works (e.g., based on learnable proposals or queries), MomentDiff with random initialized spans could resist the temporal location biases from datasets.\nTo evaluate the influence of the temporal location biases, we propose two ``anti-bias'' datasets with location distribution shifts, named Charades-STA-Len and Charades-STA-Mom.\nThe experimental results demonstrate that our efficient framework consistently outperforms state-of-the-art methods on three public benchmarks, and exhibits better generalization and robustness on the proposed anti-bias datasets. 
\nThe code, model, and anti-bias evaluation datasets will be released publicly.", "keywords": "Video Moment Retrieval;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/026b3bf77daa87e854e2f716096952787ad5a6c1.pdf", "author": "Pandeng Li;Chen-Wei Xie;Hongtao Xie;Liming Zhao;Lei Zhang;Yun Zheng;Deli Zhao;Yongdong Zhang", "authorids": "~Pandeng_Li1;~Chen-Wei_Xie2;~Hongtao_Xie2;~Liming_Zhao1;~Lei_Zhang54;~Yun_Zheng1;~Deli_Zhao1;~Yongdong_Zhang2", "gender": "M;M;M;M;;M;M;M", "homepage": "https://lpdone.github.io/;;http://imcc.ustc.edu.cn/main.htm;http://www.zhaoliming.net/;;;https://zhaodeli.github.io;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html", "dblp": "313/5076;180/5461;;;;;77/1992;z/YongdongZhang", "google_scholar": "Moy-4-0AAAAJ;UHCDCRMAAAAJ;;https://scholar.google.com/citations?hl=en;;-hFpScAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ", "orcid": "0000-0002-0717-8659;;0000-0002-0163-9434;0000-0002-1467-1230;;;0000-0002-8838-578X;0000-0003-0066-3448", "linkedin": ";;;zlmzju;;;;", "or_profile": "~Pandeng_Li1;~Chen-Wei_Xie2;~Hongtao_Xie2;~Liming_Zhao1;~Lei_Zhang54;~Yun_Zheng1;~Deli_Zhao1;~Yongdong_Zhang2", "aff": "University of Science and Technology of China;Alibaba Group;University of Science and Technology of China;Alibaba Group;;Alibaba Group;Alibaba Group;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;ustc.edu.cn", "position": "PhD student;Researcher;Full Professor;Researcher;;Researcher;Director;Full Professor", "bibtex": "@inproceedings{\nli2023momentdiff,\ntitle={MomentDiff: Generative Video Moment Retrieval from Random to Real},\nauthor={Pandeng Li and Chen-Wei Xie and Hongtao Xie and Liming Zhao and Lei Zhang and Yun Zheng and Deli Zhao and Yongdong Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SQouRKRIXY}\n}", "github": "", "project": "", "reviewers": "uAtb;oTs9;KdBK;mu2A;2CFW", "pdf_size": 1390731, "rating": "5;5;6;7;7", "confidence": "5;4;3;4;5", "soundness": "4;2;3;3;3", "novelty": "3;3;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "68;19;79;85;59", "wc_strengths": "50;22;229;73;43", "wc_weaknesses": "140;277;126;95;104", "wc_questions": "121;12;264;49;3", "wc_limitations": "1;4;21;32;50", "wc_review": "380;334;719;334;259", "wc_reply_reviewers": "16;24;19;61;58", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 62.0, 23.28948260481542 ], "wc_strengths_avg": [ 83.4, 74.59919570612004 ], "wc_weaknesses_avg": [ 148.4, 66.2317144576524 ], "wc_questions_avg": [ 89.8, 96.50989586565721 ], "wc_limitations_avg": [ 21.6, 18.161497735594388 ], "wc_review_avg": [ 405.2, 161.62846283993423 ], "wc_reply_reviewers_avg": [ 35.6, 19.703806738800502 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13009063225505781667&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 7, "email": "ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;1;1;1;0", "aff_unique_norm": "University of Science and Technology of China;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Does progress on ImageNet transfer to real-world datasets?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73587", "id": "SS3CK3yx5Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4eb33c53ed5b14ce9028309431f565cc-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=SS3CK3yx5Z", "openreview": "https://openreview.net/forum?id=SS3CK3yx5Z", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73587", "video": "https://nips.cc/virtual/2023/poster/73587", "author_site": "Alex Fang, Simon Kornblith, Ludwig Schmidt", "tldr": "", "abstract": "Does progress on ImageNet transfer to real-world datasets? We investigate this question by evaluating ImageNet pre-trained models with varying accuracy (57% - 83%) on six practical image classification datasets. In particular, we study datasets collected with the goal of solving real-world tasks (e.g., classifying images from camera traps or satellites), as opposed to web-scraped benchmarks collected for comparing models. On multiple datasets, models with higher ImageNet accuracy do not consistently yield performance improvements. For certain tasks, interventions such as data augmentation improve performance even when architectures do not. 
We hope that future benchmarks will include more diverse datasets to encourage a more comprehensive approach to improving learning algorithms.", "keywords": "transfer learning;imagenet", "primary_area": "", "supplementary_material": "", "author": "Alex Fang;Simon Kornblith;Ludwig Schmidt", "authorids": "~Alex_Fang1;~Simon_Kornblith1;~Ludwig_Schmidt1", "gender": ";M;M", "homepage": ";;http://people.csail.mit.edu/ludwigs/", "dblp": "260/0449;220/4059;141/2720", "google_scholar": ";1O3RPmsAAAAJ;SWMKy70AAAAJ", "orcid": ";;", "linkedin": "alex-fang-8a11a8115/;;ludwig-schmidt-87ba3612/", "or_profile": "~Alex_Fang1;~Simon_Kornblith1;~Ludwig_Schmidt1", "aff": "Department of Computer Science, University of Washington;Google;Allen Institute for Artificial Intelligence", "aff_domain": "cs.washington.edu;google.com;allenai.org", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nfang2023does,\ntitle={Does progress on ImageNet transfer to real-world datasets?},\nauthor={Alex Fang and Simon Kornblith and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=SS3CK3yx5Z}\n}", "github": "", "project": "", "reviewers": "ok7Z;TopE;c3Q1;7tbx;EeCa", "pdf_size": 6439187, "rating": "4;5;5;7;7", "confidence": "5;5;5;3;4", "wc_summary_and_contributions": "114;106;97;130;156", "wc_strengths": "49;16;57;55;77", "wc_improvement": "314;2;22;148;347", "wc_limitations": "77;196;459;1;13", "wc_correctness": "30;2;82;1;8", "wc_clarity": "37;3;76;45;8", "wc_relation_to_prior_work": "45;1;69;3;24", "wc_documentation": "15;1;37;6;28", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "682;328;900;390;662", "wc_reply_reviewers": "161;0;0;32;96", "wc_reply_authors": "459;422;739;319;373", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 4.4, 0.8 ], "wc_summary_and_contributions_avg": [ 120.6, 20.761502835777566 ], "wc_strengths_avg": [ 50.8, 19.782820830205182 ], "wc_improvement_avg": [ 166.6, 143.26143933382772 ], "wc_limitations_avg": [ 149.2, 169.64244751830248 ], "wc_correctness_avg": [ 24.6, 30.552250326285296 ], "wc_clarity_avg": [ 33.8, 26.573671180324332 ], "wc_relation_to_prior_work_avg": [ 28.4, 25.842600488340953 ], "wc_documentation_avg": [ 17.4, 13.42534915747073 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 592.4, 208.9799990429706 ], "wc_reply_reviewers_avg": [ 57.8, 62.38076626653443 ], "wc_reply_authors_avg": [ 462.4, 146.0870973084208 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.875, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12582372480364496193&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.washington.edu;google.com;allenai.org", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Washington;Google;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Department of Computer Science;Google;", "aff_unique_url": "https://www.washington.edu;https://www.google.com;https://allenai.org", "aff_unique_abbr": "UW;Google;AI2", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Seattle;Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Plug-and-Play Stability for Intracortical Brain-Computer 
Interfaces: A One-Year Demonstration of Seamless Brain-to-Text Communication", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71605", "id": "STqaMqhtDi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/83a14a36de4502bac5b580db36e81858-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=STqaMqhtDi", "openreview": "https://openreview.net/forum?id=STqaMqhtDi", "poster": "/media/PosterPDFs/NeurIPS%202023/71605.png?t=1702155300.095072", "slides": "https://nips.cc/virtual/2023/poster/71605", "video": "https://nips.cc/virtual/2023/poster/71605", "author_site": "Chaofei Fan, Nick Hahn, Foram Kamdar, Donald Avansino, Guy Wilson, Leigh Hochberg, Krishna V Shenoy, Jaimie Henderson, Francis Willett", "tldr": "", "abstract": "Intracortical brain-computer interfaces (iBCIs) have shown promise for restoring rapid communication to people with neurological disorders such as amyotrophic lateral sclerosis (ALS). \nHowever, to maintain high performance over time, iBCIs typically need frequent recalibration to combat changes in the neural recordings that accrue over days. \nThis requires iBCI users to stop using the iBCI and engage in supervised data collection, making the iBCI system hard to use. \nIn this paper, we propose a method that enables self-recalibration of communication iBCIs without interrupting the user. \nOur method leverages large language models (LMs) to automatically correct errors in iBCI outputs. \nThe self-recalibration process uses these corrected outputs (\"pseudo-labels\") to continually update the iBCI decoder online. \nOver a period of more than one year (403 days), we evaluated our Continual Online Recalibration with Pseudo-labels (CORP) framework with one clinical trial participant. \nCORP achieved a stable decoding accuracy of 93.84% in an online handwriting iBCI task, significantly outperforming other baseline methods. \nNotably, this is the longest-running iBCI stability demonstration involving a human participant. \nOur results provide the first evidence for long-term stabilization of a plug-and-play, high-performance communication iBCI, addressing a major barrier for the clinical translation of iBCIs.", "keywords": "brain-computer interface;self-training;continual online learning", "primary_area": "", "supplementary_material": "/attachment/6bec12cf1b3661bc4133b134665fb4c2e8eb6b27.pdf", "author": "Chaofei Fan;Nick Hahn;Foram Kamdar;Donald Avansino;Guy H Wilson;Leigh Hochberg;Krishna V. Shenoy;Jaimie M. 
Henderson;Francis R Willett", "authorids": "~Chaofei_Fan1;~Nick_Hahn1;fkamdar@stanford.edu;~Donald_Avansino1;~Guy_H_Wilson1;~Leigh_Hochberg1;~Krishna_V._Shenoy1;~Jaimie_M._Henderson1;~Francis_R_Willett1", "gender": ";M;;;M;M;;M;M", "homepage": "https://fan.chaofei.me;;;;https://guyhwilson.com/;https://www.braingate.org;;https://profiles.stanford.edu/jaimie-henderson;", "dblp": "267/9685;;;316/4046;;118/9329;93/2791;;208/5029", "google_scholar": "YM4x068AAAAJ;kxyMq1UAAAAJ;;;7QQ8iUMAAAAJ;37DEyrUAAAAJ;;Qi2OyV0AAAAJ;g1x3RKgAAAAJ", "orcid": ";;;0000-0002-7974-0153;;0000-0003-0261-2273;;0000-0002-3276-2267;", "linkedin": ";;;;guy-wilson/;;;;", "or_profile": "~Chaofei_Fan1;~Nick_Hahn1;fkamdar@stanford.edu;~Donald_Avansino1;~Guy_H_Wilson1;~Leigh_Hochberg1;~Krishna_V._Shenoy1;~Jaimie_M._Henderson1;~Francis_R_Willett1", "aff": "Stanford University;Johns Hopkins University;;Stanford University;Stanford University;Brown University;;Stanford University;Stanford University", "aff_domain": "stanford.edu;jh.edu;;stanford.edu;stanford.edu;brown.edu;;stanford.edu;stanford.edu", "position": "PhD student;MS student;;Researcher;PhD student;Full Professor;;Full Professor;Researcher", "bibtex": "@inproceedings{\nfan2023plugandplay,\ntitle={Plug-and-Play Stability for Intracortical Brain-Computer Interfaces: A One-Year Demonstration of Seamless Brain-to-Text Communication},\nauthor={Chaofei Fan and Nick Hahn and Foram Kamdar and Donald Avansino and Guy H Wilson and Leigh Hochberg and Krishna V. Shenoy and Jaimie M. Henderson and Francis R Willett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=STqaMqhtDi}\n}", "github": "", "project": "", "reviewers": "8J1g;QkLb;3txU;hfNe;miLw", "pdf_size": 1665582, "rating": "5;7;7;8;8", "confidence": "3;3;4;4;3", "soundness": "2;3;3;4;3", "novelty": "3;3;4;4;4", "presentation": "3;3;3;4;4", "wc_summary": "127;147;4;225;218", "wc_strengths": "90;97;198;76;51", "wc_weaknesses": "221;77;242;41;14", "wc_questions": "30;63;369;143;180", "wc_limitations": "14;60;148;61;5", "wc_review": "482;444;961;546;468", "wc_reply_reviewers": "40;0;28;5;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 144.2, 79.91845844359112 ], "wc_strengths_avg": [ 102.4, 50.32136723102821 ], "wc_weaknesses_avg": [ 119.0, 94.24011884542591 ], "wc_questions_avg": [ 157.0, 118.85621565572411 ], "wc_limitations_avg": [ 57.6, 50.70936797081975 ], "wc_review_avg": [ 580.2, 193.37052515830842 ], "wc_reply_reviewers_avg": [ 14.6, 16.3902409988383 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.372677996249965, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16855131215396483936&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "stanford.edu;jh.edu;;stanford.edu;stanford.edu;brown.edu;;stanford.edu;stanford.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "Stanford University;Johns Hopkins University;Brown University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.stanford.edu;https://www.jhu.edu;https://www.brown.edu", "aff_unique_abbr": "Stanford;JHU;Brown", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Robust Statistics for Simulation-based Inference under Model Misspecification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71604", "id": "STrXsSIEiq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/16c5b4102a6b6eb061e502ce6736ad8a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=STrXsSIEiq", "openreview": "https://openreview.net/forum?id=STrXsSIEiq", "poster": "/media/PosterPDFs/NeurIPS%202023/71604.png?t=1701420634.256249", "slides": "https://nips.cc/virtual/2023/poster/71604", "video": "https://nips.cc/virtual/2023/poster/71604", "author_site": "Daolang Huang, Ayush Bharti, Amauri Souza, Luigi Acerbi, Samuel Kaski", "tldr": "", "abstract": "Simulation-based inference (SBI) methods such as approximate Bayesian computation (ABC), synthetic likelihood, and neural posterior estimation (NPE) rely on simulating statistics to infer parameters of intractable likelihood models. However, such methods are known to yield untrustworthy and misleading inference outcomes under model misspecification, thus hindering their widespread applicability. In this work, we propose the first general approach to handle model misspecification that works across different classes of SBI methods. Leveraging the fact that the choice of statistics determines the degree of misspecification in SBI, we introduce a regularized loss function that penalizes those statistics that increase the mismatch between the data and the model. Taking NPE and ABC as use cases, we demonstrate the superior performance of our method on high-dimensional time-series models that are artificially misspecified. We also apply our method to real data from the field of radio propagation where the model is known to be misspecified. 
We show empirically that the method yields robust inference in misspecified scenarios, whilst still being accurate when the model is well-specified.", "keywords": "Simulation-based inference;model misspecification;likelihood-free inference;approximate Bayesian computation;neural posterior estimation", "primary_area": "", "supplementary_material": "/attachment/441d8e8f054fc2b149d0e26ca16c6231a302d238.zip", "author": "Daolang Huang;Ayush Bharti;Amauri H Souza;Luigi Acerbi;Samuel Kaski", "authorids": "~Daolang_Huang1;~Ayush_Bharti1;~Amauri_H_Souza1;~Luigi_Acerbi1;~Samuel_Kaski1", "gender": "M;M;M;M;M", "homepage": "https://www.huangdaolang.com;https://bharti-ayush.github.io/;http://www.amauriholanda.org;http://luigiacerbi.com/;https://people.aalto.fi/samuel.kaski", "dblp": "277/8410;232/3957;131/3352;72/1450;64/5826", "google_scholar": "2togGHoAAAAJ;https://scholar.google.dk/citations?user=6_7vkiUAAAAJ;lP0LBI4AAAAJ;https://scholar.google.co.uk/citations?user=QYBZoGwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-4577-8049;;0000-0001-7471-7336;0000-0003-1925-9154", "linkedin": "daolanghuang/?originalSubdomain=fi;;;luigi-acerbi-719b492/;samuel-kaski-27790/", "or_profile": "~Daolang_Huang1;~Ayush_Bharti1;~Amauri_H_Souza1;~Luigi_Acerbi1;~Samuel_Kaski1", "aff": "Aalto University;Aalto University;Federal Institute of Cear\u00e1;University of Helsinki;Aalto University", "aff_domain": "aalto.fi;aalto.fi;ifce.edu.br;helsinki.fi;aalto.fi", "position": "PhD student;Postdoc;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023learning,\ntitle={Learning Robust Statistics for Simulation-based Inference under Model Misspecification},\nauthor={Daolang Huang and Ayush Bharti and Amauri H Souza and Luigi Acerbi and Samuel Kaski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=STrXsSIEiq}\n}", "github": "", "project": "", "reviewers": "DQRY;1V4M;KhFo;EXCq", "pdf_size": 3219815, "rating": "4;5;5;6", "confidence": "1;3;3;3", "soundness": "3;3;2;2", "novelty": "3;2;3;2", "presentation": "3;2;4;3", "wc_summary": "81;41;59;85", "wc_strengths": "45;26;54;23", "wc_weaknesses": "65;285;311;271", "wc_questions": "70;122;48;160", "wc_limitations": "22;10;13;5", "wc_review": "283;484;485;544", "wc_reply_reviewers": "35;52;257;195", "wc_reply_authors": "26;116;765;187", "reply_reviewers": "2;2;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.5, 17.741194999210173 ], "wc_strengths_avg": [ 37.0, 12.942179105544785 ], "wc_weaknesses_avg": [ 233.0, 98.05100713404222 ], "wc_questions_avg": [ 100.0, 43.840620433565945 ], "wc_limitations_avg": [ 12.5, 6.18465843842649 ], "wc_review_avg": [ 449.0, 98.87112824277874 ], "wc_reply_reviewers_avg": [ 134.75, 94.03822361146557 ], "wc_reply_authors_avg": [ 273.5, 289.44645446092443 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=116495087645242745&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "email": "aalto.fi;aalto.fi;ifce.edu.br;helsinki.fi;aalto.fi", "author_num": 5, "aff_unique_index": "0;0;1;2;0", 
"aff_unique_norm": "Aalto University;Federal Institute of Cear\u00e1;University of Helsinki", "aff_unique_dep": ";;", "aff_unique_url": "https://www.aalto.fi;http://www.ifce.edu.br;https://www.helsinki.fi", "aff_unique_abbr": "Aalto;IFCE;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Finland;Brazil" }, { "title": "Language Models Meet World Models: Embodied Experiences Enhance Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71603", "id": "SVBR6xBaMl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee6630dcbcff857026e474fc857aa9f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SVBR6xBaMl", "openreview": "https://openreview.net/forum?id=SVBR6xBaMl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71603", "video": "https://nips.cc/virtual/2023/poster/71603", "author_site": "Jiannan Xiang, Tianhua Tao, Yi Gu, Tianmin Shu, Zirui Wang, Zichao Yang, Zhiting Hu", "tldr": "", "abstract": "While large language models (LMs) have shown remarkable capabilities across numerous tasks, they often struggle with simple reasoning and planning in physical environments, such as understanding object permanence or planning household activities. The limitation arises from the fact that LMs are trained only on written text and miss essential embodied knowledge and skills. In this paper, we propose a new paradigm of enhancing LMs by finetuning them with world models, to gain diverse embodied knowledge while retaining their general language capabilities. Our approach deploys an embodied agent in a world model, particularly a simulator of the physical world (VirtualHome), and acquires a diverse set of embodied experiences through both goal-oriented planning and random exploration. These experiences are then used to finetune LMs to teach diverse abilities of reasoning and acting in the physical world, e.g., planning and completing goals, object permanence and tracking, etc. Moreover, it is desirable to preserve the generality of LMs during finetuning, which facilitates generalizing the embodied knowledge across tasks rather than being tied to specific simulations. We thus further introduce the classical elastic weight consolidation (EWC) for selective weight updates, combined with low-rank adapters (LoRA) for training efficiency. Extensive experiments show our approach substantially improves base LMs on 18 downstream tasks by 64.28% on average. 
In particular, the small LMs (1.3B, 6B, and 13B) enhanced by our approach match or even outperform much larger LMs (e.g., ChatGPT).", "keywords": "Language Model;World Model;Embodied Experience", "primary_area": "", "supplementary_material": "/attachment/ade2cf968c38cf2082d644ad02b7d0d7fa59ff8c.zip", "author": "Jiannan Xiang;Tianhua Tao;Yi Gu;Tianmin Shu;Zirui Wang;Zichao Yang;Zhiting Hu", "authorids": "~Jiannan_Xiang1;~Tianhua_Tao1;~Yi_Gu4;~Tianmin_Shu1;~Zirui_Wang5;~Zichao_Yang1;~Zhiting_Hu3", "gender": "M;M;M;;M;M;M", "homepage": "https://szxiangjn.github.io/;http://www.taotianhua.com/;https://wu-qing-157.github.io/;;https://zwcolin.github.io;;http://zhiting.ucsd.edu", "dblp": "230/3430;296/1990.html;;163/2175.html;;07/8707;134/4031", "google_scholar": "l8BS2wsAAAAJ;;https://scholar.google.com/citations?hl=en;YT_ffdwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=siCYLcUAAAAJ;N7_xhHoAAAAJ", "orcid": ";;0000-0003-3539-0259;;0009-0005-1329-5607;;", "linkedin": ";;;;zwcolin/;;", "or_profile": "~Jiannan_Xiang1;~Tianhua_Tao1;~Yi_Gu4;~Tianmin_Shu1;~Zirui_Wang5;~Zichao_Yang1;~Zhiting_Hu3", "aff": "Tencent AI Lab;University of Illinois, Urbana Champaign;Mohamed bin Zayed University of Artificial Intelligence;Massachusetts Institute of Technology;Princeton University;;Amazon", "aff_domain": "tencent.com;illinois.edu;mbzuai.ac.ae;mit.edu;princeton.edu;;amazon.com", "position": "Research Intern;MS student;Researcher;Postdoc;MS student;;Researcher", "bibtex": "@inproceedings{\nxiang2023language,\ntitle={Language Models Meet World Models: Embodied Experiences Enhance Language Models},\nauthor={Jiannan Xiang and Tianhua Tao and Yi Gu and Tianmin Shu and Zirui Wang and Zichao Yang and Zhiting Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SVBR6xBaMl}\n}", "github": "", "project": "", "reviewers": "ekFP;7bDY;DfLY;cenq;311c", "pdf_size": 2683099, "rating": "3;5;6;6;7", "confidence": "4;4;3;4;4", "soundness": "2;2;4;4;3", "novelty": "2;2;3;3;3", "presentation": "2;2;4;3;3", "wc_summary": "95;119;138;92;59", "wc_strengths": "68;159;147;85;66", "wc_weaknesses": "232;254;134;228;53", "wc_questions": "78;21;9;49;114", "wc_limitations": "5;14;1;14;43", "wc_review": "478;567;429;468;335", "wc_reply_reviewers": "174;132;0;188;175", "wc_reply_authors": "428;75;0;76;49", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 100.6, 26.73275144836386 ], "wc_strengths_avg": [ 105.0, 39.92492955535426 ], "wc_weaknesses_avg": [ 180.2, 75.80079155259529 ], "wc_questions_avg": [ 54.2, 38.248660107250814 ], "wc_limitations_avg": [ 15.4, 14.705101155721438 ], "wc_review_avg": [ 455.4, 75.25583033891793 ], "wc_reply_reviewers_avg": [ 133.8, 69.50798515278657 ], "wc_reply_authors_avg": [ 125.6, 153.69398166486545 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.22116293423234576, "gs_citation": 118, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9862445221001056800&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tencent.com;illinois.edu;mbzuai.ac.ae;mit.edu;princeton.edu;;amazon.com", "author_num": 7, 
"aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Tencent;University of Illinois Urbana-Champaign;Mohamed bin Zayed University of Artificial Intelligence;Massachusetts Institute of Technology;Princeton University;Amazon", "aff_unique_dep": "Tencent AI Lab;;;;;Amazon.com, Inc.", "aff_unique_url": "https://ai.tencent.com;https://illinois.edu;https://mbzuai.ac.ae;https://web.mit.edu;https://www.princeton.edu;https://www.amazon.com", "aff_unique_abbr": "Tencent AI Lab;UIUC;MBZUAI;MIT;Princeton;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;2;1;1;1", "aff_country_unique": "China;United States;United Arab Emirates" }, { "title": "DiffKendall: A Novel Approach for Few-Shot Learning with Differentiable Kendall's Rank Correlation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71602", "id": "SVUQX1W7RL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b01333262789ea3a65a5fab4c22feae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SVUQX1W7RL", "openreview": "https://openreview.net/forum?id=SVUQX1W7RL", "poster": "/media/PosterPDFs/NeurIPS%202023/71602.png?t=1701758548.8405554", "slides": "https://nips.cc/virtual/2023/poster/71602", "video": "https://nips.cc/virtual/2023/poster/71602", "author_site": "Kaipeng Zheng, Huishuai Zhang, Weiran Huang", "tldr": "", "abstract": "Few-shot learning aims to adapt models trained on the base dataset to novel tasks where the categories were not seen by the model before. This often leads to a relatively concentrated distribution of feature values across channels on novel classes, posing challenges in determining channel importance for novel tasks. Standard few-shot learning methods employ geometric similarity metrics such as cosine similarity and negative Euclidean distance to gauge the semantic relatedness between two features. However, features with high geometric similarities may carry distinct semantics, especially in the context of few-shot learning. In this paper, we demonstrate that the importance ranking of feature channels is a more reliable indicator for few-shot learning than geometric similarity metrics. We observe that replacing the geometric similarity metric with Kendall\u2019s rank correlation only during inference is able to improve the performance of few-shot learning across a wide range of methods and datasets with different domains. Furthermore, we propose a carefully designed differentiable loss for meta-training to address the non-differentiability issue of Kendall\u2019s rank correlation. By replacing geometric similarity with differentiable Kendall\u2019s rank correlation, our method can integrate with numerous existing few-shot approaches and is ready for integrating with future state-of-the-art methods that rely on geometric similarity metrics. 
Extensive experiments validate the efficacy of the rank-correlation-based approach, showcasing a significant improvement in few-shot learning.", "keywords": "Few-shot learning", "primary_area": "", "supplementary_material": "/attachment/b0b9810dfe39e15823a484db5dda4c2d8c36d321.pdf", "author": "Kaipeng Zheng;Huishuai Zhang;Weiran Huang", "authorids": "~Kaipeng_Zheng1;~Huishuai_Zhang3;~Weiran_Huang1", "gender": "M;M;M", "homepage": "https://github.com/uiiuiiuii;https://www.weiranhuang.com;https://huishuai-git.github.io", "dblp": "330/7352;170/0073-1;144/7537", "google_scholar": ";AjJ2rf8AAAAJ;w1srHyIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kaipeng_Zheng1;~Weiran_Huang1;~Huishuai_Zhang2", "aff": "University of Electronic Science and Technology of China;Shanghai Jiaotong University;Microsoft Research Asia", "aff_domain": "uestc.edu.cn;sjtu.edu.cn;microsoft.com", "position": "MS student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nzheng2023diffkendall,\ntitle={DiffKendall: A Novel Approach for Few-Shot Learning with Differentiable Kendall's Rank Correlation},\nauthor={Kaipeng Zheng and Huishuai Zhang and Weiran Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SVUQX1W7RL}\n}", "github": "", "project": "", "reviewers": "oKqH;DzME;7vc7;zjGF;5zcq", "pdf_size": 3085256, "rating": "3;4;4;5;6", "confidence": "5;4;5;5;4", "soundness": "2;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "129;94;60;66;49", "wc_strengths": "125;43;39;22;137", "wc_weaknesses": "110;224;117;108;121", "wc_questions": "170;9;40;4;125", "wc_limitations": "14;10;5;16;3", "wc_review": "548;380;261;216;435", "wc_reply_reviewers": "281;78;0;0;73", "wc_reply_authors": "2029;582;119;0;85", "reply_reviewers": "2;1;0;0;1", "reply_authors": "6;3;2;1;2", "rating_avg": [ 4.4, 1.0198039027185568 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.6, 28.82082580357475 ], "wc_strengths_avg": [ 73.2, 47.868152251784274 ], "wc_weaknesses_avg": [ 136.0, 44.24929377967517 ], "wc_questions_avg": [ 69.6, 66.33430485050702 ], "wc_limitations_avg": [ 9.6, 5.0039984012787215 ], "wc_review_avg": [ 368.0, 119.65450263153494 ], "wc_reply_reviewers_avg": [ 86.4, 103.0040775891906 ], "wc_reply_authors_avg": [ 563.0, 760.5400712651503 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.8, 1.7204650534085253 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4803844614152615, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14311260574228474584&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uestc.edu.cn;sjtu.edu.cn;microsoft.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Electronic Science and Technology of China;Shanghai Jiao Tong University;Microsoft", "aff_unique_dep": ";;Research", "aff_unique_url": "https://www.uestc.edu.cn;https://www.sjtu.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "UESTC;SJTU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Improving CLIP Training with Language Rewrites", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71601", "id": "SVjDiiVySh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6fa4d985e7c434002fb6289ab9b2d654-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SVjDiiVySh", "openreview": "https://openreview.net/forum?id=SVjDiiVySh", "poster": "/media/PosterPDFs/NeurIPS%202023/71601.png?t=1702447473.9829972", "slides": "https://nips.cc/virtual/2023/poster/71601", "video": "https://nips.cc/virtual/2023/poster/71601", "author_site": "Lijie Fan, Dilip Krishnan, Phillip Isola, Dina Katabi, Yonglong Tian", "tldr": "", "abstract": "Contrastive Language-Image Pre-training (CLIP) stands as one of the most effective and scalable methods for training transferable vision models using paired image and text data. CLIP models are trained using contrastive loss, which typically relies on data augmentations to prevent overfitting and shortcuts. However, in the CLIP training paradigm, data augmentations are exclusively applied to image inputs, while language inputs remain unchanged throughout the entire training process, limiting the exposure of diverse texts to the same image. In this paper, we introduce Language augmented CLIP (LaCLIP), a simple yet highly effective approach to enhance CLIP training through language rewrites. Leveraging the in-context learning capability of large language models, we rewrite the text descriptions associated with each image. These rewritten texts exhibit diversity in sentence structure and vocabulary while preserving the original key concepts and meanings. During training, LaCLIP randomly selects either the original texts or the rewritten versions as text augmentations for each image. Extensive experiments on CC3M, CC12M, RedCaps and LAION-400M datasets show that CLIP pre-training with language rewrites significantly improves the transfer performance without computation or memory overhead during training. 
Specifically for ImageNet zero-shot accuracy, LaCLIP outperforms CLIP by 8.2% on CC12M and 2.4% on LAION-400M.", "keywords": "contrastive learning; CLIP; large language model", "primary_area": "", "supplementary_material": "/attachment/b848a1d1d3ed88e2a8f0d03cb82b3fbb5f8a8c8a.pdf", "author": "Lijie Fan;Dilip Krishnan;Phillip Isola;Dina Katabi;Yonglong Tian", "authorids": "~Lijie_Fan1;~Dilip_Krishnan1;~Phillip_Isola1;~Dina_Katabi1;~Yonglong_Tian1", "gender": ";M;M;;", "homepage": ";http://dilipkay.wordpress.com;http://web.mit.edu/phillipi/;;http://people.csail.mit.edu/yonglong/", "dblp": "156/9941;08/2316;36/9988;k/DinaKatabi;151/6328", "google_scholar": "qthDk3oAAAAJ;_MEuWIMAAAAJ;ROILf3EAAAAJ;;https://scholar.google.com.hk/citations?user=OsP7JHAAAAAJ", "orcid": ";;0000-0002-1411-6704;;", "linkedin": ";;phillip-isola-a9955b20/;;", "or_profile": "~Lijie_Fan1;~Dilip_Krishnan1;~Phillip_Isola1;~Dina_Katabi1;~Yonglong_Tian1", "aff": "Massachusetts Institute of Technology;Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Google", "aff_domain": "mit.edu;google.com;mit.edu;mit.edu;google.com", "position": "PhD student;Research Scientist;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nfan2023improving,\ntitle={Improving {CLIP} Training with Language Rewrites},\nauthor={Lijie Fan and Dilip Krishnan and Phillip Isola and Dina Katabi and Yonglong Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SVjDiiVySh}\n}", "github": "", "project": "", "reviewers": "FjR1;Fifz;qFtF;YRKL", "pdf_size": 1445701, "rating": "6;6;7;8", "confidence": "4;4;5;5", "soundness": "4;3;4;4", "novelty": "3;3;3;3", "presentation": "4;3;4;4", "wc_summary": "45;154;86;87", "wc_strengths": "43;56;106;298", "wc_weaknesses": "216;331;110;213", "wc_questions": "61;21;24;22", "wc_limitations": "1;1;2;52", "wc_review": "366;563;328;672", "wc_reply_reviewers": "0;39;32;0", "wc_reply_authors": "405;131;444;158", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;3;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 39.08324449172561 ], "wc_strengths_avg": [ 125.75, 102.19191504223805 ], "wc_weaknesses_avg": [ 217.5, 78.20006393859279 ], "wc_questions_avg": [ 32.0, 16.777961735562517 ], "wc_limitations_avg": [ 14.0, 21.94310825749169 ], "wc_review_avg": [ 482.25, 141.27344938097886 ], "wc_reply_reviewers_avg": [ 17.75, 17.92170471802278 ], "wc_reply_authors_avg": [ 284.5, 141.00088652203573 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4963294935729541138&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;google.com;mit.edu;mit.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "MIT;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Sampling in Hierarchical 
Exponential-family Energy-based Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71600", "id": "SWU8YLlFVH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f7fdebf712db182eddaee2eb02af91e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SWU8YLlFVH", "openreview": "https://openreview.net/forum?id=SWU8YLlFVH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71600", "video": "https://nips.cc/virtual/2023/poster/71600", "author_site": "Xingsi Dong, Si Wu", "tldr": "", "abstract": "Bayesian brain theory suggests that the brain employs generative models to understand the external world. The sampling-based perspective posits that the brain infers the posterior distribution through samples of stochastic neuronal responses. Additionally, the brain continually updates its generative model to approach the true distribution of the external world. In this study, we introduce the Hierarchical Exponential-family Energy-based (HEE) model, which captures the dynamics of inference and learning. In the HEE model, we decompose the partition function into individual layers and leverage a group of neurons with shorter time constants to sample the gradient of the decomposed normalization term. This allows our model to estimate the partition function and perform inference simultaneously, circumventing the negative phase encountered in conventional energy-based models (EBMs). As a result, the learning process is localized in both time and space, and the model converges easily. To match the brain's rapid computation, we demonstrate that neural adaptation can serve as a momentum term, significantly accelerating the inference process. On natural image datasets, our model exhibits representations akin to those observed in the biological visual system. Furthermore, for the machine learning community, our model can generate observations through joint or marginal generation. 
We show that marginal generation outperforms joint generation and achieves performance on par with other EBMs.", "keywords": "Bayesian brain;sampling-based inference;energy-based models;local learning;exponential-family", "primary_area": "", "supplementary_material": "/attachment/e411e7918f31ed0a777c99d0d570abca64fdea2d.zip", "author": "Xingsi Dong;Si Wu", "authorids": "~Xingsi_Dong1;~Si_Wu1", "gender": "M;M", "homepage": ";https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm", "dblp": ";25/437-1", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;", "orcid": ";", "linkedin": ";", "or_profile": "~Xingsi_Dong1;~Si_Wu1", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ndong2023neural,\ntitle={Neural Sampling in Hierarchical Exponential-family Energy-based Models},\nauthor={Xingsi Dong and Si Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SWU8YLlFVH}\n}", "github": "", "project": "", "reviewers": "tHDe;zTHc;nSfE;moHy", "pdf_size": 4019345, "rating": "3;5;6;8", "confidence": "4;3;3;3", "soundness": "2;3;3;4", "novelty": "2;3;4;3", "presentation": "1;1;2;4", "wc_summary": "154;169;103;93", "wc_strengths": "77;77;47;94", "wc_weaknesses": "286;532;387;53", "wc_questions": "107;71;254;159", "wc_limitations": "122;25;8;7", "wc_review": "746;874;799;406", "wc_reply_reviewers": "52;98;57;45", "wc_reply_authors": "69;61;191;186", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 1.224744871391589 ], "wc_summary_avg": [ 129.75, 32.383444844549814 ], "wc_strengths_avg": [ 73.75, 16.931848688197046 ], "wc_weaknesses_avg": [ 314.5, 174.46847852835768 ], "wc_questions_avg": [ 147.75, 68.85991214051903 ], "wc_limitations_avg": [ 40.5, 47.59464255564905 ], "wc_review_avg": [ 706.25, 179.21547784719934 ], "wc_reply_reviewers_avg": [ 63.0, 20.65187642806338 ], "wc_reply_authors_avg": [ 126.75, 61.84001536222319 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8006407690254357, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11198380248838887328&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning to Parameterize Visual Attributes for Open-set Fine-grained Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71599", "id": "SaII5qMgKH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cc19e4ffde5540ac3fcda240e6d975cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SaII5qMgKH", "openreview": "https://openreview.net/forum?id=SaII5qMgKH", "poster": "/media/PosterPDFs/NeurIPS%202023/71599.png?t=1697187529.9997363", "slides": "https://nips.cc/virtual/2023/poster/71599", "video": 
"https://nips.cc/virtual/2023/poster/71599", "author_site": "Shijie Wang, Jianlong Chang, Haojie Li, Zhihui Wang, Wanli Ouyang, Qi Tian", "tldr": "", "abstract": "Open-set fine-grained retrieval is an emerging challenging task that allows to retrieve unknown categories beyond the training set. \nThe best solution for handling unknown categories is to represent them using a set of visual attributes learnt from known categories, as widely used in zero-shot learning. Though important, attribute modeling usually requires significant manual annotations and thus is labor-intensive. Therefore, it is worth to investigate how to transform retrieval models trained by image-level supervision from category semantic extraction to attribute modeling. To this end, we propose a novel Visual Attribute Parameterization Network (VAPNet) to learn visual attributes from known categories and parameterize them into the retrieval model, without the involvement of any attribute annotations.\nIn this way, VAPNet could utilize its parameters to parse a set of visual attributes from unknown categories and precisely represent them.\nTechnically, VAPNet explicitly attains some semantics with rich details via making use of local image patches and distills the visual attributes from these discovered semantics. Additionally, it integrates the online refinement of these visual attributes into the training process to iteratively enhance their quality. Simultaneously, VAPNet treats these attributes as supervisory signals to tune the retrieval models, thereby achieving attribute parameterization. Extensive experiments on open-set fine-grained retrieval datasets validate the superior performance of our VAPNet over existing solutions.", "keywords": "Open-set Fine-grained Retrieval;Visual Attribute;Unknown Categories", "primary_area": "", "supplementary_material": "", "author": "Shijie Wang;Jianlong Chang;Haojie Li;Zhihui Wang;Wanli Ouyang;Qi Tian", "authorids": "~Shijie_Wang3;~Jianlong_Chang2;~Haojie_Li2;~Zhihui_Wang4;~Wanli_Ouyang1;~Qi_Tian3", "gender": "M;M;M;F;;M", "homepage": ";https://jianlongchange.github.io/;https://cise.sdust.edu.cn/home/Page/teacher_detail/catId/20/id/1913.html;http://dlutir.dlut.edu.cn/Scholar/Detail/6293;;https://www.qitian1987.com/index.html", "dblp": ";92/2332;;65/2749-1.html;;78/1467-1.html", "google_scholar": "XLziKuQAAAAJ;RDwnNsQAAAAJ;pMnlgVMAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-3882-2205;;;0000-0002-7252-5047", "linkedin": ";;;;;", "or_profile": "~Shijie_Wang3;~Jianlong_Chang2;~Haojie_Li2;~Zhihui_Wang4;~Wanli_Ouyang1;~Qi_Tian3", "aff": "Dalian University of Technology;Huawei Technologies Ltd.;Dalian University of Technology;Dalian University of Technology;;Huawei Technologies Ltd.", "aff_domain": "dlut.edu.cn;huawei.com;dlut.edu.cn;dlut.edu.cn;;huawei.com", "position": "PhD student;Principal Researcher;Full Professor;Full Professor;;Principal Researcher", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning to Parameterize Visual Attributes for Open-set Fine-grained Retrieval},\nauthor={Shijie Wang and Jianlong Chang and Haojie Li and Zhihui Wang and Wanli Ouyang and Qi Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SaII5qMgKH}\n}", "github": "", "project": "", "reviewers": "uknX;yK4j;CR86;Xapw;C9Cw", "pdf_size": 1079184, "rating": "5;5;5;6;6", "confidence": "3;2;3;3;3", "soundness": "2;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;2;2;3", 
"wc_summary": "115;142;97;53;61", "wc_strengths": "22;159;55;68;48", "wc_weaknesses": "178;200;231;208;72", "wc_questions": "87;10;6;5;2", "wc_limitations": "1;7;2;61;8", "wc_review": "403;518;391;395;191", "wc_reply_reviewers": "14;25;0;42;0", "wc_reply_authors": "23;48;0;65;75", "reply_reviewers": "1;1;0;1;0", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 93.6, 33.23612492454558 ], "wc_strengths_avg": [ 70.4, 46.770075903295265 ], "wc_weaknesses_avg": [ 177.8, 55.54961746042902 ], "wc_questions_avg": [ 22.0, 32.60061349115995 ], "wc_limitations_avg": [ 15.8, 22.763128080296873 ], "wc_review_avg": [ 379.6, 105.48857758070302 ], "wc_reply_reviewers_avg": [ 16.2, 15.954936540143304 ], "wc_reply_authors_avg": [ 42.2, 27.491089465497726 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.40824829046386313, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2020019455318171375&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "dlut.edu.cn;huawei.com;dlut.edu.cn;dlut.edu.cn;;huawei.com", "author_num": 6, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Dalian University of Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "DUT;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ReMaX: Relaxing for Better Training on Efficient Panoptic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71598", "id": "SaMrN9tnxE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8e30fda5ab87ea93360a36288ac0145-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SaMrN9tnxE", "openreview": "https://openreview.net/forum?id=SaMrN9tnxE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71598", "video": "https://nips.cc/virtual/2023/poster/71598", "author_site": "Shuyang Sun, WEIJUN WANG, Andrew Howard, Qihang Yu, Philip Torr, Liang-Chieh Chen", "tldr": "", "abstract": "This paper presents a new mechanism to facilitate the training of mask transformers for efficient panoptic segmentation, democratizing its deployment. We observe that due to the high complexity in the training objective of panoptic segmentation, it will inevitably lead to much higher penalization on false positive. Such unbalanced loss makes the training process of the end-to-end mask-transformer based architectures difficult, especially for efficient models. In this paper, we present ReMaX that adds relaxation to mask predictions and class predictions during the training phase for panoptic segmentation. We demonstrate that via these simple relaxation techniques during training, our model can be consistently improved by a clear margin without any extra computational cost on inference. By combining our method with efficient backbones like MobileNetV3-Small, our method achieves new state-of-the-art results for efficient panoptic segmentation on COCO, ADE20K and Cityscapes. 
Code and pre-trained checkpoints will be available at https://github.com/google-research/deeplab2.", "keywords": "Panoptic segmentation;efficient models", "primary_area": "", "supplementary_material": "", "author": "Shuyang Sun;Weijun Wang;Andrew G. Howard;Qihang Yu;Philip Torr;Liang-Chieh Chen", "authorids": "~Shuyang_Sun1;~Weijun_Wang2;~Andrew_G._Howard1;~Qihang_Yu1;~Philip_Torr1;~Liang-Chieh_Chen1", "gender": ";M;;;;", "homepage": ";;;;http://www.robots.ox.ac.uk/~tvg/;http://liangchiehchen.com/", "dblp": ";;139/0987;;;138/2443", "google_scholar": ";xUoSmEsAAAAJ;_9l8vD8AAAAJ;7zZdZxsAAAAJ;;ACjYGPUAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Shuyang_Sun1;~Weijun_Wang2;~Andrew_G._Howard1;~Qihang_Yu1;~Philip_Torr1;~Liang-Chieh_Chen1", "aff": ";Google;Google;Johns Hopkins University;University of Oxford;Google", "aff_domain": ";google.com;google.com;jhu.edu;ox.ac.uk;google.com", "position": ";Researcher;Software Engineer;PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nsun2023remax,\ntitle={ReMaX: Relaxing for Better Training on Efficient Panoptic Segmentation},\nauthor={Shuyang Sun and Weijun Wang and Andrew G. Howard and Qihang Yu and Philip Torr and Liang-Chieh Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SaMrN9tnxE}\n}", "github": "", "project": "", "reviewers": "EbKy;Mpjf;8sND;5TWL;c7F1", "pdf_size": 4316313, "rating": "6;6;6;6;8", "confidence": "5;4;5;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "4;3;1;3;4", "wc_summary": "39;46;165;158;51", "wc_strengths": "29;69;51;68;78", "wc_weaknesses": "24;19;125;100;53", "wc_questions": "96;21;16;32;2", "wc_limitations": "70;5;21;14;2", "wc_review": "258;160;378;372;186", "wc_reply_reviewers": "15;78;31;22;0", "wc_reply_authors": "23;64;28;0;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 91.8, 57.080294322997325 ], "wc_strengths_avg": [ 59.0, 17.35511451993331 ], "wc_weaknesses_avg": [ 64.2, 41.86358799720827 ], "wc_questions_avg": [ 33.4, 32.751183184733954 ], "wc_limitations_avg": [ 22.4, 24.727312834192073 ], "wc_review_avg": [ 270.8, 90.95581344806938 ], "wc_reply_reviewers_avg": [ 29.2, 26.422717498395198 ], "wc_reply_authors_avg": [ 23.0, 23.51169921549695 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=325511126050575766&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": ";google.com;google.com;jhu.edu;ox.ac.uk;google.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Google;Johns Hopkins University;University of Oxford", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.jhu.edu;https://www.ox.ac.uk", "aff_unique_abbr": "Google;JHU;Oxford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "SpecTr: Fast Speculative Decoding via Optimal Transport", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/71597", "id": "SdYHLTCC5J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6034a661584af6c28fd97a6f23e56c0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SdYHLTCC5J", "openreview": "https://openreview.net/forum?id=SdYHLTCC5J", "poster": "/media/PosterPDFs/NeurIPS%202023/71597.png?t=1701531401.600066", "slides": "https://nips.cc/virtual/2023/poster/71597", "video": "https://nips.cc/virtual/2023/poster/71597", "author_site": "Ziteng Sun, Ananda Theertha Suresh, Jae Hun Ro, Ahmad Beirami, Himanshu Jain, Felix Yu", "tldr": "", "abstract": "Autoregressive sampling from large language models has led to state-of-the-art results in several natural language tasks.\nHowever, autoregressive sampling generates tokens one at a time making it slow, and even prohibitive in certain tasks. One way to speed up sampling is *speculative decoding*: use a small model to sample a *draft* (block or sequence of tokens), and then score all tokens in the draft by the large language model in parallel. A subset of the tokens in the draft are accepted (and the rest rejected) based on a statistical method to guarantee that the final output follows the distribution of the large model. \nIn this work, we provide a principled understanding of speculative decoding through the lens of optimal transport (OT) with *membership cost*. This framework can be viewed as an extension of the well-known *maximal-coupling* problem. This new formulation enables us to generalize the speculative decoding method to allow for a set of $k$ candidates at the token-level, which leads to an improved optimal membership cost. We show that the optimal draft selection algorithm (transport plan) can be computed via linear programming, whose best-known runtime is exponential in $k$. We then propose a valid draft selection algorithm whose acceptance probability is $(1-1/e)$-optimal multiplicatively. 
Moreover, it can be computed in time almost linear in the size of the domain of a single token.\nUsing this new draft selection algorithm, we develop a new autoregressive sampling algorithm called *SpecTr*, which provides speedup in decoding while ensuring that there is no quality degradation in the decoded output.\nWe experimentally demonstrate that for state-of-the-art large language models, the proposed approach achieves a wall clock speedup of 2.13X, a further 1.37X speedup over speculative decoding on standard benchmarks.", "keywords": "autoregressive sampling; computation efficiency; optimal transport", "primary_area": "", "supplementary_material": "/attachment/83053c3e41d23f5ff098ded83ab3feb95643842d.pdf", "author": "Ziteng Sun;Ananda Theertha Suresh;Jae Hun Ro;Ahmad Beirami;Himanshu Jain;Felix Yu", "authorids": "~Ziteng_Sun1;~Ananda_Theertha_Suresh1;~Jae_Hun_Ro1;~Ahmad_Beirami1;~Himanshu_Jain3;~Felix_Yu1", "gender": "M;M;M;M;M;M", "homepage": "http://www.zitengsun.com/;https://theertha.info;;https://beirami.github.io/;;http://felixyu.org", "dblp": "194/8609;119/3884;;41/9367;;23/10574", "google_scholar": ";K6ef57QAAAAJ;Xd5wXrgAAAAJ;VuKWbMMAAAAJ;JtrH9jQAAAAJ;lYvF6cUAAAAJ", "orcid": ";;;;;", "linkedin": ";;;ahmad-beirami-97001962;;", "or_profile": "~Ziteng_Sun1;~Ananda_Theertha_Suresh1;~Jae_Hun_Ro1;~Ahmad_Beirami1;~Himanshu_Jain3;~Felix_Yu1", "aff": "Google;Google;Google;Massachusetts Institute of Technology;Google;Google", "aff_domain": "google.com;google.com;google.com;mit.edu;google.com;google.com", "position": "Researcher;Research Scientist;Software Engineer;Research Affiliate;Researcher;Research Scientist", "bibtex": "@inproceedings{\nsun2023spectr,\ntitle={SpecTr: Fast Speculative Decoding via Optimal Transport},\nauthor={Ziteng Sun and Ananda Theertha Suresh and Jae Hun Ro and Ahmad Beirami and Himanshu Jain and Felix Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SdYHLTCC5J}\n}", "github": "", "project": "", "reviewers": "DdMT;jweu;TBLD;hEU2", "pdf_size": 579763, "rating": "4;6;6;7", "confidence": "3;3;2;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "109;153;265;365", "wc_strengths": "30;144;81;59", "wc_weaknesses": "468;205;58;135", "wc_questions": "1;1;22;2", "wc_limitations": "5;14;1;1", "wc_review": "613;517;427;562", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 223.0, 99.77975746613137 ], "wc_strengths_avg": [ 78.5, 41.91956583744636 ], "wc_weaknesses_avg": [ 216.5, 154.2311576822271 ], "wc_questions_avg": [ 6.5, 8.958236433584458 ], "wc_limitations_avg": [ 5.25, 5.3091901453988255 ], "wc_review_avg": [ 529.75, 68.35705888933491 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11785713841433060134&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com;mit.edu;google.com;google.com", "author_num": 6, 
"aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "Google;MIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DiT-3D: Exploring Plain Diffusion Transformers for 3D Shape Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71596", "id": "Se71ks7Mfz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6c01b025cad37d5c8bab4ba18846c02-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Se71ks7Mfz", "openreview": "https://openreview.net/forum?id=Se71ks7Mfz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71596", "video": "https://nips.cc/virtual/2023/poster/71596", "author_site": "Shentong Mo, Shentong Mo, Enze Xie, Ruihang Chu, Lanqing Hong, Matthias Niessner, Zhenguo Li", "tldr": "", "abstract": "Recent Diffusion Transformers (i.e., DiT) have demonstrated their powerful effectiveness in generating high-quality 2D images. However, it is unclear how the Transformer architecture performs equally well in 3D shape generation, as previous 3D diffusion methods mostly adopted the U-Net architecture. \nTo bridge this gap, we propose a novel Diffusion Transformer for 3D shape generation, named DiT-3D, which can directly operate the denoising process on voxelized point clouds using plain Transformers. Compared to existing U-Net approaches, our DiT-3D is more scalable in model size and produces much higher quality generations.\nSpecifically, the DiT-3D adopts the design philosophy of DiT but modifies it by incorporating 3D positional and patch embeddings to aggregate input from voxelized point clouds.\nTo reduce the computational cost of self-attention in 3D shape generation, we incorporate 3D window attention into Transformer blocks, as the increased 3D token length resulting from the additional dimension of voxels can lead to high computation.\nFinally, linear and devoxelization layers are used to predict the denoised point clouds. 
\nIn addition, we empirically observe that the pre-trained DiT-2D checkpoint on ImageNet can significantly improve DiT-3D on ShapeNet.\nExperimental results on the ShapeNet dataset demonstrate that the proposed DiT-3D achieves state-of-the-art performance in high-fidelity and diverse 3D point cloud generation.", "keywords": "Diffusion Models;Transformers;3D Shape Generation", "primary_area": "", "supplementary_material": "/attachment/7512e32674e0d989335e6e290b96776fa7c1bcce.pdf", "author": "Shentong Mo;Enze Xie;Ruihang Chu;Lanqing HONG;Matthias Nie\u00dfner;Zhenguo Li", "authorids": "~Shentong_Mo1;~Enze_Xie1;~Ruihang_Chu1;~Lanqing_HONG1;~Matthias_Nie\u00dfner2;~Zhenguo_Li1", "gender": ";M;M;F;;M", "homepage": ";https://xieenze.github.io/;https://ruihang-chu.github.io/;https://racheltechie.github.io/;;http://www.ee.columbia.edu/~zgli/", "dblp": ";218/5441;250/9173.html;226/4258;;23/6479", "google_scholar": ";42MVVPgAAAAJ;https://scholar.google.com.hk/citations?user=62zPPxkAAAAJ;https://scholar.google.com.sg/citations?user=2p7x6OUAAAAJ;;XboZC1AAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Shentong_Mo1;~Enze_Xie1;~Ruihang_Chu1;~Lanqing_HONG1;~Matthias_Nie\u00dfner2;~Zhenguo_Li1", "aff": ";Huawei Noah's Ark Lab;The Chinese University of Hong Kong;Huawei Technologies Ltd.;;Huawei Noah's Ark Lab", "aff_domain": ";huawei.com;cuhk.edu.hk;huawei.com;;huawei.com", "position": ";Researcher;PhD student;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nmo2023ditd,\ntitle={DiT-3D: Exploring Plain Diffusion Transformers for 3D Shape Generation},\nauthor={Shentong Mo and Enze Xie and Ruihang Chu and Lanqing HONG and Matthias Nie{\\ss}ner and Zhenguo Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Se71ks7Mfz}\n}", "github": "", "project": "", "reviewers": "7dkY;trpZ;zPmE;RjmT;QDvm", "pdf_size": 746742, "rating": "4;5;5;5;6", "confidence": "3;4;3;4;5", "soundness": "2;2;3;2;3", "novelty": "2;2;2;2;3", "presentation": "3;2;3;3;3", "wc_summary": "76;65;63;102;149", "wc_strengths": "27;30;40;67;68", "wc_weaknesses": "198;247;205;72;165", "wc_questions": "6;139;89;107;185", "wc_limitations": "1;7;6;1;10", "wc_review": "308;488;403;349;577", "wc_reply_reviewers": "121;232;105;22;128", "wc_reply_authors": "774;456;255;35;182", "reply_reviewers": "2;1;1;1;1", "reply_authors": "4;3;2;2;2", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 91.0, 32.155870381627054 ], "wc_strengths_avg": [ 46.4, 17.76063061943466 ], "wc_weaknesses_avg": [ 177.4, 58.81020319638421 ], "wc_questions_avg": [ 105.2, 59.357897536890576 ], "wc_limitations_avg": [ 5.0, 3.521363372331802 ], "wc_review_avg": [ 425.0, 96.97628576100448 ], "wc_reply_reviewers_avg": [ 121.6, 66.97044124089372 ], "wc_reply_authors_avg": [ 340.4, 255.7440908408247 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8451542547285165, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3218901154955629056&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";huawei.com;cuhk.edu.hk;huawei.com;;huawei.com", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Huawei;Chinese 
University of Hong Kong", "aff_unique_dep": "Noah's Ark Lab;", "aff_unique_url": "https://www.huawei.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Huawei;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Optimistic Exploration in Reinforcement Learning Using Symbolic Model Estimates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71595", "id": "Sf17j2pkCU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cbd0a1251f41b41aa68e728bcc1ee40-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Sf17j2pkCU", "openreview": "https://openreview.net/forum?id=Sf17j2pkCU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71595", "video": "https://nips.cc/virtual/2023/poster/71595", "author_site": "Sarath Sreedharan, Michael Katz", "tldr": "", "abstract": "There has been an increasing interest in using symbolic models along with reinforcement learning (RL) problems, where these coarser abstract models are used as a way to provide RL agents with higher level guidance. However, most of these works are inherently limited by their assumption of having an access to a symbolic approximation of the underlying problem. To address this issue, we introduce a new method for learning optimistic symbolic approximations of the underlying world model. We will see how these representations, coupled with fast diverse planners developed by the automated planning community, provide us with a new paradigm for optimistic exploration in sparse reward settings. We investigate the possibility of speeding up the learning process by generalizing learned model dynamics across similar actions with minimal human input. 
Finally, we evaluate the method by testing it on multiple benchmark domains and comparing it with other RL strategies.", "keywords": "Planning;Reinforcement Learning;Exploration", "primary_area": "", "supplementary_material": "/attachment/44924aac7c8c9452f206d1c9c71f6358c93cd379.zip", "author": "Sarath Sreedharan;Michael Katz", "authorids": "~Sarath_Sreedharan1;~Michael_Katz1", "gender": ";", "homepage": ";https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Michael.Katz1", "dblp": "162/5110;75/1295-1", "google_scholar": ";pltkfcMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sarath_Sreedharan1;~Michael_Katz1", "aff": "Colorado State University;International Business Machines", "aff_domain": "colostate.edu;ibm.com", "position": "Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nsreedharan2023optimistic,\ntitle={Optimistic Exploration in Reinforcement Learning Using Symbolic Model Estimates},\nauthor={Sarath Sreedharan and Michael Katz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Sf17j2pkCU}\n}", "github": "", "project": "", "reviewers": "nfft;6i1K;MX4H;S1WD", "pdf_size": 1160946, "rating": "3;4;6;7", "confidence": "3;4;4;3", "soundness": "2;2;2;4", "novelty": "2;2;3;3", "presentation": "1;2;4;3", "wc_summary": "61;107;132;225", "wc_strengths": "42;59;81;204", "wc_weaknesses": "147;391;117;375", "wc_questions": "68;106;16;44", "wc_limitations": "10;123;42;74", "wc_review": "328;786;388;922", "wc_reply_reviewers": "0;31;0;157", "wc_reply_authors": "0;43;0;84", "reply_reviewers": "0;1;0;2", "reply_authors": "1;2;1;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 131.25, 59.8179529907201 ], "wc_strengths_avg": [ 96.5, 63.58655518267993 ], "wc_weaknesses_avg": [ 257.5, 126.07438280634175 ], "wc_questions_avg": [ 58.5, 33.026504507743475 ], "wc_limitations_avg": [ 62.25, 41.73951964266 ], "wc_review_avg": [ 606.0, 253.507396341803 ], "wc_reply_reviewers_avg": [ 47.0, 64.75723897758458 ], "wc_reply_authors_avg": [ 31.75, 34.90254288730264 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18445748508864238637&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "colostate.edu;ibm.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Colorado State University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.colostate.edu;https://www.ibm.com", "aff_unique_abbr": "CSU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Enhancing Sharpness-Aware Optimization Through Variance Suppression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71594", "id": "Sf3t6Bth4P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e095c0a3717629aa5497601985bfcf0e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Sf3t6Bth4P", "openreview": "https://openreview.net/forum?id=Sf3t6Bth4P", "poster": "/media/PosterPDFs/NeurIPS%202023/71594.png?t=1701498463.9541981", "slides": 
"https://nips.cc/virtual/2023/poster/71594", "video": "https://nips.cc/virtual/2023/poster/71594", "author_site": "Bingcong Li, Georgios Giannakis", "tldr": "", "abstract": "Sharpness-aware minimization (SAM) has well documented merits in enhancing generalization of deep neural networks, even without sizable data augmentation. Embracing the geometry of the loss function, where neighborhoods of 'flat minima' heighten generalization ability, SAM seeks 'flat valleys' by minimizing the maximum loss caused by an *adversary* perturbing parameters within the neighborhood.\nAlthough critical to account for sharpness of the loss function, such an '*over-friendly* adversary' can curtail the outmost level of generalization. The novel approach of this contribution fosters stabilization of adversaries through *variance suppression* (VaSSO) to avoid such friendliness. VaSSO's *provable* stability safeguards its numerical improvement over SAM in model-agnostic tasks, including image classification and machine translation. In addition, experiments confirm that VaSSO endows SAM with robustness against high levels of label noise. Code is available at https://github.com/BingcongLi/VaSSO.", "keywords": "generalization;optimization;neural networks", "primary_area": "", "supplementary_material": "/attachment/714b14f3be267e74803a73c745feacecf5bb0faf.pdf", "author": "Bingcong Li;Georgios B. Giannakis", "authorids": "~Bingcong_Li1;~Georgios_B._Giannakis1", "gender": ";M", "homepage": ";http://spincom.umn.edu/", "dblp": ";33/4080", "google_scholar": ";Nu_6R8sAAAAJ", "orcid": ";", "linkedin": ";georgios-b-giannakis-54023b18/", "or_profile": "~Bingcong_Li1;~Georgios_B._Giannakis1", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nli2023enhancing,\ntitle={Enhancing Sharpness-Aware Optimization Through Variance Suppression},\nauthor={Bingcong Li and Georgios B. 
Giannakis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Sf3t6Bth4P}\n}", "github": "", "project": "", "reviewers": "988S;zZon;65jc;BP9H", "pdf_size": 1035238, "rating": "4;4;5;5", "confidence": "4;4;4;3", "soundness": "2;2;2;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "24;128;60;45", "wc_strengths": "12;70;24;26", "wc_weaknesses": "154;536;13;63", "wc_questions": "2;2;264;169", "wc_limitations": "2;1;6;2", "wc_review": "194;737;367;305", "wc_reply_reviewers": "0;0;39;14", "wc_reply_authors": "23;23;42;23", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 38.963925623581616 ], "wc_strengths_avg": [ 33.0, 22.02271554554524 ], "wc_weaknesses_avg": [ 191.5, 205.2200038982555 ], "wc_questions_avg": [ 109.25, 112.3863314642844 ], "wc_limitations_avg": [ 2.75, 1.920286436967152 ], "wc_review_avg": [ 400.75, 203.78711318432283 ], "wc_reply_reviewers_avg": [ 13.25, 15.927570436196476 ], "wc_reply_authors_avg": [ 27.75, 8.227241335952167 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18089727266646707330&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "email": ";", "author_num": 2 }, { "title": "Sampling from Gaussian Process Posteriors using Stochastic Gradient Descent", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71593", "id": "Sf9goJtTCE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7482e8ce4139df1a2d8195a0746fa713-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Sf9goJtTCE", "openreview": "https://openreview.net/forum?id=Sf9goJtTCE", "poster": "/media/PosterPDFs/NeurIPS%202023/71593.png?t=1701871931.8873665", "slides": "https://nips.cc/virtual/2023/poster/71593", "video": "https://nips.cc/virtual/2023/poster/71593", "author_site": "Jihao Andreas Lin, Javier Antor\u00e1n, Shreyas Padhy, David Janz, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Alexander Terenin", "tldr": "", "abstract": "Gaussian processes are a powerful framework for quantifying uncertainty and for sequential decision-making but are limited by the requirement of solving linear systems. In general, this has a cubic cost in dataset size and is sensitive to conditioning. We explore stochastic gradient algorithms as a computationally efficient method of approximately solving these linear systems: we develop low-variance optimization objectives for sampling from the posterior and extend these to inducing points. Counterintuitively, stochastic gradient descent often produces accurate predictions, even in cases where it does not converge quickly to the optimum. We explain this through a spectral characterization of the implicit bias from non-convergence. We show that stochastic gradient descent produces predictive distributions close to the true posterior both in regions with sufficient data coverage, and in regions sufficiently far away from the data. 
Experimentally, stochastic gradient descent achieves state-of-the-art performance on sufficiently large-scale or ill-conditioned regression tasks. Its uncertainty estimates match the performance of significantly more expensive baselines on a large-scale Bayesian optimization task.", "keywords": "Gaussian processes;scalable learning;posterior sampling;Bayesian optimization", "primary_area": "", "supplementary_material": "", "author": "Jihao Andreas Lin;Javier Antoran;Shreyas Padhy;David Janz;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato;Alexander Terenin", "authorids": "~Jihao_Andreas_Lin1;~Javier_Antoran1;~Shreyas_Padhy1;~David_Janz1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1;~Alexander_Terenin1", "gender": "M;Unspecified;M;M;M;", "homepage": "https://jandylin.github.io;https://javierantoran.github.io/about/;http://shreyaspadhy.github.io;http://www.djanz.org;https://avt.im/;http://jmhl.org", "dblp": "279/2864;234/8818.html;267/9851;190/7685;185/1040;40/6058", "google_scholar": "Bn1GyeEAAAAJ;_b-Cs2cAAAAJ;JxbV2R0AAAAJ;https://scholar.google.co.uk/citations?user=rI5XB7sAAAAJ;6Qa-wXMAAAAJ;BEBccCQAAAAJ", "orcid": ";0000-0003-2877-2689;;;0000-0001-5292-3104;0000-0001-7610-949X", "linkedin": "jihao-andreas-lin/;javier-antoran/;;;;", "or_profile": "~Jihao_Andreas_Lin1;~Javier_Antoran1;~Shreyas_Padhy1;~David_Janz1;~Alexander_Terenin1;~Jose_Miguel_Hernandez_Lobato1", "aff": "University of Cambridge;University of Cambridge;Microsoft Research;University of Alberta;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;research.microsoft.com;ualberta.ca;cam.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;Intern;Postdoc;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nlin2023sampling,\ntitle={Sampling from Gaussian Process Posteriors using Stochastic Gradient Descent},\nauthor={Jihao Andreas Lin and Javier Antoran and Shreyas Padhy and David Janz and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato and Alexander Terenin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Sf9goJtTCE}\n}", "github": "", "project": "", "reviewers": "Hc1f;xja3;Hdii;w7c5", "pdf_size": 2397961, "rating": "7;8;8;9", "confidence": "5;4;3;4", "soundness": "4;4;4;4", "novelty": "3;3;4;4", "presentation": "4;3;4;4", "wc_summary": "124;123;151;89", "wc_strengths": "74;32;58;167", "wc_weaknesses": "190;215;26;68", "wc_questions": "245;73;35;74", "wc_limitations": "25;1;4;4", "wc_review": "658;444;274;402", "wc_reply_reviewers": "231;0;24;64", "wc_reply_authors": "702;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 8.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 121.75, 21.992896580487074 ], "wc_strengths_avg": [ 82.75, 50.898796645893306 ], "wc_weaknesses_avg": [ 124.75, 79.6472692563907 ], "wc_questions_avg": [ 106.75, 81.35224336181517 ], "wc_limitations_avg": [ 8.5, 9.604686356149273 ], "wc_review_avg": [ 444.5, 138.2561029394363 ], "wc_reply_reviewers_avg": [ 79.75, 90.2673113590961 ], "wc_reply_authors_avg": [ 175.5, 303.974916728338 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 22, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16115795031874363668&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "email": "cam.ac.uk;cam.ac.uk;research.microsoft.com;ualberta.ca;cam.ac.uk;cam.ac.uk", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "University of Cambridge;Microsoft;University of Alberta", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.cam.ac.uk;https://www.microsoft.com/en-us/research;https://www.ualberta.ca", "aff_unique_abbr": "Cambridge;MSR;UAlberta", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;1;2;0;0", "aff_country_unique": "United Kingdom;United States;Canada" }, { "id": "SfXjt1FtMQ", "title": "GmGM: a fast Gaussian graphical model for multi-modal data", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper introduces the Gaussian multi-Graphical Model, a model to construct sparse graph representations of matrix- and tensor-variate data. We generalize prior work in this area by simultaneously learning this representation across several tensors that share axes, which is necessary to allow the analysis of multimodal datasets such as those encountered in multi-omics. Our algorithm uses only a single eigendecomposition per axis, achieving an order of magnitude speedup over prior work in the ungeneralized case. This allows the use of our methodology on large multi-modal datasets such as single-cell multi-omics data, which was challenging with previous approaches. We validate our model on synthetic data and five real-world datasets.", "keywords": "bigraphical lasso;network inference;gaussian graphical models;multi-omics;single-cell", "primary_area": "", "supplementary_material": "/attachment/75461dac18413e12d525f595c3518ead6984a63f.pdf", "author": "Bailey Andrew;David R Westhead;Luisa Cutillo", "authorids": "~Bailey_Andrew1;d.r.westhead@leeds.ac.uk;~Luisa_Cutillo1", "gender": "M;;F", "homepage": ";;https://eps.leeds.ac.uk/maths/staff/5526/dr-luisa-cutillo", "dblp": ";;32/6082", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Bailey_Andrew1;d.r.westhead@leeds.ac.uk;~Luisa_Cutillo1", "aff": "University of Leeds;;University of Leeds", "aff_domain": "leeds.ac.uk;;leeds.ac.uk", "position": "PhD student;;Lecturer", "bibtex": "@misc{\nandrew2023gmgm,\ntitle={Gm{GM}: a fast Gaussian graphical model for multi-modal data},\nauthor={Bailey Andrew and David R Westhead and Luisa Cutillo},\nyear={2023},\nurl={https://openreview.net/forum?id=SfXjt1FtMQ}\n}", "github": "", "project": "", "reviewers": "KacZ;ooWU;pfpR", "site": "https://openreview.net/forum?id=SfXjt1FtMQ", "pdf_size": 861842, "rating": "5;6;6", "confidence": "3;4;3", "soundness": "2;3;2", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "90;50;50", "wc_strengths": "132;117;75", "wc_weaknesses": "227;29;216", "wc_questions": "3;71;86", "wc_limitations": "3;8;11", "wc_review": "455;275;438", "wc_reply_reviewers": "31;13;53", "wc_reply_authors": "14;0;132", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 63.333333333333336, 18.856180831641264 ], "wc_strengths_avg": [ 108.0, 
24.124676163629637 ], "wc_weaknesses_avg": [ 157.33333333333334, 90.85641908466836 ], "wc_questions_avg": [ 53.333333333333336, 36.11401697709938 ], "wc_limitations_avg": [ 7.333333333333333, 3.299831645537222 ], "wc_review_avg": [ 389.3333333333333, 81.14322043290014 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 16.35712552851373 ], "wc_reply_authors_avg": [ 48.666666666666664, 59.2021020647822 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SiBYLRv60pcJ:scholar.google.com/&scioq=GmGM:+a+fast+Gaussian+graphical+model+for+multi-modal+data&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Leeds", "aff_unique_dep": "", "aff_unique_url": "https://www.leeds.ac.uk", "aff_unique_abbr": "Leeds", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "An Optimal Structured Zeroth-order Algorithm for Non-smooth Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71592", "id": "SfdkS6tt81", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7429f4c1b267cf619f28c4d4f1532f99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SfdkS6tt81", "openreview": "https://openreview.net/forum?id=SfdkS6tt81", "poster": "/media/PosterPDFs/NeurIPS%202023/71592.png?t=1699285352.7333827", "slides": "https://nips.cc/virtual/2023/poster/71592", "video": "https://nips.cc/virtual/2023/poster/71592", "author_site": "Marco Rando, Cesare Molinari, Lorenzo Rosasco, Silvia Villa", "tldr": "", "abstract": "Finite-difference methods are a class of algorithms designed to solve black-box optimization problems by approximating the gradient of the target function on a set of directions. In black-box optimization, the non-smooth setting is particularly relevant since, in practice, differentiability and smoothness assumptions cannot be verified. To cope with nonsmoothness, several authors use a smooth approximation of the target function and show that finite difference methods approximate its gradient. Recently, it has been proved that imposing structure on the directions improves performance. However, only the smooth setting was considered. To close this gap, we introduce and analyze O-ZD, the first structured finite-difference algorithm for non-smooth black-box optimization. Our method exploits a smooth approximation of the target function and we prove that it approximates its gradient on a subset of random *orthogonal* directions. We analyze the convergence of O-ZD under different assumptions. \nFor non-smooth convex functions, we obtain the optimal complexity. In the non-smooth non-convex setting, we characterize the number of iterations needed to bound the expected norm of the smoothed gradient. For smooth functions, our analysis recovers existing results for structured zeroth-order methods for the convex case and extends them to the non-convex setting. 
We conclude with numerical simulations where assumptions are satisfied, observing that our algorithm has very good practical performance.", "keywords": "nonsmooth optimization;zeroth order optimization;nonsmooth zeroth-order", "primary_area": "", "supplementary_material": "/attachment/1b63f36977ac67dc7ee1cc2bf9d57e99b56ac50f.zip", "author": "Marco Rando;Cesare Molinari;Lorenzo Rosasco;Silvia Villa", "authorids": "~Marco_Rando1;~Cesare_Molinari1;~Lorenzo_Rosasco1;~Silvia_Villa1", "gender": "M;M;;F", "homepage": ";https://ceciomolinari.wixsite.com/my-site-3;;", "dblp": "295/8545;219/1357;;18/8186", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.fr/citations?user=qgdXMqgAAAAJ;;https://scholar.google.it/citations?user=wwj1C0EAAAAJ", "orcid": "0009-0008-3839-1429;;;0000-0002-6232-5631", "linkedin": "marco-rando-8085b6141/;;;", "or_profile": "~Marco_Rando1;~Cesare_Molinari1;~Lorenzo_Rosasco1;~Silvia_Villa1", "aff": "Universit\u00e0 degli Studi di Genova;University of Genoa;;Universit\u00e0 degli Studi di Genova", "aff_domain": "unige.it;unige.it;;unige.it", "position": "PhD student;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nrando2023an,\ntitle={An Optimal Structured Zeroth-order Algorithm for Non-smooth Optimization},\nauthor={Marco Rando and Cesare Molinari and Lorenzo Rosasco and Silvia Villa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SfdkS6tt81}\n}", "github": "", "project": "", "reviewers": "RtjB;KbSU;VJ1k;eK7Y;77xV", "pdf_size": 7835191, "rating": "5;6;6;6;7", "confidence": "3;4;3;2;3", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "50;51;92;52;43", "wc_strengths": "200;38;103;27;42", "wc_weaknesses": "2;40;129;43;5", "wc_questions": "2;351;119;50;213", "wc_limitations": "2;22;1;5;1", "wc_review": "256;502;444;177;304", "wc_reply_reviewers": "11;28;12;0;124", "wc_reply_authors": "0;0;0;0;771", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.6, 17.48828179095934 ], "wc_strengths_avg": [ 82.0, 64.69312173639482 ], "wc_weaknesses_avg": [ 43.8, 45.884202074352345 ], "wc_questions_avg": [ 147.0, 124.24974849069112 ], "wc_limitations_avg": [ 6.2, 8.034923770640267 ], "wc_review_avg": [ 336.6, 119.93598292422503 ], "wc_reply_reviewers_avg": [ 35.0, 45.387222871640866 ], "wc_reply_authors_avg": [ 154.2, 308.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4964185556540546225&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "unige.it;unige.it;;unige.it", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Universit\u00e0 degli Studi di Genova;University of Genoa", "aff_unique_dep": ";", "aff_unique_url": "https://www.unige.it;https://www.unige.it", "aff_unique_abbr": "UniGe;UniGe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "Errors-in-variables Fr\u00e9chet Regression with Low-rank Covariate Approximation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71591", "id": "Sg3aCpWUQP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff06c57ef80625386884906c2d2d2429-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Sg3aCpWUQP", "openreview": "https://openreview.net/forum?id=Sg3aCpWUQP", "poster": "/media/PosterPDFs/NeurIPS%202023/71591.png?t=1699570138.359766", "slides": "https://nips.cc/virtual/2023/poster/71591", "video": "https://nips.cc/virtual/2023/poster/71591", "author_site": "Dogyoon Song, Kyunghee Han", "tldr": "", "abstract": "Fr\\'echet regression has emerged as a promising approach for regression analysis involving non-Euclidean response variables. However, its practical applicability has been hindered by its reliance on ideal scenarios with abundant and noiseless covariate data. In this paper, we present a novel estimation method that tackles these limitations by leveraging the low-rank structure inherent in the covariate matrix. Our proposed framework combines the concepts of global Fr\\'echet regression and principal component regression, aiming to improve the efficiency and accuracy of the regression estimator. By incorporating the low-rank structure, our method enables more effective modeling and estimation, particularly in high-dimensional and errors-in-variables regression settings. We provide a theoretical analysis of the proposed estimator's large-sample properties, including a comprehensive rate analysis of bias, variance, and additional variations due to measurement errors. Furthermore, our numerical experiments provide empirical evidence that supports the theoretical findings, demonstrating the superior performance of our approach. Overall, this work introduces a promising framework for regression analysis of non-Euclidean variables, effectively addressing the challenges associated with limited and noisy covariate data, with potential applications in diverse fields.", "keywords": "Frechet regression;principal component regression;non-Euclidean;low-rank matrix;errors-in-variables analysis", "primary_area": "", "supplementary_material": "/attachment/0cd2c05fd95bd55208f4157bd6e41ce6268ad9b4.zip", "author": "Dogyoon Song;Kyunghee Han", "authorids": "~Dogyoon_Song1;~Kyunghee_Han1", "gender": ";M", "homepage": "https://dogyoons.github.io/;https://sites.google.com/site/kyungheehanstat/", "dblp": "191/6676;", "google_scholar": "CT84_rEAAAAJ;", "orcid": "0000-0001-5489-8213;", "linkedin": "dogyoons/;", "or_profile": "~Dogyoon_Song1;~Kyunghee_Han1", "aff": "University of Michigan - Ann Arbor;University of Illinois at Chicago", "aff_domain": "umich.edu;uic.edu", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nsong2023errorsinvariables,\ntitle={Errors-in-variables Fr{\\textbackslash}'echet Regression with Low-rank Covariate Approximation},\nauthor={Dogyoon Song and Kyunghee Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Sg3aCpWUQP}\n}", "github": "", "project": "", "reviewers": "urUC;BqGr;S37y;ZuE1;Jnac", "pdf_size": 759995, "rating": "5;6;6;6;6", "confidence": "4;2;2;4;3", "soundness": "3;3;3;3;2", "novelty": "3;3;2;3;2", "presentation": "3;2;3;3;3", "wc_summary": "123;62;80;76;94", "wc_strengths": "99;58;46;174;99", "wc_weaknesses": "136;59;26;29;181", "wc_questions": "33;26;1;53;126", "wc_limitations": "4;7;3;1;53", "wc_review": "395;212;156;333;553", "wc_reply_reviewers": "81;0;12;16;24", "wc_reply_authors": "463;0;0;0;0", 
"reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 87.0, 20.688160865577203 ], "wc_strengths_avg": [ 95.2, 44.81696107502159 ], "wc_weaknesses_avg": [ 86.2, 61.81067868904207 ], "wc_questions_avg": [ 47.8, 42.48952812164428 ], "wc_limitations_avg": [ 13.6, 19.794948850653796 ], "wc_review_avg": [ 329.8, 140.11623746018876 ], "wc_reply_reviewers_avg": [ 26.6, 28.28144267890165 ], "wc_reply_authors_avg": [ 92.6, 185.2 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5590169943749476, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11375885666698622217&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "umich.edu;uic.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Michigan;University of Illinois at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.uic.edu", "aff_unique_abbr": "UM;UIC", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Ann Arbor;Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Bayesian Risk-Averse Q-Learning with Streaming Observations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71590", "id": "SjiLtmZETc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efaf1c9726648c8ba363a5c927440529-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SjiLtmZETc", "openreview": "https://openreview.net/forum?id=SjiLtmZETc", "poster": "/media/PosterPDFs/NeurIPS%202023/71590.png?t=1701443361.2016332", "slides": "https://nips.cc/virtual/2023/poster/71590", "video": "https://nips.cc/virtual/2023/poster/71590", "author_site": "Yuhao Wang, Enlu Zhou", "tldr": "", "abstract": "We consider a robust reinforcement learning problem, where a learning agent learns from a simulated training environment. To account for the model mis-specification between this training environment and the true environment due to lack of data, we adopt a formulation of Bayesian risk MDP (BRMDP) with infinite horizon, which uses Bayesian posterior to estimate the transition model and impose a risk functional to account for the model uncertainty. Observations from the real environment that is out of the agent's control arrive periodically and are utilized by the agent to update the Bayesian posterior to reduce model uncertainty. We theoretically demonstrate that BRMDP balances the trade-off between robustness and conservativeness, and we further develop a multi-stage Bayesian risk-averse Q-learning algorithm to solve BRMDP with streaming observations from real environment. The proposed algorithm learns a risk-averse yet optimal policy that depends on the availability of real-world observations. 
We provide a theoretical guarantee of strong convergence for the proposed algorithm.", "keywords": "Q-learning;risk-averse reinforcement learning;off-policy learning;Bayesian risk Markov decision process;distributionally robust Markov decision process", "primary_area": "", "supplementary_material": "/attachment/77cedf1141274759db13cf0bfb287bc47c6ac158.zip", "author": "Yuhao Wang;Enlu Zhou", "authorids": "~Yuhao_Wang6;~Enlu_Zhou1", "gender": "M;F", "homepage": "https://sites.gatech.edu/yuhaowang/;https://www.enluzhou.gatech.edu/", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-9863-273X;", "linkedin": ";", "or_profile": "~Yuhao_Wang6;~Enlu_Zhou1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2023bayesian,\ntitle={Bayesian Risk-Averse Q-Learning with Streaming Observations},\nauthor={Yuhao Wang and Enlu Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SjiLtmZETc}\n}", "github": "", "project": "", "reviewers": "vogF;u4kt;kQUg;mBvj", "pdf_size": 717146, "rating": "6;6;7;7", "confidence": "4;3;3;3", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "148;97;56;96", "wc_strengths": "59;124;74;77", "wc_weaknesses": "80;148;169;115", "wc_questions": "88;68;247;109", "wc_limitations": "44;7;4;41", "wc_review": "419;444;550;438", "wc_reply_reviewers": "39;56;11;8", "wc_reply_authors": "27;67;20;7", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.25, 32.64486942844159 ], "wc_strengths_avg": [ 83.5, 24.3567239176372 ], "wc_weaknesses_avg": [ 128.0, 33.741665637605976 ], "wc_questions_avg": [ 128.0, 70.21751918146923 ], "wc_limitations_avg": [ 24.0, 18.560711193270585 ], "wc_review_avg": [ 462.75, 51.21218116815569 ], "wc_reply_reviewers_avg": [ 28.5, 19.956202043475106 ], "wc_reply_authors_avg": [ 30.25, 22.398381637966615 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17947714175759377268&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "SlXKgBPMPn", "title": "HiGen: Hierarchical Graph Generative Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Most real-world graphs exhibit a hierarchical structure, which is often overlooked by existing graph generation methods. To address this limitation, we propose a novel graph generative network that captures the hierarchical nature of graphs and successively generates the graph sub-structures in a coarse-to-fine fashion. 
\nAt each level of the hierarchy, this model generates communities in parallel, followed by the prediction of cross-edges between communities using a separate model. This modular approach results in a highly scalable graph generative network. Moreover, we model the output distribution of edges in the hierarchical graph with a multinomial distribution and derive a recursive factorization for this distribution, enabling us to generate sub-graphs with integer-valued edge weights in an autoregressive manner. Empirical studies demonstrate that the proposed generative model can effectively capture both local and global properties of graphs and achieves state-of-the-art performance in terms of graph quality on various benchmarks.", "keywords": "Generative Models;Graph Generative Network;Graph Neural Network", "primary_area": "", "supplementary_material": "/attachment/f49cd1a3a901a7f878514e614d833f4c8a117175.zip", "author": "Mahdi Karami", "authorids": "~Mahdi_Karami2", "gender": "M", "homepage": "https://karami-m.github.io/", "dblp": "90/394.html", "google_scholar": "https://scholar.google.com/citations?hl=en", "orcid": "", "linkedin": "mahdi-karami-2957412a/", "or_profile": "~Mahdi_Karami2", "aff": "Amazon", "aff_domain": "amazon.com", "position": "Researcher", "bibtex": "@misc{\nkarami2023higen,\ntitle={HiGen: Hierarchical Graph Generative Networks},\nauthor={Mahdi Karami},\nyear={2023},\nurl={https://openreview.net/forum?id=SlXKgBPMPn}\n}", "github": "", "project": "", "reviewers": "6GcD;FuoC;1z1k;zioq", "site": "https://openreview.net/forum?id=SlXKgBPMPn", "pdf_size": 3303070, "rating": "4;5;5;6", "confidence": "4;4;3;4", "soundness": "3;2;2;3", "novelty": "3;2;3;2", "presentation": "2;3;1;3", "wc_summary": "36;115;68;118", "wc_strengths": "119;66;38;50", "wc_weaknesses": "509;380;366;3", "wc_questions": "76;276;257;273", "wc_limitations": "17;28;76;41", "wc_review": "757;865;805;485", "wc_reply_reviewers": "33;1204;40;16", "wc_reply_authors": "78;1339;92;0", "reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 84.25, 34.1933838629639 ], "wc_strengths_avg": [ 68.25, 30.93844695520446 ], "wc_weaknesses_avg": [ 314.5, 188.28502330243901 ], "wc_questions_avg": [ 220.5, 83.73917840533187 ], "wc_limitations_avg": [ 40.5, 22.18670773233379 ], "wc_review_avg": [ 728.0, 145.42008114424914 ], "wc_reply_reviewers_avg": [ 323.25, 508.5761373678478 ], "wc_reply_authors_avg": [ 377.25, 556.3718967561176 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4762710871115878450&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "ARTree: A Deep Autoregressive Model for Phylogenetic Inference", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71589", "id": "SoLebIqHgZ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e9e513860b1342f3a12ebecf0528a21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SoLebIqHgZ", "openreview": "https://openreview.net/forum?id=SoLebIqHgZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71589.png?t=1699440381.0625536", "slides": "https://nips.cc/virtual/2023/poster/71589", "video": "https://nips.cc/virtual/2023/poster/71589", "author_site": "Tianyu Xie, Cheng Zhang", "tldr": "", "abstract": "Designing flexible probabilistic models over tree topologies is important for developing efficient phylogenetic inference methods. To do that, previous works often leverage the similarity of tree topologies via hand-engineered heuristic features which would require domain expertise and may suffer from limited approximation capability. In this paper, we propose a deep autoregressive model for phylogenetic inference based on graph neural networks (GNNs), called ARTree. By decomposing a tree topology into a sequence of leaf node addition operations and modeling the involved conditional distributions based on learnable topological features via GNNs, ARTree can provide a rich family of distributions over tree topologies that have simple sampling algorithms, without using heuristic features. We demonstrate the effectiveness and efficiency of our method on a benchmark of challenging real data tree topology density estimation and variational Bayesian phylogenetic inference problems.", "keywords": "phylogenetic inference;autoregressive model;graph neural network;density estimation;variational inference", "primary_area": "", "supplementary_material": "/attachment/f0327090008b982d83ba3788edd4ca5723b8d047.zip", "author": "Tianyu Xie;Cheng Zhang", "authorids": "~Tianyu_Xie1;~Cheng_Zhang3", "gender": "M;M", "homepage": "https://tyuxie.github.io;https://zcrabbit.github.io", "dblp": "345/3987-1;", "google_scholar": "qbJJQ_AAAAAJ;PddDrLgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Tianyu_Xie1;~Cheng_Zhang3", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxie2023artree,\ntitle={{ART}ree: A Deep Autoregressive Model for Phylogenetic Inference},\nauthor={Tianyu Xie and Cheng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SoLebIqHgZ}\n}", "github": "", "project": "", "reviewers": "tXZx;tL42;aQBU;vFMY", "pdf_size": 1353530, "rating": "6;7;7;8", "confidence": "3;4;4;4", "soundness": "3;4;4;4", "novelty": "2;4;3;4", "presentation": "3;4;2;4", "wc_summary": "63;81;137;165", "wc_strengths": "91;189;72;105", "wc_weaknesses": "126;678;200;169", "wc_questions": "31;73;39;72", "wc_limitations": "1;1;20;11", "wc_review": "312;1022;468;522", "wc_reply_reviewers": "14;857;6;342", "wc_reply_authors": "10;737;9;666", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 111.5, 41.21589499210226 ], "wc_strengths_avg": [ 114.25, 44.71786555729153 ], "wc_weaknesses_avg": [ 293.25, 223.68434791017452 ], "wc_questions_avg": [ 53.75, 18.965429075030176 ], "wc_limitations_avg": [ 8.25, 7.917543811056558 ], "wc_review_avg": [ 581.0, 266.03195296805984 ], "wc_reply_reviewers_avg": [ 304.75, 
346.4659976101551 ], "wc_reply_authors_avg": [ 355.5, 346.90957034939237 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9290057955986437848&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;pku.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Robust and Actively Secure Serverless Collaborative Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71588", "id": "SouroWC5Un", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7c5a4b7a31dffef8ce296deedb6214a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SouroWC5Un", "openreview": "https://openreview.net/forum?id=SouroWC5Un", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71588", "video": "https://nips.cc/virtual/2023/poster/71588", "author_site": "Nicholas Franzese, Adam Dziedzic, Christopher A. Choquette-Choo, Mark R Thomas, Muhammad Ahmad Kaleem, Stephan Rabanser, Congyu Fang, Somesh Jha, Nicolas Papernot, Xiao Wang", "tldr": "", "abstract": "Collaborative machine learning (ML) is widely used to enable institutions to learn better models from distributed data. While collaborative approaches to learning intuitively protect user data, they remain vulnerable to either the server, the clients, or both deviating from the protocol. Indeed, because the protocol is asymmetric, a malicious server can abuse its power to reconstruct client data points. Conversely, malicious clients can corrupt learning with malicious updates. Thus, both clients and servers require a guarantee when the other cannot be trusted to fully cooperate. In this work, we propose a peer-to-peer (P2P) learning scheme that is secure against malicious servers and robust to malicious clients. Our core contribution is a generic framework that transforms any (compatible) algorithm for robust aggregation of model updates to the setting where servers and clients can act maliciously. Finally, we demonstrate the computational efficiency of our approach even with 1-million-parameter models trained by 100s of peers on standard datasets.", "keywords": "collaborative learning;robust aggregation;secure machine learning", "primary_area": "", "supplementary_material": "/attachment/dc2c66f2261dd43d35e6b0902df9ad1de0e97649.zip", "author": "Nicholas Franzese;Adam Dziedzic;Christopher A. Choquette-Choo;Mark R. 
Thomas;Muhammad Ahmad Kaleem;Stephan Rabanser;Congyu Fang;Somesh Jha;Nicolas Papernot;Xiao Wang", "authorids": "~Nicholas_Franzese1;~Adam_Dziedzic1;~Christopher_A._Choquette-Choo1;~Mark_R._Thomas1;~Muhammad_Ahmad_Kaleem1;~Stephan_Rabanser1;~Congyu_Fang1;~Somesh_Jha1;~Nicolas_Papernot1;~Xiao_Wang11", "gender": "F;;M;M;;M;F;M;M;M", "homepage": "https://olive-franzese.github.io/;;https://www.christopherchoquette.com;https://www.markrthomas.ca/;;https://rabanser.dev;;;https://www.papernot.fr;https://wangxiao1254.github.io/", "dblp": "222/4653;;250/9674;;;210/2399;326/1546;j/SomeshJha;162/1405;150/9413", "google_scholar": "V0918CIAAAAJ;;oDE4I64AAAAJ;;;https://scholar.google.com/citations?hl=en;Dw700roAAAAJ;BaI7l8QAAAAJ;cGxq0cMAAAAJ;QbWLR8QAAAAJ", "orcid": ";;;0000-0003-1010-7437;;;;;;", "linkedin": ";;christopher-choquette-choo/;mrmarkthomas/;;;;;nicolaspapernot;", "or_profile": "~Nicholas_Franzese1;~Adam_Dziedzic1;~Christopher_A._Choquette-Choo1;~Mark_R._Thomas1;~Muhammad_Ahmad_Kaleem1;~Stephan_Rabanser1;~Congyu_Fang1;~Somesh_Jha1;~Nicolas_Papernot1;~Xiao_Wang11", "aff": "Northwestern University, Northwestern University;;Google Research, Brain Team;University of Alberta;;University of Cambridge;University of Toronto;Department of Computer Science, University of Wisconsin, Madison;Google;Northwestern University", "aff_domain": "u.northwestern.edu;;google.com;ualberta.ca;;cam.ac.uk;utoronto.ca;cs.wisc.edu;google.com;northwestern.edu", "position": "PhD student;;Researcher;Undergrad student;;Visiting Graduate Student;MS student;Full Professor;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nfranzese2023robust,\ntitle={Robust and Actively Secure Serverless Collaborative Learning},\nauthor={Nicholas Franzese and Adam Dziedzic and Christopher A. Choquette-Choo and Mark R. 
Thomas and Muhammad Ahmad Kaleem and Stephan Rabanser and Congyu Fang and Somesh Jha and Nicolas Papernot and Xiao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SouroWC5Un}\n}", "github": "", "project": "", "reviewers": "5nT4;taqA;7EiB;HYce;8qrt", "pdf_size": 1208233, "rating": "5;6;7;7;9", "confidence": "4;2;2;4;4", "soundness": "2;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "2;3;2;4;4", "wc_summary": "135;65;67;47;131", "wc_strengths": "57;30;55;124;65", "wc_weaknesses": "400;17;99;71;5", "wc_questions": "70;1;32;3;58", "wc_limitations": "18;1;30;4;80", "wc_review": "680;114;283;249;339", "wc_reply_reviewers": "48;0;88;76;147", "wc_reply_authors": "1374;0;29;107;68", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;1;2;2;2", "rating_avg": [ 6.8, 1.32664991614216 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 89.0, 36.616935972306585 ], "wc_strengths_avg": [ 66.2, 31.18589424723941 ], "wc_weaknesses_avg": [ 118.4, 144.95737304462992 ], "wc_questions_avg": [ 32.8, 27.995713957675736 ], "wc_limitations_avg": [ 26.6, 28.646814831670206 ], "wc_review_avg": [ 333.0, 188.65948160641173 ], "wc_reply_reviewers_avg": [ 71.8, 48.284158892953705 ], "wc_reply_authors_avg": [ 315.6, 530.4287322534481 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.18463723646899913, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15612748348381815971&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "email": "u.northwestern.edu;;google.com;ualberta.ca;;cam.ac.uk;utoronto.ca;cs.wisc.edu;google.com;northwestern.edu", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;1;0", "aff_unique_norm": "Northwestern University;Google;University of Alberta;University of Cambridge;University of Toronto;University of Wisconsin-Madison", "aff_unique_dep": ";Google Research;;;;Department of Computer Science", "aff_unique_url": "https://www.northwestern.edu;https://research.google;https://www.ualberta.ca;https://www.cam.ac.uk;https://www.utoronto.ca;https://www.wisc.edu", "aff_unique_abbr": "NU;Google;UAlberta;Cambridge;U of T;UW-Madison", "aff_campus_unique_index": "1;2;3;1", "aff_campus_unique": ";Mountain View;Cambridge;Madison", "aff_country_unique_index": "0;0;1;2;1;0;0;0", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "On Generalization Bounds for Projective Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71587", "id": "Sp0yOBfelp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e30bf4765ae6b16a87fb4d7b0b3b3dec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Sp0yOBfelp", "openreview": "https://openreview.net/forum?id=Sp0yOBfelp", "poster": "/media/PosterPDFs/NeurIPS%202023/71587.png?t=1699901080.1383264", "slides": "https://nips.cc/virtual/2023/poster/71587", "video": "https://nips.cc/virtual/2023/poster/71587", "author_site": "Maria Sofia Bucarelli, Matilde Larsen, Chris Schwiegelshohn, Mads Toftrup", "tldr": "", "abstract": "Given a set of points, clustering consists of finding a partition of a point set into $k$ clusters such that the center to which a point is assigned is as 
close as possible. Most commonly, centers are points themselves, which leads to the famous $k$-median and $k$-means objectives. One may also choose centers to be $j$-dimensional subspaces, which gives rise to subspace clustering. In this paper, we consider learning bounds for these problems. That is, given a set of $n$ samples $P$ drawn independently from some unknown, but fixed distribution $\\mathcal{D}$, how quickly does a solution computed on $P$ converge to the optimal clustering of $\\mathcal{D}$?\nWe give several near optimal results. In particular,\n 1. For center-based objectives, we show a convergence rate of $\\tilde{O}\\left(\\sqrt{{k}/{n}}\\right)$. This matches the known optimal bounds of [Fefferman, Mitter, and Narayanan, Journal of the American Mathematical Society 2016] and [Bartlett, Linder, and Lugosi, IEEE Trans. Inf. Theory 1998] for $k$-means and extends it to other important objectives such as $k$-median.\n 2. For subspace clustering with $j$-dimensional subspaces, we show a convergence rate of $\\tilde{O}\\left(\\sqrt{{(kj^2)}/{n}}\\right)$. These are the first provable bounds for most of these problems. For the specific case of projective clustering, which generalizes $k$-means, we show a convergence rate of $\\Omega\\left(\\sqrt{{(kj)}/{n}}\\right)$ is necessary, thereby proving that the bounds from [Fefferman, Mitter, and Narayanan, Journal of the American Mathematical Society 2016] are essentially optimal.", "keywords": "Subspace Clustering;Learning Theory;Clustering;Error bounds", "primary_area": "", "supplementary_material": "/attachment/e197a8a73fbc4d9e3826215868399e3eb76ee118.zip", "author": "Maria Sofia Bucarelli;Matilde Fjelds\u00f8 Larsen;Chris Schwiegelshohn;Mads Toftrup", "authorids": "~Maria_Sofia_Bucarelli1;~Matilde_Fjelds\u00f8_Larsen1;~Chris_Schwiegelshohn1;~Mads_Toftrup1", "gender": "F;F;;", "homepage": "https://phd.uniroma1.it/web/MARIA-SOFIA-BUCARELLI_nP1617005_EN.aspx;;https://cs.au.dk/~schwiegelshohn/;", "dblp": "304/2548;;https://dblp.uni-trier.de/pers/hd/s/Schwiegelshohn:Chris;", "google_scholar": "https://scholar.google.com/citations?hl=it;;X9Hl0LcAAAAJ;SzTMW48AAAAJ", "orcid": "0009-0007-5101-8242;;;", "linkedin": ";https://linkedin.com/in/matilde-larsen-5963771ab?;;", "or_profile": "~Maria_Sofia_Bucarelli1;~Matilde_Fjelds\u00f8_Larsen1;~Chris_Schwiegelshohn1;~Mads_Toftrup1", "aff": "University of Roma \"La Sapienza\";Aarhus University;Aarhus University;Aarhus University", "aff_domain": "uniroma1.it;au.dk;cs.au.dk;cs.au.dk", "position": "PhD student;MS student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbucarelli2023on,\ntitle={On Generalization Bounds for Projective Clustering},\nauthor={Maria Sofia Bucarelli and Matilde Fjelds{\\o} Larsen and Chris Schwiegelshohn and Mads Toftrup},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Sp0yOBfelp}\n}", "github": "", "project": "", "reviewers": "ZgJL;26Fm;iH2U;zjTi;hJ3D", "pdf_size": 2011106, "rating": "5;7;7;7;7", "confidence": "4;3;3;3;2", "soundness": "3;4;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;4;4;3", "wc_summary": "65;96;50;109;92", "wc_strengths": "61;80;91;23;159", "wc_weaknesses": "254;88;26;40;71", "wc_questions": "2;38;33;1;1", "wc_limitations": "1;31;16;1;12", "wc_review": "383;333;216;174;335", "wc_reply_reviewers": "0;29;20;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;1;1;0;1", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 3.0, 
0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 82.4, 21.62036077404815 ], "wc_strengths_avg": [ 82.8, 44.57084248698918 ], "wc_weaknesses_avg": [ 95.8, 82.08385955838091 ], "wc_questions_avg": [ 15.0, 16.816658407662327 ], "wc_limitations_avg": [ 12.2, 11.124747188138704 ], "wc_review_avg": [ 288.2, 79.29539709213896 ], "wc_reply_reviewers_avg": [ 9.8, 12.33531515608742 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 0.8, 0.4 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.790569415042095, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1828144093897556988&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "uniroma1.it;au.dk;cs.au.dk;cs.au.dk", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Rome La Sapienza;Aarhus University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uniroma1.it;https://au.dk", "aff_unique_abbr": "La Sapienza;AU", "aff_campus_unique_index": "0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Italy;Denmark" }, { "title": "Solving a Class of Non-Convex Minimax Optimization in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71586", "id": "SpStmVboGy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/254009e8d528f98764a060e877a1b01c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SpStmVboGy", "openreview": "https://openreview.net/forum?id=SpStmVboGy", "poster": "/media/PosterPDFs/NeurIPS%202023/71586.png?t=1701374964.837379", "slides": "https://nips.cc/virtual/2023/poster/71586", "video": "https://nips.cc/virtual/2023/poster/71586", "author_site": "Xidong Wu, Jianhui Sun, Zhengmian Hu, Aidong Zhang, Heng Huang", "tldr": "", "abstract": "Minimax problems arise throughout machine learning applications, ranging from adversarial training and policy evaluation in reinforcement learning to AUROC maximization. To address the large-scale distributed data challenges across multiple clients with communication-efficient distributed training, federated learning (FL) is gaining popularity. Many optimization algorithms for minimax problems have been developed in the centralized setting (\\emph{i.e.}, single-machine). Nonetheless, algorithms for minimax problems under FL are still underexplored. In this paper, we study a class of federated nonconvex minimax optimization problems. We propose FL algorithms (FedSGDA+ and FedSGDA-M) and reduce existing complexity results for the most common minimax problems. For nonconvex-concave problems, we propose FedSGDA+ and reduce the communication complexity to $O(\\varepsilon^{-6})$. Under nonconvex-strongly-concave and nonconvex-PL minimax settings, we prove that FedSGDA-M has the best-known sample complexity of $O(\\kappa^{3} N^{-1}\\varepsilon^{-3})$ and the best-known communication complexity of $O(\\kappa^{2}\\varepsilon^{-2})$. FedSGDA-M is the first algorithm to match the best sample complexity $O(\\varepsilon^{-3})$ achieved by the single-machine method under the nonconvex-strongly-concave setting. 
Extensive experimental results on fair classification and AUROC maximization show the efficiency of our algorithms.", "keywords": "Federated Learning;Non-Convex Optimization;Minimax Optimization", "primary_area": "", "supplementary_material": "/attachment/8112c1d0462a7fc800cf87fef676022ad6b541b1.pdf", "author": "Xidong Wu;Jianhui Sun;Zhengmian Hu;Aidong Zhang;Heng Huang", "authorids": "~Xidong_Wu1;~Jianhui_Sun1;~Zhengmian_Hu1;~Aidong_Zhang2;~Heng_Huang1", "gender": "Non-Binary;;M;F;M", "homepage": "https://www.linkedin.com/in/xidong-wu-22924112b/;https://jsycsjh.github.io/;https://www.umd.edu/;https://engineering.virginia.edu/faculty/aidong-zhang;https://www.cs.umd.edu/~heng/", "dblp": "37/10581;207/9364;285/4945;z/AidongZhang.html;03/281", "google_scholar": "rj21L7sAAAAJ;https://scholar.google.com/citations?hl=en;4eXiWWgAAAAJ;O8XxkE4AAAAJ;4OqLaDwAAAAJ", "orcid": ";;0000-0003-0316-146X;0000-0001-9723-3246;", "linkedin": ";jianhui-sun-76a722a6/;;;", "or_profile": "~Xidong_Wu1;~Jianhui_Sun1;~Zhengmian_Hu1;~Aidong_Zhang2;~Heng_Huang1", "aff": "University of Pittsburgh;University of Virginia;University of Pittsburgh;University of Virginia;University of Pittsburgh", "aff_domain": "pitt.edu;virginia.edu;pitt.edu;virginia.edu;pitt.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023solving,\ntitle={Solving a Class of Non-Convex Minimax Optimization in Federated Learning},\nauthor={Xidong Wu and Jianhui Sun and Zhengmian Hu and Aidong Zhang and Heng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SpStmVboGy}\n}", "github": "", "project": "", "reviewers": "sra2;dmWe;Nj11;7re5;uFXg", "pdf_size": 1051468, "rating": "3;6;6;7;7", "confidence": "4;3;2;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "3;2;2;3;3", "wc_summary": "32;59;69;45;42", "wc_strengths": "84;76;69;36;32", "wc_weaknesses": "214;311;29;172;28", "wc_questions": "21;2;46;2;66", "wc_limitations": "3;38;15;5;3", "wc_review": "354;486;228;260;171", "wc_reply_reviewers": "10;19;0;136;29", "wc_reply_authors": "108;315;0;231;0", "reply_reviewers": "1;1;0;2;1", "reply_authors": "4;3;1;3;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 49.4, 13.062924634246345 ], "wc_strengths_avg": [ 59.4, 21.312906887611554 ], "wc_weaknesses_avg": [ 150.8, 109.56532298131559 ], "wc_questions_avg": [ 27.4, 25.168233946782994 ], "wc_limitations_avg": [ 12.8, 13.362634470792052 ], "wc_review_avg": [ 299.8, 110.39637675213801 ], "wc_reply_reviewers_avg": [ 38.8, 49.53947920598278 ], "wc_reply_authors_avg": [ 130.8, 125.46457667405569 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 1.2000000000000002 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.10206207261596577, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15865030396896134837&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": "pitt.edu;virginia.edu;pitt.edu;virginia.edu;pitt.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Pittsburgh;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;https://www.virginia.edu", "aff_unique_abbr": "Pitt;UVA", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Objaverse-XL: A Universe of 10M+ 3D Objects", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73586", "id": "Sq3CLKJeiz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/70364304877b5e767de4e9a2a511be0c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Sq3CLKJeiz", "openreview": "https://openreview.net/forum?id=Sq3CLKJeiz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73586", "video": "https://nips.cc/virtual/2023/poster/73586", "author_site": "Matt Deitke, Ruoshi Liu, Matthew Wallingford, Huong Ngo, Oscar Michel, Aditya Kusupati, Alan Fan, Christian Laforte, Vikram Voleti, Samir Yitzhak Gadre, Eli VanderBilt, Aniruddha Kembhavi, Carl Vondrick, Georgia Gkioxari, Kiana Ehsani, Ludwig Schmidt, Ali Farhadi", "tldr": "", "abstract": "Natural language processing and 2D vision models have attained remarkable proficiency on many tasks primarily by escalating the scale of training data. However, 3D vision tasks have not seen the same progress, in part due to the challenges of acquiring high-quality 3D data. In this work, we present Objaverse-XL, a dataset of over 10 million 3D objects. Our compilation comprises deduplicated 3D objects from a diverse set of sources, including manually designed objects, photogrammetry scans of landmarks and everyday items, and professional scans of historic and antique artifacts. Representing the largest scale and diversity in the realm of 3D datasets, Objaverse-XL enables significant new possibilities for 3D vision. Our experiments demonstrate the vast improvements enabled with the scale provided by Objaverse-XL. We show that by training Zero123 on novel view synthesis, utilizing over 100 million multi-view rendered images, we achieve strong zero-shot generalization abilities. 
We hope that releasing Objaverse-XL will enable further innovations in the field of 3D vision at scale.", "keywords": "3d;large-scale training;objaverse;nerf;generative modeling;zero123", "primary_area": "", "supplementary_material": "/attachment/810c7c91279340f1853fa0a0c69af43170e02da8.pdf", "author": "Matt Deitke;Ruoshi Liu;Matthew Wallingford;Huong Ngo;Oscar Michel;Aditya Kusupati;Alan Fan;Christian Laforte;Vikram Voleti;Samir Yitzhak Gadre;Eli VanderBilt;Aniruddha Kembhavi;Carl Vondrick;Georgia Gkioxari;Kiana Ehsani;Ludwig Schmidt;Ali Farhadi", "authorids": "~Matt_Deitke1;~Ruoshi_Liu2;~Matthew_Wallingford1;~Huong_Ngo1;~Oscar_Michel1;~Aditya_Kusupati1;~Alan_Fan1;~Christian_Laforte1;~Vikram_Voleti1;~Samir_Yitzhak_Gadre1;~Eli_VanderBilt1;~Aniruddha_Kembhavi1;~Carl_Vondrick2;~Georgia_Gkioxari1;~Kiana_Ehsani1;~Ludwig_Schmidt1;~Ali_Farhadi3", "gender": "M;M;F;M;M;;M;M;M;M;M;M;F;F;M;M;M", "homepage": "https://mattdeitke.com;https://ruoshiliu.github.io/;;;http://www.adityakusupati.com/;;;https://voletiv.github.io;https://sagadre.github.io/;https://www.elivanderbilt.com/;https://anikem.github.io/;http://www.cs.columbia.edu/~vondrick/;https://gkioxari.github.io/;https://ehsanik.github.io/;http://people.csail.mit.edu/ludwigs/;https://homes.cs.washington.edu/~ali/;https://mattwallingford.github.io/", "dblp": ";283/4797;352/3554;308/2324;231/7662;;;243/6609;246/7901;263/1958;81/7583;26/8610;135/4895.html;198/0910;141/2720;37/5826;263/1795", "google_scholar": "k4VxCcYAAAAJ;suAawHYAAAAJ;;D0WvX4YAAAAJ;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;;;PPCRqZUAAAAJ;oAhlg9gAAAAJ;;JnUevM0AAAAJ;3MzhkFIAAAAJ;kQisE-gAAAAJ;RScZCLEAAAAJ;SWMKy70AAAAJ;jeOFRDsAAAAJ;", "orcid": ";;;;0000-0001-8455-1851;;;;;;;;;;;;", "linkedin": ";ruoshi-liu-a5046aa0/;huong-ngo-016837210/;;adityakusupati/;https://linkedin.com/in/alan-fan;claforte;vikram-voleti-45372222;;eli-vanderbilt-a9710716;;;;kiana-ehsani-1b81b0162/;ludwig-schmidt-87ba3612/;;", "or_profile": "~Matt_Deitke1;~Ruoshi_Liu2;~Huong_Ngo1;~Oscar_Michel1;~Aditya_Kusupati1;~Alan_Fan1;~Christian_Laforte1;~Vikram_Voleti1;~Samir_Yitzhak_Gadre1;~Eli_VanderBilt1;~Aniruddha_Kembhavi1;~Carl_Vondrick2;~Georgia_Gkioxari1;~Kiana_Ehsani1;~Ludwig_Schmidt1;~Ali_Farhadi3;~Matthew_C_Wallingford2", "aff": "Department of Computer Science, University of Washington;Columbia University;University of Washington;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington;Department of Computer Science;Stability AI;Meta;Columbia University;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Columbia University;California Institute of Technology;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;columbia.edu;uw.edu;allenai.org;cs.washington.edu;cs.washington.edu;stability.ai;meta.com;columbia.edu;allenai.org;allenai.org;columbia.edu;caltech.edu;allenai.org;allenai.org;cs.uw.edu;washington.edu", "position": "Undergrad student;PhD student;Undergrad student;Researcher;PhD student;Undergrad student;Principal Researcher;Intern;PhD student;Researcher;Research Manager;Assistant Professor;Assistant Professor;Researcher;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\ndeitke2023objaversexl,\ntitle={Objaverse-{XL}: A Universe of 10M+ 3D Objects},\nauthor={Matt Deitke and Ruoshi Liu and Matthew Wallingford and Huong Ngo and Oscar Michel and Aditya Kusupati and Alan Fan and Christian 
Laforte and Vikram Voleti and Samir Yitzhak Gadre and Eli VanderBilt and Aniruddha Kembhavi and Carl Vondrick and Georgia Gkioxari and Kiana Ehsani and Ludwig Schmidt and Ali Farhadi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Sq3CLKJeiz}\n}", "github": "", "project": "", "reviewers": "QWh5;cssb;toL1;fkNJ;7tXF", "pdf_size": 8382555, "rating": "6;6;7;7;7", "confidence": "4;5;4;4;4", "wc_summary_and_contributions": "54;59;142;47;58", "wc_strengths": "49;64;171;40;108", "wc_improvement": "31;249;29;160;84", "wc_limitations": "50;182;1;61;12", "wc_correctness": "105;358;45;24;9", "wc_clarity": "102;175;23;10;1", "wc_relation_to_prior_work": "74;44;17;34;1", "wc_documentation": "26;128;52;26;10", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "492;1260;481;403;284", "wc_reply_reviewers": "250;145;0;259;0", "wc_reply_authors": "2089;1864;310;2248;807", "reply_reviewers": "2;1;0;1;0", "reply_authors": "4;5;2;5;3", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 72.0, 35.25336863336609 ], "wc_strengths_avg": [ 86.4, 48.32639030591877 ], "wc_improvement_avg": [ 110.6, 84.02047369540355 ], "wc_limitations_avg": [ 61.2, 64.44036002382357 ], "wc_correctness_avg": [ 108.2, 129.10058094369677 ], "wc_clarity_avg": [ 62.2, 66.8053890041814 ], "wc_relation_to_prior_work_avg": [ 34.0, 24.8112877537624 ], "wc_documentation_avg": [ 48.4, 42.01713936002783 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 584.0, 346.06646760412946 ], "wc_reply_reviewers_avg": [ 130.8, 114.07260845619338 ], "wc_reply_authors_avg": [ 1463.6, 765.3293670048209 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.8, 1.16619037896906 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 393, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17988312032075199010&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.washington.edu;columbia.edu;uw.edu;allenai.org;cs.washington.edu;cs.washington.edu;stability.ai;meta.com;columbia.edu;allenai.org;allenai.org;columbia.edu;caltech.edu;allenai.org;allenai.org;cs.uw.edu;washington.edu", "author_num": 17, "aff_unique_index": "0;1;0;2;0;3;4;5;1;2;2;1;6;2;2;0;0", "aff_unique_norm": "University of Washington;Columbia University;Allen Institute for Artificial Intelligence;Unknown Institution;Stability AI;Meta;California Institute of Technology", "aff_unique_dep": "Department of Computer Science;;;Department of Computer Science;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.washington.edu;https://www.columbia.edu;https://allenai.org;;https://stability.ai;https://meta.com;https://www.caltech.edu", "aff_unique_abbr": "UW;Columbia;AI2;;Stability AI;Meta;Caltech", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Seattle;;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Persuading Farsighted Receivers in MDPs: the Power of Honesty", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73883", "id": "SqTUGq0R7j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/30b28eb87fe7a6c4af8520293317d4c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SqTUGq0R7j", "openreview": 
"https://openreview.net/forum?id=SqTUGq0R7j", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73883", "video": "https://nips.cc/virtual/2023/poster/73883", "author_site": "Martino Bernasconi, Matteo Castiglioni, Alberto Marchesi, Mirco Mutti", "tldr": "", "abstract": "Bayesian persuasion studies the problem faced by an informed sender who strategically discloses information to influence the behavior of an uninformed receiver. Recently, a growing attention has been devoted to settings where the sender and the receiver interact sequentially, in which the receiver's decision-making problem is usually modeled as a Markov decision process (MDP). However, the literature focuses on computing optimal information-revelation policies (a.k.a. signaling schemes) under the restrictive assumption that the receiver acts myopically, selecting actions to maximize the one-step utility and disregarding future rewards. This is justified by the fact that, when the receiver is farsighted and thus considers future rewards, finding an optimal Markovian signaling scheme is NP-hard. In this paper, we show that Markovian signaling schemes do not constitute the \"right\" class of policies. Indeed, differently from most of the MDPs settings, we show that Markovian signaling schemes are not optimal, and general history-dependent signaling schemes should be considered. Moreover, we also show that history-dependent signaling schemes circumvent the negative complexity results affecting Markovian signaling schemes. Formally, we design an algorithm that computes an optimal and $\\epsilon$-persuasive history-dependent signaling scheme in time polynomial in ${1}/{\\epsilon}$ and in the instance size. The crucial challenge is that general history-dependent signaling schemes cannot be represented in polynomial space. Nevertheless, we introduce a convenient subclass of history-dependent signaling schemes, called promise-form, which are as powerful as general history-dependent ones and efficiently representable. 
Intuitively, promise-form signaling schemes compactly encode histories in the form of honest promises on future receiver's rewards.", "keywords": "Bayesian Persuasion;MPD;information design;signaling", "primary_area": "", "supplementary_material": "", "author": "Martino Bernasconi;Matteo Castiglioni;Alberto Marchesi;Mirco Mutti", "authorids": "~Martino_Bernasconi1;~Matteo_Castiglioni1;~Alberto_Marchesi1;~Mirco_Mutti1", "gender": ";;M;", "homepage": "https://sites.google.com/view/martinobernasconi/home;https://castiglionimatteo.github.io;https://albymarke.github.io;", "dblp": "301/6372.html;225/7720;204/1718;222/2815", "google_scholar": "dtmoCekAAAAJ;https://scholar.google.it/citations?user=NPE3HAYAAAAJ;vXDtCzoAAAAJ;GlLkJ9UAAAAJ", "orcid": ";0000-0002-1070-6766;;", "linkedin": ";;;", "or_profile": "~Martino_Bernasconi1;~Matteo_Castiglioni1;~Alberto_Marchesi1;~Mirco_Mutti1", "aff": "Politecnico di Milano;Politecnico di Milano;Politecnico di Milano;Universit\u00e0 di Bologna", "aff_domain": "polimi.it;polimi.it;polimi.it;unibo.it", "position": "PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbernasconi2023persuading,\ntitle={Persuading Farsighted Receivers in {MDP}s: the Power of Honesty},\nauthor={Martino Bernasconi and Matteo Castiglioni and Alberto Marchesi and Mirco Mutti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SqTUGq0R7j}\n}", "github": "", "project": "", "reviewers": "XkbC;bcaS;q53d;Rxqa", "pdf_size": 462948, "rating": "5;6;7;8", "confidence": "2;5;4;5", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "133;314;178;118", "wc_strengths": "38;112;35;92", "wc_weaknesses": "153;525;29;46", "wc_questions": "78;55;321;33", "wc_limitations": "13;19;2;1", "wc_review": "415;1025;565;290", "wc_reply_reviewers": "0;365;32;20", "wc_reply_authors": "0;322;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 185.75, 77.26698842325874 ], "wc_strengths_avg": [ 69.25, 33.521448357730605 ], "wc_weaknesses_avg": [ 188.25, 200.14916312590466 ], "wc_questions_avg": [ 121.75, 116.13219837753869 ], "wc_limitations_avg": [ 8.75, 7.562241731127087 ], "wc_review_avg": [ 573.75, 278.127106733594 ], "wc_reply_reviewers_avg": [ 104.25, 150.97744036775825 ], "wc_reply_authors_avg": [ 80.5, 139.43009000929462 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7302967433402213, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=486466049382328590&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "polimi.it;polimi.it;polimi.it;unibo.it", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Politecnico di Milano;University of Bologna", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it;https://www.unibo.it", "aff_unique_abbr": "Polimi;Unibo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Winner-Take-All Column Row Sampling for Memory Efficient Adaptation of Language Model", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71585", "id": "SquMNyrk1O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0a6059857ae5c82ea9726ee9282a7145-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SquMNyrk1O", "openreview": "https://openreview.net/forum?id=SquMNyrk1O", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71585", "video": "https://nips.cc/virtual/2023/poster/71585", "author_site": "Zirui Liu, Guanchu Wang, Shaochen (Henry) Zhong, Zhaozhuo Xu, Daochen Zha, Ruixiang (Ryan) Tang, Zhimeng (Stephen) Jiang, Kaixiong Zhou, Vipin Chaudhary, Shuai Xu, Xia Hu", "tldr": "", "abstract": "As the model size grows rapidly, fine-tuning the large pre-trained language model has become increasingly difficult due to its extensive memory usage. \nPrevious works usually focus on reducing the number of trainable parameters in the network. \nWhile the model parameters do contribute to memory usage, the primary memory bottleneck during training arises from storing feature maps, also known as activations, as they are crucial for gradient calculation. \nNotably, machine learning models are typically trained using stochastic gradient descent.\nWe argue that in stochastic optimization, models can handle noisy gradients as long as the gradient estimator is unbiased with reasonable variance.\nFollowing this motivation, we propose a new family of unbiased estimators called \\sas, for matrix production with reduced variance, which only requires storing the sub-sampled activations for calculating the gradient.\nOur work provides both theoretical and experimental evidence that, in the context of tuning transformers, our proposed estimators exhibit lower variance compared to existing ones.\nBy replacing the linear operation with our approximated one in transformers, we can achieve up to 2.7X peak memory reduction with almost no accuracy drop and enables up to $6.4\\times$ larger batch size.\nUnder the same hardware, \\sas enables better down-streaming task performance by applying larger models and/or faster training speed with larger batch sizes.\nThe code is available at https://anonymous.4open.science/r/WTACRS-A5C5/.", "keywords": "memory-efficient tuning;language model;transformers", "primary_area": "", "supplementary_material": "/attachment/1ae43768bc01455b66e23463e788fbe9b288915d.pdf", "author": "Zirui Liu;Guanchu Wang;Shaochen Zhong;Zhaozhuo Xu;Daochen Zha;Ruixiang Tang;Zhimeng Jiang;Kaixiong Zhou;Vipin Chaudhary;Shuai Xu;Xia Hu", "authorids": "~Zirui_Liu1;~Guanchu_Wang1;~Shaochen_Zhong1;~Zhaozhuo_Xu2;~Daochen_Zha1;~Ruixiang_Tang1;~Zhimeng_Jiang1;~Kaixiong_Zhou1;~Vipin_Chaudhary2;~Shuai_Xu2;~Xia_Hu4", "gender": "M;M;M;;;M;M;M;M;M;M", "homepage": "https://zirui-ray-liu.github.io/;https://guanchuwang.github.io/home;https://openreview.net/profile?id=~Shaochen_Zhong1;https://ottovonxu.github.io/;http://dczha.com/;https://www.ruixiangtang.net/;http://www.zhimengjiang.com/;https://kaixiong-zhou.github.io/;https://engineering.case.edu/profiles/vxc204;https://engineering.case.edu/profiles/sxx214;https://cs.rice.edu/~xh37/index.html", "dblp": "196/8629-1.html;213/0985;326/7286.html;195/4352;167/0903;239/1928;217/3235;178/7315;c/VipinChaudhary.html;;256/9406.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;_QL5218AAAAJ;https://scholar.google.com/citations?hl=en;7tDlVAsAAAAJ;jK0NgMcAAAAJ;T575jsoAAAAJ;5Es3Yk4AAAAJ;zMspIjIAAAAJ;vJbjqpIAAAAJ;wu-vtI4AAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": 
";;;;0000-0002-6677-7504;;0000-0001-6933-3952;0000-0001-5226-8736;0000-0001-9672-6225;;", "linkedin": ";;shaochen-henry-zhong-96a941249/;;daochen-zha;ruixiang-tang-91660717b/;;;vipin-chaudhary-379529/;;", "or_profile": "~Zirui_Liu1;~Guanchu_Wang1;~Shaochen_Zhong1;~Zhaozhuo_Xu2;~Daochen_Zha1;~Ruixiang_Tang1;~Zhimeng_Jiang1;~Kaixiong_Zhou1;~Vipin_Chaudhary2;~Shuai_Xu2;~Xia_Hu2", "aff": "Rice University;Rice University;Rice University;Rice University;Rice University;Rice University;Texas A&M University;Rice University;Case Western Reserve University;Case Western Reserve University;Rice University", "aff_domain": "rice.edu;rice.edu;rice.edu;rice.edu;rice.edu;rice.edu;tamu.edu;rice.edu;case.edu;case.edu;rice.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023winnertakeall,\ntitle={Winner-Take-All Column Row Sampling for Memory Efficient Adaptation of Language Model},\nauthor={Zirui Liu and Guanchu Wang and Shaochen Zhong and Zhaozhuo Xu and Daochen Zha and Ruixiang Tang and Zhimeng Jiang and Kaixiong Zhou and Vipin Chaudhary and Shuai Xu and Xia Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SquMNyrk1O}\n}", "github": "", "project": "", "reviewers": "j1mL;cMiu;GDNX;ky3t", "pdf_size": 1296228, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;2;4;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "79;111;236;76", "wc_strengths": "55;109;61;55", "wc_weaknesses": "137;121;211;16", "wc_questions": "121;16;154;7", "wc_limitations": "114;1;7;1", "wc_review": "506;358;669;155", "wc_reply_reviewers": "101;18;22;0", "wc_reply_authors": "863;0;0;136", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 125.5, 65.25526798657714 ], "wc_strengths_avg": [ 70.0, 22.64950330581225 ], "wc_weaknesses_avg": [ 121.25, 69.6073810741361 ], "wc_questions_avg": [ 74.5, 64.15021434102928 ], "wc_limitations_avg": [ 30.75, 48.12678568115681 ], "wc_review_avg": [ 422.0, 189.37396864405625 ], "wc_reply_reviewers_avg": [ 35.25, 38.854697270729055 ], "wc_reply_authors_avg": [ 249.75, 358.3869242871453 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=765572833048065056&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "rice.edu;rice.edu;rice.edu;rice.edu;rice.edu;rice.edu;tamu.edu;rice.edu;case.edu;case.edu;rice.edu", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;1;0;2;2;0", "aff_unique_norm": "Rice University;Texas A&M University;Case Western Reserve University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.tamu.edu;https://www.case.edu", "aff_unique_abbr": "Rice;TAMU;CWRU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Cappy: Outperforming and Boosting Large Multi-Task LMs with a Small Scorer", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/71584", "id": "Srt1hhQgqa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b860c0c546f4a3a786f9c9468228c99f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Srt1hhQgqa", "openreview": "https://openreview.net/forum?id=Srt1hhQgqa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71584", "video": "https://nips.cc/virtual/2023/poster/71584", "author_site": "Bowen Tan, Yun Zhu, Lijuan Liu, Eric Xing, Zhiting Hu, Jindong Chen", "tldr": "", "abstract": "Large language models (LLMs) such as T0, FLAN, and OPT-IML excel in multi-tasking under a unified instruction-following paradigm, where they also exhibit remarkable generalization abilities to unseen tasks. Despite their impressive performance, these LLMs, with sizes ranging from several billion to hundreds of billions of parameters, demand substantial computational resources, making their training and inference expensive and inefficient. Furthermore, adapting these models to downstream applications, particularly complex tasks, is often unfeasible due to the extensive hardware requirements for finetuning, even when utilizing parameter-efficient approaches such as prompt tuning. Additionally, the most powerful multi-task LLMs, such as OPT-IML-175B and FLAN-PaLM-540B, are not publicly accessible, severely limiting their customization potential. To address these challenges, we introduce a pretrained small scorer, \\textit{Cappy}, designed to enhance the performance and efficiency of multi-task LLMs. With merely 360 million parameters, Cappy functions either independently on classification tasks or serve as an auxiliary component for LLMs, boosting their performance. Moreover, Cappy enables efficiently integrating downstream supervision without requiring LLM finetuning nor the access to their parameters. Our experiments demonstrate that, when working independently on 11 language understanding tasks from PromptSource, Cappy outperforms LLMs that are several orders of magnitude larger. Besides, on 45 complex tasks from BIG-Bench, Cappy boosts the performance of the advanced multi-task LLM, FLAN-T5, by a large margin. 
Furthermore, Cappy is flexible to cooperate with other LLM adaptations, including finetuning and in-context learning, offering additional performance enhancement.", "keywords": "multi-task;large language models;pretrain model", "primary_area": "", "supplementary_material": "/attachment/c571d2cc9b9a661609a778c4471f76173b6071d0.zip", "author": "Bowen Tan;Yun Zhu;Lijuan Liu;Eric Xing;Zhiting Hu;Jindong Chen", "authorids": "~Bowen_Tan2;yunzhu@google.com;lijuanliu@google.com;~Eric_Xing1;~Zhiting_Hu3;jdchen@google.com", "gender": "M;;;M;M;", "homepage": "https://bowentan.me;;;http://www.cs.cmu.edu/~epxing/;http://zhiting.ucsd.edu;", "dblp": ";;;36/3855;134/4031;", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;N7_xhHoAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Bowen_Tan2;yunzhu@google.com;lijuanliu@google.com;~Eric_Xing1;~Zhiting_Hu3;jdchen@google.com", "aff": "Carnegie Mellon University;;;School of Computer Science, Carnegie Mellon University;Amazon;", "aff_domain": "cmu.edu;;;cs.cmu.edu;amazon.com;", "position": "PhD student;;;Full Professor;Researcher;", "bibtex": "@inproceedings{\ntan2023cappy,\ntitle={Cappy: Outperforming and Boosting Large Multi-Task {LM}s with a Small Scorer},\nauthor={Bowen Tan and Yun Zhu and Lijuan Liu and Eric Xing and Zhiting Hu and Jindong Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Srt1hhQgqa}\n}", "github": "", "project": "", "reviewers": "K4oy;hbCr;XRdH;uzD3", "pdf_size": 770646, "rating": "6;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;2;4", "wc_summary": "86;119;307;298", "wc_strengths": "73;23;137;184", "wc_weaknesses": "639;67;245;175", "wc_questions": "90;5;95;225", "wc_limitations": "51;3;1;94", "wc_review": "939;217;785;976", "wc_reply_reviewers": "346;57;0;0", "wc_reply_authors": "374;123;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 202.5, 100.72859574122931 ], "wc_strengths_avg": [ 104.25, 61.25918298508396 ], "wc_weaknesses_avg": [ 281.5, 215.92301868953203 ], "wc_questions_avg": [ 103.75, 78.61098841765062 ], "wc_limitations_avg": [ 37.25, 38.39514943322919 ], "wc_review_avg": [ 729.25, 304.29785983473494 ], "wc_reply_reviewers_avg": [ 100.75, 143.4945556458502 ], "wc_reply_authors_avg": [ 124.25, 152.68656620672297 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1913980771633892745&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;;;cs.cmu.edu;amazon.com;", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "Carnegie Mellon University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.cmu.edu;https://www.amazon.com", "aff_unique_abbr": "CMU;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dataset Diffusion: Diffusion-based Synthetic Data Generation for Pixel-Level Semantic Segmentation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71583", "id": "StD4J5ZlI5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2957e48240c1d90e62b303574871b47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=StD4J5ZlI5", "openreview": "https://openreview.net/forum?id=StD4J5ZlI5", "poster": "/media/PosterPDFs/NeurIPS%202023/71583.png?t=1699870298.254545", "slides": "https://nips.cc/virtual/2023/poster/71583", "video": "https://nips.cc/virtual/2023/poster/71583", "author_site": "Quang Nguyen, Truong Vu, Truong Vu, Anh Tran, Khoi Nguyen", "tldr": "", "abstract": "Preparing training data for deep vision models is a labor-intensive task. To address this, generative models have emerged as an effective solution for generating synthetic data. While current generative models produce image-level category labels, we propose a novel method for generating pixel-level semantic segmentation labels using the text-to-image generative model Stable Diffusion (SD). By utilizing the text prompts, cross-attention, and self-attention of SD, we introduce three new techniques: class-prompt appending, class-prompt cross-attention, and self-attention exponentiation. These techniques enable us to generate segmentation maps corresponding to synthetic images. These maps serve as pseudo-labels for training semantic segmenters, eliminating the need for labor-intensive pixel-wise annotation. To account for the imperfections in our pseudo-labels, we incorporate uncertainty regions into the segmentation, allowing us to disregard loss from those regions. We conduct evaluations on two datasets, PASCAL VOC and MSCOCO, and our approach significantly outperforms concurrent work. Our benchmarks and code will be released at https://github.com/VinAIResearch/Dataset-Diffusion.", "keywords": "Deep learning; Diffusion Models; Semantic Segmentation; Text-to-Image", "primary_area": "", "supplementary_material": "", "author": "Quang Ho Nguyen;Truong Tuan Vu;Anh Tuan Tran;Khoi Nguyen", "authorids": "~Quang_Ho_Nguyen1;~Truong_Tuan_Vu1;~Anh_Tuan_Tran2;~Khoi_Nguyen1", "gender": "M;M;M;M", "homepage": "https://quang-ngh.github.io/;https://truongvu2000nd.github.io/;https://sites.google.com/site/anhttranusc/;https://khoinguyen.org/", "dblp": ";334/1025;150/5269-1;65/5737-1", "google_scholar": "dqgmViUAAAAJ;mTbQ2ZsAAAAJ;FYZ5ODQAAAAJ;Eul6W5kAAAAJ", "orcid": ";;0000-0002-3120-4036;0000-0002-9259-420X", "linkedin": ";truong-vu-4b9b4819a/;https://linkedin.com/in/anh-tran-97814b19;", "or_profile": "~Quang_Ho_Nguyen1;~Truong_Tuan_Vu1;~Anh_Tuan_Tran2;~Khoi_Nguyen1", "aff": "Ho Chi Minh City University of Technology;VinAI Research;VinAI Research;VinAI Research", "aff_domain": "hcmut.edu.vn;vinai.io;vinai.io;vinai.io", "position": "Undergrad student;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nnguyen2023dataset,\ntitle={Dataset Diffusion: Diffusion-based Synthetic Data Generation for Pixel-Level Semantic Segmentation},\nauthor={Quang Ho Nguyen and Truong Tuan Vu and Anh Tuan Tran and Khoi Nguyen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=StD4J5ZlI5}\n}", "github": "", "project": "", "reviewers": "wdax;gEQM;2Ket;KgKS", "pdf_size": 7523323, "rating": "5;5;6;7", "confidence": "4;3;4;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "93;133;121;181", "wc_strengths": "65;70;139;164", "wc_weaknesses": "171;215;52;165", "wc_questions": "40;84;151;43", "wc_limitations": 
"54;8;1;5", "wc_review": "423;510;464;558", "wc_reply_reviewers": "120;166;116;106", "wc_reply_authors": "254;1169;96;141", "reply_reviewers": "2;2;2;1", "reply_authors": "4;5;5;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 132.0, 31.796226191169293 ], "wc_strengths_avg": [ 109.5, 42.9563732174866 ], "wc_weaknesses_avg": [ 150.75, 60.192919018768315 ], "wc_questions_avg": [ 79.5, 44.791182167922294 ], "wc_limitations_avg": [ 17.0, 21.50581316760657 ], "wc_review_avg": [ 488.75, 50.45480651038115 ], "wc_reply_reviewers_avg": [ 127.0, 23.08679276123039 ], "wc_reply_authors_avg": [ 415.0, 439.11103379441516 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14810788769719024074&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "hcmut.edu.vn;vinai.io;vinai.io;vinai.io", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Ho Chi Minh City University of Technology;VinAI Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.hcmut.edu.vn;https://www.vinai.io/", "aff_unique_abbr": "HCMUT;VinAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Vietnam" }, { "title": "Topological Parallax: A Geometric Specification for Deep Perception Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71582", "id": "SthlUe5xDP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/597254dc45be8c166d3ccf0ba2d56325-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SthlUe5xDP", "openreview": "https://openreview.net/forum?id=SthlUe5xDP", "poster": "/media/PosterPDFs/NeurIPS%202023/71582.png?t=1701377756.3659399", "slides": "https://nips.cc/virtual/2023/poster/71582", "video": "https://nips.cc/virtual/2023/poster/71582", "author_site": "Abraham Smith, Michael Catanzaro, Gabrielle Angeloro, Nirav Patel, Paul Bendich", "tldr": "", "abstract": "For safety and robustness of AI systems, we introduce _topological parallax_ as a \ntheoretical and computational tool that compares a trained model to a reference dataset to determine whether they have similar multiscale geometric structure. \n\nOur proofs and examples show that this geometric similarity between dataset and model is essential \nto trustworthy interpolation and perturbation, and we conjecture that this new concept will add value to the current debate regarding the unclear relationship between \"overfitting\"' and \"generalization'' in applications of deep-learning. 
\n\nIn typical deep-learning applications, an explicit geometric description of the model is\nimpossible, but parallax can estimate topological features (components, cycles, voids, etc.)\nin the model by examining the effect on the Rips complex of geodesic distortions using the reference dataset.\nThus, parallax indicates whether the model shares similar multiscale geometric features with the dataset.\n\nParallax presents theoretically via topological data analysis [TDA] as a bi-filtered persistence module,\nand the key properties of this module are stable under perturbation of the reference dataset.", "keywords": "topological data analysis;persistent homology;convexity;AI safety;interpolation", "primary_area": "", "supplementary_material": "/attachment/e2e46ab73b0116e50b9c0f329b3aa20f6c0580df.zip", "author": "Abraham David Smith;Michael J. Catanzaro;Gabrielle Angeloro;Nirav Patel;Paul Bendich", "authorids": "~Abraham_David_Smith1;michael.catanzaro@geomdata.com;gabrielle.angeloro@geomdata.com;nirav.patel@geomdata.com;~Paul_Bendich1", "gender": "M;;;;M", "homepage": ";;;;https://www.paulbendich.com", "dblp": ";;;;", "google_scholar": ";;;;CW8Zd00AAAAJ", "orcid": "0000-0002-6875-3290;;;;", "linkedin": ";;;;", "or_profile": "~Abraham_David_Smith1;michael.catanzaro@geomdata.com;gabrielle.angeloro@geomdata.com;nirav.patel@geomdata.com;~Paul_Bendich1", "aff": "Geometric Data Analytics, Inc.;;;;Duke University", "aff_domain": "geomdata.com;;;;duke.edu", "position": "Researcher;;;;Researcher", "bibtex": "@inproceedings{\nsmith2023topological,\ntitle={Topological Parallax: A Geometric Specification for Deep Perception Models},\nauthor={Abraham David Smith and Michael J. Catanzaro and Gabrielle Angeloro and Nirav Patel and Paul Bendich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SthlUe5xDP}\n}", "github": "", "project": "", "reviewers": "xrbA;YL9S;Hzkd;GC8k;kfxD;3L38", "pdf_size": 652887, "rating": "5;5;7;7;7;8", "confidence": "1;2;3;2;4;3", "soundness": "2;2;3;3;4;3", "novelty": "2;3;3;3;3;4", "presentation": "1;1;3;3;3;3", "wc_summary": "26;164;130;99;79;66", "wc_strengths": "17;104;138;59;37;100", "wc_weaknesses": "269;1147;175;142;23;41", "wc_questions": "360;34;44;15;141;40", "wc_limitations": "7;23;6;5;2;18", "wc_review": "679;1472;493;320;282;265", "wc_reply_reviewers": "139;365;10;0;9;16", "wc_reply_authors": "361;0;0;0;0;0", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.9574271077563381 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820635 ], "wc_summary_avg": [ 94.0, 44.44847203973008 ], "wc_strengths_avg": [ 75.83333333333333, 41.822707813924346 ], "wc_weaknesses_avg": [ 299.5, 387.8933487442134 ], "wc_questions_avg": [ 105.66666666666667, 120.6961289998795 ], "wc_limitations_avg": [ 10.166666666666666, 7.602996485304695 ], "wc_review_avg": [ 585.1666666666666, 421.99976961552426 ], "wc_reply_reviewers_avg": [ 89.83333333333333, 132.01188919521184 ], "wc_reply_authors_avg": [ 60.166666666666664, 134.53675664623736 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7006490497453708, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15638958239225216357&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "email": "geomdata.com;;;;duke.edu", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Geometric Data Analytics;Duke University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.duke.edu", "aff_unique_abbr": "GDA;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Scaling Up Differentially Private LASSO Regularized Logistic Regression via Faster Frank-Wolfe Iterations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71581", "id": "SuvDnzrKCo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72235260ae8d57ac42638a26d3b7d089-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SuvDnzrKCo", "openreview": "https://openreview.net/forum?id=SuvDnzrKCo", "poster": "/media/PosterPDFs/NeurIPS%202023/71581.png?t=1700925063.680509", "slides": "https://nips.cc/virtual/2023/poster/71581", "video": "https://nips.cc/virtual/2023/poster/71581", "author_site": "Edward Raff, Amol Khanna, Fred Lu", "tldr": "", "abstract": "To the best of our knowledge, there are no methods today for training differentially private regression models on sparse input data. To remedy this, we adapt the Frank-Wolfe algorithm for $L_1$ penalized linear regression to be aware of sparse inputs and to use them effectively. In doing so, we reduce the training time of the algorithm from $\\mathcal{O}( T D S + T N S)$ to $\\mathcal{O}(N S + T \\sqrt{D} \\log{D} + T S^2)$, where $T$ is the number of iterations and a sparsity rate $S$ of a dataset with $N$ rows and $D$ features. Our results demonstrate that this procedure can reduce runtime by a factor of up to $2,200\\times$, depending on the value of the privacy parameter $\\epsilon$ and the sparsity of the dataset.", "keywords": "Sparsity;Differential Privacy;Regression", "primary_area": "", "supplementary_material": "/attachment/f4026f0c3e8f681ed5d0322c1824f1ef800b1bbc.pdf", "author": "Edward Raff;Amol Ashish Khanna;Fred Lu", "authorids": "~Edward_Raff1;~Amol_Ashish_Khanna1;~Fred_Lu1", "gender": "M;;", "homepage": "http://www.edwardraff.com/;;", "dblp": "204/3369;329/1865;", "google_scholar": "debM2bUAAAAJ;kJPXyT8AAAAJ;8BjErXQAAAAJ", "orcid": "0000-0002-9900-1972;0000-0002-5566-095X;0000-0003-1026-5734", "linkedin": "edward-raff-09992040/;;fl16180", "or_profile": "~Edward_Raff1;~Amol_Ashish_Khanna1;~Fred_Lu1", "aff": "Syracuse University;Booz Allen Hamilton;Booz Allen Hamilton", "aff_domain": "syr.edu;bah.com;bah.com", "position": "MBA student;Researcher;Researcher", "bibtex": "@inproceedings{\nraff2023scaling,\ntitle={Scaling Up Differentially Private {LASSO} Regularized Logistic Regression via Faster Frank-Wolfe Iterations},\nauthor={Edward Raff and Amol Ashish Khanna and Fred Lu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SuvDnzrKCo}\n}", "github": "", "project": "", "reviewers": "LVpw;d4md;Vyxf;Fgow", "pdf_size": 640722, "rating": "5;6;6;6", "confidence": "2;3;3;3", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "3;3;3;2", "wc_summary": "65;81;69;40", "wc_strengths": "51;97;29;59", "wc_weaknesses": "89;54;66;210", "wc_questions": "3;2;223;119", "wc_limitations": "7;1;2;1", "wc_review": "215;235;389;429", "wc_reply_reviewers": "20;16;28;16", "wc_reply_authors": 
"66;31;57;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.75, 14.922717580923388 ], "wc_strengths_avg": [ 59.0, 24.535688292770594 ], "wc_weaknesses_avg": [ 104.75, 62.05390801553114 ], "wc_questions_avg": [ 86.75, 91.92490141414349 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 317.0, 93.34880824091971 ], "wc_reply_reviewers_avg": [ 20.0, 4.898979485566356 ], "wc_reply_authors_avg": [ 46.0, 15.827191791344413 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15797379821987579440&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "syr.edu;bah.com;bah.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Syracuse University;Booz Allen Hamilton", "aff_unique_dep": ";", "aff_unique_url": "https://www.syracuse.edu;https://www.boozallen.com", "aff_unique_abbr": "Syracuse;BAH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Distortion of Binomial Voting Defies Expectation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71580", "id": "Sv5bo2StIx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/06cb881ec90a657a8f949a62f1b4ee5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Sv5bo2StIx", "openreview": "https://openreview.net/forum?id=Sv5bo2StIx", "poster": "/media/PosterPDFs/NeurIPS%202023/71580.png?t=1701994468.0866127", "slides": "https://nips.cc/virtual/2023/poster/71580", "video": "https://nips.cc/virtual/2023/poster/71580", "author_site": "Yannai A. Gonczarowski, Gregory Kehne, Ariel Procaccia, Ben Schiffer, Shirley Zhang", "tldr": "", "abstract": "In computational social choice, the distortion of a voting rule quantifies the degree to which the rule overcomes limited preference information to select a socially desirable outcome. This concept has been investigated extensively, but only through a worst-case lens. Instead, we study the expected distortion of voting rules with respect to an underlying distribution over voter utilities. Our main contribution is the design and analysis of a novel and intuitive rule, binomial voting, which provides strong distribution-independent guarantees for both expected distortion and expected welfare.", "keywords": "computational social choice;statistics;distortion", "primary_area": "", "supplementary_material": "", "author": "Yannai Gonczarowski;Gregory Kehne;Ariel D. Procaccia;Ben Schiffer;Shirley Zhang", "authorids": "yannai@gonch.name;~Gregory_Kehne1;~Ariel_D._Procaccia1;bschiffer1@g.harvard.edu;szhang2@g.harvard.edu", "gender": ";;;;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "yannai@gonch.name;~Gregory_Kehne1;~Ariel_D._Procaccia1;bschiffer1@g.harvard.edu;szhang2@g.harvard.edu", "aff": ";;;;", "aff_domain": ";;;;", "position": ";;;;", "bibtex": "@inproceedings{\ngonczarowski2023the,\ntitle={The Distortion of Binomial Voting Defies Expectation},\nauthor={Yannai Gonczarowski and Gregory Kehne and Ariel D. 
Procaccia and Ben Schiffer and Shirley Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Sv5bo2StIx}\n}", "github": "", "project": "", "reviewers": "gjAU;J4Js;Bk7r;3WoR;M8Ee;TGPz", "pdf_size": 410440, "rating": "3;5;5;6;7;7", "confidence": "4;3;3;5;4;3", "soundness": "3;3;4;3;4;3", "novelty": "2;3;3;3;3;4", "presentation": "3;4;4;3;4;4", "wc_summary": "402;517;94;205;297;362", "wc_strengths": "8;61;58;99;94;93", "wc_weaknesses": "46;608;91;131;43;322", "wc_questions": "154;114;61;125;18;128", "wc_limitations": "44;18;1;9;5;1", "wc_review": "654;1318;305;569;457;906", "wc_reply_reviewers": "397;13;6;39;9;22", "wc_reply_authors": "1205;0;0;0;0;0", "reply_reviewers": "3;1;1;1;1;1", "reply_authors": "4;1;1;1;1;1", "rating_avg": [ 5.5, 1.3844373104863459 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 312.8333333333333, 136.51546513938348 ], "wc_strengths_avg": [ 68.83333333333333, 31.640778891944 ], "wc_weaknesses_avg": [ 206.83333333333334, 202.5153629288954 ], "wc_questions_avg": [ 100.0, 46.12663727898086 ], "wc_limitations_avg": [ 13.0, 15.022205785658333 ], "wc_review_avg": [ 701.5, 331.33505197408056 ], "wc_reply_reviewers_avg": [ 81.0, 141.73566946961517 ], "wc_reply_authors_avg": [ 200.83333333333334, 449.07698548120777 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.74535599249993 ], "reply_authors_avg": [ 1.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11885164959687824642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";;;;", "author_num": 5 }, { "title": "DeepSimHO: Stable Pose Estimation for Hand-Object Interaction via Physics Simulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71579", "id": "SxVHyYavHy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fbdaea4878318e214c0577dae4b8bc43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SxVHyYavHy", "openreview": "https://openreview.net/forum?id=SxVHyYavHy", "poster": "/media/PosterPDFs/NeurIPS%202023/71579.png?t=1702182064.6747186", "slides": "https://nips.cc/virtual/2023/poster/71579", "video": "https://nips.cc/virtual/2023/poster/71579", "author_site": "Rong Wang, Wei Mao, Hongdong Li", "tldr": "", "abstract": "This paper addresses the task of 3D pose estimation for a hand interacting with an object from a single image observation. When modeling hand-object interaction, previous works mainly exploit proximity cues, while overlooking the dynamical nature that the hand must stably grasp the object to counteract gravity and thus prevent the object from slipping or falling. These works fail to leverage dynamical constraints in the estimation and consequently often produce unstable results. Meanwhile, refining unstable configurations with physics-based reasoning remains challenging, both because of the complexity of contact dynamics and because of the lack of effective and efficient physics inference in the data-driven learning framework. To address both issues, we present DeepSimHO: a novel deep-learning pipeline that combines forward physics simulation and backward gradient approximation with a neural network. 
Specifically, we forward an initial hand-object pose estimated by a base network to a physics simulator to evaluate its stability. However, due to non-smooth contact geometry and penetration, existing differentiable simulators cannot provide reliable state gradients. To remedy this, we further introduce a deep network to learn the stability evaluation process from the simulator, while smoothly approximating its gradient and thus enabling effective back-propagation. Extensive experiments show that our method noticeably improves the stability of the estimation and achieves superior efficiency over test-time optimization. The code is available at https://github.com/rongakowang/DeepSimHO.", "keywords": "hand-object pose estimation;physics simulation", "primary_area": "", "supplementary_material": "/attachment/98e61dbb87c9d9cb27fc1a0840605c87f79612e5.zip", "author": "Rong Wang;Wei Mao;Hongdong Li", "authorids": "~Rong_Wang3;~Wei_Mao1;~Hongdong_Li1", "gender": "M;M;M", "homepage": ";https://wei-mao-2019.github.io/home/;http://users.cecs.anu.edu.au/~hongdong/", "dblp": "66/4610-2.html;51/4914-1;59/4859.html", "google_scholar": "https://scholar.google.com/citations?hl=en;X3ji--4AAAAJ;https://scholar.google.com.tw/citations?hl=en", "orcid": "0000-0002-1905-3175;;", "linkedin": ";wei-mao-anu/;", "or_profile": "~Rong_Wang3;~Wei_Mao1;~Hongdong_Li1", "aff": "Australian National University;Australian National University;Australian National University", "aff_domain": "anu.edu.au;anu.edu.au;anu.edu.au", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nwang2023deepsimho,\ntitle={DeepSim{HO}: Stable Pose Estimation for Hand-Object Interaction via Physics Simulation},\nauthor={Rong Wang and Wei Mao and Hongdong Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SxVHyYavHy}\n}", "github": "", "project": "", "reviewers": "fSiU;gag8;oW7V;HRq6;bTxd", "pdf_size": 8093030, "rating": "5;5;5;5;6", "confidence": "4;3;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;4;3;4", "wc_summary": "79;329;61;92;100", "wc_strengths": "74;89;52;58;263", "wc_weaknesses": "178;117;221;349;239", "wc_questions": "133;276;2;38;51", "wc_limitations": "6;1;3;10;26", "wc_review": "470;812;339;547;679", "wc_reply_reviewers": "706;160;13;46;59", "wc_reply_authors": "736;208;0;27;88", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;1;2;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 132.2, 99.28020950823985 ], "wc_strengths_avg": [ 107.2, 78.96176289825348 ], "wc_weaknesses_avg": [ 220.8, 76.64567828651528 ], "wc_questions_avg": [ 100.0, 97.89177697845719 ], "wc_limitations_avg": [ 9.2, 8.930845424706442 ], "wc_review_avg": [ 569.4, 163.9104633633863 ], "wc_reply_reviewers_avg": [ 196.8, 259.2877937736368 ], "wc_reply_authors_avg": [ 211.8, 271.7119062536642 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3481546492181086933&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "anu.edu.au;anu.edu.au;anu.edu.au", "author_num": 3, "aff_unique_index": 
"0;0;0", "aff_unique_norm": "Australian National University", "aff_unique_dep": "", "aff_unique_url": "https://www.anu.edu.au", "aff_unique_abbr": "ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Offline Reinforcement Learning for Mixture-of-Expert Dialogue Management", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71578", "id": "SxXN3kNTsV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/12bcf58a1c09a0fcb5310f3589291ab4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SxXN3kNTsV", "openreview": "https://openreview.net/forum?id=SxXN3kNTsV", "poster": "/media/PosterPDFs/NeurIPS%202023/71578.png?t=1702390016.6198587", "slides": "https://nips.cc/virtual/2023/poster/71578", "video": "https://nips.cc/virtual/2023/poster/71578", "author_site": "Dhawal Gupta, Yinlam Chow, Azamat Tulepbergenov, Mohammad Ghavamzadeh, Craig Boutilier", "tldr": "", "abstract": "Reinforcement learning (RL) has shown great promise for developing agents for dialogue management (DM) that are non-myopic, conduct rich conversations, and maximize overall user satisfaction. Despite the advancements in RL and language models (LMs), employing RL to drive conversational chatbots still poses significant challenges. A primary issue stems from RL\u2019s dependency on online exploration for effective learning, a process that can be costly. Moreover, engaging in online interactions with humans during the training phase can raise safety concerns, as the LM can potentially generate unwanted outputs. This issue is exacerbated by the combinatorial action spaces facing these algorithms, as most LM agents generate responses at the word level. We develop various RL algorithms, specialized in dialogue planning, that leverage recent Mixture-of-Expert Language Models (MoE-LMs)---models that capture diverse semantics, generate utterances reflecting different intents, and are amenable for multi-turn DM. By exploiting the MoE-LM structure, our methods significantly reduce the size of the action space and improve the efficacy of RL-based DM. 
We evaluate our methods in open-domain dialogue to demonstrate their effectiveness with respect to the diversity of intent in generated utterances and overall DM performance.", "keywords": "Reinforcement Learning;Mixture of Experts;Dialogue Management", "primary_area": "", "supplementary_material": "/attachment/2ce05b51d6fe628abcfd1e3b4cf93b6f99a91180.pdf", "author": "Dhawal Gupta;Yinlam Chow;Azamat Tulepbergenov;Mohammad Ghavamzadeh;Craig Boutilier", "authorids": "~Dhawal_Gupta1;~Yinlam_Chow1;~Azamat_Tulepbergenov1;~Mohammad_Ghavamzadeh2;~Craig_Boutilier2", "gender": "M;M;M;M;M", "homepage": "https://dhawgupta.github.io/;;https://atulep.github.io;https://research.google/people/craigboutilier/;https://mohammadghavamzadeh.github.io/", "dblp": "231/0618;146/7869;;10/3411;88/6389", "google_scholar": "n1Lsp_8AAAAJ;;https://scholar.google.com/citations?hl=en;cXkm3rsAAAAJ;https://scholar.google.ca/citations?user=LHIPpCsAAAAJ", "orcid": ";;;;", "linkedin": "dhawgupta/;;;;", "or_profile": "~Dhawal_Gupta1;~Yinlam_Chow1;~Azamat_Tulepbergenov1;~Craig_Boutilier2;~Mohammad_Ghavamzadeh1", "aff": "Department of Computer Science, University of Massachusetts at Amherst;Google Research;Google;Google;Google Research", "aff_domain": "cs.umass.edu;google.com;google.com;google.com;google.com", "position": "PhD student;Research Scientist;Research Software Engineer;Principal Researcher;Senior Staff Research Scientist", "bibtex": "@inproceedings{\ngupta2023offline,\ntitle={Offline Reinforcement Learning for Mixture-of-Expert Dialogue Management},\nauthor={Dhawal Gupta and Yinlam Chow and Azamat Tulepbergenov and Mohammad Ghavamzadeh and Craig Boutilier},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SxXN3kNTsV}\n}", "github": "", "project": "", "reviewers": "nRAM;BhVQ;3gha;T4hv", "pdf_size": 2059949, "rating": "4;6;6;6", "confidence": "4;2;2;2", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;2;2", "wc_summary": "52;220;50;104", "wc_strengths": "52;25;71;66", "wc_weaknesses": "133;258;73;329", "wc_questions": "3;110;77;78", "wc_limitations": "1;20;7;3", "wc_review": "241;633;278;580", "wc_reply_reviewers": "0;390;54;133", "wc_reply_authors": "0;632;122;0", "reply_reviewers": "0;3;1;1", "reply_authors": "1;3;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.5, 69.01267999433148 ], "wc_strengths_avg": [ 53.5, 17.867568385205637 ], "wc_weaknesses_avg": [ 198.25, 100.76054535382389 ], "wc_questions_avg": [ 67.0, 39.26194085880116 ], "wc_limitations_avg": [ 7.75, 7.39509972887452 ], "wc_review_avg": [ 433.0, 174.99857142274047 ], "wc_reply_reviewers_avg": [ 144.25, 149.55997960684536 ], "wc_reply_authors_avg": [ 188.5, 260.85388630419135 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16700002750709869676&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cs.umass.edu;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Massachusetts Amherst;Google", "aff_unique_dep": "Department of Computer Science;Google Research", 
"aff_unique_url": "https://www.umass.edu;https://research.google", "aff_unique_abbr": "UMass Amherst;Google Research", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Amherst;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Implicit Variational Inference for High-Dimensional Posteriors", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71577", "id": "Sxu7xlUJGx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e99be8b1f637996eaf1154f2f4cb6f49-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Sxu7xlUJGx", "openreview": "https://openreview.net/forum?id=Sxu7xlUJGx", "poster": "/media/PosterPDFs/NeurIPS%202023/71577.png?t=1702255022.6287985", "slides": "https://nips.cc/virtual/2023/poster/71577", "video": "https://nips.cc/virtual/2023/poster/71577", "author_site": "Anshuk Uppal, Kristoffer Stensbo-Smidt, Wouter Boomsma, Jes Frellsen", "tldr": "", "abstract": "In variational inference, the benefits of Bayesian models rely on accurately capturing the true posterior distribution. We propose using neural samplers that specify implicit distributions, which are well-suited for approximating complex multimodal and correlated posteriors in high-dimensional spaces. Our approach introduces novel bounds for approximate inference using implicit distributions by locally linearising the neural sampler. This is distinct from existing methods that rely on additional discriminator networks and unstable adversarial objectives. Furthermore, we present a new sampler architecture that, for the first time, enables implicit distributions over tens of millions of latent variables, addressing computational concerns by using differentiable numerical approximations. We empirically show that our method is capable of recovering correlations across layers in large Bayesian neural networks, a property that is crucial for a network's performance but notoriously challenging to achieve. To the best of our knowledge, no other method has been shown to accomplish this task for such large models. 
Through experiments in downstream tasks, we demonstrate that our expressive posteriors outperform state-of-the-art uncertainty quantification methods, validating the effectiveness of our training algorithm and the quality of the learned implicit approximation.", "keywords": "Implicit models;Variational Inference;Bayesian Deep Learning;Bayesian Inference;Generative Modelling", "primary_area": "", "supplementary_material": "", "author": "Anshuk Uppal;Kristoffer Stensbo-Smidt;Wouter Boomsma;Jes Frellsen", "authorids": "~Anshuk_Uppal1;~Kristoffer_Stensbo-Smidt1;~Wouter_Boomsma1;~Jes_Frellsen1", "gender": "M;M;M;M", "homepage": "https://uppalanshuk.github.io/;;;https://frellsen.org", "dblp": ";139/4212;06/5945;83/8247", "google_scholar": "XBi06jkAAAAJ;https://scholar.google.co.uk/citations?user=urhY48QAAAAJ;EwqU_jsAAAAJ;Yj2sBWkAAAAJ", "orcid": ";0000-0002-2855-3817;0000-0002-8257-3827;0000-0001-9224-1271", "linkedin": "anshuk-uppal-58011a134/;;;frellsen/", "or_profile": "~Anshuk_Uppal1;~Kristoffer_Stensbo-Smidt1;~Wouter_Boomsma1;~Jes_Frellsen1", "aff": "Technical University of Denmark;Technical University of Denmark;University of Copenhagen;Technical University of Denmark", "aff_domain": "dtu.dk;dtu.dk;ku.dk;dtu.dk", "position": "PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nuppal2023implicit,\ntitle={Implicit Variational Inference for High-Dimensional Posteriors},\nauthor={Anshuk Uppal and Kristoffer Stensbo-Smidt and Wouter Boomsma and Jes Frellsen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Sxu7xlUJGx}\n}", "github": "", "project": "", "reviewers": "M1uk;Q4Sr;C434;MstJ", "pdf_size": 793531, "rating": "4;8;8;8", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;4;3;3", "presentation": "3;4;3;3", "wc_summary": "169;86;271;152", "wc_strengths": "52;69;164;72", "wc_weaknesses": "250;279;103;156", "wc_questions": "39;18;97;342", "wc_limitations": "10;18;27;1", "wc_review": "520;470;662;723", "wc_reply_reviewers": "603;34;235;160", "wc_reply_authors": "796;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 1.7320508075688772 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 169.5, 66.29668166658116 ], "wc_strengths_avg": [ 89.25, 43.82564888281747 ], "wc_weaknesses_avg": [ 197.0, 70.79901129253147 ], "wc_questions_avg": [ 124.0, 129.1452670445185 ], "wc_limitations_avg": [ 14.0, 9.617692030835672 ], "wc_review_avg": [ 593.75, 102.61182924010272 ], "wc_reply_reviewers_avg": [ 258.0, 211.73922640833464 ], "wc_reply_authors_avg": [ 199.0, 344.67811070620655 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7192300951666614940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "dtu.dk;dtu.dk;ku.dk;dtu.dk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technical University of Denmark;University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tek.dk;https://www.ku.dk", "aff_unique_abbr": "DTU;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": 
"Denmark" }, { "id": "SycQxJaGIR", "title": "Learn to Follow: Lifelong Multi-agent Pathfinding with Decentralized Replanning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-agent Pathfinding (MAPF) problem generally asks to find a set of conflict-free paths for a set of agents confined to a graph. In conventional MAPF scenarios, the graph and the agents' start and goal locations are known in advance. Thus, a centralized planning algorithm can be utilized to generate a solution. In this work, we investigate the decentralized MAPF setting, in which the agents can not share the information and must independently navigate toward their goals without knowing the other agents' goals or paths. We focus on the lifelong variant of MAPF, which involves continuously assigning new goals to the agents upon arrival to the previous ones. To address this complex problem, we propose a method that integrates two complementary approaches: planning with heuristic search and reinforcement learning (RL) through policy optimization. Planning is utilized to maintain an individual path, while RL is employed to discover the collision avoidance policies that effectively guide an agent along the path. This decomposition and intrinsic motivation specific for multi-agent scenarios allows leveraging replanning with learnable policies. We evaluate our method on a wide range of setups and compare it to the state-of-the-art competitors (both learnable and search-based). The results show that our method consistently outperforms the competitors in challenging setups when the number of agents is high.", "keywords": "Multi-agent Pathfinding;Reinforcement learning;Heuristic Search", "primary_area": "", "supplementary_material": "/attachment/02cc4fde967100812eb20c8868724fe013769556.pdf", "author": "Alexey Skrynnik;Anton Andreychuk;Maria Nesterova;Konstantin Yakovlev;Aleksandr Panov", "authorids": "~Alexey_Skrynnik1;~Anton_Andreychuk1;~Maria_Nesterova1;~Konstantin_Yakovlev1;~Aleksandr_Panov1", "gender": "M;M;F;M;M", "homepage": "https://tviskaron.github.io;;;http://kyakovlev.me;http://grafft.github.io", "dblp": "222/2698;185/0555;331/7118;150/4804;177/9975", "google_scholar": "ITgs9IMAAAAJ;xtcLdhcAAAAJ;https://scholar.google.com/citations?hl=en;Tw0A27kAAAAJ;https://scholar.google.ru/citations?hl=ru", "orcid": "0000-0001-9243-1622;;;0000-0002-4377-321X;0000-0002-9747-3837", "linkedin": "alexey-skrynnik-65a125168/;;;;", "or_profile": "~Alexey_Skrynnik1;~Anton_Andreychuk1;~Maria_Nesterova1;~Konstantin_Yakovlev1;~Aleksandr_Panov1", "aff": "AIRI; Artificial Intelligence Research Institute;Moscow Institute of Physics and Technology;Federal Research Center for Computer Science and Control of Russian Academy of Sciences;Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences", "aff_domain": "airi.net;airi.net;mipt.ru;isa.ru;frccsc.ru", "position": "Senior Research Scientist;Researcher;MS student;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nskrynnik2023learn,\ntitle={Learn to Follow: Lifelong Multi-agent Pathfinding with Decentralized Replanning},\nauthor={Alexey Skrynnik and Anton Andreychuk and Maria Nesterova and Konstantin Yakovlev and Aleksandr Panov},\nyear={2023},\nurl={https://openreview.net/forum?id=SycQxJaGIR}\n}", "github": "", "project": "", "reviewers": "VcwW;8Qnn;wG3W;prAH", "site": "https://openreview.net/forum?id=SycQxJaGIR", "pdf_size": 642471, "rating": "4;5;6;6", "confidence": "3;3;3;3", "soundness": "2;3;2;3", "novelty": "2;3;3;3", 
"presentation": "3;3;3;3", "wc_summary": "49;76;62;66", "wc_strengths": "16;82;44;68", "wc_weaknesses": "64;255;87;70", "wc_questions": "77;20;148;73", "wc_limitations": "7;1;26;18", "wc_review": "213;434;367;295", "wc_reply_reviewers": "22;15;64;71", "wc_reply_authors": "554;0;34;66", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.25, 9.67923034130297 ], "wc_strengths_avg": [ 52.5, 25.074887836239668 ], "wc_weaknesses_avg": [ 119.0, 78.97151385151484 ], "wc_questions_avg": [ 79.5, 45.5 ], "wc_limitations_avg": [ 13.0, 9.669539802906858 ], "wc_review_avg": [ 327.25, 82.26291691886448 ], "wc_reply_reviewers_avg": [ 43.0, 24.748737341529164 ], "wc_reply_authors_avg": [ 163.5, 226.65998764669516 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11252617250604029845&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Artificial Intelligence Research Institute;Moscow Institute of Physics and Technology;Russian Academy of Sciences", "aff_unique_dep": ";;Federal Research Center for Computer Science and Control", "aff_unique_url": "https://www.airi.jp;https://www.mipt.ru/en;https://www.ras.ru", "aff_unique_abbr": "AIRI;MIPT;RAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;2", "aff_country_unique": "Japan;United States;Russian Federation" }, { "title": "Precision-Recall Divergence Optimization for Generative Modeling with GANs and Normalizing Flows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71576", "id": "SzYHu7EIwZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67159f1c0cab15dd34c76a5dd830a389-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=SzYHu7EIwZ", "openreview": "https://openreview.net/forum?id=SzYHu7EIwZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71576.png?t=1697797006.761681", "slides": "https://nips.cc/virtual/2023/poster/71576", "video": "https://nips.cc/virtual/2023/poster/71576", "author_site": "Alexandre Verine, Benjamin Negrevergne, Muni Sreenivas Pydi, Yann Chevaleyre", "tldr": "", "abstract": "Achieving a balance between image quality (precision) and diversity (recall) is a significant challenge in the domain of generative models. Current state-of-the-art models primarily rely on optimizing heuristics, such as the Fr\\'echet Inception Distance. While recent developments have introduced principled methods for evaluating precision and recall, they have yet to be successfully integrated into the training of generative models. Our main contribution is a novel training method for generative models, such as Generative Adversarial Networks and Normalizing Flows, which explicitly optimizes a user-defined trade-off between precision and recall. More precisely, we show that achieving a specified precision-recall trade-off corresponds to minimizing a unique $f$-divergence from a family we call the \\mbox{\\em PR-divergences}. Conversely, any $f$-divergence can be written as a linear combination of PR-divergences and corresponds to a weighted precision-recall trade-off. 
Through comprehensive evaluations, we show that our approach improves the performance of existing state-of-the-art models like BigGAN in terms of either precision or recall when tested on datasets such as ImageNet.", "keywords": "Generative Models;Precision;Recall;Optimization;f-Divergences", "primary_area": "", "supplementary_material": "", "author": "Alexandre Verine;benjamin negrevergne;Muni Sreenivas Pydi;Yann Chevaleyre", "authorids": "~Alexandre_Verine1;~benjamin_negrevergne1;~Muni_Sreenivas_Pydi1;~Yann_Chevaleyre1", "gender": "M;;M;M", "homepage": ";;https://munisreenivas.github.io/;https://www.lamsade.dauphine.fr/~ychevaleyre/", "dblp": "297/4718;;194/2444;55/5658", "google_scholar": "l_e0zo8AAAAJ;;BT8j_-oAAAAJ;SF6g8p4AAAAJ", "orcid": ";;;", "linkedin": "alexandre-v%C3%A9rine-b300b2136/;;;yannchevaleyre", "or_profile": "~Alexandre_Verine1;~benjamin_negrevergne1;~Muni_Sreenivas_Pydi1;~Yann_Chevaleyre1", "aff": "Universit\u00e9 Paris-Dauphine (PSL);;Universit\u00e9 Paris Dauphine - PSL;Universit\u00e9 Paris-Dauphine (Paris IX)", "aff_domain": "lamsade.dauphine.fr;;lamsade.dauphine.fr;dauphine.fr", "position": "PhD student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nverine2023precisionrecall,\ntitle={Precision-Recall Divergence Optimization for Generative Modeling with {GAN}s and Normalizing Flows},\nauthor={Alexandre Verine and benjamin negrevergne and Muni Sreenivas Pydi and Yann Chevaleyre},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=SzYHu7EIwZ}\n}", "github": "", "project": "", "reviewers": "Pocn;jwf3;VDUJ;oBuL;hgHq", "pdf_size": 10472854, "rating": "3;3;6;6;7", "confidence": "4;4;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "3;4;3;3;3", "wc_summary": "74;83;63;96;59", "wc_strengths": "148;52;56;111;134", "wc_weaknesses": "421;157;39;192;125", "wc_questions": "60;34;296;58;5", "wc_limitations": "32;9;9;4;3", "wc_review": "735;335;463;461;326", "wc_reply_reviewers": "486;218;257;248;31", "wc_reply_authors": "557;0;0;375;0", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.0, 1.6733200530681511 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 75.0, 13.461054936371072 ], "wc_strengths_avg": [ 100.2, 39.54946270178648 ], "wc_weaknesses_avg": [ 186.8, 127.61567301863826 ], "wc_questions_avg": [ 90.6, 104.61280992306821 ], "wc_limitations_avg": [ 11.4, 10.594338110519223 ], "wc_review_avg": [ 464.0, 147.7403127111893 ], "wc_reply_reviewers_avg": [ 248.0, 144.71627413667062 ], "wc_reply_authors_avg": [ 186.4, 235.43542639118695 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7986051286366067052&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "lamsade.dauphine.fr;;lamsade.dauphine.fr;dauphine.fr", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Universit\u00e9 Paris-Dauphine;Universit\u00e9 Paris Dauphine", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-paris-dauphine.fr;https://www.univ-paris-dauphine.fr", "aff_unique_abbr": "PSL;UPD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "France" }, { "title": "Connecting Certified and Adversarial Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71575", "id": "T2lM4ohRwb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8b0c97b34fdaf58b2f48f8cca85e76a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=T2lM4ohRwb", "openreview": "https://openreview.net/forum?id=T2lM4ohRwb", "poster": "/media/PosterPDFs/NeurIPS%202023/71575.png?t=1701435955.7643604", "slides": "https://nips.cc/virtual/2023/poster/71575", "video": "https://nips.cc/virtual/2023/poster/71575", "author_site": "Yuhao Mao, Mark M\u00fcller, Marc Fischer, Martin Vechev", "tldr": "", "abstract": "Training certifiably robust neural networks remains a notoriously hard problem.\nWhile adversarial training optimizes under-approximations of the worst-case loss, which leads to insufficient regularization for certification, sound certified training methods, optimize loose over-approximations, leading to over-regularization and poor (standard) accuracy.\nIn this work, we propose TAPS, an (unsound) certified training method that combines IBP and PGD training to optimize more precise, although not necessarily sound, worst-case loss approximations, reducing over-regularization and increasing certified and standard accuracies.\nEmpirically, TAPS achieves a new state-of-the-art in many settings, e.g., reaching a certified accuracy of $22$% on TinyImageNet for $\\ell_\\infty$-perturbations with radius $\\epsilon=1/255$. We make our implementation and networks public at https://github.com/eth-sri/taps.", "keywords": "Certified Training;Certified Robustness;Adversarial Robustness;Robustness Verification", "primary_area": "", "supplementary_material": "", "author": "Yuhao Mao;Mark Niklas Mueller;Marc Fischer;Martin Vechev", "authorids": "~Yuhao_Mao1;~Mark_Niklas_Mueller2;~Marc_Fischer1;~Martin_Vechev1", "gender": "M;M;M;M", "homepage": "https://algebraloveme.github.io/about/;https://www.sri.inf.ethz.ch/people/mark;;https://www.sri.inf.ethz.ch/people/martin", "dblp": "245/5901;287/4254;37/9373-2;93/2189.html", "google_scholar": ";RBpmcCAAAAAJ;;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";0000-0002-2496-6542;;", "linkedin": ";mark-m%C3%BCller-8bb4b1140/;;", "or_profile": "~Yuhao_Mao1;~Mark_Niklas_Mueller2;~Marc_Fischer1;~Martin_Vechev1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmao2023connecting,\ntitle={Connecting Certified and Adversarial Training},\nauthor={Yuhao Mao and Mark Niklas Mueller and Marc Fischer and Martin Vechev},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=T2lM4ohRwb}\n}", "github": "", "project": "", "reviewers": "5pzA;hZus;8yjL;jSFD", "pdf_size": 580615, "rating": "5;6;7;7", "confidence": "4;4;3;5", "soundness": "2;4;4;3", "novelty": "2;2;3;3", "presentation": "3;4;3;3", "wc_summary": "51;140;98;107", "wc_strengths": "64;21;36;80", "wc_weaknesses": "185;89;298;44", "wc_questions": "2;148;103;11", "wc_limitations": "2;46;1;6", "wc_review": "304;444;536;248", "wc_reply_reviewers": "0;63;83;38", "wc_reply_authors": "0;0;226;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 
0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 31.81980515339464 ], "wc_strengths_avg": [ 50.25, 23.09085316743407 ], "wc_weaknesses_avg": [ 154.0, 97.49615377028984 ], "wc_questions_avg": [ 66.0, 61.6725222445134 ], "wc_limitations_avg": [ 13.75, 18.713297411199342 ], "wc_review_avg": [ 383.0, 113.57376457615553 ], "wc_reply_reviewers_avg": [ 46.0, 30.97579700346708 ], "wc_reply_authors_avg": [ 56.5, 97.86087062764156 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=192562913138758927&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Intelligent Knee Sleeves: A Real-time Multimodal Dataset for 3D Lower Body Motion Estimation Using Smart Textile", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73585", "id": "T3FKjN4p8d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/84948f178cfd3f6a0ffecda8fdcb3488-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=T3FKjN4p8d", "openreview": "https://openreview.net/forum?id=T3FKjN4p8d", "poster": "/media/PosterPDFs/NeurIPS%202023/73585.png?t=1701571799.385991", "slides": "https://nips.cc/virtual/2023/poster/73585", "video": "https://nips.cc/virtual/2023/poster/73585", "author_site": "Wenwen Zhang, Arvin Tashakori, Zenan Jiang, Amir Servati, Harishkumar Narayana, Saeid Soltanian, Rou Yi Yeap, Menghan Ma, Lauren Toy, Peyman Servati", "tldr": "", "abstract": "The kinematics of human movements and locomotion are closely linked to the activation and contractions of muscles. To investigate this, we present a multimodal dataset with benchmarks collected using a novel pair of Intelligent Knee Sleeves (Texavie MarsWear Knee Sleeves) for human pose estimation. Our system utilizes synchronized datasets that comprise time-series data from the Knee Sleeves and the corresponding ground truth labels from visualized motion capture camera system. We employ these to generate 3D human models solely based on the wearable data of individuals performing different activities. We demonstrate the effectiveness of this camera-free system and machine learning algorithms in the assessment of various movements and exercises, including extension to unseen exercises and individuals. The results show an average error of 7.21 degrees across all eight lower body joints when compared to the ground truth, indicating the effectiveness and reliability of the Knee Sleeve system for the prediction of different lower body joints beyond knees. The results enable human pose estimation in a seamless manner without being limited by visual occlusion or the field of view of cameras. 
Our results show the potential of multimodal wearable sensing in a variety of applications from home fitness to sports, healthcare, and physical rehabilitation focusing on pose and movement estimation.", "keywords": "Pose estimation;muscle activity;textile sensing;pressure sensor;motion track;wearable sensors;machine learning;joint angles;activity track;benchmark dataset;exercise monitoring;open-source multimodal dataset;3D human model", "primary_area": "", "supplementary_material": "/attachment/94df1a436f465cf578c965566f9d51a9085f5753.zip", "author": "Wenwen Zhang;Arvin Tashakori;Zenan Jiang;Amir Servati;Harishkumar Narayana;Saeid Soltanian;Rou Yi Yeap;Menghan Ma;Lauren Toy;Peyman Servati", "authorids": "~Wenwen_Zhang4;~Arvin_Tashakori1;~Zenan_Jiang1;~Amir_Servati1;~Harishkumar_Narayana1;~Saeid_Soltanian1;~Rou_Yi_Yeap1;~Menghan_Ma2;~Lauren_Toy1;~Peyman_Servati1", "gender": "F;M;F;M;M;;F;;F;M", "homepage": "https://github.com/Zhang-Wenwen;http://arvintashakori.com;;;;;;https://www.linkedin.com/in/megan-ma-88ba42233/;;https://feel.ece.ubc.ca/", "dblp": ";;;;;;;;;168/4706", "google_scholar": ";8pFUPnQAAAAJ;https://scholar.google.ca/citations?hl=en;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;;;https://scholar.google.ca/citations?user=bl8U3JoAAAAJ", "orcid": ";;;0009-0006-4778-7455;;;;;;0000-0003-0146-1557", "linkedin": ";realrvn/;;;;;ryyeap/;;laurenjtoy/;", "or_profile": "~Wenwen_Zhang4;~Arvin_Tashakori1;~Zenan_Jiang1;~Amir_Servati1;~Harishkumar_Narayana1;~Saeid_Soltanian1;~Rou_Yi_Yeap1;~Menghan_Ma2;~Lauren_Toy1;~Peyman_Servati1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia;Texavie;University of British Columbia;University of Waterloo;University of British Columbia", "aff_domain": "ubc.ca;ece.ubc.ca;ubc.ca;ubc.ca;ubc.ca;ubc.ca;texavie.com;ubc.ca;uwaterloo.ca;ubc.ca", "position": "MS student;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nzhang2023intelligent,\ntitle={Intelligent Knee Sleeves: A Real-time Multimodal Dataset for 3D Lower Body Motion Estimation Using Smart Textile},\nauthor={Wenwen Zhang and Arvin Tashakori and Zenan Jiang and Amir Servati and Harishkumar Narayana and Saeid Soltanian and Rou Yi Yeap and Menghan Ma and Lauren Toy and Peyman Servati},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=T3FKjN4p8d}\n}", "github": "", "project": "", "reviewers": "9KjM;Hbu8;kVDB;4Wwh;z3mb", "pdf_size": 8079451, "rating": "6;6;6;7;8", "confidence": "4;4;5;2;3", "wc_summary_and_contributions": "57;81;91;11;66", "wc_strengths": "33;35;17;29;25", "wc_improvement": "135;25;203;1;36", "wc_limitations": "29;25;7;4;99", "wc_correctness": "1;8;5;6;48", "wc_clarity": "14;5;12;1;14", "wc_relation_to_prior_work": "30;6;23;1;10", "wc_documentation": "1;5;1;6;10", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "301;191;360;60;309", "wc_reply_reviewers": "94;0;0;0;0", "wc_reply_authors": "1366;304;777;70;633", "reply_reviewers": "1;0;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "wc_summary_and_contributions_avg": [ 61.2, 27.715699522112015 ], "wc_strengths_avg": [ 27.8, 6.3999999999999995 ], "wc_improvement_avg": [ 80.0, 
76.62375610735877 ], "wc_limitations_avg": [ 32.8, 34.504492461127434 ], "wc_correctness_avg": [ 13.6, 17.350504315437057 ], "wc_clarity_avg": [ 9.2, 5.2687759489277965 ], "wc_relation_to_prior_work_avg": [ 14.0, 10.825894882179487 ], "wc_documentation_avg": [ 4.6, 3.3823069050575527 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 244.2, 107.32641799668896 ], "wc_reply_reviewers_avg": [ 18.8, 37.60000000000001 ], "wc_reply_authors_avg": [ 630.0, 443.4388345645879 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.6864064729836441, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5732513627761522846&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ubc.ca;ece.ubc.ca;ubc.ca;ubc.ca;ubc.ca;ubc.ca;texavie.com;ubc.ca;uwaterloo.ca;ubc.ca", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;0;2;0", "aff_unique_norm": "University of British Columbia;Texavie;University of Waterloo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ubc.ca;;https://uwaterloo.ca", "aff_unique_abbr": "UBC;;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Canada;" }, { "title": "PUCA: Patch-Unshuffle and Channel Attention for Enhanced Self-Supervised Image Denoising", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71574", "id": "T3SstRu5fq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d226fb8fbd6ee6ec70d0427f1319707-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=T3SstRu5fq", "openreview": "https://openreview.net/forum?id=T3SstRu5fq", "poster": "/media/PosterPDFs/NeurIPS%202023/71574.png?t=1701321853.7325077", "slides": "https://nips.cc/virtual/2023/poster/71574", "video": "https://nips.cc/virtual/2023/poster/71574", "author_site": "Hyemi Jang, Junsung Park, Dahuin Jung, Jaihyun Lew, Ho Bae, Sungroh Yoon", "tldr": "", "abstract": "Although supervised image denoising networks have shown remarkable performance on synthesized noisy images, they often fail in practice due to the difference between real and synthesized noise. Since clean-noisy image pairs from the real world are extremely costly to gather, self-supervised learning, which utilizes noisy input itself as a target, has been studied. To prevent a self-supervised denoising model from learning identical mapping, each output pixel should not be influenced by its corresponding input pixel; This requirement is known as J-invariance. Blind-spot networks (BSNs) have been a prevalent choice to ensure J-invariance in self-supervised image denoising. However, constructing variations of BSNs by injecting additional operations such as downsampling can expose blinded information, thereby violating J-invariance. Consequently, convolutions designed specifically for BSNs have been allowed only, limiting architectural flexibility. To overcome this limitation, we propose PUCA, a novel J-invariant U-Net architecture, for self-supervised denoising. PUCA leverages patch-unshuffle/shuffle to dramatically expand receptive fields while maintaining J-invariance and dilated attention blocks (DABs) for global context incorporation. 
Experimental results demonstrate that PUCA achieves state-of-the-art performance, outperforming existing methods in self-supervised image denoising.", "keywords": "self-supervised image denoising;low-level vision", "primary_area": "", "supplementary_material": "/attachment/a5f58884bc792a39df675593f48f7a8908565c8c.pdf", "author": "Hyemi Jang;Junsung Park;Dahuin Jung;Jaihyun Lew;Ho Bae;Sungroh Yoon", "authorids": "~Hyemi_Jang1;~Junsung_Park1;~Dahuin_Jung2;~Jaihyun_Lew1;~Ho_Bae1;~Sungroh_Yoon1", "gender": ";M;F;M;M;", "homepage": "http://data.snu.ac.kr;http://data.snu.ac.kr/;https://hai.ssu.ac.kr/;;https://www.spai.co.kr;http://ailab.snu.ac.kr", "dblp": "224/0270;;224/0158;306/8963;199/1782;99/1474", "google_scholar": ";;https://scholar.google.co.kr/citations?user=wleS-UQAAAAJ;LeF2vTkAAAAJ;https://scholar.google.com/citations?hl=en;Bphl_fIAAAAJ", "orcid": "0000-0002-7736-0528;;;0000-0003-3934-2879;0000-0002-5238-3547;0000-0002-2367-197X", "linkedin": ";;;;;", "or_profile": "~Hyemi_Jang1;~Junsung_Park1;~Dahuin_Jung2;~Jaihyun_Lew1;~Ho_Bae1;~Sungroh_Yoon1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Ewha Womans University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;ewha.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\njang2023puca,\ntitle={{PUCA}: Patch-Unshuffle and Channel Attention for Enhanced Self-Supervised Image Denoising},\nauthor={Hyemi Jang and Junsung Park and Dahuin Jung and Jaihyun Lew and Ho Bae and Sungroh Yoon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=T3SstRu5fq}\n}", "github": "", "project": "", "reviewers": "DQiP;daq5;3t6u;jJKN;nv4m", "pdf_size": 3021409, "rating": "6;6;7;7;7", "confidence": "4;4;5;3;3", "soundness": "3;3;3;3;4", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "83;48;61;79;64", "wc_strengths": "62;58;57;34;133", "wc_weaknesses": "175;539;148;41;3", "wc_questions": "91;65;13;62;43", "wc_limitations": "10;16;12;5;11", "wc_review": "421;726;291;221;254", "wc_reply_reviewers": "99;77;16;12;15", "wc_reply_authors": "141;105;64;37;42", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 67.0, 12.696456198483101 ], "wc_strengths_avg": [ 68.8, 33.570224902434 ], "wc_weaknesses_avg": [ 181.2, 190.03831192683228 ], "wc_questions_avg": [ 54.8, 25.895173295423223 ], "wc_limitations_avg": [ 10.8, 3.54400902933387 ], "wc_review_avg": [ 382.6, 184.63000839516855 ], "wc_reply_reviewers_avg": [ 43.8, 36.77716682943372 ], "wc_reply_authors_avg": [ 77.8, 39.675685249280825 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.21821789023599236, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11426396062789402592&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;ewha.ac.kr;snu.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Seoul National University;Ewha Womans University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.snu.ac.kr;http://www.ewha.ac.kr", "aff_unique_abbr": "SNU;Ewha", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "T47mUw8pW4", "title": "The medial axis of closed bounded sets is Lipschitz stable with respect to the Hausdorff distance under ambient diffeomorphisms", "track": "main", "status": "Reject", "tldr": "", "abstract": "We prove that the medial axis of closed sets is Hausdorff stable in the following sense: \nLet $\\mathcal{S} \\subseteq \\mathbb{R}^d$ be a fixed closed set that contains a bounding sphere. Consider the space of $C^{1,1}$~diffeomorphisms of $\\mathbb{R}^d$ to itself, which keep the bounding sphere invariant. \nThe map from this space of diffeomorphisms (endowed with a Banach norm) to the space of closed subsets of $\\mathbb{R}^d$ (endowed with the Hausdorff distance), mapping a diffeomorphism $F$ to the closure of the medial axis of $F(\\mathcal{S})$, is Lipschitz.\n\nThis extends a previous stability result of Chazal and Soufflet on the stability of the medial axis of $C^2$~manifolds under $C^2$ ambient diffeomorphisms. ", "keywords": "Medial axis;Hausdorff distance;Lipschitz continuity", "primary_area": "", "supplementary_material": "/attachment/a1c29e15ab54069f9ccc510b0b79ed0e2996480b.pdf", "author": "Hana Dal Poz Kourimska;Andr\u00e9 Lieutier;Mathijs Wintraecken", "authorids": "~Hana_Dal_Poz_Kourimska1;~Andr\u00e9_Lieutier1;~Mathijs_Wintraecken1", "gender": "F;M;M", "homepage": "https://pub.ista.ac.at/~hkourims/;;", "dblp": "322/9054;13/5075;147/5373", "google_scholar": ";gCVwHtwAAAAJ;", "orcid": "0000-0001-7841-0091;0000-0001-9517-4641;0000-0002-7472-2220", "linkedin": ";;", "or_profile": "~Hana_Dal_Poz_Kourimska1;~Andr\u00e9_Lieutier1;~Mathijs_Wintraecken1", "aff": "Institute of Science and Technology;none;Institute of Science and Technology", "aff_domain": "ist.ac.at;gmail.com;ist.ac.at", "position": "Postdoc;Researcher;Postdoc", "bibtex": "@misc{\nkourimska2023the,\ntitle={The medial axis of closed bounded sets is Lipschitz stable with respect to the Hausdorff distance under ambient diffeomorphisms},\nauthor={Hana Dal Poz Kourimska and Andr{\\'e} Lieutier and Mathijs Wintraecken},\nyear={2023},\nurl={https://openreview.net/forum?id=T47mUw8pW4}\n}", "github": "", "project": "", "reviewers": "WGuX;Ynrq;D995;JoRT;zm1A;wGEh", "site": "https://openreview.net/forum?id=T47mUw8pW4", "pdf_size": 1064658, "rating": "3;4;4;6;6;7", "confidence": "2;1;1;4;4;3", "soundness": "3;3;3;3;3;3", "novelty": "2;2;3;3;3;3", "presentation": "3;2;3;3;3;3", "wc_summary": "56;126;62;45;127;102", "wc_strengths": "20;39;66;85;60;40", "wc_weaknesses": "124;247;65;29;17;36", "wc_questions": "23;104;19;89;135;202", "wc_limitations": "1;1;1;34;45;1", "wc_review": "224;517;213;282;384;381", "wc_reply_reviewers": "0;28;0;25;0;16", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;1;0;1;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 2.5, 1.2583057392117916 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 86.33333333333333, 33.39993346633426 ], "wc_strengths_avg": [ 51.666666666666664, 21.15550887016324 ], "wc_weaknesses_avg": [ 86.33333333333333, 79.9117568877627 ], "wc_questions_avg": [ 95.33333333333333, 63.41047933022498 ], "wc_limitations_avg": [ 13.833333333333334, 
18.424771972054963 ], "wc_review_avg": [ 333.5, 106.1771318756225 ], "wc_reply_reviewers_avg": [ 11.5, 12.05197079319395 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7492686492653552, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14920629310502708260&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Institute of Science and Technology;", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Classical Simulation of Quantum Circuits: Parallel Environments and Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73584", "id": "T5ArxPU3Oq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d41b70011dd21ec3de5e019302279551-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=T5ArxPU3Oq", "openreview": "https://openreview.net/forum?id=T5ArxPU3Oq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73584", "video": "https://nips.cc/virtual/2023/poster/73584", "author_site": "Xiao-Yang Liu, Zeliang Zhang", "tldr": "", "abstract": "Google's quantum supremacy announcement has received broad questions from academia and industry due to the debatable estimate of 10,000 years' running time for the classical simulation task on the Summit supercomputer. Has quantum supremacy already come? Or will it arrive a decade or two from now? To avoid hasty advertisements of quantum supremacy by tech giants or quantum startups and eliminate the cost of dedicating a team to the classical simulation task, we advocate an open-source approach to maintain a trustable benchmark performance. In this paper, we take a reinforcement learning approach to the classical simulation of quantum circuits and demonstrate its great potential by reporting an estimated simulation time of less than 4 days, a speedup of 5.40x over the state-of-the-art method. Specifically, we formulate the classical simulation task as a tensor network contraction ordering problem using the K-spin Ising model and employ a novel Hamiltonian-based reinforcement learning algorithm. Then, we establish standard criteria to evaluate the performance of classical simulation of quantum circuits. We develop a dozen massively parallel environments to simulate quantum circuits. We open-source our parallel gym environments and benchmarks. 
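The combinatorial core here is the contraction order: different orders of the same tensor network yield identical results at very different cost. A toy numpy illustration of that gap, using the classic matrix-chain example (the shapes and FLOP accounting are ours, not the paper's environments):

```python
# Toy illustration (numpy, not the paper's environments) of the contraction
# ordering problem: two orders of the same chain give identical tensors at a
# 10x different multiply-add cost, which is what an ordering agent minimizes.
import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(10, 100))
B = rng.normal(size=(100, 5))
C = rng.normal(size=(5, 50))

cost_left = 10 * 100 * 5 + 10 * 5 * 50     # (A @ B) @ C :  7,500 mults
cost_right = 100 * 5 * 50 + 10 * 100 * 50  # A @ (B @ C) : 75,000 mults

assert np.allclose((A @ B) @ C, A @ (B @ C))  # same result either way
print(cost_left, cost_right)
```

On the quantum circuits behind supremacy experiments the networks are far larger, so the gap between a good and a bad ordering spans orders of magnitude rather than the 10x shown here.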
We hope the AI/ML community and quantum physics community will collaborate to maintain reference curves for validating an unequivocal first demonstration of empirical quantum supremacy.", "keywords": "Quantum circuits; tensor network contraction; reinforcement learning; classical simulation", "primary_area": "", "supplementary_material": "/attachment/98da7c2e9536e87dd9bf3950b23fa58efe7cb452.pdf", "author": "Xiao-Yang Liu;Zeliang Zhang", "authorids": "~Xiao-Yang_Liu1;~Zeliang_Zhang1", "gender": "M;M", "homepage": "http://www.tensorlet.org/publications/;https://github.com/ZhangAIPI", "dblp": "125/9849;219/9383", "google_scholar": "https://scholar.google.com/citations?hl=en;7nLfsSgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xiao-Yang_Liu1;~Zeliang_Zhang1", "aff": "Columbia University;University of Rochester", "aff_domain": "columbia.edu;rochester.edu", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\nliu2023classical,\ntitle={Classical Simulation of Quantum Circuits: Parallel Environments and Benchmark},\nauthor={Xiao-Yang Liu and Zeliang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=T5ArxPU3Oq}\n}", "github": "", "project": "", "reviewers": "YFU3;K8x3;m3Yr;m3Yv", "pdf_size": 1575074, "rating": "6;6;7;7", "confidence": "3;3;3;4", "wc_summary_and_contributions": "138;194;39;44", "wc_strengths": "47;19;142;101", "wc_improvement": "76;69;102;127", "wc_limitations": "17;67;1;32", "wc_correctness": "62;7;1;8", "wc_clarity": "6;7;1;23", "wc_relation_to_prior_work": "28;5;1;14", "wc_documentation": "52;14;1;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "427;383;289;352", "wc_reply_reviewers": "169;0;0;0", "wc_reply_authors": "1389;1323;557;636", "reply_reviewers": "2;0;0;0", "reply_authors": "3;3;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 103.75, 65.34667168264961 ], "wc_strengths_avg": [ 77.25, 47.604490334421186 ], "wc_improvement_avg": [ 93.5, 22.91833327273168 ], "wc_limitations_avg": [ 29.25, 24.39646490785089 ], "wc_correctness_avg": [ 19.5, 24.682990094394967 ], "wc_clarity_avg": [ 9.25, 8.257572282456872 ], "wc_relation_to_prior_work_avg": [ 12.0, 10.36822067666386 ], "wc_documentation_avg": [ 17.25, 20.7047699818182 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 362.75, 50.231339818882 ], "wc_reply_reviewers_avg": [ 42.25, 73.17914661978507 ], "wc_reply_authors_avg": [ 976.25, 381.490088337823 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1102021925853182905&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "columbia.edu;rochester.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;University of Rochester", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.rochester.edu", "aff_unique_abbr": "Columbia;U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "UNSSOR: Unsupervised Neural Speech Separation by Leveraging Over-determined Training Mixtures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71573", "id": 
"T5h69frFF7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b44765c9201730a27f7931afb4d7434-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=T5h69frFF7", "openreview": "https://openreview.net/forum?id=T5h69frFF7", "poster": "/media/PosterPDFs/NeurIPS%202023/71573.png?t=1701555604.3128252", "slides": "https://nips.cc/virtual/2023/poster/71573", "video": "https://nips.cc/virtual/2023/poster/71573", "author_site": "Zhong-Qiu Wang, Shinji Watanabe", "tldr": "", "abstract": "In reverberant conditions with multiple concurrent speakers, each microphone acquires a mixture signal of multiple speakers at a different location. In over-determined conditions where the microphones out-number speakers, we can narrow down the solutions to speaker images and realize unsupervised speech separation by leveraging each mixture signal as a constraint (i.e., the estimated speaker images at a microphone should add up to the mixture). Equipped with this insight, we propose UNSSOR, an algorithm for $\\underline{u}$nsupervised $\\underline{n}$eural $\\underline{s}$peech $\\underline{s}$eparation by leveraging $\\underline{o}$ver-determined training mixtu$\\underline{r}$es. At each training step, we feed an input mixture to a deep neural network (DNN) to produce an intermediate estimate for each speaker, linearly filter the estimates, and optimize a loss so that, at each microphone, the filtered estimates of all the speakers can add up to the mixture to satisfy the above constraint. We show that this loss can promote unsupervised separation of speakers. The linear filters are computed in each sub-band based on the mixture and DNN estimates through the forward convolutive prediction (FCP) algorithm. To address the frequency permutation problem incurred by using sub-band FCP, a loss term based on minimizing intra-source magnitude scattering is proposed. Although UNSSOR requires over-determined training mixtures, we can train DNNs to achieve under-determined separation (e.g., unsupervised monaural speech separation). 
Evaluation results on two-speaker separation in reverberant conditions show the effectiveness and potential of UNSSOR.", "keywords": "Speech separation;microphone array processing;deep learning", "primary_area": "", "supplementary_material": "", "author": "Zhong-Qiu Wang;Shinji Watanabe", "authorids": "~Zhong-Qiu_Wang1;~Shinji_Watanabe1", "gender": "M;M", "homepage": "http://zqwang7.github.io/;https://sites.google.com/view/shinjiwatanabe", "dblp": "28/9635-1;39/3245-1", "google_scholar": "fGUzTN8AAAAJ;U5xRA6QAAAAJ", "orcid": ";0000-0002-5970-8631", "linkedin": ";shinji-watanabe-82533520", "or_profile": "~Zhong-Qiu_Wang1;~Shinji_Watanabe1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwang2023unssor,\ntitle={{UNSSOR}: Unsupervised Neural Speech Separation by Leveraging Over-determined Training Mixtures},\nauthor={Zhong-Qiu Wang and Shinji Watanabe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=T5h69frFF7}\n}", "github": "", "project": "", "reviewers": "ux7k;XDQW;XWBZ;jddy;2uqn", "pdf_size": 4288604, "rating": "6;6;7;7;7", "confidence": "5;5;4;4;5", "soundness": "3;4;3;3;3", "novelty": "2;3;4;3;3", "presentation": "2;4;4;4;3", "wc_summary": "201;166;132;310;62", "wc_strengths": "77;71;523;203;46", "wc_weaknesses": "671;352;188;230;30", "wc_questions": "77;173;37;248;270", "wc_limitations": "9;4;7;64;95", "wc_review": "1035;766;887;1055;503", "wc_reply_reviewers": "88;0;28;0;0", "wc_reply_authors": "658;0;0;0;0", "reply_reviewers": "2;0;1;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 174.2, 81.9717024344377 ], "wc_strengths_avg": [ 184.0, 178.06964929487563 ], "wc_weaknesses_avg": [ 294.2, 214.71879284310444 ], "wc_questions_avg": [ 161.0, 91.67987783586975 ], "wc_limitations_avg": [ 35.8, 37.03727851773129 ], "wc_review_avg": [ 849.2, 202.52446765761414 ], "wc_reply_reviewers_avg": [ 23.2, 34.166650406500196 ], "wc_reply_authors_avg": [ 131.6, 263.2 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6666666666666665, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4320512466477303582&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Mass-Producing Failures of Multimodal Systems with Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71572", "id": "T6iiOqsGOh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d570ed1708bbe19cb60f7a7aff60575-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=T6iiOqsGOh", "openreview": "https://openreview.net/forum?id=T6iiOqsGOh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71572", "video": 
"https://nips.cc/virtual/2023/poster/71572", "author_site": "Shengbang Tong, Erik Jones, Jacob Steinhardt", "tldr": "", "abstract": "Deployed multimodal models can fail in ways that evaluators did not anticipate. In order to find these failures before deployment, we introduce MultiMon, a system that automatically identifies systematic failures---generalizable, natural-language descriptions that describe categories of individual failures. To uncover systematic failures, MultiMon scrapes for examples of erroneous agreement: inputs that produce the same output, but should not. It then prompts a language model to identify common categories and describe them in natural language. \nWe use MultiMon to find 14 systematic failures (e.g.\"ignores quantifiers'') of the CLIP text-encoder, each comprising hundreds of distinct inputs (e.g.\"a shelf with a few/many books''). Because CLIP is the backbone for most state-of-the-art multimodal models, these inputs produce failures in Midjourney 5.1, DALL-E, VideoFusion, and others. MultiMon can also steer towards failures relevant to specific use cases, such as self-driving cars. We see MultiMon as a step towards evaluation that autonomously explores the long-tail of potential system failures.", "keywords": "safety;red-teaming;robustness;explainability;failures;multimodal models;vision-language;natural-language explanations", "primary_area": "", "supplementary_material": "/attachment/9aeddaf385c94646449025aeeb5da231b3ac73bb.zip", "author": "Shengbang Tong;Erik Jones;Jacob Steinhardt", "authorids": "~Shengbang_Tong1;~Erik_Jones3;~Jacob_Steinhardt1", "gender": "M;M;", "homepage": "https://tsb0601.github.io/petertongsb/;http://people.eecs.berkeley.edu/~erjones/;", "dblp": "306/1406;264/5304;35/10625", "google_scholar": "https://scholar.google.com/citations?hl=en;_-CU2CsAAAAJ;", "orcid": ";;", "linkedin": ";erik-jones-879239133/;", "or_profile": "~Shengbang_Tong1;~Erik_Jones3;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntong2023massproducing,\ntitle={Mass-Producing Failures of Multimodal Systems with Language Models},\nauthor={Shengbang Tong and Erik Jones and Jacob Steinhardt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=T6iiOqsGOh}\n}", "github": "", "project": "", "reviewers": "dXH6;VAzu;ATN5;vAM5", "pdf_size": 2846552, "rating": "6;6;7;7", "confidence": "4;2;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "wc_summary": "45;146;174;79", "wc_strengths": "132;65;71;71", "wc_weaknesses": "80;98;177;62", "wc_questions": "74;156;84;30", "wc_limitations": "88;67;8;20", "wc_review": "419;532;514;262", "wc_reply_reviewers": "0;89;40;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.0, 51.41497836234107 ], "wc_strengths_avg": [ 84.75, 27.38955092731533 ], "wc_weaknesses_avg": [ 104.25, 43.88835266901687 ], "wc_questions_avg": [ 86.0, 45.232731511594565 ], "wc_limitations_avg": [ 45.75, 32.88141572377929 ], "wc_review_avg": [ 431.75, 106.99620320366513 
], "wc_reply_reviewers_avg": [ 32.25, 36.608571400697954 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3018489434511551522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Curve Your Enthusiasm: Concurvity Regularization in Differentiable Generalized Additive Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71571", "id": "TAIYBdRb3C", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c6696d70d364337cf98dcb7c652a770-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TAIYBdRb3C", "openreview": "https://openreview.net/forum?id=TAIYBdRb3C", "poster": "/media/PosterPDFs/NeurIPS%202023/71571.png?t=1701718357.0889661", "slides": "https://nips.cc/virtual/2023/poster/71571", "video": "https://nips.cc/virtual/2023/poster/71571", "author_site": "Julien Siems, Konstantin Ditschuneit, Winfried Ripken, Alma Lindborg, Maximilian Schambach, Johannes Otterbach, Martin Genzel", "tldr": "", "abstract": "Generalized Additive Models (GAMs) have recently experienced a resurgence in popularity due to their interpretability, which arises from expressing the target value as a sum of non-linear transformations of the features. Despite the current enthusiasm for GAMs, their susceptibility to concurvity \u2014 i.e., (possibly non-linear) dependencies between the features \u2014 has hitherto been largely overlooked. Here, we demonstrate how concurvity can severly impair the interpretability of GAMs and propose a remedy: a conceptually simple, yet effective regularizer which penalizes pairwise correlations of the non-linearly transformed feature variables. This procedure is applicable to any differentiable additive model, such as Neural Additive Models or NeuralProphet, and enhances interpretability by eliminating ambiguities due to self-canceling feature contributions. \nWe validate the effectiveness of our regularizer in experiments on synthetic as well as real-world datasets for time-series and tabular data. 
Our experiments show that concurvity in GAMs can be reduced without significantly compromising prediction quality, improving interpretability and reducing variance in the feature importances.", "keywords": "Interpretable Machine Learning;Generalized Additive Models;Concurvity;Multicollinearity;Regularization;Time-Series Forecasting;Interpretability", "primary_area": "", "supplementary_material": "", "author": "Julien Niklas Siems;Konstantin Ditschuneit;Winfried Ripken;Alma Lindborg;Maximilian Schambach;Johannes Otterbach;Martin Genzel", "authorids": "~Julien_Niklas_Siems1;~Konstantin_Ditschuneit1;~Winfried_Ripken1;~Alma_Lindborg1;~Maximilian_Schambach1;~Johannes_Otterbach1;~Martin_Genzel1", "gender": "M;M;F;M;M;M;M", "homepage": "https://juliensiems.github.io;https://konstantin.ditschuneit.de;;https://maxschambach.github.io/;https://jotterbach.github.io;https://www.martingenzel.com;", "dblp": "257/3075;;;255/7753;292/3909;157/3631;177/2014", "google_scholar": "https://scholar.google.de/citations?user=rKgTTh8AAAAJ;3xpwWbkAAAAJ;IBrCbDoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;ZSfT6UAAAAAJ;wAVKdLcAAAAJ", "orcid": ";0000-0002-1120-1030;;0000-0002-4927-266X;0000-0002-7404-2321;0000-0002-2133-2579;", "linkedin": "julien-niklas-siems/;konstantin-ditschuneit-527379120/;alma-lindborg/;;jotterbach/;martin-genzel-512631202/;", "or_profile": "~Julien_Niklas_Siems1;~Konstantin_Ditschuneit1;~Alma_Lindborg1;~Maximilian_Schambach1;~Johannes_Otterbach1;~Martin_Genzel1;~Winfried_L\u00f6tzsch1", "aff": "University of Freiburg;Merantix Momentum GmbH;Merantix Momentum;Merantix Momentum;Merantix Momentum;Helmholtz-Zentrum Berlin;Merantix Momentum", "aff_domain": "cs.uni-freiburg.de;merantix-momentum.com;merantix-momentum.com;merantix.com;merantix-momentum.com;helmholtz-berlin.de;merantix.com", "position": "PhD student;Researcher;Researcher;Senior Machine Learning Researcher;Principal Researcher;Postdoc;ML Researcher", "bibtex": "@inproceedings{\nsiems2023curve,\ntitle={Curve Your Enthusiasm: Concurvity Regularization in Differentiable Generalized Additive Models},\nauthor={Julien Niklas Siems and Konstantin Ditschuneit and Winfried Ripken and Alma Lindborg and Maximilian Schambach and Johannes Otterbach and Martin Genzel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TAIYBdRb3C}\n}", "github": "", "project": "", "reviewers": "5zAd;aFW2;arAb;17dg", "pdf_size": 11363530, "rating": "4;5;6;6", "confidence": "5;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "4;3;2;2", "wc_summary": "21;87;46;100", "wc_strengths": "74;36;46;41", "wc_weaknesses": "329;270;60;65", "wc_questions": "73;713;334;100", "wc_limitations": "1;37;1;2", "wc_review": "498;1143;487;308", "wc_reply_reviewers": "0;468;69;14", "wc_reply_authors": "31;1282;253;16", "reply_reviewers": "0;2;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 63.5, 31.610915836147488 ], "wc_strengths_avg": [ 49.25, 14.720309100015529 ], "wc_weaknesses_avg": [ 181.0, 120.33494920429393 ], "wc_questions_avg": [ 305.0, 256.4926899543143 ], "wc_limitations_avg": [ 10.25, 15.449514555480375 ], "wc_review_avg": [ 609.0, 317.39643980359955 ], "wc_reply_reviewers_avg": [ 137.75, 192.40630836851477 ], 
"wc_reply_authors_avg": [ 395.5, 520.3530051801373 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14869577336083290901&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "cs.uni-freiburg.de;merantix-momentum.com;merantix-momentum.com;merantix.com;merantix-momentum.com;helmholtz-berlin.de;merantix.com", "author_num": 7, "aff_unique_index": "0;1;2;2;2;3;2", "aff_unique_norm": "University of Freiburg;Merantix Momentum GmbH;Merantix;Helmholtz-Zentrum Berlin", "aff_unique_dep": ";;Momentum;", "aff_unique_url": "https://www.uni-freiburg.de;;https://www.merantix.com;https://www.helmholtz-berlin.de", "aff_unique_abbr": "UoF;;Merantix;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "GEQ: Gaussian Kernel Inspired Equilibrium Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71570", "id": "TBOfDCX4Gz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/79cab89b43ac21c6941ad9735df95d30-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TBOfDCX4Gz", "openreview": "https://openreview.net/forum?id=TBOfDCX4Gz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71570", "video": "https://nips.cc/virtual/2023/poster/71570", "author_site": "Mingjie Li, Yisen Wang, Zhouchen Lin", "tldr": "", "abstract": "Despite the connection established by optimization-induced deep equilibrium models (OptEqs) between their output and the underlying hidden optimization problems, the performance of it along with its related works is still not good enough especially when compared to deep networks. One key factor responsible for this performance limitation is the use of linear kernels to extract features in these models. To address this issue, we propose a novel approach by replacing its linear kernel with a new function that can readily capture nonlinear feature dependencies in the input data. Drawing inspiration from classical machine learning algorithms, we introduce Gaussian kernels as the alternative function and then propose our new equilibrium model, which we refer to as GEQ. By leveraging Gaussian kernels, GEQ can effectively extract the nonlinear information embedded within the input features, surpassing the performance of the original OptEqs. Moreover, GEQ can be perceived as a weight-tied neural network with infinite width and depth. GEQ also enjoys better theoretical properties and improved overall performance. Additionally, our GEQ exhibits enhanced stability when confronted with various samples. 
We further substantiate the effectiveness and stability of GEQ through a series of comprehensive experiments.", "keywords": "equilibrium models;neural networks", "primary_area": "", "supplementary_material": "", "author": "Mingjie Li;Yisen Wang;Zhouchen Lin", "authorids": "~Mingjie_Li1;~Yisen_Wang1;~Zhouchen_Lin1", "gender": "M;M;M", "homepage": "https://mingjieli0111.github.io/;https://yisenwang.github.io/;https://zhouchenlin.github.io", "dblp": ";172/1346-1;l/ZhouchenLin", "google_scholar": ";uMWPDboAAAAJ;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ", "orcid": "0000-0002-1588-2654;;0000-0003-1493-7569", "linkedin": ";;", "or_profile": "~Mingjie_Li1;~Yisen_Wang1;~Zhouchen_Lin1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nli2023geq,\ntitle={{GEQ}: Gaussian Kernel Inspired Equilibrium Models},\nauthor={Mingjie Li and Yisen Wang and Zhouchen Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TBOfDCX4Gz}\n}", "github": "", "project": "", "reviewers": "CdMn;sG64;8r48;p1Jk", "pdf_size": 633515, "rating": "4;6;6;7", "confidence": "3;3;4;3", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "45;88;34;42", "wc_strengths": "19;64;59;73", "wc_weaknesses": "370;57;235;205", "wc_questions": "96;76;54;89", "wc_limitations": "27;9;2;1", "wc_review": "557;294;384;410", "wc_reply_reviewers": "174;40;60;227", "wc_reply_authors": "367;0;513;341", "reply_reviewers": "2;1;1;2", "reply_authors": "4;1;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.25, 21.0282548015759 ], "wc_strengths_avg": [ 53.75, 20.680606857633556 ], "wc_weaknesses_avg": [ 216.75, 111.21684899330677 ], "wc_questions_avg": [ 78.75, 15.990231392947383 ], "wc_limitations_avg": [ 9.75, 10.425329730996522 ], "wc_review_avg": [ 411.25, 94.51818608077495 ], "wc_reply_reviewers_avg": [ 125.25, 77.86968280402843 ], "wc_reply_authors_avg": [ 305.25, 188.03506986729894 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7175795232588527957&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "REx: Data-Free Residual Quantization Error Expansion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71569", "id": "TDS3kqRteY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c96b559340daa7bb29f56ccfbbc9c2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TDS3kqRteY", "openreview": "https://openreview.net/forum?id=TDS3kqRteY", "poster": "/media/PosterPDFs/NeurIPS%202023/71569.png?t=1701893126.894829", "slides": 
"https://nips.cc/virtual/2023/poster/71569", "video": "https://nips.cc/virtual/2023/poster/71569", "author_site": "Edouard YVINEC, Arnaud Dapogny, Matthieu Cord, Kevin Bailly", "tldr": "", "abstract": "Deep neural networks (DNNs) are ubiquitous in computer vision and natural language processing, but suffer from high inference cost. This problem can be addressed by quantization, which consists in converting floating point operations into a lower bit-width format. With the growing concerns on privacy rights, we focus our efforts on data-free methods. However, such techniques suffer from their lack of adaptability to the target devices, as a hardware typically only supports specific bit widths. Thus, to adapt to a variety of devices, a quantization method shall be flexible enough to find good accuracy v.s. speed trade-offs for every bit width and target device. To achieve this, we propose REx, a quantization method that leverages residual error expansion, along with group sparsity. \nWe show experimentally that REx enables better trade-offs (in terms of accuracy given any target bit-width) on both convnets and transformers for computer vision, as well as NLP models. In particular, when applied to large language models, we show that REx elegantly solves the outlier problem that hinders state-of-the-art quantization methods.\nIn addition, REx is backed off by strong theoretical guarantees on the preservation of the predictive function of the original model. Lastly, we show that REx is agnostic to the quantization operator and can be used in combination with previous quantization work.", "keywords": "deep learning;quantization;compression;acceleration;data-free", "primary_area": "", "supplementary_material": "/attachment/9beea7389f32b84f6eb4ee026d239d891c7954ff.pdf", "author": "Edouard YVINEC;Arnaud Dapogny;Matthieu Cord;Kevin Bailly", "authorids": "~Edouard_YVINEC1;~Arnaud_Dapogny1;~Matthieu_Cord1;~Kevin_Bailly3", "gender": "M;M;M;M", "homepage": "https://edouardyvinec.netlify.app/;;https://cord.isir.upmc.fr/;https://sites.google.com/view/bailly/", "dblp": "263/2218.html;165/8156;68/3117;41/3712", "google_scholar": "https://scholar.google.fr/citations?user=q-J6Tz0AAAAJ;;SpAotDcAAAAJ;oy59G9AAAAAJ", "orcid": "0000-0002-4318-612X;;;", "linkedin": "edouard-yvinec-aa8333158/;;;", "or_profile": "~Edouard_YVINEC1;~Arnaud_Dapogny1;~Matthieu_Cord1;~Kevin_Bailly3", "aff": "Computer Science Lab - Pierre and Marie Curie University, Paris, France;datakalab;Sorbonne Universit\u00e9;Sorbonne University", "aff_domain": "upmc.fr;datakalab.com;isir.upmc.fr;sorbonne-universite.fr", "position": "PhD student;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyvinec2023rex,\ntitle={{RE}x: Data-Free Residual Quantization Error Expansion},\nauthor={Edouard YVINEC and Arnaud Dapogny and Matthieu Cord and Kevin Bailly},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TDS3kqRteY}\n}", "github": "", "project": "", "reviewers": "m1LK;mdRP;E57B;d6mY", "pdf_size": 643246, "rating": "5;5;5;6", "confidence": "3;4;4;3", "soundness": "3;3;2;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "106;142;40;104", "wc_strengths": "40;90;17;79", "wc_weaknesses": "311;139;23;113", "wc_questions": "73;112;9;216", "wc_limitations": "1;75;16;18", "wc_review": "531;558;105;530", "wc_reply_reviewers": "219;26;236;78", "wc_reply_authors": "166;37;472;0", "reply_reviewers": "1;1;2;1", "reply_authors": 
"2;2;3;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 36.742346141747674 ], "wc_strengths_avg": [ 56.5, 29.415132160165456 ], "wc_weaknesses_avg": [ 146.5, 104.27247959073381 ], "wc_questions_avg": [ 102.5, 75.14153312250157 ], "wc_limitations_avg": [ 27.5, 28.20017730440715 ], "wc_review_avg": [ 431.0, 188.55105409411001 ], "wc_reply_reviewers_avg": [ 139.75, 89.85648279339672 ], "wc_reply_authors_avg": [ 168.75, 185.60896395379186 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=577947050991789536&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "upmc.fr;datakalab.com;isir.upmc.fr;sorbonne-universite.fr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Pierre and Marie Curie University;Datakalab;Sorbonne Universit\u00e9;Sorbonne University", "aff_unique_dep": "Computer Science Lab;;;", "aff_unique_url": "https://www.upmc.fr;;https://www.sorbonne-universite.fr;https://www.sorbonne.universite.fr", "aff_unique_abbr": "UPMC;;Sorbonne U;Sorbonne", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France;" }, { "title": "Uniform Convergence with Square-Root Lipschitz Loss", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71568", "id": "TEpRn67828", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a50f08293e5f635655e8bec8f013d99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TEpRn67828", "openreview": "https://openreview.net/forum?id=TEpRn67828", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71568", "video": "https://nips.cc/virtual/2023/poster/71568", "author_site": "Lijia Zhou, Zhen Dai, Frederic Koehler, Nati Srebro", "tldr": "", "abstract": "We establish generic uniform convergence guarantees for Gaussian data in terms of the Radamacher complexity of the hypothesis class and the Lipschitz constant of the square root of the scalar loss function. 
We show how these guarantees substantially generalize previous results based on smoothness (Lipschitz constant of the derivative), and allow us to handle the broader class of square-root-Lipschitz losses, which also includes non-smooth loss functions appropriate for studying phase retrieval and ReLU regression, as well as rederive and better understand \u201coptimistic rate\u201d and interpolation learning guarantees.", "keywords": "Uniform Convergence;Square-Root Lipschitz;Benign Overfitting;Minimal Norm Interpolation;Phase Retrieval;ReLU Regression;Matrix Sensing", "primary_area": "", "supplementary_material": "/attachment/f9919cd5b726cf6b33e5995162bfb61995444708.pdf", "author": "Lijia Zhou;Zhen Dai;Frederic Koehler;Nathan Srebro", "authorids": "~Lijia_Zhou1;~Zhen_Dai1;~Frederic_Koehler1;~Nathan_Srebro1", "gender": "M;M;;M", "homepage": "https://stat.uchicago.edu/people/profile/lijia-zhou/;https://sites.google.com/uchicago.edu/zhendaishomepage/home;https://frkoehle.github.io/;http://ttic.uchicago.edu/~nati/", "dblp": "90/5654;;132/1904;50/3633", "google_scholar": "ZTdI4mcAAAAJ;RDHmWxcAAAAJ;;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lijia_Zhou1;~Zhen_Dai1;~Frederic_Koehler1;~Nathan_Srebro1", "aff": "University of Chicago;University of Chicago;Stanford University;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu;stanford.edu;uchicago.edu", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhou2023uniform,\ntitle={Uniform Convergence with Square-Root Lipschitz Loss},\nauthor={Lijia Zhou and Zhen Dai and Frederic Koehler and Nathan Srebro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TEpRn67828}\n}", "github": "", "project": "", "reviewers": "JChX;949a;K77B;Y88d;8w1y", "pdf_size": 464518, "rating": "4;4;6;7;7", "confidence": "3;2;3;1;3", "soundness": "3;3;4;4;3", "novelty": "3;2;3;3;2", "presentation": "2;3;3;4;2", "wc_summary": "25;117;39;138;194", "wc_strengths": "22;42;68;37;214", "wc_weaknesses": "227;208;52;87;305", "wc_questions": "8;3;2;51;129", "wc_limitations": "15;2;1;12;10", "wc_review": "297;372;162;325;852", "wc_reply_reviewers": "14;0;8;11;81", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 102.6, 63.057434137459154 ], "wc_strengths_avg": [ 76.6, 70.28399533321935 ], "wc_weaknesses_avg": [ 175.8, 93.34109491536941 ], "wc_questions_avg": [ 38.6, 48.722068921588296 ], "wc_limitations_avg": [ 8.0, 5.549774770204643 ], "wc_review_avg": [ 401.6, 235.77667399469354 ], "wc_reply_reviewers_avg": [ 22.8, 29.471342012198903 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.22116293423234576, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1851674219705802144&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "uchicago.edu;uchicago.edu;stanford.edu;uchicago.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Chicago;Stanford University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.uchicago.edu;https://www.stanford.edu", "aff_unique_abbr": "UChicago;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "THDGuhN7LA", "title": "Boosting Semi-Supervised Few-Shot Object Detection with SoftER Teacher", "track": "main", "status": "Reject", "tldr": "", "abstract": "Few-shot object detection (FSOD) is an emerging problem aimed at detecting novel concepts from few exemplars. Existing approaches to FSOD assume abundant base labels to adapt to novel objects. This paper studies the task of semi-supervised FSOD by considering a realistic scenario in which both base and novel labels are simultaneously scarce. We explore the utility of unlabeled data and discover its remarkable ability to boost semi-supervised FSOD by way of region proposals. Motivated by this finding, we introduce SoftER Teacher, a robust detector combining pseudo-labeling with representation learning on region proposals, to harness unlabeled data for improved FSOD without relying on abundant labels. Extensive experiments show that SoftER Teacher surpasses the novel performance of a strong supervised detector using only 10% of required base labels, without experiencing catastrophic forgetting observed in prior approaches. Our work also sheds light on a potential relationship between semi-supervised and few-shot detection suggesting that a stronger semi-supervised detector leads to a more effective few-shot detector.", "keywords": "label-efficient object detection;semi-supervised few-shot object detection;generalized few-shot object detection", "primary_area": "", "supplementary_material": "/attachment/9dda27d46e2e09c725db552b8463a5c5993bfddd.zip", "author": "Phi Vu Tran", "authorids": "~Phi_Vu_Tran1", "gender": "", "homepage": "https://github.com/vuptran", "dblp": "", "google_scholar": "7f6XMyQAAAAJ", "orcid": "", "linkedin": "vuptran/", "or_profile": "~Phi_Vu_Tran1", "aff": "LexisNexis Risk Solutions", "aff_domain": "lexisnexisrisk.com", "position": "Machine Learning Scientist", "bibtex": "@misc{\ntran2023boosting,\ntitle={Boosting Semi-Supervised Few-Shot Object Detection with Soft{ER} Teacher},\nauthor={Phi Vu Tran},\nyear={2023},\nurl={https://openreview.net/forum?id=THDGuhN7LA}\n}", "github": "", "project": "", "reviewers": "F1pt;Qe4L;2E7Y;4p6Z", "site": "https://openreview.net/forum?id=THDGuhN7LA", "pdf_size": 2154278, "rating": "4;4;5;6", "confidence": "5;5;4;4", "soundness": "3;2;3;4", "novelty": "1;2;3;4", "presentation": "2;2;3;4", "wc_summary": "130;51;57;151", "wc_strengths": "19;38;97;39", "wc_weaknesses": "363;496;135;411", "wc_questions": "82;5;100;6", "wc_limitations": "1;5;12;1", "wc_review": "595;595;401;608", "wc_reply_reviewers": "96;0;56;0", "wc_reply_authors": "794;0;212;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 97.25, 43.93389921233944 ], "wc_strengths_avg": [ 48.25, 29.252136674096132 ], "wc_weaknesses_avg": [ 351.25, 133.6270462892898 ], "wc_questions_avg": [ 48.25, 43.222534631832964 ], "wc_limitations_avg": [ 4.75, 4.493050188902857 ], "wc_review_avg": [ 549.75, 86.04468315939108 ], "wc_reply_reviewers_avg": [ 38.0, 40.54626986542659 ], "wc_reply_authors_avg": [ 251.5, 324.9503808275965 ], 
"reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5658723268690111595&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "LexisNexis Risk Solutions", "aff_unique_dep": "", "aff_unique_url": "https://www.lexisnexis.com/risk-solutions", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "White-Box Transformers via Sparse Rate Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71567", "id": "THfl8hdVxH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e118ba9ee76c20df728b42a35fb4704-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=THfl8hdVxH", "openreview": "https://openreview.net/forum?id=THfl8hdVxH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71567", "video": "https://nips.cc/virtual/2023/poster/71567", "author_site": "Yaodong Yu, Sam Buchanan, Druv Pai, Tianzhe Chu, Tianzhe Chu, Ziyang Wu, Shengbang Tong, Benjamin Haeffele, Yi Ma", "tldr": "", "abstract": "In this paper, we contend that the objective of representation learning is to compress and transform the distribution of the data, say sets of tokens, towards a mixture of low-dimensional Gaussian distributions supported on incoherent subspaces. The quality of the final representation can be measured by a unified objective function called sparse rate reduction. From this perspective, popular deep networks such as transformers can be naturally viewed as realizing iterative schemes to optimize this objective incrementally. Particularly, we show that the standard transformer block can be derived from alternating optimization on complementary parts of this objective: the multi-head self-attention operator can be viewed as a gradient descent step to compress the token sets by minimizing their lossy coding rate, and the subsequent multi-layer perceptron can be viewed as attempting to sparsify the representation of the tokens. This leads to a family of white-box transformer-like deep network architectures which are mathematically fully interpretable. Despite their simplicity, experiments show that these networks indeed learn to optimize the designed objective: they compress and sparsify representations of large-scale real-world vision datasets such as ImageNet, and achieve performance very close to thoroughly engineered transformers such as ViT. 
\nCode is at https://github.com/Ma-Lab-Berkeley/CRATE.", "keywords": "white-box deep neural networks;representation learning;transformer;sparse coding", "primary_area": "", "supplementary_material": "/attachment/c76d4e28ed757036e9c72787d94e89f734715496.pdf", "author": "Yaodong Yu;Sam Buchanan;Druv Pai;Tianzhe Chu;Ziyang Wu;Shengbang Tong;Benjamin David Haeffele;Yi Ma", "authorids": "~Yaodong_Yu4;~Sam_Buchanan1;~Druv_Pai1;~Tianzhe_Chu1;~Ziyang_Wu1;~Shengbang_Tong1;~Benjamin_David_Haeffele1;~Yi_Ma4", "gender": "M;M;M;M;;M;;M", "homepage": "https://yaodongyu.github.io;http://sdbuchanan.com;https://druvpai.github.io;https://tianzhechu.com;https://robinwu218.github.io/;https://tsb0601.github.io/petertongsb/;;http://people.eecs.berkeley.edu/~yima/", "dblp": ";226/5790;;348/8957;236/5238;306/1406;;", "google_scholar": "bZ9oyW8AAAAJ;5WT38A0AAAAJ;https://scholar.google.com/citations?hl=en;v6HphBcAAAAJ;9RAHYd0AAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;https://linkedin.com/in/druvpai;;;;;", "or_profile": "~Yaodong_Yu4;~Sam_Buchanan1;~Druv_Pai1;~Tianzhe_Chu1;~Ziyang_Wu1;~Shengbang_Tong1;~Benjamin_David_Haeffele1;~Yi_Ma4", "aff": "Electrical Engineering & Computer Science Department, University of California Berkeley;Toyota Technological Institute at Chicago;Electrical Engineering & Computer Science Department, University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;;University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;ttic.edu;eecs.berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;;berkeley.edu", "position": "PhD student;Research Assistant Professor;MS student;Intern;PhD student;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nyu2023whitebox,\ntitle={White-Box Transformers via Sparse Rate Reduction},\nauthor={Yaodong Yu and Sam Buchanan and Druv Pai and Tianzhe Chu and Ziyang Wu and Shengbang Tong and Benjamin David Haeffele and Yi Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=THfl8hdVxH}\n}", "github": "", "project": "", "reviewers": "13ev;YxEu;vvYz;tJjQ;AAKz", "pdf_size": 4176350, "rating": "5;6;7;8;10", "confidence": "2;3;3;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;4;4", "presentation": "3;3;3;3;4", "wc_summary": "74;253;104;50;75", "wc_strengths": "61;191;135;108;17", "wc_weaknesses": "68;449;157;246;17", "wc_questions": "11;123;78;2;1", "wc_limitations": "1;139;11;2;1", "wc_review": "215;1155;485;408;111", "wc_reply_reviewers": "0;1145;27;21;0", "wc_reply_authors": "0;1907;42;46;0", "reply_reviewers": "0;2;1;1;0", "reply_authors": "1;5;2;2;1", "rating_avg": [ 7.2, 1.7204650534085253 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 111.2, 72.93668487119497 ], "wc_strengths_avg": [ 102.4, 59.91861146588763 ], "wc_weaknesses_avg": [ 187.4, 152.37138839034054 ], "wc_questions_avg": [ 43.0, 49.18129725820579 ], "wc_limitations_avg": [ 30.8, 54.230618657728776 ], "wc_review_avg": [ 474.8, 365.2628642498441 ], "wc_reply_reviewers_avg": [ 238.6, 453.3310490138525 ], "wc_reply_authors_avg": [ 399.0, 754.2577808680531 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], 
"replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.90098616070174, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15364532811271216525&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "eecs.berkeley.edu;ttic.edu;eecs.berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;;berkeley.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley;Toyota Technological Institute at Chicago", "aff_unique_dep": "Electrical Engineering & Computer Science Department;", "aff_unique_url": "https://www.berkeley.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UC Berkeley;TTI Chicago", "aff_campus_unique_index": "0;1;0;0;0;0;0", "aff_campus_unique": "Berkeley;Chicago", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "TKXMPtCniG", "title": "Accelerating Large Batch Training via Gradient Signal to Noise Ratio (GSNR)", "track": "main", "status": "Reject", "tldr": "", "abstract": "As models for nature language processing (NLP), computer vision (CV) and recommendation systems (RS) require surging computation, a large number of GPUs/TPUs are paralleled with a large batch (LB) to improve training throughput. Training such LB tasks often converges to sharp minimum and downgrades final precision. Adversarial learning (ConAdv) and LANS method scales ImageNet and BERT pretraining up to 96k batch size. In this work, we develop the variance reduced gradient descent technique (VRGD) based on the gradient signal to noise ratio (GSNR) and apply it onto popular optimizers such as SGD/Adam/LARS/LAMB. We carry out a theoretical analysis of VR-SGD's convergence rate to explain its fast training dynamics, and a generalization analysis to demonstrate its smaller generalization gap on LB training. Comprehensive experiments demonstrate that VRGD can remarkably accelerate training ($1.7\\sim 4 \\times$), narrow the generalization gap and improve final accuracy. We push the batch size limit of BERT pretraining up to 128k/64k and DLRM to 512k without noticeable accuracy loss. 
We improve ImageNet Top-1 accuracy at 96k by $0.52pp$ over LARS and significantly reduce the generalization gap by $68.3$%.", "keywords": "large batch;optimizer", "primary_area": "", "supplementary_material": "/attachment/4619dfca5bf98b76f16eba418c11acfa4652e12e.pdf", "author": "GUO-QING JIANG;Jinlong Liu;Zixiang Ding;Lin Guo;Wei Lin", "authorids": "~GUO-QING_JIANG1;~Jinlong_Liu2;~Zixiang_Ding2;~Lin_Guo4;~Wei_Lin7", "gender": "M;M;;Not Specified;M", "homepage": "https://github.com/Ageliss;;;;", "dblp": ";;;;99/2649-22.html", "google_scholar": "kCBHjI4AAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;eLlnOd4AAAAJ;IAUsEFkAAAAJ", "orcid": ";;;;", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/wei-lin-57731526", "or_profile": "~GUO-QING_JIANG1;~Jinlong_Liu2;~Zixiang_Ding2;~Lin_Guo4;~Wei_Lin7", "aff": "Meituan Group;;Meituan;;Meituan", "aff_domain": "meituan.com;;meituan.com;;meituan.com", "position": "Researcher;;Researcher;;Principal Researcher", "bibtex": "@misc{\njiang2023accelerating,\ntitle={Accelerating Large Batch Training via Gradient Signal to Noise Ratio ({GSNR})},\nauthor={GUO-QING JIANG and Jinlong Liu and Zixiang Ding and Lin Guo and Wei Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=TKXMPtCniG}\n}", "github": "", "project": "", "reviewers": "cWbq;6iTd;FtDV;xC4Q;Xfpk", "site": "https://openreview.net/forum?id=TKXMPtCniG", "pdf_size": 2422416, "rating": "5;5;5;5;7", "confidence": "2;3;4;5;2", "soundness": "3;2;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;3;3;2;2", "wc_summary": "145;102;130;62;108", "wc_strengths": "26;90;56;39;18", "wc_weaknesses": "109;640;108;84;13", "wc_questions": "78;2;1;1;9", "wc_limitations": "38;45;1;1;2", "wc_review": "396;879;296;187;150", "wc_reply_reviewers": "28;551;70;201;12", "wc_reply_authors": "22;744;0;590;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 109.4, 28.267295590487606 ], "wc_strengths_avg": [ 45.8, 25.568730903195018 ], "wc_weaknesses_avg": [ 190.8, 227.30895274933627 ], "wc_questions_avg": [ 18.2, 30.049292836937113 ], "wc_limitations_avg": [ 17.4, 19.80504986108341 ], "wc_review_avg": [ 381.6, 263.2296335901412 ], "wc_reply_reviewers_avg": [ 172.4, 200.60967075392952 ], "wc_reply_authors_avg": [ 271.2, 326.9167478120385 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5144957554275266, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3395988849929275081&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Meituan Group;Meituan", "aff_unique_dep": ";", "aff_unique_url": "https://www.meituan.com;https://www.meituan.com", "aff_unique_abbr": "Meituan;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "CoDet: Co-occurrence Guided Region-Word Alignment for Open-Vocabulary Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71566", "id": "TKjX41IP7n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e10a6a906ef323efaf708f76cf3c1d1e-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=TKjX41IP7n", "openreview": "https://openreview.net/forum?id=TKjX41IP7n", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71566", "video": "https://nips.cc/virtual/2023/poster/71566", "author_site": "Chuofan Ma, Yi Jiang, Xin Wen, Zehuan Yuan, Xiaojuan Qi", "tldr": "", "abstract": "Deriving reliable region-word alignment from image-text pairs is critical to learn\nobject-level vision-language representations for open-vocabulary object detection.\nExisting methods typically rely on pre-trained or self-trained vision-language\nmodels for alignment, which are prone to limitations in localization accuracy or\ngeneralization capabilities. In this paper, we propose CoDet, a novel approach\nthat overcomes the reliance on pre-aligned vision-language space by reformulating\nregion-word alignment as a co-occurring object discovery problem. Intuitively, by\ngrouping images that mention a shared concept in their captions, objects corresponding \nto the shared concept shall exhibit high co-occurrence among the group.\nCoDet then leverages visual similarities to discover the co-occurring objects and\nalign them with the shared concept. Extensive experiments demonstrate that CoDet\nhas superior performances and compelling scalability in open-vocabulary detection,\ne.g., by scaling up the visual backbone, CoDet achieves 37.0 $AP^m_{novel}$ and \n44.7 $AP^m_{all}$ on OV-LVIS, surpassing the previous SoTA by 4.2 $AP^m_{novel}$ \nand 9.8 $AP^m_{all}$. Code is available at https://github.com/CVMI-Lab/CoDet.", "keywords": "Open-vocabulary Object Detection; Object-level Vision-Language Pretraining", "primary_area": "", "supplementary_material": "/attachment/3c6981434aa7b1cc2a1eea84dfd9a5d0e802e5be.pdf", "author": "Chuofan Ma;Yi Jiang;Xin Wen;Zehuan Yuan;XIAOJUAN QI", "authorids": "~Chuofan_Ma1;~Yi_Jiang2;~Xin_Wen3;~Zehuan_Yuan1;~XIAOJUAN_QI2", "gender": ";M;M;M;F", "homepage": "https://machuofan.github.io/;https://enjoyyi.github.io/;https://wen-xin.info;https://shallowyuan.github.io/;https://xjqi.github.io/", "dblp": "330/3312;;42/4185;227/3298;176/1445-1.html", "google_scholar": "hgKtgWAAAAAJ;https://scholar.google.com.hk/citations?user=6dikuoYAAAAJ;byCeJl4AAAAJ;;bGn0uacAAAAJ", "orcid": ";0000-0002-2133-8719;0000-0003-3898-0406;;", "linkedin": ";;x-wen/;;", "or_profile": "~Chuofan_Ma1;~Yi_Jiang2;~Xin_Wen3;~Zehuan_Yuan1;~XIAOJUAN_QI2", "aff": "University of Hong Kong;Bytedance;The University of Hong Kong;ByteDance Inc.;University of Hong Kong", "aff_domain": "hku.hk;bytedance.com;hku.hk;bytedance.com;hku.hk", "position": "PhD student;Researcher;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nma2023codet,\ntitle={CoDet: Co-occurrence Guided Region-Word Alignment for Open-Vocabulary Object Detection},\nauthor={Chuofan Ma and Yi Jiang and Xin Wen and Zehuan Yuan and XIAOJUAN QI},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TKjX41IP7n}\n}", "github": "", "project": "", "reviewers": "Z3qw;Puz3;e66K;CkPP;2fPU", "pdf_size": 6413789, "rating": "5;5;6;6;7", "confidence": "3;4;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;4;3;2", "wc_summary": "65;70;100;70;108", "wc_strengths": "84;22;168;23;21", "wc_weaknesses": "33;46;109;51;98", "wc_questions": "85;213;7;3;77", "wc_limitations": "6;4;7;5;12", "wc_review": "273;355;391;152;316", "wc_reply_reviewers": "40;16;0;16;11", "wc_reply_authors": "48;0;0;134;0", "reply_reviewers": 
"1;1;0;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 82.6, 17.749366185867032 ], "wc_strengths_avg": [ 63.6, 57.461639377936315 ], "wc_weaknesses_avg": [ 67.4, 30.25623902602569 ], "wc_questions_avg": [ 77.0, 76.07364852562286 ], "wc_limitations_avg": [ 6.8, 2.7856776554368237 ], "wc_review_avg": [ 297.4, 82.657365068093 ], "wc_reply_reviewers_avg": [ 16.6, 13.078226179417452 ], "wc_reply_authors_avg": [ 36.4, 52.22106854517628 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11589191407442803942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "hku.hk;bytedance.com;hku.hk;bytedance.com;hku.hk", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Hong Kong;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.bytedance.com", "aff_unique_abbr": "HKU;Bytedance", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Rate Free Sampling in Constrained Domains", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71565", "id": "TNAGFUcSP7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cdee6c3eaa2adc285f11da7711a75c12-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TNAGFUcSP7", "openreview": "https://openreview.net/forum?id=TNAGFUcSP7", "poster": "/media/PosterPDFs/NeurIPS%202023/71565.png?t=1703593790.530934", "slides": "https://nips.cc/virtual/2023/poster/71565", "video": "https://nips.cc/virtual/2023/poster/71565", "author_site": "Louis Sharrock, Lester Mackey, Christopher Nemeth", "tldr": "", "abstract": "We introduce a suite of new particle-based algorithms for sampling in constrained domains which are entirely learning rate free. Our approach leverages coin betting ideas from convex optimisation, and the viewpoint of constrained sampling as a mirrored optimisation problem on the space of probability measures. Based on this viewpoint, we also introduce a unifying framework for several existing constrained sampling algorithms, including mirrored Langevin dynamics and mirrored Stein variational gradient descent. We demonstrate the performance of our algorithms on a range of numerical examples, including sampling from targets on the simplex, sampling with fairness constraints, and constrained sampling problems in post-selection inference. 
Our results indicate that our algorithms achieve competitive performance with existing constrained sampling methods, without the need to tune any hyperparameters.", "keywords": "Sampling;Particle Based Variational Inference;Bayesian Inference;Wasserstein Gradient Descent;Coin Betting;Constrained Domains", "primary_area": "", "supplementary_material": "/attachment/bfe7f24f62495d5ef945e3a337738a1c10a2f63a.zip", "author": "Louis Sharrock;Lester Mackey;Christopher Nemeth", "authorids": "~Louis_Sharrock1;~Lester_Mackey1;~Christopher_Nemeth1", "gender": "M;M;M", "homepage": "https://louissharrock.github.io/;https://stanford.edu/~lmackey;http://www.lancs.ac.uk/~nemeth/", "dblp": "304/5319;05/2961;88/10513", "google_scholar": "O0xSdYcAAAAJ;erv7TP0AAAAJ;https://scholar.google.co.uk/citations?user=17-Ze24AAAAJ", "orcid": "0000-0003-1691-1215;0000-0002-1102-0387;0000-0002-9084-3866", "linkedin": "louissharrock/;lester-mackey-5902909;christopher-nemeth-815963233/", "or_profile": "~Louis_Sharrock1;~Lester_Mackey1;~Christopher_Nemeth1", "aff": "Lancaster University;Microsoft Research New England;Lancaster University", "aff_domain": "lancaster.ac.uk;microsoft.com;lancaster.ac.uk", "position": "Postdoc;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nsharrock2023learning,\ntitle={Learning Rate Free Bayesian Inference in Constrained Domains},\nauthor={Louis Sharrock and Lester Mackey and Christopher Nemeth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TNAGFUcSP7}\n}", "github": "", "project": "", "reviewers": "aGzU;6R7u;Uc6g;fTFt", "pdf_size": 1083481, "rating": "6;7;7;8", "confidence": "1;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "99;262;119;80", "wc_strengths": "98;231;49;80", "wc_weaknesses": "69;471;20;45", "wc_questions": "43;143;190;47", "wc_limitations": "16;85;20;16", "wc_review": "325;1192;398;268", "wc_reply_reviewers": "47;23;35;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 140.0, 71.773950706367 ], "wc_strengths_avg": [ 114.5, 69.50719387228922 ], "wc_weaknesses_avg": [ 151.25, 185.41895129678628 ], "wc_questions_avg": [ 105.75, 62.99751979244897 ], "wc_limitations_avg": [ 34.25, 29.345996319770776 ], "wc_review_avg": [ 545.75, 375.9470541179968 ], "wc_reply_reviewers_avg": [ 26.25, 17.36915369268175 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3887464979706469682&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "lancaster.ac.uk;microsoft.com;lancaster.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Lancaster University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.lancaster.ac.uk;https://www.microsoft.com/en-us/research/group/microsoft-research-new-england", "aff_unique_abbr": "Lancaster;MSR NE", "aff_campus_unique_index": "1", "aff_campus_unique": ";New England", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United 
States" }, { "title": "Information Geometry of the Retinal Representation Manifold", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71564", "id": "TNLO8KNFFZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8a267516a7a697965c6ae4f48b908605-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TNLO8KNFFZ", "openreview": "https://openreview.net/forum?id=TNLO8KNFFZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71564.png?t=1701023508.4084318", "slides": "https://nips.cc/virtual/2023/poster/71564", "video": "https://nips.cc/virtual/2023/poster/71564", "author_site": "Xuehao Ding, Dongsoo Lee, Joshua Melander, George Sivulka, Surya Ganguli, Stephen Baccus", "tldr": "", "abstract": "The ability for the brain to discriminate among visual stimuli is constrained by their retinal representations. Previous studies of visual discriminability have been limited to either low-dimensional artificial stimuli or pure theoretical considerations without a realistic encoding model. Here we propose a novel framework for understanding stimulus discriminability achieved by retinal representations of naturalistic stimuli with the method of information geometry. To model the joint probability distribution of neural responses conditioned on the stimulus, we created a stochastic encoding model of a population of salamander retinal ganglion cells based on a three-layer convolutional neural network model. This model not only accurately captured the mean response to natural scenes but also a variety of second-order statistics. With the model and the proposed theory, we computed the Fisher information metric over stimuli to study the most discriminable stimulus directions. We found that the most discriminable stimulus varied substantially across stimuli, allowing an examination of the relationship between the most discriminable stimulus and the current stimulus. By examining responses generated by the most discriminable stimuli we further found that the most discriminative response mode is often aligned with the most stochastic mode. This finding carries the important implication that under natural scenes, retinal noise correlations are information-limiting rather than increasing information transmission as has been previously speculated. We additionally observed that sensitivity saturates less in the population than for single cells and that as a function of firing rate, Fisher information varies less than sensitivity. 
We conclude that under natural scenes, population coding benefits from complementary coding and helps to equalize the information carried by different firing rates, which may facilitate decoding of the stimulus under principles of information maximization.", "keywords": "neural coding;theoretical neuroscience;stochastic methods;neural networks", "primary_area": "", "supplementary_material": "/attachment/08f20df133d89590008627b3adc783fb38b09dd0.pdf", "author": "Xuehao Ding;Dongsoo Lee;Joshua Brendan Melander;George Sivulka;Surya Ganguli;Stephen Baccus", "authorids": "~Xuehao_Ding1;~Dongsoo_Lee3;~Joshua_Brendan_Melander1;~George_Sivulka1;~Surya_Ganguli1;~Stephen_Baccus2", "gender": "M;;M;M;M;", "homepage": "https://profiles.stanford.edu/xuehao-ding;;https://www.jbmelander.com;http://sivulka.me;http://ganguli-gang.stanford.edu/surya.html;", "dblp": "241/9691;;227/3061;;56/10453;", "google_scholar": "I6oIgqIAAAAJ;;YIKKiYAAAAAJ;yli3M9UAAAAJ;;", "orcid": "0000-0003-4676-878X;;;;;", "linkedin": ";;;sivulka/;;", "or_profile": "~Xuehao_Ding1;~Dongsoo_Lee3;~Joshua_Brendan_Melander1;~George_Sivulka1;~Surya_Ganguli1;~Stephen_Baccus2", "aff": "Stanford University;;Stanford University;;Stanford University;", "aff_domain": "stanford.edu;;stanford.edu;;@stanford.edu;", "position": "PhD student;;PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nding2023information,\ntitle={Information Geometry of the Retinal Representation Manifold},\nauthor={Xuehao Ding and Dongsoo Lee and Joshua Brendan Melander and George Sivulka and Surya Ganguli and Stephen Baccus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TNLO8KNFFZ}\n}", "github": "", "project": "", "reviewers": "SiWR;GBMw;pZyp;d8Ka", "pdf_size": 5304077, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "135;86;40;106", "wc_strengths": "56;32;101;29", "wc_weaknesses": "143;205;276;48", "wc_questions": "249;154;456;207", "wc_limitations": "86;7;32;10", "wc_review": "669;484;905;400", "wc_reply_reviewers": "208;102;30;200", "wc_reply_authors": "303;292;0;323", "reply_reviewers": "2;1;1;2", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 91.75, 34.585943676586304 ], "wc_strengths_avg": [ 54.5, 28.81405906844782 ], "wc_weaknesses_avg": [ 168.0, 83.75261189957003 ], "wc_questions_avg": [ 266.5, 114.46942823304396 ], "wc_limitations_avg": [ 33.75, 31.67313530422904 ], "wc_review_avg": [ 614.5, 193.90783893386055 ], "wc_reply_reviewers_avg": [ 135.0, 73.60027173862879 ], "wc_reply_authors_avg": [ 229.5, 132.9671011942428 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17071111801398688300&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "stanford.edu;;stanford.edu;;@stanford.edu;", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": 
"0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-task Graph Neural Architecture Search with Task-aware Collaboration and Curriculum", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71563", "id": "TOxpAwp0VE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e839c9c398c58c878a394633b806ccd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TOxpAwp0VE", "openreview": "https://openreview.net/forum?id=TOxpAwp0VE", "poster": "/media/PosterPDFs/NeurIPS%202023/71563.png?t=1701691717.0281177", "slides": "https://nips.cc/virtual/2023/poster/71563", "video": "https://nips.cc/virtual/2023/poster/71563", "author_site": "Yijian Qin, Xin Wang, Ziwei Zhang, Hong Chen, Wenwu Zhu", "tldr": "", "abstract": "Graph neural architecture search (GraphNAS) has shown great potential for automatically designing graph neural architectures for graph related tasks. However, multi-task GraphNAS capable of handling multiple tasks simultaneously has been largely unexplored in literature, posing great challenges to capture the complex relations and influences among different tasks. To tackle this problem, we propose a novel multi-task graph neural architecture search with task-aware collaboration and curriculum (MTGC3), which is able to simultaneously discover optimal architectures for different tasks and learn the collaborative relationships among different tasks in a joint manner. Specifically, we design the layer-wise disentangled supernet capable of managing multiple architectures in a unified framework, which combines with our proposed soft task-collaborative module to learn the transferability relationships between tasks. We further develop the task-wise curriculum training strategy to improve the architecture search procedure via reweighing the influence of different tasks based on task difficulties. 
Extensive experiments show that our proposed MTGC3 model achieves state-of-the-art performance against several baselines in multi-task scenarios, demonstrating its ability to discover effective architectures and capture the collaborative relationships for multiple tasks.", "keywords": "graph neural network;neural architecture search;multi-task learning", "primary_area": "", "supplementary_material": "/attachment/496b8ef3e662d8547f8a436662b080148ac601ae.pdf", "author": "Yijian Qin;Xin Wang;Ziwei Zhang;Hong Chen;Wenwu Zhu", "authorids": "~Yijian_Qin2;~Xin_Wang17;~Ziwei_Zhang1;~Hong_Chen9;~Wenwu_Zhu1", "gender": "M;M;;M;M", "homepage": "http://www.cs.tsinghua.edu.cn/;http://mn.cs.tsinghua.edu.cn/xinwang/;;https://forchchch.github.io/;http://media.cs.tsinghua.edu.cn/en/zww", "dblp": "290/1902;10/5630-19;;52/4150-11;97/6308-1.html", "google_scholar": "bSKCQwkAAAAJ;YPOBHYUAAAAJ;;;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ", "orcid": ";0000-0002-0351-2939;;0000-0002-0943-2286;0000-0003-2236-9290", "linkedin": ";;;;", "or_profile": "~Yijian_Qin2;~Xin_Wang17;~Ziwei_Zhang1;~Hong_Chen9;~Wenwu_Zhu1", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;;PhD student;Full Professor", "bibtex": "@inproceedings{\nqin2023multitask,\ntitle={Multi-task Graph Neural Architecture Search with Task-aware Collaboration and Curriculum},\nauthor={Yijian Qin and Xin Wang and Ziwei Zhang and Hong Chen and Wenwu Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TOxpAwp0VE}\n}", "github": "", "project": "", "reviewers": "cm5U;a5eh;Eks4;sAxj;xv8u", "pdf_size": 592353, "rating": "4;6;7;7;8", "confidence": "4;5;4;4;5", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "60;89;80;45;55", "wc_strengths": "22;102;57;53;58", "wc_weaknesses": "206;76;132;88;92", "wc_questions": "4;4;2;4;4", "wc_limitations": "4;5;1;1;1", "wc_review": "296;276;272;191;210", "wc_reply_reviewers": "0;0;48;131;19", "wc_reply_authors": "0;0;24;27;25", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 65.8, 16.26530048907797 ], "wc_strengths_avg": [ 58.4, 25.523322667709234 ], "wc_weaknesses_avg": [ 118.8, 47.49063065489865 ], "wc_questions_avg": [ 3.6, 0.8 ], "wc_limitations_avg": [ 2.4, 1.7435595774162693 ], "wc_review_avg": [ 249.0, 40.87052727822336 ], "wc_reply_reviewers_avg": [ 39.6, 48.95957516155548 ], "wc_reply_authors_avg": [ 15.2, 12.448293055676348 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3611575592573077, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5135129505019193754&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;cs.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Parameter and Computation Efficient Transfer Learning for Vision-Language Pre-trained Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71562", "id": "TPeAmxwPK2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/80e354fdac2c7fbf439a51f4853edbac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TPeAmxwPK2", "openreview": "https://openreview.net/forum?id=TPeAmxwPK2", "poster": "/media/PosterPDFs/NeurIPS%202023/71562.png?t=1701936633.0932162", "slides": "https://nips.cc/virtual/2023/poster/71562", "video": "https://nips.cc/virtual/2023/poster/71562", "author_site": "Qiong Wu, Wei Yu, Yiyi Zhou, Shubin Huang, Xiaoshuai Sun, Rongrong Ji", "tldr": "", "abstract": "With ever increasing parameters and computation, vision-language pre-trained (VLP) models exhibit prohibitive expenditure in downstream task adaption. Recent endeavors mainly focus on parameter efficient transfer learning (PETL) for VLP models by only updating a small number of parameters. However, excessive computational overhead still plagues the application of VLPs. In this paper, we aim at parameter and computation efficient transfer learning (PCETL) for VLP models. In particular, PCETL not only needs to limit the number of trainable parameters in VLP models, but also to reduce the computational redundancy during inference, thus enabling a more efficient transfer. To approach this target, we propose a novel dynamic architecture skipping (DAS) approach towards effective PCETL. Instead of directly optimizing the intrinsic architectures of VLP models, DAS first observes the significances of their modules to downstream tasks via a reinforcement learning (RL) based process, and then skips the redundant ones with lightweight networks, i.e. adapters, according to the obtained rewards. In this case, the VLP model can well maintain the scale of trainable parameters while speeding up its inference on downstream tasks. To validate DAS, we apply it to two representative VLP models, namely ViLT and METER, and conduct extensive experiments on a bunch of VL tasks. The experimental results not only show the great advantages of DAS in reducing computational complexity, e.g. -11.97% FLOPs of METER on VQA2.0, but also confirm its competitiveness against existing PETL methods in terms of parameter scale and performance. 
Our source code is given in our appendix.", "keywords": "vision and language;parameter and computation efficient transfer learning", "primary_area": "", "supplementary_material": "/attachment/cf36487f1271657cc959f38e1473a2715e675e3e.zip", "author": "Qiong Wu;Wei Yu;Yiyi Zhou;Shubin Huang;Xiaoshuai Sun;Rongrong Ji", "authorids": "~Qiong_Wu2;~Wei_Yu19;~Yiyi_Zhou1;~Shubin_Huang2;~Xiaoshuai_Sun3;~Rongrong_Ji5", "gender": "M;F;M;M;M;M", "homepage": ";https://github.com/FISH-dotcom;;;https://sites.google.com/view/xssun;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "54/4158-12;;174/0086;;26/5787.html;86/5681", "google_scholar": "HyKLYKYAAAAJ;;w3_2ep0AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;KPMK3B4AAAAJ;", "orcid": ";;;;0000-0003-3912-9306;", "linkedin": ";;;;;", "or_profile": "~Qiong_Wu2;~Wei_Yu19;~Yiyi_Zhou1;~Shubin_Huang2;~Xiaoshuai_Sun3;~Rongrong_Ji5", "aff": "Xiamen University;Xiamen University;Xiamen University;Xiamen University;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "PhD student;MS student;Associate Professor;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023parameter,\ntitle={Parameter and Computation Efficient Transfer Learning for Vision-Language Pre-trained Models},\nauthor={Qiong Wu and Wei Yu and Yiyi Zhou and Shubin Huang and Xiaoshuai Sun and Rongrong Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TPeAmxwPK2}\n}", "github": "", "project": "", "reviewers": "fJhT;pBnL;LDaL;5vAx", "pdf_size": 917176, "rating": "4;5;6;7", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "36;54;160;41", "wc_strengths": "18;41;73;60", "wc_weaknesses": "201;98;168;23", "wc_questions": "36;107;71;7", "wc_limitations": "18;7;9;1", "wc_review": "309;307;481;132", "wc_reply_reviewers": "720;0;54;35", "wc_reply_authors": "1636;0;0;16", "reply_reviewers": "5;0;1;1", "reply_authors": "5;1;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 72.75, 50.80046751753373 ], "wc_strengths_avg": [ 48.0, 20.724381776062707 ], "wc_weaknesses_avg": [ 122.5, 68.43427503817075 ], "wc_questions_avg": [ 55.25, 37.4991666574072 ], "wc_limitations_avg": [ 8.75, 6.098155458825234 ], "wc_review_avg": [ 307.25, 123.3944386915391 ], "wc_reply_reviewers_avg": [ 202.25, 299.5499749624426 ], "wc_reply_authors_avg": [ 413.0, 706.1295915056952 ], "reply_reviewers_avg": [ 1.75, 1.920286436967152 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5377337103600681571&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Xiamen University", "aff_unique_dep": "", "aff_unique_url": "https://www.xmu.edu.cn", "aff_unique_abbr": "XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural Injective Functions for Multisets, Measures and Graphs via a Finite 
Witness Theorem", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71561", "id": "TQlpqmCeMe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/84b686f7cc7b7751e9aaac0da74f755a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TQlpqmCeMe", "openreview": "https://openreview.net/forum?id=TQlpqmCeMe", "poster": "/media/PosterPDFs/NeurIPS%202023/71561.png?t=1699874819.7800715", "slides": "https://nips.cc/virtual/2023/poster/71561", "video": "https://nips.cc/virtual/2023/poster/71561", "author_site": "Tal Amir, Steven Gortler, Ilai Avni, Ravina Ravina, Nadav Dym", "tldr": "", "abstract": "Injective multiset functions have a key role in the theoretical study of machine learning on multisets and graphs. Yet, there remains a gap between the provably injective multiset functions considered in theory, which typically rely on polynomial moments, and the multiset functions used in practice, which rely on $\\textit{neural moments}$ \u2014 whose injectivity on multisets has not been studied to date.\n\nIn this paper, we bridge this gap by showing that moments of neural networks do define injective multiset functions, provided that an analytic non-polynomial activation is used. The number of moments required by our theory is optimal essentially up to a multiplicative factor of two. To prove this result, we state and prove a $\\textit{finite witness theorem}$, which is of independent interest. \n\nAs a corollary to our main theorem, we derive new approximation results for functions on multisets and measures, and new separation results for graph neural networks. We also provide two negative results: (1) moments of piecewise-linear neural networks cannot be injective multiset functions; and (2) even when moment-based multiset functions are injective, they can never be bi-Lipschitz.", "keywords": "Equivariant Neural Networks;Universal approximation;Geometric deep learning;multiset learning;injective multiset functions;learning on measures. WL test", "primary_area": "", "supplementary_material": "/attachment/d0985ba45d65324a8cc79effa17615bfbb815b22.zip", "author": "Tal Amir;Steven J. Gortler;Ilai Avni;Ravina Ravina;Nadav Dym", "authorids": "~Tal_Amir1;~Steven_J._Gortler1;~Ilai_Avni1;ravina@campus.technion.ac.il;~Nadav_Dym1", "gender": ";;M;;", "homepage": "https://tal-amir.github.io/;;;;", "dblp": ";;;;", "google_scholar": "https://scholar.google.co.il/citations?user=Lx2W9vMAAAAJ;;;;", "orcid": "0009-0003-1868-1860;;;;", "linkedin": ";;ilai-avni-9b036a276;;", "or_profile": "~Tal_Amir1;~Steven_J._Gortler1;~Ilai_Avni1;ravina@campus.technion.ac.il;~Nadav_Dym1", "aff": "Technion - Israel Institute of Technology, Technion;;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;;", "aff_domain": "technion.ac.il;;campus.technion.ac.il;;", "position": "Postdoc;;Undergrad student;;", "bibtex": "@inproceedings{\namir2023neural,\ntitle={Neural Injective Functions for Multisets, Measures and Graphs via a Finite Witness Theorem},\nauthor={Tal Amir and Steven J. 
Gortler and Ilai Avni and Ravina Ravina and Nadav Dym},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TQlpqmCeMe}\n}", "github": "", "project": "", "reviewers": "gUtg;ezv6;ejpb", "pdf_size": 743373, "rating": "7;7;8", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "4;3;4", "wc_summary": "147;281;131", "wc_strengths": "130;76;65", "wc_weaknesses": "58;281;77", "wc_questions": "114;59;133", "wc_limitations": "1;1;9", "wc_review": "450;698;415", "wc_reply_reviewers": "14;27;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 186.33333333333334, 67.25738290742181 ], "wc_strengths_avg": [ 90.33333333333333, 28.40578970718626 ], "wc_weaknesses_avg": [ 138.66666666666666, 100.94332843509548 ], "wc_questions_avg": [ 102.0, 31.379398762032817 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 521.0, 125.97089610964377 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 6.649979114420001 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6393535267883871209&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "technion.ac.il;;campus.technion.ac.il;;", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "GLOBER: Coherent Non-autoregressive Video Generation via GLOBal Guided Video DecodER", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71560", "id": "TRbklCR2ZW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efe36e55d80a94d1726f660b8d237a0f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TRbklCR2ZW", "openreview": "https://openreview.net/forum?id=TRbklCR2ZW", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71560", "video": "https://nips.cc/virtual/2023/poster/71560", "author_site": "Mingzhen Sun, Weining Wang, Zihan Qin, Jiahui Sun, Sihan Chen, Jing Liu", "tldr": "", "abstract": "Video generation necessitates both global coherence and local realism. This work presents a novel non-autoregressive method GLOBER, which first generates global features to obtain comprehensive global guidance and then synthesizes video frames based on the global features to generate coherent videos. Specifically, we propose a video auto-encoder, where a video encoder encodes videos into global features, and a video decoder, built on a diffusion model, decodes the global features and synthesizes video frames in a non-autoregressive manner. 
To achieve maximum flexibility, our video decoder perceives temporal information through normalized frame indexes, which enables it to synthesize arbitrary sub video clips with predetermined starting and ending frame indexes. Moreover, a novel adversarial loss is introduced to improve the global coherence and local realism between the synthesized video frames. Finally, we employ a diffusion-based video generator to fit the global features outputted by the video encoder for video generation. Extensive experimental results demonstrate the effectiveness and efficiency of our proposed method, and new state-of-the-art results have been achieved on multiple benchmarks.", "keywords": "Video Generation;Video Autoencoder;Diffusion Probabilistic Model", "primary_area": "", "supplementary_material": "/attachment/a3c747157f50b4d8abdce74cdef258f0d8ef154b.pdf", "author": "Mingzhen Sun;Weining Wang;Zihan Qin;Jiahui Sun;Sihan Chen;Jing Liu", "authorids": "~Mingzhen_Sun1;~Weining_Wang3;~Zihan_Qin1;~Jiahui_Sun2;~Sihan_Chen3;~Jing_Liu1", "gender": "F;F;F;M;M;F", "homepage": "https://github.com/iva-mzsun;;https://github.com/qzhqaq;https://github.com/LuoBoBo00;;http://www.nlpr.ia.ac.cn/iva/liujing/", "dblp": "296/4017;97/6006-1.html;;;;72/2590-1.html", "google_scholar": ";NDPvobAAAAAJ;;;4pHKj8kAAAAJ;sOI-S7oAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Mingzhen_Sun1;~Weining_Wang3;~Zihan_Qin1;~Jiahui_Sun2;~Sihan_Chen3;~Jing_Liu1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;, Institute of automation, Chinese academy of science;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ucas.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn", "position": "PhD student;Assistant Professor;MS student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsun2023glober,\ntitle={{GLOBER}: Coherent Non-autoregressive Video Generation via {GLOB}al Guided Video Decod{ER}},\nauthor={Mingzhen Sun and Weining Wang and Zihan Qin and Jiahui Sun and Sihan Chen and Jing Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TRbklCR2ZW}\n}", "github": "", "project": "", "reviewers": "P49b;xuBL;JPZt;4poX", "pdf_size": 3317506, "rating": "4;5;6;6", "confidence": "5;5;4;4", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "1;3;2;3", "wc_summary": "48;55;145;50", "wc_strengths": "37;30;33;17", "wc_weaknesses": "177;47;291;95", "wc_questions": "55;6;5;3", "wc_limitations": "9;6;10;5", "wc_review": "326;144;484;170", "wc_reply_reviewers": "352;19;103;72", "wc_reply_authors": "731;19;109;55", "reply_reviewers": "2;1;2;2", "reply_authors": "4;2;3;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 74.5, 40.7829621288106 ], "wc_strengths_avg": [ 29.25, 7.495832175282475 ], "wc_weaknesses_avg": [ 152.5, 92.49189153650173 ], "wc_questions_avg": [ 17.25, 21.821720830401986 ], "wc_limitations_avg": [ 7.5, 2.0615528128088303 ], "wc_review_avg": [ 281.0, 136.31214179228496 ], "wc_reply_reviewers_avg": [ 136.5, 127.99316387995103 ], "wc_reply_authors_avg": [ 228.5, 291.8813971461696 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], 
"reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8966000330735645255&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ucas.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "BayesTune: Bayesian Sparse Deep Model Fine-tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71559", "id": "TRuqrVsmZK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cde2dc73e0ad650176cdfa9b779eefc7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TRuqrVsmZK", "openreview": "https://openreview.net/forum?id=TRuqrVsmZK", "poster": "/media/PosterPDFs/NeurIPS%202023/71559.png?t=1699368603.2730172", "slides": "https://nips.cc/virtual/2023/poster/71559", "video": "https://nips.cc/virtual/2023/poster/71559", "author_site": "Minyoung Kim, Timothy Hospedales", "tldr": "", "abstract": "Deep learning practice is increasingly driven by powerful foundation models (FM), pre-trained at scale and then fine-tuned for specific tasks of interest. A key property of this workflow is the efficacy of performing sparse or parameter-efficient fine-tuning, meaning that by updating only a tiny fraction of the whole FM parameters on a downstream task can lead to surprisingly good performance, often even superior to a full model update. However, it is not clear what is the optimal and principled way to select which parameters to update. Although a growing number of sparse fine-tuning ideas have been proposed, they are mostly not satisfactory, relying on hand-crafted heuristics or heavy approximation. In this paper we propose a novel Bayesian sparse fine-tuning algorithm: we place a (sparse) Laplace prior for each parameter of the FM, with the mean equal to the initial value and the scale parameter having a hyper-prior that encourages small scale. Roughly speaking, the posterior means of the scale parameters indicate how important it is to update the corresponding parameter away from its initial value when solving the downstream task. Given the sparse prior, most scale parameters are small a posteriori, and the few large-valued scale parameters identify those FM parameters that crucially need to be updated away from their initial values. Based on this, we can threshold the scale parameters to decide which parameters to update or freeze, leading to a principled sparse fine-tuning strategy. To efficiently infer the posterior distribution of the scale parameters, we adopt the Langevin MCMC sampler, requiring only two times the complexity of the vanilla SGD. 
Tested on popular NLP benchmarks as well as the VTAB vision tasks, our approach shows significant improvement over the state of the art (e.g., 1 percentage point higher than the best SOTA when fine-tuning RoBERTa for the GLUE and SuperGLUE benchmarks).", "keywords": "Parameter-efficient Foundation model fine-tuning;Bayesian methods;Stochastic-Gradient MCMC", "primary_area": "", "supplementary_material": "/attachment/df95e36f06e38d3c0ec4d6b734babe1e30ae269e.zip", "author": "Minyoung Kim;Timothy Hospedales", "authorids": "~Minyoung_Kim2;~Timothy_Hospedales1", "gender": "M;M", "homepage": "https://sites.google.com/site/mikim21/;http://homepages.inf.ed.ac.uk/thospeda/", "dblp": ";32/3545", "google_scholar": ";https://scholar.google.fr/citations?user=nHhtvqkAAAAJ", "orcid": ";0000-0003-4867-7486", "linkedin": ";timothyhospedales/", "or_profile": "~Minyoung_Kim2;~Timothy_Hospedales1", "aff": "Samsung AI Center, Cambridge, UK;Samsung AI Research Centre", "aff_domain": "samsung.com;samsung.com", "position": "Senior Researcher;Principal Researcher", "bibtex": "@inproceedings{\nkim2023bayestune,\ntitle={BayesTune: Bayesian Sparse Deep Model Fine-tuning},\nauthor={Minyoung Kim and Timothy Hospedales},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TRuqrVsmZK}\n}", "github": "", "project": "", "reviewers": "JDrk;ZJiD;F3ir;GHms;2Yw3", "pdf_size": 9049781, "rating": "5;5;5;7;8", "confidence": "4;3;3;3;4", "soundness": "3;2;3;3;4", "novelty": "2;3;2;3;3", "presentation": "4;3;3;2;4", "wc_summary": "124;153;149;98;54", "wc_strengths": "50;38;32;121;41", "wc_weaknesses": "210;150;197;166;21", "wc_questions": "41;189;91;95;40", "wc_limitations": "7;1;18;61;1", "wc_review": "432;531;487;541;157", "wc_reply_reviewers": "81;13;369;78;6", "wc_reply_authors": "394;4;790;0;4", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 115.6, 36.576495184749454 ], "wc_strengths_avg": [ 56.4, 32.81828758482075 ], "wc_weaknesses_avg": [ 148.8, 67.37774113162298 ], "wc_questions_avg": [ 91.2, 54.260114264531374 ], "wc_limitations_avg": [ 17.6, 22.570777567465413 ], "wc_review_avg": [ 429.6, 141.63558874802618 ], "wc_reply_reviewers_avg": [ 109.4, 133.5434011847834 ], "wc_reply_authors_avg": [ 238.4, 314.70468696859285 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3227486121839514, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9054410449883530054&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "samsung.com;samsung.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "AI Center", "aff_unique_url": "https://www.samsung.com/global/research-innovation/ai-research/", "aff_unique_abbr": "SAC", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;South Korea" }, { "title": "Context Shift Reduction for Offline Meta-Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71558", "id": "TStMZH3Xqx", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd489a44f3bcb9f122e4931ef21d0c43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TStMZH3Xqx", "openreview": "https://openreview.net/forum?id=TStMZH3Xqx", "poster": "/media/PosterPDFs/NeurIPS%202023/71558.png?t=1700392314.7555044", "slides": "https://nips.cc/virtual/2023/poster/71558", "video": "https://nips.cc/virtual/2023/poster/71558", "author_site": "Yunkai Gao, Rui Zhang, Jiaming Guo, Fan Wu, Qi Yi, Shaohui Peng, Siming Lan, Ruizhi Chen, Zidong Du, Xing Hu, Qi Guo, Ling Li, Yunji Chen", "tldr": "", "abstract": "Offline meta-reinforcement learning (OMRL) utilizes pre-collected offline datasets to enhance the agent's generalization ability on unseen tasks. However, the context shift problem arises due to the distribution discrepancy between the contexts used for training (from the behavior policy) and testing (from the exploration policy). The context shift problem leads to incorrect task inference and further deteriorates the generalization ability of the meta-policy. Existing OMRL methods either overlook this problem or attempt to mitigate it with additional information. In this paper, we propose a novel approach called Context Shift Reduction for OMRL (CSRO) to address the context shift problem with only offline datasets. The key insight of CSRO is to minimize the influence of policy in context during both the meta-training and meta-test phases. During meta-training, we design a max-min mutual information representation learning mechanism to diminish the impact of the behavior policy on task representation. In the meta-test phase, we introduce the non-prior context collection strategy to reduce the effect of the exploration policy. Experimental results demonstrate that CSRO significantly reduces the context shift and improves the generalization ability, surpassing previous methods across various challenging domains.", "keywords": "offline meta-reinforcement learning;offline reinforcement learning;meta-reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/013fc0160416f5b444bfdab319efcc8ae5cff469.zip", "author": "Yunkai Gao;Rui Zhang;Jiaming Guo;Fan Wu;Qi Yi;Shaohui Peng;Siming Lan;Ruizhi Chen;Zidong Du;Xing Hu;Qi Guo;Ling Li;Yunji Chen", "authorids": "~Yunkai_Gao1;~Rui_Zhang1;~Jiaming_Guo2;~Fan_Wu11;~Qi_Yi1;~Shaohui_Peng2;~Siming_Lan1;~Ruizhi_Chen3;~Zidong_Du1;~Xing_Hu3;~Qi_Guo4;~Ling_Li6;~Yunji_Chen1", "gender": "M;F;M;M;M;M;M;;F;M;F;M;M", "homepage": ";;;http://fanwu.academic.site/;;https://github.com/niiceMing;;https://zidongdu.github.io/;;http://novel.ict.ac.cn/qguo;;;", "dblp": "44/8056-1.html;60/2536-40;63/8512;;295/8813;;120/4143;44/11216;49/10052-1;67/398-1;92/5001-1;48/474;246/8768", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;dse6jAsAAAAJ;;;veu6_ykAAAAJ;;x_wFaYgAAAAJ;https://scholar.google.com.sg/citations?user=8N9ym9YAAAAJ;Hc3iRxUAAAAJ;;;;", "orcid": "0000-0003-4126-7441;;;;;;0000-0001-7219-4658;0000-0002-7603-4210;;;0000-0001-8877-9052;;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Yunkai_Gao1;~Rui_Zhang1;~Jiaming_Guo2;~Fan_Wu11;~Qi_Yi1;~Siming_Lan1;~Ruizhi_Chen3;~Zidong_Du1;~Xing_Hu3;~Qi_Guo4;~Ling_Li6;~Yunji_Chen1;~shaohui_peng1", "aff": "University of Science and Technology of China;Institute of Computing Technology, CAS;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences, Tsinghua University;University of Science and Technology of China;University of Science and Technology of China;Institute 
of Software Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Software, CAS;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_domain": "ustc.edu.cn;ict.ac.cn;ict.ac.cn;ucas.edu.cn;ustc.edu.cn;ustc.edu.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Assistant Professor;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Associate Professor;Full Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ngao2023context,\ntitle={Context Shift Reduction for Offline Meta-Reinforcement Learning},\nauthor={Yunkai Gao and Rui Zhang and Jiaming Guo and Fan Wu and Qi Yi and Shaohui Peng and Siming Lan and Ruizhi Chen and Zidong Du and Xing Hu and Qi Guo and Ling Li and Yunji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TStMZH3Xqx}\n}", "github": "", "project": "", "reviewers": "4mWP;DbDu;mr2B;qvwn", "pdf_size": 9047198, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;2;4;3", "wc_summary": "31;57;90;73", "wc_strengths": "11;43;93;37", "wc_weaknesses": "133;87;304;59", "wc_questions": "96;75;125;94", "wc_limitations": "4;19;26;22", "wc_review": "275;281;638;285", "wc_reply_reviewers": "34;14;53;372", "wc_reply_authors": "0;0;0;1122", "reply_reviewers": "1;1;1;3", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 62.75, 21.72987574745884 ], "wc_strengths_avg": [ 46.0, 29.68164415931166 ], "wc_weaknesses_avg": [ 145.75, 95.10881925457807 ], "wc_questions_avg": [ 97.5, 17.867568385205637 ], "wc_limitations_avg": [ 17.75, 8.317902379807062 ], "wc_review_avg": [ 369.75, 154.91509771484507 ], "wc_reply_reviewers_avg": [ 118.25, 147.15022086289915 ], "wc_reply_authors_avg": [ 280.5, 485.8402515230701 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1441105094406138024&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;ict.ac.cn;ict.ac.cn;ucas.edu.cn;ustc.edu.cn;ustc.edu.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 13, "aff_unique_index": "0;1;1;2;0;0;1;1;1;1;1;1;1", "aff_unique_norm": "University of Science and Technology of China;Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Computing Technology;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "USTC;CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Scalarization for Multi-Task and Multi-Domain Learning at Scale", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71557", "id": "TSuq3debnD", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/368559ed8ede03b21f624feaeb3a5867-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TSuq3debnD", "openreview": "https://openreview.net/forum?id=TSuq3debnD", "poster": "/media/PosterPDFs/NeurIPS%202023/71557.png?t=1701711123.609142", "slides": "https://nips.cc/virtual/2023/poster/71557", "video": "https://nips.cc/virtual/2023/poster/71557", "author_site": "Amelie Royer, Tijmen Blankevoort, Babak Ehteshami Bejnordi", "tldr": "", "abstract": "Training a single model on multiple input domains and/or output tasks allows for compressing information from multiple sources into a unified backbone hence improves model efficiency. It also enables potential positive knowledge transfer across tasks/domains, leading to improved accuracy and data-efficient training. However, optimizing such networks is a challenge, in particular due to discrepancies between the different tasks or domains: Despite several hypotheses and solutions proposed over the years, recent work has shown that uniform scalarization training, i.e., simply minimizing the average of the task losses, yields on-par performance with more costly SotA optimization methods. This raises the issue of how well we understand the training dynamics of multi-task and multi-domain networks. In this work, we first devise a large-scale unified analysis of multi-domain and multi-task learning to better understand the dynamics of scalarization across varied task/domain combinations and model sizes. Following these insights, we then propose to leverage population-based training to efficiently search for the optimal scalarization weights when dealing with a large number of tasks or domains.", "keywords": "multitask;multidomain;optimization;population based training", "primary_area": "", "supplementary_material": "/attachment/ee8b8c290917fe884551db7da8e6f254b53a82cc.zip", "author": "Amelie Royer;Tijmen Blankevoort;Babak Ehteshami Bejnordi", "authorids": "~Amelie_Royer1;~Tijmen_Blankevoort1;~Babak_Ehteshami_Bejnordi1", "gender": "F;M;M", "homepage": "https://ameroyer.github.io;;http://babakint.com/", "dblp": "https://dblp.uni-trier.de/pid/169/4804;;175/5607", "google_scholar": "https://scholar.google.fr/citations?user=P9-oT8AAAAAJ;OGEyrG8AAAAJ;https://scholar.google.se/citations?user=Qk-AMk0AAAAJ", "orcid": ";;", "linkedin": ";tijmen-blankevoort-a5633a24/;babakint/", "or_profile": "~Amelie_Royer1;~Tijmen_Blankevoort1;~Babak_Ehteshami_Bejnordi1", "aff": "Qualcomm AI Research;Qualcomm Inc, QualComm;Qualcomm AI Research, Qualcomm Inc, QualComm", "aff_domain": "qualcomm.com;qti.qualcomm.com;qti.qualcomm.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nroyer2023scalarization,\ntitle={Scalarization for Multi-Task and Multi-Domain Learning at Scale},\nauthor={Amelie Royer and Tijmen Blankevoort and Babak Ehteshami Bejnordi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TSuq3debnD}\n}", "github": "", "project": "", "reviewers": "iXKX;kG9J;wCAe;ytBJ", "pdf_size": 1114939, "rating": "3;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;2", "novelty": "1;3;2;3", "presentation": "2;3;4;3", "wc_summary": "91;51;256;66", "wc_strengths": "6;68;133;71", "wc_weaknesses": "379;170;250;266", "wc_questions": "85;138;62;108", "wc_limitations": "13;7;1;4", "wc_review": "574;434;702;515", "wc_reply_reviewers": "0;44;31;156", "wc_reply_authors": "0;0;0;421", "reply_reviewers": "0;1;1;2", 
"reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.0, 82.08227579691001 ], "wc_strengths_avg": [ 69.5, 44.91380634058975 ], "wc_weaknesses_avg": [ 266.25, 74.56666480405302 ], "wc_questions_avg": [ 98.25, 28.12805538959279 ], "wc_limitations_avg": [ 6.25, 4.437059837324712 ], "wc_review_avg": [ 556.25, 97.73017701815544 ], "wc_reply_reviewers_avg": [ 57.75, 58.933755183256395 ], "wc_reply_authors_avg": [ 105.25, 182.29834749662433 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12288562122683639907&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "qualcomm.com;qti.qualcomm.com;qti.qualcomm.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Qualcomm;Qualcomm Incorporated", "aff_unique_dep": "Qualcomm AI Research;", "aff_unique_url": "https://www.qualcomm.com/research;https://www.qualcomm.com", "aff_unique_abbr": "QAI;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "NAP: Neural 3D Articulated Object Prior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71556", "id": "TTkklyFv7e", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/655846cc914cb7ff977a1ada40866441-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TTkklyFv7e", "openreview": "https://openreview.net/forum?id=TTkklyFv7e", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71556", "video": "https://nips.cc/virtual/2023/poster/71556", "author_site": "Jiahui Lei, Congyue Deng, William B Shen, Leonidas Guibas, Kostas Daniilidis", "tldr": "", "abstract": "We propose Neural 3D Articulated object Prior (NAP), the first 3D deep generative model to synthesize 3D articulated object models. Despite the extensive research on generating 3D static objects, compositions, or scenes, there are hardly any approaches on capturing the distribution of articulated objects, a common object category for human and robot interaction. To generate articulated objects, we first design a novel articulation tree/graph parameterization and then apply a diffusion-denoising probabilistic model over this representation where articulated objects can be generated via denoising from random complete graphs. In order to capture both the geometry and the motion structure whose distribution will affect each other, we design a graph denoising network for learning the reverse diffusion process. We propose a novel distance that adapts widely used 3D generation metrics to our novel task to evaluate generation quality. 
Experiments demonstrate our high performance in articulated object generation as well as its applications on conditioned generation, including Part2Motion, PartNet-Imagination, Motion2Part, and GAPart2Object.", "keywords": "3D articulated objects;diffusion models;generative models", "primary_area": "", "supplementary_material": "/attachment/9c02cf466204a9c7218c49e409192ad1b293bb98.pdf", "author": "Jiahui Lei;Congyue Deng;Bokui Shen;Leonidas Guibas;Kostas Daniilidis", "authorids": "~Jiahui_Lei1;~Congyue_Deng1;~Bokui_Shen1;~Leonidas_Guibas1;~Kostas_Daniilidis1", "gender": "M;F;M;M;M", "homepage": "https://www.cis.upenn.edu/~leijh/;https://cs.stanford.edu/~congyue/;http://geometry.stanford.edu/;http://www.cis.upenn.edu/~kostas;https://cs.stanford.edu/~bshen88", "dblp": "252/0106.html;267/5521;g/LeonidasJGuibas;d/KostasDaniilidis;280/3036", "google_scholar": "rX6SwdIAAAAJ;XJZ8UBcAAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ;dGs2BcIAAAAJ;mOMChFIAAAAJ", "orcid": ";;;0000-0003-0498-0758;0000-0002-8183-3607", "linkedin": ";;;;", "or_profile": "~Jiahui_Lei1;~Congyue_Deng1;~Leonidas_Guibas1;~Kostas_Daniilidis1;~William_B._Shen1", "aff": "University of Pennsylvania;Stanford University;Stanford University;University of Pennsylvania;NVIDIA", "aff_domain": "cis.upenn.edu;stanford.edu;stanford.edu;upenn.edu;nvidia.com", "position": "PhD student;PhD student;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nlei2023nap,\ntitle={{NAP}: Neural 3D Articulated Object Prior},\nauthor={Jiahui Lei and Congyue Deng and Bokui Shen and Leonidas Guibas and Kostas Daniilidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TTkklyFv7e}\n}", "github": "", "project": "", "reviewers": "ncuk;hNRt;WyGQ;pAau;zyR9;17WX", "pdf_size": 9784471, "rating": "4;6;7;7;7;8", "confidence": "2;4;4;3;3;5", "soundness": "3;3;3;4;3;4", "novelty": "3;2;3;4;3;4", "presentation": "2;3;1;4;2;3", "wc_summary": "61;94;95;58;109;48", "wc_strengths": "95;102;47;63;79;96", "wc_weaknesses": "316;333;163;153;176;126", "wc_questions": "473;189;105;3;48;21", "wc_limitations": "70;5;36;4;7;10", "wc_review": "1015;723;446;281;419;301", "wc_reply_reviewers": "115;74;152;56;49;25", "wc_reply_authors": "845;28;10;54;10;10", "reply_reviewers": "1;1;2;1;1;1", "reply_authors": "3;2;2;2;2;2", "rating_avg": [ 6.5, 1.2583057392117916 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 77.5, 22.706460167391423 ], "wc_strengths_avg": [ 80.33333333333333, 19.762478477042183 ], "wc_weaknesses_avg": [ 211.16666666666666, 81.677040157494 ], "wc_questions_avg": [ 139.83333333333334, 161.19803210819776 ], "wc_limitations_avg": [ 22.0, 24.090108620206205 ], "wc_review_avg": [ 530.8333333333334, 260.220686256023 ], "wc_reply_reviewers_avg": [ 78.5, 42.80478945164898 ], "wc_reply_authors_avg": [ 159.5, 306.9700039634709 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.760885910252682, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16888606862798412319&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": 
"cis.upenn.edu;stanford.edu;stanford.edu;upenn.edu;nvidia.com", "author_num": 5, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "University of Pennsylvania;Stanford University;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.upenn.edu;https://www.stanford.edu;https://www.nvidia.com", "aff_unique_abbr": "UPenn;Stanford;NVIDIA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Correlative Information Maximization: A Biologically Plausible Approach to Supervised Deep Neural Networks without Weight Symmetry", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71555", "id": "TUGoUNkccV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6dea02c16a492682d66c6f626c306db2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TUGoUNkccV", "openreview": "https://openreview.net/forum?id=TUGoUNkccV", "poster": "/media/PosterPDFs/NeurIPS%202023/71555.png?t=1700777952.5617335", "slides": "https://nips.cc/virtual/2023/poster/71555", "video": "https://nips.cc/virtual/2023/poster/71555", "author_site": "Bariscan Bozkurt, Cengiz Pehlevan, Alper Erdogan", "tldr": "", "abstract": "The backpropagation algorithm has experienced remarkable success in training large-scale artificial neural networks; however, its biological plausibility has been strongly criticized, and it remains an open question whether the brain employs supervised learning mechanisms akin to it. Here, we propose correlative information maximization between layer activations as an alternative normative approach to describe the signal propagation in biological neural networks in both forward and backward directions. This new framework addresses many concerns about the biological-plausibility of conventional artificial neural networks and the backpropagation algorithm. The coordinate descent-based optimization of the corresponding objective, combined with the mean square error loss function for fitting labeled supervision data, gives rise to a neural network structure that emulates a more biologically realistic network of multi-compartment pyramidal neurons with dendritic processing and lateral inhibitory neurons. Furthermore, our approach provides a natural resolution to the weight symmetry problem between forward and backward signal propagation paths, a significant critique against the plausibility of the conventional backpropagation algorithm. This is achieved by leveraging two alternative, yet equivalent forms of the correlative mutual information objective. 
These alternatives intrinsically lead to forward and backward prediction networks without weight symmetry issues, providing a compelling solution to this long-standing challenge.", "keywords": "Correlative information maximization;Biologically-plausible learning;Multi-compartment neural model", "primary_area": "", "supplementary_material": "/attachment/06472d56b55b6a1a452d9ccf7aae45537718c587.zip", "author": "Bariscan Bozkurt;Cengiz Pehlevan;Alper Tunga Erdogan", "authorids": "~Bariscan_Bozkurt1;~Cengiz_Pehlevan2;~Alper_Tunga_Erdogan1", "gender": "M;;M", "homepage": ";https://pehlevan.seas.harvard.edu/;https://aspc.ku.edu.tr", "dblp": "321/6640;145/3480;46/5196", "google_scholar": "https://scholar.google.com/citations?hl=en;veDLTPEAAAAJ;CW8eBF8AAAAJ", "orcid": ";0000-0001-9767-6063;0000-0003-0876-2897", "linkedin": "bar%C4%B1%C5%9Fcan-bozkurt-436a5610b/;;", "or_profile": "~Bariscan_Bozkurt1;~Cengiz_Pehlevan2;~Alper_Tunga_Erdogan1", "aff": "Ko\u00e7 University;School of Engineering and Applied Sciences, Harvard University;Ko\u00e7 University", "aff_domain": "ku.edu.tr;seas.harvard.edu;ku.edu.tr", "position": "MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbozkurt2023correlative,\ntitle={Correlative Information Maximization: A Biologically Plausible Approach to Supervised Deep Neural Networks without Weight Symmetry},\nauthor={Bariscan Bozkurt and Cengiz Pehlevan and Alper Tunga Erdogan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TUGoUNkccV}\n}", "github": "", "project": "", "reviewers": "wEH2;K4kq;MCJo;t54y", "pdf_size": 583055, "rating": "4;6;6;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;2;2", "wc_summary": "73;66;37;51", "wc_strengths": "40;173;86;60", "wc_weaknesses": "169;232;165;204", "wc_questions": "295;416;189;207", "wc_limitations": "17;9;8;49", "wc_review": "594;896;485;571", "wc_reply_reviewers": "32;445;287;132", "wc_reply_authors": "0;2392;433;358", "reply_reviewers": "1;5;2;2", "reply_authors": "1;7;3;4", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 56.75, 13.899190623917638 ], "wc_strengths_avg": [ 89.75, 50.75615726195197 ], "wc_weaknesses_avg": [ 192.5, 27.390691849604675 ], "wc_questions_avg": [ 276.75, 89.84535324656473 ], "wc_limitations_avg": [ 20.75, 16.67895380412093 ], "wc_review_avg": [ 636.5, 155.23288955630505 ], "wc_reply_reviewers_avg": [ 224.0, 156.63492586265681 ], "wc_reply_authors_avg": [ 795.75, 936.008112945609 ], "reply_reviewers_avg": [ 2.5, 1.5 ], "reply_authors_avg": [ 3.75, 2.165063509461097 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6239735771105597820&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "ku.edu.tr;seas.harvard.edu;ku.edu.tr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ko\u00e7 University;Harvard University", "aff_unique_dep": ";School of Engineering and Applied Sciences", "aff_unique_url": "https://www.ku.edu.tr;https://www.harvard.edu", "aff_unique_abbr": "Ko\u00e7;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0", "aff_country_unique": "T\u00fcrkiye;United 
States" }, { "title": "Bounce: Reliable High-Dimensional Bayesian Optimization for Combinatorial and Mixed Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71554", "id": "TVD3wNVH9A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05d2175de7ee637588d1b5ced8b15b32-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TVD3wNVH9A", "openreview": "https://openreview.net/forum?id=TVD3wNVH9A", "poster": "/media/PosterPDFs/NeurIPS%202023/71554.png?t=1701704499.691776", "slides": "https://nips.cc/virtual/2023/poster/71554", "video": "https://nips.cc/virtual/2023/poster/71554", "author_site": "Leonard Papenmeier, Luigi Nardi, Matthias Poloczek", "tldr": "", "abstract": "Impactful applications such as materials discovery, hardware design, neural architecture search, or portfolio optimization require optimizing high-dimensional black-box functions with mixed and combinatorial input spaces.\nWhile Bayesian optimization has recently made significant progress in solving such problems, an in-depth analysis reveals that the current state-of-the-art methods are not reliable. \nTheir performances degrade substantially when the unknown optima of the function do not have a certain structure. \nTo fill the need for a reliable algorithm for combinatorial and mixed spaces, this paper proposes Bounce that relies on a novel map of various variable types into nested embeddings of increasing dimensionality.\nComprehensive experiments show that Bounce reliably achieves and often even improves upon state-of-the-art performance on a variety of high-dimensional problems.", "keywords": "Bayesian optimization;global optimization;Gaussian process;combinatorial optimization;high-dimensional", "primary_area": "", "supplementary_material": "/attachment/2e741f9dff6d137e621c0e0dba36698083adee6d.zip", "author": "Leonard Papenmeier;Luigi Nardi;Matthias Poloczek", "authorids": "~Leonard_Papenmeier1;~Luigi_Nardi1;~Matthias_Poloczek1", "gender": "M;M;", "homepage": "https://portal.research.lu.se/en/persons/leonard-papenmeier;;", "dblp": ";60/7206;13/9649", "google_scholar": "85BUIRcAAAAJ;https://scholar.google.it/citations?user=Kgs3zQoAAAAJ;g5BRMkoAAAAJ", "orcid": "0000-0001-9338-1567;0000-0002-4601-2264;", "linkedin": "leonard-papenmeier-a90a60135;nardiluigi/;", "or_profile": "~Leonard_Papenmeier1;~Luigi_Nardi1;~Matthias_Poloczek1", "aff": "Lund University;Stanford University;Amazon", "aff_domain": "lu.se;stanford.edu;amazon.com", "position": "PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\npapenmeier2023bounce,\ntitle={Bounce: Reliable High-Dimensional Bayesian Optimization for Combinatorial and Mixed Spaces},\nauthor={Leonard Papenmeier and Luigi Nardi and Matthias Poloczek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TVD3wNVH9A}\n}", "github": "", "project": "", "reviewers": "RWfy;FW4t;HPPy;nHAf;ACyR", "pdf_size": 3194795, "rating": "6;7;7;7;7", "confidence": "4;4;4;4;4", "soundness": "3;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "3;3;2;4;3", "wc_summary": "75;119;150;89;52", "wc_strengths": "55;76;48;104;52", "wc_weaknesses": "283;79;61;5;198", "wc_questions": "10;404;180;90;111", "wc_limitations": "12;47;1;1;1", "wc_review": "435;725;440;289;414", "wc_reply_reviewers": "54;39;0;25;54", "wc_reply_authors": "0;0;0;0;76", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.8, 0.39999999999999997 ], 
"confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 97.0, 34.25200724045235 ], "wc_strengths_avg": [ 67.0, 20.8806130178211 ], "wc_weaknesses_avg": [ 125.2, 100.90074330747024 ], "wc_questions_avg": [ 159.0, 133.96417431537435 ], "wc_limitations_avg": [ 12.4, 17.81684596105607 ], "wc_review_avg": [ 460.6, 143.25166665697122 ], "wc_reply_reviewers_avg": [ 34.4, 20.303694245136768 ], "wc_reply_authors_avg": [ 15.2, 30.400000000000002 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=333560853050012715&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "lu.se;stanford.edu;amazon.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Lund University;Stanford University;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.lunduniversity.lu.se;https://www.stanford.edu;https://www.amazon.com", "aff_unique_abbr": "LU;Stanford;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Sweden;United States" }, { "title": "Fair Streaming Principal Component Analysis: Statistical and Algorithmic Viewpoint", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71553", "id": "TW3ipYdDQG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1074541383db5ef12d6ac66d2f8e8d34-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TW3ipYdDQG", "openreview": "https://openreview.net/forum?id=TW3ipYdDQG", "poster": "/media/PosterPDFs/NeurIPS%202023/71553.png?t=1701694119.8682933", "slides": "https://nips.cc/virtual/2023/poster/71553", "video": "https://nips.cc/virtual/2023/poster/71553", "author_site": "Junghyun Lee, Hanseul Cho, Se-Young Yun, Chulhee Yun", "tldr": "", "abstract": "Fair Principal Component Analysis (PCA) is a problem setting where we aim to perform PCA while making the resulting representation fair in that the projected distributions, conditional on the sensitive attributes, match one another. However, existing approaches to fair PCA have two main problems: theoretically, there has been no statistical foundation of fair PCA in terms of learnability; practically, limited memory prevents us from using existing approaches, as they explicitly rely on full access to the entire data. On the theoretical side, we rigorously formulate fair PCA using a new notion called probably approximately fair and optimal (PAFO) learnability. On the practical side, motivated by recent advances in streaming algorithms for addressing memory limitation, we propose a new setting called fair streaming PCA along with a memory-efficient algorithm, fair noisy power method (FNPM). We then provide its statistical guarantee in terms of PAFO-learnability, which is the first of its kind in fair PCA literature. 
We verify our algorithm on the CelebA dataset without any pre-processing; while the existing approaches are inapplicable due to memory limitations, we show that, by turning the problem into a streaming one, our algorithm performs fair PCA efficiently and effectively.", "keywords": "streaming;PCA;memory-limited;fair representation;online learning", "primary_area": "", "supplementary_material": "", "author": "Junghyun Lee;Hanseul Cho;Se-Young Yun;Chulhee Yun", "authorids": "~Junghyun_Lee1;~Hanseul_Cho1;~Se-Young_Yun1;~Chulhee_Yun1", "gender": "M;M;M;M", "homepage": "https://nick-jhlee.github.io/;https://hanseuljo.github.io/;https://fbsqkd.github.io;https://chulheeyun.github.io/", "dblp": ";233/5755-2;23/8862;138/0148.html", "google_scholar": "sYtSnnQAAAAJ;IczOXwsAAAAJ;X_IAjb8AAAAJ;Ukl64ggAAAAJ", "orcid": "0000-0002-3898-6464;0009-0001-0410-0290;;", "linkedin": "junghyun-nick-lee-89799a197/;hanseul-cho-66b01a260/;seyoung-yun-395130ab/;", "or_profile": "~Junghyun_Lee1;~Hanseul_Cho1;~Se-Young_Yun1;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2023fair,\ntitle={Fair Streaming Principal Component Analysis: Statistical and Algorithmic Viewpoint},\nauthor={Junghyun Lee and Hanseul Cho and Se-Young Yun and Chulhee Yun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TW3ipYdDQG}\n}", "github": "", "project": "", "reviewers": "SSmx;GuCQ;6AMc;VcC6", "pdf_size": 0, "rating": "4;6;6;7", "confidence": "4;3;4;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "141;53;177;150", "wc_strengths": "10;25;156;75", "wc_weaknesses": "23;89;220;85", "wc_questions": "1;33;4;2", "wc_limitations": "1;8;17;1", "wc_review": "176;208;574;313", "wc_reply_reviewers": "11;120;12;0", "wc_reply_authors": "0;647;26;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;3;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 130.25, 46.5262022950509 ], "wc_strengths_avg": [ 66.5, 57.002192940272046 ], "wc_weaknesses_avg": [ 104.25, 71.76829035165879 ], "wc_questions_avg": [ 10.0, 13.322912594474229 ], "wc_limitations_avg": [ 6.75, 6.5717197140474575 ], "wc_review_avg": [ 317.75, 156.38474190278282 ], "wc_reply_reviewers_avg": [ 35.75, 48.86908531986249 ], "wc_reply_authors_avg": [ 168.25, 276.61017244490483 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1366228902919540583&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index":
"0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Beyond Uniform Sampling: Offline Reinforcement Learning with Imbalanced Datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71552", "id": "TW99HrZCJU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ff3502bb29570b219967278db150a50-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TW99HrZCJU", "openreview": "https://openreview.net/forum?id=TW99HrZCJU", "poster": "/media/PosterPDFs/NeurIPS%202023/71552.png?t=1702221711.9364517", "slides": "https://nips.cc/virtual/2023/poster/71552", "video": "https://nips.cc/virtual/2023/poster/71552", "author_site": "Zhang-Wei Hong, Aviral Kumar, Sathwik Karnik, Abhishek Bhandwaldar, Akash Srivastava, Joni Pajarinen, Romain Laroche, Abhishek Gupta, Pulkit Agrawal", "tldr": "", "abstract": "Offline reinforcement learning (RL) enables learning a decision-making policy without interaction with the environment. This makes it particularly beneficial in situations where such interactions are costly. \nHowever, a known challenge for offline RL algorithms is the distributional mismatch between the state-action distributions of the learned policy and the dataset, which can significantly impact performance. State-of-the-art algorithms address it by constraining the policy to align with the state-action pairs in the dataset. However, this strategy struggles on datasets that predominantly consist of trajectories collected by low-performing policies and only a few trajectories from high-performing ones. Indeed, the constraint to align with the data leads the policy to imitate low-performing behaviors predominating the dataset. Our key insight to address this issue is to constrain the policy to the policy that collected the good parts of the dataset rather than all data. To this end, we optimize the importance sampling weights to emulate sampling data from a data distribution generated by a nearly optimal policy. 
Our method, applied on top of state-of-the-art offline RL algorithms, exhibits considerable performance gains (up to five times better) over the existing sampling approaches across 72 imbalanced datasets with varying types of imbalance.", "keywords": "offline reinforcement learning;reinforcement learning;sampling;experience replay", "primary_area": "", "supplementary_material": "/attachment/23c31eead980d0e285733884e92ec8b20d8b76bd.zip", "author": "Zhang-Wei Hong;Aviral Kumar;Sathwik Karnik;Abhishek Bhandwaldar;Akash Srivastava;Joni Pajarinen;Romain Laroche;Abhishek Gupta;Pulkit Agrawal", "authorids": "~Zhang-Wei_Hong1;~Aviral_Kumar2;~Sathwik_Karnik1;~Abhishek_Bhandwaldar1;~Akash_Srivastava1;~Joni_Pajarinen2;~Romain_Laroche1;~Abhishek_Gupta1;~Pulkit_Agrawal1", "gender": "M;M;;M;M;;M;M;M", "homepage": ";https://aviralkumar2907.github.io/;;;http://akashgit.github.io;;https://www.researchgate.net/profile/Romain_Laroche;https://homes.cs.washington.edu/~abhgupta/;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": "198/0600;202/7961;;;24/9528;23/8355;65/9019;18/6404-4;149/2672", "google_scholar": "GZkyN4cAAAAJ;;;lV0gYnkAAAAJ;https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ;https://scholar.google.fi/citations?user=-2fJStwAAAAJ;RiIOKJMAAAAJ;1wLVDP4AAAAJ;UpZmJI0AAAAJ", "orcid": ";;;;;0000-0003-4469-8191;;;", "linkedin": ";;sathwik-karnik-a72768172/;abhishek-bhandwaldar/;https://uk.linkedin.com/in/akash-srivastava-aa97361b;;romain-laroche-6282397/?originalSubdomain=ca;;", "or_profile": "~Zhang-Wei_Hong1;~Aviral_Kumar2;~Sathwik_Karnik1;~Abhishek_Bhandwaldar1;~Akash_Srivastava1;~Joni_Pajarinen2;~Romain_Laroche1;~Abhishek_Gupta1;~Pulkit_Agrawal1", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;Computer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science;International Business Machines;MIT-IBM Watson AI Research Lab;Aalto University;Microsoft;University of Washington;Massachusetts Institute of Technology", "aff_domain": "mit.edu;berkeley.edu;csail.mit.edu;ibm.com;ibm.com;aalto.fi;microsoft.com;uw.edu;mit.edu", "position": "PhD student;PhD student;MS student;Researcher;Research Scientist;Assistant Professor;Principal Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhong2023beyond,\ntitle={Beyond Uniform Sampling: Offline Reinforcement Learning with Imbalanced Datasets},\nauthor={Zhang-Wei Hong and Aviral Kumar and Sathwik Karnik and Abhishek Bhandwaldar and Akash Srivastava and Joni Pajarinen and Romain Laroche and Abhishek Gupta and Pulkit Agrawal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TW99HrZCJU}\n}", "github": "", "project": "", "reviewers": "XiFQ;qcFR;yys4;WKvr", "pdf_size": 1580009, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "4;3;3;3", "wc_summary": "59;43;79;235", "wc_strengths": "25;53;67;78", "wc_weaknesses": "163;48;24;140", "wc_questions": "269;81;197;66", "wc_limitations": "85;56;1;2", "wc_review": "601;281;368;521", "wc_reply_reviewers": "104;19;57;10", "wc_reply_authors": "87;21;28;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 76.70071707617863 ], "wc_strengths_avg": [ 55.75, 19.84155991851447 ],
"wc_weaknesses_avg": [ 93.75, 58.933755183256395 ], "wc_questions_avg": [ 153.25, 83.8819855511301 ], "wc_limitations_avg": [ 36.0, 35.993054885630364 ], "wc_review_avg": [ 442.75, 125.41605758434604 ], "wc_reply_reviewers_avg": [ 47.5, 37.08436328157732 ], "wc_reply_authors_avg": [ 34.0, 32.28776858192588 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10488202739863718054&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;berkeley.edu;csail.mit.edu;ibm.com;ibm.com;aalto.fi;microsoft.com;uw.edu;mit.edu", "author_num": 9, "aff_unique_index": "0;1;0;2;0;3;4;5;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;International Business Machines Corporation;Aalto University;Microsoft;University of Washington", "aff_unique_dep": ";;;;Microsoft Corporation;", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;https://www.ibm.com;https://www.aalto.fi;https://www.microsoft.com;https://www.washington.edu", "aff_unique_abbr": "MIT;UC Berkeley;IBM;Aalto;Microsoft;UW", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Cambridge", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Finland" }, { "title": "Improved Algorithms for Stochastic Linear Bandits Using Tail Bounds for Martingale Mixtures", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71551", "id": "TXoZiUZywf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8db0d67d22e0ec08c95b810be3a66907-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TXoZiUZywf", "openreview": "https://openreview.net/forum?id=TXoZiUZywf", "poster": "/media/PosterPDFs/NeurIPS%202023/71551.png?t=1701947782.776399", "slides": "https://nips.cc/virtual/2023/poster/71551", "video": "https://nips.cc/virtual/2023/poster/71551", "author_site": "Hamish Flynn, David Reeb, Melih Kandemir, Jan Peters", "tldr": "", "abstract": "We present improved algorithms with worst-case regret guarantees for the stochastic linear bandit problem. The widely used \"optimism in the face of uncertainty\" principle reduces a stochastic bandit problem to the construction of a confidence sequence for the unknown reward function. The performance of the resulting bandit algorithm depends on the size of the confidence sequence, with smaller confidence sets yielding better empirical performance and stronger regret guarantees. In this work, we use a novel tail bound for adaptive martingale mixtures to construct confidence sequences which are suitable for stochastic bandits. These confidence sequences allow for efficient action selection via convex programming. We prove that a linear bandit algorithm based on our confidence sequences is guaranteed to achieve competitive worst-case regret. We show that our confidence sequences are tighter than competitors, both empirically and theoretically. 
Finally, we demonstrate that our tighter confidence sequences give improved performance in several hyperparameter tuning tasks.", "keywords": "Linear bandits;confidence sequences;martingales;convex optimization;cumulative regret;regret analysis", "primary_area": "", "supplementary_material": "", "author": "Hamish Flynn;David Reeb;Melih Kandemir;Jan Peters", "authorids": "~Hamish_Flynn1;~David_Reeb2;~Melih_Kandemir1;~Jan_Peters3", "gender": ";M;M;M", "homepage": ";https://www.bosch-ai.com/about-us/our-people/;https://imada.sdu.dk/~kandemir/;https://www.jan-peters.net", "dblp": ";129/1561;95/7056;p/JanPeters1", "google_scholar": ";https://scholar.google.com/citations?hl=en;Jxm1UeYAAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ", "orcid": ";;0000-0001-6293-3656;0000-0002-5266-8091", "linkedin": ";;melih-kandemir-64681a16/;janrpeters/", "or_profile": "~Hamish_Flynn1;~David_Reeb2;~Melih_Kandemir1;~Jan_Peters3", "aff": ";Robert Bosch GmbH, Bosch;University of Southern Denmark;TU Darmstadt", "aff_domain": ";de.bosch.com;sdu.dk;tu-darmstadt.de", "position": ";Research Scientist;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nflynn2023improved,\ntitle={Improved Algorithms for Stochastic Linear Bandits Using Tail Bounds for Martingale Mixtures},\nauthor={Hamish Flynn and David Reeb and Melih Kandemir and Jan Peters},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TXoZiUZywf}\n}", "github": "", "project": "", "reviewers": "LyTZ;Cd9x;qEDN;uqhQ", "pdf_size": 606098, "rating": "7;7;7;8", "confidence": "3;4;4;4", "soundness": "3;4;4;4", "novelty": "3;3;4;3", "presentation": "3;4;4;4", "wc_summary": "175;172;64;77", "wc_strengths": "66;83;160;26", "wc_weaknesses": "73;414;202;701", "wc_questions": "33;2;24;17", "wc_limitations": "6;1;38;39", "wc_review": "353;672;488;860", "wc_reply_reviewers": "92;24;34;40", "wc_reply_authors": "29;10;11;13", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.0, 51.71556825560365 ], "wc_strengths_avg": [ 83.75, 48.64347335460329 ], "wc_weaknesses_avg": [ 347.5, 237.64732272844986 ], "wc_questions_avg": [ 19.0, 11.335784048754634 ], "wc_limitations_avg": [ 21.0, 17.592612085759182 ], "wc_review_avg": [ 593.25, 191.1509547451961 ], "wc_reply_reviewers_avg": [ 47.5, 26.320144376503713 ], "wc_reply_authors_avg": [ 15.75, 7.725768570181222 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14998501150198693228&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";de.bosch.com;sdu.dk;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Robert Bosch GmbH;University of Southern Denmark;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bosch.com;https://www.sdu.dk;https://www.tu-darmstadt.de", "aff_unique_abbr": "Bosch;SDU;TU Darmstadt", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Denmark" }, { "title": "A Variational Perspective on 
High-Resolution ODEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71550", "id": "TXq8PCRSoY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0569458210c88d8db2985799da830d27-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TXq8PCRSoY", "openreview": "https://openreview.net/forum?id=TXq8PCRSoY", "poster": "/media/PosterPDFs/NeurIPS%202023/71550.png?t=1701418231.0120046", "slides": "https://nips.cc/virtual/2023/poster/71550", "video": "https://nips.cc/virtual/2023/poster/71550", "author_site": "Hoomaan Maskan, Konstantinos Zygalakis, Alp Yurtsever", "tldr": "", "abstract": "We consider unconstrained minimization of smooth convex functions. We propose a novel variational perspective using forced Euler-Lagrange equation that allows for studying high-resolution ODEs. Through this, we obtain a faster convergence rate for gradient norm minimization using Nesterov's accelerated gradient method. Additionally, we show that Nesterov's method can be interpreted as a rate-matching discretization of an appropriately chosen high-resolution ODE. Finally, using the results from the new variational perspective, we propose a stochastic method for noisy gradients. Several numerical experiments compare and illustrate our stochastic algorithm with state of the art methods.", "keywords": "Nesterov's accelerated gradient;gradient descent;Lyapunov function;gradient norm minimization;rate-matching;stochastic variance reduction;stochastic gradient descent;noisy gradient", "primary_area": "", "supplementary_material": "/attachment/7e43a2f6c38ad66e9c20c891de72a9083fb890bc.zip", "author": "Hoomaan Maskan;Konstantinos C. Zygalakis;Alp Yurtsever", "authorids": "~Hoomaan_Maskan1;~Konstantinos_C._Zygalakis1;~Alp_Yurtsever2", "gender": "M;M;M", "homepage": ";;https://alpyurtsever.github.io", "dblp": "228/8319;80/9576;174/9621", "google_scholar": "https://scholar.google.com.au/citations?hl=en;;wa_n-xYAAAAJ", "orcid": "0000-0001-8251-2605;;", "linkedin": ";;", "or_profile": "~Hoomaan_Maskan1;~Konstantinos_C._Zygalakis1;~Alp_Yurtsever2", "aff": "Umea University;University of Edinburgh;Ume\u00e5 University", "aff_domain": "umu.se;ed.ac.uk;umu.se", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nmaskan2023a,\ntitle={A Variational Perspective on High-Resolution {ODE}s},\nauthor={Hoomaan Maskan and Konstantinos C. 
Zygalakis and Alp Yurtsever},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TXq8PCRSoY}\n}", "github": "", "project": "", "reviewers": "ZUpb;rs2m;A9z2;CWtz", "pdf_size": 415012, "rating": "6;7;7;7", "confidence": "1;3;2;2", "soundness": "2;4;4;3", "novelty": "2;4;3;3", "presentation": "2;3;3;2", "wc_summary": "101;22;79;88", "wc_strengths": "55;76;110;88", "wc_weaknesses": "72;47;319;83", "wc_questions": "76;1;67;54", "wc_limitations": "1;8;46;23", "wc_review": "305;154;621;336", "wc_reply_reviewers": "154;0;135;115", "wc_reply_authors": "0;0;136;0", "reply_reviewers": "1;0;3;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.5, 30.18691769624716 ], "wc_strengths_avg": [ 82.25, 19.904459299363044 ], "wc_weaknesses_avg": [ 130.25, 109.75284734347441 ], "wc_questions_avg": [ 49.5, 29.073183520213263 ], "wc_limitations_avg": [ 19.5, 17.240939649566666 ], "wc_review_avg": [ 354.0, 168.82979594846404 ], "wc_reply_reviewers_avg": [ 101.0, 59.92078103629825 ], "wc_reply_authors_avg": [ 34.0, 58.88972745734183 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14922122565985759215&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "umu.se;ed.ac.uk;umu.se", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ume\u00e5 University;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.umu.se;https://www.ed.ac.uk", "aff_unique_abbr": "UMU;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Sweden;United Kingdom" }, { "title": "MIM4DD: Mutual Information Maximization for Dataset Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71549", "id": "TZtw5YgxTE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/24d36eee157559e0d2549455fba28f6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TZtw5YgxTE", "openreview": "https://openreview.net/forum?id=TZtw5YgxTE", "poster": "/media/PosterPDFs/NeurIPS%202023/71549.png?t=1697475872.6624749", "slides": "https://nips.cc/virtual/2023/poster/71549", "video": "https://nips.cc/virtual/2023/poster/71549", "author_site": "Yuzhang Shang, Zhihang Yuan, Yan Yan", "tldr": "", "abstract": "Dataset distillation (DD) aims to synthesize a small dataset whose test performance is comparable to a full dataset using the same model. State-of-the-art (SoTA) methods optimize synthetic datasets primarily by matching heuristic indicators extracted from two networks: one from real data and one from synthetic data (see Fig.1, Left), such as gradients and training trajectories. DD is essentially a compression problem that emphasizes on maximizing the preservation of information contained in the data. We argue that well-defined metrics which measure the amount of shared information between variables in information theory are necessary for success measurement, but are never considered by previous works. 
Thus, we introduce mutual information (MI) as the metric to quantify the shared information between the synthetic and the real datasets, and devise MIM4DD, which numerically maximizes the MI via a newly designed optimizable objective within a contrastive learning framework to update the synthetic dataset. Specifically, we designate samples from different datasets that share the same labels as positive pairs, and samples with different labels as negative pairs. Then we pull together the samples in positive pairs and push apart the samples in negative pairs in the contrastive space by minimizing an NCE loss. As a result, the targeted MI can be transformed into a lower bound represented by feature maps of samples, which is numerically feasible. Experimental results show that MIM4DD can be implemented as an add-on module to existing SoTA DD methods.", "keywords": "Dataset Distillation", "primary_area": "", "supplementary_material": "/attachment/a14f40206c443830e979199c431e996a9b8b740a.pdf", "author": "Yuzhang Shang;Zhihang Yuan;Yan Yan", "authorids": "~Yuzhang_Shang1;~Zhihang_Yuan1;~Yan_Yan6", "gender": "M;M;M", "homepage": "https://42shawn.github.io/;http://zhihang.cc;", "dblp": "300/8483;195/4180;13/3953-2", "google_scholar": "6ZPL5E0AAAAJ;https://scholar.google.ca/citations?user=iipYHLoAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuzhang_Shang1;~Zhihang_Yuan1;~Yan_Yan6", "aff": "Illinois Institute of Technology;Houmo AI;", "aff_domain": "iit.edu;houmo.ai;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nshang2023mimdd,\ntitle={{MIM}4{DD}: Mutual Information Maximization for Dataset Distillation},\nauthor={Yuzhang Shang and Zhihang Yuan and Yan Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TZtw5YgxTE}\n}", "github": "", "project": "", "reviewers": "BhBC;7GDB;pJjQ;sR5S", "pdf_size": 1347324, "rating": "5;6;6;7", "confidence": "4;4;5;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "84;49;73;90", "wc_strengths": "87;60;50;126", "wc_weaknesses": "100;38;94;60", "wc_questions": "35;17;58;156", "wc_limitations": "24;2;2;4", "wc_review": "330;166;277;436", "wc_reply_reviewers": "297;19;302;76", "wc_reply_authors": "982;42;621;43", "reply_reviewers": "2;1;3;1", "reply_authors": "5;2;4;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 15.668439615992398 ], "wc_strengths_avg": [ 80.75, 29.422567868899545 ], "wc_weaknesses_avg": [ 73.0, 25.317977802344327 ], "wc_questions_avg": [ 66.5, 53.67727638395972 ], "wc_limitations_avg": [ 8.0, 9.273618495495704 ], "wc_review_avg": [ 302.25, 97.28919518631038 ], "wc_reply_reviewers_avg": [ 173.5, 127.61367481582842 ], "wc_reply_authors_avg": [ 422.0, 400.38793688122024 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7579656180875020603&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "iit.edu;houmo.ai;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Illinois Institute of Technology;Houmo AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.iit.edu;https://www.houmo.ai", "aff_unique_abbr": "IIT;Houmo AI",
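A minimal sketch of such an NCE-style lower bound, assuming illustrative feature shapes and temperature (nce_loss and tau are placeholders, not MIM4DD's exact objective):

```python
# Sketch: InfoNCE-style lower bound on the MI between features of synthetic
# and real samples; same-label pairs act as positives, others as negatives.
import torch
import torch.nn.functional as F

def nce_loss(f_syn, f_real, labels_syn, labels_real, tau=0.1):
    f_syn, f_real = F.normalize(f_syn, dim=1), F.normalize(f_real, dim=1)
    sim = f_syn @ f_real.t() / tau                     # (n_syn, n_real) similarities
    pos = labels_syn[:, None] == labels_real[None, :]  # positive-pair mask
    log_prob = sim - sim.logsumexp(dim=1, keepdim=True)
    # average log-probability over each synthetic sample's positives
    return -(log_prob * pos).sum(1).div(pos.sum(1).clamp(min=1)).mean()

# toy usage with random features and labels
f_syn, f_real = torch.randn(32, 128), torch.randn(256, 128)
loss = nce_loss(f_syn, f_real,
                torch.randint(10, (32,)), torch.randint(10, (256,)))
```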
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "Fast Trainable Projection for Robust Fine-tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71548", "id": "Tb7np0MInj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/259e59fe23ebd09252647fed42949182-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Tb7np0MInj", "openreview": "https://openreview.net/forum?id=Tb7np0MInj", "poster": "/media/PosterPDFs/NeurIPS%202023/71548.png?t=1702046369.270024", "slides": "https://nips.cc/virtual/2023/poster/71548", "video": "https://nips.cc/virtual/2023/poster/71548", "author_site": "Junjiao Tian, Yen-Cheng Liu, James S Smith, Zsolt Kira", "tldr": "", "abstract": "Robust fine-tuning aims to achieve competitive in-distribution (ID) performance while maintaining the out-of-distribution (OOD) robustness of a pre-trained model when transferring it to a downstream task. Recently, projected gradient descent has been successfully used in robust fine-tuning by constraining the deviation from the initialization of the fine-tuned model explicitly through projection. However, algorithmically, two limitations prevent this method from being adopted more widely, scalability and efficiency. In this paper, we propose a new projection-based fine-tuning algorithm, Fast Trainable Projection (FTP) for computationally efficient learning of per-layer projection constraints, resulting in an average 35% speedup on our benchmarks compared to prior works. FTP can be combined with existing optimizers such as AdamW, and be used in a plug-and-play fashion. Finally, we show that FTP is a special instance of hyper-optimizers that tune the hyper-parameters of optimizers in a learnable manner through nested differentiation. Empirically, we show superior robustness on OOD datasets, including domain shifts and natural corruptions, across four different vision tasks with five different pre-trained models. Additionally, we demonstrate that FTP is broadly applicable and beneficial to other learning scenarios such as low-label and continual learning settings thanks to its easy adaptability. 
The code will be available at https://github.com/GT-RIPL/FTP.git.", "keywords": "fine-tuning;transfer learning;regularization", "primary_area": "", "supplementary_material": "/attachment/1b0594e7f007c36981f66d9927b257071d7eac2b.zip", "author": "Junjiao Tian;Yen-Cheng Liu;James Smith;Zsolt Kira", "authorids": "~Junjiao_Tian1;~Yen-Cheng_Liu1;~James_Smith1;~Zsolt_Kira1", "gender": "M;;M;M", "homepage": ";https://ycliu93.github.io/;https://jamessealesmith.github.io/;https://faculty.cc.gatech.edu/~zk15", "dblp": "246/3115.htm;29/7584;317/5043;36/4127", "google_scholar": "iHZD850AAAAJ;yeAeAhsAAAAJ;rT52aN8AAAAJ;2a5XgNAAAAAJ", "orcid": ";;0000-0001-9210-0161;0000-0002-2626-2004", "linkedin": ";;jamessealesmith/;", "or_profile": "~Junjiao_Tian1;~Yen-Cheng_Liu1;~James_Smith1;~Zsolt_Kira1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Tech Research Institute", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gtri.gatech.edu", "position": "PhD student;PhD student;PhD student;Senior Research Scientist", "bibtex": "@inproceedings{\ntian2023fast,\ntitle={Fast Trainable Projection for Robust Fine-tuning},\nauthor={Junjiao Tian and Yen-Cheng Liu and James Smith and Zsolt Kira},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Tb7np0MInj}\n}", "github": "", "project": "", "reviewers": "yjP3;wzxc;2kDG;HDd6;FZ4t", "pdf_size": 4129340, "rating": "5;5;5;5;6", "confidence": "5;2;4;5;4", "soundness": "3;3;2;3;3", "novelty": "3;3;2;2;2", "presentation": "3;3;3;3;3", "wc_summary": "63;74;135;215;55", "wc_strengths": "47;34;93;72;63", "wc_weaknesses": "94;79;260;214;131", "wc_questions": "65;19;76;22;57", "wc_limitations": "2;6;1;1;6", "wc_review": "271;212;565;524;312", "wc_reply_reviewers": "13;0;55;57;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 108.4, 60.278022528944994 ], "wc_strengths_avg": [ 61.8, 20.350921355064 ], "wc_weaknesses_avg": [ 155.6, 70.11019897276002 ], "wc_questions_avg": [ 47.8, 23.11190169587955 ], "wc_limitations_avg": [ 3.2, 2.3151673805580453 ], "wc_review_avg": [ 376.8, 141.16571821798664 ], "wc_reply_reviewers_avg": [ 25.0, 25.760434778939583 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14309633076304018271&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "gatech.edu;gatech.edu;gatech.edu;gtri.gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Georgia Tech Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.gtri.gatech.edu", "aff_unique_abbr": "Georgia Tech;GTRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "BERT Lost Patience Won't Be Robust to Adversarial Slowdown", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71547", "id": "TcG8jhOPdv", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c50a537060022ba5fc3d6a856625b664-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TcG8jhOPdv", "openreview": "https://openreview.net/forum?id=TcG8jhOPdv", "poster": "/media/PosterPDFs/NeurIPS%202023/71547.png?t=1700167301.5787725", "slides": "https://nips.cc/virtual/2023/poster/71547", "video": "https://nips.cc/virtual/2023/poster/71547", "author_site": "Zachary Coalson, Gabriel Ritter, Rakesh Bobba, Sanghyun Hong", "tldr": "", "abstract": "In this paper, we systematically evaluate the robustness of multi-exit language models against adversarial slowdown. To audit their robustness, we design a slowdown attack that generates natural adversarial text bypassing early-exit points. We use the resulting WAFFLE attack as a vehicle to conduct a comprehensive evaluation of three multi-exit mechanisms with the GLUE benchmark against adversarial slowdown. We then show our attack significantly reduces the computational savings provided by the three methods in both white-box and black-box settings. The more complex a mechanism is, the more vulnerable it is to adversarial slowdown. We also perform a linguistic analysis of the perturbed text inputs, identifying common perturbation patterns that our attack generates, and comparing them with standard adversarial text attacks. Moreover, we show that adversarial training is ineffective in defeating our slowdown attack, but input sanitization with a conversational model, e.g., ChatGPT, can remove perturbations effectively. This result suggests that future work is needed for developing efficient yet robust multi-exit models. Our code is available at: https://github.com/ztcoalson/WAFFLE", "keywords": "Efficient Methods for NLP; Multi-exit Language Models; Adversarial Slowdown", "primary_area": "", "supplementary_material": "", "author": "Zachary Coalson;Gabriel Ritter;Rakesh B Bobba;Sanghyun Hong", "authorids": "~Zachary_Coalson1;ritterg@oregonstate.edu;~Rakesh_B_Bobba1;~Sanghyun_Hong1", "gender": "M;;M;M", "homepage": "https://zachcoalson.com;;https://engineering.oregonstate.edu/people/rakesh-bobba;http://www.sanghyun-hong.com", "dblp": "359/5720;;https://dblp.uni-trier.de/pers/hd/b/Bobba:Rakesh;135/8991", "google_scholar": "AFAZZgkAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": "0009-0006-0821-3432;;0000-0002-5440-0785;", "linkedin": "zach-coalson/;;rakeshbobba/;", "or_profile": "~Zachary_Coalson1;ritterg@oregonstate.edu;~Rakesh_B_Bobba1;~Sanghyun_Hong1", "aff": "Oregon State University;;Oregon State University;Oregon State University", "aff_domain": "oregonstate.edu;;oregonstate.edu;oregonstate.edu", "position": "Undergrad student;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ncoalson2023bert,\ntitle={{BERT} Lost Patience Won't Be Robust to Adversarial Slowdown},\nauthor={Zachary Coalson and Gabriel Ritter and Rakesh B Bobba and Sanghyun Hong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TcG8jhOPdv}\n}", "github": "", "project": "", "reviewers": "9ThA;69Mr;KooQ;Lh1p", "pdf_size": 374587, "rating": "5;6;6;7", "confidence": "4;4;4;2", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "79;55;115;26", "wc_strengths": "4;96;73;12", "wc_weaknesses": "9;178;33;65", "wc_questions": "91;40;87;16", "wc_limitations": "1;31;11;6", "wc_review": "184;400;319;125", "wc_reply_reviewers": 
"78;10;94;12", "wc_reply_authors": "47;44;225;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.75, 32.63721035873011 ], "wc_strengths_avg": [ 46.25, 39.20698279643564 ], "wc_weaknesses_avg": [ 71.25, 64.75482607497298 ], "wc_questions_avg": [ 58.5, 31.68990375498165 ], "wc_limitations_avg": [ 12.25, 11.388041973930374 ], "wc_review_avg": [ 257.0, 108.45044951497435 ], "wc_reply_reviewers_avg": [ 48.5, 37.93085815006035 ], "wc_reply_authors_avg": [ 79.0, 86.32207133752063 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10881637976532717390&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "email": "oregonstate.edu;;oregonstate.edu;oregonstate.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Oregon State University", "aff_unique_dep": "", "aff_unique_url": "https://oregonstate.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "L-CAD: Language-based Colorization with Any-level Descriptions using Diffusion Priors", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71546", "id": "TcmjewOAd1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f3bfbd65743e60c685a3845bd61ce15f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TcmjewOAd1", "openreview": "https://openreview.net/forum?id=TcmjewOAd1", "poster": "/media/PosterPDFs/NeurIPS%202023/71546.png?t=1699601862.1792104", "slides": "https://nips.cc/virtual/2023/poster/71546", "video": "https://nips.cc/virtual/2023/poster/71546", "author_site": "zheng chang, Shuchen Weng, Peixuan Zhang, Yu Li, Si Li, Boxin Shi", "tldr": "", "abstract": "Language-based colorization produces plausible and visually pleasing colors under the guidance of user-friendly natural language descriptions. Previous methods implicitly assume that users provide comprehensive color descriptions for most of the objects in the image, which leads to suboptimal performance. In this paper, we propose a unified model to perform language-based colorization with any-level descriptions. We leverage the pretrained cross-modality generative model for its robust language understanding and rich color priors to handle the inherent ambiguity of any-level descriptions. We further design modules to align with input conditions to preserve local spatial structures and prevent the ghosting effect. With the proposed novel sampling strategy, our model achieves instance-aware colorization in diverse and complex scenarios. Extensive experimental results demonstrate our advantages of effectively handling any-level descriptions and outperforming both language-based and automatic colorization methods. 
The code and pretrained models\nare available at: https://github.com/changzheng123/L-CAD.", "keywords": "Colorization;Language-based generation;Diffusion model", "primary_area": "", "supplementary_material": "/attachment/ea25d9bfc96c3d838c5aa5d229474863f0fe63b0.pdf", "author": "Zheng Chang;Shuchen Weng;Peixuan Zhang;Yu Li;Si Li;Boxin Shi", "authorids": "~Zheng_Chang2;~Shuchen_Weng1;~Peixuan_Zhang1;~Yu_Li4;~Si_Li5;~Boxin_Shi3", "gender": "M;M;M;M;;M", "homepage": "https://changzheng123.github.io/;https://shuchenweng.github.io/;https://github.com/zpx0922;https://yu-li.github.io/;http://www.pris.net.cn/introduction/teacher/lisi;http://camera.pku.edu.cn", "dblp": ";220/4303;;34/2997-3;54/6603-1.html;69/783", "google_scholar": "VW9pyxEAAAAJ;-5qVEQsAAAAJ;;j9lwU7kAAAAJ;;K1LjZxcAAAAJ", "orcid": ";0000-0003-0777-5055;;;;0000-0001-6749-0364", "linkedin": ";;;;;", "or_profile": "~Zheng_Chang2;~Shuchen_Weng1;~Peixuan_Zhang1;~Yu_Li4;~Si_Li5;~Boxin_Shi3", "aff": "Beijing University of Posts and Telecommunications;Peking University;Beijing University of Posts and Telecommunications;International Digital Economy Academy;Beijing University of Posts and Telecommunications;Peking University", "aff_domain": "bupt.edu.cn;pku.edu.cn;bupt.edu.cn;idea.edu.cn;bupt.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Undergrad student;Principal Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchang2023lcad,\ntitle={L-{CAD}: Language-based Colorization with Any-level Descriptions using Diffusion Priors},\nauthor={Zheng Chang and Shuchen Weng and Peixuan Zhang and Yu Li and Si Li and Boxin Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TcmjewOAd1}\n}", "github": "", "project": "", "reviewers": "ZuAx;Gu3m;oFt4;whze;Gyr2", "pdf_size": 1837348, "rating": "6;6;7;7;7", "confidence": "2;4;4;3;4", "soundness": "3;3;2;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;2;2;3", "wc_summary": "112;105;114;105;64", "wc_strengths": "47;41;35;35;55", "wc_weaknesses": "236;197;47;14;71", "wc_questions": "60;3;530;16;2", "wc_limitations": "22;5;12;11;1", "wc_review": "477;351;738;181;193", "wc_reply_reviewers": "0;9;27;0;0", "wc_reply_authors": "0;0;64;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 100.0, 18.36300628982085 ], "wc_strengths_avg": [ 42.6, 7.631513611335565 ], "wc_weaknesses_avg": [ 113.0, 87.29948453456068 ], "wc_questions_avg": [ 122.2, 204.99014610463598 ], "wc_limitations_avg": [ 10.2, 7.138627319029899 ], "wc_review_avg": [ 388.0, 206.23481762301924 ], "wc_reply_reviewers_avg": [ 7.2, 10.49571341072154 ], "wc_reply_authors_avg": [ 12.8, 25.600000000000005 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18373706420271249617&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "bupt.edu.cn;pku.edu.cn;bupt.edu.cn;idea.edu.cn;bupt.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;2;0;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Peking University;International Digital Economy 
Academy", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.pku.edu.cn;", "aff_unique_abbr": "BUPT;Peking U;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "The Rashomon Importance Distribution: Getting RID of Unstable, Single Model-based Variable Importance", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71545", "id": "TczT2jiPT5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1403ab1a427050538ec59c7f570aec8b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TczT2jiPT5", "openreview": "https://openreview.net/forum?id=TczT2jiPT5", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71545", "video": "https://nips.cc/virtual/2023/poster/71545", "author_site": "Jon Donnelly, Srikar Katta, Cynthia Rudin, Edward Browne", "tldr": "", "abstract": "Quantifying variable importance is essential for answering high-stakes questions in fields like genetics, public policy, and medicine. Current methods generally calculate variable importance for a given model trained on a given dataset. However, for a given dataset, there may be many models that explain the target outcome equally well; without accounting for all possible explanations, different researchers may arrive at many conflicting yet equally valid conclusions given the same data. Additionally, even when accounting for all possible explanations for a given dataset, these insights may not generalize because not all good explanations are stable across reasonable data perturbations. We propose a new variable importance framework that quantifies the importance of a variable across the set of all good models and is stable across the data distribution. Our framework is extremely flexible and can be integrated with most existing model classes and global variable importance metrics. We demonstrate through experiments that our framework recovers variable importance rankings for complex simulation setups where other methods fail. Further, we show that our framework accurately estimates the _true importance_ of a variable for the underlying data distribution. We provide theoretical guarantees on the consistency and finite sample error rates for our estimator. 
Finally, we demonstrate its utility with a real-world case study exploring which genes are important for predicting HIV load in persons with HIV, highlighting an important gene that has not previously been studied in connection with HIV.", "keywords": "Rashomon Effect;Variable Importance;XAI;Stability;Interpretable Machine Learning", "primary_area": "", "supplementary_material": "/attachment/483c7aecb4d82a0a9adf14dd8cdcb236cc9845cd.pdf", "author": "Jon Donnelly;Srikar Katta;Cynthia Rudin;Edward P Browne", "authorids": "~Jon_Donnelly1;srikar.katta@duke.edu;~Cynthia_Rudin1;epbrowne@email.unc.edu", "gender": "M;;;", "homepage": ";;;", "dblp": "307/5438;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;", "orcid": "0000-0002-3971-1075;;;", "linkedin": ";;;", "or_profile": "~Jon_Donnelly1;srikar.katta@duke.edu;~Cynthia_Rudin1;epbrowne@email.unc.edu", "aff": "Duke University;;;", "aff_domain": "duke.edu;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\ndonnelly2023the,\ntitle={The Rashomon Importance Distribution: Getting {RID} of Unstable, Single Model-based Variable Importance},\nauthor={Jon Donnelly and Srikar Katta and Cynthia Rudin and Edward P Browne},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TczT2jiPT5}\n}", "github": "", "project": "", "reviewers": "zs1J;tvv9;eXQc;gRCY", "pdf_size": 1512144, "rating": "6;6;7;8", "confidence": "3;3;4;4", "soundness": "3;2;4;4", "novelty": "3;3;3;3", "presentation": "4;3;4;4", "wc_summary": "79;96;92;107", "wc_strengths": "168;103;138;88", "wc_weaknesses": "932;152;51;395", "wc_questions": "210;2;91;39", "wc_limitations": "1;1;9;59", "wc_review": "1390;354;381;688", "wc_reply_reviewers": "371;0;25;81", "wc_reply_authors": "140;0;0;274", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 10.012492197250394 ], "wc_strengths_avg": [ 124.25, 31.09963826156182 ], "wc_weaknesses_avg": [ 382.5, 341.0018328396491 ], "wc_questions_avg": [ 85.5, 78.52547357386646 ], "wc_limitations_avg": [ 17.5, 24.181604578687494 ], "wc_review_avg": [ 703.25, 417.635831197468 ], "wc_reply_reviewers_avg": [ 119.25, 148.27740050324593 ], "wc_reply_authors_avg": [ 103.5, 113.82772070106648 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7545899678896307330&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 9, "email": "duke.edu;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "4M: Massively Multimodal Masked Modeling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71544", "id": "TegmlsD8oQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6446566965fa38e183650728ab70318-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TegmlsD8oQ", "openreview": "https://openreview.net/forum?id=TegmlsD8oQ", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71544.png?t=1702276008.2968297", "slides": "https://nips.cc/virtual/2023/poster/71544", "video": "https://nips.cc/virtual/2023/poster/71544", "author_site": "David Mizrahi, Roman Bachmann, Oguzhan Kar, Teresa Yeo, Teresa Yeo, Mingfei Gao, Afshin Dehghan, Amir Zamir", "tldr": "", "abstract": "Current machine learning models for vision are often highly specialized and limited to a single modality and task. In contrast, recent large language models exhibit a wide range of capabilities, hinting at a possibility for similarly versatile models in computer vision.\nIn this paper, we take a step in this direction and propose a multimodal training scheme called 4M. It consists of training a single unified Transformer encoder-decoder using a masked modeling objective across a wide range of input/output modalities \u2013 including text, images, geometric, and semantic modalities, as well as neural network feature maps. 4M achieves scalability by unifying the representation space of all modalities through mapping them into discrete tokens and performing multimodal masked modeling on a small randomized subset of tokens.\n\n4M leads to models that exhibit several key capabilities: (1) they can perform a diverse set of vision tasks out of the box, (2) they excel when fine-tuned for unseen downstream tasks or new input modalities, and (3) they can function as a generative model that can be conditioned on arbitrary modalities, enabling a wide variety of expressive multimodal editing capabilities with remarkable flexibility.\n\nThrough experimental analyses, we demonstrate the potential of 4M for training versatile and scalable foundation models for vision tasks, setting the stage for further exploration in multimodal learning for vision and other domains.", "keywords": "multimodal learning;multitask learning;representation learning;transfer learning;foundation models;generative models;computer vision", "primary_area": "", "supplementary_material": "", "author": "David Mizrahi;Roman Bachmann;Oguzhan Fatih Kar;Teresa Yeo;Mingfei Gao;Afshin Dehghan;Amir Zamir", "authorids": "~David_Mizrahi1;~Roman_Bachmann1;~Oguzhan_Fatih_Kar1;~Teresa_Yeo1;~Mingfei_Gao1;~Afshin_Dehghan5;~Amir_Zamir1", "gender": ";M;M;F;;;M", "homepage": "https://dmizrahi.com;;https://ofkar.github.io/;https://aserety.github.io/;https://fly6464.github.io;;https://amirzamir.com/", "dblp": "317/6970;248/2626-1;225/3220;230/3870;67/6825;;76/8610", "google_scholar": "IF8OK3IAAAAJ;-KHAy7kAAAAJ;https://scholar.google.com.tr/citations?user=Ojtk2_MAAAAJ;jIboOyIAAAAJ;kMe-G5AAAAAJ;;RKjEFukAAAAJ", "orcid": ";0000-0001-5324-2474;;;;;", "linkedin": ";;oguzhanfatihkar/;;;;", "or_profile": "~David_Mizrahi1;~Roman_Bachmann1;~Oguzhan_Fatih_Kar1;~Teresa_Yeo1;~Mingfei_Gao1;~Afshin_Dehghan5;~Amir_Zamir1", "aff": "Apple;Apple;Google;Swiss Federal Institute of Technology Lausanne;Apple;;Swiss Federal Institute of Technology Lausanne", "aff_domain": "apple.com;apple.com;google.com;epfl.ch;apple.com;;epfl.ch", "position": "Intern;Intern;Student researcher;PhD student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nmizrahi2023m,\ntitle={4M: Massively Multimodal Masked Modeling},\nauthor={David Mizrahi and Roman Bachmann and Oguzhan Fatih Kar and Teresa Yeo and Mingfei Gao and Afshin Dehghan and Amir Zamir},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TegmlsD8oQ}\n}", "github": "", "project": "", "reviewers": 
"QEFU;RJ3x;q9W9;f31a;qLD7", "pdf_size": 19487018, "rating": "5;6;7;7;9", "confidence": "5;5;4;5;4", "soundness": "2;3;3;3;4", "novelty": "1;3;4;4;4", "presentation": "2;3;3;4;4", "wc_summary": "57;50;109;61;74", "wc_strengths": "23;33;111;56;64", "wc_weaknesses": "242;99;95;46;121", "wc_questions": "22;32;32;1;106", "wc_limitations": "20;21;9;24;13", "wc_review": "364;235;356;188;378", "wc_reply_reviewers": "68;13;48;14;14", "wc_reply_authors": "59;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.8, 1.32664991614216 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 1.16619037896906 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 70.2, 20.913153755471697 ], "wc_strengths_avg": [ 57.4, 30.6502854799103 ], "wc_weaknesses_avg": [ 120.6, 65.46021692600782 ], "wc_questions_avg": [ 38.6, 35.5505274222479 ], "wc_limitations_avg": [ 17.4, 5.535341001239218 ], "wc_review_avg": [ 304.2, 77.45553563174165 ], "wc_reply_reviewers_avg": [ 31.4, 22.623881187806834 ], "wc_reply_authors_avg": [ 11.8, 23.599999999999994 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7385489458759965, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13888985035699445612&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "apple.com;apple.com;google.com;epfl.ch;apple.com;;epfl.ch", "author_num": 7, "aff_unique_index": "0;0;1;2;0;2", "aff_unique_norm": "Apple;Google;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "Apple Inc.;Google;", "aff_unique_url": "https://www.apple.com;https://www.google.com;https://www.epfl.ch", "aff_unique_abbr": "Apple;Google;EPFL", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Mountain View;Lausanne", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Sparse Modular Activation for Efficient Sequence Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71543", "id": "TfbzX6I14i", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f0739410e1c9c5da04fa10c1f3f86b6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TfbzX6I14i", "openreview": "https://openreview.net/forum?id=TfbzX6I14i", "poster": "/media/PosterPDFs/NeurIPS%202023/71543.png?t=1701558858.1544552", "slides": "https://nips.cc/virtual/2023/poster/71543", "video": "https://nips.cc/virtual/2023/poster/71543", "author_site": "Liliang Ren, Yang Liu, Shuohang Wang, Yichong Xu, Chenguang Zhu, Cheng Xiang Zhai", "tldr": "", "abstract": "Recent hybrid models combining Linear State Space Models (SSMs) with self-attention mechanisms have demonstrated impressive results across a range of sequence modeling tasks. However, current approaches apply attention modules statically and uniformly to all elements in the input sequences, leading to sub-optimal quality-efficiency trade-offs. To address this limitation, we introduce Sparse Modular Activation (SMA), a general mechanism enabling neural networks to sparsely and dynamically activate sub-modules for sequence elements in a differentiable manner. Through allowing each element to skip non-activated sub-modules, SMA reduces computation and memory consumption of neural networks at both training and inference stages. 
To validate the effectiveness of SMA on sequence modeling, we design a novel neural architecture, SeqBoat, which employs SMA to sparsely activate a Gated Attention Unit (GAU) based on the state representations learned from an SSM. By constraining the GAU to only conduct local attention on the activated inputs, SeqBoat can achieve linear inference complexity with theoretically infinite attention span, and provide substantially better quality-efficiency trade-off than the chunking-based models. With experiments on a wide range of tasks, including long sequence modeling, speech classification and language modeling, SeqBoat brings new state-of-the-art results among hybrid models with linear complexity, and reveals the amount of attention needed for each task through the learned sparse activation patterns. Our code is publicly available at https://github.com/renll/SeqBoat.", "keywords": "Sequence Modeling;Modularity;Sparsity;Attention Mechanism;State Space Model;Mixture of Experts;Neural Network;Transformer", "primary_area": "", "supplementary_material": "", "author": "Liliang Ren;Yang Liu;Shuohang Wang;Yichong Xu;Chenguang Zhu;ChengXiang Zhai", "authorids": "~Liliang_Ren1;~Yang_Liu50;~Shuohang_Wang1;~Yichong_Xu1;~Chenguang_Zhu1;~ChengXiang_Zhai1", "gender": ";M;M;M;M;M", "homepage": "https://renll.github.io/;https://nlp-yang.github.io/;;http://xycking.wixsite.com/yichongxu;;http://czhai.cs.illinois.edu/", "dblp": "68/7844;;173/5469.html;154/6421;48/7536-1.html;z/ChengXiangZhai", "google_scholar": "9MBMglQAAAAJ;HxTr-CtMdrsC;mN-IO6wAAAAJ;sYza2XwAAAAJ;1b2kKWoAAAAJ;YU-baPIAAAAJ", "orcid": ";;;;;0000-0002-6434-3702", "linkedin": "https://linkedin.com/in/liliang-ren-ba0529181/;;;;;", "or_profile": "~Liliang_Ren1;~Yang_Liu50;~Shuohang_Wang1;~Yichong_Xu1;~Chenguang_Zhu1;~ChengXiang_Zhai1", "aff": "University of Illinois, Urbana Champaign;Microsoft;Microsoft;Microsoft;Zoom;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;microsoft.com;microsoft.com;microsoft.com;zoom.us;illinois.edu", "position": "PhD student;Researcher;Researcher;Senior Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nren2023sparse,\ntitle={Sparse Modular Activation for Efficient Sequence Modeling},\nauthor={Liliang Ren and Yang Liu and Shuohang Wang and Yichong Xu and Chenguang Zhu and ChengXiang Zhai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TfbzX6I14i}\n}", "github": "", "project": "", "reviewers": "kQJ9;KtV6;oaMm;8NeG", "pdf_size": 709319, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;4;3;4", "novelty": "2;3;3;4", "presentation": "2;3;2;3", "wc_summary": "125;98;121;144", "wc_strengths": "80;93;87;52", "wc_weaknesses": "118;87;643;139", "wc_questions": "84;274;429;364", "wc_limitations": "1;11;14;1", "wc_review": "408;563;1294;700", "wc_reply_reviewers": "11;0;1007;0", "wc_reply_authors": "0;0;726;0", "reply_reviewers": "1;0;3;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 122.0, 16.355427233796124 ], "wc_strengths_avg": [ 78.0, 15.700318468107582 ], "wc_weaknesses_avg": [ 246.75, 229.52164930568097 ], "wc_questions_avg": [ 287.75, 129.87373675997776 ], "wc_limitations_avg": [ 6.75, 5.84700778176325 ], "wc_review_avg": [ 741.25, 335.4335813540439 ], "wc_reply_reviewers_avg": 
[ 254.5, 434.4792860424994 ], "wc_reply_authors_avg": [ 181.5, 314.36722157375124 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2125007758875900919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "illinois.edu;microsoft.com;microsoft.com;microsoft.com;zoom.us;illinois.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;2;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Microsoft;Zoom Video Communications Inc.", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://illinois.edu;https://www.microsoft.com;https://zoom.us", "aff_unique_abbr": "UIUC;Microsoft;Zoom", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Datasets and Benchmarks for Nanophotonic Structure and Parametric Design Simulations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73583", "id": "Th33sYMCQd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f12c9975ff4f2e44a5a26ef01b0b249-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Th33sYMCQd", "openreview": "https://openreview.net/forum?id=Th33sYMCQd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73583", "video": "https://nips.cc/virtual/2023/poster/73583", "author_site": "Jungtaek Kim, Mingxuan Li, Oliver Hinder, Paul Leu", "tldr": "", "abstract": "Nanophotonic structures have versatile applications including solar cells, anti-reflective coatings, electromagnetic interference shielding, optical filters, and light emitting diodes. To design and understand these nanophotonic structures, electrodynamic simulations are essential. These simulations enable us to model electromagnetic fields over time and calculate optical properties. In this work, we introduce frameworks and benchmarks to evaluate nanophotonic structures in the context of parametric structure design problems. The benchmarks are instrumental in assessing the performance of optimization algorithms and identifying an optimal structure based on target optical properties. 
Moreover, we explore the impact of varying grid sizes in electrodynamic simulations, shedding light on how evaluation fidelity can be strategically leveraged in enhancing structure designs.", "keywords": "Nanophotonic structures;Nanophotonic structure simulations;Parametric structure designs;Parametric design simulations;Nanophotonic structure optimization", "primary_area": "", "supplementary_material": "", "author": "Jungtaek Kim;Mingxuan Li;Oliver Hinder;Paul Leu", "authorids": "~Jungtaek_Kim1;~Mingxuan_Li3;~Oliver_Hinder1;~Paul_Leu1", "gender": "M;M;M;M", "homepage": "https://jungtaekkim.github.io;;http://www.oliverhinder.com;http://lamp.pitt.edu", "dblp": "31/3193-1;;;", "google_scholar": "KXNUYWgAAAAJ;EM5mOIUAAAAJ;FiBMfBsAAAAJ;", "orcid": "0000-0002-1905-1399;;;", "linkedin": "jungtaekkim;;;", "or_profile": "~Jungtaek_Kim1;~Mingxuan_Li3;~Oliver_Hinder1;~Paul_Leu1", "aff": "University of Pittsburgh;University of Pittsburgh;University of Pittsburgh;University of Pittsburgh", "aff_domain": "pitt.edu;pitt.edu;pitt.edu;pitt.edu", "position": "Postdoc;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2023datasets,\ntitle={Datasets and Benchmarks for Nanophotonic Structure and Parametric Design Simulations},\nauthor={Jungtaek Kim and Mingxuan Li and Oliver Hinder and Paul Leu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Th33sYMCQd}\n}", "github": "", "project": "", "reviewers": "eNci;s1HW;ZH3g;wCnZ;MbyG", "pdf_size": 22066314, "rating": "4;4;7;7;7", "confidence": "3;3;1;1;4", "wc_summary_and_contributions": "31;352;49;58;131", "wc_strengths": "33;48;25;25;85", "wc_improvement": "69;11;9;55;271", "wc_limitations": "9;10;4;6;14", "wc_correctness": "4;186;11;68;18", "wc_clarity": "1;211;5;2;25", "wc_relation_to_prior_work": "8;94;7;74;82", "wc_documentation": "1;74;4;5;39", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "157;987;115;294;666", "wc_reply_reviewers": "0;0;0;21;601", "wc_reply_authors": "392;1063;66;683;1181", "reply_reviewers": "0;0;0;1;2", "reply_authors": "2;3;2;3;5", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 2.4, 1.2 ], "wc_summary_and_contributions_avg": [ 124.2, 118.88044414452699 ], "wc_strengths_avg": [ 43.2, 22.52465316048174 ], "wc_improvement_avg": [ 83.0, 96.93709300365882 ], "wc_limitations_avg": [ 8.6, 3.4409301068170506 ], "wc_correctness_avg": [ 57.4, 68.1281146077007 ], "wc_clarity_avg": [ 48.8, 81.57058293281959 ], "wc_relation_to_prior_work_avg": [ 53.0, 37.69350076604719 ], "wc_documentation_avg": [ 24.6, 28.330901856453494 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 443.8, 333.83912293198955 ], "wc_reply_reviewers_avg": [ 124.4, 238.4387552391599 ], "wc_reply_authors_avg": [ 677.0, 414.1482826235067 ], "reply_reviewers_avg": [ 0.6, 0.8 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12151332944871153836&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "pitt.edu;pitt.edu;pitt.edu;pitt.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pittsburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.pitt.edu", "aff_unique_abbr": "Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian target optimisation for high-precision holographic optogenetics", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71542", "id": "TiFMYdQiqp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/240225294cdd2c9b692c2519d3278a08-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TiFMYdQiqp", "openreview": "https://openreview.net/forum?id=TiFMYdQiqp", "poster": "/media/PosterPDFs/NeurIPS%202023/71542.png?t=1702056215.8911293", "slides": "https://nips.cc/virtual/2023/poster/71542", "video": "https://nips.cc/virtual/2023/poster/71542", "author_site": "Marcus Triplett, Marta Gajowa, Hillel Adesnik, Liam Paninski", "tldr": "", "abstract": "Two-photon optogenetics has transformed our ability to probe the structure and function of neural circuits. However, achieving precise optogenetic control of neural ensemble activity has remained fundamentally constrained by the problem of off-target stimulation (OTS): the inadvertent activation of nearby non-target neurons due to imperfect confinement of light onto target neurons. Here we propose a novel computational approach to this problem called Bayesian target optimisation. Our approach uses nonparametric Bayesian inference to model neural responses to optogenetic stimulation, and then optimises the laser powers and optical target locations needed to achieve a desired activity pattern with minimal OTS. We validate our approach in simulations and using data from in vitro experiments, showing that Bayesian target optimisation considerably reduces OTS across all conditions we test. Together, these results establish our ability to overcome OTS, enabling optogenetic stimulation with substantially improved precision.", "keywords": "Neuroscience;neural stimulation;optogenetics;calcium imaging", "primary_area": "", "supplementary_material": "/attachment/73bec3c1190c5d2135670106e9b276815cba1305.pdf", "author": "Marcus Triplett;Marta Agnieszka Gajowa;Hillel Adesnik;Liam Paninski", "authorids": "~Marcus_Triplett1;~Marta_Agnieszka_Gajowa1;hadesnik@berkeley.edu;~Liam_Paninski1", "gender": ";F;;", "homepage": ";;;", "dblp": ";;;94/2691", "google_scholar": ";;;", "orcid": ";0000-0002-6399-6883;;", "linkedin": ";martagajowa/;;", "or_profile": "~Marcus_Triplett1;~Marta_Agnieszka_Gajowa1;hadesnik@berkeley.edu;~Liam_Paninski1", "aff": ";University of California, Berkeley;;Columbia University", "aff_domain": ";berkeley.edu;;columbia.edu", "position": ";Postdoc;;Full Professor", "bibtex": "@inproceedings{\ntriplett2023bayesian,\ntitle={Bayesian target optimisation for high-precision holographic optogenetics},\nauthor={Marcus Triplett and Marta Agnieszka Gajowa and Hillel Adesnik and Liam Paninski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TiFMYdQiqp}\n}", "github": "", "project": "", "reviewers": "FU6t;G2K6;4tSm;5v9K", "pdf_size": 5896880, "rating": "6;7;7;8", "confidence": "4;5;4;4", "soundness": "3;4;3;3", "novelty": "2;3;3;4", "presentation": "3;4;4;4", "wc_summary": "29;128;83;159", "wc_strengths": "26;18;262;182", "wc_weaknesses": "31;96;221;142", "wc_questions": "68;157;287;64", "wc_limitations": "5;33;44;7", "wc_review": "159;432;897;554", "wc_reply_reviewers": "0;17;37;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 
0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.75, 48.976397376695644 ], "wc_strengths_avg": [ 122.0, 103.96153134693621 ], "wc_weaknesses_avg": [ 122.5, 69.2044073740972 ], "wc_questions_avg": [ 144.0, 90.54556863811723 ], "wc_limitations_avg": [ 22.25, 16.723860200324566 ], "wc_review_avg": [ 510.5, 265.04197780729 ], "wc_reply_reviewers_avg": [ 18.5, 13.124404748406688 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2368469698226993789&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": ";berkeley.edu;;columbia.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Berkeley;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.columbia.edu", "aff_unique_abbr": "UC Berkeley;Columbia", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Loss Dynamics of Temporal Difference Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71541", "id": "Tj0eXVPnRX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ea04b568a2deb2d000c59f3a72829b5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Tj0eXVPnRX", "openreview": "https://openreview.net/forum?id=Tj0eXVPnRX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71541", "video": "https://nips.cc/virtual/2023/poster/71541", "author_site": "Blake Bordelon, Paul Masset, Henry Kuo, Cengiz Pehlevan", "tldr": "", "abstract": "Reinforcement learning has been successful across several applications in which agents have to learn to act in environments with sparse feedback. However, despite this empirical success there is still a lack of theoretical understanding of how the parameters of reinforcement learning models and the features used to represent states interact to control the dynamics of learning. In this work, we use concepts from statistical physics, to study the typical case learning curves for temporal difference learning of a value function with linear function approximators. Our theory is derived under a Gaussian equivalence hypothesis where averages over the random trajectories are replaced with temporally correlated Gaussian feature averages and we validate our assumptions on small scale Markov Decision Processes. We find that the stochastic semi-gradient noise due to subsampling the space of possible episodes leads to significant plateaus in the value error, unlike in traditional gradient descent dynamics. We study how learning dynamics and plateaus depend on feature structure, learning rate, discount factor, and reward function. We then analyze how strategies like learning rate annealing and reward shaping can favorably alter learning dynamics and plateaus. 
To conclude, our work introduces new tools to open a new direction towards developing a theory of learning dynamics in reinforcement learning.", "keywords": "Reinforcement Learning;Statistical Mechanics;Stochastic Gradient Descent", "primary_area": "", "supplementary_material": "/attachment/934e7da23546f17ac1f4da204bbf1625edb06a6e.zip", "author": "Blake Bordelon;Paul Masset;Henry Kuo;Cengiz Pehlevan", "authorids": "~Blake_Bordelon1;~Paul_Masset1;~Henry_Kuo1;~Cengiz_Pehlevan2", "gender": "M;M;;", "homepage": "https://blakebordelon.github.io/;https://scholar.harvard.edu/paul-masset;;https://pehlevan.seas.harvard.edu/", "dblp": "228/6993;158/2619;;145/3480", "google_scholar": "yeQ8_pgAAAAJ;Mi1NbLkAAAAJ;;veDLTPEAAAAJ", "orcid": "0000-0003-0455-9445;0000-0003-2001-7515;;0000-0001-9767-6063", "linkedin": ";;;", "or_profile": "~Blake_Bordelon1;~Paul_Masset1;~Henry_Kuo1;~Cengiz_Pehlevan2", "aff": "Harvard University;Harvard University;;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;harvard.edu;;seas.harvard.edu", "position": "PhD student;Postdoc;;Assistant Professor", "bibtex": "@inproceedings{\nbordelon2023loss,\ntitle={Loss Dynamics of Temporal Difference Reinforcement Learning},\nauthor={Blake Bordelon and Paul Masset and Henry Kuo and Cengiz Pehlevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Tj0eXVPnRX}\n}", "github": "", "project": "", "reviewers": "u1tm;KZaZ;5FZT;mCei;kcB9", "pdf_size": 1030156, "rating": "3;5;5;7;7", "confidence": "3;3;1;2;4", "soundness": "2;3;3;4;3", "novelty": "1;3;3;4;3", "presentation": "2;2;3;4;4", "wc_summary": "52;53;79;140;240", "wc_strengths": "21;66;101;58;55", "wc_weaknesses": "214;96;36;82;193", "wc_questions": "30;329;43;99;134", "wc_limitations": "11;1;6;6;11", "wc_review": "328;545;265;385;633", "wc_reply_reviewers": "202;0;83;37;145", "wc_reply_authors": "362;37;212;0;146", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;2;3;1;2", "rating_avg": [ 5.4, 1.4966629547095764 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 112.8, 71.18258213917223 ], "wc_strengths_avg": [ 60.2, 25.560907652115954 ], "wc_weaknesses_avg": [ 124.2, 68.04821819856858 ], "wc_questions_avg": [ 127.0, 107.79795916435523 ], "wc_limitations_avg": [ 7.0, 3.7416573867739413 ], "wc_review_avg": [ 431.2, 137.1720088064617 ], "wc_reply_reviewers_avg": [ 93.4, 72.75877953896698 ], "wc_reply_authors_avg": [ 151.4, 129.66356465869663 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.1048284836721918, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8715847915890294052&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "email": "harvard.edu;harvard.edu;;seas.harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Exact recovery and Bregman hard clustering of node-attributed Stochastic Block Model", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/71540", "id": "TjJJmcHw9p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/770b3ecb70147a2d2f18d2964fafcdd5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TjJJmcHw9p", "openreview": "https://openreview.net/forum?id=TjJJmcHw9p", "poster": "/media/PosterPDFs/NeurIPS%202023/71540.png?t=1701687367.004646", "slides": "https://nips.cc/virtual/2023/poster/71540", "video": "https://nips.cc/virtual/2023/poster/71540", "author_site": "Maximilien Dreveton, Felipe Fernandes, Daniel Figueiredo", "tldr": "", "abstract": "Classic network clustering tackles the problem of identifying sets of nodes (communities) that have similar connection patterns. However, in many scenarios nodes also have attributes that are correlated and can also be used to identify node clusters. Thus, network information (edges) and node information (attributes) can be jointly leveraged to design high-performance clustering algorithms. Under a general model for the network and node attributes, this work establishes an information-theoretic criteria for the exact recovery of community labels and characterizes a phase transition determined by the Chernoff-Hellinger divergence of the model. The criteria shows how network and attribute information can be exchanged in order to have exact recovery (e.g., more reliable network information requires less reliable attribute information). This work also presents an iterative clustering algorithm that maximizes the joint likelihood, assuming that the probability distribution of network interactions and node attributes belong to exponential families. This covers a broad range of possible interactions (e.g., edges with weights) and attributes (e.g., non-Gaussian models) while also exploring the connection between exponential families and Bregman divergences. Extensive numerical experiments using synthetic and real data indicate that the proposed algorithm outperforms algorithms that leverage only network or only attribute information as well as recently proposed algorithms that perform clustering using both sources of information. The contributions of this work provide insights into the fundamental limits and practical techniques for inferring community labels on node-attributed networks.", "keywords": "community detection;stochastic block model;bregman divergence", "primary_area": "", "supplementary_material": "", "author": "Maximilien Dreveton;Felipe Schreiber Fernandes;Daniel R. Figueiredo", "authorids": "~Maximilien_Dreveton1;felipesc@cos.ufrj.br;~Daniel_R._Figueiredo1", "gender": ";;M", "homepage": "https://maximiliendreveton.fr/;;https://www.cos.ufrj.br/~daniel/", "dblp": "244/6486;;46/4473", "google_scholar": "y7tyN8QAAAAJ;;j4YbANwAAAAJ", "orcid": "0000-0001-6613-0615;;0000-0001-9341-6619", "linkedin": "maximilien-dreveton-42889a107/;;", "or_profile": "~Maximilien_Dreveton1;felipesc@cos.ufrj.br;~Daniel_R._Figueiredo1", "aff": "EPFL - EPF Lausanne;;Universidade Federal do Rio de Janeiro", "aff_domain": "epfl.ch;;ufrj.br", "position": "Postdoc;;Associate Professor", "bibtex": "@inproceedings{\ndreveton2023exact,\ntitle={Exact recovery and Bregman hard clustering of node-attributed Stochastic Block Model},\nauthor={Maximilien Dreveton and Felipe Schreiber Fernandes and Daniel R. 
Figueiredo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TjJJmcHw9p}\n}", "github": "", "project": "", "reviewers": "1Ec2;cu9t;hHhv;coB1", "pdf_size": 638287, "rating": "6;6;6;7", "confidence": "4;4;3;3", "soundness": "3;4;4;3", "novelty": "2;3;3;3", "presentation": "2;4;3;4", "wc_summary": "171;103;85;71", "wc_strengths": "39;78;100;151", "wc_weaknesses": "265;119;99;142", "wc_questions": "70;108;1;40", "wc_limitations": "15;1;1;18", "wc_review": "560;409;286;422", "wc_reply_reviewers": "136;0;93;0", "wc_reply_authors": "315;0;315;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 107.5, 38.37642505497353 ], "wc_strengths_avg": [ 92.0, 40.46603514059662 ], "wc_weaknesses_avg": [ 156.25, 64.6040826883255 ], "wc_questions_avg": [ 54.75, 39.28978874975024 ], "wc_limitations_avg": [ 8.75, 7.8222439235810075 ], "wc_review_avg": [ 419.25, 97.05507457109083 ], "wc_reply_reviewers_avg": [ 57.25, 59.23417510187848 ], "wc_reply_authors_avg": [ 157.5, 157.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13853116584316110261&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "epfl.ch;;ufrj.br", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "EPFL;Universidade Federal do Rio de Janeiro", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.ufrj.br", "aff_unique_abbr": "EPFL;UFRJ", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Lausanne;Rio de Janeiro", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Brazil" }, { "title": "Kernel Stein Discrepancy thinning: a theoretical perspective of pathologies and a practical fix with regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71539", "id": "TjgG4UT62W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a8eb202c060b7d81f5889631cbcd47e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TjgG4UT62W", "openreview": "https://openreview.net/forum?id=TjgG4UT62W", "poster": "/media/PosterPDFs/NeurIPS%202023/71539.png?t=1699522454.941108", "slides": "https://nips.cc/virtual/2023/poster/71539", "video": "https://nips.cc/virtual/2023/poster/71539", "author_site": "Clement Benard, Brian Staber, S\u00e9bastien Da Veiga", "tldr": "", "abstract": "Stein thinning is a promising algorithm proposed by Riabiz et al. (2022) for post-processing outputs of Markov chain Monte Carlo (MCMC). The main principle is to greedily minimize the kernelized Stein discrepancy (KSD), which only requires the gradient of the log-target distribution, and is thus well-suited for Bayesian inference. The main advantages of Stein thinning are the automatic removal of the burn-in period, the correction of the bias introduced by recent MCMC algorithms, and the asymptotic properties of convergence towards the target distribution. Nevertheless, Stein thinning suffers from several empirical pathologies, which may result in poor approximations, as observed in the literature. 
In this article, we conduct a theoretical analysis of these pathologies to clearly identify the mechanisms at stake and to suggest improved strategies. Then, we introduce the regularized Stein thinning algorithm to alleviate the identified pathologies. Finally, theoretical guarantees and extensive experiments show the high efficiency of the proposed algorithm. An implementation of regularized Stein thinning as the kernax library in Python and JAX is available at https://gitlab.com/drti/kernax.", "keywords": "Bayesian inference;Markov chain Monte Carlo;kernelized Stein discrepancy;Stein thinning;kernel methods", "primary_area": "", "supplementary_material": "", "author": "Clement Benard;Brian Staber;S\u00e9bastien Da Veiga", "authorids": "~Clement_Benard1;~Brian_Staber1;~S\u00e9bastien_Da_Veiga1", "gender": "M;M;", "homepage": "https://clementbenard.github.io/;https://bstaber.github.io/;", "dblp": "247/6370;181/2793;", "google_scholar": "9gJXPUcAAAAJ;https://scholar.google.fr/citations?user=61j2VawAAAAJ;", "orcid": ";;", "linkedin": "clement-benard-308a267a/;brian-staber/;", "or_profile": "~Clement_Benard1;~Brian_Staber1;~S\u00e9bastien_Da_Veiga1", "aff": "Safran Tech;Safran;", "aff_domain": "safrangroup.com;safrangroup.com;", "position": "Researcher;Researcher;", "bibtex": "@inproceedings{\nbenard2023kernel,\ntitle={Kernel Stein Discrepancy thinning: a theoretical perspective of pathologies and a practical fix with regularization},\nauthor={Clement Benard and Brian Staber and S{\\'e}bastien Da Veiga},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TjgG4UT62W}\n}", "github": "", "project": "", "reviewers": "GM6g;XEkq;zNe7;4eLX", "pdf_size": 1993764, "rating": "7;7;7;7", "confidence": "2;3;3;3", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "81;66;77;130", "wc_strengths": "179;61;51;104", "wc_weaknesses": "181;224;50;86", "wc_questions": "175;104;51;20", "wc_limitations": "16;33;4;9", "wc_review": "632;488;233;349", "wc_reply_reviewers": "19;108;12;75", "wc_reply_authors": "0;85;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 24.58149710656371 ], "wc_strengths_avg": [ 98.75, 50.43002577829998 ], "wc_weaknesses_avg": [ 135.25, 70.11196402897298 ], "wc_questions_avg": [ 87.5, 58.77286788986905 ], "wc_limitations_avg": [ 15.5, 10.965856099730654 ], "wc_review_avg": [ 425.5, 149.54681541243198 ], "wc_reply_reviewers_avg": [ 53.5, 39.82775414205526 ], "wc_reply_authors_avg": [ 21.25, 36.80607966083864 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8924635925504083163&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 17, "email": "safrangroup.com;safrangroup.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Safran Tech;Safran", "aff_unique_dep": ";", "aff_unique_url": "https://www.safrantech.com;https://www.safran-group.com", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Predict-then-Calibrate: A 
New Perspective of Robust Contextual LP", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71538", "id": "TnTDiCppx5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/397271e11322fae8ba7f827c50ca8d9b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TnTDiCppx5", "openreview": "https://openreview.net/forum?id=TnTDiCppx5", "poster": "/media/PosterPDFs/NeurIPS%202023/71538.png?t=1701839195.1990921", "slides": "https://nips.cc/virtual/2023/poster/71538", "video": "https://nips.cc/virtual/2023/poster/71538", "author_site": "Chunlin Sun, Linyu Liu, Xiaocheng Li", "tldr": "", "abstract": "Contextual optimization, also known as predict-then-optimize or prescriptive analytics, considers an optimization problem with the presence of covariates (context or side information). The goal is to learn a prediction model (from the training data) that predicts the objective function from the covariates, and then in the test phase, solve the optimization problem with the covariates but without the observation of the objective function. In this paper, we consider a risk-sensitive version of the problem and propose a generic algorithm design paradigm called predict-then-calibrate. The idea is to first develop a prediction model without concern for the downstream risk profile or robustness guarantee, and then utilize calibration (or recalibration) methods to quantify the uncertainty of the prediction. While the existing methods suffer from either a restricted choice of the prediction model or strong assumptions on the underlying data, we show the disentangling of the prediction model and the calibration/uncertainty quantification has several advantages. First, it imposes no restriction on the prediction model and thus fully unleashes the potential of off-the-shelf machine learning methods. Second, the derivation of the risk and robustness guarantee can be made independent of the choice of the prediction model through a data-splitting idea. Third, our paradigm of predict-then-calibrate applies to both (risk-sensitive) robust and (risk-neutral) distributionally robust optimization (DRO) formulations. Theoretically, it gives new generalization bounds for the contextual LP problem and sheds light on the existing results of DRO for contextual LP. 
Numerical experiments further reinforce the advantage of the predict-then-calibrate paradigm in that an improvement on either the prediction model or the calibration model will lead to a better final performance.", "keywords": "Uncertainty Quantification;Contextual LP;Robust Optimization;Distributionally Robust Optimization", "primary_area": "", "supplementary_material": "/attachment/4985df5ca7ef41e471f21ea2927726b948b65212.zip", "author": "Chunlin Sun;Linyu Liu;Xiaocheng Li", "authorids": "~Chunlin_Sun1;~Linyu_Liu1;~Xiaocheng_Li1", "gender": "M;F;M", "homepage": "https://chunlinsun.github.io/;;http://xiaocheng-li.github.io/", "dblp": "260/0567;248/1022;171/2155", "google_scholar": "2MMNRmoAAAAJ;;", "orcid": ";0000-0003-2729-569X;", "linkedin": "chunlin-sun-ab8334139/;;", "or_profile": "~Chunlin_Sun1;~Linyu_Liu1;~Xiaocheng_Li1", "aff": "Stanford University;Tsinghua University;Imperial College London", "aff_domain": "stanford.edu;mail.tsinghua.edu.cn;imperial.ac.uk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsun2023predictthencalibrate,\ntitle={Predict-then-Calibrate: A New Perspective of Robust Contextual {LP}},\nauthor={Chunlin Sun and Linyu Liu and Xiaocheng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TnTDiCppx5}\n}", "github": "", "project": "", "reviewers": "tVPZ;VdM1;1GB8;kcWq;4BGT", "pdf_size": 0, "rating": "4;6;6;6;7", "confidence": "3;3;3;3;3", "soundness": "3;3;3;2;4", "novelty": "2;3;2;3;4", "presentation": "2;3;3;3;4", "wc_summary": "142;78;257;78;65", "wc_strengths": "34;77;133;96;83", "wc_weaknesses": "577;39;180;34;68", "wc_questions": "116;12;617;135;148", "wc_limitations": "90;9;26;9;8", "wc_review": "959;215;1213;352;372", "wc_reply_reviewers": "734;23;5;0;0", "wc_reply_authors": "1171;0;0;0;0", "reply_reviewers": "3;1;1;0;0", "reply_authors": "4;1;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 124.0, 71.73004949113029 ], "wc_strengths_avg": [ 84.6, 31.916140117501676 ], "wc_weaknesses_avg": [ 179.6, 205.5963034687151 ], "wc_questions_avg": [ 205.6, 211.2160978713507 ], "wc_limitations_avg": [ 28.4, 31.52522799283139 ], "wc_review_avg": [ 622.2, 390.8705156442476 ], "wc_reply_reviewers_avg": [ 152.4, 290.9230826180694 ], "wc_reply_authors_avg": [ 234.2, 468.4 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1179604459545176833&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stanford.edu;mail.tsinghua.edu.cn;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Stanford University;Tsinghua University;Imperial College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.tsinghua.edu.cn;https://www.imperial.ac.uk", "aff_unique_abbr": "Stanford;THU;ICL", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "SatLM: Satisfiability-Aided Language Models Using Declarative Prompting", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71537", "id": "TqW5PL1Poi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e9c7d4a48bdac81a58f983a64aaf42b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TqW5PL1Poi", "openreview": "https://openreview.net/forum?id=TqW5PL1Poi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71537", "video": "https://nips.cc/virtual/2023/poster/71537", "author_site": "Xi Ye, Qiaochu Chen, Isil Dillig, Greg Durrett", "tldr": "", "abstract": "Prior work has combined chain-of-thought prompting in large language models (LLMs) with programmatic representations to perform effective and transparent reasoning. While such an approach works well for tasks that only require forward reasoning (e.g., straightforward arithmetic), it is less effective for constraint solving problems that require more sophisticated planning and search. In this paper, we propose a new satisfiability-aided language modeling (SatLM) approach for improving the reasoning capabilities of LLMs. We use an LLM to generate a declarative task specification rather than an imperative program and leverage an off-the-shelf automated theorem prover to derive the final answer. This approach has two key advantages. The declarative specification is closer to the problem description than the reasoning steps are, so the LLM can parse it out of the description more accurately. Furthermore, by offloading the actual reasoning task to an automated theorem prover, our approach can guarantee the correctness of the answer with respect to the parsed specification and avoid planning errors in the solving process. We evaluate SATLM on 8 different datasets and show that it consistently outperforms program-aided LMs in the imperative paradigm. 
In particular, SATLM outperforms program-aided LMs by 23% on a challenging subset of the GSM arithmetic reasoning dataset; SATLM also achieves a new SoTA on LSAT and BoardgameQA, surpassing previous models that are trained on the respective training sets.", "keywords": "Reasoning;Chain-of-thought;Logical Reasoning;Arithmetic Reasoning;Prompting;In-Context Learning;Large Language Model", "primary_area": "", "supplementary_material": "/attachment/5125c9e7de1f64ce5dd02c6561d952e74b49d48f.zip", "author": "Xi Ye;Qiaochu Chen;Isil Dillig;Greg Durrett", "authorids": "~Xi_Ye2;~Qiaochu_Chen1;~Isil_Dillig1;~Greg_Durrett1", "gender": ";F;F;M", "homepage": "https://xiye17.github.io/;https://www.cs.utexas.edu/~qchen/;https://www.cs.utexas.edu/~isil/;http://www.cs.utexas.edu/~gdurrett/", "dblp": ";247/1177.html;;69/7968", "google_scholar": "qH83GlAAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ", "orcid": ";0000-0003-4680-5157;;", "linkedin": ";;;", "or_profile": "~Xi_Ye2;~Qiaochu_Chen1;~Isil_Dillig1;~Greg_Durrett1", "aff": "UT Austin;The University of Texas at Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nye2023satlm,\ntitle={Sat{LM}: Satisfiability-Aided Language Models Using Declarative Prompting},\nauthor={Xi Ye and Qiaochu Chen and Isil Dillig and Greg Durrett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TqW5PL1Poi}\n}", "github": "", "project": "", "reviewers": "C7Hb;p8AX;Gm6N;Zc4x;nffC", "pdf_size": 583103, "rating": "5;5;5;6;7", "confidence": "3;4;3;4;4", "soundness": "3;3;2;3;4", "novelty": "3;2;2;2;3", "presentation": "4;3;3;3;3", "wc_summary": "61;55;118;55;55", "wc_strengths": "82;8;53;42;37", "wc_weaknesses": "134;239;143;50;202", "wc_questions": "8;70;127;90;90", "wc_limitations": "14;49;17;7;171", "wc_review": "299;421;458;244;555", "wc_reply_reviewers": "56;18;39;175;101", "wc_reply_authors": "123;0;19;147;26", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 68.8, 24.709512338368803 ], "wc_strengths_avg": [ 44.4, 23.971649922356203 ], "wc_weaknesses_avg": [ 153.6, 64.59907120075334 ], "wc_questions_avg": [ 77.0, 39.11010099705701 ], "wc_limitations_avg": [ 51.6, 61.421820227017044 ], "wc_review_avg": [ 395.4, 111.5896052506684 ], "wc_reply_reviewers_avg": [ 77.8, 55.75446170487166 ], "wc_reply_authors_avg": [ 63.0, 59.88321968631947 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7048577272687511189&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "TrcpLUcYfL", "title": "Block-local learning with probabilistic latent representations", "track": "main", "status": "Reject", "tldr": "", "abstract": "The ubiquitous backpropagation algorithm requires sequential updates across blocks of a network, introducing a locking problem.\nMoreover, backpropagation relies on the transpose of weight matrices to calculate updates, introducing a weight transport problem across blocks. Both these issues prevent efficient parallelisation and horizontal scaling of models across devices. We propose a new method that introduces a twin network that propagates information backwards from the targets to the input to provide auxiliary local losses. Forward and backward propagation can work in parallel and with different sets of weights, addressing the problems of weight transport and locking. Our approach derives from a statistical interpretation of end-to-end training which treats activations of network layers as parameters of probability distributions. The resulting learning framework uses these parameters locally to assess the matching between forward and backward information. Error backpropagation is then performed locally within each block, leading to `block-local' learning. Several previously proposed alternatives to error backpropagation emerge as special cases of our model. We present results on various tasks and architectures, including transformers, demonstrating state-of-the-art performance using block-local learning. These results provide a new principled framework to train very large networks in a distributed setting and can also be applied in neuromorphic systems.", "keywords": "alternative to backprop;locking problem;probabilistic models;weight transport problem", "primary_area": "", "supplementary_material": "/attachment/f851da4be6a7666bb8f886229c67d6c0ed392bbd.pdf", "author": "David Kappel;Khaleelulla Khan Nazeer;Cabrel Teguemne Fokam;Christian Mayr;Anand Subramoney", "authorids": "~David_Kappel2;~Khaleelulla_Khan_Nazeer1;cabrel.teguemnefokam@ini.rub.de;~Christian_Mayr1;~Anand_Subramoney2", "gender": "M;M;;M;", "homepage": "https://www.ini.rub.de/the_institute/people/david-kappel/;http://khaleelkhan.com/;;https://tu-dresden.de/ing/elektrotechnik/iee/hpsn;", "dblp": ";322/3902;;44/6754;", "google_scholar": "https://scholar.google.de/citations?user=csoW51sAAAAJ;xhbTUSsAAAAJ;;;", "orcid": "0000-0001-5942-4033;0000-0001-8525-8702;;;", "linkedin": ";khaleel-khan/;;;", "or_profile": "~David_Kappel2;~Khaleelulla_Khan_Nazeer1;cabrel.teguemnefokam@ini.rub.de;~Christian_Mayr1;~Anand_Subramoney2", "aff": "Ruhr-Universt\u00e4t Bochum;Technische Universit\u00e4t Dresden;;TU Dresden;", "aff_domain": "rub.de;tu-dresden.de;;tu-dresden.de;", "position": "Postdoc;PhD student;;Full Professor;", "bibtex": "@misc{\nkappel2023blocklocal,\ntitle={Block-local learning with probabilistic latent representations},\nauthor={David Kappel and Khaleelulla Khan Nazeer and Cabrel Teguemne Fokam and Christian Mayr and Anand Subramoney},\nyear={2023},\nurl={https://openreview.net/forum?id=TrcpLUcYfL}\n}", "github": "", "project": "", "reviewers": "xCFC;3PEq;RmC3;HQhi;39aM", "site": "https://openreview.net/forum?id=TrcpLUcYfL", "pdf_size": 913150, "rating": "2;5;5;6;7", "confidence": "5;5;2;3;4", "soundness": "2;3;2;2;3", "novelty": "1;2;3;3;4", "presentation": "1;2;2;2;3", "wc_summary": "416;71;64;145;65", "wc_strengths": "92;19;68;137;62", "wc_weaknesses": "440;43;92;215;71", "wc_questions": 
"791;558;198;184;37", "wc_limitations": "167;13;50;1;30", "wc_review": "1906;704;472;682;265", "wc_reply_reviewers": "876;0;66;306;0", "wc_reply_authors": "263;0;0;0;0", "reply_reviewers": "2;0;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.0, 1.6733200530681511 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 152.2, 135.36528358482465 ], "wc_strengths_avg": [ 75.6, 38.69160115580641 ], "wc_weaknesses_avg": [ 172.2, 146.22913526380438 ], "wc_questions_avg": [ 353.6, 277.96014102745016 ], "wc_limitations_avg": [ 52.2, 59.72403201392217 ], "wc_review_avg": [ 805.8, 572.715775930784 ], "wc_reply_reviewers_avg": [ 249.6, 332.82644125730155 ], "wc_reply_authors_avg": [ 52.6, 105.20000000000002 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4099600308453939, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14713263234123796166&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Ruhr-Universit\u00e4t Bochum;Technische Universit\u00e4t Dresden", "aff_unique_dep": ";", "aff_unique_url": "https://www.ruhr-uni-bochum.de;https://tu-dresden.de", "aff_unique_abbr": "RUB;TUD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Meta-Adapter: An Online Few-shot Learner for Vision-Language Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71536", "id": "Ts0d8PvTeB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad48f017e6c3d474caf511208e600459-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ts0d8PvTeB", "openreview": "https://openreview.net/forum?id=Ts0d8PvTeB", "poster": "/media/PosterPDFs/NeurIPS%202023/71536.png?t=1697424384.2432594", "slides": "https://nips.cc/virtual/2023/poster/71536", "video": "https://nips.cc/virtual/2023/poster/71536", "author_site": "cheng cheng, Lin Song, Ruoyi Xue, Hang Wang, Hongbin Sun, Yixiao Ge, Ying Shan", "tldr": "", "abstract": "The contrastive vision-language pre-training, known as CLIP, demonstrates remarkable potential in perceiving open-world visual concepts, enabling effective zero-shot image recognition.\n Nevertheless, few-shot learning methods based on CLIP typically require offline fine-tuning of the parameters on few-shot samples, resulting in longer inference time and the risk of overfitting in certain domains.\n To tackle these challenges, we propose the Meta-Adapter, a lightweight residual-style adapter, to refine the CLIP features guided by the few-shot samples in an online manner.\n With a few training samples, our method can enable effective few-shot learning capabilities and generalize to unseen data or tasks without additional fine-tuning, achieving competitive performance and high efficiency.\n Without bells and whistles, our approach outperforms the state-of-the-art online few-shot learning method by an average of 3.6\\% on eight image classification datasets with higher inference speed.\n Furthermore, our model is simple and flexible, serving as a plug-and-play module directly applicable to downstream tasks.\n Without further fine-tuning, Meta-Adapter obtains notable performance improvements in 
open-vocabulary object detection and segmentation tasks.", "keywords": "Few-shot Learning; Vision-Language Model Adaption", "primary_area": "", "supplementary_material": "/attachment/ebb0c09a0b995ff465308f0c39219aec330566dd.pdf", "author": "Cheng Cheng;Lin Song;Ruoyi Xue;Hang Wang;Hongbin Sun;Yixiao Ge;Ying Shan", "authorids": "~Cheng_Cheng5;~Lin_Song2;~Ruoyi_Xue1;~Hang_Wang7;~Hongbin_Sun2;~Yixiao_Ge2;~Ying_Shan2", "gender": "M;M;F;M;M;F;M", "homepage": ";https://linsong.cc;https://github.com/qsjwzxhxy;https://gr.xjtu.edu.cn/web/hangwang;http://gr.xjtu.edu.cn/web/hsun/home;https://geyixiao.com/;", "dblp": ";;;;98/6690-1;228/6649;68/5910", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;6Ra2TgQAAAAJ;;;;TtU74NAAAAAJ;4oXBp9UAAAAJ", "orcid": ";;;0000-0002-2714-0703;;;0000-0001-7673-8325", "linkedin": ";\u6797-\u5b8b-9520a5183/;;;;;YingShanProfile/", "or_profile": "~Cheng_Cheng5;~Lin_Song2;~Ruoyi_Xue1;~Hang_Wang7;~Hongbin_Sun2;~Yixiao_Ge2;~Ying_Shan2", "aff": "Xi'an Jiaotong University;Tencent AI Lab;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Tencent;Tencent PCG ARC Lab", "aff_domain": "xjtu.edu.cn;tencent.com;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;tencent.com;arc.tencent.com", "position": "PhD student;Researcher;MS student;Assistant Professor;Full Professor;Researcher;Director", "bibtex": "@inproceedings{\ncheng2023metaadapter,\ntitle={Meta-Adapter: An Online Few-shot Learner for Vision-Language Model},\nauthor={Cheng Cheng and Lin Song and Ruoyi Xue and Hang Wang and Hongbin Sun and Yixiao Ge and Ying Shan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ts0d8PvTeB}\n}", "github": "", "project": "", "reviewers": "Qojd;uypu;JEGz;iC61;deHD", "pdf_size": 589208, "rating": "5;5;5;6;6", "confidence": "4;4;3;4;3", "soundness": "3;2;2;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;2", "wc_summary": "58;54;67;52;122", "wc_strengths": "41;27;40;40;31", "wc_weaknesses": "33;112;57;139;102", "wc_questions": "93;4;17;49;1", "wc_limitations": "1;4;1;3;1", "wc_review": "226;201;182;283;257", "wc_reply_reviewers": "30;83;38;35;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 70.6, 26.211447880649402 ], "wc_strengths_avg": [ 35.8, 5.706137047074843 ], "wc_weaknesses_avg": [ 88.6, 38.35935348777401 ], "wc_questions_avg": [ 32.8, 34.573978654473656 ], "wc_limitations_avg": [ 2.0, 1.2649110640673518 ], "wc_review_avg": [ 229.8, 36.60273213846202 ], "wc_reply_reviewers_avg": [ 37.2, 26.603759132874437 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.16666666666666666, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6560755123285065605&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "xjtu.edu.cn;tencent.com;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;tencent.com;arc.tencent.com", "author_num": 7, "aff_unique_index": "0;1;0;0;0;1;1", "aff_unique_norm": "Xi'an Jiao Tong University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.xjtu.edu.cn;https://ai.tencent.com", 
"aff_unique_abbr": "XJTU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GIMLET: A Unified Graph-Text Model for Instruction-Based Molecule Zero-Shot Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71535", "id": "Tt6DrRCgJV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/129033c7c08be683059559e8d6bfd460-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Tt6DrRCgJV", "openreview": "https://openreview.net/forum?id=Tt6DrRCgJV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71535", "video": "https://nips.cc/virtual/2023/poster/71535", "author_site": "Haiteng Zhao, Shengchao Liu, Ma Chang, Hannan Xu, Jie Fu, Zhihong Deng, Lingpeng Kong, Qi Liu", "tldr": "", "abstract": "Molecule property prediction has gained significant attention in recent years. The main bottleneck is the label insufficiency caused by expensive lab experiments. In order to alleviate this issue and to better leverage textual knowledge for tasks, this study investigates the feasibility of employing natural language instructions to accomplish molecule-related tasks in a zero-shot setting. We discover that existing molecule-text models perform poorly in this setting due to inadequate treatment of instructions and limited capacity for graphs. \nTo overcome these issues, we propose GIMLET, which unifies language models for both graph and text data. By adopting generalized position embedding, our model is extended to encode both graph structures and instruction text without additional graph encoding modules. GIMLET also decouples encoding of the graph from tasks instructions in the attention mechanism, enhancing the generalization of graph features across novel tasks. We construct a dataset consisting of more than two thousand molecule tasks with corresponding instructions derived from task descriptions. We pretrain GIMLET on the molecule tasks along with instructions, enabling the model to transfer effectively to a broad range of tasks. 
Experimental results demonstrate that GIMLET significantly outperforms molecule-text baselines in instruction-based zero-shot learning, even achieving results close to those of supervised GNN models on tasks such as toxcast and muv.", "keywords": "Instruction;Molecule;Zero Shot;Graph;Language Model", "primary_area": "", "supplementary_material": "/attachment/72e65508c2bde36cbeded711eee729b5ab25f1b4.zip", "author": "Haiteng Zhao;Shengchao Liu;Chang Ma;Hannan Xu;Jie Fu;Zhi-Hong Deng;Lingpeng Kong;Qi Liu", "authorids": "~Haiteng_Zhao1;~Shengchao_Liu1;~Chang_Ma2;~Hannan_Xu1;~Jie_Fu2;~Zhi-Hong_Deng1;~Lingpeng_Kong1;~Qi_Liu5", "gender": "M;M;M;M;M;M;F;M", "homepage": "https://zhao-ht.github.io/haitengzhao/;https://chao1224.github.io/;;http://www.cis.pku.edu.cn/jzyg/szdw/dzh.htm;https://ikekonglp.github.io/;http://leuchine.github.io/;https://github.com/chang-github-00;https://bigaidream.github.io/", "dblp": "304/8330;;;161/4814-1;144/7656;;;", "google_scholar": "ZQlZN10AAAAJ;F1ws3XUAAAAJ;OqKCDxcAAAAJ;https://scholar.google.com.tw/citations?user=tRoAxlsAAAAJ;f1hBi5wAAAAJ;Y-OeKMwAAAAJ;8OOpuiIAAAAJ;66osleIAAAAJ", "orcid": ";0000-0003-2030-2367;0000-0002-0872-2185;0000-0002-0263-8142;;0000-0003-4608-5778;;0000-0002-4494-843X", "linkedin": ";;hannan-xu-766100135/;;;;;", "or_profile": "~Haiteng_Zhao1;~Shengchao_Liu1;~Hannan_Xu1;~Zhi-Hong_Deng1;~Lingpeng_Kong1;~Qi_Liu5;~Ma_Chang1;~Jie_Fu1", "aff": "Peking University;MILA-UdeM;University of Oxford;Peking University;Department of Computer Science, The University of Hong Kong;University of Hong Kong;University of Hong Kong;Beijing Academy of Artificial Intelligence", "aff_domain": "pku.edu.cn;mila.quebec;ox.ac.uk;pku.edu.cn;cs.hku.hk;hku.hk;hku.hk;baai.ac.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor;Assistant Professor;PhD student;Researcher", "bibtex": "@inproceedings{\nzhao2023gimlet,\ntitle={{GIMLET}: A Unified Graph-Text Model for Instruction-Based Molecule Zero-Shot Learning},\nauthor={Haiteng Zhao and Shengchao Liu and Chang Ma and Hannan Xu and Jie Fu and Zhi-Hong Deng and Lingpeng Kong and Qi Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Tt6DrRCgJV}\n}", "github": "", "project": "", "reviewers": "UK2h;3HAC;drQy;M8Li;3WHb", "pdf_size": 6233026, "rating": "4;4;5;6;7", "confidence": "4;3;3;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "55;82;134;153;55", "wc_strengths": "18;67;90;143;74", "wc_weaknesses": "178;204;100;180;70", "wc_questions": "19;2;48;15;75", "wc_limitations": "3;7;2;26;19", "wc_review": "273;362;374;517;293", "wc_reply_reviewers": "19;0;18;76;14", "wc_reply_authors": "47;49;43;185;16", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;3;2;3;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 95.8, 40.62216143929321 ], "wc_strengths_avg": [ 78.4, 40.262141026030896 ], "wc_weaknesses_avg": [ 146.4, 51.83666655949242 ], "wc_questions_avg": [ 31.8, 26.31653472628948 ], "wc_limitations_avg": [ 11.4, 9.478396488858229 ], "wc_review_avg": [ 363.8, 85.83099673194992 ], "wc_reply_reviewers_avg": [ 25.4, 26.196182928052707 ], "wc_reply_authors_avg": [ 68.0, 59.6992462263972 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4,
0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4900980294098034, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8390578571473859304&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "pku.edu.cn;mila.quebec;ox.ac.uk;pku.edu.cn;cs.hku.hk;hku.hk;hku.hk;baai.ac.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;3;3;3;4", "aff_unique_norm": "Peking University;Mila;University of Oxford;University of Hong Kong;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;;Department of Computer Science;", "aff_unique_url": "http://www.pku.edu.cn;https://mila.quebec;https://www.ox.ac.uk;https://www.hku.hk;https://www.baaic.cn", "aff_unique_abbr": "Peking U;MILA;Oxford;HKU;BAAI", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;0;0;0;0;0", "aff_country_unique": "China;Canada;United Kingdom" }, { "title": "Optimal Parameter and Neuron Pruning for Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71534", "id": "TtCPFN5fhO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4316bb210a59fb7aafeca5dd21c2703-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TtCPFN5fhO", "openreview": "https://openreview.net/forum?id=TtCPFN5fhO", "poster": "/media/PosterPDFs/NeurIPS%202023/71534.png?t=1702140593.4373505", "slides": "https://nips.cc/virtual/2023/poster/71534", "video": "https://nips.cc/virtual/2023/poster/71534", "author_site": "Chao Chen, Zhihang Fu, Kai Liu, Ze Chen, Mingyuan Tao, Jieping Ye", "tldr": "", "abstract": "For a machine learning model deployed in real world scenarios, the ability of detecting out-of-distribution (OOD) samples is indispensable and challenging. Most existing OOD detection methods focused on exploring advanced training skills or training-free tricks to prevent the model from yielding overconfident confidence score for unknown samples. The training-based methods require expensive training cost and rely on OOD samples which are not always available, while most training-free methods can not efficiently utilize the prior information from the training data. In this work, we propose an \\textbf{O}ptimal \\textbf{P}arameter and \\textbf{N}euron \\textbf{P}runing (\\textbf{OPNP}) approach, which aims to identify and remove those parameters and neurons that lead to over-fitting. The main method is divided into two steps. In the first step, we evaluate the sensitivity of the model parameters and neurons by averaging gradients over all training samples. In the second step, the parameters and neurons with exceptionally large or close to zero sensitivities are removed for prediction. Our proposal is training-free, compatible with other post-hoc methods, and exploring the information from all training data. 
Extensive experiments are performed on multiple OOD detection tasks and model architectures, showing that our proposed OPNP consistently outperforms the existing methods by a large margin.", "keywords": "Out-of-Distribution Detection;Parameter Sensitivity;Parameter Pruning;Neuron Pruning", "primary_area": "", "supplementary_material": "", "author": "Chao Chen;Zhihang Fu;Kai Liu;Ze Chen;Mingyuan Tao;Jieping Ye", "authorids": "~Chao_Chen19;~Zhihang_Fu1;~Kai_Liu8;~Ze_Chen3;~Mingyuan_Tao1;~Jieping_Ye4", "gender": "M;M;M;M;M;M", "homepage": "https://chaochen.cc/;https://zhihangfu.top/;https://kail8.github.io/;;;http://yelabs.net/", "dblp": "66/3019-26.html;207/1894;;15/4184-1;289/5997;03/5454", "google_scholar": "https://scholar.google.com.hk/citations?user=_xDUAtQAAAAJ;e_e3Ur0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;hVqKZq0AAAAJ;https://scholar.google.com/citations?hl=en;T9AzhwcAAAAJ", "orcid": ";;;;;0000-0001-8662-5818", "linkedin": ";;;;;", "or_profile": "~Chao_Chen19;~Zhihang_Fu1;~Kai_Liu8;~Ze_Chen3;~Mingyuan_Tao1;~Jieping_Ye4", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;;Alibaba DAMO Academy", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com", "position": "Researcher;Researcher;Intern;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nchen2023optimal,\ntitle={Optimal Parameter and Neuron Pruning for Out-of-Distribution Detection},\nauthor={Chao Chen and Zhihang Fu and Kai Liu and Ze Chen and Mingyuan Tao and Jieping Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TtCPFN5fhO}\n}", "github": "", "project": "", "reviewers": "8qxy;Dntp;qR61;tCft", "pdf_size": 2256349, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "2;2;2;4", "presentation": "2;2;4;3", "wc_summary": "45;26;33;44", "wc_strengths": "58;17;29;19", "wc_weaknesses": "137;142;43;24", "wc_questions": "443;4;49;132", "wc_limitations": "113;1;7;24", "wc_review": "796;190;161;243", "wc_reply_reviewers": "23;58;15;13", "wc_reply_authors": "24;596;23;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 37.0, 7.905694150420948 ], "wc_strengths_avg": [ 30.75, 16.37643123516232 ], "wc_weaknesses_avg": [ 86.5, 53.45325060274632 ], "wc_questions_avg": [ 157.0, 171.38698900441656 ], "wc_limitations_avg": [ 36.25, 45.107510461119446 ], "wc_review_avg": [ 347.5, 260.60554483740367 ], "wc_reply_reviewers_avg": [ 27.25, 18.14352501582865 ], "wc_reply_authors_avg": [ 160.75, 251.47502361069579 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8448870819435503854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "China" }, { "title": "Tools for Verifying Neural Models' Training Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71533", "id": "TwLHB8sKme", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/03e33e1f62e3302b47fe1d38a235921e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TwLHB8sKme", "openreview": "https://openreview.net/forum?id=TwLHB8sKme", "poster": "/media/PosterPDFs/NeurIPS%202023/71533.png?t=1702062223.458544", "slides": "https://nips.cc/virtual/2023/poster/71533", "video": "https://nips.cc/virtual/2023/poster/71533", "author_site": "Dami Choi, Yonadav Shavit, David Duvenaud", "tldr": "", "abstract": "It is important that consumers and regulators can verify the provenance of large neural models to evaluate their capabilities and risks. We introduce the concept of a \"Proof-of-Training-Data\": any protocol that allows a model trainer to convince a Verifier of the training data that produced a set of model weights. Such protocols could verify the amount and kind of data and compute used to train the model, including whether it was trained on specific harmful or beneficial data sources. We explore efficient verification strategies for Proof-of-Training-Data that are compatible with most current large-model training procedures. These include a method for the model-trainer to verifiably pre-commit to a random seed used in training, and a method that exploits models' tendency to temporarily overfit to training data in order to detect whether a given data-point was included in training. We show experimentally that our verification procedures can catch a wide variety of attacks, including all known attacks from the Proof-of-Learning literature.", "keywords": "Large Scale Learning;ML Security;AI Governance", "primary_area": "", "supplementary_material": "", "author": "Dami Choi;Yonadav G Shavit;David Duvenaud", "authorids": "~Dami_Choi1;~Yonadav_G_Shavit1;~David_Duvenaud2", "gender": ";;M", "homepage": ";https://yonadavshavit.com;https://www.cs.toronto.edu/~duvenaud/", "dblp": "209/9687;;86/9380", "google_scholar": "giuZW04AAAAJ;;https://scholar.google.ca/citations?user=ZLpO3XQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Dami_Choi1;~Yonadav_G_Shavit1;~David_Duvenaud2", "aff": "Department of Computer Science, University of Toronto;Harvard University;Anthropic", "aff_domain": "cs.toronto.edu;harvard.edu;anthropic.com", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nchoi2023tools,\ntitle={Tools for Verifying Neural Models' Training Data},\nauthor={Dami Choi and Yonadav G Shavit and David Duvenaud},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TwLHB8sKme}\n}", "github": "", "project": "", "reviewers": "pydU;G7nA;Ejfn;yjkq", "pdf_size": 5705424, "rating": "5;5;5;6", "confidence": "2;2;2;3", "soundness": "2;3;2;3", "novelty": "3;2;2;3", "presentation": "2;2;3;2", "wc_summary": "89;134;69;55", "wc_strengths": "13;31;66;55", "wc_weaknesses": "237;335;124;61", "wc_questions": "1;101;3;46", "wc_limitations": "11;20;14;7", "wc_review": "351;621;276;224", "wc_reply_reviewers": "148;7;14;6", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 
0.4330127018922193 ], "wc_summary_avg": [ 86.75, 29.83601012199855 ], "wc_strengths_avg": [ 41.25, 20.64430914319973 ], "wc_weaknesses_avg": [ 189.25, 105.15316210176468 ], "wc_questions_avg": [ 37.75, 40.702426217610174 ], "wc_limitations_avg": [ 13.0, 4.743416490252569 ], "wc_review_avg": [ 368.0, 152.8872133306118 ], "wc_reply_reviewers_avg": [ 43.75, 60.26763227471277 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5224903824703744548&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.toronto.edu;harvard.edu;anthropic.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Toronto;Harvard University;Anthropic", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.utoronto.ca;https://www.harvard.edu;https://www.anthropic.com", "aff_unique_abbr": "U of T;Harvard;Anthropic", "aff_campus_unique_index": "0", "aff_campus_unique": "Toronto;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "An Efficient Doubly-Robust Test for the Kernel Treatment Effect", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71532", "id": "TyLjNSbSOe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bccdd196d798a51a4961989984a9ed4a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=TyLjNSbSOe", "openreview": "https://openreview.net/forum?id=TyLjNSbSOe", "poster": "/media/PosterPDFs/NeurIPS%202023/71532.png?t=1701386737.9345348", "slides": "https://nips.cc/virtual/2023/poster/71532", "video": "https://nips.cc/virtual/2023/poster/71532", "author_site": "Diego Martinez Taboada, Aaditya Ramdas, Edward Kennedy", "tldr": "", "abstract": "The average treatment effect, which is the difference in expectation of the counterfactuals, is probably the most popular target effect in causal inference with binary treatments. However, treatments may have effects beyond the mean, for instance decreasing or increasing the variance. We propose a new kernel-based test for distributional effects of the treatment. It is, to the best of our knowledge, the first kernel-based, doubly-robust test with provably valid type-I error. 
Furthermore, our proposed algorithm is computationally efficient, avoiding the use of permutations.", "keywords": "kernel treatment effect;causal inference;maximum mean discrepancy", "primary_area": "", "supplementary_material": "/attachment/10e9bc32a6a83fdc7248c75b3095088e261bca78.zip", "author": "Diego Martinez-Taboada;Aaditya Ramdas;Edward Kennedy", "authorids": "~Diego_Martinez-Taboada1;~Aaditya_Ramdas2;~Edward_Kennedy1", "gender": "M;M;M", "homepage": ";http://stat.cmu.edu/~aramdas;http://www.ehkennedy.com/", "dblp": "331/8680;117/3518;222/3133", "google_scholar": "9-p1MCsAAAAJ;ZvFaPxUAAAAJ;dXztgDYAAAAJ", "orcid": ";0000-0003-0497-311X;", "linkedin": "diego-martinez-taboada/;;", "or_profile": "~Diego_Martinez-Taboada1;~Aaditya_Ramdas2;~Edward_Kennedy1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nmartinez-taboada2023an,\ntitle={An Efficient Doubly-Robust Test for the Kernel Treatment Effect},\nauthor={Diego Martinez-Taboada and Aaditya Ramdas and Edward Kennedy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=TyLjNSbSOe}\n}", "github": "", "project": "", "reviewers": "yX9h;gJy9;DRRe;sucV;NqCE", "pdf_size": 825730, "rating": "4;6;7;7;8", "confidence": "4;1;4;4;5", "soundness": "4;3;4;2;4", "novelty": "2;1;4;2;3", "presentation": "2;3;4;2;4", "wc_summary": "140;83;59;81;35", "wc_strengths": "20;55;137;20;164", "wc_weaknesses": "74;127;64;189;289", "wc_questions": "330;13;51;34;99", "wc_limitations": "2;1;17;2;4", "wc_review": "566;279;328;326;591", "wc_reply_reviewers": "0;55;19;143;0", "wc_reply_authors": "0;280;13;34;0", "reply_reviewers": "0;2;1;1;0", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.6, 1.3564659966250538 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 79.6, 34.857423886454946 ], "wc_strengths_avg": [ 79.2, 60.210962457014425 ], "wc_weaknesses_avg": [ 148.6, 83.10619712151434 ], "wc_questions_avg": [ 105.4, 115.82849390370231 ], "wc_limitations_avg": [ 5.2, 5.979966555090422 ], "wc_review_avg": [ 418.0, 132.45225554893355 ], "wc_reply_reviewers_avg": [ 43.4, 53.697672202805954 ], "wc_reply_authors_avg": [ 65.4, 108.017776314827 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3043478260869565, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4728599418499127625&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Long Sequence Hopfield Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71531", "id": "Tz2uONpgpy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa32ebcdd2ce1bed4ef7f456fc8fa5c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Tz2uONpgpy", 
"openreview": "https://openreview.net/forum?id=Tz2uONpgpy", "poster": "/media/PosterPDFs/NeurIPS%202023/71531.png?t=1702056766.2481353", "slides": "https://nips.cc/virtual/2023/poster/71531", "video": "https://nips.cc/virtual/2023/poster/71531", "author_site": "Hamza Chaudhry, Jacob Zavatone-Veth, Dmitry Krotov, Cengiz Pehlevan", "tldr": "", "abstract": "Sequence memory is an essential attribute of natural and artificial intelligence that enables agents to encode, store, and retrieve complex sequences of stimuli and actions. Computational models of sequence memory have been proposed where recurrent Hopfield-like neural networks are trained with temporally asymmetric Hebbian rules. However, these networks suffer from limited sequence capacity (maximal length of the stored sequence) due to interference between the memories. Inspired by recent work on Dense Associative Memories, we expand the sequence capacity of these models by introducing a nonlinear interaction term, enhancing separation between the patterns. We derive novel scaling laws for sequence capacity with respect to network size, significantly outperforming existing scaling laws for models based on traditional Hopfield networks, and verify these theoretical results with numerical simulation. Moreover, we introduce a generalized pseudoinverse rule to recall sequences of highly correlated patterns. Finally, we extend this model to store sequences with variable timing between states' transitions and describe a biologically-plausible implementation, with connections to motor neuroscience.", "keywords": "Sequence Recall;Dense Associative Memory;Memory Capacity;Hopfield Networks;Biological Motor Control", "primary_area": "", "supplementary_material": "", "author": "Hamza Tahir Chaudhry;Jacob A Zavatone-Veth;Dmitry Krotov;Cengiz Pehlevan", "authorids": "~Hamza_Tahir_Chaudhry1;~Jacob_A_Zavatone-Veth1;~Dmitry_Krotov2;~Cengiz_Pehlevan2", "gender": "Not Specified;M;;", "homepage": ";https://jzv.io;https://mitibmwatsonailab.mit.edu/people/dmitry-krotov/;https://pehlevan.seas.harvard.edu/", "dblp": ";270/9915;182/2341;145/3480", "google_scholar": ";i_HogJkAAAAJ;WeD9ll0AAAAJ;veDLTPEAAAAJ", "orcid": ";0000-0002-4060-1738;;0000-0001-9767-6063", "linkedin": "hamzatc/;;krotovdmitry;", "or_profile": "~Hamza_Tahir_Chaudhry1;~Jacob_A_Zavatone-Veth1;~Dmitry_Krotov2;~Cengiz_Pehlevan2", "aff": "Harvard University;Harvard University;Massachusetts Institute of Technology;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;harvard.edu;mit.edu;seas.harvard.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchaudhry2023long,\ntitle={Long Sequence Hopfield Memory},\nauthor={Hamza Tahir Chaudhry and Jacob A Zavatone-Veth and Dmitry Krotov and Cengiz Pehlevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Tz2uONpgpy}\n}", "github": "", "project": "", "reviewers": "2PbW;6yTi;Yq1L;mTmK", "pdf_size": 3156151, "rating": "6;6;6;6", "confidence": "4;4;4;2", "soundness": "3;2;3;3", "novelty": "3;3;2;3", "presentation": "3;4;3;3", "wc_summary": "32;82;54;96", "wc_strengths": "62;70;8;42", "wc_weaknesses": "300;39;145;243", "wc_questions": "172;151;25;42", "wc_limitations": "12;1;115;18", "wc_review": "578;343;347;441", "wc_reply_reviewers": "49;33;35;25", "wc_reply_authors": "47;0;10;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 
3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 24.779023386727733 ], "wc_strengths_avg": [ 45.5, 23.93219588754864 ], "wc_weaknesses_avg": [ 181.75, 99.32616724710563 ], "wc_questions_avg": [ 97.5, 64.70896383036897 ], "wc_limitations_avg": [ 36.5, 45.7301869665979 ], "wc_review_avg": [ 427.25, 95.4630163990223 ], "wc_reply_reviewers_avg": [ 35.5, 8.645808232895291 ], "wc_reply_authors_avg": [ 14.25, 19.343926695477318 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17328776142563377362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "email": "harvard.edu;harvard.edu;mit.edu;seas.harvard.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu", "aff_unique_abbr": "Harvard;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Topological RANSAC for instance verification and retrieval without fine-tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71530", "id": "U1Kr8FTyhQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c972859a984a21658432d7320c7df385-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=U1Kr8FTyhQ", "openreview": "https://openreview.net/forum?id=U1Kr8FTyhQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71530", "video": "https://nips.cc/virtual/2023/poster/71530", "author_site": "Guoyuan An, Ju-hyeong Seon, Inkyu An, Yuchi Huo, Sung-eui Yoon", "tldr": "", "abstract": "This paper presents an innovative approach to enhancing explainable image retrieval, particularly in situations where a fine-tuning set is unavailable. The widely-used SPatial verification (SP) method, despite its efficacy, relies on a spatial model and the hypothesis-testing strategy for instance recognition, leading to inherent limitations, including the assumption of planar structures and neglect of topological relations among features. To address these shortcomings, we introduce a pioneering technique that replaces the spatial model with a topological one within the RANSAC process. We propose bio-inspired saccade and fovea functions to verify the topological consistency among features, effectively circumventing the issues associated with SP's spatial model. Our experimental results demonstrate that our method significantly outperforms SP, achieving state-of-the-art performance in non-fine-tuning retrieval. Furthermore, our approach can enhance performance when used in conjunction with fine-tuned features. 
Importantly, our method retains high explainability and is lightweight, offering a practical and adaptable solution for a variety of real-world applications.", "keywords": "Landmarks retrieval;non-fine-tuning;spatial verification;explainable AI;hypothesis and test", "primary_area": "", "supplementary_material": "/attachment/542fce7bd759edd18d784374be067a132a254541.pdf", "author": "Guoyuan An;Ju-hyeong Seon;Inkyu An;Yuchi Huo;Sung-eui Yoon", "authorids": "~Guoyuan_An1;~Ju-hyeong_Seon1;~Inkyu_An1;~Yuchi_Huo1;~Sung-eui_Yoon1", "gender": "M;;M;M;M", "homepage": "https://sgvr.kaist.ac.kr/member/;https://inkyuan.github.io/;https://www.zju.edu.cn/home/huo/;http://sglab.kaist.ac.kr/~sungeui/;https://sunjuhyeong.github.io/online-cv/", "dblp": "299/8567;210/0902;;96/4574;", "google_scholar": "https://scholar.google.com.hk/citations?user=yiHeGiMAAAAJ;https://scholar.google.com/citations?hl=ko;;https://scholar.google.com.tw/citations?user=uLQzQW4AAAAJ;", "orcid": "0009-0008-6233-757X;;;;", "linkedin": "guoyuan-an-743833168/?originalSubdomain=kr;;;;", "or_profile": "~Guoyuan_An1;~Inkyu_An1;~Yuchi_Huo1;~Sung-eui_Yoon1;~Seon_Juhyeong1", "aff": "KAIST;Korea Advanced Institute of Science & Technology;Zhejiang University;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;zju.edu.cn;kaist.edu;kaist.edu", "position": "PhD student;PhD student;Assistant Professor;Professor;MS student", "bibtex": "@inproceedings{\nan2023topological,\ntitle={Topological {RANSAC} for instance verification and retrieval without fine-tuning},\nauthor={Guoyuan An and Ju-hyeong Seon and Inkyu An and Yuchi Huo and Sung-eui Yoon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=U1Kr8FTyhQ}\n}", "github": "", "project": "", "reviewers": "JDZk;pXos;X5kj;WpBp", "pdf_size": 3892482, "rating": "5;6;7;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;4;4", "presentation": "2;2;4;4", "wc_summary": "67;42;54;171", "wc_strengths": "55;46;52;194", "wc_weaknesses": "141;327;94;99", "wc_questions": "22;4;38;100", "wc_limitations": "17;21;1;90", "wc_review": "302;440;239;654", "wc_reply_reviewers": "0;25;0;0", "wc_reply_authors": "0;65;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 83.5, 51.28596299183628 ], "wc_strengths_avg": [ 86.75, 62.005544106958695 ], "wc_weaknesses_avg": [ 165.25, 95.15349441822933 ], "wc_questions_avg": [ 41.0, 36.124783736376884 ], "wc_limitations_avg": [ 32.25, 34.17144275561101 ], "wc_review_avg": [ 408.75, 159.16559772764967 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 16.25, 28.145825622994256 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13404676683759033955&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;zju.edu.cn;kaist.edu;kaist.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Zhejiang University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.kaist.ac.kr;https://www.zju.edu.cn", "aff_unique_abbr": "KAIST;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "South Korea;China" }, { "title": "Towards Efficient and Accurate Winograd Convolution via Full Quantization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71529", "id": "U4WTG06Yu3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/400a2e6a82520b690810b97fd67fcc4e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=U4WTG06Yu3", "openreview": "https://openreview.net/forum?id=U4WTG06Yu3", "poster": "/media/PosterPDFs/NeurIPS%202023/71529.png?t=1701970920.7232473", "slides": "https://nips.cc/virtual/2023/poster/71529", "video": "https://nips.cc/virtual/2023/poster/71529", "author_site": "Tianqi Chen, Weixiang Xu, Weihan Chen, Peisong Wang, Jian Cheng", "tldr": "", "abstract": "The Winograd algorithm is an efficient convolution implementation, which performs calculations in the transformed domain. To further improve the computation efficiency, recent works propose to combine it with model quantization. Although Post-Training Quantization has the advantage of low computational cost and has been successfully applied in many other scenarios, a severe accuracy drop exists when utilizing it in Winograd convolution. Besides, despite the Winograd algorithm consisting of four stages, most existing methods only quantize the element-wise multiplication stage, leaving a considerable portion of calculations in full precision.\nIn this paper, observing the inconsistency among different transformation procedures, we present PTQ-Aware Winograd (PAW) to optimize them collaboratively under a unified objective function. Moreover, we explore the full quantization of faster Winograd (tile size $\\geq4$) for the first time. We further propose a hardware-friendly method called Factorized Scale Quantization (FSQ), which can effectively balance the significant range differences in the Winograd domain. 
Experiments demonstrate the effectiveness of our method; e.g., with 8-bit quantization and a tile size of 6, our method outperforms the previous Winograd PTQ method by 8.27\\% and 5.38\\% in terms of top-1 accuracy on ResNet-18 and ResNet-34, respectively.", "keywords": "Winograd Convolution;Quantization", "primary_area": "", "supplementary_material": "/attachment/fa1952caff7fbc248c8941864ef156ec18bf7f85.pdf", "author": "Chen Tianqi;Weixiang Xu;Weihan Chen;Peisong Wang;Jian Cheng", "authorids": "~Chen_Tianqi1;~Weixiang_Xu2;~Weihan_Chen1;~Peisong_Wang1;~Jian_Cheng7", "gender": "M;M;M;M;M", "homepage": "https://github.com/1773226512?tab=repositories;;;https://people.ucas.ac.cn/~chengjian?language=en;", "dblp": ";250/2516;187/5474;14/6145-1;14/5824", "google_scholar": ";;UYFZpk4AAAAJ;ZGCIUJ8AAAAJ;", "orcid": ";;;0000-0003-1289-2758;", "linkedin": ";;;;", "or_profile": "~Chen_Tianqi1;~Weihan_Chen1;~Peisong_Wang1;~Jian_Cheng7;~Weixiang_Xu1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ntianqi2023towards,\ntitle={Towards Efficient and Accurate Winograd Convolution via Full Quantization},\nauthor={Chen Tianqi and Weixiang Xu and Weihan Chen and Peisong Wang and Jian Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=U4WTG06Yu3}\n}", "github": "", "project": "", "reviewers": "M3Gq;JmCw;Rvu6;BWC3;2v14", "pdf_size": 504231, "rating": "5;5;5;6;7", "confidence": "3;3;3;3;5", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;4;2;3;3", "wc_summary": "43;67;69;47;90", "wc_strengths": "70;43;30;72;25", "wc_weaknesses": "38;102;344;152;50", "wc_questions": "26;76;91;2;43", "wc_limitations": "8;40;26;2;20", "wc_review": "185;328;560;275;228", "wc_reply_reviewers": "0;44;0;24;34", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 63.2, 16.951696080333672 ], "wc_strengths_avg": [ 48.0, 19.687559523719543 ], "wc_weaknesses_avg": [ 137.2, 111.05746260382504 ], "wc_questions_avg": [ 47.6, 32.425915561476444 ], "wc_limitations_avg": [ 19.2, 13.422369388450015 ], "wc_review_avg": [ 315.2, 131.34900075752384 ], "wc_reply_reviewers_avg": [ 20.4, 17.81684596105607 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.875, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6209765844510886626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index":
"0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Masked Two-channel Decoupling Framework for Incomplete Multi-view Weak Multi-label Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71528", "id": "U4pFV192JQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/66772e6aa61e54ae16443ae1d78a7319-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=U4pFV192JQ", "openreview": "https://openreview.net/forum?id=U4pFV192JQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71528.png?t=1697529764.7268515", "slides": "https://nips.cc/virtual/2023/poster/71528", "video": "https://nips.cc/virtual/2023/poster/71528", "author_site": "Chengliang Liu, Jie Wen, Yabo Liu, Chao Huang, Zhihao Wu, Xiaoling Luo, Yong Xu", "tldr": "", "abstract": "Multi-view learning has become a popular research topic in recent years, but research on the cross-application of classic multi-label classification and multi-view learning is still in its early stages. In this paper, we focus on the complex yet highly realistic task of incomplete multi-view weak multi-label learning and propose a masked two-channel decoupling framework based on deep neural networks to solve this problem. The core innovation of our method lies in decoupling the single-channel view-level representation, which is common in deep multi-view learning methods, into a shared representation and a view-proprietary representation. We also design a cross-channel contrastive loss to enhance the semantic property of the two channels. Additionally, we exploit supervised information to design a label-guided graph regularization loss, helping the extracted embedding features preserve the geometric structure among samples. Inspired by the success of masking mechanisms in image and text analysis, we develop a random fragment masking strategy for vector features to improve the learning ability of encoders. Finally, it is important to emphasize that our model is fully adaptable to arbitrary view and label absences while also performing well on the ideal full data. 
We have conducted extensive experiments that convincingly confirm the effectiveness and advantages of our model.", "keywords": "Incomplete Multi-view Weak Multi-label Learning;Multi-view learning;Multi-label Classification", "primary_area": "", "supplementary_material": "", "author": "Chengliang Liu;Jie Wen;Yabo Liu;Chao Huang;Zhihao Wu;Xiaoling Luo;Yong Xu", "authorids": "~Chengliang_Liu1;~Jie_Wen1;~Yabo_Liu1;~Chao_Huang6;~Zhihao_Wu1;~Xiaoling_Luo2;~Yong_Xu9", "gender": ";;M;;M;F;M", "homepage": ";;;;;;https://www.yongxu.org", "dblp": ";;08/7626;;27/8792-2;;", "google_scholar": ";;8AyAPDEAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=zOVgYQYAAAAJ", "orcid": ";;;;0000-0002-2704-0614;0000-0003-3678-3185;", "linkedin": ";;;;;;", "or_profile": "~Chengliang_Liu1;~Jie_Wen1;~Yabo_Liu1;~Chao_Huang6;~Zhihao_Wu1;~Xiaoling_Luo2;~Yong_Xu9", "aff": ";;Harbin Institute of Technology;;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": ";;hit.edu.cn;;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": ";;PhD student;;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023masked,\ntitle={Masked Two-channel Decoupling Framework for Incomplete Multi-view Weak Multi-label Learning},\nauthor={Chengliang Liu and Jie Wen and Yabo Liu and Chao Huang and Zhihao Wu and Xiaoling Luo and Yong Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=U4pFV192JQ}\n}", "github": "", "project": "", "reviewers": "NrND;XPPT;txHg;8GYm", "pdf_size": 2554204, "rating": "4;6;6;6", "confidence": "4;5;5;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "46;48;57;119", "wc_strengths": "15;110;116;79", "wc_weaknesses": "117;99;56;167", "wc_questions": "3;22;30;81", "wc_limitations": "15;15;1;17", "wc_review": "196;294;260;463", "wc_reply_reviewers": "19;18;27;22", "wc_reply_authors": "475;19;14;15", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.5, 30.02082610455615 ], "wc_strengths_avg": [ 80.0, 40.06869101929835 ], "wc_weaknesses_avg": [ 109.75, 39.79557136164777 ], "wc_questions_avg": [ 34.0, 28.853076092507017 ], "wc_limitations_avg": [ 12.0, 6.4031242374328485 ], "wc_review_avg": [ 303.25, 98.7151837358367 ], "wc_reply_reviewers_avg": [ 21.5, 3.5 ], "wc_reply_authors_avg": [ 130.75, 198.76163487957126 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5243225187079194306&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": ";;hit.edu.cn;;hit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Harbin;Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "GADBench: Revisiting and
Benchmarking Supervised Graph Anomaly Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73582", "id": "U5uRXlLwnM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5eaafd67434a4cfb1cf829722c65f184-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=U5uRXlLwnM", "openreview": "https://openreview.net/forum?id=U5uRXlLwnM", "poster": "/media/PosterPDFs/NeurIPS%202023/73582.png?t=1701753156.1083817", "slides": "https://nips.cc/virtual/2023/poster/73582", "video": "https://nips.cc/virtual/2023/poster/73582", "author_site": "Jianheng Tang, Fengrui Hua, Ziqi Gao, Peilin Zhao, Jia Li", "tldr": "", "abstract": "With a long history of traditional Graph Anomaly Detection (GAD) algorithms and recently popular Graph Neural Networks (GNNs), it is still not clear (1) how they perform under a standard comprehensive setting, (2) whether GNNs can outperform traditional algorithms such as tree ensembles, and (3) how efficient they are on large-scale graphs. In response, we introduce GADBench---a benchmark tool dedicated to supervised anomalous node detection in static graphs. GADBench facilitates a detailed comparison across 29 distinct models on ten real-world GAD datasets, encompassing thousands to millions (~6M) of nodes. Our main finding is that tree ensembles with simple neighborhood aggregation can outperform the latest GNNs tailored for the GAD task. We shed light on the current progress of GAD, setting a robust groundwork for subsequent investigations in this domain. GADBench is open-sourced at https://github.com/squareRoot3/GADBench.", "keywords": "Graph Anomaly Detection;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\ntang2023gadbench,\ntitle={{GADB}ench: Revisiting and Benchmarking Supervised Graph Anomaly Detection},\nauthor={Jianheng Tang and Fengrui Hua and Ziqi Gao and Peilin Zhao and Jia Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=U5uRXlLwnM}\n}", "github": "", "project": "", "reviewers": "jFM3;7otD;VEyk;qCKR;eNf8", "pdf_size": 4044975, "rating": "6;6;7;7;8", "confidence": "4;4;5;4;5", "wc_summary_and_contributions": "56;40;69;45;22", "wc_strengths": "43;40;69;82;3", "wc_improvement": "4;69;66;24;281", "wc_limitations": "241;27;70;1;1", "wc_correctness": "4;1;94;1;7", "wc_clarity": "4;1;22;1;1", "wc_relation_to_prior_work": "4;1;63;1;1", "wc_documentation": "1;5;16;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "358;185;470;157;318", "wc_reply_reviewers": "0;0;23;16;225", "wc_reply_authors": "672;274;1384;464;1210", "reply_reviewers": "0;0;1;1;3", "reply_authors": "3;1;3;2;5", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 46.4, 15.755633912984905 ], "wc_strengths_avg": [ 47.4, 27.2367398930195 ], "wc_improvement_avg": [ 88.8, 99.23991132603858 ], "wc_limitations_avg": [ 68.0, 90.1021642359383 ], "wc_correctness_avg": [ 21.4, 36.36811790566017 ], "wc_clarity_avg": [ 5.8, 8.182909018191515 ], "wc_relation_to_prior_work_avg": [ 14.0, 24.52753554680943 ], "wc_documentation_avg": [ 4.8, 5.81033561853358 ], "wc_additional_feedback_avg": [
1.0, 0.0 ], "wc_review_avg": [ 297.6, 115.09404849947717 ], "wc_reply_reviewers_avg": [ 52.8, 86.56881655654072 ], "wc_reply_authors_avg": [ 800.8, 427.8104253054149 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.8, 1.32664991614216 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2343203323097970072&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "", "author_num": 1 }, { "title": "Last-Iterate Convergent Policy Gradient Primal-Dual Methods for Constrained MDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71527", "id": "U6bhCLSPun", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0949cbcec31c09431610553a284f94a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=U6bhCLSPun", "openreview": "https://openreview.net/forum?id=U6bhCLSPun", "poster": "/media/PosterPDFs/NeurIPS%202023/71527.png?t=1699546338.1421833", "slides": "https://nips.cc/virtual/2023/poster/71527", "video": "https://nips.cc/virtual/2023/poster/71527", "author_site": "Dongsheng Ding, Chen-Yu Wei, Kaiqing Zhang, Alejandro Ribeiro", "tldr": "", "abstract": "We study the problem of computing an optimal policy of an infinite-horizon discounted constrained Markov decision process (constrained MDP). Despite the popularity of Lagrangian-based policy search methods used in practice, the oscillation of policy iterates in these methods has not been fully understood, bringing out issues such as violation of constraints and sensitivity to hyper-parameters. To fill this gap, we employ the Lagrangian method to cast a constrained MDP into a constrained saddle-point problem in which max/min players correspond to primal/dual variables, respectively, and develop two single-time-scale policy-based primal-dual algorithms with non-asymptotic convergence of their policy iterates to an optimal constrained policy. Specifically, we first propose a regularized policy gradient primal-dual (RPG-PD) method that updates the policy using an entropy-regularized policy gradient, and the dual variable via a quadratic-regularized gradient ascent, simultaneously. We prove that the policy primal-dual iterates of RPG-PD converge to a regularized saddle point with a sublinear rate, while the policy iterates converge sublinearly to an optimal constrained policy. We further instantiate RPG-PD in large state or action spaces by including function approximation in policy parametrization, and establish similar sublinear last-iterate policy convergence. Second, we propose an optimistic policy gradient primal-dual (OPG-PD) method that employs the optimistic gradient method to update primal/dual variables, simultaneously. We prove that the policy primal-dual iterates of OPG-PD converge to a saddle point that contains an optimal constrained policy, with a linear rate. To the best of our knowledge, this work appears to be the first non-asymptotic policy last-iterate convergence result for single-time-scale algorithms in constrained MDPs. 
We further validate the merits and the effectiveness of our methods in computational experiments.", "keywords": "Constrained Markov decision processes;policy gradient primal-dual methods;non-convex saddle-point problem;last-iterate convergence;entropy regularization;optimistic gradient", "primary_area": "", "supplementary_material": "/attachment/995ab8f70096bdcd1d54eb59eeae812d4eac5a2d.zip", "author": "Dongsheng Ding;Chen-Yu Wei;Kaiqing Zhang;Alejandro Ribeiro", "authorids": "~Dongsheng_Ding1;~Chen-Yu_Wei1;~Kaiqing_Zhang3;~Alejandro_Ribeiro1", "gender": ";M;M;M", "homepage": "https://dongshed.github.io;https://bahh723.github.io/;https://alelab.seas.upenn.edu;https://kzhang66.github.io/", "dblp": "120/4610;183/1729;32/15;", "google_scholar": "Ixa7PJoAAAAJ;2L2cR-kAAAAJ;7mrPM4kAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4230-9906;", "linkedin": ";;;", "or_profile": "~Dongsheng_Ding1;~Chen-Yu_Wei1;~Alejandro_Ribeiro1;~kaiqing_zhang1", "aff": "University of Pennsylvania;Massachusetts Institute of Technology;University of Pennsylvania;University of Maryland, College Park", "aff_domain": "upenn.edu;mit.edu;upenn.edu;umd.edu", "position": "Postdoc;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nding2023lastiterate,\ntitle={Last-Iterate Convergent Policy Gradient Primal-Dual Methods for Constrained {MDP}s},\nauthor={Dongsheng Ding and Chen-Yu Wei and Kaiqing Zhang and Alejandro Ribeiro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=U6bhCLSPun}\n}", "github": "", "project": "", "reviewers": "tcAd;V7nr;NJhb;jUv5", "pdf_size": 2672605, "rating": "6;7;7;8", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "62;108;40;38", "wc_strengths": "67;71;41;69", "wc_weaknesses": "85;24;28;26", "wc_questions": "76;69;129;175", "wc_limitations": "3;1;2;9", "wc_review": "293;273;240;317", "wc_reply_reviewers": "14;23;0;11", "wc_reply_authors": "0;83;33;0", "reply_reviewers": "1;2;0;1", "reply_authors": "1;4;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 62.0, 28.178005607210743 ], "wc_strengths_avg": [ 62.0, 12.206555615733702 ], "wc_weaknesses_avg": [ 40.75, 25.586861863073402 ], "wc_questions_avg": [ 112.25, 43.01961761801237 ], "wc_limitations_avg": [ 3.75, 3.112474899497183 ], "wc_review_avg": [ 280.75, 28.216794644324857 ], "wc_reply_reviewers_avg": [ 12.0, 8.215838362577491 ], "wc_reply_authors_avg": [ 29.0, 33.96321539548339 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7560623011714556324&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "upenn.edu;mit.edu;upenn.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Pennsylvania;Massachusetts Institute of Technology;University of Maryland", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://web.mit.edu;https://www.umd.edu", "aff_unique_abbr": "UPenn;MIT;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0",
"aff_country_unique": "United States" }, { "title": "Understanding Neural Network Binarization with Forward and Backward Proximal Quantizers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71526", "id": "U6fp6IUBdr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f70331dbe58ad59d83941dfa7d975aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=U6fp6IUBdr", "openreview": "https://openreview.net/forum?id=U6fp6IUBdr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71526", "video": "https://nips.cc/virtual/2023/poster/71526", "author_site": "Yiwei Lu, Yaoliang Yu, Xinlin Li, Vahid Partovi Nia", "tldr": "", "abstract": "In neural network binarization, BinaryConnect (BC) and its variants are considered the standard. These methods apply the sign function in their forward pass and their respective gradients are backpropagated to update the weights. However, the derivative of the sign function is zero whenever defined, which consequently freezes training. Therefore, implementations of BC (e.g., BNN) usually replace the derivative of sign in the backward computation with identity or other approximate gradient alternatives. Although such practice works well empirically, it is largely a heuristic or ``training trick.'' We aim at shedding some light on these training tricks from the optimization perspective. Building from existing theory on ProxConnect (PC, a generalization of BC), we (1) equip PC with different forward-backward quantizers and obtain ProxConnect++ (PC++) that includes existing binarization techniques as special cases; (2) derive a principled way to synthesize forward-backward quantizers with automatic theoretical guarantees; (3) illustrate our theory by proposing an enhanced binarization algorithm BNN++; (4) conduct image classification experiments on CNNs and vision transformers, and empirically verify that BNN++ generally achieves competitive results on binarizing these models.", "keywords": "Neural network quantization;Model compression;Conditional gradient algorithm", "primary_area": "", "supplementary_material": "/attachment/dbb6cbcd85b994d1024612c91cb36b427a20ee17.zip", "author": "Yiwei Lu;Yaoliang Yu;Xinlin Li;Vahid Partovi Nia", "authorids": "~Yiwei_Lu1;~Yaoliang_Yu1;~Xinlin_Li2;~Vahid_Partovi_Nia1", "gender": "M;M;M;M", "homepage": "https://cs.uwaterloo.ca/~y485lu/;https://cs.uwaterloo.ca/~y328yu/;;http://datawisdom.ca", "dblp": ";90/4989;;178/0912", "google_scholar": "ke0k9PkAAAAJ;https://scholar.google.ca/citations?user=zbXIQMsAAAAJ;https://scholar.google.ca/citations?hl=en;onMDIN4AAAAJ", "orcid": ";0000-0002-3823-0720;;", "linkedin": ";;xinlin-li/;https://ca.linkedin.com/in/vahid-partovi-nia-29811385", "or_profile": "~Yiwei_Lu1;~Yaoliang_Yu1;~Xinlin_Li2;~Vahid_Partovi_Nia2", "aff": "University of Waterloo;University of Waterloo;Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": "uwaterloo.ca;uwaterloo.ca;huawei.com;huawei.com", "position": "PhD student;Associate Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nlu2023understanding,\ntitle={Understanding Neural Network Binarization with Forward and Backward Proximal Quantizers},\nauthor={Yiwei Lu and Yaoliang Yu and Xinlin Li and Vahid Partovi Nia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=U6fp6IUBdr}\n}", "github": "", "project": "", "reviewers": "fayS;YJmg;5jjv;WcKF", "pdf_size": 925960, "rating": 
"5;5;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "47;54;225;56", "wc_strengths": "40;42;150;48", "wc_weaknesses": "69;350;185;39", "wc_questions": "117;43;176;2", "wc_limitations": "14;1;28;2", "wc_review": "287;490;764;147", "wc_reply_reviewers": "0;0;22;0", "wc_reply_authors": "32;32;34;32", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 74.84149918327398 ], "wc_strengths_avg": [ 70.0, 46.281745861624536 ], "wc_weaknesses_avg": [ 160.75, 122.11137334417299 ], "wc_questions_avg": [ 84.5, 67.00186564566691 ], "wc_limitations_avg": [ 11.25, 10.940178243520533 ], "wc_review_avg": [ 422.0, 232.07649600939772 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 32.5, 0.8660254037844386 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14661985482186988707&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uwaterloo.ca;uwaterloo.ca;huawei.com;huawei.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Waterloo;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://uwaterloo.ca;https://www.huawei.com", "aff_unique_abbr": "UW;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Canada;China" }, { "title": "A Hierarchical Spatial Transformer for Massive Point Samples in Continuous Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71525", "id": "U9zRgpgdFI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a0480190bbe6b622c7f1d3aa9be9c0f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=U9zRgpgdFI", "openreview": "https://openreview.net/forum?id=U9zRgpgdFI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71525", "video": "https://nips.cc/virtual/2023/poster/71525", "author_site": "Wenchong He, Zhe Jiang, Tingsong Xiao, Zelin Xu, Shigang Chen, Ronald Fick, MILES MEDINA, Christine Angelini", "tldr": "", "abstract": "Transformers are widely used deep learning architectures. Existing transformers are mostly designed for sequences (texts or time series), images or videos, and graphs. This paper proposes a novel transformer model for massive (up to a million) point samples in continuous space. Such data are ubiquitous in environment sciences (e.g., sensor observations), numerical simulations (e.g., particle-laden flow, astrophysics), and location-based services (e.g., POIs and trajectories). However, designing a transformer for massive spatial points is non-trivial due to several challenges, including implicit long-range and multi-scale dependency on irregular points in continuous space, a non-uniform point distribution, the potential high computational costs of calculating all-pair attention across massive points, and the risks of over-confident predictions due to varying point density. 
To address these challenges, we propose a new hierarchical spatial transformer model, which includes multi-resolution representation learning within a quad-tree hierarchy and efficient spatial attention via coarse approximation. We also design an uncertainty quantification branch to estimate prediction confidence related to input feature noise and point sparsity. We provide a theoretical analysis of computational time complexity and memory costs. Extensive experiments on both real-world and synthetic datasets show that our method outperforms multiple baselines in prediction accuracy and our model can scale up to one million points on one NVIDIA A100 GPU. The code is available at https://github.com/spatialdatasciencegroup/HST", "keywords": "Spatial representation learning;transformer;quadtree;efficiency", "primary_area": "", "supplementary_material": "/attachment/23ccae9286a877f2599d3c59fd2b8ab128438f03.pdf", "author": "Wenchong He;Zhe Jiang;Tingsong Xiao;Zelin Xu;Shigang Chen;Ronald Fick;MILES D MEDINA;Christine Angelini", "authorids": "~Wenchong_He1;~Zhe_Jiang1;~Tingsong_Xiao1;~Zelin_Xu1;~Shigang_Chen1;~Ronald_Fick1;~MILES_D_MEDINA1;~Christine_Angelini1", "gender": "M;M;;M;M;M;;F", "homepage": "https://wenchonghekk.github.io/;https://www.jiangteam.org;;https://zelinxu2000.github.io/;https://www.cise.ufl.edu/~sgchen/;;;http://ccs.eng.ufl.edu/", "dblp": "266/5559.html;50/4629-1;;15/3244-1;c/ShigangChen.html;;;", "google_scholar": "NXyd1-sAAAAJ;R7xPuT8AAAAJ;;18rG-NkAAAAJ;jDCiMQkAAAAJ;;;TQDW3hsAAAAJ", "orcid": ";0000-0002-3576-6976;;0009-0004-4419-3155;0000-0001-5218-3257;0000-0002-4201-2304;;", "linkedin": ";;;;;;milesmedina;", "or_profile": "~Wenchong_He1;~Zhe_Jiang1;~Tingsong_Xiao1;~Zelin_Xu1;~Shigang_Chen1;~Ronald_Fick1;~MILES_D_MEDINA1;~Christine_Angelini1", "aff": "University of Florida;University of Florida;;University of Florida;University of Florida;;University of Florida;University of Florida", "aff_domain": "ufl.edu;ufl.edu;;ufl.edu;ufl.edu;;ufl.edu;ufl.edu", "position": "PhD student;Assistant Professor;;PhD student;Full Professor;;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhe2023a,\ntitle={A Hierarchical Spatial Transformer for Massive Point Samples in Continuous Space},\nauthor={Wenchong He and Zhe Jiang and Tingsong Xiao and Zelin Xu and Shigang Chen and Ronald Fick and MILES D MEDINA and Christine Angelini},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=U9zRgpgdFI}\n}", "github": "", "project": "", "reviewers": "J8MW;Ma6V;Y8FJ;7rvT", "pdf_size": 977794, "rating": "4;5;8;8", "confidence": "4;5;4;4", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "3;3;3;3", "wc_summary": "59;19;96;108", "wc_strengths": "68;13;130;105", "wc_weaknesses": "114;161;26;56", "wc_questions": "112;15;12;25", "wc_limitations": "1;1;5;6", "wc_review": "354;209;269;300", "wc_reply_reviewers": "0;9;17;45", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.5, 34.78864757359791 ], "wc_strengths_avg": [ 79.0, 44.02839992550263 ], "wc_weaknesses_avg": [ 89.25, 52.121852422952124 ], "wc_questions_avg": [ 41.0, 41.27347816697788 ], "wc_limitations_avg": [ 3.25, 2.277608394786075 ], "wc_review_avg": [ 283.0, 52.44520950477746 ], "wc_reply_reviewers_avg": 
[ 17.75, 16.843025262701474 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4042260417272216, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6617422452251558201&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ufl.edu;ufl.edu;;ufl.edu;ufl.edu;;ufl.edu;ufl.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ufl.edu", "aff_unique_abbr": "UF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Graph Neural Architecture Search with Disentangled Self-Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71524", "id": "UAFa5ZhR85", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e78399fc43dbb2d87b7e1e6906ce5baf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UAFa5ZhR85", "openreview": "https://openreview.net/forum?id=UAFa5ZhR85", "poster": "/media/PosterPDFs/NeurIPS%202023/71524.png?t=1701924260.6613715", "slides": "https://nips.cc/virtual/2023/poster/71524", "video": "https://nips.cc/virtual/2023/poster/71524", "author_site": "Zeyang Zhang, Xin Wang, Ziwei Zhang, Guangyao Shen, Shiqi Shen, Wenwu Zhu", "tldr": "", "abstract": "The existing graph neural architecture search (GNAS) methods heavily rely on supervised labels during the search process, failing to handle ubiquitous scenarios where supervisions are not available. In this paper, we study the problem of unsupervised graph neural architecture search, which remains unexplored in the literature. The key problem is to discover the latent graph factors that drive the formation of graph data as well as the underlying relations between the factors and the optimal neural architectures. Handling this problem is challenging given that the latent graph factors together with architectures are highly entangled due to the nature of the graph and the complexity of the neural architecture search process. To address the challenge, we propose a novel Disentangled Self-supervised Graph Neural Architecture Search (DSGAS) model, which is able to discover the optimal architectures capturing various latent graph factors in a self-supervised fashion based on unlabeled graph data. Specifically, we first design a disentangled graph super-network capable of incorporating multiple architectures with factor-wise disentanglement, which are optimized simultaneously. Then, we estimate the performance of architectures under different factors by our proposed self-supervised training with joint architecture-graph disentanglement. Finally, we propose a contrastive search with architecture augmentations to discover architectures with factor-specific expertise. 
Extensive experiments on 11 real-world datasets demonstrate that the proposed model is able to achieve state-of-the-art performance against several baseline methods in an unsupervised manner.", "keywords": "Graph Neural Architecture Search;Unsupervised Learning;Self-supervised Learning", "primary_area": "", "supplementary_material": "/attachment/df5c8adc8ebf44e965c1b4c788e7163c9c5b7dee.pdf", "author": "Zeyang Zhang;Xin Wang;Ziwei Zhang;Guangyao Shen;Shiqi Shen;Wenwu Zhu", "authorids": "~Zeyang_Zhang1;~Xin_Wang17;~Ziwei_Zhang1;~Guangyao_Shen1;~Shiqi_Shen1;~Wenwu_Zhu1", "gender": ";M;;M;M;M", "homepage": "https://zzythu.com;http://mn.cs.tsinghua.edu.cn/xinwang/;;;;http://media.cs.tsinghua.edu.cn/en/zww", "dblp": "236/0242;10/5630-19;;178/8555;169/3386;97/6308-1.html", "google_scholar": "w_njVcAAAAAJ;YPOBHYUAAAAJ;;BUZDdGMAAAAJ;;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ", "orcid": "0000-0003-1329-1313;0000-0002-0351-2939;;;;0000-0003-2236-9290", "linkedin": "zeyang-zhang-a7a039159;;;;;", "or_profile": "~Zeyang_Zhang1;~Xin_Wang17;~Ziwei_Zhang1;~Guangyao_Shen1;~Shiqi_Shen1;~Wenwu_Zhu1", "aff": "Tsinghua University;Tsinghua University;;Tencent;Wechat, Tencent;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;;tencent.com;tencent.com;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023unsupervised,\ntitle={Unsupervised Graph Neural Architecture Search with Disentangled Self-Supervision},\nauthor={Zeyang Zhang and Xin Wang and Ziwei Zhang and Guangyao Shen and Shiqi Shen and Wenwu Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UAFa5ZhR85}\n}", "github": "", "project": "", "reviewers": "JBpb;5ige;Hgo9;bqLE;GehM", "pdf_size": 509077, "rating": "4;6;7;7;8", "confidence": "5;4;5;5;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;2;2", "wc_summary": "26;82;88;81;100", "wc_strengths": "19;41;91;43;107", "wc_weaknesses": "142;68;105;8;63", "wc_questions": "4;4;68;120;3", "wc_limitations": "4;1;12;1;1", "wc_review": "195;196;364;253;274", "wc_reply_reviewers": "26;0;0;38;40", "wc_reply_authors": "351;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 75.4, 25.609373284014588 ], "wc_strengths_avg": [ 60.2, 33.16865990660461 ], "wc_weaknesses_avg": [ 77.2, 44.82588537887456 ], "wc_questions_avg": [ 39.8, 47.21186291600873 ], "wc_limitations_avg": [ 3.8, 4.261455150532504 ], "wc_review_avg": [ 256.4, 62.15657648229993 ], "wc_reply_reviewers_avg": [ 20.8, 17.645396000090223 ], "wc_reply_authors_avg": [ 70.2, 140.39999999999998 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3611575592573077, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8504555526661276341&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;cs.tsinghua.edu.cn;;tencent.com;tencent.com;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": ";Tencent Holdings 
Limited", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.tencent.com", "aff_unique_abbr": "THU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Unified Generalization Analysis of Re-Weighting and Logit-Adjustment for Imbalanced Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71523", "id": "UAow2kPsYP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/973a0f50d43cf99118cdab456edcacda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UAow2kPsYP", "openreview": "https://openreview.net/forum?id=UAow2kPsYP", "poster": "/media/PosterPDFs/NeurIPS%202023/71523.png?t=1699860977.2965124", "slides": "https://nips.cc/virtual/2023/poster/71523", "video": "https://nips.cc/virtual/2023/poster/71523", "author_site": "Zitai Wang, Qianqian Xu, Zhiyong Yang, Yuan He, Xiaochun Cao, Qingming Huang", "tldr": "", "abstract": "Real-world datasets are typically imbalanced in the sense that only a few classes have numerous samples, while many classes are associated with only a few samples. As a result, a naive ERM learning process will be biased towards the majority classes, making it difficult to generalize to the minority classes. To address this issue, one simple but effective approach is to modify the loss function to emphasize the learning on minority classes, such as re-weighting the losses or adjusting the logits via class-dependent terms. However, existing generalization analysis of such losses is still coarse-grained and fragmented, failing to explain some empirical results. To bridge this gap between theory and practice, we propose a novel technique named data-dependent contraction to capture how these modified losses handle different classes. On top of this technique, a fine-grained generalization bound is established for imbalanced learning, which helps reveal the mystery of re-weighting and logit-adjustment in a unified manner. Furthermore, a principled learning algorithm is developed based on the theoretical insights. 
Finally, the empirical results on benchmark datasets not only validate the theoretical results but also demonstrate the effectiveness of the proposed method.", "keywords": "Imbalanced Learning;Re-weighting;Logit Adjustment;Generalization Analysis", "primary_area": "", "supplementary_material": "/attachment/befc45cc5039c501f5a4722127ec94cd7cd3df30.pdf", "author": "Zitai Wang;Qianqian Xu;Zhiyong Yang;Yuan He;Xiaochun Cao;Qingming Huang", "authorids": "~Zitai_Wang1;~Qianqian_Xu2;~Zhiyong_Yang1;~Yuan_He2;~Xiaochun_Cao3;~Qingming_Huang1", "gender": "M;F;M;M;M;", "homepage": "https://wang22ti.github.io;http://vipl.ict.ac.cn/people/~qianqianxu;https://joshuaas.github.io/;http://www.alibaba.com;https://scst.sysu.edu.cn/members/caoxiaochun.htm;https://qmhuang-ucas.github.io/", "dblp": "251/3361;07/7627;01/452-1.html;11/1735-1.html;39/3695;68/4388", "google_scholar": "45qZ_LcAAAAJ;https://scholar.google.com.hk/citations?user=MjifS2MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;cWbXLzgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ", "orcid": "0000-0003-4156-6417;;0000-0002-4409-4999;0000-0002-6885-1341;0000-0001-7141-708X;", "linkedin": ";;;;;", "or_profile": "~Zitai_Wang1;~Qianqian_Xu2;~Zhiyong_Yang1;~Yuan_He2;~Xiaochun_Cao3;~Qingming_Huang2", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Alibaba Group;SUN YAT-SEN UNIVERSITY;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.ac.cn;ucas.ac.cn;alibaba-inc.com;sysu.edu.cn;ucas.ac.cn", "position": "PhD student;Full Professor;Postdoc;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Unified Generalization Analysis of Re-Weighting and Logit-Adjustment for Imbalanced Learning},\nauthor={Zitai Wang and Qianqian Xu and Zhiyong Yang and Yuan He and Xiaochun Cao and Qingming Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UAow2kPsYP}\n}", "github": "", "project": "", "reviewers": "2B2B;4AC3;D9Eg;sr97", "pdf_size": 1034320, "rating": "6;7;7;8", "confidence": "4;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;3;3", "wc_summary": "57;118;81;125", "wc_strengths": "57;35;50;31", "wc_weaknesses": "264;60;38;164", "wc_questions": "2;4;7;1", "wc_limitations": "1;30;15;1", "wc_review": "381;247;191;322", "wc_reply_reviewers": "14;14;0;16", "wc_reply_authors": "22;23;0;20", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 27.698149757700424 ], "wc_strengths_avg": [ 43.25, 10.638961415476606 ], "wc_weaknesses_avg": [ 131.5, 90.0930074978075 ], "wc_questions_avg": [ 3.5, 2.29128784747792 ], "wc_limitations_avg": [ 11.75, 11.986972094736853 ], "wc_review_avg": [ 285.25, 72.22317841247366 ], "wc_reply_reviewers_avg": [ 11.0, 6.4031242374328485 ], "wc_reply_authors_avg": [ 16.25, 9.443913383762052 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 33, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15401944555822682105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucas.ac.cn;ict.ac.cn;ucas.ac.cn;alibaba-inc.com;sysu.edu.cn;ucas.ac.cn", "author_num": 6, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Alibaba Group;Sun Yat-sen University", "aff_unique_dep": ";Institute of Computing Technology;;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn;https://www.alibaba.com;http://www.sysu.edu.cn", "aff_unique_abbr": "UCAS;CAS;Alibaba;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Cross-modal Active Complementary Learning with Self-refining Correspondence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71522", "id": "UBBeUjTja8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e580cdd54fe38ca9a5b8ea6fe99bb44-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UBBeUjTja8", "openreview": "https://openreview.net/forum?id=UBBeUjTja8", "poster": "/media/PosterPDFs/NeurIPS%202023/71522.png?t=1697349711.0662591", "slides": "https://nips.cc/virtual/2023/poster/71522", "video": "https://nips.cc/virtual/2023/poster/71522", "author_site": "Yang Qin, Yuan Sun, Dezhong Peng, Joey Tianyi Zhou, Xi Peng, Peng Hu", "tldr": "", "abstract": "Recently, image-text matching has attracted more and more attention from academia and industry, which is fundamental to understanding the latent correspondence across visual and textual modalities. However, most existing methods implicitly assume the training pairs are well-aligned while ignoring the ubiquitous annotation noise, a.k.a noisy correspondence (NC), thereby inevitably leading to a performance drop. Although some methods attempt to address such noise, they still face two challenging problems: excessive memorizing/overfitting and unreliable correction for NC, especially under high noise. To address the two problems, we propose a generalized Cross-modal Robust Complementary Learning framework (CRCL), which benefits from a novel Active Complementary Loss (ACL) and an efficient Self-refining Correspondence Correction (SCC) to improve the robustness of existing methods. Specifically, ACL exploits active and complementary learning losses to reduce the risk of providing erroneous supervision, leading to theoretically and experimentally demonstrated robustness against NC. SCC utilizes multiple self-refining processes with momentum correction to enlarge the receptive field for correcting correspondences, thereby alleviating error accumulation and achieving accurate and stable corrections. 
We carry out extensive experiments on three image-text benchmarks, i.e., Flickr30K, MS-COCO, and CC152K, to verify the superior robustness of our CRCL against synthetic and real-world noisy correspondences.", "keywords": "Cross-modal learning;Image-text matching;Noisy correspondence.", "primary_area": "", "supplementary_material": "/attachment/a4abb83fce368c88dcc1f38a180fae63ac940d1e.pdf", "author": "Yang Qin;Yuan Sun;Dezhong Peng;Joey Tianyi Zhou;Xi Peng;Peng Hu", "authorids": "~Yang_Qin4;~Yuan_Sun2;~Dezhong_Peng1;~Joey_Tianyi_Zhou1;~Xi_Peng3;~Peng_Hu2", "gender": ";;M;M;;M", "homepage": ";;https://cs.scu.edu.cn/info/1249/10284.htm;https://joeyzhouty.github.io/;;https://penghu-cs.github.io/", "dblp": ";;;123/5110;;11/6278-2", "google_scholar": ";;0gupif8AAAAJ;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ;;gvESkwYAAAAJ", "orcid": ";;;0000-0002-4675-7055;;0000-0003-3868-3997", "linkedin": ";;;;;", "or_profile": "~Yang_Qin4;~Yuan_Sun2;~Dezhong_Peng1;~Joey_Tianyi_Zhou1;~Xi_Peng3;~Peng_Hu2", "aff": ";;Sichuan University;A*STAR Centre for Frontier AI Research;;Sichuan University", "aff_domain": ";;scu.edu.cn;cfar.a-star.edu.sg;;scu.edu.cn", "position": ";;Full Professor;Principal Researcher;;Associate Professor", "bibtex": "@inproceedings{\nqin2023crossmodal,\ntitle={Cross-modal Active Complementary Learning with Self-refining Correspondence},\nauthor={Yang Qin and Yuan Sun and Dezhong Peng and Joey Tianyi Zhou and Xi Peng and Peng Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UBBeUjTja8}\n}", "github": "", "project": "", "reviewers": "J1KY;tALj;8spS;DZ4B;QLFm", "pdf_size": 359358, "rating": "5;6;6;7;8", "confidence": "4;5;4;5;5", "soundness": "3;3;3;3;4", "novelty": "3;3;3;4;3", "presentation": "2;4;2;3;3", "wc_summary": "88;77;100;102;133", "wc_strengths": "90;161;79;136;110", "wc_weaknesses": "156;141;98;215;189", "wc_questions": "6;230;1;2;120", "wc_limitations": "26;12;1;13;11", "wc_review": "366;621;279;468;563", "wc_reply_reviewers": "0;0;0;38;0", "wc_reply_authors": "0;0;0;46;0", "reply_reviewers": "0;0;0;1;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 100.0, 18.793615937333612 ], "wc_strengths_avg": [ 115.2, 30.009331881932994 ], "wc_weaknesses_avg": [ 159.8, 40.21641455923191 ], "wc_questions_avg": [ 71.8, 91.17543528823978 ], "wc_limitations_avg": [ 12.6, 7.964923100695952 ], "wc_review_avg": [ 459.4, 125.05134945293474 ], "wc_reply_reviewers_avg": [ 7.6, 15.200000000000001 ], "wc_reply_authors_avg": [ 9.2, 18.4 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7205766921228922, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6416647533080723116&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";;scu.edu.cn;cfar.a-star.edu.sg;;scu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Sichuan University;A*STAR", "aff_unique_dep": ";Centre for Frontier AI Research", "aff_unique_url": "https://www.scu.edu.cn;https://www.a-star.edu.sg", "aff_unique_abbr": "SCU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", 
"aff_country_unique": "China;Singapore" }, { "title": "Time Series Kernels based on Nonlinear Vector AutoRegressive Delay Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71521", "id": "UBUWFEwn7p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/754612bde73a8b65ad8743f1f6d8ddf6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UBUWFEwn7p", "openreview": "https://openreview.net/forum?id=UBUWFEwn7p", "poster": "/media/PosterPDFs/NeurIPS%202023/71521.png?t=1701681353.7765357", "slides": "https://nips.cc/virtual/2023/poster/71521", "video": "https://nips.cc/virtual/2023/poster/71521", "author_site": "Giovanni De Felice, John Goulermas, Vladimir Gusev", "tldr": "", "abstract": "Kernel design is a pivotal but challenging aspect of time series analysis, especially in the context of small datasets. In recent years, Reservoir Computing (RC) has emerged as a powerful tool to compare time series based on the underlying dynamics of the generating process rather than the observed data. However, the performance of RC highly depends on the hyperparameter setting, which is hard to interpret and costly to optimize because of the recurrent nature of RC. Here, we present a new kernel for time series based on the recently established equivalence between reservoir dynamics and Nonlinear Vector AutoRegressive (NVAR) processes. The kernel is non-recurrent and depends on a small set of meaningful hyperparameters, for which we suggest an effective heuristic. We demonstrate excellent performance on a wide range of real-world classification tasks, both in terms of accuracy and speed. This further advances the understanding of RC representation learning models and extends the typical use of the NVAR framework to kernel design and representation of real-world time series data.", "keywords": "Time Series;Kernel methods;NVAR processes;Dynamical systems;Reservoir Computing", "primary_area": "", "supplementary_material": "/attachment/54d2a8037a9b629590ed7d91735976eedda2dd48.zip", "author": "Giovanni De Felice;John Y Goulermas;Vladimir Gusev", "authorids": "~Giovanni_De_Felice1;~John_Y_Goulermas1;~Vladimir_Gusev1", "gender": "M;M;", "homepage": "https://gdefe.github.io;http://www.liv.ac.uk/computer-science;", "dblp": ";;", "google_scholar": "dFZ6v6IAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Giovanni_De_Felice1;~John_Y_Goulermas1;~Vladimir_Gusev1", "aff": "University of Liverpool;University of Liverpool;", "aff_domain": "liv.ac.uk;ed.ac.uk;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\nfelice2023time,\ntitle={Time Series Kernels based on Nonlinear Vector AutoRegressive Delay Embeddings},\nauthor={Giovanni De Felice and John Y Goulermas and Vladimir Gusev},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UBUWFEwn7p}\n}", "github": "", "project": "", "reviewers": "Nbgs;XEWY;8sG2;hHQP;RJLc", "pdf_size": 2246592, "rating": "4;7;7;7;7", "confidence": "4;3;4;3;3", "soundness": "3;3;4;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "69;117;80;157;107", "wc_strengths": "197;107;127;317;96", "wc_weaknesses": "263;69;63;131;188", "wc_questions": "29;126;262;2;187", "wc_limitations": "8;13;6;19;22", "wc_review": "566;432;538;626;600", "wc_reply_reviewers": "0;27;69;225;176", "wc_reply_authors": "0;0;0;51;50", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.4, 1.2 ], 
"confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 106.0, 30.880414504990053 ], "wc_strengths_avg": [ 168.8, 82.00585344961664 ], "wc_weaknesses_avg": [ 142.8, 75.42519472961274 ], "wc_questions_avg": [ 121.2, 96.83676987591026 ], "wc_limitations_avg": [ 13.6, 6.151422599691879 ], "wc_review_avg": [ 552.4, 67.18809418341914 ], "wc_reply_reviewers_avg": [ 99.4, 86.82073485061042 ], "wc_reply_authors_avg": [ 20.2, 24.741867350707384 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A7TSvEMKZCAJ:scholar.google.com/&scioq=Time+Series+Kernels+based+on+Nonlinear+Vector+AutoRegressive+Delay+Embeddings&hl=en&as_sdt=0,23", "gs_version_total": 3, "email": "liv.ac.uk;ed.ac.uk;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Liverpool", "aff_unique_dep": "", "aff_unique_url": "https://www.liverpool.ac.uk", "aff_unique_abbr": "Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning Human Action Recognition Representations Without Real Humans", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73581", "id": "UBbm5embIB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd556f38dba3a6c367c42fa85fc0801c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=UBbm5embIB", "openreview": "https://openreview.net/forum?id=UBbm5embIB", "poster": "/media/PosterPDFs/NeurIPS%202023/73581.png?t=1699515722.298918", "slides": "https://nips.cc/virtual/2023/poster/73581", "video": "https://nips.cc/virtual/2023/poster/73581", "author_site": "Howard Zhong, Samarth Mishra, Donghyun Kim, SouYoung Jin, Rameswar Panda, Hilde Kuehne, Leonid Karlinsky, Venkatesh Saligrama, Aude Oliva, Rogerio Feris", "tldr": "", "abstract": "Pre-training on massive video datasets has become essential to achieve high action recognition performance on smaller downstream datasets. However, most large-scale video datasets contain images of people and hence are accompanied with issues related to privacy, ethics, and data protection, often preventing them from being publicly shared for reproducible research. Existing work has attempted to alleviate these problems by blurring faces, downsampling videos, or training on synthetic data. On the other hand, analysis on the {\\em transferability} of privacy-preserving pre-trained models to downstream tasks has been limited. In this work, we study this problem by first asking the question: can we pre-train models for human action recognition with data that does not include real humans? To this end, we present, for the first time, a benchmark that leverages real-world videos with {\\em humans removed} and synthetic data containing virtual humans to pre-train a model. We then evaluate the transferability of the representation learned on this data to a diverse set of downstream action recognition benchmarks. Furthermore, we propose a novel pre-training strategy, called Privacy-Preserving MAE-Align, to effectively combine synthetic data and human-removed real data. 
\nOur approach outperforms previous baselines by up to 5\\% and closes the performance gap between human and no-human action recognition representations on downstream tasks, for both linear probing and fine-tuning. Our benchmark, code, and models are available at https://github.com/howardzh01/PPMA.", "keywords": "(Application) Computer Vision;(Application) Privacy;Anonymity;and Security;Multi-task and Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/4e0a743fac3b51822424c2bf050c31efd147ea10.zip", "author": "Howard Zhong;Samarth Mishra;Donghyun Kim;SouYoung Jin;Rameswar Panda;Hilde Kuehne;Leonid Karlinsky;Venkatesh Saligrama;Aude Oliva;Rogerio Feris", "authorids": "~Howard_Zhong1;~Samarth_Mishra1;~Donghyun_Kim2;~SouYoung_Jin2;~Rameswar_Panda1;~Hilde_Kuehne5;~Leonid_Karlinsky3;~Venkatesh_Saligrama1;~Aude_Oliva1;~Rogerio_Feris1", "gender": ";M;M;F;M;F;M;;;M", "homepage": ";https://samarth4149.github.io/;https://cs-people.bu.edu/donhk;http://souyoungjin.com;https://rpand002.github.io/;https://hildekuehne.github.io;;https://venkatesh-saligrama.github.io/;;http://rogerioferis.com", "dblp": ";194/2977;;225/4723;126/0986;45/4963;05/4463;67/4721;;", "google_scholar": "35kXUtkAAAAJ;Vxk4TM4AAAAJ;https://scholar.google.co.kr/citations?user=UsqNPH4AAAAJ;_B-_CzYAAAAJ;_ySuu6gAAAAJ;pxhCcH0AAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ;S4z3uzMAAAAJ;;xt3XLjcAAAAJ", "orcid": ";;;;;0000-0003-1079-4441;;0000-0002-0675-2268;;", "linkedin": "howard-zhong/;samarth-mishra/;;;;hilde-kuehne-8b9aa661;;venkatesh-saligrama-91175a16/;;", "or_profile": "~Howard_Zhong1;~Samarth_Mishra1;~Donghyun_Kim2;~SouYoung_Jin2;~Rameswar_Panda1;~Hilde_Kuehne5;~Leonid_Karlinsky3;~Venkatesh_Saligrama1;~Aude_Oliva1;~Rogerio_Feris1", "aff": "Massachusetts Institute of Technology;Amazon;MIT-IBM Watson AI Lab;Dartmouth College;MIT-IBM Watson AI Lab;Goethe University Frankfurt;International Business Machines;Boston University;;International Business Machines", "aff_domain": "mit.edu;amazon.com;ibm.com;dartmouth.edu;ibm.com;uni-frankfurt.de;ibm.com;bu.edu;;ibm.com", "position": "MIT;Intern;Researcher;Assistant Professor;Research Scientist;Assistant Professor;Principal Researcher;Full Professor;;Research Manager", "bibtex": "@inproceedings{\nzhong2023learning,\ntitle={Learning Human Action Recognition Representations Without Real Humans},\nauthor={Howard Zhong and Samarth Mishra and Donghyun Kim and SouYoung Jin and Rameswar Panda and Hilde Kuehne and Leonid Karlinsky and Venkatesh Saligrama and Aude Oliva and Rogerio Feris},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=UBbm5embIB}\n}", "github": "", "project": "", "reviewers": "HViE;He2J;Jm2h;FyUv", "pdf_size": 5968225, "rating": "4;6;7;7", "confidence": "5;4;4;5", "wc_summary_and_contributions": "25;78;28;150", "wc_strengths": "73;33;31;194", "wc_improvement": "3;32;19;151", "wc_limitations": "151;36;20;197", "wc_correctness": "1;7;5;17", "wc_clarity": "1;1;6;10", "wc_relation_to_prior_work": "1;1;8;35", "wc_documentation": "1;1;8;44", "wc_additional_feedback": "1;1;1;1", "wc_review": "257;190;126;799", "wc_reply_reviewers": "0;42;38;62", "wc_reply_authors": "409;598;266;928", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 70.25, 50.6279320138597 ], "wc_strengths_avg": [ 82.75, 66.3791194578536 ], 
"wc_improvement_avg": [ 51.25, 58.49946580952684 ], "wc_limitations_avg": [ 101.0, 75.00333325926255 ], "wc_correctness_avg": [ 7.5, 5.894913061275798 ], "wc_clarity_avg": [ 4.5, 3.774917217635375 ], "wc_relation_to_prior_work_avg": [ 11.25, 14.00669482783144 ], "wc_documentation_avg": [ 13.5, 17.839562774911272 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 343.0, 267.31535683533036 ], "wc_reply_reviewers_avg": [ 35.5, 22.422087324778662 ], "wc_reply_authors_avg": [ 550.25, 247.85315713139505 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=858689114419397276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mit.edu;amazon.com;ibm.com;dartmouth.edu;ibm.com;uni-frankfurt.de;ibm.com;bu.edu;;ibm.com", "author_num": 10, "aff_unique_index": "0;1;0;2;0;3;4;5;4", "aff_unique_norm": "Massachusetts Institute of Technology;Amazon;Dartmouth College;Goethe University Frankfurt;International Business Machines Corporation;Boston University", "aff_unique_dep": ";Amazon.com, Inc.;;;;", "aff_unique_url": "https://web.mit.edu;https://www.amazon.com;https://www.dartmouth.edu;https://www.uni-frankfurt.de;https://www.ibm.com;https://www.bu.edu", "aff_unique_abbr": "MIT;Amazon;Dartmouth;GU Frankfurt;IBM;BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Frankfurt", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Can Language Models Solve Graph Problems in Natural Language?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71520", "id": "UDqHhbqYJV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/622afc4edf2824a1b6aaf5afe153fa93-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UDqHhbqYJV", "openreview": "https://openreview.net/forum?id=UDqHhbqYJV", "poster": "/media/PosterPDFs/NeurIPS%202023/71520.png?t=1699595872.8974679", "slides": "https://nips.cc/virtual/2023/poster/71520", "video": "https://nips.cc/virtual/2023/poster/71520", "author_site": "Heng Wang, Shangbin Feng, Tianxing He, Zhaoxuan Tan, Xiaochuang Han, Yulia Tsvetkov", "tldr": "", "abstract": "Large language models (LLMs) are increasingly adopted for a variety of tasks with implicit graphical structures, such as planning in robotics, multi-hop question answering or knowledge probing, structured commonsense reasoning, and more. While LLMs have advanced the state-of-the-art on these tasks with structure implications, whether LLMs could explicitly process textual descriptions of graphs and structures, map them to grounded conceptual spaces, and perform structured operations remains underexplored. To this end, we propose NLGraph (Natural Language Graph), a comprehensive benchmark of graph-based problem solving designed in natural language. NLGraph contains 29,370 problems, covering eight graph reasoning tasks with varying complexity from simple tasks such as connectivity and shortest path up to complex problems such as maximum flow and simulating graph neural networks. 
We evaluate LLMs (GPT-3/4) with various prompting approaches on the NLGraph benchmark and find that 1) language models do demonstrate preliminary graph reasoning abilities, 2) the benefit of advanced prompting and in-context learning diminishes on more complex graph problems, while 3) LLMs are also (un)surprisingly brittle in the face of spurious correlations in graph and problem settings. We then propose Build-a-Graph Prompting and Algorithmic Prompting, two instruction-based approaches to enhance LLMs in solving natural language graph problems. Build-a-Graph and Algorithmic prompting improve the performance of LLMs on NLGraph by 3.07% to 16.85% across multiple tasks and settings, while how to solve the most complicated graph reasoning tasks in our setup with language models remains an open research question.", "keywords": "large language models;graph reasoning;structured reasoning", "primary_area": "", "supplementary_material": "/attachment/6e3b61522a39dda6fc5831a757f63263134d6280.zip", "author": "Heng Wang;Shangbin Feng;Tianxing He;Zhaoxuan Tan;Xiaochuang Han;Yulia Tsvetkov", "authorids": "~Heng_Wang10;~Shangbin_Feng1;~Tianxing_He1;~Zhaoxuan_Tan1;~Xiaochuang_Han1;~Yulia_Tsvetkov1", "gender": "M;M;M;M;M;F", "homepage": "https://arthur-heng.github.io;https://bunsenfeng.github.io/;https://cloudygoose.github.io/;https://tamsiuhin.github.io/;https://xhan77.github.io/;https://homes.cs.washington.edu/~yuliats/", "dblp": "61/5618-8.html;295/9571;149/0111;301/7706;216/6755;75/8157", "google_scholar": "SLyDEswAAAAJ;Y3rLP9UAAAAJ;egmfjjwAAAAJ;0KE2CZsAAAAJ;GamSVF0AAAAJ;SEDPkrsAAAAJ", "orcid": ";0000-0002-4133-1987;;0000-0001-8230-6238;;0000-0002-4634-7128", "linkedin": ";;;zhaoxuan-tan-927132213/;;", "or_profile": "~Heng_Wang10;~Shangbin_Feng1;~Tianxing_He1;~Zhaoxuan_Tan1;~Xiaochuang_Han1;~Yulia_Tsvetkov1", "aff": "Xi'an Jiaotong University;University of Washington;University of Washington;Xi'an Jiaotong University;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington", "aff_domain": "xjtu.edu.cn;cs.washington.edu;cs.washington.edu;xjtu.edu.cn;cs.washington.edu;cs.washington.edu", "position": "Undergrad student;PhD student;Postdoc;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023can,\ntitle={Can Language Models Solve Graph Problems in Natural Language?},\nauthor={Heng Wang and Shangbin Feng and Tianxing He and Zhaoxuan Tan and Xiaochuang Han and Yulia Tsvetkov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UDqHhbqYJV}\n}", "github": "", "project": "", "reviewers": "U4cj;moU8;EhYM;E2y1;7jfe", "pdf_size": 517175, "rating": "6;7;8;8;8", "confidence": "4;3;3;3;4", "soundness": "3;3;4;3;4", "novelty": "3;3;4;4;4", "presentation": "3;3;4;4;4", "wc_summary": "91;208;134;106;144", "wc_strengths": "138;96;104;131;72", "wc_weaknesses": "172;28;16;209;106", "wc_questions": "83;1;24;2;146", "wc_limitations": "22;1;13;9;8", "wc_review": "506;334;291;457;476", "wc_reply_reviewers": "15;6;0;15;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 136.6, 40.435627854653134 ], "wc_strengths_avg": [ 108.2, 24.019991673603887 ], 
"wc_weaknesses_avg": [ 106.2, 76.35286504120197 ], "wc_questions_avg": [ 51.2, 55.99785710185703 ], "wc_limitations_avg": [ 10.6, 6.887670143089026 ], "wc_review_avg": [ 412.8, 84.47342777465586 ], "wc_reply_reviewers_avg": [ 7.2, 6.734983296193095 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 205, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10660384245119063422&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "xjtu.edu.cn;cs.washington.edu;cs.washington.edu;xjtu.edu.cn;cs.washington.edu;cs.washington.edu", "author_num": 6, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "Xi'an Jiao Tong University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.washington.edu", "aff_unique_abbr": "XJTU;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Evaluating Open-QA Evaluation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73580", "id": "UErNpveP6R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f323d594aa5d2c68154433a131c07959-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=UErNpveP6R", "openreview": "https://openreview.net/forum?id=UErNpveP6R", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73580", "video": "https://nips.cc/virtual/2023/poster/73580", "author_site": "Cunxiang Wang, Sirui Cheng, Qipeng Guo, Yuanhao Yue, Bowen Ding, Zhikun Xu, Yidong Wang, Xiangkun Hu, Zheng Zhang, Yue Zhang", "tldr": "", "abstract": "This study focuses on the evaluation of the Open Question Answering (Open-QA) task, which can directly estimate the factuality of large language models (LLMs). Current automatic evaluation methods have shown limitations, indicating that human evaluation still remains the most reliable approach. We introduce a new task, QA Evaluation (QA-Eval) and the corresponding dataset EVOUNA, designed to assess the accuracy of AI-generated answers in relation to standard answers within Open-QA. Our evaluation of these methods utilizes human-annotated results to measure their performance. Specifically, the work investigates methods that show high correlation with human evaluations, deeming them more reliable. We also discuss the pitfalls of current methods and methods to improve LLM-based evaluators. We believe this new QA-Eval task and corresponding dataset EVOUNA will facilitate the development of more effective automatic evaluation tools and prove valuable for future research in this area. 
All resources are available at https://github.com/wangcunxiang/QA-Eval and it is under the Apache-2.0 License.", "keywords": "Open-QA;Large Language Models;QA Evaluation", "primary_area": "", "supplementary_material": "/attachment/28ff3adaf2387e921a58c61ada98a277a90e444d.pdf", "author": "Cunxiang Wang;Sirui Cheng;Qipeng Guo;Yuanhao Yue;Bowen Ding;Zhikun Xu;Yidong Wang;Xiangkun Hu;Zheng Zhang;Yue Zhang", "authorids": "~Cunxiang_Wang1;~Sirui_Cheng1;~Qipeng_Guo1;~Yuanhao_Yue4;~Bowen_Ding1;~Zhikun_Xu1;~Yidong_Wang1;~Xiangkun_Hu1;~Zheng_Zhang1;~Yue_Zhang7", "gender": "Not Specified;M;M;M;M;;M;M;M;M", "homepage": "https://wangcunxiang.github.io/;;;https://github.com/bug-orz;;https://jerrrrykun.github.io/;https://qianlanwyd.github.io/;;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang;http://frcchang.github.io", "dblp": "213/1862.html;;172/1046;;;98/8857;59/6759.html;224/5990;;47/722-4", "google_scholar": "https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.hk/citations?user=6CLWJN4AAAAJ;k3mPGKgAAAAJ;https://scholar.google.com/citations?hl;;QzSfrO0AAAAJ;;_-0MpawAAAAJ;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ;", "orcid": ";;;;;;;;;0000-0002-5214-2268", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/ACoAADO16esB2moOdxKMQBH69EAz3ofvC3Z63zM;;;;;", "or_profile": "~Cunxiang_Wang1;~Sirui_Cheng1;~Qipeng_Guo1;~Yuanhao_Yue4;~Bowen_Ding1;~Zhikun_Xu1;~Yidong_Wang1;~Xiangkun_Hu1;~Zheng_Zhang1;~Yue_Zhang7", "aff": "Westlake University;Northeastern University;Amazon;Fudan University;Westlake University;Fudan University;Peking University;Amazon;Amazon;Westlake University", "aff_domain": "westlake.edu.cn;neu.edu.cn;amazon.com;fudan.edu.cn;westlake.edu;fudan.edu.cn;pku.edu.cn;amazon.com;amazon.com;westlake.edu.cn", "position": "PhD student;Undergrad student;Researcher;MS student;Intern;MS student;PhD student;Applied Scientist;Senior Principal Scientist;Full Professor", "bibtex": "@inproceedings{\nwang2023evaluating,\ntitle={Evaluating Open-{QA} Evaluation},\nauthor={Cunxiang Wang and Sirui Cheng and Qipeng Guo and Yuanhao Yue and Bowen Ding and Zhikun Xu and Yidong Wang and Xiangkun Hu and Zheng Zhang and Yue Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=UErNpveP6R}\n}", "github": "", "project": "", "reviewers": "LhrH;Hse5;8Zik;WzGc", "pdf_size": 508529, "rating": "5;7;7;9", "confidence": "3;4;4;5", "wc_summary_and_contributions": "77;148;89;67", "wc_strengths": "89;49;37;38", "wc_improvement": "122;22;121;38", "wc_limitations": "26;73;53;77", "wc_correctness": "38;49;6;20", "wc_clarity": "47;40;7;1", "wc_relation_to_prior_work": "34;15;5;50", "wc_documentation": "11;8;4;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "445;405;323;311", "wc_reply_reviewers": "0;120;15;0", "wc_reply_authors": "3915;4903;6311;3415", "reply_reviewers": "0;4;1;0", "reply_authors": "10;13;15;7", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 95.25, 31.43544973433655 ], "wc_strengths_avg": [ 53.25, 21.1704392963396 ], "wc_improvement_avg": [ 75.75, 46.09975596464693 ], "wc_limitations_avg": [ 57.25, 20.20365066021485 ], "wc_correctness_avg": [ 28.25, 16.498105951896417 ], "wc_clarity_avg": [ 23.75, 20.017180121085985 ], "wc_relation_to_prior_work_avg": [ 26.0, 17.334935823359714 ], "wc_documentation_avg": [ 10.5, 5.5 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], 
"wc_review_avg": [ 371.0, 55.98214000911362 ], "wc_reply_reviewers_avg": [ 33.75, 50.1715806009737 ], "wc_reply_authors_avg": [ 4636.0, 1105.3954043689525 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 11.25, 3.031088913245535 ], "replies_avg": [ 56, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4685200784313936167&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "westlake.edu.cn;neu.edu.cn;amazon.com;fudan.edu.cn;westlake.edu;fudan.edu.cn;pku.edu.cn;amazon.com;amazon.com;westlake.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;3;0;3;4;2;2;0", "aff_unique_norm": "Westlake University;Northeastern University;Amazon;Fudan University;Peking University", "aff_unique_dep": ";;Amazon.com, Inc.;;", "aff_unique_url": "https://www.westlake.edu.cn;https://www.northeastern.edu;https://www.amazon.com;https://www.fudan.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "WU;NEU;Amazon;Fudan;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "MEMTO: Memory-guided Transformer for Multivariate Time Series Anomaly Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71519", "id": "UFW67uduJd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b4c898eb1fb556b8d871fbe9ead92256-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UFW67uduJd", "openreview": "https://openreview.net/forum?id=UFW67uduJd", "poster": "/media/PosterPDFs/NeurIPS%202023/71519.png?t=1698648523.5159426", "slides": "https://nips.cc/virtual/2023/poster/71519", "video": "https://nips.cc/virtual/2023/poster/71519", "author_site": "Junho Song, Keonwoo Kim, Jeonglyul Oh, Sungzoon Cho", "tldr": "", "abstract": "Detecting anomalies in real-world multivariate time series data is challenging due to complex temporal dependencies and inter-variable correlations. Recently, reconstruction-based deep models have been widely used to solve the problem. However, these methods still suffer from an over-generalization issue and fail to deliver consistently high performance. To address this issue, we propose the MEMTO, a memory-guided Transformer using a reconstruction-based approach. It is designed to incorporate a novel memory module that can learn the degree to which each memory item should be updated in response to the input data. To stabilize the training procedure, we use a two-phase training paradigm which involves using K-means clustering for initializing memory items. Additionally, we introduce a bi-dimensional deviation-based detection criterion that calculates anomaly scores considering both input space and latent space. We evaluate our proposed method on five real-world datasets from diverse domains, and it achieves an average anomaly detection F1-score of 95.74%, significantly outperforming the previous state-of-the-art methods. 
We also conduct extensive experiments to empirically validate the effectiveness of our proposed model's key components.", "keywords": "Multivariate time series;Anomaly detection", "primary_area": "", "supplementary_material": "/attachment/fec83f675ef5fa039b6f51ed53e315c1b2ba2de2.zip", "author": "Junho Song;Keonwoo Kim;Jeonglyul Oh;Sungzoon Cho", "authorids": "~Junho_Song2;~Keonwoo_Kim2;~Jeonglyul_Oh1;~Sungzoon_Cho1", "gender": "M;M;M;M", "homepage": "http://dm.snu.ac.kr/ko/;;http://bdai.snu.ac.kr/ko/;http://bdai.snu.ac.kr/en/people/professor", "dblp": ";58/2926;;60/2556", "google_scholar": ";IRStRngAAAAJ;;https://scholar.google.com/citations?hl=ko", "orcid": ";;;0000-0002-1695-1973", "linkedin": ";keonwookim97;;%EC%84%B1%EC%A4%80-%EC%A1%B0-00822319/", "or_profile": "~Junho_Song2;~Keonwoo_Kim2;~Jeonglyul_Oh1;~Sungzoon_Cho1", "aff": "Seoul National University;Seoul National University;Sogang University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;sogang.ac.kr;snu.ac.kr", "position": "MS student;MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nsong2023memto,\ntitle={{MEMTO}: Memory-guided Transformer for Multivariate Time Series Anomaly Detection},\nauthor={Junho Song and Keonwoo Kim and Jeonglyul Oh and Sungzoon Cho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UFW67uduJd}\n}", "github": "", "project": "", "reviewers": "pZZS;UfyT;EFum;C8uM", "pdf_size": 927221, "rating": "4;6;6;7", "confidence": "4;3;3;4", "soundness": "2;3;2;3", "novelty": "1;3;2;3", "presentation": "2;3;3;3", "wc_summary": "105;70;85;62", "wc_strengths": "53;62;114;38", "wc_weaknesses": "97;134;241;65", "wc_questions": "125;7;5;79", "wc_limitations": "7;13;23;19", "wc_review": "387;286;468;263", "wc_reply_reviewers": "74;7;26;8", "wc_reply_authors": "171;18;20;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 16.3783393541592 ], "wc_strengths_avg": [ 66.75, 28.595235617144336 ], "wc_weaknesses_avg": [ 134.25, 66.29243923706534 ], "wc_questions_avg": [ 54.0, 50.68530358989675 ], "wc_limitations_avg": [ 15.5, 6.06217782649107 ], "wc_review_avg": [ 351.0, 82.08836701993772 ], "wc_reply_reviewers_avg": [ 28.75, 27.19719654670312 ], "wc_reply_authors_avg": [ 56.75, 65.96732145539941 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4552696689854927268&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;sogang.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National University;Sogang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.sogang.ac.kr", "aff_unique_abbr": "SNU;Sogang", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Segment Everything Everywhere All at Once", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71518", "id": "UHBrWeFWlL", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ef61f7e4afacf9a2c5b71c726172b86-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UHBrWeFWlL", "openreview": "https://openreview.net/forum?id=UHBrWeFWlL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71518", "video": "https://nips.cc/virtual/2023/poster/71518", "author_site": "Xueyan Zou, Jianwei Yang, Hao Zhang, Feng Li, Linjie Li, Jianfeng Wang, Lijuan Wang, Jianfeng Gao, Yong Jae Lee", "tldr": "", "abstract": "In this work, we present SEEM, a promotable and interactive model for segmenting everything everywhere all at once in an image. In SEEM, we propose a novel and versatile decoding mechanism that enables diverse prompting for all types of segmentation tasks, aiming at a universal interface that behaves like large language models (LLMs). More specifically, SEEM is designed with four desiderata:\ni) Versatility. We introduce a new visual prompt to unify different spatial queries including points, boxes, scribbles, and masks, which can further generalize to a different referring image; ii) Compositionality. We learn a joint visual-semantic space between text and visual prompts, which facilitates the dynamic composition of two prompt types required for various segmentation tasks, as shown in Fig. 1;\niii) Interactivity. We further incorporate learnable memory prompts into the decoder to retain segmentation history through mask-guided cross-attention from the decoder to image features; iv) Semantic awareness. We use a text encoder to encode text queries and mask labels into the same semantic space for open-vocabulary segmentation. We conduct a comprehensive empirical study to validate the effectiveness of SEEM across diverse segmentation tasks. The results demonstrate that SEEM exhibits robust generalizing to unseen user intents as it learns to compose prompts of different types in a unified representation space. 
Our approach achieves competitive performance on interactive segmentation, generic segmentation, referring segmentation, and video object segmentation on 9 datasets with minimum 1/100 supervision in a single set of weights.", "keywords": "Generic segmentation;interactive segmentation;referring segmentation;multi-modality prompting.", "primary_area": "", "supplementary_material": "/attachment/366033ad1f1f0176ad5af8b7ff8d047939b07a3d.pdf", "author": "Xueyan Zou;Jianwei Yang;Hao Zhang;Feng Li;Linjie Li;Jianfeng Wang;Lijuan Wang;Jianfeng Gao;Yong Jae Lee", "authorids": "~Xueyan_Zou1;~Jianwei_Yang1;~Hao_Zhang39;~Feng_Li9;~Linjie_Li1;~Jianfeng_Wang4;~Lijuan_Wang1;~Jianfeng_Gao1;~Yong_Jae_Lee2", "gender": "F;M;M;F;M;F;M;M;M", "homepage": "https://maureenzou.github.io/;https://haozhang534.github.io/;https://fengli-ust.github.io/;;;https://www.microsoft.com/en-us/research/people/lijuanw/;https://www.microsoft.com/en-us/research/people/jfgao/;https://jwyang.github.io/;https://pages.cs.wisc.edu/~yongjaelee/", "dblp": "273/3780;55/2270-97;92/2954-40.html;200/8256;;51/2527.html;92/5339;;15/5471", "google_scholar": "eslbQqoAAAAJ;B8hPxMQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;WR875gYAAAAJ;vJWEw_8AAAAJ;cDcWXuIAAAAJ;https://scholar.google.com/citations?hl=en;Cl9byD8AAAAJ;4GTpCxcAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";hao-zhang-3b09b8196/;;;;;;;", "or_profile": "~Xueyan_Zou1;~Hao_Zhang39;~Feng_Li9;~Linjie_Li1;~Jianfeng_Wang4;~Lijuan_Wang1;~Jianfeng_Gao1;~Jianwei_Yang2;~Yong_Jae_Lee1", "aff": "University of Wisconsin - Madison;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft;University of Wisconsin - Madison", "aff_domain": "wisc.edu;ust.hk;ust.hk;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;cs.wisc.edu", "position": "PhD student;PhD student;PhD student;Researcher;Principal Researcher;Principal Researcher;Principal Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzou2023segment,\ntitle={Segment Everything Everywhere All at Once},\nauthor={Xueyan Zou and Jianwei Yang and Hao Zhang and Feng Li and Linjie Li and Jianfeng Wang and Lijuan Wang and Jianfeng Gao and Yong Jae Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UHBrWeFWlL}\n}", "github": "", "project": "", "reviewers": "R6GZ;gMmy;cQYZ;x17t", "pdf_size": 18560335, "rating": "5;6;7;8", "confidence": "5;5;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "207;72;79;53", "wc_strengths": "261;28;72;76", "wc_weaknesses": "211;152;83;101", "wc_questions": "95;22;68;46", "wc_limitations": "7;12;28;2", "wc_review": "781;286;330;278", "wc_reply_reviewers": "139;295;19;66", "wc_reply_authors": "194;866;88;77", "reply_reviewers": "1;2;1;1", "reply_authors": "4;6;3;3", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.75, 60.935929466940934 ], "wc_strengths_avg": [ 109.25, 89.61410324273741 ], "wc_weaknesses_avg": [ 136.75, 49.781397127842844 ], "wc_questions_avg": [ 57.75, 26.966414296305693 ], "wc_limitations_avg": [ 12.25, 9.756408150543928 ], "wc_review_avg": [ 418.75, 210.08019302161733 ], "wc_reply_reviewers_avg": [ 129.75, 104.5499282639639 ], "wc_reply_authors_avg": [ 306.25,
326.3850295280101 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 621, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14231243752671982551&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "wisc.edu;ust.hk;ust.hk;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;cs.wisc.edu", "author_num": 9, "aff_unique_index": "0;1;1;2;2;2;2;2;0", "aff_unique_norm": "University of Wisconsin-Madison;Hong Kong University of Science and Technology;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.ust.hk;https://www.microsoft.com", "aff_unique_abbr": "UW-Madison;HKUST;Microsoft", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Madison;Hong Kong SAR;", "aff_country_unique_index": "0;1;1;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Asynchrony-Robust Collaborative Perception via Bird's Eye View Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71517", "id": "UHIDdtxmVS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a829e299ebc1c1615ddb09e98fb6ce8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UHIDdtxmVS", "openreview": "https://openreview.net/forum?id=UHIDdtxmVS", "poster": "/media/PosterPDFs/NeurIPS%202023/71517.png?t=1701182799.9966543", "slides": "https://nips.cc/virtual/2023/poster/71517", "video": "https://nips.cc/virtual/2023/poster/71517", "author_site": "Sizhe Wei, Yuxi Wei, Yue Hu, Yifan Lu, Yiqi Zhong, Siheng Chen, Ya Zhang", "tldr": "", "abstract": "Collaborative perception can substantially boost each agent's perception ability by facilitating communication among multiple agents. However, temporal asynchrony among agents is inevitable in the real world due to communication delays, interruptions, and clock misalignments. This issue causes information mismatch during multi-agent fusion, seriously undermining the foundation of collaboration. To address this issue, we propose CoBEVFlow, an asynchrony-robust collaborative perception system based on bird's eye view (BEV) flow. The key intuition of CoBEVFlow is to compensate for motion in order to align the asynchronous collaboration messages sent by multiple agents. To model the motion in a scene, we propose BEV flow, a collection of motion vectors, one for each spatial location. Based on BEV flow, asynchronous perceptual features can be reassigned to appropriate positions, mitigating the impact of asynchrony. CoBEVFlow has two advantages: (i) CoBEVFlow can handle asynchronous collaboration messages sent at irregular, continuous time stamps without discretization; and (ii) with BEV flow, CoBEVFlow only transports the original perceptual features, instead of generating new perceptual features, avoiding additional noise. To validate CoBEVFlow's efficacy, we create IRregular V2V (IRV2V), the first synthetic collaborative perception dataset with various temporal asynchronies that simulate different real-world scenarios. Extensive experiments conducted on both IRV2V and the real-world dataset DAIR-V2X show that CoBEVFlow consistently outperforms other baselines and is robust in extremely asynchronous settings.
The code is available at https://github.com/MediaBrain-SJTU/CoBEVFlow.", "keywords": "Collaborative Perception; BEV Flow; Time Asynchronization", "primary_area": "", "supplementary_material": "/attachment/e4f4650a5d1be31ccb1903110281f2bf55b39a2f.zip", "author": "Sizhe Wei;Yuxi Wei;Yue Hu;Yifan Lu;Yiqi Zhong;Siheng Chen;Ya Zhang", "authorids": "~Sizhe_Wei1;~Yuxi_Wei1;~Yue_Hu1;~Yifan_Lu1;~Yiqi_Zhong1;~Siheng_Chen1;~Ya_Zhang1", "gender": "M;M;F;M;F;F;M", "homepage": "https://sizhewei.github.io;;https://phyllish.github.io/;https://yifanlu0227.github.io;;https://annzhanglion.github.io/;https://siheng-chen.github.io/", "dblp": "358/4003;;;;243/3355;85/3714-2;136/4945", "google_scholar": "KPFTRckAAAAJ;;XBbwb78AAAAJ;hiXGPH8AAAAJ;Bv8l8jkAAAAJ;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0008-3576-8761;;;;;0000-0002-5390-9053;", "linkedin": "sizhewei/;https://www.linkedin.cn/incareer/in/ACoAADSxRKcB7zJIIKFvPU9bvO1G2BT7Mx6S4vw;;yifan-lu-65ab69229/;;;", "or_profile": "~Sizhe_Wei1;~Yuxi_Wei1;~Yue_Hu1;~Yifan_Lu1;~Yiqi_Zhong1;~Ya_Zhang1;~Siheng_Chen2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;University of Southern California;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;usc.edu;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;Undergrad student;PhD student;MS student;PhD student;Professor;Associate Professor", "bibtex": "@inproceedings{\nwei2023asynchronyrobust,\ntitle={Asynchrony-Robust Collaborative Perception via Bird's Eye View Flow},\nauthor={Sizhe Wei and Yuxi Wei and Yue Hu and Yifan Lu and Yiqi Zhong and Siheng Chen and Ya Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UHIDdtxmVS}\n}", "github": "", "project": "", "reviewers": "iS3b;ZUTg;vzpF;NCLy", "pdf_size": 7330705, "rating": "6;6;7;8", "confidence": "5;3;5;5", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "64;99;78;93", "wc_strengths": "36;88;105;121", "wc_weaknesses": "45;105;48;30", "wc_questions": "1;9;185;50", "wc_limitations": "5;6;14;5", "wc_review": "151;307;430;299", "wc_reply_reviewers": "0;13;18;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.5, 13.6106575888162 ], "wc_strengths_avg": [ 87.5, 31.941352507368876 ], "wc_weaknesses_avg": [ 57.0, 28.53944638566067 ], "wc_questions_avg": [ 61.25, 73.82538520048507 ], "wc_limitations_avg": [ 7.5, 3.774917217635375 ], "wc_review_avg": [ 296.75, 98.87966171058636 ], "wc_reply_reviewers_avg": [ 7.75, 7.949056547792323 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1271204217467439367&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;usc.edu;sjtu.edu.cn;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;University of Southern 
California", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.usc.edu", "aff_unique_abbr": "SJTU;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Attacks on Online Learners: a Teacher-Student Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71516", "id": "UHwmoJYwSV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46e37aeccafc3b4b697b17b8a36f3b30-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UHwmoJYwSV", "openreview": "https://openreview.net/forum?id=UHwmoJYwSV", "poster": "/media/PosterPDFs/NeurIPS%202023/71516.png?t=1701869271.5581083", "slides": "https://nips.cc/virtual/2023/poster/71516", "video": "https://nips.cc/virtual/2023/poster/71516", "author_site": "Riccardo Giuseppe Margiotta, Sebastian Goldt, Guido Sanguinetti", "tldr": "", "abstract": "Machine learning models are famously vulnerable to adversarial attacks: small ad-hoc perturbations of the data that can catastrophically alter the model predictions. While a large literature has studied the case of test-time attacks on pre-trained models, the important case of attacks in an online learning setting has received little attention so far. In this work, we use a control-theoretical perspective to study the scenario where an attacker may perturb data labels to manipulate the learning dynamics of an online learner. We perform a theoretical analysis of the problem in a teacher-student setup, considering different attack strategies, and obtaining analytical results for the steady state of simple linear learners. These results enable us to prove that a discontinuous transition in the learner's accuracy occurs when the attack strength exceeds a critical threshold. We then study empirically attacks on learners with complex architectures using real data, confirming the insights of our theoretical analysis. 
Our findings show that greedy attacks can be extremely efficient, especially when data stream in small batches.", "keywords": "Adversarial attacks;data poisoning;online learning;optimal control;teacher-student setup;solvable model", "primary_area": "", "supplementary_material": "", "author": "Riccardo Giuseppe Margiotta;Sebastian Goldt;Guido Sanguinetti", "authorids": "~Riccardo_Giuseppe_Margiotta1;~Sebastian_Goldt1;~Guido_Sanguinetti1", "gender": ";M;", "homepage": ";https://datascience.sissa.it/research-unit/12/theory-of-neural-networks;", "dblp": ";234/8941;", "google_scholar": ";R06wsMkAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Riccardo_Giuseppe_Margiotta1;~Sebastian_Goldt1;~Guido_Sanguinetti1", "aff": ";SISSA;", "aff_domain": ";sissa.it;", "position": ";Assistant Professor;", "bibtex": "@inproceedings{\nmargiotta2023attacks,\ntitle={Attacks on Online Learners: a Teacher-Student Analysis},\nauthor={Riccardo Giuseppe Margiotta and Sebastian Goldt and Guido Sanguinetti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UHwmoJYwSV}\n}", "github": "", "project": "", "reviewers": "61gD;MYtD;7DCo;cwDs;rUtu", "pdf_size": 2180520, "rating": "3;5;5;6;6", "confidence": "3;3;4;2;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;2", "presentation": "3;2;2;2;3", "wc_summary": "66;48;213;77;40", "wc_strengths": "33;81;250;61;46", "wc_weaknesses": "221;94;324;80;115", "wc_questions": "85;46;335;40;85", "wc_limitations": "36;10;95;39;4", "wc_review": "441;279;1217;297;290", "wc_reply_reviewers": "204;0;150;63;0", "wc_reply_authors": "339;0;64;80;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;2;2;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 88.8, 63.4520291243708 ], "wc_strengths_avg": [ 94.2, 79.52207240760266 ], "wc_weaknesses_avg": [ 166.8, 92.91587593086554 ], "wc_questions_avg": [ 118.2, 110.03163181558293 ], "wc_limitations_avg": [ 36.8, 32.208073522022396 ], "wc_review_avg": [ 504.8, 360.9999445983337 ], "wc_reply_reviewers_avg": [ 83.4, 81.61764515103336 ], "wc_reply_authors_avg": [ 96.6, 125.5063345014904 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2886751345948129, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16936222866465506480&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";sissa.it;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Scuola Internazionale Superiore di Studi Avanzati", "aff_unique_dep": "", "aff_unique_url": "https://www.sissa.it", "aff_unique_abbr": "SISSA", "aff_country_unique_index": "0", "aff_country_unique": "Italy" }, { "title": "Fast Attention Over Long Sequences With Dynamic Sparse Flash Attention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71515", "id": "UINHuKeWUa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bc222e8153a49c1b30a1b8ba96b35117-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UINHuKeWUa", "openreview": "https://openreview.net/forum?id=UINHuKeWUa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71515", "video": 
"https://nips.cc/virtual/2023/poster/71515", "author_site": "Matteo Pagliardini, Daniele Paliotta, Martin Jaggi, Fran\u00e7ois Fleuret", "tldr": "", "abstract": "Transformer-based language models have found many diverse applications requiring them to process sequences of increasing length. For these applications, the causal self-attention---which is the only component scaling quadratically w.r.t. the sequence length---becomes a central concern. While many works have proposed schemes to sparsify the attention patterns and reduce the computational overhead of self-attention, those are often limited by implementation concerns and end up imposing a simple and static structure over the attention matrix. Conversely, implementing more dynamic sparse attention often results in runtimes significantly slower than computing the full attention using the Flash implementation from Dao et al. (2022). We extend FlashAttention to accommodate a large class of attention sparsity patterns that, in particular, encompass key/query dropping and hashing-based attention. This leads to implementations with no computational complexity overhead and a multi-fold runtime speedup on top of FlashAttention. Even with relatively low degrees of sparsity, our method improves visibly upon FlashAttention as the sequence length increases. Without sacrificing perplexity, we increase the training speed of a transformer language model by $2.0\\times$ and $3.3\\times$ for sequences of respectively $8k$ and $16k$ tokens.", "keywords": "self-attention;large language models;transformers", "primary_area": "", "supplementary_material": "", "author": "Matteo Pagliardini;Daniele Paliotta;Martin Jaggi;Fran\u00e7ois Fleuret", "authorids": "~Matteo_Pagliardini1;~Daniele_Paliotta1;~Martin_Jaggi1;~Fran\u00e7ois_Fleuret2", "gender": "M;M;M;M", "homepage": ";https://danielepaliotta.com;https://mlo.epfl.ch;https://fleuret.org/francois/", "dblp": "140/7789;314/5880;17/4402;90/5265", "google_scholar": "https://scholar.google.ch/citations?user=FXacC3oAAAAJ;_xugfIEAAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ;https://scholar.google.ch/citations?user=Bj1tRlsAAAAJ", "orcid": ";;0000-0003-1579-5558;0000-0001-9457-7393", "linkedin": ";;;francois-fleuret/", "or_profile": "~Matteo_Pagliardini1;~Daniele_Paliotta1;~Martin_Jaggi1;~Francois_Fleuret1", "aff": "Swiss Federal Institute of Technology Lausanne;University of Geneva;EPFL;University of Geneva", "aff_domain": "epfl.ch;unige.ch;epfl.ch;unige.ch", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npagliardini2023fast,\ntitle={Fast Attention Over Long Sequences With Dynamic Sparse Flash Attention},\nauthor={Matteo Pagliardini and Daniele Paliotta and Martin Jaggi and Fran{\\c{c}}ois Fleuret},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UINHuKeWUa}\n}", "github": "", "project": "", "reviewers": "Z3Z8;kvnu;89Ny;jEHP;RPsB", "pdf_size": 959634, "rating": "3;6;6;7;7", "confidence": "3;5;4;2;5", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "27;44;46;74;141", "wc_strengths": "17;48;16;103;98", "wc_weaknesses": "111;89;21;57;60", "wc_questions": "5;110;22;26;61", "wc_limitations": "1;22;2;1;2", "wc_review": "161;313;107;261;362", "wc_reply_reviewers": "50;15;10;0;13", "wc_reply_authors": "240;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], 
"confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.4, 40.232325311868316 ], "wc_strengths_avg": [ 56.4, 37.83437590340298 ], "wc_weaknesses_avg": [ 67.6, 30.604574821421714 ], "wc_questions_avg": [ 44.8, 37.33845203004538 ], "wc_limitations_avg": [ 5.6, 8.212186067059124 ], "wc_review_avg": [ 240.8, 94.42542030618662 ], "wc_reply_reviewers_avg": [ 17.6, 17.00117642988273 ], "wc_reply_authors_avg": [ 48.0, 96.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.21004201260420147, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2328589238099873866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "epfl.ch;unige.ch;epfl.ch;unige.ch", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;University of Geneva;EPFL", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.unige.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;UNIGE;EPFL", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Provably Safe Reinforcement Learning with Step-wise Violation Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71514", "id": "UJ9o8wbB5U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa3e67220ca4cd50010165c950fc8056-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UJ9o8wbB5U", "openreview": "https://openreview.net/forum?id=UJ9o8wbB5U", "poster": "/media/PosterPDFs/NeurIPS%202023/71514.png?t=1702102335.8991704", "slides": "https://nips.cc/virtual/2023/poster/71514", "video": "https://nips.cc/virtual/2023/poster/71514", "author_site": "Nuoya Xiong, Yihan Du, Longbo Huang", "tldr": "", "abstract": "We investigate a novel safe reinforcement learning problem with step-wise violation constraints. Our problem differs from existing works in that we focus on stricter step-wise violation constraints and do not assume the existence of safe actions, making our formulation more suitable for safety-critical applications that need to ensure safety in all decision steps but may not always possess safe actions, e.g., robot control and autonomous driving.\nWe propose an efficient algorithm SUCBVI, which guarantees $\\widetilde{\\mathcal{O}}(\\sqrt{ST})$ or gap-dependent $\\widetilde{\\mathcal{O}}(S/\\mathcal{C}_{\\mathrm{gap}} + S^2AH^2)$ step-wise violation and $\\widetilde{\\mathcal{O}}(\\sqrt{H^3SAT})$ regret. Lower bounds are provided to validate the optimality in both violation and regret performance with respect to the number of states $S$ and the total number of steps $T$. \nMoreover, we further study an innovative safe reward-free exploration problem with step-wise violation constraints. For this problem, we design algorithm SRF-UCRL to find a near-optimal safe policy, which achieves nearly state-of-the-art sample complexity $\\widetilde{\\mathcal{O}}((\\frac{S^2AH^2}{\\varepsilon}+\\frac{H^4SA}{\\varepsilon^2})(\\log(\\frac{1}{\\delta})+S))$, and guarantees $\\widetilde{\\mathcal{O}}(\\sqrt{ST})$ violation during exploration. 
Experimental results demonstrate the superiority of our algorithms in safety performance and corroborate our theoretical results.", "keywords": "safe reinforcement learning;step-wise violation;reinforcement learning theory", "primary_area": "", "supplementary_material": "/attachment/8a4ba93123920f542a3c40218bfcbd8ccac1da98.pdf", "author": "Nuoya Xiong;Yihan Du;Longbo Huang", "authorids": "~Nuoya_Xiong1;~Yihan_Du2;~Longbo_Huang2", "gender": "M;F;M", "homepage": "https://xiongny.github.io/index.html;https://yihandu.github.io/;http://people.iiis.tsinghua.edu.cn/~huang/", "dblp": "322/6141;231/1919;79/7077", "google_scholar": "K7Q4GWQAAAAJ;https://scholar.google.pl/citations?user=_RSr3vUAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nuoya_Xiong1;~Yihan_Du2;~Longbo_Huang2", "aff": "Tsinghua University;IIIS, Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nxiong2023provably,\ntitle={Provably Safe Reinforcement Learning with Step-wise Violation Constraints},\nauthor={Nuoya Xiong and Yihan Du and Longbo Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UJ9o8wbB5U}\n}", "github": "", "project": "", "reviewers": "ZGMh;83sH;ryzU;RuZi;31rE", "pdf_size": 583637, "rating": "5;5;6;6;7", "confidence": "3;4;3;4;4", "soundness": "2;4;3;2;3", "novelty": "3;3;3;3;3", "presentation": "2;4;3;3;3", "wc_summary": "76;99;100;87;60", "wc_strengths": "27;21;36;430;78", "wc_weaknesses": "83;85;161;108;169", "wc_questions": "2;32;121;58;27", "wc_limitations": "22;1;26;1;17", "wc_review": "210;238;444;684;351", "wc_reply_reviewers": "163;64;40;16;34", "wc_reply_authors": "785;0;26;8;18", "reply_reviewers": "2;1;1;1;2", "reply_authors": "3;1;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 84.4, 15.027973915335359 ], "wc_strengths_avg": [ 118.4, 157.0714487104515 ], "wc_weaknesses_avg": [ 121.2, 36.912870384189844 ], "wc_questions_avg": [ 48.0, 40.600492607848985 ], "wc_limitations_avg": [ 13.4, 10.518555033843766 ], "wc_review_avg": [ 385.4, 171.03870906902915 ], "wc_reply_reviewers_avg": [ 63.4, 52.11372180146031 ], "wc_reply_authors_avg": [ 167.4, 308.9256221163923 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11090031243168656972&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Simple and Asymmetric Graph Contrastive Learning without Augmentations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71513", "id": "UK8mA3DRnb", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3430bcc30cdaabd0bf6c5d0c31bda67c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UK8mA3DRnb", "openreview": "https://openreview.net/forum?id=UK8mA3DRnb", "poster": "/media/PosterPDFs/NeurIPS%202023/71513.png?t=1702160512.5409458", "slides": "https://nips.cc/virtual/2023/poster/71513", "video": "https://nips.cc/virtual/2023/poster/71513", "author_site": "Teng Xiao, Huaisheng Zhu, Zhengyu Chen, Suhang Wang", "tldr": "", "abstract": "Graph Contrastive Learning (GCL) has shown superior performance in representation learning in graph-structured data. Despite their success, most existing GCL methods rely on prefabricated graph augmentation and homophily assumptions. Thus, they fail to generalize well to heterophilic graphs where connected nodes may have different class labels and dissimilar features. In this paper, we study the problem of conducting contrastive learning on homophilic and heterophilic graphs. We find that we can achieve promising performance simply by considering an asymmetric view of the neighboring nodes. The resulting simple algorithm, Asymmetric Contrastive Learning for Graphs (GraphACL), is easy to implement and does not rely on graph augmentations and homophily assumptions. We provide theoretical and empirical evidence that GraphACL can capture one-hop local neighborhood information and two-hop monophily similarity, which are both important for modeling heterophilic graphs. Experimental results show that the simple GraphACL significantly outperforms state-of-the-art graph contrastive learning and self-supervised learning methods on homophilic and heterophilic graphs. The code of GraphACL is available at https://github.com/tengxiao1/GraphACL.", "keywords": "Contrastive Learning;Graph Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Teng Xiao;Huaisheng Zhu;Zhengyu Chen;Suhang Wang", "authorids": "~Teng_Xiao2;~Huaisheng_Zhu1;~Zhengyu_Chen3;~Suhang_Wang1", "gender": ";M;;M", "homepage": ";;;https://faculty.ist.psu.edu/szw494/", "dblp": ";264/2622.html;;136/9440", "google_scholar": ";;;cdT_WMMAAAAJ", "orcid": ";;;0000-0003-3448-4878", "linkedin": ";;;", "or_profile": "~Teng_Xiao2;~Huaisheng_Zhu1;~Zhengyu_Chen3;~Suhang_Wang1", "aff": ";Pennsylvania State University;;Pennsylvania State University", "aff_domain": ";psu.edu;;psu.edu", "position": ";PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nxiao2023simple,\ntitle={Simple and Asymmetric Graph Contrastive Learning without Augmentations},\nauthor={Teng Xiao and Huaisheng Zhu and Zhengyu Chen and Suhang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UK8mA3DRnb}\n}", "github": "", "project": "", "reviewers": "gZUY;cxpZ;ycWA;1K5Q;fMCY", "pdf_size": 2514160, "rating": "6;6;6;6;6", "confidence": "4;4;3;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "38;41;70;74;56", "wc_strengths": "85;84;62;78;47", "wc_weaknesses": "140;26;636;134;137", "wc_questions": "2;19;14;73;2", "wc_limitations": "1;11;6;1;31", "wc_review": "266;181;788;360;273", "wc_reply_reviewers": "10;18;166;21;28", "wc_reply_authors": "23;23;71;25;40", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], 
"wc_summary_avg": [ 55.8, 14.620533505997654 ], "wc_strengths_avg": [ 71.2, 14.634206503941373 ], "wc_weaknesses_avg": [ 214.6, 215.04938967595328 ], "wc_questions_avg": [ 22.0, 26.359059163786554 ], "wc_limitations_avg": [ 10.0, 11.135528725660043 ], "wc_review_avg": [ 373.6, 214.80465544303271 ], "wc_reply_reviewers_avg": [ 48.6, 58.98338749173364 ], "wc_reply_authors_avg": [ 36.4, 18.43474979488466 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13400495932669202264&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";psu.edu;;psu.edu", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning to Tokenize for Generative Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71512", "id": "UKd6dpVGdu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/91228b942a4528cdae031c1b68b127e8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UKd6dpVGdu", "openreview": "https://openreview.net/forum?id=UKd6dpVGdu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71512", "video": "https://nips.cc/virtual/2023/poster/71512", "author_site": "Weiwei Sun, Lingyong Yan, Zheng Chen, Shuaiqiang Wang, Haichao Zhu, Pengjie Ren, Zhumin Chen, Dawei Yin, Maarten Rijke, Zhaochun Ren", "tldr": "", "abstract": "As a new paradigm in information retrieval, generative retrieval directly generates a ranked list of document identifiers (docids) for a given query using generative language models (LMs).\nHow to assign each document a unique docid (denoted as document tokenization) is a critical problem, because it determines whether the generative retrieval model can precisely retrieve any document by simply decoding its docid.\nMost existing methods adopt rule-based tokenization, which is ad-hoc and does not generalize well.\nIn contrast, in this paper we propose a novel document tokenization learning method, GenRet, which learns to encode the complete document semantics into docids.\nGenRet learns to tokenize documents into short discrete representations (i.e., docids) via a discrete auto-encoding approach.\nWe develop a progressive training scheme to capture the autoregressive nature of docids and diverse clustering techniques to stabilize the training process.\nBased on the semantic-embedded docids of any set of documents, the generative retrieval model can learn to generate the most relevant docid only according to the docids' semantic relevance to the queries.\nWe conduct experiments on the NQ320K, MS MARCO, and BEIR datasets.\nGenRet establishes the new state-of-the-art on the NQ320K dataset.\nCompared to generative retrieval baselines, GenRet can achieve significant improvements on unseen documents.\nMoreover, GenRet can also outperform comparable baselines on MS MARCO and BEIR, demonstrating the method's generalizability.", "keywords": "Information Retrieval;Document Retrieval;Generative Retrieval", "primary_area": "", "supplementary_material": "", "author": "Weiwei Sun;Lingyong Yan;Zheng Chen;Shuaiqiang Wang;Haichao Zhu;Pengjie Ren;Zhumin 
Chen;Dawei Yin;Maarten de Rijke;Zhaochun Ren", "authorids": "~Weiwei_Sun9;~Lingyong_Yan1;~Zheng_Chen13;~Shuaiqiang_Wang2;~Haichao_Zhu3;~Pengjie_Ren1;~Zhumin_Chen1;~Dawei_Yin1;~Maarten_de_Rijke1;~Zhaochun_Ren1", "gender": ";M;M;M;M;;;M;;M", "homepage": "https://sunnweiwei.github.io/;https://yanlingyong.net;;http://wangshuaiqiang.net/;https://hczhu.fun/;;https://ir.sdu.edu.cn/~zhuminchen/~zhuminchen_en.htm;https://www.yindawei.com/;https://staff.fnwi.uva.nl/m.derijke/;https://renzhaochun.github.io/", "dblp": ";254/8048;;16/1524;65/1219;;88/1081;;r/MdRijke;58/10440", "google_scholar": "hdUZbxgAAAAJ;NksMJFcAAAAJ;;https://scholar.google.com.hk/citations?user=8SbYYcIAAAAJ;A9zyRzIAAAAJ;;;GuQ9bpAAAAAJ;https://scholar.google.com/citations?hl=en;fPcIPt0AAAAJ", "orcid": ";;0009-0004-9839-8498;0000-0002-9212-1947;0000-0002-4596-7384;;0000-0003-4592-4074;0000-0002-0684-6205;0000-0002-1086-0202;0000-0002-9076-6565", "linkedin": ";;;;;;;dwyin/;;zhaochun-ren-460491296/?locale=nl_NL", "or_profile": "~Weiwei_Sun9;~Lingyong_Yan1;~Zheng_Chen13;~Shuaiqiang_Wang2;~Haichao_Zhu3;~Pengjie_Ren1;~Zhumin_Chen1;~Dawei_Yin1;~Maarten_de_Rijke1;~Zhaochun_Ren1", "aff": "Shandong University;Baidu Inc.;Shandong University;Baidu Inc.;;;Shandong University;Baidu;University of Amsterdam;Shandong University", "aff_domain": "sdu.edu.cn;baidu.com;sdu.edu.cn;baidu.com;;;sdu.edu.cn;baidu.com;uva.nl;sdu.edu.cn", "position": "MS student;Search Scientist;Undergrad student;Principal Researcher;;;Full Professor;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023learning,\ntitle={Learning to Tokenize for Generative Retrieval},\nauthor={Weiwei Sun and Lingyong Yan and Zheng Chen and Shuaiqiang Wang and Haichao Zhu and Pengjie Ren and Zhumin Chen and Dawei Yin and Maarten de Rijke and Zhaochun Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UKd6dpVGdu}\n}", "github": "", "project": "", "reviewers": "BkMu;zz1Z;xNtF;e88A;BPS1", "pdf_size": 3809910, "rating": "5;5;6;6;7", "confidence": "4;5;3;4;3", "soundness": "3;2;4;3;3", "novelty": "3;3;3;3;3", "presentation": "4;3;2;3;2", "wc_summary": "96;100;126;55;41", "wc_strengths": "64;35;151;29;16", "wc_weaknesses": "153;102;230;122;32", "wc_questions": "197;37;136;27;30", "wc_limitations": "17;21;45;8;4", "wc_review": "527;295;688;241;123", "wc_reply_reviewers": "29;33;69;0;5", "wc_reply_authors": "28;115;27;0;9", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 83.6, 31.15509589136262 ], "wc_strengths_avg": [ 59.0, 48.60864120709403 ], "wc_weaknesses_avg": [ 127.8, 64.7407136197926 ], "wc_questions_avg": [ 85.4, 69.04665089633241 ], "wc_limitations_avg": [ 19.0, 14.352700094407323 ], "wc_review_avg": [ 374.8, 204.41761176571845 ], "wc_reply_reviewers_avg": [ 27.2, 24.563387388550463 ], "wc_reply_authors_avg": [ 35.8, 41.01414390182977 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2088217086067503890&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 11, "email": 
"sdu.edu.cn;baidu.com;sdu.edu.cn;baidu.com;;;sdu.edu.cn;baidu.com;uva.nl;sdu.edu.cn", "author_num": 10, "aff_unique_index": "0;1;0;1;0;1;2;0", "aff_unique_norm": "Shandong University;Baidu;University of Amsterdam", "aff_unique_dep": ";Baidu Inc.;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.baidu.com;https://www.uva.nl", "aff_unique_abbr": "SDU;Baidu;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;Netherlands" }, { "title": "The s-value: evaluating stability with respect to distributional shifts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71511", "id": "UKtjq3dIs0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e3fea99df80195b316cefa7aa6099cd5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UKtjq3dIs0", "openreview": "https://openreview.net/forum?id=UKtjq3dIs0", "poster": "/media/PosterPDFs/NeurIPS%202023/71511.png?t=1701022836.3896153", "slides": "https://nips.cc/virtual/2023/poster/71511", "video": "https://nips.cc/virtual/2023/poster/71511", "author_site": "Suyash Gupta, Dominik Rothenh\u00e4usler", "tldr": "", "abstract": "Common statistical measures of uncertainty such as $p$-values and confidence intervals quantify the uncertainty due to sampling, that is, the uncertainty due to not observing the full population. However, sampling is not the only source of uncertainty. In practice, distributions change between locations and across time. This makes it difficult to gather knowledge that transfers across data sets. We propose a measure of instability that quantifies the distributional instability of a statistical parameter with respect to Kullback-Leibler divergence, that is, the sensitivity of the parameter under general distributional perturbations within a Kullback-Leibler divergence ball. In addition, we quantify the instability of parameters with respect to directional or variable-specific shifts. Measuring instability with respect to directional shifts can be used to detect under which kind of distribution shifts a statistical conclusion might be reversed. We discuss how such knowledge can inform data collection for transfer learning of statistical parameters under shifted distributions. 
We evaluate the performance of the proposed measure on real data and show that it can elucidate the distributional instability of a parameter with respect to certain shifts and can be used to improve estimation accuracy under shifted distributions.", "keywords": "Distributional Stability;Distributional Robustness;Distributional Shifts;Generalizability", "primary_area": "", "supplementary_material": "/attachment/df3eb278a582dd23fd7011f371f43f5c335d8005.pdf", "author": "Suyash Gupta;Dominik Rothenhaeusler", "authorids": "~Suyash_Gupta2;~Dominik_Rothenhaeusler2", "gender": "M;", "homepage": "http://web.stanford.edu/~suyash28/suyash.html;https://sites.google.com/view/rothenhaeusler/home", "dblp": "136/5662;178/3292", "google_scholar": "Olj9jQgAAAAJ;8vdua9wAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Suyash_Gupta2;~Dominik_Rothenhaeusler2", "aff": "Amazon;Stanford University", "aff_domain": "amazon.com;stanford.edu", "position": "Researcher;Assistant Professor", "bibtex": "@inproceedings{\ngupta2023the,\ntitle={The s-value: evaluating stability with respect to distributional shifts},\nauthor={Suyash Gupta and Dominik Rothenhaeusler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UKtjq3dIs0}\n}", "github": "", "project": "", "reviewers": "buuF;ktSh;JAUw;msmy;oQ9C;jftc;qNys", "pdf_size": 888412, "rating": "3;3;5;6;6;7;7", "confidence": "2;4;3;4;3;4;3", "soundness": "2;4;3;3;3;3;3", "novelty": "2;2;2;3;3;3;3", "presentation": "1;3;4;3;2;3;3", "wc_summary": "49;65;78;61;80;202;95", "wc_strengths": "52;39;91;45;80;106;42", "wc_weaknesses": "180;39;141;210;148;423;158", "wc_questions": "125;23;92;4;50;228;69", "wc_limitations": "19;57;1;14;15;1;1", "wc_review": "425;223;403;334;373;960;365", "wc_reply_reviewers": "38;0;0;54;16;0;15", "wc_reply_authors": "163;0;0;0;0;0;0", "reply_reviewers": "1;0;0;1;1;0;1", "reply_authors": "2;1;1;1;1;1;1", "rating_avg": [ 5.285714285714286, 1.577908716741037 ], "confidence_avg": [ 3.2857142857142856, 0.6998542122237652 ], "soundness_avg": [ 3.0, 0.5345224838248488 ], "novelty_avg": [ 2.5714285714285716, 0.49487165930539345 ], "presentation_avg": [ 2.7142857142857144, 0.880630571852711 ], "wc_summary_avg": [ 90.0, 47.74934554525329 ], "wc_strengths_avg": [ 65.0, 24.945655218608998 ], "wc_weaknesses_avg": [ 185.57142857142858, 108.65523081597807 ], "wc_questions_avg": [ 84.42857142857143, 69.67446169325528 ], "wc_limitations_avg": [ 15.428571428571429, 18.3914354512959 ], "wc_review_avg": [ 440.42857142857144, 220.52914102407493 ], "wc_reply_reviewers_avg": [ 17.571428571428573, 19.565692590108135 ], "wc_reply_authors_avg": [ 23.285714285714285, 57.03811829623685 ], "reply_reviewers_avg": [ 0.5714285714285714, 0.4948716593053935 ], "reply_authors_avg": [ 1.1428571428571428, 0.34992710611188266 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.31416904015569347, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4825541034611635447&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "amazon.com;stanford.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Amazon;Stanford University", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.stanford.edu", "aff_unique_abbr": "Amazon;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": 
"UNOeQGHNaN", "title": "Robust Representation Learning via Asymmetric Negative Contrasting and Reverse Attention", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep neural networks are vulnerable to adversarial noise. Adversarial training (AT) has been demonstrated to be the most effective defense strategy to protect neural networks from being fooled. However, we find AT omits to learning robust features, resulting in poor performance of adversarial robustness. To address this issue, we highlight two characteristics of robust representation: \n(1) exclusion: feature of natural examples keeps away from that of other classes;\n(2) alignment: feature of natural and corresponding adversarial examples is close to each other. \nThese motivate us to propose a generic framework of AT to gain robust representation, by the asymmetric negative contrast and reverse attention. Specifically, we design an asymmetric negative contrast based on predicted probabilities and generate adversarial negative examples by the targeted attack, to push away examples of different classes in the feature space. Moreover, we propose to weight feature by parameters of the linear classifier as the reverse attention, to obtain class-aware feature and pull close the feature of the same class. Empirical evaluations on three benchmark datasets show our method greatly advances the robustness of AT and achieves the state-of-the-art performance.", "keywords": "Robust Representation learning;Asymmetric Negative Contrast;Reverse Attention", "primary_area": "", "supplementary_material": "/attachment/7f013ef6ddf60ed62c96bcf3c240a180289721e9.pdf", "author": "Nuoyan Zhou;Decheng Liu;Dawei Zhou;Nannan Wang;Chunlei Peng;Xinbo Gao", "authorids": "~Nuoyan_Zhou2;~Decheng_Liu2;~Dawei_Zhou3;~Nannan_Wang1;~Chunlei_Peng1;~Xinbo_Gao5", "gender": "M;M;M;M;M;M", "homepage": "https://openi.pcl.ac.cn/zhounuoyan;;;https://web.xidian.edu.cn/clpeng/en/index.html;https://faculty.cqupt.edu.cn/gaoxinbo/zh_CN/index.htm;", "dblp": ";;10/8359-1;148/8269;;39/3130-4", "google_scholar": ";c5TFU9sAAAAJ;SRBn7oUAAAAJ;U9TnHJgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=7H-LIigAAAAJ", "orcid": ";0000-0002-6550-212X;;;0000-0002-7985-0037;0000-0002-0694-3603", "linkedin": ";;;;xinbo-gao-151a2224/;", "or_profile": "~Nuoyan_Zhou2;~Decheng_Liu2;~Nannan_Wang1;~Chunlei_Peng1;~Xinbo_Gao5;~Zhou_Dawei1", "aff": ";Xidian University ;Xidian University;Xidian University;Chongqing University of Post and Telecommunications;Xidian University", "aff_domain": ";xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;cqupt.edu.cn;xidian.edu.cn", "position": ";Assistant Professor;Full Professor;Associate Professor;Full Professor;PhD student", "bibtex": "@misc{\nzhou2023robust,\ntitle={Robust Representation Learning via Asymmetric Negative Contrasting and Reverse Attention},\nauthor={Nuoyan Zhou and Decheng Liu and Dawei Zhou and Nannan Wang and Chunlei Peng and Xinbo Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=UNOeQGHNaN}\n}", "github": "", "project": "", "reviewers": "2AVw;1pqm;PqGt;BjXk;K7R1", "site": "https://openreview.net/forum?id=UNOeQGHNaN", "pdf_size": 461265, "rating": "3;3;5;6;7", "confidence": "4;4;3;3;4", "soundness": "2;1;3;3;3", "novelty": "2;2;3;4;3", "presentation": "2;1;2;4;3", "wc_summary": "88;64;187;91;98", "wc_strengths": "40;28;83;180;446", "wc_weaknesses": "139;785;185;249;37", "wc_questions": "25;358;4;16;46", "wc_limitations": "16;15;10;9;67", "wc_review": "308;1250;469;545;694", 
"wc_reply_reviewers": "238;83;48;222;35", "wc_reply_authors": "488;1644;63;239;27", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;6;3;3;2", "rating_avg": [ 4.8, 1.6 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 105.6, 42.278126732389644 ], "wc_strengths_avg": [ 155.4, 154.81808679866833 ], "wc_weaknesses_avg": [ 279.0, 262.27314006584817 ], "wc_questions_avg": [ 89.8, 134.80118693839455 ], "wc_limitations_avg": [ 23.4, 21.969069165533618 ], "wc_review_avg": [ 653.2, 323.30629440207315 ], "wc_reply_reviewers_avg": [ 125.2, 87.14447773668736 ], "wc_reply_authors_avg": [ 492.2, 598.5640817823936 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.8, 1.469693845669907 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3572172541558802, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:r7Ps-QKZVcYJ:scholar.google.com/&scioq=Robust+Representation+Learning+via+Asymmetric+Negative+Contrasting+and+Reverse+Attention&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Xidian University;Chongqing University of Post and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.cqupt.edu.cn", "aff_unique_abbr": "Xidian;CQUPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Class-Distribution-Aware Pseudo-Labeling for Semi-Supervised Multi-Label Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71510", "id": "UOB1UgPjuG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5195825ee60d7efc1e42b7f3f3137040-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UOB1UgPjuG", "openreview": "https://openreview.net/forum?id=UOB1UgPjuG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71510", "video": "https://nips.cc/virtual/2023/poster/71510", "author_site": "Ming-Kun Xie, Jiahao Xiao, Hao-Zhe Liu, Gang Niu, Masashi Sugiyama, Sheng-Jun Huang", "tldr": "", "abstract": "Pseudo-labeling has emerged as a popular and effective approach for utilizing unlabeled data. However, in the context of semi-supervised multi-label learning (SSMLL), conventional pseudo-labeling methods encounter difficulties when dealing with instances associated with multiple labels and an unknown label count. These limitations often result in the introduction of false positive labels or the neglect of true positive ones. To overcome these challenges, this paper proposes a novel solution called Class-Aware Pseudo-Labeling (CAP) that performs pseudo-labeling in a class-aware manner. The proposed approach introduces a regularized learning framework incorporating class-aware thresholds, which effectively control the assignment of positive and negative pseudo-labels for each class. Notably, even with a small proportion of labeled examples, our observations demonstrate that the estimated class distribution serves as a reliable approximation. Motivated by this finding, we develop a class-distribution-aware thresholding strategy to ensure the alignment of pseudo-label distribution with the true distribution. 
The correctness of the estimated class distribution is theoretically verified, and a generalization error bound is provided for our proposed method. Extensive experiments on multiple benchmark datasets confirm the efficacy of CAP in addressing the challenges of SSMLL problems.", "keywords": "Semi-supervised multi-label learning;pseudo labeling.", "primary_area": "", "supplementary_material": "", "author": "Ming-Kun Xie;Jia-Hao Xiao;Hao-Zhe Liu;Gang Niu;Masashi Sugiyama;Sheng-Jun Huang", "authorids": "~Ming-Kun_Xie1;~Jia-Hao_Xiao1;~Hao-Zhe_Liu1;~Gang_Niu1;~Masashi_Sugiyama1;~Sheng-Jun_Huang1", "gender": "M;M;M;;M;M", "homepage": "http://www.xiemk.pro/;https://niug1984.github.io;http://www.ms.k.u-tokyo.ac.jp/sugi/;http://parnec.nuaa.edu.cn/huangsj;https://github.com/liuhaozhe15;", "dblp": "215/4362;26/3367-1;35/1228;01/3367.html;;238/4029", "google_scholar": "https://scholar.google.co.jp/citations?hl=zh-CN;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.tw/citations?view_op=list_works", "orcid": ";;0000-0001-6658-6743;0000-0002-7673-5367;;", "linkedin": ";;;;;", "or_profile": "~Ming-Kun_Xie1;~Gang_Niu1;~Masashi_Sugiyama1;~Sheng-Jun_Huang1;~Haozhe_Liu2;~Jiahao_Xiao2", "aff": "Nanjing University of Aeronautics and Astronautics;RIKEN;The University of Tokyo;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;riken.jp;u-tokyo.ac.jp;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "position": "PhD student;Research Scientist (tenured);Full Professor;Full Professor;Undergrad student;MS student", "bibtex": "@inproceedings{\nxie2023classdistributionaware,\ntitle={Class-Distribution-Aware Pseudo-Labeling for Semi-Supervised Multi-Label Learning},\nauthor={Ming-Kun Xie and Jia-Hao Xiao and Hao-Zhe Liu and Gang Niu and Masashi Sugiyama and Sheng-Jun Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UOB1UgPjuG}\n}", "github": "", "project": "", "reviewers": "tMT4;xWdd;wYDh;ekLv", "pdf_size": 627160, "rating": "5;6;7;7", "confidence": "4;5;5;5", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "72;33;91;170", "wc_strengths": "58;41;113;162", "wc_weaknesses": "88;203;60;32", "wc_questions": "42;5;21;49", "wc_limitations": "10;8;1;7", "wc_review": "270;290;286;420", "wc_reply_reviewers": "0;167;0;11", "wc_reply_authors": "0;862;0;0", "reply_reviewers": "0;2;0;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 49.912423303221814 ], "wc_strengths_avg": [ 93.5, 47.668123520860355 ], "wc_weaknesses_avg": [ 95.75, 65.00913397361943 ], "wc_questions_avg": [ 29.25, 17.383541066192468 ], "wc_limitations_avg": [ 6.5, 3.3541019662496847 ], "wc_review_avg": [ 316.5, 60.22250409938132 ], "wc_reply_reviewers_avg": [ 44.5, 70.86783473480759 ], "wc_reply_authors_avg": [ 215.5, 373.25694903109303 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, 
"gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8580477683281108500&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "nuaa.edu.cn;riken.jp;u-tokyo.ac.jp;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics;RIKEN;University of Tokyo", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nuaa.edu.cn;https://www.riken.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "NUAA;RIKEN;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Brain encoding models based on multimodal transformers can transfer across language and vision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71509", "id": "UPefaFqjNQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5ebbbac62b968254093023f1c95015d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UPefaFqjNQ", "openreview": "https://openreview.net/forum?id=UPefaFqjNQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71509", "video": "https://nips.cc/virtual/2023/poster/71509", "author_site": "Jerry Tang, Meng Du, Vy Vo, VASUDEV LAL, Alexander Huth", "tldr": "", "abstract": "Encoding models have been used to assess how the human brain represents concepts in language and vision. While language and vision rely on similar concept representations, current encoding models are typically trained and tested on brain responses to each modality in isolation. Recent advances in multimodal pretraining have produced transformers that can extract aligned representations of concepts in language and vision. In this work, we used representations from multimodal transformers to train encoding models that can transfer across fMRI responses to stories and movies. We found that encoding models trained on brain responses to one modality can successfully predict brain responses to the other modality, particularly in cortical regions that represent conceptual meaning. Further analysis of these encoding models revealed shared semantic dimensions that underlie concept representations in language and vision. Comparing encoding models trained using representations from multimodal and unimodal transformers, we found that multimodal transformers learn more aligned representations of concepts in language and vision. Our results demonstrate how multimodal transformers can provide insights into the brain\u2019s capacity for multimodal processing.", "keywords": "fMRI;neuroscience;encoding models;multimodal transformers;language;vision", "primary_area": "", "supplementary_material": "/attachment/2068fc48338b6297209076543d63b5f467b4faf4.pdf", "author": "Jerry Tang;Meng Du;Vy A. 
Vo;Vasudev Lal;Alexander Huth", "authorids": "~Jerry_Tang1;~Meng_Du1;~Vy_A._Vo2;~Vasudev_Lal1;~Alexander_Huth1", "gender": ";F;;M;", "homepage": ";http://meng-du.github.io/mengdu/;;;https://www.cs.utexas.edu/~huth/", "dblp": ";;;;44/8860.html", "google_scholar": ";yC6I4F4AAAAJ;;Qbu4oKwAAAAJ;JNXWWkIAAAAJ", "orcid": ";0000-0002-5397-2925;;0000-0002-5907-9898;", "linkedin": ";mengdu/;;vasudev-lal-79bb336/;", "or_profile": "~Jerry_Tang1;~Meng_Du1;~Vy_A._Vo2;~Vasudev_Lal1;~Alexander_Huth1", "aff": ";University of California, Los Angeles;;Intel;The University of Texas at Austin", "aff_domain": ";ucla.edu;;intel.com;utexas.edu", "position": ";PhD student;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntang2023brain,\ntitle={Brain encoding models based on multimodal transformers can transfer across language and vision},\nauthor={Jerry Tang and Meng Du and Vy A. Vo and Vasudev Lal and Alexander Huth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UPefaFqjNQ}\n}", "github": "", "project": "", "reviewers": "1ukF;LQBp;V1MV;C9Qg;Ebp3", "pdf_size": 29686806, "rating": "2;7;7;7;8", "confidence": "5;3;4;4;5", "soundness": "1;2;3;3;3", "novelty": "1;2;3;3;3", "presentation": "2;2;4;3;3", "wc_summary": "63;94;76;58;212", "wc_strengths": "31;82;53;81;194", "wc_weaknesses": "207;234;45;65;212", "wc_questions": "112;175;17;133;70", "wc_limitations": "15;14;33;1;14", "wc_review": "428;599;224;338;702", "wc_reply_reviewers": "331;104;85;0;276", "wc_reply_authors": "726;0;0;0;428", "reply_reviewers": "1;1;1;0;2", "reply_authors": "2;1;1;1;3", "rating_avg": [ 6.2, 2.1354156504062622 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 100.6, 57.073987069417186 ], "wc_strengths_avg": [ 88.2, 56.204626144117356 ], "wc_weaknesses_avg": [ 152.6, 80.45520492795976 ], "wc_questions_avg": [ 101.4, 54.10582223753743 ], "wc_limitations_avg": [ 15.4, 10.209799214480176 ], "wc_review_avg": [ 458.2, 172.90043377620543 ], "wc_reply_reviewers_avg": [ 159.2, 124.14088770425319 ], "wc_reply_authors_avg": [ 230.8, 297.9653671150391 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4005009394574071, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11958129375953877912&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";ucla.edu;;intel.com;utexas.edu", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Los Angeles;Intel;University of Texas at Austin", "aff_unique_dep": ";Intel Corporation;", "aff_unique_url": "https://www.ucla.edu;https://www.intel.com;https://www.utexas.edu", "aff_unique_abbr": "UCLA;Intel;UT Austin", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Asymptotically Optimal Algorithms for Non-Parametric Stochastic Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71508", "id": "UPo8vlZ0wQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/26300457961c3e056ea61c9d3ebec2a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UPo8vlZ0wQ", "openreview": 
"https://openreview.net/forum?id=UPo8vlZ0wQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71508.png?t=1701766580.444465", "slides": "https://nips.cc/virtual/2023/poster/71508", "video": "https://nips.cc/virtual/2023/poster/71508", "author_site": "Dorian Baudry, Fabien Pesquerel, R\u00e9my Degenne, Odalric-Ambrym Maillard", "tldr": "", "abstract": "We consider the problem of regret minimization in non-parametric stochastic bandits. When the rewards are known to be bounded from above, there exists asymptotically optimal algorithms, with asymptotic regret depending on an infimum of Kullback-Leibler divergences (KL). These algorithms are computationally expensive and require storing all past rewards, thus simpler but non-optimal algorithms are often used instead. We introduce several methods to approximate the infimum KL which reduce drastically the computational and memory costs of existing optimal algorithms, while keeping their regret guaranties. We apply our findings to design new variants of the MED and IMED algorithms, and demonstrate their interest with extensive numerical simulations.", "keywords": "Multi-Armed Bandits", "primary_area": "", "supplementary_material": "/attachment/1b3ce9ae97ad64916792fce0e8061fca02e56b2e.zip", "author": "Dorian Baudry;Fabien Pesquerel;R\u00e9my Degenne;Odalric-Ambrym Maillard", "authorids": "~Dorian_Baudry1;~Fabien_Pesquerel1;~R\u00e9my_Degenne1;~Odalric-Ambrym_Maillard3", "gender": "M;M;M;", "homepage": "https://dbaudry.github.io/;https://fabienpesquerel.github.io/;https://remydegenne.github.io/;http://odalricambrymmaillard.neowordpress.fr/", "dblp": "277/6362;;157/1070;83/7401", "google_scholar": "https://scholar.google.fr/citations?user=RRW-kfYAAAAJ;dlu8QksAAAAJ;https://scholar.google.fr/citations?user=H-uIBOwAAAAJ;https://scholar.google.fr/citations?hl=fr", "orcid": ";;;", "linkedin": ";fabien-pesquerel-73515a124/;;", "or_profile": "~Dorian_Baudry1;~Fabien_Pesquerel1;~R\u00e9my_Degenne1;~odalric-ambrym_maillard1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;INRIA;INRIA;inria", "aff_domain": "ensae.fr;inria.fr;inria.fr;inria.fr", "position": "Postdoc;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nbaudry2023fast,\ntitle={Fast Asymptotically Optimal Algorithms for Non-Parametric Stochastic Bandits},\nauthor={Dorian Baudry and Fabien Pesquerel and R{\\'e}my Degenne and Odalric-Ambrym Maillard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UPo8vlZ0wQ}\n}", "github": "", "project": "", "reviewers": "Nc1j;ba7Y;zxKa;gHyp;zZCp", "pdf_size": 32719136, "rating": "5;5;6;6;7", "confidence": "4;3;3;4;2", "soundness": "4;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "4;3;3;3;3", "wc_summary": "77;72;179;80;74", "wc_strengths": "35;32;168;205;53", "wc_weaknesses": "35;13;201;107;24", "wc_questions": "31;109;87;43;54", "wc_limitations": "16;1;1;141;1", "wc_review": "194;227;636;576;206", "wc_reply_reviewers": "0;0;26;11;4", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 96.4, 41.38888739746455 ], "wc_strengths_avg": [ 98.6, 73.07147186145905 ], "wc_weaknesses_avg": [ 76.0, 70.6257743320383 ], "wc_questions_avg": [ 64.8, 
28.91643131508451 ], "wc_limitations_avg": [ 32.0, 54.80875842417888 ], "wc_review_avg": [ 367.8, 195.69813489147 ], "wc_reply_reviewers_avg": [ 8.2, 9.765244492586962 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6428571428571428, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8511588268838328031&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ensae.fr;inria.fr;inria.fr;inria.fr", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://ensae.fr;https://www.inria.fr", "aff_unique_abbr": "ENSAE;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Graph Neural Networks for Road Safety Modeling: Datasets and Evaluations for Accident Analysis", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73579", "id": "UQ8pDKcXTq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a365be0950259c9624edfb4d26eabd46-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=UQ8pDKcXTq", "openreview": "https://openreview.net/forum?id=UQ8pDKcXTq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73579", "video": "https://nips.cc/virtual/2023/poster/73579", "author_site": "Abhinav Nippani, Dongyue Li, Haotian Ju, Haris Koutsopoulos, Hongyang Zhang", "tldr": "", "abstract": "We consider the problem of traffic accident analysis on a road network based on road network connections and traffic volume. Previous works have designed various deep-learning methods using historical records to predict traffic accident occurrences. However, there is a lack of consensus on how accurate existing methods are, and a fundamental issue is the lack of public accident datasets for comprehensive evaluations. This paper constructs a large-scale, unified dataset of traffic accident records from official reports of various states in the US, totaling 9 million records, accompanied by road networks and traffic volume reports. Using this new dataset, we evaluate existing deep-learning methods for predicting the occurrence of accidents on road networks. Our main finding is that graph neural networks such as GraphSAGE can accurately predict the number of accidents on roads with less than 22% mean absolute error (relative to the actual count) and whether an accident will occur or not with over 87% AUROC, averaged over states. We achieve these results by using multitask learning to account for cross-state variabilities (e.g., availability of accident labels) and transfer learning to combine traffic volume with accident prediction. Ablation studies highlight the importance of road graph-structural features, amongst other features. Lastly, we discuss the implications of the analysis and develop a package for easily using our new dataset.", "keywords": "Graph Neural Networks;Road Safety;Transportation Networks", "primary_area": "", "supplementary_material": "/attachment/e0899c234a9ed99c960c357780789a58e4116498.zip", "author": "Abhinav Nippani;Dongyue Li;Haotian Ju;Haris Koutsopoulos;Hongyang R. 
Zhang", "authorids": "~Abhinav_Nippani1;~Dongyue_Li1;~Haotian_Ju1;~Haris_Koutsopoulos1;~Hongyang_R._Zhang1", "gender": "M;;M;M;M", "homepage": ";https://lidongyue12138.github.io/;;https://coe.northeastern.edu/people/koutsopoulos-haris/;http://www.hongyangzhang.com", "dblp": ";;;;264/2660", "google_scholar": ";ASLbvk8AAAAJ;wpKgvpwAAAAJ;FFoUiqEAAAAJ;Sx-673sAAAAJ", "orcid": ";0009-0001-6187-5367;;;", "linkedin": "abhinav-nippani/;;;;hongyang-r-zhang-5b7797157", "or_profile": "~Abhinav_Nippani1;~Dongyue_Li1;~Haotian_Ju1;~Haris_Koutsopoulos1;~Hongyang_R._Zhang1", "aff": "Northeastern University;Northeastern University;;Northeastern University;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;;northeastern.edu;northeastern.edu", "position": "MS student;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nnippani2023graph,\ntitle={Graph Neural Networks for Road Safety Modeling: Datasets and Evaluations for Accident Analysis},\nauthor={Abhinav Nippani and Dongyue Li and Haotian Ju and Haris Koutsopoulos and Hongyang R. Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=UQ8pDKcXTq}\n}", "github": "", "project": "", "reviewers": "F2Hc;rFa1;nmaH;ZTKf;FmiK", "pdf_size": 1713861, "rating": "6;6;6;6;7", "confidence": "5;4;3;4;4", "wc_summary_and_contributions": "45;187;115;116;59", "wc_strengths": "31;52;75;112;51", "wc_improvement": "62;388;177;239;140", "wc_limitations": "41;4;14;105;1", "wc_correctness": "15;16;23;1;60", "wc_clarity": "8;108;1;1;1", "wc_relation_to_prior_work": "18;70;1;1;25", "wc_documentation": "19;21;9;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "240;847;416;577;339", "wc_reply_reviewers": "0;0;13;0;0", "wc_reply_authors": "957;2334;1116;676;1162", "reply_reviewers": "0;0;1;0;0", "reply_authors": "2;5;3;1;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 104.4, 50.31739261925244 ], "wc_strengths_avg": [ 64.2, 27.66514052015641 ], "wc_improvement_avg": [ 201.2, 109.57262431830316 ], "wc_limitations_avg": [ 33.0, 38.66264346885764 ], "wc_correctness_avg": [ 23.0, 19.8292712927127 ], "wc_clarity_avg": [ 23.8, 42.18720185079831 ], "wc_relation_to_prior_work_avg": [ 23.0, 25.321927256826246 ], "wc_documentation_avg": [ 10.2, 8.541662601625049 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 483.8, 212.41600692979802 ], "wc_reply_reviewers_avg": [ 2.6, 5.2 ], "wc_reply_authors_avg": [ 1249.0, 568.520184338252 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 1.3564659966250536 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5454795091372762417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "northeastern.edu;northeastern.edu;;northeastern.edu;northeastern.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Horospherical Decision Boundaries for Large Margin Classification in Hyperbolic Space", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71507", "id": "URAZeoIC1q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/24cb8b08f3cb2f59671e33faac4790e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=URAZeoIC1q", "openreview": "https://openreview.net/forum?id=URAZeoIC1q", "poster": "/media/PosterPDFs/NeurIPS%202023/71507.png?t=1701464659.5346158", "slides": "https://nips.cc/virtual/2023/poster/71507", "video": "https://nips.cc/virtual/2023/poster/71507", "author_site": "Xiran Fan, Chun-Hao Yang, Baba Vemuri", "tldr": "", "abstract": "Hyperbolic spaces have been quite popular in the recent past for representing hierarchically organized data. Further, several classification algorithms for data in these spaces have been proposed in the literature. These algorithms mainly use either hyperplanes or geodesics for decision boundaries in a large margin classifiers setting leading to a non-convex optimization problem. In this paper, we propose a novel large margin classifier based on horospherical decision boundaries that leads to a geodesically convex optimization problem that can be optimized using any Riemannian gradient descent technique guaranteeing a globally optimal solution. We present several experiments depicting the competitive performance of our classifier in comparison to SOTA.", "keywords": "Large-margin clssifier;Hyperbolic space;Horosphere;SVM;Geodesically convex;Global optimility;Busemann function", "primary_area": "", "supplementary_material": "/attachment/f614d1d5fd87a5637ff2edbacea722c0cffba7ab.zip", "author": "Xiran Fan;Chun-Hao Yang;Baba C. Vemuri", "authorids": "~Xiran_Fan1;~Chun-Hao_Yang3;~Baba_C._Vemuri1", "gender": ";M;M", "homepage": ";;https://www.cise.ufl.edu/~vemuri", "dblp": ";;93/1847.html", "google_scholar": ";;1fEZ_osAAAAJ", "orcid": ";0000-0002-2522-5957;", "linkedin": ";;", "or_profile": "~Xiran_Fan1;~Chun-Hao_Yang3;~Baba_C._Vemuri1", "aff": ";National Taiwan University;University of Florida", "aff_domain": ";ntu.edu.tw;ufl.edu", "position": ";Assistant Professor;Professor", "bibtex": "@inproceedings{\nfan2023horospherical,\ntitle={Horospherical Decision Boundaries for Large Margin Classification in Hyperbolic Space},\nauthor={Xiran Fan and Chun-Hao Yang and Baba C. 
Vemuri},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=URAZeoIC1q}\n}", "github": "", "project": "", "reviewers": "1ZfQ;yt6v;cEx8;38iz", "pdf_size": 2105958, "rating": "4;5;6;7", "confidence": "4;3;3;4", "soundness": "2;2;2;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "41;80;283;107", "wc_strengths": "39;110;180;45", "wc_weaknesses": "36;72;545;140", "wc_questions": "28;75;172;60", "wc_limitations": "1;9;39;1", "wc_review": "145;346;1219;353", "wc_reply_reviewers": "33;30;207;0", "wc_reply_authors": "422;0;378;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 127.75, 92.65358870545705 ], "wc_strengths_avg": [ 93.5, 57.177355657637754 ], "wc_weaknesses_avg": [ 198.25, 203.64966854871136 ], "wc_questions_avg": [ 83.75, 53.70463201624232 ], "wc_limitations_avg": [ 12.5, 15.644487847162015 ], "wc_review_avg": [ 515.75, 414.52344626088404 ], "wc_reply_reviewers_avg": [ 67.5, 81.56745674593515 ], "wc_reply_authors_avg": [ 200.0, 200.60408769514146 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17853722405257263400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";ntu.edu.tw;ufl.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National Taiwan University;University of Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.tw;https://www.ufl.edu", "aff_unique_abbr": "NTU;UF", "aff_campus_unique_index": "0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "URI2aAQiQC", "title": "SpikeBERT: A Language Spikformer Trained with Two-Stage Knowledge Distillation from BERT", "track": "main", "status": "Reject", "tldr": "", "abstract": "Spiking neural networks (SNNs) offer a promising avenue to implement deep neural networks in a more energy-efficient way.\nHowever, the network architectures of existing SNNs for language tasks are too simplistic, and deep architectures have not been fully explored, resulting in a significant performance gap compared to mainstream transformer-based networks such as BERT.\nTo this end, we improve a recently proposed spiking transformer (i.e., Spikformer) so that it can process language tasks, and propose a two-stage knowledge distillation method for training it: pre-training by distilling knowledge from BERT over a large collection of unlabelled texts, followed by fine-tuning on task-specific instances, again distilling knowledge from a BERT fine-tuned on the same training examples.\nThrough extensive experimentation, we show that the models trained with our method, named SpikeBERT, outperform state-of-the-art SNNs and even achieve results comparable to BERT on text classification tasks for both English and Chinese with much less energy consumption.", "keywords": "Spiking neural networks;Text classification;Knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/fe258ff65af6d7935fc33caa0382958f5affc501.zip", "author": "Changze Lv;Tianlong
Li;Jianhan Xu;Chenxi Gu;Zixuan Ling;Cenyuan Zhang;Xiaoqing Zheng;Xuanjing Huang", "authorids": "~Changze_Lv1;~Tianlong_Li2;~Jianhan_Xu1;~Chenxi_Gu2;~Zixuan_Ling1;~Cenyuan_Zhang1;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "gender": "M;;M;;M;;;F", "homepage": "https://lvchangze.github.io;;;;https://github.com/narcissusLZX;;;https://xuanjing-huang.github.io/", "dblp": "350/4445;;278/1558.html;;;293/9880;;05/6735-1", "google_scholar": "t3-viUwAAAAJ;;G_p-oocAAAAJ;;;ghu4BZcAAAAJ;;RGsMgZA4H78C", "orcid": ";;;;;;;0000-0001-9197-9426", "linkedin": ";;;;;;;", "or_profile": "~Changze_Lv1;~Tianlong_Li2;~Jianhan_Xu1;~Chenxi_Gu2;~Zixuan_Ling1;~Cenyuan_Zhang1;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "aff": "Fudan University;;Fudan University;;Fudan University;Fudan University;;Fudan University", "aff_domain": "fudan.edu.cn;;fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn", "position": "PhD student;;MS student;;MS student;MS student;;Full Professor", "bibtex": "@misc{\nlv2023spikebert,\ntitle={Spike{BERT}: A Language Spikformer Trained with Two-Stage Knowledge Distillation from {BERT}},\nauthor={Changze Lv and Tianlong Li and Jianhan Xu and Chenxi Gu and Zixuan Ling and Cenyuan Zhang and Xiaoqing Zheng and Xuanjing Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=URI2aAQiQC}\n}", "github": "", "project": "", "reviewers": "XMQ4;PVbb;nA4G;DsCq;9eKU", "site": "https://openreview.net/forum?id=URI2aAQiQC", "pdf_size": 821419, "rating": "3;3;4;6;6", "confidence": "3;4;3;4;3", "soundness": "2;3;2;3;3", "novelty": "2;2;1;3;3", "presentation": "3;2;3;3;3", "wc_summary": "98;40;133;65;62", "wc_strengths": "7;22;88;24;41", "wc_weaknesses": "231;151;100;21;102", "wc_questions": "10;9;39;65;2", "wc_limitations": "1;17;44;18;2", "wc_review": "347;239;404;193;209", "wc_reply_reviewers": "0;154;95;26;16", "wc_reply_authors": "611;654;441;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "3;2;2;1;1", "rating_avg": [ 4.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 79.6, 32.499846153482025 ], "wc_strengths_avg": [ 36.4, 27.96140196771256 ], "wc_weaknesses_avg": [ 121.0, 68.99565203692185 ], "wc_questions_avg": [ 25.0, 23.689660191737662 ], "wc_limitations_avg": [ 16.4, 15.551205741035002 ], "wc_review_avg": [ 278.4, 82.63558555489276 ], "wc_reply_reviewers_avg": [ 58.2, 57.85637389259717 ], "wc_reply_authors_avg": [ 341.2, 287.5520126864008 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.06019292654288467, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13932438509101523872&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ProteinGym: Large-Scale Benchmarks for Protein Fitness Prediction and Design", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73578", "id": "URoZHqAohf", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/cac723e5ff29f65e3fcbb0739ae91bee-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=URoZHqAohf", "openreview": "https://openreview.net/forum?id=URoZHqAohf", "poster": "/media/PosterPDFs/NeurIPS%202023/73578.png?t=1702337151.9354932", "slides": "https://nips.cc/virtual/2023/poster/73578", "video": "https://nips.cc/virtual/2023/poster/73578", "author_site": "Pascal Notin, Aaron Kollasch, Daniel Ritter, Lood van Niekerk, Steffanie Paul, Han Spinner, Nathan Rollins, Ada Shaw, Rose Orenbuch, Ruben Weitzman, Jonathan Frazer, Mafalda Dias, Dinko Franceschi, Yarin Gal, Debora Marks", "tldr": "", "abstract": "Predicting the effects of mutations in proteins is critical to many applications, from understanding genetic disease to designing novel proteins that can address our most pressing challenges in climate, agriculture and healthcare. Despite a surge in machine learning-based protein models to tackle these questions, an assessment of their respective benefits is challenging due to the use of distinct, often contrived, experimental datasets, and the variable performance of models across different protein families. Addressing these challenges requires scale. To that end we introduce ProteinGym, a large-scale and holistic set of benchmarks specifically designed for protein fitness prediction and design. It encompasses both a broad collection of over 250 standardized deep mutational scanning assays, spanning millions of mutated sequences, as well as curated clinical datasets providing high-quality expert annotations about mutation effects. We devise a robust evaluation framework that combines metrics for both fitness prediction and design, factors in known limitations of the underlying experimental methods, and covers both zero-shot and supervised settings. We report the performance of a diverse set of over 70 high-performing models from various subfields (eg., alignment-based, inverse folding) into a unified benchmark suite. 
We open source the corresponding codebase, datasets, MSAs, structures, model predictions and develop a user-friendly website that facilitates data access and analysis.", "keywords": "Protein fitness;Protein design;Mutation effects prediction;Benchmarks", "primary_area": "", "supplementary_material": "", "author": "Pascal Notin;Aaron W Kollasch;Daniel Ritter;Lood Van Niekerk;Steffan Paul;Han Spinner;Nathan J Rollins;Ada Shaw;Ruben Weitzman;Jonathan Frazer;Mafalda Dias;Dinko Franceschi;Rose Orenbuch;Yarin Gal;Debora Susan Marks", "authorids": "~Pascal_Notin1;~Aaron_W_Kollasch1;~Daniel_Ritter2;~Lood_Van_Niekerk1;~Steffan_Paul1;~Han_Spinner1;~Nathan_J_Rollins1;~Ada_Shaw1;~Ruben_Weitzman1;~Jonathan_Frazer1;~Mafalda_Dias1;~Dinko_Franceschi2;~Rose_Orenbuch1;~Yarin_Gal1;~Debora_Susan_Marks1", "gender": ";;M;M;F;Non-Binary;;;M;;F;;;;F", "homepage": ";;https://danieldritter.github.io/;;https://dbmi.hms.harvard.edu/people/steffan-paul;https://avivspinner.github.io;;https://ashaw3895.github.io;https://www.bdi.ox.ac.uk/Team/ruben-weitzman;https://jonnyfrazer.github.io/;https://www.crg.eu/ca/programmes-groups/dias-frazer-lab;;;http://www.cs.ox.ac.uk/people/yarin.gal/website//;https://www.deboramarkslab.com/", "dblp": ";;;;;;;;;321/1564;;;;67/9076;", "google_scholar": ";RbqQD0oAAAAJ;YQzCEJoAAAAJ;6ZqVFT0AAAAJ;;suZf26EAAAAJ;TveAliEAAAAJ;;;_QGoqgoAAAAJ;Ue5LxsIAAAAJ;;;https://scholar.google.co.uk/citations?user=SIayDoQAAAAJ;qFmoeNkAAAAJ", "orcid": ";0000-0001-9733-8822;0009-0009-3266-9917;0000-0001-9082-2574;0000-0001-7306-4863;;0000-0002-8037-6045;;;0000-0001-6900-6484;;;0000-0002-4678-0837;;0000-0001-9388-2281", "linkedin": ";;danielritter12/;loodvn/;steffanpaul359/;;;ada-shaw-914845114/;;jonathan-frazer-6b1391236/;;;;;debora-marks-3932a97/", "or_profile": "~Pascal_Notin1;~Aaron_W_Kollasch1;~Daniel_Ritter2;~Lood_Van_Niekerk1;~Steffan_Paul1;~Han_Spinner1;~Nathan_J_Rollins1;~Ada_Shaw1;~Ruben_Weitzman1;~Jonathan_Frazer1;~Mafalda_Dias1;~Dinko_Franceschi2;~Rose_Orenbuch1;~Yarin_Gal1;~Debora_Susan_Marks1", "aff": ";Harvard University;Harvard Medical School, Harvard University;Harvard University;Harvard University;Harvard University, Harvard University;Seismic Therapeutic;Harvard University;University of Oxford;Universitat Pompeu Fabra (UPF);Centre for Genomic Regulation;;Harvard University;University of Oxford;Harvard Medical School", "aff_domain": ";harvard.edu;hms.harvard.edu;harvard.edu;harvard.edu;g.harvard.edu;seismictx.com;harvard.edu;ox.ac.uk;crg.eu;crg.eu;;harvard.edu;ox.ac.uk;harvard.edu", "position": ";PhD student;Researcher;Researcher;PhD student;PhD student;Researcher;PhD student;PhD student;Assistant Professor;Assistant Professor;;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nnotin2023proteingym,\ntitle={ProteinGym: Large-Scale Benchmarks for Protein Fitness Prediction and Design},\nauthor={Pascal Notin and Aaron W Kollasch and Daniel Ritter and Lood Van Niekerk and Steffan Paul and Han Spinner and Nathan J Rollins and Ada Shaw and Rose Orenbuch and Ruben Weitzman and Jonathan Frazer and Mafalda Dias and Dinko Franceschi and Yarin Gal and Debora Susan Marks},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=URoZHqAohf}\n}", "github": "", "project": "", "reviewers": "3bqN;Ckxq;5XQY;dron;vaLL", "pdf_size": 1213964, "rating": "2;6;6;7;9", "confidence": "4;3;2;3;4", "wc_summary_and_contributions": "17;49;26;105;169", "wc_strengths": "10;15;21;77;118", 
"wc_improvement": "444;219;35;34;110", "wc_limitations": "1;15;76;2;39", "wc_correctness": "1;11;1;7;68", "wc_clarity": "1;9;1;4;9", "wc_relation_to_prior_work": "1;11;8;9;10", "wc_documentation": "1;4;1;11;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "477;334;170;250;528", "wc_reply_reviewers": "624;93;0;0;0", "wc_reply_authors": "2585;1373;220;208;526", "reply_reviewers": "2;2;0;0;0", "reply_authors": "5;3;1;1;1", "rating_avg": [ 6.0, 2.280350850198276 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 73.2, 56.85208879188169 ], "wc_strengths_avg": [ 48.2, 42.43300602125661 ], "wc_improvement_avg": [ 168.4, 153.48302837773304 ], "wc_limitations_avg": [ 26.6, 28.246061672381867 ], "wc_correctness_avg": [ 17.6, 25.48411269791436 ], "wc_clarity_avg": [ 4.8, 3.6 ], "wc_relation_to_prior_work_avg": [ 7.8, 3.54400902933387 ], "wc_documentation_avg": [ 4.2, 3.655133376499413 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 351.8, 134.50115241141987 ], "wc_reply_reviewers_avg": [ 143.4, 242.984443946521 ], "wc_reply_authors_avg": [ 982.4, 906.6581715288293 ], "reply_reviewers_avg": [ 0.8, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 1.6 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.11720180773462387, "gs_citation": 183, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14866424575526967084&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";harvard.edu;hms.harvard.edu;harvard.edu;harvard.edu;g.harvard.edu;seismictx.com;harvard.edu;ox.ac.uk;crg.eu;crg.eu;;harvard.edu;ox.ac.uk;harvard.edu", "author_num": 15, "aff_unique_index": "0;0;0;0;0;1;0;2;3;4;0;2;0", "aff_unique_norm": "Harvard University;Seismic Therapeutic;University of Oxford;Universitat Pompeu Fabra;Centre for Genomic Regulation", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.harvard.edu;;https://www.ox.ac.uk;https://www.upf.edu;https://www.crg.eu", "aff_unique_abbr": "Harvard;;Oxford;UPF;CRG", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Boston", "aff_country_unique_index": "0;0;0;0;0;0;2;3;3;0;2;0", "aff_country_unique": "United States;;United Kingdom;Spain" }, { "title": "PAC-Bayes Generalization Certificates for Learned Inductive Conformal Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71506", "id": "URrUpcp6Qh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9235c376df778f1aaf486a882afb7471-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=URrUpcp6Qh", "openreview": "https://openreview.net/forum?id=URrUpcp6Qh", "poster": "/media/PosterPDFs/NeurIPS%202023/71506.png?t=1702328856.9647906", "slides": "https://nips.cc/virtual/2023/poster/71506", "video": "https://nips.cc/virtual/2023/poster/71506", "author_site": "Apoorva Sharma, Sushant Veer, Asher Hancock, Heng Yang, Marco Pavone, Anirudha Majumdar", "tldr": "", "abstract": "Inductive Conformal Prediction (ICP) provides a practical and effective approach for equipping deep learning models with uncertainty estimates in the form of set-valued predictions which are guaranteed to contain the ground truth with high probability.\nDespite the appeal of this coverage guarantee, these sets may not be efficient: the size and contents of the prediction sets are not directly controlled, and instead depend on the underlying model and choice of score function.\nTo remedy this, recent work has proposed learning model and score function parameters 
using data to directly optimize the efficiency of the ICP prediction sets.\nWhile appealing, the generalization theory for such an approach is lacking: direct optimization of empirical efficiency may yield prediction sets that are either no longer efficient on test data, or no longer obtain the required coverage on test data.\nIn this work, we use PAC-Bayes theory to obtain generalization bounds on both the coverage and the efficiency of set-valued predictors which can be directly optimized to maximize efficiency while satisfying a desired test coverage.\nIn contrast to prior work, our framework allows us to utilize the entire calibration dataset to learn the parameters of the model and score function, instead of requiring a separate hold-out set for obtaining test-time coverage guarantees.\nWe leverage these theoretical results to provide a practical algorithm for using calibration data to simultaneously fine-tune the parameters of a model and score function while guaranteeing test-time coverage and efficiency of the resulting prediction sets.\nWe evaluate the approach on regression and classification tasks, and outperform baselines calibrated using a Hoeffding bound-based PAC guarantee on ICP, especially in the low-data regime.", "keywords": "Conformal Prediction;PAC Bayes;Generalization Theory", "primary_area": "", "supplementary_material": "", "author": "Apoorva Sharma;Sushant Veer;Asher Hancock;Heng Yang;Marco Pavone;Anirudha Majumdar", "authorids": "~Apoorva_Sharma1;~Sushant_Veer1;ajhancock@princeton.edu;~Heng_Yang4;~Marco_Pavone1;~Anirudha_Majumdar1", "gender": "M;M;;M;M;M", "homepage": "https://web.stanford.edu/~apoorva;;;https://hankyang.seas.harvard.edu/;https://web.stanford.edu/~pavone/;https://irom-lab.princeton.edu/majumdar/", "dblp": "181/4231;173/5950;;83/415-2;91/3382-1.html;116/6436", "google_scholar": "3bBgnTIAAAAJ;1FiIlQsAAAAJ;;GuKEDfixZqsC;RhOpyXcAAAAJ;ibu3FwsAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Apoorva_Sharma1;~Sushant_Veer1;ajhancock@princeton.edu;~Heng_Yang4;~Marco_Pavone1;~Anirudha_Majumdar1", "aff": "NVIDIA;NVIDIA;;NVIDIA;Stanford University;Princeton University", "aff_domain": "nvidia.com;nvidia.com;;nvidia.com;stanford.edu;princeton.edu", "position": "Researcher;Researcher;;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nsharma2023pacbayes,\ntitle={{PAC}-Bayes Generalization Certificates for Learned Inductive Conformal Prediction},\nauthor={Apoorva Sharma and Sushant Veer and Asher Hancock and Heng Yang and Marco Pavone and Anirudha Majumdar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=URrUpcp6Qh}\n}", "github": "", "project": "", "reviewers": "XWYk;6Bn9;Akhh;kKqP;jcN9", "pdf_size": 919164, "rating": "3;5;6;6;7", "confidence": "4;3;3;3;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "134;53;66;57;188", "wc_strengths": "65;66;57;46;74", "wc_weaknesses": "371;111;163;54;80", "wc_questions": "96;12;4;39;221", "wc_limitations": "12;31;21;44;24", "wc_review": "678;273;311;240;587", "wc_reply_reviewers": "438;65;15;32;26", "wc_reply_authors": "551;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.6, 53.12852341257002 ], 
"wc_strengths_avg": [ 61.6, 9.478396488858229 ], "wc_weaknesses_avg": [ 155.8, 113.55949982278013 ], "wc_questions_avg": [ 74.4, 80.07646345837209 ], "wc_limitations_avg": [ 26.4, 10.707007051459339 ], "wc_review_avg": [ 417.8, 179.06356413296368 ], "wc_reply_reviewers_avg": [ 115.2, 162.25831257596636 ], "wc_reply_authors_avg": [ 110.2, 220.40000000000003 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.24077170617153837, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13848320609885355881&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "nvidia.com;nvidia.com;;nvidia.com;stanford.edu;princeton.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "NVIDIA;Stanford University;Princeton University", "aff_unique_dep": "NVIDIA Corporation;;", "aff_unique_url": "https://www.nvidia.com;https://www.stanford.edu;https://www.princeton.edu", "aff_unique_abbr": "NVIDIA;Stanford;Princeton", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Expressive Sign Equivariant Networks for Spectral Geometric Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71505", "id": "UWd4ysACo4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3516aa3393f0279e04c099f724664f99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UWd4ysACo4", "openreview": "https://openreview.net/forum?id=UWd4ysACo4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71505", "video": "https://nips.cc/virtual/2023/poster/71505", "author_site": "Derek Lim, Joshua Robinson, Stefanie Jegelka, Haggai Maron", "tldr": "", "abstract": "Recent work has shown the utility of developing machine learning models that respect the structure and symmetries of eigenvectors. These works promote sign invariance, since for any eigenvector v the negation -v is also an eigenvector. However, we show that sign invariance is theoretically limited for tasks such as building orthogonally equivariant models and learning node positional encodings for link prediction in graphs. In this work, we demonstrate the benefits of sign equivariance for these tasks. To obtain these benefits, we develop novel sign equivariant neural network architectures. Our models are based on a new analytic characterization of sign equivariant polynomials and thus inherit provable expressiveness properties. 
Controlled synthetic experiments show that our networks can achieve the theoretically predicted benefits of sign equivariant models.", "keywords": "Eigenvectors;spectral;geometry;universal approximation;graph;equivariance;invariance", "primary_area": "", "supplementary_material": "/attachment/956798f286021e4f599f302ba26aef1e688497b6.zip", "author": "Derek Lim;Joshua Robinson;Stefanie Jegelka;Haggai Maron", "authorids": "~Derek_Lim1;~Joshua_Robinson4;~Stefanie_Jegelka3;~Haggai_Maron1", "gender": "M;F;M;M", "homepage": "https://cptq.github.io/;http://people.csail.mit.edu/stefje/;https://haggaim.github.io/;https://joshrobinson.mit.edu/", "dblp": "267/5433;38/7003;181/6629;15/4759", "google_scholar": "y9YTBIsAAAAJ;gTWUZlsAAAAJ;https://scholar.google.co.il/citations?user=4v8uJrIAAAAJ;E02doCkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Derek_Lim1;~Stefanie_Jegelka3;~Haggai_Maron1;~Joshua_David_Robinson1", "aff": "NVIDIA;Massachusetts Institute of Technology;NVIDIA;Massachusetts Institute of Technology", "aff_domain": "nvidia.com;mit.edu;nvidia.com;mit.edu", "position": "Intern;Associate Professor;Research Scientist;PhD student", "bibtex": "@inproceedings{\nlim2023expressive,\ntitle={Expressive Sign Equivariant Networks for Spectral Geometric Learning},\nauthor={Derek Lim and Joshua Robinson and Stefanie Jegelka and Haggai Maron},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UWd4ysACo4}\n}", "github": "", "project": "", "reviewers": "HW6g;YXFy;nERC;a3kf;p512", "pdf_size": 623122, "rating": "6;6;7;7;8", "confidence": "4;4;3;3;3", "soundness": "3;3;3;4;4", "novelty": "3;3;3;3;3", "presentation": "3;3;4;4;4", "wc_summary": "66;69;78;199;54", "wc_strengths": "138;40;135;220;31", "wc_weaknesses": "78;357;21;50;13", "wc_questions": "19;34;205;10;295", "wc_limitations": "15;1;7;33;35", "wc_review": "316;501;446;512;428", "wc_reply_reviewers": "92;86;90;17;55", "wc_reply_authors": "0;35;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 93.2, 53.454279529332354 ], "wc_strengths_avg": [ 112.8, 70.15810715804695 ], "wc_weaknesses_avg": [ 103.8, 128.66297058594597 ], "wc_questions_avg": [ 112.6, 115.99413778290696 ], "wc_limitations_avg": [ 18.2, 13.658696863171098 ], "wc_review_avg": [ 440.6, 69.94168999959895 ], "wc_reply_reviewers_avg": [ 68.0, 28.823601440486232 ], "wc_reply_authors_avg": [ 7.0, 14.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2416697294366512942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nvidia.com;mit.edu;nvidia.com;mit.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "NVIDIA;Massachusetts Institute of Technology", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://web.mit.edu", "aff_unique_abbr": "NVIDIA;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Intensity Profile Projection: A Framework for Continuous-Time Representation 
Learning for Dynamic Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71504", "id": "UXtLrsG4Rf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/487667c56596138d36bbaa3bd8aac6df-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UXtLrsG4Rf", "openreview": "https://openreview.net/forum?id=UXtLrsG4Rf", "poster": "/media/PosterPDFs/NeurIPS%202023/71504.png?t=1697195744.311531", "slides": "https://nips.cc/virtual/2023/poster/71504", "video": "https://nips.cc/virtual/2023/poster/71504", "author_site": "Alexander Modell, Ian Gallagher, Emma Ceccherini, Nick Whiteley, Patrick Rubin-Delanchy", "tldr": "", "abstract": "We present a new representation learning framework, Intensity Profile Projection, for continuous-time dynamic network data. Given triples $(i,j,t)$, each representing a time-stamped ($t$) interaction between two entities ($i,j$), our procedure returns a continuous-time trajectory for each node, representing its behaviour over time. The framework consists of three stages: estimating pairwise intensity functions, e.g. via kernel smoothing; learning a projection which minimises a notion of intensity reconstruction error; and constructing evolving node representations via the learned projection. The trajectories satisfy two properties, known as structural and temporal coherence, which we see as fundamental for reliable inference. Moreoever, we develop estimation theory providing tight control on the error of any estimated trajectory, indicating that the representations could even be used in quite noise-sensitive follow-on analyses. The theory also elucidates the role of smoothing as a bias-variance trade-off, and shows how we can reduce the level of smoothing as the signal-to-noise ratio increases on account of the algorithm `borrowing strength' across the network.", "keywords": "dynamic networks;representation learning;spectral methods", "primary_area": "", "supplementary_material": "", "author": "Alexander Modell;Ian Gallagher;Emma Ceccherini;Nick Whiteley;Patrick Rubin-Delanchy", "authorids": "~Alexander_Modell1;~Ian_Gallagher1;gs22311@bristol.ac.uk;~Nick_Whiteley1;~Patrick_Rubin-Delanchy1", "gender": "M;M;;;M", "homepage": "http://alexandermodell.github.io/;https://www.iangallagher.uk/;;;https://people.maths.bris.ac.uk/~pr12244/", "dblp": ";;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;-G_f2hIAAAAJ;;;https://scholar.google.co.uk/citations?user=a5ZrPkAAAAAJ", "orcid": "0000-0002-0074-8145;0009-0006-7537-8317;;;", "linkedin": ";;;;", "or_profile": "~Alexander_Modell1;~Ian_Gallagher1;gs22311@bristol.ac.uk;~Nick_Whiteley1;~Patrick_Rubin-Delanchy1", "aff": "University of Bristol;University of Bristol;;;", "aff_domain": "bristol.ac.uk;bristol.ac.uk;;;", "position": "PhD student;Postdoc;;;", "bibtex": "@inproceedings{\nmodell2023intensity,\ntitle={Intensity Profile Projection: A Framework for Continuous-Time Representation Learning for Dynamic Networks},\nauthor={Alexander Modell and Ian Gallagher and Emma Ceccherini and Nick Whiteley and Patrick Rubin-Delanchy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UXtLrsG4Rf}\n}", "github": "", "project": "", "reviewers": "fL1b;QsMj;L5U9;SnVH", "pdf_size": 1016900, "rating": "3;4;7;8", "confidence": "3;4;4;4", "soundness": "2;4;3;4", "novelty": "2;2;3;4", "presentation": "2;4;3;4", "wc_summary": "115;103;114;151", "wc_strengths": "21;30;71;64", 
"wc_weaknesses": "134;106;203;268", "wc_questions": "1;68;23;57", "wc_limitations": "1;32;9;36", "wc_review": "272;339;420;576", "wc_reply_reviewers": "0;0;64;37", "wc_reply_authors": "0;0;122;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 120.75, 18.08832496390973 ], "wc_strengths_avg": [ 46.5, 21.383404780343096 ], "wc_weaknesses_avg": [ 177.75, 62.93796548983769 ], "wc_questions_avg": [ 37.25, 26.705570579937064 ], "wc_limitations_avg": [ 19.5, 14.84082207965583 ], "wc_review_avg": [ 401.75, 113.4336259669063 ], "wc_reply_reviewers_avg": [ 25.25, 26.994212342648563 ], "wc_reply_authors_avg": [ 30.5, 52.827549630850754 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7001400420140048, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8664040887944644017&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "bristol.ac.uk;bristol.ac.uk;;;", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Decompose Novel into Known: Part Concept Learning For 3D Novel Class Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71503", "id": "UYl9IIsjq7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa31eee8f2351176ddd4d14646d4a950-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UYl9IIsjq7", "openreview": "https://openreview.net/forum?id=UYl9IIsjq7", "poster": "/media/PosterPDFs/NeurIPS%202023/71503.png?t=1701761365.330506", "slides": "https://nips.cc/virtual/2023/poster/71503", "video": "https://nips.cc/virtual/2023/poster/71503", "author_site": "Tingyu Weng, Jun Xiao, Haiyong Jiang", "tldr": "", "abstract": "In this work, we address 3D novel class discovery (NCD) that discovers novel classes from an unlabeled dataset by leveraging the knowledge of disjoint known classes. The key challenge of 3D NCD is that learned features by known class recognition are heavily biased and hinder generalization to novel classes. Since geometric parts are more generalizable across different classes, we propose to decompose novel into known parts, coined DNIK, to mitigate the above problems. DNIK learns a part concept bank encoding rich part geometric patterns from known classes so that novel 3D shapes can be represented as part concept compositions to facilitate cross-category generalization. Moreover, we formulate three constraints on part concepts to ensure diverse part concepts without collapsing. A part relation encoding module (PRE) is also developed to leverage part-wise spatial relations for better recognition. We construct three 3D NCD tasks for evaluation and extensive experiments show that our method achieves significantly superior results than SOTA baselines (+11.7%, +14.1%, and +16.3% improvements on average for three tasks, respectively). 
Code and data will be released.", "keywords": "3D point clouds;3D recognition;part-based representation;unsupervised class discovery", "primary_area": "", "supplementary_material": "/attachment/0ecdd7cfeb385fd42868d9faaddcad9e221c0f41.zip", "author": "Tingyu Weng;Jun Xiao;Haiyong Jiang", "authorids": "~Tingyu_Weng1;~Jun_Xiao4;~Haiyong_Jiang1", "gender": "M;M;", "homepage": "http://none.com;http://people.ucas.ac.cn/~0006716?language=en;", "dblp": "258/7035.html;71/2308-5;", "google_scholar": ";;", "orcid": "0000-0003-4760-5552;0000-0002-1799-3948;", "linkedin": ";;", "or_profile": "~Tingyu_Weng1;~Jun_Xiao4;~Haiyong_Jiang1", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;", "aff_domain": "ucas.ac.cn;ucas.edu.cn;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\nweng2023decompose,\ntitle={Decompose Novel into Known: Part Concept Learning For 3D Novel Class Discovery},\nauthor={Tingyu Weng and Jun Xiao and Haiyong Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UYl9IIsjq7}\n}", "github": "", "project": "", "reviewers": "mbrM;qzzG;zZT9;eZuw;dey2", "pdf_size": 6431732, "rating": "5;5;5;6;6", "confidence": "5;4;5;5;3", "soundness": "2;3;4;4;3", "novelty": "3;3;3;3;3", "presentation": "4;3;3;4;3", "wc_summary": "68;162;86;129;74", "wc_strengths": "58;90;52;121;33", "wc_weaknesses": "86;154;202;362;40", "wc_questions": "88;6;82;38;3", "wc_limitations": "58;51;26;6;1", "wc_review": "358;463;448;656;151", "wc_reply_reviewers": "397;114;82;553;32", "wc_reply_authors": "981;237;0;1633;25", "reply_reviewers": "3;2;1;4;1", "reply_authors": "5;3;1;6;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 103.8, 36.07991130809498 ], "wc_strengths_avg": [ 70.8, 31.09598044763985 ], "wc_weaknesses_avg": [ 168.8, 111.43859295594144 ], "wc_questions_avg": [ 43.4, 36.16407056734626 ], "wc_limitations_avg": [ 28.4, 23.000869548780106 ], "wc_review_avg": [ 415.2, 163.96267868024114 ], "wc_reply_reviewers_avg": [ 235.6, 203.285611886331 ], "wc_reply_authors_avg": [ 575.2, 637.4274546958266 ], "reply_reviewers_avg": [ 2.2, 1.16619037896906 ], "reply_authors_avg": [ 3.4, 1.854723699099141 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10732506057916153311&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucas.ac.cn;ucas.edu.cn;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.ucas.ac.cn", "aff_unique_abbr": "UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "UZTpkfw0aC", "title": "Generative modeling for RNA splicing code predictions and design", "track": "main", "status": "Reject", "tldr": "", "abstract": "Alternative splicing (AS) of pre-mRNA is a highly regulated process with diverse phenotypic effects ranging from changes in AS across tissues to numerous diseases.
The ability to predict or manipulate AS has therefore been a long-standing goal in the RNA field with applications ranging from identifying novel regulatory mechanisms to designing therapeutic targets. Here we take advantage of generative model architectures to address both the prediction and design of condition-specific RNA splicing outcomes. First, we construct a predictive model, TrASPr, which combines multiple transformers along with side information to predict splicing in a tissue-specific manner. Then, we exploit TrASPr as an oracle to produce labeled data for a Bayesian Optimization (BO) algorithm with a custom loss function for RNA splicing outcome design. We demonstrate that TrASPr significantly outperforms recently published models and that it can identify relevant regulatory features which are also captured by the BO generative process.", "keywords": "RNA Splicing;Computational Biology;Transformers;Deep Learning;Bayesian Optimization;RNA Design", "primary_area": "", "supplementary_material": "", "author": "Di Wu;Anupama Jha;San Jewell;Natalie Maus;Jacob R. Gardner;Yoseph Barash", "authorids": "~Di_Wu27;~Anupama_Jha1;sjewell@pennmedicine.upenn.edu;~Natalie_Maus1;~Jacob_R._Gardner1;~Yoseph_Barash1", "gender": "M;F;;F;;M", "homepage": "https://www.biociphers.org/copy-of-joseph-aicher-lab-member;https://anupamajha.com/;;https://sites.google.com/seas.upenn.edu/natalie-maus/;;https://www.biociphers.org/", "dblp": ";209/7336;;264/7932;;https://dblp.uni-trier.de/pers/hd/b/Barash:Yoseph", "google_scholar": "kB6mIMkAAAAJ;aIhgsDIAAAAJ;;hNRd6lsAAAAJ;;", "orcid": ";0000-0003-3029-2086;;;;0000-0003-3005-5048", "linkedin": ";;;natalie-maus-14b936178/;;", "or_profile": "~Di_Wu27;~Anupama_Jha1;sjewell@pennmedicine.upenn.edu;~Natalie_Maus1;~Jacob_R._Gardner1;~Yoseph_Barash1", "aff": "University of Pennsylvania;University of Washington;;University of Pennsylvania;;University of Pennsylvania", "aff_domain": "upenn.edu;uw.edu;;upenn.edu;;upenn.edu", "position": "PhD student;Postdoc;;PhD student;;Associate Professor", "bibtex": "@misc{\nwu2023generative,\ntitle={Generative modeling for {RNA} splicing code predictions and design},\nauthor={Di Wu and Anupama Jha and San Jewell and Natalie Maus and Jacob R.
Gardner and Yoseph Barash},\nyear={2023},\nurl={https://openreview.net/forum?id=UZTpkfw0aC}\n}", "github": "", "project": "", "reviewers": "aUkC;XeDB;wcPZ;Kz6p;LDfP", "site": "https://openreview.net/forum?id=UZTpkfw0aC", "pdf_size": 1481141, "rating": "4;5;5;6;6", "confidence": "3;4;3;3;4", "soundness": "1;2;2;3;3", "novelty": "1;2;2;3;2", "presentation": "2;2;3;3;3", "wc_summary": "99;25;62;108;59", "wc_strengths": "76;93;27;176;54", "wc_weaknesses": "237;92;107;227;192", "wc_questions": "239;259;7;58;139", "wc_limitations": "140;30;49;5;71", "wc_review": "791;499;252;574;515", "wc_reply_reviewers": "393;19;0;73;63", "wc_reply_authors": "776;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 70.6, 29.977324763894458 ], "wc_strengths_avg": [ 85.2, 50.499108903029175 ], "wc_weaknesses_avg": [ 171.0, 60.44832503882965 ], "wc_questions_avg": [ 140.4, 98.36178119574696 ], "wc_limitations_avg": [ 59.0, 45.9608529076648 ], "wc_review_avg": [ 526.2, 172.31064969989524 ], "wc_reply_reviewers_avg": [ 109.6, 144.25477461768813 ], "wc_reply_authors_avg": [ 155.2, 310.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13097030538542263973&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Pennsylvania;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.washington.edu", "aff_unique_abbr": "UPenn;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Model-enhanced Vector Index", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71502", "id": "UZlAjSnmvB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac112e8ffc4e5b9ece32070440a8ca43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UZlAjSnmvB", "openreview": "https://openreview.net/forum?id=UZlAjSnmvB", "poster": "/media/PosterPDFs/NeurIPS%202023/71502.png?t=1698462833.0029595", "slides": "https://nips.cc/virtual/2023/poster/71502", "video": "https://nips.cc/virtual/2023/poster/71502", "author_site": "Hailin Zhang, Yujing Wang, Qi Chen, Ruiheng Chang, Ting Zhang, Ziming Miao, Yingyan Hou, Yang Ding, Xupeng Miao, Haonan Wang, Bochen Pang, Yuefeng Zhan, Hao Sun, Weiwei Deng, Qi Zhang, Fan Yang, Xing Xie, Mao Yang, Bin CUI", "tldr": "", "abstract": "Embedding-based retrieval methods construct vector indices to search for document representations that are most similar to the query representations. They are widely used in document retrieval due to low latency and decent recall performance. Recent research indicates that deep retrieval solutions offer better model quality, but are hindered by unacceptable serving latency and the inability to support document updates. In this paper, we aim to enhance the vector index with end-to-end deep generative models, leveraging the differentiable advantages of deep retrieval models while maintaining desirable serving efficiency. 
We propose Model-enhanced Vector Index (MEVI), a differentiable model-enhanced index empowered by a twin-tower representation model. MEVI leverages a Residual Quantization (RQ) codebook to bridge the sequence-to-sequence deep retrieval and embedding-based models. To substantially reduce the inference time, instead of decoding the unique document ids in long sequential steps, we first generate some semantic virtual cluster ids of candidate documents in a small number of steps, and then leverage the well-adapted embedding vectors to further perform a fine-grained search for the relevant documents in the candidate virtual clusters. We empirically show that our model achieves better performance on the commonly used academic benchmarks MSMARCO Passage and Natural Questions, with comparable serving latency to dense retrieval solutions.", "keywords": "document retrieval;model-based index;dense retrieval;residual quantization", "primary_area": "", "supplementary_material": "", "author": "Hailin Zhang;Yujing Wang;Qi Chen;Ruiheng Chang;Ting Zhang;Ziming Miao;Yingyan Hou;Yang Ding;Xupeng Miao;Haonan Wang;Bochen Pang;Yuefeng Zhan;Hao Sun;Weiwei Deng;Qi Zhang;Fan Yang;Xing Xie;Mao Yang;Bin CUI", "authorids": "~Hailin_Zhang2;~Yujing_Wang1;~Qi_Chen2;~Ruiheng_Chang2;~Ting_Zhang7;~Ziming_Miao1;~Yingyan_Hou1;~Yang_Ding2;~Xupeng_Miao1;~Haonan_Wang1;~Bochen_Pang1;~Yuefeng_Zhan1;~Hao_Sun6;~Weiwei_Deng2;~Qi_Zhang19;~Fan_Yang28;~Xing_Xie3;~Mao_Yang1;~Bin_CUI2", "gender": ";F;F;M;M;F;M;M;M;M;;M;M;M;M;M;;M;F", "homepage": "https://hugozhl.github.io/;;https://www.microsoft.com/en-us/research/people/cheqi/;;;;;https://hsword.github.io;http://charles-haonan-wang.me/;;;;;;https://fanyangcs.github.io/;http://research.microsoft.com/en-us/people/xingx/;;https://cuibinpku.github.io/index.html;https://hellozting.github.io/", "dblp": "04/1131-4;16/4075;66/6320-9;;;322/1065;;243/2364;;291/3389.html;331/1573;;311/3565.html;;29/3081-24.html;08/6809-1;;55/5031.html;06/5919-2", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;;czR56GQAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;aCAgdYkAAAAJ;cLziVZMAAAAJ;u5qJq8AAAAAJ;dQIMlM0AAAAJ;OjWD_SsAAAAJ;;;https://scholar.google.com/citations?hl=en;5EQfAFIAAAAJ;LgJqohwAAAAJ;IJAU8KoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0000-4188-7742;;0009-0006-7394-0185;;;0000-0002-2789-8581;;0000-0002-9371-8358;0009-0006-6963-8987;;0009-0007-5902-417X;0009-0004-5027-7478;0009-0001-4793-9715;;0000-0002-0378-060X;0000-0002-8608-8482;;0000-0003-1681-4677;", "linkedin": ";;;;ziming-miao-3771b714b/;;;;;;yuefengzhan/;;;qizhang07/;;xingx/;;;", "or_profile": "~Hailin_Zhang2;~Yujing_Wang1;~Qi_Chen2;~Ruiheng_Chang2;~Ziming_Miao1;~Yingyan_Hou1;~Yang_Ding2;~Xupeng_Miao1;~Haonan_Wang1;~Bochen_Pang1;~Yuefeng_Zhan1;~Hao_Sun6;~Weiwei_Deng2;~Qi_Zhang19;~Fan_Yang28;~Xing_Xie3;~Mao_Yang1;~Bin_CUI2;~ting_zhang6", "aff": "Peking University;Microsoft;Microsoft Research;Microsoft;Microsoft;Tsinghua University;Institute of Information Engineering, CAS;Carnegie Mellon University;National University of Singapore;Microsoft;Microsoft;Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft Research Asia;;Peking University;Microsoft", "aff_domain": 
"pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;tsinghua.edu.cn;iie.ac.cn;cmu.edu;u.nus.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;;pku.edu.cn;microsoft.com", "position": "PhD student;Software Engineering Manager;Principal Researcher;Applied Scientist;Researcher;MS student;MS student;Postdoc;PhD student;SDE;Principal Applied Scientist;Researcher;Researcher;Researcher;Senior Principal Researcher;Senior Principal Researcher;;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023modelenhanced,\ntitle={Model-enhanced Vector Index},\nauthor={Hailin Zhang and Yujing Wang and Qi Chen and Ruiheng Chang and Ting Zhang and Ziming Miao and Yingyan Hou and Yang Ding and Xupeng Miao and Haonan Wang and Bochen Pang and Yuefeng Zhan and Hao Sun and Weiwei Deng and Qi Zhang and Fan Yang and Xing Xie and Mao Yang and Bin CUI},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UZlAjSnmvB}\n}", "github": "", "project": "", "reviewers": "v1wc;KfyS;aDGQ;c7Dg;VZqD", "pdf_size": 570633, "rating": "4;5;6;6;7", "confidence": "3;4;5;5;4", "soundness": "2;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "74;71;102;129;56", "wc_strengths": "55;68;84;77;49", "wc_weaknesses": "85;319;119;208;232", "wc_questions": "109;3;37;39;19", "wc_limitations": "41;1;38;1;1", "wc_review": "364;462;380;454;357", "wc_reply_reviewers": "0;50;90;28;115", "wc_reply_authors": "0;37;475;0;84", "reply_reviewers": "0;1;2;1;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.4, 25.973832986296035 ], "wc_strengths_avg": [ 66.6, 13.093509842666327 ], "wc_weaknesses_avg": [ 192.6, 83.38009354756086 ], "wc_questions_avg": [ 41.4, 36.252448193191036 ], "wc_limitations_avg": [ 16.4, 18.8849146145806 ], "wc_review_avg": [ 403.4, 45.27074110283595 ], "wc_reply_reviewers_avg": [ 56.6, 41.45165859166555 ], "wc_reply_authors_avg": [ 119.2, 180.55846698507386 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 19, 0 ], "corr_rating_confidence": 0.6289709020331512, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16848683721935699264&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;tsinghua.edu.cn;iie.ac.cn;cmu.edu;u.nus.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;;pku.edu.cn;microsoft.com", "author_num": 19, "aff_unique_index": "0;1;1;1;1;2;3;4;5;1;1;1;1;1;1;1;0;1", "aff_unique_norm": "Peking University;Microsoft;Tsinghua University;Chinese Academy of Sciences;Carnegie Mellon University;National University of Singapore", "aff_unique_dep": ";Microsoft Corporation;;Institute of Information Engineering;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com;https://www.tsinghua.edu.cn;http://www.cas.cn;https://www.cmu.edu;https://www.nus.edu.sg", "aff_unique_abbr": "Peking U;Microsoft;THU;CAS;CMU;NUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;1;1;0;0;1;2;1;1;1;1;1;1;0;0;1", 
"aff_country_unique": "China;United States;Singapore" }, { "title": "Active Negative Loss Functions for Learning with Noisy Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71501", "id": "Uafbv4rfJc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/15f4cefb0e143c7ad9d40e879b0a9d0c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Uafbv4rfJc", "openreview": "https://openreview.net/forum?id=Uafbv4rfJc", "poster": "/media/PosterPDFs/NeurIPS%202023/71501.png?t=1701872789.0814018", "slides": "https://nips.cc/virtual/2023/poster/71501", "video": "https://nips.cc/virtual/2023/poster/71501", "author_site": "Xichen Ye, Xiaoqiang Li, songmin dai, Tong Liu, Yan Sun, Weiqin Tong", "tldr": "", "abstract": "Robust loss functions are essential for training deep neural networks in the presence of noisy labels. Some robust loss functions use Mean Absolute Error (MAE) as its necessary component. For example, the recently proposed Active Passive Loss (APL) uses MAE as its passive loss function. However, MAE treats every sample equally, slows down the convergence and can make training difficult. In this work, we propose a new class of theoretically robust passive loss functions different from MAE, namely *Normalized Negative Loss Functions* (NNLFs), which focus more on memorized clean samples. By replacing the MAE in APL with our proposed NNLFs, we improve APL and propose a new framework called *Active Negative Loss* (ANL). Experimental results on benchmark and real-world datasets demonstrate that the new set of loss functions created by our ANL framework can outperform state-of-the-art methods. The code is available at\nhttps://github.com/Virusdoll/Active-Negative-Loss.", "keywords": "noisy label learning;robust loss function;multiclass classification;computer vision", "primary_area": "", "supplementary_material": "/attachment/7c76402ac4194165c69eafc43b0a25c9d1411b8e.zip", "author": "Xichen Ye;Xiaoqiang Li;Songmin Dai;Tong Liu;Yan Sun;Weiqin Tong", "authorids": "~Xichen_Ye1;~Xiaoqiang_Li2;~Songmin_Dai3;~Tong_Liu5;~Yan_Sun9;~Weiqin_Tong3", "gender": "M;M;F;Not Specified;M;M", "homepage": ";https://cs.shu.edu.cn/szdw/jsxx.htm;https://liutong-lab.github.io/;https://yansun-shu.github.io/;https://www.shu.edu.cn/info/1611/75785.htm;", "dblp": "352/8650;;;;;230/3847", "google_scholar": ";JGm4z4YAAAAJ;;;;a7JUqrQAAAAJ", "orcid": ";0000-0001-7243-2783;;;;", "linkedin": "yexichen0930;;;;;", "or_profile": "~Xichen_Ye1;~Xiaoqiang_Li2;~Tong_Liu5;~Yan_Sun9;~Weiqin_Tong3;~songmin_dai2", "aff": "Shanghai University;shanghai university;Shanghai University;Shanghai University;Shanghai University;", "aff_domain": "shu.edu.cn;shu.edu.cn;shu.edu.cn;shu.edu.cn;shu.edu;", "position": "MS student;Associate Professor;Associate Professor;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nye2023active,\ntitle={Active Negative Loss Functions for Learning with Noisy Labels},\nauthor={Xichen Ye and Xiaoqiang Li and Songmin Dai and Tong Liu and Yan Sun and Weiqin Tong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Uafbv4rfJc}\n}", "github": "", "project": "", "reviewers": "j1Cc;u2rQ;abGG;btFc", "pdf_size": 955358, "rating": "5;6;7;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "121;24;45;103", "wc_strengths": "53;50;23;43", "wc_weaknesses": "160;310;15;89", "wc_questions": "2;4;89;56", 
"wc_limitations": "10;10;4;1", "wc_review": "346;398;176;292", "wc_reply_reviewers": "104;75;19;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.25, 39.96482828688246 ], "wc_strengths_avg": [ 42.25, 11.691342951089922 ], "wc_weaknesses_avg": [ 143.5, 108.94608758463977 ], "wc_questions_avg": [ 37.75, 36.663162711364656 ], "wc_limitations_avg": [ 6.25, 3.897114317029974 ], "wc_review_avg": [ 303.0, 82.34682750416071 ], "wc_reply_reviewers_avg": [ 54.0, 36.952672433803755 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16083915228251281124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "shu.edu.cn;shu.edu.cn;shu.edu.cn;shu.edu.cn;shu.edu;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai University", "aff_unique_dep": "", "aff_unique_url": "https://www.shu.edu.cn", "aff_unique_abbr": "SHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Identification of Nonlinear Latent Hierarchical Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71500", "id": "Uc5yyiytR1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/065ef23a944b3995de7dd4a3e203d133-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Uc5yyiytR1", "openreview": "https://openreview.net/forum?id=Uc5yyiytR1", "poster": "/media/PosterPDFs/NeurIPS%202023/71500.png?t=1701833261.7024012", "slides": "https://nips.cc/virtual/2023/poster/71500", "video": "https://nips.cc/virtual/2023/poster/71500", "author_site": "Lingjing Kong, Biwei Huang, Feng Xie, Eric Xing, Yuejie Chi, Kun Zhang", "tldr": "", "abstract": "Identifying latent variables and causal structures from observational data is essential to many real-world applications involving biological data, medical data, and unstructured data such as images and languages. However, this task can be highly challenging, especially when observed variables are generated by causally related latent variables and the relationships are nonlinear. \n\nIn this work, we investigate the identification problem for nonlinear latent hierarchical causal models in which observed variables are generated by a set of causally related latent variables, and some latent variables may not have observed children. We show that the identifiability of causal structures and latent variables (up to invertible transformations) can be achieved under mild assumptions: on causal structures, we allow for multiple paths between any pair of variables in the graph, which relaxes latent tree assumptions in prior work; on structural functions, we permit general nonlinearity and multi-dimensional continuous variables, alleviating existing work's parametric assumptions. Specifically, we first develop an identification criterion in the form of novel identifiability guarantees for an elementary latent variable model. 
Leveraging this criterion, we show that both causal structures and latent variables of the hierarchical model can be identified asymptotically by explicitly constructing an estimation procedure. To the best of our knowledge, our work is the first to establish identifiability guarantees for both causal structures and latent variables in nonlinear latent hierarchical models.", "keywords": "Causal discovery;causal representation learning;latent variable models;causal structure learning;causal identifiability.", "primary_area": "", "supplementary_material": "", "author": "Lingjing Kong;Biwei Huang;Feng Xie;Eric Xing;Yuejie Chi;Kun Zhang", "authorids": "~Lingjing_Kong1;~Biwei_Huang1;~Feng_Xie1;~Eric_Xing1;~Yuejie_Chi1;~Kun_Zhang1", "gender": "M;F;M;M;;M", "homepage": "https://lingjing-kong.github.io/;;https://fengxie.site/;http://www.cs.cmu.edu/~epxing/;;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "158/1994-1.html;165/3288;11/4605-2;36/3855;;96/3115-1", "google_scholar": "4hAlzvkAAAAJ;;stLFCtQAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;;RGoypN4AAAAJ", "orcid": ";;0000-0001-7229-3955;;;", "linkedin": ";;;;;", "or_profile": "~Lingjing_Kong1;~Biwei_Huang1;~Feng_Xie1;~Eric_Xing1;~Yuejie_Chi1;~Kun_Zhang1", "aff": "Computer Science Department, School of Computer Science;University of California, San Diego;Beijing Technology and Business University;School of Computer Science, Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "csd.cs.cmu.edu;ucsd.edu;btbu.edu.cn;cs.cmu.edu;;cmu.edu", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nkong2023identification,\ntitle={Identification of Nonlinear Latent Hierarchical Models},\nauthor={Lingjing Kong and Biwei Huang and Feng Xie and Eric Xing and Yuejie Chi and Kun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Uc5yyiytR1}\n}", "github": "", "project": "", "reviewers": "jcJm;mbLW;wEjX;gbV3", "pdf_size": 1064925, "rating": "4;5;5;7", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;2;3", "wc_summary": "90;45;56;175", "wc_strengths": "19;17;39;191", "wc_weaknesses": "573;78;91;135", "wc_questions": "42;48;451;8", "wc_limitations": "4;7;30;30", "wc_review": "728;195;667;539", "wc_reply_reviewers": "464;411;113;15", "wc_reply_authors": "1099;1567;194;13", "reply_reviewers": "1;4;1;1", "reply_authors": "5;6;3;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 50.98284025042151 ], "wc_strengths_avg": [ 66.5, 72.39302452584779 ], "wc_weaknesses_avg": [ 219.25, 205.3270257418638 ], "wc_questions_avg": [ 137.25, 181.78472845649054 ], "wc_limitations_avg": [ 17.75, 12.295832627357937 ], "wc_review_avg": [ 532.25, 206.31211185967732 ], "wc_reply_reviewers_avg": [ 250.75, 190.85907759391483 ], "wc_reply_authors_avg": [ 718.25, 639.8364537129781 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 4.0, 1.5811388300841898 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1600845569253375108&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": 
"csd.cs.cmu.edu;ucsd.edu;btbu.edu.cn;cs.cmu.edu;;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "School of Computer Science;University of California, San Diego;Beijing Technology and Business University;Carnegie Mellon University", "aff_unique_dep": "Computer Science Department;;;School of Computer Science", "aff_unique_url": ";https://www.ucsd.edu;http://www.btbu.edu.cn;https://www.cmu.edu", "aff_unique_abbr": ";UCSD;BTBU;CMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San Diego;Pittsburgh", "aff_country_unique_index": "1;2;1;1", "aff_country_unique": ";United States;China" }, { "title": "Generating Images with Multimodal Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71499", "id": "Uczck6TlSZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43a69d143273bd8215578bde887bb552-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Uczck6TlSZ", "openreview": "https://openreview.net/forum?id=Uczck6TlSZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71499.png?t=1698460413.877531", "slides": "https://nips.cc/virtual/2023/poster/71499", "video": "https://nips.cc/virtual/2023/poster/71499", "author_site": "Jing Yu Koh, Daniel Fried, Russ Salakhutdinov", "tldr": "", "abstract": "We propose a method to fuse frozen text-only large language models (LLMs) with pre-trained image encoder and decoder models, by mapping between their embedding spaces. Our model demonstrates a wide suite of multimodal capabilities: image retrieval, novel image generation, and multimodal dialogue. Ours is the first approach capable of conditioning on arbitrarily interleaved image and text inputs to generate coherent image (and text) outputs. To achieve strong performance on image generation, we propose an efficient mapping network to ground the LLM to an off-the-shelf text-to-image generation model. This mapping network translates hidden representations of text into the embedding space of the visual models, enabling us to leverage the strong text representations of the LLM for visual outputs. Our approach outperforms baseline generation models on tasks with longer and more complex language. In addition to novel image generation, our model is also capable of image retrieval from a prespecified dataset, and decides whether to retrieve or generate at inference time. This is done with a learnt decision module which conditions on the hidden representations of the LLM. Our model exhibits a wider range of capabilities compared to prior multimodal language models. 
It can process image-and-text inputs, and produce retrieved images, generated images, and generated text \u2014 outperforming non-LLM based generation models across several text-to-image tasks that measure context dependence.", "keywords": "multimodal;vision-and-language;language models", "primary_area": "", "supplementary_material": "/attachment/543589e862bf92dab570a4f5af209fc1f5526923.pdf", "author": "Jing Yu Koh;Daniel Fried;Ruslan Salakhutdinov", "authorids": "~Jing_Yu_Koh2;~Daniel_Fried1;~Ruslan_Salakhutdinov1", "gender": "M;M;M", "homepage": "https://dpfried.github.io/;https://jykoh.com;https://www.cs.cmu.edu/~rsalakhu/", "dblp": "117/4804;182/2222;", "google_scholar": "sJDqACEAAAAJ;iGLKl7cAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Daniel_Fried1;~Jing_Yu_Koh1;~Russ_Salakhutdinov1", "aff": "Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cs.cmu.edu", "position": "Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nkoh2023generating,\ntitle={Generating Images with Multimodal Language Models},\nauthor={Jing Yu Koh and Daniel Fried and Ruslan Salakhutdinov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Uczck6TlSZ}\n}", "github": "", "project": "", "reviewers": "bnn7;Gk8n;QYwk;5sAf", "pdf_size": 5024254, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "3;2;2;3", "wc_summary": "133;96;38;64", "wc_strengths": "13;111;121;125", "wc_weaknesses": "185;276;584;117", "wc_questions": "1;3;73;121", "wc_limitations": "139;3;20;6", "wc_review": "471;489;836;433", "wc_reply_reviewers": "0;22;77;53", "wc_reply_authors": "0;34;56;46", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 82.75, 35.54838252297846 ], "wc_strengths_avg": [ 92.5, 46.18170633486814 ], "wc_weaknesses_avg": [ 290.5, 178.5952127017967 ], "wc_questions_avg": [ 49.5, 50.44551516240071 ], "wc_limitations_avg": [ 42.0, 56.36931789546508 ], "wc_review_avg": [ 557.25, 162.20107120484747 ], "wc_reply_reviewers_avg": [ 38.0, 29.351320242878344 ], "wc_reply_authors_avg": [ 34.0, 21.118712081942874 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 281, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18125197563162628220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cmu.edu;cmu.edu;cs.cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "MoCa: Measuring Human-Language Model Alignment on Causal and Moral Judgment Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71498", "id": "UdByCgCNdr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f751c6f8bfb52c60f43942896fe65904-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=UdByCgCNdr", "openreview": "https://openreview.net/forum?id=UdByCgCNdr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71498", "video": "https://nips.cc/virtual/2023/poster/71498", "author_site": "Allen Nie, Yuhui Zhang, Atharva Shailesh Amdekar, Chris Piech, Tatsunori Hashimoto, Tobias Gerstenberg", "tldr": "", "abstract": "Human commonsense understanding of the physical and social world is organized around intuitive theories. These theories support making causal and moral judgments. When something bad happens, we naturally ask: who did what, and why? A rich literature in cognitive science has studied people's causal and moral intuitions. This work has revealed a number of factors that systematically influence people's judgments, such as the violation of norms and whether the harm is avoidable or inevitable. We collected a dataset of stories from 24 cognitive science papers and developed a system to annotate each story with the factors they investigated. Using this dataset, we test whether large language models (LLMs) make causal and moral judgments about text-based scenarios that align with those of human participants. On the aggregate level, alignment has improved with more recent LLMs. However, using statistical analyses, we find that LLMs weigh the different factors quite differently from human participants. These results show how curated, challenge datasets combined with insights from cognitive science can help us go beyond comparisons based merely on aggregate metrics: we uncover LLMs implicit tendencies and show to what extent these align with human intuitions.", "keywords": "cognitive science;causal reasoning;moral reasoning;dataset;language models", "primary_area": "", "supplementary_material": "/attachment/15d148a6209d06828ceb29bbde5ffe0267302013.zip", "author": "Allen Nie;Yuhui Zhang;Atharva Amdekar;Christopher J Piech;Tatsunori Hashimoto;Tobias Gerstenberg", "authorids": "~Allen_Nie1;~Yuhui_Zhang3;~Atharva_Amdekar1;~Christopher_J_Piech1;~Tatsunori_Hashimoto1;~Tobias_Gerstenberg1", "gender": "M;M;M;M;M;M", "homepage": "https://anie.me;https://cs.stanford.edu/~yuhuiz/;http://www.amdekar.me;;https://thashim.github.io;http://cicl.stanford.edu/member/tobias_gerstenberg", "dblp": "207/7996;;;35/10987.html;;", "google_scholar": "r90OelAAAAAJ;X-Agfu8AAAAJ;EGrcQYgAAAAJ;;5ygiTwsAAAAJ;d0TfP8EAAAAJ", "orcid": ";;;;;0000-0002-9162-0779", "linkedin": ";;atharva-amdekar/;;;", "or_profile": "~Allen_Nie1;~Yuhui_Zhang3;~Atharva_Amdekar1;~Christopher_J_Piech1;~Tatsunori_Hashimoto1;~Tobias_Gerstenberg1", "aff": "Microsoft Research;Stanford University;Stanford University;;Stanford University;Stanford University", "aff_domain": "microsoft.com;stanford.edu;stanford.edu;;stanford.edu;stanford.edu", "position": "Intern;PhD student;MS student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nnie2023moca,\ntitle={MoCa: Measuring Human-Language Model Alignment on Causal and Moral Judgment Tasks},\nauthor={Allen Nie and Yuhui Zhang and Atharva Amdekar and Christopher J Piech and Tatsunori Hashimoto and Tobias Gerstenberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UdByCgCNdr}\n}", "github": "", "project": "", "reviewers": "DYrH;HXMa;7W7v;duEY;sFvB", "pdf_size": 2884363, "rating": "6;7;7;7;7", "confidence": "4;4;4;2;3", "soundness": "2;3;3;4;3", "novelty": "2;3;3;4;3", "presentation": "3;2;3;3;4", "wc_summary": "100;90;80;89;64", "wc_strengths": 
"112;199;57;87;44", "wc_weaknesses": "123;221;23;58;78", "wc_questions": "48;195;82;77;135", "wc_limitations": "32;206;1;12;1", "wc_review": "415;911;243;323;322", "wc_reply_reviewers": "46;31;13;40;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 84.6, 12.09297316626478 ], "wc_strengths_avg": [ 99.8, 54.952342989175634 ], "wc_weaknesses_avg": [ 100.6, 68.33037391965595 ], "wc_questions_avg": [ 107.4, 52.025378422458395 ], "wc_limitations_avg": [ 50.4, 78.619590433937 ], "wc_review_avg": [ 442.8, 240.3534064663948 ], "wc_reply_reviewers_avg": [ 29.4, 12.753038853543888 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.37500000000000017, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4922031930568301904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "microsoft.com;stanford.edu;stanford.edu;;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Microsoft;Stanford University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.stanford.edu", "aff_unique_abbr": "MSR;Stanford", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Double Gumbel Q-Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71497", "id": "UdaTyy0BNB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/07956d40074d6523bad11112b3225c6e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UdaTyy0BNB", "openreview": "https://openreview.net/forum?id=UdaTyy0BNB", "poster": "/media/PosterPDFs/NeurIPS%202023/71497.png?t=1699525497.69787", "slides": "https://nips.cc/virtual/2023/poster/71497", "video": "https://nips.cc/virtual/2023/poster/71497", "author_site": "David Yu-Tung Hui, Aaron Courville, Pierre-Luc Bacon", "tldr": "", "abstract": "We show that Deep Neural Networks introduce two heteroscedastic Gumbel noise sources into Q-Learning. To account for these noise sources, we propose Double Gumbel Q-Learning, a Deep Q-Learning algorithm applicable for both discrete and continuous control. In discrete control, we derive a closed-form expression for the loss function of our algorithm. In continuous control, this loss function is intractable and we therefore derive an approximation with a hyperparameter whose value regulates pessimism in Q-Learning. 
We present a default value for our pessimism hyperparameter that enables DoubleGum to outperform DDPG, TD3, SAC, XQL, quantile regression, and Mixture-of-Gaussian Critics in aggregate over 33 tasks from DeepMind Control, MuJoCo, MetaWorld, and Box2D and show that tuning this hyperparameter may further improve sample efficiency.", "keywords": "deep reinforcement learning;Q-Learning;TD-Learning with function approximation;extreme value theory;maximum-likelihood estimation;moment-matching", "primary_area": "", "supplementary_material": "/attachment/0fb2e7a941d10f9a0bd9e6e8c4a47fd9c2dacd0c.pdf", "author": "David Yu-Tung Hui;Aaron Courville;Pierre-Luc Bacon", "authorids": "~David_Yu-Tung_Hui1;~Aaron_Courville3;~Pierre-Luc_Bacon1", "gender": ";;", "homepage": ";;", "dblp": ";56/1688;", "google_scholar": "pXHOdMwAAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~David_Yu-Tung_Hui1;~Aaron_Courville3;~Pierre-Luc_Bacon1", "aff": ";Universit\u00e9 de Montr\u00e9al;", "aff_domain": "; ;", "position": ";Assistant Professor;", "bibtex": "@inproceedings{\nhui2023double,\ntitle={Double Gumbel Q-Learning},\nauthor={David Yu-Tung Hui and Aaron Courville and Pierre-Luc Bacon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UdaTyy0BNB}\n}", "github": "", "project": "", "reviewers": "uPm8;Mh9L;je5E;WFvy;dXQf", "pdf_size": 3322860, "rating": "4;6;6;6;6", "confidence": "4;2;4;3;4", "soundness": "2;3;2;2;3", "novelty": "2;3;3;2;2", "presentation": "2;3;3;3;3", "wc_summary": "47;33;87;138;77", "wc_strengths": "46;43;97;62;12", "wc_weaknesses": "53;69;207;176;109", "wc_questions": "391;75;12;98;180", "wc_limitations": "2;29;1;7;1", "wc_review": "539;249;404;481;379", "wc_reply_reviewers": "566;21;33;0;12", "wc_reply_authors": "1155;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 76.4, 36.48342089223542 ], "wc_strengths_avg": [ 52.0, 27.720028860013837 ], "wc_weaknesses_avg": [ 122.8, 59.794314110958744 ], "wc_questions_avg": [ 151.2, 131.4129369582767 ], "wc_limitations_avg": [ 8.0, 10.73312629199899 ], "wc_review_avg": [ 410.4, 98.58924890676468 ], "wc_reply_reviewers_avg": [ 126.4, 220.0659901029689 ], "wc_reply_authors_avg": [ 231.0, 462.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.37500000000000017, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13986834234894434032&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "; ;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": "", "aff_unique_url": "https://www.umontreal.ca", "aff_unique_abbr": "UdeM", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "When can Regression-Adjusted Control Variate Help? 
Rare Events, Sobolev Embedding and Minimax Optimality", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71496", "id": "UdrybSp67L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/730ce0ae730f39e4d77b0f04a8afe4be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UdrybSp67L", "openreview": "https://openreview.net/forum?id=UdrybSp67L", "poster": "/media/PosterPDFs/NeurIPS%202023/71496.png?t=1699291556.749196", "slides": "https://nips.cc/virtual/2023/poster/71496", "video": "https://nips.cc/virtual/2023/poster/71496", "author_site": "Jose Blanchet, Haoxuan Chen, Yiping Lu, Yiping Lu, Lexing Ying", "tldr": "", "abstract": "This paper studies the use of a machine learning-based estimator as a control variate for mitigating the variance of Monte Carlo sampling. Specifically, we seek to uncover the key factors that influence the efficiency of control variates in reducing variance. We examine a prototype estimation problem that involves simulating the moments of a Sobolev function based on observations obtained from (random) quadrature nodes. Firstly, we establish an information-theoretic lower bound for the problem. We then study a specific quadrature rule that employs a nonparametric regression-adjusted control variate to reduce the variance of the Monte Carlo simulation. We demonstrate that this kind of quadrature rule can improve the Monte Carlo rate and achieve the minimax optimal rate under a sufficient smoothness assumption. Due to the Sobolev Embedding Theorem, the sufficient smoothness assumption eliminates the existence of rare and extreme events. Finally, we show that, in the presence of rare and extreme events, a truncated version of the Monte Carlo algorithm can achieve the minimax optimal rate while the control variate cannot improve the convergence rate.", "keywords": "Information-theoretic Lower Bounds;Sobolev Embedding Theorem;Quadrature Rule", "primary_area": "", "supplementary_material": "/attachment/1336a5b93304b1344f1429fa44c10cd7944d3840.pdf", "author": "Jose Blanchet;Haoxuan Chen;Yiping Lu;Lexing Ying", "authorids": "~Jose_Blanchet1;~Haoxuan_Chen1;~Yiping_Lu1;~Lexing_Ying1", "gender": "M;M;M;", "homepage": "https://web.stanford.edu/~jblanche/;https://haoxuanstevec00.github.io/;https://2prime.github.io/;http://web.stanford.edu/~lexing", "dblp": "75/5093.html;212/7201.html;93/683-1;68/3945", "google_scholar": "https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;https://scholar.google.com/citations?hl=en;NmhvVBgAAAAJ;OwA3zyMAAAAJ", "orcid": ";0000-0002-8238-2764;;", "linkedin": "jose-blanchet;haoxuan-steve-chen-748b0a171/;;", "or_profile": "~Jose_Blanchet1;~Haoxuan_Chen1;~Yiping_Lu1;~Lexing_Ying1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Professor;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nblanchet2023when,\ntitle={When can Regression-Adjusted Control Variate Help? 
Rare Events, Sobolev Embedding and Minimax Optimality},\nauthor={Jose Blanchet and Haoxuan Chen and Yiping Lu and Lexing Ying},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UdrybSp67L}\n}", "github": "", "project": "", "reviewers": "gXt5;Z34k;pkMo;Qqae", "pdf_size": 1156378, "rating": "3;5;6;7", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "17;26;191;124", "wc_strengths": "17;89;37;36", "wc_weaknesses": "18;156;34;186", "wc_questions": "15;349;118;25", "wc_limitations": "18;1;2;13", "wc_review": "85;621;382;384", "wc_reply_reviewers": "0;136;0;51", "wc_reply_authors": "0;458;0;13", "reply_reviewers": "0;1;0;1", "reply_authors": "0;2;1;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.5, 72.07808266040378 ], "wc_strengths_avg": [ 44.75, 26.76167969317322 ], "wc_weaknesses_avg": [ 98.5, 73.48979520994735 ], "wc_questions_avg": [ 126.75, 134.45515051495795 ], "wc_limitations_avg": [ 8.5, 7.22841614740048 ], "wc_review_avg": [ 368.0, 190.0986585960038 ], "wc_reply_reviewers_avg": [ 46.75, 55.57596153014359 ], "wc_reply_authors_avg": [ 117.75, 196.51510756173428 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.82915619758885 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13620524818553289910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gigastep - One Billion Steps per Second Multi-agent Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73577", "id": "UgPAaEugH3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/00ba06ba5c324efdfb068865ca44cf0b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=UgPAaEugH3", "openreview": "https://openreview.net/forum?id=UgPAaEugH3", "poster": "/media/PosterPDFs/NeurIPS%202023/73577.png?t=1701723349.1656482", "slides": "https://nips.cc/virtual/2023/poster/73577", "video": "https://nips.cc/virtual/2023/poster/73577", "author_site": "Mathias Lechner, lianhao yin, Tim Seyde, Tsun-Hsuan Johnson Wang, Wei Xiao, Ramin Hasani, Joshua Rountree, Daniela Rus", "tldr": "", "abstract": "Multi-agent reinforcement learning (MARL) research is faced with a trade-off: it either uses complex environments requiring large compute resources, which makes it inaccessible to researchers with limited resources, or relies on simpler dynamics for faster execution, which makes the transferability of the results to more realistic tasks challenging. 
Motivated by these challenges, we present Gigastep, a fully vectorizable MARL environment implemented in JAX, capable of executing up to one billion environment steps per second on consumer-grade hardware. Its design allows for comprehensive MARL experimentation, including a complex, high-dimensional space defined by 3D dynamics, stochasticity, and partial observations. Gigastep supports both collaborative and adversarial tasks, continuous and discrete action spaces, and provides RGB image and feature vector observations, allowing the evaluation of a wide range of MARL algorithms. \nWe validate Gigastep's usability through an extensive set of experiments, underscoring its role in widening participation and promoting inclusivity in the MARL research community.", "keywords": "Multi-agent reinforcement learning;JAX", "primary_area": "", "supplementary_material": "/attachment/ee34cafa3aed4fcfa26caab2f97c320fa4643483.pdf", "author": "Mathias Lechner;Lianhao Yin;Tim Seyde;Tsun-Hsuan Wang;Wei Xiao;Ramin Hasani;Joshua Rountree;Daniela Rus", "authorids": "~Mathias_Lechner1;~Lianhao_Yin1;~Tim_Seyde1;~Tsun-Hsuan_Wang2;~Wei_Xiao2;~Ramin_Hasani1;~Joshua_Rountree1;~Daniela_Rus1", "gender": "Unspecified;M;;M;M;;F;M", "homepage": "https://mlech26l.github.io/pages/;https://www.csail.mit.edu/person/lianhao-yin;;https://zswang666.github.io/;;https://www.aiaccelerator.af.mil/AIA-Team/Article-View/Article/3303034/josh-rountree/;https://www.csail.mit.edu/person/daniela-rus;http://www.raminhasani.com", "dblp": "209/9862;;226/6408;217/1809.html;20/4794-3;;r/DanielaRus;190/3168", "google_scholar": "https://scholar.google.at/citations?hl=en;;FJ7ILzkAAAAJ;xE3WSuYAAAAJ;BxdZJNQAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.at/citations?user=YarJF3QAAAAJ", "orcid": ";;;;;;;0000-0002-9889-5222", "linkedin": ";;;;;;;raminhasani/", "or_profile": "~Mathias_Lechner1;~Lianhao_Yin1;~Tim_Seyde1;~Tsun-Hsuan_Wang2;~Wei_Xiao2;~Joshua_Rountree1;~Daniela_Rus1;~Ramin_M._Hasani1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;;mit.edu;mit.edu", "position": "Postdoc;Postdoc;Student;PhD student;Postdoc;;Full Professor;Researcher", "bibtex": "@inproceedings{\nlechner2023gigastep,\ntitle={Gigastep - One Billion Steps per Second Multi-agent Reinforcement Learning},\nauthor={Mathias Lechner and Lianhao Yin and Tim Seyde and Tsun-Hsuan Wang and Wei Xiao and Ramin Hasani and Joshua Rountree and Daniela Rus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=UgPAaEugH3}\n}", "github": "", "project": "", "reviewers": "rWA8;6dKh;zXRT;PUtM", "pdf_size": 932346, "rating": "3;7;7;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "160;121;57;51", "wc_strengths": "115;132;63;82", "wc_improvement": "218;112;8;50", "wc_limitations": "293;10;8;5", "wc_correctness": "3;7;12;8", "wc_clarity": "33;6;8;1", "wc_relation_to_prior_work": "10;17;4;9", "wc_documentation": "1;11;6;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "834;417;167;216", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 6.5, 2.179449471770337 ], "confidence_avg": [
3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 97.25, 45.44433408027892 ], "wc_strengths_avg": [ 98.0, 27.046256672597043 ], "wc_improvement_avg": [ 97.0, 79.05061669588669 ], "wc_limitations_avg": [ 79.0, 123.56577195971383 ], "wc_correctness_avg": [ 7.5, 3.2015621187164243 ], "wc_clarity_avg": [ 12.0, 12.389511693363866 ], "wc_relation_to_prior_work_avg": [ 10.0, 4.636809247747852 ], "wc_documentation_avg": [ 6.75, 3.766629793329841 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 408.5, 262.91681193868146 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10773080322331467057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;;mit.edu;mit.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DAMEX: Dataset-aware Mixture-of-Experts for visual understanding of mixture-of-datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71495", "id": "UgSSOpqvPI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc192b3eeffebba21bd1d82f6752b84b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UgSSOpqvPI", "openreview": "https://openreview.net/forum?id=UgSSOpqvPI", "poster": "/media/PosterPDFs/NeurIPS%202023/71495.png?t=1702171851.6359549", "slides": "https://nips.cc/virtual/2023/poster/71495", "video": "https://nips.cc/virtual/2023/poster/71495", "author_site": "Yash Jain, Harkirat Behl, Zsolt Kira, Vibhav Vineet", "tldr": "", "abstract": "Construction of a universal detector poses a crucial question: How can we most effectively train a model on a large mixture of datasets? \n The answer lies in learning dataset-specific features and ensembling their knowledge, all within a single model.\n Previous methods achieve this by having separate detection heads on a common backbone, but that results in a significant increase in parameters.\n In this work, we present Mixture-of-Experts as a solution, highlighting that MoE is much more than a scalability tool. \n We propose Dataset-Aware Mixture-of-Experts, DAMEX, where we train the experts to become an `expert' of a dataset by learning to route each dataset's tokens to its mapped expert.\n Experiments on the Universal Object-Detection Benchmark show that we outperform the existing state-of-the-art by an average +10.2 AP score and improve over our non-MoE baseline by an average +2.0 AP score. We also observe consistent gains while mixing datasets with (1) limited availability, (2) disparate domains and (3) divergent label sets.\n Further, we qualitatively show that DAMEX is robust against expert representation collapse. 
Code is available at https://github.com/jinga-lala/DAMEX", "keywords": "mixture-of-experts;moe;object detection;mixture of datasets;multiple datasets", "primary_area": "", "supplementary_material": "/attachment/97a7d73dd80a40d8a0a37c7d55045da7320b58f2.pdf", "author": "Yash Jain;Harkirat Behl;Zsolt Kira;Vibhav Vineet", "authorids": "~Yash_Jain1;~Harkirat_Behl1;~Zsolt_Kira1;~Vibhav_Vineet5", "gender": "M;M;M;", "homepage": "https://yash-jain.com;https://harkiratbehl.github.io/;https://faculty.cc.gatech.edu/~zk15;", "dblp": "255/2617;199/2125;36/4127;", "google_scholar": "Fr6QHDsAAAAJ;R7k23-0AAAAJ;2a5XgNAAAAAJ;", "orcid": "0000-0002-5175-1352;;0000-0002-2626-2004;", "linkedin": "jinga-lala/;;;", "or_profile": "~Yash_Jain1;~Harkirat_Behl1;~Zsolt_Kira1;~Vibhav_Vineet5", "aff": "Georgia Institute of Technology;Microsoft Research;Georgia Tech Research Institute;", "aff_domain": "gatech.edu;microsoft.com;gtri.gatech.edu;", "position": "MS student;Researcher;Senior Research Scientist;", "bibtex": "@inproceedings{\njain2023damex,\ntitle={{DAMEX}: Dataset-aware Mixture-of-Experts for visual understanding of mixture-of-datasets},\nauthor={Yash Jain and Harkirat Behl and Zsolt Kira and Vibhav Vineet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UgSSOpqvPI}\n}", "github": "", "project": "", "reviewers": "xzJh;962D;2x26;1BaQ;UzKA", "pdf_size": 2942887, "rating": "5;5;5;6;6", "confidence": "4;3;4;4;3", "soundness": "2;2;2;3;3", "novelty": "2;3;2;2;3", "presentation": "3;3;3;2;3", "wc_summary": "82;104;68;138;55", "wc_strengths": "78;110;21;112;48", "wc_weaknesses": "139;194;277;204;130", "wc_questions": "33;146;3;51;242", "wc_limitations": "21;1;15;117;12", "wc_review": "353;555;384;622;487", "wc_reply_reviewers": "0;19;19;208;0", "wc_reply_authors": "0;0;0;174;0", "reply_reviewers": "0;1;1;2;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 89.4, 29.227384419410505 ], "wc_strengths_avg": [ 73.8, 35.329307946802466 ], "wc_weaknesses_avg": [ 188.8, 52.86738124779778 ], "wc_questions_avg": [ 95.0, 87.69720634090918 ], "wc_limitations_avg": [ 33.2, 42.400000000000006 ], "wc_review_avg": [ 480.2, 101.17588645522213 ], "wc_reply_reviewers_avg": [ 49.2, 79.85336561473162 ], "wc_reply_authors_avg": [ 34.8, 69.6 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.16666666666666666, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10747539610935927889&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "gatech.edu;microsoft.com;gtri.gatech.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Georgia Institute of Technology;Microsoft;Georgia Tech Research Institute", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.gatech.edu;https://www.microsoft.com/en-us/research;https://www.gtri.gatech.edu", "aff_unique_abbr": "Georgia Tech;MSR;GTRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Agent First Order Constrained Optimization in Policy Space", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71494", "id": "UgomCjCWjC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7b64c47dcb067efd6be5eee854c14835-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UgomCjCWjC", "openreview": "https://openreview.net/forum?id=UgomCjCWjC", "poster": "/media/PosterPDFs/NeurIPS%202023/71494.png?t=1701330083.740714", "slides": "https://nips.cc/virtual/2023/poster/71494", "video": "https://nips.cc/virtual/2023/poster/71494", "author_site": "Youpeng Zhao, Yaodong Yang, Zhenbo Lu, Wengang Zhou, Houqiang Li", "tldr": "", "abstract": "In the realm of multi-agent reinforcement learning (MARL), achieving high performance is crucial for a successful multi-agent system.\nMeanwhile, the ability to avoid unsafe actions is becoming an urgent and imperative problem to solve for real-life applications. \nWhereas, it is still challenging to develop a safety-aware method for multi-agent systems in MARL. In this work, we introduce a novel approach called Multi-Agent First Order Constrained Optimization in Policy Space (MAFOCOPS), which effectively addresses the dual objectives of attaining satisfactory performance and enforcing safety constraints. Using data generated from the current policy, MAFOCOPS first finds the optimal update policy by solving a constrained optimization problem in the nonparameterized policy space. Then, the update policy is projected back into the parametric policy space to achieve a feasible policy. Notably, our method is first-order in nature, ensuring the ease of implementation, and exhibits an approximate upper bound on the worst-case constraint violation. Empirical results show that our approach achieves remarkable performance while satisfying safe constraints on several safe MARL benchmarks.", "keywords": "Safe Multi-agent Reinforcement Learning;constrained policy optimisation;first-order optimisation", "primary_area": "", "supplementary_material": "/attachment/91c4b1ca689410058b621978ea81557691559ff7.zip", "author": "Youpeng Zhao;Yaodong Yang;Zhenbo Lu;Wengang Zhou;Houqiang Li", "authorids": "~Youpeng_Zhao1;~Yaodong_Yang1;~Zhenbo_Lu1;~Wengang_Zhou1;~Houqiang_Li1", "gender": "M;M;M;M;M", "homepage": ";https://www.yangyaodong.com;;http://staff.ustc.edu.cn/~zhwg/index.html;https://staff.ustc.edu.cn/~lihq/", "dblp": ";170/1496-1;42/501;22/4544-1;59/7017.html", "google_scholar": ";https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;8s1JF8YAAAAJ;7sFMIKoAAAAJ", "orcid": "0000-0002-4610-3545;0000-0001-8132-5613;;0000-0003-1690-9836;0000-0003-2188-3028", "linkedin": ";yaodong-yang;;;", "or_profile": "~Youpeng_Zhao1;~Yaodong_Yang1;~Zhenbo_Lu1;~Wengang_Zhou1;~Houqiang_Li1", "aff": "University of Science and Technology of China;Peking University;Institute of Artificial Intelligence, Hefei Comprehensive National Science Center;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;pku.edu.cn;iai.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Assistant Professor;Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nzhao2023multiagent,\ntitle={Multi-Agent First Order Constrained Optimization in Policy Space},\nauthor={Youpeng Zhao and Yaodong Yang and Zhenbo Lu and Wengang Zhou and Houqiang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UgomCjCWjC}\n}", "github": "", "project": "", "reviewers": 
"eMiG;fTdt;DYiL;VTR2", "pdf_size": 7468002, "rating": "3;6;6;7", "confidence": "3;2;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "28;25;65;108", "wc_strengths": "18;29;172;191", "wc_weaknesses": "104;65;99;46", "wc_questions": "7;4;88;213", "wc_limitations": "24;14;42;55", "wc_review": "181;137;466;613", "wc_reply_reviewers": "130;86;82;0", "wc_reply_authors": "246;72;71;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 56.5, 33.64892271678248 ], "wc_strengths_avg": [ 102.5, 79.38041320124253 ], "wc_weaknesses_avg": [ 78.5, 24.026027553467927 ], "wc_questions_avg": [ 78.0, 84.91466304472979 ], "wc_limitations_avg": [ 33.75, 15.848895860595462 ], "wc_review_avg": [ 349.25, 197.8337370116634 ], "wc_reply_reviewers_avg": [ 74.5, 46.9547654663507 ], "wc_reply_authors_avg": [ 97.25, 90.70660119307745 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1243557504543074054&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;pku.edu.cn;iai.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Science and Technology of China;Peking University;Hefei Comprehensive National Science Center", "aff_unique_dep": ";;Institute of Artificial Intelligence", "aff_unique_url": "http://www.ustc.edu.cn;http://www.pku.edu.cn;http://www.hfcn.edu.cn", "aff_unique_abbr": "USTC;Peking U;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hefei", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "LoCoOp: Few-Shot Out-of-Distribution Detection via Prompt Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71493", "id": "UjtiLdXGMC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0606b882692637835e8ac981089eccd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UjtiLdXGMC", "openreview": "https://openreview.net/forum?id=UjtiLdXGMC", "poster": "/media/PosterPDFs/NeurIPS%202023/71493.png?t=1701498238.1197982", "slides": "https://nips.cc/virtual/2023/poster/71493", "video": "https://nips.cc/virtual/2023/poster/71493", "author_site": "Atsuyuki Miyai, Qing Yu, Go Irie, Kiyoharu Aizawa", "tldr": "", "abstract": "We present a novel vision-language prompt learning approach for few-shot out-of-distribution (OOD) detection. Few-shot OOD detection aims to detect OOD images from classes that are unseen during training using only a few labeled in-distribution (ID) images. While prompt learning methods such as CoOp have shown effectiveness and efficiency in few-shot ID classification, they still face limitations in OOD detection due to the potential presence of ID-irrelevant information in text embeddings. To address this issue, we introduce a new approach called $\\textbf{Lo}$cal regularized $\\textbf{Co}$ntext $\\textbf{Op}$timization (LoCoOp), which performs OOD regularization that utilizes the portions of CLIP local features as OOD features during training. 
CLIP's local features contain many ID-irrelevant nuisances ($\\textit{e.g.}$, backgrounds), and by learning to push them away from the ID class text embeddings, we can remove the nuisances in the ID class text embeddings and enhance the separation between ID and OOD. Experiments on the large-scale ImageNet OOD detection benchmarks demonstrate the superiority of our LoCoOp over zero-shot, fully supervised detection methods and prompt learning methods. Notably, even in a one-shot setting -- just one label per class -- LoCoOp outperforms existing zero-shot and fully supervised detection methods. The code is available via https://github.com/AtsuMiyai/LoCoOp.", "keywords": "out-of-distribution detection;vision-language foundation model;prompt learning", "primary_area": "", "supplementary_material": "/attachment/8704b842a0a6db43bb4c4c3541d95fc9611d3caa.zip", "author": "Atsuyuki Miyai;Qing Yu;Go Irie;Kiyoharu Aizawa", "authorids": "~Atsuyuki_Miyai1;~Qing_Yu2;~Go_Irie3;~Kiyoharu_Aizawa1", "gender": "M;M;M;M", "homepage": "https://atsumiyai.github.io/;http://yu1ut.com/;;https://sites.google.com/view/aizawa-kiyoharu", "dblp": "331/8141;;98/7454;71/5426", "google_scholar": "https://scholar.google.co.jp/citations?hl=ja;As3ImtEAAAAJ;2bCSG1AAAAAJ;https://scholar.google.co.jp/citations?user=CJRhhi0AAAAJ", "orcid": ";0000-0001-6965-9581;0000-0002-4309-4700;0000-0003-2146-6275", "linkedin": ";;;", "or_profile": "~Atsuyuki_Miyai1;~Qing_Yu2;~Go_Irie3;~Kiyoharu_Aizawa1", "aff": "The University of Tokyo;The University of Tokyo;NTT;The University of Tokyo", "aff_domain": "t.u-tokyo.ac.jp;t.u-tokyo.ac.jp;ntt.co.jp;u-tokyo.ac.jp", "position": "MS student;PhD student;Visiting Researcher;Full Professor", "bibtex": "@inproceedings{\nmiyai2023locoop,\ntitle={LoCoOp: Few-Shot Out-of-Distribution Detection via Prompt Learning},\nauthor={Atsuyuki Miyai and Qing Yu and Go Irie and Kiyoharu Aizawa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UjtiLdXGMC}\n}", "github": "", "project": "", "reviewers": "epH7;wEUW;uDwi;Lcii;RaSR", "pdf_size": 1223939, "rating": "5;5;5;6;7", "confidence": "4;4;4;5;4", "soundness": "3;2;3;3;4", "novelty": "3;2;3;3;4", "presentation": "4;3;3;3;4", "wc_summary": "77;90;75;55;127", "wc_strengths": "82;61;57;33;129", "wc_weaknesses": "85;347;171;152;96", "wc_questions": "10;51;52;61;6", "wc_limitations": "9;1;10;6;7", "wc_review": "263;550;365;307;365", "wc_reply_reviewers": "49;34;38;16;107", "wc_reply_authors": "63;65;0;0;926", "reply_reviewers": "1;1;1;1;2", "reply_authors": "2;2;1;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 84.8, 23.886397802933786 ], "wc_strengths_avg": [ 72.4, 32.29612979909512 ], "wc_weaknesses_avg": [ 170.2, 94.18577387270331 ], "wc_questions_avg": [ 36.0, 23.16031087874254 ], "wc_limitations_avg": [ 6.6, 3.1368774282716245 ], "wc_review_avg": [ 370.0, 97.84477502656951 ], "wc_reply_reviewers_avg": [ 48.8, 30.979993544221404 ], "wc_reply_authors_avg": [ 210.8, 358.7441428093287 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 86, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=7905943071931908149&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "t.u-tokyo.ac.jp;t.u-tokyo.ac.jp;ntt.co.jp;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Tokyo;NTT Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "UTokyo;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Inner-Outer Aware Reconstruction Model for Monocular 3D Scene Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71492", "id": "UkAGqeWTuL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/27c852e9d6c76890ca633f111c556a4f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UkAGqeWTuL", "openreview": "https://openreview.net/forum?id=UkAGqeWTuL", "poster": "/media/PosterPDFs/NeurIPS%202023/71492.png?t=1701398326.510209", "slides": "https://nips.cc/virtual/2023/poster/71492", "video": "https://nips.cc/virtual/2023/poster/71492", "author_site": "Yu-Kun Qiu, Guo-Hao Xu, Wei-Shi Zheng", "tldr": "", "abstract": "Monocular 3D scene reconstruction aims to reconstruct the 3D structure of scenes based on posed images. Recent volumetric-based methods directly predict the truncated signed distance function (TSDF) volume and have achieved promising results. The memory cost of volumetric-based methods will grow cubically as the volume size increases, so a coarse-to-fine strategy is necessary for saving memory. Specifically, the coarse-to-fine strategy distinguishes surface voxels from non-surface voxels, and only potential surface voxels are considered in the succeeding procedure. However, the non-surface voxels have various features, and in particular, the voxels on the inner side of the surface are quite different from those on the outer side since there exists an intrinsic gap between them. Therefore, grouping inner-surface and outer-surface voxels into the same class will force the classifier to spend its capacity to bridge the gap. By contrast, it is relatively easy for the classifier to distinguish inner-surface and outer-surface voxels due to the intrinsic gap. Inspired by this, we propose the inner-outer aware reconstruction (IOAR) model. IOAR explores a new coarse-to-fine strategy to classify outer-surface, inner-surface and surface voxels. In addition, IOAR separates occupancy branches from TSDF branches to avoid mutual interference between them. Since our model can better classify the surface, outer-surface and inner-surface voxels, it can predict more precise meshes than existing methods. Experiment results on ScanNet, ICL-NUIM and TUM-RGBD datasets demonstrate the effectiveness and generalization of our model. 
The code is available at https://github.com/YorkQiu/InnerOuterAwareReconstruction.", "keywords": "3D reconstruction", "primary_area": "", "supplementary_material": "/attachment/cf7c7c95c8c6f3e53da57cf42f4081d1846bd6e5.zip", "author": "Yu-Kun Qiu;Guohao Xu;Wei-Shi Zheng", "authorids": "~Yu-Kun_Qiu1;~Guohao_Xu1;~Wei-Shi_Zheng3", "gender": "M;M;M", "homepage": "https://github.com/KwokhoTsui;http://www.isee-ai.cn/~zhwshi;https://github.com/YorkQiu", "dblp": "369/5953;30/8399;", "google_scholar": ";AwqDDGoAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Guohao_Xu1;~Wei-Shi_Zheng3;~Yukun_Qiu1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn", "position": "MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nqiu2023innerouter,\ntitle={Inner-Outer Aware Reconstruction Model for Monocular 3D Scene Reconstruction},\nauthor={Yu-Kun Qiu and Guohao Xu and Wei-Shi Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UkAGqeWTuL}\n}", "github": "", "project": "", "reviewers": "seEC;5s3L;Ga1p;xj9n;HJrK", "pdf_size": 2271635, "rating": "4;5;6;6;6", "confidence": "3;5;4;4;4", "soundness": "3;4;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;2;2;3", "wc_summary": "37;91;63;148;104", "wc_strengths": "15;49;81;149;30", "wc_weaknesses": "86;79;127;566;13", "wc_questions": "2;153;18;38;4", "wc_limitations": "7;4;21;39;23", "wc_review": "147;376;310;940;174", "wc_reply_reviewers": "0;18;34;138;0", "wc_reply_authors": "0;24;23;21;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;2;2;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 88.6, 37.65421623138636 ], "wc_strengths_avg": [ 64.8, 47.524309568893266 ], "wc_weaknesses_avg": [ 174.2, 199.2801043757254 ], "wc_questions_avg": [ 43.0, 56.48362594593233 ], "wc_limitations_avg": [ 18.8, 12.560254774486065 ], "wc_review_avg": [ 389.4, 288.0274986871913 ], "wc_reply_reviewers_avg": [ 38.0, 51.58294291720859 ], "wc_reply_authors_avg": [ 13.6, 11.146299834474219 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3952847075210474, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8zS59oMAirwJ:scholar.google.com/&scioq=Inner-Outer+Aware+Reconstruction+Model+for+Monocular+3D+Scene+Reconstruction&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Approximate Heavy Tails in Offline (Multi-Pass) Stochastic Gradient Descent", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71491", "id": "UkPeUXML7s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0525a72df7fb2cd943c780d059b94774-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UkPeUXML7s", "openreview": 
"https://openreview.net/forum?id=UkPeUXML7s", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71491", "video": "https://nips.cc/virtual/2023/poster/71491", "author_site": "Kruno Lehman, Alain Durmus, Umut Simsekli", "tldr": "", "abstract": "A recent line of empirical studies has demonstrated that SGD might exhibit a heavy-tailed behavior in practical settings, and the heaviness of the tails might correlate with the overall performance. In this paper, we investigate the emergence of such heavy tails. Previous works on this problem only considered, up to our knowledge, online (also called single-pass) SGD, in which the emergence of heavy tails in theoretical findings is contingent upon access to an infinite amount of data. Hence, the underlying mechanism generating the reported heavy-tailed behavior in practical settings, where the amount of training data is finite, is still not well-understood. Our contribution aims to fill this gap. In particular, we show that the stationary distribution of offline (also called multi-pass) SGD exhibits \u2018approximate\u2019 power-law tails and the approximation error is controlled by how fast the empirical distribution of the training data converges to the true underlying data distribution in the Wasserstein metric. Our main takeaway is that, as the number of data points increases, offline SGD will behave increasingly \u2018power-law-like\u2019. To achieve this result, we first prove nonasymptotic Wasserstein convergence bounds for offline SGD to online SGD as the number of data points increases, which can be interesting on their own. Finally, we illustrate our theory on various experiments conducted on synthetic data and neural networks.", "keywords": "SGD;heavy-tails;wasserstein convergence", "primary_area": "", "supplementary_material": "", "author": "Krunoslav Lehman Pavasovic;Alain Durmus;Umut Simsekli", "authorids": "~Krunoslav_Lehman_Pavasovic1;~Alain_Durmus1;~Umut_Simsekli1", "gender": ";M;M", "homepage": ";;https://www.di.ens.fr/~simsekli/", "dblp": ";01/11275;https://dblp.org/pers/s/Simsekli:Umut.html", "google_scholar": ";;https://scholar.google.fr/citations?user=CuArAkgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Krunoslav_Lehman_Pavasovic1;~Alain_Durmus1;~Umut_Simsekli1", "aff": ";\u00c9cole Polytechnique;INRIA", "aff_domain": ";polytechnique.fr;inria.fr", "position": ";Full Professor;Research Faculty", "bibtex": "@inproceedings{\npavasovic2023approximate,\ntitle={Approximate Heavy Tails in Offline (Multi-Pass) Stochastic Gradient Descent},\nauthor={Krunoslav Lehman Pavasovic and Alain Durmus and Umut Simsekli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UkPeUXML7s}\n}", "github": "", "project": "", "reviewers": "t9h8;dWGg;rBYn;VaqK", "pdf_size": 1408425, "rating": "6;7;7;8", "confidence": "2;3;3;4", "soundness": "3;4;3;4", "novelty": "3;4;3;4", "presentation": "4;4;3;4", "wc_summary": "99;120;133;46", "wc_strengths": "76;163;106;45", "wc_weaknesses": "63;60;79;9", "wc_questions": "377;136;29;5", "wc_limitations": "11;6;6;1", "wc_review": "626;485;353;106", "wc_reply_reviewers": "125;49;15;0", "wc_reply_authors": "295;9;16;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.5, 33.185087012090236 ], 
"wc_strengths_avg": [ 97.5, 43.53446910208048 ], "wc_weaknesses_avg": [ 52.75, 26.271419832205492 ], "wc_questions_avg": [ 136.75, 147.21476658270393 ], "wc_limitations_avg": [ 6.0, 3.5355339059327378 ], "wc_review_avg": [ 392.5, 191.5208865894266 ], "wc_reply_reviewers_avg": [ 47.25, 48.27201570268223 ], "wc_reply_authors_avg": [ 80.0, 124.25980846597182 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17523335160320905208&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";polytechnique.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Polytechnique;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.inria.fr", "aff_unique_abbr": "X;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Textually Pretrained Speech Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71490", "id": "UlHueVjAKr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c859b99b5d717c9035e79d43dfd69435-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UlHueVjAKr", "openreview": "https://openreview.net/forum?id=UlHueVjAKr", "poster": "/media/PosterPDFs/NeurIPS%202023/71490.png?t=1701591187.6107523", "slides": "https://nips.cc/virtual/2023/poster/71490", "video": "https://nips.cc/virtual/2023/poster/71490", "author_site": "Michael Hassid, Tal Remez, Tu Anh Nguyen, Itai Gat, Alexis CONNEAU, Felix Kreuk, Jade Copet, Alexandre Defossez, Gabriel Synnaeve, Emmanuel Dupoux, Roy Schwartz, Yossi Adi", "tldr": "", "abstract": "Speech language models (SpeechLMs) process and generate acoustic data only, without textual supervision. In this work, we propose TWIST, a method for training SpeechLMs using a warm-start from a pretrained textual language models. We show using both automatic and human evaluations that TWIST outperforms a cold-start SpeechLM across the board. We empirically analyze the effect of different model design choices such as the speech tokenizer, the pretrained textual model, and the dataset size. We find that model and dataset scale both play an important role in constructing better-performing SpeechLMs. Based on our observations, we present the largest (to the best of our knowledge) SpeechLM both in terms of number of parameters and training data. We additionally introduce two spoken versions of the StoryCloze textual benchmark to further improve model evaluation and advance future research in the field. 
We make speech samples, code and models publicly available.", "keywords": "LLM;speech;generative;GSLM", "primary_area": "", "supplementary_material": "/attachment/c3bea5803a177d409cb56ad4e370ad8e2ca65ca4.zip", "author": "Michael Hassid;Tal Remez;Tu Anh Nguyen;Itai Gat;Alexis Conneau;Felix Kreuk;Jade Copet;Alexandre D\u00e9fossez;Gabriel Synnaeve;Emmanuel Dupoux;Roy Schwartz;Yossi Adi", "authorids": "~Michael_Hassid1;~Tal_Remez2;~Tu_Anh_Nguyen1;~Itai_Gat1;~Alexis_Conneau1;~Felix_Kreuk1;~Jade_Copet1;~Alexandre_D\u00e9fossez1;~Gabriel_Synnaeve1;~Emmanuel_Dupoux1;~Roy_Schwartz1;~Yossi_Adi1", "gender": "M;M;M;M;;M;;M;M;M;M;M", "homepage": ";https://talremez.github.io/;https://tuanh208.github.io/;https://www.linkedin.com/in/itaigat/;;https://scholar.google.co.il/citations?user=UiERcYsAAAAJ&hl=en;;https://ai.honu.io/;;http://www.lscp.net/persons/dupoux/;https://schwartz-lab-huji.github.io/;http://adiyoss.github.io/", "dblp": "306/7698;170/0030;37/11121.html;221/4128;;213/7459;;156/0054;http://dblp.uni-trier.de/pers/hd/s/Synnaeve:Gabriel;41/8160;19/376-1;171/0957.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.il/citations?user=XqHYn7EAAAAJ;TzZXAm4AAAAJ;TnJqhXIAAAAJ;;;GRMLwjAAAAAJ;https://scholar.google.fr/citations?user=DubNUU0AAAAJ;wN9rBkcAAAAJ;https://scholar.google.fr/citations?user=94c1abIAAAAJ;wvfWo9IAAAAJ;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ", "orcid": ";;0000-0002-9623-042X;;;;;;;0000-0002-7814-2952;;0000-0003-2237-3898", "linkedin": ";;nguyentuanh208/;;;;jadecopet/?locale=en_US;;;emmanuel-dupoux-18034055/;;yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic", "or_profile": "~Michael_Hassid1;~Tal_Remez2;~Tu_Anh_Nguyen1;~Itai_Gat1;~Alexis_Conneau1;~Felix_Kreuk1;~Jade_Copet1;~Alexandre_D\u00e9fossez1;~Gabriel_Synnaeve1;~Emmanuel_Dupoux1;~Roy_Schwartz1;~Yossi_Adi1", "aff": "Hebrew University of Jerusalem;Meta;Meta Facebook;Technion;;Meta Facebook;Facebook AI Research;Meta;Meta Facebook;EHESS;Hebrew University, Hebrew University of Jerusalem;Meta", "aff_domain": "huji.ac.il;meta.com;fb.com;technion.ac.il;;fb.com;facebook.com;meta.com;fb.com;ehess.fr;cs.huji.ac.il;meta.com", "position": "PhD student;Researcher;PhD student;PhD student;;Researcher;Research Engineering Manager;Researcher;Research Scientist;Full Professor;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nhassid2023textually,\ntitle={Textually Pretrained Speech Language Models},\nauthor={Michael Hassid and Tal Remez and Tu Anh Nguyen and Itai Gat and Alexis Conneau and Felix Kreuk and Jade Copet and Alexandre D{\\'e}fossez and Gabriel Synnaeve and Emmanuel Dupoux and Roy Schwartz and Yossi Adi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UlHueVjAKr}\n}", "github": "", "project": "", "reviewers": "6BNW;5UoA;pWSQ;qFrm", "pdf_size": 514231, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "96;25;54;92", "wc_strengths": "25;41;57;49", "wc_weaknesses": "136;60;41;125", "wc_questions": "51;243;212;45", "wc_limitations": "7;6;7;1", "wc_review": "315;375;371;312", "wc_reply_reviewers": "0;73;24;23", "wc_reply_authors": "69;27;69;57", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 
0.4330127018922193 ], "wc_summary_avg": [ 66.75, 29.14939965076468 ], "wc_strengths_avg": [ 43.0, 11.832159566199232 ], "wc_weaknesses_avg": [ 90.5, 40.74616546375867 ], "wc_questions_avg": [ 137.75, 90.44162481954866 ], "wc_limitations_avg": [ 5.25, 2.48746859276655 ], "wc_review_avg": [ 343.25, 29.80247472945829 ], "wc_reply_reviewers_avg": [ 30.0, 26.61766330841233 ], "wc_reply_authors_avg": [ 55.5, 17.168284713389397 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11229342356684379796&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "huji.ac.il;meta.com;fb.com;technion.ac.il;;fb.com;facebook.com;meta.com;fb.com;ehess.fr;cs.huji.ac.il;meta.com", "author_num": 12, "aff_unique_index": "0;1;1;2;1;1;1;1;3;0;1", "aff_unique_norm": "Hebrew University of Jerusalem;Meta;Technion - Israel Institute of Technology;Ecole des Hautes Etudes en Sciences Sociales", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.huji.ac.il;https://meta.com;https://www.technion.ac.il/en/;https://www.ehess.fr", "aff_unique_abbr": "HUJI;Meta;Technion;EHESS", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;1;1;0;1;1;1;1;2;0;1", "aff_country_unique": "Israel;United States;France" }, { "title": "Extending the Design Space of Graph Neural Networks by Rethinking Folklore Weisfeiler-Lehman", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71489", "id": "UlJcZoawgU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1cac8326ce3fbe79171db9754211530c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UlJcZoawgU", "openreview": "https://openreview.net/forum?id=UlJcZoawgU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71489", "video": "https://nips.cc/virtual/2023/poster/71489", "author_site": "Jiarui Feng, Lecheng Kong, Hao Liu, Dacheng Tao, Fuhai Li, Muhan Zhang, Yixin Chen", "tldr": "", "abstract": "Message passing neural networks (MPNNs) have emerged as the most popular framework of graph neural networks (GNNs) in recent years. However, their expressive power is limited by the 1-dimensional Weisfeiler-Lehman (1-WL) test. Some works are inspired by $k$-WL/FWL (Folklore WL) and design the corresponding neural versions. Despite the high expressive power, there are serious limitations in this line of research. In particular, (1) $k$-WL/FWL requires at least $O(n^k)$ space complexity, which is impractical for large graphs even when $k=3$; (2) The design space of $k$-WL/FWL is rigid, with the only adjustable hyper-parameter being $k$. To tackle the first limitation, we propose an extension, $(k, t)$-FWL. We theoretically prove that even if we fix the space complexity to $O(n^k)$ (for any $k \\geq 2$) in $(k, t)$-FWL, we can construct an expressiveness hierarchy up to solving the graph isomorphism problem. To tackle the second problem, we propose $k$-FWL+, which considers any equivariant set as neighbors instead of all nodes, thereby greatly expanding the design space of $k$-FWL. Combining these two modifications results in a flexible and powerful framework $(k, t)$-FWL+. We demonstrate $(k, t)$-FWL+ can implement most existing models with matching expressiveness. 
We then introduce an instance of $(k,t)$-FWL+ called Neighborhood$^2$-FWL (N$^2$-FWL), which is practically and theoretically sound. We prove that N$^2$-FWL is no less powerful than 3-WL, and can encode many substructures while only requiring $O(n^2)$ space. Finally, we design its neural version named **N$^2$-GNN** and evaluate its performance on various tasks. N$^2$-GNN achieves record-breaking results on ZINC-Subset (**0.059**), outperforming previous SOTA results by 10.6\\%. Moreover, N$^2$-GNN achieves new SOTA results on the BREC dataset (**71.8\\%**) among all existing high-expressive GNN methods.", "keywords": "Graph neural network;expressive power;Folklore Weisfeiler-Lehman test.", "primary_area": "", "supplementary_material": "", "author": "Jiarui Feng;Lecheng Kong;Hao Liu;Dacheng Tao;Fuhai Li;Muhan Zhang;Yixin Chen", "authorids": "~Jiarui_Feng1;~Lecheng_Kong1;~Hao_Liu25;~Dacheng_Tao1;~Fuhai_Li1;~Muhan_Zhang1;~Yixin_Chen1", "gender": "M;M;F;;M;M;M", "homepage": "https://jiaruifeng.github.io/;https://LechengKong.github.io/;https://haoliu-cola.github.io/;;https://profiles.wustl.edu/en/persons/fuhai-li;https://muhanzhang.github.io/;https://www.cse.wustl.edu/~yixin.chen/", "dblp": "77/8797;319/5576;09/3214-57;;;157/5518;59/983", "google_scholar": "6CSGUR8AAAAJ;yk3-_EgAAAAJ;;;rVZfU9sAAAAJ;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ;NByrsK0AAAAJ", "orcid": "0000-0002-3409-6819;0000-0001-9427-8799;;;0000-0002-3773-146X;0000-0002-7680-6401;", "linkedin": ";;;;fuhai-li-1b05611a/;jerry-muhan-zhang-a33a1777/;", "or_profile": "~Jiarui_Feng1;~Lecheng_Kong1;~Hao_Liu25;~Dacheng_Tao1;~Fuhai_Li1;~Muhan_Zhang1;~Yixin_Chen1", "aff": "Washington University, Saint Louis;Washington University, Saint Louis;Washington University in St. Louis;;Washington University, Saint Louis;Peking University;Washington University, Saint Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;;wustl.edu;pku.edu.cn;wustl.edu", "position": "PhD student;PhD student;PhD student;;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nfeng2023extending,\ntitle={Extending the Design Space of Graph Neural Networks by Rethinking Folklore Weisfeiler-Lehman},\nauthor={Jiarui Feng and Lecheng Kong and Hao Liu and Dacheng Tao and Fuhai Li and Muhan Zhang and Yixin Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UlJcZoawgU}\n}", "github": "", "project": "", "reviewers": "3qR2;eNLB;E38T;vTyW;X7wK", "pdf_size": 628600, "rating": "3;4;6;7;7", "confidence": "4;5;2;3;3", "soundness": "2;3;4;3;3", "novelty": "2;2;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "173;123;74;264;139", "wc_strengths": "16;44;76;116;88", "wc_weaknesses": "206;162;106;145;132", "wc_questions": "30;1;48;207;281", "wc_limitations": "2;1;34;66;64", "wc_review": "427;331;338;798;704", "wc_reply_reviewers": "220;1045;30;67;24", "wc_reply_authors": "847;2859;34;31;10", "reply_reviewers": "4;2;1;1;1", "reply_authors": "4;7;2;2;2", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 154.6, 63.31698034492801 ], "wc_strengths_avg": [ 68.0, 34.77930419085465 ], "wc_weaknesses_avg": [ 150.2, 33.36105513918887 ], "wc_questions_avg": [ 113.4, 110.19727764332474 ], "wc_limitations_avg": [ 33.4, 28.408449447303525 ], "wc_review_avg": [ 519.6, 
194.2334677649555 ], "wc_reply_reviewers_avg": [ 277.2, 390.43329776032164 ], "wc_reply_authors_avg": [ 756.2, 1098.573147314279 ], "reply_reviewers_avg": [ 1.8, 1.1661903789690604 ], "reply_authors_avg": [ 3.4, 1.9595917942265424 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7000665968986379, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3581046109960608919&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "wustl.edu;wustl.edu;wustl.edu;;wustl.edu;pku.edu.cn;wustl.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Washington University in St. Louis;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;http://www.pku.edu.cn", "aff_unique_abbr": "WUSTL;Peking U", "aff_campus_unique_index": "0;0;1;0;0", "aff_campus_unique": "Saint Louis;St. Louis;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Language Is Not All You Need: Aligning Perception with Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71488", "id": "UpN2wfrLec", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e425b75bac5742a008d643826428787c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UpN2wfrLec", "openreview": "https://openreview.net/forum?id=UpN2wfrLec", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71488", "video": "https://nips.cc/virtual/2023/poster/71488", "author_site": "Shaohan Huang, Li Dong, Wenhui Wang, Yaru Hao, Saksham Singhal, Shuming Ma, Tengchao Lv, Lei Cui, Owais Khan Mohammed, Barun Patra, Qiang Liu, Kriti Aggarwal, Zewen Chi, Nils Bjorck, Vishrav Chaudhary, Subhojit Som, XIA SONG, Furu Wei", "tldr": "", "abstract": "A big convergence of language, multimodal perception, action, and world modeling is a key step toward artificial general intelligence. In this work, we introduce KOSMOS-1, a Multimodal Large Language Model (MLLM) that can perceive general modalities, learn in context (i.e., few-shot), and follow instructions (i.e., zero-shot). Specifically, we train KOSMOS-1 from scratch on web-scale multi-modal corpora, including arbitrarily interleaved text and images, image-caption pairs, and text data. We evaluate various settings, including zero-shot, few-shot, and multimodal chain-of-thought prompting, on a wide range of tasks without any gradient updates or finetuning. Experimental results show that KOSMOS-1 achieves impressive performance on (i) language understanding, generation, and even OCR-free NLP (directly fed with document images), (ii) perception-language tasks, including multimodal dialogue, image captioning, visual question answering, and (iii) vision tasks, such as image recognition with descriptions (specifying classification via text instructions). We also show that MLLMs can benefit from cross-modal transfer, i.e., transfer knowledge from language to multimodal, and from multimodal to language. 
In addition, we introduce a dataset of Raven IQ test, which diagnoses the nonverbal reasoning capability of MLLMs.", "keywords": "multimodal large language model", "primary_area": "", "supplementary_material": "/attachment/40c7a43389b4dea2756fc42dbc2bc0a4b5d02ffa.pdf", "author": "Shaohan Huang;Li Dong;Wenhui Wang;Yaru Hao;Saksham Singhal;Shuming Ma;Tengchao Lv;Lei Cui;Owais Khan Mohammed;Barun Patra;Qiang Liu;Kriti Aggarwal;Zewen Chi;Johan Bjorck;Vishrav Chaudhary;Subhojit Som;Xia Song;Furu Wei", "authorids": "~Shaohan_Huang1;~Li_Dong1;~Wenhui_Wang1;~Yaru_Hao1;~Saksham_Singhal1;~Shuming_Ma1;~Tengchao_Lv1;~Lei_Cui2;~Owais_Khan_Mohammed1;~Barun_Patra1;~Qiang_Liu15;~Kriti_Aggarwal2;~Zewen_Chi1;~Johan_Bjorck2;~Vishrav_Chaudhary1;~Subhojit_Som1;~Xia_Song1;~Furu_Wei1", "gender": "M;M;M;F;;;M;M;M;M;M;;M;M;;M;M;M", "homepage": ";http://dong.li;;https://yaruhao.github.io/;;https://www.microsoft.com/en-us/research/people/shumma/;;https://www.microsoft.com/en-us/research/people/lecu/;;;https://www.linkedin.com/in/qiang-liu-1abb35a2/;;;https://nilsjohanbjorck.github.io/;;;;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "176/0380;85/5090-4;37/2855;173/4242;175/5340.html;;254/8010.html;47/5523-1.html;;200/7924;;;220/0954.html;188/6399;;;165/6299;72/5870", "google_scholar": ";wEfQgPgAAAAJ;BxmpMVUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;0LTZGhUAAAAJ;ajJQoUcAAAAJ;;Gwg25AkAAAAJ;YifJ7jAAAAAJ;;MP1GX_0AAAAJ;https://scholar.google.com/citations?hl=en;;n1gxPekAAAAJ;0aPSv9kAAAAJ;G-V1VpwAAAAJ", "orcid": ";;;;;;0000-0001-7548-9566;;;;;;;;;;;", "linkedin": ";;;;;;;;owais-khan-0abb53160/;;;;;;;subhojitsom/;xiaso/;", "or_profile": "~Shaohan_Huang1;~Li_Dong1;~Wenhui_Wang1;~Yaru_Hao1;~Saksham_Singhal1;~Shuming_Ma1;~Tengchao_Lv1;~Lei_Cui2;~Owais_Khan_Mohammed1;~Barun_Patra1;~Qiang_Liu15;~Kriti_Aggarwal2;~Zewen_Chi1;~Johan_Bjorck2;~Vishrav_Chaudhary1;~Subhojit_Som1;~Xia_Song1;~Furu_Wei1", "aff": "Microsoft;Microsoft Research;Microsoft;Microsoft Research Asia;Microsoft;Microsoft;Microsoft;Microsoft Research Asia;;Microsoft;;;Beijing Institute of Technology;Microsoft;;;Microsoft;Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;;;bit.edu.cn;microsoft.com;;;microsoft.com;microsoft.com", "position": "Researcher;Principal Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;;Applied Scientist;;;PhD student;Researcher;;;Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nhuang2023language,\ntitle={Language Is Not All You Need: Aligning Perception with Language Models},\nauthor={Shaohan Huang and Li Dong and Wenhui Wang and Yaru Hao and Saksham Singhal and Shuming Ma and Tengchao Lv and Lei Cui and Owais Khan Mohammed and Barun Patra and Qiang Liu and Kriti Aggarwal and Zewen Chi and Johan Bjorck and Vishrav Chaudhary and Subhojit Som and Xia Song and Furu Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UpN2wfrLec}\n}", "github": "", "project": "", "reviewers": "BRko;XnP9;oWBZ;tesr", "pdf_size": 475914, "rating": "6;6;6;6", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "80;92;47;54", "wc_strengths": "37;56;94;62", "wc_weaknesses": "411;219;230;79", "wc_questions": "4;104;66;2", "wc_limitations": "1;7;1;3", "wc_review": "533;478;438;200", "wc_reply_reviewers": "62;23;6;16", 
"wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.25, 18.417043736713012 ], "wc_strengths_avg": [ 62.25, 20.522853115490545 ], "wc_weaknesses_avg": [ 234.75, 117.89057426274587 ], "wc_questions_avg": [ 44.0, 43.15089802078283 ], "wc_limitations_avg": [ 3.0, 2.449489742783178 ], "wc_review_avg": [ 412.25, 127.09912470194277 ], "wc_reply_reviewers_avg": [ 26.75, 21.22940178149163 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 543, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17880336204125555846&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;;;bit.edu.cn;microsoft.com;;;microsoft.com;microsoft.com", "author_num": 18, "aff_unique_index": "0;0;0;0;0;0;0;0;0;1;0;0;0", "aff_unique_norm": "Microsoft;Beijing Institute of Technology", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;http://www.bit.edu.cn/", "aff_unique_abbr": "Microsoft;BIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;0;0;0;1;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Polynomially Over-Parameterized Convolutional Neural Networks Contain Structured Strong Winning Lottery Tickets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71487", "id": "UqYrYB3dp5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/525338e0d98401a62950bc7c454eb83d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UqYrYB3dp5", "openreview": "https://openreview.net/forum?id=UqYrYB3dp5", "poster": "/media/PosterPDFs/NeurIPS%202023/71487.png?t=1699528090.321876", "slides": "https://nips.cc/virtual/2023/poster/71487", "video": "https://nips.cc/virtual/2023/poster/71487", "author_site": "Arthur da Cunha, Arthur da Cunha, Francesco D'Amore, Natale", "tldr": "", "abstract": "The Strong Lottery Ticket Hypothesis (SLTH) states that randomly-initialised neural networks likely contain subnetworks that perform well without any training. Although unstructured pruning has been extensively studied in this context, its structured counterpart, which can deliver significant computational and memory efficiency gains, has been largely unexplored. One of the main reasons for this gap is the limitations of the underlying mathematical tools used in formal analyses of the SLTH.\n\nIn this paper, we overcome these limitations: we leverage recent advances in the multidimensional generalisation of the Random Subset-Sum Problem and obtain a variant that admits the stochastic dependencies that arise when addressing structured pruning in the SLTH. 
We apply this result to prove, for a wide class of random Convolutional Neural Networks, the existence of structured subnetworks that can approximate any sufficiently smaller network.\n\nThis result provides the first sub-exponential bound around the SLTH for structured pruning, opening up new avenues for further research on the hypothesis and contributing to the understanding of the role of over-parameterization in deep learning.", "keywords": "lottery ticket hypothesis;convolutional neural network;network pruning;structured pruning;random subset sum", "primary_area": "", "supplementary_material": "", "author": "Arthur da Cunha;Francesco D'Amore;Emanuele Natale", "authorids": "~Arthur_da_Cunha1;~Francesco_D'Amore1;~Emanuele_Natale1", "gender": ";M;M", "homepage": ";https://fdamore95.github.io/;https://www-sop.inria.fr/members/Emanuele.Natale/", "dblp": ";237/7510-1;126/5223", "google_scholar": ";0I0zakAAAAAJ;https://scholar.google.it/citations?user=m2P3BH4AAAAJ", "orcid": ";0000-0001-7498-0660;0000-0002-8755-3892", "linkedin": ";;", "or_profile": "~Arthur_da_Cunha1;~Francesco_D'Amore1;~Emanuele_Natale1", "aff": ";Aalto University;CNRS", "aff_domain": ";aalto.fi;cnrs.fr", "position": ";Postdoc;Researcher", "bibtex": "@inproceedings{\ncunha2023polynomially,\ntitle={Polynomially Over-Parameterized Convolutional Neural Networks Contain Structured Strong Winning Lottery Tickets},\nauthor={Arthur da Cunha and Francesco D'Amore and Emanuele Natale},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UqYrYB3dp5}\n}", "github": "", "project": "", "reviewers": "Ewpx;TQCH;Up5n;ryBt;LtV9", "pdf_size": 470393, "rating": "5;5;6;7;7", "confidence": "1;3;3;3;1", "soundness": "3;2;3;3;4", "novelty": "3;3;3;3;3", "presentation": "3;2;3;3;4", "wc_summary": "141;75;180;124;24", "wc_strengths": "69;58;30;68;162", "wc_weaknesses": "142;242;381;105;51", "wc_questions": "2;109;132;42;196", "wc_limitations": "12;1;18;45;17", "wc_review": "366;485;741;384;450", "wc_reply_reviewers": "21;561;256;7;28", "wc_reply_authors": "0;766;500;0;0", "reply_reviewers": "1;3;2;1;1", "reply_authors": "1;4;2;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 108.8, 54.16788716573686 ], "wc_strengths_avg": [ 77.4, 44.585199337896874 ], "wc_weaknesses_avg": [ 184.2, 116.50476385109752 ], "wc_questions_avg": [ 96.2, 68.14220424964253 ], "wc_limitations_avg": [ 18.6, 14.513442045221387 ], "wc_review_avg": [ 485.2, 134.99837036053435 ], "wc_reply_reviewers_avg": [ 174.6, 214.05849667789408 ], "wc_reply_authors_avg": [ 253.2, 321.31131321508116 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=481778671851419852&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": ";aalto.fi;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Aalto University;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.cnrs.fr", "aff_unique_abbr": "Aalto;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Finland;France" }, { 
"title": "Bayesian Optimisation of Functions on Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71486", "id": "UuNd9A6noD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86419aba4e5eafd2b1009a2e3c540bb0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UuNd9A6noD", "openreview": "https://openreview.net/forum?id=UuNd9A6noD", "poster": "/media/PosterPDFs/NeurIPS%202023/71486.png?t=1701645476.0379305", "slides": "https://nips.cc/virtual/2023/poster/71486", "video": "https://nips.cc/virtual/2023/poster/71486", "author_site": "Xingchen Wan, Pierre Osselin, Henry Kenlay, Binxin Ru, Michael A Osborne, Xiaowen Dong", "tldr": "", "abstract": "The increasing availability of graph-structured data motivates the task of optimising over functions defined on the node set of graphs. Traditional graph search algorithms can be applied in this case, but they may be sample-inefficient and do not make use of information about the function values; on the other hand, Bayesian optimisation is a class of promising black-box solvers with superior sample efficiency, but it has scarcely been applied to such novel setups. To fill this gap, we propose a novel Bayesian optimisation framework that optimises over functions defined on generic, large-scale and potentially unknown graphs. Through the learning of suitable kernels on graphs, our framework has the advantage of adapting to the behaviour of the target function. The local modelling approach further guarantees the efficiency of our method. Extensive experiments on both synthetic and real-world graphs demonstrate the effectiveness of the proposed optimisation framework.", "keywords": "graphs;Bayesian optimisation;scalability", "primary_area": "", "supplementary_material": "", "author": "Xingchen Wan;Pierre Osselin;Henry Kenlay;Binxin Ru;Michael A Osborne;Xiaowen Dong", "authorids": "~Xingchen_Wan1;~Pierre_Osselin1;~Henry_Kenlay1;~Binxin_Ru1;~Michael_A_Osborne1;~Xiaowen_Dong1", "gender": "M;M;;M;;M", "homepage": "https://xingchen.one;https://pierreosselin.github.io/;;;https://web.media.mit.edu/~xdong/;https://www.robots.ox.ac.uk/~mosb/", "dblp": "255/7214;;;;91/9827-1;59/6403", "google_scholar": "6KkohssAAAAJ;;3xBEuKUAAAAJ;https://scholar.google.co.uk/citations?user=4piw-XMAAAAJ;_8tUq8kAAAAJ;https://scholar.google.co.uk/citations?user=iTNcAakAAAAJ", "orcid": "0000-0003-0074-0597;;;;;0000-0003-1959-012X", "linkedin": ";;;;;", "or_profile": "~Xingchen_Wan1;~Pierre_Osselin1;~Henry_Kenlay1;~Binxin_Ru1;~Xiaowen_Dong1;~Michael_Osborne1", "aff": "University of Oxford;University of Oxford;Exscientia;;Massachusetts Institute of Technology;University of Oxford", "aff_domain": "robots.ox.ac.uk;ox.ac.uk;exscientia.co.uk;;mit.edu;oxford.ac.uk", "position": "PhD student;PhD student;Researcher;;Research Affiliate;Full Professor", "bibtex": "@inproceedings{\nwan2023bayesian,\ntitle={Bayesian Optimisation of Functions on Graphs},\nauthor={Xingchen Wan and Pierre Osselin and Henry Kenlay and Binxin Ru and Michael A Osborne and Xiaowen Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UuNd9A6noD}\n}", "github": "", "project": "", "reviewers": "tMg5;GnE3;4pXS;nGA9;sVur", "pdf_size": 6006389, "rating": "3;4;5;7;8", "confidence": "4;3;4;4;3", "soundness": "2;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "2;2;2;3;3", "wc_summary": "69;55;82;149;57", "wc_strengths": "31;79;54;71;17", "wc_weaknesses": 
"46;72;98;111;10", "wc_questions": "238;294;45;119;68", "wc_limitations": "10;15;1;1;1", "wc_review": "394;515;280;451;153", "wc_reply_reviewers": "23;11;34;13;22", "wc_reply_authors": "127;331;22;15;21", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.4, 1.8547236990991407 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 82.4, 34.67333269243094 ], "wc_strengths_avg": [ 50.4, 23.440136518373777 ], "wc_weaknesses_avg": [ 67.4, 36.36261816756324 ], "wc_questions_avg": [ 152.8, 97.10901091041964 ], "wc_limitations_avg": [ 5.6, 5.851495535331117 ], "wc_review_avg": [ 358.6, 128.60886439122305 ], "wc_reply_reviewers_avg": [ 20.6, 8.212186067059124 ], "wc_reply_authors_avg": [ 103.2, 121.31677542697877 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.26413527189768726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9305490885274768359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "robots.ox.ac.uk;ox.ac.uk;exscientia.co.uk;;mit.edu;oxford.ac.uk", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Oxford;Exscientia;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.exscientia.co.uk;https://web.mit.edu", "aff_unique_abbr": "Oxford;;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "UP-NeRF: Unconstrained Pose Prior-Free Neural Radiance Field", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71485", "id": "UvBwXdL95b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d705dd6e77decdc399162d6d5b92f6e8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UvBwXdL95b", "openreview": "https://openreview.net/forum?id=UvBwXdL95b", "poster": "/media/PosterPDFs/NeurIPS%202023/71485.png?t=1701915743.676518", "slides": "https://nips.cc/virtual/2023/poster/71485", "video": "https://nips.cc/virtual/2023/poster/71485", "author_site": "Injae Kim, Minhyuk Choi, Hyunwoo Kim", "tldr": "", "abstract": "Neural Radiance Field (NeRF) has enabled novel view synthesis with high fidelity given images and camera poses. Subsequent works even succeeded in eliminating the necessity of pose priors by jointly optimizing NeRF and camera pose. However, these works are limited to relatively simple settings such as photometrically consistent and occluder-free image collections or a sequence of images from a video. So they have difficulty handling unconstrained images with varying illumination and transient occluders. In this paper, we propose **UP-NeRF** (**U**nconstrained **P**ose-prior-free **Ne**ural **R**adiance **F**ields) to optimize NeRF with unconstrained image collections without camera pose prior. We tackle these challenges with surrogate tasks that optimize color-insensitive feature fields and a separate module for transient occluders to block their influence on pose estimation. In addition, we introduce a candidate head to enable more robust pose estimation and transient-aware depth supervision to minimize the effect of incorrect prior. 
Our experiments verify the superior performance of our method compared to the baselines including BARF and its variants in a challenging internet photo collection, *Phototourism dataset*. The code of UP-NeRF is available at https://github.com/mlvlab/UP-NeRF.", "keywords": "neural radiance field;pose estimation", "primary_area": "", "supplementary_material": "/attachment/1d1abfe7922c3212edb6db4ed7a359e196a97c84.pdf", "author": "Injae Kim;Minhyuk Choi;Hyunwoo J. Kim", "authorids": "~Injae_Kim1;~Minhyuk_Choi2;~Hyunwoo_J._Kim3", "gender": "M;M;M", "homepage": ";https://github.com/cmh1027;https://hyunwoojkim.com/publications", "dblp": "331/2427;360/4968;150/4259", "google_scholar": ";https://scholar.google.co.kr/citations?user=1n-0E7YAAAAJ;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": ";;0000-0002-2181-9264", "linkedin": "%EC%9D%B8%EC%9E%AC-%EA%B9%80-8713b021a;%EC%B5%9C%EB%AF%BC%ED%98%81-%ED%95%99%EB%B6%80%EC%A1%B8%EC%97%85-%EC%BB%B4%ED%93%A8%ED%84%B0%ED%95%99%EA%B3%BC-%E2%80%8D-439908261/;", "or_profile": "~Injae_Kim1;~Minhyuk_Choi2;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nkim2023upnerf,\ntitle={{UP}-Ne{RF}: Unconstrained Pose Prior-Free Neural Radiance Field},\nauthor={Injae Kim and Minhyuk Choi and Hyunwoo J. Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UvBwXdL95b}\n}", "github": "", "project": "", "reviewers": "FYmx;Kmtq;qVYM;6T1t;7yJa", "pdf_size": 9851208, "rating": "6;6;7;7;7", "confidence": "5;4;4;4;4", "soundness": "3;3;3;4;3", "novelty": "2;2;3;3;3", "presentation": "2;2;2;4;3", "wc_summary": "73;100;70;142;68", "wc_strengths": "59;133;93;143;82", "wc_weaknesses": "69;503;246;89;106", "wc_questions": "12;58;20;76;4", "wc_limitations": "27;25;16;52;1", "wc_review": "240;819;445;502;261", "wc_reply_reviewers": "129;71;0;22;14", "wc_reply_authors": "146;52;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 90.6, 28.196453677723376 ], "wc_strengths_avg": [ 102.0, 31.534108517603602 ], "wc_weaknesses_avg": [ 202.6, 162.6094708188917 ], "wc_questions_avg": [ 34.0, 28.0 ], "wc_limitations_avg": [ 24.2, 16.65412861725284 ], "wc_review_avg": [ 453.4, 209.11872226082485 ], "wc_reply_reviewers_avg": [ 47.2, 47.37678756522017 ], "wc_reply_authors_avg": [ 39.6, 56.88444427081977 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3920048135923638244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Guiding Large Language Models via Directional Stimulus Prompting", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71484", "id": "UvIN8oQ4uI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c5601d99ed028448f29d1dae2e4a926d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UvIN8oQ4uI", "openreview": "https://openreview.net/forum?id=UvIN8oQ4uI", "poster": "/media/PosterPDFs/NeurIPS%202023/71484.png?t=1701901277.577289", "slides": "https://nips.cc/virtual/2023/poster/71484", "video": "https://nips.cc/virtual/2023/poster/71484", "author_site": "Zekun Li, Baolin Peng, Pengcheng He, Michel Galley, Jianfeng Gao, Xifeng Yan", "tldr": "", "abstract": "We introduce Directional Stimulus Prompting, a novel framework for guiding black-box large language models (LLMs) towards specific desired outputs. Instead of directly adjusting LLMs, our method employs a small tunable policy model (e.g., T5) to generate an auxiliary directional stimulus prompt for each input instance. These directional stimulus prompts act as nuanced, instance-specific hints and clues to guide LLMs in generating desired outcomes, such as including specific keywords in the generated summary. Our approach sidesteps the challenges of direct LLM tuning by optimizing the policy model to explore directional stimulus prompts that align LLMs with desired behaviors. The policy model can be optimized through 1) supervised fine-tuning using labeled data and 2) reinforcement learning from offline or online rewards based on the LLM's output. We evaluate our method across various tasks, including summarization, dialogue response generation, and chain-of-thought reasoning. Our experiments indicate a consistent improvement in the performance of LLMs such as ChatGPT, Codex, and InstructGPT on these supervised tasks with minimal labeled data. Remarkably, by utilizing merely 80 dialogues from the MultiWOZ dataset, our approach boosts ChatGPT's performance by a relative 41.4%, achieving or exceeding the performance of some fully supervised state-of-the-art models. Moreover, the instance-specific chain-of-thought prompt generated through our method enhances InstructGPT's reasoning accuracy, outperforming both generalized human-crafted prompts and those generated through automatic prompt engineering. 
The code and data are publicly available at https://github.com/Leezekun/Directional-Stimulus-Prompting.", "keywords": "Black-box Large Language Models;Directional Stimulus Prompting;Hint;Reinforcement learning;Prompt optimization", "primary_area": "", "supplementary_material": "/attachment/9198017f09ff3e83ebc0235e7f044c70ef784601.zip", "author": "Zekun Li;Baolin Peng;Pengcheng He;Michel Galley;Jianfeng Gao;Xifeng Yan", "authorids": "~Zekun_Li2;~Baolin_Peng2;~Pengcheng_He2;~Michel_Galley1;~Jianfeng_Gao1;~Xifeng_Yan1", "gender": ";M;M;M;M;", "homepage": ";;;http://research.microsoft.com/~mgalley;https://www.microsoft.com/en-us/research/people/jfgao/;https://sites.cs.ucsb.edu/~xyan/", "dblp": ";144/2759;116/8665;05/3289;92/5339;y/XifengYan", "google_scholar": ";u1CNjgwAAAAJ;https://scholar.google.com/citations?hl=en;rs1M7CAAAAAJ;https://scholar.google.com/citations?hl=en;XZV2eogAAAAJ", "orcid": ";;;0000-0002-3310-1831;;", "linkedin": ";;;michelgalley;;", "or_profile": "~Zekun_Li2;~Baolin_Peng2;~Pengcheng_He2;~Michel_Galley1;~Jianfeng_Gao1;~Xifeng_Yan1", "aff": ";Tencent AI Lab;Microsoft;Microsoft;Microsoft Research;UC Santa Barbara", "aff_domain": ";tencent.com;microsoft.com;microsoft.com;microsoft.com;ucsb.edu", "position": ";Researcher;Principal Researcher;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nli2023guiding,\ntitle={Guiding Large Language Models via Directional Stimulus Prompting},\nauthor={Zekun Li and Baolin Peng and Pengcheng He and Michel Galley and Jianfeng Gao and Xifeng Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UvIN8oQ4uI}\n}", "github": "", "project": "", "reviewers": "2Z7n;FEWZ;RT8N;14f8;JgjJ", "pdf_size": 2056689, "rating": "4;6;6;6;7", "confidence": "4;4;4;2;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "4;4;3;3;3", "wc_summary": "81;78;65;70;130", "wc_strengths": "36;96;82;57;80", "wc_weaknesses": "316;179;33;72;87", "wc_questions": "9;150;3;1;18", "wc_limitations": "24;29;24;1;1", "wc_review": "466;532;207;201;316", "wc_reply_reviewers": "110;0;0;0;30", "wc_reply_authors": "325;0;0;0;0", "reply_reviewers": "1;0;0;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 84.8, 23.301502097504358 ], "wc_strengths_avg": [ 70.2, 21.188676221038445 ], "wc_weaknesses_avg": [ 137.4, 101.33627188721717 ], "wc_questions_avg": [ 36.2, 57.20629336008408 ], "wc_limitations_avg": [ 15.8, 12.221292893961751 ], "wc_review_avg": [ 344.4, 134.334805616415 ], "wc_reply_reviewers_avg": [ 28.0, 42.61455150532503 ], "wc_reply_authors_avg": [ 65.0, 130.0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4082482904638631, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1373708663828630221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";tencent.com;microsoft.com;microsoft.com;microsoft.com;ucsb.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Tencent;Microsoft;University of California, Santa Barbara", "aff_unique_dep": "Tencent AI Lab;Microsoft Corporation;", "aff_unique_url": 
"https://ai.tencent.com;https://www.microsoft.com;https://www.ucsb.edu", "aff_unique_abbr": "Tencent AI Lab;Microsoft;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Pgx: Hardware-Accelerated Parallel Game Simulators for Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73576", "id": "UvX8QfhfUx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8f153093758af93861a74a1305dfdc18-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=UvX8QfhfUx", "openreview": "https://openreview.net/forum?id=UvX8QfhfUx", "poster": "/media/PosterPDFs/NeurIPS%202023/73576.png?t=1701751848.2176826", "slides": "https://nips.cc/virtual/2023/poster/73576", "video": "https://nips.cc/virtual/2023/poster/73576", "author_site": "Sotetsu Koyamada, Shinri Okano, Soichiro Nishimori, Yu Murata, Keigo Habara, Haruka Kita, Shin Ishii", "tldr": "", "abstract": "We propose Pgx, a suite of board game reinforcement learning (RL) environments written in JAX and optimized for GPU/TPU accelerators. By leveraging JAX's auto-vectorization and parallelization over accelerators, Pgx can efficiently scale to thousands of simultaneous simulations over accelerators. In our experiments on a DGX-A100 workstation, we discovered that Pgx can simulate RL environments 10-100x faster than existing implementations available in Python. Pgx includes RL environments commonly used as benchmarks in RL research, such as backgammon, chess, shogi, and Go. Additionally, Pgx offers miniature game sets and baseline models to facilitate rapid research cycles. We demonstrate the efficient training of the Gumbel AlphaZero algorithm with Pgx environments. Overall, Pgx provides high-performance environment simulators for researchers to accelerate their RL experiments. 
Pgx is available at https://github.com/sotetsuk/pgx.", "keywords": "Reinforcement learning;Game AI;AlphaZero;Multi-agent", "primary_area": "", "supplementary_material": "", "author": "Sotetsu Koyamada;Shinri Okano;Soichiro Nishimori;Yu Murata;Keigo Habara;Haruka Kita;Shin Ishii", "authorids": "~Sotetsu_Koyamada1;~Shinri_Okano1;~Soichiro_Nishimori1;~Yu_Murata1;~Keigo_Habara1;~Haruka_Kita2;~Shin_Ishii1", "gender": ";M;M;M;;M;M", "homepage": ";;https://scholar.google.co.jp/citations?hl=ja&user=swJkeuUAAAAJ;;https://github.com/habara-k;;", "dblp": ";330/1786;330/0903;;330/1400;344/1744;27/3253.html", "google_scholar": ";https://scholar.google.co.jp/scholar?q=author%3A%22OKANO+Shinri%22;https://scholar.google.co.jp/citations?hl=ja;https://scholar.google.co.jp/citations?user=5Xxs2loAAAAJ;;;https://scholar.google.co.jp/citations?hl=en", "orcid": ";;;;;;", "linkedin": ";;;;;haruka-kita-45b88a214;", "or_profile": "~Sotetsu_Koyamada1;~Shinri_Okano1;~Soichiro_Nishimori1;~Yu_Murata1;~Keigo_Habara1;~Haruka_Kita2;~Shin_Ishii1", "aff": ";Kyoto University;The University of Tokyo;Kyoto University, Tokyo Institute of Technology;Kyoto University;kyoto university;Kyoto University", "aff_domain": ";kyoto-u.ac.jp;u-tokyo.ac.jp;kyoto-u.ac.jp;kyoto-u.ac.jp;kyoto-u.ac.jp;kyoto-u.ac.jp", "position": ";Undergrad student;MS student;Undergrad student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nkoyamada2023pgx,\ntitle={Pgx: Hardware-Accelerated Parallel Game Simulators for Reinforcement Learning},\nauthor={Sotetsu Koyamada and Shinri Okano and Soichiro Nishimori and Yu Murata and Keigo Habara and Haruka Kita and Shin Ishii},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=UvX8QfhfUx}\n}", "github": "", "project": "", "reviewers": "uMKG;fFc4;5n9R;bSTx;e7SA", "pdf_size": 920566, "rating": "6;7;7;7;9", "confidence": "5;4;3;4;4", "wc_summary_and_contributions": "73;49;225;96;103", "wc_strengths": "26;34;38;86;147", "wc_improvement": "74;195;24;546;22", "wc_limitations": "44;85;10;23;1", "wc_correctness": "78;44;2;17;1", "wc_clarity": "1;25;1;51;1", "wc_relation_to_prior_work": "1;14;16;29;1", "wc_documentation": "52;11;1;17;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "350;458;318;866;278", "wc_reply_reviewers": "0;101;35;71;20", "wc_reply_authors": "354;730;408;1116;167", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;3;2", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 109.2, 60.92093236318696 ], "wc_strengths_avg": [ 66.2, 45.53855509345899 ], "wc_improvement_avg": [ 172.2, 197.1866121216144 ], "wc_limitations_avg": [ 32.6, 29.923903488682754 ], "wc_correctness_avg": [ 28.4, 29.261578904768623 ], "wc_clarity_avg": [ 15.8, 19.903768487399567 ], "wc_relation_to_prior_work_avg": [ 12.2, 10.49571341072154 ], "wc_documentation_avg": [ 16.4, 18.821264569629747 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 454.0, 214.49848484313355 ], "wc_reply_reviewers_avg": [ 45.4, 36.224853346839105 ], "wc_reply_authors_avg": [ 555.0, 334.0359262115379 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3227486121839514, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6041515382989592693&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 6, "email": ";kyoto-u.ac.jp;u-tokyo.ac.jp;kyoto-u.ac.jp;kyoto-u.ac.jp;kyoto-u.ac.jp;kyoto-u.ac.jp", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Kyoto University;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Kyoto U;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "$k$-Means Clustering with Distance-Based Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71483", "id": "UzUhiKACmS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3e8d9bf1dd1eb9d3d9d500fb3543c87b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=UzUhiKACmS", "openreview": "https://openreview.net/forum?id=UzUhiKACmS", "poster": "/media/PosterPDFs/NeurIPS%202023/71483.png?t=1702335578.7740486", "slides": "https://nips.cc/virtual/2023/poster/71483", "video": "https://nips.cc/virtual/2023/poster/71483", "author_site": "Alessandro Epasto, Vahab Mirrokni, Shyam Narayanan, Peilin Zhong", "tldr": "", "abstract": "In this paper, we initiate the study of Euclidean clustering with Distance-based privacy. Distance-based privacy is motivated by the fact that it is often only needed to protect the privacy of exact, rather than approximate, locations. We provide constant-approximate algorithms for $k$-means and $k$-median clustering, with additive error depending only on the attacker's precision bound $\\rho$, rather than the radius $\\Lambda$ of the space. In addition, we empirically demonstrate that our algorithm performs significantly better than previous differentially private clustering algorithms, as well as naive distance-based private clustering baselines.", "keywords": "differential Privacy;k-means;k-median;clustering;distance-based privacy", "primary_area": "", "supplementary_material": "", "author": "Alessandro Epasto;Vahab Mirrokni;Shyam Narayanan;Peilin Zhong", "authorids": "~Alessandro_Epasto3;~Vahab_Mirrokni2;~Shyam_Narayanan1;~Peilin_Zhong1", "gender": "M;M;M;M", "homepage": "https://epasto.org;https://people.csail.mit.edu/mirrokni/Welcome.html;https://sites.google.com/view/shyamnarayanan/home;http://www.cs.columbia.edu/~peilin/", "dblp": "58/7802;m/VahabSMirrokni;222/2805;148/9632", "google_scholar": "https://scholar.google.com/citations?hl=en;opbZfw0AAAAJ;CTT44Y0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0456-3217;;;", "linkedin": "https://www.linkedin.com/pub/alessandro-epasto/85/649/733/;;;", "or_profile": "~Alessandro_Epasto3;~Vahab_Mirrokni2;~Shyam_Narayanan1;~Peilin_Zhong1", "aff": "Google;Google Research;Massachusetts Institute of Technology;Google", "aff_domain": "google.com;google.com;mit.edu;google.com", "position": "Research Scientist;VP, Google Fellow;PhD student;Researcher", "bibtex": "@inproceedings{\nepasto2023kmeans,\ntitle={\\$k\\$-Means Clustering with Distance-Based Privacy},\nauthor={Alessandro Epasto and Vahab Mirrokni and Shyam Narayanan and Peilin Zhong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=UzUhiKACmS}\n}", "github": "", "project": "", "reviewers": "1C7X;CEv9;vkqo;nn8B", "pdf_size": 625994, "rating": "4;5;6;8", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;1;3;3", "wc_summary": "97;106;29;82", "wc_strengths": 
"15;50;37;73", "wc_weaknesses": "276;283;51;47", "wc_questions": "1;39;56;306", "wc_limitations": "1;41;61;9", "wc_review": "390;519;234;517", "wc_reply_reviewers": "139;58;0;8", "wc_reply_authors": "486;38;51;31", "reply_reviewers": "2;1;0;1", "reply_authors": "5;2;3;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 78.5, 29.837057495671385 ], "wc_strengths_avg": [ 43.75, 21.016362672927016 ], "wc_weaknesses_avg": [ 164.25, 115.28524406878792 ], "wc_questions_avg": [ 100.5, 120.3048211835253 ], "wc_limitations_avg": [ 28.0, 24.228082879171435 ], "wc_review_avg": [ 415.0, 116.83963368651924 ], "wc_reply_reviewers_avg": [ 51.25, 55.32348054849767 ], "wc_reply_authors_avg": [ 151.5, 193.25695330310887 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6320148994421384840&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "google.com;google.com;mit.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "Google;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Path to Simpler Models Starts With Noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71482", "id": "Uzi22WryyX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0a49935d2b3d3342ca08d6db0adcfa34-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Uzi22WryyX", "openreview": "https://openreview.net/forum?id=Uzi22WryyX", "poster": "/media/PosterPDFs/NeurIPS%202023/71482.png?t=1701869009.040679", "slides": "https://nips.cc/virtual/2023/poster/71482", "video": "https://nips.cc/virtual/2023/poster/71482", "author_site": "Lesia Semenova, Harry Chen, Ronald Parr, Cynthia Rudin", "tldr": "", "abstract": "The Rashomon set is the set of models that perform approximately equally well on a given dataset, and the Rashomon ratio is the fraction of all models in a given hypothesis space that are in the Rashomon set. Rashomon ratios are often large for tabular datasets in criminal justice, healthcare, lending, education, and in other areas, which has practical implications about whether simpler models can attain the same level of accuracy as more complex models. An open question is why Rashomon ratios often tend to be large. In this work, we propose and study a mechanism of the data generation process, coupled with choices usually made by the analyst during the learning process, that determines the size of the Rashomon ratio. Specifically, we demonstrate that noisier datasets lead to larger Rashomon ratios through the way that practitioners train models. Additionally, we introduce a measure called pattern diversity, which captures the average difference in predictions between distinct classification patterns in the Rashomon set, and motivate why it tends to increase with label noise. 
Our results explain a key aspect of why simpler models often tend to perform as well as black box models on complex, noisier datasets.", "keywords": "Rashomon Set;Simplicity;Interpretable Machine Learning;Model Selection;Model Multiplicity", "primary_area": "", "supplementary_material": "/attachment/de315f08aa5056098be83dcb631ee63110ce743f.zip", "author": "Lesia Semenova;Harry Chen;Ronald Parr;Cynthia Rudin", "authorids": "~Lesia_Semenova1;harry.chen084@duke.edu;~Ronald_Parr1;~Cynthia_Rudin1", "gender": ";;Not Specified;", "homepage": ";;https://users.cs.duke.edu/~parr/;", "dblp": ";;26/4670;", "google_scholar": ";;https://scholar.google.com.tw/citations?user=b-GJ3QIAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lesia_Semenova1;harry.chen084@duke.edu;~Ronald_Parr1;~Cynthia_Rudin1", "aff": ";;Duke University;", "aff_domain": ";;duke.edu;", "position": ";;Full Professor;", "bibtex": "@inproceedings{\nsemenova2023a,\ntitle={A Path to Simpler Models Starts With Noise},\nauthor={Lesia Semenova and Harry Chen and Ronald Parr and Cynthia Rudin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Uzi22WryyX}\n}", "github": "", "project": "", "reviewers": "y6w5;26ns;CJvU;9wZ5", "pdf_size": 1615996, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "92;71;90;165", "wc_strengths": "81;48;38;124", "wc_weaknesses": "237;204;91;212", "wc_questions": "101;29;1;79", "wc_limitations": "8;1;24;72", "wc_review": "519;353;244;652", "wc_reply_reviewers": "160;21;0;90", "wc_reply_authors": "407;0;0;88", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.5, 35.878266401820476 ], "wc_strengths_avg": [ 72.75, 33.5959446957516 ], "wc_weaknesses_avg": [ 186.0, 56.182737562350944 ], "wc_questions_avg": [ 52.5, 39.55692101263697 ], "wc_limitations_avg": [ 26.25, 27.698149757700424 ], "wc_review_avg": [ 442.0, 155.8476820488518 ], "wc_reply_reviewers_avg": [ 67.75, 62.810727586933744 ], "wc_reply_authors_avg": [ 123.75, 167.43412883877647 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13397636642937957388&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 10, "email": ";;duke.edu;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Effective Human-AI Teams via Learned Natural Language Rules and Onboarding", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71481", "id": "V2yFumwo5B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61355b9c218505505d1bedede9da56b2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V2yFumwo5B", "openreview": "https://openreview.net/forum?id=V2yFumwo5B", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71481", "video": "https://nips.cc/virtual/2023/poster/71481", "author_site": "Hussein 
Mozannar, Jimin Lee, Dennis Wei, Prasanna Sattigeri, Subhro Das, David Sontag", "tldr": "", "abstract": "People are relying on AI agents to assist them with various tasks. The human must know when to rely on the agent, collaborate with the agent, or ignore its suggestions. In this work, we propose to learn rules grounded in data regions and described in natural language that illustrate how the human should collaborate with the AI. Our novel region discovery algorithm finds local regions in the data as neighborhoods in an embedding space that corrects the human prior. Each region is then described using an iterative and contrastive procedure where a large language model describes the region. We then teach these rules to the human via an onboarding stage. Through user studies on object detection and question-answering tasks, we show that our method can lead to more accurate human-AI teams. We also evaluate our region discovery and description algorithms separately.", "keywords": "human-ai;collaboration;onboarding;region-discovery;LLM;data description", "primary_area": "", "supplementary_material": "", "author": "Hussein Mozannar;Jimin J Lee;Dennis Wei;Prasanna Sattigeri;Subhro Das;David Sontag", "authorids": "~Hussein_Mozannar1;~Jimin_J_Lee1;~Dennis_Wei1;~Prasanna_Sattigeri1;~Subhro_Das1;~David_Sontag1", "gender": "M;F;M;;;M", "homepage": "https://husseinmozannar.github.io/;;https://sites.google.com/site/dennislwei/;;;http://people.csail.mit.edu/dsontag/", "dblp": ";;59/8761;00/7428;;12/673", "google_scholar": "XCfZyIkAAAAJ;;r4ldy4AAAAAJ;m-s38ikAAAAJ;;LfcroyAAAAAJ", "orcid": ";;;0000-0003-4435-0486;;0000-0002-5034-7796", "linkedin": ";gminnout/;dennis-wei-4886036b/;prasannasattigeri/;;", "or_profile": "~Hussein_Mozannar1;~Jimin_J_Lee1;~Dennis_Wei1;~Prasanna_Sattigeri1;~Subhro_Das1;~David_Sontag1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;International Business Machines;IBM Research;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;ibm.com;ibm.com;;mit.edu", "position": "PhD student;Undergrad student;Research Staff Member;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nmozannar2023effective,\ntitle={Effective Human-{AI} Teams via Learned Natural Language Rules and Onboarding},\nauthor={Hussein Mozannar and Jimin J Lee and Dennis Wei and Prasanna Sattigeri and Subhro Das and David Sontag},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V2yFumwo5B}\n}", "github": "", "project": "", "reviewers": "Ea7z;YAuF;doL5;oGDF", "pdf_size": 17868308, "rating": "5;6;6;8", "confidence": "3;4;5;4", "soundness": "3;2;3;3", "novelty": "3;4;3;3", "presentation": "2;3;3;3", "wc_summary": "87;147;85;104", "wc_strengths": "23;24;40;139", "wc_weaknesses": "32;144;452;59", "wc_questions": "28;3;197;46", "wc_limitations": "6;219;45;46", "wc_review": "176;537;819;394", "wc_reply_reviewers": "38;752;0;0", "wc_reply_authors": "28;1159;0;0", "reply_reviewers": "1;5;0;0", "reply_authors": "2;5;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.75, 24.933661985356263 ], "wc_strengths_avg": [ 56.5, 48.106652346634974 ], "wc_weaknesses_avg": [ 171.75, 166.9975673475515 ], "wc_questions_avg": [ 68.5, 75.74463677383369 ], "wc_limitations_avg": [ 79.0, 
82.42269105046256 ], "wc_review_avg": [ 481.5, 233.43789323929394 ], "wc_reply_reviewers_avg": [ 197.5, 320.51638023664253 ], "wc_reply_authors_avg": [ 296.75, 497.95149111133304 ], "reply_reviewers_avg": [ 1.5, 2.0615528128088303 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=25245877711144656&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "mit.edu;mit.edu;ibm.com;ibm.com;;mit.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation;IBM", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "MIT;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Temporal Conditioning Spiking Latent Variable Models of the Neural Response to Natural Visual Scenes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71480", "id": "V4YeOvsQfu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0bcf9cf6ffe26bba3af99e18be0e1d8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V4YeOvsQfu", "openreview": "https://openreview.net/forum?id=V4YeOvsQfu", "poster": "/media/PosterPDFs/NeurIPS%202023/71480.png?t=1698131412.3568482", "slides": "https://nips.cc/virtual/2023/poster/71480", "video": "https://nips.cc/virtual/2023/poster/71480", "author_site": "Gehua Ma, Runhao Jiang, Rui Yan, Huajin Tang", "tldr": "", "abstract": "Developing computational models of neural response is crucial for understanding sensory processing and neural computations. Current state-of-the-art neural network methods use temporal filters to handle temporal dependencies, resulting in an **unrealistic and inflexible processing paradigm**. Meanwhile, these methods target **trial-averaged firing rates** and fail to capture important features in spike trains. This work presents the temporal conditioning spiking latent variable models (***TeCoS-LVM***) to simulate the neural response to natural visual stimuli. We use spiking neurons to produce spike outputs that directly match the recorded trains. This approach helps to avoid losing information embedded in the original spike trains. We exclude the temporal dimension from the model parameter space and introduce a temporal conditioning operation to allow the model to adaptively explore and exploit temporal dependencies in stimuli sequences in a **natural paradigm**. We show that TeCoS-LVM models can produce more realistic spike activities and accurately fit spike statistics than powerful alternatives. Additionally, learned TeCoS-LVM models can generalize well to longer time scales. Overall, while remaining computationally tractable, our model effectively captures key features of neural coding systems. 
It thus provides a useful tool for building accurate predictive computational accounts for various sensory perception circuits.", "keywords": "neuroscience;neural coding;sensory neuroscience;visual coding;SNN;spiking neural networks;generative model;latent variable model;cognitive computational neuroscience;computational neuroscience", "primary_area": "", "supplementary_material": "/attachment/01884dc3103de5985de3ff10689adab2f1fe3fea.pdf", "author": "Gehua Ma;Runhao Jiang;Rui Yan;Huajin Tang", "authorids": "~Gehua_Ma1;~Runhao_Jiang1;~Rui_Yan9;~Huajin_Tang1", "gender": "M;M;F;M", "homepage": "https://genema.github.io;;;https://person.zju.edu.cn/htang", "dblp": "https://dblp.uni-trier.de/pid/348/6861;282/7758;;18/434", "google_scholar": ";inz6ULsAAAAJ;QnEkkWQAAAAJ;U041O4QAAAAJ", "orcid": ";;;", "linkedin": ";runhao-jiang-b22356163/;;", "or_profile": "~Gehua_Ma1;~Runhao_Jiang1;~Rui_Yan9;~Huajin_Tang1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University of Technology;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zjut.edu.cn;zju.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2023temporal,\ntitle={Temporal Conditioning Spiking Latent Variable Models of the Neural Response to Natural Visual Scenes},\nauthor={Gehua Ma and Runhao Jiang and Rui Yan and Huajin Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V4YeOvsQfu}\n}", "github": "", "project": "", "reviewers": "j9bD;qcrE;qZ33;QFPa", "pdf_size": 4350005, "rating": "5;6;6;7", "confidence": "2;3;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "67;69;46;120", "wc_strengths": "74;96;50;21", "wc_weaknesses": "186;355;108;109", "wc_questions": "233;8;56;141", "wc_limitations": "5;17;9;8", "wc_review": "565;545;269;399", "wc_reply_reviewers": "95;27;29;92", "wc_reply_authors": "271;38;34;199", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 27.225906780123964 ], "wc_strengths_avg": [ 60.25, 27.896012259819503 ], "wc_weaknesses_avg": [ 189.5, 100.65411069598697 ], "wc_questions_avg": [ 109.5, 85.74526225978903 ], "wc_limitations_avg": [ 9.75, 4.437059837324712 ], "wc_review_avg": [ 444.5, 119.88640456699 ], "wc_reply_reviewers_avg": [ 60.75, 32.774799770555425 ], "wc_reply_authors_avg": [ 135.5, 102.71440989461995 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18320522000415697573&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;zjut.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Zhejiang University;Zhejiang University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.zjut.edu.cn", "aff_unique_abbr": "ZJU;ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Perceptual Kalman Filters: Online State Estimation under a Perfect Perceptual-Quality Constraint", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71479", "id": "V4hqq2NGTW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c7f43ada17acc234f568dc66da527418-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V4hqq2NGTW", "openreview": "https://openreview.net/forum?id=V4hqq2NGTW", "poster": "/media/PosterPDFs/NeurIPS%202023/71479.png?t=1699544143.6985004", "slides": "https://nips.cc/virtual/2023/poster/71479", "video": "https://nips.cc/virtual/2023/poster/71479", "author_site": "Dror Freirich, Tomer Michaeli, Ron Meir", "tldr": "", "abstract": "Many practical settings call for the reconstruction of temporal signals from corrupted or missing data. Classic examples include decoding, tracking, signal enhancement and denoising. Since the reconstructed signals are ultimately viewed by humans, it is desirable to achieve reconstructions that are pleasing to human perception.\nMathematically, perfect perceptual-quality is achieved when the distribution of restored signals is the same as that of natural signals, a requirement which has been heavily researched in static estimation settings (i.e. when a whole signal is processed at once). \nHere, we study the problem of optimal causal filtering under a perfect perceptual-quality constraint, which is a task of fundamentally different nature. \nSpecifically, we analyze a Gaussian Markov signal observed through a linear noisy transformation. In the absence of perceptual constraints, the Kalman filter is known to be optimal in the MSE sense for this setting. Here, we show that adding the perfect perceptual quality constraint (i.e. the requirement of temporal consistency), introduces a fundamental dilemma whereby the filter may have to ``knowingly'' ignore new information revealed by the observations in order to conform to its past decisions. This often comes at the cost of a significant increase in the MSE (beyond that encountered in static settings). Our analysis goes beyond the classic innovation process of the Kalman filter, and introduces the novel concept of an unutilized information process. 
Using this tool, we present a recursive formula for perceptual filters, and demonstrate the qualitative effects of perfect perceptual-quality estimation on a video reconstruction problem.", "keywords": "Kalman filter;estimation theory;causal filtering;signal processing;distortion-perception tradeoff", "primary_area": "", "supplementary_material": "/attachment/7d885ad19bf2c637e6fb5d2f4f82ac6f9690b43e.zip", "author": "Dror Freirich;Tomer Michaeli;Ron Meir", "authorids": "~Dror_Freirich1;~Tomer_Michaeli1;~Ron_Meir1", "gender": "M;M;M", "homepage": ";https://tomer.net.technion.ac.il/;https://ronmeir.net.technion.ac.il/", "dblp": "180/5279;70/3188.html;", "google_scholar": "mb4KtXcAAAAJ;n2EbR2cAAAAJ;https://scholar.google.co.il/citations?user=r3NAa9oAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Dror_Freirich1;~Tomer_Michaeli1;~Ron_Meir1", "aff": "Technion - Israel Institute of Technology, Technion;Technion, Technion;Technion, Technion", "aff_domain": "technion.ac.il;technion.ac.il;technion.ac.il", "position": "Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nfreirich2023perceptual,\ntitle={Perceptual Kalman Filters: Online State Estimation under a Perfect Perceptual-Quality Constraint},\nauthor={Dror Freirich and Tomer Michaeli and Ron Meir},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V4hqq2NGTW}\n}", "github": "", "project": "", "reviewers": "3Cww;c5sG;j8n4;ZP1L", "pdf_size": 3703801, "rating": "4;6;6;7", "confidence": "4;3;4;2", "soundness": "3;4;4;3", "novelty": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "53;51;105;65", "wc_strengths": "39;34;70;81", "wc_weaknesses": "24;31;105;46", "wc_questions": "72;12;19;3", "wc_limitations": "23;1;11;12", "wc_review": "211;129;310;207", "wc_reply_reviewers": "22;11;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 68.5, 21.742814905158898 ], "wc_strengths_avg": [ 56.0, 19.96246477767713 ], "wc_weaknesses_avg": [ 51.5, 31.894356867634123 ], "wc_questions_avg": [ 26.5, 26.874709300753377 ], "wc_limitations_avg": [ 11.75, 7.790218225441442 ], "wc_review_avg": [ 214.25, 64.22373003804746 ], "wc_reply_reviewers_avg": [ 8.25, 9.12071817347735 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7185247338138457507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "technion.ac.il;technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Strategic Behavior in Two-sided Matching Markets with Prediction-enhanced Preference-formation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71478", "id": "V5FNSilWiC", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/70c6d82d27cd96c501c4def4803d5782-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V5FNSilWiC", "openreview": "https://openreview.net/forum?id=V5FNSilWiC", "poster": "/media/PosterPDFs/NeurIPS%202023/71478.png?t=1699606636.890206", "slides": "https://nips.cc/virtual/2023/poster/71478", "video": "https://nips.cc/virtual/2023/poster/71478", "author_site": "Stefania Ionescu, Yuhao Du, Kenneth Joseph, Ancsa Hannak", "tldr": "", "abstract": "Two-sided matching markets have long existed to pair agents in the absence of regulated exchanges. A common example is school choice, where a matching mechanism uses student and school preferences to assign students to schools. In such settings, forming preferences is both difficult and critical. Prior work has suggested various prediction mechanisms that help agents make decisions about their preferences. Although often deployed together, these matching and prediction mechanisms are almost always analyzed separately. The present work shows that at the intersection of the two lies a previously unexplored type of strategic behavior: agents returning to the market (e.g., schools) can attack future predictions by interacting short-term non-optimally with their matches. Here, we first introduce this type of strategic behavior, which we call an adversarial interaction attack. Next, we construct a formal economic model that captures the feedback loop between prediction mechanisms designed to assist agents and the matching mechanism used to pair them. Finally, in a simplified setting, we prove that returning agents can benefit from using adversarial interaction attacks and gain progressively more as the trust in and accuracy of predictions increases. 
We also show that this attack increases inequality in the student population.", "keywords": "matching markets;strategic behaviour;ML-based forecasting;recommender systems;adversarial attacks;agent-based modelling", "primary_area": "", "supplementary_material": "/attachment/c3627c20a0f0a19cf2fea06b27feefffdd311d54.pdf", "author": "Stefania Ionescu;Yuhao Du;Kenneth Joseph;Aniko Hannak", "authorids": "~Stefania_Ionescu1;~Yuhao_Du1;~Kenneth_Joseph1;hannak@ifi.uzh.ch", "gender": "F;M;;", "homepage": ";;;", "dblp": "286/6560;;126/6273;", "google_scholar": "hwL_9wIAAAAJ;;;", "orcid": "0000-0002-6612-5856;;;", "linkedin": ";;;", "or_profile": "~Stefania_Ionescu1;~Yuhao_Du1;~Kenneth_Joseph1;hannak@ifi.uzh.ch", "aff": "Department of Informatics, University of Zurich, University of Zurich;;State University of New York at Buffalo;", "aff_domain": "ifi.uzh.ch;;buffalo.edu;", "position": "PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nionescu2023strategic,\ntitle={Strategic Behavior in Two-sided Matching Markets with Prediction-enhanced Preference-formation},\nauthor={Stefania Ionescu and Yuhao Du and Kenneth Joseph and Aniko Hannak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V5FNSilWiC}\n}", "github": "", "project": "", "reviewers": "UhQP;Zyns;EhdA", "pdf_size": 343159, "rating": "6;6;7", "confidence": "3;2;4", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;4;4", "wc_summary": "51;126;67", "wc_strengths": "29;108;82", "wc_weaknesses": "27;16;41", "wc_questions": "23;1;107", "wc_limitations": "1;1;59", "wc_review": "131;252;356", "wc_reply_reviewers": "13;15;26", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 81.33333333333333, 32.25247621845836 ], "wc_strengths_avg": [ 73.0, 32.873494895837695 ], "wc_weaknesses_avg": [ 28.0, 10.23067283548187 ], "wc_questions_avg": [ 43.666666666666664, 45.67518168789504 ], "wc_limitations_avg": [ 20.333333333333332, 27.34146220587984 ], "wc_review_avg": [ 246.33333333333334, 91.94321919291033 ], "wc_reply_reviewers_avg": [ 18.0, 5.715476066494082 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5923983856792535887&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ifi.uzh.ch;;buffalo.edu;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Zurich;State University of New York at Buffalo", "aff_unique_dep": "Department of Informatics;", "aff_unique_url": "https://www.uzh.ch;https://www.buffalo.edu", "aff_unique_abbr": "UZH;SUNY Buffalo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Causal Effect Regularization: Automated Detection and Removal of Spurious Correlations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71477", "id": "V5Oh7Aqfft", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/42770daf4a3384b712ea9c36e9279998-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V5Oh7Aqfft", "openreview": "https://openreview.net/forum?id=V5Oh7Aqfft", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71477", "video": "https://nips.cc/virtual/2023/poster/71477", "author_site": "Abhinav Kumar, Amit Deshpande, Amit Sharma", "tldr": "", "abstract": "In many classification datasets, the task labels are spuriously correlated with some input attributes. Classifiers trained on such datasets often rely on these attributes for prediction, especially when the spurious correlation is high, and thus fail to\ngeneralize whenever there is a shift in the attributes\u2019 correlation at deployment. If we assume that the spurious attributes are known a priori, several methods have been proposed to learn a classifier that is invariant to the specified attributes. However, in real-world data, information about spurious attributes is typically unavailable. Therefore, we propose a method that automatically identifies spurious attributes by estimating their causal effect on the label and then uses a regularization objective to mitigate the classifier\u2019s reliance on them. Although causal effect of an attribute on the label is not always identified, we present two commonly occurring data-generating processes where the effect can be identified. Compared to recent work for identifying spurious attributes, we find that our method, AutoACER, is\nmore accurate in removing the attribute from the learned model, especially when spurious correlation is high. Specifically, across synthetic, semi-synthetic, and real-world datasets, AutoACER shows significant improvement in a metric used to quantify the dependence of a classifier on spurious attributes ($\\Delta$Prob), while obtaining better or similar accuracy. Empirically we find that AutoACER mitigates\nthe reliance on spurious attributes even under noisy estimation of causal effects or when the causal effect is not identified. To explain the empirical robustness of our method, we create a simple linear classification task with two sets of attributes: causal and spurious. 
Under this setting, we prove that AutoACER only requires the ranking of estimated causal effects to be correct across attributes to select the\ncorrect classifier.", "keywords": "Spurious Correlation;Out of Distribution Generalization", "primary_area": "", "supplementary_material": "", "author": "Abhinav Kumar;Amit Deshpande;Amit Sharma", "authorids": "~Abhinav_Kumar3;~Amit_Deshpande1;~Amit_Sharma3", "gender": "M;M;M", "homepage": "https://abhinavkumar.info/;;http://amitsharma.in/", "dblp": "115/6458;28/6953-1;72/2540-7", "google_scholar": "n0fNl3oAAAAJ;;https://scholar.google.co.in/citations?user=CXgQufgAAAAJ", "orcid": ";;0000-0002-2086-3191", "linkedin": "abhinav-kumar-99b29a16b/;;", "or_profile": "~Abhinav_Kumar3;~Amit_Deshpande1;~Amit_Sharma3", "aff": "Microsoft Research;Microsoft Research;Microsoft Research", "aff_domain": "research.microsoft.com;microsoft.com;microsoft.com", "position": "Intern;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nkumar2023causal,\ntitle={Causal Effect Regularization: Automated Detection and Removal of Spurious Correlations},\nauthor={Abhinav Kumar and Amit Deshpande and Amit Sharma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V5Oh7Aqfft}\n}", "github": "", "project": "", "reviewers": "quUc;nk8U;zxmG;pmkx", "pdf_size": 1781801, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "4;3;4;4", "wc_summary": "58;100;51;121", "wc_strengths": "28;31;65;29", "wc_weaknesses": "89;285;92;163", "wc_questions": "187;5;68;40", "wc_limitations": "12;7;23;22", "wc_review": "374;428;299;375", "wc_reply_reviewers": "77;89;0;33", "wc_reply_authors": "707;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 29.073183520213263 ], "wc_strengths_avg": [ 38.25, 15.481844205391036 ], "wc_weaknesses_avg": [ 157.25, 79.48073665989766 ], "wc_questions_avg": [ 75.0, 68.40687099992222 ], "wc_limitations_avg": [ 16.0, 6.745368781616021 ], "wc_review_avg": [ 369.0, 45.94017849334066 ], "wc_reply_reviewers_avg": [ 49.75, 35.49207658055527 ], "wc_reply_authors_avg": [ 176.75, 306.1399802377991 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1674581720975139552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "research.microsoft.com;microsoft.com;microsoft.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Lockdown: Backdoor Defense for Federated Learning with Isolated Subspace Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71476", "id": "V5cQH7JbGo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2376f25ef1725a9e3516ee3c86a59f46-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=V5cQH7JbGo", "openreview": "https://openreview.net/forum?id=V5cQH7JbGo", "poster": "/media/PosterPDFs/NeurIPS%202023/71476.png?t=1702501895.7710574", "slides": "https://nips.cc/virtual/2023/poster/71476", "video": "https://nips.cc/virtual/2023/poster/71476", "author_site": "Tiansheng Huang, Sihao Hu, Ka-Ho Chow, Fatih Ilhan, Selim Tekin, Ling Liu", "tldr": "", "abstract": "Federated learning (FL) is vulnerable to backdoor attacks due to its distributed computing nature. Existing defense solution \nusually requires larger amount of computation in either the training or testing phase, which limits their practicality in the resource-constrain scenarios. A more practical defense, i.e., neural network (NN) pruning based defense has been proposed in centralized backdoor setting. However, our empirical study shows that traditional pruning-based solution suffers \\textit{poison-coupling} effect in FL, which significantly degrades the defense performance.This paper presents Lockdown, an isolated subspace training method to mitigate the poison-coupling effect. Lockdown follows three key procedures. First, it modifies the training protocol by isolating the training subspaces for different clients. Second, it utilizes randomness in initializing isolated subspacess, and performs subspace pruning and subspace recovery to segregate the subspaces between malicious and benign clients. Third, it introduces quorum consensus to cure the global model by purging malicious/dummy parameters. Empirical results show that Lockdown achieves \\textit{superior} and \\textit{consistent} defense performance compared to existing representative approaches against backdoor attacks. Another value-added property of Lockdown is the communication-efficiency and model complexity reduction, which are both critical for resource-constrain FL scenario. 
Our code is available at \\url{https://github.com/git-disl/Lockdown}.", "keywords": "Federated learning;backdoor defense;isolated subspace training.", "primary_area": "", "supplementary_material": "/attachment/4329cf6aeacb3b7c477803b4d975c23bdc6c2267.pdf", "author": "Tiansheng Huang;Sihao Hu;Ka-Ho Chow;Fatih Ilhan;Selim Furkan Tekin;Ling Liu", "authorids": "~Tiansheng_Huang1;~Sihao_Hu1;~Ka-Ho_Chow1;~Fatih_Ilhan1;~Selim_Furkan_Tekin1;~Ling_Liu3", "gender": "M;M;M;M;M;", "homepage": "https://huangtiansheng.github.io/;https://bayi-hu.github.io/;https://khchow.com;https://fatih-ilhan.github.io/;https://sftekin.com/;", "dblp": "249/2114;266/4995;51/8062.html;260/0254;283/8174;", "google_scholar": "zz6Oq8wAAAAJ;WcZbKF4AAAAJ;2spsaBIAAAAJ;https://scholar.google.com.tr/citations?hl=en;a2O4Zx0AAAAJ;", "orcid": "0000-0002-4557-1865;0000-0003-3297-6991;0000-0001-5917-2577;0000-0002-0173-7544;0000-0002-8662-3609;", "linkedin": ";;khchow;;;", "or_profile": "~Tiansheng_Huang1;~Sihao_Hu1;~Ka-Ho_Chow1;~Fatih_Ilhan1;~Selim_Furkan_Tekin1;~Ling_Liu3", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;College of Computing, Georgia Institute of Technology;", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu;", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nhuang2023lockdown,\ntitle={Lockdown: Backdoor Defense for Federated Learning with Isolated Subspace Training},\nauthor={Tiansheng Huang and Sihao Hu and Ka-Ho Chow and Fatih Ilhan and Selim Furkan Tekin and Ling Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V5cQH7JbGo}\n}", "github": "", "project": "", "reviewers": "t853;dbHt;ozxV;ftvs", "pdf_size": 3495658, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "4;3;3;3", "novelty": "4;3;3;3", "presentation": "4;4;2;3", "wc_summary": "64;100;115;69", "wc_strengths": "47;57;30;43", "wc_weaknesses": "105;102;51;47", "wc_questions": "273;345;106;8", "wc_limitations": "2;3;2;10", "wc_review": "491;607;304;177", "wc_reply_reviewers": "177;0;118;9", "wc_reply_authors": "795;133;248;31", "reply_reviewers": "1;0;2;1", "reply_authors": "4;3;4;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 87.0, 21.24852936087578 ], "wc_strengths_avg": [ 44.25, 9.67923034130297 ], "wc_weaknesses_avg": [ 76.25, 27.307279249313726 ], "wc_questions_avg": [ 183.0, 133.1333917542853 ], "wc_limitations_avg": [ 4.25, 3.344772040064913 ], "wc_review_avg": [ 394.75, 165.80466670151355 ], "wc_reply_reviewers_avg": [ 76.0, 74.54864183873507 ], "wc_reply_authors_avg": [ 301.75, 294.94353273126706 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8124889736962830371&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": 
"https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Localized Symbolic Knowledge Distillation for Visual Commonsense Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71475", "id": "V5eG47pyVl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/257be12f31dfa7cc158dda99822c6fd1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V5eG47pyVl", "openreview": "https://openreview.net/forum?id=V5eG47pyVl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71475", "video": "https://nips.cc/virtual/2023/poster/71475", "author_site": "Jae Sung Park, Jack Hessel, Khyathi Chandu, Paul Pu Liang, Ximing Lu, Peter West, Youngjae Yu, Qiuyuan Huang, Jianfeng Gao, Ali Farhadi, Yejin Choi", "tldr": "", "abstract": "Instruction following vision-language (VL) models offer a flexible\ninterface that supports a broad range of multimodal tasks in a zero-shot fashion.\nHowever, interfaces that operate on full images do not directly enable the user to\n\u201cpoint to\" and access specific regions within images. This capability is important\nnot only to support reference-grounded VL benchmarks, but also, for practical\napplications that require precise within-image reasoning. We build Localized\nVisual Commonsense model which allows users to specify (multiple) regions-\nas-input. We train our model by sampling localized commonsense knowledge\nfrom a large language model (LLM): specifically, we prompt a LLM to collect\ncommonsense knowledge given a global literal image description and a local\nliteral region description automatically generated by a set of VL models. This\npipeline is scalable and fully automatic, as no aligned or human-authored image\nand text pairs are required. With a separately trained critic model that selects\nhigh quality examples, we find that training on the localized commonsense corpus\nexpanded solely from images can successfully distill existing VL models to support\na reference-as-input interface. 
Empirical results and human evaluations in zero-shot\nsettings demonstrate that our distillation method results in more precise VL models\nof reasoning compared to a baseline of passing a generated referring expression.", "keywords": "multimodal;commonsense reasoning;instruction tuning;large language model", "primary_area": "", "supplementary_material": "/attachment/2c9a334e0e6916ab5ab60b8bf10b694551bbe573.pdf", "author": "Jae Sung Park;Jack Hessel;Khyathi Chandu;Paul Pu Liang;Ximing Lu;Peter West;Youngjae Yu;Qiuyuan Huang;Jianfeng Gao;Ali Farhadi;Yejin Choi", "authorids": "~Jae_Sung_Park2;~Jack_Hessel1;~Khyathi_Chandu1;~Paul_Pu_Liang1;~Ximing_Lu1;~Peter_West1;~Youngjae_Yu1;~Qiuyuan_Huang1;~Jianfeng_Gao1;~Ali_Farhadi3;~Yejin_Choi1", "gender": ";M;;M;F;M;M;F;M;M;F", "homepage": "https://homes.cs.washington.edu/~jspark96/;https://www.jmhessel.com;;https://pliang279.github.io/;https://gloriaximinglu.github.io/;https://peterwestai.notion.site/;https://yj-yu.github.io/home/;https://www.microsoft.com/en-us/research/people/qihua/;https://www.microsoft.com/en-us/research/people/jfgao/;https://homes.cs.washington.edu/~ali/;https://yejinc.github.io/", "dblp": ";https://dblp.uni-trier.de/pid/132/5250.html;;207/9749;24/10879;179/4587;188/6210;;92/5339;37/5826;89/579-1", "google_scholar": "hD2WqqcAAAAJ;SxQQ1msAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=9ubCBYwAAAAJ;https://scholar.google.co.kr/citations?user=WDO24ZYAAAAJ;U7Mmyc8AAAAJ;https://scholar.google.com/citations?hl=en;jeOFRDsAAAAJ;vhP-tlcAAAAJ", "orcid": ";0000-0002-4012-8979;;;;;;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Jae_Sung_Park2;~Jack_Hessel1;~Khyathi_Chandu1;~Paul_Pu_Liang1;~Ximing_Lu1;~Peter_West1;~Youngjae_Yu1;~Qiuyuan_Huang1;~Jianfeng_Gao1;~Ali_Farhadi3;~Yejin_Choi1", "aff": "University of Washington;Allen Institute for Artificial Intelligence;;Carnegie Mellon University;University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Microsoft Research, Redmond;Microsoft Research;University of Washington;Department of Computer Science, University of Washington", "aff_domain": "washington.edu;allenai.org;;cs.cmu.edu;cs.washington.edu;allenai.org;allenai.org;microsoft.com;microsoft.com;cs.uw.edu;cs.washington.edu", "position": "PhD student;Researcher;;PhD student;Undergrad student;Intern;Postdoc;Principal Researcher;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\npark2023localized,\ntitle={Localized Symbolic Knowledge Distillation for Visual Commonsense Models},\nauthor={Jae Sung Park and Jack Hessel and Khyathi Chandu and Paul Pu Liang and Ximing Lu and Peter West and Youngjae Yu and Qiuyuan Huang and Jianfeng Gao and Ali Farhadi and Yejin Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V5eG47pyVl}\n}", "github": "", "project": "", "reviewers": "uG1j;MRhi;Ta54;Cm2R;UzVU", "pdf_size": 1571905, "rating": "5;5;5;6;8", "confidence": "4;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "50;113;85;77;165", "wc_strengths": "48;62;55;45;129", "wc_weaknesses": "118;324;193;58;205", "wc_questions": "33;2;35;55;85", "wc_limitations": "12;1;1;1;11", "wc_review": "261;502;369;236;595", "wc_reply_reviewers": "43;0;0;47;33", "wc_reply_authors": "32;0;0;189;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;2;1", 
"rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.0, 39.05892983685037 ], "wc_strengths_avg": [ 67.8, 31.16023106461183 ], "wc_weaknesses_avg": [ 179.6, 89.72981667205167 ], "wc_questions_avg": [ 42.0, 27.378823933836166 ], "wc_limitations_avg": [ 5.2, 5.1536394906900505 ], "wc_review_avg": [ 392.6, 138.08200462044286 ], "wc_reply_reviewers_avg": [ 24.6, 20.597087172704786 ], "wc_reply_authors_avg": [ 44.2, 73.45311429748912 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12923000145976044360&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "washington.edu;allenai.org;;cs.cmu.edu;cs.washington.edu;allenai.org;allenai.org;microsoft.com;microsoft.com;cs.uw.edu;cs.washington.edu", "author_num": 11, "aff_unique_index": "0;1;2;0;1;1;3;3;0;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence;Carnegie Mellon University;Microsoft", "aff_unique_dep": ";;;Microsoft Research", "aff_unique_url": "https://www.washington.edu;https://allenai.org;https://www.cmu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UW;AI2;CMU;MSR", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Redmond;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SEEDS: Exponential SDE Solvers for Fast High-Quality Sampling from Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71474", "id": "V6IgkYKD8P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6f764aae383d9ff28a0f89f71defbd9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V6IgkYKD8P", "openreview": "https://openreview.net/forum?id=V6IgkYKD8P", "poster": "/media/PosterPDFs/NeurIPS%202023/71474.png?t=1699610126.5521808", "slides": "https://nips.cc/virtual/2023/poster/71474", "video": "https://nips.cc/virtual/2023/poster/71474", "author_site": "Martin Gonzalez, Nelson Fernandez Pinto, Thuy Tran, elies Gherbi, Hatem Hajri, Nader Masmoudi", "tldr": "", "abstract": "A potent class of generative models known as Diffusion Probabilistic Models\n(DPMs) has become prominent. A forward diffusion process adds gradually noise\nto data, while a model learns to gradually denoise. Sampling from pre-trained\nDPMs is obtained by solving differential equations (DE) defined by the learnt\nmodel, a process which has shown to be prohibitively slow. Numerous efforts on\nspeeding-up this process have consisted on crafting powerful ODE solvers.\nDespite being quick, such solvers do not usually reach the optimal quality\nachieved by available slow SDE solvers. Our goal is to propose SDE solvers that\nreach optimal quality without requiring several hundreds or thousands of NFEs\nto achieve that goal. We propose Stochastic Explicit Exponential\nDerivative-free Solvers (SEEDS), improving and generalizing Exponential\nIntegrator approaches to the stochastic case on several frameworks. \nAfter carefully analyzing the formulation of exact\nsolutions of diffusion SDEs, we craft SEEDS to analytically compute the linear\npart of such solutions. 
Inspired by the Exponential Time-Differencing method,\nSEEDS use a novel treatment of the stochastic components of solutions,\nenabling the analytical computation of their variance, and contain high-order\nterms that allow them to reach optimal-quality sampling $\\sim3$-$5\\times$ faster than previous\nSDE methods. We validate our approach on several image generation benchmarks,\nshowing that SEEDS outperform or are competitive with previous SDE solvers.\nContrary to the latter, SEEDS are derivative- and training-free, and we fully\nprove strong convergence guarantees for them.", "keywords": "Diffusion Probabilistic Models;Exponential SDE methods;Image Generation;Generative Models", "primary_area": "", "supplementary_material": "/attachment/7d4b81a7d249fb1e7af4518210ea1bca8a268343.zip", "author": "Martin Gonzalez;Nelson Fernandez;Thuy Vinh Dinh Tran;Elies Gherbi;Hatem Hajri;Nader Masmoudi", "authorids": "~Martin_Gonzalez2;~Nelson_Fernandez1;~Thuy_Vinh_Dinh_Tran1;~Elies_Gherbi1;~Hatem_Hajri1;~Nader_Masmoudi1", "gender": "M;;M;;M;M", "homepage": "https://sites.google.com/view/martin-gonzalez/;;;https://scholar.google.com/citations?user=Le-32-wAAAAJ&hl=fr&oi=sra;https://hhajri.org/;https://math.nyu.edu/~masmoudi/", "dblp": "321/1754;314/5347;83/1257;;;", "google_scholar": ";;;Le-32-wAAAAJ;;", "orcid": "0000-0002-1176-2022;;;;;", "linkedin": ";nelson-fernandez-pinto/;dinh-vinh-thuy-tran-693ba4251/;;hatem-hajri-b7a25412a/;", "or_profile": "~Martin_Gonzalez2;~Nelson_Fernandez1;~Thuy_Vinh_Dinh_Tran1;~Elies_Gherbi1;~Hatem_Hajri1;~Nader_Masmoudi1", "aff": "IRT SystemX;;Universit\u00e9 de Limoges;Universit\u00e9 Vincennes Saint-Denis (Paris VIII);IRT SystemX;New York University", "aff_domain": "irt-systemx.fr;;unilim.fr;univ-paris8.fr;irt-systemx.fr;nyu.edu", "position": "Researcher;;MS student;Researcher;Senior researcher;Full Professor", "bibtex": "@inproceedings{\ngonzalez2023seeds,\ntitle={{SEEDS}: Exponential {SDE} Solvers for Fast High-Quality Sampling from Diffusion Models},\nauthor={Martin Gonzalez and Nelson Fernandez and Thuy Vinh Dinh Tran and Elies Gherbi and Hatem Hajri and Nader Masmoudi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V6IgkYKD8P}\n}", "github": "", "project": "", "reviewers": "MHs8;CfgK;YvZd;daVP;nfDR", "pdf_size": 15375550, "rating": "3;3;6;6;6", "confidence": "3;3;4;4;4", "soundness": "2;2;3;3;3", "novelty": "1;1;3;2;3", "presentation": "2;3;3;3;3", "wc_summary": "21;58;62;61;40", "wc_strengths": "14;81;53;70;65", "wc_weaknesses": "142;202;27;205;278", "wc_questions": "2;59;70;52;48", "wc_limitations": "1;7;8;8;4", "wc_review": "180;407;220;396;435", "wc_reply_reviewers": "0;455;0;40;0", "wc_reply_authors": "0;2523;0;37;0", "reply_reviewers": "0;3;0;1;0", "reply_authors": "1;5;1;2;1", "rating_avg": [ 4.8, 1.469693845669907 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.8944271909999159 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 48.4, 15.856859714331838 ], "wc_strengths_avg": [ 56.6, 23.122283624244382 ], "wc_weaknesses_avg": [ 170.8, 83.83650756084727 ], "wc_questions_avg": [ 46.2, 23.327237298917332 ], "wc_limitations_avg": [ 5.6, 2.727636339397171 ], "wc_review_avg": [ 327.6, 105.71773739538696 ], "wc_reply_reviewers_avg": [ 99.0, 178.6728854639114 ], "wc_reply_authors_avg": [ 512.0, 1005.6021081919031 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 2.0, 
1.5491933384829668 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12965529034508884711&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "irt-systemx.fr;;unilim.fr;univ-paris8.fr;irt-systemx.fr;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "IRT SystemX;Universit\u00e9 de Limoges;Universit\u00e9 Vincennes Saint-Denis;New York University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.irt-systemx.fr;https://www.unilim.fr;https://www.univ-paris8.fr;https://www.nyu.edu", "aff_unique_abbr": ";Unilim;Paris VIII;NYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris VIII", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "France;United States" }, { "title": "Nonparametric Identifiability of Causal Representations from Unknown Interventions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71473", "id": "V87gZeSOL4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97fe251c25b6f99a2a23b330a75b11d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V87gZeSOL4", "openreview": "https://openreview.net/forum?id=V87gZeSOL4", "poster": "/media/PosterPDFs/NeurIPS%202023/71473.png?t=1701956899.4845848", "slides": "https://nips.cc/virtual/2023/poster/71473", "video": "https://nips.cc/virtual/2023/poster/71473", "author_site": "Julius von K\u00fcgelgen, Michel Besserve, Liang Wendong, Luigi Gresele, Armin Keki\u0107, Elias Bareinboim, David Blei, Bernhard Sch\u00f6lkopf", "tldr": "", "abstract": "We study causal representation learning, the task of inferring latent causal variables and their causal relations from high-dimensional functions (\u201cmixtures\u201d) of the variables. Prior work relies on weak supervision, in the form of counterfactual pre- and post-intervention views or temporal structure; places restrictive assumptions, such as linearity, on the mixing function or latent causal model; or requires partial knowledge of the generative process, such as the causal graph or intervention targets. We instead consider the general setting in which both the causal model and the mixing function are nonparametric. The learning signal takes the form of multiple datasets, or environments, arising from unknown interventions in the underlying causal model. Our goal is to identify both the ground truth latents and their causal graph up to a set of ambiguities which we show to be irresolvable from interventional data. We study the fundamental setting of two causal variables and prove that the observational distribution and one perfect intervention per node suffice for identifiability, subject to a genericity condition. This condition rules out spurious solutions that involve fine-tuning of the intervened and observational distributions, mirroring similar conditions for nonlinear cause-effect inference. For an arbitrary number of variables, we show that at least one pair of distinct perfect interventional domains per node guarantees identifiability. Further, we demonstrate that the strengths of causal influences among the latent variables are preserved by all equivalent solutions, rendering the inferred representation appropriate for drawing causal conclusions from new data. 
Our study provides the first identifiability results for the general nonparametric setting with unknown interventions, and elucidates what is possible and impossible for causal representation learning without more direct supervision.", "keywords": "Causal representation learning;identifiability;theory;nonparametric;interventions;multi-environment", "primary_area": "", "supplementary_material": "/attachment/e58e5c93699db6bc2776bd81a2e917adba129ee2.pdf", "author": "Julius von K\u00fcgelgen;Michel Besserve;Wendong Liang;Luigi Gresele;Armin Keki\u0107;Elias Bareinboim;David Blei;Bernhard Sch\u00f6lkopf", "authorids": "~Julius_von_K\u00fcgelgen2;~Michel_Besserve1;~Wendong_Liang1;~Luigi_Gresele1;~Armin_Keki\u01071;~Elias_Bareinboim2;~David_Blei2;~Bernhard_Sch\u00f6lkopf1", "gender": ";M;M;M;;M;M;", "homepage": ";https://computational-homeostasis.com;https://wendongl.github.io/;https://lgresele.github.io/;https://arminkekic.com/;https://causalai.net;http://www.cs.columbia.edu/~blei/;", "dblp": ";71/511;317/6907;211/6114;330/4165;85/9005;86/1910;", "google_scholar": ";https://scholar.google.de/citations?user=Nbq6kI0AAAAJ;bGVdtT0AAAAJ;JdZ8DWwAAAAJ;b7GNNQ8AAAAJ;r5U-D7YAAAAJ;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ;", "orcid": ";;0000-0002-8984-8619;;0000-0002-1940-2523;;;", "linkedin": ";;wendong-liang/;;arminkekic/;;;", "or_profile": "~Julius_von_K\u00fcgelgen2;~Michel_Besserve1;~Wendong_Liang1;~Luigi_Gresele1;~Armin_Keki\u01071;~Elias_Bareinboim2;~David_Blei2;~Bernhard_Sch\u00f6lkopf1", "aff": ";MPI for Intelligent Systems;Ecole Normale Superieure Paris-Saclay;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Columbia University;Columbia University;", "aff_domain": ";tuebingen.mpg.de;ens-paris-saclay.fr;is.mpg.de;tue.mpg.de;columbia.edu;columbia.edu;", "position": ";Senior research scientist;MS student;PhD student;PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nk{\\\"u}gelgen2023nonparametric,\ntitle={Nonparametric Identifiability of Causal Representations from Unknown Interventions},\nauthor={Julius von K{\\\"u}gelgen and Michel Besserve and Wendong Liang and Luigi Gresele and Armin Keki{\\'c} and Elias Bareinboim and David Blei and Bernhard Sch{\\\"o}lkopf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V87gZeSOL4}\n}", "github": "", "project": "", "reviewers": "ZLAN;E44d;FsUF;U76e", "pdf_size": 675375, "rating": "6;6;6;7", "confidence": "4;2;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "102;68;151;86", "wc_strengths": "192;35;98;95", "wc_weaknesses": "371;66;156;94", "wc_questions": "120;111;36;138", "wc_limitations": "10;1;27;7", "wc_review": "795;281;468;420", "wc_reply_reviewers": "298;26;0;11", "wc_reply_authors": "596;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 101.75, 30.873734791890662 ], "wc_strengths_avg": [ 105.0, 56.16493568054717 ], "wc_weaknesses_avg": [ 171.75, 119.55830167746613 ], "wc_questions_avg": [ 101.25, 38.90613704802881 ], "wc_limitations_avg": [ 11.25, 9.65336728815391 ], "wc_review_avg": [ 491.0, 188.47148325409867 ], "wc_reply_reviewers_avg": [ 83.75, 124.04107182703639 ], 
"wc_reply_authors_avg": [ 149.0, 258.0755703277627 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10337583546497528517&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";tuebingen.mpg.de;ens-paris-saclay.fr;is.mpg.de;tue.mpg.de;columbia.edu;columbia.edu;", "author_num": 8, "aff_unique_index": "0;1;2;0;3;3", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Ecole Normale Superieure Paris-Saclay;Max-Planck-Institute for Intelligent Systems;Columbia University", "aff_unique_dep": ";;Intelligent Systems;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.ensparis-saclay.fr;https://www.mpi-is.mpg.de;https://www.columbia.edu", "aff_unique_abbr": "MPI-IS;ENS Paris-Saclay;MPI-IS;Columbia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris-Saclay", "aff_country_unique_index": "0;1;0;0;2;2", "aff_country_unique": "Germany;France;United States" }, { "title": "Temporally Disentangled Representation Learning under Unknown Nonstationarity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71472", "id": "V8GHCGYLkf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/19a567abaec3990cb40d7a013556fecd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=V8GHCGYLkf", "openreview": "https://openreview.net/forum?id=V8GHCGYLkf", "poster": "/media/PosterPDFs/NeurIPS%202023/71472.png?t=1701567867.685556", "slides": "https://nips.cc/virtual/2023/poster/71472", "video": "https://nips.cc/virtual/2023/poster/71472", "author_site": "Xiangchen Song, Weiran Yao, Yewen Fan, Xinshuai Dong, Guangyi Chen, Juan Carlos Niebles, Eric Xing, Kun Zhang", "tldr": "", "abstract": "In unsupervised causal representation learning for sequential data with time-delayed latent causal influences, strong identifiability results for the disentanglement of causally-related latent variables have been established in stationary settings by leveraging temporal structure.\nHowever, in nonstationary setting, existing work only partially addressed the problem by either utilizing observed auxiliary variables (e.g., class labels and/or domain indexes) as side information or assuming simplified latent causal dynamics. Both constrain the method to a limited range of scenarios.\nIn this study, we further explored the Markov Assumption under time-delayed causally related process in nonstationary setting and showed that under mild conditions, the independent latent components can be recovered from their nonlinear mixture up to a permutation and a component-wise transformation, without the observation of auxiliary variables. 
We then introduce NCTRL, a principled estimation framework, to reconstruct time-delayed latent causal variables and identify their relations from measured sequential data only.\nEmpirical evaluations demonstrated the reliable identification of time-delayed latent causal influences, with our methodology substantially outperforming existing baselines that fail to exploit the nonstationarity adequately and then, consequently, cannot distinguish distribution shifts.", "keywords": "Unsupervised learning;Temporal disentanglement;Nonlinear ICA;Identifiability theory", "primary_area": "", "supplementary_material": "/attachment/21f5031adcb03aae929fd32ca12fbd561dae8ef0.zip", "author": "Xiangchen Song;Weiran Yao;Yewen Fan;Xinshuai Dong;Guangyi Chen;Juan Carlos Niebles;Eric Xing;Kun Zhang", "authorids": "~Xiangchen_Song1;~Weiran_Yao1;~Yewen_Fan1;~Xinshuai_Dong1;~Guangyi_Chen1;~Juan_Carlos_Niebles1;~Eric_Xing1;~Kun_Zhang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://xiangchensong.github.io/;;https://tofuwen.github.io/;https://dongxinshuai.github.io/;https://chengy12.github.io/;http://www.niebles.net/;http://www.cs.cmu.edu/~epxing/;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "261/9024;192/3295;200/1168;279/6151.html;c/GuangyiChen-2;26/647;36/3855;96/3115-1", "google_scholar": "foR8BIoAAAAJ;rr_leUAAAAAJ;Q9_yaekAAAAJ;A7JyL1sAAAAJ;https://scholar.google.com/citations?hl=zh-CN;hqNhUCYAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;RGoypN4AAAAJ", "orcid": ";;;;;;;", "linkedin": ";;yewen-fan;;;;;", "or_profile": "~Xiangchen_Song1;~Weiran_Yao1;~Yewen_Fan1;~Xinshuai_Dong1;~Guangyi_Chen1;~Juan_Carlos_Niebles1;~Eric_Xing1;~Kun_Zhang1", "aff": "Carnegie Mellon University;SalesForce.com;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Stanford University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;salesforce.com;cmu.edu;cmu.edu;cmu.edu;stanford.edu;cs.cmu.edu;cmu.edu", "position": "PhD student;Researcher;PhD student;PhD student;Postdoc;Adjunct Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsong2023temporally,\ntitle={Temporally Disentangled Representation Learning under Unknown Nonstationarity},\nauthor={Xiangchen Song and Weiran Yao and Yewen Fan and Xinshuai Dong and Guangyi Chen and Juan Carlos Niebles and Eric Xing and Kun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=V8GHCGYLkf}\n}", "github": "", "project": "", "reviewers": "5DXD;t3u7;eHJB;PKrt", "pdf_size": 1191774, "rating": "3;4;6;7", "confidence": "3;3;3;3", "soundness": "2;2;4;3", "novelty": "2;2;3;3", "presentation": "1;2;3;4", "wc_summary": "55;98;238;132", "wc_strengths": "39;36;101;103", "wc_weaknesses": "301;65;57;91", "wc_questions": "2;114;35;41", "wc_limitations": "2;31;31;14", "wc_review": "399;344;462;381", "wc_reply_reviewers": "2;52;35;110", "wc_reply_authors": "188;194;111;84", "reply_reviewers": "1;1;1;1", "reply_authors": "4;4;3;3", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 130.75, 67.66599958620282 ], "wc_strengths_avg": [ 69.75, 32.275183965393595 ], "wc_weaknesses_avg": [ 128.5, 100.38301649183491 ], "wc_questions_avg": [ 48.0, 40.89621009335706 ], "wc_limitations_avg": [ 19.5, 12.257650672131263 ], "wc_review_avg": [ 396.5, 
42.699531613356136 ], "wc_reply_reviewers_avg": [ 49.75, 39.15593824696326 ], "wc_reply_authors_avg": [ 144.25, 47.761778651972335 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5655551744212060362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;salesforce.com;cmu.edu;cmu.edu;cmu.edu;stanford.edu;cs.cmu.edu;cmu.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "Carnegie Mellon University;Salesforce;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.salesforce.com;https://www.stanford.edu", "aff_unique_abbr": "CMU;Salesforce;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Video-Mined Task Graphs for Keystep Recognition in Instructional Videos", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71471", "id": "VAC7aB6qSG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d62e65cfdba247e0cd7cac5964f9fbd9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VAC7aB6qSG", "openreview": "https://openreview.net/forum?id=VAC7aB6qSG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71471", "video": "https://nips.cc/virtual/2023/poster/71471", "author_site": "Kumar Ashutosh, Santhosh Kumar Ramakrishnan, Triantafyllos Afouras, Kristen Grauman", "tldr": "", "abstract": "Procedural activity understanding requires perceiving human actions in terms of a broader task, where multiple keysteps are performed in sequence across a long video to reach a final goal state---such as the steps of a recipe or the steps of a DIY fix-it task. Prior work largely treats keystep recognition in isolation of this broader structure, or else rigidly confines keysteps to align with a particular sequential script. We propose discovering a task graph automatically from how-to videos to represent probabilistically how people tend to execute keysteps, then leverage this graph to regularize keystep recognition in novel videos. 
On multiple datasets of real-world instructional video, we show the impact: more reliable zero-shot keystep localization and improved video representation learning, exceeding the state of the art.", "keywords": "Instructional Videos;Task Graph;Keystep Recognition", "primary_area": "", "supplementary_material": "/attachment/aba6478be1015898c71fe92edbf9f1c66de6d0d8.pdf", "author": "Kumar Ashutosh;Santhosh Kumar Ramakrishnan;Triantafyllos Afouras;Kristen Grauman", "authorids": "~Kumar_Ashutosh1;~Santhosh_Kumar_Ramakrishnan1;~Triantafyllos_Afouras1;~Kristen_Grauman1", "gender": "M;M;M;F", "homepage": "https://thechargedneutron.github.io;https://srama2512.github.io/;http://www.robots.ox.ac.uk/~afourast/;http://www.cs.utexas.edu/~grauman/", "dblp": "231/3353;199/1913;175/5771;57/4553", "google_scholar": "https://scholar.google.co.in/citations?user=GDqE4f8AAAAJ;zr9B1YgAAAAJ;https://scholar.google.co.uk/citations?user=TkBHFfgAAAAJ;Jp6Mz1sAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kumar_Ashutosh1;~Santhosh_Kumar_Ramakrishnan1;~Triantafyllos_Afouras1;~Kristen_Grauman1", "aff": "University of Texas at Austin;University of Texas, Austin;Meta;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;meta.com;utexas.edu", "position": "PhD student;PhD student;Researcher;Professor", "bibtex": "@inproceedings{\nashutosh2023videomined,\ntitle={Video-Mined Task Graphs for Keystep Recognition in Instructional Videos},\nauthor={Kumar Ashutosh and Santhosh Kumar Ramakrishnan and Triantafyllos Afouras and Kristen Grauman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VAC7aB6qSG}\n}", "github": "", "project": "", "reviewers": "vrnW;qtTp;iD7r;wv2a;G41M", "pdf_size": 1198175, "rating": "4;5;5;6;6", "confidence": "5;4;3;4;3", "soundness": "2;3;3;2;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "120;55;42;91;166", "wc_strengths": "34;39;29;80;127", "wc_weaknesses": "438;55;46;546;209", "wc_questions": "69;5;46;155;97", "wc_limitations": "23;5;46;21;7", "wc_review": "684;159;209;893;606", "wc_reply_reviewers": "45;0;0;191;0", "wc_reply_authors": "0;0;0;749;0", "reply_reviewers": "1;0;0;2;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.8, 44.92393571360372 ], "wc_strengths_avg": [ 61.8, 37.2848494699925 ], "wc_weaknesses_avg": [ 258.8, 201.93801029028688 ], "wc_questions_avg": [ 74.4, 50.31739261925244 ], "wc_limitations_avg": [ 20.4, 14.691494137765567 ], "wc_review_avg": [ 510.2, 282.8366312909274 ], "wc_reply_reviewers_avg": [ 47.2, 73.98216001172175 ], "wc_reply_authors_avg": [ 149.8, 299.6 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6428571428571428, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4840175933064677104&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "utexas.edu;utexas.edu;meta.com;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Austin;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.utexas.edu;https://meta.com", "aff_unique_abbr": "UT Austin;Meta", "aff_campus_unique_index": "0;0;0", 
"aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Training Neural Networks is NP-Hard in Fixed Dimension", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71470", "id": "VAQp2EnZeW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8948a8d039ed52d1031db6c7c2373378-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VAQp2EnZeW", "openreview": "https://openreview.net/forum?id=VAQp2EnZeW", "poster": "/media/PosterPDFs/NeurIPS%202023/71470.png?t=1699620321.2437406", "slides": "https://nips.cc/virtual/2023/poster/71470", "video": "https://nips.cc/virtual/2023/poster/71470", "author_site": "Vincent Froese, Christoph Hertrich", "tldr": "", "abstract": "We study the parameterized complexity of training two-layer neural networks with respect to the dimension of the input data and the number of hidden neurons, considering ReLU and linear threshold activation functions. Albeit the computational complexity of these problems has been studied numerous times in recent years, several questions are still open. We answer questions by Arora et al. (ICLR 2018) and Khalife and Basu (IPCO 2022) showing that both problems are NP-hard for two dimensions, which excludes any polynomial-time algorithm for constant dimension. We also answer a question by Froese et al. (JAIR 2022) proving W[1]-hardness for four ReLUs (or two linear threshold neurons) with zero training error. Finally, in the ReLU case, we show fixed-parameter tractability for the combined parameter number of dimensions and number of ReLUs if the network is assumed to compute a convex map. Our results settle the complexity status regarding these parameters almost completely.", "keywords": "Computational Complexity;Neural Network;Rectified Linear Unit;Empirical Risk Minimization;Parameterized Complexity", "primary_area": "", "supplementary_material": "/attachment/a82c408fe5ec8a91a8e00d4d5672ea625c3ab56b.pdf", "author": "Vincent Froese;Christoph Hertrich", "authorids": "~Vincent_Froese1;~Christoph_Hertrich1", "gender": ";", "homepage": ";https://christophhertrich.gitlab.io", "dblp": ";234/8939", "google_scholar": ";bbMbGU4AAAAJ", "orcid": ";0000-0001-5646-8567", "linkedin": ";", "or_profile": "~Vincent_Froese1;~Christoph_Hertrich1", "aff": ";London School of Economics and Political Science", "aff_domain": ";lse.ac.uk", "position": ";Postdoc", "bibtex": "@inproceedings{\nfroese2023training,\ntitle={Training Neural Networks is {NP}-Hard in Fixed Dimension},\nauthor={Vincent Froese and Christoph Hertrich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VAQp2EnZeW}\n}", "github": "", "project": "", "reviewers": "2aQG;pi8Y;T5Uz;bRY2", "pdf_size": 405643, "rating": "6;6;6;8", "confidence": "4;3;3;2", "soundness": "3;4;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "23;119;115;201", "wc_strengths": "26;41;159;40", "wc_weaknesses": "139;126;88;35", "wc_questions": "47;96;18;39", "wc_limitations": "6;83;14;95", "wc_review": "241;465;394;410", "wc_reply_reviewers": "45;11;14;0", "wc_reply_authors": "52;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 
114.5, 62.998015841770766 ], "wc_strengths_avg": [ 66.5, 53.73313688963264 ], "wc_weaknesses_avg": [ 97.0, 40.40420770167385 ], "wc_questions_avg": [ 50.0, 28.591956910991595 ], "wc_limitations_avg": [ 49.5, 39.82775414205526 ], "wc_review_avg": [ 377.5, 83.0918166849179 ], "wc_reply_reviewers_avg": [ 17.5, 16.710774967068403 ], "wc_reply_authors_avg": [ 13.0, 22.516660498395403 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8681573015084988537&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": ";lse.ac.uk", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "London School of Economics and Political Science", "aff_unique_dep": "", "aff_unique_url": "https://www.lse.ac.uk", "aff_unique_abbr": "LSE", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Mixed-Initiative Multiagent Apprenticeship Learning for Human Training of Robot Teams", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71469", "id": "VCOZaczCHg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f5288d7059cbe3f5a19dad1b3bf17e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VCOZaczCHg", "openreview": "https://openreview.net/forum?id=VCOZaczCHg", "poster": "/media/PosterPDFs/NeurIPS%202023/71469.png?t=1698242655.3150954", "slides": "https://nips.cc/virtual/2023/poster/71469", "video": "https://nips.cc/virtual/2023/poster/71469", "author_site": "Esmaeil Seraj, Jerry Xiong, Mariah Schrum, Matthew Gombolay", "tldr": "", "abstract": "Extending recent advances in Learning from Demonstration (LfD) frameworks to multi-robot settings poses critical challenges such as environment non-stationarity due to partial observability which is detrimental to the applicability of existing methods. Although prior work has shown that enabling communication among agents of a robot team can alleviate such issues, creating inter-agent communication under existing Multi-Agent LfD (MA-LfD) frameworks requires the human expert to provide demonstrations for both environment actions and communication actions, which necessitates an efficient communication strategy on a known message spaces. To address this problem, we propose Mixed-Initiative Multi-Agent Apprenticeship Learning (MixTURE). MixTURE enables robot teams to learn from a human expert-generated data a preferred policy to accomplish a collaborative task, while simultaneously learning emergent inter-agent communication to enhance team coordination. The key ingredient to MixTURE's success is automatically learning a communication policy, enhanced by a mutual-information maximizing reverse model that rationalizes the underlying expert demonstrations without the need for human generated data or an auxiliary reward function. MixTURE outperforms a variety of relevant baselines on diverse data generated by human experts in complex heterogeneous domains. 
MixTURE is the first MA-LfD framework to enable learning multi-robot collaborative policies directly from real human data, resulting in ~44% less human workload, and ~46% higher usability score.", "keywords": "Learning from Demonstration;Multi-Robot Systems;Teaching Robot Teams", "primary_area": "", "supplementary_material": "/attachment/5e6e9f604c7429c9957d9bb8c9f0a95900ee1f3f.pdf", "author": "Esmaeil Seraj;Jerry Yuyang Xiong;Mariah L Schrum;Matthew Gombolay", "authorids": "~Esmaeil_Seraj1;~Jerry_Yuyang_Xiong1;~Mariah_L_Schrum1;~Matthew_Gombolay1", "gender": "M;;F;M", "homepage": "https://www.linkedin.com/in/esmaeil-seraj-70590b80/;;;https://core-robotics.gatech.edu/", "dblp": "169/3595;;237/8619;144/1022", "google_scholar": "k0yj7xUAAAAJ;;QuzrQzIAAAAJ;Ihyz20wAAAAJ", "orcid": "0000-0002-0147-1037;0009-0006-7630-6729;;", "linkedin": "esmaeil-seraj-70590b80/;;;", "or_profile": "~Esmaeil_Seraj1;~Jerry_Yuyang_Xiong1;~Mariah_L_Schrum1;~Matthew_Gombolay1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu", "position": "PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nseraj2023mixedinitiative,\ntitle={Mixed-Initiative Multiagent Apprenticeship Learning for Human Training of Robot Teams},\nauthor={Esmaeil Seraj and Jerry Yuyang Xiong and Mariah L Schrum and Matthew Gombolay},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VCOZaczCHg}\n}", "github": "", "project": "", "reviewers": "fNhW;dfhv;N6Tr;ZoNx;BRdM", "pdf_size": 2790873, "rating": "4;5;6;6;7", "confidence": "4;2;4;3;4", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;4;3", "wc_summary": "72;46;102;82;216", "wc_strengths": "39;52;38;154;287", "wc_weaknesses": "341;170;187;210;28", "wc_questions": "28;14;28;126;51", "wc_limitations": "9;8;17;41;38", "wc_review": "489;290;372;613;620", "wc_reply_reviewers": "0;12;15;63;12", "wc_reply_authors": "69;121;38;34;54", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 103.6, 59.02406288963849 ], "wc_strengths_avg": [ 114.0, 96.72021505352436 ], "wc_weaknesses_avg": [ 187.2, 99.81462818645372 ], "wc_questions_avg": [ 49.4, 40.09788024322483 ], "wc_limitations_avg": [ 22.6, 14.178857499812882 ], "wc_review_avg": [ 476.8, 130.44753734739493 ], "wc_reply_reviewers_avg": [ 20.4, 21.914378841299612 ], "wc_reply_authors_avg": [ 63.2, 31.454093533274808 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19611613513818402, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15076512867733646690&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Auditing for Human Expertise", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71468", "id": "VEpU9rFaQr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb44a668c2d4bc984e9d6ca261262cbb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VEpU9rFaQr", "openreview": "https://openreview.net/forum?id=VEpU9rFaQr", "poster": "/media/PosterPDFs/NeurIPS%202023/71468.png?t=1702254247.2812443", "slides": "https://nips.cc/virtual/2023/poster/71468", "video": "https://nips.cc/virtual/2023/poster/71468", "author_site": "Rohan Alur, Loren Laine, Darrick Li, Manish Raghavan, Devavrat Shah, Dennis Shung", "tldr": "", "abstract": "High-stakes prediction tasks (e.g., patient diagnosis) are often handled by trained human experts. A common source of concern about automation in these settings is that experts may exercise intuition that is difficult to model and/or have access to information (e.g., conversations with a patient) that is simply unavailable to a would-be algorithm. This raises a natural question whether human experts add value which could not be captured by an algorithmic predictor.\nWe develop a statistical framework under which we can pose this question as a natural hypothesis test. Indeed, as our framework highlights, detecting human expertise is more subtle than simply comparing the accuracy of expert predictions to those made by a particular learning algorithm. Instead, we propose a simple procedure which tests whether expert predictions are statistically independent from the outcomes of interest after conditioning on the available inputs (\u2018features\u2019). A rejection of our test thus suggests that human experts may add value to any algorithm trained on the available data, and has direct implications for whether human-AI \u2018complementarity\u2019 is achievable in a given prediction task.\nWe highlight the utility of our procedure using admissions data collected from the emergency department of a large academic hospital system, where we show that physicians\u2019 admit/discharge decisions for patients with acute gastrointestinal bleeding (AGIB) appear to be incorporating information that is not available to a standard algorithmic screening tool. 
This is despite the fact that the screening tool is arguably more accurate than physicians\u2019 discretionary decisions, highlighting that \u2013 even absent normative concerns about accountability or interpretability \u2013 accuracy is insufficient to justify algorithmic automation.", "keywords": "hypothesis testing;human-AI complementarity;machine learning for healthcare", "primary_area": "", "supplementary_material": "/attachment/57a822a418126df41b47776cb88503756dabf774.zip", "author": "Rohan Alur;Loren Laine;Darrick K Li;Manish Raghavan;Devavrat Shah;Dennis Shung", "authorids": "~Rohan_Alur1;loren.laine@yale.edu;darrick.li@yale.edu;~Manish_Raghavan1;~Devavrat_Shah1;~Dennis_Shung1", "gender": "M;;;M;M;M", "homepage": "https://sites.google.com/view/rohanalur;;;https://mraghavan.github.io/;http://devavrat.mit.edu;", "dblp": ";;;143/9427;73/3881;", "google_scholar": "MFi8ptoAAAAJ;;;WaGlwJ4AAAAJ;;N-LePdMAAAAJ", "orcid": ";;;;;0000-0001-8226-1842", "linkedin": ";;;;;", "or_profile": "~Rohan_Alur1;loren.laine@yale.edu;darrick.li@yale.edu;~Manish_Raghavan1;~Devavrat_Shah1;~Dennis_Shung1", "aff": "Massachusetts Institute of Technology;;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Yale University", "aff_domain": "mit.edu;;;mit.edu;mit.edu;yale.edu", "position": "PhD student;;;Assistant Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nalur2023auditing,\ntitle={Auditing for Human Expertise},\nauthor={Rohan Alur and Loren Laine and Darrick K Li and Manish Raghavan and Devavrat Shah and Dennis Shung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VEpU9rFaQr}\n}", "github": "", "project": "", "reviewers": "NoBC;7Ctt;7cTX;DMEm", "pdf_size": 705018, "rating": "7;7;7;7", "confidence": "3;3;3;4", "soundness": "4;4;3;3", "novelty": "3;3;3;3", "presentation": "4;4;3;3", "wc_summary": "92;116;92;56", "wc_strengths": "53;91;48;61", "wc_weaknesses": "55;152;202;168", "wc_questions": "56;104;212;109", "wc_limitations": "5;8;46;1", "wc_review": "261;471;600;395", "wc_reply_reviewers": "87;26;46;64", "wc_reply_authors": "338;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 89.0, 21.42428528562855 ], "wc_strengths_avg": [ 63.25, 16.67895380412093 ], "wc_weaknesses_avg": [ 144.25, 54.600251830921074 ], "wc_questions_avg": [ 120.25, 56.86991735531185 ], "wc_limitations_avg": [ 15.0, 18.069310999592652 ], "wc_review_avg": [ 431.75, 122.83601874043298 ], "wc_reply_reviewers_avg": [ 55.75, 22.498611068241523 ], "wc_reply_authors_avg": [ 84.5, 146.35829323957014 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=333205877305081550&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;;;mit.edu;mit.edu;yale.edu", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.yale.edu", "aff_unique_abbr": "MIT;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Neural Polarizer: A Lightweight and Effective Backdoor Defense via Purifying Poisoned Features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71467", "id": "VFhN15Vlkj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/03df5246cc78af497940338dd3eacbaa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VFhN15Vlkj", "openreview": "https://openreview.net/forum?id=VFhN15Vlkj", "poster": "/media/PosterPDFs/NeurIPS%202023/71467.png?t=1701428519.8006961", "slides": "https://nips.cc/virtual/2023/poster/71467", "video": "https://nips.cc/virtual/2023/poster/71467", "author_site": "Mingli Zhu, Shaokui Wei, Hongyuan Zha, Baoyuan Wu", "tldr": "", "abstract": "Recent studies have demonstrated the susceptibility of deep neural networks to backdoor attacks. Given a backdoored model, its prediction of a poisoned sample with trigger will be dominated by the trigger information, though trigger information and benign information coexist. Inspired by the mechanism of the optical polarizer that a polarizer could pass light waves with particular polarizations while filtering light waves with other polarizations, we propose a novel backdoor defense method by inserting a learnable neural polarizer into the backdoored model as an intermediate layer, in order to purify the poisoned sample via filtering trigger information while maintaining benign information. The neural polarizer is instantiated as one lightweight linear transformation layer, which is learned through solving a well designed bi-level optimization problem, based on a limited clean dataset. Compared to other fine-tuning-based defense methods which often adjust all parameters of the backdoored model, the proposed method only needs to learn one additional layer, such that it is more efficient and requires less clean data. Extensive experiments demonstrate the effectiveness and efficiency of our method in removing backdoors across various neural network architectures and datasets, especially in the case of very limited clean data. 
Codes are available at \\href{https://github.com/SCLBD/BackdoorBench}{https://github.com/SCLBD/BackdoorBench} (PyTorch) and \\href{https://github.com/JulieCarlon/NPD-MindSpore}{https://github.com/JulieCarlon/NPD-MindSpore} (MindSpore).", "keywords": "Backdoor Defense;Backdoor Learning;Trustworthy AI", "primary_area": "", "supplementary_material": "", "author": "Mingli Zhu;Shaokui Wei;Hongyuan Zha;Baoyuan Wu", "authorids": "~Mingli_Zhu1;~Shaokui_Wei1;~Hongyuan_Zha1;~Baoyuan_Wu1", "gender": "F;M;;M", "homepage": ";https://shawkui.github.io/;;https://sites.google.com/site/baoyuanwu2015/", "dblp": "329/6304;323/4243;z/HongyuanZha;73/7781", "google_scholar": ";WHkEfnsAAAAJ;n1DQMIsAAAAJ;JNTG1KoAAAAJ", "orcid": ";;;0000-0003-2183-5990", "linkedin": "%E6%98%8E%E4%B8%BD-%E6%9C%B1-03b47423a;;;", "or_profile": "~Mingli_Zhu1;~Shaokui_Wei1;~Hongyuan_Zha1;~Baoyuan_Wu1", "aff": "The Chinese University of Hong Kong(Shen Zhen);The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhu2023neural,\ntitle={Neural Polarizer: A Lightweight and Effective Backdoor Defense via Purifying Poisoned Features},\nauthor={Mingli Zhu and Shaokui Wei and Hongyuan Zha and Baoyuan Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VFhN15Vlkj}\n}", "github": "", "project": "", "reviewers": "DJTs;BztP;cjy4;qQYz", "pdf_size": 2683078, "rating": "6;6;6;7", "confidence": "5;4;3;4", "soundness": "3;3;2;3", "novelty": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "44;49;50;54", "wc_strengths": "54;66;18;19", "wc_weaknesses": "216;36;125;95", "wc_questions": "28;45;104;17", "wc_limitations": "24;6;6;1", "wc_review": "366;202;303;186", "wc_reply_reviewers": "30;13;17;16", "wc_reply_authors": "109;40;41;132", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 49.25, 3.5619517121937516 ], "wc_strengths_avg": [ 39.25, 21.182244923520265 ], "wc_weaknesses_avg": [ 118.0, 65.01153743759642 ], "wc_questions_avg": [ 48.5, 33.55964838909967 ], "wc_limitations_avg": [ 9.25, 8.757139944068497 ], "wc_review_avg": [ 264.25, 73.91337835601888 ], "wc_reply_reviewers_avg": [ 19.0, 6.519202405202649 ], "wc_reply_authors_avg": [ 80.5, 40.81972562377165 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=598058186033323402&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Delegated Classification", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71466", "id": "VGLXjbTSYa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2aab664e0d1656e8b56c74f868e1ea69-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VGLXjbTSYa", "openreview": "https://openreview.net/forum?id=VGLXjbTSYa", "poster": "/media/PosterPDFs/NeurIPS%202023/71466.png?t=1701793161.8960066", "slides": "https://nips.cc/virtual/2023/poster/71466", "video": "https://nips.cc/virtual/2023/poster/71466", "author_site": "Eden Saig, Inbal Talgam-Cohen, Nir Rosenfeld", "tldr": "", "abstract": "When machine learning is outsourced to a rational agent, conflicts of interest might arise and severely impact predictive performance. In this work, we propose a theoretical framework for incentive-aware delegation of machine learning tasks. We model delegation as a principal-agent game, in which accurate learning can be incentivized by the principal using performance-based contracts. Adapting the economic theory of contract design to this setting, we define budget-optimal contracts and prove they take a simple threshold form under reasonable assumptions. In the binary-action case, the optimality of such contracts is shown to be equivalent to the classic Neyman-Pearson lemma, establishing a formal connection between contract design and statistical hypothesis testing. Empirically, we demonstrate that budget-optimal contracts can be constructed using small-scale data, leveraging recent advances in the study of learning curves and scaling laws. Performance and economic outcomes are evaluated using synthetic and real-world classification tasks.", "keywords": "Delegation;Algorithmic Contract Design;Moral Hazard;Learning Curves", "primary_area": "", "supplementary_material": "/attachment/01e48e6d9bf3a8fb78836f91f8c41dc830f22930.zip", "author": "Eden Saig;Inbal Talgam-Cohen;Nir Rosenfeld", "authorids": "~Eden_Saig1;~Inbal_Talgam-Cohen2;~Nir_Rosenfeld2", "gender": "M;F;M", "homepage": "https://edensaig.github.io/;http://www.inbaltalgam.com/;https://nirr.cswp.cs.technion.ac.il", "dblp": "209/3728;07/8319;145/9800", "google_scholar": "7DsqqK8AAAAJ;R1YK5BsAAAAJ;WTlgnYkAAAAJ", "orcid": "0000-0002-0810-2218;;", "linkedin": "eden-saig/;;", "or_profile": "~Eden_Saig1;~Inbal_Talgam-Cohen2;~Nir_Rosenfeld2", "aff": "Technion - Israel Institute of Technology;Technion, Technion;Technion, Technion", "aff_domain": "cs.technion.ac.il;technion.ac.il;technion.ac.il", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nsaig2023delegated,\ntitle={Delegated Classification},\nauthor={Eden Saig and Inbal Talgam-Cohen and Nir Rosenfeld},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VGLXjbTSYa}\n}", "github": "", "project": "", "reviewers": "au5P;ybdo;EHyc;dRCD", "pdf_size": 1026089, "rating": "5;6;7;8", "confidence": "4;4;3;5", "soundness": "4;3;3;4", "novelty": "3;3;3;3", "presentation": "4;3;4;4", "wc_summary": "159;69;159;176", "wc_strengths": "93;40;44;192", "wc_weaknesses": "247;23;93;86", "wc_questions": "2;119;209;186", "wc_limitations": "1;246;1;1", "wc_review": "502;497;506;641", "wc_reply_reviewers": "13;12;10;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], 
"wc_summary_avg": [ 140.75, 42.00223208354527 ], "wc_strengths_avg": [ 92.25, 61.25510182833753 ], "wc_weaknesses_avg": [ 112.25, 82.43595999319714 ], "wc_questions_avg": [ 129.0, 80.43320209963048 ], "wc_limitations_avg": [ 62.25, 106.08811196359373 ], "wc_review_avg": [ 536.5, 60.41729884726725 ], "wc_reply_reviewers_avg": [ 11.0, 1.5811388300841898 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16233981636275713331&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "cs.technion.ac.il;technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Mesogeos: A multi-purpose dataset for data-driven wildfire modeling in the Mediterranean", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73575", "id": "VH1vxapUTs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ee3ed2dd656402f954ef9dc37e39f48-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=VH1vxapUTs", "openreview": "https://openreview.net/forum?id=VH1vxapUTs", "poster": "/media/PosterPDFs/NeurIPS%202023/73575.png?t=1702475380.0835745", "slides": "https://nips.cc/virtual/2023/poster/73575", "video": "https://nips.cc/virtual/2023/poster/73575", "author_site": "Spyridon Kondylatos, Ioannis Prapas, Gustau Camps-Valls, Ioannis Papoutsis", "tldr": "", "abstract": "We introduce Mesogeos, a large-scale multi-purpose dataset for wildfire modeling in the Mediterranean. Mesogeos integrates variables representing wildfire drivers (meteorology, vegetation, human activity) and historical records of wildfire ignitions and burned areas for 17 years (2006-2022). It is designed as a cloud-friendly spatio-temporal dataset, namely a datacube, harmonizing all variables in a grid of 1km x 1km x 1-day resolution. The datacube structure offers opportunities to assess machine learning (ML) usage in various wildfire modeling tasks. We extract two ML-ready datasets that establish distinct tracks to demonstrate this potential: (1) short-term wildfire danger forecasting and (2) final burned area estimation given the point of ignition. We define appropriate metrics and baselines to evaluate the performance of models in each track. 
By publishing the datacube, along with the code to create the ML datasets and models, we encourage the community to foster the implementation of additional tracks for mitigating the increasing threat of wildfires in the Mediterranean.", "keywords": "wildfires;public dataset;machine learning dataset", "primary_area": "", "supplementary_material": "/attachment/4d56fa9ada118259b640ac5e7e734d1ebf34b357.pdf", "author": "Spyros Kondylatos;Ioannis Prapas;Gustau Camps-Valls;Ioannis Papoutsis", "authorids": "~Spyros_Kondylatos2;~Ioannis_Prapas1;~Gustau_Camps-Valls1;~Ioannis_Papoutsis1", "gender": "M;M;M;M", "homepage": ";https://iprapas.github.io;http://www.uv.es/gcamps;", "dblp": ";;32/5293;76/9902", "google_scholar": "Za_CSbMAAAAJ;tEF8OxIAAAAJ;6mgnauMAAAAJ;46cBUO8AAAAJ", "orcid": ";0000-0002-9111-4112;0000-0003-1683-2138;0000-0002-2845-9791", "linkedin": "skondylatos/;;gcampsvalls/;ioannis-papoutsis/", "or_profile": "~Spyros_Kondylatos2;~Ioannis_Prapas1;~Gustau_Camps-Valls1;~Ioannis_Papoutsis1", "aff": "National Obsevatory of Athens;National Observatory of Athens;Universitat de Val\u00e8ncia;National Observatory of Athens", "aff_domain": "noa.gr;noa.gr;uv.es;noa.gr", "position": "PhD student;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nkondylatos2023mesogeos,\ntitle={Mesogeos: A multi-purpose dataset for data-driven wildfire modeling in the Mediterranean},\nauthor={Spyros Kondylatos and Ioannis Prapas and Gustau Camps-Valls and Ioannis Papoutsis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=VH1vxapUTs}\n}", "github": "", "project": "", "reviewers": "cj7g;vRfc;28Ft;v8vR", "pdf_size": 2191418, "rating": "7;7;9;9", "confidence": "5;4;4;3", "wc_summary_and_contributions": "39;205;76;55", "wc_strengths": "40;56;81;9", "wc_improvement": "311;166;72;12", "wc_limitations": "2;43;18;13", "wc_correctness": "15;372;5;8", "wc_clarity": "4;9;9;6", "wc_relation_to_prior_work": "7;5;5;8", "wc_documentation": "1;1;7;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "420;858;274;115", "wc_reply_reviewers": "48;0;0;0", "wc_reply_authors": "994;850;30;0", "reply_reviewers": "2;0;0;0", "reply_authors": "2;1;1;0", "rating_avg": [ 8.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 93.75, 65.55675022451922 ], "wc_strengths_avg": [ 46.5, 26.119915773217954 ], "wc_improvement_avg": [ 140.25, 112.83256400525515 ], "wc_limitations_avg": [ 19.0, 15.016657417681207 ], "wc_correctness_avg": [ 100.0, 157.0811891984524 ], "wc_clarity_avg": [ 7.0, 2.1213203435596424 ], "wc_relation_to_prior_work_avg": [ 6.25, 1.299038105676658 ], "wc_documentation_avg": [ 3.0, 2.449489742783178 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 416.75, 276.6508404107965 ], "wc_reply_reviewers_avg": [ 12.0, 20.784609690826528 ], "wc_reply_authors_avg": [ 468.5, 456.4720692441105 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14157861135292921829&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 10, "email": "noa.gr;noa.gr;uv.es;noa.gr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National Observatory of Athens;Universitat de Val\u00e8ncia", "aff_unique_dep": ";", "aff_unique_url": 
"http://www.noa.gr/;https://www.uv.es", "aff_unique_abbr": "NOA;UV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Greece;Spain" }, { "title": "Improving multimodal datasets with image captioning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73574", "id": "VIRKdeFJIg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/45e604a3e33d10fba508e755faa72345-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=VIRKdeFJIg", "openreview": "https://openreview.net/forum?id=VIRKdeFJIg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73574", "video": "https://nips.cc/virtual/2023/poster/73574", "author_site": "Thao Nguyen, Samir Yitzhak Gadre, Gabriel Ilharco, Sewoong Oh, Ludwig Schmidt", "tldr": "", "abstract": "Massive web datasets play a key role in the success of large vision-language models like CLIP and Flamingo. However, the raw web data is noisy, and existing filtering methods to reduce noise often come at the expense of data diversity. Our work focuses on caption quality as one major source of noise, and studies how generated captions can increase the utility of web-scraped datapoints with nondescript text. Through exploring different mixing strategies for raw and generated captions, we outperform the best filtering method proposed by the DataComp benchmark by 2% on ImageNet and 4% on average across 38 tasks, given a candidate pool of 128M image-text pairs. Our best approach is also 2x better at Flickr and MS-COCO retrieval. We then analyze what makes synthetic captions an effective source of text supervision. In experimenting with different image captioning models, we also demonstrate that the performance of a model on standard image captioning benchmarks (e.g., NoCaps CIDEr) is not a reliable indicator of the utility of the captions it generates for multimodal training. Finally, our experiments with using generated captions at DataComp's large scale (1.28B image-text pairs) offer insights into the limitations of synthetic text, as well as the importance of image curation with increasing training data quantity. 
The synthetic captions used in our experiments are now available on HuggingFace.", "keywords": "web datasets;image captioning;synthetic data;CLIP training", "primary_area": "", "supplementary_material": "/attachment/348c8151b72a53d5b9a57158d6e91e98eeeeaca7.pdf", "author": "Thao Nguyen;Samir Yitzhak Gadre;Gabriel Ilharco;Sewoong Oh;Ludwig Schmidt", "authorids": "~Thao_Nguyen3;~Samir_Yitzhak_Gadre1;~Gabriel_Ilharco1;~Sewoong_Oh1;~Ludwig_Schmidt1", "gender": "F;M;M;M;M", "homepage": "https://thaonguyen19.github.io/;https://sagadre.github.io/;http://gabrielilharco.com/;https://homes.cs.washington.edu/~sewoong/;http://people.csail.mit.edu/ludwigs/", "dblp": "77/2922;246/7901;249/2616;80/4366;141/2720", "google_scholar": "DvJG-_8AAAAJ;oAhlg9gAAAAJ;https://scholar.google.com/citations?hl=en;55TAOdgAAAAJ;SWMKy70AAAAJ", "orcid": ";;;;", "linkedin": ";;;;ludwig-schmidt-87ba3612/", "or_profile": "~Thao_Nguyen3;~Samir_Yitzhak_Gadre1;~Gabriel_Ilharco1;~Sewoong_Oh1;~Ludwig_Schmidt1", "aff": "Meta;Columbia University;Department of Computer Science, University of Washington;University of Washington;Allen Institute for Artificial Intelligence", "aff_domain": "meta.com;columbia.edu;cs.washington.edu;uw.edu;allenai.org", "position": "Visiting Researcher;PhD student;PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nnguyen2023improving,\ntitle={Improving multimodal datasets with image captioning},\nauthor={Thao Nguyen and Samir Yitzhak Gadre and Gabriel Ilharco and Sewoong Oh and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=VIRKdeFJIg}\n}", "github": "", "project": "", "reviewers": "DC33;DaAH;cRge;fUUj;fLcW", "pdf_size": 3174812, "rating": "6;6;6;8;8", "confidence": "4;4;4;3;4", "wc_summary_and_contributions": "94;43;94;90;50", "wc_strengths": "117;155;40;53;57", "wc_improvement": "734;102;98;22;33", "wc_limitations": "185;4;22;14;12", "wc_correctness": "6;10;25;6;17", "wc_clarity": "23;20;94;6;5", "wc_relation_to_prior_work": "440;5;1;13;17", "wc_documentation": "5;35;9;10;32", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1605;375;384;215;224", "wc_reply_reviewers": "99;0;0;23;0", "wc_reply_authors": "1513;532;954;138;110", "reply_reviewers": "1;0;0;1;0", "reply_authors": "3;1;2;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 74.2, 22.771912523984454 ], "wc_strengths_avg": [ 84.4, 44.17057844312207 ], "wc_improvement_avg": [ 197.8, 270.0788033148844 ], "wc_limitations_avg": [ 47.4, 69.03796057242711 ], "wc_correctness_avg": [ 12.8, 7.30479294709987 ], "wc_clarity_avg": [ 29.6, 33.00060605504087 ], "wc_relation_to_prior_work_avg": [ 95.2, 172.4927824577017 ], "wc_documentation_avg": [ 18.2, 12.63962024745997 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 560.6, 527.094906065312 ], "wc_reply_reviewers_avg": [ 24.4, 38.34892436561943 ], "wc_reply_authors_avg": [ 649.4, 530.0662600090673 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8121125897314064081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "meta.com;columbia.edu;cs.washington.edu;uw.edu;allenai.org", "author_num": 5, "aff_unique_index": "0;1;2;2;3", 
"aff_unique_norm": "Meta;Columbia University;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Meta Platforms, Inc.;;Department of Computer Science;", "aff_unique_url": "https://meta.com;https://www.columbia.edu;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Meta;Columbia;UW;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Interactive Multi-fidelity Learning for Cost-effective Adaptation of Language Model with Sparse Human Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71465", "id": "VIaw1XHb4G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f6c1843f11d34312b11ec5ff9a10c5a6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VIaw1XHb4G", "openreview": "https://openreview.net/forum?id=VIaw1XHb4G", "poster": "/media/PosterPDFs/NeurIPS%202023/71465.png?t=1701416027.524014", "slides": "https://nips.cc/virtual/2023/poster/71465", "video": "https://nips.cc/virtual/2023/poster/71465", "author_site": "Jiaxin Zhang, Zhuohang Li, Kamalika Das, Sricharan Kumar", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated remarkable capabilities in various tasks. However, their suitability for domain-specific tasks, is limited due to their immense scale at deployment, susceptibility to misinformation, and more importantly, high data annotation costs. We propose a novel Interactive Multi-Fidelity Learning (IMFL) framework for cost-effective development of small domain-specific LMs under limited annotation budgets. Our approach formulates the domain-specific fine-tuning process as a multi-fidelity learning problem, focusing on identifying the optimal acquisition strategy that balances between low-fidelity automatic LLM annotations and high-fidelity human annotations to maximize model performance. We further propose an exploration-exploitation query strategy that enhances annotation diversity and informativeness, incorporating two innovative designs: 1) prompt retrieval that selects in-context examples from human-annotated samples to improve LLM annotation, and 2) variable batch size that controls the order for choosing each fidelity to facilitate knowledge distillation, ultimately enhancing annotation quality. Extensive experiments on financial and medical tasks demonstrate that IMFL achieves superior performance compared with single fidelity annotations. Given a limited budget of human annotation, IMFL significantly outperforms the $\\bf 3\\times$ human annotation baselines in all four tasks and achieves very close performance as $\\bf 5\\times$ human annotation on two of the tasks. 
These promising results suggest that the high human annotation costs in domain-specific tasks can be significantly reduced by employing IMFL, which utilizes fewer human annotations, supplemented with cheaper and faster LLM (e.g., GPT-3.5) annotations to achieve comparable performance.", "keywords": "multi-fidelity optimization;cost-effective learning;exploration-exploitation query;limited annotation budgets", "primary_area": "", "supplementary_material": "/attachment/1b7a3d423bb5c3a1bd76e60d38b3b7f8b8abd6c2.pdf", "author": "Jiaxin Zhang;Zhuohang Li;Kamalika Das;Sricharan Kumar", "authorids": "~Jiaxin_Zhang2;~Zhuohang_Li1;tomben0099@gmail.com;jxzhangornl@gmail.com", "gender": "M;M;;", "homepage": "https://jxzhangjhu.github.io/;https://zhuohang.li/;;", "dblp": "32/7698-5.html;;;", "google_scholar": "LiDm8jEAAAAJ;_FgPQ50AAAAJ;;", "orcid": ";;;", "linkedin": "jiaxin-zhang-1425289b/;;;", "or_profile": "~Jiaxin_Zhang2;~Zhuohang_Li1;tomben0099@gmail.com;jxzhangornl@gmail.com", "aff": "Intuit AI Research;Vanderbilt University;;", "aff_domain": "intuit.com;vanderbilt.edu;;", "position": "Researcher;PhD student;;", "bibtex": "@inproceedings{\nzhang2023interactive,\ntitle={Interactive Multi-fidelity Learning for Cost-effective Adaptation of Language Model with Sparse Human Supervision},\nauthor={Jiaxin Zhang and Zhuohang Li and Kamalika Das and Sricharan Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VIaw1XHb4G}\n}", "github": "", "project": "", "reviewers": "ogFx;EwLY;Ltph;YvhV", "pdf_size": 1119637, "rating": "6;6;6;7", "confidence": "4;3;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;2;2;3", "wc_summary": "55;240;69;114", "wc_strengths": "80;75;45;127", "wc_weaknesses": "52;142;56;78", "wc_questions": "71;1;97;42", "wc_limitations": "1;1;1;13", "wc_review": "259;459;268;374", "wc_reply_reviewers": "121;0;0;21", "wc_reply_authors": "237;97;97;0", "reply_reviewers": "2;0;0;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 119.5, 72.90576108923081 ], "wc_strengths_avg": [ 81.75, 29.354514133264068 ], "wc_weaknesses_avg": [ 82.0, 36.02776706930364 ], "wc_questions_avg": [ 52.75, 35.65371649632055 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 340.0, 82.25265953147047 ], "wc_reply_reviewers_avg": [ 35.5, 50.102395152327794 ], "wc_reply_authors_avg": [ 107.75, 84.4789174883296 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6412845714785146500&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "intuit.com;vanderbilt.edu;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Intuit;Vanderbilt University", "aff_unique_dep": "Intuit AI Research;", "aff_unique_url": "https://intuit.com/;https://www.vanderbilt.edu", "aff_unique_abbr": "Intuit;Vanderbilt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Test-Time Distribution Normalization for Contrastively Learned Visual-language Models", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/71464", "id": "VKbEO2eh5w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/931db0b5a61f9db6c97c7e4bf068147d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VKbEO2eh5w", "openreview": "https://openreview.net/forum?id=VKbEO2eh5w", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71464", "video": "https://nips.cc/virtual/2023/poster/71464", "author_site": "Yifei Zhou, Juntao Ren, Fengyu Li, Ramin Zabih, Ser Nam Lim", "tldr": "", "abstract": "Advances in the field of visual-language contrastive learning have made it possible for many downstream applications to be carried out efficiently and accurately by simply taking the dot product between image and text representations. One of the most representative approaches proposed recently known as CLIP has quickly garnered widespread adoption due to its effectiveness. CLIP is trained with an InfoNCE loss that takes into account both positive and negative samples to help learn a much more robust representation space. This paper however reveals that the common downstream practice of taking a dot product is only a zeroth-order approximation of the optimization goal, resulting in a loss of information during test-time. Intuitively, since the model has been optimized based on the InfoNCE loss, test-time procedures should ideally also be in alignment. The question lies in how one can retrieve any semblance of negative samples information during inference in a computationally efficient way. We propose Distribution Normalization (DN), where we approximate the mean representation of a batch of test samples and use such a mean to represent what would be analogous to negative samples in the InfoNCE loss. DN requires no retraining or fine-tuning and can be effortlessly applied during inference. 
Extensive experiments on a wide variety of downstream tasks exhibit a clear advantage of DN over the dot product on top of other existing test-time augmentation methods.", "keywords": "contrastive learning;pre-trained visual-language models;zero-shot learning;test-time augmentation", "primary_area": "", "supplementary_material": "/attachment/65b4bd1223777fddc0f8e0b2b4c076bdcc3f0eaa.zip", "author": "Yifei Zhou;Juntao Ren;Fengyu Li;Ramin Zabih;Ser-Nam Lim", "authorids": "~Yifei_Zhou1;~Juntao_Ren1;~Fengyu_Li1;~Ramin_Zabih1;~Ser-Nam_Lim3", "gender": "M;M;;M;", "homepage": "https://yifeizhou02.github.io/;https://jren03.github.io/;;https://www.cs.cornell.edu/~rdz;", "dblp": "50/7699;340/8425;;z/RaminZabih;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;8Lp0W54AAAAJ;", "orcid": ";;;0000-0001-8769-5666;", "linkedin": "yifei-zhou-57aa9b222/;juntaoren/;;;", "or_profile": "~Yifei_Zhou1;~Juntao_Ren1;~Fengyu_Li1;~Ramin_Zabih1;~Ser-Nam_Lim3", "aff": "Department of Computer Science, Cornell University;Department of Computer Science, Cornell University;;Cornell;", "aff_domain": "cs.cornell.edu;cs.cornell.edu;;cornell.edu;", "position": "Undergrad student;Undergrad student;;Professor;", "bibtex": "@inproceedings{\nzhou2023testtime,\ntitle={Test-Time Distribution Normalization for Contrastively Learned Visual-language Models},\nauthor={Yifei Zhou and Juntao Ren and Fengyu Li and Ramin Zabih and Ser-Nam Lim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VKbEO2eh5w}\n}", "github": "", "project": "", "reviewers": "HYdZ;jN4m;XtRE;5JmX;UGnD", "pdf_size": 2059612, "rating": "4;4;6;6;6", "confidence": "4;4;5;4;5", "soundness": "3;3;3;3;1", "novelty": "3;2;3;2;2", "presentation": "3;3;4;2;3", "wc_summary": "129;78;44;63;98", "wc_strengths": "74;35;53;48;29", "wc_weaknesses": "84;62;122;90;220", "wc_questions": "1;2;4;28;5", "wc_limitations": "1;11;10;126;1", "wc_review": "289;188;233;355;353", "wc_reply_reviewers": "0;19;0;52;325", "wc_reply_authors": "0;23;0;22;645", "reply_reviewers": "0;1;0;1;2", "reply_authors": "1;2;1;2;3", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 82.4, 29.27524551562292 ], "wc_strengths_avg": [ 47.8, 15.68948692596415 ], "wc_weaknesses_avg": [ 115.6, 55.61870189064106 ], "wc_questions_avg": [ 8.0, 10.099504938362077 ], "wc_limitations_avg": [ 29.8, 48.28830086056042 ], "wc_review_avg": [ 283.6, 65.79240077698944 ], "wc_reply_reviewers_avg": [ 79.2, 124.35980057880441 ], "wc_reply_authors_avg": [ 138.0, 253.6998226250858 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6666666666666666, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7724272525384344480&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "cs.cornell.edu;cs.cornell.edu;;cornell.edu;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Regret 
Minimization via Saddle Point Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71463", "id": "VLnEFGu9V7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6eaf8c729af4fbeb18006dc2e6a41d9b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VLnEFGu9V7", "openreview": "https://openreview.net/forum?id=VLnEFGu9V7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71463", "video": "https://nips.cc/virtual/2023/poster/71463", "author_site": "Johannes Kirschner, Alireza Bakhtiari, Kushagra Chandak, Volodymyr Tkachuk, Csaba Szepesvari", "tldr": "", "abstract": "A long line of works characterizes the sample complexity of regret minimization in sequential decision-making by min-max programs. \nIn the corresponding saddle-point game, the min-player optimizes the sampling distribution against an adversarial max-player that chooses confusing models leading to large regret. The most recent instantiation of this idea is the decision-estimation coefficient (DEC), which was shown to provide nearly tight lower and upper bounds on the worst-case expected regret in structured bandits and reinforcement learning. By re-parametrizing the offset DEC with the confidence radius and solving the corresponding min-max program, we derive an anytime variant of the Estimation-To-Decisions algorithm (Anytime-E2D). Importantly, the algorithm optimizes the exploration-exploitation trade-off online instead of via the analysis. Our formulation leads to a practical algorithm for finite model classes and linear feedback models. We further point out connections to the information ratio, decoupling coefficient and PAC-DEC, and numerically evaluate the performance of E2D on simple examples.", "keywords": "sequential decision-making;decision-estimation coefficient;regret minimization;bandits;reinforcement learning;partial monitoring", "primary_area": "", "supplementary_material": "", "author": "Johannes Kirschner;Alireza Bakhtiari;Kushagra Chandak;Volodymyr Tkachuk;Csaba Szepesvari", "authorids": "~Johannes_Kirschner1;~Alireza_Bakhtiari1;~Kushagra_Chandak1;~Volodymyr_Tkachuk2;~Csaba_Szepesvari1", "gender": ";M;;M;M", "homepage": ";;https://kushagra06.github.io/;https://vladtkachuk4.github.io/;https://sites.ualberta.ca/~szepesva/", "dblp": "223/0106;;278/3341;287/4223;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba", "google_scholar": "https://scholar.google.ch/citations?user=IgO2ThIAAAAJ;RqWBIiYAAAAJ;abw36r8AAAAJ;9sSwAAsAAAAJ;https://scholar.google.ca/citations?user=zvC19mQAAAAJ", "orcid": "0000-0002-7228-8280;;;;", "linkedin": ";;;vtkachuk4/;csaba-szepesvari-09376b1?trk=hp-identity-name", "or_profile": "~Johannes_Kirschner1;~Alireza_Bakhtiari1;~Kushagra_Chandak1;~Volodymyr_Tkachuk2;~Csaba_Szepesvari1", "aff": "University of Alberta;University of Alberta;University of Alberta;University of Alberta;Google DeepMind", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca;google.com", "position": "Postdoc;MS student;MS student;MS student;Research Scientist", "bibtex": "@inproceedings{\nkirschner2023regret,\ntitle={Regret Minimization via Saddle Point Optimization},\nauthor={Johannes Kirschner and Alireza Bakhtiari and Kushagra Chandak and Volodymyr Tkachuk and Csaba Szepesvari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VLnEFGu9V7}\n}", "github": "", "project": "", "reviewers": "frUC;j2sL;ye2a;zR5j", "pdf_size": 768331, "rating": 
"4;6;6;7", "confidence": "5;2;2;3", "soundness": "3;3;2;4", "novelty": "2;3;2;3", "presentation": "4;3;2;4", "wc_summary": "66;42;54;225", "wc_strengths": "16;58;24;103", "wc_weaknesses": "37;114;164;167", "wc_questions": "16;30;4;116", "wc_limitations": "1;13;1;1", "wc_review": "136;257;247;612", "wc_reply_reviewers": "33;33;99;130", "wc_reply_authors": "0;0;235;137", "reply_reviewers": "1;1;2;2", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 96.75, 74.52977592881922 ], "wc_strengths_avg": [ 50.25, 34.29559009552103 ], "wc_weaknesses_avg": [ 120.5, 52.60465758846834 ], "wc_questions_avg": [ 41.5, 43.98579316097415 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 313.0, 179.04049821199672 ], "wc_reply_reviewers_avg": [ 73.75, 42.19819308927812 ], "wc_reply_authors_avg": [ 93.0, 99.24464721081938 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7492686492653552, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2640620113374818849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Alberta;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ualberta.ca;https://deepmind.com", "aff_unique_abbr": "UAlberta;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Canada;United Kingdom" }, { "title": "UP-DP: Unsupervised Prompt Learning for Data Pre-Selection with Vision-Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71462", "id": "VMAgvbBBts", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/06d5f1fe6509b001e6d4e0ec1afd83dd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VMAgvbBBts", "openreview": "https://openreview.net/forum?id=VMAgvbBBts", "poster": "/media/PosterPDFs/NeurIPS%202023/71462.png?t=1702084805.256249", "slides": "https://nips.cc/virtual/2023/poster/71462", "video": "https://nips.cc/virtual/2023/poster/71462", "author_site": "Xin Li, Sima Behpour, Thang Long Doan, Wenbin He, Liang Gou, Liu Ren", "tldr": "", "abstract": "In this study, we investigate the task of data pre-selection, which aims to select instances for labeling from an unlabeled dataset through a single pass, thereby optimizing performance for undefined downstream tasks with a limited annotation budget. Previous approaches to data pre-selection relied solely on visual features extracted from foundation models, such as CLIP and BLIP-2, but largely ignored the powerfulness of text features. In this work, we argue that, with proper design, the joint feature space of both vision and text can yield a better representation for data pre-selection. To this end, we introduce UP-DP, a simple yet effective unsupervised prompt learning approach that adapts vision-language models, like BLIP-2, for data pre-selection. Specifically, with the BLIP-2 parameters frozen, we train text prompts to extract the joint features with improved representation, ensuring a diverse cluster structure that covers the entire dataset. 
We extensively compare our method with the state-of-the-art using seven benchmark datasets in different settings, achieving performance gains of up to 20\\%. Interestingly, the prompts learned from one dataset demonstrate significant generalizability and can be applied directly to enhance the feature extraction of BLIP-2 from other datasets. To the best of our knowledge, UP-DP is the first work to incorporate unsupervised prompt learning in a vision-language model for data pre-selection.", "keywords": "Unsupervised prompt learning;UP-DP;Data preselection", "primary_area": "", "supplementary_material": "/attachment/08049486fca44f32b75156984468350ec6b09398.zip", "author": "Xin Li;Sima Behpour;Thang Doan;Wenbin He;Liang Gou;Liu Ren", "authorids": "~Xin_Li26;~Sima_Behpour1;~Thang_Doan1;~Wenbin_He1;~Liang_Gou2;~Liu_Ren1", "gender": "M;;;M;M;M", "homepage": "https://www.xinliaiblog.com/;;;https://hewenbin.github.io/;;https://sites.google.com/site/liurenshomepage/", "dblp": ";;;;43/7218;65/4250", "google_scholar": ";;;BQG5angAAAAJ;x3VK0fAAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Xin_Li26;~Sima_Behpour1;~Thang_Doan1;~Wenbin_He1;~Liang_Gou2;~Liu_Ren1", "aff": "Bosch Research;;;Bosch;Bosch Research North America, Bosch Center for Artificial Intelligence (BCAI);Bosch Research", "aff_domain": "us.bosch.com;;;bosch.com;bosch.com;us.bosch.com", "position": "Researcher;;;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nli2023updp,\ntitle={{UP}-{DP}: Unsupervised Prompt Learning for Data Pre-Selection with Vision-Language Models},\nauthor={Xin Li and Sima Behpour and Thang Doan and Wenbin He and Liang Gou and Liu Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VMAgvbBBts}\n}", "github": "", "project": "", "reviewers": "X1Uz;soCe;ZwSf;ucTW;uJMt", "pdf_size": 7417434, "rating": "5;5;5;5;6", "confidence": "5;4;2;3;5", "soundness": "2;3;1;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "62;140;80;77;210", "wc_strengths": "47;87;88;51;134", "wc_weaknesses": "94;177;348;107;189", "wc_questions": "6;172;143;2;80", "wc_limitations": "6;5;10;1;23", "wc_review": "215;581;669;238;636", "wc_reply_reviewers": "19;44;0;0;78", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.8, 54.992363106162294 ], "wc_strengths_avg": [ 81.4, 31.461722775461613 ], "wc_weaknesses_avg": [ 183.0, 90.54722524738126 ], "wc_questions_avg": [ 80.6, 69.26932943229637 ], "wc_limitations_avg": [ 9.0, 7.563068160475615 ], "wc_review_avg": [ 467.8, 199.14959201565037 ], "wc_reply_reviewers_avg": [ 28.2, 29.680970334542632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5144957554275266, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18177589652790086138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "us.bosch.com;;;bosch.com;bosch.com;us.bosch.com", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Bosch Research;Robert Bosch GmbH;Bosch Research North America", "aff_unique_dep": ";;Bosch Center
for Artificial Intelligence (BCAI)", "aff_unique_url": "https://research.bosch.com;https://www.bosch.com;https://research.bosch.com/", "aff_unique_abbr": "Bosch;Bosch;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Hyperbolic Space with Hierarchical Margin Boosts Fine-Grained Learning from Coarse Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71461", "id": "VMz5GhfxgV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e17e11960843febbc2dd22d3c7d79144-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VMz5GhfxgV", "openreview": "https://openreview.net/forum?id=VMz5GhfxgV", "poster": "/media/PosterPDFs/NeurIPS%202023/71461.png?t=1699454114.6710606", "slides": "https://nips.cc/virtual/2023/poster/71461", "video": "https://nips.cc/virtual/2023/poster/71461", "author_site": "Shu-Lin Xu, Yifan Sun, Faen Zhang, Anqi Xu, Xiu-Shen Wei, Yi Yang", "tldr": "", "abstract": "Learning fine-grained embeddings from coarse labels is a challenging task due to limited label granularity supervision, i.e., lacking the detailed distinctions required for fine-grained tasks. The task becomes even more demanding when attempting few-shot fine-grained recognition, which holds practical significance in various applications. To address these challenges, we propose a novel method that embeds visual embeddings into a hyperbolic space and enhances their discriminative ability with a hierarchical cosine margins manner. Specifically, the hyperbolic space offers distinct advantages, including the ability to capture hierarchical relationships and increased expressive power, which favors modeling fine-grained objects. Based on the hyperbolic space, we further enforce relatively large/small similarity margins between coarse/fine classes, respectively, yielding the so-called hierarchical cosine margins manner. While enforcing similarity margins in the regular Euclidean space has become popular for deep embedding learning, applying it to the hyperbolic space is non-trivial and validating the benefit for coarse-to-fine generalization is valuable. 
Extensive experiments conducted on five benchmark datasets showcase the effectiveness of our proposed method, yielding state-of-the-art results surpassing competing methods.", "keywords": "Fine-grained learning;Coarse-to-fine learning;Hyperbolic space;Hierarchical margin", "primary_area": "", "supplementary_material": "", "author": "ShuLin Xu;Yifan Sun;Faen Zhang;Anqi Xu;Xiu-Shen Wei;Yi Yang", "authorids": "~ShuLin_Xu2;~Yifan_Sun2;~Faen_Zhang1;anjojoo.xu@mail.utoronto.ca;~Xiu-Shen_Wei1;~Yi_Yang22", "gender": ";M;M;;;M", "homepage": ";https://yifansun-reid.github.io;;;;https://person.zju.edu.cn/yiyang", "dblp": ";99/10261-3.html;84/6074;;;33/4854-1.html", "google_scholar": ";uUZEL7UAAAAJ;9SGTrhoAAAAJ;;;RMSuNFwAAAAJ", "orcid": ";0000-0003-3532-6521;;;;", "linkedin": ";;faen-zhang-0ab57653/;;;", "or_profile": "~ShuLin_Xu2;~Yifan_Sun2;~Faen_Zhang1;anjojoo.xu@mail.utoronto.ca;~Xiu-Shen_Wei1;~Yi_Yang22", "aff": ";Baidu;;;;Zhejiang University", "aff_domain": ";baidu.com;;;;zju.edu.cn", "position": ";Senior Expert;;;;Full Professor", "bibtex": "@inproceedings{\nxu2023hyperbolic,\ntitle={Hyperbolic Space with Hierarchical Margin Boosts Fine-Grained Learning from Coarse Labels},\nauthor={ShuLin Xu and Yifan Sun and Faen Zhang and Anqi Xu and Xiu-Shen Wei and Yi Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VMz5GhfxgV}\n}", "github": "", "project": "", "reviewers": "tnZ4;FRH5;zkc6;EUtT", "pdf_size": 978175, "rating": "4;7;7;8", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "151;65;124;74", "wc_strengths": "26;366;219;128", "wc_weaknesses": "148;80;58;14", "wc_questions": "147;96;20;47", "wc_limitations": "1;48;1;8", "wc_review": "473;655;422;271", "wc_reply_reviewers": "271;14;13;30", "wc_reply_authors": "918;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 103.5, 35.4577213029828 ], "wc_strengths_avg": [ 184.75, 124.94673865291563 ], "wc_weaknesses_avg": [ 75.0, 48.38388161361178 ], "wc_questions_avg": [ 77.5, 48.5 ], "wc_limitations_avg": [ 14.5, 19.551214796017153 ], "wc_review_avg": [ 455.25, 137.17575405296665 ], "wc_reply_reviewers_avg": [ 82.0, 109.3274896812325 ], "wc_reply_authors_avg": [ 229.5, 397.50566033705735 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.23570226039551584, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4998523350883610197&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";baidu.com;;;;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Baidu;Zhejiang University", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.zju.edu.cn", "aff_unique_abbr": "Baidu;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Graph of Circuits with GNN for Exploring the Optimal Design Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71460", "id": "VNjJAWjuEU", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/12da92b7c64176eb6eb6ad0ae31554fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VNjJAWjuEU", "openreview": "https://openreview.net/forum?id=VNjJAWjuEU", "poster": "/media/PosterPDFs/NeurIPS%202023/71460.png?t=1701181196.3613517", "slides": "https://nips.cc/virtual/2023/poster/71460", "video": "https://nips.cc/virtual/2023/poster/71460", "author_site": "Aditya Shahane, Saripilli Swapna Manjiri, Ankesh Jain, Sandeep Kumar", "tldr": "", "abstract": "The design automation of analog circuits poses significant challenges in terms of the large design space, complex interdependencies between circuit specifications, and resource-intensive simulations. To address these challenges, this paper presents an innovative framework called the Graph of Circuits Explorer (GCX). Leveraging graph structure learning along with graph neural networks, GCX enables the creation of a surrogate model that facilitates efficient exploration of the optimal design space within a semi-supervised learning framework which reduces the need for large labelled datasets. The proposed approach comprises three key stages. First, we learn the geometric representation of circuits and enrich it with technology information to create a comprehensive feature vector. Subsequently, integrating feature-based graph learning with few-shot and zero-shot learning enhances the generalizability in predictions for unseen circuits. Finally, we introduce two algorithms namely, EASCO and ASTROG which upon integration with GCX optimize the available samples to yield the optimal circuit configuration meeting the designer's criteria. The effectiveness of the proposed approach is demonstrated through simulated performance evaluation of various circuits, using derived parameters in 180nm CMOS technology. 
Furthermore, the generalizability of the approach is extended to higher-order topologies and different technology nodes such as 65nm and 45nm CMOS process nodes.", "keywords": "Analog design optimization;Analog synthesis;Graph Neural Networks;EDA;Graph learning;Optimization", "primary_area": "", "supplementary_material": "/attachment/8497896b512862a2178f09483d2a197487f737ad.zip", "author": "Aditya Hemant Shahane;Saripilli Venkata Swapna Manjiri;Ankesh Jain;Sandeep Kumar", "authorids": "~Aditya_Hemant_Shahane1;~Saripilli_Venkata_Swapna_Manjiri1;~Ankesh_Jain1;~Sandeep_Kumar8", "gender": "M;F;M;M", "homepage": ";;https://web.iitd.ac.in/~ankesh/;https://sites.google.com/view/sandeepkr/home", "dblp": ";;16/10352;", "google_scholar": ";;https://scholar.google.co.in/citations?user=BP9KOTkAAAAJ;lycMMW8AAAAJ", "orcid": ";;0000-0003-4109-6312;", "linkedin": "aditya-shahane-211bba158/;swapna-manjiri-saripilli-053277184/;ankesh-jain-09745614/?originalSubdomain=in;sandeep-kumar-84463332/", "or_profile": "~Aditya_Hemant_Shahane1;~Saripilli_Venkata_Swapna_Manjiri1;~Ankesh_Jain1;~Sandeep_Kumar8", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "MS student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshahane2023graph,\ntitle={Graph of Circuits with {GNN} for Exploring the Optimal Design Space},\nauthor={Aditya Hemant Shahane and Saripilli Venkata Swapna Manjiri and Ankesh Jain and Sandeep Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VNjJAWjuEU}\n}", "github": "", "project": "", "reviewers": "5fbs;LVTs;LaWa;pjdD;ufxr", "pdf_size": 805445, "rating": "3;5;6;6;7", "confidence": "4;3;1;3;1", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;4", "wc_summary": "94;125;16;73;121", "wc_strengths": "36;111;52;36;47", "wc_weaknesses": "139;171;3;158;136", "wc_questions": "50;4;41;57;8", "wc_limitations": "14;4;11;20;11", "wc_review": "333;415;123;344;323", "wc_reply_reviewers": "26;0;0;0;0", "wc_reply_authors": "47;0;0;0;0", "reply_reviewers": "1;0;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 2.4, 1.2000000000000002 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 85.8, 39.69584361113894 ], "wc_strengths_avg": [ 56.4, 28.00428538634757 ], "wc_weaknesses_avg": [ 121.4, 60.56599706105729 ], "wc_questions_avg": [ 32.0, 21.863211109075447 ], "wc_limitations_avg": [ 12.0, 5.176871642217914 ], "wc_review_avg": [ 307.6, 97.79488739192863 ], "wc_reply_reviewers_avg": [ 5.2, 10.4 ], "wc_reply_authors_avg": [ 9.4, 18.800000000000004 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8355044182110838, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14481056815521683711&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitdelhi.ac.in", 
"aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "title": "Federated Learning via Meta-Variational Dropout", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71459", "id": "VNyKBipt91", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/24a8d40f6656e542f3fd43bac678e71b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VNyKBipt91", "openreview": "https://openreview.net/forum?id=VNyKBipt91", "poster": "/media/PosterPDFs/NeurIPS%202023/71459.png?t=1701279450.5925176", "slides": "https://nips.cc/virtual/2023/poster/71459", "video": "https://nips.cc/virtual/2023/poster/71459", "author_site": "Insu Jeon, Minui Hong, Junhyeog Yun, Gunhee Kim", "tldr": "", "abstract": "Federated Learning (FL) aims to train a global inference model from remotely distributed clients, gaining popularity due to its benefit of improving data privacy. However, traditional FL often faces challenges in practical applications, including model overfitting and divergent local models due to limited and non-IID data among clients. To address these issues, we introduce a novel Bayesian meta-learning approach called meta-variational dropout (MetaVD). MetaVD learns to predict client-dependent dropout rates via a shared hypernetwork, enabling effective model personalization of FL algorithms in limited non-IID data settings. We also emphasize the posterior adaptation view of meta-learning and the posterior aggregation view of Bayesian FL via the conditional dropout posterior. We conducted extensive experiments on various sparse and non-IID FL datasets. MetaVD demonstrated excellent classification accuracy and uncertainty calibration performance, especially for out-of-distribution (OOD) clients. MetaVD compresses the local model parameters needed for each client, mitigating model overfitting and reducing communication costs. 
Code is available at https://github.com/insujeon/MetaVD.", "keywords": "Personalized Federated Learning;Variational Dropout;Meta-Learning;Bayesian Neural Network", "primary_area": "", "supplementary_material": "", "author": "Insu Jeon;Minui Hong;Junhyeog Yun;Gunhee Kim", "authorids": "~Insu_Jeon2;~Minui_Hong2;~Junhyeog_Yun1;~Gunhee_Kim1", "gender": ";;;M", "homepage": ";;;http://vision.snu.ac.kr/gunhee/", "dblp": ";;;45/115", "google_scholar": ";;;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "orcid": ";;;0000-0002-9543-7453", "linkedin": ";;;", "or_profile": "~Insu_Jeon2;~Minui_Hong2;~Junhyeog_Yun1;~Gunhee_Kim1", "aff": ";;;Seoul National University", "aff_domain": ";;;snu.ac.kr", "position": ";;;Full Professor", "bibtex": "@inproceedings{\njeon2023federated,\ntitle={Federated Learning via Meta-Variational Dropout},\nauthor={Insu Jeon and Minui Hong and Junhyeog Yun and Gunhee Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VNyKBipt91}\n}", "github": "", "project": "", "reviewers": "mtpr;6ZgF;yoXe;1ob4", "pdf_size": 5262957, "rating": "5;5;6;6", "confidence": "4;4;4;5", "soundness": "3;3;4;3", "novelty": "2;2;2;3", "presentation": "3;4;4;3", "wc_summary": "49;62;90;64", "wc_strengths": "47;36;61;55", "wc_weaknesses": "84;296;219;66", "wc_questions": "349;3;60;149", "wc_limitations": "21;9;27;1", "wc_review": "550;406;457;335", "wc_reply_reviewers": "0;0;0;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 66.25, 14.872373717735847 ], "wc_strengths_avg": [ 49.75, 9.364160400164021 ], "wc_weaknesses_avg": [ 166.25, 95.43682465379912 ], "wc_questions_avg": [ 140.25, 131.2733312596279 ], "wc_limitations_avg": [ 14.5, 10.136567466356647 ], "wc_review_avg": [ 437.0, 78.31666489323968 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5786274292952539155&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;;snu.ac.kr", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Hard Prompts Made Easy: Gradient-Based Discrete Optimization for Prompt Tuning and Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71458", "id": "VOstHxDdsN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a00548031e4647b13042c97c922fadf1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VOstHxDdsN", "openreview": "https://openreview.net/forum?id=VOstHxDdsN", "poster": "/media/PosterPDFs/NeurIPS%202023/71458.png?t=1699387857.3131373", "slides": "https://nips.cc/virtual/2023/poster/71458", "video": "https://nips.cc/virtual/2023/poster/71458", "author_site": "Yuxin Wen, Neel Jain, John Kirchenbauer, Micah Goldblum, Jonas Geiping, Tom Goldstein", "tldr": "", 
"abstract": "The strength of modern generative models lies in their ability to be controlled through prompts. Hard prompts comprise interpretable words and tokens, and are typically hand-crafted by humans. Soft prompts, on the other hand, consist of continuous feature vectors. These can be discovered using powerful optimization methods, but they cannot be easily edited, re-used across models, or plugged into a text-based interface. We describe an easy-to-use approach to automatically optimize hard text prompts through efficient gradient-based optimization. Our approach can be readily applied to text-to-image and text-only applications alike. This method allows API users to easily generate, discover, and mix and match image concepts without prior knowledge of how to prompt the model. Furthermore, using our method, we can bypass token-level content filters imposed by Midjourney by optimizing through the open-sourced text encoder.", "keywords": "Diffusion Model;Generative AI;Prompt Discovery", "primary_area": "", "supplementary_material": "/attachment/f0d68dd56c851e1d1b44c85b0fe2635da094bfe2.zip", "author": "Yuxin Wen;Neel Jain;John Kirchenbauer;Micah Goldblum;Jonas Geiping;Tom Goldstein", "authorids": "~Yuxin_Wen2;~Neel_Jain1;~John_Kirchenbauer1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1", "gender": ";;M;;M;M", "homepage": "https://yuxinwenrick.github.io/;;https://jwkirchenbauer.notion.site/;;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/", "dblp": ";;321/0678;241/7231;190/7229;25/8184", "google_scholar": "oUYfjg0AAAAJ;https://scholar.google.com/citations?hl=en;48GJrbsAAAAJ;pGDKzuUAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;", "linkedin": ";neel-jain-0a6a239/;johnkirchenbauer/;;;", "or_profile": "~Yuxin_Wen2;~Neel_Jain1;~John_Kirchenbauer1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;New York University;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;nyu.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nwen2023hard,\ntitle={Hard Prompts Made Easy: Gradient-Based Discrete Optimization for Prompt Tuning and Discovery},\nauthor={Yuxin Wen and Neel Jain and John Kirchenbauer and Micah Goldblum and Jonas Geiping and Tom Goldstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VOstHxDdsN}\n}", "github": "", "project": "", "reviewers": "LSBN;Lu7o;DXf6;7Vg6", "pdf_size": 29465377, "rating": "5;5;6;7", "confidence": "3;4;3;3", "soundness": "3;3;2;4", "novelty": "3;2;2;4", "presentation": "2;3;2;4", "wc_summary": "25;80;52;75", "wc_strengths": "46;63;50;116", "wc_weaknesses": "28;429;242;83", "wc_questions": "50;2;39;41", "wc_limitations": "1;2;27;24", "wc_review": "150;576;410;339", "wc_reply_reviewers": "0;631;340;17", "wc_reply_authors": "0;781;462;0", "reply_reviewers": "0;2;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 58.0, 21.783020910791965 ], "wc_strengths_avg": [ 68.75, 27.99441908666797 ], "wc_weaknesses_avg": [ 195.5, 156.04246216975685 
], "wc_questions_avg": [ 33.0, 18.371173070873837 ], "wc_limitations_avg": [ 13.5, 12.05197079319395 ], "wc_review_avg": [ 368.75, 152.79950098086053 ], "wc_reply_reviewers_avg": [ 247.0, 259.8143568011591 ], "wc_reply_authors_avg": [ 310.75, 330.5838584988686 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 281, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16955361870497913549&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "umd.edu;umd.edu;umd.edu;nyu.edu;umd.edu;umd.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "University of Maryland;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;NYU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LogSpecT: Feasible Graph Learning Model from Stationary Signals with Recovery Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71457", "id": "VPTZVVP4tm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd8872fcba4ba87312cdfe5ebba91ca9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VPTZVVP4tm", "openreview": "https://openreview.net/forum?id=VPTZVVP4tm", "poster": "/media/PosterPDFs/NeurIPS%202023/71457.png?t=1701423832.56157", "slides": "https://nips.cc/virtual/2023/poster/71457", "video": "https://nips.cc/virtual/2023/poster/71457", "author_site": "Shangyuan LIU, Linglingzhi Zhu, Anthony Man-Cho So", "tldr": "", "abstract": "Graph learning from signals is a core task in graph signal processing (GSP). A significant subclass of graph signals called the stationary graph signals that broadens the concept of stationarity of data defined on regular domains to signals on graphs is gaining increasing popularity in the GSP community. The most commonly used model to learn graphs from these stationary signals is SpecT, which forms the foundation for nearly all the subsequent, more advanced models. Despite its strengths, the practical formulation of the model, known as rSpecT, has been identified to be susceptible to the choice of hyperparameters. More critically, it may suffer from infeasibility as an optimization problem. In this paper, we introduce the first condition that ensures the infeasibility of rSpecT and design a novel model called LogSpecT, along with its practical formulation rLogSpecT to overcome this issue. Contrary to rSpecT, our novel practical model rLogSpecT is always feasible. Furthermore, we provide recovery guarantees of rLogSpecT from modern optimization tools related to epi-convergence, which could be of independent interest and significant for various learning problems. To demonstrate the practical advantages of rLogSpecT, a highly efficient algorithm based on the linearized alternating direction method of multipliers (L-ADMM) that allows closed-form solutions for each subproblem is proposed with convergence guarantees. 
Extensive numerical results on both synthetic and real networks not only corroborate the stability of our proposed methods, but also highlight their comparable and even superior performance relative to existing models.", "keywords": "Graph Signal Processing;Spectral Template;Network Inference;Optimization;Linearized ADMM", "primary_area": "", "supplementary_material": "/attachment/48e7d680d0d1c05d95635b40acd583455f7b08ba.pdf", "author": "Shangyuan Liu;Linglingzhi Zhu;Anthony Man-Cho So", "authorids": "~Shangyuan_Liu2;~Linglingzhi_Zhu1;~Anthony_Man-Cho_So1", "gender": "M;M;M", "homepage": "https://hk.linkedin.com/in/shangyuan-liu-36b03a257?trk=public_profile_samename-profile;https://lzzhuling.github.io/;http://www1.se.cuhk.edu.hk/~manchoso/", "dblp": "346/0361;329/6941;82/3202", "google_scholar": ";https://scholar.google.com.hk/citations?user=nOSAyisAAAAJ;https://scholar.google.com.hk/citations?user=whi3UisAAAAJ", "orcid": ";0000-0002-5484-1134;0000-0003-2588-7851", "linkedin": ";;", "or_profile": "~Shangyuan_Liu2;~Linglingzhi_Zhu1;~Anthony_Man-Cho_So1", "aff": "Chinese University of Hong Kong, The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023logspect,\ntitle={LogSpecT: Feasible Graph Learning Model from Stationary Signals with Recovery Guarantees},\nauthor={Shangyuan Liu and Linglingzhi Zhu and Anthony Man-Cho So},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VPTZVVP4tm}\n}", "github": "", "project": "", "reviewers": "SR4h;U8JP;xuW7;xFRE;f5Sh", "pdf_size": 597310, "rating": "3;6;6;7;8", "confidence": "1;4;1;3;4", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;4;4;4", "wc_summary": "232;36;131;74;78", "wc_strengths": "2;132;58;43;64", "wc_weaknesses": "2;26;106;118;73", "wc_questions": "2;30;2;159;5", "wc_limitations": "2;5;8;1;1", "wc_review": "240;229;305;395;221", "wc_reply_reviewers": "6;7;0;8;34", "wc_reply_authors": "12;5;0;5;5", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 2.6, 1.3564659966250538 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 110.2, 68.00117646041133 ], "wc_strengths_avg": [ 59.8, 42.08752784376863 ], "wc_weaknesses_avg": [ 65.0, 44.819638552759436 ], "wc_questions_avg": [ 39.6, 60.61880896223548 ], "wc_limitations_avg": [ 3.4, 2.727636339397171 ], "wc_review_avg": [ 278.0, 65.59268251870783 ], "wc_reply_reviewers_avg": [ 11.0, 11.832159566199232 ], "wc_reply_authors_avg": [ 5.4, 3.826225293941798 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7049073768502413, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15347254149646414965&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "se.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0",
"aff_country_unique": "China" }, { "title": "Continuous-Time Functional Diffusion Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71456", "id": "VPrir0p5b6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/75cd262a3fd8e76e37bb7941db141a1d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VPrir0p5b6", "openreview": "https://openreview.net/forum?id=VPrir0p5b6", "poster": "/media/PosterPDFs/NeurIPS%202023/71456.png?t=1699628939.2644508", "slides": "https://nips.cc/virtual/2023/poster/71456", "video": "https://nips.cc/virtual/2023/poster/71456", "author_site": "Giulio Franzese, Giulio Corallo, Simone Rossi, Markus Heinonen, Maurizio Filippone, Pietro Michiardi", "tldr": "", "abstract": "We introduce Functional Diffusion Processes (FDPs), which generalize score-based diffusion models to infinite-dimensional function spaces. FDPs require a new mathematical framework to describe the forward and backward dynamics, and several extensions to derive practical training objectives. These include infinite-dimensional versions of Girsanov theorem, in order to be able to compute an ELBO, and of the sampling theorem, in order to guarantee that functional evaluations in a countable set of points are equivalent to infinite-dimensional functions. We use FDPs to build a new breed of generative models in function spaces, which do not require specialized network architectures, and that can work with any kind of continuous data.\nOur results on real data show that FDPs achieve high-quality image generation, using a simple MLP architecture with orders of magnitude fewer parameters than existing diffusion models.", "keywords": "Hilbert spaces;Diffusion models;Stochastic Partial Differential Equations", "primary_area": "", "supplementary_material": "/attachment/ed6e8598f25a68ba195d8ccf6db7bb1fe7906a71.pdf", "author": "Giulio Franzese;Giulio Corallo;Simone Rossi;Markus Heinonen;Maurizio Filippone;Pietro Michiardi", "authorids": "~Giulio_Franzese1;~Giulio_Corallo1;~Simone_Rossi1;~Markus_Heinonen1;~Maurizio_Filippone1;~Pietro_Michiardi1", "gender": "M;M;;M;M;M", "homepage": ";https://www.eurecom.fr/en/people/corallo-giulio;;https://users.aalto.fi/~heinom10/;;http://www.eurecom.fr/~michiard/", "dblp": "217/1859.html;369/7200;86/5740-1.html;22/7709;35/5597;54/3028", "google_scholar": "kEtx_WwAAAAJ;https://scholar.google.com/citations?hl=it;;hFtfHZoAAAAJ;https://scholar.google.com.tw/citations?user=ILUeAloAAAAJ;https://scholar.google.com.tw/citations?user=mlx1eCgAAAAJ", "orcid": "0000-0003-4244-2053;;0000-0003-2908-3703;;;", "linkedin": ";;;;;", "or_profile": "~Giulio_Franzese1;~Giulio_Corallo1;~Simone_Rossi1;~Markus_Heinonen1;~Maurizio_Filippone1;~Pietro_Michiardi1", "aff": "Eurecom;Eurecom;Stellantis;Aalto University;Eurecom;EURECOM", "aff_domain": "eurecom.fr;eurecom.fr;stellantis.com;aalto.fi;eurecom.fr;eurecom.fr", "position": "Postdoc;PhD student;Researcher;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nfranzese2023continuoustime,\ntitle={Continuous-Time Functional Diffusion Processes},\nauthor={Giulio Franzese and Giulio Corallo and Simone Rossi and Markus Heinonen and Maurizio Filippone and Pietro Michiardi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VPrir0p5b6}\n}", "github": "", "project": "", "reviewers": "vNhS;jQYa;GA6N;XJB7;p4Hy", "pdf_size": 16784825, "rating": "6;6;6;7;7", "confidence": "4;1;3;4;4", 
"soundness": "3;3;3;4;4", "novelty": "2;3;3;4;4", "presentation": "3;3;3;3;3", "wc_summary": "136;54;32;67;336", "wc_strengths": "46;60;137;84;163", "wc_weaknesses": "124;145;218;173;81", "wc_questions": "213;26;48;73;25", "wc_limitations": "29;1;29;11;10", "wc_review": "548;286;464;408;615", "wc_reply_reviewers": "29;0;152;39;0", "wc_reply_authors": "84;0;108;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;2;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 125.0, 111.0819517293426 ], "wc_strengths_avg": [ 98.0, 44.9221548904324 ], "wc_weaknesses_avg": [ 148.2, 46.04085142566327 ], "wc_questions_avg": [ 77.0, 70.22535154771388 ], "wc_limitations_avg": [ 16.0, 11.171392035015153 ], "wc_review_avg": [ 464.2, 113.7337241103095 ], "wc_reply_reviewers_avg": [ 44.0, 56.18896688852715 ], "wc_reply_authors_avg": [ 38.4, 47.6386397790701 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5601120336112039, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16013266439732558937&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "eurecom.fr;eurecom.fr;stellantis.com;aalto.fi;eurecom.fr;eurecom.fr", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "EURECOM;Stellantis;Aalto University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.eurecom.fr;https://www.stellantis.com;https://www.aalto.fi", "aff_unique_abbr": ";Stellantis;Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;0", "aff_country_unique": "France;Netherlands;Finland" }, { "title": "Batchnorm Allows Unsupervised Radial Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71455", "id": "VQ1heZKSLQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0266d95023740481d22d437aa8aba0e9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VQ1heZKSLQ", "openreview": "https://openreview.net/forum?id=VQ1heZKSLQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71455.png?t=1699950348.7223108", "slides": "https://nips.cc/virtual/2023/poster/71455", "video": "https://nips.cc/virtual/2023/poster/71455", "author_site": "Amur Ghose, Apurv Gupta, Yaoliang Yu, Pascal Poupart", "tldr": "", "abstract": "The construction of adversarial examples usually requires the existence of soft or hard labels for each instance, with respect to which a loss gradient provides the signal for construction of the example. We show that for batch normalized deep image recognition architectures, intermediate latents that are produced after a batch normalization step by themselves suffice to produce adversarial examples using an intermediate loss solely utilizing angular deviations, without relying on any label. We motivate our loss through the geometry of batch normed representations and their concentration of norm on a hypersphere and distributional proximity to Gaussians. Our losses expand intermediate latent based attacks that usually require labels. The success of our method implies that leakage of intermediate representations may create a security breach for deployed models, which persists even when the model is transferred to downstream usage. 
Removal of batch norm weakens our attack, indicating that it contributes to this vulnerability. Our attacks also succeed against LayerNorm empirically, thus being relevant for transformer architectures, most notably vision transformers, which we analyze.", "keywords": "Adversarial;Batch normalization;Robustness;Geometric;radial", "primary_area": "", "supplementary_material": "/attachment/3e1500efa2285fe892c50a5c6574ceb0871763bd.zip", "author": "Amur Ghose;Apurv Gupta;Yaoliang Yu;Pascal Poupart", "authorids": "~Amur_Ghose1;~Apurv_Gupta1;~Yaoliang_Yu1;~Pascal_Poupart2", "gender": "M;M;M;M", "homepage": ";;https://cs.uwaterloo.ca/~y328yu/;https://cs.uwaterloo.ca/~ppoupart", "dblp": "227/6744;;90/4989;26/2122", "google_scholar": "bS4Q1mYAAAAJ;;https://scholar.google.ca/citations?user=zbXIQMsAAAAJ;https://scholar.google.ca/citations?user=KhAJWroAAAAJ", "orcid": ";;0000-0002-3823-0720;", "linkedin": ";apurvji/;;", "or_profile": "~Amur_Ghose1;~Apurv_Gupta1;~Yaoliang_Yu1;~Pascal_Poupart2", "aff": "Huawei Technologies Ltd.;IBM Consulting;University of Waterloo;University of Waterloo", "aff_domain": "huawei.com;in.ibm.com;uwaterloo.ca;uwaterloo.ca", "position": "Researcher;Data Scientist;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nghose2023batchnorm,\ntitle={Batchnorm Allows Unsupervised Radial Attacks},\nauthor={Amur Ghose and Apurv Gupta and Yaoliang Yu and Pascal Poupart},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VQ1heZKSLQ}\n}", "github": "", "project": "", "reviewers": "ykjk;fVuA;7ce2;V7by", "pdf_size": 2825692, "rating": "4;5;6;6", "confidence": "3;3;3;2", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;1;2;3", "wc_summary": "88;62;69;31", "wc_strengths": "117;40;61;61", "wc_weaknesses": "359;144;72;85", "wc_questions": "82;35;92;46", "wc_limitations": "88;2;1;4", "wc_review": "734;283;295;227", "wc_reply_reviewers": "100;151;46;33", "wc_reply_authors": "337;104;129;15", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.5, 20.524375751773793 ], "wc_strengths_avg": [ 69.75, 28.595235617144336 ], "wc_weaknesses_avg": [ 165.0, 115.24539036334599 ], "wc_questions_avg": [ 63.75, 23.836683913665507 ], "wc_limitations_avg": [ 23.75, 37.11047695732298 ], "wc_review_avg": [ 384.75, 203.2662970096125 ], "wc_reply_reviewers_avg": [ 82.5, 46.853495067070504 ], "wc_reply_authors_avg": [ 146.25, 117.99867584002797 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Pc8qEp5UnIYJ:scholar.google.com/&scioq=Batchnorm+Allows+Unsupervised+Radial+Attacks&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "huawei.com;in.ibm.com;uwaterloo.ca;uwaterloo.ca", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Huawei;IBM;University of Waterloo", "aff_unique_dep": "Huawei Technologies;IBM Consulting;", "aff_unique_url": "https://www.huawei.com;https://www.ibm.com/consulting;https://uwaterloo.ca", "aff_unique_abbr": "Huawei;IBM;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index":
"0;1;2;2", "aff_country_unique": "China;United States;Canada" }, { "title": "OpenAssistant Conversations - Democratizing Large Language Model Alignment", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73573", "id": "VSJotgbPHF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/949f0f8f32267d297c2d4e3ee10a2e7e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=VSJotgbPHF", "openreview": "https://openreview.net/forum?id=VSJotgbPHF", "poster": "/media/PosterPDFs/NeurIPS%202023/73573.png?t=1701939580.1300123", "slides": "https://nips.cc/virtual/2023/poster/73573", "video": "https://nips.cc/virtual/2023/poster/73573", "author_site": "Andreas K\u00f6pf, Yannic Kilcher, Dimitri von R\u00fctte, Sotiris Anagnostidis, Zhi Rui Tam, Keith Stevens, Abdullah Barhoum, Duc Nguyen, Oliver Stanley, Rich\u00e1rd Nagyfi, Shahul ES, Sameer Suri, David Glushkov, Arnav Dantuluri, Andrew Maguire, Christoph Schuhmann, Huu Nguyen, Alexander Mattick", "tldr": "", "abstract": "Aligning large language models (LLMs) with human preferences has proven to drastically improve usability and has driven rapid adoption as demonstrated by ChatGPT.\nAlignment techniques such as supervised fine-tuning (\\textit{SFT}) and reinforcement learning from human feedback (\\textit{RLHF}) greatly reduce the required skill and domain knowledge to effectively harness the capabilities of LLMs, increasing their accessibility and utility across various domains.\nHowever, state-of-the-art alignment techniques like \\textit{RLHF} rely on high-quality human feedback data, which is expensive to create and often remains proprietary.\nIn an effort to democratize research on large-scale alignment, we release OpenAssistant Conversations, a human-generated, human-annotated assistant-style conversation corpus consisting of 161,443 messages in 35 different languages, annotated with 461,292 quality ratings, resulting in over 10,000 complete and fully annotated conversation trees.\nThe corpus is a product of a worldwide crowd-sourcing effort involving over 13,500 volunteers.\nModels trained on OpenAssistant Conversations show consistent improvements on standard benchmarks over respective base models.\nWe release our code\\footnote{\\git} and data\\footnote{\\data} under a fully permissive licence.", "keywords": "dataset;human labels;instruction tuning;conversation;rlhf;open-source", "primary_area": "", "supplementary_material": "/attachment/79fece771c1251ac2fee56c1101cd664e4e55f44.pdf", "author": "Andreas K\u00f6pf;Yannic Kilcher;Dimitri von R\u00fctte;Sotiris Anagnostidis;Zhi Rui Tam;Keith Stevens;Abdullah Barhoum;Duc Minh Nguyen;Oliver Stanley;Rich\u00e1rd Nagyfi;Shahul ES;Sameer Suri;David Alexandrovich Glushkov;Arnav Varma Dantuluri;Andrew Maguire;Christoph Schuhmann;Huu Nguyen;Alexander Julian Mattick", "authorids": "~Andreas_K\u00f6pf1;~Yannic_Kilcher1;~Dimitri_von_R\u00fctte1;~Sotiris_Anagnostidis1;~Zhi_Rui_Tam1;~Keith_Stevens1;~Abdullah_Barhoum1;~Duc_Minh_Nguyen2;~Oliver_Stanley1;~Rich\u00e1rd_Nagyfi1;~Shahul_ES1;~Sameer_Suri1;~David_Alexandrovich_Glushkov1;~Arnav_Varma_Dantuluri1;~Andrew_Maguire1;~Christoph_Schuhmann1;~Huu_Nguyen2;~Alexander_Julian_Mattick1", "gender": "M;M;;M;M;M;;M;M;M;M;;M;M;M;M;;M", "homepage": 
"https://open-assistant.io/;;;;http://theblackcat102.github.io/;https://fozziethebeat.com/;;https://github.com/notmd;https://olliestanley.github.io;;https://shahules786.github.io/;;https://github.com/0x22almostEvil/;;https://andrewm4894.com/;http://christoph-schuhmann.de/;;", "dblp": ";https://dblp.org/pers/k/Kilcher:Yannic.html;;286/1763;279/1685.html;33/9013;;;;;;;;;;;;293/7466", "google_scholar": ";;wVQcUf8AAAAJ;qjzTKWUAAAAJ;https://scholar.google.com.tw/citations?user=WVv1_h0AAAAJ;;;;0Pa1ftIAAAAJ;;;;;;;;HLWECfMAAAAJ;", "orcid": ";;;;0000-0001-9968-2416;;;;;;;;;;;;;0000-0001-7805-199X", "linkedin": ";;dimitri-von-r%C3%BCtte-890633215/;sotiris-anagnostidis-b064a5129/;;fozziethebeat/;abdullah-b-a4168a16a/;;https://linkedin.com/in/ostanley;richardnagyfi/;shahules/;sameer-suri-b96848250;;arnav-dantuluri-0572b7235/;andrewm4894;https://de.linkedin.com/in/christoph-schuhmann-59a740235;huu-ai-machine-learning/;", "or_profile": "~Andreas_K\u00f6pf1;~Yannic_Kilcher1;~Dimitri_von_R\u00fctte1;~Sotiris_Anagnostidis1;~Zhi_Rui_Tam1;~Keith_Stevens1;~Abdullah_Barhoum1;~Duc_Minh_Nguyen2;~Oliver_Stanley1;~Rich\u00e1rd_Nagyfi1;~Shahul_ES1;~Sameer_Suri1;~David_Alexandrovich_Glushkov1;~Arnav_Varma_Dantuluri1;~Andrew_Maguire1;~Christoph_Schuhmann1;~Huu_Nguyen2;~Alexander_Julian_Mattick1", "aff": "PROVISIO GmbH;DeepJudge;ETH Zurich;ETH Zurich;;Surface Data;;University of Technical Education Ho Chi Minh City;Kainos;E\u00f6tv\u00f6s Lorand University;Exploding Gradients;Thousand Oaks High School;Moscow State Institute of Steel and Alloys;South Forsyth High School;;;Ontocord.AI;Friedrich-Alexander-Universit\u00e4t, Pattern Recognition Lab", "aff_domain": "provisio.com;deepjudge.ai;ethz.ch;inf.ethz.ch;;surfacedata.org;;hcmute.edu.vn;kainos.com;elte.hu;explodinggradients.com;conejousd.org;misis.ru;forsythk12.org;;;ontocord.ai;cs5.fau.de", "position": "Researcher;CTO;MS student;PhD student;;Engineer;;Undergrad student;ML engineer;PhD student;Researcher;Highschooler;Undergrad student;High School;;;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nk{\\\"o}pf2023openassistant,\ntitle={OpenAssistant Conversations - Democratizing Large Language Model Alignment},\nauthor={Andreas K{\\\"o}pf and Yannic Kilcher and Dimitri von R{\\\"u}tte and Sotiris Anagnostidis and Zhi Rui Tam and Keith Stevens and Abdullah Barhoum and Duc Minh Nguyen and Oliver Stanley and Rich{\\'a}rd Nagyfi and Shahul ES and Sameer Suri and David Alexandrovich Glushkov and Arnav Varma Dantuluri and Andrew Maguire and Christoph Schuhmann and Huu Nguyen and Alexander Julian Mattick},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=VSJotgbPHF}\n}", "github": "", "project": "", "reviewers": "dMbi;mgTk;EoWL;GdLf;Tg3T", "pdf_size": 713781, "rating": "7;9;9;9;9", "confidence": "3;4;4;4;5", "wc_summary_and_contributions": "105;45;91;55;117", "wc_strengths": "37;90;63;106;198", "wc_improvement": "293;212;76;53;587", "wc_limitations": "16;87;6;1;6", "wc_correctness": "30;195;1;1;7", "wc_clarity": "17;214;1;1;4", "wc_relation_to_prior_work": "13;43;1;1;10", "wc_documentation": "1;16;1;1;11", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "513;903;241;220;941", "wc_reply_reviewers": "42;10;21;13;37", "wc_reply_authors": "870;864;298;456;1374", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 8.6, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 
82.6, 28.039971469315013 ], "wc_strengths_avg": [ 98.8, 54.90136610322188 ], "wc_improvement_avg": [ 244.2, 192.75829424437225 ], "wc_limitations_avg": [ 23.2, 32.270110009109054 ], "wc_correctness_avg": [ 46.8, 74.86628079449386 ], "wc_clarity_avg": [ 47.4, 83.50952041533947 ], "wc_relation_to_prior_work_avg": [ 13.6, 15.46091847207015 ], "wc_documentation_avg": [ 6.0, 6.324555320336759 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 563.6, 310.5849964180498 ], "wc_reply_reviewers_avg": [ 24.6, 12.784365451597509 ], "wc_reply_authors_avg": [ 772.4, 375.50051930723083 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": 0.790569415042095, "gs_citation": 587, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12400925472744173660&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "provisio.com;deepjudge.ai;ethz.ch;inf.ethz.ch;;surfacedata.org;;hcmute.edu.vn;kainos.com;elte.hu;explodinggradients.com;conejousd.org;misis.ru;forsythk12.org;;;ontocord.ai;cs5.fau.de", "author_num": 18, "aff_unique_index": "0;1;2;2;3;4;5;6;8;9;10;11;12", "aff_unique_norm": "PROVISIO GmbH;DeepJudge;ETH Zurich;Surface Data;University of Technical Education Ho Chi Minh City;Kainos;E\u00f6tv\u00f6s Lor\u00e1nd University;;Thousand Oaks High School;Moscow State Institute of Steel and Alloys;South Forsyth High School;Ontocord.AI;Friedrich-Alexander-Universit\u00e4t", "aff_unique_dep": ";;;;;;;;;;;;Pattern Recognition Lab", "aff_unique_url": ";;https://www.ethz.ch;;;https://www.kainos.com;https://www.elte.hu;;;http://www.msi.ru;;https://ontocord.ai;https://www.uni-erlangen.de/", "aff_unique_abbr": ";;ETHZ;;;;ELTE;;;MISiS;;Ontocord.AI;FAU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ho Chi Minh City", "aff_country_unique_index": "0;2;2;3;4;5;6;7;6;6;0", "aff_country_unique": "Germany;;Switzerland;Vietnam;United Kingdom;Hungary;United States;Russian Federation" }, { "title": "Understanding the Latent Space of Diffusion Models through the Lens of Riemannian Geometry", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71454", "id": "VUlYp3jiEI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4bfcebedf7a2967c410b64670f27f904-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VUlYp3jiEI", "openreview": "https://openreview.net/forum?id=VUlYp3jiEI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71454", "video": "https://nips.cc/virtual/2023/poster/71454", "author_site": "Yong-Hyun Park, Mingi Kwon, Jaewoong Choi, Junghyo Jo, Youngjung Uh", "tldr": "", "abstract": "Despite the success of diffusion models (DMs), we still lack a thorough understanding of their latent space. To understand the latent space $\\mathbf{x}_t \\in \\mathcal{X}$, we analyze it from a geometrical perspective. Our approach involves deriving the local latent basis within $\\mathcal{X}$ by leveraging the pullback metric associated with their encoding feature maps. Remarkably, our discovered local latent basis enables image editing capabilities by moving $\\mathbf{x}_t$, the latent space of DMs, along the basis vector at specific timesteps. We further analyze how the geometric structure of DMs evolves over diffusion timesteps and differs across different text conditions. 
This confirms the known phenomenon of coarse-to-fine generation and reveals novel insights such as the discrepancy between $\\mathbf{x}_t$ across timesteps, the effect of dataset complexity, and the time-varying influence of text prompts. To the best of our knowledge, this paper is the first to present image editing through $\\mathbf{x}$-space traversal, editing only once at a specific timestep $t$ without any additional training, and providing thorough analyses of the latent structure of DMs.\nThe code to reproduce our experiments can be found at the [link](https://github.com/enkeejunior1/Diffusion-Pullback).", "keywords": "diffusion models;semantic image editing;differential geometry", "primary_area": "", "supplementary_material": "/attachment/46f236dceee12e029a214d7778011557eb442c08.pdf", "author": "Yong-Hyun Park;Mingi Kwon;Jaewoong Choi;Junghyo Jo;Youngjung Uh", "authorids": "~Yong-Hyun_Park1;~Mingi_Kwon1;~Jaewoong_Choi1;~Junghyo_Jo1;~Youngjung_Uh2", "gender": "M;M;M;;", "homepage": ";https://github.com/kwonminki;;;https://vilab.yonsei.ac.kr/member/professor", "dblp": "62/444;327/3276;63/11483;48/11016;57/10511", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.co.kr/citations?user=W8vK8BwAAAAJ;e4ZLjREAAAAJ;h1QXLx0AAAAJ;BWBGrEEAAAAJ", "orcid": ";;;0000-0002-1284-9488;", "linkedin": ";kwonmingi/;;;youngjung-uh-78b459b5/", "or_profile": "~Yong-Hyun_Park1;~Mingi_Kwon1;~Jaewoong_Choi1;~Junghyo_Jo1;~Youngjung_Uh2", "aff": "Seoul National University;Yonsei University;Korea Institute for Advanced Study;Seoul National University;Yonsei University", "aff_domain": "snu.ac.kr;yonsei.ac.kr;kias.re.kr;snu.ac.kr;yonsei.ac.kr", "position": "MS student;PhD student;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\npark2023understanding,\ntitle={Understanding the Latent Space of Diffusion Models through the Lens of Riemannian Geometry},\nauthor={Yong-Hyun Park and Mingi Kwon and Jaewoong Choi and Junghyo Jo and Youngjung Uh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VUlYp3jiEI}\n}", "github": "", "project": "", "reviewers": "6jtZ;vuQy;iEgM;cKEc", "pdf_size": 2720408, "rating": "4;5;6;7", "confidence": "4;5;4;3", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "1;2;2;2", "wc_summary": "143;71;91;92", "wc_strengths": "80;39;86;131", "wc_weaknesses": "301;92;95;143", "wc_questions": "36;106;108;65", "wc_limitations": "24;12;9;26", "wc_review": "584;320;389;457", "wc_reply_reviewers": "34;53;45;28", "wc_reply_authors": "165;164;62;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 26.61179249881526 ], "wc_strengths_avg": [ 84.0, 32.61134771824066 ], "wc_weaknesses_avg": [ 157.75, 85.14509674667121 ], "wc_questions_avg": [ 78.75, 30.06139550985616 ], "wc_limitations_avg": [ 17.75, 7.361215932167728 ], "wc_review_avg": [ 437.5, 97.46922591259253 ], "wc_reply_reviewers_avg": [ 40.0, 9.669539802906858 ], "wc_reply_authors_avg": [ 102.25, 64.16531383855299 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 70, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=15863114785102293281&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "snu.ac.kr;yonsei.ac.kr;kias.re.kr;snu.ac.kr;yonsei.ac.kr", "author_num": 5, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Seoul National University;Yonsei University;Korea Institute for Advanced Study", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;https://www.yonsei.ac.kr;http://www.kaist.edu", "aff_unique_abbr": "SNU;Yonsei;KIAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Score-based Data Assimilation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71453", "id": "VUvLSnMZdX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f7fa581cc8a1970a4332920cdf87395-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VUvLSnMZdX", "openreview": "https://openreview.net/forum?id=VUvLSnMZdX", "poster": "/media/PosterPDFs/NeurIPS%202023/71453.png?t=1701819011.6738703", "slides": "https://nips.cc/virtual/2023/poster/71453", "video": "https://nips.cc/virtual/2023/poster/71453", "author_site": "Fran\u00e7ois Rozet, Gilles Louppe", "tldr": "", "abstract": "Data assimilation, in its most comprehensive form, addresses the Bayesian inverse problem of identifying plausible state trajectories that explain noisy or incomplete observations of stochastic dynamical systems. Various approaches have been proposed to solve this problem, including particle-based and variational methods. However, most algorithms depend on the transition dynamics for inference, which becomes intractable for long time horizons or for high-dimensional systems with complex dynamics, such as oceans or atmospheres. In this work, we introduce score-based data assimilation for trajectory inference. We learn a score-based generative model of state trajectories based on the key insight that the score of an arbitrarily long trajectory can be decomposed into a series of scores over short segments. After training, inference is carried out using the score model, in a non-autoregressive manner by generating all states simultaneously. Quite distinctively, we decouple the observation model from the training procedure and use it only at inference to guide the generative process, which enables a wide range of zero-shot observation scenarios. 
We present theoretical and empirical evidence supporting the effectiveness of our method.", "keywords": "data assimilation;score-based;generative modeling;posterior inference;dynamical systems", "primary_area": "", "supplementary_material": "", "author": "Fran\u00e7ois Rozet;Gilles Louppe", "authorids": "~Fran\u00e7ois_Rozet1;~Gilles_Louppe1", "gender": "M;M", "homepage": "https://francois-rozet.github.io/;http://glouppe.github.io", "dblp": ";05/9382", "google_scholar": "C-WS1pwAAAAJ;F_77d4QAAAAJ", "orcid": "0000-0002-8846-8761;0000-0002-2082-3106", "linkedin": ";", "or_profile": "~Fran\u00e7ois_Rozet1;~Gilles_Louppe1", "aff": "Universit\u00e9 de Li\u00e8ge;University of Li\u00e8ge", "aff_domain": "ulg.ac.be;uliege.be", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nrozet2023scorebased,\ntitle={Score-based Data Assimilation},\nauthor={Fran{\\c{c}}ois Rozet and Gilles Louppe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VUvLSnMZdX}\n}", "github": "", "project": "", "reviewers": "DaY5;RNbZ;bwmG;RS1N", "pdf_size": 4924035, "rating": "6;7;7;8", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "83;244;229;100", "wc_strengths": "213;169;64;118", "wc_weaknesses": "142;278;135;142", "wc_questions": "88;150;106;52", "wc_limitations": "20;204;59;8", "wc_review": "546;1045;593;420", "wc_reply_reviewers": "17;0;97;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 164.0, 72.94175758781796 ], "wc_strengths_avg": [ 141.0, 55.73598478541489 ], "wc_weaknesses_avg": [ 174.25, 59.968220750660926 ], "wc_questions_avg": [ 99.0, 35.2845575287547 ], "wc_limitations_avg": [ 72.75, 78.08769109149021 ], "wc_review_avg": [ 651.0, 236.10696728389868 ], "wc_reply_reviewers_avg": [ 37.5, 36.63673020344474 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=236038948055299021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ulg.ac.be;uliege.be", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 de Li\u00e8ge;University of Li\u00e8ge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ulg.ac.be;https://www.ulg.ac.be", "aff_unique_abbr": "ULi\u00e8ge;ULi\u00e8ge", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Belgium" }, { "title": "Correlation Aware Sparsified Mean Estimation Using Random Projection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71452", "id": "VacSQpbI0U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a8e21789027e92739f89df92cc172bcf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VacSQpbI0U", "openreview": "https://openreview.net/forum?id=VacSQpbI0U", "poster": "/media/PosterPDFs/NeurIPS%202023/71452.png?t=1702067305.3142538", "slides": "https://nips.cc/virtual/2023/poster/71452", "video": 
"https://nips.cc/virtual/2023/poster/71452", "author_site": "Shuli Jiang, PRANAY SHARMA, Gauri Joshi", "tldr": "", "abstract": "We study the problem of communication-efficient distributed vector mean estimation, which is a commonly used subroutine in distributed optimization and Federated Learning (FL). Rand-$k$ sparsification is a commonly used technique to reduce communication cost, where each client sends $k < d$ of its coordinates to the server. However, Rand-$k$ is agnostic to any correlations, that might exist between clients in practical scenarios. The recently proposed Rand-$k$-Spatial estimator leverages the cross-client correlation information at the server to improve Rand-$k$'s performance. Yet, the performance of Rand-$k$-Spatial is suboptimal, and improving mean estimation is key to a faster convergence in distributed optimization. We propose the Rand-Proj-Spatial estimator with a more flexible encoding-decoding procedure, which generalizes the encoding of Rand-$k$ by projecting the client vectors to a random $k$-dimensional subspace. We utilize Subsampled Randomized Hadamard Transform (SRHT) as the projection matrix, and show that Rand-Proj-Spatial with SRHT outperforms Rand-$k$-Spatial, using the correlation information more efficiently. Furthermore, we propose an approach to incorporate varying degrees of correlation, and suggest a practical variant of Rand-Proj-Spatial when the correlation information is not available to the server. Finally, experiments on real-world distributed optimization tasks showcase the superior performance of Rand-Proj-Spatial compared to Rand-$k$-Spatial and other more sophisticated sparsification techniques.", "keywords": "distributed vector mean estimation;communication efficiency;cross-client correlation", "primary_area": "", "supplementary_material": "/attachment/71c85bb167ddc8f7ae79eb423fe546be78a8b6a4.pdf", "author": "Shuli Jiang;Pranay Sharma;Gauri Joshi", "authorids": "~Shuli_Jiang1;~Pranay_Sharma2;~Gauri_Joshi1", "gender": "F;;", "homepage": "https://www.andrew.cmu.edu/user/shulij/;;", "dblp": "224/6441;;", "google_scholar": "D0IvR7MAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shuli_Jiang1;~Pranay_Sharma2;~Gauri_Joshi1", "aff": "Carnegie Mellon University;;", "aff_domain": "cmu.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\njiang2023correlation,\ntitle={Correlation Aware Sparsified Mean Estimation Using Random Projection},\nauthor={Shuli Jiang and Pranay Sharma and Gauri Joshi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VacSQpbI0U}\n}", "github": "", "project": "", "reviewers": "2UxF;R72T;BRXu;EAa8;uyDa", "pdf_size": 1349690, "rating": "5;6;6;6;7", "confidence": "4;4;2;3;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;3;3;2", "wc_summary": "117;44;114;151;462", "wc_strengths": "45;39;16;175;297", "wc_weaknesses": "233;164;24;67;530", "wc_questions": "11;115;16;35;343", "wc_limitations": "29;1;1;1;243", "wc_review": "435;363;171;429;1875", "wc_reply_reviewers": "68;53;0;15;923", "wc_reply_authors": "36;44;0;11;1970", "reply_reviewers": "1;1;0;1;2", "reply_authors": "2;2;1;2;7", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 177.6, 146.39480865112668 ], "wc_strengths_avg": [ 114.4, 106.95718769676024 ], "wc_weaknesses_avg": 
[ 203.6, 178.82125153347965 ], "wc_questions_avg": [ 104.0, 125.21661231641751 ], "wc_limitations_avg": [ 55.0, 94.62346432043164 ], "wc_review_avg": [ 654.6, 617.6398950845064 ], "wc_reply_reviewers_avg": [ 211.8, 356.45218473169723 ], "wc_reply_authors_avg": [ 412.2, 779.0646699729106 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 2.1354156504062622 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12991974966444408596&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Efficient Adaptation of Large Vision Transformer via Adapter Re-Composing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71451", "id": "VbYdaK8ek0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4ca07aa108036f80cbb5b82285fd4b1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VbYdaK8ek0", "openreview": "https://openreview.net/forum?id=VbYdaK8ek0", "poster": "/media/PosterPDFs/NeurIPS%202023/71451.png?t=1699103134.1838615", "slides": "https://nips.cc/virtual/2023/poster/71451", "video": "https://nips.cc/virtual/2023/poster/71451", "author_site": "Wei Dong, Dawei Yan, Zhijun Lin, Peng Wang", "tldr": "", "abstract": "The advent of high-capacity pre-trained models has revolutionized problem-solving in computer vision, shifting the focus from training task-specific models to adapting pre-trained models. Consequently, effectively adapting large pre-trained models to downstream tasks in an efficient manner has become a prominent research area. Existing solutions primarily concentrate on designing lightweight adapters and their interaction with pre-trained models, with the goal of minimizing the number of parameters requiring updates. In this study, we propose a novel Adapter Re-Composing (ARC) strategy that addresses efficient pre-trained model adaptation from a fresh perspective. Our approach considers the reusability of adaptation parameters and introduces a parameter-sharing scheme. Specifically, we leverage symmetric down-/up-projections to construct bottleneck operations, which are shared across layers. By learning low-dimensional re-scaling coefficients, we can effectively re-compose layer-adaptive adapters. This parameter-sharing strategy in adapter design allows us to further reduce the number of new parameters while maintaining satisfactory performance, thereby offering a promising approach to compress the adaptation cost. We conduct experiments on 24 downstream image classification tasks using various Vision Transformer variants to evaluate our method. The results demonstrate that our approach achieves compelling transfer learning performance with a reduced parameter count. 
Our code is available at https://github.com/DavidYanAnDe/ARC.", "keywords": "computer vision; vision transformer; visual adapter; transfer learning", "primary_area": "", "supplementary_material": "/attachment/8097723c5dc00fcad838de7114b5e0eeac16d183.zip", "author": "Wei Dong;Dawei Yan;Zhijun Lin;Peng Wang", "authorids": "~Wei_Dong6;~Dawei_Yan1;~Zhijun_Lin2;~Peng_Wang19", "gender": "M;;M;M", "homepage": "https://github.com/DavidYanAnDe;;https://wp8619.github.io/;", "dblp": ";;95/4442-23.html;", "google_scholar": ";;vIr3ICQAAAAJ;tkTl3BMAAAAJ", "orcid": ";0000-0002-6646-4866;;0000-0003-0263-3584", "linkedin": ";;;", "or_profile": "~Dawei_Yan1;~Zhijun_Lin2;~Peng_Wang19;~Wei_Dong4", "aff": "Xi'an University of Architecture and Technology;Northwest Polytechnical University Xi'an;University of Wollonong;Xi'an University of Architecture and Technology", "aff_domain": "xauat.edu.cn;nwpu.edu.cn;uow.edu.au;xauat.edu.cn", "position": "MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ndong2023efficient,\ntitle={Efficient Adaptation of Large Vision Transformer via Adapter Re-Composing},\nauthor={Wei Dong and Dawei Yan and Zhijun Lin and Peng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VbYdaK8ek0}\n}", "github": "", "project": "", "reviewers": "jreR;4kbb;4jX4;zRDU;7yx9", "pdf_size": 667181, "rating": "3;5;5;5;6", "confidence": "5;4;4;4;4", "soundness": "2;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;4;3;3", "wc_summary": "56;106;88;50;62", "wc_strengths": "28;35;37;51;45", "wc_weaknesses": "152;175;198;135;127", "wc_questions": "4;3;171;4;3", "wc_limitations": "1;8;11;1;3", "wc_review": "241;327;505;241;240", "wc_reply_reviewers": "0;234;156;20;39", "wc_reply_authors": "0;361;0;28;53", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;1;2;2", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.4, 21.21885953579975 ], "wc_strengths_avg": [ 39.2, 8.009993757800315 ], "wc_weaknesses_avg": [ 157.4, 26.127380274340556 ], "wc_questions_avg": [ 37.0, 67.00149252068941 ], "wc_limitations_avg": [ 4.8, 4.019950248448356 ], "wc_review_avg": [ 310.8, 102.69644589760641 ], "wc_reply_reviewers_avg": [ 89.8, 90.2139678763771 ], "wc_reply_authors_avg": [ 88.4, 137.72523370827875 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9185586535436918, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3787208857482257469&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "xauat.edu.cn;nwpu.edu.cn;uow.edu.au;xauat.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Xi'an University of Architecture and Technology;Northwest Polytechnical University;University of Wollongong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xauat.edu.cn;http://www.nwpu.edu.cn;https://www.uow.edu.au", "aff_unique_abbr": "XAUAT;NWPU;UOW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Efficient Algorithms for Generalized Linear Bandits with Heavy-tailed Rewards", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71450", "id": "Vbm5UCaYeh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0982cbc81401df3430ee1ff780dc7a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Vbm5UCaYeh", "openreview": "https://openreview.net/forum?id=Vbm5UCaYeh", "poster": "/media/PosterPDFs/NeurIPS%202023/71450.png?t=1699608558.7516973", "slides": "https://nips.cc/virtual/2023/poster/71450", "video": "https://nips.cc/virtual/2023/poster/71450", "author_site": "Bo Xue, Yimu Wang, Yuanyu Wan, Jinfeng Yi, Lijun Zhang", "tldr": "", "abstract": "This paper investigates the problem of generalized linear bandits with heavy-tailed rewards, whose $(1+\\epsilon)$-th moment is bounded for some $\\epsilon\\in (0,1]$. Although there exist methods for generalized linear bandits, most of them focus on bounded or sub-Gaussian rewards and are not well-suited for many real-world scenarios, such as financial markets and web-advertising. To address this issue, we propose two novel algorithms based on truncation and mean of medians. These algorithms achieve an almost optimal regret bound of $\\widetilde{O}(dT^{\\frac{1}{1+\\epsilon}})$, where $d$ is the dimension of contextual information and $T$ is the time horizon. Our truncation-based algorithm supports online learning, distinguishing it from existing truncation-based approaches. Additionally, our mean-of-medians-based algorithm requires only $O(\\log T)$ rewards and one estimator per epoch, making it more practical. Moreover, our algorithms improve the regret bounds by a logarithmic factor compared to existing algorithms when $\\epsilon=1$. Numerical experimental results confirm the merits of our algorithms.", "keywords": "linear bandits;heavy-tailed;truncated;mean of medians", "primary_area": "", "supplementary_material": "/attachment/dc43f74f2d57176fa75fb5be2afed7a11d8a99cb.pdf", "author": "Bo Xue;Yimu Wang;Yuanyu Wan;Jinfeng Yi;Lijun Zhang", "authorids": "~Bo_Xue1;~Yimu_Wang1;~Yuanyu_Wan1;~Jinfeng_Yi1;~Lijun_Zhang1", "gender": "M;M;M;M;", "homepage": "https://xueb1996.github.io/;https://yimuwangcs.github.io;https://yuanyuwan.github.io/;http://jinfengyi.net/;", "dblp": "122/2421-4;140/7766;221/3499;117/4898;", "google_scholar": "1D4gVmIAAAAJ;TV2vnN8AAAAJ;CEymMc8AAAAJ;lZxRZ84AAAAJ;", "orcid": "0000-0002-7295-4853;;;;", "linkedin": ";yimu-wang-854743151/;;https://www.linkedin.com/nhome/?trk=;", "or_profile": "~Bo_Xue1;~Yimu_Wang1;~Yuanyu_Wan1;~Jinfeng_Yi1;~Lijun_Zhang1", "aff": "City University of Hong Kong;University of Waterloo;Zhejiang University;JD AI Research;", "aff_domain": "cityu.edu.hk;uwaterloo.ca;zju.edu.cn;jd.com;", "position": "PhD student;PhD student;Researcher;Senior Director;", "bibtex": "@inproceedings{\nxue2023efficient,\ntitle={Efficient Algorithms for Generalized Linear Bandits with Heavy-tailed Rewards},\nauthor={Bo Xue and Yimu Wang and Yuanyu Wan and Jinfeng Yi and Lijun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Vbm5UCaYeh}\n}", "github": "", "project": "", "reviewers": "jQHU;7N78;QvG4;WjRx", "pdf_size": 420607, "rating": "6;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;4;4;3", "wc_summary": "62;49;118;38", "wc_strengths": "38;111;69;26", "wc_weaknesses": "31;83;112;17", "wc_questions": "38;16;26;11", "wc_limitations": "1;1;1;13", "wc_review": "170;260;326;105", "wc_reply_reviewers": "0;33;16;12", 
"wc_reply_authors": "0;27;23;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 66.75, 30.784533454317607 ], "wc_strengths_avg": [ 61.0, 32.85574531189332 ], "wc_weaknesses_avg": [ 60.75, 38.473204961375394 ], "wc_questions_avg": [ 22.75, 10.328964130056798 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 215.25, 84.36638844942931 ], "wc_reply_reviewers_avg": [ 15.25, 11.818946653572814 ], "wc_reply_authors_avg": [ 12.5, 12.579745625409124 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16886056543571260169&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cityu.edu.hk;uwaterloo.ca;zju.edu.cn;jd.com;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "City University of Hong Kong;University of Waterloo;Zhejiang University;JD", "aff_unique_dep": ";;;JD AI Research", "aff_unique_url": "https://www.cityu.edu.hk;https://uwaterloo.ca;https://www.zju.edu.cn;https://www.jd.com", "aff_unique_abbr": "CityU;UW;ZJU;JD AI", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Canada" }, { "title": "Learning to Taste: A Multimodal Wine Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73572", "id": "VeJgZYhT7H", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/170035f97007fdfa665880107b56f384-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=VeJgZYhT7H", "openreview": "https://openreview.net/forum?id=VeJgZYhT7H", "poster": "/media/PosterPDFs/NeurIPS%202023/73572.png?t=1699487351.6972504", "slides": "https://nips.cc/virtual/2023/poster/73572", "video": "https://nips.cc/virtual/2023/poster/73572", "author_site": "Thoranna Bender, Simon S\u00f8rensen, Alireza Kashani, Kristjan Eldjarn Hjorleifsson, Grethe Hyldig, S\u00f8ren Hauberg, Serge Belongie, Frederik Warburg", "tldr": "", "abstract": "We present WineSensed, a large multimodal wine dataset for studying the relations between visual perception, language, and flavor. The dataset encompasses 897k images of wine labels and 824k reviews of wines curated from the Vivino platform. It has over 350k unique bottlings, annotated with year, region, rating, alcohol percentage, price, and grape composition. We obtained fine-grained flavor annotations on a subset by conducting a wine-tasting experiment with 256 participants who were asked to rank wines based on their similarity in flavor, resulting in more than 5k pairwise flavor distances. We propose a low-dimensional concept embedding algorithm that combines human experience with automatic machine similarity kernels. 
We demonstrate that this shared concept embedding space improves upon separate embedding spaces for coarse flavor classification (alcohol percentage, country, grape, price, rating) and representing human perception of flavor.", "keywords": "Crowd annotations;Multi-modal;Concept embeddings", "primary_area": "", "supplementary_material": "/attachment/a210c1a22b34f3546e296c8f541fb26d2469139e.pdf", "author": "Thoranna Bender;Simon Moe S\u00f8rensen;Alireza Kashani;Kristjan Eldjarn Hjorleifsson;Grethe Hyldig;S\u00f8ren Hauberg;Serge Belongie;Frederik Rahb\u00e6k Warburg", "authorids": "~Thoranna_Bender1;~Simon_Moe_S\u00f8rensen1;~Alireza_Kashani1;~Kristjan_Eldjarn_Hjorleifsson1;~Grethe_Hyldig1;~S\u00f8ren_Hauberg1;~Serge_Belongie1;~Frederik_Rahb\u00e6k_Warburg1", "gender": "F;M;M;;F;M;M;M", "homepage": ";;;;;http://www2.compute.dtu.dk/~sohau/;https://di.ku.dk/english/staff/?pure=en%2Fpersons%2Fserge-belongie(0ce65383-3761-4b17-948a-83b461e371e2)%2Fpublications.html;https://frederikwarburg.github.io/", "dblp": ";;;151/8872;;39/7226;http://dblp.uni-trier.de/pers/hd/b/Belongie:Serge_J=;", "google_scholar": ";;y7sa_coAAAAJ;K1H_xSgAAAAJ;;https://scholar.google.com/citations?hl=en;ORr4XJYAAAAJ;0Ozzy4IAAAAJ", "orcid": ";;;0000-0002-7851-1818;0000-0001-7991-8225;;0000-0002-0388-5217;", "linkedin": "thoranna/;simon-moe-s%C3%B8rensen-9981a1127/;;https://linkedin.com/in/kreldjarn;;;sergebelongie;", "or_profile": "~Thoranna_Bender1;~Simon_Moe_S\u00f8rensen1;~Alireza_Kashani1;~Kristjan_Eldjarn_Hjorleifsson1;~Grethe_Hyldig1;~S\u00f8ren_Hauberg1;~Serge_Belongie1;~Frederik_Rahb\u00e6k_Warburg1", "aff": "Technical University of Denmark;;Vivino;;Technical University of Denmark;Technical University of Denmark;University of Copenhagen;Technical University of Denmark", "aff_domain": "dtu.dk;;vivino.com;;dtu.dk;dtu.dk;ku.dk;dtu.dk", "position": "MS student;;Researcher;;Associate Professor;Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nbender2023learning,\ntitle={Learning to Taste: A Multimodal Wine Dataset},\nauthor={Thoranna Bender and Simon Moe S{\\o}rensen and Alireza Kashani and Kristjan Eldjarn Hjorleifsson and Grethe Hyldig and S{\\o}ren Hauberg and Serge Belongie and Frederik Rahb{\\ae}k Warburg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=VeJgZYhT7H}\n}", "github": "", "project": "", "reviewers": "PNh1;N3w8;1rM1;YEVG;DdVt", "pdf_size": 8739314, "rating": "5;6;6;6;10", "confidence": "4;4;3;5;4", "wc_summary_and_contributions": "402;195;70;113;119", "wc_strengths": "234;134;21;60;68", "wc_improvement": "381;146;36;42;3", "wc_limitations": "215;1;14;77;41", "wc_correctness": "8;1;28;53;44", "wc_clarity": "62;1;48;14;24", "wc_relation_to_prior_work": "75;1;20;40;20", "wc_documentation": "5;1;11;16;23", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1383;481;249;416;343", "wc_reply_reviewers": "395;0;20;0;102", "wc_reply_authors": "920;397;778;665;776", "reply_reviewers": "2;0;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 1.7435595774162693 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 179.8, 118.16835447783811 ], "wc_strengths_avg": [ 103.4, 74.72509618595349 ], "wc_improvement_avg": [ 121.6, 138.28463399814166 ], "wc_limitations_avg": [ 69.6, 77.21554247688738 ], "wc_correctness_avg": [ 26.8, 20.013995103427003 ], "wc_clarity_avg": [ 29.8, 22.274649267721365 ], "wc_relation_to_prior_work_avg": [ 31.2, 
25.134836382996408 ], "wc_documentation_avg": [ 11.2, 7.807688518377254 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 574.4, 411.60155490474034 ], "wc_reply_reviewers_avg": [ 103.4, 150.57967990402955 ], "wc_reply_authors_avg": [ 707.2, 174.94844955014605 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1631948355515434579&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "dtu.dk;;vivino.com;;dtu.dk;dtu.dk;ku.dk;dtu.dk", "author_num": 8, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Technical University of Denmark;Vivino;University of Copenhagen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tek.dk;https://www.vivino.com;https://www.ku.dk", "aff_unique_abbr": "DTU;Vivino;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "Denmark;Unknown" }, { "title": "Conformal Prediction for Uncertainty-Aware Planning with Diffusion Dynamics Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71449", "id": "VeO03T59Sh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe318a2b6c699808019a456b706cd845-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VeO03T59Sh", "openreview": "https://openreview.net/forum?id=VeO03T59Sh", "poster": "/media/PosterPDFs/NeurIPS%202023/71449.png?t=1701379977.479992", "slides": "https://nips.cc/virtual/2023/poster/71449", "video": "https://nips.cc/virtual/2023/poster/71449", "author_site": "Jiankai Sun, Yiqi Jiang, Jianing Qiu, Parth Nobel, Mykel J Kochenderfer, Mac Schwager", "tldr": "", "abstract": "Robotic applications often involve working in environments that are uncertain, dynamic, and partially observable. Recently, diffusion models have been proposed for learning trajectory prediction models trained from expert demonstrations, which can be used for planning in robot tasks. Such models have demonstrated a strong ability to overcome challenges such as multi-modal action distributions, high-dimensional output spaces, and training instability. It is crucial to quantify the uncertainty of these dynamics models when using them for planning. In this paper, we quantify the uncertainty of diffusion dynamics models using Conformal Prediction (CP). Given a finite number of exchangeable expert trajectory examples (called the \u201ccalibration set\u201d), we use CP to obtain a set in the trajectory space (called the \u201ccoverage region\u201d) that is guaranteed to contain the output of the diffusion model with a user-defined probability (called the \u201ccoverage level\u201d). In PlanCP, inspired by concepts from conformal prediction, we modify the loss function for training the diffusion model to include a quantile term to encourage more robust performance across the variety of training examples. At test time, we then calibrate PlanCP with a conformal prediction process to obtain coverage sets for the trajectory prediction with guaranteed coverage level. We evaluate our algorithm on various planning tasks and model-based offline reinforcement learning tasks and show that it reduces the uncertainty of the learned trajectory prediction model. 
As a by-product, our algorithm PlanCP outperforms prior algorithms on existing offline RL benchmarks and challenging continuous planning tasks. Our method can be combined with most model-based planning approaches to produce uncertainty estimates of the closed-loop system.", "keywords": "Uncertainty;Conformal Prediction;Dynamics Model", "primary_area": "", "supplementary_material": "/attachment/2ecc4ea663e3dc768de4f0a96c64d4c41aea6275.pdf", "author": "Jiankai Sun;Yiqi Jiang;Jianing Qiu;Parth Talpur Nobel;Mykel Kochenderfer;Mac Schwager", "authorids": "~Jiankai_Sun6;~Yiqi_Jiang2;~Jianing_Qiu1;~Parth_Talpur_Nobel1;~Mykel_Kochenderfer1;~Mac_Schwager1", "gender": ";F;;M;M;M", "homepage": ";https://yiqij.github.io/;;https://ptnobel.github.io;https://mykel.kochenderfer.com;https://msl.stanford.edu/", "dblp": "121/4211;;;274/6165;34/2029.html;22/7012", "google_scholar": "726MCb8AAAAJ;qrlQN-AAAAAJ;;gZ03aJ0AAAAJ;cAy9G6oAAAAJ;-EqbTXoAAAAJ", "orcid": ";;;0000-0002-8228-7441;0000-0002-7238-9663;", "linkedin": ";yiqi-jiang-aa1363173/;;ptnobel/;mykel-kochenderfer;", "or_profile": "~Jiankai_Sun6;~Yiqi_Jiang2;~Jianing_Qiu1;~Parth_Talpur_Nobel1;~Mykel_Kochenderfer1;~Mac_Schwager1", "aff": "Stanford University;Stanford University;;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nsun2023conformal,\ntitle={Conformal Prediction for Uncertainty-Aware Planning with Diffusion Dynamics Model},\nauthor={Jiankai Sun and Yiqi Jiang and Jianing Qiu and Parth Talpur Nobel and Mykel Kochenderfer and Mac Schwager},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VeO03T59Sh}\n}", "github": "", "project": "", "reviewers": "gfdz;8c3j;zTTG;BQNc;wAeC", "pdf_size": 554549, "rating": "4;5;5;6;7", "confidence": "5;3;4;3;3", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;1;2;3;4", "wc_summary": "44;31;59;32;131", "wc_strengths": "72;21;44;97;116", "wc_weaknesses": "359;123;576;166;123", "wc_questions": "99;1;6;175;26", "wc_limitations": "57;4;18;1;1", "wc_review": "631;180;703;471;397", "wc_reply_reviewers": "0;19;79;44;48", "wc_reply_authors": "133;22;35;66;40", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 59.4, 37.20537595563307 ], "wc_strengths_avg": [ 70.0, 34.42673379802388 ], "wc_weaknesses_avg": [ 269.4, 176.40249431343082 ], "wc_questions_avg": [ 61.4, 66.76705774556791 ], "wc_limitations_avg": [ 16.2, 21.348536249588633 ], "wc_review_avg": [ 476.4, 184.08432850191238 ], "wc_reply_reviewers_avg": [ 38.0, 26.914680009244027 ], "wc_reply_authors_avg": [ 59.2, 39.574739417967116 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7844645405527363, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3403473073810205004&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": 
"Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "RDumb: A simple approach that questions our progress in continual test-time adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71448", "id": "VfP6VTVsHc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d640f377893fc5f22b5610e175ef7c3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VfP6VTVsHc", "openreview": "https://openreview.net/forum?id=VfP6VTVsHc", "poster": "/media/PosterPDFs/NeurIPS%202023/71448.png?t=1701887471.9686272", "slides": "https://nips.cc/virtual/2023/poster/71448", "video": "https://nips.cc/virtual/2023/poster/71448", "author_site": "Ori Press, Steffen Schneider, Steffen Schneider, Matthias K\u00fcmmerer, Matthias Bethge", "tldr": "", "abstract": "Test-Time Adaptation (TTA) allows to update pre-trained models to changing data distributions at deployment time. While early work tested these algorithms for individual fixed distribution shifts, recent work proposed and applied methods for continual adaptation over long timescales. To examine the reported progress in the field, we propose the Continually Changing Corruptions (CCC) benchmark to measure asymptotic performance of TTA techniques. We find that eventually all but one state-of-the-art methods collapse and perform worse than a non-adapting model, including models specifically proposed to be robust to performance collapse. In addition, we introduce a simple baseline, \"RDumb\", that periodically resets the model to its pretrained state. 
RDumb performs better or on par with the previously proposed state-of-the-art in all considered benchmarks.\nOur results show that previous TTA approaches are neither effective at regularizing adaptation to avoid collapse nor able to outperform a simplistic resetting strategy.", "keywords": "test time adaptation;continual adaptation;benchmarking;imagenet-c;imagenet classification;robustness;continual learning;imagenet benchmark", "primary_area": "", "supplementary_material": "", "author": "Ori Press;Steffen Schneider;Matthias Kuemmerer;Matthias Bethge", "authorids": "~Ori_Press1;~Steffen_Schneider1;~Matthias_Kuemmerer1;~Matthias_Bethge1", "gender": ";;;M", "homepage": ";https://stes.io;;https://bethgelab.org", "dblp": ";16/8643.html;151/6291.html;77/3005", "google_scholar": ";https://scholar.google.de/citations?user=KR5dj44AAAAJ;https://scholar.google.de/citations?user=y5Ej2qYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-2327-6459;0000-0001-9644-4703;", "linkedin": ";https://linkedin.com/in/steffen-schneider;;", "or_profile": "~Ori_Press1;~Steffen_Schneider1;~Matthias_Kuemmerer1;~Matthias_Bethge1", "aff": ";University of Tuebingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": ";uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": ";PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\npress2023rdumb,\ntitle={{RD}umb: A simple approach that questions our progress in continual test-time adaptation},\nauthor={Ori Press and Steffen Schneider and Matthias Kuemmerer and Matthias Bethge},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VfP6VTVsHc}\n}", "github": "", "project": "", "reviewers": "BezD;xRL5;Gw5H;h5ar", "pdf_size": 14613467, "rating": "3;5;7;7", "confidence": "4;4;3;3", "soundness": "2;3;4;3", "novelty": "2;2;4;3", "presentation": "2;3;3;4", "wc_summary": "65;65;87;135", "wc_strengths": "18;52;58;67", "wc_weaknesses": "101;104;47;140", "wc_questions": "28;165;152;45", "wc_limitations": "4;34;3;1", "wc_review": "216;420;347;388", "wc_reply_reviewers": "0;101;38;145", "wc_reply_authors": "0;752;7;7", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.0, 28.583211855912904 ], "wc_strengths_avg": [ 48.75, 18.538810641462412 ], "wc_weaknesses_avg": [ 98.0, 33.20391543176798 ], "wc_questions_avg": [ 97.5, 61.46747107210447 ], "wc_limitations_avg": [ 10.5, 13.6106575888162 ], "wc_review_avg": [ 342.75, 77.6188604657399 ], "wc_reply_reviewers_avg": [ 71.0, 55.915114235777075 ], "wc_reply_authors_avg": [ 191.5, 323.61744390560904 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7407854005249056205&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": ";uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Tuebingen;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", 
"aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Learning Large-Scale MTP$_2$ Gaussian Graphical Models via Bridge-Block Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71447", "id": "Vfp8sDST4g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e7e506bc5a94768243083216fe51d98b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Vfp8sDST4g", "openreview": "https://openreview.net/forum?id=Vfp8sDST4g", "poster": "/media/PosterPDFs/NeurIPS%202023/71447.png?t=1698395959.1647928", "slides": "https://nips.cc/virtual/2023/poster/71447", "video": "https://nips.cc/virtual/2023/poster/71447", "author_site": "Xiwen WANG, Jiaxi Ying, Daniel Palomar", "tldr": "", "abstract": "This paper studies the problem of learning the large-scale Gaussian graphical models that are multivariate totally positive of order two ($\\text{MTP}_2$). By introducing the concept of bridge, which commonly exists in large-scale sparse graphs, we show that the entire problem can be equivalently optimized through (1) several smaller-scaled sub-problems induced by a \\emph{bridge-block decomposition} on the thresholded sample covariance graph and (2) a set of explicit solutions on entries corresponding to \\emph{bridges}. From practical aspect, this simple and provable discipline can be applied to break down a large problem into small tractable ones, leading to enormous reduction on the computational complexity and substantial improvements for all existing algorithms. The synthetic and real-world experiments demonstrate that our proposed method presents a significant speed-up compared to the state-of-the-art benchmarks.", "keywords": "MTP2 Gaussian Graphical Model;High-dimensional precision matrix estimation;Bridge-block decomposition.", "primary_area": "", "supplementary_material": "/attachment/39f1fd852c087adbfd2ab8be1a298986350c932f.zip", "author": "Xiwen Wang;Jiaxi Ying;Daniel P. Palomar", "authorids": "~Xiwen_Wang2;~Jiaxi_Ying1;~Daniel_P._Palomar1", "gender": "M;M;M", "homepage": "http://www.wangxiwen1997.com;https://jxying.github.io/;https://www.danielppalomar.com/", "dblp": "187/3023;179/2448;", "google_scholar": ";_IzItlcAAAAJ;qlReqq8AAAAJ", "orcid": "0000-0001-6943-2686;;0000-0001-5250-4874", "linkedin": ";;", "or_profile": "~Xiwen_Wang2;~Jiaxi_Ying1;~Daniel_P._Palomar1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning Large-Scale {MTP}\\$\\_2\\$ Gaussian Graphical Models via Bridge-Block Decomposition},\nauthor={Xiwen Wang and Jiaxi Ying and Daniel P. 
Palomar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Vfp8sDST4g}\n}", "github": "", "project": "", "reviewers": "bj5B;pk8s;ghhX;tnT4;vWNe", "pdf_size": 626615, "rating": "5;6;6;6;7", "confidence": "3;4;4;3;3", "soundness": "3;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;2;2;3;4", "wc_summary": "131;36;130;92;387", "wc_strengths": "84;21;159;64;167", "wc_weaknesses": "113;47;225;31;105", "wc_questions": "46;181;114;30;230", "wc_limitations": "3;23;20;2;42", "wc_review": "377;308;648;219;931", "wc_reply_reviewers": "23;0;698;0;21", "wc_reply_authors": "71;0;1485;0;39", "reply_reviewers": "1;0;2;0;1", "reply_authors": "2;1;4;1;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 155.2, 120.95850528177009 ], "wc_strengths_avg": [ 99.0, 56.13911292494743 ], "wc_weaknesses_avg": [ 104.2, 68.26536457091547 ], "wc_questions_avg": [ 120.2, 76.72392065060284 ], "wc_limitations_avg": [ 18.0, 14.737706741552431 ], "wc_review_avg": [ 496.6, 260.21575663283727 ], "wc_reply_reviewers_avg": [ 148.4, 274.97679902129926 ], "wc_reply_authors_avg": [ 319.0, 583.6063741941139 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=813108073210202897&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ust.hk;ust.hk;ust.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Uni-ControlNet: All-in-One Control to Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71446", "id": "VgQw8zXrH8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2468f84a13ff8bb6767a67518fb596eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VgQw8zXrH8", "openreview": "https://openreview.net/forum?id=VgQw8zXrH8", "poster": "/media/PosterPDFs/NeurIPS%202023/71446.png?t=1699365151.6905706", "slides": "https://nips.cc/virtual/2023/poster/71446", "video": "https://nips.cc/virtual/2023/poster/71446", "author_site": "Shihao Zhao, Dongdong Chen, Yen-Chun Chen, Jianmin Bao, Shaozhe Hao, Lu Yuan, Kwan-Yee K. Wong", "tldr": "", "abstract": "Text-to-Image diffusion models have made tremendous progress over the past two years, enabling the generation of highly realistic images based on open-domain text descriptions. However, despite their success, text descriptions often struggle to adequately convey detailed controls, even when composed of long and complex texts. Moreover, recent studies have also shown that these models face challenges in understanding such complex texts and generating the corresponding images. Therefore, there is a growing need to enable more control modes beyond text description. 
In this paper, we introduce Uni-ControlNet, a unified framework that allows for the simultaneous utilization of different local controls (e.g., edge maps, depth maps, segmentation masks) and global controls (e.g., CLIP image embeddings) in a flexible and composable manner within a single model. Unlike existing methods, Uni-ControlNet only requires the fine-tuning of two additional adapters upon frozen pre-trained text-to-image diffusion models, eliminating the huge cost of training from scratch. Moreover, thanks to some dedicated adapter designs, Uni-ControlNet only necessitates a constant number (i.e., 2) of adapters, regardless of the number of local or global controls used. This not only reduces the fine-tuning costs and model size, making it more suitable for real-world deployment, but also facilitates the composability of different conditions. Through both quantitative and qualitative comparisons, Uni-ControlNet demonstrates its superiority over existing methods in terms of controllability, generation quality, and composability. Code is available at https://github.com/ShihaoZhaoZSH/Uni-ControlNet.", "keywords": "computer vision;diffusion model;text-to-image generation", "primary_area": "", "supplementary_material": "", "author": "Shihao Zhao;Dongdong Chen;Yen-Chun Chen;Jianmin Bao;Shaozhe Hao;Lu Yuan;Kwan-Yee K. Wong", "authorids": "~Shihao_Zhao1;~Dongdong_Chen1;~Yen-Chun_Chen1;~Jianmin_Bao1;~Shaozhe_Hao1;~Lu_Yuan1;~Kwan-Yee_K._Wong1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/ShihaoZhaoZSH;http://www.dongdongchen.bid/;;https://jianminbao.github.io/;https://haoosz.github.io/;https://www.microsoft.com/en-us/research/people/luyuan/;https://i.cs.hku.hk/~kykwong/", "dblp": "77/7644;92/1489-1;160/0623-1;154/4693;313/9551;;w/KwanYeeKennethWong", "google_scholar": "dNQiLDQAAAAJ;https://scholar.google.com.sg/citations?user=sYKpKqEAAAAJ;Gptgy4YAAAAJ;hjwvkYUAAAAJ;72uQORoAAAAJ;k9TsUVsAAAAJ;https://scholar.google.com/citations?sortby=pubdate", "orcid": ";;;;;;0000-0001-8560-9007", "linkedin": ";;;;https://linkedin.com/in/haoshaozhe/;;kenneth-wong-94a4621a9/", "or_profile": "~Shihao_Zhao1;~Dongdong_Chen1;~Yen-Chun_Chen1;~Jianmin_Bao1;~Shaozhe_Hao1;~Lu_Yuan1;~Kwan-Yee_Kenneth_Wong2", "aff": "University of Hong Kong;Microsoft Research;Microsoft;Microsoft;University of Hong Kong;Microsoft;The University of Hong Kong", "aff_domain": "hku.hk;microsoft.com;microsoft.com;microsoft.com;hku.hk;microsoft.com;hku.hk", "position": "PhD student;Principal Researcher;Researcher;Researcher;PhD student;Principal Research Manager;Associate Professor", "bibtex": "@inproceedings{\nzhao2023unicontrolnet,\ntitle={Uni-ControlNet: All-in-One Control to Text-to-Image Diffusion Models},\nauthor={Shihao Zhao and Dongdong Chen and Yen-Chun Chen and Jianmin Bao and Shaozhe Hao and Lu Yuan and Kwan-Yee K. 
Wong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VgQw8zXrH8}\n}", "github": "", "project": "", "reviewers": "WHfG;MFNW;n8So;RHVi;WFYK", "pdf_size": 15850635, "rating": "5;6;6;6;7", "confidence": "5;5;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;2", "wc_summary": "60;60;20;49;67", "wc_strengths": "8;41;27;18;53", "wc_weaknesses": "161;213;75;37;112", "wc_questions": "1;27;2;160;36", "wc_limitations": "65;1;7;1;4", "wc_review": "295;342;131;265;272", "wc_reply_reviewers": "0;26;22;22;12", "wc_reply_authors": "88;70;61;69;75", "reply_reviewers": "0;1;1;1;1", "reply_authors": "3;3;3;3;3", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 51.2, 16.630093204789922 ], "wc_strengths_avg": [ 29.4, 16.032467059064867 ], "wc_weaknesses_avg": [ 119.6, 62.1404859974558 ], "wc_questions_avg": [ 45.2, 59.01660783203318 ], "wc_limitations_avg": [ 15.6, 24.799999999999997 ], "wc_review_avg": [ 261.0, 70.36192152009494 ], "wc_reply_reviewers_avg": [ 16.4, 9.414881836751857 ], "wc_reply_authors_avg": [ 72.6, 8.912911982062878 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 291, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4786163553879277588&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "hku.hk;microsoft.com;microsoft.com;microsoft.com;hku.hk;microsoft.com;hku.hk", "author_num": 7, "aff_unique_index": "0;1;1;1;0;1;0", "aff_unique_norm": "University of Hong Kong;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.hku.hk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "HKU;MSR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Estimating Riemannian Metric with Noise-Contaminated Intrinsic Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71445", "id": "VhLU3pStsl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ea5cb7d9fd2deb0554def3552962d276-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VhLU3pStsl", "openreview": "https://openreview.net/forum?id=VhLU3pStsl", "poster": "/media/PosterPDFs/NeurIPS%202023/71445.png?t=1702072832.3781118", "slides": "https://nips.cc/virtual/2023/poster/71445", "video": "https://nips.cc/virtual/2023/poster/71445", "author_site": "Jiaming Qiu, Xiongtao Dai", "tldr": "", "abstract": "We extend metric learning by studying the Riemannian manifold structure of the underlying data space induced by similarity measures between data points. The key quantity of interest here is the Riemannian metric, which characterizes the Riemannian geometry and defines straight lines and derivatives on the manifold. Being able to estimate the Riemannian metric allows us to gain insights into the underlying manifold and compute geometric features such as the geodesic curves. We model the observed similarity measures as noisy responses generated from a function of the intrinsic geodesic distance between data points. 
A new local regression approach is proposed to learn the Riemannian metric tensor and its derivatives based on a Taylor expansion for the squared geodesic distances, accommodating different types of data such as continuous, binary, or comparative responses. We develop a theoretical foundation for our method by deriving the rates of convergence for the asymptotic bias and variance of the estimated metric tensor. The proposed method is shown to be versatile in simulation studies and real data applications involving taxi trip time in New York City and MNIST digits.", "keywords": "metric learning;manifold learning;local metric;dissimilarity;geometry", "primary_area": "", "supplementary_material": "/attachment/0950183fb23de992b50546332c1ec8c30c31739d.pdf", "author": "Jiaming Qiu;Xiongtao Dai", "authorids": "~Jiaming_Qiu1;~Xiongtao_Dai1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nqiu2023estimating,\ntitle={Estimating Riemannian Metric with Noise-Contaminated Intrinsic Distance},\nauthor={Jiaming Qiu and Xiongtao Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VhLU3pStsl}\n}", "github": "", "project": "", "reviewers": "9sCn;E2so;uGdp;5uut", "pdf_size": 1641663, "rating": "4;5;7;7", "confidence": "3;4;4;3", "soundness": "3;3;4;4", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "84;187;68;223", "wc_strengths": "33;93;78;64", "wc_weaknesses": "237;208;274;70", "wc_questions": "48;258;138;93", "wc_limitations": "10;22;43;8", "wc_review": "412;768;601;458", "wc_reply_reviewers": "9;160;68;0", "wc_reply_authors": "8;391;10;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 140.5, 65.98674109243463 ], "wc_strengths_avg": [ 67.0, 22.147234590350102 ], "wc_weaknesses_avg": [ 197.25, 77.10179958989283 ], "wc_questions_avg": [ 134.25, 78.21245105480328 ], "wc_limitations_avg": [ 20.75, 13.91716565971678 ], "wc_review_avg": [ 559.75, 138.9718946406071 ], "wc_reply_reviewers_avg": [ 59.25, 63.76274382427406 ], "wc_reply_authors_avg": [ 102.25, 166.75187405243756 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4674497473369951838&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";", "author_num": 2 }, { "title": "Variance-Reduced Gradient Estimation via Noise-Reuse in Online Evolution Strategies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71444", "id": "VhbV56AJNt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e69a97cbdd91ac0808603fa589d6c17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VhbV56AJNt", "openreview": "https://openreview.net/forum?id=VhbV56AJNt", "poster": "/media/PosterPDFs/NeurIPS%202023/71444.png?t=1701883185.612657", "slides": "https://nips.cc/virtual/2023/poster/71444", "video": "https://nips.cc/virtual/2023/poster/71444", "author_site": "Oscar Li, James Harrison, Jascha Sohl-Dickstein, Virginia 
Smith, Luke Metz", "tldr": "", "abstract": "Unrolled computation graphs are prevalent throughout machine learning but present challenges to automatic differentiation (AD) gradient estimation methods when their loss functions exhibit extreme local sensitivtiy, discontinuity, or blackbox characteristics. In such scenarios, online evolution strategies methods are a more capable alternative, while being more parallelizable than vanilla evolution strategies (ES) by interleaving partial unrolls and gradient updates. In this work, we propose a general class of unbiased online evolution strategies methods. We analytically and empirically characterize the variance of this class of gradient estimators and identify the one with the least variance, which we term Noise-Reuse Evolution Strategies (NRES). Experimentally, we show NRES results in faster convergence than existing AD and ES methods in terms of wall-clock time and number of unroll steps across a variety of applications, including learning dynamical systems, meta-training learned optimizers, and reinforcement learning.", "keywords": "Evolution Strategies;unrolled computation graph;online gradient estimation;variance reduction;stochastic gradient estimation", "primary_area": "", "supplementary_material": "/attachment/34a0605305c7379edecc79b93b6c61c207f499a7.pdf", "author": "Oscar Li;James Harrison;Jascha Sohl-Dickstein;Virginia Smith;Luke Metz", "authorids": "~Oscar_Li1;~James_Harrison1;~Jascha_Sohl-Dickstein2;~Virginia_Smith1;~Luke_Metz1", "gender": "M;;F;M;M", "homepage": "https://www.oscarli.one/;;;http://lukemetz.com;http://sohldickstein.com", "dblp": "160/8481;;120/0921;;51/7117", "google_scholar": "rtpoh5wAAAAJ;-tEiRFcAAAAJ;;jCOmCb4AAAAJ;-3zYIjQAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Oscar_Li1;~James_Harrison1;~Virginia_Smith1;~Luke_Metz1;~Jascha_Sohl-Dickstein1", "aff": "Google;Google;Carnegie Mellon University;Google;Google", "aff_domain": "google.com;google.com;cmu.edu;google.com;google.com", "position": "Student Researcher;Researcher;Associate Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nli2023variancereduced,\ntitle={Variance-Reduced Gradient Estimation via Noise-Reuse in Online Evolution Strategies},\nauthor={Oscar Li and James Harrison and Jascha Sohl-Dickstein and Virginia Smith and Luke Metz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VhbV56AJNt}\n}", "github": "", "project": "", "reviewers": "w7MW;hYKW;Wtuw;FJdn;77pb", "pdf_size": 3691421, "rating": "4;5;6;7;7", "confidence": "3;3;3;2;3", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;4", "presentation": "2;2;3;4;3", "wc_summary": "81;71;108;150;37", "wc_strengths": "13;60;90;70;94", "wc_weaknesses": "885;59;70;79;87", "wc_questions": "66;42;76;51;2", "wc_limitations": "1;1;4;14;3", "wc_review": "1046;233;348;364;223", "wc_reply_reviewers": "523;12;30;43;0", "wc_reply_authors": "1580;0;156;62;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "3;1;2;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 89.4, 37.876641878603756 ], "wc_strengths_avg": [ 65.4, 29.048924248584488 ], "wc_weaknesses_avg": [ 236.0, 324.6339477010992 ], "wc_questions_avg": [ 47.4, 25.56247249387273 ], "wc_limitations_avg": [ 4.6, 4.841487374764082 ], 
"wc_review_avg": [ 442.8, 307.04227721927805 ], "wc_reply_reviewers_avg": [ 121.6, 201.240751340279 ], "wc_reply_authors_avg": [ 359.6, 612.8652706753744 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5144957554275266, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14582015886033605061&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "google.com;google.com;cmu.edu;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Google;Carnegie Mellon University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.cmu.edu", "aff_unique_abbr": "Google;CMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DropPos: Pre-Training Vision Transformers by Reconstructing Dropped Positions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71443", "id": "VhcsIxVEd9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9098e2901b4eb54772f83535f89cb8ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VhcsIxVEd9", "openreview": "https://openreview.net/forum?id=VhcsIxVEd9", "poster": "/media/PosterPDFs/NeurIPS%202023/71443.png?t=1701915661.0417683", "slides": "https://nips.cc/virtual/2023/poster/71443", "video": "https://nips.cc/virtual/2023/poster/71443", "author_site": "Haochen Wang, Junsong Fan, Yuxi Wang, Kaiyou Song, Tong Wang, ZHAO-XIANG ZHANG", "tldr": "", "abstract": "As it is empirically observed that Vision Transformers (ViTs) are quite insensitive to the order of input tokens, the need for an appropriate self-supervised pretext task that enhances the location awareness of ViTs is becoming evident. To address this, we present DropPos, a novel pretext task designed to reconstruct Dropped Positions. The formulation of DropPos is simple: we first drop a large random subset of positional embeddings and then the model classifies the actual position for each non-overlapping patch among all possible positions solely based on their visual appearance. To avoid trivial solutions, we increase the difficulty of this task by keeping only a subset of patches visible. Additionally, considering there may be different patches with similar visual appearances, we propose position smoothing and attentive reconstruction strategies to relax this classification problem, since it is not necessary to reconstruct their exact positions in these cases. Empirical evaluations of DropPos show strong capabilities. DropPos outperforms supervised pre-training and achieves competitive results compared with state-of-the-art self-supervised alternatives on a wide range of downstream benchmarks. This suggests that explicitly encouraging spatial reasoning abilities, as DropPos does, indeed contributes to the improved location awareness of ViTs. 
The code is publicly available at https://github.com/Haochen-Wang409/DropPos.", "keywords": "Self-Supervised Learning;Vision Transformer;Visual Representation Learning", "primary_area": "", "supplementary_material": "/attachment/73826c2fdc44049e255ec20286a62ef95b3ca01b.pdf", "author": "Haochen Wang;Junsong Fan;Yuxi Wang;Kaiyou Song;Tong Wang;Zhaoxiang Zhang", "authorids": "~Haochen_Wang5;~Junsong_Fan1;~Yuxi_Wang1;~Kaiyou_Song1;~Tong_Wang11;~Zhaoxiang_Zhang3", "gender": "M;M;M;M;F;M", "homepage": "https://haochen-wang409.github.io/;;;;;http://zhaoxiangzhang.net", "dblp": ";150/4094;;216/9384;;55/2285-1.html", "google_scholar": "oNlpTdcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;VLqzM1wAAAAJ;y8cNMmkAAAAJ;qxWfV6cAAAAJ", "orcid": "0000-0002-2333-1844;;0000-0003-1579-2357;0000-0001-8999-2680;;", "linkedin": ";;;;;", "or_profile": "~Haochen_Wang5;~Junsong_Fan1;~Yuxi_Wang1;~Kaiyou_Song1;~Tong_Wang11;~Zhaoxiang_Zhang3", "aff": "Institute of Automation, Chinese Academy of Sciences;Centre for Artificial Intelligence and Robotics (CAIR) Hong Kong Institute of Science & Innovation Chinese Academy of Sciences;Centre for Artificial Intelligence and Robotics Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Megvii Technology Inc.;;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;cair-cas.org.hk;cair-cas.org.hk;megvii.com;;ia.ac.cn", "position": "PhD student;Assistant Professor;Assistant Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nwang2023droppos,\ntitle={DropPos: Pre-Training Vision Transformers by Reconstructing Dropped Positions},\nauthor={Haochen Wang and Junsong Fan and Yuxi Wang and Kaiyou Song and Tong Wang and Zhaoxiang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VhcsIxVEd9}\n}", "github": "", "project": "", "reviewers": "e3Vr;S2Rp;6DGy;Pgbn;HkQR", "pdf_size": 1214694, "rating": "5;6;6;6;7", "confidence": "4;4;5;5;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "72;176;73;51;61", "wc_strengths": "81;85;123;37;65", "wc_weaknesses": "189;259;331;85;115", "wc_questions": "254;120;77;3;3", "wc_limitations": "1;1;63;3;1", "wc_review": "597;641;667;179;245", "wc_reply_reviewers": "0;18;154;0;17", "wc_reply_authors": "0;0;281;0;0", "reply_reviewers": "0;1;2;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 86.6, 45.41629663457822 ], "wc_strengths_avg": [ 78.2, 28.04567702873297 ], "wc_weaknesses_avg": [ 195.8, 90.6915652086786 ], "wc_questions_avg": [ 91.4, 92.83663070146395 ], "wc_limitations_avg": [ 13.8, 24.61219210066426 ], "wc_review_avg": [ 465.8, 209.47400793415875 ], "wc_reply_reviewers_avg": [ 37.8, 58.62559168144915 ], "wc_reply_authors_avg": [ 56.2, 112.40000000000002 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9162106043941872390&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ia.ac.cn;cair-cas.org.hk;cair-cas.org.hk;megvii.com;;ia.ac.cn", "author_num": 6, "aff_unique_index": 
"0;0;1;2;0", "aff_unique_norm": "Chinese Academy of Sciences;Hong Kong Institute of Science & Innovation;Megvii Technology", "aff_unique_dep": "Institute of Automation;Centre for Artificial Intelligence and Robotics;", "aff_unique_url": "http://www.ia.cas.cn;http://www.hkisi.org;https://www.megvii.com", "aff_unique_abbr": "CAS;HKISI;Megvii", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Efficient Activation Function Optimization through Surrogate Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71442", "id": "ViFTWelHVZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/154d63285d3ed7826e7f026c0b350d69-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ViFTWelHVZ", "openreview": "https://openreview.net/forum?id=ViFTWelHVZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71442.png?t=1696371790.8405218", "slides": "https://nips.cc/virtual/2023/poster/71442", "video": "https://nips.cc/virtual/2023/poster/71442", "author_site": "Garrett Bingham, Risto Miikkulainen", "tldr": "", "abstract": "Carefully designed activation functions can improve the performance of neural networks in many machine learning tasks. However, it is difficult for humans to construct optimal activation functions, and current activation function search algorithms are prohibitively expensive. This paper aims to improve the state of the art through three steps: First, the benchmark datasets Act-Bench-CNN, Act-Bench-ResNet, and Act-Bench-ViT were created by training convolutional, residual, and vision transformer architectures from scratch with 2,913 systematically generated activation functions. Second, a characterization of the benchmark space was developed, leading to a new surrogate-based method for optimization. More specifically, the spectrum of the Fisher information matrix associated with the model's predictive distribution at initialization and the activation function's output distribution were found to be highly predictive of performance. Third, the surrogate was used to discover improved activation functions in several real-world tasks, with a surprising finding: a sigmoidal design that outperformed all other activation functions was discovered, challenging the status quo of always using rectifier nonlinearities in deep learning. 
Each of these steps is a contribution in its own right; together they serve as a practical and theoretical foundation for further research on activation function optimization.", "keywords": "automl;activation function;surrogate modeling;fisher information matrix;eigenvalues;optimization;umap;imagenet", "primary_area": "", "supplementary_material": "/attachment/c2af74ac0901b53ca5d10b5e1bec15943c18fc52.zip", "author": "Garrett Bingham;Risto Miikkulainen", "authorids": "~Garrett_Bingham1;~Risto_Miikkulainen1", "gender": "M;", "homepage": "https://www.gjb.ai;http://www.cs.utexas.edu/users/risto", "dblp": ";m/RistoMiikkulainen", "google_scholar": "yyrZ2SQAAAAJ;", "orcid": ";", "linkedin": "garrettbingham/;", "or_profile": "~Garrett_Bingham1;~Risto_Miikkulainen1", "aff": "University of Texas, Austin;The University of Texas, Austin", "aff_domain": "utexas.edu;cs.utexas.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nbingham2023efficient,\ntitle={Efficient Activation Function Optimization through Surrogate Modeling},\nauthor={Garrett Bingham and Risto Miikkulainen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ViFTWelHVZ}\n}", "github": "", "project": "", "reviewers": "UeiT;N57C;x7yQ;sps3", "pdf_size": 23940554, "rating": "3;4;7;8", "confidence": "5;3;4;4", "soundness": "3;3;3;4", "novelty": "1;2;3;3", "presentation": "3;3;4;4", "wc_summary": "55;24;142;82", "wc_strengths": "72;37;99;55", "wc_weaknesses": "210;143;110;1", "wc_questions": "43;1;248;1", "wc_limitations": "5;1;20;1", "wc_review": "385;206;619;140", "wc_reply_reviewers": "108;91;156;0", "wc_reply_authors": "307;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 75.75, 43.40722866067356 ], "wc_strengths_avg": [ 65.75, 22.84047941703501 ], "wc_weaknesses_avg": [ 116.0, 75.54137938904744 ], "wc_questions_avg": [ 73.25, 102.338592427295 ], "wc_limitations_avg": [ 6.75, 7.8222439235810075 ], "wc_review_avg": [ 337.5, 185.60509152499023 ], "wc_reply_reviewers_avg": [ 88.75, 56.51271980713722 ], "wc_reply_authors_avg": [ 76.75, 132.93489948091133 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.17149858514250882, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13216160890556353296&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "utexas.edu;cs.utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Nonparametric Teaching for Multiple Learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71441", "id": "VkUNovXoxx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/184a03a3ad07e8897c62461c02634b02-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VkUNovXoxx", "openreview": "https://openreview.net/forum?id=VkUNovXoxx", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71441.png?t=1700148358.693344", "slides": "https://nips.cc/virtual/2023/poster/71441", "video": "https://nips.cc/virtual/2023/poster/71441", "author_site": "Chen Zhang, Xiaofeng Cao, Weiyang Liu, Ivor Tsang, James Kwok", "tldr": "", "abstract": "We study the problem of teaching multiple learners simultaneously in the nonparametric iterative teaching setting, where the teacher iteratively provides examples to the learner for accelerating the acquisition of a target concept. This problem is motivated by the gap between current single-learner teaching setting and the real-world scenario of human instruction where a teacher typically imparts knowledge to multiple students. Under the new problem formulation, we introduce a novel framework -- Multi-learner Nonparametric Teaching (MINT). In MINT, the teacher aims to instruct multiple learners, with each learner focusing on learning a scalar-valued target model. To achieve this, we frame the problem as teaching a vector-valued target model and extend the target model space from a scalar-valued reproducing kernel Hilbert space used in single-learner scenarios to a vector-valued space. Furthermore, we demonstrate that MINT offers significant teaching speed-up over repeated single-learner teaching, particularly when the multiple learners can communicate with each other. Lastly, we conduct extensive experiments to validate the practicality and efficiency of MINT.", "keywords": "Nonparametric machine teaching;Multiple learners", "primary_area": "", "supplementary_material": "", "author": "Chen Zhang;Xiaofeng Cao;Weiyang Liu;Ivor Tsang;James Kwok", "authorids": "~Chen_Zhang13;~Xiaofeng_Cao2;~Weiyang_Liu1;~Ivor_Tsang1;~James_Kwok1", "gender": ";;M;;", "homepage": ";;http://wyliu.com/;;", "dblp": ";;137/1532;;", "google_scholar": ";;DMjROf0AAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Chen_Zhang13;~Xiaofeng_Cao2;~Weiyang_Liu1;~Ivor_Tsang1;~James_Kwok1", "aff": ";;University of Cambridge;;", "aff_domain": ";;cam.ac.uk;;", "position": ";;Researcher;;", "bibtex": "@inproceedings{\nzhang2023nonparametric,\ntitle={Nonparametric Teaching for Multiple Learners},\nauthor={Chen Zhang and Xiaofeng Cao and Weiyang Liu and Ivor Tsang and James Kwok},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VkUNovXoxx}\n}", "github": "", "project": "", "reviewers": "eAYw;JyMg;D1DQ;dq11", "pdf_size": 6550654, "rating": "4;6;6;7", "confidence": "3;4;3;1", "soundness": "3;4;3;3", "novelty": "2;4;2;3", "presentation": "2;4;3;3", "wc_summary": "54;122;85;52", "wc_strengths": "67;77;28;17", "wc_weaknesses": "209;119;73;46", "wc_questions": "52;39;72;14", "wc_limitations": "109;1;17;1", "wc_review": "491;358;275;130", "wc_reply_reviewers": "72;29;12;0", "wc_reply_authors": "394;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.25, 28.44622119016865 ], "wc_strengths_avg": [ 47.25, 25.301926804099327 ], "wc_weaknesses_avg": [ 111.75, 61.91677882448343 ], "wc_questions_avg": [ 44.25, 21.05201890555868 ], "wc_limitations_avg": [ 32.0, 44.93328387732194 ], "wc_review_avg": [ 313.5, 130.99713737330293 ], "wc_reply_reviewers_avg": [ 28.25, 27.279800219209818 ], "wc_reply_authors_avg": [ 98.5, 
170.6070045455344 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4736842105263159, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15782977707196836775&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": ";;cam.ac.uk;;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Efficient Exploration in Continuous-time Model-based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71440", "id": "VkhvDfY2dB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/836012122f3de08aeeae67369b087964-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VkhvDfY2dB", "openreview": "https://openreview.net/forum?id=VkhvDfY2dB", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71440", "video": "https://nips.cc/virtual/2023/poster/71440", "author_site": "Lenart Treven, Jonas H\u00fcbotter, Bhavya, Florian Dorfler, Andreas Krause", "tldr": "", "abstract": "Reinforcement learning algorithms typically consider discrete-time dynamics, even though the underlying systems are often continuous in time. In this paper, we introduce a model-based reinforcement learning algorithm that represents continuous-time dynamics using nonlinear ordinary differential equations (ODEs). We capture epistemic uncertainty using well-calibrated probabilistic models, and use the optimistic principle for exploration. Our regret bounds surface the importance of the measurement selection strategy (MSS), since in continuous time we not only must decide how to explore, but also when to observe the underlying system. Our analysis demonstrates that the regret is sublinear when modeling ODEs with Gaussian Processes (GP) for common choices of MSS, such as equidistant sampling. Additionally, we propose an adaptive, data-dependent, practical MSS that, when combined with GP dynamics, also achieves sublinear regret with significantly fewer samples. 
We showcase the benefits of continuous-time modeling over its discrete-time counterpart, as well as our proposed adaptive MSS over standard baselines, on several applications.", "keywords": "Reinforcement Learning;Optimal Control;Continuous Time", "primary_area": "", "supplementary_material": "", "author": "Lenart Treven;Jonas H\u00fcbotter;Bhavya Sukhija;Florian Dorfler;Andreas Krause", "authorids": "~Lenart_Treven1;~Jonas_H\u00fcbotter1;~Bhavya_Sukhija1;~Florian_Dorfler1;~Andreas_Krause1", "gender": "M;M;M;M;M", "homepage": ";https://jonhue.github.io;;http://people.ee.ethz.ch/~floriand/;https://las.inf.ethz.ch/krausea", "dblp": "267/9666;300/4583;312/4742;;87/1831-1.html", "google_scholar": "CDnzTWkAAAAJ;pxi_RkwAAAAJ;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";;0000-0001-6238-9734;0000-0002-9649-5305;0000-0001-7260-9673", "linkedin": "lenart-treven/;jonhue/;;;krausea/", "or_profile": "~Lenart_Treven1;~Jonas_H\u00fcbotter1;~Bhavya_Sukhija1;~Florian_Dorfler1;~Andreas_Krause1", "aff": "Swiss Federal Institute of Technology;ETH Zurich;ETHZ - ETH Zurich;;ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;;ethz.ch", "position": "PhD student;MS student;PhD student;;Full Professor", "bibtex": "@inproceedings{\ntreven2023efficient,\ntitle={Efficient Exploration in Continuous-time Model-based Reinforcement Learning},\nauthor={Lenart Treven and Jonas H{\\\"u}botter and Bhavya Sukhija and Florian Dorfler and Andreas Krause},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VkhvDfY2dB}\n}", "github": "", "project": "", "reviewers": "knGi;ffit;7bYe;xqxW", "pdf_size": 1009530, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "72;107;100;65", "wc_strengths": "68;73;64;107", "wc_weaknesses": "237;211;276;91", "wc_questions": "262;82;131;2", "wc_limitations": "1;1;11;2", "wc_review": "640;474;582;267", "wc_reply_reviewers": "0;19;29;0", "wc_reply_authors": "150;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.0, 17.84656829757475 ], "wc_strengths_avg": [ 78.0, 17.04406054905931 ], "wc_weaknesses_avg": [ 203.75, 69.0846401163095 ], "wc_questions_avg": [ 119.25, 94.40703098816317 ], "wc_limitations_avg": [ 3.75, 4.205650960315181 ], "wc_review_avg": [ 490.75, 142.25571166037588 ], "wc_reply_reviewers_avg": [ 12.0, 12.509996003196804 ], "wc_reply_authors_avg": [ 37.5, 64.9519052838329 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6311947134316038592&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ethz.ch;ethz.ch;ethz.ch;;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "Switzerland" }, { "title": "Diffusion Hyperfeatures: Searching Through Time and Space for Semantic Correspondence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71439", "id": "Vm1zeYqwdc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/942032b61720a3fd64897efe46237c81-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Vm1zeYqwdc", "openreview": "https://openreview.net/forum?id=Vm1zeYqwdc", "poster": "/media/PosterPDFs/NeurIPS%202023/71439.png?t=1701410463.7414727", "slides": "https://nips.cc/virtual/2023/poster/71439", "video": "https://nips.cc/virtual/2023/poster/71439", "author_site": "Grace Luo, Lisa Dunlap, Dong Huk Park, Aleksander Holynski, Trevor Darrell", "tldr": "", "abstract": "Diffusion models have been shown to be capable of generating high-quality images, suggesting that they could contain meaningful internal representations. Unfortunately, the feature maps that encode a diffusion model's internal information are spread not only over layers of the network, but also over diffusion timesteps, making it challenging to extract useful descriptors. We propose Diffusion Hyperfeatures, a framework for consolidating multi-scale and multi-timestep feature maps into per-pixel feature descriptors that can be used for downstream tasks. These descriptors can be extracted for both synthetic and real images using the generation and inversion processes. We evaluate the utility of our Diffusion Hyperfeatures on the task of semantic keypoint correspondence: our method achieves superior performance on the SPair-71k real image benchmark. We also demonstrate that our method is flexible and transferable: our feature aggregation network trained on the inversion features of real image pairs can be used on the generation features of synthetic image pairs with unseen objects and compositions. 
Our code is available at https://diffusion-hyperfeatures.github.io.", "keywords": "semantic correspondence;hypercolumns;diffusion models;generative model representations", "primary_area": "", "supplementary_material": "/attachment/e5ed406554f0f085c85c7ccdd6860762e9b7329e.pdf", "author": "Grace Luo;Lisa Dunlap;Dong Huk Park;Aleksander Holynski;Trevor Darrell", "authorids": "~Grace_Luo1;~Lisa_Dunlap1;~Dong_Huk_Park2;~Aleksander_Holynski1;~Trevor_Darrell2", "gender": "F;F;M;;M", "homepage": "https://people.eecs.berkeley.edu/~graceluo/;;;https://holynski.org;https://people.eecs.berkeley.edu/~trevor/", "dblp": "290/1388;;182/1826;230/7958;d/TrevorDarrell", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;_kJ-zUYAAAAJ;ypBMJMgAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Grace_Luo1;~Lisa_Dunlap1;~Dong_Huk_Park2;~Aleksander_Holynski1;~trevor_darrell1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google DeepMind;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;google.com;eecs.berkeley.edu", "position": "PhD student;PhD Student;PhD student;Researcher;Professor", "bibtex": "@inproceedings{\nluo2023diffusion,\ntitle={Diffusion Hyperfeatures: Searching Through Time and Space for Semantic Correspondence},\nauthor={Grace Luo and Lisa Dunlap and Dong Huk Park and Aleksander Holynski and Trevor Darrell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Vm1zeYqwdc}\n}", "github": "", "project": "", "reviewers": "coKq;c4Xr;HmuP;LUzf;sN43", "pdf_size": 16837005, "rating": "5;5;5;5;6", "confidence": "2;5;4;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;2;2;2", "presentation": "2;1;3;3;4", "wc_summary": "123;71;78;55;141", "wc_strengths": "349;49;138;54;126", "wc_weaknesses": "303;414;85;119;94", "wc_questions": "219;3;68;6;202", "wc_limitations": "1;7;37;1;13", "wc_review": "995;544;406;235;576", "wc_reply_reviewers": "7;1055;395;0;136", "wc_reply_authors": "0;1406;512;0;0", "reply_reviewers": "1;4;1;0;1", "reply_authors": "1;4;2;1;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 93.6, 32.72674747053242 ], "wc_strengths_avg": [ 143.2, 109.09335451804569 ], "wc_weaknesses_avg": [ 203.0, 132.19833584429117 ], "wc_questions_avg": [ 99.6, 93.63033696404175 ], "wc_limitations_avg": [ 11.8, 13.362634470792052 ], "wc_review_avg": [ 551.2, 252.46417567647097 ], "wc_reply_reviewers_avg": [ 318.6, 394.9848604693602 ], "wc_reply_authors_avg": [ 383.6, 548.3129033681407 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.10206207261596577, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10094079962662524845&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;berkeley.edu;google.com;eecs.berkeley.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "University of California, Berkeley;Google;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";Google 
DeepMind;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;https://deepmind.com;", "aff_unique_abbr": "UC Berkeley;DeepMind;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom;" }, { "title": "SatBird: a Dataset for Bird Species Distribution Modeling using Remote Sensing and Citizen Science Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73571", "id": "Vn5qZGxGj3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef7653bbc4655305efb89a32362e332a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Vn5qZGxGj3", "openreview": "https://openreview.net/forum?id=Vn5qZGxGj3", "poster": "/media/PosterPDFs/NeurIPS%202023/73571.png?t=1700679065.2212121", "slides": "https://nips.cc/virtual/2023/poster/73571", "video": "https://nips.cc/virtual/2023/poster/73571", "author_site": "M\u00e9lisande Teng, Amna Elmustafa, Benjamin Akera, Yoshua Bengio, Hager Radi, Hugo Larochelle, David Rolnick", "tldr": "", "abstract": "Biodiversity is declining at an unprecedented rate, impacting ecosystem services necessary to ensure food, water, and human health and well-being. Understanding the distribution of species and their habitats is crucial for conservation policy planning. \nHowever, traditional methods in ecology for species distribution models (SDMs) generally focus either on narrow sets of species or narrow geographical areas and there remain significant knowledge gaps about the distribution of species. A major reason for this is the limited availability of data traditionally used, due to the prohibitive amount of effort and expertise required for traditional field monitoring. \nThe wide availability of remote sensing data and the growing adoption of citizen science tools to collect species observations data at low cost offer an opportunity for improving biodiversity monitoring and enabling the modelling of complex ecosystems. We introduce a novel task for mapping bird species to their habitats by predicting species encounter rates from satellite images, and present SatBird, a satellite dataset of locations in the USA with labels derived from presence-absence observation data from the citizen science database eBird, considering summer (breeding) and winter seasons. We also provide a dataset in Kenya representing low-data regimes. We additionally provide environmental data and species range maps for each location. We benchmark a set of baselines on our dataset, including SOTA models for remote sensing tasks. 
SatBird opens up possibilities for scalably modelling properties of ecosystems worldwide.", "keywords": "remote sensing;biodiversity monitoring;species distribution modelling;citizen science data", "primary_area": "", "supplementary_material": "/attachment/e679fd447b202b361ed8d74132059072ac9be44a.pdf", "author": "M\u00e9lisande Teng;Amna Elmustafa;Benjamin Akera;Yoshua Bengio;Hager Radi;Hugo Larochelle;David Rolnick", "authorids": "~M\u00e9lisande_Teng1;~Amna_Elmustafa1;~Benjamin_Akera1;~Yoshua_Bengio1;~Hager_Radi1;~Hugo_Larochelle1;~David_Rolnick1", "gender": "F;F;M;M;F;M;M", "homepage": ";;;http://yoshuabengio.org;https://hagerrady13.github.io;https://mila.quebec/en/directory/hugo-larochelle;http://www.davidrolnick.com/", "dblp": ";;;56/953;243/7195;86/3862.html;37/10718", "google_scholar": "eUNoxBMAAAAJ;;17ixOXkAAAAJ;kukA0LcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=U89FHq4AAAAJ;P_luG3cAAAAJ", "orcid": ";;;;;;", "linkedin": ";amna-elmustafa-20ab8b11a/;;yoshuabengio/?originalSubdomain=ca;hagerrady13/;;", "or_profile": "~M\u00e9lisande_Teng1;~Amna_Elmustafa1;~Benjamin_Akera1;~Yoshua_Bengio1;~Hager_Radi1;~Hugo_Larochelle1;~David_Rolnick1", "aff": "Mila - Quebec Artificial Intelligence Institute;;McGill University;University of Montreal;Mila - Quebec Artificial Intelligence Institute;Google;McGill University", "aff_domain": "mila.quebec;;mcgill.ca;umontreal.ca;mila.quebec;google.com;cs.mcgill.ca", "position": "PhD student;;MS student;Full Professor;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nteng2023satbird,\ntitle={SatBird: a Dataset for Bird Species Distribution Modeling using Remote Sensing and Citizen Science Data},\nauthor={M{\\'e}lisande Teng and Amna Elmustafa and Benjamin Akera and Yoshua Bengio and Hager Radi and Hugo Larochelle and David Rolnick},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Vn5qZGxGj3}\n}", "github": "", "project": "", "reviewers": "8dHr;Sbqv;d3tb;ntto;rCBg", "pdf_size": 7967483, "rating": "5;6;7;7;7", "confidence": "3;3;3;4;5", "wc_summary_and_contributions": "175;83;84;94;80", "wc_strengths": "45;111;70;64;61", "wc_improvement": "200;64;246;25;142", "wc_limitations": "102;67;32;234;5", "wc_correctness": "37;71;34;30;2", "wc_clarity": "3;25;9;20;1", "wc_relation_to_prior_work": "46;49;17;32;4", "wc_documentation": "13;6;13;34;3", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "622;477;506;534;299", "wc_reply_reviewers": "0;28;54;0;22", "wc_reply_authors": "870;792;716;351;260", "reply_reviewers": "0;1;1;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 103.2, 36.20718160807328 ], "wc_strengths_avg": [ 70.2, 22.01272359341297 ], "wc_improvement_avg": [ 135.4, 82.1403676641394 ], "wc_limitations_avg": [ 88.0, 79.97249527181205 ], "wc_correctness_avg": [ 34.8, 21.976350925483512 ], "wc_clarity_avg": [ 11.6, 9.414881836751857 ], "wc_relation_to_prior_work_avg": [ 29.6, 17.118411141224527 ], "wc_documentation_avg": [ 13.8, 10.833282051160673 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 487.6, 106.05394853563915 ], "wc_reply_reviewers_avg": [ 20.8, 20.103730997006505 ], "wc_reply_authors_avg": [ 597.8, 245.2740508084783 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], 
"authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5625, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10020031335426246862&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mila.quebec;;mcgill.ca;umontreal.ca;mila.quebec;google.com;cs.mcgill.ca", "author_num": 7, "aff_unique_index": "0;1;2;0;3;1", "aff_unique_norm": "Quebec Artificial Intelligence Institute;McGill University;University of Montreal;Google", "aff_unique_dep": "Artificial Intelligence;;;Google", "aff_unique_url": "https://mila.quebec;https://www.mcgill.ca;https://wwwumontreal.ca;https://www.google.com", "aff_unique_abbr": "Mila;McGill;UM;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Bounded rationality in structured density estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71438", "id": "VnfeOjR73Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4f82088872dc8a91085f426f90bdd7dc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VnfeOjR73Q", "openreview": "https://openreview.net/forum?id=VnfeOjR73Q", "poster": "/media/PosterPDFs/NeurIPS%202023/71438.png?t=1702228892.2767282", "slides": "https://nips.cc/virtual/2023/poster/71438", "video": "https://nips.cc/virtual/2023/poster/71438", "author_site": "Tianyuan Teng, Kevin Li, Hang Zhang", "tldr": "", "abstract": "Learning to accurately represent environmental uncertainty is crucial for adaptive and optimal behaviors in various cognitive tasks. However, it remains unclear how the human brain, constrained by finite cognitive resources, constructs an internal model from an infinite space of probability distributions. In this study, we explore how these learned distributions deviate from the ground truth, resulting in observable inconsistency in a novel structured density estimation task. During each trial, human participants were asked to form and report the latent probability distribution functions underlying sequentially presented independent observations. As the number of observations increased, the reported predictive density became closer to the ground truth. Nevertheless, we observed an intriguing inconsistency in human structure estimation, specifically a large error in the number of reported clusters. Such inconsistency is invariant to the scale of the distribution and persists across stimulus modalities. We modeled uncertainty learning as approximate Bayesian inference in a nonparametric mixture prior of distributions. Human reports were best explained under resource rationality embodied in a decaying tendency towards model expansion. 
Our study offers insights into human cognitive processes under uncertainty and lays the groundwork for further exploration of resource-rational representations in the brain under more complex tasks.", "keywords": "human representation of uncertainty; Bayesian inference; bounded rationality; inductive bias; Chinese Restaurant Process", "primary_area": "", "supplementary_material": "/attachment/bbea652ff5c3b4f55e23055266b491fc8f5e2bb6.pdf", "author": "Tianyuan Teng;Li Kevin Wenliang;Hang Zhang", "authorids": "~Tianyuan_Teng1;~Li_Kevin_Wenliang1;~Hang_Zhang14", "gender": "M;;F", "homepage": "https://www.researchgate.net/profile/Teng-Tianyuan;https://kevin-w-li.github.io/;http://psy.pku.edu.cn/english/people/faculty/professor/hangzhang/index.htm", "dblp": "369/7112;255/7009;", "google_scholar": ";https://scholar.google.co.uk/citations?user=MW45NMEAAAAJ;6X8WtMgAAAAJ", "orcid": ";;0000-0002-9771-0660", "linkedin": ";;", "or_profile": "~Tianyuan_Teng1;~Li_Kevin_Wenliang1;~Hang_Zhang14", "aff": "Peking University;Google DeepMind;Peking University", "aff_domain": "pku.edu.cn;deepmind.com;pku.edu.cn", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nteng2023bounded,\ntitle={Bounded rationality in structured density estimation},\nauthor={Tianyuan Teng and Li Kevin Wenliang and Hang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VnfeOjR73Q}\n}", "github": "", "project": "", "reviewers": "ziH7;8bgA;ti72;Ds9y;Xmkb", "pdf_size": 5146194, "rating": "5;7;7;7;8", "confidence": "2;3;4;3;4", "soundness": "2;3;3;2;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "57;193;217;92;184", "wc_strengths": "10;65;75;90;175", "wc_weaknesses": "17;321;213;71;219", "wc_questions": "372;188;194;193;274", "wc_limitations": "30;11;33;44;68", "wc_review": "486;778;732;490;920", "wc_reply_reviewers": "54;121;45;20;27", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 148.6, 62.44549623471656 ], "wc_strengths_avg": [ 83.0, 53.34791467339656 ], "wc_weaknesses_avg": [ 168.2, 109.76775482809147 ], "wc_questions_avg": [ 244.2, 71.44340417421331 ], "wc_limitations_avg": [ 37.2, 18.712562625145708 ], "wc_review_avg": [ 681.2, 169.49147471185682 ], "wc_reply_reviewers_avg": [ 53.4, 35.92547842409339 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8728715609439696, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N7txRMyKvL8J:scholar.google.com/&scioq=Bounded+rationality+in+structured+density+estimation&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "pku.edu.cn;deepmind.com;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "http://www.pku.edu.cn;https://deepmind.com", "aff_unique_abbr": "Peking U;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Fine-Tuning Language Models with Just Forward 
Passes", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71437", "id": "Vota6rFhBQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a627810151be4d13f907ac898ff7e948-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Vota6rFhBQ", "openreview": "https://openreview.net/forum?id=Vota6rFhBQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71437.png?t=1701675686.1453338", "slides": "https://nips.cc/virtual/2023/poster/71437", "video": "https://nips.cc/virtual/2023/poster/71437", "author_site": "Sadhika Malladi, Tianyu Gao, Eshaan Nichani, Alex Damian, Jason Lee, Danqi Chen, Sanjeev Arora", "tldr": "", "abstract": "Fine-tuning language models (LMs) has yielded success on diverse downstream tasks, but as LMs grow in size, backpropagation requires a prohibitively large amount of memory. Zeroth-order (ZO) methods can in principle estimate gradients using only two forward passes but are theorized to be catastrophically slow for optimizing large models. In this work, we propose a memory-efficient zerothorder optimizer (MeZO), adapting the classical ZO-SGD method to operate in-place, thereby fine-tuning LMs with the same memory footprint as inference. For example, with a single A100 80GB GPU, MeZO can train a 30-billion parameter model, whereas fine-tuning with backpropagation can train only a 2.7B LM with the same budget. We conduct comprehensive experiments across model types (masked and autoregressive LMs), model scales (up to 66B), and downstream tasks (classification, multiple-choice, and generation). Our results demonstrate that (1) MeZO significantly outperforms in-context learning and linear probing; (2) MeZO achieves comparable performance to fine-tuning with backpropagation across multiple tasks, with up to 12\u00d7 memory reduction and up to 2\u00d7 GPU-hour reduction in our implementation; (3) MeZO is compatible with both full-parameter and parameter-efficient tuning techniques such as LoRA and prefix tuning; (4) MeZO can effectively optimize non-differentiable objectives (e.g., maximizing accuracy or F1). We support our empirical findings with theoretical insights, highlighting how adequate pre-training and task prompts enable MeZO to fine-tune huge models, despite classical ZO analyses suggesting otherwise.", "keywords": "language models;fine-tuning;zeroth order optimization;memory efficiency", "primary_area": "", "supplementary_material": "/attachment/ee7393dc6772e185bedd07302ab82308bb3409b0.zip", "author": "Sadhika Malladi;Tianyu Gao;Eshaan Nichani;Alex Damian;Jason D. 
Lee;Danqi Chen;Sanjeev Arora", "authorids": "~Sadhika_Malladi2;~Tianyu_Gao1;~Eshaan_Nichani1;~Alex_Damian1;~Jason_D._Lee1;~Danqi_Chen1;~Sanjeev_Arora1", "gender": "F;M;;M;M;F;", "homepage": "https://www.cs.princeton.edu/~smalladi/;https://gaotianyu.xyz/about/;https://eshaannichani.com/;https://web.math.princeton.edu/~ad27/;https://jasondlee88.github.io/;https://www.cs.princeton.edu/~danqic/;http://www.cs.princeton.edu/~arora/", "dblp": "176/9810;207/8893-1.html;260/6510;;88/3262;87/7949;a/SArora", "google_scholar": "9HCmTcwAAAAJ;il-F8YYAAAAJ;;YvHcBcEAAAAJ;GR_DsT0AAAAJ;sVR8ktkAAAAJ;RUP4S68AAAAJ", "orcid": ";0000-0002-5178-0866;;;;;", "linkedin": ";;;;;;", "or_profile": "~Sadhika_Malladi2;~Tianyu_Gao1;~Eshaan_Nichani1;~Alex_Damian1;~Jason_D._Lee1;~Danqi_Chen1;~Sanjeev_Arora1", "aff": "Microsoft Research;Princeton University;Princeton University;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "research.microsoft.com;princeton.edu;princeton.edu;princeton.edu;princeton.edu;cs.princeton.edu;princeton.edu", "position": "Intern;PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmalladi2023finetuning,\ntitle={Fine-Tuning Language Models with Just Forward Passes},\nauthor={Sadhika Malladi and Tianyu Gao and Eshaan Nichani and Alex Damian and Jason D. Lee and Danqi Chen and Sanjeev Arora},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Vota6rFhBQ}\n}", "github": "", "project": "", "reviewers": "XvSx;Qhm1;unzy;x4kH;qocq", "pdf_size": 595235, "rating": "7;7;7;8;8", "confidence": "4;4;4;4;4", "soundness": "4;3;3;4;4", "novelty": "4;4;4;4;3", "presentation": "4;4;3;4;4", "wc_summary": "152;73;97;68;100", "wc_strengths": "67;86;147;84;100", "wc_weaknesses": "163;49;459;19;64", "wc_questions": "90;35;92;58;20", "wc_limitations": "1;22;42;17;36", "wc_review": "473;265;837;246;320", "wc_reply_reviewers": "5;5;6;2;6", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.8, 0.39999999999999997 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 98.0, 29.819456735494025 ], "wc_strengths_avg": [ 96.8, 27.198529372008334 ], "wc_weaknesses_avg": [ 150.8, 161.49600614256687 ], "wc_questions_avg": [ 59.0, 28.802777643831508 ], "wc_limitations_avg": [ 23.6, 14.485855169785456 ], "wc_review_avg": [ 428.2, 219.39133984731484 ], "wc_reply_reviewers_avg": [ 4.8, 1.469693845669907 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 238, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16654609203453704723&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "research.microsoft.com;princeton.edu;princeton.edu;princeton.edu;princeton.edu;cs.princeton.edu;princeton.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "Microsoft;Princeton University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.princeton.edu", "aff_unique_abbr": "MSR;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Chanakya: Learning Runtime Decisions for Adaptive Real-Time Perception", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71436", "id": "VpCjozUOM2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ae2d574d2c309f3a45880e4460efd176-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VpCjozUOM2", "openreview": "https://openreview.net/forum?id=VpCjozUOM2", "poster": "/media/PosterPDFs/NeurIPS%202023/71436.png?t=1701715317.4681575", "slides": "https://nips.cc/virtual/2023/poster/71436", "video": "https://nips.cc/virtual/2023/poster/71436", "author_site": "Anurag Ghosh, Vaibhav Balloli, Akshay Nambi, Aditya Singh, Tanuja Ganu", "tldr": "", "abstract": "Real-time perception requires planned resource utilization. Computational planning in real-time perception is governed by two considerations -- accuracy and latency. There exist run-time decisions (e.g. choice of input resolution) that induce tradeoffs affecting performance on a given hardware, arising from intrinsic (content, e.g. scene clutter) and extrinsic (system, e.g. resource contention) characteristics. \n\nEarlier runtime execution frameworks employed rule-based decision algorithms and operated with a fixed algorithm latency budget to balance these concerns, which is sub-optimal and inflexible. We propose Chanakya, a learned approximate execution framework that naturally derives from the streaming perception paradigm, to automatically learn decisions induced by these tradeoffs instead. Chanakya is trained via novel rewards balancing accuracy and latency implicitly, without approximating either objectives. Chanakya simultaneously considers intrinsic and extrinsic context, and predicts decisions in a flexible manner. 
Chanakya, designed with low overhead in mind, outperforms state-of-the-art static and dynamic execution policies on public datasets on both server GPUs and edge devices.", "keywords": "approximate execution framework; real time perception; latency-accuracy tradeoffs", "primary_area": "", "supplementary_material": "/attachment/3e2e314ae1699bb2a7dc2692e6e75708f2e108c3.pdf", "author": "Anurag Ghosh;Vaibhav Balloli;Akshay Nambi;Aditya Singh;Tanuja Ganu", "authorids": "~Anurag_Ghosh1;~Vaibhav_Balloli2;~Akshay_Nambi1;~Aditya_Singh1;~Tanuja_Ganu1", "gender": "M;;;M;", "homepage": "http://anuragxel.github.io/;;;;https://www.microsoft.com/en-us/research/people/taganu/", "dblp": "02/7988;;;;31/11538", "google_scholar": "zd0-SNQAAAAJ;;;;https://scholar.google.co.in/citations?user=uU9COWkAAAAJ", "orcid": ";;;;", "linkedin": "anuragxel/;;;;", "or_profile": "~Anurag_Ghosh1;~Vaibhav_Balloli2;~Akshay_Nambi1;~Aditya_Singh1;~Tanuja_Ganu1", "aff": "Carnegie Mellon University;;;;Microsoft", "aff_domain": "cmu.edu;;;;microsoft.com", "position": "MS student;;;;Researcher", "bibtex": "@inproceedings{\nghosh2023chanakya,\ntitle={Chanakya: Learning Runtime Decisions for Adaptive Real-Time Perception},\nauthor={Anurag Ghosh and Vaibhav Balloli and Akshay Nambi and Aditya Singh and Tanuja Ganu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VpCjozUOM2}\n}", "github": "", "project": "", "reviewers": "fYnV;L1Mt;Y14P;6nGS;7j3D;Gsqp", "pdf_size": 2132793, "rating": "5;5;5;6;6;6", "confidence": "2;1;3;3;3;4", "soundness": "3;3;3;3;2;2", "novelty": "3;2;2;3;2;2", "presentation": "3;2;2;3;3;3", "wc_summary": "40;39;53;57;114;49", "wc_strengths": "39;28;43;53;52;19", "wc_weaknesses": "27;140;35;33;83;32", "wc_questions": "16;23;69;26;34;111", "wc_limitations": "24;50;1;14;8;8", "wc_review": "146;280;201;183;291;219", "wc_reply_reviewers": "9;36;13;10;12;29", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.666666666666664, 25.577768124334504 ], "wc_strengths_avg": [ 39.0, 12.26104943849968 ], "wc_weaknesses_avg": [ 58.333333333333336, 41.108258159266356 ], "wc_questions_avg": [ 46.5, 33.48009358808106 ], "wc_limitations_avg": [ 17.5, 16.142593761020354 ], "wc_review_avg": [ 220.0, 51.39390365922143 ], "wc_reply_reviewers_avg": [ 18.166666666666668, 10.41499986664533 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4152594370896275290&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "cmu.edu;;;;microsoft.com", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com", "aff_unique_abbr": "CMU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Patch n\u2019 Pack: NaViT, a Vision Transformer for 
any Aspect Ratio and Resolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71435", "id": "VpGFHmI7e5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/06ea400b9b7cfce6428ec27a371632eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VpGFHmI7e5", "openreview": "https://openreview.net/forum?id=VpGFHmI7e5", "poster": "/media/PosterPDFs/NeurIPS%202023/71435.png?t=1702507126.1999793", "slides": "https://nips.cc/virtual/2023/poster/71435", "video": "https://nips.cc/virtual/2023/poster/71435", "author_site": "Mostafa Dehghani, Basil Mustafa, Josip Djolonga, Jonathan Heek, Matthias Minderer, Mathilde Caron, Andreas Steiner, Joan Puigcerver, Robert Geirhos, Ibrahim Alabdulmohsin, Avital Oliver, Piotr Padlewski, Alexey Gritsenko, Mario Lucic, Neil Houlsby", "tldr": "", "abstract": "The ubiquitous and demonstrably suboptimal choice of resizing images to a fixed resolution before processing them with computer vision models has not yet been successfully challenged. However, models such as the Vision Transformer (ViT) offer flexible sequence-based modeling, and hence can handle varying input sequence lengths. We take advantage of this with NaViT (Native Resolution ViT), which uses sequence packing during training to process inputs of arbitrary resolutions and aspect ratios. Alongside flexible model usage, we demonstrate improved training efficiency for large-scale supervised and contrastive image-text pretraining.\nNaViT can be efficiently transferred to standard tasks such as image and video classification, object detection, and semantic segmentation and leads to improved results on robustness and fairness benchmarks. At inference time, the input resolution flexibility can be used to smoothly navigate the test-time cost-performance trade-off. We believe that NaViT marks a departure from the standard, CNN-designed, input and modelling pipeline used by most computer vision models, and represents a promising direction for ViTs.", "keywords": "Vision Transformer;variable aspect ratio;flexible inference;efficient training", "primary_area": "", "supplementary_material": "/attachment/801b1d8ade6a43939d2c30231d3cb759f8c74bd8.pdf", "author": "Mostafa Dehghani;Basil Mustafa;Josip Djolonga;Jonathan Heek;Matthias Minderer;Mathilde Caron;Andreas Peter Steiner;Joan Puigcerver;Robert Geirhos;Ibrahim Alabdulmohsin;Avital Oliver;Piotr Padlewski;Alexey A.
Gritsenko;Mario Lucic;Neil Houlsby", "authorids": "~Mostafa_Dehghani1;~Basil_Mustafa1;~Josip_Djolonga2;~Jonathan_Heek1;~Matthias_Minderer1;~Mathilde_Caron1;~Andreas_Peter_Steiner1;~Joan_Puigcerver1;~Robert_Geirhos1;~Ibrahim_Alabdulmohsin1;~Avital_Oliver1;~Piotr_Padlewski1;~Alexey_A._Gritsenko1;~Mario_Lucic1;~Neil_Houlsby1", "gender": "M;M;M;;M;F;M;M;M;M;;;M;M;Not Specified", "homepage": "http://mostafadehghani.com/;https://www.basilmustafa.com/;;;https://mjlm.github.io/;;;http://www.jpuigcerver.net;https://robertgeirhos.com/;http://ibomohsin.com;;;http://lucic.ai;https://neilhoulsby.github.io/;", "dblp": "125/4062;;139/1342;247/1004;243/3155;223/4085;s/AndreasSteiner;155/3271;176/0076;153/5393;;210/6394;155/1945;91/10669;30/11478", "google_scholar": "https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;https://scholar.google.co.uk/citations?user=LuxZAJwAAAAJ;;;57BFBY0AAAAJ;;;https://scholar.google.com/citations?hl=en;w3kGtMIAAAAJ;8WNMsPYAAAAJ;Suu45K8AAAAJ;QnU4nRAAAAAJ;SzZRlcMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.nl/citations?user=zTy9cUwAAAAJ", "orcid": ";;;;0000-0002-6428-8256;;;;0000-0001-7698-3187;;;0000-0001-5107-0824;;;", "linkedin": ";basil-mustafa/;;;;;andreas-steiner-1859223b/;;rgeirhos/;;avitaloliver;piotr-padlewski/;;;agritsenko/", "or_profile": "~Mostafa_Dehghani1;~Basil_Mustafa1;~Josip_Djolonga2;~Jonathan_Heek1;~Matthias_Minderer1;~Mathilde_Caron1;~Andreas_Peter_Steiner1;~Joan_Puigcerver1;~Robert_Geirhos1;~Ibrahim_Alabdulmohsin1;~Avital_Oliver1;~Piotr_Padlewski1;~Mario_Lucic1;~Neil_Houlsby1;~Alexey_Alexeevich_Gritsenko1", "aff": "Google DeepMind;Google;Google;Google;Google;Google;Google DeepMind;Google;Google DeepMind;Google;Research, Google;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;research.google.com;google.com;deepmind.com;google.com;google.com", "position": "Research Scientist;Research Software Engineer;Research Engineer;Software Engineer;Researcher;Researcher;Research Engineer;Software Engineer in Research;Research Scientist;Research Scientist;Researcher;Software Engineer;Senior Staff Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\ndehghani2023patch,\ntitle={Patch n{\\textquoteright} Pack: NaViT, a Vision Transformer for any Aspect Ratio and Resolution},\nauthor={Mostafa Dehghani and Basil Mustafa and Josip Djolonga and Jonathan Heek and Matthias Minderer and Mathilde Caron and Andreas Peter Steiner and Joan Puigcerver and Robert Geirhos and Ibrahim Alabdulmohsin and Avital Oliver and Piotr Padlewski and Alexey A. 
Gritsenko and Mario Lucic and Neil Houlsby},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VpGFHmI7e5}\n}", "github": "", "project": "", "reviewers": "CXkP;SsJZ;z6Gn;WGZP", "pdf_size": 1362460, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "108;84;59;155", "wc_strengths": "106;64;48;83", "wc_weaknesses": "214;76;82;226", "wc_questions": "319;26;8;5", "wc_limitations": "10;1;1;64", "wc_review": "757;251;198;533", "wc_reply_reviewers": "204;0;23;160", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.5, 35.415392133929565 ], "wc_strengths_avg": [ 75.25, 21.649191670822262 ], "wc_weaknesses_avg": [ 149.5, 70.65939427988327 ], "wc_questions_avg": [ 89.5, 132.7450564051257 ], "wc_limitations_avg": [ 19.0, 26.239283526803852 ], "wc_review_avg": [ 434.75, 225.45107562395881 ], "wc_reply_reviewers_avg": [ 96.75, 87.0384254223386 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2081084367415143760&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;research.google.com;google.com;deepmind.com;google.com;google.com", "author_num": 15, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1;1;1;1;1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;0;1;0;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Does Graph Distillation See Like Vision Dataset Counterpart?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71434", "id": "VqIWgUVsXc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a6efa49c54bedf4411f1bcd32f15937a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VqIWgUVsXc", "openreview": "https://openreview.net/forum?id=VqIWgUVsXc", "poster": "/media/PosterPDFs/NeurIPS%202023/71434.png?t=1701598178.2323422", "slides": "https://nips.cc/virtual/2023/poster/71434", "video": "https://nips.cc/virtual/2023/poster/71434", "author_site": "Beining Yang, Kai Wang, Qingyun Sun, Cheng Ji, Xingcheng Fu, Hao Tang, Yang You, Jianxin Li", "tldr": "", "abstract": "Training on large-scale graphs has achieved remarkable results in graph representation learning, but its cost and storage have attracted increasing concerns. Existing graph condensation methods primarily focus on optimizing the feature matrices of condensed graphs while overlooking the impact of the structure information from the original graphs. 
To investigate the impact of the structure information, we conduct an analysis in the spectral domain and empirically identify substantial Laplacian Energy Distribution (LED) shifts in previous works. Such shifts lead to poor performance in cross-architecture generalization and specific tasks, including anomaly detection and link prediction. In this paper, we propose a novel Structure-broadcasting Graph Dataset Distillation (\\textbf{SGDD}) scheme for broadcasting the original structure information to the generation of the synthetic one, which explicitly prevents overlooking the original structure information. \nTheoretically, the synthetic graphs by SGDD are expected to have smaller LED shifts than previous works, leading to superior performance in both cross-architecture settings and specific tasks.\nWe validate the proposed SGDD across 9 datasets and achieve state-of-the-art results on all of them: for example, on the YelpChi dataset, our approach maintains 98.6\\% of the test accuracy of training on the original graph dataset while reducing the graph scale by a factor of 1,000. Moreover, we empirically observe 17.6\\% $\\sim$ 31.4\\% reductions in LED shift across the 9 datasets. Extensive experiments and analysis verify the effectiveness and necessity of the proposed designs. The code will be made public.", "keywords": "data-efficient learning;graph generation;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/14e03dd1caa5a20fb1d770f1588344a66fa142e0.pdf", "author": "Beining Yang;Kai Wang;Qingyun Sun;Cheng Ji;Xingcheng Fu;Hao Tang;Yang You;Jianxin Li", "authorids": "~Beining_Yang1;~Kai_Wang8;~Qingyun_Sun2;~Cheng_Ji1;~Xingcheng_Fu1;~Hao_Tang6;~Yang_You1;~Jianxin_Li3", "gender": ";M;F;M;M;M;M;M", "homepage": ";https://kaiwang960112.github.io/;https://sunqysunqy.github.io/;https://scholar.google.com/citations?hl=en&user=fRAeIZAAAAAJ;https://fuxingcheng.github.io/;https://ha0tang.github.io/;https://www.comp.nus.edu.sg/~youy/;http://myjianxin.github.io", "dblp": ";78/2022-36;;32/598-1.html;236/7003;07/5751-5;33/8167-1.html;l/JianxinLi-2.html", "google_scholar": ";i2II0XIAAAAJ;e2oYBzUAAAAJ;https://scholar.google.com/citations?hl=en;gN4tbgMAAAAJ;9zJkeEMAAAAJ;jF4dPZwAAAAJ;EY2lqD0AAAAJ", "orcid": ";0000-0002-1154-5175;;0000-0003-2513-3822;0000-0002-4643-8126;0000-0002-2077-1246;;0000-0001-5152-0055", "linkedin": ";;;;;hao-tang-887475138/;yang-you-0b92914b/;", "or_profile": "~Beining_Yang1;~Kai_Wang8;~Qingyun_Sun2;~Cheng_Ji1;~Xingcheng_Fu1;~Hao_Tang6;~Yang_You1;~Jianxin_Li3", "aff": ";National University of Singapore;Beihang University;Beihang University;Beihang University;ETH Zurich;National University of Singapore;Beihang University ", "aff_domain": ";u.nus.edu;buaa.edu.cn;buaa.edu.cn;act.buaa.edu.cn;vision.ee.ethz.ch;nus.edu.sg;buaa.edu.cn", "position": ";PhD student;Assistant Professor;PhD student;PhD student;Postdoc;Professor;Full Professor", "bibtex": "@inproceedings{\nyang2023does,\ntitle={Does Graph Distillation See Like Vision Dataset Counterpart?},\nauthor={Beining Yang and Kai Wang and Qingyun Sun and Cheng Ji and Xingcheng Fu and Hao Tang and Yang You and Jianxin Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VqIWgUVsXc}\n}", "github": "", "project": "", "reviewers": "FmUF;9hTZ;5ivm;ZZ8V;UGAQ", "pdf_size": 0, "rating": "4;4;5;6;7", "confidence": "3;4;3;2;4", "soundness": "2;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;2;4;3", "wc_summary":
"16;64;95;54;59", "wc_strengths": "19;135;23;73;230", "wc_weaknesses": "239;163;215;18;99", "wc_questions": "2;14;5;7;57", "wc_limitations": "2;1;1;5;37", "wc_review": "278;377;339;157;482", "wc_reply_reviewers": "0;11;31;29;105", "wc_reply_authors": "462;405;145;295;53", "reply_reviewers": "0;1;1;1;1", "reply_authors": "4;5;4;3;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 57.6, 25.239651344660054 ], "wc_strengths_avg": [ 96.0, 79.07464827617004 ], "wc_weaknesses_avg": [ 146.8, 80.36018914860766 ], "wc_questions_avg": [ 17.0, 20.386269889315212 ], "wc_limitations_avg": [ 9.2, 13.977124167724918 ], "wc_review_avg": [ 326.6, 107.67469526309327 ], "wc_reply_reviewers_avg": [ 35.2, 36.74996598637882 ], "wc_reply_authors_avg": [ 272.0, 153.88827115800606 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 3.6, 1.019803902718557 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.04583492485141061, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15777633230572763843&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";u.nus.edu;buaa.edu.cn;buaa.edu.cn;act.buaa.edu.cn;vision.ee.ethz.ch;nus.edu.sg;buaa.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;1;2;0;1", "aff_unique_norm": "National University of Singapore;Beihang University;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;http://www.buaa.edu.cn/;https://www.ethz.ch", "aff_unique_abbr": "NUS;BUAA;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;2;0;1", "aff_country_unique": "Singapore;China;Switzerland" }, { "title": "Autonomous Capability Assessment of Sequential Decision-Making Systems in Stochastic Settings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71433", "id": "VqclD6Nfaj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abbb7f20cdffdd3bb7d98447f60b0b0c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VqclD6Nfaj", "openreview": "https://openreview.net/forum?id=VqclD6Nfaj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71433", "video": "https://nips.cc/virtual/2023/poster/71433", "author_site": "Pulkit Verma, Rushang Karia, Siddharth Srivastava", "tldr": "", "abstract": "It is essential for users to understand what their AI systems can and can't do in order to use them safely. However, the problem of enabling users to assess AI systems with sequential decision-making (SDM) capabilities is relatively understudied. This paper presents a new approach for modeling the capabilities of black-box AI systems that can plan and act, along with the possible effects and requirements for executing those capabilities in stochastic settings. We present an active-learning approach that can effectively interact with a black-box SDM system and learn an interpretable probabilistic model describing its capabilities. 
Theoretical analysis of the approach identifies the conditions under which the learning process is guaranteed to converge to the correct model of the agent; empirical evaluations on different agents and simulated scenarios show that this approach is few-shot generalizable and can effectively describe the capabilities of arbitrary black-box SDM agents in a sample-efficient manner.", "keywords": "Sequential Decision Making;Interpretable Models;Relational Model Learning;Black-Box Agents;Symbolic Descriptions", "primary_area": "", "supplementary_material": "", "author": "Pulkit Verma;Rushang Karia;Siddharth Srivastava", "authorids": "~Pulkit_Verma1;~Rushang_Karia1;~Siddharth_Srivastava2", "gender": "M;M;", "homepage": "https://pulkitverma.net;https://rushangkaria.github.io/;", "dblp": "170/4800;270/2100;", "google_scholar": "5lg9EsoAAAAJ;;", "orcid": "0000-0002-8770-5390;;", "linkedin": "pulkitverma25;https://linkedin.com/in/rushangkaria;", "or_profile": "~Pulkit_Verma1;~Rushang_Karia1;~Siddharth_Srivastava2", "aff": "Meta;Arizona State University;", "aff_domain": "meta.com;asu.edu;", "position": "Intern;PhD student;", "bibtex": "@inproceedings{\nverma2023autonomous,\ntitle={Autonomous Capability Assessment of Sequential Decision-Making Systems in Stochastic Settings},\nauthor={Pulkit Verma and Rushang Karia and Siddharth Srivastava},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VqclD6Nfaj}\n}", "github": "", "project": "", "reviewers": "k56H;Sjai;Jehr;pN4U;Gtv9", "pdf_size": 1083127, "rating": "5;5;5;5;6", "confidence": "2;4;4;4;1", "soundness": "3;3;2;3;3", "novelty": "2;3;2;3;2", "presentation": "3;2;3;3;3", "wc_summary": "68;179;140;89;114", "wc_strengths": "206;128;104;49;27", "wc_weaknesses": "7;129;722;120;23", "wc_questions": "363;135;204;24;36", "wc_limitations": "8;25;46;19;5", "wc_review": "652;596;1216;301;205", "wc_reply_reviewers": "82;66;568;47;42", "wc_reply_authors": "16;25;870;32;16", "reply_reviewers": "1;1;3;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.0, 1.2649110640673518 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 118.0, 38.889587295315955 ], "wc_strengths_avg": [ 102.8, 63.12970774524463 ], "wc_weaknesses_avg": [ 200.2, 265.51941548594897 ], "wc_questions_avg": [ 152.4, 124.38102749213806 ], "wc_limitations_avg": [ 20.6, 14.62326912834473 ], "wc_review_avg": [ 594.0, 354.2377732540673 ], "wc_reply_reviewers_avg": [ 161.0, 203.99607839367894 ], "wc_reply_authors_avg": [ 191.8, 339.1532986718543 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2149422888565432802&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 12, "email": "meta.com;asu.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Arizona State University", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.asu.edu", "aff_unique_abbr": "Meta;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AdaptSSR: Pre-training User Model with Augmentation-Adaptive Self-Supervised 
Ranking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71432", "id": "VsbrdJpwpT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e0da5da69b71349ae0bd7ad716e4bc9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VsbrdJpwpT", "openreview": "https://openreview.net/forum?id=VsbrdJpwpT", "poster": "/media/PosterPDFs/NeurIPS%202023/71432.png?t=1698323972.2534196", "slides": "https://nips.cc/virtual/2023/poster/71432", "video": "https://nips.cc/virtual/2023/poster/71432", "author_site": "Yang Yu, Qi Liu, Kai Zhang, Yuren Zhang, Chao Song, Min Hou, Yuqing Yuan, Zhihao Ye, ZAIXI ZHANG, Sanshi Lei Yu", "tldr": "", "abstract": "User modeling, which aims to capture users' characteristics or interests, heavily relies on task-specific labeled data and suffers from the data sparsity issue. Several recent studies tackled this problem by pre-training the user model on massive user behavior sequences with a contrastive learning task. Generally, these methods assume different views of the same behavior sequence constructed via data augmentation are semantically consistent, i.e., reflecting similar characteristics or interests of the user, and thus maximizing their agreement in the feature space. However, due to the diverse interests and heavy noise in user behaviors, existing augmentation methods tend to lose certain characteristics of the user or introduce noisy behaviors. Thus, forcing the user model to directly maximize the similarity between the augmented views may result in a negative transfer. To this end, we propose to replace the contrastive learning task with a new pretext task: Augmentation-Adaptive SelfSupervised Ranking (AdaptSSR), which alleviates the requirement of semantic consistency between the augmented views while pre-training a discriminative user model. Specifically, we adopt a multiple pairwise ranking loss which trains the user model to capture the similarity orders between the implicitly augmented view, the explicitly augmented view, and views from other users. We further employ an in-batch hard negative sampling strategy to facilitate model training. Moreover, considering the distinct impacts of data augmentation on different behavior sequences, we design an augmentation-adaptive fusion mechanism to automatically adjust the similarity order constraint applied to each sample based on the estimated similarity between the augmented views. 
Extensive experiments on both public and industrial datasets with six downstream tasks verify the effectiveness of AdaptSSR.", "keywords": "User Model Pre-training;Data Augmentation;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/d1581947b4604d8ba554fd13cbe723d6ea3fc5cf.pdf", "author": "Yang Yu;Qi Liu;Kai Zhang;Yuren Zhang;Chao Song;Min Hou;Yuqing Yuan;ZHIhao Ye;ZAIXI ZHANG;Sanshi Lei Yu", "authorids": "~Yang_Yu18;~Qi_Liu3;~Kai_Zhang12;~Yuren_Zhang2;~Chao_Song2;~Min_Hou1;~Yuqing_Yuan1;~ZHIhao_Ye2;~ZAIXI_ZHANG2;~Sanshi_Lei_Yu1", "gender": "M;M;M;M;M;F;M;;M;M", "homepage": "https://yflyl613.github.io/;http://staff.ustc.edu.cn/~qiliuql/;http://home.ustc.edu.cn/~sa517494/;;https://scholar.google.com/citations?user=cm1ZxAUAAAAJ&hl=en;https://scholar.google.com/citations?user=ENdvgjYAAAAJ&hl=zh-CN;https://github.com/sameul-yuan;https://github.com/yzhihao;http://home.ustc.edu.cn/~zaixi/;https://yusanshi.com/", "dblp": "46/2181;95/2446-3;55/957-38;270/6517;;15/6301-1;;;267/9295.html;336/3909", "google_scholar": "-DwIl3IAAAAJ;5EoHAFwAAAAJ;t6IIpAUAAAAJ;BJvX3rYAAAAJ;cm1ZxAUAAAAJ;ENdvgjYAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;dE3GoNsAAAAJ", "orcid": ";0000-0001-6956-5550;0000-0001-5335-2470;0000-0002-8758-906X;;0000-0002-0524-6806;;;;0000-0001-9393-1397", "linkedin": ";;;;;;;;;", "or_profile": "~Yang_Yu18;~Qi_Liu3;~Kai_Zhang12;~Yuren_Zhang2;~Chao_Song2;~Min_Hou1;~Yuqing_Yuan1;~ZHIhao_Ye2;~ZAIXI_ZHANG2;~Sanshi_Lei_Yu1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;;;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;;;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Full Professor;Researcher;PhD student;;PhD student;;;PhD student;MS student", "bibtex": "@inproceedings{\nyu2023adaptssr,\ntitle={Adapt{SSR}: Pre-training User Model with Augmentation-Adaptive Self-Supervised Ranking},\nauthor={Yang Yu and Qi Liu and Kai Zhang and Yuren Zhang and Chao Song and Min Hou and Yuqing Yuan and ZHIhao Ye and ZAIXI ZHANG and Sanshi Lei Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VsbrdJpwpT}\n}", "github": "", "project": "", "reviewers": "8cxi;toy8;P6ts;oeWK", "pdf_size": 3953716, "rating": "4;5;6;6", "confidence": "5;4;4;4", "soundness": "1;3;3;3", "novelty": "2;2;3;3", "presentation": "1;3;3;3", "wc_summary": "87;72;138;118", "wc_strengths": "30;20;38;121", "wc_weaknesses": "222;53;49;161", "wc_questions": "46;21;30;6", "wc_limitations": "9;14;32;2", "wc_review": "394;180;287;408", "wc_reply_reviewers": "0;11;24;19", "wc_reply_authors": "141;108;39;44", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 103.75, 25.810608284191986 ], "wc_strengths_avg": [ 52.25, 40.20183453525473 ], "wc_weaknesses_avg": [ 121.25, 73.49957482870224 ], "wc_questions_avg": [ 25.75, 14.49784466739798 ], "wc_limitations_avg": [ 14.25, 11.098986440211556 ], "wc_review_avg": [ 317.25, 92.03090513517728 ], "wc_reply_reviewers_avg": [ 13.5, 
9.069178573608527 ], "wc_reply_authors_avg": [ 83.0, 43.14510400960925 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2021028158107309457&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;;;ustc.edu.cn;ustc.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "WBCAtt: A White Blood Cell Dataset Annotated with Detailed Morphological Attributes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73570", "id": "VtbKj2xlhI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f34484e5b8d87f09cc58c292a1c9f5d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=VtbKj2xlhI", "openreview": "https://openreview.net/forum?id=VtbKj2xlhI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73570", "video": "https://nips.cc/virtual/2023/poster/73570", "author_site": "Satoshi Tsutsui, Winnie Pang, Bihan Wen", "tldr": "", "abstract": "The examination of blood samples at a microscopic level plays a fundamental role in clinical diagnostics. For instance, an in-depth study of White Blood Cells (WBCs), a crucial component of our blood, is essential for diagnosing blood-related diseases such as leukemia and anemia. While multiple datasets containing WBC images have been proposed, they mostly focus on cell categorization, often lacking the necessary morphological details to explain such categorizations, despite the importance of explainable artificial intelligence (XAI) in medical domains. This paper seeks to address this limitation by introducing comprehensive annotations for WBC images. Through collaboration with pathologists, a thorough literature review, and manual inspection of microscopic images, we have identified 11 morphological attributes associated with the cell and its components (nucleus, cytoplasm, and granules). We then annotated ten thousand WBC images with these attributes, resulting in 113k labels (11 attributes x 10.3k images). Annotating at this level of detail and scale is unprecedented, offering unique value to AI in pathology. Moreover, we conduct experiments to predict these attributes from cell images, and also demonstrate specific applications that can benefit from our detailed annotations. 
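A minimal sketch of the attribute-prediction setup described above: a shared image encoder with one classification head per morphological attribute. The torchvision backbone and the attribute names and cardinalities below are placeholders, not the dataset's actual 11-attribute schema.

```python
import torch
import torch.nn as nn
from torchvision.models import resnet50

# Placeholder attribute heads; the dataset defines 11 such attributes over
# the cell and its components (nucleus, cytoplasm, granules).
ATTRIBUTES = {"nucleus_shape": 4, "cytoplasm_color": 3, "granularity": 2}

class WBCAttributePredictor(nn.Module):
    def __init__(self):
        super().__init__()
        backbone = resnet50(weights=None)
        backbone.fc = nn.Identity()          # reuse the 2048-d pooled feature
        self.backbone = backbone
        self.heads = nn.ModuleDict(
            {name: nn.Linear(2048, n) for name, n in ATTRIBUTES.items()})

    def forward(self, images):
        feats = self.backbone(images)
        return {name: head(feats) for name, head in self.heads.items()}

logits = WBCAttributePredictor()(torch.randn(2, 3, 224, 224))
```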
Overall, our dataset paves the way for interpreting WBC recognition models, further advancing XAI in the fields of pathology and hematology.", "keywords": "white blood cells;morphological attributes;microscopic image;explainable AI;computer vision", "primary_area": "", "supplementary_material": "/attachment/0b71ad725557d113ea4b4d869426ffd88ffc4da8.zip", "author": "Satoshi Tsutsui;Winnie Pang;Bihan Wen", "authorids": "~Satoshi_Tsutsui1;~Winnie_Pang1;~Bihan_Wen2", "gender": ";F;M", "homepage": "https://hellosatoshi.github.io;https://scholar.google.com/citations?user=IH02mw4AAAAJ&hl=en;https://personal.ntu.edu.sg/bihan.wen/", "dblp": "198/0640.html;214/3868;158/9840", "google_scholar": "tiXMNRIAAAAJ;IH02mw4AAAAJ;ypkClpwAAAAJ", "orcid": "0000-0001-7370-1754;0000-0002-6493-7156;0000-0002-6874-6453", "linkedin": ";;", "or_profile": "~Satoshi_Tsutsui1;~Winnie_Pang1;~Bihan_Wen2", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "Postdoc;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntsutsui2023wbcatt,\ntitle={{WBCA}tt: A White Blood Cell Dataset Annotated with Detailed Morphological Attributes},\nauthor={Satoshi Tsutsui and Winnie Pang and Bihan Wen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=VtbKj2xlhI}\n}", "github": "", "project": "", "reviewers": "mmqD;sgnA;U8Fk;ZbQt", "pdf_size": 4219740, "rating": "6;6;7;8", "confidence": "3;2;3;4", "wc_summary_and_contributions": "33;100;35;28", "wc_strengths": "25;102;19;35", "wc_improvement": "144;82;196;70", "wc_limitations": "21;5;1;35", "wc_correctness": "12;4;54;1", "wc_clarity": "1;5;7;1", "wc_relation_to_prior_work": "1;10;16;1", "wc_documentation": "1;1;1;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "239;310;330;175", "wc_reply_reviewers": "0;40;0;7", "wc_reply_authors": "1093;508;869;501", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 49.0, 29.5550334122633 ], "wc_strengths_avg": [ 45.25, 33.25939716831921 ], "wc_improvement_avg": [ 123.0, 50.6458290484024 ], "wc_limitations_avg": [ 15.5, 13.518505834595775 ], "wc_correctness_avg": [ 17.75, 21.3116752039815 ], "wc_clarity_avg": [ 3.5, 2.598076211353316 ], "wc_relation_to_prior_work_avg": [ 7.0, 6.363961030678928 ], "wc_documentation_avg": [ 1.5, 0.8660254037844386 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 263.5, 61.271934847856734 ], "wc_reply_reviewers_avg": [ 11.75, 16.55860803328589 ], "wc_reply_authors_avg": [ 742.75, 251.08004201847666 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15310291869437308950&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Evaluating Cognitive Maps and Planning in 
Large Language Models with CogEval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71431", "id": "VtkGvGcGe3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc9d5dcf3e86b83e137bad367227c8ca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VtkGvGcGe3", "openreview": "https://openreview.net/forum?id=VtkGvGcGe3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71431", "video": "https://nips.cc/virtual/2023/poster/71431", "author_site": "Ida Momennejad, Hosein Hasanbeig, Felipe Vieira Frujeri, Hiteshi Sharma, Nebojsa Jojic, Hamid Palangi, Robert Ness, Jonathan Larson", "tldr": "", "abstract": "Recently an influx of studies claims emergent cognitive abilities in large language models (LLMs). Yet, most rely on anecdotes, overlook contamination of training sets, or lack systematic Evaluation involving multiple tasks, control conditions, multiple iterations, and statistical robustness tests. Here we make two major contributions. First, we propose CogEval, a cognitive science-inspired protocol for the systematic evaluation of cognitive capacities in LLMs. The CogEval protocol can be followed for the evaluation of various abilities. Second, here we follow CogEval to systematically evaluate cognitive maps and planning ability across eight LLMs (OpenAI GPT-4, GPT-3.5-turbo-175B, davinci-003-175B, Google Bard, Cohere-xlarge-52.4B, Anthropic Claude-1-52B, LLaMA-13B, and Alpaca-7B). We base our task prompts on human experiments, which offer both established construct validity for evaluating planning, and are absent from LLM training sets. We find that, while LLMs show apparent competence in a few planning tasks with simpler structures, systematic evaluation reveals striking failure modes in planning tasks, including hallucinations of invalid trajectories and falling in loops. These findings do not support the idea of emergent out-of-the-box planning ability in LLMs. This could be because LLMs do not understand the latent relational structures underlying planning problems, known as cognitive maps, and fail at unrolling goal-directed trajectories based on the underlying structure. 
Implications for application and future directions are discussed.", "keywords": "Large Language Models;LLM evaluation;model comparison;GPT-4;graph analysis;cognitive science;cognitive map;hippocampus;planning;multi-step planning;reasoning;community graph", "primary_area": "", "supplementary_material": "/attachment/d36a4836b149df0db629419e67f1a823e3442d8e.pdf", "author": "Ida Momennejad;Hosein Hasanbeig;Felipe Vieira Frujeri;Hiteshi Sharma;Nebojsa Jojic;Hamid Palangi;Robert Ness;Jonathan Larson", "authorids": "~Ida_Momennejad1;hosein.hasanbeig@microsoft.com;~Felipe_Vieira_Frujeri1;~Hiteshi_Sharma1;~Nebojsa_Jojic1;~Hamid_Palangi1;~Robert_Ness1;~Jonathan_Larson1", "gender": "F;;;F;;M;;M", "homepage": "https://www.momen-nejad.org;;;https://hiteshis.github.io/;www.research.microsoft.com/~jojic;https://www.hamidpalangi.com/;https://www.microsoft.com/en-us/research/people/robertness/;https://www.linkedin.com/in/jonathanlarson3/", "dblp": ";;;158/3418;20/1944;01/963;;40/9529", "google_scholar": "https://scholar.google.de/citations?user=OFdUAJwAAAAJ;;wy0FA1cAAAAJ;;;https://scholar.google.ca/citations?user=B1lAghgAAAAJ;;", "orcid": "0000-0003-0830-3973;;;;;;;0000-0002-8865-9306", "linkedin": "ida-momennejad-8661a710/;;;;;;;jonathanlarson3/", "or_profile": "~Ida_Momennejad1;hosein.hasanbeig@microsoft.com;~Felipe_Vieira_Frujeri1;~Hiteshi_Sharma1;~Nebojsa_Jojic1;~Hamid_Palangi1;~Robert_Ness1;~Jonathan_Larson1", "aff": "Microsoft Research;;;Microsoft;Microsoft Research;Google;Microsoft Research;Microsoft", "aff_domain": "research.microsoft.com;;;microsoft.com; ;google.com;microsoft.com;microsoft.com", "position": "Principal Researcher;;;Researcher;Researcher;Staff Research Scientist;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nmomennejad2023evaluating,\ntitle={Evaluating Cognitive Maps and Planning in Large Language Models with CogEval},\nauthor={Ida Momennejad and Hosein Hasanbeig and Felipe Vieira Frujeri and Hiteshi Sharma and Nebojsa Jojic and Hamid Palangi and Robert Ness and Jonathan Larson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VtkGvGcGe3}\n}", "github": "", "project": "", "reviewers": "xxkR;Vmb5;ydCJ;UmDW", "pdf_size": 1106136, "rating": "5;5;7;7", "confidence": "3;4;4;3", "soundness": "2;2;3;3", "novelty": "3;3;4;3", "presentation": "1;1;4;2", "wc_summary": "72;45;60;39", "wc_strengths": "73;106;50;50", "wc_weaknesses": "182;581;56;156", "wc_questions": "123;39;84;93", "wc_limitations": "32;33;6;1", "wc_review": "482;804;256;339", "wc_reply_reviewers": "8;1251;17;26", "wc_reply_authors": "0;1462;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 1.224744871391589 ], "wc_summary_avg": [ 54.0, 12.90348790056394 ], "wc_strengths_avg": [ 69.75, 22.93877721239735 ], "wc_weaknesses_avg": [ 243.75, 200.31272425884484 ], "wc_questions_avg": [ 84.75, 30.102948360584218 ], "wc_limitations_avg": [ 18.0, 14.611639196202457 ], "wc_review_avg": [ 470.25, 208.9597748371681 ], "wc_reply_reviewers_avg": [ 325.5, 534.3755701751344 ], "wc_reply_authors_avg": [ 365.5, 633.0645701664247 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 64, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5012876687935119050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "research.microsoft.com;;;microsoft.com; ;google.com;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Microsoft;Google", "aff_unique_dep": "Microsoft Research;Google", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.google.com", "aff_unique_abbr": "MSR;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multiplication-Free Transformer Training via Piecewise Affine Operations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71430", "id": "Vtqymej1tA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/19df21cd4931bd0caaa4d8480e9a59cd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Vtqymej1tA", "openreview": "https://openreview.net/forum?id=Vtqymej1tA", "poster": "/media/PosterPDFs/NeurIPS%202023/71430.png?t=1702160226.5255687", "slides": "https://nips.cc/virtual/2023/poster/71430", "video": "https://nips.cc/virtual/2023/poster/71430", "author_site": "Atli Kosson, Martin Jaggi", "tldr": "", "abstract": "Multiplications are responsible for most of the computational cost involved in neural network training and inference. Recent research has thus looked for ways to reduce the cost associated with them. Inspired by Mogami 2020, we replace multiplication with a cheap piecewise affine approximation that is achieved by adding the bit representation of the floating point numbers together as integers. We show that transformers can be trained with the resulting modified matrix multiplications on both vision and language tasks with little to no performance impact, and without changes to the training hyperparameters. We further replace all non-linearities in the networks making them fully and jointly piecewise affine in both inputs and weights. 
Finally, we show that we can eliminate all multiplications in the entire training process, including operations in the forward pass, backward pass and optimizer update, demonstrating the first successful training of modern neural network architectures in a fully multiplication-free fashion.", "keywords": "multiplication-free;neural architectures;piecewise linear networks;piecewise affine networks;efficient training;efficient arithmetics", "primary_area": "", "supplementary_material": "", "author": "Atli Kosson;Martin Jaggi", "authorids": "~Atli_Kosson1;~Martin_Jaggi1", "gender": ";M", "homepage": ";https://mlo.epfl.ch", "dblp": ";17/4402", "google_scholar": ";https://scholar.google.ch/citations?user=r1TJBr8AAAAJ", "orcid": ";0000-0003-1579-5558", "linkedin": ";", "or_profile": "~Atli_Kosson1;~Martin_Jaggi1", "aff": ";EPFL", "aff_domain": ";epfl.ch", "position": ";Associate Professor", "bibtex": "@inproceedings{\nkosson2023multiplicationfree,\ntitle={Multiplication-Free Transformer Training via Piecewise Affine Operations},\nauthor={Atli Kosson and Martin Jaggi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Vtqymej1tA}\n}", "github": "", "project": "", "reviewers": "MZyL;aJYh;WjDg;DF7v", "pdf_size": 546239, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;2;3", "wc_summary": "54;80;44;35", "wc_strengths": "26;51;55;47", "wc_weaknesses": "237;663;143;92", "wc_questions": "12;23;49;23", "wc_limitations": "12;5;12;8", "wc_review": "341;822;303;205", "wc_reply_reviewers": "5;92;11;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 16.843025262701474 ], "wc_strengths_avg": [ 44.75, 11.188722000300123 ], "wc_weaknesses_avg": [ 283.75, 225.05263273287872 ], "wc_questions_avg": [ 26.75, 13.608361400256829 ], "wc_limitations_avg": [ 9.25, 2.947456530637899 ], "wc_review_avg": [ 417.75, 238.60990654203778 ], "wc_reply_reviewers_avg": [ 32.0, 35.04996433664377 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15170532577162599727&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": ";epfl.ch", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "title": "RevColV2: Exploring Disentangled Representations in Masked Image Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71429", "id": "VvnfMeC3gQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d56e69c317429945785ede86c00b44e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VvnfMeC3gQ", "openreview": "https://openreview.net/forum?id=VvnfMeC3gQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71429.png?t=1701871468.9215426", "slides": "https://nips.cc/virtual/2023/poster/71429", "video": "https://nips.cc/virtual/2023/poster/71429", "author_site": "Qi 
Han, Yuxuan Cai, Xiangyu Zhang", "tldr": "", "abstract": "Masked image modeling (MIM) has become a prevalent pre-training setup for vision foundation models and attains promising performance. Despite its success, existing MIM methods discard the decoder network during downstream applications, resulting in inconsistent representations between pre-training and fine-tuning, which can hamper downstream task performance. In this paper, we propose a new architecture, RevColV2, which tackles this issue by keeping the entire autoencoder architecture during both pre-training and fine-tuning. The main body of RevColV2 contains bottom-up columns and top-down columns, between which information is reversibly propagated and gradually disentangled. Such a design endows our architecture with a desirable property: maintaining disentangled low-level and semantic information at the end of the network in MIM pre-training. Our experimental results suggest that a foundation model with decoupled features can achieve competitive performance across multiple downstream vision tasks such as image classification, semantic segmentation and object detection. For example, after intermediate fine-tuning on the ImageNet-22K dataset, RevColV2-L attains 88.4\\% top-1 accuracy on ImageNet-1K classification and 58.6 mIoU on ADE20K semantic segmentation. With an extra teacher and a large-scale dataset, RevColV2-L achieves 62.1 box AP on COCO detection and 60.4 mIoU on ADE20K semantic segmentation.", "keywords": "architecture design;representation learning;masked image modeling;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/0acbae5436b8c5584d7245b3b89667453773f4cb.pdf", "author": "Qi Han;Yuxuan Cai;Xiangyu Zhang", "authorids": "~Qi_Han3;~Yuxuan_Cai1;~Xiangyu_Zhang1", "gender": "M;M;M", "homepage": "http://hanqer.github.io;https://nightsnack.github.io;", "dblp": "76/5895;;95/3760-5.html", "google_scholar": "DuEUlAQAAAAJ;EzYiBeUAAAAJ;yuB-cfoAAAAJ", "orcid": ";;0000-0003-2138-4608", "linkedin": ";;", "or_profile": "~Qi_Han3;~Yuxuan_Cai1;~Xiangyu_Zhang1", "aff": "Megvii Technology Inc.;Megvii Technology Inc.;MEGVII Technology", "aff_domain": "megvii.com;megvii.com;megvii.com", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nhan2023revcolv,\ntitle={RevColV2: Exploring Disentangled Representations in Masked Image Modeling},\nauthor={Qi Han and Yuxuan Cai and Xiangyu Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VvnfMeC3gQ}\n}", "github": "", "project": "", "reviewers": "yT3P;DHnY;6koM;W68W;xDT5", "pdf_size": 1989432, "rating": "5;5;5;6;7", "confidence": "5;4;4;5;4", "soundness": "3;3;3;4;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;4;3", "wc_summary": "60;44;66;32;60", "wc_strengths": "39;21;58;4;52", "wc_weaknesses": "188;71;248;4;112", "wc_questions": "100;10;39;172;76", "wc_limitations": "1;1;24;4;1", "wc_review": "388;147;435;216;301", "wc_reply_reviewers": "12;20;192;19;131", "wc_reply_authors": "0;0;968;0;317", "reply_reviewers": "1;1;2;1;2", "reply_authors": "1;1;3;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 52.4, 12.547509713086498 ], "wc_strengths_avg": [ 34.8, 19.95394697797907 ], "wc_weaknesses_avg": [ 124.6, 85.81748073673569 ], "wc_questions_avg": [
79.4, 55.604316379216456 ], "wc_limitations_avg": [ 6.2, 8.97552226892675 ], "wc_review_avg": [ 297.4, 106.2084742381699 ], "wc_reply_reviewers_avg": [ 74.8, 73.42315711000175 ], "wc_reply_authors_avg": [ 257.0, 376.1031773330292 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.10206207261596573, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12448234881325242032&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "megvii.com;megvii.com;megvii.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Megvii Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.megvii.com", "aff_unique_abbr": "Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "VisionLLM: Large Language Model is also an Open-Ended Decoder for Vision-Centric Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71428", "id": "Vx1JadlOIt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c1f7b1ed763e9c75e4db74b49b76db5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Vx1JadlOIt", "openreview": "https://openreview.net/forum?id=Vx1JadlOIt", "poster": "/media/PosterPDFs/NeurIPS%202023/71428.png?t=1701845261.290421", "slides": "https://nips.cc/virtual/2023/poster/71428", "video": "https://nips.cc/virtual/2023/poster/71428", "author_site": "Wenhai Wang, Zhe Chen, Xiaokang Chen, Jiannan Wu, Xizhou Zhu, Gang Zeng, Ping Luo, Tong Lu, Jie Zhou, Yu Qiao, Jifeng Dai", "tldr": "", "abstract": "Large language models (LLMs) have notably accelerated progress towards artificial general intelligence (AGI), with their impressive zero-shot capacity for user-tailored tasks, endowing them with immense potential across a range of applications. However, in the field of computer vision, despite the availability of numerous powerful vision foundation models (VFMs), they are still restricted to tasks in a pre-defined form, struggling to match the open-ended task capabilities of LLMs. In this work, we present an LLM-based framework for vision-centric tasks, termed VisionLLM. This framework provides a unified perspective for vision and language tasks by treating images as a foreign language and aligning vision-centric tasks with language tasks that can be flexibly defined and managed using language instructions. An LLM-based decoder can then make appropriate predictions based on these instructions for open-ended tasks. Extensive experiments show that the proposed VisionLLM can achieve different levels of task customization through language instructions, from fine-grained object-level to coarse-grained task-level customization, all with good results. It's noteworthy that, with a generalist LLM-based framework, our model can achieve over 60% mAP on COCO, on par with detection-specific models. We hope this model can set a new baseline for generalist vision and language models. 
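Purely as an illustration of the "images as a foreign language" idea described in the VisionLLM abstract above, the snippet below renders a detection task as a hypothetical instruction/response pair with discretized location tokens; the actual prompt schema, token vocabulary, and coordinate binning used by VisionLLM are not reproduced here.

```python
def format_detection_sample(class_names, boxes):
    """Hypothetical rendering of object detection as an instruction/response
    pair: the image is a placeholder token, and box coordinates are quantized
    into location bins so a language decoder can emit them as ordinary tokens."""
    instruction = ("<image> For each object of the categories "
                   f"{{{', '.join(class_names)}}}, output its class and box.")
    response = " ".join(
        f"{cls} <bin_{x0}> <bin_{y0}> <bin_{x1}> <bin_{y1}>"
        for cls, (x0, y0, x1, y1) in boxes)
    return instruction, response

# Coordinates assumed pre-quantized into, e.g., 512 bins per axis.
print(format_detection_sample(["person", "dog"],
                              [("person", (12, 40, 210, 480))]))
```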
The code shall be released.", "keywords": "Large Vision-Language Model;Detection;Image Caption", "primary_area": "", "supplementary_material": "/attachment/8574505884713cfe0fa931b87a7559512e3af0c4.pdf", "author": "Wenhai Wang;Zhe Chen;Xiaokang Chen;Jiannan Wu;Xizhou Zhu;Gang Zeng;Ping Luo;Tong Lu;Jie Zhou;Yu Qiao;Jifeng Dai", "authorids": "~Wenhai_Wang2;~Zhe_Chen10;~Xiaokang_Chen1;~Jiannan_Wu2;~Xizhou_Zhu1;~Gang_Zeng1;~Ping_Luo2;~Tong_Lu1;~Jie_Zhou3;~Yu_Qiao1;~Jifeng_Dai1", "gender": ";M;M;M;;M;;M;M;;M", "homepage": ";https://czczup.github.io/;https://charlescxk.github.io/;;;https://www.cis.pku.edu.cn/info/1177/1378.htm;;https://cs.nju.edu.cn/lutong/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;;https://jifengdai.org/", "dblp": ";06/4240-17;163/6632;277/0616;170/1608;;;;00/5012-1;;14/9399", "google_scholar": ";j1rq_lYAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;1euA66EAAAAJ;02RXI00AAAAJ;RuHyY6gAAAAJ;;;;;SH_-B_AAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Wenhai_Wang2;~Zhe_Chen10;~Xiaokang_Chen1;~Jiannan_Wu2;~Xizhou_Zhu1;~Gang_Zeng1;~Ping_Luo2;~Tong_Lu1;~Jie_Zhou3;~Yu_Qiao1;~Jifeng_Dai1", "aff": ";Nanjing University;Peking University;University of Hong Kong;SenseTime;Peking University;;Nanjing University;Tsinghua University;;Tsinghua University", "aff_domain": ";nju.edu.cn;pku.edu.cn;hku.hk;sensetime.com;pku.edu.cn;;nju.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "position": ";PhD student;PhD student;PhD student;Researcher;Researcher;;Undergrad student;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nwang2023visionllm,\ntitle={Vision{LLM}: Large Language Model is also an Open-Ended Decoder for Vision-Centric Tasks},\nauthor={Wenhai Wang and Zhe Chen and Xiaokang Chen and Jiannan Wu and Xizhou Zhu and Gang Zeng and Ping Luo and Tong Lu and Jie Zhou and Yu Qiao and Jifeng Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Vx1JadlOIt}\n}", "github": "", "project": "", "reviewers": "87vj;kyDb;TifL;wdrB;eUTR", "pdf_size": 1127251, "rating": "5;5;6;6;7", "confidence": "5;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;3;3;2", "wc_summary": "42;55;59;27;128", "wc_strengths": "96;83;68;107;76", "wc_weaknesses": "271;86;149;98;96", "wc_questions": "85;143;135;104;165", "wc_limitations": "88;27;22;1;1", "wc_review": "582;394;433;337;466", "wc_reply_reviewers": "540;0;105;29;16", "wc_reply_authors": "1399;0;1179;8;23", "reply_reviewers": "3;0;2;1;1", "reply_authors": "5;1;4;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 62.2, 34.75284161043525 ], "wc_strengths_avg": [ 86.0, 13.957077057894322 ], "wc_weaknesses_avg": [ 140.0, 69.07676888795538 ], "wc_questions_avg": [ 126.4, 28.478764018124103 ], "wc_limitations_avg": [ 27.8, 31.921152861386446 ], "wc_review_avg": [ 442.4, 81.95754998778331 ], "wc_reply_reviewers_avg": [ 138.0, 204.20675796848644 ], "wc_reply_authors_avg": [ 521.8, 630.3108439492374 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.8, 1.469693845669907 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 513, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7929876691840755441&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": ";nju.edu.cn;pku.edu.cn;hku.hk;sensetime.com;pku.edu.cn;;nju.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;3;1;0;4;4", "aff_unique_norm": "Nanjing University;Peking University;University of Hong Kong;SenseTime;Tsinghua University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.nju.edu.cn;http://www.pku.edu.cn;https://www.hku.hk;https://www.sensetime.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Nanjing U;Peking U;HKU;SenseTime;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Double Pessimism is Provably Efficient for Distributionally Robust Offline Reinforcement Learning: Generic Algorithm and Robust Partial Coverage", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71427", "id": "VzLBMkc7tB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d31b005d817e9c635ec8ffb0fb90190e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VzLBMkc7tB", "openreview": "https://openreview.net/forum?id=VzLBMkc7tB", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71427", "video": "https://nips.cc/virtual/2023/poster/71427", "author_site": "Jose Blanchet, Miao Lu, Tong Zhang, Han Zhong", "tldr": "", "abstract": "We study distributionally robust offline reinforcement learning (RL), which seeks to find an optimal robust policy purely from an offline dataset that can perform well in perturbed environments. We propose a generic algorithm framework Doubly Pessimistic Model-based Policy Optimization ($\\texttt{P}^2\\texttt{MPO}$) for robust offline RL, which features a novel combination of a flexible model estimation subroutine and a doubly pessimistic policy optimization step. Here the double pessimism principle is crucial to overcome the distribution shift incurred by i) the mismatch between behavior policy and the family of target policies; and ii) the perturbation of the nominal model. Under certain accuracy assumptions on the model estimation subroutine, we show that $\\texttt{P}^2\\texttt{MPO}$ is provably sample-efficient with robust partial coverage data, which means that the offline dataset has good coverage of the distributions induced by the optimal robust policy and perturbed models around the nominal model. By tailoring specific model estimation subroutines for concrete examples including tabular Robust Markov Decision Process (RMDP), factored RMDP, and RMDP with kernel and neural function approximations, we show that $\\texttt{P}^2\\texttt{MPO}$ enjoys a $\\tilde{\\mathcal{O}}(n^{-1/2})$ convergence rate, where $n$ is the number of trajectories in the offline dataset. Notably, these models, except for the tabular case, are first identified and proven tractable by this paper. 
To the best of our knowledge, this work is the first to propose a general learning principle --- double pessimism --- for robust offline RL and to show that it is provably efficient in the context of general function approximations.", "keywords": "distributionally robust offline reinforcement learning;double pessimism;general function approximation", "primary_area": "", "supplementary_material": "/attachment/99aa13a840ae37157ea74152ed724e230038bbf3.zip", "author": "Jose Blanchet;Miao Lu;Tong Zhang;Han Zhong", "authorids": "~Jose_Blanchet1;~Miao_Lu3;~Tong_Zhang2;~Han_Zhong1", "gender": "M;;M;", "homepage": "https://web.stanford.edu/~jblanche/;https://miaolu3.github.io;http://tongzhang-ml.org;https://hanzhong-ml.github.io/", "dblp": "75/5093.html;09/1168;07/4227-1;137/8096.html", "google_scholar": "https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;3jS17zQAAAAJ;LurWtuYAAAAJ;Bk5q_pAAAAAJ", "orcid": ";;0000-0002-5511-2558;", "linkedin": "jose-blanchet;miao-lu-5bb9a31aa/;;", "or_profile": "~Jose_Blanchet1;~Miao_Lu3;~Tong_Zhang2;~Han_Zhong1", "aff": "Stanford University;University of Science and Technology of China;Hong Kong University of Science and Technology;Peking University", "aff_domain": "stanford.edu;ustc.edu.cn;ust.hk;stu.pku.edu.cn", "position": "Professor;Undergrad student;Full Professor;PhD student", "bibtex": "@inproceedings{\nblanchet2023double,\ntitle={Double Pessimism is Provably Efficient for Distributionally Robust Offline Reinforcement Learning: Generic Algorithm and Robust Partial Coverage},\nauthor={Jose Blanchet and Miao Lu and Tong Zhang and Han Zhong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VzLBMkc7tB}\n}", "github": "", "project": "", "reviewers": "NCKb;Ty5v;p1MD;F7Eb;otjP", "pdf_size": 488901, "rating": "4;6;6;7;8", "confidence": "4;3;4;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "3;3;2;2;4", "wc_summary": "16;99;72;110;42", "wc_strengths": "29;57;135;61;79", "wc_weaknesses": "76;50;223;162;5", "wc_questions": "35;67;144;3;30", "wc_limitations": "3;2;18;6;1", "wc_review": "159;275;592;342;157", "wc_reply_reviewers": "12;35;13;27;12", "wc_reply_authors": "0;29;24;24;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 67.8, 34.98799794215153 ], "wc_strengths_avg": [ 72.2, 35.24996453898925 ], "wc_weaknesses_avg": [ 103.2, 78.76902944685811 ], "wc_questions_avg": [ 55.8, 48.56088961293852 ], "wc_limitations_avg": [ 6.0, 6.2289646009589745 ], "wc_review_avg": [ 305.0, 159.91122537207949 ], "wc_reply_reviewers_avg": [ 19.8, 9.495261976375375 ], "wc_reply_authors_avg": [ 15.4, 12.705904139414873 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0753778361444409, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=446745166062890090&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;ustc.edu.cn;ust.hk;stu.pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Stanford University;University of Science and Technology of China;Hong Kong University of Science and Technology;Peking University", "aff_unique_dep": ";;;", 
"aff_unique_url": "https://www.stanford.edu;http://www.ustc.edu.cn;https://www.ust.hk;http://www.pku.edu.cn", "aff_unique_abbr": "Stanford;USTC;HKUST;Peking U", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Stanford;;Hong Kong SAR", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Exposing Attention Glitches with Flip-Flop Language Modeling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71426", "id": "VzmpXQAn6E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/510ad3018bbdc5b6e3b10646e2e35771-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=VzmpXQAn6E", "openreview": "https://openreview.net/forum?id=VzmpXQAn6E", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71426", "video": "https://nips.cc/virtual/2023/poster/71426", "author_site": "Bingbin Liu, Jordan Ash, Surbhi Goel, Akshay Krishnamurthy, Cyril Zhang", "tldr": "", "abstract": "Why do large language models sometimes output factual inaccuracies and exhibit erroneous reasoning? The brittleness of these models, particularly when executing long chains of reasoning, currently seems to be an inevitable price to pay for their advanced capabilities of coherently synthesizing knowledge, pragmatics, and abstract thought. Towards making sense of this fundamentally unsolved problem, this work identifies and analyzes the phenomenon of _attention glitches_, in which the Transformer architecture's inductive biases intermittently fail to capture robust reasoning. To isolate the issue, we introduce _flip-flop language modeling_ (FFLM), a parametric family of synthetic benchmarks designed to probe the extrapolative behavior of neural language models. This simple generative task requires a model to copy binary symbols over long-range dependencies, ignoring the tokens in between. We find that Transformer FFLMs suffer from a long tail of sporadic reasoning errors, some of which we can eliminate using various regularization techniques. Our preliminary mechanistic analyses show why the remaining errors may be very difficult to diagnose and resolve. We hypothesize that attention glitches account for (some of) the closed-domain hallucinations in natural LLMs.", "keywords": "Transformers;language models;hallucinations;long-range dependencies;generalization;extrapolation;out-of-distribution", "primary_area": "", "supplementary_material": "/attachment/f1e8150c55ba1f12e7b8ced818be54ac561d3746.pdf", "author": "Bingbin Liu;Jordan T. 
Ash;Surbhi Goel;Akshay Krishnamurthy;Cyril Zhang", "authorids": "~Bingbin_Liu1;~Jordan_T._Ash1;~Surbhi_Goel1;~Akshay_Krishnamurthy1;~Cyril_Zhang1", "gender": "F;;F;M;", "homepage": "https://clarabing.github.io/;http://www.jordantash.com;https://www.surbhigoel.com;https://www.cics.umass.edu/~akshay/;https://cyrilzhang.com", "dblp": "222/1554;176/5225;190/7815;85/8024;203/4448", "google_scholar": "2ud06rQAAAAJ;bmRNH-UAAAAJ;https://scholar.google.co.in/citations?user=Zqz4CQoAAAAJ;https://scholar.google.com.tw/citations?user=K0kaNvkAAAAJ;sXtjq8IAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Bingbin_Liu1;~Jordan_T._Ash1;~Surbhi_Goel1;~Akshay_Krishnamurthy1;~Cyril_Zhang1", "aff": "Carnegie Mellon University;Microsoft Research;University of Pennsylvania;Microsoft Research;Microsoft", "aff_domain": "cmu.edu;research.microsoft.com;upenn.edu;research.microsoft.com;microsoft.com", "position": "PhD student;Postdoc;Assistant Professor;Principal Researcher;Senior Researcher", "bibtex": "@inproceedings{\nliu2023exposing,\ntitle={Exposing Attention Glitches with Flip-Flop Language Modeling},\nauthor={Bingbin Liu and Jordan T. Ash and Surbhi Goel and Akshay Krishnamurthy and Cyril Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=VzmpXQAn6E}\n}", "github": "", "project": "", "reviewers": "vbMc;FpxV;ZZvs;ycg1;4Fu7", "pdf_size": 10682798, "rating": "6;6;6;8;9", "confidence": "3;4;3;4;4", "soundness": "2;3;3;4;4", "novelty": "3;3;2;3;4", "presentation": "2;3;4;4;4", "wc_summary": "95;101;202;81;180", "wc_strengths": "49;117;132;67;145", "wc_weaknesses": "102;296;101;70;192", "wc_questions": "100;59;52;256;101", "wc_limitations": "17;54;5;67;8", "wc_review": "363;627;492;541;626", "wc_reply_reviewers": "49;30;66;128;72", "wc_reply_authors": "34;26;21;89;21", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 7.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 131.8, 49.264185774251864 ], "wc_strengths_avg": [ 102.0, 37.43794866175229 ], "wc_weaknesses_avg": [ 152.2, 82.65688114125768 ], "wc_questions_avg": [ 113.6, 74.02323959406263 ], "wc_limitations_avg": [ 30.2, 25.388186229031803 ], "wc_review_avg": [ 529.8, 98.05998164388978 ], "wc_reply_reviewers_avg": [ 69.0, 32.92415526630866 ], "wc_reply_authors_avg": [ 38.2, 25.84105261013955 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5677692835053894130&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "cmu.edu;research.microsoft.com;upenn.edu;research.microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Carnegie Mellon University;Microsoft;University of Pennsylvania", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com/en-us/research;https://www.upenn.edu", "aff_unique_abbr": "CMU;MSR;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are Vision Transformers More Data Hungry Than Newborn Visual Systems?", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/71425", "id": "W23ZTdsabj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e75dce944052276caf89c17aca8963d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=W23ZTdsabj", "openreview": "https://openreview.net/forum?id=W23ZTdsabj", "poster": "/media/PosterPDFs/NeurIPS%202023/71425.png?t=1702115208.636196", "slides": "https://nips.cc/virtual/2023/poster/71425", "video": "https://nips.cc/virtual/2023/poster/71425", "author_site": "Lalit Pandey, Samantha Wood, Justin Wood", "tldr": "", "abstract": "Vision transformers (ViTs) are top-performing models on many computer vision benchmarks and can accurately predict human behavior on object recognition tasks. However, researchers question the value of using ViTs as models of biological learning because ViTs are thought to be more \u201cdata hungry\u201d than brains, with ViTs requiring more training data than brains to reach similar levels of performance. To test this assumption, we directly compared the learning abilities of ViTs and animals, by performing parallel controlled-rearing experiments on ViTs and newborn chicks. We first raised chicks in impoverished visual environments containing a single object, then simulated the training data available in those environments by building virtual animal chambers in a video game engine. We recorded the first-person images acquired by agents moving through the virtual chambers and used those images to train self-supervised ViTs that leverage time as a teaching signal, akin to biological visual systems. When ViTs were trained \u201cthrough the eyes\u201d of newborn chicks, the ViTs solved the same view-invariant object recognition tasks as the chicks. Thus, ViTs were not more data hungry than newborn chicks: both learned view-invariant object representations in impoverished visual environments. 
The flexible and generic attention-based learning mechanism in ViTs\u2014combined with the embodied data streams available to newborn animals\u2014appears sufficient to drive the development of animal-like object recognition.", "keywords": "vision transformer;newborn;controlled rearing;object recognition;data hungry", "primary_area": "", "supplementary_material": "/attachment/7e57ac72117ff817f0b15c8be0331a7d557da904.zip", "author": "Lalit Pandey;Samantha Marie Waters Wood;Justin Newell Wood", "authorids": "~Lalit_Pandey1;sw113@iu.edu;~Justin_Newell_Wood1", "gender": "M;;M", "homepage": ";;http://www.buildingamind.com/", "dblp": ";;", "google_scholar": "fjcGYIwAAAAJ;;", "orcid": "0009-0000-0738-8747;;", "linkedin": "lpp/;;", "or_profile": "~Lalit_Pandey1;sw113@iu.edu;~Justin_Newell_Wood1", "aff": "Indiana University;;Indiana University at Bloomington", "aff_domain": "iu.edu;;indiana.edu", "position": "MS student;;Associate Professor", "bibtex": "@inproceedings{\npandey2023are,\ntitle={Are Vision Transformers More Data Hungry Than Newborn Visual Systems?},\nauthor={Lalit Pandey and Samantha Marie Waters Wood and Justin Newell Wood},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=W23ZTdsabj}\n}", "github": "", "project": "", "reviewers": "w2HU;xTn4;8hgW;Au5T", "pdf_size": 4262947, "rating": "4;4;5;7", "confidence": "4;4;4;5", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "3;3;4;3", "wc_summary": "53;64;118;109", "wc_strengths": "72;15;71;44", "wc_weaknesses": "280;201;73;31", "wc_questions": "56;215;10;240", "wc_limitations": "1;17;7;11", "wc_review": "462;512;279;435", "wc_reply_reviewers": "234;0;159;47", "wc_reply_authors": "788;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.0, 27.955321496988727 ], "wc_strengths_avg": [ 50.5, 23.371991785040485 ], "wc_weaknesses_avg": [ 146.25, 99.41673651855606 ], "wc_questions_avg": [ 130.25, 98.99589637959747 ], "wc_limitations_avg": [ 9.0, 5.830951894845301 ], "wc_review_avg": [ 422.0, 87.06032391393911 ], "wc_reply_reviewers_avg": [ 110.0, 91.98641204003991 ], "wc_reply_authors_avg": [ 197.0, 341.2140090910688 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3489231143638955814&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "iu.edu;;indiana.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Indiana University", "aff_unique_dep": "", "aff_unique_url": "https://www.indiana.edu", "aff_unique_abbr": "IU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bloomington", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PolyDiffuse: Polygonal Shape Reconstruction via Guided Set Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71424", "id": "W2ZBLdfa16", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05f0e2fa003602db2d98ca72b79dec51-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=W2ZBLdfa16", "openreview": "https://openreview.net/forum?id=W2ZBLdfa16", "poster": "/media/PosterPDFs/NeurIPS%202023/71424.png?t=1699411200.103965", "slides": "https://nips.cc/virtual/2023/poster/71424", "video": "https://nips.cc/virtual/2023/poster/71424", "author_site": "Jiacheng Chen, Ruizhi Deng, Ruizhi Deng, Yasutaka Furukawa", "tldr": "", "abstract": "This paper presents \\textit{PolyDiffuse}, a novel structured reconstruction algorithm that transforms visual sensor data into polygonal shapes with Diffusion Models (DM), an emerging machinery amid exploding generative AI, while formulating reconstruction as a generation process conditioned on sensor data. \nThe task of structured reconstruction poses two fundamental challenges to DM: 1) A structured geometry is a ''set'' (e.g., a set of polygons for a floorplan geometry), where a sample of $N$ elements has $N!$ different but equivalent representations, making the denoising highly ambiguous; and 2) A ''reconstruction'' task has a single solution, where an initial noise needs to be chosen carefully, while any initial noise works for a generation task.\nOur technical contribution is the introduction of a Guided Set Diffusion Model where 1) the forward diffusion process learns \\textit{guidance networks} to control noise injection so that one representation of a sample remains distinct from its other permutation variants, thus resolving denoising ambiguity; and 2) the reverse denoising process reconstructs polygonal shapes, initialized and directed by the guidance networks, as a conditional generation process subject to the sensor data.\nWe have evaluated our approach for reconstructing two types of polygonal shapes: floorplan as a set of polygons and HD map for autonomous cars as a set of polylines.\nThrough extensive experiments on standard benchmarks, we demonstrate that PolyDiffuse significantly advances the current state of the art and enables broader practical applications. 
The code and data are available on our project page: https://poly-diffuse.github.io.", "keywords": "Structured Reconstruction;Floorplan Reconstruction;HD Map Construction;Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/c4858f889ded821744eb41ae2138cb7451d5401f.pdf", "author": "Jiacheng Chen;Ruizhi Deng;Yasutaka Furukawa", "authorids": "~Jiacheng_Chen1;~Ruizhi_Deng1;~Yasutaka_Furukawa1", "gender": ";M;M", "homepage": ";https://ruizhid.me;https://www.cs.sfu.ca/~furukawa", "dblp": ";211/6827;37/1720", "google_scholar": ";https://scholar.google.ca/citations?user=JlmzaGsAAAAJ;https://scholar.google.com.tw/citations?user=wCxzFrMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiacheng_Chen1;~Ruizhi_Deng1;~Yasutaka_Furukawa1", "aff": ";Simon Fraser University;Simon Fraser University", "aff_domain": ";sfu.ca;sfu.ca", "position": ";PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2023polydiffuse,\ntitle={PolyDiffuse: Polygonal Shape Reconstruction via Guided Set Diffusion Models},\nauthor={Jiacheng Chen and Ruizhi Deng and Yasutaka Furukawa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=W2ZBLdfa16}\n}", "github": "", "project": "", "reviewers": "uTWe;Fcug;t57P;pHyL;HnVz", "pdf_size": 9804304, "rating": "5;6;6;7;7", "confidence": "4;4;4;3;3", "soundness": "2;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "199;79;68;120;93", "wc_strengths": "129;70;51;62;150", "wc_weaknesses": "253;62;31;15;126", "wc_questions": "321;1;77;40;9", "wc_limitations": "1;1;31;5;14", "wc_review": "903;213;258;242;392", "wc_reply_reviewers": "19;5;10;9;5", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 111.8, 46.94422222169625 ], "wc_strengths_avg": [ 92.4, 39.48974550437113 ], "wc_weaknesses_avg": [ 97.4, 86.58082928685772 ], "wc_questions_avg": [ 89.6, 118.75285259731658 ], "wc_limitations_avg": [ 10.4, 11.341957503006261 ], "wc_review_avg": [ 401.6, 258.13066458675536 ], "wc_reply_reviewers_avg": [ 9.6, 5.122499389946279 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5039908753684326844&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";sfu.ca;sfu.ca", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Fair Canonical Correlation Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71423", "id": "W3cDd5xlKZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b8e4c8468273ee3bafb288229c0acbc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=W3cDd5xlKZ", "openreview": "https://openreview.net/forum?id=W3cDd5xlKZ", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71423.png?t=1701720256.9044738", "slides": "https://nips.cc/virtual/2023/poster/71423", "video": "https://nips.cc/virtual/2023/poster/71423", "author_site": "Zhuoping Zhou, Davoud Ataee Tarzanagh, Bojian Hou, Boning Tong, Jia Xu, Yanbo Feng, Qi Long, Li Shen", "tldr": "", "abstract": "This paper investigates fairness and bias in Canonical Correlation Analysis (CCA), a widely used statistical technique for examining the relationship between two sets of variables. We present a framework that alleviates unfairness by minimizing the correlation disparity error associated with protected attributes. Our approach enables CCA to learn global projection matrices from all data points while ensuring that these matrices yield comparable correlation levels to group-specific projection matrices. Experimental evaluation on both synthetic and real-world datasets demonstrates the efficacy of our method in reducing correlation disparity error without compromising CCA accuracy.", "keywords": "Fairness;Canonical Correlation Analysis;Riemannian Optimization;Pareto Optimization", "primary_area": "", "supplementary_material": "/attachment/e11aa777839d68a737af8e215a1338461b111828.zip", "author": "Zhuoping Zhou;Davoud Ataee Tarzanagh;Bojian Hou;Boning Tong;Jia Xu;Yanbo Feng;Qi Long;Li Shen", "authorids": "~Zhuoping_Zhou1;~Davoud_Ataee_Tarzanagh1;~Bojian_Hou1;~Boning_Tong1;jiaxu7@upenn.edu;yanbof@seas.upenn.edu;~Qi_Long1;~Li_Shen2", "gender": "F;M;;F;;;M;M", "homepage": "https://www.linkedin.com/in/zhuoping-zhou-b8b7a7224;https://tarzanagh.github.io/;;https://github.com/boningt;;;https://www.med.upenn.edu/long-lab/;https://www.med.upenn.edu/shenlab/", "dblp": "358/3369;;;;;;47/7320;s/LiShen", "google_scholar": ";Djtvz_0AAAAJ;;;;;gfklepYAAAAJ;QnWpiskAAAAJ", "orcid": ";0000-0003-1267-3889;;;;;0000-0003-0660-5230;0000-0002-5443-0503", "linkedin": "zhuoping-zhou-b8b7a7224;;;;;;qi-long-9652a0125/;shenli/", "or_profile": "~Zhuoping_Zhou1;~Davoud_Ataee_Tarzanagh1;~Bojian_Hou1;~Boning_Tong1;jiaxu7@upenn.edu;yanbof@seas.upenn.edu;~Qi_Long1;~Li_Shen2", "aff": "University of Pennsylvania;University of Pennsylvania;;University of Pennsylvania;;;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;;upenn.edu;;;upenn.edu;upenn.edu", "position": "PhD student;Postdoc;;PhD student;;;Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023fair,\ntitle={Fair Canonical Correlation Analysis},\nauthor={Zhuoping Zhou and Davoud Ataee Tarzanagh and Bojian Hou and Boning Tong and Jia Xu and Yanbo Feng and Qi Long and Li Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=W3cDd5xlKZ}\n}", "github": "", "project": "", "reviewers": "6yqF;1Xpv;MdZ1;kQSh", "pdf_size": 2933496, "rating": "6;7;7;7", "confidence": "4;2;4;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "92;47;122;67", "wc_strengths": "142;115;371;72", "wc_weaknesses": "94;110;74;46", "wc_questions": "50;149;55;5", "wc_limitations": "75;180;93;5", "wc_review": "453;601;715;195", "wc_reply_reviewers": "170;0;14;0", "wc_reply_authors": "461;0;11;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.0, 28.062430400804562 ], 
"wc_strengths_avg": [ 175.0, 115.88140489310612 ], "wc_weaknesses_avg": [ 81.0, 23.895606290697042 ], "wc_questions_avg": [ 64.75, 52.39453692895854 ], "wc_limitations_avg": [ 88.25, 62.34330356983018 ], "wc_review_avg": [ 491.0, 194.50963986394095 ], "wc_reply_reviewers_avg": [ 46.0, 71.8192174839019 ], "wc_reply_authors_avg": [ 118.0, 198.08205370502398 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2101163455891946550&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "upenn.edu;upenn.edu;;upenn.edu;;;upenn.edu;upenn.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Toward Understanding Generative Data Augmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71422", "id": "W5Clq1bSrR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a94a8800a4b0af45600bab91164849df-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=W5Clq1bSrR", "openreview": "https://openreview.net/forum?id=W5Clq1bSrR", "poster": "/media/PosterPDFs/NeurIPS%202023/71422.png?t=1701438309.2606335", "slides": "https://nips.cc/virtual/2023/poster/71422", "video": "https://nips.cc/virtual/2023/poster/71422", "author_site": "Chenyu Zheng, Guoqiang Wu, Chongxuan LI", "tldr": "", "abstract": "Generative data augmentation, which scales datasets by obtaining fake labeled examples from a trained conditional generative model, boosts classification performance in various learning tasks including (semi-)supervised learning, few-shot learning, and adversarially robust learning. However, little work has theoretically investigated the effect of generative data augmentation. To fill this gap, we establish a general stability bound in this not independently and identically\ndistributed (non-i.i.d.) setting, where the learned distribution is dependent on the original train set and generally not the same as the true distribution. Our theoretical result includes the divergence between the learned distribution and the true distribution. It shows that generative data augmentation can enjoy a faster learning rate when the order of divergence term is $o(\\max\\left( \\log(m)\\beta_m, 1 / \\sqrt{m})\\right)$, where $m$ is the train set size and $\\beta_m$ is the corresponding stability constant. We further specify the learning setup to the Gaussian mixture model and generative adversarial nets. We prove that in both cases, though generative data augmentation does not enjoy a faster learning rate, it can improve the learning guarantees at a constant level when the train set is small, which is significant when the awful overfitting occurs. Simulation results on the Gaussian mixture model and empirical results on generative adversarial nets support our theoretical conclusions.", "keywords": "generative data augmentation;algorithmic stability;non-i.i.d. 
learning", "primary_area": "", "supplementary_material": "/attachment/9ade17956484317ccac2bfe5b5b5fd403e6e16bd.pdf", "author": "Chenyu Zheng;Guoqiang Wu;Chongxuan Li", "authorids": "~Chenyu_Zheng1;~Guoqiang_Wu2;~Chongxuan_Li1", "gender": "M;M;M", "homepage": "https://chen-yu-zheng.github.io;https://guoqiangwoodrowwu.github.io/;http://ml.cs.tsinghua.edu.cn/~chongxuan", "dblp": "133/5078;98/4857;161/9965", "google_scholar": "QDfsVgYAAAAJ;KCTX-_0AAAAJ;UKMcQn4AAAAJ", "orcid": ";0000-0003-4486-7944;0000-0002-0912-9076", "linkedin": ";;", "or_profile": "~Chenyu_Zheng1;~Guoqiang_Wu2;~Chongxuan_Li1", "aff": "Wuhan University;Shandong University;Renmin University of China", "aff_domain": "whu.edu.cn;sdu.edu.cn;ruc.edu.cn", "position": "Undergrad student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzheng2023toward,\ntitle={Toward Understanding Generative Data Augmentation},\nauthor={Chenyu Zheng and Guoqiang Wu and Chongxuan Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=W5Clq1bSrR}\n}", "github": "", "project": "", "reviewers": "hFUC;Dxq5;sAdG;jB6r;SG4V", "pdf_size": 642084, "rating": "5;5;6;7;7", "confidence": "2;3;3;4;1", "soundness": "3;3;3;3;3", "novelty": "2;3;3;4;3", "presentation": "2;2;4;3;3", "wc_summary": "28;134;130;274;102", "wc_strengths": "35;77;24;66;126", "wc_weaknesses": "122;309;78;239;90", "wc_questions": "16;109;56;125;131", "wc_limitations": "9;2;1;13;4", "wc_review": "210;631;289;717;453", "wc_reply_reviewers": "35;22;26;36;55", "wc_reply_authors": "13;13;11;11;13", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 133.6, 79.84384760268007 ], "wc_strengths_avg": [ 65.6, 35.903203199714646 ], "wc_weaknesses_avg": [ 167.6, 90.7977973301115 ], "wc_questions_avg": [ 87.4, 44.418914889943 ], "wc_limitations_avg": [ 5.8, 4.534313619501853 ], "wc_review_avg": [ 460.0, 193.43215865000317 ], "wc_reply_reviewers_avg": [ 34.8, 11.408768557561329 ], "wc_reply_authors_avg": [ 12.2, 0.9797958971132713 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10352649592857341886&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "whu.edu.cn;sdu.edu.cn;ruc.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Wuhan University;Shandong University;Renmin University of China", "aff_unique_dep": ";;", "aff_unique_url": "http://www.whu.edu.cn/;http://www.sdu.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "WHU;SDU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "ClimSim: A large multi-scale dataset for hybrid physics-ML climate emulation", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73569", "id": "W5If9P1xqO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/45fbcc01349292f5e059a0b8b02c8c3f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=W5If9P1xqO", "openreview": "https://openreview.net/forum?id=W5If9P1xqO", "poster": 
"/media/PosterPDFs/NeurIPS%202023/73569.png?t=1701755651.3560717", "slides": "https://nips.cc/virtual/2023/poster/73569", "video": "https://nips.cc/virtual/2023/poster/73569", "author_site": "Sungduk Yu, Walter Hannah, Liran Peng, Jerry Lin, Mohamed Aziz Bhouri, Ritwik Gupta, Bj\u00f6rn L\u00fctjens, Justus C. Will, Gunnar Behrens, Julius Busecke, Nora Loose, Charles Stern, Tom Beucler, Bryce Harrop, Benjamin Hillman, Andrea Jenney, Savannah L. Ferretti, Nana Liu, Animashree Anandkumar, Noah Brenowitz, Veronika Eyring, Nicholas Geneva, Pierre Gentine, Stephan Mandt, Jaideep Pathak, Akshay Subramaniam, Carl Vondrick, Rose Yu, Laure Zanna, Tian Zheng, Ryan Abernathey, Fiaz Ahmed, David Bader, Pierre Baldi, Elizabeth Barnes, Christopher Bretherton, Peter Caldwell, Wayne Chuang, Yilun Han, YU HUANG, Fernando Iglesias-Suarez, Sanket Jantre, Karthik Kashinath, Marat Khairoutdinov, Thorsten Kurth, Nicholas Lutsko, Po-Lun Ma, Griffin Mooers, J. David Neelin, David Randall, Sara Shamekh, Mark Taylor, Nathan Urban, Janni Yuval, Guang Zhang, Mike Pritchard", "tldr": "", "abstract": "Modern climate projections lack adequate spatial and temporal resolution due to computational constraints. A consequence is inaccurate and imprecise predictions of critical processes such as storms. Hybrid methods that combine physics with machine learning (ML) have introduced a new generation of higher fidelity climate simulators that can sidestep Moore's Law by outsourcing compute-hungry, short, high-resolution simulations to ML emulators. However, this hybrid ML-physics simulation approach requires domain-specific treatment and has been inaccessible to ML experts because of lack of training data and relevant, easy-to-use workflows. We present ClimSim, the largest-ever dataset designed for hybrid ML-physics research. It comprises multi-scale climate simulations, developed by a consortium of climate scientists and ML researchers. It consists of 5.7 billion pairs of multivariate input and output vectors that isolate the influence of locally-nested, high-resolution, high-fidelity physics on a host climate simulator's macro-scale physical state.\n\nThe dataset is global in coverage, spans multiple years at high sampling frequency, and is designed such that resulting emulators are compatible with downstream coupling into operational climate simulators. We implement a range of deterministic and stochastic regression baselines to highlight the ML challenges and their scoring. 
The data (https://huggingface.co/datasets/LEAP/ClimSim_high-res) and code (https://leap-stc.github.io/ClimSim) are released openly to support the development of hybrid ML-physics and high-fidelity climate simulations for the benefit of science and society.", "keywords": "climate;climate modeling;benchmark;dataset;baseline;emulation;superparameterization;multi-scale modeling framework;physics-informed machine learning", "primary_area": "", "supplementary_material": "/attachment/39bb8dd7507ab2e5414600532a5317c28039a3ef.pdf", "author": "Sungduk Yu;Walter Hannah;Liran Peng;Jerry Lin;Mohamed Aziz Bhouri;Ritwik Gupta;Bj\u00f6rn L\u00fctjens;Justus Christopher Will;Gunnar Behrens;Julius Busecke;Nora Loose;Charles I Stern;Tom Beucler;Bryce Harrop;Benjamin R Hillman;Andrea Jenney;Savannah Ferretti;Nana Liu;Anima Anandkumar;Noah D Brenowitz;Veronika Eyring;Nicholas Geneva;Pierre Gentine;Stephan Mandt;Jaideep Pathak;Akshay Subramaniam;Carl Vondrick;Rose Yu;Laure Zanna;Tian Zheng;Ryan Abernathey;Fiaz Ahmed;David C Bader;Pierre Baldi;Elizabeth Barnes;Christopher Bretherton;Peter Caldwell;Wayne Chuang;Yilun Han;YU HUANG;Fernando Iglesias-Suarez;Sanket Jantre;Karthik Kashinath;Marat Khairoutdinov;Thorsten Kurth;Nicholas Lutsko;Po-Lun Ma;Griffin Mooers;J. David Neelin;David Randall;Sara Shamekh;Mark A Taylor;Nathan Urban;Janni Yuval;Guang Zhang;Michael Pritchard", "authorids": "~Sungduk_Yu1;hannah6@llnl.gov;liranp@uci.edu;jerryl9@uci.edu;~Mohamed_Aziz_Bhouri1;~Ritwik_Gupta1;~Bj\u00f6rn_L\u00fctjens1;~Justus_Christopher_Will1;gunnar.behrens@dlr.de;julius@ldeo.columbia.edu;~Nora_Loose1;~Charles_I_Stern1;tom.beucler@unil.ch;bryce.harrop@pnnl.gov;bhillma@sandia.gov;~Andrea_Jenney1;ferretts@uci.edu;~Nana_Liu1;~Anima_Anandkumar1;nbrenowitz@nvidia.com;~Veronika_Eyring1;~Nicholas_Geneva1;~Pierre_Gentine1;~Stephan_Mandt1;~Jaideep_Pathak1;~Akshay_Subramaniam1;~Carl_Vondrick2;~Rose_Yu1;laure.zanna@nyu.edu;~Tian_Zheng1;~Ryan_Abernathey2;fiaz@ucla.edu;bader2@llnl.gov;~Pierre_Baldi1;eabarnes@colostate.edu;~Christopher_Bretherton1;caldwell19@llnl.gov;wc2227@columbia.edu;hanyilun1993@qq.com;~YU_HUANG10;~Fernando_Iglesias-Suarez1;~Sanket_Jantre1;~Karthik_Kashinath2;~Marat_Khairoutdinov1;tkurth@nvidia.com;nlutsko@ucsd.edu;po-lun.ma@pnnl.gov;gmooers@uci.edu;~J._David_Neelin1;~David_Randall1;ss6287@columbia.edu;mataylo@sandia.gov;~Nathan_Urban2;yaniyuval@gmail.com;~Guang_Zhang1;~Michael_Pritchard1", "gender": ";;;;M;;M;M;;;F;;;;;F;;;;;F;M;M;;M;M;M;F;;F;;;;;;;;;;F;M;;;M;;;;;;;;;M;;;M", "homepage": ";;;;https://gentinelab.eee.columbia.edu/people/aziz-bhouri;;https://blutjens.github.io/;https://www.justuswill.com;;;https://noraloose.github.io/;https://cisaacstern.github.io;;;;https://sites.google.com/ucar.edu/jenney;;;;;http://www.pa.op.dlr.de/~/VeronikaEyring/;https://nicholasgeneva.com/;http://www.gentine.com;;https://research.nvidia.com/person/jaideep-pathak;;http://www.cs.columbia.edu/~vondrick/;http://roseyu.com;;;https://ocean-transport.github.io/;;;;;;;;;;https://www.pa.op.dlr.de/homepages/FernandoIglesiasSuarez/;;https://scholar.google.com/citations?user=RrbWhcYAAAAJ&hl=en&oi=ao;;;;;;;;;;https://www.bnl.gov/staff/nurban/;;;http://sites.ps.uci.edu/pritchard", "dblp": "339/7180;;;;;;;349/4535;;;;;;;;;;;;;;;;;;;26/8610;164/7314;;;;;;;;;;;;;;;;;;;;;;;;;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;hthNY5QAAAAJ;;AayqHVcAAAAJ;KXlA6XUAAAAJ;;;;;;;;;;;;;;taB-MmEAAAAJ;;;cevw0gkAAAAJ;hNhELoYAAAAJ;3MzhkFIAAAAJ;;;-4J-KZoAAAAJ;t1lmjVkAAAAJ;;;;;;;;;AB9P5wYAAAAJ;;;RrbWhcYAAAAJ;fC9cvaAAAAAJ;;;;;;;;;;;;", 
"orcid": ";;;;0000-0003-1140-7415;;0000-0002-1616-4830;0009-0000-0013-0161;;;;;;;;;;;;;0000-0002-6887-4885;;;;;0000-0003-0297-0978;;;;0000-0003-4889-0391;0000-0001-5999-4917;;;;;;;;;;0000-0003-3403-8245;;;;;;;;;;;;;;;", "linkedin": "sungduk-yu;;;;;;bjorn-lutjens/;justuswill/;;;;;;;;;;;;;;;;;;akshays-subramaniam;;;;tian-zheng-082402/;;;;;;;;;;;;;;;;;;;;;;;;;;", "or_profile": "~Sungduk_Yu1;hannah6@llnl.gov;liranp@uci.edu;jerryl9@uci.edu;~Mohamed_Aziz_Bhouri1;~Ritwik_Gupta1;~Bj\u00f6rn_L\u00fctjens1;~Justus_Christopher_Will1;gunnar.behrens@dlr.de;julius@ldeo.columbia.edu;~Nora_Loose1;~Charles_I_Stern1;tom.beucler@unil.ch;bryce.harrop@pnnl.gov;bhillma@sandia.gov;~Andrea_Jenney1;ferretts@uci.edu;~Nana_Liu1;~Anima_Anandkumar1;nbrenowitz@nvidia.com;~Veronika_Eyring1;~Nicholas_Geneva1;~Pierre_Gentine1;~Stephan_Mandt1;~Jaideep_Pathak1;~Akshay_Subramaniam1;~Carl_Vondrick2;~Rose_Yu1;laure.zanna@nyu.edu;~Tian_Zheng1;~Ryan_Abernathey2;fiaz@ucla.edu;bader2@llnl.gov;~Pierre_Baldi1;eabarnes@colostate.edu;~Christopher_Bretherton1;caldwell19@llnl.gov;wc2227@columbia.edu;hanyilun1993@qq.com;~YU_HUANG10;~Fernando_Iglesias-Suarez1;~Sanket_Jantre1;~Karthik_Kashinath2;~Marat_Khairoutdinov1;tkurth@nvidia.com;nlutsko@ucsd.edu;po-lun.ma@pnnl.gov;gmooers@uci.edu;~J._David_Neelin1;~David_Randall1;ss6287@columbia.edu;mataylo@sandia.gov;~Nathan_Urban2;yaniyuval@gmail.com;~Guang_Zhang1;~Michael_Pritchard1", "aff": "University of California, Irvine;;;;Columbia University;;Massachusetts Institute of Technology;University of California, Irvine;;;Princeton University;Columbia University;;;;Oregon State University;;;;;DLR, Universit\u00e4t Bremen;NVIDIA;Columbia University;;NVIDIA;NVIDIA;Columbia University;University of California, San Diego;;Columbia University;;;;;;University of Washington;;;;Columbia University;DLR;;;SUNY at Stony Brook;;;;;University of California-Los Angeles;Colorado State University;;;Brookhaven National Laboratory;;University of California-San Diego Scripps Inst of Oceanography;University of California, Irvine", "aff_domain": "uci.edu;;;;columbia.edu;;mit.edu;uci.edu;;;princeton.edu;columbia.edu;;;;oregonstate.edu;;;;;uni-bremen.de;nvidia.com;ee.columbia.edu;;nvidia.com;nvidia.com;columbia.edu;ucsd.edu;;columbia.edu;;;;;;u.washington.edu;;;;columbia.edu;dlr.de;;;sunysb.edu;;;;;;colostate.edu;;;bnl.gov;;;uci.edu", "position": "Researcher;;;;Postdoc;;PhD student;PhD student;;;Postdoc;Researcher;;;;Assistant Professor;;;;;Full Professor;Researcher;Full Professor;;Researcher;Researcher;Assistant Professor;Assistant Professor;;Full Professor;;;;;;;;;;PhD student;Researcher;;;Full Professor;;;;;;;;;Researcher;;;Associate Professor", "bibtex": "@inproceedings{\nyu2023climsim,\ntitle={ClimSim: A large multi-scale dataset for hybrid physics-{ML} climate emulation},\nauthor={Sungduk Yu and Walter Hannah and Liran Peng and Jerry Lin and Mohamed Aziz Bhouri and Ritwik Gupta and Bj{\\\"o}rn L{\\\"u}tjens and Justus Christopher Will and Gunnar Behrens and Julius Busecke and Nora Loose and Charles I Stern and Tom Beucler and Bryce Harrop and Benjamin R Hillman and Andrea Jenney and Savannah Ferretti and Nana Liu and Anima Anandkumar and Noah D Brenowitz and Veronika Eyring and Nicholas Geneva and Pierre Gentine and Stephan Mandt and Jaideep Pathak and Akshay Subramaniam and Carl Vondrick and Rose Yu and Laure Zanna and Tian Zheng and Ryan Abernathey and Fiaz Ahmed and David C Bader and Pierre Baldi and Elizabeth Barnes and Christopher Bretherton and Peter Caldwell and Wayne Chuang and Yilun Han and YU HUANG and 
Fernando Iglesias-Suarez and Sanket Jantre and Karthik Kashinath and Marat Khairoutdinov and Thorsten Kurth and Nicholas Lutsko and Po-Lun Ma and Griffin Mooers and J. David Neelin and David Randall and Sara Shamekh and Mark A Taylor and Nathan Urban and Janni Yuval and Guang Zhang and Michael Pritchard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=W5If9P1xqO}\n}", "github": "", "project": "", "reviewers": "HQ4J;pLrq;PWUd;ujho;uz8R", "pdf_size": 3593263, "rating": "7;8;9;9;10", "confidence": "4;3;4;4;4", "wc_summary_and_contributions": "83;75;65;110;146", "wc_strengths": "42;220;60;207;56", "wc_improvement": "105;834;2;325;191", "wc_limitations": "10;15;31;76;13", "wc_correctness": "8;14;6;11;56", "wc_clarity": "1;12;25;24;15", "wc_relation_to_prior_work": "14;11;26;11;6", "wc_documentation": "6;4;28;163;56", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "270;1186;244;928;540", "wc_reply_reviewers": "19;101;14;49;16", "wc_reply_authors": "648;2780;531;1424;883", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;5;2;3;3", "rating_avg": [ 8.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 95.8, 29.2123261655076 ], "wc_strengths_avg": [ 117.0, 79.1252172192911 ], "wc_improvement_avg": [ 291.4, 291.24601284824485 ], "wc_limitations_avg": [ 29.0, 24.60081299469593 ], "wc_correctness_avg": [ 19.0, 18.69759342803239 ], "wc_clarity_avg": [ 15.4, 8.777243302996677 ], "wc_relation_to_prior_work_avg": [ 13.6, 6.711184694225007 ], "wc_documentation_avg": [ 51.4, 58.87478237751712 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 633.6, 370.01924274286057 ], "wc_reply_reviewers_avg": [ 39.8, 33.15056560603453 ], "wc_reply_authors_avg": [ 1253.2, 822.8218276151891 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 56, 0 ], "corr_rating_confidence": 0.29417420270727607, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10131705469830460910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "uci.edu;;;;columbia.edu;;mit.edu;uci.edu;;;princeton.edu;columbia.edu;;;;oregonstate.edu;;;;;uni-bremen.de;nvidia.com;ee.columbia.edu;;nvidia.com;nvidia.com;columbia.edu;ucsd.edu;;columbia.edu;;;;;;u.washington.edu;;;;columbia.edu;dlr.de;;;sunysb.edu;;;;;;colostate.edu;;;bnl.gov;;;uci.edu", "author_num": 56, "aff_unique_index": "0;1;2;0;3;1;4;5;6;1;6;6;1;7;1;8;1;5;9;10;11;12;7;0", "aff_unique_norm": "University of California, Irvine;Columbia University;Massachusetts Institute of Technology;Princeton University;Oregon State University;Deutsches Zentrum f\u00fcr Luft- und Raumfahrt;NVIDIA;University of California, San Diego;University of Washington;State University of New York at Stony Brook;University of California, Los Angeles;Colorado State University;Brookhaven National Laboratory", "aff_unique_dep": ";;;;;;NVIDIA Corporation;;;;;;", "aff_unique_url": "https://www.uci.edu;https://www.columbia.edu;https://web.mit.edu;https://www.princeton.edu;https://oregonstate.edu;https://www.dlr.de;https://www.nvidia.com;https://www.ucsd.edu;https://www.washington.edu;https://www.stonybrook.edu;https://www.ucla.edu;https://www.colostate.edu;https://www.bnl.gov", "aff_unique_abbr": "UCI;Columbia;MIT;Princeton;OSU;DLR;NVIDIA;UCSD;UW;SUNY Stony Brook;UCLA;CSU;BNL", "aff_campus_unique_index": 
"0;0;2;3;4;5;3;0", "aff_campus_unique": "Irvine;;Bremen;San Diego;Stony Brook;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Black-box Backdoor Defense via Zero-shot Image Purification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71421", "id": "W6U2xSbiE1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b36554b97da741b1c48c9de05c73993e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=W6U2xSbiE1", "openreview": "https://openreview.net/forum?id=W6U2xSbiE1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71421", "video": "https://nips.cc/virtual/2023/poster/71421", "author_site": "Yucheng Shi, Mengnan Du, Xuansheng Wu, Zihan Guan, Jin Sun, Ninghao Liu", "tldr": "", "abstract": "Backdoor attacks inject poisoned samples into the training data, resulting in the misclassification of the poisoned input during a model's deployment. Defending against such attacks is challenging, especially for real-world black-box models where only query access is permitted. In this paper, we propose a novel defense framework against backdoor attacks through Zero-shot Image Purification (ZIP). Our framework can be applied to poisoned models without requiring internal information about the model or any prior knowledge of the clean/poisoned samples. Our defense framework involves two steps. First, we apply a linear transformation (e.g., blurring) on the poisoned image to destroy the backdoor pattern. Then, we use a pre-trained diffusion model to recover the missing semantic information removed by the transformation. In particular, we design a new reverse process by using the transformed image to guide the generation of high-fidelity purified images, which works in zero-shot settings. We evaluate our ZIP framework on multiple datasets with different types of attacks. Experimental results demonstrate the superiority of our ZIP framework compared to state-of-the-art backdoor defense baselines. We believe that our results will provide valuable insights for future defense methods for black-box models. 
Our code is available at https://github.com/sycny/ZIP.", "keywords": "backdoor defense;black-box defense;diffusion model", "primary_area": "", "supplementary_material": "/attachment/e8ed8bf0f408d7e9bcd4d5b9e3c250797f5f9bcc.zip", "author": "Yucheng Shi;Mengnan Du;Xuansheng Wu;Zihan Guan;Jin Sun;Ninghao Liu", "authorids": "~Yucheng_Shi2;~Mengnan_Du1;~Xuansheng_Wu1;~Zihan_Guan2;~Jin_Sun2;~Ninghao_Liu2", "gender": "M;;;M;M;M", "homepage": "https://sycny.github.io/;https://mengnandu.com/;https://github.com/JacksonWuxs;https://jinsungit.github.io/;https://cobweb.cs.uga.edu/~ninghaoliu/;https://guanzihan.github.io/", "dblp": ";183/5606;304/1261;93/1520-11;145/4489;344/0820", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;0i-Js2gAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=scholar.google.com/citations?user=Gw10rFEAAAAJ;Nir-EDYAAAAJ;JmzhiYAAAAAJ", "orcid": "0009-0007-4192-1315;;0000-0002-7816-7658;0009-0004-2926-4023;0000-0002-9170-2424;", "linkedin": ";;;;;", "or_profile": "~Yucheng_Shi2;~Mengnan_Du1;~Xuansheng_Wu1;~Jin_Sun2;~Ninghao_Liu1;~zihan_guan1", "aff": "University of Georgia;New Jersey Institute of Technology;University of Georgia;University of Georgia;University of Georgia;University of Georgia", "aff_domain": "uga.edu;njit.edu;uga.edu;uga.edu;uga.edu;uga.edu", "position": "PhD student;Assistant Professor;PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nshi2023blackbox,\ntitle={Black-box Backdoor Defense via Zero-shot Image Purification},\nauthor={Yucheng Shi and Mengnan Du and Xuansheng Wu and Zihan Guan and Jin Sun and Ninghao Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=W6U2xSbiE1}\n}", "github": "", "project": "", "reviewers": "8ejw;8w5v;xL3D;L743;mm1a", "pdf_size": 25534136, "rating": "5;5;5;5;6", "confidence": "5;4;3;4;5", "soundness": "3;3;3;2;3", "novelty": "3;3;2;2;2", "presentation": "3;3;3;3;3", "wc_summary": "36;64;79;70;103", "wc_strengths": "31;23;33;45;89", "wc_weaknesses": "18;78;175;193;152", "wc_questions": "46;79;6;55;5", "wc_limitations": "50;6;1;21;5", "wc_review": "181;250;294;384;354", "wc_reply_reviewers": "0;13;27;77;28", "wc_reply_authors": "0;0;0;406;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.4, 21.731083728153088 ], "wc_strengths_avg": [ 44.2, 23.481056194302674 ], "wc_weaknesses_avg": [ 123.2, 65.56645483782084 ], "wc_questions_avg": [ 38.2, 28.798611077619697 ], "wc_limitations_avg": [ 16.6, 18.0288657435791 ], "wc_review_avg": [ 292.6, 72.65700241545889 ], "wc_reply_reviewers_avg": [ 29.0, 26.099808428415717 ], "wc_reply_authors_avg": [ 81.2, 162.4 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10966958864337456240&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "uga.edu;njit.edu;uga.edu;uga.edu;uga.edu;uga.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Georgia;New Jersey Institute of Technology", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.uga.edu;https://www.njit.edu", "aff_unique_abbr": "UGA;NJIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "UDC-SIT: A Real-World Dataset for Under-Display Cameras", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73568", "id": "W6xb7bkbYA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d612971396f825dbf8e0e736f99a1955-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=W6xb7bkbYA", "openreview": "https://openreview.net/forum?id=W6xb7bkbYA", "poster": "/media/PosterPDFs/NeurIPS%202023/73568.png?t=1702662590.2655802", "slides": "https://nips.cc/virtual/2023/poster/73568", "video": "https://nips.cc/virtual/2023/poster/73568", "author_site": "Kyusu Ahn, Byeonghyun Ko, HyunGyu Lee, Chanwoo Park, Jaejin Lee", "tldr": "", "abstract": "Under Display Camera (UDC) is a novel imaging system that mounts a digital camera lens beneath a display panel with the panel covering the camera. However, the display panel causes severe degradation to captured images, such as low transmittance, blur, noise, and flare. The restoration of UDC-degraded images is challenging because of the unique luminance and diverse patterns of flares. Existing UDC dataset studies focus on unrealistic or synthetic UDC degradation rather than real-world UDC images. In this paper, we propose a real-world UDC dataset called UDC-SIT. To obtain the non-degraded and UDC-degraded images for the same scene, we propose an image-capturing system and an image alignment technique that exploits discrete Fourier transform (DFT) to align a pair of captured images. UDC-SIT also includes comprehensive annotations missing from other UDC datasets, such as light source, day/night, indoor/outdoor, and flare components (e.g., shimmers, streaks, and glares). We compare UDC-SIT with four existing representative UDC datasets and present the problems with existing UDC datasets. To show UDC-SIT's effectiveness, we compare UDC-SIT and a representative synthetic UDC dataset using four representative learnable image restoration models. The result indicates that the models trained with the synthetic UDC dataset are impractical because the synthetic UDC dataset does not reflect the actual characteristics of UDC-degraded images. UDC-SIT can enable further exploration in the UDC image restoration area and provide better insights into the problem. 
UDC-SIT is available at: https://github.com/mcrl/UDC-SIT.", "keywords": "Under-display camera;Image restoration;Real-world dataset;Alignment of paired images;Fourier transform", "primary_area": "", "supplementary_material": "", "author": "Kyusu Ahn;Byeonghyun Ko;HyunGyu Lee;Chanwoo Park;Jaejin Lee", "authorids": "~Kyusu_Ahn1;~Byeonghyun_Ko1;~HyunGyu_Lee3;~Chanwoo_Park4;~Jaejin_Lee1", "gender": ";M;M;M;M", "homepage": "http://thunder.snu.ac.kr/~kyusu/;http://aces.snu.ac.kr/~byeonghyun/;https://github.com/hyungyulee7;https://github.com/sailor1493;https://sites.google.com/view/jaejinlee", "dblp": "369/7723;;;;30/880.html", "google_scholar": "b7XELUQAAAAJ;;;;6JaKru0AAAAJ", "orcid": "0009-0008-3548-833X;;;0009-0008-1717-1873;0000-0003-4638-8170", "linkedin": ";;;;", "or_profile": "~Kyusu_Ahn1;~Byeonghyun_Ko1;~HyunGyu_Lee3;~Chanwoo_Park4;~Jaejin_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;MS student;MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nahn2023udcsit,\ntitle={{UDC}-{SIT}: A Real-World Dataset for Under-Display Cameras},\nauthor={Kyusu Ahn and Byeonghyun Ko and HyunGyu Lee and Chanwoo Park and Jaejin Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=W6xb7bkbYA}\n}", "github": "", "project": "", "reviewers": "7scG;6C5X;iqzz;YpcN", "pdf_size": 22470254, "rating": "6;6;6;6", "confidence": "4;3;5;4", "wc_summary_and_contributions": "46;81;105;78", "wc_strengths": "27;18;107;46", "wc_improvement": "30;82;103;3", "wc_limitations": "1;92;6;125", "wc_correctness": "1;6;26;1", "wc_clarity": "1;13;11;1", "wc_relation_to_prior_work": "1;2;15;1", "wc_documentation": "4;6;89;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "112;301;463;263", "wc_reply_reviewers": "6;0;0;0", "wc_reply_authors": "221;764;557;470", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 77.5, 20.982135258357285 ], "wc_strengths_avg": [ 49.5, 34.70230539892127 ], "wc_improvement_avg": [ 54.5, 39.87793876318083 ], "wc_limitations_avg": [ 56.0, 53.809850399346026 ], "wc_correctness_avg": [ 8.5, 10.307764064044152 ], "wc_clarity_avg": [ 6.5, 5.545268253204709 ], "wc_relation_to_prior_work_avg": [ 4.75, 5.931905258852336 ], "wc_documentation_avg": [ 26.5, 36.10055401236939 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 284.75, 124.85266316743107 ], "wc_reply_reviewers_avg": [ 1.5, 2.598076211353316 ], "wc_reply_authors_avg": [ 503.0, 194.71132478620754 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=991471055121069786&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, 
{ "title": "BadTrack: A Poison-Only Backdoor Attack on Visual Object Tracking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71420", "id": "W9pJx9sFCh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/828bb8f42d4ab15322b9315151959c61-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=W9pJx9sFCh", "openreview": "https://openreview.net/forum?id=W9pJx9sFCh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71420", "video": "https://nips.cc/virtual/2023/poster/71420", "author_site": "Bin Huang, Jiaqian Yu, Yiwei Chen, Siyang Pan, Qiang Wang, Zhi Wang", "tldr": "", "abstract": "Visual object tracking (VOT) is one of the most fundamental tasks in computer vision community. State-of-the-art VOT trackers extract positive and negative examples that are used to guide the tracker to distinguish the object from the background. In this paper, we show that this characteristic can be exploited to introduce new threats and hence propose a simple yet effective poison-only backdoor attack. To be specific, we poison a small part of the training data by attaching a predefined trigger pattern to the background region of each video frame, so that the trigger appears almost exclusively in the extracted negative examples. To the best of our knowledge, this is the first work that reveals the threat of poison-only backdoor attack on VOT trackers. We experimentally show that our backdoor attack can significantly degrade the performance of both two-stream Siamese and one-stream Transformer trackers on the poisoned data while gaining comparable performance with the benign trackers on the clean data.", "keywords": "Backdoor Attack;Visual Object Tracking;Deep Learning;Poison-Only", "primary_area": "", "supplementary_material": "/attachment/aedf76af909f7049b4eba6770dff05bb2e4a94f1.pdf", "author": "Bin Huang;Jiaqian Yu;Yiwei Chen;Siyang Pan;Qiang Wang;Zhi Wang", "authorids": "~Bin_Huang7;~Jiaqian_Yu1;~Yiwei_Chen4;~Siyang_Pan1;~Qiang_Wang1;~Zhi_Wang5", "gender": ";F;M;M;M;M", "homepage": "https://github.com/huangbinary;;;;http://zwang.inflexionlab.org/;", "dblp": ";164/7325;250/5753;64/5630-23;95/6543-1;", "google_scholar": ";8f7l1dIAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;PK8BtpwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;0000-0001-5632-4408;0000-0002-5462-6178;", "linkedin": ";;;;zhi-wang-b159071a/;", "or_profile": "~Bin_Huang7;~Jiaqian_Yu1;~Siyang_Pan1;~Qiang_Wang1;~Zhi_Wang5;~Chen_Yiwei1", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University;Samsung R&D Institute China - Beijing;Samsung;Samsung;SIGS, Tsinghua University;Samsung Research China-Beijing", "aff_domain": "mails.tsinghua.edu.cn;samsung.com;samsung.com;samsung.com;tsinghua.edu.cn;samsung.com", "position": "MS student;Researcher;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nhuang2023badtrack,\ntitle={BadTrack: A Poison-Only Backdoor Attack on Visual Object Tracking},\nauthor={Bin Huang and Jiaqian Yu and Yiwei Chen and Siyang Pan and Qiang Wang and Zhi Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=W9pJx9sFCh}\n}", "github": "", "project": "", "reviewers": "hJ37;Xirk;dXuB;4718", "pdf_size": 10471163, "rating": "4;5;5;6", "confidence": "4;4;3;2", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "87;50;74;111", "wc_strengths": "51;41;123;28", 
"wc_weaknesses": "101;41;50;73", "wc_questions": "20;72;3;16", "wc_limitations": "37;7;2;81", "wc_review": "296;211;252;309", "wc_reply_reviewers": "18;23;28;45", "wc_reply_authors": "36;22;20;44", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 22.051077071199945 ], "wc_strengths_avg": [ 60.75, 36.85359548266627 ], "wc_weaknesses_avg": [ 66.25, 23.209642392764263 ], "wc_questions_avg": [ 27.75, 26.309456474811487 ], "wc_limitations_avg": [ 31.75, 31.427495923156208 ], "wc_review_avg": [ 267.0, 38.61994303465504 ], "wc_reply_reviewers_avg": [ 28.5, 10.161200716450788 ], "wc_reply_authors_avg": [ 30.5, 9.937303457175895 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6099707047969232450&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;samsung.com;samsung.com;samsung.com;tsinghua.edu.cn;samsung.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Tsinghua University;Samsung", "aff_unique_dep": "Electronic Engineering;Samsung R&D Institute China", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.samsung.com/cn", "aff_unique_abbr": "THU;SRC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "China;South Korea" }, { "title": "Nonparametric Boundary Geometry in Physics Informed Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71419", "id": "WAd5ZRdFoc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/13aef57cf532e88c476a10ff372e44e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WAd5ZRdFoc", "openreview": "https://openreview.net/forum?id=WAd5ZRdFoc", "poster": "/media/PosterPDFs/NeurIPS%202023/71419.png?t=1702484403.3592372", "slides": "https://nips.cc/virtual/2023/poster/71419", "video": "https://nips.cc/virtual/2023/poster/71419", "author_site": "Scott Cameron, Arnu Pretorius, S Roberts", "tldr": "", "abstract": "Engineering design problems frequently require solving systems of\npartial differential equations with boundary conditions specified on\nobject geometries in the form of a triangular mesh. These boundary\ngeometries are provided by a designer and are problem dependent.\nThe efficiency of the design process greatly benefits from fast turnaround\ntimes when repeatedly solving PDEs on various geometries. However,\nmost current work that uses machine learning to speed up the solution\nprocess relies heavily on a fixed parameterization of the geometry, which\ncannot be changed after training. This severely limits the possibility of\nreusing a trained model across a variety of design problems.\nIn this work, we propose a novel neural operator architecture which accepts\nboundary geometry, in the form of triangular meshes, as input and produces an\napproximate solution to a given PDE as output. 
Once trained, the model can be\nused to rapidly estimate the PDE solution over a new geometry, without the need for\nretraining or for converting the geometry to a pre-specified parameterization.", "keywords": "PINNs;physics informed neural networks;geometric deep learning;neural operator;PDEs", "primary_area": "", "supplementary_material": "/attachment/27f23b1556f5e6bd5dfca560bd0ce62853116aff.zip", "author": "Scott Alexander Cameron;Arnu Pretorius;Stephen J. Roberts", "authorids": "~Scott_Alexander_Cameron1;~Arnu_Pretorius1;~Stephen_J._Roberts1", "gender": "M;M;M", "homepage": ";;http://www.robots.ox.ac.uk/~sjrob", "dblp": ";188/4368;64/1485", "google_scholar": ";zZ6ydrAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-3830-6673;;0000-0002-9305-9268", "linkedin": "scott-cameron-93a215bb/;arnupretorius/;", "or_profile": "~Scott_Alexander_Cameron1;~Arnu_Pretorius1;~Stephen_J._Roberts1", "aff": "University of Oxford;InstaDeep;University of Oxford", "aff_domain": "ox.ac.uk;instadeep.com;ox.ac.uk", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\ncameron2023nonparametric,\ntitle={Nonparametric Boundary Geometry in Physics Informed Deep Learning},\nauthor={Scott Alexander Cameron and Arnu Pretorius and Stephen J. Roberts},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WAd5ZRdFoc}\n}", "github": "", "project": "", "reviewers": "L8FG;4pp2;ECF7;mGk1", "pdf_size": 28569399, "rating": "3;5;5;7", "confidence": "2;3;2;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;2;2;3", "wc_summary": "57;143;102;167", "wc_strengths": "27;103;59;77", "wc_weaknesses": "109;353;131;94", "wc_questions": "24;4;3;67", "wc_limitations": "29;10;5;1", "wc_review": "246;613;300;406", "wc_reply_reviewers": "65;82;0;140", "wc_reply_authors": "0;29;0;49", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 117.25, 41.83524232032127 ], "wc_strengths_avg": [ 66.5, 27.654113618049667 ], "wc_weaknesses_avg": [ 171.75, 105.46889351842087 ], "wc_questions_avg": [ 24.5, 25.927784324928346 ], "wc_limitations_avg": [ 11.25, 10.732543966832841 ], "wc_review_avg": [ 391.25, 140.36982403636475 ], "wc_reply_reviewers_avg": [ 71.75, 49.89175783634006 ], "wc_reply_authors_avg": [ 19.5, 20.74246851269154 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3431209750736681156&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ox.ac.uk;instadeep.com;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oxford;InstaDeep", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.instadeep.com", "aff_unique_abbr": "Oxford;InstaDeep", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "NCDL: A Framework for Deep Learning on non-Cartesian Lattices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71418", "id": "WBXYGBQXiB", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/fab489de1a3224f0394d8f1d3c3213a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WBXYGBQXiB", "openreview": "https://openreview.net/forum?id=WBXYGBQXiB", "poster": "/media/PosterPDFs/NeurIPS%202023/71418.png?t=1701705754.3542912", "slides": "https://nips.cc/virtual/2023/poster/71418", "video": "https://nips.cc/virtual/2023/poster/71418", "author_site": "Joshua Horacsek, Usman Alim", "tldr": "", "abstract": "The use of non-Cartesian grids is a niche but important topic in sub-fields of the numerical sciences such as simulation and scientific visualization. However, non-Cartesian approaches are virtually unexplored in machine learning. This is likely due to the difficulties in the representation of data on non-Cartesian domains and the lack of support for standard machine learning operations on non-Cartesian data. This paper proposes a new data structure called the lattice tensor which generalizes traditional tensor spatio-temporal operations to lattice tensors, enabling the use of standard machine learning algorithms on non-Cartesian data. However, data need not reside on a non-Cartesian structure, we use non-Dyadic downsampling schemes to bring Cartesian data into a non-Cartesian space for further processing. We introduce a software library that implements the lattice tensor container (with some common machine learning operations), and demonstrate its effectiveness. Our method provides a general framework for machine learning on non-Cartesian domains, addressing the challenges mentioned above and filling a gap in the current literature.", "keywords": "Computer Vision and Pattern Recognition", "primary_area": "", "supplementary_material": "/attachment/5907a70112899bd4412f162b90dce3ac7e53a0d8.zip", "author": "Joshua John Horacsek;Usman Alim", "authorids": "~Joshua_John_Horacsek1;~Usman_Alim1", "gender": "M;M", "homepage": ";http://www.cpsc.ucalgary.ca/~ualim", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "joshua-horacsek/;", "or_profile": "~Joshua_John_Horacsek1;~Usman_Alim1", "aff": "University of Calgary;University of Calgary", "aff_domain": "ucalgary.ca;ucalgary.ca", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nhoracsek2023ncdl,\ntitle={{NCDL}: A Framework for Deep Learning on non-Cartesian Lattices},\nauthor={Joshua John Horacsek and Usman Alim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WBXYGBQXiB}\n}", "github": "", "project": "", "reviewers": "aoAr;Pd5c;Y55j;uhBv", "pdf_size": 2177016, "rating": "4;5;8;8", "confidence": "3;3;4;4", "soundness": "2;3;4;4", "novelty": "2;3;4;4", "presentation": "2;3;3;4", "wc_summary": "130;31;84;98", "wc_strengths": "46;43;52;132", "wc_weaknesses": "232;66;50;36", "wc_questions": "90;67;65;0", "wc_limitations": "22;11;26;0", "wc_review": "520;218;277;266", "wc_reply_reviewers": "0;0;52;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.75, 35.73776014245996 ], "wc_strengths_avg": [ 68.25, 36.948443810260805 ], "wc_weaknesses_avg": [ 96.0, 79.23383115816122 ], "wc_questions_avg": [ 55.5, 33.514922049737784 ], "wc_limitations_avg": [ 14.75, 10.133484099755622 ], 
"wc_review_avg": [ 320.25, 117.44014432893039 ], "wc_reply_reviewers_avg": [ 20.25, 21.821720830401986 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9801960588196068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13424058407512328883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucalgary.ca;ucalgary.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Calgary", "aff_unique_dep": "", "aff_unique_url": "https://www.ucalgary.ca", "aff_unique_abbr": "U of C", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Top-Ambiguity Samples Matter: Understanding Why Deep Ensemble Works in Selective Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71417", "id": "WBq6Q4ml04", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f7fa4df2c8a79c164d3697898a32bd9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WBq6Q4ml04", "openreview": "https://openreview.net/forum?id=WBq6Q4ml04", "poster": "/media/PosterPDFs/NeurIPS%202023/71417.png?t=1701526258.1894898", "slides": "https://nips.cc/virtual/2023/poster/71417", "video": "https://nips.cc/virtual/2023/poster/71417", "author_site": "Qiang Ding, Yixuan Cao, Ping Luo", "tldr": "", "abstract": "Selective classification allows a machine learning model to reject some hard inputs and thus improve the reliability of its predictions. In this area, the ensemble method is powerful in practice, but there has been no solid analysis on why the ensemble method works. Inspired by an interesting empirical result that the improvement of the ensemble largely comes from top-ambiguity samples where its member models diverge, we prove that, based on some assumptions, the ensemble has a lower selective risk than the member model for any coverage within a range. The proof is nontrivial since the selective risk is a non-convex function of the model prediction. 
The assumptions and the theoretical results are supported by systematic experiments on both computer vision and natural language processing tasks.", "keywords": "selective classification;uncertainty estimation;ensemble learning", "primary_area": "", "supplementary_material": "/attachment/1a182ec45a9709673b8a614bbc951cb8c32cd1e3.zip", "author": "Qiang Ding;Yixuan Cao;Ping Luo", "authorids": "~Qiang_Ding1;~Yixuan_Cao1;~Ping_Luo1", "gender": "M;M;M", "homepage": "https://github.com/DingQiang2018;https://yixuancao.github.io/;https://ping-luo.github.io/", "dblp": ";217/4359;54/4989-1.html", "google_scholar": ";Q5XWFacAAAAJ;", "orcid": ";0000-0002-1721-5927;", "linkedin": ";;", "or_profile": "~Qiang_Ding1;~Yixuan_Cao1;~Ping_Luo1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nding2023topambiguity,\ntitle={Top-Ambiguity Samples Matter: Understanding Why Deep Ensemble Works in Selective Classification},\nauthor={Qiang Ding and Yixuan Cao and Ping Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WBq6Q4ml04}\n}", "github": "", "project": "", "reviewers": "qEtZ;rxSh;Y1pj;f6Kz", "pdf_size": 1466215, "rating": "4;4;6;8", "confidence": "4;3;4;3", "soundness": "2;2;3;4", "novelty": "3;2;2;4", "presentation": "2;3;3;3", "wc_summary": "51;101;92;67", "wc_strengths": "9;47;60;37", "wc_weaknesses": "232;90;134;27", "wc_questions": "2;29;183;5", "wc_limitations": "41;20;25;7", "wc_review": "335;287;494;143", "wc_reply_reviewers": "17;133;205;27", "wc_reply_authors": "21;386;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 19.84155991851447 ], "wc_strengths_avg": [ 38.25, 18.7533330370897 ], "wc_weaknesses_avg": [ 120.75, 74.64373717868098 ], "wc_questions_avg": [ 54.75, 74.78093005573012 ], "wc_limitations_avg": [ 23.25, 12.173228823939851 ], "wc_review_avg": [ 314.75, 125.30836963267856 ], "wc_reply_reviewers_avg": [ 95.5, 77.86366289868465 ], "wc_reply_authors_avg": [ 101.75, 164.33559413590228 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15677408108933598227&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Rank-N-Contrast: Learning Continuous Representations for Regression", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71416", "id": "WHedsAeatp", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/39e9c5913c970e3e49c2df629daff636-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WHedsAeatp", "openreview": "https://openreview.net/forum?id=WHedsAeatp", "poster": "/media/PosterPDFs/NeurIPS%202023/71416.png?t=1698697633.7740283", "slides": "https://nips.cc/virtual/2023/poster/71416", "video": "https://nips.cc/virtual/2023/poster/71416", "author_site": "Kaiwen Zha, Peng Cao, Jeany Son, Yuzhe Yang, Dina Katabi", "tldr": "", "abstract": "Deep regression models typically learn in an end-to-end fashion without explicitly emphasizing a regression-aware representation. Consequently, the learned representations exhibit fragmentation and fail to capture the continuous nature of sample orders, inducing suboptimal results across a wide range of regression tasks. To fill the gap, we propose Rank-N-Contrast (RNC), a framework that learns continuous representations for regression by contrasting samples against each other based on their rankings in the target space. We demonstrate, theoretically and empirically, that RNC guarantees the desired order of learned representations in accordance with the target orders, enjoying not only better performance but also significantly improved robustness, efficiency, and generalization. Extensive experiments using five real-world regression datasets that span computer vision, human-computer interaction, and healthcare verify that RNC achieves state-of-the-art performance, highlighting its intriguing properties including better data efficiency, robustness to spurious targets and data corruptions, and generalization to distribution shifts.", "keywords": "regression;representation learning;continuity", "primary_area": "", "supplementary_material": "/attachment/129484ce22bb089b8bcde526aa6ee15d8a612030.zip", "author": "Kaiwen Zha;Peng Cao;Jeany Son;Yuzhe Yang;Dina Katabi", "authorids": "~Kaiwen_Zha3;~Peng_Cao1;~Jeany_Son1;~Yuzhe_Yang1;~Dina_Katabi1", "gender": "M;F;F;M;", "homepage": ";http://people.csail.mit.edu/pengcao/;https://jeanyson.github.io/;https://people.csail.mit.edu/yuzhe/;", "dblp": "213/6159;;03/8738;213/0962;k/DinaKatabi", "google_scholar": "xLFtb08AAAAJ;UlaXT00AAAAJ;JVliNv8AAAAJ;0_bSbIoAAAAJ;", "orcid": ";0000-0003-2014-5015;;0000-0002-7634-8295;", "linkedin": ";peng-cao-5a1401256/;;yuzhe-yang-6809b2131/;", "or_profile": "~Kaiwen_Zha3;~Peng_Cao1;~Jeany_Son1;~Yuzhe_Yang1;~Dina_Katabi1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Gwangju Institute of Science and Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;gist.ac.kr;mit.edu;mit.edu", "position": "PhD student;PhD student;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nzha2023rankncontrast,\ntitle={Rank-N-Contrast: Learning Continuous Representations for Regression},\nauthor={Kaiwen Zha and Peng Cao and Jeany Son and Yuzhe Yang and Dina Katabi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WHedsAeatp}\n}", "github": "", "project": "", "reviewers": "2Kp1;wKSy;6JYz;dBkT;D7S9", "pdf_size": 16592806, "rating": "6;7;7;8;8", "confidence": "2;4;4;4;3", "soundness": "2;3;4;4;3", "novelty": "2;3;4;3;4", "presentation": "2;3;4;4;4", "wc_summary": "36;116;57;55;62", "wc_strengths": "25;184;154;123;100", "wc_weaknesses": "136;220;114;138;296", "wc_questions": "3;10;5;164;10", "wc_limitations": "14;5;1;96;55", 
"wc_review": "214;535;331;576;523", "wc_reply_reviewers": "42;6;19;222;47", "wc_reply_authors": "99;0;0;357;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 65.2, 26.887915501206116 ], "wc_strengths_avg": [ 117.2, 54.12356233656465 ], "wc_weaknesses_avg": [ 180.8, 67.9835274165735 ], "wc_questions_avg": [ 38.4, 62.86048043087167 ], "wc_limitations_avg": [ 34.2, 36.37251709739099 ], "wc_review_avg": [ 435.8, 139.48390588164645 ], "wc_reply_reviewers_avg": [ 67.2, 78.83501760004879 ], "wc_reply_authors_avg": [ 91.2, 138.3204973964452 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9936194592286663384&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;gist.ac.kr;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Gwangju Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.gist.ac.kr", "aff_unique_abbr": "MIT;GIST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Gwangju", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Contrastive Modules with Temporal Attention for Multi-Task Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71415", "id": "WIrZh2XxLT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72802bef5cf1a3449e909b20c2ae18d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WIrZh2XxLT", "openreview": "https://openreview.net/forum?id=WIrZh2XxLT", "poster": "/media/PosterPDFs/NeurIPS%202023/71415.png?t=1700223977.3048143", "slides": "https://nips.cc/virtual/2023/poster/71415", "video": "https://nips.cc/virtual/2023/poster/71415", "author_site": "Siming Lan, Rui Zhang, Qi Yi, Jiaming Guo, Shaohui Peng, Yunkai Gao, Fan Wu, Ruizhi Chen, Zidong Du, Xing Hu, xishan zhang, Ling Li, Yunji Chen", "tldr": "", "abstract": "In the field of multi-task reinforcement learning, the modular principle, which involves specializing functionalities into different modules and combining them appropriately, has been widely adopted as a promising approach to prevent the negative transfer problem that performance degradation due to conflicts between tasks. However, most of the existing multi-task RL methods only combine shared modules at the task level, ignoring that there may be conflicts within the task. In addition, these methods do not take into account that without constraints, some modules may learn similar functions, resulting in restricting the model's expressiveness and generalization capability of modular methods.\nIn this paper, we propose the Contrastive Modules with Temporal Attention(CMTA) method to address these limitations. 
CMTA constrains the modules to be different from each other via contrastive learning and combines shared modules at a finer granularity than the task level using temporal attention, alleviating negative transfer within tasks and improving the generalization ability and performance of multi-task RL.\nWe conducted experiments on Meta-World, a multi-task RL benchmark containing various robotics manipulation tasks. Experimental results show that, for the first time, CMTA outperforms learning each task individually and achieves substantial performance improvements over the baselines.", "keywords": "reinforcement learning;multi-task learning;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/cc6fd049a2e972d7f21ef902a045aca9058c2004.zip", "author": "Siming Lan;Rui Zhang;Qi Yi;Jiaming Guo;Shaohui Peng;Yunkai Gao;Fan Wu;Ruizhi Chen;Zidong Du;Xing Hu;Xishan Zhang;Ling Li;Yunji Chen", "authorids": "~Siming_Lan1;~Rui_Zhang1;~Qi_Yi1;~Jiaming_Guo2;~Shaohui_Peng2;~Yunkai_Gao1;~Fan_Wu11;~Ruizhi_Chen3;~Zidong_Du1;~Xing_Hu3;~Xishan_Zhang1;~Ling_Li6;~Yunji_Chen1", "gender": "M;F;M;M;M;M;M;;F;;F;M;M", "homepage": "https://github.com/niiceMing;;;;;http://fanwu.academic.site/;;https://zidongdu.github.io/;;;;;", "dblp": ";60/2536-40;295/8813;63/8512;44/8056-1.html;;120/4143;44/11216;49/10052-1;133/6391;92/5001-1;48/474;246/8768", "google_scholar": ";dse6jAsAAAAJ;veu6_ykAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;x_wFaYgAAAAJ;https://scholar.google.com.sg/citations?user=8N9ym9YAAAAJ;Hc3iRxUAAAAJ;;;;", "orcid": ";;;;0000-0003-4126-7441;;0000-0001-7219-4658;0000-0002-7603-4210;;;0000-0001-8877-9052;;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Siming_Lan1;~Rui_Zhang1;~Qi_Yi1;~Jiaming_Guo2;~Yunkai_Gao1;~Fan_Wu11;~Ruizhi_Chen3;~Zidong_Du1;~Xing_Hu3;~Xishan_Zhang1;~Ling_Li6;~Yunji_Chen1;~shaohui_peng1", "aff": "University of Science and Technology of China;Institute of Computing Technology, CAS;University of Science and Technology of China;Institute of Computing Technology, Chinese Academy of Sciences;University of Science and Technology of China;University of Chinese Academy of Sciences, Tsinghua University;Institute of Software Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;, Cambricon Technologies;Institute of Software, CAS;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_domain": "ustc.edu.cn;ict.ac.cn;ustc.edu.cn;ict.ac.cn;ustc.edu.cn;ucas.edu.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn;cambricon.com;iscas.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Assistant Professor;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Associate Professor;Researcher;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nlan2023contrastive,\ntitle={Contrastive Modules with Temporal Attention for Multi-Task Reinforcement Learning},\nauthor={Siming Lan and Rui Zhang and Qi Yi and Jiaming Guo and Shaohui Peng and Yunkai Gao and Fan Wu and Ruizhi Chen and Zidong Du and Xing Hu and Xishan Zhang and Ling Li and Yunji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WIrZh2XxLT}\n}", "github": "", "project": "", "reviewers": "GQFL;4rda;ouGh;fVjH;sdV7", "pdf_size": 5928287, "rating": "5;5;5;6;7", "confidence": "2;4;5;4;4", "soundness": "2;3;2;3;3", "novelty": "3;3;2;3;3", "presentation": 
"1;2;2;2;3", "wc_summary": "56;171;62;61;60", "wc_strengths": "28;219;13;92;39", "wc_weaknesses": "127;292;114;222;34", "wc_questions": "51;70;57;104;55", "wc_limitations": "1;256;23;59;6", "wc_review": "263;1008;269;538;194", "wc_reply_reviewers": "0;477;19;14;5", "wc_reply_authors": "0;318;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 82.0, 44.54660480889649 ], "wc_strengths_avg": [ 78.2, 75.26327125497536 ], "wc_weaknesses_avg": [ 157.8, 89.80512234833824 ], "wc_questions_avg": [ 67.4, 19.37627415165258 ], "wc_limitations_avg": [ 69.0, 95.68489954010508 ], "wc_review_avg": [ 454.4, 300.7581087851166 ], "wc_reply_reviewers_avg": [ 103.0, 187.11814449699955 ], "wc_reply_authors_avg": [ 63.6, 127.20000000000002 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.15309310892394865, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9381997767583746242&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;ict.ac.cn;ustc.edu.cn;ict.ac.cn;ustc.edu.cn;ucas.edu.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn;cambricon.com;iscas.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 13, "aff_unique_index": "0;1;0;1;0;2;1;1;1;3;1;1;1", "aff_unique_norm": "University of Science and Technology of China;Chinese Academy of Sciences;University of Chinese Academy of Sciences;Cambricon Technologies", "aff_unique_dep": ";Institute of Computing Technology;;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ict.ac.cn;http://www.ucas.ac.cn;https://www.cambricon.com", "aff_unique_abbr": "USTC;CAS;UCAS;Cambricon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Unsupervised Anomaly Detection with Rejection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71414", "id": "WK8LQzzHwW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc48c738d3ef8c81b6e968453a84a819-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WK8LQzzHwW", "openreview": "https://openreview.net/forum?id=WK8LQzzHwW", "poster": "/media/PosterPDFs/NeurIPS%202023/71414.png?t=1697291186.796472", "slides": "https://nips.cc/virtual/2023/poster/71414", "video": "https://nips.cc/virtual/2023/poster/71414", "author_site": "Lorenzo Perini, Jesse Davis", "tldr": "", "abstract": "Anomaly detection aims at detecting unexpected behaviours in the data. Because anomaly detection is usually an unsupervised task, traditional anomaly detectors learn a decision boundary by employing heuristics based on intuitions, which are hard to verify in practice. This introduces some uncertainty, especially close to the decision boundary, that may reduce the user trust in the detector's predictions. A way to combat this is by allowing the detector to reject predictions with high uncertainty (Learning to Reject). This requires employing a confidence metric that captures the distance to the decision boundary and setting a rejection threshold to reject low-confidence predictions. 
However, selecting a proper metric and setting the rejection threshold without labels are challenging tasks. In this paper, we solve these challenges by setting a constant rejection threshold on the stability metric computed by ExCeeD. Our insight relies on a theoretical analysis of such a metric. Moreover, setting a constant threshold results in strong guarantees: we estimate the test rejection rate, and derive a theoretical upper bound for both the rejection rate and the expected prediction cost. Experimentally, we show that our method outperforms some metric-based methods.", "keywords": "Anomaly Detection;Learning with Rejection;Unsupervised Learning", "primary_area": "", "supplementary_material": "/attachment/76efe394bba9f2abb74f9c3506b67f94a019c95b.zip", "author": "Lorenzo Perini;Jesse Davis", "authorids": "~Lorenzo_Perini1;~Jesse_Davis1", "gender": "M;M", "homepage": "https://lorenzo-perini.github.io/;https://people.cs.kuleuven.be/~jesse.davis/", "dblp": "269/4550;d/JesseDavis", "google_scholar": "3L1PxnUAAAAJ;https://scholar.google.com.tw/citations?user=gz74XOYAAAAJ", "orcid": "0000-0002-5929-9727;0000-0002-3748-9263", "linkedin": "lorenzo-perini/;", "or_profile": "~Lorenzo_Perini1;~Jesse_Davis1", "aff": "KU Leuven;KU Leuven", "aff_domain": "kuleuven.be;kuleuven.be", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nperini2023unsupervised,\ntitle={Unsupervised Anomaly Detection with Rejection},\nauthor={Lorenzo Perini and Jesse Davis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WK8LQzzHwW}\n}", "github": "", "project": "", "reviewers": "R9xH;imnX;Xnvu;3a9y;6n88", "pdf_size": 439951, "rating": "5;6;6;6;6", "confidence": "3;2;4;3;3", "soundness": "4;3;3;3;3", "novelty": "4;3;3;2;3", "presentation": "4;2;3;3;3", "wc_summary": "54;79;126;60;94", "wc_strengths": "58;53;84;32;170", "wc_weaknesses": "28;164;151;49;90", "wc_questions": "4;1;85;133;47", "wc_limitations": "4;2;27;4;15", "wc_review": "148;299;473;278;416", "wc_reply_reviewers": "10;1;19;79;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 82.6, 25.904439773907484 ], "wc_strengths_avg": [ 79.4, 48.23111029200966 ], "wc_weaknesses_avg": [ 96.4, 53.88357820338215 ], "wc_questions_avg": [ 54.0, 50.11985634456667 ], "wc_limitations_avg": [ 10.4, 9.478396488858229 ], "wc_review_avg": [ 322.8, 113.46788091790557 ], "wc_reply_reviewers_avg": [ 24.4, 27.91128803907122 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=142478575731132493&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kuleuven.be;kuleuven.be", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Katholieke Universiteit Leuven", "aff_unique_dep": "", "aff_unique_url": "https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Belgium" }, { "title": "Projection-Free Methods for Solving Nonconvex-Concave Saddle 
Point Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71413", "id": "WO1kHC5Lfz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a899a801fab59f14777fcc08842b6fc5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WO1kHC5Lfz", "openreview": "https://openreview.net/forum?id=WO1kHC5Lfz", "poster": "/media/PosterPDFs/NeurIPS%202023/71413.png?t=1701813837.8148758", "slides": "https://nips.cc/virtual/2023/poster/71413", "video": "https://nips.cc/virtual/2023/poster/71413", "author_site": "Morteza Boroun, Erfan Yazdandoost Hamedani, Afrooz Jalilzadeh", "tldr": "", "abstract": "In this paper, we investigate a class of constrained saddle point (SP) problems where the objective function is nonconvex-concave and smooth. This class of problems has wide applicability in machine learning, including robust multi-class classification and dictionary learning. Several projection-based primal-dual methods have been developed to tackle this problem; however, the availability of methods with projection-free oracles remains limited. To address this gap, we propose efficient single-loop projection-free methods reliant on first-order information. In particular, using regularization and nested approximation techniques, we propose a primal-dual conditional gradient method that solely employs linear minimization oracles to handle constraints. Assuming that the constraint set in the maximization is strongly convex, our method achieves an $\\epsilon$-stationary solution within $\\mathcal{O}(\\epsilon^{-6})$ iterations. When the projection onto the constraint set of maximization is easy to compute, we propose a one-sided projection-free method that achieves an $\\epsilon$-stationary solution within $\\mathcal{O}(\\epsilon^{-4})$ iterations. Moreover, we present improved iteration complexities of our methods under a strong concavity assumption. 
To the best of our knowledge, our proposed algorithms are among the first projection-free methods with convergence guarantees for solving nonconvex-concave SP problems.", "keywords": "Saddle Point Problem;Projection-free method", "primary_area": "", "supplementary_material": "/attachment/0a22d255425b9f07dd70fee85a67d3c1fe9c961a.zip", "author": "Morteza Boroun;Erfan Yazdandoost Hamedani;Afrooz Jalilzadeh", "authorids": "~Morteza_Boroun1;~Erfan_Yazdandoost_Hamedani1;~Afrooz_Jalilzadeh1", "gender": ";M;F", "homepage": ";https://profiles.arizona.edu/person/erfany;https://afroozjalilzadeh.faculty.arizona.edu", "dblp": ";191/6717;193/7630", "google_scholar": ";imtUGbQAAAAJ;13CgvOEAAAAJ", "orcid": ";0000-0002-3229-3499;0000-0002-3734-1082", "linkedin": ";;", "or_profile": "~Morteza_Boroun1;~Erfan_Yazdandoost_Hamedani1;~Afrooz_Jalilzadeh1", "aff": ";University of Arizona;University of Arizona", "aff_domain": ";arizona.edu;arizona.edu", "position": ";Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nboroun2023projectionfree,\ntitle={Projection-Free Methods for Solving Nonconvex-Concave Saddle Point Problems},\nauthor={Morteza Boroun and Erfan Yazdandoost Hamedani and Afrooz Jalilzadeh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WO1kHC5Lfz}\n}", "github": "", "project": "", "reviewers": "cP1u;LwGk;dDgV;WRhg;RF27", "pdf_size": 516456, "rating": "5;5;6;6;7", "confidence": "3;3;4;3;4", "soundness": "3;3;4;3;3", "novelty": "3;2;2;2;3", "presentation": "2;3;4;3;3", "wc_summary": "33;74;69;57;148", "wc_strengths": "14;46;23;34;202", "wc_weaknesses": "169;99;77;18;109", "wc_questions": "2;4;92;183;42", "wc_limitations": "2;6;3;11;1", "wc_review": "220;229;264;303;502", "wc_reply_reviewers": "14;10;10;97;203", "wc_reply_authors": "0;0;0;373;460", "reply_reviewers": "1;1;1;1;3", "reply_authors": "1;1;1;3;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 76.2, 38.59222719667783 ], "wc_strengths_avg": [ 63.8, 69.92681888946471 ], "wc_weaknesses_avg": [ 94.4, 48.865529773041445 ], "wc_questions_avg": [ 64.6, 67.63312797734554 ], "wc_limitations_avg": [ 4.6, 3.6110940170535577 ], "wc_review_avg": [ 303.6, 103.42649563820675 ], "wc_reply_reviewers_avg": [ 66.8, 75.76648335510893 ], "wc_reply_authors_avg": [ 166.6, 205.88890208070953 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15286141494197615407&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";arizona.edu;arizona.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Robust Exact Algorithm for the Euclidean Bipartite Matching Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71412", "id": "WPbIAdB6aQ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a24a75ef009ee73b160653c16b18f00e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WPbIAdB6aQ", "openreview": "https://openreview.net/forum?id=WPbIAdB6aQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71412.png?t=1701956125.6814137", "slides": "https://nips.cc/virtual/2023/poster/71412", "video": "https://nips.cc/virtual/2023/poster/71412", "author_site": "Akshaykumar Gattani, Sharath Raghvendra, Pouyan Shirzadian", "tldr": "", "abstract": "Algorithms for the minimum-cost bipartite matching can be used to estimate Wasserstein distance between two distributions.\nGiven two sets $A$ and $B$ of $n$ points in a $2$-dimensional Euclidean space, one can use a fast implementation of the Hungarian method to compute a minimum-cost bipartite matching of $A$ and $B$ in $\\tilde{O}(n^2)$ time. Let $\\Delta$ be the spread, i.e., the ratio of the distance of the farthest to the closest pair of points in $A\\cup B$. In this paper, we present a new algorithm to compute a minimum-cost bipartite matching of $A$ and $B$ with a similar worst-case execution time of $\\tilde{O}(n^2 \\log \\Delta)$. However, when $A$ and $B$ are drawn independently and identically from a fixed distribution that is not known to the algorithm, the execution time of our algorithm is, in expectation, $\\tilde{O}(n^{7/4}\\log \\Delta)$.\n\nTo the best of our knowledge, our algorithm is the first one to achieve a sub-quadratic execution time even for stochastic point sets with real-valued coordinates.\nOur algorithm extends to any dimension $d$, where it runs in $\\tilde{O}(n^{2-\\frac{1}{2d}}\\Phi(n))$ time for stochastic point sets $A$ and $B$; here $\\Phi(n)$ is the query/update time of a dynamic weighted nearest neighbor data structure. 
\nOur algorithm can be seen as a careful adaptation of the Hungarian method in the geometric divide-and-conquer framework.", "keywords": "Euclidean bipartite matching;exact algorithms;primal dual method", "primary_area": "", "supplementary_material": "/attachment/c36424f320a3d8167030e37038690392a9e29c5a.pdf", "author": "Akshaykumar G Gattani;Sharath Raghvendra;Pouyan Shirzadian", "authorids": "agattani@vt.edu;~Sharath_Raghvendra1;~Pouyan_Shirzadian1", "gender": ";M;M", "homepage": ";http://people.cs.vt.edu/~sharathr/;https://sites.google.com/vt.edu/pshirzadian/home", "dblp": ";149/2582;322/7785", "google_scholar": ";https://scholar.google.com.tw/citations?user=kOfRa7MAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-8315-2357", "linkedin": ";;", "or_profile": "agattani@vt.edu;~Sharath_Raghvendra1;~Pouyan_Shirzadian1", "aff": ";Virginia Tech;Virginia Polytechnic Institute and State University", "aff_domain": ";vt.edu;vt.edu", "position": ";Associate Professor;PhD student", "bibtex": "@inproceedings{\ngattani2023a,\ntitle={A Robust Exact Algorithm for the Euclidean Bipartite Matching Problem},\nauthor={Akshaykumar G Gattani and Sharath Raghvendra and Pouyan Shirzadian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WPbIAdB6aQ}\n}", "github": "", "project": "", "reviewers": "L63E;NeKH;SwWP;FnYg", "pdf_size": 711973, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "110;74;321;358", "wc_strengths": "13;36;89;176", "wc_weaknesses": "86;57;68;183", "wc_questions": "23;36;59;172", "wc_limitations": "1;9;1;25", "wc_review": "233;212;538;914", "wc_reply_reviewers": "15;13;101;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 215.75, 125.0887185160996 ], "wc_strengths_avg": [ 78.5, 62.67575288738062 ], "wc_weaknesses_avg": [ 98.5, 49.87233702163956 ], "wc_questions_avg": [ 72.5, 58.8748673034598 ], "wc_limitations_avg": [ 9.0, 9.797958971132712 ], "wc_review_avg": [ 474.25, 284.78972506043823 ], "wc_reply_reviewers_avg": [ 38.25, 36.46488036453705 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14236278833889939344&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";vt.edu;vt.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Virginia Tech", "aff_unique_dep": "", "aff_unique_url": "https://www.vt.edu", "aff_unique_abbr": "VT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Full-Atom Protein Pocket Design via Iterative Refinement", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71411", "id": "WPdGRRJaPb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/365a6f71486ecdfa7eb8d61cbe168782-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WPdGRRJaPb", "openreview": 
"https://openreview.net/forum?id=WPdGRRJaPb", "poster": "/media/PosterPDFs/NeurIPS%202023/71411.png?t=1699213520.468551", "slides": "https://nips.cc/virtual/2023/poster/71411", "video": "https://nips.cc/virtual/2023/poster/71411", "author_site": "ZAIXI ZHANG, Zepu Lu, Hao Zhongkai, Marinka Zitnik, Qi Liu", "tldr": "", "abstract": "The design of \\emph{de novo} functional proteins that bind with specific ligand molecules is crucial in various domains like therapeutics and bio-engineering. One vital yet challenging step is to design the protein pocket, the cavity region of protein where the ligand binds with. Existing methods suffer from inefficient generation, insufficient context modeling (ligand molecule), and incapability of generating sidechain atoms. To overcome the limitations, we propose a \\textbf{F}ull-\\textbf{A}tom \\textbf{I}terative \\textbf{R}efinement framework (\\textbf{FAIR}) for protein pocket sequence (i.e., residue types) and 3D structure co-design. Generally, FAIR consists of two steps that follow a coarse-to-fine pipeline (backbone atoms to full atoms including sidechain) for full-atom generation. For efficiency, all residue types and structures are updated together in each round (i.e., full-shot refinement). In the first step, the residue types and backbone coordinates are updated with a hierarchical context encoder and two structure refinement modules capturing inter-residue and pocket-ligand interactions. The second step further models the sidechain atoms of pockets and updates residue types to achieve sequence-structure consistency. The structure of the binding ligand is also updated along with the above refinement iterations accounting for its flexibility. Finally, extensive evaluations show\nthat FAIR outperforms baselines in efficiently designing high-quality pocket sequences and structures. 
Specifically, the average improvements on AAR and RMSD are over 10$\\%$.", "keywords": "Graph Representation Learning;AI for Science", "primary_area": "", "supplementary_material": "", "author": "ZAIXI ZHANG;Zepu Lu;Zhongkai Hao;Marinka Zitnik;Qi Liu", "authorids": "~ZAIXI_ZHANG2;~Zepu_Lu1;~Zhongkai_Hao1;~Marinka_Zitnik1;~Qi_Liu3", "gender": "M;F;;M;M", "homepage": "http://home.ustc.edu.cn/~zaixi/;https://github.com/luzepu;https://zitniklab.hms.harvard.edu;http://staff.ustc.edu.cn/~qiliuql/;https://haozhongkai.github.io/", "dblp": "267/9295.html;342/3815;53/11277.html;95/2446-3;270/0220.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;YtUDgPIAAAAJ;5EoHAFwAAAAJ;dfSzq27ZiVoC", "orcid": ";;;0000-0001-6956-5550;", "linkedin": ";;;;", "or_profile": "~ZAIXI_ZHANG2;~Zepu_Lu1;~Marinka_Zitnik1;~Qi_Liu3;~Hao_Zhongkai1", "aff": "University of Science and Technology of China;University of Science and Technology of China;Harvard University;University of Science and Technology of China;Tsinghua University", "aff_domain": "ustc.edu.cn;ustc.edu.cn;harvard.edu;ustc.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhang2023fullatom,\ntitle={Full-Atom Protein Pocket Design via Iterative Refinement},\nauthor={ZAIXI ZHANG and Zepu Lu and Zhongkai Hao and Marinka Zitnik and Qi Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WPdGRRJaPb}\n}", "github": "", "project": "", "reviewers": "z6bW;rsaq;Luqz;f8hF;4UXg", "pdf_size": 1725683, "rating": "6;6;8;9;9", "confidence": "3;3;4;5;4", "soundness": "2;3;4;3;4", "novelty": "3;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "74;65;57;69;62", "wc_strengths": "256;101;92;130;176", "wc_weaknesses": "319;36;53;45;12", "wc_questions": "177;92;185;43;35", "wc_limitations": "109;45;72;10;10", "wc_review": "935;339;459;297;295", "wc_reply_reviewers": "858;17;0;57;25", "wc_reply_authors": "1671;20;0;37;20", "reply_reviewers": "3;1;0;1;1", "reply_authors": "5;2;1;2;2", "rating_avg": [ 7.6, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 65.4, 5.817215828899594 ], "wc_strengths_avg": [ 151.0, 60.119880239401674 ], "wc_weaknesses_avg": [ 93.0, 113.833211322531 ], "wc_questions_avg": [ 106.4, 64.01124901140425 ], "wc_limitations_avg": [ 49.2, 37.91253090997751 ], "wc_review_avg": [ 465.0, 242.46071846796133 ], "wc_reply_reviewers_avg": [ 191.4, 333.813480854204 ], "wc_reply_authors_avg": [ 349.6, 660.8039346129834 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9063269671749656, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7782458785303755978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ustc.edu.cn;ustc.edu.cn;harvard.edu;ustc.edu.cn;mails.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Science and Technology of China;Harvard University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.harvard.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "USTC;Harvard;THU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "DYffusion: A Dynamics-informed Diffusion Model for Spatiotemporal Forecasting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71410", "id": "WRGldGm5Hz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8df90a1440ce782d1f5607b7a38f2531-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WRGldGm5Hz", "openreview": "https://openreview.net/forum?id=WRGldGm5Hz", "poster": "/media/PosterPDFs/NeurIPS%202023/71410.png?t=1699572036.0470743", "slides": "https://nips.cc/virtual/2023/poster/71410", "video": "https://nips.cc/virtual/2023/poster/71410", "author_site": "Salva R\u00fchling Cachay, Bo Zhao, Hailey Joren, Rose Yu", "tldr": "", "abstract": "While diffusion models can successfully generate data and make predictions, they are predominantly designed for static images. We propose an approach for training diffusion models for dynamics forecasting that leverages the temporal dynamics encoded in the data, directly coupling it with the diffusion steps in the network. We train a stochastic, time-conditioned interpolator and a backbone forecaster network\nthat mimic the forward and reverse processes of conventional diffusion models, respectively. This design choice naturally encodes multi-step and long-range forecasting capabilities, allowing for highly flexible, continuous-time sampling trajectories and the ability to trade-off performance with accelerated sampling at inference time. In addition, the dynamics-informed diffusion process imposes a strong inductive bias, allowing for improved computational efficiency compared to traditional Gaussian noise-based diffusion models. 
Our approach performs competitively on probabilistic skill score metrics in complex dynamics forecasting of sea surface temperatures, Navier-Stokes flows, and spring mesh systems.", "keywords": "AI for science;diffusion models;scientific machine learning;probabilistic forecasting", "primary_area": "", "supplementary_material": "/attachment/06ade9ae0db860286f9f368c3c2b59ac4cf7cfb5.pdf", "author": "Salva R\u00fchling Cachay;Bo Zhao;Hailey Joren;Rose Yu", "authorids": "~Salva_R\u00fchling_Cachay1;~Bo_Zhao6;~Hailey_Joren1;~Rose_Yu1", "gender": "M;;;F", "homepage": "https://salvarc.github.io/;https://b-zhao.github.io;;http://roseyu.com", "dblp": "280/1016;;;164/7314", "google_scholar": "8RHc4eQAAAAJ;ZCCrFoIAAAAJ;;", "orcid": "0000-0002-7968-5035;;;", "linkedin": "salva-rc;;;", "or_profile": "~Salva_R\u00fchling_Cachay1;~Bo_Zhao6;~Hailey_Joren1;~Rose_Yu1", "aff": "University of California, San Diego;University of California, San Diego;;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;;ucsd.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\ncachay2023dyffusion,\ntitle={{DY}ffusion: A Dynamics-informed Diffusion Model for Spatiotemporal Forecasting},\nauthor={Salva R{\\\"u}hling Cachay and Bo Zhao and Hailey Joren and Rose Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WRGldGm5Hz}\n}", "github": "", "project": "", "reviewers": "4TEk;7msc;XpdM;WiMo", "pdf_size": 1507255, "rating": "4;5;7;7", "confidence": "4;4;3;3", "soundness": "2;2;2;2", "novelty": "2;2;3;3", "presentation": "2;3;1;3", "wc_summary": "93;77;118;65", "wc_strengths": "43;49;120;20", "wc_weaknesses": "213;157;38;73", "wc_questions": "19;58;186;761", "wc_limitations": "16;21;1;11", "wc_review": "384;362;463;930", "wc_reply_reviewers": "268;213;171;438", "wc_reply_authors": "665;483;123;499", "reply_reviewers": "1;2;1;3", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 88.25, 19.84155991851447 ], "wc_strengths_avg": [ 58.0, 37.39652390263031 ], "wc_weaknesses_avg": [ 120.25, 68.83086153753997 ], "wc_questions_avg": [ 256.0, 298.0343939883449 ], "wc_limitations_avg": [ 12.25, 7.39509972887452 ], "wc_review_avg": [ 534.75, 231.267566900333 ], "wc_reply_reviewers_avg": [ 272.5, 101.55417273554052 ], "wc_reply_authors_avg": [ 442.5, 197.74920985935697 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10212343872585451032&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ucsd.edu;ucsd.edu;;ucsd.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Orthogonal Non-negative Tensor Factorization based Multi-view Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71409", "id": "WRtlsxA5h7", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a5b75ce6cbd3aaaa32d6e935ffc4cff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WRtlsxA5h7", "openreview": "https://openreview.net/forum?id=WRtlsxA5h7", "poster": "/media/PosterPDFs/NeurIPS%202023/71409.png?t=1699255935.1168842", "slides": "https://nips.cc/virtual/2023/poster/71409", "video": "https://nips.cc/virtual/2023/poster/71409", "author_site": "Jing Li, Quanxue Gao, QIANQIAN WANG, Ming Yang, Wei Xia", "tldr": "", "abstract": "Multi-view clustering (MVC) based on non-negative matrix factorization (NMF) and its variants have attracted much attention due to their advantages in clustering interpretability. However, existing NMF-based multi-view clustering methods perform NMF on each view respectively and ignore the impact of between-view. Thus, they can't well exploit the within-view spatial structure and between-view complementary information. To resolve this issue, we present orthogonal non-negative tensor factorization (Orth-NTF) and develop a novel multi-view clustering based on Orth-NTF with one-side orthogonal constraint. Our model directly performs Orth-NTF on the 3rd-order tensor which is composed of anchor graphs of views. Thus, our model directly considers the between-view relationship. Moreover, we use the tensor Schatten $p$-norm regularization as a rank approximation of the 3rd-order tensor which characterizes the cluster structure of multi-view data and exploits the between-view complementary information. In addition, we provide an optimization algorithm for the proposed method and prove mathematically that the algorithm always converges to the stationary KKT point. Extensive experiments on various benchmark datasets indicate that our proposed method is able to achieve satisfactory clustering performance.", "keywords": "Multi-view clustering;tensor Schatten p-norm;non-negative matrix factorization.", "primary_area": "", "supplementary_material": "/attachment/abb9aa1d01194817db147c47fce4edd572a475ae.pdf", "author": "Jing Li;Quanxue Gao;QIANQIAN WANG;Ming Yang;Wei Xia", "authorids": "~Jing_Li25;~Quanxue_Gao1;~QIANQIAN_WANG3;~Ming_Yang11;~Wei_Xia5", "gender": "M;M;F;M;M", "homepage": "https://github.com/Lee-jj;;https://qianqian.world;;", "dblp": ";63/804;118/6735-1;;", "google_scholar": ";R1vy_skAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.fr/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-9592-6193;;0000-0001-8217-5952;;", "linkedin": ";;;;", "or_profile": "~Jing_Li25;~Quanxue_Gao1;~QIANQIAN_WANG3;~Ming_Yang11;~Wei_Xia5", "aff": "Xidian University;Xidian University;Xidian University;University of Evansville;Xidian University", "aff_domain": "xidian.edu.cn;xidian.edu.cn;xidian.edu;evansville.edu;xidian.edu.cn", "position": "MS student;Full Professor;Lecturer;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nli2023orthogonal,\ntitle={Orthogonal Non-negative Tensor Factorization based Multi-view Clustering},\nauthor={Jing Li and Quanxue Gao and QIANQIAN WANG and Ming Yang and Wei Xia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WRtlsxA5h7}\n}", "github": "", "project": "", "reviewers": "xoPB;a6FX;J3E2;PAbp", "pdf_size": 1625777, "rating": "5;5;7;7", "confidence": "4;4;5;5", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "2;4;3;3", "wc_summary": "73;105;138;32", "wc_strengths": "51;53;75;25", "wc_weaknesses": 
"127;72;86;108", "wc_questions": "61;6;51;71", "wc_limitations": "18;1;12;27", "wc_review": "330;237;362;263", "wc_reply_reviewers": "80;15;27;0", "wc_reply_authors": "125;13;13;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.0, 39.198214245039274 ], "wc_strengths_avg": [ 51.0, 17.72004514666935 ], "wc_weaknesses_avg": [ 98.25, 20.980645843252777 ], "wc_questions_avg": [ 47.25, 24.843258642939738 ], "wc_limitations_avg": [ 14.5, 9.447221813845593 ], "wc_review_avg": [ 298.0, 50.16472864473604 ], "wc_reply_reviewers_avg": [ 30.5, 30.137186331839274 ], "wc_reply_authors_avg": [ 37.75, 50.65261592455023 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12089928959393782251&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "xidian.edu.cn;xidian.edu.cn;xidian.edu;evansville.edu;xidian.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Xidian University;University of Evansville", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.evansville.edu", "aff_unique_abbr": "Xidian;UE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Rank-DETR for High Quality Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71408", "id": "WUott1ZvRj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34074479ee2186a9f236b8fd03635372-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WUott1ZvRj", "openreview": "https://openreview.net/forum?id=WUott1ZvRj", "poster": "/media/PosterPDFs/NeurIPS%202023/71408.png?t=1701576560.7100017", "slides": "https://nips.cc/virtual/2023/poster/71408", "video": "https://nips.cc/virtual/2023/poster/71408", "author_site": "Yifan Pu, Weicong Liang, Yiduo Hao, YUHUI YUAN, Yukang Yang, Chao Zhang, Han Hu, Gao Huang", "tldr": "", "abstract": "Modern detection transformers (DETRs) use a set of object queries to predict a list of bounding boxes, sort them by their classification confidence scores, and select the top-ranked predictions as the final detection results for the given input image. A highly performant object detector requires accurate ranking for the bounding box predictions. For DETR-based detectors, the top-ranked bounding boxes suffer from less accurate localization quality due to the misalignment between classification scores and localization accuracy, thus impeding the construction of high-quality detectors. In this work, we introduce a simple and highly performant DETR-based object detector by proposing a series of rank-oriented designs, combinedly called Rank-DETR. Our key contributions include: (i) a rank-oriented architecture design that can prompt positive predictions and suppress the negative ones to ensure lower false positive rates, as well as (ii) a rank-oriented loss function and matching cost design that prioritizes predictions of more accurate localization accuracy during ranking to boost the AP under high IoU thresholds. 
We apply our method to improve the recent SOTA methods (e.g., H-DETR and DINO-DETR) and report strong COCO object detection results when using different backbones such as ResNet-$50$, Swin-T, and Swin-L, demonstrating the effectiveness of our approach. Code is available at \\url{https://github.com/LeapLabTHU/Rank-DETR}.", "keywords": "Object Detection", "primary_area": "", "supplementary_material": "", "author": "Yifan Pu;Weicong Liang;Yiduo Hao;Yuhui Yuan;Yukang Yang;Chao Zhang;Han Hu;Gao Huang", "authorids": "~Yifan_Pu1;~Weicong_Liang1;~Yiduo_Hao1;~Yuhui_Yuan1;~Yukang_Yang1;~Chao_Zhang10;~Han_Hu1;~Gao_Huang1", "gender": "M;M;;M;M;M;M;M", "homepage": "https://yifanpu001.github.io/;;;;;http://www.cis.pku.edu.cn/faculty/vision/zhangchao/zhangchao.htm;https://ancientmooner.github.io/;http://www.gaohuang.net", "dblp": "222/2710;330/4850;;190/7361;;94/3019-1;;", "google_scholar": "oM9rnYQAAAAJ;QvHDIygAAAAJ;;PzyvzksAAAAJ;ASZWVzEAAAAJ;NeCCx-kAAAAJ;Jkss014AAAAJ;-P9LwcgAAAAJ", "orcid": "0000-0002-0404-1737;;;;;;;", "linkedin": ";weicong-liang-victor;;rainbowsecret/;;;;", "or_profile": "~Yifan_Pu1;~Weicong_Liang1;~Yiduo_Hao1;~Yuhui_Yuan1;~Yukang_Yang1;~Chao_Zhang10;~Han_Hu1;~Gao_Huang1", "aff": "Tsinghua University;Peking University;;Microsoft Research;Microsoft Research;Peking University;Microsft Research Asia;Tsinghua University", "aff_domain": "tsinghua.edu.cn;pku.edu.cn;;microsoft.com;research.microsoft.com;pku.edu.cn;microsoft.com;tsinghua.edu.cn", "position": "MS student;MS student;;Senior Researcher;Intern;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\npu2023rankdetr,\ntitle={Rank-{DETR} for High Quality Object Detection},\nauthor={Yifan Pu and Weicong Liang and Yiduo Hao and Yuhui Yuan and Yukang Yang and Chao Zhang and Han Hu and Gao Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WUott1ZvRj}\n}", "github": "", "project": "", "reviewers": "kDha;m522;JDo7;vAjC", "pdf_size": 3237104, "rating": "5;5;6;7", "confidence": "3;4;4;5", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "85;86;97;51", "wc_strengths": "101;88;41;31", "wc_weaknesses": "82;175;189;182", "wc_questions": "108;6;69;3", "wc_limitations": "13;1;6;1", "wc_review": "389;356;402;268", "wc_reply_reviewers": "26;19;100;25", "wc_reply_authors": "0;0;50;52", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.75, 17.25362280797862 ], "wc_strengths_avg": [ 65.25, 29.819247140060394 ], "wc_weaknesses_avg": [ 157.0, 43.583253664681806 ], "wc_questions_avg": [ 46.5, 44.21820891895103 ], "wc_limitations_avg": [ 5.25, 4.9180788932265 ], "wc_review_avg": [ 353.75, 52.27033097274208 ], "wc_reply_reviewers_avg": [ 42.5, 33.30540496676178 ], "wc_reply_authors_avg": [ 25.5, 25.509802037648196 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7118386625491379914&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;pku.edu.cn;;microsoft.com;research.microsoft.com;pku.edu.cn;microsoft.com;tsinghua.edu.cn", 
"author_num": 8, "aff_unique_index": "0;1;2;2;1;2;0", "aff_unique_norm": "Tsinghua University;Peking University;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "THU;Peking U;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Mixture Weight Estimation and Model Prediction in Multi-source Multi-target Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71407", "id": "WVmus8NWE8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0fa81c3f0d57f95b8776de3a248ef0ed-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WVmus8NWE8", "openreview": "https://openreview.net/forum?id=WVmus8NWE8", "poster": "/media/PosterPDFs/NeurIPS%202023/71407.png?t=1702062913.5630002", "slides": "https://nips.cc/virtual/2023/poster/71407", "video": "https://nips.cc/virtual/2023/poster/71407", "author_site": "Yuyang Deng, Ilja Kuzborskij, Mehrdad Mahdavi", "tldr": "", "abstract": "We consider a problem of learning a model from multiple sources with the goal to perform\nwell on a new target distribution. Such problem arises in\nlearning with data collected from multiple sources (e.g. crowdsourcing) or\nlearning in distributed systems, where the data can be highly heterogeneous. The\ngoal of learner is to mix these data sources in a target-distribution aware way and\nsimultaneously minimize the empirical risk on the mixed source. The literature has made some tangible advancements in establishing\ntheory of learning on mixture domain. However, there are still two unsolved problems. Firstly, how to estimate the optimal mixture of sources, given a target domain; Secondly, when there are numerous target domains, we have to solve empirical risk minimization for each target on possibly unique mixed source data , which is computationally expensive. 
In this paper, we address both problems efficiently and with guarantees.\nWe cast the first problem, mixture weight estimation, as a convex-nonconcave compositional minimax problem, and propose an efficient stochastic\nalgorithm with provable stationarity guarantees.\nNext, for the second problem, we identify that, for a certain regime,\nsolving ERM for each target domain individually can be avoided, and instead the parameters of a target-optimal\nmodel can be viewed as a non-linear function on\nthe space of mixture coefficients.\nTo this end, we show that, in the offline setting, a GD-trained overparameterized neural network can provably learn such a function.\nFinally, we also consider an online setting and propose a label-efficient online algorithm, which predicts parameters for new models given an arbitrary sequence of mixing coefficients, while enjoying optimal regret.", "keywords": "Multi-source domain adaptation; minimax optimization; learning theory", "primary_area": "", "supplementary_material": "/attachment/c32ad765e55d9a0774f6c0d0451d187c26478a61.pdf", "author": "Yuyang Deng;Ilja Kuzborskij;Mehrdad Mahdavi", "authorids": "~Yuyang_Deng3;~Ilja_Kuzborskij1;~Mehrdad_Mahdavi2", "gender": "M;M;M", "homepage": "https://sites.psu.edu/yuyangdeng/;https://iljaku.github.io/;http://www.cse.psu.edu/~mzm616/", "dblp": "261/9253;135/4924.html;88/4321", "google_scholar": "bfV3XWUAAAAJ;4Io_CtIAAAAJ;HzxnwocAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuyang_Deng3;~Ilja_Kuzborskij1;~Mehrdad_Mahdavi2", "aff": "Pennsylvania State University;Google DeepMind;Toyota Technological Institute at Chicago", "aff_domain": "psu.edu;deepmind.com;ttic.edu", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\ndeng2023mixture,\ntitle={Mixture Weight Estimation and Model Prediction in Multi-source Multi-target Domain Adaptation},\nauthor={Yuyang Deng and Ilja Kuzborskij and Mehrdad Mahdavi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WVmus8NWE8}\n}", "github": "", "project": "", "reviewers": "Ety2;u3ZP;3rHn;9T2K;T5Nt", "pdf_size": 683503, "rating": "4;4;5;6;7", "confidence": "3;3;1;3;3", "soundness": "3;3;2;3;3", "novelty": "2;2;2;2;3", "presentation": "2;3;2;2;4", "wc_summary": "75;142;59;63;115", "wc_strengths": "25;83;53;39;99", "wc_weaknesses": "147;92;73;106;148", "wc_questions": "107;74;30;141;113", "wc_limitations": "18;28;10;10;29", "wc_review": "372;419;225;359;504", "wc_reply_reviewers": "153;0;167;196;30", "wc_reply_authors": "446;144;0;815;18", "reply_reviewers": "1;0;1;2;1", "reply_authors": "3;2;1;3;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 90.8, 32.37529922641642 ], "wc_strengths_avg": [ 59.8, 27.440116617827993 ], "wc_weaknesses_avg": [ 113.2, 29.902508255997525 ], "wc_questions_avg": [ 93.0, 38.02630668366309 ], "wc_limitations_avg": [ 19.0, 8.294576541331088 ], "wc_review_avg": [ 375.8, 90.92722364616661 ], "wc_reply_reviewers_avg": [ 109.2, 78.72839386142715 ], "wc_reply_authors_avg": [ 284.6, 309.58527096746707 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0857492925712544, "gs_citation": 3, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11344404630571317257&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "psu.edu;deepmind.com;ttic.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Pennsylvania State University;Google;Toyota Technological Institute at Chicago", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.psu.edu;https://deepmind.com;https://www.tti-chicago.org", "aff_unique_abbr": "PSU;DeepMind;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Max-Margin Token Selection in Attention Mechanism", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71406", "id": "WXc8O8ghLH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/970f59b22f4c72aec75174aae63c7459-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WXc8O8ghLH", "openreview": "https://openreview.net/forum?id=WXc8O8ghLH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71406", "video": "https://nips.cc/virtual/2023/poster/71406", "author_site": "Davoud Ataee Tarzanagh, Yingcong Li, Xuechen Zhang, Samet Oymak", "tldr": "", "abstract": "Attention mechanism is a central component of the transformer architecture which led to the phenomenal success of large language models. However, the theoretical principles underlying the attention mechanism are poorly understood, especially its nonconvex optimization dynamics. In this work, we explore the seminal softmax-attention model $f(X)=\\langle Xv, \\texttt{softmax}(XWp)\\rangle$, where $X$ is the token sequence and $(v,W,p)$ are trainable parameters. We prove that running gradient descent on $p$, or equivalently $W$, converges in direction to a max-margin solution that separates *locally-optimal* tokens from non-optimal ones. This clearly formalizes attention as an optimal token selection mechanism. Remarkably, our results are applicable to general data and precisely characterize *optimality* of tokens in terms of the value embeddings $Xv$ and problem geometry. We also provide a broader regularization path analysis that establishes the margin maximizing nature of attention even for nonlinear prediction heads. When optimizing $v$ and $p$ simultaneously with logistic loss, we identify conditions under which the regularization paths directionally converge to their respective hard-margin SVM solutions where $v$ separates the input features based on their labels. Interestingly, the SVM formulation of $p$ is influenced by the support vector geometry of $v$. 
Finally, we verify our theoretical findings via numerical experiments and provide insights.", "keywords": "attention mechanism;implicit bias;margin maximization;nonconvex optimization;prompt tuning", "primary_area": "", "supplementary_material": "/attachment/e4826752f146b9ba14b163f6ffc5aff5340def46.zip", "author": "Davoud Ataee Tarzanagh;Yingcong Li;Xuechen Zhang;Samet Oymak", "authorids": "~Davoud_Ataee_Tarzanagh1;~Yingcong_Li1;~Xuechen_Zhang2;~Samet_Oymak2", "gender": "M;;F;M", "homepage": "https://tarzanagh.github.io/;https://yingcong-li.github.io/;https://www.linkedin.com/in/xuechen-zhang-9a5385213;https://sota.engin.umich.edu/", "dblp": ";244/4435;51/7435-2;89/8771", "google_scholar": "Djtvz_0AAAAJ;9uWgjIUAAAAJ;Xj4fIC4AAAAJ;AY6InkoAAAAJ", "orcid": "0000-0003-1267-3889;;;", "linkedin": ";;;", "or_profile": "~Davoud_Ataee_Tarzanagh1;~Yingcong_Li1;~Xuechen_Zhang2;~Samet_Oymak1", "aff": "University of Pennsylvania;University of California, Riverside;University of California, Riverside;University of California, Riverside", "aff_domain": "upenn.edu;ucr.edu;ucr.edu;ucr.edu", "position": "Postdoc;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntarzanagh2023maxmargin,\ntitle={Max-Margin Token Selection in Attention Mechanism},\nauthor={Davoud Ataee Tarzanagh and Yingcong Li and Xuechen Zhang and Samet Oymak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WXc8O8ghLH}\n}", "github": "", "project": "", "reviewers": "iE2T;CU4X;SE93;8XUp;sy1C;5i57", "pdf_size": 729764, "rating": "5;6;6;7;7;8", "confidence": "3;1;2;3;1;3", "soundness": "2;2;4;3;3;3", "novelty": "2;3;3;3;3;4", "presentation": "2;3;3;2;3;3", "wc_summary": "62;76;34;216;73;378", "wc_strengths": "54;53;36;180;164;64", "wc_weaknesses": "108;36;104;179;124;33", "wc_questions": "73;7;7;5;69;612", "wc_limitations": "43;2;5;20;20;1", "wc_review": "340;174;186;600;450;1088", "wc_reply_reviewers": "271;48;10;11;0;31", "wc_reply_authors": "1408;12;8;11;0;18", "reply_reviewers": "2;1;1;1;0;1", "reply_authors": "4;2;2;2;1;2", "rating_avg": [ 6.5, 0.9574271077563381 ], "confidence_avg": [ 2.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 139.83333333333334, 121.32932685692919 ], "wc_strengths_avg": [ 91.83333333333333, 57.464240087978965 ], "wc_weaknesses_avg": [ 97.33333333333333, 50.72693783604745 ], "wc_questions_avg": [ 128.83333333333334, 218.00949265774847 ], "wc_limitations_avg": [ 15.166666666666666, 14.712995011968916 ], "wc_review_avg": [ 473.0, 312.0742433033097 ], "wc_reply_reviewers_avg": [ 61.833333333333336, 94.85676336221658 ], "wc_reply_authors_avg": [ 242.83333333333334, 521.1060086222593 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 2.1666666666666665, 0.8975274678557507 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.09697622757528539, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13360791383996696763&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "upenn.edu;ucr.edu;ucr.edu;ucr.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Pennsylvania;University of California, Riverside", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.ucr.edu", "aff_unique_abbr": 
"UPenn;UCR", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Riverside", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Necessary and Sufficient Conditions for Optimal Decision Trees using Dynamic Programming", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71405", "id": "WYYpxVsKpR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1d5fce9627e15c84db572a66e029b1fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WYYpxVsKpR", "openreview": "https://openreview.net/forum?id=WYYpxVsKpR", "poster": "/media/PosterPDFs/NeurIPS%202023/71405.png?t=1699375060.9661794", "slides": "https://nips.cc/virtual/2023/poster/71405", "video": "https://nips.cc/virtual/2023/poster/71405", "author_site": "Jacobus van der Linden, Mathijs de Weerdt, Emir Demirovi\u0107", "tldr": "", "abstract": "Global optimization of decision trees has shown to be promising in terms of accuracy, size, and consequently human comprehensibility. \nHowever, many of the methods used rely on general-purpose solvers for which scalability remains an issue.\nDynamic programming methods have been shown to scale much better because they exploit the tree structure by solving subtrees as independent subproblems. However, this only works when an objective can be optimized separately for subtrees.\nWe explore this relationship in detail and show the necessary and sufficient conditions for such separability and generalize previous dynamic programming approaches into a framework that can optimize any combination of separable objectives and constraints.\nExperiments on five application domains show the general applicability of this framework, while outperforming the scalability of general-purpose solvers by a large margin.", "keywords": "optimal decision trees;dynamic programming;separability", "primary_area": "", "supplementary_material": "", "author": "Jacobus G.M. van der Linden;Mathijs de Weerdt;Emir Demirovi\u0107", "authorids": "~Jacobus_G.M._van_der_Linden1;~Mathijs_de_Weerdt1;~Emir_Demirovi\u01071", "gender": "M;Not Specified;M", "homepage": "https://www.tudelft.nl/ewi/over-de-faculteit/afdelingen/software-technology/algorithmics/people/koos-van-der-linden/;http://www.emirdemirovic.com;http://www.alg.ewi.tudelft.nl/weerdt/", "dblp": "294/1810;;91/3015", "google_scholar": "rc-Xm_AAAAAJ;;https://scholar.google.com.tw/citations?user=9GJ8AvgAAAAJ", "orcid": "0009-0001-4015-0594;;0000-0002-0470-6241", "linkedin": ";;mdeweerdt/", "or_profile": "~Jacobus_G.M._van_der_Linden1;~Emir_Demirovi\u01071;~Mathijs_Weerdt1", "aff": "Delft University of Technology;Delft University of Technology;Delft University of Technology", "aff_domain": "tudelft.nl;tudelft.nl;tudelft.nl", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlinden2023necessary,\ntitle={Necessary and Sufficient Conditions for Optimal Decision Trees using Dynamic Programming},\nauthor={Jacobus G.M. 
van der Linden and Mathijs de Weerdt and Emir Demirovi{\\'c}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WYYpxVsKpR}\n}", "github": "", "project": "", "reviewers": "khZh;L3H7;4G5j;VtkV;46BC", "pdf_size": 671642, "rating": "5;6;7;7;8", "confidence": "4;3;4;4;3", "soundness": "2;3;3;4;4", "novelty": "3;2;4;3;4", "presentation": "3;2;3;4;4", "wc_summary": "38;75;81;85;122", "wc_strengths": "72;69;68;128;45", "wc_weaknesses": "471;188;100;537;6", "wc_questions": "153;159;2;22;1", "wc_limitations": "7;43;43;4;33", "wc_review": "741;534;294;776;207", "wc_reply_reviewers": "58;58;0;60;31", "wc_reply_authors": "124;0;0;96;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 80.2, 26.75369133409444 ], "wc_strengths_avg": [ 76.4, 27.543420266916744 ], "wc_weaknesses_avg": [ 260.4, 208.11016313481667 ], "wc_questions_avg": [ 67.4, 72.75328171292344 ], "wc_limitations_avg": [ 26.0, 17.158088471621774 ], "wc_review_avg": [ 510.4, 229.4154310415932 ], "wc_reply_reviewers_avg": [ 41.4, 23.320377355437454 ], "wc_reply_authors_avg": [ 44.0, 54.61135413080324 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3202563076101743, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2535023501602224180&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "tudelft.nl;tudelft.nl;tudelft.nl", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Safety Gymnasium: A Unified Safe Reinforcement Learning Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73567", "id": "WZmlxIuIGR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c557a3d6a48cc99444f85e924c66753-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=WZmlxIuIGR", "openreview": "https://openreview.net/forum?id=WZmlxIuIGR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73567", "video": "https://nips.cc/virtual/2023/poster/73567", "author_site": "Jiaming Ji, Borong Zhang, Jiayi Zhou, Xuehai Pan, Weidong Huang, Ruiyang Sun, Yiran Geng, Yifan Zhong, Josef Dai, Yaodong Yang", "tldr": "", "abstract": "Artificial intelligence (AI) systems possess significant potential to drive societal progress. However, their deployment often faces obstacles due to substantial safety concerns. Safe reinforcement learning (SafeRL) emerges as a solution to optimize policies while simultaneously adhering to multiple constraints, thereby addressing the challenge of integrating reinforcement learning in safety-critical scenarios. In this paper, we present an environment suite called Safety-Gymnasium, which encompasses safety-critical tasks in both single and multi-agent scenarios, accepting vector and vision-only input. 
Additionally, we offer a library of algorithms named Safe Policy Optimization (SafePO), comprising 16 state-of-the-art SafeRL algorithms. This comprehensive library can serve as a validation tool for the research community. By introducing this benchmark, we aim to facilitate the evaluation and comparison of safety performance, thus fostering the development of reinforcement learning for safer, more reliable, and responsible real-world applications. The website of this project can be accessed at https://sites.google.com/view/safety-gymnasium.", "keywords": "Safe Reinforcement Learning;SafeRL;RL Simulator", "primary_area": "", "supplementary_material": "/attachment/bc446281183a91ecf4bfc38b90aa1542019bba5c.pdf", "author": "Jiaming Ji;Borong Zhang;Jiayi Zhou;Xuehai Pan;Weidong Huang;Ruiyang Sun;Yiran Geng;Yifan Zhong;Josef Dai;Yaodong Yang", "authorids": "~Jiaming_Ji2;~Borong_Zhang1;~Jiayi_Zhou2;~Xuehai_Pan1;~Weidong_Huang3;~Ruiyang_Sun2;~Yiran_Geng1;~Yifan_Zhong2;~Josef_Dai1;~Yaodong_Yang1", "gender": "M;;M;M;M;M;M;;;M", "homepage": "https://jijiaming.com;;https://gaiejj.github.io/;https://github.com/XuehaiPan;https://github.com/hdadong;https://github.com/rockmagma02;https://gengyiran.github.io/;;;https://www.yangyaodong.com", "dblp": "313/9356.html;;;;;273/6822;315/0588;;;170/1496-1", "google_scholar": "aW8WbYYAAAAJ;;rD77vW8AAAAJ;VIwJg4gAAAAJ;;EE_w5nAAAAAJ;;;;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ", "orcid": ";;;;;;;;;0000-0001-8132-5613", "linkedin": ";;;;;;;;;yaodong-yang", "or_profile": "~Jiaming_Ji2;~Borong_Zhang1;~Jiayi_Zhou2;~Xuehai_Pan1;~Weidong_Huang3;~Ruiyang_Sun2;~Yiran_Geng1;~Yifan_Zhong2;~Josef_Dai1;~Yaodong_Yang1", "aff": "Peking University;;Peking University;Peking University;Peking University;Peking University;Peking University;;;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;;;pku.edu.cn", "position": "PhD student;;Researcher;PhD student;Researcher;Undergrad student;Undergrad student;;;Assistant Professor", "bibtex": "@inproceedings{\nji2023safety,\ntitle={Safety Gymnasium: A Unified Safe Reinforcement Learning Benchmark},\nauthor={Jiaming Ji and Borong Zhang and Jiayi Zhou and Xuehai Pan and Weidong Huang and Ruiyang Sun and Yiran Geng and Yifan Zhong and Josef Dai and Yaodong Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=WZmlxIuIGR}\n}", "github": "", "project": "", "reviewers": "w71G;EjZD;7227;2t22;vCTw", "pdf_size": 18772367, "rating": "5;6;6;7;8", "confidence": "4;4;4;3;4", "wc_summary_and_contributions": "103;67;41;38;89", "wc_strengths": "57;55;22;21;66", "wc_improvement": "283;253;36;2;63", "wc_limitations": "96;90;32;34;1", "wc_correctness": "15;28;1;2;2", "wc_clarity": "1;6;1;2;1", "wc_relation_to_prior_work": "54;24;1;10;12", "wc_documentation": "18;5;5;7;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "628;529;140;117;236", "wc_reply_reviewers": "0;396;0;0;301", "wc_reply_authors": "2960;3750;1353;594;1542", "reply_reviewers": "0;2;0;0;1", "reply_authors": "7;9;3;1;4", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 67.6, 25.671774383552066 ], "wc_strengths_avg": [ 44.2, 18.90396783746735 ], "wc_improvement_avg": [ 127.4, 116.80171231621564 ], "wc_limitations_avg": [ 50.6, 36.59289548532611 ], "wc_correctness_avg": [ 9.6, 10.556514576317317 ], "wc_clarity_avg": [ 2.2, 
1.9390719429665317 ], "wc_relation_to_prior_work_avg": [ 20.2, 18.42172630347113 ], "wc_documentation_avg": [ 7.2, 5.74108003776293 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 330.0, 209.14588210146525 ], "wc_reply_reviewers_avg": [ 139.4, 173.35235793031487 ], "wc_reply_authors_avg": [ 2039.8, 1147.2914886810588 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 4.8, 2.85657137141714 ], "replies_avg": [ 42, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.294174202707276, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6505105430005561172&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;;;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Online learning of long-range dependencies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71404", "id": "Wa1GGPqjUn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2184d8450c8a641f9a10c49279087c97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Wa1GGPqjUn", "openreview": "https://openreview.net/forum?id=Wa1GGPqjUn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71404", "video": "https://nips.cc/virtual/2023/poster/71404", "author_site": "Nicolas Zucchet, Robert Meier, Simon Schug, Asier Mujika, Joao Sacramento", "tldr": "", "abstract": "Online learning holds the promise of enabling efficient long-term credit assignment in recurrent neural networks. However, current algorithms fall short of offline backpropagation by either not being scalable or failing to learn long-range dependencies. Here we present a high-performance online learning algorithm that merely doubles the memory and computational requirements of a single inference pass. We achieve this by leveraging independent recurrent modules in multi-layer networks, an architectural motif that has recently been shown to be particularly powerful. Experiments on synthetic memory problems and on the challenging long-range arena benchmark suite reveal that our algorithm performs competitively, establishing a new standard for what can be achieved through online learning. 
This ability to learn long-range dependencies offers a new perspective on learning in the brain and opens a promising avenue in neuromorphic computing.", "keywords": "online learning;linear recurrent units;temporal credit assignment;biologically-plausible learning;local learning rules;neuromorphic computing", "primary_area": "", "supplementary_material": "", "author": "Nicolas Zucchet;Robert Meier;Simon Schug;Asier Mujika;Joao Sacramento", "authorids": "~Nicolas_Zucchet1;~Robert_Meier2;~Simon_Schug1;~Asier_Mujika1;~Joao_Sacramento1", "gender": "M;;;;M", "homepage": ";https://as.inf.ethz.ch/people/members/romeier/index.html;https://smn.one/;;http://www.joaosacramento.com", "dblp": "289/6252;237/7185;262/5139;190/7152;59/9214", "google_scholar": "cLhZY44AAAAJ;https://scholar.google.ch/citations?user=PMgO33AAAAAJ;T-ZAeg0AAAAJ;;9hpcmYUAAAAJ", "orcid": ";;0000-0002-5305-2547;;", "linkedin": "nicolas-zucchet-7a84a6139/;;;;", "or_profile": "~Nicolas_Zucchet1;~Robert_Meier2;~Simon_Schug1;~Asier_Mujika1;~Joao_Sacramento1", "aff": "ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nzucchet2023online,\ntitle={Online learning of long-range dependencies},\nauthor={Nicolas Zucchet and Robert Meier and Simon Schug and Asier Mujika and Joao Sacramento},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Wa1GGPqjUn}\n}", "github": "", "project": "", "reviewers": "53aD;14Gy;XofY;Z8SA;hL6u", "pdf_size": 475334, "rating": "6;6;6;6;7", "confidence": "3;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "2;4;3;2;3", "wc_summary": "51;66;99;103;148", "wc_strengths": "46;138;77;128;99", "wc_weaknesses": "99;223;315;184;356", "wc_questions": "84;97;174;67;47", "wc_limitations": "1;9;22;15;132", "wc_review": "281;533;687;497;782", "wc_reply_reviewers": "87;62;46;0;42", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 93.4, 33.62499070631842 ], "wc_strengths_avg": [ 97.6, 33.60119045510144 ], "wc_weaknesses_avg": [ 235.4, 91.95781641600675 ], "wc_questions_avg": [ 93.8, 43.46676891603516 ], "wc_limitations_avg": [ 35.8, 48.5938267684281 ], "wc_review_avg": [ 556.0, 171.97209075893682 ], "wc_reply_reviewers_avg": [ 47.4, 28.492806109613 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4011026435941493517&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ethz.ch;inf.ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": 
"1;1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "DeWave: Discrete Encoding of EEG Waves for EEG to Text Translation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71403", "id": "WaLI8slhLw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1f2fd23309a5b2d2537d063b29ec1b52-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WaLI8slhLw", "openreview": "https://openreview.net/forum?id=WaLI8slhLw", "poster": "/media/PosterPDFs/NeurIPS%202023/71403.png?t=1701615525.8898022", "slides": "https://nips.cc/virtual/2023/poster/71403", "video": "https://nips.cc/virtual/2023/poster/71403", "author_site": "Yiqun Duan, Charles Chau, Zhen Wang, Yu-Kai Wang, Chin-teng Lin", "tldr": "", "abstract": "The translation of brain dynamics into natural language is pivotal for brain-computer interfaces (BCIs), a field that has seen substantial growth in recent years. With the swift advancement of large language models, such as ChatGPT, the need to bridge the gap between the brain and languages becomes increasingly pressing. Current methods, however, require eye-tracking fixations or event markers to segment brain dynamics into word-level features, which can restrict the practical application of these systems. These event markers may not be readily available or could be challenging to acquire during real-time inference, and the sequence of eye fixations may not align with the order of spoken words. To tackle these issues, we introduce a novel framework, DeWave, that integrates discrete encoding sequences into open-vocabulary EEG-to-text translation tasks. DeWave uses a quantized variational encoder to derive discrete codex encoding and align it with pre-trained language models. This discrete codex representation brings forth two advantages: 1) it alleviates the order mismatch between eye fixations and spoken words by introducing text-EEG contrastive alignment training, and 2) it minimizes the interference caused by individual differences in EEG waves through an invariant discrete codex. Our model surpasses the previous baseline (40.1 and 31.7) by 3.06% and 6.34\\%, respectively, achieving 41.35 BLEU-1 and 33.71 Rouge-F on the ZuCo Dataset. 
Furthermore, this work is the first to facilitate the translation of entire EEG signal periods without the need for word-level order markers (e.g., eye fixations), scoring 20.5 BLEU-1 and 29.5 Rouge-1 on the ZuCo Dataset, respectively.", "keywords": "EEG; Neural Encoding; Brain Computer Interface", "primary_area": "", "supplementary_material": "/attachment/799ad1cc1832239d2e614854ae1c4afe785df08b.pdf", "author": "Yiqun Duan;Charles Zhou;Zhen Wang;Yu-Kai Wang;Chin-teng Lin", "authorids": "~Yiqun_Duan1;~Charles_Zhou1;~Zhen_Wang9;~Yu-Kai_Wang1;~Chin-teng_Lin1", "gender": "M;M;;M;M", "homepage": "https://github.com/DuanYiqun;;;https://profiles.uts.edu.au/YuKai.Wang;http://www.uts.edu.au/staff/chin-teng.lin", "dblp": "248/5526;;;;", "google_scholar": "https://scholar.google.com.au/citations?user=GoQKrD0AAAAJ;l-eP62sAAAAJ;;https://scholar.google.com/citations?hl=zh-TW;nubkF1cAAAAJ", "orcid": ";my-orcid?orcid=0000-0002-6620-604X;;;0000-0001-8371-8197", "linkedin": ";;;;", "or_profile": "~Yiqun_Duan1;~Charles_Zhou1;~Zhen_Wang9;~Yu-Kai_Wang1;~Chin-teng_Lin1", "aff": "University of Technology Sydney;University of Technology Sydney;;University of Technology Sydney;University of Technology Sydney", "aff_domain": "uts.edu.au;uts.edu.au;;uts.edu.au;uts.edu.au", "position": "PhD student;PhD student;;Lecturer;Full Professor", "bibtex": "@inproceedings{\nduan2023dewave,\ntitle={DeWave: Discrete Encoding of {EEG} Waves for {EEG} to Text Translation},\nauthor={Yiqun Duan and Charles Zhou and Zhen Wang and Yu-Kai Wang and Chin-teng Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WaLI8slhLw}\n}", "github": "", "project": "", "reviewers": "E1CL;tXBq;iEZr;TbVs", "pdf_size": 1738814, "rating": "6;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "62;84;97;130", "wc_strengths": "55;40;119;101", "wc_weaknesses": "156;88;169;64", "wc_questions": "1;173;166;289", "wc_limitations": "1;7;47;1", "wc_review": "275;392;598;585", "wc_reply_reviewers": "35;26;64;112", "wc_reply_authors": "0;0;344;479", "reply_reviewers": "1;1;2;2", "reply_authors": "1;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 93.25, 24.631027181179434 ], "wc_strengths_avg": [ 78.75, 32.3293597214668 ], "wc_weaknesses_avg": [ 119.25, 44.313513740167345 ], "wc_questions_avg": [ 157.25, 102.5874626842871 ], "wc_limitations_avg": [ 14.0, 19.209372712298546 ], "wc_review_avg": [ 462.5, 135.54796199131877 ], "wc_reply_reviewers_avg": [ 59.25, 33.53636086399358 ], "wc_reply_authors_avg": [ 205.75, 211.21360633254668 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6091028354446893430&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uts.edu.au;uts.edu.au;;uts.edu.au;uts.edu.au", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Technology Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.uts.edu.au", "aff_unique_abbr": "UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Paraphrasing evades detectors of AI-generated text, but 
retrieval is an effective defense", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71402", "id": "WbFhFvjjKj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/575c450013d0e99e4b0ecf82bd1afaa4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WbFhFvjjKj", "openreview": "https://openreview.net/forum?id=WbFhFvjjKj", "poster": "/media/PosterPDFs/NeurIPS%202023/71402.png?t=1702074030.884116", "slides": "https://nips.cc/virtual/2023/poster/71402", "video": "https://nips.cc/virtual/2023/poster/71402", "author_site": "Kalpesh Krishna, Yixiao Song, Marzena Karpinska, John Wieting, Mohit Iyyer", "tldr": "", "abstract": "The rise in malicious usage of large language models, such as fake content creation and academic plagiarism, has motivated the development of approaches that identify AI-generated text, including those based on watermarking or outlier detection. However, the robustness of these detection algorithms to paraphrases of AI-generated text remains unclear. To stress test these detectors, we build an 11B-parameter paraphrase generation model (DIPPER) that can paraphrase paragraphs, condition on surrounding context, and control lexical diversity and content reordering. Paraphrasing text generated by three large language models (including GPT3.5-davinci-003) with DIPPER successfully evades several detectors, including watermarking, GPTZero, DetectGPT, and OpenAI's text classifier. For example, DIPPER drops detection accuracy of DetectGPT from 70.3% to 4.6% (at a constant false positive rate of 1%), without appreciably modifying the input semantics.\nTo increase the robustness of AI-generated text detection to paraphrase attacks, we introduce a simple defense that relies on retrieving semantically-similar generations and must be maintained by a language model API provider. Given a candidate text, our algorithm searches a database of sequences previously generated by the API, looking for sequences that match the candidate text within a certain threshold. We empirically verify our defense using a database of 15M generations from a fine-tuned T5-XXL model and find that it can detect 80% to 97% of paraphrased generations across different settings while only classifying 1% of human-written sequences as AI-generated. 
We open-source our models, code and data.", "keywords": "AI-generated text detection;text detection;paraphrasing;attacks;retrieval;defenses;large language models;LLMs", "primary_area": "", "supplementary_material": "/attachment/83d03ae151e74d666b1cae1b67fe7f3787074839.zip", "author": "Kalpesh Krishna;Yixiao Song;Marzena Karpinska;John Frederick Wieting;Mohit Iyyer", "authorids": "~Kalpesh_Krishna1;~Yixiao_Song1;~Marzena_Karpinska1;~John_Frederick_Wieting1;~Mohit_Iyyer1", "gender": "M;F;;M;M", "homepage": "http://martiansideofthemoon.github.io/;https://yixiao-song.github.io;;;http://cs.umass.edu/~miyyer", "dblp": "207/8485;331/5829;;156/0158;148/9178", "google_scholar": "https://scholar.google.com/citations?hl=en;4OgciqMAAAAJ;;;rBVA5tcAAAAJ", "orcid": ";;;;", "linkedin": "kalpesh-krishna-6b3827a6/;songyixiao/;;;", "or_profile": "~Kalpesh_Krishna1;~Yixiao_Song1;~Marzena_Karpinska1;~John_Frederick_Wieting1;~Mohit_Iyyer1", "aff": "University of Massachusetts Amherst;University of Massachusetts at Amherst;;Google DeepMind;University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;umass.edu;;google.com;cs.umass.edu", "position": "PhD student;PhD student;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkrishna2023paraphrasing,\ntitle={Paraphrasing evades detectors of {AI}-generated text, but retrieval is an effective defense},\nauthor={Kalpesh Krishna and Yixiao Song and Marzena Karpinska and John Frederick Wieting and Mohit Iyyer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WbFhFvjjKj}\n}", "github": "", "project": "", "reviewers": "44uy;k3DJ;qaDF;VDA7;KFgu", "pdf_size": 885516, "rating": "6;6;6;8;8", "confidence": "4;3;4;4;5", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "4;3;2;4;4", "wc_summary": "80;65;118;100;111", "wc_strengths": "67;38;34;53;276", "wc_weaknesses": "443;48;281;208;300", "wc_questions": "95;19;155;218;172", "wc_limitations": "62;6;15;1;142", "wc_review": "747;176;603;580;1001", "wc_reply_reviewers": "118;18;10;96;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 94.8, 19.671298889498882 ], "wc_strengths_avg": [ 93.6, 91.94476602830636 ], "wc_weaknesses_avg": [ 256.0, 128.91702757975767 ], "wc_questions_avg": [ 131.8, 68.79360435389324 ], "wc_limitations_avg": [ 45.2, 53.01848734168111 ], "wc_review_avg": [ 621.4, 268.44187452780164 ], "wc_reply_reviewers_avg": [ 51.0, 46.32062175748508 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 332, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4771235167992901737&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "cs.umass.edu;umass.edu;;google.com;cs.umass.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Massachusetts Amherst;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.umass.edu;https://deepmind.com", "aff_unique_abbr": "UMass Amherst;DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;1;0", 
"aff_country_unique": "United States;United Kingdom" }, { "title": "GenEval: An object-focused framework for evaluating text-to-image alignment", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73566", "id": "Wbr51vK331", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a3bf71c7c63f0c3bcb7ff67c67b1e7b1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Wbr51vK331", "openreview": "https://openreview.net/forum?id=Wbr51vK331", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73566", "video": "https://nips.cc/virtual/2023/poster/73566", "author_site": "Dhruba Ghosh, Hannaneh Hajishirzi, Ludwig Schmidt", "tldr": "", "abstract": "Recent breakthroughs in diffusion models, multimodal pretraining, and efficient finetuning have led to an explosion of text-to-image generative models. \nGiven human evaluation is expensive and difficult to scale, automated methods are critical for evaluating the increasingly large number of new models. \nHowever, most current automated evaluation metrics like FID or CLIPScore only offer a distribution-level measure of image quality or image-text alignment, and are unsuited for fine-grained or instance-level analysis. \nIn this paper, we introduce GenEval, an object-focused framework to evaluate compositional image properties such as object co-occurrence, position, count, and color. \nWe show that current object detection models can be leveraged to evaluate text-to-image models on a variety of generation tasks with strong human agreement, and that other discriminative vision models can be linked to this pipeline to further verify properties like object color. \nWe then evaluate several open-source text-to-image models and analyze their relative reasoning capabilities on our benchmark. \nWe find that recent models demonstrate significant improvement on these tasks, though they are still lacking in complex capabilities such as spatial relations and attribute binding. \nFinally, we demonstrate how GenEval might be used to help discover existing failure modes, in order to inform development of the next generation of text-to-image models. 
\nOur code to run the GenEval framework will be made publicly available at https://github.com/djghosh13/geneval.", "keywords": "text-to-image;vision-and-language;object detection;compositional;evaluation;framework", "primary_area": "", "supplementary_material": "", "author": "Dhruba Ghosh;Hannaneh Hajishirzi;Ludwig Schmidt", "authorids": "~Dhruba_Ghosh1;~Hannaneh_Hajishirzi1;~Ludwig_Schmidt1", "gender": ";F;M", "homepage": "https://djghosh13.github.io/;https://homes.cs.washington.edu/~hannaneh/;http://people.csail.mit.edu/ludwigs/", "dblp": "292/8318;52/1296;141/2720", "google_scholar": "lHuZ55oAAAAJ;LOV6_WIAAAAJ;SWMKy70AAAAJ", "orcid": "0000-0002-8518-2696;;", "linkedin": "dhruba-ghosh-b82467170/;;ludwig-schmidt-87ba3612/", "or_profile": "~Dhruba_Ghosh1;~Hannaneh_Hajishirzi1;~Ludwig_Schmidt1", "aff": "University of Washington;University of Washington;Allen Institute for Artificial Intelligence", "aff_domain": "uw.edu;uw.edu;allenai.org", "position": "PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nghosh2023geneval,\ntitle={GenEval: An object-focused framework for evaluating text-to-image alignment},\nauthor={Dhruba Ghosh and Hannaneh Hajishirzi and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Wbr51vK331}\n}", "github": "", "project": "", "reviewers": "nUMc;Pjnj;Ygiv;ALyh", "pdf_size": 3748330, "rating": "5;6;6;7", "confidence": "4;4;3;4", "wc_summary_and_contributions": "56;48;29;59", "wc_strengths": "49;58;37;70", "wc_improvement": "3;45;48;164", "wc_limitations": "2;37;53;195", "wc_correctness": "2;5;11;113", "wc_clarity": "1;11;4;97", "wc_relation_to_prior_work": "8;18;7;1", "wc_documentation": "6;17;39;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "128;240;229;719", "wc_reply_reviewers": "0;47;0;22", "wc_reply_authors": "193;384;359;1512", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 48.0, 11.683321445547923 ], "wc_strengths_avg": [ 53.5, 12.093386622447824 ], "wc_improvement_avg": [ 65.0, 59.8623420858222 ], "wc_limitations_avg": [ 71.75, 73.5097782611266 ], "wc_correctness_avg": [ 32.75, 46.44553261617311 ], "wc_clarity_avg": [ 28.25, 39.85834291588149 ], "wc_relation_to_prior_work_avg": [ 8.5, 6.103277807866851 ], "wc_documentation_avg": [ 20.25, 11.903255857117413 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 329.0, 229.3588890799744 ], "wc_reply_reviewers_avg": [ 17.25, 19.382659776202026 ], "wc_reply_authors_avg": [ 612.0, 524.7747135676414 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15959464659480977695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uw.edu;uw.edu;allenai.org", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Low Tensor Rank Learning of Neural Dynamics", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71401", "id": "WcoX8eJJjI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/27030ad2ec1d8f2c3847a64e382c30ca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WcoX8eJJjI", "openreview": "https://openreview.net/forum?id=WcoX8eJJjI", "poster": "/media/PosterPDFs/NeurIPS%202023/71401.png?t=1701415394.2802706", "slides": "https://nips.cc/virtual/2023/poster/71401", "video": "https://nips.cc/virtual/2023/poster/71401", "author_site": "Arthur Pellegrino, N Alex Cayco Gajic, Angus Chadwick", "tldr": "", "abstract": "Learning relies on coordinated synaptic changes in recurrently connected populations of neurons. Therefore, understanding the collective evolution of synaptic connectivity over learning is a key challenge in neuroscience and machine learning. In particular, recent work has shown that the weight matrices of task-trained RNNs are typically low rank, but how this low rank structure unfolds over learning is unknown. To address this, we investigate the rank of the 3-tensor formed by the weight matrices throughout learning. By fitting RNNs of varying rank to large-scale neural recordings during a motor learning task, we find that the inferred weights are low-tensor-rank and therefore evolve over a fixed low-dimensional subspace throughout the entire course of learning. We next validate the observation of low-tensor-rank learning on an RNN trained to solve the same task. Finally, we present a set of mathematical results bounding the matrix and tensor ranks of gradient descent learning dynamics which show that low-tensor-rank weights emerge naturally in RNNs trained to solve low-dimensional tasks. Taken together, our findings provide insight on the evolution of population connectivity over learning in both biological and artificial neural networks, and enable reverse engineering of learning-induced changes in recurrent dynamics from large-scale neural recordings.", "keywords": "Recurrent Neural Networks;Computational Neuroscience;Neural Data Analysis;Tensor;Learning", "primary_area": "", "supplementary_material": "/attachment/152e99cfdc7692c5347570b14e8d8294b134a450.zip", "author": "Arthur Pellegrino;N Alex Cayco Gajic;Angus Chadwick", "authorids": "~Arthur_Pellegrino1;~N_Alex_Cayco_Gajic1;~Angus_Chadwick1", "gender": ";F;M", "homepage": ";https://sites.google.com/view/caycogajic/home;https://homepages.inf.ed.ac.uk/achadwic/index.html", "dblp": ";132/1471.html;", "google_scholar": ";dRUs5RYAAAAJ;https://scholar.google.co.uk/citations?user=P5hPjOEAAAAJ", "orcid": ";0000-0003-3593-5773;0000-0003-2664-0746", "linkedin": ";;", "or_profile": "~Arthur_Pellegrino1;~N_Alex_Cayco_Gajic1;~Angus_Chadwick1", "aff": ";Ecole Normale Sup\u00e9rieure de Paris;Edinburgh University, University of Edinburgh", "aff_domain": ";ens.fr;inf.ed.ac.uk", "position": ";Assistant Professor;Lecturer", "bibtex": "@inproceedings{\npellegrino2023low,\ntitle={Low Tensor Rank Learning of Neural Dynamics},\nauthor={Arthur Pellegrino and N Alex Cayco Gajic and Angus Chadwick},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WcoX8eJJjI}\n}", "github": "", "project": "", "reviewers": "jwhe;X8YP;Qcae;T6eW", "pdf_size": 13858604, "rating": "4;5;7;7", "confidence": "1;2;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "38;60;156;101", "wc_strengths": "101;52;298;66", "wc_weaknesses": 
"26;26;114;53", "wc_questions": "112;1;25;435", "wc_limitations": "1;16;10;21", "wc_review": "278;155;603;676", "wc_reply_reviewers": "49;0;40;75", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 88.75, 44.92980636504012 ], "wc_strengths_avg": [ 129.25, 99.04891468360469 ], "wc_weaknesses_avg": [ 54.75, 35.94005425705421 ], "wc_questions_avg": [ 143.25, 173.43064175629402 ], "wc_limitations_avg": [ 12.0, 7.44983221287567 ], "wc_review_avg": [ 428.0, 217.46149084378135 ], "wc_reply_reviewers_avg": [ 41.0, 26.93510720231126 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7505683356701914, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2478701314621858524&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";ens.fr;inf.ed.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Paris;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.ens.fr;https://www.ed.ac.uk", "aff_unique_abbr": "ENS Paris;Edinburgh", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;1", "aff_country_unique": "France;United Kingdom" }, { "title": "Towards Semi-Structured Automatic ICD Coding via Tree-based Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71400", "id": "Wff6DWFY2W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d74f9efa1d8ca30b31d65cef8de7c2bf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Wff6DWFY2W", "openreview": "https://openreview.net/forum?id=Wff6DWFY2W", "poster": "/media/PosterPDFs/NeurIPS%202023/71400.png?t=1699498485.8098667", "slides": "https://nips.cc/virtual/2023/poster/71400", "video": "https://nips.cc/virtual/2023/poster/71400", "author_site": "Chang Lu, Chandan Reddy, Ping Wang, Yue Ning", "tldr": "", "abstract": "Automatic coding of International Classification of Diseases (ICD) is a multi-label text categorization task that involves extracting disease or procedure codes from clinical notes. Despite the application of state-of-the-art natural language processing (NLP) techniques, there are still challenges including limited availability of data due to privacy constraints and the high variability of clinical notes caused by different writing habits of medical professionals and various pathological features of patients. In this work, we investigate the semi-structured nature of clinical notes and propose an automatic algorithm to segment them into sections. To address the variability issues in existing ICD coding models with limited data, we introduce a contrastive pre-training approach on sections using a soft multi-label similarity metric based on tree edit distance. Additionally, we design a masked section training strategy to enable ICD coding models to locate sections related to ICD codes. 
Extensive experimental results demonstrate that our proposed training strategies effectively enhance the performance of existing ICD coding methods.", "keywords": "ICD Coding;Contrastive Learning;NLP;Healthcare;Text Categorization;Pre-training", "primary_area": "", "supplementary_material": "/attachment/3049c2f392177682c3c415b30e59b85bfbc56867.zip", "author": "Chang Lu;Chandan K. Reddy;Ping Wang;Yue Ning", "authorids": "~Chang_Lu2;~Chandan_K._Reddy1;~Ping_Wang8;~Yue_Ning1", "gender": "M;M;F;F", "homepage": "https://www.luchangcs.com/;https://creddy.net/;https://leafnlp.org/ping;https://yue-ning.github.io/", "dblp": "10/10150;42/1341;37/1304-24.html;74/9990-1.html", "google_scholar": "nWfJFp4AAAAJ;LoXnMOIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-3756-7396;;0000-0002-0379-9183;", "linkedin": ";;;", "or_profile": "~Chang_Lu2;~Chandan_K._Reddy1;~Ping_Wang8;~Yue_Ning1", "aff": "Stevens Institute of Technology;Amazon;Stevens Institute of Technology;Stevens Institute of Technology", "aff_domain": "stevens.edu;amazon.com;stevens.edu;stevens.edu", "position": "PhD student;Amazon Scholar;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlu2023towards,\ntitle={Towards Semi-Structured Automatic {ICD} Coding via Tree-based Contrastive Learning},\nauthor={Chang Lu and Chandan K. Reddy and Ping Wang and Yue Ning},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Wff6DWFY2W}\n}", "github": "", "project": "", "reviewers": "VQpM;BZWL;cSeG;ijoF", "pdf_size": 653847, "rating": "3;6;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "24;31;76;86", "wc_strengths": "35;56;38;167", "wc_weaknesses": "128;152;25;35", "wc_questions": "26;2;55;23", "wc_limitations": "7;12;1;34", "wc_review": "220;253;195;345", "wc_reply_reviewers": "65;16;12;17", "wc_reply_authors": "429;11;11;59", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 54.25, 27.095894522971555 ], "wc_strengths_avg": [ 74.0, 54.290883212561575 ], "wc_weaknesses_avg": [ 85.0, 55.76289088632332 ], "wc_questions_avg": [ 26.5, 18.874586088176873 ], "wc_limitations_avg": [ 13.5, 12.459935794377111 ], "wc_review_avg": [ 253.25, 56.82594037937252 ], "wc_reply_reviewers_avg": [ 27.5, 21.73131381210073 ], "wc_reply_authors_avg": [ 127.5, 175.1706311000791 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2864710355748997711&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "stevens.edu;amazon.com;stevens.edu;stevens.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Stevens Institute of Technology;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.stevens.edu;https://www.amazon.com", "aff_unique_abbr": "SIT;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Anonymous Learning via Look-Alike Clustering: A Precise 
Analysis of Model Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71399", "id": "WfsWy59bX2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/70899a5d74f83317c78f1a7d413d1baa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WfsWy59bX2", "openreview": "https://openreview.net/forum?id=WfsWy59bX2", "poster": "/media/PosterPDFs/NeurIPS%202023/71399.png?t=1701835905.8504207", "slides": "https://nips.cc/virtual/2023/poster/71399", "video": "https://nips.cc/virtual/2023/poster/71399", "author_site": "Adel Javanmard, Vahab Mirrokni", "tldr": "", "abstract": "While personalized recommendations systems have become increasingly popular, ensuring user data protection remains a top concern in the development of these learning systems. A common approach to enhancing privacy involves training models using anonymous data rather than individual data. In this paper, we explore a natural technique called \"look-alike clustering\", which involves replacing sensitive features of individuals with the cluster's average values. We provide a precise analysis of how training models using anonymous cluster centers affects their generalization capabilities. We focus on an asymptotic regime where the size of the training set grows in proportion to the features dimension. Our analysis is based on the Convex Gaussian Minimax Theorem (CGMT) and allows us to theoretically understand the role of different model components on the generalization error. In addition, we demonstrate that in certain high-dimensional regimes, training over anonymous cluster centers acts as a regularization and improves generalization error of the trained models. Finally, we corroborate our asymptotic theory with finite-sample numerical experiments where we observe a perfect match when the sample size is only of order of a few hundreds.", "keywords": "high-dimensional regression;generalization error;asymptotic analysis;Convex Gaussian Minimax Theorem;regularization", "primary_area": "", "supplementary_material": "/attachment/d0b0d8dcafb8288853ff60b90eccc2eb94916e8b.pdf", "author": "Adel Javanmard;Vahab Mirrokni", "authorids": "~Adel_Javanmard1;~Vahab_Mirrokni2", "gender": ";M", "homepage": "https://faculty.marshall.usc.edu/Adel-Javanmard/;https://people.csail.mit.edu/mirrokni/Welcome.html", "dblp": "96/8072;m/VahabSMirrokni", "google_scholar": "cNSbfGQAAAAJ;opbZfw0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Adel_Javanmard1;~Vahab_Mirrokni2", "aff": "University of Southern California;Google Research", "aff_domain": "usc.edu;google.com", "position": "Full Professor;VP, Google Fellow", "bibtex": "@inproceedings{\njavanmard2023anonymous,\ntitle={Anonymous Learning via Look-Alike Clustering: A Precise Analysis of Model Generalization},\nauthor={Adel Javanmard and Vahab Mirrokni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WfsWy59bX2}\n}", "github": "", "project": "", "reviewers": "Kp2o;PpUR;KLzk;gwNk;EnfZ", "pdf_size": 1032932, "rating": "6;6;6;6;7", "confidence": "3;2;4;4;3", "soundness": "3;2;3;3;4", "novelty": "2;3;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "231;71;65;92;371", "wc_strengths": "106;47;35;64;66", "wc_weaknesses": "71;341;147;83;358", "wc_questions": "190;3;22;192;94", "wc_limitations": "45;41;1;16;48", "wc_review": "643;503;270;447;937", "wc_reply_reviewers": "13;38;55;0;409", "wc_reply_authors": "0;0;40;0;517", "reply_reviewers": 
"1;1;1;0;2", "reply_authors": "1;1;2;1;3", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 166.0, 119.12346536262282 ], "wc_strengths_avg": [ 63.6, 24.071559982684963 ], "wc_weaknesses_avg": [ 200.0, 124.88714905866016 ], "wc_questions_avg": [ 100.2, 80.11591602172442 ], "wc_limitations_avg": [ 30.2, 18.475930287809597 ], "wc_review_avg": [ 560.0, 223.22007078217675 ], "wc_reply_reviewers_avg": [ 103.0, 154.19079090529368 ], "wc_reply_authors_avg": [ 111.4, 203.3908552516558 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hm0EwyBI-DYJ:scholar.google.com/&scioq=Anonymous+Learning+via+Look-Alike+Clustering:+A+Precise+Analysis+of+Model+Generalization&hl=en&as_sdt=0,33", "gs_version_total": 8, "email": "usc.edu;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.usc.edu;https://research.google", "aff_unique_abbr": "USC;Google Research", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Connected Superlevel Set in (Deep) Reinforcement Learning and its Application to Minimax Theorems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71398", "id": "WjDj6W872v", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ff48dde82306fe8f26f3e51dd1054d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WjDj6W872v", "openreview": "https://openreview.net/forum?id=WjDj6W872v", "poster": "/media/PosterPDFs/NeurIPS%202023/71398.png?t=1699927762.200231", "slides": "https://nips.cc/virtual/2023/poster/71398", "video": "https://nips.cc/virtual/2023/poster/71398", "author_site": "Sihan Zeng, Thinh Doan, Justin Romberg", "tldr": "", "abstract": "The aim of this paper is to improve the understanding of the optimization landscape for policy optimization problems in reinforcement learning. Specifically, we show that the superlevel set of the objective function with respect to the policy parameter is always a connected set both in the tabular setting and under policies represented by a class of neural networks. In addition, we show that the optimization objective as a function of the policy parameter and reward satisfies a stronger \u201cequiconnectedness\u201d property. To our best knowledge, these are novel and previously unknown discoveries.\n\nWe present an application of the connectedness of these superlevel sets to the derivation of minimax theorems for robust reinforcement learning. We show that any minimax optimization program which is convex on one side and is equiconnected on the other side observes the minimax equality (i.e. has a Nash equilibrium). We find that this exact structure is exhibited by an interesting class of robust reinforcement learning problems under an adversarial reward attack, and the validity of its minimax equality immediately follows. 
This is the first time such a result is established in the literature.", "keywords": "Reinforcement learning;superlevel sets;minimax optimization;robust reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/cf22a4263e3120795c08b19977e7d319d4c3e3ec.pdf", "author": "Sihan Zeng;Thinh T. Doan;Justin Romberg", "authorids": "~Sihan_Zeng1;~Thinh_T._Doan1;~Justin_Romberg1", "gender": ";M;M", "homepage": ";https://sites.google.com/site/thinhdoan210/home;https://jrom.ece.gatech.edu/", "dblp": ";193/3401;", "google_scholar": ";FiN_HnEAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sihan_Zeng1;~Thinh_T._Doan1;~Justin_Romberg1", "aff": ";Virginia Polytechnic Institute and State University;Georgia Institute of Technology", "aff_domain": ";vt.edu;gatech.edu", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzeng2023connected,\ntitle={Connected Superlevel Set in (Deep) Reinforcement Learning and its Application to Minimax Theorems},\nauthor={Sihan Zeng and Thinh T. Doan and Justin Romberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WjDj6W872v}\n}", "github": "", "project": "", "reviewers": "ceyJ;efCX;DdeT;UPhm", "pdf_size": 955241, "rating": "5;5;6;7", "confidence": "3;3;1;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "97;87;58;47", "wc_strengths": "27;31;44;71", "wc_weaknesses": "26;9;37;65", "wc_questions": "105;134;33;36", "wc_limitations": "10;12;25;8", "wc_review": "265;273;197;227", "wc_reply_reviewers": "0;0;0;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 20.437404434027332 ], "wc_strengths_avg": [ 43.25, 17.210098779495716 ], "wc_weaknesses_avg": [ 34.25, 20.363877332178173 ], "wc_questions_avg": [ 77.0, 43.73213921133976 ], "wc_limitations_avg": [ 13.75, 6.6473679001541655 ], "wc_review_avg": [ 240.5, 30.540956108150905 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2075143391598224, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2665569472492808678&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";vt.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Virginia Tech;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://www.gatech.edu", "aff_unique_abbr": "VT;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Differentiable Registration of Images and LiDAR Point Clouds with VoxelPoint-to-Pixel Matching", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71397", "id": "WjWifKqmcG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a0a53fefef4c2ad72d5ab79703ba70cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WjWifKqmcG", "openreview": "https://openreview.net/forum?id=WjWifKqmcG", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71397.png?t=1697096520.2380292", "slides": "https://nips.cc/virtual/2023/poster/71397", "video": "https://nips.cc/virtual/2023/poster/71397", "author_site": "Junsheng Zhou, Baorui Ma, Wenyuan Zhang, Yi Fang, Yu-Shen Liu, Zhizhong Han", "tldr": "", "abstract": "Cross-modality registration between 2D images captured by cameras and 3D point clouds from LiDARs is a crucial task in computer vision and robotic. Previous methods estimate 2D-3D correspondences by matching point and pixel patterns learned by neural networks, and use Perspective-n-Points (PnP) to estimate rigid transformation during post-processing. However, these methods struggle to map points and pixels to a shared latent space robustly since points and pixels have very different characteristics with patterns learned in different manners (MLP and CNN), and they also fail to construct supervision directly on the transformation since the PnP is non-differentiable, which leads to unstable registration results. To address these problems, we propose to learn a structured cross-modality latent space to represent pixel features and 3D features via a differentiable probabilistic PnP solver. Specifically, we design a triplet network to learn VoxelPoint-to-Pixel matching, where we represent 3D elements using both voxels and points to learn the cross-modality latent space with pixels. We design both the voxel and pixel branch based on CNNs to operate convolutions on voxels/pixels represented in grids, and integrate an additional point branch to regain the information lost during voxelization. We train our framework end-to-end by imposing supervisions directly on the predicted pose distribution with a probabilistic PnP solver. To explore distinctive patterns of cross-modality features, we design a novel loss with adaptive-weighted optimization for cross-modality feature description. 
The experimental results on KITTI and nuScenes datasets show significant improvements over the state-of-the-art methods.", "keywords": "LiDAR Point Clouds;2D images;Cross-modality registration;Matching", "primary_area": "", "supplementary_material": "/attachment/3df1f16f13058ef12f31ccf94a313ed87fc2b0a0.pdf", "author": "Junsheng Zhou;Baorui Ma;Wenyuan Zhang;Yi Fang;Yu-Shen Liu;Zhizhong Han", "authorids": "~Junsheng_Zhou3;~Baorui_Ma1;~Wenyuan_Zhang1;~Yi_Fang2;~Yu-Shen_Liu1;~Zhizhong_Han2", "gender": "M;;M;M;M;M", "homepage": "https://junshengzhou.github.io/;;https://wen-yuan-zhang.github.io/;http://mmvc.engineering.nyu.edu/;https://yushen-liu.github.io/;https://h312h.github.io/", "dblp": ";;;96/361-6;44/2229.html;166/5173", "google_scholar": "afPIrLYAAAAJ;;qzH0hNAAAAAJ;j-cyhzwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0001-7305-1915;", "linkedin": ";;;;;", "or_profile": "~Junsheng_Zhou3;~Baorui_Ma1;~Wenyuan_Zhang1;~Yi_Fang2;~Yu-Shen_Liu1;~Zhizhong_Han2", "aff": "Tsinghua University;;Software Engineering;New York University;Tsinghua University;Wayne State University", "aff_domain": "tsinghua.edu.cn;;mails.tsinghua.edu.cn;nyu.edu;tsinghua.edu.cn;wayne.edu", "position": "MS student;;PhD student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023differentiable,\ntitle={Differentiable Registration of Images and Li{DAR} Point Clouds with VoxelPoint-to-Pixel Matching},\nauthor={Junsheng Zhou and Baorui Ma and Wenyuan Zhang and Yi Fang and Yu-Shen Liu and Zhizhong Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WjWifKqmcG}\n}", "github": "", "project": "", "reviewers": "ec8L;QyPU;Ehvn;yFFc;CqXY", "pdf_size": 3838977, "rating": "5;5;6;6;6", "confidence": "4;5;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "95;66;91;80;58", "wc_strengths": "112;42;40;79;63", "wc_weaknesses": "201;300;370;180;167", "wc_questions": "5;5;7;16;3", "wc_limitations": "32;1;4;112;7", "wc_review": "445;414;512;467;298", "wc_reply_reviewers": "141;30;45;111;56", "wc_reply_authors": "452;33;19;27;118", "reply_reviewers": "2;1;1;1;2", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 14.184498581197715 ], "wc_strengths_avg": [ 67.2, 26.603759132874437 ], "wc_weaknesses_avg": [ 243.6, 78.59414736480063 ], "wc_questions_avg": [ 7.2, 4.578209256903839 ], "wc_limitations_avg": [ 31.2, 41.873141749813804 ], "wc_review_avg": [ 427.2, 72.05386873721632 ], "wc_reply_reviewers_avg": [ 76.6, 42.249733726971584 ], "wc_reply_authors_avg": [ 129.8, 165.0253313888507 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14764367051616866549&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;;mails.tsinghua.edu.cn;nyu.edu;tsinghua.edu.cn;wayne.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Tsinghua University;Software Engineering;New York University;Wayne State University", "aff_unique_dep": 
";Software Engineering;;", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.nyu.edu;https://wayne.edu", "aff_unique_abbr": "THU;;NYU;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0;2", "aff_country_unique": "China;;United States" }, { "title": "Repetition In Repetition Out: Towards Understanding Neural Text Degeneration from the Data Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71396", "id": "WjgCRrOgip", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6c2e85db1f1039177c4495ccd399ac4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WjgCRrOgip", "openreview": "https://openreview.net/forum?id=WjgCRrOgip", "poster": "/media/PosterPDFs/NeurIPS%202023/71396.png?t=1701608466.0904443", "slides": "https://nips.cc/virtual/2023/poster/71396", "video": "https://nips.cc/virtual/2023/poster/71396", "author_site": "Huayang Li, Tian Lan, Zihao Fu, Deng Cai, Lemao Liu, Nigel Collier, Taro Watanabe, Yixuan Su", "tldr": "", "abstract": "There are a number of diverging hypotheses about the neural text degeneration problem, i.e., generating repetitive and dull loops, which makes this problem both interesting and confusing. In this work, we aim to advance our understanding by presenting a straightforward and fundamental explanation from the data perspective. Our preliminary investigation reveals a strong correlation between the degeneration issue and the presence of repetitions in training data. Subsequent experiments also demonstrate that by selectively dropping out the attention to repetitive words in training data, degeneration can be significantly minimized. Furthermore, our empirical analysis illustrates that prior works addressing the degeneration issue from various standpoints, such as the high-inflow words, the likelihood objective, and the self-reinforcement phenomenon, can be interpreted by one simple explanation. That is, penalizing the repetitions in training data is a common and fundamental factor for their effectiveness. 
Moreover, our experiments reveal that penalizing the repetitions in training data remains critical even when considering larger model sizes and instruction tuning.", "keywords": "language modeling;text generation;natural language processing", "primary_area": "", "supplementary_material": "/attachment/2df50b2cb0bfe99c31e3127a8537564b06334dbb.pdf", "author": "Huayang Li;Tian Lan;Zihao Fu;Deng Cai;Lemao Liu;Nigel Collier;Taro Watanabe;Yixuan Su", "authorids": "~Huayang_Li1;~Tian_Lan7;~Zihao_Fu1;~Deng_Cai1;~Lemao_Liu3;~Nigel_Collier1;~Taro_Watanabe1;~Yixuan_Su1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://sites.google.com/view/huayangli;https://github.com/gmftbyGMFTBY;https://fuzihaofzh.github.io/;https://jcyk.github.io/;https://sites.google.com/site/nhcollier/;https://sites.google.com/site/tarowtnb/;https://yxuansu.github.io/;https://lemaoliu.github.io/homepage/", "dblp": "43/5939;31/83-3;;c/DCai-2;90/2619;50/4741;262/3282.html;41/10887.html", "google_scholar": "_1jSi34AAAAJ;https://scholar.google.com/citations?hl=en;64CHB2YAAAAJ;KpbRLYcAAAAJ;https://scholar.google.co.uk/citations?user=ZMelBa0AAAAJ;zsEEy7kAAAAJ;VuVuWEoAAAAJ;", "orcid": ";0000-0002-5200-1537;;;0000-0002-7230-4164;0000-0001-8349-3522;0000-0002-1472-7791;", "linkedin": "huayang-li-a426a3114/;%E5%A4%A9-%E5%85%B0-13128318b/;;;;;;", "or_profile": "~Huayang_Li1;~Tian_Lan7;~Zihao_Fu1;~Deng_Cai1;~Nigel_Collier1;~Taro_Watanabe1;~Yixuan_Su1;~lemao_liu1", "aff": "Nara Institute of Science and Technology;Beijing Institute of Technology;University of Cambridge;Tencent AI Lab;University of Cambridge;Nara Institute of Science and Technology, Japan;University of Cambridge;Tencent", "aff_domain": "is.naist.jp;bit.edu.cn;cam.ac.uk;tencent.com;cam.ac.uk;naist.jp;cam.ac.uk;tencent.com", "position": "MS student;PhD student;Postdoc;Research Scientist;Full Professor;Full Professor;PhD student;Researcher", "bibtex": "@inproceedings{\nli2023repetition,\ntitle={Repetition In Repetition Out: Towards Understanding Neural Text Degeneration from the Data Perspective},\nauthor={Huayang Li and Tian Lan and Zihao Fu and Deng Cai and Lemao Liu and Nigel Collier and Taro Watanabe and Yixuan Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WjgCRrOgip}\n}", "github": "", "project": "", "reviewers": "jpnv;6VM2;Pdys;6H9c", "pdf_size": 1233743, "rating": "6;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "108;108;201;73", "wc_strengths": "27;59;127;57", "wc_weaknesses": "105;271;244;31", "wc_questions": "14;12;121;1", "wc_limitations": "16;22;6;1", "wc_review": "270;472;699;163", "wc_reply_reviewers": "55;113;16;45", "wc_reply_authors": "46;622;30;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 122.5, 47.52104796824245 ], "wc_strengths_avg": [ 67.5, 36.61625322175932 ], "wc_weaknesses_avg": [ 162.75, 98.75822750535775 ], "wc_questions_avg": [ 37.0, 48.74935897014442 ], "wc_limitations_avg": [ 11.25, 8.227241335952167 ], "wc_review_avg": [ 401.0, 204.72542587573238 ], "wc_reply_reviewers_avg": [ 57.25, 35.23049105533444 ], "wc_reply_authors_avg": [ 178.5, 256.2747549018436 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], 
"authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11300211870938496294&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "is.naist.jp;bit.edu.cn;cam.ac.uk;tencent.com;cam.ac.uk;naist.jp;cam.ac.uk;tencent.com", "author_num": 8, "aff_unique_index": "0;1;2;3;2;0;2;3", "aff_unique_norm": "Nara Institute of Science and Technology;Beijing Institute of Technology;University of Cambridge;Tencent", "aff_unique_dep": ";;;Tencent AI Lab", "aff_unique_url": "https://www.nist.go.jp;http://www.bit.edu.cn/;https://www.cam.ac.uk;https://ai.tencent.com", "aff_unique_abbr": "NIST;BIT;Cambridge;Tencent AI Lab", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;1;2;0;2;1", "aff_country_unique": "Japan;China;United Kingdom" }, { "title": "Action Inference by Maximising Evidence: Zero-Shot Imitation from Observation with World Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71395", "id": "WjlCQxpuxU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/90e73f3cf1a6c84c723a2e8b7fb2b2c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WjlCQxpuxU", "openreview": "https://openreview.net/forum?id=WjlCQxpuxU", "poster": "/media/PosterPDFs/NeurIPS%202023/71395.png?t=1701389870.2348707", "slides": "https://nips.cc/virtual/2023/poster/71395", "video": "https://nips.cc/virtual/2023/poster/71395", "author_site": "Xingyuan Zhang, Philip Becker-Ehmck, Patrick van der Smagt, Maximilian Karl", "tldr": "", "abstract": "Unlike most reinforcement learning agents which require an unrealistic amount of environment interactions to learn a new behaviour, humans excel at learning quickly by merely observing and imitating others. This ability highly depends on the fact that humans have a model of their own embodiment that allows them to infer the most likely actions that led to the observed behaviour. In this paper, we propose Action Inference by Maximising Evidence (AIME) to replicate this behaviour using world models. AIME consists of two distinct phases. In the first phase, the agent learns a world model from its past experience to understand its own body by maximising the ELBO. While in the second phase, the agent is given some observation-only demonstrations of an expert performing a novel task and tries to imitate the expert's behaviour. AIME achieves this by defining a policy as an inference model and maximising the evidence of the demonstration under the policy and world model. Our method is \"zero-shot\" in the sense that it does not require further training for the world model or online interactions with the environment after given the demonstration. We empirically validate the zero-shot imitation performance of our method on the Walker and Cheetah embodiment of the DeepMind Control Suite and find it outperforms the state-of-the-art baselines. 
Code is available at: https://github.com/argmax-ai/aime.", "keywords": "Imitation Learning;World Models;Latent Variable Model;Transfer Learning;Variational Inference", "primary_area": "", "supplementary_material": "", "author": "Xingyuan Zhang;Philip Becker-Ehmck;Patrick van der Smagt;Maximilian Karl", "authorids": "~Xingyuan_Zhang1;~Philip_Becker-Ehmck1;~Patrick_van_der_Smagt1;~Maximilian_Karl1", "gender": "M;M;M;M", "homepage": "https://icaruswizard.github.io/;;https://argmax.org;", "dblp": "66/3825;241/9813;24/6573.html;", "google_scholar": "BUqNLqcAAAAJ;https://scholar.google.de/citations?user=eFLK7OYAAAAJ;https://scholar.google.de/citations?user=5ybzvbsAAAAJ;noekAeoAAAAJ", "orcid": "0000-0001-9965-4490;;0000-0003-4418-4916;0000-0001-8959-368X", "linkedin": "xingyuan-zhang-b0069a112/;;smagt/;", "or_profile": "~Xingyuan_Zhang1;~Philip_Becker-Ehmck1;~Patrick_van_der_Smagt1;~Maximilian_Karl1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Volkswagen Group;Machine Learning Research Lab; Volkswagen Group;Machine Learning Research Lab, Volkswagen Group", "aff_domain": "tum.de;volkswagen.de;volkswagen.de;volkswagen.de", "position": "PhD student;Researcher;Full Professor;AI research scientist", "bibtex": "@inproceedings{\nzhang2023action,\ntitle={Action Inference by Maximising Evidence: Zero-Shot Imitation from Observation with World Models},\nauthor={Xingyuan Zhang and Philip Becker-Ehmck and Patrick van der Smagt and Maximilian Karl},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WjlCQxpuxU}\n}", "github": "", "project": "", "reviewers": "eCVB;nqaR;Cs3Z;MApq", "pdf_size": 1776513, "rating": "5;6;6;7", "confidence": "3;4;4;4", "soundness": "3;2;2;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "68;59;85;177", "wc_strengths": "29;16;197;89", "wc_weaknesses": "118;67;176;441", "wc_questions": "13;108;215;23", "wc_limitations": "1;9;189;8", "wc_review": "229;259;862;738", "wc_reply_reviewers": "24;21;30;64", "wc_reply_authors": "203;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.25, 46.980714128246284 ], "wc_strengths_avg": [ 82.75, 71.47858070778966 ], "wc_weaknesses_avg": [ 200.5, 144.10846609411954 ], "wc_questions_avg": [ 89.75, 81.18920802668295 ], "wc_limitations_avg": [ 51.75, 79.30124526134504 ], "wc_review_avg": [ 522.0, 281.63540260414703 ], "wc_reply_reviewers_avg": [ 34.75, 17.195566289017645 ], "wc_reply_authors_avg": [ 50.75, 87.90157848412052 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10132740115803905444&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tum.de;volkswagen.de;volkswagen.de;volkswagen.de", "author_num": 4, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Volkswagen Group;Machine Learning Research Lab", "aff_unique_dep": ";;Machine Learning Research", "aff_unique_url": "https://www.tum.de;https://www.volkswagenag.com;", "aff_unique_abbr": "TUM;VW Group;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "Germany;" }, { "title": "Large Language Models as Commonsense Knowledge for Large-Scale Task Planning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71394", "id": "Wjp1AYB8lH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65a39213d7d0e1eb5d192aa77e77eeb7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Wjp1AYB8lH", "openreview": "https://openreview.net/forum?id=Wjp1AYB8lH", "poster": "/media/PosterPDFs/NeurIPS%202023/71394.png?t=1701672829.859892", "slides": "https://nips.cc/virtual/2023/poster/71394", "video": "https://nips.cc/virtual/2023/poster/71394", "author_site": "Zirui Zhao, Wee Sun Lee, David Hsu", "tldr": "", "abstract": "Large-scale task planning is a major challenge. Recent work exploits large\n language models (LLMs) directly as a policy and shows surprisingly\n interesting results. This paper shows that LLMs provide a\n commonsense model of the world in addition to a policy that acts on\n it. The world model and the policy can be combined in a search\n algorithm, such as Monte Carlo Tree Search (MCTS), to scale up task\n planning. In our new LLM-MCTS algorithm, the LLM-induced world model\n provides a commonsense prior belief for MCTS to achieve effective reasoning;\n the LLM-induced policy acts as a heuristic to guide the search, vastly\n improving search efficiency. Experiments show that LLM-MCTS outperforms\n both MCTS alone and policies induced by LLMs (GPT2 and GPT3.5) by a wide\n margin, for complex, novel tasks. \n Further experiments and analyses on multiple tasks --\n multiplication, travel planning, object rearrangement --\n suggest minimum description length (MDL)\n as a general guiding principle: if the\n description length of the world model is substantially smaller than that of the\n policy, using LLM as a world model for model-based planning is likely better\n than using LLM solely as a policy.", "keywords": "Embodied Task Planning;Large Language Models;Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/f78054a18bdeea5bf1e8cf71e138ea2c7ec35ae6.pdf", "author": "Zirui Zhao;Wee Sun Lee;David Hsu", "authorids": "~Zirui_Zhao1;~Wee_Sun_Lee1;~David_Hsu1", "gender": "M;M;M", "homepage": ";http://www.comp.nus.edu.sg/~leews/;http://www.comp.nus.edu.sg/~dyhsu/", "dblp": "211/7197;86/1498;29/331", "google_scholar": "sKs2OLUAAAAJ;https://scholar.google.com.sg/citations?user=8PCrLgwAAAAJ;S9LHLKEAAAAJ", "orcid": "0009-0001-5041-4194;;0000-0002-2309-4535", "linkedin": "zirui-zhao-228173186/;;david-hsu-a86200a1/", "or_profile": "~Zirui_Zhao1;~Wee_Sun_Lee1;~David_Hsu1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;nus.edu.sg", "position": "PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nzhao2023large,\ntitle={Large Language Models as Commonsense Knowledge for Large-Scale Task Planning},\nauthor={Zirui Zhao and Wee Sun Lee and David Hsu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Wjp1AYB8lH}\n}", "github": "", "project": "", "reviewers": "RKS5;jShB;3x3m;euNw;Mf2f", "pdf_size": 5787097, "rating": "4;6;6;7;7", "confidence": "4;3;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;4;3", "presentation": "2;4;3;3;2", "wc_summary": "267;73;96;60;193", "wc_strengths": "81;98;64;59;41", "wc_weaknesses": "72;55;262;101;55", 
"wc_questions": "101;54;43;14;214", "wc_limitations": "29;1;13;38;4", "wc_review": "550;281;478;272;507", "wc_reply_reviewers": "111;0;208;0;47", "wc_reply_authors": "404;0;286;0;16", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;3;1;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 137.8, 79.67283100279543 ], "wc_strengths_avg": [ 68.6, 19.458674158328463 ], "wc_weaknesses_avg": [ 109.0, 78.32496409191644 ], "wc_questions_avg": [ 85.2, 70.23218635355161 ], "wc_limitations_avg": [ 17.0, 14.324803663575986 ], "wc_review_avg": [ 417.6, 117.49825530619593 ], "wc_reply_reviewers_avg": [ 73.2, 78.74363466338089 ], "wc_reply_authors_avg": [ 141.2, 170.63458031712096 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 246, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18406274173092589648&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "u.nus.edu;nus.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Contextual Bandits and Imitation Learning with Preference-Based Active Queries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71393", "id": "WmqYhqvz5i", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2567c95fd41459a98a73ba893775d22a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WmqYhqvz5i", "openreview": "https://openreview.net/forum?id=WmqYhqvz5i", "poster": "/media/PosterPDFs/NeurIPS%202023/71393.png?t=1701901373.2091072", "slides": "https://nips.cc/virtual/2023/poster/71393", "video": "https://nips.cc/virtual/2023/poster/71393", "author_site": "Ayush Sekhari, Karthik Sridharan, Wen Sun, Runzhe Wu", "tldr": "", "abstract": "We consider the problem of contextual bandits and imitation learning, where the learner lacks direct knowledge of the executed action's reward. Instead, the learner can actively request the expert at each round to compare two actions and receive noisy preference feedback. The learner's objective is two-fold: to minimize regret associated with the executed actions, while simultaneously, minimizing the number of comparison queries made to the expert. In this paper, we assume that the learner has access to a function class that can represent the expert's preference model under appropriate link functions and present an algorithm that leverages an online regression oracle with respect to this function class. For the contextual bandit setting, our algorithm achieves a regret bound that combines the best of both worlds, scaling as $O(\\min\\\\{\\sqrt{T}, d/\\Delta\\\\})$, where $T$ represents the number of interactions, $d$ represents the eluder dimension of the function class, and $\\Delta$ represents the minimum preference of the optimal action over any suboptimal action under all contexts. 
Our algorithm does not require the knowledge of $\\Delta$, and the obtained regret bound is comparable to what can be achieved in the standard contextual bandits setting where the learner observes reward signals at each round. Additionally, our algorithm makes only $O(\\min\\\\{T, d^2/\\Delta^2\\\\})$ queries to the expert. We then extend our algorithm to the imitation learning setting, where the agent engages with an unknown environment in episodes of length $H$, and provide similar guarantees regarding regret and query complexity. Interestingly, with preference-based feedback, our imitation learning algorithm can learn a policy outperforming a sub-optimal expert, matching the result from interactive imitation learning algorithms [Ross and Bagnell, 2014] that require access to the expert's actions and also reward signals.", "keywords": "Contextual Bandit;Imitation Learning;Learning from Expert Feedback;Theory", "primary_area": "", "supplementary_material": "/attachment/4253e24c398a2588ba4b62cc9c067fa773b8c3a6.pdf", "author": "Ayush Sekhari;Karthik Sridharan;Wen Sun;Runzhe Wu", "authorids": "~Ayush_Sekhari1;~Karthik_Sridharan1;~Wen_Sun1;~Runzhe_Wu1", "gender": "M;M;;M", "homepage": "https://ayush.sekhari.com/;http://www.cs.cornell.edu/~sridharan/;https://wensun.github.io;https://ziqian2000.github.io/", "dblp": "203/8152;s/KarthikSridharan;;294/9552", "google_scholar": "jH9i188AAAAJ;https://scholar.google.com.tw/citations?user=nX9D5AoAAAAJ;iOLC30YAAAAJ;eBtFiuAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ayush_Sekhari1;~Karthik_Sridharan1;~Wen_Sun1;~Runzhe_Wu1", "aff": "Massachusetts Institute of Technology;Cornell University;Cornell University;Cornell University", "aff_domain": "mit.edu;cornell.edu;cornell.edu;cornell.edu", "position": "Postdoc;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsekhari2023contextual,\ntitle={Contextual Bandits and Imitation Learning with Preference-Based Active Queries},\nauthor={Ayush Sekhari and Karthik Sridharan and Wen Sun and Runzhe Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WmqYhqvz5i}\n}", "github": "", "project": "", "reviewers": "CTw4;PzSz;K3eU;FECY", "pdf_size": 510319, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "322;59;53;152", "wc_strengths": "41;76;79;80", "wc_weaknesses": "90;201;101;73", "wc_questions": "21;146;31;6", "wc_limitations": "14;51;26;33", "wc_review": "488;533;290;344", "wc_reply_reviewers": "14;17;0;25", "wc_reply_authors": "0;0;0;584", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 146.5, 108.661170617659 ], "wc_strengths_avg": [ 69.0, 16.232683080747925 ], "wc_weaknesses_avg": [ 116.25, 49.936835101956554 ], "wc_questions_avg": [ 51.0, 55.56527692723217 ], "wc_limitations_avg": [ 31.0, 13.397761006974262 ], "wc_review_avg": [ 413.75, 99.89087796190401 ], "wc_reply_reviewers_avg": [ 14.0, 9.027735042633894 ], "wc_reply_authors_avg": [ 146.0, 252.87941790505607 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7507292550812976327&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "mit.edu;cornell.edu;cornell.edu;cornell.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.cornell.edu", "aff_unique_abbr": "MIT;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Accelerating Value Iteration with Anchoring", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71392", "id": "Wn82NbmvJy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a8f2713b5c6bdcd3d264f1aa9b9c6f03-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Wn82NbmvJy", "openreview": "https://openreview.net/forum?id=Wn82NbmvJy", "poster": "/media/PosterPDFs/NeurIPS%202023/71392.png?t=1702466357.54845", "slides": "https://nips.cc/virtual/2023/poster/71392", "video": "https://nips.cc/virtual/2023/poster/71392", "author_site": "Jongmin Lee, Ernest Ryu", "tldr": "", "abstract": "Value Iteration (VI) is foundational to the theory and practice of modern reinforcement learning, and it is known to converge at a $\\mathcal{O}(\\gamma^k)$-rate. Surprisingly, however, the optimal rate for the VI setup was not known, and finding a general acceleration mechanism has been an open problem. In this paper, we present the first accelerated VI for both the Bellman consistency and optimality operators. Our method, called Anc-VI, is based on an \\emph{anchoring} mechanism (distinct from Nesterov's acceleration), and it reduces the Bellman error faster than standard VI. In particular, Anc-VI exhibits a $\\mathcal{O}(1/k)$-rate for $\\gamma\\approx 1$ or even $\\gamma=1$, while standard VI has rate $\\mathcal{O}(1)$ for $\\gamma\\ge 1-1/k$, where $k$ is the iteration count. We also provide a complexity lower bound matching the upper bound up to a constant factor of $4$, thereby establishing optimality of the accelerated rate of Anc-VI. Finally, we show that the anchoring mechanism provides the same benefit in the approximate VI and Gauss--Seidel VI setups as well.", "keywords": "Value Iteration;Reinforcement Learning;Reinforcement Learning Theory;Dynamic Programming;Acceleration;Anchoring mechanism", "primary_area": "", "supplementary_material": "/attachment/1a89919c3e51376700e72fb3e740e30e18896bf7.pdf", "author": "Jongmin Lee;Ernest K. Ryu", "authorids": "~Jongmin_Lee3;~Ernest_K._Ryu1", "gender": "M;M", "homepage": "https://sites.google.com/snu.ac.kr/jongminlee/%ED%99%88;http://www.math.snu.ac.kr/~ernestryu/", "dblp": ";165/5192", "google_scholar": "G9DVxcQAAAAJ;CNOqUZoAAAAJ", "orcid": ";0000-0001-6820-9095", "linkedin": ";", "or_profile": "~Jongmin_Lee3;~Ernest_K._Ryu1", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlee2023accelerating,\ntitle={Accelerating Value Iteration with Anchoring},\nauthor={Jongmin Lee and Ernest K. 
Ryu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Wn82NbmvJy}\n}", "github": "", "project": "", "reviewers": "GSn8;8tEg;e9mJ;tQc7", "pdf_size": 474828, "rating": "6;6;7;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "96;77;35;266", "wc_strengths": "144;42;30;145", "wc_weaknesses": "729;132;42;2", "wc_questions": "773;52;374;90", "wc_limitations": "39;8;34;47", "wc_review": "1781;311;515;550", "wc_reply_reviewers": "143;44;25;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 118.5, 87.973007223807 ], "wc_strengths_avg": [ 90.25, 54.41679428264771 ], "wc_weaknesses_avg": [ 226.25, 294.0564359098437 ], "wc_questions_avg": [ 322.25, 288.45656085448985 ], "wc_limitations_avg": [ 32.0, 14.611639196202457 ], "wc_review_avg": [ 789.25, 579.8156495818305 ], "wc_reply_reviewers_avg": [ 58.0, 49.88486744494767 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14586268091369133832&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "snu.ac.kr;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "Wp7TIOaDbb", "title": "Approximating Nash Equilibria in Normal-Form Games via Unbiased Stochastic Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose the first, to our knowledge, loss function for approximate Nash equilibria of normal-form games that is amenable to unbiased Monte Carlo estimation. This construction allows us to deploy standard non-convex stochastic optimization techniques for approximating Nash equilibria, resulting in novel algorithms with provable guarantees. 
We complement our theoretical analysis with experiments demonstrating that stochastic gradient descent can outperform previous state-of-the-art approaches.", "keywords": "game theory;stochastic optimization;nash equilibrium;normal-form game;x-armed bandits", "primary_area": "", "supplementary_material": "/attachment/74f4f3b4cee2b9c5a113bbf44dbe05d2b54c7c77.pdf", "author": "Ian Gemp;Luke Marris;Georgios Piliouras", "authorids": "~Ian_Gemp1;~Luke_Marris2;~Georgios_Piliouras1", "gender": "M;;", "homepage": "https://imgemp.github.io/;https://www.lukemarris.info/;", "dblp": "66/10996;223/4422;62/1236", "google_scholar": "5vo3MeEAAAAJ;dvTeSX4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ian_Gemp1;~Luke_Marris2;~Georgios_Piliouras1", "aff": "Google DeepMind;University College London;Singapore University of Technology and Design", "aff_domain": "google.com;ucl.ac.uk;sutd.edu.sg", "position": "Research Scientist;PhD student;Associate Professor", "bibtex": "@misc{\ngemp2023approximating,\ntitle={Approximating Nash Equilibria in Normal-Form Games via Unbiased Stochastic Optimization},\nauthor={Ian Gemp and Luke Marris and Georgios Piliouras},\nyear={2023},\nurl={https://openreview.net/forum?id=Wp7TIOaDbb}\n}", "github": "", "project": "", "reviewers": "4TEh;ksS3;rzbR;Deih;JGDw", "site": "https://openreview.net/forum?id=Wp7TIOaDbb", "pdf_size": 10000360, "rating": "4;5;6;6;6", "confidence": "4;3;2;3;2", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;4;3;2;3", "wc_summary": "63;66;64;191;66", "wc_strengths": "114;44;85;46;41", "wc_weaknesses": "566;109;53;10;93", "wc_questions": "34;22;93;128;6", "wc_limitations": "6;8;11;26;28", "wc_review": "783;249;306;401;234", "wc_reply_reviewers": "382;0;300;0;13", "wc_reply_authors": "1843;0;309;0;0", "reply_reviewers": "5;0;3;0;1", "reply_authors": "8;1;3;1;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 90.0, 50.513364568201155 ], "wc_strengths_avg": [ 66.0, 28.892905703649813 ], "wc_weaknesses_avg": [ 166.2, 202.82149787436242 ], "wc_questions_avg": [ 56.6, 46.24110725317897 ], "wc_limitations_avg": [ 15.8, 9.303762679690406 ], "wc_review_avg": [ 394.6, 202.8483177154792 ], "wc_reply_reviewers_avg": [ 139.0, 167.02574651831375 ], "wc_reply_authors_avg": [ 430.4, 716.3671125896275 ], "reply_reviewers_avg": [ 1.8, 1.9390719429665317 ], "reply_authors_avg": [ 2.8, 2.7129319932501077 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8685990362153791, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M1O3p5piNgwJ:scholar.google.com/&scioq=Approximating+Nash+Equilibria+in+Normal-Form+Games+via+Unbiased+Stochastic+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;University College London;Singapore University of Technology and Design", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.ucl.ac.uk;https://www.sutd.edu.sg", "aff_unique_abbr": "DeepMind;UCL;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Singapore" }, { "title": "Joint Training of Deep Ensembles Fails Due to Learner Collusion", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71391", "id": "WpGLxnOWhn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2bde8fef08f7ebe42b584266cbcfc909-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WpGLxnOWhn", "openreview": "https://openreview.net/forum?id=WpGLxnOWhn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71391", "video": "https://nips.cc/virtual/2023/poster/71391", "author_site": "Alan Jeffares, Tennison Liu, Jonathan Crabb\u00e9, Mihaela van der Schaar", "tldr": "", "abstract": "Ensembles of machine learning models have been well established as a powerful method of improving performance over a single model. Traditionally, ensembling algorithms train their base learners independently or sequentially with the goal of optimizing their joint performance. In the case of deep ensembles of neural networks, we are provided with the opportunity to directly optimize the true objective: the joint performance of the ensemble as a whole. Surprisingly, however, directly minimizing the loss of the ensemble appears to rarely be applied in practice. Instead, most previous research trains individual models independently with ensembling performed _post hoc_. In this work, we show that this is for good reason - _joint optimization of ensemble loss results in degenerate behavior_. We approach this problem by decomposing the ensemble objective into the strength of the base learners and the diversity between them. We discover that joint optimization results in a phenomenon in which base learners collude to artificially inflate their apparent diversity. This pseudo-diversity fails to generalize beyond the training data, causing a larger generalization gap. We proceed to comprehensively demonstrate the practical implications of this effect on a range of standard machine learning tasks and architectures by smoothly interpolating between independent training and joint optimization.", "keywords": "Deep Ensembles;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Alan Jeffares;Tennison Liu;Jonathan Crabb\u00e9;Mihaela van der Schaar", "authorids": "~Alan_Jeffares1;~Tennison_Liu1;~Jonathan_Crabb\u00e91;~Mihaela_van_der_Schaar2", "gender": ";M;M;F", "homepage": "https://alanjeffares.com;https://www.vanderschaar-lab.com/research-team/#Tennison;https://jonathancrabbe.github.io/;https://www.vanderschaar-lab.com", "dblp": "304/1985;256/9899;278/8353.html;", "google_scholar": "e65kJ08AAAAJ;LtdHRjsAAAAJ;Y_Nmd2sAAAAJ;DZ3S--MAAAAJ", "orcid": ";;0000-0002-0341-7712;", "linkedin": "alanjeffares;;jonathan-crabb%C3%A9-4ab5701a5/;", "or_profile": "~Alan_Jeffares1;~Tennison_Liu1;~Jonathan_Crabb\u00e91;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\njeffares2023joint,\ntitle={Joint Training of Deep Ensembles Fails Due to Learner Collusion},\nauthor={Alan Jeffares and Tennison Liu and Jonathan Crabb{\\'e} and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WpGLxnOWhn}\n}", "github": "", "project": "", "reviewers": "m1Sy;XMTw;a4xj;pZUZ", "pdf_size": 732490, "rating": "4;5;7;7", "confidence": "4;4;3;3", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "4;1;4;3", "wc_summary": 
"113;46;176;152", "wc_strengths": "89;52;63;53", "wc_weaknesses": "345;109;92;167", "wc_questions": "50;5;120;128", "wc_limitations": "1;1;7;10", "wc_review": "598;213;458;510", "wc_reply_reviewers": "0;152;22;31", "wc_reply_authors": "0;937;0;32", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 121.75, 49.175069903356516 ], "wc_strengths_avg": [ 64.25, 14.922717580923388 ], "wc_weaknesses_avg": [ 178.25, 100.20822072065745 ], "wc_questions_avg": [ 75.75, 50.88405938995041 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 444.75, 142.85197758519132 ], "wc_reply_reviewers_avg": [ 51.25, 59.251054842930856 ], "wc_reply_authors_avg": [ 242.25, 401.3267839305022 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13518051223860385742&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Regularizing Neural Networks with Meta-Learning Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71390", "id": "WpuBEtrn0t", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/572cd21bd5dea96b065476b77d21b3c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WpuBEtrn0t", "openreview": "https://openreview.net/forum?id=WpuBEtrn0t", "poster": "/media/PosterPDFs/NeurIPS%202023/71390.png?t=1701404893.7425804", "slides": "https://nips.cc/virtual/2023/poster/71390", "video": "https://nips.cc/virtual/2023/poster/71390", "author_site": "Shin'ya Yamaguchi, Daiki Chijiwa, Sekitoshi Kanai, Atsutoshi Kumagai, Hisashi Kashima", "tldr": "", "abstract": "This paper investigates methods for improving generative data augmentation for deep learning. Generative data augmentation leverages the synthetic samples produced by generative models as an additional dataset for classification with small dataset settings. A key challenge of generative data augmentation is that the synthetic data contain uninformative samples that degrade accuracy. This can be caused by the synthetic samples not perfectly representing class categories in real data and uniform sampling not necessarily providing useful samples for tasks. In this paper, we present a novel strategy for generative data augmentation called *meta generative regularization* (MGR). To avoid the degradation of generative data augmentation, MGR utilizes synthetic samples for regularizing feature extractors instead of training classifiers. These synthetic samples are dynamically determined to minimize the validation losses through meta-learning. 
We observed that MGR can avoid the performance degradation of naive generative data augmentation and boost the baselines. Experiments on six datasets showed that MGR is particularly effective when datasets are small and stably outperforms baselines by up to 7 percentage points in test accuracy.", "keywords": "Deep Learning;Generative Models;Generative Data Augmentation;Regularization;Meta-Learning", "primary_area": "", "supplementary_material": "/attachment/435b01fca750a5cb0f956139205f60dd4271fbeb.zip", "author": "Shin'ya Yamaguchi;Daiki Chijiwa;Sekitoshi Kanai;Atsutoshi Kumagai;Hisashi Kashima", "authorids": "~Shin'ya_Yamaguchi1;~Daiki_Chijiwa1;~Sekitoshi_Kanai1;~Atsutoshi_Kumagai2;~Hisashi_Kashima2", "gender": "M;M;M;M;M", "homepage": "https://yshinya6.github.io/;;https://sekitoshi.github.io/;https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ&hl=ja;https://hkashima.github.io/index_e.html", "dblp": "https://dblp.uni-trier.de/pers/y/Yamaguchi:Shin=ya;295/8488;209/4874;178/8630;27/4448", "google_scholar": "_xJYVD0AAAAJ;;qa2i5_IAAAAJ;https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ;bkTB0t8AAAAJ", "orcid": "0000-0001-9113-7405;;0000-0003-4383-4454;0000-0002-2915-4615;0000-0002-2770-0184", "linkedin": "shin-ya-yamaguchi-32183a154/;daiki-chijiwa-81491a1a7/;;;", "or_profile": "~Shin'ya_Yamaguchi1;~Daiki_Chijiwa1;~Sekitoshi_Kanai1;~Atsutoshi_Kumagai2;~Hisashi_Kashima2", "aff": "NTT;NTT;NTT;NTT;Kyoto University", "aff_domain": "ntt.co.jp;ntt.co.jp;ntt.co.jp;ntt.co.jp;kyoto-u.ac.jp", "position": "Researcher;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nyamaguchi2023regularizing,\ntitle={Regularizing Neural Networks with Meta-Learning Generative Models},\nauthor={Shin'ya Yamaguchi and Daiki Chijiwa and Sekitoshi Kanai and Atsutoshi Kumagai and Hisashi Kashima},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WpuBEtrn0t}\n}", "github": "", "project": "", "reviewers": "Ja7o;gssr;g8gt;ZFum", "pdf_size": 3531088, "rating": "3;5;7;7", "confidence": "5;4;3;5", "soundness": "3;3;4;3", "novelty": "2;3;3;2", "presentation": "3;3;4;3", "wc_summary": "92;47;89;158", "wc_strengths": "32;64;101;93", "wc_weaknesses": "278;212;141;328", "wc_questions": "13;30;104;145", "wc_limitations": "34;11;19;46", "wc_review": "449;364;454;770", "wc_reply_reviewers": "160;98;15;42", "wc_reply_authors": "702;169;26;28", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.5, 39.714606884621176 ], "wc_strengths_avg": [ 72.5, 27.13392710243027 ], "wc_weaknesses_avg": [ 239.75, 70.30780539883179 ], "wc_questions_avg": [ 73.0, 53.83771911959124 ], "wc_limitations_avg": [ 27.5, 13.5 ], "wc_review_avg": [ 509.25, 154.73424798666906 ], "wc_reply_reviewers_avg": [ 78.75, 55.647888549342106 ], "wc_reply_authors_avg": [ 231.25, 277.90229847915975 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4545454545454545, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16169475295510731850&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "ntt.co.jp;ntt.co.jp;ntt.co.jp;ntt.co.jp;kyoto-u.ac.jp", "author_num": 5,
"aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "NTT Corporation;Kyoto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntt.co.jp;https://www.kyoto-u.ac.jp", "aff_unique_abbr": "NTT;Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73565", "id": "WqSPQFxFRC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/89e44582fd28ddfea1ea4dcb0ebbf4b0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=WqSPQFxFRC", "openreview": "https://openreview.net/forum?id=WqSPQFxFRC", "poster": "/media/PosterPDFs/NeurIPS%202023/73565.png?t=1702308429.6880481", "slides": "https://nips.cc/virtual/2023/poster/73565", "video": "https://nips.cc/virtual/2023/poster/73565", "author_site": "Neel Guha, Julian Nyarko, Daniel Ho, Christopher R\u00e9, Adam Chilton, Aditya K, Alex Chohlas-Wood, Austin Peters, Brandon Waldon, Daniel Rockmore, Diego Zambrano, Dmitry Talisman, Enam Hoque, Faiz Surani, Frank Fagan, Galit Sarfaty, Gregory Dickinson, Haggai Porat, Jason Hegland, Jessica Wu, Joe Nudell, Joel Niklaus, John Nay, Jonathan Choi, Kevin Tobia, Margaret Hagan, Megan Ma, Michael Livermore, Nikon Rasumov-Rahe, Nils Holzenberger, Noam Kolt, Peter Henderson, Sean Rehaag, Sharad Goel, Shang Gao, Spencer Williams, Sunny Gandhi, Tom Zur, Varun Iyer, Zehua Li", "tldr": "", "abstract": "The advent of large language models (LLMs) and their adoption by the legal community has given rise to the question: what types of legal reasoning can LLMs perform? To enable greater study of this question, we present LegalBench: a collaboratively constructed legal reasoning benchmark consisting of 162 tasks covering six different types of legal reasoning. LegalBench was built through an interdisciplinary process, in which we collected tasks designed and hand-crafted by legal professionals. Because these subject matter experts took a leading role in construction, tasks either measure legal reasoning capabilities that are practically useful, or measure reasoning skills that lawyers find interesting. To enable cross-disciplinary conversations about LLMs in the law, we additionally show how popular legal frameworks for describing legal reasoning\u2014which distinguish between its many forms\u2014correspond to LegalBench tasks, thus giving lawyers and LLM developers a common vocabulary. This paper describes LegalBench, presents an empirical evaluation of 20 open-source and commercial LLMs, and illustrates the types of research explorations LegalBench enables.", "keywords": "large language models;law;benchmarks;legal reasoning;foundation models", "primary_area": "", "supplementary_material": "/attachment/1aae6f333dfa1b82974823f815d96766b131e06b.pdf", "author": "Neel Guha;Julian Nyarko;Daniel E. Ho;Christopher Re;Adam Chilton;Aditya Narayana;Alex Chohlas-Wood;Austin Peters;Brandon Waldon;Daniel Rockmore;Diego Zambrano;Dmitry Talisman;Enam Hoque;Faiz Surani;Frank Fagan;Galit Sarfaty;Gregory M. Dickinson;Haggai Porat;Jason Hegland;Jessica Wu;Joe Nudell;Joel Niklaus;John J Nay;Jonathan H. 
Choi;Kevin Tobia;Margaret Hagan;Megan Ma;Michael Livermore;Nikon Rasumov-Rahe;Nils Holzenberger;Noam Kolt;Peter Henderson;Sean Rehaag;Sharad Goel;Shang Gao;Spencer Williams;Sunny Gandhi;Tom Zur;Varun Iyer;Zehua Li", "authorids": "~Neel_Guha1;~Julian_Nyarko1;~Daniel_E._Ho1;~Christopher_Re1;~Adam_Chilton1;ak@maxime.tools;~Alex_Chohlas-Wood1;ajp014@stanford.edu;~Brandon_Waldon1;daniel.n.rockmore@dartmouth.edu;dzambran@law.stanford.edu;dmitry@maxime.tools;enam@lawbeta.com;~Faiz_Surani1;ffagan@stcl.edu;sarfaty@allard.ubc.ca;~Gregory_M._Dickinson1;hporat@sjd.law.harvard.edu;jhegland@law.stanford.edu;~Jessica_Wu1;~Joe_Nudell1;~Joel_Niklaus1;~John_J_Nay1;thechoipolloi@gmail.com;~Kevin_Tobia1;mdhagan@stanford.edu;meganma@law.stanford.edu;mlivermore@law.virginia.edu;nikon@maxime.tools;~Nils_Holzenberger1;~Noam_Kolt1;~Peter_Henderson1;srehaag@osgoode.yorku.ca;~Sharad_Goel1;~Shang_Gao7;swilliams@ggu.edu;~Sunny_Gandhi1;tzur@sjd.law.harvard.edu;varun_iyer@posteo.net;zehuali@stanford.edu", "gender": "M;;M;;M;;M;;M;;;;;M;;;;;;F;;M;;;;;;;;M;;M;;M;M;;;;;", "homepage": "http://neelguha.com;https://www.juliannyarko.com;https://dho.stanford.edu;;;;https://alexchohlaswood.com;;https://bwaldon.github.io;;;;;https://faizsurani.com;;;https://linqapp.com/gregory_dickinson;;;https://sla.law.stanford.edu;;https://niklaus.ai;http://johnjnay.com/;;https://www.law.georgetown.edu/faculty/kevin-tobia/;;;;;https://www.cs.jhu.edu/~nholzen1/;http://noamkolt.com/;http://www.peterhenderson.co/;;https://5harad.com/;;;https://www.sunnygandhi.me/;;;", "dblp": "130/0311;298/3501.html;240/9334;;;;186/5951;;;;;;;;;;;;;;;232/4545;;;;;;;;226/1982;;h/PeterHenderson2;;37/6406;;;;;;", "google_scholar": "YI5N4HQAAAAJ;;;;ulbqF7QAAAAJ;;ffGEnW4AAAAJ;;;;;;;;;;IfwgiyEAAAAJ;;;;https://scholar.google.com/citations?view_op=list_works;qJ8iricAAAAJ;;;;;;;;;;dy_JBs0AAAAJ;;Vv8UdowAAAAJ;dxxFrNIAAAAJ;;;;;", "orcid": ";0000-0002-7121-5696;;;;;0000-0002-8279-6270;;;;;;;;;;0009-0006-8992-3929;;;;;0000-0002-2779-1653;;;0000-0003-3447-9825;;;;;;;;;0000-0002-6103-9318;;;;;;", "linkedin": ";;;;;;;;;;;;;;;;greg-dickinson-7aa23356/;;;;;joelniklaus/;;;;;;;;;;phende/;;sharad-goel-1416104;hireshang/;;;;;", "or_profile": "~Neel_Guha1;~Julian_Nyarko1;~Daniel_E._Ho1;~Christopher_Re1;~Adam_Chilton1;ak@maxime.tools;~Alex_Chohlas-Wood1;ajp014@stanford.edu;~Brandon_Waldon1;daniel.n.rockmore@dartmouth.edu;dzambran@law.stanford.edu;dmitry@maxime.tools;enam@lawbeta.com;~Faiz_Surani1;ffagan@stcl.edu;sarfaty@allard.ubc.ca;~Gregory_M._Dickinson1;hporat@sjd.law.harvard.edu;jhegland@law.stanford.edu;~Jessica_Wu1;~Joe_Nudell1;~Joel_Niklaus1;~John_J_Nay1;thechoipolloi@gmail.com;~Kevin_Tobia1;mdhagan@stanford.edu;meganma@law.stanford.edu;mlivermore@law.virginia.edu;nikon@maxime.tools;~Nils_Holzenberger1;~Noam_Kolt1;~Peter_Henderson1;srehaag@osgoode.yorku.ca;~Sharad_Goel1;~Shang_Gao7;swilliams@ggu.edu;~Sunny_Gandhi1;tzur@sjd.law.harvard.edu;varun_iyer@posteo.net;zehuali@stanford.edu", "aff": "Stanford Law;Stanford University;Stanford University;;University of Chicago;;;;;;;;;University of California, Santa Barbara;;;St. 
Thomas University (FL);;;Stanford University;Stanford University;Stanford University;Stanford University;;Georgetown University;;;;;T\u00e9l\u00e9com ParisTech;Hebrew University of Jerusalem;Stanford University;;Harvard University;Casetext;;Indiana University;;;", "aff_domain": "law.stanford.edu;stanford.edu;stanford.edu;;uchicago.edu;;;;;;;;;ucsb.edu;;;stu.edu;;;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;georgetown.edu;;;;;telecom-paristech.fr;huji.ac.il;stanford.edu;;harvard.edu;casetext.com;;iu.edu;;;", "position": "JD;Full Professor;Professor;;Full Professor;;;;;;;;;Undergrad student;;;Assistant Professor;;;Researcher;Researcher;Researcher;Researcher;;Associate Professor;;;;;Associate Professor;Assistant Professor;PhD student;;Full Professor;Researcher;;Undergrad student;;;", "bibtex": "@inproceedings{\nguha2023legalbench,\ntitle={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models},\nauthor={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Re and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John J Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=WqSPQFxFRC}\n}", "github": "", "project": "", "reviewers": "ZLU8;tmL2;XJRt;BYVU;cUyJ", "pdf_size": 983081, "rating": "5;6;7;8;8", "confidence": "4;5;4;3;4", "wc_summary_and_contributions": "80;51;82;44;64", "wc_strengths": "72;67;39;38;66", "wc_improvement": "709;61;59;124;165", "wc_limitations": "34;269;14;1;159", "wc_correctness": "32;9;11;1;9", "wc_clarity": "15;14;10;3;18", "wc_relation_to_prior_work": "184;188;6;79;24", "wc_documentation": "280;45;4;2;20", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1407;705;226;293;526", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "732;742;155;320;429", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 64.2, 15.157836257197133 ], "wc_strengths_avg": [ 56.4, 14.759403781996074 ], "wc_improvement_avg": [ 223.6, 245.96715227851053 ], "wc_limitations_avg": [ 95.4, 103.43036304683456 ], "wc_correctness_avg": [ 12.4, 10.384603988597735 ], "wc_clarity_avg": [ 12.0, 5.176871642217914 ], "wc_relation_to_prior_work_avg": [ 96.2, 77.17616212276948 ], "wc_documentation_avg": [ 70.2, 106.02339364498762 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 631.4, 423.54343342802514 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 475.6, 230.5980051951881 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 40, 0 ], "corr_rating_confidence": -0.5423261445466404, "gs_citation": 216, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9333922395311077516&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, 
"email": "law.stanford.edu;stanford.edu;stanford.edu;;uchicago.edu;;;;;;;;;ucsb.edu;;;stu.edu;;;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;georgetown.edu;;;;;telecom-paristech.fr;huji.ac.il;stanford.edu;;harvard.edu;casetext.com;;iu.edu;;;", "author_num": 40, "aff_unique_index": "0;0;0;1;2;3;0;0;0;0;4;5;6;0;7;8;9", "aff_unique_norm": "Stanford University;University of Chicago;University of California, Santa Barbara;St. Thomas University;Georgetown University;T\u00e9l\u00e9com ParisTech;Hebrew University of Jerusalem;Harvard University;Casetext;Indiana University", "aff_unique_dep": "Stanford Law School;;;;;;;;;", "aff_unique_url": "https://law.stanford.edu;https://www.uchicago.edu;https://www.ucsb.edu;https://www.stthomas.edu;https://www.georgetown.edu;https://www.telecom-paristech.fr;https://www.huji.ac.il;https://www.harvard.edu;https://www.casetext.com;https://www.indiana.edu", "aff_unique_abbr": "Stanford Law;UChicago;UCSB;STU;GU;TP;HUJI;Harvard;;IU", "aff_campus_unique_index": "0;0;0;2;3;0;0;0;0;4;0", "aff_campus_unique": "Stanford;;Santa Barbara;Florida;Jerusalem", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;1;2;0;0;0;0", "aff_country_unique": "United States;France;Israel" }, { "title": "MotionGPT: Human Motion as a Foreign Language", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71389", "id": "WqiZJGNkjn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3fbf0c1ea0716c03dea93bb6be78dd6f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WqiZJGNkjn", "openreview": "https://openreview.net/forum?id=WqiZJGNkjn", "poster": "/media/PosterPDFs/NeurIPS%202023/71389.png?t=1702008422.2835786", "slides": "https://nips.cc/virtual/2023/poster/71389", "video": "https://nips.cc/virtual/2023/poster/71389", "author_site": "Biao Jiang, Xin Chen, Wen Liu, Jingyi Yu, Gang Yu, Tao Chen", "tldr": "", "abstract": "Though the advancement of pre-trained large language models unfolds, the exploration of building a unified model for language and other multimodal data, such as motion, remains challenging and untouched so far. Fortunately, human motion displays a semantic coupling akin to human language, often perceived as a form of body language. By fusing language data with large-scale motion models, motion-language pre-training that can enhance the performance of motion-related tasks becomes feasible. Driven by this insight, we propose MotionGPT, a unified, versatile, and user-friendly motion-language model to handle multiple motion-relevant tasks. Specifically, we employ the discrete vector quantization for human motion and transfer 3D motion into motion tokens, similar to the generation process of word tokens. Building upon this \"motion vocabulary\", we perform language modeling on both motion and text in a unified manner, treating human motion as a specific language. Moreover, inspired by prompt learning, we pre-train MotionGPT with a mixture of motion-language data and fine-tune it on prompt-based question-and-answer tasks. 
Extensive experiments demonstrate that MotionGPT achieves state-of-the-art performance on multiple motion tasks, including text-driven motion generation, motion captioning, motion prediction, and motion in-between.", "keywords": "3d motion;motion generation;human motion synthesis;text-driven;text-to-motion", "primary_area": "", "supplementary_material": "/attachment/f221a924949ac7b4de8f6d226d28b5439dd4aa56.pdf", "author": "Biao Jiang;Xin Chen;Wen Liu;Jingyi Yu;Gang YU;Tao Chen", "authorids": "~Biao_Jiang2;~Xin_Chen16;~Wen_Liu2;~Jingyi_Yu5;~Gang_YU2;~Tao_Chen6", "gender": ";M;M;M;M;M", "homepage": "https://github.com/billl-jiang;https://chenxin.tech/;https://github.com/StevenLiuWen;;https://skicyyu.org/;https://eetchen.github.io/", "dblp": "38/4792;24/1518-40;61/372-3;;;69/510-3", "google_scholar": ";7qeAJZ4AAAAJ;A6K6bkoAAAAJ;R9L_AfQAAAAJ;https://scholar.google.com.sg/citations?user=BJdigYsAAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ", "orcid": ";0000-0002-9347-1367;;;0000-0001-5570-2710;", "linkedin": ";xin-chen-cs/;;;;", "or_profile": "~Biao_Jiang2;~Xin_Chen16;~Wen_Liu2;~Jingyi_Yu5;~Gang_YU2;~Tao_Chen6", "aff": "Fudan University;Tencent;Tencent PCG;ShanghaiTech University;Tencent;Fudan University", "aff_domain": "fudan.edu.cn;tencent.com;tencent.com;shanghaitech.edu.cn;tencent.com;fudan.edu.cn", "position": "MS student;Researcher;Researcher;Full Professor;Research Scientist;Full Professor", "bibtex": "@inproceedings{\njiang2023motiongpt,\ntitle={Motion{GPT}: Human Motion as a Foreign Language},\nauthor={Biao Jiang and Xin Chen and Wen Liu and Jingyi Yu and Gang YU and Tao Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WqiZJGNkjn}\n}", "github": "", "project": "", "reviewers": "symu;gizA;hFS6;rpBK", "pdf_size": 1204782, "rating": "5;5;6;6", "confidence": "4;4;3;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "78;79;82;57", "wc_strengths": "53;42;80;81", "wc_weaknesses": "163;118;106;201", "wc_questions": "44;136;53;94", "wc_limitations": "11;1;6;5", "wc_review": "349;376;327;438", "wc_reply_reviewers": "10;12;21;61", "wc_reply_authors": "0;0;0;26", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.0, 9.924716620639604 ], "wc_strengths_avg": [ 64.0, 16.95582495781317 ], "wc_weaknesses_avg": [ 147.0, 37.72929895982696 ], "wc_questions_avg": [ 81.75, 36.55389856089224 ], "wc_limitations_avg": [ 5.75, 3.5619517121937516 ], "wc_review_avg": [ 372.5, 41.608292442733095 ], "wc_reply_reviewers_avg": [ 26.0, 20.627651344736268 ], "wc_reply_authors_avg": [ 6.5, 11.258330249197702 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 337, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1061280532192611229&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;tencent.com;tencent.com;shanghaitech.edu.cn;tencent.com;fudan.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "Fudan University;Tencent;ShanghaiTech University", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url":
"https://www.fudan.edu.cn;https://www.tencent.com;https://www.shanghaitech.edu.cn", "aff_unique_abbr": "Fudan;Tencent;ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Distributionally Robust Ensemble of Lottery Tickets Towards Calibrated Sparse Network Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71388", "id": "WrRG0C1Vo5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c5cf13bfd3762821ef7607e63ee90075-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WrRG0C1Vo5", "openreview": "https://openreview.net/forum?id=WrRG0C1Vo5", "poster": "/media/PosterPDFs/NeurIPS%202023/71388.png?t=1702228011.7684817", "slides": "https://nips.cc/virtual/2023/poster/71388", "video": "https://nips.cc/virtual/2023/poster/71388", "author_site": "Hitesh Sapkota, Dingrong Wang, Zhiqiang Tao, Qi Yu", "tldr": "", "abstract": "The recently developed sparse network training methods, such as Lottery Ticket Hypothesis (LTH) and its variants, have shown impressive learning capacity by finding sparse sub-networks from a dense one. While these methods could largely sparsify deep networks, they generally focus more on realizing comparable accuracy to dense counterparts yet neglect network calibration. However, how to achieve calibrated network predictions lies at the core of improving model reliability, especially when it comes to addressing the overconfident issue and out-of-distribution cases. In this study, we propose a novel Distributionally Robust Optimization (DRO) framework to achieve an ensemble of lottery tickets towards calibrated network sparsification. Specifically, the proposed DRO ensemble aims to learn multiple diverse and complementary sparse sub-networks (tickets) with the guidance of uncertainty sets, which encourage tickets to gradually capture different data distributions from easy to hard and naturally complement each other. We theoretically justify the strong calibration performance by showing how the proposed robust training process guarantees to lower the confidence of incorrect predictions. Extensive experimental results on several benchmarks show that our proposed lottery ticket ensemble leads to a clear calibration improvement without sacrificing accuracy and burdening inference costs. 
Furthermore, experiments on OOD datasets demonstrate the robustness of our approach in the open-set environment.", "keywords": "sparse network training;model calibration", "primary_area": "", "supplementary_material": "/attachment/828675c5e1ec8a27376ac56402d4d4df894bb1d6.pdf", "author": "Hitesh Sapkota;Dingrong Wang;ZHIQIANG TAO;Qi Yu", "authorids": "~Hitesh_Sapkota1;~Dingrong_Wang1;~ZHIQIANG_TAO2;~Qi_Yu1", "gender": "M;M;;M", "homepage": "https://hiteshsapkota.github.io/;https://wdr123.github.io;http://ztao.cc/;https://www.rit.edu/mining/", "dblp": "251/4284;276/3229;135/5229.html;58/6957-1", "google_scholar": "0FKsBXYAAAAJ;v--3Zr0AAAAJ;sEKglOkAAAAJ;L3gWdfEAAAAJ", "orcid": ";0009-0005-2407-2337;;0000-0002-0426-5407", "linkedin": "hitesh-sapkota-2226051ba/;dingrong-wang-56a203179/;;", "or_profile": "~Hitesh_Sapkota1;~Dingrong_Wang1;~ZHIQIANG_TAO2;~Qi_Yu1", "aff": "Rochester Institute of Technology;Rochester Institute of Technology;Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "rit.edu;rit.edu;rit.edu;rit.edu", "position": "PhD student;PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nsapkota2023distributionally,\ntitle={Distributionally Robust Ensemble of Lottery Tickets Towards Calibrated Sparse Network Training},\nauthor={Hitesh Sapkota and Dingrong Wang and ZHIQIANG TAO and Qi Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WrRG0C1Vo5}\n}", "github": "", "project": "", "reviewers": "RrYi;fvyR;xGvL;TYXu", "pdf_size": 2605274, "rating": "3;5;5;5", "confidence": "5;4;3;3", "soundness": "3;3;3;2", "novelty": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "68;104;137;137", "wc_strengths": "23;66;47;106", "wc_weaknesses": "346;69;79;329", "wc_questions": "6;54;4;45", "wc_limitations": "1;7;1;24", "wc_review": "444;300;268;641", "wc_reply_reviewers": "0;53;10;514", "wc_reply_authors": "155;345;127;517", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 111.5, 28.5 ], "wc_strengths_avg": [ 60.5, 30.36856927812043 ], "wc_weaknesses_avg": [ 205.75, 131.93440605088577 ], "wc_questions_avg": [ 27.25, 22.487496525847426 ], "wc_limitations_avg": [ 8.25, 9.41740410091868 ], "wc_review_avg": [ 413.25, 147.25721544291133 ], "wc_reply_reviewers_avg": [ 144.25, 214.4019297954195 ], "wc_reply_authors_avg": [ 286.0, 157.54681843820268 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15190518667188020416&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "rit.edu;rit.edu;rit.edu;rit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Weighted ROC Curve in Cost Space: Extending AUC to Cost-Sensitive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71387", 
"id": "WsmBcJarWW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38687a6a01f127d6db92561508a225b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WsmBcJarWW", "openreview": "https://openreview.net/forum?id=WsmBcJarWW", "poster": "/media/PosterPDFs/NeurIPS%202023/71387.png?t=1698052832.4857266", "slides": "https://nips.cc/virtual/2023/poster/71387", "video": "https://nips.cc/virtual/2023/poster/71387", "author_site": "HuiYang Shao, Qianqian Xu, Zhiyong Yang, Peisong Wen, Gao Peifeng, Qingming Huang", "tldr": "", "abstract": "In this paper, we aim to tackle flexible cost requirements for long-tail datasets, where we need to construct a (a) cost-sensitive and (b) class-distribution robust learning framework. The misclassification cost and the area under the ROC curve (AUC) are popular metrics for (a) and (b), respectively. However, limited by their formulations, models trained with AUC cannot be applied to cost-sensitive decision problems, and models trained with fixed costs are sensitive to the class distribution shift. To address this issue, we present a new setting where costs are treated like a dataset to deal with arbitrarily unknown cost distributions. Moreover, we propose a novel weighted version of AUC where the cost distribution can be integrated into its calculation through decision thresholds. To formulate this setting, we propose a novel bilevel paradigm to bridge weighted AUC (WAUC) and cost. The inner-level problem approximates the optimal threshold from sampling costs, and the outer-level problem minimizes the WAUC loss over the optimal threshold distribution. To optimize this bilevel paradigm, we employ a stochastic optimization algorithm (SACCL) to optimize it. Finally, experiment results show that our algorithm performs better than existing cost-sensitive learning methods and two-stage AUC decisions approach.", "keywords": "AUC;Cost Learning;Bilevel;machine learning", "primary_area": "", "supplementary_material": "/attachment/70f385bc42b899788b8799f8e689136793e0191e.pdf", "author": "Huiyang Shao;Qianqian Xu;Zhiyong Yang;Peisong Wen;Gao Peifeng;Qingming Huang", "authorids": "~Huiyang_Shao2;~Qianqian_Xu2;~Zhiyong_Yang1;~Peisong_Wen1;~Gao_Peifeng2;~Qingming_Huang1", "gender": ";F;M;M;M;", "homepage": ";http://vipl.ict.ac.cn/people/~qianqianxu;https://joshuaas.github.io/;https://github.com/KID-7391;https://github.com/h-summit;", "dblp": ";07/7627;01/452-1.html;276/3218;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=MjifS2MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Zk2XLWYAAAAJ;;", "orcid": ";;0000-0002-4409-4999;;;", "linkedin": ";;;;;", "or_profile": "~Huiyang_Shao2;~Qianqian_Xu2;~Zhiyong_Yang1;~Peisong_Wen1;~Gao_Peifeng2;~Qingming_Huang1", "aff": ";Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Chinese Academy of Sciences;University of Chinese Academy of Sciences;", "aff_domain": ";ict.ac.cn;ucas.ac.cn;ict.ac.cn;ucas.ac.cn;", "position": ";Full Professor;Postdoc;PhD student;Undergrad student;", "bibtex": "@inproceedings{\nshao2023weighted,\ntitle={Weighted {ROC} Curve in Cost Space: Extending {AUC} to Cost-Sensitive Learning},\nauthor={Huiyang Shao and Qianqian Xu and Zhiyong Yang and Peisong Wen and Gao Peifeng and Qingming Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WsmBcJarWW}\n}", "github": "", "project": "", "reviewers": 
"Trb9;mY4P;uAig;uhz4;N1ih", "pdf_size": 3955035, "rating": "5;5;6;6;7", "confidence": "4;3;2;3;2", "soundness": "3;3;3;3;3", "novelty": "3;2;3;2;3", "presentation": "3;3;2;3;4", "wc_summary": "53;85;111;56;58", "wc_strengths": "29;28;18;62;93", "wc_weaknesses": "28;86;213;85;17", "wc_questions": "54;102;30;3;19", "wc_limitations": "21;24;80;22;1", "wc_review": "185;325;452;228;188", "wc_reply_reviewers": "91;105;202;67;20", "wc_reply_authors": "220;417;144;0;0", "reply_reviewers": "2;2;2;1;1", "reply_authors": "2;3;3;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.6, 22.36604569431083 ], "wc_strengths_avg": [ 46.0, 27.79208520424475 ], "wc_weaknesses_avg": [ 85.8, 69.64883344320994 ], "wc_questions_avg": [ 41.6, 34.45925129772845 ], "wc_limitations_avg": [ 29.6, 26.53752060762271 ], "wc_review_avg": [ 275.6, 101.69090421468381 ], "wc_reply_reviewers_avg": [ 97.0, 59.92328428916426 ], "wc_reply_authors_avg": [ 156.2, 155.58457507092405 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1090190445441082347&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";ict.ac.cn;ucas.ac.cn;ict.ac.cn;ucas.ac.cn;", "author_num": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Motion-X: A Large-scale 3D Expressive Whole-body Human Motion Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73564", "id": "WtajAo0JWU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4f8e27f6036c1d8b4a66b5b3a947dd7b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=WtajAo0JWU", "openreview": "https://openreview.net/forum?id=WtajAo0JWU", "poster": "/media/PosterPDFs/NeurIPS%202023/73564.png?t=1701419376.037487", "slides": "https://nips.cc/virtual/2023/poster/73564", "video": "https://nips.cc/virtual/2023/poster/73564", "author_site": "Jing Lin, Ailing Zeng, Shunlin Lu, Yuanhao Cai, Ruimao Zhang, Haoqian Wang, Lei Zhang", "tldr": "", "abstract": "In this paper, we present Motion-X, a large-scale 3D expressive whole-body motion dataset. Existing motion datasets predominantly contain body-only poses, lacking facial expressions, hand gestures, and fine-grained pose descriptions. Moreover, they are primarily collected from limited laboratory scenes with textual descriptions manually labeled, which greatly limits their scalability. To overcome these limitations, we develop a whole-body motion and text annotation pipeline, which can automatically annotate motion from either single- or multi-view videos and provide comprehensive semantic labels for each video and fine-grained whole-body pose descriptions for each frame. This pipeline is of high precision, cost-effective, and scalable for further research. 
Based on it, we construct Motion-X, which comprises 15.6M precise 3D whole-body pose annotations (i.e., SMPL-X) covering 81.1K motion sequences from massive scenes. Besides, Motion-X provides 15.6M frame-level whole-body pose descriptions and 81.1K sequence-level semantic labels. Comprehensive experiments demonstrate the accuracy of the annotation pipeline and the significant benefit of Motion-X in enhancing expressive, diverse, and natural motion generation, as well as 3D whole-body human mesh recovery.", "keywords": "3D Human Motion; Motion Generation; Mesh Recovery; Motion Capture", "primary_area": "", "supplementary_material": "/attachment/03294e3f3bac3c564f37f6c39f433eb590b8e284.zip", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nlin2023motionx,\ntitle={Motion-X: A Large-scale 3D Expressive Whole-body Human Motion Dataset},\nauthor={Jing Lin and Ailing Zeng and Shunlin Lu and Yuanhao Cai and Ruimao Zhang and Haoqian Wang and Lei Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=WtajAo0JWU}\n}", "github": "", "project": "", "reviewers": "4k8C;Kpcx;HxL5;XGnA;asP3", "pdf_size": 7182130, "rating": "6;6;6;7;8", "confidence": "5;4;4;4;5", "wc_summary_and_contributions": "70;53;134;67;102", "wc_strengths": "32;88;145;42;49", "wc_improvement": "425;180;136;59;138", "wc_limitations": "1;4;35;1;23", "wc_correctness": "1;1;29;1;14", "wc_clarity": "234;1;22;1;6", "wc_relation_to_prior_work": "1;1;32;1;6", "wc_documentation": "1;1;34;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "766;330;568;174;340", "wc_reply_reviewers": "24;10;11;35;12", "wc_reply_authors": "1344;678;1180;514;615", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;3;2;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 85.2, 29.198630104852523 ], "wc_strengths_avg": [ 71.2, 41.498915648484115 ], "wc_improvement_avg": [ 187.6, 124.96175414901954 ], "wc_limitations_avg": [ 12.8, 13.804347141389918 ], "wc_correctness_avg": [ 9.2, 11.106754701531855 ], "wc_clarity_avg": [ 52.8, 90.9272236461666 ], "wc_relation_to_prior_work_avg": [ 8.2, 12.056533498481228 ], "wc_documentation_avg": [ 7.6, 13.199999999999998 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 435.6, 207.56647128088872 ], "wc_reply_reviewers_avg": [ 18.4, 9.728309205612248 ], "wc_reply_authors_avg": [ 866.2, 331.4600428407623 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.4082482904638631, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8796064373571077274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "Probabilistic Invariant Learning with Randomized Linear Classifiers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71386", "id": "WwP2JaXAtB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec4f0b0a7557d6a51c42308800f2c23a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WwP2JaXAtB", "openreview": "https://openreview.net/forum?id=WwP2JaXAtB", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71386.png?t=1701192023.3059914", "slides": "https://nips.cc/virtual/2023/poster/71386", "video": "https://nips.cc/virtual/2023/poster/71386", "author_site": "Leonardo Cotta, Gal Yehuda, Assaf Schuster, Chris Maddison", "tldr": "", "abstract": "Designing models that are both expressive and preserve known invariances of tasks is an increasingly hard problem. Existing solutions tradeoff invariance for computational or memory resources. In this work, we show how to leverage randomness and design models that are both expressive and invariant but use less resources. Inspired by randomized algorithms, our key insight is that accepting probabilistic notions of universal approximation and invariance can reduce our resource requirements. More specifically, we propose a class of binary classification models called Randomized Linear Classifiers (RLCs). We give parameter and sample size conditions in which RLCs can, with high probability, approximate any (smooth) function while preserving invariance to compact group transformations. Leveraging this result, we design three RLCs that are provably probabilistic invariant for classification tasks over sets, graphs, and spherical data. We show how these models can achieve probabilistic invariance and universality using less resources than (deterministic) neural networks and their invariant counterparts. Finally, we empirically demonstrate the benefits of this new class of models on invariant tasks where deterministic invariant neural networks are known to struggle.", "keywords": "Invariant Learning;Geometric Deep Learning;Set Representations;Graph Representations;Expressive Power;Randomized Algorithms", "primary_area": "", "supplementary_material": "/attachment/a5f77970d000a6af95a4de45b55a2fc9abced945.pdf", "author": "Leonardo Cotta;Gal Yehuda;Assaf Schuster;Chris J. Maddison", "authorids": "~Leonardo_Cotta1;~Gal_Yehuda1;~Assaf_Schuster2;~Chris_J._Maddison1", "gender": "M;;M;M", "homepage": "https://cottascience.github.io/;https://sites.google.com/view/galyehuda/home;https://assaf.net.technion.ac.il/;http://www.cs.toronto.edu/~cmaddis/", "dblp": "183/1858.html;179/2610;s/AssafSchuster;139/1388", "google_scholar": "https://scholar.google.com.br/citations?user=0GI4MyoAAAAJ;yx6Z3g4AAAAJ;https://scholar.google.co.il/citations?user=KfwgjswAAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ", "orcid": ";;0000-0002-3311-6937;", "linkedin": "cotta/;;;", "or_profile": "~Leonardo_Cotta1;~Gal_Yehuda1;~Assaf_Schuster2;~Chris_J_Maddison1", "aff": "Vector Institute;Computer Science Departmen, Technion-Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Google", "aff_domain": "vectorinstitute.ai;cs.technion.ac.il;technion.ac.il;google.com", "position": "Postdoc;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\ncotta2023probabilistic,\ntitle={Probabilistic Invariant Learning with Randomized Linear Classifiers},\nauthor={Leonardo Cotta and Gal Yehuda and Assaf Schuster and Chris J. 
Maddison},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WwP2JaXAtB}\n}", "github": "", "project": "", "reviewers": "2JHn;EYc5;4yWn;iRPF;QYFX", "pdf_size": 550178, "rating": "4;5;6;7;7", "confidence": "3;2;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "87;74;111;68;99", "wc_strengths": "31;51;119;253;83", "wc_weaknesses": "26;174;93;451;212", "wc_questions": "58;16;104;116;3", "wc_limitations": "1;1;131;29;63", "wc_review": "203;316;558;917;460", "wc_reply_reviewers": "0;0;0;66;41", "wc_reply_authors": "0;0;0;505;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 87.8, 15.791136754521505 ], "wc_strengths_avg": [ 107.4, 78.67299409581409 ], "wc_weaknesses_avg": [ 191.2, 145.0233084714316 ], "wc_questions_avg": [ 59.4, 45.29724053405461 ], "wc_limitations_avg": [ 45.0, 48.67853736504416 ], "wc_review_avg": [ 490.8, 245.15089230920617 ], "wc_reply_reviewers_avg": [ 21.4, 27.375901811629877 ], "wc_reply_authors_avg": [ 101.0, 202.0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5423261445466404, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5394369314039867922&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "vectorinstitute.ai;cs.technion.ac.il;technion.ac.il;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Vector Institute;Technion-Israel Institute of Technology;Technion - Israel Institute of Technology;Google", "aff_unique_dep": ";Computer Science Department;;Google", "aff_unique_url": "https://vectorinstitute.ai/;https://www.technion.ac.il;https://www.technion.ac.il;https://www.google.com", "aff_unique_abbr": "Vector Institute;Technion;Technion;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Canada;Israel;United States" }, { "title": "STORM: Efficient Stochastic Transformer based World Models for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71385", "id": "WxnrX42rnS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5647763d4245b23e6a1cb0a8947b38c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=WxnrX42rnS", "openreview": "https://openreview.net/forum?id=WxnrX42rnS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71385", "video": "https://nips.cc/virtual/2023/poster/71385", "author_site": "Weipu Zhang, Gang Wang, Jian Sun, Yetian Yuan, Gao Huang", "tldr": "", "abstract": "Recently, model-based reinforcement learning algorithms have demonstrated remarkable efficacy in visual input environments. These approaches begin by constructing a parameterized simulation world model of the real environment through self-supervised learning. By leveraging the imagination of the world model, the agent's policy is enhanced without the constraints of sampling from the real environment. The performance of these algorithms heavily relies on the sequence modeling and generation capabilities of the world model. 
However, constructing a perfectly accurate model of a complex unknown environment is nearly impossible. Discrepancies between the model and reality may cause the agent to pursue virtual goals, resulting in subpar performance in the real environment. Introducing random noise into model-based reinforcement learning has been proven beneficial.\nIn this work, we introduce Stochastic Transformer-based wORld Model (STORM), an efficient world model architecture that combines the strong sequence modeling and generation capabilities of Transformers with the stochastic nature of variational autoencoders. STORM achieves a mean human performance of $126.7\\%$ on the Atari $100$k benchmark, setting a new record among state-of-the-art methods that do not employ lookahead search techniques. Moreover, training an agent on $1.85$ hours of real-time interaction experience requires only $4.3$ hours on a single NVIDIA GeForce RTX 3090 graphics card, showcasing improved efficiency compared to previous methodologies.", "keywords": "deep learning;reinforcement learning;model-based reinforcement learning;world model;learning in imagination;transformer;variational autoencoders;sequence modeling", "primary_area": "", "supplementary_material": "/attachment/5652a1112a65d252c4408817441e68e663dcb10b.zip", "author": "Weipu Zhang;Gang Wang;Jian Sun;Yetian Yuan;Gao Huang", "authorids": "~Weipu_Zhang1;~Gang_Wang19;~Jian_Sun7;~Yetian_Yuan1;~Gao_Huang1", "gender": "M;M;M;M;M", "homepage": "https://github.com/weipu-zhang;http://teacher.bit.edu.cn/sunjian/;https://github.com/Yet-ian;http://www.gaohuang.net;https://ac.bit.edu.cn/szdw/jsml/mssbyznxtyjs1/224f1108f85a435d9efaaa3dc05fa536.htm", "dblp": "306/0763.html;;;;71/4292-14.html", "google_scholar": "u60Jr-QAAAAJ;;;-P9LwcgAAAAJ;4MPZS5wAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Weipu_Zhang1;~Jian_Sun7;~Yetian_Yuan1;~Gao_Huang1;~Gang_Wang1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Tsinghua University;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;tsinghua.edu.cn;bit.edu.cn", "position": "Undergrad student;Full Professor;MS student;Associate Professor;Professor", "bibtex": "@inproceedings{\nzhang2023storm,\ntitle={{STORM}: Efficient Stochastic Transformer based World Models for Reinforcement Learning},\nauthor={Weipu Zhang and Gang Wang and Jian Sun and Yetian Yuan and Gao Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=WxnrX42rnS}\n}", "github": "", "project": "", "reviewers": "E5z3;Cg1v;4iYJ;rfCq", "pdf_size": 1355490, "rating": "5;5;6;7", "confidence": "4;3;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "79;75;59;64", "wc_strengths": "48;8;66;45", "wc_weaknesses": "114;93;747;279", "wc_questions": "3;4;3;458", "wc_limitations": "8;1;1;9", "wc_review": "252;181;876;855", "wc_reply_reviewers": "32;16;327;88", "wc_reply_authors": "239;32;110;23", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.25, 8.073877630977572 ], "wc_strengths_avg": [ 41.75, 21.07575621419075 ], "wc_weaknesses_avg": [ 308.25, 263.35468004195405 ], "wc_questions_avg": [ 117.0, 196.8768650705308 ], "wc_limitations_avg": [ 4.75,
3.766629793329841 ], "wc_review_avg": [ 541.0, 325.55414296242645 ], "wc_reply_reviewers_avg": [ 115.75, 124.86067235122515 ], "wc_reply_authors_avg": [ 101.0, 86.55922827752106 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18414527815137525239&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;tsinghua.edu.cn;bit.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Beijing Institute of Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "BIT;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VisAlign: Dataset for Measuring the Alignment between AI and Humans in Visual Perception", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73563", "id": "Wz2BJNQlyI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f37aba0f53fdb59f53254fe9098b2177-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Wz2BJNQlyI", "openreview": "https://openreview.net/forum?id=Wz2BJNQlyI", "poster": "/media/PosterPDFs/NeurIPS%202023/73563.png?t=1701835628.8718002", "slides": "https://nips.cc/virtual/2023/poster/73563", "video": "https://nips.cc/virtual/2023/poster/73563", "author_site": "Jiyoung Lee, Seungho Kim, Seunghyun Won, Joonseok Lee, Marzyeh Ghassemi, James Thorne, Jaeseok Choi, O-Kil Kwon, Edward Choi", "tldr": "", "abstract": "AI alignment refers to models acting towards human-intended goals, preferences, or ethical principles. Analyzing the similarity between models and humans can be a proxy measure for ensuring AI safety. In this paper, we focus on the models' visual perception alignment with humans, further referred to as AI-human visual alignment. Specifically, we propose a new dataset for measuring AI-human visual alignment in terms of image classification. In order to evaluate AI-human visual alignment, a dataset should encompass samples with various scenarios and have gold human perception labels. Our dataset consists of three groups of samples, namely Must-Act (i.e., Must-Classify), Must-Abstain, and Uncertain, based on the quantity and clarity of visual information in an image and further divided into eight categories. All samples have a gold human perception label; even Uncertain (e.g., severely blurry) sample labels were obtained via crowd-sourcing. The validity of our dataset is verified by sampling theory, statistical theories related to survey design, and experts in the related fields. Using our dataset, we analyze the visual alignment and reliability of five popular visual perception models and seven abstention methods. 
Our code and data are available at https://github.com/jiyounglee-0523/VisAlign.", "keywords": "visual perception;visual perception alignment;reliability", "primary_area": "", "supplementary_material": "/attachment/dc438b7cd8489ac0d357413589faf570dbf6ae1f.pdf", "author": "Jiyoung Lee;Seungho Kim;Seunghyun Won;Joonseok Lee;Marzyeh Ghassemi;James Thorne;Jaeseok Choi;O-Kil Kwon;Edward Choi", "authorids": "~Jiyoung_Lee3;~Seungho_Kim1;~Seunghyun_Won1;~Joonseok_Lee1;~Marzyeh_Ghassemi2;~James_Thorne1;~Jaeseok_Choi2;okkwon@kangwon.ac.kr;~Edward_Choi1", "gender": ";M;F;M;F;;M;;M", "homepage": "https://jiyounglee-0523.github.io/;;;http://www.joonseok.net;https://www.healthyml.org/;https://jamesthorne.com;;;http://mp2893.com", "dblp": ";;;77/1319.html;145/6563;204/1380;;;41/3886", "google_scholar": ";zTcAKNAAAAAJ;;https://scholar.google.co.kr/citations?user=M-MfqpMAAAAJ;;hao9RrgAAAAJ;;;GUlGIPkAAAAJ", "orcid": ";;0000-0003-1764-7890;;;;0000-0003-0593-8199;;", "linkedin": ";seungho-kim-25a43b213/;;joonseoklee;;;;;", "or_profile": "~Jiyoung_Lee3;~Seungho_Kim1;~Seunghyun_Won1;~Joonseok_Lee1;~Marzyeh_Ghassemi2;~James_Thorne1;~Jaeseok_Choi2;okkwon@kangwon.ac.kr;~Edward_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Seoul National University Bundang Hospital;Google Research;Massachusetts Institute of Technology;KAIST;Kangwon National University;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;snubh.org;google.com;mit.edu;kaist.ac.kr;kangwon.ac.kr;;kaist.ac.kr", "position": "PhD student;MS student;Research assistant professor;Research Scientist;Assistant Professor;Assistant Professor;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nlee2023visalign,\ntitle={VisAlign: Dataset for Measuring the Alignment between {AI} and Humans in Visual Perception},\nauthor={Jiyoung Lee and Seungho Kim and Seunghyun Won and Joonseok Lee and Marzyeh Ghassemi and James Thorne and Jaeseok Choi and O-Kil Kwon and Edward Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Wz2BJNQlyI}\n}", "github": "", "project": "", "reviewers": "PfuR;ChH6;82Bt;9Ee9", "pdf_size": 8859483, "rating": "6;7;7;7", "confidence": "3;5;5;4", "wc_summary_and_contributions": "28;39;79;48", "wc_strengths": "38;56;77;43", "wc_improvement": "44;6;525;37", "wc_limitations": "8;6;24;1", "wc_correctness": "1;276;5;1", "wc_clarity": "6;26;5;1", "wc_relation_to_prior_work": "1;23;5;1", "wc_documentation": "30;11;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "157;444;726;134", "wc_reply_reviewers": "0;486;2327;0", "wc_reply_authors": "674;3890;7000;477", "reply_reviewers": "0;3;12;0", "reply_authors": "1;8;12;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 48.5, 18.980252896102307 ], "wc_strengths_avg": [ 53.5, 15.074813431681335 ], "wc_improvement_avg": [ 153.0, 215.2498548199278 ], "wc_limitations_avg": [ 9.75, 8.613216588476108 ], "wc_correctness_avg": [ 70.75, 118.5123938666332 ], "wc_clarity_avg": [ 9.5, 9.7082439194738 ], "wc_relation_to_prior_work_avg": [ 7.5, 9.096702699330127 ], "wc_documentation_avg": [ 11.75, 11.121488209767612 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 365.25, 241.4470697689247 ], "wc_reply_reviewers_avg": [ 703.25, 958.2383250006233 ], "wc_reply_authors_avg": [ 3010.25, 2672.4279574012844
], "reply_reviewers_avg": [ 3.75, 4.9180788932265 ], "reply_authors_avg": [ 5.5, 4.716990566028302 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15476500028944670421&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;kaist.ac.kr;snubh.org;google.com;mit.edu;kaist.ac.kr;kangwon.ac.kr;;kaist.ac.kr", "author_num": 9, "aff_unique_index": "0;0;1;2;3;0;4;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Seoul National University;Google;Massachusetts Institute of Technology;Kangwon National University", "aff_unique_dep": ";Hospital;Google Research;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.snuh.org;https://research.google;https://web.mit.edu;http://www.kangwon.ac.kr", "aff_unique_abbr": "KAIST;SNUH;Google Research;MIT;KNU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Bundang;Mountain View", "aff_country_unique_index": "0;0;0;1;1;0;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Risk-Averse Model Uncertainty for Distributionally Robust Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71384", "id": "X0CIxqYc4Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05b63fa06784b71aab3939004e0f0a0d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X0CIxqYc4Z", "openreview": "https://openreview.net/forum?id=X0CIxqYc4Z", "poster": "/media/PosterPDFs/NeurIPS%202023/71384.png?t=1701124546.8921835", "slides": "https://nips.cc/virtual/2023/poster/71384", "video": "https://nips.cc/virtual/2023/poster/71384", "author_site": "James Queeney, Mouhacine Benosman", "tldr": "", "abstract": "Many real-world domains require safe decision making in uncertain environments. In this work, we introduce a deep reinforcement learning framework for approaching this important problem. We consider a distribution over transition models, and apply a risk-averse perspective towards model uncertainty through the use of coherent distortion risk measures. We provide robustness guarantees for this framework by showing it is equivalent to a specific class of distributionally robust safe reinforcement learning problems. Unlike existing approaches to robustness in deep reinforcement learning, however, our formulation does not involve minimax optimization. This leads to an efficient, model-free implementation of our approach that only requires standard data collection from a single training environment. 
In experiments on continuous control tasks with safety constraints, we demonstrate that our framework produces robust performance and safety at deployment time across a range of perturbed test environments.", "keywords": "deep reinforcement learning;model uncertainty;safety;risk-averse;distributionally robust", "primary_area": "", "supplementary_material": "/attachment/09abeb7ba79a35c02681b9d63b36f8847be976f6.pdf", "author": "James Queeney;Mouhacine Benosman", "authorids": "~James_Queeney1;~Mouhacine_Benosman1", "gender": "M;M", "homepage": "https://jqueeney.github.io/;", "dblp": "281/8330;", "google_scholar": "ybOJ8CwAAAAJ;cs7AJxcAAAAJ", "orcid": "0000-0003-0655-3637;", "linkedin": "jimmy-queeney/;", "or_profile": "~James_Queeney1;~Mouhacine_Benosman1", "aff": "Boston University;Mitsubishi Electric Research Labs", "aff_domain": "bu.edu;merl.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nqueeney2023riskaverse,\ntitle={Risk-Averse Model Uncertainty for Distributionally Robust Safe Reinforcement Learning},\nauthor={James Queeney and Mouhacine Benosman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X0CIxqYc4Z}\n}", "github": "", "project": "", "reviewers": "JpRL;F3rP;22QQ;mRCw;ZX7b", "pdf_size": 768126, "rating": "5;5;6;6;6", "confidence": "4;4;5;4;2", "soundness": "3;2;3;2;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "69;73;120;91;84", "wc_strengths": "56;50;112;154;196", "wc_weaknesses": "86;234;337;261;2", "wc_questions": "66;143;50;166;2", "wc_limitations": "1;70;1;2;2", "wc_review": "278;570;620;674;286", "wc_reply_reviewers": "91;108;133;60;0", "wc_reply_authors": "160;184;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.4, 18.07318455613177 ], "wc_strengths_avg": [ 113.6, 56.19110249852729 ], "wc_weaknesses_avg": [ 184.0, 122.1196134943114 ], "wc_questions_avg": [ 85.4, 60.661684777130944 ], "wc_limitations_avg": [ 15.2, 27.403649392006166 ], "wc_review_avg": [ 485.6, 169.48109039063914 ], "wc_reply_reviewers_avg": [ 78.4, 45.82837548942795 ], "wc_reply_authors_avg": [ 68.8, 84.60354602497463 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12715536926892171765&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "bu.edu;merl.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Boston University;Mitsubishi Electric Research Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://www.bu.edu;https://www.merl.com", "aff_unique_abbr": "BU;MERL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Nearly Optimal Bounds for Cyclic Forgetting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71383", "id": "X25L5AjHig", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d72ae75abaa70a3b19c5d4f436c680d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X25L5AjHig", "openreview": 
"https://openreview.net/forum?id=X25L5AjHig", "poster": "/media/PosterPDFs/NeurIPS%202023/71383.png?t=1702142006.468673", "slides": "https://nips.cc/virtual/2023/poster/71383", "video": "https://nips.cc/virtual/2023/poster/71383", "author_site": "William Swartworth, Deanna Needell, Rachel Ward, Mark Kong, Halyun Jeong", "tldr": "", "abstract": "We provide theoretical bounds on the forgetting quantity in the continual learning setting for linear tasks, where each round of learning corresponds to projecting onto a linear subspace. For a cyclic task ordering on $T$ tasks repeated $m$ times each, we prove the best known upper bound of $O(T^2/m)$ on the forgetting. Notably, our bound holds uniformly over all choices of tasks and is independent of the ambient dimension. Our main technical contribution is a characterization of the union of all numerical ranges of products of $T$ (real or complex) projections as a sinusoidal spiral, which may be of independent interest.", "keywords": "catastrophic forgetting;linear systems", "primary_area": "", "supplementary_material": "/attachment/9af9816ccd9dc95c6a1ff1b41b40613b0458b2fc.pdf", "author": "William Joseph Swartworth;Deanna Needell;Rachel Ward;Mark Kong;Halyun Jeong", "authorids": "~William_Joseph_Swartworth1;~Deanna_Needell2;~Rachel_Ward1;~Mark_Kong1;~Halyun_Jeong1", "gender": ";Not Specified;;M;", "homepage": "https://www.math.ucla.edu/~wswartworth/;https://www.math.ucla.edu/~deanna/index.html;;https://www.math.ucla.edu/~markkong;https://www.math.ucla.edu/~hajeong/", "dblp": ";03/2691;80/7132;;41/236", "google_scholar": ";;;;bCBpdqkAAAAJ", "orcid": ";0000-0002-8058-8638;;;", "linkedin": ";;;;", "or_profile": "~William_Joseph_Swartworth1;~Deanna_Needell2;~Rachel_Ward1;~Mark_Kong1;~Halyun_Jeong1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of Texas at Austin;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;utexas.edu;ucla.edu;ucla.edu", "position": "PhD student;Full Professor;Full Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nswartworth2023nearly,\ntitle={Nearly Optimal Bounds for Cyclic Forgetting},\nauthor={William Joseph Swartworth and Deanna Needell and Rachel Ward and Mark Kong and Halyun Jeong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X25L5AjHig}\n}", "github": "", "project": "", "reviewers": "eHkq;SAj1;kHo7;U3GB", "pdf_size": 368857, "rating": "3;5;6;7", "confidence": "3;3;2;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;2", "wc_summary": "46;34;69;235", "wc_strengths": "72;26;59;55", "wc_weaknesses": "181;234;61;105", "wc_questions": "40;2;19;47", "wc_limitations": "1;1;22;10", "wc_review": "340;297;230;452", "wc_reply_reviewers": "0;10;0;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.0, 81.23115165993893 ], "wc_strengths_avg": [ 53.0, 16.80773631397161 ], "wc_weaknesses_avg": [ 145.25, 66.84450239174498 ], "wc_questions_avg": [ 27.0, 17.7341478509682 ], "wc_limitations_avg": [ 8.5, 8.616843969807043 ], "wc_review_avg": [ 329.75, 80.7352927783135 ], "wc_reply_reviewers_avg": [ 4.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10984758067499718125&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucla.edu;ucla.edu;utexas.edu;ucla.edu;ucla.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of California, Los Angeles;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.utexas.edu", "aff_unique_abbr": "UCLA;UT Austin", "aff_campus_unique_index": "0;0;1;0;0", "aff_campus_unique": "Los Angeles;Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Causal Imitability Under Context-Specific Independence Relations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71382", "id": "X3IeHRD0zf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54e13b23fa2f399cea6e67acf9063c40-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X3IeHRD0zf", "openreview": "https://openreview.net/forum?id=X3IeHRD0zf", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71382", "video": "https://nips.cc/virtual/2023/poster/71382", "author_site": "Fateme Jamshidi, Sina Akbari, Negar Kiyavash", "tldr": "", "abstract": "Drawbacks of ignoring the causal mechanisms when performing imitation learning have recently been acknowledged. Several approaches both to assess the feasibility of imitation and to circumvent causal confounding and causal misspecifications have been proposed in the literature.\nHowever, the potential benefits of the incorporation of additional information about the underlying causal structure are left unexplored.\nAn example of such overlooked information is context-specific independence (CSI), i.e., independence that holds only in certain contexts.\nWe consider the problem of causal imitation learning when CSI relations are known.\nWe prove that the decision problem pertaining to the feasibility of imitation in this setting is NP-hard.\nFurther, we provide a necessary graphical criterion for imitation learning under CSI and show that under a structural assumption, this criterion is also sufficient.\nFinally, we propose a sound algorithmic approach for causal imitation learning which takes both CSI relations and data into account.", "keywords": "causal inference;conditional independence;context-specific independence relations;imitability", "primary_area": "", "supplementary_material": "", "author": "Fateme Jamshidi;Sina Akbari;Negar Kiyavash", "authorids": "~Fateme_Jamshidi1;~Sina_Akbari1;~Negar_Kiyavash1", "gender": "F;M;F", "homepage": ";https://sinaakbarii.github.io;https://people.epfl.ch/negar.kiyavash?lang=en", "dblp": "304/8469;;85/4976", "google_scholar": ";-kNnS1AAAAAJ;7tBDvOwAAAAJ", "orcid": ";;0000-0002-8545-7709", "linkedin": "fateme-jamshidi/;sina-akbari/;", "or_profile": "~Fateme_Jamshidi1;~Sina_Akbari1;~Negar_Kiyavash1", "aff": "Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\njamshidi2023causal,\ntitle={Causal Imitability Under Context-Specific Independence Relations},\nauthor={Fateme Jamshidi and Sina Akbari and Negar 
Kiyavash},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X3IeHRD0zf}\n}", "github": "", "project": "", "reviewers": "puDh;g52k;2EML;Fd5v", "pdf_size": 461796, "rating": "6;7;7;7", "confidence": "3;3;3;4", "soundness": "3;4;3;4", "novelty": "2;3;4;2", "presentation": "2;3;4;3", "wc_summary": "54;125;66;249", "wc_strengths": "10;193;59;178", "wc_weaknesses": "56;281;97;567", "wc_questions": "110;38;123;15", "wc_limitations": "2;1;64;4", "wc_review": "232;638;409;1013", "wc_reply_reviewers": "13;32;14;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 123.5, 77.28033385021055 ], "wc_strengths_avg": [ 110.0, 77.64341568993471 ], "wc_weaknesses_avg": [ 250.25, 201.5531877693826 ], "wc_questions_avg": [ 71.5, 45.95922105519196 ], "wc_limitations_avg": [ 17.75, 26.7242867070386 ], "wc_review_avg": [ 573.0, 291.97688264655477 ], "wc_reply_reviewers_avg": [ 23.75, 10.353139620424328 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7798285345405742873&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "epfl.ch;epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Performance-optimized deep neural networks are evolving into worse models of inferotemporal visual cortex", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71381", "id": "X4mmXQ4Nxw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5bf234ecf83cd77bc5b77a24ba9338b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X4mmXQ4Nxw", "openreview": "https://openreview.net/forum?id=X4mmXQ4Nxw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71381", "video": "https://nips.cc/virtual/2023/poster/71381", "author_site": "Drew Linsley, Ivan F Rodriguez Rodriguez, Thomas FEL, Michael Arcaro, Saloni Sharma, Margaret Livingstone, Thomas Serre", "tldr": "", "abstract": "One of the most impactful findings in computational neuroscience over the past decade is that the object recognition accuracy of deep neural networks (DNNs) correlates with their ability to predict neural responses to natural images in the inferotemporal (IT) cortex. This discovery supported the long-held theory that object recognition is a core objective of the visual cortex, and suggested that more accurate DNNs would serve as better models of IT neuron responses to images. Since then, deep learning has undergone a revolution of scale: billion parameter-scale DNNs trained on billions of images are rivaling or outperforming humans at visual tasks including object recognition. 
Have today's DNNs become more accurate at predicting IT neuron responses to images as they have grown more accurate at object recognition?\n\nSurprisingly, across three independent experiments, we find that this is not the case. DNNs have become progressively worse models of IT as their accuracy has increased on ImageNet. To understand why DNNs experience this trade-off and evaluate if they are still an appropriate paradigm for modeling the visual system, we turn to recordings of IT that capture spatially resolved maps of neuronal activity elicited by natural images. These neuronal activity maps reveal that DNNs trained on ImageNet learn to rely on different visual features than those encoded by IT and that this problem worsens as their accuracy increases. We successfully resolved this issue with the neural harmonizer, a plug-and-play training routine for DNNs that aligns their learned representations with humans. Our results suggest that harmonized DNNs break the trade-off between ImageNet accuracy and neural prediction accuracy that assails current DNNs and offer a path to more accurate models of biological vision. Our work indicates that the standard approach for modeling IT with task-optimized DNNs needs revision, and other biological constraints, including human psychophysics data, are needed to accurately reverse-engineer the visual cortex.", "keywords": "neural system identification;behavioral alignment;neural object recognition", "primary_area": "", "supplementary_material": "", "author": "Drew Linsley;Ivan F Rodriguez Rodriguez;Thomas FEL;Michael Arcaro;Saloni Sharma;Margaret Livingstone;Thomas Serre", "authorids": "~Drew_Linsley1;~Ivan_F_Rodriguez_Rodriguez1;~Thomas_FEL1;~Michael_Arcaro1;~Saloni_Sharma1;margaret_livingstone@hms.harvard.edu;~Thomas_Serre1", "gender": ";M;M;M;F;;M", "homepage": ";;https://thomasfel.me;https://arcarolab.org/;;;https://serre-lab.clps.brown.edu/", "dblp": "194/2308;327/9480;274/2390;;;;", "google_scholar": "cXZlAuQAAAAJ;https://scholar.google.com/citations?hl=en;1m5Mlx4AAAAJ;07tNFdgAAAAJ;QvYJdzMAAAAJ;;kZlPW4wAAAAJ", "orcid": ";;;0000-0002-4612-9921;0000-0002-0273-2091;;", "linkedin": ";ivan-felipe-rodriguez/en;;;;;", "or_profile": "~Drew_Linsley1;~Ivan_F_Rodriguez_Rodriguez1;~Thomas_FEL1;~Michael_Arcaro1;~Saloni_Sharma1;margaret_livingstone@hms.harvard.edu;~Thomas_Serre1", "aff": "Brown University;Brown University;Brown University;University of Pennsylvania;Harvard University;;Universit\u00e9 de Toulouse", "aff_domain": "brown.edu;brown.edu;brown.edu;upenn.edu;harvard.edu;;univ-toulouse.fr", "position": "Assistant Professor;PhD student;PhD student;Assistant Professor;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nlinsley2023performanceoptimized,\ntitle={Performance-optimized deep neural networks are evolving into worse models of inferotemporal visual cortex},\nauthor={Drew Linsley and Ivan F Rodriguez Rodriguez and Thomas FEL and Michael Arcaro and Saloni Sharma and Margaret Livingstone and Thomas Serre},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X4mmXQ4Nxw}\n}", "github": "", "project": "", "reviewers": "itup;X5jQ;GM1r;nyjr", "pdf_size": 32576132, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;2;4;3", "wc_summary": "140;43;138;190", "wc_strengths": "86;31;95;199", "wc_weaknesses": "516;99;15;206", "wc_questions": "31;260;29;182", "wc_limitations": "79;4;16;9", "wc_review": "852;437;293;786", 
"wc_reply_reviewers": "21;104;0;1543", "wc_reply_authors": "0;522;0;2408", "reply_reviewers": "1;2;0;6", "reply_authors": "1;3;1;9", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 127.75, 53.180706087828504 ], "wc_strengths_avg": [ 102.75, 60.73044952904597 ], "wc_weaknesses_avg": [ 209.0, 189.7327067218512 ], "wc_questions_avg": [ 125.5, 99.40447676035521 ], "wc_limitations_avg": [ 27.0, 30.32325840011261 ], "wc_review_avg": [ 592.0, 233.80654396316626 ], "wc_reply_reviewers_avg": [ 417.0, 651.2583972587225 ], "wc_reply_authors_avg": [ 732.5, 990.5456829445071 ], "reply_reviewers_avg": [ 2.25, 2.277608394786075 ], "reply_authors_avg": [ 3.5, 3.278719262151 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3024302051792673399&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "brown.edu;brown.edu;brown.edu;upenn.edu;harvard.edu;;univ-toulouse.fr", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Brown University;University of Pennsylvania;Harvard University;Universit\u00e9 de Toulouse", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.brown.edu;https://www.upenn.edu;https://www.harvard.edu;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;UPenn;Harvard;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;France" }, { "title": "Towards Evaluating Transfer-based Attacks Systematically, Practically, and Fairly", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71380", "id": "X5MH7iut9K", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/823e43f5537d8c1894afd1f6ab00a927-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X5MH7iut9K", "openreview": "https://openreview.net/forum?id=X5MH7iut9K", "poster": "/media/PosterPDFs/NeurIPS%202023/71380.png?t=1700712298.6405208", "slides": "https://nips.cc/virtual/2023/poster/71380", "video": "https://nips.cc/virtual/2023/poster/71380", "author_site": "Qizhang Li, Yiwen Guo, Wangmeng Zuo, Hao Chen", "tldr": "", "abstract": "The adversarial vulnerability of deep neural networks (DNNs) has drawn great attention due to the security risk of applying these models in real-world applications. Based on transferability of adversarial examples, an increasing number of transfer-based methods have been developed to fool black-box DNN models whose architecture and parameters are inaccessible. Although tremendous effort has been exerted, there still lacks a standardized benchmark that could be taken advantage of to compare these methods systematically, fairly, and practically. Our investigation shows that the evaluation of some methods needs to be more reasonable and more thorough to verify their effectiveness, to avoid, for example, unfair comparison and insufficient consideration of possible substitute/victim models. Therefore, we establish a transfer-based attack benchmark (TA-Bench) which implements 30+ methods. In this paper, we evaluate and compare them comprehensively on 10 popular substitute/victim models on ImageNet. 
New insights about the effectiveness of these methods are gained and guidelines for future evaluations are provided.", "keywords": "adversarial examples;adversarial transferability;black-box attack", "primary_area": "", "supplementary_material": "/attachment/9df7522d4d12cad6a4308096bc0f9498b4a933d5.zip", "author": "Qizhang Li;Yiwen Guo;Wangmeng Zuo;Hao Chen", "authorids": "~Qizhang_Li1;~Yiwen_Guo1;~Wangmeng_Zuo3;~Hao_Chen5", "gender": "M;;M;", "homepage": ";;;https://www.cs.ucdavis.edu/~hchen/", "dblp": "272/9084;;93/2671;86/475-3", "google_scholar": "W5JLehEAAAAJ;;rUOpCEYAAAAJ;1Aa3qxIAAAAJ", "orcid": ";;0000-0002-3330-783X;0000-0002-4072-0710", "linkedin": ";;;", "or_profile": "~Qizhang_Li1;~Yiwen_Guo1;~Wangmeng_Zuo3;~Hao_Chen5", "aff": "Harbin Institute of Technology;;Harbin Institute of Technology;University of California, Davis", "aff_domain": "hit.edu;;hit.edu.cn;ucdavis.edu", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023towards,\ntitle={Towards Evaluating Transfer-based Attacks Systematically, Practically, and Fairly},\nauthor={Qizhang Li and Yiwen Guo and Wangmeng Zuo and Hao Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X5MH7iut9K}\n}", "github": "", "project": "", "reviewers": "Yj8S;fXNW;QeCx;xpVS;urLG;NrCP;EHYy", "pdf_size": 1686546, "rating": "4;5;6;6;6;7;7", "confidence": "5;5;2;3;3;2;4", "soundness": "2;3;3;3;3;3;3", "novelty": "3;3;4;2;3;4;3", "presentation": "2;2;3;3;3;3;4", "wc_summary": "59;50;38;82;56;45;66", "wc_strengths": "22;181;25;55;56;44;92", "wc_weaknesses": "311;926;27;123;60;10;118", "wc_questions": "2;208;43;4;50;32;5", "wc_limitations": "13;9;6;1;2;1;5", "wc_review": "407;1374;139;265;224;132;286", "wc_reply_reviewers": "107;460;10;50;21;32;57", "wc_reply_authors": "130;2504;22;32;24;28;175", "reply_reviewers": "1;1;1;1;1;1;2", "reply_authors": "3;6;2;2;2;2;3", "rating_avg": [ 5.857142857142857, 0.9897433186107869 ], "confidence_avg": [ 3.4285714285714284, 1.178030178747903 ], "soundness_avg": [ 2.857142857142857, 0.34992710611188266 ], "novelty_avg": [ 3.142857142857143, 0.6388765649999398 ], "presentation_avg": [ 2.857142857142857, 0.6388765649999399 ], "wc_summary_avg": [ 56.57142857142857, 13.436168064031426 ], "wc_strengths_avg": [ 67.85714285714286, 50.98859416275249 ], "wc_weaknesses_avg": [ 225.0, 300.7856379550061 ], "wc_questions_avg": [ 49.142857142857146, 67.3761903079134 ], "wc_limitations_avg": [ 5.285714285714286, 4.164965639175215 ], "wc_review_avg": [ 403.85714285714283, 405.4561553085007 ], "wc_reply_reviewers_avg": [ 105.28571428571429, 147.72920620969617 ], "wc_reply_authors_avg": [ 416.42857142857144, 854.1093702369332 ], "reply_reviewers_avg": [ 1.1428571428571428, 0.3499271061118826 ], "reply_authors_avg": [ 2.857142857142857, 1.355261854357877 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6826365409636549, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5397990106700967402&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "hit.edu;;hit.edu.cn;ucdavis.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harbin Institute of Technology;University of California, Davis", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.ucdavis.edu", "aff_unique_abbr": "HIT;UC Davis", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Harbin;Davis", "aff_country_unique_index": "0;0;1", 
"aff_country_unique": "China;United States" }, { "title": "Latent SDEs on Homogeneous Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71379", "id": "X6Eapo5paw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0172a5da5a2611e3dc0fe9c6e9a7480-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X6Eapo5paw", "openreview": "https://openreview.net/forum?id=X6Eapo5paw", "poster": "/media/PosterPDFs/NeurIPS%202023/71379.png?t=1701788714.6403706", "slides": "https://nips.cc/virtual/2023/poster/71379", "video": "https://nips.cc/virtual/2023/poster/71379", "author_site": "Sebastian Zeng, Florian Graf, Roland Kwitt", "tldr": "", "abstract": "We consider the problem of variational Bayesian inference in a latent variable model where a (possibly complex) observed stochastic process is governed by the unobserved solution of a latent stochastic differential equation (SDE). Motivated by the challenges that arise when trying to learn a latent SDE in $\\mathbb{R}^n$ from large-scale data, such as efficient gradient computation, we take a step back and study a specific subclass instead. In our case, the SDE evolves inside a homogeneous latent space and is induced by stochastic dynamics of the corresponding (matrix) Lie group. In the context of learning problems, SDEs on the $n$-dimensional unit sphere are arguably the most relevant incarnation of this setup. For variational inference, the sphere not only facilitates using a uniform prior on the initial state of the SDE, but we also obtain a particularly simple and intuitive expression for the KL divergence between the approximate posterior and prior process in the evidence lower bound. We provide empirical evidence that a latent SDE of the proposed type can be learned efficiently by means of an existing one-step geometric Euler-Maruyama scheme. 
Despite restricting ourselves to a less diverse class of SDEs, we achieve competitive or even state-of-the-art performance on a collection of time series interpolation and classification benchmarks.", "keywords": "Variational Bayesian inference;stochastic differential equation;homogeneous spaces;geometric Euler-Maruyama;time series", "primary_area": "", "supplementary_material": "", "author": "Sebastian Zeng;Florian Graf;Roland Kwitt", "authorids": "~Sebastian_Zeng1;~Florian_Graf2;~Roland_Kwitt1", "gender": "M;;M", "homepage": "https://uni-salzburg.elsevierpure.com/de/persons/sebastian-zeng;;http://rkwitt.org", "dblp": "297/5768;49/6607;60/4140", "google_scholar": "JICJ16IAAAAJ;https://scholar.google.com/citations?hl=de;https://scholar.google.at/citations?user=sfGFi6UAAAAJ", "orcid": ";0000-0003-3621-0897;", "linkedin": ";;", "or_profile": "~Sebastian_Zeng1;~Florian_Graf2;~Roland_Kwitt1", "aff": "University of Salzburg;University of Salzburg;University of Salzburg", "aff_domain": "sbg.ac.at;sbg.ac.at;sbg.ac.at", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzeng2023latent,\ntitle={Latent {SDE}s on Homogeneous Spaces},\nauthor={Sebastian Zeng and Florian Graf and Roland Kwitt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X6Eapo5paw}\n}", "github": "", "project": "", "reviewers": "uKNv;XRoq;tqmQ;GYzQ", "pdf_size": 6747886, "rating": "6;7;7;7", "confidence": "3;4;2;3", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "1;4;4;3", "wc_summary": "199;262;143;90", "wc_strengths": "81;178;89;54", "wc_weaknesses": "724;197;79;95", "wc_questions": "98;242;19;66", "wc_limitations": "8;141;8;12", "wc_review": "1110;1020;338;317", "wc_reply_reviewers": "97;0;140;14", "wc_reply_authors": "0;0;199;0", "reply_reviewers": "1;0;3;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 173.5, 64.00195309519859 ], "wc_strengths_avg": [ 100.5, 46.585942085569116 ], "wc_weaknesses_avg": [ 273.75, 263.8630089648793 ], "wc_questions_avg": [ 106.25, 83.25975918773726 ], "wc_limitations_avg": [ 42.25, 57.0367206280305 ], "wc_review_avg": [ 696.25, 370.19479669492927 ], "wc_reply_reviewers_avg": [ 62.75, 57.99730597191563 ], "wc_reply_authors_avg": [ 49.75, 86.16952767655164 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5249211863563398930&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "sbg.ac.at;sbg.ac.at;sbg.ac.at", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Salzburg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-salzburg.at", "aff_unique_abbr": "USAL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Austria" }, { "title": "Explain Any Concept: Segment Anything Meets Concept-Based Explanation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71378", "id": "X6TBBsz9qi", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/44cdeb5ab7da31d9b5cd88fd44e3da84-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X6TBBsz9qi", "openreview": "https://openreview.net/forum?id=X6TBBsz9qi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71378", "video": "https://nips.cc/virtual/2023/poster/71378", "author_site": "Ao Sun, Pingchuan Ma, Yuanyuan Yuan, Shuai Wang", "tldr": "", "abstract": "EXplainable AI (XAI) is an essential topic to improve human understanding of deep neural networks (DNNs) given their black-box internals. For computer vision tasks, mainstream pixel-based XAI methods explain DNN decisions by identifying important pixels, and emerging concept-based XAI explore forming explanations with concepts (e.g., a head in an image). However, pixels are generally hard to interpret and sensitive to the imprecision of XAI methods, whereas \u201cconcepts\u201d in prior works require human annotation or are limited to pre-defined concept sets. On the other hand, driven by large-scale pre-training, Segment Anything Model (SAM) has been demonstrated as a powerful and promotable framework for performing precise and comprehensive instance segmentation, enabling automatic preparation of concept sets from a given image. This paper for the first time explores using SAM to augment concept-based XAI. We offer an effective and flexible concept-based explanation method, namely Explain Any Concept (EAC), which explains DNN decisions with any concept. While SAM is highly effective and offers an \u201cout-of-the-box\u201d instance segmentation, it is costly when being integrated into defacto XAI pipelines. We thus propose a lightweight per-input equivalent (PIE) scheme, enabling efficient explanation with a surrogate model. 
Our evaluation on two popular datasets (ImageNet and COCO) illustrates the highly encouraging performance of EAC over commonly-used XAI methods.", "keywords": "EXplainable AI;Machine Learning;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/16189a8e6e8c7f949c4055d2ada2121f85c6f0d2.pdf", "author": "Ao Sun;Pingchuan Ma;Yuanyuan Yuan;Shuai Wang", "authorids": "~Ao_Sun2;~Pingchuan_Ma4;~Yuanyuan_Yuan1;~Shuai_Wang7", "gender": "M;;M;M", "homepage": "https://jerry00917.github.io;;https://yuanyuan-yuan.github.io/;https://home.cse.ust.hk/~shuaiw/", "dblp": ";;;42/1503-11", "google_scholar": ";;wnhU3KoAAAAJ;", "orcid": ";;;", "linkedin": "ao-s-6bb42b137/;;;", "or_profile": "~Ao_Sun2;~Pingchuan_Ma4;~Yuanyuan_Yuan1;~Shuai_Wang7", "aff": "University of Illinois, Urbana Champaign;;Hong Kong University of Science and Technology;", "aff_domain": "illinois.edu;;cse.ust.hk;", "position": "Undergrad student;;PhD student;", "bibtex": "@inproceedings{\nsun2023explain,\ntitle={Explain Any Concept: Segment Anything Meets Concept-Based Explanation},\nauthor={Ao Sun and Pingchuan Ma and Yuanyuan Yuan and Shuai Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X6TBBsz9qi}\n}", "github": "", "project": "", "reviewers": "58qS;bcLx;6va6;xxGh", "pdf_size": 853317, "rating": "5;5;5;6", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "67;45;59;125", "wc_strengths": "37;33;37;94", "wc_weaknesses": "420;144;92;73", "wc_questions": "3;27;37;11", "wc_limitations": "1;6;7;39", "wc_review": "528;255;232;342", "wc_reply_reviewers": "37;19;24;17", "wc_reply_authors": "43;0;55;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.0, 30.479501308256342 ], "wc_strengths_avg": [ 50.25, 25.31180554602931 ], "wc_weaknesses_avg": [ 182.25, 139.70392800490615 ], "wc_questions_avg": [ 19.5, 13.294735800308331 ], "wc_limitations_avg": [ 13.25, 15.039531242695032 ], "wc_review_avg": [ 339.25, 116.44177729663869 ], "wc_reply_reviewers_avg": [ 24.25, 7.790218225441442 ], "wc_reply_authors_avg": [ 24.5, 24.86463351831271 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16752337005254222243&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "illinois.edu;;cse.ust.hk;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.ust.hk", "aff_unique_abbr": "UIUC;HKUST", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Hong Kong SAR", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "On the Planning Abilities of Large Language Models - A Critical Investigation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71377", "id": "X6dEqXIsEW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efb2072a358cefb75886a315a6fcf880-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=X6dEqXIsEW", "openreview": "https://openreview.net/forum?id=X6dEqXIsEW", "poster": "/media/PosterPDFs/NeurIPS%202023/71377.png?t=1702477253.020453", "slides": "https://nips.cc/virtual/2023/poster/71377", "video": "https://nips.cc/virtual/2023/poster/71377", "author_site": "Karthik Valmeekam, Matthew Marquez, Sarath Sreedharan, Subbarao Kambhampati", "tldr": "", "abstract": "Intrigued by the claims of emergent reasoning capabilities in LLMs trained on general web corpora, in this paper, we set out to investigate their planning capabilities. We aim to evaluate (1) the effectiveness of LLMs in generating plans autonomously in commonsense planning tasks and (2) the potential of LLMs as a source of heuristic guidance for other agents (AI planners) in their planning tasks. We conduct a systematic study by generating a suite of instances on domains similar to the ones employed in the International Planning Competition and evaluate LLMs in two distinct modes: autonomous and heuristic. Our findings reveal that LLMs\u2019 ability to generate executable plans autonomously is rather limited, with the best model (GPT-4) having an average success rate of ~12% across the domains. However, the results in the heuristic mode show more promise. In the heuristic mode, we demonstrate that LLM-generated plans can improve the search process for underlying sound planners and additionally show that external verifiers can help provide feedback on the generated plans and back-prompt the LLM for better plan generation.", "keywords": "Large Language Models;Planning;LLMs for autonomous and heuristic planning guidance", "primary_area": "", "supplementary_material": "/attachment/311a163cf136ee33f0e7c71f6ef277c97d3abb15.pdf", "author": "Karthik Valmeekam;Matthew Marquez;Sarath Sreedharan;Subbarao Kambhampati", "authorids": "~Karthik_Valmeekam1;~Matthew_Marquez1;~Sarath_Sreedharan1;~Subbarao_Kambhampati1", "gender": "M;M;;M", "homepage": ";;;http://rakaposhi.eas.asu.edu", "dblp": "279/2957;332/1174.html;162/5110;k/SKambhampati", "google_scholar": "CrYLDt4AAAAJ;NPlGTxwAAAAJ;;yl3L07sAAAAJ", "orcid": ";0000-0001-9794-8700;;", "linkedin": ";matthew-m-2661a5a0/;;", "or_profile": "~Karthik_Valmeekam1;~Matthew_Marquez1;~Sarath_Sreedharan1;~Subbarao_Kambhampati1", "aff": "Arizona State University;Arizona State University;Colorado State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;colostate.edu;asu.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nvalmeekam2023on,\ntitle={On the Planning Abilities of Large Language Models - A Critical Investigation},\nauthor={Karthik Valmeekam and Matthew Marquez and Sarath Sreedharan and Subbarao Kambhampati},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X6dEqXIsEW}\n}", "github": "", "project": "", "reviewers": "Y3FL;GZ9T;JKYr;NTC1", "pdf_size": 3974369, "rating": "5;8;8;8", "confidence": "3;4;4;5", "soundness": "2;4;4;3", "novelty": "2;4;3;4", "presentation": "2;3;4;4", "wc_summary": "58;197;144;122", "wc_strengths": "28;271;70;61", "wc_weaknesses": "284;159;35;72", "wc_questions": "5;28;74;98", "wc_limitations": "1;200;2;180", "wc_review": "376;855;325;533", "wc_reply_reviewers": "25;50;5;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 
3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 130.25, 49.83159138538524 ], "wc_strengths_avg": [ 107.5, 95.68307060290238 ], "wc_weaknesses_avg": [ 137.5, 95.81362116108544 ], "wc_questions_avg": [ 51.25, 36.68361350794112 ], "wc_limitations_avg": [ 95.75, 94.5155410501363 ], "wc_review_avg": [ 522.25, 206.84218017609464 ], "wc_reply_reviewers_avg": [ 24.75, 16.284578594486256 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 280, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2560204324087836226&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 8, "email": "asu.edu;asu.edu;colostate.edu;asu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Arizona State University;Colorado State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.colostate.edu", "aff_unique_abbr": "ASU;CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Approximation Algorithms for Systems of Interacting Particles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71376", "id": "X6mwdEVYvc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aebbbfa9680eafefd43a0edc85c101f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X6mwdEVYvc", "openreview": "https://openreview.net/forum?id=X6mwdEVYvc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71376", "video": "https://nips.cc/virtual/2023/poster/71376", "author_site": "Mohammad Reza Karimi Jaghargh, Ya-Ping Hsieh, Andreas Krause", "tldr": "", "abstract": "Interacting particle systems have proven highly successful in various machine\nlearning tasks, including approximate Bayesian inference and neural network optimization. However, the analysis of these\nsystems often relies on the simplifying assumption of the \\emph{mean-field} limit, where particle\nnumbers approach infinity and infinitesimal step sizes are used. In practice, discrete time steps,\nfinite particle numbers, and complex integration schemes are employed, creating a theoretical gap\nbetween continuous-time and discrete-time processes. In this paper, we present a novel framework\nthat establishes a precise connection between these discrete-time schemes and their corresponding\nmean-field limits in terms of convergence properties and asymptotic behavior. By adopting a dynamical system perspective, our framework seamlessly integrates various numerical schemes that are typically analyzed independently. 
\nFor example, our framework provides a unified treatment of optimizing an infinite-width two-layer neural network and sampling via Stein Variational Gradient descent, which were previously studied in isolation.", "keywords": "Stochastic Approximation;Mean-Field Dynamics;Dynamical Systems;Neural Networks;Sampling", "primary_area": "", "supplementary_material": "/attachment/118d8279bb8ec45ce39a35e0b1066b81e77aef80.pdf", "author": "Mohammad Reza Karimi Jaghargh;Ya-Ping Hsieh;Andreas Krause", "authorids": "~Mohammad_Reza_Karimi_Jaghargh1;~Ya-Ping_Hsieh1;~Andreas_Krause1", "gender": "M;M;M", "homepage": "http://moreka.github.io;;https://las.inf.ethz.ch/krausea", "dblp": "https://dblp.uni-trier.de/pers/hd/k/Karimi:Mohammad_Reza;122/5313;87/1831-1.html", "google_scholar": "CEZbTgMAAAAJ;;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";;0000-0001-7260-9673", "linkedin": ";;krausea/", "or_profile": "~Mohammad_Reza_Karimi_Jaghargh1;~Ya-Ping_Hsieh1;~Andreas_Krause1", "aff": "Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich;ETH Zurich", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\njaghargh2023stochastic,\ntitle={Stochastic Approximation Algorithms for Systems of Interacting Particles},\nauthor={Mohammad Reza Karimi Jaghargh and Ya-Ping Hsieh and Andreas Krause},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X6mwdEVYvc}\n}", "github": "", "project": "", "reviewers": "77LN;S8Cs;UKxL;MJDT", "pdf_size": 377669, "rating": "6;6;6;9", "confidence": "4;2;4;3", "soundness": "3;4;3;4", "novelty": "2;2;2;4", "presentation": "3;4;3;4", "wc_summary": "94;20;110;172", "wc_strengths": "100;21;51;114", "wc_weaknesses": "266;48;251;124", "wc_questions": "125;38;9;114", "wc_limitations": "12;11;22;17", "wc_review": "597;138;443;541", "wc_reply_reviewers": "44;20;17;0", "wc_reply_authors": "32;51;18;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 99.0, 54.120236510939236 ], "wc_strengths_avg": [ 71.5, 37.379807383131336 ], "wc_weaknesses_avg": [ 172.25, 90.49412964386143 ], "wc_questions_avg": [ 71.5, 49.23667332385485 ], "wc_limitations_avg": [ 15.5, 4.387482193696061 ], "wc_review_avg": [ 429.75, 177.23060542694085 ], "wc_reply_reviewers_avg": [ 20.25, 15.690363284513205 ], "wc_reply_authors_avg": [ 25.25, 18.699933154960743 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7480502763016981750&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ethz.ch;inf.ethz.ch;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Self-Weighted Contrastive Learning among 
Multiple Views for Mitigating Representation Degeneration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71375", "id": "X8dbFcAox2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/03b13b0db740b95cb741e007178ef5e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X8dbFcAox2", "openreview": "https://openreview.net/forum?id=X8dbFcAox2", "poster": "/media/PosterPDFs/NeurIPS%202023/71375.png?t=1697007031.2361987", "slides": "https://nips.cc/virtual/2023/poster/71375", "video": "https://nips.cc/virtual/2023/poster/71375", "author_site": "Jie Xu, Shuo Chen, Yazhou Ren, Xiaoshuang Shi, Hengtao Shen, Gang Niu, Xiaofeng Zhu", "tldr": "", "abstract": "Recently, numerous studies have demonstrated the effectiveness of contrastive learning (CL), which learns feature representations by pulling in positive samples while pushing away negative samples. Many successes of CL rely on the semantic consistency that exists between data augmentations of the same instance. In multi-view scenarios, however, CL might cause representation degeneration when the collected multiple views inherently have inconsistent semantic information or their representations subsequently do not capture sufficient discriminative information. To address this issue, we propose a novel framework called SEM: SElf-weighted Multi-view contrastive learning with reconstruction regularization. Specifically, SEM is a general framework where we propose to first measure the discrepancy between pairwise representations and then minimize the corresponding self-weighted contrastive loss, thus making SEM adaptively strengthen the useful pairwise views and weaken the unreliable ones. Meanwhile, we impose a self-supervised reconstruction term to regularize the hidden features of encoders, to assist CL in accessing sufficient discriminative information of the data. Experiments on public multi-view datasets verified that SEM can mitigate representation degeneration in existing CL methods and help them achieve significant performance improvements. 
Ablation studies also demonstrated the effectiveness of SEM with different options of weighting strategies and reconstruction terms.", "keywords": "Multi-view learning;Contrastive learning;Representation degeneration;Self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/306b6814794561752f4d81b12432c6533a33b603.pdf", "author": "Jie Xu;Shuo Chen;Yazhou Ren;Xiaoshuang Shi;Heng Tao Shen;Gang Niu;Xiaofeng Zhu", "authorids": "~Jie_Xu8;~Shuo_Chen8;~Yazhou_Ren1;~Xiaoshuang_Shi1;~Heng_Tao_Shen3;~Gang_Niu1;~Xiaofeng_Zhu7", "gender": "M;M;M;Not Specified;M;M;M", "homepage": "https://submissionsin.github.io;https://shuochenya.github.io/;https://yazhou-ren.github.io/;http://plaza.ufl.edu/xsshi2015/;https://niug1984.github.io;https://sites.google.com/site/seanzhuxf/;https://cfm.uestc.edu.cn/~shenht/", "dblp": "37/5126-44;00/6472-3.html;157/2928;87/10627;26/3367-1;60/4671-1;s/HTShen", "google_scholar": "YT1_9swAAAAJ;vlu_3ksAAAAJ;https://scholar.google.com/citations?hl=en;BWGQt3YAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=krryaDkAAAAJ", "orcid": "0000-0003-1675-1821;;;;;0000-0001-6840-0578;", "linkedin": ";;;;;;", "or_profile": "~Jie_Xu8;~Shuo_Chen8;~Yazhou_Ren1;~Xiaoshuang_Shi1;~Gang_Niu1;~Xiaofeng_Zhu7;~Hengtao_Shen1", "aff": "University of Electronic Science and Technology of China;RIKEN;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;RIKEN;University of Electronic Science and Technology of China;", "aff_domain": "uestc.edu.cn;riken.jp;uestc.edu.cn;uestc.edu.cn;riken.jp;uestc.edu.cn;", "position": "PhD student;Postdoc;Associate Professor;Full Professor;Research Scientist (tenured);Full Professor;", "bibtex": "@inproceedings{\nxu2023selfweighted,\ntitle={Self-Weighted Contrastive Learning among Multiple Views for Mitigating Representation Degeneration},\nauthor={Jie Xu and Shuo Chen and Yazhou Ren and Xiaoshuang Shi and Heng Tao Shen and Gang Niu and Xiaofeng Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X8dbFcAox2}\n}", "github": "", "project": "", "reviewers": "tvpR;gird;tV6r;dnwM;USXC", "pdf_size": 1656272, "rating": "6;6;7;7;7", "confidence": "4;5;3;5;4", "soundness": "3;3;4;4;4", "novelty": "3;3;3;4;3", "presentation": "3;4;3;3;3", "wc_summary": "51;52;155;26;73", "wc_strengths": "105;101;84;79;99", "wc_weaknesses": "81;65;192;72;107", "wc_questions": "24;3;50;17;2", "wc_limitations": "11;13;20;9;8", "wc_review": "272;234;501;203;289", "wc_reply_reviewers": "34;0;0;13;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 71.4, 44.373866182698116 ], "wc_strengths_avg": [ 93.6, 10.190191362285598 ], "wc_weaknesses_avg": [ 103.4, 46.52999032881911 ], "wc_questions_avg": [ 19.2, 17.52027397046062 ], "wc_limitations_avg": [ 12.2, 4.261455150532503 ], "wc_review_avg": [ 299.8, 104.94836825792005 ], "wc_reply_reviewers_avg": [ 12.8, 12.607934009979589 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 
0 ], "corr_rating_confidence": -0.32732683535398854, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14833129799243168084&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uestc.edu.cn;riken.jp;uestc.edu.cn;uestc.edu.cn;riken.jp;uestc.edu.cn;", "author_num": 7, "aff_unique_index": "0;1;0;0;1;0", "aff_unique_norm": "University of Electronic Science and Technology of China;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.riken.jp", "aff_unique_abbr": "UESTC;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "China;Japan" }, { "title": "Non-Convex Bilevel Optimization with Time-Varying Objective Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71374", "id": "X9Vjq9Fuhq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5ee60ca5686bbcf756e56a6c75e66f32-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=X9Vjq9Fuhq", "openreview": "https://openreview.net/forum?id=X9Vjq9Fuhq", "poster": "/media/PosterPDFs/NeurIPS%202023/71374.png?t=1702270381.1668403", "slides": "https://nips.cc/virtual/2023/poster/71374", "video": "https://nips.cc/virtual/2023/poster/71374", "author_site": "Sen Lin, Daouda Sow, Kaiyi Ji, Yingbin Liang, Ness Shroff", "tldr": "", "abstract": "Bilevel optimization has become a powerful tool in a wide variety of machine learning problems. However, the current nonconvex bilevel optimization considers an offline dataset and static functions, which may not work well in emerging online applications with streaming data and time-varying functions. In this work, we study online bilevel optimization (OBO) where the functions can be time-varying and the agent continuously updates the decisions with online streaming data. To deal with the function variations and the unavailability of the true hypergradients in OBO, we propose a single-loop online bilevel optimizer with window averaging (SOBOW), which updates the outer-level decision based on a window average of the most recent hypergradient estimations stored in the memory. Compared to existing algorithms, SOBOW is computationally efficient and does not need to know previous functions. To handle the unique technical difficulties rooted in single-loop update and function variations for OBO, we develop a novel analytical technique that disentangles the complex couplings between decision variables, and carefully controls the hypergradient estimation error. We show that SOBOW can achieve a sublinear bilevel local regret under mild conditions. 
Extensive experiments across multiple domains corroborate the effectiveness of SOBOW.", "keywords": "Bilevel Optimization;Time-Varying Functions;Single-Loop;Sublinear Bilevel Local Regret", "primary_area": "", "supplementary_material": "/attachment/58e36aba1ee8cd41b75d2bb4b092a25ed535dc39.zip", "author": "Sen Lin;Daouda Sow;Kaiyi Ji;Yingbin Liang;Ness Shroff", "authorids": "~Sen_Lin1;~Daouda_Sow1;~Kaiyi_Ji1;~Yingbin_Liang1;~Ness_Shroff1", "gender": ";M;M;F;M", "homepage": "https://slin70.github.io/;https://sowmaster.github.io;https://cse.buffalo.edu/~kaiyiji/;https://sites.google.com/view/yingbinliang/home;http://newslab.ece.ohio-state.edu/", "dblp": "70/9499-1.html;;205/3164;51/332;67/1991", "google_scholar": "94-TbUsAAAAJ;https://scholar.google.com/citations?hl=en;E0A3lSIAAAAJ;lGgLAiIAAAAJ;https://scholar.google.com.tw/citations?user=5kL-ZrAAAAAJ", "orcid": ";;;;0000-0002-4606-6879", "linkedin": ";;;;nessshroff/", "or_profile": "~Sen_Lin1;~Daouda_Sow1;~Kaiyi_Ji1;~Yingbin_Liang1;~Ness_Shroff1", "aff": "Ohio State University, Columbus;Ohio State University;State University of New York at Buffalo;The Ohio State University;Ohio State University, Columbus", "aff_domain": "osu.edu;osu.edu;buffalo.edu;osu.edu;osu.edu", "position": "Postdoc;PhD student;Assistant Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nlin2023nonconvex,\ntitle={Non-Convex Bilevel Optimization with Time-Varying Objective Functions},\nauthor={Sen Lin and Daouda Sow and Kaiyi Ji and Yingbin Liang and Ness Shroff},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=X9Vjq9Fuhq}\n}", "github": "", "project": "", "reviewers": "98z6;M56B;ouS8;Ue3m", "pdf_size": 516422, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;4;3", "wc_summary": "34;185;207;90", "wc_strengths": "48;56;213;114", "wc_weaknesses": "209;184;292;50", "wc_questions": "54;1;250;2", "wc_limitations": "1;1;20;1", "wc_review": "346;427;982;257", "wc_reply_reviewers": "169;9;41;13", "wc_reply_authors": "766;0;236;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;4;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 129.0, 70.29580357318636 ], "wc_strengths_avg": [ 107.75, 65.8876885313182 ], "wc_weaknesses_avg": [ 183.75, 86.95508898276167 ], "wc_questions_avg": [ 76.75, 102.29705518733176 ], "wc_limitations_avg": [ 5.75, 8.227241335952167 ], "wc_review_avg": [ 503.0, 283.01148386593786 ], "wc_reply_reviewers_avg": [ 58.0, 65.26101439603893 ], "wc_reply_authors_avg": [ 250.5, 312.83022552176766 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15333074216805688399&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "osu.edu;osu.edu;buffalo.edu;osu.edu;osu.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Ohio State University;State University of New York at Buffalo", "aff_unique_dep": ";", "aff_unique_url": "https://www.osu.edu;https://www.buffalo.edu", "aff_unique_abbr": "OSU;SUNY Buffalo", "aff_campus_unique_index": "0;2;0", 
"aff_campus_unique": "Columbus;;Buffalo", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Binarized Neural Machine Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71373", "id": "XAyPlfmWpu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd1fc5cbedfe4d90d0ac2d23966fa27e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XAyPlfmWpu", "openreview": "https://openreview.net/forum?id=XAyPlfmWpu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71373", "video": "https://nips.cc/virtual/2023/poster/71373", "author_site": "Yichi Zhang, Ankush Garg, Yuan Cao, Lukasz Lew, Behrooz Ghorbani, Zhiru Zhang, Orhan Firat", "tldr": "", "abstract": "The rapid scaling of language models is motivating research using low-bitwidth quantization.\nIn this work, we propose a novel binarization technique for Transformers applied to machine translation (BMT), the first of its kind. We identify and address the problem of inflated dot-product variance when using one-bit weights and activations. Specifically, BMT leverages additional LayerNorms and residual connections to improve binarization quality. Experiments on the WMT dataset show that a one-bit weight-only Transformer can achieve the same quality as a float one, while being 16$\\times$ smaller in size. One-bit activations incur varying degrees of quality drop, but mitigated by the proposed architectural changes. We further conduct a scaling law study using production-scale translation datasets, which shows that one-bit weight Transformers scale and generalize well in both in-domain and out-of-domain settings. Implementation in JAX/Flax will be open sourced.", "keywords": "neural network quantization;binarized transformer;machine translation;scaling law", "primary_area": "", "supplementary_material": "/attachment/9fcee1d1c215b758e0bcaa03ff946bff19916775.zip", "author": "Yichi Zhang;Ankush Garg;Yuan Cao;Lukasz Lew;Behrooz Ghorbani;Zhiru Zhang;Orhan Firat", "authorids": "~Yichi_Zhang2;~Ankush_Garg1;~Yuan_Cao2;~Lukasz_Lew1;~Behrooz_Ghorbani1;~Zhiru_Zhang2;~Orhan_Firat1", "gender": "M;M;M;M;;M;M", "homepage": "https://ychzhang.github.io/;;;;;https://www.csl.cornell.edu/~zhiruz;", "dblp": "86/7054;86/7221;52/4472-7.html;52/5311;162/0166;81/4227;120/2225", "google_scholar": "XrUhMgwAAAAJ;https://scholar.google.com/citations?hl=en;Q82vvqcAAAAJ;q65lmCAAAAAJ;;https://scholar.google.com.tw/citations?user=x05pUHsAAAAJ;https://scholar.google.com.tr/citations?user=dLaR9lgAAAAJ", "orcid": ";;0000-0002-1267-8930;;;;", "linkedin": ";agbgarg/;;;;;", "or_profile": "~Yichi_Zhang2;~Ankush_Garg1;~Yuan_Cao2;~Lukasz_Lew1;~Behrooz_Ghorbani1;~Zhiru_Zhang2;~Orhan_Firat1", "aff": "Cornell University;Google;Google DeepMind;Research, Google;Google;Cornell University;Google", "aff_domain": "cornell.edu;google.com;google.com;research.google.com;google.com;cornell.edu;google.com", "position": "PhD student;research engineer;Research scientist;Researcher;Researcher;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nzhang2023binarized,\ntitle={Binarized Neural Machine Translation},\nauthor={Yichi Zhang and Ankush Garg and Yuan Cao and Lukasz Lew and Behrooz Ghorbani and Zhiru Zhang and Orhan Firat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XAyPlfmWpu}\n}", "github": "", "project": "", "reviewers": "HM3D;B1hS;jHYb;arnv;BTaK", "pdf_size": 9167187, 
"rating": "5;6;6;6;8", "confidence": "4;4;4;4;3", "soundness": "3;3;3;2;4", "novelty": "3;3;3;2;3", "presentation": "3;4;3;2;4", "wc_summary": "95;51;130;90;100", "wc_strengths": "78;61;143;97;44", "wc_weaknesses": "71;43;69;179;41", "wc_questions": "95;1;49;41;2", "wc_limitations": "11;1;1;13;8", "wc_review": "350;157;392;420;195", "wc_reply_reviewers": "4;0;12;141;4", "wc_reply_authors": "0;0;0;318;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 93.2, 25.2776581193749 ], "wc_strengths_avg": [ 84.6, 34.09750724026611 ], "wc_weaknesses_avg": [ 80.6, 50.776372458063605 ], "wc_questions_avg": [ 37.6, 34.76550014022522 ], "wc_limitations_avg": [ 6.8, 4.995998398718719 ], "wc_review_avg": [ 302.8, 106.58217486990965 ], "wc_reply_reviewers_avg": [ 32.2, 54.53952695064379 ], "wc_reply_authors_avg": [ 63.6, 127.2 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9185586535436918, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12048263008364333774&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cornell.edu;google.com;google.com;research.google.com;google.com;cornell.edu;google.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "Cornell University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cornell.edu;https://www.google.com", "aff_unique_abbr": "Cornell;Google", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "GAIA: Delving into Gradient-based Attribution Abnormality for Out-of-distribution Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71372", "id": "XEBzQP3e7B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fcdccd419c4dc471fa3b73ec97b53789-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XEBzQP3e7B", "openreview": "https://openreview.net/forum?id=XEBzQP3e7B", "poster": "/media/PosterPDFs/NeurIPS%202023/71372.png?t=1699261024.268454", "slides": "https://nips.cc/virtual/2023/poster/71372", "video": "https://nips.cc/virtual/2023/poster/71372", "author_site": "Jinggang Chen, Junjie Li, Xiaoyang Qu, Jianzong Wang, Jiguang Wan, Jing Xiao", "tldr": "", "abstract": "Detecting out-of-distribution (OOD) examples is crucial to guarantee the reliability and safety of deep neural networks in real-world settings. In this paper, we offer an innovative perspective on quantifying the disparities between in-distribution (ID) and OOD data---analyzing the uncertainty that arises when models attempt to explain their predictive decisions. This perspective is motivated by our observation that gradient-based attribution methods encounter challenges in assigning feature importance to OOD data, thereby yielding divergent explanation patterns. Consequently, we investigate how attribution gradients lead to uncertain explanation outcomes and introduce two forms of abnormalities for OOD detection: the zero-deflation abnormality and the channel-wise average abnormality. 
We then propose GAIA, a simple and effective approach that incorporates Gradient Abnormality Inspection and Aggregation. The effectiveness of GAIA is validated on both commonly utilized (CIFAR) and large-scale (ImageNet-1k) benchmarks. Specifically, GAIA reduces the average FPR95 by 23.10% on CIFAR10 and by 45.41% on CIFAR100 compared to advanced post-hoc methods.", "keywords": "out-of-distribution detection;distribution shifts;attribution gradients", "primary_area": "", "supplementary_material": "/attachment/b9c785ecc2a0115df776417a128558a0efa90107.pdf", "author": "Jinggang Chen;Junjie Li;Xiaoyang Qu;Jianzong Wang;Jiguang Wan;Jing Xiao", "authorids": "~Jinggang_Chen1;~Junjie_Li6;~Xiaoyang_Qu1;~Jianzong_Wang2;~Jiguang_Wan1;~Jing_Xiao3", "gender": ";;M;M;;M", "homepage": ";;;https://largeaudiomodel.com/author/jianzong-wang/;;http://www.cs.cmu.edu/~jxiao/", "dblp": ";;;70/8380;;67/4008-6.html", "google_scholar": ";;BnQE7yUAAAAJ;https://scholar.google.co.uk/citations?user=noi4qcUAAAAJ;;mcBd8KUAAAAJ", "orcid": ";;;0000-0002-9237-4231;;0000-0001-9615-4749", "linkedin": ";;;;;jing-xiao-8653051/", "or_profile": "~Jinggang_Chen1;~Junjie_Li6;~Xiaoyang_Qu1;~Jianzong_Wang2;~Jiguang_Wan1;~Jing_Xiao3", "aff": ";;Pingan Technology;Pingan Technology;;Pingan Group", "aff_domain": ";;pingan.com.cn;pingan.com.cn;;pingan.com.cn", "position": ";;Researcher;Researcher;;Chief Scientist", "bibtex": "@inproceedings{\nchen2023gaia,\ntitle={{GAIA}: Delving into Gradient-based Attribution Abnormality for Out-of-distribution Detection},\nauthor={Jinggang Chen and Junjie Li and Xiaoyang Qu and Jianzong Wang and Jiguang Wan and Jing Xiao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XEBzQP3e7B}\n}", "github": "", "project": "", "reviewers": "CiZ8;qAeX;kfJE;TjRi;Exp8", "pdf_size": 2704370, "rating": "5;5;5;5;6", "confidence": "4;3;4;4;4", "soundness": "3;2;3;2;3", "novelty": "2;2;2;2;3", "presentation": "3;2;3;3;3", "wc_summary": "55;102;84;162;99", "wc_strengths": "45;119;56;30;105", "wc_weaknesses": "98;282;206;145;144", "wc_questions": "76;7;65;126;67", "wc_limitations": "33;7;1;1;125", "wc_review": "307;517;412;464;540", "wc_reply_reviewers": "110;16;0;53;114", "wc_reply_authors": "230;32;0;329;692", "reply_reviewers": "1;1;0;2;2", "reply_authors": "2;2;1;3;3", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 100.4, 35.01199794356214 ], "wc_strengths_avg": [ 71.0, 34.762048271067115 ], "wc_weaknesses_avg": [ 175.0, 63.56099432828282 ], "wc_questions_avg": [ 68.2, 37.83860462543512 ], "wc_limitations_avg": [ 33.4, 47.30158559710234 ], "wc_review_avg": [ 448.0, 83.25623099804602 ], "wc_reply_reviewers_avg": [ 58.6, 46.885392181360714 ], "wc_reply_authors_avg": [ 256.6, 249.72432800990777 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3227119837893693938&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;pingan.com.cn;pingan.com.cn;;pingan.com.cn", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "PingAn Technology;Ping An Group", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.pingan.com;https://www.pingan.com.cn", "aff_unique_abbr": ";Ping An", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "An Inductive Bias for Tabular Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71371", "id": "XEUc1JegGt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8671b6dffc08b4fcf5b8ce26799b2bef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XEUc1JegGt", "openreview": "https://openreview.net/forum?id=XEUc1JegGt", "poster": "/media/PosterPDFs/NeurIPS%202023/71371.png?t=1699590328.5084965", "slides": "https://nips.cc/virtual/2023/poster/71371", "video": "https://nips.cc/virtual/2023/poster/71371", "author_site": "Ege Beyazit, Jonathan Kozaczuk, Bo Li, Vanessa Wallace, Bilal Fadlallah", "tldr": "", "abstract": "Deep learning methods have achieved state-of-the-art performance in most modeling tasks involving images, text and audio, however, they typically underperform tree-based methods on tabular data. In this paper, we hypothesize that a significant contributor to this performance gap is the interaction between irregular target functions resulting from the heterogeneous nature of tabular feature spaces, and the well-known tendency of neural networks to learn smooth functions. Utilizing tools from spectral analysis, we show that functions described by tabular datasets often have high irregularity, and that they can be smoothed by transformations such as scaling and ranking in order to improve performance. However, because these transformations tend to lose information or negatively impact the loss landscape during optimization, they need to be rigorously fine-tuned for each feature to achieve performance gains. To address these problems, we propose introducing frequency reduction as an inductive bias. We realize this bias as a neural network layer that promotes learning low-frequency representations of the input features, allowing the network to operate in a space where the target function is more regular. 
Our proposed method introduces less computational complexity than a fully connected layer, while significantly improving neural network performance, and speeding up its convergence on 14 tabular datasets.", "keywords": "Tabular Deep Learning;Spectral Bias;Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Ege Beyazit;Jonathan Kozaczuk;Bo Li;Vanessa Wallace;Bilal H Fadlallah", "authorids": "~Ege_Beyazit1;~Jonathan_Kozaczuk1;~Bo_Li44;vwall@amazon.com;~Bilal_H_Fadlallah1", "gender": "M;M;M;;", "homepage": ";;https://www.linkedin.com/in/bo-li-1221a381/;;https://scholar.google.com/citations?hl=en&user=JEb1xaAAAAAJ", "dblp": "201/4232;;;;", "google_scholar": ";oLVspLIAAAAJ;2iDLBSkAAAAJ;;", "orcid": ";0000-0002-1801-7965;;;", "linkedin": ";;;;", "or_profile": "~Ege_Beyazit1;~Jonathan_Kozaczuk1;~Bo_Li44;vwall@amazon.com;~Bilal_H_Fadlallah1", "aff": "Amazon;Amazon;Amazon;;", "aff_domain": "amazon.com;amazon.com;amazon.com;;", "position": "Researcher;Researcher;Researcher;;", "bibtex": "@inproceedings{\nbeyazit2023an,\ntitle={An Inductive Bias for Tabular Deep Learning},\nauthor={Ege Beyazit and Jonathan Kozaczuk and Bo Li and Vanessa Wallace and Bilal H Fadlallah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XEUc1JegGt}\n}", "github": "", "project": "", "reviewers": "uyTw;CXbH;H86d;24qS", "pdf_size": 15685180, "rating": "5;6;6;7", "confidence": "3;4;3;5", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;2;3", "wc_summary": "117;128;59;664", "wc_strengths": "63;57;160;2", "wc_weaknesses": "69;341;136;2", "wc_questions": "64;63;135;2", "wc_limitations": "79;11;1;2", "wc_review": "392;600;491;672", "wc_reply_reviewers": "22;21;63;130", "wc_reply_authors": "0;0;0;515", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 242.0, 245.04795449054456 ], "wc_strengths_avg": [ 70.5, 56.879258082362504 ], "wc_weaknesses_avg": [ 137.0, 126.95077786291819 ], "wc_questions_avg": [ 66.0, 47.090338711884414 ], "wc_limitations_avg": [ 23.25, 32.42202183701689 ], "wc_review_avg": [ 538.75, 106.44570212084658 ], "wc_reply_reviewers_avg": [ 59.0, 44.35651023243375 ], "wc_reply_authors_avg": [ 128.75, 223.00154147449294 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11492556018944065319&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "amazon.com;amazon.com;amazon.com;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Regularized Monotone Graphon Mean-Field Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71370", "id": "XF923QPCGw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d4c2f25bf0c33065b7d4fb9be2a9add1-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=XF923QPCGw", "openreview": "https://openreview.net/forum?id=XF923QPCGw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71370", "video": "https://nips.cc/virtual/2023/poster/71370", "author_site": "Fengzhuo Zhang, Vincent Tan, Zhaoran Wang, Zhuoran Yang", "tldr": "", "abstract": "This paper studies two fundamental problems in regularized Graphon Mean-Field Games (GMFGs). First, we establish the existence of a Nash Equilibrium (NE) of any $\\lambda$-regularized GMFG (for $\\lambda\\geq 0$). This result relies on weaker conditions than previous works analyzing both unregularized GMFGs ($\\lambda=0$) and $\\lambda$-regularized MFGs, which are special cases of GMFGs. Second, we propose provably efficient algorithms to learn the NE in weakly monotone GMFGs, motivated by Lasry and Lions (2007). Previous literature either only analyzed continuous-time algorithms or required extra conditions to analyze discrete-time algorithms. In contrast, we design a discrete-time algorithm and derive its convergence rate solely under weakly monotone conditions. Furthermore, we develop and analyze the action-value function estimation procedure during the online learning process, which is absent from algorithms for monotone GMFGs. This serves as a sub-module in our optimization algorithm. The efficiency of the designed algorithm is corroborated by empirical evaluations.", "keywords": "mean-field approximation;graphon games;multi-agent reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/c164e5230d9ede0d5835ed0219145bb4f9a3ef0c.zip", "author": "Fengzhuo Zhang;Vincent Tan;Zhaoran Wang;Zhuoran Yang", "authorids": "~Fengzhuo_Zhang1;~Vincent_Tan1;~Zhaoran_Wang1;~Zhuoran_Yang1", "gender": "M;M;Not Specified;M", "homepage": ";https://www.ece.nus.edu.sg/stfpage/vtan/pubs.htm;https://zhaoranwang.github.io/;https://zhuoranyang.github.io/", "dblp": "254/1627;60/2327;117/2756;", "google_scholar": ";dJoAVvAAAAAJ;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ;", "orcid": ";0000-0002-5008-4527;;", "linkedin": "%E4%B8%B0%E5%8D%93-%E5%BC%A0-4576a5135/;;;", "or_profile": "~Fengzhuo_Zhang1;~Vincent_Tan1;~Zhaoran_Wang1;~Zhuoran_Yang1", "aff": "National University of Singapore;National University of Singapore;;Yale University", "aff_domain": "nus.edu;nus.edu.sg;;yale.edu", "position": "PhD student;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023learning,\ntitle={Learning Regularized Monotone Graphon Mean-Field Games},\nauthor={Fengzhuo Zhang and Vincent Tan and Zhaoran Wang and Zhuoran Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XF923QPCGw}\n}", "github": "", "project": "", "reviewers": "NT3m;eGqz;veSy;Epba", "pdf_size": 804055, "rating": "6;6;7;7", "confidence": "2;3;3;3", "soundness": "3;4;3;4", "novelty": "3;2;3;3", "presentation": "3;4;3;3", "wc_summary": "135;159;57;296", "wc_strengths": "77;54;104;41", "wc_weaknesses": "50;91;94;90", "wc_questions": "174;43;134;88", "wc_limitations": "14;1;13;14", "wc_review": "450;348;402;529", "wc_reply_reviewers": "4;65;23;15", "wc_reply_authors": "0;771;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 161.75, 86.19563504029657 ], "wc_strengths_avg": [ 
69.0, 23.96872962841377 ], "wc_weaknesses_avg": [ 81.25, 18.102140757380052 ], "wc_questions_avg": [ 109.75, 49.10384404504397 ], "wc_limitations_avg": [ 10.5, 5.5 ], "wc_review_avg": [ 432.25, 66.49953007352758 ], "wc_reply_reviewers_avg": [ 26.75, 23.09085316743407 ], "wc_reply_authors_avg": [ 192.75, 333.8527931589011 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=557526753359233077&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nus.edu;nus.edu.sg;;yale.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "National University of Singapore;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.yale.edu", "aff_unique_abbr": "NUS;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;United States" }, { "title": "Convergence analysis of ODE models for accelerated first-order methods via positive semidefinite kernels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71369", "id": "XFE6zpevLc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c70741145c2c4f1d0c2e91b98729a49a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XFE6zpevLc", "openreview": "https://openreview.net/forum?id=XFE6zpevLc", "poster": "/media/PosterPDFs/NeurIPS%202023/71369.png?t=1701962192.069069", "slides": "https://nips.cc/virtual/2023/poster/71369", "video": "https://nips.cc/virtual/2023/poster/71369", "author_site": "Jungbin Kim, Insoon Yang", "tldr": "", "abstract": "We propose a novel methodology that systematically analyzes ordinary differential equation (ODE) models for first-order optimization methods by converting the task of proving convergence rates into verifying the positive semidefiniteness of specific Hilbert-Schmidt integral operators. Our approach is based on the performance estimation problems (PEP) introduced by Drori and Teboulle. Unlike previous works on PEP, which rely on finite-dimensional linear algebra, we use tools from functional analysis. Using the proposed method, we establish convergence rates of various accelerated gradient flow models, some of which are new. 
As an immediate consequence of our framework, we show a correspondence between minimizing function values and minimizing gradient norms.", "keywords": "convex optimization;accelerated gradient methods", "primary_area": "", "supplementary_material": "/attachment/e76393412e4bc9801e8add1fc60e51fe8fad05d7.zip", "author": "Jungbin Kim;Insoon Yang", "authorids": "~Jungbin_Kim1;~Insoon_Yang2", "gender": "M;", "homepage": ";http://coregroup.snu.ac.kr/~yang/", "dblp": ";129/2417", "google_scholar": "https://scholar.google.com/citations?hl=ko;", "orcid": ";", "linkedin": ";", "or_profile": "~Jungbin_Kim1;~Insoon_Yang2", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nkim2023convergence,\ntitle={Convergence analysis of {ODE} models for accelerated first-order methods via positive semidefinite kernels},\nauthor={Jungbin Kim and Insoon Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XFE6zpevLc}\n}", "github": "", "project": "", "reviewers": "RgYv;48x5;pACc;PRzN;C3yE", "pdf_size": 817739, "rating": "5;6;6;7;7", "confidence": "4;4;5;3;4", "soundness": "2;4;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "55;175;93;68;114", "wc_strengths": "67;127;46;57;73", "wc_weaknesses": "91;68;216;45;62", "wc_questions": "272;122;220;3;36", "wc_limitations": "1;6;7;1;0", "wc_review": "486;498;582;174;285", "wc_reply_reviewers": "35;0;253;15;22", "wc_reply_authors": "0;0;565;0;0", "reply_reviewers": "1;0;2;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.0, 42.223216362565275 ], "wc_strengths_avg": [ 74.0, 28.042824394129774 ], "wc_weaknesses_avg": [ 96.4, 61.58766110187982 ], "wc_questions_avg": [ 130.6, 103.23875241400391 ], "wc_limitations_avg": [ 3.0, 2.898275349237888 ], "wc_review_avg": [ 405.0, 151.19523802024983 ], "wc_reply_reviewers_avg": [ 65.0, 94.67629059062253 ], "wc_reply_authors_avg": [ 113.0, 226.0 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17014923137412245869&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "DreamWaltz: Make a Scene with Complex 3D Animatable Avatars", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71368", "id": "XGXL1E8Yyo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0e769ec2c2cd99b6ad69c9d75113e386-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XGXL1E8Yyo", "openreview": "https://openreview.net/forum?id=XGXL1E8Yyo", "poster": "/media/PosterPDFs/NeurIPS%202023/71368.png?t=1699103905.73482", "slides": "https://nips.cc/virtual/2023/poster/71368", "video": 
"https://nips.cc/virtual/2023/poster/71368", "author_site": "Yukun Huang, Jianan Wang, Ailing Zeng, He CAO, Xianbiao Qi, Yukai Shi, Zheng-Jun Zha, Lei Zhang", "tldr": "", "abstract": "We present DreamWaltz, a novel framework for generating and animating complex 3D avatars given text guidance and parametric human body prior. While recent methods have shown encouraging results for text-to-3D generation of common objects, creating high-quality and animatable 3D avatars remains challenging. To create high-quality 3D avatars, DreamWaltz proposes 3D-consistent occlusion-aware Score Distillation Sampling (SDS) to optimize implicit neural representations with canonical poses. It provides view-aligned supervision via 3D-aware skeleton conditioning which enables complex avatar generation without artifacts and multiple faces. For animation, our method learns an animatable 3D avatar representation from abundant image priors of diffusion model conditioned on various poses, which could animate complex non-rigged avatars given arbitrary poses without retraining. Extensive evaluations demonstrate that DreamWaltz is an effective and robust approach for creating 3D avatars that can take on complex shapes and appearances as well as novel poses for animation. The proposed framework further enables the creation of complex scenes with diverse compositions, including avatar-avatar, avatar-object and avatar-scene interactions. See https://dreamwaltz3d.github.io/ for more vivid 3D avatar and animation results.", "keywords": "Avatar Generation;3D Content Creation;NeRF;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/279cb1628581ec0b991eef6384b4c385a51e5d6d.zip", "author": "Yukun Huang;Jianan Wang;Ailing Zeng;He CAO;Xianbiao Qi;Yukai Shi;Zheng-Jun Zha;Lei Zhang", "authorids": "~Yukun_Huang1;~Jianan_Wang2;~Ailing_Zeng1;~He_CAO1;~Xianbiao_Qi2;~Yukai_Shi3;~Zheng-Jun_Zha2;~Lei_Zhang23", "gender": "M;F;F;M;M;;M;M", "homepage": ";https://scholar.google.com/citations?user=mt5mvZ8AAAAJ&hl=en;https://ailingzeng.site/;https://github.com/CiaoHe;https://www.linkedin.com/in/xianbiao-qi-39617727/;https://shiyukai26.github.io/info/;;https://www.leizhang.org/", "dblp": "186/1316;49/6053,;226/4720;;118/3741;;23/1818;z/LeiZhang", "google_scholar": "lHb5gzoAAAAJ;mt5mvZ8AAAAJ;Tn7fzS8AAAAJ;tLZ2V2kAAAAJ;odjSydQAAAAJ;oQXfkSQAAAAJ;;fIlGZToAAAAJ", "orcid": "0000-0002-5322-2884;;;;;;;", "linkedin": ";;%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;he-cao/;;;;", "or_profile": "~Yukun_Huang1;~Jianan_Wang2;~Ailing_Zeng1;~He_CAO1;~Xianbiao_Qi2;~Yukai_Shi3;~Zheng-Jun_Zha2;~Lei_Zhang1", "aff": "University of Science and Technology of China;International Digital Economy Academy (IDEA);International Digital Economy Academy;Hong Kong University of Science and Technology;International Digital Economy Academy;Tsinghua University;University of Science and Technology of China;International Digital Economy Academy", "aff_domain": "ustc.edu.cn;idea.edu.cn;idea.edu.cn;ust.hk;idea.edu.cn;mail.tsinghua.edu.cn;ustc.edu.cn;idea.edu.cn", "position": "PhD student;Researcher;Researcher;PhD student;Researcher;PhD student;Full Professor;Chief Scientist", "bibtex": "@inproceedings{\nhuang2023dreamwaltz,\ntitle={DreamWaltz: Make a Scene with Complex 3D Animatable Avatars},\nauthor={Yukun Huang and Jianan Wang and Ailing Zeng and He CAO and Xianbiao Qi and Yukai Shi and Zheng-Jun Zha and Lei Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XGXL1E8Yyo}\n}", 
"github": "", "project": "", "reviewers": "rngx;DdUA;kXf3;1xa2", "pdf_size": 13066114, "rating": "4;4;4;5", "confidence": "5;4;5;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "62;73;42;100", "wc_strengths": "41;63;58;36", "wc_weaknesses": "139;265;294;163", "wc_questions": "107;33;15;86", "wc_limitations": "89;8;10;33", "wc_review": "438;442;419;418", "wc_reply_reviewers": "0;50;72;153", "wc_reply_authors": "0;85;118;102", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 4.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.25, 20.94486810653149 ], "wc_strengths_avg": [ 49.5, 11.280514172678478 ], "wc_weaknesses_avg": [ 215.25, 65.61392763735455 ], "wc_questions_avg": [ 60.25, 37.5458053582554 ], "wc_limitations_avg": [ 35.0, 32.687918257362305 ], "wc_review_avg": [ 429.25, 10.848386976873567 ], "wc_reply_reviewers_avg": [ 68.75, 55.19680697286755 ], "wc_reply_authors_avg": [ 76.25, 45.54324867639549 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15354775474694145501&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;idea.edu.cn;idea.edu.cn;ust.hk;idea.edu.cn;mail.tsinghua.edu.cn;ustc.edu.cn;idea.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;2;1;3;0;1", "aff_unique_norm": "University of Science and Technology of China;International Digital Economy Academy;Hong Kong University of Science and Technology;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;;https://www.ust.hk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "USTC;IDEA;HKUST;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "Cold Diffusion: Inverting Arbitrary Image Transforms Without Noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71367", "id": "XH3ArccntI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/80fe51a7d8d0c73ff7439c2a2554ed53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XH3ArccntI", "openreview": "https://openreview.net/forum?id=XH3ArccntI", "poster": "/media/PosterPDFs/NeurIPS%202023/71367.png?t=1701826563.5551014", "slides": "https://nips.cc/virtual/2023/poster/71367", "video": "https://nips.cc/virtual/2023/poster/71367", "author_site": "Arpit Bansal, Eitan Borgnia, Hong-Min Chu, Jie Li, Hamid Kazemi, Furong Huang, Micah Goldblum, Jonas Geiping, Tom Goldstein", "tldr": "", "abstract": "Standard diffusion models involve an image transform -- adding Gaussian noise -- and an image restoration operator that inverts this degradation. We observe that the generative behavior of diffusion models is not strongly dependent on the choice of image degradation, and in fact, an entire family of generative models can be constructed by varying this choice. Even when using completely deterministic degradations (e.g., blur, masking, and more), the training and test-time update rules that underlie diffusion models can be easily generalized to create generative models. 
\nThe success of these fully deterministic models calls into question the community's understanding of diffusion models, which relies on noise in either gradient Langevin dynamics or variational inference, and paves the way for generalized diffusion models that invert arbitrary processes.", "keywords": "Generative Models;Computer Vision;Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/8d5dae08f8a135d18be7583917280c6315b30fc3.zip", "author": "Arpit Bansal;Eitan Borgnia;Hong-Min Chu;Jie S. Li;Hamid Kazemi;Furong Huang;Micah Goldblum;Jonas Geiping;Tom Goldstein", "authorids": "~Arpit_Bansal1;~Eitan_Borgnia1;~Hong-Min_Chu1;~Jie_S._Li1;~Hamid_Kazemi1;~Furong_Huang1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1", "gender": "M;;F;M;F;;M;M;M", "homepage": "https://eitanborgnia.com;;;;https://furong-huang.com;;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/;https://arpitbansal297.github.io/", "dblp": ";185/0720;205/2248;;72/8513;241/7231;190/7229;25/8184;190/9114", "google_scholar": ";;https://scholar.google.com/citations?hl=en;7hNdaGQAAAAJ;13yyuCcAAAAJ;pGDKzuUAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ;Pchxm4IAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;hamid-kazemi-608a8085/;;;;;arpit-bansal-970865b1/", "or_profile": "~Eitan_Borgnia1;~Hong-Min_Chu1;~Jie_S._Li1;~Hamid_Kazemi1;~Furong_Huang1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1;~Arpit_Amit_Bansal1", "aff": "University of Chicago;Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland;New York University;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "uchicago.edu;cs.umd.edu;umd.edu;umd.edu;cs.umd.edu;nyu.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Postdoc;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nbansal2023cold,\ntitle={Cold Diffusion: Inverting Arbitrary Image Transforms Without Noise},\nauthor={Arpit Bansal and Eitan Borgnia and Hong-Min Chu and Jie S. 
Li and Hamid Kazemi and Furong Huang and Micah Goldblum and Jonas Geiping and Tom Goldstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XH3ArccntI}\n}", "github": "", "project": "", "reviewers": "VoTU;SFSB;pjEM;pgXS", "pdf_size": 21149067, "rating": "4;4;5;7", "confidence": "5;4;4;5", "soundness": "3;3;2;3", "novelty": "2;2;3;4", "presentation": "4;3;2;3", "wc_summary": "41;233;62;53", "wc_strengths": "15;129;173;56", "wc_weaknesses": "95;166;443;20", "wc_questions": "37;45;65;18", "wc_limitations": "90;15;4;25", "wc_review": "278;588;747;172", "wc_reply_reviewers": "74;51;40;9", "wc_reply_authors": "325;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.25, 78.72856851232595 ], "wc_strengths_avg": [ 93.25, 61.540129184134805 ], "wc_weaknesses_avg": [ 181.0, 159.8327250596698 ], "wc_questions_avg": [ 41.25, 16.857861667483217 ], "wc_limitations_avg": [ 33.5, 33.455193916640205 ], "wc_review_avg": [ 446.25, 231.33565981058777 ], "wc_reply_reviewers_avg": [ 43.5, 23.39337513057917 ], "wc_reply_authors_avg": [ 81.25, 140.72912811497127 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 287, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7258632450146614338&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uchicago.edu;cs.umd.edu;umd.edu;umd.edu;cs.umd.edu;nyu.edu;umd.edu;umd.edu;umd.edu", "author_num": 9, "aff_unique_index": "0;1;2;2;2;3;2;2;2", "aff_unique_norm": "University of Chicago;University of Maryland, College Park;University of Maryland;New York University", "aff_unique_dep": ";Department of Computer Science;;", "aff_unique_url": "https://www.uchicago.edu;https://www.umd.edu;https://www.umd.edu;https://www.nyu.edu", "aff_unique_abbr": "UChicago;UMD;UMD;NYU", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Solving Linear Inverse Problems Provably via Posterior Sampling with Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71366", "id": "XKBFdYwfRo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c70cfa2e7d9328c649c94d50cbf8faf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XKBFdYwfRo", "openreview": "https://openreview.net/forum?id=XKBFdYwfRo", "poster": "/media/PosterPDFs/NeurIPS%202023/71366.png?t=1702579992.8200278", "slides": "https://nips.cc/virtual/2023/poster/71366", "video": "https://nips.cc/virtual/2023/poster/71366", "author_site": "Litu Rout, Negin Raoof, Giannis Daras, Constantine Caramanis, Alex Dimakis, Sanjay Shakkottai", "tldr": "", "abstract": "We present the first framework to solve linear inverse problems leveraging pre-trained \textit{latent} diffusion models. Previously proposed algorithms (such as DPS and DDRM) only apply to \textit{pixel-space} diffusion models. We theoretically analyze our algorithm showing provable sample recovery in a linear model setting. 
The algorithmic insight obtained from our analysis extends to more general settings often considered in practice. Experimentally, we outperform previously proposed posterior sampling algorithms in a wide variety of problems including random inpainting, block inpainting, denoising, deblurring, destriping, and super-resolution.", "keywords": "Inverse Problems;Posterior Sampling;Latent Diffusion Model;Stable Diffusion;Sample Recovery", "primary_area": "", "supplementary_material": "/attachment/cab425eacf24e2ed6b4bcfde5fb018e863aa3a3d.zip", "author": "Litu Rout;Negin Raoof;Giannis Daras;Constantine Caramanis;Alex Dimakis;Sanjay Shakkottai", "authorids": "~Litu_Rout1;~Negin_Raoof1;~Giannis_Daras1;~Constantine_Caramanis1;~Alex_Dimakis1;~Sanjay_Shakkottai1", "gender": "M;F;M;M;M;M", "homepage": "https://liturout.github.io/;https://neginraoof.com/;https://giannisdaras.github.io/;http://users.ece.utexas.edu/~cmcaram/constantine_caramanis/Home.html;https://people.eecs.berkeley.edu/~alexdimakis/;https://sites.google.com/view/sanjay-shakkottai/", "dblp": "206/6445;331/6053;254/2703;96/5760;19/5000.html;61/4596", "google_scholar": "https://scholar.google.co.in/citations?hl=en;3UPV-0UAAAAJ;LaScvbQAAAAJ;47YTUrEAAAAJ;JSFmVQEAAAAJ;", "orcid": ";;;;;", "linkedin": "litu-rout-sac-isro/;negin-raoof/;;;alex-dimakis-b1b20320/;", "or_profile": "~Litu_Rout1;~Negin_Raoof1;~Giannis_Daras1;~Constantine_Caramanis1;~Alex_Dimakis1;~Sanjay_Shakkottai1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;University of Texas, Austin;University of Texas at Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nrout2023solving,\ntitle={Solving Linear Inverse Problems Provably via Posterior Sampling with Latent Diffusion Models},\nauthor={Litu Rout and Negin Raoof and Giannis Daras and Constantine Caramanis and Alex Dimakis and Sanjay Shakkottai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XKBFdYwfRo}\n}", "github": "", "project": "", "reviewers": "S3JW;xKok;yVXi;vbWT", "pdf_size": 36712851, "rating": "5;6;7;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "184;100;162;297", "wc_strengths": "32;52;101;86", "wc_weaknesses": "235;275;141;200", "wc_questions": "139;117;173;195", "wc_limitations": "2;8;1;1", "wc_review": "592;552;578;779", "wc_reply_reviewers": "405;38;17;253", "wc_reply_authors": "451;0;0;228", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 185.75, 71.23333138355947 ], "wc_strengths_avg": [ 67.75, 27.224758952100935 ], "wc_weaknesses_avg": [ 212.75, 49.19540120783649 ], "wc_questions_avg": [ 156.0, 30.083217912982647 ], "wc_limitations_avg": [ 3.0, 2.9154759474226504 ], "wc_review_avg": [ 625.25, 89.9204509552749 ], "wc_reply_reviewers_avg": [ 178.25, 160.2145046492358 ], "wc_reply_authors_avg": [ 169.75, 187.166202878618 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, 
"gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4508103504447828976&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Incentives in Private Collaborative Machine Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71365", "id": "XKP3mAsNHd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/180f1a1de4244c009ff0848c55ae54a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XKP3mAsNHd", "openreview": "https://openreview.net/forum?id=XKP3mAsNHd", "poster": "/media/PosterPDFs/NeurIPS%202023/71365.png?t=1701746288.6348302", "slides": "https://nips.cc/virtual/2023/poster/71365", "video": "https://nips.cc/virtual/2023/poster/71365", "author_site": "Rachael Sim, Yehong Zhang, Nghia Hoang, Xinyi Xu, Bryan Kian Hsiang Low, Patrick Jaillet", "tldr": "", "abstract": "Collaborative machine learning involves training models on data from multiple parties but must incentivize their participation. Existing data valuation methods fairly value and reward each party based on shared data or model parameters but neglect the privacy risks involved. To address this, we introduce _differential privacy_ (DP) as an incentive. Each party can select its required DP guarantee and perturb its _sufficient statistic_ (SS) accordingly. The mediator values the perturbed SS by the Bayesian surprise it elicits about the model parameters. As our valuation function enforces a _privacy-valuation trade-off_, parties are deterred from selecting excessive DP guarantees that reduce the utility of the grand coalition's model. Finally, the mediator rewards each party with different posterior samples of the model parameters. Such rewards still satisfy existing incentives like fairness but additionally preserve DP and a high similarity to the grand coalition's posterior. 
We empirically demonstrate the effectiveness and practicality of our approach on synthetic and real-world datasets.", "keywords": "Incentives;Privacy;Shapley fairness;Collaborative machine learning;data valuation;reward;sufficient statistics", "primary_area": "", "supplementary_material": "/attachment/881c6889d95f054f7fb72a8f7fe864c54fe7afcb.zip", "author": "Rachael Hwee Ling Sim;Yehong Zhang;Trong Nghia Hoang;Xinyi Xu;Bryan Kian Hsiang Low;Patrick Jaillet", "authorids": "~Rachael_Hwee_Ling_Sim1;~Yehong_Zhang1;~Trong_Nghia_Hoang1;~Xinyi_Xu4;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "gender": ";;;M;M;M", "homepage": ";;;https://xinyi-xu.com;http://www.comp.nus.edu.sg/~lowkh;http://web.mit.edu/jaillet/www/", "dblp": ";172/1145;;;97/4877;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick", "google_scholar": ";jGJHvCMAAAAJ;;2762GgsAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ;ND0FM6EAAAAJ", "orcid": ";;;0000-0002-8744-0695;;0000-0002-8585-6566", "linkedin": ";;;xinyi-xu-a93222133/;;patrick-jaillet-1260445/", "or_profile": "~Rachael_Hwee_Ling_Sim1;~Yehong_Zhang1;~Trong_Nghia_Hoang1;~Xinyi_Xu4;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "aff": ";Pengcheng Laboratory;;National University of Singapore;National University of Singapore;Massachusetts Institute of Technology", "aff_domain": ";pcl.ac.cn;;nus.edu.sg;nus.edu.sg;mit.edu", "position": ";Research Scientist;;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsim2023incentives,\ntitle={Incentives in Private Collaborative Machine Learning},\nauthor={Rachael Hwee Ling Sim and Yehong Zhang and Trong Nghia Hoang and Xinyi Xu and Bryan Kian Hsiang Low and Patrick Jaillet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XKP3mAsNHd}\n}", "github": "", "project": "", "reviewers": "kKLU;zMuU;oNeh;Rv9L", "pdf_size": 1302327, "rating": "6;6;7;7", "confidence": "4;3;3;1", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "3;4;3;4", "wc_summary": "212;73;268;251", "wc_strengths": "88;72;123;86", "wc_weaknesses": "204;355;96;49", "wc_questions": "12;51;170;2", "wc_limitations": "143;11;17;1", "wc_review": "659;562;674;389", "wc_reply_reviewers": "499;283;13;0", "wc_reply_authors": "845;582;0;0", "reply_reviewers": "3;2;1;0", "reply_authors": "4;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 201.0, 76.63876303803448 ], "wc_strengths_avg": [ 92.25, 18.793283374652763 ], "wc_weaknesses_avg": [ 176.0, 117.63715399481578 ], "wc_questions_avg": [ 58.75, 66.78837848009188 ], "wc_limitations_avg": [ 43.0, 58.01723881744115 ], "wc_review_avg": [ 571.0, 113.53193383361352 ], "wc_reply_reviewers_avg": [ 198.75, 206.91347829467273 ], "wc_reply_authors_avg": [ 356.75, 368.66880462008174 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15605784922442540544&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 10, "email": ";pcl.ac.cn;;nus.edu.sg;nus.edu.sg;mit.edu", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Pengcheng Laboratory;National University of Singapore;Massachusetts Institute of Technology", 
"aff_unique_dep": ";;", "aff_unique_url": ";https://www.nus.edu.sg;https://web.mit.edu", "aff_unique_abbr": ";NUS;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "China;Singapore;United States" }, { "title": "DiffUTE: Universal Text Editing Diffusion Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71364", "id": "XKeSauhUdJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c7138635035501eb71b0adf6ddc319d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XKeSauhUdJ", "openreview": "https://openreview.net/forum?id=XKeSauhUdJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71364.png?t=1696931432.3281882", "slides": "https://nips.cc/virtual/2023/poster/71364", "video": "https://nips.cc/virtual/2023/poster/71364", "author_site": "Haoxing Chen, Zhuoer Xu, Zhangxuan Gu, jun lan, \u884c \u90d1, Yaohui Li, Changhua Meng, Huijia Zhu, Weiqiang Wang", "tldr": "", "abstract": "Diffusion model based language-guided image editing has achieved great success recently. However, existing state-of-the-art diffusion models struggle with rendering correct text and text style during generation. To tackle this problem, we propose a universal self-supervised text editing diffusion model (DiffUTE), which aims to replace or modify words in the source image with another one while maintaining its realistic appearance. Specifically, we build our model on a diffusion model and carefully modify the network structure to enable the model for drawing multilingual characters with the help of glyph and position information. Moreover, we design a self-supervised learning framework to leverage large amounts of web data to improve the representation ability of the model. Experimental results show that our method achieves an impressive performance and enables controllable editing on in-the-wild images with high fidelity. 
Our code will be available at \url{https://github.com/chenhaoxing/DiffUTE}.", "keywords": "Diffusion model;text editing;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/ae4fbca51423dedde6216c2d2ccb7d5c3b0cfb47.zip", "author": "Haoxing Chen;Zhuoer Xu;Zhangxuan Gu;jun lan;\u884c \u90d1;Yaohui Li;Changhua Meng;Huijia Zhu;Weiqiang Wang", "authorids": "~Haoxing_Chen1;~Zhuoer_Xu1;~Zhangxuan_Gu1;~jun_lan2;~\u884c_\u90d11;~Yaohui_Li1;~Changhua_Meng1;huijia.zhj@alipay.com;~Weiqiang_Wang4", "gender": "M;M;M;M;M;M;M;;M", "homepage": "https://chenhaoxing.github.io;https://github.com/Unkrible;;https://scholar.google.com/citations?hl=zh-CN&user=nB_ntVkAAAAJ;;;https://www.linkedin.com/in/changhua-meng-04826021/;;https://www.linkedin.com/in/weiqiang-wang-489b925/", "dblp": "168/5619;276/7035;243/6953;218/0185;;93/7040;295/9441;;", "google_scholar": "BnS7HzAAAAAJ;na24qQoAAAAJ;Wkp3s68AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;pC2kmQoAAAAJ;;;", "orcid": "0000-0001-6637-8741;;;0000-0003-0921-0613;0000-0003-3279-6325;;;;0000-0002-6159-619X", "linkedin": ";;;;;;;;weiqiang-wang-489b925/", "or_profile": "~Haoxing_Chen1;~Zhuoer_Xu1;~Zhangxuan_Gu1;~jun_lan2;~\u884c_\u90d11;~Yaohui_Li1;~Changhua_Meng1;huijia.zhj@alipay.com;~Weiqiang_Wang4", "aff": "Nanjing University;Ant Group;Ant Group;AntGroup;;Department of Control Science and Intelligence Engineering, Nanjing University;Ant Group;;Ant Group", "aff_domain": "nju.edu.cn;antgroup.com;antgroup.com;antgroup.com;;nju.edu.cn;antgroup.com;;antgroup.com", "position": "MS student;Researcher;Researcher;Researcher;;MS student;Researcher;;Researcher", "bibtex": "@inproceedings{\nchen2023diffute,\ntitle={Diff{UTE}: Universal Text Editing Diffusion Model},\nauthor={Haoxing Chen and Zhuoer Xu and Zhangxuan Gu and jun lan and \u884c \u90d1 and Yaohui Li and Changhua Meng and Huijia Zhu and Weiqiang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XKeSauhUdJ}\n}", "github": "", "project": "", "reviewers": "FkEt;Rgqn;jbcg;sxrD;8YaX", "pdf_size": 6674152, "rating": "5;6;6;6;6", "confidence": "4;5;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "78;122;85;62;90", "wc_strengths": "38;34;28;127;51", "wc_weaknesses": "248;161;25;50;106", "wc_questions": "57;2;78;17;137", "wc_limitations": "16;1;6;23;6", "wc_review": "437;320;222;279;390", "wc_reply_reviewers": "0;0;22;33;203", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 87.4, 19.71395444856257 ], "wc_strengths_avg": [ 55.6, 36.488902422517455 ], "wc_weaknesses_avg": [ 118.0, 80.20723159416487 ], "wc_questions_avg": [ 58.2, 47.872330212764865 ], "wc_limitations_avg": [ 10.4, 7.964923100695952 ], "wc_review_avg": [ 329.6, 76.69837025647936 ], "wc_reply_reviewers_avg": [ 51.6, 76.77134882233085 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17354090403886525193&as_sdt=5,44&sciodt=0,44&hl=en", 
"gs_version_total": 6, "email": "nju.edu.cn;antgroup.com;antgroup.com;antgroup.com;;nju.edu.cn;antgroup.com;;antgroup.com", "author_num": 9, "aff_unique_index": "0;1;1;1;0;1;1", "aff_unique_norm": "Nanjing University;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "Nanjing U;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CosNet: A Generalized Spectral Kernel Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71363", "id": "XNBeTgYcAq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/756d74cd58592849c904421e3b2ec7a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XNBeTgYcAq", "openreview": "https://openreview.net/forum?id=XNBeTgYcAq", "poster": "/media/PosterPDFs/NeurIPS%202023/71363.png?t=1701485688.8521342", "slides": "https://nips.cc/virtual/2023/poster/71363", "video": "https://nips.cc/virtual/2023/poster/71363", "author_site": "Yanfang Xue, Pengfei Fang, Jinyue Tian, Shipeng Zhu, hui xue", "tldr": "", "abstract": "Complex-valued representation exists inherently in the time-sequential data that can be derived from the integration of harmonic waves. The non-stationary spectral kernel, realizing a complex-valued feature mapping, has shown its potential to analyze the time-varying statistical characteristics of the time-sequential data, as a result of the modeling frequency parameters. However, most existing spectral kernel-based methods eliminate the imaginary part, thereby limiting the representation power of the spectral kernel. To tackle this issue, we propose a generalized spectral kernel network, namely, \\underline{Co}mplex-valued \\underline{s}pectral kernel \\underline{Net}work (CosNet), which includes spectral kernel mapping generalization (SKMG) module and complex-valued spectral kernel embedding (CSKE) module. Concretely, the SKMG module is devised to generalize the spectral kernel mapping in the real number domain to the complex number domain, recovering the inherent complex-valued representation for the real-valued data. Then a following CSKE module is further developed to combine the complex-valued spectral kernels and neural networks to effectively capture long-range or periodic relations of the data. Along with the CosNet, we study the effect of the complex-valued spectral kernel mapping via theoretically analyzing the bound of covering number and generalization error. 
Extensive experiments demonstrate that CosNet performs better than the mainstream kernel methods and complex-valued neural networks.", "keywords": "Spectral kernel; complex-valued networks", "primary_area": "", "supplementary_material": "/attachment/e9b1f6fc27271404667dc38a6330bb9bdf56d52e.pdf", "author": "Yanfang Xue;Pengfei Fang;Jinyue Tian;Shipeng Zhu;hui xue", "authorids": "hzxyanfang@163.com;~Pengfei_Fang1;220222083@seu.edu.cn;~Shipeng_Zhu1;~hui_xue3", "gender": ";M;;M;F", "homepage": ";https://fpfcjdsg.github.io/;;http://palm.seu.edu.cn/homepage/zhushipeng/demo/index.html;http://palm.seu.edu.cn/hxue/", "dblp": ";204/7650.html;;252/0041.html;27/3541-2.html", "google_scholar": ";Fk4A13IAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-8939-0460;;;", "linkedin": ";fang-pengfei-62956a96/?locale=zh_CN;;;", "or_profile": "hzxyanfang@163.com;~Pengfei_Fang1;220222083@seu.edu.cn;~Shipeng_Zhu1;~hui_xue3", "aff": ";Monash University;;Southeast University;Southeast University", "aff_domain": ";monash.edu;;seu.edu.cn;seu.edu.cn", "position": ";Postdoc;;PhD student;Full Professor", "bibtex": "@inproceedings{\nxue2023cosnet,\ntitle={CosNet: A Generalized Spectral Kernel Network},\nauthor={Yanfang Xue and Pengfei Fang and Jinyue Tian and Shipeng Zhu and hui xue},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XNBeTgYcAq}\n}", "github": "", "project": "", "reviewers": "uy3z;YSFk;gGnJ;BAqh", "pdf_size": 732514, "rating": "3;6;6;7", "confidence": "3;3;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "1;1;3;3", "wc_summary": "52;65;96;115", "wc_strengths": "13;25;84;46", "wc_weaknesses": "45;181;65;74", "wc_questions": "1;868;2;176", "wc_limitations": "1;21;7;1", "wc_review": "112;1160;254;412", "wc_reply_reviewers": "43;75;31;0", "wc_reply_authors": "4;66;29;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 82.0, 24.869660230891775 ], "wc_strengths_avg": [ 42.0, 26.972207918522354 ], "wc_weaknesses_avg": [ 91.25, 52.869532814277825 ], "wc_questions_avg": [ 261.75, 357.1948872814391 ], "wc_limitations_avg": [ 7.5, 8.170067319184096 ], "wc_review_avg": [ 484.5, 404.179106337772 ], "wc_reply_reviewers_avg": [ 37.25, 26.85493436968335 ], "wc_reply_authors_avg": [ 24.75, 26.280934153869037 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16344060708098433561&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": ";monash.edu;;seu.edu.cn;seu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Monash University;Southeast University", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.seu.edu.cn/", "aff_unique_abbr": "Monash;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Australia;China" }, { "title": "TD Convergence: An Optimization Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71362", "id": "XOCbdqxAR2", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a08fbb992f15faa695c42b6a2c8e000-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XOCbdqxAR2", "openreview": "https://openreview.net/forum?id=XOCbdqxAR2", "poster": "/media/PosterPDFs/NeurIPS%202023/71362.png?t=1702055162.4170406", "slides": "https://nips.cc/virtual/2023/poster/71362", "video": "https://nips.cc/virtual/2023/poster/71362", "author_site": "Kavosh Asadi, Shoham Sabach, Yao Liu, Yao Liu, Omer Gottesman, Rasool Fakoor", "tldr": "", "abstract": "We study the convergence behavior of the celebrated temporal-difference (TD) learning algorithm. By looking at the algorithm through the lens of optimization, we first argue that TD can be viewed as an iterative optimization algorithm where the function to be minimized changes per iteration. By carefully investigating the divergence displayed by TD on a classical counter example, we identify two forces that determine the convergent or divergent behavior of the algorithm. We next formalize our discovery in the linear TD setting with quadratic loss and prove that convergence of TD hinges on the interplay between these two forces. We extend this optimization perspective to prove convergence of TD in a much broader setting than just linear approximation and squared loss. Our results provide a theoretical explanation for the successful application of TD in reinforcement learning.", "keywords": "Reinforcement Learning;Temporal Difference Learning;Value Function Optimization;Convergence", "primary_area": "", "supplementary_material": "/attachment/151c95fa4d793bc7de65ccd7f533b241ccd7dd9f.zip", "author": "Kavosh Asadi;Shoham Sabach;Yao Liu;Omer Gottesman;Rasool Fakoor", "authorids": "~Kavosh_Asadi1;~Shoham_Sabach1;~Yao_Liu1;~Omer_Gottesman1;~Rasool_Fakoor1", "gender": ";M;M;M;M", "homepage": "http://cs.brown.edu/~kasadiat/;https://ssabach.net.technion.ac.il/;http://yao-liu.com/;https://omergott.github.io/;http://rasoolfa.github.io", "dblp": "192/1404;;64/424-9.html;;123/2447", "google_scholar": "-2qyBJEAAAAJ;https://scholar.google.ca/citations?user=42D12TkAAAAJ;umAny5UAAAAJ;glNJx5zYUbsC;nVsOPtQAAAAJ", "orcid": ";;;;", "linkedin": ";;;;rasool-fakoor-695b5845/", "or_profile": "~Kavosh_Asadi1;~Shoham_Sabach1;~Yao_Liu1;~Omer_Gottesman1;~Rasool_Fakoor1", "aff": "Amazon;Technion - Israel Institute of Technology, Technion;Amazon;Amazon;Amazon Web Services", "aff_domain": "amazon.com;technion.ac.il;amazon.com;amazon.com;amazon.com", "position": "Researcher;Associate Professor;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nasadi2023td,\ntitle={{TD} Convergence: An Optimization Perspective},\nauthor={Kavosh Asadi and Shoham Sabach and Yao Liu and Omer Gottesman and Rasool Fakoor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XOCbdqxAR2}\n}", "github": "", "project": "", "reviewers": "5ZUZ;fasm;gJiK;16Sb", "pdf_size": 312528, "rating": "6;7;7;7", "confidence": "4;4;4;4", "soundness": "4;4;3;2", "novelty": "2;3;1;2", "presentation": "3;4;3;3", "wc_summary": "134;154;32;90", "wc_strengths": "83;33;42;71", "wc_weaknesses": "869;103;85;141", "wc_questions": "450;164;43;20", "wc_limitations": "23;13;106;21", "wc_review": "1559;467;308;343", "wc_reply_reviewers": "285;95;243;68", "wc_reply_authors": "167;39;750;67", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 
0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.5, 46.82680856090878 ], "wc_strengths_avg": [ 57.25, 20.44963324854507 ], "wc_weaknesses_avg": [ 299.5, 329.4218420202279 ], "wc_questions_avg": [ 169.25, 171.07217044276956 ], "wc_limitations_avg": [ 40.75, 37.85746293665227 ], "wc_review_avg": [ 669.25, 517.0833467633627 ], "wc_reply_reviewers_avg": [ 172.75, 92.94185009994152 ], "wc_reply_authors_avg": [ 255.75, 289.2951563714816 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14413795887241771263&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "amazon.com;technion.ac.il;amazon.com;amazon.com;amazon.com", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Amazon;Technion - Israel Institute of Technology", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.technion.ac.il", "aff_unique_abbr": "Amazon;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Israel" }, { "title": "FreeMask: Synthetic Images with Dense Annotations Make Stronger Segmentation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71361", "id": "XOotfgPiUF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ba7560b4c3e66d760fbdd472cf4a5a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XOotfgPiUF", "openreview": "https://openreview.net/forum?id=XOotfgPiUF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71361", "video": "https://nips.cc/virtual/2023/poster/71361", "author_site": "Lihe Yang, Xiaogang Xu, Bingyi Kang, Yinghuan Shi, Hengshuang Zhao", "tldr": "", "abstract": "Semantic segmentation has witnessed tremendous progress due to the proposal of various advanced network architectures. However, they are extremely hungry for delicate annotations to train, and the acquisition is laborious and unaffordable. Therefore, we present FreeMask in this work, which resorts to synthetic images from generative models to ease the burden of both data collection and annotation procedures. Concretely, we first synthesize abundant training images conditioned on the semantic masks provided by realistic datasets. This yields extra well-aligned image-mask training pairs for semantic segmentation models. We surprisingly observe that, solely trained with synthetic images, we already achieve comparable performance with real ones (e.g., 48.3 vs. 48.5 mIoU on ADE20K, and 49.3 vs. 50.5 on COCO-Stuff). Then, we investigate the role of synthetic images by joint training with real images, or pre-training for real images. Meantime, we design a robust filtering principle to suppress incorrectly synthesized regions. In addition, we propose to inequally treat different semantic masks to prioritize those harder ones and sample more corresponding synthetic images for them. 
As a result, either jointly trained or pre-trained with our filtered and re-sampled synthesized images, segmentation models can be greatly enhanced, e.g., from 48.7 to 52.0 on ADE20K.", "keywords": "learning from synthetic;semantic segmentation;generative models", "primary_area": "", "supplementary_material": "/attachment/086c340f2605d5fb93fb7223ea9d2747883f63fb.pdf", "author": "Lihe Yang;Xiaogang Xu;Bingyi Kang;Yinghuan Shi;Hengshuang Zhao", "authorids": "~Lihe_Yang1;~Xiaogang_Xu2;~Bingyi_Kang1;~Yinghuan_Shi3;~Hengshuang_Zhao2", "gender": ";M;;M;M", "homepage": "https://liheyoung.github.io/;https://xiaogang00.github.io;https://bingykang.github.io/;https://cs.nju.edu.cn/shiyh/;https://hszhao.github.io", "dblp": ";118/2268-2;;30/7184;185/7848", "google_scholar": "QX7xv3UAAAAJ;https://scholar.google.com.hk/citations?user=R65xDQwAAAAJ;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;m6BKDUMAAAAJ;4uE10I0AAAAJ", "orcid": ";0000-0002-7928-7336;;;0000-0001-8277-2706", "linkedin": ";;;;hengshuang-zhao-347b8391/?originalSubdomain=hk", "or_profile": "~Lihe_Yang1;~Xiaogang_Xu2;~Bingyi_Kang1;~Yinghuan_Shi3;~Hengshuang_Zhao2", "aff": "Nanjing University;The Chinese University of Hong Kong;Sea AI Lab;Nanjing University;The University of Hong Kong", "aff_domain": "nju.edu.cn;cuhk.edu.hk;sea.com;nju.edu.cn;hku.hk", "position": "MS student;Postdoc;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2023freemask,\ntitle={FreeMask: Synthetic Images with Dense Annotations Make Stronger Segmentation Models},\nauthor={Lihe Yang and Xiaogang Xu and Bingyi Kang and Yinghuan Shi and Hengshuang Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XOotfgPiUF}\n}", "github": "", "project": "", "reviewers": "VTnr;3GaN;6xi1;FZD9;jaKB", "pdf_size": 18890861, "rating": "4;4;5;6;7", "confidence": "4;4;5;5;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;2;3", "presentation": "3;3;3;3;4", "wc_summary": "45;89;79;66;85", "wc_strengths": "21;35;34;99;125", "wc_weaknesses": "154;287;528;89;198", "wc_questions": "2;22;6;49;77", "wc_limitations": "2;4;7;1;36", "wc_review": "224;437;654;304;521", "wc_reply_reviewers": "0;43;13;227;42", "wc_reply_authors": "279;992;100;463;49", "reply_reviewers": "0;1;1;2;1", "reply_authors": "4;6;3;4;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 72.8, 15.929846201391904 ], "wc_strengths_avg": [ 62.8, 41.300847448932565 ], "wc_weaknesses_avg": [ 251.2, 152.63472737224646 ], "wc_questions_avg": [ 31.2, 28.23756363427978 ], "wc_limitations_avg": [ 10.0, 13.16054710108968 ], "wc_review_avg": [ 428.0, 152.83847683093416 ], "wc_reply_reviewers_avg": [ 65.0, 82.68736300064236 ], "wc_reply_authors_avg": [ 376.6, 340.4518174426449 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.8, 1.32664991614216 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.21004201260420144, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6520820499671496798&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nju.edu.cn;cuhk.edu.hk;sea.com;nju.edu.cn;hku.hk", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Nanjing University;Chinese University of Hong Kong;Sea AI Lab;University of Hong 
Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nju.edu.cn;https://www.cuhk.edu.hk;;https://www.hku.hk", "aff_unique_abbr": "Nanjing U;CUHK;;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "QATCH: Benchmarking SQL-centric tasks with Table Representation Learning Models on Your Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73562", "id": "XOpaPrb0U5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/62a24b69b820d30e9e5ad4f15ff7bf72-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=XOpaPrb0U5", "openreview": "https://openreview.net/forum?id=XOpaPrb0U5", "poster": "/media/PosterPDFs/NeurIPS%202023/73562.png?t=1698246568.3270776", "slides": "https://nips.cc/virtual/2023/poster/73562", "video": "https://nips.cc/virtual/2023/poster/73562", "author_site": "Simone Papicchio, Paolo Papotti, Luca Cagliero", "tldr": "", "abstract": "Table Representation Learning (TRL) models are commonly pre-trained on large open-domain datasets comprising millions of tables and then used to address downstream tasks. Choosing the right TRL model to use on proprietary data can be challenging, as the best results depend on the content domain, schema, and data quality. Our purpose is to support end-users in testing TRL models on proprietary data in two established SQL-centric tasks, i.e., Question Answering (QA) and Semantic Parsing (SP). We present QATCH (Query-Aided TRL Checklist), a toolbox to highlight TRL models\u2019 strengths and weaknesses on relational tables unseen at training time. For an input table, QATCH automatically generates a testing checklist tailored to QA and SP. Checklist generation is driven by a SQL query engine that crafts tests of different complexity. This design facilitates inherent portability, allowing the checks to be used by alternative models. We also introduce a set of cross-task performance metrics evaluating the TRL model\u2019s performance over its output. 
Finally, we show how QATCH automatically generates tests for proprietary datasets to evaluate various state-of-the-art models including TAPAS, TAPEX, and CHATGPT.", "keywords": "Table Representation Learning;TRL;Benchmarking tool;Checklist;Question Answering;QA;Semantic Parsing;text-to-SQL;Proprietary Data;Tabular Data;Relational Tables;Cross-task performance metrics;Automated testing;Large Language Model;ChatGPT;LLM", "primary_area": "", "supplementary_material": "", "author": "Simone Papicchio;Paolo Papotti;Luca Cagliero", "authorids": "~Simone_Papicchio1;~Paolo_Papotti1;~Luca_Cagliero2", "gender": "M;M;M", "homepage": "https://github.com/spapicchio;http://www.eurecom.fr/en/people/papotti-paolo;https://www.polito.it/en/staff?p=luca.cagliero", "dblp": "369/7068;p/PaoloPapotti.html;60/7439", "google_scholar": "tA30bFcAAAAJ;https://scholar.google.com.tw/citations?user=YwoezYX7JVgJ;https://scholar.google.it/citations?user=0uIAXl8AAAAJ", "orcid": "0009-0005-5361-0042;;0000-0002-7185-5247", "linkedin": "simone-papicchio/;papotti/;", "or_profile": "~Simone_Papicchio1;~Paolo_Papotti1;~Luca_Cagliero2", "aff": "Eurecom;Eurecom;Polytechnic Institute of Turin", "aff_domain": "eurecom.fr;eurecom.fr;polito.it", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\npapicchio2023qatch,\ntitle={{QATCH}: Benchmarking {SQL}-centric tasks with Table Representation Learning Models on Your Data},\nauthor={Simone Papicchio and Paolo Papotti and Luca Cagliero},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=XOpaPrb0U5}\n}", "github": "", "project": "", "reviewers": "aVca;U5Jd;dxvp;GkAC", "pdf_size": 1262345, "rating": "6;6;7;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "74;69;68;132", "wc_strengths": "44;43;29;57", "wc_improvement": "302;118;62;111", "wc_limitations": "97;11;20;40", "wc_correctness": "61;1;25;47", "wc_clarity": "49;1;7;15", "wc_relation_to_prior_work": "19;1;25;16", "wc_documentation": "53;1;40;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "700;246;277;430", "wc_reply_reviewers": "39;1348;66;0", "wc_reply_authors": "1224;1769;424;689", "reply_reviewers": "2;6;1;0", "reply_authors": "3;4;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 85.75, 26.799020504488592 ], "wc_strengths_avg": [ 43.25, 9.908960591303208 ], "wc_improvement_avg": [ 148.25, 91.35199778877308 ], "wc_limitations_avg": [ 42.0, 33.44398301638129 ], "wc_correctness_avg": [ 33.5, 22.73213584333861 ], "wc_clarity_avg": [ 18.0, 18.57417562100671 ], "wc_relation_to_prior_work_avg": [ 15.25, 8.842369591913696 ], "wc_documentation_avg": [ 26.25, 21.063890903629368 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 413.25, 179.61260395640392 ], "wc_reply_reviewers_avg": [ 363.25, 569.029601602588 ], "wc_reply_authors_avg": [ 1026.5, 516.5329127945286 ], "reply_reviewers_avg": [ 2.25, 2.277608394786075 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15097948892558948176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "eurecom.fr;eurecom.fr;polito.it", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "EURECOM;Polytechnic Institute of Turin", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.eurecom.fr;https://www.polito.it", "aff_unique_abbr": ";Polito", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;Italy" }, { "title": "Mirror Diffusion Models for Constrained and Watermarked Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71360", "id": "XPWEtXzlLy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/85f5c7372625d1e0df0e3996f85062d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XPWEtXzlLy", "openreview": "https://openreview.net/forum?id=XPWEtXzlLy", "poster": "/media/PosterPDFs/NeurIPS%202023/71360.png?t=1701128367.5321603", "slides": "https://nips.cc/virtual/2023/poster/71360", "video": "https://nips.cc/virtual/2023/poster/71360", "author_site": "Guan-Horng Liu, Tianrong Chen, Evangelos Theodorou, Molei Tao", "tldr": "", "abstract": "Modern successes of diffusion models in learning complex, high-dimensional data distributions are attributed, in part, to their capability to construct diffusion processes with analytic transition kernels and score functions. The tractability results in a simulation-free framework with stable regression losses, from which reversed, generative processes can be learned at scale. However, when data is confined to a constrained set as opposed to a standard Euclidean space, these desirable characteristics appear to be lost based on prior attempts. In this work, we propose Mirror Diffusion Models (MDM), a new class of diffusion models that generate data on convex constrained sets without losing any tractability. This is achieved by learning diffusion processes in a dual space constructed from a mirror map, which, crucially, is a standard Euclidean space. We derive efficient computation of mirror maps for popular constrained sets, such as simplices and $\\ell_2$-balls, showing significantly improved performance of MDM over existing methods. For safety and privacy purposes, we also explore constrained sets as a new mechanism to embed invisible but quantitative information (i.e., watermarks) in generated data, for which MDM serves as a compelling approach. 
Our work brings new algorithmic opportunities for learning tractable diffusion on complex domains.", "keywords": "diffusion models;constrained generation;constrained manifold;mirror map;watermarked generation;generation privacy", "primary_area": "", "supplementary_material": "/attachment/868ff97414019cfa4714684d2f85e245b4a65ed3.pdf", "author": "Guan-Horng Liu;Tianrong Chen;Evangelos Theodorou;Molei Tao", "authorids": "~Guan-Horng_Liu1;~Tianrong_Chen1;~Evangelos_Theodorou1;~Molei_Tao1", "gender": ";M;M;", "homepage": "https://ghliu.github.io;https://tianrongchen.github.io/;;http://people.math.gatech.edu/~mtao8/", "dblp": "143/6907;227/7295;155/9964;56/9263", "google_scholar": "2Dt0VJ4AAAAJ;r9D3Fg50gMoC;;", "orcid": ";;;", "linkedin": ";tianrong-chen-757b3216a/;;", "or_profile": "~Guan-Horng_Liu1;~Tianrong_Chen1;~Evangelos_Theodorou1;~Molei_Tao1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023mirror,\ntitle={Mirror Diffusion Models for Constrained and Watermarked Generation},\nauthor={Guan-Horng Liu and Tianrong Chen and Evangelos Theodorou and Molei Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XPWEtXzlLy}\n}", "github": "", "project": "", "reviewers": "VdTP;Bc59;kFs5;i5Cc", "pdf_size": 4489575, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "4;3;3;4", "novelty": "3;3;3;3", "presentation": "3;4;3;2", "wc_summary": "104;110;51;76", "wc_strengths": "136;85;23;78", "wc_weaknesses": "81;38;95;78", "wc_questions": "39;47;2;349", "wc_limitations": "1;6;2;9", "wc_review": "361;286;173;590", "wc_reply_reviewers": "21;14;12;25", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.25, 23.573024837725004 ], "wc_strengths_avg": [ 80.5, 40.04060439104285 ], "wc_weaknesses_avg": [ 73.0, 21.20141504711419 ], "wc_questions_avg": [ 109.25, 139.4567585310945 ], "wc_limitations_avg": [ 4.5, 3.2015621187164243 ], "wc_review_avg": [ 352.5, 152.57866823379996 ], "wc_reply_reviewers_avg": [ 18.0, 5.244044240850758 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4374388264067375767&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Block-State Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71359", "id": "XRTxIBs2eu", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/16ccd203e9e3696a7ab0dcf568316379-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XRTxIBs2eu", "openreview": "https://openreview.net/forum?id=XRTxIBs2eu", "poster": "/media/PosterPDFs/NeurIPS%202023/71359.png?t=1697514398.157966", "slides": "https://nips.cc/virtual/2023/poster/71359", "video": "https://nips.cc/virtual/2023/poster/71359", "author_site": "Jonathan Pilault, Mahan Fathi, Orhan Firat, Chris Pal, Pierre-Luc Bacon, Ross Goroshin", "tldr": "", "abstract": "State space models (SSMs) have shown impressive results on tasks that require modeling long-range dependencies and efficiently scale to long sequences owing to their subquadratic runtime complexity.\nOriginally designed for continuous signals, SSMs have shown superior performance on a plethora of tasks, in vision and audio; however, SSMs still lag Transformer performance in Language Modeling tasks.\nIn this work, we propose a hybrid layer named Block-State Transformer (*BST*), that internally combines an SSM sublayer for long-range contextualization, and a Block Transformer sublayer for short-term representation of sequences.\nWe study three different, and completely *parallelizable*, variants that integrate SSMs and block-wise attention.\nWe show that our model outperforms similar Transformer-based architectures on language modeling perplexity and generalizes to longer sequences. \nIn addition, the Block-State Transformer demonstrates a more than *tenfold* increase in speed at the layer level compared to the Block-Recurrent Transformer when model parallelization is employed.", "keywords": "State Space Models;Efficient Transformers;Long Range Language Modeling;Language Modeling", "primary_area": "", "supplementary_material": "/attachment/71a9cddaad425383982e3b712f649a01265b4115.pdf", "author": "Jonathan Pilault;Mahan Fathi;Orhan Firat;Christopher Pal;Pierre-Luc Bacon;Ross Goroshin", "authorids": "~Jonathan_Pilault1;~Mahan_Fathi1;~Orhan_Firat1;~Christopher_Pal1;~Pierre-Luc_Bacon1;~Ross_Goroshin1", "gender": ";M;M;;;", "homepage": ";https://mahanfathi.github.io/;;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao;;", "dblp": "248/8053.html;;120/2225;45/1217;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tr/citations?user=dLaR9lgAAAAJ;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ;;EC4o-1oAAAAJ", "orcid": ";;;;;", "linkedin": "jonathanpilault/;mahanfathi/;;;;", "or_profile": "~Jonathan_Pilault1;~Mahan_Fathi1;~Orhan_Firat1;~Christopher_Pal1;~Pierre-Luc_Bacon1;~Ross_Goroshin1", "aff": "Polytechnique Montreal;Google Brain;Google;Polytechnique Montreal;;Google", "aff_domain": "polymtl.ca;google.com;google.com;polymtl.ca;;google.com", "position": "PhD student;Researcher;Research Scientist;Full Professor;;Research Scientist", "bibtex": "@inproceedings{\npilault2023blockstate,\ntitle={Block-State Transformers},\nauthor={Jonathan Pilault and Mahan Fathi and Orhan Firat and Christopher Pal and Pierre-Luc Bacon and Ross Goroshin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XRTxIBs2eu}\n}", "github": "", "project": "", "reviewers": "9Xt3;4BCd;rmDJ;XDNb;V1Sd", "pdf_size": 3857011, "rating": "5;6;6;6;6", "confidence": "5;5;4;4;4", "soundness": "2;2;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;3;3;2", "wc_summary": "31;41;77;102;207", "wc_strengths": "47;58;79;59;81", "wc_weaknesses": "214;172;167;108;238", 
"wc_questions": "32;18;77;45;15", "wc_limitations": "10;2;26;1;1", "wc_review": "334;291;426;315;542", "wc_reply_reviewers": "59;43;21;17;0", "wc_reply_authors": "188;0;0;102;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 91.6, 63.04157358442126 ], "wc_strengths_avg": [ 64.8, 13.12097557348538 ], "wc_weaknesses_avg": [ 179.8, 44.579816060634435 ], "wc_questions_avg": [ 37.4, 22.50866499817348 ], "wc_limitations_avg": [ 8.0, 9.612491872558333 ], "wc_review_avg": [ 381.6, 92.31381261761427 ], "wc_reply_reviewers_avg": [ 28.0, 20.688160865577203 ], "wc_reply_authors_avg": [ 58.0, 76.06313167363017 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8704870880522368901&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "polymtl.ca;google.com;google.com;polymtl.ca;;google.com", "author_num": 6, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Polytechnique Montreal;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.polymtl.ca;https://brain.google.com", "aff_unique_abbr": "PolyMTL;Google Brain", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Montreal;Mountain View", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Aleatoric and Epistemic Discrimination: Fundamental Limits of Fairness Interventions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71358", "id": "XRy4YQYLe0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/55a49718689fdecef31b6a2386df6fe1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XRy4YQYLe0", "openreview": "https://openreview.net/forum?id=XRy4YQYLe0", "poster": "/media/PosterPDFs/NeurIPS%202023/71358.png?t=1701482341.954902", "slides": "https://nips.cc/virtual/2023/poster/71358", "video": "https://nips.cc/virtual/2023/poster/71358", "author_site": "Hao Wang, Luxi He, Luxi He, Rui Gao, Flavio Calmon", "tldr": "", "abstract": "Machine learning (ML) models can underperform on certain population groups due to choices made during model development and bias inherent in the data. We categorize sources of discrimination in the ML pipeline into two classes: aleatoric discrimination, which is inherent in the data distribution, and epistemic discrimination, which is due to decisions made during model development. We quantify aleatoric discrimination by determining the performance limits of a model under fairness constraints, assuming perfect knowledge of the data distribution. We demonstrate how to characterize aleatoric discrimination by applying Blackwell's results on comparing statistical experiments. We then quantify epistemic discrimination as the gap between a model's accuracy when fairness constraints are applied and the limit posed by aleatoric discrimination. We apply this approach to benchmark existing fairness interventions and investigate fairness risks in data with missing values. 
Our results indicate that state-of-the-art fairness interventions are effective at removing epistemic discrimination on standard (overused) tabular datasets. However, when data has missing values, there is still significant room for improvement in handling aleatoric discrimination.", "keywords": "information theory;fair machine learning", "primary_area": "", "supplementary_material": "/attachment/5516fc923d97bb91de1a66f263d39fc5c395b0ff.pdf", "author": "Hao Wang;Luxi He;Rui Gao;Flavio Calmon", "authorids": "~Hao_Wang22;~Luxi_He1;~Rui_Gao3;~Flavio_Calmon1", "gender": "M;F;;", "homepage": "https://haowang94.github.io;;https://faculty.mccombs.utexas.edu/rui.gao/index.html;http://people.seas.harvard.edu/~flavio/", "dblp": ";338/9240;43/2694-1;89/4611", "google_scholar": "A3WtYhAAAAAJ;;LWJj85wAAAAJ;P8N_YH4AAAAJ", "orcid": ";;;", "linkedin": ";lucy-he-3051111a9/;;", "or_profile": "~Hao_Wang22;~Luxi_He1;~Rui_Gao3;~Flavio_Calmon1", "aff": "MIT-IBM Watson AI Lab;Harvard University;University of Texas, Austin;Harvard University", "aff_domain": "ibm.com;harvard.edu;utexas.edu;harvard.edu", "position": "Researcher;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023aleatoric,\ntitle={Aleatoric and Epistemic Discrimination: Fundamental Limits of Fairness Interventions},\nauthor={Hao Wang and Luxi He and Rui Gao and Flavio Calmon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XRy4YQYLe0}\n}", "github": "", "project": "", "reviewers": "D24A;CXu9;7W2X;wKi3;iE9m", "pdf_size": 3186068, "rating": "4;4;6;7;8", "confidence": "5;2;3;3;4", "soundness": "3;3;4;3;4", "novelty": "2;2;2;4;4", "presentation": "2;1;3;3;4", "wc_summary": "48;72;61;103;246", "wc_strengths": "56;59;29;96;193", "wc_weaknesses": "331;255;276;228;191", "wc_questions": "42;38;4;28;225", "wc_limitations": "61;78;44;119;206", "wc_review": "538;502;414;574;1061", "wc_reply_reviewers": "282;184;80;19;90", "wc_reply_authors": "1430;84;55;26;82", "reply_reviewers": "2;1;1;1;1", "reply_authors": "4;2;2;2;2", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 106.0, 72.32426978545999 ], "wc_strengths_avg": [ 86.6, 57.31526847184788 ], "wc_weaknesses_avg": [ 256.2, 46.97829285957505 ], "wc_questions_avg": [ 67.4, 79.89893616313049 ], "wc_limitations_avg": [ 101.6, 57.83632076818164 ], "wc_review_avg": [ 617.8, 227.88453216486633 ], "wc_reply_reviewers_avg": [ 131.0, 92.12600067299134 ], "wc_reply_authors_avg": [ 335.4, 547.7070749953847 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.049029033784546, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10301337476614563468&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "ibm.com;harvard.edu;utexas.edu;harvard.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University;University of Texas at Austin", "aff_unique_dep": "IBM Watson AI Lab;;", "aff_unique_url": "https://www.mitibmwatsonailab.org;https://www.harvard.edu;https://www.utexas.edu", "aff_unique_abbr": "MIT-IBM AI Lab;Harvard;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": 
";Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Bayesian Approach To Analysing Training Data Attribution In Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71357", "id": "XSCYxDp3yE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca774047bc3b46cc81e53ead34cd5d5a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XSCYxDp3yE", "openreview": "https://openreview.net/forum?id=XSCYxDp3yE", "poster": "/media/PosterPDFs/NeurIPS%202023/71357.png?t=1702170895.113802", "slides": "https://nips.cc/virtual/2023/poster/71357", "video": "https://nips.cc/virtual/2023/poster/71357", "author_site": "Elisa Nguyen, Minjoon Seo, Seong Joon Oh", "tldr": "", "abstract": "Training data attribution (TDA) techniques find influential training data for the model's prediction on the test data of interest. They approximate the impact of down- or up-weighting a particular training sample. While conceptually useful, they are hardly applicable to deep models in practice, particularly because of their sensitivity to different model initialisation. In this paper, we introduce a Bayesian perspective on the TDA task, where the learned model is treated as a Bayesian posterior and the TDA estimates as random variables. From this novel viewpoint, we observe that the influence of an individual training sample is often overshadowed by the noise stemming from model initialisation and SGD batch composition. Based on this observation, we argue that TDA can only be reliably used for explaining deep model predictions that are consistently influenced by certain training data, independent of other noise factors. Our experiments demonstrate the rarity of such noise-independent training-test data pairs but confirm their existence. We recommend that future researchers and practitioners trust TDA estimates only in such cases. Further, we find a disagreement between ground truth and estimated TDA distributions and encourage future work to study this gap. 
Code is provided at https://github.com/ElisaNguyen/bayesian-tda.", "keywords": "training data attribution;interpretability;explainability;data-driven xai", "primary_area": "", "supplementary_material": "/attachment/d2b7f5222205ac51b852eb1073f90854814c4e84.zip", "author": "Elisa Nguyen;Minjoon Seo;Seong Joon Oh", "authorids": "~Elisa_Nguyen1;~Minjoon_Seo1;~Seong_Joon_Oh1", "gender": "F;M;M", "homepage": "https://elisanguyen.github.io/;https://seominjoon.github.io;https://seongjoonoh.com", "dblp": "280/5837;149/1367;168/8835", "google_scholar": "YuBPap8AAAAJ;zYze5fIAAAAJ;https://scholar.google.de/citations?user=kmXOOdsAAAAJ", "orcid": "0000-0003-0224-268X;;0000-0002-8985-7689", "linkedin": "nguyen-elisa/;minjoon-seo/;seong-joon-oh-32113479/", "or_profile": "~Elisa_Nguyen1;~Minjoon_Seo1;~Seong_Joon_Oh1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Twelve Labs;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "uni-tuebingen.de;twelvelabs.io;uni-tuebingen.de", "position": "PhD student;Chief Scientist;Associate Professor", "bibtex": "@inproceedings{\nnguyen2023a,\ntitle={A Bayesian Approach To Analysing Training Data Attribution In Deep Learning},\nauthor={Elisa Nguyen and Minjoon Seo and Seong Joon Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XSCYxDp3yE}\n}", "github": "", "project": "", "reviewers": "fqXw;wqES;CbTs;fmRD", "pdf_size": 2686251, "rating": "4;5;6;7", "confidence": "4;3;4;3", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "72;95;103;76", "wc_strengths": "79;60;92;24", "wc_weaknesses": "479;164;267;27", "wc_questions": "219;3;103;9", "wc_limitations": "89;4;1;1", "wc_review": "938;326;566;137", "wc_reply_reviewers": "621;15;298;4", "wc_reply_authors": "1397;0;760;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 86.5, 12.893796958227627 ], "wc_strengths_avg": [ 63.75, 25.616157010761782 ], "wc_weaknesses_avg": [ 234.25, 164.97177788943173 ], "wc_questions_avg": [ 83.5, 87.7083234362623 ], "wc_limitations_avg": [ 23.75, 37.692008436802624 ], "wc_review_avg": [ 491.75, 299.15411997831484 ], "wc_reply_reviewers_avg": [ 234.5, 252.3514414462497 ], "wc_reply_authors_avg": [ 539.25, 584.3900131761322 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=89359662800224004&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 6, "email": "uni-tuebingen.de;twelvelabs.io;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Twelve Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://twelvelabs.com", "aff_unique_abbr": "Uni T\u00fcbingen;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "id": "XT9mL5vxX2", "title": "Multitask Learning for Face Forgery Detection: A Joint Embedding Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multitask learning for face forgery 
detection has experienced impressive successes in recent years. Nevertheless, the semantic relationships among different forgery detection tasks are generally overlooked in previous methods, which weakens knowledge transfer across tasks. Moreover, previously adopted multitask learning schemes require human intervention in allocating model capacity to each task and computing the loss weighting, which is bound to be suboptimal. In this paper, we aim at automated multitask learning for face forgery detection from a joint embedding perspective. We first define a set of coarse-to-fine face forgery detection tasks based on face attributes at different semantic levels. We describe the ground-truth for each task via a textual template, and train two encoders to jointly embed visual face images and textual descriptions in the shared feature space. In such a manner, the semantic closeness between two tasks is manifested as the distance in the learned feature space. Moreover, the capacity of the image encoder can be automatically allocated to each task through end-to-end optimization. Through joint embedding, face forgery detection can be performed by maximizing the feature similarity between the test face image and candidate textual descriptions. Extensive experiments show that the proposed method improves face forgery detection in terms of generalization to novel face manipulations. In addition, our multitask learning method provides some degree of model interpretability by offering human-understandable explanations.", "keywords": "face forgery detection;multitask learning;joint embedding;vision-language correspondence", "primary_area": "", "supplementary_material": "/attachment/27e3d026822c9c3d317116d41afcbc79f90610e2.pdf", "author": "Mian ZOU;Baosheng Yu;Yibing Zhan;Kede Ma", "authorids": "~Mian_ZOU1;~Baosheng_Yu1;~Yibing_Zhan2;~Kede_Ma2", "gender": "M;;;M", "homepage": ";https://dr.ntu.edu.sg/cris/rp/rp02563;https://kedema.org/;", "dblp": "277/5749;178/8725;127/1809;142/8486", "google_scholar": "zkE70XAAAAAJ;fjzIdMQAAAAJ;https://scholar.google.com.hk/citations?user=sfzOyFoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-4306-8980;;0000-0001-8608-1128;", "linkedin": ";;;", "or_profile": "~Mian_ZOU1;~Baosheng_Yu1;~Kede_Ma2;~Yibing_Zhan1", "aff": "City University of Hong Kong;The University of Sydney;City University of Hong Kong;JD Explore Academy", "aff_domain": "cityu.edu.hk;sydney.edu.au;cityu.edu.hk;jd.com", "position": "PhD student;Research Fellow;Assistant Professor;Researcher", "bibtex": "@misc{\nzou2023multitask,\ntitle={Multitask Learning for Face Forgery Detection: A Joint Embedding Approach},\nauthor={Mian ZOU and Baosheng Yu and Yibing Zhan and Kede Ma},\nyear={2023},\nurl={https://openreview.net/forum?id=XT9mL5vxX2}\n}", "github": "", "project": "", "reviewers": "VfHM;jNdm;7SFc;fx6c;NnkA", "site": "https://openreview.net/forum?id=XT9mL5vxX2", "pdf_size": 1311406, "rating": "4;4;5;5;5", "confidence": "4;3;4;4;5", "soundness": "2;2;3;2;2", "novelty": "2;2;3;2;2", "presentation": "2;2;2;3;3", "wc_summary": "103;105;51;52;74", "wc_strengths": "72;67;5;84;73", "wc_weaknesses": "329;128;5;36;67", "wc_questions": "6;81;137;213;79", "wc_limitations": "8;25;8;27;4", "wc_review": "518;406;206;412;297", "wc_reply_reviewers": "179;0;28;18;13", "wc_reply_authors": "0;35;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 4.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], 
"novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 77.0, 23.53720459187964 ], "wc_strengths_avg": [ 60.2, 28.15244216759889 ], "wc_weaknesses_avg": [ 113.0, 115.4036394573412 ], "wc_questions_avg": [ 103.2, 68.88367005321363 ], "wc_limitations_avg": [ 14.4, 9.604165762834375 ], "wc_review_avg": [ 367.8, 106.92502045826319 ], "wc_reply_reviewers_avg": [ 47.6, 66.31621219581227 ], "wc_reply_authors_avg": [ 7.0, 14.0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eGD2d7Ck2pgJ:scholar.google.com/&scioq=Multitask+Learning+for+Face+Forgery+Detection:+A+Joint+Embedding+Approach&hl=en&as_sdt=0,3", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "City University of Hong Kong;University of Sydney;JD", "aff_unique_dep": ";;JD Explore Academy", "aff_unique_url": "https://www.cityu.edu.hk;https://www.sydney.edu.au;", "aff_unique_abbr": "CityU;USYD;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Australia;" }, { "title": "Implicit Contrastive Representation Learning with Guided Stop-gradient", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71356", "id": "XUu2GloTXb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6274172f7d981a8d58bbfd52342a9d1f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XUu2GloTXb", "openreview": "https://openreview.net/forum?id=XUu2GloTXb", "poster": "/media/PosterPDFs/NeurIPS%202023/71356.png?t=1700138175.912711", "slides": "https://nips.cc/virtual/2023/poster/71356", "video": "https://nips.cc/virtual/2023/poster/71356", "author_site": "Byeongchan Lee, Sehyun Lee", "tldr": "", "abstract": "In self-supervised representation learning, Siamese networks are a natural architecture for learning transformation-invariance by bringing representations of positive pairs closer together. But it is prone to collapse into a degenerate solution. To address the issue, in contrastive learning, a contrastive loss is used to prevent collapse by moving representations of negative pairs away from each other. But it is known that algorithms with negative sampling are not robust to a reduction in the number of negative samples. So, on the other hand, there are algorithms that do not use negative pairs. Many positive-only algorithms adopt asymmetric network architecture consisting of source and target encoders as a key factor in coping with collapse. By exploiting the asymmetric architecture, we introduce a methodology to implicitly incorporate the idea of contrastive learning. As its implementation, we present a novel method guided stop-gradient. We apply our method to benchmark algorithms SimSiam and BYOL and show that our method stabilizes training and boosts performance. We also show that the algorithms with our method work well with small batch sizes and do not collapse even when there is no predictor. 
The code is available in the supplementary material.", "keywords": "representation learning;self-supervised learning;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/b224d1c1e780537ac5f20b5249dae50f984b9ae3.zip", "author": "Byeongchan Lee;Sehyun Lee", "authorids": "~Byeongchan_Lee1;~Sehyun_Lee1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nlee2023implicit,\ntitle={Implicit Contrastive Representation Learning with Guided Stop-gradient},\nauthor={Byeongchan Lee and Sehyun Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XUu2GloTXb}\n}", "github": "", "project": "", "reviewers": "CW7J;imWm;Wcuk;cNZ6", "pdf_size": 510374, "rating": "6;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "57;89;58;119", "wc_strengths": "37;50;47;38", "wc_weaknesses": "175;209;92;80", "wc_questions": "2;73;21;403", "wc_limitations": "1;14;1;16", "wc_review": "272;435;219;656", "wc_reply_reviewers": "13;147;18;42", "wc_reply_authors": "0;284;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.75, 25.557533136044253 ], "wc_strengths_avg": [ 43.0, 5.612486080160912 ], "wc_weaknesses_avg": [ 139.0, 54.51146668362538 ], "wc_questions_avg": [ 124.75, 162.73655858472614 ], "wc_limitations_avg": [ 8.0, 7.035623639735144 ], "wc_review_avg": [ 395.5, 170.16536075241635 ], "wc_reply_reviewers_avg": [ 55.0, 54.23559716643673 ], "wc_reply_authors_avg": [ 71.0, 122.97560733739029 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10452562855464428873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";", "author_num": 2 }, { "title": "On the Size and Approximation Error of Distilled Datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71355", "id": "XWYv4BNShP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c03a9ccdb3e95f2c2dcfc3f4bc16bf42-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XWYv4BNShP", "openreview": "https://openreview.net/forum?id=XWYv4BNShP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71355", "video": "https://nips.cc/virtual/2023/poster/71355", "author_site": "Alaa Maalouf, Murad Tukan, Noel Loo, Ramin Hasani, Mathias Lechner, Daniela Rus", "tldr": "", "abstract": "Dataset Distillation is the task of synthesizing small datasets from large ones while still retaining comparable predictive accuracy to the original uncompressed dataset. Despite significant empirical progress in recent years, there is little understanding of the theoretical limitations/guarantees of dataset distillation, specifically, what excess risk is achieved by distillation compared to the original dataset, and how large are distilled datasets? 
In this work, we take a theoretical view on kernel ridge regression (KRR) based methods of dataset distillation such as Kernel Inducing Points. By transforming ridge regression in random Fourier features (RFF) space, we provide the first proof of the existence of small (size) distilled datasets and their corresponding excess risk for shift-invariant kernels. We prove that a small set of instances exists in the original input space such that its solution in the RFF space coincides with the solution of the original data. We further show that a KRR solution can be generated using this distilled set of instances which gives an approximation towards the KRR solution optimized on the full input data. The size of this set is linear in the dimension of the RFF space of the input set or alternatively near linear in the number of effective degrees of freedom, which is a function of the kernel, number of data points, and the regularization parameter $\\lambda$. The error bound of this distilled set is also a function of $\\lambda$. We verify our bounds analytically and empirically.", "keywords": "Dataset Distillation;Size and Approximation Error", "primary_area": "", "supplementary_material": "/attachment/8316adcde5cc1bb681c3f8135dd02843bfe3ce06.zip", "author": "Alaa Maalouf;Murad Tukan;Noel Loo;Ramin Hasani;Mathias Lechner;Daniela Rus", "authorids": "~Alaa_Maalouf1;~Murad_Tukan1;~Noel_Loo1;~Ramin_Hasani1;~Mathias_Lechner1;~Daniela_Rus1", "gender": "M;M;;Unspecified;F;M", "homepage": ";;https://yolky.github.io/;https://mlech26l.github.io/pages/;https://www.csail.mit.edu/person/daniela-rus;http://www.raminhasani.com", "dblp": "242/8928.html;259/0724;279/6288;209/9862;r/DanielaRus;190/3168", "google_scholar": "https://scholar.google.com/citations?hl=en;;vokGv-gAAAAJ;https://scholar.google.at/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.at/citations?user=YarJF3QAAAAJ", "orcid": ";;;;;0000-0002-9889-5222", "linkedin": "alaa-maalouf/?originalSubdomain=il;;noel-loo-23a2a112b;;;raminhasani/", "or_profile": "~Alaa_Maalouf1;~Murad_Tukan1;~Noel_Loo1;~Mathias_Lechner1;~Daniela_Rus1;~Ramin_M._Hasani1", "aff": "Massachusetts Institute of Technology;Dataheroes;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;dataheroes.ai;mit.edu;mit.edu;mit.edu;mit.edu", "position": "Postdoc;Researcher;PhD student;Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\nmaalouf2023on,\ntitle={On the Size and Approximation Error of Distilled Datasets},\nauthor={Alaa Maalouf and Murad Tukan and Noel Loo and Ramin Hasani and Mathias Lechner and Daniela Rus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XWYv4BNShP}\n}", "github": "", "project": "", "reviewers": "BMB1;uCrm;mfyC;piM1", "pdf_size": 12651109, "rating": "4;5;5;6", "confidence": "2;4;2;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "1;3;3;3", "wc_summary": "46;44;88;56", "wc_strengths": "51;55;73;51", "wc_weaknesses": "133;308;90;71", "wc_questions": "67;44;36;2", "wc_limitations": "7;9;18;2", "wc_review": "304;460;305;182", "wc_reply_reviewers": "773;957;25;0", "wc_reply_authors": "2545;2553;0;0", "reply_reviewers": "4;5;1;0", "reply_authors": "5;9;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], 
"novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 58.5, 17.628102563804195 ], "wc_strengths_avg": [ 57.5, 9.096702699330127 ], "wc_weaknesses_avg": [ 150.5, 93.66562870124771 ], "wc_questions_avg": [ 37.25, 23.31710745354149 ], "wc_limitations_avg": [ 9.0, 5.787918451395113 ], "wc_review_avg": [ 312.75, 98.63410921177318 ], "wc_reply_reviewers_avg": [ 438.75, 431.2762310862958 ], "wc_reply_authors_avg": [ 1274.5, 1274.5031384818164 ], "reply_reviewers_avg": [ 2.5, 2.0615528128088303 ], "reply_authors_avg": [ 4.0, 3.3166247903554 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5445287843878307715&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "mit.edu;dataheroes.ai;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Dataheroes", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;", "aff_unique_abbr": "MIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Have it your way: Individualized Privacy Assignment for DP-SGD", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71354", "id": "XXPzBhOs4f", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3cbf627fa24fb6cb576e04e689b9428b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XXPzBhOs4f", "openreview": "https://openreview.net/forum?id=XXPzBhOs4f", "poster": "/media/PosterPDFs/NeurIPS%202023/71354.png?t=1701452119.6977224", "slides": "https://nips.cc/virtual/2023/poster/71354", "video": "https://nips.cc/virtual/2023/poster/71354", "author_site": "Franziska Boenisch, Christopher M\u00fchl, Adam Dziedzic, Roy Rinberg, Nicolas Papernot", "tldr": "", "abstract": "When training a machine learning model with differential privacy, one sets a privacy budget. This uniform budget represents an overall maximal privacy violation that any user is willing to face by contributing their data to the training set. We argue that this approach is limited because different users may have different privacy expectations. Thus, setting a uniform privacy budget across all points may be overly conservative for some users or, conversely, not sufficiently protective for others. In this paper, we capture these preferences through individualized privacy budgets. To demonstrate their practicality, we introduce a variant of Differentially Private Stochastic Gradient Descent (DP-SGD) which supports such individualized budgets. DP-SGD is the canonical approach to training models with differential privacy. We modify its data sampling and gradient noising mechanisms to arrive at our approach, which we call Individualized DP-SGD (IDP-SGD). 
Because IDP-SGD provides privacy guarantees tailored to the preferences of individual users and their data points, we empirically find it to improve privacy-utility trade-offs.", "keywords": "privacy;machine learning;differential privacy;DP-SGD;individualized privacy", "primary_area": "", "supplementary_material": "/attachment/d3485e03f852e8989a27512f7842cd6b3a865f2d.zip", "author": "Franziska Boenisch;Christopher M\u00fchl;Adam Dziedzic;Roy Rinberg;Nicolas Papernot", "authorids": "~Franziska_Boenisch2;~Christopher_M\u00fchl1;~Adam_Dziedzic1;~Roy_Rinberg1;~Nicolas_Papernot1", "gender": ";M;;M;M", "homepage": ";https://www.mi.fu-berlin.de/inf/groups/ag-idm/members/3_Mitarbeiter_innen/Christopher-Muehl;;https://www.royrinberg.com;https://www.papernot.fr", "dblp": ";;;286/5605;162/1405", "google_scholar": ";-Gt4kuUAAAAJ;;https://scholar.google.com/citations?;cGxq0cMAAAAJ", "orcid": ";;;0000-0002-0937-2236;", "linkedin": ";christopher-m%C3%BChl-644251133;;;nicolaspapernot", "or_profile": "~Franziska_Boenisch2;~Christopher_M\u00fchl1;~Adam_Dziedzic1;~Roy_Rinberg1;~Nicolas_Papernot1", "aff": ";Freie Universit\u00e4t Berlin;;Columbia University;Google", "aff_domain": ";fu-berlin.de;;columbia.edu;google.com", "position": ";Researcher;;MS student;Research Scientist", "bibtex": "@inproceedings{\nboenisch2023have,\ntitle={Have it your way: Individualized Privacy Assignment for {DP}-{SGD}},\nauthor={Franziska Boenisch and Christopher M{\\\"u}hl and Adam Dziedzic and Roy Rinberg and Nicolas Papernot},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XXPzBhOs4f}\n}", "github": "", "project": "", "reviewers": "Sruy;r49G;7px6;3QXJ", "pdf_size": 2947481, "rating": "5;6;6;6", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;4;3", "wc_summary": "112;50;65;404", "wc_strengths": "41;83;122;242", "wc_weaknesses": "478;204;381;610", "wc_questions": "249;2;118;158", "wc_limitations": "4;0;113;96", "wc_review": "884;339;799;1510", "wc_reply_reviewers": "20;39;113;0", "wc_reply_authors": "317;27;354;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;3;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 157.75, 144.00065104019495 ], "wc_strengths_avg": [ 122.0, 74.9699939975988 ], "wc_weaknesses_avg": [ 418.25, 148.01076818934493 ], "wc_questions_avg": [ 131.75, 88.68589233919903 ], "wc_limitations_avg": [ 53.25, 51.62061119359204 ], "wc_review_avg": [ 883.0, 417.16962976707686 ], "wc_reply_reviewers_avg": [ 43.0, 42.70245894559235 ], "wc_reply_authors_avg": [ 174.5, 161.81239136728684 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15682983969590160336&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";fu-berlin.de;;columbia.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Freie Universit\u00e4t Berlin;Columbia University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.fu-berlin.de;https://www.columbia.edu;https://www.google.com", "aff_unique_abbr": "FU Berlin;Columbia;Google", "aff_campus_unique_index": "1", "aff_campus_unique": 
";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Learning-to-Rank Meets Language: Boosting Language-Driven Ordering Alignment for Ordinal Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71353", "id": "XXagS1RQH0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2a11632520f4b7473d7838f074a7d25-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XXagS1RQH0", "openreview": "https://openreview.net/forum?id=XXagS1RQH0", "poster": "/media/PosterPDFs/NeurIPS%202023/71353.png?t=1699281369.8919165", "slides": "https://nips.cc/virtual/2023/poster/71353", "video": "https://nips.cc/virtual/2023/poster/71353", "author_site": "Rui Wang, Peipei Li, Huaibo Huang, Chunshui Cao, Ran He, Zhaofeng He", "tldr": "", "abstract": "We present a novel language-driven ordering alignment method for ordinal classification. The labels in ordinal classification contain additional ordering relations, making them prone to overfitting when relying solely on training data. Recent developments in pre-trained vision-language models inspire us to leverage the rich ordinal priors in human language by converting the original task into a vision-language alignment task. Consequently, we propose L2RCLIP, which fully utilizes the language priors from two perspectives. First, we introduce a complementary prompt tuning technique called RankFormer, designed to enhance the ordering relation of original rank prompts. It employs token-level attention with residual-style prompt blending in the word embedding space. Second, to further incorporate language priors, we revisit the approximate bound optimization of vanilla cross-entropy loss and restructure it within the cross-modal embedding space. Consequently, we propose a cross-modal ordinal pairwise loss to refine the CLIP feature space, where texts and images maintain both semantic alignment and ordering alignment. 
Extensive experiments on three ordinal classification tasks, including facial age estimation, historical color image (HCI) classification, and aesthetic assessment demonstrate its promising performance.", "keywords": "Ordinal Classification;Representation Learning;Vision-Language;Prompt Learning", "primary_area": "", "supplementary_material": "/attachment/d6abfb071abddf9660642ef33ad851db5e0ae60d.pdf", "author": "Rui Wang;Pei Pei Li;Huaibo Huang;Chunshui Cao;Ran He;Zhaofeng He", "authorids": "~Rui_Wang33;~Pei_Pei_Li2;~Huaibo_Huang1;~Chunshui_Cao2;~Ran_He1;~Zhaofeng_He1", "gender": "M;F;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=fBLvVPwAAAAJ&hl=en;;https://people.ucas.edu.cn/~huanghuaibo;;https://rhe-web.github.io/;https://teacher.bupt.edu.cn/zhaofenghe/zh_CN/index.htm", "dblp": ";;211/7251.html;176/1432;61/6198-1;13/3992", "google_scholar": "fBLvVPwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;XMvLciUAAAAJ;GtwD2CUAAAAJ;ayrg9AUAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": ";;0000-0001-5866-2283;;0000-0002-3807-991X;0000-0002-3433-8435", "linkedin": ";;;;;", "or_profile": "~Rui_Wang33;~Pei_Pei_Li2;~Huaibo_Huang1;~Chunshui_Cao2;~Ran_He1;~Zhaofeng_He1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Institute of Automation, Chinese Academy of Sciences;Watrix Technology;Institute of Automation, Chinese Academy of Sciences;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;ia.ac.cn;watrix.ai;ia.ac.cn;bupt.edu.cn", "position": "MS student;Assistant Professor;Associate Professor;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023learningtorank,\ntitle={Learning-to-Rank Meets Language: Boosting Language-Driven Ordering Alignment for Ordinal Classification},\nauthor={Rui Wang and Pei Pei Li and Huaibo Huang and Chunshui Cao and Ran He and Zhaofeng He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XXagS1RQH0}\n}", "github": "", "project": "", "reviewers": "yU8S;VQC9;GKDm;v94e;XrDm;6B9T", "pdf_size": 4267517, "rating": "3;4;5;6;6;7", "confidence": "4;4;3;2;3;5", "soundness": "2;3;3;3;3;2", "novelty": "2;2;3;3;2;2", "presentation": "1;2;3;3;2;2", "wc_summary": "72;60;72;93;91;106", "wc_strengths": "39;21;36;43;65;88", "wc_weaknesses": "298;292;120;1;154;232", "wc_questions": "2;16;182;1;50;87", "wc_limitations": "2;1;1;1;25;10", "wc_review": "413;390;411;139;385;523", "wc_reply_reviewers": "29;164;8;0;0;227", "wc_reply_authors": "81;652;0;0;0;372", "reply_reviewers": "1;1;1;0;0;2", "reply_authors": "3;3;1;1;1;2", "rating_avg": [ 5.166666666666667, 1.3437096247164249 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 82.33333333333333, 15.606266547626168 ], "wc_strengths_avg": [ 48.666666666666664, 21.853044537445015 ], "wc_weaknesses_avg": [ 182.83333333333334, 104.43565270325817 ], "wc_questions_avg": [ 56.333333333333336, 63.78784279852148 ], "wc_limitations_avg": [ 6.666666666666667, 8.806563209081938 ], "wc_review_avg": [ 376.8333333333333, 115.93448820586373 ], "wc_reply_reviewers_avg": [ 71.33333333333333, 90.18437903662819 ], "wc_reply_authors_avg": [ 184.16666666666666, 247.1655428160559 ], "reply_reviewers_avg": [ 0.8333333333333334, 
0.6871842709362768 ], "reply_authors_avg": [ 1.8333333333333333, 0.8975274678557508 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.06477502756312957, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16563645644585658094&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "bupt.edu.cn;bupt.edu.cn;ia.ac.cn;watrix.ai;ia.ac.cn;bupt.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;1;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Chinese Academy of Sciences;Watrix Technology", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.ia.cas.cn;", "aff_unique_abbr": "BUPT;CAS;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Binary Radiance Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71352", "id": "XY6BnwIh4q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aebf6284fe85a8f44b4785d41bc8249a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XY6BnwIh4q", "openreview": "https://openreview.net/forum?id=XY6BnwIh4q", "poster": "/media/PosterPDFs/NeurIPS%202023/71352.png?t=1701667719.1940148", "slides": "https://nips.cc/virtual/2023/poster/71352", "video": "https://nips.cc/virtual/2023/poster/71352", "author_site": "Seungjoo Shin, Jaesik Park", "tldr": "", "abstract": "In this paper, we propose \\textit{binary radiance fields} (BiRF), a storage-efficient radiance field representation employing binary feature encoding in a format of either $+1$ or $-1$. This binarization strategy lets us represent the feature grid with highly compact feature encoding and a dramatic reduction in storage size. Furthermore, our 2D-3D hybrid feature grid design enhances the compactness of feature encoding as the 3D grid includes main components while 2D grids capture details. In our experiments, binary radiance field representation successfully outperforms the reconstruction performance of state-of-the-art (SOTA) storage-efficient radiance field models with lower storage allocation. In particular, our model achieves impressive results in static scene reconstruction, with a PSNR of 32.03 dB for Synthetic-NeRF scenes, 34.48 dB for Synthetic-NSVF scenes, 28.20 dB for Tanks and Temples scenes while only utilizing 0.5 MB of storage space, respectively. 
We hope the proposed binary radiance field representation will make radiance fields more accessible without a storage bottleneck.", "keywords": "neural radiance fields;inverse rendering;binarization", "primary_area": "", "supplementary_material": "/attachment/8a4a333d38e0992211f9fbbab99217e8710a06e8.zip", "author": "Seungjoo Shin;Jaesik Park", "authorids": "~Seungjoo_Shin1;~Jaesik_Park3", "gender": "M;M", "homepage": "https://seungjooshin.github.io;http://jaesik.info", "dblp": "327/6336;00/10336", "google_scholar": "io7PSDIAAAAJ;_3q6KBIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Seungjoo_Shin1;~Jaesik_Park3", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology", "aff_domain": "postech.ac.kr;postech.edu", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nshin2023binary,\ntitle={Binary Radiance Fields},\nauthor={Seungjoo Shin and Jaesik Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XY6BnwIh4q}\n}", "github": "", "project": "", "reviewers": "Bcw2;ptCH;EsYg;fKiM;VKML", "pdf_size": 2289964, "rating": "4;6;6;7;8", "confidence": "5;5;4;5;4", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "3;3;3;4;4", "wc_summary": "121;129;207;145;118", "wc_strengths": "11;236;123;75;113", "wc_weaknesses": "55;328;126;62;98", "wc_questions": "2;114;68;90;55", "wc_limitations": "1;15;1;29;13", "wc_review": "190;822;525;401;397", "wc_reply_reviewers": "0;16;92;17;41", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 144.0, 32.863353450309965 ], "wc_strengths_avg": [ 111.6, 73.57608307051959 ], "wc_weaknesses_avg": [ 133.8, 100.41991834292637 ], "wc_questions_avg": [ 65.8, 37.68501028260441 ], "wc_limitations_avg": [ 11.8, 10.4 ], "wc_review_avg": [ 467.0, 207.5446939818024 ], "wc_reply_reviewers_avg": [ 33.2, 32.18322544432115 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.492365963917331, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4809961589571644431&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "postech.ac.kr;postech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Benchmarking Distribution Shift in Tabular Data with TableShift", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73561", "id": "XYxNklOMMX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a76a757ed479a1e6a5f8134bea492f83-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=XYxNklOMMX", "openreview": "https://openreview.net/forum?id=XYxNklOMMX", "poster": "/media/PosterPDFs/NeurIPS%202023/73561.png?t=1702072778.345695", "slides": 
"https://nips.cc/virtual/2023/poster/73561", "video": "https://nips.cc/virtual/2023/poster/73561", "author_site": "Josh Gardner, Zoran Popovic, Ludwig Schmidt", "tldr": "", "abstract": "Robustness to distribution shift has become a growing concern for text and image models as they transition from research subjects to deployment in the real world. However, high-quality benchmarks for distribution shift in tabular machine learning tasks are still lacking despite the widespread real-world use of tabular data and differences in the models used for tabular data in comparison to text and images. As a consequence, the robustness of tabular models to distribution shift is poorly understood. To address this issue, we introduce TableShift, a distribution shift benchmark for tabular data. TableShift contains 15 binary classification tasks in total, each with an associated shift, and includes a diverse set of data sources, prediction targets, and distribution shifts. The benchmark covers domains including finance, education, public policy, healthcare, and civic participation, and is accessible using only a few lines of Python code via the TableShift API. We conduct a large-scale study comparing several state-of-the-art tabular data models alongside robust learning and domain generalization methods on the benchmark tasks. Our study demonstrates (1) a linear trend between in-distribution (ID) and out-of-distribution (OOD) accuracy; (2) domain robustness methods can reduce shift gaps but at the cost of reduced ID accuracy; (3) a strong relationship between shift gap (difference between ID and OOD performance) and shifts in the label distribution. The benchmark data, Python package, model implementations, and more information about TableShift are available at https://github.com/mlfoundations/tableshift and https://tableshift.org .", "keywords": "tabular data;benchmarking;distribution shift;robustness", "primary_area": "", "supplementary_material": "/attachment/c8b4f2c86e00a79b500c9b3b06bf96ddb0b4eaa0.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\ngardner2023benchmarking,\ntitle={Benchmarking Distribution Shift in Tabular Data with TableShift},\nauthor={Joshua P Gardner and Zoran Popovi and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=XYxNklOMMX}\n}", "github": "", "project": "", "reviewers": "yyVo;9358;ZSf5;XmM2;dJDE", "pdf_size": 2138963, "rating": "3;6;7;7;8", "confidence": "5;4;5;3;4", "wc_summary_and_contributions": "97;48;106;116;59", "wc_strengths": "58;49;70;15;70", "wc_improvement": "552;44;97;188;99", "wc_limitations": "57;7;40;10;19", "wc_correctness": "63;1;15;7;6", "wc_clarity": "5;6;18;1;70", "wc_relation_to_prior_work": "10;9;21;8;7", "wc_documentation": "10;5;84;12;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "853;170;452;358;335", "wc_reply_reviewers": "1231;0;52;0;46", "wc_reply_authors": "4968;491;390;664;422", "reply_reviewers": "4;0;1;0;2", "reply_authors": "11;2;2;1;3", "rating_avg": [ 6.2, 1.7204650534085253 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 85.2, 26.79850742112329 ], "wc_strengths_avg": [ 52.4, 20.30369424513677 ], "wc_improvement_avg": [ 196.0, 183.90976048051394 ], "wc_limitations_avg": [ 26.6, 19.085072700935672 
], "wc_correctness_avg": [ 18.4, 22.747307532980688 ], "wc_clarity_avg": [ 20.0, 25.635912310662945 ], "wc_relation_to_prior_work_avg": [ 11.0, 5.0990195135927845 ], "wc_documentation_avg": [ 23.0, 30.646370095004727 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 433.6, 228.54198738962606 ], "wc_reply_reviewers_avg": [ 265.8, 483.1009832322845 ], "wc_reply_authors_avg": [ 1387.0, 1793.0052983747705 ], "reply_reviewers_avg": [ 1.4, 1.4966629547095764 ], "reply_authors_avg": [ 3.8, 3.6551333764994136 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": -0.49709581280096005, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5774135952061296277&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "Realistic Synthetic Financial Transactions for Anti-Money Laundering Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73560", "id": "XZf2bnMBag", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f38404edff6f3f642d6fa5892479c42-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=XZf2bnMBag", "openreview": "https://openreview.net/forum?id=XZf2bnMBag", "poster": "/media/PosterPDFs/NeurIPS%202023/73560.png?t=1698937574.3200517", "slides": "https://nips.cc/virtual/2023/poster/73560", "video": "https://nips.cc/virtual/2023/poster/73560", "author_site": "Erik Altman, Jovan Blanu\u0161a, Luc von Niederh\u00e4usern, Beni Egressy, Andreea Anghel, Kubilay Atasu", "tldr": "", "abstract": "With the widespread digitization of finance and the increasing popularity of cryptocurrencies, the sophistication of fraud schemes devised by cybercriminals is growing. Money laundering -- the movement of illicit funds to conceal their origins -- can cross bank and national boundaries, producing complex transaction patterns. The UN estimates 2-5\\% of global GDP or \\$0.8 - \\$2.0 trillion dollars are laundered globally each year. Unfortunately, real data to train machine learning models to detect laundering is generally not available, and previous synthetic data generators have had significant shortcomings. A realistic, standardized, publicly-available benchmark is needed for comparing models and for the advancement of the area.\n\nTo this end, this paper contributes a synthetic financial transaction dataset generator and a set of synthetically generated AML (Anti-Money Laundering) datasets. We have calibrated this agent-based generator to match real transactions as closely as possible and made the datasets public. We describe the generator in detail and demonstrate how the datasets generated can help compare different machine learning models in terms of their AML abilities. 
In a key way, using synthetic data in these comparisons can be even better than using real data: the ground truth labels are complete, whilst many laundering transactions in real data are never detected.", "keywords": "Synthetic Data;Anti Money Laundering;Multi-Agent;Virtual World;GNN", "primary_area": "", "supplementary_material": "/attachment/36688a4717d93e8d911b213906b4cb833e815c00.pdf", "author": "Erik Altman;Jovan Blanu\u0161a;Luc Von Niederh\u00e4usern;Beni Egressy;Andreea Anghel;Kubilay Atasu", "authorids": "~Erik_Altman1;~Jovan_Blanu\u0161a1;~Luc_Von_Niederh\u00e4usern1;~Beni_Egressy1;~Andreea_Anghel1;~Kubilay_Atasu1", "gender": ";M;M;Not Specified;F;", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=us-ealtman;;;https://disco.ethz.ch/members/begressy;https://researcher.watson.ibm.com/researcher/view.php?person=zurich-AAN;", "dblp": ";271/6528;;274/2052;57/10370;11/3460", "google_scholar": "E2kFvGAAAAAJ;0kArThIAAAAJ;;tTYBrDEAAAAJ;https://scholar.google.ch/citations?user=gT8Xhk4AAAAJ;", "orcid": ";0000-0003-4915-6551;;;;", "linkedin": ";jovan-blanusa/;luc-von-niederhaeusern/;;https://ch.linkedin.com/in/andreeaanghel;", "or_profile": "~Erik_Altman1;~Jovan_Blanu\u0161a1;~Luc_Von_Niederh\u00e4usern1;~Beni_Egressy1;~Andreea_Anghel1;~Kubilay_Atasu1", "aff": "International Business Machines;EPFL - EPF Lausanne;IBM Research Zurich;International Business Machines;International Business Machines;International Business Machines", "aff_domain": "ibm.com;epfl.ch;research.ibm.com;ibm.com;ibm.com;ibm.com", "position": "Researcher;PhD student;Intern;Intern;Researcher;Research Scientist", "bibtex": "@inproceedings{\naltman2023realistic,\ntitle={Realistic Synthetic Financial Transactions for Anti-Money Laundering Models},\nauthor={Erik Altman and Jovan Blanu{\\v{s}}a and Luc Von Niederh{\\\"a}usern and Beni Egressy and Andreea Anghel and Kubilay Atasu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=XZf2bnMBag}\n}", "github": "", "project": "", "reviewers": "w9iG;jAyB;iXyM", "pdf_size": 2160433, "rating": "4;6;7", "confidence": "5;3;3", "wc_summary_and_contributions": "47;89;107", "wc_strengths": "37;107;114", "wc_improvement": "60;158;85", "wc_limitations": "3;7;6", "wc_correctness": "8;5;31", "wc_clarity": "7;6;6", "wc_relation_to_prior_work": "12;14;43", "wc_documentation": "25;1;32", "wc_additional_feedback": "1;1;1", "wc_review": "200;388;425", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 81.0, 25.13961017995307 ], "wc_strengths_avg": [ 86.0, 34.76588366008646 ], "wc_improvement_avg": [ 101.0, 41.57723736212721 ], "wc_limitations_avg": [ 5.333333333333333, 1.699673171197595 ], "wc_correctness_avg": [ 14.666666666666666, 11.61416759345623 ], "wc_clarity_avg": [ 6.333333333333333, 0.4714045207910317 ], "wc_relation_to_prior_work_avg": [ 23.0, 14.165686240583852 ], "wc_documentation_avg": [ 19.333333333333332, 13.274871834493252 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 337.6666666666667, 98.51001077160747 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], 
"corr_rating_confidence": -0.944911182523068, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11222330923762944971&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 11, "email": "ibm.com;epfl.ch;research.ibm.com;ibm.com;ibm.com;ibm.com", "author_num": 6, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "International Business Machines Corporation;EPFL;IBM", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.epfl.ch;https://www.ibm.com/research", "aff_unique_abbr": "IBM;EPFL;IBM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Lausanne;Zurich", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Provable Training for Graph Contrastive Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71351", "id": "Xasl21tSOf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d75de47462ffe77addaa7b985fc6d8e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xasl21tSOf", "openreview": "https://openreview.net/forum?id=Xasl21tSOf", "poster": "/media/PosterPDFs/NeurIPS%202023/71351.png?t=1701781431.7735333", "slides": "https://nips.cc/virtual/2023/poster/71351", "video": "https://nips.cc/virtual/2023/poster/71351", "author_site": "Yue Yu, Xiao Wang, Mengmei Zhang, Nian Liu, Chuan Shi", "tldr": "", "abstract": "Graph Contrastive Learning (GCL) has emerged as a popular training approach for learning node embeddings from augmented graphs without labels. Despite the key principle that maximizing the similarity between positive node pairs while minimizing it between negative node pairs is well established, some fundamental problems are still unclear. Considering the complex graph structure, are some nodes consistently well-trained and following this principle even with different graph augmentations? Or are there some nodes more likely to be untrained across graph augmentations and violate the principle? How to distinguish these nodes and further guide the training of GCL? To answer these questions, we first present experimental evidence showing that the training of GCL is indeed imbalanced across all nodes. To address this problem, we propose the metric \"node compactness\", which is the lower bound of how a node follows the GCL principle related to the range of augmentations. We further derive the form of node compactness theoretically through bound propagation, which can be integrated into binary cross-entropy as a regularization. To this end, we propose the PrOvable Training (POT) for GCL, which regularizes the training of GCL to encode node embeddings that follows the GCL principle better. 
Through extensive experiments on various benchmarks, POT consistently improves the existing GCL approaches, serving as a friendly plugin.", "keywords": "Graph Contrastive Learning;Graph Neural Networks;Bound Propagation", "primary_area": "", "supplementary_material": "", "author": "Yue Yu;Xiao Wang;Mengmei Zhang;Nian Liu;Chuan Shi", "authorids": "~Yue_Yu7;~Xiao_Wang2;~Mengmei_Zhang1;~Nian_Liu3;~Chuan_Shi1", "gender": "M;M;F;M;M", "homepage": ";https://wangxiaocs.github.io/;;https://liun-online.github.io/;http://www.shichuan.org/", "dblp": ";49/67-17;https://dblp.uni-trier.de/pid/234/4670;;64/3041-1", "google_scholar": "QK5fLVcAAAAJ;MnzarAQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Tx8vRjUAAAAJ;tUq_v90AAAAJ", "orcid": "0009-0000-2428-6005;0000-0002-4444-7811;0000-0002-4581-0977;0009-0000-8378-1129;0000-0002-3734-0266", "linkedin": ";;;;", "or_profile": "~Yue_Yu7;~Xiao_Wang2;~Mengmei_Zhang1;~Nian_Liu3;~Chuan_Shi1", "aff": "Beijing University of Posts and Telecommunications;Beihang University;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;buaa.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "MS student;Associate Professor;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nyu2023provable,\ntitle={Provable Training for Graph Contrastive Learning},\nauthor={Yue Yu and Xiao Wang and Mengmei Zhang and Nian Liu and Chuan Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xasl21tSOf}\n}", "github": "", "project": "", "reviewers": "Kesd;QmjC;ynwF;AXri", "pdf_size": 1382066, "rating": "7;7;7;8", "confidence": "5;4;5;4", "soundness": "3;4;4;3", "novelty": "4;4;4;4", "presentation": "3;3;3;3", "wc_summary": "56;94;78;62", "wc_strengths": "20;44;53;39", "wc_weaknesses": "94;22;57;100", "wc_questions": "4;80;127;2", "wc_limitations": "1;1;1;15", "wc_review": "175;241;316;218", "wc_reply_reviewers": "9;0;0;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.5, 14.79019945774904 ], "wc_strengths_avg": [ 39.0, 12.062338081814818 ], "wc_weaknesses_avg": [ 68.25, 31.371762781201824 ], "wc_questions_avg": [ 53.25, 52.93096919573644 ], "wc_limitations_avg": [ 4.5, 6.06217782649107 ], "wc_review_avg": [ 237.5, 51.13951505440778 ], "wc_reply_reviewers_avg": [ 9.0, 11.022703842524301 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5401723996291157290&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "bupt.edu.cn;buaa.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Beihang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.buaa.edu.cn/", "aff_unique_abbr": "BUPT;BUAA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Making 
Scalable Meta Learning Practical", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71350", "id": "Xazhn0JoNx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/531998dc1fc858b5857a90b74d96ecab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xazhn0JoNx", "openreview": "https://openreview.net/forum?id=Xazhn0JoNx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71350", "video": "https://nips.cc/virtual/2023/poster/71350", "author_site": "Sang Choe, Sanket Vaibhav Mehta, Hwijeen Ahn, Willie Neiswanger, Pengtao Xie, Emma Strubell, Eric Xing", "tldr": "", "abstract": "Despite its flexibility to learn diverse inductive biases in machine learning programs, meta learning (i.e.,\\ learning to learn) has long been recognized to suffer from poor scalability due to its tremendous compute/memory costs, training instability, and a lack of efficient distributed training support. In this work, we focus on making scalable meta learning practical by introducing SAMA, which combines advances in both implicit differentiation algorithms and systems. Specifically, SAMA is designed to flexibly support a broad range of adaptive optimizers in the base level of meta learning programs, while reducing computational burden by avoiding explicit computation of second-order gradient information, and exploiting efficient distributed training techniques implemented for first-order gradients. Evaluated on multiple large-scale meta learning benchmarks, SAMA showcases up to 1.7/4.8x increase in throughput and 2.0/3.8x decrease in memory consumption respectively on single-/multi-GPU setups compared to other baseline meta learning algorithms. Furthermore, we show that SAMA-based data optimization leads to consistent improvements in text classification accuracy with BERT and RoBERTa large language models, and achieves state-of-the-art results in both small- and large-scale data pruning on image classification tasks, demonstrating the practical applicability of scalable meta learning across language and vision domains.", "keywords": "meta learning;bilevel optimization;large-scale learning;implicit differentiation", "primary_area": "", "supplementary_material": "/attachment/b1e4649d3e18503abf067e3abf8e74ae805fa646.zip", "author": "Sang Keun Choe;Sanket Vaibhav Mehta;Hwijeen Ahn;Willie Neiswanger;Pengtao Xie;Emma Strubell;Eric Xing", "authorids": "~Sang_Keun_Choe1;~Sanket_Vaibhav_Mehta2;~Hwijeen_Ahn1;~Willie_Neiswanger2;~Pengtao_Xie3;~Emma_Strubell1;~Eric_Xing1", "gender": ";M;M;M;M;Non-Binary;M", "homepage": ";https://sanketvmehta.github.io;https://hwijeen.github.io;https://willieneis.github.io/;https://pengtaoxie.github.io/;http://strubell.github.io;http://www.cs.cmu.edu/~epxing/", "dblp": ";225/7804;238/6223;120/7593.html;133/1998;153/2253;36/3855", "google_scholar": ";H4pn-ogAAAAJ;https://scholar.google.com/citations?hl=en;QwKHApEAAAAJ;cnncomYAAAAJ;UCDMtM0AAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ", "orcid": ";0000-0003-1809-4685;;;;;", "linkedin": ";sanketvmehta/;hwijeen-ahn-b323b1142/;;;;", "or_profile": "~Sang_Keun_Choe1;~Sanket_Vaibhav_Mehta2;~Hwijeen_Ahn1;~Willie_Neiswanger2;~Pengtao_Xie3;~Emma_Strubell1;~Eric_Xing1", "aff": ";Carnegie Mellon University;Carnegie Mellon University;Stanford University;Carnegie Mellon University;Allen Institute for Artificial Intelligence;School of Computer Science, Carnegie Mellon University", "aff_domain": ";cmu.edu;cmu.edu;stanford.edu; 
;allenai.org;cs.cmu.edu", "position": ";PhD student;MS student;Postdoc;Graduate Student;Visiting Researcher;Full Professor", "bibtex": "@inproceedings{\nchoe2023making,\ntitle={Making Scalable Meta Learning Practical},\nauthor={Sang Keun Choe and Sanket Vaibhav Mehta and Hwijeen Ahn and Willie Neiswanger and Pengtao Xie and Emma Strubell and Eric Xing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xazhn0JoNx}\n}", "github": "", "project": "", "reviewers": "fPdM;BWd8;2sgt;TSYz;YbqA", "pdf_size": 533277, "rating": "5;5;5;6;6", "confidence": "4;3;3;3;3", "soundness": "2;2;3;3;2", "novelty": "2;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "81;91;59;75;63", "wc_strengths": "63;47;165;74;57", "wc_weaknesses": "158;103;103;186;205", "wc_questions": "63;37;184;4;110", "wc_limitations": "3;1;42;1;1", "wc_review": "368;279;553;340;436", "wc_reply_reviewers": "18;0;16;0;65", "wc_reply_authors": "26;0;16;0;114", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 73.8, 11.702991070662236 ], "wc_strengths_avg": [ 81.2, 42.80373815451169 ], "wc_weaknesses_avg": [ 151.0, 41.94758634295899 ], "wc_questions_avg": [ 79.6, 62.656524001894645 ], "wc_limitations_avg": [ 9.6, 16.218507946170636 ], "wc_review_avg": [ 395.2, 93.65340356869045 ], "wc_reply_reviewers_avg": [ 19.8, 23.85288242540092 ], "wc_reply_authors_avg": [ 31.2, 42.56947263004323 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.40824829046386313, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13620987059956304851&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";cmu.edu;cmu.edu;stanford.edu; ;allenai.org;cs.cmu.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Carnegie Mellon University;Stanford University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.stanford.edu;https://allenai.org", "aff_unique_abbr": "CMU;Stanford;AI2", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DiViNeT: 3D Reconstruction from Disparate Views using Neural Template Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71349", "id": "XbInLmYLDr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d2bdcd4f51eea138365af22b50f3bf0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XbInLmYLDr", "openreview": "https://openreview.net/forum?id=XbInLmYLDr", "poster": "/media/PosterPDFs/NeurIPS%202023/71349.png?t=1701835289.756095", "slides": "https://nips.cc/virtual/2023/poster/71349", "video": "https://nips.cc/virtual/2023/poster/71349", "author_site": "Aditya Vora, Akshay Gadi Patil, Hao Zhang", "tldr": "", "abstract": "We present a volume rendering-based neural surface reconstruction method that takes as few as three disparate RGB images as input. 
Our key idea is to regularize the reconstruction, which is severely ill-posed and leaves significant gaps between the sparse views, by learning a set of neural templates that act as surface priors. Our method, coined DiViNeT, operates in two stages. The first stage learns the templates, in the form of 3D Gaussian functions, across different scenes, without 3D supervision. In the reconstruction stage, our predicted templates serve as anchors to help \u201cstitch\u201d the surfaces over sparse regions. We demonstrate that our approach is not only able to complete the surface geometry but also reconstructs surface details to a reasonable extent from few disparate input views. On the DTU and BlendedMVS datasets, our approach achieves the best reconstruction quality among existing methods in the presence of such sparse views and performs on par with, if not better than, competing methods when dense views are employed as inputs.", "keywords": "Multi-view Neural 3D Reconstruction;Sparse and Disparate Views;Neural Rendering;Volume Rendering", "primary_area": "", "supplementary_material": "/attachment/70debd04de586c18cc062a017a6e34c49404f4b3.pdf", "author": "Aditya Vora;Akshay Gadi Patil;Hao Zhang", "authorids": "~Aditya_Vora1;~Akshay_Gadi_Patil1;~Hao_Zhang25", "gender": "M;M;M", "homepage": "http://agp-ka32.github.io;https://aditya-vora.github.io/;https://www2.cs.sfu.ca/~haoz/", "dblp": "179/2323;202/2122;z/HaoZhang2", "google_scholar": "1aWv8V4AAAAJ;https://scholar.google.co.in/citations?user=0LO8tDEAAAAJ;osTl-5IAAAAJ", "orcid": ";;", "linkedin": "akshay-gadi-patil/;aditya-vora-b66b1a58/;", "or_profile": "~Akshay_Gadi_Patil1;~Aditya_Narendrabhai_Vora1;~Hao_Richard_Zhang1", "aff": "Amazon;Simon Fraser University;Amazon", "aff_domain": "amazon.com;sfu.ca;amazon.com", "position": "Researcher;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nvora2023divinet,\ntitle={DiViNeT: 3D Reconstruction from Disparate Views using Neural Template Regularization},\nauthor={Aditya Vora and Akshay Gadi Patil and Hao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XbInLmYLDr}\n}", "github": "", "project": "", "reviewers": "Ahim;bjQu;hUMT;4rRP;jSFe", "pdf_size": 6443777, "rating": "3;4;4;7;7", "confidence": "4;4;4;4;5", "soundness": "2;3;2;3;4", "novelty": "2;2;3;4;4", "presentation": "2;2;3;3;4", "wc_summary": "117;66;49;67;85", "wc_strengths": "96;32;15;101;151", "wc_weaknesses": "216;115;156;129;223", "wc_questions": "36;4;20;73;68", "wc_limitations": "1;4;4;11;11", "wc_review": "466;221;244;381;538", "wc_reply_reviewers": "252;19;202;0;0", "wc_reply_authors": "969;0;1040;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;1;3;1;1", "rating_avg": [ 5.0, 1.6733200530681511 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 76.8, 23.103246525109842 ], "wc_strengths_avg": [ 79.0, 49.52171240981071 ], "wc_weaknesses_avg": [ 167.8, 44.278211345988225 ], "wc_questions_avg": [ 40.2, 26.7761087538873 ], "wc_limitations_avg": [ 6.2, 4.069397989875161 ], "wc_review_avg": [ 370.0, 122.99430881142428 ], "wc_reply_reviewers_avg": [ 94.6, 109.4743805645869 ], "wc_reply_authors_avg": [ 401.8, 492.6144131062347 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": 0.5976143046671969, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6008843335024129238&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "amazon.com;sfu.ca;amazon.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Amazon;Simon Fraser University", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.sfu.ca", "aff_unique_abbr": "Amazon;SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Holistic Transfer: Towards Non-Disruptive Fine-Tuning with Partial Target Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71348", "id": "XbVnNXaIQY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d087955ee13fe9a7402eedec879b9c3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XbVnNXaIQY", "openreview": "https://openreview.net/forum?id=XbVnNXaIQY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71348", "video": "https://nips.cc/virtual/2023/poster/71348", "author_site": "Cheng-Hao Tu, Hong-You Chen, Zheda Mai, Jike Zhong, Vardaan Pahuja, Tanya Berger-Wolf, Song Gao, Charles Stewart, Yu Su, Wei-Lun (Harry) Chao", "tldr": "", "abstract": "We propose a learning problem involving adapting a pre-trained source model to the target domain for classifying all classes that appeared in the source data, using target data that covers only a partial label space. This problem is practical, as it is unrealistic for the target end-users to collect data for all classes prior to adaptation. However, it has received limited attention in the literature. To shed light on this issue, we construct benchmark datasets and conduct extensive experiments to uncover the inherent challenges. We found a dilemma --- on the one hand, adapting to the new target domain is important to claim better performance; on the other hand, we observe that preserving the classification accuracy of classes missing in the target adaptation data is highly challenging, let alone improving them. To tackle this, we identify two key directions: 1) disentangling domain gradients from classification gradients, and 2) preserving class relationships. 
We present several effective solutions that maintain the accuracy of the missing classes and enhance the overall performance, establishing solid baselines for holistic transfer of pre-trained models with partial target data.", "keywords": "Fine-tuning;Transfer learning;Domain adaptation;Continual learning;Robustness;Personalization", "primary_area": "", "supplementary_material": "/attachment/b92aaf37aff32190c94ed08287883e3df03ab259.pdf", "author": "Cheng-Hao Tu;Hong-You Chen;Zheda Mai;Jike Zhong;Vardaan Pahuja;Tanya Berger-Wolf;Song Gao;Charles Stewart;Yu Su;Wei-Lun Chao", "authorids": "~Cheng-Hao_Tu1;~Hong-You_Chen1;~Zheda_Mai1;~Jike_Zhong1;~Vardaan_Pahuja1;~Tanya_Berger-Wolf2;~Song_Gao3;~Charles_Stewart1;~Yu_Su2;~Wei-Lun_Chao1", "gender": "M;;M;M;M;F;M;M;M;M", "homepage": "https://andytu28.github.io/;https://sites.google.com/view/hongyouc/%E9%A6%96%E9%A0%81;https://zheda-mai.github.io/;;https://vardaan123.github.io/;https://cse.osu.edu/people/berger-wolf.1;https://geography.wisc.edu/geods/people;https://www.cs.rpi.edu/~stewart;http://ysu1989.github.io;https://sites.google.com/view/wei-lun-harry-chao", "dblp": "116/8913-4;228/5569;270/0552;;188/3398;b/TYBergerWolf;92/357-1;43/471;38/1070-1;64/8842", "google_scholar": "cZ87u54AAAAJ;uxlU7J8AAAAJ;FT3oT6EAAAAJ;;https://scholar.google.ca/citations?user=0O6NKfIAAAAJ;fDQUHyIAAAAJ;eLdz_6IAAAAJ;https://scholar.google.com/citations?hl=en;rIh5OqoAAAAJ;PGKakWwAAAAJ", "orcid": ";;;;;;0000-0003-4359-6302;;;0000-0003-1269-7231", "linkedin": ";;;jike-zhong-b053721a9/;;;;;;", "or_profile": "~Cheng-Hao_Tu1;~Hong-You_Chen1;~Zheda_Mai1;~Jike_Zhong1;~Vardaan_Pahuja1;~Tanya_Berger-Wolf2;~Song_Gao3;~Charles_Stewart1;~Yu_Su2;~Wei-Lun_Chao1", "aff": "Ohio State University, Columbus;;Ohio State University, Columbus;Ohio State University;The Ohio State University, Columbus;Ohio State University;University of Wisconsin - Madison;Rensselaer Polytechnic Institute;Microsoft;Ohio State University", "aff_domain": "osu.edu;;osu.edu;osu.edu;osu.edu;osu.edu;wisc.edu;cs.rpi.edu;microsoft.com;osu.edu", "position": "PhD student;;PhD student;Undergrad student;PhD student;Professor;Associate Professor;Professor;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntu2023holistic,\ntitle={Holistic Transfer: Towards Non-Disruptive Fine-Tuning with Partial Target Data},\nauthor={Cheng-Hao Tu and Hong-You Chen and Zheda Mai and Jike Zhong and Vardaan Pahuja and Tanya Berger-Wolf and Song Gao and Charles Stewart and Yu Su and Wei-Lun Chao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XbVnNXaIQY}\n}", "github": "", "project": "", "reviewers": "HMzM;QPVq;9DJT;CeTn;tg8m", "pdf_size": 24962655, "rating": "3;3;5;7;7", "confidence": "4;4;4;3;4", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "83;90;43;169;214", "wc_strengths": "37;55;17;164;124", "wc_weaknesses": "224;353;109;239;53", "wc_questions": "53;51;109;201;29", "wc_limitations": "20;32;1;2;1", "wc_review": "417;581;279;775;421", "wc_reply_reviewers": "124;189;111;284;0", "wc_reply_authors": "321;1078;144;230;0", "reply_reviewers": "1;1;2;1;0", "reply_authors": "2;3;3;2;1", "rating_avg": [ 5.0, 1.7888543819998317 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 119.8, 62.345489010833816 ], "wc_strengths_avg": [ 79.4, 
55.5575377424162 ], "wc_weaknesses_avg": [ 195.6, 105.16577390006694 ], "wc_questions_avg": [ 88.6, 62.10185182424112 ], "wc_limitations_avg": [ 11.2, 12.671227249165726 ], "wc_review_avg": [ 494.6, 169.71222701974068 ], "wc_reply_reviewers_avg": [ 141.6, 93.61751972788 ], "wc_reply_authors_avg": [ 354.6, 376.8435219026592 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5590169943749475, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16892405478438395275&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "osu.edu;;osu.edu;osu.edu;osu.edu;osu.edu;wisc.edu;cs.rpi.edu;microsoft.com;osu.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;2;3;0", "aff_unique_norm": "Ohio State University;University of Wisconsin-Madison;Rensselaer Polytechnic Institute;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://www.wisc.edu;https://www.rpi.edu;https://www.microsoft.com", "aff_unique_abbr": "OSU;UW-Madison;RPI;Microsoft", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Columbus;;Madison", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On Calibrating Diffusion Probabilistic Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71347", "id": "XcQzXeF7fX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a645c38d4ec6f94633a35aeb2079596-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XcQzXeF7fX", "openreview": "https://openreview.net/forum?id=XcQzXeF7fX", "poster": "/media/PosterPDFs/NeurIPS%202023/71347.png?t=1701691299.245987", "slides": "https://nips.cc/virtual/2023/poster/71347", "video": "https://nips.cc/virtual/2023/poster/71347", "author_site": "Tianyu Pang, Cheng Lu, Chao Du, Min Lin, Shuicheng Yan, Zhijie Deng", "tldr": "", "abstract": "Recently, diffusion probabilistic models (DPMs) have achieved promising results in diverse generative tasks. A typical DPM framework includes a forward process that gradually diffuses the data distribution and a reverse process that recovers the data distribution from time-dependent data scores. In this work, we observe that the stochastic reverse process of data scores is a martingale, from which concentration bounds and the optional stopping theorem for data scores can be derived. Then, we discover a simple way for calibrating an arbitrary pretrained DPM, with which the score matching loss can be reduced and the lower bounds of model likelihood can consequently be increased. We provide general calibration guidelines under various model parametrizations. Our calibration method is performed only once and the resulting models can be used repeatedly for sampling. We conduct experiments on multiple datasets to empirically validate our proposal. 
Our code is available at https://github.com/thudzj/Calibrated-DPMs.", "keywords": "Diffusion Probabilistic Models;Model Calibration", "primary_area": "", "supplementary_material": "/attachment/0317948916073b1803d57df0eeb50c9500e83fea.zip", "author": "Tianyu Pang;Cheng Lu;Chao Du;Min Lin;Shuicheng YAN;Zhijie Deng", "authorids": "~Tianyu_Pang1;~Cheng_Lu5;~Chao_Du1;~Min_Lin1;~Shuicheng_YAN3;~Zhijie_Deng1", "gender": "M;M;M;M;M;M", "homepage": "https://p2333.github.io/;https://luchengthu.github.io/;https://duchao0726.github.io/;https://linmin.me;https://yanshuicheng.ai/;https://thudzj.github.io/", "dblp": "202/2550;91/1482-11;75/7523;;y/ShuichengYan;209/4959", "google_scholar": "wYDbtFsAAAAJ;vPE9VRoAAAAJ;QOp7xW0AAAAJ;BGONmkIAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;J3dR0sUAAAAJ", "orcid": "0000-0003-0639-6176;;0000-0003-1244-6336;;;0000-0002-0932-1631", "linkedin": "%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;;duchao/;min-lin-08a3a422/;;", "or_profile": "~Tianyu_Pang1;~Cheng_Lu5;~Chao_Du1;~Min_Lin1;~Shuicheng_YAN3;~Zhijie_Deng1", "aff": "Sea AI Lab;Tsinghua University;Sea AI Lab;Sea AI Lab;sea Group;Shanghai Jiaotong University", "aff_domain": "sea.com;tsinghua.edu.cn;sea.com;sea.com;sea.com;sjtu.edu.cn", "position": "Research Scientist;PhD student;Research Scientist;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\npang2023on,\ntitle={On Calibrating Diffusion Probabilistic Models},\nauthor={Tianyu Pang and Cheng Lu and Chao Du and Min Lin and Shuicheng YAN and Zhijie Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XcQzXeF7fX}\n}", "github": "", "project": "", "reviewers": "DT4C;pqYH;hZuU;LmXp;2qSH", "pdf_size": 1151146, "rating": "5;6;7;7;7", "confidence": "3;3;4;3;4", "soundness": "3;2;4;4;4", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "52;155;75;64;114", "wc_strengths": "33;32;98;110;123", "wc_weaknesses": "262;117;308;113;155", "wc_questions": "54;37;1;104;113", "wc_limitations": "3;46;158;104;88", "wc_review": "404;387;640;495;593", "wc_reply_reviewers": "78;116;27;37;24", "wc_reply_authors": "46;216;26;24;25", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.0, 37.75182114812476 ], "wc_strengths_avg": [ 79.2, 38.94303532083754 ], "wc_weaknesses_avg": [ 191.0, 79.4808152952648 ], "wc_questions_avg": [ 61.8, 41.89224271867048 ], "wc_limitations_avg": [ 79.8, 52.55244999046191 ], "wc_review_avg": [ 503.8, 100.18662585395319 ], "wc_reply_reviewers_avg": [ 56.4, 35.52238730716166 ], "wc_reply_authors_avg": [ 67.4, 74.74650493501352 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15675730106299203576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "sea.com;tsinghua.edu.cn;sea.com;sea.com;sea.com;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "Sea AI Lab;Tsinghua University;Sea Group;Shanghai Jiao Tong University", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.tsinghua.edu.cn;;https://www.sjtu.edu.cn", 
"aff_unique_abbr": ";THU;;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";China" }, { "title": "Bayesian Learning via Q-Exponential Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71346", "id": "XddoUFpjkP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6bfdd58f1326ff821a1b92743963bdf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XddoUFpjkP", "openreview": "https://openreview.net/forum?id=XddoUFpjkP", "poster": "/media/PosterPDFs/NeurIPS%202023/71346.png?t=1699944937.3037694", "slides": "https://nips.cc/virtual/2023/poster/71346", "video": "https://nips.cc/virtual/2023/poster/71346", "author_site": "Shuyi Li, Michael O'Connor, Shiwei Lan", "tldr": "", "abstract": "Regularization is one of the most fundamental topics in optimization, statistics and machine learning. To get sparsity in estimating a parameter $u\\in\\mathbb{R}^d$, an $\\ell_q$ penalty term, $\\Vert u\\Vert_q$, is usually added to the objective function. What is the probabilistic distribution corresponding to such $\\ell_q$ penalty? What is the \\emph{correct} stochastic process corresponding to $\\Vert u\\Vert_q$ when we model functions $u\\in L^q$? This is important for statistically modeling high-dimensional objects such as images, with penalty to preserve certainty properties, e.g. edges in the image.\nIn this work, we generalize the $q$-exponential distribution (with density proportional to) $\\exp{(- \\frac{1}{2}|u|^q)}$ to a stochastic process named \\emph{$Q$-exponential (Q-EP) process} that corresponds to the $L_q$ regularization of functions. The key step is to specify consistent multivariate $q$-exponential distributions by choosing from a large family of elliptic contour distributions. The work is closely related to Besov process which is usually defined in terms of series. Q-EP can be regarded as a definition of Besov process with explicit probabilistic formulation, direct control on the correlation strength, and tractable prediction formula. 
From the Bayesian perspective, Q-EP provides a flexible prior on functions with sharper penalty ($q<2$) than the commonly used Gaussian process (GP, $q=2$).\nWe compare GP, Besov and Q-EP in modeling functional data, reconstructing images and solving inverse problems and demonstrate the advantage of our proposed methodology.", "keywords": "Functional Regularization;Besov Process;$Q$-Exponential Distribution;Elliptic Contour Distribution", "primary_area": "", "supplementary_material": "/attachment/0c29c0b298fbddf4fffe24eb3fece6bfd28b2879.pdf", "author": "Shuyi Li;Michael O'Connor;Shiwei Lan", "authorids": "~Shuyi_Li2;mfoconn1@asu.edu;~Shiwei_Lan1", "gender": ";;M", "homepage": "https://www.linkedin.com/in/shuyi-li-15906519b;;https://math.la.asu.edu/~slan", "dblp": "79/9577;;144/4462", "google_scholar": ";;", "orcid": ";;", "linkedin": "shuyi-li-15906519b;;", "or_profile": "~Shuyi_Li2;mfoconn1@asu.edu;~Shiwei_Lan1", "aff": "Arizona State University;;Arizona State University", "aff_domain": "asu.edu;;asu.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nli2023bayesian,\ntitle={Bayesian Learning via Q-Exponential Process},\nauthor={Shuyi Li and Michael O'Connor and Shiwei Lan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XddoUFpjkP}\n}", "github": "", "project": "", "reviewers": "Krqg;Yr66;V1qe;n1kr;av1p", "pdf_size": 3556572, "rating": "5;5;6;7;7", "confidence": "3;2;1;2;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;2;3;4;2", "wc_summary": "114;42;44;64;181", "wc_strengths": "39;34;45;26;108", "wc_weaknesses": "44;85;21;23;182", "wc_questions": "55;25;35;14;176", "wc_limitations": "2;12;33;7;59", "wc_review": "254;198;178;134;706", "wc_reply_reviewers": "0;48;0;10;36", "wc_reply_authors": "0;126;0;0;14", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 89.0, 52.816664037025284 ], "wc_strengths_avg": [ 50.4, 29.4659125092029 ], "wc_weaknesses_avg": [ 71.0, 60.08327554319921 ], "wc_questions_avg": [ 61.0, 59.06267857115862 ], "wc_limitations_avg": [ 22.6, 21.039011383617815 ], "wc_review_avg": [ 294.0, 209.5786248642738 ], "wc_reply_reviewers_avg": [ 18.8, 19.661129163911212 ], "wc_reply_authors_avg": [ 28.0, 49.299087212645226 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2192645048267573, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8147352699090112910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "asu.edu;;asu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Hierarchical Integration Diffusion Model for Realistic Image Deblurring", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71345", "id": "XeMryhpniy", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/5cebc89b113920dbff7c79854ba765a3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XeMryhpniy", "openreview": "https://openreview.net/forum?id=XeMryhpniy", "poster": "/media/PosterPDFs/NeurIPS%202023/71345.png?t=1697791047.0276473", "slides": "https://nips.cc/virtual/2023/poster/71345", "video": "https://nips.cc/virtual/2023/poster/71345", "author_site": "Zheng Chen, Zheng Chen, Yulun Zhang, Ding Liu, bin xia, Jinjin Gu, Linghe Kong, Xin Yuan", "tldr": "", "abstract": "Diffusion models (DMs) have recently been introduced in image deblurring and exhibited promising performance, particularly in terms of details reconstruction. However, the diffusion model requires a large number of inference iterations to recover the clean image from pure Gaussian noise, which consumes massive computational resources. Moreover, the distribution synthesized by the diffusion model is often misaligned with the target results, leading to restrictions in distortion-based metrics. To address the above issues, we propose the Hierarchical Integration Diffusion Model (HI-Diff), for realistic image deblurring. Specifically, we perform the DM in a highly compacted latent space to generate the prior feature for the deblurring process. The deblurring process is implemented by a regression-based method to obtain better distortion accuracy. Meanwhile, the highly compact latent space ensures the efficiency of the DM. Furthermore, we design the hierarchical integration module to fuse the prior into the regression-based model from multiple scales, enabling better generalization in complex blurry scenarios. Comprehensive experiments on synthetic and real-world blur datasets demonstrate that our HI-Diff outperforms state-of-the-art methods. 
Code and trained models are available at https://github.com/zhengchen1999/HI-Diff.", "keywords": "image deblurring;diffusion model", "primary_area": "", "supplementary_material": "/attachment/49ed8aea4dbdb6d628df22d86b0a003a14350caf.pdf", "author": "Zheng Chen;Yulun Zhang;Ding Liu;Bin Xia;Jinjin Gu;Linghe Kong;Xin Yuan", "authorids": "~Zheng_Chen11;~Yulun_Zhang1;~Ding_Liu6;~Bin_Xia1;~Jinjin_Gu1;~Linghe_Kong1;~Xin_Yuan4", "gender": "M;M;;;M;M;M", "homepage": "https://zheng-chen.cn/;http://yulunzhang.com/;;;http://www.jasongt.com;https://www.cs.sjtu.edu.cn/~linghe.kong/;https://en.westlake.edu.cn/faculty/xin-yuan.html", "dblp": "33/2592-14;166/2763-1.html;;;209/5709;23/7909;78/713-2", "google_scholar": "nLZtXdgAAAAJ;ORmLjWoAAAAJ;;;uMQ-G-QAAAAJ;https://scholar.google.com.tw/citations?user=-wm2X-8AAAAJ;cS9CbWkAAAAJ", "orcid": "0009-0004-3963-7938;0000-0002-2288-5079;;;0000-0002-4389-6236;0000-0001-9266-3044;0000-0002-8311-7524", "linkedin": "zheng-chen-290084313;yulun-zhang-1116b5b9/;;;jinjingu;;xin-yuan-0024bb31/", "or_profile": "~Zheng_Chen11;~Yulun_Zhang1;~Ding_Liu6;~Bin_Xia1;~Jinjin_Gu1;~Linghe_Kong1;~Xin_Yuan4", "aff": "Shanghai Jiaotong University;Swiss Federal Institute of Technology;;;University of Sydney;Shanghai Jiaotong University;Westlake University", "aff_domain": "sjtu.edu.cn;ethz.ch;;;sydney.edu.au;sjtu.edu.cn;westlake.edu.cn", "position": "MS student;Postdoc;;;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2023hierarchical,\ntitle={Hierarchical Integration Diffusion Model for Realistic Image Deblurring},\nauthor={Zheng Chen and Yulun Zhang and Ding Liu and Bin Xia and Jinjin Gu and Linghe Kong and Xin Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XeMryhpniy}\n}", "github": "", "project": "", "reviewers": "w6Fg;5Ars;mQcX;ymRm", "pdf_size": 4921350, "rating": "6;7;7;8", "confidence": "3;5;4;5", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;4;4", "wc_summary": "29;48;103;69", "wc_strengths": "21;36;70;157", "wc_weaknesses": "121;175;147;95", "wc_questions": "38;3;60;64", "wc_limitations": "25;3;21;17", "wc_review": "234;265;401;402", "wc_reply_reviewers": "69;26;85;64", "wc_reply_authors": "104;178;209;24", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 62.25, 27.453369556395078 ], "wc_strengths_avg": [ 71.0, 52.730446612938906 ], "wc_weaknesses_avg": [ 134.5, 29.744747435471695 ], "wc_questions_avg": [ 41.25, 24.200981385059574 ], "wc_limitations_avg": [ 16.5, 8.2915619758885 ], "wc_review_avg": [ 325.5, 76.78704317786953 ], "wc_reply_reviewers_avg": [ 61.0, 21.644860821913362 ], "wc_reply_authors_avg": [ 128.75, 71.50305937510646 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11438121842329996369&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "sjtu.edu.cn;ethz.ch;;;sydney.edu.au;sjtu.edu.cn;westlake.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Shanghai Jiao Tong University;Swiss Federal Institute of Technology;University of Sydney;Westlake 
University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ethz.ch;https://www.sydney.edu.au;https://www.westlake.edu.cn", "aff_unique_abbr": "SJTU;ETH Zurich;USYD;WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "China;Switzerland;Australia" }, { "title": "Deep Recurrent Optimal Stopping", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71344", "id": "XetXfkYZ6i", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28795419a644f41ede3fa058b13fc622-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XetXfkYZ6i", "openreview": "https://openreview.net/forum?id=XetXfkYZ6i", "poster": "/media/PosterPDFs/NeurIPS%202023/71344.png?t=1699863524.1885579", "slides": "https://nips.cc/virtual/2023/poster/71344", "video": "https://nips.cc/virtual/2023/poster/71344", "author_site": "Niranjan Damera Venkata, Chiranjib Bhattacharyya", "tldr": "", "abstract": "Deep neural networks (DNNs) have recently emerged as a powerful paradigm for solving Markovian optimal stopping problems. However, a ready extension of DNN-based methods to non-Markovian settings requires significant state and parameter space expansion, manifesting the curse of dimensionality. Further, efficient state-space transformations permitting Markovian approximations, such as those afforded by recurrent neural networks (RNNs), are either structurally infeasible or are confounded by the curse of non-Markovianity. Considering these issues, we introduce, for the first time, an optimal stopping policy gradient algorithm (OSPG) that can leverage RNNs effectively in non-Markovian settings by implicitly optimizing value functions without recursion, mitigating the curse of non-Markovianity. 
The OSPG algorithm is derived from an inference procedure on a novel Bayesian network representation of discrete-time non-Markovian optimal stopping trajectories and, as a consequence, yields an offline policy gradient algorithm that eliminates expensive Monte Carlo policy rollouts.", "keywords": "optimal stopping;recurrent neural networks;probabilistic graphical models;policy gradient methods", "primary_area": "", "supplementary_material": "/attachment/31bd1d09950f190407606f1ecb8b70b385eee344.zip", "author": "NIRANJAN DAMERA VENKATA;Chiranjib Bhattacharyya", "authorids": "~NIRANJAN_DAMERA_VENKATA1;~Chiranjib_Bhattacharyya1", "gender": "M;M", "homepage": ";http://www.csa.iisc.ac.in/~chiru/", "dblp": "89/1711.html;b/CBhattacharyya", "google_scholar": "https://scholar.google.ca/citations?user=daDIHuUAAAAJ;", "orcid": ";", "linkedin": "https://in.linkedin.com/in/niranjan-damera-venkata-1363822;", "or_profile": "~NIRANJAN_DAMERA_VENKATA1;~Chiranjib_Bhattacharyya1", "aff": "HP Inc;Indian Institute of Science, Bangalore", "aff_domain": "hp.com;iisc.ac.in", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nvenkata2023deep,\ntitle={Deep Recurrent Optimal Stopping},\nauthor={NIRANJAN DAMERA VENKATA and Chiranjib Bhattacharyya},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XetXfkYZ6i}\n}", "github": "", "project": "", "reviewers": "njDJ;XsN5;vhpm", "pdf_size": 1496948, "rating": "5;5;6", "confidence": "3;2;3", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "3;2;2", "wc_summary": "82;64;25", "wc_strengths": "66;29;19", "wc_weaknesses": "60;346;88", "wc_questions": "32;71;280", "wc_limitations": "41;19;2", "wc_review": "281;529;414", "wc_reply_reviewers": "67;52;86", "wc_reply_authors": "203;13;148", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 57.0, 23.790754506740637 ], "wc_strengths_avg": [ 38.0, 20.215505600075073 ], "wc_weaknesses_avg": [ 164.66666666666666, 128.73055408703698 ], "wc_questions_avg": [ 127.66666666666667, 108.88628114791238 ], "wc_limitations_avg": [ 20.666666666666668, 15.96524001977073 ], "wc_review_avg": [ 408.0, 101.3344298186291 ], "wc_reply_reviewers_avg": [ 68.33333333333333, 13.912424503139471 ], "wc_reply_authors_avg": [ 121.33333333333333, 79.82620009885365 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6714635981353051506&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hp.com;iisc.ac.in", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "HP Inc;Indian Institute of Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.hp.com;https://www.iisc.ac.in", "aff_unique_abbr": "HP;IISc", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bangalore", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;India" }, { "title": "Towards Robust and Expressive Whole-body Human Pose and Shape Estimation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71343", "id": "XfKnoW4Zef", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/381d36bf8e115cdeda48763c9cb77616-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XfKnoW4Zef", "openreview": "https://openreview.net/forum?id=XfKnoW4Zef", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71343", "video": "https://nips.cc/virtual/2023/poster/71343", "author_site": "Hui En Pang, Zhongang Cai, Lei Yang, Qingyi Tao, Zhonghua Wu, Tianwei Zhang, Ziwei Liu", "tldr": "", "abstract": "Whole-body pose and shape estimation aims to jointly predict different behaviors (e.g., pose, hand gesture, facial expression) of the entire human body from a monocular image. Existing methods often exhibit suboptimal performance due to the complexity of in-the-wild scenarios. We argue that the prediction accuracy of these models is significantly affected by the quality of the _bounding box_, e.g., scale, alignment. The natural discrepancy between the ideal bounding box annotations and model detection results is particularly detrimental to the performance of whole-body pose and shape estimation.\nIn this paper, we propose a novel framework to enhance the robustness of whole-body pose and shape estimation. Our framework incorporates three new modules to address the above challenges from three perspectives: (1) a **Localization Module** enhances the model's awareness of the subject's location and semantics within the image space; (2) a **Contrastive Feature Extraction Module** encourages the model to be invariant to robust augmentations by incorporating a contrastive loss and positive samples; (3) a **Pixel Alignment Module** ensures the reprojected mesh from the predicted camera and body model parameters are more accurate and pixel-aligned. 
We perform comprehensive experiments to demonstrate the effectiveness of our proposed framework on body, hands, face and whole-body benchmarks.", "keywords": "Whole-body;SMPLX Model;Human Pose and Shape Estimation;Human Mesh Recovery", "primary_area": "", "supplementary_material": "/attachment/410abf77244f7de43760a4ed7433c34f0d19b2e0.pdf", "author": "Hui En Pang;Zhongang Cai;Lei Yang;Qingyi Tao;Zhonghua Wu;Tianwei Zhang;Ziwei Liu", "authorids": "~Hui_En_Pang1;~Zhongang_Cai1;~Lei_Yang7;~Qingyi_Tao1;~Zhonghua_Wu2;~Tianwei_Zhang1;~Ziwei_Liu1", "gender": "F;M;M;;M;M;M", "homepage": ";https://caizhongang.com;https://www.yanglei.me;;https://wu-zhonghua.github.io/;https://personal.ntu.edu.sg/tianwei.zhang/index.html;https://liuziwei7.github.io/", "dblp": ";232/3190;50/2484-45;;;77/7902-4;05/6300-2", "google_scholar": ";WrDKqIAAAAAJ;jZH2IPYAAAAJ;;https://scholar.google.com.sg/citations?user=wMDgLCYAAAAJ;9vpiYDIAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ", "orcid": "0000-0002-2353-9071;0000-0002-1810-3855;0000-0002-0571-5924;;;;", "linkedin": ";caizhongang/;;;;;", "or_profile": "~Hui_En_Pang1;~Zhongang_Cai1;~Lei_Yang7;~Qingyi_Tao1;~Zhonghua_Wu2;~Tianwei_Zhang1;~Ziwei_Liu1", "aff": "Nanyang Technological University;Nanyang Technological University;Sensetime Ltd.;;SenseTime ;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;sensetime.com;;sensetime.com;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;Researcher;;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npang2023towards,\ntitle={Towards Robust and Expressive Whole-body Human Pose and Shape Estimation},\nauthor={Hui En Pang and Zhongang Cai and Lei Yang and Qingyi Tao and Zhonghua Wu and Tianwei Zhang and Ziwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XfKnoW4Zef}\n}", "github": "", "project": "", "reviewers": "p9Xq;go2c;bkwo;Uf3X;4HYU", "pdf_size": 7734982, "rating": "3;4;5;6;7", "confidence": "5;4;5;4;5", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;2;4;3", "wc_summary": "87;57;107;173;81", "wc_strengths": "42;44;34;116;143", "wc_weaknesses": "199;134;379;157;60", "wc_questions": "9;2;83;13;2", "wc_limitations": "38;2;22;23;58", "wc_review": "375;239;625;482;344", "wc_reply_reviewers": "1357;239;203;24;38", "wc_reply_authors": "1982;1107;904;131;131", "reply_reviewers": "4;2;1;1;1", "reply_authors": "6;3;4;3;3", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 101.0, 39.375119047439085 ], "wc_strengths_avg": [ 75.8, 44.79464253680344 ], "wc_weaknesses_avg": [ 185.8, 106.61969799244416 ], "wc_questions_avg": [ 21.8, 30.889480410003664 ], "wc_limitations_avg": [ 28.6, 18.62900963551203 ], "wc_review_avg": [ 413.0, 131.29051755553408 ], "wc_reply_reviewers_avg": [ 372.2, 499.82693004679123 ], "wc_reply_authors_avg": [ 851.0, 690.5513739034917 ], "reply_reviewers_avg": [ 1.8, 1.1661903789690604 ], "reply_authors_avg": [ 3.8, 1.16619037896906 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7689688358424180436&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": 
"ntu.edu.sg;ntu.edu.sg;sensetime.com;;sensetime.com;ntu.edu.sg;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Nanyang Technological University;SenseTime", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.sensetime.com", "aff_unique_abbr": "NTU;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "Singapore;China" }, { "id": "XfQbPqRPXi", "title": "Towards Better Evaluation of GNN Expressiveness with BREC Dataset", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Research on the theoretical expressiveness of Graph Neural Networks~(GNNs) has developed rapidly, and many methods have been proposed to enhance the expressiveness. However, most methods do not have a uniform expressiveness measure except for a few that strictly follow the $k$-dimensional Weisfeiler-Lehman ($k$-WL) test hierarchy. Their theoretical analyses are often limited to distinguishing certain families of non-isomorphic graphs, leading to difficulties in quantitatively comparing their expressiveness. In contrast to theoretical analysis, another way to measure expressiveness is by evaluating model performance on certain datasets containing 1-WL-indistinguishable graphs. Previous datasets specifically designed for this purpose, however, face problems with difficulty (any model surpassing 1-WL has nearly 100\\% accuracy), granularity (models tend to be either 100\\% correct or near random guess), and scale (only a few essentially different graphs in each dataset). To address these limitations, we propose a new expressiveness dataset, **BREC**, which includes 400 pairs of non-isomorphic graphs carefully selected from four primary categories (Basic, Regular, Extension, and CFI). These graphs have higher difficulty (up to 4-WL-indistinguishable), finer granularity (able to compare models between 1-WL and 3-WL), and a larger scale (400 pairs). Further, we synthetically test 23 models with higher-than-1-WL expressiveness on our BREC dataset. Our experiment gives the first thorough comparison of the expressiveness of those state-of-the-art beyond-1-WL GNN models. We expect this dataset to serve as a benchmark for testing the expressiveness of future GNNs. 
Our dataset and evaluation code are released at: https://github.com/GraphPKU/BREC.", "keywords": "GNN;Expressiveness;Dataset", "primary_area": "", "supplementary_material": "/attachment/ec91b6f27fabb813b9019c883abc0ebe99f45dd0.pdf", "author": "Yanbo Wang;Muhan Zhang", "authorids": "~Yanbo_Wang2;~Muhan_Zhang1", "gender": "M;M", "homepage": "https://yanxwb.github.io/;https://muhanzhang.github.io/", "dblp": ";157/5518", "google_scholar": "Rmo7EXQAAAAJ;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";0000-0002-7680-6401", "linkedin": ";jerry-muhan-zhang-a33a1777/", "or_profile": "~Yanbo_Wang2;~Muhan_Zhang1", "aff": "Xi'an Jiaotong University;Peking University", "aff_domain": "xjtu.edu.cn;pku.edu.cn", "position": "Undergrad student;Assistant Professor", "bibtex": "@misc{\nwang2023towards,\ntitle={Towards Better Evaluation of {GNN} Expressiveness with {BREC} Dataset},\nauthor={Yanbo Wang and Muhan Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=XfQbPqRPXi}\n}", "github": "", "project": "", "reviewers": "Zvmi;AZWH;kpAj;vLAp;iUXu", "site": "https://openreview.net/forum?id=XfQbPqRPXi", "pdf_size": 920714, "rating": "4;6;7;8;8", "confidence": "3;3;5;3;4", "wc_summary_and_contributions": "77;46;45;45;67", "wc_strengths": "83;44;62;36;104", "wc_improvement": "187;108;256;106;102", "wc_limitations": "2;1;44;71;24", "wc_correctness": "1;1;27;1;32", "wc_clarity": "16;1;181;1;42", "wc_relation_to_prior_work": "22;6;44;1;20", "wc_documentation": "2;1;42;1;129", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "391;209;702;263;521", "wc_reply_reviewers": "0;0;146;21;24", "wc_reply_authors": "987;815;1347;696;1229", "reply_reviewers": "0;0;3;1;1", "reply_authors": "2;2;5;2;3", "rating_avg": [ 6.6, 1.4966629547095764 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 56.0, 13.446189051177289 ], "wc_strengths_avg": [ 65.8, 25.031180555459226 ], "wc_improvement_avg": [ 151.8, 60.97999672023606 ], "wc_limitations_avg": [ 28.4, 26.55258932759666 ], "wc_correctness_avg": [ 12.4, 14.051334456200237 ], "wc_clarity_avg": [ 48.2, 68.06878873610137 ], "wc_relation_to_prior_work_avg": [ 18.6, 15.014659503298768 ], "wc_documentation_avg": [ 35.0, 49.57015230963084 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 417.2, 178.71586387335623 ], "wc_reply_reviewers_avg": [ 38.2, 54.83940189316437 ], "wc_reply_authors_avg": [ 1014.8, 244.36235389273855 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.36748420762958356, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3462621932896971948&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Xi'an Jiao Tong University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "XJTU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "On the Minimax Regret for Online Learning with Feedback Graphs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71342", "id": "XfYpIaKDb6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/908f03779b5b063413fbf0247a46a403-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XfYpIaKDb6", "openreview": 
"https://openreview.net/forum?id=XfYpIaKDb6", "poster": "/media/PosterPDFs/NeurIPS%202023/71342.png?t=1701781967.4373143", "slides": "https://nips.cc/virtual/2023/poster/71342", "video": "https://nips.cc/virtual/2023/poster/71342", "author_site": "Khaled Eldowa, Emmanuel Esposito, Tom Cesari, Nicol\u00f2 Cesa-Bianchi", "tldr": "", "abstract": "In this work, we improve on the upper and lower bounds for the regret of online learning with strongly observable undirected feedback graphs. The best known upper bound for this problem is $\\mathcal{O}\\bigl(\\sqrt{\\alpha T\\ln K}\\bigr)$, where $K$ is the number of actions, $\\alpha$ is the independence number of the graph, and $T$ is the time horizon. The $\\sqrt{\\ln K}$ factor is known to be necessary when $\\alpha = 1$ (the experts case). On the other hand, when $\\alpha = K$ (the bandits case), the minimax rate is known to be $\\Theta\\bigl(\\sqrt{KT}\\bigr)$, and a lower bound $\\Omega\\bigl(\\sqrt{\\alpha T}\\bigr)$ is known to hold for any $\\alpha$. Our improved upper bound $\\mathcal{O}\\bigl(\\sqrt{\\alpha T(1+\\ln(K/\\alpha))}\\bigr)$ holds for any $\\alpha$ and matches the lower bounds for bandits and experts, while interpolating intermediate cases. To prove this result, we use FTRL with $q$-Tsallis entropy for a carefully chosen value of $q \\in [1/2, 1)$ that varies with $\\alpha$. The analysis of this algorithm requires a new bound on the variance term in the regret. We also show how to extend our techniques to time-varying graphs, without requiring prior knowledge of their independence numbers. Our upper bound is complemented by an improved $\\Omega\\bigl(\\sqrt{\\alpha T(\\ln K)/(\\ln\\alpha)}\\bigr)$ lower bound for all $\\alpha > 1$, whose analysis relies on a novel reduction to multitask learning. 
This shows that a logarithmic factor is necessary as soon as $\\alpha < K$.", "keywords": "Online learning;Feedback graphs;Multiarmed bandits", "primary_area": "", "supplementary_material": "/attachment/9d1736323705271f6557b9dfbd32f534f2bbf0fe.pdf", "author": "Khaled Eldowa;Emmanuel Esposito;Tommaso Cesari;Nicol\u00f2 Cesa-Bianchi", "authorids": "~Khaled_Eldowa1;~Emmanuel_Esposito1;~Tommaso_Cesari1;~Nicol\u00f2_Cesa-Bianchi1", "gender": "M;;M;M", "homepage": "https://www.deib.polimi.it/ita/personale/dettagli/1491550;https://emmanuelesposito.it;http://cesa-bianchi.di.unimi.it/;https://sites.google.com/view/tom-cesari/home", "dblp": "320/6639;250/9515;c/NicoloCesaBianchi;223/4631", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;Wnal-EIAAAAJ;https://scholar.google.it/citations?user=BWADJUkAAAAJ;VFo06EEAAAAJ", "orcid": ";;0000-0001-8477-4748;0000-0001-5010-1094", "linkedin": ";emmanuel-esposito/;;", "or_profile": "~Khaled_Eldowa1;~Emmanuel_Esposito1;~Nicol\u00f2_Cesa-Bianchi1;~Tommaso_R._Cesari1", "aff": "University of Milan;University of Milan;University of Milan;University of Ottawa", "aff_domain": "unimi.it;unimi.it;unimi.it;uottawa.ca", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\neldowa2023on,\ntitle={On the Minimax Regret for Online Learning with Feedback Graphs},\nauthor={Khaled Eldowa and Emmanuel Esposito and Tommaso Cesari and Nicol{\\`o} Cesa-Bianchi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XfYpIaKDb6}\n}", "github": "", "project": "", "reviewers": "J2zm;GSpg;kfEi;TWzC", "pdf_size": 335488, "rating": "7;7;7;8", "confidence": "4;4;4;2", "soundness": "4;4;3;3", "novelty": "3;2;3;4", "presentation": "4;4;3;3", "wc_summary": "181;345;272;211", "wc_strengths": "141;113;61;133", "wc_weaknesses": "46;203;96;46", "wc_questions": "30;49;24;1", "wc_limitations": "4;30;1;1", "wc_review": "402;740;454;392", "wc_reply_reviewers": "16;35;17;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 252.25, 62.79082337412052 ], "wc_strengths_avg": [ 112.0, 31.160872901765767 ], "wc_weaknesses_avg": [ 97.75, 64.10294455015308 ], "wc_questions_avg": [ 26.0, 17.131841699011815 ], "wc_limitations_avg": [ 9.0, 12.186057606953941 ], "wc_review_avg": [ 497.0, 142.25681003031102 ], "wc_reply_reviewers_avg": [ 24.25, 8.042853971072706 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14169297299099063255&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 13, "email": "unimi.it;unimi.it;unimi.it;uottawa.ca", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Milan;University of Ottawa", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimi.it;https://www.uottawa.ca", "aff_unique_abbr": "UniMi;U Ottawa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Italy;Canada" }, { "title": "Deep Insights into Noisy Pseudo Labeling on Graph Data", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71341", "id": "XhNlBvb4XV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0318ba897cee71ce200e408dea6062e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XhNlBvb4XV", "openreview": "https://openreview.net/forum?id=XhNlBvb4XV", "poster": "/media/PosterPDFs/NeurIPS%202023/71341.png?t=1698136080.2397084", "slides": "https://nips.cc/virtual/2023/poster/71341", "video": "https://nips.cc/virtual/2023/poster/71341", "author_site": "Botao WANG, Jia Li, Yang Liu, Jiashun Cheng, Yu Rong, Wenjia Wang, Fugee Tsung", "tldr": "", "abstract": "Pseudo labeling (PL) is a wide-applied strategy to enlarge the labeled dataset by self-annotating the potential samples during the training process. Several works have shown that it can improve the graph learning model performance in general. However, we notice that the incorrect labels can be fatal to the graph training process. Inappropriate PL may result in the performance degrading, especially on graph data where the noise can propagate. Surprisingly, the corresponding error is seldom theoretically analyzed in the literature. In this paper, we aim to give deep insights of PL on graph learning models. We first present the error analysis of PL strategy by showing that the error is bounded by the confidence of PL threshold and consistency of multi-view prediction. Then, we theoretically illustrate the effect of PL on convergence property. Based on the analysis, we propose a cautious pseudo labeling methodology in which we pseudo label the samples with highest confidence and multi-view consistency. Finally, extensive experiments demonstrate that the proposed strategy improves graph learning process and outperforms other PL strategies on link prediction and node classification tasks.", "keywords": "Pseudo labeling;Graph data;Error analysis;Cautious", "primary_area": "", "supplementary_material": "/attachment/dcb2e5feff391ab7c5273ee979dcce8c0f51ee99.pdf", "author": "Botao WANG;Jia Li;Yang Liu;Jiashun Cheng;Yu Rong;Wenjia Wang;Fugee Tsung", "authorids": "~Botao_WANG2;~Jia_Li4;~Yang_Liu21;~Jiashun_Cheng1;~Yu_Rong1;~Wenjia_Wang2;~Fugee_Tsung1", "gender": ";M;M;M;M;M;M", "homepage": ";https://sites.google.com/view/lijia;https://scholar.google.com/citations?hl=zh-CN&user=IWyM6l0AAAAJ;https://www.linkedin.com/in/jiashun-cheng-b2b31a149/;https://royrong.me/;https://www.wenjia-w.com/;https://ieda.ust.hk/dfaculty/tsung/", "dblp": ";23/6950-9;51/3710-245;323/4178;24/10036-1;;95/2794", "google_scholar": ";1gSbcYoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;HVKMb10AAAAJ;https://scholar.google.com.hk/citations?user=itezhEMAAAAJ;EKS1sO0AAAAJ;yQVoXS0AAAAJ", "orcid": ";0000-0002-6362-4385;0000-0002-2633-512X;0000-0002-5485-7224;0000-0001-7387-302X;;0000-0002-0575-8254", "linkedin": ";;;;;;ftsung/", "or_profile": "~Botao_WANG2;~Jia_Li4;~Yang_Liu21;~Jiashun_Cheng1;~Yu_Rong1;~Wenjia_Wang2;~Fugee_Tsung1", "aff": ";Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Tencent AI Lab;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": ";ust.hk;ust.hk;ust.hk;tencent.com;ust.hk;ust.hk", "position": ";Assistant Professor;PhD student;PhD student;Senior Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023deep,\ntitle={Deep Insights into Noisy Pseudo Labeling on Graph Data},\nauthor={Botao WANG 
and Jia Li and Yang Liu and Jiashun Cheng and Yu Rong and Wenjia Wang and Fugee Tsung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XhNlBvb4XV}\n}", "github": "", "project": "", "reviewers": "4kBQ;io9b;iJpS;nJna", "pdf_size": 2885327, "rating": "5;5;7;7", "confidence": "4;3;4;3", "soundness": "3;3;4;3", "novelty": "3;2;4;3", "presentation": "3;3;1;3", "wc_summary": "108;53;25;110", "wc_strengths": "37;14;37;94", "wc_weaknesses": "97;58;41;90", "wc_questions": "13;229;243;140", "wc_limitations": "14;7;7;7", "wc_review": "269;361;353;441", "wc_reply_reviewers": "58;49;21;13", "wc_reply_authors": "45;0;0;41", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 74.0, 36.37993952716249 ], "wc_strengths_avg": [ 45.5, 29.53387885124472 ], "wc_weaknesses_avg": [ 71.5, 22.940139493908923 ], "wc_questions_avg": [ 156.25, 91.65526444236578 ], "wc_limitations_avg": [ 8.75, 3.031088913245535 ], "wc_review_avg": [ 356.0, 60.88513775955508 ], "wc_reply_reviewers_avg": [ 35.25, 18.73999733191016 ], "wc_reply_authors_avg": [ 21.5, 21.54646142641524 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12111718667870514890&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": ";ust.hk;ust.hk;ust.hk;tencent.com;ust.hk;ust.hk", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://ai.tencent.com", "aff_unique_abbr": "HKUST;Tencent AI Lab", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Batch Bayesian Optimization For Replicable Experimental Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71340", "id": "Xj4LJiXvlX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/727a5a5c77be15d053b47b7c391800c2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xj4LJiXvlX", "openreview": "https://openreview.net/forum?id=Xj4LJiXvlX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71340", "video": "https://nips.cc/virtual/2023/poster/71340", "author_site": "Zhongxiang Dai, Zhongxiang Dai, Quoc Phong Nguyen, Sebastian Tay, Daisuke Urano, Richalynn Leong, Bryan Kian Hsiang Low, Patrick Jaillet", "tldr": "", "abstract": "Many real-world experimental design problems (a) evaluate multiple experimental conditions in parallel and (b) replicate each condition multiple times due to large and heteroscedastic observation noise. Given a fixed total budget, this naturally induces a trade-off between evaluating more unique conditions while replicating each of them fewer times vs. evaluating fewer unique conditions and replicating each more times. Moreover, in these problems, practitioners may be risk-averse and hence prefer an input with both good average performance and small variability. 
To tackle both challenges, we propose the Batch Thompson Sampling for Replicable Experimental Design (BTS-RED) framework, which encompasses three algorithms. Our BTS-RED-Known and BTS-RED-Unknown algorithms, for known and unknown noise variance respectively, choose the number of replications adaptively rather than deterministically, such that an input with a larger noise variance is replicated more times. As a result, despite the noise heteroscedasticity, both algorithms enjoy a theoretical guarantee and are asymptotically no-regret. Our Mean-Var-BTS-RED algorithm aims at risk-averse optimization and is also asymptotically no-regret. We also show the effectiveness of our algorithms in two practical real-world applications: precision agriculture and AutoML.", "keywords": "Bayesian optimization;Gaussian processes;AI4Science", "primary_area": "", "supplementary_material": "/attachment/62be92344179e32b9a8c26036cb79a2c8ae7a5a4.zip", "author": "Zhongxiang Dai;Quoc Phong Nguyen;Sebastian Shenghong Tay;Daisuke Urano;Richalynn Leong;Bryan Kian Hsiang Low;Patrick Jaillet", "authorids": "~Zhongxiang_Dai1;~Quoc_Phong_Nguyen2;~Sebastian_Shenghong_Tay1;~Daisuke_Urano1;~Richalynn_Leong1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "gender": "M;;M;M;F;M;M", "homepage": "https://daizhongxiang.github.io/;;;http://www.tll.org.sg/group-leaders/urano-daisuke/;;http://www.comp.nus.edu.sg/~lowkh;http://web.mit.edu/jaillet/www/", "dblp": "172/4968;;281/7664;;;97/4877;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick", "google_scholar": "1v8xOIYAAAAJ;;https://scholar.google.com/citations?hl=en;ixmLRN0AAAAJ;;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ;ND0FM6EAAAAJ", "orcid": ";;;;;;0000-0002-8585-6566", "linkedin": ";;;daisuke-urano-73206a86/?originalSubdomain=sg;www.linkedin.com/in/richalynn-leong-41432b39;;patrick-jaillet-1260445/", "or_profile": "~Zhongxiang_Dai1;~Quoc_Phong_Nguyen2;~Sebastian_Shenghong_Tay1;~Daisuke_Urano1;~Richalynn_Leong1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "aff": "National University of Singapore;;National University of Singapore;Temasek Life Sciences Laboratory / National University of Singapore;National University of Singapore;National University of Singapore;Massachusetts Institute of Technology", "aff_domain": "nus.edu.sg;;nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu", "position": "Postdoc;;PhD student;Assistant Professor;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ndai2023batch,\ntitle={Batch Bayesian Optimization For Replicable Experimental Design},\nauthor={Zhongxiang Dai and Quoc Phong Nguyen and Sebastian Shenghong Tay and Daisuke Urano and Richalynn Leong and Bryan Kian Hsiang Low and Patrick Jaillet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xj4LJiXvlX}\n}", "github": "", "project": "", "reviewers": "4YBP;QJyu;VcKK;2mxf", "pdf_size": 16364282, "rating": "4;5;6;6", "confidence": "3;4;3;4", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "54;107;208;111", "wc_strengths": "67;61;183;81", "wc_weaknesses": "207;241;304;326", "wc_questions": "116;398;213;568", "wc_limitations": "15;145;39;1", "wc_review": "459;952;947;1087", "wc_reply_reviewers": "263;83;85;166", "wc_reply_authors": "582;0;40;456", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;4", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 
], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.0, 55.56527692723217 ], "wc_strengths_avg": [ 98.0, 49.60846701924985 ], "wc_weaknesses_avg": [ 269.5, 47.699580710945455 ], "wc_questions_avg": [ 323.75, 173.63521388243802 ], "wc_limitations_avg": [ 50.0, 56.50663677834667 ], "wc_review_avg": [ 861.25, 238.9334373837199 ], "wc_reply_reviewers_avg": [ 149.25, 73.71694174340115 ], "wc_reply_authors_avg": [ 269.5, 253.84000866687663 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10540368831742861239&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nus.edu.sg;;nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "National University of Singapore;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://web.mit.edu", "aff_unique_abbr": "NUS;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Singapore", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Singapore;United States" }, { "title": "ASIF: Coupled Data Turns Unimodal Models to Multimodal without Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71339", "id": "XjOj3ZmWEl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3186591903d9db31770ad131adb5ceb4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XjOj3ZmWEl", "openreview": "https://openreview.net/forum?id=XjOj3ZmWEl", "poster": "/media/PosterPDFs/NeurIPS%202023/71339.png?t=1702197472.6542954", "slides": "https://nips.cc/virtual/2023/poster/71339", "video": "https://nips.cc/virtual/2023/poster/71339", "author_site": "Antonio Norelli, Marco Fumero, Valentino Maiorca, Luca Moschella, Emanuele Rodol\u00e0, Francesco Locatello", "tldr": "", "abstract": "CLIP proved that aligning visual and language spaces is key to solving many vision tasks without explicit training, but required to train image and text encoders from scratch on a huge dataset. LiT improved this by only training the text encoder and using a pre-trained vision network. In this paper, we show that a common space can be created without any training at all, using single-domain encoders (trained with or without supervision) and a much smaller amount of image-text pairs. Furthermore, our model has unique properties. Most notably, deploying a new version with updated training samples can be done in a matter of seconds. Additionally, the representations in the common space are easily interpretable as every dimension corresponds to the similarity of the input to a unique entry in the multimodal dataset. Experiments on standard zero-shot visual benchmarks demonstrate the typical transfer ability of image-text models. 
Overall, our method represents a simple yet surprisingly strong baseline for foundation multi-modal models, raising important questions on their data efficiency and on the role of retrieval in machine learning.", "keywords": "Representation learning;Multimodal models;Analogy;Sparsity;Nonparametric;Relative representations;Language;Semiotics", "primary_area": "", "supplementary_material": "/attachment/b24e7b14d00036d8a1fd87f2b858a168dd65a3f3.zip", "author": "Antonio Norelli;Marco Fumero;Valentino Maiorca;Luca Moschella;Emanuele Rodol\u00e0;Francesco Locatello", "authorids": "~Antonio_Norelli2;~Marco_Fumero1;~Valentino_Maiorca1;~Luca_Moschella1;~Emanuele_Rodol\u00e01;~Francesco_Locatello1", "gender": "M;;M;M;M;M", "homepage": "https://phd.uniroma1.it/web/ANTONIO-NORELLI_nP1612487_EN.aspx;;https://gladia.di.uniroma1.it/authors/maiorca/;https://luca.moschella.dev;;https://twitter.com/FrancescoLocat8", "dblp": "261/9526;273/9625;305/9789;205/3639;54/8401;195/6074", "google_scholar": ";VYEljYEAAAAJ;https://scholar.google.it/citations?user=2VUUfFEAAAAJ;4byA-nefJJMC;-EH4wBYAAAAJ;", "orcid": ";0000-0001-5614-5004;0000-0001-5795-3695;0000-0002-0550-7498;0000-0003-0091-7241;", "linkedin": ";;valentino-maiorca;lucamoschella/;;", "or_profile": "~Antonio_Norelli2;~Marco_Fumero1;~Valentino_Maiorca1;~Luca_Moschella1;~Emanuele_Rodol\u00e01;~Francesco_Locatello1", "aff": "Sapienza University of Rome;Sapienza University of Rome;University of Roma \"La Sapienza\";NVIDIA;Sapienza University of Rome;Amazon", "aff_domain": "uniroma1.it;uniroma1.it;uniroma1.it;nvidia.com;uniroma1.it;amazon.com", "position": "PhD student;PhD student;PhD student;Intern;Full Professor;Senior Applied Scientist", "bibtex": "@inproceedings{\nnorelli2023asif,\ntitle={{ASIF}: Coupled Data Turns Unimodal Models to Multimodal without Training},\nauthor={Antonio Norelli and Marco Fumero and Valentino Maiorca and Luca Moschella and Emanuele Rodol{\\`a} and Francesco Locatello},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XjOj3ZmWEl}\n}", "github": "", "project": "", "reviewers": "UoCi;t279;QV7o;2Qsg;BGFe", "pdf_size": 12648978, "rating": "4;4;7;7;7", "confidence": "4;5;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;1;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "54;128;58;100;78", "wc_strengths": "76;15;84;78;59", "wc_weaknesses": "150;511;130;176;75", "wc_questions": "86;10;6;17;25", "wc_limitations": "1;4;10;16;23", "wc_review": "367;668;288;387;260", "wc_reply_reviewers": "121;1623;54;31;318", "wc_reply_authors": "0;1262;0;0;224", "reply_reviewers": "1;3;1;1;1", "reply_authors": "1;3;1;1;2", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.6, 27.57970268150112 ], "wc_strengths_avg": [ 62.4, 25.11254666496411 ], "wc_weaknesses_avg": [ 208.4, 154.89945125790473 ], "wc_questions_avg": [ 28.8, 29.321664345667692 ], "wc_limitations_avg": [ 10.8, 7.984985911070852 ], "wc_review_avg": [ 394.0, 144.94550700176947 ], "wc_reply_reviewers_avg": [ 429.4, 605.2849246429321 ], "wc_reply_authors_avg": [ 297.2, 490.1389190831514 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 36, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3000153958594666831&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uniroma1.it;uniroma1.it;uniroma1.it;nvidia.com;uniroma1.it;amazon.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Sapienza University of Rome;University of Rome La Sapienza;NVIDIA;Amazon", "aff_unique_dep": ";;NVIDIA Corporation;Amazon.com, Inc.", "aff_unique_url": "https://www.uniroma1.it;https://www.uniroma1.it;https://www.nvidia.com;https://www.amazon.com", "aff_unique_abbr": "Sapienza;La Sapienza;NVIDIA;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "Italy;United States" }, { "title": "Scientific Document Retrieval using Multi-level Aspect-based Queries", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73559", "id": "XjaWEAyToL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/78f9c04bdcb06f1ada3902912d8b64ba-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=XjaWEAyToL", "openreview": "https://openreview.net/forum?id=XjaWEAyToL", "poster": "/media/PosterPDFs/NeurIPS%202023/73559.png?t=1702338486.977439", "slides": "https://nips.cc/virtual/2023/poster/73559", "video": "https://nips.cc/virtual/2023/poster/73559", "author_site": "Jianyou (Andre) Wang, Kaicheng Wang, Xiaoyue Wang, Prudhviraj Naidu, Leon Bergen, Ramamohan Paturi", "tldr": "", "abstract": "In scientific research, the ability to effectively retrieve relevant documents based on complex, multifaceted queries is critical. Existing evaluation datasets for this task are limited, primarily due to the high costs and effort required to annotate resources that effectively represent complex queries. To address this, we propose a novel task, $\\textbf{S}$cientific $\\textbf{Do}$cument $\\textbf{R}$etrieval using $\\textbf{M}$ulti-level $\\textbf{A}$spect-based qu$\\textbf{E}$ries (DORIS-MAE), which is designed to handle the complex nature of user queries in scientific research. We developed a benchmark dataset within the field of computer science, consisting of 100 human-authored complex query cases. For each complex query, we assembled a collection of 100 relevant documents and produced annotated relevance scores for ranking them. Recognizing the significant labor of expert annotation, we also introduce Anno-GPT, a scalable framework for evaluating the viability of Large Language Models (LLMs) such as ChatGPT-3.5 for expert-level dataset annotation tasks. The application of Anno-GPT to annotate the DORIS-MAE dataset resulted in a 500x reduction in cost, without compromising quality. Furthermore, due to the multi-tiered structure of these complex queries, our DORIS-MAE dataset can be extended to over 4,000 sub-query test cases without requiring additional annotation. We evaluated 17 recent retrieval methods on DORIS-MAE, observing notable performance drops compared to traditional datasets. This highlights DORIS-MAE's challenges and the need for better approaches to handle complex, multifaceted queries in scientific research. 
Our dataset and codebase are available at https://github.com/Real-Doris-Mae/Doris-Mae-Dataset .", "keywords": "natural language processing;large language model;retrieval;information retrieval;document retrieval;query-based document retrieval;aspect-based document similarity;benchmark dataset", "primary_area": "", "supplementary_material": "/attachment/99cb60c9da8be137462040e7dbeae9c3c3864147.pdf", "author": "Jianyou Wang;Kaicheng Wang;Xiaoyue Wang;Prudhviraj Naidu;Leon Bergen;Ramamohan Paturi", "authorids": "~Jianyou_Wang1;~Kaicheng_Wang1;xiw027@ucsd.edu;prnaidu@ucsd.edu;~Leon_Bergen1;~Ramamohan_Paturi1", "gender": "M;M;;;;Not Specified", "homepage": ";https://www.semanticscholar.org/author/Kaicheng-Wang/2257044695;;;;https://cseweb.ucsd.edu/~paturi/", "dblp": "251/3315;256/5299;;;136/8736;p/RPaturi.html", "google_scholar": "4nysj5kAAAAJ;;;;0FclEuAAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Jianyou_Wang1;~Kaicheng_Wang1;xiw027@ucsd.edu;prnaidu@ucsd.edu;~Leon_Bergen1;~Ramamohan_Paturi1", "aff": "University of California, San Diego;University of California, San Diego;;;Google;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;;;google.com;ucsd.edu", "position": "PhD student;MS student;;;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2023scientific,\ntitle={Scientific Document Retrieval using Multi-level Aspect-based Queries},\nauthor={Jianyou Wang and Kaicheng Wang and Xiaoyue Wang and Prudhviraj Naidu and Leon Bergen and Ramamohan Paturi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=XjaWEAyToL}\n}", "github": "", "project": "", "reviewers": "vmkV;xrVv;gbm8;uQNo;XCsh", "pdf_size": 1252403, "rating": "5;7;7;8;9", "confidence": "4;4;5;4;4", "wc_summary_and_contributions": "112;77;51;92;134", "wc_strengths": "65;43;42;107;81", "wc_improvement": "227;193;538;178;54", "wc_limitations": "3;120;2;24;42", "wc_correctness": "75;293;21;25;41", "wc_clarity": "4;241;11;41;8", "wc_relation_to_prior_work": "27;35;31;10;19", "wc_documentation": "17;22;1;13;17", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "531;1025;698;491;397", "wc_reply_reviewers": "13;30;10;27;29", "wc_reply_authors": "979;1103;777;328;152", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;3;1;2;1", "rating_avg": [ 7.2, 1.32664991614216 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 93.2, 28.505438077672128 ], "wc_strengths_avg": [ 67.6, 24.491631223746612 ], "wc_improvement_avg": [ 238.0, 160.99813663517972 ], "wc_limitations_avg": [ 38.2, 43.4897689117797 ], "wc_correctness_avg": [ 91.0, 102.7774294288391 ], "wc_clarity_avg": [ 61.0, 90.94833698314665 ], "wc_relation_to_prior_work_avg": [ 24.4, 8.935323161475472 ], "wc_documentation_avg": [ 14.0, 7.0992957397195395 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 628.4, 220.9285857466163 ], "wc_reply_reviewers_avg": [ 21.8, 8.518215775618742 ], "wc_reply_authors_avg": [ 667.8, 368.69575533222513 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.0753778361444409, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16086461890849024576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucsd.edu;ucsd.edu;;;google.com;ucsd.edu", "author_num": 6, "aff_unique_index": "0;0;1;0", 
"aff_unique_norm": "University of California, San Diego;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucsd.edu;https://www.google.com", "aff_unique_abbr": "UCSD;Google", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "San Diego;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Structure-free Graph Condensation: From Large-scale Graphs to Condensed Graph-free Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71338", "id": "XkcufOcgUc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/13183a224208671a6fc33ba1aa661ec4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XkcufOcgUc", "openreview": "https://openreview.net/forum?id=XkcufOcgUc", "poster": "/media/PosterPDFs/NeurIPS%202023/71338.png?t=1699489298.500222", "slides": "https://nips.cc/virtual/2023/poster/71338", "video": "https://nips.cc/virtual/2023/poster/71338", "author_site": "Xin Zheng, Miao Zhang, Chunyang Chen, Quoc Viet Hung Nguyen, Xingquan Zhu, Shirui Pan", "tldr": "", "abstract": "Graph condensation, which reduces the size of a large-scale graph by synthesizing a small-scale condensed graph as its substitution, has immediate benefits for various graph learning tasks.\nHowever, existing graph condensation methods rely on the joint optimization of nodes and structures in the condensed graph, and overlook critical issues in effectiveness and generalization ability.\nIn this paper, we advocate a new Structure-Free Graph Condensation paradigm, named SFGC, to distill a large-scale graph into a small-scale graph node set without explicit graph structures, i.e., graph-free data.\nOur idea is to implicitly encode topology structure information into the node attributes in the synthesized graph-free data, whose topology is reduced to an identity matrix.\nSpecifically, SFGC contains two collaborative components: \n(1) a training trajectory meta-matching scheme for effectively synthesizing small-scale graph-free data;\n(2) a graph neural feature score metric for dynamically evaluating the quality of the condensed data. 
\nThrough training trajectory meta-matching, SFGC aligns the long-term GNN learning behaviors between the large-scale graph and the condensed small-scale graph-free data, ensuring comprehensive and compact transfer of informative knowledge to the graph-free data.\nAfterward, the underlying condensed graph-free data would be dynamically evaluated with the graph neural feature score, which is a closed-form metric for ensuring the excellent expressiveness of the condensed graph-free data.\nExtensive experiments verify the superiority of SFGC across different condensation ratios.", "keywords": "graph neural networks (GNNs);graph condensation;training trajectory meta-matching;graph neural feature score", "primary_area": "", "supplementary_material": "/attachment/18ddcd7d4cf92d5fd3df8113917ca934e2f17226.pdf", "author": "Xin Zheng;Miao Zhang;Chunyang Chen;Quoc Viet Hung Nguyen;Xingquan Zhu;Shirui Pan", "authorids": "~Xin_Zheng4;~Miao_Zhang4;~Chunyang_Chen1;~Quoc_Viet_Hung_Nguyen1;~Xingquan_Zhu1;~Shirui_Pan1", "gender": "F;M;;M;M;", "homepage": ";https://sites.google.com/view/miaozhang;https://chunyang-chen.github.io/;https://experts.griffith.edu.au/9203-henry-nguyen/;http://www.cse.fau.edu/~xqzhu/;", "dblp": ";60/7041-1.html;180/7246.html;88/302.html;https://dblp.uni-trier.de/pid/26/4253.html;91/8171", "google_scholar": "WAl7OtMAAAAJ;6EUV_UMAAAAJ;3tyGlPsAAAAJ;ntkO_bEAAAAJ;YhKZXtcAAAAJ;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ", "orcid": "0000-0003-0915-7787;0000-0002-1262-4174;;0000-0002-9687-1315;;0000-0003-0794-527X", "linkedin": ";miao-zhang-71b13a177/;;quoc-viet-hung-nguyen-9304b348/;;", "or_profile": "~Xin_Zheng4;~Miao_Zhang4;~Chunyang_Chen1;~Quoc_Viet_Hung_Nguyen1;~Xingquan_Zhu1;~Shirui_Pan1", "aff": "Monash University;Harbin Institute of Technology (Shenzhen);Monash University;Griffith University;Florida Atlantic University;Griffith University", "aff_domain": "monash.edu;hit.edu.cn;monash.edu;griffith.edu.au;fau.edu;griffith.edu.au", "position": "PhD student;Full Professor;Associate Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2023structurefree,\ntitle={Structure-free Graph Condensation: From Large-scale Graphs to Condensed Graph-free Data},\nauthor={Xin Zheng and Miao Zhang and Chunyang Chen and Quoc Viet Hung Nguyen and Xingquan Zhu and Shirui Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XkcufOcgUc}\n}", "github": "", "project": "", "reviewers": "PLzT;8VHW;yKD6;KcU6;U4b1;4KcB", "pdf_size": 2017566, "rating": "5;5;6;6;6;7", "confidence": "2;3;1;4;2;4", "soundness": "2;3;4;3;4;4", "novelty": "2;2;4;3;3;4", "presentation": "2;3;3;4;3;3", "wc_summary": "52;81;38;88;88;98", "wc_strengths": "44;70;82;58;74;261", "wc_weaknesses": "104;151;32;194;22;112", "wc_questions": "32;145;61;27;54;8", "wc_limitations": "29;156;46;1;23;10", "wc_review": "261;603;259;368;261;489", "wc_reply_reviewers": "17;18;12;28;0;15", "wc_reply_authors": "83;0;0;0;0;0", "reply_reviewers": "1;1;1;1;0;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 2.6666666666666665, 1.1055415967851334 ], "soundness_avg": [ 3.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 74.16666666666667, 21.5902498570278 ], "wc_strengths_avg": [ 98.16666666666667, 73.82505596942605 ], "wc_weaknesses_avg": [ 102.5, 
60.92002954693965 ], "wc_questions_avg": [ 54.5, 44.07852840858763 ], "wc_limitations_avg": [ 44.166666666666664, 51.99171942616333 ], "wc_review_avg": [ 373.5, 131.9491695060387 ], "wc_reply_reviewers_avg": [ 15.0, 8.32666399786453 ], "wc_reply_authors_avg": [ 13.833333333333334, 30.932273688747095 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3656362120635652, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8532138250787403940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "monash.edu;hit.edu.cn;monash.edu;griffith.edu.au;fau.edu;griffith.edu.au", "author_num": 6, "aff_unique_index": "0;1;0;2;3;2", "aff_unique_norm": "Monash University;Harbin Institute of Technology;Griffith University;Florida Atlantic University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.monash.edu;http://en.hhit.edu.cn/;https://www.griffith.edu.au;https://www.fau.edu", "aff_unique_abbr": "Monash;HIT;Griffith;FAU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;0;0;2;0", "aff_country_unique": "Australia;China;United States" }, { "title": "Exact Representation of Sparse Networks with Symmetric Nonnegative Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71337", "id": "XlvsieCnAX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/428ceef2cd8a53add7213e04d1746479-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XlvsieCnAX", "openreview": "https://openreview.net/forum?id=XlvsieCnAX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71337", "video": "https://nips.cc/virtual/2023/poster/71337", "author_site": "Sudhanshu Chanpuriya, Ryan Rossi, Anup B. Rao, Tung Mai, Nedim Lipka, Zhao Song, Cameron Musco", "tldr": "", "abstract": "Graph models based on factorization of the adjacency matrix often fail to capture network structures related to links between dissimilar nodes (heterophily). We introduce a novel graph factorization model that leverages two nonnegative vectors per node to interpretably account for links between both similar and dissimilar nodes. We prove that our model can exactly represent any graph with low *arboricity*, a property that many real-world networks satisfy; our proof also applies to related models but has much greater scope than the closest prior bound, which is based on low *max degree*. Our factorization also has compelling properties besides expressiveness: due to its symmetric structure and nonnegativity, fitting the model inherently finds node communities, and the model's link predictions can be interpreted in terms of these communities. In experiments on real-world networks, we demonstrate our factorization's effectiveness on a variety of tasks, including community detection and link prediction.", "keywords": "graph;network;embeddings;arboricity;factorization;model;community;nonnegative", "primary_area": "", "supplementary_material": "/attachment/df7b5ff814e21afb2633bf28a320920e1e1c0512.zip", "author": "Sudhanshu Chanpuriya;Ryan A. 
Rossi;Anup Rao;Tung Mai;Nedim Lipka;Zhao Song;Cameron N Musco", "authorids": "~Sudhanshu_Chanpuriya1;~Ryan_A._Rossi2;~Anup_Rao1;~Tung_Mai1;~Nedim_Lipka1;~Zhao_Song3;~Cameron_N_Musco1", "gender": ";;;M;;M;M", "homepage": ";;;;;https://www.youtube.com/@zhaosong2031;https://people.cs.umass.edu/~cmusco/", "dblp": ";;63/6846;177/8902.html;;76/4051-2;149/2327", "google_scholar": ";;pkwXPU0AAAAJ;eUt8nlIAAAAJ;;yDZct7UAAAAJ;EeYGZCwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Sudhanshu_Chanpuriya1;~Ryan_A._Rossi2;~Anup_Rao1;~Tung_Mai1;~Nedim_Lipka1;~Zhao_Song3;~Cameron_N_Musco1", "aff": ";;Adobe Systems;Adobe;;Adobe;University of Massachusetts, Amherst", "aff_domain": ";;adobe.com;adobe.com;;adobe.com;umass.edu", "position": ";;Researcher;Research Scientist;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchanpuriya2023exact,\ntitle={Exact Representation of Sparse Networks with Symmetric Nonnegative Embeddings},\nauthor={Sudhanshu Chanpuriya and Ryan A. Rossi and Anup Rao and Tung Mai and Nedim Lipka and Zhao Song and Cameron N Musco},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XlvsieCnAX}\n}", "github": "", "project": "", "reviewers": "VwbN;SSbD;gKGt;zLuj", "pdf_size": 499907, "rating": "4;5;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;4;3", "wc_summary": "69;102;148;34", "wc_strengths": "41;78;181;27", "wc_weaknesses": "92;172;149;141", "wc_questions": "61;66;4;144", "wc_limitations": "16;23;67;26", "wc_review": "279;441;549;372", "wc_reply_reviewers": "27;65;17;163", "wc_reply_authors": "167;235;0;623", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;1;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.25, 42.04982164052542 ], "wc_strengths_avg": [ 81.75, 60.2551864987571 ], "wc_weaknesses_avg": [ 138.5, 29.159046623646667 ], "wc_questions_avg": [ 68.75, 49.806500579743606 ], "wc_limitations_avg": [ 33.0, 19.96246477767713 ], "wc_review_avg": [ 410.25, 98.59861814447503 ], "wc_reply_reviewers_avg": [ 68.0, 57.697486947006624 ], "wc_reply_authors_avg": [ 256.25, 228.35649213455702 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1162379834652537135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;adobe.com;adobe.com;;adobe.com;umass.edu", "author_num": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Adobe;University of Massachusetts Amherst", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.umass.edu", "aff_unique_abbr": "Adobe;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distributed Inference and Fine-tuning of Large Language Models Over The Internet", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71336", "id": "XmN7ZNbUAe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28bf1419b9a1f908c15f6195f58cb865-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=XmN7ZNbUAe", "openreview": "https://openreview.net/forum?id=XmN7ZNbUAe", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71336", "video": "https://nips.cc/virtual/2023/poster/71336", "author_site": "Alexander Borzunov, Max Ryabinin, Artem Chumachenko, Dmitry Baranchuk, Tim Dettmers, Younes Belkada, Pavel Samygin, Colin Raffel", "tldr": "", "abstract": "Large language models (LLMs) are useful in many NLP tasks and become more capable with size, with the best open-source models having over 50 billion parameters. However, using these 50B+ models requires high-end hardware, making them inaccessible to most researchers. In this work, we investigate methods for cost-efficient inference and fine-tuning of LLMs, comparing local and distributed strategies. We observe that a large enough model (50B+) can run efficiently even on geodistributed devices in a consumer-grade network. This could allow running LLM efficiently by pooling together idle compute resources of multiple research groups and volunteers. We address two open problems: (1) how to perform inference and fine-tuning reliably if any device can disconnect abruptly and (2) how to partition LLMs between devices with uneven hardware, joining and leaving at will. In order to do that, we develop special fault-tolerant inference algorithms and load-balancing protocols that automatically assign devices to maximize the total system throughput. We showcase these algorithms in Petals \u2014 a decentralized system that runs Llama 2 (70B) and BLOOM (176B) over the Internet up to $10\\times$ faster than offloading for interactive generation. We evaluate the performance of our system in simulated conditions and a real-world setup spanning two continents.", "keywords": "volunteer computing;distributed deep learning;distributed inference;efficient inference;large language models", "primary_area": "", "supplementary_material": "", "author": "Alexander Borzunov;Max Ryabinin;Artem Chumachenko;Dmitry Baranchuk;Tim Dettmers;Younes Belkada;Pavel Samygin;Colin Raffel", "authorids": "~Alexander_Borzunov1;~Max_Ryabinin1;~Artem_Chumachenko1;~Dmitry_Baranchuk2;~Tim_Dettmers2;~Younes_Belkada1;~Pavel_Samygin1;~Colin_Raffel1", "gender": "M;Not Specified;M;M;M;M;M;", "homepage": "https://github.com/borzunov;https://mryab.github.io/;;;https://timdettmers.com/;https://younesbelkada.github.io/;;http://colinraffel.com", "dblp": "295/8854;276/0192;;215/3712;172/1045;;;149/0082", "google_scholar": "https://scholar.google.ru/citations?user=HdwzsCMAAAAJ;930PERsAAAAJ;brCfhZkAAAAJ;NiPmk8oAAAAJ;lHI3w5kAAAAJ;;;I66ZBYwAAAAJ", "orcid": ";;;0000-0001-7660-3666;;;;", "linkedin": ";;;;;;pavel-samygin-535b151a6/;", "or_profile": "~Alexander_Borzunov1;~Max_Ryabinin1;~Artem_Chumachenko1;~Dmitry_Baranchuk2;~Tim_Dettmers2;~Younes_Belkada1;~Pavel_Samygin1;~Colin_Raffel1", "aff": "HSE University;Yandex;;Higher School of Economics;University of Washington;;;University of North Carolina, Chapel Hill", "aff_domain": "hse.ru;yandex-team.ru;;hse.ru;cs.washington.edu;;;unc.edu", "position": "Instructor;Research Scientist;;PhD student;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nborzunov2023distributed,\ntitle={Distributed Inference and Fine-tuning of Large Language Models Over The Internet},\nauthor={Alexander Borzunov and Max Ryabinin and Artem Chumachenko and Dmitry Baranchuk and Tim Dettmers and Younes Belkada and Pavel Samygin and Colin Raffel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XmN7ZNbUAe}\n}", "github": "", "project": "", "reviewers": "8b6R;2M8A;DRjo;zN15", "pdf_size": 676737, "rating": "5;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "100;106;78;79", "wc_strengths": "109;91;53;43", "wc_weaknesses": "137;34;190;151", "wc_questions": "5;39;74;3", "wc_limitations": "21;110;53;3", "wc_review": "372;380;448;279", "wc_reply_reviewers": "0;0;66;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.75, 12.43734296383275 ], "wc_strengths_avg": [ 74.0, 27.0 ], "wc_weaknesses_avg": [ 128.0, 57.64113114781839 ], "wc_questions_avg": [ 30.25, 29.029080247227952 ], "wc_limitations_avg": [ 46.75, 40.671703922997864 ], "wc_review_avg": [ 369.75, 60.143058618597045 ], "wc_reply_reviewers_avg": [ 16.5, 28.578838324886476 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11981493985293993587&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "hse.ru;yandex-team.ru;;hse.ru;cs.washington.edu;;;unc.edu", "author_num": 8, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Higher School of Economics;Yandex;University of Washington;University of North Carolina", "aff_unique_dep": ";;;", "aff_unique_url": "https://hse.ru;https://yandex.com;https://www.washington.edu;https://www.unc.edu", "aff_unique_abbr": "HSE;Yandex;UW;UNC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "Russian Federation;United States" }, { "title": "Res-Tuning: A Flexible and Efficient Tuning Paradigm via Unbinding Tuner from Backbone", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71335", "id": "XmpthbaJql", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8514a5203b87cba5e440bd62ab18f2b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XmpthbaJql", "openreview": "https://openreview.net/forum?id=XmpthbaJql", "poster": "/media/PosterPDFs/NeurIPS%202023/71335.png?t=1701616729.2438476", "slides": "https://nips.cc/virtual/2023/poster/71335", "video": "https://nips.cc/virtual/2023/poster/71335", "author_site": "Zeyinzi Jiang, Chaojie Mao, Ziyuan Huang, Ao Ma, Yiliang Lv, Yujun Shen, Deli Zhao, Jingren Zhou", "tldr": "", "abstract": "Parameter-efficient tuning has become a trend in transferring large-scale foundation models to downstream applications. Existing methods typically embed some light-weight tuners into the backbone, where both the design and the learning of the tuners are highly dependent on the base model. This work offers a new tuning paradigm, dubbed Res-Tuning, which intentionally unbinds tuners from the backbone. With both theoretical and empirical evidence, we show that popular tuning approaches have their equivalent counterparts under our unbinding formulation, and hence can be integrated into our framework effortlessly. 
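As an illustration of the unbinding idea described above, here is a minimal PyTorch-style sketch (our own reading, not the paper's code): the backbone block stays frozen while a light-weight tuner runs in a parallel bypass whose output is combined residually. The `ResTunedBlock` name, the adapter-style tuner, and the bottleneck width are illustrative assumptions.

```python
import torch
import torch.nn as nn

class ResTunedBlock(nn.Module):
    """Sketch of an unbound tuner (assumed design, not the paper's exact one):
    the tuner is a parallel bypass around a frozen backbone block, so its
    design is independent of the block's internals."""

    def __init__(self, block: nn.Module, dim: int, bottleneck: int = 16):
        super().__init__()
        self.block = block
        for p in self.block.parameters():  # the backbone stays frozen
            p.requires_grad_(False)
        # Any light-weight tuner fits here; an adapter-style MLP is one choice.
        self.tuner = nn.Sequential(
            nn.Linear(dim, bottleneck), nn.GELU(), nn.Linear(bottleneck, dim)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Unbound formulation: frozen backbone path plus tuner bypass,
        # combined additively; only the tuner receives gradient updates.
        return self.block(x) + self.tuner(x)
```

The memory-efficient variant described next would additionally detach the bypass input (e.g., `self.tuner(x.detach())`) so that back-propagation never traverses the backbone's activations.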
Thanks to the structural disentanglement, we manage to free the design of tuners from the network architecture, facilitating flexible combination of various tuning strategies. We further propose a memory-efficient variant of Res-Tuning, where the bypass (i.e., formed by a sequence of tuners) is effectively detached from the main branch, such that the gradients are back-propagated only to the tuners but not to the backbone. Such a detachment also allows one-time backbone forward for multi-task inference. Extensive experiments on both discriminative and generative tasks demonstrate the superiority of our method over existing alternatives from the perspectives of efficacy and efficiency. Project page: https://res-tuning.github.io/.", "keywords": "Parameter-efficient Transfer Learning;Memory-efficient Transfer Learning;Residual Network;Vision Transformer;Foundation Model", "primary_area": "", "supplementary_material": "", "author": "Zeyinzi Jiang;Chaojie Mao;Ziyuan Huang;Ao Ma;Yiliang Lv;Yujun Shen;Deli Zhao;Jingren Zhou", "authorids": "~Zeyinzi_Jiang1;~Chaojie_Mao1;~Ziyuan_Huang1;~Ao_Ma2;~Yiliang_Lv2;~Yujun_Shen1;~Deli_Zhao1;~Jingren_Zhou1", "gender": "M;M;M;M;M;;M;M", "homepage": "https://github.com/jiangzeyinzi;https://maochaojie.github.io/;https://huang-ziyuan.github.io/;https://github.com/MaAo;https://www.researchgate.net/profile/Lv_Yiliang;;https://zhaodeli.github.io;", "dblp": "341/5971;214/4135;;;;;77/1992;84/2644", "google_scholar": ";C9ETPdUAAAAJ;A9D-disAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0001-8876-2189;;;;;0000-0002-8838-578X;", "linkedin": ";;ziyuan-huang-731b78177/;;;;;", "or_profile": "~Zeyinzi_Jiang1;~Chaojie_Mao1;~Ziyuan_Huang1;~Ao_Ma2;~Yiliang_Lv2;~Yujun_Shen1;~Deli_Zhao1;~Jingren_Zhou1", "aff": "Alibaba Group;Alibaba Group;National University of Singapore;Alibaba Group;Alibaba DAMO Academy;;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;u.nus.edu;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;Researcher;PhD student;Researcher;Researcher;;Director;Researcher", "bibtex": "@inproceedings{\njiang2023restuning,\ntitle={Res-Tuning: A Flexible and Efficient Tuning Paradigm via Unbinding Tuner from Backbone},\nauthor={Zeyinzi Jiang and Chaojie Mao and Ziyuan Huang and Ao Ma and Yiliang Lv and Yujun Shen and Deli Zhao and Jingren Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XmpthbaJql}\n}", "github": "", "project": "", "reviewers": "nLwc;Yhf3;mYTv;hNoJ;DEjH", "pdf_size": 49185135, "rating": "5;5;5;6;6", "confidence": "5;5;5;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "2;3;3;4;2", "wc_summary": "66;31;68;64;74", "wc_strengths": "52;20;48;76;43", "wc_weaknesses": "284;161;217;110;97", "wc_questions": "4;148;95;72;156", "wc_limitations": "1;7;16;23;28", "wc_review": "407;367;444;345;398", "wc_reply_reviewers": "36;26;0;20;40", "wc_reply_authors": "53;91;120;20;19", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 60.6, 15.173661390712526 ], "wc_strengths_avg": [ 47.8, 17.93766985982293 ], "wc_weaknesses_avg": [ 173.8, 69.4878406629534 ], "wc_questions_avg": [
95.0, 55.389529696504916 ], "wc_limitations_avg": [ 15.0, 9.939818911831342 ], "wc_review_avg": [ 392.2, 34.055249228276104 ], "wc_reply_reviewers_avg": [ 24.4, 14.108153670838718 ], "wc_reply_authors_avg": [ 60.6, 39.72203418758913 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5617439812158115129&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "alibaba-inc.com;alibaba-inc.com;u.nus.edu;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Alibaba Group;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.nus.edu.sg", "aff_unique_abbr": "Alibaba;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Seeing is not always believing: Benchmarking Human and Model Perception of AI-Generated Images", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73558", "id": "Xoi31wJ5iI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/505df5ea30f630661074145149274af0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Xoi31wJ5iI", "openreview": "https://openreview.net/forum?id=Xoi31wJ5iI", "poster": "/media/PosterPDFs/NeurIPS%202023/73558.png?t=1697431767.9273856", "slides": "https://nips.cc/virtual/2023/poster/73558", "video": "https://nips.cc/virtual/2023/poster/73558", "author_site": "Zeyu Lu, Di Huang, LEI BAI, Jingjing Qu, Chengyue Wu, Xihui Liu, Wanli Ouyang", "tldr": "", "abstract": "Photos serve as a way for humans to record what they experience in their daily lives, and they are often regarded as trustworthy sources of information. However, there is a growing concern that the advancement of artificial intelligence (AI) technology may produce fake photos, which can create confusion and diminish trust in photographs. This study aims to comprehensively evaluate agents for distinguishing state-of-the-art AI-generated visual content. Our study benchmarks both human capability and cutting-edge fake image detection AI algorithms, using a newly collected large-scale fake image dataset Fake2M. In our human perception evaluation, titled HPBench, we discovered that humans struggle significantly to distinguish real photos from AI-generated ones, with a misclassification rate of 38.7\\%. Along with this, we conduct MPBench, an evaluation of model capability in detecting AI-generated images; the top-performing model in MPBench achieves a 13\\% failure rate under the same setting used in the human evaluation.\nWe hope that our study can raise awareness of the potential risks of AI-generated images and facilitate further research to prevent the spread of false information. 
More information is available at https://github.com/Inf-imagine/Sentry.", "keywords": "Human Perception;Model Perception;AI-Generated Images;Deepfake;Benchmark;Dataset", "primary_area": "", "supplementary_material": "/attachment/8dc13fb5ffde41e0069cec160458e024eb8d44d8.pdf", "author": "Zeyu Lu;Di Huang;LEI BAI;Jingjing Qu;Chengyue Wu;Xihui Liu;Wanli Ouyang", "authorids": "~Zeyu_Lu1;~Di_Huang6;~LEI_BAI1;~Jingjing_Qu1;~Chengyue_Wu1;~Xihui_Liu1;~Wanli_Ouyang1", "gender": "M;;M;F;M;F;", "homepage": ";;http://leibai.site/;;https://hills-code.github.io;https://xh-liu.github.io/;", "dblp": "285/3138;;119/1223-1;;;184/3911;", "google_scholar": "W_sLmX0AAAAJ;;https://scholar.google.com.au/citations?user=sakOO04AAAAJ;OpTJw68AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ;", "orcid": "0000-0003-0494-911X;;0000-0003-3378-7201;;;0000-0003-1831-9952;", "linkedin": ";;lei-bai-641370153/;;;;", "or_profile": "~Zeyu_Lu1;~Di_Huang6;~LEI_BAI1;~Jingjing_Qu1;~Chengyue_Wu1;~Xihui_Liu1;~Wanli_Ouyang1", "aff": "Harbin Institute of Technology;;Shanghai AI Laboratory;Shanghai Artifical Intelligence;Harbin Institute of Technology;University of Hong Kong;", "aff_domain": "hit.edu.cn;;pjlab.org.cn;shlab.org.cn;hit.edu.cn;hku.hk;", "position": "Undergrad student;;Researcher;Associate Professor;Undergrad student;Assistant Professor;", "bibtex": "@inproceedings{\nlu2023seeing,\ntitle={Seeing is not always believing: Benchmarking Human and Model Perception of {AI}-Generated Images},\nauthor={Zeyu Lu and Di Huang and LEI BAI and Jingjing Qu and Chengyue Wu and Xihui Liu and Wanli Ouyang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Xoi31wJ5iI}\n}", "github": "", "project": "", "reviewers": "gbTx;kKjy;dyrB;5zTd;ZytP", "pdf_size": 2771336, "rating": "4;5;6;6;7", "confidence": "5;4;4;4;3", "wc_summary_and_contributions": "42;59;88;94;68", "wc_strengths": "19;98;35;44;98", "wc_improvement": "427;13;46;218;80", "wc_limitations": "8;115;2;85;189", "wc_correctness": "1;24;8;33;5", "wc_clarity": "6;9;4;9;8", "wc_relation_to_prior_work": "1;8;5;38;7", "wc_documentation": "1;28;18;1;42", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "506;355;207;523;498", "wc_reply_reviewers": "0;79;0;150;264", "wc_reply_authors": "1512;1551;163;761;849", "reply_reviewers": "0;1;0;1;3", "reply_authors": "3;4;1;2;4", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 70.2, 19.019989484749985 ], "wc_strengths_avg": [ 58.8, 32.993332659796586 ], "wc_improvement_avg": [ 156.8, 152.05709454017594 ], "wc_limitations_avg": [ 79.8, 69.85527897016803 ], "wc_correctness_avg": [ 14.2, 12.221292893961753 ], "wc_clarity_avg": [ 7.2, 1.9390719429665317 ], "wc_relation_to_prior_work_avg": [ 11.8, 13.317657451669193 ], "wc_documentation_avg": [ 18.0, 15.836666315863324 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 417.8, 121.37446189376084 ], "wc_reply_reviewers_avg": [ 98.6, 99.82705044225237 ], "wc_reply_authors_avg": [ 967.2, 517.85264313316 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9302605094190635, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6618198924139578449&as_sdt=400005&sciodt=0,14&hl=en", 
"gs_version_total": 6, "email": "hit.edu.cn;;pjlab.org.cn;shlab.org.cn;hit.edu.cn;hku.hk;", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Harbin Institute of Technology;Shanghai AI Laboratory;Shanghai Artifical Intelligence;University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.shanghai-ai-lab.com;;https://www.hku.hk", "aff_unique_abbr": "HIT;SAIL;;HKU", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Harbin;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SQ Lower Bounds for Non-Gaussian Component Analysis with Weaker Assumptions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71334", "id": "Xp68yXQiRk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d00a699f60e642b310eb04b76cc7731-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xp68yXQiRk", "openreview": "https://openreview.net/forum?id=Xp68yXQiRk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71334", "video": "https://nips.cc/virtual/2023/poster/71334", "author_site": "Ilias Diakonikolas, Daniel Kane, Lisheng Ren, Yuxin Sun", "tldr": "", "abstract": "We study the complexity of Non-Gaussian Component Analysis (NGCA) in the Statistical Query (SQ) model.\nPrior work developed a methodology to prove SQ lower bounds for NGCA that have been applicable to a wide range of contexts.\nIn particular, it was known that for any univariate distribution $A$ satisfying certain conditions,\ndistinguishing between a standard multivariate Gaussian and a distribution that behaves like $A$ in a random hidden direction and like a standard Gaussian in the orthogonal complement, is SQ-hard.\nThe required conditions were that (1) $A$ matches many low-order moments with a standard Gaussian,\nand (2) the chi-squared norm of $A$ with respect to the standard Gaussian is finite.\nWhile the moment-matching condition is clearly necessary for hardness, the chi-squared condition was only required for technical reasons.\nIn this work, we establish that the latter condition is indeed not necessary.\nIn particular, we prove near-optimal SQ lower bounds for NGCA under the moment-matching condition only.", "keywords": "Non-Gaussian Component Analysis", "primary_area": "", "supplementary_material": "/attachment/43069150fdd815e3f8762daf6157698a828d0185.pdf", "author": "Ilias Diakonikolas;Daniel Kane;Lisheng Ren;Yuxin Sun", "authorids": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Lisheng_Ren1;~Yuxin_Sun2", "gender": "M;M;;M", "homepage": "http://www.iliasdiakonikolas.org/;http://cseweb.ucsd.edu/~dakane/;https://www.wisc.edu/directories/person/?q=Lisheng%20Ren&email=lren29%40wisc.edu&savedQuery=Lisheng%20Ren&returnPath=%2Fdirectories%2F;https://pages.cs.wisc.edu/~yxsun/", "dblp": "d/IliasDiakonikolas;52/6817;93/495;", "google_scholar": "Vb3FLmkAAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;;8VuomNgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Lisheng_Ren1;~Yuxin_Sun2", "aff": "University of Wisconsin, Madison;University of California, San Diego;University of Wisconsin - Madison;Department of Computer Science, University of Wisconsin, Madison", "aff_domain": "wisc.edu;ucsd.edu;wisc.edu;cs.wisc.edu", "position": "Associate Professor;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2023sq,\ntitle={{SQ} Lower Bounds for Non-Gaussian Component Analysis with 
Weaker Assumptions},\nauthor={Ilias Diakonikolas and Daniel Kane and Lisheng Ren and Yuxin Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xp68yXQiRk}\n}", "github": "", "project": "", "reviewers": "g3gW;oUue;YVEY;nJue", "pdf_size": 377138, "rating": "4;7;7;7", "confidence": "2;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;4;3", "presentation": "1;3;3;3", "wc_summary": "165;248;72;88", "wc_strengths": "21;161;174;39", "wc_weaknesses": "221;34;146;26", "wc_questions": "71;81;51;103", "wc_limitations": "35;12;10;2", "wc_review": "513;536;453;258", "wc_reply_reviewers": "723;43;38;0", "wc_reply_authors": "1025;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 143.25, 69.95489618318364 ], "wc_strengths_avg": [ 98.75, 69.19673041408821 ], "wc_weaknesses_avg": [ 106.75, 81.25076922712793 ], "wc_questions_avg": [ 76.5, 18.728320800328042 ], "wc_limitations_avg": [ 14.75, 12.275483697190918 ], "wc_review_avg": [ 440.0, 109.35949890155861 ], "wc_reply_reviewers_avg": [ 201.0, 301.8352199462482 ], "wc_reply_authors_avg": [ 256.25, 443.8380194395248 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=898730502046736881&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 10, "email": "wisc.edu;ucsd.edu;wisc.edu;cs.wisc.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Wisconsin;University of California, San Diego;University of Wisconsin-Madison", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UCSD;UW-Madison", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Madison;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Regularized Behavior Cloning for Blocking the Leakage of Past Action Information", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71333", "id": "XpmJNP8BVA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/06b71ad997f7e3e4b2e2f2ea12e5a759-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XpmJNP8BVA", "openreview": "https://openreview.net/forum?id=XpmJNP8BVA", "poster": "/media/PosterPDFs/NeurIPS%202023/71333.png?t=1701844664.7137234", "slides": "https://nips.cc/virtual/2023/poster/71333", "video": "https://nips.cc/virtual/2023/poster/71333", "author_site": "Seokin Seo, HyeongJoo Hwang, Hongseok Yang, Kee-Eung Kim", "tldr": "", "abstract": "For partially observable environments, imitation learning with observation histories (ILOH) assumes that control-relevant information is sufficiently captured in the observation histories for imitating the expert actions. In the offline setting where the agent is required to learn to imitate without interaction with the environment, behavior cloning (BC) has been shown to be a simple yet effective method for imitation learning. 
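For concreteness, the ILOH-via-BC objective under discussion can be written as follows (a standard formulation in notation we assume here, not taken from the paper), where $h_t=(o_{t-k},\dots,o_t)$ is the observation history, $a_t$ the expert action, and $\mathcal{D}$ the offline demonstration dataset:

```latex
\min_{\pi}\; \mathbb{E}_{(h_t,\, a_t)\sim\mathcal{D}}
  \big[ -\log \pi(a_t \mid h_t) \big]
```

The failure mode described next arises because, when $h_t$ implicitly encodes $a_{t-1}$, this objective can be driven down by simply copying past actions instead of attending to control-relevant features.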
However, when the information about the actions executed in the past timesteps leaks into the observation histories, ILOH via BC often ends up imitating its own past actions. In this paper, we address this catastrophic failure by proposing a principled regularization for BC, which we name Past Action Leakage Regularization (PALR). The main idea behind our approach is to leverage the classical notion of conditional independence to mitigate the leakage. We compare different instances of our framework with natural choices of conditional independence metric and its estimator. The result of our comparison advocates the use of a particular kernel-based estimator for the conditional independence metric. We conduct an extensive set of experiments on benchmark datasets in order to assess the effectiveness of our regularization method. The experimental results show that our method significantly outperforms prior related approaches, highlighting its potential to successfully imitate expert actions when the past action information leaks into the observation histories.", "keywords": "Imitation learning;Information leakage;Causal Confusion", "primary_area": "", "supplementary_material": "", "author": "Seokin Seo;HyeongJoo Hwang;Hongseok Yang;Kee-Eung Kim", "authorids": "~Seokin_Seo1;~HyeongJoo_Hwang1;~Hongseok_Yang2;~Kee-Eung_Kim2", "gender": ";M;M;M", "homepage": "https://sites.google.com/view/siseo0;https://github.com/gr8joo;http://ailab.kaist.ac.kr;https://sites.google.com/view/hongseokyang/home", "dblp": "231/7699;;35/6703;82/5808", "google_scholar": "https://scholar.google.com/citations?hl=en;IK5bNo0AAAAJ;https://scholar.google.com/citations?hl=ko;cLuwH14AAAAJ", "orcid": ";;;", "linkedin": "seokin-seo-026ab4150/;;;", "or_profile": "~Seokin_Seo1;~HyeongJoo_Hwang1;~Kee-Eung_Kim2;~Hongseok_Yang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Institute for Basic Science", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;ibs.re.kr", "position": "PhD student;PhD student;Full Professor;Visiting Research Fellow", "bibtex": "@inproceedings{\nseo2023regularized,\ntitle={Regularized Behavior Cloning for Blocking the Leakage of Past Action Information},\nauthor={Seokin Seo and HyeongJoo Hwang and Hongseok Yang and Kee-Eung Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XpmJNP8BVA}\n}", "github": "", "project": "", "reviewers": "Xa8t;xrec;jVBb;UXMA;4vtt;TkkQ", "pdf_size": 653968, "rating": "5;5;5;7;7;8", "confidence": "3;2;3;5;5;3", "soundness": "2;3;3;3;3;3", "novelty": "2;3;3;3;3;3", "presentation": "3;3;3;4;3;4", "wc_summary": "102;50;94;77;73;99", "wc_strengths": "15;69;91;43;61;81", "wc_weaknesses": "171;45;415;90;319;44", "wc_questions": "90;64;2;58;43;2", "wc_limitations": "24;9;2;23;20;4", "wc_review": "402;237;604;291;516;230", "wc_reply_reviewers": "33;0;103;18;209;0", "wc_reply_authors": "16;0;292;555;51;0", "reply_reviewers": "1;0;1;1;1;0", "reply_authors": "2;1;2;3;2;1", "rating_avg": [ 6.166666666666667, 1.2133516482134197 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 82.5, 18.080837001274766 ], "wc_strengths_avg": [ 60.0, 25.15949125081825 ], "wc_weaknesses_avg": [ 180.66666666666666, 
141.09413721184714 ], "wc_questions_avg": [ 43.166666666666664, 32.240847107702095 ], "wc_limitations_avg": [ 13.666666666666666, 8.993825042154695 ], "wc_review_avg": [ 380.0, 141.4366760544567 ], "wc_reply_reviewers_avg": [ 60.5, 74.99055496083045 ], "wc_reply_authors_avg": [ 152.33333333333334, 206.92725506537045 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.8333333333333333, 0.6871842709362768 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5528656051505562, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14051498720609374719&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;ibs.re.kr", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Institute for Basic Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.ibs.re.kr", "aff_unique_abbr": "KAIST;IBS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Multi-Prompt Alignment for Multi-Source Unsupervised Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71332", "id": "Xq2s5yxzd2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eadeef7c51ad86989cc3b311cb49ec89-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xq2s5yxzd2", "openreview": "https://openreview.net/forum?id=Xq2s5yxzd2", "poster": "/media/PosterPDFs/NeurIPS%202023/71332.png?t=1699256712.7603412", "slides": "https://nips.cc/virtual/2023/poster/71332", "video": "https://nips.cc/virtual/2023/poster/71332", "author_site": "Haoran Chen, Xintong Han, Zuxuan Wu, Yu-Gang Jiang", "tldr": "", "abstract": "Most existing methods for unsupervised domain adaptation (UDA) rely on a shared network to extract domain-invariant features. However, when facing multiple source domains, optimizing such a network involves updating the parameters of the entire network, making it both computationally expensive and challenging, particularly when coupled with min-max objectives. Inspired by recent advances in prompt learning that adapts high-capacity models for downstream tasks in a computationally economic way, we introduce Multi-Prompt Alignment (MPA), a simple yet efficient framework for multi-source UDA. Given a source and target domain pair, MPA first trains an individual prompt to minimize the domain gap through a contrastive loss. Then, MPA denoises the learned prompts through an auto-encoding process and aligns them by maximizing the agreement of all the reconstructed prompts. Moreover, we show that the resulting subspace acquired from the auto-encoding process can easily generalize to a streamlined set of target domains, making our method more efficient for practical usage. 
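A rough sketch of the denoise-and-align stage just described might look like the following (the function name, autoencoder interface, and mean-squared agreement term are our assumptions; the paper's exact losses may differ):

```python
import torch

def mpa_denoise_and_align(prompts, encoder, decoder):
    """Sketch of MPA's second stage: denoise the learned per-domain prompts
    with an autoencoder, then align them by maximizing agreement -- here
    approximated by penalizing the spread of reconstructions around their
    mean. `prompts` is a list of [tokens, dim] tensors, one per source
    domain, each already trained with the contrastive domain-gap loss."""
    recons = [decoder(encoder(p)) for p in prompts]   # denoised prompts
    recon_loss = sum(((r - p) ** 2).mean() for r, p in zip(recons, prompts))
    center = torch.stack(recons).mean(dim=0)
    agreement_loss = sum(((r - center) ** 2).mean() for r in recons)
    return recon_loss + agreement_loss
```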
Extensive experiments show that MPA achieves state-of-the-art results on three popular datasets with an impressive average accuracy of 54.1% on DomainNet.", "keywords": "multi source unsupervised domain adaptation; transfer learning; computer vision", "primary_area": "", "supplementary_material": "/attachment/3734261da027464029d29d9b386188d9c197c232.zip", "author": "Haoran Chen;Xintong Han;Zuxuan Wu;Yu-Gang Jiang", "authorids": "~Haoran_Chen4;~Xintong_Han1;~Zuxuan_Wu1;~Yu-Gang_Jiang1", "gender": "M;M;M;M", "homepage": ";https://xthan.github.io/;https://zxwu.azurewebsites.net/;https://fvl.fudan.edu.cn/people/yugangjiang/", "dblp": ";120/9155;150/8447;24/5818", "google_scholar": "g3LC7pkAAAAJ;FGiWOIAAAAAJ;7t12hVkAAAAJ;f3_FP8AAAAAJ", "orcid": ";;;", "linkedin": ";xintong-han-2539b081/;;", "or_profile": "~Haoran_Chen4;~Xintong_Han1;~Zuxuan_Wu1;~Yu-Gang_Jiang1", "aff": "Fudan University;Huya Inc;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;huya.com;fudan.edu;fudan.edu.cn", "position": "PhD student;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023multiprompt,\ntitle={Multi-Prompt Alignment for Multi-Source Unsupervised Domain Adaptation},\nauthor={Haoran Chen and Xintong Han and Zuxuan Wu and Yu-Gang Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xq2s5yxzd2}\n}", "github": "", "project": "", "reviewers": "UUa1;tF9m;34CN;tSjx", "pdf_size": 1443012, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "4;2;3;3", "novelty": "3;2;3;3", "presentation": "4;2;3;3", "wc_summary": "76;75;20;160", "wc_strengths": "22;14;58;39", "wc_weaknesses": "151;89;36;76", "wc_questions": "132;4;5;223", "wc_limitations": "2;12;9;100", "wc_review": "383;194;128;598", "wc_reply_reviewers": "71;0;0;0", "wc_reply_authors": "69;69;69;69", "reply_reviewers": "1;0;0;0", "reply_authors": "3;3;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.75, 50.026867781223324 ], "wc_strengths_avg": [ 33.25, 16.90229274388537 ], "wc_weaknesses_avg": [ 88.0, 41.28559070668603 ], "wc_questions_avg": [ 91.0, 92.29030284921596 ], "wc_limitations_avg": [ 30.75, 40.14582792769381 ], "wc_review_avg": [ 325.75, 182.93492695491477 ], "wc_reply_reviewers_avg": [ 17.75, 30.74390183434757 ], "wc_reply_authors_avg": [ 69.0, 0.0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17513840757133828866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "fudan.edu.cn;huya.com;fudan.edu;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Fudan University;Huya Inc", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.huya.com", "aff_unique_abbr": "Fudan;Huya", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Locality-Aware Generalizable Implicit Neural Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71331", "id": "XqcXf7ix5q", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9713d53ee4f31781304b1ca43266f8d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XqcXf7ix5q", "openreview": "https://openreview.net/forum?id=XqcXf7ix5q", "poster": "/media/PosterPDFs/NeurIPS%202023/71331.png?t=1702275687.110465", "slides": "https://nips.cc/virtual/2023/poster/71331", "video": "https://nips.cc/virtual/2023/poster/71331", "author_site": "Doyup Lee, Doyup Lee, Chiheon Kim, Minsu Cho, WOOK SHIN HAN", "tldr": "", "abstract": "Generalizable implicit neural representation (INR) enables a single continuous function, i.e., a coordinate-based neural network, to represent multiple data instances by modulating its weights or intermediate features using latent codes. However, the expressive power of the state-of-the-art modulation is limited due to its inability to localize and capture fine-grained details of data entities such as specific pixels and rays. To address this issue, we propose a novel framework for generalizable INR that combines a transformer encoder with a locality-aware INR decoder. The transformer encoder predicts a set of latent tokens from a data instance to encode local information into each latent token. The locality-aware INR decoder extracts a modulation vector by selectively aggregating the latent tokens via cross-attention for a coordinate input and then predicts the output by progressively decoding with coarse-to-fine modulation through multiple frequency bandwidths. The selective token aggregation and the multi-band feature modulation enable us to learn locality-aware representation in spatial and spectral aspects, respectively. Our framework significantly outperforms previous generalizable INRs and validates the usefulness of the locality-aware latents for downstream tasks such as image generation.", "keywords": "implicit neural representations;representation learning;neural fields", "primary_area": "", "supplementary_material": "/attachment/c3c5d0ff5ed0b17d94267b251b23d28aad0632f9.zip", "author": "Doyup Lee;Chiheon Kim;Minsu Cho;Wook-Shin Han", "authorids": "~Doyup_Lee1;~Chiheon_Kim1;~Minsu_Cho1;~Wook-Shin_Han1", "gender": "M;;M;M", "homepage": ";;http://cvlab.postech.ac.kr/~mcho/;https://wscrony.github.io/", "dblp": "205/2368;200/8046;;62/2450", "google_scholar": "https://scholar.google.co.kr/citations?user=5rAj44kAAAAJ;nimFSSEAAAAJ;5TyoF5QAAAAJ;Jp_w2IwAAAAJ", "orcid": ";;;0000-0001-9206-9563", "linkedin": ";;minsu-cho-062b3750/;", "or_profile": "~Doyup_Lee1;~Chiheon_Kim1;~Minsu_Cho1;~Wook-Shin_Han1", "aff": "Kakao Brain;Kakao Brain;POSTECH;POSTECH", "aff_domain": "kakaobrain.com;kakaobrain.com;postech.ac.kr;postech.ac.kr", "position": "Researcher;Research Scientist;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlee2023localityaware,\ntitle={Locality-Aware Generalizable Implicit Neural Representation},\nauthor={Doyup Lee and Chiheon Kim and Minsu Cho and Wook-Shin Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XqcXf7ix5q}\n}", "github": "", "project": "", "reviewers": "KNfA;AhKU;TnVm;wXRn;9zhe;QDhX;binY", "pdf_size": 15437223, "rating": "3;4;5;5;6;6;7", "confidence": "5;4;2;3;3;3;3", "soundness": "1;2;3;3;3;3;3", "novelty": "2;3;2;3;3;3;4", "presentation": "2;1;2;3;3;3;4", "wc_summary": "74;143;66;68;59;107;142", "wc_strengths": "130;57;35;37;43;37;61", "wc_weaknesses": "298;309;76;60;67;144;121", "wc_questions": "68;370;15;2;2;79;92", "wc_limitations": 
"147;51;10;19;7;28;68", "wc_review": "717;930;202;186;178;395;484", "wc_reply_reviewers": "829;161;0;18;22;287;23", "wc_reply_authors": "1474;88;0;0;0;404;23", "reply_reviewers": "3;1;0;1;1;1;1", "reply_authors": "5;2;1;1;1;2;2", "rating_avg": [ 5.142857142857143, 1.2453996981544782 ], "confidence_avg": [ 3.2857142857142856, 0.880630571852711 ], "soundness_avg": [ 2.5714285714285716, 0.7284313590846836 ], "novelty_avg": [ 2.857142857142857, 0.6388765649999399 ], "presentation_avg": [ 2.5714285714285716, 0.9035079029052513 ], "wc_summary_avg": [ 94.14285714285714, 33.7191279815587 ], "wc_strengths_avg": [ 57.142857142857146, 31.224663198131292 ], "wc_weaknesses_avg": [ 153.57142857142858, 98.87778479785341 ], "wc_questions_avg": [ 89.71428571428571, 119.59540636868039 ], "wc_limitations_avg": [ 47.142857142857146, 45.62401802130281 ], "wc_review_avg": [ 441.7142857142857, 270.0723259138 ], "wc_reply_reviewers_avg": [ 191.42857142857142, 277.836980539132 ], "wc_reply_authors_avg": [ 284.14285714285717, 504.12652282974375 ], "reply_reviewers_avg": [ 1.1428571428571428, 0.8329931278350428 ], "reply_authors_avg": [ 2.0, 1.3093073414159542 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6884987079974276, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11481845565606252776&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kakaobrain.com;kakaobrain.com;postech.ac.kr;postech.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Kakao Brain;Pohang University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://brain.kakao.com;https://www.postech.ac.kr", "aff_unique_abbr": "Kakao Brain;POSTECH", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "A Randomized Approach to Tight Privacy Accounting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71330", "id": "XrqqPDAsRE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ae7df1f40f5faeda474b36b61197822-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XrqqPDAsRE", "openreview": "https://openreview.net/forum?id=XrqqPDAsRE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71330", "video": "https://nips.cc/virtual/2023/poster/71330", "author_site": "Jiachen (Tianhao) Wang, Saeed Mahloujifar, Tong Wu, Ruoxi Jia, Prateek Mittal", "tldr": "", "abstract": "Bounding privacy leakage over compositions, i.e., privacy accounting, is a key challenge in differential privacy (DP). However, the privacy parameter ($\\varepsilon$ or $\\delta$) is often easy to estimate but hard to bound. In this paper, we propose a new differential privacy paradigm called estimate-verify-release (EVR), which tackles the challenges of providing a strict upper bound for the privacy parameter in DP compositions by converting an *estimate* of privacy parameter into a formal guarantee. The EVR paradigm first verifies whether the mechanism meets the *estimated* privacy guarantee, and then releases the query output based on the verification result. The core component of the EVR is privacy verification. We develop a randomized privacy verifier using Monte Carlo (MC) technique. Furthermore, we propose an MC-based DP accountant that outperforms existing DP accounting techniques in terms of accuracy and efficiency. 
The MC-based DP verifier and accountant are applicable to an important and commonly used class of DP algorithms, including the famous DP-SGD. An empirical evaluation shows that the proposed EVR paradigm improves the utility-privacy tradeoff for privacy-preserving machine learning.", "keywords": "Differential Privacy; Privacy Accounting", "primary_area": "", "supplementary_material": "", "author": "Jiachen T. Wang;Saeed Mahloujifar;Tong Wu;Ruoxi Jia;Prateek Mittal", "authorids": "~Jiachen_T._Wang1;~Saeed_Mahloujifar1;~Tong_Wu1;~Ruoxi_Jia1;~Prateek_Mittal1", "gender": "M;M;;;M", "homepage": "https://www.cs.virginia.edu/~sm5fd/;https://tongwu2020.github.io/tongwu/;https://ruoxijia.info/;http://www.princeton.edu/~pmittal/;https://tianhaowang.netlify.app/", "dblp": "208/0825;;147/5355-1;;274/2144", "google_scholar": "kW-hl3YAAAAJ;dt0eV8CPx3AC;JCrug-YAAAAJ;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ;nvQOtgkAAAAJ", "orcid": ";;;0000-0002-4057-0118;", "linkedin": ";tongwu98/;;;tian-hao-wang/", "or_profile": "~Saeed_Mahloujifar1;~Tong_Wu1;~Ruoxi_Jia1;~Prateek_Mittal1;~Tianhao_Wang2", "aff": "Princeton University;Princeton University;Virginia Tech;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;vt.edu;princeton.edu;princeton.edu", "position": "Postdoc;PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Randomized Approach to Tight Privacy Accounting},\nauthor={Jiachen T. Wang and Saeed Mahloujifar and Tong Wu and Ruoxi Jia and Prateek Mittal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XrqqPDAsRE}\n}", "github": "", "project": "", "reviewers": "UAod;eMPH;hkgF;D6i7", "pdf_size": 2763878, "rating": "6;7;7;7", "confidence": "3;3;4;4", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "76;159;79;64", "wc_strengths": "59;68;57;53", "wc_weaknesses": "230;177;47;60", "wc_questions": "4;99;1;11", "wc_limitations": "1;5;1;1", "wc_review": "370;508;185;189", "wc_reply_reviewers": "65;115;0;0", "wc_reply_authors": "23;31;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.5, 37.659660115301094 ], "wc_strengths_avg": [ 59.25, 5.494315243958978 ], "wc_weaknesses_avg": [ 128.5, 77.44191371602331 ], "wc_questions_avg": [ 28.75, 40.720848468567056 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 313.0, 135.1240171102088 ], "wc_reply_reviewers_avg": [ 45.0, 48.34769901453429 ], "wc_reply_authors_avg": [ 13.5, 13.793114224133722 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4161793858641847174&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "princeton.edu;princeton.edu;vt.edu;princeton.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Princeton University;Virginia Tech", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.vt.edu", "aff_unique_abbr": "Princeton;VT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Is This Loss Informative? Faster Text-to-Image Customization by Tracking Objective Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71329", "id": "Xs6Xwc0Glj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/760e8857c7660fe50bac933161b14f41-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xs6Xwc0Glj", "openreview": "https://openreview.net/forum?id=Xs6Xwc0Glj", "poster": "/media/PosterPDFs/NeurIPS%202023/71329.png?t=1701697561.349138", "slides": "https://nips.cc/virtual/2023/poster/71329", "video": "https://nips.cc/virtual/2023/poster/71329", "author_site": "Anton Voronov, Mikhail Khoroshikh, Artem Babenko, Max Ryabinin", "tldr": "", "abstract": "Text-to-image generation models represent the next step of evolution in image synthesis, offering a natural way to achieve flexible yet fine-grained control over the result.\nOne emerging area of research is the fast adaptation of large text-to-image models to smaller datasets or new visual concepts.\nHowever, many efficient methods of adaptation have a long training time, which limits their practical applications, slows down experiments, and spends excessive GPU resources.\nIn this work, we study the training dynamics of popular text-to-image personalization methods (such as Textual Inversion or DreamBooth), aiming to speed them up.\nWe observe that most concepts are learned at early stages and do not improve in quality later, but standard training convergence metrics fail to indicate that.\nInstead, we propose a simple drop-in early stopping criterion that only requires computing the regular training objective on a fixed set of inputs for all training iterations.\nOur experiments on Stable Diffusion for 48 different concepts and three personalization methods demonstrate the competitive performance of our approach, which makes adaptation up to 8 times faster with no significant drops in quality.", "keywords": "text-to-image generation;diffusion models;early stopping", "primary_area": "", "supplementary_material": "", "author": "Anton Voronov;Mikhail Khoroshikh;Artem Babenko;Max Ryabinin", "authorids": "~Anton_Voronov1;~Mikhail_Khoroshikh1;~Artem_Babenko1;~Max_Ryabinin1", "gender": ";M;M;Not Specified", "homepage": ";;;https://mryab.github.io/", "dblp": "290/3025;;117/4834;276/0192", "google_scholar": "GdnApR0AAAAJ;;q885d1wAAAAJ;930PERsAAAAJ", "orcid": ";;0000-0002-1830-8252;", "linkedin": ";michellemoorre/;;", "or_profile": "~Anton_Voronov1;~Mikhail_Khoroshikh1;~Artem_Babenko1;~Max_Ryabinin1", "aff": "Moscow Institute of Physics and Technology;Higher School of Economics, Higher School of Economics;Yandex;Yandex", "aff_domain": "phystech.edu;edu.hse.ru;yandex-team.ru;yandex-team.ru", "position": "PhD student;Undergrad student;Researcher;Research Scientist", "bibtex": "@inproceedings{\nvoronov2023is,\ntitle={Is This Loss Informative? 
Faster Text-to-Image Customization by Tracking Objective Dynamics},\nauthor={Anton Voronov and Mikhail Khoroshikh and Artem Babenko and Max Ryabinin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xs6Xwc0Glj}\n}", "github": "", "project": "", "reviewers": "5cki;Gk9z;rpQb;DXLz", "pdf_size": 10763169, "rating": "4;5;6;7", "confidence": "5;4;3;5", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "109;64;82;90", "wc_strengths": "66;59;23;100", "wc_weaknesses": "218;144;107;56", "wc_questions": "6;90;9;49", "wc_limitations": "5;51;9;1", "wc_review": "404;408;230;296", "wc_reply_reviewers": "115;21;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 16.161296358893985 ], "wc_strengths_avg": [ 62.0, 27.340446228984632 ], "wc_weaknesses_avg": [ 131.25, 59.03124172842716 ], "wc_questions_avg": [ 38.5, 34.23813663153998 ], "wc_limitations_avg": [ 16.5, 20.11839953873071 ], "wc_review_avg": [ 334.5, 75.224663508719 ], "wc_reply_reviewers_avg": [ 34.0, 47.544715794712666 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17116955588713554591&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "phystech.edu;edu.hse.ru;yandex-team.ru;yandex-team.ru", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Moscow Institute of Physics and Technology;Higher School of Economics;Yandex", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mipt.ru/en;https://www.hse.ru;https://yandex.com", "aff_unique_abbr": "MIPT;HSE;Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "id": "XsZ5YebcCz", "title": "Mildly Constrained Evaluation Policy for Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline reinforcement learning (RL) methodologies enforce constraints on the policy to adhere closely to the behavior policy, thereby stabilizing value learning and mitigating the selection of out-of-distribution (OOD) actions during test time. Conventional approaches apply identical constraints for both value learning and test time inference. However, our findings indicate that the constraints suitable for value estimation may in fact be excessively restrictive for action selection during test time. To address this issue, we propose a Mildly Constrained Evaluation Policy (MCEP) for test time inference with a more constrained target policy for value estimation. Since the target policy has been adopted in various prior approaches, MCEP can be seamlessly integrated with them as a plug-in. We instantiate MCEP based on TD3-BC [Fujimoto and Gu, 2021] and AWAC [Nair et al., 2020] algorithms. The empirical results on MuJoCo locomotion tasks show that the MCEP significantly outperforms the target policy and achieves competitive results to state-of-the-art offline RL methods. 
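To make the plug-in idea concrete, a hypothetical TD3-BC-style instantiation could look as follows (names and weights are illustrative; in TD3-BC a larger $\alpha$ puts more weight on the Q-term, i.e., a milder behavior-cloning constraint):

```python
import torch
from typing import Callable, Tuple

def mcep_policy_losses(
    critic: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    target_pi: Callable[[torch.Tensor], torch.Tensor],
    eval_pi: Callable[[torch.Tensor], torch.Tensor],
    obs: torch.Tensor,
    act: torch.Tensor,
    alpha_target: float = 2.5,   # standard TD3-BC weight (tighter constraint)
    alpha_eval: float = 10.0,    # milder constraint for the evaluation policy
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Hypothetical sketch of MCEP on top of TD3-BC: both policies share the
    same loss shape, but the target policy (used for value estimation) keeps
    the standard, more restrictive constraint, while the evaluation policy
    (used only at test time) is mildly constrained."""
    def td3bc_loss(pi, alpha):
        pi_act = pi(obs)
        q = critic(obs, pi_act)
        lam = alpha / q.abs().mean().detach()  # TD3-BC's Q-scale normalization
        return -(lam * q).mean() + ((pi_act - act) ** 2).mean()

    return td3bc_loss(target_pi, alpha_target), td3bc_loss(eval_pi, alpha_eval)
```

Only the target policy would appear in the critic's TD target during training; test-time actions come from `eval_pi`.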
The codes are open-sourced at link.", "keywords": "Offline Reinforcement Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/a41b903ecae75d22838b431e127ee540f05d4376.zip", "author": "Linjie Xu;zhengyao jiang;Jinyu Wang;Lei Song;Jiang Bian", "authorids": "linjie.xu@qmul.ac.uk;~zhengyao_jiang2;~Jinyu_Wang1;~Lei_Song3;~Jiang_Bian1", "gender": ";M;;M;M", "homepage": ";https://zhengyaojiang.github.io/;;;https://sites.google.com/view/jiangbian", "dblp": ";;;76/893-1.html;09/851-2.html", "google_scholar": ";https://scholar.google.co.jp/citations?user=J8pFrgwAAAAJ;LvgWSg0AAAAJ;pXDSOocAAAAJ;pZBEnY8AAAAJ", "orcid": ";;;;0000-0002-9472-600X", "linkedin": ";;jinyuwang5134/;;jbian/", "or_profile": "linjie.xu@qmul.ac.uk;~zhengyao_jiang2;~Jinyu_Wang1;~Lei_Song3;~Jiang_Bian1", "aff": ";University College London;Microsoft;Microsoft;Microsoft", "aff_domain": ";ucl.ac.uk;microsoft.com;microsoft.com;microsoft.com", "position": ";PhD student;Researcher;Principal Researcher;Partner Research Manager", "bibtex": "@misc{\nxu2023mildly,\ntitle={Mildly Constrained Evaluation Policy for Offline Reinforcement Learning},\nauthor={Linjie Xu and zhengyao jiang and Jinyu Wang and Lei Song and Jiang Bian},\nyear={2023},\nurl={https://openreview.net/forum?id=XsZ5YebcCz}\n}", "github": "", "project": "", "reviewers": "FqHz;nJEQ;sDxq;wQCk", "site": "https://openreview.net/forum?id=XsZ5YebcCz", "pdf_size": 3289452, "rating": "3;6;6;6", "confidence": "4;4;3;5", "soundness": "2;2;4;3", "novelty": "1;3;3;2", "presentation": "2;2;3;4", "wc_summary": "52;59;68;92", "wc_strengths": "16;68;128;63", "wc_weaknesses": "123;190;39;63", "wc_questions": "6;80;19;48", "wc_limitations": "13;34;1;4", "wc_review": "210;431;255;270", "wc_reply_reviewers": "0;142;28;67", "wc_reply_authors": "0;246;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 67.75, 15.105876340020794 ], "wc_strengths_avg": [ 68.75, 39.77043499887825 ], "wc_weaknesses_avg": [ 103.75, 58.443883341201754 ], "wc_questions_avg": [ 38.25, 28.49890348767826 ], "wc_limitations_avg": [ 13.0, 12.90348790056394 ], "wc_review_avg": [ 291.5, 83.51197518919068 ], "wc_reply_reviewers_avg": [ 59.25, 53.3730971557769 ], "wc_reply_authors_avg": [ 61.5, 106.52112466548596 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=93229258655421342&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University College London;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.ucl.ac.uk;https://www.microsoft.com", "aff_unique_abbr": "UCL;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "LayoutGPT: Compositional Visual Planning and Generation with Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71328", "id": "Xu8aG5Q8M3", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a7f9e485845dac27423375c934cb4db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xu8aG5Q8M3", "openreview": "https://openreview.net/forum?id=Xu8aG5Q8M3", "poster": "/media/PosterPDFs/NeurIPS%202023/71328.png?t=1701993789.4391255", "slides": "https://nips.cc/virtual/2023/poster/71328", "video": "https://nips.cc/virtual/2023/poster/71328", "author_site": "Weixi Feng, Wanrong Zhu, Tsu-Jui Fu, Varun Jampani, Arjun Akula, Xuehai He, S Basu, Xin Eric Wang, William Yang Wang", "tldr": "", "abstract": "Attaining a high degree of user controllability in visual generation often requires intricate, fine-grained inputs like layouts. However, such inputs impose a substantial burden on users when compared to simple text inputs. To address the issue, we study how Large Language Models (LLMs) can serve as visual planners by generating layouts from text conditions, and thus collaborate with visual generative models. We propose LayoutGPT, a method to compose in-context visual demonstrations in style sheet language to enhance visual planning skills of LLMs. We show that LayoutGPT can generate plausible layouts in multiple domains, ranging from 2D images to 3D indoor scenes. LayoutGPT also shows superior performance in converting challenging language concepts like numerical and spatial relations to layout arrangements for faithful text-to-image generation. When combined with a downstream image generation model, LayoutGPT outperforms text-to-image models/systems by 20-40\\% and achieves comparable performance as human users in designing visual layouts for numerical and spatial correctness. Lastly, LayoutGPT achieves comparable performance to supervised methods in 3D indoor scene synthesis, demonstrating its effectiveness and potential in multiple visual domains.", "keywords": "Large Language Models;Compositional Image Generation;3D scene synthesis", "primary_area": "", "supplementary_material": "/attachment/28ab94bdbb2bce5fa015c42560daa94b2e33b0d7.pdf", "author": "Weixi Feng;Wanrong Zhu;Tsu-Jui Fu;Varun Jampani;Arjun Reddy Akula;Xuehai He;S Basu;Xin Eric Wang;William Yang Wang", "authorids": "~Weixi_Feng2;~Wanrong_Zhu1;~Tsu-Jui_Fu2;~Varun_Jampani2;~Arjun_Reddy_Akula1;~Xuehai_He1;~S_Basu1;~Xin_Eric_Wang2;~William_Yang_Wang2", "gender": "M;;M;;M;M;M;M;", "homepage": "https://weixi-feng.github.io/;;https://tsujuifu.github.io;;https://research.google/people/ArjunReddyAkula/;;http://sugatobasu.com/;https://eric-xw.github.io;", "dblp": "322/1026;;218/5366.html;;152/3930;251/0763;76/5024;10/5630-61;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=7QRDcC0AAAAJ;;CNKX9bgAAAAJ;kDzxOzUAAAAJ;https://scholar.google.com/citations?hl=en;YjqluE0AAAAJ;", "orcid": "0000-0002-7201-5688;;;;;;;0000-0003-2605-5504;", "linkedin": "weixifeng/;;tsujuifu1996;;arjun-akula-1b769939;;;;", "or_profile": "~Weixi_Feng2;~Wanrong_Zhu1;~Tsu-Jui_Fu2;~Varun_Jampani2;~Arjun_Reddy_Akula1;~Xuehai_He1;~S_Basu1;~Xin_Eric_Wang2;~William_Yang_Wang2", "aff": "University of California, Santa Barbara;;UC Santa Barbara;;Google Research;University of California Santa Curz;Google;University of California, Santa Cruz;", "aff_domain": "ucsb.edu;;ucsb.edu;;google.com;ucsc.edu;google.com;ucsc.edu;", "position": "PhD student;;PhD student;;Research Scientist;PhD student;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nfeng2023layoutgpt,\ntitle={Layout{GPT}: Compositional Visual Planning and Generation with Large 
Language Models},\nauthor={Weixi Feng and Wanrong Zhu and Tsu-Jui Fu and Varun Jampani and Arjun Reddy Akula and Xuehai He and S Basu and Xin Eric Wang and William Yang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xu8aG5Q8M3}\n}", "github": "", "project": "", "reviewers": "uKyZ;HVHZ;1i69;ykMz", "pdf_size": 18308129, "rating": "5;5;6;6", "confidence": "3;4;5;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "137;54;358;106", "wc_strengths": "118;44;203;51", "wc_weaknesses": "526;219;286;155", "wc_questions": "69;54;440;9", "wc_limitations": "9;21;29;9", "wc_review": "859;392;1316;330", "wc_reply_reviewers": "155;67;93;21", "wc_reply_authors": "434;182;164;36", "reply_reviewers": "2;1;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 163.75, 116.00511842155932 ], "wc_strengths_avg": [ 104.0, 64.04295433535215 ], "wc_weaknesses_avg": [ 296.5, 140.36470354045565 ], "wc_questions_avg": [ 143.0, 172.88869251631235 ], "wc_limitations_avg": [ 17.0, 8.48528137423857 ], "wc_review_avg": [ 724.25, 398.16728582343376 ], "wc_reply_reviewers_avg": [ 84.0, 48.425200051213004 ], "wc_reply_authors_avg": [ 204.0, 144.22898460434365 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 235, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9871924343534708528&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucsb.edu;;ucsb.edu;;google.com;ucsc.edu;google.com;ucsc.edu;", "author_num": 9, "aff_unique_index": "0;0;1;2;1;2", "aff_unique_norm": "University of California, Santa Barbara;Google;University of California, Santa Cruz", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://www.ucsb.edu;https://research.google;https://www.ucsc.edu", "aff_unique_abbr": "UCSB;Google Research;UCSC", "aff_campus_unique_index": "0;0;1;2;1;2", "aff_campus_unique": "Santa Barbara;Mountain View;Santa Cruz", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Self-supervised Graph Neural Networks via Low-Rank Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71327", "id": "XvGQ6F3sG8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c33e4ea4ddfb05a78541022ab5a1fb9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XvGQ6F3sG8", "openreview": "https://openreview.net/forum?id=XvGQ6F3sG8", "poster": "/media/PosterPDFs/NeurIPS%202023/71327.png?t=1701507373.9841354", "slides": "https://nips.cc/virtual/2023/poster/71327", "video": "https://nips.cc/virtual/2023/poster/71327", "author_site": "Liang Yang, Runjie Shi, Qiuliang Zhang, bingxin niu, Zhen Wang, Xiaochun Cao, Chuan Wang", "tldr": "", "abstract": "Self-supervised learning is introduced to train graph neural networks (GNNs) by employing propagation-based GNNs designed for semi-supervised learning tasks. Unfortunately, this common choice tends to cause two serious issues. Firstly, global parameters cause the model to lack the ability to capture the local property. 
Secondly, it is difficult to handle networks beyond homophily without label information.\nThis paper aims to break through the common choice of employing propagation-based GNNs, which aggregate representations of nodes belonging to different classes and tend to lose discriminative information. If the propagation in each ego-network is just between the nodes from the same class, the obtained representation matrix should exhibit a low-rank structure. To meet this requirement, this paper proposes the Low-Rank Decomposition-based GNNs (LRD-GNN-Matrix) by applying Low-Rank Decomposition to the attribute matrix. \nFurthermore, to incorporate long-distance information, Low-Rank Tensor Decomposition-based GNN (LRD-GNN-Tensor) is proposed by constructing the node attribute tensor from selected similar ego-networks and performing Low-Rank Tensor Decomposition. The employed tensor nuclear norm facilitates the capture of the long-distance relationship between original and selected similar ego-networks. Extensive experiments demonstrate the superior performance and the robustness of LRD-GNNs.", "keywords": "Graph neural network;Self-supervised learning;Low-Rank recovery", "primary_area": "", "supplementary_material": "/attachment/84887b718c9a1d82de9dea79d632f5fe03381bd9.zip", "author": "Liang Yang;Runjie Shi;Qiuliang Zhang;Bingxin Niu;Zhen Wang;Xiaochun Cao;Chuan Wang", "authorids": "~Liang_Yang2;~Runjie_Shi1;~Qiuliang_Zhang1;~Bingxin_Niu2;~Zhen_Wang11;~Xiaochun_Cao3;~Chuan_Wang1", "gender": "M;M;M;M;M;F;M", "homepage": "http://yangliang.github.io/;https://github.com/SR-Jegger;https://github.com/Zhangqiuliang/zhangqiuliang.github.io;http://iopen.nwpu.edu.cn/info/1015/1351.htm?ivk_sa=1024320u;https://scst.sysu.edu.cn/members/caoxiaochun.htm;https://chuanwang-cv.github.io/;", "dblp": "05/3933-2;;https://dblp.uni-trier.de/pid/265/9671;;39/3695;68/363-2;169/1022.html", "google_scholar": "7agkJogAAAAJ;;;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;", "orcid": ";;;;0000-0001-7141-708X;;", "linkedin": ";;;;;;", "or_profile": "~Liang_Yang2;~Runjie_Shi1;~Qiuliang_Zhang1;~Zhen_Wang11;~Xiaochun_Cao3;~Chuan_Wang1;~bingxin_niu1", "aff": "Hebei University of Technology;Hebei University of Technology;Hebei University of Technology;Northwestern Polytechnical University;SUN YAT-SEN UNIVERSITY;Institute of Information Engineering, Chinese Academy of Sciences;Hebei University of Technology", "aff_domain": "hebut.edu.cn;hebut.edu.cn;hebut.edu.cn;nwpu.edu.cn;sysu.edu.cn;iie.ac.cn;hebut.edu.cn", "position": "Full Professor;MS student;MS student;Full Professor;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2023selfsupervised,\ntitle={Self-supervised Graph Neural Networks via Low-Rank Decomposition},\nauthor={Liang Yang and Runjie Shi and Qiuliang Zhang and Bingxin Niu and Zhen Wang and Xiaochun Cao and Chuan Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XvGQ6F3sG8}\n}", "github": "", "project": "", "reviewers": "Efzd;KVoZ;EJvt;7YeZ", "pdf_size": 1581092, "rating": "6;7;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;3;2", "wc_summary": "87;99;76;82", "wc_strengths": "92;76;97;124", "wc_weaknesses": "50;46;156;104", "wc_questions": "30;2;3;126", "wc_limitations": "22;1;1;1", "wc_review": "281;224;333;437", "wc_reply_reviewers": "31;0;23;0", "wc_reply_authors": "109;0;0;0", 
"reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.0, 8.455767262643882 ], "wc_strengths_avg": [ 97.25, 17.282577932704367 ], "wc_weaknesses_avg": [ 89.0, 44.955533585978046 ], "wc_questions_avg": [ 40.25, 50.76600732773851 ], "wc_limitations_avg": [ 6.25, 9.093266739736606 ], "wc_review_avg": [ 318.75, 78.40400181113206 ], "wc_reply_reviewers_avg": [ 13.5, 13.793114224133722 ], "wc_reply_authors_avg": [ 27.25, 47.198384506251905 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9935952834952603883&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "hebut.edu.cn;hebut.edu.cn;hebut.edu.cn;nwpu.edu.cn;sysu.edu.cn;iie.ac.cn;hebut.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "Hebei University of Technology;Northwestern Polytechnical University;Sun Yat-sen University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Information Engineering", "aff_unique_url": "http://www.hbut.edu.cn;https://www.nwpu.edu.cn;http://www.sysu.edu.cn;http://www.cas.cn", "aff_unique_abbr": "HUT;NWPU;SYSU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Non-Rigid Shape Registration via Deep Functional Maps Prior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71326", "id": "XvfEYqEbIb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b654d6150630a5ba5df7a55621390daf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XvfEYqEbIb", "openreview": "https://openreview.net/forum?id=XvfEYqEbIb", "poster": "/media/PosterPDFs/NeurIPS%202023/71326.png?t=1701840312.5103495", "slides": "https://nips.cc/virtual/2023/poster/71326", "video": "https://nips.cc/virtual/2023/poster/71326", "author_site": "Puhua Jiang, Mingze Sun, Ruqi Huang", "tldr": "", "abstract": "In this paper, we propose a learning-based framework for non-rigid shape registra- tion without correspondence supervision. Traditional shape registration techniques typically rely on correspondences induced by extrinsic proximity, therefore can fail in the presence of large intrinsic deformations. Spectral mapping methods overcome this challenge by embedding shapes into, geometric or learned, high- dimensional spaces, where shapes are easier to align. However, due to the dependency on abstract, non-linear embedding schemes, the latter can be vulnerable with respect to perturbed or alien input. In light of this, our framework takes the best of both worlds. Namely, we deform source mesh towards the target point cloud, guided by correspondences induced by high-dimensional embeddings learned from deep functional maps (DFM). In particular, the correspondences are dynamically updated according to the intermediate registrations and filtered by consistency prior, which prominently robustify the overall pipeline. 
Moreover, in order to alleviate the requirement of extrinsically aligned input, we train an orientation regressor on a set of aligned synthetic shapes independent of the training shapes for DFM. Empirical results show that, with as few as dozens of training shapes of limited variability, our pipeline not only achieves state-of-the-art results on several benchmarks of non-rigid point cloud matching, but also delivers high-quality correspondences between unseen challenging shape pairs that undergo both significant extrinsic and intrinsic deformations, in which case neither traditional registration methods nor intrinsic methods work. The code is available at https://github.com/rqhuang88/DFR.", "keywords": "shape registration; functional maps; unsupervised learning", "primary_area": "", "supplementary_material": "/attachment/53d5ec17db14a793985341a5a5dc22ae68fa499a.zip", "author": "Puhua Jiang;Mingze Sun;Ruqi Huang", "authorids": "~Puhua_Jiang1;~Mingze_Sun1;~Ruqi_Huang1", "gender": "M;M;M", "homepage": ";https://github.com/mingzesun0514;https://rqhuang88.github.io", "dblp": ";;161/7979.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works", "orcid": ";0000-0002-0990-2161;", "linkedin": "puhua-jiang-4b4442192/;;", "or_profile": "~Puhua_Jiang1;~Mingze_Sun1;~Ruqi_Huang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua Shenzhen International Graduate School/Tsinghua Berkeley Shenzhen Institute ", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\njiang2023nonrigid,\ntitle={Non-Rigid Shape Registration via Deep Functional Maps Prior},\nauthor={Puhua Jiang and Mingze Sun and Ruqi Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XvfEYqEbIb}\n}", "github": "", "project": "", "reviewers": "xFDL;iB8H;GUi9;CNuX;1VPz", "pdf_size": 9877031, "rating": "4;4;5;5;7", "confidence": "3;5;4;5;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;2;4;4;2", "wc_summary": "51;242;498;68;49", "wc_strengths": "16;35;85;89;17", "wc_weaknesses": "100;521;335;112;226", "wc_questions": "121;27;203;57;46", "wc_limitations": "8;1;7;42;91", "wc_review": "296;826;1128;368;429", "wc_reply_reviewers": "80;1060;162;13;3", "wc_reply_authors": "203;1077;60;0;12", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;1;2", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 181.6, 173.95470674862466 ], "wc_strengths_avg": [ 48.4, 32.25895224584952 ], "wc_weaknesses_avg": [ 258.8, 156.4600907579949 ], "wc_questions_avg": [ 90.8, 64.375150485261 ], "wc_limitations_avg": [ 29.8, 33.819520990102745 ], "wc_review_avg": [ 609.4, 317.7732524930316 ], "wc_reply_reviewers_avg": [ 263.6, 402.2430111263588 ], "wc_reply_authors_avg": [ 270.4, 409.7075054230762 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4082482904638631, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17236547401428752273&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": 
"mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "ReSync: Riemannian Subgradient-based Robust Rotation Synchronization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71325", "id": "Xxllzjt6T5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/10e9204f14c4daa08041343455435308-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xxllzjt6T5", "openreview": "https://openreview.net/forum?id=Xxllzjt6T5", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71325", "video": "https://nips.cc/virtual/2023/poster/71325", "author_site": "Huikang Liu, Xiao Li, Anthony Man-Cho So", "tldr": "", "abstract": "This work presents ReSync, a Riemannian subgradient-based algorithm for solving the robust rotation synchronization problem, which arises in various engineering applications. ReSync solves a least-unsquared minimization formulation over the rotation group, which is nonsmooth and nonconvex, and aims at recovering the underlying rotations directly. We provide strong theoretical guarantees for ReSync under the random corruption setting. Specifically, we first show that the initialization procedure of ReSync yields a proper initial point that lies in a local region around the ground-truth rotations. We next establish the weak sharpness property of the aforementioned formulation and then utilize this property to derive the local linear convergence of ReSync to the ground-truth rotations. By combining these guarantees, we conclude that ReSync converges linearly to the ground-truth rotations under appropriate conditions. 
Experimental results demonstrate the effectiveness of ReSync.", "keywords": "Manifold optimization;Riemannian subgradient method;rotation synchronization", "primary_area": "", "supplementary_material": "", "author": "Huikang Liu;Xiao Li;Anthony Man-Cho So", "authorids": "~Huikang_Liu2;~Xiao_Li5;~Anthony_Man-Cho_So1", "gender": "M;M;M", "homepage": "https://huikang2019.github.io;https://www.xiao-li.org/;http://www1.se.cuhk.edu.hk/~manchoso/", "dblp": "62/8489;66/2069-9;82/3202", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-TW;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=whi3UisAAAAJ", "orcid": ";0000-0001-5577-6963;0000-0003-2588-7851", "linkedin": ";;", "or_profile": "~Huikang_Liu2;~Xiao_Li5;~Anthony_Man-Cho_So1", "aff": "Shanghai University of Finance and Economics;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong", "aff_domain": "sufe.edu;cuhk.edu.cn;cuhk.edu.hk", "position": "Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023resync,\ntitle={ReSync: Riemannian Subgradient-based Robust Rotation Synchronization},\nauthor={Huikang Liu and Xiao Li and Anthony Man-Cho So},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xxllzjt6T5}\n}", "github": "", "project": "", "reviewers": "m1vD;aU9r;grLa;W8A3", "pdf_size": 927173, "rating": "6;7;7;7", "confidence": "4;3;3;4", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "179;51;95;62", "wc_strengths": "78;37;28;84", "wc_weaknesses": "450;34;41;38", "wc_questions": "6;2;2;31", "wc_limitations": "2;1;1;1", "wc_review": "715;125;167;216", "wc_reply_reviewers": "99;9;9;40", "wc_reply_authors": "0;31;31;16", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.75, 50.1715806009737 ], "wc_strengths_avg": [ 56.75, 24.5496944991175 ], "wc_weaknesses_avg": [ 140.75, 178.56283907913203 ], "wc_questions_avg": [ 10.25, 12.090802289343747 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 305.75, 238.46527524987783 ], "wc_reply_reviewers_avg": [ 39.25, 36.74489760497367 ], "wc_reply_authors_avg": [ 19.5, 12.816005617976296 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10314067636528253105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sufe.edu;cuhk.edu.cn;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Shanghai University of Finance and Economics;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.sufe.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "SUFE;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "CAP: Correlation-Aware Pruning for Highly-Accurate Sparse Vision Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71324", "id": "Xy7DoWSNZX", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/5bd9fbb3a5a985f80c16ddd0ec1dfc43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xy7DoWSNZX", "openreview": "https://openreview.net/forum?id=Xy7DoWSNZX", "poster": "/media/PosterPDFs/NeurIPS%202023/71324.png?t=1701386574.5731444", "slides": "https://nips.cc/virtual/2023/poster/71324", "video": "https://nips.cc/virtual/2023/poster/71324", "author_site": "Denis Kuznedelev, Eldar Kurti\u0107, Elias Frantar, Dan Alistarh, Dan Alistarh", "tldr": "", "abstract": "Driven by significant improvements in architectural design and training pipelines, computer vision\nhas recently experienced dramatic progress in terms of accuracy on classic benchmarks such as ImageNet. \nThese highly-accurate models are challenging to deploy, as they appear harder to compress using standard techniques such as pruning. \nWe address this issue by introducing the Correlation Aware Pruner (CAP), \na new unstructured pruning framework which significantly pushes the compressibility limits for state-of-the-art architectures.\nOur method is based on two technical advancements: a new theoretically-justified pruner, which can handle complex weight correlations accurately and efficiently during the pruning process itself, and an efficient finetuning procedure for post-compression recovery. \nWe validate our approach via extensive experiments on several modern vision models such as Vision Transformers (ViT), \nmodern CNNs, and ViT-CNN hybrids, showing for the first time that these can be \npruned to high sparsity levels (e.g. $\\geq 75$%) with low impact on accuracy ($\\leq 1$% relative drop). \nOur approach is also compatible with structured pruning and quantization, and can lead to practical speedups of 1.5 to 2.4x without accuracy loss. 
To further showcase CAP's accuracy and scalability, we use it to show for the first time that extremely-accurate large vision models, trained via self-supervised techniques, can also be pruned to moderate sparsities, with negligible accuracy loss.", "keywords": "neural network pruning;vision transformer;sparsity;model compression", "primary_area": "", "supplementary_material": "/attachment/99fd5de58a66cae135d331f8e56f216ff55e668a.zip", "author": "Denis Kuznedelev;Eldar Kurtic;Elias Frantar;Dan Alistarh", "authorids": "~Denis_Kuznedelev1;~Eldar_Kurtic1;~Elias_Frantar1;~Dan_Alistarh7", "gender": "M;M;M;M", "homepage": "https://github.com/Godofnothing;;;http://people.csail.mit.edu/alistarh/", "dblp": "322/8616;297/3713;259/2210;36/3251.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;hjdlwz8AAAAJ;https://scholar.google.com.tw/citations?user=75q-6ZQAAAAJ", "orcid": "0009-0005-2420-9620;;;", "linkedin": ";eldar-kurti%C4%87-77963b160/;elias-frantar-5b43181a4;", "or_profile": "~Denis_Kuznedelev1;~Eldar_Kurtic1;~Elias_Frantar1;~Dan_Alistarh1", "aff": ";Institute of Science and Technology Austria;Google Brain;Institute of Science and Technology", "aff_domain": ";ist.ac.at;google.com;ist.ac.at", "position": ";Researcher;Intern;Full Professor", "bibtex": "@inproceedings{\nkuznedelev2023cap,\ntitle={{CAP}: Correlation-Aware Pruning for Highly-Accurate Sparse Vision Models},\nauthor={Denis Kuznedelev and Eldar Kurtic and Elias Frantar and Dan Alistarh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xy7DoWSNZX}\n}", "github": "", "project": "", "reviewers": "kMpj;gPY6;wReA;JVcP", "pdf_size": 4612497, "rating": "3;5;5;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "95;30;130;75", "wc_strengths": "126;91;21;67", "wc_weaknesses": "319;65;51;58", "wc_questions": "337;3;2;2", "wc_limitations": "61;7;12;9", "wc_review": "938;196;216;211", "wc_reply_reviewers": "142;0;16;20", "wc_reply_authors": "486;104;32;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 82.5, 36.1420807370024 ], "wc_strengths_avg": [ 76.25, 38.17967391165095 ], "wc_weaknesses_avg": [ 123.25, 113.12465469560559 ], "wc_questions_avg": [ 86.0, 144.91549261552404 ], "wc_limitations_avg": [ 22.25, 22.442983313276336 ], "wc_review_avg": [ 390.25, 316.3292390848497 ], "wc_reply_reviewers_avg": [ 44.5, 56.78688228807776 ], "wc_reply_authors_avg": [ 155.5, 194.49614392064436 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1942071750463065040&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";ist.ac.at;google.com;ist.ac.at", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Institute of Science and Technology Austria;Google;Institute of Science and Technology", "aff_unique_dep": ";Google Brain;", "aff_unique_url": "https://www.ist.ac.at;https://brain.google.com;", "aff_unique_abbr": "IST Austria;Google Brain;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", 
"aff_country_unique_index": "0;1", "aff_country_unique": "Austria;United States;" }, { "title": "An Empirical Study Towards Prompt-Tuning for Graph Contrastive Pre-Training in Recommendations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71323", "id": "XyAP8ScqLV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c6af791af7ef0f3e02bccef011211ca5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XyAP8ScqLV", "openreview": "https://openreview.net/forum?id=XyAP8ScqLV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71323", "video": "https://nips.cc/virtual/2023/poster/71323", "author_site": "Haoran Yang, Xiangyu Zhao, Yicong Li, Hongxu Chen, Guandong Xu", "tldr": "", "abstract": "Graph contrastive learning (GCL) has emerged as a potent technology for numerous graph learning tasks. It has been successfully applied to real-world recommender systems, where the contrastive loss and the downstream recommendation objectives are always combined to form the overall objective function. Such a strategy is inconsistent with the original GCL paradigm, where graph embeddings are pre-trained without involving downstream training objectives. In this paper, we innovatively propose a prompt-enhanced framework for GCL-based recommender systems, namely CPTPP, which can fully leverage the advantages of the original GCL protocol through prompt tuning. Specifically, we first summarise user profiles in graph recommender systems to automatically generate personalized user prompts. These prompts will then be combined with pre-trained user embeddings to conduct prompt-tuning in downstream tasks, thereby narrowing the distinct targets between pre-training and downstream tasks. Extensive experiments on three benchmark datasets validate the effectiveness of CPTPP against state-of-the-art baselines. 
A further visualization experiment demonstrates that user embeddings generated by CPTPP have a more uniform distribution, indicating a better capacity to model the diversity of user preferences.\nThe implementation code is available online to ease reproducibility: https://anonymous.4open.science/r/CPTPP-F8F4", "keywords": "graph contrastive learning;prompt tuning;recommendation system", "primary_area": "", "supplementary_material": "/attachment/9fc50879b599c33e36a979d77a8441c9c3634ad0.pdf", "author": "Haoran Yang;Xiangyu Zhao;Yicong Li;Hongxu Chen;Guandong Xu", "authorids": "~Haoran_Yang4;~Xiangyu_Zhao1;~Yicong_Li3;~Hongxu_Chen1;~Guandong_Xu2", "gender": "M;M;Not Specified;M;M", "homepage": ";https://zhaoxyai.github.io/;https://yicongli.mysxl.cn/;https://sites.google.com/view/hxchen;https://profiles.uts.edu.au/Guandong.Xu", "dblp": ";08/890-1.html;246/8802-1;147/5824-2.html;https://dblp.uni-trier.de/pid/59/2340.html", "google_scholar": "LlxML5cAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.au/citations?user=W3CtDGQAAAAJ;https://scholar.google.com.au/citations?user=kcrdCq4AAAAJ", "orcid": ";0000-0003-2926-4416;0000-0001-7905-4885;0000-0001-7963-8813;0000-0003-4493-6663", "linkedin": ";;;hongxu-chen-phd-17b611120/?trk=public_profile_browsemap&originalSubdomain=au;guandong-xu-7a560325/", "or_profile": "~Haoran_Yang4;~Xiangyu_Zhao1;~Yicong_Li3;~Hongxu_Chen1;~Guandong_Xu2", "aff": "The Hong Kong Polytechnic University;City University of Hong Kong;;Commonwealth Bank of Australia;University of Technology Sydney", "aff_domain": "polyu.edu.hk;cityu.edu.hk;;cba.com.au;uts.edu.au", "position": "PhD student;Assistant Professor;;Data Scientist;Full Professor", "bibtex": "@inproceedings{\nyang2023an,\ntitle={An Empirical Study Towards Prompt-Tuning for Graph Contrastive Pre-Training in Recommendations},\nauthor={Haoran Yang and Xiangyu Zhao and Yicong Li and Hongxu Chen and Guandong Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XyAP8ScqLV}\n}", "github": "", "project": "", "reviewers": "NDuk;UMuQ;PJh3;1hDm", "pdf_size": 9827002, "rating": "4;4;5;7", "confidence": "3;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "66;35;77;201", "wc_strengths": "35;40;87;323", "wc_weaknesses": "66;71;152;71", "wc_questions": "42;3;173;63", "wc_limitations": "34;1;1;24", "wc_review": "243;150;490;682", "wc_reply_reviewers": "42;0;39;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 63.24703550365029 ], "wc_strengths_avg": [ 121.25, 118.23361408668856 ], "wc_weaknesses_avg": [ 90.0, 35.85387008399512 ], "wc_questions_avg": [ 70.25, 63.10853745730446 ], "wc_limitations_avg": [ 15.0, 14.439529078193651 ], "wc_review_avg": [ 391.25, 208.84608567076376 ], "wc_reply_reviewers_avg": [ 30.75, 17.795715776557007 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9646754478143662356&as_sdt=40005&sciodt=0,10&hl=en", 
"gs_version_total": 9, "email": "polyu.edu.hk;cityu.edu.hk;;cba.com.au;uts.edu.au", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Hong Kong Polytechnic University;City University of Hong Kong;Commonwealth Bank of Australia;University of Technology Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.cityu.edu.hk;https://www.commbank.com.au;https://www.uts.edu.au", "aff_unique_abbr": "PolyU;CityU;CBA;UTS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "China;Australia" }, { "title": "Look Ma, No Hands! Agent-Environment Factorization of Egocentric Videos", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71322", "id": "Xyj46OxEhK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/437cd2749391ad40f67e4dd1d87c4596-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Xyj46OxEhK", "openreview": "https://openreview.net/forum?id=Xyj46OxEhK", "poster": "/media/PosterPDFs/NeurIPS%202023/71322.png?t=1700086682.0124414", "slides": "https://nips.cc/virtual/2023/poster/71322", "video": "https://nips.cc/virtual/2023/poster/71322", "author_site": "Matthew Chang, Aditya Prakash, Saurabh Gupta", "tldr": "", "abstract": "The analysis and use of egocentric videos for robotics tasks is made challenging by occlusion and the visual mismatch between the human hand and a robot end-effector. Past work views the human hand as a nuisance and removes it from the scene. However, the hand also provides a valuable signal for learning. In this work, we propose to extract a factored representation of the scene that separates the agent (human hand) and the environment. This alleviates both occlusion and mismatch while preserving the signal, thereby easing the design of models for downstream robotics tasks. At the heart of this factorization is our proposed Video Inpainting via Diffusion Model (VIDM) that leverages both a prior on real-world images (through a large-scale pre-trained diffusion model) and the appearance of the object in earlier frames of the video (through attention). Our experiments demonstrate the effectiveness of VIDM at improving the in-painting quality in egocentric videos and the power of our factored representation for numerous tasks: object detection, 3D reconstruction of manipulated objects, and learning of reward functions, policies, and affordances from videos.", "keywords": "Inpainting;Diffusion;Robot Learning;Egocentric Vision", "primary_area": "", "supplementary_material": "/attachment/b17795477889f2674fbdafb3d2e67e3163669279.zip", "author": "Matthew Chang;Aditya Prakash;Saurabh Gupta", "authorids": "~Matthew_Chang1;~Aditya_Prakash1;~Saurabh_Gupta1", "gender": "M;M;", "homepage": "https://matthewchang.github.io/;https://ap229997.github.io/;http://saurabhg.web.illinois.edu", "dblp": "56/2174;136/9808-1;06/5843-1", "google_scholar": "lx-5mjUAAAAJ;DUgsNccAAAAJ;1HO5UacAAAAJ", "orcid": ";0000-0002-9041-4922;", "linkedin": "matthew-chang-1976b8136/;aditya-prakash-6b6072113;", "or_profile": "~Matthew_Chang1;~Aditya_Prakash1;~Saurabh_Gupta1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchang2023look,\ntitle={Look Ma, No Hands! 
Agent-Environment Factorization of Egocentric Videos},\nauthor={Matthew Chang and Aditya Prakash and Saurabh Gupta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Xyj46OxEhK}\n}", "github": "", "project": "", "reviewers": "ymXH;UX1K;jD5i;t2iv;u9dg", "pdf_size": 8484796, "rating": "5;5;6;7;7", "confidence": "4;4;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;4;3", "presentation": "3;3;3;4;4", "wc_summary": "72;62;110;87;141", "wc_strengths": "40;66;92;55;44", "wc_weaknesses": "176;324;91;51;164", "wc_questions": "63;33;6;287;2", "wc_limitations": "7;6;6;11;23", "wc_review": "358;491;305;491;374", "wc_reply_reviewers": "24;0;13;22;40", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 94.4, 28.359125515431536 ], "wc_strengths_avg": [ 59.4, 18.650469162999627 ], "wc_weaknesses_avg": [ 161.2, 93.59786322347321 ], "wc_questions_avg": [ 78.2, 106.66845831828638 ], "wc_limitations_avg": [ 10.6, 6.468384651518492 ], "wc_review_avg": [ 403.8, 74.77272229897746 ], "wc_reply_reviewers_avg": [ 19.8, 13.181805642627264 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3363159932607258650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "illinois.edu;illinois.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Minimax Risks and Optimal Procedures for Estimation under Functional Local Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71321", "id": "XzTM9gVRT4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b4dde7f1bc45bf9c0fda8db8f272b758-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=XzTM9gVRT4", "openreview": "https://openreview.net/forum?id=XzTM9gVRT4", "poster": "/media/PosterPDFs/NeurIPS%202023/71321.png?t=1701757834.1742008", "slides": "https://nips.cc/virtual/2023/poster/71321", "video": "https://nips.cc/virtual/2023/poster/71321", "author_site": "Bonwoo Lee, Jeongyoun Ahn, Cheolwoo Park", "tldr": "", "abstract": "As concerns about data privacy continue to grow, differential privacy (DP) has emerged as a fundamental concept that aims to guarantee privacy by ensuring individuals' indistinguishability in data analysis. Local differential privacy (LDP) is a rigorous type of DP that requires individual data to be privatized before being sent to the collector, thus removing the need for a trusted third party to collect data. 
Among the numerous (L)DP-based approaches, functional DP has gained considerable attention in the DP community because it connects DP to statistical decision-making by formulating it as a hypothesis-testing problem and also exhibits Gaussian-related properties. However, the utility of privatized data is generally lower than that of non-private data, prompting research into optimal mechanisms that maximize the statistical utility for given privacy constraints. In this study, we investigate how functional LDP preserves the statistical utility by analyzing minimax risks of univariate mean estimation as well as nonparametric density estimation. We leverage the contraction property of functional LDP mechanisms and classical information-theoretical bounds to derive private minimax lower bounds. Our theoretical study reveals that it is possible to establish an interpretable, continuous balance between the statistical utility and privacy level, which has not been achieved under the $\\epsilon$-LDP framework. Furthermore, we suggest minimax optimal mechanisms based on Gaussian LDP (a type of functional LDP) that achieve the minimax upper bounds and show via a numerical study that they are superior to the counterparts derived under $\\epsilon$-LDP. The theoretical and empirical findings of this work suggest that Gaussian LDP should be considered a reliable standard for LDP.", "keywords": "Data privacy;Functional local differential privacy;Gaussian mechanism;Minimax risks;Statistical utility", "primary_area": "", "supplementary_material": "/attachment/31e34bd8da2d124f27d50f1a4e1d3698cd2f962e.zip", "author": "Bonwoo Lee;Jeongyoun Ahn;Cheolwoo Park", "authorids": "righthim@kaist.ac.kr;~Jeongyoun_Ahn1;~Cheolwoo_Park3", "gender": ";;M", "homepage": ";;https://sites.google.com/view/parkcw/home?authuser=1", "dblp": ";;46/4369", "google_scholar": ";;", "orcid": ";;0000-0003-4634-7045", "linkedin": ";;", "or_profile": "righthim@kaist.ac.kr;~Jeongyoun_Ahn1;~Cheolwoo_Park3", "aff": ";;Korea Advanced Institute of Science & Technology", "aff_domain": ";;kaist.ac.kr", "position": ";;Full Professor", "bibtex": "@inproceedings{\nlee2023minimax,\ntitle={Minimax Risks and Optimal Procedures for Estimation under Functional Local Differential Privacy},\nauthor={Bonwoo Lee and Jeongyoun Ahn and Cheolwoo Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=XzTM9gVRT4}\n}", "github": "", "project": "", "reviewers": "H56L;qJiE;fz6L;6GNC", "pdf_size": 900606, "rating": "6;6;7;7", "confidence": "3;4;3;3", "soundness": "3;4;3;4", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "63;46;98;119", "wc_strengths": "43;77;179;87", "wc_weaknesses": "118;214;227;104", "wc_questions": "2;163;82;10", "wc_limitations": "1;48;49;90", "wc_review": "227;548;635;410", "wc_reply_reviewers": "12;47;29;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.5, 28.64000698323937 ], "wc_strengths_avg": [ 96.5, 50.34630075785112 ], "wc_weaknesses_avg": [ 165.75, 55.165093129623195 ], "wc_questions_avg": [ 64.25, 64.9706664580255 ], "wc_limitations_avg": [ 47.0, 31.50396800404673 ], "wc_review_avg": [ 455.0, 154.15738710811104 ], "wc_reply_reviewers_avg": [ 30.25, 12.47747971346778 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=215921489997199631&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Towards Higher Ranks via Adversarial Weight Pruning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71320", "id": "Y17N9B0vXn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/040ace837dd270a87055bb10dd7c0392-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y17N9B0vXn", "openreview": "https://openreview.net/forum?id=Y17N9B0vXn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71320", "video": "https://nips.cc/virtual/2023/poster/71320", "author_site": "Yuchuan Tian, Hanting Chen, Tianyu Guo, Chao Xu, Yunhe Wang", "tldr": "", "abstract": "Convolutional Neural Networks (CNNs) are hard to deploy on edge devices due to its high computation and storage complexities. As a common practice for model compression, network pruning consists of two major categories: unstructured and structured pruning, where unstructured pruning constantly performs better. However, unstructured pruning presents a structured pattern at high pruning rates, which limits its performance. To this end, we propose a Rank-based PruninG (RPG) method to maintain the ranks of sparse weights in an adversarial manner. In each step, we minimize the low-rank approximation error for the weight matrices using singular value decomposition, and maximize their distance by pushing the weight matrices away from its low rank approximation. This rank-based optimization objective guides sparse weights towards a high-rank topology. The proposed method is conducted in a gradual pruning fashion to stabilize the change of rank during training. Experimental results on various datasets and different tasks demonstrate the effectiveness of our algorithm in high sparsity. The proposed RPG outperforms the state-of-the-art performance by 1.13\\% top-1 accuracy on ImageNet in ResNet-50 with 98\\% sparsity. 
The codes are available at https://github.com/huawei-noah/Efficient-Computing/tree/master/Pruning/RPG and https://gitee.com/mindspore/models/tree/master/research/cv/RPG.", "keywords": "Weight Pruning;Matrix Rank", "primary_area": "", "supplementary_material": "/attachment/6eb5c960791712ec1d3deb53590e4187c781af58.pdf", "author": "Yuchuan Tian;Hanting Chen;Tianyu Guo;Chao Xu;Yunhe Wang", "authorids": "~Yuchuan_Tian1;~Hanting_Chen1;~Tianyu_Guo1;~Chao_Xu1;~Yunhe_Wang1", "gender": "M;M;M;M;M", "homepage": ";;;http://www.cis.pku.edu.cn/faculty/vision/xuchao/xuchao01.htm;https://www.wangyunhe.site/", "dblp": "193/6675;232/2060;218/7273;;63/8217-1", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;RPK3oQgAAAAJ;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";;;;0000-0002-0142-509X", "linkedin": ";;;;", "or_profile": "~Yuchuan_Tian1;~Hanting_Chen1;~Tianyu_Guo1;~Chao_Xu1;~Yunhe_Wang1", "aff": "Peking University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Peking University;Huawei Noah's Ark Lab", "aff_domain": "pku.edu.cn;huawei.com;huawei.com;pku.edu;huawei.com", "position": "PhD student;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ntian2023towards,\ntitle={Towards Higher Ranks via Adversarial Weight Pruning},\nauthor={Yuchuan Tian and Hanting Chen and Tianyu Guo and Chao Xu and Yunhe Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y17N9B0vXn}\n}", "github": "", "project": "", "reviewers": "P9XJ;yERC;GHQR;jsvG", "pdf_size": 429561, "rating": "6;6;7;7", "confidence": "5;3;5;4", "soundness": "4;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "72;145;100;79", "wc_strengths": "42;72;175;170", "wc_weaknesses": "143;380;145;141", "wc_questions": "2;94;64;5", "wc_limitations": "12;20;1;7", "wc_review": "271;711;485;402", "wc_reply_reviewers": "119;0;0;0", "wc_reply_authors": "132;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.0, 28.48683906648823 ], "wc_strengths_avg": [ 114.75, 58.74255271947245 ], "wc_weaknesses_avg": [ 202.25, 102.63375419422209 ], "wc_questions_avg": [ 41.25, 39.22610737761268 ], "wc_limitations_avg": [ 10.0, 6.96419413859206 ], "wc_review_avg": [ 467.25, 160.078691586357 ], "wc_reply_reviewers_avg": [ 29.75, 51.528511525174096 ], "wc_reply_authors_avg": [ 33.0, 57.15767664977295 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16633199390662285385&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;huawei.com;huawei.com;pku.edu;huawei.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Posterior Contraction Rates for Mat\u00e9rn Gaussian 
Processes on Riemannian Manifolds", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71319", "id": "Y18r0xWkSh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b7676588c33d344485eeba1b5653ab1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y18r0xWkSh", "openreview": "https://openreview.net/forum?id=Y18r0xWkSh", "poster": "/media/PosterPDFs/NeurIPS%202023/71319.png?t=1701607020.224681", "slides": "https://nips.cc/virtual/2023/poster/71319", "video": "https://nips.cc/virtual/2023/poster/71319", "author_site": "Paul Rosa, Slava Borovitskiy, Alexander Terenin, Judith Rousseau", "tldr": "", "abstract": "Gaussian processes are used in many machine learning applications that rely on uncertainty quantification. Recently, computational tools for working with these models in geometric settings, such as when inputs lie on a Riemannian manifold, have been developed. This raises the question: can these intrinsic models be shown theoretically to lead to better performance, compared to simply embedding all relevant quantities into $\\mathbb{R}^d$ and using the restriction of an ordinary Euclidean Gaussian process? To study this, we prove optimal contraction rates for intrinsic Mat\u00e9rn Gaussian processes defined on compact Riemannian manifolds. We also prove analogous rates for extrinsic processes using trace and extension theorems between manifold and ambient Sobolev spaces: somewhat surprisingly, the rates obtained turn out to coincide with those of the intrinsic processes, provided that their smoothness parameters are matched appropriately. We illustrate these rates empirically on a number of examples, which, mirroring prior work, show that intrinsic processes can achieve better performance in practice. 
Therefore, our work shows that finer-grained analyses are needed to distinguish between different levels of data-efficiency of geometric Gaussian processes, particularly in settings which involve small data set sizes and non-asymptotic behavior.", "keywords": "Gaussian processes;posterior contraction;manifolds;kernels", "primary_area": "", "supplementary_material": "/attachment/628bdc2832e3a95a45afa07417804b0c22fdd034.pdf", "author": "Paul Rosa;Viacheslav Borovitskiy;Alexander Terenin;Judith Rousseau", "authorids": "~Paul_Rosa1;~Viacheslav_Borovitskiy1;~Alexander_Terenin1;~Judith_Rousseau1", "gender": "M;M;M;F", "homepage": "https://paulrosa.owlstown.net/;https://vab.im/;https://avt.im/;", "dblp": ";259/3201;185/1040;75/8020", "google_scholar": "mukIEioAAAAJ;https://scholar.google.ru/citations?user=1KqNyNMAAAAJ;6Qa-wXMAAAAJ;", "orcid": ";;0000-0001-5292-3104;", "linkedin": "paul-rosa-08bbb9146/;;;", "or_profile": "~Paul_Rosa1;~Viacheslav_Borovitskiy1;~Alexander_Terenin1;~Judith_Rousseau1", "aff": "University of Oxford;ETHZ - ETH Zurich;University of Cambridge;Univerist\u00e9 Paris-Dauphine", "aff_domain": "ox.ac.uk;ethz.ch;cam.ac.uk;dauphine.fr", "position": "PhD student;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nrosa2023posterior,\ntitle={Posterior Contraction Rates for Mat\\'ern Gaussian Processes on Riemannian Manifolds},\nauthor={Paul Rosa and Viacheslav Borovitskiy and Alexander Terenin and Judith Rousseau},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y18r0xWkSh}\n}", "github": "", "project": "", "reviewers": "qZfm;dGg7;woEb;yf8T", "pdf_size": 1511555, "rating": "7;8;8;8", "confidence": "3;3;5;3", "soundness": "4;4;3;3", "novelty": "3;4;3;3", "presentation": "4;4;4;3", "wc_summary": "50;83;75;89", "wc_strengths": "70;48;110;1", "wc_weaknesses": "3;36;132;1", "wc_questions": "354;2;527;1", "wc_limitations": "1;1;1;1", "wc_review": "478;170;845;93", "wc_reply_reviewers": "18;9;9;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 14.85555451674558 ], "wc_strengths_avg": [ 57.25, 39.35336707322513 ], "wc_weaknesses_avg": [ 43.0, 53.23063027994314 ], "wc_questions_avg": [ 221.0, 227.86289737471523 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 396.5, 296.3144444673597 ], "wc_reply_reviewers_avg": [ 9.0, 6.363961030678928 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13262841670461970457&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "ox.ac.uk;ethz.ch;cam.ac.uk;dauphine.fr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Oxford;ETH Zurich;University of Cambridge;Universit\u00e9 Paris-Dauphine", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.ethz.ch;https://www.cam.ac.uk;https://www.univ-paris-dauphine.fr", "aff_unique_abbr": "Oxford;ETHZ;Cambridge;UPD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;2", 
"aff_country_unique": "United Kingdom;Switzerland;France" }, { "title": "A Unified Approach for Maximizing Continuous DR-submodular Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71318", "id": "Y1sJJW3pID", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c041d58d2250e67f70a5b004655315b5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y1sJJW3pID", "openreview": "https://openreview.net/forum?id=Y1sJJW3pID", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71318", "video": "https://nips.cc/virtual/2023/poster/71318", "author_site": "Mohammad Pedramfar, Christopher Quinn, Vaneet Aggarwal", "tldr": "", "abstract": "This paper presents a unified approach for maximizing continuous DR-submodular functions that encompasses a range of settings and oracle access types. Our approach includes a Frank-Wolfe type offline algorithm for both monotone and non-monotone functions, with different restrictions on the general convex set. We consider settings where the oracle provides access to either the gradient of the function or only the function value, and where the oracle access is either deterministic or stochastic. We determine the number of required oracle accesses in all cases. Our approach gives new/improved results for nine out of the sixteen considered cases, avoids computationally expensive projections in three cases, with the proposed framework matching performance of state-of-the-art approaches in the remaining four cases. Notably, our approach for the stochastic function value-based oracle enables the first regret bounds with bandit feedback for stochastic DR-submodular functions.", "keywords": "Stochastic optimization;submodular maximization;Frank-Wolfe algorithm", "primary_area": "", "supplementary_material": "/attachment/cd94a338dfee8296e2a2b2b90541ae201f4d0337.pdf", "author": "Mohammad Pedramfar;Christopher John Quinn;Vaneet Aggarwal", "authorids": "~Mohammad_Pedramfar1;~Christopher_John_Quinn1;~Vaneet_Aggarwal1", "gender": ";M;M", "homepage": ";https://www.cs.iastate.edu/people/christopher-quinn;", "dblp": ";50/8822;91/6560", "google_scholar": ";oXWIgXcAAAAJ;", "orcid": ";0000-0002-9053-1504;", "linkedin": ";;", "or_profile": "~Mohammad_Pedramfar1;~Christopher_John_Quinn1;~Vaneet_Aggarwal1", "aff": ";Iowa State University;Purdue University", "aff_domain": ";iastate.edu;purdue.edu", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npedramfar2023a,\ntitle={A Unified Approach for Maximizing Continuous {DR}-submodular Functions},\nauthor={Mohammad Pedramfar and Christopher John Quinn and Vaneet Aggarwal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y1sJJW3pID}\n}", "github": "", "project": "", "reviewers": "Dxab;f6HN;GwvC;L9XA", "pdf_size": 422817, "rating": "3;6;6;7", "confidence": "5;5;3;3", "soundness": "2;4;2;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "85;76;166;116", "wc_strengths": "45;80;94;94", "wc_weaknesses": "80;66;60;122", "wc_questions": "60;170;184;111", "wc_limitations": "7;16;1;16", "wc_review": "277;408;505;459", "wc_reply_reviewers": "70;81;194;36", "wc_reply_authors": "509;764;737;10", "reply_reviewers": "1;2;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], 
"wc_summary_avg": [ 110.75, 35.180783106690505 ], "wc_strengths_avg": [ 78.25, 20.029665498954294 ], "wc_weaknesses_avg": [ 82.0, 24.20743687382041 ], "wc_questions_avg": [ 131.25, 49.423552077931426 ], "wc_limitations_avg": [ 10.0, 6.363961030678928 ], "wc_review_avg": [ 412.25, 85.29177861904394 ], "wc_reply_reviewers_avg": [ 95.25, 59.37749994737064 ], "wc_reply_authors_avg": [ 505.0, 302.4673536102698 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15432733797012732565&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 9, "email": ";iastate.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Iowa State University;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iastate.edu;https://www.purdue.edu", "aff_unique_abbr": "ISU;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Expert load matters: operating networks at high accuracy and low manual effort", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71317", "id": "Y2VQWfi7Vc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/348346383eb58ed19def02e233c408d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y2VQWfi7Vc", "openreview": "https://openreview.net/forum?id=Y2VQWfi7Vc", "poster": "/media/PosterPDFs/NeurIPS%202023/71317.png?t=1699017101.9730694", "slides": "https://nips.cc/virtual/2023/poster/71317", "video": "https://nips.cc/virtual/2023/poster/71317", "author_site": "Sara Sangalli, Ertunc Erdil, Ender Konukoglu", "tldr": "", "abstract": "In human-AI collaboration systems for critical applications, in order to ensure minimal error, users should set an operating point based on model confidence to determine when the decision should be delegated to human experts. \nSamples for which model confidence is lower than the operating point would be manually analysed by experts to avoid mistakes.\nSuch systems can become truly useful only if they consider two aspects: models should be confident only for samples for which they are accurate, and the number of samples delegated to experts should be minimized.\nThe latter aspect is especially crucial for applications where available expert time is limited and expensive, such as healthcare. \n\nThe trade-off between the model accuracy and the number of samples delegated to experts can be represented by a curve that is similar to an ROC curve, which we refer to as confidence operating characteristic (COC) curve. 
\nIn this paper, we argue that deep neural networks should be trained by taking into account both accuracy and expert load and, to that end, propose a new complementary loss function for classification that maximizes the area under this COC curve.\nThis simultaneously promotes an increase in network accuracy and a reduction in the number of samples delegated to humans.\nWe perform experiments on multiple computer vision and medical image datasets for classification.\nOur results demonstrate that the proposed loss improves classification accuracy, delegates fewer decisions to experts, achieves better out-of-distribution sample detection, and attains calibration performance on par with existing loss functions.", "keywords": "human-ai collaboration system;optimization", "primary_area": "", "supplementary_material": "", "author": "Sara Sangalli;Ertunc Erdil;Ender Konukoglu", "authorids": "~Sara_Sangalli1;~Ertunc_Erdil1;~Ender_Konukoglu1", "gender": "F;;", "homepage": "https://ee.ethz.ch/the-department/people-a-z/person-detail.MjUyMTU3.TGlzdC8zMjc5LC0xNjUwNTg5ODIw.html;;http://www.vision.ee.ethz.ch/~kender", "dblp": "264/0294;84/8711;45/7041", "google_scholar": ";https://scholar.google.com.tr/citations?user=JJsyRqAAAAAJ;https://scholar.google.ch/citations?user=OeEMrhQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sara_Sangalli1;~Ertunc_Erdil1;~Ender_Konukoglu1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nsangalli2023expert,\ntitle={Expert load matters: operating networks at high accuracy and low manual effort},\nauthor={Sara Sangalli and Ertunc Erdil and Ender Konukoglu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y2VQWfi7Vc}\n}", "github": "", "project": "", "reviewers": "1tYR;eeZS;Bcd1;jAPT", "pdf_size": 557786, "rating": "4;5;6;7", "confidence": "4;3;3;4", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "49;46;84;121", "wc_strengths": "13;31;62;66", "wc_weaknesses": "132;247;286;62", "wc_questions": "3;3;33;54", "wc_limitations": "1;3;22;1", "wc_review": "198;330;487;304", "wc_reply_reviewers": "0;90;15;0", "wc_reply_authors": "43;0;41;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.0, 30.47129797038518 ], "wc_strengths_avg": [ 43.0, 21.988633427296023 ], "wc_weaknesses_avg": [ 181.75, 89.35987634279716 ], "wc_questions_avg": [ 23.25, 21.568205766822608 ], "wc_limitations_avg": [ 6.75, 8.842369591913696 ], "wc_review_avg": [ 329.75, 103.37885422077379 ], "wc_reply_reviewers_avg": [ 26.25, 37.31202889149825 ], "wc_reply_authors_avg": [ 21.0, 21.011901389450696 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18084909200094728056&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ethz.ch;ethz.ch;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Beyond NTK with Vanilla Gradient Descent: A Mean-Field Analysis of Neural Networks with Polynomial Width, Samples, and Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71316", "id": "Y2hnMZvVDm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3748cdac932d91f0a51a37db90dec50-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y2hnMZvVDm", "openreview": "https://openreview.net/forum?id=Y2hnMZvVDm", "poster": "/media/PosterPDFs/NeurIPS%202023/71316.png?t=1702025752.110587", "slides": "https://nips.cc/virtual/2023/poster/71316", "video": "https://nips.cc/virtual/2023/poster/71316", "author_site": "Arvind Mahankali, Haochen Zhang, Kefan Dong, Margalit Glasgow, Tengyu Ma", "tldr": "", "abstract": "Despite recent theoretical progress on the non-convex optimization of two-layer neural networks, it is still an open question whether gradient descent on neural networks without unnatural modifications can achieve better sample complexity than kernel methods. This paper provides a clean mean-field analysis of projected gradient flow on polynomial-width two-layer neural networks. Different from prior works, our analysis does not require unnatural modifications of the optimization algorithm. We prove that with sample size $n = O(d^{3.1})$ where $d$ is the dimension of the inputs, the network trained with projected gradient flow converges in polynomial time to a non-trivial error that is not achievable by kernel methods using $n \\ll d^4$ samples, hence demonstrating a clear separation between unmodified gradient descent and NTK. As a corollary, we show that projected gradient descent with a positive learning rate and a polynomial number of iterations converges to low error with the same sample complexity.", "keywords": "Nonconvex Optimization;Mean-Field Analysis;Beyond NTK;Deep Learning Theory", "primary_area": "", "supplementary_material": "", "author": "Arvind Venkat Mahankali;Jeff Z. HaoChen;Kefan Dong;Margalit Glasgow;Tengyu Ma", "authorids": "~Arvind_Venkat_Mahankali1;~Jeff_Z._HaoChen1;~Kefan_Dong1;~Margalit_Glasgow1;~Tengyu_Ma1", "gender": ";M;F;M;", "homepage": "https://cs.stanford.edu/~jhaochen/;https://kfdong.github.io/;https://margalitglasgow.github.io/;http://ai.stanford.edu/~tengyuma/;", "dblp": "267/5319;234/8542;268/0063;54/9061;270/8242.html", "google_scholar": "SWQxcO8AAAAJ;XalUZEoAAAAJ;ErDOPbEAAAAJ;i38QlUwAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jeff_Z._HaoChen1;~Kefan_Dong1;~Margalit_Glasgow1;~Tengyu_Ma1;~Arvind_V._Mahankali1", "aff": "Stanford University;Stanford University;Stanford University;Facebook AI Research;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;fb.com;cs.stanford.edu", "position": "PhD student;PhD student;PhD student;Visiting Scientist;PhD student", "bibtex": "@inproceedings{\nmahankali2023beyond,\ntitle={Beyond {NTK} with Vanilla Gradient Descent: A Mean-Field Analysis of Neural Networks with Polynomial Width, Samples, and Time},\nauthor={Arvind Venkat Mahankali and Jeff Z. 
HaoChen and Kefan Dong and Margalit Glasgow and Tengyu Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y2hnMZvVDm}\n}", "github": "", "project": "", "reviewers": "ZhLf;aN4G;c2Ri;HQjX", "pdf_size": 1084723, "rating": "5;5;7;8", "confidence": "4;3;4;5", "soundness": "3;3;4;4", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "97;53;44;65", "wc_strengths": "97;95;114;113", "wc_weaknesses": "156;536;66;22", "wc_questions": "347;56;34;48", "wc_limitations": "31;2;8;1", "wc_review": "728;742;266;249", "wc_reply_reviewers": "44;0;17;17", "wc_reply_authors": "161;0;139;7", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;3;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.75, 20.054612935681405 ], "wc_strengths_avg": [ 104.75, 8.78564169540279 ], "wc_weaknesses_avg": [ 195.0, 202.71408436514716 ], "wc_questions_avg": [ 121.25, 130.5744519421774 ], "wc_limitations_avg": [ 10.5, 12.134661099511597 ], "wc_review_avg": [ 496.25, 238.87692961020744 ], "wc_reply_reviewers_avg": [ 19.5, 15.75595125658873 ], "wc_reply_authors_avg": [ 76.75, 73.70337509232532 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13725164460404915845&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;stanford.edu;fb.com;cs.stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Detection Based Part-level Articulated Object Reconstruction from Single RGBD Image", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71315", "id": "Y3NjoeO4Q1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3af8c40dcf1bc94fa570a5e42edf219d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y3NjoeO4Q1", "openreview": "https://openreview.net/forum?id=Y3NjoeO4Q1", "poster": "/media/PosterPDFs/NeurIPS%202023/71315.png?t=1701972612.0217602", "slides": "https://nips.cc/virtual/2023/poster/71315", "video": "https://nips.cc/virtual/2023/poster/71315", "author_site": "Yuki Kawana, Tatsuya Harada", "tldr": "", "abstract": "We propose an end-to-end trainable, cross-category method for reconstructing multiple man-made articulated objects from a single RGBD image, focusing on part-level shape reconstruction and pose and kinematics estimation. We depart from previous works that rely on learning an instance-level latent space and focus on man-made articulated objects with predefined part counts. Instead, we propose a novel alternative approach that employs a part-level representation, representing instances as combinations of detected parts. 
While our detect-then-group approach effectively handles instances with diverse part structures and various part counts, it faces issues of false positives, varying part sizes and scales, and an increasing model size due to end-to-end training. To address these challenges, we propose 1) test-time kinematics-aware part fusion to improve detection performance while suppressing false positives, 2) anisotropic scale normalization for part shape learning to accommodate various part sizes and scales, and 3) a balancing strategy for cross-refinement between feature space and output space to improve part detection while maintaining model size. Evaluation on both synthetic and real data demonstrates that our method successfully reconstructs multiple instances with diverse part structures that previous works cannot handle, and outperforms prior works in shape reconstruction and kinematics estimation.", "keywords": "articulated objects;shape reconstruction;3D reconstruction", "primary_area": "", "supplementary_material": "", "author": "Yuki Kawana;Tatsuya Harada", "authorids": "~Yuki_Kawana1;~Tatsuya_Harada1", "gender": "M;M", "homepage": ";https://www.mi.t.u-tokyo.ac.jp/harada/", "dblp": "165/1713;14/5849", "google_scholar": "l2e8yqcAAAAJ;https://scholar.google.com/citations?hl=ja", "orcid": ";", "linkedin": ";", "or_profile": "~Yuki_Kawana1;~Tatsuya_Harada1", "aff": "Princeton University;The University of Tokyo", "aff_domain": "princeton.edu;u-tokyo.ac.jp", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nkawana2023detection,\ntitle={Detection Based Part-level Articulated Object Reconstruction from Single {RGBD} Image},\nauthor={Yuki Kawana and Tatsuya Harada},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y3NjoeO4Q1}\n}", "github": "", "project": "", "reviewers": "HLDh;mcMG;MJHC;Y464;VtGS", "pdf_size": 19420706, "rating": "5;6;6;6;8", "confidence": "4;4;4;4;3", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "142;89;60;34;99", "wc_strengths": "85;105;102;116;33", "wc_weaknesses": "183;233;201;219;26", "wc_questions": "66;74;35;14;73", "wc_limitations": "12;8;28;6;6", "wc_review": "488;509;426;389;237", "wc_reply_reviewers": "25;13;69;25;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 84.8, 36.56993300513415 ], "wc_strengths_avg": [ 88.2, 29.335302964176115 ], "wc_weaknesses_avg": [ 172.4, 75.10952003574513 ], "wc_questions_avg": [ 52.4, 23.88807233746583 ], "wc_limitations_avg": [ 12.0, 8.294576541331088 ], "wc_review_avg": [ 409.8, 96.44770603803907 ], "wc_reply_reviewers_avg": [ 29.0, 20.707486568871655 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9185586535436918, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15807233436621465951&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "princeton.edu;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Princeton University;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.princeton.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Princeton;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan" }, { "title": "PTQD: Accurate Post-Training Quantization for Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71314", "id": "Y3g1PV5R9l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2aab8a76c7e761b66eccaca0927787de-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y3g1PV5R9l", "openreview": "https://openreview.net/forum?id=Y3g1PV5R9l", "poster": "/media/PosterPDFs/NeurIPS%202023/71314.png?t=1701400923.898302", "slides": "https://nips.cc/virtual/2023/poster/71314", "video": "https://nips.cc/virtual/2023/poster/71314", "author_site": "Yefei He, Luping Liu, Jing Liu, Weijia Wu, Hong Zhou, Bohan Zhuang", "tldr": "", "abstract": "Diffusion models have recently dominated image synthesis and other related generative tasks. However, the iterative denoising process is expensive in computations at inference time, making diffusion models less practical for low-latency and scalable real-world applications. \nPost-training quantization of diffusion models can significantly reduce the model size and accelerate the sampling process without requiring any re-training. Nonetheless, applying existing post-training quantization methods directly to low-bit diffusion models can significantly impair the quality of generated samples. Specifically, for each denoising step, quantization noise leads to deviations in the estimated mean and mismatches with the predetermined variance schedule. Moreover, as the sampling process proceeds, the quantization noise may accumulate, resulting in a low signal-to-noise ratio (SNR) during the later denoising steps. To address these challenges, we propose a unified formulation for the quantization noise and diffusion perturbed noise in the quantized denoising process. \nSpecifically, we first disentangle the quantization noise into its correlated and residual uncorrelated parts regarding its full-precision counterpart. The correlated part can be easily corrected by estimating the correlation coefficient. For the uncorrelated part, we subtract the bias from the quantized results to correct the mean deviation and calibrate the denoising variance schedule to absorb the excess variance resulting from quantization. Moreover, we introduce a mixed-precision scheme for selecting the optimal bitwidth for each denoising step, which prioritizes lower bitwidths to expedite early denoising steps, while ensuring that higher bitwidths maintain a high signal-to-noise ratio (SNR) in the later steps. Extensive experiments demonstrate that our method outperforms previous post-training quantized diffusion models in generating high-quality samples, with only a $0.06$ increase in FID score compared to full-precision LDM-4 on ImageNet $256\\times256$, while saving $19.9\\times$ bit operations. 
Code is available at [https://github.com/ziplab/PTQD](https://github.com/ziplab/PTQD).", "keywords": "Diffusion models;Post-training quantization;Mixed precision", "primary_area": "", "supplementary_material": "/attachment/45dda14c8889c9a75852cce6180c800729375398.pdf", "author": "Yefei He;Luping Liu;Jing Liu;Weijia Wu;Hong Zhou;Bohan Zhuang", "authorids": "~Yefei_He1;~Luping_Liu2;~Jing_Liu8;~Weijia_Wu2;~Hong_Zhou3;~Bohan_Zhuang1", "gender": "M;;M;M;M;M", "homepage": "https://hexy.tech/;;https://www.jing-liu.com/;https://weijiawu.github.io/;https://person.zju.edu.cn/zhouhong;https://bohanzhuang.github.io/", "dblp": "92/6254;;72/2590-48;87/7695-1;45/3426;145/1096", "google_scholar": "CTEQwwwAAAAJ;;-lHaZH4AAAAJ;NgjTRe4AAAAJ;;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ", "orcid": "0000-0002-2171-4518;;0000-0002-6745-3050;0000-0003-3912-7212;;", "linkedin": ";;jing-liu-619688133/;%E5%A8%81%E4%BD%B3-%E5%90%B4-07a852280/;;bohan-zhuang/", "or_profile": "~Yefei_He1;~Luping_Liu2;~Jing_Liu8;~Weijia_Wu2;~Hong_Zhou3;~Bohan_Zhuang1", "aff": "Zhejiang University;;Monash University;Zhejiang University;Zhejiang University;Monash University", "aff_domain": "zju.edu.cn;;monash.edu.au;zju.edu.cn;zju.edu.cn;monash.edu", "position": "PhD student;;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhe2023ptqd,\ntitle={{PTQD}: Accurate Post-Training Quantization for Diffusion Models},\nauthor={Yefei He and Luping Liu and Jing Liu and Weijia Wu and Hong Zhou and Bohan Zhuang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y3g1PV5R9l}\n}", "github": "", "project": "", "reviewers": "5dgo;MVe5;LoUy;oS4i;i5nq", "pdf_size": 3034483, "rating": "5;5;5;6;6", "confidence": "4;4;5;2;2", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;1;2;3;3", "wc_summary": "78;74;61;40;81", "wc_strengths": "35;26;33;39;83", "wc_weaknesses": "29;212;51;28;53", "wc_questions": "109;94;159;1;2", "wc_limitations": "3;6;15;1;1", "wc_review": "254;412;319;109;220", "wc_reply_reviewers": "0;28;47;9;0", "wc_reply_authors": "0;29;371;0;0", "reply_reviewers": "0;1;2;1;0", "reply_authors": "1;2;5;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 1.2 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 66.8, 15.038616957685969 ], "wc_strengths_avg": [ 43.2, 20.341091416145794 ], "wc_weaknesses_avg": [ 74.6, 69.50280569876298 ], "wc_questions_avg": [ 73.0, 62.22218253966989 ], "wc_limitations_avg": [ 5.2, 5.230678732248808 ], "wc_review_avg": [ 262.8, 100.97801740973131 ], "wc_reply_reviewers_avg": [ 16.8, 18.236227680087786 ], "wc_reply_authors_avg": [ 80.0, 145.93286127531385 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9525793444156802, "gs_citation": 102, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13449006048649378183&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "zju.edu.cn;;monash.edu.au;zju.edu.cn;zju.edu.cn;monash.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Zhejiang University;Monash University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.monash.edu", "aff_unique_abbr": "ZJU;Monash", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;Australia" }, { "title": "Quantum Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71313", "id": "Y44NurSDjq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/401aa72e0e3be680348a5b0ffdb1a5aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y44NurSDjq", "openreview": "https://openreview.net/forum?id=Y44NurSDjq", "poster": "/media/PosterPDFs/NeurIPS%202023/71313.png?t=1702015639.8250537", "slides": "https://nips.cc/virtual/2023/poster/71313", "video": "https://nips.cc/virtual/2023/poster/71313", "author_site": "Zhongxiang Dai, Zhongxiang Dai, Gregory Kang Ruey Lau, Arun Verma, YAO SHU, Bryan Kian Hsiang Low, Patrick Jaillet", "tldr": "", "abstract": "Kernelized bandits, also known as Bayesian optimization (BO), has been a prevalent method for optimizing complicated black-box reward functions. Various BO algorithms have been theoretically shown to enjoy upper bounds on their cumulative regret which are sub-linear in the number $T$ of iterations, and a regret lower bound of $\\Omega(\\sqrt{T})$ has been derived which represents the unavoidable regrets for any classical BO algorithm. Recent works on quantum bandits have shown that with the aid of quantum computing, it is possible to achieve tighter regret upper bounds better than their corresponding classical lower bounds. However, these works are restricted to either multi-armed or linear bandits, and are hence not able to solve sophisticated real-world problems with non-linear reward functions. To this end, we introduce the quantum-Gaussian process-upper confidence bound (Q-GP-UCB) algorithm. To the best of our knowledge, our Q-GP-UCB is the first BO algorithm able to achieve a regret upper bound of $\\mathcal{O}(\\text{poly}\\log T)$, which is significantly smaller than its regret lower bound of $\\Omega(\\sqrt{T})$ in the classical setting. Moreover, thanks to our novel analysis of the confidence ellipsoid, our Q-GP-UCB with the linear kernel achieves a smaller regret than the quantum linear UCB algorithm from the previous work. 
We use simulations, as well as an experiment using a real quantum computer, to verify that the theoretical quantum speedup achieved by our Q-GP-UCB is also potentially relevant in practice.", "keywords": "quantum bandits;kernelized bandits", "primary_area": "", "supplementary_material": "/attachment/0fd4d6be2da2b78bb5c9951321d38d75482fa615.zip", "author": "Zhongxiang Dai;Gregory Kang Ruey Lau;Arun Verma;Yao Shu;Bryan Kian Hsiang Low;Patrick Jaillet", "authorids": "~Zhongxiang_Dai1;~Gregory_Kang_Ruey_Lau1;~Arun_Verma1;~Yao_Shu1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "gender": "M;M;M;M;M;M", "homepage": "https://daizhongxiang.github.io/;https://www.comp.nus.edu.sg/~greglau/;https://arunv3rma.github.io/;https://yao.notion.site;http://www.comp.nus.edu.sg/~lowkh;http://web.mit.edu/jaillet/www/", "dblp": "172/4968;358/7226;28/3688;44/1338;97/4877;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick", "google_scholar": "1v8xOIYAAAAJ;;https://scholar.google.co.in/citations?user=tBcixlUAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ;ND0FM6EAAAAJ", "orcid": ";;;;;0000-0002-8585-6566", "linkedin": ";gregorylau/;;yao-shu-a5640514b;;patrick-jaillet-1260445/", "or_profile": "~Zhongxiang_Dai1;~Gregory_Kang_Ruey_Lau1;~Arun_Verma1;~Yao_Shu1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;Massachusetts Institute of Technology", "aff_domain": "nus.edu.sg;u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu", "position": "Postdoc;PhD student;Postdoc;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ndai2023quantum,\ntitle={Quantum Bayesian Optimization},\nauthor={Zhongxiang Dai and Gregory Kang Ruey Lau and Arun Verma and Yao Shu and Bryan Kian Hsiang Low and Patrick Jaillet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y44NurSDjq}\n}", "github": "", "project": "", "reviewers": "pGF9;Hp2A;5kMD;n5g2", "pdf_size": 5013980, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "118;172;136;181", "wc_strengths": "22;45;107;125", "wc_weaknesses": "165;156;88;36", "wc_questions": "80;58;95;58", "wc_limitations": "41;1;1;20", "wc_review": "426;432;427;420", "wc_reply_reviewers": "149;21;9;0", "wc_reply_authors": "377;0;37;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 151.75, 25.752427070084092 ], "wc_strengths_avg": [ 74.75, 42.522788008313846 ], "wc_weaknesses_avg": [ 111.25, 52.66580959218229 ], "wc_questions_avg": [ 72.75, 15.674421839417235 ], "wc_limitations_avg": [ 15.75, 16.513252253871737 ], "wc_review_avg": [ 426.25, 4.264680527307995 ], "wc_reply_reviewers_avg": [ 44.75, 60.64806262363209 ], "wc_reply_authors_avg": [ 103.5, 158.62613277767318 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14742259085672701971&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 10, "email": "nus.edu.sg;u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "National University of Singapore;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://web.mit.edu", "aff_unique_abbr": "NUS;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Singapore;United States" }, { "title": "MADLAD-400: A Multilingual And Document-Level Large Audited Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73557", "id": "Y45ZCxslFx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d49042a5d49818711c401d34172f9900-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Y45ZCxslFx", "openreview": "https://openreview.net/forum?id=Y45ZCxslFx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73557", "video": "https://nips.cc/virtual/2023/poster/73557", "author_site": "Sneha Kudugunta, Isaac Caswell, Biao Zhang, Xavier Garcia, Derrick Xin, Aditya Kusupati, Romi Stella, Ankur Bapna, Orhan Firat", "tldr": "", "abstract": "We introduce MADLAD-400, a manually audited, general domain 3T token monolingual dataset based on CommonCrawl, spanning 419 languages. We discuss the limitations revealed by self-auditing MADLAD-400, and the role data auditing had in the dataset creation process. We then train and release a 10.7B-parameter multilingual machine translation model on 250 billion tokens covering over 450 languages using publicly available data, and find that it is competitive with models that are significantly larger, and report the results on different domains. In addition, we train a 8B-parameter language model, and assess the results on few-shot translation. 
We make the baseline models available to the research community.", "keywords": "CommonCrawl;Low Resource Languages;Underrepresented Languages;Multilinguality;LLMs;Large Language Models;Machine Translation", "primary_area": "", "supplementary_material": "/attachment/cc2ada72679ef7aeaf73023e7df40d79a111e937.pdf", "author": "Sneha Kudugunta;Isaac Rayburn Caswell;Biao Zhang;Xavier Garcia;Derrick Xin;Aditya Kusupati;Romi Stella;Ankur Bapna;Orhan Firat", "authorids": "~Sneha_Kudugunta1;~Isaac_Rayburn_Caswell1;~Biao_Zhang2;~Xavier_Garcia1;~Derrick_Xin1;~Aditya_Kusupati1;romistella@google.com;~Ankur_Bapna1;~Orhan_Firat1", "gender": "F;;M;;M;M;;M;M", "homepage": ";;;;https://github.com/dxin12345;http://www.adityakusupati.com/;;;", "dblp": ";236/5919.html;https://dblp.uni-trier.de/pers/hd/z/Zhang_0002:Biao;;;231/7662;;200/8008;120/2225", "google_scholar": "LeEwxtgAAAAJ;myh9l2AAAAAJ;gqPKjaIAAAAJ;;;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;;6hK9IZoAAAAJ;https://scholar.google.com.tr/citations?user=dLaR9lgAAAAJ", "orcid": ";;;;;0000-0001-8455-1851;;;", "linkedin": ";;;;;adityakusupati/;;ankur-bapna-053b1269;", "or_profile": "~Sneha_Kudugunta1;~Isaac_Rayburn_Caswell1;~Biao_Zhang2;~Xavier_Garcia1;~Derrick_Xin1;~Aditya_Kusupati1;romistella@google.com;~Ankur_Bapna1;~Orhan_Firat1", "aff": "Google DeepMind;Google;Google DeepMind;;Research, Google;Department of Computer Science, University of Washington;;Google;Google", "aff_domain": "google.com;google.com;google.com;;research.google.com;cs.washington.edu;;google.com;google.com", "position": "Researcher;Researcher;Researcher;;Researcher;PhD student;;Software Engineer;Research Scientist", "bibtex": "@inproceedings{\nkudugunta2023madlad,\ntitle={{MADLAD}-400: A Multilingual And Document-Level Large Audited Dataset},\nauthor={Sneha Kudugunta and Isaac Rayburn Caswell and Biao Zhang and Xavier Garcia and Derrick Xin and Aditya Kusupati and Romi Stella and Ankur Bapna and Orhan Firat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Y45ZCxslFx}\n}", "github": "", "project": "", "reviewers": "MbPM;5Zvf;4BCh", "pdf_size": 331215, "rating": "7;8;10", "confidence": "4;5;5", "wc_summary_and_contributions": "54;276;201", "wc_strengths": "97;51;16", "wc_improvement": "21;98;24", "wc_limitations": "1;40;1", "wc_correctness": "1;19;1", "wc_clarity": "45;17;1", "wc_relation_to_prior_work": "1;43;1", "wc_documentation": "1;24;1", "wc_additional_feedback": "1;1;1", "wc_review": "222;569;247", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "325;542;268", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 8.333333333333334, 1.247219128924647 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 177.0, 92.2062904578641 ], "wc_strengths_avg": [ 54.666666666666664, 33.169597860423664 ], "wc_improvement_avg": [ 47.666666666666664, 35.6121078036982 ], "wc_limitations_avg": [ 14.0, 18.384776310850235 ], "wc_correctness_avg": [ 7.0, 8.48528137423857 ], "wc_clarity_avg": [ 21.0, 18.184242262647807 ], "wc_relation_to_prior_work_avg": [ 15.0, 19.79898987322333 ], "wc_documentation_avg": [ 8.666666666666666, 10.842303978193728 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 346.0, 158.01476724238995 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 378.3333333333333, 118.04613034271343 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], 
"replies_avg": [ 8, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7559289460184544, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3403145482648559011&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;;research.google.com;cs.washington.edu;;google.com;google.com", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Google;University of Washington", "aff_unique_dep": "Google DeepMind;Department of Computer Science", "aff_unique_url": "https://deepmind.com;https://www.washington.edu", "aff_unique_abbr": "DeepMind;UW", "aff_campus_unique_index": "1;1;2;1;1", "aff_campus_unique": ";Mountain View;Seattle", "aff_country_unique_index": "0;1;0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "VisIT-Bench: A Dynamic Benchmark for Evaluating Instruction-Following Vision-and-Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73556", "id": "Y4GZ2w74f4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5503389dbe070cdae9b48086c4996a59-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Y4GZ2w74f4", "openreview": "https://openreview.net/forum?id=Y4GZ2w74f4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73556", "video": "https://nips.cc/virtual/2023/poster/73556", "author_site": "Yonatan Bitton, Hritik Bansal, Jack Hessel, Rulin Shao, Wanrong Zhu, Anas Awadalla, Josh Gardner, Rohan Taori, Ludwig Schmidt", "tldr": "", "abstract": "We introduce VisIT-Bench (Visual InsTruction Benchmark), a benchmark for evaluating instruction-following vision-language models for real-world use. Our starting point is curating 70 \"instruction families\" that we envision instruction tuned vision-language models should be able to address. Extending beyond evaluations like VQAv2 and COCO, tasks range from basic recognition to game playing and creative generation. Following curation, our dataset comprises 592 test queries, each with a human-authored instruction-conditioned caption. These descriptions surface instruction-specific factors, e.g., for an instruction asking about the accessibility of a storefront for wheelchair users, the instruction-conditioned caption describes ramps/potential obstacles. These descriptions enable 1) collecting human-verified reference outputs for each instance; and 2) automatic evaluation of candidate multimodal generations using a text-only LLM, aligning with human judgment. We quantify quality gaps between models and references using both human and automatic evaluations; e.g., the top-performing instruction-following model wins against the GPT-4 reference in just 27% of the comparison. 
VisIT-Bench is dynamic: to participate, practitioners simply submit their model's responses on the project website. Data, code, and the leaderboard are available at https://visit-bench.github.io/.", "keywords": "Vision-language models;Instruction-following;Benchmark;Multimodal chatbots;ELO rating system;Automatic evaluation metric;Dataset collection", "primary_area": "", "supplementary_material": "/attachment/f5c3879ce7a4d81956008281ad70a27eac1d7f12.pdf", "author": "Yonatan Bitton;Hritik Bansal;Jack Hessel;Rulin Shao;Wanrong Zhu;Anas Awadalla;Joshua P Gardner;Rohan Taori;Ludwig Schmidt", "authorids": "~Yonatan_Bitton1;~Hritik_Bansal2;~Jack_Hessel1;~Rulin_Shao1;~Wanrong_Zhu1;~Anas_Awadalla1;~Joshua_P_Gardner1;~Rohan_Taori1;~Ludwig_Schmidt1", "gender": "M;M;M;;;M;;M;M", "homepage": "https://yonatanbitton.github.io/;https://sites.google.com/view/hbansal;https://www.jmhessel.com;https://rulinshao.github.io/;;https://github.com/anas-awadalla;;http://rohantaori.com/;http://people.csail.mit.edu/ludwigs/", "dblp": "277/7042;239/5922;https://dblp.uni-trier.de/pid/132/5250.html;;;;;220/3868;141/2720", "google_scholar": "P9Fpf4sAAAAJ;gAKTYtoAAAAJ;SxQQ1msAAAAJ;Vdwh6bcAAAAJ;;https://scholar.google.com/citations?hl=en;;juoUSMgAAAAJ;SWMKy70AAAAJ", "orcid": ";;0000-0002-4012-8979;;;;;;", "linkedin": "yonatanbitton/;hritik-bansal/;;;;;;rtaori/;ludwig-schmidt-87ba3612/", "or_profile": "~Yonatan_Bitton1;~Hritik_Bansal2;~Jack_Hessel1;~Rulin_Shao1;~Wanrong_Zhu1;~Anas_Awadalla1;~Joshua_P_Gardner1;~Rohan_Taori1;~Ludwig_Schmidt1", "aff": "Hebrew University of Jerusalem;University of California, Los Angeles;Allen Institute for Artificial Intelligence;University of Washington;;Department of Computer Science, University of Washington;;Stanford University;Allen Institute for Artificial Intelligence", "aff_domain": "huji.ac.il;ucla.edu;allenai.org;uw.edu;;cs.washington.edu;;stanford.edu;allenai.org", "position": "PhD student;PhD student;Researcher;PhD student;;Undergrad student;;PhD student;Researcher", "bibtex": "@inproceedings{\nbitton2023visitbench,\ntitle={Vis{IT}-Bench: A Dynamic Benchmark for Evaluating Instruction-Following Vision-and-Language Models},\nauthor={Yonatan Bitton and Hritik Bansal and Jack Hessel and Rulin Shao and Wanrong Zhu and Anas Awadalla and Joshua P Gardner and Rohan Taori and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Y4GZ2w74f4}\n}", "github": "", "project": "", "reviewers": "AvKi;ywz9;UGFt;Fmx9", "pdf_size": 7113793, "rating": "5;6;7;7", "confidence": "3;4;4;4", "wc_summary_and_contributions": "74;64;49;77", "wc_strengths": "77;55;77;62", "wc_improvement": "103;85;164;109", "wc_limitations": "3;1;14;2", "wc_correctness": "9;4;1;51", "wc_clarity": "1;9;1;9", "wc_relation_to_prior_work": "8;1;1;11", "wc_documentation": "1;2;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "277;222;313;323", "wc_reply_reviewers": "0;0;0;26", "wc_reply_authors": "442;414;430;319", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 66.0, 10.931605554537724 ], "wc_strengths_avg": [ 67.75, 9.575359001102779 ], "wc_improvement_avg": [ 115.25, 29.498940658945703 ], "wc_limitations_avg": [ 5.0, 5.244044240850758 ], "wc_correctness_avg": [ 16.25, 20.26542622300355 ], "wc_clarity_avg": [ 5.0, 4.0 ], "wc_relation_to_prior_work_avg": [ 5.25, 
4.380353866983808 ], "wc_documentation_avg": [ 2.25, 1.6393596310755 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 283.75, 39.543488718118944 ], "wc_reply_reviewers_avg": [ 6.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 401.25, 48.51481732419489 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7837735349727384730&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "huji.ac.il;ucla.edu;allenai.org;uw.edu;;cs.washington.edu;;stanford.edu;allenai.org", "author_num": 9, "aff_unique_index": "0;1;2;3;3;4;2", "aff_unique_norm": "Hebrew University of Jerusalem;University of California, Los Angeles;Allen Institute for Artificial Intelligence;University of Washington;Stanford University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.huji.ac.il;https://www.ucla.edu;https://allenai.org;https://www.washington.edu;https://www.stanford.edu", "aff_unique_abbr": "HUJI;UCLA;AI2;UW;Stanford", "aff_campus_unique_index": "0;1;3;4", "aff_campus_unique": "Jerusalem;Los Angeles;;Seattle;Stanford", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Model Shapley: Equitable Model Valuation with Black-box Access", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71312", "id": "Y6IGTNMdLT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86bcae6da75c72e32f30a5553f094c06-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y6IGTNMdLT", "openreview": "https://openreview.net/forum?id=Y6IGTNMdLT", "poster": "/media/PosterPDFs/NeurIPS%202023/71312.png?t=1701417342.575461", "slides": "https://nips.cc/virtual/2023/poster/71312", "video": "https://nips.cc/virtual/2023/poster/71312", "author_site": "Xinyi Xu, Thanh Lam, Chuan Sheng Foo, Bryan Kian Hsiang Low", "tldr": "", "abstract": "Valuation methods of data and machine learning (ML) models are essential to the establishment of AI marketplaces. Importantly, certain practical considerations (e.g., operational constraints, legal restrictions) favor the use of model valuation over data valuation. Also, existing marketplaces that involve trading of pre-trained ML models call for an equitable model valuation method to price them. In particular, we investigate the black-box access setting which allows querying a model (to observe predictions) without disclosing model-specific information (e.g., architecture and parameters). By exploiting a Dirichlet abstraction of a model\u2019s predictions, we propose a novel and equitable model valuation method called model Shapley. We also leverage a Lipschitz continuity of model Shapley to design a learning approach for predicting the model Shapley values (MSVs) of many vendors\u2019 models (e.g., 150) in a large-scale marketplace. 
We perform extensive empirical validation on the effectiveness of model Shapley using various real-world datasets and heterogeneous model types.", "keywords": "Model Valuation;Dirichlet Abstraction;Shapley Value", "primary_area": "", "supplementary_material": "", "author": "Xinyi Xu;Thanh Lam;Chuan-Sheng Foo;Bryan Kian Hsiang Low", "authorids": "~Xinyi_Xu4;~Thanh_Lam1;~Chuan-Sheng_Foo1;~Bryan_Kian_Hsiang_Low1", "gender": "M;M;M;M", "homepage": "https://xinyi-xu.com;;http://ai.stanford.edu/~csfoo;http://www.comp.nus.edu.sg/~lowkh", "dblp": ";280/1674;73/1823;97/4877", "google_scholar": "2762GgsAAAAJ;https://scholar.google.com/citations?hl=en;AgbeqGkAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": "0000-0002-8744-0695;;0000-0002-4748-5792;", "linkedin": "xinyi-xu-a93222133/;;;", "or_profile": "~Xinyi_Xu4;~Thanh_Lam1;~Chuan-Sheng_Foo1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;National University of Singapore;Institute for Infocomm Research, A*STAR;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg;i2r.a-star.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;Principal Scientist;Associate Professor", "bibtex": "@inproceedings{\nxu2023model,\ntitle={Model Shapley: Equitable Model Valuation with Black-box Access},\nauthor={Xinyi Xu and Thanh Lam and Chuan-Sheng Foo and Bryan Kian Hsiang Low},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y6IGTNMdLT}\n}", "github": "", "project": "", "reviewers": "Coej;RTGV;bude;tCDm", "pdf_size": 1194813, "rating": "4;4;6;7", "confidence": "2;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "1;3;3;3", "wc_summary": "70;266;43;124", "wc_strengths": "52;218;54;141", "wc_weaknesses": "217;204;182;569", "wc_questions": "63;156;16;189", "wc_limitations": "2;146;10;2", "wc_review": "404;990;305;1025", "wc_reply_reviewers": "356;0;385;93", "wc_reply_authors": "608;59;216;19", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 125.75, 86.06501902631521 ], "wc_strengths_avg": [ 116.25, 68.8635426041966 ], "wc_weaknesses_avg": [ 293.0, 159.8389814782364 ], "wc_questions_avg": [ 106.0, 69.53056881688802 ], "wc_limitations_avg": [ 40.0, 61.28621378417825 ], "wc_review_avg": [ 681.0, 328.60386485858623 ], "wc_reply_reviewers_avg": [ 208.5, 165.6208018335861 ], "wc_reply_authors_avg": [ 225.5, 232.7879936766499 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8121560435291362483&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "nus.edu.sg;nus.edu.sg;i2r.a-star.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National University of Singapore;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "NUS;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "Y8Jfbqx0bA", "title": "On Consistent Bayesian Inference from 
Synthetic Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Generating synthetic data, with or without differential privacy, has attracted significant attention as a potential solution to the dilemma between making data easily available, and the privacy of data subjects. Several works have shown that consistency of downstream analyses from synthetic data, including accurate uncertainty estimation, requires accounting for the synthetic data generation. There are very few methods of doing so, most of them for frequentist analysis. In this paper, we study how to perform consistent Bayesian inference from synthetic data. We prove that mixing posterior samples obtained separately from multiple large synthetic datasets converges to the posterior of the downstream analysis under standard regularity conditions when the analyst's model is compatible with the data provider's model. We show experimentally that this works in practice, unlocking consistent Bayesian inference from synthetic data while reusing existing downstream analysis methods.\n", "keywords": "synthetic data;Bayesian inference;Bernstein-von Mises theorem;differential privacy", "primary_area": "", "supplementary_material": "/attachment/8c6d55f0c99bc25b1fca2e0eb327e00eaa0cf7fd.zip", "author": "Ossi R\u00e4is\u00e4;Joonas J\u00e4lk\u00f6;Antti Honkela", "authorids": "~Ossi_R\u00e4is\u00e41;~Joonas_J\u00e4lk\u00f61;~Antti_Honkela1", "gender": "M;M;M", "homepage": ";;https://www.cs.helsinki.fi/u/ahonkela/", "dblp": "296/0031;188/5963;h/AnttiHonkela", "google_scholar": "https://scholar.google.fi/citations?user=FpmQ-jcAAAAJ;;XsyLs6AAAAAJ", "orcid": ";;0000-0001-9193-8093", "linkedin": "ossi-r%C3%A4is%C3%A4-749502139/;;", "or_profile": "~Ossi_R\u00e4is\u00e41;~Joonas_J\u00e4lk\u00f61;~Antti_Honkela1", "aff": "University of Helsinki;Aalto University;University of Helsinki", "aff_domain": "helsinki.fi;aalto.fi;helsinki.fi", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nr{\\\"a}is{\\\"a}2023on,\ntitle={On Consistent Bayesian Inference from Synthetic Data},\nauthor={Ossi R{\\\"a}is{\\\"a} and Joonas J{\\\"a}lk{\\\"o} and Antti Honkela},\nyear={2023},\nurl={https://openreview.net/forum?id=Y8Jfbqx0bA}\n}", "github": "", "project": "", "reviewers": "ZbGV;8977;uV6S;z4aQ;4qww", "site": "https://openreview.net/forum?id=Y8Jfbqx0bA", "pdf_size": 538379, "rating": "3;3;5;5;6", "confidence": "2;3;2;1;2", "soundness": "1;2;3;2;3", "novelty": "3;2;3;2;3", "presentation": "1;3;3;3;2", "wc_summary": "194;56;119;59;179", "wc_strengths": "23;29;72;41;20", "wc_weaknesses": "237;2;294;98;331", "wc_questions": "255;195;2;2;3", "wc_limitations": "12;33;2;2;11", "wc_review": "721;315;489;202;544", "wc_reply_reviewers": "0;308;144;16;203", "wc_reply_authors": "0;705;145;0;28", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 2.0, 0.6324555320336759 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 121.4, 57.90544015893498 ], "wc_strengths_avg": [ 37.0, 18.920887928424502 ], "wc_weaknesses_avg": [ 192.4, 123.84280358583618 ], "wc_questions_avg": [ 91.4, 110.72235546627428 ], "wc_limitations_avg": [ 12.0, 11.331372379372237 ], "wc_review_avg": [ 454.2, 180.79756635530248 ], "wc_reply_reviewers_avg": [ 134.2, 115.77288110779658 ], "wc_reply_authors_avg": [ 175.6, 270.0582159461178 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 
0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5270462766947298, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1528344075201468506&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Helsinki;Aalto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.helsinki.fi;https://www.aalto.fi", "aff_unique_abbr": "UH;Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Finland" }, { "title": "A Unified Algorithm Framework for Unsupervised Discovery of Skills based on Determinantal Point Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71311", "id": "Y8p3ThNDmK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6938c8e88ef62394d2f4f3fd428e036-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Y8p3ThNDmK", "openreview": "https://openreview.net/forum?id=Y8p3ThNDmK", "poster": "/media/PosterPDFs/NeurIPS%202023/71311.png?t=1701375758.1822772", "slides": "https://nips.cc/virtual/2023/poster/71311", "video": "https://nips.cc/virtual/2023/poster/71311", "author_site": "Jiayu Chen, Vaneet Aggarwal, Tian Lan", "tldr": "", "abstract": "Learning rich skills under the option framework without supervision of external rewards is at the frontier of reinforcement learning research. Existing works mainly fall into two distinct categories: variational option discovery that maximizes the diversity of the options through a mutual information loss (while ignoring coverage) and Laplacian-based methods that focus on improving the coverage of options by increasing connectivity of the state space (while ignoring diversity). In this paper, we show that diversity and coverage in unsupervised option discovery can indeed be unified under the same mathematical framework. To be specific, we explicitly quantify the diversity and coverage of the learned options through a novel use of the Determinantal Point Process (DPP) and optimize these objectives to discover options with both superior diversity and coverage. Our proposed algorithm, ODPP, has undergone extensive evaluation on challenging tasks created with Mujoco and Atari. 
The results demonstrate that our algorithm outperforms state-of-the-art baselines in both diversity- and coverage-driven categories.", "keywords": "Reinforcement Learning;Unsupervised Skill Discovery;Determinantal Point Process;Options", "primary_area": "", "supplementary_material": "/attachment/cb9c7e34c8eca3b9812bf92cb1ad2e459910760e.zip", "author": "Jiayu Chen;Vaneet Aggarwal;Tian Lan", "authorids": "~Jiayu_Chen2;~Vaneet_Aggarwal1;~Tian_Lan4", "gender": "M;M;M", "homepage": "https://agentic-intelligence-lab.org/members/jiayu-chen.html;;https://www2.seas.gwu.edu/~tlan/", "dblp": ";91/6560;", "google_scholar": "k0KJm7kAAAAJ;;", "orcid": ";;", "linkedin": "jiayu-chen-a707b5198/;;", "or_profile": "~Jiayu_Chen2;~Vaneet_Aggarwal1;~Tian_Lan4", "aff": "Purdue University;Purdue University;George Washington University", "aff_domain": "purdue.edu;purdue.edu;gwu.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023a,\ntitle={A Unified Algorithm Framework for Unsupervised Discovery of Skills based on Determinantal Point Process},\nauthor={Jiayu Chen and Vaneet Aggarwal and Tian Lan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Y8p3ThNDmK}\n}", "github": "", "project": "", "reviewers": "6F6e;iqV6;7oxb;vq5Y;4NNK", "pdf_size": 3424556, "rating": "6;6;6;7;7", "confidence": "4;3;3;3;4", "soundness": "3;3;2;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "54;72;83;71;56", "wc_strengths": "89;87;46;92;59", "wc_weaknesses": "357;155;117;248;48", "wc_questions": "393;243;540;0;2", "wc_limitations": "14;1;14;0;1", "wc_review": "907;558;800;411;166", "wc_reply_reviewers": "48;0;58;34;0", "wc_reply_authors": "42;0;49;41;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;2;2;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 67.2, 10.833282051160674 ], "wc_strengths_avg": [ 74.6, 18.575252353602092 ], "wc_weaknesses_avg": [ 185.0, 107.56021569335012 ], "wc_questions_avg": [ 235.6, 213.33785411876624 ], "wc_limitations_avg": [ 6.0, 6.54217089351845 ], "wc_review_avg": [ 568.4, 266.4947279028236 ], "wc_reply_reviewers_avg": [ 28.0, 24.099792530227308 ], "wc_reply_authors_avg": [ 26.4, 21.731083728153088 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16074322459931782180&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "purdue.edu;purdue.edu;gwu.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Purdue University;George Washington University", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.gwu.edu", "aff_unique_abbr": "Purdue;GWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Finding Counterfactually Optimal Action Sequences in Continuous State Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71310", "id": "YDCpf85eXc", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/09ae6beae5f1ff38f05c05979097ea0f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YDCpf85eXc", "openreview": "https://openreview.net/forum?id=YDCpf85eXc", "poster": "/media/PosterPDFs/NeurIPS%202023/71310.png?t=1701809322.2374227", "slides": "https://nips.cc/virtual/2023/poster/71310", "video": "https://nips.cc/virtual/2023/poster/71310", "author_site": "Stratis Tsirtsis, Manuel Rodriguez", "tldr": "", "abstract": "Whenever a clinician reflects on the efficacy of a sequence of treatment decisions for a patient, they may try to identify critical time steps where, had they made different decisions, the patient's health would have improved. While recent methods at the intersection of causal inference and reinforcement learning promise to aid human experts, as the clinician above, to *retrospectively* analyze sequential decision making processes, they have focused on environments with finitely many discrete states. However, in many practical applications, the state of the environment is inherently continuous in nature. In this paper, we aim to fill this gap. We start by formally characterizing a sequence of discrete actions and continuous states using finite horizon Markov decision processes and a broad class of bijective structural causal models. Building upon this characterization, we formalize the problem of finding counterfactually optimal action sequences and show that, in general, we cannot expect to solve it in polynomial time. Then, we develop a search method based on the A* algorithm that, under a natural form of Lipschitz continuity of the environment\u2019s dynamics, is guaranteed to return the optimal solution to the problem. Experiments on real clinical data show that our method is very efficient in practice, and it has the potential to offer interesting insights for sequential decision making tasks.", "keywords": "Counterfactual reasoning;Markov decision process;Structural causal model;A* search", "primary_area": "", "supplementary_material": "", "author": "Stratis Tsirtsis;Manuel Gomez Rodriguez", "authorids": "~Stratis_Tsirtsis1;~Manuel_Gomez_Rodriguez1", "gender": ";M", "homepage": "https://stsirtsis.github.io/;https://www.mpi-sws.org/~manuelgr/", "dblp": "258/3593.html;73/8260", "google_scholar": "GaDrqcsAAAAJ;https://scholar.google.com.tw/citations?user=UcuXmuwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Stratis_Tsirtsis1;~Manuel_Gomez_Rodriguez1", "aff": "Meta Facebook;MPI-SWS", "aff_domain": "meta.com;mpi-sws.org", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\ntsirtsis2023finding,\ntitle={Finding Counterfactually Optimal Action Sequences in Continuous State Spaces},\nauthor={Stratis Tsirtsis and Manuel Gomez Rodriguez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YDCpf85eXc}\n}", "github": "", "project": "", "reviewers": "MQT2;rmB3;DTwm;qYbP;R23o", "pdf_size": 1235677, "rating": "5;6;6;7;7", "confidence": "3;4;2;3;3", "soundness": "3;2;1;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;1;4;4", "wc_summary": "292;59;39;87;68", "wc_strengths": "75;9;26;113;77", "wc_weaknesses": "305;9;14;71;238", "wc_questions": "107;968;265;63;56", "wc_limitations": "46;9;0;9;1", "wc_review": "825;1054;344;343;440", "wc_reply_reviewers": "95;86;25;22;99", "wc_reply_authors": "339;41;17;0;33", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.2, 0.7483314773547882 
], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 109.0, 92.79439638254026 ], "wc_strengths_avg": [ 60.0, 37.62977544445356 ], "wc_weaknesses_avg": [ 127.4, 121.51806450071528 ], "wc_questions_avg": [ 291.8, 346.42944447607226 ], "wc_limitations_avg": [ 13.0, 16.93517050401324 ], "wc_review_avg": [ 601.2, 287.72167106424223 ], "wc_reply_reviewers_avg": [ 65.4, 34.482459309045815 ], "wc_reply_authors_avg": [ 86.0, 127.27922061357856 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17402608235226723711&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "meta.com;mpi-sws.org", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Max Planck Institute for Software Systems", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.mpi-sws.org", "aff_unique_abbr": "Meta;MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "title": "$\\textbf{A}^2\\textbf{CiD}^2$: Accelerating Asynchronous Communication in Decentralized Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71309", "id": "YE04aRkeZb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/940f1d0760ca52c8b21ef3b661357ec2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YE04aRkeZb", "openreview": "https://openreview.net/forum?id=YE04aRkeZb", "poster": "/media/PosterPDFs/NeurIPS%202023/71309.png?t=1702007768.7221718", "slides": "https://nips.cc/virtual/2023/poster/71309", "video": "https://nips.cc/virtual/2023/poster/71309", "author_site": "Adel Nabli, Eugene Belilovsky, Edouard Oyallon", "tldr": "", "abstract": "Distributed training of Deep Learning models has been critical to many recent successes in the field. Current standard methods primarily rely on synchronous centralized algorithms which induce major communication bottlenecks and synchronization locks at scale. Decentralized asynchronous algorithms are emerging as a potential alternative but their practical applicability still lags. In order to mitigate the increase in communication cost that naturally comes with scaling the number of workers, we introduce a principled asynchronous, randomized, gossip-based optimization algorithm which works thanks to a continuous local momentum named $\\textbf{A}^2\\textbf{CiD}^2$. Our method allows each worker to continuously process mini-batches without stopping, and run a peer-to-peer averaging routine in parallel, reducing idle time. In addition to inducing a significant communication acceleration at no cost other than adding a local momentum variable, minimal adaptation is required to incorporate $\\textbf{A}^2\\textbf{CiD}^2$ to standard asynchronous approaches. Our theoretical analysis proves accelerated rates compared to previous asynchronous decentralized baselines and we empirically show that using our $\\textbf{A}^2\\textbf{CiD}^2$ momentum significantly decrease communication costs in poorly connected networks. 
In particular, we show consistent improvement on the ImageNet dataset using up to 64 asynchronous workers (A100 GPUs) and various communication network topologies.", "keywords": "Decentralized Optimization for Deep Learning;Asynchronous Optimization;Distributed Training;Data-Parallel", "primary_area": "", "supplementary_material": "", "author": "Adel Nabli;Eugene Belilovsky;Edouard Oyallon", "authorids": "~Adel_Nabli1;~Eugene_Belilovsky1;~Edouard_Oyallon1", "gender": "M;M;", "homepage": ";http://eugenium.github.io;", "dblp": "269/9664.html;42/11445;", "google_scholar": "bvNfLmMAAAAJ;https://scholar.google.fr/citations?user=CffJDoEAAAAJ;", "orcid": "0000-0003-3180-5445;;", "linkedin": ";;", "or_profile": "~Adel_Nabli1;~Eugene_Belilovsky1;~Edouard_Oyallon1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Concordia University, Montreal;", "aff_domain": "mila.umontreal.ca;concordia.ca;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nnabli2023textbfatextbfcid,\ntitle={\\${\\textbackslash}textbf\\{A\\}{\\textasciicircum}2{\\textbackslash}textbf\\{CiD\\}{\\textasciicircum}2\\$: Accelerating Asynchronous Communication in Decentralized Deep Learning},\nauthor={Adel Nabli and Eugene Belilovsky and Edouard Oyallon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YE04aRkeZb}\n}", "github": "", "project": "", "reviewers": "fQ1m;LKYk;o6Xd;fkqi", "pdf_size": 1190671, "rating": "4;5;5;5", "confidence": "2;4;4;3", "soundness": "2;2;3;2", "novelty": "2;2;3;2", "presentation": "1;2;2;3", "wc_summary": "74;238;24;35", "wc_strengths": "29;168;25;43", "wc_weaknesses": "92;205;121;227", "wc_questions": "2;134;10;68", "wc_limitations": "1;10;1;42", "wc_review": "198;755;181;415", "wc_reply_reviewers": "29;391;62;0", "wc_reply_authors": "74;456;1082;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;4;1", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.75, 85.89346599130809 ], "wc_strengths_avg": [ 66.25, 59.12433931977591 ], "wc_weaknesses_avg": [ 161.25, 56.24222168442495 ], "wc_questions_avg": [ 53.5, 52.99764145695542 ], "wc_limitations_avg": [ 13.5, 16.859715300087366 ], "wc_review_avg": [ 387.25, 231.49770517221117 ], "wc_reply_reviewers_avg": [ 120.5, 157.70621420857202 ], "wc_reply_authors_avg": [ 403.0, 428.51487722131657 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "mila.umontreal.ca;concordia.ca;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Montreal;Concordia University", "aff_unique_dep": "Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://www.mila.quebec;https://www.concordia.ca", "aff_unique_abbr": "MILA;Concordia", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Montreal", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Model-Free Active Exploration in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71308", "id": "YEtstXIpP3", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abbbb25cddb2c2cd08714e6bfa2f0634-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YEtstXIpP3", "openreview": "https://openreview.net/forum?id=YEtstXIpP3", "poster": "/media/PosterPDFs/NeurIPS%202023/71308.png?t=1701676991.347984", "slides": "https://nips.cc/virtual/2023/poster/71308", "video": "https://nips.cc/virtual/2023/poster/71308", "author_site": "Alessio Russo, Alessio Russo, Alexandre Proutiere", "tldr": "", "abstract": "We study the problem of exploration in Reinforcement Learning and present a novel model-free solution. We adopt an information-theoretical viewpoint and start from the instance-specific lower bound of the number of samples that have to be collected to identify a nearly-optimal policy. Deriving this lower bound along with the optimal exploration strategy entails solving an intricate optimization problem and requires a model of the system. In turn, most existing sample optimal exploration algorithms rely on estimating the model. We derive an approximation of the instance-specific lower bound that only involves quantities that can be inferred using model-free approaches. Leveraging this approximation, we devise an ensemble-based model-free exploration strategy applicable to both tabular and continuous Markov decision processes. Numerical results demonstrate that our strategy is able to identify efficient policies faster than state-of-the-art exploration approaches.", "keywords": "reinforcement learning; best policy identification; model free; exploration; sample complexity", "primary_area": "", "supplementary_material": "/attachment/26971ccf489623695ce64666605241b0e5b65ca9.pdf", "author": "Alessio Russo;Alexandre Proutiere", "authorids": "~Alessio_Russo1;~Alexandre_Proutiere1", "gender": ";M", "homepage": ";https://people.kth.se/~alepro/", "dblp": ";p/AlexandreProutiere", "google_scholar": ";g5sya5cAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Alessio_Russo1;~Alexandre_Proutiere1", "aff": ";KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": ";kth.se", "position": ";Full Professor", "bibtex": "@inproceedings{\nrusso2023modelfree,\ntitle={Model-Free Active Exploration in Reinforcement Learning},\nauthor={Alessio Russo and Alexandre Proutiere},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YEtstXIpP3}\n}", "github": "", "project": "", "reviewers": "qwpS;M4pn;VjsS;1tWW;KZND", "pdf_size": 1594710, "rating": "5;6;6;7;7", "confidence": "2;2;2;4;3", "soundness": "3;2;3;4;4", "novelty": "2;3;3;4;3", "presentation": "2;2;3;3;3", "wc_summary": "66;39;70;56;45", "wc_strengths": "36;35;49;102;19", "wc_weaknesses": "43;88;127;196;53", "wc_questions": "173;141;185;79;39", "wc_limitations": "56;1;6;32;64", "wc_review": "374;304;437;465;220", "wc_reply_reviewers": "90;117;202;12;0", "wc_reply_authors": "53;108;671;0;0", "reply_reviewers": "1;1;2;1;0", "reply_authors": "2;2;3;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 55.2, 11.855800268223145 ], "wc_strengths_avg": [ 48.2, 28.533489096148056 ], "wc_weaknesses_avg": [ 101.4, 55.74441676078421 ], "wc_questions_avg": [ 123.4, 55.96284481689615 ], "wc_limitations_avg": [ 31.8, 25.4432702300628 ], "wc_review_avg": [ 360.0, 89.31517228332486 ], 
"wc_reply_reviewers_avg": [ 84.2, 73.87665395779644 ], "wc_reply_authors_avg": [ 166.4, 255.4483117971227 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8017837257372731, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5683317157228306053&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": ";kth.se", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "0", "aff_campus_unique": "Stockholm", "aff_country_unique_index": "0", "aff_country_unique": "Sweden" }, { "title": "Inverse Reinforcement Learning with the Average Reward Criterion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71307", "id": "YFSrf8aciU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da409884a933ecbc4af03338111bf6aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YFSrf8aciU", "openreview": "https://openreview.net/forum?id=YFSrf8aciU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71307", "video": "https://nips.cc/virtual/2023/poster/71307", "author_site": "Feiyang Wu, Jingyang Ke, Anqi Wu", "tldr": "", "abstract": "We study the problem of Inverse Reinforcement Learning (IRL) with an average-reward criterion. The goal is to recover an unknown policy and a reward function when the agent only has samples of states and actions from an experienced agent. Previous IRL methods assume that the expert is trained in a discounted environment, and the discount factor is known. This work alleviates this assumption by proposing an average-reward framework with efficient learning algorithms. We develop novel stochastic first-order methods to solve the IRL problem under the average-reward setting, which requires solving an Average-reward Markov Decision Process (AMDP) as a subproblem. To solve the subproblem, we develop a Stochastic Policy Mirror Descent (SPMD) method under general state and action spaces that needs $\\mathcal{O}(1/\\varepsilon)$ steps of gradient computation. Equipped with SPMD, we propose the Inverse Policy Mirror Descent (IPMD) method for solving the IRL problem with a $\\mathcal{O}(1/\\varepsilon^2)$ complexity. To the best of our knowledge, the aforementioned complexity results are new in IRL with the average reward criterion. 
Finally, we corroborate our analysis with numerical experiments using the MuJoCo benchmark and additional control tasks.", "keywords": "Machine Learning;Reinforcement Learning;Inverse Reinforcement Learning;Markov Decision Process;stochastic optimization;complexity analysis", "primary_area": "", "supplementary_material": "/attachment/a2d0fee28b081e400b883ed74e01417f987f7eed.pdf", "author": "Feiyang Wu;Jingyang Ke;Anqi Wu", "authorids": "~Feiyang_Wu1;~Jingyang_Ke1;~Anqi_Wu3", "gender": "M;M;F", "homepage": "https://www.feiyangwu.com;;https://sites.google.com/view/brainml/home", "dblp": "254/0425;;15/9453", "google_scholar": "hRriuSYAAAAJ;;ptGYJiEAAAAJ", "orcid": ";0000-0002-5094-4814;0000-0002-7866-9455", "linkedin": ";jingyang-ke-622169192/;", "or_profile": "~Feiyang_Wu1;~Jingyang_Ke1;~Anqi_Wu3", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "position": "MS student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nwu2023inverse,\ntitle={Inverse Reinforcement Learning with the Average Reward Criterion},\nauthor={Feiyang Wu and Jingyang Ke and Anqi Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YFSrf8aciU}\n}", "github": "", "project": "", "reviewers": "ifE7;DtTS;9xxW;cqFs", "pdf_size": 1235556, "rating": "3;6;6;7", "confidence": "4;3;4;3", "soundness": "4;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;4;2", "wc_summary": "62;62;108;113", "wc_strengths": "64;49;176;100", "wc_weaknesses": "297;89;206;12", "wc_questions": "129;156;5;327", "wc_limitations": "59;1;5;13", "wc_review": "611;357;500;565", "wc_reply_reviewers": "51;5;5;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.25, 24.31434761617099 ], "wc_strengths_avg": [ 97.25, 49.09875252997779 ], "wc_weaknesses_avg": [ 151.0, 108.97935584320545 ], "wc_questions_avg": [ 154.25, 114.84636476615182 ], "wc_limitations_avg": [ 19.5, 23.21098877687032 ], "wc_review_avg": [ 508.25, 95.81590421219225 ], "wc_reply_reviewers_avg": [ 15.25, 20.740961887048538 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2345900831396116121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "gatech.edu;gatech.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "NICE: NoIse-modulated Consistency rEgularization for Data-Efficient GANs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71306", "id": "YFW6MVGVTn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c8047bf3ed8ef6905351608d641f02f-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=YFW6MVGVTn", "openreview": "https://openreview.net/forum?id=YFW6MVGVTn", "poster": "/media/PosterPDFs/NeurIPS%202023/71306.png?t=1701676608.1806765", "slides": "https://nips.cc/virtual/2023/poster/71306", "video": "https://nips.cc/virtual/2023/poster/71306", "author_site": "Yao Ni, Piotr Koniusz", "tldr": "", "abstract": "Generative Adversarial Networks (GANs) are powerful tools for image synthesis. However, they require access to vast amounts of training data, which is often costly and prohibitive. Limited data affects GANs, leading to discriminator overfitting and training instability. In this paper, we present a novel approach called NoIse-modulated Consistency rEgularization (NICE) to overcome these challenges. To this end, we introduce an adaptive multiplicative noise into the discriminator to modulate its latent features. We demonstrate the effectiveness of such a modulation in preventing discriminator overfitting by adaptively reducing the Rademacher complexity of the discriminator. However, this modulation leads to an unintended consequence of increased gradient norm, which can undermine the stability of GAN training. To mitigate this undesirable effect, we impose a constraint on the discriminator, ensuring its consistency for the same inputs under different noise modulations. The constraint effectively penalizes the first and second-order gradients of latent features, enhancing GAN stability. Experimental evidence aligns with our theoretical analysis, demonstrating the reduction of generalization error and gradient penalization of NICE. This substantiates the efficacy of NICE in reducing discriminator overfitting and improving stability of GAN training. NICE achieves state-of-the-art results on CIFAR-10, CIFAR-100, ImageNet and FFHQ datasets when trained with limited data, as well as in low-shot generation tasks.", "keywords": "Image Generation;limited dataset;Generative Adversarial Networks", "primary_area": "", "supplementary_material": "/attachment/3cbfb134a0262b5cda4d9fef7dd636e8b3246764.zip", "author": "Yao Ni;Piotr Koniusz", "authorids": "~Yao_Ni1;~Piotr_Koniusz1", "gender": "M;", "homepage": "https://cecs.anu.edu.au/people/yao-ni;https://www.koniusz.com", "dblp": "222/7928;25/8616", "google_scholar": "oGD-WMQAAAAJ;https://scholar.google.co.uk/citations?user=wZ7-1tUAAAAJ", "orcid": ";0000-0002-6340-5289", "linkedin": "yao-ni-855083221;", "or_profile": "~Yao_Ni1;~Piotr_Koniusz1", "aff": "Australian National University;Data61, CSIRO", "aff_domain": "anu.edu.au;data61.csiro.au", "position": "PhD student;senior research scientist", "bibtex": "@inproceedings{\nni2023nice,\ntitle={{NICE}: NoIse-modulated Consistency rEgularization for Data-Efficient {GAN}s},\nauthor={Yao Ni and Piotr Koniusz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YFW6MVGVTn}\n}", "github": "", "project": "", "reviewers": "LrCf;m7vf;fQTZ;v2y4;ijuK", "pdf_size": 3223603, "rating": "5;6;6;7;8", "confidence": "4;4;3;4;2", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "102;49;73;114;37", "wc_strengths": "81;34;56;164;90", "wc_weaknesses": "139;96;35;184;102", "wc_questions": "60;40;250;42;149", "wc_limitations": "16;18;6;79;31", "wc_review": "398;237;420;583;409", "wc_reply_reviewers": "0;42;0;0;26", "wc_reply_authors": "0;17;0;0;9", "reply_reviewers": "0;1;0;0;1", "reply_authors": "1;2;1;1;2", "rating_avg": [ 6.4, 1.0198039027185568 ], 
"confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 75.0, 29.57701810527897 ], "wc_strengths_avg": [ 85.0, 44.099886621169446 ], "wc_weaknesses_avg": [ 111.2, 49.38582792664309 ], "wc_questions_avg": [ 108.2, 81.398771489501 ], "wc_limitations_avg": [ 30.0, 25.760434778939583 ], "wc_review_avg": [ 409.4, 109.63685511724604 ], "wc_reply_reviewers_avg": [ 13.6, 17.408044117591157 ], "wc_reply_authors_avg": [ 5.2, 6.8527366796047255 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6864064729836441, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10528644540963191467&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "anu.edu.au;data61.csiro.au", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Australian National University;CSIRO", "aff_unique_dep": ";Data61", "aff_unique_url": "https://www.anu.edu.au;https://www.csiro.au", "aff_unique_abbr": "ANU;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Conformal Prediction Sets for Ordinal Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71305", "id": "YI4bn6aAmz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/029f699912bf3db747fe110948cc6169-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YI4bn6aAmz", "openreview": "https://openreview.net/forum?id=YI4bn6aAmz", "poster": "/media/PosterPDFs/NeurIPS%202023/71305.png?t=1701552430.714059", "slides": "https://nips.cc/virtual/2023/poster/71305", "video": "https://nips.cc/virtual/2023/poster/71305", "author_site": "Prasenjit Dey, Srujana Merugu, Sivaramakrishnan R Kaveri", "tldr": "", "abstract": "Ordinal classification (OC), i.e., labeling instances along classes with a natural ordering, is common in multiple applications such as size or budget based recommendations and disease severity labeling. Often in practical scenarios, it is desirable to obtain a small set of likely classes with a guaranteed high chance of including the true class. Recent works on conformal prediction (CP) address this problem for the classification setting with non-ordered labels but the resulting prediction sets (PS) are often non-contiguous and unsuitable for ordinal classification. In this work, we propose a framework to adapt existing CP methods to generate contiguous sets with guaranteed coverage and minimal cardinality. Our framework employs a novel non-parametric approach for modeling unimodal distributions. 
Empirical results on both synthetic and real-world datasets demonstrate our method outperforms SOTA baselines by 4% on Accuracy@K and 8% on PS size.", "keywords": "Ordinal Classification;Conformal Predictions;Unimodal modelling", "primary_area": "", "supplementary_material": "/attachment/1f6e2a97dc33f49ea0e9274182f3c5dbb3fba564.pdf", "author": "PRASENJIT DEY;Srujana Merugu;Sivaramakrishnan R Kaveri", "authorids": "~PRASENJIT_DEY2;~Srujana_Merugu2;~Sivaramakrishnan_R_Kaveri1", "gender": ";;M", "homepage": ";;", "dblp": ";;97/5086", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~PRASENJIT_DEY2;~Srujana_Merugu2;~Sivaramakrishnan_R_Kaveri1", "aff": ";;Amazon", "aff_domain": ";;amazon.com", "position": ";;Researcher", "bibtex": "@inproceedings{\ndey2023conformal,\ntitle={Conformal Prediction Sets for Ordinal Classification},\nauthor={PRASENJIT DEY and Srujana Merugu and Sivaramakrishnan R Kaveri},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YI4bn6aAmz}\n}", "github": "", "project": "", "reviewers": "vR8Y;w3W6;Qwu5;ieEd", "pdf_size": 697831, "rating": "5;7;7;7", "confidence": "5;4;3;4", "soundness": "2;3;3;3", "novelty": "1;4;3;3", "presentation": "2;3;3;4", "wc_summary": "103;100;183;117", "wc_strengths": "10;190;162;51", "wc_weaknesses": "139;131;358;5", "wc_questions": "19;40;39;100", "wc_limitations": "1;25;7;1", "wc_review": "272;486;749;274", "wc_reply_reviewers": "149;15;24;38", "wc_reply_authors": "294;0;37;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 125.75, 33.67027620914328 ], "wc_strengths_avg": [ 103.25, 74.8377411470977 ], "wc_weaknesses_avg": [ 158.25, 126.98302051849295 ], "wc_questions_avg": [ 49.5, 30.335622624235025 ], "wc_limitations_avg": [ 8.5, 9.836157786453 ], "wc_review_avg": [ 445.25, 195.74648783566974 ], "wc_reply_reviewers_avg": [ 56.5, 54.03008421240892 ], "wc_reply_authors_avg": [ 82.75, 122.8970605832377 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16917494546256214618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;amazon.com", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "A benchmark of categorical encoders for binary classification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73555", "id": "YJ4ioRbxNb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac01e21bb14609416760f790dd8966ae-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=YJ4ioRbxNb", "openreview": "https://openreview.net/forum?id=YJ4ioRbxNb", "poster": "/media/PosterPDFs/NeurIPS%202023/73555.png?t=1699521284.38544", "slides": "https://nips.cc/virtual/2023/poster/73555", "video": "https://nips.cc/virtual/2023/poster/73555", "author_site": "Federico Matteucci, Vadim Arzamasov, 
Klemens B\u00f6hm", "tldr": "", "abstract": "Categorical encoders transform categorical features into numerical representations that are indispensable for a wide range of machine learning models.\nExisting encoder benchmark studies lack generalizability because of their limited choice of (1) encoders, (2) experimental factors, and (3) datasets. \nAdditionally, inconsistencies arise from the adoption of varying aggregation strategies.\nThis paper is the most comprehensive benchmark of categorical encoders to date, including an extensive evaluation of 32 configurations of encoders from diverse families, with 36 combinations of experimental factors, and on 50 datasets.\nThe study shows the profound influence of dataset selection, experimental factors, and aggregation strategies on the benchmark's conclusions~---~aspects disregarded in previous encoder benchmarks.\nOur code is available at \\url{https://github.com/DrCohomology/EncoderBenchmarking}.", "keywords": "categorical data;encoder;benchmark;sensitivity analysis;replicability;generalizability;ranking", "primary_area": "", "supplementary_material": "/attachment/a1100422f0f614746f0f963aeccdc65258759b0c.pdf", "author": "Federico Matteucci;Vadim Arzamasov;Klemens B\u00f6hm", "authorids": "~Federico_Matteucci1;~Vadim_Arzamasov1;~Klemens_B\u00f6hm1", "gender": ";M;", "homepage": "https://github.com/DrCohomology;https://dbis.ipd.kit.edu/722_2410.php;", "dblp": "352/4741;148/6294;b/KBohm", "google_scholar": "https://scholar.google.de/citations?user=X_O8eI0AAAAJ;;", "orcid": "0000-0003-3181-2071;0000-0002-6854-4931;", "linkedin": "federico-matteucci-749ba41a4;vadim-arzamasov-803292b7/?originalSubdomain=de;", "or_profile": "~Federico_Matteucci1;~Vadim_Arzamasov1;~Klemens_B\u00f6hm1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie", "aff_domain": "kit.edu;kit.edu;kit.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nmatteucci2023a,\ntitle={A benchmark of categorical encoders for binary classification},\nauthor={Federico Matteucci and Vadim Arzamasov and Klemens B{\\\"o}hm},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=YJ4ioRbxNb}\n}", "github": "", "project": "", "reviewers": "QEyt;siZ7;N1YT", "pdf_size": 1757101, "rating": "1;6;7", "confidence": "5;3;4", "wc_summary_and_contributions": "5;24;136", "wc_strengths": "1;40;60", "wc_improvement": "1;38;183", "wc_limitations": "1;8;1", "wc_correctness": "1;1;96", "wc_clarity": "1;8;43", "wc_relation_to_prior_work": "1;1;57", "wc_documentation": "1;1;59", "wc_additional_feedback": "1;1;1", "wc_review": "13;122;636", "wc_reply_reviewers": "0;0;70", "wc_reply_authors": "0;227;859", "reply_reviewers": "0;0;2", "reply_authors": "0;1;2", "rating_avg": [ 4.666666666666667, 2.6246692913372702 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 55.0, 57.798500557252055 ], "wc_strengths_avg": [ 33.666666666666664, 24.499433100017278 ], "wc_improvement_avg": [ 74.0, 78.54085985438832 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_correctness_avg": [ 32.666666666666664, 44.78342947514801 ], "wc_clarity_avg": [ 17.333333333333332, 18.372685039360892 ], "wc_relation_to_prior_work_avg": [ 19.666666666666668, 26.398653164297773 ], "wc_documentation_avg": [ 20.333333333333332, 27.34146220587984 ], "wc_additional_feedback_avg": [ 
1.0, 0.0 ], "wc_review_avg": [ 257.0, 271.6627811583079 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 32.99831645537222 ], "wc_reply_authors_avg": [ 362.0, 363.4455484204844 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.0, 0.816496580927726 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.777713771047819, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5428142186887535251&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 8, "email": "kit.edu;kit.edu;kit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": "", "aff_unique_url": "https://www.kit.edu", "aff_unique_abbr": "KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "ContiFormer: Continuous-Time Transformer for Irregular Time Series Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71304", "id": "YJDz4F2AZu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9328208f88ec69420031647e6ff97727-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YJDz4F2AZu", "openreview": "https://openreview.net/forum?id=YJDz4F2AZu", "poster": "/media/PosterPDFs/NeurIPS%202023/71304.png?t=1701583294.2151349", "slides": "https://nips.cc/virtual/2023/poster/71304", "video": "https://nips.cc/virtual/2023/poster/71304", "author_site": "Yuqi Chen, Kan Ren, Yansen Wang, Yuchen Fang, Weiwei Sun, Dongsheng Li", "tldr": "", "abstract": "Modeling continuous-time dynamics on irregular time series is critical to account for data evolution and correlations that occur continuously. Traditional methods including recurrent neural networks or Transformer models leverage inductive bias via powerful neural architectures to capture complex patterns. However, due to their discrete characteristic, they have limitations in generalizing to continuous-time data paradigms. Though neural ordinary differential equations (Neural ODEs) and their variants have shown promising results in dealing with irregular time series, they often fail to capture the intricate correlations within these sequences. It is challenging yet demanding to concurrently model the relationship between input data points and capture the dynamic changes of the continuous-time system. To tackle this problem, we propose ContiFormer that extends the relation modeling of vanilla Transformer to the continuous-time domain, which explicitly incorporates the modeling abilities of continuous dynamics of Neural ODEs with the attention mechanism of Transformers. We mathematically characterize the expressive power of ContiFormer and illustrate that, by curated designs of function hypothesis, many Transformer variants specialized in irregular time series modeling can be covered as a special case of ContiFormer. A wide range of experiments on both synthetic and real-world datasets have illustrated the superior modeling capacities and prediction performance of ContiFormer on irregular time series data. 
The project link is https://seqml.github.io/contiformer/.", "keywords": "Irregular Time Series Modeling;Transformer;Neural Ordinary Differential Equation", "primary_area": "", "supplementary_material": "", "author": "Yuqi Chen;Kan Ren;Yansen Wang;Yuchen Fang;Weiwei Sun;Dongsheng Li", "authorids": "~Yuqi_Chen3;~Kan_Ren1;~Yansen_Wang2;~Yuchen_Fang2;~Weiwei_Sun5;~Dongsheng_Li2", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/chenyuqi990215;https://saying.ren;;http://homepage.fudan.edu.cn/wwsun/;http://recmind.cn;", "dblp": "334/4465;28/7458;;;254/0830-2.html;134/7817", "google_scholar": "6NAtJ7cAAAAJ;USnQVWgAAAAJ;https://scholar.google.co.jp/citations?hl=en;;VNg5rA8AAAAJ;Hvbzb1kAAAAJ", "orcid": ";;0000-0002-7882-8698;;0000-0003-3103-8442;", "linkedin": ";;;;;", "or_profile": "~Yuqi_Chen3;~Kan_Ren1;~Yuchen_Fang2;~Weiwei_Sun5;~Dongsheng_Li2;~Yansen_Wang1", "aff": "Fudan University;Microsoft;Shanghai Jiaotong University;Fudan University;Microsoft Research Asia;Microsoft Research Asia", "aff_domain": "fudan.edu.cn;microsoft.com;sjtu.edu.cn;fudan.edu.cn;microsoft.com;microsoft.com", "position": "MS student;Researcher;PhD student;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nchen2023contiformer,\ntitle={ContiFormer: Continuous-Time Transformer for Irregular Time Series Modeling},\nauthor={Yuqi Chen and Kan Ren and Yansen Wang and Yuchen Fang and Weiwei Sun and Dongsheng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YJDz4F2AZu}\n}", "github": "", "project": "", "reviewers": "jYof;PCsf;v819;2E9G;GCXG", "pdf_size": 1896729, "rating": "5;5;5;6;7", "confidence": "4;4;2;3;3", "soundness": "2;2;1;3;3", "novelty": "3;2;1;3;3", "presentation": "1;3;2;2;3", "wc_summary": "104;65;517;64;164", "wc_strengths": "105;51;24;48;92", "wc_weaknesses": "695;103;8;317;213", "wc_questions": "428;500;8;100;113", "wc_limitations": "99;6;18;32;6", "wc_review": "1431;725;575;561;588", "wc_reply_reviewers": "38;642;123;170;37", "wc_reply_authors": "65;1410;279;473;24", "reply_reviewers": "1;2;1;2;1", "reply_authors": "2;3;2;3;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 182.8, 171.02210383456287 ], "wc_strengths_avg": [ 64.0, 29.966648127543394 ], "wc_weaknesses_avg": [ 267.2, 237.72959428729104 ], "wc_questions_avg": [ 229.8, 195.9473398645667 ], "wc_limitations_avg": [ 32.2, 34.7470862087744 ], "wc_review_avg": [ 776.0, 332.7449473696032 ], "wc_reply_reviewers_avg": [ 202.0, 225.8255964234347 ], "wc_reply_authors_avg": [ 450.2, 506.1760168162849 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2004459314343183, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16700218742344989646&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "fudan.edu.cn;microsoft.com;sjtu.edu.cn;fudan.edu.cn;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;2;0;1;1", "aff_unique_norm": "Fudan University;Microsoft;Shanghai Jiao Tong University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.microsoft.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Fudan;Microsoft;SJTU", 
"aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Connecting Pre-trained Language Model and Downstream Task via Properties of Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71303", "id": "YLOJ4aKAka", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/93712c59f6a81bd92040facf04c8b308-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YLOJ4aKAka", "openreview": "https://openreview.net/forum?id=YLOJ4aKAka", "poster": "/media/PosterPDFs/NeurIPS%202023/71303.png?t=1702262300.6689832", "slides": "https://nips.cc/virtual/2023/poster/71303", "video": "https://nips.cc/virtual/2023/poster/71303", "author_site": "Chenwei Wu, Holden Lee, Rong Ge", "tldr": "", "abstract": "Recently, researchers have found that representations learned by large-scale pre-trained language models are useful in various downstream tasks. However, there is little theoretical understanding of how pre-training performance is related to downstream task performance. In this paper, we analyze how this performance transfer depends on the properties of the downstream task and the structure of the representations. We consider a log-linear model where a word can be predicted from its context through a network having softmax as its last layer. We show that even if the downstream task is highly structured and depends on a simple function of the hidden representation, there are still cases when a low pre-training loss cannot guarantee good performance on the downstream task. On the other hand, we propose and empirically validate the existence of an ``anchor vector'' in the representation space, and show that this assumption, together with properties of the downstream task, guarantees performance transfer.", "keywords": "language model representation;downstream performance;deep learning theory", "primary_area": "", "supplementary_material": "/attachment/31a6858d0c5937a5e1c90e15c56d30c9c6c04060.pdf", "author": "Chenwei Wu;Holden Lee;Rong Ge", "authorids": "~Chenwei_Wu1;~Holden_Lee1;~Rong_Ge1", "gender": "M;M;M", "homepage": "https://users.cs.duke.edu/~cwwu/;http://holdenlee.github.io;https://users.cs.duke.edu/~rongge/", "dblp": "https://dblp.uni-trier.de/pers/hd/w/Wu_0002:Chenwei;150/3407;89/6869-1.html", "google_scholar": "WoB6M2cAAAAJ;hR9rFHgAAAAJ;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ", "orcid": "0000-0002-5226-7431;;", "linkedin": "chenwei-wu-22754012b/;;", "or_profile": "~Chenwei_Wu1;~Holden_Lee1;~Rong_Ge1", "aff": "Duke University;Johns Hopkins University;Google (visiting)", "aff_domain": "duke.edu;jh.edu;google.com", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nwu2023connecting,\ntitle={Connecting Pre-trained Language Model and Downstream Task via Properties of Representation},\nauthor={Chenwei Wu and Holden Lee and Rong Ge},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YLOJ4aKAka}\n}", "github": "", "project": "", "reviewers": "TyuF;1JJM;joK7;8QyB", "pdf_size": 497327, "rating": "6;6;6;6", "confidence": "2;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "192;36;107;128", "wc_strengths": "60;58;57;21", "wc_weaknesses": "217;239;92;191", "wc_questions": "188;2;65;91", "wc_limitations": "10;41;49;1", "wc_review": "667;376;370;432", 
"wc_reply_reviewers": "79;33;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 115.75, 55.67932740254681 ], "wc_strengths_avg": [ 49.0, 16.20185174601965 ], "wc_weaknesses_avg": [ 184.75, 56.179956390157514 ], "wc_questions_avg": [ 86.5, 66.94213919497942 ], "wc_limitations_avg": [ 25.25, 20.20365066021485 ], "wc_review_avg": [ 461.25, 121.22577077502952 ], "wc_reply_reviewers_avg": [ 28.0, 32.380549717384355 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b9rxCNIwNwgJ:scholar.google.com/&scioq=Connecting+Pre-trained+Language+Model+and+Downstream+Task+via+Properties+of+Representation&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "duke.edu;jh.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Duke University;Johns Hopkins University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.duke.edu;https://www.jhu.edu;https://www.google.com", "aff_unique_abbr": "Duke;JHU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Which Models have Perceptually-Aligned Gradients? An Explanation via Off-Manifold Robustness", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71302", "id": "YMMlHBSQdC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42bbe2bfdbbfcadda643e8f89025716c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YMMlHBSQdC", "openreview": "https://openreview.net/forum?id=YMMlHBSQdC", "poster": "/media/PosterPDFs/NeurIPS%202023/71302.png?t=1702420543.0932832", "slides": "https://nips.cc/virtual/2023/poster/71302", "video": "https://nips.cc/virtual/2023/poster/71302", "author_site": "Suraj Srinivas, Sebastian Bordt, Himabindu Lakkaraju", "tldr": "", "abstract": "One of the remarkable properties of robust computer vision models is that their input-gradients are often aligned with human perception, referred to in the literature as perceptually-aligned gradients (PAGs). Despite only being trained for classification, PAGs cause robust models to have rudimentary generative capabilities, including image generation, denoising, and in-painting. However, the underlying mechanisms behind these phenomena remain unknown. In this work, we provide a first explanation of PAGs via \\emph{off-manifold robustness}, which states that models must be more robust off- the data manifold than they are on-manifold. We first demonstrate theoretically that off-manifold robustness leads input gradients to lie approximately on the data manifold, explaining their perceptual alignment. We then show that Bayes optimal models satisfy off-manifold robustness, and confirm the same empirically for robust models trained via gradient norm regularization, randomized smoothing, and adversarial training with projected gradient descent. 
Quantifying the perceptual alignment of model gradients via their similarity with the gradients of generative models, we show that off-manifold robustness correlates well with perceptual alignment. Finally, based on the levels of on- and off-manifold robustness, we identify three different regimes of robustness that affect both perceptual alignment and model accuracy: weak robustness, Bayes-aligned robustness, and excessive robustness. Code is available at https://github.com/tml-tuebingen/pags.", "keywords": "robustness;generative models;perceptually aligned gradients;bayes optimality;manifold hypothesis", "primary_area": "", "supplementary_material": "/attachment/0a0268e84535add108e834964bf9dd80d045bf52.zip", "author": "Suraj Srinivas;Sebastian Bordt;Himabindu Lakkaraju", "authorids": "~Suraj_Srinivas1;~Sebastian_Bordt1;~Himabindu_Lakkaraju1", "gender": "M;;F", "homepage": "https://suraj-srinivas.github.io/;http://www.tml.cs.uni-tuebingen.de/team/bordt/index.php;http://web.stanford.edu/~himalv", "dblp": "144/0584;270/0462;68/9376", "google_scholar": "https://scholar.google.co.in/citations?user=J2JWgKgAAAAJ;https://scholar.google.de/citations?user=6PnL3BgAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Suraj_Srinivas1;~Sebastian_Bordt1;~Hima_Lakkaraju1", "aff": "School of Engineering and Applied Sciences, Harvard University;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Harvard University", "aff_domain": "seas.harvard.edu;tue.mpg.de;harvard.edu", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsrinivas2023which,\ntitle={Which Models have Perceptually-Aligned Gradients? An Explanation via Off-Manifold Robustness},\nauthor={Suraj Srinivas and Sebastian Bordt and Himabindu Lakkaraju},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YMMlHBSQdC}\n}", "github": "", "project": "", "reviewers": "avWq;nd2c;aEc6;eaFT;58Dj", "pdf_size": 22589304, "rating": "6;6;7;8;8", "confidence": "3;3;5;5;4", "soundness": "2;3;4;4;4", "novelty": "3;4;3;4;4", "presentation": "3;3;4;4;4", "wc_summary": "168;345;98;59;96", "wc_strengths": "64;198;126;109;194", "wc_weaknesses": "61;70;247;188;253", "wc_questions": "22;88;151;77;182", "wc_limitations": "24;113;7;1;34", "wc_review": "339;814;629;434;759", "wc_reply_reviewers": "13;18;28;11;337", "wc_reply_authors": "0;0;0;0;717", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 153.2, 102.17514374836965 ], "wc_strengths_avg": [ 138.2, 51.374701945607434 ], "wc_weaknesses_avg": [ 163.8, 83.46352496749704 ], "wc_questions_avg": [ 104.0, 56.572077918351205 ], "wc_limitations_avg": [ 35.8, 40.355420949359456 ], "wc_review_avg": [ 595.0, 183.01366069231008 ], "wc_reply_reviewers_avg": [ 81.4, 127.93529614613787 ], "wc_reply_authors_avg": [ 143.4, 286.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.75, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15341709415752883850&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "seas.harvard.edu;tue.mpg.de;harvard.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard 
University;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "School of Engineering and Applied Sciences;Intelligent Systems", "aff_unique_url": "https://www.harvard.edu;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Harvard;MPI-IS", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "On Measuring Fairness in Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71301", "id": "YOZaej0ZC7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/220165f9c7f51163b73c8c7fff578b4e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YOZaej0ZC7", "openreview": "https://openreview.net/forum?id=YOZaej0ZC7", "poster": "/media/PosterPDFs/NeurIPS%202023/71301.png?t=1702124354.353686", "slides": "https://nips.cc/virtual/2023/poster/71301", "video": "https://nips.cc/virtual/2023/poster/71301", "author_site": "Christopher Teo, Milad Abdollahzadeh, Ngai-Man (Man) Cheung", "tldr": "", "abstract": "Recently, there has been increased interest in fair generative models. In this work, we conduct, for the first time, an in-depth study on fairness measurement, a critical component in gauging progress on fair generative models. We make three contributions. First, we conduct a study that reveals that the existing fairness measurement framework has considerable measurement errors, even when highly accurate sensitive attribute (SA) classifiers are used. These findings cast doubt on previously reported fairness improvements. Second, to address this issue, we propose CLassifier Error-Aware Measurement (CLEAM), a new framework which uses a statistical model to account for inaccuracies in SA classifiers. Our proposed CLEAM reduces measurement errors significantly, e.g., 4.98%\u21920.62% for StyleGAN2 w.r.t. Gender. Additionally, CLEAM achieves this with minimal additional overhead. Third, we utilize CLEAM to measure fairness in important text-to-image generators and GANs, revealing considerable biases in these models that raise concerns about their applications. 
Code and more resources: https://sutd-visual-computing-group.github.io/CLEAM/.", "keywords": "Fairness;Generative models;GAN;Calibration", "primary_area": "", "supplementary_material": "/attachment/e9d770afcd8c0bb2c47b6c1605e356493969f650.pdf", "author": "Christopher T.H Teo;Milad Abdollahzadeh;Ngai-man Cheung", "authorids": "~Christopher_T.H_Teo1;~Milad_Abdollahzadeh1;~Ngai-man_Cheung1", "gender": "M;M;M", "homepage": ";;https://sites.google.com/site/mancheung0407/", "dblp": ";211/7797;82/3605", "google_scholar": "JhyGETcAAAAJ;SYDsMNAAAAAJ;https://scholar.google.com.sg/citations?hl=en", "orcid": ";0000-0003-4011-4670;0000-0003-0135-3791", "linkedin": "tthchristopher/;milad-abdollahzadeh-b0764361/;", "or_profile": "~Christopher_T.H_Teo1;~Milad_Abdollahzadeh1;~Ngai-man_Cheung1", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nteo2023on,\ntitle={On Measuring Fairness in Generative Models},\nauthor={Christopher T.H Teo and Milad Abdollahzadeh and Ngai-man Cheung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YOZaej0ZC7}\n}", "github": "", "project": "", "reviewers": "uk5v;uNfr;6ZNP;o1Z2;rFFs;2H4p", "pdf_size": 4371956, "rating": "3;5;6;6;6;6", "confidence": "3;2;3;4;4;1", "soundness": "2;3;3;3;3;2", "novelty": "2;3;3;2;3;2", "presentation": "2;2;2;3;2;3", "wc_summary": "84;48;87;104;54;81", "wc_strengths": "12;44;33;49;41;12", "wc_weaknesses": "193;75;264;129;256;106", "wc_questions": "5;6;66;7;214;72", "wc_limitations": "2;13;18;7;16;1", "wc_review": "296;186;468;296;581;272", "wc_reply_reviewers": "0;16;19;22;62;11", "wc_reply_authors": "0;32;29;43;27;32", "reply_reviewers": "0;1;1;1;1;1", "reply_authors": "1;2;2;2;2;2", "rating_avg": [ 5.333333333333333, 1.1055415967851332 ], "confidence_avg": [ 2.8333333333333335, 1.0671873729054748 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.33333333333333, 19.413626371414715 ], "wc_strengths_avg": [ 31.833333333333332, 14.803340463857774 ], "wc_weaknesses_avg": [ 170.5, 72.5275809606249 ], "wc_questions_avg": [ 61.666666666666664, 73.74430298146578 ], "wc_limitations_avg": [ 9.5, 6.601767440112787 ], "wc_review_avg": [ 349.8333333333333, 132.976710575783 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 19.362047641943473 ], "wc_reply_authors_avg": [ 27.166666666666668, 13.158225142050462 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.8333333333333333, 0.3726779962499649 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.04708816093480113, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=651786041095992533&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Singapore University of Technology and Design", "aff_unique_dep": "", "aff_unique_url": "https://www.sutd.edu.sg", "aff_unique_abbr": "SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Spatio-Angular Convolutions for 
Super-resolution in Diffusion MRI", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71300", "id": "YPHIrNKI0d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/294de0fa7149adcb88aa3119c239c63e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YPHIrNKI0d", "openreview": "https://openreview.net/forum?id=YPHIrNKI0d", "poster": "/media/PosterPDFs/NeurIPS%202023/71300.png?t=1700230082.9036078", "slides": "https://nips.cc/virtual/2023/poster/71300", "video": "https://nips.cc/virtual/2023/poster/71300", "author_site": "Matthew Lyon, Paul Armitage, Mauricio A \u00c1lvarez", "tldr": "", "abstract": "Diffusion MRI (dMRI) is a widely used imaging modality, but requires long scanning times to acquire high resolution datasets. By leveraging the unique geometry present within this domain, we present a novel approach to dMRI angular super-resolution that extends upon the parametric continuous convolution (PCConv) framework. We introduce several additions to the operation including a Fourier feature mapping, 'global' co-ordinates, and domain specific context. Using this framework, we build a fully parametric continuous convolution network (PCCNN) and compare against existing models. We demonstrate the PCCNN performs competitively while using significantly fewer parameters. Moreover, we show that this formulation generalises well to clinically relevant downstream analyses such as fixel-based analysis, and neurite orientation dispersion and density imaging.", "keywords": "Diffusion MRI;super-resolution;image synthesis;conditional image synthesis;continuous convolution;parametric continuous convolution", "primary_area": "", "supplementary_material": "", "author": "Matthew Lyon;Paul Armitage;Mauricio A \u00c1lvarez", "authorids": "~Matthew_Lyon1;p.armitage@sheffield.ac.uk;~Mauricio_A_\u00c1lvarez1", "gender": "M;;", "homepage": "https://m-lyon.github.io/;;", "dblp": ";;", "google_scholar": "uunJ_HcAAAAJ;;", "orcid": "0000-0001-5705-5051;;", "linkedin": ";;", "or_profile": "~Matthew_Lyon1;p.armitage@sheffield.ac.uk;~Mauricio_A_\u00c1lvarez1", "aff": "University of Manchester;;", "aff_domain": "manchester.ac.uk;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nlyon2023spatioangular,\ntitle={Spatio-Angular Convolutions for Super-resolution in Diffusion {MRI}},\nauthor={Matthew Lyon and Paul Armitage and Mauricio A {\\'A}lvarez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YPHIrNKI0d}\n}", "github": "", "project": "", "reviewers": "bfmq;Kmgz;Vyq4;bdM9;PJ9o", "pdf_size": 3250234, "rating": "4;5;5;5;6", "confidence": "4;4;4;5;4", "soundness": "3;3;2;2;3", "novelty": "3;2;2;2;3", "presentation": "3;3;2;4;3", "wc_summary": "58;60;25;135;49", "wc_strengths": "22;30;56;68;71", "wc_weaknesses": "77;137;120;939;166", "wc_questions": "63;37;16;35;206", "wc_limitations": "3;26;6;69;66", "wc_review": "223;290;223;1246;558", "wc_reply_reviewers": "204;110;10;224;222", "wc_reply_authors": "313;291;222;1854;505", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;6;2", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 65.4, 36.957272626642784 ], "wc_strengths_avg": [ 49.4, 19.915822855207363 ], "wc_weaknesses_avg": [ 287.8, 326.87330879103604 ], 
"wc_questions_avg": [ 71.4, 68.94229471086672 ], "wc_limitations_avg": [ 34.0, 28.488594208911046 ], "wc_review_avg": [ 508.0, 389.13185426022375 ], "wc_reply_reviewers_avg": [ 154.0, 83.30186072351566 ], "wc_reply_authors_avg": [ 637.0, 615.6996020788059 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.0, 1.5491933384829668 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5292076736452534351&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "manchester.ac.uk;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Manchester", "aff_unique_dep": "", "aff_unique_url": "https://www.manchester.ac.uk", "aff_unique_abbr": "UoM", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Harnessing Hard Mixed Samples with Decoupled Regularizer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71299", "id": "YPQg2RTFD8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5c47c1b7adf19e8dc633812a4acf6d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YPQg2RTFD8", "openreview": "https://openreview.net/forum?id=YPQg2RTFD8", "poster": "/media/PosterPDFs/NeurIPS%202023/71299.png?t=1703861375.405593", "slides": "https://nips.cc/virtual/2023/poster/71299", "video": "https://nips.cc/virtual/2023/poster/71299", "author_site": "Zicheng Liu, Siyuan Li, Ge Wang, Lirong Wu, Cheng Tan, Stan Z. Li", "tldr": "", "abstract": "Mixup is an efficient data augmentation approach that improves the generalization of neural networks by smoothing the decision boundary with mixed data. Recently, dynamic mixup methods have improved previous \\textit{static} policies effectively (e.g., linear interpolation) by maximizing target-related salient regions in mixed samples, but excessive additional time costs are not acceptable. These additional computational overheads mainly come from optimizing the mixed samples according to the mixed labels. However, we found that the extra optimizing step may be redundant because label-mismatched mixed samples are informative hard mixed samples for deep models to localize discriminative features. In this paper, we thus are not trying to propose a more complicated dynamic mixup policy but rather an efficient mixup objective function with decoupled regularizer, named decoupled mixup (DM). The primary effect is that DM can adaptively utilize those hard mixed samples to mine discriminative features without losing the original smoothness of mixup. As a result, DM enables static mixup methods to achieve comparable or even exceed the performance of dynamic methods without any extra computation. This also leads to an interesting objective design problem for mixup training that we need to focus on both smoothing the decision boundaries and identifying discriminative features. Extensive experiments on supervised and semi-supervised learning benchmarks across seven datasets validate the effectiveness of DM.", "keywords": "mixup;data augmentation;classification;data efficiency", "primary_area": "", "supplementary_material": "/attachment/7125b660fcabc9b5744ea26adbb565ea3231d71d.zip", "author": "Zicheng Liu;Siyuan Li;Ge Wang;Lirong Wu;Cheng Tan;Stan Z. 
Li", "authorids": "~Zicheng_Liu2;~Siyuan_Li6;~Ge_Wang3;~Lirong_Wu1;~Cheng_Tan1;~Stan_Z._Li2", "gender": "M;M;;;M;M", "homepage": ";https://lupin1998.github.io/;;;https://chengtan9907.github.io/;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "l/ZichengLiu-6;63/9705-2;34/5591;15/10330;70/1533-12.html;l/StanZLi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=t9GUEMoAAAAJ;Tk7TrCoAAAAJ;6kTV6aMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-6806-2468;0000-0001-8553-6493;;;", "linkedin": ";https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Zicheng_Liu2;~Siyuan_Li6;~Ge_Wang3;~Lirong_Wu1;~Cheng_Tan1;~Stan_Z._Li1", "aff": "Zhejiang University;Alibaba Group;WESTLAKE UNIVERSITY;Westlake University;Zhejiang University & Westlake University;Westlake University", "aff_domain": "zju.edu.cn;alibaba-inc.com;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;Intern;PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@inproceedings{\nliu2023harnessing,\ntitle={Harnessing Hard Mixed Samples with Decoupled Regularizer},\nauthor={Zicheng Liu and Siyuan Li and Ge Wang and Lirong Wu and Cheng Tan and Stan Z. Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YPQg2RTFD8}\n}", "github": "", "project": "", "reviewers": "s9eU;sDcS;y8Fo;JzuZ", "pdf_size": 3666805, "rating": "6;6;7;8", "confidence": "4;4;3;3", "soundness": "3;2;3;4", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "29;70;92;132", "wc_strengths": "20;54;41;108", "wc_weaknesses": "78;178;56;17", "wc_questions": "14;5;2;497", "wc_limitations": "1;7;1;13", "wc_review": "142;314;192;767", "wc_reply_reviewers": "0;20;0;7", "wc_reply_authors": "0;0;0;9", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 37.238253181372514 ], "wc_strengths_avg": [ 55.75, 32.51441987795569 ], "wc_weaknesses_avg": [ 82.25, 59.44062163201189 ], "wc_questions_avg": [ 129.5, 212.2221713205291 ], "wc_limitations_avg": [ 5.5, 4.9749371855331 ], "wc_review_avg": [ 353.75, 246.65601046802 ], "wc_reply_reviewers_avg": [ 6.75, 8.166241485530538 ], "wc_reply_authors_avg": [ 2.25, 3.897114317029974 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17792098731684229684&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;alibaba-inc.com;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "Zhejiang University;Alibaba Group;Westlake University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.westlake.edu.cn", "aff_unique_abbr": "ZJU;Alibaba;WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", 
"aff_country_unique": "China" }, { "title": "3D-LLM: Injecting the 3D World into Large Language Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71298", "id": "YQA28p7qNz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/413885e70482b95dcbeeddc1daf39177-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YQA28p7qNz", "openreview": "https://openreview.net/forum?id=YQA28p7qNz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71298", "video": "https://nips.cc/virtual/2023/poster/71298", "author_site": "Yining Hong, Haoyu Zhen, Peihao Chen, Shuhong Zheng, Yilun Du, Zhenfang Chen, Chuang Gan", "tldr": "", "abstract": "Large language models (LLMs) and Vision-Language Models (VLMs) have been proved to excel at multiple tasks, such as commonsense reasoning. Powerful as these models can be, they are not grounded in the 3D physical world, which involves richer concepts such as spatial relationships, affordances, physics, layout, and so on. In this work, we propose to inject the 3D world into large language models, and introduce a whole new family of 3D-LLMs. Specifically, 3D-LLMs can take 3D point clouds and their features as input and perform a diverse set of 3D-related tasks, including captioning, dense captioning, 3D question answering, task decomposition, 3D\ngrounding, 3D-assisted dialog, navigation, and so on. Using three types of prompting mechanisms that we design, we are able to collect over 300k 3D-language data covering these tasks. To efficiently train 3D-LLMs, we first utilize a 3D feature extractor that obtains 3D features from rendered multi-view images. Then, we use 2D VLMs as our backbones to train our 3D-LLMs. By introducing a 3D localization mechanism, 3D-LLMs could better capture 3D spatial information. Experiments on ScanQA show that our model outperforms state-of-the-art baselines by a large margin (\\textit{e.g.}, the BLEU-1 score surpasses state-of-the-art score by 9\\%). Furthermore, experiments on our held-in datasets for 3D captioning, task composition, and 3D-assisted dialogue show that our model outperforms 2D VLMs. Qualitative examples also show that our model could perform more tasks beyond the scope of existing LLMs and VLMs. 
Our model and data will be publicly available.", "keywords": "3D Visual Reasoning;3D Large Language Model", "primary_area": "", "supplementary_material": "/attachment/2fec46c28d35ae7877f3154d064392200a6d62e1.pdf", "author": "Yining Hong;Haoyu Zhen;Peihao Chen;Shuhong Zheng;Yilun Du;Zhenfang Chen;Chuang Gan", "authorids": "~Yining_Hong1;~Haoyu_Zhen1;~Peihao_Chen1;~Shuhong_Zheng1;~Yilun_Du1;~Zhenfang_Chen1;~Chuang_Gan1", "gender": "F;M;M;M;;M;M", "homepage": "https://evelinehong.github.io;https://haoyuzhen.com;https://peihaochen.github.io/;https://zsh2000.github.io/;https://yilundu.github.io;https://zfchenunique.github.io;http://people.csail.mit.edu/ganchuang/", "dblp": "245/3655;353/0317;249/8975;197/1289;204/4379;207/5321;139/6993", "google_scholar": "PTYxORcAAAAJ;_btLQY0AAAAJ;KkpEXpsAAAAJ;0XuYAB8AAAAJ;;QSRdIzAAAAAJ;PTeSCbIAAAAJ", "orcid": ";;0000-0002-6847-1621;;;;", "linkedin": ";;;;;\u632f\u65b9-\u9648-512011bb/;", "or_profile": "~Yining_Hong1;~Haoyu_Zhen1;~Peihao_Chen1;~Shuhong_Zheng1;~Yilun_Du1;~Zhenfang_Chen1;~Chuang_Gan1", "aff": "University of California, Los Angeles;Shanghai Jiaotong University;South China University of Technology;University of Illinois Urbana-Champaign;Massachusetts Institute of Technology;MIT-IBM Watson AI lab;MIT-IBM Watson AI Lab", "aff_domain": "cs.ucla.edu;sjtu.edu.cn;scut.edu.cn;illinois.edu;mit.edu;ibm.com;ibm.com", "position": "PhD student;Undergrad student;PhD student;MS student;PhD student;Researcher;PhD student", "bibtex": "@inproceedings{\nhong2023dllm,\ntitle={3D-{LLM}: Injecting the 3D World into Large Language Models},\nauthor={Yining Hong and Haoyu Zhen and Peihao Chen and Shuhong Zheng and Yilun Du and Zhenfang Chen and Chuang Gan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YQA28p7qNz}\n}", "github": "", "project": "", "reviewers": "6ag4;KXNR;QPPK;G3Mn", "pdf_size": 4119888, "rating": "6;6;7;8", "confidence": "4;4;3;4", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "41;50;46;111", "wc_strengths": "40;12;49;49", "wc_weaknesses": "91;237;60;82", "wc_questions": "156;3;62;2", "wc_limitations": "5;3;16;5", "wc_review": "333;305;233;249", "wc_reply_reviewers": "21;47;16;68", "wc_reply_authors": "66;59;0;18", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.0, 28.469281690973517 ], "wc_strengths_avg": [ 37.5, 15.173990905493518 ], "wc_weaknesses_avg": [ 117.5, 69.9088692513332 ], "wc_questions_avg": [ 55.75, 62.77091284982241 ], "wc_limitations_avg": [ 7.25, 5.11737237261468 ], "wc_review_avg": [ 280.0, 40.63249930782009 ], "wc_reply_reviewers_avg": [ 38.0, 20.940391591371924 ], "wc_reply_authors_avg": [ 35.75, 27.60774347895894 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 328, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9113313701189648384&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "cs.ucla.edu;sjtu.edu.cn;scut.edu.cn;illinois.edu;mit.edu;ibm.com;ibm.com", "author_num": 7, "aff_unique_index": "0;1;2;3;4;4;4", "aff_unique_norm": "University of 
California, Los Angeles;Shanghai Jiao Tong University;South China University of Technology;University of Illinois Urbana-Champaign;Massachusetts Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ucla.edu;https://www.sjtu.edu.cn;https://www.scut.edu.cn;https://illinois.edu;https://web.mit.edu", "aff_unique_abbr": "UCLA;SJTU;SCUT;UIUC;MIT", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;Urbana-Champaign", "aff_country_unique_index": "0;1;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Implicit Regularization in Over-Parameterized Support Vector Machine", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71297", "id": "YSFQRVkkl0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/659e07806dc17bd69d0d9aed47f85e7c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YSFQRVkkl0", "openreview": "https://openreview.net/forum?id=YSFQRVkkl0", "poster": "/media/PosterPDFs/NeurIPS%202023/71297.png?t=1699373514.2074761", "slides": "https://nips.cc/virtual/2023/poster/71297", "video": "https://nips.cc/virtual/2023/poster/71297", "author_site": "Yang Sui, Xin HE, Yang Bai", "tldr": "", "abstract": "In this paper, we design a regularization-free algorithm for high-dimensional support vector machines (SVMs) by integrating over-parameterization with Nesterov's smoothing method, and provide theoretical guarantees for the induced implicit regularization phenomenon. In particular, we construct an over-parameterized hinge loss function and estimate the true parameters by leveraging regularization-free gradient descent on this loss function. The utilization of Nesterov's method enhances the computational efficiency of our algorithm, especially in terms of determining the stopping criterion and reducing computational complexity. With appropriate choices of initialization, step size, and smoothness parameter, we demonstrate that unregularized gradient descent achieves a near-oracle statistical convergence rate. Additionally, we verify our theoretical findings through a variety of numerical experiments and compare the proposed method with explicit regularization. 
Our results illustrate the advantages of employing implicit regularization via gradient descent in conjunction with over-parameterization in sparse SVMs.", "keywords": "Over-parameterization;SVM;Sparsity;Lasso", "primary_area": "", "supplementary_material": "/attachment/dc35fe5b08067e47992492cb9cd63aa372a8d3c9.zip", "author": "Yang Sui;Xin HE;Yang Bai", "authorids": "~Yang_Sui2;~Xin_HE7;~Yang_Bai7", "gender": "M;;", "homepage": "https://github.com/suiyangsoo;https://ssm.sufe.edu.cn/ce/79/c712a118393/page.htm;https://sites.google.com/view/guoqinghe", "dblp": ";;", "google_scholar": ";5TlU9AMAAAAJ;aduqO4EAAAAJ", "orcid": ";0000-0002-4660-4542;", "linkedin": ";;", "or_profile": "~Yang_Sui2;~Yang_Bai7;~Xin_HE6", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics", "aff_domain": "shufe.edu.cn;shufe.edu.cn;shufe.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nsui2023implicit,\ntitle={Implicit Regularization in Over-Parameterized Support Vector Machine},\nauthor={Yang Sui and Xin HE and Yang Bai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YSFQRVkkl0}\n}", "github": "", "project": "", "reviewers": "4Xb3;6PiA;S4hZ;i2nR;Z81p", "pdf_size": 1111510, "rating": "5;6;6;6;8", "confidence": "3;5;2;4;3", "soundness": "2;3;4;3;4", "novelty": "2;3;3;3;3", "presentation": "3;2;3;2;4", "wc_summary": "122;91;29;238;117", "wc_strengths": "88;31;32;17;32", "wc_weaknesses": "118;680;99;26;4", "wc_questions": "85;3;5;150;243", "wc_limitations": "21;3;1;10;5", "wc_review": "434;808;166;441;401", "wc_reply_reviewers": "0;309;13;72;22", "wc_reply_authors": "0;1731;30;19;24", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;6;2;2;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 119.4, 67.90758425978647 ], "wc_strengths_avg": [ 40.0, 24.66576574931336 ], "wc_weaknesses_avg": [ 185.4, 250.97856482177917 ], "wc_questions_avg": [ 97.2, 91.17982232928512 ], "wc_limitations_avg": [ 8.0, 7.155417527999327 ], "wc_review_avg": [ 450.0, 205.69783664394723 ], "wc_reply_reviewers_avg": [ 83.2, 115.5048050948531 ], "wc_reply_authors_avg": [ 360.8, 685.1736714147736 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 1.7435595774162693 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.08006407690254354, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Tl526Q3RIJUJ:scholar.google.com/&scioq=Implicit+Regularization+in+Over-Parameterized+Support+Vector+Machine&hl=en&as_sdt=0,48", "gs_version_total": 7, "email": "shufe.edu.cn;shufe.edu.cn;shufe.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai University of Finance and Economics", "aff_unique_dep": "", "aff_unique_url": "http://www.sufe.edu.cn", "aff_unique_abbr": "SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "CELLE-2: Translating Proteins to Pictures and Back with a Bidirectional Text-to-Image Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71296", "id": "YSMLVffl5u", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0fb7c02d420c993385c7de44c2b5bf01-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YSMLVffl5u", "openreview": "https://openreview.net/forum?id=YSMLVffl5u", "poster": "/media/PosterPDFs/NeurIPS%202023/71296.png?t=1702402343.281711", "slides": "https://nips.cc/virtual/2023/poster/71296", "video": "https://nips.cc/virtual/2023/poster/71296", "author_site": "Emaad Khwaja, Yun Song, Aaron Agarunov, Bo Huang", "tldr": "", "abstract": "We present CELL-E 2, a novel bidirectional transformer that can generate images depicting protein subcellular localization from the amino acid sequences (and vice versa). Protein localization is a challenging problem that requires integrating sequence and image information, which most existing methods ignore. CELL-E 2 extends the work of CELL-E, not only capturing the spatial complexity of protein localization and produce probability estimates of localization atop a nucleus image, but also being able to generate sequences from images, enabling de novo protein design. We train and finetune CELL-E 2 on two large-scale datasets of human proteins. We also demonstrate how to use CELL-E 2 to create hundreds of novel nuclear localization signals (NLS). Results and interactive demos are featured at https://bohuanglab.github.io/CELL-E_2/.", "keywords": "text-to-image;protein localization;protein engineering;transformers", "primary_area": "", "supplementary_material": "/attachment/6a39cadba8175053e39b8727e7f84648e3942f57.zip", "author": "Emaad Khwaja;Yun S. Song;Aaron Agarunov;Bo Huang", "authorids": "~Emaad_Khwaja1;~Yun_S._Song1;~Aaron_Agarunov1;~Bo_Huang5", "gender": ";;M;M", "homepage": "https://emaad.org;;https://agarun.com;http://huanglab.ucsf.edu", "dblp": "369/5959;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0000-0003-1704-4141", "linkedin": "emaad/;;https://linkedin.com/in/agarunov/;", "or_profile": "~Emaad_Khwaja1;~Yun_S._Song1;~Aaron_Agarunov1;~Bo_Huang5", "aff": "UC Berkeley - UCSF Joint Bioengineering Graduate Program;;Memorial Sloan Kettering Cancer Center;Chan Zuckerberg Biohub - San Francisco", "aff_domain": "berkeley.edu;;mskcc.org;czbiohub.org", "position": "PhD student;;Researcher;Investigator", "bibtex": "@inproceedings{\nkhwaja2023celle,\ntitle={{CELLE}-2: Translating Proteins to Pictures and Back with a Bidirectional Text-to-Image Transformer},\nauthor={Emaad Khwaja and Yun S. 
Song and Aaron Agarunov and Bo Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YSMLVffl5u}\n}", "github": "", "project": "", "reviewers": "DiCT;pNFc;9W6q;ikCN", "pdf_size": 17220380, "rating": "5;5;7;7", "confidence": "3;3;4;3", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "102;95;132;63", "wc_strengths": "54;43;161;33", "wc_weaknesses": "87;33;95;64", "wc_questions": "291;148;362;268", "wc_limitations": "55;32;21;8", "wc_review": "589;351;771;436", "wc_reply_reviewers": "79;56;163;7", "wc_reply_authors": "26;14;92;6", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 24.525496936861444 ], "wc_strengths_avg": [ 72.75, 51.489683432703295 ], "wc_weaknesses_avg": [ 69.75, 24.076700355322778 ], "wc_questions_avg": [ 267.25, 77.07585549314389 ], "wc_limitations_avg": [ 29.0, 17.24818831066034 ], "wc_review_avg": [ 536.75, 159.88804677023234 ], "wc_reply_reviewers_avg": [ 76.25, 56.433035537706104 ], "wc_reply_authors_avg": [ 34.5, 33.95217224272992 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10906340082736398805&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "berkeley.edu;;mskcc.org;czbiohub.org", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Memorial Sloan Kettering Cancer Center;Chan Zuckerberg Biohub", "aff_unique_dep": "Joint Bioengineering Graduate Program;;", "aff_unique_url": "https://www.berkeley.edu;https://www.mskcc.org;https://www.chanzuckerberg.com/science/biohub", "aff_unique_abbr": "UC Berkeley;MSKCC;", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;San Francisco", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "MoVie: Visual Model-Based Policy Adaptation for View Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71295", "id": "YV1MYtj2AR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43b77cef2a83a25aa27d3271d209e4fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YV1MYtj2AR", "openreview": "https://openreview.net/forum?id=YV1MYtj2AR", "poster": "/media/PosterPDFs/NeurIPS%202023/71295.png?t=1697705894.338454", "slides": "https://nips.cc/virtual/2023/poster/71295", "video": "https://nips.cc/virtual/2023/poster/71295", "author_site": "Sizhe Yang, Yanjie Ze, Huazhe Xu", "tldr": "", "abstract": "Visual Reinforcement Learning (RL) agents trained on limited views face significant challenges in generalizing their learned abilities to unseen views. This inherent difficulty is known as the problem of $\\textit{view generalization}$. In this work, we systematically categorize this fundamental problem into four distinct and highly challenging scenarios that closely resemble real-world situations. 
Subsequently, we propose a straightforward yet effective approach to enable successful adaptation of visual $\\textbf{Mo}$del-based policies for $\\textbf{Vie}$w generalization ($\\textbf{MoVie}$) during test time, without any need for explicit reward signals and any modification during training time. Our method demonstrates substantial advancements across all four scenarios encompassing a total of $\\textbf{18}$ tasks sourced from DMControl, xArm, and Adroit, with a relative improvement of $\\mathbf{33}$%, $\\mathbf{86}$%, and $\\mathbf{152}$% respectively. The superior results highlight the immense potential of our approach for real-world robotics applications. Code and videos are available at https://yangsizhe.github.io/MoVie/.", "keywords": "visual reinforcement learning;visual generalization", "primary_area": "", "supplementary_material": "/attachment/41791080de6bb6163b3017631a90977c0be42c6f.pdf", "author": "Sizhe Yang;Yanjie Ze;Huazhe Xu", "authorids": "~Sizhe_Yang3;~Yanjie_Ze1;~Huazhe_Xu1", "gender": "M;M;M", "homepage": "https://yangsizhe.github.io/;http://yanjieze.com;http://hxu.rocks", "dblp": "351/1712;312/5407;164/9006", "google_scholar": "ue3SjGgAAAAJ;BO_b2O8AAAAJ;t9HPFawAAAAJ", "orcid": ";;", "linkedin": ";yanjie-ze-a71a0a247/;", "or_profile": "~Sizhe_Yang3;~Yanjie_Ze1;~Huazhe_Xu1", "aff": "Shanghai Qi Zhi Institute;Shanghai Jiaotong University;Tsinghua University", "aff_domain": "sqz.ac.cn;sjtu.edu.cn;tsinghua.edu.cn", "position": "Intern;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nyang2023movie,\ntitle={MoVie: Visual Model-Based Policy Adaptation for View Generalization},\nauthor={Sizhe Yang and Yanjie Ze and Huazhe Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YV1MYtj2AR}\n}", "github": "", "project": "", "reviewers": "BF5c;CdSm;E44Y;N1Ux;CADg", "pdf_size": 1341679, "rating": "4;5;6;6;6", "confidence": "3;4;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;2;2;3", "presentation": "2;3;3;4;4", "wc_summary": "34;53;85;145;69", "wc_strengths": "27;36;50;164;29", "wc_weaknesses": "90;45;198;265;133", "wc_questions": "74;210;74;82;3", "wc_limitations": "2;2;6;59;1", "wc_review": "227;346;413;715;235", "wc_reply_reviewers": "0;109;18;29;23", "wc_reply_authors": "46;241;46;8;46", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 77.2, 37.886145224870795 ], "wc_strengths_avg": [ 61.2, 52.02845375369135 ], "wc_weaknesses_avg": [ 146.2, 77.93689755180148 ], "wc_questions_avg": [ 88.6, 67.13449188010586 ], "wc_limitations_avg": [ 14.0, 22.565460332109335 ], "wc_review_avg": [ 387.2, 178.12624736405357 ], "wc_reply_reviewers_avg": [ 35.8, 37.85974115072632 ], "wc_reply_authors_avg": [ 77.4, 83.11341648614861 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8702194818732254776&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "sqz.ac.cn;sjtu.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Shanghai Qi Zhi Institute;Shanghai Jiao 
Tong University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.qz.io;https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": ";SJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Offline Reinforcement Learning with Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71294", "id": "YVMc3KiWBQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c1aaf7c3f306fe94f77236dc0756d771-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YVMc3KiWBQ", "openreview": "https://openreview.net/forum?id=YVMc3KiWBQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71294.png?t=1701409896.7751698", "slides": "https://nips.cc/virtual/2023/poster/71294", "video": "https://nips.cc/virtual/2023/poster/71294", "author_site": "Dan Qiao, Yu-Xiang Wang", "tldr": "", "abstract": "The offline reinforcement learning (RL) problem is often motivated by the need to learn data-driven decision policies in financial, legal and healthcare applications. However, the learned policy could retain sensitive information of individuals in the training data (e.g., treatment and outcome of patients), thus susceptible to various privacy risks. We design offline RL algorithms with differential privacy guarantees which provably prevent such risks. These algorithms also enjoy strong instance-dependent learning bounds under both tabular and linear Markov Decision Process (MDP) settings. Our theory and simulation suggest that the privacy guarantee comes at (almost) no drop in utility compared to the non-private counterpart for a medium-size dataset.", "keywords": "Differential privacy;offline reinforcement learning;reinforcement learning theory", "primary_area": "", "supplementary_material": "/attachment/cdce4c38d48a6541f861aa4e079f74f45c30c802.pdf", "author": "Dan Qiao;Yu-Xiang Wang", "authorids": "~Dan_Qiao1;~Yu-Xiang_Wang1", "gender": "M;", "homepage": ";http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";62/1637-3.html", "google_scholar": "EyfAUuUAAAAJ;HGNZ1fkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dan_Qiao1;~Yu-Xiang_Wang1", "aff": ", University of California, Santa Barbara;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;ucsb.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqiao2023offline,\ntitle={Offline Reinforcement Learning with Differential Privacy},\nauthor={Dan Qiao and Yu-Xiang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YVMc3KiWBQ}\n}", "github": "", "project": "", "reviewers": "fvW2;2mbC;ytfu;7uyZ;ZbTu", "pdf_size": 668396, "rating": "5;5;7;7;7", "confidence": "2;2;4;3;5", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "3;3;3;3;3", "wc_summary": "73;72;65;109;357", "wc_strengths": "96;46;45;57;304", "wc_weaknesses": "388;63;470;177;67", "wc_questions": "5;20;3;41;2", "wc_limitations": "5;1;1;1;1", "wc_review": "567;202;584;385;731", "wc_reply_reviewers": "0;0;0;21;13", "wc_reply_authors": "0;0;23;19;14", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 135.2, 111.95784921120985 ], "wc_strengths_avg": [ 
109.6, 98.9557476855185 ], "wc_weaknesses_avg": [ 233.0, 167.2040669361843 ], "wc_questions_avg": [ 14.2, 14.905032707109367 ], "wc_limitations_avg": [ 1.8, 1.6000000000000003 ], "wc_review_avg": [ 493.8, 182.61588101805384 ], "wc_reply_reviewers_avg": [ 6.8, 8.704022058795577 ], "wc_reply_authors_avg": [ 11.2, 9.57914401186244 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.840168050416806, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8837374383297621855&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.ucsb.edu;ucsb.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "ECG-QA: A Comprehensive Question Answering Dataset Combined With Electrocardiogram", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73554", "id": "YWJ7Yi4OtH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0b67349dd16b83b2cf6167fb4e2be50-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=YWJ7Yi4OtH", "openreview": "https://openreview.net/forum?id=YWJ7Yi4OtH", "poster": "/media/PosterPDFs/NeurIPS%202023/73554.png?t=1701995081.4727674", "slides": "https://nips.cc/virtual/2023/poster/73554", "video": "https://nips.cc/virtual/2023/poster/73554", "author_site": "Jungwoo Oh, Gyubok Lee, Seongsu Bae, Joon-myoung Kwon, Edward Choi", "tldr": "", "abstract": "Question answering (QA) in the field of healthcare has received much attention due to significant advancements in natural language processing. However, existing healthcare QA datasets primarily focus on medical images, clinical notes, or structured electronic health record tables. This leaves the vast potential of combining electrocardiogram (ECG) data with these systems largely untapped. To address this gap, we present ECG-QA, the first QA dataset specifically designed for ECG analysis. The dataset comprises a total of 70 question templates that cover a wide range of clinically relevant ECG topics, each validated by an ECG expert to ensure their clinical utility. As a result, our dataset includes diverse ECG interpretation questions, including those that require a comparative analysis of two different ECGs. In addition, we have conducted numerous experiments to provide valuable insights for future research directions. 
We believe that ECG-QA will serve as a valuable resource for the development of intelligent QA systems capable of assisting clinicians in ECG interpretations.", "keywords": "healthcare;question answering;electrocardiogram;ECG;ECG QA", "primary_area": "", "supplementary_material": "/attachment/a0bb4f7681eb34622e63609f9de6207a5faac4a9.pdf", "author": "Jungwoo Oh;Gyubok Lee;Seongsu Bae;Joon-myoung Kwon;Edward Choi", "authorids": "~Jungwoo_Oh1;~Gyubok_Lee1;~Seongsu_Bae1;~Joon-myoung_Kwon1;~Edward_Choi1", "gender": "M;M;M;M;M", "homepage": ";https://sites.google.com/view/gyuboklee;;https://scholar.google.co.kr/citations?user=DMd-2NEAAAAJ&hl=ko;http://mp2893.com", "dblp": "18/9560;249/4944;307/5358;;41/3886", "google_scholar": "YlZmoPQAAAAJ;UYzauyYAAAAJ;hJKVzt4AAAAJ;https://scholar.google.co.kr/citations?user=DMd-2NEAAAAJ;GUlGIPkAAAAJ", "orcid": "0000-0002-4804-6150;;;0000-0001-6754-1010;", "linkedin": ";gyubok-lee-104915229;seongsu-bae-17297b180/;;", "or_profile": "~Jungwoo_Oh1;~Gyubok_Lee1;~Seongsu_Bae1;~Joon-myoung_Kwon1;~Edward_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Research, Microsoft (Asia);Medical AI Co., Ltd.;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;research.microsoft.com;medicalai.com;kaist.ac.kr", "position": "PhD student;PhD student;Intern;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\noh2023ecgqa,\ntitle={{ECG}-{QA}: A Comprehensive Question Answering Dataset Combined With Electrocardiogram},\nauthor={Jungwoo Oh and Gyubok Lee and Seongsu Bae and Joon-myoung Kwon and Edward Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=YWJ7Yi4OtH}\n}", "github": "", "project": "", "reviewers": "qtCQ;4ESf;28Uj", "pdf_size": 533176, "rating": "7;7;7", "confidence": "3;3;2", "wc_summary_and_contributions": "61;62;81", "wc_strengths": "82;82;51", "wc_improvement": "36;83;49", "wc_limitations": "44;73;72", "wc_correctness": "9;24;1", "wc_clarity": "10;4;1", "wc_relation_to_prior_work": "8;7;1", "wc_documentation": "8;11;1", "wc_additional_feedback": "1;1;1", "wc_review": "259;347;258", "wc_reply_reviewers": "38;6;7", "wc_reply_authors": "674;806;273", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 68.0, 9.201449161228174 ], "wc_strengths_avg": [ 71.66666666666667, 14.613540144521982 ], "wc_improvement_avg": [ 56.0, 19.8158185969358 ], "wc_limitations_avg": [ 63.0, 13.4412301024373 ], "wc_correctness_avg": [ 11.333333333333334, 9.533566430716728 ], "wc_clarity_avg": [ 5.0, 3.7416573867739413 ], "wc_relation_to_prior_work_avg": [ 5.333333333333333, 3.0912061651652345 ], "wc_documentation_avg": [ 6.666666666666667, 4.189935029992179 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 288.0, 41.72129751897305 ], "wc_reply_reviewers_avg": [ 17.0, 14.854853303438128 ], "wc_reply_authors_avg": [ 584.3333333333334, 226.64558725512885 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8335340307607894481&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": 
"kaist.ac.kr;kaist.ac.kr;research.microsoft.com;medicalai.com;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Microsoft;Medical AI Co., Ltd.", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.microsoft.com;", "aff_unique_abbr": "KAIST;MS;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "South Korea;China" }, { "title": "Predicting a Protein's Stability under a Million Mutations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71293", "id": "YWSOpYjyG4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f03cb785864596fa5901f1359d23fd81-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YWSOpYjyG4", "openreview": "https://openreview.net/forum?id=YWSOpYjyG4", "poster": "/media/PosterPDFs/NeurIPS%202023/71293.png?t=1701930668.269617", "slides": "https://nips.cc/virtual/2023/poster/71293", "video": "https://nips.cc/virtual/2023/poster/71293", "author_site": "Jeffrey Ouyang-Zhang, Daniel Diaz, Adam Klivans, Philipp Kraehenbuehl", "tldr": "", "abstract": "Stabilizing proteins is a foundational step in protein engineering. However, the evolutionary pressure of all extant proteins makes identifying the scarce number of mutations that will improve thermodynamic stability challenging. \nDeep learning has recently emerged as a powerful tool for identifying promising mutations.\nExisting approaches, however, are computationally expensive, as the number of model inferences scales with the number of mutations queried. \nOur main contribution is a simple, parallel decoding algorithm.\nMutate Everything is capable of predicting the effect of all single and double mutations in one forward pass. 
\nIt is even versatile enough to predict higher-order mutations with minimal computational overhead.\nWe build Mutate Everything on top of ESM2 and AlphaFold, neither of which were trained to predict thermodynamic stability.\nWe trained on the Mega-Scale cDNA proteolysis dataset and achieved state-of-the-art performance on single and higher-order mutations on S669, ProTherm, and ProteinGym datasets.\nOur code is available at https://github.com/jozhang97/MutateEverything.", "keywords": "stability;proteins;biology;physical", "primary_area": "", "supplementary_material": "", "author": "Jeffrey Ouyang-Zhang;Daniel Jesus Diaz;Adam Klivans;Philipp Kraehenbuehl", "authorids": "~Jeffrey_Ouyang-Zhang1;~Daniel_Jesus_Diaz1;~Adam_Klivans1;~Philipp_Kraehenbuehl1", "gender": "M;M;M;M", "homepage": ";http://www.cs.utexas.edu/~klivans;http://www.philkr.net/;https://jozhang97.github.io", "dblp": ";k/AdamRKlivans;43/7592;229/3558", "google_scholar": "lVD0CNEAAAAJ;;https://scholar.google.com.tw/citations?user=dzOd2hgAAAAJ;KyNwquYAAAAJ", "orcid": "0000-0002-7891-2128;;;0009-0001-4268-3087", "linkedin": "aiproteins/;;;", "or_profile": "~Daniel_Jesus_Diaz1;~Adam_Klivans1;~Philipp_Kraehenbuehl1;~Jeffrey_O_Zhang1", "aff": "University of Texas at Austin;University of Texas, Austin;Apple;University of Texas, Austin", "aff_domain": "utexas.edu;cs.utexas.edu;apple.com;utexas.edu", "position": "PhD student;Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nouyang-zhang2023predicting,\ntitle={Predicting a Protein's Stability under a Million Mutations},\nauthor={Jeffrey Ouyang-Zhang and Daniel Jesus Diaz and Adam Klivans and Philipp Kraehenbuehl},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YWSOpYjyG4}\n}", "github": "", "project": "", "reviewers": "KhLn;xRxg;AB6d;31pU", "pdf_size": 1302571, "rating": "3;5;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "44;48;62;208", "wc_strengths": "41;57;44;91", "wc_weaknesses": "129;133;86;100", "wc_questions": "38;354;343;110", "wc_limitations": "24;27;29;12", "wc_review": "276;619;564;521", "wc_reply_reviewers": "0;511;149;15", "wc_reply_authors": "15;914;406;0", "reply_reviewers": "0;3;2;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.5, 68.16707416341119 ], "wc_strengths_avg": [ 58.25, 19.84155991851447 ], "wc_weaknesses_avg": [ 112.0, 19.685019685029527 ], "wc_questions_avg": [ 211.25, 139.6448620608721 ], "wc_limitations_avg": [ 23.0, 6.59545297913646 ], "wc_review_avg": [ 495.0, 131.1239871266886 ], "wc_reply_reviewers_avg": [ 168.75, 205.93733877080183 ], "wc_reply_authors_avg": [ 333.75, 372.4583030353868 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10235942361255159080&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "utexas.edu;cs.utexas.edu;apple.com;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Austin;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.apple.com", 
"aff_unique_abbr": "UT Austin;Apple", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fixing the NTK: From Neural Network Linearizations to Exact Convex Programs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71292", "id": "YWsPN0EMZr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/055fc19a3ce780b96cff15ffe738c1f1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YWsPN0EMZr", "openreview": "https://openreview.net/forum?id=YWsPN0EMZr", "poster": "/media/PosterPDFs/NeurIPS%202023/71292.png?t=1701652675.033373", "slides": "https://nips.cc/virtual/2023/poster/71292", "video": "https://nips.cc/virtual/2023/poster/71292", "author_site": "Rajat Vadiraj Dwaraknath, Tolga Ergen, Mert Pilanci", "tldr": "", "abstract": "Recently, theoretical analyses of deep neural networks have broadly focused on two directions: 1) Providing insight into neural network training by SGD in the limit of infinite hidden-layer width and infinitesimally small learning rate (also known as gradient flow) via the Neural Tangent Kernel (NTK), and 2) Globally optimizing the regularized training objective via cone-constrained convex reformulations of ReLU networks. The latter research direction also yielded an alternative formulation of the ReLU network, called a gated ReLU network, that is globally optimizable via efficient unconstrained convex programs. In this work, we interpret the convex program for this gated ReLU network as a Multiple Kernel Learning (MKL) model with a weighted data masking feature map and establish a connection to the NTK. Specifically, we show that for a particular choice of mask weights that do not depend on the learning targets, this kernel is equivalent to the NTK of the gated ReLU network on the training data. A consequence of this lack of dependence on the targets is that the NTK cannot perform better than the optimal MKL kernel on the training set. By using iterative reweighting, we improve the weights induced by the NTK to obtain the optimal MKL kernel which is equivalent to the solution of the exact convex reformulation of the gated ReLU network. We also provide several numerical simulations corroborating our theory. 
Additionally, we provide an analysis of the prediction error of the resulting optimal kernel via consistency results for the group lasso.", "keywords": "neural tangent kernel;NTK;ReLU activations;neural networks;gated ReLU;convex optimization;kernel;multiple kernel learning;MKL;group lasso;iterative reweighting;group norm", "primary_area": "", "supplementary_material": "/attachment/c218f919a5fe456678361186e304b7bf0870c98a.zip", "author": "Rajat Vadiraj Dwaraknath;Tolga Ergen;Mert Pilanci", "authorids": "~Rajat_Vadiraj_Dwaraknath1;~Tolga_Ergen1;~Mert_Pilanci3", "gender": "M;M;M", "homepage": "https://eigentales.com;https://tolgaergen.github.io/;https://stanford.edu/~pilanci/", "dblp": "289/1785;202/7477.html;45/8056", "google_scholar": ";https://scholar.google.com.tr/citations?user=T1pWaCsAAAAJ;aSAS-aAAAAAJ", "orcid": ";0000-0003-4806-0224;", "linkedin": ";;mert-pilanci-ba615743/", "or_profile": "~Rajat_Vadiraj_Dwaraknath1;~Tolga_Ergen1;~Mert_Pilanci3", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndwaraknath2023fixing,\ntitle={Fixing the {NTK}: From Neural Network Linearizations to Exact Convex Programs},\nauthor={Rajat Vadiraj Dwaraknath and Tolga Ergen and Mert Pilanci},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YWsPN0EMZr}\n}", "github": "", "project": "", "reviewers": "98L1;UwLS;nJAh;Tkhd;NJAL", "pdf_size": 596914, "rating": "4;5;5;7;8", "confidence": "4;1;3;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "3;3;3;3;4", "wc_summary": "104;41;79;85;83", "wc_strengths": "92;48;72;36;134", "wc_weaknesses": "267;38;315;303;1", "wc_questions": "114;1;64;246;1", "wc_limitations": "2;1;23;67;1", "wc_review": "579;129;553;737;220", "wc_reply_reviewers": "905;0;28;122;0", "wc_reply_authors": "653;0;63;54;0", "reply_reviewers": "3;0;1;1;0", "reply_authors": "4;0;2;2;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 78.4, 20.58737477193243 ], "wc_strengths_avg": [ 76.4, 34.696397507522306 ], "wc_weaknesses_avg": [ 184.8, 136.3912020623031 ], "wc_questions_avg": [ 85.2, 90.90082507876373 ], "wc_limitations_avg": [ 18.8, 25.521755425518833 ], "wc_review_avg": [ 443.6, 230.37152601829942 ], "wc_reply_reviewers_avg": [ 211.0, 349.8822659124066 ], "wc_reply_authors_avg": [ 154.0, 250.8840369573162 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.8, 1.32664991614216 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3734080224074693, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:s12gb3AzgycJ:scholar.google.com/&scioq=Fixing+the+NTK:+From+Neural+Network+Linearizations+to+Exact+Convex+Programs&hl=en&as_sdt=0,5", "gs_version_total": 9, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": 
"PlanBench: An Extensible Benchmark for Evaluating Large Language Models on Planning and Reasoning about Change", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73553", "id": "YXogl4uQUO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a92bcdede88c7afd108072faf5485c8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=YXogl4uQUO", "openreview": "https://openreview.net/forum?id=YXogl4uQUO", "poster": "/media/PosterPDFs/NeurIPS%202023/73553.png?t=1702479714.7337754", "slides": "https://nips.cc/virtual/2023/poster/73553", "video": "https://nips.cc/virtual/2023/poster/73553", "author_site": "Karthik Valmeekam, Matthew Marquez, Alberto Olmo, Sarath Sreedharan, Subbarao Kambhampati", "tldr": "", "abstract": "Generating plans of action, and reasoning about change have long been considered a core competence of intelligent agents. It is thus no surprise that evaluating the planning and reasoning capabilities of large language models (LLMs) has become a hot topic of research. Most claims about LLM planning capabilities are however based on common sense tasks\u2013where it becomes hard to tell whether LLMs are planning or merely retrieving from their vast world knowledge. There is a strong need for systematic and extensible planning benchmarks with sufficient diversity to evaluate whether LLMs have innate planning capabilities. Motivated by this, we propose PlanBench, an extensible benchmark suite based on the kinds of domains used in the automated planning community, especially in the International Planning Competition, to test the capabilities of LLMs in planning or reasoning about actions and change. PlanBench provides sufficient diversity in both the task domains and the specific planning capabilities. Our studies also show that on many critical capabilities\u2013including plan generation\u2013LLM performance falls quite short, even with the SOTA models. 
PlanBench can thus function as a useful marker of progress of LLMs in planning and reasoning.", "keywords": "Large Language Models;Planning", "primary_area": "", "supplementary_material": "/attachment/9cca51cf6e290a2256fbae946694c77693d3d31c.pdf", "author": "Karthik Valmeekam;Matthew Marquez;Alberto Olmo;Sarath Sreedharan;Subbarao Kambhampati", "authorids": "~Karthik_Valmeekam1;~Matthew_Marquez1;~Alberto_Olmo1;~Sarath_Sreedharan1;~Subbarao_Kambhampati1", "gender": "M;M;;;M", "homepage": ";;;;http://rakaposhi.eas.asu.edu", "dblp": "279/2957;332/1174.html;;162/5110;k/SKambhampati", "google_scholar": "CrYLDt4AAAAJ;NPlGTxwAAAAJ;;;yl3L07sAAAAJ", "orcid": ";0000-0001-9794-8700;;;", "linkedin": ";matthew-m-2661a5a0/;;;", "or_profile": "~Karthik_Valmeekam1;~Matthew_Marquez1;~Alberto_Olmo1;~Sarath_Sreedharan1;~Subbarao_Kambhampati1", "aff": "Arizona State University;Arizona State University;;Colorado State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;;colostate.edu;asu.edu", "position": "PhD student;PhD student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nvalmeekam2023planbench,\ntitle={PlanBench: An Extensible Benchmark for Evaluating Large Language Models on Planning and Reasoning about Change},\nauthor={Karthik Valmeekam and Matthew Marquez and Alberto Olmo and Sarath Sreedharan and Subbarao Kambhampati},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=YXogl4uQUO}\n}", "github": "", "project": "", "reviewers": "7obx;nh2u;YvuN;vfR7", "pdf_size": 859064, "rating": "6;7;7;8", "confidence": "5;5;3;4", "wc_summary_and_contributions": "54;133;80;65", "wc_strengths": "56;127;111;57", "wc_improvement": "234;186;2;41", "wc_limitations": "1;28;2;2", "wc_correctness": "11;7;269;24", "wc_clarity": "1;5;92;33", "wc_relation_to_prior_work": "9;1;120;11", "wc_documentation": "1;21;4;61", "wc_additional_feedback": "1;1;1;1", "wc_review": "368;509;681;295", "wc_reply_reviewers": "206;17;572;11", "wc_reply_authors": "904;675;804;148", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 83.0, 30.306764921383476 ], "wc_strengths_avg": [ 87.75, 31.75984099456419 ], "wc_improvement_avg": [ 115.75, 96.75322992024607 ], "wc_limitations_avg": [ 8.25, 11.409973707244026 ], "wc_correctness_avg": [ 77.75, 110.59695972313163 ], "wc_clarity_avg": [ 32.75, 36.36189626518397 ], "wc_relation_to_prior_work_avg": [ 35.25, 49.07328703072579 ], "wc_documentation_avg": [ 21.75, 23.909987452945266 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 463.25, 147.38448866824487 ], "wc_reply_reviewers_avg": [ 201.5, 227.8272371776474 ], "wc_reply_authors_avg": [ 632.75, 291.40639577744344 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 191, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9119716657194460531&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "asu.edu;asu.edu;;colostate.edu;asu.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Arizona State University;Colorado State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.colostate.edu", "aff_unique_abbr": "ASU;CSU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Structured Prediction with Stronger Consistency Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71291", "id": "YZ7ip645Ra", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/927962d8866377a07ee3150d2d691319-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YZ7ip645Ra", "openreview": "https://openreview.net/forum?id=YZ7ip645Ra", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71291", "video": "https://nips.cc/virtual/2023/poster/71291", "author_site": "Anqi Mao, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "We present an extensive study of surrogate losses for structured prediction supported by *$H$-consistency bounds*. These are recently introduced guarantees that are more relevant to learning than Bayes-consistency, since they are not asymptotic and since they take into account the hypothesis set $H$ used. We first show that no non-trivial $H$-consistency bound can be derived for widely used surrogate structured prediction losses. We then define several new families of surrogate losses, including *structured comp-sum losses* and *structured constrained losses*, for which we prove $H$-consistency bounds and thus Bayes-consistency. These loss functions readily lead to new structured prediction algorithms with stronger theoretical guarantees, based on their minimization. We describe efficient algorithms for minimizing several of these surrogate losses, including a new *structured logistic loss*.", "keywords": "structured prediction;consistency;learning theory;natural language processing", "primary_area": "", "supplementary_material": "/attachment/9d1a600a7745e011d6d0a34db5a57703b85c77b1.pdf", "author": "Anqi Mao;Mehryar Mohri;Yutao Zhong", "authorids": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": "F;M;", "homepage": "https://anqi-mao.github.io;https://cs.nyu.edu/~mohri/;", "dblp": "241/6864;03/5448;51/3178-2", "google_scholar": "nkjIZ-oAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;", "linkedin": ";mehryar-mohri-3737b981/;", "or_profile": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Courant Institute of Mathematical Sciences, NYU;Google Research;Google", "aff_domain": "cims.nyu.edu;google.com;google.com", "position": "PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nmao2023structured,\ntitle={Structured Prediction with Stronger Consistency Guarantees},\nauthor={Anqi Mao and Mehryar Mohri and Yutao Zhong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YZ7ip645Ra}\n}", "github": "", "project": "", "reviewers": "wtd4;oyCT;y5Sa;9Sph", "pdf_size": 534170, "rating": "6;7;7;7", "confidence": "3;4;3;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "101;44;73;148", "wc_strengths": "228;33;228;42", "wc_weaknesses": "79;18;63;115", "wc_questions": "677;266;39;2", "wc_limitations": "46;15;15;19", "wc_review": "1131;376;418;326", "wc_reply_reviewers": "114;39;18;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.5, 38.34383914007569 ], 
"wc_strengths_avg": [ 132.75, 95.30313478579811 ], "wc_weaknesses_avg": [ 68.75, 34.83084121866711 ], "wc_questions_avg": [ 246.0, 268.5823895939568 ], "wc_limitations_avg": [ 23.75, 12.94942083646987 ], "wc_review_avg": [ 562.75, 329.6918068439069 ], "wc_reply_reviewers_avg": [ 46.75, 39.85834291588149 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17805315449901276245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cims.nyu.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": "Courant Institute of Mathematical Sciences;Google Research", "aff_unique_url": "https://www.courant.nyu.edu;https://research.google", "aff_unique_abbr": "NYU;Google Research", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "New York;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "GAN You See Me? Enhanced Data Reconstruction Attacks against Split Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71290", "id": "YZGWhs1H7F", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab003a4f85ecb1b7b1514ff539dc7395-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YZGWhs1H7F", "openreview": "https://openreview.net/forum?id=YZGWhs1H7F", "poster": "/media/PosterPDFs/NeurIPS%202023/71290.png?t=1701578853.0009072", "slides": "https://nips.cc/virtual/2023/poster/71290", "video": "https://nips.cc/virtual/2023/poster/71290", "author_site": "Ziang Li, Mengda Yang, Yaxin Liu, Juan Wang, Hongxin Hu, Wenzhe Yi, Xiaoyang Xu", "tldr": "", "abstract": "Split Inference (SI) is an emerging deep learning paradigm that addresses computational constraints on edge devices and preserves data privacy through collaborative edge-cloud approaches. However, SI is vulnerable to Data Reconstruction Attacks (DRA), which aim to reconstruct users' private prediction instances. Existing attack methods suffer from various limitations. Optimization-based DRAs do not leverage public data effectively, while Learning-based DRAs depend heavily on auxiliary data quantity and distribution similarity. Consequently, these approaches yield unsatisfactory attack results and are sensitive to defense mechanisms. To overcome these challenges, we propose a GAN-based LAtent Space Search attack (GLASS) that harnesses abundant prior knowledge from public data using advanced StyleGAN technologies. Additionally, we introduce GLASS++ to enhance reconstruction stability. Our approach represents the first GAN-based DRA against SI, and extensive evaluation across different split points and adversary setups demonstrates its state-of-the-art performance. 
Moreover, we thoroughly examine seven defense mechanisms, highlighting our method's capability to reveal private information even in the presence of these defenses.", "keywords": "deep learning;split inference;data reconstruction attack", "primary_area": "", "supplementary_material": "/attachment/d54b34378d532778bb8b9d488acf60012077c1f5.pdf", "author": "Ziang Li;Mengda Yang;Yaxin Liu;Juan Wang;Hongxin Hu;Wenzhe Yi;Xiaoyang Xu", "authorids": "~Ziang_Li1;~Mengda_Yang1;yaxin.liu@whu.edu.cn;~Juan_Wang1;~Hongxin_Hu3;~Wenzhe_Yi1;~Xiaoyang_Xu2", "gender": "M;M;;F;M;M;M", "homepage": "https://l1ziang.github.io/;;;https://wangjuanwhu.github.io/publication/;https://people.cs.clemson.edu/~hongxih/;https://academia.yiwenzhe.com/;", "dblp": ";116/8591;;;02/2870;346/0918;59/8279-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;https://scholar.google.com.tw/citations?user=fQQXj1oAAAAJ;https://scholar.google.com/citations?hl=en;bWRTGTIAAAAJ", "orcid": "0000-0003-1015-5594;0000-0002-7808-852X;;;0000-0001-8710-247X;0000-0003-1096-2505;0000-0002-2672-9145", "linkedin": ";;;;hongxin-hu-7b22821b/;;", "or_profile": "~Ziang_Li1;~Mengda_Yang1;yaxin.liu@whu.edu.cn;~Juan_Wang1;~Hongxin_Hu3;~Wenzhe_Yi1;~Xiaoyang_Xu2", "aff": "Wuhan University;Wuhan University;;Wuhan University;State University of New York;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;;whu.edu.cn;buffalo.edu;whu.edu.cn;whu.edu.cn", "position": "PhD student;PhD student;;Full Professor;Associate Professor;MS student;MS student", "bibtex": "@inproceedings{\nli2023gan,\ntitle={{GAN} You See Me? Enhanced Data Reconstruction Attacks against Split Inference},\nauthor={Ziang Li and Mengda Yang and Yaxin Liu and Juan Wang and Hongxin Hu and Wenzhe Yi and Xiaoyang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YZGWhs1H7F}\n}", "github": "", "project": "", "reviewers": "k9Qp;HnH3;WYrE;BEtd", "pdf_size": 14091662, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "70;105;96;41", "wc_strengths": "31;139;40;89", "wc_weaknesses": "214;154;184;133", "wc_questions": "33;36;187;34", "wc_limitations": "15;30;122;3", "wc_review": "363;464;629;300", "wc_reply_reviewers": "32;26;27;0", "wc_reply_authors": "18;40;9;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 24.92990172463582 ], "wc_strengths_avg": [ 74.75, 43.16465568031326 ], "wc_weaknesses_avg": [ 171.25, 30.621683493890405 ], "wc_questions_avg": [ 72.5, 66.11542936410532 ], "wc_limitations_avg": [ 42.5, 46.88549882426335 ], "wc_review_avg": [ 439.0, 124.32015122255925 ], "wc_reply_reviewers_avg": [ 21.25, 12.47747971346778 ], "wc_reply_authors_avg": [ 16.75, 14.85555451674558 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3227815690311000243&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;whu.edu.cn;;whu.edu.cn;buffalo.edu;whu.edu.cn;whu.edu.cn", "author_num": 7, 
"aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Wuhan University;State University of New York", "aff_unique_dep": ";", "aff_unique_url": "http://www.whu.edu.cn/;https://www.suny.edu", "aff_unique_abbr": "WHU;SUNY", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Contrast, Attend and Diffuse to Decode High-Resolution Images from Brain Activities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71289", "id": "YZSLDEE0mw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28dad4a70f748a2980998d3ed0f1b8d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YZSLDEE0mw", "openreview": "https://openreview.net/forum?id=YZSLDEE0mw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71289", "video": "https://nips.cc/virtual/2023/poster/71289", "author_site": "Jingyuan Sun, Mingxiao Li, Zijiao Chen, Yunhao Zhang, Shaonan Wang, Marie-Francine Moens", "tldr": "", "abstract": "Decoding visual stimuli from neural responses recorded by functional Magnetic Resonance Imaging (fMRI) presents an intriguing intersection between cognitive neuroscience and machine learning, promising advancements in understanding human visual perception. However, the task is challenging due to the noisy nature of fMRI signals and the intricate pattern of brain visual representations. To mitigate these challenges, we introduce a two-phase fMRI representation learning framework. The first phase pre-trains an fMRI feature learner with a proposed Double-contrastive Mask Auto-encoder to learn denoised representations. The second phase tunes the feature learner to attend to neural activation patterns most informative for visual reconstruction with guidance from an image auto-encoder. The optimized fMRI feature learner then conditions a latent diffusion model to reconstruct image stimuli from brain activities. Experimental results demonstrate our model's superiority in generating high-resolution and semantically accurate images, substantially exceeding previous state-of-the-art methods by 39.34% in the 50-way-top-1 semantic classification accuracy. 
The code implementation is available at https://github.com/soinx0629/vis_dec_neurips/.", "keywords": "Neural decoding;brain machine interface;mind reader;visual reconstruction;vision decoding", "primary_area": "", "supplementary_material": "/attachment/5d66031654b025bb4c5e07eb87338d4d2adc3d45.zip", "author": "Jingyuan Sun;Mingxiao Li;Zijiao Chen;Yunhao Zhang;Shaonan Wang;Marie-Francine Moens", "authorids": "~Jingyuan_Sun1;~Mingxiao_Li1;~Zijiao_Chen1;~Yunhao_Zhang3;~Shaonan_Wang1;~Marie-Francine_Moens1", "gender": "M;M;F;M;F;F", "homepage": ";https://mingxiao-li.github.io;;https://scholar.google.com/citations?user=IainqjgAAAAJ&hl=zh-CN&oi=sra;https://wangshaonan.github.io/;https://people.cs.kuleuven.be/~sien.moens/", "dblp": ";186/8921-2;;;29/8236;m/MarieFrancineMoens", "google_scholar": "aBnLE_EAAAAJ;https://scholar.google.com/citations?hl=en;gCTUx9oAAAAJ;IainqjgAAAAJ;ydFT-G8AAAAJ;https://scholar.google.com.tw/citations?user=O9hYMUUAAAAJ", "orcid": ";0000-0003-0238-483X;0000-0001-6233-8240;;;0000-0002-3732-9323", "linkedin": ";mingxiao-li-7b9b71180/;;;;marie-francine-moens-8175a56/?originalSubdomain=be", "or_profile": "~Jingyuan_Sun1;~Mingxiao_Li1;~Zijiao_Chen1;~Yunhao_Zhang3;~Shaonan_Wang1;~Marie-Francine_Moens1", "aff": "KU Leuven;;National University of Singapore, National University of Singapore;Institute of Automation, Chinese Academy of Sciences;;KU Leuven, KU Leuven", "aff_domain": "kuleuven.be;;u.nus.edu;ia.ac;;cs.kuleuven.be", "position": "Postdoc;;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nsun2023contrast,\ntitle={Contrast, Attend and Diffuse to Decode High-Resolution Images from Brain Activities},\nauthor={Jingyuan Sun and Mingxiao Li and Yunhao Zhang and Marie-Francine Moens and Zijiao Chen and Shaonan Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YZSLDEE0mw}\n}", "github": "", "project": "", "reviewers": "ir1U;5Cka;BiRm;zH9U;zrBS", "pdf_size": 3985966, "rating": "5;6;6;6;7", "confidence": "5;4;4;3;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;4", "presentation": "3;2;3;3;3", "wc_summary": "47;89;53;107;246", "wc_strengths": "28;46;16;72;51", "wc_weaknesses": "58;74;157;11;63", "wc_questions": "2;479;5;118;54", "wc_limitations": "8;2;24;3;5", "wc_review": "143;690;255;311;419", "wc_reply_reviewers": "0;163;20;233;38", "wc_reply_authors": "0;151;0;1076;0", "reply_reviewers": "0;1;1;2;1", "reply_authors": "1;2;1;3;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 108.4, 72.32039822899208 ], "wc_strengths_avg": [ 42.6, 19.324595726689857 ], "wc_weaknesses_avg": [ 72.6, 47.38185306633754 ], "wc_questions_avg": [ 131.6, 178.72727827614898 ], "wc_limitations_avg": [ 8.4, 8.064738061462382 ], "wc_review_avg": [ 363.6, 185.91998278829527 ], "wc_reply_reviewers_avg": [ 90.8, 91.08106279573157 ], "wc_reply_authors_avg": [ 245.4, 419.39747257226054 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.49999999999999994, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11746941525344363466&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "kuleuven.be;;u.nus.edu;ia.ac;;cs.kuleuven.be", "author_num": 6,
"aff_unique_index": "0;1;2;3", "aff_unique_norm": "Katholieke Universiteit Leuven;National University of Singapore;Chinese Academy of Sciences;KU Leuven", "aff_unique_dep": ";;Institute of Automation;", "aff_unique_url": "https://www.kuleuven.be;https://www.nus.edu.sg;http://www.ia.cas.cn;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;NUS;CAS;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Belgium;Singapore;China" }, { "id": "Ya8WzNF8oQ", "title": "Beyond Stationarity: Convergence Analysis of Stochastic Softmax Policy Gradient Methods", "track": "main", "status": "Reject", "tldr": "", "abstract": "Markov Decision Processes (MDPs) deliver a formal framework for modeling and solving sequential decision-making problems. In this paper, we make several contributions towards the theoretical understanding of (stochastic) policy gradient methods for MDPs. The focus lies on proving convergence (rates) of softmax policy gradient towards global optima in undiscounted finite-time horizon problems, i.e. $\\gamma=1$, without regularization. Such problems are relevant for instance for optimal stopping or specific supply chain problems. Our estimates must differ significantly from several recent articles that involve powers of $(1-\\gamma)^{-1}$.\n\nThe main contributions are the following. For undiscounted finite-time MDPs we prove asymptotic convergence of policy gradient to a global optimum and derive a convergence rate using a weak Polyak-\\L ojasiewicz (PL) inequality. In each decision epoch, the derived error bound depends linearly on the remaining duration of the MDP. In the second part of the analysis, we quantify the convergence behavior for the stochastic version of policy gradient. The analysis yields complexity bounds for an approximation arbitrarily close to the global optimum with high probability. 
\n\nAs a by-product, our stochastic gradient arguments prove that the plain vanilla REINFORCE algorithm for softmax policies indeed approximates global optima for sufficiently large batch sizes.", "keywords": "reinforcement learning;policy gradient;stochastic approximation;finite-time MDP", "primary_area": "", "supplementary_material": "/attachment/71a41d7f67de9ea21b07efb17324eb7d32351650.pdf", "author": "Sara Klein;Simon Weissmann;Leif D\u00f6ring", "authorids": "~Sara_Klein1;simon.weissmann@uni-mannheim.de;leif.doering@uni-mannheim.de", "gender": "F;;", "homepage": ";;", "dblp": "359/1702;;", "google_scholar": ";;", "orcid": ";;", "linkedin": "sara-klein-wima/;;", "or_profile": "~Sara_Klein1;simon.weissmann@uni-mannheim.de;leif.doering@uni-mannheim.de", "aff": "Universit\u00e4t Mannheim;;", "aff_domain": "uni-mannheim.de;;", "position": "PhD student;;", "bibtex": "@misc{\nklein2023beyond,\ntitle={Beyond Stationarity: Convergence Analysis of Stochastic Softmax Policy Gradient Methods},\nauthor={Sara Klein and Simon Weissmann and Leif D{\\\"o}ring},\nyear={2023},\nurl={https://openreview.net/forum?id=Ya8WzNF8oQ}\n}", "github": "", "project": "", "reviewers": "h4oF;jcRK;6bDX;3eeX", "site": "https://openreview.net/forum?id=Ya8WzNF8oQ", "pdf_size": 321326, "rating": "3;6;6;7", "confidence": "3;2;4;3", "soundness": "2;3;3;4", "novelty": "1;2;2;3", "presentation": "3;3;3;4", "wc_summary": "112;61;37;68", "wc_strengths": "101;25;53;39", "wc_weaknesses": "540;45;114;88", "wc_questions": "227;2;45;66", "wc_limitations": "12;2;31;39", "wc_review": "992;135;280;300", "wc_reply_reviewers": "808;12;0;16", "wc_reply_authors": "1410;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.5, 27.097047809678457 ], "wc_strengths_avg": [ 54.5, 28.613807855648993 ], "wc_weaknesses_avg": [ 196.75, 199.70149598838762 ], "wc_questions_avg": [ 85.0, 85.16748205741438 ], "wc_limitations_avg": [ 21.0, 14.713938969562161 ], "wc_review_avg": [ 426.75, 332.500657894086 ], "wc_reply_reviewers_avg": [ 209.0, 345.8829281707902 ], "wc_reply_authors_avg": [ 352.5, 610.5479096680292 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7573781758097719815&as_sdt=1005&sciodt=0,4&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Mannheim", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-mannheim.de", "aff_unique_abbr": "UM", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Toolformer: Language Models Can Teach Themselves to Use Tools", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71288", "id": "Yacmpz84TH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d842425e4bf79ba039352da0f658a906-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Yacmpz84TH", "openreview": "https://openreview.net/forum?id=Yacmpz84TH", "poster": "/media/PosterPDFs/NeurIPS%202023/71288.png?t=1702395710.0671892", "slides": "https://nips.cc/virtual/2023/poster/71288", "video": "https://nips.cc/virtual/2023/poster/71288", "author_site": "Timo Schick, 
Jane Dwivedi-Yu, Roberto Dessi, Roberta Raileanu, Maria Lomeli, Eric Hambro, Luke Zettlemoyer, Nicola Cancedda, Thomas Scialom", "tldr": "", "abstract": "Language models (LMs) exhibit remarkable abilities to solve new tasks from just a few examples or textual instructions, especially at scale. They also, paradoxically, struggle with basic functionality, such as arithmetic or factual lookup, where much simpler and smaller specialized models excel. In this paper, we show that LMs can teach themselves to *use external tools* via simple APIs and achieve the best of both worlds. We introduce *Toolformer*, a model trained to decide which APIs to call, when to call them, what arguments to pass, and how to best incorporate the results into future token prediction. This is done in a self-supervised way, requiring nothing more than a handful of demonstrations for each API. We incorporate a range of tools, including a calculator, a Q&A system, a search engine, a translation system, and a calendar. Toolformer achieves substantially improved zero-shot performance across a variety of downstream tasks, often competitive with much larger models, without sacrificing its core language modeling abilities.", "keywords": "Language Models;Zero-Shot Learning;Tool Use;APIs", "primary_area": "", "supplementary_material": "/attachment/21bae39b339152aac0f41c417befde081b3b7cdd.pdf", "author": "Timo Schick;Jane Dwivedi-Yu;Roberto Dessi;Roberta Raileanu;Maria Lomeli;Eric Hambro;Luke Zettlemoyer;Nicola Cancedda;Thomas Scialom", "authorids": "~Timo_Schick1;~Jane_Dwivedi-Yu1;~Roberto_Dessi1;~Roberta_Raileanu2;~Maria_Lomeli2;~Eric_Hambro1;~Luke_Zettlemoyer1;ncan@meta.com;~Thomas_Scialom1", "gender": ";;M;;F;M;M;;M", "homepage": "http://timoschick.com;;https://robertodessi.github.io/;;https://mlomeli1.github.io;https://erichambro.com/;https://www.cs.washington.edu/people/faculty/lsz/;;", "dblp": "203/9176;;228/9267.html;;132/9008;290/1986;21/6793;;", "google_scholar": ";;LElX2I4AAAAJ;;8SK2fPAAAAAJ;ehquBPIAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;;qou4P-kAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;eric-hambro;luke-zettlemoyer-a0109b226/;;", "or_profile": "~Timo_Schick1;~Jane_Dwivedi-Yu1;~Roberto_Dessi1;~Roberta_Raileanu2;~Maria_Lomeli2;~Eric_Hambro1;~Luke_Zettlemoyer1;ncan@meta.com;~Thomas_Scialom1", "aff": "Meta Facebook;;Meta;;Meta;Meta Facebook;Meta;;", "aff_domain": "fb.com;;fb.com;;meta.com;fb.com;meta.com;;", "position": "Researcher;;PhD student;;Researcher;Researcher;Researcher;;", "bibtex": "@inproceedings{\nschick2023toolformer,\ntitle={Toolformer: Language Models Can Teach Themselves to Use Tools},\nauthor={Timo Schick and Jane Dwivedi-Yu and Roberto Dessi and Roberta Raileanu and Maria Lomeli and Eric Hambro and Luke Zettlemoyer and Nicola Cancedda and Thomas Scialom},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Yacmpz84TH}\n}", "github": "", "project": "", "reviewers": "Vx6Q;6zEw;jLhs;dirG", "pdf_size": 471033, "rating": "6;7;7;8", "confidence": "5;4;4;4", "soundness": "4;4;3;4", "novelty": "3;3;3;4", "presentation": "4;3;3;3", "wc_summary": "59;70;191;79", "wc_strengths": "39;116;94;89", "wc_weaknesses": "109;203;134;127", "wc_questions": "1;55;2;87", "wc_limitations": "1;1;2;1", "wc_review": "209;445;423;383", "wc_reply_reviewers": "0;14;6;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 
0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.75, 53.15719612620666 ], "wc_strengths_avg": [ 84.5, 28.164694211015323 ], "wc_weaknesses_avg": [ 143.25, 35.68175303989421 ], "wc_questions_avg": [ 36.25, 36.54705870518173 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 365.0, 92.76852914647294 ], "wc_reply_reviewers_avg": [ 9.5, 6.98212002188447 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1684, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14693007013214941701&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "fb.com;;fb.com;;meta.com;fb.com;meta.com;;", "author_num": 9, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "BiMatting: Efficient Video Matting via Binarization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71287", "id": "YbYQ0JEQ80", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86c070ce724102ee876d1935590e111a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YbYQ0JEQ80", "openreview": "https://openreview.net/forum?id=YbYQ0JEQ80", "poster": "/media/PosterPDFs/NeurIPS%202023/71287.png?t=1699439228.028435", "slides": "https://nips.cc/virtual/2023/poster/71287", "video": "https://nips.cc/virtual/2023/poster/71287", "author_site": "Haotong Qin, Lei Ke, Xudong Ma, Martin Danelljan, Yu-Wing Tai, Chi-Keung Tang, Xianglong Liu, Fisher Yu", "tldr": "", "abstract": "Real-time video matting on edge devices faces significant computational resource constraints, limiting the widespread use of video matting in applications such as online conferences and short-form video production. Binarization is a powerful compression approach that greatly reduces computation and memory consumption by using 1-bit parameters and bitwise operations. However, binarization of the video matting model is not a straightforward process, and our empirical analysis has revealed two primary bottlenecks: severe representation degradation of the encoder and massive redundant computations of the decoder. To address these issues, we propose BiMatting, an accurate and efficient video matting model using binarization. Specifically, we construct shrinkable and dense topologies of the binarized encoder block to enhance the extracted representation. We sparsify the binarized units to reduce the low-information decoding computation. Through extensive experiments, we demonstrate that BiMatting outperforms other binarized video matting models, including state-of-the-art (SOTA) binarization methods, by a significant margin. Our approach even performs comparably to the full-precision counterpart in visual quality. Furthermore, BiMatting achieves remarkable savings of 12.4$\\times$ and 21.6$\\times$ in computation and storage, respectively, showcasing its potential and advantages in real-world resource-constrained scenarios. 
Our code and models are released at https://github.com/htqin/BiMatting .", "keywords": "Video Matting;Model Binarization;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/177749e34bff443aa6c9504651e10d2210601338.zip", "author": "Haotong Qin;Lei Ke;Xudong Ma;Martin Danelljan;Yu-Wing Tai;Chi-Keung Tang;Xianglong Liu;Fisher Yu", "authorids": "~Haotong_Qin1;~Lei_Ke1;~Xudong_Ma3;~Martin_Danelljan4;~Yu-Wing_Tai2;~Chi-Keung_Tang1;~Xianglong_Liu3;~Fisher_Yu2", "gender": "M;M;M;M;M;;;M", "homepage": "https://htqin.github.io/;http://www.kelei.site;https://macaronlin.github.io/;https://martin-danelljan.github.io/;https://yuwingtai.github.io/;;;https://www.yf.io/", "dblp": "262/3626.html;26/5225;19/2951;151/8848;40/566;;;117/6314", "google_scholar": "mK6n-KgAAAAJ;WseeNrUAAAAJ;3_zPktkAAAAJ;NCSSpMkAAAAJ;nFhLmFkAAAAJ;;;-XCiamcAAAAJ", "orcid": ";;;;0000-0002-3148-0380;;;", "linkedin": ";;;;;;;", "or_profile": "~Haotong_Qin1;~Lei_Ke1;~Xudong_Ma3;~Martin_Danelljan4;~Yu-Wing_Tai2;~Chi-Keung_Tang1;~Xianglong_Liu3;~Fisher_Yu2", "aff": "Beihang University;Hong Kong University of Science and Technology;Beihang University;ETH Zurich;Kuaishou Technology;;;Swiss Federal Institute of Technology", "aff_domain": "buaa.edu.cn;ust.hk;buaa.edu.cn;vision.ee.ethz.ch;kuaishou.com;;;ethz.ch", "position": "PhD student;PhD student;PhD student;Principal Researcher;Senior Research Director;;;Assistant Professor", "bibtex": "@inproceedings{\nqin2023bimatting,\ntitle={BiMatting: Efficient Video Matting via Binarization},\nauthor={Haotong Qin and Lei Ke and Xudong Ma and Martin Danelljan and Yu-Wing Tai and Chi-Keung Tang and Xianglong Liu and Fisher Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YbYQ0JEQ80}\n}", "github": "", "project": "", "reviewers": "busB;PTWg;wDXB;N9ej", "pdf_size": 4618618, "rating": "6;6;7;8", "confidence": "4;3;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;4;4", "wc_summary": "65;49;60;105", "wc_strengths": "72;93;69;286", "wc_weaknesses": "151;80;49;212", "wc_questions": "2;59;76;37", "wc_limitations": "1;39;26;11", "wc_review": "291;320;280;651", "wc_reply_reviewers": "55;0;0;89", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 69.75, 21.158627082114755 ], "wc_strengths_avg": [ 130.0, 90.54004638832477 ], "wc_weaknesses_avg": [ 123.0, 63.30481814206561 ], "wc_questions_avg": [ 43.5, 27.663152387246107 ], "wc_limitations_avg": [ 19.25, 14.463315664120728 ], "wc_review_avg": [ 385.5, 153.98133003711845 ], "wc_reply_reviewers_avg": [ 36.0, 37.953919428696686 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8027754738503106403&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;ust.hk;buaa.edu.cn;vision.ee.ethz.ch;kuaishou.com;;;ethz.ch", "author_num": 8, "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "Beihang University;Hong Kong University of Science and Technology;ETH Zurich;Kuaishou Technology;Swiss Federal Institute of 
Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.ust.hk;https://www.ethz.ch;https://www.kuaishou.com;https://www.ethz.ch", "aff_unique_abbr": "BUAA;HKUST;ETHZ;Kuaishou;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "China;Switzerland" }, { "title": "Fast and Regret Optimal Best Arm Identification: Fundamental Limits and Low-Complexity Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71286", "id": "Yc9bqbnrbs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/35fdecdf8861bc15110d48fbec3193cf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Yc9bqbnrbs", "openreview": "https://openreview.net/forum?id=Yc9bqbnrbs", "poster": "/media/PosterPDFs/NeurIPS%202023/71286.png?t=1701284071.80691", "slides": "https://nips.cc/virtual/2023/poster/71286", "video": "https://nips.cc/virtual/2023/poster/71286", "author_site": "Qining Zhang, Lei Ying", "tldr": "", "abstract": "This paper considers a stochastic Multi-Armed Bandit (MAB) problem with dual objectives: (i) quick identification and commitment to the optimal arm, and (ii) reward maximization throughout a sequence of $T$ consecutive rounds. Though each objective has been individually well-studied, i.e., best arm identification for (i) and regret minimization for (ii), the simultaneous realization of both objectives remains an open problem, despite its practical importance. This paper introduces \\emph{Regret Optimal Best Arm Identification} (ROBAI) which aims to achieve these dual objectives. To solve ROBAI with both pre-determined stopping time and adaptive stopping time requirements, we present an algorithm called EOCP and its variants respectively, which not only achieve asymptotic optimal regret in both Gaussian and general bandits, but also commit to the optimal arm in $\\mathcal{O}(\\log T)$ rounds with pre-determined stopping time and $\\mathcal{O}(\\log^2 T)$ rounds with adaptive stopping time. We further characterize lower bounds on the commitment time (equivalent to the sample complexity) of ROBAI, showing that EOCP and its variants are sample optimal with pre-determined stopping time, and almost sample optimal with adaptive stopping time. 
Numerical results confirm our theoretical analysis and reveal an interesting ``over-exploration'' phenomenon exhibited by classic UCB algorithms, whereby EOCP achieves smaller regret even though it stops exploration much earlier than UCB, i.e., $\\mathcal{O}(\\log T)$ versus $\\mathcal{O}(T)$, which suggests that over-exploration is unnecessary and potentially harmful to system performance.", "keywords": "stochastic multi-armed bandits;regret optimal best arm identification;commitment", "primary_area": "", "supplementary_material": "/attachment/b1cb4cf969ceb09f0b498f8feead36d4ddb61702.zip", "author": "Qining Zhang;Lei Ying", "authorids": "~Qining_Zhang2;~Lei_Ying1", "gender": "M;M", "homepage": "https://thumichzqn.github.io/;http://leiying.engin.umich.edu/", "dblp": "272/6143;27/4818", "google_scholar": "mz_wcnMAAAAJ;7f3HKI8AAAAJ", "orcid": ";", "linkedin": "qiningz/;", "or_profile": "~Qining_Zhang2;~Lei_Ying1", "aff": "University of Michigan - Ann Arbor;University of Michigan, Ann Arbor", "aff_domain": "umich.edu;umich.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nzhang2023fast,\ntitle={Fast and Regret Optimal Best Arm Identification: Fundamental Limits and Low-Complexity Algorithms},\nauthor={Qining Zhang and Lei Ying},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Yc9bqbnrbs}\n}", "github": "", "project": "", "reviewers": "tQKP;T4df;WGiE;SqBM", "pdf_size": 630742, "rating": "3;6;6;7", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "670;168;134;157", "wc_strengths": "92;205;63;99", "wc_weaknesses": "5;440;135;70", "wc_questions": "5;71;2;29", "wc_limitations": "27;2;1;15", "wc_review": "799;886;335;370", "wc_reply_reviewers": "204;195;0;75", "wc_reply_authors": "467;313;64;171", "reply_reviewers": "1;1;0;1", "reply_authors": "5;3;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 282.25, 224.20345113311703 ], "wc_strengths_avg": [ 114.75, 53.825528329966254 ], "wc_weaknesses_avg": [ 162.5, 166.67708300783283 ], "wc_questions_avg": [ 26.75, 27.60774347895894 ], "wc_limitations_avg": [ 11.25, 10.638961415476606 ], "wc_review_avg": [ 597.5, 247.23318951953033 ], "wc_reply_reviewers_avg": [ 118.5, 85.28921385497699 ], "wc_reply_authors_avg": [ 253.75, 151.5245442164404 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3400149500107843474&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "umich.edu;umich.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Real-Time Motion Prediction via Heterogeneous Polyline Transformer with Relative Pose Encoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71285", "id": "YcmGuwdLoU", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b37c2e26b75ee02fcabd65a2a0367136-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YcmGuwdLoU", "openreview": "https://openreview.net/forum?id=YcmGuwdLoU", "poster": "/media/PosterPDFs/NeurIPS%202023/71285.png?t=1697441066.4265263", "slides": "https://nips.cc/virtual/2023/poster/71285", "video": "https://nips.cc/virtual/2023/poster/71285", "author_site": "Zhejun Zhang, Alexander Liniger, Christos Sakaridis, Fisher Yu, Luc V Gool", "tldr": "", "abstract": "The real-world deployment of an autonomous driving system requires its components to run on-board and in real-time, including the motion prediction module that predicts the future trajectories of surrounding traffic participants. Existing agent-centric methods have demonstrated outstanding performance on public benchmarks. However, they suffer from high computational overhead and poor scalability as the number of agents to be predicted increases. To address this problem, we introduce the K-nearest neighbor attention with relative pose encoding (KNARPE), a novel attention mechanism allowing the pairwise-relative representation to be used by Transformers. Then, based on KNARPE we present the Heterogeneous Polyline Transformer with Relative pose encoding (HPTR), a hierarchical framework enabling asynchronous token update during the online inference. By sharing contexts among agents and reusing the unchanged contexts, our approach is as efficient as scene-centric methods, while performing on par with state-of-the-art agent-centric methods. Experiments on Waymo and Argoverse-2 datasets show that HPTR achieves superior performance among end-to-end methods that do not apply expensive post-processing or model ensembling. The code is available at https://github.com/zhejz/HPTR.", "keywords": "Motion Prediction;Autonomous Driving;Transformer", "primary_area": "", "supplementary_material": "/attachment/a50195cd1394de25f2d169613ac2bdc3956afea4.pdf", "author": "Zhejun Zhang;Alexander Liniger;Christos Sakaridis;Fisher Yu;Luc Van Gool", "authorids": "~Zhejun_Zhang1;~Alexander_Liniger1;~Christos_Sakaridis1;~Fisher_Yu2;~Luc_Van_Gool1", "gender": "M;M;;M;", "homepage": "https://zhejz.github.io/;https://alexliniger.github.io/;https://people.ee.ethz.ch/~csakarid/;https://www.yf.io/;", "dblp": "58/9847;162/5560;188/5858;117/6314;61/5017", "google_scholar": "geTwCUAAAAAJ;amb0lc0AAAAJ;gyF5LmoAAAAJ;-XCiamcAAAAJ;https://scholar.google.be/citations?user=TwMib_QAAAAJ", "orcid": "0000-0002-8785-4644;0000-0002-7858-7900;0000-0003-1127-8887;;", "linkedin": "zhejun-zhang-386aaa161/;alexander-liniger/;;;", "or_profile": "~Zhejun_Zhang1;~Alexander_Liniger1;~Christos_Sakaridis1;~Fisher_Yu2;~Luc_Van_Gool1", "aff": "ETH Zurich;Huawei Technologies Ltd.;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;KU Leuven", "aff_domain": "vision.ee.ethz.ch;huawei.com;ethz.ch;ethz.ch;kuleuven.be", "position": "PhD student;Principal Researcher;Postdoc;Assistant Professor;Emeritus", "bibtex": "@inproceedings{\nzhang2023realtime,\ntitle={Real-Time Motion Prediction via Heterogeneous Polyline Transformer with Relative Pose Encoding},\nauthor={Zhejun Zhang and Alexander Liniger and Christos Sakaridis and Fisher Yu and Luc Van Gool},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YcmGuwdLoU}\n}", "github": "", "project": "", "reviewers": "yHiV;Ccz6;LV25;753q;Gjvi", "pdf_size": 6637287, "rating": "3;4;6;6;6", "confidence": 
"4;4;4;4;4", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "1;51;86;76;131", "wc_strengths": "1;119;124;66;85", "wc_weaknesses": "1;176;132;160;94", "wc_questions": "1;43;210;32;189", "wc_limitations": "1;146;13;9;6", "wc_review": "5;535;565;343;505", "wc_reply_reviewers": "0;434;33;23;25", "wc_reply_authors": "0;802;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 69.0, 42.731721238442994 ], "wc_strengths_avg": [ 79.0, 44.528642467517464 ], "wc_weaknesses_avg": [ 112.6, 62.35896086369625 ], "wc_questions_avg": [ 95.0, 86.68333173107735 ], "wc_limitations_avg": [ 35.0, 55.63811643109425 ], "wc_review_avg": [ 390.6, 207.51250564725007 ], "wc_reply_reviewers_avg": [ 103.0, 165.8637995464954 ], "wc_reply_authors_avg": [ 160.4, 320.80000000000007 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7313068557053202112&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 12, "email": "vision.ee.ethz.ch;huawei.com;ethz.ch;ethz.ch;kuleuven.be", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "ETH Zurich;Huawei;Swiss Federal Institute of Technology;Katholieke Universiteit Leuven", "aff_unique_dep": ";Huawei Technologies;;", "aff_unique_url": "https://www.ethz.ch;https://www.huawei.com;https://www.ethz.ch;https://www.kuleuven.be", "aff_unique_abbr": "ETHZ;Huawei;ETH Zurich;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2", "aff_country_unique": "Switzerland;China;Belgium" }, { "title": "Learning Trajectories are Generalization Indicators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71284", "id": "YdfcKb4Wif", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0da54d3dbc0107692da952358965f5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YdfcKb4Wif", "openreview": "https://openreview.net/forum?id=YdfcKb4Wif", "poster": "/media/PosterPDFs/NeurIPS%202023/71284.png?t=1698843239.5765424", "slides": "https://nips.cc/virtual/2023/poster/71284", "video": "https://nips.cc/virtual/2023/poster/71284", "author_site": "Jingwen Fu, Zhizheng Zhang, Dacheng Yin, Yan Lu, Nanning Zheng", "tldr": "", "abstract": "This paper explores the connection between learning trajectories of Deep Neural Networks (DNNs) and their generalization capabilities when optimized using (stochastic) gradient descent algorithms. \nInstead of concentrating solely on the generalization error of the DNN post-training, we present a novel perspective for analyzing generalization error by investigating the contribution of each update step to the change in generalization error. This perspective enable a more direct comprehension of how the learning trajectory influences generalization error. Building upon this analysis, we propose a new generalization bound that incorporates more extensive trajectory information.\nOur proposed generalization bound depends on the complexity of learning trajectory and the ratio between the bias and diversity of training set. 
Experimental observations reveal that our method effectively captures the generalization error throughout the training process. Furthermore, our approach can also track changes in generalization error when adjustments are made to learning rates and label noise levels. These results demonstrate that learning trajectory information is a valuable indicator of a model's generalization capabilities.", "keywords": "Generalization;Learning Trajectory", "primary_area": "", "supplementary_material": "/attachment/ef679b56da0cee5b28e47c5c2932ecaab0f275ff.pdf", "author": "Jingwen Fu;Zhizheng Zhang;Dacheng Yin;Yan Lu;Nanning Zheng", "authorids": "~Jingwen_Fu1;~Zhizheng_Zhang1;~Dacheng_Yin1;~Yan_Lu7;~Nanning_Zheng1", "gender": "M;M;M;M;M", "homepage": "https://www.jw-fu.cn/;;;https://www.microsoft.com/en-us/research/people/yanlu/;", "dblp": "247/5290;67/4758;254/0985;15/4830-1;07/256-1", "google_scholar": ";X7M0I8kAAAAJ;https://scholar.google.com/citations?hl=en;djk5l-4AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;0000-0001-5383-6424;", "linkedin": ";;;;", "or_profile": "~Jingwen_Fu1;~Zhizheng_Zhang1;~Dacheng_Yin1;~Yan_Lu7;~Nanning_Zheng1", "aff": "Microsoft;Microsoft Research;University of Science and Technology of China;Microsoft Research Asia;Xi'an Jiaotong University", "aff_domain": "microsoft.com;microsoft.com;ustc.edu;microsoft.com;xjtu.edu.cn", "position": "Intern;Senior Researcher;PhD student;Partner Research Manager;Full Professor", "bibtex": "@inproceedings{\nfu2023learning,\ntitle={Learning Trajectories are Generalization Indicators},\nauthor={Jingwen Fu and Zhizheng Zhang and Dacheng Yin and Yan Lu and Nanning Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YdfcKb4Wif}\n}", "github": "", "project": "", "reviewers": "t8yd;JRdZ;dakY;tZu2;pZNy", "pdf_size": 832486, "rating": "5;6;6;6;7", "confidence": "3;3;3;4;3", "soundness": "2;3;4;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;4;3;3", "wc_summary": "57;21;112;56;73", "wc_strengths": "48;36;226;62;172", "wc_weaknesses": "256;23;221;135;139", "wc_questions": "97;2;28;361;29", "wc_limitations": "117;9;14;25;20", "wc_review": "575;91;601;639;433", "wc_reply_reviewers": "14;0;17;13;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 63.8, 29.484911395491764 ], "wc_strengths_avg": [ 108.8, 76.04840563746225 ], "wc_weaknesses_avg": [ 154.8, 80.79207881964666 ], "wc_questions_avg": [ 103.4, 132.59049739706086 ], "wc_limitations_avg": [ 37.0, 40.36334971233185 ], "wc_review_avg": [ 467.8, 200.86453146337212 ], "wc_reply_reviewers_avg": [ 8.8, 7.30479294709987 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9285127451009601465&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 8, "email": "microsoft.com;microsoft.com;ustc.edu;microsoft.com;xjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Microsoft;University of Science and Technology of China;Xi'an Jiao Tong University", 
"aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;http://www.ustc.edu.cn;https://www.xjtu.edu.cn", "aff_unique_abbr": "Microsoft;USTC;XJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Evaluating Graph Neural Networks for Link Prediction: Current Pitfalls and New Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73552", "id": "YdjWXrdOTh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0be50b4590f1c5fdf4c8feddd63c4f67-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=YdjWXrdOTh", "openreview": "https://openreview.net/forum?id=YdjWXrdOTh", "poster": "/media/PosterPDFs/NeurIPS%202023/73552.png?t=1697474274.198515", "slides": "https://nips.cc/virtual/2023/poster/73552", "video": "https://nips.cc/virtual/2023/poster/73552", "author_site": "Juanhui Li, Harry Shomer, Haitao Mao, Shenglai Zeng, Yao Ma, Neil Shah, Jiliang Tang, Dawei Yin", "tldr": "", "abstract": "Link prediction attempts to predict whether an unseen edge exists based on only a portion of the graph. A flurry of methods has been created in recent years that attempt to make use of graph neural networks (GNNs) for this task. Furthermore, new and diverse datasets have also been created to better evaluate the effectiveness of these new models. However, multiple limitations currently exist that hinders our ability to properly evaluate these new methods. This includes, but is not limited to: (1) The underreporting of performance on multiple baselines, (2) A lack of a unified data split and evaluation metric on some datasets, (3) An unrealistic evaluation setting that produces negative samples that are easy to classify. To overcome these challenges we first conduct a fair comparison across prominent methods and datasets, utilizing the same dataset settings and hyperparameter settings. We then create a new real-world evaluation setting that samples difficult negative samples via multiple heuristics. 
The new evaluation setting helps promote new challenges and opportunities in link prediction by aligning the evaluation with real-world situations.", "keywords": "Link Prediction;Graph Neural Networks;New Evaluation", "primary_area": "", "supplementary_material": "/attachment/8c9522a0ba3ebcdd4f7baf008117e5942fb6e2a9.pdf", "author": "Juanhui Li;Harry Shomer;Haitao Mao;Shenglai Zeng;Yao Ma;Neil Shah;Jiliang Tang;Dawei Yin", "authorids": "~Juanhui_Li1;~Harry_Shomer1;~Haitao_Mao1;~Shenglai_Zeng1;~Yao_Ma3;~Neil_Shah2;~Jiliang_Tang1;~Dawei_Yin1", "gender": "F;;M;M;M;M;M;M", "homepage": "https://juanhui28.github.io/;https://www.cse.msu.edu/~shomerha/;;https://yaoma24.github.io/;http://nshah.net;https://www.cse.msu.edu/~tangjili/;https://www.yindawei.com/;", "dblp": "313/9527.html;;;212/7871.html;71/7771;64/10812;;", "google_scholar": "5J0dd-sAAAAJ;_6eE2vsAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;wf9TTOIAAAAJ;Qut69OgAAAAJ;WtzKMWAAAAAJ;GuQ9bpAAAAAJ;3GmlKM4AAAAJ", "orcid": "0000-0003-4909-1778;0000-0001-5081-1870;;;0000-0003-3261-8430;0000-0001-7125-3898;0000-0002-0684-6205;", "linkedin": ";;;;;;dwyin/;", "or_profile": "~Juanhui_Li1;~Harry_Shomer1;~Shenglai_Zeng1;~Yao_Ma3;~Neil_Shah2;~Jiliang_Tang1;~Dawei_Yin1;~Mao_Haitao1", "aff": "Michigan State University;Michigan State University;University of Electronic Science and Technology of China;New Jersey Institute of Technology;Snap Inc.;Michigan State University;Baidu;Michigan State University", "aff_domain": "msu.edu;msu.edu;uestc.edu.cn;njit.edu;snap.com;msu.edu;baidu.com;msu.edu", "position": "PhD student;PhD student;Undergrad student;Assistant Professor;Research Scientist;Full Professor;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nli2023evaluating,\ntitle={Evaluating Graph Neural Networks for Link Prediction: Current Pitfalls and New Benchmarking},\nauthor={Juanhui Li and Harry Shomer and Haitao Mao and Shenglai Zeng and Yao Ma and Neil Shah and Jiliang Tang and Dawei Yin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=YdjWXrdOTh}\n}", "github": "", "project": "", "reviewers": "P7Ue;rGWc;vjFt;dRZC;KDed", "pdf_size": 813888, "rating": "6;6;6;7;8", "confidence": "4;4;3;4;4", "wc_summary_and_contributions": "70;47;90;128;72", "wc_strengths": "146;35;93;40;47", "wc_improvement": "240;131;309;51;69", "wc_limitations": "112;1;2;1;1", "wc_correctness": "23;1;8;1;1", "wc_clarity": "1;1;17;1;1", "wc_relation_to_prior_work": "23;1;22;1;1", "wc_documentation": "4;1;17;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "620;219;559;225;194", "wc_reply_reviewers": "58;97;276;10;0", "wc_reply_authors": "1360;1786;1858;142;520", "reply_reviewers": "2;1;1;1;0", "reply_authors": "3;5;5;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 81.4, 27.00814691903167 ], "wc_strengths_avg": [ 72.2, 42.27244965695743 ], "wc_improvement_avg": [ 160.0, 99.60321279958794 ], "wc_limitations_avg": [ 23.4, 44.301692969908046 ], "wc_correctness_avg": [ 6.8, 8.541662601625049 ], "wc_clarity_avg": [ 4.2, 6.400000000000001 ], "wc_relation_to_prior_work_avg": [ 9.6, 10.537551897855591 ], "wc_documentation_avg": [ 4.8, 6.209669878504008 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 363.4, 185.90599775155184 ], "wc_reply_reviewers_avg": [ 88.2, 100.15268343883753 ], "wc_reply_authors_avg": [ 1133.2, 687.2281717159157 ], "reply_reviewers_avg": [ 
1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.0, 1.7888543819998317 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.375, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15586714842758777495&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "msu.edu;msu.edu;uestc.edu.cn;njit.edu;snap.com;msu.edu;baidu.com;msu.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0;4;0", "aff_unique_norm": "Michigan State University;University of Electronic Science and Technology of China;New Jersey Institute of Technology;Snap Inc.;Baidu", "aff_unique_dep": ";;;;Baidu, Inc.", "aff_unique_url": "https://www.msu.edu;https://www.uestc.edu.cn;https://www.njit.edu;https://www.snapinc.com;https://www.baidu.com", "aff_unique_abbr": "MSU;UESTC;NJIT;Snap;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Your representations are in the network: composable and parallel adaptation for large scale models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71283", "id": "Ydxnan4P2G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5be3783ea9d43d7add5409c101d87d83-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ydxnan4P2G", "openreview": "https://openreview.net/forum?id=Ydxnan4P2G", "poster": "/media/PosterPDFs/NeurIPS%202023/71283.png?t=1699512463.5807114", "slides": "https://nips.cc/virtual/2023/poster/71283", "video": "https://nips.cc/virtual/2023/poster/71283", "author_site": "Yonatan Dukler, Alessandro Achille, Hao Yang, Varsha Vivek, Luca Zancato, Benjamin Bowman, Avinash Ravichandran, Charless Fowlkes, Ashwin Swaminathan, Stefano Soatto", "tldr": "", "abstract": "We present a framework for transfer learning that efficiently adapts a large base-model by learning lightweight cross-attention modules attached to its intermediate activations.\nWe name our approach InCA (Introspective-Cross-Attention) and show that it can efficiently survey a network\u2019s representations and identify strong performing adapter models for a downstream task.\nDuring training, InCA enables training numerous adapters efficiently and in parallel, isolated from the frozen base model. On the ViT-L/16 architecture, our experiments show that a single adapter, 1.3% of the full model, is able to reach full fine-tuning accuracy on average across 11 challenging downstream classification tasks.\nCompared with other forms of parameter-efficient adaptation, the isolated nature of the InCA adaptation is computationally desirable for large-scale models. 
For instance, we adapt ViT-G/14 (1.8B+ parameters) quickly with 20+ adapters in parallel on a single V100 GPU (76% GPU memory reduction) and exhaustively identify its most useful representations.\nWe further demonstrate how the adapters learned by InCA can be incrementally modified or combined for flexible learning scenarios and our approach achieves state of the art performance on the ImageNet-to-Sketch multi-task benchmark.", "keywords": "Efficient learning;Compute-efficient deep learning;Deep Learning Theory;class-incremental-learning;downstream adaptation", "primary_area": "", "supplementary_material": "/attachment/cf2288ed9eaec93af707fe63d08fae3df30a7d43.pdf", "author": "Yonatan Dukler;Alessandro Achille;Hao Yang;Varsha Vivek;Luca Zancato;Benjamin Bowman;Avinash Ravichandran;Charless Fowlkes;Ashwin Swaminathan;Stefano Soatto", "authorids": "~Yonatan_Dukler1;~Alessandro_Achille1;~Hao_Yang4;~Varsha_Vivek1;~Luca_Zancato1;~Benjamin_Bowman1;~Avinash_Ravichandran1;~Charless_Fowlkes1;~Ashwin_Swaminathan1;~Stefano_Soatto3", "gender": "M;M;M;F;M;;M;M;M;", "homepage": "https://dukleryoni.github.io/;;https://sites.google.com/site/lancelot365;;;https://www.benjamin-bowman.com/;;https://www.ics.uci.edu/~fowlkes;https://sites.google.com/site/ashwins/;https://www.cs.ucla.edu/~soatto", "dblp": "242/3844;190/7328;54/4089-33.html;;274/1481;248/2643;90/4314;90/4157;61/754;08/1262", "google_scholar": ";;https://scholar.google.com.sg/citations?user=Y-I1X9QAAAAJ;;Z2Mhh2UAAAAJ;zYZ_FNEAAAAJ;28p_eLYAAAAJ;yLQF4mkAAAAJ;3CEGNJQAAAAJ;lH1PdF8AAAAJ", "orcid": ";;;;;;;;;0000-0003-2902-6362", "linkedin": ";;;varsha-vivek-47722111b/;;benjamin-bowman314;;;ashwinsw;stefano-soatto-5765aa6/", "or_profile": "~Yonatan_Dukler1;~Alessandro_Achille1;~Hao_Yang4;~Varsha_Vivek1;~Luca_Zancato1;~Benjamin_Bowman1;~Avinash_Ravichandran1;~Charless_Fowlkes1;~Ashwin_Swaminathan1;~Stefano_Soatto2", "aff": "AWS AI Labs;California Institute of Technology;Amazon;Amazon;Amazon Web Services;University of California, Los Angeles;Cruise;University of California, Irvine;Amazon;UCLA Computer Science Department, University of California, Los Angeles", "aff_domain": "amazon.com;caltech.edu;amazon.com;amazon.com;amazon.it;ucla.edu;getcruise.com;uci.edu;amazon.com;cs.ucla.edu", "position": "Researcher;Postdoc;Principal Researcher;Researcher;Applied Scientist;PhD student;Principal Applied Scientist;Professor;Principal Researcher;Professor", "bibtex": "@inproceedings{\ndukler2023your,\ntitle={Your representations are in the network: composable and parallel adaptation for large scale models},\nauthor={Yonatan Dukler and Alessandro Achille and Hao Yang and Varsha Vivek and Luca Zancato and Benjamin Bowman and Avinash Ravichandran and Charless Fowlkes and Ashwin Swaminathan and Stefano Soatto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ydxnan4P2G}\n}", "github": "", "project": "", "reviewers": "gDN4;d63D;KdhN;XAoM;DnPr;st4F", "pdf_size": 1370908, "rating": "4;5;5;6;6;6", "confidence": "5;4;4;4;4;4", "soundness": "2;3;3;3;3;3", "novelty": "2;3;3;2;3;3", "presentation": "2;4;3;2;3;4", "wc_summary": "85;81;60;167;75;60", "wc_strengths": "88;58;67;28;165;61", "wc_weaknesses": "397;121;115;245;144;118", "wc_questions": "2;90;39;45;63;48", "wc_limitations": "3;8;1;1;1;4", "wc_review": "575;358;282;486;448;291", "wc_reply_reviewers": "104;32;164;17;8;48", "wc_reply_authors": "0;18;1421;31;0;0", "reply_reviewers": "1;1;2;1;1;1", "reply_authors": "1;2;4;2;1;1", "rating_avg": 
[ 5.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 4.166666666666667, 0.372677996249965 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 88.0, 36.59690332983562 ], "wc_strengths_avg": [ 77.83333333333333, 42.7762265230999 ], "wc_weaknesses_avg": [ 190.0, 102.92392012225989 ], "wc_questions_avg": [ 47.833333333333336, 26.44123463246164 ], "wc_limitations_avg": [ 3.0, 2.516611478423583 ], "wc_review_avg": [ 406.6666666666667, 106.16601255685467 ], "wc_reply_reviewers_avg": [ 62.166666666666664, 55.08301815340994 ], "wc_reply_authors_avg": [ 245.0, 526.0506946420024 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 1.8333333333333333, 1.0671873729054748 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14768776410868057830&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "amazon.com;caltech.edu;amazon.com;amazon.com;amazon.it;ucla.edu;getcruise.com;uci.edu;amazon.com;cs.ucla.edu", "author_num": 10, "aff_unique_index": "0;1;0;0;0;2;3;4;0;2", "aff_unique_norm": "Amazon;California Institute of Technology;University of California, Los Angeles;Cruise;University of California, Irvine", "aff_unique_dep": "AWS AI Labs;;;;", "aff_unique_url": "https://aws.amazon.com;https://www.caltech.edu;https://www.ucla.edu;https://www.cruise.com;https://www.uci.edu", "aff_unique_abbr": "AWS;Caltech;UCLA;Cruise;UCI", "aff_campus_unique_index": "1;2;3;2", "aff_campus_unique": ";Pasadena;Los Angeles;Irvine", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bandit Social Learning under Myopic Behavior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71282", "id": "YeP8osxOht", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/212b143b5a5d6b88feb0fb1441b9756e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YeP8osxOht", "openreview": "https://openreview.net/forum?id=YeP8osxOht", "poster": "/media/PosterPDFs/NeurIPS%202023/71282.png?t=1702329805.8629303", "slides": "https://nips.cc/virtual/2023/poster/71282", "video": "https://nips.cc/virtual/2023/poster/71282", "author_site": "Kiarash Banihashem, MohammadTaghi Hajiaghayi, Suho Shin, Aleksandrs Slivkins", "tldr": "", "abstract": "We study social learning dynamics motivated by reviews on online platforms. The\nagents collectively follow a simple multi-armed bandit protocol, but each agent\nacts myopically, without regard to exploration. We allow a wide range of myopic\nbehaviors that are consistent with (parameterized) confidence intervals for the arms\u2019\nexpected rewards. We derive stark exploration failures for any such behavior, and\nprovide matching positive results. 
As a special case, we obtain the first general\nresults on failure of the greedy algorithm in bandits, thus providing a theoretical\nfoundation for why bandit algorithms should explore.", "keywords": "multi-armed bandits;greedy algorithm;social learning;myopic behavior;learning failures;algorithmic game theory", "primary_area": "", "supplementary_material": "", "author": "Kiarash Banihashem;MohammadTaghi Hajiaghayi;Suho Shin;Aleksandrs Slivkins", "authorids": "~Kiarash_Banihashem1;~MohammadTaghi_Hajiaghayi1;~Suho_Shin1;~Aleksandrs_Slivkins1", "gender": "M;M;M;M", "homepage": ";http://www.cs.umd.edu/~hajiagha/;https://suhoshin.github.io/;https://www.microsoft.com/en-us/research/people/slivkins/", "dblp": "285/5061;334/4488;218/5505;91/4014", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=SQ1eGN4AAAAJ;-p5eVQsAAAAJ;f2x233wAAAAJ", "orcid": ";0000-0003-4842-0533;;", "linkedin": ";mohammad-hajiaghayi-2139a913a;;", "or_profile": "~Kiarash_Banihashem1;~MohammadTaghi_Hajiaghayi1;~Suho_Shin1;~Aleksandrs_Slivkins1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Microsoft", "aff_domain": "umd.edu;umd.edu;umd.edu;microsoft.com", "position": "PhD student;Full Professor;PhD student;Researcher", "bibtex": "@inproceedings{\nbanihashem2023bandit,\ntitle={Bandit Social Learning under Myopic Behavior},\nauthor={Kiarash Banihashem and MohammadTaghi Hajiaghayi and Suho Shin and Aleksandrs Slivkins},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YeP8osxOht}\n}", "github": "", "project": "", "reviewers": "ZFQ2;Nim8;XcKk;1URz;piEt", "pdf_size": 450673, "rating": "3;6;6;7;7", "confidence": "2;4;4;3;3", "soundness": "3;3;4;3;4", "novelty": "2;2;3;3;3", "presentation": "3;2;3;4;4", "wc_summary": "42;107;92;172;70", "wc_strengths": "24;70;82;82;49", "wc_weaknesses": "139;218;193;59;52", "wc_questions": "3;74;37;46;27", "wc_limitations": "1;21;1;4;1", "wc_review": "209;490;405;363;199", "wc_reply_reviewers": "0;0;20;9;21", "wc_reply_authors": "36;0;44;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 96.6, 43.59633012077966 ], "wc_strengths_avg": [ 61.4, 22.249494376277408 ], "wc_weaknesses_avg": [ 132.2, 67.66801312289286 ], "wc_questions_avg": [ 37.4, 23.260266550493352 ], "wc_limitations_avg": [ 5.6, 7.787168933572715 ], "wc_review_avg": [ 333.2, 113.19434614855992 ], "wc_reply_reviewers_avg": [ 10.0, 9.186947262284681 ], "wc_reply_authors_avg": [ 16.0, 19.75854245636555 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5819143739626463, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13971540282698326126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "umd.edu;umd.edu;umd.edu;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Maryland;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": 
"https://www/umd.edu;https://www.microsoft.com", "aff_unique_abbr": "UMD;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SEVA: Leveraging sketches to evaluate alignment between human and machine visual abstraction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73551", "id": "YfPKQycBDE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d43621ff2dfe39d298dcd4a41937c912-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=YfPKQycBDE", "openreview": "https://openreview.net/forum?id=YfPKQycBDE", "poster": "/media/PosterPDFs/NeurIPS%202023/73551.png?t=1702441354.4451544", "slides": "https://nips.cc/virtual/2023/poster/73551", "video": "https://nips.cc/virtual/2023/poster/73551", "author_site": "Kushin Mukherjee, Holly Huey, Xuanchen Lu, Yael Vinker, Rio Aguina-Kang, Ariel Shamir, Judith Fan", "tldr": "", "abstract": "Sketching is a powerful tool for creating abstract images that are sparse but meaningful. Sketch understanding poses fundamental challenges for general-purpose vision algorithms because it requires robustness to the sparsity of sketches relative to natural visual inputs and because it demands tolerance for semantic ambiguity, as sketches can reliably evoke multiple meanings. While current vision algorithms have achieved high performance on a variety of visual tasks, it remains unclear to what extent they understand sketches in a human-like way. Here we introduce $\\texttt{SEVA}$, a new benchmark dataset containing approximately 90K human-generated sketches of 128 object concepts produced under different time constraints, and thus systematically varying in sparsity. We evaluated a suite of state-of-the-art vision algorithms on their ability to correctly identify the target concept depicted in these sketches and to generate responses that are strongly aligned with human response patterns on the same sketch recognition task. We found that vision algorithms that better predicted human sketch recognition performance also better approximated human uncertainty about sketch meaning, but there remains a sizable gap between model and human response patterns. To explore the potential of models that emulate human visual abstraction in generative tasks, we conducted further evaluations of a recently developed sketch generation algorithm (Vinker et al., 2022) capable of generating sketches that vary in sparsity. 
We hope that public release of this dataset and evaluation protocol will catalyze progress towards algorithms with enhanced capacities for human-like visual abstraction.", "keywords": "sketch understanding;visual abstraction;vision;cognitive science", "primary_area": "", "supplementary_material": "/attachment/dc533a4e1e6923b974f9b2cdeb23da59e9571905.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nmukherjee2023seva,\ntitle={{SEVA}: Leveraging sketches to evaluate alignment between human and machine visual abstraction},\nauthor={Kushin Mukherjee and Holly Huey and Xuanchen Lu and Yael Vinker and Rio Aguina-Kang and Ariel Shamir and Judith E Fan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=YfPKQycBDE}\n}", "github": "", "project": "", "reviewers": "K3MZ;81P5;p23D;d6px;Jrh3", "pdf_size": 6059371, "rating": "6;7;7;7;7", "confidence": "5;4;4;4;4", "wc_summary_and_contributions": "170;33;77;116;44", "wc_strengths": "177;60;112;100;20", "wc_improvement": "224;6;291;24;26", "wc_limitations": "89;6;68;36;1", "wc_correctness": "10;7;46;32;1", "wc_clarity": "5;8;9;7;6", "wc_relation_to_prior_work": "41;11;39;59;2", "wc_documentation": "8;9;52;42;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "725;141;695;417;102", "wc_reply_reviewers": "291;32;102;23;0", "wc_reply_authors": "1419;927;1114;249;117", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 88.0, 50.17967716117751 ], "wc_strengths_avg": [ 93.8, 52.68927784663593 ], "wc_improvement_avg": [ 114.2, 119.11070480859392 ], "wc_limitations_avg": [ 40.0, 34.2870237845165 ], "wc_correctness_avg": [ 19.2, 17.010584939971935 ], "wc_clarity_avg": [ 7.0, 1.4142135623730951 ], "wc_relation_to_prior_work_avg": [ 30.4, 20.915066339842195 ], "wc_documentation_avg": [ 22.4, 20.51925924588897 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 416.0, 263.6452161523133 ], "wc_reply_reviewers_avg": [ 89.6, 106.29882407628035 ], "wc_reply_authors_avg": [ 765.2, 502.3785027247882 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15214499444157567781&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 10, "email": "", "author_num": 1 }, { "title": "Autodecoding Latent 3D Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71281", "id": "YhAZqWhOnS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d3b93537b521f15613524415dfe43f37-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YhAZqWhOnS", "openreview": "https://openreview.net/forum?id=YhAZqWhOnS", "poster": "/media/PosterPDFs/NeurIPS%202023/71281.png?t=1702320991.9449039", "slides": "https://nips.cc/virtual/2023/poster/71281", "video": "https://nips.cc/virtual/2023/poster/71281", "author_site": "Evangelos Ntavelis, Aliaksandr Siarohin, Kyle Olszewski, Chaoyang Wang, Luc V Gool, Sergey Tulyakov", "tldr": "", "abstract": "Diffusion-based methods have shown impressive visual 
results in the text-to-image domain. They first learn a latent space using an autoencoder, then run a denoising process on the bottleneck to generate new samples. However, learning an autoencoder requires substantial data in the target domain. Such data is scarce for 3D generation, prohibiting the learning of large-scale diffusion models for 3D synthesis. We present a novel approach to the generation of static and articulated 3D assets that has a 3D autodecoder at its core. The 3D autodecoder framework embeds properties learned from the target dataset in the latent space, which can then be decoded into a volumetric representation for rendering view-consistent appearance and geometry. We then identify the appropriate intermediate volumetric latent space, and introduce robust normalization and de-normalization operations to learn a 3D diffusion from 2D images or monocular videos of rigid or articulated objects. Our approach is flexible enough to use either existing camera supervision or no camera information at all -- instead efficiently learning it during training. Our evaluations demonstrate that our generation results outperform state-of-the-art alternatives on various benchmark datasets and metrics, including multi-view image datasets of synthetic objects, real in-the-wild videos of moving people, and a large-scale, real video dataset of static objects.", "keywords": "3D Generation;Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/5ec06974d20d247b3a533f1e3bc74bc7325a31ad.zip", "author": "Evangelos Ntavelis;Aliaksandr Siarohin;Kyle Olszewski;Chaoyang Wang;Luc Van Gool;Sergey Tulyakov", "authorids": "~Evangelos_Ntavelis1;~Aliaksandr_Siarohin1;~Kyle_Olszewski1;~Chaoyang_Wang1;~Luc_Van_Gool1;~Sergey_Tulyakov1", "gender": "M;M;M;M;;M", "homepage": "http://entavelis.com;;https://kyleolsz.github.io/;https://mightychaos.github.io/;;http://www.stulyakov.com/", "dblp": "262/6311;199/1971;165/9717;;61/5017;40/6115", "google_scholar": "iS1R-6gAAAAJ;https://scholar.google.it/citations?user=uMl5-k4AAAAJ;FWDVqjgAAAAJ;I-xDKHEAAAAJ;https://scholar.google.be/citations?user=TwMib_QAAAAJ;mgzXR0sAAAAJ", "orcid": ";;0000-0001-8775-6879;;;", "linkedin": ";;kyle-olszewski-2623ab1b;;;sergeytulyakov/", "or_profile": "~Evangelos_Ntavelis1;~Aliaksandr_Siarohin1;~Kyle_Olszewski1;~Chaoyang_Wang1;~Luc_Van_Gool1;~Sergey_Tulyakov1", "aff": "Snap Inc.;Snap Inc.;Snap Inc.;School of Computer Science, Carnegie Mellon University;KU Leuven;", "aff_domain": "snapchat.com;snapchat.com;snap.com;cs.cmu.edu;kuleuven.be;", "position": "Intern;Intern;Researcher;PhD student;Emeritus;", "bibtex": "@inproceedings{\nntavelis2023autodecoding,\ntitle={Autodecoding Latent 3D Diffusion Models},\nauthor={Evangelos Ntavelis and Aliaksandr Siarohin and Kyle Olszewski and Chaoyang Wang and Luc Van Gool and Sergey Tulyakov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YhAZqWhOnS}\n}", "github": "", "project": "", "reviewers": "nmDk;2ZWE;xDwW;Nn3Q;iNSR", "pdf_size": 24668478, "rating": "5;5;6;6;7", "confidence": "4;3;4;4;4", "soundness": "3;3;4;2;3", "novelty": "3;2;3;2;3", "presentation": "3;2;3;2;3", "wc_summary": "102;54;107;100;64", "wc_strengths": "128;15;164;59;96", "wc_weaknesses": "216;178;293;427;115", "wc_questions": "57;205;50;15;1", "wc_limitations": "37;7;10;10;1", "wc_review": "540;459;624;611;277", "wc_reply_reviewers": "56;162;19;22;17", "wc_reply_authors": "0;117;0;0;0", "reply_reviewers": "1;1;1;1;1", 
"reply_authors": "1;2;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 85.4, 21.905250512148907 ], "wc_strengths_avg": [ 92.4, 51.986921432221784 ], "wc_weaknesses_avg": [ 245.8, 107.38230766751104 ], "wc_questions_avg": [ 65.6, 72.77801865948263 ], "wc_limitations_avg": [ 13.0, 12.441864811996632 ], "wc_review_avg": [ 502.2, 127.02976029261805 ], "wc_reply_reviewers_avg": [ 55.2, 55.278929077904536 ], "wc_reply_authors_avg": [ 23.4, 46.79999999999999 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=963930041442768462&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "snapchat.com;snapchat.com;snap.com;cs.cmu.edu;kuleuven.be;", "author_num": 6, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Snap Inc.;Carnegie Mellon University;Katholieke Universiteit Leuven", "aff_unique_dep": ";School of Computer Science;", "aff_unique_url": "https://www.snapinc.com;https://www.cmu.edu;https://www.kuleuven.be", "aff_unique_abbr": "Snap;CMU;KU Leuven", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;Belgium" }, { "title": "Anytime Model Selection in Linear Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71280", "id": "YiRX7nQ77Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e11d23b18261d1b76d14da7a285fd0c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YiRX7nQ77Q", "openreview": "https://openreview.net/forum?id=YiRX7nQ77Q", "poster": "/media/PosterPDFs/NeurIPS%202023/71280.png?t=1702048513.5412548", "slides": "https://nips.cc/virtual/2023/poster/71280", "video": "https://nips.cc/virtual/2023/poster/71280", "author_site": "Parnian Kassraie, Nicolas Emmenegger, Andreas Krause, Aldo Pacchiano", "tldr": "", "abstract": "Model selection in the context of bandit optimization is a challenging problem, as it requires balancing exploration and exploitation not only for action selection, but also for model selection. One natural approach is to rely on online learning algorithms that treat different models as experts. Existing methods, however, scale poorly ($\\mathrm{poly}M$) with the number of models $M$ in terms of their regret.\nOur key insight is that, for model selection in linear bandits, we can emulate full-information feedback to the online learner with a favorable bias-variance trade-off. 
This allows us to develop ALEXP, which has an exponentially improved ($\\log M$) dependence on $M$ for its regret.\nALEXP has anytime guarantees on its regret, and neither requires knowledge of the horizon $n$, nor relies on an initial purely exploratory stage.\nOur approach utilizes a novel time-uniform analysis of the Lasso, establishing a new connection between online learning and high-dimensional statistics.", "keywords": "bandits;model selection;online learning", "primary_area": "", "supplementary_material": "/attachment/26c5b7c583e2fb96e7326d8f01dde62394696143.pdf", "author": "Parnian Kassraie;Nicolas Emmenegger;Andreas Krause;Aldo Pacchiano", "authorids": "~Parnian_Kassraie1;~Nicolas_Emmenegger1;~Andreas_Krause1;~Aldo_Pacchiano1", "gender": "F;Not Specified;M;M", "homepage": "https://pkassraie.github.io;;https://las.inf.ethz.ch/krausea;https://www.aldopacchiano.ai", "dblp": "216/8534.html;;87/1831-1.html;129/6338", "google_scholar": "GFDOkb0AAAAJ;_Ngo54EAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ;no_BfYgAAAAJ", "orcid": ";;0000-0001-7260-9673;", "linkedin": "parnian-kassraie/;;krausea/;", "or_profile": "~Parnian_Kassraie1;~Nicolas_Emmenegger1;~Andreas_Krause1;~Aldo_Pacchiano1", "aff": "Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich;ETH Zurich;Microsoft", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch;microsoft.com", "position": "PhD student;MS student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nkassraie2023anytime,\ntitle={Anytime Model Selection in Linear Bandits},\nauthor={Parnian Kassraie and Nicolas Emmenegger and Andreas Krause and Aldo Pacchiano},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YiRX7nQ77Q}\n}", "github": "", "project": "", "reviewers": "XdEK;XArQ;XgJC;1XGh;Vj2P", "pdf_size": 1314971, "rating": "6;6;6;7;8", "confidence": "3;5;2;4;2", "soundness": "3;3;3;4;4", "novelty": "3;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "84;49;71;225;92", "wc_strengths": "61;20;19;115;27", "wc_weaknesses": "168;44;94;83;85", "wc_questions": "89;45;106;96;1", "wc_limitations": "55;6;12;1;5", "wc_review": "457;164;302;520;210", "wc_reply_reviewers": "24;0;264;12;48", "wc_reply_authors": "119;0;779;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;3;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 104.2, 62.12696676967257 ], "wc_strengths_avg": [ 48.4, 36.66933323637068 ], "wc_weaknesses_avg": [ 94.8, 40.4346386159194 ], "wc_questions_avg": [ 67.4, 39.205101708833766 ], "wc_limitations_avg": [ 15.8, 19.913814300630605 ], "wc_review_avg": [ 330.6, 137.81668984560613 ], "wc_reply_reviewers_avg": [ 69.6, 98.48776573767931 ], "wc_reply_authors_avg": [ 179.6, 303.223086192328 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.34299717028501764, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15881911482574759632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "ethz.ch;inf.ethz.ch;ethz.ch;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich;Microsoft", "aff_unique_dep": ";Department of Computer 
Science;Microsoft Corporation", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch;https://www.microsoft.com", "aff_unique_abbr": "ETH Zurich;ETHZ;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Near-Optimal Bounds for Learning Gaussian Halfspaces with Random Classification Noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71279", "id": "YifKp5b15e", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/44c150733f9c5b6f98cb0caad0c664c7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YifKp5b15e", "openreview": "https://openreview.net/forum?id=YifKp5b15e", "poster": "/media/PosterPDFs/NeurIPS%202023/71279.png?t=1702179491.2122893", "slides": "https://nips.cc/virtual/2023/poster/71279", "video": "https://nips.cc/virtual/2023/poster/71279", "author_site": "Ilias Diakonikolas, Jelena Diakonikolas, Daniel Kane, Puqian Wang, Nikos Zarifis", "tldr": "", "abstract": "We study the problem of learning general (i.e., not necessarily homogeneous) \nhalfspaces with Random Classification Noise under the Gaussian distribution. \nWe establish nearly-matching algorithmic and Statistical Query (SQ) lower bound results \nrevealing a surprising information-computation gap for this basic problem. \nSpecifically, the sample complexity of this learning problem is \n$\\widetilde{\\Theta}(d/\\epsilon)$, where $d$ is the dimension and $\\epsilon$ is the excess error. \nOur positive result is a computationally efficient learning algorithm with sample complexity\n$\\tilde{O}(d/\\epsilon + d/(\\max(p, \\epsilon))^2)$, where $p$ quantifies the bias of the target halfspace. \nOn the lower bound side, we show that any efficient SQ algorithm (or low-degree test)\nfor the problem requires sample complexity at least \n$\\Omega(d^{1/2}/(\\max(p, \\epsilon))^2)$. 
\nOur lower bound suggests that this quadratic dependence on $1/\\epsilon$ is inherent for efficient algorithms.", "keywords": "PAC Learning;Random Classification Noise", "primary_area": "", "supplementary_material": "", "author": "Ilias Diakonikolas;Jelena Diakonikolas;Daniel Kane;Puqian Wang;Nikos Zarifis", "authorids": "~Ilias_Diakonikolas1;~Jelena_Diakonikolas2;~Daniel_Kane1;~Puqian_Wang1;~Nikos_Zarifis1", "gender": "M;F;M;;", "homepage": "http://www.iliasdiakonikolas.org/;http://www.jelena-diakonikolas.com/;http://cseweb.ucsd.edu/~dakane/;;", "dblp": "d/IliasDiakonikolas;147/5178;52/6817;;", "google_scholar": "Vb3FLmkAAAAJ;J8ixfu8AAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;;", "orcid": ";0000-0003-3439-0310;;;", "linkedin": ";;;;", "or_profile": "~Ilias_Diakonikolas1;~Jelena_Diakonikolas2;~Daniel_Kane1;~Puqian_Wang1;~Nikos_Zarifis1", "aff": "University of Wisconsin, Madison;University of Wisconsin, Madison;University of California, San Diego;;", "aff_domain": "wisc.edu;wisc.edu;ucsd.edu;;", "position": "Associate Professor;Assistant Professor;Assistant Professor;;", "bibtex": "@inproceedings{\ndiakonikolas2023nearoptimal,\ntitle={Near-Optimal Bounds for Learning Gaussian Halfspaces with Random Classification Noise},\nauthor={Ilias Diakonikolas and Jelena Diakonikolas and Daniel Kane and Puqian Wang and Nikos Zarifis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YifKp5b15e}\n}", "github": "", "project": "", "reviewers": "QgYm;qpWY;m9mP;imug;2pgi;BT2g", "pdf_size": 564982, "rating": "5;6;6;7;7;7", "confidence": "2;2;3;3;2;3", "soundness": "3;3;3;4;4;4", "novelty": "3;2;3;3;3;4", "presentation": "2;3;3;3;4;3", "wc_summary": "186;119;122;249;58;323", "wc_strengths": "45;61;133;68;39;48", "wc_weaknesses": "71;21;56;46;3;64", "wc_questions": "41;17;78;130;16;122", "wc_limitations": "33;1;30;11;3;1", "wc_review": "376;219;419;504;119;558", "wc_reply_reviewers": "0;0;35;32;0;23", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;1;1;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 176.16666666666666, 88.62546034984655 ], "wc_strengths_avg": [ 65.66666666666667, 31.652628467299344 ], "wc_weaknesses_avg": [ 43.5, 24.102212899787162 ], "wc_questions_avg": [ 67.33333333333333, 46.35251401548308 ], "wc_limitations_avg": [ 13.166666666666666, 13.42158295019216 ], "wc_review_avg": [ 365.8333333333333, 153.53329353017287 ], "wc_reply_reviewers_avg": [ 15.0, 15.427248620541512 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15671746785503343057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "wisc.edu;wisc.edu;ucsd.edu;;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Wisconsin;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu", "aff_unique_abbr": "UW;UCSD", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Madison;San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { 
"title": "Evaluating Neuron Interpretation Methods of NLP Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71278", "id": "YiwMpyMdPX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eef6cb60fd59b32d35718e176b4b08d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YiwMpyMdPX", "openreview": "https://openreview.net/forum?id=YiwMpyMdPX", "poster": "/media/PosterPDFs/NeurIPS%202023/71278.png?t=1701850252.5017595", "slides": "https://nips.cc/virtual/2023/poster/71278", "video": "https://nips.cc/virtual/2023/poster/71278", "author_site": "Yimin Fan, Fahim Dalvi, Nadir Durrani, Hassan Sajjad", "tldr": "", "abstract": "Neuron interpretation offers valuable insights into how knowledge is structured within a deep neural network model. While a number of neuron interpretation methods have been proposed in the literature, the field lacks a comprehensive comparison among these methods. This gap hampers progress due to the absence of standardized metrics and benchmarks. The commonly used evaluation metric has limitations, and creating ground truth annotations for neurons is impractical. Addressing these challenges, we propose an evaluation framework based on voting theory. Our hypothesis posits that neurons consistently identified by different methods carry more significant information. We rigorously assess our framework across a diverse array of neuron interpretation methods. Notable findings include: i) despite the theoretical differences among the methods, neuron ranking methods share over 60% of their rankings when identifying salient neurons, ii) the neuron interpretation methods are most sensitive to the last layer representations, iii) Probeless neuron ranking emerges as the most consistent method.", "keywords": "Neuron interpretation;NLP;Interpretability;Machine Learning", "primary_area": "", "supplementary_material": "/attachment/ed7c25bd18ec4d28673c70c6825af525c8f1affb.zip", "author": "Yimin Fan;Fahim Dalvi;Nadir Durrani;Hassan Sajjad", "authorids": "~Yimin_Fan1;~Fahim_Dalvi1;~Nadir_Durrani1;~Hassan_Sajjad1", "gender": "M;M;M;M", "homepage": "https://fanyimin-cuhk.github.io/;https://fdalvi.github.io;https://nadirdurrani.github.io/;https://hsajjad.github.io/", "dblp": ";194/2537;54/9012;73/5938", "google_scholar": "https://scholar.google.fr/citations?user=zxwUIG8AAAAJ;uQGCv10AAAAJ;https://scholar.google.co.uk/citations?user=K6uisFAAAAAJ;https://scholar.google.de/citations?user=t3BH6NkAAAAJ", "orcid": ";;0000-0002-9378-4128;", "linkedin": ";;nadir-durrani-04048744/;hassan-sajjad-154b043a/", "or_profile": "~Yimin_Fan1;~Fahim_Dalvi1;~Nadir_Durrani1;~Hassan_Sajjad1", "aff": "University of Science and Technology of China;Hamad Bin Khalifa University;Qatar Computing Research Institute;Dalhousie University", "aff_domain": "ustc.edu.cn;hbku.edu.qa;hbku.edu.qa;dal.ca", "position": "MS student;Researcher;Scientist;Associate Professor", "bibtex": "@inproceedings{\nfan2023evaluating,\ntitle={Evaluating Neuron Interpretation Methods of {NLP} Models},\nauthor={Yimin Fan and Fahim Dalvi and Nadir Durrani and Hassan Sajjad},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YiwMpyMdPX}\n}", "github": "", "project": "", "reviewers": "F8Vs;x9Fa;QcFR;z745", "pdf_size": 1961715, "rating": "4;4;5;7", "confidence": "4;3;3;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "4;3;2;4", "wc_summary": "107;66;136;250", "wc_strengths": 
"61;26;57;103", "wc_weaknesses": "49;60;124;353", "wc_questions": "199;88;107;140", "wc_limitations": "3;7;9;5", "wc_review": "419;247;433;851", "wc_reply_reviewers": "175;0;209;19", "wc_reply_authors": "531;27;324;0", "reply_reviewers": "1;0;2;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 139.75, 68.33877010892134 ], "wc_strengths_avg": [ 61.75, 27.39867697535777 ], "wc_weaknesses_avg": [ 146.5, 122.61423245284374 ], "wc_questions_avg": [ 133.5, 42.14558102577303 ], "wc_limitations_avg": [ 6.0, 2.23606797749979 ], "wc_review_avg": [ 487.5, 222.28079089296043 ], "wc_reply_reviewers_avg": [ 100.75, 92.28319186070668 ], "wc_reply_authors_avg": [ 220.5, 219.76407804734603 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5642668642482761483&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ustc.edu.cn;hbku.edu.qa;hbku.edu.qa;dal.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Science and Technology of China;Hamad Bin Khalifa University;Qatar Computing Research Institute;Dalhousie University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.hbku.edu.qa;https://www.qcri.org;https://www.dal.ca", "aff_unique_abbr": "USTC;HBKU;QCRI;Dal", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "China;Qatar;Canada" }, { "title": "Boosting Learning for LDPC Codes to Improve the Error-Floor Performance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71277", "id": "Yj3lFEyfnl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/463a91da3c832bd28912cd0d1b8d9974-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Yj3lFEyfnl", "openreview": "https://openreview.net/forum?id=Yj3lFEyfnl", "poster": "/media/PosterPDFs/NeurIPS%202023/71277.png?t=1701745022.1631274", "slides": "https://nips.cc/virtual/2023/poster/71277", "video": "https://nips.cc/virtual/2023/poster/71277", "author_site": "Hee-Youl Kwak, Dae-Young Yun, Yongjune Kim, Sang-Hyo Kim, Jong-Seon No", "tldr": "", "abstract": "Low-density parity-check (LDPC) codes have been successfully commercialized in communication systems due to their strong error correction capabilities and simple decoding process. However, the error-floor phenomenon of LDPC codes, in which the error rate stops decreasing rapidly at a certain level, presents challenges for achieving extremely low error rates and deploying LDPC codes in scenarios demanding ultra-high reliability. In this work, we propose training methods for neural min-sum (NMS) decoders to eliminate the error-floor effect. First, by leveraging the boosting learning technique of ensemble networks, we divide the decoding network into two neural decoders and train the post decoder to be specialized for uncorrected words that the first decoder fails to correct. Secondly, to address the vanishing gradient issue in training, we introduce a block-wise training schedule that locally trains a block of weights while retraining the preceding block. 
Lastly, we show that assigning different weights to unsatisfied check nodes effectively lowers the error-floor with a minimal number of weights. By applying these training methods to standard LDPC codes, we achieve the best error-floor performance compared to other decoding methods. The proposed NMS decoder, optimized solely through novel training methods without additional modules, can be integrated into existing LDPC decoders without incurring extra hardware costs. The source code is available at https://github.com/ghy1228/LDPC_Error_Floor.", "keywords": "Error-floor;Low-density parity-check codes;Boosting learning;Training schedule;weight sharing;Neural decoders;Min-sum", "primary_area": "", "supplementary_material": "", "author": "Hee-Youl Kwak;Dae-Young Yun;Yongjune Kim;Sang-Hyo Kim;Jong-Seon No", "authorids": "~Hee-Youl_Kwak1;bigbowl204@snu.ac.kr;~Yongjune_Kim1;iamshkim@skku.edu;~Jong-Seon_No1", "gender": ";;;;M", "homepage": ";;https://iil.postech.ac.kr;;http://ccl.snu.ac.kr", "dblp": ";;124/3256;;", "google_scholar": "XlSZdbgAAAAJ;;WPKrXEoAAAAJ;;", "orcid": ";;0000-0003-0120-3750;;", "linkedin": ";;;;", "or_profile": "~Hee-Youl_Kwak1;bigbowl204@snu.ac.kr;~Yongjune_Kim1;iamshkim@skku.edu;~Jong-Seon_No1", "aff": "Ulsan University;;POSTECH;;Seoul National University", "aff_domain": "ulsan.ac.kr;;postech.ac.kr;;snu.ac.kr", "position": "Assistant Professor;;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nkwak2023boosting,\ntitle={Boosting Learning for {LDPC} Codes to Improve the Error-Floor Performance},\nauthor={Hee-Youl Kwak and Dae-Young Yun and Yongjune Kim and Sang-Hyo Kim and Jong-Seon No},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Yj3lFEyfnl}\n}", "github": "", "project": "", "reviewers": "uaWt;jBEt;MFD6;UbgL;zXhE", "pdf_size": 2682150, "rating": "5;6;6;7;7", "confidence": "4;4;4;5;5", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;2;4;2;4", "wc_summary": "88;105;116;82;154", "wc_strengths": "59;41;106;12;222", "wc_weaknesses": "275;220;120;13;1", "wc_questions": "93;76;1;96;72", "wc_limitations": "14;4;1;23;1", "wc_review": "529;446;344;226;450", "wc_reply_reviewers": "0;204;13;0;9", "wc_reply_authors": "0;748;0;0;0", "reply_reviewers": "0;2;1;0;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 109.0, 25.534290669607408 ], "wc_strengths_avg": [ 88.0, 73.62879871354686 ], "wc_weaknesses_avg": [ 125.8, 109.0566825096014 ], "wc_questions_avg": [ 67.6, 34.575135574571505 ], "wc_limitations_avg": [ 8.6, 8.63944442658207 ], "wc_review_avg": [ 399.0, 104.56003060443317 ], "wc_reply_reviewers_avg": [ 45.2, 79.56230263133415 ], "wc_reply_authors_avg": [ 149.6, 299.2 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8728715609439693, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14884035005284978607&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ulsan.ac.kr;;postech.ac.kr;;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ulsan University;Pohang University of Science and Technology;Seoul National University", 
"aff_unique_dep": ";;", "aff_unique_url": "https://www.ulsan.ac.kr;https://www.postech.ac.kr;https://www.snu.ac.kr", "aff_unique_abbr": "Ulsan U;POSTECH;SNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "STEVE-1: A Generative Model for Text-to-Behavior in Minecraft", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71276", "id": "YkBDJWerKg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dd03f856fc7f2efeec8b1c796284561d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YkBDJWerKg", "openreview": "https://openreview.net/forum?id=YkBDJWerKg", "poster": "/media/PosterPDFs/NeurIPS%202023/71276.png?t=1700161823.9346993", "slides": "https://nips.cc/virtual/2023/poster/71276", "video": "https://nips.cc/virtual/2023/poster/71276", "author_site": "Shalev Lifshitz, Keiran Paster, Harris Chan, Jimmy Ba, Sheila McIlraith", "tldr": "", "abstract": "Constructing AI models that respond to text instructions is challenging, especially for sequential decision-making tasks. This work introduces a methodology, inspired by unCLIP, for instruction-tuning generative models of behavior without relying on a large dataset of instruction-labeled trajectories. Using this methodology, we create an instruction-tuned Video Pretraining (VPT) model called STEVE-1, which can follow short-horizon open-ended text and visual instructions in Minecraft. STEVE-1 is trained in two steps: adapting the pretrained VPT model to follow commands in MineCLIP's latent space, then training a prior to predict latent codes from text. This allows us to finetune VPT through self-supervised behavioral cloning and hindsight relabeling, reducing the need for costly human text annotations, and all for only $60 of compute. By leveraging pretrained models like VPT and MineCLIP and employing best practices from text-conditioned image generation, STEVE-1 sets a new bar for open-ended instruction following in Minecraft with low-level controls (mouse and keyboard) and raw pixel inputs, far outperforming previous baselines and robustly completing 12 of 13 tasks in our early-game evaluation suite. We provide experimental evidence highlighting key factors for downstream performance, including pretraining, classifier-free guidance, and data scaling. All resources, including our model weights, training scripts, and evaluation tools are made available for further research.", "keywords": "minecraft;instruction following;foundation models;sequence models;reinforcement learning;sequential decision making;goal conditioned reinforcement learning;text conditioned reinforcement learning;transformers;deep learning", "primary_area": "", "supplementary_material": "", "author": "Shalev Lifshitz;Keiran Paster;Harris Chan;Jimmy Ba;Sheila A. 
McIlraith", "authorids": "shalev.lifshitz@mail.utoronto.ca;~Keiran_Paster1;~Harris_Chan1;~Jimmy_Ba1;~Sheila_A._McIlraith1", "gender": ";M;M;M;F", "homepage": ";http://keirp.com;http://www.cs.toronto.edu/~hchan/;http://jimmylba.github.io;http://www.cs.toronto.edu/~sheila/", "dblp": ";;227/3248;https://dblp.org/pers/b/Ba:Jimmy.html;66/3221", "google_scholar": ";;0tLCTHYAAAAJ;https://scholar.google.ca/citations?user=ymzxRhAAAAAJ;https://scholar.google.com.tw/citations?user=ny2zuvMAAAAJ", "orcid": ";;;;0000-0003-4953-0945", "linkedin": ";;theharrischan/;;sheila-mcilraith-a76aa513/?originalSubdomain=ca", "or_profile": "shalev.lifshitz@mail.utoronto.ca;~Keiran_Paster1;~Harris_Chan1;~Jimmy_Ba1;~Sheila_A._McIlraith1", "aff": ";University of Toronto;University of Toronto;Department of Computer Science, University of Toronto;Department of Computer Science, University of Toronto", "aff_domain": ";toronto.edu;toronto.edu;cs.toronto.edu;cs.toronto.edu", "position": ";PhD student;PhD;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlifshitz2023steve,\ntitle={{STEVE}-1: A Generative Model for Text-to-Behavior in Minecraft},\nauthor={Shalev Lifshitz and Keiran Paster and Harris Chan and Jimmy Ba and Sheila A. McIlraith},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YkBDJWerKg}\n}", "github": "", "project": "", "reviewers": "7QfK;vziy;Q7hP;3aRk", "pdf_size": 4345499, "rating": "5;6;7;8", "confidence": "5;4;3;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;4;4", "wc_summary": "63;37;328;139", "wc_strengths": "71;94;203;191", "wc_weaknesses": "446;92;178;222", "wc_questions": "6;27;183;2", "wc_limitations": "24;14;53;68", "wc_review": "610;264;945;622", "wc_reply_reviewers": "49;29;0;367", "wc_reply_authors": "0;0;0;1344", "reply_reviewers": "1;1;0;2", "reply_authors": "1;1;1;3", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 141.75, 113.87575466270246 ], "wc_strengths_avg": [ 139.75, 57.98006122797733 ], "wc_weaknesses_avg": [ 234.5, 130.75454103013018 ], "wc_questions_avg": [ 54.5, 74.79471906491794 ], "wc_limitations_avg": [ 39.75, 21.706853756359994 ], "wc_review_avg": [ 610.25, 240.87587571195252 ], "wc_reply_reviewers_avg": [ 111.25, 148.6814968313139 ], "wc_reply_authors_avg": [ 336.0, 581.9690713431428 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3576217577079734475&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";toronto.edu;toronto.edu;cs.toronto.edu;cs.toronto.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "GraphAdapter: Tuning Vision-Language Models With Dual Knowledge Graph", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71275", "id": "YmEDnMynuO", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b25c39788e5cf11d3541de433ebf4c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YmEDnMynuO", "openreview": "https://openreview.net/forum?id=YmEDnMynuO", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71275", "video": "https://nips.cc/virtual/2023/poster/71275", "author_site": "Xin Li, Dongze Lian, Zhihe Lu, Jiawang Bai, Zhibo Chen, Xinchao Wang", "tldr": "", "abstract": "Adapter-style efficient transfer learning (ETL) has shown excellent performance in the tuning of vision-language models (VLMs) under the low-data regime, where only a few additional parameters are introduced to excavate the task-specific knowledge based on the general and powerful representation of VLMs. However, most adapter-style works face two limitations: (i) modeling task-specific knowledge with a single modality only; and (ii) overlooking the exploitation of the inter-class relationships in downstream tasks, thereby leading to sub-optimal solutions. To mitigate that, we propose an effective adapter-style tuning strategy, dubbed GraphAdapter, which performs the textual adapter by explicitly modeling the dual-modality structure knowledge (i.e., the correlation of different semantics/classes in textual and visual modalities) with a dual knowledge graph. In particular, the dual knowledge graph is established with two sub-graphs, i.e., a textual knowledge sub-graph, and a visual knowledge sub-graph, where the nodes and edges represent the semantics/classes and their correlations in two modalities, respectively. This enables the textual feature of each prompt to leverage the task-specific structure knowledge from both textual and visual modalities, yielding a more effective classifier for downstream tasks. 
Extensive experimental results on 11 benchmark datasets reveal that our GraphAdapter significantly outperforms the previous adapter-based methods.", "keywords": "Efficient transfer learning;vision-language model;adapter-style tuning", "primary_area": "", "supplementary_material": "/attachment/195d52025d0c479f9208309427bb217933e24285.pdf", "author": "Xin Li;Dongze Lian;Zhihe Lu;Jiawang Bai;Zhibo Chen;Xinchao Wang", "authorids": "~Xin_Li28;~Dongze_Lian1;~Zhihe_Lu1;~Jiawang_Bai2;~Zhibo_Chen1;~Xinchao_Wang1", "gender": "M;M;M;M;M;M", "homepage": "https://lixinustc.github.io;https://dongzelian.com/;https://zhihelu.github.io/;;https://faculty.ustc.edu.cn/chenzhibo;https://sites.google.com/site/sitexinchaowang/", "dblp": "09/1365-82;211/7697;195/9141.html;237/9675;54/6561.html;", "google_scholar": "sbiY97gAAAAJ;q-C8LqsAAAAJ;X4LKIhgAAAAJ;https://scholar.google.com.hk/citations?user=sRksETcAAAAJ;1ayDJfsAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;0000-0002-6917-8654;;;", "linkedin": ";;;;;", "or_profile": "~Xin_Li28;~Dongze_Lian1;~Zhihe_Lu1;~Jiawang_Bai2;~Zhibo_Chen1;~Xinchao_WANG3", "aff": "University of Science and Technology of China;National University of Singapore;National University of Singapore;Tsinghua University;University of Science and Technology of China;National University of Singapore", "aff_domain": "ustc.edu.cn;nus.edu.sg;nus.edu;tsinghua.edu.cn;ustc.edu.cn;nus.edu", "position": "PhD student;Postdoc;Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2023graphadapter,\ntitle={GraphAdapter: Tuning Vision-Language Models With Dual Knowledge Graph},\nauthor={Xin Li and Dongze Lian and Zhihe Lu and Jiawang Bai and Zhibo Chen and Xinchao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YmEDnMynuO}\n}", "github": "", "project": "", "reviewers": "NmuM;pwW3;hFHN;hUWw", "pdf_size": 2897504, "rating": "5;5;7;8", "confidence": "4;5;4;5", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "wc_summary": "39;90;90;157", "wc_strengths": "43;47;59;177", "wc_weaknesses": "146;120;34;137", "wc_questions": "15;81;54;23", "wc_limitations": "2;18;30;10", "wc_review": "245;356;267;504", "wc_reply_reviewers": "14;21;14;33", "wc_reply_authors": "24;67;28;52", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 94.0, 41.91061917939175 ], "wc_strengths_avg": [ 81.5, 55.45042831214201 ], "wc_weaknesses_avg": [ 109.25, 44.437456047798236 ], "wc_questions_avg": [ 43.25, 26.21426138574192 ], "wc_limitations_avg": [ 15.0, 10.344080432788601 ], "wc_review_avg": [ 343.0, 101.82092122938194 ], "wc_reply_reviewers_avg": [ 20.5, 7.762087348130012 ], "wc_reply_authors_avg": [ 42.75, 17.62632973707232 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5232599650203594875&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;nus.edu.sg;nus.edu;tsinghua.edu.cn;ustc.edu.cn;nus.edu", "author_num": 6, "aff_unique_index": "0;1;1;2;0;1", "aff_unique_norm": "University of Science and 
Technology of China;National University of Singapore;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.nus.edu.sg;https://www.tsinghua.edu.cn", "aff_unique_abbr": "USTC;NUS;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Diffusion-SS3D: Diffusion Model for Semi-supervised 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71274", "id": "YoghyvSG0H", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/99786eed5e16920f908572fb00e151c3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YoghyvSG0H", "openreview": "https://openreview.net/forum?id=YoghyvSG0H", "poster": "/media/PosterPDFs/NeurIPS%202023/71274.png?t=1698762965.3882627", "slides": "https://nips.cc/virtual/2023/poster/71274", "video": "https://nips.cc/virtual/2023/poster/71274", "author_site": "Cheng-Ju Ho, Chen-Hsuan Tai, Yen-Yu Lin, Ming-Hsuan Yang, Yi-Hsuan Tsai", "tldr": "", "abstract": "Semi-supervised object detection is crucial for 3D scene understanding, efficiently addressing the limitation of acquiring large-scale 3D bounding box annotations. Existing methods typically employ a teacher-student framework with pseudo-labeling to leverage unlabeled point clouds. However, producing reliable pseudo-labels in a diverse 3D space still remains challenging. In this work, we propose Diffusion-SS3D, a new perspective of enhancing the quality of pseudo-labels via the diffusion model for semi-supervised 3D object detection. Specifically, we include noises to produce corrupted 3D object size and class label distributions, and then utilize the diffusion model as a denoising process to obtain bounding box outputs. Moreover, we integrate the diffusion model into the teacher-student framework, so that the denoised bounding boxes can be used to improve pseudo-label generation, as well as the entire semi-supervised learning process. We conduct experiments on the ScanNet and SUN RGB-D benchmark datasets to demonstrate that our approach achieves state-of-the-art performance against existing methods. We also present extensive analysis to understand how our diffusion model design affects performance in semi-supervised learning. 
The source code will be available at https://github.com/luluho1208/Diffusion-SS3D.", "keywords": "Semi-supervised learning;3D object detection;diffusion model", "primary_area": "", "supplementary_material": "/attachment/7633c3dfb3556d1af1616ba0c4f10ec1c8a4169c.pdf", "author": "Cheng-Ju Ho;Chen-Hsuan Tai;Yen-Yu Lin;Ming-Hsuan Yang;Yi-Hsuan Tsai", "authorids": "~Cheng-Ju_Ho1;~Chen-Hsuan_Tai1;~Yen-Yu_Lin1;~Ming-Hsuan_Yang1;~Yi-Hsuan_Tsai1", "gender": "M;M;M;M;M", "homepage": ";;https://sites.google.com/site/yylinweb/;https://faculty.ucmerced.edu/mhyang/;https://sites.google.com/site/yihsuantsai/home", "dblp": "336/2444;336/3240;44/4894;79/3711.html;142/2924", "google_scholar": ";;9RhFbpwAAAAJ;p9-ohHsAAAAJ;https://scholar.google.it/citations?user=zjI51wEAAAAJ", "orcid": ";;0000-0002-7183-6070;0000-0003-4848-2304;", "linkedin": "\u4f55-\u653f\u5112-0b671523b/;%E6%99%A8%E8%BB%92-%E6%88%B4-b87873198/;;minghsuanyang/;", "or_profile": "~Cheng-Ju_Ho1;~Chen-Hsuan_Tai1;~Yen-Yu_Lin1;~Ming-Hsuan_Yang1;~Yi-Hsuan_Tsai1", "aff": "National Chiao Tung University, National Chiao Tung University;National Chiao Tung University;National Yang Ming Chiao Tung University;University of California at Merced;Google", "aff_domain": "cs.nctu.edu.tw;nctu.edu.tw;nycu.edu.tw;ucmerced.edu;google.com", "position": "MS student;MS student;Full Professor;Professor;Researcher", "bibtex": "@inproceedings{\nho2023diffusionssd,\ntitle={Diffusion-{SS}3D: Diffusion Model for Semi-supervised 3D Object Detection},\nauthor={Cheng-Ju Ho and Chen-Hsuan Tai and Yen-Yu Lin and Ming-Hsuan Yang and Yi-Hsuan Tsai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YoghyvSG0H}\n}", "github": "", "project": "", "reviewers": "powF;PTFH;5wDL;7AAY;TXhm", "pdf_size": 1049818, "rating": "5;5;6;6;7", "confidence": "5;4;4;4;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "98;75;120;60;35", "wc_strengths": "46;27;97;66;28", "wc_weaknesses": "183;82;348;75;92", "wc_questions": "45;85;187;8;4", "wc_limitations": "16;85;37;11;39", "wc_review": "388;354;789;220;198", "wc_reply_reviewers": "92;39;57;0;40", "wc_reply_authors": "63;80;64;40;57", "reply_reviewers": "1;2;2;0;1", "reply_authors": "3;3;3;2;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 77.6, 29.47948439169179 ], "wc_strengths_avg": [ 52.8, 26.286118009321957 ], "wc_weaknesses_avg": [ 156.0, 103.65905652667306 ], "wc_questions_avg": [ 65.8, 67.30646328548248 ], "wc_limitations_avg": [ 37.6, 26.165626306282064 ], "wc_review_avg": [ 389.8, 212.72743123537217 ], "wc_reply_reviewers_avg": [ 45.6, 29.789931184881915 ], "wc_reply_authors_avg": [ 60.8, 12.890306435457616 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.8, 0.39999999999999997 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11042052535519542555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.nctu.edu.tw;nctu.edu.tw;nycu.edu.tw;ucmerced.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "National Chiao Tung University;National Yang Ming Chiao Tung University;University of California, 
Merced;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.nctu.edu.tw;https://www.nycu.edu.tw;https://www.ucmerced.edu;https://www.google.com", "aff_unique_abbr": "NCTU;NYCU;UC Merced;Google", "aff_campus_unique_index": "0;0;0;1;2", "aff_campus_unique": "Taiwan;Merced;Mountain View", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Pareto Frontiers in Deep Feature Learning: Data, Compute, Width, and Luck", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71273", "id": "Ypbke6biDm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/960573a3b797441aec39caa9f74bc793-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Ypbke6biDm", "openreview": "https://openreview.net/forum?id=Ypbke6biDm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71273", "video": "https://nips.cc/virtual/2023/poster/71273", "author_site": "Benjamin Edelman, Surbhi Goel, Sham Kakade, Eran Malach, Cyril Zhang", "tldr": "", "abstract": "In modern deep learning, algorithmic choices (such as width, depth, and learning rate) are known to modulate nuanced resource tradeoffs. This work investigates how these complexities necessarily arise for feature learning in the presence of computational-statistical gaps. We begin by considering offline sparse parity learning, a supervised classification problem which admits a statistical query lower bound for gradient-based training of a multilayer perceptron. This lower bound can be interpreted as a *multi-resource tradeoff frontier*: \nsuccessful learning can only occur if one is sufficiently rich (large model), knowledgeable (large dataset), patient (many training iterations), or lucky (many random guesses). We show, theoretically and experimentally, that sparse initialization and increasing network width yield significant improvements in sample efficiency in this setting. Here, width plays the role of parallel search: it amplifies the probability of finding \"lottery ticket\" neurons, which learn sparse features more sample-efficiently. Finally, we show that the synthetic sparse parity task can be useful as a proxy for real problems requiring axis-aligned feature learning. We demonstrate improved sample efficiency on tabular classification benchmarks by using wide, sparsely-initialized MLP models; these networks sometimes outperform tuned random forests.", "keywords": "deep learning;feature learning;parity;grokking;lottery tickets;scaling", "primary_area": "", "supplementary_material": "/attachment/dd89813d087b4e7ee20c1056885927ec64f550bb.pdf", "author": "Benjamin L. Edelman;Surbhi Goel;Sham M. 
Kakade;eran malach;Cyril Zhang", "authorids": "~Benjamin_L._Edelman1;~Surbhi_Goel1;~Sham_M._Kakade1;~eran_malach1;~Cyril_Zhang1", "gender": "F;M;M;;M", "homepage": "https://www.surbhigoel.com;https://shamulent.github.io;;https://cyrilzhang.com;https://www.benjaminedelman.com/", "dblp": "190/7815;s/SMKakade;202/2566;203/4448;241/9410", "google_scholar": "https://scholar.google.co.in/citations?user=Zqz4CQoAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;I15dUOwAAAAJ;sXtjq8IAAAAJ;mQSj2C0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Surbhi_Goel1;~Sham_M._Kakade1;~eran_malach1;~Cyril_Zhang1;~Benjamin_L_Edelman1", "aff": "University of Pennsylvania;Harvard University;Hebrew University of Jerusalem, Israel;Microsoft;Harvard University", "aff_domain": "upenn.edu;harvard.edu;huji.ac.il;microsoft.com;harvard.edu", "position": "Assistant Professor;Full Professor;PhD student;Senior Researcher;PhD student", "bibtex": "@inproceedings{\nedelman2023pareto,\ntitle={Pareto Frontiers in Deep Feature Learning: Data, Compute, Width, and Luck},\nauthor={Benjamin L. Edelman and Surbhi Goel and Sham M. Kakade and eran malach and Cyril Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Ypbke6biDm}\n}", "github": "", "project": "", "reviewers": "T64v;XFsj;9731;qvgd", "pdf_size": 658545, "rating": "5;5;7;8", "confidence": "1;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;3;4", "wc_summary": "111;159;79;167", "wc_strengths": "49;166;88;88", "wc_weaknesses": "122;361;145;53", "wc_questions": "115;9;95;28", "wc_limitations": "1;6;24;1", "wc_review": "398;701;431;337", "wc_reply_reviewers": "0;0;33;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 129.0, 35.94440151122286 ], "wc_strengths_avg": [ 97.75, 42.49926469952157 ], "wc_weaknesses_avg": [ 170.25, 115.21583007555863 ], "wc_questions_avg": [ 61.75, 44.33607447666065 ], "wc_limitations_avg": [ 8.0, 9.460443964212251 ], "wc_review_avg": [ 466.75, 139.38503327115146 ], "wc_reply_reviewers_avg": [ 8.25, 14.289419162443238 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16833625942149677918&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "upenn.edu;harvard.edu;huji.ac.il;microsoft.com;harvard.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Pennsylvania;Harvard University;Hebrew University of Jerusalem;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "https://www.upenn.edu;https://www.harvard.edu;https://www.huji.ac.il;https://www.microsoft.com", "aff_unique_abbr": "UPenn;Harvard;HUJI;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Federated Learning with Client Subsampling, Data Heterogeneity, and Unbounded Smoothness: A New Algorithm and Lower Bounds", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71272", "id": "Yq6GKgN3RC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/14ecbfb2216bab76195b60bfac7efb1f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Yq6GKgN3RC", "openreview": "https://openreview.net/forum?id=Yq6GKgN3RC", "poster": "/media/PosterPDFs/NeurIPS%202023/71272.png?t=1701923571.6929994", "slides": "https://nips.cc/virtual/2023/poster/71272", "video": "https://nips.cc/virtual/2023/poster/71272", "author_site": "Michael Crawshaw, Yajie Bao, Mingrui Liu", "tldr": "", "abstract": "We study the problem of Federated Learning (FL) under client subsampling and data heterogeneity with an objective function that has potentially unbounded smoothness. This problem is motivated by empirical evidence that the class of relaxed smooth functions, where the Lipschitz constant of the gradient scales linearly with the gradient norm, closely resembles the loss functions of certain neural networks such as recurrent neural networks (RNNs) with possibly exploding gradient. We introduce EPISODE++, the first algorithm to solve this problem. It maintains historical statistics for each client to construct control variates and decide clipping behavior for sampled clients in the current round. We prove that EPISODE++ achieves linear speedup in the number of participating clients, reduced communication rounds, and resilience to data heterogeneity. Our upper bound proof relies on novel techniques of recursively bounding the client updates under unbounded smoothness and client subsampling, together with a refined high probability analysis. In addition, we prove a lower bound showing that the convergence rate of a special case of clipped minibatch SGD (without randomness in the stochastic gradient and with randomness in client subsampling) suffers from an explicit dependence on the maximum gradient norm of the objective in a sublevel set, which may be large. This effectively demonstrates that applying gradient clipping to minibatch SGD in our setting does not eliminate the problem of exploding gradients. Our lower bound is based on new constructions of hard instances tailored to client subsampling and a novel analysis of the trajectory of the algorithm in the presence of clipping. Lastly, we provide an experimental evaluation of EPISODE++ when training RNNs on federated text classification tasks, demonstrating that EPISODE++ outperforms strong baselines in FL. 
The code is available at https://github.com/MingruiLiu-ML-Lab/episode_plusplus.", "keywords": "federated learning;client subsampling;nonconvex optimization;relaxed smoothness;data heterogeneity;lower bound", "primary_area": "", "supplementary_material": "/attachment/71b53dd7c81515ad311f04636780331c04c15b3d.zip", "author": "Michael Crawshaw;Yajie Bao;Mingrui Liu", "authorids": "~Michael_Crawshaw1;~Yajie_Bao2;~Mingrui_Liu2", "gender": "M;M;", "homepage": ";https://yajiebao.github.io/;https://mingrliu.github.io", "dblp": "274/7164;254/8290;", "google_scholar": "XVrMZ_4AAAAJ;1n_aUsIAAAAJ;KFoEnFQAAAAJ", "orcid": ";0000-0003-3843-7016;", "linkedin": ";;mingrui-liu-447a2aab/", "or_profile": "~Michael_Crawshaw1;~Yajie_Bao2;~Mingrui_Liu2", "aff": "George Mason University;Shanghai Jiaotong University;George Mason University", "aff_domain": "gmu.edu;sjtu.edu.cn;gmu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncrawshaw2023federated,\ntitle={Federated Learning with Client Subsampling, Data Heterogeneity, and Unbounded Smoothness: A New Algorithm and Lower Bounds},\nauthor={Michael Crawshaw and Yajie Bao and Mingrui Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Yq6GKgN3RC}\n}", "github": "", "project": "", "reviewers": "svY2;sD3Q;PGRH;xXDR", "pdf_size": 695425, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "28;56;211;62", "wc_strengths": "32;53;130;49", "wc_weaknesses": "35;44;41;61", "wc_questions": "99;2;56;2", "wc_limitations": "19;1;1;8", "wc_review": "213;156;439;182", "wc_reply_reviewers": "11;101;21;12", "wc_reply_authors": "0;470;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.25, 71.45409365459757 ], "wc_strengths_avg": [ 66.0, 37.78227097462512 ], "wc_weaknesses_avg": [ 45.25, 9.65336728815391 ], "wc_questions_avg": [ 39.75, 40.69628361410904 ], "wc_limitations_avg": [ 7.25, 7.361215932167728 ], "wc_review_avg": [ 247.5, 112.3888339649451 ], "wc_reply_reviewers_avg": [ 36.25, 37.585735326051555 ], "wc_reply_authors_avg": [ 117.5, 203.51596988934307 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12584316957770851427&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "gmu.edu;sjtu.edu.cn;gmu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "George Mason University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gmu.edu;https://www.sjtu.edu.cn", "aff_unique_abbr": "GMU;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Uncovering Neural Scaling Laws in Molecular Representation Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73550", "id": "Ys8RmfF9w1", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/052e22cfdd344c79634f7ec76fa03e22-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=Ys8RmfF9w1", "openreview": "https://openreview.net/forum?id=Ys8RmfF9w1", "poster": "/media/PosterPDFs/NeurIPS%202023/73550.png?t=1701572514.156212", "slides": "https://nips.cc/virtual/2023/poster/73550", "video": "https://nips.cc/virtual/2023/poster/73550", "author_site": "Dingshuo Chen, Yanqiao Zhu, Jieyu Zhang, Yuanqi Du, Zhixun Li, Qiang Liu, Shu Wu, Liang Wang", "tldr": "", "abstract": "Molecular Representation Learning (MRL) has emerged as a powerful tool for drug and materials discovery in a variety of tasks such as virtual screening and inverse design. While there has been a surge of interest in advancing model-centric techniques, the influence of both data quantity and quality on molecular representations is not yet clearly understood within this field. \nIn this paper, we delve into the neural scaling behaviors of MRL from a data-centric viewpoint, examining four key dimensions: (1) data modalities, (2) dataset splitting, (3) the role of pre-training, and (4) model capacity.\nOur empirical studies confirm a consistent power-law relationship between data volume and MRL performance across these dimensions. Additionally, through detailed analysis, we identify potential avenues for improving learning efficiency.\nTo challenge these scaling laws, we adapt seven popular data pruning strategies to molecular data and benchmark their performance. Our findings underline the importance of data-centric MRL and highlight possible directions for future research.", "keywords": "molecular representation learning;neural scaling law;data-centric AI", "primary_area": "", "supplementary_material": "", "author": "Dingshuo Chen;Yanqiao Zhu;Jieyu Zhang;Yuanqi Du;Zhixun Li;Qiang Liu;Shu Wu;Liang Wang", "authorids": "~Dingshuo_Chen1;~Yanqiao_Zhu1;~Jieyu_Zhang1;~Yuanqi_Du1;~Zhixun_Li1;~Qiang_Liu8;~Shu_Wu1;~Liang_Wang3", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://sxkdz.github.io;https://jieyuz2.github.io/;https://yuanqidu.github.io/;;https://john-qiangliu.tech/;http://www.shuwu.name;", "dblp": "289/7535;67/8383-1;;266/2837;;61/3234-6;06/3577;56/4499-1", "google_scholar": "jvrhEfIAAAAJ;NBbJT3AAAAAJ;T_INUHUAAAAJ;fAc_zZMAAAAJ;;https://scholar.google.co.jp/citations?user=D-lKLcMAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0003-2205-5304;0000-0002-1846-2436;;0000-0001-6750-9002;0000-0002-9233-3827;0000-0003-2164-3577;", "linkedin": ";;jieyu-zhang-3baaa8154/;;;;;", "or_profile": "~Dingshuo_Chen1;~Yanqiao_Zhu1;~Jieyu_Zhang1;~Yuanqi_Du1;~Zhixun_Li1;~Qiang_Liu8;~Shu_Wu1;~Liang_Wang3", "aff": "Institute of automation, Chinese Academy of Sciences;University of California, Los Angeles;University of Washington;Cornell University;The Chinese University of Hong Kong;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation\uff0c CAS\uff0cChina", "aff_domain": "ia.ac.cn;ucla.edu;cs.washington.edu;cornell.edu;se.cuhk.edu.hk;nlpr.ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023uncovering,\ntitle={Uncovering Neural Scaling Laws in Molecular Representation Learning},\nauthor={Dingshuo Chen and Yanqiao Zhu and Jieyu Zhang and Yuanqi Du and Zhixun Li and Qiang Liu and Shu Wu and Liang 
Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=Ys8RmfF9w1}\n}", "github": "", "project": "", "reviewers": "weCW;5fTN;rEJP;pZ6H;jova", "pdf_size": 1099232, "rating": "6;6;6;6;7", "confidence": "3;4;3;2;3", "wc_summary_and_contributions": "44;89;131;51;92", "wc_strengths": "54;45;75;35;160", "wc_improvement": "222;259;138;37;88", "wc_limitations": "7;21;53;25;1", "wc_correctness": "11;8;21;13;42", "wc_clarity": "98;10;41;1;26", "wc_relation_to_prior_work": "26;1;48;1;16", "wc_documentation": "30;5;33;23;38", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "493;439;541;187;464", "wc_reply_reviewers": "290;72;564;10;28", "wc_reply_authors": "1222;1446;1701;335;515", "reply_reviewers": "1;1;3;1;1", "reply_authors": "4;4;6;2;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 81.4, 31.474434069574627 ], "wc_strengths_avg": [ 73.8, 45.07504853020127 ], "wc_improvement_avg": [ 148.8, 82.23721785177317 ], "wc_limitations_avg": [ 21.4, 18.084247288731703 ], "wc_correctness_avg": [ 19.0, 12.280065146407 ], "wc_clarity_avg": [ 35.2, 34.254342790367474 ], "wc_relation_to_prior_work_avg": [ 18.4, 17.579533554676587 ], "wc_documentation_avg": [ 25.8, 11.478675881825394 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 424.8, 123.64529914234508 ], "wc_reply_reviewers_avg": [ 192.8, 210.90509714087045 ], "wc_reply_authors_avg": [ 1043.8, 530.557970442439 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 3.6, 1.4966629547095767 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3783659791998582683&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ia.ac.cn;ucla.edu;cs.washington.edu;cornell.edu;se.cuhk.edu.hk;nlpr.ia.ac.cn;ia.ac.cn;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of California, Los Angeles;University of Washington;Cornell University;Chinese University of Hong Kong", "aff_unique_dep": "Institute of Automation;;;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.ucla.edu;https://www.washington.edu;https://www.cornell.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "CAS;UCLA;UW;Cornell;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Deep Fractional Fourier Transform", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71271", "id": "YsYKv95jy9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e66309ead63bc1410d2df261a28f602d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YsYKv95jy9", "openreview": "https://openreview.net/forum?id=YsYKv95jy9", "poster": "/media/PosterPDFs/NeurIPS%202023/71271.png?t=1699698130.168225", "slides": "https://nips.cc/virtual/2023/poster/71271", "video": "https://nips.cc/virtual/2023/poster/71271", "author_site": "Hu Yu, Jie Huang, Lingzhi LI, man zhou, Feng Zhao", "tldr": "", "abstract": "Existing deep learning-based computer vision methods usually operate in the spatial and frequency domains, which are two orthogonal \\textbf{individual} perspectives for image processing.\nIn this paper, we introduce a 
new spatial-frequency analysis tool, Fractional Fourier Transform (FRFT), to provide comprehensive \\textbf{unified} spatial-frequency perspectives.\nThe FRFT is a unified continuous spatial-frequency transform that simultaneously reflects an image's spatial and frequency representations, making it optimal for processing non-stationary image signals.\nWe explore the properties of the FRFT for image processing and present a fast implementation of the 2D FRFT, which facilitates its widespread use.\nBased on these explorations, we introduce a simple yet effective operator, Multi-order FRactional Fourier Convolution (MFRFC), which exhibits the remarkable merits of processing images from more perspectives in the spatial-frequency plane. Our proposed MFRFC is a general and basic operator that can be easily integrated into various tasks for performance improvement.\nWe experimentally evaluate the MFRFC on various computer vision tasks, including object detection, image classification, guided super-resolution, denoising, dehazing, deraining, and low-light enhancement. Our proposed MFRFC consistently outperforms baseline methods by significant margins across all tasks.", "keywords": "Fractional Fourier Transform;image restoration", "primary_area": "", "supplementary_material": "/attachment/a7bab4649dfe90167f6d0822953286a27ffd4331.pdf", "author": "Hu Yu;Jie Huang;Lingzhi Li;Man Zhou;Feng Zhao", "authorids": "~Hu_Yu2;~Jie_Huang4;~Lingzhi_Li2;~Man_Zhou4;~Feng_Zhao6", "gender": "M;M;M;M;M", "homepage": "https://yuhuustc.github.io/;;https://lingzhili.com;https://bivlab123.github.io/;", "dblp": ";;78/8379-2;181/2734-4;165/8237", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;fNSGgPcAAAAJ;https://scholar.google.co.uk/citations?hl=en;", "orcid": "0000-0003-0598-8989;0000-0002-3518-3404;0000-0002-0552-9566;0000-0001-6767-8105;0000-0003-2872-605X", "linkedin": ";;;;", "or_profile": "~Hu_Yu2;~Jie_Huang4;~Lingzhi_Li2;~Feng_Zhao6;~man_zhou1", "aff": "University of Science and Technology of China;University of Science and Technology of China;Alibaba Group;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nyu2023deep,\ntitle={Deep Fractional Fourier Transform},\nauthor={Hu Yu and Jie Huang and Lingzhi Li and Man Zhou and Feng Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YsYKv95jy9}\n}", "github": "", "project": "", "reviewers": "TTF6;6jPA;HR4i;c5M1;Cjk7", "pdf_size": 1668748, "rating": "5;7;8;8;8", "confidence": "5;5;5;5;4", "soundness": "3;2;4;4;4", "novelty": "3;2;3;4;4", "presentation": "2;2;3;3;3", "wc_summary": "39;14;52;64;74", "wc_strengths": "37;6;130;75;128", "wc_weaknesses": "113;38;142;128;217", "wc_questions": "2;26;62;47;23", "wc_limitations": "2;1;14;65;17", "wc_review": "193;85;400;379;459", "wc_reply_reviewers": "45;10;34;48;18", "wc_reply_authors": "0;14;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 7.2, 1.16619037896906 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 48.6, 20.895932618574363 ], "wc_strengths_avg": [ 75.2, 
49.068931107168005 ], "wc_weaknesses_avg": [ 127.6, 57.34317744945775 ], "wc_questions_avg": [ 32.0, 20.69782597279241 ], "wc_limitations_avg": [ 19.8, 23.472537144501445 ], "wc_review_avg": [ 303.2, 140.81534007344513 ], "wc_reply_reviewers_avg": [ 31.0, 14.859340496805368 ], "wc_reply_authors_avg": [ 2.8, 5.6 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.34299717028501764, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10822627102748042760&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Science and Technology of China;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Diversifying Spatial-Temporal Perception for Video Domain Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71270", "id": "YsZTDcIQwQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aef34c770664d06eabdfebc5d3d58a9c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YsZTDcIQwQ", "openreview": "https://openreview.net/forum?id=YsZTDcIQwQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71270.png?t=1701434556.866022", "slides": "https://nips.cc/virtual/2023/poster/71270", "video": "https://nips.cc/virtual/2023/poster/71270", "author_site": "Kun-Yu Lin, Jia-Run Du, Yipeng Gao, Jiaming Zhou, Wei-Shi Zheng", "tldr": "", "abstract": "Video domain generalization aims to learn generalizable video classification models for unseen target domains by training in a source domain.\nA critical challenge of video domain generalization is to defend against the heavy reliance on domain-specific cues extracted from the source domain when recognizing target videos. To this end, we propose to perceive diverse spatial-temporal cues in videos, aiming to discover potential domain-invariant cues in addition to domain-specific cues. We contribute a novel model named Spatial-Temporal Diversification Network (STDN), which improves the diversity from both space and time dimensions of video data. First, our STDN proposes to discover various types of spatial cues within individual frames by spatial grouping. Then, our STDN proposes to explicitly model spatial-temporal dependencies between video contents at multiple space-time scales by spatial-temporal relation modeling. 
Extensive experiments on three benchmarks of different types demonstrate the effectiveness and versatility of our approach.", "keywords": "video understanding and analysis;video domain generalization", "primary_area": "", "supplementary_material": "/attachment/3dd78cd2c7dec06ca153e0ce234df174de371e34.pdf", "author": "Kun-Yu Lin;Jia-Run Du;Yipeng Gao;Jiaming Zhou;Wei-Shi Zheng", "authorids": "~Kun-Yu_Lin1;~Jia-Run_Du1;~Yipeng_Gao1;~Jiaming_Zhou1;~Wei-Shi_Zheng3", "gender": ";M;M;M;M", "homepage": ";https://github.com/Run542968;https://hlings.github.io/;https://jiaming-zhou.github.io/;http://www.isee-ai.cn/~zhwshi", "dblp": ";322/9114;146/8907;;30/8399", "google_scholar": ";;https://scholar.google.com.hk/citations?hl=zh-CN;b3y40w8AAAAJ;AwqDDGoAAAAJ", "orcid": ";0009-0007-4678-3716;;;", "linkedin": ";;;;", "or_profile": "~Kun-Yu_Lin1;~Jia-Run_Du1;~Yipeng_Gao1;~Jiaming_Zhou1;~Wei-Shi_Zheng3", "aff": ";SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": ";sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn", "position": ";MS student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nlin2023diversifying,\ntitle={Diversifying Spatial-Temporal Perception for Video Domain Generalization},\nauthor={Kun-Yu Lin and Jia-Run Du and Yipeng Gao and Jiaming Zhou and Wei-Shi Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YsZTDcIQwQ}\n}", "github": "", "project": "", "reviewers": "SFFk;JaYX;bwaF;YkHU;m6Ue", "pdf_size": 751006, "rating": "5;5;5;6;6", "confidence": "4;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "56;79;53;57;188", "wc_strengths": "32;43;31;22;90", "wc_weaknesses": "115;73;399;94;197", "wc_questions": "5;97;5;1;120", "wc_limitations": "5;13;1;1;28", "wc_review": "213;305;489;175;623", "wc_reply_reviewers": "17;55;19;51;82", "wc_reply_authors": "13;721;13;597;161", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.6, 51.53872330587944 ], "wc_strengths_avg": [ 43.6, 24.137936945812086 ], "wc_weaknesses_avg": [ 175.6, 119.3509111821104 ], "wc_questions_avg": [ 45.6, 51.890654264520506 ], "wc_limitations_avg": [ 9.6, 10.190191362285598 ], "wc_review_avg": [ 361.0, 170.0964432314797 ], "wc_reply_reviewers_avg": [ 44.8, 24.350770008359078 ], "wc_reply_authors_avg": [ 301.0, 299.834621083023 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17777305527508438379&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Online Map Vectorization for Autonomous Driving: A Rasterization Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71269", "id": 
"YvO5yTVv5Y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/654f61ecd998c9095d30d42c03b832aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YvO5yTVv5Y", "openreview": "https://openreview.net/forum?id=YvO5yTVv5Y", "poster": "/media/PosterPDFs/NeurIPS%202023/71269.png?t=1700726751.2532554", "slides": "https://nips.cc/virtual/2023/poster/71269", "video": "https://nips.cc/virtual/2023/poster/71269", "author_site": "Gongjie Zhang, Jiahao Lin, Shuang Wu, yilin song, Zhipeng Luo, Yang Xue, Shijian Lu, Zuoguan Wang", "tldr": "", "abstract": "High-definition (HD) vectorized map is essential for autonomous driving, providing detailed and precise environmental information for advanced perception and planning. However, current map vectorization methods often exhibit deviations, and the existing evaluation metric for map vectorization lacks sufficient sensitivity to detect these deviations. To address these limitations, we propose integrating the philosophy of rasterization into map vectorization. Specifically, we introduce a new rasterization-based evaluation metric, which has superior sensitivity and is better suited to real-world autonomous driving scenarios. Furthermore, we propose MapVR (Map Vectorization via Rasterization), a novel framework that applies differentiable rasterization to vectorized outputs and then performs precise and geometry-aware supervision on rasterized HD maps. Notably, MapVR designs tailored rasterization strategies for various geometric shapes, enabling effective adaptation to a wide range of map elements. Experiments show that incorporating rasterization into map vectorization greatly enhances performance with no extra computational cost during inference, leading to more accurate map perception and ultimately promoting safer autonomous driving. Codes are available at https://github.com/ZhangGongjie/MapVR. 
A standalone map vectorization evaluation toolkit is available at https://github.com/jiahaoLjh/MapVectorizationEvalToolkit.", "keywords": "Online HD Map Construction;Map Vectorization;Autonomous Driving;Evaluation Metric;Rasterization;Differentiable Rasterization;Bird's-Eye-View Perception", "primary_area": "", "supplementary_material": "/attachment/ddaceb38d416145b47757b40ceb5b476fc078742.pdf", "author": "Gongjie Zhang;Jiahao Lin;Shuang Wu;Yilin Song;Zhipeng Luo;Yang Xue;Shijian Lu;Zuoguan Wang", "authorids": "~Gongjie_Zhang1;~Jiahao_Lin1;~Shuang_Wu6;~Yilin_Song1;~Zhipeng_Luo2;~Yang_Xue5;~Shijian_Lu1;~Zuoguan_Wang2", "gender": "M;;M;M;M;M;M;M", "homepage": "https://zhanggongjie.github.io/;https://www.comp.nus.edu.sg/~jiahao/;;;;https://github.com/AustinXY;https://personal.ntu.edu.sg/shijian.lu/;https://www.linkedin.com/in/zuoguan-zuph-wang-b6131a25/", "dblp": "20/10243;182/5821;85/3231-2;;120/7554;25/6299;42/2718;", "google_scholar": "sRBTPp4AAAAJ;;https://scholar.google.com.sg/citations?user=_IlTlTsAAAAJ;;mw-qVgcAAAAJ;;https://scholar.google.com.sg/scholar?hl=en;", "orcid": "0000-0003-0506-8357;;0000-0002-7551-7712;;;;;", "linkedin": "gongjie-z-25538216a/;;bst-wushuang/;yilin-song-a3b969274/;zhipeng-luo-2bb8343b;;;", "or_profile": "~Gongjie_Zhang1;~Jiahao_Lin1;~Shuang_Wu6;~Yilin_Song1;~Zhipeng_Luo2;~Yang_Xue5;~Shijian_Lu1;~Zuoguan_Wang2", "aff": "Black Sesame Technologies;;Black Sesame Technologies;;Sensetime Research;;Nanyang Technological University;Black Sesame Technologies", "aff_domain": "bst.ai;;bst.ai;;sensetime.com;;ntu.edu.sg;bst.ai", "position": "Researcher;;Researcher;;Algorithm Researcher;;Associate Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023online,\ntitle={Online Map Vectorization for Autonomous Driving: A Rasterization Perspective},\nauthor={Gongjie Zhang and Jiahao Lin and Shuang Wu and Yilin Song and Zhipeng Luo and Yang Xue and Shijian Lu and Zuoguan Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YvO5yTVv5Y}\n}", "github": "", "project": "", "reviewers": "pGkQ;yTjh;ddVN;U7tP;YAhf", "pdf_size": 6552972, "rating": "4;4;5;7;8", "confidence": "5;4;2;4;4", "soundness": "3;3;3;4;4", "novelty": "2;2;2;3;4", "presentation": "2;3;3;3;4", "wc_summary": "98;1;58;84;90", "wc_strengths": "48;1;58;70;282", "wc_weaknesses": "140;1;45;47;126", "wc_questions": "82;1;21;125;5", "wc_limitations": "5;1;1;29;11", "wc_review": "373;5;183;355;514", "wc_reply_reviewers": "61;0;15;31;142", "wc_reply_authors": "43;0;36;31;195", "reply_reviewers": "1;0;1;1;2", "reply_authors": "2;0;2;2;3", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 66.2, 35.24996453898925 ], "wc_strengths_avg": [ 91.8, 97.93548897105686 ], "wc_weaknesses_avg": [ 71.8, 52.79166600894501 ], "wc_questions_avg": [ 46.8, 48.713037269297836 ], "wc_limitations_avg": [ 9.4, 10.461357464497617 ], "wc_review_avg": [ 286.0, 175.4331781619429 ], "wc_reply_reviewers_avg": [ 49.8, 50.340440999260224 ], "wc_reply_authors_avg": [ 61.0, 68.59446041773344 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.050251890762960646, "gs_citation": 40, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6000777717617256917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bst.ai;;bst.ai;;sensetime.com;;ntu.edu.sg;bst.ai", "author_num": 8, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Black Sesame Technologies;SenseTime;Nanyang Technological University", "aff_unique_dep": ";Research;", "aff_unique_url": ";https://www.sensetime.com/;https://www.ntu.edu.sg", "aff_unique_abbr": ";SenseTime;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";China;Singapore" }, { "title": "Integration-free Training for Spatio-temporal Multimodal Covariate Deep Kernel Point Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71268", "id": "Yvpenkym8A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4eb2c0adafbe71269f3a772c130f9e53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Yvpenkym8A", "openreview": "https://openreview.net/forum?id=Yvpenkym8A", "poster": "/media/PosterPDFs/NeurIPS%202023/71268.png?t=1699610296.382097", "slides": "https://nips.cc/virtual/2023/poster/71268", "video": "https://nips.cc/virtual/2023/poster/71268", "author_site": "YIXUAN ZHANG, Quyu Kong, Feng Zhou", "tldr": "", "abstract": "In this study, we propose a novel deep spatio-temporal point process model, Deep Kernel Mixture Point Processes (DKMPP), that incorporates multimodal covariate information. DKMPP is an enhanced version of Deep Mixture Point Processes (DMPP), which uses a more flexible deep kernel to model complex relationships between events and covariate data, improving the model's expressiveness. To address the intractable training procedure of DKMPP due to the non-integrable deep kernel, we utilize an integration-free method based on score matching, and further improve efficiency by adopting a scalable denoising score matching method. 
Our experiments demonstrate that DKMPP and its corresponding score-based estimators outperform baseline models, showcasing the advantages of incorporating covariate information, utilizing a deep kernel, and employing score-based estimators.", "keywords": "Spatio-temporal Point Processes;Deep Kernel;Covariate;Integration-free", "primary_area": "", "supplementary_material": "/attachment/28e8e288e6eafc00c58ee5034c5f02ced062b51b.pdf", "author": "YIXUAN ZHANG;Quyu Kong;Feng Zhou", "authorids": "~YIXUAN_ZHANG1;~Quyu_Kong1;~Feng_Zhou9", "gender": ";M;", "homepage": ";https://qykong.github.io;", "dblp": "57/1240-6;209/9882;", "google_scholar": "oHaa8jsAAAAJ;0EXa6lkAAAAJ;", "orcid": "0009-0005-0094-7143;;", "linkedin": ";;", "or_profile": "~YIXUAN_ZHANG1;~Quyu_Kong1;~Feng_Zhou9", "aff": "University of Technology Sydney;Alibaba Group;", "aff_domain": "uts.edu.au;alibaba-inc.com;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nzhang2023integrationfree,\ntitle={Integration-free Training for Spatio-temporal Multimodal Covariate Deep Kernel Point Processes},\nauthor={YIXUAN ZHANG and Quyu Kong and Feng Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Yvpenkym8A}\n}", "github": "", "project": "", "reviewers": "6cnn;FYsq;NRcd", "pdf_size": 540265, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "119;38;88", "wc_strengths": "71;53;91", "wc_weaknesses": "82;148;176", "wc_questions": "98;42;62", "wc_limitations": "15;45;17", "wc_review": "385;326;434", "wc_reply_reviewers": "49;39;22", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 81.66666666666667, 33.36997985548621 ], "wc_strengths_avg": [ 71.66666666666667, 15.520595635763755 ], "wc_weaknesses_avg": [ 135.33333333333334, 39.40671121635106 ], "wc_questions_avg": [ 67.33333333333333, 23.170862929310353 ], "wc_limitations_avg": [ 25.666666666666668, 13.695092389449425 ], "wc_review_avg": [ 381.6666666666667, 44.15377170248942 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 11.145502331533658 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18239154169954539841&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "uts.edu.au;alibaba-inc.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Technology Sydney;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.alibaba.com", "aff_unique_abbr": "UTS;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;China" }, { "title": "Text Promptable Surgical Instrument Segmentation with Vision-Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71267", "id": "YwgA3avHrP", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/5af741d487c5f0b08bfe56e11d1883e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=YwgA3avHrP", "openreview": "https://openreview.net/forum?id=YwgA3avHrP", "poster": "/media/PosterPDFs/NeurIPS%202023/71267.png?t=1702116491.6128242", "slides": "https://nips.cc/virtual/2023/poster/71267", "video": "https://nips.cc/virtual/2023/poster/71267", "author_site": "Zijian Zhou, Oluwatosin Alabi, Meng Wei, Tom Vercauteren, Miaojing Shi", "tldr": "", "abstract": "In this paper, we propose a novel text promptable surgical instrument segmentation approach to overcome challenges associated with diversity and differentiation of surgical instruments in minimally invasive surgeries. We redefine the task as text promptable, thereby enabling a more nuanced comprehension of surgical instruments and adaptability to new instrument types. Inspired by recent advancements in vision-language models, we leverage pretrained image and text encoders as our model backbone and design a text promptable mask decoder consisting of attention- and convolution-based prompting schemes for surgical instrument segmentation prediction. Our model leverages multiple text prompts for each surgical instrument through a new mixture of prompts mechanism, resulting in enhanced segmentation performance. Additionally, we introduce a hard instrument area reinforcement module to improve image feature comprehension and segmentation precision. Extensive experiments on several surgical instrument segmentation datasets demonstrate our model's superior performance and promising generalization capability. To our knowledge, this is the first implementation of a promptable approach to surgical instrument segmentation, offering significant potential for practical application in the field of robotic-assisted surgery. 
Code is available at https://github.com/franciszzj/TP-SIS.", "keywords": "Surgical Instrument Segmentation;Vision Language Models;Text Promptable Segmentation", "primary_area": "", "supplementary_material": "/attachment/d3a3376f1d88a52f281da9646c70b19b19ba5975.pdf", "author": "Zijian Zhou;Oluwatosin Alabi;Meng Wei;Tom Vercauteren;Miaojing Shi", "authorids": "~Zijian_Zhou2;~Oluwatosin_Alabi1;~Meng_Wei9;~Tom_Vercauteren1;~Miaojing_Shi1", "gender": "M;M;M;F;M", "homepage": "https://sites.google.com/view/zijian-zhou/home;https://cai4cai.ml/author/oluwatosin-alabi/;https://sites.google.com/site/miaojingshi;https://cai4cai.ml/author/meng-wei/;https://cai4cai.ml", "dblp": "73/1606-2;;;;99/4387", "google_scholar": "https://scholar.google.com.hk/citations?user=cQAVobgAAAAJ;im7SKIkAAAAJ;aj2XHWoAAAAJ;LRFr7zAAAAAJ;zduEJkcAAAAJ", "orcid": ";0000-0002-3716-3503;;;0000-0003-1794-0456", "linkedin": ";oluwatosin-olatunde-alabi/;;meng-wei-673803161?trk=profile_share_wechat&from=singlemessage&isappinstalled=0;tomvercauteren", "or_profile": "~Zijian_Zhou2;~Oluwatosin_Alabi1;~Miaojing_Shi1;~Meng_Wei2;~Tom_Kamiel_Vercauteren1", "aff": "King's College London, University of London;King's College London, University of London;Tongji University;King's College London, University of London;University College London (UCL)", "aff_domain": "kcl.ac.uk;kcl.ac.uk;tongji.edu.cn;kcl.ac.uk;ucl.ac.uk", "position": "PhD student;PhD student;Full Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhou2023text,\ntitle={Text Promptable Surgical Instrument Segmentation with Vision-Language Models},\nauthor={Zijian Zhou and Oluwatosin Alabi and Meng Wei and Tom Vercauteren and Miaojing Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=YwgA3avHrP}\n}", "github": "", "project": "", "reviewers": "fFdZ;9oei;wnjR;Rt9K;NUuZ", "pdf_size": 2489626, "rating": "5;5;6;7;7", "confidence": "4;4;4;4;4", "soundness": "3;2;3;2;3", "novelty": "2;2;3;2;3", "presentation": "3;3;2;2;3", "wc_summary": "132;65;37;22;38", "wc_strengths": "58;19;136;47;133", "wc_weaknesses": "147;52;88;40;67", "wc_questions": "104;35;122;49;13", "wc_limitations": "62;1;55;28;12", "wc_review": "503;172;438;186;263", "wc_reply_reviewers": "266;84;88;49;184", "wc_reply_authors": "804;22;101;31;21", "reply_reviewers": "2;1;1;2;1", "reply_authors": "3;2;2;3;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 58.8, 39.13770560469788 ], "wc_strengths_avg": [ 78.6, 47.39029436498575 ], "wc_weaknesses_avg": [ 78.8, 37.679702758912526 ], "wc_questions_avg": [ 64.6, 41.543230495473026 ], "wc_limitations_avg": [ 31.6, 23.686282950264694 ], "wc_review_avg": [ 312.4, 134.33778321827407 ], "wc_reply_reviewers_avg": [ 134.2, 79.70545778050584 ], "wc_reply_authors_avg": [ 195.8, 305.55353049834 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9864259841355305332&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "kcl.ac.uk;kcl.ac.uk;tongji.edu.cn;kcl.ac.uk;ucl.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "King's College London;Tongji 
University;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kcl.ac.uk;https://www.tongji.edu.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "KCL;Tongji;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Compositional Policy Learning in Stochastic Control Systems with Formal Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71266", "id": "Yx8Sw2H5Q7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/95827e011b9e899f189a01fe2f4ef316-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Yx8Sw2H5Q7", "openreview": "https://openreview.net/forum?id=Yx8Sw2H5Q7", "poster": "/media/PosterPDFs/NeurIPS%202023/71266.png?t=1702078759.39853", "slides": "https://nips.cc/virtual/2023/poster/71266", "video": "https://nips.cc/virtual/2023/poster/71266", "author_site": "\u0110or\u0111e \u017dikeli\u0107, \u0110or\u0111e \u017dikeli\u0107, Mathias Lechner, Abhinav Verma, Krishnendu Chatterjee, Thomas Henzinger", "tldr": "", "abstract": "Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL, to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASM) for the graph\u2019s sub-tasks and then composing them into a global policy. We also derive a tighter lower bound compared to previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies. 
We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.", "keywords": "Verification;Compositional learning", "primary_area": "", "supplementary_material": "/attachment/a9882a67ffaf2cf103fc29657c129d50b7e68712.zip", "author": "\u0110or\u0111e \u017dikeli\u0107;Mathias Lechner;Abhinav Verma;Krishnendu Chatterjee;Thomas A Henzinger", "authorids": "~\u0110or\u0111e_\u017dikeli\u01071;~Mathias_Lechner1;~Abhinav_Verma1;~Krishnendu_Chatterjee1;~Thomas_A_Henzinger1", "gender": "M;Unspecified;M;M;M", "homepage": "https://djordjezikelic.github.io/;https://mlech26l.github.io/pages/;https://averma8053.github.io;http://pub.ist.ac.at/~kchatterjee/;https://pub.ist.ac.at/~tah", "dblp": "150/8968.html;209/9862;01/1084-1;92/5602;h/ThomasAHenzinger", "google_scholar": "D3CQXf0AAAAJ;https://scholar.google.at/citations?hl=en;jM1HeCIAAAAJ;https://scholar.google.com.tw/citations?user=1kaW8bwAAAAJ;jpgplxUAAAAJ", "orcid": ";;0000-0002-9820-8285;;0000-0002-2985-7724", "linkedin": ";;;;", "or_profile": "~\u0110or\u0111e_\u017dikeli\u01071;~Mathias_Lechner1;~Abhinav_Verma1;~Krishnendu_Chatterjee1;~Thomas_A_Henzinger1", "aff": "Institute of Science and Technology Austria;Massachusetts Institute of Technology;Pennsylvania State University;Institute of Science and Technology Austria;Institute of Science and Technology Austria", "aff_domain": "ist.ac.at;mit.edu;psu.edu;ist.ac.at;ist.ac.at", "position": "PhD student;Postdoc;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\n{\\v{z}}ikeli{\\'c}2023compositional,\ntitle={Compositional Policy Learning in Stochastic Control Systems with Formal Guarantees},\nauthor={{\\DJ}or{\\dj}e {\\v{Z}}ikeli{\\'c} and Mathias Lechner and Abhinav Verma and Krishnendu Chatterjee and Thomas A Henzinger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Yx8Sw2H5Q7}\n}", "github": "", "project": "", "reviewers": "kcWZ;Dssn;6cWz;E1c8", "pdf_size": 562008, "rating": "4;5;6;7", "confidence": "4;4;5;4", "soundness": "3;2;4;4", "novelty": "2;2;2;3", "presentation": "3;3;4;4", "wc_summary": "105;116;79;56", "wc_strengths": "123;35;64;75", "wc_weaknesses": "179;100;120;105", "wc_questions": "34;136;85;23", "wc_limitations": "30;15;7;25", "wc_review": "471;402;355;284", "wc_reply_reviewers": "758;0;23;11", "wc_reply_authors": "1108;0;0;0", "reply_reviewers": "7;0;1;1", "reply_authors": "6;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 89.0, 23.313086453749534 ], "wc_strengths_avg": [ 74.25, 31.712576369636068 ], "wc_weaknesses_avg": [ 126.0, 31.47220996371243 ], "wc_questions_avg": [ 69.5, 44.95831402532795 ], "wc_limitations_avg": [ 19.25, 8.898735865278843 ], "wc_review_avg": [ 378.0, 68.17257513105984 ], "wc_reply_reviewers_avg": [ 198.0, 323.41845958448323 ], "wc_reply_authors_avg": [ 277.0, 479.778073696579 ], "reply_reviewers_avg": [ 2.25, 2.7726341266023544 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15096053424307727006&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "ist.ac.at;mit.edu;psu.edu;ist.ac.at;ist.ac.at", "author_num": 5, "aff_unique_index": 
"0;1;2;0;0", "aff_unique_norm": "Institute of Science and Technology Austria;Massachusetts Institute of Technology;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ist.ac.at;https://web.mit.edu;https://www.psu.edu", "aff_unique_abbr": "IST Austria;MIT;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Austria;United States" }, { "title": "A Unified Framework for Rank-based Loss Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71265", "id": "Z16jo3d6OD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a10946e1f46e1ffc0daf37cb2abfdcad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z16jo3d6OD", "openreview": "https://openreview.net/forum?id=Z16jo3d6OD", "poster": "/media/PosterPDFs/NeurIPS%202023/71265.png?t=1701611800.422215", "slides": "https://nips.cc/virtual/2023/poster/71265", "video": "https://nips.cc/virtual/2023/poster/71265", "author_site": "Rufeng Xiao, Yuze Ge, Rujun Jiang, Yifan Yan", "tldr": "", "abstract": "The empirical loss, commonly referred to as the average loss, is extensively utilized for training machine learning models. However, in order to address the diverse performance requirements of machine learning models, the use of the rank-based loss is prevalent, replacing the empirical loss in many cases. The rank-based loss comprises a weighted sum of sorted individual losses, encompassing both convex losses like the spectral risk, which includes the empirical risk and conditional value-at-risk, and nonconvex losses such as the human-aligned risk and the sum of the ranked range loss. In this paper, we introduce a unified framework for the optimization of the rank-based loss through the utilization of a proximal alternating direction method of multipliers. We demonstrate the convergence and convergence rate of the proposed algorithm under mild conditions. 
Experiments conducted on synthetic and real datasets illustrate the effectiveness and efficiency of the proposed algorithm.", "keywords": "rank-based loss;ADMM;nonconvex nonsmooth optimization;conditional Value-at-Risk;human-aligned risk;ranked range loss", "primary_area": "", "supplementary_material": "/attachment/5ecf3b38898ab6f5545ceb14fc28dd7ecac0f267.pdf", "author": "Rufeng Xiao;Yuze Ge;Rujun Jiang;Yifan Yan", "authorids": "~Rufeng_Xiao1;~Yuze_Ge1;~Rujun_Jiang1;yanyf21@m.fudan.edu.cn", "gender": "M;M;M;", "homepage": "https://github.com/RufengXiao;https://sxfxuz.github.io/;https://rjjiang.github.io/;", "dblp": "369/7081;;187/7827;", "google_scholar": ";;UxH6ELwAAAAJ;", "orcid": "0009-0001-9474-7767;;0000-0002-6610-6778;", "linkedin": ";;;", "or_profile": "~Rufeng_Xiao1;~Yuze_Ge1;~Rujun_Jiang1;yanyf21@m.fudan.edu.cn", "aff": "Fudan University;Fudan University;Fudan University;", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;", "position": "MS student;Undergrad student;Associate Professor;", "bibtex": "@inproceedings{\nxiao2023a,\ntitle={A Unified Framework for Rank-based Loss Minimization},\nauthor={Rufeng Xiao and Yuze Ge and Rujun Jiang and Yifan Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z16jo3d6OD}\n}", "github": "", "project": "", "reviewers": "kXbu;GkCz;Gyag;G6DJ", "pdf_size": 992747, "rating": "4;5;6;7", "confidence": "2;2;1;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "72;56;45;98", "wc_strengths": "5;114;25;110", "wc_weaknesses": "15;453;19;28", "wc_questions": "40;8;10;57", "wc_limitations": "1;1;7;9", "wc_review": "133;632;106;302", "wc_reply_reviewers": "17;0;31;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.75, 19.929563467371782 ], "wc_strengths_avg": [ 63.5, 49.03315205042401 ], "wc_weaknesses_avg": [ 128.75, 187.26501942434416 ], "wc_questions_avg": [ 28.75, 20.656415468323637 ], "wc_limitations_avg": [ 4.5, 3.570714214271425 ], "wc_review_avg": [ 293.25, 209.50581734166715 ], "wc_reply_reviewers_avg": [ 22.5, 15.724185193516387 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5313979332565637475&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Path Regularization: A Convexity and Sparsity Inducing Regularization for Parallel ReLU Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71264", "id": "Z1Aj59LoZD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bbf38332580c1bed99fa99bc9ee53229-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z1Aj59LoZD", 
"openreview": "https://openreview.net/forum?id=Z1Aj59LoZD", "poster": "/media/PosterPDFs/NeurIPS%202023/71264.png?t=1697473372.6198058", "slides": "https://nips.cc/virtual/2023/poster/71264", "video": "https://nips.cc/virtual/2023/poster/71264", "author_site": "Tolga Ergen, Mert Pilanci", "tldr": "", "abstract": "Understanding the fundamental principles behind the success of deep neural networks is one of the most important open questions in the current literature. To this end, we study the training problem of deep neural networks and introduce an analytic approach to unveil hidden convexity in the optimization landscape. We consider a deep parallel ReLU network architecture, which also includes standard deep networks and ResNets as its special cases. We then show that pathwise regularized training problems can be represented as an exact convex optimization problem. We further prove that the equivalent convex problem is regularized via a group sparsity inducing norm. Thus, a path regularized parallel ReLU network can be viewed as a parsimonious convex model in high dimensions. More importantly, since the original training problem may not be trainable in polynomial-time, we propose an approximate algorithm with a fully polynomial-time complexity in all data dimensions. Then, we prove strong global optimality guarantees for this algorithm. We also provide experiments corroborating our theory.", "keywords": "Convex optimization;deep learning theory;path norm;group sparsity;polynomial-time training;ReLU networks;parallel architectures;global optimality;computational complexity", "primary_area": "", "supplementary_material": "", "author": "Tolga Ergen;Mert Pilanci", "authorids": "~Tolga_Ergen1;~Mert_Pilanci3", "gender": "M;M", "homepage": "https://tolgaergen.github.io/;https://stanford.edu/~pilanci/", "dblp": "202/7477.html;45/8056", "google_scholar": "https://scholar.google.com.tr/citations?user=T1pWaCsAAAAJ;aSAS-aAAAAAJ", "orcid": "0000-0003-4806-0224;", "linkedin": ";mert-pilanci-ba615743/", "or_profile": "~Tolga_Ergen1;~Mert_Pilanci3", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nergen2023path,\ntitle={Path Regularization: A Convexity and Sparsity Inducing Regularization for Parallel Re{LU} Networks},\nauthor={Tolga Ergen and Mert Pilanci},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z1Aj59LoZD}\n}", "github": "", "project": "", "reviewers": "kQ6x;SZac;EXww;24Tr;B3i8", "pdf_size": 1257441, "rating": "6;6;7;7;7", "confidence": "3;3;2;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "41;96;147;69;96", "wc_strengths": "27;25;26;89;41", "wc_weaknesses": "21;53;99;400;92", "wc_questions": "2;97;10;31;96", "wc_limitations": "2;1;1;11;1", "wc_review": "93;272;283;600;326", "wc_reply_reviewers": "0;0;9;146;17", "wc_reply_authors": "0;0;0;110;0", "reply_reviewers": "0;0;1;2;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.8, 35.107833883622035 ], "wc_strengths_avg": [ 41.6, 24.40983408382777 ], "wc_weaknesses_avg": [ 133.0, 136.4331338055386 ], "wc_questions_avg": [ 47.2, 41.35408081435253 ], "wc_limitations_avg": [ 3.2, 
3.919183588453085 ], "wc_review_avg": [ 314.8, 163.4030599468688 ], "wc_reply_reviewers_avg": [ 34.4, 56.15905982119003 ], "wc_reply_authors_avg": [ 22.0, 44.0 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17382645761159288012&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "stanford.edu;stanford.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Revisiting Logistic-softmax Likelihood in Bayesian Meta-Learning for Few-Shot Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71263", "id": "Z1W0u3Cr74", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cdb2cbb2083477cca5243843d6dad06-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z1W0u3Cr74", "openreview": "https://openreview.net/forum?id=Z1W0u3Cr74", "poster": "/media/PosterPDFs/NeurIPS%202023/71263.png?t=1702039806.3781297", "slides": "https://nips.cc/virtual/2023/poster/71263", "video": "https://nips.cc/virtual/2023/poster/71263", "author_site": "Tianjun Ke, Haoqun Cao, Zenan Ling, Feng Zhou", "tldr": "", "abstract": "Meta-learning has demonstrated promising results in few-shot classification (FSC) by learning to solve new problems using prior knowledge. Bayesian methods are effective at characterizing uncertainty in FSC, which is crucial in high-risk fields. In this context, the logistic-softmax likelihood is often employed as an alternative to the softmax likelihood in multi-class Gaussian process classification due to its conditional conjugacy property. However, the theoretical properties of logistic-softmax are not clear, and previous research indicated that the inherent uncertainty of logistic-softmax leads to suboptimal performance. To mitigate these issues, we revisit and redesign the logistic-softmax likelihood, which enables control of the \textit{a priori} confidence level through a temperature parameter. Furthermore, we theoretically and empirically show that softmax can be viewed as a special case of logistic-softmax and that logistic-softmax induces a larger family of data distributions than softmax. Utilizing the modified logistic-softmax, we integrate the data augmentation technique into the deep kernel based Gaussian process meta-learning framework, and derive an analytical mean-field approximation for task-specific updates. Our approach yields well-calibrated uncertainty estimates and achieves comparable or superior results on standard benchmark datasets. 
Code is publicly available at \\url{https://github.com/keanson/revisit-logistic-softmax}.", "keywords": "Few-shot learning;Gaussian processes;Conditional conjugate", "primary_area": "", "supplementary_material": "/attachment/63eb78f8f756319b6ce91d6aab08e82e689be9ad.pdf", "author": "Tianjun Ke;Haoqun Cao;Zenan Ling;Feng Zhou", "authorids": "~Tianjun_Ke1;~Haoqun_Cao1;~Zenan_Ling1;~Feng_Zhou9", "gender": ";M;M;", "homepage": "https://keanson.github.io/;https://kencao2007.github.io/;https://scholar.google.com/citations?user=BabePTkAAAAJ&hl=zh-CN;", "dblp": ";;183/7798;", "google_scholar": "https://scholar.google.com/citations?hl=en;;BabePTkAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tianjun_Ke1;~Haoqun_Cao1;~Zenan_Ling1;~Feng_Zhou9", "aff": "School of Statistics, Renmin University of China;Renmin University of China;Huazhong University of Science and Technology;", "aff_domain": "stat.ruc.edu.cn;ruc.edu.cn;hust.edu.cn;", "position": "Undergrad student;Undergrad student;Researcher;", "bibtex": "@inproceedings{\nke2023revisiting,\ntitle={Revisiting Logistic-softmax Likelihood in Bayesian Meta-Learning for Few-Shot Classification},\nauthor={Tianjun Ke and Haoqun Cao and Zenan Ling and Feng Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z1W0u3Cr74}\n}", "github": "", "project": "", "reviewers": "iEEK;1tNK;JdA9;L2f9", "pdf_size": 0, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "90;111;128;118", "wc_strengths": "41;59;94;20", "wc_weaknesses": "87;164;72;33", "wc_questions": "41;76;20;91", "wc_limitations": "21;36;40;1", "wc_review": "280;446;354;263", "wc_reply_reviewers": "8;37;17;16", "wc_reply_authors": "20;265;21;21", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.75, 13.935117509371782 ], "wc_strengths_avg": [ 53.5, 27.15234796477093 ], "wc_weaknesses_avg": [ 89.0, 47.576254581461114 ], "wc_questions_avg": [ 57.0, 28.026772914483036 ], "wc_limitations_avg": [ 24.5, 15.305227865013967 ], "wc_review_avg": [ 335.75, 72.2647043860279 ], "wc_reply_reviewers_avg": [ 19.5, 10.688779163215974 ], "wc_reply_authors_avg": [ 81.75, 105.80022447991308 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12832098008728786419&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "stat.ruc.edu.cn;ruc.edu.cn;hust.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Renmin University of China;Huazhong University of Science and Technology", "aff_unique_dep": "School of Statistics;", "aff_unique_url": "http://www.ruc.edu.cn;http://www.hust.edu.cn", "aff_unique_abbr": "RUC;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Optimal Extragradient-Based Algorithms for Stochastic Variational Inequalities with Separable Structure", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71262", "id": "Z28nPtAVxx", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/69ce18ad9f53f28e8e7ac1649ae02337-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z28nPtAVxx", "openreview": "https://openreview.net/forum?id=Z28nPtAVxx", "poster": "/media/PosterPDFs/NeurIPS%202023/71262.png?t=1702498086.5428133", "slides": "https://nips.cc/virtual/2023/poster/71262", "video": "https://nips.cc/virtual/2023/poster/71262", "author_site": "Angela Yuan, Chris Junchi Li, Gauthier Gidel, Michael Jordan, Quanquan Gu, Simon Du", "tldr": "", "abstract": "We consider the problem of solving stochastic monotone variational inequalities with a separable structure using a stochastic first-order oracle. Building on standard extragradient for variational inequalities we propose a novel algorithm---stochastic \\emph{accelerated gradient-extragradient} (AG-EG)---for strongly monotone variational inequalities (VIs). Our approach combines the strengths of extragradient and Nesterov acceleration. By showing that its iterates remain in a bounded domain and applying scheduled restarting, we prove that AG-EG has an optimal convergence rate for strongly monotone VIs. Furthermore, when specializing to the particular case of bilinearly coupled strongly-convex-strongly-concave saddle-point problems, including bilinear games, our algorithm achieves fine-grained convergence rates that match the respective lower bounds, with the stochasticity being characterized by an additive statistical error term that is optimal up to a constant prefactor.", "keywords": "Stochastic variational inequalities;convex-concave separable saddle-point optimization;extragradient-based algorithm;Nesterov's acceleration;scheduled restarting;scaling reduction", "primary_area": "", "supplementary_material": "/attachment/3804bb9560b0b80e226ab7e3e153c4aac62011b8.pdf", "author": "Angela Yuan;Chris Junchi Li;Gauthier Gidel;Michael Jordan;Quanquan Gu;Simon Shaolei Du", "authorids": "~Angela_Yuan1;~Chris_Junchi_Li1;~Gauthier_Gidel1;~Michael_Jordan1;~Quanquan_Gu1;~Simon_Shaolei_Du1", "gender": ";M;M;M;M;M", "homepage": ";;https://gauthiergidel.github.io/;http://www.cs.berkeley.edu/~jordan/;http://web.cs.ucla.edu/~qgu/;http://simonshaoleidu.com", "dblp": ";;188/6326;j/MichaelIJordan;50/4597;176/5602", "google_scholar": ";cHN3PVYAAAAJ;https://scholar.google.fr/citations?user=bDrXQPUAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;GU9HgNAAAAAJ;OttawxUAAAAJ", "orcid": ";;;0000-0001-8935-817X;;", "linkedin": ";;;;;", "or_profile": "~Angela_Yuan1;~Chris_Junchi_Li1;~Gauthier_Gidel1;~Michael_Jordan1;~Quanquan_Gu1;~Simon_Shaolei_Du1", "aff": ";University of California, Berkeley;Mila - Quebec Artificial Intelligence Institute;University of California, Berkeley;University of California, Los Angeles;Meta Facebook", "aff_domain": ";berkeley.edu;mila.quebec;berkeley.edu;cs.ucla.edu;fb.com", "position": ";Visiting Scientist;Assistant Professor;Full Professor;Associate Professor;Visiting Professor", "bibtex": "@inproceedings{\nyuan2023optimal,\ntitle={Optimal Extragradient-Based Algorithms for Stochastic Variational Inequalities with Separable Structure},\nauthor={Angela Yuan and Chris Junchi Li and Gauthier Gidel and Michael Jordan and Quanquan Gu and Simon Shaolei Du},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z28nPtAVxx}\n}", "github": "", "project": "", "reviewers": "KTFh;oFAu;qR4K;iB8j", "pdf_size": 869452, "rating": "6;6;6;6", "confidence": "4;2;3;3", "soundness": 
"4;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "33;28;91;108", "wc_strengths": "18;30;68;30", "wc_weaknesses": "171;69;49;39", "wc_questions": "2;19;84;71", "wc_limitations": "2;11;9;1", "wc_review": "226;157;301;249", "wc_reply_reviewers": "12;24;13;16", "wc_reply_authors": "15;24;18;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.0, 35.06422678457348 ], "wc_strengths_avg": [ 36.5, 18.83480820183736 ], "wc_weaknesses_avg": [ 82.0, 52.507142371300304 ], "wc_questions_avg": [ 44.0, 34.343849522148794 ], "wc_limitations_avg": [ 5.75, 4.322904116447646 ], "wc_review_avg": [ 233.25, 51.73188088596818 ], "wc_reply_reviewers_avg": [ 16.25, 4.710360920354193 ], "wc_reply_authors_avg": [ 14.25, 8.842369591913696 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1846494655483317632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";berkeley.edu;mila.quebec;berkeley.edu;cs.ucla.edu;fb.com", "author_num": 6, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of California, Berkeley;Quebec Artificial Intelligence Institute;University of California, Los Angeles;Meta", "aff_unique_dep": ";Artificial Intelligence;;Meta Platforms, Inc.", "aff_unique_url": "https://www.berkeley.edu;https://mila.quebec;https://www.ucla.edu;https://meta.com", "aff_unique_abbr": "UC Berkeley;Mila;UCLA;Meta", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Berkeley;;Los Angeles", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Meta-Learning with Neural Bandit Scheduler", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71261", "id": "Z2L7F0nekb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9e6ac15e689e06139d7b39e1667b165-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z2L7F0nekb", "openreview": "https://openreview.net/forum?id=Z2L7F0nekb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71261", "video": "https://nips.cc/virtual/2023/poster/71261", "author_site": "Yunzhe Qi, Yikun Ban, Tianxin Wei, Jiaru Zou, Huaxiu Yao, Jingrui He", "tldr": "", "abstract": "Meta-learning has been proven an effective learning paradigm for training machine learning models with good generalization ability. Apart from the common practice of uniformly sampling the meta-training tasks, existing methods working on task scheduling strategies are mainly based on pre-defined sampling protocols or the assumed task-model correlations, and greedily make scheduling decisions, which can lead to sub-optimal performance bottlenecks of the meta-model. In this paper, we propose a novel task scheduling framework under Contextual Bandits settings, named BASS, which directly optimizes the task scheduling strategy based on the status of the meta-model. 
By balancing the exploitation and exploration in meta-learning task scheduling, BASS can help tackle the challenge of limited knowledge about the task distribution during the early stage of meta-training, while simultaneously exploring potential benefits for forthcoming meta-training iterations through an adaptive exploration strategy. Theoretical analysis and extensive experiments are presented to show the effectiveness of our proposed framework.", "keywords": "Meta Learning;Contextual Bandits", "primary_area": "", "supplementary_material": "/attachment/945aec1cf4085acc4218d0f3979997f89a47dc53.zip", "author": "Yunzhe Qi;Yikun Ban;Tianxin Wei;Jiaru Zou;Huaxiu Yao;Jingrui He", "authorids": "~Yunzhe_Qi1;~Yikun_Ban1;~Tianxin_Wei1;~Jiaru_Zou1;~Huaxiu_Yao1;~Jingrui_He1", "gender": "M;;;M;M;F", "homepage": "https://www.linkedin.com/in/yunzhe-qi-a1409b161/;;https://weitianxin.github.io/;;http://huaxiuyao.mystrikingly.com;https://www.hejingrui.org", "dblp": "259/3914;;277/5800;292/7978;197/1635;34/2685", "google_scholar": "Gt17_A0AAAAJ;;_LU2-kMAAAAJ;GzLTey4AAAAJ;A20BZnQAAAAJ;hXpZynkAAAAJ", "orcid": "0000-0001-5828-7436;;0000-0003-4450-2005;0009-0002-3583-354X;;0000-0002-6429-6272", "linkedin": "yunzhe-qi-a1409b161/;;tianxin-wei-7063a2180/;jiaru-zou-67434a21a/;huaxiuyao/;", "or_profile": "~Yunzhe_Qi1;~Yikun_Ban1;~Tianxin_Wei1;~Jiaru_Zou1;~Huaxiu_Yao1;~Jingrui_He1", "aff": "University of Illinois Urbana-Champaign;;University of Illinois, Urbana-Champaign;\tUniversity of Illinois at Urbana-Champaign ;Computer Science Department, Stanford University;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;;uiuc.edu;illinois.edu;cs.stanford.edu;illinois.edu", "position": "PhD student;;PhD student;Undergrad student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nqi2023metalearning,\ntitle={Meta-Learning with Neural Bandit Scheduler},\nauthor={Yunzhe Qi and Yikun Ban and Tianxin Wei and Jiaru Zou and Huaxiu Yao and Jingrui He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z2L7F0nekb}\n}", "github": "", "project": "", "reviewers": "ua81;nu6A;XGJX;zT5d;Z8mk", "pdf_size": 1831916, "rating": "6;6;6;7;7", "confidence": "4;2;3;3;5", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;2", "presentation": "3;3;3;3;3", "wc_summary": "10;59;67;93;83", "wc_strengths": "27;159;46;65;154", "wc_weaknesses": "113;142;97;55;140", "wc_questions": "61;121;36;35;119", "wc_limitations": "10;33;8;30;21", "wc_review": "221;514;254;278;517", "wc_reply_reviewers": "66;19;13;46;41", "wc_reply_authors": "477;34;31;36;25", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.4, 28.772208813367108 ], "wc_strengths_avg": [ 90.2, 55.47395785411385 ], "wc_weaknesses_avg": [ 109.4, 32.01624587611733 ], "wc_questions_avg": [ 74.4, 38.385413896426854 ], "wc_limitations_avg": [ 20.4, 10.131140113531151 ], "wc_review_avg": [ 356.8, 130.83944359404774 ], "wc_reply_reviewers_avg": [ 37.0, 19.172897537930986 ], "wc_reply_authors_avg": [ 120.6, 178.2387163329 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4803844614152613, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15706632433059998157&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "illinois.edu;;uiuc.edu;illinois.edu;cs.stanford.edu;illinois.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois;Stanford University", "aff_unique_dep": ";;Computer Science Department", "aff_unique_url": "https://illinois.edu;https://illinois.edu;https://www.stanford.edu", "aff_unique_abbr": "UIUC;UIUC;Stanford", "aff_campus_unique_index": "0;0;0;1;0", "aff_campus_unique": "Urbana-Champaign;Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Wide Neural Networks as Gaussian Processes: Lessons from Deep Equilibrium Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71260", "id": "Z2he2Y0MoH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac24656b0b5f543b202f748d62041637-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z2he2Y0MoH", "openreview": "https://openreview.net/forum?id=Z2he2Y0MoH", "poster": "/media/PosterPDFs/NeurIPS%202023/71260.png?t=1702283000.9535325", "slides": "https://nips.cc/virtual/2023/poster/71260", "video": "https://nips.cc/virtual/2023/poster/71260", "author_site": "Tianxiang Gao, Xiaokai Huo, Hailiang Liu, Hongyang Gao", "tldr": "", "abstract": "Neural networks with wide layers have attracted significant attention due to their equivalence to Gaussian processes, enabling perfect fitting of training data while maintaining generalization performance, known as benign overfitting. However, existing results mainly focus on shallow or finite-depth networks, necessitating a comprehensive analysis of wide neural networks with infinite-depth layers, such as neural ordinary differential equations (ODEs) and deep equilibrium models (DEQs). \n\nIn this paper, we specifically investigate the deep equilibrium model (DEQ), an infinite-depth neural network with shared weight matrices across layers. Our analysis reveals that as the width of DEQ layers approaches infinity, it converges to a Gaussian process, establishing what is known as the Neural Network and Gaussian Process (NNGP) correspondence. Remarkably, this convergence holds even when the limits of depth and width are interchanged, which is not observed in typical infinite-depth Multilayer Perceptron (MLP) networks. Furthermore, we demonstrate that the associated Gaussian vector remains non-degenerate for any pairwise distinct input data, ensuring a strictly positive smallest eigenvalue of the corresponding kernel matrix using the NNGP kernel. 
These findings serve as fundamental elements for studying the training and generalization of DEQs, laying the groundwork for future research in this area.", "keywords": "Gradient descent;deep equilibrium model;Gaussian processes;kernel methods;NNGP;NTK", "primary_area": "", "supplementary_material": "/attachment/7b677f647902c06e881f84b53e88b83f9d1ff0b4.zip", "author": "Tianxiang Gao;Xiaokai Huo;Hailiang Liu;Hongyang Gao", "authorids": "~Tianxiang_Gao2;~Xiaokai_Huo1;~Hailiang_Liu1;~Hongyang_Gao1", "gender": "M;M;M;M", "homepage": "https://gaotx-cs.github.io/;;https://faculty.sites.iastate.edu/hliu/;https://faculty.sites.iastate.edu/hygao/", "dblp": "118/3814;;;200/7985", "google_scholar": "iNLlIbQAAAAJ;;Wq7IGEIAAAAJ;jGmq0aEAAAAJ", "orcid": ";0000-0001-8131-2310;;0000-0002-9020-9080", "linkedin": ";;;hongyang-gao-74924690/", "or_profile": "~Tianxiang_Gao2;~Xiaokai_Huo1;~Hailiang_Liu1;~Hongyang_Gao1", "aff": "Iowa State University;Iowa State University;Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu;iastate.edu", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2023wide,\ntitle={Wide Neural Networks as Gaussian Processes: Lessons from Deep Equilibrium Models},\nauthor={Tianxiang Gao and Xiaokai Huo and Hailiang Liu and Hongyang Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z2he2Y0MoH}\n}", "github": "", "project": "", "reviewers": "A2iv;2cEL;tXNv;bC2e", "pdf_size": 2187546, "rating": "5;6;6;7", "confidence": "3;2;3;4", "soundness": "3;4;3;3", "novelty": "2;3;2;3", "presentation": "2;4;3;2", "wc_summary": "79;69;93;50", "wc_strengths": "39;68;111;23", "wc_weaknesses": "112;99;268;70", "wc_questions": "189;184;86;74", "wc_limitations": "12;27;102;1", "wc_review": "431;447;660;218", "wc_reply_reviewers": "74;13;7;184", "wc_reply_authors": "68;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 72.75, 15.658464164789597 ], "wc_strengths_avg": [ 60.25, 33.446786093734026 ], "wc_weaknesses_avg": [ 137.25, 77.0044641563072 ], "wc_questions_avg": [ 133.25, 53.447988736714876 ], "wc_limitations_avg": [ 35.5, 39.48733974326455 ], "wc_review_avg": [ 439.0, 156.3729516252731 ], "wc_reply_reviewers_avg": [ 69.5, 71.11434454454319 ], "wc_reply_authors_avg": [ 17.0, 29.444863728670914 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2644496060020756682&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "iastate.edu;iastate.edu;iastate.edu;iastate.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Tree-Rings Watermarks: Invisible Fingerprints for Diffusion Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71259", "id": "Z57JrmubNl", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b54d1757c190ba20dbc4f9e4a2f54149-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z57JrmubNl", "openreview": "https://openreview.net/forum?id=Z57JrmubNl", "poster": "/media/PosterPDFs/NeurIPS%202023/71259.png?t=1699370145.5634189", "slides": "https://nips.cc/virtual/2023/poster/71259", "video": "https://nips.cc/virtual/2023/poster/71259", "author_site": "Yuxin Wen, John Kirchenbauer, Jonas Geiping, Tom Goldstein", "tldr": "", "abstract": "Watermarking the outputs of generative models is a crucial technique for tracing copyright and preventing potential harm from AI-generated content. In this paper, we introduce a novel technique called Tree-Ring Watermarking that robustly fingerprints diffusion model outputs. Unlike existing methods that perform post-hoc modifications to images after sampling, Tree-Ring Watermarking subtly influences the entire sampling process, resulting in a model fingerprint that is invisible to humans. The watermark embeds a pattern into the initial noise vector used for sampling. These patterns are structured in Fourier space so that they are invariant to convolutions, crops, dilations, flips, and rotations. After image generation, the watermark signal is detected by inverting the diffusion process to retrieve the noise vector, which is then checked for the embedded signal. We demonstrate that this technique can be easily applied to arbitrary diffusion models, including text-conditioned Stable Diffusion, as a plug-in with negligible loss in FID. Our watermark is semantically hidden in the image space and is far more robust than watermarking alternatives that are currently deployed.", "keywords": "Diffusion Model;Watermark;Privacy and Security", "primary_area": "", "supplementary_material": "/attachment/814e990b57b412180e6b41291e3caf6a16713300.zip", "author": "Yuxin Wen;John Kirchenbauer;Jonas Geiping;Tom Goldstein", "authorids": "~Yuxin_Wen2;~John_Kirchenbauer1;~Jonas_Geiping1;~Tom_Goldstein1", "gender": ";M;M;M", "homepage": "https://yuxinwenrick.github.io/;https://jwkirchenbauer.notion.site/;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/", "dblp": ";321/0678;190/7229;25/8184", "google_scholar": "oUYfjg0AAAAJ;48GJrbsAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;", "linkedin": ";johnkirchenbauer/;;", "or_profile": "~Yuxin_Wen2;~John_Kirchenbauer1;~Jonas_Geiping1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nwen2023treerings,\ntitle={Tree-Rings Watermarks: Invisible Fingerprints for Diffusion Images},\nauthor={Yuxin Wen and John Kirchenbauer and Jonas Geiping and Tom Goldstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z57JrmubNl}\n}", "github": "", "project": "", "reviewers": "KNue;9hBC;CpUq;ZCXQ", "pdf_size": 14929285, "rating": "4;4;6;8", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "4;2;2;3", "presentation": "2;3;3;3", "wc_summary": "40;44;104;101", "wc_strengths": "17;15;22;106", "wc_weaknesses": "614;186;230;442", "wc_questions": "37;44;2;190", "wc_limitations": "105;4;13;1", "wc_review": "813;293;371;840", "wc_reply_reviewers": "421;45;102;136", 
"wc_reply_authors": "858;315;427;136", "reply_reviewers": "2;1;1;2", "reply_authors": "4;2;3;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 30.30160886817728 ], "wc_strengths_avg": [ 40.0, 38.19031290785662 ], "wc_weaknesses_avg": [ 368.0, 171.87204542915057 ], "wc_questions_avg": [ 68.25, 72.07071180444939 ], "wc_limitations_avg": [ 30.75, 43.09509832916036 ], "wc_review_avg": [ 579.25, 248.96623767089383 ], "wc_reply_reviewers_avg": [ 176.0, 145.13958798343063 ], "wc_reply_authors_avg": [ 434.0, 265.890014855767 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7945008072480030839&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umd.edu;umd.edu;umd.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Topology-Aware Uncertainty for Image Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71258", "id": "Z6eexoCy7W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/19ded4cfc36a7feb7fce975393d378fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z6eexoCy7W", "openreview": "https://openreview.net/forum?id=Z6eexoCy7W", "poster": "/media/PosterPDFs/NeurIPS%202023/71258.png?t=1701816690.0520782", "slides": "https://nips.cc/virtual/2023/poster/71258", "video": "https://nips.cc/virtual/2023/poster/71258", "author_site": "Saumya Gupta, Yikai Zhang, Xiaoling Hu, Prateek Prasanna, Chao Chen", "tldr": "", "abstract": "Segmentation of curvilinear structures such as vasculature and road networks is challenging due to relatively weak signals and complex geometry/topology. To facilitate and accelerate large scale annotation, one has to adopt semi-automatic approaches such as proofreading by experts. In this work, we focus on uncertainty estimation for such tasks, so that highly uncertain, and thus error-prone structures can be identified for human annotators to verify. Unlike most existing works, which provide pixel-wise uncertainty maps, we stipulate it is crucial to estimate uncertainty in the units of topological structures, e.g., small pieces of connections and branches. To achieve this, we leverage tools from topological data analysis, specifically discrete Morse theory (DMT), to first capture the structures, and then reason about their uncertainties. To model the uncertainty, we (1) propose a joint prediction model that estimates the uncertainty of a structure while taking the neighboring structures into consideration (inter-structural uncertainty); (2) propose a novel Probabilistic DMT to model the inherent uncertainty within each structure (intra-structural uncertainty) by sampling its representations via a perturb-and-walk scheme. On various 2D and 3D datasets, our method produces better structure-wise uncertainty maps compared to existing works. 
Code available at: https://github.com/Saumya-Gupta-26/struct-uncertainty", "keywords": "Topological Representation;Discrete Morse Theory;Structural Uncertainty;Image Segmentation", "primary_area": "", "supplementary_material": "", "author": "Saumya Gupta;Yikai Zhang;Xiaoling Hu;Prateek Prasanna;Chao Chen", "authorids": "~Saumya_Gupta1;~Yikai_Zhang1;~Xiaoling_Hu1;~Prateek_Prasanna3;~Chao_Chen1", "gender": "F;;M;M;M", "homepage": "https://saumya-gupta-26.github.io/;;https://huxiaoling.github.io/;https://you.stonybrook.edu/imaginelab/;https://chaochen.github.io/", "dblp": "141/5238;;59/11113-2;133/6611;66/3019-12", "google_scholar": "https://scholar.google.com/citations?hl=en;;6MfwhCAAAAAJ;uyA1Q18AAAAJ;J-iIIFAAAAAJ", "orcid": "0000-0003-0933-3445;;;;0000-0003-1703-6483", "linkedin": "saumya-gupta-0b48b416a/;;xiaoling-hu-1329337b/;;", "or_profile": "~Saumya_Gupta1;~Yikai_Zhang1;~Xiaoling_Hu1;~Prateek_Prasanna3;~Chao_Chen1", "aff": "State University of New York at Stony Brook;;Stony Brook University;State University of New York, Stony Brook;State University of New York, Stony Brook", "aff_domain": "stonybrook.edu;;stonybrook.edu;stonybrook.edu;stonybrook.edu", "position": "PhD student;;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngupta2023topologyaware,\ntitle={Topology-Aware Uncertainty for Image Segmentation},\nauthor={Saumya Gupta and Yikai Zhang and Xiaoling Hu and Prateek Prasanna and Chao Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z6eexoCy7W}\n}", "github": "", "project": "", "reviewers": "ns8Y;7bwv;qp6i;oww6", "pdf_size": 5969078, "rating": "5;5;5;8", "confidence": "3;5;3;5", "soundness": "3;4;3;4", "novelty": "3;4;2;4", "presentation": "2;4;3;4", "wc_summary": "90;49;94;163", "wc_strengths": "113;16;64;225", "wc_weaknesses": "69;118;243;633", "wc_questions": "15;4;52;272", "wc_limitations": "5;4;30;26", "wc_review": "292;191;483;1319", "wc_reply_reviewers": "6;37;49;66", "wc_reply_authors": "40;53;38;255", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 99.0, 40.93287187579195 ], "wc_strengths_avg": [ 104.5, 77.56448929761608 ], "wc_weaknesses_avg": [ 265.75, 221.32032780564916 ], "wc_questions_avg": [ 85.75, 108.99168546269941 ], "wc_limitations_avg": [ 16.25, 11.840080236214618 ], "wc_review_avg": [ 571.25, 444.2658972957524 ], "wc_reply_reviewers_avg": [ 39.5, 21.914607000811127 ], "wc_reply_authors_avg": [ 96.5, 91.69105736111892 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12820065732147034069&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "stonybrook.edu;;stonybrook.edu;stonybrook.edu;stonybrook.edu", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "State University of New York at Stony Brook;Stony Brook University;State University of New York", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.stonybrook.edu;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;SBU;SUNY Stony Brook", "aff_campus_unique_index": "0;0;0", 
"aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Puzzlefusion: Unleashing the Power of Diffusion Models for Spatial Puzzle Solving", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71257", "id": "Z764QxwETf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e70ac91ad26ba5b24cf11b12a1f90fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z764QxwETf", "openreview": "https://openreview.net/forum?id=Z764QxwETf", "poster": "/media/PosterPDFs/NeurIPS%202023/71257.png?t=1699581731.3370664", "slides": "https://nips.cc/virtual/2023/poster/71257", "video": "https://nips.cc/virtual/2023/poster/71257", "author_site": "Sepidehsadat (Sepid) Hossieni, Mohammad Amin Shabani, Saghar Irandoust, Yasutaka Furukawa", "tldr": "", "abstract": "This paper presents an end-to-end neural architecture based on Diffusion Models for spatial puzzle solving, particularly jigsaw puzzle and room arrangement tasks.\nIn the latter task, for instance, the proposed system ``PuzzleFusion'' takes a set of room layouts as polygonal curves in the top-down view and aligns the room layout pieces by estimating their 2D translations and rotations, akin to solving the jigsaw puzzle of room layouts. A surprising discovery of the paper is that the simple use of a Diffusion Model effectively solves these challenging spatial puzzle tasks as a conditional generation process. \nTo enable learning of an end-to-end neural system, the paper introduces new datasets with ground-truth arrangements: 1) 2D Voronoi Jigsaw Dataset, a synthetic one where pieces are generated by voronoi diagram of 2D pointset; and 2) MagicPlan Dataset, a real one from a production pipeline by MagicPlan, where pieces are room layouts constructed by augmented reality App by real-estate consumers.\nThe qualitative and quantitative evaluations demonstrate that the proposed approach outperforms the competing methods by significant margins in all three spatial puzzle tasks. 
We have provided code and data in https://sepidsh.github.io/puzzlefusion.", "keywords": "Diffusion;Jigsaw;puzzle solving", "primary_area": "", "supplementary_material": "/attachment/5a97d5b33d215e61257ad94997c07a09218c90c5.zip", "author": "Sepidehsadat Hosseini;Mohammad Amin Shabani;Saghar Irandoust;Yasutaka Furukawa", "authorids": "~Sepidehsadat_Hosseini2;~Mohammad_Amin_Shabani1;~Saghar_Irandoust1;~Yasutaka_Furukawa1", "gender": "F;;F;M", "homepage": "https://sepidsh.github.io/;https://aminshabani.github.io;;https://www.cs.sfu.ca/~furukawa", "dblp": ";213/7836;293/7233;37/1720", "google_scholar": ";NC5J2SkAAAAJ;St8JO4wAAAAJ;https://scholar.google.com.tw/citations?user=wCxzFrMAAAAJ", "orcid": ";;;", "linkedin": "sepidehsadat-hosseini-9b0314a0/;aminshabani/;saghar-irandoust/;", "or_profile": "~Sepidehsadat_Hosseini2;~Mohammad_Amin_Shabani1;~Saghar_Irandoust1;~Yasutaka_Furukawa1", "aff": "Borealis AI;Adobe Systems;Borealis AI;Simon Fraser University", "aff_domain": "borealisai.com;adobe.com;borealisai.com;sfu.ca", "position": "Intern;Intern;Research Engineer;Associate Professor", "bibtex": "@inproceedings{\nhosseini2023puzzlefusion,\ntitle={Puzzlefusion: Unleashing the Power of Diffusion Models for Spatial Puzzle Solving},\nauthor={Sepidehsadat Hosseini and Mohammad Amin Shabani and Saghar Irandoust and Yasutaka Furukawa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z764QxwETf}\n}", "github": "", "project": "", "reviewers": "zazU;qFVS;AMsE;c71V;TNRQ", "pdf_size": 1268286, "rating": "5;5;6;6;7", "confidence": "4;4;2;2;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;3;4;3", "wc_summary": "73;56;83;95;97", "wc_strengths": "44;109;77;56;38", "wc_weaknesses": "129;206;133;94;53", "wc_questions": "130;161;63;109;56", "wc_limitations": "31;7;16;4;1", "wc_review": "407;539;372;358;245", "wc_reply_reviewers": "140;20;5;17;83", "wc_reply_authors": "480;53;36;37;41", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 80.8, 15.13142425550219 ], "wc_strengths_avg": [ 64.8, 25.810075552001006 ], "wc_weaknesses_avg": [ 123.0, 50.52919947911306 ], "wc_questions_avg": [ 103.8, 39.83666652720832 ], "wc_limitations_avg": [ 11.8, 10.833282051160674 ], "wc_review_avg": [ 384.2, 94.54607342454788 ], "wc_reply_reviewers_avg": [ 53.0, 51.29912279951773 ], "wc_reply_authors_avg": [ 129.4, 175.4042188774261 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17447802949368816935&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "borealisai.com;adobe.com;borealisai.com;sfu.ca", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Borealis AI;Adobe;Simon Fraser University", "aff_unique_dep": ";Adobe Systems Incorporated;", "aff_unique_url": "https://www.borealisai.com;https://www.adobe.com;https://www.sfu.ca", "aff_unique_abbr": "Borealis AI;Adobe;SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "NEO-KD: 
Knowledge-Distillation-Based Adversarial Training for Robust Multi-Exit Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71256", "id": "Z7Cz9un2Fy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81e1cdaa570954321d8b06be386cc3d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z7Cz9un2Fy", "openreview": "https://openreview.net/forum?id=Z7Cz9un2Fy", "poster": "/media/PosterPDFs/NeurIPS%202023/71256.png?t=1701746944.4085636", "slides": "https://nips.cc/virtual/2023/poster/71256", "video": "https://nips.cc/virtual/2023/poster/71256", "author_site": "Seokil Ham, Jungwuk Park, Dong-Jun Han, Jaekyun Moon", "tldr": "", "abstract": "While multi-exit neural networks are regarded as a promising solution for making efficient inference via early exits, combating adversarial attacks remains a challenging problem. In multi-exit networks, due to the high dependency among different submodels, an adversarial example targeting a specific exit not only degrades the performance of the target exit but also reduces the performance of all other exits concurrently. This makes multi-exit networks highly vulnerable to simple adversarial attacks. In this paper, we propose NEO-KD, a knowledge-distillation-based adversarial training strategy that tackles this fundamental challenge based on two key contributions. NEO-KD first resorts to neighbor knowledge distillation to guide the outputs of adversarial examples toward the ensemble outputs of the neighboring exits on clean data. NEO-KD also employs exit-wise orthogonal knowledge distillation to reduce adversarial transferability across different submodels. The result is significantly improved robustness against adversarial attacks. 
Experimental results on various datasets/models show that our method achieves the best adversarial accuracy with reduced computation budgets, compared to the baselines relying on existing adversarial training or knowledge distillation techniques for multi-exit networks.", "keywords": "Multi-exit Neural Network;Adversarial Training;Knowledge Distillation;Adversarial Transferability", "primary_area": "", "supplementary_material": "", "author": "Seokil Ham;Jungwuk Park;Dong-Jun Han;Jaekyun Moon", "authorids": "~Seokil_Ham1;~Jungwuk_Park1;~Dong-Jun_Han1;~Jaekyun_Moon2", "gender": "M;M;M;M", "homepage": ";;https://sites.google.com/view/djhan930/home?authuser=0;http://comstolab.kaist.ac.kr/people.html", "dblp": "359/9843;307/4735;201/0078;78/2744", "google_scholar": "BXlQ2nMAAAAJ;ek4xQy0AAAAJ;https://scholar.google.co.kr/citations?user=-YR-GxUAAAAJ;", "orcid": "0000-0003-4400-847X;;;", "linkedin": ";jungwuk-park-458b25199;;", "or_profile": "~Seokil_Ham1;~Jungwuk_Park1;~Dong-Jun_Han1;~Jaekyun_Moon2", "aff": "KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST", "aff_domain": "ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.edu", "position": "MS student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nham2023neokd,\ntitle={{NEO}-{KD}: Knowledge-Distillation-Based Adversarial Training for Robust Multi-Exit Neural Networks},\nauthor={Seokil Ham and Jungwuk Park and Dong-Jun Han and Jaekyun Moon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z7Cz9un2Fy}\n}", "github": "", "project": "", "reviewers": "grwc;PqqG;KNG2;iEEe", "pdf_size": 1012484, "rating": "4;4;6;6", "confidence": "4;4;5;3", "soundness": "2;3;4;2", "novelty": "2;2;4;2", "presentation": "3;3;4;2", "wc_summary": "183;62;27;218", "wc_strengths": "47;34;46;127", "wc_weaknesses": "33;115;127;31", "wc_questions": "21;63;27;55", "wc_limitations": "11;22;5;48", "wc_review": "295;296;232;479", "wc_reply_reviewers": "138;28;0;82", "wc_reply_authors": "380;168;0;17", "reply_reviewers": "2;1;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.5, 79.93903927368655 ], "wc_strengths_avg": [ 63.5, 37.016888037759195 ], "wc_weaknesses_avg": [ 76.5, 44.707381940793624 ], "wc_questions_avg": [ 41.5, 17.853571071357123 ], "wc_limitations_avg": [ 21.5, 16.469669092000604 ], "wc_review_avg": [ 325.5, 92.3376954444933 ], "wc_reply_reviewers_avg": [ 62.0, 52.85830114561004 ], "wc_reply_authors_avg": [ 141.25, 152.56699348155223 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10330307285861215347&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": 
"Characterizing the Impacts of Semi-supervised Learning for Weak Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71255", "id": "Z8TjsPFBSx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d6270381e018b3d83eb9be7d0b06036-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Z8TjsPFBSx", "openreview": "https://openreview.net/forum?id=Z8TjsPFBSx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71255", "video": "https://nips.cc/virtual/2023/poster/71255", "author_site": "Jeffrey Li, Jieyu Zhang, Ludwig Schmidt, Alexander Ratner", "tldr": "", "abstract": "Labeling training data is a critical and expensive step in producing high accuracy ML models, whether training from scratch or fine-tuning. \nTo make labeling more efficient, two major approaches are programmatic weak supervision (WS) and semi-supervised learning (SSL). More recent works have either explicitly or implicitly used techniques at their intersection, but in various complex and ad hoc ways. In this work, we define a simple, modular design space to study the use of SSL techniques for WS more systematically. Surprisingly, we find that fairly simple methods from our design space match the performance of more complex state-of-the-art methods, averaging a 3 p.p. increase in accuracy/F1-score across 8 standard WS benchmarks. Further, we provide practical guidance on when different components are worth their added complexity and training costs. Contrary to current understanding, we find using SSL is not necessary to obtain the best performance on most WS benchmarks but is more effective when: (1) end models are smaller, and (2) WS provides labels for only a small portion of training examples.", "keywords": "Weak Supervision;Semi-supervised Learning;Learning From Limited Labels", "primary_area": "", "supplementary_material": "", "author": "Jeffrey Li;Jieyu Zhang;Ludwig Schmidt;Alexander Ratner", "authorids": "~Jeffrey_Li1;~Jieyu_Zhang1;~Ludwig_Schmidt1;~Alexander_Ratner1", "gender": "M;M;M;M", "homepage": ";https://jieyuz2.github.io/;http://people.csail.mit.edu/ludwigs/;https://ajratner.github.io/", "dblp": ";;141/2720;180/5513", "google_scholar": "JDS2BnIAAAAJ;T_INUHUAAAAJ;SWMKy70AAAAJ;rfwwtFYAAAAJ", "orcid": ";0000-0002-1846-2436;;", "linkedin": "jeffrey-li-a78684111/;jieyu-zhang-3baaa8154/;ludwig-schmidt-87ba3612/;alexander-ratner-038ba239/", "or_profile": "~Jeffrey_Li1;~Jieyu_Zhang1;~Ludwig_Schmidt1;~Alexander_Ratner1", "aff": "Department of Computer Science, University of Washington;University of Washington;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu;allenai.org;cs.washington.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nli2023characterizing,\ntitle={Characterizing the Impacts of Semi-supervised Learning for Weak Supervision},\nauthor={Jeffrey Li and Jieyu Zhang and Ludwig Schmidt and Alexander Ratner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Z8TjsPFBSx}\n}", "github": "", "project": "", "reviewers": "xhQz;1c9G;ohhg;vRqg;62vf", "pdf_size": 654071, "rating": "6;6;6;7;8", "confidence": "4;4;3;4;4", "soundness": "4;3;3;4;4", "novelty": "3;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "65;96;109;213;194", "wc_strengths": "58;104;112;220;194", "wc_weaknesses": 
"77;199;190;69;139", "wc_questions": "2;38;73;83;0", "wc_limitations": "4;7;16;3;9", "wc_review": "206;444;500;588;536", "wc_reply_reviewers": "19;73;47;17;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 135.4, 57.72555759799987 ], "wc_strengths_avg": [ 137.6, 60.15180795287869 ], "wc_weaknesses_avg": [ 134.8, 54.510182535008994 ], "wc_questions_avg": [ 39.2, 34.59132839311032 ], "wc_limitations_avg": [ 7.8, 4.621688003316537 ], "wc_review_avg": [ 454.8, 132.96375445962707 ], "wc_reply_reviewers_avg": [ 31.2, 25.771301868551383 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.375, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9213757860051648886&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cs.washington.edu;cs.washington.edu;allenai.org;cs.washington.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Z8q7GmS89a", "title": "How to Leverage Imperfect Demonstrations in Offline Imitation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline imitation learning (IL) with imperfect data has garnered increasing attention due to the scarcity of expert data in many real-world domains. A fundamental problem in this scenario is how to extract good behaviors from noisy demonstrations. In general, current approaches to the problem build upon state-action similarity to the expert, neglecting the valuable information in (potentially abundant) diverse behaviors that deviate from given expert demonstrations. In this paper, we introduce a simple yet effective data selection method that identifies the positive behavior based on its \"resultant state\", which is a more informative criterion that enables explicit utilization of dynamics information and the extraction of both expert-like and beneficial diverse behaviors. Further, we devise a lightweight constrained behavior cloning algorithm capable of leveraging the expert and selected data correctly. We term our proposed method iLID and evaluate it on a suite of complex and high-dimensional offline IL benchmarks, including MuJoCo and Adroit tasks. 
The results demonstrate that iLID achieves state-of-the-art performance, significantly outperforming existing methods often by 2-5x while maintaining a comparable runtime to behavior cloning (BC).", "keywords": "offline imitation learning;imperfect demonstration;imitation learning", "primary_area": "", "supplementary_material": "/attachment/0f06755fa6929e36c3d0c292726b1b5405cd063a.zip", "author": "Sheng Yue;Jiani Liu;Yongheng Deng;Ju Ren", "authorids": "~Sheng_Yue1;~Jiani_Liu2;~Yongheng_Deng1;~Ju_Ren1", "gender": "M;F;;", "homepage": "https://shaunyue.github.io;;https://ieeexplore.ieee.org/author/37087122976;", "dblp": "236/3241;;251/2735;", "google_scholar": "n0Gjw_oAAAAJ;https://scholar.google.com.hk/citations?user=cF-pqUIAAAAJ;;", "orcid": "0009-0001-3416-8181;;0000-0003-3010-3812;", "linkedin": ";;;", "or_profile": "~Sheng_Yue1;~Jiani_Liu2;~Yongheng_Deng1;~Ju_Ren1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "position": "Postdoc;PhD student;PhD student;", "bibtex": "@misc{\nyue2023how,\ntitle={How to Leverage Imperfect Demonstrations in Offline Imitation Learning},\nauthor={Sheng Yue and Jiani Liu and Yongheng Deng and Ju Ren},\nyear={2023},\nurl={https://openreview.net/forum?id=Z8q7GmS89a}\n}", "github": "", "project": "", "reviewers": "71T4;b1iR;K4fk;Y7ma", "site": "https://openreview.net/forum?id=Z8q7GmS89a", "pdf_size": 2972866, "rating": "3;4;6;7", "confidence": "4;3;3;3", "soundness": "1;2;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;2", "wc_summary": "92;127;49;85", "wc_strengths": "27;164;167;92", "wc_weaknesses": "46;203;275;133", "wc_questions": "113;71;16;356", "wc_limitations": "13;22;7;105", "wc_review": "291;587;514;771", "wc_reply_reviewers": "187;0;25;282", "wc_reply_authors": "562;0;13;409", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.25, 27.6891224129621 ], "wc_strengths_avg": [ 112.5, 57.7775908116633 ], "wc_weaknesses_avg": [ 164.25, 84.7448376008828 ], "wc_questions_avg": [ 139.0, 129.92112992119488 ], "wc_limitations_avg": [ 36.75, 39.76414842543469 ], "wc_review_avg": [ 540.75, 171.93367180398377 ], "wc_reply_reviewers_avg": [ 123.5, 116.30670659940466 ], "wc_reply_authors_avg": [ 246.0, 245.57585386189743 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7302967433402215, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Pcgnu3vWoowJ:scholar.google.com/&scioq=How+to+Leverage+Imperfect+Demonstrations+in+Offline+Imitation+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Generalized Bayesian Inference for Scientific Simulators via Amortized Cost Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71254", "id": "ZARAiV25CW", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/fdd565f63f49776bef620e0ce368a492-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZARAiV25CW", "openreview": "https://openreview.net/forum?id=ZARAiV25CW", "poster": "/media/PosterPDFs/NeurIPS%202023/71254.png?t=1701979851.6945894", "slides": "https://nips.cc/virtual/2023/poster/71254", "video": "https://nips.cc/virtual/2023/poster/71254", "author_site": "Richard Gao, Michael Deistler, Jakob H Macke", "tldr": "", "abstract": "Simulation-based inference (SBI) enables amortized Bayesian inference for simulators with implicit likelihoods. But when we are primarily interested in the quality of predictive simulations, or when the model cannot exactly reproduce the observed data (i.e., is misspecified), targeting the Bayesian posterior may be overly restrictive. Generalized Bayesian Inference (GBI) aims to robustify inference for (misspecified) simulator models, replacing the likelihood-function with a cost function that evaluates the goodness of parameters relative to data. However, GBI methods generally require running multiple simulations to estimate the cost function at each parameter value during inference, making the approach computationally infeasible for even moderately complex simulators. Here, we propose amortized cost estimation (ACE) for GBI to address this challenge: We train a neural network to approximate the cost function, which we define as the expected distance between simulations produced by a parameter and observed data. The trained network can then be used with MCMC to infer GBI posteriors for any observation without running additional simulations. We show that, on several benchmark tasks, ACE accurately predicts cost and provides predictive simulations that are closer to synthetic observations than other SBI methods, especially for misspecified simulators. Finally, we apply ACE to infer parameters of the Hodgkin-Huxley model given real intracellular recordings from the Allen Cell Types Database. ACE identifies better data-matching parameters while being an order of magnitude more simulation-efficient than a standard SBI method. In summary, ACE combines the strengths of SBI methods and GBI to perform robust and simulation-amortized inference for scientific simulators.", "keywords": "simulation-based inference;generalized bayesian inference;neural network;machine learning for science", "primary_area": "", "supplementary_material": "", "author": "Richard Gao;Michael Deistler;Jakob H. Macke", "authorids": "~Richard_Gao1;~Michael_Deistler1;~Jakob_H._Macke1", "gender": ";M;M", "homepage": "https://www.rdgao.com/;https://michaeldeistler.github.io/;http://www.mackelab.org", "dblp": "211/3796;243/5747;97/11106", "google_scholar": "a2o9IKYAAAAJ;Q24H-zYAAAAJ;FKOqtF8AAAAJ", "orcid": ";0000-0002-3573-0404;0000-0001-5154-8912", "linkedin": ";;", "or_profile": "~Richard_Gao1;~Michael_Deistler1;~Jakob_H_Macke1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\ngao2023generalized,\ntitle={Generalized Bayesian Inference for Scientific Simulators via Amortized Cost Estimation},\nauthor={Richard Gao and Michael Deistler and Jakob H. 
Macke},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZARAiV25CW}\n}", "github": "", "project": "", "reviewers": "HKbp;CFba;9qMX;bf2N", "pdf_size": 6165793, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "197;48;72;130", "wc_strengths": "192;51;75;187", "wc_weaknesses": "162;181;97;376", "wc_questions": "63;101;3;99", "wc_limitations": "56;26;9;40", "wc_review": "670;407;256;832", "wc_reply_reviewers": "104;21;5;105", "wc_reply_authors": "0;0;0;72", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 111.75, 57.54291876504006 ], "wc_strengths_avg": [ 126.25, 63.841111362506844 ], "wc_weaknesses_avg": [ 204.0, 104.07449255220993 ], "wc_questions_avg": [ 66.5, 39.657912199206855 ], "wc_limitations_avg": [ 32.75, 17.340343133859836 ], "wc_review_avg": [ 541.25, 223.88766714582562 ], "wc_reply_reviewers_avg": [ 58.75, 46.09975596464693 ], "wc_reply_authors_avg": [ 18.0, 31.176914536239792 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9774977253772548898&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Aiming towards the minimizers: fast convergence of SGD for overparametrized problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71253", "id": "ZBB8EFO7ma", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bef2af7a1199ec7a134b15ac00bd5377-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZBB8EFO7ma", "openreview": "https://openreview.net/forum?id=ZBB8EFO7ma", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71253", "video": "https://nips.cc/virtual/2023/poster/71253", "author_site": "Chaoyue Liu, Dmitriy Drusvyatskiy, Misha Belkin, Damek Davis, Yian Ma", "tldr": "", "abstract": "Modern machine learning paradigms, such as deep learning, occur in or close to the interpolation regime, wherein the number of model parameters is much larger than the number of data samples. \nIn this work, we propose a regularity condition within the interpolation regime which endows the stochastic gradient method with the same worst-case iteration complexity as the deterministic gradient method, while using only a single sampled gradient (or a minibatch) in each iteration. In contrast, all existing guarantees require the stochastic gradient method to take small steps, thereby resulting in a much slower linear rate of convergence. 
Finally, we demonstrate that our condition holds when training sufficiently wide feedforward neural networks with a linear output layer.", "keywords": "Polyak-Lojasiewicz condition;SGD;interpolation;fast convergence", "primary_area": "", "supplementary_material": "", "author": "Chaoyue Liu;Dmitriy Drusvyatskiy;Misha Belkin;Damek Davis;Yian Ma", "authorids": "~Chaoyue_Liu2;~Dmitriy_Drusvyatskiy3;~Misha_Belkin1;~Damek_Davis1;~Yian_Ma1", "gender": "M;M;;;M", "homepage": "https://cliu212.github.io/;https://sites.google.com/uw.edu/ddrusv;http://misha.belkin-wang.org/;;https://sites.google.com/view/yianma", "dblp": "191/6684-1;;;;", "google_scholar": "sRjoMX0AAAAJ;;Iwd9DdkAAAAJ;;A0TFlacAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Chaoyue_Liu2;~Dmitriy_Drusvyatskiy3;~Misha_Belkin1;~Damek_Davis1;~Yian_Ma1", "aff": "University of California, San Diego;University of Washington, Seattle;University of California, San Diego;;University of California, San Diego", "aff_domain": "ucsd.edu;uw.edu;ucsd.edu;;ucsd.edu", "position": "Postdoc;Associate Professor;Professor;;Assistant Professor", "bibtex": "@inproceedings{\nliu2023aiming,\ntitle={Aiming towards the minimizers: fast convergence of {SGD} for overparametrized problems},\nauthor={Chaoyue Liu and Dmitriy Drusvyatskiy and Misha Belkin and Damek Davis and Yian Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZBB8EFO7ma}\n}", "github": "", "project": "", "reviewers": "eCMo;APCi;vP9H;FvUq", "pdf_size": 873853, "rating": "5;6;6;6", "confidence": "2;3;3;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "49;56;97;59", "wc_strengths": "51;51;99;46", "wc_weaknesses": "110;104;277;184", "wc_questions": "5;16;5;5", "wc_limitations": "7;13;23;6", "wc_review": "222;240;501;300", "wc_reply_reviewers": "13;38;21;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.25, 18.686559340873856 ], "wc_strengths_avg": [ 61.75, 21.602951187279945 ], "wc_weaknesses_avg": [ 168.75, 69.99062437212572 ], "wc_questions_avg": [ 7.75, 4.763139720814412 ], "wc_limitations_avg": [ 12.25, 6.7592529172978875 ], "wc_review_avg": [ 315.75, 110.78441903083664 ], "wc_reply_reviewers_avg": [ 21.25, 10.207227831296802 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10710599007277004732&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ucsd.edu;uw.edu;ucsd.edu;;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, San Diego;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.washington.edu", "aff_unique_abbr": "UCSD;UW", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "San Diego;Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stability Guarantees for Feature Attributions with Multiplicative Smoothing", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71252", "id": "ZBxycYCuEL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c4889bd7f7ce643003746526da2c2fc4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZBxycYCuEL", "openreview": "https://openreview.net/forum?id=ZBxycYCuEL", "poster": "/media/PosterPDFs/NeurIPS%202023/71252.png?t=1702231204.718496", "slides": "https://nips.cc/virtual/2023/poster/71252", "video": "https://nips.cc/virtual/2023/poster/71252", "author_site": "Anton Xue, Rajeev Alur, Eric Wong", "tldr": "", "abstract": "Explanation methods for machine learning models tend not to provide any formal guarantees and may not reflect the underlying decision-making process.\nIn this work, we analyze stability as a property for reliable feature attribution methods. \nWe prove that relaxed variants of stability are guaranteed if the model is sufficiently Lipschitz with respect to the masking of features. \nWe develop a smoothing method called Multiplicative Smoothing (MuS) to achieve such a model.\nWe show that MuS overcomes the theoretical limitations of standard smoothing techniques and can be integrated with any classifier and feature attribution method.\nWe evaluate MuS on vision and language models with various feature attribution methods, such as LIME and SHAP, and demonstrate that MuS endows feature attributions with non-trivial stability guarantees.", "keywords": "Feature Attribution;Smoothing;Explainable;Interpretable;Provable Guarantees", "primary_area": "", "supplementary_material": "/attachment/82bed804c1f2d9268a7e3ee654699d7ae7308287.zip", "author": "Anton Xue;Rajeev Alur;Eric Wong", "authorids": "~Anton_Xue1;~Rajeev_Alur1;~Eric_Wong1", "gender": "M;M;M", "homepage": "https://antonxue.github.io/;http://www.cis.upenn.edu/~alur/;http://riceric22.github.io/", "dblp": "242/4544;https://dblp.uni-trier.de/pid/a/RAlur.html;64/1811-1.html", "google_scholar": "W6e3zzkAAAAJ;ZvLa1RIAAAAJ;pWnTMRkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Anton_Xue1;~Rajeev_Alur1;~Eric_Wong1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxue2023stability,\ntitle={Stability Guarantees for Feature Attributions with Multiplicative Smoothing},\nauthor={Anton Xue and Rajeev Alur and Eric Wong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZBxycYCuEL}\n}", "github": "", "project": "", "reviewers": "Qhau;WB7f;WjBG;JJJj;dPnc;7LaM", "pdf_size": 3389171, "rating": "5;6;6;6;7;7", "confidence": "3;3;4;2;4;2", "soundness": "3;3;3;3;4;4", "novelty": "2;2;3;2;3;3", "presentation": "3;2;3;3;3;3", "wc_summary": "54;96;70;93;74;97", "wc_strengths": "35;22;53;95;147;31", "wc_weaknesses": "177;43;77;36;250;119", "wc_questions": "17;89;102;35;53;31", "wc_limitations": "2;1;6;2;18;10", "wc_review": "285;251;308;261;542;288", "wc_reply_reviewers": "36;44;87;15;115;15", "wc_reply_authors": "0;0;334;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;1;2;1;1;1", "rating_avg": [ 6.166666666666667, 0.6871842709362768 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 80.66666666666667, 15.933891203622828 ], "wc_strengths_avg": [ 
63.833333333333336, 44.09239037395102 ], "wc_weaknesses_avg": [ 117.0, 76.29984709464802 ], "wc_questions_avg": [ 54.5, 31.057741922640375 ], "wc_limitations_avg": [ 6.5, 5.993051532121734 ], "wc_review_avg": [ 322.5, 99.90120119397965 ], "wc_reply_reviewers_avg": [ 52.0, 37.0854868288571 ], "wc_reply_authors_avg": [ 55.666666666666664, 124.4744507474883 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4653886725145611712&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "seas.upenn.edu;upenn.edu;upenn.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "AdANNS: A Framework for Adaptive Semantic Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71251", "id": "ZBzYWP2Gpl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f062da1973ac9ac61fc6d44dd7fa309f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZBzYWP2Gpl", "openreview": "https://openreview.net/forum?id=ZBzYWP2Gpl", "poster": "/media/PosterPDFs/NeurIPS%202023/71251.png?t=1698356738.841488", "slides": "https://nips.cc/virtual/2023/poster/71251", "video": "https://nips.cc/virtual/2023/poster/71251", "author_site": "Aniket Rege, Aditya Kusupati, Sharan Ranjit S, Alan Fan, Qingqing Cao, Sham Kakade, Prateek Jain, Ali Farhadi", "tldr": "", "abstract": "Web-scale search systems learn an encoder to embed a given query which is then hooked into an approximate nearest neighbor search (ANNS) pipeline to retrieve similar data points. To accurately capture tail queries and data points, learned representations typically are _rigid, high-dimensional_ vectors that are generally used as-is in the entire ANNS pipeline and can lead to computationally expensive retrieval. In this paper, we argue that instead of rigid representations, different stages of ANNS can leverage _adaptive representations_ of varying capacities to achieve significantly better accuracy-compute trade-offs, i.e., stages of ANNS that can get away with more approximate computation should use a lower-capacity representation of the same data point. To this end, we introduce AdANNS, a novel ANNS design framework that explicitly leverages the flexibility of Matryoshka Representations. We demonstrate state-of-the-art accuracy-compute trade-offs using novel AdANNS-based key ANNS building blocks like search data structures (AdANNS-IVF) and quantization (AdANNS-OPQ). For example on ImageNet retrieval, AdANNS-IVF is up to $\\mathbf{1.5}$% more accurate than the rigid representations-based IVF at the same compute budget; and matches accuracy while being up to $\\mathbf{90}\\times$ faster in _wall-clock time_. For Natural Questions, $32$-byte AdANNS-OPQ matches the accuracy of the $64$-byte OPQ baseline constructed using rigid representations -- _same accuracy at half the cost!_ We further show that the gains from AdANNS translate to modern-day composite ANNS indices that combine search structures and quantization. 
Finally, we demonstrate that AdANNS can enable inference-time adaptivity for compute-aware search on ANNS indices built non-adaptively on matryoshka representations. Code is open-sourced at https://github.com/RAIVNLab/AdANNS.", "keywords": "Semantic Search;Approximate Nearest Neighbor Search;Large-scale search;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/9cf75026a9598b23ba42d037b8445e688599d5ab.pdf", "author": "Aniket Rege;Aditya Kusupati;Sharan Ranjit S;Alan Fan;Qingqing Cao;Sham M. Kakade;Prateek Jain;Ali Farhadi", "authorids": "~Aniket_Rege1;~Aditya_Kusupati1;~Sharan_Ranjit_S1;~Alan_Fan1;~Qingqing_Cao1;~Sham_M._Kakade1;~Prateek_Jain1;~Ali_Farhadi3", "gender": "M;M;M;;M;M;M;M", "homepage": "https://aniketrege.github.io/;http://www.adityakusupati.com/;;;https://awk.ai/;https://shamulent.github.io;http://prateekjain.org;https://homes.cs.washington.edu/~ali/", "dblp": "271/6999;231/7662;;;;s/SMKakade;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html;37/5826", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;;;vLpPyUUAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;qYhRbJoAAAAJ;jeOFRDsAAAAJ", "orcid": ";0000-0001-8455-1851;;;0000-0002-8564-9241;;;", "linkedin": "aniket-rege/;adityakusupati/;https://linkedin.com/in/sharan-ranjit;https://linkedin.com/in/alan-fan;qqcao;;;", "or_profile": "~Aniket_Rege1;~Aditya_Kusupati1;~Sharan_Ranjit_S1;~Alan_Fan1;~Qingqing_Cao1;~Sham_M._Kakade1;~Prateek_Jain1;~Ali_Farhadi3", "aff": "University of Washington;Department of Computer Science, University of Washington;University of Washington;Department of Computer Science;University of Washington, Seattle;Harvard University;Google;University of Washington", "aff_domain": "uw.edu;cs.washington.edu;uw.edu;cs.washington.edu;uw.edu;harvard.edu;google.com;cs.uw.edu", "position": "MS student;PhD student;MS student;Undergrad student;Postdoc;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nrege2023adanns,\ntitle={Ad{ANNS}: A Framework for Adaptive Semantic Search},\nauthor={Aniket Rege and Aditya Kusupati and Sharan Ranjit S and Alan Fan and Qingqing Cao and Sham M. 
Kakade and Prateek Jain and Ali Farhadi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZBzYWP2Gpl}\n}", "github": "", "project": "", "reviewers": "i99a;kQzU;kCbM;V9Qf", "pdf_size": 861292, "rating": "4;5;7;7", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "48;140;58;96", "wc_strengths": "40;68;113;81", "wc_weaknesses": "192;132;13;182", "wc_questions": "31;21;1;93", "wc_limitations": "16;34;1;1", "wc_review": "327;395;186;453", "wc_reply_reviewers": "0;53;12;81", "wc_reply_authors": "23;89;37;31", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 36.204281514760105 ], "wc_strengths_avg": [ 75.5, 26.23451924468981 ], "wc_weaknesses_avg": [ 129.75, 71.13499490405549 ], "wc_questions_avg": [ 36.5, 34.36204301260331 ], "wc_limitations_avg": [ 13.0, 13.583077707206124 ], "wc_review_avg": [ 340.25, 99.59762798380291 ], "wc_reply_reviewers_avg": [ 36.5, 32.34578797927173 ], "wc_reply_authors_avg": [ 45.0, 25.88435821108957 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17491745353662203526&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "uw.edu;cs.washington.edu;uw.edu;cs.washington.edu;uw.edu;harvard.edu;google.com;cs.uw.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;3;0", "aff_unique_norm": "University of Washington;Unknown Institution;Harvard University;Google", "aff_unique_dep": ";Department of Computer Science;;Google", "aff_unique_url": "https://www.washington.edu;;https://www.harvard.edu;https://www.google.com", "aff_unique_abbr": "UW;;Harvard;Google", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Seattle;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "On Learning Latent Models with Multi-Instance Weak Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71250", "id": "ZD65F3x1jU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e83498c3eafe109a44b12979c2c73db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZD65F3x1jU", "openreview": "https://openreview.net/forum?id=ZD65F3x1jU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71250", "video": "https://nips.cc/virtual/2023/poster/71250", "author_site": "Kaifu Wang, Efthymia Tsamoura, Dan Roth", "tldr": "", "abstract": "We consider a weakly supervised learning scenario where the supervision signal is generated by a transition function $\\sigma$ of labels associated with multiple input instances. We formulate this problem as *multi-instance Partial Label Learning (multi-instance PLL)*, which is an extension to the standard PLL problem. Our problem is met in different fields, including latent structural learning and neuro-symbolic integration. Despite the existence of many learning techniques, limited theoretical analysis has been dedicated to this problem. 
In this paper, we provide the first theoretical study of multi-instance PLL with a possibly unknown transition $\\sigma$. Our main contributions are as follows: First, we propose a necessary and sufficient condition for the learnability of the problem. This condition nontrivially generalizes and relaxes the existing *small ambiguity degree* condition in the PLL literature, since we allow the transition to be deterministic. Second, we derive Rademacher-style error bounds based on the top-$k$ surrogate loss that is widely used in the neuro-symbolic literature. Furthermore, we conclude with empirical experiments for learning with an unknown transition. The empirical results align with our theoretical findings; however, they also expose the issue of scalability in the weak supervision literature.", "keywords": "weak supervision;partial label learning;neuro-symbolic learning;latent structural learning", "primary_area": "", "supplementary_material": "", "author": "Kaifu Wang;Efthymia Tsamoura;Dan Roth", "authorids": "~Kaifu_Wang1;~Efthymia_Tsamoura1;~Dan_Roth3", "gender": ";F;M", "homepage": "https://kaifu96.github.io;;https://www.cis.upenn.edu/~danroth/", "dblp": ";03/2000;r/DanRoth", "google_scholar": ";;E-bpPWgAAAAJ", "orcid": ";;", "linkedin": ";;dan-roth-8667361/", "or_profile": "~Kaifu_Wang1;~Efthymia_Tsamoura1;~Dan_Roth3", "aff": "University of Pennsylvania;Samsung AI;Amazon", "aff_domain": "upenn.edu;samsung.com;amazon.com", "position": "PhD student;Researcher;VP and Distinguished Scientist", "bibtex": "@inproceedings{\nwang2023on,\ntitle={On Learning Latent Models with Multi-Instance Weak Supervision},\nauthor={Kaifu Wang and Efthymia Tsamoura and Dan Roth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZD65F3x1jU}\n}", "github": "", "project": "", "reviewers": "6VmD;vhBR;a7AK;KpA9", "pdf_size": 583518, "rating": "5;6;6;7", "confidence": "3;3;2;4", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;2;4", "wc_summary": "132;106;68;215", "wc_strengths": "76;131;66;50", "wc_weaknesses": "349;154;62;96", "wc_questions": "77;184;40;25", "wc_limitations": "42;52;16;4", "wc_review": "676;627;252;390", "wc_reply_reviewers": "109;0;51;33", "wc_reply_authors": "95;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 130.25, 53.9646875280493 ], "wc_strengths_avg": [ 80.75, 30.457962834043908 ], "wc_weaknesses_avg": [ 165.25, 111.07064193566183 ], "wc_questions_avg": [ 81.5, 62.13091018164791 ], "wc_limitations_avg": [ 28.5, 19.30673457630782 ], "wc_review_avg": [ 486.25, 173.17097764925853 ], "wc_reply_reviewers_avg": [ 48.25, 39.55613100392909 ], "wc_reply_authors_avg": [ 23.75, 41.13620667976084 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16549475904828878152&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "upenn.edu;samsung.com;amazon.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Pennsylvania;Samsung;Amazon", "aff_unique_dep": ";Samsung AI;Amazon.com, Inc.", "aff_unique_url": 
"https://www.upenn.edu;https://www.samsung.com;https://www.amazon.com", "aff_unique_abbr": "UPenn;Samsung AI;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;South Korea" }, { "title": "A Step Towards Worldwide Biodiversity Assessment: The BIOSCAN-1M Insect Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73549", "id": "ZDnnzsado4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/87dbbdc3a685a97ad28489a1d57c45c1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ZDnnzsado4", "openreview": "https://openreview.net/forum?id=ZDnnzsado4", "poster": "/media/PosterPDFs/NeurIPS%202023/73549.png?t=1701487737.4713573", "slides": "https://nips.cc/virtual/2023/poster/73549", "video": "https://nips.cc/virtual/2023/poster/73549", "author_site": "Zahra Gharaee, ZeMing Gong, Nicholas Pellegrino, Iuliia Zarubiieva, Joakim Bruslund Haurum, Scott Lowe, Jaclyn McKeown, Chris Ho, Joschka McLeod, Yi-Yun Wei, Jireh Agda, Sujeevan Ratnasingham, Dirk Steinke, Angel Chang, Graham Taylor, Paul Fieguth", "tldr": "", "abstract": "In an effort to catalog insect biodiversity, we propose a new large dataset of hand-labelled insect images, the BIOSCAN-1M Insect Dataset. Each record is taxonomically classified by an expert, and also has associated genetic information including raw nucleotide barcode sequences and assigned barcode index numbers, which are genetic-based proxies for species classification. This paper presents a curated million-image dataset, primarily to train computer-vision models capable of providing image-based taxonomic assessment, however, the dataset also presents compelling characteristics, the study of which would be of interest to the broader machine learning community. Driven by the biological nature inherent to the dataset, a characteristic long-tailed class-imbalance distribution is exhibited. Furthermore, taxonomic labelling is a hierarchical classification scheme, presenting a highly fine-grained classification problem at lower levels. Beyond spurring interest in biodiversity research within the machine learning community, progress on creating an image-based taxonomic classifier will also further the ultimate goal of all BIOSCAN research: to lay the foundation for a comprehensive survey of global biodiversity. This paper introduces the dataset and explores the classification task through the implementation and analysis of a baseline classifier. The code repository of the BIOSCAN-1M-Insect dataset is available at https://github.com/zahrag/BIOSCAN-1M", "keywords": "Insect biodiversity;Image classification;Class-imbalance distribution;Fine-grained classification;Taxonomic classification;DNA barcode sequences;Barcode Index Number (BIN)", "primary_area": "", "supplementary_material": "/attachment/045c9e0336ea1ba809767a21d29a33673f3037b1.pdf", "author": "Zahra Gharaee;ZeMing Gong;Nicholas Pellegrino;Iuliia Zarubiieva;Joakim Bruslund Haurum;Scott C Lowe;Jaclyn McKeown;Chris C.Y. Ho;Joschka McLeod;Yi-Yun Catherine Wei;Jireh Agda;Sujeevan Ratnasingham;Dirk Steinke;Angel X Chang;Graham W. Taylor;Paul W. 
Fieguth", "authorids": "~Zahra_Gharaee1;~ZeMing_Gong1;~Nicholas_Pellegrino1;~Iuliia_Zarubiieva1;~Joakim_Bruslund_Haurum1;~Scott_C_Lowe1;~Jaclyn_McKeown1;~Chris_C.Y._Ho1;~Joschka_McLeod1;~Yi-Yun_Catherine_Wei1;~Jireh_Agda1;~Sujeevan_Ratnasingham1;~Dirk_Steinke1;~Angel_X_Chang1;~Graham_W._Taylor1;~Paul_W._Fieguth1", "gender": "F;M;Not Specified;F;M;;F;;M;F;;M;M;F;;", "homepage": "https://zahrag.github.io/;https://github.com/zmgong;https://uwaterloo.ca/scholar/npellegr;;https://vbn.aau.dk/en/persons/139317;https://scottclowe.com/;;;;;;;https://biodiversitygenomics.net;https://angelxuanchang.github.io;;", "dblp": "174/4584;352/5753;;;201/0296;245/0038;;;;;;;;46/10489;;f/PWFieguth", "google_scholar": "https://scholar.google.pl/citations?user=nWe8d1MAAAAJ;;https://scholar.google.ca/citations?user=eiIC09EAAAAJ;;GAEtgr4AAAAJ;https://scholar.google.ca/citations?user=ZFPhxuAAAAAJ;;;;;;https://scholar.google.ca/citations?user=5JNqG1EAAAAJ;;8gfs8XIAAAAJ;;TObmBfYAAAAJ", "orcid": "0000-0003-0140-0025;;0000-0002-2102-2431;0009-0007-1597-8684;0000-0002-0544-0422;0000-0002-5237-3867;;;0000-0002-7503-1835;0000-0001-5855-4864;0009-0004-5235-9610;;;0009-0003-5055-6437;;0000-0001-7260-2260", "linkedin": "zahragh/;zeming-gong-86a12b1ab/;nicholas-nick-pellegrino/;ieyriay/;;scottclowe/;jaclyn-mckeown-1a452616/;;;;;sratnasingham/?originalSubdomain=ca;;;;paul-fieguth-1071461", "or_profile": "~Zahra_Gharaee1;~ZeMing_Gong1;~Nicholas_Pellegrino1;~Iuliia_Zarubiieva1;~Joakim_Bruslund_Haurum1;~Scott_C_Lowe1;~Jaclyn_McKeown1;~Chris_C.Y._Ho1;~Joschka_McLeod1;~Yi-Yun_Catherine_Wei1;~Jireh_Agda1;~Sujeevan_Ratnasingham1;~Dirk_Steinke1;~Angel_X_Chang1;~Graham_W._Taylor1;~Paul_W._Fieguth1", "aff": "University of Waterloo;Simon Fraser University;University of Waterloo;University of Guelph;Vector Institute;Vector Institute;;;;University of Guelph;University of Guelph;University of Guelph;;Simon Fraser University;;University of Waterloo", "aff_domain": "uwaterloo.ca;sfu.ca;uwaterloo.ca;uoguelph.ca;vectorinstitute.ai;vectorinstitute.ai;;;;uoguelph.ca;uoguelph.ca;uoguelph.ca;;sfu.ca;;uwaterloo.ca", "position": "Postdoc;MS student;PhD student;Postdoc;Visiting Researcher;Postdoc;;;;Programmer;Researcher;Researcher;;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\ngharaee2023a,\ntitle={A Step Towards Worldwide Biodiversity Assessment: The {BIOSCAN}-1M Insect Dataset},\nauthor={Zahra Gharaee and ZeMing Gong and Nicholas Pellegrino and Iuliia Zarubiieva and Joakim Bruslund Haurum and Scott C Lowe and Jaclyn McKeown and Chris C.Y. Ho and Joschka McLeod and Yi-Yun Catherine Wei and Jireh Agda and Sujeevan Ratnasingham and Dirk Steinke and Angel X Chang and Graham W. Taylor and Paul W. 
Fieguth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ZDnnzsado4}\n}", "github": "", "project": "", "reviewers": "4KRL;Dp7Q;t14s;qQMp", "pdf_size": 46967413, "rating": "6;6;7;7", "confidence": "3;4;4;5", "wc_summary_and_contributions": "33;213;99;64", "wc_strengths": "44;94;120;101", "wc_improvement": "128;337;60;102", "wc_limitations": "52;337;161;10", "wc_correctness": "31;675;7;12", "wc_clarity": "10;15;10;8", "wc_relation_to_prior_work": "46;252;32;5", "wc_documentation": "13;39;54;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "358;1963;544;307", "wc_reply_reviewers": "23;0;5;22", "wc_reply_authors": "718;1902;582;375", "reply_reviewers": "1;0;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 102.25, 68.0711943482704 ], "wc_strengths_avg": [ 89.75, 28.07467720206236 ], "wc_improvement_avg": [ 156.75, 106.85825892274308 ], "wc_limitations_avg": [ 140.0, 126.38631255005424 ], "wc_correctness_avg": [ 181.25, 285.2072711204958 ], "wc_clarity_avg": [ 10.75, 2.5860201081971503 ], "wc_relation_to_prior_work_avg": [ 83.75, 98.25063613025617 ], "wc_documentation_avg": [ 27.5, 19.981241202688086 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 793.0, 681.2345411090075 ], "wc_reply_reviewers_avg": [ 12.5, 10.161200716450788 ], "wc_reply_authors_avg": [ 894.25, 594.5049936712054 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8253258544893701534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "uwaterloo.ca;sfu.ca;uwaterloo.ca;uoguelph.ca;vectorinstitute.ai;vectorinstitute.ai;;;;uoguelph.ca;uoguelph.ca;uoguelph.ca;;sfu.ca;;uwaterloo.ca", "author_num": 16, "aff_unique_index": "0;1;0;2;3;3;2;2;2;1;0", "aff_unique_norm": "University of Waterloo;Simon Fraser University;University of Guelph;Vector Institute", "aff_unique_dep": ";;;", "aff_unique_url": "https://uwaterloo.ca;https://www.sfu.ca;https://www.uoguelph.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "UW;SFU;U of G;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Human-in-the-Loop Optimization for Deep Stimulus Encoding in Visual Prostheses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71249", "id": "ZED5wdGous", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb06bc3abcece7b8725a8b83b8fa3632-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZED5wdGous", "openreview": "https://openreview.net/forum?id=ZED5wdGous", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71249", "video": "https://nips.cc/virtual/2023/poster/71249", "author_site": "Jacob Granley, Tristan Fauvel, Matthew Chalk, Michael Beyeler", "tldr": "", "abstract": "Neuroprostheses show potential in restoring lost sensory function and enhancing human capabilities, but the sensations produced by current devices often seem unnatural or distorted. 
Exact placement of implants and differences in individual perception lead to significant variations in stimulus response, making personalized stimulus optimization a key challenge. Bayesian optimization could be used\nto optimize patient-specific stimulation parameters with limited noisy observations, but is not feasible for high-dimensional stimuli. Alternatively, deep learning models can optimize stimulus encoding strategies, but typically assume perfect knowledge of patient-specific variations. Here we propose a novel, practically feasible approach that overcomes both of these fundamental limitations. First, a deep encoder network is trained to produce optimal stimuli for any individual patient by inverting a forward model mapping electrical stimuli to visual percepts. Second, a preferential Bayesian optimization strategy utilizes this encoder to learn the optimal patient-specific parameters for a new patient, using a minimal number of pairwise comparisons between candidate stimuli. We demonstrate the viability of this approach on a novel, state-of-the-art visual prosthesis model. Our approach quickly learns a personalized stimulus encoder and leads to dramatic improvements in the quality of restored vision, outperforming existing encoding strategies. Further, this approach is robust to noisy patient feedback and misspecifications in the underlying forward model. Overall, our results suggest that combining the strengths of deep learning and Bayesian optimization could significantly improve the perceptual experience of patients fitted with visual prostheses and may prove a viable solution for a range of neuroprosthetic technologies.", "keywords": "Brain Computer Interfaces;BCI;Stimulus Encoding;Visual Prostheses;Bayesian Optimization;Preferential Bayesian Optimization;Human-in-the-loop Optimization;Sensory Neuroprostheses;Neuroprostheses;Patient-Specific Optimization;Latent Space Bayesian Optimization", "primary_area": "", "supplementary_material": "/attachment/261fabe8d3ca68075b6d20de1a817ec8a4fa7421.zip", "author": "Jacob Granley;Tristan Fauvel;Matthew Chalk;Michael Beyeler", "authorids": "~Jacob_Granley1;tri.fauvel@gmail.com;~Matthew_Chalk1;~Michael_Beyeler1", "gender": "M;;M;M", "homepage": ";;https://matthewjchalk.wixsite.com/mysite;", "dblp": "260/5940;;137/3929;136/0857", "google_scholar": "0jACZrEAAAAJ;;;dK-0kG4AAAAJ", "orcid": "0000-0002-9024-2454;;0000-0001-7782-4436;0000-0001-5233-844X", "linkedin": ";;;", "or_profile": "~Jacob_Granley1;tri.fauvel@gmail.com;~Matthew_Chalk1;~Michael_Beyeler1", "aff": "University of California, Santa Barbara;;Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);University of California, Santa Barbara", "aff_domain": "cs.ucsb.edu;;upmc.fr;ucsb.edu", "position": "PhD student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngranley2023humanintheloop,\ntitle={Human-in-the-Loop Optimization for Deep Stimulus Encoding in Visual Prostheses},\nauthor={Jacob Granley and Tristan Fauvel and Matthew Chalk and Michael Beyeler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZED5wdGous}\n}", "github": "", "project": "", "reviewers": "YFQD;HJZJ;bw3i", "pdf_size": 9500876, "rating": "5;6;6", "confidence": "3;4;5", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;4;3", "wc_summary": "84;72;103", "wc_strengths": "114;215;77", "wc_weaknesses": "221;258;43", "wc_questions": "377;153;103", "wc_limitations": "200;28;19", "wc_review": 
"996;726;345", "wc_reply_reviewers": "57;85;121", "wc_reply_authors": "0;52;312", "reply_reviewers": "1;1;2", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 86.33333333333333, 12.762793146051099 ], "wc_strengths_avg": [ 135.33333333333334, 58.32285620196901 ], "wc_weaknesses_avg": [ 174.0, 93.85449731721259 ], "wc_questions_avg": [ 211.0, 119.141372606944 ], "wc_limitations_avg": [ 82.33333333333333, 83.28398538868215 ], "wc_review_avg": [ 689.0, 267.0543015942638 ], "wc_reply_reviewers_avg": [ 87.66666666666667, 26.195843605851334 ], "wc_reply_authors_avg": [ 121.33333333333333, 136.48280314953806 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3390123797868571263&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "cs.ucsb.edu;;upmc.fr;ucsb.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Santa Barbara;Sorbonne Universit\u00e9", "aff_unique_dep": ";Facult\u00e9 des Sciences", "aff_unique_url": "https://www.ucsb.edu;https://www.sorbonne-universite.fr", "aff_unique_abbr": "UCSB;Sorbonne U", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Santa Barbara;Paris VI", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "title": "Learning to Reason and Memorize with Self-Notes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71248", "id": "ZFwNdsDCRL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/274d0146144643ee2459a602123c60ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZFwNdsDCRL", "openreview": "https://openreview.net/forum?id=ZFwNdsDCRL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71248", "video": "https://nips.cc/virtual/2023/poster/71248", "author_site": "Jack Lanchantin, Shubham Toshniwal, Jason Weston, arthur szlam, Sainbayar Sukhbaatar", "tldr": "", "abstract": "Large language models have been shown to struggle with multi-step reasoning, and do not retain previous reasoning steps for future use. We propose a simple method for solving both of these problems by allowing the model to take Self-Notes. Unlike recent chain-of-thought or scratchpad approaches, the model can deviate from the input context at any time to explicitly think and write down its thoughts. This allows the model to perform reasoning on the fly as it reads the context and even integrate previous reasoning steps, thus enhancing its memory with useful information and enabling multi-step reasoning. 
Experiments across a wide variety of tasks demonstrate that our method can outperform chain-of-thought and scratchpad methods by taking Self-Notes that interleave the input text.", "keywords": "Memory;Reasoning;Language Models", "primary_area": "", "supplementary_material": "", "author": "Jack Lanchantin;Shubham Toshniwal;Jason E Weston;Arthur Szlam;Sainbayar Sukhbaatar", "authorids": "~Jack_Lanchantin1;~Shubham_Toshniwal1;~Jason_E_Weston1;~Arthur_Szlam1;~Sainbayar_Sukhbaatar1", "gender": ";;;M;M", "homepage": "https://www.jacklanchantin.com/;;;;", "dblp": "178/8538.html;;;22/6733;56/10550", "google_scholar": "35PmAZwAAAAJ;;;;ri1sE34AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jack_Lanchantin1;~Shubham_Toshniwal1;~Jason_E_Weston1;~Arthur_Szlam1;~Sainbayar_Sukhbaatar1", "aff": "Meta;;;CUNY City College;Meta AI", "aff_domain": "facebook.com;;;;meta.com", "position": "Postdoc;;;;Research Scientist", "bibtex": "@inproceedings{\nlanchantin2023learning,\ntitle={Learning to Reason and Memorize with Self-Notes},\nauthor={Jack Lanchantin and Shubham Toshniwal and Jason E Weston and Arthur Szlam and Sainbayar Sukhbaatar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZFwNdsDCRL}\n}", "github": "", "project": "", "reviewers": "v6D1;PMpA;D5Ka;7QVa", "pdf_size": 550435, "rating": "5;6;7;8", "confidence": "4;5;4;4", "soundness": "2;2;4;4", "novelty": "2;3;3;4", "presentation": "4;2;4;4", "wc_summary": "98;68;163;163", "wc_strengths": "121;40;39;94", "wc_weaknesses": "259;76;291;92", "wc_questions": "345;42;13;20", "wc_limitations": "1;12;2;33", "wc_review": "824;238;508;402", "wc_reply_reviewers": "343;31;5;5", "wc_reply_authors": "17;17;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 123.0, 41.382363393117124 ], "wc_strengths_avg": [ 73.5, 35.316426772820606 ], "wc_weaknesses_avg": [ 179.5, 96.33405420722207 ], "wc_questions_avg": [ 105.0, 138.97661673821247 ], "wc_limitations_avg": [ 12.0, 12.864680330268607 ], "wc_review_avg": [ 493.0, 213.946254933336 ], "wc_reply_reviewers_avg": [ 96.0, 143.0 ], "wc_reply_authors_avg": [ 8.5, 8.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6714622421416903230&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "facebook.com;;;;meta.com", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Meta;City College of New York", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ccny.cuny.edu", "aff_unique_abbr": "Meta;CCNY", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On kernel-based statistical learning theory in the mean field limit", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71247", "id": "ZGElmTRk3w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/411fa9d368b5485be4c6bb62615b365e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZGElmTRk3w", "openreview": 
"https://openreview.net/forum?id=ZGElmTRk3w", "poster": "/media/PosterPDFs/NeurIPS%202023/71247.png?t=1701534437.923137", "slides": "https://nips.cc/virtual/2023/poster/71247", "video": "https://nips.cc/virtual/2023/poster/71247", "author_site": "Christian Fiedler, Michael Herty, Sebastian Trimpe", "tldr": "", "abstract": "In many applications of machine learning, a large number of variables are considered. Motivated by machine learning of interacting particle systems, we consider the situation when the number of input variables goes to infinity. First, we continue the recent investigation of the mean field limit of kernels and their reproducing kernel Hilbert spaces, completing the existing theory. Next, we provide results relevant for approximation with such kernels in the mean field limit, including a representer theorem. Finally, we use these kernels in the context of statistical learning in the mean field limit, focusing on Support Vector Machines. In particular, we show mean field convergence of empirical and infinite-sample solutions as well as the convergence of the corresponding risks. On the one hand, our results establish rigorous mean field limits in the context of kernel methods, providing new theoretical tools and insights for large-scale problems. On the other hand, our setting corresponds to a new form of limit of learning problems, which seems to have not been investigated yet in the statistical learning theory literature.", "keywords": "Reproducing Kernel Hilbert Spaces;Kernel Methods;Mean Field Limit;Interacting Particle Systems;Support Vector Machines;Statistical Learning Theory", "primary_area": "", "supplementary_material": "/attachment/dc2ea752c38c4b27a34b921b24e233d66a9d4a3f.pdf", "author": "Christian Fiedler;Michael Herty;Sebastian Trimpe", "authorids": "~Christian_Fiedler1;~Michael_Herty1;~Sebastian_Trimpe1", "gender": ";M;M", "homepage": "https://www.dsme.rwth-aachen.de/cms/DSME/Das-Institut/Team-CMS-Artikel-/~jptsq/Christian-Fiedler/;http://www.igpm.rwth-aachen.de;https://www.dsme.rwth-aachen.de/trimpe", "dblp": "257/5782;;15/8135", "google_scholar": "93Qt_hgAAAAJ;;https://scholar.google.de/citations?user=9kzHZssAAAAJ", "orcid": ";0000-0002-6262-2927;0000-0002-2785-2487", "linkedin": ";;sebastian-trimpe-2472a0a3/", "or_profile": "~Christian_Fiedler1;~Michael_Herty1;~Sebastian_Trimpe1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;RWTH Aachen University", "aff_domain": "rwth-aachen.de;rwth-aachen.de;rwth-aachen.de", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfiedler2023on,\ntitle={On kernel-based statistical learning theory in the mean field limit},\nauthor={Christian Fiedler and Michael Herty and Sebastian Trimpe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZGElmTRk3w}\n}", "github": "", "project": "", "reviewers": "sYSX;NtYB;m8Hw;Vo6s;SuzB", "pdf_size": 425400, "rating": "5;5;5;6;6", "confidence": "3;3;2;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;2;2;3", "presentation": "2;3;2;3;3", "wc_summary": "48;187;78;70;48", "wc_strengths": "40;159;121;70;25", "wc_weaknesses": "53;269;71;26;92", "wc_questions": "35;37;85;45;41", "wc_limitations": "18;22;1;1;4", "wc_review": "194;674;356;212;210", "wc_reply_reviewers": "46;20;0;0;29", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 
0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 86.2, 51.7857123152709 ], "wc_strengths_avg": [ 83.0, 50.2035855293225 ], "wc_weaknesses_avg": [ 102.2, 86.17052860462213 ], "wc_questions_avg": [ 48.6, 18.521339044464362 ], "wc_limitations_avg": [ 9.2, 8.97552226892675 ], "wc_review_avg": [ 329.2, 182.1146891384657 ], "wc_reply_reviewers_avg": [ 19.0, 17.618172436436193 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12520904461453226526&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "rwth-aachen.de;rwth-aachen.de;rwth-aachen.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "RWTH Aachen University", "aff_unique_dep": "", "aff_unique_url": "https://www.rwth-aachen.de", "aff_unique_abbr": "RWTH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Aachen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "ZHMYXfk4b1", "title": "Prompt-based Node Feature Extractor for Few-shot Learning on Text-Attributed Graph", "track": "main", "status": "Reject", "tldr": "", "abstract": "Text-attributed Graphs (TAGs), such as social networks and citation networks, are commonly found in the real world and consist of nodes represented by textual descriptions. \nCurrently, mainstream machine learning methods on TAGs involve a two-stage modeling approach: (1) unsupervised node feature extraction with pre-trained language models (PLMs); and (2) supervised learning using Graph Neural Networks (GNNs). \nHowever, we observe that these representations, which have undergone large-scale pre-training, do not significantly improve performance with a limited amount of training samples. \nThe main issue is that existing methods have not effectively integrated information from the graph and downstream tasks simultaneously. \nTo address this, we propose G-Prompt, a prompt-based node feature extractor that jointly leverages graph structure and downstream-task information. First, G-Prompt introduces a learnable GNN layer (i.e., adapter) at the end of PLMs, which is fine-tuned to better capture the masked tokens considering graph neighborhood information.\nAfter the adapter is trained, G-Prompt incorporates task-specific prompts to obtain interpretable node representations for the downstream task.\nOur experimental results demonstrate that our proposed method outperforms current state-of-the-art (SOTA) methods on few-shot node classification. 
More importantly, in zero-shot settings, the G-Prompt embeddings can not only provide better task interpretability than vanilla PLMs\nbut also achieve comparable performance with fully-supervised baselines.", "keywords": "Text-attributed graph; graph neural network; language model", "primary_area": "", "supplementary_material": "/attachment/456b23ecb6c78190c992c5a46743b5cfd51fa70d.pdf", "author": "Xuanwen Huang;Kaiqiao Han;Dezheng Bao;Yang Yang;Qi Zhu", "authorids": "~Xuanwen_Huang1;~Kaiqiao_Han1;~Dezheng_Bao1;~Yang_Yang35;~Qi_Zhu7", "gender": "M;M;M;M;M", "homepage": ";;https://dzbao.github.io/;http://yangy.org;https://gentlezhu.github.io/", "dblp": "256/9418;356/3989;356/3486;;66/5923-8", "google_scholar": "JFLCWNQAAAAJ;gFBnb-AAAAAJ;njTpYWsAAAAJ;;xCHy4c8AAAAJ", "orcid": ";0009-0008-1389-1291;0009-0000-5574-9682;0000-0002-5058-4417;0000-0003-0129-8542", "linkedin": ";kaiqiao-han-30a6a7329/;;;qi-zhu-22633598/", "or_profile": "~Xuanwen_Huang1;~Kaiqiao_Han1;~Dezheng_Bao1;~Yang_Yang35;~Qi_Zhu7", "aff": "Zhejiang University;Zhejiang University;College of Computer Science and Technology, Zhejiang University;Zhejiang University;University of Illinois, Urbana Champaign", "aff_domain": "zju.edu.cn;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn;illinois.edu", "position": "PhD student;Undergrad student;Undergrad student;Associate Professor;PhD student", "bibtex": "@misc{\nhuang2023promptbased,\ntitle={Prompt-based Node Feature Extractor for Few-shot Learning on Text-Attributed Graph},\nauthor={Xuanwen Huang and Kaiqiao Han and Dezheng Bao and Yang Yang and Qi Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=ZHMYXfk4b1}\n}", "github": "", "project": "", "reviewers": "xGC6;qmSV;Fbx8;4Eon;CCnd", "site": "https://openreview.net/forum?id=ZHMYXfk4b1", "pdf_size": 420195, "rating": "3;4;4;4;5", "confidence": "4;4;3;4;2", "soundness": "2;2;3;2;3", "novelty": "2;3;3;2;2", "presentation": "2;2;3;2;2", "wc_summary": "41;133;164;92;106", "wc_strengths": "14;41;59;56;48", "wc_weaknesses": "108;293;59;143;67", "wc_questions": "4;83;35;59;48", "wc_limitations": "12;108;41;9;66", "wc_review": "179;658;358;359;335", "wc_reply_reviewers": "21;25;0;11;9", "wc_reply_authors": "0;0;45;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 4.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 107.2, 41.24754538151331 ], "wc_strengths_avg": [ 43.6, 16.0822883943797 ], "wc_weaknesses_avg": [ 134.0, 85.03175877282558 ], "wc_questions_avg": [ 45.8, 26.17938119971517 ], "wc_limitations_avg": [ 47.2, 36.8423669163641 ], "wc_review_avg": [ 377.8, 155.31310311754123 ], "wc_reply_reviewers_avg": [ 13.2, 8.908422980528034 ], "wc_reply_authors_avg": [ 9.0, 18.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1238521869363752293&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Zhejiang University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://illinois.edu", "aff_unique_abbr": "ZJU;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;1", 
"aff_country_unique": "China;United States" }, { "title": "A Graph-Theoretic Framework for Understanding Open-World Semi-Supervised Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71246", "id": "ZITOHWeAy7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b6898c70d5b328deaf2216aefd8f77a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZITOHWeAy7", "openreview": "https://openreview.net/forum?id=ZITOHWeAy7", "poster": "/media/PosterPDFs/NeurIPS%202023/71246.png?t=1699020572.816629", "slides": "https://nips.cc/virtual/2023/poster/71246", "video": "https://nips.cc/virtual/2023/poster/71246", "author_site": "Yiyou Sun, Zhenmei Shi, Yixuan Li", "tldr": "", "abstract": "Open-world semi-supervised learning aims at inferring both known and novel classes in unlabeled data, by harnessing prior knowledge from a labeled set with known classes. Despite its importance, there is a lack of theoretical foundations for this problem. This paper bridges the gap by formalizing a graph-theoretic framework tailored for the open-world setting, where the clustering can be theoretically characterized by graph factorization. Our graph-theoretic framework illuminates practical algorithms and provides guarantees. In particular, based on our graph formulation, we apply the algorithm called Spectral Open-world Representation Learning (SORL), and show that minimizing our loss is equivalent to performing spectral decomposition on the graph. Such equivalence allows us to derive a provable error bound on the clustering performance for both known and novel classes, and analyze rigorously when labeled data helps. Empirically, SORL can match or outperform several strong baselines on common benchmark datasets, which is appealing for practical usage while enjoying theoretical guarantees.", "keywords": "open-world learning;clustering;spectral analysis", "primary_area": "", "supplementary_material": "", "author": "Yiyou Sun;Zhenmei Shi;Yixuan Li", "authorids": "~Yiyou_Sun1;~Zhenmei_Shi1;~Yixuan_Li1", "gender": "M;M;F", "homepage": "https://sunyiyou.github.io/;http://zhmeishi.github.io/;http://pages.cs.wisc.edu/~sharonli/", "dblp": "211/5630;246/5216;144/6087-1", "google_scholar": "IKqlQo4AAAAJ;0oeNnzMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";zhenmei-shi-56408a113/;liyixuan", "or_profile": "~Yiyou_Sun1;~Zhenmei_Shi1;~Yixuan_Li1", "aff": "University of Wisconsin, Madison;University of Wisconsin - Madison;Cornell University", "aff_domain": "wisc.edu;wisc.edu;cornell.edu", "position": "PhD student;PhD student;Graduate Student", "bibtex": "@inproceedings{\nsun2023a,\ntitle={A Graph-Theoretic Framework for Understanding Open-World Semi-Supervised Learning},\nauthor={Yiyou Sun and Zhenmei Shi and Yixuan Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZITOHWeAy7}\n}", "github": "", "project": "", "reviewers": "1J6p;66eW;7sHp;iP9L", "pdf_size": 4962263, "rating": "6;7;7;7", "confidence": "3;3;3;3", "soundness": "3;4;3;4", "novelty": "4;3;3;4", "presentation": "3;4;3;4", "wc_summary": "120;51;46;137", "wc_strengths": "100;47;81;25", "wc_weaknesses": "571;87;105;37", "wc_questions": "547;27;327;72", "wc_limitations": "26;5;1;15", "wc_review": "1364;217;560;286", "wc_reply_reviewers": "668;44;389;14", "wc_reply_authors": "296;0;0;22", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.75, 
0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 88.5, 40.48765243873742 ], "wc_strengths_avg": [ 63.25, 29.123658767400773 ], "wc_weaknesses_avg": [ 200.0, 215.64090521049107 ], "wc_questions_avg": [ 243.25, 209.38526094259834 ], "wc_limitations_avg": [ 11.75, 9.67923034130297 ], "wc_review_avg": [ 606.75, 455.63108706496314 ], "wc_reply_reviewers_avg": [ 278.75, 268.7334878648361 ], "wc_reply_authors_avg": [ 79.5, 125.31859399147439 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13067033266105574354&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "wisc.edu;wisc.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu;https://www.cornell.edu", "aff_unique_abbr": "UW;UW-Madison;Cornell", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sparse Parameterization for Epitomic Dataset Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71245", "id": "ZIfhYAE2xg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9e8889198d16fb79926e71adbe38cae4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZIfhYAE2xg", "openreview": "https://openreview.net/forum?id=ZIfhYAE2xg", "poster": "/media/PosterPDFs/NeurIPS%202023/71245.png?t=1699411037.4946792", "slides": "https://nips.cc/virtual/2023/poster/71245", "video": "https://nips.cc/virtual/2023/poster/71245", "author_site": "Xing Wei, Anjia Cao, Funing Yang, Zhiheng Ma", "tldr": "", "abstract": "The success of deep learning relies heavily on large and diverse datasets, but the storage, preprocessing, and training of such data present significant challenges. To address these challenges, dataset distillation techniques have been proposed to obtain smaller synthetic datasets that capture the essential information of the originals. In this paper, we introduce a Sparse Parameterization for Epitomic datasEt Distillation (SPEED) framework, which leverages the concept of dictionary learning and sparse coding to distill epitomes that represent pivotal information of the dataset. SPEED prioritizes proper parameterization of the synthetic dataset and introduces techniques to capture spatial redundancy within and between synthetic images. We propose Spatial-Agnostic Epitomic Tokens (SAETs) and Sparse Coding Matrices (SCMs) to efficiently represent and select significant features. Additionally, we build a Feature-Recurrent Network (FReeNet) to generate hierarchical features with high compression and storage efficiency. Experimental results demonstrate the superiority of SPEED in handling high-resolution datasets, achieving state-of-the-art performance on multiple benchmarks and downstream applications. Our framework is compatible with a variety of dataset matching approaches, generally enhancing their performance. 
This work highlights the importance of proper parameterization in epitomic dataset distillation and opens avenues for efficient representation learning. Source code is available at https://github.com/MIV-XJTU/SPEED.", "keywords": "Dataset Distillation;Dataset Condensation;Sparse Coding;Dictionary Learning", "primary_area": "", "supplementary_material": "/attachment/2cac7c43c4c0d85be6418b1352d77b981c4156dd.zip", "author": "Xing Wei;Anjia Cao;Funing Yang;Zhiheng Ma", "authorids": "~Xing_Wei5;~Anjia_Cao1;~Funing_Yang1;~Zhiheng_Ma1", "gender": "M;;M;M", "homepage": "https://gr.xjtu.edu.cn/web/weixing;https://github.com/CAOANJIA;https://github.com/moolinks;https://zhiheng-ma.github.io", "dblp": "14/4301-1.html;369/7079;;173/9652", "google_scholar": "KNyC5EUAAAAJ;J-31eZYAAAAJ;;y6ijVukAAAAJ", "orcid": "0000-0002-5025-3941;;;0000-0002-0034-2065", "linkedin": ";;;", "or_profile": "~Xing_Wei5;~Anjia_Cao1;~Funing_Yang1;~Zhiheng_Ma1", "aff": "Xi'an Jiaotong University;Xidian University;Xi'an Jiaotong University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "xjtu.edu.cn;xidian.edu.cn;xjtu.edu.cn;siat.ac.cn", "position": "Assistant Professor;Undergrad student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nwei2023sparse,\ntitle={Sparse Parameterization for Epitomic Dataset Distillation},\nauthor={Xing Wei and Anjia Cao and Funing Yang and Zhiheng Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZIfhYAE2xg}\n}", "github": "", "project": "", "reviewers": "kT2g;7J1t;yNYq;27jp", "pdf_size": 26051092, "rating": "5;6;7;8", "confidence": "4;4;5;4", "soundness": "3;2;4;3", "novelty": "2;2;4;3", "presentation": "2;2;3;4", "wc_summary": "40;105;62;65", "wc_strengths": "60;114;66;85", "wc_weaknesses": "59;100;108;58", "wc_questions": "3;36;51;24", "wc_limitations": "1;43;1;1", "wc_review": "163;398;288;233", "wc_reply_reviewers": "0;27;18;51", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 68.0, 23.441416339462084 ], "wc_strengths_avg": [ 81.25, 21.040140208658308 ], "wc_weaknesses_avg": [ 81.25, 22.92787604642 ], "wc_questions_avg": [ 28.5, 17.55704986607944 ], "wc_limitations_avg": [ 11.5, 18.186533479473212 ], "wc_review_avg": [ 270.5, 85.91420138719792 ], "wc_reply_reviewers_avg": [ 24.0, 18.371173070873837 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9623724079063093210&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "xjtu.edu.cn;xidian.edu.cn;xjtu.edu.cn;siat.ac.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Xi'an Jiao Tong University;Xidian University;Chinese Academy of Sciences", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.xidian.edu.cn/;http://www.cas.cn", "aff_unique_abbr": "XJTU;Xidian;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "ResShift: Efficient Diffusion Model for Image Super-resolution by Residual Shifting", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71244", "id": "ZIyAHaLlsn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ac2eac5098dba08208807b65c5851cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZIyAHaLlsn", "openreview": "https://openreview.net/forum?id=ZIyAHaLlsn", "poster": "/media/PosterPDFs/NeurIPS%202023/71244.png?t=1699871487.9958138", "slides": "https://nips.cc/virtual/2023/poster/71244", "video": "https://nips.cc/virtual/2023/poster/71244", "author_site": "Zongsheng Yue, Jianyi Wang, Chen Change Loy", "tldr": "", "abstract": "Diffusion-based image super-resolution (SR) methods are mainly limited by the low inference speed due to the requirements of hundreds or even thousands of sampling steps. Existing acceleration sampling techniques inevitably sacrifice performance to some extent, leading to over-blurry SR results. To address this issue, we propose a novel and efficient diffusion model for SR that significantly reduces the number of diffusion steps, thereby eliminating the need for post-acceleration during inference and its associated performance deterioration. Our method constructs a Markov chain that transfers between the high-resolution image and the low-resolution image by shifting the residual between them, substantially improving the transition efficiency. Additionally, an elaborate noise schedule is developed to flexibly control the shifting speed and the noise strength during the diffusion process. Extensive experiments demonstrate that the proposed method obtains superior or at least comparable performance to current state-of-the-art methods on both synthetic and real-world datasets, \\textit{\\textbf{even only with 20 sampling steps}}. 
Our code and model will be made publicly available.", "keywords": "Super-resolution; Diffusion model; Efficient", "primary_area": "", "supplementary_material": "/attachment/031aee85a19f76e42abf0a01a7dcd7f5af275dda.pdf", "author": "Zongsheng Yue;Jianyi Wang;Chen Change Loy", "authorids": "~Zongsheng_Yue1;~Jianyi_Wang1;~Chen_Change_Loy2", "gender": "M;M;M", "homepage": "https://zsyoaoa.github.io/;https://iceclear.github.io;https://www.mmlab-ntu.com/person/ccloy/index.html", "dblp": "198/4455;39/4327;01/5855", "google_scholar": "F554LkQAAAAJ;Fq9SgKYAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ", "orcid": "0000-0002-9178-671X;0000-0001-7025-3626;0000-0001-5345-1591", "linkedin": ";jianyi-wang-a57b46236;", "or_profile": "~Zongsheng_Yue1;~Jianyi_Wang1;~Chen_Change_Loy2", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nyue2023resshift,\ntitle={ResShift: Efficient Diffusion Model for Image Super-resolution by Residual Shifting},\nauthor={Zongsheng Yue and Jianyi Wang and Chen Change Loy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZIyAHaLlsn}\n}", "github": "", "project": "", "reviewers": "Mv36;PfAe;k6dY;RmTK;vFLR", "pdf_size": 3675440, "rating": "5;6;6;6;6", "confidence": "5;3;4;4;4", "soundness": "3;4;3;3;3", "novelty": "3;3;3;2;3", "presentation": "1;3;3;3;3", "wc_summary": "92;30;134;198;76", "wc_strengths": "48;23;131;114;99", "wc_weaknesses": "208;34;234;357;59", "wc_questions": "2;24;7;19;76", "wc_limitations": "1;6;32;1;27", "wc_review": "351;117;538;689;337", "wc_reply_reviewers": "0;10;34;93;43", "wc_reply_authors": "0;0;32;64;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 106.0, 56.78027826631356 ], "wc_strengths_avg": [ 83.0, 40.855844135202986 ], "wc_weaknesses_avg": [ 178.4, 119.14126069502538 ], "wc_questions_avg": [ 25.6, 26.41666140904259 ], "wc_limitations_avg": [ 13.4, 13.365627557282899 ], "wc_review_avg": [ 406.4, 194.32920521630297 ], "wc_reply_reviewers_avg": [ 36.0, 32.47768464653846 ], "wc_reply_authors_avg": [ 19.2, 25.599999999999998 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.790569415042095, "gs_citation": 228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6404155521203830002&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "The ToMCAT Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73548", "id": "ZJWQfgXQb6", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/803d8d4b4a549d0d062fc704f8659ce3-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ZJWQfgXQb6", "openreview": "https://openreview.net/forum?id=ZJWQfgXQb6", "poster": "/media/PosterPDFs/NeurIPS%202023/73548.png?t=1701938906.666981", "slides": "https://nips.cc/virtual/2023/poster/73548", "video": "https://nips.cc/virtual/2023/poster/73548", "author_site": "Adarsh Pyarelal, Eric Duong, Caleb Shibu, Paulo Soares, Savannah Boyd, Payal Khosla, Valeria A. Pfeifer, Diheng Zhang, Eric Andrews, Rick Champlin, Vincent Raymond, Meghavarshini Krishnaswamy, Clayton Morrison, Emily Butler, Kobus Barnard", "tldr": "", "abstract": "We present a rich, multimodal dataset consisting of data from 40 teams of three humans conducting simulated urban search-and-rescue (SAR) missions in a Minecraft-based testbed, collected for the Theory of Mind-based Cognitive Architecture for Teams (ToMCAT) project. Modalities include two kinds of brain scan data---functional near-infrared spectroscopy (fNIRS) and electroencephalography (EEG), as well as skin conductance, heart rate, eye tracking, face images, spoken dialog audio data with automatic speech recognition (ASR) transcriptions, game screenshots, gameplay data, game performance data, demographic data, and self-report questionnaires. Each team undergoes up to six consecutive phases: three behavioral tasks, one mission training session, and two collaborative SAR missions. As time-synchronized multimodal data collected under a variety of circumstances, this dataset will support studying a large variety of research questions on topics including teamwork, coordination, plan recognition, affective computing, physiological linkage, entrainment, and dialog understanding. 
We provide an initial public release of the de-identified data, along with analyses illustrating the utility of this dataset to both computer scientists and social scientists.", "keywords": "multimodal;dataset;Minecraft;fNIRS;EEG;human teams", "primary_area": "", "supplementary_material": "/attachment/78e888fa6814c670bc2c97d8d75ae03f27068bac.zip", "author": "Adarsh Pyarelal;Eric Duong;Caleb Jones Shibu;Paulo Soares;Savannah Boyd;Payal Khosla;Valeria Pfeifer;Diheng Zhang;Eric S Andrews;Rick Champlin;Vincent Paul Raymond;Meghavarshini Krishnaswamy;Clayton Morrison;Emily Butler;Kobus Barnard", "authorids": "~Adarsh_Pyarelal1;~Eric_Duong2;~Caleb_Jones_Shibu1;~Paulo_Soares1;~Savannah_Boyd1;~Payal_Khosla1;~Valeria_Pfeifer1;~Diheng_Zhang1;~Eric_S_Andrews1;~Rick_Champlin1;~Vincent_Paul_Raymond1;~Meghavarshini_Krishnaswamy1;~Clayton_Morrison2;~Emily_Butler3;~Kobus_Barnard1", "gender": "M;M;;M;F;F;F;M;;;M;F;M;M;F", "homepage": "https://adarsh.cc;;;;;https://norton.arizona.edu/person/payal-khosla-phd;;https://dihengzhang.info;http://www.netlabgroup.com;https://rchamplin.github.io/portfolio/;https://github.com/vincentraymond-ua;https://linguistics.arizona.edu/people/meghavarshini-krishnaswamy;http://kobus.ca;https://ml4ai.github.io/;", "dblp": "242/7424;;;118/5058.html;;;;;;;;290/5863;53/2666;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;PeECn1cAAAAJ;Fpwf6FYAAAAJ;Wo-_VVIAAAAJ;;JTfQitkAAAAJ;jaXZM-wAAAAJ;;;;9HzdoS0AAAAJ;https://scholar.google.co.uk/citations?user=fKESO6sAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-1602-0386;0000-0003-3277-5145;0000-0002-3141-5669;0000-0002-0646-037X;;;;0000-0001-8878-8952;0000-0001-7186-5755;;;0000-0002-0205-9298;0000-0002-8568-9518;0000-0002-3606-0078;", "linkedin": "adarshpyarelal/;;caleb-jones-shibu/;paulosoaresua/;;;;;;;;meghavarshini-krishnaswamy/;;;", "or_profile": "~Adarsh_Pyarelal1;~Eric_Duong2;~Caleb_Jones_Shibu1;~Paulo_Soares1;~Savannah_Boyd1;~Payal_Khosla1;~Valeria_Pfeifer1;~Diheng_Zhang1;~Eric_S_Andrews1;~Rick_Champlin1;~Vincent_Paul_Raymond1;~Meghavarshini_Krishnaswamy1;~Kobus_Barnard1;~Clayton_Morrison1;~Emily_Annette_Butler1", "aff": "University of Arizona;University of Arizona;University of Arizona;University of Arizona;University of Arizona;;University of Arizona;University of Arizona;University of Arizona;University of Arizona;Lum.AI;University of Arizona;University of Arizona;University of Arizona;University of Arizona", "aff_domain": "arizona.edu;arizona.edu;arizona.edu;cs.arizona.edu;arizona.edu;;arizona.edu;arizona.edu;arizona.edu;arizona.edu;lum.ai;arizona.edu;arizona.edu;arizona.edu;arizona.edu", "position": "Assistant Professor;PhD student;MS student;PhD student;PhD student;;Postdoc;PhD student;PhD student;Researcher;Researcher;PhD student;Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npyarelal2023the,\ntitle={The To{MCAT} Dataset},\nauthor={Adarsh Pyarelal and Eric Duong and Caleb Jones Shibu and Paulo Soares and Savannah Boyd and Payal Khosla and Valeria Pfeifer and Diheng Zhang and Eric S Andrews and Rick Champlin and Vincent Paul Raymond and Meghavarshini Krishnaswamy and Clayton Morrison and Emily Butler and Kobus Barnard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ZJWQfgXQb6}\n}", "github": "", "project": "", "reviewers": "EAAF;69jK;W3oi;A9Aa;yyxM", "pdf_size": 5853736, "rating": "5;6;6;6;6", "confidence": "3;2;1;4;4", 
"wc_summary_and_contributions": "86;24;77;67;75", "wc_strengths": "40;58;3;38;53", "wc_improvement": "208;82;91;67;290", "wc_limitations": "76;95;44;25;84", "wc_correctness": "12;19;3;133;33", "wc_clarity": "1;3;3;92;8", "wc_relation_to_prior_work": "1;9;20;45;80", "wc_documentation": "1;2;8;67;48", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "426;293;250;535;672", "wc_reply_reviewers": "115;131;144;0;0", "wc_reply_authors": "1001;1312;756;1251;2087", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;3;2;2;4", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "wc_summary_and_contributions_avg": [ 65.8, 21.75683800555586 ], "wc_strengths_avg": [ 38.4, 19.252012881774206 ], "wc_improvement_avg": [ 147.6, 87.0967278375026 ], "wc_limitations_avg": [ 64.8, 26.16409753842085 ], "wc_correctness_avg": [ 40.0, 47.52262619005814 ], "wc_clarity_avg": [ 21.4, 35.37569787297488 ], "wc_relation_to_prior_work_avg": [ 31.0, 28.642625577973817 ], "wc_documentation_avg": [ 25.2, 27.15437349673161 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 435.2, 155.33113017035575 ], "wc_reply_reviewers_avg": [ 78.0, 64.34594004286517 ], "wc_reply_authors_avg": [ 1281.4, 448.4464739520203 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.08574929257125445, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13221071182003074156&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "arizona.edu;arizona.edu;arizona.edu;cs.arizona.edu;arizona.edu;;arizona.edu;arizona.edu;arizona.edu;arizona.edu;lum.ai;arizona.edu;arizona.edu;arizona.edu;arizona.edu", "author_num": 15, "aff_unique_index": "0;0;0;0;0;0;0;0;0;1;0;0;0;0", "aff_unique_norm": "University of Arizona;Lum.AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.arizona.edu;https://www.lum.ai", "aff_unique_abbr": "UA;Lum.AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PanoGRF: Generalizable Spherical Radiance Fields for Wide-baseline Panoramas", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71243", "id": "ZKVxABGJ6r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/16049e0c3f47899091ac46f8b3afb178-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZKVxABGJ6r", "openreview": "https://openreview.net/forum?id=ZKVxABGJ6r", "poster": "/media/PosterPDFs/NeurIPS%202023/71243.png?t=1697705246.0660217", "slides": "https://nips.cc/virtual/2023/poster/71243", "video": "https://nips.cc/virtual/2023/poster/71243", "author_site": "Zheng Chen, Yan-Pei Cao, Yuan-Chen Guo, Chen Wang, Ying Shan, Song-Hai Zhang", "tldr": "", "abstract": "Achieving an immersive experience enabling users to explore virtual environments with six degrees of freedom (6DoF) is essential for various applications such as virtual reality (VR). Wide-baseline panoramas are commonly used in these applications to reduce network bandwidth and storage requirements. However, synthesizing novel views from these panoramas remains a key challenge. 
Although existing neural radiance field methods can produce photorealistic views under narrow-baseline and dense image captures, they tend to overfit the training views when dealing with wide-baseline panoramas due to the difficulty in learning accurate geometry from sparse $360^{\circ}$ views. To address this problem, we propose PanoGRF, Generalizable Spherical Radiance Fields for Wide-baseline Panoramas, which constructs spherical radiance fields incorporating $360^{\circ}$ scene priors. Unlike generalizable radiance fields trained on perspective images, PanoGRF avoids the information loss from panorama-to-perspective conversion and directly aggregates geometry and appearance features of 3D sample points from each panoramic view based on spherical projection. Moreover, as some regions of the panorama are only visible from one view while invisible from others under wide-baseline settings, PanoGRF incorporates $360^{\circ}$ monocular depth priors into spherical depth estimation to improve the geometry features. Experimental results on multiple panoramic datasets demonstrate that PanoGRF significantly outperforms state-of-the-art generalizable view synthesis methods for wide-baseline panoramas (e.g., OmniSyn) and perspective images (e.g., IBRNet, NeuRay).", "keywords": "neural rendering;neural radiance field;novel view synthesis;panorama;360-degree image", "primary_area": "", "supplementary_material": "/attachment/100361412017a69775860064b7963cf9148ec975.zip", "author": "Zheng Chen;Yan-Pei Cao;Yuan-Chen Guo;Chen Wang;Ying Shan;Song-Hai Zhang", "authorids": "~Zheng_Chen12;~Yan-Pei_Cao1;~Yuan-Chen_Guo1;~Chen_Wang13;~Ying_Shan2;~Song-Hai_Zhang1", "gender": "M;M;;M;M;M", "homepage": ";https://yanpei.me/;https://cwchenwang.github.io;;https://www.cs.tsinghua.edu.cn/csen/info/1307/4342.htm;", "dblp": ";141/6343;82/4206-49;68/5910;45/6733;", "google_scholar": "fmbBnegAAAAJ;50194vkAAAAJ;5cY3Ho4AAAAJ;4oXBp9UAAAAJ;https://scholar.google.com.tw/citations?user=AWtV-EQAAAAJ;b7ZJV9oAAAAJ", "orcid": "0000-0001-9796-1745;;0000-0002-9315-3780;0000-0001-7673-8325;;0000-0001-6164-8343", "linkedin": ";;;YingShanProfile/;;", "or_profile": "~Zheng_Chen12;~Yan-Pei_Cao1;~Chen_Wang13;~Ying_Shan2;~Song-Hai_Zhang1;~Yuanchen_Guo1", "aff": "ARC Lab;Tencent ARC Lab, Tencent AI Lab;Tsinghua University;Tencent PCG ARC Lab;Tsinghua University;Tsinghua University", "aff_domain": "tencent.com;tencent.com;tsinghua.edu.cn;arc.tencent.com;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "Intern;Principal Researcher;MS student;Director;Associate Professor;PhD student", "bibtex": "@inproceedings{\nchen2023panogrf,\ntitle={Pano{GRF}: Generalizable Spherical Radiance Fields for Wide-baseline Panoramas},\nauthor={Zheng Chen and Yan-Pei Cao and Yuan-Chen Guo and Chen Wang and Ying Shan and Song-Hai Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZKVxABGJ6r}\n}", "github": "", "project": "", "reviewers": "74Ux;kb7G;un5g;WLFo;zV2U", "pdf_size": 18190671, "rating": "3;4;6;7;7", "confidence": "5;5;4;5;3", "soundness": "2;3;3;3;3", "novelty": "2;1;2;3;3", "presentation": "1;4;3;4;3", "wc_summary": "79;102;118;99;52", "wc_strengths": "11;83;46;216;76", "wc_weaknesses": "199;117;59;119;45", "wc_questions": "151;101;14;26;4", "wc_limitations": "5;28;5;24;1", "wc_review": "445;431;242;484;178", "wc_reply_reviewers": "390;117;81;33;104", "wc_reply_authors": "1485;494;136;11;434", "reply_reviewers": "1;1;1;1;2", "reply_authors": "5;3;3;2;3", 
"rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 90.0, 22.68920448142684 ], "wc_strengths_avg": [ 86.4, 69.61781381227078 ], "wc_weaknesses_avg": [ 107.8, 54.50284396249429 ], "wc_questions_avg": [ 59.2, 57.21328517049165 ], "wc_limitations_avg": [ 12.6, 11.11035552986492 ], "wc_review_avg": [ 356.0, 122.15563842901399 ], "wc_reply_reviewers_avg": [ 145.0, 125.80143083447024 ], "wc_reply_authors_avg": [ 512.0, 518.751192769713 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 3.2, 0.9797958971132712 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5846845821518306, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1199810141059045789&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "tencent.com;tencent.com;tsinghua.edu.cn;arc.tencent.com;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;2;2", "aff_unique_norm": "ARC Lab;Tencent;Tsinghua University", "aff_unique_dep": ";Tencent ARC Lab;", "aff_unique_url": ";https://ai.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": ";Tencent AI Lab;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1;1", "aff_country_unique": ";China" }, { "title": "Depth-discriminative Metric Learning for Monocular 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71242", "id": "ZNBblMEP16", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fda257e65f46e21dbc117b20fd0aba3c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZNBblMEP16", "openreview": "https://openreview.net/forum?id=ZNBblMEP16", "poster": "/media/PosterPDFs/NeurIPS%202023/71242.png?t=1699338623.0069954", "slides": "https://nips.cc/virtual/2023/poster/71242", "video": "https://nips.cc/virtual/2023/poster/71242", "author_site": "Wonhyeok Choi, Mingyu Shin, Sunghoon Im", "tldr": "", "abstract": "Monocular 3D object detection poses a significant challenge due to the lack of depth information in RGB images. Many existing methods strive to enhance the object depth estimation performance by allocating additional parameters for object depth estimation, utilizing extra modules or data. In contrast, we introduce a novel metric learning scheme that encourages the model to extract depth-discriminative features regardless of the visual attributes without increasing inference time and model size. Our method employs the distance-preserving function to organize the feature space manifold in relation to ground-truth object depth. The proposed $(K,B,\\epsilon)$-quasi-isometric loss leverages predetermined pairwise distance restriction as guidance for adjusting the distance among object descriptors without disrupting the non-linearity of the natural feature manifold. Moreover, we introduce an auxiliary head for object-wise depth estimation, which enhances depth quality while maintaining the inference time. The broad applicability of our method is demonstrated through experiments that show improvements in overall performance when integrated into various baselines. 
The results show that our method consistently improves the performance of various baselines by 23.51\\% and 5.78\\% on average across KITTI and Waymo, respectively.", "keywords": "Monocular 3D object detection;Autonomous driving;Recognition;Regression;Metric learning", "primary_area": "", "supplementary_material": "/attachment/ebdb39c5ffde72a004838f0b91ccd899af567904.pdf", "author": "Wonhyeok Choi;Mingyu Shin;Sunghoon Im", "authorids": "~Wonhyeok_Choi1;~Mingyu_Shin1;~Sunghoon_Im1", "gender": "M;M;M", "homepage": "https://wonhyeok-choi.github.io;;https://sunghoonim.github.io/", "dblp": "315/9302;331/8381;174/1228", "google_scholar": "7zAhXNIAAAAJ;kFSK0GsAAAAJ;https://scholar.google.co.kr/citations?user=37fSLtAAAAAJ", "orcid": ";;", "linkedin": "wonhyeok-choi-389aa92a9/;;", "or_profile": "~Wonhyeok_Choi1;~Mingyu_Shin1;~Sunghoon_Im1", "aff": "Daegu Gyeongbuk Institute of Science and Technology;Daegu Gyeongbuk Institute of Science and Technology;Daegu Gyeongbuk Institute of Science and Technology", "aff_domain": "dgist.ac.kr;dgist.ac.kr;dgist.ac.kr", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nchoi2023depthdiscriminative,\ntitle={Depth-discriminative Metric Learning for Monocular 3D Object Detection},\nauthor={Wonhyeok Choi and Mingyu Shin and Sunghoon Im},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZNBblMEP16}\n}", "github": "", "project": "", "reviewers": "tr2v;Wa1k;nNTt;6BaQ;eeNQ", "pdf_size": 1397735, "rating": "6;6;7;7;8", "confidence": "4;3;4;4;5", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "90;115;74;159;79", "wc_strengths": "131;72;104;118;117", "wc_weaknesses": "114;3;141;109;223", "wc_questions": "2;97;67;82;2", "wc_limitations": "20;27;44;27;2", "wc_review": "357;314;430;495;423", "wc_reply_reviewers": "24;0;38;28;35", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 103.4, 31.193589084938594 ], "wc_strengths_avg": [ 108.4, 20.105720578979508 ], "wc_weaknesses_avg": [ 118.0, 70.53509764649085 ], "wc_questions_avg": [ 50.0, 40.32369030731191 ], "wc_limitations_avg": [ 24.0, 13.549907748763458 ], "wc_review_avg": [ 403.8, 62.652693477615145 ], "wc_reply_reviewers_avg": [ 25.0, 13.446189051177289 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8451542547285165, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1246496752454645962&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "dgist.ac.kr;dgist.ac.kr;dgist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Daegu Gyeongbuk Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.dgist.ac.kr", "aff_unique_abbr": "DGIST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Daegu", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Encoding Human Behavior in Information Design through Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71241", "id": 
"ZOKhtz2Z9X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/17d0a21da4ec2c12b4f07fa2e34e4d6c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZOKhtz2Z9X", "openreview": "https://openreview.net/forum?id=ZOKhtz2Z9X", "poster": "/media/PosterPDFs/NeurIPS%202023/71241.png?t=1702235033.2933419", "slides": "https://nips.cc/virtual/2023/poster/71241", "video": "https://nips.cc/virtual/2023/poster/71241", "author_site": "Guanghui Yu, Wei Tang, Saumik Narayanan, Chien-Ju Ho", "tldr": "", "abstract": "We initiate the study of $\\textit{behavioral information design}$ through deep learning. In information design, a $\\textit{sender}$ aims to persuade a $\\textit{receiver}$ to take certain actions by strategically revealing information. We address scenarios in which the receiver might exhibit different behavior patterns other than the standard Bayesian rational assumption. We propose HAIDNet, a neural-network-based optimization framework for information design that can adapt to multiple representations of human behavior. Through extensive simulation, we show that HAIDNet can not only recover information policies that are near-optimal compared with known analytical solutions, but also can extend to designing information policies for settings that are computationally challenging (e.g., when there are multiple receivers) or for settings where there are no known solutions in general (e.g., when the receiver behavior does not follow the Bayesian rational assumption). We also conduct real-world human-subject experiments and demonstrate that our framework can capture human behavior from data and lead to more effective information policy for real-world human receivers.", "keywords": "Information design; Human behavior; Behavioral experiments", "primary_area": "", "supplementary_material": "/attachment/c8b39d7ad326d1fa5c46d1e5ac20253db0dc03fb.zip", "author": "Guanghui Yu;Wei Tang;Saumik Narayanan;Chien-Ju Ho", "authorids": "~Guanghui_Yu1;~Wei_Tang1;~Saumik_Narayanan1;~Chien-Ju_Ho1", "gender": "M;M;M;M", "homepage": ";https://wtang.org/;https://saumikn.com;http://chienjuho.com", "dblp": ";;;85/4929", "google_scholar": "EMo97CgAAAAJ;;GSgaC84AAAAJ;https://scholar.google.com.tw/citations?user=DWKoeW0AAAAJ", "orcid": "0000-0002-0077-8897;;;", "linkedin": ";;https://linkedin.com/in/saumikn;", "or_profile": "~Guanghui_Yu1;~Wei_Tang1;~Saumik_Narayanan1;~Chien-Ju_Ho1", "aff": "Washington University, St. Louis;Columbia University;Washington University, Saint Louis;Washington University in St. 
Louis", "aff_domain": "wustl.edu;columbia.edu;wustl.edu;wustl.edu", "position": "PhD student;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyu2023encoding,\ntitle={Encoding Human Behavior in Information Design through Deep Learning},\nauthor={Guanghui Yu and Wei Tang and Saumik Narayanan and Chien-Ju Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZOKhtz2Z9X}\n}", "github": "", "project": "", "reviewers": "34E5;AWce;5W54;7rN5", "pdf_size": 1114057, "rating": "4;5;6;6", "confidence": "5;1;4;4", "soundness": "4;3;4;3", "novelty": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "64;97;368;138", "wc_strengths": "29;106;142;57", "wc_weaknesses": "433;533;975;142", "wc_questions": "1;12;92;256", "wc_limitations": "1;10;91;100", "wc_review": "528;758;1668;693", "wc_reply_reviewers": "410;74;233;33", "wc_reply_authors": "1245;22;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 166.75, 119.1120795721408 ], "wc_strengths_avg": [ 83.5, 43.591857037754195 ], "wc_weaknesses_avg": [ 520.75, 299.017035467881 ], "wc_questions_avg": [ 90.25, 101.93717427906269 ], "wc_limitations_avg": [ 50.5, 45.224440295044005 ], "wc_review_avg": [ 911.75, 444.59778170836614 ], "wc_reply_reviewers_avg": [ 187.5, 148.60097577068598 ], "wc_reply_authors_avg": [ 316.75, 536.0006413242432 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.10050378152592121, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12199654557731819117&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "wustl.edu;columbia.edu;wustl.edu;wustl.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Washington University in St. Louis;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;https://www.columbia.edu", "aff_unique_abbr": "WUSTL;Columbia", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "St. Louis;;Saint Louis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PyNeRF: Pyramidal Neural Radiance Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71240", "id": "ZPj7ey5fXa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/767c1b5f7c03d9299e493bc9e1feeba6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZPj7ey5fXa", "openreview": "https://openreview.net/forum?id=ZPj7ey5fXa", "poster": "/media/PosterPDFs/NeurIPS%202023/71240.png?t=1701407624.309555", "slides": "https://nips.cc/virtual/2023/poster/71240", "video": "https://nips.cc/virtual/2023/poster/71240", "author_site": "Haithem Turki, Michael Zollh\u00f6fer, Christian Richardt, Deva Ramanan", "tldr": "", "abstract": "Neural Radiance Fields (NeRFs) can be dramatically accelerated by spatial grid representations. However, they do not explicitly reason about scale and so introduce aliasing artifacts when reconstructing scenes captured at different camera distances. Mip-NeRF and its extensions propose scale-aware renderers that project volumetric frustums rather than point samples. 
But such approaches rely on positional encodings that are not readily compatible with grid methods. We propose a simple modification to grid-based models by training model heads at different spatial grid resolutions. At render time, we simply use coarser grids to render samples that cover larger volumes. Our method can be easily applied to existing accelerated NeRF methods and significantly improves rendering quality (reducing error rates by 20\u201390% across synthetic and unbounded real-world scenes) while incurring minimal performance overhead (as each model head is quick to evaluate). Compared to Mip-NeRF, we reduce error rates by 20% while training over 60x faster.", "keywords": "view synthesis;3d reconstruction;scene representation;3d deep learning", "primary_area": "", "supplementary_material": "/attachment/c3209b941cbe9b02ba3262086649cf0043d5ed1c.pdf", "author": "Haithem Turki;Michael Zollh\u00f6fer;Christian Richardt;Deva Ramanan", "authorids": "~Haithem_Turki1;~Michael_Zollh\u00f6fer2;~Christian_Richardt1;~Deva_Ramanan1", "gender": "M;M;M;M", "homepage": "https://haithemturki.com/;https://richardt.name;https://www.cs.cmu.edu/~deva/;https://zollhoefer.com", "dblp": "64/10771;94/7988;49/488;52/8573", "google_scholar": "fuCTftEAAAAJ;AZH_wV0AAAAJ;9B8PoXUAAAAJ;https://scholar.google.de/citations?user=eQ8ZIG4AAAAJ", "orcid": ";0000-0001-6716-9845;;", "linkedin": ";cr333/;;", "or_profile": "~Haithem_Turki1;~Christian_Richardt1;~Deva_Ramanan1;~Michael_Zollhoefer2", "aff": "Carnegie Mellon University;Meta;School of Computer Science, Carnegie Mellon University;Meta", "aff_domain": "cmu.edu;meta.com;cs.cmu.edu;meta.com", "position": "PhD student;Research Scientist;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nturki2023pynerf,\ntitle={PyNe{RF}: Pyramidal Neural Radiance Fields},\nauthor={Haithem Turki and Michael Zollh{\\\"o}fer and Christian Richardt and Deva Ramanan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZPj7ey5fXa}\n}", "github": "", "project": "", "reviewers": "pHej;JoFm;LPiw;LTbu;PaSq", "pdf_size": 4148061, "rating": "5;6;6;7;7", "confidence": "5;4;4;4;5", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "63;90;85;134;38", "wc_strengths": "88;54;136;146;22", "wc_weaknesses": "139;106;219;84;38", "wc_questions": "44;67;89;238;65", "wc_limitations": "6;12;1;14;4", "wc_review": "340;329;530;616;167", "wc_reply_reviewers": "33;15;28;235;0", "wc_reply_authors": "0;0;0;203;0", "reply_reviewers": "1;1;1;2;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 82.0, 31.855925665407998 ], "wc_strengths_avg": [ 89.2, 47.27113284024406 ], "wc_weaknesses_avg": [ 117.2, 60.562034311935065 ], "wc_questions_avg": [ 100.6, 70.16152791950871 ], "wc_limitations_avg": [ 7.4, 4.882622246293481 ], "wc_review_avg": [ 396.4, 159.0101883528222 ], "wc_reply_reviewers_avg": [ 62.2, 87.15365741034624 ], "wc_reply_authors_avg": [ 40.6, 81.2 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2182178902359923, "gs_citation": 21, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2727718855567085966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cmu.edu;meta.com;cs.cmu.edu;meta.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Adversarial Low-rank Markov Decision Processes with Unknown Transition and Full-information Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71239", "id": "ZPtzwr2SwJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b93fda2862db7a7ac4a5c412adfb1ac2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZPtzwr2SwJ", "openreview": "https://openreview.net/forum?id=ZPtzwr2SwJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71239.png?t=1702108310.1017597", "slides": "https://nips.cc/virtual/2023/poster/71239", "video": "https://nips.cc/virtual/2023/poster/71239", "author_site": "Canzhe Zhao, Ruofeng Yang, Baoxiang Wang, Xuezhou Zhang, Shuai Li", "tldr": "", "abstract": "In this work, we study the low-rank MDPs with adversarially changed losses in the full-information feedback setting. In particular, the unknown transition probability kernel admits a low-rank matrix decomposition \\citep{REPUCB22}, and the loss functions may change adversarially but are revealed to the learner at the end of each episode. We propose a policy optimization-based algorithm POLO, and we prove that it attains the $\\widetilde{O}(K^{\\frac{5}{6}}A^{\\frac{1}{2}}d\\ln(1+M)/(1-\\gamma)^2)$ regret guarantee, where $d$ is rank of the transition kernel (and hence the dimension of the unknown representations), $A$ is the cardinality of the action space, $M$ is the cardinality of the model class that contains all the plausible representations, and $\\gamma$ is the discounted factor. Notably, our algorithm is oracle-efficient and has a regret guarantee with no dependence on the size of potentially arbitrarily large state space. Furthermore, we also prove an $\\Omega(\\frac{\\gamma^2}{1-\\gamma} \\sqrt{d A K})$ regret lower bound for this problem, showing that low-rank MDPs are statistically more difficult to learn than linear MDPs in the regret minimization setting. 
To the best of our knowledge, we present the first algorithm that interleaves representation learning, exploration, and exploitation to achieve the sublinear regret guarantee for RL with nonlinear function approximation and adversarial losses.", "keywords": "adversarial low-rank mdps", "primary_area": "", "supplementary_material": "/attachment/8bd8b9ef3a808d07cfce4f7ee87709b4d605d8ed.pdf", "author": "Canzhe Zhao;Ruofeng Yang;Baoxiang Wang;Xuezhou Zhang;Shuai Li", "authorids": "~Canzhe_Zhao1;~Ruofeng_Yang1;~Baoxiang_Wang1;~Xuezhou_Zhang2;~Shuai_Li3", "gender": "M;M;;;F", "homepage": "https://www.linkedin.com/in/canzhe-zhao-5357891b1/;https://github.com/wanshuiyin;;;http://shuaili8.github.io", "dblp": "https://dblp.uni-trier.de/pid/290/8007;350/4546;;;57/2281-10", "google_scholar": ";https://scholar.google.com.hk/citations?user=Cw9HDacAAAAJ;;;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Canzhe_Zhao1;~Ruofeng_Yang1;~Baoxiang_Wang1;~Xuezhou_Zhang2;~Shuai_Li3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;;;sjtu.edu.cn", "position": "PhD student;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023learning,\ntitle={Learning Adversarial Low-rank Markov Decision Processes with Unknown Transition and Full-information Feedback},\nauthor={Canzhe Zhao and Ruofeng Yang and Baoxiang Wang and Xuezhou Zhang and Shuai Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZPtzwr2SwJ}\n}", "github": "", "project": "", "reviewers": "qWwQ;aWbZ;ci4p;n9xa", "pdf_size": 432766, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "63;105;276;283", "wc_strengths": "28;55;110;139", "wc_weaknesses": "266;40;54;77", "wc_questions": "4;57;139;19", "wc_limitations": "5;1;13;17", "wc_review": "366;258;592;535", "wc_reply_reviewers": "30;12;15;21", "wc_reply_authors": "22;26;18;86", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 181.75, 98.90241402513894 ], "wc_strengths_avg": [ 83.0, 43.800684926151554 ], "wc_weaknesses_avg": [ 109.25, 91.45866552711121 ], "wc_questions_avg": [ 54.75, 52.33724773046439 ], "wc_limitations_avg": [ 9.0, 6.324555320336759 ], "wc_review_avg": [ 437.75, 132.9555846890231 ], "wc_reply_reviewers_avg": [ 19.5, 6.87386354243376 ], "wc_reply_authors_avg": [ 38.0, 27.85677655436824 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8725761457478599060&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;;;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Normalizing flow neural networks by JKO 
scheme", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71238", "id": "ZQMlfNijY5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/93fce71def4e3cf418918805455d436f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZQMlfNijY5", "openreview": "https://openreview.net/forum?id=ZQMlfNijY5", "poster": "/media/PosterPDFs/NeurIPS%202023/71238.png?t=1702255219.5889413", "slides": "https://nips.cc/virtual/2023/poster/71238", "video": "https://nips.cc/virtual/2023/poster/71238", "author_site": "Chen Xu, Xiuyuan Cheng, Yao Xie", "tldr": "", "abstract": "Normalizing flow is a class of deep generative models for efficient sampling and likelihood estimation, which achieves attractive performance, particularly in high dimensions. The flow is often implemented using a sequence of invertible residual blocks. Existing works adopt special network architectures and regularization of flow trajectories. In this paper, we develop a neural ODE flow network called JKO-iFlow, inspired by the Jordan-Kinderleherer-Otto (JKO) scheme, which unfolds the discrete-time dynamic of the Wasserstein gradient flow. The proposed method stacks residual blocks one after another, allowing efficient block-wise training of the residual blocks, avoiding sampling SDE trajectories and score matching or variational learning, thus reducing the memory load and difficulty in end-to-end training. We also develop adaptive time reparameterization of the flow network with a progressive refinement of the induced trajectory in probability space to improve the model accuracy further. Experiments with synthetic and real data show that the proposed JKO-iFlow network achieves competitive performance compared with existing flow and diffusion models at a significantly reduced computational and memory cost.", "keywords": "Normalizing flow;invertible neural networks;JKO scheme", "primary_area": "", "supplementary_material": "", "author": "Chen Xu;Xiuyuan Cheng;Yao Xie", "authorids": "~Chen_Xu12;~Xiuyuan_Cheng1;~Yao_Xie2", "gender": "M;;F", "homepage": "https://hamrel-cxu.github.io/;;http://www2.isye.gatech.edu/~yxie77", "dblp": ";79/9747;13/4242-2", "google_scholar": "https://scholar.google.com/citations?hl=en;I2gwdssAAAAJ;qvYp8ZQAAAAJ", "orcid": ";;", "linkedin": "chen-xu-92013714a/;;yaoxie/", "or_profile": "~Chen_Xu12;~Xiuyuan_Cheng1;~Yao_Xie2", "aff": "Georgia Institute of Technology;Duke University;Georgia Institute of Technology", "aff_domain": "gatech.edu;duke.edu;gatech.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nxu2023normalizing,\ntitle={Normalizing flow neural networks by {JKO} scheme},\nauthor={Chen Xu and Xiuyuan Cheng and Yao Xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZQMlfNijY5}\n}", "github": "", "project": "", "reviewers": "xTiV;uXKQ;NZGt;3zMK", "pdf_size": 7742352, "rating": "7;7;7;8", "confidence": "3;2;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;3", "wc_summary": "88;170;65;121", "wc_strengths": "82;98;39;165", "wc_weaknesses": "86;125;90;145", "wc_questions": "10;75;28;53", "wc_limitations": "67;21;5;3", "wc_review": "333;489;227;487", "wc_reply_reviewers": "19;0;25;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 
0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.0, 39.45250308915773 ], "wc_strengths_avg": [ 96.0, 45.304525160297175 ], "wc_weaknesses_avg": [ 111.5, 24.58149710656371 ], "wc_questions_avg": [ 41.5, 24.642443060703215 ], "wc_limitations_avg": [ 24.0, 25.787593916455254 ], "wc_review_avg": [ 384.0, 110.54863183232979 ], "wc_reply_reviewers_avg": [ 15.0, 9.246621004453464 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9984542681586331991&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "gatech.edu;duke.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.duke.edu", "aff_unique_abbr": "Georgia Tech;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Rethinking the Role of Token Retrieval in Multi-Vector Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71237", "id": "ZQzm0Z47jz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31d997278ee9069d6721bc194174bb4c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZQzm0Z47jz", "openreview": "https://openreview.net/forum?id=ZQzm0Z47jz", "poster": "/media/PosterPDFs/NeurIPS%202023/71237.png?t=1701734071.462841", "slides": "https://nips.cc/virtual/2023/poster/71237", "video": "https://nips.cc/virtual/2023/poster/71237", "author_site": "Jinhyuk Lee, Zhuyun Dai, Sai Meher Karthik Duddu, Tao Lei, Iftekhar Naim, Ming-Wei Chang, Vincent Zhao", "tldr": "", "abstract": "Multi-vector retrieval models such as ColBERT [Khattab et al., 2020] allow token-level interactions between queries and documents, and hence achieve state of the art on many information retrieval benchmarks. However, their non-linear scoring function cannot be scaled to millions of documents, necessitating a three-stage process for inference: retrieving initial candidates via token retrieval, accessing all token vectors, and scoring the initial candidate documents. The non-linear scoring function is applied over all token vectors of each candidate document, making the inference process complicated and slow. In this paper, we aim to simplify the multi-vector retrieval by rethinking the role of token retrieval. We present XTR, ConteXtualized Token Retriever, which introduces a simple, yet novel, objective function that encourages the model to retrieve the most important document tokens first. The improvement to token retrieval allows XTR to rank candidates only using the retrieved tokens rather than all tokens in the document, and enables a newly designed scoring stage that is two-to-three orders of magnitude cheaper than that of ColBERT. On the popular BEIR benchmark, XTR advances the state-of-the-art by 2.8 nDCG@10 without any distillation. 
Detailed analysis confirms our decision to revisit the token retrieval stage, as XTR demonstrates much better recall of the token retrieval stage compared to ColBERT.", "keywords": "information retrieval;document retrieval;natural language processing", "primary_area": "", "supplementary_material": "/attachment/d1d14e7b074916f4165929145826e0ee92145984.pdf", "author": "Jinhyuk Lee;Zhuyun Dai;Sai Meher Karthik Duddu;Tao Lei;Iftekhar Naim;Ming-Wei Chang;Vincent Y Zhao", "authorids": "~Jinhyuk_Lee2;~Zhuyun_Dai1;~Sai_Meher_Karthik_Duddu2;~Tao_Lei1;~Iftekhar_Naim1;~Ming-Wei_Chang3;~Vincent_Y_Zhao1", "gender": "M;;;M;M;;M", "homepage": "https://jhyuklee.github.io;;;;;;https://foo.bar", "dblp": "https://dblp.uni-trier.de/pers/hd/l/Lee:Jinhyuk;148/4531;;;11/8759;;301/7889", "google_scholar": "https://scholar.google.co.kr/citations?user=YWm_zVcAAAAJ;9bbHwJIAAAAJ;;g2uay50AAAAJ;E8-dfNcAAAAJ;;", "orcid": "0000-0003-4972-239X;;;;;;", "linkedin": "jinhyuk-lee-73b27489/;;;;;;", "or_profile": "~Jinhyuk_Lee2;~Zhuyun_Dai1;~Sai_Meher_Karthik_Duddu2;~Tao_Lei1;~Iftekhar_Naim1;~Ming-Wei_Chang3;~Vincent_Y_Zhao1", "aff": "Google;Google;;Google;Google;;Google", "aff_domain": "google.com;google.com;;google.com;google.com;;google.com", "position": "Research Scientist;Researcher;;Research scientist;Researcher;;Researcher", "bibtex": "@inproceedings{\nlee2023rethinking,\ntitle={Rethinking the Role of Token Retrieval in Multi-Vector Retrieval},\nauthor={Jinhyuk Lee and Zhuyun Dai and Sai Meher Karthik Duddu and Tao Lei and Iftekhar Naim and Ming-Wei Chang and Vincent Y Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZQzm0Z47jz}\n}", "github": "", "project": "", "reviewers": "HcB5;BRdC;VrQp;XbZR;5SZT", "pdf_size": 738897, "rating": "6;6;6;7;8", "confidence": "4;5;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;4;4", "presentation": "1;3;3;4;2", "wc_summary": "45;95;157;157;96", "wc_strengths": "18;81;90;58;86", "wc_weaknesses": "85;97;27;444;69", "wc_questions": "138;57;40;55;1", "wc_limitations": "25;25;34;4;11", "wc_review": "311;355;348;718;263", "wc_reply_reviewers": "164;72;0;325;0", "wc_reply_authors": "409;265;0;58;0", "reply_reviewers": "2;1;0;1;0", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 110.0, 42.57698909035255 ], "wc_strengths_avg": [ 66.6, 26.71029763967448 ], "wc_weaknesses_avg": [ 144.4, 151.65961888386772 ], "wc_questions_avg": [ 58.2, 44.682882628586086 ], "wc_limitations_avg": [ 19.8, 10.796295661012625 ], "wc_review_avg": [ 399.0, 162.81154750201227 ], "wc_reply_reviewers_avg": [ 112.2, 122.27902518420728 ], "wc_reply_authors_avg": [ 146.4, 163.52687852460218 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.46770717334674267, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15936538871396259042&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "google.com;google.com;;google.com;google.com;;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": 
"0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Revisiting Area Convexity: Faster Box-Simplex Games and Spectrahedral Generalizations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71236", "id": "ZRBGwpeewz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3bec3f5ad96055b7f60c93edc3606c8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZRBGwpeewz", "openreview": "https://openreview.net/forum?id=ZRBGwpeewz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71236", "video": "https://nips.cc/virtual/2023/poster/71236", "author_site": "Arun Jambulapati, Kevin Tian", "tldr": "", "abstract": "We investigate area convexity [Sherman17], a mysterious tool introduced to tackle optimization problems under the challenging $\\ell_\\infty$ geometry. We develop a deeper understanding of its relationship with conventional analyses of extragradient methods [Nemirovski04, Nesterov07]. We also give improved solvers for the subproblems required by variants of the [Sherman17] algorithm, designed through the lens of relative smoothness [BBT17, LFN18}.\n\nLeveraging these new tools, we give a state-of-the-art first-order algorithm for solving box-simplex games (a primal-dual formulation of $\\ell_\\infty$ regression) in a $d \\times n$ matrix with bounded rows, using $O(\\log d \\cdot \\epsilon^{-1})$ matrix-vector queries. As a consequence, we obtain improved complexities for approximate maximum flow, optimal transport, min-mean-cycle, and other basic combinatorial optimization problems. We also develop a near-linear time algorithm for a matrix generalization of box-simplex games, capturing a family of problems closely related to semidefinite programs recently used as subroutines in robust statistics and numerical linear algebra.", "keywords": "Optimization;optimal transport;linear programming;semidefinite programming", "primary_area": "", "supplementary_material": "/attachment/1ab38a543388fd89bef2968100d5687bf5757b98.pdf", "author": "Arun Jambulapati;Kevin Tian", "authorids": "jmblpati@uw.edu;~Kevin_Tian4", "gender": ";", "homepage": ";https://kjtian.github.io", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "jmblpati@uw.edu;~Kevin_Tian4", "aff": ";Microsoft", "aff_domain": ";microsoft.com", "position": ";Postdoc", "bibtex": "@inproceedings{\njambulapati2023revisiting,\ntitle={Revisiting Area Convexity: Faster Box-Simplex Games and Spectrahedral Generalizations},\nauthor={Arun Jambulapati and Kevin Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZRBGwpeewz}\n}", "github": "", "project": "", "reviewers": "Cfry;E7hD;mAce;Vb7x;Wg5p", "pdf_size": 370681, "rating": "4;5;5;7;7", "confidence": "2;4;3;3;3", "soundness": "3;3;3;4;3", "novelty": "2;2;3;3;3", "presentation": "3;1;2;3;2", "wc_summary": "146;110;105;252;91", "wc_strengths": "40;32;165;68;77", "wc_weaknesses": "89;145;140;116;85", "wc_questions": "22;305;107;90;52", "wc_limitations": "28;63;64;20;11", "wc_review": "325;655;581;546;316", "wc_reply_reviewers": "10;17;32;17;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], 
"presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 140.8, 58.48555377185036 ], "wc_strengths_avg": [ 76.4, 47.36496595586236 ], "wc_weaknesses_avg": [ 115.0, 24.90783009416918 ], "wc_questions_avg": [ 115.2, 99.40503005381568 ], "wc_limitations_avg": [ 37.2, 22.139557357815445 ], "wc_review_avg": [ 484.6, 138.56204386483333 ], "wc_reply_reviewers_avg": [ 18.6, 7.227724399837061 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2635231383473649, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3924942057915151165&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";microsoft.com", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Self-Supervised Learning with Lie Symmetries for Partial Differential Equations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71235", "id": "ZULq9QV8rH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5c46ae130105fa012da0446126c01d1d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZULq9QV8rH", "openreview": "https://openreview.net/forum?id=ZULq9QV8rH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71235", "video": "https://nips.cc/virtual/2023/poster/71235", "author_site": "Gr\u00e9goire Mialon, Quentin Garrido, Hannah Lawrence, Danyal Rehman, Yann LeCun, Bobak Kiani, Bobak Kiani", "tldr": "", "abstract": "Machine learning for differential equations paves the way for computationally efficient alternatives to numerical solvers, with potentially broad impacts in science and engineering. Though current algorithms typically require simulated training data tailored to a given setting, one may instead wish to learn useful information from heterogeneous sources, or from real dynamical systems observations that are messy or incomplete. In this work, we learn general-purpose representations of PDEs from heterogeneous data by implementing joint embedding methods for self-supervised learning (SSL), a framework for unsupervised representation learning that has had notable success in computer vision. Our representation outperforms baseline approaches to invariant tasks, such as regressing the coefficients of a PDE, while also improving the time-stepping performance of neural solvers. 
We hope that our proposed methodology will prove useful in the eventual development of general-purpose foundation models for PDEs.", "keywords": "Self-supervised learning;partial differential equations;Lie symmetries;data augmentation", "primary_area": "", "supplementary_material": "/attachment/98c5365751dc9564026f191fa33eaaaea567a119.pdf", "author": "Gr\u00e9goire Mialon;Quentin Garrido;Hannah Lawrence;Danyal Rehman;Yann LeCun;Bobak Kiani", "authorids": "~Gr\u00e9goire_Mialon1;~Quentin_Garrido1;~Hannah_Lawrence1;~Danyal_Rehman1;~Yann_LeCun1;~Bobak_Kiani1", "gender": ";M;F;M;M;", "homepage": ";https://garridoq.com;https://hannahlawrence.github.io/;;http://yann.lecun.com;", "dblp": "228/9191;285/6628;251/5474;;l/YannLeCun;232/4086", "google_scholar": ";RQaZUNsAAAAJ;;XdyK1qoAAAAJ;WLN3QrAAAAAJ;", "orcid": ";;;;;", "linkedin": ";;hannah-lawrence-417b5a130/;danyalrehman/;;bobak-kiani", "or_profile": "~Gr\u00e9goire_Mialon1;~Quentin_Garrido1;~Hannah_Lawrence1;~Danyal_Rehman1;~Yann_LeCun1;~Bobak_Kiani1", "aff": "Meta Facebook;Research, Facebook;Massachusetts Institute of Technology;Massachusetts Institute of Technology;New York University;Massachusetts Institute of Technology", "aff_domain": "fb.com;research.facebook.com;mit.edu;mit.edu;nyu.edu;mit.edu", "position": "Postdoc;PhD student;PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nmialon2023selfsupervised,\ntitle={Self-Supervised Learning with Lie Symmetries for Partial Differential Equations},\nauthor={Gr{\\'e}goire Mialon and Quentin Garrido and Hannah Lawrence and Danyal Rehman and Yann LeCun and Bobak Kiani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZULq9QV8rH}\n}", "github": "", "project": "", "reviewers": "xeoP;VTPv;swdq;rnp9", "pdf_size": 11771374, "rating": "3;5;7;8", "confidence": "4;4;4;4", "soundness": "2;4;3;4", "novelty": "1;2;3;3", "presentation": "3;4;3;4", "wc_summary": "44;110;61;46", "wc_strengths": "42;139;39;192", "wc_weaknesses": "246;187;217;160", "wc_questions": "54;286;144;27", "wc_limitations": "7;15;1;47", "wc_review": "393;737;462;472", "wc_reply_reviewers": "0;39;237;144", "wc_reply_authors": "0;0;535;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 65.25, 26.65872277510684 ], "wc_strengths_avg": [ 103.0, 65.25718351262181 ], "wc_weaknesses_avg": [ 202.5, 32.20636583037583 ], "wc_questions_avg": [ 127.75, 101.1147244470359 ], "wc_limitations_avg": [ 17.5, 17.741194999210173 ], "wc_review_avg": [ 516.0, 131.16973736346353 ], "wc_reply_reviewers_avg": [ 105.0, 92.63638593986707 ], "wc_reply_authors_avg": [ 133.75, 231.66179551233733 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11609232228283802783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "fb.com;research.facebook.com;mit.edu;mit.edu;nyu.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;2;1", "aff_unique_norm": "Meta;Massachusetts Institute of Technology;New York University", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": 
"https://meta.com;https://web.mit.edu;https://www.nyu.edu", "aff_unique_abbr": "Meta;MIT;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Large Language Models are Fixated by Red Herrings: Exploring Creative Problem Solving and Einstellung Effect using the Only Connect Wall Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73547", "id": "ZV4tZgclu8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/11e3e0f1b29dcd31bd0952bfc1357f68-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ZV4tZgclu8", "openreview": "https://openreview.net/forum?id=ZV4tZgclu8", "poster": "/media/PosterPDFs/NeurIPS%202023/73547.png?t=1702072965.9490309", "slides": "https://nips.cc/virtual/2023/poster/73547", "video": "https://nips.cc/virtual/2023/poster/73547", "author_site": "Saeid Alavi Naeini, Raeid Saqur, Mozhgan Saeidi, John Giorgi, Babak Taati", "tldr": "", "abstract": "The quest for human imitative AI has been an enduring topic in AI research since inception. The technical evolution and emerging capabilities of the latest cohort of large language models (LLMs) have reinvigorated the subject beyond academia to cultural zeitgeist. \nWhile recent NLP evaluation benchmark tasks test some aspects of human-imitative behaviour (e.g., BIG-bench's `human-like behavior' tasks), few, if not none, examine *creative problem solving* abilities. Creative problem solving in humans is a well-studied topic in cognitive neuroscience with standardized tests that predominantly use ability to associate (heterogeneous) connections among clue words as a metric for creativity. Exposure to misleading stimuli --- distractors dubbed *red herrings* --- impede human performance in such tasks via the *fixation effect* and Einstellung paradigm. In cognitive neuroscience studies, such fixations are experimentally induced by pre-exposing participants to orthographically similar incorrect words to subsequent word-fragments or clues. The popular British quiz show Only Connect's *Connecting Wall* segment essentially mimics Mednick's Remote Associates Test (RAT) formulation with built-in, deliberate red herrings, that makes it an ideal proxy dataset to explore and study fixation effect and Einstellung paradigm from cognitive neuroscience in LLMs. In addition to presenting the novel Only Connect Wall (OCW) dataset, we also report results from our evaluation of selected pre-trained language models and LLMs (including OpenAI's GPT series) on creative problem solving tasks like grouping clue words by heterogeneous connections, and identifying correct open knowledge domain connections in respective groups. 
We synthetically generate two additional datasets, OCW-Randomized and OCW-WordNet, to further analyze our red-herrings hypothesis in language models.\nThe code and a link to the dataset are available at [url](https://github.com/TaatiTeam/OCW).", "keywords": "Natural language processing;Large language models;creative problem solving;creativity;Remote Associates Test;cognitive neuroscience;artificial general intelligence;human-imitative AI;GPT;in-context learning", "primary_area": "", "supplementary_material": "/attachment/192870021c6ae66feb17f07640c5f999a5d01898.pdf", "author": "Saeid Alavi Naeini;Raeid Saqur;mozhgan saeidi;John Michael Giorgi;Babak Taati", "authorids": "~Saeid_Alavi_Naeini1;~Raeid_Saqur1;~mozhgan_saeidi1;~John_Michael_Giorgi1;~Babak_Taati1", "gender": "M;M;F;M;M", "homepage": "https://salavina.github.io;http://www.cs.toronto.edu/~raeidsaqur/;;https://github.com/JohnGiorgi;https://www.cs.toronto.edu/~taati/", "dblp": ";;;;32/4116", "google_scholar": ";0aJ--58AAAAJ;;TNFEhK4AAAAJ;https://scholar.google.ca/citations?user=7-X6qUUAAAAJ", "orcid": ";0000-0002-6330-5480;;0000-0001-9621-5046;", "linkedin": "salavina;raeidsaqur/;mojgansaeedi/;john-giorgi/;babaktaati/", "or_profile": "~Saeid_Alavi_Naeini1;~Raeid_Saqur1;~mozhgan_saeidi1;~John_Michael_Giorgi1;~Babak_Taati1", "aff": "University of Toronto;University of Toronto;Stanford University;Toronto University;University of Toronto", "aff_domain": "utoronto.ca;toronto.edu;stanford.edu;utoronto.ca;torondo.edu", "position": "Associate Researcher;PhD student;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nnaeini2023large,\ntitle={Large Language Models are Fixated by Red Herrings: Exploring Creative Problem Solving and Einstellung Effect using the Only Connect Wall Dataset},\nauthor={Saeid Alavi Naeini and Raeid Saqur and mozhgan saeidi and John Michael Giorgi and Babak Taati},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ZV4tZgclu8}\n}", "github": "", "project": "", "reviewers": "9y7c;H46b;fDpy;zJGy", "pdf_size": 839488, "rating": "6;6;6;7", "confidence": "3;4;3;4", "wc_summary_and_contributions": "121;67;85;74", "wc_strengths": "54;41;84;68", "wc_improvement": "80;102;151;156", "wc_limitations": "332;4;19;1", "wc_correctness": "47;36;1;1", "wc_clarity": "44;23;1;11", "wc_relation_to_prior_work": "10;1;1;1", "wc_documentation": "187;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "876;276;344;314", "wc_reply_reviewers": "0;5;15;0", "wc_reply_authors": "654;329;369;242", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 86.75, 20.78911975048487 ], "wc_strengths_avg": [ 61.75, 16.005858302509115 ], "wc_improvement_avg": [ 122.25, 32.251937926270415 ], "wc_limitations_avg": [ 89.0, 140.46173856249965 ], "wc_correctness_avg": [ 21.25, 20.620075169601105 ], "wc_clarity_avg": [ 19.75, 16.021469970012117 ], "wc_relation_to_prior_work_avg": [ 3.25, 3.897114317029974 ], "wc_documentation_avg": [ 47.5, 80.5403625519528 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 452.5, 245.6923889745061 ], "wc_reply_reviewers_avg": [ 5.0, 6.123724356957945 ], "wc_reply_authors_avg": [ 398.5, 154.4935273725084 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, 
"gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8305308282529281674&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "utoronto.ca;toronto.edu;stanford.edu;utoronto.ca;torondo.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Toronto;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.stanford.edu", "aff_unique_abbr": "U of T;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Beyond Confidence: Reliable Models Should Also Consider Atypicality", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71234", "id": "ZVRG3toCTT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7900318ffaf5e9bc60250f134c6cc3c7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZVRG3toCTT", "openreview": "https://openreview.net/forum?id=ZVRG3toCTT", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71234", "video": "https://nips.cc/virtual/2023/poster/71234", "author_site": "Mert Yuksekgonul, Linjun Zhang, James Zou, Carlos Guestrin", "tldr": "", "abstract": "While most machine learning models can provide confidence in their predictions, confidence is insufficient to understand a prediction's reliability. For instance, the model may have a low confidence prediction if the input is not well-represented in the training dataset or if the input is inherently ambiguous. In this work, we investigate the relationship between how atypical~(rare) a sample or a class is and the reliability of a model's predictions. We first demonstrate that atypicality is strongly related to miscalibration and accuracy. In particular, we empirically show that predictions for atypical inputs or atypical classes are more overconfident and have lower accuracy. Using these insights, we show incorporating atypicality improves uncertainty quantification and model performance for discriminative neural networks and large language models. In a case study, we show that using atypicality improves the performance of a skin lesion classifier across different skin tone groups without having access to the group attributes. Overall, we propose that models should use not only confidence but also atypicality to improve uncertainty quantification and performance. 
Our results demonstrate that simple post-hoc atypicality estimators can provide significant value.", "keywords": "trustworthy machine learning;reliable machine learning;uncertainty", "primary_area": "", "supplementary_material": "", "author": "Mert Yuksekgonul;Linjun Zhang;James Zou;Carlos Guestrin", "authorids": "~Mert_Yuksekgonul1;~Linjun_Zhang1;~James_Zou1;~Carlos_Guestrin1", "gender": "M;M;;M", "homepage": "https://cs.stanford.edu/~merty;;;https://guestrin.stanford.edu", "dblp": "249/5558;;;38/769", "google_scholar": "https://scholar.google.com/citations?hl=en;TUAzs3sAAAAJ;23ZXZvEAAAAJ;DpLFv4gAAAAJ", "orcid": ";;;", "linkedin": ";;;carlos-guestrin-5352a869/", "or_profile": "~Mert_Yuksekgonul1;~Linjun_Zhang1;~James_Zou1;~Carlos_Guestrin1", "aff": "Microsoft;Rutgers University;Stanford University;Stanford University", "aff_domain": "microsoft.com;rutgers.edu;stanford.edu;stanford.edu", "position": "Intern;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyuksekgonul2023beyond,\ntitle={Beyond Confidence: Reliable Models Should Also Consider Atypicality},\nauthor={Mert Yuksekgonul and Linjun Zhang and James Zou and Carlos Guestrin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZVRG3toCTT}\n}", "github": "", "project": "", "reviewers": "7E3C;AQHi;vUKB;y4KB", "pdf_size": 2331404, "rating": "5;5;7;8", "confidence": "5;4;1;4", "soundness": "3;3;2;3", "novelty": "2;2;2;4", "presentation": "2;1;3;3", "wc_summary": "51;54;17;234", "wc_strengths": "48;91;25;129", "wc_weaknesses": "65;272;1;245", "wc_questions": "211;129;1;295", "wc_limitations": "1;4;1;1", "wc_review": "376;550;45;904", "wc_reply_reviewers": "11;169;0;89", "wc_reply_authors": "96;288;0;34", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;1;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 89.0, 84.96764089934473 ], "wc_strengths_avg": [ 73.25, 39.96482828688246 ], "wc_weaknesses_avg": [ 145.75, 115.39361984096001 ], "wc_questions_avg": [ 159.0, 108.47119433287347 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 468.75, 309.9236801214131 ], "wc_reply_reviewers_avg": [ 67.25, 68.03078347336594 ], "wc_reply_authors_avg": [ 104.5, 111.39456898789994 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4490502093697089, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18423453223541677778&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "microsoft.com;rutgers.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Microsoft;Rutgers University;Stanford University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.rutgers.edu;https://www.stanford.edu", "aff_unique_abbr": "Microsoft;Rutgers;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Structured Semidefinite Programming for Recovering Structured Preconditioners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71233", "id": 
"ZViPzk1sUI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/36ce475705c1dc6c50a5956cedff3d01-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZViPzk1sUI", "openreview": "https://openreview.net/forum?id=ZViPzk1sUI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71233", "video": "https://nips.cc/virtual/2023/poster/71233", "author_site": "Arun Jambulapati, Jerry Li, Christopher Musco, Kirankumar Shiragur, Aaron Sidford, Kevin Tian", "tldr": "", "abstract": "We develop a general framework for finding approximately-optimal preconditioners for solving linear systems. Leveraging this framework we obtain improved runtimes for fundamental preconditioning and linear system solving problems including:\nDiagonal preconditioning. We give an algorithm which, given positive definite $\\mathbf{K} \\in \\mathbb{R}^{d \\times d}$ with $\\mathrm{nnz}(\\mathbf{K})$ nonzero entries, computes an $\\epsilon$-optimal diagonal preconditioner in time $\\widetilde{O}(\\mathrm{nnz}(\\mathbf{K}) \\cdot \\mathrm{poly}(\\kappa^\\star,\\epsilon^{-1}))$, where $\\kappa^\\star$ is the optimal condition number of the rescaled matrix.\nStructured linear systems. We give an algorithm which, given $\\mathbf{M} \\in \\mathbb{R}^{d \\times d}$ that is either the pseudoinverse of a graph Laplacian matrix or a constant spectral approximation of one, solves linear systems in $\\mathbf{M}$ in $\\widetilde{O}(d^2)$ time. \nOur diagonal preconditioning results improve state-of-the-art runtimes of $\\Omega(d^{3.5})$ attained by general-purpose semidefinite programming, and our solvers improve state-of-the-art runtimes of $\\Omega(d^{\\omega})$ where $\\omega > 2.3$ is the current matrix multiplication constant. We attain our results via new algorithms for a class of semidefinite programs (SDPs) we call matrix-dictionary approximation SDPs, which we leverage to solve an associated problem we call matrix-dictionary recovery.", "keywords": "preconditioning;semidefinite programming;numerical linear algebra;linear regression;semi-random models", "primary_area": "", "supplementary_material": "/attachment/0dffd83210357d6af7ba22a7ecf8bc63d4dfb92c.pdf", "author": "Arun Jambulapati;Jerry Li;Christopher Musco;Kirankumar Shiragur;Aaron Sidford;Kevin Tian", "authorids": "jmblpati@uw.edu;~Jerry_Li1;~Christopher_Musco1;~Kirankumar_Shiragur1;~Aaron_Sidford1;~Kevin_Tian4", "gender": ";M;;M;;", "homepage": ";https://jerryzli.github.io/;;https://sites.google.com/view/kiran-shiragur;;https://kjtian.github.io", "dblp": ";;;;;", "google_scholar": ";4zybTq4AAAAJ;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "jmblpati@uw.edu;~Jerry_Li1;~Christopher_Musco1;~Kirankumar_Shiragur1;~Aaron_Sidford1;~Kevin_Tian4", "aff": ";Microsoft;;Broad Institute;;Microsoft", "aff_domain": ";microsoft.com;;broadinstitute.org;;microsoft.com", "position": ";Senior Researcher;;Postdoc;;Postdoc", "bibtex": "@inproceedings{\njambulapati2023structured,\ntitle={Structured Semidefinite Programming for Recovering Structured Preconditioners},\nauthor={Arun Jambulapati and Jerry Li and Christopher Musco and Kirankumar Shiragur and Aaron Sidford and Kevin Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZViPzk1sUI}\n}", "github": "", "project": "", "reviewers": "1BXc;2VGS;HjkM;pK6D", "pdf_size": 369691, "rating": "3;6;7;8", "confidence": "1;3;3;3", "soundness": "2;3;4;3", "novelty": "2;3;3;4", "presentation": "1;3;4;4", 
"wc_summary": "128;80;152;154", "wc_strengths": "31;74;43;234", "wc_weaknesses": "82;100;26;42", "wc_questions": "10;46;56;22", "wc_limitations": "13;2;1;4", "wc_review": "264;302;278;456", "wc_reply_reviewers": "10;10;8;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 128.5, 29.811910371527684 ], "wc_strengths_avg": [ 95.5, 81.48772913758242 ], "wc_weaknesses_avg": [ 62.5, 29.744747435471695 ], "wc_questions_avg": [ 33.5, 18.350749303502567 ], "wc_limitations_avg": [ 5.0, 4.743416490252569 ], "wc_review_avg": [ 325.0, 76.84399781375251 ], "wc_reply_reviewers_avg": [ 7.0, 4.123105625617661 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9258200997725515, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=919508305347209301&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";microsoft.com;;broadinstitute.org;;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Microsoft;Broad Institute", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.broadinstitute.org", "aff_unique_abbr": "Microsoft;Broad", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "TART: A plug-and-play Transformer module for task-agnostic reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71232", "id": "ZXbgVm3PSt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1ece70d2259b8e9510e2d4ca8754cecf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZXbgVm3PSt", "openreview": "https://openreview.net/forum?id=ZXbgVm3PSt", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71232", "video": "https://nips.cc/virtual/2023/poster/71232", "author_site": "Kush Bhatia, Avanika Narayan, Christopher De Sa, Christopher R\u00e9", "tldr": "", "abstract": "Large language models (LLMs) exhibit in-context learning abilities which enable the same model to perform several tasks without any task-specific training. In contrast, traditional adaptation approaches, such as fine-tuning, modify the underlying models for each specific task. In-context learning, however, consistently underperforms task-specific tuning approaches even when presented with the same examples. While most existing approaches (e.g., prompt engineering) focus on the LLM's learned representations to patch this performance gap, our experiments actually reveal that LLM representations contain sufficient information to make good predictions. As such, we focus on the LLM's reasoning abilities and demonstrate that this performance gap exists due to their inability to perform simple probabilistic reasoning tasks. This raises an intriguing question: Are LLMs actually capable of learning how to reason in a task-agnostic manner? We answer this in the affirmative and, as a proof of concept, propose TART which generically improves an LLM's reasoning abilities using a synthetically trained reasoning module. 
TART trains this Transformer-based reasoning module in a task-agnostic manner using only synthetic logistic regression tasks and composes it with an arbitrary real-world pre-trained model without any additional training. With a single inference module, TART improves performance across different model families (GPT-Neo, Pythia, Bloom), model sizes (100M - 6B), tasks (14 NLP classification tasks), and even across different modalities (audio and vision). On the RAFT Benchmark, TART improves GPT-Neo (125M)'s performance such that it outperforms Bloom (176B), and is within $4$% of GPT-3.", "keywords": "In-context learning;task-agnostic methods;large language models", "primary_area": "", "supplementary_material": "/attachment/a5f5ffeb5342db9fdc3a98ca52d2cff7821f6832.zip", "author": "Kush Bhatia;Avanika Narayan;Christopher De Sa;Christopher Re", "authorids": "~Kush_Bhatia3;~Avanika_Narayan1;~Christopher_De_Sa2;~Christopher_Re1", "gender": ";;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kush_Bhatia3;~Avanika_Narayan1;~Christopher_De_Sa2;~Christopher_Re1", "aff": ";;;", "aff_domain": ";;;", "position": ";;;", "bibtex": "@inproceedings{\nbhatia2023tart,\ntitle={{TART}: A plug-and-play Transformer module for task-agnostic reasoning},\nauthor={Kush Bhatia and Avanika Narayan and Christopher De Sa and Christopher Re},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZXbgVm3PSt}\n}", "github": "", "project": "", "reviewers": "VGrP;CKNq;hArx;tMpn", "pdf_size": 11413334, "rating": "4;6;6;8", "confidence": "3;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;4;2;4", "wc_summary": "150;118;93;60", "wc_strengths": "37;102;80;240", "wc_weaknesses": "106;1216;197;20", "wc_questions": "129;85;63;87", "wc_limitations": "1;3;8;1", "wc_review": "423;1524;441;408", "wc_reply_reviewers": "0;448;209;0", "wc_reply_authors": "0;1278;673;0", "reply_reviewers": "0;5;2;0", "reply_authors": "1;6;3;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 105.25, 33.025558284456 ], "wc_strengths_avg": [ 114.75, 75.99794405113865 ], "wc_weaknesses_avg": [ 384.75, 483.98624722196394 ], "wc_questions_avg": [ 91.0, 23.874672772626646 ], "wc_limitations_avg": [ 3.25, 2.8613807855648994 ], "wc_review_avg": [ 699.0, 476.4572383750718 ], "wc_reply_reviewers_avg": [ 164.25, 184.71109197879807 ], "wc_reply_authors_avg": [ 487.75, 532.5910133488924 ], "reply_reviewers_avg": [ 1.75, 2.0463381929681126 ], "reply_authors_avg": [ 2.75, 2.0463381929681126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5707240732651008728&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": ";;;", "author_num": 4 }, { "title": "A Definition of Continual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71231", "id": "ZZS9WEWYbD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d8cf1247786d6dfeefeeb53b8b5f6d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZZS9WEWYbD", "openreview": "https://openreview.net/forum?id=ZZS9WEWYbD", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71231.png?t=1701785389.5887442", "slides": "https://nips.cc/virtual/2023/poster/71231", "video": "https://nips.cc/virtual/2023/poster/71231", "author_site": "David Abel, Andre Barreto, Benjamin Van Roy, Doina Precup, Hado van Hasselt, Satinder Singh", "tldr": "", "abstract": "In a standard view of the reinforcement learning problem, an agent\u2019s goal is to efficiently identify a policy that maximizes long-term reward. However, this perspective is based on a restricted view of learning as finding a solution, rather than treating learning as endless adaptation. In contrast, continual reinforcement learning refers to the setting in which the best agents never stop learning. Despite the importance of continual reinforcement learning, the community lacks a simple definition of the problem that highlights its commitments and makes its primary concepts precise and clear. To this end, this paper is dedicated to carefully defining the continual reinforcement learning problem. We formalize the notion of agents that \u201cnever stop learning\u201d through a new mathematical language for analyzing and cataloging agents. Using this new language, we define a continual learning agent as one that can be understood as carrying out an implicit search process indefinitely, and continual reinforcement learning as the setting in which the best agents are all continual learning agents. We provide two motivating examples, illustrating that traditional views of multi-task reinforcement learning and continual supervised learning are special cases of our definition. Collectively, these definitions and perspectives formalize many intuitive concepts at the heart of learning, and open new research pathways surrounding continual learning agents.", "keywords": "Continual Reinforcement Learning;Reinforcement Learning;Lifelong Reinforcement Learning;Continual Learning", "primary_area": "", "supplementary_material": "/attachment/ea1c5aa94d4cd317ae0b28d9669f8cc2efb017bf.zip", "author": "David Abel;Andre Barreto;Benjamin Van Roy;Doina Precup;Hado van Hasselt;Satinder Singh", "authorids": "~David_Abel1;~Andre_Barreto1;~Benjamin_Van_Roy3;~Doina_Precup1;~Hado_van_Hasselt1;~Satinder_Singh2", "gender": "M;M;;F;M;", "homepage": "http://david-abel.github.io;https://sites.google.com/corp/view/andrebarreto/about;https://web.stanford.edu/~bvr;http://cs.mcgill.ca/~dprecup/;http://hadovanhasselt.com;", "dblp": "162/9926;72/953;41/4314.html;p/DoinaPrecup;https://dblp.uni-trier.de/pers/h/Hasselt:Hado_van.html;", "google_scholar": "lvBJlmwAAAAJ;https://scholar.google.co.uk/citations?user=H-xtdV4AAAAJ;05sMX8MAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;;", "orcid": "0000-0003-0302-7543;;;;;", "linkedin": ";;;;;", "or_profile": "~David_Abel1;~Andre_Barreto1;~Benjamin_Van_Roy3;~Doina_Precup1;~Hado_van_Hasselt1;~Satinder_Baveja2", "aff": "Google DeepMind;Google DeepMind;Google;McGill University;Google DeepMind;Google DeepMind", "aff_domain": "google.com;google.com;google.com;mcgill.ca;google.com;google.com", "position": "Research Scientist;Research Scientist;research scientist;Associate Professor;Research scientist;Research Scientist", "bibtex": "@inproceedings{\nabel2023a,\ntitle={A Definition of Continual Reinforcement Learning},\nauthor={David Abel and Andre Barreto and Benjamin Van Roy and Doina Precup and Hado van Hasselt and Satinder Singh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZZS9WEWYbD}\n}", "github": "", "project": "", "reviewers": "tAG2;DFWz;1ia1;EKup;oHpE;9sbq;z8Jz", "pdf_size": 1372466, "rating": "4;4;7;7;7;7;7", "confidence": "2;3;3;4;2;3;3", "soundness": "2;2;3;4;3;3;3", "novelty": "2;2;3;3;3;3;3", "presentation": "2;3;3;4;3;4;3", "wc_summary": "40;18;56;45;49;297;83", "wc_strengths": "25;33;45;96;35;181;82", "wc_weaknesses": "56;382;125;254;51;103;201", "wc_questions": "58;1;36;175;31;294;1", "wc_limitations": "17;1;39;26;33;39;1", "wc_review": "196;435;301;596;199;914;368", "wc_reply_reviewers": "0;637;111;75;59;63;27", "wc_reply_authors": "0;267;346;0;0;0;0", "reply_reviewers": "0;1;2;1;1;1;1", "reply_authors": "1;2;2;1;1;1;1", "rating_avg": [ 6.142857142857143, 1.355261854357877 ], "confidence_avg": [ 2.857142857142857, 0.6388765649999398 ], "soundness_avg": [ 2.857142857142857, 0.6388765649999398 ], "novelty_avg": [ 2.7142857142857144, 0.4517539514526256 ], "presentation_avg": [ 3.142857142857143, 0.6388765649999398 ], "wc_summary_avg": [ 84.0, 88.79510926042846 ], "wc_strengths_avg": [ 71.0, 51.23893608798461 ], "wc_weaknesses_avg": [ 167.42857142857142, 111.21774928086812 ], "wc_questions_avg": [ 85.14285714285714, 101.37543879972473 ], "wc_limitations_avg": [ 22.285714285714285, 15.219482680473227 ], "wc_review_avg": [ 429.85714285714283, 236.1715046144152 ], "wc_reply_reviewers_avg": [ 138.85714285714286, 205.95590968625754 ], "wc_reply_authors_avg": [ 87.57142857142857, 140.0631053110981 ], "reply_reviewers_avg": [ 1.0, 0.5345224838248488 ], "reply_authors_avg": [ 1.2857142857142858, 0.45175395145262565 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.35355339059327373, "gs_citation": 93, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10399324538067993856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "google.com;google.com;google.com;mcgill.ca;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Google;McGill University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.mcgill.ca", "aff_unique_abbr": "DeepMind;McGill", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;2;0;0", "aff_country_unique": "United Kingdom;United States;Canada" }, { "title": "Generalizable Lightweight Proxy for Robust NAS against Diverse Perturbations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71230", "id": "ZZWg9jJQ1j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/796455f65fd2cbe049112a2d2d4488cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZZWg9jJQ1j", "openreview": "https://openreview.net/forum?id=ZZWg9jJQ1j", "poster": "/media/PosterPDFs/NeurIPS%202023/71230.png?t=1701760420.5534115", "slides": "https://nips.cc/virtual/2023/poster/71230", "video": "https://nips.cc/virtual/2023/poster/71230", "author_site": "Hyeonjeong Ha, Minseon Kim, Sung Ju Hwang", "tldr": "", "abstract": "Recent neural architecture search (NAS) frameworks have been successful in finding optimal architectures for given conditions (e.g., performance or latency). However, they search for optimal architectures in terms of their performance on clean images only, while robustness against various types of perturbations or corruptions is crucial in practice. 
Although there exist several robust NAS frameworks that tackle this issue by integrating adversarial training into one-shot NAS, they are limited in that they only consider robustness against adversarial attacks and require significant computational resources to discover optimal architectures for a single task, which makes them impractical in real-world scenarios. To address these challenges, we propose a novel lightweight robust zero-cost proxy that considers the consistency across features, parameters, and gradients of both clean and perturbed images at the initialization state. Our approach facilitates an efficient and rapid search for neural architectures capable of learning generalizable features that exhibit robustness across diverse perturbations. The experimental results demonstrate that our proxy can rapidly and efficiently search for neural architectures that are consistently robust against various perturbations on multiple benchmark datasets and diverse search spaces, largely outperforming existing clean zero-shot NAS and robust NAS with reduced search cost.", "keywords": "neural architecture search;generalization;efficiency;zero-cost proxy", "primary_area": "", "supplementary_material": "/attachment/6f5635e72095f774388b97808bb4ec5660690488.zip", "author": "Hyeonjeong Ha;Minseon Kim;Sung Ju Hwang", "authorids": "~Hyeonjeong_Ha1;~Minseon_Kim1;~Sung_Ju_Hwang1", "gender": ";;", "homepage": "https://hyeonjeongha.github.io/;https://kim-minseon.github.io/;", "dblp": "331/5333;247/5952;", "google_scholar": "https://scholar.google.com/citations?hl=ko;ZwObZNwAAAAJ;", "orcid": ";;", "linkedin": "hyeonjeong-ha-bb93b0285/;minseon-kim-707a84174;", "or_profile": "~Hyeonjeong_Ha1;~Minseon_Kim1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.edu;kaist.ac.kr;", "position": "MS student;PhD student;", "bibtex": "@inproceedings{\nha2023generalizable,\ntitle={Generalizable Lightweight Proxy for Robust {NAS} against Diverse Perturbations},\nauthor={Hyeonjeong Ha and Minseon Kim and Sung Ju Hwang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZZWg9jJQ1j}\n}", "github": "", "project": "", "reviewers": "KEHv;41nW;DQhw;m6P2", "pdf_size": 1316566, "rating": "4;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "239;37;47;130", "wc_strengths": "75;33;45;59", "wc_weaknesses": "181;139;115;81", "wc_questions": "339;22;2;110", "wc_limitations": "48;17;18;9", "wc_review": "882;248;227;389", "wc_reply_reviewers": "288;0;87;61", "wc_reply_authors": "2214;521;426;417", "reply_reviewers": "3;0;1;1", "reply_authors": "8;5;3;5", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 113.25, 81.08136345671551 ], "wc_strengths_avg": [ 53.0, 15.684387141358123 ], "wc_weaknesses_avg": [ 129.0, 36.41428291206625 ], "wc_questions_avg": [ 118.25, 133.76915750650446 ], "wc_limitations_avg": [ 23.0, 14.849242404917497 ], "wc_review_avg": [ 436.5, 264.64551762688143 ], "wc_reply_reviewers_avg": [ 109.0, 108.06248192596726 ], "wc_reply_authors_avg": [ 894.5, 762.9025167083931 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 5.25, 1.7853571071357126 ], "replies_avg": [ 35, 
0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17668140141522138090&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "kaist.edu;kaist.ac.kr;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Continuous Parametric Optical Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71229", "id": "ZZgfS1DbmO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/49f42aafbcce59b2665640cb9f3d794f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZZgfS1DbmO", "openreview": "https://openreview.net/forum?id=ZZgfS1DbmO", "poster": "/media/PosterPDFs/NeurIPS%202023/71229.png?t=1700669295.594205", "slides": "https://nips.cc/virtual/2023/poster/71229", "video": "https://nips.cc/virtual/2023/poster/71229", "author_site": "Jianqin Luo, Zhexiong Wan, yuxin mao, Bo Li, Yuchao Dai", "tldr": "", "abstract": "In this paper, we present continuous parametric optical flow, a parametric representation of dense and continuous motion over arbitrary time interval. In contrast to existing discrete-time representations (i.e., flow in between consecutive frames), this new representation transforms the frame-to-frame pixel correspondences to dense continuous flow. In particular, we present a temporal-parametric model that employs B-splines to fit point trajectories using a limited number of frames. To further improve the stability and robustness of the trajectories, we also add an encoder with a neural ordinary differential equation (NODE) to represent features associated with specific times. We also contribute a synthetic dataset and introduce two evaluation perspectives to measure the accuracy and robustness of continuous flow estimation. 
Benefiting from the combination of explicit parametric modeling and implicit feature optimization, our model focuses on motion continuity and outperforms the flow-based and point-tracking approaches for fitting long-term and variable sequences.", "keywords": "optical flow;point trajectories;continuous motion;neural ordinary differential equation", "primary_area": "", "supplementary_material": "/attachment/ef33f3849ce10dc8bc2fc9ccbbe623052c1acbd0.pdf", "author": "Jianqin Luo;Zhexiong Wan;yuxin mao;Bo Li;Yuchao Dai", "authorids": "~Jianqin_Luo2;~Zhexiong_Wan1;~yuxin_mao2;~Bo_Li35;~Yuchao_Dai1", "gender": "M;M;M;M;M", "homepage": "https://github.com/LuoRadisher;https://danquxunhuan.cn/;https://github.com/fupiao1998;;http://npu-cvr.cn/", "dblp": ";274/2815;74/6497;50/3402-90;65/7804", "google_scholar": ";rgjSWLkAAAAJ;https://scholar.google.com.hk/citations?user=HPvl-ikAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=fddAbqsAAAAJ", "orcid": ";0000-0002-4494-6178;;;0000-0002-4432-7406", "linkedin": ";;;;", "or_profile": "~Jianqin_Luo2;~Zhexiong_Wan1;~yuxin_mao2;~Bo_Li35;~Yuchao_Dai1", "aff": "Northwest Polytechnical University ;Northwestern Polytechnical University;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University Xi'an;Northwestern Polytechnical University", "aff_domain": "nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "MS student;PhD student;PhD student;Lecturer;Professor", "bibtex": "@inproceedings{\nluo2023continuous,\ntitle={Continuous Parametric Optical Flow},\nauthor={Jianqin Luo and Zhexiong Wan and yuxin mao and Bo Li and Yuchao Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZZgfS1DbmO}\n}", "github": "", "project": "", "reviewers": "7yvr;TYqX;JXsp;sWYx;dH6J", "pdf_size": 2229859, "rating": "3;5;5;5;5", "confidence": "3;2;4;5;5", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;2", "presentation": "2;3;4;2;2", "wc_summary": "84;114;45;112;56", "wc_strengths": "37;25;38;37;26", "wc_weaknesses": "80;254;85;187;123", "wc_questions": "24;63;18;63;2", "wc_limitations": "1;3;8;148;16", "wc_review": "226;459;194;547;223", "wc_reply_reviewers": "68;52;23;93;0", "wc_reply_authors": "93;45;30;361;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 82.2, 28.187940683916587 ], "wc_strengths_avg": [ 32.6, 5.817215828899594 ], "wc_weaknesses_avg": [ 145.8, 66.2582825011334 ], "wc_questions_avg": [ 34.0, 24.746716953971895 ], "wc_limitations_avg": [ 35.2, 56.63709032074299 ], "wc_review_avg": [ 329.8, 144.56195903487196 ], "wc_reply_reviewers_avg": [ 47.2, 32.7621733100843 ], "wc_reply_authors_avg": [ 105.8, 131.09294412743958 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=684131283475342618&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.nwpu.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "NWPU;NWPU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CSMeD: Bridging the Dataset Gap in Automated Citation Screening for Systematic Literature Reviews", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73546", "id": "ZbmS3MU25p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4962a23916103301b27bde29a27642e8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ZbmS3MU25p", "openreview": "https://openreview.net/forum?id=ZbmS3MU25p", "poster": "/media/PosterPDFs/NeurIPS%202023/73546.png?t=1702339391.9679327", "slides": "https://nips.cc/virtual/2023/poster/73546", "video": "https://nips.cc/virtual/2023/poster/73546", "author_site": "Wojciech Kusa, Oscar E. Mendoza, Matthias Samwald, Petr Knoth, Allan Hanbury", "tldr": "", "abstract": "Systematic literature reviews (SLRs) play an essential role in summarising, synthesising and validating scientific evidence. In recent years, there has been a growing interest in using machine learning techniques to automate the identification of relevant studies for SLRs. However, the lack of standardised evaluation datasets makes comparing the performance of such automated literature screening systems difficult. In this paper, we analyse the citation screening evaluation datasets, revealing that many of the available datasets are either too small, suffer from data leakage or have limited applicability to systems treating automated literature screening as a classification task, as opposed to, for example, a retrieval or question-answering task. To address these challenges, we introduce CSMED, a meta-dataset consolidating nine publicly released collections, providing unified access to 325 SLRs from the fields of medicine and computer science. CSMED serves as a comprehensive resource for training and evaluating the performance of automated citation screening models. Additionally, we introduce CSMED-FT, a new dataset designed explicitly for evaluating the full text publication screening task. To demonstrate the utility of CSMED, we conduct experiments and establish baselines on new datasets.", "keywords": "evaluation;NLP;meta-dataset;citation screening;systematic reviews", "primary_area": "", "supplementary_material": "/attachment/85a3eb17813b2ba9f10a63920f0e58277236d01f.pdf", "author": "Wojciech Kusa;Oscar E. 
Mendoza;Matthias Samwald;Petr Knoth;Allan Hanbury", "authorids": "~Wojciech_Kusa1;o.espitiamendoza@campus.unimib.it;~Matthias_Samwald1;petr.knoth@open.ac.uk;~Allan_Hanbury1", "gender": ";;M;;M", "homepage": ";;https://samwald.info;;https://informatics.tuwien.ac.at/people/allan-hanbury", "dblp": ";;;;55/6683", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;https://scholar.google.at/citations?user=_hR4cSAAAAAJ", "orcid": ";;0000-0002-4855-2571;;0000-0002-7149-5843", "linkedin": ";;matthiassamwald/;;allan-hanbury-9483a26/", "or_profile": "~Wojciech_Kusa1;o.espitiamendoza@campus.unimib.it;~Matthias_Samwald1;petr.knoth@open.ac.uk;~Allan_Hanbury1", "aff": ";;Institute of Artificial Intelligence, Medical University of Vienna;;Technische Universit\u00e4t Wien", "aff_domain": ";;meduniwien.ac.at;;tuwien.ac.at", "position": ";;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nkusa2023csmed,\ntitle={{CSM}eD: Bridging the Dataset Gap in Automated Citation Screening for Systematic Literature Reviews},\nauthor={Wojciech Kusa and Oscar E. Mendoza and Matthias Samwald and Petr Knoth and Allan Hanbury},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ZbmS3MU25p}\n}", "github": "", "project": "", "reviewers": "zrvA;h67z;TzT9;6uGL", "pdf_size": 339308, "rating": "4;7;7;8", "confidence": "5;2;3;4", "wc_summary_and_contributions": "47;61;82;61", "wc_strengths": "48;36;106;42", "wc_improvement": "14;55;103;95", "wc_limitations": "54;1;74;3", "wc_correctness": "1;1;72;3", "wc_clarity": "23;51;178;216", "wc_relation_to_prior_work": "2;1;29;2", "wc_documentation": "2;1;85;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "192;208;730;424", "wc_reply_reviewers": "0;12;0;17", "wc_reply_authors": "575;565;569;560", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 62.75, 12.497499749949988 ], "wc_strengths_avg": [ 58.0, 28.035691537752374 ], "wc_improvement_avg": [ 66.75, 35.470938809115275 ], "wc_limitations_avg": [ 33.0, 31.804087787578503 ], "wc_correctness_avg": [ 19.25, 30.466169762541533 ], "wc_clarity_avg": [ 117.0, 81.72209003690496 ], "wc_relation_to_prior_work_avg": [ 8.5, 11.84271928232701 ], "wc_documentation_avg": [ 22.25, 36.23102951890824 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 388.5, 217.4137760124689 ], "wc_reply_reviewers_avg": [ 7.25, 7.46240577829965 ], "wc_reply_authors_avg": [ 567.25, 5.494315243958978 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5962847939999439, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6334462210135793275&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": ";;meduniwien.ac.at;;tuwien.ac.at", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Medical University of Vienna;Technische Universit\u00e4t Wien", "aff_unique_dep": "Institute of Artificial Intelligence;", "aff_unique_url": "https://www.meduniwien.ac.at;https://www.tuwien.ac.at", "aff_unique_abbr": "MUW;TU Wien", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Austria" }, { "title": "Large Language Models Are Semi-Parametric Reinforcement Learning Agents", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/71228", "id": "ZcJa1R6j3v", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f6b22ac37beb5da61efd4882082c9ecd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZcJa1R6j3v", "openreview": "https://openreview.net/forum?id=ZcJa1R6j3v", "poster": "/media/PosterPDFs/NeurIPS%202023/71228.png?t=1701746255.2961004", "slides": "https://nips.cc/virtual/2023/poster/71228", "video": "https://nips.cc/virtual/2023/poster/71228", "author_site": "Danyang Zhang, Lu Chen, Situo Zhang, Hongshen Xu, Zihan Zhao, Kai Yu", "tldr": "", "abstract": "Inspired by the insights in cognitive science with respect to human memory and reasoning mechanism, a novel evolvable LLM-based (Large Language Model) agent framework is proposed as Rememberer. By equipping the LLM with a long-term experience memory, Rememberer is capable of exploiting the experiences from the past episodes even for different task goals, which excels an LLM-based agent with fixed exemplars or equipped with a transient working memory. We further introduce **R**einforcement **L**earning with **E**xperience **M**emory (**RLEM**) to update the memory. Thus, the whole system can learn from the experiences of both success and failure, and evolve its capability without fine-tuning the parameters of the LLM. In this way, the proposed Rememberer constitutes a semi-parametric RL agent. Extensive experiments are conducted on two RL task sets to evaluate the proposed framework. The average results with different initialization and training sets exceed the prior SOTA by 4% and 2% for the success rate on two task sets and demonstrate the superiority and robustness of Rememberer.", "keywords": "Learning from Experiences;LLM;Reinforcement Learning;Decision Making;Experience Memory", "primary_area": "", "supplementary_material": "/attachment/7eaf13243062cd1fbb5243c13a773dc064f58faa.pdf", "author": "Danyang Zhang;Lu Chen;Situo Zhang;Hongshen Xu;Zihan Zhao;Kai Yu", "authorids": "~Danyang_Zhang2;~Lu_Chen3;~Situo_Zhang1;~Hongshen_Xu1;~Zihan_Zhao1;~Kai_Yu3", "gender": ";M;M;M;M;M", "homepage": "https://zdy023.github.io;https://coai-sjtu.github.io;;https://speechlab.sjtu.edu.cn/members/hongshen-xu;https://travelleralone.github.io/;https://x-lance.sjtu.edu.cn/~kaiyu/", "dblp": ";69/157-2;335/8386;314/8140;216/4838.html;197/1322-4", "google_scholar": "https://scholar.google.com.hk/citations?user=CMHormkAAAAJ;https://scholar.google.ca/citations?user=Fb3jWaYAAAAJ;SMUy6hMAAAAJ;;hBJig4cAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0001-4958-5017;;;0000-0002-6770-6564;;0000-0002-7102-9826", "linkedin": "%E4%B8%B9%E9%98%B3-%E5%BC%A0-b62931182;;;;;", "or_profile": "~Danyang_Zhang2;~Lu_Chen3;~Situo_Zhang1;~Hongshen_Xu1;~Zihan_Zhao1;~Kai_Yu3", "aff": "University of Hong Kong;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "hku.hk;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Intern;Assistant Professor;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023large,\ntitle={Large Language Models Are Semi-Parametric Reinforcement Learning Agents},\nauthor={Danyang Zhang and Lu Chen and Situo Zhang and Hongshen Xu and Zihan Zhao and Kai Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZcJa1R6j3v}\n}", "github": "", 
"project": "", "reviewers": "4pvM;dPzh;d9XL;8w3X;2hwN", "pdf_size": 502055, "rating": "4;4;5;6;6", "confidence": "4;3;4;4;4", "soundness": "3;2;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "54;43;53;74;91", "wc_strengths": "39;25;9;64;100", "wc_weaknesses": "24;27;275;162;135", "wc_questions": "26;92;11;90;11", "wc_limitations": "5;12;7;19;62", "wc_review": "148;199;355;409;399", "wc_reply_reviewers": "42;0;0;78;24", "wc_reply_authors": "112;208;32;39;0", "reply_reviewers": "2;0;0;1;1", "reply_authors": "3;3;2;2;1", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 63.0, 17.23948955160796 ], "wc_strengths_avg": [ 47.4, 31.903604811995777 ], "wc_weaknesses_avg": [ 124.6, 93.56623322545373 ], "wc_questions_avg": [ 46.0, 37.15373467095872 ], "wc_limitations_avg": [ 21.0, 21.06181378704123 ], "wc_review_avg": [ 302.0, 107.69586807301383 ], "wc_reply_reviewers_avg": [ 28.8, 29.24653825668946 ], "wc_reply_authors_avg": [ 78.2, 74.53428741190191 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2949276381396387530&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hku.hk;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Hong Kong;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.sjtu.edu.cn", "aff_unique_abbr": "HKU;SJTU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Optimal testing using combined test statistics across independent studies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71227", "id": "ZcuFDaMTYw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff703bfaf652f00ae7b609ce0da3fde2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZcuFDaMTYw", "openreview": "https://openreview.net/forum?id=ZcuFDaMTYw", "poster": "/media/PosterPDFs/NeurIPS%202023/71227.png?t=1702320041.1666281", "slides": "https://nips.cc/virtual/2023/poster/71227", "video": "https://nips.cc/virtual/2023/poster/71227", "author_site": "Lasse Vuursteen, Botond Szabo, Aad van der Vaart, Harry van Zanten", "tldr": "", "abstract": "Combining test statistics from independent trials or experiments is a popular method of meta-analysis. However, there is very limited theoretical understanding of the power of the combined test, especially in high-dimensional models considering composite hypotheses tests. We derive a mathematical framework to study standard {meta-analysis} testing approaches in the context of the many normal means model, which serves as the platform to investigate more complex models.\n\nWe introduce a natural and mild restriction on the meta-level combination functions of the local trials. This allows us to mathematically quantify the cost of compressing $m$ trials into real-valued test statistics and combining these. 
We then derive minimax lower and matching upper bounds for the separation rates of standard combination methods for e.g. p-values and e-values, quantifying the loss relative to using the full, pooled data. We observe an elbow effect, revealing that in certain cases combining the locally optimal tests in each trial results in a sub-optimal {meta-analysis} method and develop approaches to achieve the global optima. We also explore the possible gains of allowing limited coordination between the trial designs. Our results connect meta-analysis with bandwidth constraint distributed inference and build on recent information theoretic developments in the latter field.", "keywords": "testing;meta-analysis;p-values;e-values;optimal;combining trials", "primary_area": "", "supplementary_material": "/attachment/8f45ae163132e07c83573d919a7e34c1578f679d.zip", "author": "Lasse Vuursteen;Botond Szabo;Aad van der Vaart;Harry van Zanten", "authorids": "~Lasse_Vuursteen1;~Botond_Szabo2;a.w.vandervaart@tudelft.nl;~Harry_van_Zanten1", "gender": "M;;;", "homepage": "https://lassev.github.io/;https://botondszabo.com/;;", "dblp": "281/7082;;;", "google_scholar": "JtvExucAAAAJ;;;https://scholar.google.nl/citations?user=QW6lU_oAAAAJ", "orcid": "0000-0002-3255-8549;;;", "linkedin": ";;;", "or_profile": "~Lasse_Vuursteen1;~Botond_Szabo2;a.w.vandervaart@tudelft.nl;~Harry_van_Zanten1", "aff": "Delft University of Technology;Bocconi University;;", "aff_domain": "tudelft.nl;unibocconi.it;;", "position": "PhD student;Associate Professor;;", "bibtex": "@inproceedings{\nvuursteen2023optimal,\ntitle={Optimal testing using combined test statistics across independent studies},\nauthor={Lasse Vuursteen and Botond Szabo and Aad van der Vaart and Harry van Zanten},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZcuFDaMTYw}\n}", "github": "", "project": "", "reviewers": "GjrH;MHU2;ThPF;qxf1;aj9v;vLRc", "pdf_size": 593015, "rating": "5;5;6;6;6;7", "confidence": "3;2;3;3;4;2", "soundness": "3;3;4;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "2;3;4;3;3;4", "wc_summary": "88;66;351;110;151;103", "wc_strengths": "51;22;101;108;107;104", "wc_weaknesses": "144;238;262;79;205;70", "wc_questions": "45;18;50;287;86;2", "wc_limitations": "3;37;18;16;163;1", "wc_review": "331;381;782;600;712;280", "wc_reply_reviewers": "88;0;104;56;135;86", "wc_reply_authors": "52;0;38;0;0;650", "reply_reviewers": "2;0;1;1;1;2", "reply_authors": "2;1;2;1;1;3", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 144.83333333333334, 95.69990711710342 ], "wc_strengths_avg": [ 82.16666666666667, 33.43359920133571 ], "wc_weaknesses_avg": [ 166.33333333333334, 74.37442810237638 ], "wc_questions_avg": [ 81.33333333333333, 95.67944165574733 ], "wc_limitations_avg": [ 39.666666666666664, 56.40232461718419 ], "wc_review_avg": [ 514.3333333333334, 193.37240984403354 ], "wc_reply_reviewers_avg": [ 78.16666666666667, 42.144262822938174 ], "wc_reply_authors_avg": [ 123.33333333333333, 236.42522896021245 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.6871842709362768 ], "reply_authors_avg": [ 1.6666666666666667, 0.7453559924999299 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.05882352941176472, 
"gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12180899097512502648&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "tudelft.nl;unibocconi.it;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Delft University of Technology;Bocconi University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tudelft.nl;https://www.bocconi.edu", "aff_unique_abbr": "TU Delft;Bocconi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Netherlands;Italy" }, { "title": "SimFBO: Towards Simple, Flexible and Communication-efficient Federated Bilevel Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71226", "id": "ZdxGmJGKOo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/686a3f32067838c8dbb68da6e9e3cf69-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZdxGmJGKOo", "openreview": "https://openreview.net/forum?id=ZdxGmJGKOo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71226", "video": "https://nips.cc/virtual/2023/poster/71226", "author_site": "Yifan Yang, Peiyao Xiao, Kaiyi Ji", "tldr": "", "abstract": "Federated bilevel optimization (FBO) has shown great potential recently in machine learning and edge computing due to the emerging nested optimization structure in meta-learning, fine-tuning, hyperparameter tuning, etc. However, existing FBO algorithms often involve complicated computations and require multiple sub-loops per iteration, each of which contains a number of communication rounds. In this paper, we propose a simple and flexible FBO framework named SimFBO, which is easy to implement without sub-loops, and includes a generalized server-side aggregation and update for improving communication efficiency. We further propose System-level heterogeneity robust FBO (ShroFBO) as a variant of SimFBO with stronger resilience to heterogeneous local computation. We show that SimFBO and ShroFBO provably achieve a linear convergence speedup with partial client participation and client sampling without replacement, as well as improved sample and communication complexities. 
Experiments demonstrate the effectiveness of the proposed methods over existing FBO algorithms.", "keywords": "Federated bilevel optimization;federated hypergradient;communication efficiency;system-level heterogeneity;linear speedup", "primary_area": "", "supplementary_material": "/attachment/8d6d07af15f654480ab8cde42dee8444a7305c57.zip", "author": "Yifan Yang;Peiyao Xiao;Kaiyi Ji", "authorids": "~Yifan_Yang13;~Peiyao_Xiao1;~Kaiyi_Ji1", "gender": ";M;M", "homepage": ";https://xiaopeiyao.github.io/index.html;https://cse.buffalo.edu/~kaiyiji/", "dblp": ";;205/3164", "google_scholar": ";_gf0LboAAAAJ;E0A3lSIAAAAJ", "orcid": ";;", "linkedin": ";xiao-peiyao-915430266;", "or_profile": "~Yifan_Yang13;~Peiyao_Xiao1;~Kaiyi_Ji1", "aff": ";State University of New York at Buffalo;State University of New York at Buffalo", "aff_domain": ";buffalo.edu;buffalo.edu", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2023simfbo,\ntitle={Sim{FBO}: Towards Simple, Flexible and Communication-efficient Federated Bilevel Learning},\nauthor={Yifan Yang and Peiyao Xiao and Kaiyi Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZdxGmJGKOo}\n}", "github": "", "project": "", "reviewers": "8pmc;DZHN;MEss;TKNN", "pdf_size": 2460965, "rating": "6;7;7;8", "confidence": "3;3;4;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "1;4;4;4", "wc_summary": "84;174;120;80", "wc_strengths": "42;354;169;98", "wc_weaknesses": "568;210;32;25", "wc_questions": "7;157;110;36", "wc_limitations": "1;25;1;79", "wc_review": "702;920;432;318", "wc_reply_reviewers": "24;34;9;14", "wc_reply_authors": "32;33;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 1.299038105676658 ], "wc_summary_avg": [ 114.5, 37.71935842508459 ], "wc_strengths_avg": [ 165.75, 117.6358257504915 ], "wc_weaknesses_avg": [ 208.75, 220.26503921412495 ], "wc_questions_avg": [ 77.5, 59.30640774823577 ], "wc_limitations_avg": [ 26.5, 31.85514087239295 ], "wc_review_avg": [ 593.0, 234.71045992882378 ], "wc_reply_reviewers_avg": [ 20.25, 9.60143218483576 ], "wc_reply_authors_avg": [ 16.25, 16.253845698787718 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4450904972408513173&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";buffalo.edu;buffalo.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Local Convergence of Gradient Methods for Min-Max Games: Partial Curvature Generically Suffices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71225", "id": "ZeRiLBvIps", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf331c87e29f473b610336f00fe1cb51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZeRiLBvIps", "openreview": 
"https://openreview.net/forum?id=ZeRiLBvIps", "poster": "/media/PosterPDFs/NeurIPS%202023/71225.png?t=1701428309.311486", "slides": "https://nips.cc/virtual/2023/poster/71225", "video": "https://nips.cc/virtual/2023/poster/71225", "author_site": "Guillaume Wang, L\u00e9na\u00efc Chizat", "tldr": "", "abstract": "We study the convergence to local Nash equilibria of gradient methods for two-player zero-sum differentiable games.\nIt is well-known that, in the continuous-time setting, such dynamics converge locally when $S \\succ 0$ and may diverge when $S=0$, where $S\\succeq 0$ is the symmetric part of the Jacobian at equilibrium that accounts for the \"potential\" component of the game. We show that these dynamics also converge as soon as $S$ is nonzero (*partial curvature*) and the eigenvectors of the antisymmetric part $A$ are in general position with respect to the kernel of $S$.\nWe then study the convergence rate when $S \\ll A$ and prove that it typically depends on the *average* of the eigenvalues of $S$, instead of the minimum as an analogy with minimization problems would suggest.\nTo illustrate our results, we consider the problem of computing mixed Nash equilibria of continuous games. We show that, thanks to partial curvature, conic particle methods -- which optimize over both weights and supports of the mixed strategies -- generically converge faster than fixed-support methods.\nFor min-max games, it is thus beneficial to add degrees of freedom \"with curvature\": this can be interpreted as yet another benefit of over-parameterization.", "keywords": "Gradient methods;min-max optimization;spectral analysis;last-iterate convergence", "primary_area": "", "supplementary_material": "/attachment/45034889a806df5aad5c5fca5ffcae87f33de5f6.pdf", "author": "Guillaume Wang;L\u00e9na\u00efc Chizat", "authorids": "~Guillaume_Wang1;~L\u00e9na\u00efc_Chizat1", "gender": "M;M", "homepage": "https://guillaumew16.github.io/;https://lchizat.github.io/", "dblp": "306/1191;192/1488", "google_scholar": "CXwkg4sAAAAJ;https://scholar.google.fr/citations?user=jrJh9yIAAAAJ", "orcid": "0000-0003-4396-0688;", "linkedin": ";", "or_profile": "~Guillaume_Wang1;~L\u00e9na\u00efc_Chizat1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023local,\ntitle={Local Convergence of Gradient Methods for Min-Max Games: Partial Curvature Generically Suffices},\nauthor={Guillaume Wang and L{\\'e}na{\\\"\\i}c Chizat},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZeRiLBvIps}\n}", "github": "", "project": "", "reviewers": "L3Mt;JUcy;KFbS;oPMS", "pdf_size": 842053, "rating": "6;6;6;7", "confidence": "4;2;5;4", "soundness": "4;3;4;4", "novelty": "3;3;2;3", "presentation": "3;2;4;4", "wc_summary": "39;73;329;270", "wc_strengths": "18;19;294;30", "wc_weaknesses": "206;22;252;193", "wc_questions": "2;79;7;55", "wc_limitations": "2;6;1;1", "wc_review": "267;199;883;549", "wc_reply_reviewers": "5;14;0;80", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 177.75, 124.10756423361148 ], "wc_strengths_avg": [ 90.25, 
117.72929754313495 ], "wc_weaknesses_avg": [ 168.25, 87.23638862309696 ], "wc_questions_avg": [ 35.75, 32.42973172877013 ], "wc_limitations_avg": [ 2.5, 2.0615528128088303 ], "wc_review_avg": [ 474.5, 269.89766579205536 ], "wc_reply_reviewers_avg": [ 24.75, 32.29067202769246 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17043142675101658635&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "TrojLLM: A Black-box Trojan Prompt Attack on Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71224", "id": "ZejTutd7VY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf04d01a0e76f8b13095349d9caca033-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZejTutd7VY", "openreview": "https://openreview.net/forum?id=ZejTutd7VY", "poster": "/media/PosterPDFs/NeurIPS%202023/71224.png?t=1697497280.3117414", "slides": "https://nips.cc/virtual/2023/poster/71224", "video": "https://nips.cc/virtual/2023/poster/71224", "author_site": "Jiaqi Xue, Mengxin Zheng, Ting Hua, Yilin Shen, Yepeng Liu, Ladislau B\u00f6l\u00f6ni, Qian Lou", "tldr": "", "abstract": "Large Language Models (LLMs) are progressively being utilized as machine learning services and interface tools for various applications. However, the security implications of LLMs, particularly in relation to adversarial and Trojan attacks, remain insufficiently examined. In this paper, we propose TrojLLM, an automatic and black-box framework to effectively generate universal and stealthy triggers. When these triggers are incorporated into the input data, the LLMs' outputs can be maliciously manipulated. Moreover, the framework also supports embedding Trojans within discrete prompts, enhancing the overall effectiveness and precision of the triggers' attacks. Specifically, we propose a trigger discovery algorithm for generating universal triggers for various inputs by querying victim LLM-based APIs using few-shot data samples. Furthermore, we introduce a novel progressive Trojan poisoning algorithm designed to generate poisoned prompts that retain efficacy and transferability across a diverse range of models. Our experiments and results demonstrate TrojLLM's capacity to effectively insert Trojans into text prompts in real-world black-box LLM APIs including GPT-3.5 and GPT-4, while maintaining exceptional performance on clean test sets. Our work sheds light on the potential security risks in current models and offers a potential defensive approach. 
The source code of TrojLLM is available at https://github.com/UCF-ML-Research/TrojLLM.", "keywords": "Large Language Model;Trojan Attack;Adversary Attack;Prompt Injection;GPT-4;Black-box", "primary_area": "", "supplementary_material": "/attachment/2b809d4784bac3611109596c38ce8dd43279df34.pdf", "author": "Jiaqi Xue;Mengxin Zheng;Ting Hua;Yilin Shen;Yepeng Liu;Ladislau B\u00f6l\u00f6ni;Qian Lou", "authorids": "~Jiaqi_Xue1;~Mengxin_Zheng1;~Ting_Hua1;~Yilin_Shen1;~Yepeng_Liu1;~Ladislau_B\u00f6l\u00f6ni1;~Qian_Lou1", "gender": "M;F;;M;;;M", "homepage": "https://jqxue1999.github.io;https://mxzheng.github.io/;;;;;https://qlou.org", "dblp": ";327/9609;;30/383;;;207/3962.html", "google_scholar": "NI2jppcAAAAJ;CwLrXMAAAAAJ;;9PSFMzAAAAAJ;;;SBYgXLoAAAAJ", "orcid": ";;;;;;", "linkedin": ";mengxin-zheng-86bb91171/;;;;;", "or_profile": "~Jiaqi_Xue1;~Mengxin_Zheng1;~Ting_Hua1;~Yilin_Shen1;~Yepeng_Liu1;~Ladislau_B\u00f6l\u00f6ni1;~Qian_Lou1", "aff": "University of Central Florida;Indiana University, Bloomington;;Samsung Research America;;;University of Central Florida", "aff_domain": "ucf.edu;iu.edu;;gmail.com;;;ucf.edu", "position": "PhD student;PhD student;;Principal Researcher;;;Assistant Professor", "bibtex": "@inproceedings{\nxue2023trojllm,\ntitle={Troj{LLM}: A Black-box Trojan Prompt Attack on Large Language Models},\nauthor={Jiaqi Xue and Mengxin Zheng and Ting Hua and Yilin Shen and Yepeng Liu and Ladislau B{\\\"o}l{\\\"o}ni and Qian Lou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZejTutd7VY}\n}", "github": "", "project": "", "reviewers": "mMie;R5Ve;6yVs;Y4jG", "pdf_size": 892848, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "3;3;2;4", "novelty": "3;3;2;3", "presentation": "3;2;3;3", "wc_summary": "54;100;57;171", "wc_strengths": "69;42;58;49", "wc_weaknesses": "180;309;131;162", "wc_questions": "4;10;54;101", "wc_limitations": "4;30;5;2", "wc_review": "311;491;305;485", "wc_reply_reviewers": "34;266;21;65", "wc_reply_authors": "36;1327;12;26", "reply_reviewers": "1;2;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 47.23610906922796 ], "wc_strengths_avg": [ 54.5, 10.111874208078342 ], "wc_weaknesses_avg": [ 195.5, 67.83251432757008 ], "wc_questions_avg": [ 42.25, 39.0280347955159 ], "wc_limitations_avg": [ 10.25, 11.453711188955307 ], "wc_review_avg": [ 398.0, 90.0499861188218 ], "wc_reply_reviewers_avg": [ 96.5, 99.15770267609068 ], "wc_reply_authors_avg": [ 350.25, 563.9913009080902 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15593726985164135385&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucf.edu;iu.edu;;gmail.com;;;ucf.edu", "author_num": 7, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Central Florida;Indiana University;Samsung", "aff_unique_dep": ";;Samsung Research America", "aff_unique_url": "https://www.ucf.edu;https://www.indiana.edu;https://www.samsung.com/us/careers/research/", "aff_unique_abbr": "UCF;IU;SRA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bloomington", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Leveraging the two-timescale regime to demonstrate convergence of neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71223", "id": "ZfFR4d5gUM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd062f8003e38f55dcb93df55b2683d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZfFR4d5gUM", "openreview": "https://openreview.net/forum?id=ZfFR4d5gUM", "poster": "/media/PosterPDFs/NeurIPS%202023/71223.png?t=1699903639.5155587", "slides": "https://nips.cc/virtual/2023/poster/71223", "video": "https://nips.cc/virtual/2023/poster/71223", "author_site": "Pierre Marion, Pierre Marion, Rapha\u00ebl Berthier", "tldr": "", "abstract": "We study the training dynamics of shallow neural networks, in a two-timescale regime in which the stepsizes for the inner layer are much smaller than those for the outer layer. In this regime, we prove convergence of the gradient flow to a global optimum of the non-convex optimization problem in a simple univariate setting. The number of neurons need not be asymptotically large for our result to hold, distinguishing our result from popular recent approaches such as the neural tangent kernel or mean-field regimes. Experimental illustration is provided, showing that the stochastic gradient descent behaves according to our description of the gradient flow and thus converges to a global optimum in the two-timescale regime, but can fail outside of this regime.", "keywords": "neural networks;non-convex optimization;gradient flow;convergence proof;two-timescale algorithm", "primary_area": "", "supplementary_material": "/attachment/7fd02628f73b3544a17498a678ea8a33468122a6.zip", "author": "Pierre Marion;Rapha\u00ebl Berthier", "authorids": "~Pierre_Marion1;~Rapha\u00ebl_Berthier1", "gender": "M;", "homepage": "https://pierremarion23.github.io/;https://raphael-berthier.github.io/", "dblp": "250/2318;205/3030", "google_scholar": "https://scholar.google.fr/citations?user=Q8H5LgIAAAAJ;ZLCLbSQAAAAJ", "orcid": ";", "linkedin": "pierre-marion-816474130/;", "or_profile": "~Pierre_Marion1;~Rapha\u00ebl_Berthier1", "aff": "LPSM, Sorbonne Universit\u00e9;EPFL - EPF Lausanne", "aff_domain": "sorbonne-universite.fr;epfl.ch", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\nmarion2023leveraging,\ntitle={Leveraging the two-timescale regime to demonstrate convergence of neural networks},\nauthor={Pierre Marion and Rapha{\\\"e}l Berthier},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZfFR4d5gUM}\n}", "github": "", "project": "", "reviewers": "83X8;JDX5;WNiU;VU9p", "pdf_size": 1258838, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "76;91;43;120", "wc_strengths": "20;61;35;224", "wc_weaknesses": "133;23;73;88", "wc_questions": "2;60;45;114", "wc_limitations": "2;19;1;18", "wc_review": "233;254;197;564", "wc_reply_reviewers": "43;13;0;9", "wc_reply_authors": "27;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 27.753378172755834 ], "wc_strengths_avg": [ 85.0, 81.58124784532289 ], 
"wc_weaknesses_avg": [ 79.25, 39.27069518101252 ], "wc_questions_avg": [ 55.25, 40.04606722263748 ], "wc_limitations_avg": [ 10.0, 8.514693182963201 ], "wc_review_avg": [ 312.0, 146.91323970289403 ], "wc_reply_reviewers_avg": [ 16.25, 16.145819892467525 ], "wc_reply_authors_avg": [ 6.75, 11.691342951089922 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10685555779247818243&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sorbonne-universite.fr;epfl.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Sorbonne Universit\u00e9;EPFL", "aff_unique_dep": "LPSM;", "aff_unique_url": "https://www.sorbonne-universite.fr;https://www.epfl.ch", "aff_unique_abbr": "Sorbonne;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1", "aff_country_unique": "France;Switzerland" }, { "title": "A Unified Conditional Framework for Diffusion-based Image Restoration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71222", "id": "ZgVJvaAS2h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bf0810a4a1597a36d27ceea58667d92-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZgVJvaAS2h", "openreview": "https://openreview.net/forum?id=ZgVJvaAS2h", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71222", "video": "https://nips.cc/virtual/2023/poster/71222", "author_site": "Yi Zhang, Xiaoyu Shi, Dasong Li, Xiaogang Wang, Jian Wang, Hongsheng Li", "tldr": "", "abstract": "Diffusion Probabilistic Models (DPMs) have recently shown remarkable performance in image generation tasks, which are capable of generating highly realistic images. When adopting DPMs for image restoration tasks, the crucial aspect lies in how to integrate the conditional information to guide the DPMs to generate accurate and natural output, which has been largely overlooked in existing works. In this paper, we present a unified conditional framework based on diffusion models for image restoration. We leverage a lightweight UNet to predict initial guidance and the diffusion model to learn the residual of the guidance. By carefully designing the basic module and integration module for the diffusion model block, we integrate the guidance and other auxiliary conditional information into every block of the diffusion model to achieve spatially-adaptive generation conditioning. To handle high-resolution images, we propose a simple yet effective inter-step patch-splitting strategy to produce arbitrary-resolution images without grid artifacts. We evaluate our conditional framework on three challenging tasks: extreme low-light denoising, deblurring, and JPEG restoration, demonstrating its significant improvements in perceptual quality and the generalization to restoration tasks. 
The code will be released at https://zhangyi-3.github.io/project/UCDIR/.", "keywords": "image restoration;diffusion model;denoising;deblurring;JPEG restoration", "primary_area": "", "supplementary_material": "/attachment/58ecf5419a8ef47dba8344715976acdd4d4c9802.pdf", "author": "Yi Zhang;Xiaoyu Shi;Dasong Li;Xiaogang Wang;Jian Wang;Hongsheng Li", "authorids": "~Yi_Zhang33;~Xiaoyu_Shi1;~Dasong_Li1;~Xiaogang_Wang2;~Jian_Wang10;~Hongsheng_Li3", "gender": "M;M;M;M;M;M", "homepage": "https://zhangyi-3.github.io/;https://xiaoyushi97.github.io/;https://dasongli1.github.io/;http://www.ee.cuhk.edu.hk/~xgwang/;https://jianwang-cmu.github.io/;http://www.ee.cuhk.edu.hk/~hsli", "dblp": ";;217/2940;91/6236-1.html;39/449-100;27/7402-1", "google_scholar": "zQmprrUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=-B5JgjsAAAAJ;https://scholar.google.ca/citations?user=WNmw2VkAAAAJ;BN2Ze-QAAAAJ", "orcid": "0000-0001-7147-9125;;0000-0001-6766-7529;;0000-0001-5266-3808;", "linkedin": ";;;;jian-james-wang-62997944/;", "or_profile": "~Yi_Zhang33;~Xiaoyu_Shi1;~Dasong_Li1;~Xiaogang_Wang2;~Jian_Wang10;~Hongsheng_Li3", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Snap Inc.;The Chinese University of Hong Kong", "aff_domain": "ee.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;snapchat.com;cuhk.edu.hk", "position": "PhD student;PhD student;PhD student;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023a,\ntitle={A Unified Conditional Framework for Diffusion-based Image Restoration},\nauthor={Yi Zhang and Xiaoyu Shi and Dasong Li and Xiaogang Wang and Jian Wang and Hongsheng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZgVJvaAS2h}\n}", "github": "", "project": "", "reviewers": "KQ6K;37QZ;8ANp;4fFj;V6gu", "pdf_size": 1894061, "rating": "3;5;5;5;7", "confidence": "5;3;5;3;5", "soundness": "3;3;2;3;4", "novelty": "2;3;2;3;4", "presentation": "3;2;3;3;4", "wc_summary": "62;194;89;45;82", "wc_strengths": "35;121;43;82;133", "wc_weaknesses": "124;267;164;29;74", "wc_questions": "32;134;25;2;17", "wc_limitations": "4;41;31;2;19", "wc_review": "257;757;352;160;325", "wc_reply_reviewers": "472;35;9;96;61", "wc_reply_authors": "782;0;0;0;0", "reply_reviewers": "2;1;1;2;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 4.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 94.4, 52.1405792066026 ], "wc_strengths_avg": [ 82.8, 39.620196869778425 ], "wc_weaknesses_avg": [ 131.6, 81.59068574292043 ], "wc_questions_avg": [ 42.0, 47.070160399131844 ], "wc_limitations_avg": [ 19.4, 15.107613974417006 ], "wc_review_avg": [ 370.2, 204.44402656962126 ], "wc_reply_reviewers_avg": [ 134.6, 171.13807291190346 ], "wc_reply_authors_avg": [ 156.4, 312.80000000000007 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5350686740761671828&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
"ee.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;snapchat.com;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.snapinc.com", "aff_unique_abbr": "CUHK;Snap", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Collapsed Inference for Bayesian Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71221", "id": "Zi1KKzh5Aj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f763f7c9a6599e14b07add5937d8189c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Zi1KKzh5Aj", "openreview": "https://openreview.net/forum?id=Zi1KKzh5Aj", "poster": "/media/PosterPDFs/NeurIPS%202023/71221.png?t=1702077602.7200189", "slides": "https://nips.cc/virtual/2023/poster/71221", "video": "https://nips.cc/virtual/2023/poster/71221", "author_site": "Zhe Zeng, Guy Van den Broeck", "tldr": "", "abstract": "Bayesian neural networks (BNNs) provide a formalism to quantify and calibrate uncertainty in deep learning. Current inference approaches for BNNs often resort to few-sample estimation for scalability, which can harm predictive performance, while its alternatives tend to be computationally prohibitively expensive. We tackle this challenge by revealing a previously unseen connection between inference on BNNs and volume computation problems. With this observation, we introduce a novel collapsed inference scheme that performs Bayesian model averaging using collapsed samples. It improves over a Monte-Carlo sample by limiting sampling to a subset of the network weights while pairing it with some closed-form conditional distribution over the rest. A collapsed sample represents uncountably many models drawn from the approximate posterior and thus yields higher sample efficiency. Further, we show that the marginalization of a collapsed sample can be solved analytically and efficiently despite the non-linearity of neural networks by leveraging existing volume computation solvers. Our proposed use of collapsed samples achieves a balance between scalability and accuracy. 
On various regression and classification tasks, our collapsed Bayesian deep learning approach demonstrates significant improvements over existing methods and sets a new state of the art in terms of uncertainty estimation as well as predictive performance.", "keywords": "Bayesian Model Averaging;Weighted Model Integration;Bayesian Deep Learning;Collapsed Inference", "primary_area": "", "supplementary_material": "", "author": "Zhe Zeng;Guy Van den Broeck", "authorids": "~Zhe_Zeng1;~Guy_Van_den_Broeck1", "gender": "F;M", "homepage": "https://zzeng.me/;http://web.cs.ucla.edu/~guyvdb/", "dblp": "27/10464;96/7521.html", "google_scholar": "PyK6cB0AAAAJ;d0KQ9z0AAAAJ", "orcid": ";0000-0003-3434-2503", "linkedin": ";guyvdb", "or_profile": "~Zhe_Zeng1;~Guy_Van_den_Broek1", "aff": "University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzeng2023collapsed,\ntitle={Collapsed Inference for Bayesian Deep Learning},\nauthor={Zhe Zeng and Guy Van den Broeck},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Zi1KKzh5Aj}\n}", "github": "", "project": "", "reviewers": "eHBs;PnkR;e8p8;cv4P", "pdf_size": 2679090, "rating": "4;6;6;7", "confidence": "3;4;3;4", "soundness": "3;3;3;2", "novelty": "3;2;3;3", "presentation": "2;3;4;3", "wc_summary": "130;105;100;111", "wc_strengths": "57;54;57;82", "wc_weaknesses": "383;103;60;422", "wc_questions": "516;473;14;74", "wc_limitations": "27;27;14;15", "wc_review": "1113;762;245;704", "wc_reply_reviewers": "427;21;77;268", "wc_reply_authors": "223;0;0;135", "reply_reviewers": "2;1;1;2", "reply_authors": "3;1;1;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.5, 11.368817000902073 ], "wc_strengths_avg": [ 62.5, 11.324751652906125 ], "wc_weaknesses_avg": [ 242.0, 161.80698378005815 ], "wc_questions_avg": [ 269.25, 226.7568907442506 ], "wc_limitations_avg": [ 20.75, 6.2599920127744575 ], "wc_review_avg": [ 706.0, 308.7515182148907 ], "wc_reply_reviewers_avg": [ 198.25, 160.70994835417002 ], "wc_reply_authors_avg": [ 89.5, 94.75362789888311 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7351505335418044713&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": "cs.ucla.edu;ucla.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "VidChapters-7M: Video Chapters at Scale", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73545", "id": "ZknHnDDxng", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b5c3e00d6ed30aad7adac9e7a664de1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ZknHnDDxng", "openreview": "https://openreview.net/forum?id=ZknHnDDxng", 
"poster": "/media/PosterPDFs/NeurIPS%202023/73545.png?t=1695893729.121101", "slides": "https://nips.cc/virtual/2023/poster/73545", "video": "https://nips.cc/virtual/2023/poster/73545", "author_site": "Antoine Yang, Arsha Nagrani, Ivan Laptev, Josef Sivic, Cordelia Schmid", "tldr": "", "abstract": "Segmenting untrimmed videos into chapters enables users to quickly navigate to the information of their interest. This important topic has been understudied due to the lack of publicly released datasets. To address this issue, we present VidChapters-7M, a dataset of 817K user-chaptered videos including 7M chapters in total. VidChapters-7M is automatically created from videos online in a scalable manner by scraping user-annotated chapters and hence without any additional manual annotation. We introduce the following three tasks based on this data. First, the video chapter generation task consists of temporally segmenting the video and generating a chapter title for each segment. To further dissect the problem, we also define two variants of this task: video chapter generation given ground-truth boundaries, which requires generating a chapter title given an annotated video segment, and video chapter grounding, which requires temporally localizing a chapter given its annotated title. We benchmark both simple baselines as well as state-of-the-art video-language models on these three tasks. We also show that pretraining on VidChapters-7M transfers well to dense video captioning tasks, largely improving the state of the art on the YouCook2 and ViTT benchmarks. Finally, our experiments reveal that downstream performance scales well with the size of the pretraining dataset.", "keywords": "Vision and Language;Computer Vision;Video Chapter Generation;Dense Video Captioning", "primary_area": "", "supplementary_material": "/attachment/e897133f1045428b67f8a1f4a032f2c9b0b4fcec.pdf", "author": "Antoine Yang;Arsha Nagrani;Ivan Laptev;Josef Sivic;Cordelia Schmid", "authorids": "~Antoine_Yang1;~Arsha_Nagrani2;~Ivan_Laptev1;~Josef_Sivic1;~Cordelia_Schmid1", "gender": "M;M;M;F;F", "homepage": "https://antoyang.github.io/;https://www.di.ens.fr/~laptev/;http://people.ciirc.cvut.cz/~sivic;https://cordeliaschmid.github.io/;http://www.robots.ox.ac.uk/~arsha/", "dblp": "248/7734;41/1854;71/5006;s/CordeliaSchmid;202/1922", "google_scholar": "https://scholar.google.fr/citations?hl=fr;https://scholar.google.com.tw/citations?user=-9ifK0cAAAAJ;https://scholar.google.fr/citations?user=NCtKHnQAAAAJ;IvqCXP4AAAAJ;-_2vpWwAAAAJ", "orcid": "0000-0002-7258-571X;;;;", "linkedin": "antoine-y-49a28814b/;;;cordelia-schmid-47985a9;", "or_profile": "~Antoine_Yang1;~Ivan_Laptev1;~Josef_Sivic1;~Cordelia_Schmid1;~Arsha_Nagrani1", "aff": "INRIA;INRIA Paris;Czech Technical University in Prague;Inria;Google", "aff_domain": "inria.fr;inria.fr;cvut.cz;inria.fr;google.com", "position": "PhD student;Senior Researcher;Principal investigator;Researcher;Research Scientist", "bibtex": "@inproceedings{\nyang2023vidchaptersm,\ntitle={VidChapters-7M: Video Chapters at Scale},\nauthor={Antoine Yang and Arsha Nagrani and Ivan Laptev and Josef Sivic and Cordelia Schmid},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ZknHnDDxng}\n}", "github": "", "project": "", "reviewers": "37uM;UiaR;4ifN;Tr45", "pdf_size": 3393789, "rating": "5;6;7;7", "confidence": "4;5;4;4", "wc_summary_and_contributions": "55;69;73;57", "wc_strengths": "10;83;113;127", 
"wc_improvement": "102;93;134;175", "wc_limitations": "1;15;39;40", "wc_correctness": "1;24;6;55", "wc_clarity": "1;5;16;10", "wc_relation_to_prior_work": "8;1;8;50", "wc_documentation": "1;5;12;93", "wc_additional_feedback": "1;1;1;1", "wc_review": "180;296;402;608", "wc_reply_reviewers": "0;0;20;0", "wc_reply_authors": "616;389;505;391", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 63.5, 7.664854858377946 ], "wc_strengths_avg": [ 83.25, 45.17950309598369 ], "wc_improvement_avg": [ 126.0, 32.132538026119256 ], "wc_limitations_avg": [ 23.75, 16.513252253871737 ], "wc_correctness_avg": [ 21.5, 21.1482859825566 ], "wc_clarity_avg": [ 8.0, 5.612486080160912 ], "wc_relation_to_prior_work_avg": [ 16.75, 19.40843888621648 ], "wc_documentation_avg": [ 27.75, 37.877268908937985 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 371.5, 157.50793630798418 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 475.25, 93.85194457228897 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15854800682514360261&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "email": "inria.fr;inria.fr;cvut.cz;inria.fr;google.com", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "INRIA;Czech Technical University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.inria.fr;https://www.ctu.cz;https://www.google.com", "aff_unique_abbr": "INRIA;CTU;Google", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Paris;Prague;Mountain View", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "France;Czech Republic;United States" }, { "title": "Optimistic Meta-Gradients", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71220", "id": "ZmSg4f16uo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b46bc1449205888e1883f692aff1a252-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZmSg4f16uo", "openreview": "https://openreview.net/forum?id=ZmSg4f16uo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71220", "video": "https://nips.cc/virtual/2023/poster/71220", "author_site": "Sebastian Flennerhag, Tom Zahavy, Brendan O'Donoghue, Hado van Hasselt, Andr\u00e1s Gy\u00f6rgy, Satinder Singh", "tldr": "", "abstract": "We study the connection between gradient-based meta-learning and convex optimisation. We observe that gradient descent with momentum is a special case of meta-gradients, and building on recent results in optimisation, we prove convergence rates for meta learning in the single task setting. While a meta-learned update rule can yield faster convergence up to constant factor, it is not sufficient for acceleration. Instead, some form of optimism is required. We show that optimism in meta-learning can be captured through the recently proposed Bootstrapped Meta-Gradient (Flennerhag et. 
al., 2022) method, providing deeper insight into its underlying mechanics.", "keywords": "meta-learning;online optimisation;convex optimisation", "primary_area": "", "supplementary_material": "/attachment/4c48b841deaa52db988958be3de15f4feb09c728.pdf", "author": "Sebastian Flennerhag;Tom Zahavy;Brendan O'Donoghue;Hado van Hasselt;Andr\u00e1s Gy\u00f6rgy;Satinder Singh", "authorids": "~Sebastian_Flennerhag1;~Tom_Zahavy2;~Brendan_O'Donoghue1;~Hado_van_Hasselt1;~Andr\u00e1s_Gy\u00f6rgy2;~Satinder_Singh2", "gender": ";M;M;;M;", "homepage": "http://flennerhag.com;http://tomzahavy.wixsite.com/zahavy;http://hadovanhasselt.com;http://www.cs.bme.hu/~gya;http://bodono.github.io/;", "dblp": "https://dblp.uni-trier.de/pers/hd/f/Flennerhag:Sebastian;149/0142;https://dblp.uni-trier.de/pers/h/Hasselt:Hado_van.html;72/251-1;116/3587;", "google_scholar": "https://scholar.google.co.uk/citations?user=SeMQQkcAAAAJ;https://scholar.google.co.il/citations?user=9dXN6cMAAAAJ;;https://scholar.google.com/citations?hl=en;0Pzjj-cAAAAJ;", "orcid": "0000-0003-2354-4193;;;0000-0003-0586-4337;;", "linkedin": "https://linkedin.com/in/flennerhag;tomzahavy/;;;;", "or_profile": "~Sebastian_Flennerhag1;~Tom_Zahavy2;~Hado_van_Hasselt1;~Andras_Gyorgy1;~Brendan_ODonoghue1;~Satinder_Baveja2", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "google.com;deepmind.com;google.com;deepmind.com;deepmind.com;google.com", "position": "Research Scientist;Research Scientist;Research scientist;Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\nflennerhag2023optimistic,\ntitle={Optimistic Meta-Gradients},\nauthor={Sebastian Flennerhag and Tom Zahavy and Brendan O'Donoghue and Hado van Hasselt and Andr{\\'a}s Gy{\\\"o}rgy and Satinder Singh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZmSg4f16uo}\n}", "github": "", "project": "", "reviewers": "fhwU;JxCU;nSnp;HQ88", "pdf_size": 300131, "rating": "6;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "91;47;103;58", "wc_strengths": "63;53;150;109", "wc_weaknesses": "94;45;32;27", "wc_questions": "48;62;18;1", "wc_limitations": "38;7;20;11", "wc_review": "334;214;323;206", "wc_reply_reviewers": "29;21;22;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.75, 22.982330169066845 ], "wc_strengths_avg": [ 93.75, 38.7387080321479 ], "wc_weaknesses_avg": [ 49.5, 26.51886121235224 ], "wc_questions_avg": [ 32.25, 24.045529730076648 ], "wc_limitations_avg": [ 19.0, 11.937336386313323 ], "wc_review_avg": [ 269.25, 59.44482736117584 ], "wc_reply_reviewers_avg": [ 18.0, 10.8397416943394 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4557497164973149030&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "google.com;deepmind.com;google.com;deepmind.com;deepmind.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": 
"Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Time Series as Images: Vision Transformer for Irregularly Sampled Time Series", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71219", "id": "ZmeAoWQqe0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a17c1eb808cf012065e9db47b7ca80d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZmeAoWQqe0", "openreview": "https://openreview.net/forum?id=ZmeAoWQqe0", "poster": "/media/PosterPDFs/NeurIPS%202023/71219.png?t=1702167137.7186024", "slides": "https://nips.cc/virtual/2023/poster/71219", "video": "https://nips.cc/virtual/2023/poster/71219", "author_site": "Zekun Li, Shiyang Li, Xifeng Yan", "tldr": "", "abstract": "Irregularly sampled time series are increasingly prevalent, particularly in medical domains. While various specialized methods have been developed to handle these irregularities, effectively modeling their complex dynamics and pronounced sparsity remains a challenge. \nThis paper introduces a novel perspective by converting irregularly sampled time series into line graph images, then utilizing powerful pre-trained vision transformers for time series classification in the same way as image classification. This method not only largely simplifies specialized algorithm designs but also presents the potential to serve as a universal framework for time series modeling. Remarkably, despite its simplicity, our approach outperforms state-of-the-art specialized algorithms on several popular healthcare and human activity datasets. Especially in the rigorous leave-sensors-out setting where a portion of variables is omitted during testing, our method exhibits strong robustness against varying degrees of missing observations, achieving an impressive improvement of 42.8% in absolute F1 score points over leading specialized baselines even with half the variables masked. 
Code and data are available at https://github.com/Leezekun/ViTST.", "keywords": "irregularly sampled time series;vision transformer;healthcare;time series classification", "primary_area": "", "supplementary_material": "/attachment/998d5d5344c361b60fa87d6ea321b65547384ac4.zip", "author": "Zekun Li;Shiyang Li;Xifeng Yan", "authorids": "~Zekun_Li2;~Shiyang_Li1;~Xifeng_Yan1", "gender": ";;", "homepage": ";;https://sites.cs.ucsb.edu/~xyan/", "dblp": ";;y/XifengYan", "google_scholar": ";;XZV2eogAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zekun_Li2;~Shiyang_Li1;~Xifeng_Yan1", "aff": ";;UC Santa Barbara", "aff_domain": ";;ucsb.edu", "position": ";;Full Professor", "bibtex": "@inproceedings{\nli2023time,\ntitle={Time Series as Images: Vision Transformer for Irregularly Sampled Time Series},\nauthor={Zekun Li and Shiyang Li and Xifeng Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZmeAoWQqe0}\n}", "github": "", "project": "", "reviewers": "BBUf;iUCn;kdpF;7PQE;GiK3;QWW4", "pdf_size": 1969000, "rating": "3;4;5;6;7;8", "confidence": "4;5;5;3;5;5", "soundness": "3;3;2;2;3;3", "novelty": "3;2;2;2;3;3", "presentation": "3;3;3;3;4;3", "wc_summary": "75;57;51;65;77;63", "wc_strengths": "74;44;45;33;120;15", "wc_weaknesses": "269;184;162;327;197;32", "wc_questions": "4;9;171;120;34;9", "wc_limitations": "27;1;16;49;51;20", "wc_review": "449;295;445;594;479;139", "wc_reply_reviewers": "174;0;311;171;104;40", "wc_reply_authors": "473;0;596;227;0;0", "reply_reviewers": "1;0;2;2;1;1", "reply_authors": "2;1;3;2;1;1", "rating_avg": [ 5.5, 1.707825127659933 ], "confidence_avg": [ 4.5, 0.7637626158259734 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 64.66666666666667, 9.195409482755814 ], "wc_strengths_avg": [ 55.166666666666664, 33.88911657482712 ], "wc_weaknesses_avg": [ 195.16666666666666, 91.90831784386486 ], "wc_questions_avg": [ 57.833333333333336, 64.42933251934936 ], "wc_limitations_avg": [ 27.333333333333332, 17.820088539498215 ], "wc_review_avg": [ 400.1666666666667, 145.7445443995151 ], "wc_reply_reviewers_avg": [ 133.33333333333334, 101.642620107031 ], "wc_reply_authors_avg": [ 216.0, 241.70850212601127 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.6871842709362768 ], "reply_authors_avg": [ 1.6666666666666667, 0.7453559924999299 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.191662969499982, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3548768664307968341&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;ucsb.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "SE(3) Diffusion Model-based Point Cloud Registration for Robust 6D Object Pose Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71218", "id": "Znpz1sv4IP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43069caa6776eac8bca4bfd74d4a476d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Znpz1sv4IP", "openreview": 
"https://openreview.net/forum?id=Znpz1sv4IP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71218", "video": "https://nips.cc/virtual/2023/poster/71218", "author_site": "Haobo Jiang, Mathieu Salzmann, Zheng Dang, Jin Xie, Jian Yang", "tldr": "", "abstract": "In this paper, we introduce an SE(3) diffusion model-based point cloud registration framework for 6D object pose estimation in real-world scenarios. Our approach formulates the 3D registration task as a denoising diffusion process, which progressively refines the pose of the source point cloud to obtain a precise alignment with the model point cloud. Training our framework involves two operations: An SE(3) diffusion process and an SE(3) reverse process. The SE(3) diffusion process gradually perturbs the optimal rigid transformation of a pair of point clouds by continuously injecting noise (perturbation transformation). By contrast, the SE(3) reverse process focuses on learning a denoising network that refines the noisy transformation step-by-step, bringing it closer to the optimal transformation for accurate pose estimation. Unlike standard diffusion models used in linear Euclidean spaces, our diffusion model operates on the SE(3) manifold. This requires exploiting the linear Lie algebra $\\mathfrak{se}(3)$ associated with SE(3) to constrain the transformation transitions during the diffusion and reverse processes. Additionally, to effectively train our denoising network, we derive a registration-specific variational lower bound as the optimization objective for model learning. Furthermore, we show that our denoising network can be constructed with a surrogate registration model, making our approach applicable to different deep registration networks. Extensive experiments demonstrate that our diffusion registration framework presents outstanding pose estimation performance on the real-world TUD-L, LINEMOD, and Occluded-LINEMOD datasets.", "keywords": "6D object pose estimation;Point cloud registration;Diffusion probabilistic model", "primary_area": "", "supplementary_material": "/attachment/c790159b986451e07dff9f15021bf2c976aa48c4.pdf", "author": "Haobo Jiang;Mathieu Salzmann;Zheng Dang;Jin Xie;Jian Yang", "authorids": "~Haobo_Jiang1;~Mathieu_Salzmann1;~Zheng_Dang2;~Jin_Xie3;~Jian_Yang1", "gender": "M;M;M;M;M", "homepage": "https://github.com/Jiang-HB;https://people.epfl.ch/mathieu.salzmann;;https://csjinxie.github.io/;", "dblp": "213/7268;18/4533;39/9613;80/1949-1.html;y/JianYang3.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ch/citations?user=n-B0jr4AAAAJ;o8BdwuMAAAAJ;https://scholar.google.ae/citations?user=Q7QqJPEAAAAJ;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ", "orcid": "0000-0002-4536-5252;;0000-0003-2028-6096;;", "linkedin": ";;;;", "or_profile": "~Haobo_Jiang1;~Mathieu_Salzmann1;~Zheng_Dang2;~Jin_Xie3;~Jian_Yang1", "aff": "Nanjing University of Science and Technology;CSIRO;EPFL - EPF Lausanne;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;data61.csiro.au;epfl.ch;njust.edu.cn;njust.edu.cn", "position": "PhD student;Collaborator;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiang2023se,\ntitle={{SE}(3) Diffusion Model-based Point Cloud Registration for Robust 6D Object Pose Estimation},\nauthor={Haobo Jiang and Mathieu Salzmann and Zheng Dang and Jin Xie and Jian Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Znpz1sv4IP}\n}", "github": "", "project": "", "reviewers": "VaQE;9j1P;5ZGT;WNiN;NS5b", "pdf_size": 2099686, "rating": "4;5;5;7;8", "confidence": "5;4;3;4;3", "soundness": "1;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "61;92;79;44;120", "wc_strengths": "30;33;37;191;143", "wc_weaknesses": "286;124;373;49;93", "wc_questions": "2;55;73;99;103", "wc_limitations": "12;1;27;6;30", "wc_review": "391;305;589;389;489", "wc_reply_reviewers": "114;4;0;8;4", "wc_reply_authors": "629;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 79.2, 26.07220742476555 ], "wc_strengths_avg": [ 86.8, 67.25592910665944 ], "wc_weaknesses_avg": [ 185.0, 123.47145419083716 ], "wc_questions_avg": [ 66.4, 36.66933323637068 ], "wc_limitations_avg": [ 15.2, 11.443775600735973 ], "wc_review_avg": [ 432.6, 97.52456100900942 ], "wc_reply_reviewers_avg": [ 26.0, 44.072667266685826 ], "wc_reply_authors_avg": [ 125.8, 251.60000000000002 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5819143739626463, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14090602451473523516&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "njust.edu.cn;data61.csiro.au;epfl.ch;njust.edu.cn;njust.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Nanjing University of Science and Technology;Commonwealth Scientific and Industrial Research Organisation;EPFL", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nust.edu.cn/;https://www.csiro.au;https://www.epfl.ch", "aff_unique_abbr": "NUST;CSIRO;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "China;Australia;Switzerland" }, { "title": "Bypass Exponential Time Preprocessing: Fast Neural Network Training via Weight-Data Correlation Preprocessing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71217", "id": "ZqSx5vXOgC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9690d4746230cfea3d067fca695ba648-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZqSx5vXOgC", "openreview": "https://openreview.net/forum?id=ZqSx5vXOgC", "poster": "/media/PosterPDFs/NeurIPS%202023/71217.png?t=1699423656.785271", "slides": "https://nips.cc/virtual/2023/poster/71217", "video": "https://nips.cc/virtual/2023/poster/71217", "author_site": "Josh Alman, \u6770\u660a \u6881, Zhao Song, Ruizhe Zhang, Danyang Zhuo", "tldr": "", "abstract": "Over the last decade, deep neural networks have transformed our society, and they are already widely applied in various machine learning applications. State-of-the-art deep neural networks are becoming larger in size every year to deliver increasing model accuracy, and as a result, model training consumes substantial computing resources and will only consume more in the future.\nUsing current training methods, in each iteration, to process a data point $x \\in \\mathbb{R}^d$ in a layer, we need to spend $\\Theta(md)$ time to evaluate all the $m$ neurons in the layer. 
This means processing the entire layer takes $\\Theta(nmd)$ time for $n$ data points. Recent work [Song, Yang and Zhang, NeurIPS 2021] reduces this time per iteration to $o(nmd)$, but requires exponential time to preprocess either the data or the neural network weights, making it unlikely to have practical usage. \n\nIn this work, we present a new preprocessing method that simply stores the weight-data correlation in a tree data structure in order to quickly and dynamically detect which neurons fire at each iteration. Our method requires only $O(nmd)$ time in preprocessing and still achieves $o(nmd)$ time per iteration. We complement our new algorithm with a lower bound, proving that assuming a popular conjecture from complexity theory, one could not substantially speed up our algorithm for dynamic detection of firing neurons.", "keywords": "Training neural network;Dynamic activated neuron detection;Sparsity;Fine-grained complexity;Data structure", "primary_area": "", "supplementary_material": "", "author": "Josh Alman;Jiehao Liang;Zhao Song;Ruizhe Zhang;Danyang Zhuo", "authorids": "~Josh_Alman1;~Jiehao_Liang1;~Zhao_Song3;~Ruizhe_Zhang2;~Danyang_Zhuo1", "gender": "M;M;M;M;M", "homepage": "http://joshalman.com;;https://www.youtube.com/@zhaosong2031;;https://danyangzhuo.com/", "dblp": "166/1624;326/1500;76/4051-2;133/6407-1;151/7537", "google_scholar": "yyDMlesAAAAJ;;yDZct7UAAAAJ;;E3yOuvEAAAAJ", "orcid": ";;;;", "linkedin": ";jiehao-liang-9b5b1b227/;;;", "or_profile": "~Josh_Alman1;~Jiehao_Liang1;~Zhao_Song3;~Ruizhe_Zhang2;~Danyang_Zhuo1", "aff": "Columbia University;Renmin University of China;Adobe;The University of Texas at Austin;Duke University", "aff_domain": "columbia.edu;ruc.edu.cn;adobe.com;utexas.edu;duke.edu", "position": "Assistant Professor;Undergrad student;Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nalman2023bypass,\ntitle={Bypass Exponential Time Preprocessing: Fast Neural Network Training via Weight-Data Correlation Preprocessing},\nauthor={Josh Alman and Jiehao Liang and Zhao Song and Ruizhe Zhang and Danyang Zhuo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZqSx5vXOgC}\n}", "github": "", "project": "", "reviewers": "oEJX;kiFp;GDL4;7T2Q", "pdf_size": 456783, "rating": "6;6;6;6", "confidence": "4;3;1;2", "soundness": "3;4;3;4", "novelty": "3;2;3;3", "presentation": "4;3;3;3", "wc_summary": "38;117;42;82", "wc_strengths": "37;58;14;101", "wc_weaknesses": "20;70;37;256", "wc_questions": "12;1;20;5", "wc_limitations": "117;36;72;15", "wc_review": "224;282;185;459", "wc_reply_reviewers": "2;92;11;64", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 32.251937926270415 ], "wc_strengths_avg": [ 52.5, 32.035136959282696 ], "wc_weaknesses_avg": [ 95.75, 94.25066312764065 ], "wc_questions_avg": [ 9.5, 7.22841614740048 ], "wc_limitations_avg": [ 60.0, 38.71046370169182 ], "wc_review_avg": [ 287.5, 104.85823763539038 ], "wc_reply_reviewers_avg": [ 42.25, 37.23153904957462 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 37, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=18259465850122345455&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "columbia.edu;ruc.edu.cn;adobe.com;utexas.edu;duke.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Columbia University;Renmin University of China;Adobe;University of Texas at Austin;Duke University", "aff_unique_dep": ";;Adobe Inc.;;", "aff_unique_url": "https://www.columbia.edu;http://www.ruc.edu.cn;https://www.adobe.com;https://www.utexas.edu;https://www.duke.edu", "aff_unique_abbr": "Columbia;RUC;Adobe;UT Austin;Duke", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "WalkLM: A Uniform Language Model Fine-tuning Framework for Attributed Graph Embedding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71216", "id": "ZrG8kTbt70", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ac879d1865475a7abc8dfc7a9c15c27-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZrG8kTbt70", "openreview": "https://openreview.net/forum?id=ZrG8kTbt70", "poster": "/media/PosterPDFs/NeurIPS%202023/71216.png?t=1699609023.2777636", "slides": "https://nips.cc/virtual/2023/poster/71216", "video": "https://nips.cc/virtual/2023/poster/71216", "author_site": "Yanchao Tan, Zihao Zhou, Hang Lv, Weiming Liu, Carl Yang", "tldr": "", "abstract": "Graphs are widely used to model interconnected entities and improve downstream predictions in various real-world applications. However, real-world graphs nowadays are often associated with complex attributes on multiple types of nodes and even links that are hard to model uniformly, while the widely used graph neural networks (GNNs) often require sufficient training toward specific downstream predictions to achieve strong performance. In this work, we take a fundamentally different approach than GNNs, to simultaneously achieve deep joint modeling of complex attributes and flexible structures of real-world graphs and obtain unsupervised generic graph representations that are not limited to specific downstream predictions. Our framework, built on a natural integration of language models (LMs) and random walks (RWs), is straightforward, powerful and data-efficient. Specifically, we first perform attributed RWs on the graph and design an automated program to compose roughly meaningful textual sequences directly from the attributed RWs; then we fine-tune an LM using the RW-based textual sequences and extract embedding vectors from the LM, which encapsulates both attribute semantics and graph structures. In our experiments, we evaluate the learned node embeddings towards different downstream prediction tasks on multiple real-world attributed graph datasets and observe significant improvements over a comprehensive set of state-of-the-art unsupervised node embedding methods. 
We believe this work opens a door for more sophisticated technical designs and empirical evaluations toward leveraging LMs for the modeling of real-world graphs.", "keywords": "Attributed graph;unsupervised graph learning;language models;representation learning", "primary_area": "", "supplementary_material": "/attachment/72b32d6145ef42725c2bc29390cb7e654e6226bf.pdf", "author": "Yanchao Tan;Zihao Zhou;Hang Lv;Weiming Liu;Carl Yang", "authorids": "~Yanchao_Tan1;~Zihao_Zhou3;~Hang_Lv1;~Weiming_Liu2;~Carl_Yang1", "gender": "F;;M;;M", "homepage": ";;https://github.com/lvhangkenn;;https://cs.emory.edu/~jyang71/", "dblp": "210/4829.html;;369/5929;;305/0254", "google_scholar": "NQWuK9UAAAAJ;;qHH6dxsAAAAJ;;mOINlwcAAAAJ", "orcid": "0000-0002-3526-6859;;0009-0007-2566-390X;;0000-0001-9145-4531", "linkedin": ";;;;", "or_profile": "~Yanchao_Tan1;~Zihao_Zhou3;~Hang_Lv1;~Weiming_Liu2;~Carl_Yang1", "aff": "Fuzhou University;;Fuzhou University;;Emory University", "aff_domain": "fzu.edu.cn;;fzu.edu.cn;;emory.edu", "position": "Lecturer;;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\ntan2023walklm,\ntitle={Walk{LM}: A Uniform Language Model Fine-tuning Framework for Attributed Graph Embedding},\nauthor={Yanchao Tan and Zihao Zhou and Hang Lv and Weiming Liu and Carl Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZrG8kTbt70}\n}", "github": "", "project": "", "reviewers": "4W5x;vuBn;DtxK;x99Y", "pdf_size": 1241728, "rating": "4;6;8;8", "confidence": "4;4;4;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "80;72;84;106", "wc_strengths": "22;57;129;117", "wc_weaknesses": "92;103;67;52", "wc_questions": "22;4;1;44", "wc_limitations": "1;1;4;57", "wc_review": "217;237;285;376", "wc_reply_reviewers": "214;16;24;0", "wc_reply_authors": "341;25;30;0", "reply_reviewers": "3;1;1;0", "reply_authors": "4;2;2;1", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 12.599603168354152 ], "wc_strengths_avg": [ 81.25, 43.751428548105714 ], "wc_weaknesses_avg": [ 78.5, 20.1059692628831 ], "wc_questions_avg": [ 17.75, 17.151894939043906 ], "wc_limitations_avg": [ 15.75, 23.84716964337697 ], "wc_review_avg": [ 278.75, 61.344824557577795 ], "wc_reply_reviewers_avg": [ 63.5, 87.31981447529536 ], "wc_reply_authors_avg": [ 99.0, 140.18024111835447 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16075812816822044500&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "fzu.edu.cn;;fzu.edu.cn;;emory.edu", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Fuzhou University;Emory University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fznu.edu.cn;https://www.emory.edu", "aff_unique_abbr": "FZU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Data Portraits: Recording Foundation Model Training Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73544", "id": "ZrNRBmOzwE",
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3112ee706d21d734c15532c1239773e1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ZrNRBmOzwE", "openreview": "https://openreview.net/forum?id=ZrNRBmOzwE", "poster": "/media/PosterPDFs/NeurIPS%202023/73544.png?t=1702056632.9252276", "slides": "https://nips.cc/virtual/2023/poster/73544", "video": "https://nips.cc/virtual/2023/poster/73544", "author_site": "Marc Marone, Benjamin Van Durme", "tldr": "", "abstract": "Foundation models are trained on increasingly immense and opaque datasets. Even while these models are now key in AI system building, it can be difficult to answer the straightforward question: has the model already encountered a given example during training? We therefore propose a widespread adoption of Data Portraits: artifacts that record training data and allow for downstream inspection. First we outline the properties of such an artifact and discuss how existing solutions can be used to increase transparency. We then propose and implement a solution based on data sketching, stressing fast and space efficient querying. Using our tools, we document a popular language modeling corpus (The Pile) and a recently released code modeling dataset (The Stack). We show that our solution enables answering questions about test set leakage and model plagiarism. Our tool is lightweight and fast, costing only 3% of the dataset size in overhead. We release a live interface of our tools at https://dataportraits.org/ and call on dataset and model creators to release Data Portraits as a complement to current documentation practices.", "keywords": "natural language processing;data documentation;dataset curation;documentation practices", "primary_area": "", "supplementary_material": "/attachment/0394b1d417c023203be6387603efc05be3a3b3f0.pdf", "author": "Marc Marone;Benjamin Van Durme", "authorids": "~Marc_Marone1;~Benjamin_Van_Durme2", "gender": ";", "homepage": "https://marcmarone.com/;", "dblp": "237/9886;", "google_scholar": "A9DLKmoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Marc_Marone1;~Benjamin_Van_Durme2", "aff": "Johns Hopkins University;", "aff_domain": "jhu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nmarone2023data,\ntitle={Data Portraits: Recording Foundation Model Training Data},\nauthor={Marc Marone and Benjamin Van Durme},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ZrNRBmOzwE}\n}", "github": "", "project": "", "reviewers": "g9wo;YRzG;FHQe;RQmh;KAn6", "pdf_size": 748872, "rating": "5;5;5;7;7", "confidence": "3;4;4;3;4", "wc_summary_and_contributions": "80;148;134;59;71", "wc_strengths": "148;68;69;51;84", "wc_improvement": "109;238;146;21;20", "wc_limitations": "49;1;57;1;19", "wc_correctness": "7;6;17;1;10", "wc_clarity": "17;1;24;1;19", "wc_relation_to_prior_work": "7;1;39;1;15", "wc_documentation": "1;6;14;1;11", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "419;470;501;137;250", "wc_reply_reviewers": "0;33;361;0;0", "wc_reply_authors": "249;629;1039;436;121", "reply_reviewers": "0;1;2;0;0", "reply_authors": "1;1;3;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 98.4, 35.69089519751501 ], "wc_strengths_avg": [ 84.0, 33.66303610787357 ], "wc_improvement_avg": [ 106.8, 82.03755237694503 ], "wc_limitations_avg": [ 25.4, 23.61016730139793 ], 
"wc_correctness_avg": [ 8.2, 5.2687759489277965 ], "wc_clarity_avg": [ 12.4, 9.58331884056875 ], "wc_relation_to_prior_work_avg": [ 12.6, 14.164744967700617 ], "wc_documentation_avg": [ 6.6, 5.238320341483519 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 355.4, 139.41678521612812 ], "wc_reply_reviewers_avg": [ 78.8, 141.67766231837678 ], "wc_reply_authors_avg": [ 494.8, 321.79645740747367 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15374044026422260722&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "jhu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "MagicBrush: A Manually Annotated Dataset for Instruction-Guided Image Editing", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73543", "id": "ZsDB2GzsqG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/64008fa30cba9b4d1ab1bd3bd3d57d61-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ZsDB2GzsqG", "openreview": "https://openreview.net/forum?id=ZsDB2GzsqG", "poster": "/media/PosterPDFs/NeurIPS%202023/73543.png?t=1702265793.8549256", "slides": "https://nips.cc/virtual/2023/poster/73543", "video": "https://nips.cc/virtual/2023/poster/73543", "author_site": "Kai Zhang, Lingbo Mo, Wenhu Chen, Huan Sun, Yu Su", "tldr": "", "abstract": "Text-guided image editing is widely needed in daily life, ranging from personal use to professional applications such as Photoshop.\nHowever, existing methods are either zero-shot or trained on an automatically synthesized dataset, which contains a high volume of noise.\nThus, they still require lots of manual tuning to produce desirable outcomes in practice.\nTo address this issue, we introduce MagicBrush, the first large-scale, manually annotated dataset for instruction-guided real image editing that covers diverse scenarios: single-turn, multi-turn, mask-provided, and mask-free editing.\nMagicBrush comprises over 10K manually annotated triplets (source image, instruction, target image), which supports trainining large-scale text-guided image editing models.\nWe fine-tune InstructPix2Pix on MagicBrush and show that the new model can produce much better images according to human evaluation.\nWe further conduct extensive experiments to evaluate current image editing baselines from multiple dimensions including quantitative, qualitative, and human evaluations.\nThe results reveal the challenging nature of our dataset and the gap between current baselines and real-world editing needs.", "keywords": "Image Editing;Text-guided Image Editing;Diffusion", "primary_area": "", "supplementary_material": "", "author": "Kai Zhang;Lingbo Mo;Wenhu Chen;Huan Sun;Yu Su", "authorids": "~Kai_Zhang10;~Lingbo_Mo1;~Wenhu_Chen3;~Huan_Sun1;~Yu_Su2", "gender": "M;;F;M;M", "homepage": "https://drogozhang.github.io;https://molingbo.github.io/;https://u.osu.edu/ihudas/people/;http://ysu1989.github.io;https://wenhuchen.github.io/", "dblp": "55/957-33;241/5516;33/2952-1.html;38/1070-1;136/0957.html", "google_scholar": 
"sDnAIsgAAAAJ;nSEutzsAAAAJ;wIFkulcAAAAJ;rIh5OqoAAAAJ;https://scholar.google.co.jp/citations?user=U8ShbhUAAAAJ", "orcid": ";;;;", "linkedin": "kai-zhang-43774b196/;;huan-sun-81527924/?originalSubdomain=cn;;", "or_profile": "~Kai_Zhang10;~Lingbo_Mo1;~Huan_Sun1;~Yu_Su2;~wenhu_chen1", "aff": "Google DeepMind;Ohio State University, Columbus;The Ohio State University, Columbus;Microsoft;University of Waterloo", "aff_domain": "google.com;osu.edu;osu.edu;microsoft.com;uwaterloo.ca", "position": "Student Researcher;PhD student;Associate Professor;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023magicbrush,\ntitle={MagicBrush: A Manually Annotated Dataset for Instruction-Guided Image Editing},\nauthor={Kai Zhang and Lingbo Mo and Wenhu Chen and Huan Sun and Yu Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ZsDB2GzsqG}\n}", "github": "", "project": "", "reviewers": "Sjyy;6Wso;xGdT;bheo", "pdf_size": 0, "rating": "5;6;7;7", "confidence": "4;3;4;4", "wc_summary_and_contributions": "42;34;56;60", "wc_strengths": "30;52;25;47", "wc_improvement": "112;216;66;111", "wc_limitations": "12;6;43;13", "wc_correctness": "1;9;4;13", "wc_clarity": "1;36;7;5", "wc_relation_to_prior_work": "1;8;9;10", "wc_documentation": "1;11;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "201;373;216;261", "wc_reply_reviewers": "73;0;0;12", "wc_reply_authors": "1180;1113;141;350", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 48.0, 10.488088481701515 ], "wc_strengths_avg": [ 38.5, 11.280514172678478 ], "wc_improvement_avg": [ 126.25, 55.04713889022753 ], "wc_limitations_avg": [ 18.5, 14.396180048887969 ], "wc_correctness_avg": [ 6.75, 4.602988159880492 ], "wc_clarity_avg": [ 12.25, 13.88119231190174 ], "wc_relation_to_prior_work_avg": [ 7.0, 3.5355339059327378 ], "wc_documentation_avg": [ 4.5, 4.092676385936225 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 262.75, 67.37349256198613 ], "wc_reply_reviewers_avg": [ 21.25, 30.276847590196706 ], "wc_reply_authors_avg": [ 696.0, 457.13400661075303 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 234, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1179848482165053364&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;osu.edu;osu.edu;microsoft.com;uwaterloo.ca", "author_num": 5, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Google;Ohio State University;Microsoft;University of Waterloo", "aff_unique_dep": "Google DeepMind;;Microsoft Corporation;", "aff_unique_url": "https://deepmind.com;https://www.osu.edu;https://www.microsoft.com;https://uwaterloo.ca", "aff_unique_abbr": "DeepMind;OSU;Microsoft;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;1;1;1;2", "aff_country_unique": "United Kingdom;United States;Canada" }, { "title": "Differentially Private Approximate Near Neighbor Counting in High Dimensions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71215", "id": "Zt9RzHjSEy", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/87571720167f7e88827c40e468e3101f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Zt9RzHjSEy", "openreview": "https://openreview.net/forum?id=Zt9RzHjSEy", "poster": "/media/PosterPDFs/NeurIPS%202023/71215.png?t=1702335322.1609242", "slides": "https://nips.cc/virtual/2023/poster/71215", "video": "https://nips.cc/virtual/2023/poster/71215", "author_site": "Alexandr Andoni, Piotr Indyk, Sepideh Mahabadi, Shyam Narayanan", "tldr": "", "abstract": "Range counting (e.g., counting the number of data points falling into a given query ball) under differential privacy has been studied extensively. However, the current algorithms for this problem are subject to the following dichotomy. One class of algorithms suffers from an additive error that is a fixed polynomial in the number of points. Another class of algorithms allows for polylogarithmic additive error, but the error grows exponentially in the dimension. To achieve the latter, the problem is relaxed to allow a \u201cfuzzy\u201d definition of the range boundary, e.g., a count of the points in a ball of radius $r$ might also include points in a ball of radius $cr$ for some $c>1$. In this paper we present an efficient algorithm that offers a sweet spot between these two classes. The algorithm has an additive error that is an arbitrary small power of the data set size, depending on how fuzzy the range boundary is, as well as a small ($1+o(1)$) multiplicative error. Crucially, the amount of noise added has no dependence on the dimension. Our algorithm introduces a variant of Locality-Sensitive Hashing, utilizing it in a novel manner.", "keywords": "Differential Privacy;Near Neighbor Search;Locality Sensitive Hashing;Data Structures;Range Query", "primary_area": "", "supplementary_material": "", "author": "Alexandr Andoni;Piotr Indyk;Sepideh Mahabadi;Shyam Narayanan", "authorids": "~Alexandr_Andoni1;~Piotr_Indyk1;~Sepideh_Mahabadi1;~Shyam_Narayanan1", "gender": "M;;F;M", "homepage": "http://www.mit.edu/~andoni/;https://people.csail.mit.edu/indyk/;https://www.mit.edu/~mahabadi/;https://sites.google.com/view/shyamnarayanan/home", "dblp": "66/6009;i/PiotrIndyk;130/0388;222/2805", "google_scholar": "Evgx6UkAAAAJ;oOwNKsAAAAAJ;NirVdpMAAAAJ;CTT44Y0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Alexandr_Andoni1;~Piotr_Indyk1;~Sepideh_Mahabadi1;~Shyam_Narayanan1", "aff": "Columbia University;Massachusetts Institute of Technology;Microsoft Research;Massachusetts Institute of Technology", "aff_domain": "columbia.edu;mit.edu;microsoft.com;mit.edu", "position": "Associate Professor;Full Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nandoni2023differentially,\ntitle={Differentially Private Approximate Near Neighbor Counting in High Dimensions},\nauthor={Alexandr Andoni and Piotr Indyk and Sepideh Mahabadi and Shyam Narayanan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Zt9RzHjSEy}\n}", "github": "", "project": "", "reviewers": "1btN;2zYg;3KJE;TS42", "pdf_size": 405226, "rating": "5;7;7;7", "confidence": "3;3;3;4", "soundness": "3;4;4;4", "novelty": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "37;48;178;106", "wc_strengths": "58;60;34;45", "wc_weaknesses": "119;46;205;24", "wc_questions": "171;41;60;119", "wc_limitations": "1;16;1;1", "wc_review": "386;211;478;295", "wc_reply_reviewers": "10;28;0;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", 
"reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 92.25, 56.01952784520769 ], "wc_strengths_avg": [ 49.25, 10.520812706250407 ], "wc_weaknesses_avg": [ 98.5, 70.83254901526557 ], "wc_questions_avg": [ 97.75, 51.143792389692806 ], "wc_limitations_avg": [ 4.75, 6.49519052838329 ], "wc_review_avg": [ 342.5, 99.75093984519644 ], "wc_reply_reviewers_avg": [ 12.0, 10.099504938362077 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3600609088502685304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "columbia.edu;mit.edu;microsoft.com;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Columbia University;Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.columbia.edu;https://web.mit.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Columbia;MIT;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Injecting Multimodal Information into Rigid Protein Docking via Bi-level Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71214", "id": "ZuaVKlWdD2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/77fa0e7d45c6687f1958de0b31e9fc05-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZuaVKlWdD2", "openreview": "https://openreview.net/forum?id=ZuaVKlWdD2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71214", "video": "https://nips.cc/virtual/2023/poster/71214", "author_site": "Ruijia Wang, YiWu Sun, Yujie Luo, Shaochuan Li, Cheng Yang, Xingyi Cheng, Hui Li, Chuan Shi, Le Song", "tldr": "", "abstract": "The structure of protein-protein complexes is critical for understanding binding dynamics, biological mechanisms, and intervention strategies. Rigid protein docking, a fundamental problem in this field, aims to predict the 3D structure of complexes from their unbound states without conformational changes. In this scenario, we have access to two types of valuable information: sequence-modal information, such as coevolutionary data obtained from multiple sequence alignments, and structure-modal information, including the 3D conformations of rigid structures. However, existing docking methods typically utilize single-modal information, resulting in suboptimal predictions. In this paper, we propose xTrimoBiDock (or BiDock for short), a novel rigid docking model that effectively integrates sequence- and structure-modal information through bi-level optimization. Specifically, a cross-modal transformer combines multimodal information to predict an inter-protein distance map. To achieve rigid docking, the roto-translation transformation is optimized to align the docked pose with the predicted distance map. In order to tackle this bi-level optimization problem, we unroll the gradient descent of the inner loop and further derive a better initialization for roto-translation transformation based on spectral estimation. 
Compared to baselines, BiDock achieves up to a 234% relative improvement on the challenging antibody-antigen docking problem.", "keywords": "complex structure prediction;rigid docking;protein docking;antibody-antigen docking", "primary_area": "", "supplementary_material": "/attachment/5e8825952ca554c1aea37beed4db73d8f357d040.zip", "author": "Ruijia Wang;YiWu Sun;Yujie Luo;Shaochuan Li;Cheng Yang;Xingyi Cheng;Hui Li;Chuan Shi;Le Song", "authorids": "~Ruijia_Wang2;~YiWu_Sun1;~Yujie_Luo1;~Shaochuan_Li1;~Cheng_Yang6;~Xingyi_Cheng3;~Hui_Li2;~Chuan_Shi1;~Le_Song1", "gender": "F;M;M;;M;M;;M;M", "homepage": ";https://github.com/SYW23;;;https://albertyang33.github.io/;;;http://www.shichuan.org/;http://www.cc.gatech.edu/~lsong", "dblp": ";;;;49/1457-2;206/6376;;64/3041-1;94/3481", "google_scholar": "https://scholar.google.ca/citations?user=DpsuBrsAAAAJ;;;;OlLjVUcAAAAJ;shO7XmIAAAAJ;;tUq_v90AAAAJ;Xl4E0CsAAAAJ", "orcid": ";0000-0002-0061-0779;;;0000-0001-7821-0030;;;0000-0002-3734-0266;", "linkedin": ";;%E7%8E%89%E6%9D%B0-%E7%BD%97-2320b3147/;;;;;;", "or_profile": "~Ruijia_Wang2;~YiWu_Sun1;~Yujie_Luo1;~Shaochuan_Li1;~Cheng_Yang6;~Xingyi_Cheng3;~Hui_Li2;~Chuan_Shi1;~Le_Song1", "aff": "Beijing University of Posts and Telecommunications;;BioMap, Inc.;;Beijing University of Posts and Telecommunications;BioMap;;Beijing University of Post and Telecommunication;College of Computing, Georgia Institute of Technology", "aff_domain": "bupt.edu.cn;;biomap.com;;bupt.edu.cn;biomap.com;;bupt.edu.cn;cc.gatech.edu", "position": "PhD student;;AI engineer;;Associate Professor;Principal Researcher;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023injecting,\ntitle={Injecting Multimodal Information into Rigid Protein Docking via Bi-level Optimization},\nauthor={Ruijia Wang and YiWu Sun and Yujie Luo and Shaochuan Li and Cheng Yang and Xingyi Cheng and Hui Li and Chuan Shi and Le Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZuaVKlWdD2}\n}", "github": "", "project": "", "reviewers": "XfrH;ZMi5;DgVe;HJaR", "pdf_size": 6842243, "rating": "4;5;5;7", "confidence": "5;5;5;4", "soundness": "2;2;2;3", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "123;147;33;102", "wc_strengths": "32;233;25;34", "wc_weaknesses": "329;284;256;391", "wc_questions": "4;5;39;34", "wc_limitations": "4;10;40;3", "wc_review": "492;679;393;564", "wc_reply_reviewers": "630;70;18;65", "wc_reply_authors": "492;118;64;65", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 101.25, 42.49926469952157 ], "wc_strengths_avg": [ 81.0, 87.82084035125148 ], "wc_weaknesses_avg": [ 315.0, 51.024503917235684 ], "wc_questions_avg": [ 20.5, 16.101242188104617 ], "wc_limitations_avg": [ 14.25, 15.105876340020794 ], "wc_review_avg": [ 532.0, 104.34797554337123 ], "wc_reply_reviewers_avg": [ 195.75, 251.5336707083169 ], "wc_reply_authors_avg": [ 184.75, 178.73076819618944 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 6, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=7326792632611502658&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bupt.edu.cn;;biomap.com;;bupt.edu.cn;biomap.com;;bupt.edu.cn;cc.gatech.edu", "author_num": 9, "aff_unique_index": "0;1;0;2;0;3", "aff_unique_norm": "Beijing University of Posts and Telecommunications;BioMap, Inc.;BioMap;Georgia Institute of Technology", "aff_unique_dep": ";;;College of Computing", "aff_unique_url": "http://www.bupt.edu.cn/;;;https://www.gatech.edu", "aff_unique_abbr": "BUPT;;;Georgia Tech", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Beijing;;Atlanta", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;United States;" }, { "title": "Thought Cloning: Learning to Think while Acting by Imitating Human Thinking", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71213", "id": "ZvDmna23r3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b4ba64e549c410185c4d3eac3a81726-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZvDmna23r3", "openreview": "https://openreview.net/forum?id=ZvDmna23r3", "poster": "/media/PosterPDFs/NeurIPS%202023/71213.png?t=1702078367.6265197", "slides": "https://nips.cc/virtual/2023/poster/71213", "video": "https://nips.cc/virtual/2023/poster/71213", "author_site": "Shengran Hu, Jeff Clune", "tldr": "", "abstract": "Language is often considered a key aspect of human thinking, providing us with exceptional abilities to generalize, explore, plan, replan, and adapt to new situations. However, Reinforcement Learning (RL) agents are far from human-level performance in any of these abilities. We hypothesize one reason for such cognitive deficiencies is that they lack the benefits of thinking in language and that we can improve AI agents by training them to $\\textit{think like humans do}$. We introduce a novel Imitation Learning framework, Thought Cloning, where the idea is to not just clone the behaviors of human demonstrators, $\\textit{but also the thoughts humans have as they perform these behaviors}$. While we expect Thought Cloning to truly shine at scale on internet-sized datasets (e.g. online videos with transcripts), here we conduct experiments in a domain where the thinking and action data are synthetically generated. Results reveal that Thought Cloning learns much faster than Behavioral Cloning and its performance advantage grows the further out of distribution test tasks are, highlighting its ability to better handle novel situations. Thought Cloning also provides important benefits for AI Safety and Interpretability, and makes it easier to debug and improve AI. Because we can observe the agent\u2019s thoughts, we can (1) more easily diagnose why things are going wrong, making it easier to fix the problem, (2) steer the agent by correcting its thinking, or (3) prevent it from doing unsafe things it plans to do. 
Overall, by training agents $\\textit{how to think}$ as well as behave, Thought Cloning creates safer, more powerful agents.", "keywords": "Reinforcement learning;Imitation Learning;AI Safety;Interpretability", "primary_area": "", "supplementary_material": "/attachment/3e227a675a08ce569a3138840ef39007e0862406.pdf", "author": "Shengran Hu;Jeff Clune", "authorids": "~Shengran_Hu2;~Jeff_Clune3", "gender": "M;", "homepage": ";", "dblp": "279/6644;", "google_scholar": "xt4UjA4AAAAJ;", "orcid": "0000-0002-4240-0585;", "linkedin": ";", "or_profile": "~Shengran_Hu2;~Jeff_Clune3", "aff": "University of British Columbia;", "aff_domain": "cs.ubc.ca;", "position": "PhD student;", "bibtex": "@inproceedings{\nhu2023thought,\ntitle={Thought Cloning: Learning to Think while Acting by Imitating Human Thinking},\nauthor={Shengran Hu and Jeff Clune},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZvDmna23r3}\n}", "github": "", "project": "", "reviewers": "QGuE;9iZD;gnSR;HBeY", "pdf_size": 1351572, "rating": "5;7;8;9", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "1;3;3;3", "presentation": "3;4;2;4", "wc_summary": "95;139;91;42", "wc_strengths": "64;83;88;82", "wc_weaknesses": "527;170;289;141", "wc_questions": "108;83;16;41", "wc_limitations": "30;66;17;8", "wc_review": "824;541;501;314", "wc_reply_reviewers": "0;74;137;22", "wc_reply_authors": "0;0;0;37", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 7.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 91.75, 34.34657916008521 ], "wc_strengths_avg": [ 79.25, 9.093266739736606 ], "wc_weaknesses_avg": [ 281.75, 152.06803576031353 ], "wc_questions_avg": [ 62.0, 35.75611835756225 ], "wc_limitations_avg": [ 30.25, 22.072324299900995 ], "wc_review_avg": [ 545.0, 182.45136338213536 ], "wc_reply_reviewers_avg": [ 58.25, 52.81275887510517 ], "wc_reply_authors_avg": [ 9.25, 16.021469970012117 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8783100656536799, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10518182798701484533&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 6, "email": "cs.ubc.ca;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Rehearsal Learning for Avoiding Undesired Future", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71212", "id": "ZwQJRXLjVm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fed1ea8dcc2a13f3835cc854e8c8294c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ZwQJRXLjVm", "openreview": "https://openreview.net/forum?id=ZwQJRXLjVm", "poster": "/media/PosterPDFs/NeurIPS%202023/71212.png?t=1701396971.1235523", "slides": "https://nips.cc/virtual/2023/poster/71212", "video": "https://nips.cc/virtual/2023/poster/71212", "author_site": "Tian Qin, Tian-Zuo Wang, Zhi-Hua Zhou", "tldr": "", "abstract": "Machine learning (ML) models have been widely used to make predictions. 
Instead of a predictive statement about future outcomes, in many situations we want to pursue a decision: what can we do to avoid the undesired future if an ML model predicts so? In this paper, we present a rehearsal learning framework, in which decisions that can persuasively avoid the happening of undesired outcomes can be found and recommended. Based on the influence relation, we characterize the generative process of variables with structural rehearsal models, consisting of a probabilistic graphical model called rehearsal graphs and structural equations, and find actionable decisions that can alter the outcome by reasoning under a Bayesian framework. Moreover, we present a probably approximately correct bound to quantify the associated risk of a decision. Experiments validate the effectiveness of the proposed rehearsal learning framework and the informativeness of the bound.", "keywords": "decision-making;structural rehearsal model;Bayesian inference;probabilistic graphical model", "primary_area": "", "supplementary_material": "/attachment/4dc97369a37358e110c846075ef0948daf63c96c.zip", "author": "Tian Qin;Tian-Zuo Wang;Zhi-Hua Zhou", "authorids": "~Tian_Qin1;~Tian-Zuo_Wang1;~Zhi-Hua_Zhou2", "gender": "M;M;M", "homepage": "http://www.lamda.nju.edu.cn/qint/;http://www.lamda.nju.edu.cn/wangtz/;https://cs.nju.edu.cn/zhouzh/", "dblp": "https://dblp.uni-trier.de/pid/133/4172;249/9504;z/ZhiHuaZhou", "google_scholar": "5tIqs3sAAAAJ;xUyl98AAAAAJ;https://scholar.google.com.tw/citations?user=rSVIHasAAAAJ", "orcid": ";;0000-0003-0746-1494", "linkedin": ";;", "or_profile": "~Tian_Qin1;~Tian-Zuo_Wang1;~Zhi-hua_Zhou1", "aff": "Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nqin2023rehearsal,\ntitle={Rehearsal Learning for Avoiding Undesired Future},\nauthor={Tian Qin and Tian-Zuo Wang and Zhi-Hua Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ZwQJRXLjVm}\n}", "github": "", "project": "", "reviewers": "8gB7;LZtj;Pcym;LgnQ;9i1V", "pdf_size": 567892, "rating": "5;5;6;7;7", "confidence": "2;3;2;3;3", "soundness": "2;2;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "54;122;119;52;123", "wc_strengths": "42;56;51;65;63", "wc_weaknesses": "103;131;104;145;208", "wc_questions": "52;28;175;314;198", "wc_limitations": "6;4;25;1;4", "wc_review": "257;341;474;577;596", "wc_reply_reviewers": "23;0;15;19;58", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 94.0, 33.50820794969495 ], "wc_strengths_avg": [ 55.4, 8.357032966310472 ], "wc_weaknesses_avg": [ 138.2, 38.41562182237846 ], "wc_questions_avg": [ 153.4, 104.16832532012789 ], "wc_limitations_avg": [ 8.0, 8.648699324175862 ], "wc_review_avg": [ 449.0, 132.01969550033056 ], "wc_reply_reviewers_avg": [ 23.0, 19.15202339179858 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.45643546458763845, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17410748201824171911&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "3D molecule generation by denoising voxel grids", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71211", "id": "Zyzluw0hC4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da1131a86ac3c70e0b7cae89c3d4df22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=Zyzluw0hC4", "openreview": "https://openreview.net/forum?id=Zyzluw0hC4", "poster": "/media/PosterPDFs/NeurIPS%202023/71211.png?t=1701475604.5204933", "slides": "https://nips.cc/virtual/2023/poster/71211", "video": "https://nips.cc/virtual/2023/poster/71211", "author_site": "Pedro O. Pinheiro, Joshua Rackers, Joseph Kleinhenz, Michael Maser, Omar Mahmood, Andrew Watkins, Stephen Ra, Vishnu Sresht, Saeed Saremi", "tldr": "", "abstract": "We propose a new score-based approach to generate 3D molecules represented as atomic densities on regular grids.\nFirst, we train a denoising neural network that learns to map from a smooth distribution of noisy molecules to the distribution of real molecules.\nThen, we follow the _neural empirical Bayes_ framework [Saremi and Hyvarinen, 2019] and generate molecules in two steps: (i) sample noisy density grids from a smooth distribution via underdamped Langevin Markov chain Monte Carlo, and (ii) recover the \"clean\" molecule by denoising the noisy grid with a single step.\nOur method, _VoxMol_, generates molecules in a fundamentally different way than the current state of the art (ie, diffusion models applied to atom point clouds). It differs in terms of the data representation, the noise model, the network architecture and the generative modeling algorithm.\nOur experiments show that VoxMol captures the distribution of drug-like molecules better than state of the art, while being faster to generate samples.", "keywords": "generative model;molecule generation;drug discovery", "primary_area": "", "supplementary_material": "", "author": "Pedro O. 
Pinheiro;Joshua Rackers;joseph Kleinhenz;Michael Maser;Omar Mahmood;Andrew Martin Watkins;Stephen Ra;Vishnu Sresht;Saeed Saremi", "authorids": "~Pedro_O._Pinheiro1;rackersj@gene.com;kleinhej@gene.com;~Michael_Maser1;mahmoodo@gene.com;~Andrew_Martin_Watkins1;~Stephen_Ra1;~Vishnu_Sresht1;~Saeed_Saremi1", "gender": "M;;;;;M;M;;M", "homepage": ";;;;;;https://www.stephenra.com;;https://saeedsaremi.github.io/", "dblp": "223/9937;;;;;;255/5897;;128/2619", "google_scholar": "https://scholar.google.ca/citations?user=BU6f7L4AAAAJ;;;;;zglcuwEAAAAJ;bxl__-MAAAAJ;;", "orcid": ";;;;;;;;", "linkedin": ";;;michael-maser-7a9844b9;;;;;", "or_profile": "~Pedro_O._Pinheiro1;rackersj@gene.com;kleinhej@gene.com;~Michael_Maser1;mahmoodo@gene.com;~Andrew_Martin_Watkins1;~Stephen_Ra1;~Vishnu_Sresht1;~Saeed_Saremi1", "aff": "Prescient Design, Genentech;;;Genentech;;Prescient Design, Genentech;Prescient Design, Genentech;;Genentech", "aff_domain": "gene.com;;;gene.com;;gene.com;gene.com;;gene.com", "position": "Researcher;;;Researcher;;Researcher;Director of Frontier Research;;Senior Principal Research Scientist", "bibtex": "@inproceedings{\npinheiro2023d,\ntitle={3D molecule generation by denoising voxel grids},\nauthor={Pedro O. Pinheiro and Joshua Rackers and joseph Kleinhenz and Michael Maser and Omar Mahmood and Andrew Martin Watkins and Stephen Ra and Vishnu Sresht and Saeed Saremi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=Zyzluw0hC4}\n}", "github": "", "project": "", "reviewers": "bpDM;R82i;zHHZ;j1qM", "pdf_size": 10890715, "rating": "3;4;6;6", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "55;24;19;72", "wc_strengths": "57;24;72;107", "wc_weaknesses": "340;167;127;84", "wc_questions": "75;79;168;46", "wc_limitations": "1;1;22;31", "wc_review": "528;295;408;340", "wc_reply_reviewers": "1730;1033;34;34", "wc_reply_authors": "2142;1675;33;41", "reply_reviewers": "5;4;1;1", "reply_authors": "6;5;2;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 42.5, 21.914607000811127 ], "wc_strengths_avg": [ 65.0, 29.824486584013478 ], "wc_weaknesses_avg": [ 179.5, 97.20210903061724 ], "wc_questions_avg": [ 92.0, 45.689167206242665 ], "wc_limitations_avg": [ 13.75, 13.141061600951424 ], "wc_review_avg": [ 392.75, 87.83898621910433 ], "wc_reply_reviewers_avg": [ 707.75, 717.4016918714368 ], "wc_reply_authors_avg": [ 972.75, 950.2090230575586 ], "reply_reviewers_avg": [ 2.75, 1.7853571071357126 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8824079650575349996&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "gene.com;;;gene.com;;gene.com;gene.com;;gene.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Genentech", "aff_unique_dep": "Prescient Design", "aff_unique_url": "https://www.gene.com", "aff_unique_abbr": "Genentech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Training Chain-of-Thought via Latent-Variable Inference", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/71210", "id": "a147pIS2Co", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e69a9560c450ca76584d9eb37e7f5ae8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=a147pIS2Co", "openreview": "https://openreview.net/forum?id=a147pIS2Co", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71210", "video": "https://nips.cc/virtual/2023/poster/71210", "author_site": "Du Phan, Matthew Douglas Hoffman, David Dohan, Sholto Douglas, Tuan Anh Le, Aaron Parisi, Pavel Sountsov, Charles Sutton, Sharad Vikram, Rif A. Saurous", "tldr": "", "abstract": "Large language models (LLMs) solve problems more accurately and interpretably when instructed to work out the answer step by step using a \"chain-of-thought\" (CoT) prompt. One can also improve LLMs' performance on a specific task by supervised fine-tuning, i.e., by using gradient ascent on some tunable parameters to maximize the average log-likelihood of correct answers from a labeled training set. \nNaively combining CoT with supervised tuning requires supervision not just of the correct answers, but also of detailed rationales that lead to those answers; these rationales are expensive to produce by hand. Instead, we propose a fine-tuning strategy that tries to maximize the \\emph{marginal} log-likelihood of generating a correct answer using CoT prompting, approximately averaging over all possible rationales. The core challenge is sampling from the posterior over rationales conditioned on the correct answer; we address it using a simple Markov-chain Monte Carlo (MCMC) expectation-maximization (EM) algorithm inspired by the self-taught reasoner (STaR), memoized wake-sleep, Markovian score climbing, and persistent contrastive divergence. This algorithm also admits a novel control-variate technique that drives the variance of our gradient estimates to zero as the model improves. Applying our technique to GSM8K and the tasks in BIG-Bench Hard, we find that this MCMC-EM fine-tuning technique typically improves the model's accuracy on held-out examples more than STaR or prompt-tuning with or without CoT.", "keywords": "Large language models;latent-variable models;control variates;chain-of-thought;MCMC", "primary_area": "", "supplementary_material": "", "author": "Du Phan;Matthew Douglas Hoffman;david dohan;Sholto Douglas;Tuan Anh Le;Aaron T Parisi;Pavel Sountsov;Charles Sutton;Sharad Vikram;Rif A. 
Saurous", "authorids": "~Du_Phan1;~Matthew_Douglas_Hoffman1;dmrdohan@gmail.com;~Sholto_Douglas1;~Tuan_Anh_Le1;~Aaron_T_Parisi1;~Pavel_Sountsov2;~Charles_Sutton1;~Sharad_Vikram1;~Rif_A._Saurous1", "gender": "M;M;;M;M;M;;M;M;M", "homepage": "https://fehiepsi.github.io/;http://www.matthewdhoffman.com;;https://sholtodouglas.github.io;https://www.tuananhle.co.uk;;http://people.brandeis.edu/~sl157/;http://homepages.inf.ed.ac.uk/csutton/;https://www.sharadvikram.com;", "dblp": "251/5646;07/4433;;;76/10097-1;318/1580;;59/5879;;186/7923", "google_scholar": "CeC9PtYAAAAJ;IeHKeGYAAAAJ;;;https://scholar.google.co.uk/citations?user=tkceMM0AAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?user=hYtGXD0AAAAJ;;QNnjg7YAAAAJ", "orcid": ";;;;;;;0000-0002-0041-3820;;", "linkedin": "phandu/;;;;;aaron-parisi-3b5130116;;charles-sutton-772aa126;;", "or_profile": "~Du_Phan1;~Matthew_Douglas_Hoffman1;dmrdohan@gmail.com;~Sholto_Douglas1;~Tuan_Anh_Le1;~Aaron_T_Parisi1;~Pavel_Sountsov2;~Charles_Sutton1;~Sharad_Vikram1;~Rif_A._Saurous1", "aff": "Google;Google;;;Google Research;Google;Google;University of Edinburgh;Google;Google", "aff_domain": "google.com;google.com;;;google.com;google.com;google.com;ed.ac.uk;google.com;google.com", "position": "Researcher;Research Scientist;;;Research Scientist;Researcher;Researcher;Professor;Google;Engineer, Director", "bibtex": "@inproceedings{\nhoffman2023training,\ntitle={Training Chain-of-Thought via Latent-Variable Inference},\nauthor={Matthew Douglas Hoffman and Du Phan and david dohan and Sholto Douglas and Tuan Anh Le and Aaron T Parisi and Pavel Sountsov and Charles Sutton and Sharad Vikram and Rif A. Saurous},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=a147pIS2Co}\n}", "github": "", "project": "", "reviewers": "Surj;d8Yh;zJeM;AjuJ;SKio", "pdf_size": 455942, "rating": "4;5;6;6;7", "confidence": "4;5;4;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "21;80;109;78;82", "wc_strengths": "59;51;71;90;95", "wc_weaknesses": "99;132;2;285;80", "wc_questions": "3;82;214;12;1", "wc_limitations": "3;29;7;38;36", "wc_review": "185;374;403;503;294", "wc_reply_reviewers": "0;45;50;336;18", "wc_reply_authors": "0;220;202;481;0", "reply_reviewers": "0;1;1;3;1", "reply_authors": "1;2;2;4;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 74.0, 28.809720581775867 ], "wc_strengths_avg": [ 73.2, 17.069270634681494 ], "wc_weaknesses_avg": [ 119.6, 93.0947904020413 ], "wc_questions_avg": [ 62.4, 81.49257634901475 ], "wc_limitations_avg": [ 22.6, 14.732277488562318 ], "wc_review_avg": [ 351.8, 106.90070158796901 ], "wc_reply_reviewers_avg": [ 89.8, 124.43857922686196 ], "wc_reply_authors_avg": [ 180.6, 177.47292751290263 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.29417420270727607, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14336827565484713391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;google.com;;;google.com;google.com;google.com;ed.ac.uk;google.com;google.com", "author_num": 10, "aff_unique_index": 
"0;0;0;0;0;1;0;0", "aff_unique_norm": "Google;University of Edinburgh", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ed.ac.uk", "aff_unique_abbr": "Google;Edinburgh", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Students Parrot Their Teachers: Membership Inference on Model Distillation", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71209", "id": "a2Yg9Za6Rb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b07d224a643b02e7571e083578a86d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=a2Yg9Za6Rb", "openreview": "https://openreview.net/forum?id=a2Yg9Za6Rb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71209", "video": "https://nips.cc/virtual/2023/poster/71209", "author_site": "Matthew Jagielski, Milad Nasr, Katherine Lee, Christopher A. Choquette-Choo, Nicholas Carlini, Florian Tramer", "tldr": "", "abstract": "Model distillation is frequently proposed as a technique to reduce the privacy leakage of machine learning. These empirical privacy defenses rely on the intuition that distilled ``student'' models protect the privacy of training data, as they only interact with this data indirectly through a ``teacher'' model. In this work, we design membership inference attacks to systematically study the privacy provided by knowledge distillation to both the teacher and student training sets. Our new attacks show that distillation alone provides only limited privacy across a number of domains. We explain the success of our attacks on distillation by showing that membership inference attacks on a private dataset can succeed even if the target model is never queried on any actual training points, but only on inputs whose predictions are highly influenced by training data. Finally, we show that our attacks are strongest when student and teacher sets are similar, or when the attacker can poison the teacher set.", "keywords": "model distillation;membership inference;privacy;dark knowledge", "primary_area": "", "supplementary_material": "/attachment/3e1b6cc738c6578f4335c1939793e0a7c0633bb7.pdf", "author": "Matthew Jagielski;Milad Nasr;Katherine Lee;Christopher A. 
Choquette-Choo;Nicholas Carlini;Florian Tram\u00e8r", "authorids": "~Matthew_Jagielski1;~Milad_Nasr2;~Katherine_Lee1;~Christopher_A._Choquette-Choo1;~Nicholas_Carlini1;~Florian_Tram\u00e8r1", "gender": "M;;F;M;;M", "homepage": "https://jagielski.github.io/;https://people.cs.umass.edu/~milad/;https://katelee168.github.io/;https://www.christopherchoquette.com;http://nicholas.carlini.com;http://floriantramer.com", "dblp": "218/5156;;115/5082.html;250/9674;145/1806;158/7224", "google_scholar": "_8rw_GMAAAAJ;k6-nvDAAAAAJ;bjdB4K8AAAAJ;oDE4I64AAAAJ;;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ", "orcid": ";;;;;", "linkedin": ";;;christopher-choquette-choo/;;", "or_profile": "~Matthew_Jagielski1;~Milad_Nasr2;~Katherine_Lee1;~Christopher_A._Choquette-Choo1;~Nicholas_Carlini1;~Florian_Tramer1", "aff": "Google;Google;Cornell University;Google Research, Brain Team;Google;ETHZ - ETH Zurich", "aff_domain": "google.com;google.com;cornell.edu;google.com;google.com;ethz.ch", "position": "Researcher;Researcher;PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\njagielski2023students,\ntitle={Students Parrot Their Teachers: Membership Inference on Model Distillation},\nauthor={Matthew Jagielski and Milad Nasr and Katherine Lee and Christopher A. Choquette-Choo and Nicholas Carlini and Florian Tram{\\`e}r},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=a2Yg9Za6Rb}\n}", "github": "", "project": "", "reviewers": "4Gun;xTVE;E9Bb;odTu", "pdf_size": 1734754, "rating": "6;8;8;8", "confidence": "4;4;4;4", "soundness": "2;4;4;4", "novelty": "3;3;4;3", "presentation": "4;3;4;4", "wc_summary": "132;98;61;65", "wc_strengths": "88;78;9;171", "wc_weaknesses": "137;104;15;11", "wc_questions": "30;104;269;173", "wc_limitations": "11;10;10;29", "wc_review": "398;394;364;449", "wc_reply_reviewers": "35;24;10;68", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 28.679260799399973 ], "wc_strengths_avg": [ 86.5, 57.4913036902104 ], "wc_weaknesses_avg": [ 66.75, 55.01988276977696 ], "wc_questions_avg": [ 144.0, 88.12207441952329 ], "wc_limitations_avg": [ 15.0, 8.093207028119323 ], "wc_review_avg": [ 401.25, 30.53993287484437 ], "wc_reply_reviewers_avg": [ 34.25, 21.405314760591587 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1281233192087810597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;cornell.edu;google.com;google.com;ethz.ch", "author_num": 6, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Google;Cornell University;ETH Zurich", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.cornell.edu;https://www.ethz.ch", "aff_unique_abbr": "Google;Cornell;ETHZ", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "On the Convergence of CART under Sufficient Impurity Decrease Condition", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71208", "id": "a2svOXTVgO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b418964bafb4fdd9aef9017301323a8a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=a2svOXTVgO", "openreview": "https://openreview.net/forum?id=a2svOXTVgO", "poster": "/media/PosterPDFs/NeurIPS%202023/71208.png?t=1701841581.64196", "slides": "https://nips.cc/virtual/2023/poster/71208", "video": "https://nips.cc/virtual/2023/poster/71208", "author_site": "Rahul Mazumder, Haoyue Wang", "tldr": "", "abstract": "The decision tree is a flexible machine-learning model that finds its success in numerous applications. It is usually fitted in a recursively greedy manner using CART. In this paper, we study the convergence rate of CART under a regression setting. First, we prove an upper bound on the prediction error of CART under a sufficient impurity decrease (SID) condition \\cite{chi2020asymptotic} -- our result is an improvement over the known result by \\cite{chi2020asymptotic} under a similar assumption. We show via examples that this error bound cannot be further improved by more than a constant or a log factor. Second, we introduce a few easy-to-check sufficient conditions of the SID condition. In particular, we show that the SID condition can be satisfied by an additive model when the component functions satisfy a ``locally reverse Poincare inequality\". We discuss a few familiar function classes in non-parametric estimation to demonstrate the usefulness of this conception.", "keywords": "decision tree;CART", "primary_area": "", "supplementary_material": "/attachment/3988297d1c83f9fc28d94b7cf3af89c14cdbd185.pdf", "author": "Rahul Mazumder;Haoyue Wang", "authorids": "~Rahul_Mazumder1;~Haoyue_Wang3", "gender": "M;", "homepage": "http://www.mit.edu/~rahulmaz/;", "dblp": "11/9365.html;", "google_scholar": "cyCp3pIAAAAJ;N-iirNIAAAAJ", "orcid": "0000-0003-1384-9743;", "linkedin": ";", "or_profile": "~Rahul_Mazumder1;~Haoyue_Wang3", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nmazumder2023on,\ntitle={On the Convergence of {CART} under Sufficient Impurity Decrease Condition},\nauthor={Rahul Mazumder and Haoyue Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=a2svOXTVgO}\n}", "github": "", "project": "", "reviewers": "MZ6v;te2H;xrLy;vX7v;1fUr", "pdf_size": 514852, "rating": "5;6;6;6;6", "confidence": "1;3;4;3;2", "soundness": "3;4;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;4;3;3;4", "wc_summary": "46;53;67;200;74", "wc_strengths": "27;48;154;91;55", "wc_weaknesses": "45;123;125;67;16", "wc_questions": "21;43;71;5;84", "wc_limitations": "9;6;1;5;25", "wc_review": "148;273;418;368;254", "wc_reply_reviewers": "0;23;41;0;14", "wc_reply_authors": "0;221;0;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 88.0, 56.868268832451726 ], "wc_strengths_avg": [ 75.0, 44.56455991031439 ], "wc_weaknesses_avg": [ 75.2, 43.00883630139276 ], "wc_questions_avg": [ 44.8, 29.586483400363754 ], "wc_limitations_avg": [ 9.2, 8.304215796810677 ], 
"wc_review_avg": [ 292.2, 94.01361603512547 ], "wc_reply_reviewers_avg": [ 15.6, 15.4220621189256 ], "wc_reply_authors_avg": [ 44.2, 88.4 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7844645405527363, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12474310842307055532&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "a4kspTMV9M", "title": "A Specialized Semismooth Newton Method for Kernel-Based Optimal Transport", "track": "main", "status": "Reject", "tldr": "", "abstract": "Kernel-based optimal transport (OT) estimation is an alternative to the standard plug-in OT estimation. Recent works suggested that kernel-based OT estimators are more statistically efficient than plug-in OT estimators when comparing probability measures in high-dimensions~\\citep{Vacher-2021-Dimension}. However, the computation of these estimators relies on the short-step interior-point method for which the required number of iterations is known to be \\textit{large} in practice. In this paper, we propose a nonsmooth equation model for kernel-based OT estimation and show that it can be efficiently solved via a specialized semismooth Newton (SSN) method. Indeed, by exploring the special problem structure, the per-iteration cost of performing one SSN step can be significantly reduced in practice. We also prove that our algorithm can achieve a global convergence rate of $O(1/\\sqrt{k})$ and a local quadratic convergence rate under some standard regularity conditions. 
Finally, we demonstrate the effectiveness of our algorithm by conducting experiments on both synthetic and real datasets.", "keywords": "kernel-based optimal transport estimation;nonsmooth equation model;specialized semismooth Newton method", "primary_area": "", "supplementary_material": "/attachment/92f25fb4a447609425779886dcb49191bef4c471.pdf", "author": "Tianyi Lin;marco cuturi;Michael Jordan", "authorids": "~Tianyi_Lin2;~marco_cuturi2;~Michael_Jordan1", "gender": "M;M;M", "homepage": "https://tydlin.github.io/;http://marcocuturi.net;http://www.cs.berkeley.edu/~jordan/", "dblp": "143/9377;85/5102;j/MichaelIJordan", "google_scholar": "juW6t-AAAAAJ;https://scholar.google.fr/citations?user=kQEydDMAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ", "orcid": "0000-0002-5323-1852;;0000-0001-8935-817X", "linkedin": ";;", "or_profile": "~Tianyi_Lin2;~marco_cuturi2;~Michael_Jordan1", "aff": "University of California, Berkeley;Ensae ParisTech;University of California, Berkeley", "aff_domain": "berkeley.edu;ensae.fr;berkeley.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nlin2023a,\ntitle={A Specialized Semismooth Newton Method for Kernel-Based Optimal Transport},\nauthor={Tianyi Lin and marco cuturi and Michael Jordan},\nyear={2023},\nurl={https://openreview.net/forum?id=a4kspTMV9M}\n}", "github": "", "project": "", "reviewers": "jxBx;7do7;bQMP", "site": "https://openreview.net/forum?id=a4kspTMV9M", "pdf_size": 6112499, "rating": "3;6;7", "confidence": "3;3;4", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "2;2;3", "wc_summary": "90;107;96", "wc_strengths": "104;44;93", "wc_weaknesses": "109;747;76", "wc_questions": "103;60;63", "wc_limitations": "20;9;7", "wc_review": "426;967;335", "wc_reply_reviewers": "338;392;0", "wc_reply_authors": "1609;854;0", "reply_reviewers": "3;2;0", "reply_authors": "4;3;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 97.66666666666667, 7.039570693980959 ], "wc_strengths_avg": [ 80.33333333333333, 26.081070189358073 ], "wc_weaknesses_avg": [ 310.6666666666667, 308.82825144226825 ], "wc_questions_avg": [ 75.33333333333333, 19.601587237318874 ], "wc_limitations_avg": [ 12.0, 5.715476066494082 ], "wc_review_avg": [ 576.0, 278.9635579545591 ], "wc_reply_reviewers_avg": [ 243.33333333333334, 173.46917753755434 ], "wc_reply_authors_avg": [ 821.0, 657.2858333074482 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6933752452815364, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11479753157603941986&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;ENSAE ParisTech", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.ensae.fr", "aff_unique_abbr": "UC Berkeley;Ensae", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "id": "a648X9AoL4", "title": "Large Language Model Guided Tree-of-Thought", "track": "main", "status": "Reject",
"tldr": "", "abstract": "In this paper, we introduce the Tree-of-Thought (ToT) framework, a novel approach aimed at improving the problem-solving capabilities of auto-regressive large language models (LLMs). The ToT technique is inspired by the human mind's approach for solving complex reasoning tasks through trial and error. In this process, the human mind explores the solution space through a tree-like thought process, allowing for backtracking when necessary. To implement ToT as a software system, we augment an LLM with additional modules including a prompter agent, a checker module, a memory module, and a ToT controller. In order to solve a given problem, these modules engage in a multi-round conversation with the LLM. The memory module records the conversation and state history of the problem solving process, which allows the system to backtrack to the previous steps of the thought-process and explore other directions from there. To verify the effectiveness of the proposed technique, we implement a ToT-based solver for the Sudoku Puzzle. Experimental results show that the ToT framework can significantly increase the success rate of Sudoku puzzle solving.", "keywords": "LLM;tree of thought;problem solving", "primary_area": "", "supplementary_material": "/attachment/7b8c61bde9ab961f945e0efaee43f73a0216d1a9.gz", "author": "Jieyi Long", "authorids": "~Jieyi_Long1", "gender": "M", "homepage": "", "dblp": "95/2892", "google_scholar": "https://scholar.google.com/citations?hl=en", "orcid": "", "linkedin": "jieyilong/", "or_profile": "~Jieyi_Long1", "aff": "Theta Labs, Inc.", "aff_domain": "thetalabs.org", "position": "Principal Researcher", "bibtex": "@misc{\nlong2023large,\ntitle={Large Language Model Guided Tree-of-Thought},\nauthor={Jieyi Long},\nyear={2023},\nurl={https://openreview.net/forum?id=a648X9AoL4}\n}", "github": "", "project": "", "reviewers": "1FWa;gGf6;Xo2U;gL3B;QeCS", "site": "https://openreview.net/forum?id=a648X9AoL4", "pdf_size": 343460, "rating": "2;3;3;4;4", "confidence": "4;5;4;4;4", "soundness": "1;3;2;1;2", "novelty": "2;1;2;2;3", "presentation": "2;3;2;2;2", "wc_summary": "84;82;35;126;84", "wc_strengths": "67;41;15;99;73", "wc_weaknesses": "576;387;598;209;199", "wc_questions": "61;17;26;191;65", "wc_limitations": "29;67;4;1;23", "wc_review": "817;594;678;626;444", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 3.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 1.8, 0.7483314773547883 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 82.2, 28.81943788487208 ], "wc_strengths_avg": [ 59.0, 28.705400188814647 ], "wc_weaknesses_avg": [ 393.8, 171.48690912136703 ], "wc_questions_avg": [ 72.0, 62.40512799442046 ], "wc_limitations_avg": [ 24.8, 23.667699507979226 ], "wc_review_avg": [ 631.8, 120.96677229718911 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.13363062095621225, "gs_citation": 213, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10156605711608966059&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Theta Labs", "aff_unique_dep": "", "aff_unique_url": "https://www.thetalabs.org", "aff_unique_abbr": "Theta Labs", 
"aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Polynomial-Time Linear-Swap Regret Minimization in Imperfect-Information Sequential Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71207", "id": "aCOKUvqHtD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/59f6421e64707225fdf5b28840679a07-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aCOKUvqHtD", "openreview": "https://openreview.net/forum?id=aCOKUvqHtD", "poster": "/media/PosterPDFs/NeurIPS%202023/71207.png?t=1702134827.9949305", "slides": "https://nips.cc/virtual/2023/poster/71207", "video": "https://nips.cc/virtual/2023/poster/71207", "author_site": "Gabriele Farina, Charilaos Pipis", "tldr": "", "abstract": "No-regret learners seek to minimize the difference between the loss they cumulated through the actions they played, and the loss they would have cumulated in hindsight had they consistently modified their behavior according to some strategy transformation function. The size of the set of transformations considered by the learner determines a natural notion of rationality. As the set of transformations each learner considers grows, the strategies played by the learners recover more complex game-theoretic equilibria, including correlated \nequilibria in normal-form games and extensive-form correlated equilibria in extensive-form games. At the extreme, a no-swap-regret agent is one that minimizes regret against the set of all functions from the set of strategies to itself. While it is known that the no-swap-regret condition can be attained efficiently in nonsequential (normal-form) games, understanding what is the strongest notion of rationality that can be attained efficiently in the worst case in sequential (extensive-form) games is a longstanding open problem. In this paper we provide a positive result, by showing that it is possible, in any sequential game, to retain polynomial-time (in the game tree size) iterations while achieving sublinear regret with respect to all linear transformations of the mixed strategy space, a notion called no-linear-swap regret. 
This notion of hindsight rationality is as strong as no-swap-regret in nonsequential games, and stronger than no-trigger-regret in sequential games\u2014thereby proving the existence of a subset of extensive-form correlated equilibria robust to linear deviations, which we call linear-deviation correlated equilibria, that can be approached efficiently.", "keywords": "online learning;algorithmic game theory;extensive form games;correlated equilibrium;swap regret;linear swap regret", "primary_area": "", "supplementary_material": "", "author": "Gabriele Farina;Charilaos Pipis", "authorids": "~Gabriele_Farina1;~Charilaos_Pipis1", "gender": "M;", "homepage": "http://www.cs.cmu.edu/~gfarina/about/;", "dblp": ";", "google_scholar": "sktDNcEAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Gabriele_Farina1;~Charilaos_Pipis1", "aff": "FAIR, Meta AI;", "aff_domain": "meta.com;", "position": "Researcher;", "bibtex": "@inproceedings{\nfarina2023polynomialtime,\ntitle={Polynomial-Time Linear-Swap Regret Minimization in Imperfect-Information Sequential Games},\nauthor={Gabriele Farina and Charilaos Pipis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aCOKUvqHtD}\n}", "github": "", "project": "", "reviewers": "qFDY;vZdR;V8FX;Dcee", "pdf_size": 449820, "rating": "6;6;6;7", "confidence": "4;2;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "110;251;109;65", "wc_strengths": "31;95;37;57", "wc_weaknesses": "121;220;108;26", "wc_questions": "100;1;118;57", "wc_limitations": "1;1;20;4", "wc_review": "363;568;392;209", "wc_reply_reviewers": "136;16;13;20", "wc_reply_authors": "267;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 133.75, 70.09056641232114 ], "wc_strengths_avg": [ 55.0, 25.019992006393608 ], "wc_weaknesses_avg": [ 118.75, 68.87443284702967 ], "wc_questions_avg": [ 69.0, 45.0832563153994 ], "wc_limitations_avg": [ 6.5, 7.88986691902975 ], "wc_review_avg": [ 383.0, 127.45783616553358 ], "wc_reply_reviewers_avg": [ 46.25, 51.87665660005471 ], "wc_reply_authors_avg": [ 66.75, 115.61439140522256 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=728594839242917068&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "meta.com;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta AI", "aff_unique_url": "https://meta.ai", "aff_unique_abbr": "Meta AI", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Spectral Entry-wise Matrix Estimation for Low-Rank Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71206", "id": "aDLmRMb0K9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f334c3375bd3744e98a0ca8eaa2403b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aDLmRMb0K9", "openreview": "https://openreview.net/forum?id=aDLmRMb0K9", "poster": "/media/PosterPDFs/NeurIPS%202023/71206.png?t=1701942953.4577115", 
"slides": "https://nips.cc/virtual/2023/poster/71206", "video": "https://nips.cc/virtual/2023/poster/71206", "author_site": "Stefan Stojanovic, Yassir Jedra, Yassir Jedra, Alexandre Proutiere", "tldr": "", "abstract": "We study matrix estimation problems arising in reinforcement learning with low-rank structure. In low-rank bandits, the matrix to be recovered specifies the expected arm rewards, and for low-rank Markov Decision Processes (MDPs), it characterizes the transition kernel of the MDP. In both cases, each entry of the matrix carries important information, and we seek estimation methods with low entry-wise prediction error. Importantly, these methods further need to accommodate for inherent correlations in the available data (e.g. for MDPs, the data consists of system trajectories). We investigate the performance of simple spectral-based matrix estimation approaches: we show that they efficiently recover the singular subspaces of the matrix and exhibit nearly-minimal entry-wise prediction error. These new results on low-rank matrix estimation make it possible to devise reinforcement learning algorithms that fully exploit the underlying low-rank structure. We provide two examples of such algorithms: a regret minimization algorithm for low-rank bandit problems, and a best policy identification algorithm for low-rank MDPs. Both algorithms yield state-of-the-art performance guarantees.", "keywords": "Low-rank matrix estimation; low rank bandits; low rank MDP; spectral methods", "primary_area": "", "supplementary_material": "/attachment/9e2a4be46699d9be73423ad7c4df9e9ed694c913.pdf", "author": "Stefan Stojanovic;Yassir Jedra;Alexandre Proutiere", "authorids": "~Stefan_Stojanovic1;~Yassir_Jedra1;~Alexandre_Proutiere1", "gender": "M;;M", "homepage": "https://www.kth.se/profile/stesto;https://sites.google.com/view/yassir-jedra/home?authuser=1;https://people.kth.se/~alepro/", "dblp": "315/5080;238/0358;p/AlexandreProutiere", "google_scholar": "jCkz9ykAAAAJ;tePNfWQAAAAJ;g5sya5cAAAAJ", "orcid": ";;", "linkedin": ";yassirjedra/;", "or_profile": "~Stefan_Stojanovic1;~Yassir_Jedra1;~Alexandre_Proutiere1", "aff": "KTH Royal Institute of Technology;KTH Royal Institute of Technology, Stockholm, Sweden;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;kth.se;kth.se", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nstojanovic2023spectral,\ntitle={Spectral Entry-wise Matrix Estimation for Low-Rank Reinforcement Learning},\nauthor={Stefan Stojanovic and Yassir Jedra and Alexandre Proutiere},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aDLmRMb0K9}\n}", "github": "", "project": "", "reviewers": "N4Ue;zbTi;JAik;j7uj", "pdf_size": 686316, "rating": "6;6;6;8", "confidence": "4;4;3;3", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "71;126;49;697", "wc_strengths": "67;30;82;215", "wc_weaknesses": "288;53;165;411", "wc_questions": "53;35;44;992", "wc_limitations": "5;10;9;85", "wc_review": "484;254;349;2400", "wc_reply_reviewers": "11;6;16;199", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 235.75, 267.77544230194076 ], "wc_strengths_avg": [ 98.5, 69.87309925858449 
], "wc_weaknesses_avg": [ 229.25, 133.8625694509111 ], "wc_questions_avg": [ 281.0, 410.5453689910532 ], "wc_limitations_avg": [ 27.25, 33.39442318711314 ], "wc_review_avg": [ 871.75, 886.1124011659017 ], "wc_reply_reviewers_avg": [ 58.0, 81.48312708775971 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3765851006322186497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kth.se;kth.se;kth.se", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "title": "Optimal Unbiased Randomizers for Regression with Label Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71205", "id": "aG6xOP9QY7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd5d436621dd3ee728b11c067d32d488-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aG6xOP9QY7", "openreview": "https://openreview.net/forum?id=aG6xOP9QY7", "poster": "/media/PosterPDFs/NeurIPS%202023/71205.png?t=1702159350.7059922", "slides": "https://nips.cc/virtual/2023/poster/71205", "video": "https://nips.cc/virtual/2023/poster/71205", "author_site": "Ashwinkumar Badanidiyuru Varadaraja, Badih Ghazi, Pritish Kamath, Ravi Kumar, Ethan Leeman, Pasin Manurangsi, Avinash V Varadarajan, Chiyuan Zhang", "tldr": "", "abstract": "We propose a new family of label randomizers for training _regression_ models under the constraint of label differential privacy (DP). In particular, we leverage the trade-offs between bias and variance to construct better label randomizers depending on a privately estimated prior distribution over the labels. We demonstrate that these randomizers achieve state-of-the-art privacy-utility trade-offs on several datasets, highlighting the importance of reducing bias when training neural networks with label DP. 
We also provide theoretical results shedding light on the structural properties of the optimal unbiased randomizers.", "keywords": "label differential privacy", "primary_area": "", "supplementary_material": "", "author": "Ashwinkumar Badanidiyuru;Badih Ghazi;Pritish Kamath;Ravi Kumar;Ethan Jacob Leeman;Pasin Manurangsi;Avinash V Varadarajan;Chiyuan Zhang", "authorids": "~Ashwinkumar_Badanidiyuru1;~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Ethan_Jacob_Leeman1;~Pasin_Manurangsi2;~Avinash_V_Varadarajan1;~Chiyuan_Zhang1", "gender": "M;;M;M;M;M;;M", "homepage": "https://sites.google.com/site/ashwinkumarbv/home;https://sites.google.com/view/badihghazi/home;https://pritishkamath.github.io/;https://sites.google.com/site/ravik53/;;https://pasin30055.github.io/;;http://pluskid.org", "dblp": "15/2638;125/2134;https://dblp.org/pers/k/Kamath:Pritish.html;k/RaviKumar.html;140/7652.html;133/2059;77/3611;21/8315", "google_scholar": "HhValEMAAAAJ;GBJLTN8AAAAJ;1JFARhUAAAAJ;J_XhIsgAAAAJ;;35hM-PkAAAAJ;jTNsAXkAAAAJ;l_G2vr0AAAAJ", "orcid": ";;;0000-0002-2203-2586;;;;", "linkedin": "ashwinkumar-badanidiyuru-varadaraja-42b9832b/;badih-ghazi-608379132/;;ravi-kumar-a3a9631;https://www.linkedin.com/mwlite/in/ethan-leeman;;;", "or_profile": "~Ashwinkumar_Badanidiyuru1;~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Ethan_Jacob_Leeman1;~Pasin_Manurangsi2;~Avinash_V_Varadarajan1;~Chiyuan_Zhang1", "aff": "Google;Google;Google Research;Google;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Research Scientist;Research Scientist;Researcher;Research Scientist;Software Engineer;Research Scientist", "bibtex": "@inproceedings{\nbadanidiyuru2023optimal,\ntitle={Optimal Unbiased Randomizers for Regression with Label Differential Privacy},\nauthor={Ashwinkumar Badanidiyuru and Badih Ghazi and Pritish Kamath and Ravi Kumar and Ethan Jacob Leeman and Pasin Manurangsi and Avinash V Varadarajan and Chiyuan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aG6xOP9QY7}\n}", "github": "", "project": "", "reviewers": "DWLv;oAvy;YHYY;Vhiv", "pdf_size": 1574898, "rating": "6;7;7;7", "confidence": "3;4;4;3", "soundness": "3;4;3;3", "novelty": "3;3;2;3", "presentation": "3;4;4;4", "wc_summary": "91;49;407;42", "wc_strengths": "62;82;145;41", "wc_weaknesses": "75;102;154;16", "wc_questions": "53;4;73;148", "wc_limitations": "18;1;33;1", "wc_review": "299;238;812;248", "wc_reply_reviewers": "0;16;86;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 147.25, 151.13301260809962 ], "wc_strengths_avg": [ 82.5, 38.88765871070152 ], "wc_weaknesses_avg": [ 86.75, 49.746231013012434 ], "wc_questions_avg": [ 69.5, 51.80974811751163 ], "wc_limitations_avg": [ 13.25, 13.348689074212494 ], "wc_review_avg": [ 399.25, 239.42156857726917 ], "wc_reply_reviewers_avg": [ 25.5, 35.53519382246282 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2115661228573428021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Enhancing Adaptive History Reserving by Spiking Convolutional Block Attention Module in Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71204", "id": "aGZp61S9Lj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b8734840bf65c8facd619f5105c6acd0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aGZp61S9Lj", "openreview": "https://openreview.net/forum?id=aGZp61S9Lj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71204", "video": "https://nips.cc/virtual/2023/poster/71204", "author_site": "Qi Xu, Yuyuan Gao, Jiangrong Shen, Yaxin Li, Xuming Ran, Huajin Tang, Gang Pan", "tldr": "", "abstract": "Spiking neural networks (SNNs) serve as one type of efficient model to process spatio-temporal patterns in time series, such as the Address-Event Representation data collected from Dynamic Vision Sensor (DVS). Although convolutional SNNs have achieved remarkable performance on these AER datasets, benefiting from the predominant spatial feature extraction ability of convolutional structure, they ignore temporal features related to sequential time points. In this paper, we develop a recurrent spiking neural network (RSNN) model embedded with an advanced spiking convolutional block attention module (SCBAM) component to combine both spatial and temporal features of spatio-temporal patterns. It invokes the history information in spatial and temporal channels adaptively through SCBAM, which brings the advantages of efficient memory calling and history redundancy elimination. The performance of our model was evaluated in DVS128-Gesture dataset and other time-series datasets. 
The experimental results show that the proposed SRNN-SCBAM model makes better use of the history information in spatial and temporal dimensions with less memory space, and achieves higher accuracy compared to other models.", "keywords": "Spiking neural networks (SNNs);Recurrent spiking neural network (RSNN);Dynamic Vision Sensor (DVS);Spiking convolutional block attention module (SCBAM)", "primary_area": "", "supplementary_material": "", "author": "Qi Xu;Yuyuan Gao;Jiangrong Shen;Yaxin Li;Xuming Ran;Huajin Tang;Gang Pan", "authorids": "~Qi_Xu1;~Yuyuan_Gao1;~Jiangrong_Shen1;~Yaxin_Li4;~Xuming_Ran2;~Huajin_Tang1;~Gang_Pan1", "gender": "M;M;F;;;M;", "homepage": "https://www.researchgate.net/profile/Qi_Xu43;https://github.com/lulinglanfeng;;;;https://person.zju.edu.cn/htang;", "dblp": ";;208/3564;143/0251-3;;18/434;", "google_scholar": "dGEcAuYAAAAJ;;3XK6COkAAAAJ;;;U041O4QAAAAJ;", "orcid": "0000-0001-9245-5544;;;0000-0003-0160-8950;;;", "linkedin": ";;;;;;", "or_profile": "~Qi_Xu1;~Yuyuan_Gao1;~Jiangrong_Shen1;~Yaxin_Li4;~Xuming_Ran2;~Huajin_Tang1;~Gang_Pan1", "aff": "School of Computer Science and Technology;Dalian University of Technology;;Dalian University of Technology;;Zhejiang University;", "aff_domain": "dlut.edu.cn;dlut.edu.cn;;dlut.edu.cn;;zju.edu.cn;", "position": "Associate Professor;MS student;;MS student;;Full Professor;", "bibtex": "@inproceedings{\nxu2023enhancing,\ntitle={Enhancing Adaptive History Reserving by Spiking Convolutional Block Attention Module in Recurrent Neural Networks},\nauthor={Qi Xu and Yuyuan Gao and Jiangrong Shen and Yaxin Li and Xuming Ran and Huajin Tang and Gang Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aGZp61S9Lj}\n}", "github": "", "project": "", "reviewers": "KuzU;NNKj;TiEP;9YjJ", "pdf_size": 2090254, "rating": "3;7;7;7", "confidence": "5;5;5;5", "soundness": "2;3;3;4", "novelty": "1;3;3;4", "presentation": "1;4;3;3", "wc_summary": "45;79;79;86", "wc_strengths": "1;92;32;44", "wc_weaknesses": "184;53;29;41", "wc_questions": "5;72;86;49", "wc_limitations": "72;8;30;12", "wc_review": "307;304;256;232", "wc_reply_reviewers": "0;13;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 72.25, 15.990231392947383 ], "wc_strengths_avg": [ 42.25, 32.72900090134131 ], "wc_weaknesses_avg": [ 76.75, 62.49949999799998 ], "wc_questions_avg": [ 53.0, 30.700162866017504 ], "wc_limitations_avg": [ 30.5, 25.352514668174436 ], "wc_review_avg": [ 274.75, 31.91688424642982 ], "wc_reply_reviewers_avg": [ 6.25, 6.2599920127744575 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14061384144497329880&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "dlut.edu.cn;dlut.edu.cn;;dlut.edu.cn;;zju.edu.cn;", "author_num": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "School of Computer Science and Technology;Dalian University of Technology;Zhejiang University", "aff_unique_dep": "Computer Science and Technology;;", "aff_unique_url": 
";http://www.dlut.edu.cn/;https://www.zju.edu.cn", "aff_unique_abbr": ";DUT;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";China" }, { "title": "CS4ML: A general framework for active learning with arbitrary data based on Christoffel functions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71203", "id": "aINqoP32cb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f8c7eb848ffec848f3ed2b7ca44915d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aINqoP32cb", "openreview": "https://openreview.net/forum?id=aINqoP32cb", "poster": "/media/PosterPDFs/NeurIPS%202023/71203.png?t=1701726405.4275866", "slides": "https://nips.cc/virtual/2023/poster/71203", "video": "https://nips.cc/virtual/2023/poster/71203", "author_site": "Juan M. Cardenas, Ben Adcock, Nick Dexter", "tldr": "", "abstract": "We introduce a general framework for active learning in regression problems. Our framework extends the standard setup by allowing for general types of data, rather than merely pointwise samples of the target function. This generalization covers many cases of practical interest, such as data acquired in transform domains (e.g., Fourier data), vector-valued data (e.g., gradient-augmented data), data acquired along continuous curves, and, multimodal data (i.e., combinations of different types of measurements). Our framework considers random sampling according to a finite number of sampling measures and arbitrary nonlinear approximation spaces (model classes). We introduce the concept of \\textit{generalized Christoffel functions} and show how these can be used to optimize the sampling measures. We prove that this leads to near-optimal sample complexity in various important cases. This paper focuses on applications in scientific computing, where active learning is often desirable, since it is usually expensive to generate data. 
We demonstrate the efficacy of our framework for gradient-augmented learning with polynomials, Magnetic Resonance Imaging (MRI) using generative models, and adaptive sampling for solving PDEs using Physics-Informed Neural Networks (PINNs).", "keywords": "active learning;regression;arbitrary data;leverage scores;Christoffel functions;generative models;Magnetic Resonance Imaging (MRI);Physics-Informed Neural Networks (PINNs)", "primary_area": "", "supplementary_material": "/attachment/d958cae3f28bf641f27b69856a034adb8a7f0817.zip", "author": "Juan M Cardenas;Ben Adcock;Nick Dexter", "authorids": "~Juan_M_Cardenas1;ben_adcock@sfu.ca;nick.dexter@fsu.edu", "gender": "M;;", "homepage": "https://sites.google.com/view/jmcardenas;;", "dblp": "246/5072;;", "google_scholar": "https://scholar.google.ca/citations?user=ZMTUqvoAAAAJ;;", "orcid": ";;", "linkedin": "juan-m-cardenas-22b8a9205/;;", "or_profile": "~Juan_M_Cardenas1;ben_adcock@sfu.ca;nick.dexter@fsu.edu", "aff": "Simon Fraser University;;", "aff_domain": "sfu.ca;;", "position": "PhD student;;", "bibtex": "@inproceedings{\ncardenas2023csml,\ntitle={{CS}4{ML}: A general framework for active learning with arbitrary data based on Christoffel functions},\nauthor={Juan M Cardenas and Ben Adcock and Nick Dexter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aINqoP32cb}\n}", "github": "", "project": "", "reviewers": "7Xvg;tt34;9pYz;n7M7", "pdf_size": 16498398, "rating": "3;7;7;8", "confidence": "2;3;1;3", "soundness": "1;3;4;3", "novelty": "1;3;4;4", "presentation": "1;2;4;3", "wc_summary": "41;126;37;338", "wc_strengths": "6;58;56;367", "wc_weaknesses": "63;126;51;389", "wc_questions": "154;534;57;644", "wc_limitations": "1;53;6;1", "wc_review": "265;897;207;1739", "wc_reply_reviewers": "0;34;36;111", "wc_reply_authors": "0;6;6;20", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 135.5, 122.19758590086795 ], "wc_strengths_avg": [ 121.75, 143.11948679337834 ], "wc_weaknesses_avg": [ 157.25, 136.7998081139005 ], "wc_questions_avg": [ 347.25, 247.24823052956313 ], "wc_limitations_avg": [ 15.25, 21.890351755967743 ], "wc_review_avg": [ 777.0, 617.8365479639417 ], "wc_reply_reviewers_avg": [ 45.25, 40.5670740872447 ], "wc_reply_authors_avg": [ 8.0, 7.3484692283495345 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2747740351926008, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14978310653203035912&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "sfu.ca;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Feature Adaptation for Sparse Linear Regression", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71202", "id": "aIUnoHuENG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f999632c48f87cffb214e575581e4a9-Abstract-Conference.html", "pdf":
"https://openreview.net/pdf?id=aIUnoHuENG", "openreview": "https://openreview.net/forum?id=aIUnoHuENG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71202", "video": "https://nips.cc/virtual/2023/poster/71202", "author_site": "Jonathan Kelner, Frederic Koehler, Raghu Meka, Dhruv Rohatgi", "tldr": "", "abstract": "Sparse linear regression is a central problem in high-dimensional statistics. We study the correlated random design setting, where the covariates are drawn from a multivariate Gaussian $N(0,\\Sigma)$, and we seek an estimator with small excess risk. \n\nIf the true signal is $t$-sparse, information-theoretically, it is possible to achieve strong recovery guarantees with only $O(t\\log n)$ samples. However, computationally efficient algorithms have sample complexity linear in (some variant of) the *condition number* of $\\Sigma$. Classical algorithms such as the Lasso can require significantly more samples than necessary even if there is only a single sparse approximate dependency among the covariates.\n\nWe provide a polynomial-time algorithm that, given $\\Sigma$, automatically adapts the Lasso to tolerate a small number of approximate dependencies. In particular, we achieve near-optimal sample complexity for constant sparsity and if $\\Sigma$ has few ``outlier'' eigenvalues.\nOur algorithm fits into a broader framework of *feature adaptation* for sparse linear regression with ill-conditioned covariates. With this framework, we additionally provide the first polynomial-factor improvement over brute-force search for constant sparsity $t$ and arbitrary covariance $\\Sigma$.", "keywords": "theory;sparse linear regression;feature adaptation;lasso", "primary_area": "", "supplementary_material": "/attachment/8a9583dfbb0155ca684440f3162b0e6ecdcbd423.zip", "author": "Jonathan Kelner;Frederic Koehler;Raghu Meka;Dhruv Rohatgi", "authorids": "~Jonathan_Kelner1;~Frederic_Koehler1;~Raghu_Meka1;~Dhruv_Rohatgi1", "gender": "M;;M;M", "homepage": "https://math.mit.edu/~kelner/;https://frkoehle.github.io/;http://raghumeka.org;http://www.mit.edu/~drohatgi/", "dblp": "64/4772.html;132/1904;76/1906;223/4465", "google_scholar": ";;xuDZ9-sAAAAJ;NUd_d6UAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jonathan_Kelner1;~Frederic_Koehler1;~Raghu_Meka1;~Dhruv_Rohatgi1", "aff": "Massachusetts Institute of Technology;Stanford University;University of California, Los Angeles;Massachusetts Institute of Technology", "aff_domain": "mit.edu;stanford.edu;ucla.edu;mit.edu", "position": "Full Professor;Postdoc;Associate Professor;PhD student", "bibtex": "@inproceedings{\nkelner2023feature,\ntitle={Feature Adaptation for Sparse Linear Regression},\nauthor={Jonathan Kelner and Frederic Koehler and Raghu Meka and Dhruv Rohatgi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aIUnoHuENG}\n}", "github": "", "project": "", "reviewers": "EHBK;8yrN;Ahsd;BTho", "pdf_size": 666700, "rating": "5;7;8;8", "confidence": "4;3;4;3", "soundness": "3;4;4;3", "novelty": "2;4;4;4", "presentation": "1;3;4;4", "wc_summary": "61;82;253;56", "wc_strengths": "59;51;91;68", "wc_weaknesses": "129;45;157;80", "wc_questions": "2;80;1;147", "wc_limitations": "1;1;8;20", "wc_review": "252;259;510;371", "wc_reply_reviewers": "5;5;0;25", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], 
"novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 113.0, 81.41560047067146 ], "wc_strengths_avg": [ 67.25, 14.972892172189045 ], "wc_weaknesses_avg": [ 102.75, 43.25722483007896 ], "wc_questions_avg": [ 57.5, 60.80501624043858 ], "wc_limitations_avg": [ 7.5, 7.762087348130012 ], "wc_review_avg": [ 348.0, 104.77356536836952 ], "wc_reply_reviewers_avg": [ 8.75, 9.60143218483576 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5688061605574254213&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;stanford.edu;ucla.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;Stanford University;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.stanford.edu;https://www.ucla.edu", "aff_unique_abbr": "MIT;Stanford;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to Modulate pre-trained Models in RL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71201", "id": "aIpGtPwXny", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/77e59fafe99e94f822e79bf9308ec377-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aIpGtPwXny", "openreview": "https://openreview.net/forum?id=aIpGtPwXny", "poster": "/media/PosterPDFs/NeurIPS%202023/71201.png?t=1701720842.800453", "slides": "https://nips.cc/virtual/2023/poster/71201", "video": "https://nips.cc/virtual/2023/poster/71201", "author_site": "Thomas Schmied, Markus Hofmarcher, Fabian Paischer, Razvan Pascanu, Sepp Hochreiter", "tldr": "", "abstract": "Reinforcement Learning (RL) has been successful in various domains like robotics, game playing, and simulation. While RL agents have shown impressive capabilities in their specific tasks, they insufficiently adapt to new tasks. In supervised learning, this adaptation problem is addressed by large-scale pre-training followed by fine-tuning to new down-stream tasks. Recently, pre-training on multiple tasks has been gaining traction in RL. However, fine-tuning a pre-trained model often suffers from catastrophic forgetting. That is, the performance on the pre-training tasks deteriorates when fine-tuning on new tasks. To investigate the catastrophic forgetting phenomenon, we first jointly pre-train a model on datasets from two benchmark suites, namely Meta-World and DMControl. Then, we evaluate and compare a variety of fine-tuning methods prevalent in natural language processing, both in terms of performance on new tasks, and how well performance on pre-training tasks is retained. Our study shows that with most fine-tuning approaches, the performance on pre-training tasks deteriorates significantly. Therefore, we propose a novel method, Learning-to-Modulate (L2M), that avoids the degradation of learned skills by modulating the information flow of the frozen pre-trained model via a learnable modulation pool. Our method achieves state-of-the-art performance on the Continual-World benchmark, while retaining performance on the pre-training tasks. 
Finally, to aid future research in this area, we release a dataset encompassing 50 Meta-World and 16 DMControl tasks.", "keywords": "Reinforcement Learning;Transformer;Decision Transformer;Multi-task learning;Continual learning;NLP;Fine-tuning;Prompt Tuning;Parameter efficient Fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Thomas Schmied;Markus Hofmarcher;Fabian Paischer;Razvan Pascanu;Sepp Hochreiter", "authorids": "~Thomas_Schmied1;~Markus_Hofmarcher1;~Fabian_Paischer1;~Razvan_Pascanu1;~Sepp_Hochreiter1", "gender": "Not Specified;M;M;M;M", "homepage": "https://www.jku.at/en/institute-for-machine-learning/;;;https://razp.info;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/", "dblp": "278/8207;224/9960;309/5971;65/8368.html;h/SeppHochreiter.html", "google_scholar": ";FD27EMIAAAAJ;zdm5ZKwAAAAJ;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.at/citations?user=tvUH3WMAAAAJ", "orcid": ";;;;0000-0001-7449-2528", "linkedin": ";;;;https://linkedin.com/in/sepp-hochreiter-41514846", "or_profile": "~Thomas_Schmied1;~Markus_Hofmarcher1;~Fabian_Paischer1;~Razvan_Pascanu1;~Sepp_Hochreiter1", "aff": "Johannes Kepler Universit\u00e4t Linz;Johannes Kepler Universit\u00e4t Linz;University College London, University of London;Google DeepMind;Johannes Kepler University Linz", "aff_domain": "jku.at;jku.at;ucl.ac.uk;google.com;jku.at", "position": "PhD student;PhD student;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nschmied2023learning,\ntitle={Learning to Modulate pre-trained Models in {RL}},\nauthor={Thomas Schmied and Markus Hofmarcher and Fabian Paischer and Razvan Pascanu and Sepp Hochreiter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aIpGtPwXny}\n}", "github": "", "project": "", "reviewers": "yPGU;PZAn;tSZN;aphp;Mf3J", "pdf_size": 9351066, "rating": "5;5;6;7;7", "confidence": "4;3;3;3;3", "soundness": "3;3;3;3;3", "novelty": "4;2;2;3;3", "presentation": "2;3;2;3;2", "wc_summary": "58;59;56;153;38", "wc_strengths": "37;65;34;96;41", "wc_weaknesses": "61;140;145;314;59", "wc_questions": "405;52;21;104;1", "wc_limitations": "22;5;7;16;1", "wc_review": "583;321;263;683;140", "wc_reply_reviewers": "69;53;29;51;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 72.8, 40.82842147328255 ], "wc_strengths_avg": [ 54.6, 23.414525406251563 ], "wc_weaknesses_avg": [ 143.8, 92.76939150387912 ], "wc_questions_avg": [ 116.6, 148.3301722509618 ], "wc_limitations_avg": [ 10.2, 7.678541528180986 ], "wc_review_avg": [ 398.0, 203.06058209312806 ], "wc_reply_reviewers_avg": [ 40.4, 23.87969849055888 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5590169943749476, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11739421617108772736&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 7, "email": "jku.at;jku.at;ucl.ac.uk;google.com;jku.at", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Johannes Kepler University Linz;University College 
London;Google;Johannes Kepler University", "aff_unique_dep": ";;Google DeepMind;", "aff_unique_url": "https://www.jku.at;https://www.ucl.ac.uk;https://deepmind.com;https://www.jku.at", "aff_unique_abbr": "JKU;UCL;DeepMind;JKU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Linz;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Austria;United Kingdom" }, { "title": "Multimodal Clinical Benchmark for Emergency Care (MC-BEC): A Comprehensive Benchmark for Evaluating Foundation Models in Emergency Medicine", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73542", "id": "aKnWIrDPiR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8f61049e8fe5b9ed714860b951066f1e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=aKnWIrDPiR", "openreview": "https://openreview.net/forum?id=aKnWIrDPiR", "poster": "/media/PosterPDFs/NeurIPS%202023/73542.png?t=1701718516.0019727", "slides": "https://nips.cc/virtual/2023/poster/73542", "video": "https://nips.cc/virtual/2023/poster/73542", "author_site": "Emma Chen, Aman Kansal, Julie Chen, Boyang Tom Jin, Julia Reisler, David Kim, Pranav Rajpurkar", "tldr": "", "abstract": "We propose the Multimodal Clinical Benchmark for Emergency Care (MC-BEC), a comprehensive benchmark for evaluating foundation models in Emergency Medicine using a dataset of 100K+ continuously monitored Emergency Department visits from 2020-2022. MC-BEC focuses on clinically relevant prediction tasks at timescales from minutes to days, including predicting patient decompensation, disposition, and emergency department (ED) revisit, and includes a standardized evaluation framework with train-test splits and evaluation metrics. The multimodal dataset includes a wide range of detailed clinical data, including triage information, prior diagnoses and medications, continuously measured vital signs, electrocardiogram and photoplethysmograph waveforms, orders placed and medications administered throughout the visit, free-text reports of imaging studies, and information on ED diagnosis, disposition, and subsequent revisits. We provide performance baselines for each prediction task to enable the evaluation of multimodal, multitask models. 
We believe that MC-BEC will encourage researchers to develop more effective, generalizable, and accessible foundation models for multimodal clinical data.", "keywords": "Benchmark;Multimodal Clinical Data;Medical AI;Deep Learning;Foundation model;Electronic health records (EHR)", "primary_area": "", "supplementary_material": "", "author": "Emma Chen;Aman Kansal;Julie Chen;Boyang Tom Jin;Julia Rachel Reisler;David A Kim;Pranav Rajpurkar", "authorids": "~Emma_Chen1;~Aman_Kansal1;~Julie_Chen2;~Boyang_Tom_Jin1;~Julia_Rachel_Reisler1;~David_A_Kim1;~Pranav_Rajpurkar1", "gender": ";M;;;F;M;", "homepage": ";https://www.linkedin.com/in/kansalaman/;;;;https://profiles.stanford.edu/157679;", "dblp": ";;;;;;", "google_scholar": ";;;;;;", "orcid": "0000-0002-7528-5229;;0000-0002-7313-2501;0000-0003-4810-5816;0000-0001-5378-0635;;", "linkedin": ";;;;julia-reisler-b48471149;;", "or_profile": "~Emma_Chen1;~Aman_Kansal1;~Julie_Chen2;~Boyang_Tom_Jin1;~Julia_Rachel_Reisler1;~David_A_Kim1;~Pranav_Rajpurkar1", "aff": "Harvard University, Harvard University;Stanford University;Computer Science Department, Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;", "aff_domain": "g.harvard.edu;stanford.edu;cs.stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;", "position": "PhD student;MS student;Undergrad student;MS student;MS student;Assistant Professor;", "bibtex": "@inproceedings{\nchen2023multimodal,\ntitle={Multimodal Clinical Benchmark for Emergency Care ({MC}-{BEC}): A Comprehensive Benchmark for Evaluating Foundation Models in Emergency Medicine},\nauthor={Emma Chen and Aman Kansal and Julie Chen and Boyang Tom Jin and Julia Rachel Reisler and David A Kim and Pranav Rajpurkar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=aKnWIrDPiR}\n}", "github": "", "project": "", "reviewers": "RbWs;2TbH;XBLz;PuQz", "pdf_size": 734199, "rating": "6;6;7;8", "confidence": "5;4;5;4", "wc_summary_and_contributions": "62;161;52;26", "wc_strengths": "114;88;9;67", "wc_improvement": "113;263;93;107", "wc_limitations": "113;38;25;104", "wc_correctness": "3;29;1;148", "wc_clarity": "118;17;1;1", "wc_relation_to_prior_work": "6;25;1;16", "wc_documentation": "5;6;14;30", "wc_additional_feedback": "1;1;1;1", "wc_review": "535;628;197;500", "wc_reply_reviewers": "184;21;0;0", "wc_reply_authors": "589;955;419;248", "reply_reviewers": "1;1;0;0", "reply_authors": "1;3;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 75.25, 51.22194353985409 ], "wc_strengths_avg": [ 69.5, 38.69431482789171 ], "wc_improvement_avg": [ 144.0, 69.0869017976635 ], "wc_limitations_avg": [ 70.0, 38.903727327853815 ], "wc_correctness_avg": [ 45.25, 60.342253023896944 ], "wc_clarity_avg": [ 34.25, 48.79228934985527 ], "wc_relation_to_prior_work_avg": [ 12.0, 9.246621004453464 ], "wc_documentation_avg": [ 13.75, 10.0093706095838 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 465.0, 161.64621863811104 ], "wc_reply_reviewers_avg": [ 51.25, 77.12125193485905 ], "wc_reply_authors_avg": [ 552.75, 261.66808651419456 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 14, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3213705669840651260&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "g.harvard.edu;stanford.edu;cs.stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Harvard University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.stanford.edu", "aff_unique_abbr": "Harvard;Stanford", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Inference-Time Intervention: Eliciting Truthful Answers from a Language Model", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71200", "id": "aLLuYpn83y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81b8390039b7302c909cb769f8b6cd93-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aLLuYpn83y", "openreview": "https://openreview.net/forum?id=aLLuYpn83y", "poster": "/media/PosterPDFs/NeurIPS%202023/71200.png?t=1697385407.9450572", "slides": "https://nips.cc/virtual/2023/poster/71200", "video": "https://nips.cc/virtual/2023/poster/71200", "author_site": "Kenneth Li, Oam Patel, Fernanda Vi\u00e9gas, Hanspeter Pfister, Martin Wattenberg", "tldr": "", "abstract": "We introduce Inference-Time Intervention (ITI), a technique designed to enhance the \"truthfulness\" of large language models (LLMs). ITI operates by shifting model activations during inference, following a learned set of directions across a limited number of attention heads. This intervention significantly improves the performance of LLaMA models on the TruthfulQA benchmark. On an instruction-finetuned LLaMA called Alpaca, ITI improves its truthfulness from $32.5\\%$ to $65.1\\%$. We identify a tradeoff between truthfulness and helpfulness and demonstrate how to balance it by tuning the intervention strength. ITI is minimally invasive and computationally inexpensive. Moreover, the technique is data efficient: while approaches like RLHF require extensive annotations, ITI locates truthful directions using only few hundred examples. 
Our findings suggest that LLMs may have an internal representation of the likelihood of something being true, even as they produce falsehoods on the surface.", "keywords": "Large Language Model;AI Safety", "primary_area": "", "supplementary_material": "/attachment/083a1de25d25e7d19a4c3eff1ea4c2291a1d924a.pdf", "author": "Kenneth Li;Oam Patel;Fernanda Vi\u00e9gas;Hanspeter Pfister;Martin Wattenberg", "authorids": "~Kenneth_Li1;~Oam_Patel1;~Fernanda_Vi\u00e9gas1;~Hanspeter_Pfister1;~Martin_Wattenberg1", "gender": ";M;;M;M", "homepage": "https://likenneth.github.io/;http://github.com/0amp;;https://vcg.seas.harvard.edu;http://www.bewitched.com", "dblp": "75/6627-12;;;p/HanspeterPfister;w/MartinWattenberg", "google_scholar": "v0GItgwAAAAJ;;;tvBEoaMAAAAJ;pv54dqMAAAAJ", "orcid": ";;;0000-0002-3620-2582;", "linkedin": ";oam-patel;;hpfister/;", "or_profile": "~Kenneth_Li1;~Oam_Patel1;~Fernanda_Vi\u00e9gas1;~Hanspeter_Pfister1;~Martin_Wattenberg1", "aff": "Harvard University;Harvard University;;Harvard University;Google", "aff_domain": "harvard.edu;harvard.edu;;harvard.edu;google.com", "position": "PhD student;Undergrad student;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2023inferencetime,\ntitle={Inference-Time Intervention: Eliciting Truthful Answers from a Language Model},\nauthor={Kenneth Li and Oam Patel and Fernanda Vi{\\'e}gas and Hanspeter Pfister and Martin Wattenberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aLLuYpn83y}\n}", "github": "", "project": "", "reviewers": "4t4s;Zn8f;gjfu;wYs3", "pdf_size": 1678945, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "95;194;82;75", "wc_strengths": "94;20;57;61", "wc_weaknesses": "264;4;1;656", "wc_questions": "199;297;78;1", "wc_limitations": "2;12;1;5", "wc_review": "654;527;219;798", "wc_reply_reviewers": "179;14;11;137", "wc_reply_authors": "229;0;0;502", "reply_reviewers": "1;1;1;3", "reply_authors": "2;1;1;4", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 111.5, 48.16897341650536 ], "wc_strengths_avg": [ 58.0, 26.22022120425379 ], "wc_weaknesses_avg": [ 231.25, 267.4615626590109 ], "wc_questions_avg": [ 143.75, 113.17988999817945 ], "wc_limitations_avg": [ 5.0, 4.301162633521313 ], "wc_review_avg": [ 549.5, 213.54683327083077 ], "wc_reply_reviewers_avg": [ 85.25, 74.25757537113638 ], "wc_reply_authors_avg": [ 182.75, 206.6728997715956 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 440, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8900411386972860415&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "harvard.edu;harvard.edu;;harvard.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Harvard University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.harvard.edu;https://www.google.com", "aff_unique_abbr": "Harvard;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "FourierHandFlow: Neural 4D Hand Representation Using Fourier 
Query Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71199", "id": "aMTiwdK3y8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d4834a159f1547b267a05a4e2b7cf5e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aMTiwdK3y8", "openreview": "https://openreview.net/forum?id=aMTiwdK3y8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71199", "video": "https://nips.cc/virtual/2023/poster/71199", "author_site": "Jihyun Lee, Junbong Jang, Donghwan Kim, Minhyuk Sung, Tae-Kyun Kim", "tldr": "", "abstract": "Recent 4D shape representations model continuous temporal evolution of implicit shapes by (1) learning query flows without leveraging shape and articulation priors or (2) decoding shape occupancies separately for each time value. Thus, they do not effectively capture implicit correspondences between articulated shapes or regularize jittery temporal deformations. In this work, we present FourierHandFlow, which is a spatio-temporally continuous representation for human hands that combines a 3D occupancy field with articulation-aware query flows represented as Fourier series. Given an input RGB sequence, we aim to learn a fixed number of Fourier coefficients for each query flow to guarantee smooth and continuous temporal shape dynamics. To effectively model spatio-temporal deformations of articulated hands, we compose our 4D representation based on two types of Fourier query flow: (1) pose flow that models query dynamics influenced by hand articulation changes via implicit linear blend skinning and (2) shape flow that models query-wise displacement flow. In the experiments, our method achieves state-of-the-art results on video-based 4D reconstruction while being computationally more efficient than the existing 3D/4D implicit shape representations. We additionally show our results on motion inter- and extrapolation and texture transfer using the learned correspondences of implicit shapes. To the best of our knowledge, FourierHandFlow is the first neural 4D continuous hand representation learned from RGB videos. 
The code will be publicly accessible.", "keywords": "4D representation;hand reconstruction;implicit representation", "primary_area": "", "supplementary_material": "/attachment/5fb694dd1af2b27dde48c005b5fbe39fe38a5e42.pdf", "author": "Jihyun Lee;Junbong Jang;Donghwan Kim;Minhyuk Sung;Tae-Kyun Kim", "authorids": "~Jihyun_Lee3;~Junbong_Jang1;~Donghwan_Kim6;~Minhyuk_Sung1;~Tae-Kyun_Kim2", "gender": "F;M;M;M;M", "homepage": "https://jyunlee.github.io/;https://github.com/JunbongJang;https://donghwankim0101.github.io/;https://mhsung.github.io/;https://sites.google.com/view/tkkim/", "dblp": ";https://dblp.org/rec/journals/corr/abs-2303-08364;;171/6792;28/787", "google_scholar": "UaMiOq8AAAAJ;Ndu8xqMAAAAJ;https://scholar.google.com/citations?hl=en;PcIYMp4AAAAJ;https://scholar.google.co.uk/citations?user=j2WcLecAAAAJ", "orcid": ";0000-0001-9317-6520;0009-0000-4390-725X;;", "linkedin": "jihyun-lee-b51861238/;junbong-jang-86b738137/;donghwan-kim-1060a81a5/;mhsung;", "or_profile": "~Jihyun_Lee3;~Junbong_Jang1;~Donghwan_Kim6;~Minhyuk_Sung1;~Tae-kyun_Kim1", "aff": "KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlee2023fourierhandflow,\ntitle={FourierHandFlow: Neural 4D Hand Representation Using Fourier Query Flow},\nauthor={Jihyun Lee and Junbong Jang and Donghwan Kim and Minhyuk Sung and Tae-Kyun Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aMTiwdK3y8}\n}", "github": "", "project": "", "reviewers": "s3gm;g8GH;vgcD;Uqr4;rxcm", "pdf_size": 5789525, "rating": "3;5;6;6;7", "confidence": "3;4;5;3;5", "soundness": "2;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "1;3;4;3;4", "wc_summary": "101;80;119;78;81", "wc_strengths": "111;54;55;27;37", "wc_weaknesses": "300;150;43;221;67", "wc_questions": "71;189;138;2;45", "wc_limitations": "7;22;22;1;1", "wc_review": "590;495;377;329;231", "wc_reply_reviewers": "162;0;112;182;10", "wc_reply_authors": "450;0;177;313;0", "reply_reviewers": "2;0;1;2;1", "reply_authors": "3;1;2;2;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 91.8, 15.942396306703706 ], "wc_strengths_avg": [ 56.8, 29.081953166869656 ], "wc_weaknesses_avg": [ 156.2, 95.58953917662747 ], "wc_questions_avg": [ 89.0, 66.6783323126786 ], "wc_limitations_avg": [ 10.6, 9.562426470305535 ], "wc_review_avg": [ 404.4, 125.82463987629768 ], "wc_reply_reviewers_avg": [ 93.2, 75.60529082015358 ], "wc_reply_authors_avg": [ 188.0, 176.11246406770874 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6593804733957871, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11777728430333143363&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and 
Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "The emergence of clusters in self-attention dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71198", "id": "aMjaEkkXJx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2b3e1d9840eba17ad9bbf073e009afe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aMjaEkkXJx", "openreview": "https://openreview.net/forum?id=aMjaEkkXJx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71198", "video": "https://nips.cc/virtual/2023/poster/71198", "author_site": "Borjan Geshkovski, Cyril Letrouit, Yury Polyanskiy, Philippe Rigollet", "tldr": "", "abstract": "Viewing Transformers as interacting particle systems, we describe the geometry of learned representations when the weights are not time-dependent. We show that particles, representing tokens, tend to cluster toward particular limiting objects as time tends to infinity. Using techniques from dynamical systems and partial differential equations, we show that type of limiting object that emerges depends on the spectrum of the value matrix. Additionally, in the one-dimensional case we prove that the self-attention matrix converges to a low-rank Boolean matrix. The combination of these results mathematically confirms the empirical observation made by Vaswani et al. [ VSP`17 ] that leaders appear in a sequence of tokens when processed by Transformers.", "keywords": "Transformers;Self-Attention;Clustering;Interacting Particle Systems;Continuous Time", "primary_area": "", "supplementary_material": "/attachment/92a37d81bbd76c79c38f4be8c6c3453d2ce2645c.pdf", "author": "Borjan Geshkovski;Cyril Letrouit;Yury Polyanskiy;Philippe Rigollet", "authorids": "~Borjan_Geshkovski1;letrouit@mit.edu;~Yury_Polyanskiy1;~Philippe_Rigollet1", "gender": ";;M;M", "homepage": "https://borjang.github.io/;;http://www.mit.edu/~ypol/;http://www-math.mit.edu/~rigollet/", "dblp": ";;74/8860;05/5072", "google_scholar": "XxCmLvoAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Borjan_Geshkovski1;letrouit@mit.edu;~Yury_Polyanskiy1;~Philippe_Rigollet1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu;mit.edu", "position": "Postdoc;;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngeshkovski2023the,\ntitle={The emergence of clusters in self-attention dynamics},\nauthor={Borjan Geshkovski and Cyril Letrouit and Yury Polyanskiy and Philippe Rigollet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aMjaEkkXJx}\n}", "github": "", "project": "", "reviewers": "8epq;P5zn;U2EX;Qc4C", "pdf_size": 5716650, "rating": "6;7;7;7", "confidence": "2;4;4;4", "soundness": "2;3;4;4", "novelty": "2;3;4;3", "presentation": "2;4;4;3", "wc_summary": "115;64;184;141", "wc_strengths": "65;38;91;72", "wc_weaknesses": "87;83;226;76", "wc_questions": "112;244;5;119", "wc_limitations": "45;18;29;1", "wc_review": "424;447;535;409", "wc_reply_reviewers": "52;56;34;0", "wc_reply_authors": "0;52;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": 
[ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 126.0, 43.45687517528153 ], "wc_strengths_avg": [ 66.5, 19.00657780874821 ], "wc_weaknesses_avg": [ 118.0, 62.477996126636455 ], "wc_questions_avg": [ 120.0, 84.6551829482401 ], "wc_limitations_avg": [ 23.25, 16.037066439969625 ], "wc_review_avg": [ 453.75, 48.823022233368555 ], "wc_reply_reviewers_avg": [ 35.5, 22.107690969434145 ], "wc_reply_authors_avg": [ 13.0, 22.516660498395403 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17973490280819932409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "mit.edu;;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Scale-Space Hypernetworks for Efficient Biomedical Image Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71197", "id": "aN0llPIbdg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29f421fbdcc82aeb349d784d3aaccdb3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aN0llPIbdg", "openreview": "https://openreview.net/forum?id=aN0llPIbdg", "poster": "/media/PosterPDFs/NeurIPS%202023/71197.png?t=1702402028.6877186", "slides": "https://nips.cc/virtual/2023/poster/71197", "video": "https://nips.cc/virtual/2023/poster/71197", "author_site": "Jose Javier Gonzalez Ortiz, John Guttag, Adrian Dalca", "tldr": "", "abstract": "Convolutional Neural Networks (CNNs) are the predominant model used for a variety of medical image analysis tasks. At inference time, these models are computationally intensive, especially with volumetric data. In principle, it is possible to trade accuracy for computational efficiency by manipulating the rescaling factor in the downsample and upsample layers of CNN architectures. However, properly exploring the accuracy-efficiency trade-off is prohibitively expensive with existing models. To address this, we introduce Scale-Space HyperNetworks (SSHN), a method that learns a spectrum of CNNs with varying internal rescaling factors. A single SSHN characterizes an entire Pareto accuracy-efficiency curve of models that match, and occasionally surpass, the outcomes of training many separate networks with fixed rescaling factors. We demonstrate the proposed approach in several medical image analysis applications, comparing SSHN against strategies with both fixed and dynamic rescaling factors. We find that SSHN consistently provides a better accuracy-efficiency trade-off at a fraction of the training cost.
Trained SSHNs enable the user to quickly choose a rescaling factor that appropriately balances accuracy and computational efficiency for their particular needs at inference.", "keywords": "hypernetworks;amortized learning;computer vision;rescaling;convolutional neural networks;pareto efficiency", "primary_area": "", "supplementary_material": "/attachment/d3887b0ab9fce30650edfb29d6d41a502740baba.pdf", "author": "Jose Javier Gonzalez Ortiz;John Guttag;Adrian V Dalca", "authorids": "~Jose_Javier_Gonzalez_Ortiz2;~John_Guttag2;~Adrian_V_Dalca1", "gender": "M;M;M", "homepage": "https://josejg.com;https://people.csail.mit.edu/guttag/;http://adalca.mit.edu", "dblp": ";g/JohnVGuttag;27/4108", "google_scholar": "4wa-Kd0AAAAJ;;https://scholar.google.ca/citations?user=zRy-zdAAAAAJ", "orcid": ";;0000-0002-8422-0136", "linkedin": "https://linkedin.com/in/josejg;;adalca/", "or_profile": "~Jose_Javier_Gonzalez_Ortiz2;~John_Guttag2;~Adrian_V_Dalca1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nortiz2023scalespace,\ntitle={Scale-Space Hypernetworks for Efficient Biomedical Image Analysis},\nauthor={Jose Javier Gonzalez Ortiz and John Guttag and Adrian V Dalca},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aN0llPIbdg}\n}", "github": "", "project": "", "reviewers": "VkYJ;Lhzq;wkRF;veWP;GRpN", "pdf_size": 0, "rating": "3;7;7;7;7", "confidence": "5;3;4;4;4", "soundness": "2;3;4;4;3", "novelty": "2;3;3;3;3", "presentation": "2;4;4;4;2", "wc_summary": "69;61;79;86;60", "wc_strengths": "30;57;78;64;91", "wc_weaknesses": "276;1;163;37;170", "wc_questions": "76;55;99;130;4", "wc_limitations": "4;4;41;1;1", "wc_review": "455;178;460;318;326", "wc_reply_reviewers": "172;17;103;0;16", "wc_reply_authors": "617;0;82;0;0", "reply_reviewers": "3;1;1;0;1", "reply_authors": "3;1;2;1;1", "rating_avg": [ 6.2, 1.6000000000000003 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.9797958971132712 ], "wc_summary_avg": [ 71.0, 10.13903348450926 ], "wc_strengths_avg": [ 64.0, 20.639767440550294 ], "wc_weaknesses_avg": [ 129.4, 99.29068435659006 ], "wc_questions_avg": [ 72.8, 42.47069577956076 ], "wc_limitations_avg": [ 10.2, 15.458331087151677 ], "wc_review_avg": [ 347.4, 104.18752324534834 ], "wc_reply_reviewers_avg": [ 61.6, 65.9775719468366 ], "wc_reply_authors_avg": [ 139.8, 240.70429992004713 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7905694150420947, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xb6pyTXtVC0J:scholar.google.com/&scioq=Scale-Space+Hypernetworks+for+Efficient+Biomedical+Image+Analysis&hl=en&as_sdt=0,22", "gs_version_total": 2, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Dynamical System View of Langevin-Based 
Non-Convex Sampling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71196", "id": "aRBa0lSxEB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/80f253dcb51cd2af7ce54e9379fb3521-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aRBa0lSxEB", "openreview": "https://openreview.net/forum?id=aRBa0lSxEB", "poster": "/media/PosterPDFs/NeurIPS%202023/71196.png?t=1702269997.3513422", "slides": "https://nips.cc/virtual/2023/poster/71196", "video": "https://nips.cc/virtual/2023/poster/71196", "author_site": "Mohammad Reza Karimi Jaghargh, Ya-Ping Hsieh, Andreas Krause", "tldr": "", "abstract": "Non-convex sampling is a key challenge in machine learning, central to non-convex optimization in deep learning as well as to approximate probabilistic inference. Despite its significance, theoretically there remain some important challenges: Existing guarantees suffer from the drawback of lacking guarantees for the last-iterates, and little is known beyond the elementary schemes of stochastic gradient Langevin dynamics. To address these issues, we develop a novel framework that lifts the above issues by harnessing several tools from the theory of dynamical systems. Our key result is that, for a large class of state-of-the-art sampling schemes, their last-iterate convergence in Wasserstein distances can be reduced to the study of their continuous-time counterparts, which is much better understood. Coupled with standard assumptions of MCMC sampling, our theory immediately yields the last-iterate Wasserstein convergence of many advanced sampling schemes such as mirror Langevin, proximal, randomized mid-point, and Runge-Kutta methods.", "keywords": "Non-Convex Sampling;Langevin Dynamics;Dynamical Systems", "primary_area": "", "supplementary_material": "/attachment/82ed29d5f04d741ed9d1ed3c2b7f8563c61d1fd8.pdf", "author": "Mohammad Reza Karimi Jaghargh;Ya-Ping Hsieh;Andreas Krause", "authorids": "~Mohammad_Reza_Karimi_Jaghargh1;~Ya-Ping_Hsieh1;~Andreas_Krause1", "gender": "M;M;M", "homepage": "http://moreka.github.io;;https://las.inf.ethz.ch/krausea", "dblp": "https://dblp.uni-trier.de/pers/hd/k/Karimi:Mohammad_Reza;122/5313;87/1831-1.html", "google_scholar": "CEZbTgMAAAAJ;;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";;0000-0001-7260-9673", "linkedin": ";;krausea/", "or_profile": "~Mohammad_Reza_Karimi_Jaghargh1;~Ya-Ping_Hsieh1;~Andreas_Krause1", "aff": "Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich;ETH Zurich", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\njaghargh2023a,\ntitle={A Dynamical System View of Langevin-Based Non-Convex Sampling},\nauthor={Mohammad Reza Karimi Jaghargh and Ya-Ping Hsieh and Andreas Krause},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aRBa0lSxEB}\n}", "github": "", "project": "", "reviewers": "vdDX;3YTY;hN2a;Vq75", "pdf_size": 346076, "rating": "5;7;7;8", "confidence": "3;3;4;3", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "78;30;60;93", "wc_strengths": "38;143;42;154", "wc_weaknesses": "262;115;206;61", "wc_questions": "266;20;26;98", "wc_limitations": "8;11;4;16", "wc_review": "652;319;338;422", "wc_reply_reviewers": "69;36;37;19", "wc_reply_authors": "22;20;21;9", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 
6.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.25, 23.466731770743024 ], "wc_strengths_avg": [ 94.25, 54.40760516692497 ], "wc_weaknesses_avg": [ 161.0, 78.0096147920242 ], "wc_questions_avg": [ 102.5, 99.26101953939421 ], "wc_limitations_avg": [ 9.75, 4.380353866983808 ], "wc_review_avg": [ 432.75, 132.384619574934 ], "wc_reply_reviewers_avg": [ 40.25, 18.07449860992 ], "wc_reply_authors_avg": [ 18.0, 5.244044240850758 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15312905636894148288&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "ethz.ch;inf.ethz.ch;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "aS2Yl8s5OG", "title": "On Adversarial Training without Perturbing all Examples", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial training is the de-facto standard for improving robustness against adversarial examples. This usually involves a multi-step adversarial attack applied on each example during training. In this paper, we explore only constructing adversarial examples (AE) on a subset of the training examples. That is, we split the training set into two subsets $A$ and $B$, train models on both ($A\\cup B$) but construct AEs only for examples in $A$. Starting with $A$ containing only a single class, we systematically increase the size of $A$ and consider splitting by class and by examples. We observe that: (i) adv. robustness transfers by difficulty and to classes in $B$ that have never been adv. attacked during training, (ii) we observe a tendency for hard examples to provide better robustness transfer than easy examples, yet find this tendency to diminish with increasing complexity of datasets, (iii) generating AEs on only $50$% of training data is sufficient to recover most of the baseline AT performance even on ImageNet. We observe similar transfer properties across tasks, where generating AEs on only $30$% of data can recover baseline robustness on the target task. We evaluate our subset analysis on a wide variety of image datasets like CIFAR-10, CIFAR-100, ImageNet-200 and show transfer to SVHN, Oxford-Flowers-102 and Caltech-256.
In contrast to conventional practice, our experiments indicate that the utility of computing AEs varies by class and examples and that weighting examples from $A$ higher than $B$ provides high transfer performance.", "keywords": "adversarial robustness;adversarial training;adversarial robust transfer", "primary_area": "", "supplementary_material": "/attachment/ed5347b75c3ec6762ed5b19c69e9e2402d88c006.pdf", "author": "Max Losch;Mohamed Omran;David Stutz;Mario Fritz;Bernt Schiele", "authorids": "~Max_Losch1;~Mohamed_Omran1;~David_Stutz1;~Mario_Fritz1;~Bernt_Schiele1", "gender": "M;;M;M;M", "homepage": "https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/people/max-losch/;http://www.mpi-inf.mpg.de/~mohomran;http://davidstutz.de/;https://cispa.saarland/group/fritz/;http://www.mpi-inf.mpg.de/~schiele", "dblp": "245/8930;154/6713.html;17/9394;;s/BerntSchiele", "google_scholar": "-mbO5ykAAAAJ;https://scholar.google.de/citations?user=jphx5uUAAAAJ;TxEy3cwAAAAJ;https://scholar.google.de/citations?user=4V1nNm4AAAAJ;https://scholar.google.de/citations?user=z76PBfYAAAAJ", "orcid": ";;;;0000-0001-9683-5237", "linkedin": ";;davidstutz92/;;", "or_profile": "~Max_Losch1;~Mohamed_Omran1;~David_Stutz1;~Mario_Fritz1;~Bernt_Schiele1", "aff": "Max-Planck Institute for Informatics;Qualcomm Inc, QualComm;Google DeepMind;Saarland University;Amazon", "aff_domain": "mpi-inf.mpg.de;qti.qualcomm.com;deepmind.com;uni-saarland.de;amazon.com", "position": "PhD student;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\nlosch2023on,\ntitle={On Adversarial Training without Perturbing all Examples},\nauthor={Max Losch and Mohamed Omran and David Stutz and Mario Fritz and Bernt Schiele},\nyear={2023},\nurl={https://openreview.net/forum?id=aS2Yl8s5OG}\n}", "github": "", "project": "", "reviewers": "mnhf;mJ8b;8YTs;DBWu;NsHp", "site": "https://openreview.net/forum?id=aS2Yl8s5OG", "pdf_size": 1099425, "rating": "5;5;5;7;7", "confidence": "4;5;3;4;4", "soundness": "3;2;3;4;2", "novelty": "2;2;2;2;3", "presentation": "2;2;3;4;3", "wc_summary": "61;87;98;172;220", "wc_strengths": "30;14;52;55;150", "wc_weaknesses": "166;141;153;109;344", "wc_questions": "114;3;147;137;191", "wc_limitations": "28;11;29;16;34", "wc_review": "399;256;479;489;939", "wc_reply_reviewers": "0;16;24;19;119", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 127.6, 59.10871340166354 ], "wc_strengths_avg": [ 60.2, 47.33877902945956 ], "wc_weaknesses_avg": [ 182.6, 82.88449794744491 ], "wc_questions_avg": [ 118.4, 62.882748031554726 ], "wc_limitations_avg": [ 23.6, 8.639444426582072 ], "wc_review_avg": [ 512.4, 229.02017378388308 ], "wc_reply_reviewers_avg": [ 35.6, 42.46692830897945 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18292219272241647438&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Max-Planck Institute for Informatics;Qualcomm Incorporated;Google;Saarland University;Amazon", "aff_unique_dep": ";;Google 
DeepMind;;Amazon.com, Inc.", "aff_unique_url": "https://mpi-inf.mpg.de;https://www.qualcomm.com;https://deepmind.com;https://www.uni-saarland.de;https://www.amazon.com", "aff_unique_abbr": "MPII;Qualcomm;DeepMind;UdS;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;1", "aff_country_unique": "Germany;United States;United Kingdom" }, { "title": "Drift doesn't Matter: Dynamic Decomposition with Diffusion Reconstruction for Unstable Multivariate Time Series Anomaly Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71195", "id": "aW5bSuduF1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/22f5d8e689d2a011cd8ead552ed59052-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aW5bSuduF1", "openreview": "https://openreview.net/forum?id=aW5bSuduF1", "poster": "/media/PosterPDFs/NeurIPS%202023/71195.png?t=1698763583.030597", "slides": "https://nips.cc/virtual/2023/poster/71195", "video": "https://nips.cc/virtual/2023/poster/71195", "author_site": "Chengsen Wang, Zirui Zhuang, Qi Qi, Jingyu Wang, Xingyu Wang, Haifeng Sun, Jianxin Liao", "tldr": "", "abstract": "Many unsupervised methods have recently been proposed for multivariate time series anomaly detection. However, existing works mainly focus on stable data yet often omit the drift generated from non-stationary environments, which may lead to numerous false alarms. To fill this gap, we propose **D**ynamic **D**ecomposition with **D**iffusion **R**econstruction (D$^3$R), a novel anomaly detection network for real-world unstable data. D$^3$R tackles the drift via decomposition and reconstruction. In the decomposition procedure, we utilize data-time mix-attention to dynamically decompose long-period multivariate time series, overcoming the limitation of the local sliding window. The information bottleneck is critical yet difficult to determine in the reconstruction procedure. To avoid retraining once the bottleneck changes, we control it externally by noise diffusion and directly reconstruct the polluted data. The whole model can be trained end-to-end.
Extensive experiments on various real-world datasets demonstrate that D$^3$R significantly outperforms existing methods, with an 11% average relative improvement over the previous SOTA models.", "keywords": "Anomaly Detection;Time Series;Diffusion;Transformer", "primary_area": "", "supplementary_material": "/attachment/db7da5733f507ce3bd935267f535fea5b84e2e51.zip", "author": "Chengsen Wang;Zirui Zhuang;Qi Qi;Jingyu Wang;Xingyu Wang;Haifeng Sun;Jianxin Liao", "authorids": "~Chengsen_Wang1;~Zirui_Zhuang1;~Qi_Qi1;~Jingyu_Wang1;~Xingyu_Wang4;~Haifeng_Sun2;~Jianxin_Liao2", "gender": "M;Not Specified;;M;M;M;M", "homepage": "https://forestsking.github.io/;https://ziruizhuang.github.io/;;https://jericwang.github.io/;https://666wxy666.github.io;https://kyy.bupt.edu.cn/info/1014/3341.htm;https://hfsun.github.io", "dblp": "223/1052;https://dblp.uni-trier.de/pid/235/7014;80/6406-1.html;37/2749-1;59/6939;;00/11044-1", "google_scholar": "6KQpKtgAAAAJ;j74lPwkAAAAJ;;H441DjwAAAAJ;SrpeBiYAAAAJ;;dwhbTsEAAAAJ", "orcid": "0000-0002-3826-1148;0000-0003-3345-1732;0000-0003-0829-4624;0000-0002-2182-2228;0000-0001-5318-1202;0000-0000-8780-1230;", "linkedin": ";;;;;;", "or_profile": "~Chengsen_Wang1;~Zirui_Zhuang1;~Qi_Qi1;~Jingyu_Wang1;~Xingyu_Wang4;~Jianxin_Liao2;~haifeng_sun1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "MS student;Associate Professor;Full Professor;Full Professor;MS student;Full Professor;Lecturer", "bibtex": "@inproceedings{\nwang2023drift,\ntitle={Drift doesn't Matter: Dynamic Decomposition with Diffusion Reconstruction for Unstable Multivariate Time Series Anomaly Detection},\nauthor={Chengsen Wang and Zirui Zhuang and Qi Qi and Jingyu Wang and Xingyu Wang and Haifeng Sun and Jianxin Liao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aW5bSuduF1}\n}", "github": "", "project": "", "reviewers": "njrU;KWMB;crri;JUP1;WuHJ;xTrQ", "pdf_size": 577273, "rating": "3;6;6;7;7;7", "confidence": "4;4;1;2;4;3", "soundness": "2;3;3;3;3;3", "novelty": "2;3;3;3;3;2", "presentation": "2;3;3;3;3;3", "wc_summary": "76;101;111;88;90;58", "wc_strengths": "20;54;73;105;52;47", "wc_weaknesses": "334;73;41;25;118;32", "wc_questions": "9;2;1;1;69;53", "wc_limitations": "31;1;1;25;37;7", "wc_review": "470;231;227;244;366;197", "wc_reply_reviewers": "0;0;61;14;0;0", "wc_reply_authors": "0;0;157;0;0;0", "reply_reviewers": "0;0;1;1;0;0", "reply_authors": "1;1;2;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 1.1547005383792515 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 87.33333333333333, 17.045690234060796 ], "wc_strengths_avg": [ 58.5, 25.979158313283875 ], "wc_weaknesses_avg": [ 103.83333333333333, 107.58781942621985 ], "wc_questions_avg": [ 22.5, 27.747372247956502 ], "wc_limitations_avg": [ 17.0, 14.560219778561036 ], "wc_review_avg": [ 289.1666666666667, 96.94227950463902 ], "wc_reply_reviewers_avg": [ 12.5, 
22.28414982298704 ], "wc_reply_authors_avg": [ 26.166666666666668, 58.510445411244504 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3061862178478973, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14484780163867999770&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Language Models Can Improve Event Prediction by Few-Shot Abductive Reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71194", "id": "aW9BqtRQkh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e5fd18f863cbe6d8ae392a93fd271c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aW9BqtRQkh", "openreview": "https://openreview.net/forum?id=aW9BqtRQkh", "poster": "/media/PosterPDFs/NeurIPS%202023/71194.png?t=1698052240.489295", "slides": "https://nips.cc/virtual/2023/poster/71194", "video": "https://nips.cc/virtual/2023/poster/71194", "author_site": "Xiaoming Shi, Siqiao Xue, Kangrui Wang, Fan Zhou, James Zhang, Jun Zhou, Chenhao Tan, Hongyuan Mei", "tldr": "", "abstract": "Large language models have shown astonishing performance on a wide range of reasoning tasks. In this paper, we investigate whether they could reason about real-world events and help improve the prediction performance of event sequence models. We design LAMP, a framework that integrates a large language model in event prediction. Particularly, the language model performs abductive reasoning to assist an event sequence model: the event model proposes predictions on future events given the past; instructed by a few expert-annotated demonstrations, the language model learns to suggest possible causes for each proposal; a search module finds out the previous events that match the causes; a scoring function learns to examine whether the retrieved events could actually cause the proposal. Through extensive experiments on several challenging real-world datasets, we demonstrate that our framework---thanks to the reasoning capabilities of large language models---could significantly outperform the state-of-the-art event sequence models.", "keywords": "event sequences;irregular time series;event prediction;large language model;reasoning;few-shot prompting", "primary_area": "", "supplementary_material": "/attachment/df56fac5e8050eba9ac59779cc0ce02a3829d4a2.zip", "author": "Xiaoming Shi;Siqiao Xue;Kangrui Wang;Fan Zhou;James Y. 
Zhang;JUN ZHOU;Chenhao Tan;Hongyuan Mei", "authorids": "~Xiaoming_Shi2;~Siqiao_Xue1;~Kangrui_Wang2;~Fan_Zhou10;~James_Y._Zhang1;~JUN_ZHOU6;~Chenhao_Tan1;~Hongyuan_Mei1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://www.antgroup.com/en;https://jameskrw.github.io/;;https://scholar.google.com/citations?user=Ywakh_sAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en;https://chenhaot.com/;http://www.cs.jhu.edu/~hmei/", "dblp": "65/9789-1;302/7766;216/9159;;151/3086;99/3847-11;95/8314;164/5576", "google_scholar": "0WMTWacAAAAJ;pZqTpoEAAAAJ;;https://scholar.google.com/citations?view_op=list_works;Ywakh_sAAAAJ;mCVvloEAAAAJ;https://scholar.google.com.tw/citations?user=KGMaP18AAAAJ;g_zaiVIAAAAJ", "orcid": "0000-0003-0764-8961;;;;0000-0001-6519-676X;0000-0001-6033-6102;;", "linkedin": ";;wang-kangrui-8b9a37257/;https://www.linkedin.com/mwlite/in/moutozf;jamesymzhang/;;;hongyuan-mei-57687858?trk=nav_responsive_tab_profile_pic", "or_profile": "~Xiaoming_Shi2;~Siqiao_Xue1;~Kangrui_Wang2;~Fan_Zhou10;~James_Y._Zhang1;~JUN_ZHOU6;~Chenhao_Tan1;~Hongyuan_Mei1", "aff": "Ant Group;Alibaba;;AntGroup;Ant Group;Ant Group;University of Chicago;Toyota Technological Institute at Chicago", "aff_domain": "antgroup.com;alibaba-inc.com;;antgroup.com;alipay.com;antgroup.com;uchicago.edu;ttic.edu", "position": "Researcher;researcher;;Researcher;managing director;Researcher;Assistant Professor;Research Assistant Professor", "bibtex": "@inproceedings{\nshi2023language,\ntitle={Language Models Can Improve Event Prediction by Few-Shot Abductive Reasoning},\nauthor={Xiaoming Shi and Siqiao Xue and Kangrui Wang and Fan Zhou and James Y. Zhang and JUN ZHOU and Chenhao Tan and Hongyuan Mei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aW9BqtRQkh}\n}", "github": "", "project": "", "reviewers": "HaQW;mHgh;pKDi;Hwvw;E9bj", "pdf_size": 864810, "rating": "5;6;6;6;7", "confidence": "4;4;4;4;3", "soundness": "3;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;3;2;3", "wc_summary": "82;114;56;108;143", "wc_strengths": "47;88;45;25;75", "wc_weaknesses": "31;468;279;382;66", "wc_questions": "1;8;216;11;144", "wc_limitations": "20;11;64;135;35", "wc_review": "181;689;660;661;463", "wc_reply_reviewers": "0;45;258;355;40", "wc_reply_authors": "12;59;1624;1618;74", "reply_reviewers": "0;1;2;3;1", "reply_authors": "2;2;5;5;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 100.6, 29.5540183393054 ], "wc_strengths_avg": [ 56.0, 22.57432169523594 ], "wc_weaknesses_avg": [ 245.2, 171.7502838425602 ], "wc_questions_avg": [ 76.0, 87.97499644785444 ], "wc_limitations_avg": [ 53.0, 44.770526018799465 ], "wc_review_avg": [ 530.8, 192.68046086720884 ], "wc_reply_reviewers_avg": [ 139.6, 140.55120063521338 ], "wc_reply_authors_avg": [ 677.4, 770.7200788872702 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 3.2, 1.469693845669907 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4729652770330373464&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "antgroup.com;alibaba-inc.com;;antgroup.com;alipay.com;antgroup.com;uchicago.edu;ttic.edu", "author_num": 8, 
"aff_unique_index": "0;1;0;0;0;2;3", "aff_unique_norm": "Ant Group;Alibaba Group Holding Limited;University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.antgroup.com;https://www.alibaba.com;https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "Ant Group;Alibaba;UChicago;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Reproducibility in Multiple Instance Learning: A Case For Algorithmic Unit Tests", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71193", "id": "aZ44Na3l9p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2bab8865fa4511e445767e3750b2b5ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aZ44Na3l9p", "openreview": "https://openreview.net/forum?id=aZ44Na3l9p", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71193", "video": "https://nips.cc/virtual/2023/poster/71193", "author_site": "Edward Raff, James Holt", "tldr": "", "abstract": "Multiple Instance Learning (MIL) is a sub-domain of classification problems with positive and negative labels and a \"bag\" of inputs, where the label is positive if and only if a positive element is contained within the bag, and otherwise is negative. Training in this context requires associating the bag-wide label to instance-level information, and implicitly contains a causal assumption and asymmetry to the task (i.e., you can't swap the labels without changing the semantics). MIL problems occur in healthcare (one malignant cell indicates cancer), cyber security (one malicious executable makes an infected computer), and many other tasks. In this work, we examine five of the most prominent deep-MIL models and find that none of them respects the standard MIL assumption. They are able to learn anti-correlated instances, i.e., defaulting to \"positive\" labels until seeing a negative counter-example, which should not be possible for a correct MIL model. We suspect that enhancements and other works derived from these models will share the same issue. In any context in which these models are being used, this creates the potential for learning incorrect models, which creates risk of operational failure. We identify and demonstrate this problem via a proposed ``algorithmic unit test'', where we create synthetic datasets that can be solved by a MIL respecting model, and which clearly reveal learning that violates MIL assumptions. The five evaluated methods each fail one or more of these tests. 
This provides a model-agnostic way to identify violations of modeling assumptions, which we hope will be useful for future development and evaluation of MIL models.", "keywords": "reproducibility; multiple instance learning", "primary_area": "", "supplementary_material": "/attachment/b65b8dd5b0eb11cd6da4c345981b84502dddcbf3.pdf", "author": "Edward Raff;James Holt", "authorids": "~Edward_Raff1;~James_Holt1", "gender": "M;M", "homepage": "http://www.edwardraff.com/;", "dblp": "204/3369;93/1248", "google_scholar": "debM2bUAAAAJ;GtVgGjkAAAAJ", "orcid": "0000-0002-9900-1972;0000-0002-6368-8696", "linkedin": "edward-raff-09992040/;jeholt/", "or_profile": "~Edward_Raff1;~James_Holt1", "aff": "Syracuse University;Laboratory for Physical Sciences", "aff_domain": "syr.edu;umd.edu", "position": "MBA student;Principal Researcher", "bibtex": "@inproceedings{\nraff2023reproducibility,\ntitle={Reproducibility in Multiple Instance Learning: A Case For Algorithmic Unit Tests},\nauthor={Edward Raff and James Holt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aZ44Na3l9p}\n}", "github": "", "project": "", "reviewers": "6q8u;3dcE;BYpB;kMQN", "pdf_size": 286122, "rating": "6;7;7;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;2;3;4", "wc_summary": "142;96;151;169", "wc_strengths": "15;94;192;22", "wc_weaknesses": "184;68;525;72", "wc_questions": "169;497;255;107", "wc_limitations": "1;37;182;1", "wc_review": "511;792;1305;371", "wc_reply_reviewers": "140;39;110;26", "wc_reply_authors": "257;36;56;25", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 139.5, 26.93046601899046 ], "wc_strengths_avg": [ 80.75, 71.28595583984267 ], "wc_weaknesses_avg": [ 212.25, 186.47302083679557 ], "wc_questions_avg": [ 257.0, 148.19581640518737 ], "wc_limitations_avg": [ 55.25, 74.6403878607286 ], "wc_review_avg": [ 744.75, 357.2256814676123 ], "wc_reply_reviewers_avg": [ 78.75, 47.67271232057182 ], "wc_reply_authors_avg": [ 93.5, 95.0486717424289 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10370807863440260056&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "syr.edu;umd.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Syracuse University;Laboratory for Physical Sciences", "aff_unique_dep": ";", "aff_unique_url": "https://www.syracuse.edu;", "aff_unique_abbr": "Syracuse;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Anchor Data Augmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71192", "id": "aZ9hvpnp0k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ecc9b6dfdbe374c0a3364ff81cd28642-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aZ9hvpnp0k", "openreview": "https://openreview.net/forum?id=aZ9hvpnp0k", "poster": "/media/PosterPDFs/NeurIPS%202023/71192.png?t=1701980908.0050228", "slides": "https://nips.cc/virtual/2023/poster/71192", 
"video": "https://nips.cc/virtual/2023/poster/71192", "author_site": "Nora Schneider, Shirin Goshtasbpour, Fernando Perez-Cruz", "tldr": "", "abstract": "We propose a novel algorithm for data augmentation in nonlinear over-parametrized regression. Our data augmentation algorithm borrows from the literature on causality. Contrary to the current state-of-the-art solutions that rely on modifications of Mixup algorithm, we extend the recently proposed distributionally robust Anchor regression (AR) method for data augmentation. Our Anchor Data Augmentation (ADA) uses several replicas of the modified samples in AR to provide more training examples, leading to more robust regression predictions. We apply ADA to linear and nonlinear regression problems using neural networks. ADA is competitive with state-of-the-art C-Mixup solutions.", "keywords": "Data Augmentation;Regression;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/19f65426797819bb5692cd9e3334d8cf3b86f34d.pdf", "author": "Nora Schneider;Shirin Goshtasbpour;Fernando Perez-Cruz", "authorids": "~Nora_Schneider1;~Shirin_Goshtasbpour1;~Fernando_Perez-Cruz1", "gender": "F;;M", "homepage": "https://github.com/NoraSchneider;;http://www.tsc.uc3m.es/~fernando/", "dblp": ";;75/805", "google_scholar": ";;8FfrHw0AAAAJ", "orcid": ";;0000-0001-8996-5076", "linkedin": ";;fernando-perez-cruz-930610b", "or_profile": "~Nora_Schneider1;~Shirin_Goshtasbpour1;~Fernando_Perez-Cruz1", "aff": "ETHZ - ETH Zurich;;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;;ethz.ch", "position": "MS student;;Chief Data Scientist", "bibtex": "@inproceedings{\nschneider2023anchor,\ntitle={Anchor Data Augmentation},\nauthor={Nora Schneider and Shirin Goshtasbpour and Fernando Perez-Cruz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aZ9hvpnp0k}\n}", "github": "", "project": "", "reviewers": "K137;KQFt;2wsW;dYVk;SE5E;zYBi", "pdf_size": 748868, "rating": "3;5;5;6;6;7", "confidence": "4;4;5;2;3;3", "soundness": "2;2;2;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "3;2;3;3;3;3", "wc_summary": "52;445;109;51;46;74", "wc_strengths": "54;88;80;21;88;60", "wc_weaknesses": "212;55;209;77;101;46", "wc_questions": "2;113;9;21;45;40", "wc_limitations": "2;69;58;1;1;12", "wc_review": "322;770;465;171;281;232", "wc_reply_reviewers": "2;11;69;11;17;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 129.5, 142.7010277935423 ], "wc_strengths_avg": [ 65.16666666666667, 23.68133348347503 ], "wc_weaknesses_avg": [ 116.66666666666667, 68.5921926234239 ], "wc_questions_avg": [ 38.333333333333336, 36.7499055176412 ], "wc_limitations_avg": [ 23.833333333333332, 28.480500151663225 ], "wc_review_avg": [ 373.5, 199.09022242859308 ], "wc_reply_reviewers_avg": [ 18.333333333333332, 23.378527660131972 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5582905262390825, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7702110476565990038&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 6, "email": "ethz.ch;;ethz.ch", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "MarioGPT: Open-Ended Text2Level Generation through Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71191", "id": "aa8KsqfTPa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a9bbeb2858dfbdbd4c19814e5d80ec60-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aa8KsqfTPa", "openreview": "https://openreview.net/forum?id=aa8KsqfTPa", "poster": "/media/PosterPDFs/NeurIPS%202023/71191.png?t=1702004257.3990824", "slides": "https://nips.cc/virtual/2023/poster/71191", "video": "https://nips.cc/virtual/2023/poster/71191", "author_site": "Shyam Sudhakaran, Miguel Gonz\u00e1lez-Duque, Matthias Freiberger, Claire Glanois, Elias Najarro, Sebastian Risi", "tldr": "", "abstract": "Procedural Content Generation (PCG) is a technique to generate complex and diverse environments in an automated way. However, while generating content with PCG methods is often straightforward, generating meaningful content that reflects specific intentions and constraints remains challenging. Furthermore, many PCG algorithms lack the ability to generate content in an open-ended manner. Recently, Large Language Models (LLMs) have shown to be incredibly effective in many diverse domains. These trained LLMs can be fine-tuned, re-using information and accelerating training for new tasks. Here, we introduce MarioGPT, a fine-tuned GPT2 model trained to generate tile-based game levels, in our case Super Mario Bros levels. MarioGPT can not only generate diverse levels, but can be text-prompted for controllable level generation, addressing one of the key challenges of current PCG techniques. As far as we know, MarioGPT is the first text-to-level model and combined with novelty search it enables the generation of diverse levels with varying play-style dynamics (i.e. player paths) and the open-ended discovery of an increasingly diverse range of content. 
\nCode available at https://github.com/shyamsn97/mario-gpt.", "keywords": "Large Language Models;Procedural Content Generation;Open-endedness;Novelty Search", "primary_area": "", "supplementary_material": "/attachment/7c4c19d8b9a6683b844816b30b4d513b9b888bde.zip", "author": "Shyam Sudhakaran;Miguel Gonz\u00e1lez-Duque;Matthias Freiberger;Claire Glanois;Elias Najarro;Sebastian Risi", "authorids": "~Shyam_Sudhakaran1;~Miguel_Gonz\u00e1lez-Duque3;~Matthias_Freiberger1;~Claire_Glanois1;~Elias_Najarro1;~Sebastian_Risi1", "gender": "M;M;F;;M;M", "homepage": "https://shyamsn97.github.io/;;;https://najarro.science/;http://www.sebastianrisi.com;https://www.miguelgondu.com/", "dblp": ";200/9091;281/7174;269/9715;81/7183;244/9609.html", "google_scholar": ";qkPbhlUAAAAJ;https://scholar.google.dk/citations?user=xYtaBiUAAAAJ;x7EcQBMAAAAJ;Tf8winBIYUsC;eje0FAYAAAAJ", "orcid": ";0000-0003-2101-6274;;0000-0002-7875-3251;;", "linkedin": ";matthias-freiberger-a1377b7b/;claire-glanois-0bb81098/;elias-najarro/;;", "or_profile": "~Shyam_Sudhakaran1;~Matthias_Freiberger1;~Claire_Glanois1;~Elias_Najarro1;~Sebastian_Risi1;~Miguel_Gonz\u00e1lez_Duque1", "aff": "IT University;IT University of Copenhagen;IT University of Copenhagen;IT University of Copenhagen;IT University of Copenhagen;IT University of Copenhagen", "aff_domain": "itu.dk;itu.dk;itu.dk;itu.dk;itu.dk;itu.dk", "position": "Researcher;Postdoc;Postdoc;Researcher;Professor;PhD student", "bibtex": "@inproceedings{\nsudhakaran2023mariogpt,\ntitle={Mario{GPT}: Open-Ended Text2Level Generation through Large Language Models},\nauthor={Shyam Sudhakaran and Miguel Gonz{\\'a}lez-Duque and Matthias Freiberger and Claire Glanois and Elias Najarro and Sebastian Risi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aa8KsqfTPa}\n}", "github": "", "project": "", "reviewers": "an9Y;aEvk;wPh9;6gGD", "pdf_size": 2393226, "rating": "6;6;7;7", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "96;207;133;55", "wc_strengths": "46;55;127;91", "wc_weaknesses": "219;133;548;33", "wc_questions": "88;133;273;30", "wc_limitations": "3;30;13;6", "wc_review": "452;558;1094;215", "wc_reply_reviewers": "27;59;115;32", "wc_reply_authors": "45;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 122.75, 55.92126160951664 ], "wc_strengths_avg": [ 79.75, 32.057565409743766 ], "wc_weaknesses_avg": [ 233.25, 193.2749013710782 ], "wc_questions_avg": [ 131.0, 89.74686624055461 ], "wc_limitations_avg": [ 13.0, 10.464224768228174 ], "wc_review_avg": [ 579.75, 321.8263312720076 ], "wc_reply_reviewers_avg": [ 58.25, 34.95264653785175 ], "wc_reply_authors_avg": [ 11.25, 19.48557158514987 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2459521809228070379&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "itu.dk;itu.dk;itu.dk;itu.dk;itu.dk;itu.dk", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "IT University;IT University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": ";https://itu.dk", "aff_unique_abbr": 
";ITU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1;1", "aff_country_unique": ";Denmark" }, { "title": "Mixed Samples as Probes for Unsupervised Model Selection in Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71190", "id": "ackajXqei2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7721f1fea280e9ffae528dc78c732576-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ackajXqei2", "openreview": "https://openreview.net/forum?id=ackajXqei2", "poster": "/media/PosterPDFs/NeurIPS%202023/71190.png?t=1701414368.2324266", "slides": "https://nips.cc/virtual/2023/poster/71190", "video": "https://nips.cc/virtual/2023/poster/71190", "author_site": "Dapeng Hu, Jian Liang, Jun Hao Liew, Chuhui Xue, Song Bai, Xinchao Wang", "tldr": "", "abstract": "Unsupervised domain adaptation (UDA) has been widely applied in improving model generalization on unlabeled target data. However, accurately selecting the best UDA model for the target domain is challenging due to the absence of labeled target data and domain distribution shifts. Traditional model selection approaches involve training extra models with source data to estimate the target validation risk. Recent studies propose practical methods that are based on measuring various properties of model predictions on target data. Although effective for some UDA models, these methods often lack stability and may lead to poor selections for other UDA models.\nIn this paper, we present MixVal, an innovative model selection method that operates solely with unlabeled target data during inference. MixVal leverages mixed target samples with pseudo labels to directly probe the learned target structure by each UDA model. Specifically, MixVal employs two distinct types of probes: the intra-cluster mixed samples for evaluating neighborhood density and the inter-cluster mixed samples for investigating the classification boundary. With this comprehensive probing strategy, MixVal elegantly combines the strengths of two state-of-the-art model selection methods, Entropy and SND. We extensively evaluate MixVal on 11 UDA methods across 4 adaptation settings, including classification and segmentation tasks. Experimental results consistently demonstrate that MixVal achieves state-of-the-art performance and maintains exceptional stability in model selection. 
\nCode is available at \\url{https://github.com/LHXXHB/MixVal}.", "keywords": "Unsupervised Domain Adaptation; Model Selection; Hyperparameter Selection; Unsupervised Validation;", "primary_area": "", "supplementary_material": "", "author": "Dapeng Hu;Jian Liang;Jun Hao Liew;Chuhui Xue;Song Bai;Xinchao Wang", "authorids": "~Dapeng_Hu2;~Jian_Liang1;~Jun_Hao_Liew1;~Chuhui_Xue2;~Song_Bai3;~Xinchao_Wang1", "gender": "M;M;;F;;M", "homepage": "https://lhxxhb.github.io/;https://liangjian.xyz;;;https://songbai.site/;https://sites.google.com/site/sitexinchaowang/", "dblp": "247/3382;19/2208-1;;223/4745;;", "google_scholar": "wv9HjA0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=8gm-CYYAAAAJ;https://scholar.google.com.sg/citations?user=KJU5YRYAAAAJ;LXuWMF4AAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";0000-0003-3890-1894;;;;", "linkedin": ";;;;;", "or_profile": "~Dapeng_Hu2;~Jian_Liang1;~Jun_Hao_Liew1;~Chuhui_Xue2;~Song_Bai3;~Xinchao_WANG3", "aff": "National University of Singapore;Institute of Automation, Chinese Academy of Sciences;ByteDance;ByteDance Inc.;ByteDance;National University of Singapore", "aff_domain": "u.nus.edu;ia.ac.cn;bytedance.com;bytedance.com;bytedance.com;nus.edu", "position": "PhD student;Associate Professor;Researcher;Researcher;Computer Vision Lead;Assistant Professor", "bibtex": "@inproceedings{\nhu2023mixed,\ntitle={Mixed Samples as Probes for Unsupervised Model Selection in Domain Adaptation},\nauthor={Dapeng Hu and Jian Liang and Jun Hao Liew and Chuhui Xue and Song Bai and Xinchao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ackajXqei2}\n}", "github": "", "project": "", "reviewers": "PTRC;4S59;G1xN;zt6b", "pdf_size": 591102, "rating": "5;5;5;7", "confidence": "4;3;4;3", "soundness": "3;3;2;3", "novelty": "2;2;2;4", "presentation": "3;2;3;3", "wc_summary": "54;62;62;80", "wc_strengths": "36;45;38;90", "wc_weaknesses": "150;17;235;33", "wc_questions": "10;78;60;43", "wc_limitations": "46;5;4;20", "wc_review": "296;207;399;266", "wc_reply_reviewers": "38;27;90;26", "wc_reply_authors": "737;68;569;74", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 9.526279441628825 ], "wc_strengths_avg": [ 52.25, 22.049659861322123 ], "wc_weaknesses_avg": [ 108.75, 89.15821611046286 ], "wc_questions_avg": [ 47.75, 25.063668925358872 ], "wc_limitations_avg": [ 18.75, 16.96135312998347 ], "wc_review_avg": [ 292.0, 69.58088818059166 ], "wc_reply_reviewers_avg": [ 45.25, 26.261902063635834 ], "wc_reply_authors_avg": [ 362.0, 297.00757566095854 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16168122885124730606&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "u.nus.edu;ia.ac.cn;bytedance.com;bytedance.com;bytedance.com;nus.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "National University of Singapore;Chinese Academy of Sciences;ByteDance", "aff_unique_dep": ";Institute of Automation;", 
"aff_unique_url": "https://www.nus.edu.sg;http://www.ia.cas.cn;https://www.bytedance.com", "aff_unique_abbr": "NUS;CAS;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Adapting to Continuous Covariate Shift via Online Density Ratio Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71189", "id": "ad3JNoR2np", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5cad96c4433955a2e76749ee74a424f5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ad3JNoR2np", "openreview": "https://openreview.net/forum?id=ad3JNoR2np", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71189", "video": "https://nips.cc/virtual/2023/poster/71189", "author_site": "Yu-Jie Zhang, Zhen-Yu Zhang, Peng Zhao, Masashi Sugiyama", "tldr": "", "abstract": "Dealing with distribution shifts is one of the central challenges for modern machine learning. One fundamental situation is the covariate shift, where the input distributions of data change from the training to testing stages while the input-conditional output distribution remains unchanged. In this paper, we initiate the study of a more challenging scenario --- continuous covariate shift --- in which the test data appear sequentially, and their distributions can shift continuously. Our goal is to adaptively train the predictor such that its prediction risk accumulated over time can be minimized. Starting with the importance-weighted learning, we theoretically show the method works effectively if the time-varying density ratios of test and train inputs can be accurately estimated. However, existing density ratio estimation methods would fail due to data scarcity at each time step. To this end, we propose an online density ratio estimation method that can appropriately reuse historical information. Our method is proven to perform well by enjoying a dynamic regret bound, which finally leads to an excess risk guarantee for the predictor. 
Empirical results also validate the effectiveness.", "keywords": "Covariate Shift;Density Ratio Estimation;Online Convex Optimization;Dynamic Regret;Logistic Regression", "primary_area": "", "supplementary_material": "/attachment/92a116612261ae83bde0911392afd3e28c1ea786.zip", "author": "Yu-Jie Zhang;Zhen-Yu Zhang;Peng Zhao;Masashi Sugiyama", "authorids": "~Yu-Jie_Zhang1;~Zhen-Yu_Zhang1;~Peng_Zhao1;~Masashi_Sugiyama1", "gender": "M;M;;M", "homepage": "https://yujie-zhang96.github.io/;https://zhangzy07.github.io/;;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "234/6681;;;35/1228", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.jp/citations?user=JP8qCpUAAAAJ;;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": ";0000-0003-2101-1836;;0000-0001-6658-6743", "linkedin": ";;;", "or_profile": "~Yu-Jie_Zhang1;~Zhen-Yu_Zhang1;~Peng_Zhao1;~Masashi_Sugiyama1", "aff": "The University of Tokyo;Nanjing University;;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;nju.edu.cn;;u-tokyo.ac.jp", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nzhang2023adapting,\ntitle={Adapting to Continuous Covariate Shift via Online Density Ratio Estimation},\nauthor={Yu-Jie Zhang and Zhen-Yu Zhang and Peng Zhao and Masashi Sugiyama},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ad3JNoR2np}\n}", "github": "", "project": "", "reviewers": "ZW3t;KTsd;Ls7D;UHuD", "pdf_size": 1576420, "rating": "5;7;7;7", "confidence": "3;3;4;4", "soundness": "3;3;4;3", "novelty": "3;3;2;3", "presentation": "3;3;3;4", "wc_summary": "106;73;241;103", "wc_strengths": "15;59;50;33", "wc_weaknesses": "187;50;156;189", "wc_questions": "39;36;71;25", "wc_limitations": "7;18;11;10", "wc_review": "354;236;529;360", "wc_reply_reviewers": "0;0;0;22", "wc_reply_authors": "0;0;0;21", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 130.75, 64.94757501246679 ], "wc_strengths_avg": [ 39.25, 16.82817577754642 ], "wc_weaknesses_avg": [ 145.5, 56.667892143611624 ], "wc_questions_avg": [ 42.75, 17.122718826167766 ], "wc_limitations_avg": [ 11.5, 4.031128874149275 ], "wc_review_avg": [ 369.75, 104.39438442751603 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 5.25, 9.093266739736606 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11527331244417247060&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "u-tokyo.ac.jp;nju.edu.cn;;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Tokyo;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.nju.edu.cn", "aff_unique_abbr": "UTokyo;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Japan;China" }, { "title": "Tree Variational Autoencoders", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71188", "id": 
"adq0oXb9KM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac58b418745b3e5f10c80110c963969f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=adq0oXb9KM", "openreview": "https://openreview.net/forum?id=adq0oXb9KM", "poster": "/media/PosterPDFs/NeurIPS%202023/71188.png?t=1701949928.1476247", "slides": "https://nips.cc/virtual/2023/poster/71188", "video": "https://nips.cc/virtual/2023/poster/71188", "author_site": "Laura Manduchi, Moritz Vandenhirtz, Alain Ryser, Julia Vogt", "tldr": "", "abstract": "We propose Tree Variational Autoencoder (TreeVAE), a new generative hierarchical clustering model\n that learns a flexible tree-based posterior distribution over latent variables. TreeVAE hierarchically divides samples according to their intrinsic characteristics, shedding light on hidden structures in the data. It adapts its architecture to discover the optimal tree for encoding dependencies between latent variables. The proposed tree-based generative architecture enables lightweight conditional inference and improves generative performance by utilizing specialized leaf decoders. \n We show that TreeVAE uncovers underlying clusters in the data and finds meaningful hierarchical relations between the different groups on a variety of datasets, including real-world imaging data. \n We present empirically that TreeVAE provides a more competitive log-likelihood lower bound than the sequential counterparts. \n Finally, due to its generative nature, TreeVAE is able to generate new samples from the discovered clusters via conditional sampling.", "keywords": "hierarchical clustering;hierarchical VAE;representation learning;VAE;deep clustering", "primary_area": "", "supplementary_material": "", "author": "Laura Manduchi;Moritz Vandenhirtz;Alain Ryser;Julia E Vogt", "authorids": "~Laura_Manduchi2;~Moritz_Vandenhirtz1;~Alain_Ryser1;~Julia_E_Vogt1", "gender": "F;M;M;F", "homepage": "https://mds.inf.ethz.ch/team/detail/laura-manduchi/;;https://mds.inf.ethz.ch/team/detail/alain-ryser;http://mds.inf.ethz.ch", "dblp": "249/9257;;230/3590;13/8412", "google_scholar": ";H2cG0BwAAAAJ;https://scholar.google.ch/citations?user=l9tQ2agAAAAJ;UoeV-8kAAAAJ", "orcid": ";;;", "linkedin": ";moritz-simon-vandenhirtz-488b0b16b/;alain-r-0554441b5/;julia-vogt-50b53895", "or_profile": "~Laura_Manduchi2;~Moritz_Vandenhirtz1;~Alain_Ryser1;~Julia_E_Vogt1", "aff": "Swiss Federal Institute of Technology;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmanduchi2023tree,\ntitle={Tree Variational Autoencoders},\nauthor={Laura Manduchi and Moritz Vandenhirtz and Alain Ryser and Julia E Vogt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=adq0oXb9KM}\n}", "github": "", "project": "", "reviewers": "NuUb;bo43;hJ8G;UPHe;822K", "pdf_size": 5865244, "rating": "6;7;7;7;8", "confidence": "3;3;5;4;4", "soundness": "3;3;4;3;3", "novelty": "3;3;3;3;4", "presentation": "3;3;4;3;4", "wc_summary": "154;117;92;91;121", "wc_strengths": "131;219;48;131;73", "wc_weaknesses": "172;278;68;325;85", "wc_questions": "169;9;60;88;12", "wc_limitations": "2;17;51;7;51", "wc_review": "628;640;319;642;342", "wc_reply_reviewers": "0;20;0;67;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 
0.6324555320336759 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 115.0, 23.09112383579457 ], "wc_strengths_avg": [ 120.4, 59.05116425609237 ], "wc_weaknesses_avg": [ 185.6, 102.08349523796684 ], "wc_questions_avg": [ 67.6, 58.789795032811604 ], "wc_limitations_avg": [ 25.6, 21.29413064672986 ], "wc_review_avg": [ 514.2, 150.24300316487287 ], "wc_reply_reviewers_avg": [ 17.4, 25.981531902487966 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1181005415403954482&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "RanPAC: Random Projections and Pre-trained Models for Continual Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71187", "id": "aec58UfBzA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2793dc35e14003dd367684d93d236847-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aec58UfBzA", "openreview": "https://openreview.net/forum?id=aec58UfBzA", "poster": "/media/PosterPDFs/NeurIPS%202023/71187.png?t=1699612517.6242101", "slides": "https://nips.cc/virtual/2023/poster/71187", "video": "https://nips.cc/virtual/2023/poster/71187", "author_site": "Mark D. McDonnell, Dong Gong, Amin Parvaneh, Ehsan Abbasnejad, Anton van den Hengel", "tldr": "", "abstract": "Continual learning (CL) aims to incrementally learn different tasks (such as classification) in a non-stationary data stream without forgetting old ones. Most CL works focus on tackling catastrophic forgetting under a learning-from-scratch paradigm. However, with the increasing prominence of foundation models, pre-trained models equipped with informative representations have become available for various downstream requirements. Several CL methods based on pre-trained models have been explored, either utilizing pre-extracted features directly (which makes bridging distribution gaps challenging) or incorporating adaptors (which may be subject to forgetting). In this paper, we propose a concise and effective approach for CL with pre-trained models. Given that forgetting occurs during parameter updating, we contemplate an alternative approach that exploits training-free random projectors and class-prototype accumulation, which thus bypasses the issue. Specifically, we inject a frozen Random Projection layer with nonlinear activation between the pre-trained model's feature representations and output head, which captures interactions between features with expanded dimensionality, providing enhanced linear separability for class-prototype-based CL. We also demonstrate the importance of decorrelating the class-prototypes to reduce the distribution disparity when using pre-trained representations. 
These techniques prove to be effective and circumvent the problem of forgetting for both class- and domain-incremental continual learning. Compared to previous methods applied to pre-trained ViT-B/16 models, we reduce final error rates by between 20% and 62% on seven class-incremental benchmark datasets, despite not using any rehearsal memory. We conclude that the full potential of pre-trained models for simple, effective, and fast continual learning has not hitherto been fully tapped. Code is available at https://github.com/RanPAC/RanPAC.", "keywords": "continual learning;class incremental learning;domain incremental learning;pre-trained models;parameter-efficient transfer learning", "primary_area": "", "supplementary_material": "/attachment/1b0d72eaff355d84e2c5c2d374fae2972aab8466.pdf", "author": "Mark McDonnell;Dong Gong;Amin Parvaneh;Ehsan Abbasnejad;Anton van den Hengel", "authorids": "~Mark_McDonnell1;~Dong_Gong1;~Amin_Parvaneh1;~Ehsan_Abbasnejad3;~Anton_van_den_Hengel1", "gender": "M;M;M;;M", "homepage": "https://www.aurizn.co;https://donggong1.github.io;;;https://ehsanabb.github.io/", "dblp": "58/8860;125/5032;241/5249;v/AntonvandenHengel;30/11191", "google_scholar": "https://scholar.google.com.au/citations?user=29OHpnEAAAAJ;https://scholar.google.com.au/citations?user=e2u6hRoAAAAJ;6R7IG3YAAAAJ;https://scholar.google.com.au/citations?user=nMGZ2ZQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7009-3869;0000-0002-2668-9630;0000-0003-1674-9051;0000-0003-3027-8364;", "linkedin": "mark-mcdonnell-52576212/;;amin-parvaneh/;;", "or_profile": "~Mark_McDonnell1;~Dong_Gong1;~Amin_Parvaneh1;~Anton_van_den_Hengel1;~Ehsan_M_Abbasnejad1", "aff": "University of Adelaide;University of New South Wales;The University of Adelaide;University of Adelaide;University of Adelaide", "aff_domain": "adelaide.edu.au;unsw.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au", "position": "Senior Researcher;Assistant Professor;Postdoc;Professor;Assistant Professor", "bibtex": "@inproceedings{\nmcdonnell2023ranpac,\ntitle={Ran{PAC}: Random Projections and Pre-trained Models for Continual Learning},\nauthor={Mark McDonnell and Dong Gong and Amin Parvaneh and Ehsan Abbasnejad and Anton van den Hengel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aec58UfBzA}\n}", "github": "", "project": "", "reviewers": "cPUm;b1mf;Q6EM;2MfY", "pdf_size": 3399327, "rating": "4;5;7;7", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "96;35;117;61", "wc_strengths": "129;49;43;83", "wc_weaknesses": "204;145;127;73", "wc_questions": "167;25;33;83", "wc_limitations": "21;1;1;6", "wc_review": "617;255;321;306", "wc_reply_reviewers": "109;158;20;20", "wc_reply_authors": "535;274;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.25, 31.546592525976557 ], "wc_strengths_avg": [ 76.0, 34.19064199455752 ], "wc_weaknesses_avg": [ 137.25, 46.76737644982878 ], "wc_questions_avg": [ 77.0, 56.515484603779164 ], "wc_limitations_avg": [ 7.25, 8.1967981553775 ], "wc_review_avg": [ 374.75, 141.98657506961706 ], "wc_reply_reviewers_avg": [ 76.75, 59.33538151895545 ], "wc_reply_authors_avg": [ 202.25, 222.30651699849017 ], 
"reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=113927828332782429&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "adelaide.edu.au;unsw.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Adelaide;University of New South Wales", "aff_unique_dep": ";", "aff_unique_url": "https://www.adelaide.edu.au;https://www.unsw.edu.au", "aff_unique_abbr": "Adelaide;UNSW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Cross-Episodic Curriculum for Transformer Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71186", "id": "afKnrwJBAl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/001608167bb652337af5df0129aeaabd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=afKnrwJBAl", "openreview": "https://openreview.net/forum?id=afKnrwJBAl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71186", "video": "https://nips.cc/virtual/2023/poster/71186", "author_site": "Lucy Xiaoyang Shi, Yunfan Jiang, Jake Grigsby, Linxi Fan, Yuke Zhu", "tldr": "", "abstract": "We present a new algorithm, Cross-Episodic Curriculum (CEC), to boost the learning efficiency and generalization of Transformer agents. Central to CEC is the placement of cross-episodic experiences into a Transformer\u2019s context, which forms the basis of a curriculum. By sequentially structuring online learning trials and mixed-quality demonstrations, CEC constructs curricula that encapsulate learning progression and proficiency increase across episodes. Such synergy combined with the potent pattern recognition capabilities of Transformer models delivers a powerful cross-episodic attention mechanism. The effectiveness of CEC is demonstrated under two representative scenarios: one involving multi-task reinforcement learning with discrete control, such as in DeepMind Lab, where the curriculum captures the learning progression in both individual and progressively complex settings; and the other involving imitation learning with mixed-quality data for continuous control, as seen in RoboMimic, where the curriculum captures the improvement in demonstrators' expertise. In all instances, policies resulting from CEC exhibit superior performance and strong generalization. 
Code is open-sourced on the project website https://cec-agent.github.io/ to facilitate research on Transformer agent learning.", "keywords": "Transformers;In-context Learning;Reinforcement Learning;Robotics", "primary_area": "", "supplementary_material": "/attachment/4f1d3e38a3d5a8d7bfdb25dfa72b1419f5918a3e.pdf", "author": "Lucy Xiaoyang Shi;Yunfan Jiang;Jake Grigsby;Linxi Fan;Yuke Zhu", "authorids": "~Lucy_Xiaoyang_Shi1;~Yunfan_Jiang1;~Jake_Grigsby1;~Linxi_Fan2;~Yuke_Zhu1", "gender": "F;M;M;;M", "homepage": "https://lucys0.github.io/;https://yunfanj.com/;https://github.com/jakegrigsby;;https://cs.utexas.edu/~yukez/", "dblp": "324/5129;311/5581-1;276/6109;154/6778;133/1772", "google_scholar": ";https://scholar.google.com/citations?hl=en;qgUe3jYAAAAJ;sljtWIUAAAAJ;mWGyYMsAAAAJ", "orcid": ";;;;", "linkedin": "lucy-xiaoyang-shi/;;;;", "or_profile": "~Lucy_Xiaoyang_Shi1;~Yunfan_Jiang1;~Jake_Grigsby1;~Linxi_Fan2;~Yuke_Zhu1", "aff": "University of Southern California;Stanford University;University of Texas at Austin;NVIDIA;Computer Science Department, University of Texas, Austin", "aff_domain": "usc.edu;cs.stanford.edu;cs.utexas.edu;nvidia.com;cs.utexas.edu", "position": "Undergrad student;PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshi2023crossepisodic,\ntitle={Cross-Episodic Curriculum for Transformer Agents},\nauthor={Lucy Xiaoyang Shi and Yunfan Jiang and Jake Grigsby and Linxi Fan and Yuke Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=afKnrwJBAl}\n}", "github": "", "project": "", "reviewers": "3Crt;btkb;xTZL;hTvj", "pdf_size": 1528653, "rating": "4;5;5;6", "confidence": "3;3;3;5", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "45;73;23;166", "wc_strengths": "45;32;43;164", "wc_weaknesses": "0;138;194;157", "wc_questions": "286;78;20;194", "wc_limitations": "32;14;45;7", "wc_review": "408;335;325;688", "wc_reply_reviewers": "116;87;84;25", "wc_reply_authors": "249;811;42;42", "reply_reviewers": "1;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 54.49025142169928 ], "wc_strengths_avg": [ 71.0, 53.92123885817165 ], "wc_weaknesses_avg": [ 122.25, 73.39746249019785 ], "wc_questions_avg": [ 144.5, 102.95023069425342 ], "wc_limitations_avg": [ 24.5, 14.941552797483935 ], "wc_review_avg": [ 439.0, 147.28713453659148 ], "wc_reply_reviewers_avg": [ 78.0, 33.05298776207682 ], "wc_reply_authors_avg": [ 286.0, 314.66887357983154 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7494896122808056043&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "usc.edu;cs.stanford.edu;cs.utexas.edu;nvidia.com;cs.utexas.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "University of Southern California;Stanford University;University of Texas at Austin;NVIDIA", "aff_unique_dep": ";;;NVIDIA Corporation", "aff_unique_url": "https://www.usc.edu;https://www.stanford.edu;https://www.utexas.edu;https://www.nvidia.com", "aff_unique_abbr": "USC;Stanford;UT Austin;NVIDIA", 
"aff_campus_unique_index": "0;1;2;2", "aff_campus_unique": "Los Angeles;Stanford;Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Mixtures of Gaussians Using the DDPM Objective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71185", "id": "aig7sgdRfI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ec077b4af90f2556b517b556e186f64-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aig7sgdRfI", "openreview": "https://openreview.net/forum?id=aig7sgdRfI", "poster": "/media/PosterPDFs/NeurIPS%202023/71185.png?t=1702171031.5169466", "slides": "https://nips.cc/virtual/2023/poster/71185", "video": "https://nips.cc/virtual/2023/poster/71185", "author_site": "Kulin Shah, Sitan Chen, Adam Klivans", "tldr": "", "abstract": "Recent works have shown that diffusion models can learn essentially any distribution provided one can perform score estimation.\nYet it remains poorly understood under what settings score estimation is possible, let alone when practical gradient-based algorithms for this task can provably succeed. \n\nIn this work, we give the first provably efficient results for one of the most fundamental distribution families, Gaussian mixture models.\nWe prove that GD on the denoising diffusion probabilistic model (DDPM) objective can efficiently recover the ground truth parameters of the mixture model in the following two settings:\n1. We show GD with random initialization learns mixtures of two spherical Gaussians in $d$ dimensions with $1/\\text{poly}(d)$-separated centers.\n2. We show GD with a warm start learns mixtures of $K$ spherical Gaussians with $\\Omega(\\sqrt{\\log(\\min(K,d))})$-separated centers.\n\nA key ingredient in our proofs is a new connection between score-based methods and two other approaches to distribution learning, EM and spectral methods.", "keywords": "Mixtures of Gaussians;score-based generative models;provable learning of score;Expectation-Maximization;DDPM generative model", "primary_area": "", "supplementary_material": "/attachment/28da383cea09406d4a021e049748ceb8241d92a9.pdf", "author": "Kulin Shah;Sitan Chen;Adam Klivans", "authorids": "~Kulin_Shah1;~Sitan_Chen1;~Adam_Klivans1", "gender": "M;M;M", "homepage": "https://kulinshah98.github.io/;https://sitanchen.com;http://www.cs.utexas.edu/~klivans", "dblp": "215/3581;141/7670;k/AdamRKlivans", "google_scholar": "https://scholar.google.co.in/citations?user=67OmLg4AAAAJ;YnJVsp4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kulin_Shah1;~Sitan_Chen1;~Adam_Klivans1", "aff": "University of Texas, Austin;University of California, Berkeley;University of Texas, Austin", "aff_domain": "cs.utexas.edu;berkeley.edu;cs.utexas.edu", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nshah2023learning,\ntitle={Learning Mixtures of Gaussians Using the {DDPM} Objective},\nauthor={Kulin Shah and Sitan Chen and Adam Klivans},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aig7sgdRfI}\n}", "github": "", "project": "", "reviewers": "6LCQ;R4kZ;Egk5;NKbJ", "pdf_size": 578761, "rating": "3;3;3;7", "confidence": "2;3;2;4", "soundness": "1;2;3;3", "novelty": "2;2;3;3", "presentation": "3;1;1;3", "wc_summary": "36;87;69;70", "wc_strengths": "19;42;111;117", "wc_weaknesses": "165;226;723;248", "wc_questions": "25;69;65;5", "wc_limitations": "1;54;23;5", "wc_review": "246;478;991;445", 
"wc_reply_reviewers": "0;856;248;28", "wc_reply_authors": "0;795;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 4.0, 1.7320508075688772 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 65.5, 18.472953201911167 ], "wc_strengths_avg": [ 72.25, 42.587410111440214 ], "wc_weaknesses_avg": [ 340.5, 222.91982863801059 ], "wc_questions_avg": [ 41.0, 26.981475126464083 ], "wc_limitations_avg": [ 20.75, 20.90902915010642 ], "wc_review_avg": [ 540.0, 275.0936204276646 ], "wc_reply_reviewers_avg": [ 283.0, 344.48076869398676 ], "wc_reply_authors_avg": [ 198.75, 344.24509800431434 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8085622038896580338&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "cs.utexas.edu;berkeley.edu;cs.utexas.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.berkeley.edu", "aff_unique_abbr": "UT Austin;UC Berkeley", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Austin;Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Robustness with Adaptive Weight Decay", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71184", "id": "ajnThDhuq6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f9d7d6c695bc983fcfb5b70a5fbdfd2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ajnThDhuq6", "openreview": "https://openreview.net/forum?id=ajnThDhuq6", "poster": "/media/PosterPDFs/NeurIPS%202023/71184.png?t=1699850303.5004692", "slides": "https://nips.cc/virtual/2023/poster/71184", "video": "https://nips.cc/virtual/2023/poster/71184", "author_site": "Mohammad Amin Ghiasi, Ali Shafahi, Reza Ardekani", "tldr": "", "abstract": "We propose adaptive weight decay, which automatically tunes the hyper-parameter for weight decay during each training iteration. For classification problems, we propose changing the value of the weight decay hyper-parameter on the fly based on the strength of updates from the classification loss (i.e., gradient of cross-entropy), and the regularization loss (i.e., $\\ell_2$-norm of the weights). We show that this simple modification can result in large improvements in adversarial robustness \u2014 an area which suffers from robust overfitting \u2014 without requiring extra data accros various datasets and architecture choices. For example, our reformulation results in 20\\% relative robustness improvement for CIFAR-100, and 10\\% relative robustness improvement on CIFAR-10 comparing to the best tuned hyper-parameters of traditional weight decay resulting in models that have comparable performance to SOTA robustness methods. 
In addition, this method has other desirable properties, such as less sensitivity to learning rate, and smaller weight norms, which the latter contributes to robustness to overfitting to label noise, and pruning.", "keywords": "Adaptive weight decay;adversarial robustness;weight decay;robust overfitting;overfitting;adversarial attacks;noisy label", "primary_area": "", "supplementary_material": "/attachment/c1abab114346ae624dd02f20ca0db5cb301778b1.pdf", "author": "Amin Ghiasi;Ali Shafahi;Reza Ardekani", "authorids": "~Amin_Ghiasi1;~Ali_Shafahi1;~Reza_Ardekani1", "gender": "M;M;M", "homepage": "http://cs.umd.edu/~amin;;", "dblp": "239/8313;136/0235;", "google_scholar": "tNQWOxUAAAAJ;5Jnk00MAAAAJ;5OZTNTQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Amin_Ghiasi1;~Ali_Shafahi1;~Reza_Ardekani1", "aff": "Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nghiasi2023improving,\ntitle={Improving Robustness with Adaptive Weight Decay},\nauthor={Amin Ghiasi and Ali Shafahi and Reza Ardekani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ajnThDhuq6}\n}", "github": "", "project": "", "reviewers": "61Q2;pbDj;3gZW;N3sV;c9pV", "pdf_size": 1270635, "rating": "4;5;6;6;6", "confidence": "4;3;3;5;4", "soundness": "2;4;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;4;2;4;3", "wc_summary": "44;43;78;50;91", "wc_strengths": "38;29;15;24;32", "wc_weaknesses": "48;87;45;106;69", "wc_questions": "282;2;354;20;181", "wc_limitations": "15;2;1;6;1", "wc_review": "427;163;493;206;374", "wc_reply_reviewers": "187;167;36;25;26", "wc_reply_authors": "495;1067;0;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 61.2, 19.610201426808448 ], "wc_strengths_avg": [ 27.6, 7.761443164772903 ], "wc_weaknesses_avg": [ 71.0, 23.194827009486403 ], "wc_questions_avg": [ 167.8, 139.44231782353592 ], "wc_limitations_avg": [ 5.0, 5.329165037789691 ], "wc_review_avg": [ 332.6, 127.39324942868832 ], "wc_reply_reviewers_avg": [ 88.2, 72.88182215065702 ], "wc_reply_authors_avg": [ 312.4, 423.21275973202887 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.1336306209562122, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8589096077289440313&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 6, "email": "apple.com;apple.com;apple.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Decompose a Task into Generalizable Subtasks in Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71183", "id": "aky0dKv9ip", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f7d3cef7ff579f2f903c8f458e730cae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aky0dKv9ip", "openreview": 
"https://openreview.net/forum?id=aky0dKv9ip", "poster": "/media/PosterPDFs/NeurIPS%202023/71183.png?t=1699496818.5496845", "slides": "https://nips.cc/virtual/2023/poster/71183", "video": "https://nips.cc/virtual/2023/poster/71183", "author_site": "Zikang Tian, Ruizhi Chen, Xing Hu, Ling Li, Rui Zhang, Fan Wu, Shaohui Peng, Jiaming Guo, Zidong Du, Qi Guo, Yunji Chen", "tldr": "", "abstract": "In recent years, Multi-Agent Reinforcement Learning (MARL) techniques have made significant strides in achieving high asymptotic performance in single task. However, there has been limited exploration of model transferability across tasks. Training a model from scratch for each task can be time-consuming and expensive, especially for large-scale Multi-Agent Systems. Therefore, it is crucial to develop methods for generalizing the model across tasks. Considering that there exist task-independent subtasks across MARL tasks, a model that can decompose such subtasks from the source task could generalize to target tasks. However, ensuring true task-independence of subtasks poses a challenge. In this paper, we propose to \\textbf{d}ecompose a \\textbf{t}ask in\\textbf{to} a series of \\textbf{g}eneralizable \\textbf{s}ubtasks (DT2GS), a novel framework that addresses this challenge by utilizing a scalable subtask encoder and an adaptive subtask semantic module. We show that these components endow subtasks with two properties critical for task-independence: avoiding overfitting to the source task and maintaining consistent yet scalable semantics across tasks. Empirical results demonstrate that DT2GS possesses sound zero-shot generalization capability across tasks, exhibits sufficient transferability, and outperforms existing methods in both multi-task and single-task problems.", "keywords": "Multi-Agent Reinforcement Learning;Transfer Learning;Zero-Shot Generalization", "primary_area": "", "supplementary_material": "/attachment/8d55052a305406d9ba080f70cbfa0a8956261b2f.zip", "author": "Zikang Tian;Ruizhi Chen;Xing Hu;Ling Li;Rui Zhang;Fan Wu;Shaohui Peng;Jiaming Guo;Zidong Du;Qi Guo;Yunji Chen", "authorids": "~Zikang_Tian1;~Ruizhi_Chen3;~Xing_Hu3;~Ling_Li6;~Rui_Zhang1;~Fan_Wu11;~Shaohui_Peng2;~Jiaming_Guo2;~Zidong_Du1;~Qi_Guo4;~Yunji_Chen1", "gender": "M;M;F;F;F;M;M;;M;M;M", "homepage": ";;;;;http://fanwu.academic.site/;;https://zidongdu.github.io/;http://novel.ict.ac.cn/qguo;;", "dblp": ";120/4143;49/10052-1;92/5001-1;60/2536-40;;63/8512;44/11216;67/398-1;48/474;246/8768", "google_scholar": "JcxwjJcAAAAJ;x_wFaYgAAAAJ;Hc3iRxUAAAAJ;;dse6jAsAAAAJ;;;https://scholar.google.com.sg/citations?user=8N9ym9YAAAAJ;;;", "orcid": "0000-0003-2356-3925;0000-0001-7219-4658;;0000-0001-8877-9052;;;;0000-0002-7603-4210;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Zikang_Tian1;~Ruizhi_Chen3;~Xing_Hu3;~Ling_Li6;~Rui_Zhang1;~Fan_Wu11;~Jiaming_Guo2;~Zidong_Du1;~Qi_Guo4;~Yunji_Chen1;~shaohui_peng1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Software Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Software, CAS;Institute of Computing Technology, CAS;University of Chinese Academy of Sciences, Tsinghua University;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_domain": 
"ict.ac.cn;iscas.ac.cn;ict.ac.cn;iscas.ac.cn;ict.ac.cn;ucas.edu.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;Assistant Professor;Associate Professor;Full Professor;Assistant Professor;PhD student;PhD student;Full Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ntian2023decompose,\ntitle={Decompose a Task into Generalizable Subtasks in Multi-Agent Reinforcement Learning},\nauthor={Zikang Tian and Ruizhi Chen and Xing Hu and Ling Li and Rui Zhang and Fan Wu and Shaohui Peng and Jiaming Guo and Zidong Du and Qi Guo and Yunji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aky0dKv9ip}\n}", "github": "", "project": "", "reviewers": "NoDQ;5oAQ;i2DM;McXb", "pdf_size": 7301302, "rating": "4;5;7;7", "confidence": "3;4;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;1;3;3", "wc_summary": "120;63;52;168", "wc_strengths": "77;123;69;43", "wc_weaknesses": "259;450;67;60", "wc_questions": "127;223;167;53", "wc_limitations": "34;113;6;29", "wc_review": "617;972;361;353", "wc_reply_reviewers": "46;162;25;23", "wc_reply_authors": "259;768;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 100.75, 46.622821664931436 ], "wc_strengths_avg": [ 78.0, 28.861739379323623 ], "wc_weaknesses_avg": [ 209.0, 160.4259953997481 ], "wc_questions_avg": [ 142.5, 61.90920771581558 ], "wc_limitations_avg": [ 45.5, 40.37635446644484 ], "wc_review_avg": [ 575.75, 252.21555760896274 ], "wc_reply_reviewers_avg": [ 64.0, 57.29310604252487 ], "wc_reply_authors_avg": [ 256.75, 313.5373781545033 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8511968762257632089&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ict.ac.cn;iscas.ac.cn;ict.ac.cn;iscas.ac.cn;ict.ac.cn;ucas.edu.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 11, "aff_unique_index": "0;0;0;0;0;1;0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Factorized Contrastive Learning: Going Beyond Multi-view Redundancy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71182", "id": "alLs7EtRJP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6818dcc65fdf3cbd4b05770fb957803e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=alLs7EtRJP", "openreview": "https://openreview.net/forum?id=alLs7EtRJP", "poster": "/media/PosterPDFs/NeurIPS%202023/71182.png?t=1701831219.015507", "slides": "https://nips.cc/virtual/2023/poster/71182", "video": "https://nips.cc/virtual/2023/poster/71182", "author_site": "Paul Pu Liang, Zihao Deng, Martin Q. 
Ma, James Zou, Louis-Philippe Morency, Ruslan Salakhutdinov", "tldr": "", "abstract": "In a wide range of multimodal tasks, contrastive learning has become a particularly appealing approach since it can successfully learn representations from abundant unlabeled data with only pairing information (e.g., image-caption or video-audio pairs). Underpinning these approaches is the assumption of multi-view redundancy - that shared information between modalities is necessary and sufficient for downstream tasks. However, in many real-world settings, task-relevant information is also contained in modality-unique regions: information that is only present in one modality but still relevant to the task. How can we learn self-supervised multimodal representations to capture both shared and unique information relevant to downstream tasks? This paper proposes FactorCL, a new multimodal representation learning method to go beyond multi-view redundancy. FactorCL is built from three new contributions: (1) factorizing task-relevant information into shared and unique representations, (2) capturing task-relevant information via maximizing MI lower bounds and removing task-irrelevant information via minimizing MI upper bounds, and (3) multimodal data augmentations to approximate task relevance without labels. On large-scale real-world datasets, FactorCL captures both shared and unique information and achieves state-of-the-art results on six benchmarks.", "keywords": "multimodal learning;contrastive learning;self-supervised learning;information theory", "primary_area": "", "supplementary_material": "/attachment/4fbd98fb741f9a1d71d6dfef9f8c8246377b85c1.zip", "author": "Paul Pu Liang;Zihao Deng;Martin Q. Ma;James Zou;Louis-Philippe Morency;Russ Salakhutdinov", "authorids": "~Paul_Pu_Liang1;~Zihao_Deng2;~Martin_Q._Ma1;~James_Zou1;~Louis-Philippe_Morency1;~Russ_Salakhutdinov1", "gender": "M;M;;M;M;M", "homepage": "https://pliang279.github.io/;;;https://www.cs.cmu.edu/~morency/;https://www.cs.cmu.edu/~rsalakhu/;http://www.cs.cmu.edu/~qianlim/", "dblp": "207/9749;;;31/739;;251/5669.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;23ZXZvEAAAAJ;https://scholar.google.com.tw/citations?user=APgaFK0AAAAJ;;TFCtuaQAAAAJ", "orcid": ";;;0000-0001-6376-7696;;", "linkedin": ";https://www.linkedin.cn/in/zihao-deng-32b1401b5;;morency/;;", "or_profile": "~Paul_Pu_Liang1;~Zihao_Deng2;~James_Zou1;~Louis-Philippe_Morency1;~Russ_Salakhutdinov1;~Martin_Ma2", "aff": "Carnegie Mellon University;University of Pennsylvania;Stanford University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Meta Platforms, Inc.", "aff_domain": "cs.cmu.edu;seas.upenn.edu;stanford.edu;cmu.edu;cs.cmu.edu;meta.com", "position": "PhD student;MS student;Assistant Professor;Associate Professor;Full Professor;Intern", "bibtex": "@inproceedings{\nliang2023factorized,\ntitle={Factorized Contrastive Learning: Going Beyond Multi-view Redundancy},\nauthor={Paul Pu Liang and Zihao Deng and Martin Q. 
Ma and James Zou and Louis-Philippe Morency and Russ Salakhutdinov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=alLs7EtRJP}\n}", "github": "", "project": "", "reviewers": "4hLy;N8Dx;859W;fNpT;bfCW", "pdf_size": 2736221, "rating": "4;6;6;6;7", "confidence": "4;3;4;4;4", "soundness": "3;3;3;2;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "112;99;117;185;51", "wc_strengths": "43;90;102;86;61", "wc_weaknesses": "68;179;112;547;106", "wc_questions": "20;6;155;177;2", "wc_limitations": "1;31;27;7;1", "wc_review": "244;405;513;1002;221", "wc_reply_reviewers": "0;34;49;492;13", "wc_reply_authors": "0;0;335;1083;0", "reply_reviewers": "0;1;1;3;1", "reply_authors": "1;1;2;3;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 112.8, 42.99023144855119 ], "wc_strengths_avg": [ 76.4, 21.378493866500513 ], "wc_weaknesses_avg": [ 202.4, 175.96886088169123 ], "wc_questions_avg": [ 72.0, 77.29683046542077 ], "wc_limitations_avg": [ 13.4, 12.986146464598342 ], "wc_review_avg": [ 477.0, 283.5242494038208 ], "wc_reply_reviewers_avg": [ 117.6, 187.95808043284543 ], "wc_reply_authors_avg": [ 283.6, 420.2306985454537 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.10206207261596574, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15785607661032642666&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.cmu.edu;seas.upenn.edu;stanford.edu;cmu.edu;cs.cmu.edu;meta.com", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "Carnegie Mellon University;University of Pennsylvania;Stanford University;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://www.upenn.edu;https://www.stanford.edu;https://www.meta.com", "aff_unique_abbr": "CMU;UPenn;Stanford;Meta", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FlowCam: Training Generalizable 3D Radiance Fields without Camera Poses via Pixel-Aligned Scene Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71181", "id": "apFDDJOYf5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0534abc9e6db91683d82186ef0d68202-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=apFDDJOYf5", "openreview": "https://openreview.net/forum?id=apFDDJOYf5", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71181", "video": "https://nips.cc/virtual/2023/poster/71181", "author_site": "Cameron Smith, Yilun Du, Ayush Tewari, Vincent Sitzmann", "tldr": "", "abstract": "Reconstruction of 3D neural fields from posed images has emerged as a promising method for self-supervised representation learning. The key challenge preventing the deployment of these 3D scene learners on large-scale video data is their dependence on precise camera poses from structure-from-motion, which is prohibitively expensive to run at scale. We propose a method that jointly reconstructs camera poses and 3D neural scene representations online and in a single forward pass. 
We estimate poses by first lifting frame-to-frame optical flow to 3D scene flow via differentiable rendering, preserving locality and shift-equivariance of the image processing backbone. SE(3) camera pose estimation is then performed via a weighted least-squares fit to the scene flow field. This formulation enables us to jointly supervise pose estimation and a generalizable neural scene representation via re-rendering the input video, and thus, train end-to-end and fully self-supervised on real-world video datasets. We demonstrate that our method performs robustly on diverse, real-world video, notably on sequences traditionally challenging to optimization-based pose estimation techniques.", "keywords": "Pose Estimation;Scene Flow Estimation;Scene Representation Learning;Computer Vision;Neural Implicit Representations;Neural Radiance Fields;View Synthesis;Self-Supervised Representation Learning", "primary_area": "", "supplementary_material": "/attachment/8ac098430378dce09fed3111900e279adf43c626.zip", "author": "Cameron Omid Smith;Yilun Du;Ayush Tewari;Vincent Sitzmann", "authorids": "~Cameron_Omid_Smith1;~Yilun_Du1;~Ayush_Tewari2;~Vincent_Sitzmann1", "gender": ";;;M", "homepage": ";https://yilundu.github.io;https://ayushtewari.com;https://vsitzmann.github.io", "dblp": ";204/4379;198/1021;192/1958", "google_scholar": ";;pDnzpeoAAAAJ;X44QVV4AAAAJ", "orcid": ";;;0000-0002-0107-5704", "linkedin": ";;;vincentsitzmann/", "or_profile": "~Cameron_Omid_Smith1;~Yilun_Du1;~Ayush_Tewari2;~Vincent_Sitzmann1", "aff": ";Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": ";mit.edu;mit.edu;mit.edu", "position": ";PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nsmith2023flowcam,\ntitle={FlowCam: Training Generalizable 3D Radiance Fields without Camera Poses via Pixel-Aligned Scene Flow},\nauthor={Cameron Omid Smith and Yilun Du and Ayush Tewari and Vincent Sitzmann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=apFDDJOYf5}\n}", "github": "", "project": "", "reviewers": "3ZaF;52at;oZ25;kV1r", "pdf_size": 44812305, "rating": "5;6;6;7", "confidence": "5;5;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "150;78;56;46", "wc_strengths": "30;32;132;68", "wc_weaknesses": "72;770;166;47", "wc_questions": "95;29;19;37", "wc_limitations": "27;1;5;18", "wc_review": "374;910;378;216", "wc_reply_reviewers": "15;42;0;16", "wc_reply_authors": "0;128;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.5, 40.65402809070707 ], "wc_strengths_avg": [ 65.5, 41.26439142893059 ], "wc_weaknesses_avg": [ 263.75, 295.6318445296447 ], "wc_questions_avg": [ 45.0, 29.563490998188964 ], "wc_limitations_avg": [ 12.75, 10.353139620424328 ], "wc_review_avg": [ 469.5, 262.58093990234704 ], "wc_reply_reviewers_avg": [ 18.25, 15.105876340020794 ], "wc_reply_authors_avg": [ 32.0, 55.42562584220407 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 13, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7641547776618309101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LICO: Explainable Models with Language-Image COnsistency", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71180", "id": "apjOYp3mOa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c2eac51b6353a4441e8b7426f8e8db78-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=apjOYp3mOa", "openreview": "https://openreview.net/forum?id=apjOYp3mOa", "poster": "/media/PosterPDFs/NeurIPS%202023/71180.png?t=1697358895.952961", "slides": "https://nips.cc/virtual/2023/poster/71180", "video": "https://nips.cc/virtual/2023/poster/71180", "author_site": "Yiming Lei, Zilong Li, Yangyang Li, Junping Zhang, Hongming Shan", "tldr": "", "abstract": "Interpreting the decisions of deep learning models has been actively studied since the explosion of deep neural networks. One of the most convincing interpretation approaches is salience-based visual interpretation, such as Grad-CAM, where the generation of attention maps depends merely on categorical labels. Although existing interpretation methods can provide explainable decision clues, they often yield partial correspondence between image and saliency maps due to the limited discriminative information from one-hot labels. This paper develops a Language-Image COnsistency model for explainable image classification, termed LICO, by correlating learnable linguistic prompts with corresponding visual features in a coarse-to-fine manner. Specifically, we first establish a coarse global manifold structure alignment by minimizing the distance between the distributions of image and language features. We then achieve fine-grained saliency maps by applying optimal transport (OT) theory to assign local feature maps with class-specific prompts. Extensive experimental results on eight benchmark datasets demonstrate that the proposed LICO achieves a significant improvement in generating more explainable attention maps in conjunction with existing interpretation methods such as Grad-CAM. 
Remarkably, LICO improves the classification performance of existing models without introducing any computational overhead during inference.", "keywords": "Language-image consistency;prompt learning;image classification;CNN interpretation", "primary_area": "", "supplementary_material": "", "author": "Yiming Lei;Zilong Li;Yangyang Li;Junping Zhang;Hongming Shan", "authorids": "~Yiming_Lei1;~Zilong_Li1;~Yangyang_Li2;~Junping_Zhang2;~Hongming_Shan1", "gender": "M;M;;M;M", "homepage": ";;http://www.math.ac.cn/people/assiprof/202204/t20220414_695676.html;http://www.pami.fudan.edu.cn;http://hmshan.io/", "dblp": ";;;02/5388.html;184/8229", "google_scholar": "hvtuRWsAAAAJ;xriCV6QAAAAJ;;Aib_NTYAAAAJ;https://scholar.google.co.uk/citations?user=RYfSzKwAAAAJ", "orcid": "0000-0002-1349-7074;;;;0000-0002-0604-3197", "linkedin": ";;;;", "or_profile": "~Yiming_Lei1;~Zilong_Li1;~Yangyang_Li2;~Junping_Zhang2;~Hongming_Shan1", "aff": "Fudan University;Fudan University;;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn", "position": "Postdoc;PhD student;;Professor;Associate Professor", "bibtex": "@inproceedings{\nlei2023lico,\ntitle={{LICO}: Explainable Models with Language-Image {CO}nsistency},\nauthor={Yiming Lei and Zilong Li and Yangyang Li and Junping Zhang and Hongming Shan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=apjOYp3mOa}\n}", "github": "", "project": "", "reviewers": "V7Pm;rFY8;r2cD;tr3q", "pdf_size": 6592312, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "43;144;53;91", "wc_strengths": "42;40;72;130", "wc_weaknesses": "115;164;172;55", "wc_questions": "58;142;3;2", "wc_limitations": "52;1;1;1", "wc_review": "310;491;301;279", "wc_reply_reviewers": "216;111;0;117", "wc_reply_authors": "907;413;0;327", "reply_reviewers": "2;1;0;2", "reply_authors": "5;3;1;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.75, 39.63820757804268 ], "wc_strengths_avg": [ 71.0, 36.345563690772494 ], "wc_weaknesses_avg": [ 126.5, 46.69314724882014 ], "wc_questions_avg": [ 51.25, 57.0849148199417 ], "wc_limitations_avg": [ 13.75, 22.083647796503186 ], "wc_review_avg": [ 345.25, 84.90104534103217 ], "wc_reply_reviewers_avg": [ 111.0, 76.45586962424795 ], "wc_reply_authors_avg": [ 411.75, 324.8056149453085 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15605955504457856009&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "argZAtDMMF", "title": "FLOP: Tasks for Fitness Landscapes Of Protein wildtypes", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Protein engineering has the potential to create optimized 
protein variants with improved properties and function. \nAn initial step in the protein optimization process typically consists of a search among natural (wildtype) sequences to find the naturally occurring proteins with the most desirable properties. \nPromising candidates from this initial discovery phase then form the basis of the second step: a more local optimization procedure, exploring the space of variants separated from this candidate by a number of mutations. \nWhile considerable progress has been made on evaluating machine learning methods on single protein datasets, benchmarks of data-driven approaches for global fitness landscape exploration are still lacking. \nIn this paper, we have carefully curated a representative benchmark dataset, which reflects industrially relevant scenarios for the initial wildtype discovery phase of protein engineering.\nWe focus on exploration within a protein family, and investigate the downstream predictive power of various protein representation paradigms, i.e., protein language model-based representations, structure-based representations, and evolution-based representations.\nOur benchmark highlights the importance of coherent split strategies, and how we can be misled into overly optimistic estimates of the state of the field. The codebase and data can be accessed via https://github.com/petergroth/FLOP.", "keywords": "protein engineering;enzyme engineering;representation learning;benchmark;optimization;wildtype", "primary_area": "", "supplementary_material": "/attachment/080587db78179391b8c4bafc1ba2d5507a3c3089.pdf", "author": "Peter M\u00f8rch Groth;Richard Michael;Jesper Salomon;Pengfei Tian;Wouter Boomsma", "authorids": "~Peter_M\u00f8rch_Groth1;~Richard_Michael1;~Jesper_Salomon1;~Pengfei_Tian1;~Wouter_Boomsma1", "gender": ";M;M;M;M", "homepage": ";https://laplaceml.com;;;", "dblp": ";;;;06/5945", "google_scholar": ";-U772ZIAAAAJ;;https://scholar.google.dk/citations?user=p4T8WvYAAAAJ;EwqU_jsAAAAJ", "orcid": ";0000-0002-6264-7323;0000-0003-2445-6582;;0000-0002-8257-3827", "linkedin": ";rimichael/;;;", "or_profile": "~Peter_M\u00f8rch_Groth1;~Richard_Michael1;~Jesper_Salomon1;~Pengfei_Tian1;~Wouter_Boomsma1", "aff": ";University of Copenhagen;Novozymes;;University of Copenhagen", "aff_domain": ";diku.dk;novozymes.com;;ku.dk", "position": ";PhD student;Principal Researcher;;Full Professor", "bibtex": "@misc{\ngroth2023flop,\ntitle={{FLOP}: Tasks for Fitness Landscapes Of Protein wildtypes},\nauthor={Peter M{\\o}rch Groth and Richard Michael and Jesper Salomon and Pengfei Tian and Wouter Boomsma},\nyear={2023},\nurl={https://openreview.net/forum?id=argZAtDMMF}\n}", "github": "", "project": "", "reviewers": "XdTc;SXvC;YyxM;aTwL", "site": "https://openreview.net/forum?id=argZAtDMMF", "pdf_size": 1435259, "rating": "4;5;7;7", "confidence": "3;5;2;4", "wc_summary_and_contributions": "60;56;71;102", "wc_strengths": "64;41;27;128", "wc_improvement": "271;283;29;104", "wc_limitations": "69;15;3;130", "wc_correctness": "203;14;13;23", "wc_clarity": "456;6;1;25", "wc_relation_to_prior_work": "20;5;1;26", "wc_documentation": "59;41;1;27", "wc_additional_feedback": "1;1;1;1", "wc_review": "1203;462;147;566", "wc_reply_reviewers": "0;102;7;70", "wc_reply_authors": "2503;1305;455;583", "reply_reviewers": "0;1;1;1", "reply_authors": "4;3;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 72.25, 18.0329559418305 ], "wc_strengths_avg": [ 65.0, 38.6975451417787 ], 
"wc_improvement_avg": [ 171.75, 108.62176347307201 ], "wc_limitations_avg": [ 54.25, 50.30593901320201 ], "wc_correctness_avg": [ 63.25, 80.77863269454367 ], "wc_clarity_avg": [ 122.0, 193.04274138128065 ], "wc_relation_to_prior_work_avg": [ 13.0, 10.319883720275147 ], "wc_documentation_avg": [ 32.0, 21.18962010041709 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 594.5, 383.69812352942256 ], "wc_reply_reviewers_avg": [ 44.75, 42.84492385335747 ], "wc_reply_authors_avg": [ 1211.5, 813.0219861725758 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8941149226776922501&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Copenhagen;Novozymes", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.novozymes.com", "aff_unique_abbr": "UCPH;Novozymes", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "Concept Distillation: Leveraging Human-Centered Explanations for Model Improvement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71179", "id": "arkmhtYLL6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9450295fd667740a39a68148fc17f6e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=arkmhtYLL6", "openreview": "https://openreview.net/forum?id=arkmhtYLL6", "poster": "/media/PosterPDFs/NeurIPS%202023/71179.png?t=1701103360.5710673", "slides": "https://nips.cc/virtual/2023/poster/71179", "video": "https://nips.cc/virtual/2023/poster/71179", "author_site": "Avani Gupta, Saurabh Saini, P J Narayanan", "tldr": "", "abstract": "Humans use abstract *concepts* for understanding instead of hard features. Recent interpretability research has focused on human-centered concept explanations of neural networks. Concept Activation Vectors (CAVs) estimate a model's sensitivity and possible biases to a given concept. We extend CAVs from post-hoc analysis to ante-hoc training to reduce model bias through fine-tuning using an additional *Concept Loss*. Concepts are defined on the final layer of the network in the past. We generalize it to intermediate layers, including the last convolution layer. We also introduce *Concept Distillation*, a method to define rich and effective concepts using a pre-trained knowledgeable model as the teacher. Our method can sensitize or desensitize a model towards concepts. We show applications of concept-sensitive training to debias several classification problems. We also show a way to induce prior knowledge into a reconstruction problem. 
We show that concept-sensitive training can improve model interpretability, reduce biases, and induce prior knowledge.", "keywords": "Human Centered Concepts;ML interpretability;XAI based Model Improvement;Debiasing", "primary_area": "", "supplementary_material": "/attachment/fefca6a49e6740d6e50a6cceb16f37bf3e3e9fd2.zip", "author": "Avani Gupta;Saurabh Saini;P J Narayanan", "authorids": "~Avani_Gupta1;~Saurabh_Saini1;~P_J_Narayanan1", "gender": "F;M;M", "homepage": "https://avani17101.github.io/;http://researchweb.iiit.ac.in/~saurabh.saini;https://www.iiit.ac.in/~pjn/", "dblp": "319/9403;184/8348;n/PJNarayanan", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=OSZDITwAAAAJ;https://scholar.google.com.tw/citations?user=3HKjt_IAAAAJ", "orcid": "0000-0003-1262-4286;0000-0002-8274-2379;0000-0002-7164-4917", "linkedin": "avani17101-gupta/;saurabh0saini/;pjnarayanan/", "or_profile": "~Avani_Gupta1;~Saurabh_Saini1;~P_J_Narayanan1", "aff": "International Institute of Information Technology, Hyderabad;International Institute of Information Technology Hyderabad;International Institute of Information Technology Hyderabad", "aff_domain": "research.iiit.ac.in;iiit.ac.in;iiit.ac.in", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\ngupta2023concept,\ntitle={Concept Distillation: Leveraging Human-Centered Explanations for Model Improvement},\nauthor={Avani Gupta and Saurabh Saini and P J Narayanan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=arkmhtYLL6}\n}", "github": "", "project": "", "reviewers": "oT4y;FYuP;gigg;YYKb", "pdf_size": 4115690, "rating": "3;6;6;7", "confidence": "4;3;3;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "84;101;70;74", "wc_strengths": "40;87;50;51", "wc_weaknesses": "222;46;52;175", "wc_questions": "251;210;339;8", "wc_limitations": "57;9;26;1", "wc_review": "654;453;537;309", "wc_reply_reviewers": "822;36;371;0", "wc_reply_authors": "1517;28;1104;0", "reply_reviewers": "2;1;2;0", "reply_authors": "4;2;5;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 11.96609794377432 ], "wc_strengths_avg": [ 57.0, 17.84656829757475 ], "wc_weaknesses_avg": [ 123.75, 76.60409584349912 ], "wc_questions_avg": [ 202.0, 121.31570384744096 ], "wc_limitations_avg": [ 23.25, 21.47527648250425 ], "wc_review_avg": [ 488.25, 125.72067252445001 ], "wc_reply_reviewers_avg": [ 307.25, 330.53394303762514 ], "wc_reply_authors_avg": [ 662.25, 664.5654125065493 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13279571549821142196&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "research.iiit.ac.in;iiit.ac.in;iiit.ac.in", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "International Institute of Information Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.iiit.ac.in", "aff_unique_abbr": "IIIT Hyderabad", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hyderabad", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, {
"id": "au9VfbABDO", "title": "Diffusion Model-Augmented Behavioral Cloning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Imitation learning addresses the challenge of learning by observing an expert\u2019s demonstrations without access to reward signals from the environment. Most existing imitation learning methods that do not require interacting with the environment either model the expert distribution as the conditional probability p(a|s) (e.g., behavioral cloning, BC) or the joint probability p(s, a) (e.g., implicit behavioral cloning). Despite its simplicity, modeling the conditional probability with BC usually struggles with generalization. While modeling the joint probability can lead to improved generalization performance, the inference procedure can be time-consuming and it often suffers from manifold overfitting. This work proposes an imitation learning framework that benefits from modeling both the conditional and joint probability of the expert distribution. Our proposed diffusion model-augmented behavioral cloning (DBC) employs a diffusion model trained to model expert behaviors and learns a policy to optimize both the BC loss (conditional) and our proposed diffusion model loss (joint). DBC outperforms baselines in various continuous control tasks in navigation, robot arm manipulation, dexterous manipulation, and locomotion. We design additional experiments to verify the limitations of modeling either the conditional probability or the joint probability of the expert distribution as well as compare different generative models.", "keywords": "Imitation Learning;Learning from Demonstration;Diffusion Models;Behavioral Cloning", "primary_area": "", "supplementary_material": "/attachment/387c0fae8bfb97ac9c23908525adab38b40eb742.pdf", "author": "Hsiang-Chun Wang;Shang-Fu Chen;Ming-Hao Hsu;Chun-Mao Lai;Shao-Hua Sun", "authorids": "~Hsiang-Chun_Wang1;~Shang-Fu_Chen2;~Ming-Hao_Hsu1;~Chun-Mao_Lai1;~Shao-Hua_Sun1", "gender": ";M;M;M;M", "homepage": "https://hsiangchun0205.github.io/;https://www.linkedin.com/in/shang-fu-chen-354914199/;https://qaz159qaz159.github.io/;https://mecoli1219.github.io/;http://shaohua0116.github.io", "dblp": ";203/9102;325/4631;325/4767;158/9680", "google_scholar": "https://scholar.google.com.tw/citations?user=vpJMSjMAAAAJ;https://scholar.google.com.tw/citations?user=ZKOpgs4AAAAJ;;;uXsfnaQAAAAJ", "orcid": ";;;;0000-0001-7579-6734", "linkedin": "https://tw.linkedin.com/in/hsiang-chun-wang-8a4798269;;;;shaohua0116/", "or_profile": "~Hsiang-Chun_Wang1;~Shang-Fu_Chen2;~Ming-Hao_Hsu1;~Chun-Mao_Lai1;~Shao-Hua_Sun1", "aff": "National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw", "position": "MS student;PhD student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@misc{\nwang2023diffusion,\ntitle={Diffusion Model-Augmented Behavioral Cloning},\nauthor={Hsiang-Chun Wang and Shang-Fu Chen and Ming-Hao Hsu and Chun-Mao Lai and Shao-Hua Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=au9VfbABDO}\n}", "github": "", "project": "", "reviewers": "Pa84;2m35;uqof;Bk1n;Q6yg", "site": "https://openreview.net/forum?id=au9VfbABDO", "pdf_size": 3353425, "rating": "3;3;3;6;7", "confidence": "4;4;4;3;4", "soundness": "1;2;1;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "60;72;87;97;77", "wc_strengths": "190;59;60;101;306", "wc_weaknesses": "926;81;167;145;221", 
"wc_questions": "54;438;63;62;16", "wc_limitations": "18;49;13;40;40", "wc_review": "1248;699;390;445;660", "wc_reply_reviewers": "215;190;316;19;14", "wc_reply_authors": "557;919;442;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;3;2;1;1", "rating_avg": [ 4.4, 1.7435595774162693 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.0, 0.8944271909999159 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 78.6, 12.658593918757328 ], "wc_strengths_avg": [ 143.2, 94.32581831078912 ], "wc_weaknesses_avg": [ 308.0, 312.2409326145437 ], "wc_questions_avg": [ 126.6, 156.646863996698 ], "wc_limitations_avg": [ 32.0, 13.957077057894322 ], "wc_review_avg": [ 688.4, 304.0911705393631 ], "wc_reply_reviewers_avg": [ 150.8, 117.50302123775371 ], "wc_reply_authors_avg": [ 383.6, 350.550766651565 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4588314677411235, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=499986354715790633&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Discovering Intrinsic Spatial-Temporal Logic Rules to Explain Human Actions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71178", "id": "avuRopYsCg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d69fdbe4d13080bb7fa33249ca136976-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=avuRopYsCg", "openreview": "https://openreview.net/forum?id=avuRopYsCg", "poster": "/media/PosterPDFs/NeurIPS%202023/71178.png?t=1696924737.3644896", "slides": "https://nips.cc/virtual/2023/poster/71178", "video": "https://nips.cc/virtual/2023/poster/71178", "author_site": "Chengzhi Cao, Chao Yang, Ruimao Zhang, Shuang Li", "tldr": "", "abstract": "We propose an interpretable model to uncover the behavioral patterns of human movements by analyzing their trajectories. Our approach is based on the belief that human actions are driven by intentions and are influenced by environmental factors such as spatial relationships with surrounding objects. To model this, we use a set of spatial-temporal logic rules that include intention variables as principles. These rules are automatically discovered and used to capture the dynamics of human actions. To learn the model parameters and rule content, we design an EM learning algorithm that treats the unknown rule content as a latent variable. In the E-step, we evaluate the posterior over the latent rule content, and in the M-step, we optimize the rule generator and model parameters by maximizing the expected log-likelihood. Our model has wide-ranging applications in areas such as sports analytics, robotics, and autonomous cars. 
We demonstrate the model's superior interpretability and prediction performance on both pedestrian and NBA basketball player datasets, achieving promising results.", "keywords": "Logic rule;human actions;sports analytics", "primary_area": "", "supplementary_material": "/attachment/ffd4baebff22cb7dd3c7489b24c33d51413db02f.pdf", "author": "Chengzhi Cao;Chao Yang;Ruimao Zhang;Shuang Li", "authorids": "~Chengzhi_Cao1;~Chao_Yang9;~Ruimao_Zhang1;~Shuang_Li3", "gender": "M;M;M;F", "homepage": ";https://github.com/yangchaoforthree;http://zhangruimao.site/#;https://shuangli01.github.io", "dblp": "30/185;;54/10697;43/6294-2", "google_scholar": ";;ZJwZdtgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chengzhi_Cao1;~Chao_Yang9;~Ruimao_Zhang1;~Shuang_Li3", "aff": "University of Science and Technology of China;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong (Shenzhen);The Chinese University of Hong Kong (Shenzhen)", "aff_domain": "ustc.edu.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "MS student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2023discovering,\ntitle={Discovering Intrinsic Spatial-Temporal Logic Rules to Explain Human Actions},\nauthor={Chengzhi Cao and Chao Yang and Ruimao Zhang and Shuang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=avuRopYsCg}\n}", "github": "", "project": "", "reviewers": "zUPP;gEM9;AEbP;7WeJ", "pdf_size": 0, "rating": "6;6;7;7", "confidence": "2;2;3;3", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "44;98;130;73", "wc_strengths": "28;80;77;143", "wc_weaknesses": "379;100;36;47", "wc_questions": "101;48;91;131", "wc_limitations": "8;66;1;12", "wc_review": "560;392;335;406", "wc_reply_reviewers": "27;27;33;31", "wc_reply_authors": "38;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.25, 31.67313530422904 ], "wc_strengths_avg": [ 82.0, 40.82278775390039 ], "wc_weaknesses_avg": [ 140.5, 139.80790392535036 ], "wc_questions_avg": [ 92.75, 29.73529048117741 ], "wc_limitations_avg": [ 21.75, 25.849323008543184 ], "wc_review_avg": [ 423.25, 83.31078861708129 ], "wc_reply_reviewers_avg": [ 29.5, 2.598076211353316 ], "wc_reply_authors_avg": [ 9.5, 16.454482671904334 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5290466848712240305&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ustc.edu.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Science and Technology of China;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "USTC;CUHK", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Risk-Averse Active Sensing for Timely Outcome Prediction under Cost Pressure", "status": "Poster", "track": "main", "site":
"https://nips.cc/virtual/2023/poster/71177", "id": "aw1vLo7TE7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1498a03a04f9bcd3a7d44058fc5dc639-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=aw1vLo7TE7", "openreview": "https://openreview.net/forum?id=aw1vLo7TE7", "poster": "/media/PosterPDFs/NeurIPS%202023/71177.png?t=1701376921.2775328", "slides": "https://nips.cc/virtual/2023/poster/71177", "video": "https://nips.cc/virtual/2023/poster/71177", "author_site": "Yuchao Qin, Mihaela van der Schaar, Changhee Lee", "tldr": "", "abstract": "Timely outcome prediction is essential in healthcare to enable early detection and intervention of adverse events. However, in longitudinal follow-ups to patients' health status, cost-efficient acquisition of patient covariates is usually necessary due to the significant expense involved in screening and lab tests. To balance the timely and accurate outcome predictions with acquisition costs, an effective active sensing strategy is crucial. In this paper, we propose a novel risk-averse active sensing approach RAS that addresses the composite decision problem of when to conduct the acquisition and which measurements to make. Our approach decomposes the policy into two sub-policies: acquisition scheduler and feature selector, respectively. Moreover, we introduce a novel risk-aversion training strategy to focus on the underrepresented subgroup of high-risk patients for whom timely and accurate prediction of disease progression is of greater value. Our method outperforms baseline active sensing approaches in experiments with both synthetic and real-world datasets, and we illustrate the significance of our policy decomposition and the necessity of a risk-averse sensing policy through case studies.", "keywords": "active sensing;value of information;risk-averse learning", "primary_area": "", "supplementary_material": "", "author": "Yuchao Qin;Mihaela van der Schaar;Changhee Lee", "authorids": "~Yuchao_Qin1;~Mihaela_van_der_Schaar2;~Changhee_Lee1", "gender": "M;F;", "homepage": ";https://www.vanderschaar-lab.com;", "dblp": ";;", "google_scholar": "WkGPAquyti0C;DZ3S--MAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuchao_Qin1;~Mihaela_van_der_Schaar2;~Changhee_Lee1", "aff": "University of Cambridge;University of California, Los Angeles;ChungAng University", "aff_domain": "cam.ac.uk;ucla.edu;cau.ac.kr", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nqin2023riskaverse,\ntitle={Risk-Averse Active Sensing for Timely Outcome Prediction under Cost Pressure},\nauthor={Yuchao Qin and Mihaela van der Schaar and Changhee Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=aw1vLo7TE7}\n}", "github": "", "project": "", "reviewers": "L7Er;d1SC;fZr8;mJK8", "pdf_size": 676230, "rating": "3;5;6;6", "confidence": "3;5;2;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;2;3", "wc_summary": "69;97;76;65", "wc_strengths": "42;55;42;49", "wc_weaknesses": "56;85;170;62", "wc_questions": "37;679;59;97", "wc_limitations": "9;19;21;5", "wc_review": "213;935;368;278", "wc_reply_reviewers": "39;31;45;29", "wc_reply_authors": "485;0;110;519", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": 
[ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 12.336429791475327 ], "wc_strengths_avg": [ 47.0, 5.431390245600108 ], "wc_weaknesses_avg": [ 93.25, 45.61455359860491 ], "wc_questions_avg": [ 218.0, 267.0224709645239 ], "wc_limitations_avg": [ 13.5, 6.689544080129826 ], "wc_review_avg": [ 448.5, 286.2223785800125 ], "wc_reply_reviewers_avg": [ 36.0, 6.4031242374328485 ], "wc_reply_authors_avg": [ 278.5, 227.1766933468308 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.18731716231633877, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=30847453188460822&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;ucla.edu;cau.ac.kr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles;Chung-Ang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu;http://www.cau.ac.kr", "aff_unique_abbr": "Cambridge;UCLA;CAU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Los Angeles;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;United States;South Korea" }, { "title": "LEACE: Perfect linear concept erasure in closed form", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71176", "id": "awIpKpwTwF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d066d21c619d0a78c5b557fa3291a8f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=awIpKpwTwF", "openreview": "https://openreview.net/forum?id=awIpKpwTwF", "poster": "/media/PosterPDFs/NeurIPS%202023/71176.png?t=1697453583.09101", "slides": "https://nips.cc/virtual/2023/poster/71176", "video": "https://nips.cc/virtual/2023/poster/71176", "author_site": "Nora Belrose, David Schneider-Joseph, Shauli Ravfogel, Ryan Cotterell, Edward Raff, Stella Biderman", "tldr": "", "abstract": "Concept erasure aims to remove specified features from a representation. It can improve fairness (e.g. preventing a classifier from using gender or race) and interpretability (e.g. removing a concept to observe changes in model behavior). We introduce LEAst-squares Concept Erasure (LEACE), a closed-form method which provably prevents all linear classifiers from detecting a concept while changing the representation as little as possible, as measured by a broad class of norms. We apply LEACE to large language models with a novel procedure called concept scrubbing, which erases target concept information from _every_ layer in the network. We demonstrate our method on two tasks: measuring the reliance of language models on part-of-speech information, and reducing gender bias in BERT embeddings.
Our code is available at https://github.com/EleutherAI/concept-erasure.", "keywords": "interpretability;fairness;concept erasure;representation;adversarial;robustness", "primary_area": "", "supplementary_material": "", "author": "Nora Belrose;David Schneider-Joseph;Shauli Ravfogel;Ryan Cotterell;Edward Raff;Stella Biderman", "authorids": "~Nora_Belrose1;~David_Schneider-Joseph1;~Shauli_Ravfogel1;~Ryan_Cotterell1;~Edward_Raff1;~Stella_Biderman1", "gender": "F;;M;M;F;Not Specified", "homepage": "https://twitter.com/norabelrose;http://davidsj.com/;https://github.com/Shaul1321;http://www.edwardraff.com/;http://www.stellabiderman.com;https://rycolab.io/", "dblp": "332/2248;;227/2231;204/3369;239/5641;146/4361.html", "google_scholar": "p_oBc64AAAAJ;;;debM2bUAAAAJ;bO7H0DAAAAAJ;DexOqtoAAAAJ", "orcid": ";;;0000-0002-9900-1972;0000-0001-8228-1042;", "linkedin": ";;;edward-raff-09992040/;stellabiderman;", "or_profile": "~Nora_Belrose1;~David_Schneider-Joseph1;~Shauli_Ravfogel1;~Edward_Raff1;~Stella_Biderman1;~Ryan_D_Cotterell1", "aff": "EleutherAI;EleutherAI;Bar-Ilan University;Syracuse University;Booz Allen Hamilton;Swiss Federal Institute of Technology", "aff_domain": "eleuther.ai;eleuther.ai;biu.ac.il;syr.edu;boozallen.com;ethz.ch", "position": "Researcher;Researcher;PhD student;MBA student;Industry researcher;Assistant Professor", "bibtex": "@inproceedings{\nbelrose2023leace,\ntitle={{LEACE}: Perfect linear concept erasure in closed form},\nauthor={Nora Belrose and David Schneider-Joseph and Shauli Ravfogel and Ryan Cotterell and Edward Raff and Stella Biderman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=awIpKpwTwF}\n}", "github": "", "project": "", "reviewers": "uqXm;rafU;3Jip;EBcB", "pdf_size": 652703, "rating": "4;6;7;7", "confidence": "4;3;5;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;4;4", "wc_summary": "108;160;387;150", "wc_strengths": "82;72;1;47", "wc_weaknesses": "382;136;1;87", "wc_questions": "4;74;1;139", "wc_limitations": "94;13;1;1", "wc_review": "670;455;391;424", "wc_reply_reviewers": "0;15;13;25", "wc_reply_authors": "0;57;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 201.25, 109.00315362410392 ], "wc_strengths_avg": [ 50.5, 31.2929704566377 ], "wc_weaknesses_avg": [ 151.5, 141.5812487584426 ], "wc_questions_avg": [ 54.5, 56.8616742630746 ], "wc_limitations_avg": [ 27.25, 38.84826250940961 ], "wc_review_avg": [ 485.0, 109.18104231046706 ], "wc_reply_reviewers_avg": [ 13.25, 8.898735865278843 ], "wc_reply_authors_avg": [ 14.25, 24.681724007856502 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15570074468885964633&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "eleuther.ai;eleuther.ai;biu.ac.il;syr.edu;boozallen.com;ethz.ch", "author_num": 6, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "EleutherAI;Bar-Ilan University;Syracuse University;Booz Allen Hamilton;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": 
"https://www.eleuther.ai;https://www.biu.ac.il;https://www.syracuse.edu;https://www.boozallen.com;https://www.ethz.ch", "aff_unique_abbr": "EleutherAI;BIU;Syracuse;BAH;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;2", "aff_country_unique": "United States;Israel;Switzerland" }, { "title": "Characterization of Overfitting in Robust Multiclass Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71175", "id": "awbWWO0nb6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f144ab9985c739a5091ec188a2688644-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=awbWWO0nb6", "openreview": "https://openreview.net/forum?id=awbWWO0nb6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71175", "video": "https://nips.cc/virtual/2023/poster/71175", "author_site": "Jingyuan Xu, Weiwei Liu", "tldr": "", "abstract": "This paper considers the following question: Given the number of classes m, the number of robust accuracy queries k, and the number of test examples in the dataset n, how much can adaptive algorithms robustly overfit the test dataset? We solve this problem by equivalently giving near-matching upper and lower bounds of the robust overfitting bias in multiclass classification problems.", "keywords": "Learning Theory", "primary_area": "", "supplementary_material": "", "author": "Jingyuan Xu;Weiwei Liu", "authorids": "~Jingyuan_Xu2;~Weiwei_Liu1", "gender": ";M", "homepage": "https://github.com/fzJing;https://sites.google.com/site/weiweiliuhomepage/", "dblp": ";54/6677-3.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN", "orcid": ";", "linkedin": ";weiwei-liu-4a7849134/", "or_profile": "~Jingyuan_Xu2;~Weiwei_Liu1", "aff": "Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nxu2023characterization,\ntitle={Characterization of Overfitting in Robust Multiclass Classification},\nauthor={Jingyuan Xu and Weiwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=awbWWO0nb6}\n}", "github": "", "project": "", "reviewers": "YY54;PNTq;JGWq;WwD9;UhYx;5K3z", "pdf_size": 319970, "rating": "4;5;5;6;6;7", "confidence": "3;3;2;4;4;2", "soundness": "2;3;3;3;4;2", "novelty": "2;2;2;2;3;2", "presentation": "1;2;3;4;3;2", "wc_summary": "63;66;89;83;37;77", "wc_strengths": "26;37;77;176;12;84", "wc_weaknesses": "225;208;281;327;97;84", "wc_questions": "86;48;219;5;1;74", "wc_limitations": "13;1;9;5;1;20", "wc_review": "413;360;675;596;148;339", "wc_reply_reviewers": "316;36;235;0;79;71", "wc_reply_authors": "545;21;144;0;17;132", "reply_reviewers": "2;1;1;0;1;2", "reply_authors": "4;2;2;1;2;2", "rating_avg": [ 5.5, 0.9574271077563381 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 69.16666666666667, 16.974654308376618 ], "wc_strengths_avg": [ 68.66666666666667, 54.55781846404377 ], "wc_weaknesses_avg": [ 203.66666666666666, 88.84755983643495 ], "wc_questions_avg": [ 72.16666666666667, 72.93242686822431 ], "wc_limitations_avg": [ 8.166666666666666, 6.792561290771611 ], "wc_review_avg": [ 421.8333333333333, 173.2891770679545 ], "wc_reply_reviewers_avg": [ 122.83333333333333, 
113.38050488901912 ], "wc_reply_authors_avg": [ 143.16666666666666, 188.38561221305858 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.6871842709362768 ], "reply_authors_avg": [ 2.1666666666666665, 0.8975274678557507 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16152708458572284075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "whu.edu.cn;whu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Greedy Poisson Rejection Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71174", "id": "axRMkinASf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74fb3d526c7d8bd0c3e4b71704bb5abf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=axRMkinASf", "openreview": "https://openreview.net/forum?id=axRMkinASf", "poster": "/media/PosterPDFs/NeurIPS%202023/71174.png?t=1702048121.5331962", "slides": "https://nips.cc/virtual/2023/poster/71174", "video": "https://nips.cc/virtual/2023/poster/71174", "tldr": "", "abstract": "One-shot channel simulation is a fundamental data compression problem concerned with encoding a single sample from a target distribution $Q$ using a coding distribution $P$ with as few bits as possible on average. Algorithms that solve this problem find applications in neural data compression and differential privacy and can serve as a more efficient and natural alternative to quantization-based methods. Unfortunately, existing solutions are too slow or have limited applicability, preventing their widespread adoption. In this paper, we conclusively solve one-shot channel simulation for one-dimensional problems where the target-proposal density ratio is unimodal by describing an algorithm with optimal runtime. We achieve this by constructing a rejection sampling procedure equivalent to greedily searching over the points of a Poisson process. Hence, we call our algorithm greedy Poisson rejection sampling (GPRS) and analyze the correctness and time complexity of several of its variants.
Finally, we empirically verify our theorems, demonstrating that GPRS significantly outperforms the current state-of-the-art method, A* coding.", "keywords": "channel simulation;relative entropy coding;reverse channel coding;rejection sampling;Poisson process", "primary_area": "", "supplementary_material": "/attachment/b127e24870e2c977510135aa14d1040198557ce1.zip", "author": "Gergely Flamich", "authorids": "~Gergely_Flamich1", "gender": "M", "homepage": "https://gergely-flamich.github.io/", "dblp": "187/9709", "google_scholar": "4Iw9TH8AAAAJ", "orcid": "0009-0009-9831-7455", "linkedin": "gergely-flamich-142773102", "or_profile": "~Gergely_Flamich1", "aff": "University of Cambridge", "aff_domain": "cam.ac.uk", "position": "PhD student", "bibtex": "@inproceedings{\nflamich2023greedy,\ntitle={Greedy Poisson Rejection Sampling},\nauthor={Gergely Flamich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=axRMkinASf}\n}", "github": "", "project": "", "reviewers": "prqL;4NF6;HQuL;sb37;tLU5", "pdf_size": 550965, "rating": "5;6;6;6;9", "confidence": "2;1;3;3;5", "soundness": "3;4;3;3;4", "novelty": "2;3;3;2;4", "presentation": "2;3;4;3;3", "wc_summary": "53;186;69;138;110", "wc_strengths": "36;51;43;86;199", "wc_weaknesses": "123;20;42;161;17", "wc_questions": "122;132;18;50;248", "wc_limitations": "30;1;2;38;22", "wc_review": "364;390;174;473;596", "wc_reply_reviewers": "47;41;12;0;63", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 2.8, 1.32664991614216 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 111.2, 47.92243733367493 ], "wc_strengths_avg": [ 83.0, 60.49462786066214 ], "wc_weaknesses_avg": [ 72.6, 58.56483586590165 ], "wc_questions_avg": [ 114.0, 79.56883812146562 ], "wc_limitations_avg": [ 18.6, 14.853955702101715 ], "wc_review_avg": [ 399.4, 138.71928488858353 ], "wc_reply_reviewers_avg": [ 32.6, 23.191377708105225 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8224253152776162, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1455878159828681561&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cam.ac.uk", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Experiment Planning with Function Approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71173", "id": "axmY49ahVI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e0d9f30c100129259f66660403fb1e2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=axmY49ahVI", "openreview": "https://openreview.net/forum?id=axmY49ahVI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71173", "video": "https://nips.cc/virtual/2023/poster/71173", "author_site": "Aldo Pacchiano, Jonathan Lee, Emma Brunskill", "tldr": "", "abstract": "We study the problem of experiment planning with function 
approximation in contextual bandit problems. In settings where there is a significant overhead to deploying adaptive algorithms---for example, when the execution of the data collection policies is required to be distributed, or a human in the loop is needed to implement these policies---producing in advance a set of policies for data collection is paramount. We study the setting where a large dataset of contexts but not rewards is available and may be used by the learner to design an effective data collection strategy. Although when rewards are linear this problem has been well studied, results are still missing for more complex reward models. In this work we propose two experiment planning strategies compatible with function approximation. The first is an eluder planning and sampling procedure that can recover optimality guarantees depending on the eluder dimension of the reward function class. For the second, we show that a uniform sampler achieves competitive optimality rates in the setting where the number of actions is small. We conclude by introducing a statistical gap that fleshes out the fundamental differences between planning and adaptive learning, and we provide results for planning with model selection.", "keywords": "regret;model selection;planning;static;lower bound", "primary_area": "", "supplementary_material": "/attachment/38f0d3c622bacb5753b87176fd8f90fc25d825b8.pdf", "author": "Aldo Pacchiano;Jonathan Lee;Emma Brunskill", "authorids": "~Aldo_Pacchiano1;~Jonathan_Lee4;~Emma_Brunskill2", "gender": "M;M;", "homepage": "https://www.aldopacchiano.ai;http://jonathannlee.com/;", "dblp": "129/6338;30/3557-2.html;", "google_scholar": "no_BfYgAAAAJ;J8_FdjkAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Aldo_Pacchiano1;~Jonathan_Lee4;~Emma_Brunskill2", "aff": "Microsoft;Stanford University;", "aff_domain": "microsoft.com;stanford.edu;", "position": "Postdoc;PhD student;", "bibtex": "@inproceedings{\npacchiano2023experiment,\ntitle={Experiment Planning with Function Approximation},\nauthor={Aldo Pacchiano and Jonathan Lee and Emma Brunskill},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=axmY49ahVI}\n}", "github": "", "project": "", "reviewers": "ESHY;qQSQ;TWux;aXBj;xUKA", "pdf_size": 679468, "rating": "5;5;6;6;7", "confidence": "1;5;2;3;3", "soundness": "1;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "1;2;3;3;4", "wc_summary": "81;185;71;72;120", "wc_strengths": "36;48;69;26;117", "wc_weaknesses": "143;205;120;77;60", "wc_questions": "54;52;13;62;48", "wc_limitations": "4;1;17;15;1", "wc_review": "318;491;290;252;346", "wc_reply_reviewers": "58;80;22;39;0", "wc_reply_authors": "88;387;0;46;0", "reply_reviewers": "2;3;1;1;0", "reply_authors": "2;5;1;2;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 1.32664991614216 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 105.8, 43.457565509356364 ], "wc_strengths_avg": [ 59.2, 32.25771225614117 ], "wc_weaknesses_avg": [ 121.0, 51.37703767248556 ], "wc_questions_avg": [ 45.8, 17.02233826476257 ], "wc_limitations_avg": [ 7.6, 6.974238309665077 ], "wc_review_avg": [ 339.4, 81.9307024258916 ], "wc_reply_reviewers_avg": [ 39.8, 27.74454901417574 ], "wc_reply_authors_avg": [ 104.2, 145.14875128639585 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 25, 0
], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.040291148201269035, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1663360970963298304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "microsoft.com;stanford.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Stanford University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.stanford.edu", "aff_unique_abbr": "Microsoft;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On the Ability of Graph Neural Networks to Model Interactions Between Vertices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71172", "id": "ayZpFoAu5c", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/543ec10715d964122ab7cb15f648772b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ayZpFoAu5c", "openreview": "https://openreview.net/forum?id=ayZpFoAu5c", "poster": "/media/PosterPDFs/NeurIPS%202023/71172.png?t=1696870725.1917126", "slides": "https://nips.cc/virtual/2023/poster/71172", "video": "https://nips.cc/virtual/2023/poster/71172", "author_site": "Noam Razin, Tom Verbin, Nadav Cohen", "tldr": "", "abstract": "Graph neural networks (GNNs) are widely used for modeling complex interactions between entities represented as vertices of a graph. Despite recent efforts to theoretically analyze the expressive power of GNNs, a formal characterization of their ability to model interactions is lacking. The current paper aims to address this gap. Formalizing strength of interactions through an established measure known as separation rank, we quantify the ability of certain GNNs to model interaction between a given subset of vertices and its complement, i.e. between the sides of a given partition of input vertices. Our results reveal that the ability to model interaction is primarily determined by the partition's walk index --- a graph-theoretical characteristic defined by the number of walks originating from the boundary of the partition. Experiments with common GNN architectures corroborate this finding. As a practical application of our theory, we design an edge sparsification algorithm named Walk Index Sparsification (WIS), which preserves the ability of a GNN to model interactions when input edges are removed. WIS is simple, computationally efficient, and in our experiments has markedly outperformed alternative methods in terms of induced prediction accuracy. 
More broadly, it showcases the potential of improving GNNs by theoretically analyzing the interactions they can model.", "keywords": "Graph Neural Networks;Expressivity;Interactions;Edge Sparsification", "primary_area": "", "supplementary_material": "/attachment/dc3d7a7f7215e2717a4beb45c81ad6da619e3e50.zip", "author": "Noam Razin;Tom Verbin;Nadav Cohen", "authorids": "~Noam_Razin1;~Tom_Verbin1;~Nadav_Cohen1", "gender": "M;;M", "homepage": "https://noamrazin.github.io/;;http://www.cohennadav.com", "dblp": "247/1241;;119/7155", "google_scholar": "tDsd50oAAAAJ;;AfLwLQ0AAAAJ", "orcid": ";;", "linkedin": ";;cohennadav/", "or_profile": "~Noam_Razin1;~Tom_Verbin1;~Nadav_Cohen1", "aff": "Tel Aviv University;;School of Computer Science, Tel Aviv University", "aff_domain": "tau.ac.il;;cs.tau.ac.il", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nrazin2023on,\ntitle={On the Ability of Graph Neural Networks to Model Interactions Between Vertices},\nauthor={Noam Razin and Tom Verbin and Nadav Cohen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ayZpFoAu5c}\n}", "github": "", "project": "", "reviewers": "CGQv;uCaB;2FCe", "pdf_size": 2482955, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "123;95;150", "wc_strengths": "81;48;59", "wc_weaknesses": "256;38;134", "wc_questions": "337;2;109", "wc_limitations": "39;10;42", "wc_review": "836;193;494", "wc_reply_reviewers": "244;0;97", "wc_reply_authors": "816;0;37", "reply_reviewers": "3;0;1", "reply_authors": "5;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 122.66666666666667, 22.45489305746572 ], "wc_strengths_avg": [ 62.666666666666664, 13.719410418171117 ], "wc_weaknesses_avg": [ 142.66666666666666, 89.2088685177781 ], "wc_questions_avg": [ 149.33333333333334, 139.70524526858523 ], "wc_limitations_avg": [ 30.333333333333332, 14.42990721460891 ], "wc_review_avg": [ 507.6666666666667, 262.68147166398234 ], "wc_reply_reviewers_avg": [ 113.66666666666667, 100.30730559413682 ], "wc_reply_authors_avg": [ 284.3333333333333, 376.24844038067306 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1817459578718910310&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "tau.ac.il;;cs.tau.ac.il", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tel Aviv", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Sharp Recovery Thresholds of Tensor PCA Spectral Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71171", "id": "b1BhHjBxsx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b14d76c7266be21b338527cd25deac45-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b1BhHjBxsx", "openreview": 
"https://openreview.net/forum?id=b1BhHjBxsx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71171", "video": "https://nips.cc/virtual/2023/poster/71171", "author_site": "Michael Feldman, David Donoho", "tldr": "", "abstract": "Many applications seek to recover low-rank approximations of noisy tensor data. We consider several practical and effective matricization strategies which construct specific matrices from such tensors and then apply spectral methods; the strategies include tensor unfolding, partial tracing, power iteration, and recursive unfolding. We settle the behaviors of unfolding and partial tracing, identifying sharp thresholds in signal-to-noise ratio above which the signal is partially recovered. In particular, we extend previous results to a much larger class of tensor shapes where axis lengths may be different. For power iteration and recursive unfolding, we prove that under conditions where previous algorithms partially recovery the signal, these methods achieve (asymptotically) exact recovery. Our analysis deploys random matrix theory to obtain sharp thresholds which elude perturbation and concentration bounds. Specifically, we rely upon recent disproportionate random matrix results, which describe sequences of matrices with diverging aspect ratio.", "keywords": "tensor PCA;spectral algorithms;random matrix theory", "primary_area": "", "supplementary_material": "/attachment/074b726527353d857c1750d417789269aa494d6e.pdf", "author": "Michael Jacob Feldman;David Donoho", "authorids": "~Michael_Jacob_Feldman1;~David_Donoho1", "gender": "M;M", "homepage": ";https://statistics.stanford.edu/people/david-donoho", "dblp": ";d/DavidLDonoho.html", "google_scholar": ";https://scholar.google.com/scholar?hl=en", "orcid": "0000-0002-2621-9049;0000-0003-1830-710X", "linkedin": ";", "or_profile": "~Michael_Jacob_Feldman1;~David_L._Donoho1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nfeldman2023sharp,\ntitle={Sharp Recovery Thresholds of Tensor {PCA} Spectral Algorithms},\nauthor={Michael Jacob Feldman and David Donoho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b1BhHjBxsx}\n}", "github": "", "project": "", "reviewers": "tdbZ;PnQP;K1MF;mafC", "pdf_size": 1235180, "rating": "3;4;4;6", "confidence": "5;4;4;4", "soundness": "4;3;2;3", "novelty": "1;2;2;2", "presentation": "4;3;2;3", "wc_summary": "71;30;116;75", "wc_strengths": "53;66;85;96", "wc_weaknesses": "169;132;140;118", "wc_questions": "2;1;5;422", "wc_limitations": "2;1;79;1", "wc_review": "297;230;425;712", "wc_reply_reviewers": "0;68;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "0;1;0;1", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.0, 30.438462510448847 ], "wc_strengths_avg": [ 75.0, 16.62828914831589 ], "wc_weaknesses_avg": [ 139.75, 18.632968094214082 ], "wc_questions_avg": [ 107.5, 181.58262582086425 ], "wc_limitations_avg": [ 20.75, 33.633130987167995 ], "wc_review_avg": [ 416.0, 184.69840280847043 ], "wc_reply_reviewers_avg": [ 17.0, 29.444863728670914 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": 
[ 0.5, 0.5 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16292546903872803785&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stable Nonconvex-Nonconcave Training via Linear Interpolation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71170", "id": "b1JPBGJhUi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c256fa1965318b7fcb9ed104c265540-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b1JPBGJhUi", "openreview": "https://openreview.net/forum?id=b1JPBGJhUi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71170", "video": "https://nips.cc/virtual/2023/poster/71170", "author_site": "Thomas Pethick, Wanyun Xie, Volkan Cevher", "tldr": "", "abstract": "This paper presents a theoretical analysis of linear interpolation as a principled method for stabilizing (large-scale) neural network training. We argue that instabilities in the optimization process are often caused by the nonmonotonicity of the loss landscape and show how linear interpolation can help by leveraging the theory of nonexpansive operators. We construct a new optimization scheme called relaxed approximate proximal point (RAPP), which is the first 1-SCLI method to achieve last iterate convergence rates for $\\rho$-comonotone problems while only requiring $\\rho > -\\tfrac{1}{2L}$. The construction extends to constrained and regularized settings. By replacing the inner optimizer in RAPP we rediscover the family of Lookahead algorithms for which we establish convergence in cohypomonotone problems even when the base optimizer is taken to be gradient descent ascent. The range of cohypomonotone problems in which Lookahead converges is further expanded by exploiting that Lookahead inherits the properties of the base optimizer. 
We corroborate the results with experiments on generative adversarial networks, which demonstrate the benefits of the linear interpolation present in both RAPP and Lookahead.", "keywords": "Minimax optimization;Lookahead;Generative adversarial networks;Stability;Nonconvex-nonconcave;Cohypomonotone", "primary_area": "", "supplementary_material": "/attachment/666d93eee39c7625c9a4e7f02b30d5edacddabdb.pdf", "author": "Thomas Pethick;Wanyun Xie;Volkan Cevher", "authorids": "~Thomas_Pethick1;~Wanyun_Xie1;~Volkan_Cevher1", "gender": "M;F;M", "homepage": "https://pethick.dk;;http://lions.epfl.ch", "dblp": "305/4521;;70/5301", "google_scholar": ";S4rh8MoAAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;", "linkedin": ";wanyun-xie-71a287210/;", "or_profile": "~Thomas_Pethick1;~Wanyun_Xie1;~Volkan_Cevher1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Amazon Development Center Germany", "aff_domain": "epfl.ch;epfl.ch;amazon.de", "position": "PhD student;PhD student;Amazon Scholar", "bibtex": "@inproceedings{\npethick2023stable,\ntitle={Stable Nonconvex-Nonconcave Training via Linear Interpolation},\nauthor={Thomas Pethick and Wanyun Xie and Volkan Cevher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b1JPBGJhUi}\n}", "github": "", "project": "", "reviewers": "RmjV;KxAB;TKoG;7YGi", "pdf_size": 1303399, "rating": "6;6;7;7", "confidence": "2;3;1;3", "soundness": "3;4;3;4", "novelty": "3;4;3;3", "presentation": "2;4;3;2", "wc_summary": "74;86;45;187", "wc_strengths": "74;54;65;81", "wc_weaknesses": "107;79;22;113", "wc_questions": "6;47;1;50", "wc_limitations": "3;20;4;1", "wc_review": "264;286;137;432", "wc_reply_reviewers": "77;14;9;19", "wc_reply_authors": "123;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 98.0, 53.50233639758174 ], "wc_strengths_avg": [ 68.5, 10.111874208078342 ], "wc_weaknesses_avg": [ 80.25, 35.99565946055163 ], "wc_questions_avg": [ 26.0, 22.594247055390007 ], "wc_limitations_avg": [ 7.0, 7.582875444051551 ], "wc_review_avg": [ 279.75, 104.69568997814571 ], "wc_reply_reviewers_avg": [ 29.75, 27.50795339533641 ], "wc_reply_authors_avg": [ 30.75, 53.26056233274298 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1707963352746380965&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 8, "email": "epfl.ch;epfl.ch;amazon.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;Amazon", "aff_unique_dep": ";;Development Center", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "EPFL;EPFL;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "id": "b2WpR0Fymj", "title": "On the Universal Approximation Properties of Deep Neural Networks using MAM Neurons", "track": "main", "status": "Reject", "tldr": "", "abstract": "As Deep Neural Networks (DNNs) are trained to perform
tasks of increasing complexity, their size grows, presenting several challenges when it comes to deploying them on edge devices that have limited resources. To cope with this, a recently proposed approach hinges on substituting the classical Multiply-and-Accumulate (MAC) neurons in the hidden layers of a DNN with other neurons called Multiply-And-Max/min (MAM) whose selective behaviour helps identify important interconnections and allows extremely aggressive pruning. Hybrid structures with MAC and MAM neurons promise a reduction in the number of interconnections that outperforms what can be achieved with MAC-only structures by more than an order of magnitude. However, to date, the lack of any theoretical demonstration of their ability to work as universal approximators has limited their adoption. \nHere, we take a first step in the theoretical characterization of the capabilities of MAM\\&MAC networks. In detail, we prove two theorems confirming that they are universal approximators, provided that two hidden MAM layers are followed either by a MAC neuron without nonlinearity or by a normalized variant of the same. Approximation quality is measured either in terms of the first-order $L^p$ Sobolev norm or by the $L^\\infty$ norm.", "keywords": "universal approximation theory;multiply-and-max/min neurons", "primary_area": "", "supplementary_material": "", "author": "Philippe Bich;Andriy Enttsel;Luciano Prono;Alex Marchioni;Fabio Pareschi;Mauro Mangia;Gianluca Setti;Riccardo Rovatti", "authorids": "~Philippe_Bich1;~Andriy_Enttsel1;~Luciano_Prono1;alex.marchioni@unibo.it;fabio.pareschi@polito.it;mauro.mangia@unibo.it;gianluca.setti@kaust.edu.sa;riccardo.rovatti@unibo.it", "gender": "M;M;M;;;;;", "homepage": "https://smartdata.polito.it/members/philippe-bich/;https://www.unibo.it/sitoweb/andriy.enttsel;;;;;;", "dblp": ";304/2228;;;;;;", "google_scholar": ";ilSzGRwAAAAJ;acEieFgAAAAJ;;;;;", "orcid": "0000-0002-3772-4817;0000-0003-1010-3858;0000-0003-1507-9092;;;;;", "linkedin": "philippebich/;;luciano-prono-573b82145/;;;;;", "or_profile": "~Philippe_Bich1;~Andriy_Enttsel1;~Luciano_Prono1;alex.marchioni@unibo.it;fabio.pareschi@polito.it;mauro.mangia@unibo.it;gianluca.setti@kaust.edu.sa;riccardo.rovatti@unibo.it", "aff": "Polytechnic Institute of Turin;University of Bologna;Polytechnic Institute of Turin;;;;;", "aff_domain": "polito.it;unibo.it;polito.it;;;;;", "position": "PhD student;PhD student;PhD student;;;;;", "bibtex": "@misc{\nbich2023on,\ntitle={On the Universal Approximation Properties of Deep Neural Networks using {MAM} Neurons},\nauthor={Philippe Bich and Andriy Enttsel and Luciano Prono and Alex Marchioni and Fabio Pareschi and Mauro Mangia and Gianluca Setti and Riccardo Rovatti},\nyear={2023},\nurl={https://openreview.net/forum?id=b2WpR0Fymj}\n}", "github": "", "project": "", "reviewers": "p1mV;Ar7s;A9Cg;rhbL", "site": "https://openreview.net/forum?id=b2WpR0Fymj", "pdf_size": 1713836, "rating": "3;3;6;6", "confidence": "5;4;5;4", "soundness": "2;2;3;4", "novelty": "1;2;3;3", "presentation": "2;1;2;3", "wc_summary": "88;118;52;173", "wc_strengths": "27;48;55;67", "wc_weaknesses": "319;228;198;134", "wc_questions": "21;212;10;103", "wc_limitations": "56;78;10;59", "wc_review": "511;684;325;536", "wc_reply_reviewers": "22;405;0;400", "wc_reply_authors": "217;543;0;457", "reply_reviewers": "1;2;0;2", "reply_authors": "2;2;1;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ],
"presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.75, 44.33043536894263 ], "wc_strengths_avg": [ 49.25, 14.53229162933362 ], "wc_weaknesses_avg": [ 219.75, 66.60471079435749 ], "wc_questions_avg": [ 86.5, 80.87799453497843 ], "wc_limitations_avg": [ 50.75, 24.993749218554626 ], "wc_review_avg": [ 514.0, 127.58722506583486 ], "wc_reply_reviewers_avg": [ 206.75, 195.912448558023 ], "wc_reply_authors_avg": [ 304.25, 212.43513715955748 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rSMf-DCFCOYJ:scholar.google.com/&scioq=On+the+Universal+Approximation+Properties+of+Deep+Neural+Networks+using+MAM+Neurons&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Polytechnic Institute of Turin;University of Bologna", "aff_unique_dep": ";", "aff_unique_url": "https://www.polito.it;https://www.unibo.it", "aff_unique_abbr": "Polito;Unibo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "LightSpeed: Light and Fast Neural Light Fields on Mobile Devices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71169", "id": "b2wSODM7iG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/631ad9ae3174bf4d6c0f6fdca77335a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b2wSODM7iG", "openreview": "https://openreview.net/forum?id=b2wSODM7iG", "poster": "/media/PosterPDFs/NeurIPS%202023/71169.png?t=1699581492.62952", "slides": "https://nips.cc/virtual/2023/poster/71169", "video": "https://nips.cc/virtual/2023/poster/71169", "author_site": "Aarush Gupta, Junli Cao, Chaoyang Wang, Ju Hu, Sergey Tulyakov, Jian Ren, L\u00e1szl\u00f3 Jeni", "tldr": "", "abstract": "Real-time novel-view image synthesis on mobile devices is prohibitive due to the limited computational power and storage. Using volumetric rendering methods, such as NeRF and its derivatives, on mobile devices is not suitable due to the high computational cost of volumetric rendering. On the other hand, recent advances in neural light field representations have shown promising real-time view synthesis results on mobile devices. Neural light field methods learn a direct mapping from a ray representation to the pixel color. The current choice of ray representation is either stratified ray sampling or Pl\u00fccker coordinates, overlooking the classic light slab (two-plane) representation, the preferred representation to interpolate between light field views. In this work, we find that using the light slab representation is an efficient representation for learning a neural light field. More importantly, it is a lower-dimensional ray representation enabling us to learn the 4D ray space using feature grids which are significantly faster to train and render. Although mostly designed for frontal views, we show that the light-slab representation can be further extended to non-frontal scenes using a divide-and-conquer strategy. 
Our method provides better rendering quality than prior light field methods, as well as a significantly better trade-off between rendering quality and speed.", "keywords": "light field;neural radiance field;novel view synthesis", "primary_area": "", "supplementary_material": "/attachment/696c5c1039a3015a98171de265bc38464fbd3235.zip", "author": "Aarush Gupta;Junli Cao;Chaoyang Wang;Ju Hu;Sergey Tulyakov;Jian Ren;Laszlo Attila Jeni", "authorids": "~Aarush_Gupta1;~Junli_Cao2;~Chaoyang_Wang1;~Ju_Hu1;~Sergey_Tulyakov1;~Jian_Ren2;~Laszlo_Attila_Jeni1", "gender": "M;M;M;M;M;M;M", "homepage": "https://www.cs.cmu.edu/~aarushg3;;https://mightychaos.github.io/;;http://www.stulyakov.com/;https://alanspike.github.io/;http://www.laszlojeni.com/", "dblp": "223/4539;234/8466;;;40/6115;59/2180-5;35/7547", "google_scholar": "_oaol88AAAAJ;;I-xDKHEAAAAJ;ozJiSMcAAAAJ;mgzXR0sAAAAJ;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ;Wdnc-mEAAAAJ", "orcid": ";;;;;;0000-0002-2830-700X", "linkedin": "aarush98/;junli-cao-5165b41a1;;erichuju;sergeytulyakov/;;laszlojeni/", "or_profile": "~Aarush_Gupta1;~Junli_Cao2;~Chaoyang_Wang1;~Ju_Hu1;~Sergey_Tulyakov1;~Jian_Ren2;~Laszlo_Attila_Jeni1", "aff": "Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Snap Inc.;;Snap Inc.;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;cs.cmu.edu;snapchat.com;;snapchat.com;cmu.edu", "position": "MS student;MS student;PhD student;Researcher;;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\ngupta2023lightspeed,\ntitle={LightSpeed: Light and Fast Neural Light Fields on Mobile Devices},\nauthor={Aarush Gupta and Junli Cao and Chaoyang Wang and Ju Hu and Sergey Tulyakov and Jian Ren and Laszlo Attila Jeni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b2wSODM7iG}\n}", "github": "", "project": "", "reviewers": "bQpx;BeXQ;CSSm;8eAH;XtjZ", "pdf_size": 8773128, "rating": "4;5;6;6;8", "confidence": "5;4;4;3;2", "soundness": "2;3;4;2;3", "novelty": "2;2;3;3;3", "presentation": "3;2;4;3;3", "wc_summary": "78;30;110;53;61", "wc_strengths": "16;81;113;60;49", "wc_weaknesses": "290;82;89;276;1", "wc_questions": "59;87;27;8;1", "wc_limitations": "57;3;18;1;1", "wc_review": "500;283;357;398;113", "wc_reply_reviewers": "477;14;38;22;0", "wc_reply_authors": "938;0;50;149;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "4;1;2;3;1", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 66.4, 26.71778433927484 ], "wc_strengths_avg": [ 63.8, 32.35676127179604 ], "wc_weaknesses_avg": [ 147.6, 114.8853341380004 ], "wc_questions_avg": [ 36.4, 32.30851281009388 ], "wc_limitations_avg": [ 16.0, 21.46625258399798 ], "wc_review_avg": [ 330.2, 129.26778407631193 ], "wc_reply_reviewers_avg": [ 110.2, 183.81120749290562 ], "wc_reply_authors_avg": [ 227.4, 359.441566878401 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9460998335825321, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7021280842192936290&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email":
"andrew.cmu.edu;cmu.edu;cs.cmu.edu;snapchat.com;;snapchat.com;cmu.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.snapinc.com", "aff_unique_abbr": "CMU;Snap", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "b4Tr8NWTDt", "title": "Co-Learning Empirical Games and World Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Game-based decision-making involves reasoning over both world dynamics and strategic interactions among the agents. Typically, empirical models capturing these respective aspects are learned and used separately. We investigate the potential gain from co-learning these elements: a world model for dynamics and an empirical game for strategic interactions. Empirical games drive world models toward a broader consideration of possible game dynamics induced by a diversity of strategy profiles. Conversely, world models guide empirical games to efficiently discover new strategies through planning. We demonstrate these benefits first independently, then in combination as realized by a new algorithm, Dyna-PSRO, that co-learns an empirical game and a world model. When compared to PSRO---a baseline empirical-game building algorithm, Dyna-PSRO is found to compute lower regret solutions on partially observable general-sum games. In our experiments, Dyna-PSRO also requires substantially fewer experiences than PSRO, a key algorithmic advantage for settings where collecting player-game interaction data is a cost-limiting factor.", "keywords": "Multiagent learning;Empirical Game Theory;Model-Based Reinforcement Learning;Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/ce55ed3be192332b05c12bae9493f55fecd17ca8.pdf", "author": "Max Olan Smith;Michael P. Wellman", "authorids": "~Max_Olan_Smith1;~Michael_P._Wellman1", "gender": "M;M", "homepage": "https://www.maxosmith.com;https://strategicreasoning.org/michael-p-wellman", "dblp": "275/3418;w/MichaelPWellman", "google_scholar": "gc1jnZ4AAAAJ;https://scholar.google.com.tw/citations?user=UruIct4AAAAJ", "orcid": ";0000-0002-1691-6844", "linkedin": ";https://linkedin.com/in/michael-wellman-23ab1", "or_profile": "~Max_Smith1;~Michael_Wellman1", "aff": "University of Michigan;University of Michigan", "aff_domain": "umich.edu;umich.edu", "position": "PhD student;Full Professor", "bibtex": "@misc{\nsmith2023colearning,\ntitle={Co-Learning Empirical Games and World Models},\nauthor={Max Olan Smith and Michael P. 
Wellman},\nyear={2023},\nurl={https://openreview.net/forum?id=b4Tr8NWTDt}\n}", "github": "", "project": "", "reviewers": "s5k7;WC2X;Nexz;Lma6", "site": "https://openreview.net/forum?id=b4Tr8NWTDt", "pdf_size": 1610729, "rating": "3;6;6;7", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "109;25;272;108", "wc_strengths": "136;113;128;94", "wc_weaknesses": "243;366;904;78", "wc_questions": "253;198;465;16", "wc_limitations": "40;12;2;6", "wc_review": "781;714;1771;302", "wc_reply_reviewers": "255;89;759;24", "wc_reply_authors": "343;0;897;25", "reply_reviewers": "2;1;3;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 128.5, 89.58934088383506 ], "wc_strengths_avg": [ 117.75, 16.005858302509115 ], "wc_weaknesses_avg": [ 397.75, 309.63072764181527 ], "wc_questions_avg": [ 233.0, 160.10777620090786 ], "wc_limitations_avg": [ 15.0, 14.866068747318506 ], "wc_review_avg": [ 892.0, 539.6169938020855 ], "wc_reply_reviewers_avg": [ 281.75, 288.12790128691114 ], "wc_reply_authors_avg": [ 316.25, 361.53379855830906 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3999842010021962723&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Heavy-Tailed Algebra for Probabilistic Programming", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71168", "id": "b5R8mbqo9Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d8f7945cd7f4446cb05a390d4c00558-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b5R8mbqo9Q", "openreview": "https://openreview.net/forum?id=b5R8mbqo9Q", "poster": "/media/PosterPDFs/NeurIPS%202023/71168.png?t=1702392080.5650775", "slides": "https://nips.cc/virtual/2023/poster/71168", "video": "https://nips.cc/virtual/2023/poster/71168", "author_site": "Feynman Liang, Liam Hodgkinson, Michael Mahoney", "tldr": "", "abstract": "Despite the successes of probabilistic models based on passing noise through neural networks, recent work has identified that such methods often fail to capture tail behavior accurately---unless the tails of the base distribution are appropriately calibrated. To overcome this deficiency, we propose a systematic approach for analyzing the tails of random variables, and we illustrate how this approach can be used during the static analysis (before drawing samples) pass of a probabilistic programming language (PPL) compiler. To characterize how the tails change under various operations, we develop an algebra which acts on a three-parameter family of tail asymptotics and which is based on the generalized Gamma distribution. 
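To convey the flavor of such a tail algebra, a toy sketch follows; the two-parameter `Tail` class and its closure rules are deliberate simplifications of the paper's three-parameter generalized-Gamma family, with only well-known special cases (sub-Gaussian scales combining in quadrature, a product of two sub-Gaussians being sub-exponential) hard-coded.

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class Tail:
    """Toy tail class ~ exp(-(x / sigma) ** rho); smaller rho = heavier tail."""
    rho: float
    sigma: float

    def __add__(self, other):
        if self.rho != other.rho:
            # The heavier (smaller-rho) tail dominates the sum of independent variables.
            return min(self, other, key=lambda t: t.rho)
        if self.rho == 2.0:
            # Independent sub-Gaussians: scales combine in quadrature, which is
            # how an algebra of this kind can distinguish differing scales.
            return Tail(2.0, (self.sigma**2 + other.sigma**2) ** 0.5)
        return Tail(self.rho, max(self.sigma, other.sigma))

    def __mul__(self, other):
        # Product of exp(-x**rho1)- and exp(-x**rho2)-type tails follows
        # 1/rho = 1/rho1 + 1/rho2 (e.g., Gaussian * Gaussian -> exponential).
        rho = 1.0 / (1.0 / self.rho + 1.0 / other.rho)
        return Tail(rho, self.sigma * other.sigma)

g1, g2 = Tail(rho=2.0, sigma=1.0), Tail(rho=2.0, sigma=3.0)
print(g1 + g2)   # sub-Gaussian with combined scale
print(g1 * g2)   # sub-exponential (rho = 1)
```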
Our algebraic operations are closed under addition and multiplication; they are capable of distinguishing sub-Gaussians with differing scales; and they handle ratios sufficiently well to reproduce the tails of most important statistical distributions directly from their definitions. Our empirical results confirm that inference algorithms that leverage our heavy-tailed algebra attain superior performance across a number of density modeling and variational inference (VI) tasks.", "keywords": "probabilistic programming;static analysis;heavy tails;monte carlo;mcmc;variational inference", "primary_area": "", "supplementary_material": "/attachment/33a0219ca165e2c5b95d37752d6bf68daf18623b.zip", "author": "Feynman T. Liang;Liam Hodgkinson;Michael W. Mahoney", "authorids": "~Feynman_T._Liang1;~Liam_Hodgkinson1;~Michael_W._Mahoney1", "gender": ";M;", "homepage": ";http://www.liamhodgkinson.com;", "dblp": "191/6740;238/1555;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Feynman_T._Liang1;~Liam_Hodgkinson1;~Michael_W._Mahoney1", "aff": "Foundation Model Operations Company;University of Melbourne;", "aff_domain": "fmops.ai;unimelb.edu;", "position": "Researcher;Lecturer;", "bibtex": "@inproceedings{\nliang2023a,\ntitle={A Heavy-Tailed Algebra for Probabilistic Programming},\nauthor={Feynman T. Liang and Liam Hodgkinson and Michael W. Mahoney},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b5R8mbqo9Q}\n}", "github": "", "project": "", "reviewers": "bd7W;mtnU;E196;9rGq", "pdf_size": 644934, "rating": "7;7;7;7", "confidence": "3;3;3;4", "soundness": "4;3;4;3", "novelty": "3;3;4;4", "presentation": "4;3;3;3", "wc_summary": "128;68;69;327", "wc_strengths": "124;40;56;117", "wc_weaknesses": "94;22;110;558", "wc_questions": "88;39;97;200", "wc_limitations": "18;10;7;44", "wc_review": "452;179;339;1246", "wc_reply_reviewers": "17;19;12;191", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 148.0, 106.16261112086495 ], "wc_strengths_avg": [ 84.25, 36.772102197182036 ], "wc_weaknesses_avg": [ 196.0, 211.61285405192191 ], "wc_questions_avg": [ 106.0, 58.58754133772811 ], "wc_limitations_avg": [ 19.75, 14.566657131957214 ], "wc_review_avg": [ 554.0, 411.13197394510684 ], "wc_reply_reviewers_avg": [ 59.75, 75.82009957788237 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5006006395860149203&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "fmops.ai;unimelb.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Foundation Model Operations Company;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": ";https://www.unimelb.edu.au", "aff_unique_abbr": ";UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Australia" }, { "title": "On the Robustness of Removal-Based Feature Attributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71167", "id": "b60wLlkBta", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/fbbda4e85a6641bf425be3a6cfd84d20-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b60wLlkBta", "openreview": "https://openreview.net/forum?id=b60wLlkBta", "poster": "/media/PosterPDFs/NeurIPS%202023/71167.png?t=1701997936.4141176", "slides": "https://nips.cc/virtual/2023/poster/71167", "video": "https://nips.cc/virtual/2023/poster/71167", "author_site": "Chris Lin, Ian Covert, Su-In Lee", "tldr": "", "abstract": "To explain predictions made by complex machine learning models, many feature attribution methods have been developed that assign importance scores to input features. Some recent work challenges the robustness of these methods by showing that they are sensitive to input and model perturbations, while other work addresses this issue by proposing robust attribution methods. However, previous work on attribution robustness has focused primarily on gradient-based feature attributions, whereas the robustness of removal-based attribution methods is not currently well understood. To bridge this gap, we theoretically characterize the robustness properties of removal-based feature attributions. Specifically, we provide a unified analysis of such methods and derive upper bounds for the difference between intact and perturbed attributions, under settings of both input and model perturbations. Our empirical results on synthetic and real-world data validate our theoretical results and demonstrate their practical implications, including the ability to increase attribution robustness by improving the model\u2019s Lipschitz regularity.", "keywords": "explainable artificial intelligence;interpretable machine learning;feature attributions;removal-based feature attributions;robustness", "primary_area": "", "supplementary_material": "", "author": "Chris Lin;Ian Connick Covert;Su-In Lee", "authorids": "~Chris_Lin1;~Ian_Connick_Covert1;~Su-In_Lee2", "gender": ";M;F", "homepage": "https://homes.cs.washington.edu/~clin25/;https://iancovert.com;http://suinlee.cs.washington.edu/", "dblp": "163/4747;262/3443;17/1784", "google_scholar": "4Jj_dNgAAAAJ;Np8Ek3cAAAAJ;", "orcid": ";;", "linkedin": ";ian-covert/;", "or_profile": "~Chris_Lin1;~Ian_Connick_Covert1;~Su-In_Lee2", "aff": "University of Washington;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;uw.edu;uw.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlin2023on,\ntitle={On the Robustness of Removal-Based Feature Attributions},\nauthor={Chris Lin and Ian Connick Covert and Su-In Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b60wLlkBta}\n}", "github": "", "project": "", "reviewers": "dzDD;2wCy;aeSn;isMK", "pdf_size": 9076216, "rating": "6;6;6;7", "confidence": "4;3;4;3", "soundness": "4;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "74;71;91;68", "wc_strengths": "171;97;110;40", "wc_weaknesses": "621;111;21;114", "wc_questions": "102;72;5;53", "wc_limitations": "18;23;14;17", "wc_review": "986;374;241;292", "wc_reply_reviewers": "176;57;0;179", "wc_reply_authors": "211;0;0;320", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.0, 8.916277250063503 ], 
"wc_strengths_avg": [ 104.5, 46.5537323960174 ], "wc_weaknesses_avg": [ 216.75, 236.36663787429902 ], "wc_questions_avg": [ 58.0, 35.234925854895735 ], "wc_limitations_avg": [ 18.0, 3.24037034920393 ], "wc_review_avg": [ 473.25, 299.81442176786624 ], "wc_reply_reviewers_avg": [ 103.0, 77.18484307168086 ], "wc_reply_authors_avg": [ 132.75, 138.23055921177487 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1023388000647184534&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.washington.edu;uw.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Convergence of Alternating Gradient Descent for Matrix Factorization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71166", "id": "b6FeLpKKjl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46c10f6c8ea5aa6f267bcdabcb123f97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b6FeLpKKjl", "openreview": "https://openreview.net/forum?id=b6FeLpKKjl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71166", "video": "https://nips.cc/virtual/2023/poster/71166", "author_site": "Rachel Ward, Tamara Kolda", "tldr": "", "abstract": "We consider alternating gradient descent (AGD) with fixed step size applied to the asymmetric matrix factorization objective.\n We show that, for a rank-$r$ matrix $A \\in \\mathbb{R}^{m \\times n}$,\n $T = C ( \\frac{\\sigma_1(A)}{\\sigma_r(A)} )^2 \\log(1/\\epsilon)$\n iterations of alternating gradient descent suffice to reach an $\\epsilon$-optimal factorization \n $\\| A - X_{T} Y_{T}' \\|^2 \\leq \\epsilon \\| A \\|^2$ with high probability\n starting from an atypical random initialization. The\n factors have rank $d \\geq r$ so that $X_{T}\\in \\mathbb{R}^{m \\times d}$ and $Y_{T} \\in\\mathbb{R}^{n \\times d}$, and mild overparameterization suffices for the constant $C$ in the iteration complexity $T$ to be an absolute constant. \n Experiments suggest that our proposed initialization is not merely of theoretical benefit, but rather significantly improves the convergence rate of gradient descent in practice. Our proof is conceptually simple: a uniform Polyak-Lojasiewicz (PL) inequality and uniform Lipschitz smoothness constant are guaranteed for a sufficient number of iterations, starting from our random initialization. Our proof method should be useful for extending and simplifying convergence analyses for a broader class of nonconvex low-rank factorization problems.", "keywords": "matrix factorization; gradient descent; global convergence; concentration; optimization", "primary_area": "", "supplementary_material": "/attachment/448e5722d71427a347f403669397d66c45ec6b97.pdf", "author": "Rachel Ward;Tamara G. 
Kolda", "authorids": "~Rachel_Ward1;~Tamara_G._Kolda1", "gender": ";", "homepage": ";https://mathsci.ai", "dblp": "80/7132;45/6474", "google_scholar": ";9hjmW7AAAAAJ", "orcid": ";0000-0003-4176-2493", "linkedin": ";tammy-kolda-a82b6b1/", "or_profile": "~Rachel_Ward1;~Tamara_G._Kolda1", "aff": "University of Texas at Austin;MathSci.ai", "aff_domain": "utexas.edu;mathsci.ai", "position": "Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nward2023convergence,\ntitle={Convergence of Alternating Gradient Descent for Matrix Factorization},\nauthor={Rachel Ward and Tamara G. Kolda},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b6FeLpKKjl}\n}", "github": "", "project": "", "reviewers": "GCxr;Ewek;BUrC;Zy5R", "pdf_size": 503750, "rating": "4;8;8;8", "confidence": "4;4;3;4", "soundness": "3;3;4;4", "novelty": "1;4;3;4", "presentation": "3;4;4;4", "wc_summary": "117;115;123;154", "wc_strengths": "46;106;12;79", "wc_weaknesses": "248;170;76;6", "wc_questions": "200;72;55;19", "wc_limitations": "3;1;12;1", "wc_review": "614;464;278;259", "wc_reply_reviewers": "0;12;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 7.0, 1.7320508075688772 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 127.25, 15.722197683530124 ], "wc_strengths_avg": [ 60.75, 35.26595383652624 ], "wc_weaknesses_avg": [ 125.0, 91.80958555619343 ], "wc_questions_avg": [ 86.5, 68.26602375999352 ], "wc_limitations_avg": [ 4.25, 4.548351349665063 ], "wc_review_avg": [ 403.75, 145.43104035934007 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16212651007138601146&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "utexas.edu;mathsci.ai", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Texas at Austin;MathSci.ai", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.mathsci.ai", "aff_unique_abbr": "UT Austin;MathSci.ai", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "One-Step Diffusion Distillation via Deep Equilibrium Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71165", "id": "b6XvK2de99", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/82f05a105c928c10706213952bf0c8b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b6XvK2de99", "openreview": "https://openreview.net/forum?id=b6XvK2de99", "poster": "/media/PosterPDFs/NeurIPS%202023/71165.png?t=1702442179.2087371", "slides": "https://nips.cc/virtual/2023/poster/71165", "video": "https://nips.cc/virtual/2023/poster/71165", "author_site": "Zhengyang Geng, Ashwini Pokle, J. Zico Kolter", "tldr": "", "abstract": "Diffusion models excel at producing high-quality samples but naively require hundreds of iterations, prompting multiple attempts to distill the generation process into a faster network. 
However, many existing approaches suffer from a variety of challenges: the process for distillation training can be complex, often requiring multiple training stages, and the resulting models perform poorly when utilized in single-step generative applications. In this paper, we introduce a simple yet effective means of distilling diffusion models *directly* from the initial noise to the resulting image. Of particular importance to our approach is to leverage a new Deep Equilibrium (DEQ) model as the distilled architecture: the Generative Equilibrium Transformer (GET). Our method enables fully offline training with just noise/image pairs from the diffusion model while achieving superior performance compared to existing one-step methods on comparable training budgets. We demonstrate that the DEQ architecture is crucial to this capability, as GET matches a $5\\times$ larger ViT in terms of FID scores while striking a critical balance of computational cost and image quality. Code, checkpoints, and datasets are available [here](https://github.com/locuslab/get).", "keywords": "Deep Equilibrium Models;Diffusion Models;Distillation;Generative Models", "primary_area": "", "supplementary_material": "/attachment/2e665db8d992c7ba0abf47a9db4d2c0668c6d95e.zip", "author": "Zhengyang Geng;Ashwini Pokle;J Zico Kolter", "authorids": "~Zhengyang_Geng1;~Ashwini_Pokle1;~J_Zico_Kolter1", "gender": ";F;M", "homepage": "https://gsunshine.github.io/;https://ashwinipokle.github.io/;http://www.zicokolter.com", "dblp": "250/2651.html;228/5527;67/2526", "google_scholar": "lNkw3QYAAAAJ;o_1YtVoAAAAJ;UXh1I6UAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhengyang_Geng1;~Ashwini_Pokle1;~Zico_Kolter1", "aff": "Meta Facebook;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "meta.com;andrew.cmu.edu;cmu.edu", "position": "Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\ngeng2023onestep,\ntitle={One-Step Diffusion Distillation via Deep Equilibrium Models},\nauthor={Zhengyang Geng and Ashwini Pokle and J Zico Kolter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b6XvK2de99}\n}", "github": "", "project": "", "reviewers": "MHac;sYNQ;GPxt;UV2u;FiXo;BXuM", "pdf_size": 2946108, "rating": "5;5;5;5;6;6", "confidence": "4;3;3;4;4;4", "soundness": "3;2;3;3;2;3", "novelty": "2;2;4;2;3;2", "presentation": "3;3;4;3;3;3", "wc_summary": "63;56;69;82;62;165", "wc_strengths": "192;19;40;44;106;84", "wc_weaknesses": "517;30;111;220;112;186", "wc_questions": "152;158;44;34;4;4", "wc_limitations": "15;7;1;1;6;6", "wc_review": "939;270;265;381;290;445", "wc_reply_reviewers": "40;32;135;36;157;104", "wc_reply_authors": "0;0;359;0;0;0", "reply_reviewers": "1;1;2;1;1;1", "reply_authors": "1;1;2;1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.7637626158259734 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 82.83333333333333, 37.61833181959148 ], "wc_strengths_avg": [ 80.83333333333333, 57.51642277548986 ], "wc_weaknesses_avg": [ 196.0, 155.75300960174093 ], "wc_questions_avg": [ 66.0, 64.62197768561404 ], "wc_limitations_avg": [ 6.0, 4.69041575982343 ], "wc_review_avg": [ 431.6666666666667, 235.9785771821012 ], "wc_reply_reviewers_avg": [ 84.0, 50.45460005457051 ], "wc_reply_authors_avg": [ 59.833333333333336, 
133.79140065373744 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5000000000000001, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3399471040838485220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "meta.com;andrew.cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Meta;Carnegie Mellon University", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.cmu.edu", "aff_unique_abbr": "Meta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A case for reframing automated medical image classification as segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71164", "id": "b8xowIlZ7v", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad6a3bd12095fdca71c306871bdec400-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=b8xowIlZ7v", "openreview": "https://openreview.net/forum?id=b8xowIlZ7v", "poster": "/media/PosterPDFs/NeurIPS%202023/71164.png?t=1699319877.7816777", "slides": "https://nips.cc/virtual/2023/poster/71164", "video": "https://nips.cc/virtual/2023/poster/71164", "author_site": "Sarah Hooper, Mayee Chen, Khaled Saab, Kush Bhatia, Curtis Langlotz, Christopher R\u00e9", "tldr": "", "abstract": "Image classification and segmentation are common applications of deep learning to radiology. While many tasks can be framed using either classification or segmentation, classification has historically been cheaper to label and more widely used. However, recent work has drastically reduced the cost of training segmentation networks. In light of this recent work, we reexamine the choice of training classification vs. segmentation models. First, we use an information theoretic approach to analyze why segmentation vs. classification models may achieve different performance on the same dataset and overarching task. We then implement multiple methods for using segmentation models to classify medical images, which we call *segmentation-for-classification*, and compare these methods against traditional classification on three retrospective datasets. 
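One simple instance of segmentation-for-classification, aggregating a predicted mask into an image-level label, can be sketched as follows; the specific thresholding rule is an illustrative choice, not necessarily one of the paper's implemented methods.

```python
import numpy as np

def classify_from_segmentation(prob_map, threshold=0.5, min_pixels=10):
    # Call the image positive if enough pixels are predicted as the target
    # class; other aggregation schemes (max-pooling the probability map,
    # learned aggregators) slot in the same place.
    return int((prob_map > threshold).sum() >= min_pixels)

# Hypothetical per-pixel probabilities from a segmentation network.
rng = np.random.default_rng(0)
prob_map = rng.uniform(0.0, 0.4, size=(128, 128))
prob_map[40:60, 40:60] = 0.9                 # a lesion-like region
print(classify_from_segmentation(prob_map))  # -> 1
```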
We use our analysis and experiments to summarize the benefits of switching from classification to segmentation, including: improved sample efficiency, enabling improved performance with fewer labeled images (up to an order of magnitude lower), on low-prevalence classes, and on certain rare subgroups (up to 161.1\\% improved recall); improved robustness to spurious correlations (up to 44.8\\% improved robust AUROC); and improved model interpretability, evaluation, and error analysis.", "keywords": "Medical imaging;segmentation;classification", "primary_area": "", "supplementary_material": "", "author": "Sarah Hooper;Mayee F Chen;Khaled Kamal Saab;Kush Bhatia;Curtis Langlotz;Christopher Re", "authorids": "~Sarah_Hooper1;~Mayee_F_Chen1;~Khaled_Kamal_Saab1;~Kush_Bhatia3;~Curtis_Langlotz1;~Christopher_Re1", "gender": ";;;;M;", "homepage": ";;https://web.stanford.edu/~ksaab/;;https://profiles.stanford.edu/curtis-langlotz;", "dblp": ";;176/4061;;12/1751;", "google_scholar": ";;W77CiNUAAAAJ;;WQkBYwQAAAAJ;", "orcid": ";;0000-0003-1427-0469;;0000-0002-8972-8051;", "linkedin": ";;khaled-saab-181034122/;;langlotz/;", "or_profile": "~Sarah_Hooper1;~Mayee_F_Chen1;~Khaled_Kamal_Saab1;~Kush_Bhatia3;~Curtis_Langlotz1;~Christopher_Re1", "aff": ";;Stanford University;;Stanford University;", "aff_domain": ";;stanford.edu;;stanford.edu;", "position": ";;PhD student;;Full Professor;", "bibtex": "@inproceedings{\nhooper2023a,\ntitle={A case for reframing automated medical image classification as segmentation},\nauthor={Sarah Hooper and Mayee F Chen and Khaled Kamal Saab and Kush Bhatia and Curtis Langlotz and Christopher Re},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=b8xowIlZ7v}\n}", "github": "", "project": "", "reviewers": "D8HP;GK2b;rmFZ;RFD8", "pdf_size": 5641805, "rating": "4;6;6;7", "confidence": "3;4;4;5", "soundness": "2;2;4;3", "novelty": "2;2;3;3", "presentation": "2;3;3;2", "wc_summary": "137;54;64;108", "wc_strengths": "24;153;99;80", "wc_weaknesses": "56;96;69;390", "wc_questions": "35;19;70;178", "wc_limitations": "18;13;43;67", "wc_review": "270;335;345;823", "wc_reply_reviewers": "0;17;42;14", "wc_reply_authors": "0;20;18;21", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.75, 33.55126674210677 ], "wc_strengths_avg": [ 89.0, 46.10314522893205 ], "wc_weaknesses_avg": [ 152.75, 137.73411886674992 ], "wc_questions_avg": [ 75.5, 61.985885490166226 ], "wc_limitations_avg": [ 35.25, 21.568205766822608 ], "wc_review_avg": [ 443.25, 221.13160674132496 ], "wc_reply_reviewers_avg": [ 18.25, 15.138939857202683 ], "wc_reply_authors_avg": [ 14.75, 8.584142356694699 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1839814152178130229&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": ";;stanford.edu;;stanford.edu;", "author_num": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0",
"aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "bA3iR0jQO9", "title": "Accelerate Multi-Agent Reinforcement Learning in Zero-Sum Games with Subgame Curriculum Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning Nash equilibrium (NE) in complex zero-sum games with multi-agent reinforcement learning (MARL) can be extremely computationally expensive. Curriculum learning is an effective way to accelerate learning, but an under-explored dimension for generating a curriculum is the difficulty-to-learn of the *subgames* -- games induced by starting from a specific state. In this work, we present a novel subgame curriculum learning framework for zero-sum games. It adopts an adaptive initial state distribution by resetting agents to some previously visited states where they can quickly learn to improve performance. Building upon this framework, we derive a subgame selection metric that approximates the squared distance to NE values and further adopt a particle-based state sampler for subgame generation. Integrating these techniques leads to our new algorithm, *Subgame Automatic Curriculum Learning* (SACL), which is a realization of the subgame curriculum learning framework. SACL can be combined with any MARL algorithm such as MAPPO. Experiments in the particle-world environment and Google Research Football environment show SACL produces much stronger policies than baselines. In the challenging hide-and-seek quadrant environment, SACL produces all four emergent stages and uses only half the samples of MAPPO with self-play. The project website is at https://sites.google.com/view/sacl-neurips. ", "keywords": "multi-agent reinforcement learning;curriculum learning;zero-sum games", "primary_area": "", "supplementary_material": "/attachment/4c11ea0152c695c36396570680ab57cf067a9629.pdf", "author": "Jiayu Chen;Zelai Xu;Yunfei Li;Chao Yu;Jiaming Song;Huazhong Yang;Fei Fang;Yu Wang;Yi Wu", "authorids": "~Jiayu_Chen1;~Zelai_Xu1;~Yunfei_Li1;~Chao_Yu1;~Jiaming_Song1;~Huazhong_Yang2;~Fei_Fang1;~Yu_Wang3;~Yi_Wu1", "gender": ";M;;F;M;M;F;M;M", "homepage": ";https://nicsefc.ee.tsinghua.edu.cn/people/ZelaiXu;https://irisli17.github.io/;http://zoeyuchao.github.io;http://tsong.me;http://web.ee.tsinghua.edu.cn/yanghuazhong/en/index.htm;https://feifang.info/;https://nicsefc.ee.tsinghua.edu.cn;https://jxwuyi.weebly.com", "dblp": "80/8422;;;36/6789-5;173/5104;94/1128.html;57/2878;w/YuWang2.html;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;3JjcAnoAAAAJ;https://scholar.google.com/citations?hl=en;BYoq_bwAAAAJ;;;R6jE0VEAAAAJ;https://scholar.google.com.hk/citations?user=j8JGVvoAAAAJ;dusV5HMAAAAJ", "orcid": ";0000-0001-5578-199X;0000-0003-0988-9400;0000-0001-6975-0158;;0000-0003-2421-353X;;0000-0001-6108-5157;", "linkedin": ";;;;jiamings/;;;;", "or_profile": "~Jiayu_Chen1;~Zelai_Xu1;~Yunfei_Li1;~Chao_Yu1;~Jiaming_Song1;~Huazhong_Yang2;~Fei_Fang1;~Yu_Wang3;~Yi_Wu1", "aff": "Tsinghua University;Tsinghua University;Institute for Interdisciplinary Information Sciences, Tsinghua University;Tsinghua University;NVIDIA;Tsinghua University;Carnegie Mellon University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;nvidia.com;tsinghua.edu.cn;cmu.edu;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Full Professor;Assistant Professor;Full Professor;Assistant Professor", "bibtex": 
"@misc{\nchen2023accelerate,\ntitle={Accelerate Multi-Agent Reinforcement Learning in Zero-Sum Games with Subgame Curriculum Learning},\nauthor={Jiayu Chen and Zelai Xu and Yunfei Li and Chao Yu and Jiaming Song and Huazhong Yang and Fei Fang and Yu Wang and Yi Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=bA3iR0jQO9}\n}", "github": "", "project": "", "reviewers": "aeUw;9JJp;z7mX;dzKo;7PBD", "site": "https://openreview.net/forum?id=bA3iR0jQO9", "pdf_size": 1309879, "rating": "5;5;5;5;5", "confidence": "3;4;3;4;2", "soundness": "3;3;2;2;3", "novelty": "2;3;2;2;2", "presentation": "3;4;3;4;3", "wc_summary": "64;109;89;58;44", "wc_strengths": "73;160;63;38;35", "wc_weaknesses": "49;145;296;148;29", "wc_questions": "83;143;2;15;27", "wc_limitations": "18;51;20;3;13", "wc_review": "287;608;470;262;148", "wc_reply_reviewers": "4;28;0;408;24", "wc_reply_authors": "0;0;0;1022;0", "reply_reviewers": "1;1;0;3;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 72.8, 23.232735525546705 ], "wc_strengths_avg": [ 73.8, 45.46383177867875 ], "wc_weaknesses_avg": [ 133.4, 94.66699530459388 ], "wc_questions_avg": [ 54.0, 52.375566822708464 ], "wc_limitations_avg": [ 21.0, 16.112107248898266 ], "wc_review_avg": [ 355.0, 163.31319603755233 ], "wc_reply_reviewers_avg": [ 92.8, 157.97518792519284 ], "wc_reply_authors_avg": [ 204.4, 408.8 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4421668030216238370&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;0;1;0;2;0;0", "aff_unique_norm": "Tsinghua University;NVIDIA;Carnegie Mellon University", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nvidia.com;https://www.cmu.edu", "aff_unique_abbr": "THU;NVIDIA;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Marich: A Query-efficient Distributionally Equivalent Model Extraction Attack", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71163", "id": "bAI21VEMvM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5440ffceaf4831b5f98652b8a27ffde-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bAI21VEMvM", "openreview": "https://openreview.net/forum?id=bAI21VEMvM", "poster": "/media/PosterPDFs/NeurIPS%202023/71163.png?t=1698852530.3590715", "slides": "https://nips.cc/virtual/2023/poster/71163", "video": "https://nips.cc/virtual/2023/poster/71163", "author_site": "Pratik Karmakar, Debabrota Basu", "tldr": "", "abstract": "We study design of black-box model extraction attacks that can *send minimal number of queries from* a *publicly available dataset* to a target ML model through a predictive API with an aim *to create an informative and distributionally equivalent replica* of the target.\nFirst, we define *distributionally equivalent* and *Max-Information model extraction* attacks, and reduce them into a variational optimisation problem. 
The attacker sequentially solves this optimisation problem to select the most informative queries that simultaneously maximise the entropy and reduce the mismatch between the target and the stolen models. This leads to *an active sampling-based query selection algorithm*, Marich, which is *model-oblivious*. Then, we evaluate Marich on different text and image data sets, and different models, including CNNs and BERT. Marich extracts models that achieve $\\sim 60-95\\%$ of the true model's accuracy and uses $\\sim 1,000 - 8,500$ queries from the publicly available datasets, which are different from the private training datasets. Models extracted by Marich yield prediction distributions, which are $\\sim2-4\\times$ closer to the target's distribution in comparison to the existing active sampling-based attacks. The extracted models also lead to 84-96$\\%$ accuracy under membership inference attacks. Experimental results validate that Marich is *query-efficient*, and capable of performing task-accurate, high-fidelity, and informative model extraction.", "keywords": "Differential Privacy;Model Extraction Attacks;Active Sampling;Max-Information Attack", "primary_area": "", "supplementary_material": "/attachment/6c9382833e23e0dbb6c69f63af31b7e79d847dfb.zip", "author": "Pratik Karmakar;Debabrota Basu", "authorids": "~Pratik_Karmakar1;~Debabrota_Basu1", "gender": "M;", "homepage": "https://github.com/pratik2358;https://debabrota-basu.github.io/", "dblp": ";126/2209", "google_scholar": ";https://scholar.google.co.in/citations?user=e26Maa4AAAAJ", "orcid": "0009-0008-1111-8801;", "linkedin": ";", "or_profile": "~Pratik_Karmakar1;~Debabrota_Basu1", "aff": "National University of Singapore;INRIA", "aff_domain": "nus.edu;inria.fr", "position": "PhD student;Faculty", "bibtex": "@inproceedings{\nkarmakar2023marich,\ntitle={Marich: A Query-efficient Distributionally Equivalent Model Extraction Attack},\nauthor={Pratik Karmakar and Debabrota Basu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bAI21VEMvM}\n}", "github": "", "project": "", "reviewers": "wp9P;tspy;jbqh;SZQw", "pdf_size": 1378607, "rating": "4;5;6;7", "confidence": "5;3;4;2", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "302;64;73;47", "wc_strengths": "40;16;117;47", "wc_weaknesses": "70;115;48;116", "wc_questions": "41;3;36;37", "wc_limitations": "19;3;7;12", "wc_review": "472;201;281;259", "wc_reply_reviewers": "655;11;42;0", "wc_reply_authors": "1305;22;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "4;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 121.5, 104.62910684890701 ], "wc_strengths_avg": [ 55.0, 37.59654239421492 ], "wc_weaknesses_avg": [ 87.25, 29.303370113350443 ], "wc_questions_avg": [ 29.25, 15.270478054075452 ], "wc_limitations_avg": [ 10.25, 5.973901572674261 ], "wc_review_avg": [ 303.25, 101.71621060578299 ], "wc_reply_reviewers_avg": [ 177.0, 276.40278580361667 ], "wc_reply_authors_avg": [ 331.75, 561.9779243884941 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7999999999999999, "gs_citation": 9, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=17087805928829517446&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "nus.edu;inria.fr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.inria.fr", "aff_unique_abbr": "NUS;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;France" }, { "title": "A Bayesian Take on Gaussian Process Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71162", "id": "bBIHqoZ3OR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b146e7c87685fa208bd95ce4b08e330c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bBIHqoZ3OR", "openreview": "https://openreview.net/forum?id=bBIHqoZ3OR", "poster": "/media/PosterPDFs/NeurIPS%202023/71162.png?t=1702162822.3697557", "slides": "https://nips.cc/virtual/2023/poster/71162", "video": "https://nips.cc/virtual/2023/poster/71162", "author_site": "Enrico Giudice, Jack Kuipers, Giusi Moffa", "tldr": "", "abstract": "Gaussian Process Networks (GPNs) are a class of directed graphical models which employ Gaussian processes as priors for the conditional expectation of each variable given its parents in the network. The model allows the description of continuous joint distributions in a compact but flexible manner with minimal parametric assumptions on the dependencies between variables. Bayesian structure learning of GPNs requires computing the posterior over graphs of the network and is computationally infeasible even in low dimensions. This work implements Monte Carlo and Markov Chain Monte Carlo methods to sample from the posterior distribution of network structures. As such, the approach follows the Bayesian paradigm, comparing models via their marginal likelihood and computing the posterior probability of the GPN features. 
Simulation studies show that our method outperforms state-of-the-art algorithms in recovering the graphical structure of the network and provides an accurate approximation of its posterior distribution.", "keywords": "Bayesian networks;structure learning;graphical models;gaussian processes;Bayesian inference;MCMC sampling;importance sampling", "primary_area": "", "supplementary_material": "/attachment/957e06443acdad9d7c1dd6d86438aabcfd672322.pdf", "author": "Enrico Giudice;Jack Kuipers;Giusi Moffa", "authorids": "~Enrico_Giudice1;~Jack_Kuipers1;~Giusi_Moffa1", "gender": ";;F", "homepage": ";;https://dmi.unibas.ch/en/persons/moffa-giusi/", "dblp": ";;126/9673", "google_scholar": ";;qsFpjOYAAAAJ", "orcid": ";;0000-0002-2739-0454", "linkedin": ";;giusimoffa/", "or_profile": "~Enrico_Giudice1;~Jack_Kuipers1;~Giusi_Moffa1", "aff": ";;University of Basel", "aff_domain": ";;unibas.ch", "position": ";;Assistant Professor", "bibtex": "@inproceedings{\ngiudice2023a,\ntitle={A Bayesian Take on Gaussian Process Networks},\nauthor={Enrico Giudice and Jack Kuipers and Giusi Moffa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bBIHqoZ3OR}\n}", "github": "", "project": "", "reviewers": "sMvJ;9qjJ;tnR2;uGD4;JYiJ", "pdf_size": 919062, "rating": "3;6;6;7;7", "confidence": "5;4;4;4;4", "soundness": "2;3;2;4;4", "novelty": "2;3;2;3;3", "presentation": "3;3;2;3;4", "wc_summary": "63;228;57;177;143", "wc_strengths": "16;49;35;102;272", "wc_weaknesses": "161;318;181;308;126", "wc_questions": "175;93;64;207;81", "wc_limitations": "48;20;1;57;66", "wc_review": "463;708;338;851;688", "wc_reply_reviewers": "47;0;28;0;312", "wc_reply_authors": "0;0;17;0;512", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 133.6, 65.9320862706467 ], "wc_strengths_avg": [ 94.8, 93.09650906451863 ], "wc_weaknesses_avg": [ 218.8, 78.96682847879862 ], "wc_questions_avg": [ 124.0, 56.39148871948674 ], "wc_limitations_avg": [ 38.4, 24.23716154998353 ], "wc_review_avg": [ 609.6, 184.04412514394477 ], "wc_reply_reviewers_avg": [ 77.4, 118.6450167516529 ], "wc_reply_authors_avg": [ 105.8, 203.2066928031653 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9525793444156804, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9309304975851398063&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": ";;unibas.ch", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Basel", "aff_unique_dep": "", "aff_unique_url": "https://www.unibas.ch", "aff_unique_abbr": "UniBas", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "title": "Differentiable Neuro-Symbolic Reasoning on Large-Scale Knowledge Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71161", "id": "bETvUctiTR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5965f3a748a8d41415db2bfa44635cc3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bETvUctiTR", "openreview": "https://openreview.net/forum?id=bETvUctiTR", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/71161", "video": "https://nips.cc/virtual/2023/poster/71161", "author_site": "CHEN SHENGYUAN, Yunfeng Cai, Huang Fang, Xiao Huang, Mingming Sun", "tldr": "", "abstract": "Knowledge graph (KG) reasoning utilizes two primary techniques, i.e., rule-based and KG-embedding based. The former provides precise inferences, but inferring via concrete rules is not scalable. The latter enables efficient reasoning at the cost of ambiguous inference accuracy. Neuro-symbolic reasoning seeks to amalgamate the advantages of both techniques. The crux of this approach is replacing the predicted existence of all possible triples (i.e., truth scores inferred from rules) with a suitable approximation grounded in embedding representations. However, constructing an effective approximation of all possible triples' truth scores is a challenging task, because it needs to balance the tradeoff between accuracy and efficiency, while compatible with both the rule-based and KG-embedding models. To this end, we proposed a differentiable framework - DiffLogic. Instead of directly approximating all possible triples, we design a tailored filter to adaptively select essential triples based on the dynamic rules and weights. The truth scores assessed by KG-embedding are continuous, so we employ a continuous Markov logic network named probabilistic soft logic (PSL). It employs the truth scores of essential triples to assess the overall agreement among rules, weights, and observed triples. PSL enables end-to-end differentiable optimization, so we can alternately update embedding and weighted rules. On benchmark datasets, we empirically show that DiffLogic surpasses baselines in both effectiveness and efficiency.", "keywords": "Neuro-Symbolic Reasoning;Knowledge graph embedding;Probabilistic soft logic", "primary_area": "", "supplementary_material": "/attachment/4219646eeba6f81e06e17d22ac9c56962712188d.pdf", "author": "CHEN SHENGYUAN;YUNFENG CAI;Huang Fang;Xiao Huang;Mingming Sun", "authorids": "~CHEN_SHENGYUAN1;~YUNFENG_CAI1;~Huang_Fang1;~Xiao_Huang1;~Mingming_Sun1", "gender": "M;M;M;M;M", "homepage": "https://chensycn.github.io/;https://www.bimsa.cn/detail/yfcai.html;https://www.cs.ubc.ca/~hgfang;https://www4.comp.polyu.edu.hk/~xiaohuang/;", "dblp": ";133/8201;17/7697;25/692-1.html;87/8665-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;SYYFwD8AAAAJ;Be21PkYAAAAJ;", "orcid": "0000-0001-6300-711X;;;0000-0002-3867-900X;", "linkedin": "shengyuan-chen-cn;;;;", "or_profile": "~CHEN_SHENGYUAN1;~YUNFENG_CAI1;~Huang_Fang1;~Xiao_Huang1;~Mingming_Sun1", "aff": "Hong Kong Polytechnic University;Baidu Research;Baidu;The Hong Kong Polytechnic University;Baidu", "aff_domain": "polyu.edu.hk;baidu.com;baidu.com;polyu.edu.hk;baidu.com", "position": "PhD student;Resseacher;Researcher;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nshengyuan2023differentiable,\ntitle={Differentiable Neuro-Symbolic Reasoning on Large-Scale Knowledge Graphs},\nauthor={CHEN SHENGYUAN and YUNFENG CAI and Huang Fang and Xiao Huang and Mingming Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bETvUctiTR}\n}", "github": "", "project": "", "reviewers": "kYuQ;KuwS;jUBS;wvKd;Djat", "pdf_size": 482725, "rating": "6;6;6;7;7", "confidence": "4;3;3;4;2", "soundness": "3;2;2;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;3;3;4", "wc_summary": "98;74;57;103;67", 
"wc_strengths": "81;86;28;144;36", "wc_weaknesses": "103;25;256;183;124", "wc_questions": "90;2;36;77;28", "wc_limitations": "14;2;10;33;39", "wc_review": "386;189;387;540;294", "wc_reply_reviewers": "9;14;20;22;78", "wc_reply_authors": "0;0;29;0;20", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 79.8, 17.814600753314682 ], "wc_strengths_avg": [ 75.0, 41.5884599378241 ], "wc_weaknesses_avg": [ 138.2, 77.6386501685855 ], "wc_questions_avg": [ 46.6, 32.41974706872341 ], "wc_limitations_avg": [ 19.6, 14.065560778013793 ], "wc_review_avg": [ 359.2, 116.10236862355565 ], "wc_reply_reviewers_avg": [ 28.6, 25.12050954897213 ], "wc_reply_authors_avg": [ 9.8, 12.33531515608742 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.21821789023599236, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3454115632564508306&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "polyu.edu.hk;baidu.com;baidu.com;polyu.edu.hk;baidu.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Hong Kong Polytechnic University;Baidu", "aff_unique_dep": ";Baidu Research", "aff_unique_url": "https://www.polyu.edu.hk;https://research.baidu.com", "aff_unique_abbr": "PolyU;Baidu", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ATTA: Anomaly-aware Test-Time Adaptation for Out-of-Distribution Detection in Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71160", "id": "bGcdjXrU2w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8dcc306a2522c60a78f047ab8739e631-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bGcdjXrU2w", "openreview": "https://openreview.net/forum?id=bGcdjXrU2w", "poster": "/media/PosterPDFs/NeurIPS%202023/71160.png?t=1701956878.450866", "slides": "https://nips.cc/virtual/2023/poster/71160", "video": "https://nips.cc/virtual/2023/poster/71160", "author_site": "Zhitong Gao, Shipeng Yan, Xuming He", "tldr": "", "abstract": "Recent advancements in dense out-of-distribution (OOD) detection have primarily focused on scenarios where the training and testing datasets share a similar domain, with the assumption that no domain shift exists between them. However, in real-world situations, domain shift often exits and significantly affects the accuracy of existing out-of-distribution (OOD) detection models. In this work, we propose a dual-level OOD detection framework to handle domain shift and semantic shift jointly. The first level distinguishes whether domain shift exists in the image by leveraging global low-level features, while the second level identifies pixels with semantic shift by utilizing dense high-level feature maps. In this way, we can selectively adapt the model to unseen domains as well as enhance model's capacity in detecting novel classes. 
We validate the efficacy of our proposed method on several OOD segmentation benchmarks, including those with significant domain shifts and those without, observing consistent performance improvements across various baseline models. Code is available at https://github.com/gaozhitong/ATTA.", "keywords": "ood detection;semantic segmentation;anomaly segmentation;test-time adaptation", "primary_area": "", "supplementary_material": "", "author": "Zhitong Gao;Shipeng Yan;Xuming He", "authorids": "~Zhitong_Gao1;~Shipeng_Yan1;~Xuming_He3", "gender": "F;M;M", "homepage": "https://gaozhitong.github.io/;;https://faculty.sist.shanghaitech.edu.cn/faculty/hexm/index.html", "dblp": ";63/9201;03/4230", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;oYILsyoAAAAJ;0KyeZ2QAAAAJ", "orcid": "0000-0002-3707-4850;;", "linkedin": ";;", "or_profile": "~Zhitong_Gao1;~Shipeng_Yan1;~Xuming_He3", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ngao2023atta,\ntitle={{ATTA}: Anomaly-aware Test-Time Adaptation for Out-of-Distribution Detection in Segmentation},\nauthor={Zhitong Gao and Shipeng Yan and Xuming He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bGcdjXrU2w}\n}", "github": "", "project": "", "reviewers": "XNCt;C29A;TGMY;Nknk;7M1E", "pdf_size": 2691428, "rating": "5;5;5;5;6", "confidence": "4;3;3;4;3", "soundness": "2;2;3;3;3", "novelty": "3;2;3;4;3", "presentation": "2;3;2;4;3", "wc_summary": "111;77;175;70;95", "wc_strengths": "71;70;90;59;80", "wc_weaknesses": "279;579;171;182;119", "wc_questions": "68;82;17;9;6", "wc_limitations": "19;71;4;20;3", "wc_review": "548;879;457;340;303", "wc_reply_reviewers": "154;106;37;0;27", "wc_reply_authors": "438;57;346;134;0", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;2;2;2;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 105.6, 37.53185313836768 ], "wc_strengths_avg": [ 74.0, 10.411532067856296 ], "wc_weaknesses_avg": [ 266.0, 164.8199017109281 ], "wc_questions_avg": [ 36.4, 32.028737096551275 ], "wc_limitations_avg": [ 23.4, 24.856387509048858 ], "wc_review_avg": [ 505.4, 205.94037972189912 ], "wc_reply_reviewers_avg": [ 64.8, 56.66533331764669 ], "wc_reply_authors_avg": [ 195.0, 168.9023386457393 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2841140309265564329&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "FourierGNN: Rethinking Multivariate Time Series Forecasting from a Pure Graph Perspective", "status": "Poster", 
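For the ATTA record above, the implementable core is the dual-level decision rule: an image-level domain-shift check on global low-level statistics gates test-time adaptation, and a pixel-level score on dense logits flags semantic shift. A hedged PyTorch sketch of that control flow follows; the z-score test, the entropy-based anomaly score, and the `backbone`/`head`/`adapt_fn` interfaces are stand-ins, not the paper's exact criteria.

```python
import torch
import torch.nn.functional as F

def domain_shift_detected(feats, train_mean, train_std, tau=3.0):
    # Level 1: compare global low-level feature statistics of the test
    # image against training-set statistics (simple z-score stand-in).
    z = (feats.mean(dim=(-2, -1)) - train_mean) / (train_std + 1e-8)
    return bool(z.abs().mean() > tau)

def pixel_ood_score(logits):
    # Level 2: per-pixel semantic-shift score from dense class logits;
    # here the predictive entropy (higher = more likely an unknown pixel).
    p = F.softmax(logits, dim=1)                       # (B, C, H, W)
    return -(p * p.clamp_min(1e-8).log()).sum(dim=1)   # (B, H, W)

def dual_level_detect(model, image, train_mean, train_std, adapt_fn):
    feats = model.backbone(image)            # global low-level features
    if domain_shift_detected(feats, train_mean, train_std):
        model = adapt_fn(model, image)       # selective test-time adaptation
        feats = model.backbone(image)
    return pixel_ood_score(model.head(feats))  # dense high-level feature maps
```

The design point the abstract stresses is the gating: adaptation runs only when the first level fires, so in-domain images are left untouched.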
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71159", "id": "bGs1qWQ1Fx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc1e32dd3eb381dbc71482f6a96cbf86-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bGs1qWQ1Fx", "openreview": "https://openreview.net/forum?id=bGs1qWQ1Fx", "poster": "/media/PosterPDFs/NeurIPS%202023/71159.png?t=1701931905.6668057", "slides": "https://nips.cc/virtual/2023/poster/71159", "video": "https://nips.cc/virtual/2023/poster/71159", "author_site": "Kun Yi, Qi Zhang, Wei Fan, Hui He, Liang Hu, Pengyang Wang, Ning An, Longbing Cao, Zhendong Niu", "tldr": "", "abstract": "Multivariate time series (MTS) forecasting has shown great importance in numerous industries. Current state-of-the-art graph neural network (GNN)-based forecasting methods usually require both graph networks (e.g., GCN) and temporal networks (e.g., LSTM) to capture inter-series (spatial) dynamics and intra-series (temporal) dependencies, respectively. However, the uncertain compatibility of the two networks puts an extra burden on handcrafted model designs. Moreover, the separate spatial and temporal modeling naturally violates the unified spatiotemporal inter-dependencies in real world, which largely hinders the forecasting performance. To overcome these problems, we explore an interesting direction of directly applying graph networks and rethink MTS forecasting from a pure graph perspective. We first define a novel data structure, hypervariate graph, which regards each series value (regardless of variates or timestamps) as a graph node, and represents sliding windows as space-time fully-connected graphs. This perspective considers spatiotemporal dynamics unitedly and reformulates classic MTS forecasting into the predictions on hypervariate graphs.\n Then, we propose a novel architecture Fourier Graph Neural Network (FourierGNN) by stacking our proposed Fourier Graph Operator (FGO) to perform matrix multiplications in Fourier space. FourierGNN accommodates adequate expressiveness and achieves much lower complexity, which can effectively and efficiently accomplish {the forecasting}. Besides, our theoretical analysis reveals FGO's equivalence to graph convolutions in the time domain, which further verifies the validity of FourierGNN. Extensive experiments on seven datasets have demonstrated our superior performance with higher efficiency and fewer parameters compared with state-of-the-art methods. 
Code is available at this repository: https://github.com/aikunyi/FourierGNN.", "keywords": "multivariate time series forecasting;fourier space", "primary_area": "", "supplementary_material": "/attachment/0aa25b6f4ae6ed9c7ad81da356a613038f46e90c.zip", "author": "Kun Yi;Qi Zhang;Wei Fan;Hui He;Liang Hu;Pengyang Wang;Ning An;Longbing Cao;Zhendong Niu", "authorids": "~Kun_Yi2;~Qi_Zhang25;~Wei_Fan6;~Hui_He2;~Liang_Hu1;~Pengyang_Wang1;~Ning_An1;~Longbing_Cao1;~Zhendong_Niu2", "gender": ";M;M;F;M;M;M;M;M", "homepage": "https://github.com/aikunyi;https://sites.google.com/view/qizhang-bit-uts/home;https://weifan.site/;https://www.researchgate.net/profile/Hui_He43;https://sites.google.com/view/lianghu/home;https://pengyangwang.com/;;https://www.datasciences.org;", "dblp": "202/8470-1;52/323-20;54/3488-10;https://dblp.uni-trier.de/pid/53/1151;48/5388-4;219/1752;98/6171-1.html;14/2589;https://dblp.uni-trier.de/pid/06/3613.html", "google_scholar": "MhMZcIEAAAAJ;8UAk1p4AAAAJ;cQ8zLJ4AAAAJ;1IqAdRwAAAAJ;https://scholar.google.com.au/citations?user=cj6wAgYAAAAJ;o26vQZwAAAAJ;tr5oJtQAAAAJ;cDs3DM8AAAAJ;", "orcid": "0000-0002-9980-6033;0000-0002-1037-1361;0000-0001-7656-445X;0000-0001-5515-2739;;0000-0003-3961-5523;0000-0003-3317-5299;0000-0003-1562-9429;", "linkedin": ";;;;;;ningan/;;", "or_profile": "~Kun_Yi2;~Qi_Zhang25;~Wei_Fan6;~Hui_He2;~Liang_Hu1;~Pengyang_Wang1;~Ning_An1;~Longbing_Cao1;~Zhendong_Niu2", "aff": "Beijing Institute of Technology;Tongji University;University of Central Florida;Beijing Institute of Technology;Tongji University;University of Macau;Hefei University of Technology;University of Technology Sydney;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;tongji.edu.cn;ucf.edu;bit.edu.cn;tongji.edu.cn;um.edu.mo;hfut.edu.cn;uts.edu.au;bit.edu.cn", "position": "PhD student;Researcher;PhD student;PhD student;Full Professor;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyi2023fouriergnn,\ntitle={Fourier{GNN}: Rethinking Multivariate Time Series Forecasting from a Pure Graph Perspective},\nauthor={Kun Yi and Qi Zhang and Wei Fan and Hui He and Liang Hu and Pengyang Wang and Ning An and Longbing Cao and Zhendong Niu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bGs1qWQ1Fx}\n}", "github": "", "project": "", "reviewers": "ndBv;G8af;yaJh;ipTP", "pdf_size": 2283079, "rating": "3;6;7;7", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "77;116;66;82", "wc_strengths": "21;38;142;128", "wc_weaknesses": "43;132;65;132", "wc_questions": "356;49;36;26", "wc_limitations": "1;4;1;1", "wc_review": "498;339;310;369", "wc_reply_reviewers": "221;0;0;0", "wc_reply_authors": "1168;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "4;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.25, 18.673175948402566 ], "wc_strengths_avg": [ 82.25, 53.32154817707378 ], "wc_weaknesses_avg": [ 93.0, 39.76807765029635 ], "wc_questions_avg": [ 116.75, 138.3715559643672 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 379.0, 71.80181056212997 ], "wc_reply_reviewers_avg": [ 55.25, 95.69580711818047 ], "wc_reply_authors_avg": [ 292.0, 505.75883581011215 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], 
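The FourierGNN record above is built around one operation, the Fourier Graph Operator: transform node features of the hypervariate graph into Fourier space, apply a learnable complex-valued matrix multiplication, and transform back. A minimal PyTorch sketch under that reading; the layer sizes and the real/imaginary weight parameterization are illustrative choices, and the authors' repository linked above has the real implementation.

```python
import torch
import torch.nn as nn

class FourierGraphOperator(nn.Module):
    """One FGO layer: FFT over nodes -> complex matmul -> inverse FFT."""
    def __init__(self, dim):
        super().__init__()
        # Learnable complex weights, stored as real/imaginary parts.
        self.w_re = nn.Parameter(0.02 * torch.randn(dim, dim))
        self.w_im = nn.Parameter(0.02 * torch.randn(dim, dim))

    def forward(self, x):
        # x: (batch, n_nodes, dim); each node is one (variate, timestamp)
        # pair of the space-time fully-connected hypervariate graph.
        xf = torch.fft.fft(x, dim=1)             # into Fourier space
        yf = xf @ torch.complex(self.w_re, self.w_im)
        return torch.fft.ifft(yf, dim=1).real    # back to the time domain

# Stacked FGOs (with nonlinearities between them) form the backbone; the
# paper's analysis equates this with graph convolution in the time domain.
x = torch.randn(8, 12 * 20, 32)            # e.g. 12 variates x 20 timestamps
print(FourierGraphOperator(32)(x).shape)   # torch.Size([8, 240, 32])
```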
"reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.08804509063256237, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=787870658423149869&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "bit.edu.cn;tongji.edu.cn;ucf.edu;bit.edu.cn;tongji.edu.cn;um.edu.mo;hfut.edu.cn;uts.edu.au;bit.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;0;1;3;4;5;0", "aff_unique_norm": "Beijing Institute of Technology;Tongji University;University of Central Florida;University of Macau;Hefei University of Technology;University of Technology Sydney", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.tongji.edu.cn;https://www.ucf.edu;https://www.um.edu.mo;http://www.hfut.edu.cn/;https://www.uts.edu.au", "aff_unique_abbr": "BIT;Tongji;UCF;UM;HUT;UTS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Macau SAR", "aff_country_unique_index": "0;0;1;0;0;0;0;2;0", "aff_country_unique": "China;United States;Australia" }, { "title": "Asymmetric Certified Robustness via Feature-Convex Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71158", "id": "bH4LVNVXUo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a45b205c10ef082515cacae80555bbef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bH4LVNVXUo", "openreview": "https://openreview.net/forum?id=bH4LVNVXUo", "poster": "/media/PosterPDFs/NeurIPS%202023/71158.png?t=1699647591.6703749", "slides": "https://nips.cc/virtual/2023/poster/71158", "video": "https://nips.cc/virtual/2023/poster/71158", "author_site": "Samuel Pfrommer, Brendon Anderson, Julien Piet, Somayeh Sojoudi", "tldr": "", "abstract": "Real-world adversarial attacks on machine learning models often feature an asymmetric structure wherein adversaries only attempt to induce false negatives (e.g., classify a spam email as not spam). We formalize the asymmetric robustness certification problem and correspondingly present the feature-convex neural network architecture, which composes an input-convex neural network (ICNN) with a Lipschitz continuous feature map in order to achieve asymmetric adversarial robustness. We consider the aforementioned binary setting with one \"sensitive\" class, and for this class we prove deterministic, closed-form, and easily-computable certified robust radii for arbitrary $\\ell_p$-norms. We theoretically justify the use of these models by characterizing their decision region geometry, extending the universal approximation theorem for ICNN regression to the classification setting, and proving a lower bound on the probability that such models perfectly fit even unstructured uniformly distributed data in sufficiently high dimensions. Experiments on Malimg malware classification and subsets of the MNIST, Fashion-MNIST, and CIFAR-10 datasets show that feature-convex classifiers attain substantial certified $\\ell_1$, $\\ell_2$, and $\\ell_{\\infty}$-radii while being far more computationally efficient than competitive baselines.", "keywords": "asymmetric certified robustness;input-convex neural networks", "primary_area": "", "supplementary_material": "/attachment/74d7c7398306460094a509c820e180a5e7927881.zip", "author": "Samuel Pfrommer;Brendon G. 
Anderson;Julien Piet;Somayeh Sojoudi", "authorids": "~Samuel_Pfrommer1;~Brendon_G._Anderson1;~Julien_Piet1;~Somayeh_Sojoudi1", "gender": ";;M;F", "homepage": "https://sam.pfrommer.us/;https://brendon-anderson.github.io/;https://people.eecs.berkeley.edu/~julien.piet/;https://eecs.berkeley.edu/~sojoudi/", "dblp": ";225/6104;237/0174;06/7000", "google_scholar": "ysS4V1UAAAAJ;kNA83jQAAAAJ;bRWa8q8AAAAJ;kNH8zcgAAAAJ", "orcid": ";;;", "linkedin": "sampfrommer/;;julien-piet-b1741975/;", "or_profile": "~Samuel_Pfrommer1;~Brendon_G._Anderson1;~Julien_Piet1;~Somayeh_Sojoudi1", "aff": "University of California, Berkeley;University of California, Berkeley;Electrical Engineering & Computer Science Department, University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;eecs.berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npfrommer2023asymmetric,\ntitle={Asymmetric Certified Robustness via Feature-Convex Neural Networks},\nauthor={Samuel Pfrommer and Brendon G. Anderson and Julien Piet and Somayeh Sojoudi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bH4LVNVXUo}\n}", "github": "", "project": "", "reviewers": "yvd4;g6eM;44oN;qP9X", "pdf_size": 1185092, "rating": "5;6;6;6", "confidence": "4;4;4;2", "soundness": "3;3;4;2", "novelty": "3;2;4;3", "presentation": "3;4;1;2", "wc_summary": "74;141;101;214", "wc_strengths": "27;44;42;35", "wc_weaknesses": "275;235;66;392", "wc_questions": "119;28;166;120", "wc_limitations": "2;7;7;1", "wc_review": "497;455;382;762", "wc_reply_reviewers": "17;84;152;59", "wc_reply_authors": "0;0;157;23", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 132.5, 52.74703783152187 ], "wc_strengths_avg": [ 37.0, 6.670832032063167 ], "wc_weaknesses_avg": [ 242.0, 116.84819211267242 ], "wc_questions_avg": [ 108.25, 50.071823413972055 ], "wc_limitations_avg": [ 4.25, 2.7726341266023544 ], "wc_review_avg": [ 524.0, 143.4381399767858 ], "wc_reply_reviewers_avg": [ 78.0, 48.974483151943524 ], "wc_reply_authors_avg": [ 45.0, 65.34141106526549 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2791609541535311514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "berkeley.edu;berkeley.edu;eecs.berkeley.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Riemannian Laplace approximations for Bayesian neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71157", "id": "bHS7qjLOAy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/631f99d8e860054410c239fc90d18270-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=bHS7qjLOAy", "openreview": "https://openreview.net/forum?id=bHS7qjLOAy", "poster": "/media/PosterPDFs/NeurIPS%202023/71157.png?t=1702153737.57531", "slides": "https://nips.cc/virtual/2023/poster/71157", "video": "https://nips.cc/virtual/2023/poster/71157", "author_site": "Federico Bergamin, Pablo Moreno-Mu\u00f1oz, S\u00f8ren Hauberg, Georgios Arvanitidis", "tldr": "", "abstract": "Bayesian neural networks often approximate the weight-posterior with a Gaussian distribution. However, practical posteriors are often, even locally, highly non-Gaussian, and empirical performance deteriorates. We propose a simple parametric approximate posterior that adapts to the shape of the true posterior through a Riemannian metric that is determined by the log-posterior gradient. We develop a Riemannian Laplace approximation where samples naturally fall into weight-regions with low negative log-posterior. We show that these samples can be drawn by solving a system of ordinary differential equations, which can be done efficiently by leveraging the structure of the Riemannian metric and automatic differentiation. Empirically, we demonstrate that our approach consistently improves over the conventional Laplace approximation across tasks. We further show that, unlike the conventional Laplace approximation, our method is not overly sensitive to the choice of prior, which alleviates a practical pitfall of current approaches.", "keywords": "Riemannian geometry;Laplace approximation;Approximate inference;Bayesian neural networks", "primary_area": "", "supplementary_material": "", "author": "Federico Bergamin;Pablo Moreno-Mu\u00f1oz;S\u00f8ren Hauberg;Georgios Arvanitidis", "authorids": "~Federico_Bergamin1;~Pablo_Moreno-Mu\u00f1oz1;~S\u00f8ren_Hauberg1;~Georgios_Arvanitidis1", "gender": ";M;M;M", "homepage": ";https://pmorenoz.github.io/;http://www2.compute.dtu.dk/~sohau/;https://www2.compute.dtu.dk/~gear/", "dblp": ";220/5334;39/7226;142/4188", "google_scholar": ";8vL8iawAAAAJ;https://scholar.google.com/citations?hl=en;sFtJbSUAAAAJ", "orcid": ";0000-0002-7249-2986;;0000-0002-0377-2976", "linkedin": ";;;", "or_profile": "~Federico_Bergamin1;~Pablo_Moreno-Mu\u00f1oz1;~S\u00f8ren_Hauberg1;~Georgios_Arvanitidis1", "aff": ";Technical University of Denmark;Technical University of Denmark;Technical University of Denmark", "aff_domain": ";dtu.dk;dtu.dk;dtu.dk", "position": ";Postdoc;Professor;Assistant Professor", "bibtex": "@inproceedings{\nbergamin2023riemannian,\ntitle={Riemannian Laplace approximations for Bayesian neural networks},\nauthor={Federico Bergamin and Pablo Moreno-Mu{\\~n}oz and S{\\o}ren Hauberg and Georgios Arvanitidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bHS7qjLOAy}\n}", "github": "", "project": "", "reviewers": "CqAb;BssD;am4X;dyBq", "pdf_size": 5719427, "rating": "5;6;6;7", "confidence": "3;4;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "45;223;94;87", "wc_strengths": "109;73;63;100", "wc_weaknesses": "291;222;217;200", "wc_questions": "125;207;1;68", "wc_limitations": "33;65;11;101", "wc_review": "603;790;386;556", "wc_reply_reviewers": "0;212;103;21", "wc_reply_authors": "0;155;161;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 
0.4330127018922193 ], "wc_summary_avg": [ 112.25, 66.63098003181403 ], "wc_strengths_avg": [ 86.25, 18.859679212542297 ], "wc_weaknesses_avg": [ 232.5, 34.74550330618338 ], "wc_questions_avg": [ 100.25, 75.66166466580022 ], "wc_limitations_avg": [ 52.5, 33.95217224272992 ], "wc_review_avg": [ 583.75, 143.8616957358699 ], "wc_reply_reviewers_avg": [ 84.0, 83.32166584988565 ], "wc_reply_authors_avg": [ 79.0, 79.0284758805331 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7209717156194188822&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";dtu.dk;dtu.dk;dtu.dk", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technical University of Denmark", "aff_unique_dep": "", "aff_unique_url": "https://www.dtu.dk", "aff_unique_abbr": "DTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "Neural Lad: A Neural Latent Dynamics Framework for Times Series Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71156", "id": "bISkJSa5Td", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/382a8606a85ca6ec7c06185a1a95ce8b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bISkJSa5Td", "openreview": "https://openreview.net/forum?id=bISkJSa5Td", "poster": "/media/PosterPDFs/NeurIPS%202023/71156.png?t=1702184720.2919936", "slides": "https://nips.cc/virtual/2023/poster/71156", "video": "https://nips.cc/virtual/2023/poster/71156", "author_site": "ting li, Jianguo Li, Zhanxing Zhu", "tldr": "", "abstract": "Neural ordinary differential equation (Neural ODE) is an elegant yet powerful framework to learn the temporal dynamics for time series modeling.\nHowever, we observe that existing Neural ODE forecasting models suffer from two disadvantages:\ni) controlling the latent states only through the linear transformation over the local change of the observed signals may be inadequate;\nii) lacking the ability to capture the inherent periodic property in time series forecasting tasks.\nTo overcome the two issues, \nwe introduce a new neural ODE framework called \\textbf{Neural Lad}, a \\textbf{Neural} \\textbf{La}tent \\textbf{d}ynamics model in which the latent representations evolve with an ODE enhanced by the change of observed signal and seasonality-trend characterization. We incorporate the local change of input signal into the latent dynamics in an attention-based manner and design a residual architecture over basis expansion to depict the periodicity in the underlying dynamics. To accommodate multivariate time series forecasting, we extend Neural Lad by learning an adaptive relationship between multiple time series. \nExperiments demonstrate that our model can achieve better or comparable performance against existing neural ODE families and transformer variants in various datasets.
Remarkably, the empirical superiority of Neural Lad is consistent across short and long-horizon forecasting for univariate, multivariate, and even irregularly sampled time series.", "keywords": "Neural CDE;Time-series forecasting;Latent Dynamic", "primary_area": "", "supplementary_material": "/attachment/c43d0bba51b833f3820ec442995114ca869ff0f9.pdf", "author": "Ting Li;Jianguo Li;Zhanxing Zhu", "authorids": "~Ting_Li3;~Jianguo_Li2;~Zhanxing_Zhu1", "gender": "F;M;M", "homepage": "https://liting.com;https://zhanxingzhu.github.io/;https://sites.google.com/site/leeplus/", "dblp": ";87/7756.html;70/6237", "google_scholar": ";a2sHceIAAAAJ;n44GlFcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ting_Li3;~Zhanxing_Zhu1;~jianguo_Li1", "aff": "AntGroup;University of Southampton;Ant Group", "aff_domain": "antgroup.com;soton.ac.uk;antgroup.com", "position": "Algorithm Engineer;Associate Professor;Director", "bibtex": "@inproceedings{\nli2023neural,\ntitle={Neural Lad: A Neural Latent Dynamics Framework for Times Series Modeling},\nauthor={Ting Li and Jianguo Li and Zhanxing Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bISkJSa5Td}\n}", "github": "", "project": "", "reviewers": "85MJ;GHEt;LTR9;gsAa", "pdf_size": 536288, "rating": "6;6;6;7", "confidence": "3;3;4;2", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "78;86;86;87", "wc_strengths": "78;51;49;33", "wc_weaknesses": "328;158;52;8", "wc_questions": "353;6;58;20", "wc_limitations": "66;1;6;1", "wc_review": "903;302;251;149", "wc_reply_reviewers": "108;12;13;10", "wc_reply_authors": "53;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 84.25, 3.6314597615834874 ], "wc_strengths_avg": [ 52.75, 16.161296358893985 ], "wc_weaknesses_avg": [ 136.5, 123.27509886428808 ], "wc_questions_avg": [ 109.25, 142.009462712877 ], "wc_limitations_avg": [ 18.5, 27.5 ], "wc_review_avg": [ 401.25, 294.8765631582137 ], "wc_reply_reviewers_avg": [ 35.75, 41.72753886823425 ], "wc_reply_authors_avg": [ 13.25, 22.949673200287624 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11626401038136239557&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "antgroup.com;soton.ac.uk;antgroup.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ant Group;University of Southampton", "aff_unique_dep": ";", "aff_unique_url": "https://www.antgroup.com;https://www.southampton.ac.uk", "aff_unique_abbr": "AntGroup;Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Deep Optimal Transport: A Practical Algorithm for Photo-realistic Image Restoration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71155", "id": "bJJY9TFfe0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c281c5a17ad2e55e1ac1ca825071f991-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bJJY9TFfe0", "openreview":
"https://openreview.net/forum?id=bJJY9TFfe0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71155", "video": "https://nips.cc/virtual/2023/poster/71155", "author_site": "Theo Adrai, Guy Ohayon, Michael Elad, Tomer Michaeli", "tldr": "", "abstract": "We propose an image restoration algorithm that can control the perceptual quality and/or the mean square error (MSE) of any pre-trained model, trading one over the other at test time. Our algorithm is few-shot: Given about a dozen images restored by the model, it can significantly improve the perceptual quality and/or the MSE of the model for newly restored images without further training. Our approach is motivated by a recent theoretical result that links between the minimum MSE (MMSE) predictor and the predictor that minimizes the MSE under a perfect perceptual quality constraint. Specifically, it has been shown that the latter can be obtained by optimally transporting the output of the former, such that its distribution matches that of the source data. Thus, to improve the perceptual quality of a predictor that was originally trained to minimize MSE, we approximate the optimal transport by a linear transformation in the latent space of a variational auto-encoder, which we compute in closed-form using empirical means and covariances. Going beyond the theory, we find that applying the same procedure on models that were initially trained to achieve high perceptual quality, typically improves their perceptual quality even further. And by interpolating the results with the original output of the model, we can improve their MSE on the expense of perceptual quality. We illustrate our method on a variety of degradations applied to general content images with arbitrary dimensions.", "keywords": "Computer Vision;Image Restoration;Deep Learning;Perceptual Quality", "primary_area": "", "supplementary_material": "/attachment/f23838d45c97fa8d1dedc482ea792fdd6f62f6d5.zip", "author": "Theo Joseph Adrai;Guy Ohayon;Michael Elad;Tomer Michaeli", "authorids": "~Theo_Joseph_Adrai1;~Guy_Ohayon1;~Michael_Elad1;~Tomer_Michaeli1", "gender": "M;M;M;M", "homepage": "https://theoad.webgr.technion.ac.il/;;https://elad.cs.technion.ac.il/;https://tomer.net.technion.ac.il/", "dblp": ";287/4281;e/MichaelElad;70/3188.html", "google_scholar": "a2cSGuwAAAAJ;Gso71ogAAAAJ;UpZbV44AAAAJ;n2EbR2cAAAAJ", "orcid": ";;0000-0001-8131-6928;", "linkedin": ";ohayonguy/;michael-elad-5553852a3/;", "or_profile": "~Theo_Joseph_Adrai1;~Guy_Ohayon1;~Michael_Elad1;~Tomer_Michaeli1", "aff": ";Microsoft;Verily;Technion, Technion", "aff_domain": ";microsoft.com;verily.com;technion.ac.il", "position": ";Intern;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nadrai2023deep,\ntitle={Deep Optimal Transport: A Practical Algorithm for Photo-realistic Image Restoration},\nauthor={Theo Joseph Adrai and Guy Ohayon and Michael Elad and Tomer Michaeli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bJJY9TFfe0}\n}", "github": "", "project": "", "reviewers": "XzV9;tvbs;Md85;YhDf;HWzm", "pdf_size": 21065552, "rating": "3;4;5;5;6", "confidence": "5;4;5;4;5", "soundness": "2;3;2;3;3", "novelty": "2;2;2;2;3", "presentation": "3;2;3;3;3", "wc_summary": "201;65;82;31;44", "wc_strengths": "79;19;54;39;20", "wc_weaknesses": "207;102;150;203;15", "wc_questions": "97;5;27;17;13", "wc_limitations": "7;1;11;8;1", "wc_review": "591;192;324;298;93", "wc_reply_reviewers": "14;21;0;39;12", 
"wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.6, 1.0198039027185568 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 84.6, 60.76380501581513 ], "wc_strengths_avg": [ 42.2, 22.51577224969199 ], "wc_weaknesses_avg": [ 135.4, 71.44396405575492 ], "wc_questions_avg": [ 31.8, 33.36105513918886 ], "wc_limitations_avg": [ 5.6, 3.97994974842648 ], "wc_review_avg": [ 299.6, 167.19641144474363 ], "wc_reply_reviewers_avg": [ 17.2, 12.82809416865966 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.08006407690254355, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8939555692680198095&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";microsoft.com;verily.com;technion.ac.il", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Microsoft;Verily;Technion - Israel Institute of Technology", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.verily.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Microsoft;Verily;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Uncovering the Hidden Dynamics of Video Self-supervised Learning under Distribution Shifts", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71154", "id": "bKqrWLCMrX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a86d17b6cd70366d56ab48d2a05a4df1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bKqrWLCMrX", "openreview": "https://openreview.net/forum?id=bKqrWLCMrX", "poster": "/media/PosterPDFs/NeurIPS%202023/71154.png?t=1701739070.0637062", "slides": "https://nips.cc/virtual/2023/poster/71154", "video": "https://nips.cc/virtual/2023/poster/71154", "author_site": "Pritam Sarkar, Ahmad Beirami, Ali Etemad", "tldr": "", "abstract": "Video self-supervised learning (VSSL) has made significant progress in recent years. However, the exact behavior and dynamics of these models under different forms of distribution shift are not yet known. In this paper, we comprehensively study the behavior of six popular self-supervised methods (v-SimCLR, v-MoCo, v-BYOL, v-SimSiam, v-DINO, v-MAE) in response to various forms of natural distribution shift, i.e., (i) context shift, (ii) viewpoint shift, (iii) actor shift, (iv) source shift, (v) generalizability to unknown classes (zero-shot), and (vi) open-set recognition. To perform this extensive study, we carefully craft a test bed consisting of 17 in-distribution and out-of-distribution benchmark pairs using available public datasets and a series of evaluation protocols to stress-test the different methods under the intended shifts. Our study uncovers a series of intriguing findings and interesting behaviors of VSSL methods. For instance, we observe that while video models generally struggle with context shifts, v-MAE and supervised learning exhibit more robustness. Moreover, our study shows that v-MAE is a strong temporal learner, whereas contrastive methods, v-SimCLR and v-MoCo, exhibit strong performances against viewpoint shifts. 
When studying the notion of open-set recognition, we notice a trade-off between closed-set and open-set recognition performance if the pretrained VSSL encoders are used without finetuning. We hope that our work will contribute to the development of robust video representation learning frameworks for various real-world scenarios. The project page and code are available at: https://pritamqu.github.io/OOD-VSSL.", "keywords": "computer vision;self-supervised learning;video self-supervised learning;natural distribution shift;video learning;out-of-distribution generalization", "primary_area": "", "supplementary_material": "", "author": "Pritam Sarkar;Ahmad Beirami;Ali Etemad", "authorids": "~Pritam_Sarkar1;~Ahmad_Beirami1;~Ali_Etemad1", "gender": "M;M;M", "homepage": "https://pritamsarkar.com/;https://beirami.github.io/;http://www.aiimlab.com", "dblp": "246/5024;41/9367;15/8931", "google_scholar": "https://scholar.google.ca/citations?hl=en;VuKWbMMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-7128-0220", "linkedin": ";ahmad-beirami-97001962;ali-etemad-6b3379b/", "or_profile": "~Pritam_Sarkar1;~Ahmad_Beirami1;~Ali_Etemad1", "aff": "Google;Massachusetts Institute of Technology;Google Research", "aff_domain": "google.com;mit.edu;google.com", "position": "Intern;Research Affiliate;Researcher", "bibtex": "@inproceedings{\nsarkar2023uncovering,\ntitle={Uncovering the Hidden Dynamics of Video Self-supervised Learning under Distribution Shifts},\nauthor={Pritam Sarkar and Ahmad Beirami and Ali Etemad},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bKqrWLCMrX}\n}", "github": "", "project": "", "reviewers": "D3xd;E4vF;jxy6;iDRZ", "pdf_size": 8291188, "rating": "5;6;6;7", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "46;48;36;68", "wc_strengths": "55;24;21;41", "wc_weaknesses": "311;36;143;120", "wc_questions": "116;80;153;19", "wc_limitations": "92;13;4;1", "wc_review": "620;201;357;249", "wc_reply_reviewers": "1070;0;0;95", "wc_reply_authors": "1559;0;0;45", "reply_reviewers": "2;0;0;1", "reply_authors": "4;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 49.5, 11.6081867662439 ], "wc_strengths_avg": [ 35.25, 13.718144918318949 ], "wc_weaknesses_avg": [ 152.5, 99.80105209866277 ], "wc_questions_avg": [ 92.0, 49.42165517260627 ], "wc_limitations_avg": [ 27.5, 37.5 ], "wc_review_avg": [ 356.75, 162.14865864385064 ], "wc_reply_reviewers_avg": [ 291.25, 451.28116235890013 ], "wc_reply_authors_avg": [ 401.0, 668.8239678719656 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4688078162564160624&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;mit.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "Google;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": 
"0;0;0", "aff_country_unique": "United States" }, { "title": "Greatness in Simplicity: Unified Self-Cycle Consistency for Parser-Free Virtual Try-On", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71153", "id": "bLB4vTwSbC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4065a881baab1744bfba208a4361bbb1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bLB4vTwSbC", "openreview": "https://openreview.net/forum?id=bLB4vTwSbC", "poster": "/media/PosterPDFs/NeurIPS%202023/71153.png?t=1699925832.5985675", "slides": "https://nips.cc/virtual/2023/poster/71153", "video": "https://nips.cc/virtual/2023/poster/71153", "author_site": "Chenghu Du, junyin Wang, Shuqing Liu, Shengwu Xiong", "tldr": "", "abstract": "Image-based virtual try-on tasks remain challenging, primarily due to inherent complexities associated with non-rigid garment deformation modeling and strong feature entanglement of clothing within human body. Recent groundbreaking formulations, such as in-painting, cycle consistency, and knowledge distillation, have facilitated self-supervised generation of try-on images. However, these paradigms necessitate the disentanglement of garment features within human body features through auxiliary tasks, such as leveraging 'teacher knowledge' and dual generators. The potential presence of irresponsible prior knowledge in the auxiliary task can serve as a significant bottleneck for the main generator (e.g., 'student model') in the downstream task. Moreover, existing garment deformation methods lack the ability to perceive the correlation between the garment and the human body in the real world, leading to unrealistic alignment effects. To tackle these limitations, we present a new parser-free virtual try-on network based on unified self-cycle consistency (USC-PFN), which enables robust translation between different garments using just a single generator, faithfully replicating non-rigid geometric deformation of garments in real-life scenarios. Specifically, we first propose a self-cycle consistency architecture with a circular mode. It utilizes real unpaired garment-person images exclusively as input for training, effectively eliminating the impact of irresponsible prior knowledge at the model input end. Additionally, we formulate a Markov Random Field to simulate a more natural and realistic garment deformation. Furthermore, USC-PFN can leverage a general generator for self-supervised cycle training. 
Experiments demonstrate that our method achieves state-of-the-art performance on a popular virtual try-on benchmark.", "keywords": "parser-free virtual try-on;self-cycle consistency;human analysis and understanding;fashion synthesis;Markov Random Field", "primary_area": "", "supplementary_material": "/attachment/ce484c1e5feff800c6abee958d1f10bb2d5fc069.zip", "author": "Chenghu Du;junyin Wang;Shuqing Liu;Shengwu Xiong", "authorids": "~Chenghu_Du1;~junyin_Wang1;~Shuqing_Liu1;~Shengwu_Xiong1", "gender": "M;M;F;M", "homepage": ";https://github.com/wjy-whut;;", "dblp": "309/4642;;;", "google_scholar": "pKh8bTwAAAAJ;;;r6ZfScIAAAAJ", "orcid": "0000-0001-7275-5064;;0009-0005-3778-5934;", "linkedin": ";;;", "or_profile": "~Chenghu_Du1;~junyin_Wang1;~Shuqing_Liu1;~Shengwu_Xiong1", "aff": "Wuhan University of Technology;Wuhan University of Technology;Wuhan Textile University;Wuhan University of Technology", "aff_domain": "whut.edu.cn;whut.edu.cn;wtu.edu.cn;whut.edu.cn", "position": "PhD student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\ndu2023greatness,\ntitle={Greatness in Simplicity: Unified Self-Cycle Consistency for Parser-Free Virtual Try-On},\nauthor={Chenghu Du and junyin Wang and Shuqing Liu and Shengwu Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bLB4vTwSbC}\n}", "github": "", "project": "", "reviewers": "RUsU;99zV;XB1J;ugBm;nwZk", "pdf_size": 1819024, "rating": "4;5;5;6;6", "confidence": "4;4;5;2;3", "soundness": "3;2;2;3;3", "novelty": "3;2;1;3;2", "presentation": "2;1;2;3;2", "wc_summary": "85;161;98;115;105", "wc_strengths": "101;129;39;67;71", "wc_weaknesses": "208;244;266;103;1098", "wc_questions": "243;105;45;5;49", "wc_limitations": "31;65;2;12;13", "wc_review": "668;704;450;302;1336", "wc_reply_reviewers": "257;122;292;0;63", "wc_reply_authors": "783;856;675;0;0", "reply_reviewers": "2;3;2;0;1", "reply_authors": "2;3;2;1;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 112.8, 26.003076741032014 ], "wc_strengths_avg": [ 81.4, 30.86486675817668 ], "wc_weaknesses_avg": [ 383.8, 361.4572727169838 ], "wc_questions_avg": [ 89.4, 83.14228767600757 ], "wc_limitations_avg": [ 24.6, 22.258481529520385 ], "wc_review_avg": [ 692.0, 353.8700326391033 ], "wc_reply_reviewers_avg": [ 146.8, 111.72716768986852 ], "wc_reply_authors_avg": [ 462.8, 382.2383549566945 ], "reply_reviewers_avg": [ 1.6, 1.019803902718557 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6813851438692469, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6569218125269700714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "whut.edu.cn;whut.edu.cn;wtu.edu.cn;whut.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Wuhan University of Technology;Wuhan Textile University", "aff_unique_dep": ";", "aff_unique_url": "http://www.wut.edu.cn;http://www.wtu.edu.cn", "aff_unique_abbr": "WUT;WTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Function Space Bayesian Pseudocoreset for Bayesian Neural Networks", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71152", "id": "bM6mynsusR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f36a180277bd3d5781dc02245f9d5f52-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bM6mynsusR", "openreview": "https://openreview.net/forum?id=bM6mynsusR", "poster": "/media/PosterPDFs/NeurIPS%202023/71152.png?t=1697040527.6962156", "slides": "https://nips.cc/virtual/2023/poster/71152", "video": "https://nips.cc/virtual/2023/poster/71152", "author_site": "Balhae Kim, Hyungi Lee, Juho Lee", "tldr": "", "abstract": "A Bayesian pseudocoreset is a compact synthetic dataset summarizing essential information of a large-scale dataset and thus can be used as a proxy dataset for scalable Bayesian inference. Typically, a Bayesian pseudocoreset is constructed by minimizing a divergence measure between the posterior conditioning on the pseudocoreset and the posterior conditioning on the full dataset. However, evaluating the divergence can be challenging, particularly for the models like deep neural networks having high-dimensional parameters. In this paper, we propose a novel Bayesian pseudocoreset construction method that operates on a function space. Unlike previous methods, which construct and match the coreset and full data posteriors in the space of model parameters (weights), our method constructs variational approximations to the coreset posterior on a function space and matches it to the full data posterior in the function space. By working directly on the function space, our method could bypass several challenges that may arise when working on a weight space, including limited scalability and multi-modality issue. Through various experiments, we demonstrate that the Bayesian pseudocoresets constructed from our method enjoys enhanced uncertainty quantification and better robustness across various model architectures.", "keywords": "Bayesian pseudocoresets;Function space variational inference", "primary_area": "", "supplementary_material": "/attachment/6aaebfbc3fe7fa645d522dfc272a2a56f25623fc.pdf", "author": "Balhae Kim;Hyungi Lee;Juho Lee", "authorids": "~Balhae_Kim1;~Hyungi_Lee1;~Juho_Lee2", "gender": "F;M;M", "homepage": "http://siml.kaist.ac.kr;;https://juho.lee.github.io", "dblp": "331/2084;221/7959;55/3410-1", "google_scholar": ";;Py4URJUAAAAJ", "orcid": ";;", "linkedin": ";hyungi-lee-a8b161149/;", "or_profile": "~Balhae_Kim1;~Hyungi_Lee1;~Juho_Lee2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkim2023function,\ntitle={Function Space Bayesian Pseudocoreset for Bayesian Neural Networks},\nauthor={Balhae Kim and Hyungi Lee and Juho Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bM6mynsusR}\n}", "github": "", "project": "", "reviewers": "sWjp;d2wU;jYGC;9hB5", "pdf_size": 6140954, "rating": "4;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "191;107;42;267", "wc_strengths": "42;69;109;91", "wc_weaknesses": "136;8;24;511", "wc_questions": "218;627;1;227", "wc_limitations": "18;1;1;43", "wc_review": "605;812;177;1139", "wc_reply_reviewers": "88;86;87;119", "wc_reply_authors": "0;737;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": 
"1;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 151.75, 84.95697440469499 ], "wc_strengths_avg": [ 77.75, 25.033727249452888 ], "wc_weaknesses_avg": [ 169.75, 203.09895986932085 ], "wc_questions_avg": [ 268.25, 226.0258558218506 ], "wc_limitations_avg": [ 15.75, 17.195566289017645 ], "wc_review_avg": [ 683.25, 348.81827288718694 ], "wc_reply_reviewers_avg": [ 95.0, 13.874436925511608 ], "wc_reply_authors_avg": [ 184.25, 319.13036129456566 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12028367518808138001&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Sequential Predictive Two-Sample and Independence Testing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71151", "id": "bN1ZBSOV2f", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a72b207734d6112f6b47447e46be40e9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bN1ZBSOV2f", "openreview": "https://openreview.net/forum?id=bN1ZBSOV2f", "poster": "/media/PosterPDFs/NeurIPS%202023/71151.png?t=1702328940.643685", "slides": "https://nips.cc/virtual/2023/poster/71151", "video": "https://nips.cc/virtual/2023/poster/71151", "author_site": "Aleksandr Podkopaev, Aaditya Ramdas", "tldr": "", "abstract": "We study the problems of sequential nonparametric two-sample and independence testing. Sequential tests process data online and allow using observed data to decide whether to stop and reject the null hypothesis or to collect more data, while maintaining type I error control. We build upon the principle of (nonparametric) testing by betting, where a gambler places bets on future observations and their wealth measures evidence against the null hypothesis. While recently developed kernel-based betting strategies often work well on simple distributions, selecting a suitable kernel for high-dimensional or structured data, such as images, is often nontrivial. To address this drawback, we design prediction-based betting strategies that rely on the following fact: if a sequentially updated predictor starts to consistently determine (a) which distribution an instance is drawn from, or (b) whether an instance is drawn from the joint distribution or the product of the marginal distributions (the latter produced by external randomization), it provides evidence against the two-sample or independence nulls respectively. We empirically demonstrate the superiority of our tests over kernel-based approaches under structured settings. 
Our tests can be applied beyond the case of independent and identically distributed data, remaining valid and powerful even when the data distribution drifts over time.", "keywords": "two-sample testing;independence testing;testing by betting;sequential testing", "primary_area": "", "supplementary_material": "/attachment/eb6fe2cb1e3079efaf5aef33b8b8c4848a922d71.zip", "author": "Aleksandr Podkopaev;Aaditya Ramdas", "authorids": "~Aleksandr_Podkopaev1;~Aaditya_Ramdas2", "gender": "M;M", "homepage": "https://sashapodkopaev.com;http://stat.cmu.edu/~aramdas", "dblp": "268/0747;117/3518", "google_scholar": "58-8sF8AAAAJ;ZvFaPxUAAAAJ", "orcid": ";0000-0003-0497-311X", "linkedin": "sasha-podkopaev/;", "or_profile": "~Aleksandr_Podkopaev1;~Aaditya_Ramdas2", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\npodkopaev2023sequential,\ntitle={Sequential Predictive Two-Sample and Independence Testing},\nauthor={Aleksandr Podkopaev and Aaditya Ramdas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bN1ZBSOV2f}\n}", "github": "", "project": "", "reviewers": "ssj4;VGT6;FPjp;ryYs;qWK6", "pdf_size": 1407702, "rating": "4;6;6;7;8", "confidence": "4;2;3;4;4", "soundness": "3;3;3;4;4", "novelty": "2;3;3;4;3", "presentation": "1;2;3;4;3", "wc_summary": "70;26;74;77;185", "wc_strengths": "58;48;81;39;57", "wc_weaknesses": "94;46;56;35;122", "wc_questions": "51;63;86;41;180", "wc_limitations": "10;58;5;3;6", "wc_review": "283;241;302;195;550", "wc_reply_reviewers": "10;10;370;0;40", "wc_reply_authors": "0;0;1063;0;0", "reply_reviewers": "1;1;4;0;1", "reply_authors": "1;1;3;1;1", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 86.4, 52.690037008907105 ], "wc_strengths_avg": [ 56.6, 14.008568806269967 ], "wc_weaknesses_avg": [ 70.6, 32.481379281058864 ], "wc_questions_avg": [ 84.2, 50.197211077907504 ], "wc_limitations_avg": [ 16.4, 20.924626639440906 ], "wc_review_avg": [ 314.2, 123.52392480811156 ], "wc_reply_reviewers_avg": [ 86.0, 142.63239463740345 ], "wc_reply_authors_avg": [ 212.6, 425.2 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250536 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.11306675421666137, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14108215863232729652&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "andrew.cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Layer-wise Equivariances Automatically using Gradients", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71150", "id": "bNIHdyunFC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a33645b5c5b7a9882652526d30d0acc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bNIHdyunFC", "openreview": "https://openreview.net/forum?id=bNIHdyunFC", "poster": "", 
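The betting protocol in the sequential-testing record above has a compact generic form: under the two-sample null (with externally randomized labels), the label of each incoming point is a fair coin independent of the point, so the wealth process below is a nonnegative martingale with initial value 1, and stopping when it exceeds 1/alpha controls type I error by Ville's inequality. A minimal sketch assuming a generic online classifier API (`predict_proba`, `update`) and a placeholder betting fraction `lam`; the paper's actual betting strategies are more refined.

```python
def sequential_two_sample_test(stream, predict_proba, update,
                               alpha=0.05, lam=0.5):
    """stream yields (x, y) with y in {0, 1} marking which sample x is from;
    the predictor must only use previously revealed pairs."""
    wealth = 1.0
    for t, (x, y) in enumerate(stream, start=1):
        p = predict_proba(x)                   # P(y = 1 | x) from past data
        q = p if y == 1 else 1.0 - p           # probability put on the truth
        # Payoff 2q - 1 has mean 0 under the null; lam < 1 keeps wealth > 0.
        wealth *= 1.0 + lam * (2.0 * q - 1.0)
        update(x, y)                           # learn from the revealed pair
        if wealth >= 1.0 / alpha:
            return f"reject the null at step {t}"
    return "failed to reject the null"
```

Wealth grows only when the predictor consistently beats a fair coin, which is exactly the "consistently determine which distribution an instance is drawn from" criterion in the abstract.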
"slides": "https://nips.cc/virtual/2023/poster/71150", "video": "https://nips.cc/virtual/2023/poster/71150", "author_site": "Tycho van der Ouderaa, Alexander Immer, Mark van der Wilk", "tldr": "", "abstract": "Convolutions encode equivariance symmetries into neural networks leading to better generalisation performance. However, symmetries provide fixed hard constraints on the functions a network can represent, need to be specified in advance, and can not be adapted. Our goal is to allow flexible symmetry constraints that can automatically be learned from data using gradients. Learning symmetry and associated weight connectivity structures from scratch is difficult for two reasons. First, it requires efficient and flexible parameterisations of layer-wise equivariances. Secondly, symmetries act as constraints and are therefore not encouraged by training losses measuring data fit. To overcome these challenges, we improve parameterisations of soft equivariance and learn the amount of equivariance in layers by optimising the marginal likelihood, estimated using differentiable Laplace approximations. The objective balances data fit and model complexity enabling layer-wise symmetry discovery in deep networks. We demonstrate the ability to automatically learn layer-wise equivariances on image classification tasks, achieving equivalent or improved performance over baselines with hard-coded symmetry.", "keywords": "learning layer-wise relaxed equivariances bayesian symmetry discovery marginal likelihood", "primary_area": "", "supplementary_material": "/attachment/e862118ef9e288e6c63c80bd734b5ac97fa95887.pdf", "author": "Tycho F.A. van der Ouderaa;Alexander Immer;Mark van der Wilk", "authorids": "~Tycho_F.A._van_der_Ouderaa1;~Alexander_Immer1;~Mark_van_der_Wilk1", "gender": "M;;M", "homepage": "https://tychovdo.github.io/;;https://mvdw.uk", "dblp": ";;142/2927", "google_scholar": ";;PKcjcT4AAAAJ", "orcid": ";;0000-0001-7947-6682", "linkedin": "tychovdo/;;", "or_profile": "~Tycho_F.A._van_der_Ouderaa1;~Alexander_Immer1;~Mark_van_der_Wilk1", "aff": "Imperial College London;;Imperial College London", "aff_domain": "imperial.ac.uk;;imperial.ac.uk", "position": "PhD student;;Lecturer (Assistant Professor)", "bibtex": "@inproceedings{\nouderaa2023learning,\ntitle={Learning Layer-wise Equivariances Automatically using Gradients},\nauthor={Tycho F.A. 
van der Ouderaa and Alexander Immer and Mark van der Wilk},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bNIHdyunFC}\n}", "github": "", "project": "", "reviewers": "iYmm;n5aj;VChg;gE9j", "pdf_size": 1497073, "rating": "6;7;7;7", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;2;2;3", "presentation": "3;2;3;4", "wc_summary": "63;240;217;313", "wc_strengths": "36;76;119;75", "wc_weaknesses": "102;246;282;123", "wc_questions": "37;105;104;180", "wc_limitations": "1;11;13;30", "wc_review": "239;678;735;721", "wc_reply_reviewers": "27;35;0;31", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 208.25, 91.04222921260221 ], "wc_strengths_avg": [ 76.5, 29.364093720052047 ], "wc_weaknesses_avg": [ 188.25, 77.16986134495772 ], "wc_questions_avg": [ 106.5, 50.59891303180336 ], "wc_limitations_avg": [ 13.75, 10.425329730996522 ], "wc_review_avg": [ 593.25, 205.6020123928752 ], "wc_reply_reviewers_avg": [ 23.25, 13.718144918318949 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9975681631591528099&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "imperial.ac.uk;;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Empowering Collaborative Filtering with Principled Adversarial Contrastive Loss", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71149", "id": "bNNIf8F9OU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/13f1750b825659394a6499399e7637fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bNNIf8F9OU", "openreview": "https://openreview.net/forum?id=bNNIf8F9OU", "poster": "/media/PosterPDFs/NeurIPS%202023/71149.png?t=1699943034.7682314", "slides": "https://nips.cc/virtual/2023/poster/71149", "video": "https://nips.cc/virtual/2023/poster/71149", "author_site": "An Zhang, Leheng Sheng, Zhibo Cai, Xiang Wang, Tat-Seng Chua", "tldr": "", "abstract": "Contrastive Learning (CL) has achieved impressive performance in self-supervised learning tasks, showing superior generalization ability. Inspired by this success, adopting CL in collaborative filtering (CF) has become prevalent in semi-supervised top-K recommendations. The basic idea is to routinely conduct heuristic-based data augmentation and apply contrastive losses (e.g., InfoNCE) on the augmented views. Yet, some CF-tailored challenges make this adoption suboptimal, such as the out-of-distribution issue, the risk of false negatives, and the nature of top-K evaluation.
They necessitate the CL-based CF scheme to focus more on mining hard negatives and distinguishing false negatives from the vast unlabeled user-item interactions for informative contrast signals. Worse still, there is limited understanding of contrastive loss in CF methods, especially w.r.t. its generalization ability. To bridge the gap, we delve into the reasons underpinning the success of contrastive loss in CF, and propose a principled Adversarial InfoNCE loss (AdvInfoNCE), which is a variant of InfoNCE, specially tailored for CF methods. AdvInfoNCE adaptively explores and assigns hardness to each negative instance in an adversarial fashion and further utilizes a fine-grained hardness-aware ranking criterion to empower the recommender\u2019s generalization ability. Training CF models with AdvInfoNCE, we validate its effectiveness on both synthetic and real-world benchmark datasets, thus showing its generalization ability to mitigate out-of-distribution problems. Given the theoretical guarantees and empirical superiority of AdvInfoNCE over most contrastive loss functions, we advocate its adoption as a standard loss in recommender systems, particularly for out-of-distribution tasks. Codes are available at https://github.com/LehengTHU/AdvInfoNCE.", "keywords": "Collaborative filtering;Contrastive loss;Recommendation;Generalization ability", "primary_area": "", "supplementary_material": "/attachment/e5db06631ff67b4cdfe2baee820b44d5ee494843.pdf", "author": "An Zhang;Leheng Sheng;Zhibo Cai;Xiang Wang;Tat-Seng Chua", "authorids": "~An_Zhang2;~Leheng_Sheng2;~Zhibo_Cai1;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;M;M;F;M", "homepage": "https://lehengthu.github.io/;https://github.com/caizhibo2;https://github.com/xiangwang1223;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": "359/0347.html;;31/2864-10;78/5581-3;", "google_scholar": "https://scholar.google.com.hk/citations?user=s8bNbU0AAAAJ;;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0000-0002-5764-6596;;0000-0002-6148-6329;;0000-0001-6097-7807", "linkedin": ";;;;", "or_profile": "~Leheng_Sheng2;~Zhibo_Cai1;~Xiang_Wang6;~AN_ZHANG1;~Tat-seng_Chua1", "aff": "Tsinghua University;Renmin University of China;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "mails.tsinghua.edu.cn;ruc.edu.cn;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": "MS student;Lecturer;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2023empowering,\ntitle={Empowering Collaborative Filtering with Principled Adversarial Contrastive Loss},\nauthor={An Zhang and Leheng Sheng and Zhibo Cai and Xiang Wang and Tat-Seng Chua},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bNNIf8F9OU}\n}", "github": "", "project": "", "reviewers": "1hVj;kSe8;ikze;qmEP;PLbw", "pdf_size": 4477533, "rating": "5;6;7;8;8", "confidence": "3;4;4;4;4", "soundness": "3;2;3;4;3", "novelty": "3;2;3;3;4", "presentation": "3;2;3;4;3", "wc_summary": "83;111;71;145;79", "wc_strengths": "57;106;50;124;129", "wc_weaknesses": "91;231;56;118;85", "wc_questions": "162;6;90;38;27", "wc_limitations": "17;4;6;1;7", "wc_review": "410;458;273;426;327", "wc_reply_reviewers": "12;154;16;18;33", "wc_reply_authors": "88;947;0;24;26", "reply_reviewers": "1;3;1;1;1", 
"reply_authors": "2;6;1;2;2", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 97.8, 27.176460402340847 ], "wc_strengths_avg": [ 93.2, 33.37903533657017 ], "wc_weaknesses_avg": [ 116.2, 60.68739572596603 ], "wc_questions_avg": [ 64.6, 56.01285566724839 ], "wc_limitations_avg": [ 7.0, 5.403702434442518 ], "wc_review_avg": [ 378.8, 68.33856890512122 ], "wc_reply_reviewers_avg": [ 46.6, 54.168625605603104 ], "wc_reply_authors_avg": [ 217.0, 366.1584356532019 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 2.6, 1.7435595774162693 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7717436331412898, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13488398690297903974&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "mails.tsinghua.edu.cn;ruc.edu.cn;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Tsinghua University;Renmin University of China;University of Science and Technology of China;National University of Singapore", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;http://www.ustc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "THU;RUC;USTC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Structured Neural Networks for Density Estimation and Causal Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71148", "id": "bNXVRJjmOl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1881b5125b4e9cf42f6d6d0b6575934-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bNXVRJjmOl", "openreview": "https://openreview.net/forum?id=bNXVRJjmOl", "poster": "/media/PosterPDFs/NeurIPS%202023/71148.png?t=1702228888.936454", "slides": "https://nips.cc/virtual/2023/poster/71148", "video": "https://nips.cc/virtual/2023/poster/71148", "author_site": "Asic Chen, Ruian (Ian) Shi, Xiang Gao, Ricardo Baptista, Rahul Krishnan", "tldr": "", "abstract": "Injecting structure into neural networks enables learning functions that satisfy invariances with respect to subsets of inputs. For instance, when learning generative models using neural networks, it is advantageous to encode the conditional independence structure of observed variables, often in the form of Bayesian networks. We propose the Structured Neural Network (StrNN), which injects structure through masking pathways in a neural network. The masks are designed via a novel relationship we explore between neural network architectures and binary matrix factorization, to ensure that the desired independencies are respected. We devise and study practical algorithms for this otherwise NP-hard design problem based on novel objectives that control the model architecture. We demonstrate the utility of StrNN in three applications: (1) binary and Gaussian density estimation with StrNN, (2) real-valued density estimation with Structured Autoregressive Flows (StrAFs) and Structured Continuous Normalizing Flows (StrCNF), and (3) interventional and counterfactual analysis with StrAFs for causal inference. 
Our work opens up new avenues for learning neural networks that enable data-efficient generative modeling and the use of normalizing flows for causal effect estimation.", "keywords": "generative models;density estimation;normalizing flows;binary matrix factorization;causal inference", "primary_area": "", "supplementary_material": "/attachment/48dac142a223d845d070c1e4e53fb9f8e01dab55.pdf", "author": "Asic Q Chen;Ruian Shi;Xiang Gao;Ricardo Baptista;Rahul G Krishnan", "authorids": "~Asic_Q_Chen1;~Ruian_Shi1;~Xiang_Gao8;~Ricardo_Baptista1;~Rahul_G_Krishnan1", "gender": "F;;M;M;M", "homepage": "https://www.cs.toronto.edu/~asicchen/;;https://www.cs.toronto.edu/~xgao/;;http://www.cs.toronto.edu/~rahulgk/index.html", "dblp": ";238/2739;14/3881-19;136/6901;172/0880", "google_scholar": ";p7GXhskAAAAJ;https://scholar.google.com/citations?hl=en;;ilJgXHkAAAAJ", "orcid": ";;;;", "linkedin": ";;;;rahulgk/", "or_profile": "~Asic_Q_Chen1;~Ruian_Shi1;~Xiang_Gao8;~Ricardo_Baptista1;~Rahul_G_Krishnan1", "aff": "University of Toronto;University of Toronto;Department of Computer Science, University of Toronto;Department of Computing + Mathematical Sciences, California Institute of Technology;Department of Computer Science, University of Toronto", "aff_domain": "utoronto.ca;cs.toronto.edu;cs.toronto.edu;cms.caltech.edu;cs.toronto.edu", "position": "MS student;PhD student;Undergrad student;Instructor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023structured,\ntitle={Structured Neural Networks for Density Estimation and Causal Inference},\nauthor={Asic Q Chen and Ruian Shi and Xiang Gao and Ricardo Baptista and Rahul G Krishnan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bNXVRJjmOl}\n}", "github": "", "project": "", "reviewers": "Tj4q;LoZH;icwN;Wq9i", "pdf_size": 1715983, "rating": "5;6;6;6", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "120;97;92;145", "wc_strengths": "12;44;112;100", "wc_weaknesses": "77;150;64;82", "wc_questions": "59;179;126;114", "wc_limitations": "1;79;3;43", "wc_review": "269;549;397;484", "wc_reply_reviewers": "0;258;26;28", "wc_reply_authors": "0;213;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 113.5, 21.02974084481309 ], "wc_strengths_avg": [ 67.0, 40.82891132518721 ], "wc_weaknesses_avg": [ 93.25, 33.41687447981932 ], "wc_questions_avg": [ 119.5, 42.64094276631322 ], "wc_limitations_avg": [ 31.5, 32.13642792844283 ], "wc_review_avg": [ 424.75, 104.85317114899291 ], "wc_reply_reviewers_avg": [ 78.0, 104.5083728703112 ], "wc_reply_authors_avg": [ 53.25, 92.23170550304272 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8746616489192463453&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "utoronto.ca;cs.toronto.edu;cs.toronto.edu;cms.caltech.edu;cs.toronto.edu", "author_num": 5, "aff_unique_index": "0;0;0;2;0", "aff_unique_norm": "University of Toronto;;California Institute of Technology", "aff_unique_dep": 
";;Mathematical Sciences", "aff_unique_url": "https://www.utoronto.ca;;https://www.caltech.edu", "aff_unique_abbr": "U of T;;Caltech", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Toronto;Pasadena", "aff_country_unique_index": "0;0;0;2;0", "aff_country_unique": "Canada;;United States" }, { "title": "Online Control for Meta-optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71147", "id": "bOQNd7tWAp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/745b7e084d5ca5afc07fb454ab2be522-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bOQNd7tWAp", "openreview": "https://openreview.net/forum?id=bOQNd7tWAp", "poster": "/media/PosterPDFs/NeurIPS%202023/71147.png?t=1701715269.5002348", "slides": "https://nips.cc/virtual/2023/poster/71147", "video": "https://nips.cc/virtual/2023/poster/71147", "author_site": "Xinyi Chen, Elad Hazan", "tldr": "", "abstract": "Choosing the optimal hyperparameters, including learning rate and momentum, for specific optimization instances is a significant yet non-convex challenge. This makes conventional iterative techniques such as hypergradient descent \\cite{baydin2017online} insufficient in obtaining global optimality guarantees.\n\nWe consider the more general task of meta-optimization -- online learning of the best optimization algorithm given problem instances, and introduce a novel approach based on control theory. We show how meta-optimization can be formulated as an optimal control problem, departing from existing literature that uses stability-based methods to study optimization. Our approach leverages convex relaxation techniques in the recently-proposed nonstochastic control framework to overcome the challenge of nonconvexity, and obtains regret guarantees vs. the best offline solution. 
This guarantees that in meta-optimization, we can learn a method that attains convergence comparable to that of the best optimization method in hindsight from a class of methods.", "keywords": "online learning;control;hyperparameter optimization", "primary_area": "", "supplementary_material": "/attachment/f87b26706b7da5520b247383a9fa3e161a8687e8.pdf", "author": "Xinyi Chen;Elad Hazan", "authorids": "~Xinyi_Chen1;~Elad_Hazan1", "gender": "F;M", "homepage": ";https://www.ehazan.com", "dblp": "84/6214;72/739", "google_scholar": ";LnhCGNMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xinyi_Chen1;~Elad_Hazan1", "aff": "Google DeepMind;Princeton University", "aff_domain": "google.com;princeton.edu", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2023online,\ntitle={Online Control for Meta-optimization},\nauthor={Xinyi Chen and Elad Hazan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bOQNd7tWAp}\n}", "github": "", "project": "", "reviewers": "MnL8;fcBo;PoEe;yXWX", "pdf_size": 634089, "rating": "7;7;7;8", "confidence": "4;3;2;3", "soundness": "4;3;4;4", "novelty": "3;3;4;4", "presentation": "2;3;3;4", "wc_summary": "96;98;48;79", "wc_strengths": "109;121;51;88", "wc_weaknesses": "287;93;44;52", "wc_questions": "25;52;68;99", "wc_limitations": "41;9;18;12", "wc_review": "558;373;229;330", "wc_reply_reviewers": "467;22;25;15", "wc_reply_authors": "502;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.25, 20.029665498954294 ], "wc_strengths_avg": [ 92.25, 26.58359456506964 ], "wc_weaknesses_avg": [ 119.0, 98.75980963934671 ], "wc_questions_avg": [ 61.0, 26.78619047195775 ], "wc_limitations_avg": [ 20.0, 12.549900398011133 ], "wc_review_avg": [ 372.5, 119.17319329446535 ], "wc_reply_reviewers_avg": [ 132.25, 193.3020628446577 ], "wc_reply_authors_avg": [ 125.5, 217.3723763498941 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5356730148481835361&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "google.com;princeton.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Princeton University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.princeton.edu", "aff_unique_abbr": "DeepMind;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Efficiently incorporating quintuple interactions into geometric deep learning force fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71146", "id": "bPJmu1PbZD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f32b13bfc384b3b1d52d675b05f2bece-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bPJmu1PbZD", "openreview": "https://openreview.net/forum?id=bPJmu1PbZD", "poster": "/media/PosterPDFs/NeurIPS%202023/71146.png?t=1697348350.2320783", "slides": "https://nips.cc/virtual/2023/poster/71146", "video": 
"https://nips.cc/virtual/2023/poster/71146", "author_site": "Zun Wang, Guoqing Liu, Yichi Zhou, Tong Wang, Bin Shao", "tldr": "", "abstract": "Machine learning force fields (MLFFs) have instigated a groundbreaking shift in molecular dynamics (MD) simulations across a wide range of fields, such as physics, chemistry, biology, and materials science. Incorporating higher order many-body interactions can enhance the expressiveness and accuracy of models. Recent models have achieved this by explicitly including up to four-body interactions. However, five-body interactions, which have relevance in various fields, are still challenging to incorporate efficiently into MLFFs. In this work, we propose the quintuple network (QuinNet), an end-to-end graph neural network that efficiently expresses many-body interactions up to five-body interactions with \\emph{ab initio} accuracy. By analyzing the topology of diverse many-body interactions, we design the model architecture to efficiently and explicitly represent these interactions. We evaluate QuinNet on public datasets of small molecules, such as MD17 and its revised version, and show that it is compatible with other state-of-the-art models on these benchmarks. Moreover, QuinNet surpasses many leading models on larger and more complex molecular systems, such as MD22 and Chignolin, without increasing the computational complexity. We also use QuinNet as a force field for molecular dynamics (MD) simulations to demonstrate its accuracy and stability, and conduct an ablation study to elucidate the significance of five-body interactions. We open source our implementation at https://github.com/Zun-Wang/QuinNet.", "keywords": "Machine learning force field;graph neural network;many-body interactions", "primary_area": "", "supplementary_material": "/attachment/efb5f2c95ba88c7b547ff92c0fffb598d01f1fa1.pdf", "author": "Zun Wang;Guoqing Liu;Yichi Zhou;Tong Wang;Bin Shao", "authorids": "~Zun_Wang2;~Guoqing_Liu3;~Yichi_Zhou2;~Tong_Wang2;~Bin_Shao1", "gender": "M;M;;M;", "homepage": ";https://www.microsoft.com/en-us/research/people/guoqingliu/;https://www.microsoft.com/en-us/research/people/yiczho/;;https://www.binshao.info/", "dblp": "44/8410;;203/4453;51/6856-14;", "google_scholar": ";h-eHvyoAAAAJ;;ETiIfc4AAAAJ;h9L4CgIAAAAJ", "orcid": "0000-0002-8763-8327;;;0000-0002-9483-0050;", "linkedin": ";;;tong-wang-2a5965b9/;", "or_profile": "~Zun_Wang2;~Guoqing_Liu3;~Yichi_Zhou2;~Tong_Wang2;~Bin_Shao1", "aff": "Microsoft;Microsoft Research ;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Microsoft research;Researcher;Principal Research Manager", "bibtex": "@inproceedings{\nwang2023efficiently,\ntitle={Efficiently incorporating quintuple interactions into geometric deep learning force fields},\nauthor={Zun Wang and Guoqing Liu and Yichi Zhou and Tong Wang and Bin Shao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bPJmu1PbZD}\n}", "github": "", "project": "", "reviewers": "YmDt;8RW7;um1j;YYfR", "pdf_size": 10323086, "rating": "4;5;7;7", "confidence": "5;5;4;4", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "52;64;157;53", "wc_strengths": "19;41;97;93", "wc_weaknesses": "398;6;105;297", "wc_questions": "10;282;61;46", "wc_limitations": "10;1;32;8", "wc_review": "489;394;452;497", "wc_reply_reviewers": "0;0;21;96", "wc_reply_authors": 
"0;0;36;79", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 43.84347157787577 ], "wc_strengths_avg": [ 62.5, 33.44772040064913 ], "wc_weaknesses_avg": [ 201.5, 154.32514377119497 ], "wc_questions_avg": [ 99.75, 106.84188083331368 ], "wc_limitations_avg": [ 12.75, 11.60549438843516 ], "wc_review_avg": [ 458.0, 40.663251222694925 ], "wc_reply_reviewers_avg": [ 29.25, 39.480216564755565 ], "wc_reply_authors_avg": [ 28.75, 32.52210786526605 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13926838901906230036&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distributionally Robust Bayesian Optimization with $\\varphi$-divergences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71145", "id": "bRlEwWd7Vy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3feb8ed3c33c3310b45f80be7dfef707-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bRlEwWd7Vy", "openreview": "https://openreview.net/forum?id=bRlEwWd7Vy", "poster": "/media/PosterPDFs/NeurIPS%202023/71145.png?t=1698372756.3636768", "slides": "https://nips.cc/virtual/2023/poster/71145", "video": "https://nips.cc/virtual/2023/poster/71145", "author_site": "Hisham Husain, Vu Nguyen, Anton van den Hengel", "tldr": "", "abstract": "The study of robustness has received much attention due to its inevitability in data-driven settings where many systems face uncertainty. One such example of concern is Bayesian Optimization (BO), where uncertainty is multi-faceted, yet there only exists a limited number of works dedicated to this direction. In particular, there is the work of Kirschner et al., which bridges the existing literature of Distributionally Robust Optimization (DRO) by casting the BO problem from the lens of DRO. While this work is pioneering, it admittedly suffers from various practical shortcomings such as finite contexts assumptions, leaving behind the main question \\textit{Can one devise a computationally tractable algorithm for solving this DRO-BO problem}? In this work, we tackle this question to a large degree of generality by considering robustness against data-shift in $\\varphi$-divergences, which subsumes many popular choices, such as the $\\chi^2$-divergence, Total Variation, and the extant Kullback-Leibler (KL) divergence. We show that the DRO-BO problem in this setting is equivalent to a finite-dimensional optimization problem which, even in the continuous context setting, can be easily implemented with provable sublinear regret bounds. 
We then show experimentally that our method surpasses existing methods, attesting to the theoretical results.", "keywords": "Bayesian Optimization;Distributionally Robust Optimization;\u03c6-divergences", "primary_area": "", "supplementary_material": "/attachment/43d7597baf9308600d25c648036bc90f77e030a0.pdf", "author": "Hisham Husain;Vu Nguyen;Anton van den Hengel", "authorids": "~Hisham_Husain1;~Vu_Nguyen1;~Anton_van_den_Hengel1", "gender": ";M;", "homepage": ";http://ntienvu.github.io;", "dblp": "222/3235;68/11111;v/AntonvandenHengel", "google_scholar": "bwq3crYAAAAJ;https://scholar.google.com.au/citations?user=5RQyC9cAAAAJ;https://scholar.google.com.au/citations?user=nMGZ2ZQAAAAJ", "orcid": ";0000-0002-0294-4561;0000-0003-3027-8364", "linkedin": ";tienvunguyen/;", "or_profile": "~Hisham_Husain1;~Vu_Nguyen1;~Anton_van_den_Hengel1", "aff": "Amazon;Amazon;University of Adelaide", "aff_domain": "amazon.com;amazon.com;adelaide.edu.au", "position": "Researcher;Machine Learning Scientist;Professor", "bibtex": "@inproceedings{\nhusain2023distributionally,\ntitle={Distributionally Robust Bayesian Optimization with \\${\\textbackslash}varphi\\$-divergences},\nauthor={Hisham Husain and Vu Nguyen and Anton van den Hengel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bRlEwWd7Vy}\n}", "github": "", "project": "", "reviewers": "NSv3;xPHX;1hSk;Yhe4;DEDT", "pdf_size": 693625, "rating": "3;5;6;7;8", "confidence": "4;2;3;3;4", "soundness": "2;3;3;4;3", "novelty": "3;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "202;56;106;77;108", "wc_strengths": "75;31;67;143;119", "wc_weaknesses": "387;5;197;113;142", "wc_questions": "254;1;516;76;40", "wc_limitations": "20;11;8;3;2", "wc_review": "938;104;894;412;411", "wc_reply_reviewers": "109;0;167;105;15", "wc_reply_authors": "409;0;208;651;0", "reply_reviewers": "1;0;3;3;1", "reply_authors": "2;1;3;3;1", "rating_avg": [ 5.8, 1.7204650534085253 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 109.8, 49.977594980150855 ], "wc_strengths_avg": [ 87.0, 39.59797974644666 ], "wc_weaknesses_avg": [ 168.8, 125.75277332925903 ], "wc_questions_avg": [ 177.4, 190.15530494835008 ], "wc_limitations_avg": [ 8.8, 6.493073232299171 ], "wc_review_avg": [ 551.8, 318.1649886458282 ], "wc_reply_reviewers_avg": [ 79.2, 62.70055821123126 ], "wc_reply_authors_avg": [ 253.6, 250.11245470787736 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.031068488300060027, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1755575606028584129&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "amazon.com;amazon.com;adelaide.edu.au", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Amazon;University of Adelaide", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.adelaide.edu.au", "aff_unique_abbr": "Amazon;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Australia" }, { "title": "Primal-Attention: Self-attention through Asymmetric Kernel SVD in Primal Representation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71144", "id": "bRyduWAAVT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd687a58a13b673eea3fc1b2e4944cf7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bRyduWAAVT", "openreview": "https://openreview.net/forum?id=bRyduWAAVT", "poster": "/media/PosterPDFs/NeurIPS%202023/71144.png?t=1698955815.6176822", "slides": "https://nips.cc/virtual/2023/poster/71144", "video": "https://nips.cc/virtual/2023/poster/71144", "author_site": "Yingyi Chen, Qinghua Tao, Francesco Tonin, Johan Suykens", "tldr": "", "abstract": "Recently, a new line of works has emerged to understand and improve self-attention in Transformers by treating it as a kernel machine. However, existing works apply the methods for symmetric kernels to the asymmetric self-attention, resulting in a nontrivial gap between the analytical understanding and numerical implementation. In this paper, we provide a new perspective to represent and optimize self-attention through asymmetric Kernel Singular Value Decomposition (KSVD), which is also motivated by the low-rank property of self-attention normally observed in deep layers. Through asymmetric KSVD, i) a primal-dual representation of self-attention is formulated, where the optimization objective is cast to maximize the projection variances in the attention outputs; ii) a novel attention mechanism, i.e., Primal-Attention, is proposed via the primal representation of KSVD, avoiding explicit computation of the kernel matrix in the dual; iii) with KKT conditions, we prove that the stationary solution to the KSVD optimization in Primal-Attention yields a zero-value objective. In this manner, KSVD optimization can be implemented by simply minimizing a regularization loss, so that low-rank property is promoted without extra decomposition. Numerical experiments show state-of-the-art performance of our Primal-Attention with improved efficiency. Moreover, we demonstrate that the deployed KSVD optimization regularizes Primal-Attention with a sharper singular value decay than that of the canonical self-attention, further verifying the great potential of our method. 
To the best of our knowledge, this is the first work that provides a primal-dual representation for the asymmetric kernel in self-attention and successfully applies it to modelling and optimization.", "keywords": "Self-attention;primal-dual representations;SVD;kernel method;asymmetry;transformer", "primary_area": "", "supplementary_material": "/attachment/838390341fc19fc44ad931d1a2e7e68a62b591f6.pdf", "author": "Yingyi Chen;Qinghua Tao;Francesco Tonin;Johan Suykens", "authorids": "~Yingyi_Chen3;~Qinghua_Tao1;~Francesco_Tonin1;~Johan_Suykens1", "gender": "F;F;;M", "homepage": ";https://qinghua-tao.github.io/;https://taralloc.github.io/;https://www.kuleuven.be/wieiswie/nl/person/00015385", "dblp": "09/9441;182/9643.html;279/6777;61/3224", "google_scholar": "5b2jAVUAAAAJ;_dZHZD8AAAAJ;;https://scholar.google.be/citations?user=WtBmh0UAAAAJ", "orcid": "0000-0002-5571-9050;0000-0001-9705-7748;0000-0002-5644-0086;0000-0002-8846-6352", "linkedin": ";;;", "or_profile": "~Yingyi_Chen3;~Qinghua_Tao1;~Francesco_Tonin1;~Johan_Suykens1", "aff": "Department of Electrical Engineering, KU Leuven, Belgium;(ESAT) Department of Electrical Engineering, KU Leuven, Belgium, KU Leuven;KU Leuven;KU Leuven", "aff_domain": "esat.kuleuven.be;esat.kuleuven.be;kuleuven.be;kuleuven.be", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2023primalattention,\ntitle={Primal-Attention: Self-attention through Asymmetric Kernel {SVD} in Primal Representation},\nauthor={Yingyi Chen and Qinghua Tao and Francesco Tonin and Johan Suykens},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bRyduWAAVT}\n}", "github": "", "project": "", "reviewers": "2pAx;aphr;8hph", "pdf_size": 3029951, "rating": "4;7;7", "confidence": "4;2;5", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;2;4", "wc_summary": "90;100;77", "wc_strengths": "24;17;44", "wc_weaknesses": "51;74;41", "wc_questions": "48;86;28", "wc_limitations": "1;1;11", "wc_review": "214;278;201", "wc_reply_reviewers": "102;13;130", "wc_reply_authors": "688;0;669", "reply_reviewers": "1;1;2", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 89.0, 9.41629792788369 ], "wc_strengths_avg": [ 28.333333333333332, 11.440668201153676 ], "wc_weaknesses_avg": [ 55.333333333333336, 13.816254517375139 ], "wc_questions_avg": [ 54.0, 24.055491403558285 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 231.0, 33.65511352924941 ], "wc_reply_reviewers_avg": [ 81.66666666666667, 49.88208317845419 ], "wc_reply_authors_avg": [ 452.3333333333333, 319.9420086342037 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.18898223650461365, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10688105235098045052&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 10, "email": "esat.kuleuven.be;esat.kuleuven.be;kuleuven.be;kuleuven.be", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "KU Leuven;Katholieke Universiteit Leuven", "aff_unique_dep": "Department of Electrical Engineering;", 
"aff_unique_url": "https://www.kuleuven.be;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Belgium" }, { "title": "Seeing is not Believing: Robust Reinforcement Learning against Spurious Correlation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71143", "id": "bTL5SNOpfa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0c3841867db2c516454845a450ca885-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bTL5SNOpfa", "openreview": "https://openreview.net/forum?id=bTL5SNOpfa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71143", "video": "https://nips.cc/virtual/2023/poster/71143", "author_site": "Wenhao Ding, Laixi Shi, Yuejie Chi, DING ZHAO", "tldr": "", "abstract": "Robustness has been extensively studied in reinforcement learning (RL) to handle various forms of uncertainty such as random perturbations, rare events, and malicious attacks. In this work, we consider one critical type of robustness against spurious correlation, where different portions of the state do not have correlations induced by unobserved confounders. These spurious correlations are ubiquitous in real-world tasks, for instance, a self-driving car usually observes heavy traffic in the daytime and light traffic at night due to unobservable human activity. A model that learns such useless or even harmful correlation could catastrophically fail when the confounder in the test case deviates from the training one. Although motivated, enabling robustness against spurious correlation poses significant challenges since the uncertainty set, shaped by the unobserved confounder and causal structure, is difficult to characterize and identify. Existing robust algorithms that assume simple and unstructured uncertainty sets are therefore inadequate to address this challenge. To solve this issue, we propose Robust State-Confounded Markov Decision Processes (RSC-MDPs) and theoretically demonstrate its superiority in avoiding learning spurious correlations compared with other robust RL counterparts. 
We also design an empirical algorithm to learn the robust optimal policy for RSC-MDPs, which outperforms all baselines in eight realistic self-driving and manipulation tasks.", "keywords": "reinforcement learning;robustness;causality;spurious correlation", "primary_area": "", "supplementary_material": "/attachment/33b6d70f1bcb43786325d4d51ddde7f494f88712.pdf", "author": "Wenhao Ding;Laixi Shi;Yuejie Chi;Ding Zhao", "authorids": "~Wenhao_Ding1;~Laixi_Shi1;~Yuejie_Chi1;~Ding_Zhao1", "gender": "M;F;;", "homepage": "https://wenhao.pub;https://laixishi.github.io/;;https://safeai-lab.github.io", "dblp": "215/3667.html;211/7965;;", "google_scholar": "q2aqI9sAAAAJ;V8RkRr8AAAAJ;;z7tPc9IAAAAJ", "orcid": ";;;", "linkedin": "wenhaoding/;;;", "or_profile": "~Wenhao_Ding1;~Laixi_Shi1;~Yuejie_Chi1;~Ding_Zhao1", "aff": "Carnegie Mellon University;Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;;cmu.edu", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nding2023seeing,\ntitle={Seeing is not Believing: Robust Reinforcement Learning against Spurious Correlation},\nauthor={Wenhao Ding and Laixi Shi and Yuejie Chi and Ding Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bTL5SNOpfa}\n}", "github": "", "project": "", "reviewers": "27Kb;EdL3;uQEX;3Qte", "pdf_size": 6573182, "rating": "6;6;6;6", "confidence": "3;3;3;4", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "2;2;3;2", "wc_summary": "43;222;118;73", "wc_strengths": "36;66;73;58", "wc_weaknesses": "304;480;254;149", "wc_questions": "11;97;2;7", "wc_limitations": "23;39;1;29", "wc_review": "417;904;448;316", "wc_reply_reviewers": "26;1168;0;105", "wc_reply_authors": "89;1401;89;128", "reply_reviewers": "1;4;0;1", "reply_authors": "2;6;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 114.0, 67.82698577999763 ], "wc_strengths_avg": [ 58.25, 13.899190623917638 ], "wc_weaknesses_avg": [ 296.75, 119.67743103860477 ], "wc_questions_avg": [ 29.25, 39.245222639195205 ], "wc_limitations_avg": [ 23.0, 13.92838827718412 ], "wc_review_avg": [ 521.25, 226.3066227488714 ], "wc_reply_reviewers_avg": [ 324.75, 488.3837502415493 ], "wc_reply_authors_avg": [ 426.75, 562.7087945820645 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15731682400175881272&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "cmu.edu;andrew.cmu.edu;;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sample-Efficient and Safe Deep Reinforcement Learning via Reset Deep Ensemble Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71142", "id": "bTidcHIK2t", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a6f6a5c517b2b92f3d309786af64086c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bTidcHIK2t", "openreview": "https://openreview.net/forum?id=bTidcHIK2t", "poster": "/media/PosterPDFs/NeurIPS%202023/71142.png?t=1701833446.9095397", "slides": "https://nips.cc/virtual/2023/poster/71142", "video": "https://nips.cc/virtual/2023/poster/71142", "author_site": "Woojun Kim, Yongjae Shin, Jongeui Park, Youngchul Sung", "tldr": "", "abstract": "Deep reinforcement learning (RL) has achieved remarkable success in solving complex tasks through its integration with deep neural networks (DNNs) as function approximators. However, the reliance on DNNs has introduced a new challenge called primacy bias, whereby these function approximators tend to prioritize early experiences, leading to overfitting. To alleviate this bias, a reset method has been proposed, which involves periodic resets of a portion or the entirety of a deep RL agent while preserving the replay buffer. However, the use of this method can result in performance collapses after executing the reset, raising concerns from the perspective of safe RL and regret minimization. In this paper, we propose a novel reset-based method that leverages deep ensemble learning to address the limitations of the vanilla reset method and enhance sample efficiency. The effectiveness of the proposed method is validated through various experiments including those in the domain of safe RL. Numerical results demonstrate its potential for real-world applications requiring high sample efficiency and safety considerations.", "keywords": "deep reinforcement learning;primacy bais;reset;deep ensemble learning", "primary_area": "", "supplementary_material": "/attachment/e11cae19aaf32eabb75bdb2b682eeb3d70631029.zip", "author": "Woojun Kim;Yongjae Shin;Jongeui Park;Youngchul Sung", "authorids": "~Woojun_Kim1;~Yongjae_Shin1;~Jongeui_Park1;~Youngchul_Sung1", "gender": "M;M;M;M", "homepage": ";https://sites.google.com/view/sisrelkaist/members/yjshin;;https://sites.google.com/view/youngchulsung", "dblp": "236/4974;;295/5486;17/6798", "google_scholar": "https://scholar.google.co.kr/citations?user=bcHWCBoAAAAJ;;;-9D2k3UAAAAJ", "orcid": ";;0000-0003-2845-6053;0000-0003-4536-6690", "linkedin": ";;jongeui-park-ab0a91138/;", "or_profile": "~Woojun_Kim1;~Yongjae_Shin1;~Jongeui_Park1;~Youngchul_Sung1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "Postdoc;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2023sampleefficient,\ntitle={Sample-Efficient and Safe Deep Reinforcement Learning via Reset Deep Ensemble Agents},\nauthor={Woojun Kim and Yongjae Shin and Jongeui Park and Youngchul Sung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bTidcHIK2t}\n}", "github": "", "project": "", "reviewers": "6LEC;EaBR;UDiR;fWBK", "pdf_size": 5212616, "rating": "4;5;5;7", "confidence": "4;4;5;4", "soundness": "3;3;2;3", "novelty": "2;2;2;2", "presentation": "3;3;2;3", "wc_summary": "66;126;92;126", "wc_strengths": "125;38;71;132", "wc_weaknesses": "54;172;702;280", "wc_questions": "11;54;16;12", "wc_limitations": "29;6;69;28", "wc_review": "285;396;950;578", "wc_reply_reviewers": 
"0;0;113;46", "wc_reply_authors": "72;65;239;17", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.5, 25.233905761891084 ], "wc_strengths_avg": [ 91.5, 38.87479903485033 ], "wc_weaknesses_avg": [ 302.0, 244.38085031360376 ], "wc_questions_avg": [ 23.25, 17.851820635442202 ], "wc_limitations_avg": [ 33.0, 22.726636354727024 ], "wc_review_avg": [ 552.25, 252.34141059287117 ], "wc_reply_reviewers_avg": [ 39.75, 46.27296727031886 ], "wc_reply_authors_avg": [ 98.25, 83.97432643373807 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18164568769418005743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "The Emergence of Essential Sparsity in Large Pre-trained Models: The Weights that Matter", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71141", "id": "bU9hwbsVcy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a69ab48efcbb0153e72d458fb091969-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bU9hwbsVcy", "openreview": "https://openreview.net/forum?id=bU9hwbsVcy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71141", "video": "https://nips.cc/virtual/2023/poster/71141", "author_site": "AJAY JAISWAL, Shiwei Liu, Shiwei Liu, Tianlong Chen, Tianlong Chen, Zhangyang \"Atlas\" Wang", "tldr": "", "abstract": "Large pre-trained transformers are $\\textit{show-stealer}$ in modern-day deep learning, and it becomes crucial to comprehend the parsimonious patterns that exist within them as they grow in scale. With exploding parameter counts, Lottery Ticket Hypothesis (LTH) and its variants, have lost their pragmatism in sparsifying them due to high computation and memory bottleneck of repetitive $\\textit{train-prune-retrain}$ routine of iterative magnitude pruning (IMP) which worsens with increasing model size. In this paper, we comprehensively study $\\textit{induced sparse patterns}$ across multiple large pre-trained vision and language transformers. We propose the existence of -- $\\textbf{essential sparsity}$ defined with a $\\textbf{sharp dropping point}$ beyond which the performance declines much faster w.r.t the rise of sparsity level, when we directly remove weights with the smallest magnitudes in $\\textbf{one-shot}$. We also present an intriguing emerging phenomenon of $\\textbf{abrupt sparsification}$ during the pre-training of BERT, i.e., BERT suddenly becomes heavily sparse in pre-training after certain iterations. 
Moreover, our observations also indicate a $\\textbf{counter-intuitive}$ finding that BERT trained with a larger amount of pre-training data tends to have a better ability to condense knowledge in comparatively fewer parameters. Lastly, we investigate the effect of the pre-training loss on essential sparsity and discover that self-supervised learning (SSL) objectives trigger stronger emergent sparsification properties than supervised learning (SL). All our code will be publicly available.", "keywords": "Pre-trained Models;Sparsity;Emergence;Transformers;Pruning", "primary_area": "", "supplementary_material": "/attachment/11b232c87dfd3e19249fa2df0250fff895f08f99.pdf", "author": "AJAY KUMAR JAISWAL;Shiwei Liu;Tianlong Chen;Zhangyang Wang", "authorids": "~AJAY_KUMAR_JAISWAL1;~Shiwei_Liu2;~Tianlong_Chen1;~Zhangyang_Wang1", "gender": "M;M;M;M", "homepage": "https://ajay1994.github.io/;https://shiweiliuiiiiiii.github.io/;https://tianlong-chen.github.io;https://vita-group.github.io", "dblp": "30/9707;234/8697-3.html;;119/4026", "google_scholar": "I783HxYAAAAJ;73IbXtsAAAAJ;LE3ctn0AAAAJ;pxFyKAIAAAAJ", "orcid": ";;0000-0001-7774-8197;", "linkedin": ";;tianlong-chen-783862167/;", "or_profile": "~AJAY_KUMAR_JAISWAL1;~Shiwei_Liu2;~Tianlong_Chen1;~Zhangyang_Wang1", "aff": "Amazon;University of Texas at Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "amazon.com;utexas.edu;utexas.edu;utexas.edu", "position": "Researcher;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njaiswal2023the,\ntitle={The Emergence of Essential Sparsity in Large Pre-trained Models: The Weights that Matter},\nauthor={AJAY KUMAR JAISWAL and Shiwei Liu and Tianlong Chen and Zhangyang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bU9hwbsVcy}\n}", "github": "", "project": "", "reviewers": "Vvp4;AZn4;QH66;XMXq;dYgU", "pdf_size": 1239214, "rating": "4;4;6;7;8", "confidence": "5;3;3;4;5", "soundness": "3;2;2;3;3", "novelty": "3;2;3;3;4", "presentation": "3;2;2;3;4", "wc_summary": "17;61;132;107;48", "wc_strengths": "20;60;55;108;357", "wc_weaknesses": "17;302;87;167;187", "wc_questions": "13;18;131;5;4", "wc_limitations": "8;77;85;6;6", "wc_review": "75;518;490;393;602", "wc_reply_reviewers": "50;279;36;178;108", "wc_reply_authors": "978;1583;0;0;52", "reply_reviewers": "1;2;1;1;1", "reply_authors": "5;5;1;1;2", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 73.0, 41.3327956954281 ], "wc_strengths_avg": [ 120.0, 121.7686330710828 ], "wc_weaknesses_avg": [ 152.0, 96.33275663033837 ], "wc_questions_avg": [ 34.2, 48.676072150493 ], "wc_limitations_avg": [ 36.4, 36.510820313983636 ], "wc_review_avg": [ 415.6, 182.91265675179505 ], "wc_reply_reviewers_avg": [ 130.2, 89.69370100514305 ], "wc_reply_authors_avg": [ 522.6, 648.0004938269724 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.8, 1.8330302779823362 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2795084971874737, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9598421083581660950&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "amazon.com;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", 
"aff_unique_norm": "Amazon;University of Texas at Austin", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.utexas.edu", "aff_unique_abbr": "Amazon;UT Austin", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Model-Based Reparameterization Policy Gradient Methods: Theory and Practical Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71140", "id": "bUgqyyNo8j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d78e9e4316e1714fbb0f20be66f8044c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bUgqyyNo8j", "openreview": "https://openreview.net/forum?id=bUgqyyNo8j", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71140", "video": "https://nips.cc/virtual/2023/poster/71140", "author_site": "Shenao Zhang, Boyi Liu, Zhaoran Wang, Tuo Zhao", "tldr": "", "abstract": "ReParameterization (RP) Policy Gradient Methods (PGMs) have been widely adopted for continuous control tasks in robotics and computer graphics. However, recent studies have revealed that, when applied to long-term reinforcement learning problems, model-based RP PGMs may experience chaotic and non-smooth optimization landscapes with exploding gradient variance, which leads to slow convergence. This is in contrast to the conventional belief that reparameterization methods have low gradient estimation variance in problems such as training deep generative models. To comprehend this phenomenon, we conduct a theoretical examination of model-based RP PGMs and search for solutions to the optimization difficulties. Specifically, we analyze the convergence of the model-based RP PGMs and pinpoint the smoothness of function approximators as a major factor that affects the quality of gradient estimation. Based on our analysis, we propose a spectral normalization method to mitigate the exploding variance issue caused by long model unrolls. Our experimental results demonstrate that proper normalization significantly reduces the gradient variance of model-based RP PGMs. As a result, the performance of the proposed method is comparable or superior to other gradient estimators, such as the Likelihood Ratio (LR) gradient estimator. 
Our code is available at https://github.com/agentification/RP_PGM.", "keywords": "Reinforcement Learning;Model-Based Reinforcement Learning;Policy Gradient", "primary_area": "", "supplementary_material": "", "author": "Shenao Zhang;Boyi Liu;Zhaoran Wang;Tuo Zhao", "authorids": "~Shenao_Zhang1;~Boyi_Liu1;~Zhaoran_Wang1;~Tuo_Zhao1", "gender": "M;M;Not Specified;M", "homepage": "https://shenao-zhang.github.io/;;https://zhaoranwang.github.io/;http://www2.isye.gatech.edu/~tzhao80", "dblp": "253/4543.html;;117/2756;", "google_scholar": "8NamuusAAAAJ;1G8RH_YAAAAJ;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ;EJXN6tYAAAAJ", "orcid": ";;;", "linkedin": "shenao-zhang-055a53178/;;;", "or_profile": "~Shenao_Zhang1;~Boyi_Liu1;~Zhaoran_Wang1;~Tuo_Zhao1", "aff": "Georgia Institute of Technology;Northwestern University, Northwestern University;;Georgia Institute of Technology", "aff_domain": "gatech.edu;u.northwestern.edu;;gatech.edu", "position": "MS student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nzhang2023modelbased,\ntitle={Model-Based Reparameterization Policy Gradient Methods: Theory and Practical Algorithms},\nauthor={Shenao Zhang and Boyi Liu and Zhaoran Wang and Tuo Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bUgqyyNo8j}\n}", "github": "", "project": "", "reviewers": "iodf;epDC;eACF;yXDQ", "pdf_size": 1862990, "rating": "6;6;7;7", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;4;4", "wc_summary": "124;93;98;52", "wc_strengths": "84;45;66;62", "wc_weaknesses": "379;469;145;63", "wc_questions": "16;253;2;1", "wc_limitations": "1;38;34;7", "wc_review": "604;898;345;185", "wc_reply_reviewers": "97;673;17;0", "wc_reply_authors": "73;1131;0;95", "reply_reviewers": "1;2;1;0", "reply_authors": "2;4;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 91.75, 25.791229129298976 ], "wc_strengths_avg": [ 64.25, 13.863170633011771 ], "wc_weaknesses_avg": [ 264.0, 165.68946858506126 ], "wc_questions_avg": [ 68.0, 106.9742959780526 ], "wc_limitations_avg": [ 20.0, 16.20185174601965 ], "wc_review_avg": [ 508.0, 270.28410978080086 ], "wc_reply_reviewers_avg": [ 196.75, 277.3917581688396 ], "wc_reply_authors_avg": [ 324.75, 466.81493924252254 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2106356388945607162&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "gatech.edu;u.northwestern.edu;;gatech.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.northwestern.edu", "aff_unique_abbr": "Georgia Tech;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LoRA: A Logical Reasoning Augmented Dataset for Visual Question Answering", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73541", "id": "bW1uwPV3im", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/617ff5271b2b41dfb217a3b0f1b3d1be-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=bW1uwPV3im", "openreview": "https://openreview.net/forum?id=bW1uwPV3im", "poster": "/media/PosterPDFs/NeurIPS%202023/73541.png?t=1701989583.943178", "slides": "https://nips.cc/virtual/2023/poster/73541", "video": "https://nips.cc/virtual/2023/poster/73541", "author_site": "Jingying Gao, Qi Wu, Alan Blair, Maurice Pagnucco", "tldr": "", "abstract": "The capacity to reason logically is a hallmark of human cognition. Humans excel at integrating multimodal information for locigal reasoning, as exemplified by the Visual Question Answering (VQA) task, which is a challenging multimodal task. VQA tasks and large vision-and-language models aim to tackle reasoning problems, but the accuracy, consistency and fabrication of the generated answers is hard to evaluate in the absence of a VQA dataset that can offer formal, comprehensive and systematic complex logical reasoning questions. To address this gap, we present LoRA, a novel Logical Reasoning Augmented VQA dataset that requires formal and complex description logic reasoning based on a food-and-kitchen knowledge base. Our main objective in creating LoRA is to enhance the complex and formal logical reasoning capabilities of VQA models, which are not adequately measured by existing VQA datasets. We devise strong and flexible programs to automatically generate 200,000 diverse description logic reasoning questions based on the SROIQ Description Logic, along with realistic kitchen scenes and ground truth answers. We fine-tune the latest transformer VQA models and evaluate the zero-shot performance of the state-of-the-art large vision-and-language models on LoRA. 
The results reveal that LoRA presents a unique challenge in logical reasoning, setting a systematic and comprehensive evaluation standard.", "keywords": "VQA;Dataset;Logical Reasoning", "primary_area": "", "supplementary_material": "/attachment/a4c1d146b5d959c3cd2bdcbda88cf0a0bf3719ec.pdf", "author": "Jingying Gao;Qi Wu;Alan Blair;Maurice Pagnucco", "authorids": "~Jingying_Gao1;~Qi_Wu3;~Alan_Blair1;~Maurice_Pagnucco1", "gender": "F;M;M;M", "homepage": "https://www.linkedin.com/in/gaojingying/;http://qi-wu.me/;https://www.cse.unsw.edu.au/~blair/;http://www.cse.unsw.edu.au/~morri/", "dblp": "129/9472;96/3446-1;79/3180;p/MauricePagnucco", "google_scholar": "https://scholar.google.com.au/citations?user=9OEpS0cAAAAJ;https://scholar.google.co.uk/citations?user=aKXe1FEAAAAJ;oYi8fBIAAAAJ;lqjmockAAAAJ", "orcid": ";;0000-0002-1039-4766;0000-0001-7712-6646", "linkedin": "gaojingying/;;;mauricepagnucco/?originalSubdomain=au", "or_profile": "~Jingying_Gao1;~Qi_Wu3;~Alan_Blair1;~Maurice_Pagnucco1", "aff": "University of New South Wales;The University of Adelaide;;University of New South Wales", "aff_domain": "unsw.edu.au;adelaide.edu.au;;unsw.edu.au", "position": "Researcher;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\ngao2023lora,\ntitle={Lo{RA}: A Logical Reasoning Augmented Dataset for Visual Question Answering},\nauthor={Jingying Gao and Qi Wu and Alan Blair and Maurice Pagnucco},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=bW1uwPV3im}\n}", "github": "", "project": "", "reviewers": "pVrF;3Fyb;JcBn;KjfL;G9pJ", "pdf_size": 2953332, "rating": "6;6;6;7;7", "confidence": "4;2;3;3;3", "wc_summary_and_contributions": "62;66;124;101;86", "wc_strengths": "55;122;146;64;68", "wc_improvement": "69;142;258;46;262", "wc_limitations": "47;149;37;11;1", "wc_correctness": "32;13;32;11;1", "wc_clarity": "22;74;22;6;1", "wc_relation_to_prior_work": "24;8;26;9;4", "wc_documentation": "27;9;20;12;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "339;584;666;261;428", "wc_reply_reviewers": "29;0;300;15;36", "wc_reply_authors": "754;457;907;183;737", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 87.8, 22.92945703674642 ], "wc_strengths_avg": [ 91.0, 36.16628264005025 ], "wc_improvement_avg": [ 155.4, 91.10784818005527 ], "wc_limitations_avg": [ 49.0, 52.71811832757311 ], "wc_correctness_avg": [ 17.8, 12.286578042726134 ], "wc_clarity_avg": [ 25.0, 25.907527863538043 ], "wc_relation_to_prior_work_avg": [ 14.2, 8.997777503361593 ], "wc_documentation_avg": [ 14.4, 8.16333265278342 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 455.6, 150.320457689564 ], "wc_reply_reviewers_avg": [ 76.0, 112.67830314661292 ], "wc_reply_authors_avg": [ 607.6, 257.2093310904564 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18234973300800969511&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "unsw.edu.au;adelaide.edu.au;;unsw.edu.au", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of New South Wales;University of Adelaide", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.unsw.edu.au;https://www.adelaide.edu.au", "aff_unique_abbr": "UNSW;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Kissing to Find a Match: Efficient Low-Rank Permutation Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71139", "id": "bXvmnpCMmq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97826456fb8c02fa368d673a49bbc563-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bXvmnpCMmq", "openreview": "https://openreview.net/forum?id=bXvmnpCMmq", "poster": "/media/PosterPDFs/NeurIPS%202023/71139.png?t=1701437700.758926", "slides": "https://nips.cc/virtual/2023/poster/71139", "video": "https://nips.cc/virtual/2023/poster/71139", "author_site": "Hannah Dr\u00f6ge, Zorah L\u00e4hner, Yuval Bahat, Onofre Martorell Nadal, Felix Heide, Michael Moeller", "tldr": "", "abstract": "Permutation matrices play a key role in matching and assignment problems across the fields, especially in computer vision and robotics. However, memory for explicitly representing permutation matrices grows quadratically with the size of the problem, prohibiting large problem instances. In this work, we propose to tackle the curse of dimensionality of large permutation matrices by approximating them using low-rank matrix factorization, followed by a nonlinearity. To this end, we rely on the Kissing number theory to infer the minimal rank required for representing a permutation matrix of a given size, which is significantly smaller than the problem size. This leads to a drastic reduction in computation and memory costs, e.g., up to $3$ orders of magnitude less memory for a problem of size $n=20000$, represented using $8.4\\times10^5$ elements in two small matrices instead of using a single huge matrix with $4\\times 10^8$ elements. The proposed representation allows for accurate representations of large permutation matrices, which in turn enables handling large problems that would have been infeasible otherwise. 
We demonstrate the applicability and merits of the proposed approach through a series of experiments on a range of problems that involve predicting permutation matrices, from linear and quadratic assignment to shape matching problems.", "keywords": "low rank;permutation;kissing number;matrix factorization;assignment problem", "primary_area": "", "supplementary_material": "/attachment/4b3862b25309d8c80b18dba9b76a1ea2741edeb7.pdf", "author": "Hannah Dr\u00f6ge;Zorah L\u00e4hner;Yuval Bahat;Onofre Martorell Nadal;Felix Heide;Michael Moeller", "authorids": "~Hannah_Dr\u00f6ge1;~Zorah_L\u00e4hner1;~Yuval_Bahat2;~Onofre_Martorell_Nadal1;~Felix_Heide2;~Michael_Moeller1", "gender": ";F;;M;;M", "homepage": "https://www.vsa.informatik.uni-siegen.de/en/droge-hannah;https://geometryinml.cs.uni-bonn.de;http://www.wisdom.weizmann.ac.il/%7Eybahat/;;https://www.cs.princeton.edu/~fheide/;http://vsa.informatik.uni-siegen.de", "dblp": "262/0583;175/1635;159/8756;;01/9396;08/5840-1", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.co.il/citations?user=VseAjdcAAAAJ;dGeBr3gAAAAJ;gRqzSHsAAAAJ;https://scholar.google.de/citations?user=sxzdAGUAAAAJ", "orcid": "0000-0001-7163-4279;0000-0003-0599-094X;;0000-0002-9071-778X;;", "linkedin": "hannah-dr%C3%B6ge-29105a16a/;;;;;", "or_profile": "~Hannah_Dr\u00f6ge1;~Zorah_L\u00e4hner1;~Yuval_Bahat2;~Onofre_Martorell_Nadal1;~Felix_Heide2;~Michael_Moeller1", "aff": "University of Siegen;University of Siegen;Department of Computer Science, Princeton University;;Algolux;University of Siegen", "aff_domain": "uni-siegen.de;uni-siegen.de;cs.princeton.edu;;algolux.com;uni-siegen.de", "position": "PhD student;Postdoc;Postdoc;;CTO;Full Professor", "bibtex": "@inproceedings{\ndr{\\\"o}ge2023kissing,\ntitle={Kissing to Find a Match: Efficient Low-Rank Permutation Representation},\nauthor={Hannah Dr{\\\"o}ge and Zorah L{\\\"a}hner and Yuval Bahat and Onofre Martorell Nadal and Felix Heide and Michael Moeller},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bXvmnpCMmq}\n}", "github": "", "project": "", "reviewers": "2QBr;AkAs;tWZ6;1Eor;KuKh", "pdf_size": 1658570, "rating": "4;5;8;8;9", "confidence": "4;3;4;2;4", "soundness": "2;3;4;4;4", "novelty": "2;2;4;4;4", "presentation": "4;3;4;4;4", "wc_summary": "142;49;90;204;98", "wc_strengths": "95;51;63;168;115", "wc_weaknesses": "147;92;191;28;33", "wc_questions": "2;112;116;38;1", "wc_limitations": "137;29;1;1;9", "wc_review": "523;333;461;439;256", "wc_reply_reviewers": "0;46;20;0;0", "wc_reply_authors": "0;56;0;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.8, 1.9390719429665317 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.2, 0.9797958971132712 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 116.6, 52.73935911631843 ], "wc_strengths_avg": [ 98.4, 41.548044478651455 ], "wc_weaknesses_avg": [ 98.2, 63.577983610680825 ], "wc_questions_avg": [ 53.8, 50.94467587491356 ], "wc_limitations_avg": [ 35.4, 51.82123117024527 ], "wc_review_avg": [ 402.4, 95.46433889154629 ], "wc_reply_reviewers_avg": [ 13.2, 18.137254478007414 ], "wc_reply_authors_avg": [ 11.2, 22.4 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.07735659346940951, "gs_citation": 3, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11633164109400073550&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "uni-siegen.de;uni-siegen.de;cs.princeton.edu;;algolux.com;uni-siegen.de", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Siegen;Princeton University;Algolux", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.uni-siegen.de;https://www.princeton.edu;https://www.algolux.com", "aff_unique_abbr": "Uni Siegen;Princeton;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Germany;United States;Sweden" }, { "title": "Hybrid Search for Efficient Planning with Completeness Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71138", "id": "bY0c46ZtXa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46d26daeb05fbbcfe5f3d8f7ca756e16-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bY0c46ZtXa", "openreview": "https://openreview.net/forum?id=bY0c46ZtXa", "poster": "/media/PosterPDFs/NeurIPS%202023/71138.png?t=1701377737.7924187", "slides": "https://nips.cc/virtual/2023/poster/71138", "video": "https://nips.cc/virtual/2023/poster/71138", "author_site": "Kalle Kujanp\u00e4\u00e4, Joni Pajarinen, Alexander Ilin", "tldr": "", "abstract": "Solving complex planning problems has been a long-standing challenge in computer science. Learning-based subgoal search methods have shown promise in tackling these problems, but they often suffer from a lack of completeness guarantees, meaning that they may fail to find a solution even if one exists. In this paper, we propose an efficient approach to augment a subgoal search method to achieve completeness in discrete action spaces. Specifically, we augment the high-level search with low-level actions to execute a multi-level (hybrid) search, which we call complete subgoal search. This solution achieves the best of both worlds: the practical efficiency of high-level search and the completeness of low-level search. We apply the proposed search method to a recently proposed subgoal search algorithm and evaluate the algorithm trained on offline data on complex planning problems. We demonstrate that our complete subgoal search not only guarantees completeness but can even improve performance in terms of search expansions for instances that the high-level could solve without low-level augmentations. 
Our approach makes it possible to apply subgoal-level planning to systems where completeness is a critical requirement.", "keywords": "Planning;Subgoal search;Reinforcement learning;Hierarchical Imitation Learning;Hierarchical planning;Hierarchical reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/89ec0633111afb2075f895facd830f6cfb5b72d5.pdf", "author": "Kalle Kujanp\u00e4\u00e4;Joni Pajarinen;Alexander Ilin", "authorids": "~Kalle_Kujanp\u00e4\u00e41;~Joni_Pajarinen2;~Alexander_Ilin1", "gender": ";;M", "homepage": ";;https://users.aalto.fi/~alexilin/", "dblp": ";23/8355;85/5835", "google_scholar": ";https://scholar.google.fi/citations?user=-2fJStwAAAAJ;i2gcTBQAAAAJ", "orcid": ";0000-0003-4469-8191;", "linkedin": ";;alexanderilin/", "or_profile": "~Kalle_Kujanp\u00e4\u00e41;~Joni_Pajarinen2;~Alexander_Ilin1", "aff": ";Aalto University;Aalto University", "aff_domain": ";aalto.fi;aalto.fi", "position": ";Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nkujanp{\\\"a}{\\\"a}2023hybrid,\ntitle={Hybrid Search for Efficient Planning with Completeness Guarantees},\nauthor={Kalle Kujanp{\\\"a}{\\\"a} and Joni Pajarinen and Alexander Ilin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bY0c46ZtXa}\n}", "github": "", "project": "", "reviewers": "nqzu;jgQs;i9rx;v7TG", "pdf_size": 941747, "rating": "5;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "134;93;36;70", "wc_strengths": "35;174;41;26", "wc_weaknesses": "34;306;148;65", "wc_questions": "45;344;8;502", "wc_limitations": "15;56;12;13", "wc_review": "263;973;245;676", "wc_reply_reviewers": "0;6;394;894", "wc_reply_authors": "0;0;1081;1683", "reply_reviewers": "0;1;2;4", "reply_authors": "1;1;3;4", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.25, 35.632674611934476 ], "wc_strengths_avg": [ 69.0, 60.85638832530238 ], "wc_weaknesses_avg": [ 138.25, 105.43807424265677 ], "wc_questions_avg": [ 224.75, 206.38480443094642 ], "wc_limitations_avg": [ 24.0, 18.506755523321747 ], "wc_review_avg": [ 539.25, 304.0299121797064 ], "wc_reply_reviewers_avg": [ 323.5, 366.02561385782826 ], "wc_reply_authors_avg": [ 691.0, 723.036306142368 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16552832735046147056&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";aalto.fi;aalto.fi", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "title": "Lie Point Symmetry and Physics-Informed Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71137", "id": "ba4boN3W1n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8493c860bec41705f7743d5764301b94-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ba4boN3W1n", "openreview": "https://openreview.net/forum?id=ba4boN3W1n", 
"poster": "", "slides": "https://nips.cc/virtual/2023/poster/71137", "video": "https://nips.cc/virtual/2023/poster/71137", "author_site": "Tara Akhound-Sadegh, Laurence Perreault-Levasseur, Johannes Brandstetter, Max Welling, Siamak Ravanbakhsh", "tldr": "", "abstract": "Symmetries have been leveraged to improve the generalization of neural networks through different mechanisms from data augmentation to equivariant architectures. However, despite their potential, their integration into neural solvers for partial differential equations (PDEs) remains largely unexplored. We explore the integration of PDE symmetries, known as Lie point symmetries, in a major family of neural solvers known as physics-informed neural networks (PINNs). We propose a loss function that informs the network about Lie point symmetries in the same way that PINN models try to enforce the underlying PDE through a loss function. Intuitively, our symmetry loss ensures that the infinitesimal generators of the Lie group conserve the PDE solutions.. Effectively, this means that once the network learns a solution, it also learns the neighbouring solutions generated by Lie point symmetries.\nEmpirical evaluations indicate that the inductive bias introduced by the Lie point symmetries of the PDEs greatly boosts the sample efficiency of PINNs.", "keywords": "PDE;Lie point symmetry;Symmetry;Neural PDE solver;PINNs", "primary_area": "", "supplementary_material": "/attachment/2e5fbe0e2dcbcb9eccb8e063de52a8300a1ef677.pdf", "author": "Tara Akhound-Sadegh;Laurence Perreault-Levasseur;Johannes Brandstetter;Max Welling;Siamak Ravanbakhsh", "authorids": "~Tara_Akhound-Sadegh1;~Laurence_Perreault-Levasseur1;~Johannes_Brandstetter1;~Max_Welling1;~Siamak_Ravanbakhsh1", "gender": ";F;M;M;", "homepage": "https://sites.google.com/view/taraakhound-sadegh/home;;;https://staff.fnwi.uva.nl/m.welling/;", "dblp": ";;251/8691;16/2286;", "google_scholar": "RHDoTkkAAAAJ;wVXcNOQAAAAJ;KiRvOHcAAAAJ;https://scholar.google.nl/citations?user=8200InoAAAAJ;", "orcid": ";;;0000-0003-1484-2121;", "linkedin": "tara-akhound-sadegh-574748101/;;;;", "or_profile": "~Tara_Akhound-Sadegh1;~Laurence_Perreault-Levasseur1;~Johannes_Brandstetter1;~Max_Welling1;~Siamak_Ravanbakhsh1", "aff": "McGill University;Universit\u00e9 de Montr\u00e9al;Microsoft;University of Amsterdam;", "aff_domain": "mcgill.ca;umontreal.ca;microsoft.com;uva.nl;", "position": "PhD student;Assistant Professor;Researcher;Full Professor;", "bibtex": "@inproceedings{\nakhound-sadegh2023lie,\ntitle={Lie Point Symmetry and Physics-Informed Networks},\nauthor={Tara Akhound-Sadegh and Laurence Perreault-Levasseur and Johannes Brandstetter and Max Welling and Siamak Ravanbakhsh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ba4boN3W1n}\n}", "github": "", "project": "", "reviewers": "dief;qo6e;KJvK;5AKh;vAL8", "pdf_size": 2084271, "rating": "3;4;6;6;6", "confidence": "4;2;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "1;2;2;3;2", "wc_summary": "52;105;15;75;124", "wc_strengths": "37;28;21;67;54", "wc_weaknesses": "367;54;103;47;170", "wc_questions": "54;30;30;153;113", "wc_limitations": "8;9;14;7;5", "wc_review": "518;226;183;349;466", "wc_reply_reviewers": "79;18;144;161;335", "wc_reply_authors": "104;0;0;109;244", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], 
"novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 74.2, 38.54036844660414 ], "wc_strengths_avg": [ 41.4, 16.906803364326446 ], "wc_weaknesses_avg": [ 148.2, 117.90402876916463 ], "wc_questions_avg": [ 76.0, 49.01836390578535 ], "wc_limitations_avg": [ 8.6, 3.006659275674582 ], "wc_review_avg": [ 348.4, 130.33280477301176 ], "wc_reply_reviewers_avg": [ 147.4, 106.59568471565817 ], "wc_reply_authors_avg": [ 91.4, 89.95910181854863 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6397390460482740037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mcgill.ca;umontreal.ca;microsoft.com;uva.nl;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "McGill University;Universit\u00e9 de Montr\u00e9al;Microsoft;University of Amsterdam", "aff_unique_dep": ";;Microsoft Corporation;", "aff_unique_url": "https://www.mcgill.ca;https://www.umontreal.ca;https://www.microsoft.com;https://www.uva.nl", "aff_unique_abbr": "McGill;UdeM;Microsoft;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Canada;United States;Netherlands" }, { "title": "Anonymous and Copy-Robust Delegations for Liquid Democracy", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71136", "id": "bbL20Oupi4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbb5180957513805ebeea787b8c66ac9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bbL20Oupi4", "openreview": "https://openreview.net/forum?id=bbL20Oupi4", "poster": "/media/PosterPDFs/NeurIPS%202023/71136.png?t=1701684161.1143148", "slides": "https://nips.cc/virtual/2023/poster/71136", "video": "https://nips.cc/virtual/2023/poster/71136", "author_site": "Markus Utke, Ulrike Schmidt-Kraepelin", "tldr": "", "abstract": "Liquid democracy with ranked delegations is a novel voting scheme that unites the practicability of representative democracy with the idealistic appeal of direct democracy: Every voter decides between casting their vote on a question at hand or delegating their voting weight to some other, trusted agent. Delegations are transitive, and since voters may end up in a delegation cycle, they are encouraged to indicate not only a single delegate, but a set of potential delegates and a ranking among them. Based on the delegation preferences of all voters, a delegation rule selects one representative per voter. Previous work has revealed a trade-off between two properties of delegation rules called anonymity and copy-robustness. \n\nTo overcome this issue we study two fractional delegation rules: Mixed Borda branching, which generalizes a rule satisfying copy-robustness, and the random walk rule, which satisfies anonymity. Using the Markov chain tree theorem, we show that the two rules are in fact equivalent, and simultaneously satisfy generalized versions of the two properties. Combining the same theorem with Fulkerson's algorithm, we develop a polynomial-time algorithm for computing the outcome of the studied delegation rule. 
This algorithm is of independent interest, having applications in semi-supervised learning and graph theory.", "keywords": "liquid democracy;directed trees;parameterized markov chain;matrix tree theorem;axiomatic method", "primary_area": "", "supplementary_material": "/attachment/e80d5e2a577bf933c7a40e3d0671484b1e39f762.pdf", "author": "Markus Utke;Ulrike Schmidt-Kraepelin", "authorids": "markus.utke@campus.tu-berlin.de;~Ulrike_Schmidt-Kraepelin1", "gender": ";", "homepage": ";https://www.algo.tu-berlin.de/menue/people/schmidt_kraepelin/", "dblp": ";230/7808", "google_scholar": ";S1I3-iMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "markus.utke@campus.tu-berlin.de;~Ulrike_Schmidt-Kraepelin1", "aff": ";TU Berlin", "aff_domain": ";tu-berlin.de", "position": ";PhD student", "bibtex": "@inproceedings{\nutke2023anonymous,\ntitle={Anonymous and Copy-Robust Delegations for Liquid Democracy},\nauthor={Markus Utke and Ulrike Schmidt-Kraepelin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bbL20Oupi4}\n}", "github": "", "project": "", "reviewers": "rGK4;vsDd;PkVV;eBke;rLuU", "pdf_size": 482055, "rating": "6;6;7;7;8", "confidence": "3;3;4;4;3", "soundness": "3;4;4;4;4", "novelty": "3;2;3;3;4", "presentation": "3;3;4;4;4", "wc_summary": "174;241;56;90;62", "wc_strengths": "79;90;40;75;50", "wc_weaknesses": "494;167;34;95;394", "wc_questions": "54;14;3;34;26", "wc_limitations": "8;1;9;1;3", "wc_review": "809;513;142;295;535", "wc_reply_reviewers": "65;20;9;0;101", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 124.6, 71.84872998181666 ], "wc_strengths_avg": [ 66.8, 18.73392644375439 ], "wc_weaknesses_avg": [ 236.8, 177.18397218710274 ], "wc_questions_avg": [ 26.2, 17.440183485273312 ], "wc_limitations_avg": [ 4.4, 3.4409301068170506 ], "wc_review_avg": [ 458.8, 227.41011411104827 ], "wc_reply_reviewers_avg": [ 39.0, 38.21518022985107 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12613424632846584356&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";tu-berlin.de", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Technische Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin", "aff_campus_unique_index": "0", "aff_campus_unique": "Berlin", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Contrastive Lift: 3D Object Instance Segmentation by Slow-Fast Contrastive Fusion", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71135", "id": "bbbbbov4Xu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1cb5b3d64bdf3c6642c8d9a8fbecd019-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bbbbbov4Xu", "openreview": "https://openreview.net/forum?id=bbbbbov4Xu", "poster": "/media/PosterPDFs/NeurIPS%202023/71135.png?t=1701533616.388286", "slides": "https://nips.cc/virtual/2023/poster/71135", 
"video": "https://nips.cc/virtual/2023/poster/71135", "author_site": "Yash Bhalgat, Iro Laina, Jo\u00e3o Henriques, Andrea Vedaldi, Andrew Zisserman", "tldr": "", "abstract": "Instance segmentation in 3D is a challenging task due to the lack of large-scale annotated datasets. In this paper, we show that this task can be addressed effectively by leveraging instead 2D pre-trained models for instance segmentation. We propose a novel approach to lift 2D segments to 3D and fuse them by means of a neural field representation, which encourages multi-view consistency across frames. The core of our approach is a slow-fast clustering objective function, which is scalable and well-suited for scenes with a large number of objects. Unlike previous approaches, our method does not require an upper bound on the number of objects or object tracking across frames. To demonstrate the scalability of the slow-fast clustering, we create a new semi-realistic dataset called the Messy Rooms dataset, which features scenes with up to 500 objects per scene. Our approach outperforms the state-of-the-art on challenging scenes from the ScanNet, Hypersim, and Replica datasets, as well as on our newly created Messy Rooms dataset, demonstrating the effectiveness and scalability of our slow-fast clustering method.", "keywords": "Neural Radiance Fields;Instance Segmentation;Metric Learning;Clustering;3D Computer Vision", "primary_area": "", "supplementary_material": "/attachment/cc1bb207f16090ab81288711480dc825efe65881.zip", "author": "Yash Sanjay Bhalgat;Iro Laina;Joao F. Henriques;Andrea Vedaldi;Andrew Zisserman", "authorids": "~Yash_Sanjay_Bhalgat1;~Iro_Laina1;~Joao_F._Henriques1;~Andrea_Vedaldi1;~Andrew_Zisserman1", "gender": "M;;M;M;", "homepage": "https://yashbhalgat.github.io;;http://www.robots.ox.ac.uk/~joao/;https://www.robots.ox.ac.uk/~vedaldi/;", "dblp": "186/8383;;31/8617.html;99/2825;", "google_scholar": "https://scholar.google.com/citations?hl=en;;aCQjyp0AAAAJ;bRT7t28AAAAJ;", "orcid": ";;;0000-0003-1374-2858;", "linkedin": "yashbhalgat/;;;;", "or_profile": "~Yash_Sanjay_Bhalgat1;~Iro_Laina1;~Joao_F._Henriques1;~Andrea_Vedaldi1;~Andrew_Zisserman1", "aff": "University of Oxford;;University of Oxford;Meta;", "aff_domain": "ox.ac.uk;;ox.ac.uk;meta.com;", "position": "PhD student;;Principal Researcher;Researcher;", "bibtex": "@inproceedings{\nbhalgat2023contrastive,\ntitle={Contrastive Lift: 3D Object Instance Segmentation by Slow-Fast Contrastive Fusion},\nauthor={Yash Sanjay Bhalgat and Iro Laina and Joao F. 
Henriques and Andrea Vedaldi and Andrew Zisserman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bbbbbov4Xu}\n}", "github": "", "project": "", "reviewers": "RRt9;Vjqu;4dod;KUUX;zMUp", "pdf_size": 41013782, "rating": "5;7;7;7;7", "confidence": "3;4;3;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;3;4;3", "wc_summary": "210;84;100;203;82", "wc_strengths": "55;61;150;146;77", "wc_weaknesses": "379;41;106;62;102", "wc_questions": "103;4;30;19;134", "wc_limitations": "20;4;18;29;16", "wc_review": "767;194;404;459;411", "wc_reply_reviewers": "0;23;21;24;19", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 135.8, 58.10473302580436 ], "wc_strengths_avg": [ 97.8, 41.63364024439852 ], "wc_weaknesses_avg": [ 138.0, 122.95202316350878 ], "wc_questions_avg": [ 58.0, 51.0333224472011 ], "wc_limitations_avg": [ 17.4, 8.039900496896712 ], "wc_review_avg": [ 447.0, 184.22703384682717 ], "wc_reply_reviewers_avg": [ 17.4, 8.867919710958146 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6123724356957948, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6154777883799139064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ox.ac.uk;;ox.ac.uk;meta.com;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Oxford;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://meta.com", "aff_unique_abbr": "Oxford;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "A Dataset of Relighted 3D Interacting Hands", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73540", "id": "bdWkFt7M6X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/396beafa6feba781a7114780e6837253-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=bdWkFt7M6X", "openreview": "https://openreview.net/forum?id=bdWkFt7M6X", "poster": "/media/PosterPDFs/NeurIPS%202023/73540.png?t=1699312557.5352042", "slides": "https://nips.cc/virtual/2023/poster/73540", "video": "https://nips.cc/virtual/2023/poster/73540", "author_site": "Gyeongsik Moon, Shunsuke Saito, Weipeng Xu, Rohan Joshi, Julia Buffalini, Harley Bellan, Nicholas Rosen, Jesse Richardson, Mallorie Mize, Philippe De Bree, Tomas Simon, Bo Peng, Shubham Garg, Kevyn McPhail, Takaaki Shiratori", "tldr": "", "abstract": "The two-hand interaction is one of the most challenging signals to analyze due to the self-similarity, complicated articulations, and occlusions of hands. Although several datasets have been proposed for the two-hand interaction analysis, none of them achieves both 1) diverse and realistic image appearances and 2) diverse and large-scale groundtruth (GT) 3D poses at the same time. In this work, we propose Re:InterHand, a dataset of relighted 3D interacting hands that achieves both goals. 
To this end, we employ a state-of-the-art hand relighting network with our accurately tracked two-hand 3D poses. We compare our Re:InterHand with existing 3D interacting hands datasets and show its benefits. Our Re:InterHand is available at https://mks0601.github.io/ReInterHand/", "keywords": "dataset;3D interacting hands;relighting", "primary_area": "", "supplementary_material": "/attachment/d5615e4347a43301713ae366485d3f77a3a9e4bd.pdf", "author": "Gyeongsik Moon;Shunsuke Saito;Weipeng Xu;Rohan Joshi;Julia Buffalini;Harley Bellan;Nicholas Matthew Rosen;Jesse Richardson;Mallorie Mize;Philippe De Bree;Tomas Simon;Bo Peng;Shubham Garg;Kevyn Alex Anthony McPhail;Takaaki Shiratori", "authorids": "~Gyeongsik_Moon1;~Shunsuke_Saito1;~Weipeng_Xu1;~Rohan_Joshi1;~Julia_Buffalini1;~Harley_Bellan1;~Nicholas_Matthew_Rosen1;~Jesse_Richardson1;~Mallorie_Mize1;~Philippe_De_Bree1;~Tomas_Simon1;~Bo_Peng4;~Shubham_Garg2;~Kevyn_Alex_Anthony_McPhail1;~Takaaki_Shiratori3", "gender": "M;M;M;M;F;F;M;M;Non-Binary;M;;M;;M;M", "homepage": "https://mks0601.github.io/;http://www-scf.usc.edu/~saitos/;https://sites.google.com/view/xuweipeng;;https://www.linkedin.com/in/julia-buffalini-424983123/;;;;;;;;;https://www.kevynmc.com;https://sites.google.com/view/takaaki-shiratori/home", "dblp": "185/6852;21/5061;190/7431;;;;;;;;23/8654;03/5954-5;;;17/5270.html", "google_scholar": "2f2D258AAAAJ;IolN_okAAAAJ;https://scholar.google.de/citations?user=vy8xXDQAAAAJ;;;;;;;;7aabHgsAAAAJ;;gJaE3Y4AAAAJ;;YvS3QpkAAAAJ", "orcid": ";;;;;;;;;;0000-0002-0972-7455;;;;", "linkedin": "gyeongsik-moon-bb9a73152/;;;rohan-m-joshi/;;https://www.linkedin.com/mwlite/in/harley-bellan-61021a1bb;https://www.linkedin.com/mwlite/in/nicholas-rosen-697784156;jesserichardson1/;mallorie-mize-746332169/;philippe-de-bree;;;shubhamgarg19/;;", "or_profile": "~Gyeongsik_Moon1;~Shunsuke_Saito1;~Weipeng_Xu1;~Rohan_Joshi1;~Julia_Buffalini1;~Harley_Bellan1;~Nicholas_Matthew_Rosen1;~Jesse_Richardson1;~Mallorie_Mize1;~Philippe_De_Bree1;~Tomas_Simon1;~Bo_Peng4;~Shubham_Garg2;~Kevyn_Alex_Anthony_McPhail1;~Takaaki_Shiratori3", "aff": "Meta;Codec Avatars Lab;Meta Reality Labs;Meta Facebook;Meta;Meta;Meta;;Meta;;Meta;Meta Platforms Inc.;;Meta;Meta", "aff_domain": "meta.com;meta.com;meta.com;fb.com;meta.com;meta.com;about.meta.com;;meta.com;;fb.com;meta.com;;meta.com;meta.com", "position": "Postdoc;Researcher;Researcher;Researcher;Project Manager;Capture Technician;Capture Technician;;Researcher;;Research Scientist;Researcher;;Engineer;Research Scientist", "bibtex": "@inproceedings{\nmoon2023a,\ntitle={A Dataset of Relighted 3D Interacting Hands},\nauthor={Gyeongsik Moon and Shunsuke Saito and Weipeng Xu and Rohan Joshi and Julia Buffalini and Harley Bellan and Nicholas Matthew Rosen and Jesse Richardson and Mallorie Mize and Philippe De Bree and Tomas Simon and Bo Peng and Shubham Garg and Kevyn Alex Anthony McPhail and Takaaki Shiratori},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=bdWkFt7M6X}\n}", "github": "", "project": "", "reviewers": "vVQs;AoTd;rLCe", "pdf_size": 7691453, "rating": "4;7;10", "confidence": "5;3;4", "wc_summary_and_contributions": "40;44;91", "wc_strengths": "12;48;40", "wc_improvement": "71;61;14", "wc_limitations": "14;10;14", "wc_correctness": "5;17;3", "wc_clarity": "11;105;5", "wc_relation_to_prior_work": "12;8;1", "wc_documentation": "4;30;1", "wc_additional_feedback": "1;1;1", "wc_review": "170;324;170", "wc_reply_reviewers": 
"788;31;119", "wc_reply_authors": "2344;521;184", "reply_reviewers": "3;1;1", "reply_authors": "4;2;2", "rating_avg": [ 7.0, 2.449489742783178 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 58.333333333333336, 23.156472577277874 ], "wc_strengths_avg": [ 33.333333333333336, 15.4344492037203 ], "wc_improvement_avg": [ 48.666666666666664, 24.850665092821068 ], "wc_limitations_avg": [ 12.666666666666666, 1.8856180831641267 ], "wc_correctness_avg": [ 8.333333333333334, 6.182412330330469 ], "wc_clarity_avg": [ 40.333333333333336, 45.79179936286505 ], "wc_relation_to_prior_work_avg": [ 7.0, 4.546060565661952 ], "wc_documentation_avg": [ 11.666666666666666, 13.02134998974974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 221.33333333333334, 72.59629620181889 ], "wc_reply_reviewers_avg": [ 312.6666666666667, 338.0259687591407 ], "wc_reply_authors_avg": [ 1016.3333333333334, 948.8295713960202 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5131373528104867292&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;meta.com;meta.com;fb.com;meta.com;meta.com;about.meta.com;;meta.com;;fb.com;meta.com;;meta.com;meta.com", "author_num": 15, "aff_unique_index": "0;1;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Meta;Codec Avatars Lab", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;", "aff_unique_abbr": "Meta;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Kiki or Bouba? Sound Symbolism in Vision-and-Language Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71134", "id": "bfmSc1ETT9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f74054328beeb0c21a9b8e99da557f5a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bfmSc1ETT9", "openreview": "https://openreview.net/forum?id=bfmSc1ETT9", "poster": "/media/PosterPDFs/NeurIPS%202023/71134.png?t=1701915870.3948684", "slides": "https://nips.cc/virtual/2023/poster/71134", "video": "https://nips.cc/virtual/2023/poster/71134", "author_site": "Morris Alper, Hadar Averbuch-Elor", "tldr": "", "abstract": "Although the mapping between sound and meaning in human language is assumed to be largely arbitrary, research in cognitive science has shown that there are non-trivial correlations between particular sounds and meanings across languages and demographic groups, a phenomenon known as sound symbolism. Among the many dimensions of meaning, sound symbolism is particularly salient and well-demonstrated with regards to cross-modal associations between language and the visual domain. In this work, we address the question of whether sound symbolism is reflected in vision-and-language models such as CLIP and Stable Diffusion. Using zero-shot knowledge probing to investigate the inherent knowledge of these models, we find strong evidence that they do show this pattern, paralleling the well-known kiki-bouba effect in psycholinguistics. Our work provides a novel method for demonstrating sound symbolism and understanding its nature using computational tools. 
Our code will be made publicly available.", "keywords": "multimodal learning;computer vision;NLP;cognitive science", "primary_area": "", "supplementary_material": "/attachment/4e05539257eac1a591508f4da7a0660c4b0b393e.pdf", "author": "Morris Alper;Hadar Averbuch-Elor", "authorids": "~Morris_Alper1;~Hadar_Averbuch-Elor2", "gender": "M;F", "homepage": "https://morrisalp.github.io/;http://www.hadarelor.com", "dblp": "205/5338;162/5758", "google_scholar": "M2RsdCUAAAAJ;", "orcid": "0000-0002-3533-7602;", "linkedin": "morris-alper/;", "or_profile": "~Morris_Alper1;~Hadar_Elor1", "aff": "Tel Aviv University;Cornell University", "aff_domain": "tau.ac.il;cornell.edu", "position": "MS student;Postdoc", "bibtex": "@inproceedings{\nalper2023kiki,\ntitle={Kiki or Bouba? Sound Symbolism in Vision-and-Language Models},\nauthor={Morris Alper and Hadar Averbuch-Elor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bfmSc1ETT9}\n}", "github": "", "project": "", "reviewers": "DdD9;kkX7;gvCK;NTGx", "pdf_size": 1378201, "rating": "6;7;7;8", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;4;3", "presentation": "3;4;4;4", "wc_summary": "93;94;209;148", "wc_strengths": "55;71;69;26", "wc_weaknesses": "163;137;431;8", "wc_questions": "36;93;103;16", "wc_limitations": "52;37;64;14", "wc_review": "399;432;876;212", "wc_reply_reviewers": "0;139;188;27", "wc_reply_authors": "0;147;16;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 136.0, 47.6602559791699 ], "wc_strengths_avg": [ 55.25, 17.977416388346796 ], "wc_weaknesses_avg": [ 184.75, 153.81218254741722 ], "wc_questions_avg": [ 62.0, 36.857834987964225 ], "wc_limitations_avg": [ 41.75, 18.659782956937093 ], "wc_review_avg": [ 479.75, 243.67229530662692 ], "wc_reply_reviewers_avg": [ 88.5, 77.56448929761608 ], "wc_reply_authors_avg": [ 40.75, 61.69025449777298 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11715511027500478150&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "tau.ac.il;cornell.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Tel Aviv University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tau.ac.il;https://www.cornell.edu", "aff_unique_abbr": "TAU;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United States" }, { "id": "biLgaNKYdB", "title": "Transforming to Yoked Neural Networks to Improve ANN Structure", "track": "main", "status": "Reject", "tldr": "", "abstract": "Artificial neural networks (ANNs) are typically designed as a tree structure to mimic biological neural networks. However, this structure has limitations as nodes on the same level cannot communicate with each other, resulting in deficiencies. To address this, we proposed the Yoked neural network (YNN) model, which connected nodes bidirectionally in a complete graph, forming a neural module. 
YNN improved information transfer and eliminated structural bias more effectively than traditional ANNs. Although ANN structures have advanced to more complex structures such as Directed Acyclic Graphs (DAGs) in recent years, these methods also exhibit unidirectional and acyclic biases for ANNs. Compared to traditional ANNs, our YNN can emulate biological neural networks more effectively. In this study, we analyzed the limitations of existing ANN structures and attached an auxiliary sparsity constraint to the distribution of connectedness to focus on critical connections. Unlike traditional structures, which treat the network as a flow of tensors in which each unit represents a model level and each level is a tensor of independent elements, YNN treated each tensor in the flow as a whole information unit, since the elements in a tensor interact with each other. Moreover, based on the optimized structure, we designed a neural module structure using the minimum cut technique to reduce the computation of the YNN model. This learning process was compatible with existing networks and various tasks, and it efficiently eliminated structural bias. The quantitative results of our experiments indicated that the learned connectivity was superior to the traditional neural network structure.", "keywords": "Deep Learning;Neural Networks", "primary_area": "", "supplementary_material": "/attachment/4b7c92a4f71fd76cbc60e924098d426b5d74aefb.zip", "author": "Xinshun Liu;Yizhi Fang;Yichao Jiang", "authorids": "~Xinshun_Liu1;nooneimportant121@gmail.com;mjiang@sztu.edu.cn", "gender": "M;;", "homepage": "https://www.linkedin.com/in/%E6%96%B0%E9%A1%BA-%E5%88%98-2a1471126/;;", "dblp": "348/8877;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;", "orcid": "0009-0005-7270-0721;;", "linkedin": "%E6%96%B0%E9%A1%BA-%E5%88%98-2a1471126/;;", "or_profile": "~Xinshun_Liu1;nooneimportant121@gmail.com;mjiang@sztu.edu.cn", "aff": "Didi International Business Group;;", "aff_domain": "didiglobal.com;;", "position": "Researcher;;", "bibtex": "@misc{\nliu2023transforming,\ntitle={Transforming to Yoked Neural Networks to Improve {ANN} Structure},\nauthor={Xinshun Liu and Yizhi Fang and Yichao Jiang},\nyear={2023},\nurl={https://openreview.net/forum?id=biLgaNKYdB}\n}", "github": "", "project": "", "reviewers": "ZkFh;v2EA;N4pP;bqNk;tc8h", "site": "https://openreview.net/forum?id=biLgaNKYdB", "pdf_size": 22357140, "rating": "3;3;3;3;5", "confidence": "4;4;4;3;4", "soundness": "2;2;3;2;3", "novelty": "1;1;2;2;3", "presentation": "2;1;2;2;3", "wc_summary": "44;49;51;90;140", "wc_strengths": "10;66;36;14;52", "wc_weaknesses": "41;129;70;370;51", "wc_questions": "58;83;839;514;114", "wc_limitations": "6;8;5;2;1", "wc_review": "159;335;1001;990;358", "wc_reply_reviewers": "37;0;185;0;0", "wc_reply_authors": "286;286;424;286;286", "reply_reviewers": "1;0;1;0;0", "reply_authors": "2;2;2;2;2", "rating_avg": [ 3.4, 0.8 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 1.8, 0.7483314773547883 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 74.8, 36.5042463283383 ], "wc_strengths_avg": [ 35.6, 21.518364250100426 ], "wc_weaknesses_avg": [ 132.2, 122.75080447801554 ], "wc_questions_avg": [ 321.6, 307.97181689239034 ], "wc_limitations_avg": [ 4.4, 2.5768197453450252 ], "wc_review_avg": [ 568.6, 355.31428341680834 ], "wc_reply_reviewers_avg": [ 44.4, 71.74566188976166 ], "wc_reply_authors_avg": [ 313.6, 55.2 ], "reply_reviewers_avg": [ 0.4, 
0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.25, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9eFIPS85kzUJ:scholar.google.com/&scioq=Transforming+to+Yoked+Neural+Networks+to+Improve+ANN+Structure&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Didi International Business Group", "aff_unique_dep": "", "aff_unique_url": "https://www.didi.com", "aff_unique_abbr": "Didi", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "biaOpY5gAo", "title": "Stack More Layers Differently: High-Rank Training Through Low-Rank Updates", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the dominance and effectiveness of scaling, resulting in large networks with hundreds of billions of parameters,\nthe necessity to train overparametrized models remains poorly understood, and alternative approaches do not necessarily make it cheaper to train high-performance models.\nIn this paper, we explore low-rank training techniques as an alternative approach to training large neural networks. We introduce a novel method called ReLoRA, which utilizes low-rank updates to train high-rank networks.\nWe apply ReLoRA to pre-training transformer language models with up to 350M parameters and demonstrate comparable performance to regular neural network training. Furthermore, we observe that the efficiency of ReLoRA increases with model size, making it a promising approach for training multi-billion-parameter networks efficiently. Our findings shed light on the potential of low-rank training techniques and their implications for model scaling.", "keywords": "efficient training;scaling laws;language models;low-rank training;parameter-efficient fine-tuning", "primary_area": "", "supplementary_material": "/attachment/3c283d828337b0966c7ccb12c8989e3341910673.zip", "author": "Vladislav Lialin;Sherin Muckatira;Namrata Shivagunde;Anna Rumshisky", "authorids": "~Vladislav_Lialin1;~Sherin_Muckatira1;~Namrata_Shivagunde1;~Anna_Rumshisky2", "gender": ";F;F;Not Specified", "homepage": "http://vladlialin.com;;https://text-machine.cs.uml.edu/lab2/people/nshivagunde/;http://text-machine.cs.uml.edu", "dblp": "https://dblp.uni-trier.de/pid/251/5456;138/5752;320/5719;63/873", "google_scholar": "B1Ijov0AAAAJ;https://scholar.google.com/citations?hl=en;RWNUtkkAAAAJ;https://scholar.google.com.tw/citations?user=_Q1uzVYAAAAJ", "orcid": ";;;", "linkedin": "vlialin/;sherinmuckatira/;namrata-shivagunde-b5823ba3/;", "or_profile": "~Vladislav_Lialin1;~Sherin_Muckatira1;~Namrata_Shivagunde1;~Anna_Rumshisky1", "aff": "University of Massachusetts, Lowell;University of Massachusetts at Lowell;Department of Computer Science, University of Massachusetts at Lowell;University of Massachusetts, Lowell", "aff_domain": "uml.edu;uml.edu;cs.umass.edu;uml.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nlialin2023stack,\ntitle={Stack More Layers Differently: High-Rank Training Through Low-Rank Updates},\nauthor={Vladislav Lialin and Sherin Muckatira and Namrata Shivagunde and Anna Rumshisky},\nyear={2023},\nurl={https://openreview.net/forum?id=biaOpY5gAo}\n}", "github": "", "project": "", "reviewers": "4sGe;wgeh;nJNf;Cooz", "site": "https://openreview.net/forum?id=biaOpY5gAo", "pdf_size": 524115, "rating": "3;4;5;5", "confidence": "3;5;3;4", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "1;3;3;3", 
"wc_summary": "370;43;140;100", "wc_strengths": "16;33;34;44", "wc_weaknesses": "15;158;145;67", "wc_questions": "2;199;62;47", "wc_limitations": "1;9;57;17", "wc_review": "404;442;438;275", "wc_reply_reviewers": "0;220;35;0", "wc_reply_authors": "0;603;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 163.25, 124.24446667759494 ], "wc_strengths_avg": [ 31.75, 10.059199769365355 ], "wc_weaknesses_avg": [ 96.25, 58.409652455737145 ], "wc_questions_avg": [ 77.5, 73.54080499967348 ], "wc_limitations_avg": [ 21.0, 21.540659228538015 ], "wc_review_avg": [ 389.75, 67.8762661023719 ], "wc_reply_reviewers_avg": [ 63.75, 91.33557631065783 ], "wc_reply_authors_avg": [ 150.75, 261.10665924100823 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16347103820657222273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Massachusetts Lowell;University of Massachusetts at Lowell", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.uml.edu;https://www.uml.edu", "aff_unique_abbr": "UMass Lowell;UMass Lowell", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lowell", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "How to Data in Datathons", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73539", "id": "bjvRVA2ihO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/215a55741fbe4baad173468f93336a7d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=bjvRVA2ihO", "openreview": "https://openreview.net/forum?id=bjvRVA2ihO", "poster": "/media/PosterPDFs/NeurIPS%202023/73539.png?t=1700123757.6565142", "slides": "https://nips.cc/virtual/2023/poster/73539", "video": "https://nips.cc/virtual/2023/poster/73539", "author_site": "Carlos Mougan, Richard Plant, Clare Teng, Marya Bazzi, Alvaro Cabrejas Egea, Ryan Chan, David Salvador Jasin, Martin Stoffel, Kirstie Whitaker, JULES MANSER", "tldr": "", "abstract": "The rise of datathons, also known as data or data science hackathons, has provided a platform to collaborate, learn, and innovate quickly. Despite their significant potential benefits, organizations often struggle to effectively work with data due to a lack of clear guidelines and best practices for potential issues that might arise. Drawing on our own experiences and insights from organizing +80 datathon challenges with +60 partnership organizations since 2016, we provide a guide that serves as a resource for organizers to navigate the data-related complexities of datathons. 
We apply our proposed framework to 10 case studies.", "keywords": "datathon;hackathon;data;data framework", "primary_area": "", "supplementary_material": "", "author": "Carlos Mougan;Richard Plant;Clare Teng;Marya Bazzi;Alvaro Cabrejas-Egea;Ryan Sze-Yin Chan;David Salvador Jasin;martin stoffel;Kirstie Jane Whitaker;JULES MANSER", "authorids": "~Carlos_Mougan1;~Richard_Plant1;~Clare_Teng1;~Marya_Bazzi1;alvaro.cabrejasegea@fujitsu.com;~Ryan_Sze-Yin_Chan1;~David_Salvador_Jasin1;mstoffel@turing.ac.uk;~Kirstie_Jane_Whitaker1;~JULES_MANSER1", "gender": "M;M;;;;;M;;F;M", "homepage": "https://cmougan.eu;https://rplant.netlify.app/;https://eng.ox.ac.uk/people/clare-teng/;https://warwick.ac.uk/fac/sci/maths/people/staff/bazzi/;;;;;https://www.turing.ac.uk/people/researchers/kirstie-whitaker;https://www.linkedin.com/in/jules-m-4859682/", "dblp": "293/7915;;;;;;;;;", "google_scholar": "dQ5WrokAAAAJ;;;;;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/scholar?hl=en", "orcid": ";0000-0002-0239-2090;0000-0001-9200-6204;;;;0000-0001-8892-2410;;0000-0001-8498-4059;", "linkedin": "carlosmougan/;;;;;;;;;jules-m-4859682/", "or_profile": "~Carlos_Mougan1;~Richard_Plant1;~Clare_Teng1;~Marya_Bazzi1;alvaro.cabrejasegea@fujitsu.com;~Ryan_Sze-Yin_Chan1;~David_Salvador_Jasin1;mstoffel@turing.ac.uk;~Kirstie_Jane_Whitaker1;~JULES_MANSER1", "aff": "University of Southampton;Napier University;University of Oxford;University of Warwick;;;Alan Turing Institute;;Alan Turing Institute;Alan Turing Institute", "aff_domain": "soton.ac.uk;napier.ac.uk;ox.ac.uk;warwick.ac.uk;;;turing.ac.uk;;turing.ac.uk;turing.ac.uk", "position": "PhD student;PhD student;PhD student;Assistant Professor;;;Researcher;;Principal Researcher;Programme Manager", "bibtex": "@inproceedings{\nmougan2023how,\ntitle={How to Data in Datathons},\nauthor={Carlos Mougan and Richard Plant and Clare Teng and Marya Bazzi and Alvaro Cabrejas-Egea and Ryan Sze-Yin Chan and David Salvador Jasin and martin stoffel and Kirstie Jane Whitaker and JULES MANSER},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=bjvRVA2ihO}\n}", "github": "", "project": "", "reviewers": "oVgp;CNLi;ijbY;ammT;s7kZ", "pdf_size": 290378, "rating": "6;6;7;7;8", "confidence": "4;4;4;2;3", "wc_summary_and_contributions": "40;159;53;63;97", "wc_strengths": "43;84;89;95;42", "wc_improvement": "219;121;554;80;36", "wc_limitations": "23;125;62;26;38", "wc_correctness": "28;1;4;1;1", "wc_clarity": "8;11;8;16;1", "wc_relation_to_prior_work": "17;25;115;1;1", "wc_documentation": "1;1;4;24;2", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "380;528;890;307;219", "wc_reply_reviewers": "40;0;338;12;0", "wc_reply_authors": "511;260;1113;156;43", "reply_reviewers": "1;0;2;1;0", "reply_authors": "1;1;3;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "wc_summary_and_contributions_avg": [ 82.4, 42.70643979542196 ], "wc_strengths_avg": [ 70.6, 23.208619088605854 ], "wc_improvement_avg": [ 202.0, 186.11501820111133 ], "wc_limitations_avg": [ 54.8, 37.690317058894586 ], "wc_correctness_avg": [ 7.0, 10.564090116995406 ], "wc_clarity_avg": [ 8.8, 4.874423042781576 ], "wc_relation_to_prior_work_avg": [ 31.8, 42.62581377522311 ], "wc_documentation_avg": [ 6.4, 8.867919710958146 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 464.8, 235.49895965799934 ], "wc_reply_reviewers_avg": [ 78.0, 130.81895887064687 ], 
"wc_reply_authors_avg": [ 416.6, 381.02419870659133 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16620111869969494723&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "soton.ac.uk;napier.ac.uk;ox.ac.uk;warwick.ac.uk;;;turing.ac.uk;;turing.ac.uk;turing.ac.uk", "author_num": 10, "aff_unique_index": "0;1;2;3;4;4;4", "aff_unique_norm": "University of Southampton;Napier University;University of Oxford;University of Warwick;Alan Turing Institute", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.southampton.ac.uk;https://www.napier.ac.uk;https://www.ox.ac.uk;https://www.warwick.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "Southampton;Napier;Oxford;Warwick;ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Adaptive SGD with Polyak stepsize and Line-search: Robust Convergence and Variance Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71133", "id": "blC2kbzvNC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/540eb9e0ee35d525231c3fd22d1dcbf2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=blC2kbzvNC", "openreview": "https://openreview.net/forum?id=blC2kbzvNC", "poster": "/media/PosterPDFs/NeurIPS%202023/71133.png?t=1699826051.7644506", "slides": "https://nips.cc/virtual/2023/poster/71133", "video": "https://nips.cc/virtual/2023/poster/71133", "author_site": "Xiaowen Jiang, Sebastian Stich", "tldr": "", "abstract": "The recently proposed stochastic Polyak stepsize (SPS) and stochastic line-search (SLS) for SGD have shown remarkable effectiveness when training over-parameterized models. However, two issues remain unsolved in this line of work. \n\nFirst, in non-interpolation settings, both algorithms only guarantee convergence to a neighborhood of a solution which may result in a worse output than the initial guess. While artificially decreasing the adaptive stepsize has been proposed to address this issue (Orvieto et al.), this approach results in slower convergence rates under interpolation. Second, intuitive line-search methods equipped with variance-reduction (VR) fail to converge (Dubois-Taine et al.). So far, no VR methods successfully accelerate these two stepsizes with a convergence guarantee.\n\nIn this work, we make two contributions:\nFirstly, we propose two new robust variants of SPS and SLS, called AdaSPS and AdaSLS, which achieve optimal asymptotic rates in both strongly-convex or convex and interpolation or non-interpolation settings, except for the case when we have both strong convexity and non-interpolation. AdaSLS requires no knowledge of problem-dependent parameters, and AdaSPS requires only a lower bound of the optimal function value as input. Secondly, we propose a novel VR method that can use Polyak stepsizes or line-search to achieve acceleration. When it is equipped with AdaSPS or AdaSLS, the resulting algorithms obtain the optimal rate\nfor optimizing convex smooth functions. 
Finally, numerical experiments on synthetic and real datasets validate our theory and demonstrate the effectiveness and robustness of our algorithms.", "keywords": "Convex Optimization;SGD;Adaptive Methods;Variance Reduction;Polyak Stepsize;Line-Search", "primary_area": "", "supplementary_material": "", "author": "Xiaowen Jiang;Sebastian U Stich", "authorids": "~Xiaowen_Jiang1;~Sebastian_U_Stich1", "gender": "M;M", "homepage": ";https://www.sstich.ch", "dblp": "192/3782-3;04/10549", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=8l-mDfQAAAAJ", "orcid": ";", "linkedin": "xiaowen-jiang-65570b222/;", "or_profile": "~Xiaowen_Jiang1;~Sebastian_U_Stich1", "aff": "CISPA Helmholtz Center for Information Security;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.de;cispa.de", "position": "PhD student;Tenure Track Faculty", "bibtex": "@inproceedings{\njiang2023adaptive,\ntitle={Adaptive {SGD} with Polyak stepsize and Line-search: Robust Convergence and Variance Reduction},\nauthor={Xiaowen Jiang and Sebastian U Stich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=blC2kbzvNC}\n}", "github": "", "project": "", "reviewers": "Dgvs;7mHQ;a9xP;NwRJ", "pdf_size": 1460404, "rating": "2;4;4;5", "confidence": "4;3;4;4", "soundness": "2;1;3;2", "novelty": "1;2;2;3", "presentation": "2;4;3;2", "wc_summary": "36;163;63;121", "wc_strengths": "30;74;32;50", "wc_weaknesses": "54;306;253;82", "wc_questions": "223;58;91;69", "wc_limitations": "7;36;1;1", "wc_review": "350;637;440;323", "wc_reply_reviewers": "673;194;25;0", "wc_reply_authors": "820;753;0;0", "reply_reviewers": "10;2;1;0", "reply_authors": "11;3;1;1", "rating_avg": [ 3.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 95.75, 49.504418994671575 ], "wc_strengths_avg": [ 46.5, 17.684739183827393 ], "wc_weaknesses_avg": [ 173.75, 107.85261934695883 ], "wc_questions_avg": [ 110.25, 66.1716517853378 ], "wc_limitations_avg": [ 11.25, 14.49784466739798 ], "wc_review_avg": [ 437.5, 123.05791319537317 ], "wc_reply_reviewers_avg": [ 223.0, 270.3118569356513 ], "wc_reply_authors_avg": [ 393.25, 393.9627996397629 ], "reply_reviewers_avg": [ 3.25, 3.960744879438715 ], "reply_authors_avg": [ 4.0, 4.123105625617661 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6589259741439194460&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "cispa.de;cispa.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security", "aff_unique_dep": "", "aff_unique_url": "https://www.cispa.de/", "aff_unique_abbr": "CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Paxion: Patching Action Knowledge in Video-Language Foundation Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71132", "id": "blm1pqiOXe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/420492060687ca7448398c4c3fa10366-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=blm1pqiOXe", "openreview": 
"https://openreview.net/forum?id=blm1pqiOXe", "poster": "/media/PosterPDFs/NeurIPS%202023/71132.png?t=1698163953.2004263", "slides": "https://nips.cc/virtual/2023/poster/71132", "video": "https://nips.cc/virtual/2023/poster/71132", "author_site": "Zhenhailong Wang, Ansel Blume, Sha Li, Genglin Liu, Jaemin Cho, Zineng Tang, Mohit Bansal, Heng Ji", "tldr": "", "abstract": "Action knowledge involves the understanding of textual, visual, and temporal aspects of actions. We introduce the **Action Dynamics Benchmark (ActionBench)** containing two carefully designed probing tasks: Action Antonym and Video Reversal, which targets multimodal alignment capabilities and temporal understanding skills of the model, respectively. Despite recent video-language models\u2019 (VidLM) impressive performance on various benchmark tasks, our diagnostic tasks reveal their surprising deficiency (near-random performance) in action knowledge, suggesting that current models rely on object recognition abilities as a shortcut for action understanding. To remedy this, we propose a novel framework, **Paxion**, along with a new **Discriminative Video Dynamics Modeling (DVDM)** objective. The Paxion framework utilizes a **Knowledge Patcher** network to encode new action knowledge and a **Knowledge Fuser** component to integrate the Patcher into frozen VidLMs without compromising their existing capabilities. Due to limitations of the widely-used Video-Text Contrastive (VTC) loss for learning action knowledge, we introduce the DVDM objective to train the Knowledge Patcher. DVDM forces the model to encode the correlation between the action text and the correct ordering of video frames. Our extensive analyses show that Paxion and DVDM together effectively fill the gap in action knowledge understanding (~50% \u2192 80%), while maintaining or improving performance on a wide spectrum of both object- and action-centric downstream tasks.", "keywords": "video-language model;action knowledge benchmarking;action understanding;temporal understanding", "primary_area": "", "supplementary_material": "/attachment/3e1ca2e010d52351ce0d1c23fe50b309c0627091.zip", "author": "Zhenhailong Wang;Ansel Blume;Sha Li;Genglin Liu;Jaemin Cho;Zineng Tang;Mohit Bansal;Heng Ji", "authorids": "~Zhenhailong_Wang1;~Ansel_Blume1;~Sha_Li1;~Genglin_Liu1;~Jaemin_Cho1;~Zineng_Tang1;~Mohit_Bansal2;~Heng_Ji3", "gender": "M;;F;M;M;M;M;F", "homepage": "https://mikewangwzhl.github.io/;;;https://genglinliu.github.io/;https://j-min.io;https://zinengtang.github.io/;https://www.cs.unc.edu/~mbansal/;http://blender.cs.illinois.edu/hengji.html", "dblp": "290/1319;;;347/9436;130/8348-1;251/9569;32/5243.html;", "google_scholar": "arzvOlgAAAAJ;;OIo8J2YAAAAJ;xTX3r0IAAAAJ;IbQZoHQAAAAJ;bZy4vtwAAAAJ;DN8QtscAAAAJ;z7GCqT4AAAAJ", "orcid": "0000-0002-4704-5455;;;;0000-0002-1558-6169;;;", "linkedin": "zhenhailong-wang-7952111b2/;;;genglin-liu-085101190/;;;;", "or_profile": "~Zhenhailong_Wang1;~Ansel_Blume1;~Sha_Li1;~Genglin_Liu1;~Jaemin_Cho1;~Zineng_Tang1;~Mohit_Bansal2;~Heng_Ji3", "aff": "University of Illinois Urbana-Champaign;;University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;University of North Carolina, Chapel Hill;University of California, Berkeley;University of North Carolina at Chapel Hill;University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;;illinois.edu;illinois.edu;unc.edu;berkeley.edu;unc.edu;uiuc.edu", "position": "MS student;;PhD student;MS student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": 
"@inproceedings{\nwang2023paxion,\ntitle={Paxion: Patching Action Knowledge in Video-Language Foundation Models},\nauthor={Zhenhailong Wang and Ansel Blume and Sha Li and Genglin Liu and Jaemin Cho and Zineng Tang and Mohit Bansal and Heng Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=blm1pqiOXe}\n}", "github": "", "project": "", "reviewers": "ou3W;CJDm;BUR1;SP6c", "pdf_size": 3301017, "rating": "5;6;7;7", "confidence": "4;5;5;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "134;48;108;110", "wc_strengths": "79;84;14;122", "wc_weaknesses": "273;47;183;141", "wc_questions": "31;2;36;27", "wc_limitations": "595;2;24;2", "wc_review": "1112;183;365;402", "wc_reply_reviewers": "139;29;117;65", "wc_reply_authors": "49;22;20;28", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 31.71750305430741 ], "wc_strengths_avg": [ 74.75, 38.81607270191048 ], "wc_weaknesses_avg": [ 161.0, 81.2773031048644 ], "wc_questions_avg": [ 24.0, 13.095800853708795 ], "wc_limitations_avg": [ 155.75, 253.76009832122938 ], "wc_review_avg": [ 515.5, 354.2248579645421 ], "wc_reply_reviewers_avg": [ 87.5, 43.159587579123134 ], "wc_reply_authors_avg": [ 29.75, 11.497282287566918 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14901390192453152702&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "illinois.edu;;illinois.edu;illinois.edu;unc.edu;berkeley.edu;unc.edu;uiuc.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;2;1;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of North Carolina;University of California, Berkeley;University of Illinois", "aff_unique_dep": ";;;", "aff_unique_url": "https://illinois.edu;https://www.unc.edu;https://www.berkeley.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UNC;UC Berkeley;UIUC", "aff_campus_unique_index": "0;0;0;1;2;1;0", "aff_campus_unique": "Urbana-Champaign;Chapel Hill;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bringing regularized optimal transport to lightspeed: a splitting method adapted for GPUs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71131", "id": "bmdnWIuypV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54f7125dee9b8b3dc798bb9a082b09e2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bmdnWIuypV", "openreview": "https://openreview.net/forum?id=bmdnWIuypV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71131", "video": "https://nips.cc/virtual/2023/poster/71131", "author_site": "Jacob Lindb\u00e4ck, Zesen Wang, Mikael Johansson", "tldr": "", "abstract": "We present an efficient algorithm for regularized optimal transport. In contrast to\nprevious methods, we use the Douglas-Rachford splitting technique to develop\nan efficient solver that can handle a broad class of regularizers. 
The algorithm\nhas strong global convergence guarantees, low per-iteration cost, and can exploit\nGPU parallelization, making it considerably faster than the state-of-the-art for\nmany problems. We illustrate its competitiveness in several applications, including\ndomain adaptation and learning of generative models.", "keywords": "optimal transport;domain adaptation;splitting methods;gpu computations", "primary_area": "", "supplementary_material": "", "author": "Jacob Lindb\u00e4ck;Zesen Wang;Mikael Johansson", "authorids": "~Jacob_Lindb\u00e4ck1;~Zesen_Wang1;~Mikael_Johansson3", "gender": "M;M;M", "homepage": ";;https://people.KTH.se/~mikaelj", "dblp": "304/8949;;53/764-1", "google_scholar": ";;wQSRT18AAAAJ", "orcid": "0000-0002-1752-5335;0000-0003-0191-5301;", "linkedin": "jacob-lindback/;zesen-wang-59129a172/;", "or_profile": "~Jacob_Lindb\u00e4ck1;~Zesen_Wang1;~Mikael_Johansson3", "aff": "KTH Royal Institute of Technology;KTH Royal Institute of Technology;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;kth.se;kth.se", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlindb{\\\"a}ck2023bringing,\ntitle={Bringing regularized optimal transport to lightspeed: a splitting method adapted for {GPU}s},\nauthor={Jacob Lindb{\\\"a}ck and Zesen Wang and Mikael Johansson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bmdnWIuypV}\n}", "github": "", "project": "", "reviewers": "WrJb;4HzB;PMSB;e6yJ", "pdf_size": 4091572, "rating": "4;7;7;7", "confidence": "4;4;3;3", "soundness": "3;3;2;4", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "81;72;147;54", "wc_strengths": "120;61;57;87", "wc_weaknesses": "224;152;105;294", "wc_questions": "10;19;28;115", "wc_limitations": "2;7;22;1", "wc_review": "437;311;359;551", "wc_reply_reviewers": "0;26;67;136", "wc_reply_authors": "0;0;105;57", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 88.5, 35.14612354157995 ], "wc_strengths_avg": [ 81.25, 25.16321720289359 ], "wc_weaknesses_avg": [ 193.75, 71.736932607967 ], "wc_questions_avg": [ 43.0, 42.053537306628556 ], "wc_limitations_avg": [ 8.0, 8.396427811873332 ], "wc_review_avg": [ 414.5, 90.73450280901967 ], "wc_reply_reviewers_avg": [ 57.25, 51.35842189943145 ], "wc_reply_authors_avg": [ 40.5, 43.91184350491334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14933918149281033051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kth.se;kth.se;kth.se", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "title": "EV-Eye: Rethinking High-frequency Eye Tracking through the Lenses of Event Cameras", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73538", "id": "bmfMNIf1bU", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c41b5d8c1ba15b2aa83e4fa1541f02c8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=bmfMNIf1bU", "openreview": "https://openreview.net/forum?id=bmfMNIf1bU", "poster": "/media/PosterPDFs/NeurIPS%202023/73538.png?t=1699107898.8287046", "slides": "https://nips.cc/virtual/2023/poster/73538", "video": "https://nips.cc/virtual/2023/poster/73538", "author_site": "Guangrong Zhao, Yurun Yang, Jingwei Liu, Ning Chen, Yiran Shen, Hongkai Wen, Guohao Lan", "tldr": "", "abstract": "In this paper, we present EV-Eye, a first-of-its-kind large scale multimodal eye tracking dataset aimed at inspiring research on high-frequency eye/gaze tracking. EV-Eye utilizes an emerging bio-inspired event camera to capture independent pixel-level intensity changes induced by eye movements, achieving sub-microsecond latency. Our dataset was curated over a two-week period and collected from 48 participants encompassing diverse genders and age groups. It comprises over 1.5 million near-eye grayscale images and 2.7 billion event samples generated by two DAVIS346 event cameras. Additionally, the dataset contains 675 thousands scene images and 2.7 million gaze references captured by Tobii Pro Glasses 3 eye tracker for cross-modality validation. Compared with existing event-based high-frequency eye tracking datasets, our dataset is significantly larger in size, and the gaze references involve more natural eye movement patterns, i.e., fixation, saccade and smooth pursuit. Alongside the event data, we also present a hybrid eye tracking method as benchmark, which leverages both the near-eye grayscale images and event data for robust and high-frequency eye tracking. We show that our method achieves higher accuracy for both pupil and gaze estimation tasks compared to the existing solution.", "keywords": "event cameras;high-frequency eye tracking;large scale multi-modal dataset", "primary_area": "", "supplementary_material": "/attachment/19ab9304ba1b841d63c40451d1bee536dfb6e242.pdf", "author": "Guangrong Zhao;Yurun Yang;Jingwei Liu;Ning Chen;Yiran Shen;Hongkai Wen;Guohao Lan", "authorids": "~Guangrong_Zhao2;~Yurun_Yang1;~Jingwei_Liu3;~Ning_Chen6;~Yiran_Shen1;~Hongkai_Wen1;~Guohao_Lan1", "gender": "M;F;M;;M;;M", "homepage": "https://doi.org/10.1145/3440250;https://willowstar.top/;https://blog.csdn.net/m0_53444618;;;;https://guohao.netlify.app/", "dblp": ";;;;71/11188-1;;178/9755.html", "google_scholar": ";;;;;;1ebZN5gAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Guangrong_Zhao2;~Yurun_Yang1;~Jingwei_Liu3;~Ning_Chen6;~Yiran_Shen1;~Hongkai_Wen1;~Guohao_Lan1", "aff": "Shandong University;Shandong University;Shandong University;;Shandong University;;Delft University of Technology", "aff_domain": "sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;;sdu.edu.cn;;tudelft.nl", "position": "PhD student;Undergrad student;Undergrad student;;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023eveye,\ntitle={{EV}-Eye: Rethinking High-frequency Eye Tracking through the Lenses of Event Cameras},\nauthor={Guangrong Zhao and Yurun Yang and Jingwei Liu and Ning Chen and Yiran Shen and Hongkai Wen and Guohao Lan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=bmfMNIf1bU}\n}", "github": "", "project": "", "reviewers": "HJTg;MbfV;MqwV;QL5M;pW5t", "pdf_size": 0, "rating": "6;6;6;7;8", "confidence": "3;3;4;2;4", 
"wc_summary_and_contributions": "55;115;36;14;39", "wc_strengths": "74;27;13;15;40", "wc_improvement": "45;81;71;89;85", "wc_limitations": "90;84;11;2;16", "wc_correctness": "11;9;7;2;44", "wc_clarity": "5;18;14;5;16", "wc_relation_to_prior_work": "8;6;13;1;31", "wc_documentation": "19;7;25;9;23", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "308;348;191;138;295", "wc_reply_reviewers": "80;194;59;0;0", "wc_reply_authors": "371;988;515;306;489", "reply_reviewers": "1;3;1;0;0", "reply_authors": "1;3;2;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 51.8, 34.19590618772955 ], "wc_strengths_avg": [ 33.8, 22.301569451498253 ], "wc_improvement_avg": [ 74.2, 15.778466338652816 ], "wc_limitations_avg": [ 40.6, 38.19738210924932 ], "wc_correctness_avg": [ 14.6, 15.00133327407934 ], "wc_clarity_avg": [ 11.6, 5.535341001239219 ], "wc_relation_to_prior_work_avg": [ 11.8, 10.342146779078316 ], "wc_documentation_avg": [ 16.6, 7.310266752998826 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 256.0, 78.5340690401306 ], "wc_reply_reviewers_avg": [ 66.6, 71.18876315824008 ], "wc_reply_authors_avg": [ 533.8, 239.6058430005412 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14106045441162511958&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;;sdu.edu.cn;;tudelft.nl", "author_num": 7, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Shandong University;Delft University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.sdu.edu.cn;https://www.tudelft.nl", "aff_unique_abbr": "SDU;TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Netherlands" }, { "title": "Hierarchical Open-vocabulary Universal Image Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71130", "id": "bn4qZxltsH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/43663f64775ae439ec52b64305d219d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bn4qZxltsH", "openreview": "https://openreview.net/forum?id=bn4qZxltsH", "poster": "/media/PosterPDFs/NeurIPS%202023/71130.png?t=1699545404.6217074", "slides": "https://nips.cc/virtual/2023/poster/71130", "video": "https://nips.cc/virtual/2023/poster/71130", "author_site": "Xudong Wang, Shufan Li, Konstantinos Kallidromitis, Yusuke Kato, Kazuki Kozuka, Trevor Darrell", "tldr": "", "abstract": "Open-vocabulary image segmentation aims to partition an image into semantic regions according to arbitrary text descriptions. However, complex visual scenes can be naturally decomposed into simpler parts and abstracted at multiple lev4 els of granularity, introducing inherent segmentation ambiguity. Unlike existing methods that typically sidestep this ambiguity and treat it as an external factor, our approach actively incorporates a hierarchical representation encompassing different semantic-levels into the learning process. We propose a decoupled text-image fusion mechanism and representation learning modules for both \u201cthings\u201d and \u201cstuff\u201d. 
Additionally, we systematically examine the differences that exist in the textual and visual features between these types of categories. Our resulting model, named HIPIE, tackles HIerarchical, oPen-vocabulary, and unIvErsal segmentation tasks within a unified framework. Benchmarked on diverse datasets, e.g., ADE20K, COCO, Pascal-VOC Part, and RefCOCO/RefCOCOg, HIPIE achieves the state-of-the-art results at various levels of image comprehension, including semantic-level (e.g., semantic segmentation), instance-level (e.g., panoptic/referring segmentation and object detection), as well as part-level (e.g., part/subpart segmentation) tasks.", "keywords": "Universal Image Segmentation;Hierarchical;Open-vocabulary", "primary_area": "", "supplementary_material": "/attachment/cc8a2e4c418b2c3f23f97966f1e755a82f9257c3.pdf", "author": "Xudong Wang;Shufan Li;Konstantinos Kallidromitis;Yusuke Kato;Kazuki Kozuka;Trevor Darrell", "authorids": "~Xudong_Wang4;~Shufan_Li1;~Konstantinos_Kallidromitis1;~Yusuke_Kato1;~Kazuki_Kozuka1;~Trevor_Darrell2", "gender": "M;M;M;;M;M", "homepage": "http://people.eecs.berkeley.edu/~xdwang/;;https://github.com/KKallidromitis;;https://kazukikozuka.net/;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";218/8196;;;35/5062;d/TrevorDarrell", "google_scholar": "Azf07WcAAAAJ;;5EuNtbQAAAAJ;;_Fq6uvQAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;;", "linkedin": ";shufan-li-126b70187/;kkall/;yusuke-kato-b1a875175/;;", "or_profile": "~Xudong_Wang4;~Shufan_Li1;~Konstantinos_Kallidromitis1;~Yusuke_Kato1;~Kazuki_Kozuka1;~trevor_darrell1", "aff": "GenAI, Meta;University of California, Berkeley;Panasonic;Panasonic Holdings Corporation;Panasonic Corporation;Electrical Engineering & Computer Science Department", "aff_domain": "meta.com;berkeley.edu;us.panasonic.com;jp.panasonic.com;jp.panasonic.com;eecs.berkeley.edu", "position": "Research Intern;Undergrad student;AI Research Engineer;Researcher;Researcher;Professor", "bibtex": "@inproceedings{\nwang2023hierarchical,\ntitle={Hierarchical Open-vocabulary Universal Image Segmentation},\nauthor={Xudong Wang and Shufan Li and Konstantinos Kallidromitis and Yusuke Kato and Kazuki Kozuka and Trevor Darrell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bn4qZxltsH}\n}", "github": "", "project": "", "reviewers": "EcWi;7gze;4C24;YNZH", "pdf_size": 23391904, "rating": "4;5;5;6", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "63;56;71;42", "wc_strengths": "24;14;102;40", "wc_weaknesses": "187;159;188;108", "wc_questions": "14;2;15;81", "wc_limitations": "1;1;44;1", "wc_review": "289;232;420;272", "wc_reply_reviewers": "0;0;220;160", "wc_reply_authors": "327;334;735;474", "reply_reviewers": "0;0;2;1", "reply_authors": "2;2;4;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.0, 10.653637876331258 ], "wc_strengths_avg": [ 45.0, 34.19064199455752 ], "wc_weaknesses_avg": [ 160.5, 32.46921619010844 ], "wc_questions_avg": [ 28.0, 31.024184114977142 ], "wc_limitations_avg": [ 11.75, 18.619546181365433 ], "wc_review_avg": [ 303.25, 70.51019429841334 ], "wc_reply_reviewers_avg": [ 95.0, 97.33961166965892 ], "wc_reply_authors_avg": [ 467.5, 165.197608941534 ], "reply_reviewers_avg": [ 0.75,
0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13624780248988315714&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "meta.com;berkeley.edu;us.panasonic.com;jp.panasonic.com;jp.panasonic.com;eecs.berkeley.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Meta;University of California, Berkeley;Panasonic Corporation;Panasonic Holdings Corporation;Electrical Engineering & Computer Science Department", "aff_unique_dep": "Meta;;;;Electrical Engineering & Computer Science", "aff_unique_url": "https://meta.com;https://www.berkeley.edu;https://www.panasonic.com;https://www.panasonic.com/global;", "aff_unique_abbr": "Meta;UC Berkeley;Panasonic;PHC;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "United States;Japan;" }, { "title": "Active Reasoning in an Open-World Environment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71129", "id": "bo5oIoL95U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2712b17bb58ea5b2b65c45857b024744-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bo5oIoL95U", "openreview": "https://openreview.net/forum?id=bo5oIoL95U", "poster": "/media/PosterPDFs/NeurIPS%202023/71129.png?t=1701416912.3803127", "slides": "https://nips.cc/virtual/2023/poster/71129", "video": "https://nips.cc/virtual/2023/poster/71129", "author_site": "Manjie Xu, Guangyuan Jiang, Wei Liang, Chi Zhang, Yixin Zhu", "tldr": "", "abstract": "Recent advances in vision-language learning have achieved notable success on *complete-information* question-answering datasets through the integration of extensive world knowledge. Yet, most models operate *passively*, responding to questions based on pre-stored knowledge. In stark contrast, humans possess the ability to *actively* explore, accumulate, and reason using both newfound and existing information to tackle *incomplete-information* questions. In response to this gap, we introduce **Conan**, an interactive open-world environment devised for the assessment of *active reasoning*. **Conan** facilitates active exploration and promotes multi-round abductive inference, reminiscent of rich, open-world settings like Minecraft. Diverging from previous works that lean primarily on single-round deduction via instruction following, **Conan** compels agents to actively interact with their surroundings, amalgamating new evidence with prior knowledge to elucidate events from incomplete observations. Our analysis on \\bench underscores the shortcomings of contemporary state-of-the-art models in active exploration and understanding complex scenarios. Additionally, we explore *Abduction from Deduction*, where agents harness Bayesian rules to recast the challenge of abduction as a deductive process. 
Through **Conan**, we aim to galvanize advancements in active reasoning and set the stage for the next generation of artificial intelligence agents adept at dynamically engaging in environments.", "keywords": "Visual Reasoning;Abductive Reasoning;Active Reasoning", "primary_area": "", "supplementary_material": "/attachment/bbbd3a15a238d4014d6e2a89eba291a90b3e6132.zip", "author": "Manjie Xu;Guangyuan Jiang;Wei Liang;Chi Zhang;Yixin Zhu", "authorids": "~Manjie_Xu1;~Guangyuan_Jiang1;~Wei_Liang1;~Chi_Zhang12;~Yixin_Zhu1", "gender": "M;M;F;;M", "homepage": "https://mjtsu.github.io;https://jiang.gy/;https://liangwei-bit.github.io/web/;;https://yzhu.io/", "dblp": "322/5851;322/5214;;;91/1103-1.html", "google_scholar": "j-WwUGEAAAAJ;3L79mEAAAAAJ;3p6YfBEAAAAJ;;qG9l6JEAAAAJ", "orcid": ";;;;0000-0001-7024-1545", "linkedin": ";;;;", "or_profile": "~Manjie_Xu1;~Guangyuan_Jiang1;~Wei_Liang1;~Chi_Zhang12;~Yixin_Zhu1", "aff": "Tencent AI Lab;Peking University;Beijing Institute of Technology;;Peking University", "aff_domain": "tencent.com;pku.edu.cn;bit.edu.cn;;pku.edu.cn", "position": "Intern;Undergrad student;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nxu2023active,\ntitle={Active Reasoning in an Open-World Environment},\nauthor={Manjie Xu and Guangyuan Jiang and Wei Liang and Chi Zhang and Yixin Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bo5oIoL95U}\n}", "github": "", "project": "", "reviewers": "yGBc;2rwT;i6nS;u9zC;nDEB", "pdf_size": 15050838, "rating": "3;5;5;6;7", "confidence": "4;4;2;5;4", "soundness": "2;3;2;3;3", "novelty": "2;2;3;3;4", "presentation": "2;4;2;4;4", "wc_summary": "60;93;61;81;276", "wc_strengths": "49;43;54;65;72", "wc_weaknesses": "340;109;184;109;261", "wc_questions": "312;9;2;96;394", "wc_limitations": "1;8;2;8;1", "wc_review": "762;262;303;359;1004", "wc_reply_reviewers": "0;18;0;40;128", "wc_reply_authors": "51;77;0;18;180", "reply_reviewers": "0;1;0;1;2", "reply_authors": "2;2;1;2;3", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.9797958971132712 ], "wc_summary_avg": [ 114.2, 81.85206167226333 ], "wc_strengths_avg": [ 56.6, 10.55651457631732 ], "wc_weaknesses_avg": [ 200.6, 89.59598205276842 ], "wc_questions_avg": [ 162.6, 161.05104780783014 ], "wc_limitations_avg": [ 4.0, 3.286335345030997 ], "wc_review_avg": [ 538.0, 293.5213791191367 ], "wc_reply_reviewers_avg": [ 37.2, 47.72588396247889 ], "wc_reply_authors_avg": [ 65.2, 63.24365580831013 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1846372364689991, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6575160617754644963&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "tencent.com;pku.edu.cn;bit.edu.cn;;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Tencent;Peking University;Beijing Institute of Technology", "aff_unique_dep": "Tencent AI Lab;;", "aff_unique_url": "https://ai.tencent.com;http://www.pku.edu.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "Tencent AI Lab;Peking U;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning 
Universal Policies via Text-Guided Video Generation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71128", "id": "bo8q5MRcwy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1d5b9233ad716a43be5c0d3023cb82d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bo8q5MRcwy", "openreview": "https://openreview.net/forum?id=bo8q5MRcwy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71128", "video": "https://nips.cc/virtual/2023/poster/71128", "author_site": "Yilun Du, Sherry Yang, Bo Dai, Hanjun Dai, Ofir Nachum, Josh Tenenbaum, Dale Schuurmans, Pieter Abbeel", "tldr": "", "abstract": "A goal of artificial intelligence is to construct an agent that can solve a wide variety of tasks. Recent progress in text-guided image synthesis has yielded models with an impressive ability to generate complex novel images, exhibiting combinatorial generalization across domains. Motivated by this success, we investigate whether such tools can be used to construct more general-purpose agents. Specifically, we cast the sequential decision making problem as a text-conditioned video generation problem, where, given a text-encoded specification of a desired goal, a planner synthesizes a set of future frames depicting its planned actions in the future, after which control actions are extracted from the generated video. By leveraging text as the underlying goal specification, we are able to naturally and combinatorially generalize to novel goals. The proposed policy-as-video formulation can further represent environments with different state and action spaces in a unified space of images, which, for example, enables learning and generalization across a variety of robot manipulation tasks. Finally, by leveraging pretrained language embeddings and widely available videos from the internet, the approach enables knowledge transfer through predicting highly realistic video plans for real robots.", "keywords": "sequential decision making;general-purpose agent;video diffusion", "primary_area": "", "supplementary_material": "/attachment/0461f7825a836efb5d24f1e7e79cdc544673c109.zip", "author": "Yilun Du;Sherry Yang;Bo Dai;Hanjun Dai;Ofir Nachum;Joshua B. 
Tenenbaum;Dale Schuurmans;Pieter Abbeel", "authorids": "~Yilun_Du1;~Sherry_Yang1;~Bo_Dai1;~Hanjun_Dai1;~Ofir_Nachum1;~Joshua_B._Tenenbaum1;~Dale_Schuurmans1;~Pieter_Abbeel2", "gender": ";F;;M;M;;;M", "homepage": "https://yilundu.github.io;https://sherryy.github.io;https://bo-dai.github.io/;https://hanjun-dai.github.io;https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;;;https://people.eecs.berkeley.edu/~pabbeel/", "dblp": "204/4379;;64/2903;144/7311;;t/JoshuaBTenenbaum;;", "google_scholar": ";7c1B_fIAAAAJ;TIKl_foAAAAJ;obpl7GQAAAAJ;C-ZlBWMAAAAJ;;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ", "orcid": ";;0009-0002-8070-574X;;;;;", "linkedin": ";;;hanjun-dai;;;;", "or_profile": "~Yilun_Du1;~Sherry_Yang1;~Bo_Dai1;~Hanjun_Dai1;~Ofir_Nachum1;~Joshua_B._Tenenbaum1;~Dale_Schuurmans1;~Pieter_Abbeel2", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;Google Brain;Google Research;OpenAI;Massachusetts Institute of Technology;;Covariant", "aff_domain": "mit.edu;berkeley.edu;google.com;google.com;openai.com;mit.edu;;covariant.ai", "position": "PhD student;Student;Research Scientist;Researcher;Researcher;Professor;;Founder", "bibtex": "@inproceedings{\ndu2023learning,\ntitle={Learning Universal Policies via Text-Guided Video Generation},\nauthor={Yilun Du and Sherry Yang and Bo Dai and Hanjun Dai and Ofir Nachum and Joshua B. Tenenbaum and Dale Schuurmans and Pieter Abbeel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bo8q5MRcwy}\n}", "github": "", "project": "", "reviewers": "5Y9t;VoKS;eKLN;voju", "pdf_size": 6065059, "rating": "6;6;7;7", "confidence": "4;5;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "120;72;121;58", "wc_strengths": "32;41;45;59", "wc_weaknesses": "416;88;121;148", "wc_questions": "133;193;121;48", "wc_limitations": "37;11;12;24", "wc_review": "738;405;420;337", "wc_reply_reviewers": "34;292;28;17", "wc_reply_authors": "0;340;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.75, 28.19020219863632 ], "wc_strengths_avg": [ 44.25, 9.730750228014282 ], "wc_weaknesses_avg": [ 193.25, 130.34833140474026 ], "wc_questions_avg": [ 123.75, 51.54306451890496 ], "wc_limitations_avg": [ 21.0, 10.559356040971437 ], "wc_review_avg": [ 475.0, 155.0306421324507 ], "wc_reply_reviewers_avg": [ 92.75, 115.19847004192373 ], "wc_reply_authors_avg": [ 85.0, 147.22431864335456 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 232, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1344708825498826124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "mit.edu;berkeley.edu;google.com;google.com;openai.com;mit.edu;;covariant.ai", "author_num": 8, "aff_unique_index": "0;1;2;2;3;0;4", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;Google;OpenAI;Covariant", "aff_unique_dep": ";;Google Brain;;", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;https://brain.google.com;https://openai.com;", "aff_unique_abbr": "MIT;UC Berkeley;Google Brain;OpenAI;", "aff_campus_unique_index": 
"1;2;2", "aff_campus_unique": ";Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "EDGI: Equivariant Diffusion for Planning with Embodied Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71127", "id": "bpmM6SkDUy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c95c049637c5c549c2a08e8d6dcbca4b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bpmM6SkDUy", "openreview": "https://openreview.net/forum?id=bpmM6SkDUy", "poster": "/media/PosterPDFs/NeurIPS%202023/71127.png?t=1701726956.2086208", "slides": "https://nips.cc/virtual/2023/poster/71127", "video": "https://nips.cc/virtual/2023/poster/71127", "author_site": "Johann Brehmer, Joey Bose, Pim de Haan, Taco Cohen", "tldr": "", "abstract": "Embodied agents operate in a structured world, often solving tasks with spatial, temporal, and permutation symmetries. Most algorithms for planning and model-based reinforcement learning (MBRL) do not take this rich geometric structure into account, leading to sample inefficiency and poor generalization. We introduce the Equivariant Diffuser for Generating Interactions (EDGI), an algorithm for MBRL and planning that is equivariant with respect to the product of the spatial symmetry group SE(3), the discrete-time translation group \u2124, and the object permutation group S\u2099. EDGI follows the Diffuser framework by Janner et al. (2022) in treating both learning a world model and planning in it as a conditional generative modeling problem, training a diffusion model on an offline trajectory dataset. We introduce a new SE(3) \u00d7 \u2124 \u00d7 S\u2099-equivariant diffusion model that supports multiple representations. We integrate this model in a planning loop, where conditioning and classifier guidance let us softly break the symmetry for specific tasks as needed. 
On object manipulation and navigation tasks, EDGI is substantially more sample efficient and generalizes better across the symmetry group than non-equivariant models.", "keywords": "Planning;Diffusion models;Equivariance;Equivariant generative models", "primary_area": "", "supplementary_material": "/attachment/34e1d19ee1991b5ae99e6f916ab0329dff21ff88.pdf", "author": "Johann Brehmer;Joey Bose;Pim De Haan;Taco Cohen", "authorids": "~Johann_Brehmer1;~Joey_Bose1;~Pim_De_Haan1;~Taco_Cohen1", "gender": "M;M;M;M", "homepage": "https://johannbrehmer.github.io;https://joeybose.github.io/;https://pimdehaan.com;http://www.ta.co.nl", "dblp": "220/5763;174/3372;;142/2903", "google_scholar": "ZdUMvCsAAAAJ;ybPyI7IAAAAJ;AZeK-REAAAAJ;a3q4YxEAAAAJ", "orcid": "0000-0003-3344-4209;;;", "linkedin": "johannbrehmer;;https://nl.linkedin.com/in/pim-de-haan;", "or_profile": "~Johann_Brehmer1;~Joey_Bose1;~Pim_De_Haan1;~Taco_Cohen1", "aff": "Qualcomm AI Research;McGill University and Mila;Qualcomm;Qualcomm Inc, QualComm", "aff_domain": "qualcomm.com;mcgill.ca;qualcomm.com;qti.qualcomm.com", "position": "Researcher;PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbrehmer2023edgi,\ntitle={{EDGI}: Equivariant Diffusion for Planning with Embodied Agents},\nauthor={Johann Brehmer and Joey Bose and Pim De Haan and Taco Cohen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bpmM6SkDUy}\n}", "github": "", "project": "", "reviewers": "kPhe;LvgJ;FPx8;28Jy", "pdf_size": 1666629, "rating": "6;6;6;7", "confidence": "3;4;3;3", "soundness": "2;2;3;4", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "71;94;86;104", "wc_strengths": "47;46;42;9", "wc_weaknesses": "55;99;136;49", "wc_questions": "49;152;117;22", "wc_limitations": "1;37;100;12", "wc_review": "223;428;481;196", "wc_reply_reviewers": "14;23;28;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 12.07010770457331 ], "wc_strengths_avg": [ 36.0, 15.700318468107582 ], "wc_weaknesses_avg": [ 84.75, 35.329697139941636 ], "wc_questions_avg": [ 85.0, 51.90857347298228 ], "wc_limitations_avg": [ 37.5, 38.36991008589934 ], "wc_review_avg": [ 332.0, 124.29199491519958 ], "wc_reply_reviewers_avg": [ 17.5, 8.789197915623474 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3873027644013321850&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "qualcomm.com;mcgill.ca;qualcomm.com;qti.qualcomm.com", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Qualcomm;McGill University;Qualcomm Incorporated", "aff_unique_dep": "Qualcomm AI Research;;", "aff_unique_url": "https://www.qualcomm.com/research;https://www.mcgill.ca;https://www.qualcomm.com", "aff_unique_abbr": "QAI;McGill;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "SyncTREE: Fast Timing Analysis for Integrated Circuit Design through a 
Physics-informed Tree-based Graph Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71126", "id": "bprclnHNvm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/435e8fbbfc2c6072d4f3a5cb6e56a39a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bprclnHNvm", "openreview": "https://openreview.net/forum?id=bprclnHNvm", "poster": "/media/PosterPDFs/NeurIPS%202023/71126.png?t=1700643071.3459833", "slides": "https://nips.cc/virtual/2023/poster/71126", "video": "https://nips.cc/virtual/2023/poster/71126", "author_site": "Yuting Hu, Jiajie Li, Florian Klemme, Gi-Joon Nam, Tengfei Ma, Hussam Amrouch, Jinjun Xiong", "tldr": "", "abstract": "Nowadays integrated circuits (ICs) are underpinning all major information technology innovations including the current trends of artificial intelligence (AI). Modern IC designs often involve analyses of complex phenomena (such as timing, noise, and power etc.) for tens of billions of electronic components, like resistance (R), capacitance (C), transistors and gates, interconnected in various complex structures. Those analyses often need to strike a balance between accuracy and speed as those analyses need to be carried out many times throughout the entire IC design cycles. With the advancement of AI, researchers also start to explore news ways in leveraging AI to improve those analyses. This paper focuses on one of the most important analyses, timing analysis for interconnects. Since IC interconnects can be represented as an RC-tree, a specialized graph as tree, we design a novel tree-based graph neural network, SyncTREE, to speed up the timing analysis by incorporating both the structural and physical properties of electronic circuits. Our major innovations include (1) a two-pass message-passing (bottom-up and top-down) for graph embedding, (2) a tree contrastive loss to guide learning, and (3) a closed formular-based approach to conduct fast timing. 
Our experiments show that, compared to conventional GNN models, SyncTREE achieves the best timing prediction in terms of both delays and slews, all in reference to the industry golden numerical analyses results on real IC design data.", "keywords": "Graph Neural Networks;Integrated Circuits;Circuit Timing Analysis;Physics-guided Deep Learning", "primary_area": "", "supplementary_material": "/attachment/ee318bec92d0822d237e1f6a46712bc9407397d5.pdf", "author": "Yuting Hu;Jiajie Li;Florian Klemme;Gi-Joon Nam;Tengfei Ma;Hussam Amrouch;Jinjun Xiong", "authorids": "~Yuting_Hu2;~Jiajie_Li2;~Florian_Klemme1;~Gi-Joon_Nam1;~Tengfei_Ma1;~Hussam_Amrouch1;~Jinjun_Xiong1", "gender": "F;M;M;Not Specified;M;M;", "homepage": "https://www.webofscience.com/wos/author/record/2245410;https://jiajie.li;;;https://sites.google.com/site/matf0123/;https://www.ce.cit.tum.de/en/aipro;https://www.xlab-ub.com", "dblp": ";;;37/3594;94/9023-1;94/10663.html;81/1130", "google_scholar": ";oMCzOmoAAAAJ;dYMPMdgAAAAJ;CMn_5ToAAAAJ;9OvNakkAAAAJ;63J6zas48moC;tRt1xPYAAAAJ", "orcid": "0000-0003-1009-1519;;0000-0002-0148-0523;;0000-0002-1086-529X;0000-0002-5649-3102;0000-0002-2620-4859", "linkedin": ";li-jia-jie/;;gi-joon-nam-883864202/;;hussam-amrouch-86b28867/;jinjun-xiong-314774/", "or_profile": "~Yuting_Hu2;~Jiajie_Li2;~Florian_Klemme1;~Gi-Joon_Nam1;~Tengfei_Ma1;~Hussam_Amrouch1;~Jinjun_Xiong1", "aff": "State University of New York at Buffalo;State University of New York at Buffalo;Universit\u00e4t Stuttgart;IBM, International Business Machines;International Business Machines;Technische Universit\u00e4t M\u00fcnchen;State University of New York at Buffalo", "aff_domain": "buffalo.edu;buffalo.edu;uni-stuttgart.de;us.ibm.com;ibm.com;tum.de;buffalo.edu", "position": "PhD student;PhD student;PhD student;Principal Researcher;Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nhu2023synctree,\ntitle={Sync{TREE}: Fast Timing Analysis for Integrated Circuit Design through a Physics-informed Tree-based Graph Neural Network},\nauthor={Yuting Hu and Jiajie Li and Florian Klemme and Gi-Joon Nam and Tengfei Ma and Hussam Amrouch and Jinjun Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bprclnHNvm}\n}", "github": "", "project": "", "reviewers": "sZvh;ZUNz;YDvu;6pFT", "pdf_size": 2978418, "rating": "3;5;6;6", "confidence": "4;2;4;1", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "13;41;44;12", "wc_strengths": "25;42;32;60", "wc_weaknesses": "43;83;350;3", "wc_questions": "3;225;36;24", "wc_limitations": "3;30;1;8", "wc_review": "87;421;463;107", "wc_reply_reviewers": "0;0;43;0", "wc_reply_authors": "46;40;20;37", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 27.5, 15.041608956491324 ], "wc_strengths_avg": [ 39.75, 13.160072188251856 ], "wc_weaknesses_avg": [ 119.75, 135.91058641621703 ], "wc_questions_avg": [ 72.0, 89.12070466507768 ], "wc_limitations_avg": [ 10.5, 11.543396380615196 ], "wc_review_avg": [ 269.5, 173.28228414930362 ], "wc_reply_reviewers_avg": [ 10.75, 18.619546181365433 ], "wc_reply_authors_avg": [ 35.75, 9.65336728815391 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 
7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7503923549992195473&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "buffalo.edu;buffalo.edu;uni-stuttgart.de;us.ibm.com;ibm.com;tum.de;buffalo.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;3;4;0", "aff_unique_norm": "State University of New York at Buffalo;University of Stuttgart;International Business Machines;International Business Machines Corporation;Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.buffalo.edu;https://www.uni-stuttgart.de;https://www.ibm.com;https://www.ibm.com;https://www.tum.de", "aff_unique_abbr": "SUNY Buffalo;Uni Stuttgart;IBM;IBM;TUM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Buffalo;", "aff_country_unique_index": "0;0;1;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "Parallel Sampling of Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71125", "id": "bpzwUfX1UP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d1986a61e30e5fa408c81216a616e20-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bpzwUfX1UP", "openreview": "https://openreview.net/forum?id=bpzwUfX1UP", "poster": "/media/PosterPDFs/NeurIPS%202023/71125.png?t=1699553821.7033885", "slides": "https://nips.cc/virtual/2023/poster/71125", "video": "https://nips.cc/virtual/2023/poster/71125", "author_site": "Andy Shih, Suneel Belkhale, Stefano Ermon, Dorsa Sadigh, Nima Anari", "tldr": "", "abstract": "Diffusion models are powerful generative models but suffer from slow sampling, often taking 1000 sequential denoising steps for one sample. As a result, considerable efforts have been directed toward reducing the number of denoising steps, but these methods hurt sample quality. Instead of reducing the number of denoising steps (trading quality for speed), in this paper we explore an orthogonal approach: can we run the denoising steps in parallel (trading compute for speed)? In spite of the sequential nature of the denoising steps, we show that surprisingly it is possible to parallelize sampling via Picard iterations, by guessing the solution of future denoising steps and iteratively refining until convergence. With this insight, we present ParaDiGMS, a novel method to accelerate the sampling of pretrained diffusion models by denoising multiple steps in parallel. ParaDiGMS is the first diffusion sampling method that enables trading compute for speed and is even compatible with existing fast sampling techniques such as DDIM and DPMSolver. 
Using ParaDiGMS, we improve sampling speed by 2-4x across a range of robotics and image generation models, giving state-of-the-art sampling speeds of 0.2s on 100-step DiffusionPolicy and 14.6s on 1000-step StableDiffusion-v2 with no measurable degradation of task reward, FID score, or CLIP score.", "keywords": "diffusion models;parallel sampling", "primary_area": "", "supplementary_material": "/attachment/83ef4ef7a2a4710daac304ded2c2ab931aa5b325.zip", "author": "Andy Shih;Suneel Belkhale;Stefano Ermon;Dorsa Sadigh;Nima Anari", "authorids": "~Andy_Shih1;~Suneel_Belkhale1;~Stefano_Ermon1;~Dorsa_Sadigh1;~Nima_Anari1", "gender": ";M;M;F;M", "homepage": "https://cs.stanford.edu/~andyshih/;https://github.com/suneelbelkhale;http://cs.stanford.edu/~ermon/;https://dorsa.fyi/;https://nimaanari.com", "dblp": "https://dblp.uni-trier.de/pers/hd/s/Shih:Andy;236/5069;47/8135;117/3174;60/8821", "google_scholar": "G85kxUUAAAAJ;;;ZaJEZpYAAAAJ;kmeUhO8AAAAJ", "orcid": ";0000-0002-3963-7987;;;0000-0002-4394-3530", "linkedin": ";suneel-b-032b1a101/;;;", "or_profile": "~Andy_Shih1;~Suneel_Belkhale1;~Stefano_Ermon1;~Dorsa_Sadigh1;~Nima_Anari1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshih2023parallel,\ntitle={Parallel Sampling of Diffusion Models},\nauthor={Andy Shih and Suneel Belkhale and Stefano Ermon and Dorsa Sadigh and Nima Anari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bpzwUfX1UP}\n}", "github": "", "project": "", "reviewers": "RXY2;gnmk;veyN;PoCx;psnQ", "pdf_size": 5242753, "rating": "6;6;7;7;8", "confidence": "3;4;4;4;4", "soundness": "2;3;4;4;4", "novelty": "2;3;3;3;4", "presentation": "2;3;4;4;4", "wc_summary": "60;69;97;65;71", "wc_strengths": "20;66;108;55;62", "wc_weaknesses": "17;45;83;160;279", "wc_questions": "290;66;44;2;91", "wc_limitations": "7;8;32;5;45", "wc_review": "394;254;364;287;548", "wc_reply_reviewers": "153;72;86;21;33", "wc_reply_authors": "254;273;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 72.4, 12.86234815265082 ], "wc_strengths_avg": [ 62.2, 28.08843178249722 ], "wc_weaknesses_avg": [ 116.8, 94.26855255067832 ], "wc_questions_avg": [ 98.6, 100.06717743596049 ], "wc_limitations_avg": [ 19.4, 16.156732342896568 ], "wc_review_avg": [ 369.4, 102.60526302290737 ], "wc_reply_reviewers_avg": [ 73.0, 46.634750991079606 ], "wc_reply_authors_avg": [ 105.4, 129.22786077313205 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13198523717133778694&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "email": "cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", 
"aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ProteinInvBench: Benchmarking Protein Inverse Folding on Diverse Tasks, Models, and Metrics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73537", "id": "bqXduvuW5E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d73078d49799693792fb0f3f32c57fc8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=bqXduvuW5E", "openreview": "https://openreview.net/forum?id=bqXduvuW5E", "poster": "/media/PosterPDFs/NeurIPS%202023/73537.png?t=1701049145.9034383", "slides": "https://nips.cc/virtual/2023/poster/73537", "video": "https://nips.cc/virtual/2023/poster/73537", "author_site": "Zhangyang Gao, Cheng Tan, Yijie Zhang, Xingran Chen, Lirong Wu, Stan Z. Li", "tldr": "", "abstract": "Protein inverse folding has attracted increasing attention in recent years. However, we observe that current methods are usually limited to the CATH dataset and the recovery metric. The lack of a unified framework for ensembling and comparing different methods hinders the comprehensive investigation. In this paper, we propose ProteinBench, a new benchmark for protein design, which comprises extended protein design tasks, integrated models, and diverse evaluation metrics. We broaden the application of methods originally designed for single-chain protein design to new scenarios of multi-chain and \\textit{de novo} protein design. Recent impressive methods, including GraphTrans, StructGNN, GVP, GCA, AlphaDesign, ProteinMPNN, PiFold and KWDesign are integrated into our framework. In addition to the recovery, we also evaluate the confidence, diversity, sc-TM, efficiency, and robustness to thoroughly revisit current protein design approaches and inspire future work. As a result, we establish the first comprehensive benchmark for protein design, which is publicly available at \\url{https://github.com/A4Bio/OpenCPD}.", "keywords": "protein design", "primary_area": "", "supplementary_material": "/attachment/ffdf6c4ccdf982ab2a288564b6fc325cc565db47.pdf", "author": "Zhangyang Gao;Cheng Tan;Yijie Zhang;Xingran Chen;Lirong Wu;Stan Z. 
Li", "authorids": "~Zhangyang_Gao1;~Cheng_Tan1;~Yijie_Zhang3;~Xingran_Chen1;~Lirong_Wu1;~Stan_Z._Li2", "gender": "M;M;F;M;;M", "homepage": ";https://chengtan9907.github.io/;https://forrest-yijie-zhang.github.io/;https://www.chenxingran.com/;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "275/3266;70/1533-12.html;;203/8349;15/10330;l/StanZLi", "google_scholar": "4SclT-QAAAAJ;6kTV6aMAAAAJ;Q9Gby5wAAAAJ;X01oTv8AAAAJ;Tk7TrCoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-1026-6083;;;;;", "linkedin": ";;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Zhangyang_Gao1;~Cheng_Tan1;~Yijie_Zhang3;~Xingran_Chen1;~Lirong_Wu1;~Stan_Z._Li1", "aff": "Westlake University, China;Zhejiang University & Westlake University;McGill University;University of Michigan - Ann Arbor;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;mail.mcgill.ca;umich.edu;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;PhD student;MS student;MS student;PhD student;Chair Professor", "bibtex": "@inproceedings{\ngao2023proteininvbench,\ntitle={ProteinInvBench: Benchmarking Protein Inverse Folding on Diverse Tasks, Models, and Metrics},\nauthor={Zhangyang Gao and Cheng Tan and Yijie Zhang and Xingran Chen and Lirong Wu and Stan Z. Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=bqXduvuW5E}\n}", "github": "", "project": "", "reviewers": "Gd38;xYTD;wq41;HGkS;oPQ8", "pdf_size": 1079127, "rating": "6;7;7;8;8", "confidence": "4;3;3;4;4", "wc_summary_and_contributions": "71;57;72;66;89", "wc_strengths": "160;20;79;87;93", "wc_improvement": "79;22;79;52;221", "wc_limitations": "12;8;2;62;7", "wc_correctness": "21;6;7;5;6", "wc_clarity": "7;40;1;9;5", "wc_relation_to_prior_work": "60;15;16;11;11", "wc_documentation": "67;15;3;11;12", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "478;184;260;304;445", "wc_reply_reviewers": "538;0;23;0;36", "wc_reply_authors": "2659;141;646;187;828", "reply_reviewers": "1;0;1;0;1", "reply_authors": "6;1;2;1;2", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 71.0, 10.449880382090505 ], "wc_strengths_avg": [ 87.8, 44.53043902770329 ], "wc_improvement_avg": [ 90.6, 68.51160485640371 ], "wc_limitations_avg": [ 18.2, 22.130521909796883 ], "wc_correctness_avg": [ 9.0, 6.0332412515993425 ], "wc_clarity_avg": [ 12.4, 14.051334456200237 ], "wc_relation_to_prior_work_avg": [ 22.6, 18.810635289643994 ], "wc_documentation_avg": [ 21.6, 23.04430515333452 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 334.2, 111.29492351405791 ], "wc_reply_reviewers_avg": [ 119.4, 209.75566738469786 ], "wc_reply_authors_avg": [ 892.2, 921.7295481864514 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4106606255046330619&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "westlake.edu.cn;westlake.edu.cn;mail.mcgill.ca;umich.edu;westlake.edu.cn;westlake.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Westlake University;Zhejiang University;McGill University;University of 
Michigan", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.westlake.edu.cn;http://www.zju.edu.cn;https://www.mcgill.ca;https://www.umich.edu", "aff_unique_abbr": "WU;ZJU;McGill;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;1;2;0;0", "aff_country_unique": "China;Canada;United States" }, { "title": "Self-Chained Image-Language Model for Video Localization and Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71124", "id": "brOMKBEGXP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f22a9af8dbb348952b08bd58d4734b50-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=brOMKBEGXP", "openreview": "https://openreview.net/forum?id=brOMKBEGXP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71124", "video": "https://nips.cc/virtual/2023/poster/71124", "author_site": "Shoubin Yu, Jaemin Cho, Prateek Yadav, Mohit Bansal", "tldr": "", "abstract": "Recent studies have shown promising results on utilizing large pre-trained image-language models for video question answering. While these image-language models can efficiently bootstrap the representation learning of video-language models, they typically concatenate uniformly sampled video frames as visual inputs without explicit language-aware, temporal modeling. When only a portion of a video input is relevant to the language query, such uniform frame sampling can often lead to missing important visual cues. Although humans often find a video moment to focus on and rewind the moment to answer questions, training a query-aware video moment localizer often requires expensive annotations and high computational costs. To address this issue, we propose Self-Chained Video Localization-Answering (SeViLA), a novel framework that leverages a single image-language model (BLIP- 2) to tackle both temporal keyframe localization and question answering on videos. SeViLA framework consists of two modules: Localizer and Answerer, where both are parameter-efficiently fine-tuned from BLIP-2. We propose two ways of chaining these modules for cascaded inference and self-refinement. First, in the forward chain, the Localizer finds multiple language-aware keyframes in a video, which the Answerer uses to predict the answer. Second, in the reverse chain, the Answerer generates keyframe pseudo-labels to refine the Localizer, alleviating the need for expensive video moment localization annotations. Our SeViLA framework outperforms several strong baselines/previous works on five challenging video question answering and event prediction benchmarks, and achieves the state-of-the-art in both fine-tuning (NExT-QA and STAR) and zero-shot (NExT-QA, STAR, How2QA, and VLEP) settings. 
We show a comprehensive analysis of our framework, including the impact of Localizer, comparisons of Localizer with other temporal localization models, pre-training/self-refinement of Localizer, and varying the number of keyframes.", "keywords": "Video Question Answering;Video Localization;Image-Language Model", "primary_area": "", "supplementary_material": "", "author": "Shoubin Yu;Jaemin Cho;Prateek Yadav;Mohit Bansal", "authorids": "~Shoubin_Yu1;~Jaemin_Cho1;~Prateek_Yadav1;~Mohit_Bansal2", "gender": ";M;M;M", "homepage": ";https://j-min.io;http://prateek-yadav.github.io;https://www.cs.unc.edu/~mbansal/", "dblp": ";130/8348-1;220/5741;32/5243.html", "google_scholar": ";IbQZoHQAAAAJ;1lXhc0kAAAAJ;DN8QtscAAAAJ", "orcid": ";0000-0002-1558-6169;;", "linkedin": ";;prateek-yadav-40bb34a8;", "or_profile": "~Shoubin_Yu1;~Jaemin_Cho1;~Prateek_Yadav1;~Mohit_Bansal2", "aff": ";University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": ";unc.edu;cs.unc.edu;unc.edu", "position": ";PhD student;Graduate Student;Full Professor", "bibtex": "@inproceedings{\nyu2023selfchained,\ntitle={Self-Chained Image-Language Model for Video Localization and Question Answering},\nauthor={Shoubin Yu and Jaemin Cho and Prateek Yadav and Mohit Bansal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=brOMKBEGXP}\n}", "github": "", "project": "", "reviewers": "wzBa;TABX;mWtS;b5LJ", "pdf_size": 2669516, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "74;64;96;95", "wc_strengths": "55;83;40;138", "wc_weaknesses": "70;139;73;145", "wc_questions": "3;2;2;132", "wc_limitations": "3;2;12;14", "wc_review": "205;290;223;524", "wc_reply_reviewers": "20;0;146;21", "wc_reply_authors": "22;0;52;22", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 13.718144918318949 ], "wc_strengths_avg": [ 79.0, 37.39652390263031 ], "wc_weaknesses_avg": [ 106.75, 35.329697139941636 ], "wc_questions_avg": [ 34.75, 56.14879784999854 ], "wc_limitations_avg": [ 7.75, 5.3091901453988255 ], "wc_review_avg": [ 310.5, 127.26841713481001 ], "wc_reply_reviewers_avg": [ 46.75, 57.91103090085688 ], "wc_reply_authors_avg": [ 24.0, 18.49324200890693 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 183, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9010104890174620077&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 7, "email": ";unc.edu;cs.unc.edu;unc.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Dynamic Attribute-factored World Models for Efficient Multi-object Reinforcement Learning", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71123", "id": "bsNslV3Ahe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3cc87f2bd3e3b4df8f9217326761c322-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bsNslV3Ahe", "openreview": "https://openreview.net/forum?id=bsNslV3Ahe", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71123", "video": "https://nips.cc/virtual/2023/poster/71123", "author_site": "Fan Feng, Sara Magliacane", "tldr": "", "abstract": "In many reinforcement learning tasks, the agent has to learn to interact with many objects of different types and generalize to unseen combinations and numbers of objects. Often a task is a composition of previously learned tasks (e.g. block stacking).\nThese are examples of compositional generalization, in which we compose object-centric representations to solve complex tasks. Recent works have shown the benefits of object-factored representations and hierarchical abstractions for improving sample efficiency in these settings. On the other hand, these methods do not fully exploit the benefits of factorization in terms of object attributes. In this paper, we address this opportunity and introduce the Dynamic Attribute FacTored RL (DAFT-RL) framework. \nIn DAFT-RL, we leverage object-centric representation learning to extract objects from visual inputs. We learn to classify them into classes and infer their latent parameters. For each class of object, we learn a class template graph that describes how the dynamics and reward of an object of this class factorize according to its attributes. We also learn an interaction pattern graph that describes how objects of different classes interact with each other at the attribute level. Through these graphs and a dynamic interaction graph that models the interactions between objects, we can learn a policy that can then be directly applied in a new environment by estimating the interactions and latent parameters.\nWe evaluate DAFT-RL in three benchmark datasets and show our framework outperforms the state-of-the-art in generalizing across unseen objects with varying attributes and latent parameters, as well as in the composition of previously learned tasks.", "keywords": "multi-object RL;compositional generalization;factored representations", "primary_area": "", "supplementary_material": "/attachment/96c07bc8753da7a40eed836f1d77a14888785287.pdf", "author": "Fan Feng;Sara Magliacane", "authorids": "~Fan_Feng2;~Sara_Magliacane1", "gender": ";F", "homepage": ";http://saramagliacane.github.io", "dblp": ";120/5256", "google_scholar": ";https://scholar.google.nl/citations?user=H3j_zQ4AAAAJ", "orcid": ";", "linkedin": ";magliacane/", "or_profile": "~Fan_Feng2;~Sara_Magliacane1", "aff": ";MIT-IBM Watson AI Lab", "aff_domain": ";mit.edu", "position": ";Research Scientist", "bibtex": "@inproceedings{\nfeng2023learning,\ntitle={Learning Dynamic Attribute-factored World Models for Efficient Multi-object Reinforcement Learning},\nauthor={Fan Feng and Sara Magliacane},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bsNslV3Ahe}\n}", "github": "", "project": "", "reviewers": "aVkM;xWTh;akCD;H9zq;NUSp;kNiT", "pdf_size": 2065790, "rating": "4;5;5;6;6;7", "confidence": "4;2;3;4;4;2", "soundness": "2;3;3;3;3;4", "novelty": "1;3;2;3;3;4", "presentation": "3;3;2;3;3;3", "wc_summary": "74;65;180;183;65;111", "wc_strengths": "74;64;68;133;36;39", "wc_weaknesses": "383;278;142;498;30;45", 
"wc_questions": "11;53;110;229;38;128", "wc_limitations": "36;10;41;129;13;53", "wc_review": "578;470;541;1172;182;376", "wc_reply_reviewers": "384;124;14;13;0;74", "wc_reply_authors": "0;194;0;0;0;0", "reply_reviewers": "1;1;1;1;0;1", "reply_authors": "1;2;1;1;1;1", "rating_avg": [ 5.5, 0.9574271077563381 ], "confidence_avg": [ 3.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 113.0, 50.86255990411808 ], "wc_strengths_avg": [ 69.0, 31.979159880563884 ], "wc_weaknesses_avg": [ 229.33333333333334, 172.96595683030293 ], "wc_questions_avg": [ 94.83333333333333, 72.27128675268547 ], "wc_limitations_avg": [ 47.0, 39.66946768821919 ], "wc_review_avg": [ 553.1666666666666, 305.4474404687145 ], "wc_reply_reviewers_avg": [ 101.5, 133.45379974607943 ], "wc_reply_authors_avg": [ 32.333333333333336, 72.2995312724932 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2909286827258562, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17628610184566347129&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": ";mit.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "IBM Watson AI Lab", "aff_unique_url": "https://www.mitibmwatsonailab.org", "aff_unique_abbr": "MIT-IBM AI Lab", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Chatting Makes Perfect: Chat-based Image Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71122", "id": "bt7pQ7o7zG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c1b3d1e2cf53bb28cabd801bd58b3521-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bt7pQ7o7zG", "openreview": "https://openreview.net/forum?id=bt7pQ7o7zG", "poster": "/media/PosterPDFs/NeurIPS%202023/71122.png?t=1701380269.0278814", "slides": "https://nips.cc/virtual/2023/poster/71122", "video": "https://nips.cc/virtual/2023/poster/71122", "author_site": "Matan Levy, Rami Ben-Ari, Nir Darshan, Dani Lischinski", "tldr": "", "abstract": "Chats emerge as an effective user-friendly approach for information retrieval, and are successfully employed in many domains, such as customer service, healthcare, and finance. However, existing image retrieval approaches typically address the case of a single query-to-image round, and the use of chats for image retrieval has been mostly overlooked. In this work, we introduce ChatIR: a chat-based image retrieval system that engages in a conversation with the user to elicit information, in addition to an initial query, in order to clarify the user's search intent. Motivated by the capabilities of today's foundation models, we leverage Large Language Models to generate follow-up questions to an initial image description. These questions form a dialog with the user in order to retrieve the desired image from a large corpus. In this study, we explore the capabilities of such a system tested on a large dataset and reveal that engaging in a dialog yields significant gains in image retrieval. 
We start by building an evaluation pipeline from an existing manually generated dataset and explore different modules and training strategies for ChatIR. Our comparison includes strong baselines derived from related applications trained with Reinforcement Learning. Our system is capable of retrieving the target image from a pool of 50K images with over 78% success rate after 5 dialogue rounds, compared to 75% when questions are asked by humans, and 64% for a single shot text-to-image retrieval. \nExtensive evaluations reveal the strong capabilities and examine the limitations of ChatIR under different settings. Project repository is available at https://github.com/levymsn/ChatIR.", "keywords": "Image Retrieval;Multi-modal learning", "primary_area": "", "supplementary_material": "/attachment/5bb12c54edb1a7531fdd3f5d5964229f0368a793.pdf", "author": "Matan Levy;Rami Ben-Ari;Nir Darshan;Dani Lischinski", "authorids": "~Matan_Levy1;~Rami_Ben-Ari2;~Nir_Darshan1;~Dani_Lischinski2", "gender": ";M;M;M", "homepage": "https://levymsn.github.io;http://www.benarirami.com/;;https://www.cs.huji.ac.il/~danix/", "dblp": "307/5294;07/1624;;29/19", "google_scholar": "t6c3DWMAAAAJ;https://scholar.google.co.il/citations?user=C4i_vUMAAAAJ;;haahCZ4AAAAJ", "orcid": ";;;0000-0002-6191-0361", "linkedin": ";;nirdarshan/;", "or_profile": "~Matan_Levy1;~Rami_Ben-Ari2;~Nir_Darshan1;~Dani_Lischinski2", "aff": "Hebrew University of Jerusalem;Bar-Ilan University;OriginAI;The Hebrew University of Jerusalem, Israel", "aff_domain": "huji.ac.il;biu.ac.il;originai.co;cs.huji.ac.il", "position": "PhD student;Lecturer;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nlevy2023chatting,\ntitle={Chatting Makes Perfect: Chat-based Image Retrieval},\nauthor={Matan Levy and Rami Ben-Ari and Nir Darshan and Dani Lischinski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bt7pQ7o7zG}\n}", "github": "", "project": "", "reviewers": "34K9;Z6DK;L59s;6Lfv", "pdf_size": 4938791, "rating": "4;4;5;5", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "18;75;121;79", "wc_strengths": "26;87;38;34", "wc_weaknesses": "139;116;96;30", "wc_questions": "5;81;117;2", "wc_limitations": "32;8;17;2", "wc_review": "220;367;389;147", "wc_reply_reviewers": "0;22;269;0", "wc_reply_authors": "0;0;114;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;1;3;1", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.25, 36.635877224382114 ], "wc_strengths_avg": [ 46.25, 23.920441049445557 ], "wc_weaknesses_avg": [ 95.25, 40.62865368185365 ], "wc_questions_avg": [ 51.25, 49.428610136235875 ], "wc_limitations_avg": [ 14.75, 11.299889379989523 ], "wc_review_avg": [ 280.75, 100.91673548029583 ], "wc_reply_reviewers_avg": [ 72.75, 113.66040427519164 ], "wc_reply_authors_avg": [ 28.5, 49.363448015713004 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7916291981082635435&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "huji.ac.il;biu.ac.il;originai.co;cs.huji.ac.il", "author_num": 4, "aff_unique_index": 
"0;1;2;0", "aff_unique_norm": "Hebrew University of Jerusalem;Bar-Ilan University;OriginAI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.huji.ac.il;https://www.biu.ac.il;", "aff_unique_abbr": "HUJI;BIU;", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel;" }, { "title": "Global Structure-Aware Diffusion Process for Low-light Image Enhancement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71121", "id": "bv9mmH0LGF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc034d186280f55370b6aca7a3285a65-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bv9mmH0LGF", "openreview": "https://openreview.net/forum?id=bv9mmH0LGF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71121", "video": "https://nips.cc/virtual/2023/poster/71121", "author_site": "Jinhui HOU, Zhiyu Zhu, Junhui Hou, Hui LIU, Huanqiang Zeng, Hui Yuan", "tldr": "", "abstract": "This paper studies a diffusion-based framework to address the low-light image enhancement problem. To harness the capabilities of diffusion models, we delve into this intricate process and advocate for the regularization of its inherent ODE-trajectory. To be specific, inspired by the recent research that low curvature ODE-trajectory results in a stable and effective diffusion process, we formulate a curvature regularization term anchored in the intrinsic non-local structures of image data, i.e., global structure-aware regularization, which gradually facilitates the preservation of complicated details and the augmentation of contrast during the diffusion process. This incorporation mitigates the adverse effects of noise and artifacts resulting from the diffusion process, leading to a more precise and flexible enhancement. To additionally promote learning in challenging regions, we introduce an uncertainty-guided regularization technique, which wisely relaxes constraints on the most extreme regions of the image. Experimental evaluations reveal that the proposed diffusion-based framework, complemented by rank-informed regularization, attains distinguished performance in low-light enhancement. The outcomes indicate substantial advancements in image quality, noise suppression, and contrast amplification in comparison with state-of-the-art methods. We believe this innovative approach will stimulate further exploration and advancement in low-light image processing, with potential implications for other applications of diffusion models. 
The code is publicly available at https://github.com/jinnh/GSAD.", "keywords": "Image enhancement;diffusion models", "primary_area": "", "supplementary_material": "/attachment/aef61ac8201ce09c1ef708c468060ae5ed633d77.zip", "author": "Jinhui HOU;Zhiyu Zhu;Junhui Hou;Hui LIU;Huanqiang Zeng;Hui Yuan", "authorids": "~Jinhui_HOU1;~Zhiyu_Zhu1;~Junhui_Hou2;~Hui_LIU14;~Huanqiang_Zeng1;~Hui_Yuan1", "gender": "M;M;M;F;M;M", "homepage": ";;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;https://heidiliu.github.io/;http://smartviplab.org/members/hqzeng-en.html;https://faculty.sdu.edu.cn/yuanhui/zh_CN/index.htm", "dblp": "213/4553;;122/2673.html;;25/8798;21/780-1", "google_scholar": ";d1L0KkoAAAAJ;j6eefhwAAAAJ;U66txqwAAAAJ;-QvoSUkAAAAJ;u58FkyUAAAAJ", "orcid": ";0000-0002-0726-4522;0000-0003-3431-2021;;;0000-0001-5212-3393", "linkedin": ";;;;;", "or_profile": "~Jinhui_HOU1;~Zhiyu_Zhu1;~Junhui_Hou2;~Hui_LIU14;~Huanqiang_Zeng1;~Hui_Yuan1", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;Saint Francis University;Huaqiao University, China;Shandong University", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;sfu.edu.hk;hqu.edu.cn;sdu.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhou2023global,\ntitle={Global Structure-Aware Diffusion Process for Low-light Image Enhancement},\nauthor={Jinhui HOU and Zhiyu Zhu and Junhui Hou and Hui LIU and Huanqiang Zeng and Hui Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bv9mmH0LGF}\n}", "github": "", "project": "", "reviewers": "2x92;gyFW;9ew6;ts6H;U5ii", "pdf_size": 22884741, "rating": "3;4;5;5;7", "confidence": "5;5;5;5;5", "soundness": "3;2;3;3;3", "novelty": "2;2;2;2;4", "presentation": "3;2;3;3;3", "wc_summary": "25;27;52;56;81", "wc_strengths": "14;19;34;35;91", "wc_weaknesses": "314;158;94;113;4", "wc_questions": "4;60;7;9;189", "wc_limitations": "4;1;17;3;10", "wc_review": "361;265;204;216;375", "wc_reply_reviewers": "640;50;67;35;235", "wc_reply_authors": "1783;185;185;223;30", "reply_reviewers": "4;1;1;1;1", "reply_authors": "5;3;3;3;2", "rating_avg": [ 4.8, 1.32664991614216 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 48.2, 20.68235963327202 ], "wc_strengths_avg": [ 38.6, 27.45614685275412 ], "wc_weaknesses_avg": [ 136.6, 101.86579406258019 ], "wc_questions_avg": [ 53.8, 70.70332382568728 ], "wc_limitations_avg": [ 7.0, 5.830951894845301 ], "wc_review_avg": [ 284.2, 71.54690768999035 ], "wc_reply_reviewers_avg": [ 205.4, 228.95117383407322 ], "wc_reply_authors_avg": [ 481.2, 654.2783505512008 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 3.2, 0.9797958971132712 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9633812976833326541&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;sfu.edu.hk;hqu.edu.cn;sdu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "City University of Hong Kong;Saint Francis University;Huaqiao University;Shandong University", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.cityu.edu.hk;https://www.saintfrancis.edu;https://www.hqu.edu.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "CityU;SFU;HQU;SDU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Natural Language Instruction-following with Task-related Language Development and Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71120", "id": "bx0SDRVDzF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1dc2fe8d9ae956616f86bab3ce5edc59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bx0SDRVDzF", "openreview": "https://openreview.net/forum?id=bx0SDRVDzF", "poster": "/media/PosterPDFs/NeurIPS%202023/71120.png?t=1701609358.7111533", "slides": "https://nips.cc/virtual/2023/poster/71120", "video": "https://nips.cc/virtual/2023/poster/71120", "author_site": "Jing-Cheng Pang, Xin-Yu Yang, Si-Hang Yang, Xiong-Hui Chen, Yang Yu", "tldr": "", "abstract": "Natural language-conditioned reinforcement learning (RL) enables agents to follow human instructions. Previous approaches generally implemented language-conditioned RL by providing the policy with human instructions in natural language (NL) and training the policy to follow instructions. In this is outside-in approach, the policy must comprehend the NL and manage the task simultaneously. However, the unbounded NL examples often bring much extra complexity for solving concrete RL tasks, which can distract policy learning from completing the task. To ease the learning burden of the policy, we investigate an inside-out scheme for natural language-conditioned RL by developing a task language (TL) that is task-related and easily understood by the policy, thus reducing the policy learning burden. Besides, we employ a translator to translate natural language into the TL, which is used in RL to achieve efficient policy training. We implement this scheme as TALAR (TAsk Language with predicAte Representation) that learns multiple predicates to model object relationships as the TL. Experiments indicate that TALAR not only better comprehends NL instructions but also leads to a better instruction-following policy that significantly improves the success rate over baselines and adapts to unseen expressions of NL instruction. 
Besides, the TL is also an effective sub-task abstraction compatible with hierarchical RL.", "keywords": "Reinforcement learning;instruction-following;autonomous agent", "primary_area": "", "supplementary_material": "/attachment/4c3318b2a9e929e2f6be1f755d4bd2be8b6c178d.zip", "author": "Jing-Cheng Pang;Xinyu Yang;Si-Hang Yang;Xiong-Hui Chen;Yang Yu", "authorids": "~Jing-Cheng_Pang1;~Xinyu_Yang3;~Si-Hang_Yang1;~Xiong-Hui_Chen1;~Yang_Yu5", "gender": "M;F;M;M;M", "homepage": "https://www.lamda.nju.edu.cn/pangjc;http://www.lamda.nju.edu.cn/yangxy/;;http://www.lamda.nju.edu.cn/chenxh/;http://www.lamda.nju.edu.cn/yuy", "dblp": "254/2679;;;241/7938;46/2181-1", "google_scholar": "R3Y_WrkAAAAJ;;;H5pguCYAAAAJ;PG2lDSwAAAAJ", "orcid": ";;;;", "linkedin": ";;si-hang-yang-aa0796235/;;", "or_profile": "~Jing-Cheng_Pang1;~Xinyu_Yang3;~Si-Hang_Yang1;~Xiong-Hui_Chen1;~Yang_Yu2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;Undergrad student;PhD student;Professor", "bibtex": "@inproceedings{\npang2023natural,\ntitle={Natural Language Instruction-following with Task-related Language Development and Translation},\nauthor={Jing-Cheng Pang and Xinyu Yang and Si-Hang Yang and Xiong-Hui Chen and Yang Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bx0SDRVDzF}\n}", "github": "", "project": "", "reviewers": "rFCy;KoSw;nXhV;rdPR", "pdf_size": 3747896, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;4;3;4", "novelty": "2;3;3;3", "presentation": "3;4;2;4", "wc_summary": "83;95;63;97", "wc_strengths": "50;57;33;71", "wc_weaknesses": "296;76;64;233", "wc_questions": "56;71;128;137", "wc_limitations": "4;9;7;12", "wc_review": "489;308;295;550", "wc_reply_reviewers": "67;18;44;297", "wc_reply_authors": "309;25;27;383", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 84.5, 13.518505834595775 ], "wc_strengths_avg": [ 52.75, 13.681648292512127 ], "wc_weaknesses_avg": [ 167.25, 99.85833715819626 ], "wc_questions_avg": [ 98.0, 35.04996433664377 ], "wc_limitations_avg": [ 8.0, 2.9154759474226504 ], "wc_review_avg": [ 410.5, 111.20813819141115 ], "wc_reply_reviewers_avg": [ 106.5, 111.34293870740075 ], "wc_reply_authors_avg": [ 186.0, 162.12649382503773 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17598937322796677947&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Federated Linear Bandits with Finite Adversarial Actions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71119", "id": "bzXpQUnule", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c4e380fb74dec9da9c7212e834657aa9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bzXpQUnule", "openreview": "https://openreview.net/forum?id=bzXpQUnule", "poster": "/media/PosterPDFs/NeurIPS%202023/71119.png?t=1702043995.7921321", "slides": "https://nips.cc/virtual/2023/poster/71119", "video": "https://nips.cc/virtual/2023/poster/71119", "author_site": "Li Fan, Ruida Zhou, Chao Tian, Cong Shen", "tldr": "", "abstract": "We study a federated linear bandits model, where $M$ clients communicate with a central server to solve a linear contextual bandits problem with finite adversarial action sets that may be different across clients. To address the unique challenges of **adversarial finite** action sets, we propose the FedSupLinUCB algorithm, which extends the principles of SupLinUCB and OFUL algorithms in linear contextual bandits. We prove that FedSupLinUCB achieves a total regret of $\\tilde{O}(\\sqrt{d T})$, where $T$ is the total number of arm pulls from all clients, and $d$ is the ambient dimension of the linear model. This matches the minimax lower bound and thus is order-optimal (up to polylog terms). We study both asynchronous and synchronous cases and show that the communication cost can be controlled as $O(d M^2 \\log(d)\\log(T))$ and $O(\\sqrt{d^3 M^3} \\log(d))$, respectively. The FedSupLinUCB design is further extended to two scenarios: (1) variance-adaptive, where a total regret of $\\tilde{O} (\\sqrt{d \\sum \\nolimits_{t=1}^{T} \\sigma_t^2})$ can be achieved with $\\sigma_t^2$ being the noise variance of round $t$; and (2) adversarial corruption, where a total regret of $\\tilde{O}(\\sqrt{dT} + d C_p)$ can be achieved with $C_p$ being the total corruption budget. 
Experimental results corroborate the theoretical analysis and demonstrate the effectiveness of FedSupLinUCB on both synthetic and real-world datasets.", "keywords": "Federated bandits;contextual bandits;regret analysis", "primary_area": "", "supplementary_material": "/attachment/216e7c247e67a67f0cc83844218e468f69363400.zip", "author": "Li Fan;Ruida Zhou;Chao Tian;Cong Shen", "authorids": "~Li_Fan5;~Ruida_Zhou1;~Chao_Tian2;~Cong_Shen1", "gender": "M;M;;M", "homepage": ";https://sites.google.com/view/ruida-zhou;;https://cshen317.github.io/", "dblp": ";215/2026;;79/6027-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;kXbo1twAAAAJ;;70LBhKcAAAAJ", "orcid": ";;;0000-0002-3148-4453", "linkedin": ";;;cong-shen-3372404/", "or_profile": "~Li_Fan5;~Ruida_Zhou1;~Chao_Tian2;~Cong_Shen1", "aff": "University of Virginia, Charlottesville;Texas A&M University;;University of Virginia", "aff_domain": "virginia.edu;tamu.edu;;virginia.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nfan2023federated,\ntitle={Federated Linear Bandits with Finite Adversarial Actions},\nauthor={Li Fan and Ruida Zhou and Chao Tian and Cong Shen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bzXpQUnule}\n}", "github": "", "project": "", "reviewers": "5B2X;EJUn;64z5;xbXb", "pdf_size": 0, "rating": "4;5;6;6", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "54;69;121;324", "wc_strengths": "35;17;111;206", "wc_weaknesses": "232;3;8;55", "wc_questions": "58;53;37;29", "wc_limitations": "8;1;13;1", "wc_review": "387;143;290;615", "wc_reply_reviewers": "46;18;22;0", "wc_reply_authors": "0;0;207;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 142.0, 107.9791646568911 ], "wc_strengths_avg": [ 92.25, 74.5498993963104 ], "wc_weaknesses_avg": [ 74.5, 93.16785926487739 ], "wc_questions_avg": [ 44.25, 11.734031702701335 ], "wc_limitations_avg": [ 5.75, 5.0682837331783235 ], "wc_review_avg": [ 358.75, 171.56394580447255 ], "wc_reply_reviewers_avg": [ 21.5, 16.393596310755 ], "wc_reply_authors_avg": [ 51.75, 89.6336292916894 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9251868332818113868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "virginia.edu;tamu.edu;;virginia.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Virginia;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.tamu.edu", "aff_unique_abbr": "UVA;TAMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71118", "id": "bzs4uPLXvi", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/ed3fea9033a80fea1376299fa7863f4a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=bzs4uPLXvi", "openreview": "https://openreview.net/forum?id=bzs4uPLXvi", "poster": "/media/PosterPDFs/NeurIPS%202023/71118.png?t=1702407730.4543743", "slides": "https://nips.cc/virtual/2023/poster/71118", "video": "https://nips.cc/virtual/2023/poster/71118", "author_site": "Miles Turpin, Julian Michael, Ethan Perez, Samuel Bowman", "tldr": "", "abstract": "Large Language Models (LLMs) can achieve strong performance on many tasks by producing step-by-step reasoning before giving a final output, often referred to as chain-of-thought reasoning (CoT). It is tempting to interpret these CoT explanations as the LLM's process for solving a task. This level of transparency into LLMs' predictions would yield significant safety benefits. However, we find that CoT explanations can systematically misrepresent the true reason for a model's prediction. We demonstrate that CoT explanations can be heavily influenced by adding biasing features to model inputs\u2014e.g., by reordering the multiple-choice options in a few-shot prompt to make the answer always \"(A)\"\u2014which models systematically fail to mention in their explanations. When we bias models toward incorrect answers, they frequently generate CoT explanations rationalizing those answers. This causes accuracy to drop by as much as 36% on a suite of 13 tasks from BIG-Bench Hard, when testing with GPT-3.5 from OpenAI and Claude 1.0 from Anthropic. On a social-bias task, model explanations justify giving answers in line with stereotypes without mentioning the influence of these social biases. Our findings indicate that CoT explanations can be plausible yet misleading, which risks increasing our trust in LLMs without guaranteeing their safety. Building more transparent and explainable systems will require either improving CoT faithfulness through targeted efforts or abandoning CoT in favor of alternative methods.", "keywords": "Natural language processing;large language models;XAI;explainability", "primary_area": "", "supplementary_material": "/attachment/85893a3cc399e875b7a4356a1d7620cf1ccebda0.pdf", "author": "Miles Turpin;Julian Michael;Ethan Perez;Samuel R. Bowman", "authorids": "~Miles_Turpin1;~Julian_Michael1;~Ethan_Perez1;~Samuel_R._Bowman1", "gender": ";M;M;", "homepage": ";https://julianmichael.org;http://ethanperez.net;", "dblp": ";185/0981;192/1812;", "google_scholar": ";9DDOHR8AAAAJ;https://scholar.google.ca/citations?user=za0-taQAAAAJ;", "orcid": ";0000-0002-5358-3102;;", "linkedin": ";;https://linkedin.com/in/ethanjperez;", "or_profile": "~Miles_Turpin1;~Julian_Michael1;~Ethan_Perez1;~Samuel_R._Bowman1", "aff": ";New York University;New York University;", "aff_domain": ";nyu.edu;nyu.edu;", "position": ";Postdoc;Researcher;", "bibtex": "@inproceedings{\nturpin2023language,\ntitle={Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting},\nauthor={Miles Turpin and Julian Michael and Ethan Perez and Samuel R. 
Bowman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=bzs4uPLXvi}\n}", "github": "", "project": "", "reviewers": "6weX;y8DJ;mRxa;aL7J", "pdf_size": 1000186, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;4;4", "presentation": "3;3;4;4", "wc_summary": "87;44;220;121", "wc_strengths": "163;29;62;54", "wc_weaknesses": "159;182;1505;59", "wc_questions": "87;19;249;32", "wc_limitations": "36;3;41;60", "wc_review": "532;277;2077;326", "wc_reply_reviewers": "0;62;337;0", "wc_reply_authors": "0;0;458;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 118.0, 64.90377492873584 ], "wc_strengths_avg": [ 77.0, 51.122402134485036 ], "wc_weaknesses_avg": [ 476.25, 595.7463281464687 ], "wc_questions_avg": [ 96.75, 91.53243960476526 ], "wc_limitations_avg": [ 35.0, 20.530465167647808 ], "wc_review_avg": [ 803.0, 741.7415318020153 ], "wc_reply_reviewers_avg": [ 99.75, 139.295324760022 ], "wc_reply_authors_avg": [ 114.5, 198.31981746663644 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 410, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7703703273553306464&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 10, "email": ";nyu.edu;nyu.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "BIOT: Biosignal Transformer for Cross-data Learning in the Wild", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71117", "id": "c2LZyTyddi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f6b30f3e2dd9cb53bbf2024402d02295-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=c2LZyTyddi", "openreview": "https://openreview.net/forum?id=c2LZyTyddi", "poster": "/media/PosterPDFs/NeurIPS%202023/71117.png?t=1698024507.9691226", "slides": "https://nips.cc/virtual/2023/poster/71117", "video": "https://nips.cc/virtual/2023/poster/71117", "author_site": "Chaoqi Yang, M Westover, Jimeng Sun", "tldr": "", "abstract": "Biological signals, such as electroencephalograms (EEG), play a crucial role in numerous clinical applications, exhibiting diverse data formats and quality profiles. Current deep learning models for biosignals (based on CNN, RNN, and Transformers) are typically specialized for specific datasets and clinical settings, limiting their broader applicability. This paper explores the development of a flexible biosignal encoder architecture that can enable pre-training on multiple datasets and fine-tuning on downstream biosignal tasks with different formats.\n\nTo overcome the unique challenges associated with biosignals of various formats, such as mismatched channels, variable sample lengths, and prevalent missing values, we propose Biosignal Transformer (BIOT). 
The proposed BIOT model can enable cross-data learning with mismatched channels, variable lengths, and missing values by tokenizing different biosignals into a unified \"sentence\" structure. Specifically, we tokenize each channel separately into fixed-length segments containing local signal features and then rearrange the segments to form a long \"sentence\". Channel embeddings and relative position embeddings are added to each segment (viewed as \"token\") to preserve spatio-temporal features.\n\nThe BIOT model is versatile and applicable to various biosignal learning settings across different datasets, including joint pre-training for larger models. Comprehensive evaluations on EEG, electrocardiogram (ECG), and human activity sensory signals demonstrate that BIOT outperforms robust baselines in common settings and facilitates learning across multiple datasets with different formats. Using CHB-MIT seizure detection task as an example, our vanilla BIOT model shows 3% improvement over baselines in balanced accuracy, and the pre-trained BIOT models (optimized from other data sources) can further bring up to 4% improvements. Our repository is public at https://github.com/ycq091044/BIOT.", "keywords": "biological signal;transformer;cross-data learning;in-the-wild learning", "primary_area": "", "supplementary_material": "/attachment/8b9a1f75c9c304cd08d813499b545ca1c5f7282f.pdf", "author": "Chaoqi Yang;M Brandon Westover;Jimeng Sun", "authorids": "~Chaoqi_Yang1;~M_Brandon_Westover1;~Jimeng_Sun3", "gender": "M;;", "homepage": "https://ycq091044.github.io;https://cdac.mgh.harvard.edu/;http://sunlab.org", "dblp": ";;", "google_scholar": "Lj_rYrkAAAAJ;;9jmmp5sAAAAJ", "orcid": ";;0000-0003-1512-6426", "linkedin": ";;jimengsun/", "or_profile": "~Chaoqi_Yang1;~M_Brandon_Westover1;~Jimeng_Sun3", "aff": "University of Illinois Urbana Champaign;Massachusetts General Hospital, Harvard University;Georgia Institute of Technology", "aff_domain": "illinois.edu;mgh.harvard.edu;gatech.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nyang2023biot,\ntitle={{BIOT}: Biosignal Transformer for Cross-data Learning in the Wild},\nauthor={Chaoqi Yang and M Brandon Westover and Jimeng Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=c2LZyTyddi}\n}", "github": "", "project": "", "reviewers": "ykvi;XY8p;vQ39;ve81", "pdf_size": 4252659, "rating": "5;5;7;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;1;3;3", "wc_summary": "84;63;97;87", "wc_strengths": "57;38;135;51", "wc_weaknesses": "55;232;60;40", "wc_questions": "169;49;300;32", "wc_limitations": "34;31;1;84", "wc_review": "399;413;593;294", "wc_reply_reviewers": "67;142;11;22", "wc_reply_authors": "240;273;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 82.75, 12.376893794486563 ], "wc_strengths_avg": [ 70.25, 38.00904497616324 ], "wc_weaknesses_avg": [ 96.75, 78.4326940758763 ], "wc_questions_avg": [ 137.5, 107.65802338887706 ], "wc_limitations_avg": [ 37.5, 29.78674201721296 ], "wc_review_avg": [ 424.75, 107.47645090902472 ], "wc_reply_reviewers_avg": [ 60.5, 51.51941381654104 ], "wc_reply_authors_avg": [ 128.25, 128.77960824602627 ], 
"reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7246974259097958421&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "illinois.edu;mgh.harvard.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Harvard University;Georgia Institute of Technology", "aff_unique_dep": ";Massachusetts General Hospital;", "aff_unique_url": "https://illinois.edu;https://www.harvard.edu;https://www.gatech.edu", "aff_unique_abbr": "UIUC;Harvard;Georgia Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Tight Risk Bounds for Gradient Descent on Separable Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71116", "id": "c2eedxSlPJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d8ca28a32c05cd3b9b0940e43720f31b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=c2eedxSlPJ", "openreview": "https://openreview.net/forum?id=c2eedxSlPJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71116", "video": "https://nips.cc/virtual/2023/poster/71116", "author_site": "Matan Schliserman, Tomer Koren", "tldr": "", "abstract": "We study the generalization properties of unregularized gradient methods applied to separable linear classification---a setting that has received considerable attention since the pioneering work of Soudry et al. (2018).\nWe establish tight upper and lower (population) risk bounds for gradient descent in this setting, for any smooth loss function, expressed in terms of its tail decay rate.\nOur bounds take the form $\\Theta(r_{\\ell,T}^2 / \\gamma^2 T + r_{\\ell,T}^2 / \\gamma^2 n)$, \nwhere $T$ is the number of gradient steps, $n$ is size of the training set, $\\gamma$ is the data margin, and $r_{\\ell,T}$ is a complexity term that depends on the tail decay rate of the loss function (and on $T$).\nOur upper bound greatly improves the existing risk bounds due to Shamir (2021) and Schliserman and Koren (2022), that either applied to specific loss functions or imposed extraneous technical assumptions, and applies to virtually any convex and smooth loss function.\nOur risk lower bound is the first in this context and establish the tightness of our general upper bound for any given tail decay rate and in all parameter regimes.\nThe proof technique used to show these results is also markedly simpler compared to previous work, and is straightforward to extend to other gradient methods; we illustrate this by providing analogous results for Stochastic Gradient Descent.", "keywords": "Convex optimization;Gradient Descent;separable data;generalization bounds;Stochastic Gradient Descent.", "primary_area": "", "supplementary_material": "/attachment/ebd57170322b95c4999e871d40792b0a545b4d02.pdf", "author": "Matan Schliserman;Tomer Koren", "authorids": "~Matan_Schliserman1;~Tomer_Koren1", "gender": "M;M", "homepage": ";https://tomerkoren.github.io", "dblp": "314/6601;12/10044", "google_scholar": ";wGG1voYAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Matan_Schliserman1;~Tomer_Koren1", "aff": "Tel Aviv University;Tel Aviv University", "aff_domain": "tau.ac.il;tau.ac.il", "position": "PhD student;Assistant Professor", "bibtex": 
"@inproceedings{\nschliserman2023tight,\ntitle={Tight Risk Bounds for Gradient Descent on Separable Data},\nauthor={Matan Schliserman and Tomer Koren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=c2eedxSlPJ}\n}", "github": "", "project": "", "reviewers": "MaTy;5FbM;GBsf;GaV2", "pdf_size": 270868, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "88;100;31;159", "wc_strengths": "135;109;63;71", "wc_weaknesses": "397;66;71;163", "wc_questions": "5;63;54;64", "wc_limitations": "35;1;19;16", "wc_review": "660;339;238;473", "wc_reply_reviewers": "10;5;11;11", "wc_reply_authors": "21;19;20;20", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.5, 45.45602270326783 ], "wc_strengths_avg": [ 94.5, 29.13331426391443 ], "wc_weaknesses_avg": [ 174.25, 134.27839550724457 ], "wc_questions_avg": [ 46.5, 24.27447218787671 ], "wc_limitations_avg": [ 17.75, 12.07010770457331 ], "wc_review_avg": [ 427.5, 158.01028447541 ], "wc_reply_reviewers_avg": [ 9.25, 2.48746859276655 ], "wc_reply_authors_avg": [ 20.0, 0.7071067811865476 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17599068942331391071&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tau.ac.il;tau.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "FORB: A Flat Object Retrieval Benchmark for Universal Image Embedding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73536", "id": "c3kuX7ltzr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/506630e4a43bb9d64a49f98b9ba934e9-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=c3kuX7ltzr", "openreview": "https://openreview.net/forum?id=c3kuX7ltzr", "poster": "/media/PosterPDFs/NeurIPS%202023/73536.png?t=1698033109.2649493", "slides": "https://nips.cc/virtual/2023/poster/73536", "video": "https://nips.cc/virtual/2023/poster/73536", "author_site": "Pengxiang Wu, Siman Wang, Kevin Dela Rosa, Derek Hu", "tldr": "", "abstract": "Image retrieval is a fundamental task in computer vision. Despite recent advances in this field, many techniques have been evaluated on a limited number of domains, with a small number of instance categories. Notably, most existing works only consider domains like 3D landmarks, making it difficult to generalize the conclusions made by these works to other domains, e.g., logo and other 2D flat objects. To bridge this gap, we introduce a new dataset for benchmarking visual search methods on flat images with diverse patterns. 
Our flat object retrieval benchmark (FORB) supplements the commonly adopted 3D object domain, and more importantly, it serves as a testbed for assessing the image embedding quality on out-of-distribution domains. In this benchmark we investigate the retrieval accuracy of representative methods in terms of candidate ranks, as well as matching score margin, a viewpoint which is largely ignored by many works. Our experiments not only highlight the challenges and rich heterogeneity of FORB, but also reveal the hidden properties of different retrieval strategies. The proposed benchmark is a growing project and we expect to expand in both quantity and variety of objects. The dataset and supporting codes are available at https://github.com/pxiangwu/FORB/.", "keywords": "Image retrieval;Image Embedding;Flat Object;Benchmark;Out-of-distribution", "primary_area": "", "supplementary_material": "/attachment/baa293dba9fe7b5c121383080c5f9bae04bb2bd4.pdf", "author": "Pengxiang Wu;Siman Wang;Kevin S Dela Rosa;Derek Hao Hu", "authorids": "~Pengxiang_Wu1;~Siman_Wang1;~Kevin_S_Dela_Rosa1;~Derek_Hao_Hu1", "gender": ";;;M", "homepage": "https://pxiangwu.github.io/;https://www.linkedin.com/in/siman-wang-964316139;https://kevindelarosa.com/;", "dblp": "156/1749;;31/8324.html;h/DerekHaoHu", "google_scholar": "MXLs7GcAAAAJ;;8Pc5MiUAAAAJ;Ks81aO0AAAAJ", "orcid": "0000-0002-6929-5877;;;", "linkedin": ";;kdrosa/;", "or_profile": "~Pengxiang_Wu1;~Siman_Wang1;~Kevin_S_Dela_Rosa1;~Derek_Hao_Hu1", "aff": "Snap Inc.;;Snap Inc.;Snap Inc.", "aff_domain": "snapchat.com;;snapchat.com;snap.com", "position": "Researcher;;Eng;Senior Manager, Machine Learning Engineering", "bibtex": "@inproceedings{\nwu2023forb,\ntitle={{FORB}: A Flat Object Retrieval Benchmark for Universal Image Embedding},\nauthor={Pengxiang Wu and Siman Wang and Kevin S Dela Rosa and Derek Hao Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=c3kuX7ltzr}\n}", "github": "", "project": "", "reviewers": "XRLd;srxZ;348u;Utwz;CwcQ", "pdf_size": 4960888, "rating": "6;6;6;7;9", "confidence": "4;4;3;3;5", "wc_summary_and_contributions": "68;43;44;79;73", "wc_strengths": "33;39;24;73;137", "wc_improvement": "196;49;24;19;87", "wc_limitations": "9;6;38;11;71", "wc_correctness": "38;6;11;5;15", "wc_clarity": "5;5;6;6;8", "wc_relation_to_prior_work": "16;9;1;1;36", "wc_documentation": "21;4;1;1;51", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "387;162;150;196;479", "wc_reply_reviewers": "95;12;0;0;28", "wc_reply_authors": "1035;274;122;110;677", "reply_reviewers": "1;1;0;0;1", "reply_authors": "3;1;1;1;2", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 61.4, 15.027973915335359 ], "wc_strengths_avg": [ 61.2, 41.368587116313265 ], "wc_improvement_avg": [ 75.0, 65.11221083637078 ], "wc_limitations_avg": [ 27.0, 24.8112877537624 ], "wc_correctness_avg": [ 15.0, 12.049896265113654 ], "wc_clarity_avg": [ 6.0, 1.0954451150103321 ], "wc_relation_to_prior_work_avg": [ 12.6, 12.970736293672768 ], "wc_documentation_avg": [ 15.6, 19.2 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 274.8, 133.2627479830729 ], "wc_reply_reviewers_avg": [ 27.0, 35.519008995184535 ], "wc_reply_authors_avg": [ 443.6, 359.9192131576196 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], 
"corr_rating_confidence": 0.6416889479197478, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13560685555116021932&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "snapchat.com;;snapchat.com;snap.com", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Snap Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.snapinc.com", "aff_unique_abbr": "Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Tight Bounds for Volumetric Spanners and Applications", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71115", "id": "c4Xc0uTLXW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/02a92b52670752daf17b53f04f1ab405-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=c4Xc0uTLXW", "openreview": "https://openreview.net/forum?id=c4Xc0uTLXW", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71115", "video": "https://nips.cc/virtual/2023/poster/71115", "author_site": "Aditya Bhaskara, Sepideh Mahabadi, Ali Vakilian", "tldr": "", "abstract": "Given a set of points of interest, a volumetric spanner is a subset of the points using which all the points can be expressed using \"small\" coefficients (measured in an appropriate norm). Formally, given a set of vectors $X = [v_1, v_2, \\dots, v_n]$, the goal is to find $T \\subseteq [n]$ such that every $v \\in X$ can be expressed as $\\sum_{i\\in T} \\alpha_i v_i$, with $\\Vert \\alpha \\Vert$ being small. This notion, which has also been referred to as a well-conditioned basis, has found several applications, including bandit linear optimization, determinant maximization, and matrix low rank approximation. In this paper, we give almost optimal bounds on the size of volumetric spanners for all $\\ell_p$ norms, and show that they can be constructed using a simple local search procedure. 
We then show the applications of our result to other tasks and in particular the problem of finding coresets for the Minimum Volume Enclosing Ellipsoid (MVEE) problem.", "keywords": "volumetric spanner;well-conditioned basis;determinant maximization;minimum volume enclosing ellipsoid", "primary_area": "", "supplementary_material": "/attachment/1af5b9da0cf545dc3fa22448798c42b93bff5efb.pdf", "author": "Aditya Bhaskara;Sepideh Mahabadi;Ali Vakilian", "authorids": "~Aditya_Bhaskara1;~Sepideh_Mahabadi1;~Ali_Vakilian1", "gender": "M;F;", "homepage": "http://www.cs.utah.edu/~bhaskara/;https://www.mit.edu/~mahabadi/;http://www.mit.edu/~vakilian/", "dblp": "47/7801.html;130/0388;116/4679", "google_scholar": "tqxTaiAAAAAJ;NirVdpMAAAAJ;uXZaVaAAAAAJ", "orcid": ";;0000-0001-5049-7594", "linkedin": ";;", "or_profile": "~Aditya_Bhaskara1;~Sepideh_Mahabadi1;~Ali_Vakilian1", "aff": "University of Utah;Microsoft Research;Toyota Technological Institute at Chicago", "aff_domain": "utah.edu;microsoft.com;ttic.edu", "position": "Associate Professor;Researcher;Research Assistant Professor", "bibtex": "@inproceedings{\nbhaskara2023tight,\ntitle={Tight Bounds for Volumetric Spanners and Applications},\nauthor={Aditya Bhaskara and Sepideh Mahabadi and Ali Vakilian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=c4Xc0uTLXW}\n}", "github": "", "project": "", "reviewers": "4Fat;QeGT;TY5V;JkQ2;QwvL", "pdf_size": 350038, "rating": "4;6;6;6;6", "confidence": "4;4;1;4;5", "soundness": "3;3;3;2;4", "novelty": "2;3;2;3;3", "presentation": "3;3;3;2;4", "wc_summary": "79;139;163;85;126", "wc_strengths": "49;34;95;75;20", "wc_weaknesses": "244;101;89;345;127", "wc_questions": "28;1;6;100;14", "wc_limitations": "2;1;1;11;1", "wc_review": "402;276;354;616;288", "wc_reply_reviewers": "325;24;25;167;0", "wc_reply_authors": "435;0;0;80;0", "reply_reviewers": "2;1;1;2;0", "reply_authors": "3;1;1;2;1", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 1.3564659966250536 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 118.4, 32.05994385522221 ], "wc_strengths_avg": [ 54.6, 27.207351947589462 ], "wc_weaknesses_avg": [ 181.2, 98.63549057007828 ], "wc_questions_avg": [ 29.8, 36.267892136158125 ], "wc_limitations_avg": [ 3.2, 3.919183588453085 ], "wc_review_avg": [ 387.2, 123.18831113380847 ], "wc_reply_reviewers_avg": [ 108.2, 123.43322081190298 ], "wc_reply_authors_avg": [ 103.0, 168.86681142249356 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.14744195615489714, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17552125279455517167&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "utah.edu;microsoft.com;ttic.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Utah;Microsoft;Toyota Technological Institute at Chicago", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.utah.edu;https://www.microsoft.com/en-us/research;https://www.tti-chicago.org", "aff_unique_abbr": "Utah;MSR;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RIO: A Benchmark for Reasoning Intention-Oriented Objects in 
Open Environments", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73535", "id": "c5DUGninMz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8644353f7d307baaf29bc1e56fe8e0ec-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=c5DUGninMz", "openreview": "https://openreview.net/forum?id=c5DUGninMz", "poster": "/media/PosterPDFs/NeurIPS%202023/73535.png?t=1699269760.3633475", "slides": "https://nips.cc/virtual/2023/poster/73535", "video": "https://nips.cc/virtual/2023/poster/73535", "author_site": "Mengxue Qu, Yu Wu, Wu Liu, Xiaodan Liang, Jingkuan Song, Yao Zhao, Yunchao Wei", "tldr": "", "abstract": "Intention-oriented object detection aims to detect desired objects based on specific intentions or requirements. For instance, when we desire to \"lie down and rest\", we instinctively seek out a suitable option such as a \"bed\" or a \"sofa\" that can fulfill our needs. Previous work in this area is limited either by the number of intention descriptions or by the affordance vocabulary available for intention objects. These limitations make it challenging to handle intentions in open environments effectively. To facilitate this research, we construct a comprehensive dataset called Reasoning Intention-Oriented Objects (RIO). In particular, RIO is specifically designed to incorporate diverse real-world scenarios and a wide range of object categories. It offers the following key features: 1) intention descriptions in RIO are represented as natural sentences rather than a mere word or verb phrase, making them more practical and meaningful; 2) the intention descriptions are contextually relevant to the scene, enabling a broader range of potential functionalities associated with the objects; 3) the dataset comprises a total of 40,214 images and 130,585 intention-object pairs. 
With the proposed RIO, we evaluate the ability of some existing models to reason intention-oriented objects in open environments.", "keywords": "Intention-Oriented Reasoning; Vision and Language; Object Detection; Instance Segmentation", "primary_area": "", "supplementary_material": "", "author": "Mengxue Qu;Yu Wu;Wu Liu;Xiaodan Liang;Jingkuan Song;Yao Zhao;Yunchao Wei", "authorids": "~Mengxue_Qu1;~Yu_Wu3;~Wu_Liu2;~Xiaodan_Liang2;~Jingkuan_Song3;~Yao_Zhao1;~Yunchao_Wei1", "gender": "F;M;M;F;M;M;M", "homepage": "https://scholar.google.com/citations?user=k8h4bqMAAAAJ&hl=en&oi=ao;https://yu-wu.net;https://www.drliuwu.com;https://www.sysu-hcp.net/;https://cfm.uestc.edu.cn/~songjingkuan/;http://mepro.bjtu.edu.cn;https://weiyc.github.io/", "dblp": ";22/0-11;;;70/10575;45/2091-1.html;118/5394", "google_scholar": ";23SZHUwAAAAJ;rQpizr0AAAAJ;voxznZAAAAAJ;F5Zy9V4AAAAJ;474TbQYAAAAJ;https://scholar.google.com.sg/citations?user=qL9Csv0AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Mengxue_Qu1;~Yu_Wu3;~Wu_Liu2;~Xiaodan_Liang2;~Jingkuan_Song3;~Yao_Zhao1;~Yunchao_Wei1", "aff": "Beijing Jiaotong University;Wuhan University;JD.com;SUN YAT-SEN UNIVERSITY;University of Electronic Science and Technology of China,;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;whu.edu.cn;jd.com;sysu.edu.cn;uestc.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "position": "PhD student;Full Professor;Senior Researcher;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqu2023rio,\ntitle={{RIO}: A Benchmark for Reasoning Intention-Oriented Objects in Open Environments},\nauthor={Mengxue Qu and Yu Wu and Wu Liu and Xiaodan Liang and Jingkuan Song and Yao Zhao and Yunchao Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=c5DUGninMz}\n}", "github": "", "project": "", "reviewers": "eKT9;UNtV;gkNq", "pdf_size": 2090917, "rating": "6;6;7", "confidence": "3;4;4", "wc_summary_and_contributions": "93;85;59", "wc_strengths": "65;70;135", "wc_improvement": "75;410;134", "wc_limitations": "44;84;3", "wc_correctness": "7;61;13", "wc_clarity": "1;71;1", "wc_relation_to_prior_work": "27;1;24", "wc_documentation": "20;52;1", "wc_additional_feedback": "1;1;1", "wc_review": "333;835;371", "wc_reply_reviewers": "30;382;15", "wc_reply_authors": "573;1752;213", "reply_reviewers": "1;1;1", "reply_authors": "1;3;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 79.0, 14.514360704718161 ], "wc_strengths_avg": [ 90.0, 31.88521078284832 ], "wc_improvement_avg": [ 206.33333333333334, 146.01445894918155 ], "wc_limitations_avg": [ 43.666666666666664, 33.06895153396242 ], "wc_correctness_avg": [ 27.0, 24.166091947189145 ], "wc_clarity_avg": [ 24.333333333333332, 32.99831645537222 ], "wc_relation_to_prior_work_avg": [ 17.333333333333332, 11.61416759345623 ], "wc_documentation_avg": [ 24.333333333333332, 21.044925490219462 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 513.0, 228.2162716956586 ], "wc_reply_reviewers_avg": [ 142.33333333333334, 169.58052823232848 ], "wc_reply_authors_avg": [ 846.0, 657.2807619275038 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 14, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=13430707457544539533&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "bjtu.edu.cn;whu.edu.cn;jd.com;sysu.edu.cn;uestc.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;4;0;0", "aff_unique_norm": "Beijing Jiao Tong University;Wuhan University;JD.com;Sun Yat-sen University;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.njtu.edu.cn/en;http://www.whu.edu.cn/;https://www.jd.com;http://www.sysu.edu.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "BJTU;WHU;JD;SYSU;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "c5Inzw6giM", "title": "Privacy-Preserving CNN Training with Transfer Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Privacy-preserving nerual network inference has been well studied while homomorphic CNN training still remains an open challenging task.\nIn this paper, we present a practical solution to implement privacy-preserving CNN training based on mere Homomorphic Encryption (HE) technique. To our best knowledge, this is the first attempt successfully to crack this nut and no work ever before has achieved this goal. Several techniques combine to accomplish the task:: (1) with transfer learning, privacy-preserving CNN training can be reduced to homomorphic neural network training, or even multiclass logistic regression (MLR) training; (2) via a faster gradient variant called $\\texttt{Quadratic Gradient}$, an enhanced gradient method for MLR with a state-of-the-art performance in convergence speed is applied in this work to achieve high performance; (3) we employ the thought of transformation in mathematics to transform approximating Softmax function in the encryption domain to the approximation of the Sigmoid function. A new type of loss function termed $\\texttt{Squared Likelihood Error}$ has been developed alongside to align with this change.; and (4) we use a simple but flexible matrix-encoding method named $\\texttt{Volley Revolver}$ to manage the data flow in the ciphertexts, which is the key factor to complete the whole homomorphic CNN training. The complete, runnable C++ code to implement our work can be found at: \\href{https://anonymous.4open.science/r/HE-CNNtraining-B355/}{$\\texttt{https://anonymous.4open.science/r/HE-CNNtraining-B355/}$}. \n\nWe select $\\texttt{REGNET\\_X\\_400MF}$ as our pre-trained model for transfer learning. We use the first 128 MNIST training images as training data and the whole MNIST testing dataset as the testing data. 
The client only needs to upload 6 ciphertexts to the cloud and it takes $\\sim 21$ mins to perform 2 iterations on a cloud with 64 vCPUs, resulting in a precision of $21.49\\%$.\n", "keywords": "Privacy-Preserving;Homomorphic Encryption;Multiclass Logistic Regression;Quadratic Gradient;Squared Likelihood Error;Volley Revolver", "primary_area": "", "supplementary_material": "", "author": "Li-Yue Sun", "authorids": "~Li-Yue_Sun1", "gender": "M", "homepage": "https://petitioner.github.io", "dblp": "302/4847", "google_scholar": "U78Tt8kAAAAJ", "orcid": "0000-0003-0378-0607", "linkedin": "", "or_profile": "~Li-Yue_Sun1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nsun2023privacypreserving,\ntitle={Privacy-Preserving {CNN} Training with Transfer Learning},\nauthor={Li-Yue Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=c5Inzw6giM}\n}", "github": "", "project": "", "reviewers": "F8vz;ZXqT;tzCF;og3N", "site": "https://openreview.net/forum?id=c5Inzw6giM", "pdf_size": 576476, "rating": "2;3;3;5", "confidence": "5;2;5;3", "soundness": "1;1;2;4", "novelty": "1;1;2;3", "presentation": "1;1;2;1", "wc_summary": "52;16;67;40", "wc_strengths": "32;11;25;41", "wc_weaknesses": "407;90;129;28", "wc_questions": "6;18;253;25", "wc_limitations": "18;1;2;4", "wc_review": "515;136;476;138", "wc_reply_reviewers": "30;9;0;16", "wc_reply_authors": "12;255;0;9", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 3.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 2.0, 1.224744871391589 ], "novelty_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 43.75, 18.659782956937093 ], "wc_strengths_avg": [ 27.25, 10.96300597464035 ], "wc_weaknesses_avg": [ 163.5, 145.12494616708736 ], "wc_questions_avg": [ 75.5, 102.7046737008594 ], "wc_limitations_avg": [ 6.25, 6.869315832017043 ], "wc_review_avg": [ 316.25, 179.78094309464504 ], "wc_reply_reviewers_avg": [ 13.75, 10.96300597464035 ], "wc_reply_authors_avg": [ 69.0, 107.47790470603714 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.4856618642571828, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DlT5daGXpnUJ:scholar.google.com/&scioq=Privacy-Preserving+CNN+Training+with+Transfer+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0 }, { "title": "PLASTIC: Improving Input and Label Plasticity for Sample Efficient Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71114", "id": "c5WOU7p4ES", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c464fc4516aca4e68f2a14e67c6f0402-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=c5WOU7p4ES", "openreview": "https://openreview.net/forum?id=c5WOU7p4ES", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71114", "video": "https://nips.cc/virtual/2023/poster/71114", "author_site": "Hojoon Lee, Hanseul Cho, HYUNSEUNG KIM, DAEHOON GWAK, Joonkee Kim, Jaegul Choo, Se-Young Yun, Chulhee Yun", "tldr": "", "abstract": "In Reinforcement Learning (RL), enhancing sample efficiency is crucial, particularly in scenarios when data acquisition is costly and risky. In principle, off-policy RL algorithms can improve sample efficiency by allowing multiple updates per environment interaction. 
However, these multiple updates often lead the model to overfit to earlier interactions, which is referred to as the loss of plasticity. Our study investigates the underlying causes of this phenomenon by dividing plasticity into two aspects: input plasticity, which denotes the model's adaptability to changing input data, and label plasticity, which denotes the model's adaptability to evolving input-output relationships. Synthetic experiments on the CIFAR-10 dataset reveal that finding smoother minima of the loss landscape enhances input plasticity, whereas refined gradient propagation improves label plasticity. Leveraging these findings, we introduce the **PLASTIC** algorithm, which harmoniously combines techniques to address both concerns. With minimal architectural modifications, PLASTIC achieves competitive performance on benchmarks including Atari-100k and Deepmind Control Suite. This result emphasizes the importance of preserving the model's plasticity to elevate the sample efficiency in RL. The code is available at https://github.com/dojeon-ai/plastic.", "keywords": "Reinforcement Learning;Sharpness Minimization;Generalization;Plasticity;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Hojoon Lee;Hanseul Cho;Hyunseung Kim;Daehoon Gwak;Joonkee Kim;Jaegul Choo;Se-Young Yun;Chulhee Yun", "authorids": "~Hojoon_Lee1;~Hanseul_Cho1;~Hyunseung_Kim1;~Daehoon_Gwak1;~Joonkee_Kim1;~Jaegul_Choo1;~Se-Young_Yun1;~Chulhee_Yun1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://joonleesky.github.io/;https://hanseuljo.github.io/;;;;https://sites.google.com/site/jaegulchoo/;https://fbsqkd.github.io;https://chulheeyun.github.io/", "dblp": ";233/5755-2;244/0949;276/7016;323/4661;07/2074;23/8862;138/0148.html", "google_scholar": ";IczOXwsAAAAJ;https://scholar.google.com/citations?view_op=list_works;NyQ42l8AAAAJ;LL9Yj54AAAAJ;GHJYsLEAAAAJ;X_IAjb8AAAAJ;Ukl64ggAAAAJ", "orcid": ";0009-0001-0410-0290;;;;;;", "linkedin": ";hanseul-cho-66b01a260/;;;%EC%A4%80%EA%B8%B0-%EA%B9%80-5171831b3/;;seyoung-yun-395130ab/;", "or_profile": "~Hojoon_Lee1;~Hanseul_Cho1;~Hyunseung_Kim1;~Daehoon_Gwak1;~Joonkee_Kim1;~Jaegul_Choo1;~Se-Young_Yun1;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;PhD student;PhD student;MS student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2023plastic,\ntitle={{PLASTIC}: Improving Input and Label Plasticity for Sample Efficient Reinforcement Learning},\nauthor={Hojoon Lee and Hanseul Cho and Hyunseung Kim and Daehoon Gwak and Joonkee Kim and Jaegul Choo and Se-Young Yun and Chulhee Yun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=c5WOU7p4ES}\n}", "github": "", "project": "", "reviewers": "dLhS;mzhA;GE2W;1mez", "pdf_size": 567626, "rating": "6;7;7;8", "confidence": "4;4;5;3", "soundness": "3;4;2;4", "novelty": "3;2;3;4", "presentation": "4;4;2;4", "wc_summary": "58;124;83;291", "wc_strengths": "44;140;133;157", "wc_weaknesses": "41;188;519;201", "wc_questions":
"94;65;133;121", "wc_limitations": "13;78;211;15", "wc_review": "250;595;1079;785", "wc_reply_reviewers": "117;40;448;35", "wc_reply_authors": "38;27;132;35", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 139.0, 90.8652849002302 ], "wc_strengths_avg": [ 118.5, 43.88906469725688 ], "wc_weaknesses_avg": [ 237.25, 174.382302714467 ], "wc_questions_avg": [ 103.25, 26.21426138574192 ], "wc_limitations_avg": [ 79.25, 80.43125947043227 ], "wc_review_avg": [ 677.25, 300.9654257551854 ], "wc_reply_reviewers_avg": [ 160.0, 169.4240242704676 ], "wc_reply_authors_avg": [ 58.0, 42.91270208225066 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9626146720892278454&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "EMMA-X: An EM-like Multilingual Pre-training Algorithm for Cross-lingual Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71113", "id": "c5dRV9tA3K", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/201408406e0c5cf7626c4baeae6eaadd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=c5dRV9tA3K", "openreview": "https://openreview.net/forum?id=c5dRV9tA3K", "poster": "/media/PosterPDFs/NeurIPS%202023/71113.png?t=1699324971.917672", "slides": "https://nips.cc/virtual/2023/poster/71113", "video": "https://nips.cc/virtual/2023/poster/71113", "author_site": "Ping Guo, Xiangpeng Wei, Yue Hu, Baosong Yang, Dayiheng Liu, Fei Huang, jun xie", "tldr": "", "abstract": "Expressing universal semantics common to all languages is helpful to understand the meanings of complex and culture-specific sentences. The research theme underlying this scenario focuses on learning universal representations across languages with the usage of massive parallel corpora. However, due to the sparsity and scarcity of parallel data, there is still a big challenge in learning authentic ``universals'' for any two languages. In this paper, we propose Emma-X: an EM-like Multilingual pre-training Algorithm, to learn Cross-lingual universals with the aid of excessive multilingual non-parallel data. Emma-X unifies the cross-lingual representation learning task and an extra semantic relation prediction task within an EM framework. Both the extra semantic classifier and the cross-lingual sentence encoder approximate the semantic relation of two sentences, and supervise each other until convergence. To evaluate Emma-X, we conduct experiments on xrete, a newly introduced benchmark containing 12 widely studied cross-lingual tasks that fully depend on sentence-level representations. 
Results reveal that Emma-X achieves state-of-the-art performance. Further geometric analysis of the built representation space with three requirements demonstrates the superiority of Emma-X over advanced models.", "keywords": "cross-lingual pretraining;language-agnostic representation", "primary_area": "", "supplementary_material": "/attachment/ea2186d189418f3c0e3ce3cba52c00b2de427e3f.zip", "author": "Ping Guo;Xiangpeng Wei;Yue Hu;Baosong Yang;Dayiheng Liu;Fei Huang;jun xie", "authorids": "~Ping_Guo5;~Xiangpeng_Wei1;~Yue_Hu3;~Baosong_Yang1;~Dayiheng_Liu1;~Fei_Huang1;~jun_xie5", "gender": ";M;F;M;M;Not Specified;M", "homepage": ";https://pemywei.github.io/;;https://baosongyang.site/;https://dayihengliu.github.io/;;https://sites.google.com/view/fei-huang", "dblp": "33/5440-2;220/9947;https://dblp.uni-trier.de/pid/34/5808-2;203/8245;https://dblp.uni-trier.de/pers/hd/l/Liu:Dayiheng;;h/FeiHuang.html", "google_scholar": ";KnLk78UAAAAJ;;https://scholar.google.com.tw/citations?user=fXsHJXkAAAAJ;pPLQrX4AAAAJ;YjuM2GsAAAAJ;9r98PpoAAAAJ", "orcid": ";;;;0000-0002-8755-8941;;", "linkedin": ";;;;;;fei-huang-cas-cmu", "or_profile": "~Ping_Guo5;~Xiangpeng_Wei1;~Yue_Hu3;~Baosong_Yang1;~Dayiheng_Liu1;~jun_xie5;~Fei_Huang2", "aff": "Institute of Information Engineering, Chinese Academy of Sciences;Alibaba Group;;Alibaba Group;Alibaba Group;Alibaba DAMO Academy;Alibaba Group US", "aff_domain": "iie.ac.cn;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "PhD student;Researcher;;Researcher;Researcher;Principal Researcher;Senior Research Director", "bibtex": "@inproceedings{\nguo2023emmax,\ntitle={{EMMA}-X: An {EM}-like Multilingual Pre-training Algorithm for Cross-lingual Representation Learning},\nauthor={Ping Guo and Xiangpeng Wei and Yue Hu and Baosong Yang and Dayiheng Liu and Fei Huang and jun xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=c5dRV9tA3K}\n}", "github": "", "project": "", "reviewers": "tN44;FMvk;LAMb;oxNi;s5pr", "pdf_size": 1645421, "rating": "5;6;6;7;7", "confidence": "5;3;4;4;4", "soundness": "2;3;3;3;2", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "107;65;80;61;140", "wc_strengths": "24;41;20;57;99", "wc_weaknesses": "157;47;43;99;192", "wc_questions": "1;22;43;264;32", "wc_limitations": "3;11;1;14;21", "wc_review": "292;186;187;495;484", "wc_reply_reviewers": "35;0;0;21;33", "wc_reply_authors": "59;0;0;44;50", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.6, 29.50660942907538 ], "wc_strengths_avg": [ 48.2, 28.603496289789472 ], "wc_weaknesses_avg": [ 107.6, 59.13239382944005 ], "wc_questions_avg": [ 72.4, 96.79380145443199 ], "wc_limitations_avg": [ 10.0, 7.321202087089251 ], "wc_review_avg": [ 328.8, 136.79385951130993 ], "wc_reply_reviewers_avg": [ 17.8, 15.302287410710859 ], "wc_reply_authors_avg": [ 30.6, 25.436980952935436 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11004645691399537048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, 
"email": "iie.ac.cn;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Chinese Academy of Sciences;Alibaba Group", "aff_unique_dep": "Institute of Information Engineering;", "aff_unique_url": "http://www.cas.cn;https://www.alibaba.com", "aff_unique_abbr": "CAS;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "BuildingsBench: A Large-Scale Dataset of 900K Buildings and Benchmark for Short-Term Load Forecasting", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73534", "id": "c5rqd6PZn6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f17bf868966df01ca125e5bbc9ee24e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=c5rqd6PZn6", "openreview": "https://openreview.net/forum?id=c5rqd6PZn6", "poster": "/media/PosterPDFs/NeurIPS%202023/73534.png?t=1701537045.8795993", "slides": "https://nips.cc/virtual/2023/poster/73534", "video": "https://nips.cc/virtual/2023/poster/73534", "author_site": "Patrick Emami, Abhijeet Sahu, Peter Graf", "tldr": "", "abstract": "Short-term forecasting of residential and commercial building energy consumption is widely used in power systems and continues to grow in importance. Data-driven short-term load forecasting (STLF), although promising, has suffered from a lack of open, large-scale datasets with high building diversity. This has hindered exploring the pretrain-then-fine-tune paradigm for STLF. To help address this, we present BuildingsBench, which consists of: 1) Buildings-900K, a large-scale dataset of 900K simulated buildings representing the U.S. building stock; and 2) an evaluation platform with over 1,900 real residential and commercial buildings from 7 open datasets. BuildingsBench benchmarks two under-explored tasks: zero-shot STLF, where a pretrained model is evaluated on unseen buildings without fine-tuning, and transfer learning, where a pretrained model is fine-tuned on a target building. The main finding of our benchmark analysis is that synthetically pretrained models generalize surprisingly well to real commercial buildings. An exploration of the effect of increasing dataset size and diversity on zero-shot commercial building performance reveals a power-law with diminishing returns. We also show that fine-tuning pretrained models on real commercial and residential buildings improves performance for a majority of target buildings. We hope that BuildingsBench encourages and facilitates future research on generalizable STLF. 
All datasets and code can be accessed from https://github.com/NREL/BuildingsBench.", "keywords": "Short-term load forecasting;climate change;time series forecasting;transformers;pretraining", "primary_area": "", "supplementary_material": "/attachment/9f78df9482f1a4b3de070e921580905f27e9bb45.pdf", "author": "Patrick Emami;Abhijeet Sahu;Peter Graf", "authorids": "~Patrick_Emami1;~Abhijeet_Sahu1;~Peter_Graf1", "gender": "M;M;M", "homepage": "http://pemami4911.github.io;https://www.nrel.gov/research/staff/abhijeet-sahu.html;https://www.nrel.gov/research/staff/peter-graf.html", "dblp": "153/7716;;", "google_scholar": "WSU6_r0AAAAJ;G5YeHuoAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Patrick_Emami1;~Abhijeet_Sahu1;~Peter_Graf1", "aff": "National Renewable Energy Lab;;National Renewable Energy Lab", "aff_domain": "nrel.gov;;nrel.gov", "position": "Postdoc;;Scientist", "bibtex": "@inproceedings{\nemami2023buildingsbench,\ntitle={BuildingsBench: A Large-Scale Dataset of 900K Buildings and Benchmark for Short-Term Load Forecasting},\nauthor={Patrick Emami and Abhijeet Sahu and Peter Graf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=c5rqd6PZn6}\n}", "github": "", "project": "", "reviewers": "cRNE;YiaS;zkdi;aV5A;ZMcD", "pdf_size": 1968818, "rating": "5;5;6;7;8", "confidence": "3;4;4;4;2", "wc_summary_and_contributions": "69;108;19;76;163", "wc_strengths": "61;115;28;57;102", "wc_improvement": "141;137;47;49;330", "wc_limitations": "287;1;47;10;147", "wc_correctness": "18;1;8;1;12", "wc_clarity": "4;1;5;1;74", "wc_relation_to_prior_work": "10;1;3;1;40", "wc_documentation": "6;1;9;1;13", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "597;366;167;197;882", "wc_reply_reviewers": "0;0;0;32;26", "wc_reply_authors": "832;803;174;126;793", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.8 ], "wc_summary_and_contributions_avg": [ 87.0, 47.50999894758997 ], "wc_strengths_avg": [ 72.6, 31.714980687366026 ], "wc_improvement_avg": [ 140.8, 102.99203852725704 ], "wc_limitations_avg": [ 98.4, 107.58735985235441 ], "wc_correctness_avg": [ 8.0, 6.54217089351845 ], "wc_clarity_avg": [ 17.0, 28.5447017850949 ], "wc_relation_to_prior_work_avg": [ 11.0, 14.872793954062566 ], "wc_documentation_avg": [ 6.0, 4.6475800154489 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 441.8, 267.9741778604797 ], "wc_reply_reviewers_avg": [ 11.6, 14.333178293735132 ], "wc_reply_authors_avg": [ 545.6, 323.61619242553365 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5144957554275266, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7649218464695951947&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nrel.gov;;nrel.gov", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "National Renewable Energy Laboratory", "aff_unique_dep": "", "aff_unique_url": "https://www.nrel.gov", "aff_unique_abbr": "NREL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Maximum Average Randomly Sampled: A Scale Free and Non-parametric Algorithm for Stochastic Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71112", 
"id": "c8nIdZ5HJJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b84adff45775e92a45f0cd87c37f5ce9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=c8nIdZ5HJJ", "openreview": "https://openreview.net/forum?id=c8nIdZ5HJJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71112.png?t=1701655246.0350542", "slides": "https://nips.cc/virtual/2023/poster/71112", "video": "https://nips.cc/virtual/2023/poster/71112", "author_site": "Masoud Moravej Khorasani, Erik Weyer", "tldr": "", "abstract": "Upper Confidence Bound (UCB) methods are one of the most effective methods in dealing with the exploration-exploitation trade-off in online decision-making problems. The confidence bounds utilized in UCB methods tend to be constructed based on concentration equalities which are usually dependent on a parameter of scale (e.g. a bound on the payoffs, a variance, or a subgaussian parameter) that must be known in advance. The necessity of knowing a scale parameter a priori and the fact that the confidence bounds only use the tail information can deteriorate the performance of the UCB methods.\n\nHere we propose a data-dependent UCB algorithm called MARS (Maximum Average Randomly Sampled) in a non-parametric setup for multi-armed bandits with symmetric rewards. The algorithm does not depend on any scaling, and the data-dependent upper confidence bound is constructed based on the maximum average of randomly sampled rewards inspired by the work of Hartigan in the 1960s and 70s. A regret bound for the multi-armed bandit problem is derived under the same assumptions as for the $\\psi$-UCB method without incorporating any correction factors. The method is illustrated and compared with baseline algorithms in numerical experiments.", "keywords": "Stochastic Multi-armed bandit;Online Learning;Upper Confidence Bound", "primary_area": "", "supplementary_material": "/attachment/e30cc4c1fa3ff96b4b348cc78d5686e81d6e3883.pdf", "author": "Masoud Moravej Khorasani;Erik Weyer", "authorids": "~Masoud_Moravej_Khorasani1;~Erik_Weyer1", "gender": ";M", "homepage": ";https://findanexpert.unimelb.edu.au/profile/2609-erik-weyer", "dblp": ";44/4878", "google_scholar": ";https://scholar.google.com.au/citations?user=Q6NDDm0AAAAJ", "orcid": ";0000-0003-4309-4337", "linkedin": ";", "or_profile": "~Masoud_Moravej_Khorasani1;~Erik_Weyer1", "aff": ";University of Melbourne", "aff_domain": ";unimelb.edu.au", "position": ";Full Professor", "bibtex": "@inproceedings{\nkhorasani2023maximum,\ntitle={Maximum Average Randomly Sampled: A Scale Free and Non-parametric Algorithm for Stochastic Bandits},\nauthor={Masoud Moravej Khorasani and Erik Weyer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=c8nIdZ5HJJ}\n}", "github": "", "project": "", "reviewers": "t4Bj;GPeu;DQ4N;sKFK", "pdf_size": 454497, "rating": "6;6;6;7", "confidence": "5;4;4;4", "soundness": "4;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "164;47;54;138", "wc_strengths": "140;42;35;61", "wc_weaknesses": "575;254;80;194", "wc_questions": "191;2;111;58", "wc_limitations": "4;1;1;1", "wc_review": "1074;346;281;452", "wc_reply_reviewers": "67;33;38;61", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 
3.0, 0.0 ], "wc_summary_avg": [ 100.75, 51.143792389692806 ], "wc_strengths_avg": [ 69.5, 41.80011961705373 ], "wc_weaknesses_avg": [ 275.75, 183.7285701789463 ], "wc_questions_avg": [ 90.5, 69.65809357138623 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 538.25, 315.2795386637071 ], "wc_reply_reviewers_avg": [ 49.75, 14.515078366994786 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9068879605952942647&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": ";unimelb.edu.au", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Melbourne", "aff_unique_dep": "", "aff_unique_url": "https://www.unimelb.edu.au", "aff_unique_abbr": "UniMelb", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "title": "Sequential Subset Matching for Dataset Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71111", "id": "c9fXCzR5fK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d553f0e0abb80e2a60328d634583bd2e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=c9fXCzR5fK", "openreview": "https://openreview.net/forum?id=c9fXCzR5fK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71111", "video": "https://nips.cc/virtual/2023/poster/71111", "author_site": "JIAWEI DU, Qin Shi, Joey Tianyi Zhou", "tldr": "", "abstract": "Dataset distillation is a newly emerging task that synthesizes a small-size dataset used in training deep neural networks (DNNs) for reducing data storage and model training costs. The synthetic datasets are expected to capture the essence of the knowledge contained in real-world datasets such that the former yields a similar performance as the latter. Recent advancements in distillation methods have produced notable improvements in generating synthetic datasets. However, current state-of-the-art methods treat the entire synthetic dataset as a unified entity and optimize each synthetic instance equally . This static optimization approach may lead to performance degradation in dataset distillation. \nSpecifically, we argue that static optimization can give rise to a coupling issue within the synthetic data, particularly when a larger amount of synthetic data is being optimized. This coupling issue, in turn, leads to the failure of the distilled dataset to extract the high-level features learned by the deep neural network (DNN) in the latter epochs.\nIn this study, we propose a new dataset distillation strategy called Sequential Subset Matching (SeqMatch), which tackles this problem by adaptively optimizing the synthetic data to encourage sequential acquisition of knowledge during dataset distillation. Our analysis indicates that SeqMatch effectively addresses the coupling issue by sequentially generating the synthetic instances, thereby enhancing its performance significantly. 
Our proposed SeqMatch outperforms state-of-the-art methods on various datasets, including SVHN, CIFAR-10, CIFAR-100, and Tiny ImageNet.", "keywords": "Dataset distillation;gradients matching", "primary_area": "", "supplementary_material": "", "author": "Jiawei Du;Qin Shi;Joey Tianyi Zhou", "authorids": "~Jiawei_Du1;~Qin_Shi2;~Joey_Tianyi_Zhou1", "gender": "M;F;M", "homepage": ";;https://joeyzhouty.github.io/", "dblp": ";;123/5110", "google_scholar": "WrJKEzEAAAAJ;;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ", "orcid": ";;0000-0002-4675-7055", "linkedin": ";qin-shi-5a3072247;", "or_profile": "~Jiawei_Du1;~Qin_Shi2;~Joey_Tianyi_Zhou1", "aff": "National University of Singapore;National University of Singapore;A*STAR Centre for Frontier AI Research", "aff_domain": "u.nus.edu;u.nus.edu;cfar.a-star.edu.sg", "position": "PhD student;MS student;Principal Researcher", "bibtex": "@inproceedings{\ndu2023sequential,\ntitle={Sequential Subset Matching for Dataset Distillation},\nauthor={Jiawei Du and Qin Shi and Joey Tianyi Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=c9fXCzR5fK}\n}", "github": "", "project": "", "reviewers": "xCSw;Qidh;u5kd;QS3g", "pdf_size": 15304249, "rating": "4;5;5;8", "confidence": "4;3;4;5", "soundness": "2;1;2;4", "novelty": "3;3;2;4", "presentation": "3;3;3;4", "wc_summary": "103;91;122;85", "wc_strengths": "33;60;54;80", "wc_weaknesses": "245;148;91;87", "wc_questions": "80;29;89;75", "wc_limitations": "7;14;34;1", "wc_review": "468;342;390;328", "wc_reply_reviewers": "155;30;24;117", "wc_reply_authors": "1530;180;182;356", "reply_reviewers": "1;1;1;1", "reply_authors": "6;3;4;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 1.0897247358851685 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.25, 14.13108276106258 ], "wc_strengths_avg": [ 56.75, 16.753730927766508 ], "wc_weaknesses_avg": [ 142.75, 63.774505094120485 ], "wc_questions_avg": [ 68.25, 23.209642392764263 ], "wc_limitations_avg": [ 14.0, 12.429802894656053 ], "wc_review_avg": [ 382.0, 54.71745608121781 ], "wc_reply_reviewers_avg": [ 81.5, 56.171612047367844 ], "wc_reply_authors_avg": [ 562.0, 563.423464190124 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=719022730639671688&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "u.nus.edu;u.nus.edu;cfar.a-star.edu.sg", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "National University of Singapore;A*STAR", "aff_unique_dep": ";Centre for Frontier AI Research", "aff_unique_url": "https://www.nus.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NUS;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Characterization and Learning of Causal Graphs with Small Conditioning Sets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71110", "id": "cANkPsVtsw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eaef3b49866b942041a34bb8da397eb7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cANkPsVtsw", "openreview":
"https://openreview.net/forum?id=cANkPsVtsw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71110", "video": "https://nips.cc/virtual/2023/poster/71110", "tldr": "", "abstract": "Constraint-based causal discovery algorithms learn part of the causal graph structure by systematically testing conditional independences observed in the data. These algorithms, such as the PC algorithm and its variants, rely on graphical characterizations of the so-called equivalence class of causal graphs proposed by Pearl. However, constraint-based causal discovery algorithms struggle when data is limited since conditional independence tests quickly lose their statistical power, especially when the conditioning set is large. To address this, we propose using conditional independence tests where the size of the conditioning set is upper bounded by some integer k for robust causal discovery. The existing graphical characterizations of the equivalence classes of causal graphs are not applicable when we cannot leverage all the conditional independence statements. We first define the notion of k-Markov equivalence: Two causal graphs are k-Markov equivalent if they entail the same conditional independence constraints where the conditioning set size is upper bounded by k. We propose a novel representation that allows us to graphically characterize k-Markov equivalence between two causal graphs. We propose a sound constraint-based algorithm called the k-PC algorithm for learning this equivalence class. Finally, we conduct synthetic, and semi-synthetic experiments to demonstrate that the k-PC algorithm enables more robust causal discovery in the small sample regime compared to the baseline algorithms.", "keywords": "causal discovery", "primary_area": "", "supplementary_material": "/attachment/124c192f6344916f6a601e88755de8b442569a4b.zip", "author": "Murat Kocaoglu", "authorids": "~Murat_Kocaoglu1", "gender": "M", "homepage": "https://www.muratkocaoglu.com", "dblp": "74/11343", "google_scholar": "7N7bzdwAAAAJ", "orcid": "", "linkedin": "mkocaoglu/", "or_profile": "~Murat_Kocaoglu1", "aff": "Purdue University", "aff_domain": "purdue.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\nkocaoglu2023characterization,\ntitle={Characterization and Learning of Causal Graphs with Small Conditioning Sets},\nauthor={Murat Kocaoglu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cANkPsVtsw}\n}", "github": "", "project": "", "reviewers": "fQhL;XPAW;mhM7;Gg3j;iDG2", "pdf_size": 1013584, "rating": "5;5;7;7;7", "confidence": "2;4;4;3;4", "soundness": "3;4;4;4;3", "novelty": "2;2;3;3;4", "presentation": "3;4;3;4;4", "wc_summary": "271;109;140;63;61", "wc_strengths": "2;19;97;67;70", "wc_weaknesses": "2;86;150;59;28", "wc_questions": "2;13;152;20;2", "wc_limitations": "2;1;13;11;6", "wc_review": "279;228;552;220;167", "wc_reply_reviewers": "13;44;27;22;0", "wc_reply_authors": "0;315;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 128.8, 77.02570999348205 ], "wc_strengths_avg": [ 51.0, 35.09415905816807 ], "wc_weaknesses_avg": [ 65.0, 51.068581339214816 ], "wc_questions_avg": [ 37.8, 57.50965136392326 ], "wc_limitations_avg": [ 6.6, 4.758150901348127 ], "wc_review_avg": [ 
289.2, 136.1137759376324 ], "wc_reply_reviewers_avg": [ 21.2, 14.634206503941373 ], "wc_reply_authors_avg": [ 63.0, 126.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.4082482904638631, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6544033054285342636&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "purdue.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Fair, Polylog-Approximate Low-Cost Hierarchical Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71109", "id": "cAPMmCl2f3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c533f53f76c4df9a7f08e7cb676d132-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cAPMmCl2f3", "openreview": "https://openreview.net/forum?id=cAPMmCl2f3", "poster": "/media/PosterPDFs/NeurIPS%202023/71109.png?t=1699592953.4108496", "slides": "https://nips.cc/virtual/2023/poster/71109", "video": "https://nips.cc/virtual/2023/poster/71109", "author_site": "Marina Knittel, Max Springer, John Dickerson, MohammadTaghi Hajiaghayi", "tldr": "", "abstract": "Research in fair machine learning, and particularly clustering, has been crucial in recent years given the many ethical controversies that modern intelligent systems have posed. Ahmadian et al. [2020] established the study of fairness in hierarchical clustering, a stronger, more structured variant of its well-known flat counterpart, though their proposed algorithm that optimizes for Dasgupta's [2016] famous cost function was highly theoretical. Knittel et al. [2023] then proposed the first practical fair approximation for cost, however they were unable to break the polynomial-approximate barrier they posed as a hurdle of interest. 
We break this barrier, proposing the first truly polylogarithmic-approximate low-cost fair hierarchical clustering, thus greatly bridging the gap between the best fair and vanilla hierarchical clustering approximations.", "keywords": "Fair machine learning;hierarchical clustering;clustering", "primary_area": "", "supplementary_material": "/attachment/053ea063d630781cb78a4d6cfffa5e31cbcb5407.zip", "author": "Marina Knittel;Max Springer;John P Dickerson;MohammadTaghi Hajiaghayi", "authorids": "~Marina_Knittel1;~Max_Springer1;~John_P_Dickerson1;~MohammadTaghi_Hajiaghayi1", "gender": "Non-Binary;M;M;M", "homepage": "https://mknittel.github.io/;https://www.maxspringer.me;https://jpdickerson.com/;http://www.cs.umd.edu/~hajiagha/", "dblp": "245/0342;292/2716;75/8479;334/4488", "google_scholar": "7EB47RUAAAAJ;x9NBFhwAAAAJ;https://scholar.google.com.tw/citations?user=QgDpfCQAAAAJ;https://scholar.google.com.tw/citations?user=SQ1eGN4AAAAJ", "orcid": ";0000-0001-9291-6574;0000-0003-2231-680X;0000-0003-4842-0533", "linkedin": ";mss423/;john-dickerson-83a74a7/;mohammad-hajiaghayi-2139a913a&ved=2ahUKEwjMyeH-5-_-AhV3K1kFHeeBDKwQjjh6BAgSEAE&usg=AOvVaw1NSVoT5FCGtOTi4eT8nr4b", "or_profile": "~Marina_Knittel1;~Max_Springer1;~John_P_Dickerson1;~MohammadTaghi_Hajiaghayi1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;Optimized Markets, Inc;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;optimizedmarkets.com;umd.edu", "position": "PhD student;PhD student;Consultant;Full Professor", "bibtex": "@inproceedings{\nknittel2023fair,\ntitle={Fair, Polylog-Approximate Low-Cost Hierarchical Clustering},\nauthor={Marina Knittel and Max Springer and John P Dickerson and MohammadTaghi Hajiaghayi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cAPMmCl2f3}\n}", "github": "", "project": "", "reviewers": "gEkD;BAkf;qPF4;XyRG", "pdf_size": 3191045, "rating": "3;6;6;8", "confidence": "3;4;3;2", "soundness": "2;3;3;4", "novelty": "2;3;2;4", "presentation": "1;2;2;4", "wc_summary": "81;74;60;64", "wc_strengths": "52;33;81;48", "wc_weaknesses": "358;67;253;24", "wc_questions": "36;120;94;142", "wc_limitations": "8;13;4;1", "wc_review": "535;307;492;279", "wc_reply_reviewers": "126;58;89;14", "wc_reply_authors": "62;0;0;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 69.75, 8.257572282456872 ], "wc_strengths_avg": [ 53.5, 17.38533865071371 ], "wc_weaknesses_avg": [ 175.5, 136.04870451422903 ], "wc_questions_avg": [ 98.0, 39.6232255123179 ], "wc_limitations_avg": [ 6.5, 4.5 ], "wc_review_avg": [ 403.25, 111.7326608472205 ], "wc_reply_reviewers_avg": [ 71.75, 41.12405014100629 ], "wc_reply_authors_avg": [ 15.5, 26.846787517317598 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.39605901719066966, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17459302063333164593&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.umd.edu;umd.edu;optimizedmarkets.com;umd.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", 
"aff_unique_norm": "University of Maryland, College Park;University of Maryland;Optimized Markets, Inc", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;", "aff_unique_abbr": "UMD;UMD;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Estimating the Rate-Distortion Function by Wasserstein Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71108", "id": "cAaTbLa3ad", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/07eea3fb833c905c5edf46f914231f15-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cAaTbLa3ad", "openreview": "https://openreview.net/forum?id=cAaTbLa3ad", "poster": "/media/PosterPDFs/NeurIPS%202023/71108.png?t=1700350896.2704914", "slides": "https://nips.cc/virtual/2023/poster/71108", "video": "https://nips.cc/virtual/2023/poster/71108", "author_site": "Yibo Yang, Stephan Eckstein, Marcel Nutz, Stephan Mandt", "tldr": "", "abstract": "In the theory of lossy compression, the rate-distortion (R-D) function $R(D)$ describes how much a data source can be compressed (in bit-rate) at any given level of fidelity (distortion). Obtaining $R(D)$ for a given data source establishes the fundamental performance limit for all compression algorithms. We propose a new method to estimate $R(D)$ from the perspective of optimal transport. Unlike the classic Blahut--Arimoto algorithm which fixes the support of the reproduction distribution in advance, our Wasserstein gradient descent algorithm learns the support of the optimal reproduction distribution by moving particles. We prove its local convergence and analyze the sample complexity of our R-D estimator based on a connection to entropic optimal transport. Experimentally, we obtain comparable or tighter bounds than state-of-the-art neural network methods on low-rate sources while requiring considerably less tuning and computation effort. 
We also highlight a connection to maximum-likelihood deconvolution and introduce a new class of sources that can be used as test cases with known solutions to the R-D problem.", "keywords": "information theory;rate-distortion function;optimal transport", "primary_area": "", "supplementary_material": "/attachment/7ff0c67a7d6c29f520e81bb3afa270c16938c242.zip", "author": "Yibo Yang;Stephan Eckstein;Marcel Nutz;Stephan Mandt", "authorids": "~Yibo_Yang1;stephan.eckstein@math.ethz.ch;~Marcel_Nutz1;~Stephan_Mandt1", "gender": "Unspecified;;M;", "homepage": ";;https://www.math.columbia.edu/~mnutz/;", "dblp": ";;59/9670;", "google_scholar": "N0VVxNUAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0003-2936-2315;", "linkedin": ";;;", "or_profile": "~Yibo_Yang1;stephan.eckstein@math.ethz.ch;~Marcel_Nutz1;~Stephan_Mandt1", "aff": "University of California, Irvine;;Columbia University;", "aff_domain": "uci.edu;;columbia.edu;", "position": "PhD student;;Full Professor;", "bibtex": "@inproceedings{\nyang2023estimating,\ntitle={Estimating the Rate-Distortion Function by Wasserstein Gradient Descent},\nauthor={Yibo Yang and Stephan Eckstein and Marcel Nutz and Stephan Mandt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cAaTbLa3ad}\n}", "github": "", "project": "", "reviewers": "GB3T;jdNJ;RV2s;4bjw;SDqV", "pdf_size": 1947279, "rating": "5;5;7;7;8", "confidence": "3;5;5;3;2", "soundness": "2;4;3;3;3", "novelty": "3;2;3;3;3", "presentation": "1;3;3;3;2", "wc_summary": "79;65;85;54;55", "wc_strengths": "15;103;57;14;87", "wc_weaknesses": "171;154;108;7;34", "wc_questions": "33;33;137;140;59", "wc_limitations": "33;1;1;19;12", "wc_review": "331;356;388;234;247", "wc_reply_reviewers": "26;14;0;27;20", "wc_reply_authors": "125;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.6, 1.2 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 67.6, 12.515590277729613 ], "wc_strengths_avg": [ 55.2, 36.36701802457826 ], "wc_weaknesses_avg": [ 94.8, 64.63868810549917 ], "wc_questions_avg": [ 80.4, 48.388428368774285 ], "wc_limitations_avg": [ 13.2, 12.03993355463393 ], "wc_review_avg": [ 311.2, 60.628046315216196 ], "wc_reply_reviewers_avg": [ 17.4, 9.871170143402452 ], "wc_reply_authors_avg": [ 25.0, 50.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4444444444444445, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15298820389564370887&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "uci.edu;;columbia.edu;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Irvine;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uci.edu;https://www.columbia.edu", "aff_unique_abbr": "UCI;Columbia", "aff_campus_unique_index": "0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Explainable Brain Age Prediction using coVariance Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71107", "id": "cAhJF87GN0", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/92bb2145c74b7d10fbb61aba315b5010-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cAhJF87GN0", "openreview": "https://openreview.net/forum?id=cAhJF87GN0", "poster": "/media/PosterPDFs/NeurIPS%202023/71107.png?t=1701814154.2127655", "slides": "https://nips.cc/virtual/2023/poster/71107", "video": "https://nips.cc/virtual/2023/poster/71107", "author_site": "Saurabh Sihag, Gonzalo Mateos, Corey McMillan, Alejandro Ribeiro", "tldr": "", "abstract": "In computational neuroscience, there has been an increased interest in developing machine learning algorithms that leverage brain imaging data to provide estimates of \"brain age\" for an individual. Importantly, the discordance between brain age and chronological age (referred to as \"brain age gap\") can capture accelerated aging due to adverse health conditions and therefore, can reflect increased vulnerability towards neurological disease or cognitive impairments. However, widespread adoption of brain age for clinical decision support has been hindered due to lack of transparency and methodological justifications in most existing brain age prediction algorithms. In this paper, we leverage coVariance neural networks (VNN) to propose an explanation-driven and anatomically interpretable framework for brain age prediction using cortical thickness features. Specifically, our brain age prediction framework extends beyond the coarse metric of brain age gap in Alzheimer\u2019s disease (AD) and we make two important observations: (i) VNNs can assign anatomical interpretability to elevated brain age gap in AD by identifying contributing brain regions, (ii) the interpretability offered by VNNs is contingent on their ability to exploit specific eigenvectors of the anatomical covariance matrix. 
Together, these observations facilitate an explainable and anatomically interpretable perspective to the task of brain age prediction.", "keywords": "graph neural networks;brain age;Alzheimer's disease;interpretability;explainability;computational neuroscience", "primary_area": "", "supplementary_material": "/attachment/138b5322600468a0468e69e603325ecce3deb643.zip", "author": "Saurabh Sihag;Gonzalo Mateos;Corey McMillan;Alejandro Ribeiro", "authorids": "~Saurabh_Sihag1;~Gonzalo_Mateos1;~Corey_McMillan1;~Alejandro_Ribeiro1", "gender": "M;M;M;M", "homepage": "https://sihags.github.io/;https://www.hajim.rochester.edu/ece/sites/gmateos/;https://www.pennbindlab.com;https://alelab.seas.upenn.edu", "dblp": "172/0928;28/7822;;32/15", "google_scholar": "T8D94-QAAAAJ;4QAOifUAAAAJ;;7mrPM4kAAAAJ", "orcid": ";0000-0002-9847-6298;;0000-0003-4230-9906", "linkedin": ";;;", "or_profile": "~Saurabh_Sihag1;~Gonzalo_Mateos1;~Corey_McMillan1;~Alejandro_Ribeiro1", "aff": "University of Pennsylvania;University of Rochester;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;rochester.edu;upenn.edu;upenn.edu", "position": "Postdoc;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsihag2023explainable,\ntitle={Explainable Brain Age Prediction using coVariance Neural Networks},\nauthor={Saurabh Sihag and Gonzalo Mateos and Corey McMillan and Alejandro Ribeiro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cAhJF87GN0}\n}", "github": "", "project": "", "reviewers": "n5r3;HXvC;m6iX;mFGa", "pdf_size": 8614582, "rating": "4;5;5;7", "confidence": "4;4;3;4", "soundness": "3;3;3;4", "novelty": "2;2;2;4", "presentation": "3;2;2;4", "wc_summary": "126;97;54;109", "wc_strengths": "71;125;143;102", "wc_weaknesses": "139;160;292;262", "wc_questions": "42;32;109;219", "wc_limitations": "5;1;115;66", "wc_review": "383;415;713;758", "wc_reply_reviewers": "119;33;154;602", "wc_reply_authors": "395;355;57;809", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 96.5, 26.612966764342527 ], "wc_strengths_avg": [ 110.25, 26.920020430898635 ], "wc_weaknesses_avg": [ 213.25, 65.05142196754811 ], "wc_questions_avg": [ 100.5, 74.5469650623015 ], "wc_limitations_avg": [ 46.75, 47.07640066954992 ], "wc_review_avg": [ 567.25, 169.37882837001797 ], "wc_reply_reviewers_avg": [ 227.0, 220.93777404509171 ], "wc_reply_authors_avg": [ 404.0, 267.82270254778626 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14473835537211352843&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "upenn.edu;rochester.edu;upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Pennsylvania;University of Rochester", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.rochester.edu", "aff_unique_abbr": "UPenn;U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "HyPoradise: An 
Open Baseline for Generative Speech Recognition with Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73533", "id": "cAjZ3tMye6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6492267465a7ac507be1f9fd1174e78d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=cAjZ3tMye6", "openreview": "https://openreview.net/forum?id=cAjZ3tMye6", "poster": "/media/PosterPDFs/NeurIPS%202023/73533.png?t=1697435862.1924725", "slides": "https://nips.cc/virtual/2023/poster/73533", "video": "https://nips.cc/virtual/2023/poster/73533", "author_site": "CHEN CHEN, Yuchen Hu, Chao-Han Huck Yang, Sabato Marco Siniscalchi, Pin-Yu Chen, Eng-Siong Chng", "tldr": "", "abstract": "Advancements in deep neural networks have allowed automatic speech recognition (ASR) systems to attain human parity on several publicly available clean speech datasets. However, even state-of-the-art ASR systems experience performance degradation when confronted with adverse conditions, as a well-trained acoustic model is sensitive to variations in the speech domain, e.g., background noise. Intuitively, humans address this issue by relying on their linguistic knowledge: the meaning of ambiguous spoken terms is usually inferred from contextual cues, thereby reducing the dependency on the auditory system. Inspired by this observation, we introduce the first open-source benchmark to utilize external large language models (LLMs) for ASR error correction, where N-best decoding hypotheses provide informative elements for true transcription prediction. This approach is a paradigm shift from the traditional language model rescoring strategy that can only select one candidate hypothesis as the output transcription. The proposed benchmark contains a novel dataset, \"HyPoradise\" (HP), encompassing more than 316,000 pairs of N-best hypotheses and corresponding accurate transcriptions across prevalent speech domains. Given this dataset, we examine three types of error correction techniques based on LLMs with varying amounts of labeled hypotheses-transcription pairs, which yield significant word error rate (WER) reductions. Experimental evidence demonstrates that the proposed technique achieves a breakthrough by surpassing the upper bound of traditional re-ranking based methods. More surprisingly, an LLM with a reasonable prompt design can even correct tokens that are missing from the N-best list.
We make our results publicly accessible for reproducible pipelines with released pre-trained models, thus providing a new paradigm for ASR error correction with LLMs.", "keywords": "Automatic speech recognition;language model rescoring;N-best Hypothesis list.", "primary_area": "", "supplementary_material": "/attachment/dd65041b51149667bcb790e0fdb28ddd28b64747.pdf", "author": "CHEN CHEN;Yuchen Hu;Chao-Han Huck Yang;Sabato Marco Siniscalchi;Pin-Yu Chen;EngSiong Chng", "authorids": "~CHEN_CHEN37;~Yuchen_Hu1;~Chao-Han_Huck_Yang1;~Sabato_Marco_Siniscalchi1;~Pin-Yu_Chen1;~EngSiong_Chng1", "gender": "M;M;M;M;M;M", "homepage": ";https://yuchen005.github.io/;https://huckiyang.github.io/;https://www.ntnu.edu/employees/marco.siniscalchi;http://www.pinyuchen.com;https://personal.ntu.edu.sg/aseschng/intro1.html", "dblp": "65/4423;;230/4012;58/6344;39/8969;c/ChngEngSiong", "google_scholar": "uUmSp1QAAAAJ;Neo-1mIAAAAJ;TT3XJW8AAAAJ;https://scholar.google.it/citations?user=iHhGIcEAAAAJ;jxwlCUUAAAAJ;https://scholar.google.com.tw/citations?user=FJodrCcAAAAJ", "orcid": ";;0000-0003-2879-8811;0000-0002-0770-0507;0000-0003-1039-8369;", "linkedin": ";;;;pin-yu-chen-940062a2;", "or_profile": "~CHEN_CHEN37;~Yuchen_Hu1;~Chao-Han_Huck_Yang1;~Sabato_Marco_Siniscalchi1;~Pin-Yu_Chen1;~EngSiong_Chng1", "aff": "Nanyang Technological University;Nanyang Technological University;Amazon AGI;Kore University of Enna;International Business Machines;Nanyang Technological University", "aff_domain": "ntu.edu;ntu.edu.sg;amazon.com;unikore.it;ibm.com;ntu.edu.sg", "position": "PhD student;PhD student;Researcher;Full Professor;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nchen2023hyporadise,\ntitle={HyPoradise: An Open Baseline for Generative Speech Recognition with Large Language Models},\nauthor={CHEN CHEN and Yuchen Hu and Chao-Han Huck Yang and Sabato Marco Siniscalchi and Pin-Yu Chen and EngSiong Chng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=cAjZ3tMye6}\n}", "github": "", "project": "", "reviewers": "7FEE;4B4v;LMrw;y46i;Vw1k", "pdf_size": 955111, "rating": "6;6;6;7;8", "confidence": "4;4;5;5;5", "wc_summary_and_contributions": "71;96;31;38;41", "wc_strengths": "46;181;73;88;108", "wc_improvement": "2;535;43;43;227", "wc_limitations": "71;315;12;4;23", "wc_correctness": "1;11;1;14;50", "wc_clarity": "1;17;1;2;6", "wc_relation_to_prior_work": "34;482;12;4;232", "wc_documentation": "1;214;23;1;16", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "228;1852;197;195;704", "wc_reply_reviewers": "11;31;0;0;0", "wc_reply_authors": "369;1522;352;146;354", "reply_reviewers": "1;1;0;0;0", "reply_authors": "1;4;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "wc_summary_and_contributions_avg": [ 55.4, 24.483463807231196 ], "wc_strengths_avg": [ 99.2, 45.630691425837504 ], "wc_improvement_avg": [ 170.0, 198.47216429514745 ], "wc_limitations_avg": [ 85.0, 117.32859838931002 ], "wc_correctness_avg": [ 15.4, 18.07318455613177 ], "wc_clarity_avg": [ 5.4, 6.086049621881176 ], "wc_relation_to_prior_work_avg": [ 152.8, 184.7835490513157 ], "wc_documentation_avg": [ 51.0, 81.94876448122938 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 635.2, 638.2699742272074 ], "wc_reply_reviewers_avg": [ 8.4, 12.076423311560422 ], "wc_reply_authors_avg": [ 548.6, 493.63371035617087 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 
1.6, 1.2000000000000002 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4986032967695116984&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "ntu.edu;ntu.edu.sg;amazon.com;unikore.it;ibm.com;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Nanyang Technological University;Amazon;Kore University of Enna;International Business Machines Corporation", "aff_unique_dep": ";Amazon AGI;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.amazon.com;https://www.unieenna.it;https://www.ibm.com", "aff_unique_abbr": "NTU;Amazon;Kore Enna;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;1;0", "aff_country_unique": "Singapore;United States;Italy" }, { "title": "Enhancing Motion Deblurring in High-Speed Scenes with Spike Streams", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71106", "id": "cAyLnMxiTl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dead3d8ff3f9198e38a36a950ebbcafd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cAyLnMxiTl", "openreview": "https://openreview.net/forum?id=cAyLnMxiTl", "poster": "/media/PosterPDFs/NeurIPS%202023/71106.png?t=1702465469.7720573", "slides": "https://nips.cc/virtual/2023/poster/71106", "video": "https://nips.cc/virtual/2023/poster/71106", "author_site": "Shiyan Chen, Jiyuan Zhang, Yajing Zheng, Tiejun Huang, Zhaofei Yu", "tldr": "", "abstract": "Traditional cameras produce desirable vision results but struggle with motion blur in high-speed scenes due to long exposure windows. Existing frame-based deblurring algorithms face challenges in extracting useful motion cues from severely blurred images. Recently, an emerging bio-inspired vision sensor known as the spike camera has achieved an extremely high frame rate while preserving rich spatial details, owing to its novel sampling mechanism. However, typical binary spike streams are relatively low-resolution, degraded image signals devoid of color information, making them unfriendly to human vision. In this paper, we propose a novel approach that integrates the two modalities from two branches, leveraging spike streams as auxiliary visual cues for guiding deblurring in high-speed motion scenes. \nWe propose the first spike-based motion deblurring model with bidirectional information complementarity. We introduce a content-aware motion magnitude attention module that utilizes a learnable mask to extract relevant information from blurry images effectively, and we incorporate a transposed cross-attention fusion module to efficiently combine features from both spike data and blurry RGB images.\nFurthermore, we build two extensive synthesized datasets for training and validation purposes, encompassing high-temporal-resolution spikes, blurry images, and corresponding sharp images.
The experimental results demonstrate that our method effectively recovers clear RGB images from highly blurry scenes and outperforms state-of-the-art deblurring algorithms in multiple settings.", "keywords": "spike camera;neuromorphic vision sensors;motion deblurring;high speed imaging", "primary_area": "", "supplementary_material": "/attachment/082e4bcd49f57553166e8ee0aa833988593864b0.pdf", "author": "Shiyan Chen;Jiyuan Zhang;Yajing Zheng;Tiejun Huang;Zhaofei Yu", "authorids": "~Shiyan_Chen1;~Jiyuan_Zhang3;~Yajing_Zheng1;~Tiejun_Huang1;~Zhaofei_Yu1", "gender": ";M;F;M;M", "homepage": ";;https://zyj061.github.io;https://idm.pku.edu.cn/~tjhuang/;https://yuzhaofei.github.io", "dblp": ";;230/4398;h/TiejunHuang;166/0573", "google_scholar": ";ukHrw0IAAAAJ;_bUM0NcAAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;qaUgD50AAAAJ", "orcid": ";;;0000-0002-4234-6099;", "linkedin": ";jiyuanzhang-leo;;;", "or_profile": "~Shiyan_Chen1;~Jiyuan_Zhang3;~Yajing_Zheng1;~Tiejun_Huang1;~Zhaofei_Yu1", "aff": ";Peking University;Peking University;Peking University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": ";PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023enhancing,\ntitle={Enhancing Motion Deblurring in High-Speed Scenes with Spike Streams},\nauthor={Shiyan Chen and Jiyuan Zhang and Yajing Zheng and Tiejun Huang and Zhaofei Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cAyLnMxiTl}\n}", "github": "", "project": "", "reviewers": "9w9w;kDLr;wVmq;DRAW;Rugk", "pdf_size": 4625805, "rating": "4;5;5;5;6", "confidence": "5;4;5;4;4", "soundness": "3;3;2;3;3", "novelty": "2;3;2;2;3", "presentation": "2;3;2;3;3", "wc_summary": "106;44;39;41;52", "wc_strengths": "69;55;20;42;54", "wc_weaknesses": "591;129;115;153;213", "wc_questions": "12;14;2;2;18", "wc_limitations": "9;4;2;24;1", "wc_review": "787;246;178;262;338", "wc_reply_reviewers": "228;41;42;18;15", "wc_reply_authors": "1127;22;227;261;22", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;2;3;3;2", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 56.4, 25.19206224190469 ], "wc_strengths_avg": [ 48.0, 16.407315441594946 ], "wc_weaknesses_avg": [ 240.2, 178.57480225384543 ], "wc_questions_avg": [ 9.6, 6.499230723708768 ], "wc_limitations_avg": [ 8.0, 8.461678320522472 ], "wc_review_avg": [ 362.2, 218.40915731717843 ], "wc_reply_reviewers_avg": [ 68.8, 80.38756122684653 ], "wc_reply_authors_avg": [ 331.8, 409.9489724343751 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16761856997248312174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Monarch Mixer: A Simple 
Sub-Quadratic GEMM-Based Architecture", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71105", "id": "cB0BImqSS9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f498c1ce6bff52eb04febf87438dd84b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cB0BImqSS9", "openreview": "https://openreview.net/forum?id=cB0BImqSS9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71105", "video": "https://nips.cc/virtual/2023/poster/71105", "author_site": "Dan Fu, Simran Arora, Jessica Grogan, Isys Johnson, Evan Sabri Eyuboglu, Armin Thomas, Benjamin Spector, Michael Poli, Atri Rudra, Christopher R\u00e9", "tldr": "", "abstract": "Machine learning models are increasingly being scaled in both sequence length and model dimension to reach longer contexts and better performance. However, existing architectures such as Transformers scale quadratically along both these axes. We ask: are there performant architectures that can scale sub-quadratically along sequence length and model dimension? We introduce Monarch Mixer (M2), a new architecture that uses the same sub-quadratic primitive along both sequence length and model dimension: Monarch matrices, a simple class of expressive structured matrices that captures many linear transforms, achieves high hardware efficiency on GPUs, and scales sub-quadratically. As a proof of concept, we explore the performance of M2 in three domains: non-causal BERT-style language modeling, ViT-style image classification, and causal GPT-style language modeling. For non-causal BERT-style modeling, M2 matches BERT-base and BERT-large in downstream GLUE quality with up to 27% fewer parameters, and achieves up to 9.1$\\times$ higher throughput at sequence length 4K. On ImageNet, M2 outperforms ViT-b by 1% in accuracy, with only half the parameters. Causal GPT-style models introduce a technical challenge: enforcing causality via masking introduces a quadratic bottleneck. To alleviate this bottleneck, we develop a novel theoretical view of Monarch matrices based on multivariate polynomial evaluation and interpolation, which lets us parameterize M2 to be causal while remaining sub-quadratic. 
Using this parameterization, M2 matches GPT-style Transformers at 360M parameters in pretraining perplexity on The PILE\u2014showing for the first time that it may be possible to match Transformer quality without attention or MLPs.", "keywords": "structured matrices;transformers;efficiency", "primary_area": "", "supplementary_material": "", "author": "Daniel Y Fu;Simran Arora;Jessica Grogan;Isys Johnson;Sabri Eyuboglu;Armin W Thomas;Benjamin Frederick Spector;Michael Poli;Atri Rudra;Christopher Re", "authorids": "~Daniel_Y_Fu1;~Simran_Arora1;~Jessica_Grogan1;~Isys_Johnson1;~Sabri_Eyuboglu1;~Armin_W_Thomas1;~Benjamin_Frederick_Spector1;~Michael_Poli1;~Atri_Rudra1;~Christopher_Re1", "gender": ";;F;;;Non-Binary;M;M;M;", "homepage": ";https://scholar.google.com/citations?user=rGRsWH8AAAAJ&hl=en;https://cse.buffalo.edu/~jrgrogan/;https://github.com/isysjo;http://www.sabrieyuboglu.com/;;http://benjaminfspector.com;;http://www.cse.buffalo.edu/faculty/atri/;", "dblp": ";243/2342;;304/8782.html;298/7563;228/8292;;;04/4980;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;;awtZJwkAAAAJ;;RgIBwboAAAAJ;https://scholar.google.com.tw/citations?user=_e5H8IoAAAAJ;", "orcid": ";;;;;0000-0002-9947-5705;;;;", "linkedin": ";;jessicagrogan4/;;;;;;;", "or_profile": "~Daniel_Y_Fu1;~Simran_Arora1;~Jessica_Grogan1;~Isys_Johnson1;~Sabri_Eyuboglu1;~Armin_W_Thomas1;~Benjamin_Frederick_Spector1;~Michael_Poli1;~Atri_Rudra1;~Christopher_Re1", "aff": ";The Wharton School, University of Pennsylvania;State University of New York at Buffalo;State University of New York, Buffalo;Stanford University;Stanford University;Stanford University;Stanford University;State University of New York, Buffalo;", "aff_domain": ";wharton.upenn.edu;buffalo.edu;buffalo.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;buffalo.edu;", "position": ";Undergrad student;PhD student;PhD student;PhD student;Postdoc;PhD student;PhD student;Professor;", "bibtex": "@inproceedings{\nfu2023monarch,\ntitle={Monarch Mixer: A Simple Sub-Quadratic {GEMM}-Based Architecture},\nauthor={Daniel Y Fu and Simran Arora and Jessica Grogan and Isys Johnson and Sabri Eyuboglu and Armin W Thomas and Benjamin Frederick Spector and Michael Poli and Atri Rudra and Christopher Re},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cB0BImqSS9}\n}", "github": "", "project": "", "reviewers": "zN2M;qwKK;xUjR", "pdf_size": 1699542, "rating": "8;8;8", "confidence": "3;5;3", "soundness": "4;4;3", "novelty": "4;4;3", "presentation": "3;4;4", "wc_summary": "140;132;82", "wc_strengths": "111;39;46", "wc_weaknesses": "41;13;55", "wc_questions": "72;163;146", "wc_limitations": "9;1;20", "wc_review": "373;348;349", "wc_reply_reviewers": "14;0;14", "wc_reply_authors": "10;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 118.0, 25.664502073226878 ], "wc_strengths_avg": [ 65.33333333333333, 32.417416443771224 ], "wc_weaknesses_avg": [ 36.333333333333336, 17.46106780494506 ], "wc_questions_avg": [ 127.0, 39.50527390952589 ], "wc_limitations_avg": [ 10.0, 7.788880963698615 ], "wc_review_avg": [ 356.6666666666667, 11.55662388223981 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 
6.599663291074444 ], "wc_reply_authors_avg": [ 3.3333333333333335, 4.714045207910316 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1421689823915917785&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";wharton.upenn.edu;buffalo.edu;buffalo.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;buffalo.edu;", "author_num": 10, "aff_unique_index": "0;1;1;2;2;2;2;1", "aff_unique_norm": "University of Pennsylvania;State University of New York at Buffalo;Stanford University", "aff_unique_dep": "The Wharton School;;", "aff_unique_url": "https://www.wharton.upenn.edu;https://www.buffalo.edu;https://www.stanford.edu", "aff_unique_abbr": "UPenn Wharton;SUNY Buffalo;Stanford", "aff_campus_unique_index": "1;1;2;2;2;2;1", "aff_campus_unique": ";Buffalo;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Strategic Apple Tasting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71104", "id": "cBIPcZKFdw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fcd3909db30887ce1da519c4468db668-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cBIPcZKFdw", "openreview": "https://openreview.net/forum?id=cBIPcZKFdw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71104", "video": "https://nips.cc/virtual/2023/poster/71104", "author_site": "Keegan Harris, Chara Podimata, Steven Wu", "tldr": "", "abstract": "Algorithmic decision-making in high-stakes domains often involves assigning decisions to agents with incentives to strategically modify their input to the algorithm. In addition to dealing with incentives, in many domains of interest (e.g. lending and hiring) the decision-maker only observes feedback regarding their policy for rounds in which they assign a positive decision to the agent; this type of feedback is often referred to as apple tasting (or one-sided) feedback. We formalize this setting as an online learning problem with apple-tasting feedback where a principal makes decisions about a sequence of $T$ agents, each of which is represented by a context that may be strategically modified. Our goal is to achieve sublinear strategic regret, which compares the performance of the principal to that of the best fixed policy in hindsight, if the agents were truthful when revealing their contexts. Our main result is a learning algorithm which incurs $\\tilde{\\mathcal{O}}(\\sqrt{T})$ strategic regret when the sequence of agents is chosen stochastically. We also give an algorithm capable of handling adversarially-chosen agents, albeit at the cost of $\\tilde{\\mathcal{O}}(T^{(d+1)/(d+2)})$ strategic regret (where $d$ is the dimension of the context). 
Our algorithms can be easily adapted to the setting where the principal receives bandit feedback---this setting generalizes both the linear contextual bandit problem (by considering agents with incentives) and the strategic classification problem (by allowing for partial feedback).", "keywords": "strategic classification;strategic learning;apple tasting;bandit feedback;learning with incentives", "primary_area": "", "supplementary_material": "/attachment/25f928c44229dbdfe328e895d4f6d7caa0d1c9f4.pdf", "author": "Keegan Harris;Chara Podimata;Steven Wu", "authorids": "~Keegan_Harris1;~Chara_Podimata1;~Steven_Wu1", "gender": "M;F;M", "homepage": "https://keeganharris.github.io/;https://www.charapodimata.com/;https://zstevenwu.com/", "dblp": "294/5044;209/9752;137/8350", "google_scholar": "TnvQIrYAAAAJ;XY9hKvIAAAAJ;MbF6rTEAAAAJ", "orcid": ";;", "linkedin": ";;zstevenwu/", "or_profile": "~Keegan_Harris1;~Chara_Podimata1;~Zhiwei_Steven_Wu1", "aff": "Carnegie Mellon University;University of California, Berkeley;Carnegie Mellon University", "aff_domain": "cmu.edu;berkeley.edu;cmu.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nharris2023strategic,\ntitle={Strategic Apple Tasting},\nauthor={Keegan Harris and Chara Podimata and Steven Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cBIPcZKFdw}\n}", "github": "", "project": "", "reviewers": "6Gq7;qFGe;Mu3N;vd1A;qhsL", "pdf_size": 546460, "rating": "5;6;6;6;6", "confidence": "2;4;3;4;4", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;2", "presentation": "3;3;3;4;3", "wc_summary": "53;137;177;143;100", "wc_strengths": "10;113;54;33;94", "wc_weaknesses": "57;252;37;57;136", "wc_questions": "314;131;78;145;201", "wc_limitations": "5;2;19;1;24", "wc_review": "439;635;365;379;555", "wc_reply_reviewers": "91;27;0;20;193", "wc_reply_authors": "181;0;0;0;374", "reply_reviewers": "2;1;0;1;2", "reply_authors": "2;1;1;1;3", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 122.0, 42.27528828996912 ], "wc_strengths_avg": [ 60.8, 38.01789052538291 ], "wc_weaknesses_avg": [ 107.8, 79.70294850254913 ], "wc_questions_avg": [ 173.8, 80.29296357714044 ], "wc_limitations_avg": [ 10.2, 9.45304183847718 ], "wc_review_avg": [ 474.6, 104.50952109736224 ], "wc_reply_reviewers_avg": [ 66.2, 70.35168796837785 ], "wc_reply_authors_avg": [ 111.0, 149.0181197036119 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8750000000000001, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11410318867061811845&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cmu.edu;berkeley.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu", "aff_unique_abbr": "CMU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Conditional Score Guidance for Text-Driven Image-to-Image Translation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71103", "id": "cBS5CU96Jq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/799f81cfa0611f93586c007024041460-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cBS5CU96Jq", "openreview": "https://openreview.net/forum?id=cBS5CU96Jq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71103", "video": "https://nips.cc/virtual/2023/poster/71103", "author_site": "Hyunsoo Lee, Minsoo Kang, Bohyung Han", "tldr": "", "abstract": "We present a novel algorithm for text-driven image-to-image translation based on a pretrained text-to-image diffusion model. \nOur method aims to generate a target image by selectively editing regions of interest in a source image, defined by a modifying text, while preserving the remaining parts.\nIn contrast to existing techniques that solely rely on a target prompt, we introduce a new score function that additionally considers both the source image and the source text prompt, tailored to address specific translation tasks. \nTo this end, we derive the conditional score function in a principled way, decomposing it into the standard score and a guiding term for target image generation.\nFor the gradient computation about the guiding term, we assume a Gaussian distribution for the posterior distribution and estimate its mean and variance to adjust the gradient without additional training.\nIn addition, to improve the quality of the conditional score guidance, we incorporate a simple yet effective mixup technique, which combines two cross-attention maps derived from the source and target latents.\nThis strategy is effective for promoting a desirable fusion of the invariant parts in the source image and the edited regions aligned with the target prompt, leading to high-fidelity target image generation.\nThrough comprehensive experiments, we demonstrate that our approach achieves outstanding image-to-image translation performance on various tasks.\nCode is available at https://github.com/Hleephilip/CSG.", "keywords": "Diffusion;Image-to-Image Translation", "primary_area": "", "supplementary_material": "", "author": "Hyunsoo Lee;Minsoo Kang;Bohyung Han", "authorids": "~Hyunsoo_Lee1;~Minsoo_Kang1;~Bohyung_Han1", "gender": "M;M;Not Specified", "homepage": ";https://kminsoo.github.io;http://cvlab.snu.ac.kr/~bhhan", "dblp": ";29/5268;73/4880.html", "google_scholar": "https://scholar.google.co.kr/citations?user=6JNXaH0AAAAJ;in5F4IUAAAAJ;9aaeCToAAAAJ", "orcid": ";;", "linkedin": "philip21/;;", "or_profile": "~Hyunsoo_Lee1;~Minsoo_Kang1;~Bohyung_Han1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2023conditional,\ntitle={Conditional Score Guidance for Text-Driven Image-to-Image Translation},\nauthor={Hyunsoo Lee and Minsoo Kang and Bohyung Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cBS5CU96Jq}\n}", "github": "", "project": "", "reviewers": "ZGMV;nbbt;fXLA;JnRy;rP2j", "pdf_size": 12529506, "rating": "5;5;5;5;6", "confidence": "3;4;4;5;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "87;28;76;60;138", "wc_strengths": "87;10;12;29;100", "wc_weaknesses": "273;8;52;180;91", "wc_questions": "4;215;1;27;52", "wc_limitations": "58;91;1;17;12", "wc_review": "509;352;142;313;393", 
"wc_reply_reviewers": "0;37;0;25;106", "wc_reply_authors": "56;92;56;88;106", "reply_reviewers": "0;1;0;1;1", "reply_authors": "2;3;2;3;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 77.8, 36.07991130809498 ], "wc_strengths_avg": [ 47.6, 38.275840944386836 ], "wc_weaknesses_avg": [ 120.8, 94.88392909233892 ], "wc_questions_avg": [ 59.8, 79.74308747471468 ], "wc_limitations_avg": [ 35.8, 33.67729205265768 ], "wc_review_avg": [ 341.8, 119.52472547552662 ], "wc_reply_reviewers_avg": [ 33.6, 38.949197681081955 ], "wc_reply_authors_avg": [ 79.6, 20.175232340669584 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15586475146491785677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "The geometry of hidden representations of large transformer models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71102", "id": "cCYvakU5Ek", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a0e66093d7168b40246af1cddc025daa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cCYvakU5Ek", "openreview": "https://openreview.net/forum?id=cCYvakU5Ek", "poster": "/media/PosterPDFs/NeurIPS%202023/71102.png?t=1701703147.7253094", "slides": "https://nips.cc/virtual/2023/poster/71102", "video": "https://nips.cc/virtual/2023/poster/71102", "author_site": "Lucrezia Valeriani, Diego Doimo, Francesca Cuturello, Alessandro Laio, Alessio Ansuini, Alberto Cazzaniga", "tldr": "", "abstract": "Large transformers are powerful architectures used for self-supervised data analysis across various data types, including protein sequences, images, and text. In these models, the semantic structure of the dataset emerges from a sequence of transformations between one representation and the next. \nWe characterize the geometric and statistical properties of these representations and how they change as we move through the layers.\nBy analyzing the intrinsic dimension (ID) and neighbor composition, we find that the representations evolve similarly in transformers trained on protein language taskand image reconstruction tasks. In the first layers, the data manifold expands, becoming high-dimensional, and then contracts significantly in the intermediate layers. In the last part of the model, the ID remains approximately constant or forms a second shallow peak. 
\nWe show that the semantic information of the dataset is better expressed at the end of the first peak, and this phenomenon can be observed across many models trained on diverse datasets.\nBased on our findings, we point out an explicit strategy to identify, without supervision, the layers that maximize semantic content: representations at intermediate layers corresponding to a relative minimum of the ID profile are more suitable for downstream learning tasks.", "keywords": "Representations;transformers;geometry;interpretability", "primary_area": "", "supplementary_material": "/attachment/37cae9b0b83dd7a215bdf900bc7ff132f38cd301.zip", "author": "Lucrezia Valeriani;Diego Doimo;Francesca Cuturello;Alessandro Laio;Alessio ansuini;Alberto Cazzaniga", "authorids": "~Lucrezia_Valeriani1;~Diego_Doimo1;~Francesca_Cuturello1;~Alessandro_Laio1;~Alessio_ansuini1;~Alberto_Cazzaniga1", "gender": "F;;F;M;M;M", "homepage": ";;;https://people.sissa.it/~laio/;;https://areasciencepark-rit.gitlab.io/lade/alberto.cazzaniga/", "dblp": "339/6830;270/0353;339/6516;;232/2196;339/6443", "google_scholar": "https://scholar.google.com/citations?hl=it;yu7h58MAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.it/citations?user=ma-T1oEAAAAJ;6lhdu6kAAAAJ;AmafJqIAAAAJ", "orcid": ";0000-0002-1553-1504;;;0000-0002-3117-3532;0000-0001-6271-3303", "linkedin": "lucrezia-valeriani-827909194;diego-doimo-84575b158;;;alessioansuini/;alberto-cazzaniga-4155b6164/", "or_profile": "~Lucrezia_Valeriani1;~Diego_Doimo1;~Francesca_Cuturello1;~Alessandro_Laio1;~Alessio_ansuini1;~Alberto_Cazzaniga1", "aff": "University of Trieste;Area Science Park;AREA Science Park;SISSA/ISAS;AREA Science Park;AREA Science Park", "aff_domain": "units.it;areasciencepark.it;areasciencepark.it;sissa.it;areasciencepark.it;areasciencepark.it", "position": "PhD student;Researcher;Researcher;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nvaleriani2023the,\ntitle={The geometry of hidden representations of large transformer models},\nauthor={Lucrezia Valeriani and Diego Doimo and Francesca Cuturello and Alessandro Laio and Alessio ansuini and Alberto Cazzaniga},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cCYvakU5Ek}\n}", "github": "", "project": "", "reviewers": "K92u;kxbE;wEGg;4o8z", "pdf_size": 754523, "rating": "5;6;7;9", "confidence": "1;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;2;4", "wc_summary": "151;94;154;87", "wc_strengths": "44;143;185;170", "wc_weaknesses": "73;193;470;164", "wc_questions": "2;212;240;184", "wc_limitations": "10;88;36;14", "wc_review": "280;730;1085;619", "wc_reply_reviewers": "21;44;59;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 121.5, 31.11671576500322 ], "wc_strengths_avg": [ 135.5, 54.92950027080166 ], "wc_weaknesses_avg": [ 225.0, 148.2177452264067 ], "wc_questions_avg": [ 159.5, 93.06315060215832 ], "wc_limitations_avg": [ 37.0, 31.064449134018133 ], "wc_review_avg": [ 678.5, 287.33125482620227 ], "wc_reply_reviewers_avg": [ 36.75, 15.690363284513205 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], 
"replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6831300510639733, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6746575805702882005&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "units.it;areasciencepark.it;areasciencepark.it;sissa.it;areasciencepark.it;areasciencepark.it", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "University of Trieste;Area Science Park;Scuola Internazionale Superiore di Studi Avanzati", "aff_unique_dep": ";;", "aff_unique_url": "https://www.units.it;;https://www.sissa.it", "aff_unique_abbr": "UniTS;;SISSA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy;" }, { "title": "Bitstream-Corrupted Video Recovery: A Novel Benchmark Dataset and Method", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73532", "id": "cF6rQz8V3V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d7928f6dfb0c30d6a6917587dacbe4bc-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=cF6rQz8V3V", "openreview": "https://openreview.net/forum?id=cF6rQz8V3V", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73532", "video": "https://nips.cc/virtual/2023/poster/73532", "author_site": "Tianyi Liu, Kejun Wu, Yi Wang, Wenyang Liu, Kim-Hui Yap, Lap-Pui Chau", "tldr": "", "abstract": "The past decade has witnessed great strides in video recovery by specialist technologies, like video inpainting, completion, and error concealment. However, they typically simulate the missing content by manual-designed error masks, thus failing to fill in the realistic video loss in video communication (e.g., telepresence, live streaming, and internet video) and multimedia forensics. To address this, we introduce the bitstream-corrupted video (BSCV) benchmark, the first benchmark dataset with more than 28,000 video clips, which can be used for bitstream-corrupted video recovery in the real world. The BSCV is a collection of 1) a proposed three-parameter corruption model for video bitstream, 2) a large-scale dataset containing rich error patterns, multiple corruption levels, and flexible dataset branches, and 3) a new video recovery framework that serves as a benchmark. We evaluate state-of-the-art video inpainting methods on the BSCV dataset, demonstrating existing approaches' limitations and our framework's advantages in solving the bitstream-corrupted video recovery problem. 
The benchmark and dataset are released at https://github.com/LIUTIGHE/BSCV-Dataset.", "keywords": "Video recovery;bitstream corruption;benchmark dataset", "primary_area": "", "supplementary_material": "/attachment/658c499aa11e53916cda1987cd4b0ed9dcccc8ec.pdf", "author": "Tianyi Liu;Kejun Wu;YI WANG;Wenyang Liu;Kim-Hui Yap;Lap-Pui Chau", "authorids": "~Tianyi_Liu6;~Kejun_Wu1;~YI_WANG28;~Wenyang_Liu2;~Kim-Hui_Yap1;~Lap-Pui_Chau3", "gender": "M;M;M;M;;M", "homepage": ";;https://wangyintu.github.io;https://github.com/wenyang001;;https://www.polyu.edu.hk/eee/people/academic-staff-and-teaching-staff/prof-chau-lap-pui/", "dblp": ";;17/221-68;;49/1306;03/5597.html", "google_scholar": ";;https://scholar.google.com.sg/citations?user=MAG909MAAAAJ;;https://scholar.google.com.sg/citations?user=nr86m98AAAAJ;MYREIH0AAAAJ", "orcid": "0000-0002-6705-7808;0000-0001-9859-9573;0000-0001-8659-4724;0009-0004-3226-0920;;0000-0003-4932-0593", "linkedin": ";;yi-wang-479757ab/;;;", "or_profile": "~Tianyi_Liu6;~Kejun_Wu1;~YI_WANG28;~Wenyang_Liu2;~Kim-Hui_Yap1;~Lap-pui_Chau1", "aff": "Nanyang Technological University;;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;The Hong Kong Polytechnic University", "aff_domain": "ntu.edu.sg;;ntu.edu.sg;ntu.edu;ntu.edu.sg;polyu.edu.hk", "position": "PhD student;;Postdoc;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023bitstreamcorrupted,\ntitle={Bitstream-Corrupted Video Recovery: A Novel Benchmark Dataset and Method},\nauthor={Tianyi Liu and Kejun Wu and YI WANG and Wenyang Liu and Kim-Hui Yap and Lap-Pui Chau},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=cF6rQz8V3V}\n}", "github": "", "project": "", "reviewers": "rrQM;BsWN;gEor;rSJL;f8pj", "pdf_size": 11414479, "rating": "5;5;5;7;8", "confidence": "4;4;4;4;3", "wc_summary_and_contributions": "44;86;60;82;98", "wc_strengths": "27;8;45;54;49", "wc_improvement": "23;23;134;63;22", "wc_limitations": "62;41;6;25;9", "wc_correctness": "11;1;1;1;8", "wc_clarity": "4;1;1;1;6", "wc_relation_to_prior_work": "7;30;1;1;1", "wc_documentation": "20;1;1;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "199;192;250;229;195", "wc_reply_reviewers": "0;0;0;359;0", "wc_reply_authors": "1181;1405;1908;2629;234", "reply_reviewers": "0;0;0;4;0", "reply_authors": "2;4;4;5;2", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 74.0, 19.390719429665317 ], "wc_strengths_avg": [ 36.6, 16.95405556201819 ], "wc_improvement_avg": [ 53.0, 43.40967634065014 ], "wc_limitations_avg": [ 28.6, 20.86719914123599 ], "wc_correctness_avg": [ 4.4, 4.2708313008125245 ], "wc_clarity_avg": [ 2.6, 2.0591260281974 ], "wc_relation_to_prior_work_avg": [ 8.0, 11.242775458044157 ], "wc_documentation_avg": [ 4.8, 7.6000000000000005 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 213.0, 22.74203157151973 ], "wc_reply_reviewers_avg": [ 71.8, 143.6 ], "wc_reply_authors_avg": [ 1471.4, 793.7955908166788 ], "reply_reviewers_avg": [ 0.8, 1.6 ], "reply_authors_avg": [ 3.4, 1.2 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17212495371035401638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
"ntu.edu.sg;;ntu.edu.sg;ntu.edu;ntu.edu.sg;polyu.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Nanyang Technological University;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.polyu.edu.hk", "aff_unique_abbr": "NTU;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Singapore;China" }, { "title": "NeuroGF: A Neural Representation for Fast Geodesic Distance and Path Queries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71101", "id": "cGdGh3Mp2W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3e22abb329d44080460b0eb11bf21da1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cGdGh3Mp2W", "openreview": "https://openreview.net/forum?id=cGdGh3Mp2W", "poster": "/media/PosterPDFs/NeurIPS%202023/71101.png?t=1698163954.9725714", "slides": "https://nips.cc/virtual/2023/poster/71101", "video": "https://nips.cc/virtual/2023/poster/71101", "author_site": "Qijian Zhang, Junhui Hou, Yohanes Adikusuma, Wenping Wang, Ying He", "tldr": "", "abstract": "Geodesics play a critical role in many geometry processing applications. Traditional algorithms for computing geodesics on 3D mesh models are often inefficient and slow, which make them impractical for scenarios requiring extensive querying of arbitrary point-to-point geodesics. Recently, deep implicit functions have gained popularity for 3D geometry representation, yet there is still no research on neural implicit representation of geodesics. To bridge this gap, we make the first attempt to represent geodesics using implicit learning frameworks. Specifically, we propose neural geodesic field (NeuroGF), which can be learned to encode all-pairs geodesics of a given 3D mesh model, enabling to efficiently and accurately answer queries of arbitrary point-to-point geodesic distances and paths. Evaluations on common 3D object models and real-captured scene-level meshes demonstrate our exceptional performances in terms of representation accuracy and querying efficiency. Besides, NeuroGF also provides a convenient way of jointly encoding both 3D geometry and geodesics in a unified representation. Moreover, the working mode of per-model overfitting is further extended to generalizable learning frameworks that can work on various input formats such as unstructured point clouds, which also show satisfactory performances for unseen shapes and categories. 
Our code and data are available at https://github.com/keeganhk/NeuroGF.", "keywords": "geodesic distance;implicit representation;3D geometry", "primary_area": "", "supplementary_material": "/attachment/e3c3c0f98f769b3f1cd38f7de6e8e7a34ce9c6a0.zip", "author": "Qijian Zhang;Junhui Hou;Yohanes Yudhi Adikusuma;Wenping Wang;Ying He", "authorids": "~Qijian_Zhang1;~Junhui_Hou2;~Yohanes_Yudhi_Adikusuma1;~Wenping_Wang1;~Ying_He1", "gender": "M;M;M;M;M", "homepage": "https://keeganhk.github.io/;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html;https://personal.ntu.edu.sg/yhe/", "dblp": "201/6850.html;122/2673.html;263/9660;;h/YingHe1", "google_scholar": "4NIiTYgAAAAJ;j6eefhwAAAAJ;;28shvv0AAAAJ;ISNmBxwAAAAJ", "orcid": "0000-0003-4723-6136;0000-0003-3431-2021;;0000-0002-2284-3952;0000-0002-6749-4485", "linkedin": ";;yohanes-yudhi-239a0117b/;;", "or_profile": "~Qijian_Zhang1;~Junhui_Hou2;~Yohanes_Yudhi_Adikusuma1;~Wenping_Wang1;~Ying_He1", "aff": "City University of Hong Kong;City University of Hong Kong;Bigo Technology;Texas A&M University - College Station;Nanyang Technological University", "aff_domain": "cityu.edu.hk;cityu.edu.hk;bigo.sg;tamu.edu;ntu.edu.sg", "position": "PhD student;Assistant Professor;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023neurogf,\ntitle={Neuro{GF}: A Neural Representation for Fast Geodesic Distance and Path Queries},\nauthor={Qijian Zhang and Junhui Hou and Yohanes Yudhi Adikusuma and Wenping Wang and Ying He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cGdGh3Mp2W}\n}", "github": "", "project": "", "reviewers": "yBGS;Ujtd;oZNk;Racd;nwTQ", "pdf_size": 4687849, "rating": "3;4;4;5;6", "confidence": "5;5;4;4;3", "soundness": "3;3;2;3;3", "novelty": "1;2;2;2;3", "presentation": "3;4;3;3;3", "wc_summary": "88;111;104;36;170", "wc_strengths": "25;27;39;46;81", "wc_weaknesses": "248;58;127;51;111", "wc_questions": "61;59;34;89;160", "wc_limitations": "91;1;32;1;79", "wc_review": "513;256;336;223;601", "wc_reply_reviewers": "0;0;0;23;201", "wc_reply_authors": "54;54;54;40;733", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 4.4, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 101.8, 43.036728500200844 ], "wc_strengths_avg": [ 43.6, 20.23462379190678 ], "wc_weaknesses_avg": [ 119.0, 70.87171509142416 ], "wc_questions_avg": [ 80.6, 43.35250857793583 ], "wc_limitations_avg": [ 40.8, 38.01262948021354 ], "wc_review_avg": [ 385.8, 147.1888582739876 ], "wc_reply_reviewers_avg": [ 44.8, 78.60636106575599 ], "wc_reply_authors_avg": [ 187.0, 273.0538408446217 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8910421112136307, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2488033502350913112&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 10, "email": "cityu.edu.hk;cityu.edu.hk;bigo.sg;tamu.edu;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "City University of Hong Kong;Bigo Technology;Texas A&M University;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.cityu.edu.hk;https://www.bigo.sg;https://www.tamu.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "CityU;;TAMU;NTU", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Hong Kong SAR;;College Station", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "China;United States;Singapore" }, { "title": "On the Role of Entanglement and Statistics in Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71100", "id": "cGeLeh995N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/acb7ce5aab6e134300a2361dd90a501f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cGeLeh995N", "openreview": "https://openreview.net/forum?id=cGeLeh995N", "poster": "/media/PosterPDFs/NeurIPS%202023/71100.png?t=1702230576.0108395", "slides": "https://nips.cc/virtual/2023/poster/71100", "video": "https://nips.cc/virtual/2023/poster/71100", "author_site": "Srinivasan Arunachalam, Vojtech Havlicek, Louis Schatzki", "tldr": "", "abstract": "In this work we make progress in understanding the relationship between learning models when given access to entangled measurements, separable measurements and statistical measurements in the quantum statistical query ($\\mathsf{QSQ}$) model. To this end, we show the following results.\n\n$\\textbf{Entanglement versus separable measurements.}$ The goal here is to learn an unknown $f$ from the concept class $\\mathcal{C} \\subseteq \\{f:\\{0,1\\}^n\\rightarrow [k]\\}$ given copies of $\\frac{1}{\\sqrt{2^n}}\\sum_x \\ket{x,f(x)}$. We show that, if $T$ copies suffice to learn $f$ using entangled measurements, then $O(nT^2)$ copies suffice to learn $f$ using just separable measurements. Additionally, we exhibit a concept class $\\mathcal{C}$ for which, in order to learn some \\emph{property} of $f$, the sample complexity of learning using entangled measurements is exponentially smaller than separable measurements.\n\n$\\textbf{Entangled versus statistical measurements}$ The goal here is to learn a function $f \\in \\mathcal{C}$ given access to separable measurements and statistical measurements. We exhibit a concept class $\\mathcal{C}$ based on degree-$2$ functions that gives an exponential separation between $\\mathsf{QSQ}$ learning and quantum learning with entangled measurements (even in the presence of noise). This proves the \"quantum analogue\" of the seminal result of (Blum, 2003) that separates classical $\\mathsf{SQ}$ learning from classical $\\mathsf{PAC}$ learning with classification~noise.\n\n$\\textbf{$\\mathsf{QSQ}$ lower bounds for learning states.}$ The main technical contribution is to introduce a quantum statistical query dimension ($\\mathsf{QSDA}$), which we use to give lower bounds on the $\\mathsf{QSQ}$ complexity of learning. Using this, we prove exponential $\\mathsf{QSQ}$ lower bounds for testing purity of quantum states, learning CCHL states, coset states of Abelian groups, degree-$2$ functions, planted bi-clique states and learning output states of Clifford circuits of depth polylog($n$).\n\n$\\textbf{Further applications.}$ Using our $\\mathsf{QSQ}$ lower bounds give an $\\textit{unconditional}$ separation between weak and strong error mitigation and prove lower bounds for learning distributions in the $\\mathsf{QSQ}$ model. 
Prior works by (Quek et al., 2022), (Hinsche et al., 2022), and (Neitner et al., 23) proved the analogous results $\\textit{assuming}$ diagonal measurements and our work removes this assumption.", "keywords": "Quantum Computing;Statistical Learning;Quantum learning theory;Entanglement", "primary_area": "", "supplementary_material": "/attachment/f7fdf9afba35840351ad97d391b22134280383d9.pdf", "author": "Srinivasan A;Vojt\u011bch Havl\u00ed\u010dek;Louis Schatzki", "authorids": "~Srinivasan_A1;vojtech.havlicek@ibm.com;~Louis_Schatzki1", "gender": "M;;", "homepage": ";;", "dblp": "https://dblp.uni-trier.de/pers/hd/a/Arunachalam:Srinivasan;;", "google_scholar": "A7SyWJsAAAAJ;;48KNNksAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Srinivasan_A1;vojtech.havlicek@ibm.com;~Louis_Schatzki1", "aff": "International Business Machines;;University of Illinois, Urbana-Champaign", "aff_domain": "ibm.com;;illinois.edu", "position": "Assistant Professor;;PhD student", "bibtex": "@inproceedings{\na2023on,\ntitle={On the Role of Entanglement and Statistics in Learning},\nauthor={Srinivasan A and Vojt{\\v{e}}ch Havl{\\'\\i}{\\v{c}}ek and Louis Schatzki},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cGeLeh995N}\n}", "github": "", "project": "", "reviewers": "UDTy;AabW;JWWq;gCmC", "pdf_size": 396520, "rating": "6;6;7;8", "confidence": "4;3;4;4", "soundness": "4;4;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "130;95;187;128", "wc_strengths": "83;48;166;105", "wc_weaknesses": "378;14;54;103", "wc_questions": "77;16;76;14", "wc_limitations": "7;1;12;14", "wc_review": "675;174;495;364", "wc_reply_reviewers": "47;14;11;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 135.0, 33.08322837934654 ], "wc_strengths_avg": [ 100.5, 42.933087473416116 ], "wc_weaknesses_avg": [ 137.25, 142.52609410209766 ], "wc_questions_avg": [ 45.75, 30.760160922856045 ], "wc_limitations_avg": [ 8.5, 5.024937810560445 ], "wc_review_avg": [ 427.0, 183.10243034979084 ], "wc_reply_reviewers_avg": [ 18.0, 17.53567791675018 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11747888993587405083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ibm.com;;illinois.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "International Business Machines Corporation;University of Illinois", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://illinois.edu", "aff_unique_abbr": "IBM;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "HIQL: Offline Goal-Conditioned RL with Latent States as Actions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71099", "id": "cLQCCtVDuW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d7c4a0727e089ed6cdd3151cbe8d8ba-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=cLQCCtVDuW", "openreview": "https://openreview.net/forum?id=cLQCCtVDuW", "poster": "/media/PosterPDFs/NeurIPS%202023/71099.png?t=1698512444.6673064", "slides": "https://nips.cc/virtual/2023/poster/71099", "video": "https://nips.cc/virtual/2023/poster/71099", "author_site": "Seohong Park, Dibya Ghosh, Benjamin Eysenbach, Sergey Levine", "tldr": "", "abstract": "Unsupervised pre-training has recently become the bedrock for computer vision and natural language processing. In reinforcement learning (RL), goal-conditioned RL can potentially provide an analogous self-supervised approach for making use of large quantities of unlabeled (reward-free) data. However, building effective algorithms for goal-conditioned RL that can learn directly from diverse offline data is challenging, because it is hard to accurately estimate the exact value function for faraway goals. Nonetheless, goal-reaching problems exhibit structure, such that reaching distant goals entails first passing through closer subgoals. This structure can be very useful, as assessing the quality of actions for nearby goals is typically easier than for more distant goals. Based on this idea, we propose a hierarchical algorithm for goal-conditioned RL from offline data. Using one action-free value function, we learn two policies that allow us to exploit this structure: a high-level policy that treats states as actions and predicts (a latent representation of) a subgoal and a low-level policy that predicts the action for reaching this subgoal. Through analysis and didactic examples, we show how this hierarchical decomposition makes our method robust to noise in the estimated value function. We then apply our method to offline goal-reaching benchmarks, showing that our method can solve long-horizon tasks that stymie prior methods, can scale to high-dimensional image observations, and can readily make use of action-free data. 
Our code is available at https://seohong.me/projects/hiql/", "keywords": "reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/8850f2ee384b5ad4b1d069c03bd49d17a8c53b8c.pdf", "author": "Seohong Park;Dibya Ghosh;Benjamin Eysenbach;Sergey Levine", "authorids": "~Seohong_Park1;~Dibya_Ghosh1;~Benjamin_Eysenbach1;~Sergey_Levine1", "gender": ";M;M;M", "homepage": "https://seohong.me/;https://dibyaghosh.com;https://ben-eysenbach.github.io/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "227/6308;210/2547;192/1863;80/7594", "google_scholar": ";znnl0kwAAAAJ;DRnOvU8AAAAJ;8R35rCwAAAAJ", "orcid": ";;0009-0000-7136-6307;", "linkedin": ";;benjamin-eysenbach-a7235775/;", "or_profile": "~Seohong_Park1;~Dibya_Ghosh1;~Benjamin_Eysenbach1;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University;Google", "aff_domain": "berkeley.edu;berkeley.edu;cmu.edu;google.com", "position": "PhD student;PhD student;PhD student;Research Scientist", "bibtex": "@inproceedings{\npark2023hiql,\ntitle={{HIQL}: Offline Goal-Conditioned {RL} with Latent States as Actions},\nauthor={Seohong Park and Dibya Ghosh and Benjamin Eysenbach and Sergey Levine},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cLQCCtVDuW}\n}", "github": "", "project": "", "reviewers": "kqhw;ZY68;cj3e;xrL8;tvKa", "pdf_size": 6317323, "rating": "7;7;7;7;7", "confidence": "4;4;3;5;4", "soundness": "4;3;4;4;3", "novelty": "3;3;3;2;3", "presentation": "4;4;3;4;3", "wc_summary": "77;88;129;70;77", "wc_strengths": "91;69;109;102;22", "wc_weaknesses": "319;32;214;124;23", "wc_questions": "167;173;34;21;12", "wc_limitations": "37;1;3;1;3", "wc_review": "691;363;489;318;137", "wc_reply_reviewers": "88;37;92;52;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 88.2, 21.19811312357777 ], "wc_strengths_avg": [ 78.6, 31.372599509763294 ], "wc_weaknesses_avg": [ 142.4, 112.33628087131957 ], "wc_questions_avg": [ 81.4, 72.70378257009742 ], "wc_limitations_avg": [ 9.0, 14.028542333400146 ], "wc_review_avg": [ 399.6, 184.31668399794958 ], "wc_reply_reviewers_avg": [ 53.8, 34.08460062843629 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11453142559608035049&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "berkeley.edu;berkeley.edu;cmu.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of California, Berkeley;Carnegie Mellon University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;CMU;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Imbalanced Regression: Fair Uncertainty Quantification via Probabilistic Smoothing", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/71098", "id": "cMUBkkTrMo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/612a56f193d031687683445cd0001083-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cMUBkkTrMo", "openreview": "https://openreview.net/forum?id=cMUBkkTrMo", "poster": "/media/PosterPDFs/NeurIPS%202023/71098.png?t=1699476021.7761052", "slides": "https://nips.cc/virtual/2023/poster/71098", "video": "https://nips.cc/virtual/2023/poster/71098", "author_site": "Ziyan Wang, Hao Wang", "tldr": "", "abstract": "Existing regression models tend to fall short in both accuracy and uncertainty estimation when the label distribution is imbalanced. In this paper, we propose a probabilistic deep learning model, dubbed variational imbalanced regression (VIR), which not only performs well in imbalanced regression but naturally produces reasonable uncertainty estimation as a byproduct. \nDifferent from typical variational autoencoders assuming I.I.D. representations (a data point's representation is not directly affected by other data points), our VIR borrows data with similar regression labels to compute the latent representation's variational distribution; furthermore, different from deterministic regression models producing point estimates, VIR predicts the entire normal-inverse-gamma distributions and modulates the associated conjugate distributions to impose probabilistic reweighting on the imbalanced data, thereby providing better uncertainty estimation. Experiments in several real-world datasets show that our VIR can outperform state-of-the-art imbalanced regression models in terms of both accuracy and uncertainty estimation. Code will soon be available at https://github.com/Wang-ML-Lab/variational-imbalanced-regression.", "keywords": "probabilistic methods;imbalanced regression;variational inference", "primary_area": "", "supplementary_material": "/attachment/e5e39d8031c70dcf7fc2a9f1213069554b07b029.pdf", "author": "Ziyan Wang;Hao Wang", "authorids": "~Ziyan_Wang4;~Hao_Wang3", "gender": "M;M", "homepage": ";http://www.wanghao.in", "dblp": ";w/HaoWang-14", "google_scholar": "BP_1to8AAAAJ;NrOA9QoAAAAJ", "orcid": "0000-0001-5624-5275;", "linkedin": ";", "or_profile": "~Ziyan_Wang4;~Hao_Wang4", "aff": "Georgia Institute of Technology;Rutgers University", "aff_domain": "gatech.edu;cs.rutgers.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023variational,\ntitle={Variational Imbalanced Regression: Fair Uncertainty Quantification via Probabilistic Smoothing},\nauthor={Ziyan Wang and Hao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cMUBkkTrMo}\n}", "github": "", "project": "", "reviewers": "cFEB;C6MR;ivhk;9XGJ", "pdf_size": 622925, "rating": "6;7;7;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;2;3", "wc_summary": "181;85;38;56", "wc_strengths": "89;87;52;135", "wc_weaknesses": "188;15;116;29", "wc_questions": "115;1;91;215", "wc_limitations": "5;16;8;6", "wc_review": "578;204;305;441", "wc_reply_reviewers": "0;0;57;220", "wc_reply_authors": "0;0;330;74", "reply_reviewers": "0;0;2;2", "reply_authors": "1;1;3;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.0, 55.14979601050216 ], 
"wc_strengths_avg": [ 90.75, 29.4819860253681 ], "wc_weaknesses_avg": [ 87.0, 69.98214057886483 ], "wc_questions_avg": [ 105.5, 76.17578355356773 ], "wc_limitations_avg": [ 8.75, 4.322904116447646 ], "wc_review_avg": [ 382.0, 140.98758810618756 ], "wc_reply_reviewers_avg": [ 69.25, 90.09266063337235 ], "wc_reply_authors_avg": [ 101.0, 135.62079486568422 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16747251921752069664&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "gatech.edu;cs.rutgers.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.rutgers.edu", "aff_unique_abbr": "Georgia Tech;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PanoGen: Text-Conditioned Panoramic Environment Generation for Vision-and-Language Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71097", "id": "cNObl6QQEH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4522de4178bddb36b49aa26efad537cf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cNObl6QQEH", "openreview": "https://openreview.net/forum?id=cNObl6QQEH", "poster": "/media/PosterPDFs/NeurIPS%202023/71097.png?t=1702054935.7585154", "slides": "https://nips.cc/virtual/2023/poster/71097", "video": "https://nips.cc/virtual/2023/poster/71097", "author_site": "Jialu Li, Mohit Bansal", "tldr": "", "abstract": "Vision-and-Language Navigation requires the agent to follow language instructions to navigate through 3D environments. One main challenge in Vision-and-Language Navigation is the limited availability of photorealistic training environments, which makes it hard to generalize to new and unseen environments. To address this problem, we propose PanoGen, a generation method that can potentially create an infinite number of diverse panoramic environments conditioned on text. Specifically, we collect room descriptions by captioning the room images in existing Matterport3D environments, and leverage a state-of-the-art text-to-image diffusion model to generate the new panoramic environments. We use recursive outpainting over the generated images to create consistent 360-degree panorama views. Our new panoramic environments share similar semantic information with the original environments by conditioning on text descriptions, which ensures the co-occurrence of objects in the panorama follows human intuition, and creates enough diversity in room appearance and layout with image outpainting. Lastly, we explore two ways of utilizing PanoGen in VLN pre-training and fine-tuning. We generate instructions for paths in our PanoGen environments with a speaker built on a pre-trained vision-and-language model for VLN pre-training, and augment the visual observation with our panoramic environments during agents' fine-tuning to avoid overfitting to seen environments. Empirically, learning with our PanoGen environments achieves the new state-of-the-art on the Room-to-Room, Room-for-Room, and CVDN datasets. 
Besides, we find that pre-training with our PanoGen speaker data is especially effective for CVDN, which has under-specified instructions and needs commonsense knowledge to reach the target. Lastly, we show that the agent can benefit from training with more generated panoramic environments, suggesting promising results for scaling up the PanoGen environments to enhance agents' generalization to unseen environments.", "keywords": "Vision-and-Language Navigation;diffusion models;image inpainting for panorama generation", "primary_area": "", "supplementary_material": "/attachment/f471995286fe7428be489d5f0ceccfefbdc3a5ce.zip", "author": "Jialu Li;Mohit Bansal", "authorids": "~Jialu_Li2;~Mohit_Bansal2", "gender": "F;M", "homepage": "https://jialuli-luka.github.io/;https://www.cs.unc.edu/~mbansal/", "dblp": ";32/5243.html", "google_scholar": "KyI1vSgAAAAJ;DN8QtscAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jialu_Li2;~Mohit_Bansal2", "aff": "Apple;University of North Carolina at Chapel Hill", "aff_domain": "apple.com;unc.edu", "position": "Intern;Full Professor", "bibtex": "@inproceedings{\nli2023panogen,\ntitle={PanoGen: Text-Conditioned Panoramic Environment Generation for Vision-and-Language Navigation},\nauthor={Jialu Li and Mohit Bansal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cNObl6QQEH}\n}", "github": "", "project": "", "reviewers": "tReg;PpHD;wyXn;VRWy;Wxjf", "pdf_size": 8479294, "rating": "4;5;7;7;8", "confidence": "5;3;4;3;5", "soundness": "2;3;4;4;3", "novelty": "2;3;4;3;3", "presentation": "3;3;4;3;3", "wc_summary": "56;77;54;101;58", "wc_strengths": "24;53;37;136;49", "wc_weaknesses": "161;124;130;160;70", "wc_questions": "14;62;20;20;4", "wc_limitations": "1;13;1;1;4", "wc_review": "256;329;242;418;185", "wc_reply_reviewers": "0;279;10;13;0", "wc_reply_authors": "0;447;16;17;0", "reply_reviewers": "0;3;1;1;0", "reply_authors": "1;4;2;2;1", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 69.2, 17.904189453868053 ], "wc_strengths_avg": [ 59.8, 39.42283602177804 ], "wc_weaknesses_avg": [ 129.0, 33.14211821836378 ], "wc_questions_avg": [ 24.0, 19.879637823662684 ], "wc_limitations_avg": [ 4.0, 4.6475800154489 ], "wc_review_avg": [ 286.0, 80.38656604184557 ], "wc_reply_reviewers_avg": [ 60.4, 109.42504283755159 ], "wc_reply_authors_avg": [ 96.0, 175.65534435365183 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8462532636411246691&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "apple.com;unc.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Apple;University of North Carolina", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.unc.edu", "aff_unique_abbr": "Apple;UNC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Scalable Neural Network for DSIC Affine Maximizer Auction Design", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71096", "id": "cNb5hkTfGC", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/af31604708f3e44b4de9fdfa6dcaa9d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cNb5hkTfGC", "openreview": "https://openreview.net/forum?id=cNb5hkTfGC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71096", "video": "https://nips.cc/virtual/2023/poster/71096", "author_site": "Zhijian Duan, Haoran Sun, Yurong Chen, Xiaotie Deng", "tldr": "", "abstract": "Automated auction design aims to find empirically high-revenue mechanisms through machine learning. Existing works on multi item auction scenarios can be roughly divided into RegretNet-like and affine maximizer auctions (AMAs) approaches. However, the former cannot strictly ensure dominant strategy incentive compatibility (DSIC), while the latter faces scalability issue due to the large number of allocation candidates. To address these limitations, we propose AMenuNet, a scalable neural network that constructs the AMA parameters (even including the allocation menu) from bidder and item representations. AMenuNet is always DSIC and individually rational (IR) due to the properties of AMAs, and it enhances scalability by generating candidate allocations through a neural network. Additionally, AMenuNet is permutation equivariant, and its number of parameters is independent of auction scale. We conduct extensive experiments to demonstrate that AMenuNet outperforms strong baselines in both contextual and non-contextual multi-item auctions, scales well to larger auctions, generalizes well to different settings, and identifies useful deterministic allocations. Overall, our proposed approach offers an effective solution to automated DSIC auction design, with improved scalability and strong revenue performance in various settings.", "keywords": "Automated Mechanism Design;Auction Design;Affine Maximizer Auctions;Deep Learning;Game Theory", "primary_area": "", "supplementary_material": "", "author": "Zhijian Duan;Haoran Sun;Yurong Chen;Xiaotie Deng", "authorids": "~Zhijian_Duan1;~Haoran_Sun6;~Yurong_Chen3;~Xiaotie_Deng1", "gender": "M;M;F;M", "homepage": "https://zjduan.github.io/;https://github.com/knightt0301;https://ruyc.github.io/;https://cfcs.pku.edu.cn/english/people/faculty/xiaotiedeng/index.htm", "dblp": "170/9206-1;;02/41-2;d/XiaotieDeng", "google_scholar": "7pZHSbkAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=OBUwP_oAAAAJ", "orcid": "0000-0002-4696-2139;;0000-0003-0659-7154;0000-0002-5282-6467", "linkedin": ";;;", "or_profile": "~Zhijian_Duan1;~Haoran_Sun6;~Yurong_Chen3;~Xiaotie_Deng1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nduan2023a,\ntitle={A Scalable Neural Network for {DSIC} Affine Maximizer Auction Design},\nauthor={Zhijian Duan and Haoran Sun and Yurong Chen and Xiaotie Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cNb5hkTfGC}\n}", "github": "", "project": "", "reviewers": "Dfp6;1hHm;MRew;Zst9", "pdf_size": 316589, "rating": "5;7;8;8", "confidence": "5;5;4;5", "soundness": "4;3;4;4", "novelty": "2;3;4;4", "presentation": "4;4;4;3", "wc_summary": "42;146;31;286", "wc_strengths": "90;103;229;37", "wc_weaknesses": "93;463;65;76", "wc_questions": "28;174;13;109", "wc_limitations": 
"1;15;1;26", "wc_review": "254;901;339;534", "wc_reply_reviewers": "105;751;0;7", "wc_reply_authors": "490;1270;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;4;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.25, 102.56796527181379 ], "wc_strengths_avg": [ 114.75, 70.44279594110387 ], "wc_weaknesses_avg": [ 174.25, 167.00804621334865 ], "wc_questions_avg": [ 81.0, 64.93458246574009 ], "wc_limitations_avg": [ 10.75, 10.497023387608508 ], "wc_review_avg": [ 507.0, 249.09737052004382 ], "wc_reply_reviewers_avg": [ 215.75, 311.80232119084684 ], "wc_reply_authors_avg": [ 440.0, 519.2783453986889 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9138732175397989251&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "The Curious Price of Distributional Robustness in Reinforcement Learning with a Generative Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71095", "id": "cOQH8YO255", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc8ee7c7ab5b5f6b1615045dfb617ed6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cOQH8YO255", "openreview": "https://openreview.net/forum?id=cOQH8YO255", "poster": "/media/PosterPDFs/NeurIPS%202023/71095.png?t=1701415176.1424458", "slides": "https://nips.cc/virtual/2023/poster/71095", "video": "https://nips.cc/virtual/2023/poster/71095", "author_site": "Laixi Shi, Gen Li, Yuting Wei, Yuxin Chen, Matthieu Geist, Yuejie Chi", "tldr": "", "abstract": "This paper investigates model robustness in reinforcement learning (RL) via the framework of distributionally robust Markov decision processes (RMDPs). Despite recent efforts, the sample complexity of RMDPs is much less understood regardless of the uncertainty set in use; in particular, there exist large gaps between existing upper and lower bounds, and it is unclear if distributional robustness bears any statistical implications when benchmarked against standard RL. In this paper, assuming access to a generative model, we derive the sample complexity of RMDPs---when the uncertainty set is measured via either total variation or $\\chi^2$ divergence over the full range of uncertainty levels---using a model-based algorithm called distributionally robust value iteration, and develop minimax lower bounds to benchmark its tightness. Our results not only strengthen the prior art in both directions of upper and lower bounds, but also deliver surprising messages that learning RMDPs is not necessarily easier or more difficult than standard MDPs. In the case of total variation, we establish the minimax-optimal sample complexity of RMDPs which is always smaller than that of standard MDPs. 
In the case of $\\chi^2$ divergence, we establish the sample complexity of RMDPs that is tight up to polynomial factors of the effective horizon, and grows linearly with respect to the uncertainty level when it approaches infinity.", "keywords": "distributionally robust reinforcement learning;robust Markov decision processes;sample complexity", "primary_area": "", "supplementary_material": "/attachment/6fcda46118e34af8f80e155793cf9a5c4eb048f3.pdf", "author": "Laixi Shi;Gen Li;Yuting Wei;Yuxin Chen;Matthieu Geist;Yuejie Chi", "authorids": "~Laixi_Shi1;~Gen_Li2;~Yuting_Wei1;~Yuxin_Chen5;~Matthieu_Geist1;~Yuejie_Chi1", "gender": "F;M;F;M;M;", "homepage": "https://laixishi.github.io/;;https://yutingwei.github.io/;https://yuxinchen2020.github.io/;;", "dblp": "211/7965;28/538-5.html;184/3856;11/5123-2;38/6508;", "google_scholar": "V8RkRr8AAAAJ;https://scholar.google.com/citations?view_op=list_works;fsbXdAYAAAAJ;RtNVud4AAAAJ;ectPLEUAAAAJ;", "orcid": ";0000-0002-3078-9191;;0000-0001-9256-5815;;", "linkedin": ";;;;;", "or_profile": "~Laixi_Shi1;~Gen_Li2;~Yuting_Wei1;~Yuxin_Chen5;~Matthieu_Geist1;~Yuejie_Chi1", "aff": "Carnegie Mellon University;The Wharton School, University of Pennsylvania;The Wharton School, University of Pennsylvania;University of Pennsylvania;Google;", "aff_domain": "andrew.cmu.edu;wharton.upenn.edu;wharton.upenn.edu;upenn.edu;google.com;", "position": "PhD student;Postdoc;Assistant Professor;Associate Professor;Researcher;", "bibtex": "@inproceedings{\nshi2023the,\ntitle={The Curious Price of Distributional Robustness in Reinforcement Learning with a Generative Model},\nauthor={Laixi Shi and Gen Li and Yuting Wei and Yuxin Chen and Matthieu Geist and Yuejie Chi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cOQH8YO255}\n}", "github": "", "project": "", "reviewers": "fPn4;ZjPC;aUcN;wpEG;Pv5G", "pdf_size": 976948, "rating": "5;5;6;7;8", "confidence": "4;4;3;3;4", "soundness": "3;3;2;4;4", "novelty": "3;3;3;4;4", "presentation": "3;3;2;4;4", "wc_summary": "75;41;78;89;71", "wc_strengths": "234;27;34;69;31", "wc_weaknesses": "349;292;134;36;33", "wc_questions": "291;12;65;30;16", "wc_limitations": "18;31;2;1;2", "wc_review": "967;403;313;225;153", "wc_reply_reviewers": "197;111;0;0;11", "wc_reply_authors": "655;455;125;133;103", "reply_reviewers": "1;1;0;0;1", "reply_authors": "3;4;3;3;3", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 70.8, 16.054905792311583 ], "wc_strengths_avg": [ 79.0, 78.94048391034856 ], "wc_weaknesses_avg": [ 168.8, 130.33710139480624 ], "wc_questions_avg": [ 82.8, 105.76086232628778 ], "wc_limitations_avg": [ 10.8, 11.923086848631105 ], "wc_review_avg": [ 412.2, 289.81262912440513 ], "wc_reply_reviewers_avg": [ 63.8, 78.61144954775989 ], "wc_reply_authors_avg": [ 294.2, 222.35323249280637 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.21004201260420152, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8711356856468864685&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "andrew.cmu.edu;wharton.upenn.edu;wharton.upenn.edu;upenn.edu;google.com;", "author_num": 6, "aff_unique_index": 
"0;1;1;1;2", "aff_unique_norm": "Carnegie Mellon University;University of Pennsylvania;Google", "aff_unique_dep": ";The Wharton School;Google", "aff_unique_url": "https://www.cmu.edu;https://www.wharton.upenn.edu;https://www.google.com", "aff_unique_abbr": "CMU;UPenn Wharton;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GraphMP: Graph Neural Network-based Motion Planning with Efficient Graph Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71094", "id": "cQdc9Dyk4i", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/096961cae3c3423c44ea045aeb584e05-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cQdc9Dyk4i", "openreview": "https://openreview.net/forum?id=cQdc9Dyk4i", "poster": "/media/PosterPDFs/NeurIPS%202023/71094.png?t=1701650010.6040924", "slides": "https://nips.cc/virtual/2023/poster/71094", "video": "https://nips.cc/virtual/2023/poster/71094", "author_site": "Xiao Zang, Miao Yin, Jinqi Xiao, Saman Zonouz, Bo Yuan", "tldr": "", "abstract": "Motion planning, which aims to find a high-quality collision-free path in the configuration space, is a fundamental task in robotic systems. Recently, learning-based motion planners, especially the graph neural network-powered, have shown promising planning performance. However, though the state-of-the-art GNN planner can efficiently extract and learn graph information, its inherent mechanism is not well suited for graph search process, hindering its further performance improvement. To address this challenge and fully unleash the potential of GNN in motion planning, this paper proposes GraphMP, a neural motion planner for both low and high-dimensional planning tasks. With the customized model architecture and training mechanism design, GraphMP can simultaneously perform efficient graph pattern extraction and graph search processing, leading to strong planning performance. 
Experiments on a variety of environments, ranging from 2D Maze to 14D dual KUKA robotic arm, show that our proposed GraphMP achieves significant improvement on path quality and planning speed over the state-of-the-art learning-based and classical planners; while preserving the competitive success rate.", "keywords": "graph neural network;deep learning", "primary_area": "", "supplementary_material": "/attachment/94d15a1526259515c0cac4c8dd396f193c813e67.pdf", "author": "Xiao Zang;Miao Yin;Jinqi Xiao;Saman Zonouz;Bo Yuan", "authorids": "~Xiao_Zang1;~Miao_Yin1;~Jinqi_Xiao1;~Saman_Zonouz1;~Bo_Yuan3", "gender": "M;;M;;M", "homepage": ";https://noodle-lab.github.io/;https://github.com/jinqixiao;;https://sites.google.com/site/samanzonouz4n6/saman-zonouz", "dblp": "158/4308;199/1982;338/6610;41/1662-1;", "google_scholar": "HN6NBToAAAAJ;ILDdu98AAAAJ;ITSm2LYAAAAJ;oUy9elEAAAAJ;", "orcid": ";;0009-0004-7311-9413;;", "linkedin": ";miao-yin-55ab64170/;;;", "or_profile": "~Xiao_Zang1;~Miao_Yin1;~Jinqi_Xiao1;~Bo_Yuan3;~Saman_Aliari_Zonouz1", "aff": "Rutgers University, New Brunswick;Rutgers University;Rutgers University;Rutgers University;Georgia Institute of Technology", "aff_domain": "rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;gatech.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzang2023graphmp,\ntitle={Graph{MP}: Graph Neural Network-based Motion Planning with Efficient Graph Search},\nauthor={Xiao Zang and Miao Yin and Jinqi Xiao and Saman Zonouz and Bo Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cQdc9Dyk4i}\n}", "github": "", "project": "", "reviewers": "Akwa;Dhw9;KcNw;3bor", "pdf_size": 3498701, "rating": "7;7;7;7", "confidence": "4;3;4;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "73;37;61;111", "wc_strengths": "37;44;44;67", "wc_weaknesses": "24;264;57;101", "wc_questions": "361;18;125;40", "wc_limitations": "8;28;69;5", "wc_review": "503;391;356;324", "wc_reply_reviewers": "62;297;287;22", "wc_reply_authors": "23;875;700;10", "reply_reviewers": "1;2;2;1", "reply_authors": "2;4;3;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.5, 26.7348087705897 ], "wc_strengths_avg": [ 48.0, 11.335784048754634 ], "wc_weaknesses_avg": [ 111.5, 92.18595337685672 ], "wc_questions_avg": [ 136.0, 135.90989662272574 ], "wc_limitations_avg": [ 27.5, 25.53918557824427 ], "wc_review_avg": [ 393.5, 67.51481318940311 ], "wc_reply_reviewers_avg": [ 167.0, 125.8471294865322 ], "wc_reply_authors_avg": [ 402.0, 390.46062541567494 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16639782521055459144&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;gatech.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Rutgers University;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.gatech.edu", "aff_unique_abbr": "Rutgers;Georgia Tech", "aff_campus_unique_index": "0", 
"aff_campus_unique": "New Brunswick;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Precise asymptotic generalization for multiclass classification with overparameterized linear models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71093", "id": "cRGINXQWem", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/84f44b36ceb4fbc9bb269959f4796eed-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cRGINXQWem", "openreview": "https://openreview.net/forum?id=cRGINXQWem", "poster": "/media/PosterPDFs/NeurIPS%202023/71093.png?t=1702110458.4608934", "slides": "https://nips.cc/virtual/2023/poster/71093", "video": "https://nips.cc/virtual/2023/poster/71093", "author_site": "David Wu, Anant Sahai", "tldr": "", "abstract": "We study the asymptotic generalization of an overparameterized linear model for multiclass classification under the Gaussian covariates bi-level model introduced in Subramanian et al. (NeurIPS'22), where the number of data points, features, and classes all grow together. We fully resolve the conjecture posed in Subramanian et al. '22, matching the predicted regimes for which the model does and does not generalize. Furthermore, our new lower bounds are akin to an information-theoretic strong converse: they establish that the misclassification rate goes to 0 or 1 asymptotically. One surprising consequence of our tight results is that the min-norm interpolating classifier can be asymptotically suboptimal relative to noninterpolating classifiers in the regime where the min-norm interpolating regressor is known to be optimal. \n\nThe key to our tight analysis is a new variant of the Hanson-Wright inequality which is broadly useful for multiclass problems with sparse labels. 
As an application, we show that the same type of analysis can be used to analyze the related multi-label classification problem under the same bi-level ensemble.", "keywords": "overparameterized;multiclass;classification;theory;generalization;interpolation;bi-level;Gaussian model", "primary_area": "", "supplementary_material": "/attachment/b0657d4150010235306e8d1be743f9a89faebace.zip", "author": "David Xing Wu;Anant Sahai", "authorids": "~David_Xing_Wu1;~Anant_Sahai1", "gender": "M;M", "homepage": "https://davidxwu.github.io/;https://www2.eecs.berkeley.edu/Faculty/Homepages/sahai.html", "dblp": "56/2742;50/2194.html", "google_scholar": "GbtCyIUAAAAJ;https://scholar.google.com.tw/citations?user=4gWt4fgAAAAJ", "orcid": ";0000-0001-9263-7719", "linkedin": ";", "or_profile": "~David_Xing_Wu1;~Anant_Sahai1", "aff": "University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2023precise,\ntitle={Precise asymptotic generalization for multiclass classification with overparameterized linear models},\nauthor={David Xing Wu and Anant Sahai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cRGINXQWem}\n}", "github": "", "project": "", "reviewers": "BQHA;qo1L;PSb6;V1nE;S1af;ggP8", "pdf_size": 744370, "rating": "6;7;7;8;8;8", "confidence": "1;1;2;3;1;4", "soundness": "3;3;3;3;4;4", "novelty": "3;4;2;3;4;4", "presentation": "3;3;3;3;4;4", "wc_summary": "94;60;126;97;65;53", "wc_strengths": "8;39;54;86;77;45", "wc_weaknesses": "22;97;146;0;5;91", "wc_questions": "20;178;96;31;1;46", "wc_limitations": "6;17;22;14;14;1", "wc_review": "150;391;444;228;162;236", "wc_reply_reviewers": "17;0;20;0;0;16", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;0;1;0;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 7.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 2.0, 1.1547005383792515 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 82.5, 25.552234083669997 ], "wc_strengths_avg": [ 51.5, 25.617376914898998 ], "wc_weaknesses_avg": [ 60.166666666666664, 54.459210015896815 ], "wc_questions_avg": [ 62.0, 59.60145412096364 ], "wc_limitations_avg": [ 12.333333333333334, 6.944222218666553 ], "wc_review_avg": [ 268.5, 110.97109233189215 ], "wc_reply_reviewers_avg": [ 8.833333333333334, 8.914719413544221 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5809475019311127, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12771511067575880463&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Riemannian Residual Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71092", "id": "cRzt1umRNx", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c868aa7437dc9b29e674cd2e25689021-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cRzt1umRNx", "openreview": "https://openreview.net/forum?id=cRzt1umRNx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71092", "video": "https://nips.cc/virtual/2023/poster/71092", "author_site": "Isay Katsman, Eric Chen, Sidhanth Holalkere, Anna Asch, Aaron Lou, Ser Nam Lim, Christopher De Sa", "tldr": "", "abstract": "Recent methods in geometric deep learning have introduced various neural networks to operate over data that lie on Riemannian manifolds. Such networks are often necessary to learn well over graphs with a hierarchical structure or to learn over manifold-valued data encountered in the natural sciences. These networks are often inspired by and directly generalize standard Euclidean neural networks. However, extending Euclidean networks is difficult and has only been done for a select few manifolds. In this work, we examine the residual neural network (ResNet) and show how to extend this construction to general Riemannian manifolds in a geometrically principled manner. Originally introduced to help solve the vanishing gradient problem, ResNets have become ubiquitous in machine learning due to their beneficial learning properties, excellent empirical results, and easy-to-incorporate nature when building varied neural networks. We find that our Riemannian ResNets mirror these desirable properties: when compared to existing manifold neural networks designed to learn over hyperbolic space and the manifold of symmetric positive definite matrices, we outperform both kinds of networks in terms of relevant testing metrics and training dynamics.", "keywords": "neural network;riemannian;manifold;resnet", "primary_area": "", "supplementary_material": "/attachment/97003361446bfa4e6e04e4d68389d5873a4e57d6.pdf", "author": "Isay Katsman;Eric Ming Chen;Sidhanth Holalkere;Anna Asch;Aaron Lou;Ser-Nam Lim;Christopher De Sa", "authorids": "~Isay_Katsman1;~Eric_Ming_Chen1;~Sidhanth_Holalkere1;~Anna_Asch1;~Aaron_Lou1;~Ser-Nam_Lim3;~Christopher_De_Sa2", "gender": "M;M;M;;M;;", "homepage": "https://isaykatsman.github.io/;https://echen01.github.io;https://sholalkere.github.io/;;https://aaronlou.com;;", "dblp": "211/6801;331/2233;;;232/3858;;", "google_scholar": "https://scholar.google.com/citations?hl=en;GNbun8YAAAAJ;;;;;", "orcid": ";;;;;;", "linkedin": "isay-katsman-031171a0/;;holalkere/;;;;", "or_profile": "~Isay_Katsman1;~Eric_Ming_Chen1;~Sidhanth_Holalkere1;~Anna_Asch1;~Aaron_Lou1;~Ser-Nam_Lim3;~Christopher_De_Sa2", "aff": "Yale University;Cornell University;Cornell University;;Stanford University;;", "aff_domain": "yale.edu;cornell.edu;cornell.edu;;stanford.edu;;", "position": "PhD student;Undergrad student;Undergrad student;;PhD student;;", "bibtex": "@inproceedings{\nkatsman2023riemannian,\ntitle={Riemannian Residual Neural Networks},\nauthor={Isay Katsman and Eric Ming Chen and Sidhanth Holalkere and Anna Asch and Aaron Lou and Ser-Nam Lim and Christopher De Sa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cRzt1umRNx}\n}", "github": "", "project": "", "reviewers": "nyBA;5HC1;1vkd;1LjF", "pdf_size": 2514233, "rating": "4;6;6;6", "confidence": "3;4;4;2", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "1;3;3;2", "wc_summary": "104;121;67;75", "wc_strengths": "10;38;240;32", "wc_weaknesses": "140;72;465;138", "wc_questions": 
"1;220;125;31", "wc_limitations": "1;14;28;5", "wc_review": "256;465;925;281", "wc_reply_reviewers": "0;23;112;0", "wc_reply_authors": "0;0;200;38", "reply_reviewers": "0;1;2;0", "reply_authors": "1;1;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 91.75, 21.787324296480282 ], "wc_strengths_avg": [ 80.0, 92.9623579735368 ], "wc_weaknesses_avg": [ 203.75, 153.29444706185544 ], "wc_questions_avg": [ 94.25, 85.81193098864516 ], "wc_limitations_avg": [ 12.0, 10.36822067666386 ], "wc_review_avg": [ 481.75, 268.33502846255465 ], "wc_reply_reviewers_avg": [ 33.75, 46.14311974715191 ], "wc_reply_authors_avg": [ 59.5, 82.58783203353869 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3030423948161848474&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "yale.edu;cornell.edu;cornell.edu;;stanford.edu;;", "author_num": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Yale University;Cornell University;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yale.edu;https://www.cornell.edu;https://www.stanford.edu", "aff_unique_abbr": "Yale;Cornell;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Discovering Hierarchical Achievements in Reinforcement Learning via Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71091", "id": "cUuXVaMmmv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c919a2b5ec1de69f2629f9119676e336-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cUuXVaMmmv", "openreview": "https://openreview.net/forum?id=cUuXVaMmmv", "poster": "/media/PosterPDFs/NeurIPS%202023/71091.png?t=1699861291.014143", "slides": "https://nips.cc/virtual/2023/poster/71091", "video": "https://nips.cc/virtual/2023/poster/71091", "author_site": "Seungyong Moon, Junyoung Yeom, Bumsoo Park, Hyun Oh Song", "tldr": "", "abstract": "Discovering achievements with a hierarchical structure in procedurally generated environments presents a significant challenge.\nThis requires an agent to possess a broad range of abilities, including generalization and long-term reasoning. Many prior methods have been built upon model-based or hierarchical approaches, with the belief that an explicit module for long-term planning would be advantageous for learning hierarchical dependencies. However, these methods demand an excessive number of environment interactions or large model sizes, limiting their practicality. In this work, we demonstrate that proximal policy optimization (PPO), a simple yet versatile model-free algorithm, outperforms previous methods when optimized with recent implementation practices. Moreover, we find that the PPO agent can predict the next achievement to be unlocked to some extent, albeit with limited confidence. Based on this observation, we introduce a novel contrastive learning method, called achievement distillation, which strengthens the agent's ability to predict the next achievement. 
Our method exhibits a strong capacity for discovering hierarchical achievements and shows state-of-the-art performance on the challenging Crafter environment in a sample-efficient manner while utilizing fewer model parameters.", "keywords": "reinforcement learning;hierarchical reinforcement learning;contrastive learning;procedurally generated environments", "primary_area": "", "supplementary_material": "/attachment/18a3a94d1fc3b3493ea17d447b33006f1ea91de7.pdf", "author": "Seungyong Moon;Junyoung Yeom;Bumsoo Park;Hyun Oh Song", "authorids": "~Seungyong_Moon1;~Junyoung_Yeom1;~Bumsoo_Park1;~Hyun_Oh_Song1", "gender": "M;M;M;M", "homepage": "https://symoon11.github.io/;https://yeomjy.com;;https://mllab.snu.ac.kr/hyunoh", "dblp": "241/6182;;;05/10781", "google_scholar": "oBU9w4UAAAAJ;L0TXeqoAAAAJ;;ScoZZPsAAAAJ", "orcid": ";;;", "linkedin": ";yeomjy;bumsoopark96/;hyun-oh-song-5a39b03", "or_profile": "~Seungyong_Moon1;~Junyoung_Yeom1;~Bumsoo_Park1;~Hyun_Oh_Song1", "aff": "KRAFTON;Seoul National University;KRAFTON;Seoul National University", "aff_domain": "krafton.com;snu.ac.kr;krafton.com;snu.ac.kr", "position": "Intern;Undergrad student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nmoon2023discovering,\ntitle={Discovering Hierarchical Achievements in Reinforcement Learning via Contrastive Learning},\nauthor={Seungyong Moon and Junyoung Yeom and Bumsoo Park and Hyun Oh Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cUuXVaMmmv}\n}", "github": "", "project": "", "reviewers": "cpyF;z85w;nAYF;Tzgb", "pdf_size": 1361937, "rating": "6;6;7;7", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "163;45;130;145", "wc_strengths": "43;46;119;140", "wc_weaknesses": "61;102;135;110", "wc_questions": "358;49;40;146", "wc_limitations": "1;6;46;7", "wc_review": "626;248;470;548", "wc_reply_reviewers": "31;71;63;38", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 120.75, 45.267952239967734 ], "wc_strengths_avg": [ 87.0, 43.15669125408017 ], "wc_weaknesses_avg": [ 102.0, 26.61766330841233 ], "wc_questions_avg": [ 148.25, 128.03197842726635 ], "wc_limitations_avg": [ 15.0, 18.041618552668716 ], "wc_review_avg": [ 473.0, 141.12760183606892 ], "wc_reply_reviewers_avg": [ 50.75, 16.67895380412093 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8349105537734817226&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "krafton.com;snu.ac.kr;krafton.com;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "KRAFTON Inc.;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.krafton.com;https://www.snu.ac.kr", "aff_unique_abbr": "KRAFTON;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Context-guided Embedding Adaptation for Effective Topic Modeling in Low-Resource Regimes", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/71090", "id": "cYkSt7jqlx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fce176458ff542940fa3ed16e6f9c852-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cYkSt7jqlx", "openreview": "https://openreview.net/forum?id=cYkSt7jqlx", "poster": "/media/PosterPDFs/NeurIPS%202023/71090.png?t=1697367502.3199112", "slides": "https://nips.cc/virtual/2023/poster/71090", "video": "https://nips.cc/virtual/2023/poster/71090", "author_site": "Yishi Xu, Jianqiao Sun, Yudi Su, Xinyang Liu, Zhibin Duan, Bo Chen, Mingyuan Zhou", "tldr": "", "abstract": "Embedding-based neural topic models have turned out to be a superior option for low-resourced topic modeling. However, current approaches consider static word embeddings learnt from source tasks as general knowledge that can be transferred directly to the target task, discounting the dynamically changing nature of word meanings in different contexts, thus typically leading to sub-optimal results when adapting to new tasks with unfamiliar contexts. To settle this issue, we provide an effective method that centers on adaptively generating semantically tailored word embeddings for each task by fully exploiting contextual information. Specifically, we first condense the contextual syntactic dependencies of words into a semantic graph for each task, which is then modeled by a Variational Graph Auto-Encoder to produce task-specific word representations. On this basis, we further impose a learnable Gaussian mixture prior on the latent space of words to efficiently learn topic representations from a clustering perspective, which contributes to diverse topic discovery and fast adaptation to novel tasks. We have conducted a wealth of quantitative and qualitative experiments, and the results show that our approach comprehensively outperforms established topic models.", "keywords": "Few-shot generative model; topic modeling;", "primary_area": "", "supplementary_material": "/attachment/5780fd76b8374a42997c1e65aad3727f0c2c3524.pdf", "author": "Yishi Xu;Jianqiao Sun;Yudi Su;Xinyang Liu;Zhibin Duan;Bo Chen;Mingyuan Zhou", "authorids": "~Yishi_Xu2;~Jianqiao_Sun1;~Yudi_Su1;~Xinyang_Liu4;~Zhibin_Duan1;~Bo_Chen1;~Mingyuan_Zhou1", "gender": "M;F;M;M;M;M;M", "homepage": ";https://github.com/optimus20;https://xinyangatk.github.io;;http://web.xidian.edu.cn/bchen/en/index.html;http://mingyuanzhou.github.io;https://novicestone.github.io/", "dblp": "323/9124;354/3610.html;;268/2560;89/5615-1;;268/6784", "google_scholar": ";;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=bITyHaEAAAAJ;;LXwCIisAAAAJ;SHVgTzIAAAAJ", "orcid": ";0009-0006-4938-0683;;;0000-0001-5151-9388;;", "linkedin": "https://www.linkedin.cn/injobs/in/%E5%89%91%E6%A1%A5-%E5%AD%99-88a727138;;;;;;", "or_profile": "~Jianqiao_Sun1;~Yudi_Su1;~Xinyang_Liu4;~Zhibin_Duan1;~Bo_Chen1;~Mingyuan_Zhou1;~Yi.shi_Xu1", "aff": "Xidian University;Xidian University;Xidian University;Xidian University;Xidian University;Google;Xidian University", "aff_domain": "xidian.edu;xidian.edu.cn;xidian.edu;xidian.edu;xidian.edu.cn;google.com;xidian.edu.cn", "position": "PhD student;MS student;MS student;PhD student;Full Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nxu2023contextguided,\ntitle={Context-guided Embedding Adaptation for Effective Topic Modeling in Low-Resource Regimes},\nauthor={Yishi Xu and Jianqiao Sun and Yudi Su and Xinyang Liu and Zhibin Duan and Bo Chen and Mingyuan 
Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cYkSt7jqlx}\n}", "github": "", "project": "", "reviewers": "VS4z;4MPJ;biyw;Jyas;CvqN;fyGx", "pdf_size": 1875302, "rating": "5;5;5;5;7;7", "confidence": "2;5;2;3;4;3", "soundness": "2;3;3;3;3;4", "novelty": "2;3;3;3;3;3", "presentation": "2;3;2;2;3;3", "wc_summary": "83;41;105;74;65;128", "wc_strengths": "25;35;31;26;36;72", "wc_weaknesses": "526;159;388;50;18;81", "wc_questions": "4;49;3;57;259;3", "wc_limitations": "4;1;1;8;17;4", "wc_review": "642;285;528;215;395;288", "wc_reply_reviewers": "19;41;382;21;76;10", "wc_reply_authors": "24;59;458;9;23;87", "reply_reviewers": "1;1;2;1;1;1", "reply_authors": "2;2;4;2;2;3", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.1666666666666665, 1.0671873729054748 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 82.66666666666667, 27.920522121829233 ], "wc_strengths_avg": [ 37.5, 15.96610994158147 ], "wc_weaknesses_avg": [ 203.66666666666666, 188.41679566559054 ], "wc_questions_avg": [ 62.5, 90.67110895980042 ], "wc_limitations_avg": [ 5.833333333333333, 5.520165053893065 ], "wc_review_avg": [ 392.1666666666667, 149.77696381249316 ], "wc_reply_reviewers_avg": [ 91.5, 131.68744561777078 ], "wc_reply_authors_avg": [ 110.0, 157.797338380595 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 2.5, 0.7637626158259734 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.22086305214969315, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11225422116426706176&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "xidian.edu;xidian.edu.cn;xidian.edu;xidian.edu;xidian.edu.cn;google.com;xidian.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Xidian University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.google.com", "aff_unique_abbr": "Xidian;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Data Minimization at Inference Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71089", "id": "cZS5X3PLOR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e48880ea81caa7836e6a0694049093ae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cZS5X3PLOR", "openreview": "https://openreview.net/forum?id=cZS5X3PLOR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71089", "video": "https://nips.cc/virtual/2023/poster/71089", "author_site": "Cuong Tran, Nando Fioretto", "tldr": "", "abstract": "In high-stakes domains such as legal, banking, hiring, and healthcare, learning models frequently rely on sensitive user information for inference, necessitating the complete set of features. This not only poses significant privacy risks for individuals but also demands substantial human effort from organizations to verify information accuracy. \nThis study asks whether it is necessary to use all input features for accurate predictions at inference time. 
The paper demonstrates that, in a personalized setting, individuals may only need to disclose a small subset of features without compromising decision-making accuracy. The paper also provides an efficient sequential algorithm to determine the appropriate attributes for each individual to provide. Evaluations across various learning tasks show that individuals can potentially report as little as 10\\% of their information while maintaining the same accuracy level as a model that employs the full set of user information.", "keywords": "Privacy; data minimization", "primary_area": "", "supplementary_material": "/attachment/7c464f91347b71d9c66bd4a7a15bb4e7d242a1ba.zip", "author": "Cuong Tran;Ferdinando Fioretto", "authorids": "~Cuong_Tran1;~Ferdinando_Fioretto1", "gender": "M;M", "homepage": ";http://nandofioretto.com", "dblp": "275/3885;119/6404", "google_scholar": "RiYBF7sAAAAJ;ASf9Q04AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Cuong_Tran1;~Ferdinando_Fioretto1", "aff": "Syracuse University;Syracuse University", "aff_domain": "syr.edu;syr.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntran2023data,\ntitle={Data Minimization at Inference Time},\nauthor={Cuong Tran and Ferdinando Fioretto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cZS5X3PLOR}\n}", "github": "", "project": "", "reviewers": "ddiE;ZsPJ;dE9z;ahUZ;oWvc", "pdf_size": 1229028, "rating": "5;6;6;6;6", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "2;4;2;3;2", "presentation": "2;2;3;2;3", "wc_summary": "56;161;70;59;90", "wc_strengths": "26;170;35;47;68", "wc_weaknesses": "147;445;198;152;196", "wc_questions": "10;37;12;140;329", "wc_limitations": "77;1;8;70;147", "wc_review": "316;814;323;468;830", "wc_reply_reviewers": "284;135;36;0;279", "wc_reply_authors": "942;222;67;26;217", "reply_reviewers": "2;1;1;0;1", "reply_authors": "6;3;3;2;3", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 87.2, 38.77834447214063 ], "wc_strengths_avg": [ 69.2, 52.32743066499634 ], "wc_weaknesses_avg": [ 227.6, 110.76930982903161 ], "wc_questions_avg": [ 105.6, 121.40609539887197 ], "wc_limitations_avg": [ 60.6, 53.18119968560318 ], "wc_review_avg": [ 550.2, 228.51905828617447 ], "wc_reply_reviewers_avg": [ 146.8, 118.54686836859082 ], "wc_reply_authors_avg": [ 294.8, 332.9765156884191 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.4, 1.3564659966250538 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16255008290520443072&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "syr.edu;syr.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Syracuse University", "aff_unique_dep": "", "aff_unique_url": "https://www.syracuse.edu", "aff_unique_abbr": "Syracuse", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Guarantees for Self-Play in Multiplayer Games via Polymatrix Decomposability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71088", "id": "cZVBRg59eb", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/40386e4770bebd63fdf47cbc67341c0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cZVBRg59eb", "openreview": "https://openreview.net/forum?id=cZVBRg59eb", "poster": "/media/PosterPDFs/NeurIPS%202023/71088.png?t=1701896301.4703107", "slides": "https://nips.cc/virtual/2023/poster/71088", "video": "https://nips.cc/virtual/2023/poster/71088", "author_site": "Revan MacQueen, James Wright", "tldr": "", "abstract": "Self-play is a technique for machine learning in multi-agent systems where a learning algorithm learns by interacting with copies of itself. Self-play is useful for generating large quantities of data for learning, but has the drawback that the agents the learner will face post-training may have dramatically different behavior than the learner came to expect by interacting with itself. For the special case of two-player constant-sum games, self-play that reaches Nash equilibrium is guaranteed to produce strategies that perform well against any post-training opponent; however, no such guarantee exists for multiplayer games. We show that in games that approximately decompose into a set of two-player constant-sum games (called constant-sum polymatrix games) where global $\\epsilon$-Nash equilibria are boundedly far from Nash equilibria in each subgame (called subgame stability), any no-external-regret algorithm that learns by self-play will produce a strategy with bounded vulnerability. For the first time, our results identify a structural property of multiplayer games that enable performance guarantees for the strategies produced by a broad class of self-play algorithms. We demonstrate our findings through experiments on Leduc poker.", "keywords": "Algorithmic Game Theory;Self-Play;Regret-Minimization;Multi-agent RL;Multiplayer Games;General-Sum Games", "primary_area": "", "supplementary_material": "/attachment/c0e43d83de57aaaf7ffa491a412b1eeb0e42dd80.zip", "author": "Revan MacQueen;James R. Wright", "authorids": "~Revan_MacQueen1;~James_R._Wright1", "gender": "M;M", "homepage": "https://www.revanmacqueen.com/;http://jrwright.info", "dblp": "280/0873;35/8399", "google_scholar": "https://scholar.google.ca/citations?user=ZXXfcCMAAAAJ;https://scholar.google.ca/citations?user=-BEP3TYAAAAJ", "orcid": ";0000-0001-9622-5842", "linkedin": ";", "or_profile": "~Revan_MacQueen1;~James_R._Wright1", "aff": "University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nmacqueen2023guarantees,\ntitle={Guarantees for Self-Play in Multiplayer Games via Polymatrix Decomposability},\nauthor={Revan MacQueen and James R. 
Wright},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cZVBRg59eb}\n}", "github": "", "project": "", "reviewers": "WEmp;hBPZ;Wd36;wuxJ", "pdf_size": 517132, "rating": "6;6;7;8", "confidence": "2;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "143;118;110;230", "wc_strengths": "55;78;36;73", "wc_weaknesses": "17;123;91;112", "wc_questions": "32;67;54;70", "wc_limitations": "6;99;6;6", "wc_review": "253;485;297;491", "wc_reply_reviewers": "23;21;56;95", "wc_reply_authors": "0;0;0;86", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 150.25, 47.625492123441624 ], "wc_strengths_avg": [ 60.5, 16.53027525481654 ], "wc_weaknesses_avg": [ 85.75, 41.324175732856425 ], "wc_questions_avg": [ 55.75, 14.972892172189045 ], "wc_limitations_avg": [ 29.25, 40.2701812759764 ], "wc_review_avg": [ 381.5, 107.65105665993251 ], "wc_reply_reviewers_avg": [ 48.75, 30.102948360584218 ], "wc_reply_authors_avg": [ 21.5, 37.239092362730865 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10283511989437716701&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ualberta.ca;ualberta.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Bayesian Extensive-Rank Matrix Factorization with Rotational Invariant Priors", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71087", "id": "ca2QmdOlIh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b8afc47273c746662a96dfdf562f87f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ca2QmdOlIh", "openreview": "https://openreview.net/forum?id=ca2QmdOlIh", "poster": "/media/PosterPDFs/NeurIPS%202023/71087.png?t=1699377876.3087428", "slides": "https://nips.cc/virtual/2023/poster/71087", "video": "https://nips.cc/virtual/2023/poster/71087", "author_site": "Farzad Pourkamali, Nicolas Macris", "tldr": "", "abstract": "We consider a statistical model for matrix factorization in a regime where the rank of the two hidden matrix factors grows linearly with their dimension and their product is corrupted by additive noise. Despite various approaches, statistical and algorithmic limits of such problems have remained elusive. We study a Bayesian setting with the assumptions that (a) one of the matrix factors is symmetric, (b) both factors as well as the additive noise have rotational invariant priors, (c) the priors are known to the statistician. We derive analytical formulas for Rotation Invariant Estimators to reconstruct the two matrix factors, and conjecture that these are optimal in the large-dimension limit, in the sense that they minimize the average mean-square-error. 
We provide numerical checks which confirm the optimality conjecture when confronted with Oracle Estimators, which are optimal by definition but involve the ground truth. Our derivation relies on a combination of tools, namely random matrix theory transforms, spherical integral formulas, and the replica method from statistical mechanics.", "keywords": "Matrix factorization;Bayesian inference;rotation invariant estimators;random matrix theory;spherical integrals;replica method", "primary_area": "", "supplementary_material": "/attachment/90d31c0f1ed234c61e696bf4560d790201cde575.zip", "author": "Farzad Pourkamali;Nicolas Macris", "authorids": "~Farzad_Pourkamali1;~Nicolas_Macris1", "gender": "M;M", "homepage": "https://people.epfl.ch/farzad.pourkamali?lang=en;", "dblp": "297/5614;47/5851", "google_scholar": ";", "orcid": ";0000-0003-2189-7411", "linkedin": ";", "or_profile": "~Farzad_Pourkamali1;~Nicolas_Macris1", "aff": "EPFL - EPF Lausanne;Ecole Polytechnique Federale Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\npourkamali2023bayesian,\ntitle={Bayesian Extensive-Rank Matrix Factorization with Rotational Invariant Priors},\nauthor={Farzad Pourkamali and Nicolas Macris},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ca2QmdOlIh}\n}", "github": "", "project": "", "reviewers": "PN68;AAM7;LQDn;LgE7", "pdf_size": 2787135, "rating": "7;7;7;9", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "4;3;2;4", "presentation": "4;3;3;4", "wc_summary": "45;154;112;136", "wc_strengths": "18;90;97;28", "wc_weaknesses": "10;105;40;18", "wc_questions": "48;24;119;39", "wc_limitations": "6;8;14;1", "wc_review": "127;381;382;222", "wc_reply_reviewers": "16;0;15;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 111.75, 41.318125562517956 ], "wc_strengths_avg": [ 58.25, 35.51320177060919 ], "wc_weaknesses_avg": [ 43.25, 37.305328037694565 ], "wc_questions_avg": [ 57.5, 36.52738698565776 ], "wc_limitations_avg": [ 7.25, 4.656984002549289 ], "wc_review_avg": [ 278.0, 108.81406159132192 ], "wc_reply_reviewers_avg": [ 10.25, 6.339361166552983 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9201985706091403348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "EPFL;Ecole Polytechnique Federale de Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Augmentation-Free Dense Contrastive Knowledge Distillation for Efficient Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71086", "id": "caUhYUVsLl", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a12779b5e802668df1cbc73fa00da62f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=caUhYUVsLl", "openreview": "https://openreview.net/forum?id=caUhYUVsLl", "poster": "/media/PosterPDFs/NeurIPS%202023/71086.png?t=1702062857.3744016", "slides": "https://nips.cc/virtual/2023/poster/71086", "video": "https://nips.cc/virtual/2023/poster/71086", "author_site": "Jiawei Fan, Chao Li, Xiaolong Liu, Meina Song, Anbang Yao", "tldr": "", "abstract": "In recent years, knowledge distillation methods based on contrastive learning have achieved promising results on image classification and object detection tasks. However, in this line of research, we note that less attention is paid to semantic segmentation. Existing methods heavily rely on data augmentation and memory buffer, which entail high computational resource demands when applying them to handle semantic segmentation that requires to preserve high-resolution feature maps for making dense pixel-wise predictions. In order to address this problem, we present Augmentation-free Dense Contrastive Knowledge Distillation (Af-DCD), a new contrastive distillation learning paradigm to train compact and accurate deep neural networks for semantic segmentation applications. Af-DCD leverages a masked feature mimicking strategy, and formulates a novel contrastive learning loss via taking advantage of tactful feature partitions across both channel and spatial dimensions, allowing to effectively transfer dense and structured local knowledge learnt by the teacher model to a target student model while maintaining training efficiency. Extensive experiments on five mainstream benchmarks with various teacher-student network pairs demonstrate the effectiveness of our approach. For instance, DeepLabV3-Res18|DeepLabV3-MBV2 model trained by Af-DCD reaches 77.03\\%|76.38\\% mIOU on Cityscapes dataset when choosing DeepLabV3-Res101 as the teacher, setting new performance records. Besides that, Af-DCD achieves an absolute mIOU improvement of 3.26\\%|3.04\\%|2.75\\%|2.30\\%|1.42\\% compared with individually trained counterpart on Cityscapes|Pascal VOC|Camvid|ADE20K|COCO-Stuff-164K. 
Code is available at https://github.com/OSVAI/Af-DCD.", "keywords": "Knowledge distillation;semantic segmentation;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/fdcbe916dfbaaa94e8cdc37201d62f7a3c416b9a.pdf", "author": "Jiawei Fan;Chao Li;Xiaolong Liu;Meina Song;Anbang Yao", "authorids": "~Jiawei_Fan1;~Chao_Li16;~Xiaolong_Liu2;~Meina_Song1;~Anbang_Yao1", "gender": "M;M;M;F;", "homepage": "https://jwfandl.github.io/;https://github.com/chaoli-ai/chaoli.github.io;;http://teacher.bupt.edu.cn/songmeina/;https://yaoanbang.github.io/", "dblp": ";;;95/4440;http://dblp.uni-trier.de/pers/hd/y/Yao:Anbang", "google_scholar": "7H674NUAAAAJ;;hgFJj0MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;b9hCmPYAAAAJ", "orcid": "0000-0002-5487-2109;;;0000-0001-6626-9932;0000-0002-3878-8679", "linkedin": ";;;;anbang-yao-1805b712a/", "or_profile": "~Jiawei_Fan1;~Chao_Li16;~Xiaolong_Liu2;~Meina_Song1;~Anbang_Yao1", "aff": "Beijing University of Posts and Telecommunications;Intel;;Beijing University of Posts and Telecommunications;Intel", "aff_domain": "bupt.edu.cn;intel.com;;bupt.edu.cn;intel.com", "position": "MS student;Researcher;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nfan2023augmentationfree,\ntitle={Augmentation-free Dense Contrastive Distillation for Efficient Semantic Segmentation},\nauthor={Jiawei Fan and Chao Li and Xiaolong Liu and Meina Song and Anbang Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=caUhYUVsLl}\n}", "github": "", "project": "", "reviewers": "vSYf;wLTq;cEUs;YQfC", "pdf_size": 6461535, "rating": "5;6;6;6", "confidence": "5;4;3;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;2", "wc_summary": "81;66;97;98", "wc_strengths": "10;48;30;60", "wc_weaknesses": "282;29;137;110", "wc_questions": "42;4;3;53", "wc_limitations": "1;39;7;1", "wc_review": "416;186;274;322", "wc_reply_reviewers": "25;17;0;83", "wc_reply_authors": "52;41;116;214", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.5, 13.124404748406688 ], "wc_strengths_avg": [ 37.0, 18.894443627691185 ], "wc_weaknesses_avg": [ 139.5, 91.36875833675316 ], "wc_questions_avg": [ 25.5, 22.34390297150433 ], "wc_limitations_avg": [ 12.0, 15.7797338380595 ], "wc_review_avg": [ 299.5, 83.08279003500063 ], "wc_reply_reviewers_avg": [ 31.25, 31.21197686786276 ], "wc_reply_authors_avg": [ 105.75, 68.74727267317591 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12061784428449153826&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "bupt.edu.cn;intel.com;;bupt.edu.cn;intel.com", "author_num": 5, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Intel", "aff_unique_dep": ";Intel Corporation", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.intel.com", "aff_unique_abbr": "BUPT;Intel", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "calKOSmBxj", 
"title": "HoK3v3: an Environment for Generalization in Heterogeneous Multi-agent Reinforcement Learning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "We introduce HoK3v3, a 3v3 game environment for multi-agent reinforcement learning (MARL) research, based on Honor of Kings, the world's most popular Multiplayer Online Battle Arena (MOBA) game at present. Due to the presence of diverse heroes and lineups (a.k.a., hero combinations), this environment poses a unique challenge for generalization in heterogeneous MARL. A detailed description of the tasks contained in HoK3v3, including observations, structured actions, and multi-head reward specifications, has been provided. We validate the environment by applying conventional MARL baseline algorithms. We examine the challenges of generalization through experiments involving the 3v3 MOBA full game task and its decomposed sub tasks, executed by lineups picked from the hero pool. The results indicate the limitations of existing RL methods in addressing scenarios that require heterogeneous generalization. All of the code, tutorial, encrypted game engine, can be accessed at: https://github.com/tencent-ailab/hok_env.", "keywords": "Reinforcement learning;multi-agent reinforcement learning;competitive reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/79e153882b1762a814aaa4cea5ecb5ac1f46c1bd.pdf", "author": "Lin Liu;Jianzhun Shao;Xinkai Chen;Yun Qu;Boyuan Wang;Zhenbin Ye;Yuexuan Tu;Hongyang Qin;Yang Jun Feng;Lin Lai;Yuanqin Wang;Meng Meng;Wenjun Wang;Xiyang Ji;QIANG FU;Lanxiao Huang;Minwen Deng;Yang Wei;Houqiang Li;Wengang Zhou;Ning Xie;Xiangyang Ji;Lvfang Tao;Lin Yuan;Juchao Zhuo;YANG GUANG;Deheng Ye", "authorids": "~Lin_Liu15;~Jianzhun_Shao1;~Xinkai_Chen1;~Yun_Qu2;~Boyuan_Wang1;~Zhenbin_Ye1;~Yuexuan_Tu1;~Hongyang_Qin1;~Yang_Jun_Feng1;~Lin_Lai1;~Yuanqin_Wang2;~Meng_Meng3;~Wenjun_Wang5;~Xiyang_Ji1;~QIANG_FU8;~Lanxiao_Huang1;~Minwen_Deng2;~Yang_Wei2;~Houqiang_Li1;~Wengang_Zhou1;~Ning_Xie7;~Xiangyang_Ji1;~Lvfang_Tao1;~Lin_Yuan4;~Juchao_Zhuo1;~YANG_GUANG1;~Deheng_Ye1", "gender": ";M;M;M;M;Not Specified;M;;M;M;;F;M;M;M;M;M;M;M;M;M;;M;M;M;M;M", "homepage": ";https://github.com/qyz55;https://github.com/joker201613151420?tab=repositories;https://github.com/cloud-qu;https://github.com/BoyuanWang-hub;https://ieeexplore.ieee.org/author/37086201790;;;https://github.com/;https://lailin.xyz/;;;https://github.com/jamesonwang;;;;;;https://staff.ustc.edu.cn/~lihq/;http://staff.ustc.edu.cn/~zhwg/index.html;;;;;https://aiarena.tencent.com/aiarena/zh;https://iwiki.woa.com/space/~mikoyang;http://yedeheng.github.io/", "dblp": ";263/2309;;80/10774-2;;;;;;;;;;;;255/6012.html;256/8604.html;03/1094-32.html;59/7017.html;22/4544-1;55/4104-3;;271/7673;;;;159/9503", "google_scholar": ";;;l9Ky9goAAAAJ;;;https://scholar.google.com/citations?hl=en;;;;;;;https://scholar.google.com.hk/citations?hl=zh-CN;gANaxT0AAAAJ;;;;7sFMIKoAAAAJ;8s1JF8YAAAAJ;;;ZqPV9OEAAAAJ;;;;jz5XKuQAAAAJ", "orcid": ";;;0009-0000-1803-8435;;;;;;;;;;;;;;;0000-0003-2188-3028;0000-0003-1690-9836;0000-0002-1509-464X;;;;;;0000-0002-1754-1837", "linkedin": ";;;;;;;;;;;%E6%9C%A6-%E8%92%99-b52bb1178/;;;;;;;;;;;;https://www.linkedin.cn/incareer/in/%E6%9E%97-%E8%A2%81-604bb1278;;;", "or_profile": 
"~Lin_Liu15;~Jianzhun_Shao1;~Xinkai_Chen1;~Yun_Qu2;~Boyuan_Wang1;~Zhenbin_Ye1;~Yuexuan_Tu1;~Hongyang_Qin1;~Yang_Jun_Feng1;~Lin_Lai1;~Yuanqin_Wang2;~Meng_Meng3;~Wenjun_Wang5;~Xiyang_Ji1;~QIANG_FU8;~Lanxiao_Huang1;~Minwen_Deng2;~Yang_Wei2;~Houqiang_Li1;~Wengang_Zhou1;~Ning_Xie7;~Xiangyang_Ji1;~Lvfang_Tao1;~Lin_Yuan4;~Juchao_Zhuo1;~YANG_GUANG1;~Deheng_Ye1", "aff": ";Tsinghua University;University of Electronic Science and Technology of China;Tencent TiMi Studio;Tsinghua University;;Tencent AI Lab;;;;;TiMi Studios;;;Tencent AI Lab;Tencent TiMi L1 Studio;Tencent AI Lab;Tencent AI Lab;University of Science and Technology of China;University of Science and Technology of China;University of Electronic Science and Technology of China;;Tencent AI;;;TianMei studio;Tencent", "aff_domain": ";tsinghua.edu.cn;uestc.edu.cn;tencent.com;tsinghua.edu.cn;;tencent.com;;;;;tencent.com;;;tencent.com;tencent.com;tencent.com;tencent.com;ustc.edu.cn;ustc.edu.cn;uestc.edu.cn;;tencent.com;;;tencent.com;tencent.com", "position": ";PhD student;Graduate student;Intern;MS student;;Researcher;;;;;Researcher;;;Principal Researcher;Researcher;Researcher;Researcher;Professor;Full Professor;Associate Professor;;Researcher;;;Undergrad student;Team Manager", "bibtex": "@misc{\nliu2023hokv,\ntitle={HoK3v3: an Environment for Generalization in Heterogeneous Multi-agent Reinforcement Learning},\nauthor={Lin Liu and Jianzhun Shao and Xinkai Chen and Yun Qu and Boyuan Wang and Zhenbin Ye and Yuexuan Tu and Hongyang Qin and Yang Jun Feng and Lin Lai and Yuanqin Wang and Meng Meng and Wenjun Wang and Xiyang Ji and QIANG FU and Lanxiao Huang and Minwen Deng and Yang Wei and Houqiang Li and Wengang Zhou and Ning Xie and Xiangyang Ji and Lvfang Tao and Lin Yuan and Juchao Zhuo and YANG GUANG and Deheng Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=calKOSmBxj}\n}", "github": "", "project": "", "reviewers": "6TCe;y5Qe;J3fJ;naRj", "site": "https://openreview.net/forum?id=calKOSmBxj", "pdf_size": 5848134, "rating": "5;6;6;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "70;75;107;156", "wc_strengths": "49;27;82;349", "wc_improvement": "30;83;236;230", "wc_limitations": "8;23;25;101", "wc_correctness": "10;82;18;56", "wc_clarity": "23;5;6;12", "wc_relation_to_prior_work": "55;49;22;14", "wc_documentation": "67;13;15;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "313;358;512;944", "wc_reply_reviewers": "76;0;0;359", "wc_reply_authors": "578;927;566;1553", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 102.0, 34.25638626592128 ], "wc_strengths_avg": [ 126.75, 129.80056818057463 ], "wc_improvement_avg": [ 144.75, 90.24238194994633 ], "wc_limitations_avg": [ 39.25, 36.25172409693089 ], "wc_correctness_avg": [ 41.5, 29.13331426391443 ], "wc_clarity_avg": [ 11.5, 7.158910531638177 ], "wc_relation_to_prior_work_avg": [ 35.0, 17.363755354185336 ], "wc_documentation_avg": [ 30.0, 21.840329667841555 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 531.75, 249.18905975182778 ], "wc_reply_reviewers_avg": [ 108.75, 147.77580146965877 ], "wc_reply_authors_avg": [ 906.0, 400.6975168378262 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 27, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:lm9bF5s5NjgJ:scholar.google.com/&scioq=HoK3v3:+an+Environment+for+Generalization+in+Heterogeneous+Multi-agent+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;2;3;2;2;2;2;4;4;1;2;5;2", "aff_unique_norm": "Tsinghua University;University of Electronic Science and Technology of China;Tencent;TiMi Studios;University of Science and Technology of China;TianMei studio", "aff_unique_dep": ";;TiMi Studio;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uestc.edu.cn;https://timi.qq.com;;http://www.ustc.edu.cn;", "aff_unique_abbr": "THU;UESTC;Tencent;;USTC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Empowering Convolutional Neural Nets with MetaSin Activation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71085", "id": "cay8LnKSro", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1f05584d537c92c8271699f207677475-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cay8LnKSro", "openreview": "https://openreview.net/forum?id=cay8LnKSro", "poster": "/media/PosterPDFs/NeurIPS%202023/71085.png?t=1701335484.4263496", "slides": "https://nips.cc/virtual/2023/poster/71085", "video": "https://nips.cc/virtual/2023/poster/71085", "author_site": "Farnood Salehi, Tun\u00e7 Aydin, Andr\u00e9 Gaillard, Guglielmo Camporese, Yuxuan Wang", "tldr": "", "abstract": "ReLU networks have remained the default choice for models in the area of image prediction despite their well-established spectral bias towards learning low frequencies faster, and consequently their difficulty of reproducing high frequency visual details. As an alternative, sin networks showed promising results in learning implicit representations of visual data. However training these networks in practically relevant settings proved to be difficult, requiring careful initialization, dealing with issues due to inconsistent gradients, and a degeneracy in local minima. In this work, we instead propose replacing a baseline network\u2019s existing activations with a novel ensemble function with trainable parameters. The proposed MetaSin activation can be trained reliably without requiring intricate initialization schemes, and results in consistently lower test loss compared to alternatives. We demonstrate our method in the areas of Monte-Carlo denoising and image resampling where we set new state-of-the-art through a knowledge distillation based training procedure. 
We present ablations on hyper-parameter settings, comparisons with alternative activation function formulations, and discuss the use of our method in other domains, such as image classification.", "keywords": "sin activation;image prediction;image resampling;monte-carlo denoising;knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/38b161d8e305ef427a5f96395df38aa3b39b3db3.pdf", "author": "Farnood Salehi;Tunc Ozan Aydin;Andr\u00e9 Gaillard;Guglielmo Camporese;Yuxuan Wang", "authorids": "~Farnood_Salehi1;~Tunc_Ozan_Aydin1;~Andr\u00e9_Gaillard1;~Guglielmo_Camporese1;~Yuxuan_Wang11", "gender": "M;M;M;M;M", "homepage": "https://studios.disneyresearch.com/people/farnood-salehi/;http://www.tuncaydin.com;https://www.linkedin.com/in/andre-gaillard-811852183/;https://guglielmocamporese.github.io/;https://yxwang7.github.io", "dblp": "199/1945;05/4398;;263/1813;", "google_scholar": ";E0fxZeUAAAAJ;;Yu6im9cAAAAJ;", "orcid": ";;;0000-0003-1209-0851;", "linkedin": "farnood-salehi/?originalSubdomain=ch;;;guglielmocamporese/;", "or_profile": "~Farnood_Salehi1;~Tunc_Ozan_Aydin1;~Andr\u00e9_Gaillard1;~Guglielmo_Camporese1;~Yuxuan_Wang11", "aff": "Disney Research|Studios;Disney Research;Department of Computer Science, ETHZ - ETH Zurich;University of Padova;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "disneyresearch.com;disneyresearch.com;inf.ethz.ch;unipd.it;inf.ethz.ch", "position": "Researcher;Researcher;MS student;PhD student;MS student", "bibtex": "@inproceedings{\nsalehi2023empowering,\ntitle={Empowering Convolutional Neural Nets with MetaSin Activation},\nauthor={Farnood Salehi and Tunc Ozan Aydin and Andr{\\'e} Gaillard and Guglielmo Camporese and Yuxuan Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cay8LnKSro}\n}", "github": "", "project": "", "reviewers": "pU6A;C7v1;9soJ;N8tS", "pdf_size": 18136064, "rating": "6;6;6;6", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;2;2", "presentation": "3;4;3;3", "wc_summary": "62;105;154;40", "wc_strengths": "89;111;51;158", "wc_weaknesses": "200;174;230;283", "wc_questions": "91;99;106;62", "wc_limitations": "14;1;8;22", "wc_review": "456;490;549;565", "wc_reply_reviewers": "97;73;166;106", "wc_reply_authors": "35;53;40;87", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.25, 43.602608866901534 ], "wc_strengths_avg": [ 102.25, 38.68704563545787 ], "wc_weaknesses_avg": [ 221.75, 40.5362492098122 ], "wc_questions_avg": [ 89.5, 16.740669042783207 ], "wc_limitations_avg": [ 11.25, 7.725768570181222 ], "wc_review_avg": [ 515.0, 44.05110668303352 ], "wc_reply_reviewers_avg": [ 110.5, 34.23813663153998 ], "wc_reply_authors_avg": [ 53.75, 20.29008378494283 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11267371479954632380&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "disneyresearch.com;disneyresearch.com;inf.ethz.ch;unipd.it;inf.ethz.ch", "author_num": 5, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Disney Research;ETH Zurich;University of Padova", "aff_unique_dep": 
"Research;Department of Computer Science;", "aff_unique_url": "https://research.disney.com;https://www.ethz.ch;https://www.unipd.it", "aff_unique_abbr": "Disney Research;ETHZ;UNIPD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;1;2;1", "aff_country_unique": "United States;Switzerland;Italy" }, { "title": "Towards Distribution-Agnostic Generalized Category Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71084", "id": "cczH4Xl7Zo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7216f4a324864e1f592c18de4d83d10-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cczH4Xl7Zo", "openreview": "https://openreview.net/forum?id=cczH4Xl7Zo", "poster": "/media/PosterPDFs/NeurIPS%202023/71084.png?t=1697375346.1894438", "slides": "https://nips.cc/virtual/2023/poster/71084", "video": "https://nips.cc/virtual/2023/poster/71084", "author_site": "Jianhong Bai, Zuozhu Liu, Hualiang Wang, Ruizhe Chen, Lianrui Mu, Xiaomeng Li, Joey Tianyi Zhou, YANG FENG, Jian Wu, Haoji Hu", "tldr": "", "abstract": "Data imbalance and open-ended distribution are two intrinsic characteristics of the real visual world. Though encouraging progress has been made in tackling each challenge separately, few works dedicated to combining them towards real-world scenarios. While several previous works have focused on classifying close-set samples and detecting open-set samples during testing, it's still essential to be able to classify unknown subjects as human beings. In this paper, we formally define a more realistic task as distribution-agnostic generalized category discovery (DA-GCD): generating fine-grained predictions for both close- and open-set classes in a long-tailed open-world setting. To tackle the challenging problem, we propose a Self-**Ba**lanced **Co**-Advice co**n**trastive framework (BaCon), which consists of a contrastive-learning branch and a pseudo-labeling branch, working collaboratively to provide interactive supervision to resolve the DA-GCD task. In particular, the contrastive-learning branch provides reliable distribution estimation to regularize the predictions of the pseudo-labeling branch, which in turn guides contrastive learning through self-balanced knowledge transfer and a proposed novel contrastive loss. We compare BaCon with state-of-the-art methods from two closely related fields: imbalanced semi-supervised learning and generalized category discovery. The effectiveness of BaCon is demonstrated with superior performance over all baselines and comprehensive analysis across various datasets. 
Our code is publicly available.", "keywords": "Generalized Category Discovery;Open-world Recognition;Long-tail Learning;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/2e119ed82f52dd480c2f74ec617d00fbd613886c.pdf", "author": "Jianhong Bai;Zuozhu Liu;Hualiang Wang;Ruizhe Chen;Lianrui Mu;Xiaomeng Li;Joey Tianyi Zhou;YANG FENG;Jian Wu;Haoji Hu", "authorids": "~Jianhong_Bai2;~Zuozhu_Liu1;~Hualiang_Wang1;~Ruizhe_Chen1;~Lianrui_Mu1;~Xiaomeng_Li1;~Joey_Tianyi_Zhou1;~YANG_FENG6;~Jian_Wu6;~Haoji_Hu1", "gender": "M;M;M;M;M;F;M;M;M;M", "homepage": "https://jianhongbai.github.io/;https://person.zju.edu.cn/en/lzz;https://github.com/SiLangWHL;https://www.linkedin.com/in/ruizhe-chen-015887275/;https://mu437.github.io/;https://xmengli.github.io/;https://joeyzhouty.github.io/;;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ;https://person.zju.edu.cn/huhaoji", "dblp": "349/0391;173/9297;;286/3608;;02/9850-1;123/5110;;96/2744-1;65/11145", "google_scholar": "U926UgYAAAAJ;h602wLIAAAAJ;4lzd8NsAAAAJ;Wr2K2sMAAAAJ;https://scholar.google.com/citations?hl=en;uVTzPpoAAAAJ;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ;;https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-3121-7259;0000-0002-7816-502X;0009-0006-0157-8885;0000-0003-2302-6775;;;0000-0002-4675-7055;;;0000-0001-6048-6549", "linkedin": ";;;;;;;https://www.linkedin.cn/incareer/in/%E6%B4%8B-%E5%86%AF-797451b0;;", "or_profile": "~Jianhong_Bai2;~Zuozhu_Liu1;~Hualiang_Wang1;~Ruizhe_Chen1;~Lianrui_Mu1;~Xiaomeng_Li1;~Joey_Tianyi_Zhou1;~YANG_FENG6;~Jian_Wu6;~Haoji_Hu1", "aff": "Zhejiang University;Zhejiang University;Hong Kong University of Science and Technology;Zhejiang University;Zhejiang University;Hong Kong University of Science and Technology;A*STAR Centre for Frontier AI Research;;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;ust.hk;zju.edu.cn;zju.edu.cn;ust.hk;cfar.a-star.edu.sg;;zju.edu.cn;zju.edu.cn", "position": "PhD student;Assistant Professor;PhD student;PhD student;PhD student;Assistant Professor;Principal Researcher;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbai2023towards,\ntitle={Towards Distribution-Agnostic Generalized Category Discovery},\nauthor={Jianhong Bai and Zuozhu Liu and Hualiang Wang and Ruizhe Chen and Lianrui Mu and Xiaomeng Li and Joey Tianyi Zhou and YANG FENG and Jian Wu and Haoji Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cczH4Xl7Zo}\n}", "github": "", "project": "", "reviewers": "udGd;kmyS;7Jpa;B9Lc;JDMB", "pdf_size": 1673653, "rating": "4;5;6;6;7", "confidence": "3;4;3;3;4", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;2;2;2;4", "wc_summary": "89;115;137;69;94", "wc_strengths": "37;81;85;33;94", "wc_weaknesses": "82;133;425;12;272", "wc_questions": "19;2;67;146;390", "wc_limitations": "1;10;17;49;9", "wc_review": "228;341;731;309;859", "wc_reply_reviewers": "0;29;376;16;780", "wc_reply_authors": "112;34;955;42;1498", "reply_reviewers": "0;1;2;1;3", "reply_authors": "2;2;4;2;5", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 100.8, 23.27573844156185 ], "wc_strengths_avg": [ 66.0, 25.69046515733026 ], "wc_weaknesses_avg": [ 184.8, 147.24863327039745 ], 
"wc_questions_avg": [ 124.8, 141.70024700049044 ], "wc_limitations_avg": [ 17.2, 16.690116836020053 ], "wc_review_avg": [ 493.6, 252.10442280927955 ], "wc_reply_reviewers_avg": [ 240.2, 304.1028773293669 ], "wc_reply_authors_avg": [ 528.2, 596.0732840851031 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 3.0, 1.2649110640673518 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.3202563076101743, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9295522417483964315&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;zju.edu.cn;ust.hk;zju.edu.cn;zju.edu.cn;ust.hk;cfar.a-star.edu.sg;;zju.edu.cn;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;0;0;1;2;0;0", "aff_unique_norm": "Zhejiang University;Hong Kong University of Science and Technology;A*STAR", "aff_unique_dep": ";;Centre for Frontier AI Research", "aff_unique_url": "https://www.zju.edu.cn;https://www.ust.hk;https://www.a-star.edu.sg", "aff_unique_abbr": "ZJU;HKUST;A*STAR", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "DeepACO: Neural-enhanced Ant Systems for Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71083", "id": "cd5D1DD923", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/883105b282fe15275991b411e6b200c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cd5D1DD923", "openreview": "https://openreview.net/forum?id=cd5D1DD923", "poster": "/media/PosterPDFs/NeurIPS%202023/71083.png?t=1701503191.5233035", "slides": "https://nips.cc/virtual/2023/poster/71083", "video": "https://nips.cc/virtual/2023/poster/71083", "author_site": "Haoran Ye, Jiarui Wang, Zhiguang Cao, Helan Liang, Yong Li", "tldr": "", "abstract": "Ant Colony Optimization (ACO) is a meta-heuristic algorithm that has been successfully applied to various Combinatorial Optimization Problems (COPs). Traditionally, customizing ACO for a specific problem requires the expert design of knowledge-driven heuristics. In this paper, we propose DeepACO, a generic framework that leverages deep reinforcement learning to automate heuristic designs. DeepACO serves to strengthen the heuristic measures of existing ACO algorithms and dispense with laborious manual design in future ACO applications. As a neural-enhanced meta-heuristic, DeepACO consistently outperforms its ACO counterparts on eight COPs using a single neural model and a single set of hyperparameters. As a Neural Combinatorial Optimization method, DeepACO performs better than or on par with problem-specific methods on canonical routing problems. 
Our code is publicly available at https://github.com/henry-yeh/DeepACO.", "keywords": "Neural Combinatorial Optimization;Ant Colony Optimization;Evolutionary algorithm;Meta-heuristic;Deep reinforcement learning;Learned heuristic measure;Neural local search;Generalization", "primary_area": "", "supplementary_material": "/attachment/86f1eccc63a66859592ec6465ddbe3d04d73f16a.zip", "author": "Haoran Ye;Jiarui Wang;Zhiguang Cao;Helan Liang;Yong Li", "authorids": "~Haoran_Ye1;~Jiarui_Wang3;~Zhiguang_Cao1;~Helan_Liang1;~Yong_Li7", "gender": "M;M;M;F;M", "homepage": "https://yehaoran.info;;https://zhiguangcaosg.github.io/;http://scst.suda.edu.cn/_s289/a8/66/c29255a501862/page.psp;http://fi.ee.tsinghua.edu.cn/~liyong/", "dblp": "237/9631;178/5014-2;178/8621;135/5022;", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8510-3716;0000-0002-2138-6016;0000-0002-4499-759X;0000-0001-9176-8944;", "linkedin": ";;;;", "or_profile": "~Haoran_Ye1;~Jiarui_Wang3;~Zhiguang_Cao1;~Helan_Liang1;~Yong_Li7", "aff": "Suzhou University;Soochow University;Institute for Infocomm Research, A*STAR;Suzhou University;Tsinghua University", "aff_domain": "suda.edu.cn;suda.edu.cn;i2r.a-star.edu.sg;suda.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;Scientist ;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nye2023deepaco,\ntitle={Deep{ACO}: Neural-enhanced Ant Systems for Combinatorial Optimization},\nauthor={Haoran Ye and Jiarui Wang and Zhiguang Cao and Helan Liang and Yong Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cd5D1DD923}\n}", "github": "", "project": "", "reviewers": "BitH;d7BW;YjbH;hKqX", "pdf_size": 1112532, "rating": "5;6;6;6", "confidence": "4;4;3;4", "soundness": "3;4;3;4", "novelty": "3;3;2;2", "presentation": "3;4;3;4", "wc_summary": "52;133;138;96", "wc_strengths": "40;56;110;82", "wc_weaknesses": "83;111;199;64", "wc_questions": "7;115;75;18", "wc_limitations": "1;1;1;6", "wc_review": "183;416;523;266", "wc_reply_reviewers": "18;20;17;21", "wc_reply_authors": "9;9;9;9", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 104.75, 34.50633999716574 ], "wc_strengths_avg": [ 72.0, 26.570660511172846 ], "wc_weaknesses_avg": [ 114.25, 51.70771219073611 ], "wc_questions_avg": [ 53.75, 43.77998972133273 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 347.0, 131.52376211164278 ], "wc_reply_reviewers_avg": [ 19.0, 1.5811388300841898 ], "wc_reply_authors_avg": [ 9.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17538047837922530113&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "suda.edu.cn;suda.edu.cn;i2r.a-star.edu.sg;suda.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Suzhou University;Soochow University;Institute for Infocomm Research;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.suda.edu.cn;https://www.soochow.edu.cn;https://www.i2r.a-star.edu.sg;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Suda;Soochow U;I2R;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Learning non-Markovian Decision-Making from State-only Sequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71082", "id": "cdlmsnQkZ9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/154926e0b66e2b2a8c1120852f31a12d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cdlmsnQkZ9", "openreview": "https://openreview.net/forum?id=cdlmsnQkZ9", "poster": "/media/PosterPDFs/NeurIPS%202023/71082.png?t=1697371437.6102498", "slides": "https://nips.cc/virtual/2023/poster/71082", "video": "https://nips.cc/virtual/2023/poster/71082", "author_site": "Aoyang Qin, Feng Gao, Qing Li, Song-Chun Zhu, Sirui Xie", "tldr": "", "abstract": "Conventional imitation learning assumes access to the actions of demonstrators, but these motor signals are often non-observable in naturalistic settings. Additionally, sequential decision-making behaviors in these settings can deviate from the assumptions of a standard Markov Decision Process (MDP). To address these challenges, we explore deep generative modeling of state-only sequences with non-Markov Decision Process (nMDP), where the policy is an energy-based prior in the latent space of the state transition generator. We develop maximum likelihood estimation to achieve model-based imitation, which involves short-run MCMC sampling from the prior and importance sampling for the posterior. The learned model enables $\\textit{decision-making as inference}$: model-free policy execution is equivalent to prior sampling, model-based planning is posterior sampling initialized from the policy. 
We demonstrate the efficacy of the proposed method in a prototypical path planning task with non-Markovian constraints and show that the learned model exhibits strong performances in challenging domains from the MuJoCo suite.", "keywords": "Sequential Decision Making;Generative Model;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/d513233a568bf8a2193eaaf09b8073323318fc85.pdf", "author": "Aoyang Qin;Feng Gao;Qing Li;Song-Chun Zhu;Sirui Xie", "authorids": "~Aoyang_Qin1;~Feng_Gao2;~Qing_Li1;~Song-Chun_Zhu1;~Sirui_Xie1", "gender": "M;M;M;M;M", "homepage": "https://github.com/qayqaq;https://fen9.github.io/;http://liqing-ustc.github.io/;https://zhusongchun.net/;https://www.siruixie.com", "dblp": ";10/2674-13;181/2689-3;10/10313;232/3072", "google_scholar": ";amaLnocAAAAJ;iwdFZBEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;9GJn5FIAAAAJ", "orcid": ";0000-0003-1515-1357;;;", "linkedin": ";;;;", "or_profile": "~Aoyang_Qin1;~Feng_Gao2;~Qing_Li1;~Song-Chun_Zhu1;~Sirui_Xie1", "aff": "Tsinghua University;Amazon;Beijing Institute for General Artificial Intelligence (BIGAI);Peking University;University of California, Los Angeles", "aff_domain": "tsinghua.edu.cn;amazon.com;bigai.ai;pku.edu.cn;ucla.edu", "position": "PhD student;Researcher;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nqin2023learning,\ntitle={Learning non-Markovian Decision-Making from State-only Sequences},\nauthor={Aoyang Qin and Feng Gao and Qing Li and Song-Chun Zhu and Sirui Xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cdlmsnQkZ9}\n}", "github": "", "project": "", "reviewers": "tbAW;W5w2;LurP;wvit;N9u8", "pdf_size": 1968489, "rating": "3;6;6;7;7", "confidence": "4;5;2;4;2", "soundness": "3;3;3;4;3", "novelty": "2;2;3;3;3", "presentation": "1;3;2;3;3", "wc_summary": "61;279;111;89;99", "wc_strengths": "35;83;69;55;48", "wc_weaknesses": "420;468;237;84;74", "wc_questions": "183;253;78;8;3", "wc_limitations": "1;33;11;2;4", "wc_review": "700;1116;506;238;228", "wc_reply_reviewers": "1536;0;54;0;0", "wc_reply_authors": "1592;0;4;0;0", "reply_reviewers": "5;0;1;0;0", "reply_authors": "4;1;2;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.4, 1.2000000000000002 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 127.8, 77.3832023116128 ], "wc_strengths_avg": [ 58.0, 16.637307474468336 ], "wc_weaknesses_avg": [ 256.6, 164.26028126117404 ], "wc_questions_avg": [ 105.0, 98.5190336940025 ], "wc_limitations_avg": [ 10.2, 11.923086848631105 ], "wc_review_avg": [ 557.6, 330.31233703874886 ], "wc_reply_reviewers_avg": [ 318.0, 609.3590074824529 ], "wc_reply_authors_avg": [ 319.2, 636.4018856037433 ], "reply_reviewers_avg": [ 1.2, 1.9390719429665317 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2948459875572344, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9370535421787136476&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;amazon.com;bigai.ai;pku.edu.cn;ucla.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Tsinghua University;Amazon;Beijing Institute for General Artificial Intelligence;Peking University;University of California, Los Angeles", "aff_unique_dep": ";Amazon.com, Inc.;;;", 
"aff_unique_url": "https://www.tsinghua.edu.cn;https://www.amazon.com;http://www.bigmodel.cn/;http://www.pku.edu.cn;https://www.ucla.edu", "aff_unique_abbr": "THU;Amazon;BIGAI;Peking U;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Semi-Supervised Domain Generalization with Known and Unknown Classes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71081", "id": "ce59j806df", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5b84864ff8474fd742c66f219b2eaac1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ce59j806df", "openreview": "https://openreview.net/forum?id=ce59j806df", "poster": "/media/PosterPDFs/NeurIPS%202023/71081.png?t=1698653842.1257799", "slides": "https://nips.cc/virtual/2023/poster/71081", "video": "https://nips.cc/virtual/2023/poster/71081", "author_site": "Lei Zhang, Ji-Fu Li, Wei Wang", "tldr": "", "abstract": "Semi-Supervised Domain Generalization (SSDG) aims to learn a model that is generalizable to an unseen target domain with only a few labels, and most existing SSDG methods assume that unlabeled training and testing samples are all known classes. However, a more realistic scenario is that known classes may be mixed with some unknown classes in unlabeled training and testing data. To deal with such a scenario, we propose the Class-Wise Adaptive Exploration and Exploitation (CWAEE) method. In particular, we explore unlabeled training data by using one-vs-rest classifiers and class-wise adaptive thresholds to detect known and unknown classes, and exploit them by adopting consistency regularization on augmented samples based on Fourier Transformation to improve the unseen domain generalization. 
The experiments conducted on real-world datasets verify the effectiveness and superiority of our method.", "keywords": "Domain Generalization;Semi-Supervised Learning;Out-of-Distribution Detection;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/bb7cdcbcecfcab0fd8f13e45acdcba6f224b8f91.pdf", "author": "Lei Zhang;Ji-Fu Li;Wei Wang", "authorids": "~Lei_Zhang55;~Ji-Fu_Li1;~Wei_Wang10", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": "0000-0002-8089-6796;0000-0002-0920-0231;", "linkedin": ";;", "or_profile": "~Lei_Zhang55;~Ji-Fu_Li1;~Wei_Wang10", "aff": "Nanjing university;Nanjing University;", "aff_domain": "nju.edu.cn;nju.edu.cn;", "position": "MS student;MS student;", "bibtex": "@inproceedings{\nzhang2023semisupervised,\ntitle={Semi-Supervised Domain Generalization with Known and Unknown Classes},\nauthor={Lei Zhang and Ji-Fu Li and Wei Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ce59j806df}\n}", "github": "", "project": "", "reviewers": "kd1R;8khn;enwD;mXoF", "pdf_size": 770193, "rating": "5;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "45;112;50;48", "wc_strengths": "78;7;58;97", "wc_weaknesses": "124;81;25;23", "wc_questions": "4;23;29;75", "wc_limitations": "4;93;1;4", "wc_review": "255;316;163;247", "wc_reply_reviewers": "30;56;27;0", "wc_reply_authors": "73;28;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.75, 27.913930214142187 ], "wc_strengths_avg": [ 60.0, 33.56337289367682 ], "wc_weaknesses_avg": [ 63.25, 42.097357399247755 ], "wc_questions_avg": [ 32.75, 26.080404521402652 ], "wc_limitations_avg": [ 25.5, 38.9903834297638 ], "wc_review_avg": [ 245.25, 54.47189642375231 ], "wc_reply_reviewers_avg": [ 28.25, 19.828956099603428 ], "wc_reply_authors_avg": [ 25.25, 29.844388082183894 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4105287039047800647&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;nju.edu.cn;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Max-Sliced Mutual Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71080", "id": "ce9B2x3zQa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe4da14f07561a232782820d30ea22f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ce9B2x3zQa", "openreview": "https://openreview.net/forum?id=ce9B2x3zQa", "poster": "/media/PosterPDFs/NeurIPS%202023/71080.png?t=1701422561.9817293", "slides": "https://nips.cc/virtual/2023/poster/71080", "video": "https://nips.cc/virtual/2023/poster/71080", "author_site": "Dor Tsur, Ziv Goldfeld, Kristjan Greenewald", "tldr": "", "abstract": 
"Quantifying dependence between high-dimensional random variables is central to statistical learning and inference. Two classical methods are canonical correlation analysis (CCA), which identifies maximally correlated projected versions of the original variables, and Shannon's mutual information, which is a universal dependence measure that also captures high-order dependencies. However, CCA only accounts for linear dependence, which may be insufficient for certain applications, while mutual information is often infeasible to compute/estimate in high dimensions. This work proposes a middle ground in the form of a scalable information-theoretic generalization of CCA, termed max-sliced mutual information (mSMI). mSMI equals the maximal mutual information between low-dimensional projections of the high-dimensional variables, which reduces back to CCA in the Gaussian case. It enjoys the best of both worlds: capturing intricate dependencies in the data while being amenable to fast computation and scalable estimation from samples. We show that mSMI retains favorable structural properties of Shannon's mutual information, like variational forms and identification of independence. We then study statistical estimation of mSMI, propose an efficiently computable neural estimator, and couple it with formal non-asymptotic error bounds. We present experiments that demonstrate the utility of mSMI for several tasks, encompassing independence testing, multi-view representation learning, algorithmic fairness, and generative modeling. We observe that mSMI consistently outperforms competing methods with little-to-no computational overhead.", "keywords": "CCA;dimensionality reduction;information theory;mutual information;neural estimation;slicing", "primary_area": "", "supplementary_material": "/attachment/73f06750e31cad50a3693e4f70ae296a0050bf5a.pdf", "author": "Dor Tsur;Ziv Goldfeld;Kristjan Greenewald", "authorids": "~Dor_Tsur1;~Ziv_Goldfeld1;~Kristjan_Greenewald1", "gender": "M;M;", "homepage": ";http://people.ece.cornell.edu/zivg/;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald", "dblp": "260/0302;119/3922;146/0563", "google_scholar": "q4VAHZQAAAAJ;YKRiYRAAAAAJ;L3zNUG4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Dor_Tsur1;~Ziv_Goldfeld1;~Kristjan_Greenewald1", "aff": "Ben-Gurion University of the Negev;Cornell University;MIT-IBM Watson AI Lab, IBM Research", "aff_domain": "bgu.ac.il;cornell.edu;ibm.com", "position": "PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\ntsur2023maxsliced,\ntitle={Max-Sliced Mutual Information},\nauthor={Dor Tsur and Ziv Goldfeld and Kristjan Greenewald},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ce9B2x3zQa}\n}", "github": "", "project": "", "reviewers": "rZV3;YCWE;oZ2r;WmTq", "pdf_size": 1349502, "rating": "6;7;7;8", "confidence": "3;3;3;4", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "162;64;215;78", "wc_strengths": "81;29;56;61", "wc_weaknesses": "60;62;87;18", "wc_questions": "13;81;76;16", "wc_limitations": "2;66;11;74", "wc_review": "318;302;445;247", "wc_reply_reviewers": "121;19;6;12", "wc_reply_authors": "615;9;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "5;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], 
"wc_summary_avg": [ 129.75, 61.8642667458364 ], "wc_strengths_avg": [ 56.75, 18.552290963651902 ], "wc_weaknesses_avg": [ 56.75, 24.772716847370617 ], "wc_questions_avg": [ 46.5, 32.06633748964793 ], "wc_limitations_avg": [ 38.25, 32.03416145304884 ], "wc_review_avg": [ 328.0, 72.50172411743047 ], "wc_reply_reviewers_avg": [ 39.5, 47.2784306000104 ], "wc_reply_authors_avg": [ 156.0, 265.02924366944865 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9939506791278767622&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "bgu.ac.il;cornell.edu;ibm.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ben-Gurion University of the Negev;Cornell University;IBM", "aff_unique_dep": ";;AI Lab", "aff_unique_url": "https://www.bgu.ac.il;https://www.cornell.edu;https://www.ibmwatsonai.org/", "aff_unique_abbr": "BGU;Cornell;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "IBA: Towards Irreversible Backdoor Attacks in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71079", "id": "cemEOP8YoC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0c6bc641a56bebee9d985b937307367-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cemEOP8YoC", "openreview": "https://openreview.net/forum?id=cemEOP8YoC", "poster": "/media/PosterPDFs/NeurIPS%202023/71079.png?t=1701832184.3463557", "slides": "https://nips.cc/virtual/2023/poster/71079", "video": "https://nips.cc/virtual/2023/poster/71079", "author_site": "Thuy Dung Nguyen, Tuan Nguyen, Anh Tran, Khoa D Doan, Kok-Seng Wong", "tldr": "", "abstract": "Federated learning (FL) is a distributed learning approach that enables machine learning models to be trained on decentralized data without compromising end devices' personal, potentially sensitive data. However, the distributed nature and uninvestigated data intuitively introduce new security vulnerabilities, including backdoor attacks. In this scenario, an adversary implants backdoor functionality into the global model during training, which can be activated to cause the desired misbehaviors for any input with a specific adversarial pattern. Despite having remarkable success in triggering and distorting model behavior, prior backdoor attacks in FL often hold impractical assumptions, limited imperceptibility, and durability. Specifically, the adversary needs to control a sufficiently large fraction of clients or know the data distribution of other honest clients. In many cases, the trigger inserted is often visually apparent, and the backdoor effect is quickly diluted if the adversary is removed from the training process. To address these limitations, we propose a novel backdoor attack framework in FL, the Irreversible Backdoor Attack (IBA), that jointly learns the optimal and visually stealthy trigger and then gradually implants the backdoor into a global model. This approach allows the adversary to execute a backdoor attack that can evade both human and machine inspections. 
Additionally, we enhance the efficiency and durability of the proposed attack by selectively poisoning the model's parameters that are least likely to be updated by the main task's learning process and constraining the poisoned model update to the vicinity of the global model. Finally, we evaluate the proposed attack framework on several benchmark datasets, including MNIST, CIFAR-10, and Tiny ImageNet, and achieve high success rates while simultaneously bypassing existing backdoor defenses and producing a more durable backdoor effect than other backdoor attacks. Overall, IBA offers a more effective, stealthy, and durable approach to backdoor attacks in FL. The code associated with this paper is available on [GitHub](https://github.com/sail-research/iba).", "keywords": "Backdoor Attacks;Federated Learning;Durability;Imperceptibility;Stealthiness", "primary_area": "", "supplementary_material": "/attachment/2301d3e0dc5bf53a1267db6a09f8fb64101ce8fc.pdf", "author": "Dung Thuy Nguyen;Tuan Minh Nguyen;Anh Tuan Tran;Khoa D Doan;KOK SENG WONG", "authorids": "~Dung_Thuy_Nguyen1;~Tuan_Minh_Nguyen1;~Anh_Tuan_Tran2;~Khoa_D_Doan1;~KOK_SENG_WONG2", "gender": "F;M;M;M;", "homepage": ";https://mtuann.github.io/;https://sites.google.com/site/anhttranusc/;https://sail-research.com/;https://khoadoan.me", "dblp": "36/6174;;150/5269-1;29/11439;238/4276.html", "google_scholar": "p4Xbn8QAAAAJ;_-nQHtcAAAAJ;FYZ5ODQAAAAJ;https://scholar.google.co.kr/citations?user=WQyULhIAAAAJ;Zz2hMgcAAAAJ", "orcid": ";;0000-0002-3120-4036;0000-0002-2029-7644;", "linkedin": "dungnt110/;;https://linkedin.com/in/anh-tran-97814b19;kok-seng-wong-058217204/;", "or_profile": "~Dung_Thuy_Nguyen1;~Tuan_Minh_Nguyen1;~Anh_Tuan_Tran2;~KOK_SENG_WONG2;~Khoa_Doan1", "aff": "VinUniversity;VinUniversity;VinAI Research;VinUniversity;VinUniversity", "aff_domain": "vinui.edu;vinuni.edu.vn;vinai.io;vinuni.edu.vn;vinuni.edu.vn", "position": "Researcher;PhD student;Research Scientist;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023iba,\ntitle={{IBA}: Towards Irreversible Backdoor Attacks in Federated Learning},\nauthor={Dung Thuy Nguyen and Tuan Minh Nguyen and Anh Tuan Tran and Khoa D Doan and KOK SENG WONG},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cemEOP8YoC}\n}", "github": "", "project": "", "reviewers": "Avr8;yF1b;S7Me;gShs", "pdf_size": 9848976, "rating": "4;4;6;7", "confidence": "5;4;4;3", "soundness": "2;2;3;4", "novelty": "1;2;2;4", "presentation": "2;3;2;4", "wc_summary": "75;120;57;50", "wc_strengths": "16;25;42;68", "wc_weaknesses": "579;167;122;32", "wc_questions": "7;8;233;30", "wc_limitations": "26;17;27;13", "wc_review": "703;337;481;193", "wc_reply_reviewers": "46;0;86;0", "wc_reply_authors": "76;0;255;0", "reply_reviewers": "1;0;2;0", "reply_authors": "2;1;3;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.5, 27.26261176043117 ], "wc_strengths_avg": [ 37.75, 19.803724397193573 ], "wc_weaknesses_avg": [ 225.0, 210.08212679806914 ], "wc_questions_avg": [ 69.5, 94.84329180284708 ], "wc_limitations_avg": [ 20.75, 5.931905258852336 ], "wc_review_avg": [ 428.5, 188.37396316901123 ], "wc_reply_reviewers_avg": [ 33.0, 35.90264614203248 ], "wc_reply_authors_avg": [ 82.75, 104.17623289407234 ], "reply_reviewers_avg": [ 
0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8837517510293784743&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "vinui.edu;vinuni.edu.vn;vinai.io;vinuni.edu.vn;vinuni.edu.vn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "VinUniversity;VinAI Research", "aff_unique_dep": ";", "aff_unique_url": "https://vinuni.edu.vn;https://www.vinai.io/", "aff_unique_abbr": "VinUni;VinAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Vietnam" }, { "title": "On Separate Normalization in Self-supervised Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71078", "id": "cezKbXsT3V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8ba80c47b9d3dced79ee835b7d3bf72a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cezKbXsT3V", "openreview": "https://openreview.net/forum?id=cezKbXsT3V", "poster": "/media/PosterPDFs/NeurIPS%202023/71078.png?t=1697472206.995039", "slides": "https://nips.cc/virtual/2023/poster/71078", "video": "https://nips.cc/virtual/2023/poster/71078", "author_site": "Xiaohui Chen, Yinkai Wang, Yuanqi Du, Soha Hassoun, Liping Liu", "tldr": "", "abstract": "Self-supervised training methods for transformers have demonstrated remarkable performance across various domains. Previous transformer-based models, such as masked autoencoders (MAE), typically utilize a single normalization layer for both the [CLS] symbol and the tokens. We propose in this paper a simple modification that employs separate normalization layers for the tokens and the [CLS] symbol to better capture their distinct characteristics and enhance downstream task performance. Our method aims to alleviate the potential negative effects of using the same normalization statistics for both token types, which may not be optimally aligned with their individual roles. We empirically show that by utilizing a separate normalization layer, the [CLS] embeddings can better encode the global contextual information and are distributed more uniformly in its anisotropic space. 
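The modification described above is small enough to show directly; here is a minimal sketch of the two-normalization idea for a ViT-style token layout with the [CLS] token at index 0. Class and variable names are illustrative.

```python
import torch
import torch.nn as nn

class SeparateNorm(nn.Module):
    """Separate LayerNorms for the [CLS] token and the patch tokens,
    replacing a transformer block's single shared LayerNorm (a minimal
    sketch of the modification the abstract describes)."""
    def __init__(self, dim: int):
        super().__init__()
        self.norm_cls = nn.LayerNorm(dim)
        self.norm_tok = nn.LayerNorm(dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, 1 + num_tokens, dim), [CLS] at position 0
        cls, tokens = x[:, :1], x[:, 1:]
        return torch.cat([self.norm_cls(cls), self.norm_tok(tokens)], dim=1)

x = torch.randn(8, 197, 768)        # ViT-style: 1 [CLS] + 196 patches
print(SeparateNorm(768)(x).shape)   # torch.Size([8, 197, 768])
```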
When replacing the conventional normalization layer with the two separate layers, we observe an average 2.7% performance improvement over the image, natural language, and graph domains.", "keywords": "Transformer;Self-supervised Learning;Normalization", "primary_area": "", "supplementary_material": "", "author": "Xiaohui Chen;Yinkai Wang;Yuanqi Du;Soha Hassoun;Liping Liu", "authorids": "~Xiaohui_Chen2;~Yinkai_Wang1;~Yuanqi_Du1;~Soha_Hassoun1;~Liping_Liu1", "gender": "M;M;M;F;", "homepage": "https://github.com/Xiaohui9607;https://yinkaiw.github.io;https://yuanqidu.github.io/;http://www.cs.tufts.edu/~soha/;https://www.eecs.tufts.edu/~liulp/", "dblp": ";308/6333;266/2837;82/450;47/5615-1", "google_scholar": "_7m3QTsAAAAJ;PfRyo6EAAAAJ;fAc_zZMAAAAJ;https://scholar.google.com.tw/citations?user=tR5MNfkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-9477-2199;0000-0002-3690-3928", "linkedin": "xiaohui-chen-464a0816b/;yinkai-wang;;sohahassoun/;", "or_profile": "~Xiaohui_Chen2;~Yinkai_Wang1;~Yuanqi_Du1;~Soha_Hassoun1;~Liping_Liu1", "aff": "Tufts University;Tufts University;Cornell University;Tufts University;Tufts University", "aff_domain": "tufts.edu;tufts.edu;cornell.edu;tufts.edu;tufts.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023on,\ntitle={On Separate Normalization in Self-supervised Transformers},\nauthor={Xiaohui Chen and Yinkai Wang and Yuanqi Du and Soha Hassoun and Liping Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cezKbXsT3V}\n}", "github": "", "project": "", "reviewers": "QRJ8;vuG3;4DDo;hvnd;voLG", "pdf_size": 1495444, "rating": "4;6;7;7;7", "confidence": "4;4;4;4;4", "soundness": "2;3;4;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "58;46;119;39;105", "wc_strengths": "26;40;115;54;49", "wc_weaknesses": "166;20;113;9;8", "wc_questions": "22;49;87;9;1", "wc_limitations": "39;7;35;12;4", "wc_review": "311;162;469;123;167", "wc_reply_reviewers": "33;511;50;48;1", "wc_reply_authors": "58;749;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 73.4, 32.401234544381175 ], "wc_strengths_avg": [ 56.8, 30.616335509005648 ], "wc_weaknesses_avg": [ 63.2, 64.65106340966095 ], "wc_questions_avg": [ 33.6, 31.27682848371938 ], "wc_limitations_avg": [ 19.4, 14.65059725744995 ], "wc_review_avg": [ 246.4, 128.35824866365232 ], "wc_reply_reviewers_avg": [ 128.6, 192.00270831423185 ], "wc_reply_authors_avg": [ 161.4, 294.6574960865581 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1432756061526336978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "tufts.edu;tufts.edu;cornell.edu;tufts.edu;tufts.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tufts University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tufts.edu;https://www.cornell.edu", "aff_unique_abbr": "Tufts;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fine-Grained Cross-View Geo-Localization Using a Correlation-Aware Homography Estimator", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71077", "id": "cgiP4cMBP9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/112d8e0c7563de6e3408b49a09b4d8a3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cgiP4cMBP9", "openreview": "https://openreview.net/forum?id=cgiP4cMBP9", "poster": "/media/PosterPDFs/NeurIPS%202023/71077.png?t=1697687923.1821404", "slides": "https://nips.cc/virtual/2023/poster/71077", "video": "https://nips.cc/virtual/2023/poster/71077", "author_site": "Xiaolong Wang, Runsen Xu, Zhuofan Cui, Zeyu Wan, Yu Zhang", "tldr": "", "abstract": "In this paper, we introduce a novel approach to fine-grained cross-view geo-localization. Our method aligns a warped ground image with a corresponding GPS-tagged satellite image covering the same area using homography estimation. We first employ a differentiable spherical transform, adhering to geometric principles, to accurately align the perspective of the ground image with the satellite map. This transformation effectively places ground and aerial images in the same view and on the same plane, reducing the task to an image alignment problem. To address challenges such as occlusion, small overlapping range, and seasonal variations, we propose a robust correlation-aware homography estimator to align similar parts of the transformed ground image with the satellite image. Our method achieves sub-pixel resolution and meter-level GPS accuracy by mapping the center point of the transformed ground image to the satellite image using a homography matrix and determining the orientation of the ground camera using a point above the central axis. 
Operating at a speed of 30 FPS, our method outperforms state-of-the-art techniques, reducing the mean metric localization error by 21.3\\% and 32.4\\% in same-area and cross-area generalization tasks on the VIGOR benchmark, respectively, and by 34.4\\% on the KITTI benchmark in same-area evaluation.", "keywords": "Fine-Grained Cross-View Geo-Localization;Homography Estimation", "primary_area": "", "supplementary_material": "/attachment/103087fe471441cfdd474ee6d8165fb8db3c0c04.pdf", "author": "Xiaolong Wang;Runsen Xu;Zhuofan Cui;Zeyu Wan;Yu Zhang", "authorids": "~Xiaolong_Wang13;~Runsen_Xu1;~Zhuofan_Cui1;~Zeyu_Wan1;~Yu_Zhang31", "gender": "M;M;F;M;M", "homepage": "https://guanfang12.github.io;;https://github.com/zfcui33;;https://person.zju.edu.cn/zhangyu", "dblp": "91/952-13;289/6916;;;50/671-18", "google_scholar": ";MOobrCcAAAAJ;;;", "orcid": ";;;;0000-0002-0043-4904", "linkedin": ";runsen-xu-4262a3272/;;;", "or_profile": "~Xiaolong_Wang13;~Runsen_Xu1;~Zhuofan_Cui1;~Zeyu_Wan1;~Yu_Zhang31", "aff": "Zhejiang University;The Chinese University of Hong Kong;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;ie.cuhk.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "MS student;PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2023finegrained,\ntitle={Fine-Grained Cross-View Geo-Localization Using a Correlation-Aware Homography Estimator},\nauthor={Xiaolong Wang and Runsen Xu and Zhuofan Cui and Zeyu Wan and Yu Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cgiP4cMBP9}\n}", "github": "", "project": "", "reviewers": "NjYP;ZVTo;NnZj;Ak83", "pdf_size": 8025941, "rating": "3;3;5;6", "confidence": "5;4;5;5", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "1;2;3;3", "wc_summary": "89;69;83;70", "wc_strengths": "20;18;99;31", "wc_weaknesses": "840;324;183;74", "wc_questions": "7;24;6;13", "wc_limitations": "23;1;7;72", "wc_review": "979;436;378;260", "wc_reply_reviewers": "684;104;16;27", "wc_reply_authors": "1475;809;54;49", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 77.75, 8.525696452489967 ], "wc_strengths_avg": [ 42.0, 33.279122584587476 ], "wc_weaknesses_avg": [ 355.25, 293.5688803330489 ], "wc_questions_avg": [ 12.5, 7.158910531638177 ], "wc_limitations_avg": [ 25.75, 27.887048965424793 ], "wc_review_avg": [ 513.25, 276.2782790955525 ], "wc_reply_reviewers_avg": [ 207.75, 277.04546107092244 ], "wc_reply_authors_avg": [ 596.75, 593.9235535824455 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18204295680726531777&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;ie.cuhk.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Zhejiang University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "ZJU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong 
SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Expressive probabilistic sampling in recurrent neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71076", "id": "ch1buUOGa3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e2a1a8a037f9a06004fe651054e8938-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ch1buUOGa3", "openreview": "https://openreview.net/forum?id=ch1buUOGa3", "poster": "/media/PosterPDFs/NeurIPS%202023/71076.png?t=1702264553.853362", "slides": "https://nips.cc/virtual/2023/poster/71076", "video": "https://nips.cc/virtual/2023/poster/71076", "author_site": "Shirui Chen, Linxing Jiang, Rajesh PN Rao, Eric Shea-Brown", "tldr": "", "abstract": "In sampling-based Bayesian models of brain function, neural activities are assumed to be samples from probability distributions that the brain uses for probabilistic computation. However, a comprehensive understanding of how mechanistic models of neural dynamics can sample from arbitrary distributions is still lacking. We use tools from functional analysis and stochastic differential equations to explore the minimum architectural requirements for $\\textit{recurrent}$ neural circuits to sample from complex distributions. We first consider the traditional sampling model consisting of a network of neurons whose outputs directly represent the samples ($\\textit{sampler-only}$ network). We argue that synaptic current and firing-rate dynamics in the traditional model have limited capacity to sample from a complex probability distribution. We show that the firing rate dynamics of a recurrent neural circuit with a separate set of output units can sample from an arbitrary probability distribution. We call such circuits $\\textit{reservoir-sampler networks}$ (RSNs). We propose an efficient training procedure based on denoising score matching that finds recurrent and output weights such that the RSN implements Langevin sampling. We empirically demonstrate our model's ability to sample from several complex data distributions using the proposed neural dynamics and discuss its applicability to developing the next generation of sampling-based Bayesian brain models.", "keywords": "neural coding;probabilistic sampling;neural dynamics;recurrent neural network", "primary_area": "", "supplementary_material": "/attachment/defb9af6daf14c64da4a35e93dccd216b5f7422f.pdf", "author": "Shirui Chen;Linxing Preston Jiang;Rajesh P. N. Rao;Eric Todd SheaBrown", "authorids": "~Shirui_Chen1;~Linxing_Preston_Jiang1;~Rajesh_P._N._Rao1;~Eric_Todd_SheaBrown1", "gender": "M;M;;", "homepage": "https://github.com/chinsengi;https://lpjiang97.github.io/;;", "dblp": ";;;", "google_scholar": ";B706p2YAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shirui_Chen1;~Linxing_Preston_Jiang1;~Rajesh_P._N._Rao1;~Eric_Todd_SheaBrown1", "aff": "University of Washington;Department of Computer Science, University of Washington;;", "aff_domain": "uw.edu;cs.washington.edu;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nchen2023expressive,\ntitle={Expressive probabilistic sampling in recurrent neural networks},\nauthor={Shirui Chen and Linxing Preston Jiang and Rajesh P. N. 
Rao and Eric Todd SheaBrown},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ch1buUOGa3}\n}", "github": "", "project": "", "reviewers": "pX3M;wwgi;sqZn;kc8J;Y31x", "pdf_size": 8450719, "rating": "5;6;6;7;7", "confidence": "5;3;4;2;4", "soundness": "2;3;2;3;4", "novelty": "2;3;3;4;3", "presentation": "3;3;3;3;3", "wc_summary": "43;75;68;129;96", "wc_strengths": "120;35;74;96;13", "wc_weaknesses": "506;46;92;103;11", "wc_questions": "204;139;2;321;119", "wc_limitations": "32;6;8;10;5", "wc_review": "905;301;244;659;244", "wc_reply_reviewers": "128;137;15;97;0", "wc_reply_authors": "36;34;10;50;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.2, 28.88182819698227 ], "wc_strengths_avg": [ 67.6, 39.08247689182455 ], "wc_weaknesses_avg": [ 151.6, 180.24050599130038 ], "wc_questions_avg": [ 157.0, 104.78358650094012 ], "wc_limitations_avg": [ 12.2, 10.047885349664377 ], "wc_review_avg": [ 470.6, 266.70478060957214 ], "wc_reply_reviewers_avg": [ 75.4, 57.20349639663646 ], "wc_reply_authors_avg": [ 26.0, 18.286607121059937 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6813851438692469, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17020800810528831489&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "uw.edu;cs.washington.edu;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A-NeSI: A Scalable Approximate Method for Probabilistic Neurosymbolic Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71075", "id": "chlTA9Cegc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4d9944ab3330fe6af8efb9260aa9f307-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=chlTA9Cegc", "openreview": "https://openreview.net/forum?id=chlTA9Cegc", "poster": "/media/PosterPDFs/NeurIPS%202023/71075.png?t=1701863933.806514", "slides": "https://nips.cc/virtual/2023/poster/71075", "video": "https://nips.cc/virtual/2023/poster/71075", "author_site": "Emile van Krieken, Thiviyan Thanapalasingam, Jakub Tomczak, Frank van Harmelen, Annette Ten Teije", "tldr": "", "abstract": "We study the problem of combining neural networks with symbolic reasoning. Recently introduced frameworks for Probabilistic Neurosymbolic Learning (PNL), such as DeepProbLog, perform exponential-time exact inference, limiting the scalability of PNL solutions. We introduce Approximate Neurosymbolic Inference (A-NeSI): a new framework for PNL that uses neural networks for scalable approximate inference. 
A-NeSI 1) performs approximate inference in polynomial time without changing the semantics of probabilistic logics; 2) is trained using data generated by the background knowledge; 3) can generate symbolic explanations of predictions; and 4) can guarantee the satisfaction of logical constraints at test time, which is vital in safety-critical applications. Our experiments show that A-NeSI is the first end-to-end method to solve three neurosymbolic tasks with exponential combinatorial scaling. Finally, our experiments show that A-NeSI achieves explainability and safety without a penalty in performance.", "keywords": "Neurosymbolic Learning;Generative Modeling;Approximate Inference", "primary_area": "", "supplementary_material": "/attachment/fcb208b98a46c6ad2e780e61341f2b46a1329e1a.zip", "author": "Emile van Krieken;Thiviyan Thanapalasingam;Jakub M. Tomczak;Frank Van Harmelen;Annette Ten Teije", "authorids": "~Emile_van_Krieken1;~Thiviyan_Thanapalasingam1;~Jakub_M._Tomczak1;~Frank_Van_Harmelen2;~Annette_Ten_Teije2", "gender": "M;M;M;F;M", "homepage": "https://emilevankrieken.com;https://thiviyansingam.com/;https://www.cs.vu.nl/~frankh;https://research.vu.nl/en/persons/acm-ten-teije;https://jmtomczak.github.io/", "dblp": "235/1698;;h/FrankvanHarmelen;https://dblp.uni-trier.de/pid/96/3134.html;80/8238", "google_scholar": "https://scholar.google.nl/citations?user=il8Y0B4AAAAJ;F2PvjdUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=2fDYEcgAAAAJ;https://scholar.google.pl/citations?user=XB99pR4AAAAJ", "orcid": "0000-0001-5502-4817;0000-0002-0170-9105;0000-0002-7913-0048;0000-0002-9771-8822;0000-0001-8634-6878", "linkedin": "emile-van-krieken/;;frankvanharmelen/;;jakub-tomczak-04305314a/", "or_profile": "~Emile_van_Krieken1;~Thiviyan_Thanapalasingam1;~Frank_Van_Harmelen2;~Annette_Ten_Teije2;~Jakub_Mikolaj_Tomczak1", "aff": "Vrije Universiteit Amsterdam;University of Amsterdam;Vrije Universiteit Amsterdam;Vrije Universiteit Amsterdam;Vrije Universiteit Amsterdam", "aff_domain": "vu.nl;uva.nl;vu.nl;vu.nl;vu.nl", "position": "PhD student;PhD student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkrieken2023anesi,\ntitle={A-Ne{SI}: A Scalable Approximate Method for Probabilistic Neurosymbolic Inference},\nauthor={Emile van Krieken and Thiviyan Thanapalasingam and Jakub M. 
Tomczak and Frank Van Harmelen and Annette Ten Teije},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=chlTA9Cegc}\n}", "github": "", "project": "", "reviewers": "8FBf;agwr;xznW;iEEt", "pdf_size": 3746198, "rating": "6;7;7;7", "confidence": "4;3;4;4", "soundness": "3;4;3;4", "novelty": "2;4;2;3", "presentation": "3;4;2;4", "wc_summary": "73;31;56;76", "wc_strengths": "49;125;96;85", "wc_weaknesses": "191;93;92;12", "wc_questions": "53;338;24;49", "wc_limitations": "17;19;90;7", "wc_review": "383;606;358;229", "wc_reply_reviewers": "113;18;0;38", "wc_reply_authors": "214;0;0;21", "reply_reviewers": "2;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 59.0, 17.874562931719478 ], "wc_strengths_avg": [ 88.75, 27.206387117733954 ], "wc_weaknesses_avg": [ 97.0, 63.44682813190901 ], "wc_questions_avg": [ 116.0, 128.6526330861518 ], "wc_limitations_avg": [ 33.25, 33.07850510527947 ], "wc_review_avg": [ 394.0, 135.63369787777668 ], "wc_reply_reviewers_avg": [ 42.25, 43.002180177288686 ], "wc_reply_authors_avg": [ 58.75, 90.04269820479615 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17810784681082117804&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "vu.nl;uva.nl;vu.nl;vu.nl;vu.nl", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Vrije Universiteit Amsterdam;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.vu.nl;https://www.uva.nl", "aff_unique_abbr": "VU Amsterdam;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Convergent Bregman Plug-and-Play Image Restoration for Poisson Inverse Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71074", "id": "clCELP8zFb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/56db53e53db1b29ae658e53fb764f067-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=clCELP8zFb", "openreview": "https://openreview.net/forum?id=clCELP8zFb", "poster": "/media/PosterPDFs/NeurIPS%202023/71074.png?t=1701939539.8787231", "slides": "https://nips.cc/virtual/2023/poster/71074", "video": "https://nips.cc/virtual/2023/poster/71074", "author_site": "Samuel Hurault, Ulugbek Kamilov, Arthur Leclaire, Nicolas Papadakis", "tldr": "", "abstract": "Plug-and-Play (PnP) methods are efficient iterative algorithms for solving ill-posed image inverse problems. PnP methods are obtained by using deep Gaussian denoisers instead of the proximal operator or the gradient-descent step within proximal algorithms. Current PnP schemes rely on data-fidelity terms that have either Lipschitz gradients or closed-form proximal operators, which is not applicable to Poisson inverse problems. Based on the observation that the Gaussian noise is not the adequate noise model in this setting, we propose to generalize PnP using the Bregman Proximal Gradient (BPG) method. 
BPG replaces the Euclidean distance with a Bregman divergence that can better capture the smoothness properties of the problem. We introduce the Bregman Score Denoiser specifically parametrized and trained for the new Bregman geometry and prove that it corresponds to the proximal operator of a nonconvex potential. We propose two PnP algorithms based on the Bregman Score Denoiser for solving Poisson inverse problems. Extending the convergence results of BPG in the nonconvex settings, we show that the proposed methods converge, targeting stationary points of an explicit global functional. Experimental evaluations conducted on various Poisson inverse problems validate the convergence results and showcase effective restoration performance.", "keywords": "Plug-and-Play;Poisson Inverse Problems;Bregman distance;Proximal Gradient Descent;nonconvex and nonsmooth optimization;Poisson inverse problems", "primary_area": "", "supplementary_material": "", "author": "Samuel Hurault;Ulugbek Kamilov;Arthur Leclaire;Nicolas Papadakis", "authorids": "~Samuel_Hurault1;~Ulugbek_Kamilov1;~Arthur_Leclaire1;~Nicolas_Papadakis3", "gender": "M;Not Specified;M;M", "homepage": ";https://ukmlv.github.io;https://perso.telecom-paristech.fr/aleclaire/;https://www.math.u-bordeaux.fr/~npapadak/", "dblp": "239/3588;73/9223;130/1813;70/1520", "google_scholar": "https://scholar.google.fr/citations?user=f_rtYCAAAAAJ;https://scholar.google.com.tw/citations?user=3qYUSDwAAAAJ;;https://scholar.google.fr/citations?user=hfyLiLYAAAAJ", "orcid": ";0000-0001-6770-3278;;", "linkedin": ";;;", "or_profile": "~Samuel_Hurault1;~Ulugbek_Kamilov1;~Arthur_Leclaire1;~Nicolas_Papadakis3", "aff": "University of Bordeaux;Washington University, St. Louis;University of Bordeaux;CNRS/IMB", "aff_domain": "u-bordeaux.fr;wustl.edu;u-bordeaux.fr;u-bordeaux.fr", "position": "PhD student;Assistant Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nhurault2023convergent,\ntitle={Convergent Bregman Plug-and-Play Image Restoration for Poisson Inverse Problems},\nauthor={Samuel Hurault and Ulugbek Kamilov and Arthur Leclaire and Nicolas Papadakis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=clCELP8zFb}\n}", "github": "", "project": "", "reviewers": "oqFb;5jWQ;6Scw", "pdf_size": 2831066, "rating": "6;6;6", "confidence": "3;4;4", "soundness": "4;4;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "46;132;98", "wc_strengths": "47;109;24", "wc_weaknesses": "144;346;59", "wc_questions": "58;56;30", "wc_limitations": "13;47;2", "wc_review": "308;690;213", "wc_reply_reviewers": "28;50;14", "wc_reply_authors": "0;372;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 92.0, 35.364765892999586 ], "wc_strengths_avg": [ 60.0, 35.89800365851375 ], "wc_weaknesses_avg": [ 183.0, 120.36887748361977 ], "wc_questions_avg": [ 48.0, 12.754084313139327 ], "wc_limitations_avg": [ 20.666666666666668, 19.154343864744856 ], "wc_review_avg": [ 403.6666666666667, 206.14935254702004 ], "wc_reply_reviewers_avg": [ 30.666666666666668, 14.817407180595247 ], "wc_reply_authors_avg": [ 124.0, 175.36248173426378 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 
0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11121920003816403198&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "u-bordeaux.fr;wustl.edu;u-bordeaux.fr;u-bordeaux.fr", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Bordeaux;Washington University in St. Louis;CNRS", "aff_unique_dep": ";;Institut de Math\u00e9matiques de Bordeaux", "aff_unique_url": "https://www.u-bordeaux.fr;https://wustl.edu;https://www.cnrs.fr", "aff_unique_abbr": "UBordeaux;WUSTL;CNRS", "aff_campus_unique_index": "1", "aff_campus_unique": ";St. Louis", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "France;United States" }, { "title": "Hierarchical Vector Quantized Transformer for Multi-class Unsupervised Anomaly Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71073", "id": "clJTNssgn6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1abc87c67cc400a67b869358e627fe37-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=clJTNssgn6", "openreview": "https://openreview.net/forum?id=clJTNssgn6", "poster": "/media/PosterPDFs/NeurIPS%202023/71073.png?t=1697425276.314875", "slides": "https://nips.cc/virtual/2023/poster/71073", "video": "https://nips.cc/virtual/2023/poster/71073", "author_site": "Ruiying Lu, YuJie Wu, Long Tian, Dongsheng Wang, Bo Chen, Xiyang Liu, Ruimin Hu", "tldr": "", "abstract": "Unsupervised image Anomaly Detection (UAD) aims to learn robust and discriminative representations of normal samples. While separate solutions per class incur expensive computation and offer limited generalizability, this paper focuses on building a unified framework for multiple classes. Under such a challenging setting, popular reconstruction-based networks with a continuous latent representation assumption always suffer from the \"identical shortcut\" issue, where both normal and abnormal samples can be well recovered and are difficult to distinguish. To address this pivotal issue, we propose a hierarchical vector quantized prototype-oriented Transformer under a probabilistic framework. First, instead of learning the continuous representations, we preserve the typical normal patterns as discrete iconic prototypes, and confirm the importance of Vector Quantization in preventing the model from falling into the shortcut. The vector quantized iconic prototypes are integrated into the Transformer for reconstruction, such that the abnormal data point is flipped to a normal data point. Second, we investigate an exquisite hierarchical framework to relieve the codebook collapse issue and replenish frail normal patterns. Third, a prototype-oriented optimal transport method is proposed to better regulate the prototypes and hierarchically evaluate the abnormal score. Evaluated on the MVTec-AD and VisA datasets, our model surpasses the state-of-the-art alternatives and possesses good interpretability.
The code is available at https://github.com/RuiyingLu/HVQ-Trans.", "keywords": "Anomaly Detection;Transformer;Vector Quantization;Unsupervised Anomaly Detection", "primary_area": "", "supplementary_material": "/attachment/c75f5a0e97ccfcd053d978c310034f915984fed4.pdf", "author": "Ruiying Lu;YuJie Wu;Long Tian;Dongsheng Wang;Bo Chen;Xiyang Liu;Ruimin Hu", "authorids": "~Ruiying_Lu1;~YuJie_Wu5;~Long_Tian1;~Dongsheng_Wang4;~Bo_Chen1;~Xiyang_Liu4;~Ruimin_Hu4", "gender": "F;M;M;M;M;M;M", "homepage": "https://github.com/RuiyingLu;https://github.com/deeper2learn;https://faculty.xidian.edu.cn/TL1/zh_CN/index.htm;http://web.xidian.edu.cn/bchen/en/index.html;https://faculty.xidian.edu.cn/LXY10/zh_CN/index.htm;https://wds2014.github.io/;https://web.xidian.edu.cn/rmhu/", "dblp": "255/5995;02/3699;;89/5615-1;;21/841-3;97/1491", "google_scholar": ";;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.be/citations?user=c9ZfhU0AAAAJ", "orcid": "0000-0002-8825-6064;;;0000-0001-5151-9388;;0000-0002-3380-5337;0000-0002-0290-5757", "linkedin": ";;;;;;", "or_profile": "~Ruiying_Lu1;~YuJie_Wu5;~Long_Tian1;~Bo_Chen1;~Xiyang_Liu4;~dongsheng_wang3;~Ruimin_Hu1", "aff": "Xidian University;Xi'an University of Electronic Science and Technology;Xi'an University of Software Engineering Institute;Xidian University;Xi'an University of Electronic Science and Technology;Xidian University;Xidian University", "aff_domain": "xidian.edu;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn", "position": "Assistant Professor;MS student;Assistant Professor;Full Professor;Full Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nlu2023hierarchical,\ntitle={Hierarchical Vector Quantized Transformer for Multi-class Unsupervised Anomaly Detection},\nauthor={Ruiying Lu and YuJie Wu and Long Tian and Dongsheng Wang and Bo Chen and Xiyang Liu and Ruimin Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=clJTNssgn6}\n}", "github": "", "project": "", "reviewers": "ksQL;uU2U;XjaL;Ah19;JdWV", "pdf_size": 13647176, "rating": "3;5;5;5;5", "confidence": "4;5;4;5;4", "soundness": "1;3;3;3;3", "novelty": "1;2;3;3;2", "presentation": "2;3;3;3;3", "wc_summary": "37;76;82;73;26", "wc_strengths": "22;42;49;45;29", "wc_weaknesses": "117;79;62;275;392", "wc_questions": "48;76;3;7;3", "wc_limitations": "36;15;3;9;25", "wc_review": "260;288;199;409;475", "wc_reply_reviewers": "46;69;0;25;29", "wc_reply_authors": "546;555;0;233;32", "reply_reviewers": "2;1;0;1;1", "reply_authors": "3;2;1;2;2", "rating_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 58.8, 22.745549015137005 ], "wc_strengths_avg": [ 37.4, 10.209799214480174 ], "wc_weaknesses_avg": [ 185.0, 128.01406172760866 ], "wc_questions_avg": [ 27.4, 29.64186228967404 ], "wc_limitations_avg": [ 17.6, 11.723480711802276 ], "wc_review_avg": [ 326.2, 101.0136624422657 ], "wc_reply_reviewers_avg": [ 33.8, 22.93817778290159 ], "wc_reply_authors_avg": [ 273.2, 240.10947503170297 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.40824829046386313, "gs_citation": 60, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17015234389603681427&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "xidian.edu;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;1;0;0", "aff_unique_norm": "Xidian University;Xi'an University of Electronic Science and Technology;Xi'an University of Software Engineering", "aff_unique_dep": ";;Software Engineering Institute", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;http://www.xauat.edu.cn", "aff_unique_abbr": "Xidian;Xidian University;", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Self-Supervised Visual Acoustic Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71072", "id": "clKbFMt29V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4cbec10b0cf25025e3f9fcfd943bb58c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=clKbFMt29V", "openreview": "https://openreview.net/forum?id=clKbFMt29V", "poster": "/media/PosterPDFs/NeurIPS%202023/71072.png?t=1702238758.8234258", "slides": "https://nips.cc/virtual/2023/poster/71072", "video": "https://nips.cc/virtual/2023/poster/71072", "author_site": "Arjun Somayazulu, Changan Chen, Kristen Grauman", "tldr": "", "abstract": "Acoustic matching aims to re-synthesize an audio clip to sound as if it were recorded in a target acoustic environment. Existing methods assume access to paired training data, where the audio is observed in both source and target environments, but this limits the diversity of training data or requires the use of simulated data or heuristics to create paired samples. We propose a self-supervised approach to visual acoustic matching where training samples include only the target scene image and audio---without acoustically mismatched source audio for reference. Our approach jointly learns to disentangle room acoustics and re-synthesize audio into the target environment, via a conditional GAN framework and a novel metric that quantifies the level of residual acoustic information in the de-biased audio. 
Training with either in-the-wild web data or simulated data, we demonstrate it outperforms the state-of-the-art on multiple challenging datasets and a wide variety of real-world audio and environments.", "keywords": "Audio-Visual learning;Visual Acoustic Matching", "primary_area": "", "supplementary_material": "/attachment/bb9a7572572cd8646cd77d2254baca747b2a5874.zip", "author": "Arjun Somayazulu;Changan Chen;Kristen Grauman", "authorids": "~Arjun_Somayazulu1;~Changan_Chen2;~Kristen_Grauman1", "gender": "M;;F", "homepage": ";;http://www.cs.utexas.edu/~grauman/", "dblp": "333/1844;;57/4553", "google_scholar": ";;Jp6Mz1sAAAAJ", "orcid": ";;", "linkedin": "arjunsomayazulu/;;", "or_profile": "~Arjun_Somayazulu1;~Changan_Chen2;~Kristen_Grauman1", "aff": "University of Texas at Austin;;University of Texas, Austin", "aff_domain": "cs.utexas.edu;;utexas.edu", "position": "PhD student;;Professor", "bibtex": "@inproceedings{\nsomayazulu2023selfsupervised,\ntitle={Self-Supervised Visual Acoustic Matching},\nauthor={Arjun Somayazulu and Changan Chen and Kristen Grauman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=clKbFMt29V}\n}", "github": "", "project": "", "reviewers": "2YZ1;AZuk;Ky2n;bz7J;7iLe", "pdf_size": 5334604, "rating": "5;5;6;6;6", "confidence": "4;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "2;3;3;4;3", "wc_summary": "45;294;286;198;80", "wc_strengths": "65;52;62;107;40", "wc_weaknesses": "136;523;164;330;78", "wc_questions": "45;100;58;216;46", "wc_limitations": "7;3;73;191;1", "wc_review": "298;972;643;1042;245", "wc_reply_reviewers": "29;189;13;75;58", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 180.6, 102.74161766295097 ], "wc_strengths_avg": [ 65.2, 22.657449106199046 ], "wc_weaknesses_avg": [ 246.2, 161.76081107610705 ], "wc_questions_avg": [ 93.0, 64.67766229541695 ], "wc_limitations_avg": [ 55.0, 73.13549070048002 ], "wc_review_avg": [ 640.0, 330.09877309678086 ], "wc_reply_reviewers_avg": [ 72.8, 62.00129030915406 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5672758484618635570&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.utexas.edu;;utexas.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Bayesian Learning of Optimal Policies in Markov Decision Processes with Countably Infinite State-Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71071", "id": "cm53OBkctM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c79d6ed1788653643a1ac67b6ea32a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cm53OBkctM", "openreview": "https://openreview.net/forum?id=cm53OBkctM", "poster": 
"", "slides": "https://nips.cc/virtual/2023/poster/71071", "video": "https://nips.cc/virtual/2023/poster/71071", "author_site": "Saghar Adler, Vijay Subramanian", "tldr": "", "abstract": "Models of many real-life applications, such as queueing models of communication networks or computing systems, have a countably infinite state-space. Algorithmic and learning procedures that have been developed to produce optimal policies mainly focus on finite state settings, and do not directly apply to these models. To overcome this lacuna, in this work we study the problem of optimal control of a family of discrete-time countable state-space Markov Decision Processes (MDPs) governed by an unknown parameter $\\theta\\in\\Theta$, \n and defined on a countably-infinite state-space $\\mathcal X=\\mathbb{Z}_+^d$, with finite action space $\\mathcal A$, and an unbounded cost function. We take a Bayesian perspective with the random unknown parameter $\\boldsymbol{\\theta}^*$ generated via a given fixed prior distribution on $\\Theta$. To optimally control the unknown MDP, we propose an algorithm based on Thompson sampling with dynamically-sized episodes: at the beginning of each episode, the posterior distribution formed via Bayes' rule is used to produce a parameter estimate, which then decides the policy applied during the episode. To ensure the stability of the Markov chain obtained by following the policy chosen for each parameter, we impose ergodicity assumptions. From this condition and using the solution of the average cost Bellman equation, we establish an $\\tilde O(dh^d\\sqrt{|\\mathcal A|T})$ upper bound on the Bayesian regret of our algorithm, where $T$ is the time-horizon. Finally, to elucidate the applicability of our algorithm, we consider two different queueing models with unknown dynamics, and show that our algorithm can be applied to develop approximately optimal control algorithms.", "keywords": "Thompson Sampling;Reinforcement Learning;Queueing theory", "primary_area": "", "supplementary_material": "", "author": "Saghar Adler;Vijay Subramanian", "authorids": "~Saghar_Adler1;~Vijay_Subramanian1", "gender": "F;M", "homepage": ";https://subramanian.engin.umich.edu", "dblp": ";36/3972", "google_scholar": "vQxZ3tAAAAAJ;vYQAKZwAAAAJ", "orcid": ";0000-0001-9136-6419", "linkedin": ";", "or_profile": "~Saghar_Adler1;~Vijay_Subramanian1", "aff": "Electrical Engineering and Computer Science, University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "eecs.umich.edu;umich.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nadler2023bayesian,\ntitle={Bayesian Learning of Optimal Policies in Markov Decision Processes with Countably Infinite State-Space},\nauthor={Saghar Adler and Vijay Subramanian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cm53OBkctM}\n}", "github": "", "project": "", "reviewers": "5uHS;Raj1;5dJh;MJCc", "pdf_size": 969529, "rating": "5;6;7;7", "confidence": "2;4;4;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;2", "wc_summary": "174;44;89;94", "wc_strengths": "49;26;121;130", "wc_weaknesses": "107;78;203;234", "wc_questions": "2;129;178;120", "wc_limitations": "1;5;1;31", "wc_review": "333;282;592;609", "wc_reply_reviewers": "20;0;43;148", "wc_reply_authors": "8;0;8;284", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], 
"soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.25, 46.82080200082011 ], "wc_strengths_avg": [ 81.5, 44.85810963471376 ], "wc_weaknesses_avg": [ 155.5, 64.76302957706658 ], "wc_questions_avg": [ 107.25, 64.65050270492875 ], "wc_limitations_avg": [ 9.5, 12.519984025548915 ], "wc_review_avg": [ 454.0, 147.72779020888385 ], "wc_reply_reviewers_avg": [ 52.75, 57.05863212520959 ], "wc_reply_authors_avg": [ 75.0, 120.71039723238425 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18325032219555031247&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "eecs.umich.edu;umich.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "Department of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Context-PIPs: Persistent Independent Particles Demands Spatial Context Features", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71070", "id": "cnpkzQZaLU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad2fa437f7c23e4e9875599c6065d18a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cnpkzQZaLU", "openreview": "https://openreview.net/forum?id=cnpkzQZaLU", "poster": "/media/PosterPDFs/NeurIPS%202023/71070.png?t=1702008195.6810632", "slides": "https://nips.cc/virtual/2023/poster/71070", "video": "https://nips.cc/virtual/2023/poster/71070", "author_site": "Weikang Bian, Zhaoyang Huang, Zhaoyang Huang, Xiaoyu Shi, Yitong Dong, Yijin Li, Hongsheng Li", "tldr": "", "abstract": "We tackle the problem of Persistent Independent Particles (PIPs), also called Tracking Any Point (TAP), in videos, which specifically aims at estimating persistent long-term trajectories of query points in videos. Previous methods attempted to estimate these trajectories independently to incorporate longer image sequences, therefore, ignoring the potential benefits of incorporating spatial context features. \nWe argue that independent video point tracking also demands spatial context features. To this end, we propose a novel framework Context-PIPs, which effectively improves point trajectory accuracy by aggregating spatial context features in videos. Context-PIPs contains two main modules: 1) a SOurse Feature Enhancement (SOFE) module, and 2) a TArget Feature Aggregation (TAFA) module. Context-PIPs significantly improves PIPs all-sided, reducing 11.4\\% Average Trajectory Error of Occluded Points (ATE-Occ) on CroHD and increasing 11.8\\% Average Percentage of Correct Keypoint (A-PCK) on TAP-Vid-Kinetics. 
Demos are available at \\url{https://wkbian.github.io/Projects/Context-PIPs/}.", "keywords": "Point Tracking; Optical Flow; Video Correspondence; Computer Vision;", "primary_area": "", "supplementary_material": "", "author": "Weikang BIAN;Zhaoyang Huang;Xiaoyu Shi;Yitong Dong;Yijin Li;Hongsheng Li", "authorids": "~Weikang_BIAN1;~Zhaoyang_Huang2;~Xiaoyu_Shi1;~Yitong_Dong1;~Yijin_Li1;~Hongsheng_Li3", "gender": ";;M;F;M;M", "homepage": "https://wkbian.github.io/;https://drinkingcoder.github.io/;https://xiaoyushi97.github.io/;;https://eugenelyj.github.io/;http://www.ee.cuhk.edu.hk/~hsli", "dblp": "252/4248;;;;178/6879;27/7402-1", "google_scholar": "_PjUeqcAAAAJ;y2xos7IAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;BN2Ze-QAAAAJ", "orcid": "0000-0001-9986-3348;0000-0001-7688-1471;;0009-0000-4932-8814;;", "linkedin": ";;;;;", "or_profile": "~Weikang_BIAN1;~Zhaoyang_Huang2;~Xiaoyu_Shi1;~Yitong_Dong1;~Yijin_Li1;~Hongsheng_Li3", "aff": "The Chinese University of Hong Kong, The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Zhejiang University;Zhejiang University;The Chinese University of Hong Kong", "aff_domain": "ee.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;zju.edu.cn;zju.edu.cn;cuhk.edu.hk", "position": "Intern;PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbian2023contextpips,\ntitle={Context-{PIP}s: Persistent Independent Particles Demands Context Features},\nauthor={Weikang BIAN and Zhaoyang Huang and Xiaoyu Shi and Yitong Dong and Yijin Li and Hongsheng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cnpkzQZaLU}\n}", "github": "", "project": "", "reviewers": "2Jdq;WGNK;ydfF;ujcD", "pdf_size": 4271359, "rating": "6;6;6;7", "confidence": "4;4;2;4", "soundness": "3;3;2;3", "novelty": "2;3;2;4", "presentation": "2;3;2;3", "wc_summary": "104;129;64;106", "wc_strengths": "20;37;48;212", "wc_weaknesses": "39;327;52;134", "wc_questions": "59;58;61;2", "wc_limitations": "21;2;9;27", "wc_review": "243;553;234;481", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.75, 23.381349405027933 ], "wc_strengths_avg": [ 79.25, 77.28963384568463 ], "wc_weaknesses_avg": [ 138.0, 115.0369505854532 ], "wc_questions_avg": [ 45.0, 24.849547279578356 ], "wc_limitations_avg": [ 14.75, 9.807522622966516 ], "wc_review_avg": [ 377.75, 141.59338791059417 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2152403943008970193&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ee.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;zju.edu.cn;zju.edu.cn;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.zju.edu.cn", 
"aff_unique_abbr": "CUHK;ZJU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Implicit Manifold Gaussian Process Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71069", "id": "co4p15OMoc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d611d06e3207330555fbc10810e70163-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=co4p15OMoc", "openreview": "https://openreview.net/forum?id=co4p15OMoc", "poster": "/media/PosterPDFs/NeurIPS%202023/71069.png?t=1701784744.480087", "slides": "https://nips.cc/virtual/2023/poster/71069", "video": "https://nips.cc/virtual/2023/poster/71069", "author_site": "Bernardo Fichera, Slava Borovitskiy, Andreas Krause, Aude G Billard", "tldr": "", "abstract": "Gaussian process regression is widely used because of its ability to provide well-calibrated uncertainty estimates and handle small or sparse datasets. However, it struggles with high-dimensional data. One possible way to scale this technique to higher dimensions is to leverage the implicit low-dimensional manifold upon which the data actually lies, as postulated by the manifold hypothesis. Prior work ordinarily requires the manifold structure to be explicitly provided though, i.e. given by a mesh or be known to be one of the well-known manifolds like the sphere. In contrast, in this paper we propose a Gaussian process regression technique capable of inferring implicit structure directly from data (labeled and unlabeled) in a fully differentiable way. For the resulting model, we discuss its convergence to the Mat\u00e9rn Gaussian process on the assumed manifold. Our technique scales up to hundreds of thousands of data points, and improves the predictive performance and calibration of the standard Gaussian process regression in some high-dimensional settings.", "keywords": "Gaussian process;manifolds;manifold learning;uncertainty;regression;graph Laplacian", "primary_area": "", "supplementary_material": "", "author": "Bernardo Fichera;Viacheslav Borovitskiy;Andreas Krause;Aude Billard", "authorids": "~Bernardo_Fichera1;~Viacheslav_Borovitskiy1;~Andreas_Krause1;aude.billard@epfl.ch", "gender": "M;M;M;", "homepage": "https://bernardofichera.xyz;https://vab.im/;https://las.inf.ethz.ch/krausea;", "dblp": ";259/3201;87/1831-1.html;", "google_scholar": ";https://scholar.google.ru/citations?user=1KqNyNMAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ;", "orcid": ";;0000-0001-7260-9673;", "linkedin": "bernardo-fichera/?locale=en_US;;krausea/;", "or_profile": "~Bernardo_Fichera1;~Viacheslav_Borovitskiy1;~Andreas_Krause1;aude.billard@epfl.ch", "aff": "EPFL;ETHZ - ETH Zurich;ETH Zurich;", "aff_domain": "epfl.ch;ethz.ch;ethz.ch;", "position": "PhD student;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nfichera2023implicit,\ntitle={Implicit Manifold Gaussian Process Regression},\nauthor={Bernardo Fichera and Viacheslav Borovitskiy and Andreas Krause and Aude Billard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=co4p15OMoc}\n}", "github": "", "project": "", "reviewers": "2ieq;ejKa;DAU4;g5fB", "pdf_size": 6639117, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "81;111;76;136", "wc_strengths": "51;81;68;83", "wc_weaknesses": "83;157;94;91", 
"wc_questions": "204;94;1;76", "wc_limitations": "42;15;142;5", "wc_review": "461;458;381;391", "wc_reply_reviewers": "172;63;87;0", "wc_reply_authors": "23;0;151;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.0, 24.238399287081645 ], "wc_strengths_avg": [ 70.75, 12.774486291041217 ], "wc_weaknesses_avg": [ 106.25, 29.57511622969553 ], "wc_questions_avg": [ 93.75, 72.5822808955464 ], "wc_limitations_avg": [ 51.0, 54.25403210822215 ], "wc_review_avg": [ 422.75, 36.93490896157726 ], "wc_reply_reviewers_avg": [ 80.5, 61.64616776410355 ], "wc_reply_authors_avg": [ 43.5, 62.77141068989927 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2093637381783106717&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "epfl.ch;ethz.ch;ethz.ch;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "EPFL;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.ethz.ch", "aff_unique_abbr": "EPFL;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Group Fairness in Peer Review", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71068", "id": "cpUuSV8kRw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ccba10dd4e80e7276054222bb95d467c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cpUuSV8kRw", "openreview": "https://openreview.net/forum?id=cpUuSV8kRw", "poster": "/media/PosterPDFs/NeurIPS%202023/71068.png?t=1702244549.5263116", "slides": "https://nips.cc/virtual/2023/poster/71068", "video": "https://nips.cc/virtual/2023/poster/71068", "author_site": "Haris Aziz, Evi Micha, Nisarg Shah", "tldr": "", "abstract": "Large conferences such as NeurIPS and AAAI serve as crossroads of various AI fields, since they attract submissions from a vast number of communities. However, in some cases, this has resulted in a poor reviewing experience for some communities, whose submissions get assigned to less qualified reviewers outside of their communities. An often-advocated solution is to break up any such large conference into smaller conferences, but this can lead to isolation of communities and harm interdisciplinary research. We tackle this challenge by introducing a notion of group fairness, called the core, which requires that every possible community (subset of researchers) to be treated in a way that prevents them from unilaterally benefiting by withdrawing from a large conference. \n\nWe study a simple peer review model, prove that it always admits a reviewing assignment in the core, and design an efficient algorithm to find one such assignment. 
\nWe use real data from CVPR and ICLR conferences to compare our algorithm to existing reviewing assignment algorithms on a number of metrics.", "keywords": "peer review; group fairness; core; stable", "primary_area": "", "supplementary_material": "/attachment/d4fd6df46b9fb4094c09fa399fb30952c4d12359.zip", "author": "Haris Aziz;Evi Micha;Nisarg Shah", "authorids": "~Haris_Aziz1;~Evi_Micha1;~Nisarg_Shah1", "gender": "M;F;M", "homepage": "https://sites.google.com/site/harisaziz/;https://evi-micha.github.io;https://www.cs.toronto.edu/~nisarg/", "dblp": ";204/3011;95/9508-1", "google_scholar": ";;https://scholar.google.ca/citations?user=klcw_tAAAAAJ", "orcid": ";;0000-0002-0946-3402", "linkedin": ";;", "or_profile": "~Haris_Aziz1;~Evi_Micha1;~Nisarg_Shah1", "aff": "University of New South Wales;University of Toronto;University of Toronto", "aff_domain": "unsw.edu.au;toronto.edu;utoronto.ca", "position": "Associate Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\naziz2023group,\ntitle={Group Fairness in Peer Review},\nauthor={Haris Aziz and Evi Micha and Nisarg Shah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cpUuSV8kRw}\n}", "github": "", "project": "", "reviewers": "YGAX;wnxf;VPuM;HKZ7;E8Ux", "pdf_size": 338018, "rating": "5;6;6;7;7", "confidence": "4;3;4;3;3", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;2;2;4;4", "wc_summary": "88;160;140;113;58", "wc_strengths": "56;126;246;107;41", "wc_weaknesses": "189;213;182;106;21", "wc_questions": "69;83;196;30;39", "wc_limitations": "6;12;24;19;141", "wc_review": "408;594;788;375;300", "wc_reply_reviewers": "351;66;0;80;0", "wc_reply_authors": "395;0;0;49;0", "reply_reviewers": "2;1;0;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 111.8, 36.27891949879434 ], "wc_strengths_avg": [ 115.2, 72.52971804715636 ], "wc_weaknesses_avg": [ 142.2, 70.40852221144824 ], "wc_questions_avg": [ 83.4, 59.513359844660094 ], "wc_limitations_avg": [ 40.4, 50.66991217675436 ], "wc_review_avg": [ 493.0, 176.42222082266167 ], "wc_reply_reviewers_avg": [ 99.4, 130.04245460617852 ], "wc_reply_authors_avg": [ 88.8, 154.2717083589859 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11698641286038252599&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "email": "unsw.edu.au;toronto.edu;utoronto.ca", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of New South Wales;University of Toronto", "aff_unique_dep": ";", "aff_unique_url": "https://www.unsw.edu.au;https://www.utoronto.ca", "aff_unique_abbr": "UNSW;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Australia;Canada" }, { "title": "Back-Modality: Leveraging Modal Transformation for Data Augmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71067", "id": "cr99foBDPV", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/e20a65c7308b7b94ed1178eebc45bf76-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cr99foBDPV", "openreview": "https://openreview.net/forum?id=cr99foBDPV", "poster": "/media/PosterPDFs/NeurIPS%202023/71067.png?t=1701410268.4735303", "slides": "https://nips.cc/virtual/2023/poster/71067", "video": "https://nips.cc/virtual/2023/poster/71067", "author_site": "Zhi Li, Yifan Liu, Yin Zhang", "tldr": "", "abstract": "We introduce Back-Modality, a novel data augmentation schema predicated on modal transformation. Data from an initial modality undergoes transformation to an intermediate modality, followed by a reverse transformation. This framework serves dual roles. On one hand, it operates as a general data augmentation strategy. On the other hand, it allows for other augmentation techniques, suitable for the intermediate modality, to enhance the initial modality. For instance, data augmentation methods applicable to pure text can be employed to augment images, thereby facilitating the cross-modality of data augmentation techniques. To validate the viability and efficacy of our framework, we proffer three instantiations of Back-Modality: back-captioning, back-imagination, and back-speech. Comprehensive evaluations across tasks such as image classification, sentiment classification, and textual entailment demonstrate that our methods consistently enhance performance under data-scarce circumstances.", "keywords": "data augmentation;cross-modal", "primary_area": "", "supplementary_material": "/attachment/944645a5630be398f371e04e7e4ccd97b5366cbe.pdf", "author": "Zhi Li;Yifan Liu;Yin Zhang", "authorids": "~Zhi_Li6;~Yifan_Liu11;~Yin_Zhang3", "gender": ";;M", "homepage": "https://scholar.google.com.hk/citations?user=82L1zVEAAAAJ&hl=zh-CN;https://person.zju.edu.cn/en/zhangyin;", "dblp": ";91/3045-6;", "google_scholar": ";vCoh6tYAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yifan_Liu11;~Yin_Zhang3;~Zhi_Ii1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "MS student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2023backmodality,\ntitle={Back-Modality: Leveraging Modal Transformation for Data Augmentation},\nauthor={Zhi Li and Yifan Liu and Yin Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cr99foBDPV}\n}", "github": "", "project": "", "reviewers": "QqX9;Usve;UnFZ;WjzN;44qx", "pdf_size": 972455, "rating": "6;6;6;6;7", "confidence": "4;3;4;3;5", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;2;3;2;3", "wc_summary": "76;139;95;37;39", "wc_strengths": "77;57;89;46;71", "wc_weaknesses": "155;366;189;44;173", "wc_questions": "95;53;52;2;177", "wc_limitations": "21;59;13;1;7", "wc_review": "424;674;438;130;467", "wc_reply_reviewers": "21;22;18;11;24", "wc_reply_authors": "51;58;38;49;59", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 77.2, 37.9810479055015 ], "wc_strengths_avg": [ 68.0, 15.073154945133417 ], "wc_weaknesses_avg": [ 185.4, 103.63512917925081 ], "wc_questions_avg": [ 75.8, 
58.55390678682337 ], "wc_limitations_avg": [ 20.2, 20.49780476051033 ], "wc_review_avg": [ 426.6, 173.75108632753927 ], "wc_reply_reviewers_avg": [ 19.2, 4.534313619501853 ], "wc_reply_authors_avg": [ 51.0, 7.563068160475615 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8296765658291806706&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "No-regret Algorithms for Fair Resource Allocation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71066", "id": "crNAh1EZKo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96842011407c2691ab4eefff48fc864d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=crNAh1EZKo", "openreview": "https://openreview.net/forum?id=crNAh1EZKo", "poster": "/media/PosterPDFs/NeurIPS%202023/71066.png?t=1702144781.883245", "slides": "https://nips.cc/virtual/2023/poster/71066", "video": "https://nips.cc/virtual/2023/poster/71066", "author_site": "Abhishek Sinha, Ativ Joshi, Rajarshi Bhattacharjee, Cameron Musco, Mohammad Hajiesmaili", "tldr": "", "abstract": "We consider a fair resource allocation problem in the no-regret setting against an unrestricted adversary. The objective is to allocate resources equitably among several agents in an online fashion so that the difference of the aggregate $\\alpha$-fair utilities of the agents achieved by an optimal static clairvoyant allocation and the online policy grows sublinearly with time. The problem inherits its difficulty from the non-separable nature of the global $\\alpha$-fairness function. Previously, it was shown that no online policy could achieve a sublinear standard regret in this problem. In this paper, we propose an efficient online resource allocation policy, called Online Fair Allocation ($\\texttt{OFA}$), that achieves sublinear $c_\\alpha$-approximate regret with approximation factor $c_\\alpha=(1-\\alpha)^{-(1-\\alpha)}\\leq 1.445,$ for $0\\leq \\alpha < 1$. Our upper bound on the $c_\\alpha$-regret for this problem exhibits a surprising \\emph{phase transition} phenomenon -- transitioning from a power-law to a constant at the critical exponent $\\alpha=\\frac{1}{2}.$ Our result also resolves an open problem in designing an efficient no-regret policy for the online job scheduling problem in certain parameter regimes. 
Along the way, we introduce new algorithmic and analytical techniques, including greedy estimation of the future gradients for non-additive global reward functions and bootstrapping second-order regret bounds, which may be of independent interest.", "keywords": "Online Learning;Bandit Algorithms;Learning Theory", "primary_area": "", "supplementary_material": "/attachment/9e2af4eb9bbe04ba46838c9f2d4ce762fdb45617.pdf", "author": "Abhishek Sinha;Ativ Joshi;Rajarshi Bhattacharjee;Cameron N Musco;Mohammad Hajiesmaili", "authorids": "~Abhishek_Sinha3;~Ativ_Joshi1;~Rajarshi_Bhattacharjee1;~Cameron_N_Musco1;~Mohammad_Hajiesmaili1", "gender": "M;;;M;M", "homepage": "https://www.tifr.res.in/~abhishek.sinha/;;https://rbhattacharj.github.io/;https://people.cs.umass.edu/~cmusco/;https://groups.cs.umass.edu/hajiesmaili/", "dblp": "47/9175;;255/5945;149/2327;49/7911", "google_scholar": "https://scholar.google.co.in/citations?user=Oc7BRX0AAAAJ;;https://scholar.google.com/citations?hl=en;EeYGZCwAAAAJ;XCGuYKIAAAAJ", "orcid": "0000-0001-7220-0691;;;;", "linkedin": "abhishek-sinha-a645291b/;;;;", "or_profile": "~Abhishek_Sinha3;~Ativ_Joshi1;~Rajarshi_Bhattacharjee1;~Cameron_N_Musco1;~Mohammad_Hajiesmaili1", "aff": "Tata Institute of Fundamental Research;;University of Massachusetts Amherst;University of Massachusetts, Amherst;College of Information and Computer Science, University of Massachusetts, Amherst", "aff_domain": "tifr.res.in;;cs.umass.edu;umass.edu;cics.umass.edu", "position": "Reader;;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nsinha2023noregret,\ntitle={No-regret Algorithms for Fair Resource Allocation},\nauthor={Abhishek Sinha and Ativ Joshi and Rajarshi Bhattacharjee and Cameron N Musco and Mohammad Hajiesmaili},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=crNAh1EZKo}\n}", "github": "", "project": "", "reviewers": "jNhn;wJNK;q9U9;DRz6", "pdf_size": 894797, "rating": "4;7;7;7", "confidence": "3;3;4;3", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;4;4;3", "wc_summary": "33;62;278;149", "wc_strengths": "23;18;167;66", "wc_weaknesses": "121;82;119;68", "wc_questions": "37;70;92;111", "wc_limitations": "1;11;6;3", "wc_review": "215;243;662;397", "wc_reply_reviewers": "0;34;7;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 130.5, 95.25885785584457 ], "wc_strengths_avg": [ 68.5, 59.85190055461898 ], "wc_weaknesses_avg": [ 97.5, 23.04886114323222 ], "wc_questions_avg": [ 77.5, 27.518175811634027 ], "wc_limitations_avg": [ 5.25, 3.766629793329841 ], "wc_review_avg": [ 379.25, 177.34482653858274 ], "wc_reply_reviewers_avg": [ 13.0, 12.747548783981962 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12065479059350108048&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "tifr.res.in;;cs.umass.edu;umass.edu;cics.umass.edu", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Tata Institute of 
Fundamental Research;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.tifr.res.in;https://www.umass.edu", "aff_unique_abbr": "TIFR;UMass Amherst", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "India;United States" }, { "title": "RayDF: Neural Ray-surface Distance Fields with Multi-view Consistency", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71065", "id": "crZlhMnfeO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4f86833d5cc98ec32e470ef1c8cb82e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=crZlhMnfeO", "openreview": "https://openreview.net/forum?id=crZlhMnfeO", "poster": "/media/PosterPDFs/NeurIPS%202023/71065.png?t=1702170053.2650142", "slides": "https://nips.cc/virtual/2023/poster/71065", "video": "https://nips.cc/virtual/2023/poster/71065", "author_site": "Zhuoman Liu, Bo Yang, Yan Luximon, Ajay Kumar, Jinxi Li", "tldr": "", "abstract": "In this paper, we study the problem of continuous 3D shape representations. The majority of existing successful methods are coordinate-based implicit neural representations. However, they are inefficient at rendering novel views or recovering explicit surface points. A few works have started to formulate 3D shapes as ray-based neural functions, but the learned structures are inferior due to the lack of multi-view geometry consistency. To tackle these challenges, we propose a new framework called RayDF. It consists of three major components: 1) the simple ray-surface distance field, 2) the novel dual-ray visibility classifier, and 3) a multi-view consistency optimization module to drive the learned ray-surface distances to be multi-view geometry consistent. We extensively evaluate our method on three public datasets, demonstrating remarkable performance in 3D surface point reconstruction on both synthetic and challenging real-world 3D scenes, clearly surpassing existing coordinate-based and ray-based baselines. Most notably, our method renders an 800x800 depth image 1000x faster than coordinate-based methods, showing the superiority of our method for 3D shape representation. 
Our code and data are available at https://github.com/vLAR-group/RayDF", "keywords": "implicit shape representations;multi-view consistency;novel view synthesis", "primary_area": "", "supplementary_material": "", "author": "Zhuoman Liu;Bo Yang;Yan Luximon;Ajay Kumar;Jinxi Li", "authorids": "~Zhuoman_Liu1;~Bo_Yang7;~Yan_Luximon1;~Ajay_Kumar2;~Jinxi_Li2", "gender": "F;M;;;M", "homepage": "https://zhuomanliu.tech/;https://yang7879.github.io/;;;", "dblp": "284/0962;46/999-27;;;198/4279,", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-2419-4140;;;", "linkedin": ";;;;jinxi-leo-li", "or_profile": "~Zhuoman_Liu1;~Bo_Yang7;~Yan_Luximon1;~Ajay_Kumar2;~Jinxi_Li2", "aff": "Hong Kong Polytechnic University;The Hong Kong Polytechnic University;;;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;;;polyu.edu.hk", "position": "PhD student;Assistant Professor;;;PhD student", "bibtex": "@inproceedings{\nliu2023raydf,\ntitle={Ray{DF}: Neural Ray-surface Distance Fields with Multi-view Consistency},\nauthor={Zhuoman Liu and Bo Yang and Yan Luximon and Ajay Kumar and Jinxi Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=crZlhMnfeO}\n}", "github": "", "project": "", "reviewers": "fezd;MPar;4cJL;sym2", "pdf_size": 49393355, "rating": "4;6;6;7", "confidence": "3;4;3;5", "soundness": "3;4;3;3", "novelty": "2;4;2;3", "presentation": "3;4;3;4", "wc_summary": "59;56;120;122", "wc_strengths": "26;43;55;36", "wc_weaknesses": "97;128;98;248", "wc_questions": "2;32;18;143", "wc_limitations": "7;1;14;1", "wc_review": "191;260;305;550", "wc_reply_reviewers": "0;12;60;21", "wc_reply_authors": "83;15;158;16", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 89.25, 31.77558024647229 ], "wc_strengths_avg": [ 40.0, 10.559356040971437 ], "wc_weaknesses_avg": [ 142.75, 62.02973077484699 ], "wc_questions_avg": [ 48.75, 55.44084685500394 ], "wc_limitations_avg": [ 5.75, 5.356071321407137 ], "wc_review_avg": [ 326.5, 135.2747204765177 ], "wc_reply_reviewers_avg": [ 23.25, 22.487496525847426 ], "wc_reply_authors_avg": [ 68.0, 58.81751439834907 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11316074143646875527&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "polyu.edu.hk;polyu.edu.hk;;;polyu.edu.hk", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "AVOIDDS: Aircraft Vision-based Intruder Detection Dataset and Simulator", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73531", "id": "crbPFR2Hpv", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/19a260641ebaf68d412f427e591bb74a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=crbPFR2Hpv", "openreview": "https://openreview.net/forum?id=crbPFR2Hpv", "poster": "/media/PosterPDFs/NeurIPS%202023/73531.png?t=1701671308.9914758", "slides": "https://nips.cc/virtual/2023/poster/73531", "video": "https://nips.cc/virtual/2023/poster/73531", "author_site": "Elysia Smyers, Sydney Katz, Anthony Corso, Mykel J Kochenderfer", "tldr": "", "abstract": "Designing robust machine learning systems remains an open problem, and there is a need for benchmark problems that cover both environmental changes and evaluation on a downstream task. In this work, we introduce AVOIDDS, a realistic object detection benchmark for the vision-based aircraft detect-and-avoid problem. We provide a labeled dataset consisting of 72,000 photorealistic images of intruder aircraft with various lighting conditions, weather conditions, relative geometries, and geographic locations. We also provide an interface that evaluates trained models on slices of this dataset to identify changes in performance with respect to changing environmental conditions. Finally, we implement a fully-integrated, closed-loop simulator of the vision-based detect-and-avoid problem to evaluate trained models with respect to the downstream collision avoidance task. This benchmark will enable further research in the design of robust machine learning systems for use in safety-critical applications. The AVOIDDS dataset and code are publicly available at https://purl.stanford.edu/hj293cv5980 and https://github.com/sisl/VisionBasedAircraftDAA, respectively.", "keywords": "aviation;distribution shift;closed-loop evaluation;object detection", "primary_area": "", "supplementary_material": "/attachment/1575960b49039e916afd7bc2168239c791de5b2d.zip", "author": "Elysia Quinn Smyers;Sydney Michelle Katz;Anthony Corso;Mykel Kochenderfer", "authorids": "~Elysia_Quinn_Smyers1;~Sydney_Michelle_Katz1;~Anthony_Corso1;~Mykel_Kochenderfer1", "gender": "F;F;M;M", "homepage": "https://eqsmy.github.io/;https://sydneymkatz.com;http://anthonylcorso.com/;https://mykel.kochenderfer.com", "dblp": ";244/9782;154/0661;34/2029.html", "google_scholar": ";4U1XK1gAAAAJ;4BLu9kYAAAAJ;cAy9G6oAAAAJ", "orcid": ";;0000-0002-4027-0473;0000-0002-7238-9663", "linkedin": ";smkatz/;;mykel-kochenderfer", "or_profile": "~Elysia_Quinn_Smyers1;~Sydney_Michelle_Katz1;~Anthony_Corso1;~Mykel_Kochenderfer1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Undergrad student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nsmyers2023avoidds,\ntitle={{AVOIDDS}: Aircraft Vision-based Intruder Detection Dataset and Simulator},\nauthor={Elysia Quinn Smyers and Sydney Michelle Katz and Anthony Corso and Mykel Kochenderfer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=crbPFR2Hpv}\n}", "github": "", "project": "", "reviewers": "s82Z;hmNh", "pdf_size": 9104140, "rating": "5;8", "confidence": "4;3", "wc_summary_and_contributions": "36;105", "wc_strengths": "27;148", "wc_improvement": "68;115", "wc_limitations": "31;83", "wc_correctness": "6;120", "wc_clarity": "1;8", "wc_relation_to_prior_work": "17;57", "wc_documentation": "1;150", "wc_additional_feedback": "1;1", 
"wc_review": "188;787", "wc_reply_reviewers": "97;47", "wc_reply_authors": "770;774", "reply_reviewers": "1;1", "reply_authors": "1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 70.5, 34.5 ], "wc_strengths_avg": [ 87.5, 60.5 ], "wc_improvement_avg": [ 91.5, 23.5 ], "wc_limitations_avg": [ 57.0, 26.0 ], "wc_correctness_avg": [ 63.0, 57.0 ], "wc_clarity_avg": [ 4.5, 3.5 ], "wc_relation_to_prior_work_avg": [ 37.0, 20.0 ], "wc_documentation_avg": [ 75.5, 74.5 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 487.5, 299.5 ], "wc_reply_reviewers_avg": [ 72.0, 25.0 ], "wc_reply_authors_avg": [ 772.0, 2.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14514863507436239929&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient RL with Impaired Observability: Learning to Act with Delayed and Missing State Observations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71064", "id": "csdEeUn0ve", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9156b0f6dfa9bbd18c79cc459ef5d61c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=csdEeUn0ve", "openreview": "https://openreview.net/forum?id=csdEeUn0ve", "poster": "/media/PosterPDFs/NeurIPS%202023/71064.png?t=1702248477.576339", "slides": "https://nips.cc/virtual/2023/poster/71064", "video": "https://nips.cc/virtual/2023/poster/71064", "author_site": "Minshuo Chen, Yu Bai, H. Vincent Poor, Mengdi Wang", "tldr": "", "abstract": "In real-world reinforcement learning (RL) systems, various forms of {\\it impaired observability} can complicate matters. These situations arise when an agent is unable to observe the most recent state of the system due to latency or lossy channels, yet the agent must still make real-time decisions. This paper introduces a theoretical investigation into efficient RL in control systems where agents must act with delayed and missing state observations. We establish near-optimal regret bounds, of the form $\\tilde{\\mathcal{O}}(\\sqrt{{\\rm poly}(H) SAK})$, for RL in both the delayed and missing observation settings. Despite impaired observability posing significant challenges to the policy class and planning, our results demonstrate that learning remains efficient, with the regret bound optimally depending on the state-action size of the original system. Additionally, we provide a characterization of the performance of the optimal policy under impaired observability, comparing it to the optimal value obtained with full observability.", "keywords": "Delayed and missing observations;MDPs;efficient regret bounds", "primary_area": "", "supplementary_material": "/attachment/513a8b56e57a47de5f96ae462cf6128736ef4c65.pdf", "author": "Minshuo Chen;Yu Bai;H. 
Vincent Poor;Mengdi Wang", "authorids": "~Minshuo_Chen1;~Yu_Bai1;~H._Vincent_Poor1;~Mengdi_Wang1", "gender": "M;;M;F", "homepage": "https://minshuochen.github.io;https://yubai.org;http://ee.princeton.edu/people/faculty/h-vincent-poor;http://mwang.princeton.edu", "dblp": "217/1509;03/6325-17.html;p/HVincentPoor;", "google_scholar": "qU9WvTgAAAAJ;owqhKD8AAAAJ;Dq93mOUAAAAJ;", "orcid": ";;;", "linkedin": ";;vince-poor-974a3/;", "or_profile": "~Minshuo_Chen1;~Yu_Bai1;~H._Vincent_Poor1;~Mengdi_Wang1", "aff": "Princeton University;Salesforce Research;Princeton University;Princeton University", "aff_domain": "princeton.edu;salesforce.com;princeton.edu;princeton.edu", "position": "Postdoc;Research Scientist;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023efficient,\ntitle={Efficient {RL} with Impaired Observability: Learning to Act with Delayed and Missing State Observations},\nauthor={Minshuo Chen and Yu Bai and H. Vincent Poor and Mengdi Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=csdEeUn0ve}\n}", "github": "", "project": "", "reviewers": "61zK;kTAd;Mu3C;yWj8", "pdf_size": 1130353, "rating": "5;5;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;2;2", "wc_summary": "50;72;72;108", "wc_strengths": "142;20;155;63", "wc_weaknesses": "260;35;143;92", "wc_questions": "85;30;12;160", "wc_limitations": "1;22;7;6", "wc_review": "538;179;389;429", "wc_reply_reviewers": "15;0;100;7", "wc_reply_authors": "0;0;438;0", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 20.80264406271472 ], "wc_strengths_avg": [ 95.0, 55.80770556114989 ], "wc_weaknesses_avg": [ 132.5, 82.93521568067452 ], "wc_questions_avg": [ 71.75, 57.61239016045073 ], "wc_limitations_avg": [ 9.0, 7.842193570679061 ], "wc_review_avg": [ 383.75, 130.18328425723482 ], "wc_reply_reviewers_avg": [ 30.5, 40.475301110677364 ], "wc_reply_authors_avg": [ 109.5, 189.65956342879207 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14895093424685634963&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "princeton.edu;salesforce.com;princeton.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Princeton University;Salesforce", "aff_unique_dep": ";Salesforce Research", "aff_unique_url": "https://www.princeton.edu;https://research.salesforce.com", "aff_unique_abbr": "Princeton;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Counterfactual Generation with Identifiability Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71063", "id": "cslnCXE9XA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/afda6bf3fb086eabbaf161ba1cec5a9a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cslnCXE9XA", "openreview": "https://openreview.net/forum?id=cslnCXE9XA", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71063.png?t=1702490640.1291327", "slides": "https://nips.cc/virtual/2023/poster/71063", "video": "https://nips.cc/virtual/2023/poster/71063", "author_site": "Hanqi Yan, Lingjing Kong, Lin Gui, Yuejie Chi, Eric Xing, Yulan He, Kun Zhang", "tldr": "", "abstract": "Counterfactual generation lies at the core of various machine learning tasks, including image translation and controllable text generation. This generation process usually requires the identification of the disentangled latent representations, such as content and style, that underlie the observed data. However, it becomes more challenging when faced with a scarcity of paired data and labelling information. Existing disentangled methods crucially rely on oversimplified assumptions, such as assuming independent content and style variables, to identify the latent variables, even though such assumptions may not hold for complex data distributions. For instance, food reviews tend to involve words like \u201ctasty\u201d, whereas movie reviews commonly contain words such as \u201cthrilling\u201d for the same positive sentiment. This problem is exacerbated when data are sampled from multiple domains since the dependence between content and style may vary significantly over domains. In this work, we tackle the domain-varying dependence between the content and the style variables inherent in the counterfactual generation task. We provide identification guarantees for such latent-variable models by leveraging the relative sparsity of the influences from different latent variables. Our theoretical insights enable the development of a doMain AdapTive counTerfactual gEneration model, called (MATTE). Our theoretically grounded framework achieves state-of-the-art performance in unsupervised style transfer tasks, where neither paired data nor style labels are utilized, across four large-scale datasets.", "keywords": "Causal Representation Learning;Identifiability;Counterfactual Generation;Latent variable models;Disentanglement.", "primary_area": "", "supplementary_material": "/attachment/2de63e2575885154347bca512f592b9850c1c8b1.zip", "author": "Hanqi Yan;Lingjing Kong;Lin Gui;Yuejie Chi;Eric Xing;Yulan He;Kun Zhang", "authorids": "~Hanqi_Yan2;~Lingjing_Kong1;~Lin_Gui3;~Yuejie_Chi1;~Eric_Xing1;~Yulan_He1;~Kun_Zhang1", "gender": ";M;M;;M;F;M", "homepage": ";https://lingjing-kong.github.io/;;;http://www.cs.cmu.edu/~epxing/;https://www.kcl.ac.uk/people/yulan-he;http://www.andrew.cmu.edu/user/kunz1/", "dblp": ";158/1994-1.html;34/8605-3;;36/3855;75/5430;96/3115-1", "google_scholar": ";4hAlzvkAAAAJ;https://scholar.google.com.ph/citations?user=1b3Eyx4AAAAJ;;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ;RGoypN4AAAAJ", "orcid": ";;;;;0000-0003-3948-5845;", "linkedin": ";;;;;yulan-he-277234a/?originalSubdomain=uk;", "or_profile": "~Hanqi_Yan2;~Lingjing_Kong1;~Lin_Gui3;~Yuejie_Chi1;~Eric_Xing1;~Yulan_He1;~Kun_Zhang1", "aff": ";Computer Science Department, School of Computer Science;King's College London, University of London;;School of Computer Science, Carnegie Mellon University;King's College London, University of London;Carnegie Mellon University", "aff_domain": ";csd.cs.cmu.edu;kcl.ac.uk;;cs.cmu.edu;kcl.ac.uk;cmu.edu", "position": ";PhD student;Lecturer;;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyan2023counterfactual,\ntitle={Counterfactual Generation with Identifiability Guarantees},\nauthor={Hanqi Yan and Lingjing 
Kong and Lin Gui and Yuejie Chi and Eric Xing and Yulan He and Kun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cslnCXE9XA}\n}", "github": "", "project": "", "reviewers": "zc3n;pLx5;1Pqp;pDz1;yXyx", "pdf_size": 1050213, "rating": "4;4;6;6;8", "confidence": "3;3;3;3;3", "soundness": "2;2;3;2;4", "novelty": "2;3;3;2;3", "presentation": "2;1;2;3;3", "wc_summary": "57;31;99;63;162", "wc_strengths": "36;41;32;47;40", "wc_weaknesses": "101;229;80;403;22", "wc_questions": "120;69;13;4;155", "wc_limitations": "1;28;5;58;41", "wc_review": "315;398;229;575;420", "wc_reply_reviewers": "36;48;32;24;21", "wc_reply_authors": "714;566;595;180;22", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;3;2;4;2", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 82.4, 45.332549012823 ], "wc_strengths_avg": [ 39.2, 5.035871324805669 ], "wc_weaknesses_avg": [ 167.0, 136.0073527424161 ], "wc_questions_avg": [ 72.2, 58.83332389046194 ], "wc_limitations_avg": [ 26.6, 21.527656630483495 ], "wc_review_avg": [ 387.4, 115.49995670994859 ], "wc_reply_reviewers_avg": [ 32.2, 9.558242516278817 ], "wc_reply_authors_avg": [ 415.4, 266.1861003132958 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.8944271909999159 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11873809553322710933&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 9, "email": ";csd.cs.cmu.edu;kcl.ac.uk;;cs.cmu.edu;kcl.ac.uk;cmu.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;2", "aff_unique_norm": "School of Computer Science;King's College London;Carnegie Mellon University", "aff_unique_dep": "Computer Science Department;;School of Computer Science", "aff_unique_url": ";https://www.kcl.ac.uk;https://www.cmu.edu", "aff_unique_abbr": ";KCL;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "1;2;1;2", "aff_country_unique": ";United Kingdom;United States" }, { "title": "Demystifying Softmax Gating Function in Gaussian Mixture of Experts", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71062", "id": "cto6jIIbMZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ef6ffcb85a2d238fc4761860c31ded4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cto6jIIbMZ", "openreview": "https://openreview.net/forum?id=cto6jIIbMZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71062.png?t=1701119324.9575818", "slides": "https://nips.cc/virtual/2023/poster/71062", "video": "https://nips.cc/virtual/2023/poster/71062", "author_site": "Huy Nguyen, TrungTin Nguyen, Nhat Ho", "tldr": "", "abstract": "Understanding the parameter estimation of softmax gating Gaussian mixture of experts has remained a long-standing open problem in the literature. 
It is mainly due to three fundamental theoretical challenges associated with the softmax gating function: (i) the identifiability only up to the translation of parameters; (ii) the intrinsic interaction via partial differential equations between the softmax gating and the expert functions in the Gaussian density; (iii) the complex dependence between the numerator and denominator of the conditional density of softmax gating Gaussian mixture of experts. We resolve these challenges by proposing novel Voronoi loss functions among parameters and establishing the convergence rates of maximum likelihood estimator (MLE) for solving parameter estimation in these models. When the true number of experts is unknown and over-specified, our findings show a connection between the convergence rate of the MLE and a solvability problem of a system of polynomial equations.", "keywords": "Mixture of Experts;Maximum Likelihood Estimation;Voronoi Loss Function;Algebraic Geometry.", "primary_area": "", "supplementary_material": "", "author": "Huy Nguyen;TrungTin Nguyen;Nhat Ho", "authorids": "~Huy_Nguyen5;~TrungTin_Nguyen1;~Nhat_Ho1", "gender": "M;M;M", "homepage": "https://huynm99.github.io/;https://trung-tinnguyen.github.io/;https://nhatptnk8912.github.io/", "dblp": "48/6075;275/3643;203/4479", "google_scholar": "_YYwzhQAAAAJ;NhiJDJsAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";0000-0001-8433-5980;", "linkedin": "huy-nguyen-081199/;trungtinnguyen0/;nhat-pham-minh-ho-267b8164/", "or_profile": "~Huy_Nguyen5;~TrungTin_Nguyen1;~Nhat_Ho1", "aff": "University of Texas at Austin;INRIA;University of Texas, Austin", "aff_domain": "utexas.edu;inria.fr;utexas.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023demystifying,\ntitle={Demystifying Softmax Gating Function in Gaussian Mixture of Experts},\nauthor={Huy Nguyen and TrungTin Nguyen and Nhat Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cto6jIIbMZ}\n}", "github": "", "project": "", "reviewers": "q5b3;8L63;sK4C;txMC;PaPA", "pdf_size": 615902, "rating": "6;7;7;7;7", "confidence": "2;2;2;2;3", "soundness": "3;3;3;4;3", "novelty": "3;3;3;2;3", "presentation": "2;3;4;3;2", "wc_summary": "84;93;47;40;192", "wc_strengths": "88;136;47;212;189", "wc_weaknesses": "71;117;40;216;193", "wc_questions": "121;1;20;28;211", "wc_limitations": "1;1;1;18;1", "wc_review": "365;348;155;514;786", "wc_reply_reviewers": "0;16;0;21;8", "wc_reply_authors": "0;24;0;29;24", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;2;2", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 2.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 91.2, 54.38896946992101 ], "wc_strengths_avg": [ 134.4, 61.314272400477854 ], "wc_weaknesses_avg": [ 127.4, 67.94291721732296 ], "wc_questions_avg": [ 76.2, 79.13886529386178 ], "wc_limitations_avg": [ 4.4, 6.8 ], "wc_review_avg": [ 433.6, 209.90531198614292 ], "wc_reply_reviewers_avg": [ 9.0, 8.438009243891594 ], "wc_reply_authors_avg": [ 15.4, 12.705904139414873 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 27, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11246217382401269725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "utexas.edu;inria.fr;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.inria.fr", "aff_unique_abbr": "UT Austin;INRIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "title": "Object Reprojection Error (ORE): Camera pose benchmarks from lightweight tracking annotations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73530", "id": "cuheT1BAp4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb206443c93d07da8b1974b768d8a0d4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=cuheT1BAp4", "openreview": "https://openreview.net/forum?id=cuheT1BAp4", "poster": "/media/PosterPDFs/NeurIPS%202023/73530.png?t=1699581575.724324", "slides": "https://nips.cc/virtual/2023/poster/73530", "video": "https://nips.cc/virtual/2023/poster/73530", "author_site": "Xingyu Chen, Weiyao Wang, Hao Tang, Matt Feiszli", "tldr": "", "abstract": "3D spatial understanding is highly valuable in the context of semantic modeling of environments, agents, and their relationships. Semantic modeling approaches employed on monocular video often ingest outputs from off-the-shelf SLAM/SfM pipelines, which are anecdotally observed to perform poorly or fail completely on some fraction of the videos of interest. These target videos may vary widely in complexity of scenes, activities, camera trajectory, etc. Unfortunately, such semantically-rich video data often comes with no ground-truth 3D information, and in practice it is prohibitively costly or impossible to obtain ground truth reconstructions or camera pose post-hoc. \n\nThis paper proposes a novel evaluation protocol, Object Reprojection Error (ORE) to benchmark camera trajectories; ORE computes reprojection error for static objects within the video and requires only lightweight object tracklet annotations. These annotations are easy to gather on new or existing video, enabling ORE to be calculated on essentially arbitrary datasets. We show that ORE maintains high rank correlation with standard metrics based on groundtruth. 
Leveraging ORE, we source videos and annotations from Ego4D-EgoTracks, resulting in EgoStatic, a large-scale diverse dataset for evaluating camera trajectories in-the-wild.", "keywords": "egocentric vision; camera trajectory evaluation; semantic SLAM; 3D pose;", "primary_area": "", "supplementary_material": "/attachment/5b14331c4d5028090d8bc3d851bf2484d8232980.zip", "author": "Xingyu Chen;Weiyao Wang;Hao Tang;Matt Feiszli", "authorids": "~Xingyu_Chen1;~Weiyao_Wang1;~Hao_Tang14;~Matt_Feiszli1", "gender": "M;M;;M", "homepage": ";https://research.fb.com/people/wang-weiyao/;https://tanghaotommy.github.io/;", "dblp": ";206/6183-1;;182/8255", "google_scholar": "gjSHr6YAAAAJ;;2X3D1-4AAAAJ;A-wA73gAAAAJ", "orcid": ";;;", "linkedin": "xingyu-chen-029865b3/;;;matt-feiszli-76b34b/", "or_profile": "~Xingyu_Chen1;~Weiyao_Wang1;~Hao_Tang14;~Matt_Feiszli1", "aff": "Facebook AI;Meta Facebook;Meta Platforms;Meta AI", "aff_domain": "facebook.com;meta.com;meta.com;fb.com", "position": "Software Engineer;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nchen2023object,\ntitle={Object Reprojection Error ({ORE}): Camera pose benchmarks from lightweight tracking annotations},\nauthor={Xingyu Chen and Weiyao Wang and Hao Tang and Matt Feiszli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=cuheT1BAp4}\n}", "github": "", "project": "", "reviewers": "u9zJ;2WCT;C25m;i2aG;U9Jy", "pdf_size": 5528039, "rating": "6;6;7;7;8", "confidence": "5;4;3;4;4", "wc_summary_and_contributions": "109;162;42;128;108", "wc_strengths": "53;121;34;57;31", "wc_improvement": "160;186;40;176;63", "wc_limitations": "3;16;51;65;5", "wc_correctness": "10;55;25;7;4", "wc_clarity": "3;234;1;10;5", "wc_relation_to_prior_work": "6;1;1;1;1", "wc_documentation": "3;84;15;1;2", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "348;860;210;446;220", "wc_reply_reviewers": "40;0;6;0;32", "wc_reply_authors": "677;902;431;740;217", "reply_reviewers": "2;0;1;0;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 109.8, 39.13259510944808 ], "wc_strengths_avg": [ 59.2, 32.535519052260405 ], "wc_improvement_avg": [ 125.0, 61.01803012225157 ], "wc_limitations_avg": [ 28.0, 25.282404948896772 ], "wc_correctness_avg": [ 20.2, 18.840382161729096 ], "wc_clarity_avg": [ 50.6, 91.74878745792776 ], "wc_relation_to_prior_work_avg": [ 2.0, 2.0 ], "wc_documentation_avg": [ 21.0, 31.906112267087632 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 416.8, 238.12131361975977 ], "wc_reply_reviewers_avg": [ 15.6, 16.98940846527624 ], "wc_reply_authors_avg": [ 593.4, 241.5637390007035 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IqAoNJOrQvkJ:scholar.google.com/&scioq=Object+Reprojection+Error+(ORE):+Camera+pose+benchmarks+from+lightweight+tracking+annotations&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "facebook.com;meta.com;meta.com;fb.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Facebook AI", "aff_unique_url": "https://www.facebook.com", "aff_unique_abbr": "Facebook AI", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Learnability of Multilabel Ranking", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71061", "id": "cwBeRBe9hq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2786baf8091ee8ecb060580239967ba0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cwBeRBe9hq", "openreview": "https://openreview.net/forum?id=cwBeRBe9hq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71061", "video": "https://nips.cc/virtual/2023/poster/71061", "author_site": "Vinod Raman, UNIQUE SUBEDI, Ambuj Tewari", "tldr": "", "abstract": "Multilabel ranking is a central task in machine learning. However, the most fundamental question of learnability in a multilabel ranking setting with relevance-score feedback remains unanswered. In this work, we characterize the learnability of multilabel ranking problems in both batch and online settings for a large family of ranking losses. Along the way, we give two equivalence classes of ranking losses based on learnability that capture most losses used in practice.", "keywords": "Multilabel Ranking;PAC Learning;Online Learning", "primary_area": "", "supplementary_material": "/attachment/91e852a627cc0c42b229f092c49e340f07da1683.pdf", "author": "Vinod Raman;UNIQUE SUBEDI;Ambuj Tewari", "authorids": "~Vinod_Raman1;~UNIQUE_SUBEDI1;~Ambuj_Tewari1", "gender": "M;M;M", "homepage": "https://vinodkraman.github.io;https://unique-subedi.github.io/;https://www.ambujtewari.com", "dblp": "126/5382;;24/567", "google_scholar": "Wn5QzOgAAAAJ;DO16ipsAAAAJ;ttbl4FsAAAAJ", "orcid": ";;0000-0001-6969-7844", "linkedin": ";;", "or_profile": "~Vinod_Raman1;~UNIQUE_SUBEDI1;~Ambuj_Tewari1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nraman2023on,\ntitle={On the Learnability of Multilabel Ranking},\nauthor={Vinod Raman and UNIQUE SUBEDI and Ambuj Tewari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cwBeRBe9hq}\n}", "github": "", "project": "", "reviewers": "6zYz;edyo;euAG;ejP3", "pdf_size": 574929, "rating": "6;6;7;8", "confidence": "2;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "67;84;271;39", "wc_strengths": "48;40;32;83", "wc_weaknesses": "12;182;373;10", "wc_questions": "7;29;5;1", "wc_limitations": "12;91;35;1", "wc_review": "146;426;716;134", "wc_reply_reviewers": "0;75;118;0", "wc_reply_authors": "0;0;434;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 115.25, 91.34652429074683 ], "wc_strengths_avg": [ 50.75, 19.45989465541887 ], "wc_weaknesses_avg": [ 144.25, 149.38603515723952 ], "wc_questions_avg": [ 10.5, 10.897247358851684 ], "wc_limitations_avg": [ 34.75, 34.71581051912803 ], "wc_review_avg": [ 355.5, 238.6854624814842 ], "wc_reply_reviewers_avg": [ 48.25, 50.588412704887276 ], "wc_reply_authors_avg": [ 108.5, 187.92751262122317 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], 
"replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3ieuap7IbBcJ:scholar.google.com/&scioq=On+the+Learnability+of+Multilabel+Ranking&hl=en&as_sdt=0,5", "gs_version_total": 8, "email": "umich.edu;umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "GPT4Tools: Teaching Large Language Model to Use Tools via Self-instruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71060", "id": "cwjh8lqmOL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e393677793767624f2821cec8bdd02f1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cwjh8lqmOL", "openreview": "https://openreview.net/forum?id=cwjh8lqmOL", "poster": "/media/PosterPDFs/NeurIPS%202023/71060.png?t=1699451558.6983144", "slides": "https://nips.cc/virtual/2023/poster/71060", "video": "https://nips.cc/virtual/2023/poster/71060", "author_site": "Rui Yang, Lin Song, Yanwei Li, Sijie Zhao, Yixiao Ge, Xiu Li, Ying Shan", "tldr": "", "abstract": "This paper aims to efficiently enable Large Language Models (LLMs) to use multi-modal tools.\nThe advanced proprietary LLMs, such as ChatGPT and GPT-4, have shown great potential for tool usage through sophisticated prompt engineering.\nNevertheless, these models typically rely on prohibitive computational costs and publicly inaccessible data.\nTo address these challenges, we propose the GPT4Tools based on self-instruct to enable open-source LLMs, such as LLaMA and OPT, to use tools.\nIt generates an instruction-following dataset by prompting an advanced teacher with various multi-modal contexts.\nBy using the Low-Rank Adaptation (LoRA) optimization, our approach facilitates the open-source LLMs to solve a range of visual problems, including visual comprehension and image generation.\nMoreover, we provide a benchmark to evaluate the ability of LLMs to use tools, which is performed in both zero-shot and fine-tuning ways.\nExtensive experiments demonstrate the effectiveness of our method on various language models, which not only significantly improves the accuracy of invoking seen tools, but also enables the zero-shot capacity for unseen tools.", "keywords": "multimodality;foundation models;tool usage", "primary_area": "", "supplementary_material": "/attachment/f289a34a137e6fb5bc5bf64a858adc21a90d3721.pdf", "author": "Rui Yang;Lin Song;Yanwei Li;Sijie Zhao;Yixiao Ge;Xiu Li;Ying Shan", "authorids": "~Rui_Yang15;~Lin_Song2;~Yanwei_Li1;~Sijie_Zhao2;~Yixiao_Ge2;~Xiu_Li1;~Ying_Shan2", "gender": "M;M;;M;F;F;M", "homepage": ";https://linsong.cc;;https://sijeh.github.io/;https://geyixiao.com/;https://thusigsiclab.github.io/thu.github.io/introduction.html;", "dblp": ";;;300/5422;228/6649;13/1206-1;68/5910", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;6Ra2TgQAAAAJ;;tZ3dS3MAAAAJ;TtU74NAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4oXBp9UAAAAJ", "orcid": ";;;;;0000-0003-0403-1923;0000-0001-7673-8325", "linkedin": ";\u6797-\u5b8b-9520a5183/;;;;;YingShanProfile/", "or_profile": "~Rui_Yang15;~Lin_Song2;~Yanwei_Li1;~Sijie_Zhao2;~Yixiao_Ge2;~Xiu_Li1;~Ying_Shan2", "aff": "Tsinghua 
University;Tencent AI Lab;;Tencent AI Lab;Tencent;Tsinghua University;Tencent PCG ARC Lab", "aff_domain": "mails.tsinghua.edu.cn;tencent.com;;tencent.com;tencent.com;tsinghua.edu.cn;arc.tencent.com", "position": "MS student;Researcher;;Researcher;Researcher;Professor;Director", "bibtex": "@inproceedings{\nyang2023gpttools,\ntitle={{GPT}4Tools: Teaching Large Language Model to Use Tools via Self-instruction},\nauthor={Rui Yang and Lin Song and Yanwei Li and Sijie Zhao and Yixiao Ge and Xiu Li and Ying Shan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cwjh8lqmOL}\n}", "github": "", "project": "", "reviewers": "F8mq;MACc;VYCC;witA", "pdf_size": 872249, "rating": "5;5;6;6", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;4;3;3", "presentation": "3;2;3;2", "wc_summary": "104;221;43;122", "wc_strengths": "54;32;89;85", "wc_weaknesses": "215;229;92;187", "wc_questions": "63;54;4;45", "wc_limitations": "1;38;4;3", "wc_review": "437;574;232;442", "wc_reply_reviewers": "75;59;0;41", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 122.5, 63.96287986011887 ], "wc_strengths_avg": [ 65.0, 23.37733945512192 ], "wc_weaknesses_avg": [ 180.75, 53.424596395293435 ], "wc_questions_avg": [ 41.5, 22.566568192793515 ], "wc_limitations_avg": [ 11.5, 15.337861650177967 ], "wc_review_avg": [ 421.25, 122.29753676996116 ], "wc_reply_reviewers_avg": [ 43.75, 27.976552682559014 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 213, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9581017018447399972&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mails.tsinghua.edu.cn;tencent.com;;tencent.com;tencent.com;tsinghua.edu.cn;arc.tencent.com", "author_num": 7, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.tsinghua.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "THU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "cx8lw7WXY4", "title": "ITEM3D: Illumination-Aware Directional Texture Editing for 3D Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Texture editing is a crucial task in 3D modeling that allows users to automatically manipulate the surface properties of 3D models. However, \nthe inherent complexity of 3D models and the ambiguous text description lead to the challenge in this task. \nTo address this challenge, we propose ITEM3D, an illumination-aware model for automatically 3D object editing according to the text prompts. 
\nLeveraging the power of diffusion model, ITEM3D takes the rendered images as the bridge of text and 3D representation and further optimizes the disentangled texture and environment map.\nPrevious methods adopt the absolute editing direction namely score distillation sampling (SDS) as the optimization objective, which unfortunately results in the noisy appearance and text inconsistency.\nTo solve the problem caused by the ambiguous text, we introduce a relative editing direction, a optimization objective defined by the noise difference between the source and target texts, to release the semantic ambiguity between the texts and images.\nAdditionally, we gradually adjust the direction during optimization to further address the unexpected deviation in texture domain. \nQualitative and quantitative experiments show that our ITEM3D outperforms SDS-based methods on various 3D objects. We also perform text-guided relighting to show the explicit control over lighting.", "keywords": "Texture editing;diffusion model;relative direction;direction adjustment;relighting", "primary_area": "", "supplementary_material": "/attachment/b01fc5306fef9a05fabb5bd36da58dacef0766c7.zip", "author": "Shengqi Liu;Zhuo Chen;Jingnan Gao;Yichao Yan;Wenhan Zhu;XIAOBO LI;KE GAO;Xiaokang Yang", "authorids": "~Shengqi_Liu1;~Zhuo_Chen11;~Jingnan_Gao1;~Yichao_Yan1;~Wenhan_Zhu1;~XIAOBO_LI4;~KE_GAO2;~Xiaokang_Yang1", "gender": "M;M;M;M;M;F;M;M", "homepage": "https://github.com/LSQsjtu;;https://g-1nonly.github.io;https://daodaofr.github.io/;;;https://icne.sjtu.edu.cn/info/1064/1078.htm;", "dblp": "195/9149;;;185/7881;;81/2423.html;06/3071-1.html;l/XiaoboLi-1", "google_scholar": ";;PyqKZDIAAAAJ;ZPHMMRkAAAAJ;;;yDEavdMAAAAJ;", "orcid": "0009-0004-4596-5524;;0000-0001-6688-8418;0000-0003-3209-8965;0000-0001-8781-1110;;0000-0003-4029-3322;0000-0002-8074-0230", "linkedin": "shengqi-liu-3270aa209/;;;;;;;\u6653\u6ce2-\u674e-7b64271a3", "or_profile": "~Shengqi_Liu1;~Zhuo_Chen11;~Jingnan_Gao1;~Yichao_Yan1;~Wenhan_Zhu1;~KE_GAO2;~Xiaokang_Yang1;~LI_XIAOBO1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University;Ant Group", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn;antgroup.com", "position": "Undergrad student;PhD student;Undergrad student;Assistant Professor;Postdoc;Researcher;Full Professor;Researcher", "bibtex": "@misc{\nliu2023itemd,\ntitle={{ITEM}3D: Illumination-Aware Directional Texture Editing for 3D Models},\nauthor={Shengqi Liu and Zhuo Chen and Jingnan Gao and Yichao Yan and Wenhan Zhu and XIAOBO LI and KE GAO and Xiaokang Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=cx8lw7WXY4}\n}", "github": "", "project": "", "reviewers": "67rK;CxP2;4exv;pR6T;1P22", "site": "https://openreview.net/forum?id=cx8lw7WXY4", "pdf_size": 4271623, "rating": "3;4;5;6;7", "confidence": "4;5;3;4;5", "soundness": "2;2;2;3;2", "novelty": "2;2;2;3;3", "presentation": "3;2;2;4;2", "wc_summary": "103;76;117;162;99", "wc_strengths": "38;49;179;102;57", "wc_weaknesses": "358;246;197;233;81", "wc_questions": "80;31;96;5;135", "wc_limitations": "15;6;4;30;10", "wc_review": "594;408;593;532;382", "wc_reply_reviewers": "208;184;16;15;198", "wc_reply_authors": "731;672;33;24;320", "reply_reviewers": "1;1;1;1;2", "reply_authors": "2;2;2;2;3", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.2, 
0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 111.4, 28.52788109902311 ], "wc_strengths_avg": [ 85.0, 51.79575272162767 ], "wc_weaknesses_avg": [ 223.0, 89.09994388325954 ], "wc_questions_avg": [ 69.4, 46.35773937542684 ], "wc_limitations_avg": [ 13.0, 9.2951600308978 ], "wc_review_avg": [ 501.8, 90.42212118724046 ], "wc_reply_reviewers_avg": [ 124.2, 89.0806376268154 ], "wc_reply_authors_avg": [ 356.0, 302.1026315674857 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=601352690899705870&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group;Ant Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com;https://www.antgroup.com", "aff_unique_abbr": "SJTU;Alibaba;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Privacy Assessment on Reconstructed Images: Are Existing Evaluation Metrics Faithful to Human Perception?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71059", "id": "cx9a4Xvb3l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2082273791021571c410f41d565d0b45-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cx9a4Xvb3l", "openreview": "https://openreview.net/forum?id=cx9a4Xvb3l", "poster": "/media/PosterPDFs/NeurIPS%202023/71059.png?t=1701842895.7292511", "slides": "https://nips.cc/virtual/2023/poster/71059", "video": "https://nips.cc/virtual/2023/poster/71059", "author_site": "Xiaoxiao Sun, Nidham Gazagnadou, Vivek Sharma, Lingjuan Lyu, Hongdong Li, Liang Zheng", "tldr": "", "abstract": "Hand-crafted image quality metrics, such as PSNR and SSIM, are commonly used to evaluate model privacy risk under reconstruction attacks. Under these metrics, reconstructed images that are determined to resemble the original one generally indicate more privacy leakage. Images determined to be overall dissimilar, on the other hand, indicate higher robustness against attack. However, there is no guarantee that these metrics well reflect human opinions, which offer trustworthy judgement for model privacy leakage. In this paper, we comprehensively study the faithfulness of these hand-crafted metrics to human perception of privacy information from the reconstructed images. On 5 datasets ranging from natural images, faces, to fine-grained classes, we use 4 existing attack methods to reconstruct images from many different classification models and, for each reconstructed image, we ask multiple human annotators to assess whether this image is recognizable. Our studies reveal that the hand-crafted metrics only have a weak correlation with the human evaluation of privacy leakage and that even these metrics themselves often contradict each other. These observations suggest risks of current metrics in the community. To address this potential risk, we propose a learning-based measure called SemSim to evaluate the Semantic Similarity between the original and reconstructed images. 
SemSim is trained with a standard triplet loss, using an original image as an anchor, one of its recognizable reconstructed images as a positive sample, and an unrecognizable one as a negative. By training on human annotations, SemSim better reflects privacy leakage at the semantic level. We show that SemSim has a significantly higher correlation with human judgment compared with existing metrics. Moreover, this strong correlation generalizes to unseen datasets, models, and attack methods. We envision this work as a milestone for image quality evaluation closer to the human level. The project webpage can be accessed at https://sites.google.com/view/semsim.", "keywords": "Privacy Assessment;Reconstructed Images;Evaluation Metrics;Human Perception", "primary_area": "", "supplementary_material": "/attachment/8f8f659da2e6ec7e88e8614489dffc482dc35318.pdf", "author": "Xiaoxiao Sun;Nidham Gazagnadou;Vivek Sharma;Lingjuan Lyu;Hongdong Li;Liang Zheng", "authorids": "~Xiaoxiao_Sun1;~Nidham_Gazagnadou1;~Vivek_Sharma1;~Lingjuan_Lyu1;~Hongdong_Li1;~Liang_Zheng4", "gender": "F;M;M;F;M;M", "homepage": "https://xiaoxiaosun.com/;https://ngazagna.github.io/;https://vivoutlaw.github.io/;https://sites.google.com/view/lingjuan-lyu;http://users.cecs.anu.edu.au/~hongdong/;http://zheng-lab.cecs.anu.edu.au/", "dblp": "185/7856-2;236/5973;;178/9876;59/4859.html;61/7360-1", "google_scholar": "1oCrd64AAAAJ;;fNbVXwQAAAAJ;;https://scholar.google.com.tw/citations?hl=en;https://scholar.google.com.au/citations?user=vNHqr3oAAAAJ", "orcid": "0000-0002-6944-7914;;;;;", "linkedin": ";nidham-gazagnadou-b06415a7/;vivoutlaw/;;;liang-zheng-76341311a/", "or_profile": "~Xiaoxiao_Sun1;~Nidham_Gazagnadou1;~Vivek_Sharma1;~Lingjuan_Lyu1;~Hongdong_Li1;~Liang_Zheng4", "aff": "Australian National University;Sony AI;Sony Research;Sony;Australian National University;Australian National University", "aff_domain": "anu.edu.au;sony.com;sony.com;sony.com;anu.edu.au;anu.edu.au", "position": "PhD student;Researcher;Senior Research Scientist;scientist;Full Professor;Senior Lecturer", "bibtex": "@inproceedings{\nsun2023privacy,\ntitle={Privacy Assessment on Reconstructed Images: Are Existing Evaluation Metrics Faithful to Human Perception?},\nauthor={Xiaoxiao Sun and Nidham Gazagnadou and Vivek Sharma and Lingjuan Lyu and Hongdong Li and Liang Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cx9a4Xvb3l}\n}", "github": "", "project": "", "reviewers": "Lywc;AN5f;j2WQ;oRwe", "pdf_size": 3716289, "rating": "6;6;7;8", "confidence": "4;3;4;5", "soundness": "3;3;4;4", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "67;52;106;41", "wc_strengths": "34;29;150;94", "wc_weaknesses": "87;67;107;5", "wc_questions": "182;42;5;113", "wc_limitations": "6;13;12;107", "wc_review": "376;203;380;360", "wc_reply_reviewers": "25;0;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.5, 24.60182920028509 ], "wc_strengths_avg": [ 76.75, 49.423552077931426 ], "wc_weaknesses_avg": [ 66.5, 38.21975928757271 ], "wc_questions_avg": [ 85.5, 67.89882178653765 ], "wc_limitations_avg": [ 34.5, 41.9434142625514 ], "wc_review_avg": [ 329.75, 73.5607741938596 ], "wc_reply_reviewers_avg": [ 9.25,
10.328964130056798 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12131464158985629083&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "anu.edu.au;sony.com;sony.com;sony.com;anu.edu.au;anu.edu.au", "author_num": 6, "aff_unique_index": "0;1;1;2;0;0", "aff_unique_norm": "Australian National University;Sony;Sony Corporation", "aff_unique_dep": ";Sony AI;", "aff_unique_url": "https://www.anu.edu.au;https://www.sony.com;https://www.sony.com", "aff_unique_abbr": "ANU;Sony AI;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;0", "aff_country_unique": "Australia;Japan" }, { "title": "Efficient Neural Music Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71058", "id": "cxazQGSsQa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38b23e2328096520e9c889ae03e372c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=cxazQGSsQa", "openreview": "https://openreview.net/forum?id=cxazQGSsQa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71058", "video": "https://nips.cc/virtual/2023/poster/71058", "author_site": "Max W. Y. Lam, Qiao Tian, Tang Li, Zongyu Yin, Siyuan Feng, Ming Tu, Yuliang Ji, Rui Xia, Mingbo Ma, Xuchen Song, Jitong Chen, Wang Yuping, Yuxuan Wang", "tldr": "", "abstract": "Recent progress in music generation has been remarkably advanced by the state-of-the-art MusicLM, which comprises a hierarchy of three LMs, respectively, for semantic, coarse acoustic, and fine acoustic modeling. Yet, sampling with MusicLM requires processing through these LMs one by one to obtain the fine-grained acoustic tokens, making it computationally expensive and prohibitive for real-time generation. Efficient music generation with a quality on par with MusicLM remains a significant challenge.\nIn this paper, we present **M**e**L**o**D**y (**M** for music; **L** for LM; **D** for diffusion), an LM-guided diffusion model that generates music audio of state-of-the-art quality while reducing 95.7\\% to 99.6\\% of the forward passes in MusicLM for sampling 10s to 30s music, respectively. MeLoDy inherits the highest-level LM from MusicLM for semantic modeling, and applies a novel dual-path diffusion (DPD) model and an audio VAE-GAN to efficiently decode the conditioning semantic tokens into waveform. DPD is proposed to simultaneously model the coarse and fine acoustics by incorporating the semantic information into segments of latents effectively via cross-attention at each denoising step. Our experimental results suggest the superiority of MeLoDy, not only in its practical advantages on sampling speed and infinitely continuable generation, but also in its state-of-the-art musicality, audio quality, and text correlation.\n\nOur samples are available at https://Efficient-MeLoDy.github.io/.", "keywords": "Music Generation;Language Model;Diffusion Model;MusicLM", "primary_area": "", "supplementary_material": "/attachment/30ab751c92e86427ea6ad351d48731f792c65480.pdf", "author": "Max W. Y. 
Lam;Qiao Tian;Tang Li;Zongyu Yin;Siyuan Feng;Ming Tu;Yuliang Ji;Rui Xia;Mingbo Ma;Xuchen Song;Jitong Chen;Yuping Wang;Yuxuan Wang", "authorids": "~Max_W._Y._Lam1;~Qiao_Tian1;litang.frank@bytedance.com;zongyu.yin@bytedance.com;fengsiyuan.ee@bytedance.com;~Ming_Tu1;~Yuliang_Ji1;~Rui_Xia5;mingbo.ma@bytedance.com;~Xuchen_Song1;~Jitong_Chen1;~Yuping_Wang3;~Yuxuan_Wang1", "gender": "M;M;;;;M;;F;;M;M;;M", "homepage": ";https://scholar.google.com/citations?user=PMH1tnEAAAAJ&hl=en;;;;;;;;https://scholar.google.com/citations?user=pLw0e78AAAAJ&hl=en&authuser=1;http://jitongchen.com;;", "dblp": "200/9096;206/9465-1.html;;;;148/3800;;;;;49/8730;;", "google_scholar": "R0E0bKkAAAAJ;PMH1tnEAAAAJ;;;;5BusdUwAAAAJ;;26oErxwAAAAJ;;;3cUlApYAAAAJ;;3RaOfJkAAAAJ", "orcid": ";;;;;;;;;;0000-0001-6084-043X;;", "linkedin": "maxingaussian/;;;;;tuming/;;;;;jitong-chen-6214b172;;", "or_profile": "~Max_W._Y._Lam1;~Qiao_Tian1;litang.frank@bytedance.com;zongyu.yin@bytedance.com;fengsiyuan.ee@bytedance.com;~Ming_Tu1;~Yuliang_Ji1;~Rui_Xia5;mingbo.ma@bytedance.com;~Xuchen_Song1;~Jitong_Chen1;~Yuping_Wang3;~Yuxuan_Wang1", "aff": "Speech, Audio & Music Intelligence (SAMI), ByteDance;ByteDance;;;;;;;;ByteDance;ByteDance;;ByteDance", "aff_domain": "bytedance.com;bytedance.com;;;;;;;;bytedance.com;bytedance.com;;bytedance.com", "position": "Principal Researcher;Researcher;;;;;;;;Researcher;Research Scientist;;Researcher", "bibtex": "@inproceedings{\nlam2023efficient,\ntitle={Efficient Neural Music Generation},\nauthor={Max W. Y. Lam and Qiao Tian and Tang Li and Zongyu Yin and Siyuan Feng and Ming Tu and Yuliang Ji and Rui Xia and Mingbo Ma and Xuchen Song and Jitong Chen and Yuping Wang and Yuxuan Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=cxazQGSsQa}\n}", "github": "", "project": "", "reviewers": "KL1t;cbjn;4HjZ;hBX7;ieEr", "pdf_size": 770947, "rating": "4;4;6;6;7", "confidence": "4;3;5;3;4", "soundness": "2;2;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;2;2;2;3", "wc_summary": "86;99;204;82;159", "wc_strengths": "62;66;71;55;51", "wc_weaknesses": "239;312;252;36;216", "wc_questions": "1;267;112;13;67", "wc_limitations": "1;43;39;14;13", "wc_review": "389;787;678;200;506", "wc_reply_reviewers": "0;199;134;0;21", "wc_reply_authors": "0;743;210;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 126.0, 47.82886157959439 ], "wc_strengths_avg": [ 61.0, 7.238784428341543 ], "wc_weaknesses_avg": [ 211.0, 93.07631277613011 ], "wc_questions_avg": [ 92.0, 96.09578554754626 ], "wc_limitations_avg": [ 22.0, 16.2234398325386 ], "wc_review_avg": [ 512.0, 207.6968945362448 ], "wc_reply_reviewers_avg": [ 70.8, 81.16008871360356 ], "wc_reply_authors_avg": [ 190.6, 287.926101630262 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.31180478223116187, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13987278157292993267&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "bytedance.com;bytedance.com;;;;;;;;bytedance.com;bytedance.com;;bytedance.com", "author_num": 13, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "ByteDance", 
"aff_unique_dep": "Speech, Audio & Music Intelligence (SAMI)", "aff_unique_url": "https://www.bytedance.com", "aff_unique_abbr": "ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Exploring Diverse In-Context Configurations for Image Captioning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71057", "id": "czwZnNf60r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/804b5e300c9ed4e3ea3b073f186f4adc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=czwZnNf60r", "openreview": "https://openreview.net/forum?id=czwZnNf60r", "poster": "/media/PosterPDFs/NeurIPS%202023/71057.png?t=1698673445.044633", "slides": "https://nips.cc/virtual/2023/poster/71057", "video": "https://nips.cc/virtual/2023/poster/71057", "author_site": "Xu Yang, Yongliang Wu, Mingzhuo Yang, Haokun Chen, Xin Geng", "tldr": "", "abstract": "After discovering that Language Models (LMs) can be good in-context few-shot learners, numerous strategies have been proposed to optimize in-context sequence configurations. Recently, researchers in Vision-Language (VL) domains also develop their few-shot learners, while they only use the simplest way, \\ie, randomly sampling, to configure in-context image-text pairs. In order to explore the effects of varying configurations on VL in-context learning, we devised four strategies for image selection and four for caption assignment to configure in-context image-text pairs for image captioning. Here Image Captioning is used as the case study since it can be seen as the visually-conditioned LM. Our comprehensive experiments yield two counter-intuitive but valuable insights, highlighting the distinct characteristics of VL in-context learning due to multi-modal synergy, as compared to the NLP case. Furthermore, in our exploration of optimal combination strategies, we observed an average performance enhancement of 20.9 in CIDEr scores compared to the baseline. 
The code is given in https://github.com/yongliang-wu/ExploreCfg.", "keywords": "Image Caption; Few-shot Prompt; Vision Language Model;", "primary_area": "", "supplementary_material": "", "author": "Xu Yang;Yongliang Wu;Mingzhuo Yang;Haokun Chen;Xin Geng", "authorids": "~Xu_Yang5;~Yongliang_Wu1;~Mingzhuo_Yang1;~Haokun_Chen4;~Xin_Geng1", "gender": "M;M;M;M;M", "homepage": ";https://yongliang-wu.github.io/;https://yangmingzhuo.github.io/;https://haokunchen0.github.io/;http://palm.seu.edu.cn/xgeng/index.htm", "dblp": "63/1534-21.html;61/1913;;218/6928;", "google_scholar": "SqdxMH0AAAAJ;NdE8DZ8AAAAJ;;;ZOCxkIcAAAAJ", "orcid": "0000-0002-8276-2679;;;;", "linkedin": ";;;;", "or_profile": "~Xu_Yang5;~Yongliang_Wu1;~Mingzhuo_Yang1;~Haokun_Chen4;~Xin_Geng1", "aff": "Southeast University;Southeast University;Sengine;Southeast University;Southeast University, China", "aff_domain": "seu.edu.cn;seu.edu.cn;sengine.ai;seu.edu.cn;seu.edu.cn", "position": "Associate Professor;Undergrad student;Principal Researcher;MS student;Professor", "bibtex": "@inproceedings{\nyang2023exploring,\ntitle={Exploring Diverse In-Context Configurations for Image Captioning},\nauthor={Xu Yang and Yongliang Wu and Mingzhuo Yang and Haokun Chen and Xin Geng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=czwZnNf60r}\n}", "github": "", "project": "", "reviewers": "vuiF;Qg6d;FMu1;zeNe;tg5h", "pdf_size": 5791311, "rating": "5;5;5;5;6", "confidence": "4;4;3;5;4", "soundness": "3;4;3;2;3", "novelty": "2;3;3;3;3", "presentation": "1;3;3;2;2", "wc_summary": "63;95;133;102;183", "wc_strengths": "54;51;28;101;130", "wc_weaknesses": "417;279;57;231;193", "wc_questions": "112;33;296;21;140", "wc_limitations": "23;1;16;1;5", "wc_review": "669;459;530;456;651", "wc_reply_reviewers": "90;27;86;0;47", "wc_reply_authors": "18;20;254;0;46", "reply_reviewers": "1;1;3;0;1", "reply_authors": "2;2;4;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 115.2, 40.548242871917395 ], "wc_strengths_avg": [ 72.8, 37.155887824138986 ], "wc_weaknesses_avg": [ 235.4, 117.06681852685671 ], "wc_questions_avg": [ 120.4, 98.80202427076077 ], "wc_limitations_avg": [ 9.2, 8.818163074019441 ], "wc_review_avg": [ 553.0, 91.47021373102831 ], "wc_reply_reviewers_avg": [ 50.0, 34.44996371551064 ], "wc_reply_authors_avg": [ 67.6, 94.34744299661756 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2969526313997337683&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "seu.edu.cn;seu.edu.cn;sengine.ai;seu.edu.cn;seu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Southeast University;Sengine", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;", "aff_unique_abbr": "SEU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "On the Role of Randomization in Adversarially Robust Classification", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71056", "id": "d0IEd3VgBh", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa9755043814e7f08d859a286bb83c35-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d0IEd3VgBh", "openreview": "https://openreview.net/forum?id=d0IEd3VgBh", "poster": "/media/PosterPDFs/NeurIPS%202023/71056.png?t=1699028007.6767461", "slides": "https://nips.cc/virtual/2023/poster/71056", "video": "https://nips.cc/virtual/2023/poster/71056", "author_site": "Lucas Gnecco Heredia, Muni Sreenivas Pydi, Laurent Meunier, Benjamin Negrevergne, Yann Chevaleyre", "tldr": "", "abstract": "Deep neural networks are known to be vulnerable to small adversarial perturbations in test data. To defend against adversarial attacks, probabilistic classifiers have been proposed as an alternative to deterministic ones. However, literature has conflicting findings on the effectiveness of probabilistic classifiers in comparison to deterministic ones. In this paper, we clarify the role of randomization in building adversarially robust classifiers.\nGiven a base hypothesis set of deterministic classifiers, we show the conditions under which a randomized ensemble outperforms the hypothesis set in adversarial risk, extending previous results.\nAdditionally, we show that for any probabilistic binary classifier (including randomized ensembles), there exists a deterministic classifier that outperforms it. Finally, we give an explicit description of the deterministic hypothesis set that contains such a deterministic classifier for many types of commonly used probabilistic classifiers, *i.e.* randomized ensembles and parametric/input noise injection.", "keywords": "adversarial attacks;robustness;adversarial;attacks;deep learning;randomization;randomized ensembles", "primary_area": "", "supplementary_material": "", "author": "Lucas Gnecco Heredia;Muni Sreenivas Pydi;Laurent Meunier;benjamin negrevergne;Yann Chevaleyre", "authorids": "~Lucas_Gnecco_Heredia2;~Muni_Sreenivas_Pydi1;~Laurent_Meunier1;~benjamin_negrevergne1;~Yann_Chevaleyre1", "gender": "M;M;M;;M", "homepage": ";https://munisreenivas.github.io/;;;https://www.lamsade.dauphine.fr/~ychevaleyre/", "dblp": "325/5719;194/2444;15/4624;;55/5658", "google_scholar": "https://scholar.google.com/citations?hl=en;BT8j_-oAAAAJ;;;SF6g8p4AAAAJ", "orcid": "0000-0002-1561-2080;;;;", "linkedin": ";;;;yannchevaleyre", "or_profile": "~Lucas_Gnecco_Heredia2;~Muni_Sreenivas_Pydi1;~Laurent_Meunier1;~benjamin_negrevergne1;~Yann_Chevaleyre1", "aff": ", Universit\u00e9 Paris-Dauphine (Paris IX);Universit\u00e9 Paris Dauphine - PSL;Payflows;;Universit\u00e9 Paris-Dauphine (Paris IX)", "aff_domain": "lamsade.dauphine.fr;lamsade.dauphine.fr;payflows.io;;dauphine.fr", "position": "PhD student;Postdoc;Researcher;;Full Professor", "bibtex": "@inproceedings{\nheredia2023on,\ntitle={On the Role of Randomization in Adversarially Robust Classification},\nauthor={Lucas Gnecco Heredia and Muni Sreenivas Pydi and Laurent Meunier and benjamin negrevergne and Yann Chevaleyre},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d0IEd3VgBh}\n}", "github": "", "project": "", "reviewers": "TdsR;ZKUe;Tq42;gWM1", "pdf_size": 688945, "rating": "6;7;7;7", "confidence": "3;3;4;2", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;2;3", "wc_summary": "84;117;123;78", "wc_strengths": "38;209;26;61", "wc_weaknesses": "102;258;187;160", "wc_questions": "1;104;86;119", "wc_limitations": "2;30;15;14", "wc_review": "227;718;437;432", 
"wc_reply_reviewers": "28;68;147;70", "wc_reply_authors": "0;19;732;64", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 19.72941965694886 ], "wc_strengths_avg": [ 83.5, 73.54080499967348 ], "wc_weaknesses_avg": [ 176.75, 56.06859637979178 ], "wc_questions_avg": [ 77.5, 45.68643124604941 ], "wc_limitations_avg": [ 15.25, 9.934158243152764 ], "wc_review_avg": [ 453.5, 174.6403447087757 ], "wc_reply_reviewers_avg": [ 78.25, 43.083494519363214 ], "wc_reply_authors_avg": [ 203.75, 305.86955961651364 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14927362524289666218&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "lamsade.dauphine.fr;lamsade.dauphine.fr;payflows.io;;dauphine.fr", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Universit\u00e9 Paris-Dauphine;Universit\u00e9 Paris Dauphine;Payflows", "aff_unique_dep": ";;", "aff_unique_url": "https://www.univ-paris-dauphine.fr;https://www.univ-paris-dauphine.fr;", "aff_unique_abbr": "UPD;UPD;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France;" }, { "title": "VRA: Variational Rectified Activation for Out-of-distribution Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71055", "id": "d0VItRE2ZH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5c20c00504e0c049ec2370d0cceaf3c4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d0VItRE2ZH", "openreview": "https://openreview.net/forum?id=d0VItRE2ZH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71055", "video": "https://nips.cc/virtual/2023/poster/71055", "author_site": "Mingyu Xu, Zheng Lian, Bin Liu, Jianhua Tao", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is critical to building reliable machine learning systems in the open world. Researchers have proposed various strategies to reduce model overconfidence on OOD data. Among them, ReAct is a typical and effective technique to deal with model overconfidence, which truncates high activations to increase the gap between in-distribution and OOD. Despite its promising results, is this technique the best choice? To answer this question, we leverage the variational method to find the optimal operation and verify the necessity of suppressing abnormally low and high activations and amplifying intermediate activations in OOD detection, rather than focusing only on high activations like ReAct. This motivates us to propose a novel technique called ``Variational Rectified Activation (VRA)'', which simulates these suppression and amplification operations using piecewise functions. Experimental results on multiple benchmark datasets demonstrate that our method outperforms existing post-hoc strategies. Meanwhile, VRA is compatible with different scoring functions and network architectures. 
Our code is available at https://github.com/zeroQiaoba/VRA.", "keywords": "Out-of-distribution Detection", "primary_area": "", "supplementary_material": "/attachment/541bffc89819e3c8b1930dd793a22acbfa642f82.zip", "author": "Mingyu Xu;Zheng Lian;Bin Liu;Jianhua Tao", "authorids": "~Mingyu_Xu1;~Zheng_Lian3;~Bin_Liu13;~Jianhua_Tao1", "gender": ";M;M;", "homepage": ";https://zeroqiaoba.github.io/Homepage/;https://people.ucas.ac.cn/~bin.liu;", "dblp": ";;35/837-41;", "google_scholar": ";S34nWz0AAAAJ;;", "orcid": ";0000-0001-9477-0599;;", "linkedin": ";;;", "or_profile": "~Mingyu_Xu1;~Zheng_Lian3;~Bin_Liu13;~Jianhua_Tao1", "aff": ";Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;", "aff_domain": ";ia.ac.cn;nlpr.ia.ac.cn;", "position": ";Assistant Professor;Associate Professor;", "bibtex": "@inproceedings{\nxu2023vra,\ntitle={{VRA}: Variational Rectified Activation for Out-of-distribution Detection},\nauthor={Mingyu Xu and Zheng Lian and Bin Liu and Jianhua Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d0VItRE2ZH}\n}", "github": "", "project": "", "reviewers": "J9PQ;GN4S;Tdya;JjUp", "pdf_size": 1200471, "rating": "4;4;5;6", "confidence": "5;3;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "38;48;75;48", "wc_strengths": "58;23;91;154", "wc_weaknesses": "134;60;43;190", "wc_questions": "61;59;7;5", "wc_limitations": "5;14;10;26", "wc_review": "296;204;226;423", "wc_reply_reviewers": "293;51;22;112", "wc_reply_authors": "619;298;245;375", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.25, 13.754544703478919 ], "wc_strengths_avg": [ 81.5, 48.27266307134919 ], "wc_weaknesses_avg": [ 106.75, 58.99735163547598 ], "wc_questions_avg": [ 33.0, 27.018512172212592 ], "wc_limitations_avg": [ 13.75, 7.75806032459145 ], "wc_review_avg": [ 287.25, 85.42065031360977 ], "wc_reply_reviewers_avg": [ 119.5, 105.30550792812312 ], "wc_reply_authors_avg": [ 384.25, 143.198070866894 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6126043725076356699&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";ia.ac.cn;nlpr.ia.ac.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Optimality of Message-Passing Architectures for Sparse Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71054", "id": "d1knqWjmNt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e991aa4cd2fdf0014fba2f000f542d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d1knqWjmNt", "openreview": "https://openreview.net/forum?id=d1knqWjmNt", "poster": 
"/media/PosterPDFs/NeurIPS%202023/71054.png?t=1699439476.0486805", "slides": "https://nips.cc/virtual/2023/poster/71054", "video": "https://nips.cc/virtual/2023/poster/71054", "author_site": "Aseem Baranwal, Kimon Fountoulakis, Aukosh Jagannath", "tldr": "", "abstract": "We study the node classification problem on feature-decorated graphs in the sparse setting, i.e., when the expected degree of a node is $O(1)$ in the number of nodes, in the fixed-dimensional asymptotic regime, i.e., the dimension of the feature data is fixed while the number of nodes is large. Such graphs are typically known to be locally tree-like. We introduce a notion of Bayes optimality for node classification tasks, called asymptotic local Bayes optimality, and compute the optimal classifier according to this criterion for a fairly general statistical data model with arbitrary distributions of the node features and edge connectivity. The optimal classifier is implementable using a message-passing graph neural network architecture. We then compute the generalization error of this classifier and compare its performance against existing learning methods theoretically on a well-studied statistical model with naturally identifiable signal-to-noise ratios (SNRs) in the data. We find that the optimal message-passing architecture interpolates between a standard MLP in the regime of low graph signal and a typical convolution in the regime of high graph signal. Furthermore, we prove a corresponding non-asymptotic result.", "keywords": "graph neural networks;message passing;bayesian inference;node classification;contextual stochastic block model", "primary_area": "", "supplementary_material": "/attachment/78efc68f136c297c2615b602680e803712f89d92.zip", "author": "Aseem Baranwal;Kimon Fountoulakis;Aukosh Jagannath", "authorids": "~Aseem_Baranwal1;~Kimon_Fountoulakis1;~Aukosh_Jagannath1", "gender": "M;M;", "homepage": "https://aseemrb.me;https://opallab.ca;", "dblp": "285/5304;149/5799;", "google_scholar": "DPt626YAAAAJ;https://scholar.google.ca/citations?user=K-SafJUAAAAJ;", "orcid": "0000-0001-5318-6054;;", "linkedin": "aseemrb/;;", "or_profile": "~Aseem_Baranwal1;~Kimon_Fountoulakis1;~Aukosh_Jagannath1", "aff": "University of Waterloo;University of Waterloo;", "aff_domain": "uwaterloo.ca;uwaterloo.ca;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nbaranwal2023optimality,\ntitle={Optimality of Message-Passing Architectures for Sparse Graphs},\nauthor={Aseem Baranwal and Kimon Fountoulakis and Aukosh Jagannath},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d1knqWjmNt}\n}", "github": "", "project": "", "reviewers": "LT99;S322;K3Xe;mQCx", "pdf_size": 515871, "rating": "4;6;7;7", "confidence": "3;4;4;5", "soundness": "3;3;4;3", "novelty": "2;3;3;4", "presentation": "3;3;4;2", "wc_summary": "106;146;142;51", "wc_strengths": "178;39;63;38", "wc_weaknesses": "230;152;89;418", "wc_questions": "38;282;229;80", "wc_limitations": "9;62;22;2", "wc_review": "561;681;545;589", "wc_reply_reviewers": "0;238;302;534", "wc_reply_authors": "0;0;180;218", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.25, 38.114137796885814 ], "wc_strengths_avg": [ 79.5, 57.74296493946254 
], "wc_weaknesses_avg": [ 222.25, 123.56046090881986 ], "wc_questions_avg": [ 157.25, 101.11719685592556 ], "wc_limitations_avg": [ 23.75, 23.220411279734044 ], "wc_review_avg": [ 594.0, 52.640288753007425 ], "wc_reply_reviewers_avg": [ 268.5, 190.15454241221795 ], "wc_reply_authors_avg": [ 99.5, 100.40293820401871 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11191162793193274158&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uwaterloo.ca;uwaterloo.ca;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Zero-Shot Anomaly Detection via Batch Normalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71053", "id": "d1wjMBYbP1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8078e8c3055303a884ffae2d3ea00338-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d1wjMBYbP1", "openreview": "https://openreview.net/forum?id=d1wjMBYbP1", "poster": "/media/PosterPDFs/NeurIPS%202023/71053.png?t=1700178344.1620626", "slides": "https://nips.cc/virtual/2023/poster/71053", "video": "https://nips.cc/virtual/2023/poster/71053", "author_site": "Aodong Li, Chen Qiu, Marius Kloft, Padhraic Smyth, Maja Rudolph, Stephan Mandt", "tldr": "", "abstract": "Anomaly detection (AD) plays a crucial role in many safety-critical application domains. The challenge of adapting an anomaly detector to drift in the normal data distribution, especially when no training data is available for the \"new normal,\" has led to the development of zero-shot AD techniques. In this paper, we propose a simple yet effective method called Adaptive Centered Representations (ACR) for zero-shot batch-level AD. Our approach trains off-the-shelf deep anomaly detectors (such as deep SVDD) to adapt to a set of inter-related training data distributions in combination with batch normalization, enabling automatic zero-shot generalization for unseen AD tasks. This simple recipe, batch normalization plus meta-training, is a highly effective and versatile tool. 
Our results demonstrate the first zero-shot AD results for tabular data and outperform existing methods in zero-shot anomaly detection and segmentation on image data from specialized domains.", "keywords": "deep anomaly detection;zero-shot learning;batch normalization", "primary_area": "", "supplementary_material": "/attachment/53df58a714952a8cd49b5284e476a98854bf7805.zip", "author": "Aodong Li;Chen Qiu;Marius Kloft;Padhraic Smyth;Maja Rudolph;Stephan Mandt", "authorids": "~Aodong_Li1;~Chen_Qiu1;~Marius_Kloft1;~Padhraic_Smyth1;~Maja_Rudolph4;~Stephan_Mandt1", "gender": "M;M;M;M;F;M", "homepage": "https://aodongli.github.io;;http://ml.informatik.uni-kl.de/;https://www.ics.uci.edu/~smyth;http://maja-rita-rudolph.com/;https://www.stephanmandt.com", "dblp": "207/7672;;73/2217;s/PadhraicSmyth;164/5581;147/5018", "google_scholar": ";uX5Y9XUAAAAJ;https://scholar.google.de/citations?user=l-BJCdAAAAAJ;OsoQ-dcAAAAJ;https://scholar.google.com/citations?hl=en;HOrGe7wAAAAJ", "orcid": ";;;0000-0001-9971-8378;;", "linkedin": ";;;;;stephan-mandt-8702795a/", "or_profile": "~Aodong_Li1;~Chen_Qiu1;~Marius_Kloft1;~Padhraic_Smyth1;~Maja_Rudolph4;~Stephan_M_Mandt1", "aff": "University of California, Irvine;Robert Bosch GmbH, Germany;RPTU Kaiserslautern-Landau;University of California, Irvine;Bosch;University of California, Irvine", "aff_domain": "uci.edu;de.bosch.com;uni-kl.de;uci.edu;bosch.com;uci.edu", "position": "PhD student;PhD student;Professor;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nli2023zeroshot,\ntitle={Zero-Shot Anomaly Detection via Batch Normalization},\nauthor={Aodong Li and Chen Qiu and Marius Kloft and Padhraic Smyth and Maja Rudolph and Stephan Mandt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d1wjMBYbP1}\n}", "github": "", "project": "", "reviewers": "2urY;GN5r;CaFY;Jqv8;hQFt", "pdf_size": 1867859, "rating": "5;5;5;5;6", "confidence": "4;4;5;4;5", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;4", "wc_summary": "124;71;24;66;31", "wc_strengths": "53;51;64;39;25", "wc_weaknesses": "190;121;188;103;504", "wc_questions": "133;92;5;22;5", "wc_limitations": "1;14;41;8;1", "wc_review": "501;349;322;238;566", "wc_reply_reviewers": "29;51;61;73;539", "wc_reply_authors": "13;23;384;363;820", "reply_reviewers": "1;1;1;1;3", "reply_authors": "2;2;2;2;4", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 63.2, 35.60561753431613 ], "wc_strengths_avg": [ 46.4, 13.32066064427737 ], "wc_weaknesses_avg": [ 221.2, 145.64532261627903 ], "wc_questions_avg": [ 51.4, 51.91762706441811 ], "wc_limitations_avg": [ 13.0, 14.818906842274163 ], "wc_review_avg": [ 395.2, 120.47472764028147 ], "wc_reply_reviewers_avg": [ 150.6, 194.73633456548367 ], "wc_reply_authors_avg": [ 320.6, 296.10849363029087 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6123724356957946, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3938492876778010219&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uci.edu;de.bosch.com;uni-kl.de;uci.edu;bosch.com;uci.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;1;0", "aff_unique_norm": "University 
of California, Irvine;Robert Bosch GmbH;Rheinland-Pfalz Technical University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uci.edu;https://www.bosch.com;https://www.rptu.de", "aff_unique_abbr": "UCI;Bosch;RPTU", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Irvine;;Kaiserslautern-Landau", "aff_country_unique_index": "0;1;1;0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "Unbalanced Low-rank Optimal Transport Solvers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71052", "id": "d2WsCmoITF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a439259e78294c38d157a51a2c40486b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d2WsCmoITF", "openreview": "https://openreview.net/forum?id=d2WsCmoITF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71052", "video": "https://nips.cc/virtual/2023/poster/71052", "author_site": "Meyer Scetbon, Michal Klein, Giovanni Palla, Marco Cuturi", "tldr": "", "abstract": "The relevance of optimal transport methods to machine learning has long been hindered by two salient limitations.\nFirst, the $O(n^3)$ computational cost of standard sample-based solvers (when used on batches of $n$ samples) is prohibitive.\nSecond, the mass conservation constraint makes OT solvers too rigid in practice: because they must match \\textit{all} points from both measures, their output can be heavily influenced by outliers.\nA flurry of recent works in OT has addressed these computational and modelling limitations, but has resulted in two separate strains of methods:\nWhile the computational outlook was much improved by entropic regularization, more recent $O(n)$ linear-time \\textit{low-rank} solvers hold the promise to scale up OT further.\nOn the other hand, modelling rigidities have been eased owing to unbalanced variants of OT, that rely on penalization terms to promote, rather than impose, mass conservation.\nThe goal of this paper is to merge these two strains, to achieve the promise of \\textit{both} versatile/scalable unbalanced/low-rank OT solvers. 
\nWe propose custom algorithms to implement these extensions for the linear OT problem and its Fused-Gromov-Wasserstein generalization, and demonstrate their practical relevance to challenging spatial transcriptomics matching problems.", "keywords": "Optimal Transport;Unbalanced", "primary_area": "", "supplementary_material": "/attachment/797e0228bb56337d93210903d96801c7c510a87e.pdf", "author": "Meyer Scetbon;Michal Klein;Giovanni Palla;marco cuturi", "authorids": "~Meyer_Scetbon1;~Michal_Klein1;~Giovanni_Palla1;~marco_cuturi2", "gender": "M;M;M;M", "homepage": "https://meyerscetbon.github.io;https://github.com/michalk8;https://giovannipalla.com/;http://marcocuturi.net", "dblp": "249/8054;332/4607;;85/5102", "google_scholar": ";zByzdzcAAAAJ;20uwxzkAAAAJ;https://scholar.google.fr/citations?user=kQEydDMAAAAJ", "orcid": ";0000-0002-2433-6380;0000-0002-8004-4462;", "linkedin": ";michal-klein-148697165/;;", "or_profile": "~Meyer_Scetbon1;~Michal_Klein1;~Giovanni_Palla1;~marco_cuturi2", "aff": "Microsoft;Apple;Technische Universit\u00e4t M\u00fcnchen;Ensae ParisTech", "aff_domain": "microsoft.com;apple.com;tum.de;ensae.fr", "position": "Researcher;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nscetbon2023unbalanced,\ntitle={Unbalanced Low-rank Optimal Transport Solvers},\nauthor={Meyer Scetbon and Michal Klein and Giovanni Palla and marco cuturi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d2WsCmoITF}\n}", "github": "", "project": "", "reviewers": "1XVN;ckny;fjNH;GuCe", "pdf_size": 9367769, "rating": "4;4;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;2;4", "presentation": "2;3;3;3", "wc_summary": "109;48;185;137", "wc_strengths": "90;42;37;96", "wc_weaknesses": "167;296;139;263", "wc_questions": "289;10;88;4", "wc_limitations": "8;10;2;8", "wc_review": "663;406;451;508", "wc_reply_reviewers": "473;163;38;162", "wc_reply_authors": "993;265;30;536", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.75, 49.54480295651604 ], "wc_strengths_avg": [ 66.25, 26.892145693491994 ], "wc_weaknesses_avg": [ 216.25, 65.07447656339619 ], "wc_questions_avg": [ 97.75, 115.28307551414474 ], "wc_limitations_avg": [ 7.0, 3.0 ], "wc_review_avg": [ 507.0, 97.04895671773087 ], "wc_reply_reviewers_avg": [ 209.0, 160.67202618999985 ], "wc_reply_authors_avg": [ 456.0, 358.0244405065107 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6929115930976474130&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "microsoft.com;apple.com;tum.de;ensae.fr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Microsoft;Apple;Technische Universit\u00e4t M\u00fcnchen;ENSAE ParisTech", "aff_unique_dep": "Microsoft Corporation;Apple Inc.;;", "aff_unique_url": "https://www.microsoft.com;https://www.apple.com;https://www.tum.de;https://www.ensae.fr", "aff_unique_abbr": "Microsoft;Apple;TUM;Ensae", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", 
"aff_country_unique": "United States;Germany;France" }, { "title": "On the Gini-impurity Preservation For Privacy Random Forests", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71051", "id": "d47iuwOt3j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8d6b1d775014eff18256abeb207202ad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d47iuwOt3j", "openreview": "https://openreview.net/forum?id=d47iuwOt3j", "poster": "/media/PosterPDFs/NeurIPS%202023/71051.png?t=1701832250.799784", "slides": "https://nips.cc/virtual/2023/poster/71051", "video": "https://nips.cc/virtual/2023/poster/71051", "author_site": "XinRan Xie, Man-Jie Yuan, Xuetong Bai, Wei Gao, Zhi-Hua Zhou", "tldr": "", "abstract": "Random forests have been one successful ensemble algorithms in machine learning. Various techniques have been utilized to preserve the privacy of random forests from anonymization, differential privacy, homomorphic encryption, etc., whereas it rarely takes into account some crucial ingredients of learning algorithm. This work presents a new encryption to preserve data's Gini impurity, which plays a crucial role during the construction of random forests. Our basic idea is to modify the structure of binary search tree to store several examples in each node, and encrypt data features by incorporating label and order information. Theoretically, we prove that our scheme preserves the minimum Gini impurity in ciphertexts without decrypting, and present the security guarantee for encryption. For random forests, we encrypt data features based on our Gini-impurity-preserving scheme, and take the homomorphic encryption scheme CKKS to encrypt data labels due to their importance and privacy. We conduct extensive experiments to show the effectiveness, efficiency and security of our proposed method.", "keywords": "classification;random forests;privacy-preserving machine learng;data encrytion", "primary_area": "", "supplementary_material": "/attachment/993e6d9da65469fbbe74e0277c511718921b5ab3.zip", "author": "XinRan Xie;Man-Jie Yuan;Xuetong Bai;Wei Gao;Zhi-Hua Zhou", "authorids": "~XinRan_Xie1;~Man-Jie_Yuan1;~Xuetong_Bai1;~Wei_Gao7;~Zhi-Hua_Zhou2", "gender": ";;F;M;", "homepage": "http://www.lamda.nju.edu.cn/xiexr/;;https://github.kellybaibai.com;http://www.lamda.nju.edu.cn/gaow/;", "dblp": ";;;28/2073-8;", "google_scholar": ";;;pBzGACcAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~XinRan_Xie1;~Man-Jie_Yuan1;~Xuetong_Bai1;~Wei_Gao7;~Zhi-Hua_Zhou2", "aff": "Nanjing University;;Nanjing University;Nanjing University;", "aff_domain": "nju.edu.cn;;nju.edu.cn;nju.edu.cn;", "position": "MS student;;Undergrad student;Full Professor;", "bibtex": "@inproceedings{\nxie2023on,\ntitle={On the Gini-impurity Preservation For Privacy Random Forests},\nauthor={XinRan Xie and Man-Jie Yuan and Xuetong Bai and Wei Gao and Zhi-Hua Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d47iuwOt3j}\n}", "github": "", "project": "", "reviewers": "PTBU;B6q6;Emcz;2RPe;1ntL", "pdf_size": 2745653, "rating": "6;6;6;6;7", "confidence": "3;1;4;4;4", "soundness": "1;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "52;38;59;111;107", "wc_strengths": "42;32;58;319;236", "wc_weaknesses": "270;20;34;65;156", "wc_questions": "83;3;26;79;92", "wc_limitations": "1;3;27;54;29", "wc_review": "448;96;204;628;620", "wc_reply_reviewers": "28;0;12;0;6", 
"wc_reply_authors": "351;0;0;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 73.4, 29.870386673091463 ], "wc_strengths_avg": [ 137.4, 117.65644903701626 ], "wc_weaknesses_avg": [ 109.0, 93.39378994344324 ], "wc_questions_avg": [ 56.6, 35.38700326391033 ], "wc_limitations_avg": [ 22.8, 19.47716611830376 ], "wc_review_avg": [ 399.2, 216.1096018227788 ], "wc_reply_reviewers_avg": [ 9.2, 10.4 ], "wc_reply_authors_avg": [ 70.2, 140.39999999999998 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.34299717028501764, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1598370560741797561&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;;nju.edu.cn;nju.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Towards Test-Time Refusals via Concept Negation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71050", "id": "d4X0QWS2Ln", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54801e196796134a2b0ae5e8adef502f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d4X0QWS2Ln", "openreview": "https://openreview.net/forum?id=d4X0QWS2Ln", "poster": "/media/PosterPDFs/NeurIPS%202023/71050.png?t=1701436576.9151278", "slides": "https://nips.cc/virtual/2023/poster/71050", "video": "https://nips.cc/virtual/2023/poster/71050", "author_site": "Peiran Dong, Song Guo, Junxiao Wang, Bingjie WANG, Jiewei Zhang, Ziming Liu", "tldr": "", "abstract": "Generative models produce unbounded outputs, necessitating the use of refusal techniques to confine their output space. Employing generative refusals is crucial in upholding the ethical and copyright integrity of synthesized content, particularly when working with widely adopted diffusion models. \"Concept negation'' presents a promising paradigm to achieve generative refusals, as it effectively defines and governs the model's output space based on concepts, utilizing natural language interfaces that are readily comprehensible to humans. However, despite the valuable contributions of prior research to the field of concept negation, it still suffers from significant limitations. The existing concept negation methods, which operate based on the composition of score or noise predictions from the diffusion process, are limited to independent concepts (e.g., ``a blonde girl`` without ``glasses``) and fail to consider the interconnected nature of concepts in reality (e.g., ``Mickey mouse eats ice cream`` without ``Disney characters``). Keeping the limitations in mind, we propose a novel framework, called $ProtoRe$, to improve the flexibility of concept negation via test-time negative concept identification along with purification in the feature space. 
$ProtoRe$ works by incorporating CLIP's language-contrastive knowledge to identify the prototype of negative concepts, extract the negative features from outputs using the prototype as a prompt, and further refine the attention maps by retrieving negative features. Our evaluation on multiple benchmarks shows that $ProtoRe$ outperforms state-of-the-art methods under various settings, in terms of the effectiveness of purification and the fidelity of generative images.", "keywords": "Diffusion models;test-time refusal;concept negation;safety in generative models", "primary_area": "", "supplementary_material": "/attachment/b41c718c8ae6d562b615384a462a9e29e18d4a3a.pdf", "author": "Peiran Dong;Song Guo;Junxiao Wang;Bingjie WANG;Jiewei Zhang;Ziming Liu", "authorids": "~Peiran_Dong1;~Song_Guo5;~Junxiao_Wang1;~Bingjie_WANG1;~Jiewei_Zhang1;~Ziming_Liu1", "gender": "M;M;M;M;M;M", "homepage": "https://polyu.netlify.app/people/peiran-dong/;https://cse.hkust.edu.hk/~songguo/;http://jxiao.wang/;;http://peilab.comp.polyu.edu.hk/people/jiewei-zhang/;", "dblp": "243/6454;01/267-1;;136/5474;15/10697;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;H6RsGygAAAAJ;;https://scholar.google.com.hk/citations?user=gak5NX0AAAAJ;b1WCs5kAAAAJ", "orcid": "0000-0002-1129-9218;;0000-0001-7263-174X;0000-0002-5904-2024;0000-0003-2841-6422;0000-0001-8001-9585", "linkedin": ";;junxiao-wang/;;;", "or_profile": "~Peiran_Dong1;~Song_Guo5;~Junxiao_Wang1;~Bingjie_WANG1;~Jiewei_Zhang1;~Ziming_Liu1", "aff": "Hong Kong Polytechnic University;The Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University;The Hong Kong Polytechnic University;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;connect.polyu.hk", "position": "PhD student;Full Professor;Postdoc;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\ndong2023towards,\ntitle={Towards Test-Time Refusals via Concept Negation},\nauthor={Peiran Dong and Song Guo and Junxiao Wang and Bingjie WANG and Jiewei Zhang and Ziming Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d4X0QWS2Ln}\n}", "github": "", "project": "", "reviewers": "2mKf;bbwA;WXn9;e3Lc;p7ZQ;YV3y", "pdf_size": 1100396, "rating": "4;6;7;7;7;7", "confidence": "3;3;2;3;3;3", "soundness": "2;3;4;3;4;4", "novelty": "3;3;3;4;4;4", "presentation": "2;2;2;2;4;4", "wc_summary": "77;138;80;85;115;63", "wc_strengths": "32;88;33;77;41;37", "wc_weaknesses": "45;137;135;116;25;49", "wc_questions": "90;274;270;25;17;42", "wc_limitations": "39;60;8;8;1;42", "wc_review": "283;697;526;311;199;233", "wc_reply_reviewers": "78;28;32;10;0;0", "wc_reply_authors": "231;0;0;0;0;0", "reply_reviewers": "1;1;1;1;0;0", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 6.333333333333333, 1.1055415967851332 ], "confidence_avg": [ 2.8333333333333335, 0.3726779962499649 ], "soundness_avg": [ 3.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 93.0, 25.48855952514121 ], "wc_strengths_avg": [ 51.333333333333336, 22.45489305746572 ], "wc_weaknesses_avg": [ 84.5, 45.93382921841664 ], "wc_questions_avg": [ 119.66666666666667, 110.1766258735894 ], "wc_limitations_avg": [ 26.333333333333332, 21.80723631172817 ], "wc_review_avg": [ 374.8333333333333, 178.03783929890358 ], 
"wc_reply_reviewers_avg": [ 24.666666666666668, 26.898988002442685 ], "wc_reply_authors_avg": [ 38.5, 86.0886171337419 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.26967994498529696, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12384099156849230663&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;connect.polyu.hk", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Structural Pruning for Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71049", "id": "d4f40zJJIS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/35c1d69d23bb5dd6b9abcd68be005d5c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d4f40zJJIS", "openreview": "https://openreview.net/forum?id=d4f40zJJIS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71049", "video": "https://nips.cc/virtual/2023/poster/71049", "author_site": "Gongfan Fang, Xinyin Ma, Xinchao Wang", "tldr": "", "abstract": "Generative modeling has recently undergone remarkable advancements, primarily propelled by the transformative implications of Diffusion Probabilistic Models (DPMs). The impressive capability of these models, however, often entails significant computational overhead during both training and inference. To tackle this challenge, we present Diff-Pruning, an efficient compression method tailored for learning lightweight diffusion models from pre-existing ones, without the need for extensive re-training. The essence of Diff-Pruning is encapsulated in a Taylor expansion over pruned timesteps, a process that disregards non-contributory diffusion steps and ensembles informative gradients to identify important weights. 
Our empirical assessment, undertaken across several datasets, highlights two primary benefits of our proposed method: 1) Efficiency: it enables approximately a 50\\% reduction in FLOPs at a mere 10\\% to 20\\% of the original training expenditure; 2) Consistency: the pruned diffusion models inherently preserve generative behavior congruent with their pre-trained models.", "keywords": "Diffusion Model;Network Pruning;Model Compression;Efficient Deep Learning", "primary_area": "", "supplementary_material": "/attachment/ffd4eb35b33adf376fb2d2d058b41c386a0d0749.pdf", "author": "Gongfan Fang;Xinyin Ma;Xinchao Wang", "authorids": "~Gongfan_Fang2;~Xinyin_Ma1;~Xinchao_Wang1", "gender": "M;F;M", "homepage": "https://fangggf.github.io/;https://horseee.github.io;https://sites.google.com/site/sitexinchaowang/", "dblp": "243/5768;267/2244;", "google_scholar": "489YZ_kAAAAJ;jFUKS0oAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gongfan_Fang2;~Xinyin_Ma1;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;nus.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfang2023structural,\ntitle={Structural Pruning for Diffusion Models},\nauthor={Gongfan Fang and Xinyin Ma and Xinchao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d4f40zJJIS}\n}", "github": "", "project": "", "reviewers": "S8k8;5jdL;PXRJ;oE14;i6fA", "pdf_size": 11623415, "rating": "4;5;5;6;7", "confidence": "2;4;3;4;4", "soundness": "2;2;4;3;4", "novelty": "2;2;4;3;4", "presentation": "2;2;4;3;3", "wc_summary": "88;105;69;108;91", "wc_strengths": "39;32;106;54;81", "wc_weaknesses": "118;248;79;294;84", "wc_questions": "5;25;6;2;24", "wc_limitations": "50;15;7;44;1", "wc_review": "300;425;267;502;281", "wc_reply_reviewers": "0;138;0;13;0", "wc_reply_authors": "0;330;126;181;0", "reply_reviewers": "0;1;0;1;0", "reply_authors": "1;2;2;3;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 92.2, 13.934130758680285 ], "wc_strengths_avg": [ 62.4, 27.528893911670334 ], "wc_weaknesses_avg": [ 164.6, 89.10129067527585 ], "wc_questions_avg": [ 12.4, 9.971960689854328 ], "wc_limitations_avg": [ 23.4, 19.865548066942427 ], "wc_review_avg": [ 355.0, 92.47053584791212 ], "wc_reply_reviewers_avg": [ 30.2, 54.13464694629494 ], "wc_reply_authors_avg": [ 127.4, 123.59870549483922 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7844645405527363, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "u.nus.edu;u.nus.edu;nus.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Unsupervised Image Denoising with Score Function", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71048", "id": "d6LShzSTOP", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dc9e095f668044e7a0909a4ea3926beb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d6LShzSTOP", "openreview": "https://openreview.net/forum?id=d6LShzSTOP", "poster": "/media/PosterPDFs/NeurIPS%202023/71048.png?t=1697471443.412186", "slides": "https://nips.cc/virtual/2023/poster/71048", "video": "https://nips.cc/virtual/2023/poster/71048", "author_site": "Yutong Xie, Mingze Yuan, Bin Dong, Quanzheng Li", "tldr": "", "abstract": "Though achieving excellent performance in some cases, current unsupervised learning methods for single image denoising usually have constraints in applications. In this paper, we propose a new approach which is more general and applicable to complicated noise models. Utilizing the property of score function, the gradient of logarithmic probability, we define a solving system for denoising. Once the score function of noisy images has been estimated, the denoised result can be obtained through the solving system. Our approach can be applied to multiple noise models, such as the mixture of multiplicative and additive noise combined with structured correlation. Experimental results show that our method is comparable when the noise model is simple, and has good performance in complicated cases where other methods are not applicable or perform poorly.", "keywords": "unsupervised learning;image denoising;score function", "primary_area": "", "supplementary_material": "/attachment/15d29557239578e3850c1eda456ecaa624eb4ecb.pdf", "author": "Yutong Xie;Mingze Yuan;Bin Dong;Quanzheng Li", "authorids": "~Yutong_Xie1;~Mingze_Yuan1;~Bin_Dong1;~Quanzheng_Li1", "gender": "M;;M;M", "homepage": ";;http://bicmr.pku.edu.cn/~dongbin;https://camca.mgh.harvard.edu/people/faculty/", "dblp": ";;11/6024;", "google_scholar": "https://scholar.google.com.hk/citations?user=TXCQd3kAAAAJ;;zLXcC90AAAAJ;MHq2z7oAAAAJ", "orcid": ";;;", "linkedin": "\u96e8\u5f64-\u8c22-49585314a/;;;", "or_profile": "~Yutong_Xie1;~Mingze_Yuan1;~Bin_Dong1;~Quanzheng_Li1", "aff": "Peking University;;Peking University;Harvard University", "aff_domain": "pku.edu.cn;; ;harvard.edu", "position": "PhD student;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nxie2023unsupervised,\ntitle={Unsupervised Image Denoising with Score Function},\nauthor={Yutong Xie and Mingze Yuan and Bin Dong and Quanzheng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d6LShzSTOP}\n}", "github": "", "project": "", "reviewers": "Gx7h;Y7fg;V5Ro;PKR9;Qfmm", "pdf_size": 7896542, "rating": "5;5;5;5;5", "confidence": "3;4;4;4;3", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "4;3;3;3;3", "wc_summary": "23;49;71;37;45", "wc_strengths": "36;53;36;75;40", "wc_weaknesses": "79;122;78;35;144", "wc_questions": "65;49;52;36;2", "wc_limitations": "3;33;2;1;15", "wc_review": "206;306;239;184;246", "wc_reply_reviewers": "22;17;43;0;51", "wc_reply_authors": "5;5;70;0;58", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 45.0, 15.748015748023622 ], "wc_strengths_avg": [ 48.0, 14.872793954062566 ], "wc_weaknesses_avg": [ 91.6, 37.992630864418956 ], "wc_questions_avg": [ 40.8, 21.479292353334177 ], 
"wc_limitations_avg": [ 10.8, 12.204917041913886 ], "wc_review_avg": [ 236.2, 41.503734771704586 ], "wc_reply_reviewers_avg": [ 26.6, 18.358649187780674 ], "wc_reply_authors_avg": [ 27.6, 30.01732832881701 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6111072558040698917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;; ;harvard.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Peking University;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.harvard.edu", "aff_unique_abbr": "Peking U;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "How to Fine-tune the Model: Unified Model Shift and Model Bias Policy Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71047", "id": "d7a5TpePV7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9b4f084b2e6709a2bfad0f601271aec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d7a5TpePV7", "openreview": "https://openreview.net/forum?id=d7a5TpePV7", "poster": "/media/PosterPDFs/NeurIPS%202023/71047.png?t=1700901045.350967", "slides": "https://nips.cc/virtual/2023/poster/71047", "video": "https://nips.cc/virtual/2023/poster/71047", "author_site": "Hai Zhang, Hang Yu, Junqiao Zhao, Di Zhang, xiao zhang, Hongtu Zhou, Chang Huang, Chen Ye", "tldr": "", "abstract": "Designing and deriving effective model-based reinforcement learning (MBRL) algorithms with a performance improvement guarantee is challenging, mainly attributed to the high coupling between model learning and policy optimization. Many prior methods that rely on return discrepancy to guide model learning ignore the impacts of model shift, which can lead to performance deterioration due to excessive model updates. Other methods use performance difference bound to explicitly consider model shift. However, these methods rely on a fixed threshold to constrain model shift, resulting in a heavy dependence on the threshold and a lack of adaptability during the training process. In this paper, we theoretically derive an optimization objective that can unify model shift and model bias and then formulate a fine-tuning process. This process adaptively adjusts the model updates to get a performance improvement guarantee while avoiding model overfitting. Based on these, we develop a straightforward algorithm USB-PO (Unified model Shift and model Bias Policy Optimization). 
Empirical results show that USB-PO achieves state-of-the-art performance on several challenging benchmark tasks.", "keywords": "model-based reinforcement learning;model shift;model bias;fine-tuning;performance difference bound", "primary_area": "", "supplementary_material": "/attachment/d71664a6ce8253356d65ceff49c5268375529f7d.zip", "author": "Hai Zhang;Hang Yu;Junqiao Zhao;Di Zhang;Chang Huang;Hongtu Zhou;Xiao Zhang;Chen Ye", "authorids": "~Hai_Zhang2;~Hang_Yu14;~Junqiao_Zhao1;~Di_Zhang5;2133035@tongji.edu.cn;zhouhongtu@tongji.edu.cn;2130790@tongji.edu.cn;~Chen_Ye1", "gender": "M;M;M;M;;;;M", "homepage": "https://betray12138.github.io/resume/;https://tinyyukoala.com;http://cs1.tongji.edu.cn/~junqiao;https://github.com/DinoMax00;;;;", "dblp": ";;;;;;;", "google_scholar": "YHqAzxUAAAAJ;;;;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Hai_Zhang2;~Hang_Yu14;~Junqiao_Zhao1;~Di_Zhang5;2133035@tongji.edu.cn;zhouhongtu@tongji.edu.cn;2130790@tongji.edu.cn;~Chen_Ye1", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;;;;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;;;;tongji.edu.cn", "position": "MS student;Undergrad student;Associate Professor;Undergrad student;;;;Full Professor", "bibtex": "@inproceedings{\nzhang2023how,\ntitle={How to Fine-tune the Model: Unified Model Shift and Model Bias Policy Optimization},\nauthor={Hai Zhang and Hang Yu and Junqiao Zhao and Di Zhang and Chang Huang and Hongtu Zhou and Xiao Zhang and Chen Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d7a5TpePV7}\n}", "github": "", "project": "", "reviewers": "VRuX;eAwQ;C4Te;dkWj", "pdf_size": 11100093, "rating": "3;5;6;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "1;3;3;4", "wc_summary": "48;102;24;86", "wc_strengths": "133;35;55;127", "wc_weaknesses": "163;118;160;16", "wc_questions": "56;82;5;160", "wc_limitations": "1;32;5;6", "wc_review": "401;369;249;395", "wc_reply_reviewers": "77;31;30;40", "wc_reply_authors": "207;143;24;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 65.0, 30.740852297878796 ], "wc_strengths_avg": [ 87.5, 43.136411533645216 ], "wc_weaknesses_avg": [ 114.25, 59.449032792804964 ], "wc_questions_avg": [ 75.75, 55.97488276003801 ], "wc_limitations_avg": [ 11.0, 12.267844146385297 ], "wc_review_avg": [ 353.5, 61.52032184571209 ], "wc_reply_reviewers_avg": [ 44.5, 19.1637678967368 ], "wc_reply_authors_avg": [ 97.75, 80.53376621020527 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5547001962252291, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6588180110299799907&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;;;;tongji.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Meta-AdaM: An Meta-Learned Adaptive Optimizer with Momentum for Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71046", "id": "d85pPNBHLt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce26d21662c979d515164b416d4571fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d85pPNBHLt", "openreview": "https://openreview.net/forum?id=d85pPNBHLt", "poster": "/media/PosterPDFs/NeurIPS%202023/71046.png?t=1701458814.1850076", "slides": "https://nips.cc/virtual/2023/poster/71046", "video": "https://nips.cc/virtual/2023/poster/71046", "author_site": "Siyuan Sun, Hongyang Gao", "tldr": "", "abstract": "We introduce Meta-AdaM, a meta-learned adaptive optimizer with momentum, designed for few-shot learning tasks that pose significant challenges to deep learning models due to the limited number of labeled examples. Meta-learning has been successfully employed to address these challenges by transferring meta-learned prior knowledge to new tasks. Most existing works focus on meta-learning an optimal model initialization or an adaptive learning rate learner for rapid convergence. However, these approaches either neglect to consider weight-update history for the adaptive learning rate learner or fail to effectively integrate momentum for fast convergence, as seen in many-shot learning settings. To tackle these limitations, we propose a meta-learned learning rate learner that utilizes weight-update history as input to predict more appropriate learning rates for rapid convergence. Furthermore, for the first time, our approach incorporates momentum into the optimization process of few-shot learning via a double look-ahead mechanism, enabling rapid convergence similar to many-shot settings. 
Extensive experimental results on benchmark datasets demonstrate the effectiveness of the proposed Meta-AdaM.", "keywords": "Few shot learning;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/c9ca80b554aff1cce76363f13f953100568986b2.zip", "author": "Siyuan Sun;Hongyang Gao", "authorids": "~Siyuan_Sun1;~Hongyang_Gao1", "gender": "M;M", "homepage": ";https://faculty.sites.iastate.edu/hygao/", "dblp": ";200/7985", "google_scholar": ";jGmq0aEAAAAJ", "orcid": ";0000-0002-9020-9080", "linkedin": "siyuan-sun-00b402140/;hongyang-gao-74924690/", "or_profile": "~Siyuan_Sun1;~Hongyang_Gao1", "aff": "Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsun2023metaadam,\ntitle={Meta-AdaM: An Meta-Learned Adaptive Optimizer with Momentum for Few-Shot Learning},\nauthor={Siyuan Sun and Hongyang Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d85pPNBHLt}\n}", "github": "", "project": "", "reviewers": "xuqU;n9ad;7eNt;cqvx;W1we", "pdf_size": 873029, "rating": "4;6;6;6;7", "confidence": "3;4;5;3;5", "soundness": "2;3;2;3;4", "novelty": "2;3;3;3;3", "presentation": "3;2;4;2;4", "wc_summary": "71;115;79;86;113", "wc_strengths": "46;109;70;115;89", "wc_weaknesses": "260;380;320;155;194", "wc_questions": "7;112;503;92;17", "wc_limitations": "6;31;21;16;51", "wc_review": "390;747;993;464;464", "wc_reply_reviewers": "163;288;244;20;79", "wc_reply_authors": "476;388;397;84;19", "reply_reviewers": "1;2;2;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 92.8, 17.95995545651492 ], "wc_strengths_avg": [ 85.8, 25.43540839066674 ], "wc_weaknesses_avg": [ 261.8, 81.68818764056405 ], "wc_questions_avg": [ 146.2, 183.0206545721001 ], "wc_limitations_avg": [ 25.0, 15.297058540778355 ], "wc_review_avg": [ 611.6, 226.48496638850006 ], "wc_reply_reviewers_avg": [ 158.8, 99.60200801188698 ], "wc_reply_authors_avg": [ 272.8, 184.41626826286233 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6846531968814576, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2362250157323890722&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "iastate.edu;iastate.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "3D Copy-Paste: Physically Plausible Object Insertion for Monocular 3D Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71045", "id": "d86B6Mdweq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/370fa2e691f57eb319bc263a07dad4a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d86B6Mdweq", "openreview": "https://openreview.net/forum?id=d86B6Mdweq", "poster": "/media/PosterPDFs/NeurIPS%202023/71045.png?t=1702085756.1459262", "slides": 
"https://nips.cc/virtual/2023/poster/71045", "video": "https://nips.cc/virtual/2023/poster/71045", "author_site": "Yunhao Ge, Hong-Xing Yu, Cheng Zhao, Yuliang Guo, Xinyu Huang, Liu Ren, Laurent Itti, Jiajun Wu", "tldr": "", "abstract": "A major challenge in monocular 3D object detection is the limited diversity and quantity of objects in real datasets. While augmenting real scenes with virtual objects holds promise to improve both the diversity and quantity of the objects, it remains elusive due to the lack of an effective 3D object insertion method in complex real captured scenes. In this work, we study augmenting complex real indoor scenes with virtual objects for monocular 3D object detection. The main challenge is to automatically identify plausible physical properties for virtual assets (e.g., locations, appearances, sizes, etc.) in cluttered real scenes. To address this challenge, we propose a physically plausible indoor 3D object insertion approach to automatically copy virtual objects and paste them into real scenes. The resulting objects in scenes have 3D bounding boxes with plausible physical locations and appearances. In particular, our method first identifies physically feasible locations and poses for the inserted objects to prevent collisions with the existing room layout. Subsequently, it estimates spatially-varying illumination for the insertion location, enabling the immersive blending of the virtual objects into the original scene with plausible appearances and cast shadows. We show that our augmentation method significantly improves existing monocular 3D object models and achieves state-of-the-art performance. For the first time, we demonstrate that a physically plausible 3D object insertion, serving as a generative data augmentation technique, can lead to significant improvements for discriminative downstream tasks such as monocular 3D object detection. 
Project website: https://gyhandy.github.io/3D-Copy-Paste/.", "keywords": "3D Copy-Paste;Object Insertion;Monocular 3D Object Detection;Physically Plausible;Data Generation", "primary_area": "", "supplementary_material": "/attachment/b3ae06385ffb7977a6997ebac77b54d9da57b1aa.pdf", "author": "Yunhao Ge;Hong-Xing Yu;Cheng Zhao;Yuliang Guo;Xinyu Huang;Liu Ren;Laurent Itti;Jiajun Wu", "authorids": "~Yunhao_Ge1;~Hong-Xing_Yu1;~Cheng_Zhao1;~Yuliang_Guo2;~Xinyu_Huang3;~Liu_Ren1;~Laurent_Itti1;~Jiajun_Wu1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://gyhandy.github.io/;https://kovenyu.com;;https://yuliangguo.github.io;;https://sites.google.com/site/liurenshomepage/;http://ilab.usc.edu;https://jiajunwu.com", "dblp": "204/1908;205/2676.html;93/359834;117/8269;91/2102-1;65/4250;31/3256;117/4768", "google_scholar": "https://scholar.google.ca/citations?user=QhjGr4oAAAAJ;kNKncZcAAAAJ;EAC-8m0AAAAJ;CP-YkUwAAAAJ;cL4bNBwAAAAJ;;xhUvqK8AAAAJ;2efgcS0AAAAJ", "orcid": ";;;;;;0000-0002-0168-2977;0000-0002-4176-343X", "linkedin": "yunhao-ge-720727135/;;;yuliang-guo-01781a20;xhuan4;;;jiajunwu/", "or_profile": "~Yunhao_Ge1;~Hong-Xing_Yu1;~Cheng_Zhao1;~Yuliang_Guo2;~Xinyu_Huang3;~Liu_Ren1;~Laurent_Itti1;~Jiajun_Wu1", "aff": "University of Southern California;Stanford University;Bosch;Bosch Research North America;Robert Bosch Research in North America;Bosch Research;University of Southern California;Stanford University", "aff_domain": "usc.edu;cs.stanford.edu;us.bosch.com;bosch.com;us.bosch.com;us.bosch.com;usc.edu;stanford.edu", "position": "PhD student;PhD student;Researcher;Researcher;Principal Scientist;Principal Researcher;Professor;Assistant Professor", "bibtex": "@inproceedings{\nge2023d,\ntitle={3D Copy-Paste: Physically Plausible Object Insertion for Monocular 3D Detection},\nauthor={Yunhao Ge and Hong-Xing Yu and Cheng Zhao and Yuliang Guo and Xinyu Huang and Liu Ren and Laurent Itti and Jiajun Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d86B6Mdweq}\n}", "github": "", "project": "", "reviewers": "mHBD;DWmW;B4nT;sM4b;7hAU", "pdf_size": 4337031, "rating": "5;5;6;6;6", "confidence": "5;4;3;4;2", "soundness": "3;4;3;3;3", "novelty": "2;3;3;3;2", "presentation": "2;4;3;3;2", "wc_summary": "88;170;72;134;74", "wc_strengths": "115;57;63;103;99", "wc_weaknesses": "334;145;149;160;234", "wc_questions": "1;52;54;96;2", "wc_limitations": "12;11;33;19;28", "wc_review": "550;435;371;512;437", "wc_reply_reviewers": "62;132;127;55;58", "wc_reply_authors": "15;74;100;15;6", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 107.6, 38.39583310725266 ], "wc_strengths_avg": [ 87.4, 23.06165648863932 ], "wc_weaknesses_avg": [ 204.4, 72.44473755905256 ], "wc_questions_avg": [ 41.0, 35.87756959438585 ], "wc_limitations_avg": [ 20.6, 8.685620300243386 ], "wc_review_avg": [ 461.0, 63.04601494146954 ], "wc_reply_reviewers_avg": [ 86.8, 34.970844999799475 ], "wc_reply_authors_avg": [ 42.0, 37.79417944604698 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7205766921228919, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15730370133621944712&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "usc.edu;cs.stanford.edu;us.bosch.com;bosch.com;us.bosch.com;us.bosch.com;usc.edu;stanford.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;0;1", "aff_unique_norm": "University of Southern California;Stanford University;Robert Bosch GmbH;Bosch Research North America;Robert Bosch Research;Bosch Research", "aff_unique_dep": ";;;;Research;", "aff_unique_url": "https://www.usc.edu;https://www.stanford.edu;https://www.bosch.com;https://research.bosch.com/northamerica;https://research.bosch.com;https://research.bosch.com", "aff_unique_abbr": "USC;Stanford;Bosch;Bosch RNA;RBRNA;Bosch", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Los Angeles;Stanford;", "aff_country_unique_index": "0;0;1;0;2;1;0;0", "aff_country_unique": "United States;Germany;Unknown" }, { "title": "ZipLM: Inference-Aware Structured Pruning of Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71044", "id": "d8j3lsBWpV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ced46a50befedcb884ccf0cbe8c3ad23-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=d8j3lsBWpV", "openreview": "https://openreview.net/forum?id=d8j3lsBWpV", "poster": "/media/PosterPDFs/NeurIPS%202023/71044.png?t=1701386253.186801", "slides": "https://nips.cc/virtual/2023/poster/71044", "video": "https://nips.cc/virtual/2023/poster/71044", "author_site": "Eldar Kurti\u0107, Elias Frantar, Dan Alistarh, Dan Alistarh", "tldr": "", "abstract": "The breakthrough performance of large language models (LLMs) comes with major computational footprints and high deployment costs. In this paper, we progress towards resolving this problem by proposing a novel structured compression approach for LLMs, called ZipLM. ZipLM achieves state-of-the-art accuracy-vs-speedup, while matching a set of desired target runtime speedups in any given inference environment. Specifically, given a model, a dataset, an inference environment, as well as a set of speedup targets, ZipLM iteratively identifies and removes components with the worst loss-runtime trade-off. Unlike prior methods that specialize in either the *post-training/one-shot* or the *gradual compression* setting, and only for specific families of models such as BERT (*encoder*) or GPT (*decoder*), ZipLM produces state-of-the-art compressed models across all these settings. Furthermore, ZipLM achieves superior results for a fraction of the computational cost relative to prior distillation and pruning techniques, making it a cost-effective approach for generating an entire family of smaller, faster, and highly accurate models, guaranteed to meet the desired inference specifications. In particular, ZipLM outperforms all prior BERT-base distillation and pruning techniques, such as CoFi, MiniLM, and TinyBERT. Moreover, it matches the performance of the heavily optimized MobileBERT model, obtained via extensive architecture search, by simply pruning the baseline BERT-large model. When compressing GPT2, ZipLM outperforms DistilGPT2 while being 60\\% smaller and 30\\% faster. 
Our code is available at: https://github.com/IST-DASLab/ZipLM.", "keywords": "LLMs;pruning;compression;inference", "primary_area": "", "supplementary_material": "/attachment/587925816a20648818fbf5ac83b2874e46e27e4f.zip", "author": "Eldar Kurtic;Elias Frantar;Dan Alistarh", "authorids": "~Eldar_Kurtic1;~Elias_Frantar1;~Dan_Alistarh7", "gender": "M;M;M", "homepage": ";;http://people.csail.mit.edu/alistarh/", "dblp": "297/3713;259/2210;36/3251.html", "google_scholar": "https://scholar.google.com/citations?hl=en;hjdlwz8AAAAJ;https://scholar.google.com.tw/citations?user=75q-6ZQAAAAJ", "orcid": ";;", "linkedin": "eldar-kurti%C4%87-77963b160/;elias-frantar-5b43181a4;", "or_profile": "~Eldar_Kurtic1;~Elias_Frantar1;~Dan_Alistarh1", "aff": "Institute of Science and Technology Austria;Google Brain;Institute of Science and Technology", "aff_domain": "ist.ac.at;google.com;ist.ac.at", "position": "Researcher;Intern;Full Professor", "bibtex": "@inproceedings{\nkurtic2023ziplm,\ntitle={Zip{LM}: Inference-Aware Structured Pruning of Language Models},\nauthor={Eldar Kurtic and Elias Frantar and Dan Alistarh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=d8j3lsBWpV}\n}", "github": "", "project": "", "reviewers": "rTQd;mFy7;5zkW;h7Df;27Y8", "pdf_size": 805586, "rating": "5;5;6;6;6", "confidence": "4;4;3;3;3", "soundness": "2;3;3;4;3", "novelty": "3;3;3;3;2", "presentation": "3;3;3;2;2", "wc_summary": "159;72;77;71;53", "wc_strengths": "17;69;31;115;36", "wc_weaknesses": "42;125;144;164;105", "wc_questions": "299;118;5;33;38", "wc_limitations": "1;1;10;12;9", "wc_review": "518;385;267;395;241", "wc_reply_reviewers": "70;0;0;0;37", "wc_reply_authors": "253;0;0;0;52", "reply_reviewers": "2;0;0;0;1", "reply_authors": "3;1;1;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 86.4, 37.199999999999996 ], "wc_strengths_avg": [ 53.6, 35.120364462801355 ], "wc_weaknesses_avg": [ 116.0, 41.871231173682965 ], "wc_questions_avg": [ 98.6, 107.02635189522252 ], "wc_limitations_avg": [ 6.6, 4.673328578219169 ], "wc_review_avg": [ 361.2, 99.61606296175331 ], "wc_reply_reviewers_avg": [ 21.4, 28.21063629200873 ], "wc_reply_authors_avg": [ 61.0, 98.08975481669836 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12913931932563387983&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ist.ac.at;google.com;ist.ac.at", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Institute of Science and Technology Austria;Google;Institute of Science and Technology", "aff_unique_dep": ";Google Brain;", "aff_unique_url": "https://www.ist.ac.at;https://brain.google.com;", "aff_unique_abbr": "IST Austria;Google Brain;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "Austria;United States;" }, { "title": "A*Net: A Scalable Path-based Reasoning Approach for Knowledge Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71043", "id": "dAJrxQz1lk", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9e98316cb72fee82cc1160da5810abc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dAJrxQz1lk", "openreview": "https://openreview.net/forum?id=dAJrxQz1lk", "poster": "/media/PosterPDFs/NeurIPS%202023/71043.png?t=1702181707.2654629", "slides": "https://nips.cc/virtual/2023/poster/71043", "video": "https://nips.cc/virtual/2023/poster/71043", "author_site": "Zhaocheng Zhu, Xinyu Yuan, Michael Galkin, Louis-Pascal Xhonneux, Ming Zhang, Maxime Gazeau, Jian Tang", "tldr": "", "abstract": "Reasoning on large-scale knowledge graphs has been long dominated by embedding methods. While path-based methods possess the inductive capacity that embeddings lack, their scalability is limited by the exponential number of paths. Here we present A\\*Net, a scalable path-based method for knowledge graph reasoning. Inspired by the A\\* algorithm for shortest path problems, our A\\*Net learns a priority function to select important nodes and edges at each iteration, to reduce time and memory footprint for both training and inference. The ratio of selected nodes and edges can be specified to trade off between performance and efficiency. Experiments on both transductive and inductive knowledge graph reasoning benchmarks show that A\\*Net achieves competitive performance with existing state-of-the-art path-based methods, while merely visiting 10% nodes and 10% edges at each iteration. On a million-scale dataset ogbl-wikikg2, A\\*Net not only achieves a new state-of-the-art result, but also converges faster than embedding methods. A\\*Net is the first path-based method for knowledge graph reasoning at such scale.", "keywords": "Knowledge Graph Reasoning;Path-based Methods;Scalability;A* Algorithm", "primary_area": "", "supplementary_material": "/attachment/3d434dbee88546b25786191d61cbf689c18c100c.pdf", "author": "Zhaocheng Zhu;Xinyu Yuan;Mikhail Galkin;Sophie Xhonneux;Ming Zhang;Maxime Gazeau;Jian Tang", "authorids": "~Zhaocheng_Zhu1;~Xinyu_Yuan2;~Mikhail_Galkin1;~Sophie_Xhonneux1;~Ming_Zhang5;~Maxime_Gazeau2;~Jian_Tang1", "gender": "M;F;M;F;;;", "homepage": "https://kiddozhu.github.io/;https://github.com/KatarinaYuan/;https://migalkin.github.io/;https://cs.pku.edu.cn/info/1080/1371.htm;http://www.jian-tang.com;;", "dblp": "195/0435;;160/8154;73/1844-4;181/2667-5;;255/5495", "google_scholar": "Qd8JumkAAAAJ;;yfYRbG4AAAAJ;LbzoQBsAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ;LfmqBJsAAAAJ;", "orcid": ";;;0000-0002-9809-3430;;;0000-0003-1990-4475", "linkedin": ";;;;;;", "or_profile": "~Zhaocheng_Zhu1;~Xinyu_Yuan2;~Mikhail_Galkin1;~Ming_Zhang5;~Jian_Tang1;~maxime_gazeau1;~Louis-Pascal_A._C._Xhonneux1", "aff": "Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Intel;Peking University;Mila, HEC Montreal;Google DeepMind;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "mila.quebec;mila.umontreal.ca;intel.com;pku.edu.cn;hec.ca;deepmind.com;mila.umontreal.ca", "position": "PhD student;PhD student;Researcher;Full Professor;Assistant Professor;Research engineer;PhD student", "bibtex": "@inproceedings{\nzhu2023anet,\ntitle={A*Net: A Scalable Path-based Reasoning Approach for Knowledge Graphs},\nauthor={Zhaocheng Zhu and Xinyu Yuan and Mikhail Galkin and Sophie Xhonneux and Ming Zhang and Maxime Gazeau and Jian Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dAJrxQz1lk}\n}", "github": "", "project": "", "reviewers": "7LW7;rgXX;ALvG;2kB4", "pdf_size": 819538, "rating": "4;5;6;6", "confidence": "4;3;4;3", "soundness": "3;2;2;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "90;66;84;22", "wc_strengths": "90;46;23;65", "wc_weaknesses": "355;111;58;26", "wc_questions": "111;3;101;149", "wc_limitations": "11;2;4;1", "wc_review": "657;228;270;263", "wc_reply_reviewers": "241;173;35;15", "wc_reply_authors": "1143;433;54;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 26.622359023948274 ], "wc_strengths_avg": [ 56.0, 24.627220712049503 ], "wc_weaknesses_avg": [ 137.5, 129.1907504429013 ], "wc_questions_avg": [ 91.0, 53.87021440462252 ], "wc_limitations_avg": [ 4.5, 3.905124837953327 ], "wc_review_avg": [ 354.5, 175.37174800976354 ], "wc_reply_reviewers_avg": [ 116.0, 94.38749917229505 ], "wc_reply_authors_avg": [ 407.5, 456.24253418549216 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6164739305629330856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mila.quebec;mila.umontreal.ca;intel.com;pku.edu.cn;hec.ca;deepmind.com;mila.umontreal.ca", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Intel;Peking University;HEC Montreal;Google", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;Intel Corporation;;HEC Business School;Google DeepMind", "aff_unique_url": "https://www.umontreal.ca;https://www.mila.quebec;https://www.intel.com;http://www.pku.edu.cn;https://www.hec.ca;https://deepmind.com", "aff_unique_abbr": "UdeM;MILA;Intel;Peking U;HEC;DeepMind", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1;2;0;3;0", "aff_country_unique": "Canada;United States;China;United Kingdom" }, { "title": "Contrastive Sampling Chains in Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71042", "id": "dAbGv5Jz5U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8ff788779f2e9e74ccd0d6b84607437-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dAbGv5Jz5U", "openreview": "https://openreview.net/forum?id=dAbGv5Jz5U", "poster": "/media/PosterPDFs/NeurIPS%202023/71042.png?t=1699791527.7001417", "slides": "https://nips.cc/virtual/2023/poster/71042", "video": "https://nips.cc/virtual/2023/poster/71042", "author_site": "Junyu Zhang, Daochang Liu, Shichao Zhang, Chang Xu", "tldr": "", "abstract": "The past few years have witnessed great success in the use of diffusion models (DMs) to generate high-fidelity images with the help of stochastic differential equations (SDEs). However, discretization error is an inevitable limitation when utilizing numerical solvers to solve SDEs. 
To address this limitation, we provide a theoretical analysis demonstrating that an appropriate combination of the contrastive loss and score matching serves as an upper bound of the KL divergence between the true data distribution and the model distribution. To obtain this bound, we utilize a contrastive loss to construct a contrastive sampling chain to fine-tune the pre-trained DM. In this manner, our method reduces the discretization error and thus yields a smaller gap between the true data distribution and our model distribution. Moreover, the presented method can be applied to fine-tuning various pre-trained DMs, both with and without fast sampling algorithms, contributing to better sample quality or slightly faster sampling speeds. To validate the efficacy of our method, we conduct comprehensive experiments. For example, on CIFAR10, when applied to a pre-trained EDM, our method improves the FID from 2.04 to 1.88 with 35 neural function evaluations (NFEs), and reduces NFEs from 35 to 25 to achieve the same 2.04 FID.", "keywords": "diffusion models;contrastive loss;discretization error;contrastive sampling chain", "primary_area": "", "supplementary_material": "", "author": "Junyu Zhang;Daochang Liu;Shichao Zhang;Chang Xu", "authorids": "~Junyu_Zhang2;~Daochang_Liu1;~Shichao_Zhang3;~Chang_Xu4", "gender": "M;M;M;", "homepage": ";https://finspire13.github.io;;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": ";222/2701;z/ShichaoZhang;97/2966-2", "google_scholar": ";https://scholar.google.com/citations?hl=en;;N4F_3eoAAAAJ", "orcid": "0000-0002-2631-0192;;;0000-0002-4756-0609", "linkedin": ";;;", "or_profile": "~Junyu_Zhang2;~Daochang_Liu1;~Shichao_Zhang3;~Charles_Xu1", "aff": "Central South University;University of Sydney;Guangxi Normal University;University of Sydney", "aff_domain": "csu.edu.cn;usyd.edu.au;gxnu.edu.cn;sydney.edu.au", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023contrastive,\ntitle={Contrastive Sampling Chains in Diffusion Models},\nauthor={Junyu Zhang and Daochang Liu and Shichao Zhang and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dAbGv5Jz5U}\n}", "github": "", "project": "", "reviewers": "mcPT;pguc;AaWg;e493;sJua", "pdf_size": 2571197, "rating": "5;6;7;7;8", "confidence": "3;4;4;3;5", "soundness": "3;3;3;3;3", "novelty": "3;3;3;2;3", "presentation": "3;3;3;3;4", "wc_summary": "157;35;52;69;59", "wc_strengths": "58;83;98;40;106", "wc_weaknesses": "338;191;216;92;61", "wc_questions": "55;18;7;20;37", "wc_limitations": "7;1;18;22;38", "wc_review": "615;328;391;243;301", "wc_reply_reviewers": "298;45;9;76;13", "wc_reply_authors": "845;74;0;56;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;1;2;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 74.4, 42.76260048219706 ], "wc_strengths_avg": [ 77.0, 24.690078979217542 ], "wc_weaknesses_avg": [ 179.6, 98.2702396455814 ], "wc_questions_avg": [ 27.4, 16.81190054693401 ], "wc_limitations_avg": [ 17.2, 12.82809416865966 ], "wc_review_avg": [ 375.6, 128.81242176125716 ], "wc_reply_reviewers_avg": [ 88.2, 107.66503610736403 ], "wc_reply_authors_avg": [ 195.0, 326.34705452937675 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ],
"reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.681385143869247, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4808992906915836075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "csu.edu.cn;usyd.edu.au;gxnu.edu.cn;sydney.edu.au", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Central South University;University of Sydney;Guangxi Normal University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.csu.edu.cn;https://www.sydney.edu.au;http://www.gxnu.edu.cn", "aff_unique_abbr": "CSU;USYD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;Australia" }, { "title": "SmooSeg: Smoothness Prior for Unsupervised Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71041", "id": "dB4lvScPIj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/25823c8eadef751dbd09a0ab9f463b59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dB4lvScPIj", "openreview": "https://openreview.net/forum?id=dB4lvScPIj", "poster": "/media/PosterPDFs/NeurIPS%202023/71041.png?t=1701429080.9305727", "slides": "https://nips.cc/virtual/2023/poster/71041", "video": "https://nips.cc/virtual/2023/poster/71041", "author_site": "Mengcheng Lan, Xinjiang Wang, Yiping Ke, Jiaxing Xu, Litong Feng, Wayne Zhang", "tldr": "", "abstract": "Unsupervised semantic segmentation is a challenging task that segments images into semantic groups without manual annotation. Prior works have primarily focused on leveraging prior knowledge of semantic consistency or priori concepts from self-supervised learning methods, which often overlook the coherence property of image segments. In this paper, we demonstrate that the smoothness prior, asserting that close features in a metric space share the same semantics, can significantly simplify segmentation by casting unsupervised semantic segmentation as an energy minimization problem. Under this paradigm, we propose a novel approach called SmooSeg that harnesses self-supervised learning methods to model the closeness relationships among observations as smoothness signals. To effectively discover coherent semantic segments, we introduce a novel smoothness loss that promotes piecewise smoothness within segments while preserving discontinuities across different segments. Additionally, to further enhance segmentation quality, we design an asymmetric teacher-student style predictor that generates smoothly updated pseudo labels, facilitating an optimal fit between observations and labeling outputs. 
Thanks to the rich supervision cues of the smoothness prior, our SmooSeg significantly outperforms STEGO in terms of pixel accuracy on three datasets: COCOStuff (+14.9\\%), Cityscapes (+13.0\\%), and Potsdam-3 (+5.7\\%).", "keywords": "unsupervised semantic segmentation; self-supervised learning; smoothness prior", "primary_area": "", "supplementary_material": "", "author": "Mengcheng Lan;Xinjiang Wang;Yiping Ke;Jiaxing Xu;Litong Feng;Wayne Zhang", "authorids": "~Mengcheng_Lan1;~Xinjiang_Wang1;~Yiping_Ke1;~Jiaxing_Xu2;~Litong_Feng1;~Wayne_Zhang2", "gender": "M;M;F;;M;", "homepage": ";;https://keyiping.wixsite.com/index;;;", "dblp": "250/5850;215/3546;07/3111;;133/4032.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=CtQbdvkAAAAJ;https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.com.tw/citations?user=30Fp0YYAAAAJ;;PnNAAasAAAAJ;", "orcid": "0000-0002-3311-0295;;0000-0001-9473-3202;;;", "linkedin": ";;;;litong-feng-9579747b/;", "or_profile": "~Mengcheng_Lan1;~Xinjiang_Wang1;~Yiping_Ke1;~Jiaxing_Xu2;~Litong_Feng1;~Wayne_Zhang2", "aff": "Nanyang Technological University;SenseTime Group;Nanyang Technological University;;SenseTime Research;", "aff_domain": "ntu.edu.sg;sensetime.com;ntu.edu.sg;;sensetime.com;", "position": "PhD student;Researcher;Associate Professor;;Associate Research Director;", "bibtex": "@inproceedings{\nlan2023smooseg,\ntitle={SmooSeg: Smoothness Prior for Unsupervised Semantic Segmentation},\nauthor={Mengcheng Lan and Xinjiang Wang and Yiping Ke and Jiaxing Xu and Litong Feng and Wayne Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dB4lvScPIj}\n}", "github": "", "project": "", "reviewers": "yftt;1NSo;iJZU;gyDm;HQSk", "pdf_size": 7931117, "rating": "4;5;6;6;6", "confidence": "5;2;4;4;4", "soundness": "3;4;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;4;4;3;3", "wc_summary": "89;62;118;92;91", "wc_strengths": "212;152;159;44;81", "wc_weaknesses": "704;311;679;110;146", "wc_questions": "131;48;161;83;4", "wc_limitations": "37;28;98;14;34", "wc_review": "1173;601;1215;343;356", "wc_reply_reviewers": "575;81;138;77;0", "wc_reply_authors": "1281;42;42;37;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "3;2;2;2;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 90.4, 17.738094598913378 ], "wc_strengths_avg": [ 129.6, 59.741442901891816 ], "wc_weaknesses_avg": [ 390.0, 255.45802003460372 ], "wc_questions_avg": [ 85.4, 56.25868821790995 ], "wc_limitations_avg": [ 42.2, 28.999310336626973 ], "wc_review_avg": [ 737.6, 384.0497884389471 ], "wc_reply_reviewers_avg": [ 174.2, 205.1491164982194 ], "wc_reply_authors_avg": [ 280.4, 500.5471406371232 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.15309310892394867, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4680248957454554570&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "ntu.edu.sg;sensetime.com;ntu.edu.sg;;sensetime.com;", "author_num": 6, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Nanyang Technological University;SenseTime Group;SenseTime", "aff_unique_dep": ";;SenseTime Research", 
"aff_unique_url": "https://www.ntu.edu.sg;https://www.sensetime.com;https://www.sensetime.com", "aff_unique_abbr": "NTU;SenseTime;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Singapore;China" }, { "title": "This Looks Like Those: Illuminating Prototypical Concepts Using Multiple Visualizations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71040", "id": "dCAk9VlegR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7b76eea0c3683e440c3d362620f578cd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dCAk9VlegR", "openreview": "https://openreview.net/forum?id=dCAk9VlegR", "poster": "/media/PosterPDFs/NeurIPS%202023/71040.png?t=1701736584.5524077", "slides": "https://nips.cc/virtual/2023/poster/71040", "video": "https://nips.cc/virtual/2023/poster/71040", "author_site": "Chiyu Ma, Brandon Zhao, Chaofan Chen, Cynthia Rudin", "tldr": "", "abstract": "We present ProtoConcepts, a method for interpretable image classification combining deep learning and case-based reasoning using prototypical parts. Existing work in prototype-based image classification uses a \"this looks like that'' reasoning process, which dissects a test image by finding prototypical parts and combining evidence from these prototypes to make a final classification. However, all of the existing prototypical part-based image classifiers provide only one-to-one comparisons, where a single training image patch serves as a prototype to compare with a part of our test image. With these single-image comparisons, it can often be difficult to identify the underlying concept being compared (e.g., \"is it comparing the color or the shape?''). Our proposed method modifies the architecture of prototype-based networks to instead learn prototypical concepts which are visualized using multiple image patches. Having multiple visualizations of the same prototype allows us to more easily identify the concept captured by that prototype (e.g., \"the test image and the related training patches are all the same shade of blue''), and allows our model to create richer, more interpretable visual explanations. 
Our experiments show that our \"this looks like those\" reasoning process can be applied as a modification to a wide range of existing prototypical image classification networks while achieving comparable accuracy on benchmark datasets.", "keywords": "deep learning;interpretability;prototype-based neural network;case-based reasoning", "primary_area": "", "supplementary_material": "", "author": "Chiyu Ma;Brandon Zhao;Chaofan Chen;Cynthia Rudin", "authorids": "~Chiyu_Ma1;~Brandon_Zhao1;~Chaofan_Chen1;~Cynthia_Rudin1", "gender": "M;M;M;", "homepage": "https://henrymachiyu.github.io/;http://brandonyzhao.github.io;https://umaine.edu/scis/people/chaofan-chen/;", "dblp": ";;;", "google_scholar": "h_3TRv0AAAAJ;;pJ0vTRUAAAAJ;", "orcid": ";;;", "linkedin": "henry-chiyu-ma-3b7b30203/;;;", "or_profile": "~Chiyu_Ma1;~Brandon_Zhao1;~Chaofan_Chen1;~Cynthia_Rudin1", "aff": "Duke University;California Institute of Technology;University of Maine;", "aff_domain": "duke.edu;caltech.edu;maine.edu;", "position": "MS student;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nma2023this,\ntitle={This Looks Like Those: Illuminating Prototypical Concepts Using Multiple Visualizations},\nauthor={Chiyu Ma and Brandon Zhao and Chaofan Chen and Cynthia Rudin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dCAk9VlegR}\n}", "github": "", "project": "", "reviewers": "x5Kg;wBiV;p3JV;8ZAP", "pdf_size": 4491024, "rating": "6;6;6;7", "confidence": "5;4;4;3", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "74;76;72;66", "wc_strengths": "81;147;47;156", "wc_weaknesses": "194;150;149;231", "wc_questions": "2;73;33;151", "wc_limitations": "65;53;1;9", "wc_review": "416;499;302;613", "wc_reply_reviewers": "94;161;168;202", "wc_reply_authors": "105;419;513;37", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 3.7416573867739413 ], "wc_strengths_avg": [ 107.75, 45.48282642932385 ], "wc_weaknesses_avg": [ 181.0, 34.11011580162108 ], "wc_questions_avg": [ 64.75, 55.7959451931769 ], "wc_limitations_avg": [ 32.0, 27.477263328068172 ], "wc_review_avg": [ 457.5, 113.80355881957296 ], "wc_reply_reviewers_avg": [ 156.25, 39.14316670889058 ], "wc_reply_authors_avg": [ 268.5, 201.71452600147566 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13763017869630218765&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "duke.edu;caltech.edu;maine.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Duke University;California Institute of Technology;University of Maine", "aff_unique_dep": ";;", "aff_unique_url": "https://www.duke.edu;https://www.caltech.edu;https://www.umaine.edu", "aff_unique_abbr": "Duke;Caltech;UMaine", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Supervised Pretraining Can Learn In-Context Reinforcement Learning", "status": "Spotlight", "track": "main", "site":
"https://nips.cc/virtual/2023/poster/71039", "id": "dCYBAGQXLo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8644b61a9bc87bf7844750a015feb600-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dCYBAGQXLo", "openreview": "https://openreview.net/forum?id=dCYBAGQXLo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71039", "video": "https://nips.cc/virtual/2023/poster/71039", "author_site": "Jonathan Lee, Annie Xie, Aldo Pacchiano, Yash Chandak, Chelsea Finn, Ofir Nachum, Emma Brunskill", "tldr": "", "abstract": "Large transformer models trained on diverse datasets have shown a remarkable ability to learn in-context, achieving high few-shot performance on tasks they were not explicitly trained to solve. In this paper, we study the in-context learning capabilities of transformers in decision-making problems, i.e., reinforcement learning (RL) for bandits and Markov decision processes. To do so, we introduce and study the Decision-Pretrained Transformer (DPT), a supervised pretraining method where a transformer predicts an optimal action given a query state and an in-context dataset of interactions from a diverse set of tasks. While simple, this procedure produces a model with several surprising capabilities. We find that the trained transformer can solve a range of RL problems in-context, exhibiting both exploration online and conservatism offline, despite not being explicitly trained to do so. The model also generalizes beyond the pretraining distribution to new tasks and automatically adapts its decision-making strategies to unknown structure. Theoretically, we show DPT can be viewed as an efficient implementation of Bayesian posterior sampling, a provably sample-efficient RL algorithm. We further leverage this connection to provide guarantees on the regret of the in-context algorithm yielded by DPT, and prove that it can learn faster than algorithms used to generate the pretraining data. 
These results suggest a promising yet simple path towards instilling strong in-context decision-making abilities in transformers.", "keywords": "decision making;reinforcement learning;in-context learning;bandits;transformers;offline reinforcement learning;exploration;reinforcement learning theory", "primary_area": "", "supplementary_material": "/attachment/cdcb5a081d3c271a6a020607846f79b68994029a.zip", "author": "Jonathan Lee;Annie Xie;Aldo Pacchiano;Yash Chandak;Chelsea Finn;Ofir Nachum;Emma Brunskill", "authorids": "~Jonathan_Lee4;~Annie_Xie1;~Aldo_Pacchiano1;~Yash_Chandak1;~Chelsea_Finn1;~Ofir_Nachum1;~Emma_Brunskill2", "gender": "M;;M;;F;M;", "homepage": "http://jonathannlee.com/;https://cs.stanford.edu/~anniexie/;https://www.aldopacchiano.ai;https://yashchandak.github.io/;https://ai.stanford.edu/~cbfinn/;https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;", "dblp": "30/3557-2.html;215/3608;129/6338;168/8450;131/1783;;", "google_scholar": "J8_FdjkAAAAJ;;no_BfYgAAAAJ;AsgUcSEAAAAJ;vfPE6hgAAAAJ;C-ZlBWMAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jonathan_Lee4;~Annie_Xie1;~Aldo_Pacchiano1;~Yash_Chandak1;~Chelsea_Finn1;~Ofir_Nachum1;~Emma_Brunskill2", "aff": "Stanford University;Stanford University;Microsoft;Computer Science Department, Stanford University;Google;OpenAI;", "aff_domain": "stanford.edu;stanford.edu;microsoft.com;cs.stanford.edu;google.com;openai.com;", "position": "PhD student;PhD student;Postdoc;Postdoc;Research Scientist;Researcher;", "bibtex": "@inproceedings{\nlee2023supervised,\ntitle={Supervised Pretraining Can Learn In-Context Reinforcement Learning},\nauthor={Jonathan Lee and Annie Xie and Aldo Pacchiano and Yash Chandak and Chelsea Finn and Ofir Nachum and Emma Brunskill},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dCYBAGQXLo}\n}", "github": "", "project": "", "reviewers": "qnXx;CX5S;pHmJ;R3a7", "pdf_size": 916004, "rating": "6;6;7;8", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;1;3;3", "presentation": "3;2;4;4", "wc_summary": "102;83;305;49", "wc_strengths": "48;300;49;65", "wc_weaknesses": "145;217;42;25", "wc_questions": "64;164;48;27", "wc_limitations": "9;1;1;50", "wc_review": "368;765;445;216", "wc_reply_reviewers": "33;20;0;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 134.75, 100.11087603252706 ], "wc_strengths_avg": [ 115.5, 106.73448364984955 ], "wc_weaknesses_avg": [ 107.25, 78.25079871796837 ], "wc_questions_avg": [ 75.75, 52.613567641816495 ], "wc_limitations_avg": [ 15.25, 20.327014045353536 ], "wc_review_avg": [ 448.5, 200.4501184833773 ], "wc_reply_reviewers_avg": [ 18.75, 11.903255857117413 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10481406298137699436&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;microsoft.com;cs.stanford.edu;google.com;openai.com;", "author_num": 7, "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": "Stanford 
University;Microsoft;Google;OpenAI", "aff_unique_dep": ";Microsoft Corporation;Google;", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com;https://www.google.com;https://openai.com", "aff_unique_abbr": "Stanford;Microsoft;Google;OpenAI", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Stanford;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Inventory Problems: Beyond the i.i.d. Setting with Online Convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71038", "id": "dDk6URGRXP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/41128e5b3a7622da5b17588757599077-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dDk6URGRXP", "openreview": "https://openreview.net/forum?id=dDk6URGRXP", "poster": "/media/PosterPDFs/NeurIPS%202023/71038.png?t=1702028138.4734535", "slides": "https://nips.cc/virtual/2023/poster/71038", "video": "https://nips.cc/virtual/2023/poster/71038", "author_site": "Massil HIHAT, St\u00e9phane Ga\u00efffas, Guillaume Garrigos, Simon Bussy", "tldr": "", "abstract": "We study multi-product inventory control problems where a manager makes sequential replenishment decisions based on partial historical information in order to minimize its cumulative losses. Our motivation is to consider general demands, losses and dynamics to go beyond standard models which usually rely on newsvendor-type losses, fixed dynamics, and unrealistic i.i.d. demand assumptions. We propose MaxCOSD, an online algorithm that has provable guarantees even for problems with non-i.i.d. demands and stateful dynamics, including, for instance, perishability. We consider what we call non-degeneracy assumptions on the demand process, and argue that they are necessary to allow learning.", "keywords": "online convex optimization;inventory control;newsvendor;online learning;regret analysis", "primary_area": "", "supplementary_material": "", "author": "Massil HIHAT;St\u00e9phane Ga\u00efffas;Guillaume Garrigos;Simon Bussy", "authorids": "~Massil_HIHAT1;~St\u00e9phane_Ga\u00efffas1;~Guillaume_Garrigos1;simon.bussy@califrais.fr", "gender": "M;M;M;", "homepage": ";https://stephanegaiffas.github.io;https://guillaume-garrigos.com/;", "dblp": ";58/9890;;", "google_scholar": ";;DN0Cu0IAAAAJ;", "orcid": ";;0000-0002-8613-5664;", "linkedin": "massil-hihat-7b9578139/;;;", "or_profile": "~Massil_HIHAT1;~St\u00e9phane_Ga\u00efffas1;~Guillaume_Garrigos1;simon.bussy@califrais.fr", "aff": "Universit\u00e9 Paris Cit\u00e9;University of Paris;Universit\u00e9 Paris Cit\u00e9;", "aff_domain": "u-paris.fr;lpsm.paris;u-paris.fr;", "position": "PhD student;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nhihat2023online,\ntitle={Online Inventory Problems: Beyond the i.i.d.
Setting with Online Convex Optimization},\nauthor={Massil HIHAT and St{\\'e}phane Ga{\\\"\\i}ffas and Guillaume Garrigos and Simon Bussy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dDk6URGRXP}\n}", "github": "", "project": "", "reviewers": "8ZLu;UtoR;ZTux;5kk9", "pdf_size": 691637, "rating": "4;6;7;7", "confidence": "3;3;3;4", "soundness": "2;4;4;3", "novelty": "2;3;3;2", "presentation": "3;3;4;3", "wc_summary": "189;183;99;81", "wc_strengths": "60;13;114;50", "wc_weaknesses": "264;28;98;533", "wc_questions": "77;37;180;2", "wc_limitations": "5;1;3;1", "wc_review": "595;262;494;667", "wc_reply_reviewers": "311;0;20;293", "wc_reply_authors": "586;0;0;367", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 138.0, 48.46648326421054 ], "wc_strengths_avg": [ 59.25, 36.134298111351214 ], "wc_weaknesses_avg": [ 230.75, 194.41627375299632 ], "wc_questions_avg": [ 74.0, 66.70457255690947 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 504.5, 152.89947678131537 ], "wc_reply_reviewers_avg": [ 156.0, 146.30960323915858 ], "wc_reply_authors_avg": [ 238.25, 250.5158428123858 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13626243405619727733&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u-paris.fr;lpsm.paris;u-paris.fr;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Universit\u00e9 Paris Cit\u00e9;University of Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.universite-paris.fr;https://www.universite-paris.fr", "aff_unique_abbr": "UPC;UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Fed-CO$_{2}$: Cooperation of Online and Offline Models for Severe Data Heterogeneity in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71037", "id": "dEDdRWunxU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/431d53d513461ff155d5bc8faa9a440c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dEDdRWunxU", "openreview": "https://openreview.net/forum?id=dEDdRWunxU", "poster": "/media/PosterPDFs/NeurIPS%202023/71037.png?t=1699348500.4934409", "slides": "https://nips.cc/virtual/2023/poster/71037", "video": "https://nips.cc/virtual/2023/poster/71037", "author_site": "Zhongyi Cai, Ye Shi, Wei Huang, Jingya Wang", "tldr": "", "abstract": "Federated Learning (FL) has emerged as a promising distributed learning paradigm that enables multiple clients to learn a global model collaboratively without sharing their private data. However, the effectiveness of FL is highly dependent on the quality of the data that is being used for training. In particular, data heterogeneity issues, such as label distribution skew and feature skew, can significantly impact the performance of FL. 
Previous studies in FL have primarily focused on addressing label distribution skew data heterogeneity, while only a few recent works have made initial progress in tackling feature skew issues. Notably, these two forms of data heterogeneity have been studied separately and have not been well explored within a unified FL framework. To address this gap, we propose Fed-CO$_2$, a universal FL framework that handles both label distribution skew and feature skew within a Cooperation mechanism between the Online and Offline models. Specifically, the online model learns general knowledge that is shared among all clients, while the offline model is trained locally to learn the specialized knowledge of each individual client. To further enhance model cooperation in the presence of feature shifts, we design an intra-client knowledge transfer mechanism that reinforces mutual learning between the online and offline models, and an inter-client knowledge transfer mechanism to increase the models\u2019 domain generalization ability. Extensive experiments show that our Fed-CO$_2$ outperforms a wide range of existing personalized federated learning algorithms in terms of handling label distribution skew and feature skew, both individually and collectively. The empirical results are supported by our convergence analyses in a simplified setting.", "keywords": "Federated Learning;Data Heterogeneity;Model Cooperation;Mutual Learning;Knowledge Transfer", "primary_area": "", "supplementary_material": "/attachment/bd5dc75d80aa270b9a256ffb06eaa23b7b59a926.pdf", "author": "Zhongyi Cai;Ye Shi;Wei Huang;Jingya Wang", "authorids": "~Zhongyi_Cai1;~Ye_Shi1;~Wei_Huang6;~Jingya_Wang3", "gender": "M;M;M;F", "homepage": ";http://faculty.sist.shanghaitech.edu.cn/faculty/shiye;https://weihuang05.github.io/;https://faculty.sist.shanghaitech.edu.cn/faculty/wangjingya/", "dblp": "257/4285;34/11191-1;81/6685-34;", "google_scholar": ";gMqbZPUAAAAJ;RZfDh4MAAAAJ;https://scholar.google.com.au/citations?user=vmvJV_IAAAAJ", "orcid": "0009-0002-1410-7952;;0000-0001-5674-7021;", "linkedin": ";;;", "or_profile": "~Zhongyi_Cai1;~Ye_Shi1;~Wei_Huang6;~Jingya_Wang3", "aff": "ShanghaiTech University;ShanghaiTech University;RIKEN AIP;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;riken.jp;shanghaitech.edu.cn", "position": "MS student;Assistant Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ncai2023fedco,\ntitle={Fed-{CO}\\$\\_\\{2\\}\\$: Cooperation of Online and Offline Models for Severe Data Heterogeneity in Federated Learning},\nauthor={Zhongyi Cai and Ye Shi and Wei Huang and Jingya Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dEDdRWunxU}\n}", "github": "", "project": "", "reviewers": "5E9k;xxVq;NQAC;y4gh", "pdf_size": 1801595, "rating": "5;5;6;6", "confidence": "2;4;2;3", "soundness": "3;3;4;2", "novelty": "3;3;4;2", "presentation": "3;2;3;2", "wc_summary": "222;107;50;61", "wc_strengths": "108;38;49;51", "wc_weaknesses": "100;206;148;94", "wc_questions": "64;1;67;26", "wc_limitations": "4;2;1;23", "wc_review": "498;354;315;255", "wc_reply_reviewers": "19;72;16;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.0, 68.1065341945984 ], 
"wc_strengths_avg": [ 61.5, 27.299267389437397 ], "wc_weaknesses_avg": [ 137.0, 45.0 ], "wc_questions_avg": [ 39.5, 27.48181216732259 ], "wc_limitations_avg": [ 7.5, 9.013878188659973 ], "wc_review_avg": [ 355.5, 89.51117248701415 ], "wc_reply_reviewers_avg": [ 26.75, 27.105119442644042 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12286525374618682639&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;riken.jp;shanghaitech.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ShanghaiTech University;RIKEN", "aff_unique_dep": ";Advanced Institute for Computational Science", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.aip.riken.jp", "aff_unique_abbr": "ShanghaiTech;RIKEN AIP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Japan" }, { "title": "Separable Physics-Informed Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71036", "id": "dEySGIcDnI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4af827e7d0b7bdae6097d44977e87534-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dEySGIcDnI", "openreview": "https://openreview.net/forum?id=dEySGIcDnI", "poster": "/media/PosterPDFs/NeurIPS%202023/71036.png?t=1699502202.5854683", "slides": "https://nips.cc/virtual/2023/poster/71036", "video": "https://nips.cc/virtual/2023/poster/71036", "author_site": "Junwoo Cho, Seungtae Nam, Hyunmo Yang, Seok-Bae Yun, Youngjoon Hong, Eunbyung Park", "tldr": "", "abstract": "Physics-informed neural networks (PINNs) have recently emerged as promising data-driven PDE solvers showing encouraging results on various PDEs. \nHowever, there is a fundamental limitation of training PINNs to solve multi-dimensional PDEs and approximate very complex solution functions.\nThe number of training points (collocation points) required on these challenging PDEs grows substantially, and it is severely limited due to the expensive computational costs and heavy memory overhead.\nTo overcome this limit, we propose a network architecture and training algorithm for PINNs.\nThe proposed method, separable PINN (SPINN), operates on a per-axis basis to decrease the number of network propagations in multi-dimensional PDEs instead of point-wise processing in conventional PINNs.\nWe also propose using forward-mode automatic differentiation to reduce the computational cost of computing PDE residuals, enabling a large number of collocation points ($>10^7$) on a single commodity GPU. \nThe experimental results show significantly reduced computational costs ($62\\times$ in wall-clock time, $1,394\\times$ in FLOPs given the same number of collocation points) in multi-dimensional PDEs while achieving better accuracy.\nFurthermore, we present that SPINN can solve a chaotic (2+1)-d Navier-Stokes equation much faster than the best-performing prior method (9 minutes vs. 
10 hours on a single GPU), while maintaining accuracy.\nFinally, we showcase that SPINN can accurately obtain the solution of a highly nonlinear and multi-dimensional PDE, a (3+1)-d Navier-Stokes equation.\nFor visualized results and code, please see https://jwcho5576.github.io/spinn.github.io/.", "keywords": "partial differential equations;scientific machine learning;physics-informed neural networks;fluid dynamics", "primary_area": "", "supplementary_material": "/attachment/bda37ece307c8c9fcd66a3a6499b3a5f46440d25.zip", "author": "Junwoo Cho;Seungtae Nam;Hyunmo Yang;Seok-Bae Yun;Youngjoon Hong;Eunbyung Park", "authorids": "~Junwoo_Cho1;~Seungtae_Nam1;~Hyunmo_Yang1;~Seok-Bae_Yun1;~Youngjoon_Hong2;~Eunbyung_Park1", "gender": "M;M;M;M;M;M", "homepage": "https://silverbottlep.github.io/;https://github.com/stnamjef;https://seokbaeyun.wordpress.com/;https://silverbottlep.github.io/;;https://www.youngjoonhong.com", "dblp": "311/3844;321/0019;;92/9727;321/2642;119/1276", "google_scholar": ";8NKPmmwCmrAC;https://scholar.google.co.kr/citations?user=ibXBxPwAAAAJ;iPyuJmQAAAAJ;https://scholar.google.com/citations?view_op=list_works;", "orcid": ";;;;;", "linkedin": ";;;eunbyung-park-286384b4/;;", "or_profile": "~Junwoo_Cho1;~Seungtae_Nam1;~Seok-Bae_Yun1;~Eunbyung_Park1;~Yang_Hyunmo1;~Youngjoon_Hong1", "aff": "Sungkyunkwan University;Sungkyunkwan University;Sungkyunkwan University;Sungkyunkwan University;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu;skku.edu;skku.edu;skku.edu;skku.edu", "position": "MS student;MS student;Associate Professor;Assistant Professor;MS student;Assistant Professor", "bibtex": "@inproceedings{\ncho2023separable,\ntitle={Separable Physics-Informed Neural Networks},\nauthor={Junwoo Cho and Seungtae Nam and Hyunmo Yang and Seok-Bae Yun and Youngjoon Hong and Eunbyung Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dEySGIcDnI}\n}", "github": "", "project": "", "reviewers": "dosJ;3ENg;Zd5Z;kaJa;5Jte", "pdf_size": 4292667, "rating": "5;5;6;8;8", "confidence": "3;4;4;3;4", "soundness": "2;3;3;4;4", "novelty": "3;2;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "93;41;71;140;82", "wc_strengths": "98;26;50;32;108", "wc_weaknesses": "203;114;202;181;92", "wc_questions": "190;74;142;2;153", "wc_limitations": "9;1;10;1;9", "wc_review": "593;271;532;467;408", "wc_reply_reviewers": "47;18;44;0;27", "wc_reply_authors": "350;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 85.4, 32.33944959333724 ], "wc_strengths_avg": [ 92.2, 34.242663447810244 ], "wc_weaknesses_avg": [ 158.4, 46.435331376011526 ], "wc_questions_avg": [ 112.2, 66.64653029228154 ], "wc_limitations_avg": [ 6.0, 4.09878030638384 ], "wc_review_avg": [ 454.2, 110.61536963731578 ], "wc_reply_reviewers_avg": [ 27.2, 17.313578486263317 ], "wc_reply_authors_avg": [ 70.0, 140.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.06019292654288467, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14217870553750739293&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email":
"skku.edu;skku.edu;skku.edu;skku.edu;skku.edu;skku.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "CP-SLAM: Collaborative Neural Point-based SLAM System", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71035", "id": "dFSeZm6dTC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7c10e259c7e56fa218ee03d9ae7d728e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dFSeZm6dTC", "openreview": "https://openreview.net/forum?id=dFSeZm6dTC", "poster": "/media/PosterPDFs/NeurIPS%202023/71035.png?t=1699519920.6179981", "slides": "https://nips.cc/virtual/2023/poster/71035", "video": "https://nips.cc/virtual/2023/poster/71035", "author_site": "Jiarui Hu, Mao Mao, Hujun Bao, Guofeng Zhang, Zhaopeng Cui", "tldr": "", "abstract": "This paper presents a collaborative implicit neural simultaneous localization and mapping (SLAM) system with RGB-D image sequences, which consists of complete front-end and back-end modules including odometry, loop detection, sub-map fusion, and global refinement. In order to enable all these modules in a unified framework, we propose a novel neural point based 3D scene representation in which each point maintains a learnable neural feature for scene encoding and is associated with a certain keyframe. Moreover, a distributed-to-centralized learning strategy is proposed for the collaborative implicit SLAM to improve consistency and cooperation. A novel global optimization framework is also proposed to improve the system accuracy like traditional bundle adjustment. 
Experiments on various datasets demonstrate the superiority of the proposed method in both camera tracking and mapping.", "keywords": "Collaborative SLAM; Neural Point Field; Keyframe-based SLAM; Pose Graph Optimization", "primary_area": "", "supplementary_material": "/attachment/244c9c294c445aadcd958f041fc37f4336200d3a.zip", "author": "Jiarui Hu;Mao Mao;Hujun Bao;Guofeng Zhang;Zhaopeng Cui", "authorids": "~Jiarui_Hu1;~Mao_Mao2;~Hujun_Bao1;~Guofeng_Zhang3;~Zhaopeng_Cui1", "gender": "M;M;M;M;M", "homepage": ";https://github.com/TwiceMao;http://www.cad.zju.edu.cn/home/bao/;http://www.cad.zju.edu.cn/home/gfzhang;https://zhpcui.github.io/", "dblp": "287/8031-4;133/0478.html;b/HujunBao;78/5389-1.html;28/7484", "google_scholar": "0CJPK3IAAAAJ;WO2JOnQAAAAJ;AZCcDmsAAAAJ;F0xfpXAAAAAJ;https://scholar.google.ca/citations?user=vwIRwDUAAAAJ", "orcid": "0009-0006-9563-8956;0009-0000-5142-9913;0000-0002-2662-0334;0000-0001-5661-8430;0000-0002-7130-439X", "linkedin": ";;;;", "or_profile": "~Jiarui_Hu1;~Mao_Mao2;~Hujun_Bao1;~Guofeng_Zhang3;~Zhaopeng_Cui1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhu2023cpslam,\ntitle={{CP}-{SLAM}: Collaborative Neural Point-based {SLAM} System},\nauthor={Jiarui Hu and Mao Mao and Hujun Bao and Guofeng Zhang and Zhaopeng Cui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dFSeZm6dTC}\n}", "github": "", "project": "", "reviewers": "Zpdr;jr2H;46C6;zwcP;q5H1;1EKn", "pdf_size": 4984832, "rating": "5;6;6;6;6;6", "confidence": "4;4;4;3;3;4", "soundness": "3;3;4;3;3;3", "novelty": "3;3;4;3;3;3", "presentation": "3;3;4;3;3;3", "wc_summary": "61;103;77;64;67;99", "wc_strengths": "65;136;95;37;39;138", "wc_weaknesses": "104;306;43;64;39;96", "wc_questions": "158;145;212;98;49;129", "wc_limitations": "4;41;1;26;49;9", "wc_review": "392;731;428;289;243;471", "wc_reply_reviewers": "0;210;355;0;61;62", "wc_reply_authors": "0;549;347;0;72;34", "reply_reviewers": "0;2;2;0;2;1", "reply_authors": "1;3;2;1;2;2", "rating_avg": [ 5.833333333333333, 0.372677996249965 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 78.5, 16.690815837859255 ], "wc_strengths_avg": [ 85.0, 41.49297129233657 ], "wc_weaknesses_avg": [ 108.66666666666667, 91.54537429906306 ], "wc_questions_avg": [ 131.83333333333334, 50.495599368217775 ], "wc_limitations_avg": [ 21.666666666666668, 18.436075745366445 ], "wc_review_avg": [ 425.6666666666667, 157.3040226934949 ], "wc_reply_reviewers_avg": [ 114.66666666666667, 128.3220774284595 ], "wc_reply_authors_avg": [ 167.0, 208.49300547820144 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.8975274678557508 ], "reply_authors_avg": [ 1.8333333333333333, 0.6871842709362768 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.31622776601683783, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13602928411628876079&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 5, 
"aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Cookie Consent Has Disparate Impact on Estimation Accuracy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71034", "id": "dFtpRphNb3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c473e69ba261200dd595d07494c1a73-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dFtpRphNb3", "openreview": "https://openreview.net/forum?id=dFtpRphNb3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71034", "video": "https://nips.cc/virtual/2023/poster/71034", "author_site": "Erik Miehling, Rahul Nair, Elizabeth Daly, Karthikeyan Natesan Ramamurthy, Robert Redmond", "tldr": "", "abstract": "Cookies are designed to enable more accurate identification and tracking of user behavior, in turn allowing for more personalized ads and better performing ad campaigns. Given the additional information that is recorded, questions related to privacy and fairness naturally arise. How does a user's consent decision influence how much the system can learn about their demographic and tastes? Is the impact of a user's consent decision on the recommender system's ability to learn about their latent attributes uniform across demographics? We investigate these questions in the context of an engagement-driven recommender system using simulation. We empirically demonstrate that when consent rates exhibit demographic-dependence, user consent has a disparate impact on the recommender agent's ability to estimate users' latent attributes. In particular, we find that when consent rates are demographic-dependent, a user disagreeing to share their cookie may counter-intuitively cause the recommender agent to know more about the user than if the user agreed to share their cookie. Furthermore, the gap in base consent rates across demographics serves as an amplifier: users from the lower consent rate demographic who agree to cookie sharing generally experience higher estimation errors than the same users from the higher consent rate demographic, and conversely for users who choose to disagree to cookie sharing, with these differences increasing in consent rate gap. We discuss the need for new notions of fairness that encourage consistency between a user's privacy decisions and the system's ability to estimate their latent attributes.", "keywords": "fairness;cookies;recommender systems", "primary_area": "", "supplementary_material": "/attachment/689148cd8ddb9ca09b6c519aea2024b66ba41143.pdf", "author": "Erik Miehling;Rahul Nair;Elizabeth M. 
Daly;Karthikeyan Natesan Ramamurthy;Robert Nelson Redmond", "authorids": "~Erik_Miehling1;~Rahul_Nair3;~Elizabeth_M._Daly1;~Karthikeyan_Natesan_Ramamurthy1;~Robert_Nelson_Redmond1", "gender": "M;M;;;M", "homepage": "http://emiehling.github.io;https://rahulnair23.github.io/;http://researcher.watson.ibm.com/researcher/view.php?person=ie-elizabeth.daly;https://nrkarthikeyan.github.io/;", "dblp": "99/10766;76/4693.html;10/5750;58/7800;", "google_scholar": "mmd29pMAAAAJ;rCo_gNYAAAAJ;llFJcF4AAAAJ;mG8HuhEAAAAJ;", "orcid": "0000-0003-0533-8329;;;0000-0002-6021-5930;", "linkedin": ";;;;robertnredmond/", "or_profile": "~Erik_Miehling1;~Rahul_Nair3;~Elizabeth_M._Daly1;~Karthikeyan_Natesan_Ramamurthy1;~Robert_Nelson_Redmond1", "aff": "IBM Research;IBM Research;IBM Research;International Business Machines;", "aff_domain": "ibm.com;ibm.com;ibm.com;ibm.com;", "position": "Research Scientist;Researcher;Research Scientist;Research Staff Member;", "bibtex": "@inproceedings{\nmiehling2023cookie,\ntitle={Cookie Consent Has Disparate Impact on Estimation Accuracy},\nauthor={Erik Miehling and Rahul Nair and Elizabeth M. Daly and Karthikeyan Natesan Ramamurthy and Robert Nelson Redmond},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dFtpRphNb3}\n}", "github": "", "project": "", "reviewers": "WBi3;S9vB;iC41;cPvy", "pdf_size": 2771429, "rating": "4;4;4;7", "confidence": "4;4;3;3", "soundness": "2;2;3;4", "novelty": "2;1;2;3", "presentation": "3;2;3;4", "wc_summary": "121;264;56;58", "wc_strengths": "22;25;62;38", "wc_weaknesses": "108;137;77;427", "wc_questions": "34;193;92;3", "wc_limitations": "8;55;4;21", "wc_review": "293;674;291;547", "wc_reply_reviewers": "0;186;250;186", "wc_reply_authors": "0;0;0;270", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 124.75, 84.53808313417096 ], "wc_strengths_avg": [ 36.75, 15.769828787910159 ], "wc_weaknesses_avg": [ 187.25, 140.03637920197738 ], "wc_questions_avg": [ 80.5, 72.38266367024634 ], "wc_limitations_avg": [ 22.0, 20.062402647738878 ], "wc_review_avg": [ 451.25, 165.46053154755668 ], "wc_reply_reviewers_avg": [ 155.5, 93.50267375856158 ], "wc_reply_authors_avg": [ 67.5, 116.91342951089922 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3159147481902666648&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ibm.com;ibm.com;ibm.com;ibm.com;", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "IBM;International Business Machines Corporation", "aff_unique_dep": "IBM Research;", "aff_unique_url": "https://www.ibm.com/research;https://www.ibm.com", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LMC: Large Model Collaboration with Cross-assessment for Training-Free Open-Set Object Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71033", "id": "dHF3Im8Aic", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/91813e5ddd9658b99be4c532e274b49c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dHF3Im8Aic", "openreview": "https://openreview.net/forum?id=dHF3Im8Aic", "poster": "/media/PosterPDFs/NeurIPS%202023/71033.png?t=1699840908.3614132", "slides": "https://nips.cc/virtual/2023/poster/71033", "video": "https://nips.cc/virtual/2023/poster/71033", "author_site": "Haoxuan Qu, Xiaofei Hui, Yujun Cai, Jun Liu", "tldr": "", "abstract": "Open-set object recognition aims to identify if an object is from a class that has been encountered during training or not. To perform open-set object recognition accurately, a key challenge is how to reduce the reliance on spurious-discriminative features. In this paper, motivated by that different large models pre-trained through different paradigms can possess very rich while distinct implicit knowledge, we propose a novel framework named Large Model Collaboration (LMC) to tackle the above challenge via collaborating different off-the-shelf large models in a training-free manner. Moreover, we also incorporate the proposed framework with several novel designs to effectively extract implicit knowledge from large models. Extensive experiments demonstrate the efficacy of our proposed framework. Code is available \\href{https://github.com/Harryqu123/LMC}{here}.", "keywords": "Deep learning;Open-set object recognition;Large models;Training-free", "primary_area": "", "supplementary_material": "/attachment/85d6e0aa0923ad3ff85e923c0369b364b31e7558.pdf", "author": "Haoxuan Qu;Xiaofei Hui;Yujun Cai;Jun Liu", "authorids": "~Haoxuan_Qu1;~Xiaofei_Hui1;~Yujun_Cai1;~Jun_Liu8", "gender": "M;;F;M", "homepage": ";;;", "dblp": "302/3883;357/4971;227/4399;95/3736-36", "google_scholar": "https://scholar.google.com.sg/citations?user=fR83-ycAAAAJ;;https://scholar.google.com/citations?hl=en;Q5Ild8UAAAAJ", "orcid": "0000-0001-5054-3394;0000-0002-9258-5768;;", "linkedin": ";xiaofei-hui-13aa75213/;;", "or_profile": "~Haoxuan_Qu1;~Xiaofei_Hui1;~Yujun_Cai1;~Jun_Liu8", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Meta Facebook;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;fb.com;sutd.edu.sg", "position": "PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nqu2023lmc,\ntitle={{LMC}: Large Model Collaboration with Cross-assessment for Training-Free Open-Set Object Recognition},\nauthor={Haoxuan Qu and Xiaofei Hui and Yujun Cai and Jun Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dHF3Im8Aic}\n}", "github": "", "project": "", "reviewers": "Kpbj;zWHw;BEEJ;Qabu", "pdf_size": 12738330, "rating": "5;6;6;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "295;112;165;130", "wc_strengths": "55;115;161;133", "wc_weaknesses": "233;128;172;47", "wc_questions": "5;136;113;29", "wc_limitations": "57;9;30;1", "wc_review": "645;500;641;340", "wc_reply_reviewers": "81;55;100;36", "wc_reply_authors": "258;31;182;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 175.5, 71.5768817426409 ], "wc_strengths_avg": [ 
116.0, 38.84584919911006 ], "wc_weaknesses_avg": [ 145.0, 67.76060802560733 ], "wc_questions_avg": [ 70.75, 55.01988276977696 ], "wc_limitations_avg": [ 24.25, 21.672274915199836 ], "wc_review_avg": [ 531.5, 125.0369945256203 ], "wc_reply_reviewers_avg": [ 68.0, 24.423349483639626 ], "wc_reply_authors_avg": [ 125.75, 98.00605848619767 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13064923026232836679&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "sutd.edu.sg;sutd.edu.sg;fb.com;sutd.edu.sg", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.sutd.edu.sg;https://meta.com", "aff_unique_abbr": "SUTD;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "On the Convergence of Black-Box Variational Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71032", "id": "dHQ2av9NzO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8bea36ac39e11ebe49e9eddbd4b8bd3a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dHQ2av9NzO", "openreview": "https://openreview.net/forum?id=dHQ2av9NzO", "poster": "/media/PosterPDFs/NeurIPS%202023/71032.png?t=1702085320.1103065", "slides": "https://nips.cc/virtual/2023/poster/71032", "video": "https://nips.cc/virtual/2023/poster/71032", "author_site": "Kyurae Kim, Jisu Oh, Kaiwen Wu, Yian Ma, Jacob Gardner", "tldr": "", "abstract": "We provide the first convergence guarantee for black-box variational inference (BBVI) with the reparameterization gradient.\n While preliminary investigations worked on simplified versions of BBVI (e.g., bounded domain, bounded support, only optimizing for the scale, and such), our setup does not need any such algorithmic modifications.\n Our results hold for log-smooth posterior densities with and without strong log-concavity and the location-scale variational family.\n Notably, our analysis reveals that certain algorithm design choices commonly employed in practice, such as nonlinear parameterizations of the scale matrix, can result in suboptimal convergence rates.\n Fortunately, running BBVI with proximal stochastic gradient descent fixes these limitations and thus achieves the strongest known convergence guarantees.\n We evaluate this theoretical insight by comparing proximal SGD against other standard implementations of BBVI on large-scale Bayesian inference problems.", "keywords": "black-box variational inference;stochastic gradient descent;Bayesian inference;variational inference;probabilistic machine learning;Bayesian machine learning;variational Bayes", "primary_area": "", "supplementary_material": "/attachment/b71eecbce2cab2592e3610739381e308691d2558.zip", "author": "Kyurae Kim;Jisu Oh;Kaiwen Wu;Yian Ma;Jacob R. 
Gardner", "authorids": "~Kyurae_Kim1;~Jisu_Oh1;~Kaiwen_Wu2;~Yian_Ma1;~Jacob_R._Gardner1", "gender": ";M;;M;", "homepage": ";;;https://sites.google.com/view/yianma;", "dblp": ";26/4034;;;", "google_scholar": ";;;A0TFlacAAAAJ;", "orcid": ";0000-0002-7962-2536;;;", "linkedin": ";;;;", "or_profile": "~Kyurae_Kim1;~Jisu_Oh1;~Kaiwen_Wu2;~Yian_Ma1;~Jacob_R._Gardner1", "aff": ";North Carolina State University;;University of California, San Diego;", "aff_domain": ";ncsu.edu;;ucsd.edu;", "position": ";PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nkim2023on,\ntitle={On the Convergence of Black-Box Variational Inference},\nauthor={Kyurae Kim and Jisu Oh and Kaiwen Wu and Yian Ma and Jacob R. Gardner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dHQ2av9NzO}\n}", "github": "", "project": "", "reviewers": "TwpX;MsbX;C3v1;gnfU", "pdf_size": 588102, "rating": "7;7;7;7", "confidence": "3;3;3;4", "soundness": "4;4;3;4", "novelty": "3;3;2;2", "presentation": "3;3;3;4", "wc_summary": "149;102;104;61", "wc_strengths": "94;131;66;52", "wc_weaknesses": "254;179;281;96", "wc_questions": "172;213;216;109", "wc_limitations": "33;27;104;4", "wc_review": "702;652;771;322", "wc_reply_reviewers": "82;51;205;32", "wc_reply_authors": "187;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 31.13679495388053 ], "wc_strengths_avg": [ 85.75, 30.185882461839675 ], "wc_weaknesses_avg": [ 202.5, 71.95310973126874 ], "wc_questions_avg": [ 177.5, 43.200115740585694 ], "wc_limitations_avg": [ 42.0, 37.39652390263031 ], "wc_review_avg": [ 611.75, 172.54039382127306 ], "wc_reply_reviewers_avg": [ 92.5, 67.35911222692889 ], "wc_reply_authors_avg": [ 46.75, 80.97337525384502 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16555621286499630008&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";ncsu.edu;;ucsd.edu;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "North Carolina State University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.ncsu.edu;https://www.ucsd.edu", "aff_unique_abbr": "NCSU;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Can LLM Already Serve as A Database Interface? 
A BIg Bench for Large-Scale Database Grounded Text-to-SQLs", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73529", "id": "dI4wzAE6uV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/83fc8fab1710363050bbd1d4b8cc0021-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=dI4wzAE6uV", "openreview": "https://openreview.net/forum?id=dI4wzAE6uV", "poster": "/media/PosterPDFs/NeurIPS%202023/73529.png?t=1697392857.4342997", "slides": "https://nips.cc/virtual/2023/poster/73529", "video": "https://nips.cc/virtual/2023/poster/73529", "author_site": "Jinyang Li, Binyuan Hui, Ge Qu, Jiaxi Yang, Binhua Li, Bowen Li, Bailin Wang, Bowen Qin, Ruiying Geng, Nan Huo, Xuanhe Zhou, Ma Chenhao, Guoliang Li, Kevin Chang, Fei Huang, Reynold Cheng, Yongbin Li", "tldr": "", "abstract": "Text-to-SQL parsing, which aims at converting natural language instructions into executable SQLs, has gained increasing attention in recent years. In particular, GPT-4 and Claude-2 have shown impressive results in this task. However, most of the prevalent benchmarks, i.e., Spider and WikiSQL, focus on database schema with few rows of database contents, leaving a gap between academic study and real-world applications. To mitigate this gap, we present BIRD, a BIg benchmark for laRge-scale Database grounded in text-to-SQL tasks, containing 12,751 pairs of text-to-SQL data and 95 databases with a total size of 33.4 GB, spanning 37 professional domains. Our emphasis on database values highlights the new challenges of dirty database contents, external knowledge between NL questions and database contents, and SQL efficiency, particularly in the context of massive databases. To solve these problems, text-to-SQL models must feature database value comprehension in addition to semantic parsing. The experimental results demonstrate the significance of database values in generating accurate text-to-SQLs for big databases. Furthermore, even the most popular and effective text-to-SQL models, i.e., GPT-4, only achieve 54.89% in execution accuracy, which is still far from the human result of 92.96%, proving that challenges still stand. We also provide an efficiency analysis to offer insights into generating text-to-efficient-SQLs that are beneficial to industries. 
\nWe believe that BIRD will contribute to advancing real-world applications of text-to-SQL research.\nThe leaderboard and source code are available: https://bird-bench.github.io/.", "keywords": "Text-to-SQL Parsing;Large Language Models;Database Grounding;SQL Efficiency", "primary_area": "", "supplementary_material": "/attachment/ba51a55c62628a84cd76bf3def35407791133b62.zip", "author": "Jinyang Li;Binyuan Hui;GE QU;Jiaxi Yang;Binhua Li;Bowen Li;Bailin Wang;Bowen Qin;Ruiying Geng;Nan Huo;Xuanhe Zhou;Chenhao Ma;Guoliang Li;Kevin Chang;Fei Huang;Reynold Cheng;Yongbin Li", "authorids": "~Jinyang_Li4;~Binyuan_Hui1;~GE_QU1;~Jiaxi_Yang1;~Binhua_Li1;~Bowen_Li8;~Bailin_Wang3;~Bowen_Qin1;~Ruiying_Geng2;~Nan_Huo1;~Xuanhe_Zhou1;~Chenhao_Ma1;~Guoliang_Li1;~Kevin_Chang1;~Fei_Huang1;~Reynold_Cheng1;~Yongbin_Li2", "gender": "M;F;;M;M;;;M;;M;M;M;M;M;;M;M", "homepage": "http://jinyang-li.me/;https://huybery.github.io/;;;;;;https://eyuansu62.github.io/;;https://nan-huo.github.io/;https://db.zhouxh.store/;https://chenhao-ma.github.io/;http://dbgroup.cs.tsinghua.edu.cn/ligl/;https://siebelschool.illinois.edu/about/people/faculty/kcchang;;https://reynold.hku.hk;https://yongbin-li.github.io/", "dblp": "79/572-3;246/4699;;;236/5662.html;75/10470-2;;;;;247/8418.html;251/5251;http://dblp.uni-trier.de/pers/hd/l/Li_0001:Guoliang;c/KCCChang;;89/2619;", "google_scholar": "https://scholar.google.com/citations?hl=en;RBb3ItMAAAAJ;;NqUIrccAAAAJ;;RLWXNf8AAAAJ;;75pkx3YAAAAJ;;;hTEZzf4AAAAJ;EFyELCcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=sugWZ6MAAAAJ;;7R7MSb4AAAAJ;xF5VrokAAAAJ", "orcid": ";;;0000-0002-7710-1489;;;;;;;;0000-0002-3243-8512;0000-0002-1398-0621;0000-0003-0997-6803;;0000-0002-9480-9809;", "linkedin": ";;;;;;;;;;;;;;;;", "or_profile": "~Jinyang_Li4;~Binyuan_Hui1;~GE_QU1;~Jiaxi_Yang1;~Binhua_Li1;~Bowen_Li8;~Bailin_Wang3;~Bowen_Qin1;~Ruiying_Geng2;~Nan_Huo1;~Xuanhe_Zhou1;~Chenhao_Ma1;~Guoliang_Li1;~Kevin_Chang1;~Fei_Huang1;~Reynold_Cheng1;~Yongbin_Li2", "aff": "The University of Hong Kong;Alibaba Group;;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;International Innovation Center of Tsinghua University, Shanghai;;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;the University of Hong Kong, University of Hong Kong;Tsinghua University;The Chinese University of Hong Kong, Shenzhen;Tsinghua University;University of Illinois, Urbana Champaign;;The University of Hong Kong;Alibaba Group", "aff_domain": "hku.hk;alibaba-inc.com;;siat.ac.cn;;tsinghua.edu.cn;;siat.ac.cn;;cs.hku.hk;mail.tsinghua.edu.cn;cuhk.edu.cn;tsinghua.edu.cn;illinois.edu;;cs.hku.hk;alibaba-inc.com", "position": "PhD student;Researcher;;PhD student;;Researcher;;MS student;;PhD student;PhD student;Assistant Professor;Full Professor;Full Professor;;Full Professor;Researcher", "bibtex": "@inproceedings{\nli2023can,\ntitle={Can {LLM} Already Serve as A Database Interface? 
A {BI}g Bench for Large-Scale Database Grounded Text-to-{SQL}s},\nauthor={Jinyang Li and Binyuan Hui and GE QU and Jiaxi Yang and Binhua Li and Bowen Li and Bailin Wang and Bowen Qin and Ruiying Geng and Nan Huo and Xuanhe Zhou and Chenhao Ma and Guoliang Li and Kevin Chang and Fei Huang and Reynold Cheng and Yongbin Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=dI4wzAE6uV}\n}", "github": "", "project": "", "reviewers": "3iDu;fFFH;YmxV;QBSP;Kfee;7VcG", "pdf_size": 11550589, "rating": "7;8;8;8;8;9", "confidence": "4;3;5;3;4;5", "wc_summary_and_contributions": "139;359;133;88;201;101", "wc_strengths": "96;155;152;43;168;40", "wc_improvement": "156;166;220;32;544;6", "wc_limitations": "18;203;15;148;52;58", "wc_correctness": "5;92;1;1;33;1", "wc_clarity": "81;57;1;24;189;1", "wc_relation_to_prior_work": "42;15;1;8;173;1", "wc_documentation": "99;10;1;35;5;1", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "637;1058;525;380;1366;210", "wc_reply_reviewers": "71;51;12;51;76;16", "wc_reply_authors": "517;802;361;579;787;414", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;2;2", "rating_avg": [ 8.0, 0.5773502691896257 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 170.16666666666666, 91.73951650673165 ], "wc_strengths_avg": [ 109.0, 52.80782770259222 ], "wc_improvement_avg": [ 187.33333333333334, 176.41113595487357 ], "wc_limitations_avg": [ 82.33333333333333, 69.5908678555519 ], "wc_correctness_avg": [ 22.166666666666668, 33.24864241171693 ], "wc_clarity_avg": [ 58.833333333333336, 64.9882468006502 ], "wc_relation_to_prior_work_avg": [ 40.0, 61.079183578913906 ], "wc_documentation_avg": [ 25.166666666666668, 35.00674538174353 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 696.0, 397.64263688224713 ], "wc_reply_reviewers_avg": [ 46.166666666666664, 24.599570908633524 ], "wc_reply_authors_avg": [ 576.6666666666666, 169.09234820719186 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": 0.3535533905932738, "gs_citation": 408, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15223521996574351524&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "hku.hk;alibaba-inc.com;;siat.ac.cn;;tsinghua.edu.cn;;siat.ac.cn;;cs.hku.hk;mail.tsinghua.edu.cn;cuhk.edu.cn;tsinghua.edu.cn;illinois.edu;;cs.hku.hk;alibaba-inc.com", "author_num": 17, "aff_unique_index": "0;1;2;3;2;0;3;4;3;5;0;1", "aff_unique_norm": "University of Hong Kong;Alibaba Group;Chinese Academy of Sciences;Tsinghua University;Chinese University of Hong Kong;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology;International Innovation Center;;", "aff_unique_url": "https://www.hku.hk;https://www.alibaba.com;http://www.cas.cn;https://www.tsinghua.edu.cn;https://www.cuhk.edu.cn;https://illinois.edu", "aff_unique_abbr": "HKU;Alibaba;CAS;THU;CUHK;UIUC", "aff_campus_unique_index": "0;2;3;2;0;2;4;0", "aff_campus_unique": "Hong Kong SAR;;Shenzhen;Shanghai;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "KuaiSim: A Comprehensive Simulator for Recommender Systems", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73528", "id": "dJEjgQcbOt", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c7f8f98f9a8f5650922dd4545254f28-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=dJEjgQcbOt", "openreview": "https://openreview.net/forum?id=dJEjgQcbOt", "poster": "/media/PosterPDFs/NeurIPS%202023/73528.png?t=1699952824.6531372", "slides": "https://nips.cc/virtual/2023/poster/73528", "video": "https://nips.cc/virtual/2023/poster/73528", "author_site": "Kesen Zhao, Shuchang Liu, Qingpeng Cai, Xiangyu Zhao, Ziru Liu, Dong Zheng, Peng Jiang, Kun Gai", "tldr": "", "abstract": "Reinforcement Learning (RL)-based recommender systems (RSs) have garnered considerable attention due to their ability to learn optimal recommendation policies and maximize long-term user rewards. \nHowever, deploying RL models directly in online environments and generating authentic data through A/B tests can pose challenges and require substantial resources. \nSimulators offer an alternative approach by providing training and evaluation environments for RS models, reducing reliance on real-world data. \nExisting simulators have shown promising results but also have limitations such as simplified user feedback, lacking consistency with real-world data, the challenge of simulator evaluation, and difficulties in migration and expansion across RSs.\nTo address these challenges, we propose KuaiSim, a comprehensive user environment that provides user feedback with multi-behavior and cross-session responses.\nThe resulting simulator can support three levels of recommendation problems: the request level list-wise recommendation task, the whole-session level sequential recommendation task, and the cross-session level retention optimization task. \nFor each task, KuaiSim also provides evaluation protocols and baseline recommendation algorithms that further serve as benchmarks for future research. \nWe also restructure existing competitive simulators on the Kuairand Dataset and compare them against KuaiSim to future assess their performance and behavioral differences. \nFurthermore, to showcase KuaiSim's flexibility in accommodating different datasets, we demonstrate its versatility and robustness when deploying it on the ML-1m dataset. 
The implementation code is available online to ease reproducibility \\footnote{https://github.com/Applied-Machine-Learning-Lab/KuaiSim}.", "keywords": "Recommender Systems;Reinforcement Learning;Simulator", "primary_area": "", "supplementary_material": "/attachment/9d46d96945c71d656e7c25b431053f1181700448.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nzhao2023kuaisim,\ntitle={KuaiSim: A Comprehensive Simulator for Recommender Systems},\nauthor={Kesen Zhao and Shuchang Liu and Qingpeng Cai and Xiangyu Zhao and Ziru Liu and Dong Zheng and Peng Jiang and Kun Gai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=dJEjgQcbOt}\n}", "github": "", "project": "", "reviewers": "fsvF;uGYD;P5ds", "pdf_size": 573596, "rating": "6;6;7", "confidence": "3;3;2", "wc_summary_and_contributions": "43;71;94", "wc_strengths": "37;62;17", "wc_improvement": "189;180;14", "wc_limitations": "1;37;47", "wc_correctness": "9;36;1", "wc_clarity": "16;32;1", "wc_relation_to_prior_work": "9;31;1", "wc_documentation": "29;19;1", "wc_additional_feedback": "1;1;1", "wc_review": "334;469;177", "wc_reply_reviewers": "25;18;11", "wc_reply_authors": "678;726;808", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 69.33333333333333, 20.853989759489405 ], "wc_strengths_avg": [ 38.666666666666664, 18.408935028645434 ], "wc_improvement_avg": [ 127.66666666666667, 80.45840885547983 ], "wc_limitations_avg": [ 28.333333333333332, 19.754043186705406 ], "wc_correctness_avg": [ 15.333333333333334, 14.974051630144134 ], "wc_clarity_avg": [ 16.333333333333332, 12.657891697365017 ], "wc_relation_to_prior_work_avg": [ 13.666666666666666, 12.684198393626966 ], "wc_documentation_avg": [ 16.333333333333332, 11.585431464655178 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 326.6666666666667, 119.32122843633297 ], "wc_reply_reviewers_avg": [ 18.0, 5.715476066494082 ], "wc_reply_authors_avg": [ 737.3333333333334, 53.67391255432092 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16890108918317541371&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "", "author_num": 1 }, { "title": "Data Augmentations for Improved (Large) Language Model Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71031", "id": "dJZ3MvDw86", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df88b275bef31ac96c85f0c4013734fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dJZ3MvDw86", "openreview": "https://openreview.net/forum?id=dJZ3MvDw86", "poster": "/media/PosterPDFs/NeurIPS%202023/71031.png?t=1701637063.0024", "slides": "https://nips.cc/virtual/2023/poster/71031", "video": "https://nips.cc/virtual/2023/poster/71031", "author_site": "Amir Feder, Yoav Wald, Claudia Shi, Suchi Saria, David Blei", "tldr": "", "abstract": "The reliance of text classifiers on spurious correlations can 
lead to poor generalization at deployment, raising concerns about their use in safety-critical domains such as healthcare. In this work, we propose to use counterfactual data augmentation, guided by knowledge of the causal structure of the data, to simulate interventions on spurious features and to learn more robust text classifiers. We show that this strategy is appropriate in prediction problems where the label is spuriously correlated with an attribute. Under the assumptions of such problems, we discuss the favorable sample complexity of counterfactual data augmentation, compared to importance re-weighting. Pragmatically, we match examples using auxiliary data, based on diff-in-diff methodology, and use a large language model (LLM) to represent a conditional probability of text. Through extensive experimentation on learning caregiver-invariant predictors of clinical diagnoses from medical narratives and on semi-synthetic data, we demonstrate that our method for simulating interventions improves out-of-distribution (OOD) accuracy compared to baseline invariant learning algorithms.", "keywords": "Counterfactually Augmented Data;Invariant Learning;Out-of-distribution Generalization;Clinical NLP", "primary_area": "", "supplementary_material": "/attachment/b148bdc3b4da08e36d57cc855eb93e39cc596df3.pdf", "author": "Amir Feder;Yoav Wald;Claudia Shi;Suchi Saria;David Blei", "authorids": "~Amir_Feder1;~Yoav_Wald1;~Claudia_Shi1;~Suchi_Saria1;~David_Blei2", "gender": ";;F;M;M", "homepage": "https://www.amirfeder.com/;https://claudiajshi.com/;https://suchisaria.jhu.edu/;http://www.cs.columbia.edu/~blei/;", "dblp": "214/3604;;72/2433;86/1910;165/8048", "google_scholar": "ERwoPLIAAAAJ;WHKniLsAAAAJ;;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ;", "orcid": "0000-0001-5472-1135;;;;", "linkedin": "amir-feder-b65b7035/;;;;", "or_profile": "~Amir_Feder1;~Claudia_Shi1;~Suchi_Saria1;~David_Blei2;~Yoav_Itzhak_Wald1", "aff": "Google;Columbia University;Department of Computer Science, Whiting School of Engineering;Columbia University;New York University", "aff_domain": "google.com;columbia.edu;cs.jhu.edu;columbia.edu;nyu.edu", "position": "Researcher;PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nfeder2023causalstructure,\ntitle={Causal-structure Driven Augmentations for Text {OOD} Generalization},\nauthor={Amir Feder and Yoav Wald and Claudia Shi and Suchi Saria and David Blei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dJZ3MvDw86}\n}", "github": "", "project": "", "reviewers": "8nR3;J1UG;Erys;YqAy;MXTx", "pdf_size": 1241904, "rating": "5;5;7;7;8", "confidence": "3;3;4;4;3", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "75;103;124;146;14", "wc_strengths": "87;35;94;13;53", "wc_weaknesses": "108;194;200;16;77", "wc_questions": "107;13;140;189;2", "wc_limitations": "6;1;5;81;8", "wc_review": "383;346;563;445;154", "wc_reply_reviewers": "56;124;35;0;16", "wc_reply_authors": "56;53;63;391;25", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 92.4, 45.67975481545408 ], "wc_strengths_avg": [ 56.4, 30.669854906732116 ], "wc_weaknesses_avg": [ 119.0, 70.25667228100119 ], 
"wc_questions_avg": [ 90.2, 72.47454725626093 ], "wc_limitations_avg": [ 20.2, 30.48540634467581 ], "wc_review_avg": [ 378.2, 134.1184551059249 ], "wc_reply_reviewers_avg": [ 46.2, 43.17591921430278 ], "wc_reply_authors_avg": [ 117.6, 137.31074247851112 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4082482904638631, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12217823738453824986&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;columbia.edu;cs.jhu.edu;columbia.edu;nyu.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Google;Columbia University;Johns Hopkins University;New York University", "aff_unique_dep": "Google;;Department of Computer Science;", "aff_unique_url": "https://www.google.com;https://www.columbia.edu;https://www.jhu.edu;https://www.nyu.edu", "aff_unique_abbr": "Google;Columbia;JHU;NYU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Baltimore", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Keypoint-Augmented Self-Supervised Learning for Medical Image Segmentation with Limited Annotation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71030", "id": "dK0Ew3kkVf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bee3d6218d7414f8cadfff0eafd0d7be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dK0Ew3kkVf", "openreview": "https://openreview.net/forum?id=dK0Ew3kkVf", "poster": "/media/PosterPDFs/NeurIPS%202023/71030.png?t=1702081399.17413", "slides": "https://nips.cc/virtual/2023/poster/71030", "video": "https://nips.cc/virtual/2023/poster/71030", "author_site": "Zhangsihao Yang, Mengwei Ren, Kaize Ding, Guido Gerig, Yalin Wang", "tldr": "", "abstract": "Pretraining CNN models (i.e., UNet) through self-supervision has become a powerful approach to facilitate medical image segmentation under low annotation regimes. Recent contrastive learning methods encourage similar global representations when the same image undergoes different transformations, or enforce invariance across different image/patch features that are intrinsically correlated. However, CNN-extracted global and local features are limited in capturing long-range spatial dependencies that are essential in biological anatomy. To this end, we present a keypoint-augmented fusion layer that extracts representations preserving both short- and long-range self-attention. In particular, we augment the CNN feature map at multiple scales by incorporating an additional input that learns long-range spatial self-attention among localized keypoint features. Further, we introduce both global and local self-supervised pretraining for the framework. At the global scale, we obtain global representations from both the bottleneck of the UNet, and by aggregating multiscale keypoint features. These global features are subsequently regularized through image-level contrastive objectives. At the local scale, we define a distance-based criterion to first establish correspondences among keypoints and encourage similarity between their features. Through extensive experiments on both MRI and CT segmentation tasks, we demonstrate the architectural advantages of our proposed method in comparison to both CNN and Transformer-based UNets, when all architectures are trained with randomly initialized weights. 
With our proposed pretraining strategy, our method further outperforms existing SSL methods by producing more robust self-attention and achieving state-of-the-art segmentation results. The code is available at https://github.com/zshyang/kaf.git.", "keywords": "Keypoints;Medical image;self-supervised learning;transformer;segmentation", "primary_area": "", "supplementary_material": "/attachment/b900674a53536903e3e565d81ed88042ab5bb36d.zip", "author": "Zhangsihao Yang;Mengwei Ren;Kaize Ding;Guido Gerig;Yalin Wang", "authorids": "~Zhangsihao_Yang1;~Mengwei_Ren1;~Kaize_Ding1;~Guido_Gerig1;~Yalin_Wang3", "gender": "M;F;M;M;M", "homepage": ";https://www.mengweiren.com/;https://kaize0409.github.io/;http://engineering.nyu.edu/people/guido-gerig/;http://gsl.lab.asu.edu", "dblp": ";210/2614;234/6878;https://dblp.uni-trier.de/pid/g/GuidoGerig;88/128-1", "google_scholar": "VaRp0cMAAAAJ;https://scholar.google.com/citations?hl=en;PI3myr8AAAAJ;https://scholar.google.com.tw/citations?user=P5CovF0AAAAJ;F4tTgLQAAAAJ", "orcid": ";;;;0000-0002-6241-735X", "linkedin": ";;;;", "or_profile": "~Zhangsihao_Yang1;~Mengwei_Ren1;~Kaize_Ding1;~Guido_Gerig1;~Yalin_Wang3", "aff": "Arizona State University;New York University;Arizona State University;New York University;Arizona State University", "aff_domain": "asu.edu;nyu.edu;asu.edu;nyu.edu;asu.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyang2023keypointaugmented,\ntitle={Keypoint-Augmented Self-Supervised Learning for Medical Image Segmentation with Limited Annotation},\nauthor={Zhangsihao Yang and Mengwei Ren and Kaize Ding and Guido Gerig and Yalin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dK0Ew3kkVf}\n}", "github": "", "project": "", "reviewers": "9qHo;8g4p;PaeV;m84B;b18J", "pdf_size": 43493037, "rating": "3;5;6;6;6", "confidence": "4;3;4;5;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "116;80;126;55;166", "wc_strengths": "165;26;50;32;108", "wc_weaknesses": "158;150;144;224;337", "wc_questions": "145;10;9;195;111", "wc_limitations": "7;5;6;33;7", "wc_review": "591;271;335;539;729", "wc_reply_reviewers": "14;57;12;89;90", "wc_reply_authors": "218;24;10;453;199", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 108.6, 38.32283914325764 ], "wc_strengths_avg": [ 76.2, 53.022259476563235 ], "wc_weaknesses_avg": [ 202.6, 73.09062867426987 ], "wc_questions_avg": [ 94.0, 73.9891883993871 ], "wc_limitations_avg": [ 11.6, 10.725670142233538 ], "wc_review_avg": [ 493.0, 168.32349806251057 ], "wc_reply_reviewers_avg": [ 52.4, 34.296355491509594 ], "wc_reply_authors_avg": [ 180.8, 160.97627154335513 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2711630722733202, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15453090050144252660&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "asu.edu;nyu.edu;asu.edu;nyu.edu;asu.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Arizona State University;New York University", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.nyu.edu", "aff_unique_abbr": "ASU;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reimagining Synthetic Tabular Data Generation through Data-Centric AI: A Comprehensive Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73527", "id": "dK1Rs1o0Ij", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6aa9a05b929fb08ff46a58cab6cf860d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=dK1Rs1o0Ij", "openreview": "https://openreview.net/forum?id=dK1Rs1o0Ij", "poster": "/media/PosterPDFs/NeurIPS%202023/73527.png?t=1701416552.8109338", "slides": "https://nips.cc/virtual/2023/poster/73527", "video": "https://nips.cc/virtual/2023/poster/73527", "author_site": "Lasse Hansen, Nabeel Seedat, Mihaela van der Schaar, Andrija Petrovic", "tldr": "", "abstract": "Synthetic data serves as an alternative in training machine learning models, particularly when real-world data is limited or inaccessible. However, ensuring that synthetic data mirrors the complex nuances of real-world data is a challenging task. This paper addresses this issue by exploring the potential of integrating data-centric AI techniques which profile the data to guide the synthetic data generation process. Moreover, we shed light on the often ignored consequences of neglecting these data profiles during synthetic data generation --- despite seemingly high statistical fidelity. Subsequently, we propose a novel framework to evaluate the integration of data profiles to guide the creation of more representative synthetic data. In an empirical study, we evaluate the performance of five state-of-the-art models for tabular data generation on eleven distinct tabular datasets. The findings offer critical insights into the successes and limitations of current synthetic data generation techniques. Finally, we provide practical recommendations for integrating data-centric insights into the synthetic data generation process, with a specific focus on classification performance, model selection, and feature selection. 
This study aims to reevaluate conventional approaches to synthetic data generation and promote the application of data-centric AI techniques in improving the quality and effectiveness of synthetic data.", "keywords": "synthetic data generation;data-centric AI;tabular data;synthetic data evaluation", "primary_area": "", "supplementary_material": "", "author": "Lasse Hansen;Nabeel Seedat;Mihaela van der Schaar;Andrija Petrovic", "authorids": "~Lasse_Hansen2;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2;~Andrija_Petrovic3", "gender": "M;;F;M", "homepage": "https://lassehansen.me;;https://www.vanderschaar-lab.com;http://www.fon.bg.ac.rs/o-fakultetu/organizacija/nastavnici/andrija-petrovic/", "dblp": ";227/8368;;", "google_scholar": "https://scholar.google.dk/citations?user=UtsjUGGsFC0C;https://scholar.google.com/citations?hl=en;DZ3S--MAAAAJ;tW32SZ4AAAAJ", "orcid": "0000-0003-1113-4779;;;", "linkedin": ";nabeel-seedat/;;andrija-petrovic-20299ba2", "or_profile": "~Lasse_Hansen2;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2;~Andrija_Petrovic3", "aff": "Aarhus University;University of Cambridge;University of California, Los Angeles;University of Belgrade", "aff_domain": "au.dk;cam.ac.uk;ucla.edu;bg.ac.rs", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhansen2023reimagining,\ntitle={Reimagining Synthetic Tabular Data Generation through Data-Centric {AI}: A Comprehensive Benchmark},\nauthor={Lasse Hansen and Nabeel Seedat and Mihaela van der Schaar and Andrija Petrovic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=dK1Rs1o0Ij}\n}", "github": "", "project": "", "reviewers": "g5vc;P9Bf;yA6T", "pdf_size": 8688752, "rating": "4;7;7", "confidence": "4;4;3", "wc_summary_and_contributions": "72;73;68", "wc_strengths": "46;24;95", "wc_improvement": "199;58;628", "wc_limitations": "1;13;24", "wc_correctness": "1;6;34", "wc_clarity": "1;26;107", "wc_relation_to_prior_work": "45;17;121", "wc_documentation": "71;6;8", "wc_additional_feedback": "1;1;1", "wc_review": "437;224;1086", "wc_reply_reviewers": "0;0;75", "wc_reply_authors": "1587;190;1861", "reply_reviewers": "0;0;1", "reply_authors": "4;1;4", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 71.0, 2.160246899469287 ], "wc_strengths_avg": [ 55.0, 29.676028485406647 ], "wc_improvement_avg": [ 295.0, 242.40049504899943 ], "wc_limitations_avg": [ 12.666666666666666, 9.392668535736913 ], "wc_correctness_avg": [ 13.666666666666666, 14.522013940527977 ], "wc_clarity_avg": [ 44.666666666666664, 45.24255617103093 ], "wc_relation_to_prior_work_avg": [ 61.0, 43.93935214209088 ], "wc_documentation_avg": [ 28.333333333333332, 30.180935851773864 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 582.3333333333334, 366.60817715315017 ], "wc_reply_reviewers_avg": [ 25.0, 35.35533905932738 ], "wc_reply_authors_avg": [ 1212.6666666666667, 731.7350765285358 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9252760239925104525&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "au.dk;cam.ac.uk;ucla.edu;bg.ac.rs", "author_num": 4, "aff_unique_index": "0;1;2;3", 
"aff_unique_norm": "Aarhus University;University of Cambridge;University of California, Los Angeles;University of Belgrade", "aff_unique_dep": ";;;", "aff_unique_url": "https://au.dk;https://www.cam.ac.uk;https://www.ucla.edu;https://www.bg.ac.rs", "aff_unique_abbr": "AU;Cambridge;UCLA;UB", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Los Angeles", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "Denmark;United Kingdom;United States;Serbia" }, { "title": "SwiFT: Swin 4D fMRI Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71029", "id": "dKeWh6EzBB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8313b1920ee9c78d846c5798c1ce48be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dKeWh6EzBB", "openreview": "https://openreview.net/forum?id=dKeWh6EzBB", "poster": "/media/PosterPDFs/NeurIPS%202023/71029.png?t=1702013382.5328417", "slides": "https://nips.cc/virtual/2023/poster/71029", "video": "https://nips.cc/virtual/2023/poster/71029", "author_site": "Peter Kim, Junbeom Kwon, Sunghwan Joo, Sangyoon Bae, Donggyu Lee, Yoonho Jung, Shinjae Yoo, Jiook Cha, Taesup Moon", "tldr": "", "abstract": "Modeling spatiotemporal brain dynamics from high-dimensional data, such as functional Magnetic Resonance Imaging (fMRI), is a formidable task in neuroscience. Existing approaches for fMRI analysis utilize hand-crafted features, but the process of feature extraction risks losing essential information in fMRI scans. To address this challenge, we present SwiFT (Swin 4D fMRI Transformer), a Swin Transformer architecture that can learn brain dynamics directly from fMRI volumes in a memory and computation-efficient manner. SwiFT achieves this by implementing a 4D window multi-head self-attention mechanism and absolute positional embeddings. We evaluate SwiFT using multiple large-scale resting-state fMRI datasets, including the Human Connectome Project (HCP), Adolescent Brain Cognitive Development (ABCD), and UK Biobank (UKB) datasets, to predict sex, age, and cognitive intelligence. Our experimental outcomes reveal that SwiFT consistently outperforms recent state-of-the-art models. Furthermore, by leveraging its end-to-end learning capability, we show that contrastive loss-based self-supervised pre-training of SwiFT can enhance performance on downstream tasks. Additionally, we employ an explainable AI method to identify the brain regions associated with sex classification. To our knowledge, SwiFT is the first Swin Transformer architecture to process dimensional spatiotemporal brain functional data in an end-to-end fashion. 
Our work holds substantial potential in facilitating scalable learning of functional brain imaging in neuroscience research by reducing the hurdles associated with applying Transformer models to high-dimensional fMRI.", "keywords": "fMRI;Swin Transformer;4D;neuroscience", "primary_area": "", "supplementary_material": "", "author": "Peter Yongho Kim;Junbeom Kwon;Sunghwan Joo;Sangyoon Bae;Donggyu Lee;Yoonho Jung;Shinjae Yoo;Jiook Cha;Taesup Moon", "authorids": "~Peter_Yongho_Kim1;~Junbeom_Kwon1;~Sunghwan_Joo1;~Sangyoon_Bae1;~Donggyu_Lee1;~Yoonho_Jung1;~Shinjae_Yoo1;~Jiook_Cha1;~Taesup_Moon1", "gender": "M;M;M;F;M;M;M;M;", "homepage": ";https://sites.google.com/connectomelab.net/snu?pli=1;;;https://sites.google.com/view/dqlee/%ED%99%88;https://sites.google.com/snu.ac.kr/jungyh19/%ED%99%88;;https://www.connectomelab.com;https://mindlab-snu.github.io/people/pi/", "dblp": "352/3146;;230/4203;;142/3306;285/9029;69/1062;157/5725;05/4084", "google_scholar": "yOdQFVgAAAAJ;https://scholar.google.co.kr/citations?hl=ko;;;7syHfVAAAAAJ;;https://scholar.google.com/citations?hl=en;fHSAoOoAAAAJ;lQlioBoAAAAJ", "orcid": ";;;0000-0003-0411-5692;;;;0000-0002-5314-7992;0000-0002-9257-6503", "linkedin": ";;;stella-sangyoon-bae/;;;;jiook-cha-35904a3b/;", "or_profile": "~Peter_Yongho_Kim1;~Junbeom_Kwon1;~Sunghwan_Joo1;~Sangyoon_Bae1;~Donggyu_Lee1;~Yoonho_Jung1;~Shinjae_Yoo1;~Jiook_Cha1;~Taesup_Moon1", "aff": "Seoul National University;Seoul National University;Sung Kyun Kwan University;Seoul National University;Sungkyunkwan University;Seoul National University;Brookhaven National Lab;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;skku.ac.kr;snu.ac.kr;skku.edu;snu.ac.kr;bnl.gov;snu.ac.kr;snu.ac.kr", "position": "Undergrad student;MS student;PhD student;PhD student;PhD student;Undergrad student;Scientist;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2023swift,\ntitle={Swi{FT}: Swin 4D f{MRI} Transformer},\nauthor={Peter Yongho Kim and Junbeom Kwon and Sunghwan Joo and Sangyoon Bae and Donggyu Lee and Yoonho Jung and Shinjae Yoo and Jiook Cha and Taesup Moon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dKeWh6EzBB}\n}", "github": "", "project": "", "reviewers": "st1h;JwtU;pTkQ;bLJJ;ksE2", "pdf_size": 1100852, "rating": "6;6;6;6;7", "confidence": "4;4;5;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "88;112;115;66;107", "wc_strengths": "74;52;131;134;134", "wc_weaknesses": "38;79;208;121;157", "wc_questions": "113;111;130;106;24", "wc_limitations": "9;9;146;45;1", "wc_review": "322;363;730;472;423", "wc_reply_reviewers": "6;54;50;6;26", "wc_reply_authors": "0;0;84;0;0", "reply_reviewers": "1;1;1;1;2", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.6, 18.380424369420854 ], "wc_strengths_avg": [ 105.0, 35.008570379265706 ], "wc_weaknesses_avg": [ 120.6, 59.18986399714059 ], "wc_questions_avg": [ 96.8, 37.2848494699925 ], "wc_limitations_avg": [ 42.0, 54.19225036847981 ], "wc_review_avg": [ 462.0, 143.41966392374513 ], "wc_reply_reviewers_avg": [ 28.4, 20.645580640902306 ], "wc_reply_authors_avg": [ 16.8, 33.599999999999994 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": 
[ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7955225227979970131&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "snu.ac.kr;snu.ac.kr;skku.ac.kr;snu.ac.kr;skku.edu;snu.ac.kr;bnl.gov;snu.ac.kr;snu.ac.kr", "author_num": 9, "aff_unique_index": "0;0;1;0;1;0;2;0;0", "aff_unique_norm": "Seoul National University;Sungkyunkwan University;Brookhaven National Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;https://www.skku.edu;https://www.bnl.gov", "aff_unique_abbr": "SNU;SKKU;BNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Double and Single Descent in Causal Inference with an Application to High-Dimensional Synthetic Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71028", "id": "dL0GM9Wwtq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c904c5d43d8a01177063977bd67bf6fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dL0GM9Wwtq", "openreview": "https://openreview.net/forum?id=dL0GM9Wwtq", "poster": "/media/PosterPDFs/NeurIPS%202023/71028.png?t=1699469074.6476498", "slides": "https://nips.cc/virtual/2023/poster/71028", "video": "https://nips.cc/virtual/2023/poster/71028", "author_site": "Jann Spiess, guido imbens, Amar Venugopal", "tldr": "", "abstract": "Motivated by a recent literature on the double-descent phenomenon in machine learning, we consider highly over-parameterized models in causal inference, including synthetic control with many control units. In such models, there may be so many free parameters that the model fits the training data perfectly. We first investigate high-dimensional linear regression for imputing wage data and estimating average treatment effects, where we find that models with many more covariates than sample size can outperform simple ones. We then document the performance of high-dimensional synthetic control estimators with many control units. We find that adding control units can help improve imputation performance even beyond the point where the pre-treatment fit is perfect. We provide a unified theoretical perspective on the performance of these high-dimensional models. Specifically, we show that more complex models can be interpreted as model-averaging estimators over simpler ones, which we link to an improvement in average performance. 
This perspective yields concrete insights into the use of synthetic control when control units are many relative to the number of pre-treatment periods.", "keywords": "Double descent;interpolating regression;synthetic control;causal inference", "primary_area": "", "supplementary_material": "/attachment/b0b066574ba7196cc405b2964a3f91eacd083059.zip", "author": "Jann Spiess;Guido Imbens;Amar Venugopal", "authorids": "~Jann_Spiess1;~Guido_Imbens1;amar.venugopal@stanford.edu", "gender": ";M;", "homepage": "https://gsb-faculty.stanford.edu/jann-spiess/;https://www.gsb.stanford.edu/faculty-research/faculty/guido-w-imbens;", "dblp": "288/1552;210/2296;", "google_scholar": "bOosPJwAAAAJ;dYwbc9sAAAAJ;", "orcid": ";0000-0002-4846-7326;", "linkedin": ";;", "or_profile": "~Jann_Spiess1;~Guido_Imbens1;amar.venugopal@stanford.edu", "aff": "Stanford University;Stanford University;", "aff_domain": "stanford.edu;stanford.edu;", "position": "Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nspiess2023double,\ntitle={Double and Single Descent in Causal Inference with an Application to High-Dimensional Synthetic Control},\nauthor={Jann Spiess and Guido Imbens and Amar Venugopal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dL0GM9Wwtq}\n}", "github": "", "project": "", "reviewers": "TPer;aRQr;wCp4;xBLi", "pdf_size": 475457, "rating": "4;6;6;9", "confidence": "3;3;3;4", "soundness": "4;3;3;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "122;193;69;138", "wc_strengths": "86;229;66;206", "wc_weaknesses": "325;318;167;43", "wc_questions": "78;95;95;557", "wc_limitations": "18;118;70;7", "wc_review": "629;953;467;951", "wc_reply_reviewers": "271;39;61;16", "wc_reply_authors": "140;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 130.5, 44.206899913927465 ], "wc_strengths_avg": [ 146.75, 71.56596607326698 ], "wc_weaknesses_avg": [ 213.25, 116.81689732226242 ], "wc_questions_avg": [ 206.25, 202.62449876557375 ], "wc_limitations_avg": [ 53.25, 44.313513740167345 ], "wc_review_avg": [ 750.0, 209.96428267684007 ], "wc_reply_reviewers_avg": [ 96.75, 101.8537554535914 ], "wc_reply_authors_avg": [ 35.0, 60.6217782649107 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8892972917998875, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10950676470003600065&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 13, "email": "stanford.edu;stanford.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Constrained Policy Optimization with Explicit Behavior Density For Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71027", "id": "dLmDPVv19z", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/11e1900e680f5fe1893a8e27362dbe2c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dLmDPVv19z", "openreview": "https://openreview.net/forum?id=dLmDPVv19z", "poster": "/media/PosterPDFs/NeurIPS%202023/71027.png?t=1701395070.4562492", "slides": "https://nips.cc/virtual/2023/poster/71027", "video": "https://nips.cc/virtual/2023/poster/71027", "author_site": "Jing Zhang, Chi Zhang, Wenjia Wang, Bingyi Jing", "tldr": "", "abstract": "Due to the inability to interact with the environment, offline reinforcement learning (RL) methods face the challenge of estimating the Out-of-Distribution (OOD) points. Existing methods for addressing this issue either control policy to exclude the OOD action or make the $Q$ function pessimistic. However, these methods can be overly conservative or fail to identify OOD areas accurately. To overcome this problem, we propose a Constrained Policy optimization with Explicit Behavior density (CPED) method that utilizes a flow-GAN model to explicitly estimate the density of behavior policy. By estimating the explicit density, CPED can accurately identify the safe region and enable exploration within the region, resulting in less conservative learning policies. We further provide theoretical results for both the flow-GAN estimator and performance guarantee for CPED by showing that CPED can find the optimal $Q$-function value. Empirically, CPED outperforms existing alternatives on various standard offline reinforcement learning tasks, yielding higher expected returns.", "keywords": "Offline Reinforcement Learning;GAN;Flow Model;Policy Control", "primary_area": "", "supplementary_material": "/attachment/1bc15f483ec11ea3137c63ec1fb0f6e845e6ae86.pdf", "author": "Jing Zhang;Chi Zhang;Wenjia Wang;Bingyi Jing", "authorids": "~Jing_Zhang34;~Chi_Zhang6;~Wenjia_Wang2;~Bingyi_Jing1", "gender": "F;M;M;M", "homepage": ";;https://www.wenjia-w.com/;https://www.sustech.edu.cn/en/faculties/jingbing-yi.html", "dblp": ";91/195-100;;15/8051", "google_scholar": "wruIkmYAAAAJ;602Al-UAAAAJ;EKS1sO0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9604-2699;;;0000-0002-8876-1570", "linkedin": ";;;", "or_profile": "~Jing_Zhang34;~Chi_Zhang6;~Wenjia_Wang2;~Bingyi_Jing1", "aff": "Hong Kong University of Science and Technology;Kuaishou Technology;Hong Kong University of Science and Technology;South University of Science and Technology", "aff_domain": "hkust.edu;kuaishou.com;ust.hk;sustech.edu.cn", "position": "PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023constrained,\ntitle={Constrained Policy Optimization with Explicit Behavior Density For Offline Reinforcement Learning},\nauthor={Jing Zhang and Chi Zhang and Wenjia Wang and Bingyi Jing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dLmDPVv19z}\n}", "github": "", "project": "", "reviewers": "1U1v;6qDS;krNs;Y4vE", "pdf_size": 435498, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;2;3;2", "wc_summary": "67;63;46;52", "wc_strengths": "32;51;30;43", "wc_weaknesses": "100;81;213;149", "wc_questions": "67;255;35;26", "wc_limitations": "224;6;5;1", "wc_review": "490;456;329;271", "wc_reply_reviewers": "144;111;79;47", "wc_reply_authors": "594;812;463;291", "reply_reviewers": "1;1;1;1", "reply_authors": "4;4;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 
3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.0, 8.396427811873332 ], "wc_strengths_avg": [ 39.0, 8.514693182963201 ], "wc_weaknesses_avg": [ 135.75, 51.036139156484005 ], "wc_questions_avg": [ 95.75, 93.19703589707132 ], "wc_limitations_avg": [ 59.0, 95.28116288123272 ], "wc_review_avg": [ 386.5, 89.70646576473739 ], "wc_reply_reviewers_avg": [ 95.25, 36.11353624335341 ], "wc_reply_authors_avg": [ 540.0, 190.28268444606303 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11937380221757553946&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hkust.edu;kuaishou.com;ust.hk;sustech.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Kuaishou Technology;South University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.kuaishou.com;https://www.sustech.edu.cn", "aff_unique_abbr": "HKUST;Kuaishou;SUSTech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "From Discrete Tokens to High-Fidelity Audio Using Multi-Band Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71026", "id": "dOanKg3jKS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/054f771d614df12fe8def8ecdbe4e8e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dOanKg3jKS", "openreview": "https://openreview.net/forum?id=dOanKg3jKS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71026", "video": "https://nips.cc/virtual/2023/poster/71026", "author_site": "Robin San Roman, Yossi Adi, Antoine Deleforge, Romain Serizel, Gabriel Synnaeve, Alexandre Defossez", "tldr": "", "abstract": "Deep generative models can generate high-fidelity audio conditioned on various\ntypes of representations (e.g., mel-spectrograms, Mel-frequency Cepstral Coefficients\n(MFCC)). Recently, such models have been used to synthesize audio\nwaveforms conditioned on highly compressed representations. Although such\nmethods produce impressive results, they are prone to generate audible artifacts\nwhen the conditioning is flawed or imperfect. An alternative modeling approach is\nto use diffusion models. However, these have mainly been used as speech vocoders\n(i.e., conditioned on mel-spectrograms) or generating relatively low sampling\nrate signals. In this work, we propose a high-fidelity multi-band diffusion-based\nframework that generates any type of audio modality (e.g., speech, music, environmental\nsounds) from low-bitrate discrete representations. At equal bit rate,\nthe proposed approach outperforms state-of-the-art generative techniques in terms\nof perceptual quality. Training and evaluation code are available on the facebookresearch/\naudiocraft github project. 
Samples are available on the following\nlink (https://ai.honu.io/papers/mbd/).", "keywords": "diffusion;audio;compression", "primary_area": "", "supplementary_material": "/attachment/e8f99141657a9387c6ed10760878805b4fa095b5.pdf", "author": "Robin San Roman;Yossi Adi;Antoine Deleforge;Romain Serizel;Gabriel Synnaeve;Alexandre D\u00e9fossez", "authorids": "~Robin_San_Roman1;~Yossi_Adi1;~Antoine_Deleforge2;romain.serizel@loria.fr;~Gabriel_Synnaeve1;~Alexandre_D\u00e9fossez1", "gender": "M;M;M;;M;M", "homepage": ";http://adiyoss.github.io/;https://members.loria.fr/ADeleforge/;;;https://ai.honu.io/", "dblp": "289/7209;171/0957.html;47/10875;;http://dblp.uni-trier.de/pers/hd/s/Synnaeve:Gabriel;156/0054", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ;https://scholar.google.fr/citations?user=s7TQp00AAAAJ;;wN9rBkcAAAAJ;https://scholar.google.fr/citations?user=DubNUU0AAAAJ", "orcid": ";0000-0003-2237-3898;;;;", "linkedin": ";yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic;;;;", "or_profile": "~Robin_San_Roman1;~Yossi_Adi1;~Antoine_Deleforge2;romain.serizel@loria.fr;~Gabriel_Synnaeve1;~Alexandre_D\u00e9fossez1", "aff": "FAIR;Meta;INRIA;;Meta Facebook;Meta", "aff_domain": "meta.com;meta.com;inria.fr;;fb.com;meta.com", "position": "PhD student;Research Scientist;Researcher;;Research Scientist;Researcher", "bibtex": "@inproceedings{\nroman2023from,\ntitle={From Discrete Tokens to High-Fidelity Audio Using Multi-Band Diffusion},\nauthor={Robin San Roman and Yossi Adi and Antoine Deleforge and Romain Serizel and Gabriel Synnaeve and Alexandre D{\\'e}fossez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dOanKg3jKS}\n}", "github": "", "project": "", "reviewers": "tTLs;vmZN;SCw1;kZTC;s9xT", "pdf_size": 8755810, "rating": "5;5;6;6;7", "confidence": "4;4;2;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;2;3;3", "wc_summary": "53;40;52;129;72", "wc_strengths": "35;114;79;50;39", "wc_weaknesses": "126;267;125;19;302", "wc_questions": "23;62;79;28;14", "wc_limitations": "1;11;16;20;39", "wc_review": "238;494;351;246;466", "wc_reply_reviewers": "15;108;54;0;0", "wc_reply_authors": "0;149;0;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 69.2, 31.606328480226868 ], "wc_strengths_avg": [ 63.4, 29.614861134234616 ], "wc_weaknesses_avg": [ 167.8, 103.50922664187961 ], "wc_questions_avg": [ 41.2, 24.927093693409187 ], "wc_limitations_avg": [ 17.4, 12.531560158256434 ], "wc_review_avg": [ 359.0, 106.90930735908825 ], "wc_reply_reviewers_avg": [ 35.4, 41.32602085853415 ], "wc_reply_authors_avg": [ 29.8, 59.60000000000001 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13363062095621217, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2606055502741052588&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "meta.com;meta.com;inria.fr;;fb.com;meta.com", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Meta;INRIA", "aff_unique_dep": "Facebook AI Research;", "aff_unique_url": 
"https://research.facebook.com;https://www.inria.fr", "aff_unique_abbr": "FAIR;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;France" }, { "title": "SG\u00d7P : A Sorghum Genotype \u00d7 Phenotype Prediction Dataset and Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73526", "id": "dOeBYjxSoq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/39d02e8e23bafadd7cd405f2281bc05c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=dOeBYjxSoq", "openreview": "https://openreview.net/forum?id=dOeBYjxSoq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73526", "video": "https://nips.cc/virtual/2023/poster/73526", "author_site": "Zeyu Zhang, Robert Pless, Nadia Shakoor, Austin Carnahan, Abby Stylianou", "tldr": "", "abstract": "Large scale field-phenotyping approaches have the potential to solve important questions about the relationship of plant genotype to plant phenotype. Computational approaches to measuring the phenotype (the observable plant features) are required to address the problem at a large scale, but machine learning approaches to extract phenotypes from sensor data have been hampered by limited access to (a) sufficiently large, organized multi-sensor datasets, (b) field trials that have a large scale and significant number of genotypes, (c) full genetic sequencing of those phenotypes, and (d) datasets sufficiently organized so that algorithm centered researchers can directly address the real biological problems. To address this, we present SGxP, a novel benchmark dataset from a large-scale field trial consisting of the complete genotype of over 300 sorghum varieties, and time sequences of imagery from several field plots growing each variety, taken with RGB and laser 3D scanner imaging. To lower the barrier to entry and facilitate further developments, we provide a set of well organized, multi-sensor imagery and corresponding genomic data. We implement baseline deep learning based phenotyping approaches to create baseline results for individual sensors and multi-sensor fusion for detecting genetic mutations with known impacts. We also provide and support an open-ended challenge by identifying thousands of genetic mutations whose phenotypic impacts are currently unknown. A web interface for machine learning researchers and practitioners to share approaches, visualizations and hypotheses supports engagement with plant biologists to further the understanding of the sorghum genotype x phenotype relationship. 
The full dataset, leaderboard (including baseline results) and discussion forums can be found at http://sorghumsnpbenchmark.com.", "keywords": "benchmark;dataset;plant science;ml for science;sorghum;genetics;plant phenotyping", "primary_area": "", "supplementary_material": "/attachment/14ba01a61c028bec30cfa459d5f20a0ba96fbe78.pdf", "author": "Zeyu Zhang;Robert Pless;Nadia Shakoor;Austin Carnahan;Abby Stylianou", "authorids": "~Zeyu_Zhang5;~Robert_Pless3;nshakoor@danforthcenter.org;austin.carnahan@slu.edu;~Abby_Stylianou1", "gender": "M;M;;;F", "homepage": "http://zhzy.xyz;http://www2.seas.gwu.edu/~pless/;;;https://cs.slu.edu/~astylianou/", "dblp": "44/8352;76/1967;;;158/8944", "google_scholar": "seqbl5oAAAAJ;uVBjUtAAAAAJ;;;mNoB9SgAAAAJ", "orcid": ";0000-0001-5775-8216;;;", "linkedin": ";;;;", "or_profile": "~Zeyu_Zhang5;~Robert_Pless3;nshakoor@danforthcenter.org;austin.carnahan@slu.edu;~Abby_Stylianou1", "aff": "George Washington University;George Washington University;;;Saint Louis University", "aff_domain": "gwu.edu;gwu.edu;;;slu.edu", "position": "PhD student;Full Professor;;;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023sgp,\ntitle={{SG}{\\texttimes}P : A Sorghum Genotype {\\texttimes} Phenotype Prediction Dataset and Benchmark},\nauthor={Zeyu Zhang and Robert Pless and Nadia Shakoor and Austin Carnahan and Abby Stylianou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=dOeBYjxSoq}\n}", "github": "", "project": "", "reviewers": "aqR6;w226;VdiJ;XBbG;J6ZQ", "pdf_size": 3720324, "rating": "5;6;6;7;8", "confidence": "4;4;4;3;4", "wc_summary_and_contributions": "120;326;163;151;56", "wc_strengths": "84;63;118;83;81", "wc_improvement": "340;124;465;137;229", "wc_limitations": "92;49;395;65;18", "wc_correctness": "51;14;1157;27;21", "wc_clarity": "10;5;385;165;10", "wc_relation_to_prior_work": "26;1;5;76;11", "wc_documentation": "14;19;373;325;18", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "738;602;3062;1030;445", "wc_reply_reviewers": "0;58;3348;61;0", "wc_reply_authors": "636;326;3005;509;349", "reply_reviewers": "0;1;7;1;0", "reply_authors": "2;2;6;2;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 163.2, 89.45479305213333 ], "wc_strengths_avg": [ 85.8, 17.837040113202637 ], "wc_improvement_avg": [ 259.0, 128.84564408624763 ], "wc_limitations_avg": [ 123.8, 137.70025417550977 ], "wc_correctness_avg": [ 254.0, 451.6715620890915 ], "wc_clarity_avg": [ 115.0, 148.02026888233922 ], "wc_relation_to_prior_work_avg": [ 23.8, 27.447404248853843 ], "wc_documentation_avg": [ 149.8, 163.36143975859176 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 1175.4, 962.7036096327882 ], "wc_reply_reviewers_avg": [ 693.4, 1327.5670378553393 ], "wc_reply_authors_avg": [ 965.0, 1026.21186896274 ], "reply_reviewers_avg": [ 1.8, 2.638181191654584 ], "reply_authors_avg": [ 2.8, 1.6 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.294174202707276, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:soZ6OowBRDEJ:scholar.google.com/&scioq=SG%C3%97P+:+A+Sorghum+Genotype+%C3%97+Phenotype+Prediction+Dataset+and+Benchmark&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "gwu.edu;gwu.edu;;;slu.edu", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "George Washington University;Saint Louis University", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.gwu.edu;https://www.slu.edu", "aff_unique_abbr": "GWU;SLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Locally Invariant Explanations: Towards Stable and Unidirectional Explanations through Local Invariant Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71025", "id": "dOxm4FnMFu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3df874367ce2c43891aab1ab23ae6959-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dOxm4FnMFu", "openreview": "https://openreview.net/forum?id=dOxm4FnMFu", "poster": "/media/PosterPDFs/NeurIPS%202023/71025.png?t=1700488340.0468998", "slides": "https://nips.cc/virtual/2023/poster/71025", "video": "https://nips.cc/virtual/2023/poster/71025", "author_site": "Amit Dhurandhar, Karthikeyan Natesan Ramamurthy, Kartik Ahuja, Vijay Arya", "tldr": "", "abstract": "Locally interpretable model agnostic explanations (LIME) method is one of the most popular methods used to explain black-box models at a per example level. Although many variants have been proposed, few provide a simple way to produce high fidelity explanations that are also stable and intuitive. In this work, we provide a novel perspective by proposing a model agnostic local explanation method inspired by the invariant risk minimization (IRM) principle -- originally proposed for (global) out-of-distribution generalization -- to provide such high fidelity explanations that are also stable and unidirectional across nearby examples. Our method is based on a game theoretic formulation where we theoretically show that our approach has a strong tendency to eliminate features where the gradient of the black-box function abruptly changes sign in the locality of the example we want to explain, while in other cases it is more careful and will choose a more conservative (feature) attribution, a behavior which can be highly desirable for recourse. Empirically, we show on tabular, image and text data that the quality of our explanations with neighborhoods formed using random perturbations are much better than LIME and in some cases even comparable to other methods that use realistic neighbors sampled from the data manifold. This is desirable given that learning a manifold to either create realistic neighbors or to project explanations is typically expensive or may even be impossible. 
Moreover, our algorithm is simple and efficient to train, and can ascertain stable input features for local decisions of a black-box without access to side information such as a (partial) causal graph as has been seen in some recent works.", "keywords": "Explainable AI;Game theory;Invariance", "primary_area": "", "supplementary_material": "/attachment/51e747e8776990700aeca265c251151d3c17393e.zip", "author": "Amit Dhurandhar;Karthikeyan Natesan Ramamurthy;Kartik Ahuja;Vijay Arya", "authorids": "~Amit_Dhurandhar1;~Karthikeyan_Natesan_Ramamurthy1;~Kartik_Ahuja1;~Vijay_Arya1", "gender": "M;;;M", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=us-adhuran;https://nrkarthikeyan.github.io/;;", "dblp": "66/3289;58/7800;;77/1485", "google_scholar": "km9vIPEAAAAJ;mG8HuhEAAAAJ;;", "orcid": ";0000-0002-6021-5930;;", "linkedin": ";;;", "or_profile": "~Amit_Dhurandhar1;~Karthikeyan_Natesan_Ramamurthy1;~Kartik_Ahuja1;~Vijay_Arya1", "aff": "International Business Machines;International Business Machines;;IBM Research", "aff_domain": "ibm.com;ibm.com;;ibm.com", "position": "Principal Researcher;Research Staff Member;;Researcher", "bibtex": "@inproceedings{\ndhurandhar2023locally,\ntitle={Locally Invariant Explanations: Towards Stable and Unidirectional Explanations through Local Invariant Learning},\nauthor={Amit Dhurandhar and Karthikeyan Natesan Ramamurthy and Kartik Ahuja and Vijay Arya},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dOxm4FnMFu}\n}", "github": "", "project": "", "reviewers": "Rtbi;bruB;aw9L;uF3q", "pdf_size": 7646550, "rating": "4;6;7;7", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "2;2;4;4", "wc_summary": "93;123;93;134", "wc_strengths": "45;36;92;91", "wc_weaknesses": "145;391;122;341", "wc_questions": "216;31;35;57", "wc_limitations": "7;9;5;2", "wc_review": "506;590;347;625", "wc_reply_reviewers": "902;140;143;91", "wc_reply_authors": "1120;25;716;29", "reply_reviewers": "3;1;2;1", "reply_authors": "5;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 110.75, 18.171062159378575 ], "wc_strengths_avg": [ 66.0, 25.700194551792794 ], "wc_weaknesses_avg": [ 249.75, 117.86724523802192 ], "wc_questions_avg": [ 84.75, 76.42111946314317 ], "wc_limitations_avg": [ 5.75, 2.5860201081971503 ], "wc_review_avg": [ 517.0, 107.25437054031877 ], "wc_reply_reviewers_avg": [ 319.0, 337.2276679040437 ], "wc_reply_authors_avg": [ 472.5, 467.8399833276331 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5800432145814744598&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ibm.com;ibm.com;;ibm.com", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "International Business Machines Corporation;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Safe Exploration in Reinforcement Learning: A 
Generalized Formulation and Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71024", "id": "dQLsvKNwZC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d4cd12ef6efedbf26b69b410f1f7d67-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dQLsvKNwZC", "openreview": "https://openreview.net/forum?id=dQLsvKNwZC", "poster": "/media/PosterPDFs/NeurIPS%202023/71024.png?t=1700534011.057683", "slides": "https://nips.cc/virtual/2023/poster/71024", "video": "https://nips.cc/virtual/2023/poster/71024", "author_site": "Akifumi Wachi, Wataru Hashimoto, Xun Shen, Kazumune Hashimoto", "tldr": "", "abstract": "Safe exploration is essential for the practical use of reinforcement learning (RL) in many real-world scenarios. In this paper, we present a generalized safe exploration (GSE) problem as a unified formulation of common safe exploration problems. We then propose a solution of the GSE problem in the form of a meta-algorithm for safe exploration, MASE, which combines an unconstrained RL algorithm with an uncertainty quantifier to guarantee safety in the current episode while properly penalizing unsafe explorations before actual safety violation to discourage them in future episodes. The advantage of MASE is that we can optimize a policy while guaranteeing with a high probability that no safety constraint will be violated under proper assumptions. Specifically, we present two variants of MASE with different constructions of the uncertainty quantifier: one based on generalized linear models with theoretical guarantees of safety and near-optimality, and another that combines a Gaussian process to ensure safety with a deep RL algorithm to maximize the reward. Finally, we demonstrate that our proposed algorithm achieves better performance than state-of-the-art algorithms on grid-world and Safety Gym benchmarks without violating any safety constraints, even during training.", "keywords": "Reinforcement Learning;Safety Exploration", "primary_area": "", "supplementary_material": "", "author": "Akifumi Wachi;Wataru Hashimoto;Xun Shen;Kazumune Hashimoto", "authorids": "~Akifumi_Wachi2;~Wataru_Hashimoto1;~Xun_Shen1;~Kazumune_Hashimoto1", "gender": "M;M;M;M", "homepage": "https://akifumi-wachi-4.github.io/website/;;https://sites.google.com/view/xunshen/home;https://sites.google.com/view/kazumunehashimotoupdate/", "dblp": "218/7526;44/2724;193/0660;166/3737", "google_scholar": "https://scholar.google.co.jp/citations?user=iC2b9GUAAAAJ;;TPQUTVsAAAAJ;https://scholar.google.co.jp/citations?user=uk_8zNcAAAAJ", "orcid": ";0000-0001-8473-0593;;", "linkedin": "akifumi-wachi-008654123/?originalSubdomain=jp;;%E8%BF%85-%E6%B2%88-8a3387224/;", "or_profile": "~Akifumi_Wachi2;~Wataru_Hashimoto1;~Xun_Shen1;~Kazumune_Hashimoto1", "aff": "LINE;Osaka University;Osaka University;Osaka University", "aff_domain": "linecorp.com;osaka-u.ac.jp;eei.eng.osaka-u.ac.jp;osaka-u.ac.jp", "position": "Senior Research Scientist;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwachi2023safe,\ntitle={Safe Exploration in Reinforcement Learning: A Generalized Formulation and Algorithms},\nauthor={Akifumi Wachi and Wataru Hashimoto and Xun Shen and Kazumune Hashimoto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dQLsvKNwZC}\n}", "github": "", "project": "", "reviewers": "Q24U;sDLd;pGdw;zpL1", "pdf_size": 1326042, "rating": "5;5;7;7", "confidence": 
"3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;1;3;3", "presentation": "3;2;3;2", "wc_summary": "96;121;79;88", "wc_strengths": "46;70;106;56", "wc_weaknesses": "321;318;118;259", "wc_questions": "51;40;51;41", "wc_limitations": "9;13;14;43", "wc_review": "523;562;368;487", "wc_reply_reviewers": "13;25;0;194", "wc_reply_authors": "26;474;0;230", "reply_reviewers": "1;1;0;1", "reply_authors": "2;4;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.0, 15.636495771111889 ], "wc_strengths_avg": [ 69.5, 22.73213584333861 ], "wc_weaknesses_avg": [ 254.0, 82.31949951256992 ], "wc_questions_avg": [ 45.75, 5.261891294962297 ], "wc_limitations_avg": [ 19.75, 13.5531361684298 ], "wc_review_avg": [ 485.0, 72.57065522647567 ], "wc_reply_reviewers_avg": [ 58.0, 79.01582120056717 ], "wc_reply_authors_avg": [ 182.5, 190.41205318991757 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9819659721634816788&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "linecorp.com;osaka-u.ac.jp;eei.eng.osaka-u.ac.jp;osaka-u.ac.jp", "author_num": 4, "aff_unique_index": "1;1;1", "aff_unique_norm": ";Osaka University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.osaka-u.ac.jp", "aff_unique_abbr": ";Osaka U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";Japan" }, { "title": "NeuralGF: Unsupervised Point Normal Estimation by Learning Neural Gradient Function", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71023", "id": "dR6p49RYLq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d027a5c93d484a4312cc486d399c62c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dR6p49RYLq", "openreview": "https://openreview.net/forum?id=dR6p49RYLq", "poster": "/media/PosterPDFs/NeurIPS%202023/71023.png?t=1702353701.1182244", "slides": "https://nips.cc/virtual/2023/poster/71023", "video": "https://nips.cc/virtual/2023/poster/71023", "author_site": "Qing Li, Huifang Feng, Kanle Shi, Yue Gao, Yi Fang, Yu-Shen Liu, Zhizhong Han", "tldr": "", "abstract": "Normal estimation for 3D point clouds is a fundamental task in 3D geometry processing. The state-of-the-art methods rely on priors of fitting local surfaces learned from normal supervision. However, normal supervision in benchmarks comes from synthetic shapes and is usually not available from real scans, thereby limiting the learned priors of these methods. In addition, normal orientation consistency across shapes remains difficult to achieve without a separate post-processing procedure. To resolve these issues, we propose a novel method for estimating oriented normals directly from point clouds without using ground truth normals as supervision. We achieve this by introducing a new paradigm for learning neural gradient functions, which encourages the neural network to fit the input point clouds and yield unit-norm gradients at the points. 
Specifically, we introduce loss functions to facilitate query points to iteratively reach the moving targets and aggregate onto the approximated surface, thereby learning a global surface representation of the data. Meanwhile, we incorporate gradients into the surface approximation to measure the minimum signed deviation of queries, resulting in a consistent gradient field associated with the surface. These techniques lead to our deep unsupervised oriented normal estimator that is robust to noise, outliers and density variations. Our excellent results on widely used benchmarks demonstrate that our method can learn more accurate normals for both unoriented and oriented normal estimation tasks than the latest methods. The source code and pre-trained model are publicly available.", "keywords": "Point Clouds;Normal Estimation;Neural Gradient", "primary_area": "", "supplementary_material": "/attachment/49659ab5e5ee7f380fdfeedfeae0f8156250224b.pdf", "author": "Qing Li;Huifang Feng;Kanle Shi;Yue Gao;Yi Fang;Yu-Shen Liu;Zhizhong Han", "authorids": "~Qing_Li17;~Huifang_Feng1;~Kanle_Shi1;~Yue_Gao4;~Yi_Fang2;~Yu-Shen_Liu1;~Zhizhong_Han2", "gender": ";F;;M;M;M;M", "homepage": ";;;http://www.gaoyue.org;http://mmvc.engineering.nyu.edu/;https://yushen-liu.github.io/;https://h312h.github.io/", "dblp": ";65/5023-2;;33/3099-2;96/361-6;44/2229.html;166/5173", "google_scholar": ";;;UTDfWocAAAAJ;j-cyhzwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-6874-698X;;;;0000-0001-7305-1915;", "linkedin": ";;;;;;", "or_profile": "~Qing_Li17;~Huifang_Feng1;~Kanle_Shi1;~Yue_Gao4;~Yi_Fang2;~Yu-Shen_Liu1;~Zhizhong_Han2", "aff": ";Xiamen University;;Tsinghua University;New York University;Tsinghua University;Wayne State University", "aff_domain": ";xmu.edu.cn;;tsinghua.edu.cn;nyu.edu;tsinghua.edu.cn;wayne.edu", "position": ";PhD student;;Associate Professor;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2023neuralgf,\ntitle={Neural{GF}: Unsupervised Point Normal Estimation by Learning Neural Gradient Function},\nauthor={Qing Li and Huifang Feng and Kanle Shi and Yue Gao and Yi Fang and Yu-Shen Liu and Zhizhong Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dR6p49RYLq}\n}", "github": "", "project": "", "reviewers": "vh4j;6zu3;1E4M;wJx4;HD1H", "pdf_size": 17293763, "rating": "4;6;6;6;6", "confidence": "4;3;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;2", "presentation": "4;2;2;3;3", "wc_summary": "85;172;93;133;104", "wc_strengths": "42;47;85;72;112", "wc_weaknesses": "47;252;256;259;137", "wc_questions": "87;49;57;204;31", "wc_limitations": "31;8;16;18;15", "wc_review": "292;528;507;686;399", "wc_reply_reviewers": "0;122;121;40;100", "wc_reply_authors": "64;252;19;116;0", "reply_reviewers": "0;2;1;2;1", "reply_authors": "2;4;2;2;1", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 117.4, 31.777979797337654 ], "wc_strengths_avg": [ 71.6, 25.663982543634962 ], "wc_weaknesses_avg": [ 190.2, 85.11028140007528 ], "wc_questions_avg": [ 85.6, 61.90185780733887 ], "wc_limitations_avg": [ 17.6, 7.4993333037010705 ], "wc_review_avg": [ 482.4, 132.1704959512523 ], "wc_reply_reviewers_avg": [ 76.6, 48.553475673735235 ], 
"wc_reply_authors_avg": [ 90.2, 90.2627276343896 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10500465991909954280&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";xmu.edu.cn;;tsinghua.edu.cn;nyu.edu;tsinghua.edu.cn;wayne.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Xiamen University;Tsinghua University;New York University;Wayne State University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.tsinghua.edu.cn;https://www.nyu.edu;https://wayne.edu", "aff_unique_abbr": "XMU;THU;NYU;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Slow and Weak Attractor Computation Embedded in Fast and Strong E-I Balanced Neural Dynamics", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71022", "id": "dSRyKIYRnP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/425ee25d6c22ef98b67328273b8f95d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dSRyKIYRnP", "openreview": "https://openreview.net/forum?id=dSRyKIYRnP", "poster": "/media/PosterPDFs/NeurIPS%202023/71022.png?t=1701849244.9076126", "slides": "https://nips.cc/virtual/2023/poster/71022", "video": "https://nips.cc/virtual/2023/poster/71022", "author_site": "Xiaohan Lin, Liyuan Li, Boxin Shi, Tiejun Huang, Yuanyuan Mi, Si Wu", "tldr": "", "abstract": "Attractor networks require neuronal connections to be highly structured in order to maintain attractor states that represent information, while excitation and inhibition balanced networks (E-INNs) require neuronal connections to be random and sparse to generate irregular neuronal firings. Despite being regarded as canonical models of neural circuits, both types of networks are usually studied in isolation, and it remains unclear how they coexist in the brain, given their very different structural demands. In this study, we investigate the compatibility of continuous attractor neural networks (CANNs) and E-INNs. In line with recent experimental data, we find that a neural circuit can exhibit both the traits of CANNs and E-INNs if the neuronal synapses consist of two sets: one set is strong and fast for irregular firing, and the other set is weak and slow for attractor dynamics. Our results from simulations and theoretical analysis reveal that the network also exhibits enhanced performance compared to the case of using only one set of synapses, with accelerated convergence of attractor states and retained E-I balanced condition for localized input. We also apply the network model to solve a real-world tracking problem and demonstrate that it can track fast-moving objects well. 
We hope that this study provides insight into how structured neural computations are realized by irregular firings of neurons.", "keywords": "Continuous attractor neural network; Excitation inhibition balance; Brain-inspired algorithms; Object tracking;", "primary_area": "", "supplementary_material": "/attachment/5d551c1ac1d18aa24198ab3fd74b8ea9a093197d.pdf", "author": "Xiaohan Lin;Liyuan Li;Boxin Shi;Tiejun Huang;Yuanyuan Mi;Si Wu", "authorids": "~Xiaohan_Lin1;liyuanli@pku.edu.cn;~Boxin_Shi3;~Tiejun_Huang1;~Yuanyuan_Mi1;~Si_Wu1", "gender": ";;M;M;F;M", "homepage": ";;http://camera.pku.edu.cn;https://idm.pku.edu.cn/~tjhuang/;;https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm", "dblp": ";;69/783;h/TiejunHuang;48/9864;25/437-1", "google_scholar": ";;K1LjZxcAAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;;", "orcid": ";;0000-0001-6749-0364;0000-0002-4234-6099;0000-0002-4156-5089;", "linkedin": ";;;;;", "or_profile": "~Xiaohan_Lin1;liyuanli@pku.edu.cn;~Boxin_Shi3;~Tiejun_Huang1;~Yuanyuan_Mi1;~Si_Wu1", "aff": ";;Peking University;Peking University;Chongqing University;Peking University", "aff_domain": ";;pku.edu.cn;pku.edu.cn;cqu.edu.cn;pku.edu.cn", "position": ";;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlin2023slow,\ntitle={Slow and Weak Attractor Computation Embedded in Fast and Strong E-I Balanced Neural Dynamics},\nauthor={Xiaohan Lin and Liyuan Li and Boxin Shi and Tiejun Huang and Yuanyuan Mi and Si Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dSRyKIYRnP}\n}", "github": "", "project": "", "reviewers": "Mp8Q;bo6j;FMpH;kktH", "pdf_size": 4939482, "rating": "7;7;7;7", "confidence": "3;3;4;3", "soundness": "4;3;2;3", "novelty": "3;3;3;2", "presentation": "3;2;3;2", "wc_summary": "167;103;73;71", "wc_strengths": "58;39;53;159", "wc_weaknesses": "110;89;237;84", "wc_questions": "144;85;146;2", "wc_limitations": "10;1;4;1", "wc_review": "489;317;513;317", "wc_reply_reviewers": "17;14;30;8", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.5, 38.79110722833263 ], "wc_strengths_avg": [ 77.25, 47.70940682926167 ], "wc_weaknesses_avg": [ 130.0, 62.54198589747531 ], "wc_questions_avg": [ 94.25, 58.62753192826729 ], "wc_limitations_avg": [ 4.0, 3.6742346141747673 ], "wc_review_avg": [ 409.0, 92.39047569960877 ], "wc_reply_reviewers_avg": [ 17.25, 8.042853971072706 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16449505756875893847&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";;pku.edu.cn;pku.edu.cn;cqu.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Peking University;Chongqing University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.cqu.edu.cn", "aff_unique_abbr": "Peking U;CQU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Does a sparse ReLU network training problem 
always admit an optimum ?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71021", "id": "dTj5tH94xv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cdda0657a9f32bc7ddd4343686e7371e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dTj5tH94xv", "openreview": "https://openreview.net/forum?id=dTj5tH94xv", "poster": "/media/PosterPDFs/NeurIPS%202023/71021.png?t=1701436456.2972732", "slides": "https://nips.cc/virtual/2023/poster/71021", "video": "https://nips.cc/virtual/2023/poster/71021", "author_site": "TUNG LE, QUOC-TUNG LE, Remi Gribonval, Elisa Riccietti", "tldr": "", "abstract": "Given a training set, a loss function, and a neural network architecture, it is often taken for granted that optimal network parameters exist, and a common practice is to apply available optimization algorithms to search for them. In this work, we show that the existence of an optimal solution is not always guaranteed, especially in the context of sparse ReLU neural networks.\nIn particular, we first show that optimization problems involving deep networks with certain sparsity patterns do not always have optimal parameters, and that optimization algorithms may then diverge. Via a new topological relation between sparse ReLU neural networks and their linear counterparts, we derive --using existing tools from real algebraic geometry-- an algorithm to verify that a given sparsity pattern suffers from this issue. Then, the existence of a global optimum is proved for every concrete optimization problem involving \na shallow sparse ReLU neural network of output dimension one. Overall, the analysis is based on the investigation of two topological properties of the space of functions implementable as sparse ReLU neural networks: a best approximation property, and a closedness property, both in the uniform norm. This is studied both for (finite) domains corresponding to practical training on finite training sets, and for more general domains such as the unit cube. This allows us to provide conditions for the guaranteed existence of an optimum given a sparsity pattern. 
The results apply not only to several sparsity patterns proposed in recent works on network pruning/sparsification, but also to classical dense neural networks, including architectures not covered by existing results.", "keywords": "Topology;best approximation property;closedness;function space;sparse neural networks", "primary_area": "", "supplementary_material": "/attachment/2af65d984280707d20af96963f784e33a41f0b6e.zip", "author": "TUNG QUOC LE;R\u00e9mi Gribonval;Elisa Riccietti", "authorids": "~TUNG_QUOC_LE1;~R\u00e9mi_Gribonval1;~Elisa_Riccietti1", "gender": "M;;F", "homepage": "https://tung-qle.github.io/;;http://perso.ens-lyon.fr/elisa.riccietti/", "dblp": "296/4467;;179/5701", "google_scholar": "odYBqHgAAAAJ;;NtPpissAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~TUNG_QUOC_LE1;~R\u00e9mi_Gribonval1;~Elisa_Riccietti1", "aff": "Ecole Normale Sup\u00e9rieure de Lyon;;ENS Lyon", "aff_domain": "ens-lyon.fr;;ens.fr", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nle2023does,\ntitle={Does a sparse Re{LU} network training problem always admit an optimum ?},\nauthor={TUNG QUOC LE and R{\\'e}mi Gribonval and Elisa Riccietti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dTj5tH94xv}\n}", "github": "", "project": "", "reviewers": "oxZZ;ay34;7u6K;5AbX", "pdf_size": 887768, "rating": "6;7;7;7", "confidence": "3;4;3;3", "soundness": "4;4;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "14;93;63;88", "wc_strengths": "80;131;137;157", "wc_weaknesses": "186;581;448;57", "wc_questions": "2;32;146;32", "wc_limitations": "8;18;13;2", "wc_review": "290;855;807;336", "wc_reply_reviewers": "18;44;45;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 31.2929704566377 ], "wc_strengths_avg": [ 126.25, 28.38463492807332 ], "wc_weaknesses_avg": [ 318.0, 207.13160067937486 ], "wc_questions_avg": [ 53.0, 55.072679252057455 ], "wc_limitations_avg": [ 10.25, 5.931905258852336 ], "wc_review_avg": [ 572.0, 260.0644150974908 ], "wc_reply_reviewers_avg": [ 32.0, 12.549900398011133 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5824732457004245734&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 13, "email": "ens-lyon.fr;;ens.fr", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Lyon", "aff_unique_dep": "", "aff_unique_url": "https://www.ens-lyon.fr", "aff_unique_abbr": "ENS de Lyon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "RECKONING: Reasoning through Dynamic Knowledge Encoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71020", "id": "dUAcAtCuKk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c518f504ad5894ccb264a9890f0f5544-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dUAcAtCuKk", "openreview": "https://openreview.net/forum?id=dUAcAtCuKk", 
"poster": "/media/PosterPDFs/NeurIPS%202023/71020.png?t=1702237865.746123", "slides": "https://nips.cc/virtual/2023/poster/71020", "video": "https://nips.cc/virtual/2023/poster/71020", "author_site": "Zeming Chen, Gail Weiss, Eric Mitchell, Asli Celikyilmaz, Antoine Bosselut", "tldr": "", "abstract": "Recent studies on transformer-based language models show that they can answer questions by reasoning over knowledge provided as part of the context (i.e., in-context reasoning). However, since the available knowledge is often not filtered for a particular question, in-context reasoning can be sensitive to distractor facts, additional content that is irrelevant to a question but that may be relevant for a different question (i.e., not necessarily random noise). In these situations, the model fails to\ndistinguish the necessary knowledge to answer the question, leading to spurious reasoning and degraded performance. This reasoning failure contrasts with the model\u2019s apparent ability to distinguish its contextual knowledge from all the knowledge it has memorized during pre-training. Following this observation, we propose teaching the model to reason more robustly by folding the provided contextual knowledge into the model\u2019s parameters before presenting it with a question. Our method, RECKONING, is a bi-level learning algorithm that teaches language models to reason by updating their parametric knowledge through back-propagation, allowing them to answer questions using the updated parameters. During training, the inner loop rapidly adapts a copy of the model weights to encode contextual knowledge into its parameters. In the outer loop, the model learns to use the updated weights to reproduce and answer reasoning questions about the memorized knowledge. Our experiments on three diverse multi-hop reasoning datasets show that RECKONING\u2019s performance improves over the in-context reasoning baseline (by up to 4.5%). 
We also find that compared to in-context reasoning, RECKONING generalizes better to longer reasoning chains unseen during training, is more robust to distractors in the context, and is computationally more efficient when multiple questions are asked about the same knowledge.", "keywords": "natural language processing;multi-hop reasoning;knowledge memorisation", "primary_area": "", "supplementary_material": "/attachment/b422e9bb2f8fcd08ac0edd11ce8c1ad336efdc9a.zip", "author": "Zeming Chen;Gail Weiss;Eric Mitchell;Asli Celikyilmaz;Antoine Bosselut", "authorids": "~Zeming_Chen1;~Gail_Weiss1;~Eric_Mitchell1;~Asli_Celikyilmaz1;~Antoine_Bosselut1", "gender": "M;F;M;F;M", "homepage": "https://eric11eca.github.io/;https://gailweiss.github.io;https://ericmitchell.ai;https://asli.us;https://atcbosselut.github.io/", "dblp": "95/10696;210/2407;238/0419;15/3724;184/3742", "google_scholar": "-gqyv8cAAAAJ;https://scholar.google.co.il/citations?user=qEc_VqcAAAAJ;q77J4fgAAAAJ;https://scholar.google.com/citations?hl=en;XD9hkJwAAAAJ", "orcid": "0000-0002-2389-6968;;0000-0002-7487-1744;;", "linkedin": "zeming-chen-769985123/;;;aslicelikyilmaz/;", "or_profile": "~Zeming_Chen1;~Gail_Weiss1;~Eric_Mitchell1;~Asli_Celikyilmaz1;~Antoine_Bosselut1", "aff": "Swiss Federal Institute of Technology Lausanne (EPFL);EPFL - EPF Lausanne;Stanford University;FAIR ;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;stanford.edu;meta.com;epfl.ch", "position": "PhD student;Postdoc;PhD student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023reckoning,\ntitle={{RECKONING}: Reasoning through Dynamic Knowledge Encoding},\nauthor={Zeming Chen and Gail Weiss and Eric Mitchell and Asli Celikyilmaz and Antoine Bosselut},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dUAcAtCuKk}\n}", "github": "", "project": "", "reviewers": "5k3Z;W7M8;ZqBF;UhBM;ymTg", "pdf_size": 1482694, "rating": "5;6;6;7;7", "confidence": "4;4;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;4;3;3;3", "wc_summary": "104;120;154;144;57", "wc_strengths": "61;112;50;55;22", "wc_weaknesses": "102;127;70;87;23", "wc_questions": "77;27;110;66;250", "wc_limitations": "9;4;9;1;1", "wc_review": "353;390;393;353;353", "wc_reply_reviewers": "0;0;31;16;58", "wc_reply_authors": "24;0;20;24;25", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 115.8, 34.26018096858217 ], "wc_strengths_avg": [ 60.0, 29.23696290656743 ], "wc_weaknesses_avg": [ 81.8, 34.85627633583369 ], "wc_questions_avg": [ 106.0, 76.72548468403441 ], "wc_limitations_avg": [ 4.8, 3.6 ], "wc_review_avg": [ 368.4, 18.8849146145806 ], "wc_reply_reviewers_avg": [ 21.0, 21.79908254950194 ], "wc_reply_authors_avg": [ 18.6, 9.457272334029511 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14669372230876414600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch;stanford.edu;meta.com;epfl.ch", "author_num": 5, "aff_unique_index": 
"0;1;2;3;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;Stanford University;Meta", "aff_unique_dep": ";;;Facebook AI Research", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "EPFL;EPFL;Stanford;FAIR", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Lausanne;Stanford;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "id": "dUFf0pgkC7", "title": "HHD-Ethiopic: A Historical Handwritten Dataset for Ethiopic OCR with Baseline Models and Human-level Performance", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "This paper introduces HHD-Ethiopic, a new OCR dataset for historical handwritten Ethiopic script, characterized by a unique syllabic writing system, low resource availability, and complex orthographic diacritics. The dataset consists of roughly 80,000 annotated text-line images from 1700 pages of $18^{th}$ to $20^{th}$ century documents, including a training set with text-line images from the $19^{th}$ to $20^{th}$ century and two test sets. One is distributed similarly to the training set with nearly 6,000 text-line images, and the other contains only images from the $18^{th}$ century manuscripts, with around 16,000 images. The former test set allows us to check baseline performance in the classical IID setting (Independently and Identically Distributed), while the latter addresses a more realistic setting in which the test set is drawn from a different distribution than the training set (Out-Of-Distribution or OOD). Multiple annotators labeled all text-line images for the HHD-Ethiopic dataset, and an expert supervisor double-checked them. We assessed human-level recognition performance and compared it with state-of-the-art OCR models using the Character Error Rate (CER) metric. Our results show that the model performed comparably to human-level recognition on the $18^{th}$ century test set and outperformed humans on the IID test set. However, the unique challenges posed by the Ethiopic script, such as detecting complex diacritics, still present difficulties for the models. Our baseline evaluation and HHD-Ethiopic dataset will stimulate further research on tailored OCR techniques for the Ethiopic script. 
The HHD-Ethiopic dataset and the code are publicly available at https://github.com/bdu-birhanu/HHD-Ethiopic", "keywords": "HHD-Ethiopic dataset;Historical Ethiopic script;Human-level recognition performance;Character error rate;low-resource script recognition", "primary_area": "", "supplementary_material": "/attachment/3209faf65371c13e41ef829ce75108cc93ed485a.pdf", "author": "Birhanu Hailu Belay;Isabelle Guyon;Tadele Mengiste;Bezawork Tilahun;Marcus Liwicki;Tesfa Tegegne;Romain Egele;Tsiyon Worku", "authorids": "~Birhanu_Hailu_Belay1;~Isabelle_Guyon1;tadele.mengiste@bdu.edu.et;bezawork.tilahun@bdu.edu.et;~Marcus_Liwicki1;tesfa.tegegne@bdu.edu.et;~Romain_Egele1;tsiyon.worku@bdu.edu.et", "gender": "M;F;;;Unspecified;;;", "homepage": "https://sites.google.com/view/birhanu-belay/home;;;;https://www.ltu.se/staff/m/marliw?l=en;;http://deathn0t.github.io;", "dblp": "254/8159;31/6176;;;;;248/7760;", "google_scholar": "0ICZbqcAAAAJ;;;;https://scholar.google.ch/citations?user=n1Y4zq4AAAAJ;;K4yYtFwAAAAJ;", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Birhanu_Hailu_Belay1;~Isabelle_Guyon1;tadele.mengiste@bdu.edu.et;bezawork.tilahun@bdu.edu.et;~Marcus_Liwicki1;tesfa.tegegne@bdu.edu.et;~Romain_Egele1;tsiyon.worku@bdu.edu.et", "aff": "Universite of Paris-saclay;Universit\u00e9 Paris-Saclay;;;Lule\u00e5 University of Technology;;Argonne National Laboratory;", "aff_domain": "universite-paris-saclay.fr;universite-paris-saclay.fr;;;ltu.se;;anl.gov;", "position": "Postdoc;Full Professor;;;Full Professor;;PhD student;", "bibtex": "@misc{\nbelay2023hhdethiopic,\ntitle={{HHD}-Ethiopic: A Historical Handwritten Dataset for Ethiopic {OCR} with Baseline Models and Human-level Performance},\nauthor={Birhanu Hailu Belay and Isabelle Guyon and Tadele Mengiste and Bezawork Tilahun and Marcus Liwicki and Tesfa Tegegne and Romain Egele and Tsiyon Worku},\nyear={2023},\nurl={https://openreview.net/forum?id=dUFf0pgkC7}\n}", "github": "", "project": "", "reviewers": "uxiq;yVsL;cn6S;BcAH", "site": "https://openreview.net/forum?id=dUFf0pgkC7", "pdf_size": 5400835, "rating": "5;5;6;7", "confidence": "3;5;3;4", "wc_summary_and_contributions": "79;50;81;39", "wc_strengths": "50;25;40;28", "wc_improvement": "119;509;34;114", "wc_limitations": "94;1;232;5", "wc_correctness": "73;1;25;16", "wc_clarity": "17;1;5;4", "wc_relation_to_prior_work": "13;1;10;1", "wc_documentation": "24;1;18;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "470;590;446;225", "wc_reply_reviewers": "25;71;0;20", "wc_reply_authors": "758;646;552;418", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 62.25, 18.18481509391833 ], "wc_strengths_avg": [ 35.75, 9.959292143521045 ], "wc_improvement_avg": [ 194.0, 184.96621313093914 ], "wc_limitations_avg": [ 83.0, 93.71499346422641 ], "wc_correctness_avg": [ 28.75, 26.947866334832522 ], "wc_clarity_avg": [ 6.75, 6.098155458825234 ], "wc_relation_to_prior_work_avg": [ 6.25, 5.356071321407137 ], "wc_documentation_avg": [ 15.0, 8.514693182963201 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 432.75, 131.76755101313827 ], "wc_reply_reviewers_avg": [ 29.0, 25.990382836734053 ], "wc_reply_authors_avg": [ 593.5, 124.83889618223961 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 
-0.0909090909090909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=944030300266927705&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 de Paris-Saclay;Universit\u00e9 Paris-Saclay;Lule\u00e5 University of Technology;Argonne National Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.universite-paris-saclay.fr;https://www.universite-paris-saclay.fr;https://www.ltu.se;https://www.anl.gov", "aff_unique_abbr": "Paris-Saclay;UPSaclay;LTU;ANL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "France;Sweden;United States" }, { "title": "DataComp: In search of the next generation of multimodal datasets", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73525", "id": "dVaWCDMBof", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/56332d41d55ad7ad8024aac625881be7-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=dVaWCDMBof", "openreview": "https://openreview.net/forum?id=dVaWCDMBof", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73525", "video": "https://nips.cc/virtual/2023/poster/73525", "author_site": "Samir Yitzhak Gadre, Gabriel Ilharco, Alex Fang, Jonathan Hayase, Georgios Smyrnis, Thao Nguyen, Ryan Marten, Mitchell Wortsman, Dhruba Ghosh, Jieyu Zhang, Eyal Orgad, Rahim Entezari, Giannis Daras, Sarah Pratt, Vivek Ramanujan, Yonatan Bitton, Kalyani Marathe, Stephen Mussmann, Richard Vencu, Mehdi Cherti, Ranjay Krishna, Pang Wei Koh, Olga Saukh, Alexander Ratner, Shuran Song, Hannaneh Hajishirzi, Ali Farhadi, Romain Beaumont, Sewoong Oh, Alex Dimakis, Jenia Jitsev, Yair Carmon, Vaishaal Shankar, Ludwig Schmidt", "tldr": "", "abstract": "Multimodal datasets are a critical component in recent breakthroughs such as CLIP, Stable Diffusion and GPT-4, yet their design does not receive the same research attention as model architectures or training algorithms. To address this shortcoming in the machine learning ecosystem, we introduce DataComp, a testbed for dataset experiments centered around a new candidate pool of 12.8 billion image-text pairs from Common Crawl. Participants in our benchmark design new filtering techniques or curate new data sources and then evaluate their new dataset by running our standardized CLIP training code and testing the resulting model on 38 downstream test sets. Our benchmark consists of multiple compute scales spanning four orders of magnitude, which enables the study of scaling trends and makes the benchmark accessible to researchers with varying resources. Our baseline experiments show that the DataComp workflow leads to better training sets. Our best baseline, DataComp-1B, enables training a CLIP ViT-L/14 from scratch to 79.2% zero-shot accuracy on ImageNet, outperforming OpenAI's CLIP ViT-L/14 by 3.7 percentage points while using the same training procedure and compute. 
We release DataComp and all accompanying code at www.datacomp.ai.", "keywords": "CLIP;zero-shot;data curation;vision-and-language;datasets;pre-training;benchmark", "primary_area": "", "supplementary_material": "/attachment/636261a2aa4e37b7c700544b279c7c9a82a89802.pdf", "author": "Samir Yitzhak Gadre;Gabriel Ilharco;Alex Fang;Jonathan Hayase;Georgios Smyrnis;Thao Nguyen;Ryan Marten;Mitchell Wortsman;Dhruba Ghosh;Jieyu Zhang;Eyal Orgad;Rahim Entezari;Giannis Daras;Sarah M Pratt;Vivek Ramanujan;Yonatan Bitton;Kalyani Marathe;Stephen Mussmann;Richard Vencu;Mehdi Cherti;Ranjay Krishna;Pang Wei Koh;Olga Saukh;Alexander Ratner;Shuran Song;Hannaneh Hajishirzi;Ali Farhadi;Romain Beaumont;Sewoong Oh;Alex Dimakis;Jenia Jitsev;Yair Carmon;Vaishaal Shankar;Ludwig Schmidt", "authorids": "~Samir_Yitzhak_Gadre1;~Gabriel_Ilharco1;~Alex_Fang1;~Jonathan_Hayase2;~Georgios_Smyrnis1;~Thao_Nguyen3;~Ryan_Marten1;~Mitchell_Wortsman1;~Dhruba_Ghosh1;~Jieyu_Zhang1;~Eyal_Orgad1;~Rahim_Entezari1;~Giannis_Daras1;~Sarah_M_Pratt1;~Vivek_Ramanujan1;~Yonatan_Bitton1;~Kalyani_Marathe1;~Stephen_Mussmann1;~Richard_Vencu1;~Mehdi_Cherti2;~Ranjay_Krishna1;~Pang_Wei_Koh1;~Olga_Saukh1;~Alexander_Ratner1;~Shuran_Song3;~Hannaneh_Hajishirzi1;~Ali_Farhadi3;~Romain_Beaumont1;~Sewoong_Oh1;~Alex_Dimakis1;~Jenia_Jitsev1;~Yair_Carmon1;~Vaishaal_Shankar1;~Ludwig_Schmidt1", "gender": "M;M;;M;M;F;;M;;M;M;M;M;F;M;M;F;M;M;M;M;F;M;F;F;M;M;M;M;;M;M;M;M", "homepage": "https://sagadre.github.io/;http://gabrielilharco.com/;;https://jhayase.github.io/;;https://thaonguyen19.github.io/;https://ryanmarten.com;https://mitchellnw.github.io/;https://djghosh13.github.io/;https://jieyuz2.github.io/;;http://rahimentezari.github.io;https://giannisdaras.github.io/;;https://vkramanuj.github.io;https://yonatanbitton.github.io/;https://kalyani7195.github.io/;https://steve.mussmann.us;;http://ranjaykrishna.com;http://cs.stanford.edu/~pangwei;http://www.olgasaukh.com;https://ajratner.github.io/;https://shurans.github.io/;https://homes.cs.washington.edu/~hannaneh/;https://homes.cs.washington.edu/~ali/;;https://homes.cs.washington.edu/~sewoong/;https://people.eecs.berkeley.edu/~alexdimakis/;;https://www.cs.tau.ac.il/~ycarmon/;http://people.csail.mit.edu/ludwigs/;;http://vaishaal.com", "dblp": "246/7901;249/2616;260/0449;244/9599;255/9114;77/2922;;232/2273;292/8318;;;193/7037.html;254/2703;;225/4845;277/7042;;155/8913;;167/3785;10/10453;37/2725;180/5513;;52/1296;37/5826;;80/4366;19/5000.html;53/5156;13/558;141/2720;;159/3628", "google_scholar": "oAhlg9gAAAAJ;https://scholar.google.com/citations?hl=en;;Zw-l1d8AAAAJ;;DvJG-_8AAAAJ;76LRPYuZDwkC;fzRnjFgAAAAJ;lHuZ55oAAAAJ;T_INUHUAAAAJ;;CmTeX7kAAAAJ;LaScvbQAAAAJ;;yXFPyNMAAAAJ;P9Fpf4sAAAAJ;gCxlvdcAAAAJ;oGah6EgAAAAJ;;IcqahyAAAAAJ;Nn990CkAAAAJ;https://scholar.google.ch/citations?user=f-MDKlYAAAAJ;rfwwtFYAAAAJ;https://scholar.google.com/citations?hl=en;LOV6_WIAAAAJ;jeOFRDsAAAAJ;jhtyYGAAAAAJ;55TAOdgAAAAJ;JSFmVQEAAAAJ;https://scholar.google.com/citations?hl=en;kTKmpT0AAAAJ;SWMKy70AAAAJ;https://scholar.google.fr/citations?user=JgOyYi8AAAAJ;", "orcid": ";;;0000-0002-3757-6586;;;;;0000-0002-8518-2696;0000-0002-1846-2436;;;;;;;;;;0000-0001-8784-2531;;0000-0001-7849-3368;;;;;;;;0000-0002-1221-7851;;;;", "linkedin": 
";;alex-fang-8a11a8115/;jonathan-hayase-5ab849128;;;ryan-marten/;;dhruba-ghosh-b82467170/;jieyu-zhang-3baaa8154/;eyal-orgad-64353a153/;;;sarahpratt;;yonatanbitton/;;stephen-mussmann-5516b480/;richardvencu;ranjay-krishna-1a344444/;;saukh/;alexander-ratner-038ba239/;;;;;;alex-dimakis-b1b20320/;;;ludwig-schmidt-87ba3612/;mehdi-cherti;", "or_profile": "~Samir_Yitzhak_Gadre1;~Gabriel_Ilharco1;~Alex_Fang1;~Jonathan_Hayase2;~Georgios_Smyrnis1;~Thao_Nguyen3;~Ryan_Marten1;~Mitchell_Wortsman1;~Dhruba_Ghosh1;~Jieyu_Zhang1;~Eyal_Orgad1;~Rahim_Entezari1;~Giannis_Daras1;~Sarah_M_Pratt1;~Vivek_Ramanujan1;~Yonatan_Bitton1;~Kalyani_Marathe1;~Stephen_Mussmann1;~Richard_Vencu1;~Ranjay_Krishna1;~Pang_Wei_Koh1;~Olga_Saukh1;~Alexander_Ratner1;~Shuran_Song3;~Hannaneh_Hajishirzi1;~Ali_Farhadi3;~Romain_Beaumont1;~Sewoong_Oh1;~Alex_Dimakis1;~Jenia_Jitsev1;~Yair_Carmon1;~Ludwig_Schmidt1;~mehdi_cherti1;~vaishaal_naanny_shankar1", "aff": "Columbia University;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;University of Washington;University of Texas, Austin;Meta;University of Illinois Urbana-Champaign;Google;University of Washington;University of Washington;Tel Aviv University;Complexity Science Hub;University of Texas, Austin;University of Washington;Apple;Hebrew University of Jerusalem;University of Washington, Seattle;University of Washington;;University of Washington;Google;Complexity Science Hub;Department of Computer Science, University of Washington;Columbia University;University of Washington;University of Washington;Google;University of Washington;University of Texas at Austin;Juelich Supercomputing Center, Research Center Juelich;Tel Aviv University;Allen Institute for Artificial Intelligence;Forschungszentrum J\u00fclich;Apple", "aff_domain": "columbia.edu;cs.washington.edu;cs.washington.edu;washington.edu;utexas.edu;meta.com;illinois.edu;google.com;uw.edu;cs.washington.edu;tau.ac.il;csh.ac.at;utexas.edu;uw.edu;apple.com;huji.ac.il;uw.edu;washington.edu;;cs.washington.edu;google.com;csh.ac.at;cs.washington.edu;cs.columbia.edu;uw.edu;cs.uw.edu;google.com;uw.edu;utexas.edu;fz-juelich.de;tau.ac.il;allenai.org;fz-juelich.de;apple.com", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Visiting Researcher;MS student;Intern;PhD student;PhD student;MS student;Resident PhD;PhD student;PhD student;Intern;PhD student;PhD student;Postdoc;;Assistant Professor;Researcher;Research Group Leader;Assistant Professor;Assistant Professor;Associate Professor;Full Professor;Software engineer;Associate Professor;Full Professor;Senior Scientist;Assistant Professor;Researcher;Postdoc;Researcher", "bibtex": "@inproceedings{\ngadre2023datacomp,\ntitle={DataComp: In search of the next generation of multimodal datasets},\nauthor={Samir Yitzhak Gadre and Gabriel Ilharco and Alex Fang and Jonathan Hayase and Georgios Smyrnis and Thao Nguyen and Ryan Marten and Mitchell Wortsman and Dhruba Ghosh and Jieyu Zhang and Eyal Orgad and Rahim Entezari and Giannis Daras and Sarah M Pratt and Vivek Ramanujan and Yonatan Bitton and Kalyani Marathe and Stephen Mussmann and Richard Vencu and Mehdi Cherti and Ranjay Krishna and Pang Wei Koh and Olga Saukh and Alexander Ratner and Shuran Song and Hannaneh Hajishirzi and Ali Farhadi and Romain Beaumont and Sewoong Oh and Alex Dimakis and Jenia Jitsev and Yair Carmon and Vaishaal Shankar and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks 
Track},\nyear={2023},\nurl={https://openreview.net/forum?id=dVaWCDMBof}\n}", "github": "", "project": "", "reviewers": "TaGm;GJCx;N9Hd;pyn4", "pdf_size": 1053321, "rating": "8;9;9;9", "confidence": "3;4;4;5", "wc_summary_and_contributions": "79;89;225;222", "wc_strengths": "31;37;307;73", "wc_improvement": "29;44;92;97", "wc_limitations": "12;72;11;14", "wc_correctness": "2;9;67;17", "wc_clarity": "1;8;9;5", "wc_relation_to_prior_work": "2;1;7;10", "wc_documentation": "24;1;48;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "181;262;767;448", "wc_reply_reviewers": "70;15;44;0", "wc_reply_authors": "166;459;1200;825", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;3", "rating_avg": [ 8.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 153.75, 69.84760196313113 ], "wc_strengths_avg": [ 112.0, 113.7233485261492 ], "wc_improvement_avg": [ 65.5, 29.53387885124472 ], "wc_limitations_avg": [ 27.25, 25.85899263312475 ], "wc_correctness_avg": [ 23.75, 25.52817071393875 ], "wc_clarity_avg": [ 5.75, 3.112474899497183 ], "wc_relation_to_prior_work_avg": [ 5.0, 3.6742346141747673 ], "wc_documentation_avg": [ 20.5, 17.89553016817328 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 414.5, 225.36470442374068 ], "wc_reply_reviewers_avg": [ 32.25, 26.929305598176867 ], "wc_reply_authors_avg": [ 662.5, 388.3416665772551 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 34, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 438, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17915054761876319494&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "columbia.edu;cs.washington.edu;cs.washington.edu;washington.edu;utexas.edu;meta.com;illinois.edu;google.com;uw.edu;cs.washington.edu;tau.ac.il;csh.ac.at;utexas.edu;uw.edu;apple.com;huji.ac.il;uw.edu;washington.edu;;cs.washington.edu;google.com;csh.ac.at;cs.washington.edu;cs.columbia.edu;uw.edu;cs.uw.edu;google.com;uw.edu;utexas.edu;fz-juelich.de;tau.ac.il;allenai.org;fz-juelich.de;apple.com", "author_num": 34, "aff_unique_index": "0;1;1;1;2;3;4;5;1;1;6;7;2;1;8;9;1;1;1;5;7;1;0;1;1;5;1;2;10;6;11;12;8", "aff_unique_norm": "Columbia University;University of Washington;University of Texas at Austin;Meta;University of Illinois Urbana-Champaign;Google;Tel Aviv University;Complexity Science Hub;Apple;Hebrew University of Jerusalem;Research Center Juelich;Allen Institute for Artificial Intelligence;Forschungszentrum J\u00fclich", "aff_unique_dep": ";Department of Computer Science;;Meta Platforms, Inc.;;Google;;;Apple Inc.;;Juelich Supercomputing Center;;", "aff_unique_url": "https://www.columbia.edu;https://www.washington.edu;https://www.utexas.edu;https://meta.com;https://illinois.edu;https://www.google.com;https://www.tau.ac.il;;https://www.apple.com;https://www.huji.ac.il;https://www.fz-juelich.de/;https://allenai.org;https://www.fz-juelich.de", "aff_unique_abbr": "Columbia;UW;UT Austin;Meta;UIUC;Google;TAU;;Apple;HUJI;FZ J\u00fclich;AI2;FZJ", "aff_campus_unique_index": "1;1;2;3;4;2;5;1;4;1;4;2", "aff_campus_unique": ";Seattle;Austin;Urbana-Champaign;Mountain View;Jerusalem", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;1;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;3;1;0;3;0", "aff_country_unique": "United States;Israel;;Germany" }, { "title": "Human-like Few-Shot Learning via Bayesian Reasoning over Natural Language", "status": "Oral", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/71019", "id": "dVnhdm9MIg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2aa9b18b9ab37b0ab1fdaae46fb781d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dVnhdm9MIg", "openreview": "https://openreview.net/forum?id=dVnhdm9MIg", "poster": "/media/PosterPDFs/NeurIPS%202023/71019.png?t=1701458184.5735202", "slides": "https://nips.cc/virtual/2023/poster/71019", "video": "https://nips.cc/virtual/2023/poster/71019", "tldr": "", "abstract": "A core tension in models of concept learning is that the model must carefully balance the tractability of inference against the expressivity of the hypothesis class. Humans, however, can efficiently learn a broad range of concepts. \nWe introduce a model of inductive learning that seeks to be human-like in that sense.\nIt implements a Bayesian reasoning process where a language model first proposes candidate hypotheses expressed in natural language, which are then re-weighed by a prior and a likelihood.\nBy estimating the prior from human data, we can predict human judgments on learning problems involving numbers and sets, spanning concepts that are generative, discriminative, propositional, and higher-order.", "keywords": "Cognitive science;Bayesian;Language model;Induction;Psychology;Reasoning", "primary_area": "", "supplementary_material": "/attachment/580e66d8c96f01560bcf82f7044ad62b0065aeda.pdf", "author": "Kevin Ellis", "authorids": "~Kevin_Ellis1", "gender": "M", "homepage": "https://www.cs.cornell.edu/~ellisk/", "dblp": "", "google_scholar": "L7XI6asAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Kevin_Ellis1", "aff": "Cornell University", "aff_domain": "cornell.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\nellis2023humanlike,\ntitle={Human-like Few-Shot Learning via Bayesian Reasoning over Natural Language},\nauthor={Kevin Ellis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dVnhdm9MIg}\n}", "github": "", "project": "", "reviewers": "8GEn;y5Ba;qc38;ce9a", "pdf_size": 949431, "rating": "7;7;8;10", "confidence": "4;4;3;4", "soundness": "4;3;3;4", "novelty": "4;3;4;4", "presentation": "3;3;4;4", "wc_summary": "80;169;183;170", "wc_strengths": "63;157;107;192", "wc_weaknesses": "40;390;173;363", "wc_questions": "176;369;121;17", "wc_limitations": "6;63;25;11", "wc_review": "365;1148;609;753", "wc_reply_reviewers": "102;626;41;32", "wc_reply_authors": "49;730;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 8.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 150.5, 41.07614879708174 ], "wc_strengths_avg": [ 129.75, 48.96618731328793 ], "wc_weaknesses_avg": [ 241.5, 143.2733401578954 ], "wc_questions_avg": [ 170.75, 127.91085763139891 ], "wc_limitations_avg": [ 26.25, 22.331312097590683 ], "wc_review_avg": [ 718.75, 283.9950483723264 ], "wc_reply_reviewers_avg": [ 200.25, 247.2775515488618 ], "wc_reply_authors_avg": [ 194.75, 309.6735175955477 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14645190119297667101&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, 
"email": "cornell.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Train Faster, Perform Better: Modular Adaptive Training in Over-Parameterized Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71018", "id": "dWDEBW2raJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/516fd05dc408fd6d6374940a83930193-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dWDEBW2raJ", "openreview": "https://openreview.net/forum?id=dWDEBW2raJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71018.png?t=1701779373.9414454", "slides": "https://nips.cc/virtual/2023/poster/71018", "video": "https://nips.cc/virtual/2023/poster/71018", "author_site": "Yubin Shi, Yixuan Chen, Mingzhi Dong, Xiaochen Yang, Dongsheng Li, Yujiang Wang, Robert Dick, Qin Lv, Yingying Zhao, Fan Yang, Tun Lu, Ning Gu, Li Shang", "tldr": "", "abstract": "Despite their prevalence in deep-learning communities, over-parameterized models convey high demands of computational costs for proper training. This work studies the fine-grained, modular-level learning dynamics of over-parameterized models to attain a more efficient and fruitful training strategy. Empirical evidence reveals that when scaling down into network modules, such as heads in self-attention models, we can observe varying learning patterns implicitly associated with each module's trainability. To describe such modular-level learning capabilities, we introduce a novel concept dubbed modular neural tangent kernel (mNTK), and we demonstrate that the quality of a module's learning is tightly associated with its mNTK's principal eigenvalue $\\lambda_{\\max}$. A large $\\lambda_{\\max}$ indicates that the module learns features with better convergence, while those miniature ones may impact generalization negatively. Inspired by the discovery, we propose a novel training strategy termed Modular Adaptive Training (MAT) to update those modules with their $\\lambda_{\\max}$ exceeding a dynamic threshold selectively, concentrating the model on learning common features and ignoring those inconsistent ones. Unlike most existing training schemes with a complete BP cycle across all network modules, MAT can significantly save computations by its partially-updating strategy and can further improve performance. Experiments show that MAT nearly halves the computational cost of model training and outperforms the accuracy of baselines.", "keywords": "Modular Adaptive Training;Efficient Training;Over-parameterized Model;Neural Tangent Kernel.", "primary_area": "", "supplementary_material": "/attachment/af01d75a832c060a8a17915c1481a6e58f165c6d.pdf", "author": "Yubin Shi;Yixuan Chen;Mingzhi Dong;Xiaochen Yang;Dongsheng Li;Yujiang Wang;Robert P. 
Dick;Qin Lv;Yingying Zhao;Fan Yang;Tun Lu;Ning Gu;Li Shang", "authorids": "~Yubin_Shi1;~Yixuan_Chen1;~Mingzhi_Dong1;~Xiaochen_Yang2;~Dongsheng_Li2;~Yujiang_Wang1;~Robert_P._Dick1;~Qin_Lv1;~Yingying_Zhao1;~Fan_Yang31;~Tun_Lu1;~Ning_Gu2;~Li_Shang3", "gender": "M;F;M;;M;M;M;F;F;M;M;M;", "homepage": ";;;;http://recmind.cn;;http://robertdick.org/;https://home.cs.colorado.edu/~lv/;;https://ephonic.github.io;;https://cscw.fudan.edu.cn/;https://cscw.fudan.edu.cn/lishang/list.htm", "dblp": "221/2003;30/7103-3;118/4806;;254/0830-2.html;125/0429-1;84/523.html;11/808;;;41/2472;;", "google_scholar": "IyLkK_kAAAAJ;cmdWHrIAAAAJ;;;VNg5rA8AAAAJ;https://scholar.google.co.uk/citations?user=3xxDPJUAAAAJ;;dTkWR0MAAAAJ;;;;https://scholar.google.com.au/citations?user=AUnPpaUAAAAJ;AnBUn0QAAAAJ", "orcid": ";;;;0000-0003-3103-8442;;;0000-0002-9437-1376;0000-0001-5902-1306;;0000-0002-6633-4826;0000-0002-2915-974X;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Yubin_Shi1;~Yixuan_Chen1;~Mingzhi_Dong1;~Xiaochen_Yang2;~Dongsheng_Li2;~Yujiang_Wang1;~Robert_P._Dick1;~Qin_Lv1;~Yingying_Zhao1;~Fan_Yang31;~Tun_Lu1;~Ning_Gu2;~Li_Shang3", "aff": "Fudan University;Fudan University;Fudan University;;Microsoft Research Asia;University of Oxford;University of Michigan;University of Colorado at Boulder;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu;fudan.edu.cn;;microsoft.com;ox.ac.uk;umich.edu;colorado.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;Postdoc;;Principal Researcher;Postdoc;Full Professor;Full Professor;Postdoc;Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nshi2023train,\ntitle={Train Faster, Perform Better: Modular Adaptive Training in Over-Parameterized Models},\nauthor={Yubin Shi and Yixuan Chen and Mingzhi Dong and Xiaochen Yang and Dongsheng Li and Yujiang Wang and Robert P. 
Dick and Qin Lv and Yingying Zhao and Fan Yang and Tun Lu and Ning Gu and Li Shang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dWDEBW2raJ}\n}", "github": "", "project": "", "reviewers": "TyiH;pEnu;1e7h;afqQ", "pdf_size": 1949003, "rating": "4;6;7;7", "confidence": "4;2;4;3", "soundness": "2;3;3;3", "novelty": "2;3;4;3", "presentation": "3;4;4;4", "wc_summary": "141;189;190;61", "wc_strengths": "20;30;29;34", "wc_weaknesses": "185;152;159;2", "wc_questions": "71;36;87;263", "wc_limitations": "17;1;1;1", "wc_review": "434;408;466;361", "wc_reply_reviewers": "0;32;16;31", "wc_reply_authors": "209;276;12;221", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 145.25, 52.518449139326265 ], "wc_strengths_avg": [ 28.25, 5.11737237261468 ], "wc_weaknesses_avg": [ 124.5, 71.78614072368008 ], "wc_questions_avg": [ 114.25, 87.83898621910433 ], "wc_limitations_avg": [ 5.0, 6.928203230275509 ], "wc_review_avg": [ 417.25, 38.42769183804825 ], "wc_reply_reviewers_avg": [ 19.75, 13.045593125649749 ], "wc_reply_authors_avg": [ 179.5, 99.95123811139109 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.24618298195866545, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5133763054008165178&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "fudan.edu.cn;fudan.edu;fudan.edu.cn;;microsoft.com;ox.ac.uk;umich.edu;colorado.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 13, "aff_unique_index": "0;0;0;1;2;3;4;0;0;0;0;0", "aff_unique_norm": "Fudan University;Microsoft;University of Oxford;University of Michigan;University of Colorado", "aff_unique_dep": ";Research;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.ox.ac.uk;https://www.umich.edu;https://www.colorado.edu", "aff_unique_abbr": "Fudan;MSR Asia;Oxford;UM;CU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Boulder", "aff_country_unique_index": "0;0;0;0;1;2;2;0;0;0;0;0", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "Self-Correcting Bayesian Optimization through Bayesian Active Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71017", "id": "dX9MjUtP1A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa55bf1947530fc9567059ff42a806c2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dX9MjUtP1A", "openreview": "https://openreview.net/forum?id=dX9MjUtP1A", "poster": "/media/PosterPDFs/NeurIPS%202023/71017.png?t=1702064965.778947", "slides": "https://nips.cc/virtual/2023/poster/71017", "video": "https://nips.cc/virtual/2023/poster/71017", "author_site": "Carl Hvarfner, Erik Hellsten, Frank Hutter, Luigi Nardi", "tldr": "", "abstract": "Gaussian processes are the model of choice in Bayesian optimization and active learning. Yet, they are highly dependent on cleverly chosen hyperparameters to reach their full potential, and little effort is devoted to finding good hyperparameters in the literature. 
We demonstrate the impact of selecting good hyperparameters for GPs and present two acquisition functions that explicitly prioritize hyperparameter learning. Statistical distance-based Active Learning (SAL) considers the average disagreement between samples from the posterior, as measured by a statistical distance. SAL outperforms the state-of-the-art in Bayesian active learning on several test functions. We then introduce Self-Correcting Bayesian Optimization (SCoreBO), which extends SAL to perform Bayesian optimization and active learning simultaneously. SCoreBO learns the model hyperparameters at improved rates compared to vanilla BO, while outperforming the latest Bayesian optimization methods on traditional benchmarks. Moreover, we demonstrate the importance of self-correction on atypical Bayesian optimization tasks.", "keywords": "Bayesian Optimization;Bayesian Active Learning;Gaussian Processes", "primary_area": "", "supplementary_material": "", "author": "Carl Hvarfner;Erik Orm Hellsten;Frank Hutter;Luigi Nardi", "authorids": "~Carl_Hvarfner1;~Erik_Orm_Hellsten1;~Frank_Hutter1;~Luigi_Nardi1", "gender": "M;M;M;M", "homepage": "https://portal.research.lu.se/portal/sv/persons/carl-hvarfner(cd140b82-9fed-4e88-868e-1cf569dcbeb7).html;;http://ml.informatik.uni-freiburg.de/~hutter/;", "dblp": "319/3033;;89/5383;60/7206", "google_scholar": "https://scholar.google.se/citations?hl=en;https://scholar.google.se/citations?user=mK5N-xQAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ;https://scholar.google.it/citations?user=Kgs3zQoAAAAJ", "orcid": ";;0000-0002-2037-3694;0000-0002-4601-2264", "linkedin": "carl-hvarfner-a97421153/;;frank-hutter-9190b24b/;nardiluigi/", "or_profile": "~Carl_Hvarfner1;~Erik_Orm_Hellsten1;~Frank_Hutter1;~Luigi_Nardi1", "aff": "Lund University;Lund University;Albert-Ludwigs-Universit\u00e4t Freiburg;Stanford University", "aff_domain": "lu.se;lu.se;uni-freiburg.de;stanford.edu", "position": "PhD student;Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\nhvarfner2023selfcorrecting,\ntitle={Self-Correcting Bayesian Optimization through Bayesian Active Learning},\nauthor={Carl Hvarfner and Erik Orm Hellsten and Frank Hutter and Luigi Nardi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dX9MjUtP1A}\n}", "github": "", "project": "", "reviewers": "BCnP;LR3N;fp2C;ta8V", "pdf_size": 6348549, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "3;2;3;2", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "54;73;104;124", "wc_strengths": "10;39;62;209", "wc_weaknesses": "23;421;260;127", "wc_questions": "689;67;4;63", "wc_limitations": "8;7;4;22", "wc_review": "784;607;434;545", "wc_reply_reviewers": "121;443;147;50", "wc_reply_authors": "783;1847;0;45", "reply_reviewers": "2;2;1;1", "reply_authors": "3;7;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 27.068200900687877 ], "wc_strengths_avg": [ 80.0, 76.72352963726317 ], "wc_weaknesses_avg": [ 207.75, 149.04592413078595 ], "wc_questions_avg": [ 205.75, 280.1172745476437 ], "wc_limitations_avg": [ 10.25, 6.94172168845741 ], "wc_review_avg": [ 592.5, 126.7487672523879 ], "wc_reply_reviewers_avg": [ 190.25, 150.18218103357003 ], "wc_reply_authors_avg": [ 668.75, 747.9332774385693 ], "reply_reviewers_avg": [ 1.5, 0.5 ], 
"reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4788843497033731223&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "lu.se;lu.se;uni-freiburg.de;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Lund University;Albert-Ludwigs-Universit\u00e4t Freiburg;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lunduniversity.lu.se;https://www.uni-freiburg.de;https://www.stanford.edu", "aff_unique_abbr": "LU;Albert-Ludwigs-Universit\u00e4t;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Freiburg;Stanford", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Sweden;Germany;United States" }, { "title": "Causal Discovery in Semi-Stationary Time Series", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71016", "id": "dYeUvLUxBQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/91f9fb16b5679115a777ade51af87e48-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dYeUvLUxBQ", "openreview": "https://openreview.net/forum?id=dYeUvLUxBQ", "poster": "/media/PosterPDFs/NeurIPS%202023/71016.png?t=1702175790.4378433", "slides": "https://nips.cc/virtual/2023/poster/71016", "video": "https://nips.cc/virtual/2023/poster/71016", "author_site": "Shanyun Gao, Raghavendra Addanki, Tong Yu, Ryan Rossi, Murat Kocaoglu", "tldr": "", "abstract": "Discovering causal relations from observational time series without making the stationary assumption is a significant challenge. In practice, this challenge is common in many areas, such as retail sales, transportation systems, and medical science. Here, we consider this problem for a class of non-stationary time series. The structural causal model (SCM) of this type of time series, called the semi-stationary time series, exhibits that a finite number of different causal mechanisms occur sequentially and periodically across time. This model holds considerable practical utility because it can represent periodicity, including common occurrences such as seasonality and diurnal variation. We propose a constraint-based, non-parametric algorithm for discovering causal relations in this setting. The resulting algorithm, PCMCI$_{\\Omega}$, can capture the alternating and recurring changes in the causal mechanisms and then identify the underlying causal graph with conditional independence (CI) tests. We show that this algorithm is sound in identifying causal relations on discrete time series. We validate the algorithm with extensive experiments on continuous and discrete simulated data. We also apply our algorithm to a real-world climate dataset.", "keywords": "time-series causal discovery;constraint-based causal discovery", "primary_area": "", "supplementary_material": "/attachment/2a0e0a48e13ea5a6023110b5519dbd285d3011f4.pdf", "author": "Shanyun Gao;Raghavendra Addanki;Tong Yu;Ryan A. 
Rossi;Murat Kocaoglu", "authorids": "~Shanyun_Gao2;~Raghavendra_Addanki1;~Tong_Yu3;~Ryan_A._Rossi2;~Murat_Kocaoglu1", "gender": "F;M;;M;M", "homepage": ";https://raddanki.github.io/;https://www.linkedin.com/in/tong-yu-42790744;https://www.muratkocaoglu.com;http://ryanrossi.com", "dblp": ";218/5579;32/1593-1;74/11343;17/5085", "google_scholar": ";SUPaOhgAAAAJ;https://scholar.google.com/citations?hl=en;7N7bzdwAAAAJ;_Dc6lbQAAAAJ", "orcid": ";;0000-0002-5991-2050;;0000-0001-9758-0635", "linkedin": "https://www.linkedin.com/jobs/?src=go-pa&trk=sem-ga_campid.18853522261_asid.146084015209_crid.633923221414_kw.linkedin_d.c_tid.kwd-296170574619_n.g_mt.e_geo.9016722&mcid=6994434350142418944&cid=&gclid=Cj0KCQiAi8KfBhCuARIsADp-A565vO-bdHEB4of97YFPZUvB5FwxZx0Aphoa3GoJSDpVVvjy6XFooQQaAskBEALw_wcB&gclsrc=aw.ds;;tong-yu-42790744;mkocaoglu/;", "or_profile": "~Shanyun_Gao2;~Raghavendra_Addanki1;~Tong_Yu3;~Murat_Kocaoglu1;~Ryan_Rossi1", "aff": "Purdue University;Adobe Systems;Adobe Research;Purdue University;Adobe Research", "aff_domain": "purdue.edu;adobe.com;adobe.com;purdue.edu;adobe.com", "position": "PhD student;Research Scientist;Senior Research Scientist;Assistant Professor;Senior Research Scientist", "bibtex": "@inproceedings{\ngao2023causal,\ntitle={Causal Discovery in Semi-Stationary Time Series},\nauthor={Shanyun Gao and Raghavendra Addanki and Tong Yu and Ryan A. Rossi and Murat Kocaoglu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dYeUvLUxBQ}\n}", "github": "", "project": "", "reviewers": "YnE6;nsZ6;ztgZ;ZpAB", "pdf_size": 1520199, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "2;2;2;3", "wc_summary": "146;34;202;105", "wc_strengths": "74;24;20;78", "wc_weaknesses": "291;127;115;181", "wc_questions": "106;19;301;311", "wc_limitations": "21;5;57;32", "wc_review": "638;209;695;707", "wc_reply_reviewers": "32;148;172;153", "wc_reply_authors": "42;236;806;304", "reply_reviewers": "1;1;3;1", "reply_authors": "2;2;4;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 121.75, 61.25510182833753 ], "wc_strengths_avg": [ 49.0, 27.073972741361768 ], "wc_weaknesses_avg": [ 178.5, 69.54674686856316 ], "wc_questions_avg": [ 184.25, 125.62518656702564 ], "wc_limitations_avg": [ 28.75, 18.925842121290138 ], "wc_review_avg": [ 562.25, 205.60809201001794 ], "wc_reply_reviewers_avg": [ 126.25, 55.14696274501434 ], "wc_reply_authors_avg": [ 347.0, 281.9024654024863 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5335476357817337855&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "purdue.edu;adobe.com;adobe.com;purdue.edu;adobe.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Purdue University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.purdue.edu;https://www.adobe.com", "aff_unique_abbr": "Purdue;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Epistemic 
Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71015", "id": "dZqcC1qCmB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/07fbde96bee50f4e09303fd4f877c2f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dZqcC1qCmB", "openreview": "https://openreview.net/forum?id=dZqcC1qCmB", "poster": "/media/PosterPDFs/NeurIPS%202023/71015.png?t=1702328104.466789", "slides": "https://nips.cc/virtual/2023/poster/71015", "video": "https://nips.cc/virtual/2023/poster/71015", "author_site": "Ian Osband, Zheng Wen, Seyed Mohammad Asghari, Vikranth Dwaracherla, MORTEZA IBRAHIMI, Xiuyuan Lu, Benjamin Van Roy", "tldr": "", "abstract": "Intelligence relies on an agent's knowledge of what it does not know.\nThis capability can be assessed based on the quality of joint predictions of labels across multiple inputs.\nIn principle, ensemble-based approaches can produce effective joint predictions, but the computational costs of large ensembles become prohibitive.\nWe introduce the epinet: an architecture that can supplement any conventional neural network, including large pretrained models, and can be trained with modest incremental computation to estimate uncertainty.\nWith an epinet, conventional neural networks outperform very large ensembles, consisting of hundreds or more particles, with orders of magnitude less computation.\nThe epinet does not fit the traditional framework of Bayesian neural networks.\nTo accommodate development of approaches beyond BNNs, such as the epinet, we introduce the epistemic neural network (ENN) as a general interface for models that produce joint predictions.", "keywords": "Uncertainty;Deep Learning;Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Ian Osband;Zheng Wen;Seyed Mohammad Asghari;Vikranth Dwaracherla;Morteza Ibrahimi;Xiuyuan Lu;Benjamin Van Roy", "authorids": "~Ian_Osband1;~Zheng_Wen1;~Seyed_Mohammad_Asghari1;~Vikranth_Dwaracherla1;~Morteza_Ibrahimi2;~Xiuyuan_Lu1;~Benjamin_Van_Roy3", "gender": "M;M;;M;;F;", "homepage": "http://iosband.github.io/;http://zheng-wen.com/;;https://vikranth.people.stanford.edu/;;;https://web.stanford.edu/~bvr", "dblp": ";;;182/7585;;200/9014;41/4314.html", "google_scholar": "https://scholar.google.co.uk/citations?user=QA4o6eYAAAAJ;kK3qvd8AAAAJ;;ir7j5AkAAAAJ;pgcjVaYAAAAJ;SPL_2lIAAAAJ;05sMX8MAAAAJ", "orcid": ";;;;;;", "linkedin": "iosband;;seyed-mohammad-asghari;;;lxy-lucy/;", "or_profile": "~Ian_Osband1;~Zheng_Wen1;~Seyed_Mohammad_Asghari1;~Vikranth_Dwaracherla1;~Morteza_Ibrahimi2;~Xiuyuan_Lu1;~Benjamin_Van_Roy3", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google Deepmind;Google", "aff_domain": "google.com;google.com;deepmind.com;deepmind.com;deepmind.com;google.com;google.com", "position": "Researcher;Research Scientist;Research Engineer;Researcher;Researcher;Research Scientist;research scientist", "bibtex": "@inproceedings{\nosband2023epistemic,\ntitle={Epistemic Neural Networks},\nauthor={Ian Osband and Zheng Wen and Seyed Mohammad Asghari and Vikranth Dwaracherla and Morteza Ibrahimi and Xiuyuan Lu and Benjamin Van Roy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dZqcC1qCmB}\n}", "github": "", "project": "", "reviewers": "TShA;tvti;zTnm;jzvR;UZs5", "pdf_size": 3684941, "rating": "5;6;6;7;7", "confidence": "2;4;3;3;4", "soundness": "3;2;3;3;3", "novelty": "3;3;3;3;3", "presentation": 
"2;1;3;4;3", "wc_summary": "78;55;82;89;132", "wc_strengths": "50;48;45;113;103", "wc_weaknesses": "81;972;242;86;142", "wc_questions": "142;83;21;75;86", "wc_limitations": "16;4;11;48;1", "wc_review": "367;1162;401;411;464", "wc_reply_reviewers": "71;48;0;0;0", "wc_reply_authors": "0;28;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 87.2, 25.134836382996408 ], "wc_strengths_avg": [ 71.8, 29.768439663509408 ], "wc_weaknesses_avg": [ 304.6, 338.69254494305005 ], "wc_questions_avg": [ 81.4, 38.43227810057582 ], "wc_limitations_avg": [ 16.0, 16.84042754801671 ], "wc_review_avg": [ 561.0, 302.10792773444393 ], "wc_reply_reviewers_avg": [ 23.8, 30.042636369000643 ], "wc_reply_authors_avg": [ 5.6, 11.2 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6428571428571428, "gs_citation": 144, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16141581552171835458&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "google.com;google.com;deepmind.com;deepmind.com;deepmind.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google DeepMind;DeepMind", "aff_unique_url": "https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "DeepMind;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Finding Order in Chaos: A Novel Data Augmentation Method for Time Series in Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71014", "id": "dbVRDk2wt7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61c2c6338033da68885e0226881cbe71-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dbVRDk2wt7", "openreview": "https://openreview.net/forum?id=dbVRDk2wt7", "poster": "/media/PosterPDFs/NeurIPS%202023/71014.png?t=1699188511.8469558", "slides": "https://nips.cc/virtual/2023/poster/71014", "video": "https://nips.cc/virtual/2023/poster/71014", "author_site": "Berken Utku Demirel, Christian Holz", "tldr": "", "abstract": "The success of contrastive learning is well known to be dependent on data augmentation.\nAlthough the degree of data augmentations has been well controlled by utilizing pre-defined techniques in some domains like vision, time-series data augmentation is less explored and remains a challenging problem due to the complexity of the data generation mechanism, such as the intricate mechanism involved in the cardiovascular system.\nMoreover, there is no widely recognized and general time-series augmentation method that can be applied across different tasks.\nIn this paper, we propose a novel data augmentation method for time-series tasks that aims to connect intra-class samples together, and thereby find order in the latent space.\nOur method builds upon the well-known data augmentation technique of mixup by incorporating a novel approach that accounts for the non-stationary nature of time-series data.\nAlso, by controlling the degree of chaos created by data augmentation, our 
method leads to improved feature representations and performance on downstream tasks.\nWe evaluate our proposed method on three time-series tasks, including heart rate estimation, human activity recognition, and cardiovascular disease detection. \nExtensive experiments against the state-of-the-art methods show that the proposed method outperforms prior works on optimal data generation and known data augmentation techniques in three tasks, reflecting the effectiveness of the presented method. \nThe source code is withheld in accordance with the double-blind policy.", "keywords": "Contrastive learning;Time-series;Augmentation", "primary_area": "", "supplementary_material": "/attachment/7dc9a4d1085fd3726867a314a583d61ec2a6d97f.zip", "author": "Berken Utku Demirel;Christian Holz", "authorids": "~Berken_Utku_Demirel2;~Christian_Holz1", "gender": "M;M", "homepage": "https://berken-demirel.github.io/BerkenUtku-Demirel/;https://siplab.org", "dblp": "283/8117;79/7439-1", "google_scholar": "https://scholar.google.ch/citations?user=zbgxpdIAAAAJ;OfXP9jMAAAAJ", "orcid": ";0000-0001-9655-9519", "linkedin": ";", "or_profile": "~Berken_Utku_Demirel2;~Christian_Holz1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;ETH Zurich", "aff_domain": "inf.ethz.ch;inf.ethz.ch", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndemirel2023finding,\ntitle={Finding Order in Chaos: A Novel Data Augmentation Method for Time Series in Contrastive Learning},\nauthor={Berken Utku Demirel and Christian Holz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dbVRDk2wt7}\n}", "github": "", "project": "", "reviewers": "hEg3;JK7u;CSD8;JMVG", "pdf_size": 771618, "rating": "4;4;5;8", "confidence": "4;4;5;3", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;2;2;3", "wc_summary": "133;72;35;98", "wc_strengths": "79;52;34;72", "wc_weaknesses": "185;218;161;49", "wc_questions": "94;3;343;31", "wc_limitations": "21;26;16;1", "wc_review": "512;371;589;251", "wc_reply_reviewers": "326;151;59;58", "wc_reply_authors": "0;0;168;24", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 1.6393596310755 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 35.8503835404867 ], "wc_strengths_avg": [ 59.25, 17.62632973707232 ], "wc_weaknesses_avg": [ 153.25, 63.499507872108744 ], "wc_questions_avg": [ 117.75, 134.15918716211723 ], "wc_limitations_avg": [ 16.0, 9.354143466934854 ], "wc_review_avg": [ 430.75, 129.9276240835643 ], "wc_reply_reviewers_avg": [ 148.5, 109.21652805322096 ], "wc_reply_authors_avg": [ 48.0, 69.9714227381436 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6469966392206306, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5990068061145816747&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "inf.ethz.ch;inf.ethz.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Deep Gaussian 
Markov Random Fields for Graph-Structured Dynamical Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71013", "id": "dcw7qRUuD8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f04957cc30544d62386f402e1da0b001-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dcw7qRUuD8", "openreview": "https://openreview.net/forum?id=dcw7qRUuD8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/71013", "video": "https://nips.cc/virtual/2023/poster/71013", "author_site": "Fiona Lippert, Bart Kranstauber, Emiel van Loon, Patrick Forr\u00e9", "tldr": "", "abstract": "Probabilistic inference in high-dimensional state-space models is computationally challenging. For many spatiotemporal systems, however, prior knowledge about the dependency structure of state variables is available. We leverage this structure to develop a computationally efficient approach to state estimation and learning in graph-structured state-space models with (partially) unknown dynamics and limited historical data. Building on recent methods that combine ideas from deep learning with principled inference in Gaussian Markov random fields (GMRF), we reformulate graph-structured state-space models as Deep GMRFs defined by simple spatial and temporal graph layers. This results in a flexible spatiotemporal prior that can be learned efficiently from a single time sequence via variational inference. Under linear Gaussian assumptions, we retain a closed-form posterior, which can be sampled efficiently using the conjugate gradient method, scaling favourably compared to classical Kalman filter based approaches.", "keywords": "probabilistic inference;graphical models;spatiotemporal dynamical systems;state-space models", "primary_area": "", "supplementary_material": "/attachment/42f3744e4473223e2b6704c1d9e4e44f0732b83d.pdf", "author": "Fiona Lippert;Bart Kranstauber;E. Emiel van Loon;Patrick Forr\u00e9", "authorids": "~Fiona_Lippert1;b.kranstauber@uva.nl;e.e.vanloon@uva.nl;~Patrick_Forr\u00e91", "gender": "F;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": "https://scholar.google.nl/citations?user=YOqyWTwAAAAJ;;;", "orcid": "0000-0003-4174-2230;;;", "linkedin": ";;;", "or_profile": "~Fiona_Lippert1;b.kranstauber@uva.nl;e.e.vanloon@uva.nl;~Patrick_Forr\u00e91", "aff": "University of Amsterdam;;;", "aff_domain": "uva.nl;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nlippert2023deep,\ntitle={Deep Gaussian Markov Random Fields for Graph-Structured Dynamical Systems},\nauthor={Fiona Lippert and Bart Kranstauber and E. 
Emiel van Loon and Patrick Forr{\\'e}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dcw7qRUuD8}\n}", "github": "", "project": "", "reviewers": "k13T;Upf6;LAkB;9F96", "pdf_size": 10236274, "rating": "4;6;6;6", "confidence": "2;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;2;4", "presentation": "3;3;2;3", "wc_summary": "10;76;59;34", "wc_strengths": "72;136;48;42", "wc_weaknesses": "156;174;66;124", "wc_questions": "1;147;84;33", "wc_limitations": "2;57;1;9", "wc_review": "241;590;258;242", "wc_reply_reviewers": "75;40;10;232", "wc_reply_authors": "406;0;0;682", "reply_reviewers": "1;1;1;2", "reply_authors": "3;1;1;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 44.75, 25.01374622082826 ], "wc_strengths_avg": [ 74.5, 37.239092362730865 ], "wc_weaknesses_avg": [ 130.0, 41.060930335295616 ], "wc_questions_avg": [ 66.25, 55.22397577139842 ], "wc_limitations_avg": [ 17.25, 23.155722834755128 ], "wc_review_avg": [ 332.75, 148.67645240588706 ], "wc_reply_reviewers_avg": [ 89.25, 85.56685982318155 ], "wc_reply_authors_avg": [ 272.0, 288.9740472776059 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=900642342798037379&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "uva.nl;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "title": "Differentially Private Decoupled Graph Convolutions for Multigranular Topology Protection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71012", "id": "dd3KNayGFz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e3db2040672d85fd12e6313945594fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dd3KNayGFz", "openreview": "https://openreview.net/forum?id=dd3KNayGFz", "poster": "/media/PosterPDFs/NeurIPS%202023/71012.png?t=1698622221.4123216", "slides": "https://nips.cc/virtual/2023/poster/71012", "video": "https://nips.cc/virtual/2023/poster/71012", "author_site": "Eli Chien, Wei-Ning Chen, Chao Pan, Pan Li, Ayfer Ozgur, Olgica Milenkovic", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have proven to be highly effective in solving real-world learning problems that involve graph-structured data. However, GNNs can also inadvertently expose sensitive user information and interactions through their model predictions. To address these privacy concerns, Differential Privacy (DP) protocols are employed to control the trade-off between provable privacy protection and model utility. Applying standard DP approaches to GNNs directly is not advisable due to two main reasons. First, the prediction of node labels, which relies on neighboring node attributes through graph convolutions, can lead to privacy leakage. Second, in practical applications, the privacy requirements for node attributes and graph topology may differ. 
In the latter setting, existing DP-GNN models fail to provide multigranular trade-offs between graph topology privacy, node attribute privacy, and GNN utility. To address both limitations, we propose a new framework termed Graph Differential Privacy (GDP), specifically tailored to graph learning. GDP ensures both provably private model parameters as well as private predictions. Additionally, we describe a novel unified notion of graph dataset adjacency to analyze the properties of GDP for different levels of graph topology privacy. Our findings reveal that DP-GNNs, which rely on graph convolutions, not only fail to meet the requirements for multigranular graph topology privacy but also necessitate the injection of DP noise that scales at least linearly with the maximum node degree. In contrast, our proposed Differentially Private Decoupled Graph Convolutions (DPDGCs) represent a more flexible and efficient alternative to graph convolutions that still provides the necessary guarantees of GDP. To validate our approach, we conducted extensive experiments on seven node classification benchmarking and illustrative synthetic datasets. The results demonstrate that DPDGCs significantly outperform existing DP-GNNs in terms of privacy-utility trade-offs.", "keywords": "Graph Neural Networks;Differential Privacy;Multigranular Topology Protection", "primary_area": "", "supplementary_material": "/attachment/9856201a352c639fb19648e49985f8a14284c68b.zip", "author": "Eli Chien;Wei-Ning Chen;Chao Pan;Pan Li;Ayfer Ozgur;Olgica Milenkovic", "authorids": "~Eli_Chien1;~Wei-Ning_Chen1;~Chao_Pan2;~Pan_Li2;~Ayfer_Ozgur1;~Olgica_Milenkovic1", "gender": ";M;;;F;M", "homepage": "https://web.stanford.edu/~wnchen/index.html;;;;https://www.ece.illinois.edu/directory/profile/milenkov/;https://sites.google.com/view/eli-chien/home", "dblp": "51/2118;06/7730-3;https://dblp.org/pers/hd/l/Li_0005:Pan;12/4534;m/OlgicaMilenkovic;222/3243", "google_scholar": "-TqCZLIAAAAJ;M3T3YPIAAAAJ;IroP0EwAAAAJ;;G4LSqL8AAAAJ;N3BuEnYAAAAJ", "orcid": "0000-0001-7355-9487;0000-0002-9275-7072;;;;", "linkedin": ";chao-pan-5abb7314b/;pan-li-b951105a/;;;", "or_profile": "~Wei-Ning_Chen1;~Chao_Pan2;~Pan_Li2;~Ayfer_Ozgur1;~Olgica_Milenkovic1;~I_Chien2", "aff": "Stanford University;University of Illinois, Urbana Champaign;Purdue University;Stanford University;;Georgia Institute of Technology", "aff_domain": "stanford.edu;illinois.edu;purdue.edu;stanford.edu;;gatech.edu", "position": "PhD student;Postdoc;Assistant Professor;Associate Professor;;Postdoc", "bibtex": "@inproceedings{\nchien2023differentially,\ntitle={Differentially Private Decoupled Graph Convolutions for Multigranular Topology Protection},\nauthor={Eli Chien and Wei-Ning Chen and Chao Pan and Pan Li and Ayfer Ozgur and Olgica Milenkovic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dd3KNayGFz}\n}", "github": "", "project": "", "reviewers": "ssFv;tLab;WwFs;fPNi;XfoH", "pdf_size": 1363943, "rating": "4;4;4;5;7", "confidence": "5;2;3;5;3", "soundness": "2;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "90;112;210;42;46", "wc_strengths": "37;58;98;16;42", "wc_weaknesses": "85;214;251;163;16", "wc_questions": "66;2;233;193;154", "wc_limitations": "133;29;104;1;1", "wc_review": "411;415;896;415;259", "wc_reply_reviewers": "37;0;11;20;5", "wc_reply_authors": "173;0;7;5;19", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 4.8, 1.16619037896906 ], 
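To make the decoupling idea in the abstract above concrete, here is a minimal NumPy sketch of a noisy, norm-clipped neighbor aggregation in the spirit of the paper's decoupled graph convolutions. The function name, clipping scheme, and noise scale are illustrative assumptions, not the authors' DPDGC implementation, and the privacy calibration of `sigma` is omitted.

```python
# Hypothetical sketch of a noisy "decoupled" neighbor aggregation: each node's
# contribution is norm-clipped so its influence on any sum is bounded, then the
# aggregate is perturbed with Gaussian noise (the Gaussian mechanism).
import numpy as np

def noisy_decoupled_agg(x, edges, clip=1.0, sigma=1.0, seed=None):
    """x: (n, d) node features; edges: list of (src, dst) pairs."""
    rng = np.random.default_rng(seed)
    # Clip each node's feature vector to norm at most `clip`.
    norms = np.maximum(np.linalg.norm(x, axis=1, keepdims=True) / clip, 1.0)
    xc = x / norms
    out = np.zeros_like(xc)
    for s, t in edges:
        out[t] += xc[s]  # sum over in-neighbors
    # Gaussian noise calibrated to the clipping bound (epsilon/delta accounting omitted).
    out += rng.normal(scale=sigma * clip, size=out.shape)
    return out

x = np.random.default_rng(0).standard_normal((5, 3))
edges = [(0, 1), (1, 2), (2, 0), (3, 4)]
print(noisy_decoupled_agg(x, edges, clip=1.0, sigma=0.5, seed=0).shape)  # (5, 3)
```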
"confidence_avg": [ 3.6, 1.2000000000000002 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 100.0, 61.03113959283409 ], "wc_strengths_avg": [ 50.2, 27.410946718418902 ], "wc_weaknesses_avg": [ 145.8, 85.50882995340307 ], "wc_questions_avg": [ 129.6, 84.41942904331917 ], "wc_limitations_avg": [ 53.6, 54.741574694193815 ], "wc_review_avg": [ 479.2, 216.84316913382352 ], "wc_reply_reviewers_avg": [ 14.6, 13.032267646115928 ], "wc_reply_authors_avg": [ 40.8, 66.39397563032357 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.05716619504750293, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11472122525857504460&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;illinois.edu;purdue.edu;stanford.edu;;gatech.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Stanford University;University of Illinois Urbana-Champaign;Purdue University;Georgia Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://illinois.edu;https://www.purdue.edu;https://www.gatech.edu", "aff_unique_abbr": "Stanford;UIUC;Purdue;Georgia Tech", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Functional Equivalence and Path Connectivity of Reducible Hyperbolic Tangent Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71011", "id": "ddKCg3OhGw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb64a43508e0cfe53ee6179ff31ea900-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ddKCg3OhGw", "openreview": "https://openreview.net/forum?id=ddKCg3OhGw", "poster": "/media/PosterPDFs/NeurIPS%202023/71011.png?t=1707867452.9907305", "slides": "https://nips.cc/virtual/2023/poster/71011", "video": "https://nips.cc/virtual/2023/poster/71011", "tldr": "", "abstract": "Understanding the learning process of artificial neural networks requires clarifying the structure of the parameter space within which learning takes place. A neural network parameter's functional equivalence class is the set of parameters implementing the same input--output function. For many architectures, almost all parameters have a simple and well-documented functional equivalence class. However, there is also a vanishing minority of reducible parameters, with richer functional equivalence classes caused by redundancies among the network's units.\n\nIn this paper, we give an algorithmic characterisation of unit redundancies and reducible functional equivalence classes for a single-hidden-layer hyperbolic tangent architecture. 
We show that such functional equivalence classes are piecewise-linear path-connected sets, and that for parameters with a majority of redundant units, the sets have a diameter of at most 7 linear segments.", "keywords": "theory;neural network theory;structural redundancy;functional equivalence;functional equivalence class;partial identifiability;parameter canonicalisation;parameter space;piecewise-linear;connectivity", "primary_area": "", "supplementary_material": "", "author": "Matthew Farrugia-Roberts", "authorids": "~Matthew_Farrugia-Roberts1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nfarrugia-roberts2023functional,\ntitle={Functional Equivalence and Path Connectivity of Reducible Hyperbolic Tangent Networks},\nauthor={Matthew Farrugia-Roberts},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ddKCg3OhGw}\n}", "github": "", "project": "", "reviewers": "QPQA;virJ;Wcpf;HCLr;NhDx", "pdf_size": 197937, "rating": "5;5;6;7;10", "confidence": "4;3;2;3;5", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "77;145;35;50;66", "wc_strengths": "77;38;147;55;69", "wc_weaknesses": "71;67;51;390;4", "wc_questions": "162;42;66;60;23", "wc_limitations": "4;22;1;48;1", "wc_review": "391;314;300;603;163", "wc_reply_reviewers": "0;0;5;682;0", "wc_reply_authors": "0;0;0;736;0", "reply_reviewers": "0;0;1;2;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.6, 1.8547236990991407 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 74.6, 37.9715683110403 ], "wc_strengths_avg": [ 77.2, 37.333095237336 ], "wc_weaknesses_avg": [ 116.6, 138.75820696448912 ], "wc_questions_avg": [ 70.6, 48.10654841079331 ], "wc_limitations_avg": [ 15.2, 18.170305445974208 ], "wc_review_avg": [ 354.2, 144.47615720249482 ], "wc_reply_reviewers_avg": [ 137.4, 272.3068857006741 ], "wc_reply_authors_avg": [ 147.2, 294.4 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.6132846549348298, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12960831875962622441&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "", "author_num": 1 }, { "title": "Fast Exact Leverage Score Sampling from Khatri-Rao Products with Applications to Tensor Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71010", "id": "deaHiTb6Cu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/959f70ee50044bed305e48e3484005a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=deaHiTb6Cu", "openreview": "https://openreview.net/forum?id=deaHiTb6Cu", "poster": "/media/PosterPDFs/NeurIPS%202023/71010.png?t=1699807902.5824661", "slides": "https://nips.cc/virtual/2023/poster/71010", "video": "https://nips.cc/virtual/2023/poster/71010", "author_site": "Vivek Bharadwaj, Osman Asif Malik, Riley Murray, Laura Grigori, Aydin Buluc, James Demmel", "tldr": "", "abstract": "We present a data structure to randomly sample rows from the Khatri-Rao product of several matrices according to the exact distribution of its leverage 
scores. Our proposed sampler draws each row in time logarithmic in the height of the Khatri-Rao product and quadratic in its column count, with persistent space overhead at most the size of the input matrices. As a result, it tractably draws samples even when the matrices forming the Khatri-Rao product have tens of millions of rows each. When used to sketch the linear least-squares problems arising in Candecomp / PARAFAC decomposition, our method achieves lower asymptotic complexity per solve than recent state-of-the-art methods. Experiments on billion-scale sparse tensors and synthetic data validate our theoretical claims, with our algorithm achieving higher accuracy than competing methods as the decomposition rank grows.", "keywords": "Tensor Decomposition;Leverage Scores;Randomized Linear Algebra;Sketching;Khatri-Rao Product;Sparse Tensors", "primary_area": "", "supplementary_material": "/attachment/93b1321d7b2b89c88bbf9d885769b6d7f2e938e4.zip", "author": "Vivek Bharadwaj;Osman Asif Malik;Riley Murray;Laura Grigori;Aydin Buluc;James Demmel", "authorids": "~Vivek_Bharadwaj1;~Osman_Asif_Malik1;~Riley_Murray1;~Laura_Grigori1;~Aydin_Buluc1;~James_Demmel2", "gender": "M;;Not Specified;F;;M", "homepage": "https://vbharadwaj-bk.github.io/;https://osmanmalik.github.io/;https://rileyjmurray.com;https://who.rocq.inria.fr/Laura.Grigori/;https://people.eecs.berkeley.edu/~aydin/;https://people.eecs.berkeley.edu/~demmel/", "dblp": "64/10881;231/7644;;;80/1552;d/JDemmel", "google_scholar": "0cKXP7cAAAAJ;WAleKq0AAAAJ;9KRGFbYAAAAJ;;hRB3wSgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0483-9578;0000-0003-4477-481X;0000-0003-1461-6458;;0000-0001-7253-9038;", "linkedin": ";;riley-murray-b326714a/;;;", "or_profile": "~Vivek_Bharadwaj1;~Osman_Asif_Malik1;~Riley_Murray1;~Laura_Grigori1;~Aydin_Buluc1;~James_Demmel2", "aff": "University of California, Berkeley;Lawrence Berkeley National Lab;International Computer Science Institute;;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;lbl.gov;icsi.berkeley.edu;;berkeley.edu;berkeley.edu", "position": "PhD student;Postdoc;Postdoc;;Adjunct Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbharadwaj2023fast,\ntitle={Fast Exact Leverage Score Sampling from Khatri-Rao Products with Applications to Tensor Decomposition},\nauthor={Vivek Bharadwaj and Osman Asif Malik and Riley Murray and Laura Grigori and Aydin Buluc and James Demmel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=deaHiTb6Cu}\n}", "github": "", "project": "", "reviewers": "LxVj;u8zc;qocR;Njzt", "pdf_size": 686705, "rating": "5;5;7;7", "confidence": "4;3;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;2", "wc_summary": "127;176;225;99", "wc_strengths": "56;88;369;44", "wc_weaknesses": "139;162;238;44", "wc_questions": "451;94;349;1", "wc_limitations": "1;24;1;1", "wc_review": "774;544;1182;189", "wc_reply_reviewers": "30;0;185;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 156.75, 48.0852108241193 ], "wc_strengths_avg": [ 139.25, 133.61769156814526 ], "wc_weaknesses_avg": [ 145.75, 69.23284986189721 ], "wc_questions_avg": [ 223.75, 182.8816215479292 ], 
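As a small worked example of the quantity being sampled above, the sketch below computes exact leverage scores for an explicitly formed Khatri-Rao product with NumPy. The paper's contribution is sampling from this distribution without materializing the product; this naive sketch does not attempt that and only fixes the definitions.

```python
# Exact leverage-score row sampling for a small, explicitly formed Khatri-Rao product.
import numpy as np

def khatri_rao(a, b):
    """Column-wise Kronecker product: (m*n, r) from (m, r) and (n, r)."""
    m, r = a.shape
    n, _ = b.shape
    return (a[:, None, :] * b[None, :, :]).reshape(m * n, r)

def leverage_scores(a):
    """Exact leverage scores l_i = a_i^T (A^T A)^+ a_i; they sum to rank(A)."""
    g_pinv = np.linalg.pinv(a.T @ a)
    return np.einsum("ij,jk,ik->i", a, g_pinv, a)

rng = np.random.default_rng(0)
A = khatri_rao(rng.standard_normal((4, 3)), rng.standard_normal((5, 3)))
p = leverage_scores(A)
p /= p.sum()  # normalize into a sampling distribution over rows
print(rng.choice(len(A), size=8, p=p))
```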
"wc_limitations_avg": [ 6.75, 9.959292143521045 ], "wc_review_avg": [ 672.25, 360.6163993775103 ], "wc_reply_reviewers_avg": [ 56.5, 74.96165686536017 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10852122601793977571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "berkeley.edu;lbl.gov;icsi.berkeley.edu;;berkeley.edu;berkeley.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of California, Berkeley;Lawrence Berkeley National Laboratory;International Computer Science Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.lbl.gov;https://www.icsi.berkeley.edu/", "aff_unique_abbr": "UC Berkeley;LBNL;ICSI", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "AQuA: A Benchmarking Tool for Label Quality Assessment", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73524", "id": "dhJ8VbcEtX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc20ea8d104cab737a5561096f9bde9b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=dhJ8VbcEtX", "openreview": "https://openreview.net/forum?id=dhJ8VbcEtX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73524", "video": "https://nips.cc/virtual/2023/poster/73524", "author_site": "Mononito Goswami, Vedant Sanil, Arjun Choudhry, Arvind Srinivasan, Chalisa Udompanyawit, Artur Dubrawski", "tldr": "", "abstract": "Machine learning (ML) models are only as good as the data they are trained on. But recent studies have found datasets widely used to train and evaluate ML models, e.g. _ImageNet_, to have pervasive labeling errors. Erroneous labels on the train set hurt ML models' ability to generalize, and they impact evaluation and model selection using the test set. Consequently, learning in the presence of labeling errors is an active area of research, yet this field lacks a comprehensive benchmark to evaluate these methods. Most of these methods are evaluated on a few computer vision datasets with significant variance in the experimental protocols. With such a large pool of methods and inconsistent evaluation, it is also unclear how ML practitioners can choose the right models to assess label quality in their data. To this end, we propose a benchmarking environment _AQuA_ to rigorously evaluate methods that enable machine learning in the presence of label noise. We also introduce a design space to delineate concrete design choices of label error detection models. 
We hope that our proposed design space and benchmark enable practitioners to choose the right tools to improve their label quality and that our benchmark enables objective and rigorous evaluation of machine learning tools facing mislabeled data.", "keywords": "Label errors;evaluation;design space;benchmarking", "primary_area": "", "supplementary_material": "/attachment/566671158e70adba32059348ff6263aebf690d7f.pdf", "author": "Mononito Goswami;Vedant Sanil;Arjun Choudhry;Arvind Srinivasan;Chalisa Udompanyawit;Artur Dubrawski", "authorids": "~Mononito_Goswami1;~Vedant_Sanil1;~Arjun_Choudhry1;~Arvind_Srinivasan1;~Chalisa_Udompanyawit1;~Artur_Dubrawski2", "gender": "M;M;M;;F;M", "homepage": "https://mononito.com;https://vedant-sanil.github.io/;;;;https://www.autonlab.org", "dblp": "243/3771;;;;;76/48", "google_scholar": "https://scholar.google.co.in/citations?hl=en;OehqgZsAAAAJ;https://scholar.google.com/citations?view_op=list_works;uWNrmFcAAAAJ;;O3gezzcAAAAJ", "orcid": "0000-0002-4117-5558;;0000-0002-3416-6020;0009-0008-3283-5295;;0000-0002-2372-0831", "linkedin": "https://linkedin.com/in/mononitogoswami/;vedant-sanil/;;arvindsrinivasan8/;chalisa-udompanyawit-ba12081ab/;artur-dubrawski-33a2a87/", "or_profile": "~Mononito_Goswami1;~Vedant_Sanil1;~Arjun_Choudhry1;~Arvind_Srinivasan1;~Chalisa_Udompanyawit1;~Artur_Dubrawski2", "aff": "Carnegie Mellon University;;Carnegie Mellon University;;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;;andrew.cmu.edu;;andrew.cmu.edu;cmu.edu", "position": "PhD student;;Research Intern;;Undergrad student;Research Professor", "bibtex": "@inproceedings{\ngoswami2023aqua,\ntitle={{AQ}uA: A Benchmarking Tool for Label Quality Assessment},\nauthor={Mononito Goswami and Vedant Sanil and Arjun Choudhry and Arvind Srinivasan and Chalisa Udompanyawit and Artur Dubrawski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=dhJ8VbcEtX}\n}", "github": "", "project": "", "reviewers": "RN59;AcXH;Q72y;iCgr", "pdf_size": 2514698, "rating": "6;6;7;7", "confidence": "4;4;2;4", "wc_summary_and_contributions": "96;85;106;104", "wc_strengths": "234;154;85;88", "wc_improvement": "291;131;18;1689", "wc_limitations": "174;1;31;176", "wc_correctness": "1;1;11;949", "wc_clarity": "1;2;1;127", "wc_relation_to_prior_work": "1;1;1;3", "wc_documentation": "1;1;1;237", "wc_additional_feedback": "1;1;1;1", "wc_review": "800;377;255;3374", "wc_reply_reviewers": "0;17;0;1326", "wc_reply_authors": "242;506;143;2405", "reply_reviewers": "0;1;0;4", "reply_authors": "1;1;1;6", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 97.75, 8.257572282456872 ], "wc_strengths_avg": [ 140.25, 60.74691350183974 ], "wc_improvement_avg": [ 532.25, 674.8567903637038 ], "wc_limitations_avg": [ 95.5, 80.20754328615233 ], "wc_correctness_avg": [ 240.5, 409.07303748841724 ], "wc_clarity_avg": [ 32.75, 54.41679428264771 ], "wc_relation_to_prior_work_avg": [ 1.5, 0.8660254037844386 ], "wc_documentation_avg": [ 60.0, 102.19099764656376 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 1201.5, 1270.4940967985644 ], "wc_reply_reviewers_avg": [ 335.75, 571.7632267818559 ], "wc_reply_authors_avg": [ 824.0, 922.3841390657149 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 
-0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15549401819120919184&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;;andrew.cmu.edu;;andrew.cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Adversarial Transferability via Intermediate-level Perturbation Decay", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71009", "id": "dikH9tdPi2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67b2e2e895380fa6acd537c2894e490e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dikH9tdPi2", "openreview": "https://openreview.net/forum?id=dikH9tdPi2", "poster": "/media/PosterPDFs/NeurIPS%202023/71009.png?t=1700712204.0607908", "slides": "https://nips.cc/virtual/2023/poster/71009", "video": "https://nips.cc/virtual/2023/poster/71009", "author_site": "Qizhang Li, Yiwen Guo, Wangmeng Zuo, Hao Chen", "tldr": "", "abstract": "Intermediate-level attacks that attempt to perturb feature representations following an adversarial direction drastically have shown favorable performance in crafting transferable adversarial examples. Existing methods in this category are normally formulated with two separate stages, where a directional guide is required to be determined at first and the scalar projection of the intermediate-level perturbation onto the directional guide is enlarged thereafter. The obtained perturbation deviates from the guide inevitably in the feature space, and it is revealed in this paper that such a deviation may lead to sub-optimal attack. To address this issue, we develop a novel intermediate-level method that crafts adversarial examples within a single stage of optimization. In particular, the proposed method, named intermediate-level perturbation decay (ILPD), encourages the intermediate-level perturbation to be in an effective adversarial direction and to possess a great magnitude simultaneously. In-depth discussion verifies the effectiveness of our method. Experimental results show that it outperforms state-of-the-arts by large margins in attacking various victim models on ImageNet (+10.07% on average) and CIFAR-10 (+3.88% on average). 
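As a rough illustration of the single-stage idea described above (not the authors' exact ILPD; their code pointer follows in the abstract), this PyTorch sketch enlarges the scalar projection of the intermediate-level perturbation onto a fixed directional guide under an L-infinity pixel budget. The feature extractor, the guide `d`, and all hyperparameters are assumptions.

```python
# Generic single-stage intermediate-level attack sketch.
import torch

def intermediate_level_attack(features, x, d, eps=8/255, alpha=2/255, steps=10):
    """features: differentiable map from images to flat feature vectors;
    d: directional guide in feature space (assumed unit norm)."""
    f0 = features(x).detach()
    x_adv = x.clone().detach()
    for _ in range(steps):
        x_adv.requires_grad_(True)
        # Projection of the intermediate-level perturbation onto the guide.
        proj = ((features(x_adv) - f0) * d).sum()
        grad, = torch.autograd.grad(proj, x_adv)
        with torch.no_grad():
            x_adv = (x + (x_adv + alpha * grad.sign() - x).clamp(-eps, eps)).clamp(0, 1)
    return x_adv.detach()

# Toy usage with a random linear feature extractor.
feats = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 128))
x = torch.rand(1, 3, 32, 32)
d = torch.nn.functional.normalize(torch.randn(128), dim=0)
print(intermediate_level_attack(feats, x, d).shape)  # torch.Size([1, 3, 32, 32])
```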
Our code is at https://github.com/qizhangli/ILPD-attack.", "keywords": "adversarial examples;black-box attack;adversarial transferability", "primary_area": "", "supplementary_material": "", "author": "Qizhang Li;Yiwen Guo;Wangmeng Zuo;Hao Chen", "authorids": "~Qizhang_Li1;~Yiwen_Guo1;~Wangmeng_Zuo3;~Hao_Chen5", "gender": "M;;M;", "homepage": ";;;https://www.cs.ucdavis.edu/~hchen/", "dblp": "272/9084;;93/2671;86/475-3", "google_scholar": "W5JLehEAAAAJ;;rUOpCEYAAAAJ;1Aa3qxIAAAAJ", "orcid": ";;0000-0002-3330-783X;0000-0002-4072-0710", "linkedin": ";;;", "or_profile": "~Qizhang_Li1;~Yiwen_Guo1;~Wangmeng_Zuo3;~Hao_Chen5", "aff": "Harbin Institute of Technology;;Harbin Institute of Technology;University of California, Davis", "aff_domain": "hit.edu;;hit.edu.cn;ucdavis.edu", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023improving,\ntitle={Improving Adversarial Transferability via Intermediate-level Perturbation Decay},\nauthor={Qizhang Li and Yiwen Guo and Wangmeng Zuo and Hao Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dikH9tdPi2}\n}", "github": "", "project": "", "reviewers": "maPS;XQBy;ektX;ncUa;pWZd", "pdf_size": 2317637, "rating": "5;5;6;6;7", "confidence": "4;3;5;3;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;2;3", "wc_summary": "53;87;95;88;79", "wc_strengths": "50;34;59;42;166", "wc_weaknesses": "184;137;15;222;257", "wc_questions": "67;53;39;2;36", "wc_limitations": "4;12;39;1;22", "wc_review": "358;323;247;355;560", "wc_reply_reviewers": "67;253;13;0;143", "wc_reply_authors": "34;145;23;0;23", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;3;2;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.4, 14.60958589419974 ], "wc_strengths_avg": [ 70.2, 48.61440115850446 ], "wc_weaknesses_avg": [ 163.0, 84.06901926393576 ], "wc_questions_avg": [ 39.4, 21.712669112755346 ], "wc_limitations_avg": [ 15.6, 13.778243719719867 ], "wc_review_avg": [ 368.6, 103.72772049939205 ], "wc_reply_reviewers_avg": [ 95.2, 93.59572639816415 ], "wc_reply_authors_avg": [ 45.0, 51.21327952787246 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.28571428571428564, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2585974698940131357&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "hit.edu;;hit.edu.cn;ucdavis.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harbin Institute of Technology;University of California, Davis", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.ucdavis.edu", "aff_unique_abbr": "HIT;UC Davis", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Harbin;Davis", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Scalable Transformer for PDE Surrogate Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71008", "id": "djyn8Q0anK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/590daf74f99ee85df3d8c007df9c8187-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=djyn8Q0anK", 
"openreview": "https://openreview.net/forum?id=djyn8Q0anK", "poster": "/media/PosterPDFs/NeurIPS%202023/71008.png?t=1701915645.1524706", "slides": "https://nips.cc/virtual/2023/poster/71008", "video": "https://nips.cc/virtual/2023/poster/71008", "author_site": "Zijie Li, Dule Shu, Amir Barati Farimani", "tldr": "", "abstract": "Transformer has shown state-of-the-art performance on various applications and has recently emerged as a promising tool for surrogate modeling of partial differential equations (PDEs). Despite the introduction of linear-complexity attention, applying Transformer to problems with a large number of grid points can be numerically unstable and computationally expensive. In this work, we propose Factorized Transformer (FactFormer), which is based on an axial factorized kernel integral. Concretely, we introduce a learnable projection operator that decomposes the input function into multiple sub-functions with one-dimensional domain. These sub-functions are then evaluated and used to compute the instance-based kernel with an axial factorized scheme. We showcase that the proposed model is able to simulate 2D Kolmogorov flow on a $256\\times 256$ grid and 3D smoke buoyancy on a $64\\times64\\times64$ grid with good accuracy and efficiency. The proposed factorized scheme can serve as a computationally efficient low-rank surrogate for the full attention scheme when dealing with multi-dimensional problems.", "keywords": "Efficient attention;Neural PDE solver", "primary_area": "", "supplementary_material": "", "author": "Zijie Li;Dule Shu;Amir Barati Farimani", "authorids": "~Zijie_Li2;~Dule_Shu1;~Amir_Barati_Farimani2", "gender": "M;M;M", "homepage": "https://dlshu.github.io/portfolio/;https://sites.google.com/view/barati;https://zijieli-jlee.github.io/", "dblp": "96/8205;;", "google_scholar": "NNGmu5cAAAAJ;aH52nxkAAAAJ;ji7TXTMAAAAJ", "orcid": "0000-0001-6857-8261;0000-0002-2952-8576;0000-0002-8566-7538", "linkedin": "dule-shu-71a4b6a6/;amir-barati-farimani-a0b74169/;", "or_profile": "~Dule_Shu1;~Amir_Barati_Farimani2;~zijie_li1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nli2023scalable,\ntitle={Scalable Transformer for {PDE} Surrogate Modeling},\nauthor={Zijie Li and Dule Shu and Amir Barati Farimani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=djyn8Q0anK}\n}", "github": "", "project": "", "reviewers": "YpQV;EAHP;hvcj;UCqx;TtUN;y1WA", "pdf_size": 9138380, "rating": "4;5;5;6;7;7", "confidence": "2;5;3;5;5;5", "soundness": "2;3;2;3;4;4", "novelty": "3;3;2;3;3;3", "presentation": "3;2;3;3;4;2", "wc_summary": "71;18;50;66;151;137", "wc_strengths": "55;1;27;27;56;140", "wc_weaknesses": "404;116;285;72;352;195", "wc_questions": "118;17;71;29;1;44", "wc_limitations": "1;1;6;1;1;21", "wc_review": "649;153;439;195;561;537", "wc_reply_reviewers": "251;15;0;52;19;45", "wc_reply_authors": "187;0;0;156;0;0", "reply_reviewers": "1;1;0;2;1;1", "reply_authors": "2;1;1;2;1;1", "rating_avg": [ 5.666666666666667, 1.1055415967851332 ], "confidence_avg": [ 4.166666666666667, 1.2133516482134197 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 82.16666666666667, 47.05109515787656 ], 
"wc_strengths_avg": [ 51.0, 43.99621195815234 ], "wc_weaknesses_avg": [ 237.33333333333334, 120.44869816186844 ], "wc_questions_avg": [ 46.666666666666664, 38.6551707048646 ], "wc_limitations_avg": [ 5.166666666666667, 7.312470322826768 ], "wc_review_avg": [ 422.3333333333333, 186.30321760208244 ], "wc_reply_reviewers_avg": [ 63.666666666666664, 85.63034249350844 ], "wc_reply_authors_avg": [ 57.166666666666664, 81.3396513939474 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7868995982344533, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9435852574674988352&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cmu.edu;andrew.cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sample Complexity of Forecast Aggregation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71007", "id": "dlDFakG6kJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/70de9e3948645a1be2de657f14d85c6d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dlDFakG6kJ", "openreview": "https://openreview.net/forum?id=dlDFakG6kJ", "poster": "/media/PosterPDFs/NeurIPS%202023/71007.png?t=1701408356.069318", "slides": "https://nips.cc/virtual/2023/poster/71007", "video": "https://nips.cc/virtual/2023/poster/71007", "author_site": "Tao Lin, Yiling Chen", "tldr": "", "abstract": "We consider a Bayesian forecast aggregation model where $n$ experts, after observing private signals about an unknown binary event, report their posterior beliefs about the event to a principal, who then aggregates the reports into a single prediction for the event. The signals of the experts and the outcome of the event follow a joint distribution that is unknown to the principal, but the principal has access to i.i.d. \"samples\" from the distribution, where each sample is a tuple of the experts' reports (not signals) and the realization of the event. Using these samples, the principal aims to find an $\\varepsilon$-approximately optimal aggregator, where optimality is measured in terms of the expected squared distance between the aggregated prediction and the realization of the event. We show that the sample complexity of this problem is at least $\\tilde \\Omega(m^{n-2} / \\varepsilon)$ for arbitrary discrete distributions, where $m$ is the size of each expert's signal space. This sample complexity grows exponentially in the number of experts $n$. But, if the experts' signals are independent conditioned on the realization of the event, then the sample complexity is significantly reduced, to $\\tilde O(1 / \\varepsilon^2)$, which does not depend on $n$. Our results can be generalized to non-binary events. 
The proof of our results uses a reduction from the distribution learning problem and reveals the fact that forecast aggregation is almost as difficult as distribution learning.", "keywords": "information aggregation;sample complexity;distribution learning;Bayesian forecast aggregation", "primary_area": "", "supplementary_material": "/attachment/1826cfad51e23e6ecf711e647edc4516735cfa7d.pdf", "author": "Tao Lin;Yiling Chen", "authorids": "~Tao_Lin2;~Yiling_Chen1", "gender": "M;F", "homepage": "https://tao-l.github.io/;https://yiling.seas.harvard.edu/", "dblp": "64/4492-13;72/3762-1", "google_scholar": "https://scholar.google.com/citations?hl=en;x_7xA0UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Tao_Lin2;~Yiling_Chen1", "aff": "Harvard University;Harvard University", "aff_domain": "g.harvard.edu;fas.harvard.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nlin2023sample,\ntitle={Sample Complexity of Forecast Aggregation},\nauthor={Tao Lin and Yiling Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dlDFakG6kJ}\n}", "github": "", "project": "", "reviewers": "bpux;YcHV;mdHA;ey4x", "pdf_size": 592008, "rating": "7;7;7;8", "confidence": "2;4;4;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "139;152;120;75", "wc_strengths": "45;44;117;63", "wc_weaknesses": "50;163;6;72", "wc_questions": "37;27;66;66", "wc_limitations": "4;9;1;34", "wc_review": "275;395;310;310", "wc_reply_reviewers": "0;14;45;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 121.5, 29.159046623646667 ], "wc_strengths_avg": [ 67.25, 29.701641368786337 ], "wc_weaknesses_avg": [ 72.75, 57.26855594477654 ], "wc_questions_avg": [ 49.0, 17.363755354185336 ], "wc_limitations_avg": [ 12.0, 13.019216566291536 ], "wc_review_avg": [ 322.5, 44.22951503238533 ], "wc_reply_reviewers_avg": [ 17.75, 16.618889854620253 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4421878304824872699&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "g.harvard.edu;fas.harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Trust Your $\\nabla$: Gradient-based Intervention Targeting for Causal Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71006", "id": "dmD63sv0TZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9eda77f505efbb89462970d739143f73-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dmD63sv0TZ", "openreview": "https://openreview.net/forum?id=dmD63sv0TZ", "poster": "/media/PosterPDFs/NeurIPS%202023/71006.png?t=1702476181.2569752", "slides": 
"https://nips.cc/virtual/2023/poster/71006", "video": "https://nips.cc/virtual/2023/poster/71006", "author_site": "Mateusz Olko, Micha\u0142 Zaj\u0105c, Aleksandra Nowak, Nino Scherrer, Yashas Annadani, Stefan Bauer, \u0141ukasz Kuci\u0144ski, Piotr Mi\u0142o\u015b", "tldr": "", "abstract": "Inferring causal structure from data is a challenging task of fundamental importance in science. Often, observational data alone is not enough to uniquely identify a system\u2019s causal structure. The use of interventional data can address this issue, however, acquiring these samples typically demands a considerable investment of time and physical or financial resources. In this work, we are concerned with the acquisition of interventional data in a targeted manner to minimize the number of required experiments. We propose a novel Gradient-based Intervention Targeting method, abbreviated GIT, that \u2019trusts\u2019 the gradient estimator of a gradient-based causal discovery framework to provide signals for the intervention targeting function. We provide extensive experiments in simulated and real-world datasets and demonstrate that GIT performs on par with competitive baselines, surpassing them in the low-data regime.", "keywords": "causal discovery;experimental design;active learning;neural networks", "primary_area": "", "supplementary_material": "/attachment/5906cadf32574504668e554cc7c9fde54d8bebe6.zip", "author": "Mateusz Olko;Micha\u0142 Zaj\u0105c;Aleksandra Nowak;Nino Scherrer;Yashas Annadani;Stefan Bauer;\u0141ukasz Kuci\u0144ski;Piotr Mi\u0142o\u015b", "authorids": "~Mateusz_Olko1;~Micha\u0142_Zaj\u0105c1;~Aleksandra_Nowak1;~Nino_Scherrer1;~Yashas_Annadani1;~Stefan_Bauer1;~\u0141ukasz_Kuci\u0144ski1;~Piotr_Mi\u0142o\u015b1", "gender": ";M;F;M;;;M;", "homepage": ";;;https://ninodimontalcino.github.io/;https://yashasannadani.com;https://cifar.ca/bios/stefan-bauer/;https://sites.google.com/view/lukaszkucinski;", "dblp": ";02/6977-5.html;34/10106;295/0198;190/7411;;250/9699;208/0989.html", "google_scholar": ";https://scholar.google.pl/citations?user=5HHtXzwAAAAJ;2A-eZhQAAAAJ;CG9n26kAAAAJ;ExgzcVMAAAAJ;O-oICE8AAAAJ;l6dK-VUAAAAJ;Se68XecAAAAJ", "orcid": ";;0000-0002-2830-6613;;;;0000-0002-5617-8129;", "linkedin": ";;;;;;https://linkedin.com/in/lukasz-kucinski;piotr-milos-4b02151/", "or_profile": "~Mateusz_Olko1;~Micha\u0142_Zaj\u0105c1;~Aleksandra_Nowak1;~Nino_Scherrer1;~Yashas_Annadani1;~Stefan_Bauer1;~\u0141ukasz_Kuci\u0144ski1;~Piotr_Mi\u0142o\u015b1", "aff": ";Department of Electrical Engineering, KU Leuven, Belgium, KU Leuven;IDEAS NCBR Sp.;FAR AI;KTH Royal Institute of Technology;KTH Royal Institute of Technology;Institute of Mathematics Polish Academy of Sciences;IDEAS NCBR", "aff_domain": ";esat.kuleuven.be;ideas-ncbr.pl;far.ai;kth.se;kth.se;impan.pl;ideas-ncbr.pl", "position": ";Intern;Researcher;Researcher;PhD student;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nolko2023trust,\ntitle={Trust Your \\${\\textbackslash}nabla\\$: Gradient-based Intervention Targeting for Causal Discovery},\nauthor={Mateusz Olko and Micha{\\l} Zaj{\\k{a}}c and Aleksandra Nowak and Nino Scherrer and Yashas Annadani and Stefan Bauer and {\\L}ukasz Kuci{\\'n}ski and Piotr Mi{\\l}o{\\'s}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dmD63sv0TZ}\n}", "github": "", "project": "", "reviewers": "zVWz;8Vao;E5qP;6LvC;qbh5", "pdf_size": 2347566, "rating": "5;5;6;7;8", "confidence": "3;3;2;3;1", 
"soundness": "3;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "45;83;108;88;122", "wc_strengths": "13;64;51;135;38", "wc_weaknesses": "64;97;219;59;35", "wc_questions": "96;172;287;138;1", "wc_limitations": "1;27;6;13;1", "wc_review": "219;443;671;433;197", "wc_reply_reviewers": "8;25;13;27;0", "wc_reply_authors": "41;19;10;10;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 89.2, 26.16409753842085 ], "wc_strengths_avg": [ 60.2, 41.023895475685876 ], "wc_weaknesses_avg": [ 94.8, 65.17177303096794 ], "wc_questions_avg": [ 138.8, 93.68756587722834 ], "wc_limitations_avg": [ 9.6, 9.748846085563152 ], "wc_review_avg": [ 392.6, 173.2484920569296 ], "wc_reply_reviewers_avg": [ 14.6, 10.209799214480176 ], "wc_reply_authors_avg": [ 16.0, 13.870832707519762 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7288689868556626, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4821522234620807281&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 11, "email": ";esat.kuleuven.be;ideas-ncbr.pl;far.ai;kth.se;kth.se;impan.pl;ideas-ncbr.pl", "author_num": 8, "aff_unique_index": "0;1;2;3;3;4;5", "aff_unique_norm": "KU Leuven;IDEAS NCBR;FAR AI;KTH Royal Institute of Technology;Polish Academy of Sciences;Institute for Development, Economic Analysis, and Simulation (IDEAS)", "aff_unique_dep": "Department of Electrical Engineering;;;;Institute of Mathematics;", "aff_unique_url": "https://www.kuleuven.be;;https://www.far.ai;https://www.kth.se;https://www.impan.pl/;https://www.ideas-ncbr.gov.pl", "aff_unique_abbr": "KU Leuven;;FAR AI;KTH;PAS;IDEAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;3;1;1", "aff_country_unique": "Belgium;Poland;United States;Sweden" }, { "title": "The Rank-Reduced Kalman Filter: Approximate Dynamical-Low-Rank Filtering In High Dimensions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71005", "id": "dnB71DMyDD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c14d902be45c72833018b2ccfac071e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dnB71DMyDD", "openreview": "https://openreview.net/forum?id=dnB71DMyDD", "poster": "/media/PosterPDFs/NeurIPS%202023/71005.png?t=1702263066.1156147", "slides": "https://nips.cc/virtual/2023/poster/71005", "video": "https://nips.cc/virtual/2023/poster/71005", "author_site": "Jonathan Schmidt, Philipp Hennig, J\u00f6rg Nick, Filip Tronarp", "tldr": "", "abstract": "Inference and simulation in the context of high-dimensional dynamical systems remain computationally challenging problems.\nSome form of dimensionality reduction is required to make the problem tractable in general.\nIn this paper, we propose a novel approximate Gaussian filtering and smoothing method\nwhich propagates low-rank approximations of the covariance matrices.\nThis is accomplished by projecting the Lyapunov equations associated with the prediction step to a manifold of low-rank matrices,\nwhich are then solved by a recently developed, numerically stable, dynamical low-rank integrator.\nMeanwhile, the update steps are made tractable by noting that the covariance 
update only transforms the column space of the covariance matrix, which is low-rank by construction.\nThe algorithm differentiates itself from existing ensemble-based approaches in that\nthe low-rank approximations of the covariance matrices are deterministic, rather than stochastic.\nCrucially, this enables the method to reproduce the exact Kalman filter as the low-rank dimension approaches the true dimensionality of the problem.\nOur method reduces computational complexity from cubic (for the Kalman filter) to quadratic in the state-space size in the worst-case, and can achieve linear complexity if the state-space model satisfies certain criteria.\nThrough a set of experiments in classical data-assimilation and spatio-temporal regression, we show that the proposed method consistently outperforms the ensemble-based methods in terms of error in the mean and covariance with respect to the exact Kalman filter. This comes at no additional cost in terms of asymptotic computational complexity.", "keywords": "Gaussian;filtering;smoothing;bayesian;state-space models;dynamic-low-rank;high-dimensional;spatio-temporal;Gaussian processes;regression;low rank;state estimation", "primary_area": "", "supplementary_material": "/attachment/fef8c3f9c442619965c4985819c739530966ddb3.pdf", "author": "Jonathan Schmidt;Philipp Hennig;J\u00f6rg Nick;Filip Tronarp", "authorids": "~Jonathan_Schmidt1;~Philipp_Hennig1;~J\u00f6rg_Nick1;~Filip_Tronarp1", "gender": "M;M;;M", "homepage": "https://schmidtjonathan.github.io/;http://mml.inf.uni-tuebingen.de;https://na.uni-tuebingen.de/~nick;https://filtron.github.io/", "dblp": "63/7128;08/9077;;184/0638", "google_scholar": "https://scholar.google.de/citations?user=vJAxEkcAAAAJ;https://scholar.google.de/citations?user=UeG5w08AAAAJ;;q0rtB0EAAAAJ", "orcid": ";0000-0001-7293-6092;;", "linkedin": "jonathanschmidt96/;;;filip-tronarp-93097065/", "or_profile": "~Jonathan_Schmidt1;~Philipp_Hennig1;~J\u00f6rg_Nick1;~Filip_Tronarp1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;Full Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nschmidt2023the,\ntitle={The Rank-Reduced Kalman Filter: Approximate Dynamical-Low-Rank Filtering In High Dimensions},\nauthor={Jonathan Schmidt and Philipp Hennig and J{\\\"o}rg Nick and Filip Tronarp},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dnB71DMyDD}\n}", "github": "", "project": "", "reviewers": "HH4H;dWp4;U69V;e5WD;2SHM", "pdf_size": 5248062, "rating": "4;5;6;7;7", "confidence": "3;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;4;2;3", "wc_summary": "40;160;65;76;43", "wc_strengths": "5;87;28;249;89", "wc_weaknesses": "37;358;75;542;102", "wc_questions": "160;123;90;75;106", "wc_limitations": "1;1;1;120;12", "wc_review": "243;729;259;1062;352", "wc_reply_reviewers": "533;15;63;114;5", "wc_reply_authors": "379;0;229;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 76.8, 43.72367779590367 ], "wc_strengths_avg": [ 91.6, 85.26101101910533 ], 
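To illustrate the column-space point made in the abstract above, the sketch below carries out a Kalman measurement update entirely in the r-dimensional core of a low-rank covariance P = U S U^T: the update changes only the small r x r core. This is generic linear algebra under that factorization, not the paper's dynamical low-rank integrator (which handles the prediction step).

```python
# Kalman measurement update restricted to the column space of P = U S U^T.
import numpy as np

def low_rank_kalman_update(m, U, S, H, R, y):
    """m: (n,) mean; U: (n, r) orthonormal basis; S: (r, r) symmetric core;
    H: (k, n) measurement matrix; R: (k, k) noise covariance; y: (k,)."""
    B = H @ U                        # measurement operator expressed in the basis
    C = B @ S @ B.T + R              # innovation covariance, (k, k)
    G = np.linalg.solve(C, B @ S).T  # gain core G = S B^T C^{-1}, so K = U G
    m_new = m + U @ (G @ (y - H @ m))
    S_new = S - G @ B @ S            # (I - KH) P = U S_new U^T stays in span(U)
    return m_new, U, S_new

rng = np.random.default_rng(0)
n, r, k = 50, 5, 3
U, _ = np.linalg.qr(rng.standard_normal((n, r)))
S = np.eye(r)
H = rng.standard_normal((k, n))
R = 0.1 * np.eye(k)
m_new, _, S_new = low_rank_kalman_update(np.zeros(n), U, S, H, R, rng.standard_normal(k))
print(m_new.shape, S_new.shape)  # (50,) (5, 5)
```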
"wc_weaknesses_avg": [ 222.8, 195.51306861690858 ], "wc_questions_avg": [ 110.8, 29.348935244740993 ], "wc_limitations_avg": [ 27.0, 46.69475345260964 ], "wc_review_avg": [ 529.0, 319.41008124353243 ], "wc_reply_reviewers_avg": [ 146.0, 197.34436906078673 ], "wc_reply_authors_avg": [ 121.6, 156.30047984571254 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7717436331412898, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6705406376590198233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "dnEFueMZ43", "title": "Efficient Multi-Task Reinforcement Learning via Selective Behavior Sharing", "track": "main", "status": "Reject", "tldr": "", "abstract": "The ability to leverage shared behaviors between tasks is critical for sample-efficient multi-task reinforcement learning (MTRL). While prior methods have primarily explored parameter and data sharing, direct behavior-sharing has been limited to task families requiring similar behaviors. Our goal is to extend the efficacy of behavior-sharing to more general task families that could require a mix of shareable and conflicting behaviors. Our key insight is an agent's behavior across tasks can be used for mutually beneficial exploration. To this end, we propose a simple MTRL framework for identifying shareable behaviors over tasks and incorporating them to guide exploration. We empirically demonstrate how behavior sharing improves sample efficiency and final performance on manipulation and navigation MTRL tasks and is even complementary to parameter sharing. 
", "keywords": "Reinforcement Learning;Multitask Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/286311065e812b603416dd9284c417cdaa085c13.zip", "author": "Grace Zhang;Ayush Jain;Injune Hwang;Shao-Hua Sun;Joseph J Lim", "authorids": "~Grace_Zhang1;~Ayush_Jain2;~Injune_Hwang1;~Shao-Hua_Sun1;~Joseph_J_Lim1", "gender": "F;;M;M;M", "homepage": "https://gracehzhang.github.io/;https://ayushj240.github.io/;;http://shaohua0116.github.io;http://people.csail.mit.edu/lim/", "dblp": "13/2999;131/6283-3.html;260/3346;158/9680;08/3086", "google_scholar": ";-zEc_sAAAAAJ;haW9gXcAAAAJ;uXsfnaQAAAAJ;jTnQTBoAAAAJ", "orcid": ";;;0000-0001-7579-6734;", "linkedin": ";;;shaohua0116/;", "or_profile": "~Grace_Zhang1;~Ayush_Jain2;~Injune_Hwang1;~Shao-Hua_Sun1;~Joseph_J_Lim1", "aff": "University of Southern California;University of Southern California;Korea Advanced Institute of Science & Technology;National Taiwan University;Korea Advanced Institute of Science & Technology", "aff_domain": "usc.edu;usc.edu;kaist.edu;ntu.edu.tw;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nzhang2023efficient,\ntitle={Efficient Multi-Task Reinforcement Learning via Selective Behavior Sharing},\nauthor={Grace Zhang and Ayush Jain and Injune Hwang and Shao-Hua Sun and Joseph J Lim},\nyear={2023},\nurl={https://openreview.net/forum?id=dnEFueMZ43}\n}", "github": "", "project": "", "reviewers": "cxcr;npBg;uAJb;xBdd", "site": "https://openreview.net/forum?id=dnEFueMZ43", "pdf_size": 7389950, "rating": "4;4;5;5", "confidence": "4;2;3;4", "soundness": "2;2;3;2", "novelty": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "55;47;57;62", "wc_strengths": "24;82;73;60", "wc_weaknesses": "91;128;37;457", "wc_questions": "40;70;28;162", "wc_limitations": "14;10;13;33", "wc_review": "224;337;208;774", "wc_reply_reviewers": "0;4;21;103", "wc_reply_authors": "0;165;0;498", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.25, 5.402545696243577 ], "wc_strengths_avg": [ 59.75, 22.072324299900995 ], "wc_weaknesses_avg": [ 178.25, 164.15750820477265 ], "wc_questions_avg": [ 75.0, 52.507142371300304 ], "wc_limitations_avg": [ 17.5, 9.069178573608527 ], "wc_review_avg": [ 385.75, 229.6044152450035 ], "wc_reply_reviewers_avg": [ 32.0, 41.743262929483606 ], "wc_reply_authors_avg": [ 165.75, 203.3081097743029 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14235104083531205494&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "University of Southern California;Korea Advanced Institute of Science and Technology;National Taiwan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.usc.edu;https://www.kaist.ac.kr;https://www.ntu.edu.tw", "aff_unique_abbr": "USC;KAIST;NTU", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Los Angeles;;Taiwan", "aff_country_unique_index": "0;0;1;2;1", "aff_country_unique": "United States;South Korea;China" }, { "title": "Fully Dynamic $k$-Clustering in $\\tilde O(k)$ Update Time", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71004", "id": "dnGEPkmnzO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3b7ba46201bf15e5c3935272afae50db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dnGEPkmnzO", "openreview": "https://openreview.net/forum?id=dnGEPkmnzO", "poster": "/media/PosterPDFs/NeurIPS%202023/71004.png?t=1699356682.5903778", "slides": "https://nips.cc/virtual/2023/poster/71004", "video": "https://nips.cc/virtual/2023/poster/71004", "author_site": "Sayan Bhattacharya, Mart\u00edn Costa, Silvio Lattanzi, Nikos Parotsidis", "tldr": "", "abstract": "We present a $O(1)$-approximate fully dynamic algorithm for the $k$-median and $k$-means problems on metric spaces with amortized update time $\\tilde O(k)$ and worst-case query time $\\tilde O(k^2)$. We complement our theoretical analysis with the first in-depth experimental study for the dynamic $k$-median problem on general metrics, focusing on comparing our dynamic algorithm to the current state-of-the-art by Henzinger and Kale [ESA'20]. Finally, we also provide a lower bound for dynamic $k$-median which shows that any $O(1)$-approximate algorithm with $\\tilde O(\\text{poly}(k))$ query time must have $\\tilde \\Omega(k)$ amortized update time, even in the incremental setting.", "keywords": "clustering;k-median;k-means;dynamic algorithms;amortized analysis", "primary_area": "", "supplementary_material": "/attachment/162b47aeaf4fe93e37badeaa0c7821029f69ff35.pdf", "author": "Sayan Bhattacharya;Martin Costa;Silvio Lattanzi;Nikos Parotsidis", "authorids": "~Sayan_Bhattacharya2;~Martin_Costa1;~Silvio_Lattanzi1;~Nikos_Parotsidis1", "gender": "M;M;M;M", "homepage": "https://www.dcs.warwick.ac.uk/~u1671158/;https://www.martincosta.com/;https://sites.google.com/site/silviolattanzi/;https://sites.google.com/view/nikosparotsidis", "dblp": "57/3907.html;351/0874.html;46/6611;129/9110", "google_scholar": "ca-urkIAAAAJ;8yBjYjgF3OwC;vxUZ4AUAAAAJ;https://scholar.google.gr/citations?user=Txeb6wsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sayan_Bhattacharya2;~Martin_Costa1;~Silvio_Lattanzi1;~Nikos_Parotsidis1", "aff": "University of Warwick;University of Warwick;Google;Google", "aff_domain": "warwick.ac.uk;warwick.ac.uk;google.com;google.com", "position": "Associate Professor;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nbhattacharya2023fully,\ntitle={Fully Dynamic \\$k\\$-Clustering in \\${\\textbackslash}tilde O(k)\\$ Update Time},\nauthor={Sayan Bhattacharya and Martin Costa and Silvio Lattanzi and Nikos Parotsidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dnGEPkmnzO}\n}", "github": "", "project": "", "reviewers": "nK4h;f4hU;wnsQ;REqo", "pdf_size": 1150004, "rating": "5;6;6;7", "confidence": "2;3;4;4", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "2;3;4;3", "wc_summary": "116;124;121;261", "wc_strengths": "83;35;34;25", "wc_weaknesses": "71;63;23;8", "wc_questions": "47;164;9;39", "wc_limitations": "18;33;1;2", "wc_review": "335;419;188;335", "wc_reply_reviewers": "13;158;0;11", "wc_reply_authors": "0;92;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 155.5, 60.97745485013293 ], 
"wc_strengths_avg": [ 44.25, 22.708753818736948 ], "wc_weaknesses_avg": [ 41.25, 26.44215384570629 ], "wc_questions_avg": [ 64.75, 59.027006530909226 ], "wc_limitations_avg": [ 13.5, 13.124404748406688 ], "wc_review_avg": [ 319.25, 83.17564246821301 ], "wc_reply_reviewers_avg": [ 45.5, 65.14023334314976 ], "wc_reply_authors_avg": [ 23.0, 39.83716857408418 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "warwick.ac.uk;warwick.ac.uk;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Warwick;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.warwick.ac.uk;https://www.google.com", "aff_unique_abbr": "Warwick;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Hyper-Skin: A Hyperspectral Dataset for Reconstructing Facial Skin-Spectra from RGB Images", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73523", "id": "doV2nhGm1l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c0986bd04d747745beba3752bdf4d9d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=doV2nhGm1l", "openreview": "https://openreview.net/forum?id=doV2nhGm1l", "poster": "/media/PosterPDFs/NeurIPS%202023/73523.png?t=1701531800.0204892", "slides": "https://nips.cc/virtual/2023/poster/73523", "video": "https://nips.cc/virtual/2023/poster/73523", "author_site": "Pai Chet Ng, Zhixiang Chi, Yannick Verdie, Juwei Lu, Konstantinos N Plataniotis", "tldr": "", "abstract": "We introduce Hyper-Skin, a hyperspectral dataset covering wide range of wavelengths from visible (VIS) spectrum (400nm - 700nm) to near-infrared (NIR) spectrum (700nm - 1000nm), uniquely designed to facilitate research on facial skin-spectra reconstruction.\nBy reconstructing skin spectra from RGB images, our dataset enables the study of hyperspectral skin analysis, such as melanin and hemoglobin concentrations, directly on the consumer device. \nOvercoming limitations of existing datasets, Hyper-Skin consists of diverse facial skin data collected with a pushbroom hyperspectral camera. \nWith 330 hyperspectral cubes from 51 subjects, the dataset covers the facial skin from different angles and facial poses.\nEach hyperspectral cube has dimensions of 1024$\\times$1024$\\times$448, resulting in millions of spectra vectors per image. \nThe dataset, carefully curated in adherence to ethical guidelines, includes paired hyperspectral images and synthetic RGB images generated using real camera responses. \nWe demonstrate the efficacy of our dataset by showcasing skin spectra reconstruction using state-of-the-art models on 31 bands of hyperspectral data resampled in the VIS and NIR spectrum. \nThis Hyper-Skin dataset would be a valuable resource to NeurIPS community, encouraging the development of novel algorithms for skin spectral reconstruction while fostering interdisciplinary collaboration in hyperspectral skin analysis related to cosmetology and skin's well-being. 
\nInstructions to request the data and the related benchmarking codes are publicly available at: https://github.com/hyperspectral-skin/Hyper-Skin-2023.", "keywords": "hyperspectral skin analysis;skin-spectra reconstruction;hyperspectral dataset", "primary_area": "", "supplementary_material": "/attachment/2df39bdfd8cf6738763f6bf4e0f3af47829303c9.zip", "author": "Pai Chet Ng;Zhixiang Chi;Yannick Verdie;Juwei Lu;Konstantinos N Plataniotis", "authorids": "~Pai_Chet_Ng1;~Zhixiang_Chi1;~Yannick_Verdie3;~Juwei_Lu2;~Konstantinos_N_Plataniotis1", "gender": ";M;;M;", "homepage": ";;;http://www.dsp.utoronto.ca/juwei/;", "dblp": ";215/3585;86/7551;06/827;", "google_scholar": ";0s-HzGIAAAAJ;;https://scholar.google.ca/citations?user=Asz24wcAAAAJ;", "orcid": ";;;;", "linkedin": ";zhixiang-chi-51441a8a/;;https://linkedin.com/in/juwei-lu-35642621;", "or_profile": "~Pai_Chet_Ng1;~Zhixiang_Chi1;~Yannick_Verdie3;~Juwei_Lu2;~Konstantinos_N_Plataniotis1", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;", "aff_domain": ";huawei.com;huawei.com;huawei.com;", "position": ";Computer Vision Researcher;Principal Researcher;Sr Principal Scientist;", "bibtex": "@inproceedings{\nng2023hyperskin,\ntitle={Hyper-Skin: A Hyperspectral Dataset for Reconstructing Facial Skin-Spectra from {RGB} Images},\nauthor={Pai Chet Ng and Zhixiang Chi and Yannick Verdie and Juwei Lu and Konstantinos N Plataniotis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=doV2nhGm1l}\n}", "github": "", "project": "", "reviewers": "5oGQ;cCX8;rZAx;CmSs", "pdf_size": 1220754, "rating": "6;6;7;8", "confidence": "5;3;3;3", "wc_summary_and_contributions": "129;53;165;119", "wc_strengths": "183;33;22;92", "wc_improvement": "350;24;2;101", "wc_limitations": "16;20;80;44", "wc_correctness": "1;26;2;65", "wc_clarity": "1;9;1;7", "wc_relation_to_prior_work": "5;16;2;35", "wc_documentation": "1;5;1;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "687;187;276;489", "wc_reply_reviewers": "296;0;0;12", "wc_reply_authors": "1327;251;572;395", "reply_reviewers": "3;0;0;1", "reply_authors": "4;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 116.5, 40.4567670482949 ], "wc_strengths_avg": [ 82.5, 63.83768479511142 ], "wc_improvement_avg": [ 119.25, 138.2016190209073 ], "wc_limitations_avg": [ 40.0, 25.45584412271571 ], "wc_correctness_avg": [ 23.5, 25.96632434519757 ], "wc_clarity_avg": [ 4.5, 3.570714214271425 ], "wc_relation_to_prior_work_avg": [ 14.5, 12.932517156377562 ], "wc_documentation_avg": [ 8.0, 9.9498743710662 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 409.75, 194.07134641672377 ], "wc_reply_reviewers_avg": [ 77.0, 126.53458025377884 ], "wc_reply_authors_avg": [ 636.25, 414.69348620396727 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13773737372730985965&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": ";huawei.com;huawei.com;huawei.com;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Revisit Weakly-Supervised Audio-Visual Video Parsing from the Language Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71003", "id": "doWqIXcRlq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7fbae0a0885d3d688840bd34e4a8a698-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=doWqIXcRlq", "openreview": "https://openreview.net/forum?id=doWqIXcRlq", "poster": "/media/PosterPDFs/NeurIPS%202023/71003.png?t=1702350082.2698824", "slides": "https://nips.cc/virtual/2023/poster/71003", "video": "https://nips.cc/virtual/2023/poster/71003", "author_site": "Yingying Fan, Yu Wu, Bo Du, Yutian Lin", "tldr": "", "abstract": "We focus on the weakly-supervised audio-visual video parsing task (AVVP), which aims to identify and locate all the events in audio/visual modalities. Previous works only concentrate on video-level overall label denoising across modalities, but overlook the segment-level label noise, where adjacent video segments (i.e., 1-second video clips) may contain different events. However, recognizing events on the segment is challenging because its label could be any combination of events that occur in the video. To address this issue, we consider tackling AVVP from the language perspective, since language could freely describe how various events appear in each segment beyond fixed labels. Specifically, we design language prompts to describe all cases of event appearance for each video. Then, the similarity between language prompts and segments is calculated, where the event of the most similar prompt is regarded as the segment-level label. In addition, to deal with the mislabeled segments, we propose to perform dynamic re-weighting on the unreliable segments to adjust their labels. 
Experiments show that our simple yet effective approach outperforms state-of-the-art methods by a large margin.", "keywords": "Weakly-Supervised Audio-Visual Video Parsing;Language Guided Segment-Level Label Denoising;Dynamic Re-weighting", "primary_area": "", "supplementary_material": "/attachment/dc97a66514f730cbcd8354fe1ac81ff2aab79b89.pdf", "author": "Yingying Fan;Yu Wu;Bo Du;Yutian Lin", "authorids": "~Yingying_Fan2;~Yu_Wu3;~Bo_Du3;~Yutian_Lin2", "gender": "F;M;M;F", "homepage": "https://github.com/fyyCS;https://yu-wu.net;;https://vana77.github.io/", "dblp": ";22/0-11;70/6443-1.html;198/1146", "google_scholar": ";23SZHUwAAAAJ;Shy1gnMAAAAJ;gB6Xq5IAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yingying_Fan2;~Yu_Wu3;~Bo_Du1;~Yutian_Lin1", "aff": "Wuhan University;Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "MS student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nfan2023revisit,\ntitle={Revisit Weakly-Supervised Audio-Visual Video Parsing from the Language Perspective},\nauthor={Yingying Fan and Yu Wu and Bo Du and Yutian Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=doWqIXcRlq}\n}", "github": "", "project": "", "reviewers": "Nz2w;ULdQ;Q15u;2DCt", "pdf_size": 8500133, "rating": "5;5;5;7", "confidence": "4;5;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;4;3;3", "wc_summary": "56;18;76;96", "wc_strengths": "29;24;36;35", "wc_weaknesses": "167;196;142;553", "wc_questions": "55;28;21;47", "wc_limitations": "69;35;16;15", "wc_review": "376;301;291;746", "wc_reply_reviewers": "96;32;23;216", "wc_reply_authors": "17;23;90;441", "reply_reviewers": "1;1;1;3", "reply_authors": "2;2;3;4", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.5, 28.822734082664677 ], "wc_strengths_avg": [ 31.0, 4.847679857416329 ], "wc_weaknesses_avg": [ 264.5, 167.6581343090755 ], "wc_questions_avg": [ 37.75, 13.77270852083932 ], "wc_limitations_avg": [ 33.75, 21.856063231972954 ], "wc_review_avg": [ 428.5, 186.22902566463694 ], "wc_reply_reviewers_avg": [ 91.75, 77.05963599706399 ], "wc_reply_authors_avg": [ 142.75, 174.56284684892142 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1085114083497548364&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Large-Scale Distributed Learning via Private On-Device LSH", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71002", "id": "dpdbbN7AKr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34345e243156da67605d4b63d71c8d98-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=dpdbbN7AKr", "openreview": "https://openreview.net/forum?id=dpdbbN7AKr", "poster": "/media/PosterPDFs/NeurIPS%202023/71002.png?t=1697326711.425824", "slides": "https://nips.cc/virtual/2023/poster/71002", "video": "https://nips.cc/virtual/2023/poster/71002", "author_site": "Tahseen Rabbani, Marco Bornstein, Furong Huang", "tldr": "", "abstract": "Locality-sensitive hashing (LSH) based frameworks have been used efficiently to select weight vectors in a dense hidden layer with high cosine similarity to an input, enabling dynamic pruning. \n While this type of scheme has been shown to improve computational training efficiency, existing algorithms require repeated randomized projection of the full layer weight, which is impractical for computational- and memory-constrained devices. \n In a distributed setting, deferring LSH analysis to a centralized host is (i) slow if the device cluster is large and (ii) requires access to input data which is forbidden in a federated context. \n Using a new family of hash functions, we develop the first private, personalized, and memory-efficient on-device LSH framework.\nOur framework enables privacy and personalization by allowing each device to generate hash tables, without the help of a central host, using device-specific hashing hyper-parameters (e.g., number of hash tables or hash length).\nHash tables are generated with a compressed set of the full weights, and can be serially generated and discarded if the process is memory-intensive.\nThis allows devices to avoid maintaining (i) the fully-sized model and (ii) large amounts of hash tables in local memory for LSH analysis. We prove several statistical and sensitivity properties of our hash functions, and experimentally demonstrate that our framework is competitive in training large scale recommender networks compared to other LSH frameworks which assume unrestricted on-device capacity.", "keywords": "distributed learning;locality-sensitive hashing;recommender systems;compression", "primary_area": "", "supplementary_material": "/attachment/735fbe56d2559a31974112aedca2d9a44ffa0b9d.zip", "author": "Tahseen Rabbani;Marco Bornstein;Furong Huang", "authorids": "~Tahseen_Rabbani1;~Marco_Bornstein1;~Furong_Huang1", "gender": "M;M;F", "homepage": "https://www.cs.umd.edu/people/trabbani;https://marcobornstein.github.io;https://furong-huang.com", "dblp": "280/2362;332/0431;72/8513", "google_scholar": ";;13yyuCcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tahseen_Rabbani1;~Marco_Bornstein1;~Furong_Huang1", "aff": "University of Maryland, College Park;Pacific Northwest National Laboratory;University of Maryland", "aff_domain": "umd.edu;pnnl.gov;cs.umd.edu", "position": "PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nrabbani2023largescale,\ntitle={Large-Scale Distributed Learning via Private On-Device {LSH}},\nauthor={Tahseen Rabbani and Marco Bornstein and Furong Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dpdbbN7AKr}\n}", "github": "", "project": "", "reviewers": "Q7Pn;QaZe;7yAa", "pdf_size": 865960, "rating": "3;5;7", "confidence": "4;3;5", "soundness": "2;3;4", "novelty": "1;3;4", "presentation": "2;4;4", "wc_summary": "304;121;67", "wc_strengths": "22;36;86", "wc_weaknesses": "3;137;1", "wc_questions": "451;2;16", "wc_limitations": "1;1;6", "wc_review": "781;297;176", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": 
"0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 164.0, 101.41991914806479 ], "wc_strengths_avg": [ 48.0, 27.47119703738202 ], "wc_weaknesses_avg": [ 47.0, 63.64484791926733 ], "wc_questions_avg": [ 156.33333333333334, 208.43917311505743 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 418.0, 261.3898748357837 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17873211080811198709&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "umd.edu;pnnl.gov;cs.umd.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Maryland;Pacific Northwest National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.pnnl.gov", "aff_unique_abbr": "UMD;PNNL", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Memory-Perturbation Equation: Understanding Model's Sensitivity to Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71001", "id": "dqS1GuoG2V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/550ab405d0addd3de5b70e57b44878df-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dqS1GuoG2V", "openreview": "https://openreview.net/forum?id=dqS1GuoG2V", "poster": "/media/PosterPDFs/NeurIPS%202023/71001.png?t=1702094135.6306264", "slides": "https://nips.cc/virtual/2023/poster/71001", "video": "https://nips.cc/virtual/2023/poster/71001", "author_site": "Peter Nickl, Lu Xu, Dharmesh Tailor, Thomas M\u00f6llenhoff, Mohammad Emtiyaz Khan", "tldr": "", "abstract": "Understanding model\u2019s sensitivity to its training data is crucial but can also be challenging and costly, especially during training. To simplify such issues, we present the Memory-Perturbation Equation (MPE) which relates model's sensitivity to perturbation in its training data. Derived using Bayesian principles, the MPE unifies existing sensitivity measures, generalizes them to a wide-variety of models and algorithms, and unravels useful properties regarding sensitivities. Our empirical results show that sensitivity estimates obtained during training can be used to faithfully predict generalization on unseen test data. 
The proposed equation is expected to be useful for future research on robust and adaptive learning.", "keywords": "model interpretability;model understanding;bayesian learning;robustness;adaptive learning", "primary_area": "", "supplementary_material": "", "author": "Peter Nickl;Lu Xu;Dharmesh Tailor;Thomas M\u00f6llenhoff;Mohammad Emtiyaz Khan", "authorids": "~Peter_Nickl1;~Lu_Xu4;~Dharmesh_Tailor1;~Thomas_M\u00f6llenhoff1;~Mohammad_Emtiyaz_Khan1", "gender": "M;F;;M;M", "homepage": "https://pnickl.github.io;https://x-lu.github.io/;http://dvtailor.github.io;http://www.thomasmoellenhoff.net;https://emtiyaz.github.io/", "dblp": "278/2984;;215/3637;;58/10432", "google_scholar": "FTVom6gAAAAJ;;https://scholar.google.co.jp/citations?user=boyVlJgAAAAJ;KAqmeqAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0006-9534-3955;;;;", "linkedin": "peter-nickl-a7a403160/?originalSubdomain=jp;;;;", "or_profile": "~Peter_Nickl1;~Lu_Xu4;~Dharmesh_Tailor1;~Thomas_M\u00f6llenhoff1;~Mohammad_Emtiyaz_Khan1", "aff": "RIKEN Center for Advanced Intelligence Project;RIKEN;University of Amsterdam;RIKEN Center for Advanced Intelligence Project (AIP);RIKEN Center for AI Project", "aff_domain": "riken.jp;riken.jp;uva.nl;riken.jp;riken.jp", "position": "Researcher;Postdoc;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nnickl2023the,\ntitle={The Memory-Perturbation Equation: Understanding Model's Sensitivity to Data},\nauthor={Peter Nickl and Lu Xu and Dharmesh Tailor and Thomas M{\\\"o}llenhoff and Mohammad Emtiyaz Khan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dqS1GuoG2V}\n}", "github": "", "project": "", "reviewers": "fdmt;YYDA;KnYf;WQKj;HWf4", "pdf_size": 3777760, "rating": "6;6;6;6;7", "confidence": "2;3;4;2;2", "soundness": "3;3;3;3;3", "novelty": "3;3;4;3;3", "presentation": "2;3;2;3;2", "wc_summary": "96;155;50;165;79", "wc_strengths": "64;72;84;70;20", "wc_weaknesses": "94;194;225;34;75", "wc_questions": "350;49;1;39;403", "wc_limitations": "7;14;15;18;12", "wc_review": "611;484;375;326;589", "wc_reply_reviewers": "50;133;0;12;144", "wc_reply_authors": "17;171;0;0;179", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 109.0, 44.276404551408646 ], "wc_strengths_avg": [ 62.0, 21.98181066245454 ], "wc_weaknesses_avg": [ 124.4, 72.80274720091269 ], "wc_questions_avg": [ 168.4, 171.48714237516467 ], "wc_limitations_avg": [ 13.2, 3.655133376499413 ], "wc_review_avg": [ 477.0, 112.91944031033806 ], "wc_reply_reviewers_avg": [ 67.8, 60.14116726502737 ], "wc_reply_authors_avg": [ 73.4, 83.22643810713035 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.37500000000000006, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11166982107741155470&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "riken.jp;riken.jp;uva.nl;riken.jp;riken.jp", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "RIKEN;University of Amsterdam", "aff_unique_dep": "Center for Advanced Intelligence Project;", "aff_unique_url": "https://www.riken.jp/en/;https://www.uva.nl", "aff_unique_abbr": "RIKEN;UvA", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Japan;Netherlands" }, { "title": "Counterfactual-Augmented Importance Sampling for Semi-Offline Policy Evaluation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/71000", "id": "dsH244r9fA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/25b15618c98ff0c4655df0c5a277e1c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dsH244r9fA", "openreview": "https://openreview.net/forum?id=dsH244r9fA", "poster": "/media/PosterPDFs/NeurIPS%202023/71000.png?t=1701898675.6085393", "slides": "https://nips.cc/virtual/2023/poster/71000", "video": "https://nips.cc/virtual/2023/poster/71000", "author_site": "Shengpu Tang, Jenna Wiens", "tldr": "", "abstract": "In applying reinforcement learning (RL) to high-stakes domains, quantitative and qualitative evaluation using observational data can help practitioners understand the generalization performance of new policies. However, this type of off-policy evaluation (OPE) is inherently limited since offline data may not reflect the distribution shifts resulting from the application of new policies. On the other hand, online evaluation by collecting rollouts according to the new policy is often infeasible, as deploying new policies in these domains can be unsafe. In this work, we propose a semi-offline evaluation framework as an intermediate step between offline and online evaluation, where human users provide annotations of unobserved counterfactual trajectories. While tempting to simply augment existing data with such annotations, we show that this naive approach can lead to biased results. Instead, we design a new family of OPE estimators based on importance sampling (IS) and a novel weighting scheme that incorporate counterfactual annotations without introducing additional bias. We analyze the theoretical properties of our approach, showing its potential to reduce both bias and variance compared to standard IS estimators. Our analyses reveal important practical considerations for handling biased, noisy, or missing annotations. In a series of proof-of-concept experiments involving bandits and a healthcare-inspired simulator, we demonstrate that our approach outperforms purely offline IS estimators and is robust to imperfect annotations. 
Our framework, combined with principled human-centered design of annotation solicitation, can enable the application of RL in high-stakes domains.", "keywords": "healthcare;reinforcement learning;offline RL;off-policy evaluation;counterfactuals", "primary_area": "", "supplementary_material": "", "author": "Shengpu Tang;Jenna Wiens", "authorids": "~Shengpu_Tang1;~Jenna_Wiens1", "gender": "M;F", "homepage": "https://shengpu-tang.me/;http://www-personal.umich.edu/~wiensj/", "dblp": "242/8881;63/10451", "google_scholar": "a_z5a5wAAAAJ;fvEfKxkAAAAJ", "orcid": "0000-0002-4213-2015;0000-0002-1057-7722", "linkedin": "shengpu-tang/;", "or_profile": "~Shengpu_Tang1;~Jenna_Wiens1", "aff": "University of Michigan - Ann Arbor;University of Michigan Ann Arbor", "aff_domain": "umich.edu;umich.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ntang2023counterfactualaugmented,\ntitle={Counterfactual-Augmented Importance Sampling for Semi-Offline Policy Evaluation},\nauthor={Shengpu Tang and Jenna Wiens},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dsH244r9fA}\n}", "github": "", "project": "", "reviewers": "Qcjd;fzzo;noPk;71Lt;rPY7", "pdf_size": 962201, "rating": "4;5;6;6;7", "confidence": "3;4;4;4;4", "soundness": "3;2;4;3;3", "novelty": "2;2;4;2;3", "presentation": "2;4;4;3;4", "wc_summary": "54;38;100;176;185", "wc_strengths": "31;64;77;60;32", "wc_weaknesses": "165;162;264;300;84", "wc_questions": "33;90;63;2;216", "wc_limitations": "8;15;8;6;3", "wc_review": "291;369;512;544;520", "wc_reply_reviewers": "10;346;123;66;101", "wc_reply_authors": "60;1736;92;481;221", "reply_reviewers": "1;2;1;1;2", "reply_authors": "2;4;2;2;3", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 110.6, 60.661684777130944 ], "wc_strengths_avg": [ 52.8, 18.280043763623762 ], "wc_weaknesses_avg": [ 195.0, 77.58350340117414 ], "wc_questions_avg": [ 80.8, 73.72218119399344 ], "wc_limitations_avg": [ 8.0, 3.9496835316262997 ], "wc_review_avg": [ 447.2, 99.3808834736339 ], "wc_reply_reviewers_avg": [ 129.2, 114.92501903415113 ], "wc_reply_authors_avg": [ 518.0, 626.7921505571046 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7844645405527362, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6221914793353774389&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "umich.edu;umich.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Actively Testing Your Model While It Learns: Realizing Label-Efficient Learning in Practice", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70999", "id": "du0hvEpgj8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/63ef323523f3be8b58ed9277cc747485-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=du0hvEpgj8", "openreview": "https://openreview.net/forum?id=du0hvEpgj8", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70999.png?t=1701461946.912969", "slides": "https://nips.cc/virtual/2023/poster/70999", "video": "https://nips.cc/virtual/2023/poster/70999", "author_site": "Dayou Yu, Weishi Shi, Qi Yu", "tldr": "", "abstract": "In active learning (AL), we focus on reducing the data annotation cost from the model training perspective. However, \"testing'', which often refers to the model evaluation process of using empirical risk to estimate the intractable true generalization risk, also requires data annotations. The annotation cost for \"testing'' (model evaluation) is under-explored. Even in works that study active model evaluation or active testing (AT), the learning and testing ends are disconnected. In this paper, we propose a novel active testing while learning (ATL) framework that integrates active learning with active testing. ATL provides an unbiased sample-efficient estimation of the model risk during active learning. It leverages test samples annotated from different periods of a dynamic active learning process to achieve fair model evaluations based on a theoretically guaranteed optimal integration of different test samples. Periodic testing also enables effective early-stopping to further save the total annotation cost. ATL further integrates an \"active feedback'' mechanism, which is inspired by human learning, where the teacher (active tester) provides immediate guidance given by the prior performance of the student (active learner). Our theoretical result reveals that active feedback maintains the label complexity of the integrated learning-testing objective, while improving the model's generalization capability. We study the realistic setting where we maximize the performance gain from choosing \"testing'' samples for feedback without sacrificing the risk estimation accuracy. 
An agnostic-style analysis and empirical evaluations on real-world datasets demonstrate that the ATL framework can effectively improve the annotation efficiency of both active learning and evaluation tasks.", "keywords": "active learning;active testing", "primary_area": "", "supplementary_material": "/attachment/9d0b5c9c8f292342ed0cfe00e636752e09ee2614.pdf", "author": "Dayou Yu;Weishi Shi;Qi Yu", "authorids": "~Dayou_Yu1;~Weishi_Shi2;~Qi_Yu1", "gender": ";M;M", "homepage": "https://people.rit.edu/~dy2507/;http://www.linkedin.com/in/weishi-shi-9b5b89b4;https://www.rit.edu/mining/", "dblp": "319/4611;202/1055;58/6957-1", "google_scholar": "Obh2NOwAAAAJ;;L3gWdfEAAAAJ", "orcid": "0009-0002-2373-4907;;0000-0002-0426-5407", "linkedin": ";;", "or_profile": "~Dayou_Yu1;~Weishi_Shi2;~Qi_Yu1", "aff": "Rochester Institute of Technology;University of North Texas;Rochester Institute of Technology", "aff_domain": "rit.edu;unt.edu;rit.edu", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nyu2023actively,\ntitle={Actively Testing Your Model While It Learns: Realizing Label-Efficient Learning in Practice},\nauthor={Dayou Yu and Weishi Shi and Qi Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=du0hvEpgj8}\n}", "github": "", "project": "", "reviewers": "5t4X;2XK2;bQCr;LqNc", "pdf_size": 2551104, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;2;3;2", "novelty": "2;4;3;3", "presentation": "3;4;3;2", "wc_summary": "72;67;124;71", "wc_strengths": "24;99;94;19", "wc_weaknesses": "43;99;328;231", "wc_questions": "88;14;6;158", "wc_limitations": "10;1;110;1", "wc_review": "237;280;662;480", "wc_reply_reviewers": "21;0;11;81", "wc_reply_authors": "109;74;111;148", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;3;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.5, 23.4574082114798 ], "wc_strengths_avg": [ 59.0, 37.58324094593227 ], "wc_weaknesses_avg": [ 175.25, 111.51765555283163 ], "wc_questions_avg": [ 66.5, 61.747469583781324 ], "wc_limitations_avg": [ 30.5, 46.04617247937118 ], "wc_review_avg": [ 414.75, 169.66345363689848 ], "wc_reply_reviewers_avg": [ 28.25, 31.34784681600955 ], "wc_reply_authors_avg": [ 110.5, 26.177280225416848 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16336826563433088781&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "rit.edu;unt.edu;rit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Rochester Institute of Technology;University of North Texas", "aff_unique_dep": ";", "aff_unique_url": "https://www.rit.edu;https://www.unt.edu", "aff_unique_abbr": "RIT;UNT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "dw6xO1Nbk5", "title": "Generalization in Neural Operator: Irregular Domains, Orthogonal Basis, and Super-Resolution", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural operators (NOs) have become popular for learning partial differential equation (PDE) operators. 
As a mapping between infinite-dimensional function spaces, each layer of NO contains a kernel operator and a linear transform, followed by nonlinear activation. NO can accurately simulate the operator and conduct super-resolution, i.e., train and test on grids with different resolutions. Despite its success, NO's design of kernel operator, choice of grids, the capability of generalization and super-resolution, and applicability to general problems on irregular domains are poorly understood.\nTo this end, we systematically analyze NOs from a unified perspective, considering the orthogonal bases in their kernel operators. This analysis facilitates a better understanding and enhancement of NOs in the following:\n(1) Generalization bounds of NOs,\n(2) Construction of NOs on arbitrary domains,\n(3) Enhancement of NOs' performance by designing proper orthogonal bases that align with the operator and domain,\n(4) Improvement of NOs' through the allocation of suitable grids, and\n(5) Investigation of super-resolution error.\nOur theory has multiple implications in practice: choosing the orthogonal basis and grid points to accelerate training, improving the generalization and super-resolution capabilities, and adapting NO to irregular domains.\nCorresponding experiments are conducted to verify our theory. Our paper provides a new perspective for studying NOs.", "keywords": "Deep Learning;AI for Science;Neural Operator;Partial Differential Equation", "primary_area": "", "supplementary_material": "/attachment/d4e3814d3ba842393a41011ec839abe5cf7d8721.pdf", "author": "Zheyuan Hu;Zhongkai Hao;Tianbo Li;Zekun Shi;Kenji Kawaguchi;Min Lin", "authorids": "~Zheyuan_Hu1;~Zhongkai_Hao1;~Tianbo_Li1;~Zekun_Shi3;~Kenji_Kawaguchi1;~Min_Lin1", "gender": "M;M;;M;M;M", "homepage": ";;https://ml.comp.nus.edu.sg/#members;https://linmin.me;https://zekun-shi.github.io/;https://haozhongkai.github.io/", "dblp": "270/0713;153/7013;;;;270/0220.html", "google_scholar": "On2YFigAAAAJ;;aLl3rYoAAAAJ;BGONmkIAAAAJ;X9vcv1oAAAAJ;dfSzq27ZiVoC", "orcid": ";;;;;", "linkedin": ";;;min-lin-08a3a422/;;", "or_profile": "~Zheyuan_Hu1;~Tianbo_Li1;~Kenji_Kawaguchi1;~Min_Lin1;~ZEKUN_SHI2;~Hao_Zhongkai1", "aff": "National University of Singapore;Sea AI Lab;National University of Singapore;Sea AI Lab;Sea AI Lab;Tsinghua University", "aff_domain": "nus.edu.sg;sea.com;nus.edu;sea.com;sea.com;mails.tsinghua.edu.cn", "position": "PhD student;Researcher;Presidential Young Professor;Principal Researcher;Researcher;PhD student", "bibtex": "@misc{\nhu2023generalization,\ntitle={Generalization in Neural Operator: Irregular Domains, Orthogonal Basis, and Super-Resolution},\nauthor={Zheyuan Hu and Zhongkai Hao and Tianbo Li and Zekun Shi and Kenji Kawaguchi and Min Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=dw6xO1Nbk5}\n}", "github": "", "project": "", "reviewers": "59nv;2NvK;PCWZ;8nxh;xMYc;XCTm", "site": "https://openreview.net/forum?id=dw6xO1Nbk5", "pdf_size": 425426, "rating": "4;4;5;5;5;7", "confidence": "3;4;3;2;4;2", "soundness": "3;4;3;3;3;3", "novelty": "2;4;2;3;3;3", "presentation": "3;4;3;3;2;3", "wc_summary": "78;122;87;41;87;28", "wc_strengths": "42;104;36;19;81;65", "wc_weaknesses": "199;161;86;84;116;46", "wc_questions": "111;169;6;1;73;22", "wc_limitations": "7;75;8;28;35;4", "wc_review": "437;631;223;173;392;165", "wc_reply_reviewers": "184;200;34;24;91;67", "wc_reply_authors": "1270;701;0;0;0;323", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "4;2;1;1;1;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.0, 
0.816496580927726 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 73.83333333333333, 31.259220861833537 ], "wc_strengths_avg": [ 57.833333333333336, 28.736832733541732 ], "wc_weaknesses_avg": [ 115.33333333333333, 51.21089293847116 ], "wc_questions_avg": [ 63.666666666666664, 61.11646441199149 ], "wc_limitations_avg": [ 26.166666666666668, 24.680739228979526 ], "wc_review_avg": [ 336.8333333333333, 167.797910859727 ], "wc_reply_reviewers_avg": [ 100.0, 68.74833331313081 ], "wc_reply_authors_avg": [ 382.3333333333333, 471.09046076334664 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8333333333333333, 1.0671873729054748 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iiMUCkWhxB4J:scholar.google.com/&scioq=Generalization+in+Neural+Operator:+Irregular+Domains,+Orthogonal+Basis,+and+Super-Resolution&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;1;2", "aff_unique_norm": "National University of Singapore;Sea AI Lab;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NUS;;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2", "aff_country_unique": "Singapore;;China" }, { "title": "SmoothHess: ReLU Network Feature Interactions via Stein's Lemma", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70998", "id": "dwIeEhbaD0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ef5e965720193681fc8d16372ac4717-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dwIeEhbaD0", "openreview": "https://openreview.net/forum?id=dwIeEhbaD0", "poster": "/media/PosterPDFs/NeurIPS%202023/70998.png?t=1702167195.843354", "slides": "https://nips.cc/virtual/2023/poster/70998", "video": "https://nips.cc/virtual/2023/poster/70998", "author_site": "Max Torop, Aria Masoomi, Davin Hill, Kivanc Kose, Stratis Ioannidis, Jennifer Dy", "tldr": "", "abstract": "Several recent methods for interpretability model feature interactions by looking at the Hessian of a neural network. This poses a challenge for ReLU networks, which are piecewise-linear and thus have a zero Hessian almost everywhere. We propose SmoothHess, a method of estimating second-order interactions through Stein's Lemma. In particular, we estimate the Hessian of the network convolved with a Gaussian through an efficient sampling algorithm, requiring only network gradient calls. SmoothHess is applied post-hoc, requires no modifications to the ReLU network architecture, and the extent of smoothing can be controlled explicitly. We provide a non-asymptotic bound on the sample complexity of our estimation procedure. 
We validate the superior ability of SmoothHess to capture interactions on benchmark datasets and a real-world medical spirometry dataset.", "keywords": "Interpretability;Feature Interactions;Stein's Lemma", "primary_area": "", "supplementary_material": "/attachment/d226c01e12214a65f07d24da1444dc46bb4a18df.zip", "author": "Max Torop;Aria Masoomi;Davin Hill;Kivanc Kose;Stratis Ioannidis;Jennifer Dy", "authorids": "~Max_Torop1;~Aria_Masoomi1;~Davin_Hill1;~Kivanc_Kose1;~Stratis_Ioannidis1;~Jennifer_Dy1", "gender": "M;M;;M;M;", "homepage": "https://maxtorop.github.io/;;;http://kkose.github.io/about/;https://ece.northeastern.edu/fac-ece/ioannidis/;https://mllabneu.github.io/", "dblp": "305/7085;242/9324;;54/8026;42/6940;24/6000", "google_scholar": "NjhrmBEAAAAJ;KXcX8coAAAAJ;;BAQNDLAAAAAJ;GPIB5kUAAAAJ;6h7b0fAAAAAJ", "orcid": ";;;0000-0003-3185-2639;0000-0001-8355-4751;", "linkedin": "max-torop-048ab4a9/;aria-masoomi-779a02232;;kivanc-kose-8aa383a1/;stratis-ioannidis-87b826110;", "or_profile": "~Max_Torop1;~Aria_Masoomi1;~Davin_Hill1;~Kivanc_Kose1;~Stratis_Ioannidis1;~Jennifer_Dy1", "aff": "Northeastern University;Northeastern University;;Memorial Sloan Kettering Cancer Centre;Northeastern University;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;;mskcc.org;northeastern.edu;northeastern.edu", "position": "PhD student;PhD student;;Research Scientist;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ntorop2023smoothhess,\ntitle={SmoothHess: Re{LU} Network Feature Interactions via Stein's Lemma},\nauthor={Max Torop and Aria Masoomi and Davin Hill and Kivanc Kose and Stratis Ioannidis and Jennifer Dy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dwIeEhbaD0}\n}", "github": "", "project": "", "reviewers": "HbQW;GnHq;FahQ;bZYQ;3HE6;yoRr", "pdf_size": 4243917, "rating": "6;6;6;7;7;8", "confidence": "3;2;2;3;4;4", "soundness": "3;3;3;3;4;3", "novelty": "3;2;3;3;3;4", "presentation": "4;2;2;3;3;4", "wc_summary": "159;53;152;218;76;190", "wc_strengths": "80;120;32;98;51;88", "wc_weaknesses": "215;86;468;43;115;172", "wc_questions": "83;26;219;98;18;103", "wc_limitations": "27;2;18;85;18;7", "wc_review": "564;287;889;542;278;560", "wc_reply_reviewers": "40;34;20;11;0;0", "wc_reply_authors": "0;0;845;0;0;0", "reply_reviewers": "1;1;1;1;0;0", "reply_authors": "1;1;3;1;1;1", "rating_avg": [ 6.666666666666667, 0.7453559924999299 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 141.33333333333334, 58.79814811898605 ], "wc_strengths_avg": [ 78.16666666666667, 29.19141806916699 ], "wc_weaknesses_avg": [ 183.16666666666666, 139.04605551958514 ], "wc_questions_avg": [ 91.16666666666667, 66.07424275431052 ], "wc_limitations_avg": [ 26.166666666666668, 27.528268299251145 ], "wc_review_avg": [ 520.0, 205.35903518796863 ], "wc_reply_reviewers_avg": [ 17.5, 15.489243579551154 ], "wc_reply_authors_avg": [ 140.83333333333334, 314.91290683122037 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.74535599249993 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8215838362577493, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5458190100499711064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, 
"email": "northeastern.edu;northeastern.edu;;mskcc.org;northeastern.edu;northeastern.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Northeastern University;Memorial Sloan Kettering Cancer Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.mskcc.org", "aff_unique_abbr": "NEU;MSKCC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Deep Patch Visual Odometry", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70997", "id": "dwfHbm8g66", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ac484b0f1a1719ad5be9aa8c8455fbb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dwfHbm8g66", "openreview": "https://openreview.net/forum?id=dwfHbm8g66", "poster": "/media/PosterPDFs/NeurIPS%202023/70997.png?t=1702167392.387421", "slides": "https://nips.cc/virtual/2023/poster/70997", "video": "https://nips.cc/virtual/2023/poster/70997", "author_site": "Zachary Teed, Lahav Lipson, Jia Deng", "tldr": "", "abstract": "We propose Deep Patch Visual Odometry (DPVO), a new deep learning system for monocular Visual Odometry (VO). DPVO uses a novel recurrent network architecture designed for tracking image patches across time. Recent approaches to VO have significantly improved the state-of-the-art accuracy by using deep networks to predict dense flow between video frames. However, using dense flow incurs a large computational cost, making these previous methods impractical for many use cases. Despite this, it has been assumed that dense flow is important as it provides additional redundancy against incorrect matches. DPVO disproves this assumption, showing that it is possible to get the best accuracy and efficiency by exploiting the advantages of sparse patch-based matching over dense flow. DPVO introduces a novel recurrent update operator for patch based correspondence coupled with differentiable bundle adjustment. On Standard benchmarks, DPVO outperforms all prior work, including the learning-based state-of-the-art VO-system (DROID) using a third of the memory while running 3x faster on average. 
Code is available at https://github.com/princeton-vl/DPVO", "keywords": "SLAM;Simultaneous Localization and Mapping;Visual Odometry;Structure from motion;SfM", "primary_area": "", "supplementary_material": "/attachment/f214ea22c2166392a9599273f2736603ddcf1033.zip", "author": "Zachary Teed;Lahav Lipson;Jia Deng", "authorids": "~Zachary_Teed1;~Lahav_Lipson1;~Jia_Deng1", "gender": ";M;M", "homepage": "https://zachteed.github.io/;https://www.lahavlipson.com;", "dblp": ";302/0769;07/6526-1.html", "google_scholar": ";;U3Eub-EAAAAJ", "orcid": ";;", "linkedin": ";lahav-lipson-076493113/;", "or_profile": "~Zachary_Teed1;~Lahav_Lipson1;~Jia_Deng1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nteed2023deep,\ntitle={Deep Patch Visual Odometry},\nauthor={Zachary Teed and Lahav Lipson and Jia Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dwfHbm8g66}\n}", "github": "", "project": "", "reviewers": "5bXL;kzhF;Uwmu;5ejm", "pdf_size": 2598437, "rating": "3;5;5;6", "confidence": "5;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "3;2;4;2", "wc_summary": "68;65;117;134", "wc_strengths": "81;36;89;39", "wc_weaknesses": "223;125;199;154", "wc_questions": "158;3;182;166", "wc_limitations": "1;26;37;1", "wc_review": "531;255;624;494", "wc_reply_reviewers": "114;10;206;53", "wc_reply_authors": "206;0;298;205", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;2;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 96.0, 30.124740662784138 ], "wc_strengths_avg": [ 61.25, 23.94133454926855 ], "wc_weaknesses_avg": [ 175.25, 38.14691992808856 ], "wc_questions_avg": [ 127.25, 72.2543251300571 ], "wc_limitations_avg": [ 16.25, 15.738090735537142 ], "wc_review_avg": [ 476.0, 136.1010653889234 ], "wc_reply_reviewers_avg": [ 95.75, 73.60154550007765 ], "wc_reply_authors_avg": [ 177.25, 109.08110514658348 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3193612104146573038&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "princeton.edu;princeton.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Few-shot Generation via Recalling Brain-Inspired Episodic-Semantic Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70996", "id": "dxPcdEeQk9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/17826a22eb8b58494dfdfca61e772c39-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dxPcdEeQk9", "openreview": "https://openreview.net/forum?id=dxPcdEeQk9", "poster": "/media/PosterPDFs/NeurIPS%202023/70996.png?t=1699598931.0794475", "slides": 
"https://nips.cc/virtual/2023/poster/70996", "video": "https://nips.cc/virtual/2023/poster/70996", "author_site": "Zhibin Duan, Zhiyi Lv, Chaojie Wang, Bo Chen, Bo An, Mingyuan Zhou", "tldr": "", "abstract": "Aimed at adapting a generative model to a novel generation task with only a few given data samples, the capability of few-shot generation is crucial for many real-world applications with limited data, \\emph{e.g.}, artistic domains.\nInstead of training from scratch, recent works tend to leverage the prior knowledge stored in previous datasets, which is quite similar to the memory mechanism of human intelligence, but few of these works directly imitate the memory-recall mechanism that humans make good use of in accomplishing creative tasks, \\emph{e.g.}, painting and writing.\nInspired by the memory mechanism of human brain, in this work, we carefully design a variational structured memory module (VSM), which can simultaneously store both episodic and semantic memories to assist existing generative models efficiently recall these memories during sample generation.\nMeanwhile, we introduce a bionic memory updating strategy for the conversion between episodic and semantic memories, which can also model the uncertainty during conversion.\nThen, we combine the developed VSM with various generative models under the Bayesian framework, and evaluate these memory-augmented generative models with few-shot generation tasks, demonstrating the effectiveness of our methods.", "keywords": "Generative Model;Memory-augmented Generative Model", "primary_area": "", "supplementary_material": "", "author": "Zhibin Duan;Lv Zhiyi;Chaojie Wang;Bo Chen;Bo An;Mingyuan Zhou", "authorids": "~Zhibin_Duan1;~Lv_Zhiyi1;~Chaojie_Wang1;~Bo_Chen1;~Bo_An2;~Mingyuan_Zhou1", "gender": "M;M;M;M;M;M", "homepage": ";;https://chaojiewang94.github.io/;http://web.xidian.edu.cn/bchen/en/index.html;https://personal.ntu.edu.sg/boan/;http://mingyuanzhou.github.io", "dblp": "268/2560;;134/9314-1;89/5615-1;42/6178-1.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=bITyHaEAAAAJ;https://scholar.google.com.hk/citations?user=OJBfthMAAAAJ;https://scholar.google.com/citations?hl=en;;PEEpuNwAAAAJ;LXwCIisAAAAJ", "orcid": ";;;0000-0001-5151-9388;0000-0002-7064-7438;", "linkedin": ";;;;;", "or_profile": "~Zhibin_Duan1;~Lv_Zhiyi1;~Chaojie_Wang1;~Bo_Chen1;~Bo_An2;~Mingyuan_Zhou1", "aff": "Xidian University;;Nanyang Technological University;Xidian University;Nanyang Technological University;Google", "aff_domain": "xidian.edu;;ntu.edu;xidian.edu.cn;ntu.edu.sg;google.com", "position": "PhD student;;Researcher;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nduan2023fewshot,\ntitle={Few-shot Generation via Recalling Brain-Inspired Episodic-Semantic Memory},\nauthor={Zhibin Duan and Lv Zhiyi and Chaojie Wang and Bo Chen and Bo An and Mingyuan Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dxPcdEeQk9}\n}", "github": "", "project": "", "reviewers": "cAj4;xiFB;zaLT;aZnV;jSnX", "pdf_size": 5240611, "rating": "6;6;6;7;7", "confidence": "3;2;3;3;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;2;2;1;3", "wc_summary": "63;129;135;59;69", "wc_strengths": "67;36;70;32;167", "wc_weaknesses": "78;66;381;90;175", "wc_questions": "81;197;5;209;90", "wc_limitations": "36;7;23;8;1", "wc_review": "325;435;614;398;502", "wc_reply_reviewers": "46;60;85;32;36", "wc_reply_authors": "9;210;9;0;17", 
"reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;1;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 91.0, 33.68085509603342 ], "wc_strengths_avg": [ 74.4, 48.82868009684472 ], "wc_weaknesses_avg": [ 158.0, 117.90335024926136 ], "wc_questions_avg": [ 116.4, 76.72183522309669 ], "wc_limitations_avg": [ 15.0, 12.759310326189265 ], "wc_review_avg": [ 454.8, 98.01918179621782 ], "wc_reply_reviewers_avg": [ 51.8, 19.20833152566875 ], "wc_reply_authors_avg": [ 49.0, 80.67961328613319 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13687994765058124520&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xidian.edu;;ntu.edu;xidian.edu.cn;ntu.edu.sg;google.com", "author_num": 6, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Xidian University;Nanyang Technological University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.ntu.edu.sg;https://www.google.com", "aff_unique_abbr": "Xidian;NTU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;2", "aff_country_unique": "China;Singapore;United States" }, { "title": "Equivariant Single View Pose Prediction Via Induced and Restriction Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70995", "id": "dxVN2fZjx6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/93b3d975f9a2448964a906199db98a9d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dxVN2fZjx6", "openreview": "https://openreview.net/forum?id=dxVN2fZjx6", "poster": "/media/PosterPDFs/NeurIPS%202023/70995.png?t=1697348884.5832915", "slides": "https://nips.cc/virtual/2023/poster/70995", "video": "https://nips.cc/virtual/2023/poster/70995", "author_site": "Owen Howell, David Klee, Ondrej Biza, Linfeng Zhao, Robin Walters", "tldr": "", "abstract": "Learning about the three-dimensional world from two-dimensional images is a fundamental problem in computer vision. An ideal neural network architecture for such tasks would leverage the fact that objects can be rotated and translated in three dimensions to make predictions about novel images. However, imposing $SO(3)$-equivariance on two-dimensional inputs is difficult because the group of three-dimensional rotations does not have a natural action on the two-dimensional plane. Specifically, it is possible that an element of $SO(3)$ will rotate an image out of plane. We show that an algorithm that learns a three-dimensional representation of the world from two dimensional images must satisfy certain consistency properties which we formulate as $SO(2)$-equivariance constraints. We use the induced representation of $SO(2)$ on $SO(3)$ to construct and classify architectures that have two-dimensional inputs and \nwhich satisfy these consistency constraints. We prove that any architecture which respects said consistency constraints can be realized as an instance of our construction. We show that three previously proposed neural architectures for 3D pose prediction are special cases of our construction. 
We propose a new algorithm that is a learnable generalization of previously considered methods. We test our architecture on three pose predictions task and achieve SOTA results on both the PASCAL3D+ and SYMSOL pose estimation tasks.", "keywords": "Equivarient Machine Learning;Pose Prediction;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/f0d05a4c0412812c62a15315d6e951cee6c5f6e2.zip", "author": "Owen Lewis Howell;David Klee;Ondrej Biza;Linfeng Zhao;Robin Walters", "authorids": "~Owen_Lewis_Howell1;~David_Klee1;~Ondrej_Biza1;~Linfeng_Zhao1;~Robin_Walters1", "gender": "M;M;M;;M", "homepage": ";;https://sites.google.com/view/obiza;http://lfzhao.com;http://www.robinwalters.com", "dblp": ";313/9930;230/8616.html;221/4652;258/3416", "google_scholar": "8bmIJtAAAAAJ;TJEEkJoAAAAJ;Gi9Xq8YAAAAJ;;fnprJmUAAAAJ", "orcid": ";;0000-0003-3390-8050;;", "linkedin": ";;ond%C5%99ej-b%C3%AD%C5%BEa-a9405353/;;", "or_profile": "~Owen_Lewis_Howell1;~David_Klee1;~Ondrej_Biza1;~Linfeng_Zhao1;~Robin_Walters1", "aff": "Northeastern University;Boston Dynamics Artificial Intelligence Institute;Northeastern University;Boston Dynamics AI Institute;Northeastern University ", "aff_domain": "neu.edu;theaiinstitute.com;northeastern.edu;theaiinstitute.com;northeastern.edu", "position": "PhD student;Intern;PhD student;Research Intern;Assistant Professor", "bibtex": "@inproceedings{\nhowell2023equivariant,\ntitle={Equivariant Single View Pose Prediction Via Induced and Restriction Representations},\nauthor={Owen Lewis Howell and David Klee and Ondrej Biza and Linfeng Zhao and Robin Walters},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dxVN2fZjx6}\n}", "github": "", "project": "", "reviewers": "hj9M;Nd1u;Uxrh;beVj;mVf3", "pdf_size": 1260558, "rating": "4;5;6;6;6", "confidence": "3;1;3;4;2", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;2;2", "wc_summary": "60;75;64;92;53", "wc_strengths": "26;31;54;77;56", "wc_weaknesses": "241;37;221;337;55", "wc_questions": "34;77;2;60;4", "wc_limitations": "31;1;8;1;4", "wc_review": "392;221;349;567;172", "wc_reply_reviewers": "127;0;66;95;0", "wc_reply_authors": "661;0;122;290;0", "reply_reviewers": "1;0;1;2;0", "reply_authors": "2;1;2;3;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 68.8, 13.614697940094006 ], "wc_strengths_avg": [ 48.8, 18.497567407634985 ], "wc_weaknesses_avg": [ 178.2, 114.98591218057975 ], "wc_questions_avg": [ 35.4, 29.79664410634191 ], "wc_limitations_avg": [ 9.0, 11.29601699715435 ], "wc_review_avg": [ 340.2, 139.06746564168054 ], "wc_reply_reviewers_avg": [ 57.6, 50.835420722169694 ], "wc_reply_authors_avg": [ 214.6, 247.24044976500105 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.196116135138184, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16394843170222020644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "neu.edu;theaiinstitute.com;northeastern.edu;theaiinstitute.com;northeastern.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Northeastern University;Boston Dynamics Artificial Intelligence 
Institute;Boston Dynamics AI Institute", "aff_unique_dep": ";Artificial Intelligence;AI Institute", "aff_unique_url": "https://www.northeastern.edu;https://www.bostondynamics.com/;https://www.bostondynamics.com/", "aff_unique_abbr": "NEU;BD AI;BD AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Compositional Foundation Models for Hierarchical Planning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70994", "id": "dyXNh5HLq3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46a126492ea6fb87410e55a58df2e189-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dyXNh5HLq3", "openreview": "https://openreview.net/forum?id=dyXNh5HLq3", "poster": "/media/PosterPDFs/NeurIPS%202023/70994.png?t=1702492409.9302647", "slides": "https://nips.cc/virtual/2023/poster/70994", "video": "https://nips.cc/virtual/2023/poster/70994", "author_site": "Anurag Ajay, Seungwook Han, Yilun Du, Shuang Li, Abhi Gupta, Tommi Jaakkola, Josh Tenenbaum, Leslie Kaelbling, Akash Srivastava, Pulkit Agrawal", "tldr": "", "abstract": "To make effective decisions in novel environments with long-horizon goals, it is crucial to engage in hierarchical reasoning across spatial and temporal scales. This entails planning abstract subgoal sequences, visually reasoning about the underlying plans, and executing actions in accordance with the devised plan through visual-motor control. We propose Compositional Foundation Models for Hierarchical Planning (HiP), a foundation model which leverages multiple expert foundation model trained on language, vision and action data individually jointly together to solve long-horizon tasks. We use a large language model to construct symbolic plans that are grounded in the environment through a large video diffusion model. Generated video plans are then grounded to visual-motor control, through an inverse dynamics model that infers actions from generated videos. To enable effective reasoning within this hierarchy, we enforce consistency between the models via iterative refinement. We illustrate the efficacy and adaptability of our approach in three different long-horizon table-top manipulation tasks.", "keywords": "Foundation Models;Composition;Hierarchical Planning", "primary_area": "", "supplementary_material": "/attachment/434d3d852f62b8955e6e807d768955372b0e178b.zip", "author": "Anurag Ajay;Seungwook Han;Yilun Du;Shuang Li;Abhi Gupta;Tommi S. Jaakkola;Joshua B. 
Tenenbaum;Leslie Pack Kaelbling;Akash Srivastava;Pulkit Agrawal", "authorids": "~Anurag_Ajay1;~Seungwook_Han1;~Yilun_Du1;~Shuang_Li5;~Abhi_Gupta1;~Tommi_S._Jaakkola1;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1;~Akash_Srivastava1;~Pulkit_Agrawal1", "gender": "M;;;;M;;;F;M;M", "homepage": "https://anuragajay.github.io/;;https://yilundu.github.io;;;;;http://people.csail.mit.edu/lpk/;http://akashgit.github.io;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": "180/5483;119/3428;204/4379;;;;t/JoshuaBTenenbaum;k/LesliePackKaelbling;24/9528;149/2672", "google_scholar": ";B6tpjKkAAAAJ;;;ynyPc1kAAAAJ;;;IcasIiwAAAAJ;https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ;UpZmJI0AAAAJ", "orcid": ";;;;;;;0000-0001-6054-7145;;", "linkedin": ";;;;;;;;https://uk.linkedin.com/in/akash-srivastava-aa97361b;", "or_profile": "~Anurag_Ajay1;~Seungwook_Han1;~Yilun_Du1;~Shuang_Li5;~Abhi_Gupta1;~Tommi_S._Jaakkola1;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1;~Akash_Srivastava1;~Pulkit_Agrawal1", "aff": "Massachusetts Institute of Technology;MIT-IBM Watson AI Lab;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;MIT-IBM Watson AI Research Lab;Massachusetts Institute of Technology", "aff_domain": "mit.edu;ibm.com;mit.edu;;mit.edu;;mit.edu;mit.edu;ibm.com;mit.edu", "position": "PhD student;Researcher;PhD student;;PhD student;;Professor;Full Professor;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\najay2023compositional,\ntitle={Compositional Foundation Models for Hierarchical Planning},\nauthor={Anurag Ajay and Seungwook Han and Yilun Du and Shuang Li and Abhi Gupta and Tommi S. Jaakkola and Joshua B. Tenenbaum and Leslie Pack Kaelbling and Akash Srivastava and Pulkit Agrawal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dyXNh5HLq3}\n}", "github": "", "project": "", "reviewers": "3JTC;zvNJ;7a1H;z2ah", "pdf_size": 27335160, "rating": "4;5;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "242;131;127;38", "wc_strengths": "123;78;76;46", "wc_weaknesses": "479;215;335;24", "wc_questions": "3;55;22;120", "wc_limitations": "1;19;1;6", "wc_review": "848;498;561;234", "wc_reply_reviewers": "0;24;52;23", "wc_reply_authors": "63;63;202;63", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 134.5, 72.34811676885585 ], "wc_strengths_avg": [ 80.75, 27.48977082479954 ], "wc_weaknesses_avg": [ 263.25, 166.78185602756673 ], "wc_questions_avg": [ 50.0, 44.4915722356493 ], "wc_limitations_avg": [ 6.75, 7.361215932167728 ], "wc_review_avg": [ 535.25, 218.29724574533688 ], "wc_reply_reviewers_avg": [ 24.75, 18.430613120566555 ], "wc_reply_authors_avg": [ 97.75, 60.188765563018485 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6553254235550482566&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "mit.edu;ibm.com;mit.edu;;mit.edu;;mit.edu;mit.edu;ibm.com;mit.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0", 
"aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GMSF: Global Matching Scene Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70993", "id": "dybrsuNAB9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cb1c4782f159b55380b4584671c4fd88-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dybrsuNAB9", "openreview": "https://openreview.net/forum?id=dybrsuNAB9", "poster": "/media/PosterPDFs/NeurIPS%202023/70993.png?t=1701771444.8697917", "slides": "https://nips.cc/virtual/2023/poster/70993", "video": "https://nips.cc/virtual/2023/poster/70993", "author_site": "Yushan Zhang, Johan Edstedt, Bastian Wandt, Per-Erik Forssen, Maria Magnusson, Michael Felsberg", "tldr": "", "abstract": "We tackle the task of scene flow estimation from point clouds. Given a source and a target point cloud, the objective is to estimate a translation from each point in the source point cloud to the target, resulting in a 3D motion vector field. Previous dominant scene flow estimation methods require complicated coarse-to-fine or recurrent architectures as a multi-stage refinement. In contrast, we propose a significantly simpler single-scale one-shot global matching to address the problem. Our key finding is that reliable feature similarity between point pairs is essential and sufficient to estimate accurate scene flow. We thus propose to decompose the feature extraction step via a hybrid local-global-cross transformer architecture which is crucial to accurate and robust feature representations. Extensive experiments show that the proposed Global Matching Scene Flow (GMSF) sets a new state-of-the-art on multiple scene flow estimation benchmarks. On FlyingThings3D, with the presence of occlusion points, GMSF reduces the outlier percentage from the previous best performance of 27.4% to 5.6%. On KITTI Scene Flow, without any fine-tuning, our proposed method shows state-of-the-art performance. On the Waymo-Open dataset, the proposed method outperforms previous methods by a large margin. 
The code is available at https://github.com/ZhangYushan3/GMSF.", "keywords": "Scene flow;point clouds;transformers", "primary_area": "", "supplementary_material": "/attachment/46b69ce46c8c56f8ca971cb15be44c17e430e101.zip", "author": "Yushan Zhang;Johan Edstedt;Bastian Wandt;Per-Erik Forssen;Maria Magnusson;Michael Felsberg", "authorids": "~Yushan_Zhang1;~Johan_Edstedt1;~Bastian_Wandt2;~Per-Erik_Forssen1;~Maria_Magnusson1;~Michael_Felsberg2", "gender": "F;M;M;M;F;", "homepage": "https://liu.se/en/employee/yuszh17;;http://bastianwandt.de;;https://liu.se/medarbetare/segma96;https://liu.se/en/employee/micfe03", "dblp": ";289/1724;;84/306;36/6944;00/78", "google_scholar": "mvY4rdIAAAAJ;Ul-vMR0AAAAJ;z4aXEBYAAAAJ;SZ6jH-4AAAAJ;;https://scholar.google.se/citations?hl=en", "orcid": ";0000-0002-1019-8634;;0000-0002-5698-5983;0000-0002-9072-2204;0000-0002-6096-3648", "linkedin": "https://se.linkedin.com/in/yushan-zhang-402395294;;;per-erik-forss\u00e9n-640a59130/;;https://linkedin.com/in/michael-felsberg-668a202", "or_profile": "~Yushan_Zhang1;~Johan_Edstedt1;~Bastian_Wandt2;~Per-Erik_Forssen1;~Maria_Magnusson1;~Michael_Felsberg2", "aff": "Link\u00f6ping University;Ericsson Research;Link\u00f6ping University;Link\u00f6ping University;Link\u00f6ping University;Link\u00f6ping University", "aff_domain": "liu.se;ericsson.com;liu.se;liu.se;liu.se;liu.se", "position": "PhD student;Intern;Assistant Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023gmsf,\ntitle={{GMSF}: Global Matching Scene Flow},\nauthor={Yushan Zhang and Johan Edstedt and Bastian Wandt and Per-Erik Forssen and Maria Magnusson and Michael Felsberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dybrsuNAB9}\n}", "github": "", "project": "", "reviewers": "UZxB;YAmp;QN7Z;D7Yn;vPo9", "pdf_size": 641687, "rating": "5;5;5;5;7", "confidence": "4;4;5;5;5", "soundness": "3;3;2;3;3", "novelty": "2;2;2;2;3", "presentation": "3;3;3;3;3", "wc_summary": "61;134;22;116;62", "wc_strengths": "138;98;35;151;37", "wc_weaknesses": "266;188;109;216;304", "wc_questions": "34;5;15;31;16", "wc_limitations": "6;10;5;11;1", "wc_review": "505;435;186;525;420", "wc_reply_reviewers": "89;85;33;53;316", "wc_reply_authors": "492;0;0;0;403", "reply_reviewers": "2;1;1;1;2", "reply_authors": "2;1;1;1;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.0, 40.63496031744094 ], "wc_strengths_avg": [ 91.8, 48.799180320984895 ], "wc_weaknesses_avg": [ 216.6, 67.0450594749531 ], "wc_questions_avg": [ 20.2, 10.796295661012623 ], "wc_limitations_avg": [ 6.6, 3.6110940170535577 ], "wc_review_avg": [ 414.2, 120.88242221266086 ], "wc_reply_reviewers_avg": [ 115.2, 102.51321865983918 ], "wc_reply_authors_avg": [ 179.0, 221.02850494902236 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=188466894732592271&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "liu.se;ericsson.com;liu.se;liu.se;liu.se;liu.se", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Link\u00f6ping University;Ericsson", 
"aff_unique_dep": ";Research", "aff_unique_url": "https://www.liu.se;https://www.ericsson.com/research", "aff_unique_abbr": "LiU;Ericsson", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Sweden" }, { "title": "Characterizing Out-of-Distribution Error via Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70992", "id": "dz5X8hnfJc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38fd51cf36f28566230a93a5fbeaabbf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dz5X8hnfJc", "openreview": "https://openreview.net/forum?id=dz5X8hnfJc", "poster": "/media/PosterPDFs/NeurIPS%202023/70992.png?t=1701578542.2635715", "slides": "https://nips.cc/virtual/2023/poster/70992", "video": "https://nips.cc/virtual/2023/poster/70992", "author_site": "Yuzhe Lu, Yilong Qin, Runtian Zhai, Andrew Shen, Ketong Chen, Zhenlin Wang, Soheil Kolouri, Simon Stepputtis, Joseph Campbell, Katia Sycara", "tldr": "", "abstract": "Out-of-distribution (OOD) data poses serious challenges in deployed machine learning models,\nso methods of predicting a model's performance on OOD data without labels are important for machine learning safety.\nWhile a number of methods have been proposed by prior work, they often underestimate the actual error, sometimes by a large margin, which greatly impacts their applicability to real tasks. In this work, we identify *pseudo-label shift*, or the difference between the predicted and true OOD label distributions, as a key indicator of this underestimation. Based on this observation, we introduce a novel method for estimating model performance by leveraging optimal transport theory, Confidence Optimal Transport (COT), and show that it provably provides more robust error estimates in the presence of pseudo-label shift. Additionally, we introduce an empirically-motivated variant of COT, Confidence Optimal Transport with Thresholding (COTT), which applies thresholding to the individual transport costs and further improves the accuracy of COT's error estimates. We evaluate COT and COTT on a variety of standard benchmarks that induce various types of distribution shift -- synthetic, novel subpopulation, and natural -- and show that our approaches significantly outperform existing state-of-the-art methods with up to 3x lower prediction errors.", "keywords": "Distribution Shift;OOD Error Prediction;Optimal Transport;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/94cd618dc13dcce7218e56d52e705a057ab63e37.zip", "author": "Yuzhe Lu;Yilong Qin;Runtian Zhai;Andrew Shen;Ketong Chen;Zhenlin Wang;Soheil Kolouri;Simon Stepputtis;Joseph Campbell;Katia P. 
Sycara", "authorids": "~Yuzhe_Lu1;~Yilong_Qin1;~Runtian_Zhai1;~Andrew_Shen1;~Ketong_Chen1;~Zhenlin_Wang3;~Soheil_Kolouri1;~Simon_Stepputtis1;~Joseph_Campbell1;~Katia_P._Sycara1", "gender": "M;M;M;M;;M;M;;;F", "homepage": ";https://www.yilongq.in/about;http://www.runtianzhai.com;https://www.andrew-shen.net/;;https://criss-wang.github.io/;https://skolouri.github.io/;https://simonstepputtis.com/;;", "dblp": "263/1308;301/7967;242/8411;;;;143/9637;192/7092;179/2732;s/KatiaPSycara", "google_scholar": "R6bq6u4AAAAJ;CFeyF0EAAAAJ;EXd0ES8AAAAJ;;;dbnNfj8AAAAJ;yREBSy0AAAAJ;WUQgzsAAAAAJ;1NmM6OUAAAAJ;VWv6a9kAAAAJ", "orcid": ";;0000-0003-3332-3466;;;0009-0006-8670-7286;0000-0001-8495-5362;0009-0003-0519-3454;;", "linkedin": ";yilongqin/;;andrew-shen-141443177/;;zhenlin-wang/;skolouri/;simon-stepputtis/;;", "or_profile": "~Yuzhe_Lu1;~Yilong_Qin1;~Runtian_Zhai1;~Andrew_Shen1;~Ketong_Chen1;~Zhenlin_Wang3;~Soheil_Kolouri1;~Simon_Stepputtis1;~Joseph_Campbell1;~Katia_P._Sycara1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;;Carnegie Mellon University;Vanderbilt University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cmu.edu;cs.cmu.edu;;cmu.edu;vanderbilt.edu;cmu.edu;cmu.edu;cmu.edu", "position": "MS student;MS student;PhD student;MS student;;MS student;Assistant Professor;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nlu2023characterizing,\ntitle={Characterizing Out-of-Distribution Error via Optimal Transport},\nauthor={Yuzhe Lu and Yilong Qin and Runtian Zhai and Andrew Shen and Ketong Chen and Zhenlin Wang and Soheil Kolouri and Simon Stepputtis and Joseph Campbell and Katia P. 
Sycara},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dz5X8hnfJc}\n}", "github": "", "project": "", "reviewers": "chpC;1ruP;dn4Z;1Jqy;HB65", "pdf_size": 1919434, "rating": "5;5;6;7;7", "confidence": "3;2;3;3;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;2;3;3", "wc_summary": "96;31;64;85;130", "wc_strengths": "52;19;50;53;127", "wc_weaknesses": "196;33;61;158;67", "wc_questions": "3;84;252;2;37", "wc_limitations": "28;7;1;20;12", "wc_review": "375;174;428;318;373", "wc_reply_reviewers": "27;17;24;107;15", "wc_reply_authors": "0;0;0;381;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 81.2, 32.956941605676946 ], "wc_strengths_avg": [ 60.2, 35.72897983430257 ], "wc_weaknesses_avg": [ 103.0, 62.664184347998976 ], "wc_questions_avg": [ 75.6, 93.12915762531088 ], "wc_limitations_avg": [ 13.6, 9.520504188329523 ], "wc_review_avg": [ 333.6, 87.05538466976067 ], "wc_reply_reviewers_avg": [ 38.0, 34.77930419085465 ], "wc_reply_authors_avg": [ 76.2, 152.4 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9475446865553786184&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.cmu.edu;cs.cmu.edu;cmu.edu;cs.cmu.edu;;cmu.edu;vanderbilt.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;0;0;0", "aff_unique_norm": "Carnegie Mellon University;Vanderbilt University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.vanderbilt.edu", "aff_unique_abbr": "CMU;Vanderbilt", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hypernetwork-based Meta-Learning for Low-Rank Physics-Informed Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70991", "id": "dzqKAM2sKa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/24f8dd1b8f154f1ee0d7a59e368eccf3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=dzqKAM2sKa", "openreview": "https://openreview.net/forum?id=dzqKAM2sKa", "poster": "/media/PosterPDFs/NeurIPS%202023/70991.png?t=1702055980.7639167", "slides": "https://nips.cc/virtual/2023/poster/70991", "video": "https://nips.cc/virtual/2023/poster/70991", "author_site": "Woojin Cho, Kookjin Lee, Donsub Rim, Noseong Park", "tldr": "", "abstract": "In various engineering and applied science applications, repetitive numerical simulations of partial differential equations (PDEs) for varying input parameters are often required (e.g., aircraft shape optimization over many design parameters) and solvers are required to execute rapidly. In this study, we suggest a path that potentially opens up the possibility for physics-informed neural networks (PINNs), emerging deep-learning-based solvers, to be considered as one such solver. 
Although PINNs have pioneered a proper integration of deep learning and scientific computing, they require repetitive, time-consuming training of neural networks, which is not suitable for many-query scenarios. To address this issue, we propose lightweight low-rank PINNs containing only hundreds of model parameters and an associated hypernetwork-based meta-learning algorithm, which allows efficient approximation of solutions of PDEs for varying ranges of PDE input parameters. Moreover, we show that the proposed method is effective in overcoming a challenging issue known as the \"failure modes\" of PINNs.", "keywords": "Scientific machine learning;Physics-informed neural networks;Meta learning;Hypernetworks", "primary_area": "", "supplementary_material": "/attachment/94eaaaa931a58f73786440538c9d672008431757.zip", "author": "Woojin Cho;Kookjin Lee;Donsub Rim;Noseong Park", "authorids": "~Woojin_Cho1;~Kookjin_Lee1;~Donsub_Rim1;~Noseong_Park1", "gender": "M;M;M;", "homepage": "https://woojin-cho.github.io/;https://scholar.google.com/citations?hl=en&user=KL89hVQAAAAJ&view_op=list_works;https://dsrim.github.io;", "dblp": ";122/5103;239/0132;", "google_scholar": "cqIj5tQAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": ";;0000-0002-6721-2070;", "linkedin": "woojin-cho-02b905264/;;;", "or_profile": "~Woojin_Cho1;~Kookjin_Lee1;~Donsub_Rim1;~Noseong_Park1", "aff": "Yonsei University;Arizona State University;Washington University, Saint Louis;", "aff_domain": "yonsei.ac.kr;asu.edu;wustl.edu;", "position": "MS student;Assistant Professor;Assistant Professor;", "bibtex": "@inproceedings{\ncho2023hypernetworkbased,\ntitle={Hypernetwork-based Meta-Learning for Low-Rank Physics-Informed Neural Networks},\nauthor={Woojin Cho and Kookjin Lee and Donsub Rim and Noseong Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=dzqKAM2sKa}\n}", "github": "", "project": "", "reviewers": "W4Fc;wUrs;uxdN;eb2d", "pdf_size": 2306890, "rating": "6;7;7;8", "confidence": "3;2;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "148;61;78;158", "wc_strengths": "56;131;55;203", "wc_weaknesses": "42;145;73;95", "wc_questions": "55;178;58;2", "wc_limitations": "29;13;7;21", "wc_review": "330;528;271;479", "wc_reply_reviewers": "10;21;20;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.25, 42.32832975679527 ], "wc_strengths_avg": [ 111.25, 61.287743472900026 ], "wc_weaknesses_avg": [ 88.75, 37.539146234297874 ], "wc_questions_avg": [ 73.25, 64.44910782935634 ], "wc_limitations_avg": [ 17.5, 8.2915619758885 ], "wc_review_avg": [ 402.0, 105.05950694725347 ], "wc_reply_reviewers_avg": [ 12.75, 8.525696452489967 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5321832260836527260&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 9, "email": "yonsei.ac.kr;asu.edu;wustl.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Yonsei University;Arizona State 
University;Washington University in St. Louis", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.asu.edu;https://wustl.edu", "aff_unique_abbr": "Yonsei;ASU;WUSTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saint Louis", "aff_country_unique_index": "0;1;1", "aff_country_unique": "South Korea;United States" }, { "title": "Private (Stochastic) Non-Convex Optimization Revisited: Second-Order Stationary Points and Excess Risks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70990", "id": "e0pRF9tOtm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cede701f00079e43d053ac57b1e75c3e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e0pRF9tOtm", "openreview": "https://openreview.net/forum?id=e0pRF9tOtm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70990", "video": "https://nips.cc/virtual/2023/poster/70990", "author_site": "Daogao Liu, Arun Ganesh, Sewoong Oh, Abhradeep Guha Thakurta", "tldr": "", "abstract": "We reconsider the challenge of non-convex optimization under the differential privacy constraint. Building upon the previous variance-reduced algorithm SpiderBoost, we propose a novel framework that employs two types of gradient oracles: one that estimates the gradient at a single point and a more cost-effective option that calculates the gradient difference between two points. Our framework can ensure continuous accuracy of gradient estimations and subsequently enhance the rates of identifying second-order stationary points.\nAdditionally, we consider a more challenging task by attempting to locate the global minima of a non-convex objective via the exponential mechanism with almost no assumptions. Our preliminary results suggest that the regularized exponential mechanism can effectively emulate previous empirical and population risk bounds, negating the need for smoothness assumptions for algorithms with polynomial running time. 
Furthermore, with running time factors excluded, the exponential mechanism demonstrates promising population risk bound performance, and we provide a nearly matching lower bound.", "keywords": "Differential Privacy;Non-convex optimization;Stationary points;Exponential Mechanism", "primary_area": "", "supplementary_material": "/attachment/173b5f4b1ff90b7757c5e1912b86f5b3ddccffff.pdf", "author": "Daogao Liu;Arun Ganesh;Sewoong Oh;Abhradeep Guha Thakurta", "authorids": "~Daogao_Liu1;~Arun_Ganesh1;~Sewoong_Oh1;~Abhradeep_Guha_Thakurta1", "gender": "M;M;M;M", "homepage": "https://daogaoliu.github.io/;https://people.eecs.berkeley.edu/~arunganesh/;https://homes.cs.washington.edu/~sewoong/;https://athakurta.squarespace.com/", "dblp": "245/4078;201/4732;80/4366;31/8315", "google_scholar": "auA3AaQAAAAJ;fmwchbsAAAAJ;55TAOdgAAAAJ;1rV69hMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Daogao_Liu1;~Arun_Ganesh1;~Sewoong_Oh1;~Abhradeep_Guha_Thakurta1", "aff": "University of Washington, Seattle;Google;University of Washington;Google", "aff_domain": "uw.edu;google.com;uw.edu;google.com", "position": "PhD student;Researcher;Associate Professor;Senior Research Scientist", "bibtex": "@inproceedings{\nliu2023private,\ntitle={Private (Stochastic) Non-Convex Optimization Revisited: Second-Order Stationary Points and Excess Risks},\nauthor={Daogao Liu and Arun Ganesh and Sewoong Oh and Abhradeep Guha Thakurta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e0pRF9tOtm}\n}", "github": "", "project": "", "reviewers": "52pN;xUe2;mRqH;kK73;nTrG", "pdf_size": 410671, "rating": "5;7;7;7;8", "confidence": "2;1;4;3;3", "soundness": "3;3;4;3;3", "novelty": "3;3;3;3;4", "presentation": "2;3;4;2;3", "wc_summary": "60;60;103;148;100", "wc_strengths": "30;49;77;76;50", "wc_weaknesses": "172;24;55;82;51", "wc_questions": "46;2;5;95;116", "wc_limitations": "2;8;1;28;1", "wc_review": "310;143;241;429;318", "wc_reply_reviewers": "42;0;0;17;15", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 94.2, 32.69495373907111 ], "wc_strengths_avg": [ 56.4, 17.89525076661403 ], "wc_weaknesses_avg": [ 76.8, 51.0270516491008 ], "wc_questions_avg": [ 52.8, 46.23159093087755 ], "wc_limitations_avg": [ 8.0, 10.334408546211051 ], "wc_review_avg": [ 288.2, 94.33853931453466 ], "wc_reply_reviewers_avg": [ 14.8, 15.380507143784302 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.32025630761017426, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15409243485494996635&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": "uw.edu;google.com;uw.edu;google.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Washington;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Seattle;Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Aligning 
Gradient and Hessian for Neural Signed Distance Function", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70989", "id": "e0tt2G8hqf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c87bd5843849884e9430f1693b018d71-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e0tt2G8hqf", "openreview": "https://openreview.net/forum?id=e0tt2G8hqf", "poster": "/media/PosterPDFs/NeurIPS%202023/70989.png?t=1697525514.5916586", "slides": "https://nips.cc/virtual/2023/poster/70989", "video": "https://nips.cc/virtual/2023/poster/70989", "author_site": "Ruian Wang, Zixiong Wang, Yunxiao Zhang, Shuangmin Chen, Shiqing Xin, Changhe Tu, Wenping Wang", "tldr": "", "abstract": "The Signed Distance Function (SDF), as an implicit surface representation, provides a crucial method for reconstructing a watertight surface from unorganized point clouds. The SDF has a fundamental relationship with the principles of surface vector calculus. Given a smooth surface, there exists a thin-shell space in which the SDF is differentiable everywhere such that the gradient of the SDF is an eigenvector of its Hessian matrix, with a corresponding eigenvalue of zero. In this paper, we introduce a method to directly learn the SDF from point clouds in the absence of normals. Our motivation is grounded in a fundamental observation: aligning the gradient and the Hessian of the SDF provides a more efficient mechanism to govern gradient directions. This, in turn, ensures that gradient changes more accurately reflect the true underlying variations in shape. Extensive experimental results demonstrate its ability to accurately recover the underlying shape while effectively suppressing the presence of ghost geometry.", "keywords": "implicit neural representation;signed distance function;shape operator", "primary_area": "", "supplementary_material": "/attachment/4ef847054011a0bc2830ebfca88090bf843862d0.pdf", "author": "Ruian Wang;Zixiong Wang;Yunxiao Zhang;Shuangmin Chen;Shiqing Xin;Changhe Tu;Wenping Wang", "authorids": "~Ruian_Wang1;~Zixiong_Wang1;~Yunxiao_Zhang2;~Shuangmin_Chen1;~Shiqing_Xin1;~Changhe_Tu1;~Wenping_Wang1", "gender": "M;;M;F;M;M;M", "homepage": ";https://bearprin.com/;;https://xk.qust.edu.cn/info/1023/6474.htm;https://irc.cs.sdu.edu.cn/~shiqing/index.html;;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html", "dblp": ";;;;72/3380;98/6239;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=zh-CN;;28shvv0AAAAJ", "orcid": "0009-0001-4439-0210;0000-0002-6170-7339;0000-0001-7649-6493;;0000-0001-8452-8723;;0000-0002-2284-3952", "linkedin": ";;;;;;", "or_profile": "~Ruian_Wang1;~Zixiong_Wang1;~Yunxiao_Zhang2;~Shuangmin_Chen1;~Shiqing_Xin1;~Changhe_Tu1;~Wenping_Wang1", "aff": "Shandong University;Shandong University;Shandong University;Qingdao University of Science and Technology;Shandong University;Shandong University;Texas A&M University - College Station", "aff_domain": "sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;qust.edu.cn;sdu.edu.cn;sdu.edu.cn;tamu.edu", "position": "MS student;MS student;MS student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023aligning,\ntitle={Aligning Gradient and Hessian for Neural Signed Distance Function},\nauthor={Ruian Wang and Zixiong Wang and Yunxiao Zhang and Shuangmin Chen and Shiqing Xin and Changhe Tu and Wenping Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e0tt2G8hqf}\n}", "github": "", "project": "", "reviewers": "csEg;krE5;ApDg;JRpq;XXzk", "pdf_size": 40885730, "rating": "5;5;6;6;6", "confidence": "5;5;4;3;4", "soundness": "2;2;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;1;2;3;3", "wc_summary": "94;79;66;53;67", "wc_strengths": "43;23;34;34;65", "wc_weaknesses": "210;259;109;170;88", "wc_questions": "3;88;169;3;18", "wc_limitations": "2;2;57;23;17", "wc_review": "352;451;435;283;255", "wc_reply_reviewers": "43;434;0;16;22", "wc_reply_authors": "29;599;0;33;41", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;4;1;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 71.8, 13.818827736099758 ], "wc_strengths_avg": [ 39.8, 14.105318146004365 ], "wc_weaknesses_avg": [ 167.2, 63.12970774524463 ], "wc_questions_avg": [ 56.2, 64.58296989145049 ], "wc_limitations_avg": [ 20.2, 20.173249614278806 ], "wc_review_avg": [ 355.2, 78.49687891884619 ], "wc_reply_reviewers_avg": [ 103.0, 166.07227342335023 ], "wc_reply_authors_avg": [ 140.4, 229.71773984609897 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8728715609439693, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6383995863856794757&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;qust.edu.cn;sdu.edu.cn;sdu.edu.cn;tamu.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;2", "aff_unique_norm": "Shandong University;Qingdao University of Science and Technology;Texas A&M University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sdu.edu.cn;http://www.qust.edu.cn/;https://www.tamu.edu", "aff_unique_abbr": "SDU;QUST;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Large Language Models of Code Fail at Completing Code with Potential Bugs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70988", "id": "e1WgjvFGWp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/819cebb05f993840e8a52d7564c5c282-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e1WgjvFGWp", "openreview": "https://openreview.net/forum?id=e1WgjvFGWp", "poster": "/media/PosterPDFs/NeurIPS%202023/70988.png?t=1702442297.5384376", "slides": "https://nips.cc/virtual/2023/poster/70988", "video": "https://nips.cc/virtual/2023/poster/70988", "author_site": "Tuan Dinh, Jinman Zhao, Jinman Zhao, Samson Tan, Renato Negrinho, Leonard Lausen, Sheng Zha, George Karypis", "tldr": "", "abstract": "Large language models of code (Code-LLMs) have recently brought tremendous advances to code completion, a fundamental feature of programming assistance and code intelligence. However, most existing works ignore the possible presence of bugs in the code context for generation, which are inevitable in software development. Therefore, we introduce and study the buggy-code completion problem, inspired by the realistic scenario of real-time code suggestion where the code context contains potential bugs \u2013 anti-patterns that can become bugs in the completed program. 
To systematically study the task, we introduce two datasets: one with synthetic bugs derived from semantics-altering operator changes (buggy-HumanEval) and one with realistic bugs derived from user submissions to coding problems (buggy-FixEval). We find that the presence of potential bugs significantly degrades the generation performance of the high-performing Code-LLMs. For instance, the passing rates of CODEGEN-2B-MONO on test cases of buggy-HumanEval drop more than 50% given a single potential bug in the context. Finally, we investigate several post-hoc methods for mitigating the adverse effect of potential bugs and find that there remains a large gap in post-mitigation performance.", "keywords": "language model of code; code completion; language model; software engineering; machine learning for code", "primary_area": "", "supplementary_material": "", "author": "Tuan Dinh;Jinman Zhao;Samson Tan;Renato Negrinho;Leonard Lausen;Sheng Zha;George Karypis", "authorids": "~Tuan_Dinh1;~Jinman_Zhao1;~Samson_Tan1;~Renato_Negrinho1;~Leonard_Lausen1;~Sheng_Zha1;~George_Karypis1", "gender": "M;;;M;;M;M", "homepage": "https://tuanqdinh.com/;https://jmzhao.github.io/;https://samsontmr.github.io;https://www.cs.cmu.edu/~negrinho/;;https://github.com/szha;", "dblp": "79/7209;160/8761;241/8934.html;155/1907.html;;218/5471;", "google_scholar": "pVsYxE4AAAAJ;hSKabQ4AAAAJ;;y7j1O-8AAAAJ;;;ElqwScwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;shengzha/;", "or_profile": "~Tuan_Dinh1;~Jinman_Zhao1;~Samson_Tan1;~Renato_Negrinho1;~Leonard_Lausen1;~Sheng_Zha1;~George_Karypis1", "aff": "Department of Computer Science, University of Wisconsin, Madison;Amazon;Amazon;Carnegie Mellon University;;Amazon;University of Minnesota, Minneapolis", "aff_domain": "cs.wisc.edu;amazon.com;amazon.com;cmu.edu;;amazon.com;umn.edu", "position": "Graduate student;Applied Scientist;Scientist;PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\ndinh2023large,\ntitle={Large Language Models of Code Fail at Completing Code with Potential Bugs},\nauthor={Tuan Dinh and Jinman Zhao and Samson Tan and Renato Negrinho and Leonard Lausen and Sheng Zha and George Karypis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e1WgjvFGWp}\n}", "github": "", "project": "", "reviewers": "Cn5Q;2xun;XA1K;8UBb;h8sG", "pdf_size": 1348211, "rating": "4;5;6;7;7", "confidence": "4;3;4;4;4", "soundness": "2;3;3;2;3", "novelty": "2;3;3;2;3", "presentation": "4;3;3;3;3", "wc_summary": "75;91;81;133;110", "wc_strengths": "44;87;128;109;125", "wc_weaknesses": "399;91;435;286;120", "wc_questions": "10;52;66;132;81", "wc_limitations": "12;76;27;54;18", "wc_review": "540;397;737;714;454", "wc_reply_reviewers": "300;0;0;111;40", "wc_reply_authors": "676;41;26;435;6", "reply_reviewers": "2;0;0;2;1", "reply_authors": "3;2;2;3;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 98.0, 21.147103820618085 ], "wc_strengths_avg": [ 98.6, 30.936063097944448 ], "wc_weaknesses_avg": [ 266.2, 140.42136589564993 ], "wc_questions_avg": [ 68.2, 39.72102717705069 ], "wc_limitations_avg": [ 37.4, 24.06324998831205 ], "wc_review_avg": [ 568.4, 136.30641951133484 ], "wc_reply_reviewers_avg": [ 90.2, 112.46403869682078 ], "wc_reply_authors_avg": [ 236.8, 271.3753120679919 ], "reply_reviewers_avg": 
[ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8171629412547223308&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "email": "cs.wisc.edu;amazon.com;amazon.com;cmu.edu;;amazon.com;umn.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;1;3", "aff_unique_norm": "University of Wisconsin-Madison;Amazon;Carnegie Mellon University;University of Minnesota", "aff_unique_dep": "Department of Computer Science;Amazon.com, Inc.;;", "aff_unique_url": "https://www.wisc.edu;https://www.amazon.com;https://www.cmu.edu;https://www.minnesota.edu", "aff_unique_abbr": "UW-Madison;Amazon;CMU;UMN", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Madison;;Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MathNAS: If Blocks Have a Role in Mathematical Architecture Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70987", "id": "e1l4ZYprQH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9410d94d47adfb07b41a0b226270f068-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e1l4ZYprQH", "openreview": "https://openreview.net/forum?id=e1l4ZYprQH", "poster": "/media/PosterPDFs/NeurIPS%202023/70987.png?t=1699703256.6154978", "slides": "https://nips.cc/virtual/2023/poster/70987", "video": "https://nips.cc/virtual/2023/poster/70987", "author_site": "Qinsi Wang, Jinghan Ke, Zhi Liang, Sihai Zhang", "tldr": "", "abstract": "Neural Architecture Search (NAS) has emerged as a favoured method for unearthing effective neural architectures. \nThe recent development of large models has intensified the demand for faster search speeds and more accurate search results. \nHowever, designing large models by NAS is challenging due to the dramatic increase of the search space and the associated huge performance evaluation cost. \nConsider a typical modular search space widely used in NAS, in which a neural architecture consists of $m$ block nodes and a block node has $n$ alternative blocks. \nFacing the space containing $n^m$ candidate networks, existing NAS methods attempt to find the best one by searching and evaluating candidate networks directly.\nDifferent from the general strategy that takes architecture search as a whole problem, we propose a novel divide-and-conquer strategy by making use of the modular nature of the search space.\nHere, we introduce MathNAS, a general NAS framework based on mathematical programming. \nIn MathNAS, the performances of all possible building blocks in the search space are calculated first, and then the performance of a network is directly predicted based on the performances of its building blocks.\nAlthough estimating block performances involves network training, just as happens for network performance evaluation in existing NAS methods, predicting network performance is completely training-free and thus extremely fast. In contrast to the $n^m$ candidate networks to evaluate in existing NAS methods, which requires training and imposes a formidable computational burden, there are only $m \\times n$ possible blocks to handle in MathNAS.\nTherefore, our approach effectively reduces the complexity of network performance evaluation. 
\nThe superiority of MathNAS is validated on multiple large-scale CV and NLP benchmark datasets. \nNotably, on ImageNet-1k, MathNAS achieves 82.5\\% top-1 accuracy, 1.2\\% and 0.96\\% higher than Swin-T and LeViT-256, respectively. \nIn addition, when deployed on a mobile device, MathNAS achieves real-time search and dynamic network switching within 1s (0.4s on TX2 GPU), surpassing baseline dynamic networks in on-device performance.", "keywords": "Neural Architecture Search", "primary_area": "", "supplementary_material": "/attachment/de4d5408bda3b43a026e918f09ce60805857391a.pdf", "author": "Wang Qinsi;Jinghan Ke;Zhi Liang;Sihai Zhang", "authorids": "~Wang_Qinsi1;~Jinghan_Ke2;~Zhi_Liang2;~Sihai_Zhang1", "gender": "F;;M;M", "homepage": "https://wangqinsi1.github.io/;;;https://faculty.ustc.edu.cn/shzhang/zh_CN/index/45161/list/index.htm", "dblp": "79/10450;;;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0002-5868-9686;;;", "linkedin": ";;;", "or_profile": "~Wang_Qinsi1;~Jinghan_Ke2;~Zhi_Liang2;~Sihai_Zhang1", "aff": ";;Kangma Biotech;University of Science and Technology of China", "aff_domain": ";;healthcodon.com;ustc.edu.cn", "position": ";;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nqinsi2023mathnas,\ntitle={Math{NAS}: If Blocks Have a Role in Mathematical Architecture Design},\nauthor={Wang Qinsi and Jinghan Ke and Zhi Liang and Sihai Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e1l4ZYprQH}\n}", "github": "", "project": "", "reviewers": "qRGY;8XzK;3m52;HYbp;qYEV", "pdf_size": 2314592, "rating": "5;6;6;7;7", "confidence": "4;4;4;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "3;3;3;2;3", "wc_summary": "73;88;52;82;56", "wc_strengths": "98;72;34;88;51", "wc_weaknesses": "256;143;263;144;185", "wc_questions": "88;118;2;287;111", "wc_limitations": "1;16;2;8;1", "wc_review": "516;437;353;609;404", "wc_reply_reviewers": "0;46;147;38;0", "wc_reply_authors": "26;0;283;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "2;1;2;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 70.2, 14.119490075778232 ], "wc_strengths_avg": [ 68.6, 23.491274976041634 ], "wc_weaknesses_avg": [ 198.2, 52.34271678084736 ], "wc_questions_avg": [ 121.2, 92.64426587760303 ], "wc_limitations_avg": [ 5.6, 5.817215828899594 ], "wc_review_avg": [ 463.8, 89.86523243168072 ], "wc_reply_reviewers_avg": [ 46.2, 53.84570549263887 ], "wc_reply_authors_avg": [ 61.8, 111.05746260382504 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5215196866291842855&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;healthcodon.com;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Kangma Biotech;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": ";http://www.ustc.edu.cn", "aff_unique_abbr": ";USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "title": "Multinomial Logistic Regression: Asymptotic Normality on Null Covariates in High-Dimensions", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/70986", "id": "e1oe8F2tjV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0ac27bf3327c9cb99cc5f548db4f73a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e1oe8F2tjV", "openreview": "https://openreview.net/forum?id=e1oe8F2tjV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70986", "video": "https://nips.cc/virtual/2023/poster/70986", "author_site": "Kai Tan, Pierre C Bellec", "tldr": "", "abstract": "This paper investigates the asymptotic distribution of the maximum-likelihood estimate (MLE) in multinomial logistic models in the high-dimensional regime where dimension and sample size are of the same order. While classical large-sample theory provides asymptotic normality of the MLE under certain conditions, such classical results are expected to fail in high-dimensions as documented for the binary logistic case in the seminal work of Sur and Cand\u00e8s [2019]. We address this issue in classification problems with 3 or more classes, by developing asymptotic normality and asymptotic chi-square results for the multinomial logistic MLE (also known as cross-entropy minimizer) on null covariates. Our theory leads to a new methodology to test the significance of a given feature. Extensive simulation studies on synthetic data corroborate these asymptotic results and confirm the validity of proposed p-values for testing the significance of a given feature.", "keywords": "High-dimensional statistics;statistical inference;multi-class classification;asymptotic normality;multinomial logistic regression", "primary_area": "", "supplementary_material": "/attachment/ba2429c36bc9dcba84713bd02b78c70ca0ddc434.zip", "author": "Kai Tan;Pierre C Bellec", "authorids": "~Kai_Tan1;~Pierre_C_Bellec1", "gender": "M;", "homepage": "https://kaitan365.github.io/;", "dblp": ";", "google_scholar": "Qqqeap8AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Kai_Tan1;~Pierre_C_Bellec1", "aff": "Rutgers University, New Brunswick;", "aff_domain": "rutgers.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\ntan2023multinomial,\ntitle={Multinomial Logistic Regression: Asymptotic Normality on Null Covariates in High-Dimensions},\nauthor={Kai Tan and Pierre C Bellec},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e1oe8F2tjV}\n}", "github": "", "project": "", "reviewers": "FsXG;U9Cr;uvEK;7JUy", "pdf_size": 2705364, "rating": "6;7;7;7", "confidence": "3;3;2;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "4;4;3;4", "wc_summary": "53;96;76;109", "wc_strengths": "25;91;38;53", "wc_weaknesses": "55;40;132;19", "wc_questions": "147;96;47;266", "wc_limitations": "1;19;32;21", "wc_review": "281;342;325;468", "wc_reply_reviewers": "16;0;23;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.5, 21.17191535974013 ], "wc_strengths_avg": [ 51.75, 24.732316915323562 ], "wc_weaknesses_avg": [ 61.5, 42.66438795998368 ], "wc_questions_avg": [ 139.0, 81.40331688573875 ], "wc_limitations_avg": [ 18.25, 11.121488209767612 ], "wc_review_avg": [ 354.0, 69.48021301061188 ], "wc_reply_reviewers_avg": [ 14.5, 8.73212459828649 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1546342285665286633&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 12, "email": "rutgers.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "0", "aff_campus_unique": "New Brunswick", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Likelihood-Based Diffusion Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70985", "id": "e2MCL6hObn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/35b5c175e139bff5f22a5361270fce87-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e2MCL6hObn", "openreview": "https://openreview.net/forum?id=e2MCL6hObn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70985", "video": "https://nips.cc/virtual/2023/poster/70985", "author_site": "Ishaan Gulrajani, Tatsunori Hashimoto", "tldr": "", "abstract": "Despite a growing interest in diffusion-based language models, existing work has not shown that these models can attain nontrivial likelihoods on standard language modeling benchmarks. In this work, we take the first steps towards closing the likelihood gap between autoregressive and diffusion-based language models, with the goal of building and releasing a diffusion model which outperforms a small but widely-known autoregressive model. We pursue this goal through algorithmic improvements, scaling laws, and increased compute. On the algorithmic front, we introduce several methodological improvements for the maximum-likelihood training of diffusion language models. We then study scaling laws for our diffusion models and find compute-optimal training regimes which differ substantially from autoregressive models. 
Using our methods and scaling analysis, we train and release Plaid 1B, a large diffusion language model which outperforms GPT-2 124M in likelihood on benchmark datasets and generates fluent samples in unconditional and zero-shot control settings.", "keywords": "diffusion;language;model", "primary_area": "", "supplementary_material": "/attachment/27bccfe894e35e974ca47ad74a88b4f297bdf803.zip", "author": "Ishaan Gulrajani;Tatsunori Hashimoto", "authorids": "~Ishaan_Gulrajani1;~Tatsunori_Hashimoto1", "gender": "M;M", "homepage": "https://ishaan.io;https://thashim.github.io", "dblp": "164/5562;", "google_scholar": ";5ygiTwsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ishaan_Gulrajani1;~Tatsunori_Hashimoto1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngulrajani2023likelihoodbased,\ntitle={Likelihood-Based Diffusion Language Models},\nauthor={Ishaan Gulrajani and Tatsunori Hashimoto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e2MCL6hObn}\n}", "github": "", "project": "", "reviewers": "hVSR;rRHQ;sFeF;h8jG;ogcC;s82i", "pdf_size": 721802, "rating": "5;5;5;6;7;7", "confidence": "4;2;3;5;3;4", "soundness": "1;3;2;3;4;4", "novelty": "2;2;2;3;4;4", "presentation": "3;2;2;3;3;3", "wc_summary": "117;29;97;100;54;69", "wc_strengths": "69;45;46;83;29;48", "wc_weaknesses": "892;95;257;125;142;47", "wc_questions": "6;4;160;210;38;137", "wc_limitations": "53;11;116;9;5;48", "wc_review": "1137;184;676;527;268;349", "wc_reply_reviewers": "260;17;62;0;0;0", "wc_reply_authors": "233;0;0;0;0;0", "reply_reviewers": "1;1;1;0;0;0", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 5.833333333333333, 0.8975274678557507 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 2.8333333333333335, 1.0671873729054748 ], "novelty_avg": [ 2.8333333333333335, 0.8975274678557507 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.66666666666667, 30.06474494967301 ], "wc_strengths_avg": [ 53.333333333333336, 17.65093639316497 ], "wc_weaknesses_avg": [ 259.6666666666667, 289.8779436628841 ], "wc_questions_avg": [ 92.5, 80.23662920802859 ], "wc_limitations_avg": [ 40.333333333333336, 38.805784219480586 ], "wc_review_avg": [ 523.5, 319.01972248331816 ], "wc_reply_reviewers_avg": [ 56.5, 93.61223210670708 ], "wc_reply_authors_avg": [ 38.833333333333336, 86.83397312624184 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2909286827258562, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5166102755986747607&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Estimating and Controlling for Equalized Odds via Sensitive Attribute Predictors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70984", "id": "e2aCgjtjMR", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/752820c79b4ebb72809014bdfdedd603-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e2aCgjtjMR", "openreview": "https://openreview.net/forum?id=e2aCgjtjMR", "poster": "/media/PosterPDFs/NeurIPS%202023/70984.png?t=1702312789.2516716", "slides": "https://nips.cc/virtual/2023/poster/70984", "video": "https://nips.cc/virtual/2023/poster/70984", "author_site": "Beepul Bharti, Paul Yi, Jeremias Sulam", "tldr": "", "abstract": "As the use of machine learning models in real world high-stakes decision settings continues to grow, it is highly important that we are able to audit and control for any potential fairness violations these models may exhibit towards certain groups. To do so, one naturally requires access to sensitive attributes, such as demographics, biological sex, or other potentially sensitive features that determine group membership. Unfortunately, in many settings, this information is often unavailable. In this work we study the well known equalized odds (EOD) definition of fairness. In a setting without sensitive attributes, we first provide tight and computable upper bounds for the EOD violation of a predictor. These bounds precisely reflect the worst possible EOD violation. Second, we demonstrate how one can provably control the worst-case EOD by a new post-processing correction method. Our results characterize when directly controlling for EOD with respect to the predicted sensitive attributes is -- and when is not -- optimal when it comes to controlling worst-case EOD. Our results hold under assumptions that are milder than previous works, and we illustrate these results with experiments on synthetic and real datasets.", "keywords": "fairness;sensitive attributes;equalized odds;missing data;proxies", "primary_area": "", "supplementary_material": "/attachment/3532ed1223c152894a1cc30e8e5a69daebe321ef.zip", "author": "Beepul Bharti;Paul Yi;Jeremias Sulam", "authorids": "~Beepul_Bharti1;~Paul_Yi1;~Jeremias_Sulam1", "gender": "M;M;M", "homepage": "https://beepulbharti.github.io;https://www.stjude.org/directory/y/paul-yi.html;", "dblp": ";;156/3028", "google_scholar": ";ghZVzxMAAAAJ;1awx1aIAAAAJ", "orcid": ";;", "linkedin": "bbharti/;;", "or_profile": "~Beepul_Bharti1;~Paul_Yi1;~Jeremias_Sulam1", "aff": "Johns Hopkins University;University of Maryland, Baltimore;Johns Hopkins University", "aff_domain": "jhu.edu;umaryland.edu;jhu.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nbharti2023estimating,\ntitle={Estimating and Controlling for Equalized Odds via Sensitive Attribute Predictors},\nauthor={Beepul Bharti and Paul Yi and Jeremias Sulam},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e2aCgjtjMR}\n}", "github": "", "project": "", "reviewers": "ntnH;RpWQ;XmGE", "pdf_size": 741094, "rating": "7;7;7", "confidence": "4;4;3", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "4;4;4", "wc_summary": "59;72;46", "wc_strengths": "52;67;72", "wc_weaknesses": "49;178;70", "wc_questions": "60;137;56", "wc_limitations": "60;17;30", "wc_review": "280;471;274", "wc_reply_reviewers": "11;89;11", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 4.0, 
0.0 ], "wc_summary_avg": [ 59.0, 10.614455552060438 ], "wc_strengths_avg": [ 63.666666666666664, 8.498365855987974 ], "wc_weaknesses_avg": [ 99.0, 56.515484603779164 ], "wc_questions_avg": [ 84.33333333333333, 37.276742823851386 ], "wc_limitations_avg": [ 35.666666666666664, 18.00617178142601 ], "wc_review_avg": [ 341.6666666666667, 91.48527507503903 ], "wc_reply_reviewers_avg": [ 37.0, 36.76955262170047 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14058696865011370143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "jhu.edu;umaryland.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Johns Hopkins University;University of Maryland", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.umaryland.edu", "aff_unique_abbr": "JHU;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Baltimore", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CLadder: Assessing Causal Reasoning in Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70983", "id": "e2wtjx0Yqu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/631bb9434d718ea309af82566347d607-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e2wtjx0Yqu", "openreview": "https://openreview.net/forum?id=e2wtjx0Yqu", "poster": "/media/PosterPDFs/NeurIPS%202023/70983.png?t=1701858916.4609177", "slides": "https://nips.cc/virtual/2023/poster/70983", "video": "https://nips.cc/virtual/2023/poster/70983", "author_site": "Zhijing Jin, Yuen Chen, Felix Leeb, Luigi Gresele, Ojasv Kamal, Zhiheng LYU, Kevin Blin, Fernando Gonzalez Adauto, Max Kleiman-Weiner, Mrinmaya Sachan, Bernhard Sch\u00f6lkopf", "tldr": "", "abstract": "The ability to perform causal reasoning is widely considered a core feature of intelligence. In this work, we investigate whether large language models (LLMs) can coherently reason about causality. Much of the existing work in natural language processing (NLP) focuses on evaluating _commonsense_ causal reasoning in LLMs, thus failing to assess whether a model can perform causal inference in accordance with a set of well-defined _formal rules_. To address this, we propose a new NLP task, _causal inference in natural language_, inspired by the _\"causal inference engine\"_ postulated by Judea Pearl et al. We compose a large dataset, CLadder, with 10K samples: based on a collection of causal graphs and queries (associational, interventional, and counterfactual), we obtain symbolic questions and ground-truth answers, through an oracle causal inference engine. These are then translated into natural language. We evaluate multiple LLMs on our dataset, and we introduce and evaluate a bespoke chain-of-thought prompting strategy, CausalCoT. We show that our task is highly challenging for LLMs, and we conduct an in-depth analysis to gain deeper insight into the causal reasoning abilities of LLMs. 
Our data is open-sourced at https://huggingface.co/datasets/causalNLP/cladder, and our code can be found at https://github.com/causalNLP/cladder.", "keywords": "Large Language Models;Causal Reasoning;Causal Inference;Benchmark Dataset;Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Zhijing Jin;Yuen Chen;Felix Leeb;Luigi Gresele;Ojasv Kamal;Zhiheng LYU;Kevin Blin;Fernando Gonzalez Adauto;Max Kleiman-Weiner;Mrinmaya Sachan;Bernhard Sch\u00f6lkopf", "authorids": "~Zhijing_Jin1;~Yuen_Chen1;~Felix_Leeb1;~Luigi_Gresele1;~Ojasv_Kamal1;~Zhiheng_LYU1;~Kevin_Blin1;~Fernando_Gonzalez_Adauto1;~Max_Kleiman-Weiner1;~Mrinmaya_Sachan3;~Bernhard_Sch\u00f6lkopf1", "gender": ";F;;M;M;M;M;;Unspecified;;", "homepage": ";https://chenyuen0103.github.io/;https://ei.is.mpg.de/person/fleeb;https://lgresele.github.io/;;https://cogito233.github.io/;;;http://www.mit.edu/~maxkw/;;", "dblp": ";67/2614;;211/6114;;;300/8317;;160/7595;;", "google_scholar": ";w5flVIsAAAAJ;;JdZ8DWwAAAAJ;34PgtWEAAAAJ;;cm4rK24AAAAJ;;SACXQKYAAAAJ;;", "orcid": ";;;;;;0000-0002-4593-7230;;;;", "linkedin": ";;;;ojasv-kamal-996397182/;;kevinblin/;;;;", "or_profile": "~Zhijing_Jin1;~Yuen_Chen1;~Felix_Leeb1;~Luigi_Gresele1;~Ojasv_Kamal1;~Zhiheng_LYU1;~Kevin_Blin1;~Fernando_Gonzalez_Adauto1;~Max_Kleiman-Weiner1;~Mrinmaya_Sachan3;~Bernhard_Sch\u00f6lkopf1", "aff": ";Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Indian Institute of Technology Kharagpur;University of Hong Kong;ETHZ - ETH Zurich;;Common Sense Machines;;", "aff_domain": ";is.mpg.de;tuebingen.mpg.de;is.mpg.de;iitkgp.ac.in;hku.hk;ethz.ch;;csm.ai;;", "position": ";Intern;PhD student;PhD student;Undergrad student;Undergrad student;MS student;;Principal Researcher;;", "bibtex": "@inproceedings{\njin2023cladder,\ntitle={{CL}adder: A Benchmark to Assess Causal Reasoning Capabilities of Language Models},\nauthor={Zhijing Jin and Yuen Chen and Felix Leeb and Luigi Gresele and Ojasv Kamal and Zhiheng LYU and Kevin Blin and Fernando Gonzalez Adauto and Max Kleiman-Weiner and Mrinmaya Sachan and Bernhard Sch{\\\"o}lkopf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e2wtjx0Yqu}\n}", "github": "", "project": "", "reviewers": "hcc4;5uc1;YWrR;bMQ8;rbbm", "pdf_size": 1020380, "rating": "5;5;6;6;7", "confidence": "3;3;4;4;4", "soundness": "2;3;2;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "50;74;178;209;317", "wc_strengths": "56;105;84;67;153", "wc_weaknesses": "214;255;197;158;134", "wc_questions": "3;185;25;4;82", "wc_limitations": "1;18;8;9;80", "wc_review": "324;637;492;447;766", "wc_reply_reviewers": "158;582;24;41;40", "wc_reply_authors": "556;689;37;13;55", "reply_reviewers": "1;2;1;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 165.6, 96.65733288271511 ], "wc_strengths_avg": [ 93.0, 34.26368339802363 ], "wc_weaknesses_avg": [ 191.6, 42.41980669451477 ], "wc_questions_avg": [ 59.8, 68.87495916514216 ], "wc_limitations_avg": [ 23.2, 28.909514004908488 ], "wc_review_avg": [ 533.2, 153.53357938900533 ], "wc_reply_reviewers_avg": [ 169.0, 212.00943375236866 
], "wc_reply_authors_avg": [ 270.0, 291.17692216245433 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.8728715609439696, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17724290091181983868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";is.mpg.de;tuebingen.mpg.de;is.mpg.de;iitkgp.ac.in;hku.hk;ethz.ch;;csm.ai;;", "author_num": 11, "aff_unique_index": "0;1;0;2;3;4;5", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems;Indian Institute of Technology Kharagpur;University of Hong Kong;ETH Zurich;Common Sense Machines", "aff_unique_dep": "Intelligent Systems;Intelligent Systems;;;;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de;https://www.iitkgp.ac.in;https://www.hku.hk;https://www.ethz.ch;", "aff_unique_abbr": "MPI-IS;MPI-IS;IIT Kharagpur;HKU;ETHZ;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Kharagpur;Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;2;3;4", "aff_country_unique": "Germany;India;China;Switzerland;United States" }, { "title": "Gacs-Korner Common Information Variational Autoencoder", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70982", "id": "e4XidX6AHd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d04f08ccf582011f43af91ee1c1956d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e4XidX6AHd", "openreview": "https://openreview.net/forum?id=e4XidX6AHd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70982", "video": "https://nips.cc/virtual/2023/poster/70982", "author_site": "Michael Kleinman, Alessandro Achille, Stefano Soatto, Jonathan Kao", "tldr": "", "abstract": "We propose a notion of common information that allows one to quantify and separate the information that is shared between two random variables from the information that is unique to each. Our notion of common information is defined by an optimization problem over a family of functions and recovers the G\\'acs-K\\\"orner common information as a special case. Importantly, our notion can be approximated empirically using samples from the underlying data distribution. We then provide a method to partition and quantify the common and unique information using a simple modification of a traditional variational auto-encoder. Empirically, we demonstrate that our formulation allows us to learn semantically meaningful common and unique factors of variation even on high-dimensional data such as images and videos. 
Moreover, on datasets where ground-truth latent factors are known, we show that we can accurately quantify the common information between the random variables.", "keywords": "Common Information;Gacs-Korner;Variational Autoencoder", "primary_area": "", "supplementary_material": "", "author": "Michael Kleinman;Alessandro Achille;Stefano Soatto;Jonathan Kao", "authorids": "~Michael_Kleinman2;~Alessandro_Achille1;~Stefano_Soatto1;~Jonathan_Kao1", "gender": ";M;;", "homepage": ";;http://seas.ucla.edu/~kao;https://www.cs.ucla.edu/~soatto", "dblp": "276/0181;190/7328;145/1310;08/1262", "google_scholar": "https://scholar.google.ca/citations?user=b5c-VcMAAAAJ;;;lH1PdF8AAAAJ", "orcid": ";;0000-0002-9298-0143;0000-0003-2902-6362", "linkedin": ";;;stefano-soatto-5765aa6/", "or_profile": "~Michael_Kleinman2;~Alessandro_Achille1;~Jonathan_Kao1;~Stefano_Soatto2", "aff": "University of California, Los Angeles;California Institute of Technology;University of California, Los Angeles;UCLA Computer Science Department, University of California, Los Angeles", "aff_domain": "ucla.edu;caltech.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;Postdoc;Assistant Professor;Professor", "bibtex": "@inproceedings{\nkleinman2023gacskorner,\ntitle={Gacs-Korner Common Information Variational Autoencoder},\nauthor={Michael Kleinman and Alessandro Achille and Stefano Soatto and Jonathan Kao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e4XidX6AHd}\n}", "github": "", "project": "", "reviewers": "iAUJ;hfAm;uZkW;cgd1", "pdf_size": 7989767, "rating": "5;5;5;7", "confidence": "4;3;4;3", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "333;35;65;42", "wc_strengths": "43;44;56;53", "wc_weaknesses": "172;103;179;45", "wc_questions": "45;4;5;34", "wc_limitations": "1;1;10;6", "wc_review": "594;187;315;180", "wc_reply_reviewers": "134;10;105;13", "wc_reply_authors": "46;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 118.75, 124.19415243883265 ], "wc_strengths_avg": [ 49.0, 5.612486080160912 ], "wc_weaknesses_avg": [ 124.75, 54.792221163227175 ], "wc_questions_avg": [ 22.0, 17.930421077041107 ], "wc_limitations_avg": [ 4.5, 3.774917217635375 ], "wc_review_avg": [ 319.0, 167.62010619254482 ], "wc_reply_reviewers_avg": [ 65.5, 54.974994315597705 ], "wc_reply_authors_avg": [ 11.5, 19.91858428704209 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16093465685876638716&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucla.edu;caltech.edu;ucla.edu;cs.ucla.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Los Angeles;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.caltech.edu", "aff_unique_abbr": "UCLA;Caltech", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Los Angeles;Pasadena", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Accessing Higher Dimensions for 
Unsupervised Word Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70981", "id": "e5srDjF9l7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da31f4275972a58406b95c277ce7bc8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e5srDjF9l7", "openreview": "https://openreview.net/forum?id=e5srDjF9l7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70981", "video": "https://nips.cc/virtual/2023/poster/70981", "tldr": "", "abstract": "The striking ability of unsupervised word translation has been demonstrated recently with the help of low-dimensional word vectors / pretraining, which is used by all successful methods and assumed to be necessary. We test and challenge this assumption by developing a method that can also make use of high dimensional signal. Freed from the limits of low dimensions, we show that relying on low-dimensional vectors and their incidental properties misses out on better denoising methods and signals in high dimensions, thus stunting the potential of the data. Our results show that unsupervised translation can be achieved more easily and robustly than previously thought -- less than 80MB and minutes of CPU time are required to achieve over 50\\% accuracy for English to Finnish, Hungarian, and Chinese translations when trained in the same domain; even under domain mismatch, the method still works fully unsupervised on English NewsCrawl to Chinese Wikipedia and English Europarl to Spanish Wikipedia, among others. These results challenge prevailing assumptions on the necessity and superiority of low-dimensional vectors and show that the higher dimension signal can be used rather than thrown away.", "keywords": "co-occurrences;unsupervised word translation;bilingual lexicon induction;robust statistics;unsupervised machine translation", "primary_area": "", "supplementary_material": "/attachment/d524dd08d9e277d2e79520a832febb9b11f5eb41.zip", "author": "Sida Wang", "authorids": "~Sida_Wang2", "gender": "M", "homepage": "https://www.sidaw.xyz", "dblp": "153/9609", "google_scholar": "XUI4PMEAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Sida_Wang2", "aff": "Meta Facebook", "aff_domain": "fb.com", "position": "Research Scientist", "bibtex": "@inproceedings{\nwang2023accessing,\ntitle={Accessing Higher Dimensions for Unsupervised Word Translation},\nauthor={Sida Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e5srDjF9l7}\n}", "github": "", "project": "", "reviewers": "ZKax;RpPs;VTy1;LMyn;AbFD", "pdf_size": 4112189, "rating": "5;6;6;7;7", "confidence": "3;3;2;4;2", "soundness": "3;3;3;4;3", "novelty": "3;3;3;3;4", "presentation": "3;2;2;4;2", "wc_summary": "79;62;113;54;117", "wc_strengths": "71;58;48;68;61", "wc_weaknesses": "203;99;137;15;116", "wc_questions": "20;57;175;166;27", "wc_limitations": "2;8;34;26;15", "wc_review": "375;284;507;329;336", "wc_reply_reviewers": "16;120;1102;15;111", "wc_reply_authors": "42;0;1065;0;179", "reply_reviewers": "1;1;3;1;1", "reply_authors": "2;1;3;1;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 85.0, 25.822470834527046 ], "wc_strengths_avg": [ 61.2, 8.084553172563094 ], "wc_weaknesses_avg": [ 114.0, 60.794736614282655 ], "wc_questions_avg": [ 89.0, 
67.75544258581742 ], "wc_limitations_avg": [ 17.0, 11.661903789690601 ], "wc_review_avg": [ 366.2, 76.09835740671411 ], "wc_reply_reviewers_avg": [ 272.8, 417.014819880541 ], "wc_reply_authors_avg": [ 257.2, 409.2116322882329 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.07142857142857145, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tGSU3x8YKgIJ:scholar.google.com/&scioq=Accessing+Higher+Dimensions+for+Unsupervised+Word+Translation&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "fb.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "DynGFN: Towards Bayesian Inference of Gene Regulatory Networks with GFlowNets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70980", "id": "e7MK5Vq44Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb5254c4ee813d05af9c098f2d9c5708-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e7MK5Vq44Q", "openreview": "https://openreview.net/forum?id=e7MK5Vq44Q", "poster": "/media/PosterPDFs/NeurIPS%202023/70980.png?t=1702149722.5989423", "slides": "https://nips.cc/virtual/2023/poster/70980", "video": "https://nips.cc/virtual/2023/poster/70980", "author_site": "Lazar Atanackovic, Alexander Tong, Bo Wang, Leo J Lee, Yoshua Bengio, Jason Hartford", "tldr": "", "abstract": "One of the grand challenges of cell biology is inferring the gene regulatory network (GRN) which describes interactions between genes and their products that control gene expression and cellular function. We can treat this as a causal discovery problem but with two non-standard challenges: (1) regulatory networks are inherently cyclic so we should not model a GRN as a directed acyclic graph (DAG), and (2) observations have significant measurement noise so for typical sample sizes, there will always be a large equivalence class of graphs that are likely given the data, and we want methods that capture this uncertainty. Existing methods either focus on challenge (1), identifying cyclic structure from dynamics, or on challenge (2) learning complex Bayesian posteriors over directed acyclic graphs, but not both. In this paper we leverage the fact that it is possible to estimate the ``velocity'' of the expression of a gene with RNA velocity techniques to develop an approach that addresses both challenges. Because we have access to velocity information, we can treat the Bayesian structure learning problem as a problem of sparse identification of a dynamical system, capturing cyclic feedback loops through time. We leverage Generative Flow Networks (GFlowNets) to estimate the posterior distribution over the combinatorial space of possible sparse dependencies. 
Our results indicate that our method learns posteriors that better encapsulate the distributions of cyclic structures compared to counterpart state-of-the-art Bayesian structure learning approaches.", "keywords": "Bayesian Structure Learning;Generative Flow Networks;Single-cell;Dynamical Systems", "primary_area": "", "supplementary_material": "/attachment/45ea2b6dc01291438eac48b25e6b6b99d558ee93.zip", "author": "Lazar Atanackovic;Alexander Tong;BO WANG;Leo J Lee;Yoshua Bengio;Jason Hartford", "authorids": "~Lazar_Atanackovic1;~Alexander_Tong1;~BO_WANG11;~Leo_J_Lee1;~Yoshua_Bengio1;~Jason_Hartford1", "gender": "M;;M;;M;M", "homepage": "https://lazaratan.github.io/;https://alextong.net;https://wanglab.ai/;;http://yoshuabengio.org;https://jhartford.github.io", "dblp": "235/6207;153/9296;;;56/953;191/6716", "google_scholar": "qhTWIh4AAAAJ;CS80pt4AAAAJ;37FDILIAAAAJ;;kukA0LcAAAAJ;https://scholar.google.ca/citations?user=eBNK7SsAAAAJ", "orcid": ";0000-0002-2031-4096;;;;", "linkedin": ";atong01/;;;yoshuabengio/?originalSubdomain=ca;jasonhartford1/", "or_profile": "~Lazar_Atanackovic1;~Alexander_Tong1;~BO_WANG11;~Leo_J_Lee1;~Yoshua_Bengio1;~Jason_Hartford1", "aff": "Valence Labs powered by recursion;Universit\u00e9 de Montr\u00e9al;Vector Institute;;University of Montreal;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "valencelabs.com;umontreal.ca;vectorinstitute.ai;;umontreal.ca;mila.umontreal.ca", "position": "Intern;Postdoc;Assistant Professor;;Full Professor;Postdoc", "bibtex": "@inproceedings{\natanackovic2023dyngfn,\ntitle={Dyn{GFN}: Towards Bayesian Inference of Gene Regulatory Networks with {GF}lowNets},\nauthor={Lazar Atanackovic and Alexander Tong and BO WANG and Leo J Lee and Yoshua Bengio and Jason Hartford},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e7MK5Vq44Q}\n}", "github": "", "project": "", "reviewers": "2wco;uDCE;b9zX;PVQy", "pdf_size": 3106934, "rating": "6;6;6;7", "confidence": "3;3;4;4", "soundness": "3;2;4;2", "novelty": "3;3;3;2", "presentation": "2;2;2;2", "wc_summary": "160;251;58;26", "wc_strengths": "56;115;30;30", "wc_weaknesses": "456;175;38;166", "wc_questions": "2;66;218;64", "wc_limitations": "2;54;32;1", "wc_review": "676;661;376;287", "wc_reply_reviewers": "67;0;45;34", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 123.75, 88.57870793819473 ], "wc_strengths_avg": [ 57.75, 34.71581051912803 ], "wc_weaknesses_avg": [ 208.75, 152.68820353910777 ], "wc_questions_avg": [ 87.5, 79.61626718202757 ], "wc_limitations_avg": [ 22.25, 22.16275028059469 ], "wc_review_avg": [ 500.0, 171.4948978832898 ], "wc_reply_reviewers_avg": [ 36.5, 24.19194080680589 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13256085709638582430&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "valencelabs.com;umontreal.ca;vectorinstitute.ai;;umontreal.ca;mila.umontreal.ca", "author_num": 6, "aff_unique_index": "0;1;2;3;3", 
"aff_unique_norm": "Valence Labs;Universit\u00e9 de Montr\u00e9al;Vector Institute;University of Montreal", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.umontreal.ca;https://vectorinstitute.ai/;https://wwwumontreal.ca", "aff_unique_abbr": ";UdeM;Vector Institute;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";Canada" }, { "title": "Using Imperfect Surrogates for Downstream Inference: Design-based Supervised Learning for Social Science Applications of Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70979", "id": "e8RZwixcE4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d862f7f5445255090de13b825b880d59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e8RZwixcE4", "openreview": "https://openreview.net/forum?id=e8RZwixcE4", "poster": "/media/PosterPDFs/NeurIPS%202023/70979.png?t=1701962588.2214699", "slides": "https://nips.cc/virtual/2023/poster/70979", "video": "https://nips.cc/virtual/2023/poster/70979", "author_site": "Naoki Egami, Musashi Hinck, Brandon Stewart, Hanying Wei", "tldr": "", "abstract": "In computational social science (CSS), researchers analyze documents to explain social and political phenomena. In most scenarios, CSS researchers first obtain labels for documents and then explain labels using interpretable regression analyses in the second step. One increasingly common way to annotate documents cheaply at scale is through large language models (LLMs). However, like other scalable ways of producing annotations, such surrogate labels are often imperfect and biased. We present a new algorithm for using imperfect annotation surrogates for downstream statistical analyses while guaranteeing statistical properties\u2014like asymptotic unbiasedness and proper uncertainty quantification\u2014which are fundamental to CSS research. We show that direct use of surrogate labels in downstream statistical analyses leads to substantial bias and invalid confidence intervals, even with high surrogate accuracy of 80-90\\%. To address this, we build on debiased machine learning to propose the design-based supervised learning (DSL) estimator. DSL employs a doubly-robust procedure to combine surrogate labels with a smaller number of high-quality, gold-standard labels. Our approach guarantees valid inference for downstream statistical analyses, even when surrogates are arbitrarily biased and without requiring stringent assumptions, by controlling the probability of sampling documents for gold-standard labeling. Both our theoretical analysis and experimental results show that DSL provides valid statistical inference while achieving root mean squared errors comparable to existing alternatives that focus only on prediction without inferential guarantees.", "keywords": "Computational Social Science;Large Language Models;Statistical Inference;Causal Inference", "primary_area": "", "supplementary_material": "/attachment/41ed9cf64b6bbcc74ad61f1dd869838c04655540.zip", "author": "Naoki Egami;Musashi Hinck;Brandon M. 
Stewart;Hanying Wei", "authorids": "~Naoki_Egami1;~Musashi_Hinck1;~Brandon_M._Stewart2;~Hanying_Wei1", "gender": ";;;", "homepage": "https://naokiegami.com;;;", "dblp": "215/5043;;;", "google_scholar": "HZuqnTEAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Naoki_Egami1;~Musashi_Hinck1;~Brandon_M._Stewart2;~Hanying_Wei1", "aff": "Columbia University;;;", "aff_domain": "columbia.edu;;;", "position": "Assistant Professor;;;", "bibtex": "@inproceedings{\negami2023using,\ntitle={Using Imperfect Surrogates for Downstream Inference: Design-based Supervised Learning for Social Science Applications of Large Language Models},\nauthor={Naoki Egami and Musashi Hinck and Brandon M. Stewart and Hanying Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e8RZwixcE4}\n}", "github": "", "project": "", "reviewers": "qrXw;rbu9;m9JA;qD8b", "pdf_size": 1162108, "rating": "4;6;6;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "202;90;82;105", "wc_strengths": "54;75;58;56", "wc_weaknesses": "373;260;70;181", "wc_questions": "57;6;1;81", "wc_limitations": "1;8;7;56", "wc_review": "687;439;218;479", "wc_reply_reviewers": "326;0;13;48", "wc_reply_authors": "2055;48;89;37", "reply_reviewers": "1;0;1;1", "reply_authors": "5;2;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.75, 48.19945539111412 ], "wc_strengths_avg": [ 60.75, 8.347903928532 ], "wc_weaknesses_avg": [ 221.0, 110.70907821854539 ], "wc_questions_avg": [ 36.25, 33.87753680538182 ], "wc_limitations_avg": [ 18.0, 22.102036105300343 ], "wc_review_avg": [ 455.75, 166.45025533173566 ], "wc_reply_reviewers_avg": [ 96.75, 133.5166188157864 ], "wc_reply_authors_avg": [ 557.25, 864.9434591347575 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16992545043611864983&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "columbia.edu;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Automatic Clipping: Differentially Private Deep Learning Made Easier and Stronger", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70978", "id": "e8i7OaPj0q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8249b30d877c91611fd8c7aa6ac2b5fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=e8i7OaPj0q", "openreview": "https://openreview.net/forum?id=e8i7OaPj0q", "poster": "/media/PosterPDFs/NeurIPS%202023/70978.png?t=1697419868.0374312", "slides": "https://nips.cc/virtual/2023/poster/70978", "video": "https://nips.cc/virtual/2023/poster/70978", "author_site": "Zhiqi Bu, Yu-Xiang Wang, Sheng Zha, George Karypis", "tldr": "", "abstract": "Per-example gradient clipping is a key algorithmic step that enables practical differentially private (DP) training for deep learning models. 
The choice of clipping threshold $R$, however, is vital for achieving high accuracy under DP. We propose an easy-to-use replacement, called automatic clipping, that eliminates the need to tune $R$ for any DP optimizers, including DP-SGD, DP-Adam, DP-LAMB and many others.\nThe automatic variants are as private and computationally efficient as existing DP optimizers, but require no DP-specific hyperparameters and thus make DP training as amenable as the standard non-private training. We give a rigorous convergence analysis of automatic DP-SGD in the non-convex setting, showing that it can enjoy an asymptotic convergence rate that matches the standard SGD, under a symmetric gradient noise assumption of the per-sample gradients (commonly used in the non-DP literature). We demonstrate on various language and vision tasks that automatic clipping outperforms or matches the state-of-the-art, and can be easily employed with minimal changes to existing codebases.", "keywords": "deep learning;differential privacy;optimization;hyper-parameter tuning", "primary_area": "", "supplementary_material": "/attachment/89137879ae43a084db99726b48e86f70c02a5fe1.pdf", "author": "Zhiqi Bu;Yu-Xiang Wang;Sheng Zha;George Karypis", "authorids": "~Zhiqi_Bu1;~Yu-Xiang_Wang1;~Sheng_Zha1;~George_Karypis1", "gender": "M;;M;M", "homepage": "https://sites.google.com/view/zhiqi-bu;http://www.cs.ucsb.edu/~yuxiangw/publications.html;https://github.com/szha;", "dblp": "245/2573;62/1637-3.html;218/5471;", "google_scholar": "MEvTLxIAAAAJ;HGNZ1fkAAAAJ;;ElqwScwAAAAJ", "orcid": ";;;", "linkedin": ";;shengzha/;", "or_profile": "~Zhiqi_Bu1;~Yu-Xiang_Wang1;~Sheng_Zha1;~George_Karypis1", "aff": "Amazon;UC Santa Barbara;Amazon;University of Minnesota, Minneapolis", "aff_domain": "amazon.com;ucsb.edu;amazon.com;umn.edu", "position": "Researcher;Assistant Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nbu2023automatic,\ntitle={Automatic Clipping: Differentially Private Deep Learning Made Easier and Stronger},\nauthor={Zhiqi Bu and Yu-Xiang Wang and Sheng Zha and George Karypis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=e8i7OaPj0q}\n}", "github": "", "project": "", "reviewers": "V9NQ;jhaz;2WqJ;Cysx;FQ5E", "pdf_size": 1020024, "rating": "4;5;7;7;8", "confidence": "4;4;3;4;5", "soundness": "2;3;3;3;4", "novelty": "3;3;2;3;4", "presentation": "3;3;3;3;4", "wc_summary": "134;61;77;37;19", "wc_strengths": "106;27;96;43;54", "wc_weaknesses": "153;89;375;2;1", "wc_questions": "2;88;56;91;1", "wc_limitations": "2;11;39;2;1", "wc_review": "397;276;643;175;76", "wc_reply_reviewers": "141;57;37;9;0", "wc_reply_authors": "724;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "6;1;1;1;1", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 65.6, 39.545416927881796 ], "wc_strengths_avg": [ 65.2, 30.62939764344053 ], "wc_weaknesses_avg": [ 124.0, 137.8985134075056 ], "wc_questions_avg": [ 47.6, 39.59090804717669 ], "wc_limitations_avg": [ 11.0, 14.463747785411636 ], "wc_review_avg": [ 313.4, 196.2331266631605 ], "wc_reply_reviewers_avg": [ 48.8, 50.344413791402914 ], "wc_reply_authors_avg": [ 144.8, 289.6 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 2.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], 
"corr_rating_confidence": 0.21516574145596756, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14351327138950702901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "amazon.com;ucsb.edu;amazon.com;umn.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Amazon;University of California, Santa Barbara;University of Minnesota", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.ucsb.edu;https://www.minnesota.edu", "aff_unique_abbr": "Amazon;UCSB;UMN", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Santa Barbara;Minneapolis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "URL: A Representation Learning Benchmark for Transferable Uncertainty Estimates", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73522", "id": "e9n4JjkmXZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d421cd0e763f9f01958a30bace955bf-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=e9n4JjkmXZ", "openreview": "https://openreview.net/forum?id=e9n4JjkmXZ", "poster": "/media/PosterPDFs/NeurIPS%202023/73522.png?t=1701441419.526886", "slides": "https://nips.cc/virtual/2023/poster/73522", "video": "https://nips.cc/virtual/2023/poster/73522", "author_site": "Michael Kirchhof, B\u00e1lint Mucs\u00e1nyi, Seong Joon Oh, Dr. Enkelejda Kasneci", "tldr": "", "abstract": "Representation learning has significantly driven the field to develop pretrained models that can act as a valuable starting point when transferring to new datasets. With the rising demand for reliable machine learning and uncertainty quantification, there is a need for pretrained models that not only provide embeddings but also transferable uncertainty estimates. To guide the development of such models, we propose the Uncertainty-aware Representation Learning (URL) benchmark. Besides the transferability of the representations, it also measures the zero-shot transferability of the uncertainty estimate using a novel metric. We apply URL to evaluate ten uncertainty quantifiers that are pretrained on ImageNet and transferred to eight downstream datasets. We find that approaches that focus on the uncertainty of the representation itself or estimate the prediction risk directly outperform those that are based on the probabilities of upstream classes. Yet, achieving transferable uncertainty quantification remains an open challenge. Our findings indicate that it is not necessarily in conflict with traditional representation learning goals. 
Code is available at [https://github.com/mkirchhof/url](https://github.com/mkirchhof/url).", "keywords": "Representation Learning;Uncertainty;Zero-shot;Transfer;Generalization;Downstream;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Michael Kirchhof;B\u00e1lint Mucs\u00e1nyi;Seong Joon Oh;Enkelejda Kasneci", "authorids": "~Michael_Kirchhof1;~B\u00e1lint_Mucs\u00e1nyi1;~Seong_Joon_Oh1;~Enkelejda_Kasneci1", "gender": "M;M;M;F", "homepage": "https://www.hci.uni-tuebingen.de/chair/team/michael-kirchhof;https://bmucsanyi.github.io/;https://seongjoonoh.com;https://www.edu.sot.tum.de/hctl/prof-dr-enkelejda-kasneci/", "dblp": "65/6349;;168/8835;08/1610", "google_scholar": "Xtgj8q0AAAAJ;NexA8EEAAAAJ;https://scholar.google.de/citations?user=kmXOOdsAAAAJ;https://scholar.google.de/citations?user=bZVkVvoAAAAJ", "orcid": "0000-0003-4521-9391;0000-0002-7075-9018;0000-0002-8985-7689;0000-0003-3146-4484", "linkedin": "michael-kirchhof;b%C3%A1lint-mucs%C3%A1nyi-148a47222/;seong-joon-oh-32113479/;", "or_profile": "~Michael_Kirchhof1;~B\u00e1lint_Mucs\u00e1nyi1;~Seong_Joon_Oh1;~Enkelejda_Kasneci1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;tum.de", "position": "PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkirchhof2023url,\ntitle={{URL}: A Representation Learning Benchmark for Transferable Uncertainty Estimates},\nauthor={Michael Kirchhof and B{\\'a}lint Mucs{\\'a}nyi and Seong Joon Oh and Enkelejda Kasneci},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=e9n4JjkmXZ}\n}", "github": "", "project": "", "reviewers": "JYVQ;D238;qnX2;HXyY", "pdf_size": 1528506, "rating": "5;5;6;6", "confidence": "4;3;3;3", "wc_summary_and_contributions": "49;119;330;61", "wc_strengths": "47;114;27;152", "wc_improvement": "16;144;294;131", "wc_limitations": "16;6;35;14", "wc_correctness": "13;1;19;21", "wc_clarity": "10;20;18;9", "wc_relation_to_prior_work": "6;62;14;62", "wc_documentation": "28;13;35;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "186;480;773;459", "wc_reply_reviewers": "0;0;1337;0", "wc_reply_authors": "83;384;1302;505", "reply_reviewers": "0;0;3;0", "reply_authors": "1;1;3;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 139.75, 112.98534196965552 ], "wc_strengths_avg": [ 85.0, 50.34381789256751 ], "wc_improvement_avg": [ 146.25, 98.7835386084139 ], "wc_limitations_avg": [ 17.75, 10.638961415476606 ], "wc_correctness_avg": [ 13.5, 7.794228634059948 ], "wc_clarity_avg": [ 14.25, 4.815340071064556 ], "wc_relation_to_prior_work_avg": [ 36.0, 26.153393661244042 ], "wc_documentation_avg": [ 21.0, 10.931605554537724 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 474.5, 207.72878953096512 ], "wc_reply_reviewers_avg": [ 334.25, 578.9379824298973 ], "wc_reply_authors_avg": [ 568.5, 450.50110987654625 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8297790127119381842&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 8, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;tum.de", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.tum.de", "aff_unique_abbr": "Uni T\u00fcbingen;TUM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "POP-3D: Open-Vocabulary 3D Occupancy Prediction from Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70977", "id": "eBXM62SqKY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9e30acdeff572463c1db9b7de59de64c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eBXM62SqKY", "openreview": "https://openreview.net/forum?id=eBXM62SqKY", "poster": "/media/PosterPDFs/NeurIPS%202023/70977.png?t=1701435311.2596774", "slides": "https://nips.cc/virtual/2023/poster/70977", "video": "https://nips.cc/virtual/2023/poster/70977", "author_site": "Antonin Vobecky, Oriane Sim\u00e9oni, David Hurych, Spyridon Gidaris, Andrei Bursuc, Patrick P\u00e9rez, Josef Sivic", "tldr": "", "abstract": "We describe an approach to predict open-vocabulary 3D semantic voxel occupancy map from input 2D images with the objective of enabling 3D grounding, segmentation and retrieval of free-form language queries. This is a challenging problem because of the 2D-3D ambiguity and the open-vocabulary nature of the target tasks, where obtaining annotated training data in 3D is difficult. The contributions of this work are three-fold. \nFirst, we design a new model architecture for open-vocabulary 3D semantic occupancy prediction. The architecture consists of a 2D-3D encoder together with occupancy prediction and 3D-language heads. The output is a dense voxel map of 3D grounded language embeddings enabling a range of open-vocabulary tasks. \nSecond, we develop a tri-modal self-supervised learning algorithm that leverages three modalities: (i) images, (ii) language and (iii) LiDAR point clouds, and enables training the proposed architecture using a strong pre-trained vision-language model without the need for any 3D manual language annotations. \nFinally, we demonstrate quantitatively the strengths of the proposed model on several open-vocabulary tasks:\nZero-shot 3D semantic segmentation using existing datasets; 3D grounding and retrieval of free-form language queries, using a small dataset that we propose as an extension of nuScenes. 
You can find the project page here https://vobecant.github.io/POP3D.", "keywords": "open-vocabulary segmentation;voxel occupancy prediction;semantic segmentation;autonomous driving;language-image alignment", "primary_area": "", "supplementary_material": "/attachment/c6dc157dc7bfeadc974169222f3efe50a3af8630.pdf", "author": "Anton\u00edn Vobeck\u00fd;Oriane Sim\u00e9oni;David Hurych;Spyros Gidaris;Andrei Bursuc;Patrick Perez;Josef Sivic", "authorids": "~Anton\u00edn_Vobeck\u00fd1;~Oriane_Sim\u00e9oni2;~David_Hurych1;~Spyros_Gidaris1;~Andrei_Bursuc1;~Patrick_Perez1;~Josef_Sivic1", "gender": "M;;M;M;M;M;M", "homepage": "https://vobecant.github.io/;https://osimeoni.github.io/;;;https://abursuc.github.io/;http://people.ciirc.cvut.cz/~sivic;https://ptrckprz.github.io/", "dblp": "218/4573;206/6091;16/8930;163/2312;40/8692.html;71/5006;71/1167", "google_scholar": "https://scholar.google.cz/citations?user=DGhPzZ0AAAAJ;PC7ELtEAAAAJ;XY1PVwYAAAAJ;https://scholar.google.fr/citations?user=7atfg7EAAAAJ;https://scholar.google.fr/citations?user=HTfERCsAAAAJ;https://scholar.google.fr/citations?user=NCtKHnQAAAAJ;https://scholar.google.fr/citations?user=8Cph5uQAAAAJ", "orcid": "0000-0001-8946-2057;;0000-0002-0580-4038;;;;", "linkedin": "antoninvobecky/?locale=en_US;;david-hurych-1b862b82/;;;;", "or_profile": "~Anton\u00edn_Vobeck\u00fd1;~Oriane_Sim\u00e9oni2;~David_Hurych1;~Spyros_Gidaris1;~Andrei_Bursuc1;~Josef_Sivic1;~Patrick_Perez2", "aff": "valeo.ai;Valeo.ai;Valeo.ai;Valeo.ai;Valeo;Czech Technical University in Prague;Valeo", "aff_domain": "valeo.com;valeo.com;valeo.com;valeo.com;valeo.com;cvut.cz;valeo.com", "position": "PhD student;Research Scientist;Researcher;Research scientist;Research Scientist;Principal investigator;Scientific Director", "bibtex": "@inproceedings{\nvobeck{\\'y}2023popd,\ntitle={{POP}-3D: Open-Vocabulary 3D Occupancy Prediction from Images},\nauthor={Anton{\\'\\i}n Vobeck{\\'y} and Oriane Sim{\\'e}oni and David Hurych and Spyros Gidaris and Andrei Bursuc and Patrick Perez and Josef Sivic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eBXM62SqKY}\n}", "github": "", "project": "", "reviewers": "q7fa;9pAV;yGBj;Vrgc;nph2", "pdf_size": 11165497, "rating": "5;5;5;6;6", "confidence": "5;5;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;2;3;4;3", "wc_summary": "92;110;63;62;176", "wc_strengths": "93;62;26;40;213", "wc_weaknesses": "303;117;68;127;174", "wc_questions": "1;10;3;2;46", "wc_limitations": "14;3;3;3;13", "wc_review": "503;302;163;234;622", "wc_reply_reviewers": "56;64;0;21;90", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 100.6, 41.83586977702269 ], "wc_strengths_avg": [ 86.8, 67.0325294166944 ], "wc_weaknesses_avg": [ 157.8, 80.02849492524521 ], "wc_questions_avg": [ 12.4, 17.09502851708648 ], "wc_limitations_avg": [ 7.2, 5.1536394906900505 ], "wc_review_avg": [ 364.8, 171.47991135990245 ], "wc_reply_reviewers_avg": [ 46.2, 31.939943644283403 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6666666666666665, "gs_citation": 38, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=10267107273782246923&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "valeo.com;valeo.com;valeo.com;valeo.com;valeo.com;cvut.cz;valeo.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Valeo;Czech Technical University", "aff_unique_dep": ";", "aff_unique_url": "https://www.valeo.com;https://www.ctu.cz", "aff_unique_abbr": "Valeo;CTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Prague", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "France;Czech Republic" }, { "title": "On Sparse Modern Hopfield Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70976", "id": "eCgWNU2Imw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57bc0a850255e2041341bf74c7e2b9fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eCgWNU2Imw", "openreview": "https://openreview.net/forum?id=eCgWNU2Imw", "poster": "/media/PosterPDFs/NeurIPS%202023/70976.png?t=1701478469.0545144", "slides": "https://nips.cc/virtual/2023/poster/70976", "video": "https://nips.cc/virtual/2023/poster/70976", "author_site": "Jerry Yao-Chieh Hu, Donglin Yang, Dennis Wu, Chenwei Xu, Bo-Yu Chen, Han Liu", "tldr": "", "abstract": "We introduce the sparse modern Hopfield model as a sparse extension of the modern Hopfield model.\nLike its dense counterpart, the sparse modern Hopfield model equips a memory-retrieval dynamics whose one-step approximation corresponds to the sparse attention mechanism. \nTheoretically, our key contribution is a principled derivation of a closed-form sparse Hopfield energy using the convex conjugate of the sparse entropic regularizer.\nBuilding upon this, we derive the sparse memory retrieval dynamics from the sparse energy function and show its one-step approximation is equivalent to the sparse-structured attention.\nImportantly, we provide a sparsity-dependent memory retrieval error bound which is provably tighter than its dense analog.\nThe conditions for the benefits of sparsity to arise are therefore identified and discussed.\nIn addition, we show that the sparse modern Hopfield model maintains the robust theoretical properties of its dense counterpart, including rapid fixed point convergence and exponential memory capacity.\nEmpirically, we use both synthetic and real-world datasets to demonstrate that the sparse Hopfield model outperforms its dense counterpart in many situations.", "keywords": "Hopfield Models; Modern Hopfield Networks; Sparse Attention; Memory Networks", "primary_area": "", "supplementary_material": "/attachment/02b3c7b7d9b35e2a7a95ec7f6f0117ae78ab437b.pdf", "author": "Jerry Yao-Chieh Hu;Donglin Yang;Dennis Wu;Chenwei Xu;Bo-Yu Chen;Han Liu", "authorids": "~Jerry_Yao-Chieh_Hu1;~Donglin_Yang1;~Dennis_Wu1;~Chenwei_Xu2;~Bo-Yu_Chen1;~Han_Liu4", "gender": ";M;;;M;", "homepage": ";https://linydthu.github.io/;;;https://phys-mattchen.github.io/;", "dblp": ";;;;;", "google_scholar": ";jg2atr8AAAAJ;;;;", "orcid": ";;;;0000-0003-4997-1652;", "linkedin": ";donglin-yang-473635212/;;;;", "or_profile": "~Jerry_Yao-Chieh_Hu1;~Donglin_Yang1;~Dennis_Wu1;~Chenwei_Xu2;~Bo-Yu_Chen1;~Han_Liu4", "aff": ";Northwestern University;;;National Taiwan University;Northwestern University", "aff_domain": ";northwestern.edu;;;phys.ntu.edu.tw;u.northwestern.edu", "position": ";PhD student;;;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nhu2023on,\ntitle={On Sparse Modern Hopfield 
Model},\nauthor={Jerry Yao-Chieh Hu and Donglin Yang and Dennis Wu and Chenwei Xu and Bo-Yu Chen and Han Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eCgWNU2Imw}\n}", "github": "", "project": "", "reviewers": "yZUu;usKi;XuPM;haSG", "pdf_size": 3067656, "rating": "4;6;6;6", "confidence": "2;3;2;3", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "2;3;1;3", "wc_summary": "39;107;23;60", "wc_strengths": "37;114;49;125", "wc_weaknesses": "80;127;236;255", "wc_questions": "5;101;72;167", "wc_limitations": "4;49;3;19", "wc_review": "165;498;383;626", "wc_reply_reviewers": "0;71;37;19", "wc_reply_authors": "0;52;36;48", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 57.25, 31.578275760402118 ], "wc_strengths_avg": [ 81.25, 38.68058298423125 ], "wc_weaknesses_avg": [ 174.5, 73.2273855876338 ], "wc_questions_avg": [ 86.25, 58.186660842498945 ], "wc_limitations_avg": [ 18.75, 18.579222265746218 ], "wc_review_avg": [ 418.0, 169.48303749933206 ], "wc_reply_reviewers_avg": [ 31.75, 26.166533969939543 ], "wc_reply_authors_avg": [ 34.0, 20.493901531919196 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4298187964227415000&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";northwestern.edu;;;phys.ntu.edu.tw;u.northwestern.edu", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Northwestern University;National Taiwan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.ntu.edu.tw", "aff_unique_abbr": "NU;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Evaluating Post-hoc Explanations for Graph Neural Networks via Robustness Analysis", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70975", "id": "eD534mPhAg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e55c2f3fdde519014c879aa3554414c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eD534mPhAg", "openreview": "https://openreview.net/forum?id=eD534mPhAg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70975", "video": "https://nips.cc/virtual/2023/poster/70975", "author_site": "Junfeng Fang, Wei Liu, Yuan Gao, Zemin Liu, An Zhang, Xiang Wang, Xiangnan He", "tldr": "", "abstract": "This work studies the evaluation of explanations for graph neural networks (GNNs), which is crucial to the credibility of post-hoc explainability in practical usage. Conventional evaluation metrics, and even explanation methods -- which mainly follow the paradigm of feeding the explanatory subgraph and measuring output difference -- always suffer from the notorious out-of-distribution (OOD) issue. In this work, we endeavor to confront the issue by introducing a novel evaluation metric, termed **O**OD-resistant **A**dversarial **R**obustness (OAR). 
Specifically, we draw inspiration from the notion of adversarial robustness and evaluate post-hoc explanation subgraphs by calculating their robustness under attack. On top of that, an elaborate OOD reweighting block is inserted into the pipeline to confine the evaluation process to the original data distribution. For applications involving large datasets, we further devise a **Sim**plified version of **OAR** (SimOAR), which achieves a significant improvement in computational efficiency at the cost of a small loss in performance. Extensive empirical studies validate the effectiveness of our OAR and SimOAR.", "keywords": "Post-hoc Explainability;Explanation Evaluation;Graph Neural Network;Robustness Analysis", "primary_area": "", "supplementary_material": "", "author": "Junfeng Fang;Wei Liu;Yuan Gao;Zemin Liu;An Zhang;Xiang Wang;Xiangnan He", "authorids": "~Junfeng_Fang1;~Wei_Liu34;~Yuan_Gao18;~Zemin_Liu1;~An_Zhang2;~Xiang_Wang6;~Xiangnan_He1", "gender": "M;M;;M;M;M;F", "homepage": "https://scholar.google.com/citations?user=beNNywsAAAAJ&hl=zh-CN;;https://github.com/blacksingular;https://zemin-liu.github.io/;https://github.com/xiangwang1223;http://staff.ustc.edu.cn/~hexn;https://github.com/anzhang314", "dblp": "340/7929;;76/2452-20;17/964.html;31/2864-10;59/1007;78/5581-3", "google_scholar": "beNNywsAAAAJ;9Hd32GUAAAAJ;;IxHO1nkAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ", "orcid": ";;;0000-0001-6262-9435;0000-0002-6148-6329;0000-0001-8472-7992;", "linkedin": ";;;;;;", "or_profile": "~Junfeng_Fang1;~Wei_Liu34;~Yuan_Gao18;~Zemin_Liu1;~Xiang_Wang6;~Xiangnan_He1;~AN_ZHANG1", "aff": ";University of Science and Technology of China;University of Science and Technology of China;National University of Singapore;University of Science and Technology of China;University of Science and Technology of China;National University of Singapore", "aff_domain": ";ustc.edu.cn;mail.ustc.edu.cn;nus.edu;ustc.edu.cn;ustc.edu.cn;nus.edu.sg", "position": ";MS student;PhD student;Postdoc;Full Professor;Professor;Postdoc", "bibtex": "@inproceedings{\nfang2023evaluating,\ntitle={Evaluating Post-hoc Explanations for Graph Neural Networks via Robustness Analysis},\nauthor={Junfeng Fang and Wei Liu and Yuan Gao and Zemin Liu and An Zhang and Xiang Wang and Xiangnan He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eD534mPhAg}\n}", "github": "", "project": "", "reviewers": "xH8P;vGHJ;uvv7;QMrv", "pdf_size": 3228548, "rating": "6;7;7;8", "confidence": "4;4;5;4", "soundness": "3;3;4;3", "novelty": "3;3;4;4", "presentation": "3;4;3;3", "wc_summary": "124;124;104;64", "wc_strengths": "84;91;38;138", "wc_weaknesses": "320;132;86;123", "wc_questions": "296;63;3;53", "wc_limitations": "4;72;1;1", "wc_review": "828;482;232;379", "wc_reply_reviewers": "247;45;33;19", "wc_reply_authors": "134;0;0;73", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 24.49489742783178 ], "wc_strengths_avg": [ 87.75, 35.442735503908274 ], "wc_weaknesses_avg": [ 165.25, 90.99278817576699 ], "wc_questions_avg": [ 103.75, 113.29910635128593 ], "wc_limitations_avg": [ 19.5, 
30.335622624235025 ], "wc_review_avg": [ 480.25, 219.55224321331815 ], "wc_reply_reviewers_avg": [ 86.0, 93.40770846134703 ], "wc_reply_authors_avg": [ 51.75, 56.06413737854173 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7973094716757603002&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";ustc.edu.cn;mail.ustc.edu.cn;nus.edu;ustc.edu.cn;ustc.edu.cn;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;0;0;1", "aff_unique_norm": "University of Science and Technology of China;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "USTC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "How to Select Which Active Learning Strategy is Best Suited for Your Specific Problem and Budget", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70974", "id": "eDDZh8C4W4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b09bb02b90584e2be94ff3ae09289bc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eDDZh8C4W4", "openreview": "https://openreview.net/forum?id=eDDZh8C4W4", "poster": "/media/PosterPDFs/NeurIPS%202023/70974.png?t=1701433248.0431192", "slides": "https://nips.cc/virtual/2023/poster/70974", "video": "https://nips.cc/virtual/2023/poster/70974", "author_site": "Guy Hacohen, Daphna Weinshall", "tldr": "", "abstract": "In the domain of Active Learning (AL), a learner actively selects the unlabeled examples for which to seek labels from an oracle, while operating within predefined budget constraints. Importantly, it has been recently shown that distinct query strategies are better suited for different conditions and budgetary constraints. In practice, the determination of the most appropriate AL strategy for a given situation remains an open problem. To tackle this challenge, we propose a practical derivative-based method that dynamically identifies the best strategy for a given budget. Intuitive motivation for our approach is provided by the theoretical analysis of a simplified scenario. We then introduce a method to dynamically select an AL strategy, which takes into account the unique characteristics of the problem and the available budget. 
Empirical results showcase the effectiveness of our approach across diverse budgets and computer vision tasks.", "keywords": "Deep Active learning;Low budget;High budget;Deep learning", "primary_area": "", "supplementary_material": "/attachment/6c8b3b0ce1bfb623768cc64b3ba4863ce93c6dd8.pdf", "author": "Guy Hacohen;Daphna Weinshall", "authorids": "~Guy_Hacohen1;~Daphna_Weinshall1", "gender": "M;F", "homepage": "https://www.cs.huji.ac.il/w~guy.hacohen/;http://www.cs.huji.ac.il/~daphna", "dblp": "239/4250;93/1568", "google_scholar": "AMK_eJwAAAAJ;https://scholar.google.co.il/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Guy_Hacohen1;~Daphna_Weinshall1", "aff": "Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;huji.ac.il", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nhacohen2023how,\ntitle={How to Select Which Active Learning Strategy is Best Suited for Your Specific Problem and Budget},\nauthor={Guy Hacohen and Daphna Weinshall},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eDDZh8C4W4}\n}", "github": "", "project": "", "reviewers": "mgMr;5Syc;GcEN;uJGw", "pdf_size": 793796, "rating": "5;5;5;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "2;2;2;2", "wc_summary": "83;71;66;142", "wc_strengths": "33;33;57;273", "wc_weaknesses": "103;39;340;96", "wc_questions": "256;237;35;2", "wc_limitations": "8;15;1;1", "wc_review": "483;395;499;514", "wc_reply_reviewers": "0;24;134;5", "wc_reply_authors": "0;0;194;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 90.5, 30.36856927812043 ], "wc_strengths_avg": [ 99.0, 100.93562304756433 ], "wc_weaknesses_avg": [ 144.5, 115.5692433132622 ], "wc_questions_avg": [ 132.5, 114.79220356801241 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 472.75, 46.20808911868137 ], "wc_reply_reviewers_avg": [ 40.75, 54.57735336199438 ], "wc_reply_authors_avg": [ 48.5, 84.00446416709055 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4731779245941094453&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "huji.ac.il;huji.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Jerusalem", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Revisiting Visual Model Robustness: A Frequency Long-Tailed Distribution View", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70973", "id": "eE5L1RkxW0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9a4d7b88a41652c63962ebcc21701b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eE5L1RkxW0", "openreview": "https://openreview.net/forum?id=eE5L1RkxW0", "poster": "/media/PosterPDFs/NeurIPS%202023/70973.png?t=1701395526.9615457", "slides": 
"https://nips.cc/virtual/2023/poster/70973", "video": "https://nips.cc/virtual/2023/poster/70973", "author_site": "Zhiyu Lin, Yifei Gao, Yunfan Yang, Jitao Sang", "tldr": "", "abstract": "A widely discussed hypothesis regarding the cause of visual models' lack of robustness is that they can exploit human-imperceptible high-frequency components (HFC) in images, which in turn leads to model vulnerabilities, such as the adversarial examples. However, (1) inconsistent findings regarding the validation of this hypothesis reflect in a limited understanding of HFC, and (2) solutions inspired by the hypothesis tend to involve a robustness-accuracy trade-off and leaning towards suppressing the model's learning on HFC. In this paper, inspired by the long-tailed characteristic observed in frequency spectrum, we first formally define the HFC from long-tailed perspective and then revisit the relationship between HFC and model robustness. In the frequency long-tailed scenario, experimental results on common datasets and various network structures consistently indicate that models in standard training exhibit high sensitivity to HFC. We investigate the reason of the sensitivity, which reflects in model's under-fitting behavior on HFC. Furthermore, the cause of the model's under-fitting behavior is attributed to the limited information content in HFC. Based on these findings, we propose a Balance Spectrum Sampling (BaSS) strategy, which effectively counteracts the long-tailed effect and enhances the model's learning on HFC. Extensive experimental results demonstrate that our method achieves a substantially better robustness-accuracy trade-off when combined with existing defense methods, while also indicating the potential of encouraging HFC learning in improving model performance.", "keywords": "visual models;robustness;frequency domain;long-tailed distribution", "primary_area": "", "supplementary_material": "/attachment/1ef7cc0d57b0e8b79b4ffc76f5039f898e6eab3a.pdf", "author": "Zhiyu Lin;Yifei Gao;Yunfan Yang;Jitao Sang", "authorids": "~Zhiyu_Lin2;~Yifei_Gao1;~Yunfan_Yang2;~Jitao_Sang1", "gender": "M;;;", "homepage": "https://github.com/zhiyugege;;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhiyu_Lin2;~Yifei_Gao1;~Yunfan_Yang2;~Jitao_Sang1", "aff": "Beijing Jiaotong University;;;", "aff_domain": "bjtu.edu.cn;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nlin2023revisiting,\ntitle={Revisiting Visual Model Robustness: A Frequency Long-Tailed Distribution View},\nauthor={Zhiyu Lin and Yifei Gao and Yunfan Yang and Jitao Sang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eE5L1RkxW0}\n}", "github": "", "project": "", "reviewers": "fSNP;P5Z5;48f5;zwAk;oe9n", "pdf_size": 999979, "rating": "5;6;6;7;7", "confidence": "4;4;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;3;2", "wc_summary": "48;54;78;69;50", "wc_strengths": "42;58;51;52;50", "wc_weaknesses": "124;170;678;51;72", "wc_questions": "19;20;4;19;47", "wc_limitations": "1;5;4;10;6", "wc_review": "234;307;815;201;225", "wc_reply_reviewers": "22;44;59;30;20", "wc_reply_authors": "39;63;726;57;49", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], 
"wc_summary_avg": [ 59.8, 11.702991070662236 ], "wc_strengths_avg": [ 50.6, 5.122499389946279 ], "wc_weaknesses_avg": [ 219.0, 233.21234958723778 ], "wc_questions_avg": [ 21.8, 13.934130758680285 ], "wc_limitations_avg": [ 5.2, 2.9257477676655586 ], "wc_review_avg": [ 356.4, 232.01344788610854 ], "wc_reply_reviewers_avg": [ 35.0, 14.669696656713798 ], "wc_reply_authors_avg": [ 186.8, 269.72015126793923 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12290577960159161268&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "bjtu.edu.cn;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "OpenDataVal: a Unified Benchmark for Data Valuation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73521", "id": "eEK99egXeB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5b047c7d862059a5df623c1ce2982fca-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=eEK99egXeB", "openreview": "https://openreview.net/forum?id=eEK99egXeB", "poster": "/media/PosterPDFs/NeurIPS%202023/73521.png?t=1698869292.6630492", "slides": "https://nips.cc/virtual/2023/poster/73521", "video": "https://nips.cc/virtual/2023/poster/73521", "author_site": "Kevin Jiang, Weixin Liang, James Zou, Yongchan Kwon", "tldr": "", "abstract": "Assessing the quality and impact of individual data points is critical for improving model performance and mitigating undesirable biases within the training dataset. Several data valuation algorithms have been proposed to quantify data quality, however, there lacks a systemic and standardized benchmarking system for data valuation. In this paper, we introduce *OpenDataVal*, an easy-to-use and unified benchmark framework that empowers researchers and practitioners to apply and compare various data valuation algorithms. *OpenDataVal* provides an integrated environment that includes (i) a diverse collection of image, natural language, and tabular datasets, (ii) implementations of eleven different state-of-the-art data valuation algorithms, and (iii) a prediction model API that can import any models in scikit-learn. Furthermore, we propose four downstream machine learning tasks for evaluating the quality of data values. We perform benchmarking analysis using *OpenDataVal*, quantifying and comparing the efficacy of state-of-the-art data valuation approaches. We find that no single algorithm performs uniformly best across all tasks, and an appropriate algorithm should be employed for a user's downstream task. *OpenDataVal* is publicly available at https://opendataval.github.io with comprehensive documentation. 
Furthermore, we provide a leaderboard where researchers can evaluate the effectiveness of their own data valuation algorithms.", "keywords": "Data valuation;Benchmark;Data-centric AI", "primary_area": "", "supplementary_material": "", "author": "Kevin Fu Jiang;Weixin Liang;James Zou;Yongchan Kwon", "authorids": "~Kevin_Fu_Jiang1;~Weixin_Liang1;~James_Zou1;~Yongchan_Kwon1", "gender": "M;;;", "homepage": "https://kevinfjiang.github.io;https://ai.stanford.edu/~wxliang/;;", "dblp": ";231/1803;;", "google_scholar": ";7z9P1jYAAAAJ;23ZXZvEAAAAJ;", "orcid": ";;;", "linkedin": ";weixin-liang-2562aa154/;;", "or_profile": "~Kevin_Fu_Jiang1;~Weixin_Liang1;~James_Zou1;~Yongchan_Kwon1", "aff": "Columbia University;Stanford University;Stanford University;", "aff_domain": "columbia.edu;stanford.edu;stanford.edu;", "position": "Undergrad student;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\njiang2023opendataval,\ntitle={OpenDataVal: a Unified Benchmark for Data Valuation},\nauthor={Kevin Fu Jiang and Weixin Liang and James Zou and Yongchan Kwon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=eEK99egXeB}\n}", "github": "", "project": "", "reviewers": "hFxZ;8KNo;CYt9;2UEH", "pdf_size": 2096308, "rating": "5;7;7;9", "confidence": "3;4;2;5", "wc_summary_and_contributions": "24;97;90;58", "wc_strengths": "35;113;107;305", "wc_improvement": "146;184;2;129", "wc_limitations": "7;54;48;37", "wc_correctness": "19;41;11;85", "wc_clarity": "1;9;24;32", "wc_relation_to_prior_work": "20;13;1;130", "wc_documentation": "1;26;1;49", "wc_additional_feedback": "1;1;1;1", "wc_review": "254;538;285;826", "wc_reply_reviewers": "0;36;0;19", "wc_reply_authors": "250;342;66;148", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 67.25, 28.977361853695378 ], "wc_strengths_avg": [ 140.0, 100.08496390567366 ], "wc_improvement_avg": [ 115.25, 68.34974396440707 ], "wc_limitations_avg": [ 36.5, 18.090052515125542 ], "wc_correctness_avg": [ 39.0, 28.74021572639983 ], "wc_clarity_avg": [ 16.5, 12.175795661885921 ], "wc_relation_to_prior_work_avg": [ 41.0, 51.83145762951299 ], "wc_documentation_avg": [ 19.25, 19.97967717456916 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 475.75, 230.27632857069787 ], "wc_reply_reviewers_avg": [ 13.75, 15.006248698458919 ], "wc_reply_authors_avg": [ 201.5, 104.06128002287883 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6324555320336759, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=546006575374451196&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "columbia.edu;stanford.edu;stanford.edu;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Columbia University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.stanford.edu", "aff_unique_abbr": "Columbia;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RGMIL: Guide Your Multiple-Instance Learning Model with Regressor", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70972", "id": 
"eGoE9CVRPc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6feb9b30798abcfae937760d183605e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eGoE9CVRPc", "openreview": "https://openreview.net/forum?id=eGoE9CVRPc", "poster": "/media/PosterPDFs/NeurIPS%202023/70972.png?t=1701653210.869063", "slides": "https://nips.cc/virtual/2023/poster/70972", "video": "https://nips.cc/virtual/2023/poster/70972", "author_site": "Zhaolong Du, Shasha Mao, Yimeng Zhang, Shuiping Gou, Licheng Jiao, Lin Xiong", "tldr": "", "abstract": "In video analysis, an important challenge is insufficient annotated data due to the rare occurrence of the critical patterns, and we need to provide discriminative frame-level representation with limited annotation in some applications. Multiple Instance Learning (MIL) is suitable for this scenario. However, many MIL models paid attention to analyzing the relationships between instance representations and aggregating them, but neglecting the critical information from the MIL problem itself, which causes difficultly achieving ideal instance-level performance compared with the supervised model.\nTo address this issue, we propose the $\\textbf{\\textit{Regressor-Guided MIL network} (RGMIL)}$, which effectively produces discriminative instance-level representations in a general multi-classification scenario. In the proposed method, we make full use of the $\\textit{regressor}$ through our newly introduced $\\textit{aggregator}$, $\\textbf{\\textit{Regressor-Guided Pooling} (RGP)}$. RGP focuses on simulating the correct inference process of humans while facing similar problems without introducing new parameters, and the MIL problem can be accurately described through the critical information from the $\\textit{regressor}$ in our method. \nIn experiments, RGP shows dominance on more than 20 MIL benchmark datasets, with the average bag-level classification accuracy close to 1. \nWe also perform a series of comprehensive experiments on the MMNIST dataset. Experimental results illustrate that our $\\textit{aggregator}$ outperforms existing methods under different challenging circumstances. Instance-level predictions are even possible under the guidance of RGP information table in a long sequence. RGMIL also presents comparable instance-level performance with S-O-T-A supervised models in complicated applications. Statistical results demonstrate the assumption that a MIL model can compete with a supervised model at the instance level, as long as a structure that accurately describes the MIL problem is provided. 
The code is available at $\\url{https://github.com/LMBDA-design/RGMIL}$.", "keywords": "Video Analysis;Multiple-Instance Learning;Representation learning", "primary_area": "", "supplementary_material": "/attachment/a6409b6ca9eb685b2965d57b4e3ee18c3cc51bb9.pdf", "author": "Zhaolong Du;Shasha Mao;Yimeng Zhang;Shuiping Gou;Licheng Jiao;Lin Xiong", "authorids": "~Zhaolong_Du1;~Shasha_Mao4;~Yimeng_Zhang3;~Shuiping_Gou1;~Licheng_Jiao2;~Lin_Xiong1", "gender": "M;F;M;F;M;M", "homepage": ";https://web.xidian.edu.cn/ssmao/;;https://faculty.xidian.edu.cn/GSP2/zh_CN/index/335507/list/index.htm;https://web.xidian.edu.cn/lchjiao/;https://bruinxiong.github.io/xionglin.github.io/", "dblp": ";07/9307;;41/676;40/3714;", "google_scholar": ";QLCqaOwAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;FZbrL2YAAAAJ;Auze-lcAAAAJ", "orcid": "0009-0006-4305-0675;;0000-0002-7342-1012;0000-0002-2619-6481;0000-0003-3354-9617;0000-0003-3545-227X", "linkedin": ";;;;;lin-xiong-91079b29/", "or_profile": "~Zhaolong_Du1;~Shasha_Mao4;~Yimeng_Zhang3;~Shuiping_Gou1;~Licheng_Jiao2;~Lin_Xiong1", "aff": "Xidian University ;Xidian University;Xidian University;Xidian University;Xidian University;SenseTime", "aff_domain": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;sensetime.com", "position": "MS student;Associate Professor;Full Professor;Full Professor;Full Professor;Senior Expert", "bibtex": "@inproceedings{\ndu2023rgmil,\ntitle={{RGMIL}: Guide Your Multiple-Instance Learning Model with Regressor},\nauthor={Zhaolong Du and Shasha Mao and Yimeng Zhang and Shuiping Gou and Licheng Jiao and Lin Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eGoE9CVRPc}\n}", "github": "", "project": "", "reviewers": "A2Ve;4ybk;oDjC;mChi", "pdf_size": 652676, "rating": "3;5;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "1;2;3;3", "wc_summary": "120;78;101;59", "wc_strengths": "111;109;137;73", "wc_weaknesses": "441;609;203;29", "wc_questions": "291;59;16;3", "wc_limitations": "124;34;2;3", "wc_review": "1087;889;459;167", "wc_reply_reviewers": "294;225;26;73", "wc_reply_authors": "1632;521;0;502", "reply_reviewers": "2;2;1;2", "reply_authors": "3;2;1;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 89.5, 23.04886114323222 ], "wc_strengths_avg": [ 107.5, 22.776083947860748 ], "wc_weaknesses_avg": [ 320.5, 221.65908508337753 ], "wc_questions_avg": [ 92.25, 116.60483480542305 ], "wc_limitations_avg": [ 40.75, 49.75628101054178 ], "wc_review_avg": [ 650.5, 359.81210374305084 ], "wc_reply_reviewers_avg": [ 154.5, 109.0699316952202 ], "wc_reply_authors_avg": [ 663.75, 596.7857132170642 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7614019109849301797&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;sensetime.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Xidian University;SenseTime", "aff_unique_dep": ";", "aff_unique_url": 
"http://www.xidian.edu.cn/;https://www.sensetime.com", "aff_unique_abbr": "Xidian;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AD-PT: Autonomous Driving Pre-Training with Large-scale Point Cloud Dataset", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70971", "id": "eIFZtkshgH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/95ab5c3e26fd82c7de3230bbad087d2d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eIFZtkshgH", "openreview": "https://openreview.net/forum?id=eIFZtkshgH", "poster": "/media/PosterPDFs/NeurIPS%202023/70971.png?t=1697272993.434982", "slides": "https://nips.cc/virtual/2023/poster/70971", "video": "https://nips.cc/virtual/2023/poster/70971", "author_site": "Jiakang Yuan, Bo Zhang, Xiangchao Yan, Botian Shi, Tao Chen, Yikang LI, Yu Qiao", "tldr": "", "abstract": "It is a long-term vision for Autonomous Driving (AD) community that the perception models can learn from a large-scale point cloud dataset, to obtain unified representations that can achieve promising results on different tasks or benchmarks. Previous works mainly focus on the self-supervised pre-training pipeline, meaning that they perform the pre-training and fine-tuning on the same benchmark, which is difficult to attain the performance scalability and cross-dataset application for the pre-training checkpoint. In this paper, for the first time, we are committed to building a large-scale pre-training point-cloud dataset with diverse data distribution, and meanwhile learning generalizable representations from such a diverse pre-training dataset. We formulate the point-cloud pre-training task as a semi-supervised problem, which leverages the few-shot labeled and massive unlabeled point-cloud data to generate the unified backbone representations that can be directly applied to many baseline models and benchmarks, decoupling the AD-related pre-training process and downstream fine-tuning task. 
During backbone pre-training, by enhancing the scene- and instance-level distribution diversity and exploiting the backbone's ability to learn from unknown instances, we achieve significant performance gains on a series of downstream perception benchmarks including Waymo, nuScenes, and KITTI, with different baseline models such as PV-RCNN++, SECOND, and CenterPoint.", "keywords": "3D Object Detection;3D Pre-training;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/7057ad2e7ab26346cf87b480255b623a9c3c9c84.zip", "author": "Jiakang Yuan;Bo Zhang;Xiangchao Yan;Botian Shi;Tao Chen;Yikang LI;Yu Qiao", "authorids": "~Jiakang_Yuan1;~Bo_Zhang17;~Xiangchao_Yan1;~Botian_Shi1;~Tao_Chen6;~Yikang_LI2;~Yu_Qiao1", "gender": "M;M;;M;M;M;", "homepage": "https://jiakangyuan.github.io/;https://bobrown.github.io/boZhang.github.io/;https://github.com/sky-fly97;;https://eetchen.github.io/;https://www.liyikang.top;", "dblp": "323/7363;36/2259-69;314/2496.html;245/8742;69/510-3;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;0mMk6PMAAAAJ;K0PpvLkAAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;G9b6hpYAAAAJ;", "orcid": ";0000-0001-8052-782X;;0000-0003-3677-7252;;;", "linkedin": ";;;friskit/;;;", "or_profile": "~Jiakang_Yuan1;~Bo_Zhang17;~Xiangchao_Yan1;~Botian_Shi1;~Tao_Chen6;~Yikang_LI2;~Yu_Qiao1", "aff": "Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai AI Lab;Fudan University;Shanghai AI Lab;", "aff_domain": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;", "position": "Intern;Researcher;Researcher;Researcher;Full Professor;Researcher;", "bibtex": "@inproceedings{\nyuan2023adpt,\ntitle={{AD}-{PT}: Autonomous Driving Pre-Training with Large-scale Point Cloud Dataset},\nauthor={Jiakang Yuan and Bo Zhang and Xiangchao Yan and Botian Shi and Tao Chen and Yikang LI and Yu Qiao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eIFZtkshgH}\n}", "github": "", "project": "", "reviewers": "nLKe;hHpC;8so5;5kGB", "pdf_size": 4895135, "rating": "5;5;5;6", "confidence": "3;4;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "75;63;57;59", "wc_strengths": "69;16;101;71", "wc_weaknesses": "275;185;122;41", "wc_questions": "2;1;6;1", "wc_limitations": "2;26;7;1", "wc_review": "423;291;293;173", "wc_reply_reviewers": "48;63;23;0", "wc_reply_authors": "631;271;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.5, 6.98212002188447 ], "wc_strengths_avg": [ 64.25, 30.605350839354873 ], "wc_weaknesses_avg": [ 155.75, 85.7069862963341 ], "wc_questions_avg": [ 2.5, 2.0615528128088303 ], "wc_limitations_avg": [ 9.0, 10.074720839804943 ], "wc_review_avg": [ 295.0, 88.4420714366189 ], "wc_reply_reviewers_avg": [ 33.5, 24.046829312822098 ], "wc_reply_authors_avg": [ 225.5, 258.9406302610697 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 35, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3147787612974253679&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;", "author_num": 7, "aff_unique_index": "0;1;0;2;3;2", "aff_unique_norm": "Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Lab;Fudan University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;http://www.shailab.org/;https://www.shanghaiailab.com;https://www.fudan.edu.cn", "aff_unique_abbr": "SAIL;Shanghai AI Lab;SAIL;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "M$^2$Hub: Unlocking the Potential of Machine Learning for Materials Discovery", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73520", "id": "eJ5nu9qvWz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f43380ca3f86cd989f3269583c3c8b55-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=eJ5nu9qvWz", "openreview": "https://openreview.net/forum?id=eJ5nu9qvWz", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73520", "video": "https://nips.cc/virtual/2023/poster/73520", "author_site": "Yuanqi Du, Yingheng Wang, Yining Huang, Jianan Canal Li, Yanqiao Zhu, Tian Xie, Chenru Duan, John Gregoire, Carla Gomes", "tldr": "", "abstract": "We introduce M$^2$Hub, a toolkit for advancing machine learning in materials discovery. Machine learning has achieved remarkable progress in modeling molecular structures, especially biomolecules for drug discovery. However, the development of machine learning approaches for modeling materials structures lag behind, which is partly due to the lack of an integrated platform that enables access to diverse tasks for materials discovery. To bridge this gap, M$^2$Hub will enable easy access to materials discovery tasks, datasets, machine learning methods, evaluations, and benchmark results that cover the entire workflow. Specifically, the first release of M$^2$Hub focuses on three key stages in materials discovery: virtual screening, inverse design, and molecular simulation, including 9 datasets that covers 6 types of materials with 56 tasks across 8 types of material properties. We further provide 2 synthetic datasets for the purpose of generative tasks on materials. In addition to random data splits, we also provide 3 additional data partitions to reflect the real-world materials discovery scenarios. State-of-the-art machine learning methods (including those are suitable for materials structures but never compared in the literature) are benchmarked on representative tasks. 
Our code and library are publicly available at \\url{https://github.com/yuanqidu/M2Hub}.", "keywords": "Materials Discovery", "primary_area": "", "supplementary_material": "", "author": "Yuanqi Du;Yingheng Wang;Yining Huang;Jianan Canal Li;Yanqiao Zhu;Tian Xie;Chenru Duan;John Gregoire;Carla P Gomes", "authorids": "~Yuanqi_Du1;~Yingheng_Wang1;~Yining_Huang1;~Jianan_Canal_Li1;~Yanqiao_Zhu1;~Tian_Xie2;~Chenru_Duan1;~John_Gregoire1;~Carla_P_Gomes1", "gender": "M;M;M;M;M;M;M;;", "homepage": "https://yuanqidu.github.io/;https://isjakewong.github.io/publications/;https://yiningsamhuang.com/;https://canallee.github.io/;https://sxkdz.github.io;http://www.txie.me;https://www.deepprinciple.com;https://gregoire.people.caltech.edu/;", "dblp": "266/2837;265/6357;;;67/8383-1;;;;", "google_scholar": "fAc_zZMAAAAJ;4WEa7tMAAAAJ;YBBQLMcAAAAJ;;NBbJT3AAAAAJ;xFbOAf8AAAAJ;canPgVoAAAAJ;DY7mqg4AAAAJ;", "orcid": ";;;;0000-0003-2205-5304;;0000-0003-2592-4237;0000-0002-2863-5265;", "linkedin": ";;yining-huang-83b45a169;jianan-li-66b570169/;;txie-93/;chenru-duan-8882a010b/;;", "or_profile": "~Yuanqi_Du1;~Yingheng_Wang1;~Yining_Huang1;~Jianan_Canal_Li1;~Yanqiao_Zhu1;~Tian_Xie2;~Chenru_Duan1;~John_Gregoire1;~Carla_P_Gomes1", "aff": "Cornell University;Cornell University;Northwestern University;University of California, Berkeley;University of California, Los Angeles;Microsoft Research AI for Science;Microsoft;California Institute of Technology;", "aff_domain": "cornell.edu;cornell.edu;northwestern.edu;berkeley.edu;ucla.edu;microsoft.com;microsoft.com;caltech.edu;", "position": "PhD student;PhD student;Undergrad student;PhD student;PhD student;Senior Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\ndu2023mhub,\ntitle={M\\${\\textasciicircum}2\\$Hub: Unlocking the Potential of Machine Learning for Materials Discovery},\nauthor={Yuanqi Du and Yingheng Wang and Yining Huang and Jianan Canal Li and Yanqiao Zhu and Tian Xie and Chenru Duan and John Gregoire and Carla P Gomes},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=eJ5nu9qvWz}\n}", "github": "", "project": "", "reviewers": "1xJA;PX85;ouYe;GSxn", "pdf_size": 979541, "rating": "7;7;7;7", "confidence": "3;5;4;4", "wc_summary_and_contributions": "48;44;48;70", "wc_strengths": "46;162;78;37", "wc_improvement": "195;108;94;218", "wc_limitations": "32;18;1;41", "wc_correctness": "48;28;8;32", "wc_clarity": "17;10;4;1", "wc_relation_to_prior_work": "32;42;1;16", "wc_documentation": "8;16;16;39", "wc_additional_feedback": "1;1;1;1", "wc_review": "427;429;251;455", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "844;165;352;938", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;3", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 52.5, 10.23474474522936 ], "wc_strengths_avg": [ 80.75, 49.32228198289288 ], "wc_improvement_avg": [ 153.75, 53.60212215948171 ], "wc_limitations_avg": [ 23.0, 15.116216457830975 ], "wc_correctness_avg": [ 29.0, 14.247806848775006 ], "wc_clarity_avg": [ 8.0, 6.123724356957945 ], "wc_relation_to_prior_work_avg": [ 22.75, 15.610493265749165 ], "wc_documentation_avg": [ 19.75, 11.583932838203095 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 390.5, 81.29421873663588 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 574.75, 324.7917602095226 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 
0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17235334253029147023&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cornell.edu;cornell.edu;northwestern.edu;berkeley.edu;ucla.edu;microsoft.com;microsoft.com;caltech.edu;", "author_num": 9, "aff_unique_index": "0;0;1;2;3;4;4;5", "aff_unique_norm": "Cornell University;Northwestern University;University of California, Berkeley;University of California, Los Angeles;Microsoft;California Institute of Technology", "aff_unique_dep": ";;;;AI for Science;", "aff_unique_url": "https://www.cornell.edu;https://www.northwestern.edu;https://www.berkeley.edu;https://www.ucla.edu;https://www.microsoft.com/en-us/research/group/ai-for-science;https://www.caltech.edu", "aff_unique_abbr": "Cornell;NU;UC Berkeley;UCLA;Microsoft Research;Caltech", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Berkeley;Los Angeles;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "What Planning Problems Can A Relational Neural Network Solve?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70970", "id": "eJZ5vJEaaa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ba90e56a74fd77d0ddec033dc199f0fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eJZ5vJEaaa", "openreview": "https://openreview.net/forum?id=eJZ5vJEaaa", "poster": "/media/PosterPDFs/NeurIPS%202023/70970.png?t=1701407535.5223064", "slides": "https://nips.cc/virtual/2023/poster/70970", "video": "https://nips.cc/virtual/2023/poster/70970", "author_site": "Jiayuan Mao, Tom\u00e1s Lozano-P\u00e9rez, Josh Tenenbaum, Leslie Kaelbling", "tldr": "", "abstract": "Goal-conditioned policies are generally understood to be \"feed-forward\" circuits, in the form of neural networks that map from the current state and the goal specification to the next action to take. However, under what circumstances such a policy can be learned and how efficient the policy will be are not well understood. In this paper, we present a circuit complexity analysis for relational neural networks (such as graph neural networks and transformers) representing policies for planning problems, by drawing connections with serialized goal regression search (S-GRS). We show that there are three general classes of planning problems, in terms of the growth of circuit width and depth as a function of the number of objects and planning horizon, providing constructive proofs. We also illustrate the utility of this analysis for designing neural networks for policy learning.", "keywords": "Planning;Relational Neural Network;Circuit Complexity", "primary_area": "", "supplementary_material": "", "author": "Jiayuan Mao;Tom\u00e1s Lozano-P\u00e9rez;Joshua B. 
Tenenbaum;Leslie Pack Kaelbling", "authorids": "~Jiayuan_Mao1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1", "gender": "F;M;;F", "homepage": "http://jiayuanm.com;http://people.csail.mit.edu/tlp/;;http://people.csail.mit.edu/lpk/", "dblp": "200/8283;90/752;t/JoshuaBTenenbaum;k/LesliePackKaelbling", "google_scholar": "-xaOIZIAAAAJ;gQOKAggAAAAJ;;IcasIiwAAAAJ", "orcid": "0000-0003-4798-3748;;;0000-0001-6054-7145", "linkedin": ";;;", "or_profile": "~Jiayuan_Mao1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Full Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nmao2023what,\ntitle={What Planning Problems Can A Relational Neural Network Solve?},\nauthor={Jiayuan Mao and Tom{\\'a}s Lozano-P{\\'e}rez and Joshua B. Tenenbaum and Leslie Pack Kaelbling},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eJZ5vJEaaa}\n}", "github": "", "project": "", "reviewers": "7eTK;aaQH;bz43;TK6j", "pdf_size": 543197, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;4;3", "novelty": "2;2;2;3", "presentation": "3;3;2;2", "wc_summary": "169;64;197;88", "wc_strengths": "120;61;48;56", "wc_weaknesses": "112;82;141;12", "wc_questions": "90;368;186;448", "wc_limitations": "53;18;276;0", "wc_review": "544;593;848;604", "wc_reply_reviewers": "33;28;77;21", "wc_reply_authors": "0;0;45;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 129.5, 55.065869647178005 ], "wc_strengths_avg": [ 71.25, 28.525208149985513 ], "wc_weaknesses_avg": [ 86.75, 47.934199690826176 ], "wc_questions_avg": [ 273.0, 142.04576727238302 ], "wc_limitations_avg": [ 86.75, 110.91297264071503 ], "wc_review_avg": [ 647.25, 118.08339214301053 ], "wc_reply_reviewers_avg": [ 39.75, 21.924586655168667 ], "wc_reply_authors_avg": [ 11.25, 19.48557158514987 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12828074499706111587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Frequency-Enhanced Data Augmentation for Vision-and-Language Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70969", "id": "eKFrXWb0sT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d9e08f247ca7fbbfd5e50b7ff9cf357-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eKFrXWb0sT", "openreview": "https://openreview.net/forum?id=eKFrXWb0sT", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70969.png?t=1702089032.181773", "slides": "https://nips.cc/virtual/2023/poster/70969", "video": "https://nips.cc/virtual/2023/poster/70969", "author_site": "Keji He, Chenyang Si, Zhihe Lu, Yan Huang, Liang Wang, Xinchao Wang", "tldr": "", "abstract": "Vision-and-Language Navigation (VLN) is a challenging task that requires an agent to navigate through complex environments based on natural language instructions. In contrast to conventional approaches, which primarily focus on the spatial domain exploration, we propose a paradigm shift toward the Fourier domain. This alternative perspective aims to enhance visual-textual matching, ultimately improving the agent's ability to understand and execute navigation tasks based on the given instructions. In this study, we first explore the significance of high-frequency information in VLN and provide evidence that it is instrumental in bolstering visual-textual matching processes. Building upon this insight, we further propose a sophisticated and versatile Frequency-enhanced Data Augmentation (FDA) technique to improve the VLN model's capability of capturing critical high-frequency information. Specifically, this approach requires the agent to navigate in environments where only a subset of high-frequency visual information corresponds with the provided textual instructions, ultimately fostering the agent's ability to selectively discern and capture pertinent high-frequency features according to the given instructions. Promising results on R2R, RxR, CVDN and REVERIE demonstrate that our FDA can be readily integrated with existing VLN approaches, improving performance without adding extra parameters, and keeping models simple and efficient. The code is available at https://github.com/hekj/FDA.", "keywords": "Vision-and-Language Navigation; High-Frequency; Data Augmentation", "primary_area": "", "supplementary_material": "/attachment/ac490e44feb7d1ee8df4e690acdb75413fdae9ca.pdf", "author": "Keji He;Chenyang Si;Zhihe Lu;Yan Huang;Liang Wang;Xinchao Wang", "authorids": "~Keji_He1;~Chenyang_Si2;~Zhihe_Lu1;~Yan_Huang2;~Liang_Wang3;~Xinchao_Wang1", "gender": "M;M;M;M;M;M", "homepage": "http://chenyangsi.top/;https://zhihelu.github.io/;https://yanrockhuang.github.io/;;;https://sites.google.com/site/sitexinchaowang/", "dblp": "220/3068;195/9141.html;75/6434-8;56/4499-1;319/4518;", "google_scholar": "XdahAuoAAAAJ;X4LKIhgAAAAJ;6nUJrQ0AAAAJ;;RHPI-NQAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";0000-0002-6917-8654;0000-0002-8239-7229;;0000-0001-5136-8444;", "linkedin": ";;;;;", "or_profile": "~Chenyang_Si2;~Zhihe_Lu1;~Yan_Huang2;~Liang_Wang3;~He_Keji2;~Xinchao_WANG3", "aff": "Sea AI Lab;National University of Singapore;Institute of Automation, Chinese Academy of Sciences;Institute of Automation\uff0c CAS\uff0cChina;Institute of Automation, Chinese Academy of Sciences;National University of Singapore", "aff_domain": "sea.com;nus.edu;ia.ac.cn;ia.ac.cn;ia.ac.cn;nus.edu", "position": "Researcher;Postdoc;Associate Professor;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhe2023frequencyenhanced,\ntitle={Frequency-Enhanced Data Augmentation for Vision-and-Language Navigation},\nauthor={Keji He and Chenyang Si and Zhihe Lu and Yan Huang and Liang Wang and Xinchao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eKFrXWb0sT}\n}", "github": "", "project": "", "reviewers": 
"bEiK;LWMZ;r2mS;gjxf;wftp", "pdf_size": 2565421, "rating": "6;6;6;6;6", "confidence": "4;5;5;5;4", "soundness": "3;4;3;2;3", "novelty": "3;2;3;3;3", "presentation": "3;4;2;4;4", "wc_summary": "53;116;67;50;160", "wc_strengths": "32;81;54;62;71", "wc_weaknesses": "202;372;172;126;201", "wc_questions": "361;40;2;3;165", "wc_limitations": "89;7;1;1;1", "wc_review": "737;616;296;242;598", "wc_reply_reviewers": "44;23;29;0;0", "wc_reply_authors": "35;0;0;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 89.2, 42.59295716430123 ], "wc_strengths_avg": [ 60.0, 16.649324310613927 ], "wc_weaknesses_avg": [ 214.6, 83.40647456882469 ], "wc_questions_avg": [ 114.2, 137.07720452358225 ], "wc_limitations_avg": [ 19.8, 34.67794688270919 ], "wc_review_avg": [ 497.8, 193.58966914585088 ], "wc_reply_reviewers_avg": [ 19.2, 17.10438540258024 ], "wc_reply_authors_avg": [ 7.0, 14.0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7469876583149838277&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sea.com;nus.edu;ia.ac.cn;ia.ac.cn;ia.ac.cn;nus.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;2;1", "aff_unique_norm": "Sea AI Lab;National University of Singapore;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Automation", "aff_unique_url": ";https://www.nus.edu.sg;http://www.ia.cas.cn", "aff_unique_abbr": ";NUS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2;2;2;1", "aff_country_unique": ";Singapore;China" }, { "title": "Equivariant flow matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70968", "id": "eLH2NFOO1B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bc827452450356f9f558f4e4568d553b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eLH2NFOO1B", "openreview": "https://openreview.net/forum?id=eLH2NFOO1B", "poster": "/media/PosterPDFs/NeurIPS%202023/70968.png?t=1699996269.7226737", "slides": "https://nips.cc/virtual/2023/poster/70968", "video": "https://nips.cc/virtual/2023/poster/70968", "author_site": "Leon Klein, Andreas Kr\u00e4mer, Frank Noe", "tldr": "", "abstract": "Normalizing flows are a class of deep generative models that are especially interesting for modeling probability distributions in physics, where the exact likelihood of flows allows reweighting to known target energy functions and computing unbiased observables. For instance, Boltzmann generators tackle the long-standing sampling problem in statistical physics by training flows to produce equilibrium samples of many-body systems such as small molecules and proteins. To build effective models for such systems, it is crucial to incorporate the symmetries of the target energy into the model, which can be achieved by equivariant continuous normalizing flows (CNFs). 
However, CNFs can be computationally expensive to train and generate samples from, which has hampered their scalability and practical application.\nIn this paper, we introduce equivariant flow matching, a new training objective for equivariant CNFs that is based on the recently proposed optimal transport flow matching. Equivariant flow matching exploits the physical symmetries of the target energy for efficient, simulation-free training of equivariant CNFs.\nWe demonstrate the effectiveness of flow matching on rotation and permutation invariant many-particle systems and a small molecule, alanine dipeptide, where for the first time we obtain a Boltzmann generator with significant sampling efficiency without relying on tailored internal coordinate featurization. Our results show that the equivariant flow matching objective yields flows with shorter integration paths, improved sampling efficiency, and higher scalability compared to existing methods.", "keywords": "Normalizing Flows;Flow Matching;Equivariance;Boltzmann Generators;Molecular Dynamics;Optimal Transport", "primary_area": "", "supplementary_material": "/attachment/ece13bfa5f860d6b4029fb36b67fa73cfe5c5cf5.zip", "author": "Leon Klein;Andreas Kr\u00e4mer;Frank Noe", "authorids": "~Leon_Klein1;~Andreas_Kr\u00e4mer1;~Frank_Noe1", "gender": ";M;M", "homepage": ";;", "dblp": "249/9262;;", "google_scholar": "P1vYX2AAAAAJ;QU6MEUsAAAAJ;QGiLc_cAAAAJ", "orcid": "0000-0003-1095-1902;0000-0002-7699-3083;", "linkedin": ";;", "or_profile": "~Leon_Klein1;~Andreas_Kr\u00e4mer1;~Frank_Noe1", "aff": "Microsoft;Freie Universit\u00e4t Berlin;Freie Universit\u00e4t Berlin", "aff_domain": "microsoft.com;fu-berlin.de;fu-berlin.de", "position": "Researcher;Postdoc;Professor", "bibtex": "@inproceedings{\nklein2023equivariant,\ntitle={Equivariant flow matching},\nauthor={Leon Klein and Andreas Kr{\\\"a}mer and Frank Noe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eLH2NFOO1B}\n}", "github": "", "project": "", "reviewers": "coc5;nzPr;nDvW;W92U", "pdf_size": 4157618, "rating": "3;6;6;7", "confidence": "5;4;4;4", "soundness": "2;4;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "146;308;71;98", "wc_strengths": "273;83;50;19", "wc_weaknesses": "382;346;122;65", "wc_questions": "141;55;112;705", "wc_limitations": "1;4;50;54", "wc_review": "943;796;405;941", "wc_reply_reviewers": "589;54;358;203", "wc_reply_authors": "1688;132;740;552", "reply_reviewers": "2;1;2;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 155.75, 91.91402232521433 ], "wc_strengths_avg": [ 106.25, 98.89735840759347 ], "wc_weaknesses_avg": [ 228.75, 137.33421824148562 ], "wc_questions_avg": [ 253.25, 262.6465067348127 ], "wc_limitations_avg": [ 27.25, 24.81305100143874 ], "wc_review_avg": [ 771.25, 219.69567018946915 ], "wc_reply_reviewers_avg": [ 301.0, 197.99368676803815 ], "wc_reply_authors_avg": [ 778.0, 569.6349708365876 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6151804404766495464&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
"microsoft.com;fu-berlin.de;fu-berlin.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Microsoft;Freie Universit\u00e4t Berlin", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.fu-berlin.de", "aff_unique_abbr": "Microsoft;FU Berlin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Germany" }, { "title": "LAMM: Language-Assisted Multi-Modal Instruction-Tuning Dataset, Framework, and Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73519", "id": "eM6WLko4Dv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/548a41b9cac6f50dccf7e63e9e1b1b9b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=eM6WLko4Dv", "openreview": "https://openreview.net/forum?id=eM6WLko4Dv", "poster": "/media/PosterPDFs/NeurIPS%202023/73519.png?t=1701886211.997343", "slides": "https://nips.cc/virtual/2023/poster/73519", "video": "https://nips.cc/virtual/2023/poster/73519", "author_site": "Zhenfei Yin, Jiong Wang, Jianjian Cao, Zhelun Shi, Dingning Liu, Mukai Li, Xiaoshui Huang, Zhiyong Wang, Lu Sheng, LEI BAI, Jing Shao, Wanli Ouyang", "tldr": "", "abstract": "Large language models have emerged as a promising approach towards achieving general-purpose AI agents. The thriving open-source LLM community has greatly accelerated the development of agents that support human-machine dialogue interaction through natural language processing. However, human interaction with the world extends beyond only text as a modality, and other modalities such as vision are also crucial. Recent works on multi-modal large language models, such as GPT-4V and Bard, have demonstrated their effectiveness in handling visual modalities. However, the transparency of these works is limited and insufficient to support academic research. To the best of our knowledge, we present one of the very first open-source endeavors in the field, LAMM, encompassing a Language-Assisted Multi-Modal instruction tuning dataset, framework, and benchmark. Our aim is to establish LAMM as a growing ecosystem for training and evaluating MLLMs, with a specific focus on facilitating AI agents capable of bridging the gap between ideas and execution, thereby enabling seamless human-AI interaction. Our main contribution is three-fold: 1) We present a comprehensive dataset and benchmark, which cover a wide range of vision tasks for 2D and 3D vision. Extensive experiments validate the effectiveness of our dataset and benchmark. 2) We outline the detailed methodology of constructing multi-modal instruction tuning datasets and benchmarks for MLLMs, enabling rapid scaling and extension of MLLM research to diverse domains, tasks, and modalities. 3) We provide a primary but potential MLLM training framework optimized for modality extension. We also provide baseline models, comprehensive experimental observations, and analysis to accelerate future research. Our baseline model is trained within 24 A100 GPU hours, framework supports training with V100 and RTX3090 is available thanks to the open-source society. 
Codes and data are now available at https://openlamm.github.io.", "keywords": "Multi-modality;Large Language Model", "primary_area": "", "supplementary_material": "/attachment/6a6ddd2906fa0902724dd17cef18cf3f02ec8ff7.zip", "author": "Zhenfei Yin;Jiong WANG;Jianjian Cao;Zhelun Shi;Dingning Liu;Mukai Li;Xiaoshui Huang;Zhiyong Wang;Lu Sheng;LEI BAI;Jing Shao;Wanli Ouyang", "authorids": "~Zhenfei_Yin2;~Jiong_WANG2;~Jianjian_Cao1;~Zhelun_Shi1;~Dingning_Liu4;~Mukai_Li2;~Xiaoshui_Huang1;~Zhiyong_Wang1;~Lu_Sheng1;~LEI_BAI1;~Jing_Shao3;~Wanli_Ouyang1", "gender": "M;M;M;M;M;;Not Specified;M;M;M;F;", "homepage": "https://yinzhenfei.github.io/;https://wangjiongw.github.io;;https://github.com/Coach257;https://kns.cnki.net/kcms2/author/detail?v=3uoqIhG8C45UgIk_lOaz14XyqsX58x7M0WXtgcAez4UGRerJDCMVB19_2vmVIoiOmRpSmGxuOzT3rAn8jqEtaGKLY-8Ou370UN3Z1Bw5YL9O5zP0gfzFSLVtJxToOC4s&uniplatform=NZKPT;;https://xiaoshuihuang.github.io/;https://www.sydney.edu.au/engineering/about/our-people/academic-staff/zhiyong-wang.html;https://lucassheng.github.io/;http://leibai.site/;https://amandajshao.github.io/;", "dblp": "271/0669;;261/3612;349/3643;;;167/9599;62/234-1;132/1772;119/1223-1;;", "google_scholar": "https://scholar.google.com.hk/citations?user=ngPR1dIAAAAJ;0dbY4wUAAAAJ;S25Lr3oAAAAJ;EDLcoVkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.ca/citations?user=rp7mYNsAAAAJ;https://scholar.google.com.au/citations?user=Sqou_P0AAAAJ;https://scholar.google.com.hk/citations?user=_8lB7xcAAAAJ;https://scholar.google.com.au/citations?user=sakOO04AAAAJ;VU5ObUwAAAAJ;", "orcid": "0000-0002-8666-1103;0000-0003-3676-2544;0000-0002-1473-3956;0009-0005-4079-7426;;;;0000-0002-8043-0312;0000-0002-8525-9163;0000-0003-3378-7201;;", "linkedin": "zhenfei-yin/;;;;;;;;;lei-bai-641370153/;;", "or_profile": "~Zhenfei_Yin2;~Jiong_WANG2;~Jianjian_Cao1;~Zhelun_Shi1;~Dingning_Liu4;~Mukai_Li2;~Xiaoshui_Huang1;~Zhiyong_Wang1;~Lu_Sheng1;~LEI_BAI1;~Jing_Shao3;~Wanli_Ouyang1", "aff": "Shanghai AI Laboratory;The Chinese University of Hong Kong, Shenzhen;Fudan University;Beihang University;Shanghai AI Laboratory;;Shanghai AI Laboratory;;Beihang University;Shanghai AI Laboratory;SenseTime Group Limited;", "aff_domain": "pjlab.org.cn;cuhk.edu.cn;fudan.edu.cn;buaa.edu.cn;org.cn;;pjlab.org.cn;;buaa.edu.cn;pjlab.org.cn;sensetime.com;", "position": "Intern;MS student;PhD student;MS student;Intern;;Research Fellow;;Associate Professor;Researcher;Researcher;", "bibtex": "@inproceedings{\nyin2023lamm,\ntitle={{LAMM}: Language-Assisted Multi-Modal Instruction-Tuning Dataset, Framework, and Benchmark},\nauthor={Zhenfei Yin and Jiong WANG and Jianjian Cao and Zhelun Shi and Dingning Liu and Mukai Li and Xiaoshui Huang and Zhiyong Wang and Lu Sheng and LEI BAI and Jing Shao and Wanli Ouyang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=eM6WLko4Dv}\n}", "github": "", "project": "", "reviewers": "8BVz;FCRL;XzFC;SwmA;Byte", "pdf_size": 14775773, "rating": "4;5;5;7;8", "confidence": "2;4;3;3;3", "wc_summary_and_contributions": "58;50;185;153;44", "wc_strengths": "82;67;99;155;35", "wc_improvement": "107;132;4;134;22", "wc_limitations": "58;18;403;4;9", "wc_correctness": "6;1;26;5;9", "wc_clarity": "21;1;14;14;1", "wc_relation_to_prior_work": "8;1;13;9;15", "wc_documentation": "16;46;26;19;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "357;317;771;494;137", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": 
"464;845;763;584;386", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 98.0, 59.01525226583379 ], "wc_strengths_avg": [ 87.6, 39.737136283330734 ], "wc_improvement_avg": [ 79.8, 55.65752419933894 ], "wc_limitations_avg": [ 98.4, 153.48042220426683 ], "wc_correctness_avg": [ 9.4, 8.685620300243388 ], "wc_clarity_avg": [ 10.2, 7.934733769950949 ], "wc_relation_to_prior_work_avg": [ 9.2, 4.833218389437829 ], "wc_documentation_avg": [ 21.6, 14.677874505527019 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 415.2, 211.295432984246 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 608.4, 173.66012783595434 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.21516574145596756, "gs_citation": 176, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3715545124153756792&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "pjlab.org.cn;cuhk.edu.cn;fudan.edu.cn;buaa.edu.cn;org.cn;;pjlab.org.cn;;buaa.edu.cn;pjlab.org.cn;sensetime.com;", "author_num": 12, "aff_unique_index": "0;1;2;3;0;0;3;0;4", "aff_unique_norm": "Shanghai AI Laboratory;Chinese University of Hong Kong;Fudan University;Beihang University;SenseTime Group Limited", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.cuhk.edu.cn;https://www.fudan.edu.cn;http://www.buaa.edu.cn/;https://www.sensetime.com", "aff_unique_abbr": "SAIL;CUHK;Fudan;BUAA;SenseTime", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Diversify \\& Conquer: Outcome-directed Curriculum RL via Out-of-Distribution Disagreement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70967", "id": "eMR57voMz1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a815fe7cad6af20a6c118f2072a881d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eMR57voMz1", "openreview": "https://openreview.net/forum?id=eMR57voMz1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70967", "video": "https://nips.cc/virtual/2023/poster/70967", "author_site": "Daesol Cho, Seungjae Lee, H. Jin Kim", "tldr": "", "abstract": "Reinforcement learning (RL) often faces the challenges of uninformed search problems where the agent should explore without access to the domain knowledge such as characteristics of the environment or external rewards. To tackle these challenges, this work proposes a new approach for curriculum RL called $\\textbf{D}$iversify for $\\textbf{D}$isagreement \\& $\\textbf{C}$onquer ($\\textbf{D2C}$). Unlike previous curriculum learning methods, D2C requires only a few examples of desired outcomes and works in any environment, regardless of its geometry or the distribution of the desired outcome examples. The proposed method performs diversification of the goal-conditional classifiers to identify similarities between visited and desired outcome states and ensures that the classifiers disagree on states from out-of-distribution, which enables quantifying the unexplored region and designing an arbitrary goal-conditioned intrinsic reward signal in a simple and intuitive way. 
The proposed method then employs bipartite matching to define a curriculum learning objective that produces a sequence of well-adjusted intermediate goals, which enable the agent to automatically explore and conquer the unexplored region. We present experimental results demonstrating that D2C outperforms prior curriculum RL methods in both quantitative and qualitative aspects, even with the arbitrarily distributed desired outcome examples.", "keywords": "Curriculum learning;Out-of-distribution disagreement;Underspecification;Outcome-directed RL", "primary_area": "", "supplementary_material": "/attachment/589c11b0b86c05022144d9636bfbb8ca523a5e91.zip", "author": "Daesol Cho;Seungjae Lee;H. Jin Kim", "authorids": "~Daesol_Cho1;~Seungjae_Lee2;~H._Jin_Kim1", "gender": ";;F", "homepage": "https://dscho1234.github.io;https://sjlee.cc;http://larr.snu.ac.kr", "dblp": "317/6937;;91/5753", "google_scholar": "3ZRfI74AAAAJ;hpR9h74AAAAJ;TLQUwIMAAAAJ", "orcid": "0000-0002-4105-4422;;", "linkedin": ";;", "or_profile": "~Daesol_Cho1;~Seungjae_Lee2;~H._Jin_Kim1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\ncho2023diversify,\ntitle={Diversify {\\textbackslash}\\& Conquer: Outcome-directed Curriculum {RL} via Out-of-Distribution Disagreement},\nauthor={Daesol Cho and Seungjae Lee and H. Jin Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eMR57voMz1}\n}", "github": "", "project": "", "reviewers": "TCnF;LCMf;geTA;pHjM;WkJV", "pdf_size": 9542854, "rating": "4;4;6;6;7", "confidence": "3;4;3;3;1", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;2", "presentation": "2;2;2;3;2", "wc_summary": "100;93;34;82;67", "wc_strengths": "55;52;11;53;56", "wc_weaknesses": "208;63;72;51;40", "wc_questions": "130;49;13;35;2", "wc_limitations": "1;13;30;1;0", "wc_review": "494;270;160;222;165", "wc_reply_reviewers": "341;42;474;0;10", "wc_reply_authors": "400;71;93;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 75.2, 23.421357774475844 ], "wc_strengths_avg": [ 45.4, 17.2580416038437 ], "wc_weaknesses_avg": [ 86.8, 61.55777773766692 ], "wc_questions_avg": [ 45.8, 45.19026443826148 ], "wc_limitations_avg": [ 9.0, 11.541230437002806 ], "wc_review_avg": [ 262.2, 122.71984354618449 ], "wc_reply_reviewers_avg": [ 173.4, 196.20560644385267 ], "wc_reply_authors_avg": [ 112.8, 148.37169541391646 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7824758900557374, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6654765791033840700&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Contextual Gaussian Process 
Bandits with Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70966", "id": "eNhW9UnlGG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5526c73e3ff4f2a34009e13d15f52fcb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eNhW9UnlGG", "openreview": "https://openreview.net/forum?id=eNhW9UnlGG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70966", "video": "https://nips.cc/virtual/2023/poster/70966", "author_site": "Haoting Zhang, Jinghai He, Rhonda Righter, Zuo-Jun Shen, Zeyu Zheng", "tldr": "", "abstract": "Contextual decision-making problems have witnessed extensive applications in various fields such as online content recommendation, personalized healthcare, and autonomous vehicles, where a core practical challenge is to select a suitable surrogate model for capturing unknown complicated reward functions. It is often the case that both high approximation accuracy and explicit uncertainty quantification are desired. In this work, we propose a neural network-accompanied Gaussian process (NN-AGP) model, which leverages neural networks to approximate the unknown and potentially complicated reward function regarding the contextual variable, and maintains a Gaussian process surrogate model with respect to the decision variable. Our model is shown to outperform existing approaches by offering better approximation accuracy thanks to the use of neural networks and possessing explicit uncertainty quantification from the Gaussian process. We also analyze the maximum information gain of the NN-AGP model and prove regret bounds for the corresponding algorithms. Moreover, we conduct experiments on both synthetic and practical problems, illustrating the effectiveness of our approach.", "keywords": "contextual bandit;Gaussian process;neural network", "primary_area": "", "supplementary_material": "/attachment/5ea492fe7ae23a9e1821948518e9a16073de2a1c.pdf", "author": "Haoting Zhang;Jinghai He;Rhonda Righter;Zuo-Jun Shen;Zeyu Zheng", "authorids": "~Haoting_Zhang2;~Jinghai_He1;rrighter@berkeley.edu;~Zuo-Jun_Shen1;~Zeyu_Zheng2", "gender": ";M;;M;M", "homepage": ";https://www.linkedin.com/in/jinghai-he-ocean;;http://shen.ieor.berkeley.edu;https://zheng.ieor.berkeley.edu/", "dblp": ";314/9142;;;48/7883.html/", "google_scholar": ";;;;", "orcid": ";0000-0002-5458-2048;;;0000-0001-5653-152X", "linkedin": ";;;;", "or_profile": "~Haoting_Zhang2;~Jinghai_He1;rrighter@berkeley.edu;~Zuo-Jun_Shen1;~Zeyu_Zheng2", "aff": ";University of California, Berkeley;;;University of California, Berkeley", "aff_domain": ";berkeley.edu;;;berkeley.edu", "position": ";PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nzhang2023contextual,\ntitle={Contextual Gaussian Process Bandits with Neural Networks},\nauthor={Haoting Zhang and Jinghai He and Rhonda Righter and Zuo-Jun Shen and Zeyu Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eNhW9UnlGG}\n}", "github": "", "project": "", "reviewers": "ThqQ;ZHPP;jVXy;9ysH", "pdf_size": 4261296, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "35;134;74;62", "wc_strengths": "39;102;55;75", "wc_weaknesses": "148;173;76;22", "wc_questions": "31;36;38;39", "wc_limitations": "5;1;2;11", "wc_review": "258;446;245;209", "wc_reply_reviewers": "43;9;40;25", "wc_reply_authors": "254;8;26;7", "reply_reviewers": 
"2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.25, 36.21032311371993 ], "wc_strengths_avg": [ 67.75, 23.53056522908024 ], "wc_weaknesses_avg": [ 104.75, 59.58764553160328 ], "wc_questions_avg": [ 36.0, 3.082207001484488 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 289.5, 92.12084454671484 ], "wc_reply_reviewers_avg": [ 29.25, 13.534677683639163 ], "wc_reply_authors_avg": [ 73.75, 104.3416863003469 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=451436418039640712&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";berkeley.edu;;;berkeley.edu", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "TRIAGE: Characterizing and auditing training data for improved regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70965", "id": "eP6cDDwBNC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ed687a5f52b651b19e7c18f702907b8b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eP6cDDwBNC", "openreview": "https://openreview.net/forum?id=eP6cDDwBNC", "poster": "/media/PosterPDFs/NeurIPS%202023/70965.png?t=1701427885.3088222", "slides": "https://nips.cc/virtual/2023/poster/70965", "video": "https://nips.cc/virtual/2023/poster/70965", "author_site": "Nabeel Seedat, Jonathan Crabb\u00e9, Zhaozhi Qian, Mihaela van der Schaar", "tldr": "", "abstract": "Data quality is crucial for robust machine learning algorithms, with the recent interest in data-centric AI emphasizing the importance of training data characterization. However, current data characterization methods are largely focused on classification settings, with regression settings largely understudied. To address this, we introduce TRIAGE, a novel data characterization framework tailored to regression tasks and compatible with a broad class of regressors. TRIAGE utilizes conformal predictive distributions to provide a model-agnostic scoring method, the TRIAGE score. We operationalize the score to analyze individual samples' training dynamics and characterize samples as under-, over-, or well-estimated by the model. We show that TRIAGE's characterization is consistent and highlight its utility to improve performance via data sculpting/filtering, in multiple regression settings. Additionally, beyond sample level, we show TRIAGE enables new approaches to dataset selection and feature acquisition. 
Overall, TRIAGE highlights the value unlocked by data characterization in real-world regression applications.", "keywords": "data-centric AI;data characterization;data quality", "primary_area": "", "supplementary_material": "/attachment/99c160b37caf55d9b1e299d14c115de8beb64bef.pdf", "author": "Nabeel Seedat;Jonathan Crabb\u00e9;Zhaozhi Qian;Mihaela van der Schaar", "authorids": "~Nabeel_Seedat1;~Jonathan_Crabb\u00e91;~Zhaozhi_Qian1;~Mihaela_van_der_Schaar2", "gender": ";M;;F", "homepage": ";https://jonathancrabbe.github.io/;;https://www.vanderschaar-lab.com", "dblp": "227/8368;278/8353.html;194/2443;", "google_scholar": "https://scholar.google.com/citations?hl=en;Y_Nmd2sAAAAJ;PuTDB5gAAAAJ;DZ3S--MAAAAJ", "orcid": ";0000-0002-0341-7712;0000-0002-4561-0342;", "linkedin": "nabeel-seedat/;jonathan-crabb%C3%A9-4ab5701a5/;;", "or_profile": "~Nabeel_Seedat1;~Jonathan_Crabb\u00e91;~Zhaozhi_Qian1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nseedat2023triage,\ntitle={{TRIAGE}: Characterizing and auditing training data for improved regression},\nauthor={Nabeel Seedat and Jonathan Crabb{\\'e} and Zhaozhi Qian and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eP6cDDwBNC}\n}", "github": "", "project": "", "reviewers": "N4d8;pEXt;oVcJ;pNo8;ckq8", "pdf_size": 1684342, "rating": "6;6;6;7;7", "confidence": "3;3;3;2;3", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;1;4;4", "wc_summary": "83;158;157;56;168", "wc_strengths": "77;75;58;25;122", "wc_weaknesses": "138;63;201;19;312", "wc_questions": "44;57;309;49;2", "wc_limitations": "28;1;111;3;9", "wc_review": "370;354;836;152;613", "wc_reply_reviewers": "14;67;191;13;66", "wc_reply_authors": "19;295;372;17;41", "reply_reviewers": "1;2;2;1;1", "reply_authors": "2;3;3;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 124.4, 45.79344931319326 ], "wc_strengths_avg": [ 71.4, 31.423558041698588 ], "wc_weaknesses_avg": [ 146.6, 103.59652503824634 ], "wc_questions_avg": [ 92.2, 110.06071051924025 ], "wc_limitations_avg": [ 30.4, 41.41304142416975 ], "wc_review_avg": [ 465.0, 236.160962057661 ], "wc_reply_reviewers_avg": [ 70.2, 64.88574573818197 ], "wc_reply_authors_avg": [ 148.8, 152.99202593599446 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4481858413944276913&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United 
Kingdom;United States" }, { "title": "Combating Bilateral Edge Noise for Robust Link Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70964", "id": "ePkLqJh5kw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/435986a8cc3e0667648df5d1c2d55c83-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ePkLqJh5kw", "openreview": "https://openreview.net/forum?id=ePkLqJh5kw", "poster": "/media/PosterPDFs/NeurIPS%202023/70964.png?t=1701400480.6460998", "slides": "https://nips.cc/virtual/2023/poster/70964", "video": "https://nips.cc/virtual/2023/poster/70964", "author_site": "Zhanke Zhou, Jiangchao Yao, Jiaxu Liu, Xiawei Guo, Quanming Yao, LI He, Liang Wang, Bo Zheng, Bo Han", "tldr": "", "abstract": "Although link prediction on graphs has achieved great success with the development of graph neural networks (GNNs), the potential robustness under the edge noise is still less investigated. To close this gap, we first conduct an empirical study to disclose that the edge noise bilaterally perturbs both input topology and target label, yielding severe performance degradation and representation collapse. To address this dilemma, we propose an information-theory-guided principle, Robust Graph Information Bottleneck (RGIB), to extract reliable supervision signals and avoid representation collapse. Different from the basic information bottleneck, RGIB further decouples and balances the mutual dependence among graph topology, target labels, and representation, building new learning objectives for robust representation against the bilateral noise. Two instantiations, RGIB-SSL and RGIB-REP, are explored to leverage the merits of different methodologies, i.e., self-supervised learning and data reparameterization, for implicit and explicit data denoising, respectively. Extensive experiments on six datasets and three GNNs with diverse noisy scenarios verify the effectiveness of our RGIB instantiations. 
The code is publicly available at: https://github.com/tmlr-group/RGIB.", "keywords": "Robust link prediction;Edge noise", "primary_area": "", "supplementary_material": "", "author": "Zhanke Zhou;Jiangchao Yao;Jiaxu Liu;Xiawei Guo;quanming yao;LI He;Liang Wang;Bo Zheng;Bo Han", "authorids": "~Zhanke_Zhou1;~Jiangchao_Yao1;~Jiaxu_Liu1;~Xiawei_Guo2;~quanming_yao1;~LI_He2;~Liang_Wang15;~Bo_Zheng5;~Bo_Han1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://andrewzhou924.github.io/;https://sunarker.github.io/;;;https://lars-group.github.io/;;;;https://bhanml.github.io/", "dblp": "285/5311;166/5900;;185/1356.html;158/1014;;;33/1610-7;241/0472-3", "google_scholar": "GVXErr0AAAAJ;w8oDh9QAAAAJ;;;https://scholar.google.com/schhp?hl=en;YBcGfoIAAAAJ;3hcLUEAAAAAJ;3gHhO9QAAAAJ;nTNjqHwAAAAJ", "orcid": ";;;;;;;0000-0002-4037-6315;", "linkedin": ";;jiaxu-liu-984379aa/;;;;;bo-zheng-0315254/;", "or_profile": "~Zhanke_Zhou1;~Jiangchao_Yao1;~Jiaxu_Liu1;~Xiawei_Guo2;~quanming_yao1;~LI_He2;~Liang_Wang15;~Bo_Zheng5;~bo_han2", "aff": "Hong Kong Baptist University;Shanghai Artificial Intelligence Laboratory;Alibaba Group;;Department of Electronic Engineering;;Alibaba Group;Alibaba Group;RIKEN", "aff_domain": "hkbu.edu.hk;pjlab.org.cn;alibaba-inc.com;;tsinghua.edu.cn;;alibaba-inc.com;alibaba-inc.com;riken.jp", "position": "PhD student;Researcher;Researcher;;Assistant Professor;;Senior Tech Expert;Principal Researcher;Adjunct Scientist", "bibtex": "@inproceedings{\nzhou2023combating,\ntitle={Combating Bilateral Edge Noise for Robust Link Prediction},\nauthor={Zhanke Zhou and Jiangchao Yao and Jiaxu Liu and Xiawei Guo and quanming yao and LI He and Liang Wang and Bo Zheng and Bo Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ePkLqJh5kw}\n}", "github": "", "project": "", "reviewers": "4Q8x;4XLr;Z7rx;cjox;Fboz", "pdf_size": 3798330, "rating": "6;7;7;7;7", "confidence": "3;4;4;3;3", "soundness": "3;3;4;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "65;99;70;68;163", "wc_strengths": "54;65;60;34;34", "wc_weaknesses": "60;55;60;48;5", "wc_questions": "46;41;43;47;19", "wc_limitations": "25;2;7;5;7", "wc_review": "250;262;240;202;228", "wc_reply_reviewers": "34;0;0;14;0", "wc_reply_authors": "46;0;0;49;0", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.0, 37.078295537955896 ], "wc_strengths_avg": [ 49.4, 13.047605144240071 ], "wc_weaknesses_avg": [ 45.6, 20.771133815947554 ], "wc_questions_avg": [ 39.2, 10.322790320451151 ], "wc_limitations_avg": [ 9.2, 8.109253973085316 ], "wc_review_avg": [ 236.4, 20.529003872570144 ], "wc_reply_reviewers_avg": [ 9.6, 13.350655414622906 ], "wc_reply_authors_avg": [ 19.0, 23.28948260481542 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5813033725903672033&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "hkbu.edu.hk;pjlab.org.cn;alibaba-inc.com;;tsinghua.edu.cn;;alibaba-inc.com;alibaba-inc.com;riken.jp", "author_num": 9, "aff_unique_index": "0;1;2;3;2;2;4", "aff_unique_norm": 
"Hong Kong Baptist University;Shanghai Artificial Intelligence Laboratory;Alibaba Group;Institution Name Not Provided;RIKEN", "aff_unique_dep": ";;;Department of Electronic Engineering;", "aff_unique_url": "https://www.hkbu.edu.hk;http://www.shailab.org/;https://www.alibaba.com;;https://www.riken.jp", "aff_unique_abbr": "HKBU;Shanghai AI Lab;Alibaba;;RIKEN", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;2", "aff_country_unique": "China;;Japan" }, { "title": "Classification of Heavy-tailed Features in High Dimensions: a Superstatistical Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70963", "id": "eR7PrfJe9o", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/88be023075a5a3ff3dc3b5d26623fa22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eR7PrfJe9o", "openreview": "https://openreview.net/forum?id=eR7PrfJe9o", "poster": "/media/PosterPDFs/NeurIPS%202023/70963.png?t=1702294340.0750473", "slides": "https://nips.cc/virtual/2023/poster/70963", "video": "https://nips.cc/virtual/2023/poster/70963", "author_site": "Urte Adomaityte, Gabriele Sicuro, Pierpaolo Vivo", "tldr": "", "abstract": "We characterise the learning of a mixture of two clouds of data points with generic centroids via empirical risk minimisation in the high dimensional regime, under the assumptions of generic convex loss and convex regularisation. Each cloud of data points is obtained via a double-stochastic process, where the sample is obtained from a Gaussian distribution whose variance is itself a random parameter sampled from a scalar distribution $\\varrho$. As a result, our analysis covers a large family of data distributions, including the case of power-law-tailed distributions with no covariance, and allows us to test recent ''Gaussian universality'' claims. 
We study the generalisation performance of the obtained estimator, we analyse the role of regularisation, and we analytically characterise the separability transition.", "keywords": "Classification;Gaussian Mixture Model;Superstatistics;Empirical Risk Minimization;Replica theory;Power-law distribution", "primary_area": "", "supplementary_material": "/attachment/e0220ed46ae4e13c898e63b88d588e7393cf509e.pdf", "author": "Urte Adomaityte;Gabriele Sicuro;Pierpaolo Vivo", "authorids": "~Urte_Adomaityte1;~Gabriele_Sicuro1;pierpaolo.vivo@kcl.ac.uk", "gender": "F;M;", "homepage": ";https://gsicuro.github.io/;", "dblp": ";145/7405;", "google_scholar": ";Lls7QvUAAAAJ;", "orcid": "0000-0002-5593-2177;0000-0002-9258-2436;", "linkedin": ";;", "or_profile": "~Urte_Adomaityte1;~Gabriele_Sicuro1;pierpaolo.vivo@kcl.ac.uk", "aff": "King's College London, University of London;King's College London;", "aff_domain": "kcl.ac.uk;kcl.ac.uk;", "position": "PhD student;Lecturer;", "bibtex": "@inproceedings{\nadomaityte2023classification,\ntitle={Classification of Heavy-tailed Features in High Dimensions: a Superstatistical Approach},\nauthor={Urte Adomaityte and Gabriele Sicuro and Pierpaolo Vivo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eR7PrfJe9o}\n}", "github": "", "project": "", "reviewers": "X846;26qY;7S1w;K5kC", "pdf_size": 5974479, "rating": "5;6;7;7", "confidence": "2;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;1;3;3", "wc_summary": "162;55;97;133", "wc_strengths": "53;33;35;172", "wc_weaknesses": "332;195;36;239", "wc_questions": "71;9;51;58", "wc_limitations": "1;26;1;58", "wc_review": "619;318;220;660", "wc_reply_reviewers": "32;80;25;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 111.75, 40.04606722263748 ], "wc_strengths_avg": [ 73.25, 57.54291876504006 ], "wc_weaknesses_avg": [ 200.5, 107.08057713703265 ], "wc_questions_avg": [ 47.25, 23.220411279734044 ], "wc_limitations_avg": [ 21.5, 23.41473894793619 ], "wc_review_avg": [ 454.25, 189.01901359387102 ], "wc_reply_reviewers_avg": [ 34.25, 28.968733144547414 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=349168024768578410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kcl.ac.uk;kcl.ac.uk;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "King's College London", "aff_unique_dep": "", "aff_unique_url": "https://www.kcl.ac.uk", "aff_unique_abbr": "KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Strategyproof Voting under Correlated Beliefs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70962", "id": "eT1QOsssRB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7cefded8659ccc899196860af674b596-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eT1QOsssRB", "openreview": 
"https://openreview.net/forum?id=eT1QOsssRB", "poster": "/media/PosterPDFs/NeurIPS%202023/70962.png?t=1701313446.1340103", "slides": "https://nips.cc/virtual/2023/poster/70962", "video": "https://nips.cc/virtual/2023/poster/70962", "author_site": "Daniel Halpern, Rachel Li, Ariel Procaccia", "tldr": "", "abstract": "In voting theory, when voters have ranked preferences over candidates, the celebrated Gibbard-Satterthwaite Theorem essentially rules out the existence of reasonable strategyproof methods for picking a winner. What if we weaken strategyproofness to only hold for Bayesian voters with beliefs over others' preferences? When voters believe other participants' rankings are drawn independently from a fixed distribution, the impossibility persists. However, it is quite reasonable for a voter to believe that other votes are correlated, either to each other or to their own ranking. We consider such beliefs induced by classic probabilistic models in social choice such as the Mallows, Placket-Luce, and Thurstone-Mosteller models. We single out the plurality rule (choosing the candidate ranked first most often) as a particularly promising choice as it is strategyproof for a large class of beliefs containing the specific ones we introduce. Further, we show that plurality is unique among positional scoring rules in having this property: no other scoring rule is strategyproof for beliefs induced by the Mallows model when there are a sufficient number of voters. Finally, we give examples of prominent non-scoring voting rules failing to be strategyproof on beliefs in this class, further bolstering the case for plurality.", "keywords": "social choice;strategyproof;voting", "primary_area": "", "supplementary_material": "/attachment/75c6606d9f93c60366e9d54d990c733bccc6e8e4.pdf", "author": "Daniel Halpern;Rachel Li;Ariel D. Procaccia", "authorids": "~Daniel_Halpern1;~Rachel_Li1;~Ariel_D._Procaccia1", "gender": "M;F;M", "homepage": "https://dhalpern13.github.io;;http://procaccia.info/", "dblp": "83/5135-2;;p/ArielDProcaccia", "google_scholar": "https://scholar.google.ca/citations?user=Q4HPgdsAAAAJ;;https://scholar.google.com.tw/citations?user=8ZpV-lkAAAAJ", "orcid": ";;", "linkedin": ";rachelxli/;", "or_profile": "~Daniel_Halpern1;~Rachel_Li1;~Ariel_Procaccia1", "aff": "Harvard University;Harvard University;Harvard University", "aff_domain": "harvard.edu;harvard.edu;harvard.edu", "position": "PhD student;Undergrad student;Gordon McKay Professor of Computer Science", "bibtex": "@inproceedings{\nhalpern2023strategyproof,\ntitle={Strategyproof Voting under Correlated Beliefs},\nauthor={Daniel Halpern and Rachel Li and Ariel D. 
Procaccia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eT1QOsssRB}\n}", "github": "", "project": "", "reviewers": "A4JJ;kNTn;qq2e;qwQq;jEy3", "pdf_size": 337464, "rating": "5;5;5;7;8", "confidence": "4;2;4;3;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;4;4", "presentation": "3;2;3;4;3", "wc_summary": "279;134;115;219;150", "wc_strengths": "8;130;37;51;93", "wc_weaknesses": "20;88;184;55;56", "wc_questions": "14;67;51;2;55", "wc_limitations": "3;10;28;7;2", "wc_review": "324;429;415;334;356", "wc_reply_reviewers": "56;26;95;0;1", "wc_reply_authors": "26;0;185;0;0", "reply_reviewers": "1;1;2;0;1", "reply_authors": "2;1;2;0;1", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 179.4, 60.92815441156903 ], "wc_strengths_avg": [ 63.8, 42.966964053793696 ], "wc_weaknesses_avg": [ 80.6, 55.99857141034939 ], "wc_questions_avg": [ 37.8, 25.182533629482162 ], "wc_limitations_avg": [ 10.0, 9.444575162494075 ], "wc_review_avg": [ 371.6, 42.66427076606373 ], "wc_reply_reviewers_avg": [ 35.6, 36.058840802222136 ], "wc_reply_authors_avg": [ 42.2, 72.10658777116 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.7483314773547883 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.1976423537605237, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lHYfjXlbofEJ:scholar.google.com/&scioq=Strategyproof+Voting+under+Correlated+Beliefs&hl=en&as_sdt=0,44", "gs_version_total": 7, "email": "harvard.edu;harvard.edu;harvard.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Spatially Resolved Gene Expression Prediction from Histology Images via Bi-modal Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70961", "id": "eT1tMdAUoc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df656d6ed77b565e8dcdfbf568aead0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eT1tMdAUoc", "openreview": "https://openreview.net/forum?id=eT1tMdAUoc", "poster": "/media/PosterPDFs/NeurIPS%202023/70961.png?t=1699892008.5151482", "slides": "https://nips.cc/virtual/2023/poster/70961", "video": "https://nips.cc/virtual/2023/poster/70961", "author_site": "Ronald Xie, Kuan Pang, Sai Chung, Catia Perciani, Sonya MacParland, Bo Wang, Gary Bader", "tldr": "", "abstract": "Histology imaging is an important tool in medical diagnosis and research, enabling the examination of tissue structure and composition at the microscopic level. Understanding the underlying molecular mechanisms of tissue architecture is critical in uncovering disease mechanisms and developing effective treatments. Gene expression profiling provides insight into the molecular processes underlying tissue architecture, but the process can be time-consuming and expensive.
We present BLEEP (Bi-modaL Embedding for Expression Prediction), a bi-modal embedding framework capable of generating spatially resolved gene expression profiles of whole-slide Hematoxylin and eosin (H&E) stained histology images. BLEEP uses contrastive learning to construct a low-dimensional joint embedding space from a reference dataset using paired image and expression profiles at micrometer resolution. With this approach, the gene expression of any query image patch can be imputed using the expression profiles from the reference dataset. We demonstrate BLEEP\u2019s effectiveness in gene expression prediction by benchmarking its performance on a human liver tissue dataset captured using the 10x Visium platform, where it achieves significant improvements over existing methods. Our results demonstrate the potential of BLEEP to provide insights into the molecular mechanisms underlying tissue architecture, with important implications in diagnosis and research of various diseases. The proposed approach can significantly reduce the time and cost associated with gene expression profiling, opening up new avenues for high-throughput analysis of histology images for both research and clinical applications.", "keywords": "BLEEP;Histology;H&E;Gene Expression Prediction;Spatial Transcriptomics;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/36967c55d94e4107f51b78af4ea9f80658c9ffc1.pdf", "author": "Ronald Xie;Kuan Pang;Sai W Chung;Catia Perciani;Sonya MacParland;BO WANG;Gary Bader", "authorids": "~Ronald_Xie1;~Kuan_Pang1;~Sai_W_Chung1;~Catia_Perciani1;~Sonya_MacParland1;~BO_WANG11;~Gary_Bader1", "gender": "M;;;F;;M;M", "homepage": ";;;;https://www.uhnresearch.ca/researcher/sonya-macparland;https://wanglab.ai/;https://baderlab.org/", "dblp": ";;;;;;35/2796", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;;37FDILIAAAAJ;https://scholar.google.ca/citations?user=22M9eisAAAAJ", "orcid": ";;;0000-0003-0377-1778;;;0000-0003-0185-8861", "linkedin": ";;;;;;gary-bader-a08673/", "or_profile": "~Ronald_Xie1;~Kuan_Pang1;~Sai_W_Chung1;~Catia_Perciani1;~Sonya_MacParland1;~BO_WANG11;~Gary_Bader1", "aff": "University of Toronto;;;;University of Toronto;Vector Institute;University of Toronto", "aff_domain": "utoronto.ca;;;;mail.utoronto.ca;vectorinstitute.ai;utoronto.ca", "position": "PhD student;;;;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxie2023spatially,\ntitle={Spatially Resolved Gene Expression Prediction from Histology Images via Bi-modal Contrastive Learning},\nauthor={Ronald Xie and Kuan Pang and Sai W Chung and Catia Perciani and Sonya MacParland and BO WANG and Gary Bader},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eT1tMdAUoc}\n}", "github": "", "project": "", "reviewers": "WHAP;VLr1;XnkF;J9t4;QFTu", "pdf_size": 5532673, "rating": "4;5;5;6;6", "confidence": "4;3;3;4;4", "soundness": "2;2;2;3;3", "novelty": "2;3;3;3;3", "presentation": "3;4;3;3;4", "wc_summary": "106;194;58;65;116", "wc_strengths": "57;95;50;157;102", "wc_weaknesses": "139;58;85;314;160", "wc_questions": "2;29;2;73;64", "wc_limitations": "16;44;26;57;11", "wc_review": "320;420;221;666;453", "wc_reply_reviewers": "0;14;28;15;23", "wc_reply_authors": "259;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], 
"novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 107.8, 48.60617244754003 ], "wc_strengths_avg": [ 92.2, 38.26957015697981 ], "wc_weaknesses_avg": [ 151.2, 89.21748707512447 ], "wc_questions_avg": [ 34.0, 29.979993328885183 ], "wc_limitations_avg": [ 30.8, 17.29045979724079 ], "wc_review_avg": [ 416.0, 149.0811859357176 ], "wc_reply_reviewers_avg": [ 16.0, 9.528903399657276 ], "wc_reply_authors_avg": [ 51.8, 103.59999999999998 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=561759896238895771&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "utoronto.ca;;;;mail.utoronto.ca;vectorinstitute.ai;utoronto.ca", "author_num": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Toronto;Vector Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Direct Training of SNN using Local Zeroth Order Method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70960", "id": "eTF3VDH2b6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c5e64f26a97db6a2b0bbb788236431e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eTF3VDH2b6", "openreview": "https://openreview.net/forum?id=eTF3VDH2b6", "poster": "/media/PosterPDFs/NeurIPS%202023/70960.png?t=1701943264.3191204", "slides": "https://nips.cc/virtual/2023/poster/70960", "video": "https://nips.cc/virtual/2023/poster/70960", "author_site": "Bhaskar Mukhoty, Velibor Bojkovic, William de Vazelhes, Xiaohan Zhao, Giulia De Masi, Huan Xiong, Bin Gu", "tldr": "", "abstract": "Spiking neural networks are becoming increasingly popular for their low energy requirement in real-world tasks with accuracy comparable to traditional ANNs. SNN training algorithms face the loss of gradient information and non-differentiability due to the Heaviside function in minimizing the model loss over model parameters. To circumvent this problem, the surrogate method employs a differentiable approximation of the Heaviside function in the backward pass, while the forward pass continues to use the Heaviside as the spiking function. We propose to use the zeroth-order technique at the local or neuron level in training SNNs, motivated by its regularizing and potential energy-efficient effects and establish a theoretical connection between it and the existing surrogate methods. We perform experimental validation of the technique on standard static datasets (CIFAR-10, CIFAR-100, ImageNet-100) and neuromorphic datasets (DVS-CIFAR-10, DVS-Gesture, N-Caltech-101, NCARS) and obtain results that offer improvement over the state-of-the-art results. The proposed method also lends itself to efficient implementations of the back-propagation method, which could provide 3-4 times overall speedup in training time. 
The code is available at \\url{https://github.com/BhaskarMukhoty/LocalZO}.", "keywords": "Spiking Neural Network;Zeroth Order;Surrogate Gradient", "primary_area": "", "supplementary_material": "/attachment/d43cdb954b296799b555950aa6c82b25cb9ac55c.zip", "author": "Bhaskar Mukhoty;Velibor Bojkovic;William de Vazelhes;Xiaohan Zhao;Giulia De Masi;Huan Xiong;Bin Gu", "authorids": "~Bhaskar_Mukhoty1;~Velibor_Bojkovic1;~William_de_Vazelhes2;~Xiaohan_Zhao3;~Giulia_De_Masi1;~Huan_Xiong1;~Bin_Gu1", "gender": "M;;;M;M;M;M", "homepage": ";;;https://scholar.google.com/citations?user=l4hm14MAAAAJ&hl=en;https://mbzuai.ac.ae/study/faculty/bin-gu/;http://github.com/wdevazelhes;https://github.com/XiaohanZhao123", "dblp": "166/1438;;147/8719;;29/1758-1;247/1152;75/781", "google_scholar": "https://scholar.google.co.in/citations?user=lJglnOQAAAAJ;LXdhoooAAAAJ;G1K5hX0AAAAJ;l4hm14MAAAAJ;Vo8OgCgAAAAJ;ple0xCwAAAAJ;PliLuD4AAAAJ", "orcid": "0000-0002-8594-980X;;0000-0003-3284-880X;;0000-0001-6049-1815;;0009-0005-2793-3526", "linkedin": ";velibor-bojkovic-8242261b6/;;;;;", "or_profile": "~Bhaskar_Mukhoty1;~Velibor_Bojkovic1;~Giulia_De_Masi1;~Huan_Xiong1;~Bin_Gu1;~William_De_Vazelhes1;~Zhao_Xiaohan1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Technology Innovation Institute;;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Nanjing University of Information Science and Technology", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;tii.ae;;mbzuai.ac.ae;mbzuai.ac.ae;nuist.edu.cn", "position": "Postdoc;Postdoc;Principal Researcher;;Assistant Professor;PhD student;Undergrad student", "bibtex": "@inproceedings{\nmukhoty2023direct,\ntitle={Direct Training of {SNN} using Local Zeroth Order Method},\nauthor={Bhaskar Mukhoty and Velibor Bojkovic and William de Vazelhes and Xiaohan Zhao and Giulia De Masi and Huan Xiong and Bin Gu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eTF3VDH2b6}\n}", "github": "", "project": "", "reviewers": "DTqo;cVHq;8Gkq;Rm6P", "pdf_size": 3427722, "rating": "4;6;7;8", "confidence": "4;4;3;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "59;35;176;33", "wc_strengths": "49;27;110;38", "wc_weaknesses": "64;160;52;18", "wc_questions": "11;6;132;39", "wc_limitations": "18;11;21;26", "wc_review": "201;239;491;154", "wc_reply_reviewers": "218;48;8;73", "wc_reply_authors": "1095;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.75, 58.77658972754374 ], "wc_strengths_avg": [ 56.0, 32.132538026119256 ], "wc_weaknesses_avg": [ 73.5, 52.713850172416734 ], "wc_questions_avg": [ 47.0, 50.66063560596136 ], "wc_limitations_avg": [ 19.0, 5.431390245600108 ], "wc_review_avg": [ 271.25, 130.39627103563967 ], "wc_reply_reviewers_avg": [ 86.75, 79.24447930297731 ], "wc_reply_authors_avg": [ 273.75, 474.14890857198014 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8451542547285166, "gs_citation": 15, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1776608935483977836&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "mbzuai.ac.ae;mbzuai.ac.ae;tii.ae;;mbzuai.ac.ae;mbzuai.ac.ae;nuist.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Technology Innovation Institute;Nanjing University of Information Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://mbzuai.ac.ae;;http://www.nuist.edu.cn", "aff_unique_abbr": "MBZUAI;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;2", "aff_country_unique": "United Arab Emirates;;China" }, { "title": "A Rigorous Link between Deep Ensembles and (Variational) Bayesian Methods", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70959", "id": "eTHawKFT4h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d25b1db211d99d5750ec45d65fd6e4e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eTHawKFT4h", "openreview": "https://openreview.net/forum?id=eTHawKFT4h", "poster": "/media/PosterPDFs/NeurIPS%202023/70959.png?t=1698696439.5364945", "slides": "https://nips.cc/virtual/2023/poster/70959", "video": "https://nips.cc/virtual/2023/poster/70959", "author_site": "Veit David Wild, Sahra Ghalebikesabi, Dino Sejdinovic, Jeremias Knoblauch", "tldr": "", "abstract": "We establish the first mathematically rigorous link between Bayesian, variational Bayesian, and ensemble methods. A key step towards this it to reformulate the non-convex optimisation problem typically encountered in deep learning as a convex optimisation in the space of probability measures. On a technical level, our contribution amounts to studying generalised variational inference through the lense of Wasserstein gradient flows. The result is a unified theory of various seemingly disconnected approaches that are commonly used for uncertainty quantification in deep learning---including deep ensembles and (variational) Bayesian methods. This offers a fresh perspective on the reasons behind the success of deep ensembles over procedures based on parameterised variational inference, and allows the derivation of new ensembling schemes with convergence guarantees. 
We showcase this by proposing a family of interacting deep ensembles with direct parallels to the interactions of particle systems in thermodynamics, and use our theory to prove the convergence of these algorithms to a well-defined global minimiser on the space of probability measures.", "keywords": "Wasserstein gradient flow;generalised variational inference;deep ensembles;Bayesian deep learning;variational Bayes", "primary_area": "", "supplementary_material": "/attachment/4459723091322ec7c99ec810c28ce419c67babb9.pdf", "author": "Veit David Wild;Sahra Ghalebikesabi;Dino Sejdinovic;Jeremias Knoblauch", "authorids": "~Veit_David_Wild1;~Sahra_Ghalebikesabi1;~Dino_Sejdinovic1;~Jeremias_Knoblauch1", "gender": "M;;M;M", "homepage": ";;https://sejdino.github.io/;https://jeremiasknoblauch.github.io/", "dblp": ";;31/1783;220/5462", "google_scholar": "gUUTqicAAAAJ;;v8Dg1lIAAAAJ;https://scholar.google.co.uk/citations?user=4TPsxlsAAAAJ", "orcid": ";;0000-0001-5547-9213;", "linkedin": ";;https://linkedin.com/in/dinosejdinovic;", "or_profile": "~Veit_David_Wild1;~Sahra_Ghalebikesabi1;~Dino_Sejdinovic1;~Jeremias_Knoblauch1", "aff": "University of Oxford;;University of Adelaide;", "aff_domain": "ox.ac.uk;;adelaide.edu.au;", "position": "PhD student;;Full Professor;", "bibtex": "@inproceedings{\nwild2023a,\ntitle={A Rigorous Link between Deep Ensembles and (Variational) Bayesian Methods},\nauthor={Veit David Wild and Sahra Ghalebikesabi and Dino Sejdinovic and Jeremias Knoblauch},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eTHawKFT4h}\n}", "github": "", "project": "", "reviewers": "6hMj;9edE;4EE1;AtWA;NdiF", "pdf_size": 988242, "rating": "7;8;8;8;8", "confidence": "4;2;2;3;3", "soundness": "3;4;4;4;3", "novelty": "2;4;4;4;3", "presentation": "3;4;3;4;2", "wc_summary": "40;106;95;146;182", "wc_strengths": "69;74;103;86;48", "wc_weaknesses": "62;300;32;134;334", "wc_questions": "171;85;54;356;90", "wc_limitations": "4;30;40;40;338", "wc_review": "346;595;324;762;992", "wc_reply_reviewers": "17;18;29;0;80", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 113.8, 48.059962546801884 ], "wc_strengths_avg": [ 76.0, 18.253766734567417 ], "wc_weaknesses_avg": [ 172.4, 123.10255886861167 ], "wc_questions_avg": [ 151.2, 109.46305312752791 ], "wc_limitations_avg": [ 90.4, 124.49835340276593 ], "wc_review_avg": [ 603.8, 253.20063191074388 ], "wc_reply_reviewers_avg": [ 28.8, 27.22792683991934 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8017837257372734, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15507281261245759032&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ox.ac.uk;;adelaide.edu.au;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;University of Adelaide", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.adelaide.edu.au", "aff_unique_abbr": "Oxford;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United 
Kingdom;Australia" }, { "title": "Double Randomized Underdamped Langevin with Dimension-Independent Convergence Guarantee", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70958", "id": "eTMHsUp3Ii", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9af4d6ac714626b652da5616ca71f99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eTMHsUp3Ii", "openreview": "https://openreview.net/forum?id=eTMHsUp3Ii", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70958", "video": "https://nips.cc/virtual/2023/poster/70958", "author_site": "Yuanshi Liu, Cong Fang, Tong Zhang", "tldr": "", "abstract": "This paper focuses on the high-dimensional sampling of log-concave distributions with composite structures: $p^*(\\mathrm{d}x)\\propto \\exp(-g(x)-f(x))\\mathrm{d}x$. We develop a double randomization technique, which leads to a fast underdamped Langevin algorithm with a dimension-independent convergence guarantee. We prove that the algorithm enjoys an overall $\\tilde{\\mathcal{O}}\\left(\\frac{\\left(\\mathrm{tr}(H)\\right)^{1/3}}{\\epsilon^{2/3}}\\right)$ iteration complexity to reach an $\\epsilon$-tolerated sample whose distribution $p$ admits $W_2(p,p^*)\\leq \\epsilon$. Here, $H$ is an upper bound of the Hessian matrices for $f$ and does not explicitly depend on dimension $d$. For the posterior sampling over linear models with normalized data, we show a clear superiority of convergence rate which is dimension-free and outperforms the previous best-known results by a $d^{1/3}$ factor. The analysis to achieve a faster convergence rate brings new insights into high-dimensional sampling.", "keywords": "Langevin;Dimension dependence;Acceleration", "primary_area": "", "supplementary_material": "/attachment/d39f8598c36921baba05df4aa5ce72354f00e1ca.pdf", "author": "Yuanshi Liu;Cong Fang;Tong Zhang", "authorids": "~Yuanshi_Liu1;~Cong_Fang1;~Tong_Zhang2", "gender": ";M;M", "homepage": "https://zero-lab-pku.github.io/personwise/liuyuanshi/;https://congfang-ml.github.io/;http://tongzhang-ml.org", "dblp": ";140/6568;07/4227-1", "google_scholar": ";N2M9RPoAAAAJ;LurWtuYAAAAJ", "orcid": ";;0000-0002-5511-2558", "linkedin": ";;", "or_profile": "~Yuanshi_Liu1;~Cong_Fang1;~Tong_Zhang2", "aff": ";Peking University;Hong Kong University of Science and Technology", "aff_domain": ";pku.edu.cn;ust.hk", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023double,\ntitle={Double Randomized Underdamped Langevin with Dimension-Independent Convergence Guarantee},\nauthor={Yuanshi Liu and Cong Fang and Tong Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eTMHsUp3Ii}\n}", "github": "", "project": "", "reviewers": "SZ7b;5mo9;TP8b;RFtN", "pdf_size": 495274, "rating": "5;6;6;7", "confidence": "4;4;2;3", "soundness": "4;3;3;3", "novelty": "2;3;3;3", "presentation": "4;3;2;3", "wc_summary": "26;54;86;148", "wc_strengths": "64;33;84;65", "wc_weaknesses": "115;157;109;207", "wc_questions": "127;78;326;75", "wc_limitations": "10;11;9;1", "wc_review": "342;333;614;496", "wc_reply_reviewers": "70;24;120;35", "wc_reply_authors": "22;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], 
"wc_summary_avg": [ 78.5, 45.39548435692696 ], "wc_strengths_avg": [ 61.5, 18.282505298782223 ], "wc_weaknesses_avg": [ 147.0, 39.26830783214372 ], "wc_questions_avg": [ 151.5, 102.84089653440405 ], "wc_limitations_avg": [ 7.75, 3.960744879438715 ], "wc_review_avg": [ 446.25, 116.52118906018768 ], "wc_reply_reviewers_avg": [ 62.25, 37.41907935799597 ], "wc_reply_authors_avg": [ 5.5, 9.526279441628825 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13375707374421980373&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";pku.edu.cn;ust.hk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.ust.hk", "aff_unique_abbr": "Peking U;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "ASPEN: Breaking Operator Barriers for Efficient Parallelization of Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70957", "id": "eTp4RetK74", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d899a31938c7838965b589d9b14a5ca6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eTp4RetK74", "openreview": "https://openreview.net/forum?id=eTp4RetK74", "poster": "/media/PosterPDFs/NeurIPS%202023/70957.png?t=1700615450.5953486", "slides": "https://nips.cc/virtual/2023/poster/70957", "video": "https://nips.cc/virtual/2023/poster/70957", "author_site": "Jongseok Park, Kyungmin Bin, Gibum Park, Sangtae Ha, Kyunghan Lee", "tldr": "", "abstract": "Modern Deep Neural Network (DNN) frameworks use tensor operators as the main building blocks of DNNs. However, we observe that operator-based construction of DNNs incurs significant drawbacks in parallelism in the form of synchronization barriers. Synchronization barriers of operators confine the scope of parallel computation to each operator and obscure the rich parallel computation opportunities that exist across operators. To this end, we present ASPEN, a novel parallel computation solution for DNNs that achieves fine-grained dynamic execution of DNNs, which (1) removes the operator barriers and expresses DNNs in dataflow graphs of fine-grained tiles to expose the parallel computation opportunities across operators, and (2) exploits these opportunities by dynamically locating and scheduling them in runtime. This novel approach of ASPEN enables opportunistic parallelism, a new class of parallelism for DNNs that is unavailable in the existing operator-based approaches. ASPEN also achieves high resource utilization and memory reuse by letting each resource asynchronously traverse depthwise in the DNN graph to its full computing potential. 
We discuss the challenges of our approach and our solutions to them, and show that our proof-of-concept implementation of ASPEN on CPU delivers exceptional performance, outperforming the state-of-the-art inference systems TorchScript and TVM by up to 3.2$\\times$ and 4.3$\\times$, respectively.", "keywords": "Deep Neural Network;Deep Learning;Parallel Execution Algorithm;Parallelization;Deep Learning Parallelism;Dynamic;Asynchronous;Scheduling;Dynamic Scheduling;Dynamic Execution;tile;tiling;dataflow;dataflow graph;tile-based dataflow graph;opportunistic parallelism", "primary_area": "", "supplementary_material": "/attachment/0b0c12dc01bb4dfe055920209fd893ecab20a7d5.zip", "author": "Jongseok Park;Kyungmin Bin;Gibum Park;Sangtae Ha;Kyunghan Lee", "authorids": "~Jongseok_Park1;~Kyungmin_Bin1;gibumpark@snu.ac.kr;~Sangtae_Ha1;~Kyunghan_Lee1", "gender": "M;M;;M;M", "homepage": "https://nxc.snu.ac.kr/;;;http://netstech.org;https://nxc.snu.ac.kr", "dblp": "137/1527;295/6310;;30/4930;49/6532.html", "google_scholar": ";https://scholar.google.com/citations?hl=ko;;https://scholar.google.com.tw/citations?user=GcMKuu8AAAAJ;IwQCRuQAAAAJ", "orcid": "0000-0003-3910-7182;;;;0000-0001-8647-1476", "linkedin": ";;;;", "or_profile": "~Jongseok_Park1;~Kyungmin_Bin1;gibumpark@snu.ac.kr;~Sangtae_Ha1;~Kyunghan_Lee1", "aff": "Seoul National University;Seoul National University;;University of Colorado Boulder;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;;colorado.edu;snu.ac.kr", "position": "MS student;PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npark2023aspen,\ntitle={{ASPEN}: Breaking Operator Barriers for Efficient Parallelization of Deep Neural Networks},\nauthor={Jongseok Park and Kyungmin Bin and Gibum Park and Sangtae Ha and Kyunghan Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eTp4RetK74}\n}", "github": "", "project": "", "reviewers": "Cexe;yggz;qSHs;b91G;zpkQ", "pdf_size": 1016283, "rating": "6;6;6;6;6", "confidence": "4;3;4;2;3", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "87;105;116;125;201", "wc_strengths": "106;40;45;67;140", "wc_weaknesses": "421;273;67;22;56", "wc_questions": "145;80;126;1;23", "wc_limitations": "213;8;1;16;11", "wc_review": "972;506;355;231;431", "wc_reply_reviewers": "181;15;32;0;40", "wc_reply_authors": "0;0;63;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 126.8, 39.20408142017869 ], "wc_strengths_avg": [ 79.6, 38.1292538610448 ], "wc_weaknesses_avg": [ 167.8, 154.333923684976 ], "wc_questions_avg": [ 75.0, 56.046409340831104 ], "wc_limitations_avg": [ 49.8, 81.74447993595652 ], "wc_review_avg": [ 499.0, 253.36219133880257 ], "wc_reply_reviewers_avg": [ 53.6, 65.18466077230133 ], "wc_reply_authors_avg": [ 12.6, 25.2 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6166282404977991384&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 3, "email": "snu.ac.kr;snu.ac.kr;;colorado.edu;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National 
University;University of Colorado", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.colorado.edu", "aff_unique_abbr": "SNU;CU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boulder", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Algorithmic Regularization in Tensor Optimization: Towards a Lifted Approach in Matrix Sensing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70956", "id": "eU6P4aUdCA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7db2348b5bfeca620aa7327df815adcc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eU6P4aUdCA", "openreview": "https://openreview.net/forum?id=eU6P4aUdCA", "poster": "/media/PosterPDFs/NeurIPS%202023/70956.png?t=1701374586.4184096", "slides": "https://nips.cc/virtual/2023/poster/70956", "video": "https://nips.cc/virtual/2023/poster/70956", "author_site": "Ziye Ma, Javad Lavaei, Somayeh Sojoudi", "tldr": "", "abstract": "Gradient descent (GD) is crucial for generalization in machine learning models, as it induces implicit regularization, promoting compact representations. In this work, we examine the role of GD in inducing implicit regularization for tensor optimization, particularly within the context of the lifted matrix sensing framework. This framework has been recently proposed to address the non-convex matrix sensing problem by transforming spurious solutions into strict saddles when optimizing over symmetric, rank-1 tensors. We show that, with sufficiently small initialization scale, GD applied to this lifted problem results in approximate rank-1 tensors and critical points with escape directions. Our findings underscore the significance of the tensor parametrization of matrix sensing, in combination with first-order methods, in achieving global optimality in such problems.", "keywords": "non-convex optimization;low-rank matrix optimization;matrix sensing;implicit bias;tensor;over-parametrization", "primary_area": "", "supplementary_material": "/attachment/8e640076d05248a639ba86b8a67167241a80a415.pdf", "author": "Ziye Ma;Javad Lavaei;Somayeh Sojoudi", "authorids": "~Ziye_Ma1;~Javad_Lavaei1;~Somayeh_Sojoudi1", "gender": "M;;F", "homepage": "https://gavenma.github.io;;https://eecs.berkeley.edu/~sojoudi/", "dblp": ";;06/7000", "google_scholar": "MJ7X-bAAAAAJ;;kNH8zcgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ziye_Ma1;~Javad_Lavaei1;~Somayeh_Sojoudi1", "aff": "University of California, Berkeley;;University of California, Berkeley", "aff_domain": "berkeley.edu;;berkeley.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nma2023algorithmic,\ntitle={Algorithmic Regularization in Tensor Optimization: Towards a Lifted Approach in Matrix Sensing},\nauthor={Ziye Ma and Javad Lavaei and Somayeh Sojoudi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eU6P4aUdCA}\n}", "github": "", "project": "", "reviewers": "rWmQ;t1t9;Dh4F;RX1r", "pdf_size": 761674, "rating": "5;6;7;7", "confidence": "3;4;3;3", "soundness": "4;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;3;2", "wc_summary": "65;149;54;77", "wc_strengths": "58;129;70;40", "wc_weaknesses": "195;791;72;259", "wc_questions": "45;90;11;6", "wc_limitations": "12;26;63;6", "wc_review": "375;1185;270;388", "wc_reply_reviewers": "194;185;25;52", "wc_reply_authors": "429;433;40;162", "reply_reviewers": 
"2;2;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 37.13068138345969 ], "wc_strengths_avg": [ 74.25, 33.36446462930284 ], "wc_weaknesses_avg": [ 329.25, 274.9312414041009 ], "wc_questions_avg": [ 38.0, 33.56337289367682 ], "wc_limitations_avg": [ 26.75, 22.151467220028564 ], "wc_review_avg": [ 554.5, 366.88315578668914 ], "wc_reply_reviewers_avg": [ 114.0, 76.16757840446287 ], "wc_reply_authors_avg": [ 266.0, 170.5505790081054 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14391715326116567382&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "berkeley.edu;;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "XAGen: 3D Expressive Human Avatars Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70955", "id": "eUf0CaS5AP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d6f9908ea35313dd7566f5ce8c6e815-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eUf0CaS5AP", "openreview": "https://openreview.net/forum?id=eUf0CaS5AP", "poster": "/media/PosterPDFs/NeurIPS%202023/70955.png?t=1699510013.2244246", "slides": "https://nips.cc/virtual/2023/poster/70955", "video": "https://nips.cc/virtual/2023/poster/70955", "author_site": "Zhongcong XU, Jianfeng Zhang, Jun Hao Liew, Jiashi Feng, Mike Zheng Shou", "tldr": "", "abstract": "Recent advances in 3D-aware GAN models have enabled the generation of realistic and controllable human body images. However, existing methods focus on the control of major body joints, neglecting the manipulation of expressive attributes, such as facial expressions, jaw poses, hand poses, and so on. In this work, we present XAGen, the first 3D generative model for human avatars capable of expressive control over body, face, and hands. To enhance the fidelity of small-scale regions like face and hands, we devise a multi-scale and multi-part 3D representation that models fine details. Based on this representation, we propose a multi-part rendering technique that disentangles the synthesis of body, face, and hands to ease model training and enhance geometric quality. Furthermore, we design multi-part discriminators that evaluate the quality of the generated avatars with respect to their appearance and fine-grained control capabilities. Experiments show that XAGen surpasses state-of-the-art methods in terms of realism, diversity, and expressive control abilities. 
Code and data will be made available at https://showlab.github.io/xagen.", "keywords": "Human Avatar;3D-aware GAN", "primary_area": "", "supplementary_material": "/attachment/d6f14900489efbc661fa3a614d6bec3b0b98d9a4.pdf", "author": "Zhongcong Xu;Jianfeng Zhang;Jun Hao Liew;Jiashi Feng;Mike Zheng Shou", "authorids": "~Zhongcong_Xu1;~Jianfeng_Zhang3;~Jun_Hao_Liew1;~Jiashi_Feng1;~Mike_Zheng_Shou1", "gender": ";M;;;", "homepage": ";https://jeff95.me;;;", "dblp": ";;;;", "google_scholar": ";https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.sg/citations?user=8gm-CYYAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhongcong_Xu1;~Jianfeng_Zhang3;~Jun_Hao_Liew1;~Jiashi_Feng1;~Mike_Zheng_Shou1", "aff": ";national university of singapore, National University of Singapore;ByteDance;;", "aff_domain": ";u.nus.edu;bytedance.com;;", "position": ";PhD student;Researcher;;", "bibtex": "@inproceedings{\nxu2023xagen,\ntitle={{XAG}en: 3D Expressive Human Avatars Generation},\nauthor={Zhongcong Xu and Jianfeng Zhang and Jun Hao Liew and Jiashi Feng and Mike Zheng Shou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eUf0CaS5AP}\n}", "github": "", "project": "", "reviewers": "GuK3;F4xj;7EQQ;YASD;3QMX", "pdf_size": 4490213, "rating": "5;6;7;7;7", "confidence": "5;4;5;5;4", "soundness": "4;4;3;3;4", "novelty": "2;3;2;3;3", "presentation": "4;2;3;4;3", "wc_summary": "242;147;108;89;177", "wc_strengths": "172;48;46;75;153", "wc_weaknesses": "310;222;305;140;161", "wc_questions": "103;4;27;161;103", "wc_limitations": "66;35;6;78;18", "wc_review": "893;456;492;543;612", "wc_reply_reviewers": "57;0;0;40;99", "wc_reply_authors": "17;0;0;148;33", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 152.6, 54.13538583957816 ], "wc_strengths_avg": [ 98.8, 53.3494142423326 ], "wc_weaknesses_avg": [ 227.6, 70.59915013652784 ], "wc_questions_avg": [ 79.6, 56.92661943238857 ], "wc_limitations_avg": [ 40.6, 27.507089995126712 ], "wc_review_avg": [ 599.2, 155.973587507629 ], "wc_reply_reviewers_avg": [ 39.2, 37.327737675889225 ], "wc_reply_authors_avg": [ 39.6, 55.57193536309492 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.10206207261596574, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9508706290323113098&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";u.nus.edu;bytedance.com;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.bytedance.com", "aff_unique_abbr": "NUS;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;China" }, { "title": "Inferring the Future by Imagining the Past", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70954", "id": "eVrmcOvJV4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42c3438f432bc62014ce65af880e0d94-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=eVrmcOvJV4", "openreview": "https://openreview.net/forum?id=eVrmcOvJV4", "poster": "/media/PosterPDFs/NeurIPS%202023/70954.png?t=1701377158.9530058", "slides": "https://nips.cc/virtual/2023/poster/70954", "video": "https://nips.cc/virtual/2023/poster/70954", "author_site": "Kartik Chandra, Tony Chen, Tzu-Mao Li, Jonathan Ragan-Kelley, Josh Tenenbaum", "tldr": "", "abstract": "A single panel of a comic book can say a lot: it can depict not only where the characters currently are, but also their motions, their motivations, their emotions, and what they might do next. More generally, humans routinely infer complex sequences of past and future events from a *static snapshot* of a *dynamic scene*, even in situations they have never seen before.\n\nIn this paper, we model how humans make such rapid and flexible inferences. Building on a long line of work in cognitive science, we offer a Monte Carlo algorithm whose inferences correlate well with human intuitions in a wide variety of domains, while only using a small, cognitively-plausible number of samples. Our key technical insight is a surprising connection between our inference problem and Monte Carlo path tracing, which allows us to apply decades of ideas from the computer graphics community to this seemingly-unrelated theory of mind task.", "keywords": "cognitive science;cogsci;inverse planning;Bayesian inference;theory of mind;Monte Carlo;inverse reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/c5b3cc6b4dc704bf47e4d68934c43e94c3308503.zip", "author": "Kartik Chandra;Tony Chen;Tzu-Mao Li;Jonathan Ragan-Kelley;Joshua B. Tenenbaum", "authorids": "~Kartik_Chandra2;~Tony_Chen1;~Tzu-Mao_Li1;~Jonathan_Ragan-Kelley1;~Joshua_B._Tenenbaum1", "gender": ";;Not Specified;M;", "homepage": "https://cs.stanford.edu/~kach/;https://chentoast.github.io;https://cseweb.ucsd.edu/~tzli/;https://people.csail.mit.edu/jrk;", "dblp": "07/5865.html;;122/4798;;t/JoshuaBTenenbaum", "google_scholar": "oVcz4nIAAAAJ;;Y7MCOdYAAAAJ;https://scholar.google.com.tw/citations?user=nBcay4oAAAAJ;", "orcid": "0000-0002-1835-3707;;;;", "linkedin": ";;;;", "or_profile": "~Kartik_Chandra2;~Tony_Chen1;~Tzu-Mao_Li1;~Jonathan_Ragan-Kelley1;~Joshua_B._Tenenbaum1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of California, San Diego;Adobe Systems;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;ucsd.edu;adobe.com;mit.edu", "position": "PhD student;PhD student;Assistant Professor;Researcher;Professor", "bibtex": "@inproceedings{\nchandra2023inferring,\ntitle={Inferring the Future by Imagining the Past},\nauthor={Kartik Chandra and Tony Chen and Tzu-Mao Li and Jonathan Ragan-Kelley and Joshua B. 
Tenenbaum},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eVrmcOvJV4}\n}", "github": "", "project": "", "reviewers": "PVLo;iHfX;cLNU;yiSb", "pdf_size": 1553980, "rating": "4;5;8;8", "confidence": "3;2;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "2;2;4;4", "wc_summary": "112;79;84;113", "wc_strengths": "70;8;130;56", "wc_weaknesses": "203;190;119;119", "wc_questions": "50;21;114;218", "wc_limitations": "18;1;73;118", "wc_review": "453;299;520;624", "wc_reply_reviewers": "21;182;69;36", "wc_reply_authors": "0;428;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 97.0, 15.604486534327235 ], "wc_strengths_avg": [ 66.0, 43.520110293977886 ], "wc_weaknesses_avg": [ 157.75, 39.02162861798569 ], "wc_questions_avg": [ 100.75, 75.59555211783297 ], "wc_limitations_avg": [ 52.5, 46.2412153819512 ], "wc_review_avg": [ 474.0, 117.98516855944224 ], "wc_reply_reviewers_avg": [ 77.0, 63.059495716347115 ], "wc_reply_authors_avg": [ 107.0, 185.32943640986986 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4042260417272216, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18423611126411003815&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "mit.edu;mit.edu;ucsd.edu;adobe.com;mit.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, San Diego;Adobe", "aff_unique_dep": ";;Adobe Systems Incorporated", "aff_unique_url": "https://web.mit.edu;https://www.ucsd.edu;https://www.adobe.com", "aff_unique_abbr": "MIT;UCSD;Adobe", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Response Length Perception and Sequence Scheduling: An LLM-Empowered LLM Inference Pipeline", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70953", "id": "eW233GDOpm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce7ff3405c782f761fac7f849b41ae9a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eW233GDOpm", "openreview": "https://openreview.net/forum?id=eW233GDOpm", "poster": "/media/PosterPDFs/NeurIPS%202023/70953.png?t=1698738898.1181977", "slides": "https://nips.cc/virtual/2023/poster/70953", "video": "https://nips.cc/virtual/2023/poster/70953", "author_site": "Zangwei Zheng, Xiaozhe Ren, Fuzhao Xue, Yang Luo, Xin Jiang, Yang You", "tldr": "", "abstract": "Large language models (LLMs) have revolutionized the field of AI, demonstrating unprecedented capacity across various tasks. However, the inference process for LLMs comes with significant computational costs. In this paper, we propose an efficient LLM inference pipeline that harnesses the power of LLMs. Our approach begins by tapping into the potential of LLMs to accurately perceive and predict the response length with minimal overhead. By leveraging this information, we introduce an efficient sequence scheduling technique that groups queries with similar response lengths into micro-batches. 
We evaluate our approach on real-world instruction datasets using the LLaMA-based model, and our results demonstrate an impressive 86% improvement in inference throughput without compromising effectiveness. Notably, our method is orthogonal to other inference acceleration techniques, making it a valuable addition to many existing toolkits (e.g., FlashAttention, Quantization) for LLM inference.", "keywords": "large language models;inference optimization;batch processing", "primary_area": "", "supplementary_material": "/attachment/8b98860ea64b5545de815adfe5a4fb97fa9de06d.zip", "author": "Zangwei Zheng;Xiaozhe Ren;Fuzhao Xue;Yang Luo;Xin Jiang;Yang You", "authorids": "~Zangwei_Zheng1;~Xiaozhe_Ren1;~Fuzhao_Xue1;~Yang_Luo4;~Xin_Jiang1;~Yang_You1", "gender": "M;M;M;M;M;M", "homepage": "https://zhengzangw.github.io;;https://xuefuzhao.github.io/;https://yangluo7.github.io/;;https://www.comp.nus.edu.sg/~youy/", "dblp": "289/0376;248/7679.html;248/1245;;42/4142-2;33/8167-1.html", "google_scholar": "FTqutJEAAAAJ;https://scholar.google.com/citations?hl=en;JMHsqIkAAAAJ;-esBZacAAAAJ;DUfcez0AAAAJ;jF4dPZwAAAAJ", "orcid": "0000-0002-1505-1535;0000-0002-0432-5510;;0000-0002-2165-2679;0000-0002-9117-8247;", "linkedin": ";;fuzhao-xue-6410561a6/;;xin-jiang-9577b76/;yang-you-0b92914b/", "or_profile": "~Zangwei_Zheng1;~Xiaozhe_Ren1;~Fuzhao_Xue1;~Yang_Luo4;~Xin_Jiang1;~Yang_You1", "aff": "National University of Singapore;Noah's Ark Lab;National University of Singapore;National University of Singapore;Noah\u2019s Ark Lab, Huawei Technologies;National University of Singapore", "aff_domain": "nus.edu.sg;huawei.com;nus.edu.sg;nus.edu.sg;huawei.com;nus.edu.sg", "position": "PhD student;Researcher;PhD student;MS student;Principal Researcher;Professor", "bibtex": "@inproceedings{\nzheng2023response,\ntitle={Response Length Perception and Sequence Scheduling: An {LLM}-Empowered {LLM} Inference Pipeline},\nauthor={Zangwei Zheng and Xiaozhe Ren and Fuzhao Xue and Yang Luo and Xin Jiang and Yang You},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eW233GDOpm}\n}", "github": "", "project": "", "reviewers": "Nixu;u5zm;WTWP;KzBJ", "pdf_size": 327707, "rating": "6;6;6;6", "confidence": "4;3;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "3;2;3;3", "wc_summary": "63;165;45;90", "wc_strengths": "44;57;37;33", "wc_weaknesses": "86;215;11;11", "wc_questions": "6;36;36;81", "wc_limitations": "14;40;1;1", "wc_review": "213;513;130;216", "wc_reply_reviewers": "26;315;0;17", "wc_reply_authors": "11;733;0;10", "reply_reviewers": "1;3;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 45.7622934302904 ], "wc_strengths_avg": [ 42.75, 9.12071817347735 ], "wc_weaknesses_avg": [ 80.75, 83.33779154741264 ], "wc_questions_avg": [ 39.75, 26.78035660703569 ], "wc_limitations_avg": [ 14.0, 15.921683328090658 ], "wc_review_avg": [ 268.0, 145.60048076843702 ], "wc_reply_reviewers_avg": [ 89.5, 130.52681716796744 ], "wc_reply_authors_avg": [ 188.5, 314.39664438412825 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 68, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3898304643668534610&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "nus.edu.sg;huawei.com;nus.edu.sg;nus.edu.sg;huawei.com;nus.edu.sg", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "National University of Singapore;Noah's Ark Lab;Huawei", "aff_unique_dep": ";;Noah\u2019s Ark Lab", "aff_unique_url": "https://www.nus.edu.sg;;https://www.huawei.com", "aff_unique_abbr": "NUS;;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;2;0", "aff_country_unique": "Singapore;;China" }, { "title": "Practical and Asymptotically Exact Conditional Sampling in Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70952", "id": "eWKqr1zcRv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/63e8bc7bbf1cfea36d1d1b6538aecce5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eWKqr1zcRv", "openreview": "https://openreview.net/forum?id=eWKqr1zcRv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70952", "video": "https://nips.cc/virtual/2023/poster/70952", "author_site": "Luhuan Wu, Brian Trippe, Christian Naesseth, David Blei, John Cunningham", "tldr": "", "abstract": "Diffusion models have been successful on a range of conditional generation tasks including molecular design and text-to-image generation. However, these achievements have primarily depended on task-specific conditional training or error-prone heuristic approximations. Ideally, a conditional generation method should provide exact samples for a broad range of conditional distributions without requiring task-specific training. To this end, we introduce the Twisted Diffusion Sampler, or TDS. TDS is a sequential Monte Carlo (SMC) algorithm that targets the conditional distributions of diffusion models through simulating a set of weighted particles. The main idea is to use twisting, an SMC technique that enjoys good computational efficiency, to incorporate heuristic approximations without compromising asymptotic exactness. We first find in simulation and in conditional image generation tasks that TDS provides a computational statistical trade-off, yielding more accurate approximations with many particles but with empirical improvements over heuristics with as few as two particles. We then turn to motif-scaffolding, a core task in protein design, using a TDS extension to Riemannian diffusion models; on benchmark tasks, TDS allows flexible conditioning criteria and often outperforms the state-of-the-art, conditionally trained model. Code can be found in https://github.com/blt2114/twisted_diffusion_sampler", "keywords": "diffusion models; conditional sampling; sequential monte carlo methods; generative models; protein design", "primary_area": "", "supplementary_material": "/attachment/099f6187ec75a7c9c87fd4a35d70f76e3019a991.pdf", "author": "Luhuan Wu;Brian L. 
Trippe;Christian A Naesseth;David Blei;John Patrick Cunningham", "authorids": "~Luhuan_Wu1;~Brian_L._Trippe1;~Christian_A_Naesseth1;~David_Blei2;~John_Patrick_Cunningham1", "gender": ";M;M;M;M", "homepage": ";https://naesseth.github.io/;http://www.cs.columbia.edu/~blei/;stat.columbia.edu/~cunningham;http://www.briantrippe.com", "dblp": "245/5016;146/0902;86/1910;51/4077;241/7223", "google_scholar": ";GQ6rOssAAAAJ;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ;88cU_4UAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Luhuan_Wu1;~Christian_A_Naesseth1;~David_Blei2;~John_Patrick_Cunningham1;~Brian_Trippe1", "aff": "Columbia University;University of Amsterdam;Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;uva.nl;columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nwu2023practical,\ntitle={Practical and Asymptotically Exact Conditional Sampling in Diffusion Models},\nauthor={Luhuan Wu and Brian L. Trippe and Christian A Naesseth and John Patrick Cunningham and David Blei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eWKqr1zcRv}\n}", "github": "", "project": "", "reviewers": "bGjC;UG6i;8aww;BFYS", "pdf_size": 13035715, "rating": "5;5;5;6", "confidence": "4;4;4;4", "soundness": "3;2;4;3", "novelty": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "53;96;172;223", "wc_strengths": "56;47;77;81", "wc_weaknesses": "196;481;132;363", "wc_questions": "2;215;654;184", "wc_limitations": "1;11;34;2", "wc_review": "308;850;1069;853", "wc_reply_reviewers": "0;166;108;44", "wc_reply_authors": "0;377;0;13", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 136.0, 65.86729082025464 ], "wc_strengths_avg": [ 65.25, 14.184057952504283 ], "wc_weaknesses_avg": [ 293.0, 137.45362854432037 ], "wc_questions_avg": [ 263.75, 239.55414314930977 ], "wc_limitations_avg": [ 12.0, 13.285330255586423 ], "wc_review_avg": [ 770.0, 281.1289739603515 ], "wc_reply_reviewers_avg": [ 79.5, 62.998015841770766 ], "wc_reply_authors_avg": [ 97.5, 161.45665052886486 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7773238697402347182&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "columbia.edu;uva.nl;columbia.edu;columbia.edu;columbia.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Columbia University;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.uva.nl", "aff_unique_abbr": "Columbia;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Netherlands" }, { "title": "Flat Seeking Bayesian Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70951", "id": "eX6xDto3Ed", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61f4e5747b1b753cb35546b15d981f76-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=eX6xDto3Ed", "openreview": "https://openreview.net/forum?id=eX6xDto3Ed", "poster": "/media/PosterPDFs/NeurIPS%202023/70951.png?t=1701918426.0039544", "slides": "https://nips.cc/virtual/2023/poster/70951", "video": "https://nips.cc/virtual/2023/poster/70951", "author_site": "Van-Anh Nguyen, Tung-Long Vuong, Hoang Phan, Thanh-Toan Do, Dinh Phung, Trung Le", "tldr": "", "abstract": "Bayesian Neural Networks (BNNs) provide a probabilistic interpretation for deep learning models by imposing a prior distribution over model parameters and inferring a posterior distribution based on observed data. The model sampled from the posterior distribution can be used for providing ensemble predictions and quantifying prediction uncertainty. It is well-known that deep learning models with lower sharpness have better generalization ability. However, existing posterior inferences are not aware of sharpness/flatness in terms of formulation, possibly leading to high sharpness for the models sampled from them. In this paper, we develop theories, the Bayesian setting, and the variational inference approach for the sharpness-aware posterior. Specifically, the models sampled from our sharpness-aware posterior, and the optimal approximate posterior estimating this sharpness-aware posterior, have better flatness, hence possibly possessing higher generalization ability. We conduct experiments by leveraging the sharpness-aware posterior with state-of-the-art Bayesian Neural Networks, showing that the flat-seeking counterparts outperform their baselines in all metrics of interest.", "keywords": "Bayesian;sharpness-aware;posterior", "primary_area": "", "supplementary_material": "/attachment/a29de3b73c24551a78e67a9cc52c0778c9887274.pdf", "author": "Van-Anh Nguyen;Long Tung Vuong;Hoang Phan;Thanh-Toan Do;Dinh Phung;Trung Le", "authorids": "~Van-Anh_Nguyen1;~Long_Tung_Vuong1;~Hoang_Phan1;~Thanh-Toan_Do4;~Dinh_Phung2;~Trung_Le2", "gender": "F;M;;;;M", "homepage": ";;;;;", "dblp": ";329/6838;;;;", "google_scholar": "I5kuXKsAAAAJ;DCC657sAAAAJ;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";long-vuong-783477131/;;;;", "or_profile": "~Van-Anh_Nguyen1;~Long_Tung_Vuong1;~Hoang_Phan1;~Thanh-Toan_Do4;~Dinh_Phung2;~Trung_Le2", "aff": "Monash University;Monash University;;;;Monash University", "aff_domain": "monash.edu;monash.edu;;;;monash.edu", "position": "PhD student;PhD student;;;;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023flat,\ntitle={Flat Seeking Bayesian Neural Networks},\nauthor={Van-Anh Nguyen and Long Tung Vuong and Hoang Phan and Thanh-Toan Do and Dinh Phung and Trung Le},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eX6xDto3Ed}\n}", "github": "", "project": "", "reviewers": "yGhH;VfFA;FrCB;hAuQ;P465", "pdf_size": 1151493, "rating": "5;5;6;6;6", "confidence": "2;3;3;4;2", "soundness": "3;2;3;3;3", "novelty": "3;2;3;2;3", "presentation": "1;2;2;3;2", "wc_summary": "102;91;69;97;90", "wc_strengths": "95;77;54;59;42", "wc_weaknesses": "392;47;74;85;29", "wc_questions": "1;39;127;129;10", "wc_limitations": "14;48;55;22;20", "wc_review": "604;302;379;392;191", "wc_reply_reviewers": "49;22;62;101;11", "wc_reply_authors": "31;31;26;323;19", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;2;4;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 
0.4898979485566356 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 89.8, 11.267652816802618 ], "wc_strengths_avg": [ 65.4, 18.596773913773323 ], "wc_weaknesses_avg": [ 125.4, 134.75102968066702 ], "wc_questions_avg": [ 61.2, 55.97285056167856 ], "wc_limitations_avg": [ 31.8, 16.448708155961672 ], "wc_review_avg": [ 373.6, 135.588495087157 ], "wc_reply_reviewers_avg": [ 49.0, 31.767908335299634 ], "wc_reply_authors_avg": [ 86.0, 118.58161746240435 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3273268353539885, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13062652736645363514&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 10, "email": "monash.edu;monash.edu;;;;monash.edu", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Penguin: Parallel-Packed Homomorphic Encryption for Fast Graph Convolutional Network Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70950", "id": "eXubleMT0q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3cc685788a311fa35d8d41df93e288ca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eXubleMT0q", "openreview": "https://openreview.net/forum?id=eXubleMT0q", "poster": "/media/PosterPDFs/NeurIPS%202023/70950.png?t=1701917576.6904879", "slides": "https://nips.cc/virtual/2023/poster/70950", "video": "https://nips.cc/virtual/2023/poster/70950", "author_site": "Ran Ran, Nuo Xu, Tao Liu, Wei Wang, Gang Quan, Wujie Wen", "tldr": "", "abstract": "The marriage of Graph Convolutional Network (GCN) and Homomorphic Encryption (HE) enables the inference of graph data on the cloud with significantly enhanced client data privacy. However, the tremendous computation and memory overhead associated with HE operations challenges the practicality of HE-based GCN inference. GCN inference involves a sequence of expensive matrix-matrix multiplications, and we observe that directly applying the state-of-the-art HE-based secure matrix-matrix multiplication solutions to accelerate HE-GCN inference is far less efficient as it does not exploit the unique aggregation mechanism of two-dimension graph node-features in GCN layer computation. \nAs a result, in this paper, we propose a novel HE-based ciphertext packing technique, i.e., Penguin, that can take advantage of the unique computation pattern during the HE-GCN inference to significantly reduce the computation and memory overhead associated with HE operations.\nSpecifically, Penguin employs (i) an effective two-dimension parallel packing technique for feature ciphertext with optimal graph node partitioning and graph feature interleaving, and (ii) an interleaved assembly technique that can effectively make use of the blank slots to merge ciphertexts after feature reduction and significantly reduce the costly rotation operation.\nWe provide theoretical analysis and experimental validation to demonstrate the speedup achieved by Penguin in accelerating GCN inference using popular GCN models and datasets. 
Our results show that Penguin can achieve up to $\\sim10\\times$ speedup and around $\\sim79$% reduction in computational memory overhead, significantly outperforming state-of-the-art solutions. To the best of our knowledge, this is the first work that can ensure the protection of both graph structure and features when accelerating HE-GCN inference on encrypted data. Our code is publicly available at https://github.com/ranran0523/Penguin.", "keywords": "Cryptographic inference;Graph Convolutional Network;Parallel Packing", "primary_area": "", "supplementary_material": "/attachment/37e6789929601cab76605d35e034e45ac6c7fd17.pdf", "author": "Ran Ran;Nuo Xu;Tao Liu;Wei Wang;Gang Quan;Wujie Wen", "authorids": "~Ran_Ran2;~Nuo_Xu3;~Tao_Liu3;~Wei_Wang70;~Gang_Quan1;~Wujie_Wen2", "gender": "M;;;M;M;M", "homepage": ";;;;;https://www.lehigh.edu/~wuw219/", "dblp": ";;;;53/5678.html;70/11466.html", "google_scholar": "zjgo17YAAAAJ;;;;xP-U9_YAAAAJ;QKQrD1wAAAAJ", "orcid": ";;;;;", "linkedin": "ranran0523/;xu-nuo-a6a3a6106/;;wei-wang-9b012026/;;", "or_profile": "~Ran_Ran2;~Nuo_Xu3;~Tao_Liu3;~Wei_Wang70;~Gang_Quan1;~Wujie_Wen2", "aff": "Lehigh University;Lehigh University;;;Florida International University;North Carolina State University", "aff_domain": "lehigh.edu;lehigh.edu;;;fiu.edu;ncsu.edu", "position": "PhD student;PhD student;;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nran2023penguin,\ntitle={Penguin: Parallel-Packed Homomorphic Encryption for Fast Graph Convolutional Network Inference},\nauthor={Ran Ran and Nuo Xu and Tao Liu and Wei Wang and Gang Quan and Wujie Wen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eXubleMT0q}\n}", "github": "", "project": "", "reviewers": "MKSk;idN7;RE11;pXg9", "pdf_size": 643622, "rating": "5;6;6;7", "confidence": "2;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;4", "wc_summary": "78;113;52;53", "wc_strengths": "74;125;59;50", "wc_weaknesses": "18;171;83;179", "wc_questions": "1;57;1;47", "wc_limitations": "18;1;20;5", "wc_review": "189;467;215;334", "wc_reply_reviewers": "0;0;0;61", "wc_reply_authors": "0;0;0;27", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.0, 24.809272460110556 ], "wc_strengths_avg": [ 77.0, 29.008619408720573 ], "wc_weaknesses_avg": [ 112.75, 66.4167712253464 ], "wc_questions_avg": [ 26.5, 25.743931323712 ], "wc_limitations_avg": [ 11.0, 8.154753215150045 ], "wc_review_avg": [ 301.25, 110.20974321719473 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 6.75, 11.691342951089922 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14281218005617633571&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "lehigh.edu;lehigh.edu;;;fiu.edu;ncsu.edu", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Lehigh University;Florida International University;North Carolina State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lehigh.edu;https://www.fiu.edu;https://www.ncsu.edu", 
"aff_unique_abbr": "Lehigh;FIU;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unleash the Potential of Image Branch for Cross-modal 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70949", "id": "eYCGrGdKf3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a1f0c0cd6caaa4863af5f12608edf63e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eYCGrGdKf3", "openreview": "https://openreview.net/forum?id=eYCGrGdKf3", "poster": "/media/PosterPDFs/NeurIPS%202023/70949.png?t=1698420854.8696966", "slides": "https://nips.cc/virtual/2023/poster/70949", "video": "https://nips.cc/virtual/2023/poster/70949", "author_site": "Yifan Zhang, Qijian Zhang, Junhui Hou, Yixuan Yuan, Guoliang Xing", "tldr": "", "abstract": "To achieve reliable and precise scene understanding, autonomous vehicles typically incorporate multiple sensing modalities to capitalize on their complementary attributes. However, existing cross-modal 3D detectors do not fully utilize the image domain information to address the bottleneck issues of the LiDAR-based detectors. This paper presents a new cross-modal 3D object detector, namely UPIDet, which aims to unleash the potential of the image branch from two aspects. First, UPIDet introduces a new 2D auxiliary task called normalized local coordinate map estimation. This approach enables the learning of local spatial-aware features from the image modality to supplement sparse point clouds. Second, we discover that the representational capability of the point cloud backbone can be enhanced through the gradients backpropagated from the training objectives of the image branch, utilizing a succinct and effective point-to-pixel module. Extensive experiments and ablation studies validate the effectiveness of our method. Notably, we achieved the top rank in the highly competitive cyclist class of the KITTI benchmark at the time of submission. 
The source code is available at https://github.com/Eaphan/UPIDet.", "keywords": "3D object detection;3D point cloud", "primary_area": "", "supplementary_material": "/attachment/3564f415b923b2978726f51063abc9a0e899e224.zip", "author": "Yifan Zhang;Qijian Zhang;Junhui Hou;Yixuan Yuan;Guoliang Xing", "authorids": "~Yifan_Zhang15;~Qijian_Zhang1;~Junhui_Hou2;~Yixuan_Yuan2;~Guoliang_Xing2", "gender": "M;M;M;F;Not Specified", "homepage": "https://github.com/Eaphan;https://keeganhk.github.io/;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;http://www.ee.cityu.edu.hk/~yxyuan/;https://www.ie.cuhk.edu.hk/people/glxing.shtml", "dblp": ";201/6850.html;122/2673.html;36/9220;63/4542", "google_scholar": ";4NIiTYgAAAAJ;j6eefhwAAAAJ;https://scholar.google.com.au/citations?hl=en;", "orcid": "0000-0003-0958-9934;0000-0003-4723-6136;0000-0003-3431-2021;;0000-0003-1772-7751", "linkedin": ";;;;", "or_profile": "~Yifan_Zhang15;~Qijian_Zhang1;~Junhui_Hou2;~Yixuan_Yuan2;~Guoliang_Xing2", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;The Chinese University of Hong Kong;Chinese University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cuhk.edu.hk;cuhk.hk", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023unleash,\ntitle={Unleash the Potential of Image Branch for Cross-modal 3D Object Detection},\nauthor={Yifan Zhang and Qijian Zhang and Junhui Hou and Yixuan Yuan and Guoliang Xing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eYCGrGdKf3}\n}", "github": "", "project": "", "reviewers": "s1rE;DmuL;5jwD;bgiL;gAXu", "pdf_size": 8518621, "rating": "4;5;5;5;7", "confidence": "4;4;4;5;5", "soundness": "3;2;3;3;4", "novelty": "2;2;3;2;4", "presentation": "3;2;3;3;4", "wc_summary": "71;152;37;110;56", "wc_strengths": "44;86;16;70;83", "wc_weaknesses": "101;85;58;63;216", "wc_questions": "53;1;85;34;37", "wc_limitations": "7;1;20;7;10", "wc_review": "276;325;216;284;402", "wc_reply_reviewers": "45;76;0;13;65", "wc_reply_authors": "295;326;70;70;48", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 85.2, 41.121284026644894 ], "wc_strengths_avg": [ 59.8, 26.445415481704952 ], "wc_weaknesses_avg": [ 104.6, 57.80864987179687 ], "wc_questions_avg": [ 42.0, 27.349588662354687 ], "wc_limitations_avg": [ 9.0, 6.2289646009589745 ], "wc_review_avg": [ 300.6, 61.50642242888136 ], "wc_reply_reviewers_avg": [ 39.8, 29.239699040858817 ], "wc_reply_authors_avg": [ 161.8, 122.07276518535984 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6666666666666666, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9895845199146732660&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cuhk.edu.hk;cuhk.hk", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "City University of Hong Kong;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.cuhk.edu.hk", "aff_unique_abbr": 
"CityU;CUHK", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Graph-Structured Gaussian Processes for Transferable Graph Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70948", "id": "eZbqD9BoXe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f7f2f57d8eaf44b2f09020f64ff6d96-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eZbqD9BoXe", "openreview": "https://openreview.net/forum?id=eZbqD9BoXe", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70948", "video": "https://nips.cc/virtual/2023/poster/70948", "author_site": "Jun Wu, Lisa Ainsworth, Andrew Leakey, Haixun Wang, Jingrui He", "tldr": "", "abstract": "Transferable graph learning involves knowledge transferability from a source graph to a relevant target graph. The major challenge of transferable graph learning is the distribution shift between source and target graphs induced by individual node attributes and complex graph structures. To solve this problem, in this paper, we propose a generic graph-structured Gaussian process framework (GraphGP) for adaptively transferring knowledge across graphs with either homophily or heterophily assumptions. Specifically, GraphGP is derived from a novel graph structure-aware neural network in the limit on the layer width. The generalization analysis of GraphGP explicitly investigates the connection between knowledge transferability and graph domain similarity. Extensive experiments on several transferable graph learning benchmarks demonstrate the efficacy of GraphGP over state-of-the-art Gaussian process baselines.", "keywords": "graph learning;transfer learning;Gaussian process", "primary_area": "", "supplementary_material": "", "author": "Jun Wu;Lisa Ainsworth;Andrew Leakey;Haixun Wang;Jingrui He", "authorids": "~Jun_Wu3;~Lisa_Ainsworth1;leakey@illinois.edu;~Haixun_Wang2;~Jingrui_He1", "gender": "M;F;;M;F", "homepage": "https://junwu6.github.io/;;;https://haixun.github.io/;https://www.hejingrui.org", "dblp": "20/3894-19.html;;;;34/2685", "google_scholar": "TZXUS-oAAAAJ;ZMeBF7IAAAAJ;;Q1mcglAAAAAJ;hXpZynkAAAAJ", "orcid": "0000-0002-1512-524X;;;0000-0002-1378-4241;0000-0002-6429-6272", "linkedin": "jun-wu-08a962176/;;;haixun/;", "or_profile": "~Jun_Wu3;~Lisa_Ainsworth1;leakey@illinois.edu;~Haixun_Wang2;~Jingrui_He1", "aff": "University of Illinois, Urbana Champaign;;;Instacart;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;;;instacart.com;illinois.edu", "position": "PhD student;;;VP Engineering, Distinguished Scientist;Associate Professor", "bibtex": "@inproceedings{\nwu2023graphstructured,\ntitle={Graph-Structured Gaussian Processes for Transferable Graph Learning},\nauthor={Jun Wu and Lisa Ainsworth and Andrew Leakey and Haixun Wang and Jingrui He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eZbqD9BoXe}\n}", "github": "", "project": "", "reviewers": "heNY;e15N;pydS;V7Dg", "pdf_size": 975261, "rating": "4;5;6;6", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "1;3;3;3", "wc_summary": "61;65;82;66", "wc_strengths": "21;48;47;51", "wc_weaknesses": "45;101;116;174", "wc_questions": "241;75;8;2", "wc_limitations": "1;1;12;33", "wc_review": "369;290;265;326", "wc_reply_reviewers": "0;22;21;24", "wc_reply_authors": "182;15;76;21", 
"reply_reviewers": "0;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 68.5, 8.0156097709407 ], "wc_strengths_avg": [ 41.75, 12.07010770457331 ], "wc_weaknesses_avg": [ 109.0, 45.91840589567543 ], "wc_questions_avg": [ 81.5, 96.44298834026246 ], "wc_limitations_avg": [ 11.75, 13.06474263045392 ], "wc_review_avg": [ 312.5, 39.16950344336777 ], "wc_reply_reviewers_avg": [ 16.75, 9.730750228014282 ], "wc_reply_authors_avg": [ 73.5, 67.00186564566691 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14577774201319817475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "illinois.edu;;;instacart.com;illinois.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Instacart", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.instacart.com", "aff_unique_abbr": "UIUC;Instacart", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Semantic HELM: A Human-Readable Memory for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/73882", "id": "ebMPmx5mr7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1eeacdf8770e6dd5164cdeec8bcfa8cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ebMPmx5mr7", "openreview": "https://openreview.net/forum?id=ebMPmx5mr7", "poster": "/media/PosterPDFs/NeurIPS%202023/73882.png?t=1698236778.0593483", "slides": "https://nips.cc/virtual/2023/poster/73882", "video": "https://nips.cc/virtual/2023/poster/73882", "author_site": "Fabian Paischer, Thomas Adler, Markus Hofmarcher, Sepp Hochreiter", "tldr": "", "abstract": "Reinforcement learning agents deployed in the real world often have to cope with partially observable environments. \nTherefore, most agents employ memory mechanisms to approximate the state of the environment. \nRecently, there have been impressive success stories in mastering partially observable environments, mostly in the realm of computer games like Dota 2, StarCraft II, or MineCraft. \nHowever, existing methods lack interpretability in the sense that it is not comprehensible for humans what the agent stores in its memory.\nIn this regard, we propose a novel memory mechanism that represents past events in human language.\nOur method uses CLIP to associate visual inputs with language tokens. \nThen we feed these tokens to a pretrained language model that serves the agent as memory and provides it with a coherent and human-readable representation of the past.\nWe train our memory mechanism on a set of partially observable environments and find that it excels on tasks that require a memory component, while mostly attaining performance on-par with strong baselines on tasks that do not. 
\nOn a challenging continuous recognition task, where memorizing the past is crucial, our memory mechanism converges two orders of magnitude faster than prior methods.\nSince our memory mechanism is human-readable, we can peek at an agent's memory and check whether crucial pieces of information have been stored.\nThis significantly enhances troubleshooting and paves the way toward more interpretable agents.", "keywords": "Reinforcement Learning;Language Models;History Compression;Partial Observability;Foundation Models;Interpretability;Explainable AI", "primary_area": "", "supplementary_material": "", "author": "Fabian Paischer;Thomas Adler;Markus Hofmarcher;Sepp Hochreiter", "authorids": "~Fabian_Paischer1;~Thomas_Adler1;~Markus_Hofmarcher1;~Sepp_Hochreiter1", "gender": "M;M;M;M", "homepage": ";;;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/", "dblp": "309/5971;250/9175;224/9960;h/SeppHochreiter.html", "google_scholar": "zdm5ZKwAAAAJ;R6p_vo4AAAAJ;FD27EMIAAAAJ;https://scholar.google.at/citations?user=tvUH3WMAAAAJ", "orcid": ";;;0000-0001-7449-2528", "linkedin": ";;;https://linkedin.com/in/sepp-hochreiter-41514846", "or_profile": "~Fabian_Paischer1;~Thomas_Adler1;~Markus_Hofmarcher1;~Sepp_Hochreiter1", "aff": "University College London, University of London;Johannes Kepler University Linz;Johannes Kepler Universit\u00e4t Linz;Johannes Kepler University Linz", "aff_domain": "ucl.ac.uk;jku.at;jku.at;jku.at", "position": "Researcher;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\npaischer2023semantic,\ntitle={Semantic {HELM}: A Human-Readable Memory for Reinforcement Learning},\nauthor={Fabian Paischer and Thomas Adler and Markus Hofmarcher and Sepp Hochreiter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ebMPmx5mr7}\n}", "github": "", "project": "", "reviewers": "RcPE;FbDD;UimZ;xnPR;f2kZ;a9Pk", "pdf_size": 8958167, "rating": "4;4;4;6;7;7", "confidence": "5;4;4;3;4;4", "soundness": "3;2;3;2;3;4", "novelty": "2;3;2;2;3;4", "presentation": "3;3;3;3;2;4", "wc_summary": "51;92;57;110;53;78", "wc_strengths": "58;97;40;151;93;136", "wc_weaknesses": "160;332;470;466;283;163", "wc_questions": "6;2;8;58;44;6", "wc_limitations": "31;2;31;39;77;17", "wc_review": "306;525;606;824;550;400", "wc_reply_reviewers": "0;0;0;186;29;22", "wc_reply_authors": "0;0;0;155;0;0", "reply_reviewers": "0;0;0;2;1;1", "reply_authors": "1;1;1;2;1;1", "rating_avg": [ 5.333333333333333, 1.3743685418725535 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 73.5, 21.96019125599775 ], "wc_strengths_avg": [ 95.83333333333333, 39.18935513064173 ], "wc_weaknesses_avg": [ 312.3333333333333, 125.97971618037917 ], "wc_questions_avg": [ 20.666666666666668, 21.89875694087579 ], "wc_limitations_avg": [ 32.833333333333336, 23.08258121518379 ], "wc_review_avg": [ 535.1666666666666, 163.0709627398112 ], "wc_reply_reviewers_avg": [ 39.5, 66.53257347595486 ], "wc_reply_authors_avg": [ 25.833333333333332, 57.76508941874456 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42008402520840293, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=304798728035859637&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;jku.at;jku.at;jku.at", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University College London;Johannes Kepler University;Johannes Kepler University Linz", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.jku.at;https://www.jku.at", "aff_unique_abbr": "UCL;JKU;JKU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Linz", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;Austria" }, { "title": "DiffVL: Scaling Up Soft Body Manipulation using Vision-Language Driven Differentiable Physics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70947", "id": "ecRaDicXxw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f5f7b6080dcadced61cf5d96f7c6dde-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ecRaDicXxw", "openreview": "https://openreview.net/forum?id=ecRaDicXxw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70947", "video": "https://nips.cc/virtual/2023/poster/70947", "author_site": "Zhiao Huang, Feng Chen, Yewen Pu, Chunru Lin, Hao Su, Chuang Gan", "tldr": "", "abstract": "Combining gradient-based trajectory optimization with differentiable physics simulation is an efficient technique for solving soft-body manipulation problems.\nUsing a well-crafted optimization objective, the solver can quickly converge onto a valid trajectory.\nHowever, writing the appropriate objective functions requires expert knowledge, making it difficult to collect a large set of naturalistic problems from non-expert users.\nWe introduce DiffVL, a method that enables non-expert users to communicate soft-body manipulation tasks -- a combination of vision and natural language, given in multiple stages -- that can be readily leveraged by a differential physics solver. \nWe have developed GUI tools that enable non-expert users to specify 100 tasks inspired by real-life soft-body manipulations from online videos, which we'll make public.\nWe leverage large language models to translate task descriptions into machine-interpretable optimization objectives. 
The optimization objectives can help differentiable physics solvers to solve these long-horizon multistage tasks that are challenging for previous baselines.", "keywords": "Differentiable physics; Soft body manipulation", "primary_area": "", "supplementary_material": "/attachment/b976352ee8d3fc59920e0f9fdfd79d891a655840.pdf", "author": "Zhiao Huang;Feng Chen;Yewen Pu;Chunru Lin;Hao Su;Chuang Gan", "authorids": "~Zhiao_Huang1;~Feng_Chen16;~Yewen_Pu1;~Chunru_Lin1;~Hao_Su1;~Chuang_Gan1", "gender": "M;M;M;F;M;M", "homepage": ";https://winniechen2002.github.io/;http://www.mit.edu/~yewenpu;https://xhrlyb.github.io;http://ai.ucsd.edu/~haosu;http://people.csail.mit.edu/ganchuang/", "dblp": "172/1410;;53/10322;324/5212;09/4945-1;139/6993", "google_scholar": ";xuVkkKwAAAAJ;LJnNKXMAAAAJ;PTYVWdIAAAAJ;1P8Zu04AAAAJ;PTeSCbIAAAAJ", "orcid": ";;;;;", "linkedin": ";https://linkedin.com/in/\u67ab-\u9648-822809265;;;;", "or_profile": "~Zhiao_Huang1;~Feng_Chen16;~Yewen_Pu1;~Chunru_Lin1;~Hao_Su1;~Chuang_Gan1", "aff": "University of California, San Diego, University of California, San Diego;IIIS, Tsinghua University;Autodesk;Shanghai Jiaotong University;University of California, San Diego;MIT-IBM Watson AI Lab", "aff_domain": "eng.ucsd.edu;mails.tsinghua.edu.cn;autodesk.com;sjtu.edu.cn;ucsd.edu;ibm.com", "position": "PhD student;Undergrad student;Principal Researcher;Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhuang2023diffvl,\ntitle={Diff{VL}: Scaling Up Soft Body Manipulation using Vision-Language Driven Differentiable Physics},\nauthor={Zhiao Huang and Feng Chen and Yewen Pu and Chunru Lin and Hao Su and Chuang Gan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ecRaDicXxw}\n}", "github": "", "project": "", "reviewers": "kids;C43D;EcLH;Scwt;xC3q", "pdf_size": 19386513, "rating": "4;6;6;6;8", "confidence": "4;3;4;4;5", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "47;57;88;39;155", "wc_strengths": "113;44;91;31;188", "wc_weaknesses": "435;83;99;43;80", "wc_questions": "11;131;18;26;41", "wc_limitations": "42;14;4;8;5", "wc_review": "648;329;300;147;469", "wc_reply_reviewers": "0;27;9;10;26", "wc_reply_authors": "200;50;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "5;2;1;1;1", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 77.2, 42.30555519077843 ], "wc_strengths_avg": [ 93.4, 55.98785582606285 ], "wc_weaknesses_avg": [ 148.0, 144.66789554009554 ], "wc_questions_avg": [ 45.4, 43.94815126942202 ], "wc_limitations_avg": [ 14.6, 14.136477637657833 ], "wc_review_avg": [ 378.6, 169.1302456688336 ], "wc_reply_reviewers_avg": [ 14.4, 10.480458005259122 ], "wc_reply_authors_avg": [ 50.0, 77.45966692414834 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15505969112546448605&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "eng.ucsd.edu;mails.tsinghua.edu.cn;autodesk.com;sjtu.edu.cn;ucsd.edu;ibm.com", "author_num": 6, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "University of California, 
San Diego;Tsinghua University;Autodesk;Shanghai Jiao Tong University;Massachusetts Institute of Technology", "aff_unique_dep": ";Institute for Interdisciplinary Information Sciences;;;IBM Watson AI Lab", "aff_unique_url": "https://www.ucsd.edu;https://www.tsinghua.edu.cn;https://www.autodesk.com;https://www.sjtu.edu.cn;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "UCSD;THU;Autodesk;SJTU;MIT-IBM AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Learning and processing the ordinal information of temporal sequences in recurrent neural circuits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70946", "id": "eeeqORvJbf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b241c515433caae3051266668d808b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eeeqORvJbf", "openreview": "https://openreview.net/forum?id=eeeqORvJbf", "poster": "/media/PosterPDFs/NeurIPS%202023/70946.png?t=1701762073.2643018", "slides": "https://nips.cc/virtual/2023/poster/70946", "video": "https://nips.cc/virtual/2023/poster/70946", "author_site": "xiaolong zou, Zhikun Chu, Qinghai Guo, Jie Cheng, Bo Hong, Si Wu, Yuanyuan Mi", "tldr": "", "abstract": "Temporal sequence processing is fundamental in brain cognitive functions. \nExperimental data has indicated that the representations of ordinal information and contents of temporal sequences are disentangled in the brain, but the neural mechanism underlying this disentanglement remains largely unclear. Here, we investigate how recurrent neural circuits learn to represent the abstract order structure of temporal sequences, and how this disentangled representation of order structure from that of contents facilitates the processing of temporal sequences. We show that with an appropriate learning protocol, a recurrent neural circuit can learn a set of tree-structured attractor states to encode the corresponding tree-structured orders of given temporal sequences. This abstract temporal order template can then be bound with different contents, allowing for flexible and robust temporal sequence processing. Using a transfer learning task, we demonstrate that the reuse of a temporal order template facilitates the acquisition of new temporal sequences of the same or similar ordinal structure. Using a key-word spotting task, we demonstrate that the attractor representation of order structure improves the robustness of temporal sequence discrimination, if the ordinal information is the key to differentiate different sequences. 
We hope this study gives us insights into the neural mechanism of representing the ordinal information of temporal sequences in the brain, and helps us to develop brain-inspired temporal sequence processing algorithms.", "keywords": "temporal sequence processing;temporal order structure;tree-structured attractor", "primary_area": "", "supplementary_material": "/attachment/04f30b1695bd8dccba70f7ee710da47e84389ab7.zip", "author": "Xiaolong Zou;Zhikun Chu;Qinghai Guo;Jie Cheng;Bo Hong;Si Wu;Yuanyuan Mi", "authorids": "~Xiaolong_Zou1;~Zhikun_Chu1;~Qinghai_Guo1;~Jie_Cheng2;~Bo_Hong2;~Si_Wu1;~Yuanyuan_Mi1", "gender": "M;;M;;M;M;F", "homepage": "https://www.researchgate.net/profile/Ben_Zou2?ev=hdr_xprf&_sg=4_qbmTPFzK47T60qCvL9GWs71qZNjrkYWpvY4BV6W595esmy6xa1AXjbRoS2P3fk-Tb_-1RYvjTh_jz-ZgYhLfn-;;https://www.semanticscholar.org/author/Qinghai-Guo/47747957;;http://neuro.med.tsinghua.edu.cn;https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm;", "dblp": "135/8911;;12/8502;90/1457-3;30/6939;25/437-1;48/9864", "google_scholar": ";;;2u88ffsAAAAJ;qSIysB4AAAAJ;;", "orcid": ";;0000-0003-4697-9464;0000-0003-0486-2321;0000-0003-2900-6791;;0000-0002-4156-5089", "linkedin": ";;;;;;", "or_profile": "~Xiaolong_Zou1;~Zhikun_Chu1;~Qinghai_Guo1;~Jie_Cheng2;~Bo_Hong2;~Si_Wu1;~Yuanyuan_Mi1", "aff": "Peking University;;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Department of Biomedical Engineering, Tsinghua University;Peking University;Chongqing University", "aff_domain": "pku.edu.cn;;huawei.com;huawei.com;tsinghua.edu.cn;pku.edu.cn;cqu.edu.cn", "position": "Postdoc;;Researcher;Principal Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzou2023learning,\ntitle={Learning and processing the ordinal information of temporal sequences in recurrent neural circuits},\nauthor={Xiaolong Zou and Zhikun Chu and Qinghai Guo and Jie Cheng and Bo Hong and Si Wu and Yuanyuan Mi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eeeqORvJbf}\n}", "github": "", "project": "", "reviewers": "tPSE;7Qop;CeTG;dJ2Y", "pdf_size": 7313689, "rating": "4;5;6;6", "confidence": "4;4;4;3", "soundness": "3;4;3;3", "novelty": "2;4;2;2", "presentation": "4;3;2;3", "wc_summary": "137;211;72;107", "wc_strengths": "5;52;50;95", "wc_weaknesses": "49;288;249;195", "wc_questions": "154;2;22;105", "wc_limitations": "1;39;4;22", "wc_review": "346;592;397;524", "wc_reply_reviewers": "8;394;16;13", "wc_reply_authors": "0;412;27;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;3;2;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.75, 51.21218116815569 ], "wc_strengths_avg": [ 50.5, 31.83158808479401 ], "wc_weaknesses_avg": [ 195.25, 90.66524968255479 ], "wc_questions_avg": [ 70.75, 61.65782594285984 ], "wc_limitations_avg": [ 16.5, 15.272524349301264 ], "wc_review_avg": [ 464.75, 97.97289165886653 ], "wc_reply_reviewers_avg": [ 107.75, 165.29122027500432 ], "wc_reply_authors_avg": [ 109.75, 174.8519016196278 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:ead9-PVGsjAJ:scholar.google.com/&scioq=Learning+and+processing+the+ordinal+information+of+temporal+sequences+in+recurrent+neural+circuits&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "pku.edu.cn;;huawei.com;huawei.com;tsinghua.edu.cn;pku.edu.cn;cqu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;2;0;3", "aff_unique_norm": "Peking University;Huawei;Tsinghua University;Chongqing University", "aff_unique_dep": ";Huawei Technologies;Department of Biomedical Engineering;", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com;https://www.tsinghua.edu.cn;https://www.cqu.edu.cn", "aff_unique_abbr": "Peking U;Huawei;THU;CQU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Resilient Multiple Choice Learning: A learned scoring scheme with application to audio scene analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70945", "id": "eibTaY6qGI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/12d7ba753894ed348904df1bf0ce02ec-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eibTaY6qGI", "openreview": "https://openreview.net/forum?id=eibTaY6qGI", "poster": "/media/PosterPDFs/NeurIPS%202023/70945.png?t=1700131242.576868", "slides": "https://nips.cc/virtual/2023/poster/70945", "video": "https://nips.cc/virtual/2023/poster/70945", "author_site": "Victor Letzelter, Mathieu Fontaine, Mickael Chen, Patrick P\u00e9rez, Slim Essid, Ga\u00ebl Richard", "tldr": "", "abstract": "We introduce Resilient Multiple Choice Learning (rMCL), an extension of the MCL approach for conditional distribution estimation in regression settings where multiple targets may be sampled for each training input.\nMultiple Choice Learning is a simple framework to tackle multimodal density estimation, using the Winner-Takes-All (WTA) loss for a set of hypotheses. In regression settings, the existing MCL variants focus on merging the hypotheses, thereby eventually sacrificing the diversity of the predictions. 
In contrast, our method relies on a novel learned scoring scheme underpinned by a mathematical framework based on Voronoi tessellations of the output space, from which we can derive a probabilistic interpretation.\nAfter empirically validating rMCL with experiments on synthetic data, we further assess its merits on the sound source localization problem, demonstrating its practical usefulness and the relevance of its interpretation.", "keywords": "Multiple Choice Learning;Audio processing.", "primary_area": "", "supplementary_material": "/attachment/f5a190a3a8e7b6f233c5e543e064e6a79963a9dd.zip", "author": "Victor Letzelter;Mathieu Fontaine;Mickael Chen;Patrick Perez;Slim Essid;Ga\u00ebl Richard", "authorids": "~Victor_Letzelter1;mathieu.fontaine@telecom-paris.fr;~Mickael_Chen1;~Patrick_Perez1;~Slim_Essid1;~Ga\u00ebl_Richard1", "gender": ";;M;;Not Specified;M", "homepage": "https://victorletzelter.github.io;;https://sites.google.com/view/mickaelchen/home;;https://perso.telecom-paris.fr/essid/;https://perso.telecom-paristech.fr/grichard/", "dblp": "360/0588;;190/7274;;53/6904;34/1310", "google_scholar": "https://scholar.google.fr/citations?user=YhTdZh8AAAAJ;;https://scholar.google.fr/citations?user=QnRpMJAAAAAJ;;5dP_Pv0AAAAJ;https://scholar.google.fr/citations?user=xn70tPIAAAAJ", "orcid": ";;;;;", "linkedin": "victor-letzelter-3b832219b;;mickael-chen-ml/;;;", "or_profile": "~Victor_Letzelter1;mathieu.fontaine@telecom-paris.fr;~Mickael_Chen1;~Patrick_Perez1;~Slim_Essid1;~Ga\u00ebl_Richard1", "aff": "T\u00e9l\u00e9com ParisTech;;Valeo;;T\u00e9l\u00e9com ParisTech;Telecom Paris", "aff_domain": "telecom-paristech.fr;;valeo.com;;telecom-paristech.fr;telecom-paris.fr", "position": "PhD student;;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nletzelter2023resilient,\ntitle={Resilient Multiple Choice Learning: A learned scoring scheme with application to audio scene analysis},\nauthor={Victor Letzelter and Mathieu Fontaine and Mickael Chen and Patrick Perez and Slim Essid and Ga{\\\"e}l Richard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eibTaY6qGI}\n}", "github": "", "project": "", "reviewers": "3WKf;YyHX;yLZS", "pdf_size": 3243763, "rating": "6;6;6", "confidence": "1;5;3", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "2;3;3", "wc_summary": "169;139;144", "wc_strengths": "50;183;89", "wc_weaknesses": "78;127;107", "wc_questions": "2;115;129", "wc_limitations": "9;110;10", "wc_review": "308;674;479", "wc_reply_reviewers": "0;0;20", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 1.632993161855452 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 150.66666666666666, 13.123346456686352 ], "wc_strengths_avg": [ 107.33333333333333, 55.82313339666829 ], "wc_weaknesses_avg": [ 104.0, 20.11632835948615 ], "wc_questions_avg": [ 82.0, 56.85654462475421 ], "wc_limitations_avg": [ 43.0, 47.37791327893902 ], "wc_review_avg": [ 487.0, 149.5259174858994 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13113954438501162183&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "email": "telecom-paristech.fr;;valeo.com;;telecom-paristech.fr;telecom-paris.fr", "author_num": 6, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech;Valeo;Telecom Paris", "aff_unique_dep": ";;", "aff_unique_url": "https://www.telecom-paristech.fr;https://www.valeo.com;https://www.telecom-paris.fr", "aff_unique_abbr": "TP;;Telecom Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Simultaneous embedding of multiple attractor manifolds in a recurrent neural network using constrained gradient optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70944", "id": "ekMLUoC2sq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/361e5112d2eca09513bbd266e4b2d2be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ekMLUoC2sq", "openreview": "https://openreview.net/forum?id=ekMLUoC2sq", "poster": "/media/PosterPDFs/NeurIPS%202023/70944.png?t=1702169820.9300294", "slides": "https://nips.cc/virtual/2023/poster/70944", "video": "https://nips.cc/virtual/2023/poster/70944", "author_site": "Haggai Agmon, Yoram Burak", "tldr": "", "abstract": "The storage of continuous variables in working memory is hypothesized to be sustained in the brain by the dynamics of recurrent neural networks (RNNs) whose steady states form continuous manifolds. In some cases, it is thought that the synaptic connectivity supports multiple attractor manifolds, each mapped to a different context or task. For example, in hippocampal area CA3, positions in distinct environments are represented by distinct sets of population activity patterns, each forming a continuum. It has been argued that the embedding of multiple continuous attractors in a single RNN inevitably causes detrimental interference: quenched noise in the synaptic connectivity disrupts the continuity of each attractor, replacing it by a discrete set of steady states that can be conceptualized as lying on local minima of an abstract energy landscape. Consequently, population activity patterns exhibit systematic drifts towards one of these discrete minima, thereby degrading the stored memory over time. Here we show that it is possible to dramatically attenuate these detrimental interference effects by adjusting the synaptic weights. Synaptic weight adjustment are derived from a loss function that quantifies the roughness of the energy landscape along each of the embedded attractor manifolds. 
By minimizing this loss function, the stability of states can be dramatically improved, without compromising the capacity.", "keywords": "Theoretical Neuroscience;Computational Neuroscience;Recurrent Neural Networks;Attractor models", "primary_area": "", "supplementary_material": "/attachment/ac55dd49d7b0615d488fb96612fc0a7b5d8d85e4.pdf", "author": "Haggai Agmon;Yoram Burak", "authorids": "~Haggai_Agmon1;~Yoram_Burak1", "gender": ";M", "homepage": "https://profiles.stanford.edu/haggai-agmon;https://buraklab.me", "dblp": ";", "google_scholar": ";", "orcid": "0000-0002-7212-9052;", "linkedin": ";", "or_profile": "~Haggai_Agmon1;~Yoram_Burak1", "aff": "Stanford University;Hebrew University of Jerusalem", "aff_domain": "stanford.edu;huji.ac.il", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nagmon2023simultaneous,\ntitle={Simultaneous embedding of multiple attractor manifolds in a recurrent neural network using constrained gradient optimization},\nauthor={Haggai Agmon and Yoram Burak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ekMLUoC2sq}\n}", "github": "", "project": "", "reviewers": "vC81;NH89;MLg2;fsQk;m6Fs", "pdf_size": 3401411, "rating": "4;5;7;7;8", "confidence": "4;4;2;4;3", "soundness": "3;3;3;4;4", "novelty": "2;2;4;3;4", "presentation": "3;2;3;4;4", "wc_summary": "90;76;110;168;70", "wc_strengths": "15;41;31;33;49", "wc_weaknesses": "141;111;90;83;40", "wc_questions": "29;242;127;59;160", "wc_limitations": "4;4;15;1;47", "wc_review": "279;474;373;344;366", "wc_reply_reviewers": "42;153;18;53;195", "wc_reply_authors": "669;808;0;22;18", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 102.8, 35.38587288735435 ], "wc_strengths_avg": [ 33.8, 11.356055653262711 ], "wc_weaknesses_avg": [ 93.0, 33.30465432938766 ], "wc_questions_avg": [ 123.4, 75.46813897268171 ], "wc_limitations_avg": [ 14.2, 17.08098357823694 ], "wc_review_avg": [ 367.2, 62.86302569873645 ], "wc_reply_reviewers_avg": [ 92.2, 69.03158697292132 ], "wc_reply_authors_avg": [ 303.4, 358.04334933077587 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5783517448238059, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9707893544423738352&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stanford.edu;huji.ac.il", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Hebrew University of Jerusalem", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.huji.ac.il", "aff_unique_abbr": "Stanford;HUJI", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Jerusalem", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "title": "LaFTer: Label-Free Tuning of Zero-shot Classifier using Language and Unlabeled Image Collections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70943", "id": "elPtHcfjpH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/123a18dfd821c8b440f42a00a27648d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=elPtHcfjpH", "openreview": 
"https://openreview.net/forum?id=elPtHcfjpH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70943", "video": "https://nips.cc/virtual/2023/poster/70943", "author_site": "Muhammad Jehanzeb Mirza, Leonid Karlinsky, Wei Lin, Horst Possegger, Mateusz Kozinski, Rogerio Feris, Horst Bischof", "tldr": "", "abstract": "Recently, large-scale pre-trained Vision and Language (VL) models have set a new state-of-the-art (SOTA) in zero-shot visual classification enabling open-vocabulary recognition of potentially unlimited set of categories defined as simple language prompts. However, despite these great advances, the performance of these zero-shot classifiers still falls short of the results of dedicated (closed category set) classifiers trained with supervised fine-tuning. In this paper we show, for the first time, how to reduce this gap without any labels and without any paired VL data, using an unlabeled image collection and a set of texts auto-generated using a Large Language Model (LLM) describing the categories of interest and effectively substituting labeled visual instances of those categories. Using our label-free approach, we are able to attain significant performance improvements over the zero-shot performance of the base VL model and other contemporary methods and baselines on a wide variety of datasets, demonstrating absolute improvement of up to $11.7\\%$ ($3.8\\%$ on average) in the label-free setting. Moreover, despite our approach being label-free, we observe $1.3\\%$ average gains over leading few-shot prompting baselines that do use 5-shot supervision.", "keywords": "VL Models", "primary_area": "", "supplementary_material": "/attachment/1c403d18e476167964ecf62dcdca9ab033a9fd94.pdf", "author": "Muhammad Jehanzeb Mirza;Leonid Karlinsky;Wei Lin;Horst Possegger;Mateusz Kozinski;Rogerio Feris;Horst Bischof", "authorids": "~Muhammad_Jehanzeb_Mirza1;~Leonid_Karlinsky3;~Wei_Lin9;~Horst_Possegger1;~Mateusz_Kozinski1;~Rogerio_Feris1;~Horst_Bischof2", "gender": "M;M;M;M;;M;M", "homepage": ";;https://wlin-at.github.io/;http://icg.tugraz.at/;;http://rogerioferis.com;https://www.tugraz.at/institute/icg/research/team-bischof", "dblp": "295/9034;05/4463;99/2649-19;135/4917;;;69/3793.html", "google_scholar": "cES2rkAAAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ;JJRr8c8AAAAJ;https://scholar.google.at/citations?user=iWPrl3wAAAAJ;;xt3XLjcAAAAJ;https://scholar.google.at/citations?user=_pq05Q4AAAAJ", "orcid": ";;;0000-0002-5427-9938;;;0000-0002-9096-6671", "linkedin": ";;;;;;", "or_profile": "~Muhammad_Jehanzeb_Mirza1;~Leonid_Karlinsky3;~Wei_Lin9;~Horst_Possegger1;~Mateusz_Kozinski1;~Rogerio_Feris1;~Horst_Bischof2", "aff": ";International Business Machines;Technische Universit\u00e4t Graz;Graz University of Technology;;International Business Machines;Graz University of Technology", "aff_domain": ";ibm.com;tugraz.at;tugraz.at;;ibm.com;tugraz.at", "position": ";Principal Researcher;PhD student;Postdoc;;Research Manager;Full Professor", "bibtex": "@inproceedings{\nmirza2023lafter,\ntitle={La{FT}er: Label-Free Tuning of Zero-shot Classifier using Language and Unlabeled Image Collections},\nauthor={Muhammad Jehanzeb Mirza and Leonid Karlinsky and Wei Lin and Horst Possegger and Mateusz Kozinski and Rogerio Feris and Horst Bischof},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=elPtHcfjpH}\n}", "github": "", "project": "", "reviewers": "YPJ1;o7C2;zXQC;bBjN", "pdf_size": 4784152, "rating": 
"5;6;6;7", "confidence": "4;5;5;5", "soundness": "3;4;4;4", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "98;112;73;109", "wc_strengths": "74;68;112;72", "wc_weaknesses": "139;95;129;137", "wc_questions": "43;43;10;41", "wc_limitations": "43;31;60;58", "wc_review": "397;349;384;417", "wc_reply_reviewers": "241;77;0;30", "wc_reply_authors": "53;292;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 15.346009253222807 ], "wc_strengths_avg": [ 81.5, 17.741194999210173 ], "wc_weaknesses_avg": [ 125.0, 17.72004514666935 ], "wc_questions_avg": [ 34.25, 14.02453207775575 ], "wc_limitations_avg": [ 48.0, 11.811011811017716 ], "wc_review_avg": [ 386.75, 24.762623043611516 ], "wc_reply_reviewers_avg": [ 87.0, 93.05106125133662 ], "wc_reply_authors_avg": [ 86.25, 120.74430628398177 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14943355983821991621&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": ";ibm.com;tugraz.at;tugraz.at;;ibm.com;tugraz.at", "author_num": 7, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "International Business Machines Corporation;Technische Universit\u00e4t Graz;Graz University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ibm.com;https://www.tugraz.at;https://www.tugraz.at", "aff_unique_abbr": "IBM;TU Graz;TUGraz", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United States;Austria" }, { "title": "Getting ViT in Shape: Scaling Laws for Compute-Optimal Model Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70942", "id": "en4LGxpd9E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3504a4fa45685d668ce92797fbbf1895-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=en4LGxpd9E", "openreview": "https://openreview.net/forum?id=en4LGxpd9E", "poster": "/media/PosterPDFs/NeurIPS%202023/70942.png?t=1700231790.5872777", "slides": "https://nips.cc/virtual/2023/poster/70942", "video": "https://nips.cc/virtual/2023/poster/70942", "author_site": "Ibrahim Alabdulmohsin, Xiaohua Zhai, Alexander Kolesnikov, Lucas Beyer", "tldr": "", "abstract": "Scaling laws have been recently employed to derive compute-optimal model size (number of parameters) for a given compute duration. We advance and refine such methods to infer compute-optimal model shapes, such as width and depth, and successfully implement this in vision transformers. Our shape-optimized vision transformer, SoViT, achieves results competitive with models that exceed twice its size, despite being pre-trained with an equivalent amount of compute. For example, SoViT-400m/14 achieves 90.3% fine-tuning accuracy on ILSRCV2012, surpassing the much larger ViT-g/14 and approaching ViT-G/14 under identical settings, with also less than half the inference cost. 
We conduct a thorough evaluation across multiple tasks, such as image classification, captioning, VQA and zero-shot transfer, demonstrating the effectiveness of our model across a broad range of domains and identifying limitations. Overall, our findings challenge the prevailing approach of blindly scaling up vision models and pave a path for a more informed scaling.", "keywords": "Vision transformer;scaling laws;compute-optimal model design;vision", "primary_area": "", "supplementary_material": "/attachment/86769faa8d2c899e79c86176e38c2736f0c313de.pdf", "author": "Ibrahim Alabdulmohsin;Xiaohua Zhai;Alexander Kolesnikov;Lucas Beyer", "authorids": "~Ibrahim_Alabdulmohsin1;~Xiaohua_Zhai2;~Alexander_Kolesnikov2;~Lucas_Beyer1", "gender": "M;;;", "homepage": "http://ibomohsin.com;;;http://lucasb.eyer.be", "dblp": "153/5393;66/636;137/6963-3.html;126/4720", "google_scholar": "8WNMsPYAAAAJ;;H9I0CVwAAAAJ;p2gwhK4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ibrahim_Alabdulmohsin1;~Xiaohua_Zhai2;~Alexander_Kolesnikov2;~Lucas_Beyer1", "aff": "Google;Google Brain;Google;Google Brain", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Research Scientist;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nalabdulmohsin2023getting,\ntitle={Getting ViT in Shape: Scaling Laws for Compute-Optimal Model Design},\nauthor={Ibrahim Alabdulmohsin and Xiaohua Zhai and Alexander Kolesnikov and Lucas Beyer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=en4LGxpd9E}\n}", "github": "", "project": "", "reviewers": "hmtB;DDss;s4n2;T7K3", "pdf_size": 556166, "rating": "6;6;7;7", "confidence": "4;5;4;4", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "4;3;3;3", "wc_summary": "138;97;107;81", "wc_strengths": "74;138;149;56", "wc_weaknesses": "146;233;212;94", "wc_questions": "185;16;139;70", "wc_limitations": "48;1;9;2", "wc_review": "591;485;616;303", "wc_reply_reviewers": "0;442;102;0", "wc_reply_authors": "158;306;320;159", "reply_reviewers": "0;2;1;0", "reply_authors": "2;3;3;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 105.75, 20.801141795584204 ], "wc_strengths_avg": [ 104.25, 39.95231532714969 ], "wc_weaknesses_avg": [ 171.25, 54.95168332271542 ], "wc_questions_avg": [ 102.5, 64.56972975009265 ], "wc_limitations_avg": [ 15.0, 19.300259065618782 ], "wc_review_avg": [ 498.75, 123.25253547087785 ], "wc_reply_reviewers_avg": [ 136.0, 181.5103302845323 ], "wc_reply_authors_avg": [ 235.75, 77.40922102695518 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7489746875587325741&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Train Once and Explain Everywhere: 
Pre-training Interpretable Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70941", "id": "enfx8HM4Rp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ecd51685e2d765bc0ad32a2e73faf62-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=enfx8HM4Rp", "openreview": "https://openreview.net/forum?id=enfx8HM4Rp", "poster": "/media/PosterPDFs/NeurIPS%202023/70941.png?t=1699535448.0192156", "slides": "https://nips.cc/virtual/2023/poster/70941", "video": "https://nips.cc/virtual/2023/poster/70941", "author_site": "Jun Yin, Chaozhuo Li, Hao Yan, Jianxun Lian, Senzhang Wang", "tldr": "", "abstract": "Intrinsically interpretable graph neural networks aim to provide transparent predictions by identifying the influential fraction of the input graph that guides the model prediction, i.e., the explanatory subgraph. However, current interpretable GNNs are mostly dataset-specific and hard to generalize to different graphs. A more generalizable GNN interpretation model which can effectively distill the universal structural patterns of different graphs has until now remained unexplored. Motivated by the great success of recent pre-training techniques, we for the first time propose the Pre-training Interpretable Graph Neural Network ($\\pi$-GNN) to distill the universal interpretability of GNNs by pre-training over synthetic graphs with ground-truth explanations. Specifically, we introduce a structural pattern learning module to extract diverse universal structure patterns and integrate them together to comprehensively represent the graphs of different types. Next, a hypergraph refining module is proposed to identify the explanatory subgraph by incorporating the universal structure patterns with local edge interactions. Finally, the task-specific predictor is cascaded with the pre-trained $\\pi$-GNN model and fine-tuned over downstream tasks. Extensive experiments demonstrate that $\\pi$-GNN significantly surpasses the leading interpretable GNN baselines with up to 9.98\\% interpretation improvement and 16.06\\% classification accuracy improvement. Meanwhile, $\\pi$-GNN pre-trained on the graph classification task also achieves top-tier interpretation performance on the node classification task, which further verifies its promising generalization performance among different downstream tasks. 
Our code and datasets are available at https://anonymous.4open.science/r/PI-GNN-F86C", "keywords": "Intrinsic Interpretability;Graph Neural Networks;Pre-training and Fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Jun Yin;Chaozhuo Li;Hao Yan;Jianxun Lian;Senzhang Wang", "authorids": "~Jun_Yin11;~Chaozhuo_Li1;~Hao_Yan6;~Jianxun_Lian1;~Senzhang_Wang2", "gender": "M;;M;M;M", "homepage": "https://esperanto-mega.github.io/;https://scss.bupt.edu.cn/info/1063/5534.htm;https://sktsherlock.github.io/;https://www.microsoft.com/en-us/research/people/jialia/;https://senzhangwangcsu.github.io/index.html", "dblp": "58/5423-5;316/1269.html;;161/0030;118/5055", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;;tSq7dIkAAAAJ;zdWyGRMAAAAJ", "orcid": ";0000-0002-8179-7503;;0000-0003-3108-5601;0000-0002-3615-4859", "linkedin": ";;;;", "or_profile": "~Jun_Yin11;~Chaozhuo_Li1;~Hao_Yan6;~Jianxun_Lian1;~Senzhang_Wang2", "aff": "Central South University;Beijing University of Posts and Telecommunications;Central South University;Microsoft Research;Central South University", "aff_domain": "csu.edu.cn;bupt.edu.cn;csu.edu.cn;microsoft.com;csu.edu.cn", "position": "MS student;Associate Professor;MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nyin2023train,\ntitle={Train Once and Explain Everywhere: Pre-training Interpretable Graph Neural Networks},\nauthor={Jun Yin and Chaozhuo Li and Hao Yan and Jianxun Lian and Senzhang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=enfx8HM4Rp}\n}", "github": "", "project": "", "reviewers": "M6tM;XLHb;34Ne;EENr", "pdf_size": 1279441, "rating": "5;6;8;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;3;4", "wc_summary": "26;71;170;119", "wc_strengths": "46;21;150;199", "wc_weaknesses": "155;78;142;103", "wc_questions": "4;3;64;117", "wc_limitations": "1;4;13;28", "wc_review": "232;177;539;566", "wc_reply_reviewers": "42;35;18;93", "wc_reply_authors": "65;17;61;61", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;3", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.5, 53.68659050451984 ], "wc_strengths_avg": [ 104.0, 73.1334396839093 ], "wc_weaknesses_avg": [ 119.5, 30.663496212923928 ], "wc_questions_avg": [ 47.0, 47.36559933116016 ], "wc_limitations_avg": [ 11.5, 10.5 ], "wc_review_avg": [ 378.5, 175.34323482815068 ], "wc_reply_reviewers_avg": [ 47.0, 27.955321496988727 ], "wc_reply_authors_avg": [ 51.0, 19.697715603592208 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16152920807954164908&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "csu.edu.cn;bupt.edu.cn;csu.edu.cn;microsoft.com;csu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Central South University;Beijing University of Posts and Telecommunications;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.csu.edu.cn;http://www.bupt.edu.cn/;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CSU;BUPT;MSR", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Black-Box Differential Privacy for Interactive ML", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70940", "id": "eoDNaH3pfB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f418594e90047a10f4c158f70d6701cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eoDNaH3pfB", "openreview": "https://openreview.net/forum?id=eoDNaH3pfB", "poster": "/media/PosterPDFs/NeurIPS%202023/70940.png?t=1701819186.3481524", "slides": "https://nips.cc/virtual/2023/poster/70940", "video": "https://nips.cc/virtual/2023/poster/70940", "author_site": "Haim Kaplan, Yishay Mansour, Shay Moran, Kobbi Nissim, Uri Stemmer", "tldr": "", "abstract": "In this work we revisit an interactive variant of joint differential privacy, recently introduced by Naor et al. [2023], and generalize it towards handling online processes in which existing privacy definitions seem too restrictive. We study basic properties of this definition and demonstrate that it satisfies (suitable variants) of group privacy, composition, and post processing.\n\nIn order to demonstrate the advantages of this privacy definition compared to traditional forms of differential privacy,\nwe consider the basic setting of online classification. We show that any (possibly non-private) learning rule can be effectively transformed to a private learning rule with only a polynomial overhead in the mistake bound. This demonstrates a stark difference with traditional forms of differential privacy, such as the one studied by Golowich and Livni [2021], where only a double exponential overhead in the mistake bound is known (via an information theoretic upper bound).", "keywords": "Differential privacy;online learning", "primary_area": "", "supplementary_material": "", "author": "Haim Kaplan;Yishay Mansour;Shay Moran;Kobbi Nissim;Uri Stemmer", "authorids": "~Haim_Kaplan1;~Yishay_Mansour2;~Shay_Moran1;~Kobbi_Nissim2;~Uri_Stemmer1", "gender": ";;M;M;", "homepage": ";;http://www.cs.technion.ac.il/~shaymrn/;http://people.cs.georgetown.edu/~kobbi/;https://www.uri.co.il/", "dblp": ";;119/5111;65/801;125/8532", "google_scholar": ";;kALYnggAAAAJ;https://scholar.google.com.tw/citations?user=U-RE8IgAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Haim_Kaplan1;~Yishay_Mansour2;~Shay_Moran1;~Kobbi_Nissim2;~Uri_Stemmer1", "aff": ";;Google;Georgetown University;Tel Aviv University", "aff_domain": ";;google.com;georgetwon.edu;tau.ac.il", "position": ";;Visiting Faculty;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkaplan2023blackbox,\ntitle={Black-Box Differential Privacy for Interactive {ML}},\nauthor={Haim Kaplan and Yishay Mansour and Shay Moran and Kobbi Nissim and Uri Stemmer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eoDNaH3pfB}\n}", "github": "", "project": "", "reviewers": "4d8Q;LQAr;FnVa;4WCj", "pdf_size": 522988, "rating": "3;5;6;7", "confidence": "4;3;3;3", "soundness": "3;3;2;4", "novelty": "2;2;2;4", "presentation": "1;2;3;3", "wc_summary": "15;117;48;238", "wc_strengths": "25;91;124;54", "wc_weaknesses": "241;214;371;93", "wc_questions": "13;158;34;191", "wc_limitations": "56;67;19;20", "wc_review": "350;647;596;596", "wc_reply_reviewers": "64;36;61;52", "wc_reply_authors": "69;41;78;0", "reply_reviewers": 
"1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 104.5, 85.41223565742791 ], "wc_strengths_avg": [ 73.5, 37.379807383131336 ], "wc_weaknesses_avg": [ 229.75, 98.77594595851765 ], "wc_questions_avg": [ 99.0, 76.75610724886978 ], "wc_limitations_avg": [ 40.5, 21.360009363293827 ], "wc_review_avg": [ 547.25, 115.76997667789348 ], "wc_reply_reviewers_avg": [ 53.25, 10.894379284750462 ], "wc_reply_authors_avg": [ 47.0, 30.37268509697488 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8783100656536799, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5928349459783336090&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";;google.com;georgetwon.edu;tau.ac.il", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Georgetown University;Tel Aviv University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.georgetown.edu;https://www.tau.ac.il", "aff_unique_abbr": "Google;GU;TAU", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Understanding and Improving Feature Learning for Out-of-Distribution Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70939", "id": "eozEoAtjG8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d73d5645ddbb9ada6c862116435574f6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eozEoAtjG8", "openreview": "https://openreview.net/forum?id=eozEoAtjG8", "poster": "/media/PosterPDFs/NeurIPS%202023/70939.png?t=1701688433.19581", "slides": "https://nips.cc/virtual/2023/poster/70939", "video": "https://nips.cc/virtual/2023/poster/70939", "author_site": "Yongqiang Chen, Wei Huang, Kaiwen Zhou, Yatao Bian, Bo Han, James Cheng", "tldr": "", "abstract": "A common explanation for the failure of out-of-distribution (OOD) generalization is that the model trained with empirical risk minimization (ERM) learns spurious features instead of invariant features. However, several recent studies challenged this explanation and found that deep networks may have already learned sufficiently good features for OOD generalization. Despite the contradictions at first glance, we theoretically show that ERM essentially learns both spurious and invariant features, while ERM tends to learn spurious features faster if the spurious correlation is stronger. Moreover, when fed the ERM learned features to the OOD objectives, the invariant feature learning quality significantly affects the final OOD performance, as OOD objectives rarely learn new features. Therefore, ERM feature learning can be a bottleneck to OOD generalization. To alleviate the reliance, we propose Feature Augmented Training (FeAT), to enforce the model to learn richer features ready for OOD generalization. FeAT iteratively augments the model to learn new features while retaining the already learned features. In each round, the retention and augmentation operations are performed on different subsets of the training data that capture distinct features. 
Extensive experiments show that FeAT effectively learns richer features thus boosting the performance of various OOD objectives.", "keywords": "Out-of-Distribution Generalization;Feature Learning;Invariant Risk Minimization", "primary_area": "", "supplementary_material": "/attachment/100825f816ab277e2f14cdff3e15cf5410997d16.pdf", "author": "Yongqiang Chen;Wei Huang;Kaiwen Zhou;Yatao Bian;Bo Han;James Cheng", "authorids": "~Yongqiang_Chen1;~Wei_Huang6;~Kaiwen_Zhou2;~Yatao_Bian1;~Bo_Han1;~James_Cheng2", "gender": ";M;M;M;M;M", "homepage": "https://lfhase.win;https://weihuang05.github.io/;https://jnhujnhu.github.io/;https://www.cse.cuhk.edu.hk/~jcheng/;https://bhanml.github.io/;https://yataobian.com", "dblp": "76/5774-2;81/6685-34;215/4936;06/4171;241/0472-3;222/2694", "google_scholar": "huQ_Ig8AAAAJ;RZfDh4MAAAAJ;nHmlZ5QAAAAJ;;nTNjqHwAAAAJ;oZBTlBkAAAAJ", "orcid": ";0000-0001-5674-7021;;;;0000-0002-2368-4084", "linkedin": ";;;;;", "or_profile": "~Yongqiang_Chen1;~Wei_Huang6;~Kaiwen_Zhou2;~James_Cheng2;~bo_han2;~An_Bian1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;RIKEN AIP;Huawei Noah's Ark Lab;The Chinese University of Hong Kong;RIKEN;Tencent AI Lab", "aff_domain": "cse.cuhk.edu.hk;riken.jp;huawei.com;cuhk.edu.hk;riken.jp;tencent.com", "position": "PhD student;Postdoc;Researcher;Associate Professor;Adjunct Scientist;Senior researcher ", "bibtex": "@inproceedings{\nchen2023understanding,\ntitle={Understanding and Improving Feature Learning for Out-of-Distribution Generalization},\nauthor={Yongqiang Chen and Wei Huang and Kaiwen Zhou and Yatao Bian and Bo Han and James Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eozEoAtjG8}\n}", "github": "", "project": "", "reviewers": "Jcjd;KJZ8;qcQH;NB5i", "pdf_size": 21914044, "rating": "5;5;7;7", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "1;2;3;2", "wc_summary": "79;221;91;220", "wc_strengths": "41;8;73;160", "wc_weaknesses": "796;123;170;34", "wc_questions": "79;27;101;346", "wc_limitations": "17;1;28;65", "wc_review": "1012;380;463;825", "wc_reply_reviewers": "52;0;31;291", "wc_reply_authors": "282;159;40;1071", "reply_reviewers": "1;0;1;2", "reply_authors": "4;3;2;4", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 152.75, 67.8836320477919 ], "wc_strengths_avg": [ 70.5, 56.55307241874662 ], "wc_weaknesses_avg": [ 280.75, 301.46258059666377 ], "wc_questions_avg": [ 138.25, 122.9174011277492 ], "wc_limitations_avg": [ 27.75, 23.551804601770964 ], "wc_review_avg": [ 670.0, 258.8136395169312 ], "wc_reply_reviewers_avg": [ 93.5, 115.51731471948264 ], "wc_reply_authors_avg": [ 388.0, 403.50650552376476 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7711147142509088742&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cse.cuhk.edu.hk;riken.jp;huawei.com;cuhk.edu.hk;riken.jp;tencent.com", "author_num": 6, "aff_unique_index": "0;1;2;0;1;3", "aff_unique_norm": "Chinese University of Hong Kong;RIKEN;Huawei;Tencent", "aff_unique_dep": "Department of Computer 
Science and Engineering;Advanced Intelligence Project;Noah's Ark Lab;Tencent AI Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.aip.riken.jp;https://www.huawei.com;https://ai.tencent.com", "aff_unique_abbr": "CUHK;RIKEN AIP;Huawei;Tencent AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "China;Japan" }, { "title": "TWIGMA: A dataset of AI-Generated Images with Metadata From Twitter", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73518", "id": "epUQ40eCzk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/769b70d1a9a6b21af53c00d0b322c763-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=epUQ40eCzk", "openreview": "https://openreview.net/forum?id=epUQ40eCzk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73518", "video": "https://nips.cc/virtual/2023/poster/73518", "author_site": "Yiqun Chen, James Zou", "tldr": "", "abstract": "Recent progress in generative artificial intelligence (gen-AI) has enabled the generation of photo-realistic and artistically-inspiring photos at a single click, catering to millions of users online. To explore how people use gen-AI models such as DALLE and StableDiffusion, it is critical to understand the themes, contents, and variations present in the AI-generated photos. In this work, we introduce TWIGMA (TWItter Generative-ai images with MetadatA), a comprehensive dataset encompassing over 800,000 gen-AI images collected from Jan 2021 to March 2023 on Twitter, with associated metadata (e.g., tweet text, creation date, number of likes), available at https://zenodo.org/records/8031785. Through a comparative analysis of TWIGMA with natural images and human artwork, we find that gen-AI images possess distinctive characteristics and exhibit, on average, lower variability when compared to their non-gen-AI counterparts. Additionally, we find that the similarity between a gen-AI image and natural images is inversely correlated with the number of likes. Finally, we observe a longitudinal shift in the themes of AI-generated images on Twitter, with users increasingly sharing artistically sophisticated content such as intricate human portraits, whereas their interest in simple subjects such as natural scenes and animals has decreased. Our findings underscore the significance of TWIGMA as a unique data resource for studying AI-generated images.", "keywords": "Generative AI;Diffusion model;Large-scale Twitter datasets", "primary_area": "", "supplementary_material": "/attachment/286d5c47a812b1044d5b3ea8388984d223ef6a24.pdf", "author": "Yiqun T. Chen;James Zou", "authorids": "~Yiqun_T._Chen2;~James_Zou1", "gender": ";", "homepage": "https://yiqunchen.github.io/;", "dblp": "282/6282;", "google_scholar": "wXvfG6wAAAAJ;23ZXZvEAAAAJ", "orcid": "0000-0002-4100-1507;", "linkedin": ";", "or_profile": "~Yiqun_T._Chen2;~James_Zou1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nchen2023twigma,\ntitle={{TWIGMA}: A dataset of {AI}-Generated Images with Metadata From Twitter},\nauthor={Yiqun T. 
Chen and James Zou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=epUQ40eCzk}\n}", "github": "", "project": "", "reviewers": "VLov;vkpY;8zrT;Araw;s6di", "pdf_size": 10953102, "rating": "5;6;6;7;9", "confidence": "5;4;3;3;3", "wc_summary_and_contributions": "34;61;52;35;97", "wc_strengths": "21;93;36;34;36", "wc_improvement": "81;260;16;193;40", "wc_limitations": "6;51;18;55;10", "wc_correctness": "1;13;12;29;11", "wc_clarity": "4;8;9;36;6", "wc_relation_to_prior_work": "1;13;10;68;7", "wc_documentation": "9;127;16;19;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "158;627;170;470;213", "wc_reply_reviewers": "0;158;14;40;50", "wc_reply_authors": "567;1313;361;752;716", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 55.8, 23.007824755939012 ], "wc_strengths_avg": [ 44.0, 25.13165334791963 ], "wc_improvement_avg": [ 118.0, 93.43018784097569 ], "wc_limitations_avg": [ 28.0, 20.813457185196313 ], "wc_correctness_avg": [ 13.2, 8.997777503361593 ], "wc_clarity_avg": [ 12.6, 11.825396399275585 ], "wc_relation_to_prior_work_avg": [ 19.8, 24.42457778550123 ], "wc_documentation_avg": [ 35.2, 46.166654632970754 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 327.6, 187.96446472671371 ], "wc_reply_reviewers_avg": [ 52.4, 55.733652311686875 ], "wc_reply_authors_avg": [ 741.8, 317.04977527195945 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7003492917357614, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3807584080979852596&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Supply-Side Equilibria in Recommender Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70938", "id": "eqyhjLG5Nr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f1486343c2c942a617e4f5bb0cc64c8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=eqyhjLG5Nr", "openreview": "https://openreview.net/forum?id=eqyhjLG5Nr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70938", "video": "https://nips.cc/virtual/2023/poster/70938", "author_site": "Meena Jagadeesan, Nikhil Garg, Jacob Steinhardt", "tldr": "", "abstract": "Algorithmic recommender systems such as Spotify and Netflix affect not only consumer behavior but also *producer incentives*. Producers seek to create content that will be shown by the recommendation algorithm, which can impact both the diversity and quality of their content. In this work, we investigate the resulting supply-side equilibria in personalized content recommender systems. We model the decisions of producers as choosing *multi-dimensional* content vectors and users as having *heterogeneous* preferences, which contrasts with classical low-dimensional models. 
Multi-dimensionality and heterogeneity create the potential for *specialization*, where different producers create different types of content at equilibrium. Using a duality argument, we derive necessary and sufficient conditions for whether specialization occurs. Then, we characterize the distribution of content at equilibrium in concrete settings with two populations of users. Lastly, we show that specialization can enable producers to achieve *positive profit at equilibrium*, which means that specialization can reduce the competitiveness of the marketplace. At a conceptual level, our analysis of supply-side competition takes a step towards elucidating how personalized recommendations shape the marketplace of digital goods.", "keywords": "content creator incentives;Nash equilibria;specialization;economic aspects of recommender systems", "primary_area": "", "supplementary_material": "/attachment/bee9d119288230dbdfe021377dfbc545eb2703c7.pdf", "author": "Meena Jagadeesan;Nikhil Garg;Jacob Steinhardt", "authorids": "~Meena_Jagadeesan1;~Nikhil_Garg2;~Jacob_Steinhardt1", "gender": "F;;", "homepage": "https://mjagadeesan.github.io;https://gargnikhil.com/;", "dblp": "205/2407;83/6058-1;35/10625", "google_scholar": "XW62DrcAAAAJ;8qSK3noAAAAJ;", "orcid": ";0000-0002-1988-792X;", "linkedin": ";;", "or_profile": "~Meena_Jagadeesan1;~Nikhil_Garg2;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;Cornell University;University of California, Berkeley", "aff_domain": "berkeley.edu;cornell.edu;berkeley.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njagadeesan2023supplyside,\ntitle={Supply-Side Equilibria in Recommender Systems},\nauthor={Meena Jagadeesan and Nikhil Garg and Jacob Steinhardt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=eqyhjLG5Nr}\n}", "github": "", "project": "", "reviewers": "rJ3F;ham5;sbgm;2uvz", "pdf_size": 1566503, "rating": "5;5;6;6", "confidence": "3;5;1;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "42;66;209;41", "wc_strengths": "31;119;41;34", "wc_weaknesses": "80;72;113;286", "wc_questions": "35;14;67;25", "wc_limitations": "6;1;7;1", "wc_review": "194;272;437;387", "wc_reply_reviewers": "0;0;9;120", "wc_reply_authors": "80;80;80;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 1.479019945774904 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.5, 69.71549325652082 ], "wc_strengths_avg": [ 56.25, 36.409991760504425 ], "wc_weaknesses_avg": [ 137.75, 86.96083888739804 ], "wc_questions_avg": [ 35.25, 19.778460506318485 ], "wc_limitations_avg": [ 3.75, 2.7726341266023544 ], "wc_review_avg": [ 322.5, 95.30608584975043 ], "wc_reply_reviewers_avg": [ 32.25, 50.795546064591136 ], "wc_reply_authors_avg": [ 60.0, 34.64101615137755 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.50709255283711, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=831398690193315958&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "berkeley.edu;cornell.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, 
Berkeley;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.cornell.edu", "aff_unique_abbr": "UC Berkeley;Cornell", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Doubly-Robust Self-Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70937", "id": "esy7pkZmKn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/819f426947c27eb5067bb6fdbdde93dd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=esy7pkZmKn", "openreview": "https://openreview.net/forum?id=esy7pkZmKn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70937", "video": "https://nips.cc/virtual/2023/poster/70937", "author_site": "Banghua Zhu, Mingyu Ding, Philip Jacobson, Ming Wu, Wei Zhan, Michael Jordan, Jiantao Jiao", "tldr": "", "abstract": "Self-training is a well-established technique in semi-supervised learning, which leverages unlabeled data by generating pseudo-labels and incorporating them with a limited labeled dataset for training. The effectiveness of self-training heavily relies on the accuracy of these pseudo-labels. In this paper, we introduce doubly-robust self-training, an innovative semi-supervised algorithm that provably balances between two extremes. When pseudo-labels are entirely incorrect, our method reduces to a training process solely using labeled data. Conversely, when pseudo-labels are completely accurate, our method transforms into a training process utilizing all pseudo-labeled data and labeled data, thus increasing the effective sample size. Through empirical evaluations on both the ImageNet dataset for image classification and the nuScenes autonomous driving dataset for 3D object detection, we demonstrate the superiority of the doubly-robust loss over the self-training baseline.", "keywords": "semi-supervised learning;self-training;auto-labeling;self-labeling;doubly robust", "primary_area": "", "supplementary_material": "/attachment/a85203350254b4544add0d3faa4d06f6b2ba34e1.zip", "author": "Banghua Zhu;Mingyu Ding;Philip Jacobson;Ming Wu;Wei Zhan;Michael Jordan;Jiantao Jiao", "authorids": "~Banghua_Zhu1;~Mingyu_Ding1;~Philip_Jacobson1;~Ming_Wu1;~Wei_Zhan2;~Michael_Jordan1;~Jiantao_Jiao1", "gender": "M;M;M;M;;M;M", "homepage": "https://people.eecs.berkeley.edu/~banghua/;https://dingmyu.github.io/;;https://www2.eecs.berkeley.edu/Faculty/Homepages/wu.html;;http://www.cs.berkeley.edu/~jordan/;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en", "dblp": "204/5394;188/5243;330/3448;;;j/MichaelIJordan;43/8919", "google_scholar": "https://scholar.google.com/citations?hl=en;w4yTWwoAAAAJ;3yQFuR4AAAAJ;Yr2yu_sAAAAJ;;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;aO8KpGcAAAAJ", "orcid": ";0000-0001-6556-8359;0000-0001-9957-7464;0000-0003-4808-6686;;0000-0001-8935-817X;", "linkedin": ";dingmyu/;;ming-wu-85933015/;;;", "or_profile": "~Banghua_Zhu1;~Mingyu_Ding1;~Philip_Jacobson1;~Ming_Wu1;~Wei_Zhan2;~Michael_Jordan1;~Jiantao_Jiao1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;;berkeley.edu;berkeley.edu", "position": "PhD student;Postdoc;PhD student;Full Professor;;Full Professor;Assistant Professor", 
"bibtex": "@inproceedings{\nzhu2023doublyrobust,\ntitle={Doubly-Robust Self-Training},\nauthor={Banghua Zhu and Mingyu Ding and Philip Jacobson and Ming Wu and Wei Zhan and Michael Jordan and Jiantao Jiao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=esy7pkZmKn}\n}", "github": "", "project": "", "reviewers": "8nT5;yYuG;bmAm;AyWm", "pdf_size": 494361, "rating": "4;5;6;7", "confidence": "4;4;3;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "154;78;21;68", "wc_strengths": "46;67;34;115", "wc_weaknesses": "486;248;88;152", "wc_questions": "2;63;1;138", "wc_limitations": "4;5;1;12", "wc_review": "692;461;145;485", "wc_reply_reviewers": "0;164;43;25", "wc_reply_authors": "0;146;196;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.25, 47.70940682926167 ], "wc_strengths_avg": [ 65.5, 30.923292192132454 ], "wc_weaknesses_avg": [ 243.5, 151.1447981241829 ], "wc_questions_avg": [ 51.0, 56.15603262339675 ], "wc_limitations_avg": [ 5.5, 4.031128874149275 ], "wc_review_avg": [ 445.75, 195.4883308537878 ], "wc_reply_reviewers_avg": [ 58.0, 63.075351762792415 ], "wc_reply_authors_avg": [ 85.5, 87.30836156978323 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17875815499343838236&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;;berkeley.edu;berkeley.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Causal Discovery from Subsampled Time Series with Proxy Variables", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70936", "id": "etYk6TeO2q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86cba2b31d17f237866a2e6c52c7878a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=etYk6TeO2q", "openreview": "https://openreview.net/forum?id=etYk6TeO2q", "poster": "/media/PosterPDFs/NeurIPS%202023/70936.png?t=1701737407.0916796", "slides": "https://nips.cc/virtual/2023/poster/70936", "video": "https://nips.cc/virtual/2023/poster/70936", "author_site": "Mingzhou Liu, Xinwei Sun, Lingjing Hu, Yizhou Wang", "tldr": "", "abstract": "Inferring causal structures from time series data is the central interest of many scientific inquiries. A major barrier to such inference is the problem of subsampling, *i.e.*, the frequency of measurement is much lower than that of causal influence. To overcome this problem, numerous methods have been proposed, yet either was limited to the linear case or failed to achieve identifiability. 
In this paper, we propose a constraint-based algorithm that can identify the entire causal structure from subsampled time series, without any parametric constraint. Our observation is that the challenge of subsampling arises mainly from hidden variables at the unobserved time steps. Meanwhile, every hidden variable has an observed proxy, which is essentially itself at some observable time in the future, benefiting from the temporal structure. Based on these, we can leverage the proxies to remove the bias induced by the hidden variables and hence achieve identifiability. Following this intuition, we propose a proxy-based causal discovery algorithm. Our algorithm is nonparametric and can achieve full causal identification. Theoretical advantages are reflected in synthetic and real-world experiments.", "keywords": "causal discovery;time series;subsampling;proxy variables", "primary_area": "", "supplementary_material": "/attachment/fecde1a0ca6b867050f6b212d1ac7a6de8d90cad.zip", "author": "Mingzhou Liu;Xinwei Sun;Lingjing Hu;Yizhou Wang", "authorids": "~Mingzhou_Liu1;~Xinwei_Sun1;~Lingjing_Hu1;~Yizhou_Wang1", "gender": "M;M;F;M", "homepage": ";https://sunxinwei0625.github.io/sunxw.github.io/;https://www.researchgate.net/profile/Lingjing_Hu;https://cfcs.pku.edu.cn/wangyizhou/", "dblp": "159/6544-1;145/6592-1;;71/3387-1", "google_scholar": ";;;831z_VcAAAAJ", "orcid": "0000-0002-0297-0938;;;", "linkedin": ";;;", "or_profile": "~Mingzhou_Liu1;~Xinwei_Sun1;~Lingjing_Hu1;~Yizhou_Wang1", "aff": "Peking University;Fudan University;Medical Imaging Technology Department, Capital Medical University, China;Peking University", "aff_domain": "pku.edu.cn;fudan.edu.cn;ccmu.edu.cn;pku.edu.cn", "position": "PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023causal,\ntitle={Causal Discovery from Subsampled Time Series with Proxy Variables},\nauthor={Mingzhou Liu and Xinwei Sun and Lingjing Hu and Yizhou Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=etYk6TeO2q}\n}", "github": "", "project": "", "reviewers": "7zAn;XP2k;Y9xG;GTBD;3AR7", "pdf_size": 1165650, "rating": "5;5;6;6;7", "confidence": "5;3;4;3;2", "soundness": "3;3;3;2;4", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "31;102;171;57;38", "wc_strengths": "20;30;245;38;36", "wc_weaknesses": "129;85;281;76;137", "wc_questions": "4;16;10;3;109", "wc_limitations": "1;8;40;3;1", "wc_review": "185;241;747;177;321", "wc_reply_reviewers": "0;38;47;10;6", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.8, 51.88217420270666 ], "wc_strengths_avg": [ 73.8, 85.82866653979892 ], "wc_weaknesses_avg": [ 141.6, 73.64672429918387 ], "wc_questions_avg": [ 28.4, 40.568953646846744 ], "wc_limitations_avg": [ 10.6, 14.921125962875589 ], "wc_review_avg": [ 334.2, 212.71426844478486 ], "wc_reply_reviewers_avg": [ 20.2, 18.701871564097534 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6813851438692469, "gs_citation": 9, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=14685069507411053779&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;fudan.edu.cn;ccmu.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Peking University;Fudan University;Capital Medical University", "aff_unique_dep": ";;Medical Imaging Technology Department", "aff_unique_url": "http://www.pku.edu.cn;https://www.fudan.edu.cn;http://www.cmu.edu.cn", "aff_unique_abbr": "Peking U;Fudan;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "VPP: Efficient Conditional 3D Generation via Voxel-Point Progressive Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70935", "id": "etd0ebzGOG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54d2d38a56a74387d5916ee40e462295-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=etd0ebzGOG", "openreview": "https://openreview.net/forum?id=etd0ebzGOG", "poster": "/media/PosterPDFs/NeurIPS%202023/70935.png?t=1701983611.7861555", "slides": "https://nips.cc/virtual/2023/poster/70935", "video": "https://nips.cc/virtual/2023/poster/70935", "author_site": "Zekun Qi, Muzhou Yu, Runpei Dong, Kaisheng Ma", "tldr": "", "abstract": "Conditional\u00a03D\u00a0generation\u00a0is\u00a0undergoing\u00a0a\u00a0significant\u00a0advancement,\u00a0enabling\u00a0the\u00a0free\u00a0creation\u00a0of\u00a03D\u00a0content\u00a0from\u00a0inputs\u00a0such\u00a0as\u00a0text\u00a0or\u00a02D\u00a0images.\u00a0However,\u00a0previous\u00a0approaches\u00a0have\u00a0suffered\u00a0from\u00a0low\u00a0inference\u00a0efficiency,\u00a0limited\u00a0generation\u00a0categories,\u00a0and\u00a0restricted\u00a0downstream\u00a0applications.\u00a0In\u00a0this\u00a0work,\u00a0we\u00a0revisit\u00a0the\u00a0impact\u00a0of\u00a0different\u00a03D\u00a0representations\u00a0on\u00a0generation\u00a0quality\u00a0and\u00a0efficiency.\u00a0We\u00a0propose\u00a0a\u00a0progressive\u00a0generation\u00a0method\u00a0through\u00a0Voxel-Point\u00a0Progressive\u00a0Representation\u00a0(VPP).\u00a0VPP\u00a0leverages\u00a0structured\u00a0voxel\u00a0representation\u00a0in\u00a0the\u00a0proposed\u00a0Voxel\u00a0Semantic\u00a0Generator\u00a0and\u00a0the\u00a0sparsity\u00a0of\u00a0unstructured\u00a0point\u00a0representation\u00a0in\u00a0the\u00a0Point\u00a0Upsampler,\u00a0enabling\u00a0efficient\u00a0generation\u00a0of\u00a0multi-category\u00a0objects.\u00a0VPP\u00a0can\u00a0generate\u00a0high-quality\u00a08K\u00a0point\u00a0clouds\u00a0within\u00a00.2\u00a0seconds.\u00a0Additionally,\u00a0the\u00a0masked\u00a0generation\u00a0Transformer\u00a0allows\u00a0for\u00a0various\u00a03D\u00a0downstream\u00a0tasks,\u00a0such\u00a0as\u00a0generation,\u00a0editing,\u00a0completion,\u00a0and\u00a0pre-training.\u00a0Extensive\u00a0experiments\u00a0demonstrate\u00a0that\u00a0VPP\u00a0efficiently\u00a0generates\u00a0high-fidelity\u00a0and\u00a0diverse\u00a03D\u00a0shapes\u00a0across\u00a0different\u00a0categories,\u00a0while\u00a0also\u00a0exhibiting\u00a0excellent\u00a0representation\u00a0transfer\u00a0performance.\u00a0Codes\u00a0will\u00a0be\u00a0released\u00a0at\u00a0https://github.com/qizekun/VPP.", "keywords": "Text to shape generation;3D shape generation;Efficient inference;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/0c6450aa38feb9ced875ade894bc14f5c1db6d1b.zip", "author": "Zekun Qi;Muzhou Yu;Runpei Dong;Kaisheng 
Ma", "authorids": "~Zekun_Qi2;~Muzhou_Yu1;~Runpei_Dong1;~Kaisheng_Ma1", "gender": "M;M;M;M", "homepage": "https://qizekun.github.io/;https://orcid.org/0000-0001-5304-841X;https://runpeidong.web.illinois.edu/;http://group.iiis.tsinghua.edu.cn/~maks/index.html", "dblp": "182/3981;;298/8727;133/4053.html", "google_scholar": "ap8yc3oAAAAJ;;z2SoXI8AAAAJ;VtDpVoEAAAAJ", "orcid": ";0000-0001-5304-841X;;0000-0001-9226-3366", "linkedin": ";;;", "or_profile": "~Zekun_Qi2;~Muzhou_Yu1;~Runpei_Dong1;~Kaisheng_Ma1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University, Tsinghua University;;", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;;", "position": "MS student;PhD student;;", "bibtex": "@inproceedings{\nqi2023vpp,\ntitle={{VPP}: Efficient Conditional 3D Generation via Voxel-Point Progressive Representation},\nauthor={Zekun Qi and Muzhou Yu and Runpei Dong and Kaisheng Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=etd0ebzGOG}\n}", "github": "", "project": "", "reviewers": "THuC;A5WF;YMLb;AZ71;U3Fo", "pdf_size": 7578053, "rating": "4;5;6;6;7", "confidence": "4;5;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;4;3;2;3", "presentation": "3;2;3;3;3", "wc_summary": "47;65;119;151;46", "wc_strengths": "43;41;49;47;17", "wc_weaknesses": "165;177;356;115;78", "wc_questions": "76;54;28;24;55", "wc_limitations": "1;2;50;11;7", "wc_review": "332;339;602;348;203", "wc_reply_reviewers": "0;0;18;62;18", "wc_reply_authors": "52;52;22;41;28", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 85.6, 42.13122357587066 ], "wc_strengths_avg": [ 39.4, 11.551623262554921 ], "wc_weaknesses_avg": [ 178.2, 95.73170843560665 ], "wc_questions_avg": [ 47.4, 19.2 ], "wc_limitations_avg": [ 14.2, 18.258148865643527 ], "wc_review_avg": [ 364.8, 129.97445902945702 ], "wc_reply_reviewers_avg": [ 19.6, 22.676860452893386 ], "wc_reply_authors_avg": [ 39.0, 12.263767773404714 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.31008683647302115, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13338900918748342344&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "xjtu.edu.cn;xjtu.edu.cn;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Geometry-Aware Adaptation for Pretrained Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70934", "id": "exGOXqxR0L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bbc8b6038603e6170e35f89e3c3e296-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=exGOXqxR0L", "openreview": "https://openreview.net/forum?id=exGOXqxR0L", "poster": "/media/PosterPDFs/NeurIPS%202023/70934.png?t=1702113512.0898058", "slides": "https://nips.cc/virtual/2023/poster/70934", "video": "https://nips.cc/virtual/2023/poster/70934", "author_site": "Nicholas 
Roberts, Xintong Li, Dyah Adila, Sonia Cromp, Tzu-Heng Huang, Jitian Zhao, Frederic Sala", "tldr": "", "abstract": "Machine learning models---including prominent zero-shot models---are often trained on datasets whose labels are only a small proportion of a larger label space. Such spaces are commonly equipped with a metric that relates the labels via distances between them. We propose a simple approach to exploit this information to adapt the trained model to reliably predict new classes---or, in the case of zero-shot prediction, to improve its performance---without any additional training. Our technique is a drop-in replacement of the standard prediction rule, swapping $\\text{argmax}$ with the Fr\u00e9chet mean. We provide a comprehensive theoretical analysis for this approach, studying (i) learning-theoretic results trading off label space diameter, sample complexity, and model dimension, (ii) characterizations of the full range of scenarios in which it is possible to predict any unobserved class, and (iii) an optimal active learning-like next class selection procedure to obtain optimal training classes for when it is not possible to predict the entire range of unobserved classes. Empirically, using easily-available external metrics, our proposed approach, Loki, gains up to 29.7% relative improvement over SimCLR on ImageNet and scales to hundreds of thousands of classes. When no such metric is available, Loki can use self-derived metrics from class embeddings and obtains a 10.5% improvement on pretrained zero-shot models such as CLIP.", "keywords": "structured prediction;learning on graphs;partially observed label spaces;high cardinality label spaces", "primary_area": "", "supplementary_material": "/attachment/b65dcfd06c5779fdf0889ba87045d9cf7c0c7cce.pdf", "author": "Nicholas Roberts;Xintong Li;Dyah Adila;Sonia Cromp;Tzu-Heng Huang;Jitian Zhao;Frederic Sala", "authorids": "~Nicholas_Roberts2;~Xintong_Li2;~Dyah_Adila1;~Sonia_Cromp1;~Tzu-Heng_Huang1;~Jitian_Zhao1;~Frederic_Sala1", "gender": "F;F;F;M;F;M;M", "homepage": "https://kaylee0501.github.io/;;;https://zihengh1.github.io/;https://jzhao326.github.io/;https://pages.cs.wisc.edu/~fredsala/;https://nick11roberts.science/", "dblp": ";;;185/7539;;133/3602;", "google_scholar": "Sw5mq4cAAAAJ;;;yIZ8NCQAAAAJ;;9KhIkNkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0002-0625-9182", "linkedin": "xintong-li-970ab31b5/;dyahadila/;sonia-cromp;zihengh1/;jitian-zhao-931b5a175/;;nick11roberts/", "or_profile": "~Xintong_Li2;~Dyah_Adila1;~Sonia_Cromp1;~Tzu-Heng_Huang1;~Jitian_Zhao1;~Frederic_Sala1;~Nicholas_Carl_Roberts1", "aff": "University of Wisconsin - Madison;University of Wisconsin, Madison;Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin, Madison;Microsoft", "aff_domain": "wisc.edu;wisc.edu;cs.wisc.edu;wisc.edu;wisc.edu;wisc.edu;microsoft.com", "position": "Undergrad student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Intern", "bibtex": "@inproceedings{\nroberts2023geometryaware,\ntitle={Geometry-Aware Adaptation for Pretrained Models},\nauthor={Nicholas Roberts and Xintong Li and Dyah Adila and Sonia Cromp and Tzu-Heng Huang and Jitian Zhao and Frederic Sala},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=exGOXqxR0L}\n}", "github": "", "project": "", "reviewers": "dZLD;zgY7;cqEn;2m87;4Uud", "pdf_size": 
3292734, "rating": "5;5;6;7;7", "confidence": "3;4;2;3;3", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "81;105;51;158;114", "wc_strengths": "43;36;42;273;138", "wc_weaknesses": "46;393;39;68;125", "wc_questions": "164;58;46;86;158", "wc_limitations": "1;14;15;16;15", "wc_review": "335;606;193;601;550", "wc_reply_reviewers": "32;44;22;0;53", "wc_reply_authors": "52;57;35;0;65", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 101.8, 35.58314207598874 ], "wc_strengths_avg": [ 106.4, 91.51743003384655 ], "wc_weaknesses_avg": [ 134.2, 132.8809993941948 ], "wc_questions_avg": [ 102.4, 49.612901547883695 ], "wc_limitations_avg": [ 12.2, 5.635601121442148 ], "wc_review_avg": [ 457.0, 165.0248466140781 ], "wc_reply_reviewers_avg": [ 30.2, 18.400000000000002 ], "wc_reply_authors_avg": [ 41.8, 23.094588110637524 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.35355339059327373, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3161940622778146777&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "wisc.edu;wisc.edu;cs.wisc.edu;wisc.edu;wisc.edu;wisc.edu;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;0;0;0;1;2", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu;https://www.microsoft.com", "aff_unique_abbr": "UW-Madison;UW;Microsoft", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Dictionary for Visual Attention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70933", "id": "exPzwOhBgx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b113e1441ad107b80c576b5028fd2c51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=exPzwOhBgx", "openreview": "https://openreview.net/forum?id=exPzwOhBgx", "poster": "/media/PosterPDFs/NeurIPS%202023/70933.png?t=1697545203.1390631", "slides": "https://nips.cc/virtual/2023/poster/70933", "video": "https://nips.cc/virtual/2023/poster/70933", "author_site": "Yingjie Liu, Xuan Liu, Hui Yu, XUAN TANG, Xian Wei", "tldr": "", "abstract": "Recently, the attention mechanism has shown outstanding competence in capturing global structure information and long-range relationships within data, thus enhancing the performance of deep vision models on various computer vision tasks. In this work, we propose a novel dictionary learning-based attention (\\textit{Dic-Attn}) module, which models this issue as a decomposition and reconstruction problem with the sparsity prior, inspired by sparse coding in the human visual perception system. The proposed \\textit{Dic-Attn} module decomposes the input into a dictionary and corresponding sparse representations, allowing for the disentanglement of underlying nonlinear structural information in visual data and the reconstruction of an attention embedding. 
By applying transformation operations in the spatial and channel domains, the module dynamically selects the dictionary's atoms and sparse representations. Finally, the updated dictionary and sparse representations capture the global contextual information and reconstruct the attention maps. The proposed \\textit{Dic-Attn} module is designed with plug-and-play compatibility, allowing for integration into deep attention encoders. Our approach offers an intuitive and elegant means to exploit the discriminative information from data, promoting visual attention construction. Extensive experimental results on various computer vision tasks, e.g., image and point cloud classification, validate that our method achieves promising performance, and shows a strong competitive comparison with state-of-the-art attention methods.", "keywords": "dictionary learning;attention;transformer;computer vision;point cloud", "primary_area": "", "supplementary_material": "/attachment/27d5b32afe25d2fb60d56b1799e936656426847f.pdf", "author": "Yingjie Liu;Xuan Liu;Hui Yu;Xuan Tang;Xian Wei", "authorids": "~Yingjie_Liu2;~Xuan_Liu4;~Hui_Yu4;~Xuan_Tang3;~Xian_Wei1", "gender": ";F;M;F;M", "homepage": "https://www.researchgate.net/profile/Yingjie-Liu-25;https://blog.csdn.net/weirdolx?spm=1000.2115.3001.5343;;https://faculty.ecnu.edu.cn/_s15/tx2_21642/main.psp;https://www.researchgate.net/", "dblp": ";;;;139/0725", "google_scholar": ";;;mFj-I10AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-8670-9990;;", "linkedin": ";;;;", "or_profile": "~Yingjie_Liu2;~Xuan_Liu4;~Hui_Yu4;~Xuan_Tang3;~Xian_Wei1", "aff": "East China Normal University;Liaoning Technical University;;East China Normal University;East China Normal University", "aff_domain": "ecnu.edu.cn;lntu.edu.cn;;ecnu.edu.cn;ecnu.edu.cn", "position": "PhD student;MS student;;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nliu2023learning,\ntitle={Learning Dictionary for Visual Attention},\nauthor={Yingjie Liu and Xuan Liu and Hui Yu and Xuan Tang and Xian Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=exPzwOhBgx}\n}", "github": "", "project": "", "reviewers": "q2Dg;Vec1;nmhX;1dqN;2tL6", "pdf_size": 4079441, "rating": "5;5;5;6;7", "confidence": "4;4;3;4;5", "soundness": "2;2;3;2;3", "novelty": "1;2;3;2;3", "presentation": "2;2;3;2;3", "wc_summary": "77;49;73;70;98", "wc_strengths": "29;41;124;62;226", "wc_weaknesses": "255;201;90;27;231", "wc_questions": "34;4;5;2;36", "wc_limitations": "13;4;10;2;5", "wc_review": "408;299;302;163;596", "wc_reply_reviewers": "44;281;0;39;0", "wc_reply_authors": "67;553;0;34;0", "reply_reviewers": "1;2;0;1;0", "reply_authors": "2;3;1;2;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 73.4, 15.653753543479594 ], "wc_strengths_avg": [ 96.4, 72.59917354901502 ], "wc_weaknesses_avg": [ 160.8, 87.55889446538256 ], "wc_questions_avg": [ 16.2, 15.393505123915086 ], "wc_limitations_avg": [ 6.8, 4.069397989875161 ], "wc_review_avg": [ 353.6, 144.00638874716634 ], "wc_reply_reviewers_avg": [ 72.8, 105.75329782091903 ], "wc_reply_authors_avg": [ 130.8, 212.5609559632248 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], 
"corr_rating_confidence": 0.7905694150420948, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18123376360101931048&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "ecnu.edu.cn;lntu.edu.cn;;ecnu.edu.cn;ecnu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "East China Normal University;Liaoning Technical University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ecnu.edu.cn;http://www.lntu.edu.cn/", "aff_unique_abbr": "ECNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Model Spider: Learning to Rank Pre-Trained Models Efficiently", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70932", "id": "exg62lfHrB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c71b14637802ed08eaa3cf50342b2b9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=exg62lfHrB", "openreview": "https://openreview.net/forum?id=exg62lfHrB", "poster": "/media/PosterPDFs/NeurIPS%202023/70932.png?t=1702252563.2872112", "slides": "https://nips.cc/virtual/2023/poster/70932", "video": "https://nips.cc/virtual/2023/poster/70932", "author_site": "Yi-Kai Zhang, Ting-Ji Huang, Yao-Xiang Ding, De-Chuan Zhan, Han-Jia Ye", "tldr": "", "abstract": "Figuring out which Pre-Trained Model (PTM) from a model zoo fits the target task is essential to take advantage of plentiful model resources. With the availability of numerous heterogeneous PTMs from diverse fields, efficiently selecting the most suitable one is challenging due to the time-consuming costs of carrying out forward or backward passes over all PTMs. In this paper, we propose Model Spider, which tokenizes both PTMs and tasks by summarizing their characteristics into vectors to enable efficient PTM selection. By leveraging the approximated performance of PTMs on a separate set of training tasks, Model Spider learns to construct representation and measure the fitness score between a model-task pair via their representation. The ability to rank relevant PTMs higher than others generalizes to new tasks. With the top-ranked PTM candidates, we further learn to enrich task repr. with their PTM-specific semantics to re-rank the PTMs for better selection. Model Spider balances efficiency and selection ability, making PTM selection like a spider preying on a web. Model Spider exhibits promising performance across diverse model zoos, including visual models and Large Language Models (LLMs). 
Code is available at https://github.com/zhangyikaii/Model-Spider.", "keywords": "Pre-trained Model Ranking;Transfer Learning", "primary_area": "", "supplementary_material": "", "author": "Yi-Kai Zhang;Ting-Ji Huang;Yao-Xiang Ding;De-Chuan Zhan;Han-Jia Ye", "authorids": "~Yi-Kai_Zhang2;~Ting-Ji_Huang1;~Yao-Xiang_Ding2;~De-Chuan_Zhan1;~Han-Jia_Ye1", "gender": "M;M;M;M;", "homepage": "http://www.lamda.nju.edu.cn/zhangyk;https://yaoxiangding.github.io/;http://www.lamda.nju.edu.cn/zhandc/;http://www.lamda.nju.edu.cn/yehj;http://www.lamda.nju.edu.cn/huangtj/", "dblp": "330/8964;186/8301-1;74/498;165/3014;", "google_scholar": ";POTjhnUAAAAJ;mYJf4TcAAAAJ;mgOYhtoAAAAJ;", "orcid": ";0000-0001-8580-1103;0000-0002-3533-2078;;", "linkedin": ";;;;", "or_profile": "~Yi-Kai_Zhang2;~Yao-Xiang_Ding2;~De-Chuan_Zhan1;~Han-Jia_Ye1;~Tingji_Huang2", "aff": "Nanjing University;Zhejiang University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;zju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;Assistant Professor;Full Professor;Associate Professor;MS student", "bibtex": "@inproceedings{\nzhang2023model,\ntitle={Model Spider: Learning to Rank Pre-Trained Models Efficiently},\nauthor={Yi-Kai Zhang and Ting-Ji Huang and Yao-Xiang Ding and De-Chuan Zhan and Han-Jia Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=exg62lfHrB}\n}", "github": "", "project": "", "reviewers": "mKeC;1u8a;iJup;PpFQ;Gb1N", "pdf_size": 4781138, "rating": "5;5;7;7;7", "confidence": "4;3;3;2;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "78;181;85;272;166", "wc_strengths": "31;54;25;21;137", "wc_weaknesses": "200;89;144;12;116", "wc_questions": "86;14;2;4;26", "wc_limitations": "35;72;6;6;19", "wc_review": "430;410;262;315;464", "wc_reply_reviewers": "55;13;0;11;33", "wc_reply_authors": "23;20;0;17;18", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 156.4, 71.14098677977415 ], "wc_strengths_avg": [ 53.6, 43.23702117398931 ], "wc_weaknesses_avg": [ 112.2, 62.1494971821977 ], "wc_questions_avg": [ 26.4, 30.994193004496825 ], "wc_limitations_avg": [ 27.6, 24.630062931304092 ], "wc_review_avg": [ 376.2, 75.5417765213395 ], "wc_reply_reviewers_avg": [ 22.4, 19.46894963782073 ], "wc_reply_authors_avg": [ 15.6, 8.06473806146238 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2077181197094935543&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;zju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Nanjing University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "Nanjing U;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "An Adaptive Algorithm for Learning with Unknown Distribution Drift", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/70931", "id": "exiXmAfuDK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1fe6f635fe265292aba3987b5123ae3d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=exiXmAfuDK", "openreview": "https://openreview.net/forum?id=exiXmAfuDK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70931", "video": "https://nips.cc/virtual/2023/poster/70931", "author_site": "Alessio Mazzetto, Eli Upfal", "tldr": "", "abstract": "We develop and analyze a general technique for learning with an unknown distribution drift. Given a sequence of independent observations from the last $T$ steps of a drifting distribution, our algorithm agnostically learns a family of functions with respect to the current distribution at time $T$. Unlike previous work, our technique does not require prior knowledge about the magnitude of the drift. Instead, the algorithm adapts to the sample data. Without explicitly estimating the drift, the algorithm learns a family of functions with almost the same error as a learning algorithm that knows the magnitude of the drift in advance. Furthermore, since our algorithm adapts to the data, it can guarantee a better learning error than an algorithm that relies on loose bounds on the drift. We demonstrate the application of our technique in two fundamental learning scenarios: binary classification and linear regression.", "keywords": "statistical learning;learning theory;machine learning;supervised learning;non-stationary;transfer learning;distribution drift", "primary_area": "", "supplementary_material": "", "author": "Alessio Mazzetto;Eli Upfal", "authorids": "~Alessio_Mazzetto1;~Eli_Upfal1", "gender": "M;M", "homepage": "https://cs.brown.edu/~amazzett/;", "dblp": "239/8316.html;u/EliUpfal", "google_scholar": "FkZ0hSsAAAAJ;", "orcid": "0009-0006-5893-0915;", "linkedin": ";", "or_profile": "~Alessio_Mazzetto1;~Eli_Upfal1", "aff": "Brown University;Brown University", "aff_domain": "brown.edu;brown.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nmazzetto2023an,\ntitle={An Adaptive Algorithm for Learning with Unknown Distribution Drift},\nauthor={Alessio Mazzetto and Eli Upfal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=exiXmAfuDK}\n}", "github": "", "project": "", "reviewers": "ePCf;k7dA;RWUF;sYaX", "pdf_size": 357156, "rating": "4;4;4;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "34;47;47;270", "wc_strengths": "25;57;29;73", "wc_weaknesses": "9;239;170;238", "wc_questions": "91;69;37;19", "wc_limitations": "5;6;1;88", "wc_review": "164;418;284;688", "wc_reply_reviewers": "24;241;0;177", "wc_reply_authors": "0;517;0;188", "reply_reviewers": "1;2;0;2", "reply_authors": "1;2;1;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.5, 98.58118481738795 ], "wc_strengths_avg": [ 46.0, 19.87460691435179 ], "wc_weaknesses_avg": [ 164.0, 93.75766635321082 ], "wc_questions_avg": [ 54.0, 27.874719729532707 ], "wc_limitations_avg": [ 25.0, 36.42114770294862 ], "wc_review_avg": [ 388.5, 194.86597958597082 ], "wc_reply_reviewers_avg": [ 110.5, 101.4211516400795 ], "wc_reply_authors_avg": [ 176.25, 211.17335887843427 ], "reply_reviewers_avg": [ 1.25, 
0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2391377914258925266&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "brown.edu;brown.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Brown University", "aff_unique_dep": "", "aff_unique_url": "https://www.brown.edu", "aff_unique_abbr": "Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Continual Learning for Instruction Following from Realtime Feedback", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70930", "id": "ez6Cb0ZGzG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/666cccc6376058e251315b4de7e085b9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ez6Cb0ZGzG", "openreview": "https://openreview.net/forum?id=ez6Cb0ZGzG", "poster": "/media/PosterPDFs/NeurIPS%202023/70930.png?t=1702236781.9610953", "slides": "https://nips.cc/virtual/2023/poster/70930", "video": "https://nips.cc/virtual/2023/poster/70930", "author_site": "Alane Suhr, Yoav Artzi", "tldr": "", "abstract": "We propose and deploy an approach to continually train an instruction-following agent from feedback provided by users during collaborative interactions. During interaction, human users instruct an agent using natural language, and provide realtime binary feedback as they observe the agent following their instructions. We design a contextual bandit learning approach, converting user feedback to immediate reward. We evaluate through thousands of human-agent interactions, demonstrating 15.4% absolute improvement in instruction execution accuracy over time. 
We also show our approach is robust to several design variations, and that the feedback signal is roughly equivalent to the learning signal of supervised demonstration data.", "keywords": "continual learning;interaction;instruction following;user feedback;natural language processing;language grounding;situated interaction;collaboration", "primary_area": "", "supplementary_material": "/attachment/060c12b55eb821626756dcc23c205a0c73a42f8d.zip", "author": "Alane Suhr;Yoav Artzi", "authorids": "~Alane_Suhr1;~Yoav_Artzi1", "gender": "Not Specified;", "homepage": "http://www.alanesuhr.com;", "dblp": "203/9306;", "google_scholar": "daslsUkAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Alane_Suhr1;~Yoav_Artzi1", "aff": "Allen Institute for Artificial Intelligence;", "aff_domain": "allenai.org;", "position": "Postdoc;", "bibtex": "@inproceedings{\nsuhr2023continual,\ntitle={Continual Learning for Instruction Following from Realtime Feedback},\nauthor={Alane Suhr and Yoav Artzi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ez6Cb0ZGzG}\n}", "github": "", "project": "", "reviewers": "4ZhD;zvAL;Q7Gw;d6RP;hEYa", "pdf_size": 1693125, "rating": "5;5;7;7;8", "confidence": "2;3;3;3;5", "soundness": "3;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;2;3;4;4", "wc_summary": "77;77;85;57;278", "wc_strengths": "45;35;114;78;115", "wc_weaknesses": "42;53;121;9;203", "wc_questions": "121;89;62;119;99", "wc_limitations": "11;24;34;7;45", "wc_review": "296;278;416;270;740", "wc_reply_reviewers": "26;941;26;28;32", "wc_reply_authors": "0;1000;0;0;0", "reply_reviewers": "1;4;1;1;1", "reply_authors": "1;4;1;1;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 114.8, 82.12283482686163 ], "wc_strengths_avg": [ 77.4, 33.46998655512129 ], "wc_weaknesses_avg": [ 85.6, 69.09008611950054 ], "wc_questions_avg": [ 98.0, 21.67025611293046 ], "wc_limitations_avg": [ 24.2, 14.133647795243801 ], "wc_review_avg": [ 400.0, 178.02022357024495 ], "wc_reply_reviewers_avg": [ 210.6, 365.2065716823836 ], "wc_reply_authors_avg": [ 200.0, 400.0 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7824758900557374, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12412904110948102124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "allenai.org;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Allen Institute for Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://allenai.org", "aff_unique_abbr": "AI2", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "$\\texttt{TACO}$: Temporal Latent Action-Driven Contrastive Loss for Visual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70929", "id": "ezCsMOy1w9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96d00450ed65531ffe2996daed487536-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ezCsMOy1w9", "openreview": "https://openreview.net/forum?id=ezCsMOy1w9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70929", "video": 
"https://nips.cc/virtual/2023/poster/70929", "author_site": "Ruijie Zheng, Xiyao Wang, Yanchao Sun, Shuang Ma, Jieyu Zhao, Huazhe Xu, Hal Daum\u00e9 III, Furong Huang", "tldr": "", "abstract": "Despite recent progress in reinforcement learning (RL) from raw pixel data, sample inefficiency continues to present a substantial obstacle. \nPrior works have attempted to address this challenge by creating self-supervised auxiliary tasks, aiming to enrich the agent's learned representations with control-relevant information for future state prediction.\nHowever, these objectives are often insufficient to learn representations that can represent the optimal policy or value function, and they often consider tasks with small, abstract discrete action spaces and thus overlook the importance of action representation learning in continuous control.\nIn this paper, we introduce $\\texttt{TACO}$: $\\textbf{T}$emporal $\\textbf{A}$ction-driven $\\textbf{CO}$ntrastive Learning, a simple yet powerful temporal contrastive learning approach that facilitates the concurrent acquisition of latent state and action representations for agents. \n$\\texttt{TACO}$ simultaneously learns a state and an action representation by optimizing the mutual information between representations of current states paired with action sequences and representations of the corresponding future states. \nTheoretically, $\\texttt{TACO}$ can be shown to learn state and action representations that encompass sufficient information for control, thereby improving sample efficiency.\nFor online RL, $\\texttt{TACO}$ achieves 40% performance boost after one million environment interaction steps on average across nine challenging visual continuous control tasks from Deepmind Control Suite. \nIn addition, we show that $\\texttt{TACO}$ can also serve as a plug-and-play module adding to existing offline visual RL methods to establish the new state-of-the-art performance for offline visual RL across offline datasets with varying quality.", "keywords": "Deep Reinforcement Learning;Visual Reinforcement Learning;Online Visual RL;Offline Visual RL;Action Representation", "primary_area": "", "supplementary_material": "", "author": "Ruijie Zheng;Xiyao Wang;Yanchao Sun;Shuang Ma;Jieyu Zhao;Huazhe Xu;Hal Daum\u00e9 III;Furong Huang", "authorids": "~Ruijie_Zheng1;~Xiyao_Wang1;~Yanchao_Sun1;~Shuang_Ma3;~Jieyu_Zhao1;~Huazhe_Xu1;~Hal_Daum\u00e9_III1;~Furong_Huang1", "gender": ";M;F;F;M;M;F;F", "homepage": "http://www.ruijiezheng.com;;https://ycsun2017.github.io/home/index.html;http://jyzhao.net/;http://hxu.rocks;http://hal3.name;https://furong-huang.com;https://www.shuangma.me/", "dblp": "294/8474;;132/6840;59/2379-1;164/9006;77/2856.html;72/8513;98/3906", "google_scholar": ";puVqfbwAAAAJ;bloBY_QAAAAJ;9VaGBCQAAAAJ;t9HPFawAAAAJ;PbEw81gAAAAJ;13yyuCcAAAAJ;IHPRZuMAAAAJ", "orcid": ";;0000-0002-1137-9939;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Ruijie_Zheng1;~Xiyao_Wang1;~Yanchao_Sun1;~Jieyu_Zhao1;~Huazhe_Xu1;~Hal_Daum\u00e9_III1;~Furong_Huang1;~shuang_ma1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Tsinghua University;Microsoft;University of Maryland;Microsoft", "aff_domain": "cs.umd.edu;umd.edu;umd.edu;umd.edu;tsinghua.edu.cn;microsoft.com;cs.umd.edu;microsoft.com", "position": "PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Senior Principle Researcher;Assistant Professor;Senior Research Scientist", "bibtex": 
"@inproceedings{\nzheng2023texttttaco,\ntitle={\\${\\textbackslash}texttt\\{{TACO}\\}\\$: Temporal Latent Action-Driven Contrastive Loss for Visual Reinforcement Learning},\nauthor={Ruijie Zheng and Xiyao Wang and Yanchao Sun and Shuang Ma and Jieyu Zhao and Huazhe Xu and Hal Daum{\\'e} III and Furong Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ezCsMOy1w9}\n}", "github": "", "project": "", "reviewers": "oD4S;1TK9;Y2ee;gh9U", "pdf_size": 7422681, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "82;70;81;30", "wc_strengths": "41;63;65;37", "wc_weaknesses": "119;86;105;159", "wc_questions": "166;101;66;179", "wc_limitations": "34;29;5;2", "wc_review": "442;349;322;407", "wc_reply_reviewers": "188;30;13;229", "wc_reply_authors": "174;41;50;55", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.75, 21.1704392963396 ], "wc_strengths_avg": [ 51.5, 12.599603168354152 ], "wc_weaknesses_avg": [ 117.25, 26.799020504488592 ], "wc_questions_avg": [ 128.0, 46.416591861100706 ], "wc_limitations_avg": [ 17.5, 14.150971698084906 ], "wc_review_avg": [ 380.0, 47.16460537309732 ], "wc_reply_reviewers_avg": [ 115.0, 94.80770010922109 ], "wc_reply_authors_avg": [ 80.0, 54.502293529722216 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10356159954672406174&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.umd.edu;umd.edu;umd.edu;umd.edu;tsinghua.edu.cn;microsoft.com;cs.umd.edu;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;0;2", "aff_unique_norm": "University of Maryland;Tsinghua University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www/umd.edu;https://www.tsinghua.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "UMD;THU;Microsoft", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "CROMA: Remote Sensing Representations with Contrastive Radar-Optical Masked Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70928", "id": "ezqI5WgGvY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/11822e84689e631615199db3b75cd0e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ezqI5WgGvY", "openreview": "https://openreview.net/forum?id=ezqI5WgGvY", "poster": "/media/PosterPDFs/NeurIPS%202023/70928.png?t=1701375168.262021", "slides": "https://nips.cc/virtual/2023/poster/70928", "video": "https://nips.cc/virtual/2023/poster/70928", "author_site": "Anthony Fuller, Koreen Millard, James Green", "tldr": "", "abstract": "A vital and rapidly growing application, remote sensing offers vast yet sparsely labeled, spatially aligned multimodal data; this makes self-supervised learning algorithms invaluable. 
We present CROMA: a framework that combines contrastive and reconstruction self-supervised objectives to learn rich unimodal and multimodal representations. Our method separately encodes masked-out multispectral optical and synthetic aperture radar samples\u2014aligned in space and time\u2014and performs cross-modal contrastive learning. Another encoder fuses these sensors, producing joint multimodal encodings that are used to predict the masked patches via a lightweight decoder. We show that these objectives are complementary when leveraged on spatially aligned multimodal data. We also introduce X- and 2D-ALiBi, which spatially biases our cross- and self-attention matrices. These strategies improve representations and allow our models to effectively extrapolate to images up to $17.6\\times$ larger at test-time. CROMA outperforms the current SoTA multispectral model, evaluated on: four classification benchmarks\u2014finetuning (avg.$\\uparrow$ 1.8%), linear (avg.$\\uparrow$ 2.4%) and nonlinear (avg.$\\uparrow$ 1.4%) probing, $k$NN classification (avg.$\\uparrow$ 3.5%), and $K$-means clustering (avg.$\\uparrow$ 8.4%); and three segmentation benchmarks (avg.$\\uparrow$ 6.4%). CROMA\u2019s rich, optionally multimodal representations can be widely leveraged across remote sensing applications.", "keywords": "Remote Sensing;Earth Observation;Self-supervised learning;Multimodal", "primary_area": "", "supplementary_material": "/attachment/f09566c0624042fcbd055c178450c9b65c0bfe7e.zip", "author": "Anthony Fuller;Koreen Millard;James R Green", "authorids": "~Anthony_Fuller1;~Koreen_Millard1;~James_R_Green1", "gender": "M;F;M", "homepage": "https://antofuller.github.io/;;http://www.sce.carleton.ca/faculty/green", "dblp": "329/0632;196/9939;20/1472", "google_scholar": "https://scholar.google.ca/citations?user=9NvupxcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=nmxbwm4AAAAJ", "orcid": "0000-0001-8187-5850;0000-0002-6346-0325;0000-0002-6039-2355", "linkedin": ";;", "or_profile": "~Anthony_Fuller1;~Koreen_Millard1;~James_R_Green1", "aff": "Carleton University;Carleton University;Carleton University", "aff_domain": "carleton.ca;carleton.ca;carleton.ca", "position": "MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nfuller2023croma,\ntitle={{CROMA}: Remote Sensing Representations with Contrastive Radar-Optical Masked Autoencoders},\nauthor={Anthony Fuller and Koreen Millard and James R Green},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ezqI5WgGvY}\n}", "github": "", "project": "", "reviewers": "vsHk;too9;iZ73;FExC", "pdf_size": 6813372, "rating": "3;4;6;7", "confidence": "5;5;5;5", "soundness": "2;2;3;4", "novelty": "1;2;2;3", "presentation": "1;2;4;4", "wc_summary": "55;27;90;161", "wc_strengths": "45;34;139;135", "wc_weaknesses": "47;190;216;6", "wc_questions": "92;1;452;36", "wc_limitations": "86;1;166;51", "wc_review": "325;253;1063;389", "wc_reply_reviewers": "47;132;95;101", "wc_reply_authors": "0;313;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.299038105676658 ], "wc_summary_avg": [ 83.25, 50.13170154702511 ], "wc_strengths_avg": [ 88.25, 48.92532575262019 ], "wc_weaknesses_avg": [ 114.75, 89.9037679966752 ], "wc_questions_avg": [ 
145.25, 180.05190223932652 ], "wc_limitations_avg": [ 76.0, 60.10407640085654 ], "wc_review_avg": [ 507.5, 324.30656792609057 ], "wc_reply_reviewers_avg": [ 93.75, 30.425112982534674 ], "wc_reply_authors_avg": [ 78.25, 135.53297569226464 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10102623953764630905&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "carleton.ca;carleton.ca;carleton.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carleton University", "aff_unique_dep": "", "aff_unique_url": "https://carleton.ca", "aff_unique_abbr": "Carleton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "HubRouter: Learning Global Routing via Hub Generation and Pin-hub Connection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70927", "id": "f0Jj3C3Pnp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f7f98663c516fceb582354ee2d9d274d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f0Jj3C3Pnp", "openreview": "https://openreview.net/forum?id=f0Jj3C3Pnp", "poster": "/media/PosterPDFs/NeurIPS%202023/70927.png?t=1699945849.890369", "slides": "https://nips.cc/virtual/2023/poster/70927", "video": "https://nips.cc/virtual/2023/poster/70927", "author_site": "Xingbo Du, Chonghua Wang, Ruizhe Zhong, Junchi Yan", "tldr": "", "abstract": "Global Routing (GR) is a core yet time-consuming task in VLSI systems. It recently attracted efforts from the machine learning community, especially generative models, but they suffer from the non-connectivity of generated routes. We argue that the inherent non-connectivity can harm the advantage of its one-shot generation and has to be post-processed by traditional approaches. Thus, we propose a novel definition, called hub, which represents the key point in the route. Equipped with hubs, global routing is transferred from a pin-pin connection problem to a hub-pin connection problem. Specifically, to generate definitely-connected routes, this paper proposes a two-phase learning scheme named HubRouter, which includes 1) hub-generation phase: A condition-guided hub generator using deep generative models; 2) pin-hub-connection phase: An RSMT construction module that connects the hubs and pins using an actor-critic model. In the first phase, we incorporate typical generative models into a multi-task learning framework to perform hub generation and address the impact of sensitive noise points with stripe mask learning. During the second phase, HubRouter employs an actor-critic model to finish the routing, which is efficient and has very slight errors. Experiments on simulated and real-world global routing benchmarks are performed to show our approach's efficiency, particularly HubRouter outperforms the state-of-the-art generative global routing methods in wirelength, overflow, and running time. 
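To make the hub abstraction in the HubRouter summary above concrete, here is a toy sketch of the second (pin-hub connection) phase: each pin is joined to its nearest hub by an L-shaped rectilinear path, so the result is connected by construction whenever the hubs themselves are connected. The integer grid coordinates and the greedy nearest-hub rule are simplifications made for illustration; the paper uses an actor-critic RSMT module rather than this heuristic.

```python
# Toy sketch of a pin-hub connection step, under strong simplifying assumptions:
# integer grid coordinates and a greedy nearest-hub rule (not the paper's
# actor-critic RSMT construction).
def l_path(a, b):
    """L-shaped rectilinear path from a to b: horizontal leg, then vertical leg."""
    (ax, ay), (bx, by) = a, b
    horizontal = [(x, ay) for x in range(min(ax, bx), max(ax, bx) + 1)]
    vertical = [(bx, y) for y in range(min(ay, by), max(ay, by) + 1)]
    return horizontal + vertical

def connect(pins, hubs):
    """Join every pin to its nearest hub (Manhattan distance). The overall route
    is connected by construction whenever the hubs themselves are connected."""
    routes = []
    for px, py in pins:
        hub = min(hubs, key=lambda h: abs(h[0] - px) + abs(h[1] - py))
        routes.append(l_path((px, py), hub))
    return routes

pins = [(0, 0), (5, 1), (2, 6)]
hubs = [(2, 2), (4, 4)]
for route in connect(pins, hubs):
    print(route[0], "->", route[-1], f"({len(route)} grid cells)")
```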
Moreover, HubRouter also shows strength in other applications, such as RSMT construction and interactive path replanning.", "keywords": "global routing;generative models", "primary_area": "", "supplementary_material": "", "author": "Xingbo Du;Chonghua Wang;Ruizhe Zhong;Junchi Yan", "authorids": "~Xingbo_Du1;~Chonghua_Wang1;~Ruizhe_Zhong1;~Junchi_Yan2", "gender": "M;M;M;M", "homepage": "https://deepopo.github.io;https://philipwangovo.github.io/;;http://thinklab.sjtu.edu.cn/", "dblp": "246/3079;171/1034;335/1752;60/7949.html", "google_scholar": "7NfbVboAAAAJ;;rE0M3EoAAAAJ;ga230VoAAAAJ", "orcid": "0000-0003-2390-8188;;;0000-0001-9639-7679", "linkedin": ";;;", "or_profile": "~Xingbo_Du1;~Chonghua_Wang1;~Ruizhe_Zhong1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\ndu2023hubrouter,\ntitle={HubRouter: Learning Global Routing via Hub Generation and Pin-hub Connection},\nauthor={Xingbo Du and Chonghua Wang and Ruizhe Zhong and Junchi Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f0Jj3C3Pnp}\n}", "github": "", "project": "", "reviewers": "2ZCa;Co1X;GN8b;Vi96", "pdf_size": 970533, "rating": "4;5;6;7", "confidence": "3;2;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "51;128;70;90", "wc_strengths": "18;73;130;129", "wc_weaknesses": "60;80;61;52", "wc_questions": "204;99;52;23", "wc_limitations": "24;1;43;1", "wc_review": "357;381;356;295", "wc_reply_reviewers": "0;10;58;14", "wc_reply_authors": "0;0;36;23", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.75, 28.525208149985513 ], "wc_strengths_avg": [ 87.5, 46.28444663167099 ], "wc_weaknesses_avg": [ 63.25, 10.280442597476044 ], "wc_questions_avg": [ 94.5, 68.79135120056881 ], "wc_limitations_avg": [ 17.25, 17.583728273605686 ], "wc_review_avg": [ 347.25, 31.78344694963087 ], "wc_reply_reviewers_avg": [ 20.5, 22.242976419535225 ], "wc_reply_authors_avg": [ 14.75, 15.449514555480375 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5932673220727684471&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Iterative Reachability Estimation for Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70926", "id": "f2U4HCY8bg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dca63f2650fe9e88956c1b68440b8ee9-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=f2U4HCY8bg", "openreview": "https://openreview.net/forum?id=f2U4HCY8bg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70926", "video": "https://nips.cc/virtual/2023/poster/70926", "author_site": "Milan Ganai, Zheng Gong, Chenning Yu, Sylvia Herbert, Sicun Gao", "tldr": "", "abstract": "Ensuring safety is important for the practical deployment of reinforcement learning (RL). Various challenges must be addressed, such as handling stochasticity in the environments, providing rigorous guarantees of persistent state-wise safety satisfaction, and avoiding overly conservative behaviors that sacrifice performance. We propose a new framework, Reachability Estimation for Safe Policy Optimization (RESPO), for safety-constrained RL in general stochastic settings. In the feasible set where there exist violation-free policies, we optimize for rewards while maintaining persistent safety. Outside this feasible set, our optimization produces the safest behavior by guaranteeing entrance into the feasible set whenever possible with the least cumulative discounted violations. We introduce a class of algorithms using our novel reachability estimation function to optimize in our proposed framework and in similar frameworks such as those concurrently handling multiple hard and soft constraints. We theoretically establish that our algorithms almost surely converge to locally optimal policies of our safe optimization framework. We evaluate the proposed methods on a diverse suite of safe RL environments from Safety Gym, PyBullet, and MuJoCo, and show the benefits in improving both reward performance and safety compared with state-of-the-art baselines.", "keywords": "Constraints;Safety;Hamilton Jacobi Reachability;Deep Reinforcement Learning;Robotics", "primary_area": "", "supplementary_material": "/attachment/4aca5113f63ae76f5201818641898e3fc3d03f31.zip", "author": "Milan Ganai;Zheng Gong;Chenning Yu;Sylvia Lee Herbert;Sicun Gao", "authorids": "~Milan_Ganai1;~Zheng_Gong4;~Chenning_Yu1;~Sylvia_Lee_Herbert1;~Sicun_Gao1", "gender": ";M;;F;M", "homepage": "https://milanganai.github.io;;https://GitHub.com/rainorangelemon;https://sylviaherbert.com;", "dblp": ";;319/4367;192/3242;22/8296", "google_scholar": "LCMIfaQAAAAJ;;;;", "orcid": ";0000-0002-2358-4104;;0000-0002-3863-8945;", "linkedin": "milanganai/;;;;", "or_profile": "~Milan_Ganai1;~Zheng_Gong4;~Chenning_Yu1;~Sylvia_Lee_Herbert1;~Sicun_Gao1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "position": "Undergrad student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nganai2023iterative,\ntitle={Iterative Reachability Estimation for Safe Reinforcement Learning},\nauthor={Milan Ganai and Zheng Gong and Chenning Yu and Sylvia Lee Herbert and Sicun Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f2U4HCY8bg}\n}", "github": "", "project": "", "reviewers": "yyXc;teXi;YH1f;hXDB", "pdf_size": 6635511, "rating": "5;7;7;8", "confidence": "3;4;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "45;77;158;135", "wc_strengths": "19;60;44;170", "wc_weaknesses": "46;211;200;90", "wc_questions": "46;4;219;238", "wc_limitations": "167;6;6;7", "wc_review": 
"323;358;627;640", "wc_reply_reviewers": "0;23;52;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.75, 44.963179380466414 ], "wc_strengths_avg": [ 73.25, 57.73809401080018 ], "wc_weaknesses_avg": [ 136.75, 70.5952370914639 ], "wc_questions_avg": [ 126.75, 103.04701596844035 ], "wc_limitations_avg": [ 46.5, 69.57190524917368 ], "wc_review_avg": [ 487.0, 147.09350767454015 ], "wc_reply_reviewers_avg": [ 24.75, 18.430613120566555 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7466402188008752368&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Privacy Auditing with One (1) Training Run", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70925", "id": "f38EY21lBw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a6f6e0d6781d1cb8689192408946d73-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f38EY21lBw", "openreview": "https://openreview.net/forum?id=f38EY21lBw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70925", "video": "https://nips.cc/virtual/2023/poster/70925", "author_site": "Thomas Steinke, Milad Nasr, Matthew Jagielski", "tldr": "", "abstract": "We propose a scheme for auditing differentially private machine learning systems with a single training run. This exploits the parallelism of being able to add or remove multiple training examples independently. We analyze this using the connection between differential privacy and statistical generalization, which avoids the cost of group privacy. Our auditing scheme requires minimal assumptions about the algorithm and can be applied in the black-box or white-box setting. We demonstrate the effectiveness of our framework by applying it to DP-SGD, where we can achieve meaningful empirical privacy lower bounds by training only one model. 
In contrast, standard methods would require training hundreds of models.", "keywords": "Differential privacy;membership inference attacks;privacy auditing", "primary_area": "", "supplementary_material": "/attachment/6bf63f60c9f2255c824bb0672c728434fba2547f.pdf", "author": "Thomas Steinke;Milad Nasr;Matthew Jagielski", "authorids": "~Thomas_Steinke2;~Milad_Nasr2;~Matthew_Jagielski1", "gender": "M;;M", "homepage": "http://www.thomas-steinke.net/;https://people.cs.umass.edu/~milad/;https://jagielski.github.io/", "dblp": "https://dblp.uni-trier.de/pid/73/4025-2.html;;218/5156", "google_scholar": "kwnwhrgAAAAJ;k6-nvDAAAAAJ;_8rw_GMAAAAJ", "orcid": ";;", "linkedin": "thomas-steinke-2841248/;;", "or_profile": "~Thomas_Steinke2;~Milad_Nasr2;~Matthew_Jagielski1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nsteinke2023privacy,\ntitle={Privacy Auditing with One (1) Training Run},\nauthor={Thomas Steinke and Milad Nasr and Matthew Jagielski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f38EY21lBw}\n}", "github": "", "project": "", "reviewers": "x3UA;icCj;ZoZd;D1TM;U5hR", "pdf_size": 415527, "rating": "7;7;7;7;9", "confidence": "3;4;4;3;4", "soundness": "4;4;4;4;4", "novelty": "3;3;3;3;4", "presentation": "4;4;4;3;4", "wc_summary": "261;26;214;61;192", "wc_strengths": "65;24;84;152;106", "wc_weaknesses": "199;95;67;72;6", "wc_questions": "155;84;151;29;12", "wc_limitations": "29;8;10;25;6", "wc_review": "709;237;526;339;322", "wc_reply_reviewers": "9;32;53;19;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 150.8, 91.07667099757215 ], "wc_strengths_avg": [ 86.2, 42.531870403263476 ], "wc_weaknesses_avg": [ 87.8, 62.91390943185775 ], "wc_questions_avg": [ 86.2, 59.522768752805845 ], "wc_limitations_avg": [ 15.6, 9.478396488858229 ], "wc_review_avg": [ 426.6, 169.8194335168976 ], "wc_reply_reviewers_avg": [ 26.0, 15.388307249337076 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13145620078238790847&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Collaborative Alignment of NLP Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70924", "id": "f39Q3JyoIi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a8fa1382ea068f3f402b72081df16be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f39Q3JyoIi", "openreview": "https://openreview.net/forum?id=f39Q3JyoIi", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/70924", "video": "https://nips.cc/virtual/2023/poster/70924", "author_site": "Fereshte Khani, Marco Tulio Ribeiro", "tldr": "", "abstract": "Despite substantial advancements, Natural Language Processing (NLP) models often require post-training adjustments to enforce business rules, rectify undesired behavior, and align with user values. \nThese adjustments involve operationalizing \"concepts\"\u2014dictating desired model responses to certain inputs. \nHowever, it's difficult for a single entity to enumerate and define all possible concepts, indicating a need for a multi-user, collaborative model alignment framework. \nMoreover, the exhaustive delineation of a concept is challenging, and an improper approach can create shortcuts or interfere with original data or other concepts.\n\nTo address these challenges, we introduce CoAlign, a framework that enables multi-user interaction with the model, thereby mitigating individual limitations. \nCoAlign aids users in operationalizing their concepts using Large Language Models, and relying on the principle that NLP models exhibit simpler behaviors in local regions. \nOur main insight is learning a \\emph{local} model for each concept, and a \\emph{global} model to integrate the original data with all concepts.\nWe then steer a large language model to generate instances within concept boundaries where local and global disagree.\nOur experiments show CoAlign is effective at helping multiple users operationalize concepts and avoid interference for a variety of scenarios, tasks, and models.", "keywords": "alignment;collaborative alignment;debugging;nlp;interference;multi-user interaction", "primary_area": "", "supplementary_material": "/attachment/a2a20fa6738cfd747760cf0bad7090a875409c30.pdf", "author": "Fereshte Khani;Marco Tulio Ribeiro", "authorids": "~Fereshte_Khani1;~Marco_Tulio_Ribeiro1", "gender": "F;M", "homepage": "https://people.stanford.edu/fereshte/;", "dblp": "129/2345;21/10105", "google_scholar": ";rmsIyGMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Fereshte_Khani1;~Marco_Tulio_Ribeiro1", "aff": "Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nkhani2023collaborative,\ntitle={Collaborative Alignment of {NLP} Models},\nauthor={Fereshte Khani and Marco Tulio Ribeiro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f39Q3JyoIi}\n}", "github": "", "project": "", "reviewers": "g41d;rcBm;q4QC;MmNy;11Xh", "pdf_size": 1998878, "rating": "5;6;6;6;6", "confidence": "5;3;1;4;1", "soundness": "2;3;3;2;2", "novelty": "2;3;3;3;3", "presentation": "2;3;3;2;3", "wc_summary": "95;59;112;106;126", "wc_strengths": "49;30;39;35;80", "wc_weaknesses": "286;84;21;99;96", "wc_questions": "315;35;7;239;1", "wc_limitations": "19;7;1;5;8", "wc_review": "764;215;180;484;311", "wc_reply_reviewers": "34;0;8;62;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 1.6 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 99.6, 22.63271967749347 ], "wc_strengths_avg": [ 46.6, 17.828067758453244 ], "wc_weaknesses_avg": [ 117.2, 89.0289840445234 ], "wc_questions_avg": [ 119.4, 131.4071535343491 ], "wc_limitations_avg": [ 8.0, 6.0 ], 
"wc_review_avg": [ 390.8, 214.32442697928767 ], "wc_reply_reviewers_avg": [ 25.4, 21.75867643033464 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6875000000000002, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3384493412090281885&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;microsoft.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "The Learnability of In-Context Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70923", "id": "f3JNQd7CHM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/73950f0eb4ac0925dc71ba2406893320-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f3JNQd7CHM", "openreview": "https://openreview.net/forum?id=f3JNQd7CHM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70923", "video": "https://nips.cc/virtual/2023/poster/70923", "author_site": "Noam Wies, Yoav Levine, Amnon Shashua", "tldr": "", "abstract": "In-context learning is a surprising and important phenomenon that emerged when modern language models were scaled to billions of learned parameters. \n Without modifying a large language model's weights, it can be tuned to perform various downstream natural language tasks simply by including concatenated training examples of these tasks in its input.\n Though disruptive for many practical applications of large language models, this emergent learning paradigm is not well understood from a theoretical perspective. In this paper, we propose a first-of-its-kind PAC based framework for in-context learnability, and use it to provide the first finite sample complexity results for the in-context learning setup.\n Our framework includes an initial pretraining phase, which fits a function to the pretraining distribution, and then a second in-context learning phase, which keeps this function constant and concatenates training examples of the downstream task in its input.\n We use our framework in order to prove that, under mild assumptions, when the pretraining distribution is a mixture of latent tasks (a model often considered for natural language pretraining), these tasks can be efficiently learned via in-context learning, even though the model's weights are unchanged and the input significantly diverges from the pretraining distribution.\n Our theoretical analysis reveals that in this setting, in-context learning is more about identifying the task than about learning it, a result which is in line with a series of recent empirical findings. 
\n We hope that the in-context learnability framework presented in this paper will facilitate future progress towards a deeper understanding of this important new learning paradigm.", "keywords": "in-context;PAC;language models;foundation models;LLMs", "primary_area": "", "supplementary_material": "", "author": "Noam Wies;Yoav Levine;Amnon Shashua", "authorids": "~Noam_Wies1;~Yoav_Levine1;~Amnon_Shashua1", "gender": "M;M;M", "homepage": ";;http://www.cs.huji.ac.il/~shashua/", "dblp": "236/6106;199/1895;47/1492", "google_scholar": "https://scholar.google.co.il/citations?user=FxlR8voAAAAJ;;https://scholar.google.com.tw/citations?user=dwi5wvYAAAAJ", "orcid": "0000-0002-1337-2298;;", "linkedin": "noam-wies-a5ab1663/;;", "or_profile": "~Noam_Wies1;~Yoav_Levine1;~Amnon_Shashua1", "aff": "Hebrew University of Jerusalem;AI21 Labs;Hebrew University, Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;ai21.com;cs.huji.ac.il", "position": "PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\nwies2023the,\ntitle={The Learnability of In-Context Learning},\nauthor={Noam Wies and Yoav Levine and Amnon Shashua},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f3JNQd7CHM}\n}", "github": "", "project": "", "reviewers": "o2CM;86vc;xPBs;42uY;yYWi;xztc", "pdf_size": 306798, "rating": "5;6;6;6;6;6", "confidence": "3;3;3;3;3;3", "soundness": "2;3;3;3;3;2", "novelty": "3;3;3;3;3;2", "presentation": "3;3;3;2;3;3", "wc_summary": "116;68;314;114;98;29", "wc_strengths": "64;30;248;60;69;41", "wc_weaknesses": "106;27;461;97;197;188", "wc_questions": "338;32;1;38;16;4", "wc_limitations": "7;18;1;42;7;9", "wc_review": "631;175;1025;351;387;271", "wc_reply_reviewers": "54;0;0;0;0;0", "wc_reply_authors": "41;0;0;0;0;0", "reply_reviewers": "1;0;0;0;0;0", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 5.833333333333333, 0.372677996249965 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 123.16666666666667, 90.45886112236632 ], "wc_strengths_avg": [ 85.33333333333333, 73.99474455812536 ], "wc_weaknesses_avg": [ 179.33333333333334, 138.49508615430688 ], "wc_questions_avg": [ 71.5, 119.93852592057316 ], "wc_limitations_avg": [ 14.0, 13.490737563232042 ], "wc_review_avg": [ 473.3333333333333, 283.3005863428846 ], "wc_reply_reviewers_avg": [ 9.0, 20.12461179749811 ], "wc_reply_authors_avg": [ 6.833333333333333, 15.279797846248558 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2471404133086445085&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "huji.ac.il;ai21.com;cs.huji.ac.il", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hebrew University of Jerusalem;AI21 Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.huji.ac.il;https://www.ai21labs.com", "aff_unique_abbr": "HUJI;AI21", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Norm-guided latent space exploration for text-to-image generation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70922", "id": "f56xMRb7Vt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b49213694c3e752252d62ca360b72a36-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f56xMRb7Vt", "openreview": "https://openreview.net/forum?id=f56xMRb7Vt", "poster": "/media/PosterPDFs/NeurIPS%202023/70922.png?t=1697614215.2485595", "slides": "https://nips.cc/virtual/2023/poster/70922", "video": "https://nips.cc/virtual/2023/poster/70922", "author_site": "Dvir Samuel, Rami Ben-Ari, Nir Darshan, Haggai Maron, Gal Chechik", "tldr": "", "abstract": "Text-to-image diffusion models show great potential in synthesizing a large variety of concepts in new compositions and scenarios. However, the latent space of initial seeds is still not well understood and its structure was shown to impact the generation of various concepts. Specifically, simple operations like interpolation and finding the centroid of a set of seeds perform poorly when using standard Euclidean or spherical metrics in the latent space. This paper makes the observation that, in current training procedures, diffusion models observed inputs with a narrow range of norm values. This has strong implications for methods that rely on seed manipulation for image generation, with applications to few-shot and long-tail learning tasks. To address this issue, we propose a novel method for interpolating between two seeds and demonstrate that it defines a new non-Euclidean metric that takes into account a norm-based prior on seeds. We describe a simple yet efficient algorithm for approximating this interpolation procedure and use it to further define centroids in the latent seed space. We show that our new interpolation and centroid techniques significantly enhance the generation of rare concept images. 
This further leads to state-of-the-art performance on few-shot and long-tail benchmarks, improving prior approaches in terms of generation speed, image quality, and semantic content.", "keywords": "diffusion models;few-shot learning;long-tail learning", "primary_area": "", "supplementary_material": "/attachment/1d5ce6f64506341cf4378550b3a78313980e6ff5.pdf", "author": "Dvir Samuel;Rami Ben-Ari;Nir Darshan;Haggai Maron;Gal Chechik", "authorids": "~Dvir_Samuel1;~Rami_Ben-Ari2;~Nir_Darshan1;~Haggai_Maron1;~Gal_Chechik1", "gender": "M;M;M;M;", "homepage": "https://chechiklab.biu.ac.il/~dvirsamuel/;http://www.benarirami.com/;;https://haggaim.github.io/;https://chechiklab.biu.ac.il/~gal/", "dblp": "262/3701;07/1624;;181/6629;c/GalChechik", "google_scholar": "_CWxQ1gAAAAJ;https://scholar.google.co.il/citations?user=C4i_vUMAAAAJ;;https://scholar.google.co.il/citations?user=4v8uJrIAAAAJ;Wk2gAZUAAAAJ", "orcid": ";;;;0000-0001-9164-5303", "linkedin": "dvir-samuel-b47859105/;;nirdarshan/;;", "or_profile": "~Dvir_Samuel1;~Rami_Ben-Ari2;~Nir_Darshan1;~Haggai_Maron1;~Gal_Chechik1", "aff": "Bar-Ilan University;Bar-Ilan University;OriginAI;NVIDIA;NVIDIA", "aff_domain": "biu.ac.il;biu.ac.il;originai.co;nvidia.com;nvidia.com", "position": "PhD student;Lecturer;Principal Researcher;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nsamuel2023normguided,\ntitle={Norm-guided latent space exploration for text-to-image generation},\nauthor={Dvir Samuel and Rami Ben-Ari and Nir Darshan and Haggai Maron and Gal Chechik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f56xMRb7Vt}\n}", "github": "", "project": "", "reviewers": "WuD2;qXwY;yJ89;6Fzu", "pdf_size": 12602486, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "95;103;69;160", "wc_strengths": "51;54;106;101", "wc_weaknesses": "61;134;160;123", "wc_questions": "254;6;65;81", "wc_limitations": "10;5;15;65", "wc_review": "471;302;415;530", "wc_reply_reviewers": "142;20;66;75", "wc_reply_authors": "93;22;33;36", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.75, 33.214266513051285 ], "wc_strengths_avg": [ 78.0, 25.583197610932064 ], "wc_weaknesses_avg": [ 119.5, 36.34900273735168 ], "wc_questions_avg": [ 101.5, 92.37017917055266 ], "wc_limitations_avg": [ 23.75, 24.076700355322778 ], "wc_review_avg": [ 429.5, 84.09667056429761 ], "wc_reply_reviewers_avg": [ 75.75, 43.56819367382587 ], "wc_reply_authors_avg": [ 46.0, 27.631503759296198 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2074581715021803887&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "biu.ac.il;biu.ac.il;originai.co;nvidia.com;nvidia.com", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Bar-Ilan University;OriginAI;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.biu.ac.il;;https://www.nvidia.com", "aff_unique_abbr": "BIU;;NVIDIA", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;2;2", "aff_country_unique": "Israel;;United States" }, { "title": "SA-Solver: Stochastic Adams Solver for Fast Sampling of Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70921", "id": "f6a9XVFYIo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4a6806490d31216a3ba667eb240c897-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f6a9XVFYIo", "openreview": "https://openreview.net/forum?id=f6a9XVFYIo", "poster": "/media/PosterPDFs/NeurIPS%202023/70921.png?t=1702311888.472825", "slides": "https://nips.cc/virtual/2023/poster/70921", "video": "https://nips.cc/virtual/2023/poster/70921", "author_site": "Shuchen Xue, Mingyang Yi, Weijian Luo, Shifeng Zhang, Jiacheng Sun, Zhenguo Li, Zhi-Ming Ma", "tldr": "", "abstract": "Diffusion Probabilistic Models (DPMs) have achieved considerable success in generation tasks. As sampling from DPMs is equivalent to solving diffusion SDE or ODE which is time-consuming, numerous fast sampling methods built upon improved differential equation solvers are proposed. The majority of such techniques consider solving the diffusion ODE due to its superior efficiency. However, stochastic sampling could offer additional advantages in generating diverse and high-quality data. In this work, we engage in a comprehensive analysis of stochastic sampling from two aspects: variance-controlled diffusion SDE and linear multi-step SDE solver. Based on our analysis, we propose SA-Solver, which is an improved efficient stochastic Adams method for solving diffusion SDE to generate data with high quality. Our experiments show that SA-Solver achieves: 1) improved or comparable performance compared with the existing state-of-the-art (SOTA) sampling methods for few-step sampling; 2) SOTA FID on substantial benchmark datasets under a suitable number of function evaluations (NFEs).", "keywords": "Diffusion Model Sampler;Multi-step SDE Solver", "primary_area": "", "supplementary_material": "/attachment/bee6c0763208b85b504aa092afe4f8d58b2fae75.pdf", "author": "Shuchen Xue;Mingyang Yi;Weijian Luo;Shifeng Zhang;Jiacheng Sun;Zhenguo Li;Zhi-Ming Ma", "authorids": "~Shuchen_Xue1;~Mingyang_Yi1;~Weijian_Luo1;~Shifeng_Zhang5;~Jiacheng_Sun1;~Zhenguo_Li1;~Zhi-Ming_Ma1", "gender": "M;M;;M;M;M;", "homepage": ";http://mingyangyi.github.io;;https://github.com/zsffq999;;http://www.ee.columbia.edu/~zgli/;http://homepage.amss.ac.cn/research/homePage/8eb59241e2e74d828fb84eec0efadba5/myHomePage.html", "dblp": "356/7258;;;;165/5350;23/6479;", "google_scholar": "aA70TOwAAAAJ;RlOZiPUAAAAJ;;;;XboZC1AAAAAJ;", "orcid": ";;;;;;", "linkedin": "https://linkedin.com/in/shuchen-xue/;;;;https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131;;", "or_profile": "~Shuchen_Xue1;~Mingyang_Yi1;~Weijian_Luo1;~Shifeng_Zhang5;~Jiacheng_Sun1;~Zhenguo_Li1;~Zhi-Ming_Ma1", "aff": "Academy of Mathematics and Systems Science, Chinese Academy of Sciences;Huawei Noah's ark Lab;;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "amss.ac.cn;huawei.com;;huawei.com;huawei.com;huawei.com;amss.ac.cn", "position": "PhD student;Researcher;;Researcher;Senior Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nxue2023sasolver,\ntitle={{SA}-Solver: Stochastic Adams Solver for Fast Sampling of Diffusion Models},\nauthor={Shuchen Xue and 
Mingyang Yi and Weijian Luo and Shifeng Zhang and Jiacheng Sun and Zhenguo Li and Zhi-Ming Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f6a9XVFYIo}\n}", "github": "", "project": "", "reviewers": "VoWY;tWFq;vQfu;txgZ;zg3j;UcRr", "pdf_size": 28361053, "rating": "5;5;6;6;7;7", "confidence": "3;4;3;4;5;4", "soundness": "3;3;3;3;3;3", "novelty": "2;3;3;2;3;4", "presentation": "3;2;2;3;3;3", "wc_summary": "52;99;43;59;48;221", "wc_strengths": "58;51;45;91;96;50", "wc_weaknesses": "74;147;43;621;122;27", "wc_questions": "2;50;17;154;3;31", "wc_limitations": "1;29;17;1;6;1", "wc_review": "187;376;165;926;275;330", "wc_reply_reviewers": "0;0;0;0;9;103", "wc_reply_authors": "0;0;0;0;0;82", "reply_reviewers": "0;0;0;0;1;1", "reply_authors": "1;1;1;1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 87.0, 62.67109913402402 ], "wc_strengths_avg": [ 65.16666666666667, 20.440292452789308 ], "wc_weaknesses_avg": [ 172.33333333333334, 204.9371177269316 ], "wc_questions_avg": [ 42.833333333333336, 52.3940730320605 ], "wc_limitations_avg": [ 9.166666666666666, 10.526421783092086 ], "wc_review_avg": [ 376.5, 256.59874642458146 ], "wc_reply_reviewers_avg": [ 18.666666666666668, 37.85792152538517 ], "wc_reply_authors_avg": [ 13.666666666666666, 30.559595692497123 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5940885257860047, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16725213153874671176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "amss.ac.cn;huawei.com;;huawei.com;huawei.com;huawei.com;amss.ac.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Huawei", "aff_unique_dep": "Academy of Mathematics and Systems Science;Noah's ark Lab", "aff_unique_url": "http://www.amss.cas.cn;https://www.huawei.com", "aff_unique_abbr": "AMSS;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Reward Finetuning for Faster and More Accurate Unsupervised Object Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70920", "id": "f6rQJ83ycb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ab3163ee384cd46baa7f1abb2b1bf19-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f6rQJ83ycb", "openreview": "https://openreview.net/forum?id=f6rQJ83ycb", "poster": "/media/PosterPDFs/NeurIPS%202023/70920.png?t=1701979038.8800392", "slides": "https://nips.cc/virtual/2023/poster/70920", "video": "https://nips.cc/virtual/2023/poster/70920", "author_site": "Katie Luo, Zhenzhen Liu, Xiangyu Chen, Yurong You, Sagie Benaim, Cheng Perng Phoo, Mark Campbell, Wen Sun, Bharath Hariharan, Kilian Weinberger", "tldr": "", "abstract": "Recent advances in machine learning have shown that Reinforcement Learning from Human Feedback (RLHF) can improve machine learning models and align them with human preferences. 
Although very successful for Large Language Models (LLMs), these advancements have not had a comparable impact in research for autonomous vehicles\u2014where alignment with human expectations can be imperative. In this paper, we propose to adapt similar RL-based methods to unsupervised object discovery, i.e. learning to detect objects from LiDAR points without any training labels. Instead of labels, we use simple heuristics to mimic human feedback. More explicitly, we combine multiple heuristics into a simple reward function that positively correlates its score with bounding box accuracy, i.e., boxes containing objects are scored higher than those without. We start from the detector\u2019s own predictions to explore the space and reinforce boxes with high rewards through gradient updates. Empirically, we demonstrate that our approach is not only more accurate, but also orders of magnitude faster to train compared to prior works on object discovery. Code is available at https://github.com/katieluo88/DRIFT.", "keywords": "Self Driving;Self-Supervised Object Discovery;Reward Ranked Finetuning", "primary_area": "", "supplementary_material": "", "author": "Katie Z Luo;Zhenzhen Liu;Xiangyu Chen;Yurong You;Sagie Benaim;Cheng Perng Phoo;Mark Campbell;Wen Sun;Bharath Hariharan;Kilian Q Weinberger", "authorids": "~Katie_Z_Luo1;~Zhenzhen_Liu1;~Xiangyu_Chen1;~Yurong_You1;~Sagie_Benaim1;~Cheng_Perng_Phoo1;~Mark_Campbell1;~Wen_Sun1;~Bharath_Hariharan3;~Kilian_Q_Weinberger1", "gender": "F;;M;M;M;M;M;;M;M", "homepage": "https://www.cs.cornell.edu/~katieluo/;https://www.cs.cornell.edu/~zliu/;https://www.cs.cornell.edu/~xchen/;http://yurongyou.com;https://sagiebenaim.github.io/;https://cpphoo.github.io/;http://campbell.mae.cornell.edu;https://wensun.github.io;http://www.cs.cornell.edu/~kilian/;http://home.bharathh.info", "dblp": "207/8564;81/891;;199/1968;129/1316;226/0521;;;88/4801;05/8412", "google_scholar": "qlmK27YAAAAJ;;xBv-PMEAAAAJ;rdwkreIAAAAJ;-zSM2I8AAAAJ;kt9D2usAAAAJ;e1iAhHQAAAAJ;iOLC30YAAAAJ;jsxk8vsAAAAJ;TpglobcAAAAJ", "orcid": ";0000-0003-1083-8512;;;0000-0003-0002-3467;;;;0009-0008-9313-7239;", "linkedin": "katieluo;;;yurong-you/;sagie-benaim-aab47474/;;;;;", "or_profile": "~Katie_Z_Luo1;~Zhenzhen_Liu1;~Xiangyu_Chen1;~Yurong_You1;~Sagie_Benaim1;~Cheng_Perng_Phoo1;~Mark_Campbell1;~Wen_Sun1;~Kilian_Q_Weinberger1;~Bharath_Hariharan2", "aff": "Cornell University;Cornell University;Cornell University;Cornell University;University of Copenhagen;Cornell University;Cornell University;Cornell University;ASAPP Inc.;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;cornell.edu;di.ku;cornell.edu;cornell.edu;cornell.edu;asapp.com;cornell.edu", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;PhD student;Full Professor;Assistant Professor;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nluo2023reward,\ntitle={Reward Finetuning for Faster and More Accurate Unsupervised Object Discovery},\nauthor={Katie Z Luo and Zhenzhen Liu and Xiangyu Chen and Yurong You and Sagie Benaim and Cheng Perng Phoo and Mark Campbell and Wen Sun and Bharath Hariharan and Kilian Q Weinberger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f6rQJ83ycb}\n}", "github": "", "project": "", "reviewers": "dARk;Baoq;vhou;ZfQG;iC9C", "pdf_size": 17750686, "rating": "4;6;6;6;6", "confidence": "4;3;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;4", 
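The heuristic-reward idea described in the abstract above can be sketched as follows: several label-free box statistics are combined into one score that comes out higher for boxes containing objects than for boxes over empty space. The specific heuristics (point density, a size penalty, an alignment term) and their weights are illustrative assumptions rather than the paper's exact reward.

```python
# Sketch of a heuristic box reward for LiDAR object discovery. The heuristics
# and weights below are assumptions for illustration, not the paper's reward.
import numpy as np

def box_reward(points, box, alignment):
    """points: (N, 3) LiDAR points; box: (cx, cy, cz, l, w, h); alignment in [0, 1]."""
    c, dims = np.asarray(box[:3]), np.asarray(box[3:])
    inside = np.all(np.abs(points - c) <= dims / 2, axis=1)
    density = inside.mean()                  # boxes around objects capture more points
    compact = np.exp(-np.prod(dims) / 50.0)  # discourage arbitrarily large boxes
    return 0.5 * density + 0.3 * compact + 0.2 * alignment

rng = np.random.default_rng(0)
scene = rng.uniform(-10, 10, size=(5000, 3))                 # sparse background
cluster = rng.normal([2.0, 3.0, 0.0], 0.4, size=(300, 3))    # a dense "object"
points = np.vstack([scene, cluster])

on_object = box_reward(points, (2, 3, 0, 2, 2, 2), alignment=0.9)
empty = box_reward(points, (-7, -7, 0, 2, 2, 2), alignment=0.9)
print(f"reward on object: {on_object:.3f}   reward on empty space: {empty:.3f}")
```

A box proposal over the dense cluster scores higher than an identical box over empty space, which is the property the abstract asks of the reward; the detector's own high-reward predictions would then be reinforced with gradient updates.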
"wc_summary": "49;115;71;106;112", "wc_strengths": "57;124;82;49;137", "wc_weaknesses": "43;175;208;98;137", "wc_questions": "373;53;4;69;362", "wc_limitations": "76;3;15;4;64", "wc_review": "598;470;380;326;812", "wc_reply_reviewers": "206;15;51;0;16", "wc_reply_authors": "595;0;173;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 90.6, 26.096743091811284 ], "wc_strengths_avg": [ 89.8, 35.21022578740443 ], "wc_weaknesses_avg": [ 132.2, 57.8390871297257 ], "wc_questions_avg": [ 172.2, 160.93153823909097 ], "wc_limitations_avg": [ 32.4, 31.219224846238575 ], "wc_review_avg": [ 517.2, 173.80724956111584 ], "wc_reply_reviewers_avg": [ 57.6, 76.06470929412667 ], "wc_reply_authors_avg": [ 153.6, 230.64656945205144 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11132457202807827925&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cornell.edu;cornell.edu;cornell.edu;cornell.edu;di.ku;cornell.edu;cornell.edu;cornell.edu;asapp.com;cornell.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;1;0;0;0;2;0", "aff_unique_norm": "Cornell University;University of Copenhagen;ASAPP Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cornell.edu;https://www.ku.dk;https://www.asapp.com", "aff_unique_abbr": "Cornell;UCPH;ASAPP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "United States;Denmark" }, { "title": "Provable convergence guarantees for black-box variational inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70919", "id": "f71xXsoG1v", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0bcff6425bbf850ec87d5327a965db9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f71xXsoG1v", "openreview": "https://openreview.net/forum?id=f71xXsoG1v", "poster": "/media/PosterPDFs/NeurIPS%202023/70919.png?t=1701114726.7652931", "slides": "https://nips.cc/virtual/2023/poster/70919", "video": "https://nips.cc/virtual/2023/poster/70919", "author_site": "Justin Domke, Robert Gower, Guillaume Garrigos", "tldr": "", "abstract": "Black-box variational inference is widely used in situations where there is no proof that its stochastic optimization succeeds. We suggest this is due to a theoretical gap in existing stochastic optimization proofs\u2014namely the challenge of gradient estimators with unusual noise bounds, and a composite non-smooth objective. For dense Gaussian variational families, we observe that existing gradient estimators based on reparameterization satisfy a quadratic noise bound and give novel convergence guarantees for proximal and projected stochastic gradient descent using this bound. This provides rigorous guarantees that methods similar to those used in practice converge on realistic inference problems.", "keywords": "optimization;variational inference", "primary_area": "", "supplementary_material": "", "author": "Justin Domke;Robert M. 
Gower;Guillaume Garrigos", "authorids": "~Justin_Domke1;~Robert_M._Gower1;~Guillaume_Garrigos1", "gender": "Unspecified;M;M", "homepage": "https://people.cs.umass.edu/~domke/;https://gowerrobert.github.io/;https://guillaume-garrigos.com/", "dblp": "39/5186;143/0056;", "google_scholar": ";okKw87MAAAAJ;DN0Cu0IAAAAJ", "orcid": ";;0000-0002-8613-5664", "linkedin": ";;", "or_profile": "~Justin_Domke1;~Robert_M._Gower1;~Guillaume_Garrigos1", "aff": "University of Massachusetts at Amherst;Flatiron Institute;Universit\u00e9 Paris Cit\u00e9", "aff_domain": "umass.edu;simonsfoundation.org;u-paris.fr", "position": "Associate Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndomke2023provable,\ntitle={Provable convergence guarantees for black-box variational inference},\nauthor={Justin Domke and Robert M. Gower and Guillaume Garrigos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f71xXsoG1v}\n}", "github": "", "project": "", "reviewers": "TUBr;6P4Y;vBBM;biC8;gXey", "pdf_size": 525301, "rating": "5;6;7;7;8", "confidence": "3;4;3;2;4", "soundness": "3;3;3;3;4", "novelty": "3;3;4;2;3", "presentation": "3;3;3;3;4", "wc_summary": "106;87;59;64;79", "wc_strengths": "57;108;120;29;194", "wc_weaknesses": "187;74;336;112;441", "wc_questions": "30;2;82;32;70", "wc_limitations": "9;1;10;4;2", "wc_review": "389;272;607;241;786", "wc_reply_reviewers": "17;28;87;31;168", "wc_reply_authors": "0;0;0;0;375", "reply_reviewers": "1;1;1;1;2", "reply_authors": "1;1;1;1;4", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 79.0, 16.84042754801671 ], "wc_strengths_avg": [ 101.6, 56.88092826246773 ], "wc_weaknesses_avg": [ 230.0, 138.46732466542423 ], "wc_questions_avg": [ 43.2, 29.054431675735806 ], "wc_limitations_avg": [ 5.2, 3.655133376499413 ], "wc_review_avg": [ 459.0, 207.95480278175833 ], "wc_reply_reviewers_avg": [ 66.2, 56.41772771035714 ], "wc_reply_authors_avg": [ 75.0, 150.0 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.6, 1.2 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.10482848367219187, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18032645304607098848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "umass.edu;simonsfoundation.org;u-paris.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Massachusetts Amherst;Flatiron Institute;Universit\u00e9 Paris Cit\u00e9", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umass.edu;https://flatironinstitute.org;https://www.universite-paris.fr", "aff_unique_abbr": "UMass Amherst;Flatiron;UPC", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;France" }, { "title": "Learning Re-sampling Methods with Parameter Attribution for Image Super-resolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70918", "id": "f7wFwPJwBe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8434e0db3227276c00ef2b18c7f01c65-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f7wFwPJwBe", "openreview": "https://openreview.net/forum?id=f7wFwPJwBe", "poster": "", "slides": 
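Looking back at the black-box variational inference entry above, the projected stochastic gradient scheme it analyzes can be sketched for a dense Gaussian family: reparameterized samples give pathwise ELBO gradients, and after each step the Cholesky factor's diagonal is projected back above a positive threshold. The toy Gaussian target, constant step size, and threshold are assumptions for illustration, not the paper's experimental setup or exact algorithm.

```python
# Minimal projected-SGD sketch for dense Gaussian variational inference,
# q = N(m, C C^T) with lower-triangular C. Target and step size are toy choices.
import numpy as np

rng = np.random.default_rng(0)
d = 2
target_mean = np.array([1.0, -2.0])

def grad_log_p(z):                      # target: N(target_mean, I), unnormalized
    return -(z - target_mean)

m, C = np.zeros(d), np.eye(d)           # variational parameters
step = 0.05
for _ in range(2000):
    eps = rng.standard_normal(d)
    z = m + C @ eps                     # reparameterization trick
    g = grad_log_p(z)                   # pathwise gradient of E[log p]
    m += step * g
    # ELBO also has an entropy term log|det C|, whose C-gradient is diag(1/C_ii).
    C += step * (np.outer(g, eps) + np.diag(1.0 / np.diag(C)))
    C = np.tril(C)                      # keep the factor lower-triangular
    np.fill_diagonal(C, np.maximum(np.diag(C), 1e-3))   # projection step
print("fitted mean:", np.round(m, 2), " fitted scale diag:", np.round(np.diag(C), 2))
```

The projection is what keeps the iterates inside the parameter domain (positive-diagonal Cholesky factors), which is the composite/constrained aspect the convergence analysis has to handle.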
"https://nips.cc/virtual/2023/poster/70918", "video": "https://nips.cc/virtual/2023/poster/70918", "author_site": "Xiaotong Luo, Yuan Xie, Yanyun Qu", "tldr": "", "abstract": "Single image super-resolution (SISR) has made a significant breakthrough benefiting from the prevalent rise of deep neural networks and large-scale training samples. The mainstream deep SR models primarily focus on network architecture design as well as optimization schemes, while few pay attention to the training data. In fact, most of the existing SR methods train the model on uniformly sampled patch pairs from the whole image. However, the uneven image content makes the training data present an unbalanced distribution, i.e., the easily reconstructed region (smooth) occupies the majority of the data, while the hard reconstructed region (edge or texture) has rarely few samples. Based on this phenomenon, we consider rethinking the current paradigm of merely using uniform data sampling way for training SR models. In this paper, we propose a simple yet effective Bi-Sampling Parameter Attribution (BSPA) method for accurate image SR. Specifically, the bi-sampling consists of uniform sampling and inverse sampling, which is introduced to reconcile the unbalanced inherent data bias. The former aims to keep the intrinsic data distribution, and the latter is designed to enhance the feature extraction ability of the model on the hard samples. Moreover, integrated gradient is introduced to attribute the contribution of each parameter in the alternate models trained by both sampling data so as to filter the trivial parameters for further dynamic refinement. By progressively decoupling the allocation of parameters, the SR model can learn a more compact representation. Extensive experiments on publicly available datasets demonstrate that our proposal can effectively boost the performance of baseline methods from the data re-sampling view.", "keywords": "image super-resolution;long-tail distribution;re-sampling;integrated gradient", "primary_area": "", "supplementary_material": "", "author": "Xiaotong Luo;Yuan Xie;Yanyun Qu", "authorids": "~Xiaotong_Luo3;~Yuan_Xie4;~Yanyun_Qu1", "gender": ";;F", "homepage": ";;http://quyanyun.xmu.edu.cn", "dblp": ";;03/3500", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xiaotong_Luo3;~Yuan_Xie4;~Yanyun_Qu1", "aff": ";;Xiamen University", "aff_domain": ";;xmu.edu.cn", "position": ";;Full Professor", "bibtex": "@inproceedings{\nluo2023learning,\ntitle={Learning Re-sampling Methods with Parameter Attribution for Image Super-resolution},\nauthor={Xiaotong Luo and Yuan Xie and Yanyun Qu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f7wFwPJwBe}\n}", "github": "", "project": "", "reviewers": "ehLM;4W2B;tDMi;xmJz;zt8x", "pdf_size": 8155070, "rating": "5;5;5;6;6", "confidence": "4;5;4;5;5", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "90;55;64;59;168", "wc_strengths": "100;47;63;27;115", "wc_weaknesses": "162;73;162;10;173", "wc_questions": "152;68;100;78;2", "wc_limitations": "31;4;33;31;7", "wc_review": "535;247;422;205;465", "wc_reply_reviewers": "94;134;17;10;30", "wc_reply_authors": "232;329;39;39;57", "reply_reviewers": "1;2;1;1;1", "reply_authors": "3;4;3;3;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 
0.39999999999999997 ], "wc_summary_avg": [ 87.2, 42.206160687747946 ], "wc_strengths_avg": [ 70.4, 32.71452276894774 ], "wc_weaknesses_avg": [ 116.0, 64.1342342279067 ], "wc_questions_avg": [ 80.0, 48.61275552774189 ], "wc_limitations_avg": [ 21.2, 12.874781551544865 ], "wc_review_avg": [ 374.8, 127.43060856795749 ], "wc_reply_reviewers_avg": [ 57.0, 48.65387959865071 ], "wc_reply_authors_avg": [ 139.2, 119.55985948469494 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.2, 0.39999999999999997 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1538927362152403714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";;xmu.edu.cn", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Xiamen University", "aff_unique_dep": "", "aff_unique_url": "https://www.xmu.edu.cn", "aff_unique_abbr": "XMU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Sequential Memory with Temporal Predictive Coding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70917", "id": "f8zIs2IB6Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8a8b9c7f979e8819a7986b3ef825c08a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=f8zIs2IB6Q", "openreview": "https://openreview.net/forum?id=f8zIs2IB6Q", "poster": "/media/PosterPDFs/NeurIPS%202023/70917.png?t=1701401358.5938063", "slides": "https://nips.cc/virtual/2023/poster/70917", "video": "https://nips.cc/virtual/2023/poster/70917", "author_site": "Mufeng Tang, Helen Barron, Rafal Bogacz", "tldr": "", "abstract": "Forming accurate memory of sequential stimuli is a fundamental function of biological agents. However, the computational mechanism underlying sequential memory in the brain remains unclear. Inspired by neuroscience theories and recent successes in applying predictive coding (PC) to \\emph{static} memory tasks, in this work we propose a novel PC-based model for \\emph{sequential} memory, called \\emph{temporal predictive coding} (tPC). We show that our tPC models can memorize and retrieve sequential inputs accurately with a biologically plausible neural implementation. Importantly, our analytical study reveals that tPC can be viewed as a classical Asymmetric Hopfield Network (AHN) with an implicit statistical whitening process, which leads to more stable performance in sequential memory tasks of structured inputs. Moreover, we find that tPC exhibits properties consistent with behavioral observations and theories in neuroscience, thereby strengthening its biological relevance. 
Our work establishes a possible computational mechanism underlying sequential memory in the brain that can also be theoretically interpreted using existing memory model frameworks.", "keywords": "Predictive coding;sequential memory;hippocampus", "primary_area": "", "supplementary_material": "/attachment/7b2b5a48eeeb922d5b882a3cddfce680380ca3a8.pdf", "author": "Mufeng Tang;Helen Barron;Rafal Bogacz", "authorids": "~Mufeng_Tang1;helen.barron@merton.ox.ac.uk;~Rafal_Bogacz1", "gender": "M;;", "homepage": "https://c16mftang.github.io/;;", "dblp": ";;46/45", "google_scholar": "eFyyxQkAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mufeng_Tang1;helen.barron@merton.ox.ac.uk;~Rafal_Bogacz1", "aff": "University of Oxford;;University of Oxford", "aff_domain": "ox.ac.uk;;ox.ac.uk", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\ntang2023sequential,\ntitle={Sequential Memory with Temporal Predictive Coding},\nauthor={Mufeng Tang and Helen Barron and Rafal Bogacz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=f8zIs2IB6Q}\n}", "github": "", "project": "", "reviewers": "zZJv;EgZ5;SUeD;vgoV", "pdf_size": 3855656, "rating": "3;6;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "1;2;3;3", "presentation": "2;3;3;3", "wc_summary": "13;63;99;57", "wc_strengths": "24;93;59;80", "wc_weaknesses": "224;55;121;63", "wc_questions": "2;105;41;43", "wc_limitations": "2;13;9;141", "wc_review": "265;329;329;384", "wc_reply_reviewers": "10;20;39;48", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.0, 30.54504869860253 ], "wc_strengths_avg": [ 64.0, 26.086394921491163 ], "wc_weaknesses_avg": [ 115.75, 67.48842493346544 ], "wc_questions_avg": [ 47.75, 36.873940662749895 ], "wc_limitations_avg": [ 41.25, 57.72510285828861 ], "wc_review_avg": [ 326.75, 42.13297402272951 ], "wc_reply_reviewers_avg": [ 29.25, 15.022899187573616 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.08804509063256237, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6208758201609125430&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "ox.ac.uk;;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Diffusion Model is an Effective Planner and Data Synthesizer for Multi-Task Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70916", "id": "fAdMly4ki5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ccda3c632cc8590ee60ca5ba226a4c30-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fAdMly4ki5", "openreview": "https://openreview.net/forum?id=fAdMly4ki5", "poster": "/media/PosterPDFs/NeurIPS%202023/70916.png?t=1701324953.5735795", "slides": 
"https://nips.cc/virtual/2023/poster/70916", "video": "https://nips.cc/virtual/2023/poster/70916", "author_site": "Haoran He, Chenjia Bai, Kang Xu, Zhuoran Yang, Weinan Zhang, Dong Wang, Bin Zhao, Xuelong Li", "tldr": "", "abstract": "Diffusion models have demonstrated highly-expressive generative capabilities in vision and NLP. Recent studies in reinforcement learning (RL) have shown that diffusion models are also powerful in modeling complex policies or trajectories in offline datasets. However, these works have been limited to single-task settings where a generalist agent capable of addressing multi-task predicaments is absent. In this paper, we aim to investigate the effectiveness of a single diffusion model in modeling large-scale multi-task offline data, which can be challenging due to diverse and multimodal data distribution. Specifically, we propose Multi-Task Diffusion Model (\\textsc{MTDiff}), a diffusion-based method that incorporates Transformer backbones and prompt learning for generative planning and data synthesis in multi-task offline settings. \\textsc{MTDiff} leverages vast amounts of knowledge available in multi-task data and performs implicit knowledge sharing among tasks. For generative planning, we find \\textsc{MTDiff} outperforms state-of-the-art algorithms across 50 tasks on Meta-World and 8 maps on Maze2D. For data synthesis, \\textsc{MTDiff} generates high-quality data for testing tasks given a single demonstration as a prompt, which enhances the low-quality datasets for even unseen tasks.", "keywords": "multi-task reinforcement learning;diffusion models;planning;data synthesis", "primary_area": "", "supplementary_material": "/attachment/732733e5eabc83ac68ac8ece3f48ec6bf11a03bf.zip", "author": "Haoran He;Chenjia Bai;Kang Xu;Zhuoran Yang;Weinan Zhang;Dong Wang;Bin Zhao;Xuelong Li", "authorids": "~Haoran_He1;~Chenjia_Bai2;~Kang_Xu2;~Zhuoran_Yang1;~Weinan_Zhang1;~Dong_Wang1;~Bin_Zhao7;~Xuelong_Li2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://tinnerhrhe.github.io/;https://baichenjia.github.io/;https://kangxu023.github.io/;https://zhuoranyang.github.io/;http://wnzhang.net;https://redwang.github.io/;https://iopen.nwpu.edu.cn/info/1347/2105.htm;", "dblp": "299/7312;247/1943;295/1622;;28/10261-1;40/3934-28;73/4325-1.html;l/XuelongLi", "google_scholar": "Z33PHQ0AAAAJ;Rm_1y2kAAAAJ;7FTLsHUAAAAJ;;Qzss0GEAAAAJ;dasL9V4AAAAJ;https://scholar.google.com.hk/citations?user=DQB0hqwAAAAJ;ahUibskAAAAJ", "orcid": "0000-0002-7340-8643;;0000-0001-6040-3002;;0000-0002-0127-2425;;;", "linkedin": ";;;;;;;", "or_profile": "~Haoran_He1;~Chenjia_Bai2;~Kang_Xu2;~Zhuoran_Yang1;~Weinan_Zhang1;~Dong_Wang1;~Bin_Zhao7;~Xuelong_Li2", "aff": "Shanghai Jiaotong University;Shanghai AI Laboratory;Fudan University;Yale University;Shanghai Jiaotong University;Shanghai AI Laboratory;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University", "aff_domain": "sjtu.edu.cn;pjlab.org.cn;fudan.edu.cn;yale.edu;sjtu.edu.cn;pjlab.org.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "Undergrad student;Researcher;MS student;Assistant Professor;Associate Professor;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhe2023diffusion,\ntitle={Diffusion Model is an Effective Planner and Data Synthesizer for Multi-Task Reinforcement Learning},\nauthor={Haoran He and Chenjia Bai and Kang Xu and Zhuoran Yang and Weinan Zhang and Dong Wang and Bin Zhao and Xuelong Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fAdMly4ki5}\n}", "github": "", "project": "", "reviewers": "nEaR;UScg;EBNs;gBBv;8zHR", "pdf_size": 3021505, "rating": "6;6;6;7;7", "confidence": "5;5;3;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "3;3;3;4;2", "wc_summary": "59;102;73;39;42", "wc_strengths": "61;49;14;30;42", "wc_weaknesses": "243;5;127;23;54", "wc_questions": "78;134;3;40;25", "wc_limitations": "13;4;1;20;1", "wc_review": "454;294;218;152;164", "wc_reply_reviewers": "18;6;20;16;9", "wc_reply_authors": "42;57;55;26;26", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 63.0, 23.03909720453473 ], "wc_strengths_avg": [ 39.2, 16.117071694324625 ], "wc_weaknesses_avg": [ 90.4, 86.93353783207031 ], "wc_questions_avg": [ 56.0, 46.030424721047275 ], "wc_limitations_avg": [ 7.8, 7.520638270785265 ], "wc_review_avg": [ 256.4, 110.78916914572471 ], "wc_reply_reviewers_avg": [ 13.8, 5.3814496188294845 ], "wc_reply_authors_avg": [ 41.2, 13.43726162579266 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.2182178902359924, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17610242278238607847&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;pjlab.org.cn;fudan.edu.cn;yale.edu;sjtu.edu.cn;pjlab.org.cn;nwpu.edu.cn;nwpu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;0;1;4;5", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai AI Laboratory;Fudan University;Yale University;Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.shanghai-ai-lab.com;https://www.fudan.edu.cn;https://www.yale.edu;http://www.nwpu.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "SJTU;SAIL;Fudan;Yale;NWPU;NWPU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Switching Autoregressive Low-rank Tensor Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70915", "id": "fFJThJ94rY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b4e3fea367538ea6b1b5ba6ebf5c39a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fFJThJ94rY", "openreview": "https://openreview.net/forum?id=fFJThJ94rY", "poster": "/media/PosterPDFs/NeurIPS%202023/70915.png?t=1701992885.7652607", "slides": "https://nips.cc/virtual/2023/poster/70915", "video": "https://nips.cc/virtual/2023/poster/70915", "author_site": "Hyun Dong Lee, Andrew Warrington, Joshua Glaser, Scott Linderman", "tldr": "", "abstract": "An important problem in time-series analysis is modeling systems with time-varying dynamics. Probabilistic models with joint continuous and discrete latent states offer interpretable, efficient, and experimentally useful descriptions of such data. Commonly used models include autoregressive hidden Markov models (ARHMMs) and switching linear dynamical systems (SLDSs), each with its own advantages and disadvantages. 
ARHMMs permit exact inference and easy parameter estimation, but are parameter-intensive when modeling long dependencies, and hence are prone to overfitting. In contrast, SLDSs can capture long-range dependencies in a parameter-efficient way through Markovian latent dynamics, but present an intractable likelihood and a challenging parameter estimation task. In this paper, we propose _switching autoregressive low-rank tensor_ (SALT) models, which retain the advantages of both approaches while ameliorating the weaknesses. SALT parameterizes the tensor of an ARHMM with a low-rank factorization to control the number of parameters and allow longer-range dependencies without overfitting. We prove theoretical and discuss practical connections between SALT, linear dynamical systems, and SLDSs. We empirically demonstrate quantitative advantages of SALT models on a range of simulated and real prediction tasks, including behavioral and neural datasets. Furthermore, the learned low-rank tensor provides novel insights into temporal dependencies within each discrete state.", "keywords": "switching;autoregressive;low-rank tensor;time-series;probabilistic;neural;neuroscience;behavioral;arhmm;slds", "primary_area": "", "supplementary_material": "/attachment/f93328dcb74f56c612d1576cbb4c6866a448049d.zip", "author": "Hyun Dong Lee;Andrew Warrington;Joshua I Glaser;Scott Linderman", "authorids": "~Hyun_Dong_Lee1;~Andrew_Warrington2;~Joshua_I_Glaser1;~Scott_Linderman1", "gender": "M;M;M;M", "homepage": ";;https://glaserlab.github.io/;https://web.stanford.edu/~swl1/", "dblp": "249/5364;207/8575;https://dblp.uni-trier.de/pers/hd/g/Glaser:Joshua_I=;142/2484", "google_scholar": "SWPcr1cAAAAJ;https://scholar.google.co.uk/citations?hl=en;tbfWCDgAAAAJ;6mD3I24AAAAJ", "orcid": ";;;", "linkedin": "hyundongleee/;;;", "or_profile": "~Hyun_Dong_Lee1;~Andrew_Warrington2;~Joshua_I_Glaser1;~Scott_W_Linderman1", "aff": "Stanford University;Stanford University;Northwestern University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;northwestern.edu;stanford.edu", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2023switching,\ntitle={Switching Autoregressive Low-rank Tensor Models},\nauthor={Hyun Dong Lee and Andrew Warrington and Joshua I Glaser and Scott Linderman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fFJThJ94rY}\n}", "github": "", "project": "", "reviewers": "BTJL;sUaQ;6MVR;4hZ1", "pdf_size": 7767179, "rating": "4;6;7;8", "confidence": "4;3;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "86;75;224;91", "wc_strengths": "132;175;27;154", "wc_weaknesses": "327;121;376;60", "wc_questions": "125;118;120;108", "wc_limitations": "30;196;19;47", "wc_review": "700;685;766;460", "wc_reply_reviewers": "83;0;271;18", "wc_reply_authors": "234;0;183;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 119.0, 60.897454790820284 ], "wc_strengths_avg": [ 122.0, 56.9166056612655 ], "wc_weaknesses_avg": [ 221.0, 133.39977511225422 ], "wc_questions_avg": [ 117.75, 6.179603547154137 ], "wc_limitations_avg": [ 73.0, 71.71122645722913 ], "wc_review_avg": [ 652.75, 115.38062012313853 ],
"wc_reply_reviewers_avg": [ 93.0, 107.30563824888233 ], "wc_reply_authors_avg": [ 104.25, 105.79786150957872 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.50709255283711, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=731210509895312516&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;northwestern.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.northwestern.edu", "aff_unique_abbr": "Stanford;NU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Calibrated Stackelberg Games: Learning Optimal Commitments Against Calibrated Agents", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70914", "id": "fHsBNNDroC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c23ccf9eedf87e4380e92b75b24955bb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fHsBNNDroC", "openreview": "https://openreview.net/forum?id=fHsBNNDroC", "poster": "/media/PosterPDFs/NeurIPS%202023/70914.png?t=1702365220.9764235", "slides": "https://nips.cc/virtual/2023/poster/70914", "video": "https://nips.cc/virtual/2023/poster/70914", "author_site": "Nika Haghtalab, Chara Podimata, Kunhe Yang", "tldr": "", "abstract": "In this paper, we introduce a generalization of the standard Stackelberg Games (SGs) framework: _Calibrated Stackelberg Games_. In CSGs, a principal repeatedly interacts with an agent who (contrary to standard SGs) does not have direct access to the principal's action but instead best responds to _calibrated forecasts_ about it. CSG is a powerful modeling tool that goes beyond assuming that agents use ad hoc and highly specified algorithms for interacting in strategic settings to infer the principal's actions and thus more robustly addresses real-life applications that SGs were originally intended to capture. Along with CSGs, we also introduce a stronger notion of calibration, termed _adaptive calibration_, that provides fine-grained any-time calibration guarantees against adversarial sequences. We give a general approach for obtaining adaptive calibration algorithms and specialize them for finite CSGs. In our main technical result, we show that in CSGs, the principal can achieve utility that converges to the optimum Stackelberg value of the game both in _finite_ and _continuous_ settings and that no higher utility is achievable. 
Two prominent and immediate applications of our results are the settings of learning in Stackelberg Security Games and strategic classification, both against _calibrated_ agents.", "keywords": "calibration;Stackelberg games;learning in repeated games;strategic agents;best response;strategic classification;Stackelberg Security Games", "primary_area": "", "supplementary_material": "/attachment/6dc032bc85599f6c29d4d3d51d66c22142f8af56.pdf", "author": "Nika Haghtalab;Chara Podimata;Kunhe Yang", "authorids": "~Nika_Haghtalab2;~Chara_Podimata1;~Kunhe_Yang1", "gender": "F;F;F", "homepage": "https://people.eecs.berkeley.edu/~nika/;https://www.charapodimata.com/;https://kunheyang.com/", "dblp": ";209/9752;267/5467", "google_scholar": ";XY9hKvIAAAAJ;-j0q9B4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nika_Haghtalab2;~Chara_Podimata1;~Kunhe_Yang1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Assistant Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nhaghtalab2023calibrated,\ntitle={Calibrated Stackelberg Games: Learning Optimal Commitments Against Calibrated Agents},\nauthor={Nika Haghtalab and Chara Podimata and Kunhe Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fHsBNNDroC}\n}", "github": "", "project": "", "reviewers": "M2eA;j8Y8;Crqu", "pdf_size": 1627783, "rating": "6;7;7", "confidence": "3;3;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "2;4;2", "wc_summary": "258;141;170", "wc_strengths": "106;118;221", "wc_weaknesses": "58;142;382", "wc_questions": "18;43;117", "wc_limitations": "1;20;1", "wc_review": "441;464;891", "wc_reply_reviewers": "14;56;87", "wc_reply_authors": "0;36;15", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 189.66666666666666, 49.74825513411389 ], "wc_strengths_avg": [ 148.33333333333334, 51.61610429141493 ], "wc_weaknesses_avg": [ 194.0, 137.2880184138441 ], "wc_questions_avg": [ 59.333333333333336, 42.0343774652235 ], "wc_limitations_avg": [ 7.333333333333333, 8.956685895029603 ], "wc_review_avg": [ 598.6666666666666, 206.9240332961726 ], "wc_reply_reviewers_avg": [ 52.333333333333336, 29.914693528246094 ], "wc_reply_authors_avg": [ 17.0, 14.7648230602334 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14351130940135919798&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Entropic Neural Optimal Transport via Diffusion Processes", "status": "Oral", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70913", "id": "fHyLsfMDIs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eeac51414a11484d048432f614d5bb1b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fHyLsfMDIs", "openreview": "https://openreview.net/forum?id=fHyLsfMDIs", "poster": "/media/PosterPDFs/NeurIPS%202023/70913.png?t=1701789012.1486754", "slides": "https://nips.cc/virtual/2023/poster/70913", "video": "https://nips.cc/virtual/2023/poster/70913", "author_site": "Nikita Gushchin, Alexander Kolesov, Alexander Korotin, Dmitry Vetrov, Evgeny Burnaev", "tldr": "", "abstract": "We propose a novel neural algorithm for the fundamental problem of computing the entropic optimal transport (EOT) plan between probability distributions which are accessible by samples. Our algorithm is based on the saddle point reformulation of the dynamic version of EOT which is known as the Schr\u00f6dinger Bridge problem. In contrast to the prior methods for large-scale EOT, our algorithm is end-to-end and consists of a single learning step, has fast inference procedure, and allows handling small values of the entropy regularization coefficient which is of particular importance in some applied problems. Empirically, we show the performance of the method on several large-scale EOT tasks. The code for the ENOT solver can be found at https://github.com/ngushchin/EntropicNeuralOptimalTransport", "keywords": "Optimal transport;Schr\u00f6dinger Bridge;Entropy regularized OT;Neural Networks;Unpaired Learning", "primary_area": "", "supplementary_material": "/attachment/8ab8b0283f285f8bfaa4649ddd8841ee9ce9ac57.pdf", "author": "Nikita Gushchin;Alexander Kolesov;Alexander Korotin;Dmitry P. Vetrov;Evgeny Burnaev", "authorids": "~Nikita_Gushchin1;~Alexander_Kolesov1;~Alexander_Korotin2;~Dmitry_P._Vetrov1;~Evgeny_Burnaev1", "gender": "M;M;M;M;M", "homepage": ";https://github.com/Kolessov;https://constructor.university/faculty-member/dmitry-vetrov;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "332/1999;287/4380;89/3348;144/7845;209/9906", "google_scholar": "UaRTbNoAAAAJ;WyAI_wUAAAAJ;https://scholar.google.ru/citations?user=7HU0UoUAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": ";;;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": "nikita-gushchin-937522145/;;;;", "or_profile": "~Nikita_Gushchin1;~Alexander_Kolesov1;~Dmitry_P._Vetrov1;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "Skolkovo Institute of Science and Technology;The Skolkovo Institute of Science and Technology;National Research University Higher School of Economics;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;skoltech.ru;hse.ru;skoltech.ru;skoltech.ru", "position": "PhD student;PhD student;Full Professor;Full Professor;Head of Research Group", "bibtex": "@inproceedings{\ngushchin2023entropic,\ntitle={Entropic Neural Optimal Transport via Diffusion Processes},\nauthor={Nikita Gushchin and Alexander Kolesov and Alexander Korotin and Dmitry P. 
Vetrov and Evgeny Burnaev},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fHyLsfMDIs}\n}", "github": "", "project": "", "reviewers": "KyaP;TSgQ;WYHi;LQN6", "pdf_size": 9298226, "rating": "7;8;9;9", "confidence": "4;3;4;3", "soundness": "3;4;3;3", "novelty": "2;4;3;3", "presentation": "2;4;4;4", "wc_summary": "51;141;74;192", "wc_strengths": "61;51;107;165", "wc_weaknesses": "213;353;44;51", "wc_questions": "83;106;325;272", "wc_limitations": "24;17;106;9", "wc_review": "432;668;656;689", "wc_reply_reviewers": "23;26;147;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 8.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 114.5, 55.63497101643893 ], "wc_strengths_avg": [ 96.0, 45.088801270381985 ], "wc_weaknesses_avg": [ 165.25, 127.7544030552372 ], "wc_questions_avg": [ 196.5, 104.02523732248824 ], "wc_limitations_avg": [ 39.0, 39.04484601070928 ], "wc_review_avg": [ 611.25, 104.1618332211948 ], "wc_reply_reviewers_avg": [ 49.0, 57.467382052778426 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6504070365539775359&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "skoltech.ru;skoltech.ru;hse.ru;skoltech.ru;skoltech.ru", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;National Research University Higher School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;https://hse.ru", "aff_unique_abbr": "Skoltech;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Learning Energy-Based Prior Model with Diffusion-Amortized MCMC", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70912", "id": "fKQEmHoLb6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/85381f4549b5ddf1d48e2e287d7d3d15-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fKQEmHoLb6", "openreview": "https://openreview.net/forum?id=fKQEmHoLb6", "poster": "/media/PosterPDFs/NeurIPS%202023/70912.png?t=1699943604.6948283", "slides": "https://nips.cc/virtual/2023/poster/70912", "video": "https://nips.cc/virtual/2023/poster/70912", "author_site": "Peiyu Yu, Yaxuan Zhu, Sirui Xie, Xiaojian (Shawn) Ma, Ruiqi Gao, Song-Chun Zhu, Ying Nian Wu", "tldr": "", "abstract": "Latent space EBMs, also known as energy-based priors, have drawn growing interest in the field of generative modeling due to their flexibility in formulation and the strong modeling power of the latent space. However, the common practice of learning latent space EBMs with non-convergent short-run MCMC for prior and posterior sampling hinders the model from making further progress; the degenerate MCMC sampling quality in practice often leads to degraded generation quality and instability in training, especially with highly multi-modal and/or high-dimensional target distributions.
To remedy this sampling issue, in this paper we introduce a simple but effective diffusion-based amortization method for long-run MCMC sampling and develop a novel learning algorithm for the latent space EBM based on it. We provide theoretical evidence that the learned amortization of MCMC is a valid long-run MCMC sampler. Experiments on several image modeling benchmark datasets demonstrate the superior performance of our method compared with strong counterparts.", "keywords": "Energy-Based Model;Denoising Diffusion Probabilistic Model;MCMC", "primary_area": "", "supplementary_material": "/attachment/5b69e699825a2ef9b21a4a815cc9f8f59acc8781.pdf", "author": "Peiyu Yu;Yaxuan Zhu;Sirui Xie;Xiaojian Ma;Ruiqi Gao;Song-Chun Zhu;Ying Nian Wu", "authorids": "~Peiyu_Yu1;~Yaxuan_Zhu1;~Sirui_Xie1;~Xiaojian_Ma1;~Ruiqi_Gao1;~Song-Chun_Zhu1;~Ying_Nian_Wu1", "gender": ";M;M;;F;M;", "homepage": ";;https://www.siruixie.com;;http://www.stat.ucla.edu/~ruiqigao/;https://zhusongchun.net/;", "dblp": ";289/6018;232/3072;;206/7084;10/10313;", "google_scholar": ";EptgCGsAAAAJ;9GJn5FIAAAAJ;;VdlgOXoAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Peiyu_Yu1;~Yaxuan_Zhu1;~Sirui_Xie1;~Xiaojian_Ma1;~Ruiqi_Gao1;~Song-Chun_Zhu1;~Ying_Nian_Wu1", "aff": ";University of California, Los Angeles;University of California, Los Angeles;;Google;Peking University;", "aff_domain": ";ucla.edu;ucla.edu;;google.com;pku.edu.cn;", "position": ";PhD student;PhD student;;Researcher;Full Professor;", "bibtex": "@inproceedings{\nyu2023learning,\ntitle={Learning Energy-Based Prior Model with Diffusion-Amortized {MCMC}},\nauthor={Peiyu Yu and Yaxuan Zhu and Sirui Xie and Xiaojian Ma and Ruiqi Gao and Song-Chun Zhu and Ying Nian Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fKQEmHoLb6}\n}", "github": "", "project": "", "reviewers": "rkuE;hz9t;ga25;x1ko;NcyD", "pdf_size": 30697900, "rating": "3;4;6;6;8", "confidence": "2;4;3;3;3", "soundness": "3;3;3;3;4", "novelty": "2;2;3;2;4", "presentation": "2;3;2;3;4", "wc_summary": "37;34;72;60;53", "wc_strengths": "18;21;49;39;13", "wc_weaknesses": "151;133;70;227;1", "wc_questions": "3;2;17;41;25", "wc_limitations": "4;2;8;1;5", "wc_review": "213;192;216;368;97", "wc_reply_reviewers": "117;178;0;241;6", "wc_reply_authors": "404;791;0;675;0", "reply_reviewers": "2;1;0;2;1", "reply_authors": "2;2;1;4;1", "rating_avg": [ 5.4, 1.7435595774162693 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 51.2, 14.218298069740976 ], "wc_strengths_avg": [ 28.0, 13.682105101189656 ], "wc_weaknesses_avg": [ 116.4, 76.38743352148964 ], "wc_questions_avg": [ 17.6, 14.554724318928201 ], "wc_limitations_avg": [ 4.0, 2.449489742783178 ], "wc_review_avg": [ 217.2, 86.99747122761673 ], "wc_reply_reviewers_avg": [ 108.4, 94.59090865405618 ], "wc_reply_authors_avg": [ 374.0, 330.1944881429731 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.18136906252750293, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15686568561162362927&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": ";ucla.edu;ucla.edu;;google.com;pku.edu.cn;", "author_num": 7, 
"aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of California, Los Angeles;Google;Peking University", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.ucla.edu;https://www.google.com;http://www.pku.edu.cn", "aff_unique_abbr": "UCLA;Google;Peking U", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Los Angeles;Mountain View;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Reduced Policy Optimization for Continuous Control with Hard Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70911", "id": "fKVEMNmWqU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7984e22a06eb5f0e35d745cb38345983-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fKVEMNmWqU", "openreview": "https://openreview.net/forum?id=fKVEMNmWqU", "poster": "/media/PosterPDFs/NeurIPS%202023/70911.png?t=1699727852.4296997", "slides": "https://nips.cc/virtual/2023/poster/70911", "video": "https://nips.cc/virtual/2023/poster/70911", "author_site": "Shutong Ding, Jingya Wang, Yali Du, Ye Shi", "tldr": "", "abstract": "Recent advances in constrained reinforcement learning (RL) have endowed reinforcement learning with certain safety guarantees. However, deploying existing constrained RL algorithms in continuous control tasks with general hard constraints remains challenging, particularly in those situations with non-convex hard constraints. Inspired by the generalized reduced gradient (GRG) algorithm, a classical constrained optimization technique, we propose a reduced policy optimization (RPO) algorithm that combines RL with GRG to address general hard constraints. RPO partitions actions into basic actions and nonbasic actions following the GRG method and outputs the basic actions via a policy network. Subsequently, RPO calculates the nonbasic actions by solving equations based on equality constraints using the obtained basic actions. The policy network is then updated by implicitly differentiating nonbasic actions with respect to basic actions. Additionally, we introduce an action projection procedure based on the reduced gradient and apply a modified Lagrangian relaxation technique to ensure inequality constraints are satisfied. To the best of our knowledge, RPO is the first attempt that introduces GRG to RL as a way of efficiently handling both equality and inequality hard constraints. It is worth noting that there is currently a lack of RL environments with complex hard constraints, which motivates us to develop three new benchmarks: two robotics manipulation tasks and a smart grid operation control task. With these benchmarks, RPO achieves better performance than previous constrained RL algorithms in terms of both cumulative reward and constraint violation. 
We believe RPO, along with the new benchmarks, will open up new opportunities for applying RL to real-world problems with complex constraints.", "keywords": "Reinforcement Learning;Hard Constraint;Generalized Reduced Gradient", "primary_area": "", "supplementary_material": "/attachment/09525341ad9239b4326a81b68cfc3f96ef6e31a8.zip", "author": "Shutong Ding;Jingya Wang;Yali Du;Ye Shi", "authorids": "~Shutong_Ding1;~Jingya_Wang3;~Yali_Du1;~Ye_Shi1", "gender": "M;F;;M", "homepage": "https://dingsht.tech/;https://faculty.sist.shanghaitech.edu.cn/faculty/wangjingya/;;http://faculty.sist.shanghaitech.edu.cn/faculty/shiye", "dblp": ";;;34/11191-1", "google_scholar": "https://scholar.google.com.hk/citations?user=qJyqm40AAAAJ;https://scholar.google.com.au/citations?user=vmvJV_IAAAAJ;;gMqbZPUAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shutong_Ding1;~Jingya_Wang3;~Yali_Du1;~Ye_Shi1", "aff": "ShanghaiTech University;ShanghaiTech University;;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;;shanghaitech.edu.cn", "position": "MS student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nding2023reduced,\ntitle={Reduced Policy Optimization for Continuous Control with Hard Constraints},\nauthor={Shutong Ding and Jingya Wang and Yali Du and Ye Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fKVEMNmWqU}\n}", "github": "", "project": "", "reviewers": "DYt8;ixcf;RHA4;u5yD", "pdf_size": 697918, "rating": "6;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "55;91;104;105", "wc_strengths": "54;31;83;58", "wc_weaknesses": "217;236;131;759", "wc_questions": "114;97;63;69", "wc_limitations": "10;9;10;9", "wc_review": "450;464;391;1000", "wc_reply_reviewers": "321;35;92;140", "wc_reply_authors": "692;24;30;425", "reply_reviewers": "3;1;1;2", "reply_authors": "4;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.75, 20.253086184579377 ], "wc_strengths_avg": [ 56.5, 18.445866745696716 ], "wc_weaknesses_avg": [ 335.75, 247.54532413277371 ], "wc_questions_avg": [ 85.75, 20.753011829611623 ], "wc_limitations_avg": [ 9.5, 0.5 ], "wc_review_avg": [ 576.25, 246.1812086654869 ], "wc_reply_reviewers_avg": [ 147.0, 107.11442479890371 ], "wc_reply_authors_avg": [ 292.75, 282.02604046435147 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15722256961902947899&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;;shanghaitech.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Dynamics of Finite Width Kernel and Prediction Fluctuations in Mean Field Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70910", "id": 
"fKwG6grp8o", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1ec69275e9f002ee068f5d68380f3290-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fKwG6grp8o", "openreview": "https://openreview.net/forum?id=fKwG6grp8o", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70910", "video": "https://nips.cc/virtual/2023/poster/70910", "author_site": "Blake Bordelon, Cengiz Pehlevan", "tldr": "", "abstract": "We analyze the dynamics of finite width effects in wide but finite feature learning neural networks. Starting from a dynamical mean field theory description of infinite width deep neural network kernel and prediction dynamics, we provide a characterization of the $\\mathcal{O}(1/\\sqrt{\\text{width}})$ fluctuations of the DMFT order parameters over random initializations of the network weights. Our results, while perturbative in width, unlike prior analyses, are non-perturbative in the strength of feature learning. In the lazy limit of network training, all kernels are random but static in time and the prediction variance has a universal form. However, in the rich, feature learning regime, the fluctuations of the kernels and predictions are dynamically coupled with a variance that can be computed self-consistently. In two layer networks, we show how feature learning can dynamically reduce the variance of the final tangent kernel and final network predictions. We also show how initialization variance can slow down online learning in wide but finite networks. In deeper networks, kernel variance can dramatically accumulate through subsequent layers at large feature learning strengths, but feature learning continues to improve the signal-to-noise ratio of the feature kernels. In discrete time, we demonstrate that large learning rate phenomena such as edge of stability effects can be well captured by infinite width dynamics and that initialization variance can decrease dynamically. 
For CNNs trained on CIFAR-10, we empirically find significant corrections to both the bias and variance of network dynamics due to finite width.", "keywords": "Deep Learning Theory;Feature Learning;Dynamics;Ensembles", "primary_area": "", "supplementary_material": "/attachment/2a0b91f711c8bfe7056571f3f5dfc7087d58e34e.zip", "author": "Blake Bordelon;Cengiz Pehlevan", "authorids": "~Blake_Bordelon1;~Cengiz_Pehlevan2", "gender": "M;", "homepage": "https://blakebordelon.github.io/;https://pehlevan.seas.harvard.edu/", "dblp": "228/6993;145/3480", "google_scholar": "yeQ8_pgAAAAJ;veDLTPEAAAAJ", "orcid": "0000-0003-0455-9445;0000-0001-9767-6063", "linkedin": ";", "or_profile": "~Blake_Bordelon1;~Cengiz_Pehlevan2", "aff": "Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;seas.harvard.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbordelon2023dynamics,\ntitle={Dynamics of Finite Width Kernel and Prediction Fluctuations in Mean Field Neural Networks},\nauthor={Blake Bordelon and Cengiz Pehlevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fKwG6grp8o}\n}", "github": "", "project": "", "reviewers": "fkFE;2Zma;VrD5", "pdf_size": 1196622, "rating": "7;7;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;4;4", "presentation": "3;3;3", "wc_summary": "104;157;165", "wc_strengths": "16;142;125", "wc_weaknesses": "1;351;101", "wc_questions": "162;291;142", "wc_limitations": "1;131;46", "wc_review": "284;1072;579", "wc_reply_reviewers": "34;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 142.0, 27.067816067549053 ], "wc_strengths_avg": [ 94.33333333333333, 55.82313339666829 ], "wc_weaknesses_avg": [ 151.0, 147.19601443879745 ], "wc_questions_avg": [ 198.33333333333334, 66.03197878469356 ], "wc_limitations_avg": [ 59.333333333333336, 53.90320542932076 ], "wc_review_avg": [ 645.0, 325.06717254540894 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 16.027753706895076 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18047490657513709651&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "harvard.edu;seas.harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AVeriTeC: A Dataset for Real-world Claim Verification with Evidence from the Web", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73517", "id": "fKzSz0oyaI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd86a30526cd1aff61d6f89f107634e4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=fKzSz0oyaI", "openreview": "https://openreview.net/forum?id=fKzSz0oyaI", "poster": 
"/media/PosterPDFs/NeurIPS%202023/73517.png?t=1699529446.3727417", "slides": "https://nips.cc/virtual/2023/poster/73517", "video": "https://nips.cc/virtual/2023/poster/73517", "author_site": "Michael Schlichtkrull, Zhijiang Guo, Andreas Vlachos", "tldr": "", "abstract": "Existing datasets for automated fact-checking have substantial limitations, such as relying on artificial claims, lacking annotations for evidence and intermediate reasoning, or including evidence published after the claim. In this paper we introduce AVeriTeC, a new dataset of 4,568 real-world claims covering fact-checks by 50 different organizations. Each claim is annotated with question-answer pairs supported by evidence available online, as well as textual justifications explaining how the evidence combines to produce a verdict. Through a multi-round annotation process, we avoid common pitfalls including context dependence, evidence insufficiency, and temporal leakage, and reach a substantial inter-annotator agreement of $\\kappa=0.619$ on verdicts. We develop a baseline as well as an evaluation scheme for verifying claims through question-answering against the open web.", "keywords": "fact-checking;real-world fact-checking;fact extraction and verification;natural language processing;information retrieval;question generation;question generation and answering", "primary_area": "", "supplementary_material": "/attachment/921bc54f771e5cfa94238dd450efebc7db571fb6.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nschlichtkrull2023averitec,\ntitle={{AV}eriTeC: A Dataset for Real-world Claim Verification with Evidence from the Web},\nauthor={Michael Sejr Schlichtkrull and Zhijiang Guo and Andreas Vlachos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=fKzSz0oyaI}\n}", "github": "", "project": "", "reviewers": "qGDa;6s6p;YZ5D;rgH7;GQze", "pdf_size": 2238535, "rating": "6;7;7;8;9", "confidence": "5;3;4;3;4", "wc_summary_and_contributions": "117;50;29;61;59", "wc_strengths": "114;76;29;44;92", "wc_improvement": "226;147;8;9;11", "wc_limitations": "4;7;11;1;1", "wc_correctness": "1;5;2;1;1", "wc_clarity": "1;9;1;1;1", "wc_relation_to_prior_work": "1;9;1;1;1", "wc_documentation": "1;1;1;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "466;305;83;120;168", "wc_reply_reviewers": "0;51;0;0;0", "wc_reply_authors": "521;457;169;92;40", "reply_reviewers": "0;1;0;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 7.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 63.2, 29.191779664830303 ], "wc_strengths_avg": [ 71.0, 31.00967590930289 ], "wc_improvement_avg": [ 80.2, 90.32253317971103 ], "wc_limitations_avg": [ 4.8, 3.8157568056677826 ], "wc_correctness_avg": [ 2.0, 1.5491933384829668 ], "wc_clarity_avg": [ 2.6, 3.2000000000000006 ], "wc_relation_to_prior_work_avg": [ 2.6, 3.2000000000000006 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 228.4, 140.61379733155633 ], "wc_reply_reviewers_avg": [ 10.2, 20.4 ], "wc_reply_authors_avg": [ 255.8, 195.8299262114961 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 0, 0 ], 
"corr_rating_confidence": -0.41931393468876726, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17628164854816108983&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73516", "id": "fOrm2rGX2r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c6ec1844bec96d6d32ae95ae694e23d8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=fOrm2rGX2r", "openreview": "https://openreview.net/forum?id=fOrm2rGX2r", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73516", "video": "https://nips.cc/virtual/2023/poster/73516", "author_site": "Yuzhen Huang, Yuzhuo Bai, Zhihao Zhu, Junlei Zhang, Jinghan Zhang, Tangjun Su, Junteng Liu, Chuancheng Lv, Yikai Zhang, jiayi lei, Yao Fu, Maosong Sun, Junxian He", "tldr": "", "abstract": "New NLP benchmarks are urgently needed to align with the rapid development of large language models (LLMs). We present C-Eval, the first comprehensive Chinese evaluation suite designed to assess advanced knowledge and reasoning abilities of foundation models in a Chinese context. C-Eval comprises multiple-choice questions across four difficulty levels: middle school, high school, college, and professional. The questions span 52 diverse disciplines, ranging from humanities to science and engineering. C-Eval is accompanied by C-Eval Hard, a subset of very challenging subjects in C-Eval that requires advanced reasoning abilities to solve. We conduct a comprehensive evaluation of the most advanced LLMs on C-Eval, including both English- and Chinese-oriented models. Results indicate that only GPT-4 could achieve an average accuracy of over 60%, suggesting that there is still significant room for improvement for current LLMs. 
We anticipate C-Eval will help analyze important strengths and shortcomings of foundation models, and foster their development and growth for Chinese users.", "keywords": "Chinese evaluation;foundation models", "primary_area": "", "supplementary_material": "/attachment/6a6d9c3906b7c5999f022f210190c7d0cbb9a99f.zip", "author": "Yuzhen Huang;Yuzhuo Bai;Zhihao Zhu;Junlei Zhang;Jinghan Zhang;Tangjun Su;Junteng Liu;Chuancheng Lv;Yikai Zhang;jiayi lei;Yao Fu;Maosong Sun;Junxian He", "authorids": "~Yuzhen_Huang2;~Yuzhuo_Bai1;~Zhihao_Zhu1;~Junlei_Zhang1;~Jinghan_Zhang1;~Tangjun_Su1;~Junteng_Liu2;~Chuancheng_Lv1;~Yikai_Zhang2;~jiayi_lei1;~Yao_Fu3;~Maosong_Sun1;~Junxian_He1", "gender": "M;F;M;F;M;M;M;F;M;M;M;M;M", "homepage": "https://hyz17.github.io/;;;https://jinghan23.github.io/;https://github.com/Shallowdream99;https://vicent0205.github.io/;https://arist12.github.io/;;https://franxyao.github.io/;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;https://jxhe.github.io;;https://github.com/zzh068", "dblp": ";279/8666;197/3153.html;;;347/3273;;;;95/3291-1;188/6127.html;289/1635;", "google_scholar": "XZK8cewAAAAJ;b2jy5JMAAAAJ;;HqF5d38AAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.ca/citations?user=I_EmXRYAAAAJ;7VD1YLMAAAAJ;liSP4cEAAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;BIFGeoUAAAAJ;;", "orcid": ";;;0009-0002-1489-6162;;;;;;;;;", "linkedin": ";;;;;;;;;;;chuancheng-lv-b89412226/;", "or_profile": "~Yuzhen_Huang2;~Yuzhuo_Bai1;~Junlei_Zhang1;~Jinghan_Zhang1;~Tangjun_Su1;~Junteng_Liu2;~Yikai_Zhang2;~jiayi_lei1;~Yao_Fu3;~Maosong_Sun1;~Junxian_He1;~Lv_Chuancheng1;~Zhu_Zhihao1", "aff": "Shanghai Jiaotong University;Tsinghua University;Westlake University;Southeast University;Shanghai Jiaotong University;Shanghai Jiaotong University;EPFL - EPF Lausanne;Shanghai Jiaotong University;University of Edinburgh;Tsinghua University;Hong Kong University of Science and Technology;Tsinghua University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;tsinghua.edu.cn;westlake.edu;seu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;epfl.ch;sjtu.edu;ed.ac.uk;tsinghua.edu.cn;ust.hk;tsinghua.edu.cn;sjtu.edu.cn", "position": "Undergrad student;PhD student;PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;PhD student;Full Professor;Assistant Professor;MS student;PhD student", "bibtex": "@inproceedings{\nhuang2023ceval,\ntitle={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},\nauthor={Yuzhen Huang and Yuzhuo Bai and Zhihao Zhu and Junlei Zhang and Jinghan Zhang and Tangjun Su and Junteng Liu and Chuancheng Lv and Yikai Zhang and jiayi lei and Yao Fu and Maosong Sun and Junxian He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=fOrm2rGX2r}\n}", "github": "", "project": "", "reviewers": "ZHas;jtRh;KZk1;UDRo;WJEA", "pdf_size": 0, "rating": "6;6;7;7;8", "confidence": "4;5;4;4;4", "wc_summary_and_contributions": "101;111;91;79;55", "wc_strengths": "68;235;116;17;82", "wc_improvement": "40;263;104;1;96", "wc_limitations": "14;1;114;15;1", "wc_correctness": "11;10;93;1;1", "wc_clarity": "10;1;38;1;1", "wc_relation_to_prior_work": "6;1;51;1;1", "wc_documentation": "4;1;7;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "255;624;615;117;239", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "71;676;350;8;427", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", 
"rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 87.4, 19.365949499056327 ], "wc_strengths_avg": [ 103.6, 73.01123201261571 ], "wc_improvement_avg": [ 100.8, 89.44137744914262 ], "wc_limitations_avg": [ 29.0, 42.927846440276966 ], "wc_correctness_avg": [ 23.2, 35.159067109353174 ], "wc_clarity_avg": [ 10.2, 14.330387294138285 ], "wc_relation_to_prior_work_avg": [ 12.0, 19.595917942265423 ], "wc_documentation_avg": [ 2.8, 2.4 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 370.0, 209.25391274716944 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 306.4, 243.92835013585443 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 180, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9525037164026157781&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;tsinghua.edu.cn;westlake.edu;seu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;epfl.ch;sjtu.edu;ed.ac.uk;tsinghua.edu.cn;ust.hk;tsinghua.edu.cn;sjtu.edu.cn", "author_num": 13, "aff_unique_index": "0;1;2;3;0;0;4;0;5;1;6;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University;Westlake University;Southeast University;EPFL;University of Edinburgh;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn;https://www.westlake.edu.cn;https://www.seu.edu.cn/;https://www.epfl.ch;https://www.ed.ac.uk;https://www.ust.hk", "aff_unique_abbr": "SJTU;THU;WU;SEU;EPFL;Edinburgh;HKUST", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Lausanne;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;1;0;2;0;0;0;0", "aff_country_unique": "China;Switzerland;United Kingdom" }, { "title": "In Defense of Softmax Parametrization for Calibrated and Consistent Learning to Defer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70909", "id": "fPAAgjISu0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/791d3337291b2c574545aeecfa75484c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fPAAgjISu0", "openreview": "https://openreview.net/forum?id=fPAAgjISu0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70909", "video": "https://nips.cc/virtual/2023/poster/70909", "author_site": "Yuzhou Cao, Hussein Mozannar, Lei Feng, Hongxin Wei, Bo An", "tldr": "", "abstract": "Enabling machine learning classifiers to defer their decision to a downstream expert when the expert is more accurate will ensure improved safety and performance. This objective can be achieved with the learning-to-defer framework which aims to jointly learn how to classify and how to defer to the expert. In recent studies, it has been theoretically shown that popular estimators for learning to defer parameterized with softmax provide unbounded estimates for the likelihood of deferring which makes them uncalibrated. However, it remains unknown whether this is due to the widely used softmax parameterization and if we can find a softmax-based estimator that is both statistically consistent and possesses a valid probability estimator. In this work, we first show that the cause of the miscalibrated and unbounded estimator in prior literature is due to the symmetric nature of the surrogate losses used and not due to softmax. 
We then propose a novel statistically consistent asymmetric softmax-based surrogate loss that can produce valid estimates without the issue of unboundedness. We further analyze the non-asymptotic properties of our proposed method and empirically validate its performance and calibration on benchmark datasets.", "keywords": "Classification;Learning to Defer;Probability Estimation", "primary_area": "", "supplementary_material": "/attachment/3f16e232d44527af271f980bb77dc78f69cbfc70.zip", "author": "Yuzhou Cao;Hussein Mozannar;Lei Feng;Hongxin Wei;Bo An", "authorids": "~Yuzhou_Cao1;~Hussein_Mozannar1;~Lei_Feng1;~Hongxin_Wei1;~Bo_An2", "gender": "M;M;M;M;M", "homepage": "https://yzcao-nkg.github.io/;https://husseinmozannar.github.io/;https://lfeng1995.github.io/;https://hongxin001.github.io/;https://personal.ntu.edu.sg/boan/", "dblp": "256/5052;;76/847-6;150/6350;42/6178-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;XCfZyIkAAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;cABH034AAAAJ;PEEpuNwAAAAJ", "orcid": ";;0000-0003-2839-5799;;0000-0002-7064-7438", "linkedin": ";;;;", "or_profile": "~Yuzhou_Cao1;~Hussein_Mozannar1;~Lei_Feng1;~Hongxin_Wei1;~Bo_An2", "aff": "Nanyang Technological University;Massachusetts Institute of Technology;Nanyang Technological University;Southern University of Science and Technology;Nanyang Technological University", "aff_domain": "ntu.edu;mit.edu;ntu.edu.sg;sustech.edu.cn;ntu.edu.sg", "position": "PhD student;PhD student;Visiting Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncao2023in,\ntitle={In Defense of Softmax Parametrization for Calibrated and Consistent Learning to Defer},\nauthor={Yuzhou Cao and Hussein Mozannar and Lei Feng and Hongxin Wei and Bo An},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fPAAgjISu0}\n}", "github": "", "project": "", "reviewers": "r3aW;Jwha;fCzx;hiuK;Ztxz", "pdf_size": 696027, "rating": "6;6;7;7;7", "confidence": "3;4;4;4;3", "soundness": "4;3;4;3;3", "novelty": "2;2;4;3;3", "presentation": "3;3;3;2;4", "wc_summary": "92;64;144;128;117", "wc_strengths": "118;55;215;68;53", "wc_weaknesses": "251;90;72;49;101", "wc_questions": "44;61;34;8;86", "wc_limitations": "13;2;1;1;1", "wc_review": "518;272;466;254;358", "wc_reply_reviewers": "23;37;17;15;92", "wc_reply_authors": "30;20;8;16;34", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 109.0, 28.156704352604905 ], "wc_strengths_avg": [ 101.8, 61.30383348535391 ], "wc_weaknesses_avg": [ 112.6, 71.40476174597882 ], "wc_questions_avg": [ 46.6, 26.135033958271414 ], "wc_limitations_avg": [ 3.6, 4.715930449020639 ], "wc_review_avg": [ 373.6, 104.17216518821138 ], "wc_reply_reviewers_avg": [ 36.8, 28.652399550473955 ], "wc_reply_authors_avg": [ 21.6, 9.414881836751857 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.16666666666666669, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13667291433472568822&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "ntu.edu;mit.edu;ntu.edu.sg;sustech.edu.cn;ntu.edu.sg", "author_num": 5, 
"aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Nanyang Technological University;Massachusetts Institute of Technology;Southern University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://web.mit.edu;https://www.sustech.edu.cn", "aff_unique_abbr": "NTU;MIT;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "Singapore;United States;China" }, { "title": "Provable Guarantees for Nonlinear Feature Learning in Three-Layer Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70908", "id": "fShubymWrc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/236b6a814a1d2c0ff504ca7bf380f7ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fShubymWrc", "openreview": "https://openreview.net/forum?id=fShubymWrc", "poster": "/media/PosterPDFs/NeurIPS%202023/70908.png?t=1701831442.9818003", "slides": "https://nips.cc/virtual/2023/poster/70908", "video": "https://nips.cc/virtual/2023/poster/70908", "author_site": "Eshaan Nichani, Alex Damian, Jason Lee", "tldr": "", "abstract": "One of the central questions in the theory of deep learning is to understand how neural networks learn hierarchical features. The ability of deep networks to extract salient features is crucial to both their outstanding generalization ability and the modern deep learning paradigm of pretraining and finetuneing. However, this feature learning process remains poorly understood from a theoretical perspective, with existing analyses largely restricted to two-layer networks. In this work we show that three-layer neural networks have provably richer feature learning capabilities than two-layer networks. We analyze the features learned by a three-layer network trained with layer-wise gradient descent, and present a general purpose theorem which upper bounds the sample complexity and width needed to achieve low test error when the target has specific hierarchical structure. We instantiate our framework in specific statistical learning settings -- single-index models and functions of quadratic features -- and show that in the latter setting three-layer networks obtain a sample complexity improvement over all existing guarantees for two-layer networks. Crucially, this sample complexity improvement relies on the ability of three-layer networks to efficiently learn *nonlinear* features. We then establish a concrete optimization-based depth separation by constructing a function which is efficiently learnable via gradient descent on a three-layer network, yet cannot be learned efficiently by a two-layer network. Our work makes progress towards understanding the provable benefit of three-layer neural networks over two-layer networks in the feature learning regime.", "keywords": "Deep Learning Theory;Feature Learning;Three-Layer Neural Network;Depth Separation;Gradient Descent;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/0aa934bb3e1307a407a7db47757fa6a337d49fa4.zip", "author": "Eshaan Nichani;Alex Damian;Jason D. 
Lee", "authorids": "~Eshaan_Nichani1;~Alex_Damian1;~Jason_D._Lee1", "gender": ";M;M", "homepage": "https://eshaannichani.com/;https://web.math.princeton.edu/~ad27/;https://jasondlee88.github.io/", "dblp": "260/6510;;88/3262", "google_scholar": ";YvHcBcEAAAAJ;GR_DsT0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Eshaan_Nichani1;~Alex_Damian1;~Jason_D._Lee1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnichani2023provable,\ntitle={Provable Guarantees for Nonlinear Feature Learning in Three-Layer Neural Networks},\nauthor={Eshaan Nichani and Alex Damian and Jason D. Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fShubymWrc}\n}", "github": "", "project": "", "reviewers": "8d6Z;oYFv;3iYS;aPJJ", "pdf_size": 1041782, "rating": "6;6;8;8", "confidence": "3;2;4;3", "soundness": "3;3;4;4", "novelty": "3;2;4;3", "presentation": "3;3;4;4", "wc_summary": "94;52;150;100", "wc_strengths": "103;41;52;87", "wc_weaknesses": "159;126;2;136", "wc_questions": "112;19;80;58", "wc_limitations": "1;25;6;29", "wc_review": "469;263;290;410", "wc_reply_reviewers": "34;8;13;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 99.0, 34.77067730142742 ], "wc_strengths_avg": [ 70.75, 25.202926417382564 ], "wc_weaknesses_avg": [ 105.75, 61.08344702126755 ], "wc_questions_avg": [ 67.25, 33.83323070591988 ], "wc_limitations_avg": [ 15.25, 11.96609794377432 ], "wc_review_avg": [ 358.0, 84.6669947500205 ], "wc_reply_reviewers_avg": [ 16.25, 10.40132203135736 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10407699469303192718&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "princeton.edu;princeton.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Curriculum Learning for Graph Neural Networks: Which Edges Should We Learn First", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70907", "id": "fTyGT5fulj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a07e5160196058120105ad7cb3505d3c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fTyGT5fulj", "openreview": "https://openreview.net/forum?id=fTyGT5fulj", "poster": "/media/PosterPDFs/NeurIPS%202023/70907.png?t=1701405295.4794078", "slides": "https://nips.cc/virtual/2023/poster/70907", "video": "https://nips.cc/virtual/2023/poster/70907", "author_site": "Zheng Zhang, Junxiang Wang, Liang Zhao", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have achieved great success in representing data with dependencies by recursively propagating and 
aggregating messages along the edges. However, edges in real-world graphs often have varying degrees of difficulty, and some edges may even be noisy to the downstream tasks. Therefore, existing GNNs may lead to suboptimal learned representations because they usually treat every edge in the graph equally. On the other hand, Curriculum Learning (CL), which mimics the human learning principle of learning data samples in a meaningful order, has been shown to be effective in improving the generalization ability and robustness of representation learners by gradually proceeding from easy to more difficult samples during training. Unfortunately, existing CL strategies are designed for independent data samples and cannot trivially generalize to handle data dependencies. To address these issues, we propose a novel CL strategy to gradually incorporate more edges into training according to their difficulty from easy to hard, where the degree of difficulty is measured by how well the edges are expected given the model training status. We demonstrate the strength of our proposed method in improving the generalization ability and robustness of learned representations through extensive experiments on nine synthetic datasets and nine real-world datasets. The code for our proposed method is available at https://github.com/rollingstonezz/Curriculum_learning_for_GNNs", "keywords": "Graph neural networks;Curriculum learning;Graph structure learning", "primary_area": "", "supplementary_material": "/attachment/efd45bb4f3745af88a88c9ad22d72a4fae413e70.zip", "author": "Zheng Zhang;Junxiang Wang;Liang Zhao", "authorids": "~Zheng_Zhang10;~Junxiang_Wang1;~Liang_Zhao6", "gender": "M;M;M", "homepage": ";https://xianggebenben.github.io/Junxiang_Wang/;https://cs.emory.edu/~lzhao41/", "dblp": "181/2621-18;53/8843;63/5422-2", "google_scholar": "fRdZRHsAAAAJ;;qnvyqtwAAAAJ", "orcid": ";0000-0002-6635-4296;0000-0002-2648-9989", "linkedin": ";;", "or_profile": "~Zheng_Zhang10;~Junxiang_Wang1;~Liang_Zhao6", "aff": "Emory University;NEC Labs America;Emory University", "aff_domain": "emory.edu;nec.com;emory.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023curriculum,\ntitle={Curriculum Learning for Graph Neural Networks: Which Edges Should We Learn First},\nauthor={Zheng Zhang and Junxiang Wang and Liang Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fTyGT5fulj}\n}", "github": "", "project": "", "reviewers": "u5nt;f4A7;TBej;4bWL;tVbH", "pdf_size": 616951, "rating": "3;5;5;6;6", "confidence": "3;4;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "1;3;3;3;2", "wc_summary": "150;45;145;38;115", "wc_strengths": "72;71;76;44;88", "wc_weaknesses": "303;9;113;191;846", "wc_questions": "329;39;5;5;164", "wc_limitations": "53;1;19;3;1", "wc_review": "907;165;358;281;1214", "wc_reply_reviewers": "387;0;132;13;426", "wc_reply_authors": "1312;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 98.6, 48.18547498987636 ], "wc_strengths_avg": [ 70.2, 14.427751037497147 ], "wc_weaknesses_avg": [ 292.4, 293.041703516752 ], "wc_questions_avg": [ 108.4, 124.86408610965765 ], "wc_limitations_avg": [ 15.4, 19.975985582694037 ], 
"wc_review_avg": [ 585.0, 404.88763873450125 ], "wc_reply_reviewers_avg": [ 191.6, 181.81595089540411 ], "wc_reply_authors_avg": [ 262.4, 524.8 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.372677996249965, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12029197126341561735&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "emory.edu;nec.com;emory.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Emory University;NEC Labs America", "aff_unique_dep": ";", "aff_unique_url": "https://www.emory.edu;https://www.nec-labs.com", "aff_unique_abbr": "Emory;NEC LA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Asynchronous Proportional Response Dynamics: Convergence in Markets with Adversarial Scheduling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70906", "id": "fU9U7OYxfE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5049acb0d5d976130388f3e8edcae183-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fU9U7OYxfE", "openreview": "https://openreview.net/forum?id=fU9U7OYxfE", "poster": "/media/PosterPDFs/NeurIPS%202023/70906.png?t=1701721454.0465102", "slides": "https://nips.cc/virtual/2023/poster/70906", "video": "https://nips.cc/virtual/2023/poster/70906", "author_site": "Yoav Kolumbus, Menahem Levy, Noam Nisan", "tldr": "", "abstract": "We study Proportional Response Dynamics (PRD) in linear Fisher markets, where participants act asynchronously. We model this scenario as a sequential process in which at each step, an adversary selects a subset of the players to update their bids, subject to liveness constraints. We show that if every bidder individually applies the PRD update rule whenever they are included in the group of bidders selected by the adversary, then, in the generic case, the entire dynamic converges to a competitive equilibrium of the market. 
Our proof technique reveals additional properties of linear Fisher markets, such as the uniqueness of the market equilibrium for generic parameters and the convergence of associated no swap regret dynamics and best response dynamics under certain conditions.", "keywords": "Asynchronous Dynamics;Fisher Markets;Proportional Response;Best Response;Game Dynamics;Competitive Equilibrium;Convergence", "primary_area": "", "supplementary_material": "", "author": "Yoav Kolumbus;Menahem Levy;Noam Nisan", "authorids": "~Yoav_Kolumbus1;~Menahem_Levy1;~Noam_Nisan3", "gender": ";;", "homepage": ";https://sites.google.com/view/menylevy/;https://www.cs.huji.ac.il/~noam/", "dblp": ";;n/NoamNisan.html", "google_scholar": ";;https://scholar.google.co.il/citations?user=zXQZPnMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yoav_Kolumbus1;~Menahem_Levy1;~Noam_Nisan3", "aff": ";Hebrew University of Jerusalem;Hebrew University, Hebrew University of Jerusalem", "aff_domain": ";huji.ac.il;cs.huji.ac.il", "position": ";MS student;Full Professor", "bibtex": "@inproceedings{\nkolumbus2023asynchronous,\ntitle={Asynchronous Proportional Response Dynamics: Convergence in Markets with Adversarial Scheduling},\nauthor={Yoav Kolumbus and Menahem Levy and Noam Nisan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fU9U7OYxfE}\n}", "github": "", "project": "", "reviewers": "j9g9;pHCS;h7DH;zQMj;pfur", "pdf_size": 702083, "rating": "4;6;7;7;8", "confidence": "2;4;1;4;4", "soundness": "3;4;3;4;4", "novelty": "2;2;3;4;3", "presentation": "2;3;3;4;4", "wc_summary": "102;250;87;569;191", "wc_strengths": "57;35;58;75;39", "wc_weaknesses": "290;225;44;7;8", "wc_questions": "2;2;161;176;109", "wc_limitations": "2;1;1;34;46", "wc_review": "453;513;351;861;393", "wc_reply_reviewers": "0;74;26;10;5", "wc_reply_authors": "0;211;0;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.0, 1.2649110640673518 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 239.8, 175.03988116997795 ], "wc_strengths_avg": [ 52.8, 14.455448799674121 ], "wc_weaknesses_avg": [ 114.8, 119.06200065512085 ], "wc_questions_avg": [ 90.0, 75.21436033098999 ], "wc_limitations_avg": [ 16.8, 19.3225257148231 ], "wc_review_avg": [ 514.2, 181.8465286993403 ], "wc_reply_reviewers_avg": [ 23.0, 26.951808844676826 ], "wc_reply_authors_avg": [ 42.2, 84.4 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.34968930309011764, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7859914843032281501&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 3, "email": ";huji.ac.il;cs.huji.ac.il", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Two Sides of One Coin: the Limits of Untuned SGD and the Power of Adaptive Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70905", "id": "fUZUoSLXw3", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb1a323fa10d4102ff13422476a744ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fUZUoSLXw3", "openreview": "https://openreview.net/forum?id=fUZUoSLXw3", "poster": "/media/PosterPDFs/NeurIPS%202023/70905.png?t=1701880056.5876005", "slides": "https://nips.cc/virtual/2023/poster/70905", "video": "https://nips.cc/virtual/2023/poster/70905", "author_site": "Junchi YANG, Xiang Li, Ilyas Fatkhullin, Niao He", "tldr": "", "abstract": "The classical analysis of Stochastic Gradient Descent (SGD) with polynomially decaying stepsize $\\eta_t = \\eta/\\sqrt{t}$ relies on well-tuned $\\eta$ depending on problem parameters such as Lipschitz smoothness constant, which is often unknown in practice. In this work, we prove that SGD with arbitrary $\\eta > 0$, referred to as untuned SGD, still attains an order-optimal convergence rate $\\widetilde{\\mathcal{O}}(T^{-1/4})$ in terms of gradient norm for minimizing smooth objectives. Unfortunately, it comes at the expense of a catastrophic exponential dependence on the smoothness constant, which we show is unavoidable for this scheme even in the noiseless setting. We then examine three families of adaptive methods \u2014 Normalized SGD (NSGD), AMSGrad, and AdaGrad \u2014 unveiling their power in preventing such exponential dependency in the absence of information about the smoothness parameter and boundedness of stochastic gradients. Our results provide theoretical justification for the advantage of adaptive methods over untuned SGD in alleviating the issue with large gradients.", "keywords": "Nonconvex optimization;Stochastic Gradient Descent;Adaptive methods", "primary_area": "", "supplementary_material": "/attachment/abe3b57ea739af5fd8c81555501943371ef6d85b.zip", "author": "Junchi YANG;Xiang Li;Ilyas Fatkhullin;Niao He", "authorids": "~Junchi_YANG1;~Xiang_Li38;~Ilyas_Fatkhullin1;~Niao_He3", "gender": "M;M;Not Specified;", "homepage": ";https://shawnli.me;https://ai.ethz.ch/people/ilyas-fatkhullin.html;http://people.inf.ethz.ch/niaohe", "dblp": "259/3033;40/1491;294/8711;https://dblp.uni-trier.de/pers/h/He:Niao.html", "google_scholar": ";VFwF8tEAAAAJ;UCOWHb4AAAAJ;iNcA81MAAAAJ", "orcid": ";;;", "linkedin": "junchi-yang-455206b0/;;;", "or_profile": "~Junchi_YANG1;~Xiang_Li38;~Ilyas_Fatkhullin1;~Niao_He1", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2023two,\ntitle={Two Sides of One Coin: the Limits of Untuned {SGD} and the Power of Adaptive Methods},\nauthor={Junchi YANG and Xiang Li and Ilyas Fatkhullin and Niao He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fUZUoSLXw3}\n}", "github": "", "project": "", "reviewers": "CaFA;hiCA;vyoV;WWqH;viw9", "pdf_size": 674734, "rating": "4;5;6;7;7", "confidence": "3;4;3;2;4", "soundness": "3;3;3;4;4", "novelty": "2;2;3;4;4", "presentation": "2;3;3;4;4", "wc_summary": "77;75;82;95;48", "wc_strengths": "81;40;61;181;93", "wc_weaknesses": "428;136;38;240;151", "wc_questions": "6;17;26;7;31", "wc_limitations": "6;1;10;62;1", "wc_review": "598;269;217;585;324", "wc_reply_reviewers": "87;191;20;21;0", "wc_reply_authors": "312;501;0;0;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 
3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 75.4, 15.370100845472681 ], "wc_strengths_avg": [ 91.2, 48.383468251046246 ], "wc_weaknesses_avg": [ 198.6, 131.38888841907448 ], "wc_questions_avg": [ 17.4, 9.971960689854328 ], "wc_limitations_avg": [ 16.0, 23.246505113672463 ], "wc_review_avg": [ 398.6, 161.14912348505032 ], "wc_reply_reviewers_avg": [ 63.8, 70.0554066435989 ], "wc_reply_authors_avg": [ 162.6, 207.9188303160635 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1833396994056422, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10204382210812958294&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Learning Domain-Aware Detection Head with Prompt Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70904", "id": "fW5ZUSVTkv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d18ab3b5fabfa6fe47c62e711af02f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fW5ZUSVTkv", "openreview": "https://openreview.net/forum?id=fW5ZUSVTkv", "poster": "/media/PosterPDFs/NeurIPS%202023/70904.png?t=1697525649.0825078", "slides": "https://nips.cc/virtual/2023/poster/70904", "video": "https://nips.cc/virtual/2023/poster/70904", "author_site": "Haochen Li, Rui Zhang, Hantao Yao, Xinkai Song, Yifan Hao, Yongwei Zhao, Ling Li, Yunji Chen", "tldr": "", "abstract": "Domain adaptive object detection (DAOD) aims to generalize detectors trained on an annotated source domain to an unlabelled target domain.\n However, existing methods focus on reducing the domain bias of the detection backbone by inferring a discriminative visual encoder, while ignoring the domain bias in the detection head.\n Inspired by the high generalization of vision-language models (VLMs), applying a VLM as the robust detection backbone following a domain-aware detection head is a reasonable way to learn the discriminative detector for each domain, rather than reducing the domain bias in traditional methods.\n To address the above issue, we propose a novel DAOD framework named Domain-Aware detection head with Prompt tuning (DA-Pro), which applies the learnable domain-adaptive prompt to generate the dynamic detection head for each domain. \n Formally, the domain-adaptive prompt consists of the domain-invariant tokens, domain-specific tokens, and the domain-related textual description along with the class label. \n Furthermore, two constraints between the source and target domains are applied to ensure that the domain-adaptive prompt can capture the domain-shared and domain-specific knowledge.\n A prompt ensemble strategy is also proposed to reduce the effect of prompt disturbance. 
\n Comprehensive experiments over multiple cross-domain adaptation tasks demonstrate that using the domain-adaptive prompt can produce an effective domain-related detection head for boosting domain-adaptive object detection.\n Our code is available at https://github.com/Therock90421/DA-Pro.", "keywords": "domain adaptation;object detection;prompt tuning", "primary_area": "", "supplementary_material": "/attachment/ee69eccadce78b184932cc5dd665e71dc686e956.zip", "author": "Haochen Li;Rui Zhang;Hantao Yao;Xinkai Song;Yifan Hao;Yongwei Zhao;Ling Li;Yunji Chen", "authorids": "~Haochen_Li2;~Rui_Zhang1;~Hantao_Yao2;~Xinkai_Song1;~Yifan_Hao3;~Yongwei_Zhao1;~Ling_Li6;~Yunji_Chen1", "gender": "M;F;M;;M;;F;M", "homepage": "https://github.com/Therock90421;;http://www.hantaoyao.com/;;https://www.ict.ac.cn/sourcedb_2018_ict_cas/cn/jssrck/202311/t20231108_6923467.html;;;", "dblp": "49/11531-2;60/2536-40;167/3478;;;;92/5001-1;48/474", "google_scholar": "QxfHHQcAAAAJ;dse6jAsAAAAJ;;;;;;", "orcid": "0000-0003-0813-6351;;;;;;0000-0001-8877-9052;", "linkedin": ";;;;;;;", "or_profile": "~Haochen_Li2;~Rui_Zhang1;~Hantao_Yao2;~Xinkai_Song1;~Yifan_Hao3;~Yongwei_Zhao1;~Ling_Li6;~Yunji_Chen1", "aff": "Institute of Software, Chinese Academy of Sciences;Institute of Computing Technology, CAS;Institute of automation, Chinese academy of science;;Institute of Computing Technology, Chinese Academy of Sciences;;Institute of Software, CAS;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "iscas.ac.cn;ict.ac.cn;nlpr.ia.ac.cn;;ict.ac.cn;;iscas.ac.cn;ict.ac.cn", "position": "PhD student;Assistant Professor;Associate Professor;;Associate Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning Domain-Aware Detection Head with Prompt Tuning},\nauthor={Haochen Li and Rui Zhang and Hantao Yao and Xinkai Song and Yifan Hao and Yongwei Zhao and Ling Li and Yunji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fW5ZUSVTkv}\n}", "github": "", "project": "", "reviewers": "P2GS;85mu;A2TR;eDFv;x91n", "pdf_size": 6859064, "rating": "5;5;6;6;6", "confidence": "4;4;3;5;5", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;3;4;3", "wc_summary": "86;67;66;85;81", "wc_strengths": "47;46;39;67;18", "wc_weaknesses": "265;126;125;199;13", "wc_questions": "11;8;3;87;119", "wc_limitations": "10;11;1;29;1", "wc_review": "419;258;234;467;232", "wc_reply_reviewers": "345;32;0;0;0", "wc_reply_authors": "429;30;25;30;29", "reply_reviewers": "2;1;0;0;0", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 77.0, 8.740709353364863 ], "wc_strengths_avg": [ 43.4, 15.755633912984905 ], "wc_weaknesses_avg": [ 145.6, 84.2605483010881 ], "wc_questions_avg": [ 45.6, 48.01499765698214 ], "wc_limitations_avg": [ 10.4, 10.229369482035539 ], "wc_review_avg": [ 322.0, 100.37330322351656 ], "wc_reply_reviewers_avg": [ 75.4, 135.36853400993897 ], "wc_reply_authors_avg": [ 108.6, 160.21061138389052 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.21821789023599236, "gs_citation": 16, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13017349468876151396&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "iscas.ac.cn;ict.ac.cn;nlpr.ia.ac.cn;;ict.ac.cn;;iscas.ac.cn;ict.ac.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Software", "aff_unique_url": "http://www.ios.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking Tokenizer and Decoder in Masked Graph Modeling for Molecules", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70903", "id": "fWLf8DV0fI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51fd9a7d1706023cb9f8210cc6ac357c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fWLf8DV0fI", "openreview": "https://openreview.net/forum?id=fWLf8DV0fI", "poster": "/media/PosterPDFs/NeurIPS%202023/70903.png?t=1701933974.3839347", "slides": "https://nips.cc/virtual/2023/poster/70903", "video": "https://nips.cc/virtual/2023/poster/70903", "author_site": "ZHIYUAN LIU, Yaorui Shi, An Zhang, Enzhi Zhang, Kenji Kawaguchi, Xiang Wang, Tat-Seng Chua", "tldr": "", "abstract": "Masked graph modeling excels in the self-supervised representation learning of molecular graphs. Scrutinizing previous studies, we can reveal a common scheme consisting of three key components: (1) graph tokenizer, which breaks a molecular graph into smaller fragments (\\ie subgraphs) and converts them into tokens; (2) graph masking, which corrupts the graph with masks; (3) graph autoencoder, which first applies an encoder on the masked graph to generate the representations, and then employs a decoder on the representations to recover the tokens of the original graph. However, the previous MGM studies focus extensively on graph masking and encoder, while there is limited understanding of tokenizer and decoder. To bridge the gap, we first summarize popular molecule tokenizers at the granularity of node, edge, motif, and Graph Neural Networks (GNNs), and then examine their roles as the MGM's reconstruction targets. Further, we explore the potential of adopting an expressive decoder in MGM. Our results show that a subgraph-level tokenizer and a sufficiently expressive decoder with remask decoding have a \\yuan{large impact on the encoder's representation learning}. Finally, we propose a novel MGM method SimSGT, featuring a Simple GNN-based Tokenizer (SGT) and an effective decoding strategy. We empirically validate that our method outperforms the existing molecule self-supervised learning methods. 
Our codes and checkpoints are available at https://github.com/syr-cn/SimSGT.", "keywords": "Molecular Representation Learning;Masked Graph Modeling;Graph Tokenizer", "primary_area": "", "supplementary_material": "/attachment/7a51bcfe9166499a95401ebd5efd3cb75ce4c5ba.pdf", "author": "Zhiyuan Liu;Yaorui Shi;An Zhang;Enzhi Zhang;Kenji Kawaguchi;Xiang Wang;Tat-Seng Chua", "authorids": "~Zhiyuan_Liu5;~Yaorui_Shi2;~An_Zhang2;~Enzhi_Zhang1;~Kenji_Kawaguchi1;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;M;M;;M;F;M", "homepage": "https://acharkq.github.io/;;;https://ml.comp.nus.edu.sg/#members;https://github.com/xiangwang1223;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": "53/3245-10;;;;31/2864-10;78/5581-3;", "google_scholar": "https://scholar.google.com.sg/citations?user=zF0AH64AAAAJ;EWU3rdIAAAAJ;;aLl3rYoAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": ";;0000-0002-6421-0192;;0000-0002-6148-6329;;0000-0001-6097-7807", "linkedin": ";https://www.linkedin.cn/incareer/in/ACoAADX8m7MBB85jekmcqEP6gMuGa_pp35cLmbo;;;;;", "or_profile": "~Zhiyuan_Liu5;~Yaorui_Shi2;~Enzhi_Zhang1;~Kenji_Kawaguchi1;~Xiang_Wang6;~AN_ZHANG1;~Tat-seng_Chua1", "aff": "National University of Singapore;Xi'an Jiaotong University;Hokkaido University;National University of Singapore;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;xjtu.edu.cn;hokudai.ac.jp;nus.edu;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": "PhD student;Undergrad student;PhD student;Presidential Young Professor;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nliu2023rethinking,\ntitle={Rethinking Tokenizer and Decoder in Masked Graph Modeling for Molecules},\nauthor={Zhiyuan Liu and Yaorui Shi and An Zhang and Enzhi Zhang and Kenji Kawaguchi and Xiang Wang and Tat-Seng Chua},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fWLf8DV0fI}\n}", "github": "", "project": "", "reviewers": "tR9n;Lzf1;9pKQ;Qgfd;PiLY", "pdf_size": 1772701, "rating": "5;5;5;6;8", "confidence": "3;4;4;4;3", "soundness": "2;2;3;3;3", "novelty": "3;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "72;59;26;99;37", "wc_strengths": "23;36;28;88;44", "wc_weaknesses": "82;224;108;43;52", "wc_questions": "14;111;1;254;122", "wc_limitations": "1;1;2;13;4", "wc_review": "192;431;165;497;259", "wc_reply_reviewers": "23;29;0;21;0", "wc_reply_authors": "110;422;98;38;0", "reply_reviewers": "1;1;0;1;0", "reply_authors": "3;5;2;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 58.6, 25.85033848907979 ], "wc_strengths_avg": [ 43.8, 23.224125387191656 ], "wc_weaknesses_avg": [ 101.8, 65.26990117964021 ], "wc_questions_avg": [ 100.4, 91.12321328838222 ], "wc_limitations_avg": [ 4.2, 4.534313619501854 ], "wc_review_avg": [ 308.8, 132.02484614647352 ], "wc_reply_reviewers_avg": [ 14.6, 12.208193969625484 ], "wc_reply_authors_avg": [ 133.6, 149.65774286684933 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 1.3564659966250538 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 
-0.4900980294098034, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15992963584682712974&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nus.edu.sg;xjtu.edu.cn;hokudai.ac.jp;nus.edu;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0;0", "aff_unique_norm": "National University of Singapore;Xi'an Jiao Tong University;Hokkaido University;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.xjtu.edu.cn;https://www.hokudai.ac.jp;http://www.ustc.edu.cn", "aff_unique_abbr": "NUS;XJTU;Hokkaido U;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;1;0;0", "aff_country_unique": "Singapore;China;Japan" }, { "title": "Sample based Explanations via Generalized Representers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70902", "id": "fX64q0SNfL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/49cf35ff2298c10452db99d08036805b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fX64q0SNfL", "openreview": "https://openreview.net/forum?id=fX64q0SNfL", "poster": "/media/PosterPDFs/NeurIPS%202023/70902.png?t=1701639829.0084019", "slides": "https://nips.cc/virtual/2023/poster/70902", "video": "https://nips.cc/virtual/2023/poster/70902", "author_site": "Che-Ping Tsai, Chih-Kuan Yeh, Pradeep Ravikumar", "tldr": "", "abstract": "We propose a general class of sample based explanations of machine learning models, which we term generalized representers. To measure the effect of a training sample on a model's test prediction, generalized representers use two components: a global sample importance that quantifies the importance of the training point to the model and is invariant to test samples, and a local sample importance that measures similarity between the training sample and the test point with a kernel. A key contribution of the paper is to show that generalized representers are the only class of sample based explanations satisfying a natural set of axiomatic properties. We discuss approaches to extract global importances given a kernel, and also natural choices of kernels given modern non-linear models. As we show, many popular existing sample based explanations could be cast as generalized representers with particular choices of kernels and approaches to extract global importances. 
Additionally, we conduct empirical comparisons of different generalized representers on two image classification datasets.", "keywords": "explainable machine learning;sample based explanation;representer point", "primary_area": "", "supplementary_material": "/attachment/3841ac8c91504a6fb7b4ea304daa42e5d909f761.pdf", "author": "Che-Ping Tsai;Chih-Kuan Yeh;Pradeep Kumar Ravikumar", "authorids": "~Che-Ping_Tsai1;~Chih-Kuan_Yeh1;~Pradeep_Kumar_Ravikumar1", "gender": "M;M;M", "homepage": "https://chepingt.github.io/;https://chihkuanyeh.github.io/;http://www.cs.cmu.edu/~pradeepr/", "dblp": "218/6612;;94/3594", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Che-Ping_Tsai1;~Chih-Kuan_Yeh1;~Pradeep_Kumar_Ravikumar1", "aff": "Carnegie Mellon University;Google;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;google.com;cmu.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\ntsai2023sample,\ntitle={Sample based Explanations via Generalized Representers},\nauthor={Che-Ping Tsai and Chih-Kuan Yeh and Pradeep Kumar Ravikumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fX64q0SNfL}\n}", "github": "", "project": "", "reviewers": "SFDZ;nmaD;83iG;3USW", "pdf_size": 599444, "rating": "4;5;5;7", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "97;182;99;105", "wc_strengths": "108;47;46;46", "wc_weaknesses": "129;96;144;61", "wc_questions": "76;141;8;139", "wc_limitations": "15;7;15;7", "wc_review": "425;473;312;358", "wc_reply_reviewers": "105;373;15;0", "wc_reply_authors": "244;445;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 120.75, 35.48503205578375 ], "wc_strengths_avg": [ 61.75, 26.705570579937064 ], "wc_weaknesses_avg": [ 107.5, 31.972644557496334 ], "wc_questions_avg": [ 91.0, 54.58479641805033 ], "wc_limitations_avg": [ 11.0, 4.0 ], "wc_review_avg": [ 392.0, 61.656305435859515 ], "wc_reply_reviewers_avg": [ 123.25, 149.68028427284602 ], "wc_reply_authors_avg": [ 172.25, 186.33353831235 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4733085809492710727&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "andrew.cmu.edu;google.com;cmu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi Time Scale World Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70901", "id": "fY7dShbtmo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54d8aab579b5a9ed3395764c7341ebec-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=fY7dShbtmo", "openreview": "https://openreview.net/forum?id=fY7dShbtmo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70901", "video": "https://nips.cc/virtual/2023/poster/70901", "author_site": "Vaisakh Shaj Kumar, SALEH GHOLAM ZADEH, Ozan Demir, Luiz Douat, Gerhard Neumann", "tldr": "", "abstract": "Intelligent agents use internal world models to reason and make predictions about different courses of their actions at many scales. Devising learning paradigms and architectures that allow machines to learn world models that operate at multiple levels of temporal abstractions while dealing with complex uncertainty predictions is a major technical hurdle. In this work, we propose a probabilistic formalism to learn multi-time scale world models which we call the Multi Time Scale State Space (MTS3) model. Our model uses a computationally efficient inference scheme on multiple time scales for highly accurate long-horizon predictions and uncertainty estimates over several seconds into the future. Our experiments, which focus on action conditional long horizon future predictions, show that MTS3 outperforms recent methods on several system identification benchmarks including complex simulated and real-world dynamical systems. Code is available at this repository:\nhttps://github.com/ALRhub/MTS3.", "keywords": "Hierarchical Models; Multi Time Scale Learning; World Models", "primary_area": "", "supplementary_material": "/attachment/7b1cba3594a5f365969d239eb8fca2c7f9d952fa.pdf", "author": "Vaisakh Shaj;Saleh GHOLAM ZADEH;Ozan Demir;Luiz Ricardo Douat;Gerhard Neumann", "authorids": "~Vaisakh_Shaj1;~Saleh_GHOLAM_ZADEH1;~Ozan_Demir1;~Luiz_Ricardo_Douat1;~Gerhard_Neumann2", "gender": "M;M;M;M;M", "homepage": ";;https://www.bosch.com/research/;;https://alr.anthropomatik.kit.edu/", "dblp": "190/3994;;;40/475;60/4878", "google_scholar": ";;;;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;;", "linkedin": ";saleh-gholam-zadeh-017248153/?originalSubdomain=ch;;;", "or_profile": "~Vaisakh_Shaj1;~Saleh_GHOLAM_ZADEH1;~Ozan_Demir1;~Luiz_Ricardo_Douat1;~Gerhard_Neumann1", "aff": "Karlsruhe Institute of Technology;SAP - SAP germany;;Robert Bosch GmbH, Bosch;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;sap.com;;de.bosch.com;kit.edu", "position": "PhD student;PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\nshaj2023multi,\ntitle={Multi Time Scale World Models},\nauthor={Vaisakh Shaj and Saleh GHOLAM ZADEH and Ozan Demir and Luiz Ricardo Douat and Gerhard Neumann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fY7dShbtmo}\n}", "github": "", "project": "", "reviewers": "hxxZ;BLVQ;HTHM;tb9F", "pdf_size": 584860, "rating": "6;6;7;7", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "2;3;2;3", "wc_summary": "95;85;99;59", "wc_strengths": "33;215;53;85", "wc_weaknesses": "303;156;437;137", "wc_questions": "318;332;148;40", "wc_limitations": "26;59;58;190", "wc_review": "775;847;795;511", "wc_reply_reviewers": "43;18;335;17", "wc_reply_authors": "10;8;179;8", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.5, 15.580436450882884 ], "wc_strengths_avg": [ 96.5, 
70.88547100781655 ], "wc_weaknesses_avg": [ 258.25, 121.56351220658277 ], "wc_questions_avg": [ 209.5, 121.74871662567946 ], "wc_limitations_avg": [ 83.25, 63.04512272967672 ], "wc_review_avg": [ 732.0, 130.2727907124124 ], "wc_reply_reviewers_avg": [ 103.25, 134.2057655244364 ], "wc_reply_authors_avg": [ 51.25, 73.7610161264065 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8508473149165429954&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "kit.edu;sap.com;;de.bosch.com;kit.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Karlsruhe Institute of Technology;SAP SE;Robert Bosch GmbH", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kit.edu;https://www.sap.com;https://www.bosch.com", "aff_unique_abbr": "KIT;SAP;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "American Stories: A Large-Scale Structured Text Dataset of Historical U.S. Newspapers", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73515", "id": "fZq8Tw0jdm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ffeb860479ccae44d84c0de32acd693d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=fZq8Tw0jdm", "openreview": "https://openreview.net/forum?id=fZq8Tw0jdm", "poster": "/media/PosterPDFs/NeurIPS%202023/73515.png?t=1701812994.2098439", "slides": "https://nips.cc/virtual/2023/poster/73515", "video": "https://nips.cc/virtual/2023/poster/73515", "author_site": "Melissa Dell, Jacob Carlson, Tom Bryan, Emily Silcock, Abhishek Arora, Zejiang Shen, Luca D'Amico-Wong, Quan Le, Pablo Querubin, Leander Heldring", "tldr": "", "abstract": "Existing full text datasets of U.S. public domain newspapers do not recognize the often complex layouts of newspaper scans, and as a result the digitized content scrambles texts from articles, headlines, captions, advertisements, and other layout regions. OCR quality can also be low. This study develops a novel deep learning pipeline for extracting full article texts from newspaper images and applies it to the nearly 20 million scans in the Library of Congress's public domain Chronicling America collection. The pipeline includes layout detection, legibility classification, custom OCR, and association of article texts spanning multiple bounding boxes. To achieve high scalability, it is built with efficient architectures designed for mobile phones. The resulting American Stories dataset provides high quality data that could be used for pre-training a large language model to achieve better understanding of historical English and historical world knowledge. The dataset could also be added to the external database of a retrieval-augmented language model to make historical information - ranging from interpretations of political events to minutiae about the lives of people's ancestors - more widely accessible. Furthermore, structured article texts facilitate using transformer-based methods for popular social science applications like topic classification, detection of reproduced content, and news story clustering. 
Finally, American Stories provides a massive silver quality dataset for innovating multimodal layout analysis models and other multimodal applications.", "keywords": "structured text dataset;layout dataset;historical texts;American newspapers", "primary_area": "", "supplementary_material": "/attachment/2c7d3bb54f84ee64b8b6f9151d8ee9d1a4a08126.pdf", "author": "Melissa Dell;Jacob Carlson;Tom Bryan;Emily Silcock;Abhishek Arora;Zejiang Shen;Luca D'Amico-Wong;Quan Le;Pablo Querubin;Leander Heldring", "authorids": "~Melissa_Dell1;~Jacob_Carlson1;~Tom_Bryan1;~Emily_Silcock1;~Abhishek_Arora1;~Zejiang_Shen1;~Luca_D'Amico-Wong1;~Quan_Le1;~Pablo_Querubin1;~Leander_Heldring1", "gender": ";M;M;;M;;M;M;;", "homepage": ";https://jscarlson.github.io/;;;https://econabhishek.github.io/;;;https://qlquanle.github.io/;;http://www.leanderheldring.com", "dblp": ";129/0862;149/9220;;344/4529;;330/9302;;;", "google_scholar": ";ijle68oAAAAJ;-PBqxS8AAAAJ;dfGziwkAAAAJ;https://scholar.google.com/citations?hl=en;;;;;", "orcid": ";;;;;;;;;", "linkedin": ";;;;abhishek-arora1996/;;luca-d-amico-wong-41879712b/;quan-le-0aa53272/;;", "or_profile": "~Melissa_Dell1;~Jacob_Carlson1;~Tom_Bryan1;~Emily_Silcock1;~Abhishek_Arora1;~Zejiang_Shen1;~Luca_D'Amico-Wong1;~Quan_Le1;~Pablo_Querubin1;~Leander_Heldring1", "aff": ";Harvard University;Harvard University, Harvard University;Department of Economics, Harvard University;Harvard University, Harvard University;;Harvard University;Princeton University;;Northwestern University", "aff_domain": ";g.harvard.edu;fas.harvard.edu;fas.harvard.edu;fas.harvard.edu;;harvard.edu;princeton.edu;;northwestern.edu", "position": ";PhD student;Researcher;Researcher;Researcher;;Undergrad student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\ndell2023american,\ntitle={American Stories: A Large-Scale Structured Text Dataset of Historical U.S. 
Newspapers},\nauthor={Melissa Dell and Jacob Carlson and Tom Bryan and Emily Silcock and Abhishek Arora and Zejiang Shen and Luca D'Amico-Wong and Quan Le and Pablo Querubin and Leander Heldring},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=fZq8Tw0jdm}\n}", "github": "", "project": "", "reviewers": "Z8NQ;1942;CqsX;z8qw;fRyu", "pdf_size": 8771320, "rating": "5;6;6;6;8", "confidence": "3;4;4;4;4", "wc_summary_and_contributions": "155;60;73;121;99", "wc_strengths": "15;64;71;80;166", "wc_improvement": "37;181;86;315;129", "wc_limitations": "35;34;61;147;76", "wc_correctness": "38;35;17;86;16", "wc_clarity": "43;38;5;10;15", "wc_relation_to_prior_work": "16;37;33;15;72", "wc_documentation": "14;41;17;2;46", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "354;491;364;777;620", "wc_reply_reviewers": "0;0;80;55;0", "wc_reply_authors": "2353;636;764;583;520", "reply_reviewers": "0;0;1;1;0", "reply_authors": "4;1;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 101.6, 34.00941046239996 ], "wc_strengths_avg": [ 79.2, 48.89744369596431 ], "wc_improvement_avg": [ 149.6, 95.38469478904884 ], "wc_limitations_avg": [ 70.6, 41.38888739746455 ], "wc_correctness_avg": [ 38.4, 25.44484230644788 ], "wc_clarity_avg": [ 22.2, 15.354478174135387 ], "wc_relation_to_prior_work_avg": [ 34.6, 20.674622124720923 ], "wc_documentation_avg": [ 24.0, 16.769019053003667 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 521.2, 160.48351940308387 ], "wc_reply_reviewers_avg": [ 27.0, 34.0 ], "wc_reply_authors_avg": [ 971.2, 695.5491068213661 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17353738538484844380&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";g.harvard.edu;fas.harvard.edu;fas.harvard.edu;fas.harvard.edu;;harvard.edu;princeton.edu;;northwestern.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;2", "aff_unique_norm": "Harvard University;Princeton University;Northwestern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://www.princeton.edu;https://www.northwestern.edu", "aff_unique_abbr": "Harvard;Princeton;NU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A fast heuristic to optimize time-space tradeoff for large models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70900", "id": "fbpTObq6TW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9be39b35906526b8d240056daac72c6f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fbpTObq6TW", "openreview": "https://openreview.net/forum?id=fbpTObq6TW", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70900", "video": "https://nips.cc/virtual/2023/poster/70900", "author_site": "Akifumi Imanishi, Zijian Xu, Masayuki Takagi, Sixue Wang, Emilio Castillo", "tldr": "", "abstract": "Training large-scale neural networks is heavily constrained by GPU memory. 
In order to circumvent this limitation, gradient checkpointing, or recomputation, is a powerful technique. There is active research in this area with methods such as Checkmate or Moccasin. However, both Checkmate and Moccasin rely on mixed integer linear programming or constraint programming, resulting in limited scalability due to their exponentially large search space.\n\nThis paper proposes a novel algorithm for recomputation (FastSA) based on a simulated annealing heuristic that achieves comparable or even better solutions than state-of-the-art alternatives. FastSA can optimize computational graphs with thousands of nodes within 3 to 30 seconds, several orders of magnitude faster than current solutions.\n\nWe applied FastSA to PyTorch models and verified its effectiveness through popular large vision and text models, including recent language models with the transformer architecture. The results demonstrate significant memory reductions of 73% with an extra 18% computational overhead on average. Our experiments demonstrate the practicality and effectiveness of our recomputation algorithm, further highlighting its potential for wide application in various deep learning domains.", "keywords": "Recomputation;Gradient checkpointing;Memory reduction;Computational graph optimization", "primary_area": "", "supplementary_material": "", "author": "Akifumi Imanishi;Zijian Xu;Masayuki Takagi;Sixue Wang;Emilio Castillo", "authorids": "imanishi@preferred.jp;~Zijian_Xu1;mtakagi@preferred.jp;cecilwang@preferred.jp;ecastill@preferred.jp", "gender": ";M;;;", "homepage": ";;;;", "dblp": ";45/3629-2.html;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "imanishi@preferred.jp;~Zijian_Xu1;mtakagi@preferred.jp;cecilwang@preferred.jp;ecastill@preferred.jp", "aff": ";Preferred Networks, Inc.;;;", "aff_domain": ";preferred.jp;;;", "position": ";Researcher;;;", "bibtex": "@inproceedings{\nimanishi2023a,\ntitle={A fast heuristic to optimize time-space tradeoff for large models},\nauthor={Akifumi Imanishi and Zijian Xu and Masayuki Takagi and Sixue Wang and Emilio Castillo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fbpTObq6TW}\n}", "github": "", "project": "", "reviewers": "wXFY;a8KW;kH45;w4oG", "pdf_size": 1138432, "rating": "6;6;6;6", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;2", "wc_summary": "17;73;83;154", "wc_strengths": "5;42;104;116", "wc_weaknesses": "5;20;71;83", "wc_questions": "459;37;62;55", "wc_limitations": "5;16;16;24", "wc_review": "491;188;336;432", "wc_reply_reviewers": "64;58;24;24", "wc_reply_authors": "0;267;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.75, 48.71024019649256 ], "wc_strengths_avg": [ 66.75, 45.38378014224906 ], "wc_weaknesses_avg": [ 44.75, 32.95735881407975 ], "wc_questions_avg": [ 153.25, 176.76025429943238 ], "wc_limitations_avg": [ 15.25, 6.7592529172978875 ], "wc_review_avg": [ 361.75, 114.55648170225899 ], "wc_reply_reviewers_avg": [ 42.5, 18.621224449536072 ], "wc_reply_authors_avg": [ 66.75, 115.61439140522256 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], 
"corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9266923965493457233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";preferred.jp;;;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Preferred Networks, Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.preferred-networks.com", "aff_unique_abbr": "PFN", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "LEPARD: Learning Explicit Part Discovery for 3D Articulated Shape Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70899", "id": "fcYObrixSS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a99f50fb024a56d15f057a1830ed0a00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fcYObrixSS", "openreview": "https://openreview.net/forum?id=fcYObrixSS", "poster": "/media/PosterPDFs/NeurIPS%202023/70899.png?t=1701860000.825408", "slides": "https://nips.cc/virtual/2023/poster/70899", "video": "https://nips.cc/virtual/2023/poster/70899", "author_site": "Di Liu, Anastasis Stathopoulos, Qilong Zhangli, Yunhe Gao, Dimitris Metaxas", "tldr": "", "abstract": "Reconstructing the 3D articulated shape of an animal from a single in-the-wild image is a challenging task. We propose LEPARD, a learning-based framework that discovers semantically meaningful 3D parts and reconstructs 3D shapes in a part-based manner. This is advantageous as 3D parts are robust to pose variations due to articulations and their shape is typically simpler than the overall shape of the object. In our framework, the parts are explicitly represented as parameterized primitive surfaces with global and local deformations in 3D that deform to match the image evidence. We propose a kinematics-inspired optimization to guide each transformation of the primitive deformation given 2D evidence. Similar to recent approaches, LEPARD is only trained using off-the-shelf deep features from DINO and does not require any form of 2D or 3D annotations. Experiments on 3D animal shape reconstruction, demonstrate significant improvement over existing alternatives in terms of both the overall reconstruction performance as well as the ability to discover semantically meaningful and consistent parts.", "keywords": "3D computer vision;deep learning", "primary_area": "", "supplementary_material": "/attachment/fefa6bc723bb773e2a4a407ea0ef1625539b2333.pdf", "author": "Di Liu;Anastasis Stathopoulos;Qilong Zhangli;Yunhe Gao;Dimitris N. 
Metaxas", "authorids": "~Di_Liu3;~Anastasis_Stathopoulos1;~Qilong_Zhangli1;~Yunhe_Gao2;~Dimitris_N._Metaxas1", "gender": "M;;M;M;M", "homepage": "https://lsn33096.github.io/;https://statho.github.io/;https://qzhangli.github.io;https://www.cs.rutgers.edu/people/graduate-students/details/yunhe-gao;https://www.cs.rutgers.edu/~dnm/", "dblp": "15/1777-3;267/5450;315/4939;237/4741;m/DNMetaxas", "google_scholar": "1uo3XsMAAAAJ;FdaFVdcAAAAJ;-uhPZUgAAAAJ;TOsFPu4AAAAJ;https://scholar.google.com.tw/citations?user=a7VNhCIAAAAJ", "orcid": ";0000-0002-4026-2166;0009-0002-2848-5719;;", "linkedin": "di-liu-20361a1b6/;;qilong-zhangli/;;dimitris-metaxas-1bb74914/", "or_profile": "~Di_Liu3;~Anastasis_Stathopoulos1;~Qilong_Zhangli1;~Yunhe_Gao2;~Dimitris_Metaxas1", "aff": "Rutgers University, New Brunswick;Rutgers University, Newark;Rutgers University;Rutgers University;Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;cs.rutgers.edu", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023lepard,\ntitle={{LEPARD}: Learning Explicit Part Discovery for 3D Articulated Shape Reconstruction},\nauthor={Di Liu and Anastasis Stathopoulos and Qilong Zhangli and Yunhe Gao and Dimitris N. Metaxas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fcYObrixSS}\n}", "github": "", "project": "", "reviewers": "Hc7X;a5TN;ifgQ;pQ1r;5yZr", "pdf_size": 2810012, "rating": "4;5;6;7;8", "confidence": "2;3;4;4;3", "soundness": "2;2;4;4;4", "novelty": "3;2;3;3;3", "presentation": "2;1;3;3;4", "wc_summary": "79;54;60;51;78", "wc_strengths": "32;95;136;60;52", "wc_weaknesses": "251;243;93;56;40", "wc_questions": "45;5;107;136;61", "wc_limitations": "15;9;31;63;45", "wc_review": "422;406;427;366;276", "wc_reply_reviewers": "54;33;145;31;101", "wc_reply_authors": "52;52;224;87;57", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.9797958971132712 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 64.4, 11.876026271442818 ], "wc_strengths_avg": [ 75.0, 36.67151483099655 ], "wc_weaknesses_avg": [ 136.6, 91.80108931815569 ], "wc_questions_avg": [ 70.8, 46.1579895576053 ], "wc_limitations_avg": [ 32.6, 19.734234213670415 ], "wc_review_avg": [ 379.4, 55.96284481689615 ], "wc_reply_reviewers_avg": [ 72.8, 44.02908129861444 ], "wc_reply_authors_avg": [ 94.4, 66.09871405708283 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5669467095138409, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5466188492103443890&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;cs.rutgers.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "0;1", "aff_campus_unique": "New Brunswick;Newark;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Representation Learning via Consistent Assignment of Views over Random Partitions", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70898", "id": "fem6BIJkdv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7caf9d251b546bc78078b35b4a6f3b7e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fem6BIJkdv", "openreview": "https://openreview.net/forum?id=fem6BIJkdv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70898", "video": "https://nips.cc/virtual/2023/poster/70898", "author_site": "Thalles Santos Silva, Ad\u00edn Ram\u00edrez Rivera", "tldr": "", "abstract": "We present Consistent Assignment of Views over Random Partitions (CARP), a self-supervised clustering method for representation learning of visual features. CARP learns prototypes in an end-to-end online fashion using gradient descent without additional non-differentiable modules to solve the cluster assignment problem. CARP optimizes a new pretext task based on random partitions of prototypes that regularizes the model and enforces consistency between views' assignments. Additionally, our method improves training stability and prevents collapsed solutions in joint-embedding training. Through an extensive evaluation, we demonstrate that CARP's representations are suitable for learning downstream tasks. We evaluate CARP's representations capabilities in 17 datasets across many standard protocols, including linear evaluation, few-shot classification, $k$-NN, $k$-means, image retrieval, and copy detection. We compare CARP performance to 11 existing self-supervised methods. We extensively ablate our method and demonstrate that our proposed random partition pretext task improves the quality of the learned representations by devising multiple random classification tasks.\nIn transfer learning tasks, CARP achieves the best performance on average against many SSL methods trained for a longer time.", "keywords": "representation learning;unsupervised learning;self-supervised learning;computer vision", "primary_area": "", "supplementary_material": "/attachment/fc8dcc025c4e61637223f607cc7fad2675f1823f.zip", "author": "Thalles Santos Silva;Ad\u00edn Ram\u00edrez Rivera", "authorids": "~Thalles_Santos_Silva1;~Ad\u00edn_Ram\u00edrez_Rivera1", "gender": "M;M", "homepage": "https://sthalles.github.io/;https://www.mn.uio.no/ifi/english/people/aca/adinr/", "dblp": "309/8339;85/9834", "google_scholar": ";p2aLoZAAAAAJ", "orcid": ";0000-0002-4321-9075", "linkedin": "thalles-silva-32ab08a3/;adinramirezrivera/", "or_profile": "~Thalles_Santos_Silva1;~Adin_Ramirez_Rivera1", "aff": "Universidade Estadual de Campinas;University of Oslo", "aff_domain": "unicamp.br;uio.no", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nsilva2023representation,\ntitle={Representation Learning via Consistent Assignment of Views over Random Partitions},\nauthor={Thalles Santos Silva and Ad{\\'\\i}n Ram{\\'\\i}rez Rivera},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fem6BIJkdv}\n}", "github": "", "project": "", "reviewers": "osfH;fyYo;FHva;VZFy", "pdf_size": 515885, "rating": "3;5;6;7", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "76;93;74;154", "wc_strengths": "34;71;137;73", "wc_weaknesses": "142;159;180;68", "wc_questions": "14;64;55;58", "wc_limitations": "1;108;6;49", "wc_review": "267;495;452;402", "wc_reply_reviewers": "356;33;110;304", "wc_reply_authors": "1405;21;824;25", "reply_reviewers": "2;1;2;1", "reply_authors": "6;2;4;2", 
"rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 32.460552983583014 ], "wc_strengths_avg": [ 78.75, 37.04304928053305 ], "wc_weaknesses_avg": [ 137.25, 42.18634257671551 ], "wc_questions_avg": [ 47.75, 19.753164303473 ], "wc_limitations_avg": [ 41.0, 42.94764254298482 ], "wc_review_avg": [ 404.0, 85.67088186776182 ], "wc_reply_reviewers_avg": [ 200.75, 133.35924227439207 ], "wc_reply_authors_avg": [ 568.75, 583.1296489632473 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8783100656536799, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14273902901872257103&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "unicamp.br;uio.no", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Universidade Estadual de Campinas;University of Oslo", "aff_unique_dep": ";", "aff_unique_url": "https://www.unicamp.br;https://www.uio.no", "aff_unique_abbr": "UNICAMP;UiO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Brazil;Norway" }, { "title": "Analysis of Variance of Multiple Causal Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70897", "id": "fezV91IJIo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/26c233f48fb05bbd52a520e4bb9e3760-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fezV91IJIo", "openreview": "https://openreview.net/forum?id=fezV91IJIo", "poster": "/media/PosterPDFs/NeurIPS%202023/70897.png?t=1701933165.5938132", "slides": "https://nips.cc/virtual/2023/poster/70897", "video": "https://nips.cc/virtual/2023/poster/70897", "author_site": "Zhongli Jiang, Dabao Zhang", "tldr": "", "abstract": "Constructing a directed cyclic graph (DCG) is challenged by both algorithmic difficulty and computational burden. Comparing multiple DCGs is even more difficult, compounded by the need to identify dynamic causalities across graphs. We propose to unify multiple DCGs with a single structural model and develop a limited-information-based method to simultaneously construct multiple networks and infer their disparities, which can be visualized by appropriate correspondence analysis. The algorithm provides DCGs with robust non-asymptotic theoretical properties. It is designed with two sequential stages, each of which involves parallel computation tasks that are scalable to the network complexity. Taking advantage of high-performance clusters, our method makes it possible to evaluate the statistical significance of DCGs using the bootstrap method. 
We demonstrated the effectiveness of our method by applying it to synthetic and real datasets.", "keywords": "causal inference;large graphs;multi-task learning;structural model;directed cyclic graph", "primary_area": "", "supplementary_material": "/attachment/22df91badd4c659ab621309093afe858a11e2d7d.pdf", "author": "Zhongli Jiang;Dabao Zhang", "authorids": "~Zhongli_Jiang1;~Dabao_Zhang1", "gender": ";M", "homepage": ";https://publichealth.uci.edu/faculty/zhang-dabao/", "dblp": ";", "google_scholar": ";JFI2P0MAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zhongli_Jiang1;~Dabao_Zhang1", "aff": ";University of California, Irvine", "aff_domain": ";uci.edu", "position": ";Full Professor", "bibtex": "@inproceedings{\njiang2023analysis,\ntitle={Analysis of Variance of Multiple Causal Networks},\nauthor={Zhongli Jiang and Dabao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fezV91IJIo}\n}", "github": "", "project": "", "reviewers": "mopu;bLo4;ufKZ;Wm1e;Daft", "pdf_size": 688407, "rating": "4;5;5;6;7", "confidence": "3;2;4;2;2", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;3;2;2;4", "wc_summary": "102;61;46;51;89", "wc_strengths": "53;30;34;58;78", "wc_weaknesses": "43;32;332;80;96", "wc_questions": "74;42;12;40;70", "wc_limitations": "11;2;12;20;21", "wc_review": "283;167;436;249;354", "wc_reply_reviewers": "42;0;90;13;0", "wc_reply_authors": "382;0;15;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 69.8, 21.92167876783163 ], "wc_strengths_avg": [ 50.6, 17.385051049680584 ], "wc_weaknesses_avg": [ 116.6, 110.2136107747133 ], "wc_questions_avg": [ 47.6, 22.606193841511665 ], "wc_limitations_avg": [ 13.2, 6.910861017268398 ], "wc_review_avg": [ 297.8, 91.61528256792096 ], "wc_reply_reviewers_avg": [ 29.0, 34.140884581393024 ], "wc_reply_authors_avg": [ 79.4, 151.4114922983061 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5393193716300062, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4IG3mfb1MnIJ:scholar.google.com/&scioq=Analysis+of+Variance+of+Multiple+Causal+Networks&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": ";uci.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "RS-Del: Edit Distance Robustness Certificates for Sequence Classifiers via Randomized Deletion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70896", "id": "ffFcRPpnWx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3ba82362eb0aa75487069f19fde794fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ffFcRPpnWx", "openreview": "https://openreview.net/forum?id=ffFcRPpnWx", "poster": "/media/PosterPDFs/NeurIPS%202023/70896.png?t=1698818144.9524653", "slides": "https://nips.cc/virtual/2023/poster/70896", "video": 
"https://nips.cc/virtual/2023/poster/70896", "author_site": "Zhuoqun Huang, Neil G Marchant, Keane Lucas, Lujo Bauer, Olga Ohrimenko, Benjamin Rubinstein", "tldr": "", "abstract": "Randomized smoothing is a leading approach for constructing classifiers that are certifiably robust against adversarial examples. Existing work on randomized smoothing has focused on classifiers with continuous inputs, such as images, where $\\ell_p$-norm bounded adversaries are commonly studied. However, there has been limited work for classifiers with discrete or variable-size inputs, such as for source code, which require different threat models and smoothing mechanisms. In this work, we adapt randomized smoothing for discrete sequence classifiers to provide certified robustness against edit distance-bounded adversaries. Our proposed smoothing mechanism randomized deletion (RS-Del) applies random deletion edits, which are (perhaps surprisingly) sufficient to confer robustness against adversarial deletion, insertion and substitution edits. Our proof of certification deviates from the established Neyman-Pearson approach, which is intractable in our setting, and is instead organized around longest common subsequences. We present a case study on malware detection\u2014a binary classification problem on byte sequences where classifier evasion is a well-established threat model. When applied to the popular MalConv malware detection model, our smoothing mechanism RS-Del achieves a certified accuracy of 91% at an edit distance radius of 128 bytes.", "keywords": "certified robustness;randomized smoothing;malware detection;sequence classification;edit distance", "primary_area": "", "supplementary_material": "/attachment/4fda2bb9fbf857867a295f992fb82e1a06c900fc.zip", "author": "Zhuoqun Huang;Neil G Marchant;Keane Lucas;Lujo Bauer;Olga Ohrimenko;Benjamin I. P. Rubinstein", "authorids": "~Zhuoqun_Huang1;~Neil_G_Marchant1;~Keane_Lucas1;~Lujo_Bauer1;~Olga_Ohrimenko1;~Benjamin_I._P._Rubinstein1", "gender": "M;M;M;;;M", "homepage": "https://github.com/Dovermore;https://www.ngmarchant.net/;https://keanelucas.com;;;http://www.bipr.net/", "dblp": "272/9127;198/0542;250/5769;;;90/1092", "google_scholar": "https://scholar.google.com.au/citations?hl=en;F7cVGr0AAAAJ;vJEa5voAAAAJ;;;https://scholar.google.com.au/citations?user=hMG_gR4AAAAJ", "orcid": "0000-0001-8397-8087;0000-0001-5713-4235;0000-0002-4705-3412;;;0000-0002-2947-6980", "linkedin": "calvin-zhuoqun-huang/;;keane-lucas/;;;benjaminrubinstein/", "or_profile": "~Zhuoqun_Huang1;~Neil_G_Marchant1;~Keane_Lucas1;~Lujo_Bauer1;~Olga_Ohrimenko1;~Benjamin_I._P._Rubinstein1", "aff": "University of Melbourne;The University of Melbourne;Carnegie Mellon University;;;The University of Melbourne", "aff_domain": "unimelb.edu;unimelb.edu.au;cmu.edu;;;unimelb.edu.au", "position": "PhD student;Postdoc;PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nhuang2023rsdel,\ntitle={{RS}-Del: Edit Distance Robustness Certificates for Sequence Classifiers via Randomized Deletion},\nauthor={Zhuoqun Huang and Neil G Marchant and Keane Lucas and Lujo Bauer and Olga Ohrimenko and Benjamin I. P. 
Rubinstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ffFcRPpnWx}\n}", "github": "", "project": "", "reviewers": "MXYp;xRpL;r6FS;bHHQ", "pdf_size": 831984, "rating": "4;6;6;9", "confidence": "4;4;3;5", "soundness": "2;2;3;4", "novelty": "3;3;2;4", "presentation": "3;4;3;4", "wc_summary": "55;113;39;135", "wc_strengths": "167;72;51;78", "wc_weaknesses": "638;239;68;134", "wc_questions": "6;224;3;66", "wc_limitations": "114;97;9;39", "wc_review": "980;745;170;452", "wc_reply_reviewers": "546;81;20;109", "wc_reply_authors": "813;22;0;86", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 85.5, 39.68311983702894 ], "wc_strengths_avg": [ 92.0, 44.44659717008716 ], "wc_weaknesses_avg": [ 269.75, 221.18134528029256 ], "wc_questions_avg": [ 74.75, 89.75905246826083 ], "wc_limitations_avg": [ 64.75, 42.5345447842104 ], "wc_review_avg": [ 586.75, 304.76497092021583 ], "wc_reply_reviewers_avg": [ 189.0, 208.61088178712058 ], "wc_reply_authors_avg": [ 230.25, 337.9307436443154 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5940885257860046, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7439008813847603028&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "unimelb.edu;unimelb.edu.au;cmu.edu;;;unimelb.edu.au", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Melbourne;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.cmu.edu", "aff_unique_abbr": "UniMelb;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;United States" }, { "title": "Neural Foundations of Mental Simulation: Future Prediction of Latent Representations on Dynamic Scenes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70895", "id": "ffOhY40Nrh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df438caa36714f69277daa92d608dd63-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ffOhY40Nrh", "openreview": "https://openreview.net/forum?id=ffOhY40Nrh", "poster": "/media/PosterPDFs/NeurIPS%202023/70895.png?t=1702051432.752001", "slides": "https://nips.cc/virtual/2023/poster/70895", "video": "https://nips.cc/virtual/2023/poster/70895", "author_site": "Aran Nayebi, Rishi Rajalingham, Mehrdad Jazayeri, Guangyu Robert Yang", "tldr": "", "abstract": "Humans and animals have a rich and flexible understanding of the physical world, which enables them to infer the underlying dynamical trajectories of objects and events, plausible future states, and use that to plan and anticipate the consequences of actions.\nHowever, the neural mechanisms underlying these computations are unclear.\nWe combine a goal-driven modeling approach with dense neurophysiological data and high-throughput human behavioral readouts that contain thousands of comparisons to directly impinge on this question.\nSpecifically, we construct and evaluate several classes of sensory-cognitive networks to predict the future state of rich, 
ethologically-relevant environments, ranging from self-supervised end-to-end models with pixel-wise or object-slot objectives, to models that future predict in the latent space of purely static image-pretrained or dynamic video-pretrained foundation models.\nWe find that ``scale is \\emph{not} all you need'', and that many state-of-the-art machine learning models fail to perform well on our neural and behavioral benchmarks for future prediction.\nIn fact, only one class of models matches these data well overall.\nWe find that neural responses are currently best predicted by models trained to predict the future state of their environment in the \\emph{latent} space of pretrained foundation models optimized for \\emph{dynamic} scenes in a self-supervised manner.\nThese models also approach the neurons' ability to predict the environmental state variables that are visually hidden from view, despite not being explicitly trained to do so.\nFinally, we find that not all foundation model latents are equal.\nNotably, models that future predict in the latent space of video foundation models that are optimized to support a \\emph{diverse} range of egocentric sensorimotor tasks, reasonably match \\emph{both} human behavioral error patterns and neural dynamics across all environmental scenarios that we were able to test.\nOverall, these findings suggest that the neural mechanisms and behaviors of primate mental simulation have strong inductive biases associated with them, and are thus far most consistent with being optimized to future predict on \\emph{reusable} visual representations that are useful for Embodied AI more generally.", "keywords": "neural coding;mental simulation;foundation models;primate frontal cortex", "primary_area": "", "supplementary_material": "/attachment/f3c974f38b5567bab840846cf4a41d9ad8cc09e1.pdf", "author": "Aran Nayebi;Rishi Rajalingham;Mehrdad Jazayeri;Guangyu Robert Yang", "authorids": "~Aran_Nayebi2;~Rishi_Rajalingham1;~Mehrdad_Jazayeri1;~Guangyu_Robert_Yang1", "gender": ";;;M", "homepage": "https://anayebi.github.io/;https://rishirajalingham.github.io/;;https://www.metaconscious.org/", "dblp": "43/7661;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;GUU7CggAAAAJ;AkJyWbAAAAAJ;hrI8aH8AAAAJ", "orcid": ";0000-0001-6499-1681;;0000-0002-8919-4248", "linkedin": ";;;", "or_profile": "~Aran_Nayebi2;~Rishi_Rajalingham1;~Mehrdad_Jazayeri1;~Guangyu_Robert_Yang1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu;mit.edu", "position": "Postdoc;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nnayebi2023neural,\ntitle={Neural Foundations of Mental Simulation: Future Prediction of Latent Representations on Dynamic Scenes},\nauthor={Aran Nayebi and Rishi Rajalingham and Mehrdad Jazayeri and Guangyu Robert Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ffOhY40Nrh}\n}", "github": "", "project": "", "reviewers": "tmZS;bK8w;ZwMf;6vWo", "pdf_size": 4462063, "rating": "5;7;7;8", "confidence": "4;2;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "41;142;189;82", "wc_strengths": "47;63;100;53", "wc_weaknesses": "143;52;653;476", "wc_questions": "77;85;176;152", "wc_limitations": "1;6;82;2", "wc_review": "309;348;1200;765", "wc_reply_reviewers": "147;22;73;848", "wc_reply_authors": "480;0;158;1498", 
"reply_reviewers": "1;1;1;2", "reply_authors": "2;1;2;4", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.5, 56.482298111886344 ], "wc_strengths_avg": [ 65.75, 20.58367071248469 ], "wc_weaknesses_avg": [ 331.0, 243.87189259937276 ], "wc_questions_avg": [ 122.5, 42.45291509425472 ], "wc_limitations_avg": [ 22.75, 34.259122872601395 ], "wc_review_avg": [ 655.5, 361.62445990281134 ], "wc_reply_reviewers_avg": [ 272.5, 335.22417872223957 ], "wc_reply_authors_avg": [ 534.0, 582.8258745114188 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10389802285715377203&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "mit.edu;;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Rotating Features for Object Discovery", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70894", "id": "fg7iyNK81W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bb36593e5e438aac5dd07907e757e087-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fg7iyNK81W", "openreview": "https://openreview.net/forum?id=fg7iyNK81W", "poster": "/media/PosterPDFs/NeurIPS%202023/70894.png?t=1701939636.8306458", "slides": "https://nips.cc/virtual/2023/poster/70894", "video": "https://nips.cc/virtual/2023/poster/70894", "author_site": "Sindy L\u00f6we, Phillip Lippe, Francesco Locatello, Max Welling", "tldr": "", "abstract": "The binding problem in human cognition, concerning how the brain represents and connects objects within a fixed network of neural connections, remains a subject of intense debate. Most machine learning efforts addressing this issue in an unsupervised setting have focused on slot-based methods, which may be limiting due to their discrete nature and difficulty to express uncertainty. Recently, the Complex AutoEncoder was proposed as an alternative that learns continuous and distributed object-centric representations. However, it is only applicable to simple toy data. In this paper, we present Rotating Features, a generalization of complex-valued features to higher dimensions, and a new evaluation procedure for extracting objects from distributed representations. Additionally, we show the applicability of our approach to pre-trained features. Together, these advancements enable us to scale distributed object-centric representations from simple toy to real-world data. 
We believe this work advances a new paradigm for addressing the binding problem in machine learning and has the potential to inspire further innovation in the field.", "keywords": "Object Discovery;Object-Centric Representations;Structured Representation Learning", "primary_area": "", "supplementary_material": "/attachment/80427da35b9dfcbc0591d39b21ba7261881553cc.zip", "author": "Sindy L\u00f6we;Phillip Lippe;Francesco Locatello;Max Welling", "authorids": "~Sindy_L\u00f6we1;~Phillip_Lippe1;~Francesco_Locatello1;~Max_Welling1", "gender": "F;M;M;M", "homepage": "https://loewex.github.io/;https://phlippe.github.io;https://twitter.com/FrancescoLocat8;https://staff.fnwi.uva.nl/m.welling/", "dblp": "223/4740;267/9431;195/6074;16/2286", "google_scholar": "https://scholar.google.ch/citations?user=lZZIP9UAAAAJ;69hFZp4AAAAJ;;https://scholar.google.nl/citations?user=8200InoAAAAJ", "orcid": ";0000-0002-3639-6938;;0000-0003-1484-2121", "linkedin": ";phillip-lippe/;;", "or_profile": "~Sindy_L\u00f6we1;~Phillip_Lippe1;~Francesco_Locatello1;~Max_Welling1", "aff": "University of Amsterdam;Google DeepMind;Amazon;University of Amsterdam", "aff_domain": "uva.nl;google.com;amazon.com;uva.nl", "position": "PhD student;Intern;Senior Applied Scientist;Full Professor", "bibtex": "@inproceedings{\nl{\\\"o}we2023rotating,\ntitle={Rotating Features for Object Discovery},\nauthor={Sindy L{\\\"o}we and Phillip Lippe and Francesco Locatello and Max Welling},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fg7iyNK81W}\n}", "github": "", "project": "", "reviewers": "r4Rq;VTkG;cH5o;e8db", "pdf_size": 10788023, "rating": "6;7;8;8", "confidence": "5;4;4;2", "soundness": "3;4;4;3", "novelty": "2;3;4;4", "presentation": "3;3;4;3", "wc_summary": "117;204;64;56", "wc_strengths": "63;83;67;173", "wc_weaknesses": "216;159;467;86", "wc_questions": "345;78;9;168", "wc_limitations": "49;18;11;20", "wc_review": "790;542;618;503", "wc_reply_reviewers": "95;41;93;64", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 110.25, 58.98463783054025 ], "wc_strengths_avg": [ 96.5, 44.79676327593323 ], "wc_weaknesses_avg": [ 232.0, 143.2881711796197 ], "wc_questions_avg": [ 150.0, 125.91068262859987 ], "wc_limitations_avg": [ 24.5, 14.534441853748634 ], "wc_review_avg": [ 613.25, 110.10761781093986 ], "wc_reply_reviewers_avg": [ 73.25, 22.29770167528483 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7852652969188158055&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uva.nl;google.com;amazon.com;uva.nl", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Amsterdam;Google;Amazon", "aff_unique_dep": ";Google DeepMind;Amazon.com, Inc.", "aff_unique_url": "https://www.uva.nl;https://deepmind.com;https://www.amazon.com", "aff_unique_abbr": "UvA;DeepMind;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Netherlands;United 
Kingdom;United States" }, { "id": "fifeeUmV4Z", "title": "Generative Noisy-Label Learning by Implicit Dicriminative Approximation with Partial Label Prior", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning with noisy labels has been addressed with both discriminative and generative models. Although discriminative models have dominated the field due to their simpler modeling and more efficient computational training processes, generative models offer a more effective means of disentangling clean and noisy labels and improving the estimation of the label transition matrix. However, generative approaches maximize the joint likelihood of noisy labels and data using a complex formulation that only indirectly optimizes the model of interest associating data and clean labels. Additionally, these approaches rely on generative models that are challenging to train and tend to use uninformative clean label priors. In this paper, we propose a new generative noisy-label learning approach that addresses these three issues. First, we propose a new model optimisation that directly associates data and clean labels. Second, the generative model is implicitly estimated using a discriminative model, eliminating the inefficient training of a generative model. Third, we propose a new informative label prior inspired by partial label learning as a supervision signal for noisy label learning. Extensive experiments on several noisy-label benchmarks demonstrate that our generative model provides state-of-the-art results while maintaining a similar computational complexity as discriminative models. *Code will be available if paper is accepted*.", "keywords": "noisy label learning; partial label learning; implicit generative model", "primary_area": "", "supplementary_material": "/attachment/bc7cc6efee85334a747eb9cbb91a2399a59bc2fa.zip", "author": "Fengbei Liu;Yuanhong Chen;Chong Wang;Yuyuan Liu;Gustavo Carneiro", "authorids": "~Fengbei_Liu1;~Yuanhong_Chen1;~Chong_Wang16;~Yuyuan_Liu1;~Gustavo_Carneiro1", "gender": "M;;;M;M", "homepage": ";;;;https://cs.adelaide.edu.au/~carneiro/", "dblp": "261/8207;;;184/6418;53/3609", "google_scholar": "oY_qRxMAAAAJ;;;SibDXFQAAAAJ;https://scholar.google.com.au/citations?user=E0TtOWAAAAAJ", "orcid": ";;;0000-0002-1673-9809;0000-0002-5571-6220", "linkedin": ";;;yuyuan-liu-a90606155/;gustavo-carneiro-3578812/", "or_profile": "~Fengbei_Liu1;~Yuanhong_Chen1;~Chong_Wang16;~Yuyuan_Liu1;~Gustavo_Carneiro1", "aff": "The University of Adelaide;;;University of Adelaide;University of Surrey", "aff_domain": "adelaide.edu.au;;;adelaide.edu.au;surrey.ac.uk", "position": "PhD student;;;PhD student;Full Professor", "bibtex": "@misc{\nliu2023generative,\ntitle={Generative Noisy-Label Learning by Implicit Dicriminative Approximation with Partial Label Prior},\nauthor={Fengbei Liu and Yuanhong Chen and Chong Wang and Yuyuan Liu and Gustavo Carneiro},\nyear={2023},\nurl={https://openreview.net/forum?id=fifeeUmV4Z}\n}", "github": "", "project": "", "reviewers": "rUmF;9Aeh;jVwM;6r3h", "site": "https://openreview.net/forum?id=fifeeUmV4Z", "pdf_size": 431256, "rating": "1;5;6;7", "confidence": "4;5;4;4", "soundness": "1;3;3;3", "novelty": "1;3;2;3", "presentation": "2;2;2;4", "wc_summary": "36;42;115;280", "wc_strengths": "17;54;29;100", "wc_weaknesses": "340;125;136;41", "wc_questions": "21;30;12;72", "wc_limitations": "96;6;35;1", "wc_review": "510;257;327;494", "wc_reply_reviewers": "158;137;53;206", "wc_reply_authors": "243;273;272;211", "reply_reviewers": "1;1;2;2",
"reply_authors": "2;2;3;3", "rating_avg": [ 4.75, 2.277608394786075 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 118.25, 98.42859086667858 ], "wc_strengths_avg": [ 50.0, 31.804087787578503 ], "wc_weaknesses_avg": [ 160.5, 109.95567288685018 ], "wc_questions_avg": [ 33.75, 22.982330169066845 ], "wc_limitations_avg": [ 34.5, 37.80542289143186 ], "wc_review_avg": [ 397.0, 108.0254599619923 ], "wc_reply_reviewers_avg": [ 138.5, 55.33760023709015 ], "wc_reply_authors_avg": [ 249.75, 25.410381736605217 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.06337242505244779, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3797608088310286342&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Adelaide;University of Surrey", "aff_unique_dep": ";", "aff_unique_url": "https://www.adelaide.edu.au;https://www.surrey.ac.uk", "aff_unique_abbr": "Adelaide;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Australia;United Kingdom" }, { "title": "Bootstrapped Training of Score-Conditioned Generator for Offline Design of Biological Sequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70893", "id": "fj0ZeRtUTU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d601a9b708cacfad167f6c6c45647a18-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fj0ZeRtUTU", "openreview": "https://openreview.net/forum?id=fj0ZeRtUTU", "poster": "/media/PosterPDFs/NeurIPS%202023/70893.png?t=1697422187.3871179", "slides": "https://nips.cc/virtual/2023/poster/70893", "video": "https://nips.cc/virtual/2023/poster/70893", "author_site": "Minsu Kim, Federico Berto, Sungsoo Ahn, Jinkyoo Park", "tldr": "", "abstract": "We study the problem of optimizing biological sequences, e.g., proteins, DNA, and RNA, to maximize a black-box score function that is only evaluated in an offline dataset. We propose a novel solution, bootstrapped training of score-conditioned generator (BootGen) algorithm. Our algorithm repeats a two-stage process. In the first stage, our algorithm trains the biological sequence generator with rank-based weights to enhance the accuracy of sequence generation based on high scores. The subsequent stage involves bootstrapping, which augments the training dataset with self-generated data labeled by a proxy score function. Our key idea is to align the score-based generation with a proxy score function, which distills the knowledge of the proxy score function to the generator. After training, we aggregate samples from multiple bootstrapped generators and proxies to produce a diverse design. Extensive experiments show that our method outperforms competitive baselines on biological sequential design tasks. 
We provide reproducible source code: https://github.com/kaist-silab/bootgen.", "keywords": "Biological sequence design;offline model based optimization;conditional generation;bootstrapping;ensemble", "primary_area": "", "supplementary_material": "", "author": "Minsu Kim;Federico Berto;Sungsoo Ahn;Jinkyoo Park", "authorids": "~Minsu_Kim2;~Federico_Berto1;~Sungsoo_Ahn1;~Jinkyoo_Park1", "gender": "M;M;M;M", "homepage": "https://minsuukim.github.io/;https://fedebotu.github.io/;https://sungsooahn.super.site/;http://silab.kaist.ac.kr/", "dblp": ";317/1711;90/5164;156/7535", "google_scholar": "https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;https://scholar.google.com/citations?hl=en;XTenHs0AAAAJ;sH2a0nkAAAAJ", "orcid": ";0000-0002-7438-8365;;0000-0003-2620-1479", "linkedin": ";federicoberto/;;", "or_profile": "~Minsu_Kim2;~Federico_Berto1;~Sungsoo_Ahn1;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Pohang University of Science and Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;postech.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2023bootstrapped,\ntitle={Bootstrapped Training of Score-Conditioned Generator for Offline Design of Biological Sequences},\nauthor={Minsu Kim and Federico Berto and Sungsoo Ahn and Jinkyoo Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fj0ZeRtUTU}\n}", "github": "", "project": "", "reviewers": "LsCS;ogL2;M7pC;jSje", "pdf_size": 664182, "rating": "5;6;7;7", "confidence": "3;4;3;3", "soundness": "2;3;2;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "68;115;82;60", "wc_strengths": "30;67;76;47", "wc_weaknesses": "184;221;129;107", "wc_questions": "2;203;2;83", "wc_limitations": "1;53;2;5", "wc_review": "285;659;291;302", "wc_reply_reviewers": "29;324;68;44", "wc_reply_authors": "62;1001;576;16", "reply_reviewers": "1;3;2;1", "reply_authors": "2;6;4;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.25, 21.016362672927016 ], "wc_strengths_avg": [ 55.0, 17.84656829757475 ], "wc_weaknesses_avg": [ 160.25, 44.90754390968181 ], "wc_questions_avg": [ 72.5, 82.28152891141487 ], "wc_limitations_avg": [ 15.25, 21.84462176372024 ], "wc_review_avg": [ 384.25, 158.74409437834214 ], "wc_reply_reviewers_avg": [ 116.25, 120.74844719498466 ], "wc_reply_authors_avg": [ 413.75, 404.0794321664987 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=787703883601283912&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;kaist.ac.kr;postech.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Pohang University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.postech.ac.kr", "aff_unique_abbr": "KAIST;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Policy Finetuning in Reinforcement Learning via Design of Experiments using Offline Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70892", "id": "fjXTcUUgaC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bcdaaa1aec3ae2aa39542acefdec4e4b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fjXTcUUgaC", "openreview": "https://openreview.net/forum?id=fjXTcUUgaC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70892", "video": "https://nips.cc/virtual/2023/poster/70892", "author_site": "Ruiqi Zhang, Andrea Zanette", "tldr": "", "abstract": "In some applications of reinforcement learning, \na dataset of pre-collected experience is already available\nbut it is also possible to acquire some additional online data to help improve the quality of the policy.\nHowever, it may be preferable to gather additional data with a single, non-reactive exploration policy\nand avoid the engineering costs associated with switching policies. \n\nIn this paper we propose an algorithm with provable guarantees \nthat can leverage an offline dataset to design a single non-reactive policy for exploration. \nWe theoretically analyze the algorithm and measure the quality of the final policy \nas a function of the local coverage of the original dataset and the amount of additional data collected.", "keywords": "offline RL;online RL;exploration;non-reactive;fine-tuning", "primary_area": "", "supplementary_material": "/attachment/8b5a4d2f203635c2a3acf604dae74927c83f3ddc.pdf", "author": "Ruiqi Zhang;Andrea Zanette", "authorids": "~Ruiqi_Zhang2;~Andrea_Zanette1", "gender": "M;", "homepage": "https://rqzhangberkeley.github.io/;", "dblp": ";", "google_scholar": "uErE2UUAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Ruiqi_Zhang2;~Andrea_Zanette1", "aff": "University of California, Berkeley;", "aff_domain": "berkeley.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nzhang2023policy,\ntitle={Policy Finetuning in Reinforcement Learning via Design of Experiments using Offline Data},\nauthor={Ruiqi Zhang and Andrea Zanette},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fjXTcUUgaC}\n}", "github": "", "project": "", "reviewers": "TPkj;CnHc;s51D;wsu5;rHsz;gUtm", "pdf_size": 504578, "rating": "5;5;7;7;7;7", "confidence": "2;3;4;3;3;2", "soundness": "3;4;3;4;3;4", "novelty": "3;3;3;3;3;3", "presentation": "3;3;4;3;4;3", "wc_summary": "84;133;61;110;86;185", "wc_strengths": "98;107;91;127;56;29", "wc_weaknesses": "34;139;64;296;84;32", "wc_questions": "113;45;42;2;45;21", "wc_limitations": "1;19;8;9;5;1", "wc_review": "330;443;266;544;276;268", "wc_reply_reviewers": "12;196;22;36;41;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 109.83333333333333, 40.4286064838033 ], "wc_strengths_avg": [ 84.66666666666667, 32.73462319250911 ], "wc_weaknesses_avg": [ 108.16666666666667, 91.34990725531993 ], "wc_questions_avg": [ 44.666666666666664, 34.296096311711956 ], "wc_limitations_avg": [ 7.166666666666667, 6.121455890735652 ], "wc_review_avg": [ 354.5, 104.7373699625242 
], "wc_reply_reviewers_avg": [ 51.166666666666664, 66.22289802041453 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3429971702850178, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13737523317542012575&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "berkeley.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Prototypical Variational Autoencoder for 3D Few-shot Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70891", "id": "fljrZsJ2I8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/076a93fd42aa85f5ccee921a01d77dd5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fljrZsJ2I8", "openreview": "https://openreview.net/forum?id=fljrZsJ2I8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70891", "video": "https://nips.cc/virtual/2023/poster/70891", "author_site": "Weiliang Tang, Biqi YANG, Xianzhi Li, Yun-Hui Liu, Pheng-Ann Heng, Chi-Wing Fu", "tldr": "", "abstract": "Few-Shot 3D Point Cloud Object Detection (FS3D) is a challenging task, aiming to detect 3D objects of novel classes using only limited annotated samples for training. Considering that the detection performance highly relies on the quality of the latent features, we design a VAE-based prototype learning scheme, named prototypical VAE (P-VAE), to learn a probabilistic latent space for enhancing the diversity and distinctiveness of the sampled features. The network encodes a multi-center GMM-like posterior, in which each distribution centers at a prototype. For regularization, P-VAE incorporates a reconstruction task to preserve geometric information. To adopt P-VAE for the detection framework, we formulate Geometric-informative Prototypical VAE (GP-VAE) to handle varying geometric components and Class-specific Prototypical VAE (CP-VAE) to handle varying object categories. In the first stage, we harness GP-VAE to aid feature extraction from the input scene. In the second stage, we cluster the geometric-informative features into per-instance features and use CP-VAE to refine each instance feature with category-level guidance. Experimental results show the top performance of our approach over the state of the arts on two FS3D benchmarks. 
Quantitative ablations and qualitative prototype analysis further demonstrate that our probabilistic modeling can significantly boost prototype learning for FS3D.", "keywords": "3D Point Cloud Object Detection;Few Shot Learning;Computer Vision;Geometric Prototype", "primary_area": "", "supplementary_material": "/attachment/4d22f7908bb0aab95fc521498d4133a4bf058640.pdf", "author": "Weiliang Tang;Biqi YANG;Xianzhi Li;Yun-Hui Liu;Pheng-Ann Heng;Chi-Wing Fu", "authorids": "~Weiliang_Tang3;~Biqi_YANG1;~Xianzhi_Li1;~Yun-Hui_Liu1;~Pheng-Ann_Heng1;~Chi-Wing_Fu2", "gender": "M;F;F;;M;", "homepage": "https://github.com/CUHKWilliam;https://github.com/zzzzfyang;https://nini-lxz.github.io/;http://www.mae.cuhk.edu.hk/~yhliu;http://www.cse.cuhk.edu.hk/~pheng;", "dblp": "08/8310;;126/1233;;52/2889;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=qsJCXFoAAAAJ;;https://scholar.google.com/citations?sortby=pubdate;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Weiliang_Tang3;~Biqi_YANG1;~Xianzhi_Li1;~Yun-Hui_Liu1;~Pheng-Ann_Heng1;~Chi-Wing_Fu2", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Huazhong University of Science and Technology;The Chinese University of Hong Kong;The Chinese University of Hong Kong;", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;hust.edu.cn;cuhk.edu.hk;cuhk.edu.hk;", "position": "PhD student;PhD student;Associate Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\ntang2023prototypical,\ntitle={Prototypical Variational Autoencoder for 3D Few-shot Object Detection},\nauthor={Weiliang Tang and Biqi YANG and Xianzhi Li and Yun-Hui Liu and Pheng-Ann Heng and Chi-Wing Fu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fljrZsJ2I8}\n}", "github": "", "project": "", "reviewers": "cCbd;WEFx;B5ky;dL8b", "pdf_size": 5070572, "rating": "4;5;6;6", "confidence": "4;3;4;4", "soundness": "4;2;2;2", "novelty": "3;2;3;3", "presentation": "3;3;2;2", "wc_summary": "61;52;78;49", "wc_strengths": "21;37;92;47", "wc_weaknesses": "31;132;77;49", "wc_questions": "71;56;5;1", "wc_limitations": "1;44;18;1", "wc_review": "185;321;270;147", "wc_reply_reviewers": "0;35;23;69", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.0, 11.291589790636214 ], "wc_strengths_avg": [ 49.25, 26.366408553308887 ], "wc_weaknesses_avg": [ 72.25, 38.192767639960316 ], "wc_questions_avg": [ 33.25, 30.74390183434757 ], "wc_limitations_avg": [ 16.0, 17.592612085759182 ], "wc_review_avg": [ 230.75, 68.54332571447055 ], "wc_reply_reviewers_avg": [ 31.75, 24.913600703230355 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17060371543639602521&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;hust.edu.cn;cuhk.edu.hk;cuhk.edu.hk;", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Chinese 
University of Hong Kong;Huazhong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.hust.edu.cn", "aff_unique_abbr": "CUHK;HUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Are Diffusion Models Vision-And-Language Reasoners?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70890", "id": "fmJv8Hj0yo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a675d804f50509b8e21d0d3ca709d03-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fmJv8Hj0yo", "openreview": "https://openreview.net/forum?id=fmJv8Hj0yo", "poster": "/media/PosterPDFs/NeurIPS%202023/70890.png?t=1701461321.6898878", "slides": "https://nips.cc/virtual/2023/poster/70890", "video": "https://nips.cc/virtual/2023/poster/70890", "author_site": "Benno Krojer, Elinor Poole-Dayan, Vikram Voleti, Chris Pal, Siva Reddy", "tldr": "", "abstract": "Text-conditioned image generation models have recently shown immense qualitative success using denoising diffusion processes. However, unlike discriminative vision-and-language models, it is a non-trivial task to subject these diffusion-based generative models to automatic fine-grained quantitative evaluation of high-level phenomena such as compositionality.\nTowards this goal, we perform two innovations. First, we transform diffusion-based models (in our case, Stable Diffusion) for any image-text matching (ITM) task using a novel method called DiffusionITM.\nSecond, we introduce the Generative-Discriminative Evaluation Benchmark (GDBench) with 7 complex vision-and-language tasks, bias evaluation and detailed analysis.\nWe find that Stable Diffusion + DiffusionITM is competitive on many tasks and outperforms CLIP on compositional tasks like CLEVR and Winoground.\nWe further boost its compositional performance with a transfer setup by fine-tuning on MS-COCO while retaining generative capabilities. \nWe also measure the stereotypical bias in diffusion models, and find that Stable Diffusion 2.1 is, for the most part, less biased than Stable Diffusion 1.5.\nOverall, our results point in an exciting direction bringing discriminative and generative model evaluation closer.
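Note: the core of DiffusionITM, per the abstract above, is to repurpose a text-conditioned denoiser as an image-text matcher: the caption with the lowest expected noise-prediction error wins. The sketch below conveys only that scoring loop; `denoise` is a stub purely so the code runs, the noising step is not a real diffusion schedule, and the paper's additional normalization of the error is omitted.

```python
import numpy as np

rng = np.random.default_rng(0)

def denoise(x_t, t, caption):
    """Stub for a text-conditioned noise predictor (e.g., an SD UNet).
    A deterministic random function of (t, caption) so the sketch runs."""
    seed = (abs(hash(caption)) + t) % (2**32)
    return np.random.default_rng(seed).normal(size=x_t.shape)

def itm_score(image, caption, n_samples=8):
    """Lower expected denoising error => better image-text match."""
    errs = []
    for _ in range(n_samples):
        t = int(rng.integers(1, 1000))
        eps = rng.normal(size=image.shape)
        x_t = image + 0.1 * (t ** 0.5) * eps     # toy noising, not DDPM
        errs.append(np.mean((eps - denoise(x_t, t, caption)) ** 2))
    return -float(np.mean(errs))

image = rng.normal(size=(4, 8, 8))               # toy latent "image"
captions = ["a red cube left of a blue sphere",
            "a blue cube left of a red sphere"]
print(max(captions, key=lambda c: itm_score(image, c)))
```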
We will release code and benchmark setup soon.", "keywords": "diffusion model;automatic evaluation;vision-and-language;compositionality", "primary_area": "", "supplementary_material": "/attachment/92de75a12513b5065996a49a34638462bfb274bb.pdf", "author": "Benno Krojer;Elinor Poole-Dayan;Vikram Voleti;Christopher Pal;Siva Reddy", "authorids": "~Benno_Krojer1;~Elinor_Poole-Dayan1;~Vikram_Voleti1;~Christopher_Pal1;~Siva_Reddy1", "gender": "M;F;M;;M", "homepage": "https://www.bennokrojer.com/;https://elinorp-d.github.io/;https://voletiv.github.io;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao;http://sivareddy.in", "dblp": "280/0462.html;304/3100;243/6609;45/1217;64/8153", "google_scholar": "D5eyaLwAAAAJ;QGDuhYgAAAAJ;PPCRqZUAAAAJ;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ;", "orcid": ";0009-0003-8217-0504;;;", "linkedin": ";elinor-poole-dayan/;vikram-voleti-45372222;;", "or_profile": "~Benno_Krojer1;~Elinor_Poole-Dayan1;~Vikram_Voleti1;~Christopher_Pal1;~Siva_Reddy1", "aff": "McGill University;McGill University;Meta;Polytechnique Montreal;Mila, McGill University", "aff_domain": "mcgill.ca;mcgill.ca;meta.com;polymtl.ca;mila.quebec", "position": "PhD student;Undergrad student;Intern;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkrojer2023are,\ntitle={Are Diffusion Models Vision-And-Language Reasoners?},\nauthor={Benno Krojer and Elinor Poole-Dayan and Vikram Voleti and Christopher Pal and Siva Reddy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fmJv8Hj0yo}\n}", "github": "", "project": "", "reviewers": "ttjT;T2Yx;jr2c;cFkA;JbKR", "pdf_size": 35412181, "rating": "4;4;5;6;8", "confidence": "4;4;4;3;5", "soundness": "3;3;3;2;4", "novelty": "2;3;3;4;4", "presentation": "3;3;3;3;4", "wc_summary": "72;88;46;97;139", "wc_strengths": "101;52;57;57;136", "wc_weaknesses": "160;287;399;86;100", "wc_questions": "3;63;11;80;333", "wc_limitations": "3;15;1;11;11", "wc_review": "339;505;514;331;719", "wc_reply_reviewers": "71;0;0;65;173", "wc_reply_authors": "560;0;0;99;141", "reply_reviewers": "1;0;0;1;1", "reply_authors": "3;1;1;2;2", "rating_avg": [ 5.4, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 88.4, 30.66333315215422 ], "wc_strengths_avg": [ 80.6, 32.915649773322116 ], "wc_weaknesses_avg": [ 206.4, 119.6337744953322 ], "wc_questions_avg": [ 98.0, 121.13463583963095 ], "wc_limitations_avg": [ 8.2, 5.30659966456864 ], "wc_review_avg": [ 481.6, 142.10643898149021 ], "wc_reply_reviewers_avg": [ 61.8, 63.401577267446584 ], "wc_reply_authors_avg": [ 160.0, 207.50036144546834 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13087701939019930153&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "mcgill.ca;mcgill.ca;meta.com;polymtl.ca;mila.quebec", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "McGill University;Meta;Polytechnique Montreal", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.mcgill.ca;https://meta.com;https://www.polymtl.ca", "aff_unique_abbr": "McGill;Meta;PolyMTL", 
"aff_campus_unique_index": "1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Permutation Equivariant Neural Functionals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70889", "id": "fmYmXNPmhv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e9d8aeeab6120c3c83ccf95d4c211d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fmYmXNPmhv", "openreview": "https://openreview.net/forum?id=fmYmXNPmhv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70889", "video": "https://nips.cc/virtual/2023/poster/70889", "author_site": "Allan Zhou, Kaien Yang, Kaylee Burns, Adriano Cardace, Yiding Jiang, Samuel Sokota, J. Zico Kolter, Chelsea Finn", "tldr": "", "abstract": "This work studies the design of neural networks that can process the weights or gradients of other neural networks, which we refer to as *neural functional networks* (NFNs). Despite a wide range of potential applications, including learned optimization, processing implicit neural representations, network editing, and policy evaluation, there are few unifying principles for designing effective architectures that process the weights of other networks. We approach the design of neural functionals through the lens of symmetry, in particular by focusing on the permutation symmetries that arise in the weights of deep feedforward networks because hidden layer neurons have no inherent order. We introduce a framework for building *permutation equivariant* neural functionals, whose architectures encode these symmetries as an inductive bias. The key building blocks of this framework are *NF-Layers* (neural functional layers) that we constrain to be permutation equivariant through an appropriate parameter sharing scheme. In our experiments, we find that permutation equivariant neural functionals are effective on a diverse set of tasks that require processing the weights of MLPs and CNNs, such as predicting classifier generalization, producing \"winning ticket\" sparsity masks for initializations, and classifying or editing implicit neural representations (INRs). 
In addition, we provide code for our models and experiments at https://github.com/AllanYangZhou/nfn.", "keywords": "equivariance;permutation;implicit neural representation;generalization", "primary_area": "", "supplementary_material": "", "author": "Allan Zhou;Kaien Yang;Kaylee Burns;Adriano Cardace;Yiding Jiang;Samuel Sokota;J Zico Kolter;Chelsea Finn", "authorids": "~Allan_Zhou1;kaieny@stanford.edu;~Kaylee_Burns2;~Adriano_Cardace1;~Yiding_Jiang2;~Samuel_Sokota1;~J_Zico_Kolter1;~Chelsea_Finn1", "gender": ";;F;M;M;M;;F", "homepage": "http://bland.website;;https://kayburns.github.io;https://github.com/adricarda;https://yidingjiang.github.io/;https://ssokota.github.io/;;https://ai.stanford.edu/~cbfinn/", "dblp": "195/6907;;217/3002;;;243/5881;;131/1783", "google_scholar": ";;N_rVVG8AAAAJ;0uhdTI0AAAAJ;x9qzWg8AAAAJ;;;vfPE6hgAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;adriano-cardace/;;samuel-sokota-87a153149/;;", "or_profile": "~Allan_Zhou1;kaieny@stanford.edu;~Kaylee_Burns2;~Adriano_Cardace1;~Yiding_Jiang2;~Samuel_Sokota1;~J_Zico_Kolter1;~Chelsea_Finn1", "aff": "Google Deepmind;;Stanford University;University of Bologna;Carnegie Mellon University;Carnegie Mellon University;;Google", "aff_domain": "google.com;;stanford.edu;unibo.it;andrew.cmu.edu;cmu.edu;;google.com", "position": "Intern;;PhD student;PhD student;PhD student;PhD student;;Research Scientist", "bibtex": "@inproceedings{\nzhou2023permutation,\ntitle={Permutation Equivariant Neural Functionals},\nauthor={Allan Zhou and Kaien Yang and Kaylee Burns and Adriano Cardace and Yiding Jiang and Samuel Sokota and J Zico Kolter and Chelsea Finn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=fmYmXNPmhv}\n}", "github": "", "project": "", "reviewers": "CEwM;VWST;7rfP;9eY1;jZvx", "pdf_size": 5154374, "rating": "3;6;7;7;7", "confidence": "5;4;4;2;5", "soundness": "2;4;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;4;4;3;4", "wc_summary": "90;156;447;155;222", "wc_strengths": "67;121;75;57;365", "wc_weaknesses": "310;264;311;45;149", "wc_questions": "30;181;13;139;345", "wc_limitations": "5;42;24;40;153", "wc_review": "502;764;870;436;1234", "wc_reply_reviewers": "0;84;187;21;209", "wc_reply_authors": "0;0;271;12;0", "reply_reviewers": "0;1;2;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 214.0, 123.75297976210511 ], "wc_strengths_avg": [ 137.0, 116.08962055239908 ], "wc_weaknesses_avg": [ 215.8, 103.82177035670313 ], "wc_questions_avg": [ 141.6, 119.93598292422503 ], "wc_limitations_avg": [ 52.8, 51.83589489919124 ], "wc_review_avg": [ 761.2, 285.77081726446454 ], "wc_reply_reviewers_avg": [ 100.2, 84.7900937610049 ], "wc_reply_authors_avg": [ 56.6, 107.30069897256028 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4714045207910318, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8203329472715198738&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;;stanford.edu;unibo.it;andrew.cmu.edu;cmu.edu;;google.com", "author_num": 8, "aff_unique_index": "0;1;2;3;3;4", "aff_unique_norm": "DeepMind;Stanford University;University of 
Bologna;Carnegie Mellon University;Google", "aff_unique_dep": "DeepMind;;;;Google", "aff_unique_url": "https://deepmind.com;https://www.stanford.edu;https://www.unibo.it;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "DeepMind;Stanford;Unibo;CMU;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;1;2;1;1;1", "aff_country_unique": "United Kingdom;United States;Italy" }, { "id": "fnQ2QPl5n7", "title": "GUARD: A Safe Reinforcement Learning Benchmark", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Due to the trial-and-error nature of RL, it is typically challenging to apply RL algorithms to safety-critical real-world applications, such as autonomous driving, human-robot interaction, robot manipulation, etc., where such errors are not tolerable. Recently, safe RL (i.e., constrained RL) has emerged rapidly in the literature, in which the agents explore the environment while satisfying constraints. Due to the diversity of algorithms and tasks, it remains difficult to compare existing safe RL algorithms. To fill that gap, we introduce GUARD, a Generalized Unified SAfe Reinforcement Learning Development Benchmark. GUARD has several advantages compared to existing benchmarks. First, GUARD is a generalized benchmark with a wide variety of RL agents, tasks, and safety constraint specifications. Second, GUARD comprehensively covers state-of-the-art safe RL algorithms with self-contained implementations. Third, GUARD is highly customizable in tasks and algorithms. We present a comparison of state-of-the-art safe RL algorithms in various task settings using GUARD and establish baselines that future work can build on.", "keywords": "Safe Reinforcement Learning;Reinforcement Learning Benchmark;Safe Reinforcement Learning Algorithm;Customizable;Robotics", "primary_area": "", "supplementary_material": "/attachment/eb5fe3ba1632c4d044c68f8e1227d9a4831dca83.pdf", "author": "Weiye Zhao;Rui Chen;Yifan Sun;Ruixuan Liu;Tianhao Wei;Changliu Liu", "authorids": "~Weiye_Zhao1;~Rui_Chen11;~Yifan_Sun9;~Ruixuan_Liu1;~Tianhao_Wei1;~Changliu_Liu1", "gender": "M;M;M;M;M;F", "homepage": "https://github.com/CaesarAndylaw;https://ruichen.pub/;https://yifansun98.github.io/;https://waynekyrie.github.io/home/;;http://www.cs.cmu.edu/~cliu6/index.html", "dblp": "228/6863;;99/10261-11;;222/5386;166/3563", "google_scholar": "P-79KOcAAAAJ;XiUE0wMAAAAJ;DGhQSYUAAAAJ;3v7sVbwAAAAJ;V22j1C0AAAAJ;", "orcid": "0000-0002-8426-5238;0000-0002-8671-8771;0009-0007-2073-7789;0000-0001-6249-5473;;", "linkedin": ";;yifansun1/;ruixuan-wayne-liu-a71b50127/;;", "or_profile": "~Weiye_Zhao1;~Rui_Chen11;~Yifan_Sun9;~Ruixuan_Liu1;~Tianhao_Wei1;~Changliu_Liu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;andrew.cmu.edu;andrew.cmu.edu;andrew.cmu.edu;andrew.cmu.edu;cmu.edu", "position": "PhD student;PhD student;MS student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nzhao2023guard,\ntitle={{GUARD}: A Safe Reinforcement Learning Benchmark},\nauthor={Weiye Zhao and Rui Chen and Yifan Sun and Ruixuan Liu and Tianhao Wei and Changliu Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=fnQ2QPl5n7}\n}", "github": "", "project": "", "reviewers": "nmvU;xtKP;CmEk;v1GQ", "site": "https://openreview.net/forum?id=fnQ2QPl5n7", "pdf_size": 14372170, "rating": "4;4;5;7", "confidence": 
"3;4;4;4", "wc_summary_and_contributions": "80;48;56;29", "wc_strengths": "72;251;76;60", "wc_improvement": "262;160;153;103", "wc_limitations": "8;531;37;37", "wc_correctness": "11;1;4;12", "wc_clarity": "21;1;5;23", "wc_relation_to_prior_work": "23;1;25;19", "wc_documentation": "21;1;13;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "499;995;370;293", "wc_reply_reviewers": "23;20;0;0", "wc_reply_authors": "173;825;374;661", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 53.25, 18.2944663764757 ], "wc_strengths_avg": [ 114.75, 78.88401295573141 ], "wc_improvement_avg": [ 169.5, 57.75162335380712 ], "wc_limitations_avg": [ 153.25, 218.41517232097223 ], "wc_correctness_avg": [ 7.0, 4.636809247747852 ], "wc_clarity_avg": [ 12.5, 9.630680142129112 ], "wc_relation_to_prior_work_avg": [ 17.0, 9.486832980505138 ], "wc_documentation_avg": [ 11.0, 7.211102550927978 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 539.25, 273.2273549628587 ], "wc_reply_reviewers_avg": [ 10.75, 10.80219885023415 ], "wc_reply_authors_avg": [ 508.25, 252.03112407002433 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1913011720971941697&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Data-Informed Geometric Space Selection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70888", "id": "fpElyckKkd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/486ff0b164cf92b0255fe39863bcf99e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=fpElyckKkd", "openreview": "https://openreview.net/forum?id=fpElyckKkd", "poster": "/media/PosterPDFs/NeurIPS%202023/70888.png?t=1700199238.9839737", "slides": "https://nips.cc/virtual/2023/poster/70888", "video": "https://nips.cc/virtual/2023/poster/70888", "author_site": "Shuai Zhang, Wenqi Jiang", "tldr": "", "abstract": "Geometric representation learning (e.g., hyperbolic and spherical geometry) has proven to be efficacious in solving many intricate machine learning tasks. The fundamental challenge of geometric representation learning lies in aligning the inherent geometric bias with the underlying structure of the data, which is a rarely explored topic in the literature. Existing methods heavily rely on heuristic assumptions on the data structure to decide the type of geometry to be adopted, which often leads to suboptimal performance. This work aims to automate the alignment process via a data-informed strategy such that we optimize model performance with minimal overhead. Specifically, a sparse gating mechanism is employed to enable each input data point $\\mathit{p}$ to select $K$ geometric spaces from a given candidate geometric space pool with $N$ ($K 0$ \nthreshold. We leverage an adaptive sampling technique to iteratively build a set\nof sample points suitable for representing the target activation function. 
While \nthe theoretical worst-case time complexity of our approach is\n$O(\\varepsilon^{-2d})$,\nit typically only takes $O(\\log^{\\beta} \\frac{1}{\\varepsilon})$ time for some $\\beta \\ge 1$ and is\nthus \nsufficiently fast in practice. We provide empirical evidence of SOL's practicality\nby incorporating it into a robustness certifier and observing that it\nproduces similar or higher certification rates while taking as little as a quarter of the time compared to the other methods.", "keywords": "Neural network verification;Robustness;Linear bounding", "primary_area": "", "supplementary_material": "", "author": "Yuriy Biktairov;Jyotirmoy Deshmukh", "authorids": "~Yuriy_Biktairov1;~Jyotirmoy_Deshmukh2", "gender": "M;M", "homepage": ";https://jdeshmukh.github.io", "dblp": "277/1367;42/160", "google_scholar": ";https://scholar.google.com.tw/citations?user=CwFX74MAAAAJ", "orcid": ";0000-0002-8815-464X", "linkedin": "https://linkedin.com/in/yuriy-biktairov-55117287;jdeshmukh/", "or_profile": "~Yuriy_Biktairov1;~Jyotirmoy_Deshmukh1", "aff": "University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nbiktairov2023sol,\ntitle={{SOL}: Sampling-based Optimal Linear bounding of arbitrary scalar functions},\nauthor={Yuriy Biktairov and Jyotirmoy Deshmukh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gAQCx61chN}\n}", "github": "", "project": "", "reviewers": "173q;GjZS;8jE6;hjHu;Uxf6", "pdf_size": 367335, "rating": "4;6;7;7;7", "confidence": "4;4;3;5;3", "soundness": "3;3;3;4;3", "novelty": "2;2;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "137;98;40;290;74", "wc_strengths": "104;199;48;151;39", "wc_weaknesses": "250;318;45;371;70", "wc_questions": "36;34;148;317;5", "wc_limitations": "3;13;7;27;6", "wc_review": "530;662;288;1156;194", "wc_reply_reviewers": "0;213;152;57;19", "wc_reply_authors": "0;756;419;62;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;2;2;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 127.8, 87.04573510517329 ], "wc_strengths_avg": [ 108.2, 60.8388034070362 ], "wc_weaknesses_avg": [ 210.8, 131.15395533494214 ], "wc_questions_avg": [ 108.0, 115.36897329871667 ], "wc_limitations_avg": [ 11.2, 8.541662601625049 ], "wc_review_avg": [ 566.0, 338.9041162334857 ], "wc_reply_reviewers_avg": [ 88.2, 81.46999447649422 ], "wc_reply_authors_avg": [ 247.4, 298.2975695509436 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1833396994056422, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7550065932045184101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "usc.edu;usc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "OpenAGI: When LLM Meets Domain Experts", "status": "Poster", "track": "Datasets & 
Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73509", "id": "gFf0a0ZxJM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1190733f217404edc8a7f4e15a57f301-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=gFf0a0ZxJM", "openreview": "https://openreview.net/forum?id=gFf0a0ZxJM", "poster": "/media/PosterPDFs/NeurIPS%202023/73509.png?t=1701378318.151941", "slides": "https://nips.cc/virtual/2023/poster/73509", "video": "https://nips.cc/virtual/2023/poster/73509", "author_site": "Yingqiang Ge, Wenyue Hua, Kai Mei, jianchao ji, Juntao Tan, Shuyuan Xu, Zelong Li, Yongfeng Zhang", "tldr": "", "abstract": "Human Intelligence (HI) excels at combining basic skills to solve complex tasks. This capability is vital for Artificial Intelligence (AI) and should be embedded in comprehensive AI Agents, enabling them to harness expert models for complex task-solving towards Artificial General Intelligence (AGI). Large Language Models (LLMs) show promising learning and reasoning abilities, and can effectively use external models, tools, plugins, or APIs to tackle complex problems. In this work, we introduce OpenAGI, an open-source AGI research and development platform designed for solving multi-step, real-world tasks. Specifically, OpenAGI uses a dual strategy, integrating standard benchmark tasks for benchmarking and evaluation, and open-ended tasks including more expandable models, tools, plugins, or APIs for creative problem-solving. Tasks are presented as natural language queries to the LLM, which then selects and executes appropriate models. We also propose a Reinforcement Learning from Task Feedback (RLTF) mechanism that uses task results to improve the LLM's task-solving ability, which creates a self-improving AI feedback loop. While we acknowledge that AGI is a broad and multifaceted research challenge with no singularly defined solution path, the integration of LLMs with domain-specific expert models, inspired by mirroring the blend of general and specialized intelligence in humans, offers a promising approach towards AGI. 
We are open-sourcing the OpenAGI project's code, dataset, benchmarks, evaluation methods, and the UI demo to foster community involvement in AGI advancement: https://github.com/agiresearch/OpenAGI.", "keywords": "Large Language Model;Artificial General Intelligence;AI Agents;Complex Task Solving;Open-domain Model Synthesis;Compositionality", "primary_area": "", "supplementary_material": "", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nge2023openagi,\ntitle={Open{AGI}: When {LLM} Meets Domain Experts},\nauthor={Yingqiang Ge and Wenyue Hua and Kai Mei and jianchao ji and Juntao Tan and Shuyuan Xu and Zelong Li and Yongfeng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=gFf0a0ZxJM}\n}", "github": "", "project": "", "reviewers": "6ZWy;cNMX;fmdC;Zkzc;9AkB", "pdf_size": 11919711, "rating": "5;6;6;7;8", "confidence": "3;4;3;3;4", "wc_summary_and_contributions": "141;192;15;68;44", "wc_strengths": "10;73;39;49;12", "wc_improvement": "152;467;48;91;23", "wc_limitations": "11;71;70;6;6", "wc_correctness": "8;43;13;1;1", "wc_clarity": "60;1;47;1;1", "wc_relation_to_prior_work": "82;1;9;1;1", "wc_documentation": "31;39;32;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "496;888;274;219;90", "wc_reply_reviewers": "231;28;109;0;0", "wc_reply_authors": "1411;1052;1003;298;246", "reply_reviewers": "3;1;1;0;0", "reply_authors": "5;3;3;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 92.0, 65.13063795173512 ], "wc_strengths_avg": [ 36.6, 23.652484013312428 ], "wc_improvement_avg": [ 156.2, 161.44026759145316 ], "wc_limitations_avg": [ 32.8, 30.837639338963676 ], "wc_correctness_avg": [ 13.2, 15.574337867145427 ], "wc_clarity_avg": [ 22.0, 26.04611295375953 ], "wc_relation_to_prior_work_avg": [ 18.8, 31.751535395945815 ], "wc_documentation_avg": [ 20.8, 16.4 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 393.4, 279.95685381858397 ], "wc_reply_reviewers_avg": [ 73.6, 88.25100565999234 ], "wc_reply_authors_avg": [ 802.0, 455.3974088639504 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.6, 1.4966629547095767 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.4803844614152616, "gs_citation": 277, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3946707198647426946&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "", "author_num": 1 }, { "title": "Theoretical and Practical Perspectives on what Influence Functions Do", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70864", "id": "gGl0n7Onug", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57bb27b9be6ad04019ae3cea2b540872-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gGl0n7Onug", "openreview": "https://openreview.net/forum?id=gGl0n7Onug", "poster": "/media/PosterPDFs/NeurIPS%202023/70864.png?t=1701888135.4630191", "slides": "https://nips.cc/virtual/2023/poster/70864", "video": "https://nips.cc/virtual/2023/poster/70864", "author_site": "Andrea Schioppa, Katja Filippova, Ivan Titov, Polina Zablotskaia", "tldr": "", "abstract": "Influence functions (IF) have been seen as a technique for 
explaining model predictions through the lens of the training data. Their utility is assumed to be in identifying training examples \"responsible\" for a prediction so that, for example, correcting a prediction is possible by intervening on those examples (removing or editing them) and retraining the model. However, recent empirical studies have shown that the existing methods of estimating IF predict the leave-one-out-and-retrain effect poorly. \nIn order to understand the mismatch between the theoretical promise and the practical results, we analyse five assumptions made by IF methods which are problematic for modern-scale deep neural networks and which concern convexity, numeric stability, training trajectory and parameter divergence. This allows us to clarify what can be expected theoretically from IF. We show that while most assumptions can be addressed successfully, the parameter divergence poses a clear limitation on the predictive power of IF: influence fades over training time even with deterministic training. We illustrate this theoretical result with BERT and ResNet models.\nAnother conclusion from the theoretical analysis is that IF are still useful for model debugging and correcting even though some of the assumptions made in prior work do not hold: using natural language processing and computer vision tasks, we verify that mis-predictions can be successfully corrected by taking only a few fine-tuning steps on influential examples.", "keywords": "Explainable AI;Influence Functions;Training Data Attribution", "primary_area": "", "supplementary_material": "", "author": "Andrea Schioppa;Katja Filippova;Ivan Titov;Polina Zablotskaia", "authorids": "~Andrea_Schioppa1;~Katja_Filippova1;~Ivan_Titov1;~Polina_Zablotskaia1", "gender": ";F;;F", "homepage": ";;http://ivan-titov.org;", "dblp": ";24/5028;08/5391;188/6903", "google_scholar": ";https://scholar.google.ch/citations?user=23xz9QgAAAAJ;https://scholar.google.nl/citations?user=FKUc3vsAAAAJ;Lfd5sYsAAAAJ", "orcid": ";;;", "linkedin": ";katja-filippova-93a2144;;https://www.linkedin.com/mwlite/in/polina-zablotskaia-8a7644a2", "or_profile": "~Andrea_Schioppa1;~Katja_Filippova1;~Ivan_Titov1;~Polina_Zablotskaia1", "aff": ";Research, Google;University of Amsterdam;Google", "aff_domain": ";research.google.com;uva.nl;google.com", "position": ";Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nschioppa2023theoretical,\ntitle={Theoretical and Practical Perspectives on what Influence Functions Do},\nauthor={Andrea Schioppa and Katja Filippova and Ivan Titov and Polina Zablotskaia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gGl0n7Onug}\n}", "github": "", "project": "", "reviewers": "qity;RiS8;7Fho;SKsY", "pdf_size": 2258964, "rating": "7;7;7;8", "confidence": "4;3;2;4", "soundness": "4;3;3;4", "novelty": "3;3;3;4", "presentation": "4;3;3;4", "wc_summary": "122;133;46;209", "wc_strengths": "85;96;53;294", "wc_weaknesses": "107;291;88;176", "wc_questions": "53;84;24;116", "wc_limitations": "18;18;1;4", "wc_review": "385;622;212;799", "wc_reply_reviewers": "21;19;8;39", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 127.5, 57.76028047023318 ], "wc_strengths_avg": [ 132.0, 
94.855152732996 ], "wc_weaknesses_avg": [ 165.5, 79.51257762140528 ], "wc_questions_avg": [ 69.25, 34.332018583240924 ], "wc_limitations_avg": [ 10.25, 7.8222439235810075 ], "wc_review_avg": [ 504.5, 223.81521396008807 ], "wc_reply_reviewers_avg": [ 21.75, 11.121488209767612 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12859133359522655482&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";research.google.com;uva.nl;google.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;University of Amsterdam", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.uva.nl", "aff_unique_abbr": "Google;UvA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Netherlands" }, { "title": "Generalizing Nonlinear ICA Beyond Structural Sparsity", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70863", "id": "gI1SOgW3kw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2aebc17b683792a17dd4a24fcb038ba6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gI1SOgW3kw", "openreview": "https://openreview.net/forum?id=gI1SOgW3kw", "poster": "/media/PosterPDFs/NeurIPS%202023/70863.png?t=1701917089.2426157", "slides": "https://nips.cc/virtual/2023/poster/70863", "video": "https://nips.cc/virtual/2023/poster/70863", "author_site": "Yujia Zheng, Kun Zhang", "tldr": "", "abstract": "Nonlinear independent component analysis (ICA) aims to uncover the true latent sources from their observable nonlinear mixtures. Despite its significance, the identifiability of nonlinear ICA is known to be impossible without additional assumptions. Recent advances have proposed conditions on the connective structure from sources to observed variables, known as Structural Sparsity, to achieve identifiability in an unsupervised manner. However, the sparsity constraint may not hold universally for all sources in practice. Furthermore, the assumptions of bijectivity of the mixing process and independence among all sources, which arise from the setting of ICA, may also be violated in many real-world scenarios. To address these limitations and generalize nonlinear ICA, we propose a set of new identifiability results in the general settings of undercompleteness, partial sparsity and source dependence, and flexible grouping structures. Specifically, we prove identifiability when there are more observed variables than sources (undercomplete), and when certain sparsity and/or source independence assumptions are not met for some changing sources. Moreover, we show that even in cases with flexible grouping structures (e.g., part of the sources can be divided into irreducible independent groups with various sizes), appropriate identifiability results can also be established. 
Theoretical claims are supported empirically on both synthetic and real-world datasets.", "keywords": "Latent variable models;nonlinear independent component analysis", "primary_area": "", "supplementary_material": "", "author": "Yujia Zheng;Kun Zhang", "authorids": "~Yujia_Zheng1;~Kun_Zhang1", "gender": "M;M", "homepage": "https://yjzheng.com;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "245/6109-1.html;96/3115-1", "google_scholar": "https://scholar.google.co.uk/citations?user=ioiW248AAAAJ;RGoypN4AAAAJ", "orcid": "0009-0003-5225-6366;", "linkedin": ";", "or_profile": "~Yujia_Zheng1;~Kun_Zhang1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nzheng2023generalizing,\ntitle={Generalizing Nonlinear {ICA} Beyond Structural Sparsity},\nauthor={Yujia Zheng and Kun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gI1SOgW3kw}\n}", "github": "", "project": "", "reviewers": "m3QW;nmMP;qJ69;jekt;gBCZ", "pdf_size": 3326263, "rating": "6;6;7;7;8", "confidence": "2;3;3;3;4", "soundness": "2;3;3;4;4", "novelty": "3;3;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "87;189;50;161;319", "wc_strengths": "46;165;72;90;102", "wc_weaknesses": "299;821;136;135;242", "wc_questions": "211;453;221;110;118", "wc_limitations": "7;231;1;1;35", "wc_review": "650;1859;480;497;816", "wc_reply_reviewers": "10;693;0;67;40", "wc_reply_authors": "7;1273;0;16;13", "reply_reviewers": "1;4;0;1;1", "reply_authors": "2;9;1;2;2", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 161.2, 93.32180881230282 ], "wc_strengths_avg": [ 95.0, 39.75927564732537 ], "wc_weaknesses_avg": [ 326.6, 255.1035868034787 ], "wc_questions_avg": [ 222.6, 123.9687057285023 ], "wc_limitations_avg": [ 55.0, 88.89544420272615 ], "wc_review_avg": [ 860.4, 513.8492385904644 ], "wc_reply_reviewers_avg": [ 162.0, 266.54005327530047 ], "wc_reply_authors_avg": [ 261.8, 505.6296668511451 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 3.2, 2.9257477676655586 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8451542547285165, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18275624863257288606&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "How Does Adaptive Optimization Impact Local Neural Network Geometry?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70862", "id": "gIG8LvTLuc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a5e6d0441a8e1eda9a50717b0870f94-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gIG8LvTLuc", "openreview": "https://openreview.net/forum?id=gIG8LvTLuc", "poster": "/media/PosterPDFs/NeurIPS%202023/70862.png?t=1701314096.6551359", "slides": "https://nips.cc/virtual/2023/poster/70862", "video": 
"https://nips.cc/virtual/2023/poster/70862", "author_site": "Kaiqi Jiang, Dhruv Malik, Yuanzhi Li", "tldr": "", "abstract": "Adaptive optimization methods are well known to achieve superior convergence relative to vanilla gradient methods. The traditional viewpoint in optimization, particularly in convex optimization, explains this improved performance by arguing that, unlike vanilla gradient schemes, adaptive algorithms mimic the behavior of a second-order method by adapting to the *global* geometry of the loss function. We argue that in the context of neural network optimization, this traditional viewpoint is insufficient. Instead, we advocate for a *local* trajectory analysis. For iterate trajectories produced by running a generic optimization algorithm OPT, we introduce $R^{\\text{OPT}}\\_{\\text{med}}$, a statistic that is analogous to the condition number of the loss Hessian evaluated at the iterates. Through extensive experiments on language models where adaptive algorithms converge faster than vanilla gradient methods like SGD, we show that adaptive methods such as Adam bias the trajectories towards regions where $R^{\\text{Adam}}_{\\text{med}}$ is small, where one might expect faster optimization. By contrast, SGD (with momentum) biases the trajectories towards regions where $R^{\\text{SGD}}\\_{\\text{med}}$ is comparatively large. We complement these empirical observations with a theoretical result that provably demonstrates this phenomenon in the simplified setting of a two-layer linear network. We view our findings as evidence for the need of a new explanation of the success of adaptive methods, one that is different than the conventional wisdom.", "keywords": "optimization;adaptive algorithms;neural networks", "primary_area": "", "supplementary_material": "/attachment/2b3d6bd3e69d7d8fd472ea2b9d7f9c666cb695bd.zip", "author": "Kaiqi Jiang;Dhruv Malik;Yuanzhi Li", "authorids": "~Kaiqi_Jiang2;~Dhruv_Malik1;~Yuanzhi_Li1", "gender": "M;;M", "homepage": ";;", "dblp": ";197/5777;73/3628", "google_scholar": ";;", "orcid": ";;", "linkedin": "kaiqi-jiang-23b010128/;;", "or_profile": "~Kaiqi_Jiang2;~Dhruv_Malik1;~Yuanzhi_Li1", "aff": "Princeton University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "princeton.edu;cmu.edu;andrew.cmu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njiang2023how,\ntitle={How Does Adaptive Optimization Impact Local Neural Network Geometry?},\nauthor={Kaiqi Jiang and Dhruv Malik and Yuanzhi Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gIG8LvTLuc}\n}", "github": "", "project": "", "reviewers": "8Quz;qhhF;cQDf;pmE6;PbqS", "pdf_size": 1293999, "rating": "4;6;6;6;6", "confidence": "4;4;4;3;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;2;3;3;3", "wc_summary": "111;29;78;75;80", "wc_strengths": "69;72;27;47;60", "wc_weaknesses": "122;406;159;59;190", "wc_questions": "16;2;57;106;16", "wc_limitations": "1;1;1;1;7", "wc_review": "319;510;322;288;353", "wc_reply_reviewers": "0;13;10;14;0", "wc_reply_authors": "64;7;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 74.6, 26.24957142507283 ], "wc_strengths_avg": [ 
55.0, 16.480291259562133 ], "wc_weaknesses_avg": [ 187.2, 117.78183221532936 ], "wc_questions_avg": [ 39.4, 38.05049276947671 ], "wc_limitations_avg": [ 2.2, 2.4 ], "wc_review_avg": [ 358.4, 78.54323650066885 ], "wc_reply_reviewers_avg": [ 7.4, 6.1838499334961226 ], "wc_reply_authors_avg": [ 14.2, 25.047155527125227 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14454435308079895540&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "princeton.edu;cmu.edu;andrew.cmu.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Princeton University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.cmu.edu", "aff_unique_abbr": "Princeton;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "NeRF Revisited: Fixing Quadrature Instability in Volume Rendering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70861", "id": "gJHAT79cZU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5301c49207917c5c870131959971851c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gJHAT79cZU", "openreview": "https://openreview.net/forum?id=gJHAT79cZU", "poster": "/media/PosterPDFs/NeurIPS%202023/70861.png?t=1702416351.980228", "slides": "https://nips.cc/virtual/2023/poster/70861", "video": "https://nips.cc/virtual/2023/poster/70861", "author_site": "Mikaela Angelina Uy, Kiyohiro Nakayama, Guandao Yang, Rahul Thomas, Leonidas Guibas, Ke Li", "tldr": "", "abstract": "Neural radiance fields (NeRF) rely on volume rendering to synthesize novel views. Volume rendering requires evaluating an integral along each ray, which is numerically approximated with a finite sum that corresponds to the exact integral along the ray under piecewise constant volume density. As a consequence, the rendered result is unstable w.r.t. the choice of samples along the ray, a phenomenon that we dub quadrature instability. We propose a mathematically principled solution by reformulating the sample-based rendering equation so that it corresponds to the exact integral under piecewise linear volume density. This simultaneously resolves multiple issues: conflicts between samples along different rays, imprecise hierarchical sampling, and non-differentiability of quantiles of ray termination distances w.r.t. model parameters. We demonstrate several benefits over the classical sample-based rendering equation, such as sharper textures, better geometric reconstruction, and stronger depth supervision. Our proposed formulation can also be used as a drop-in replacement for the volume rendering equation of existing NeRF-based methods. 
Our project page can be found at pl-nerf.github.io.", "keywords": "neural radiance fields;volumetric rendering;nerfs;numerical quadrature;importance sampling", "primary_area": "", "supplementary_material": "/attachment/1f474c1bd63cbceb9eac6e9ae067c71c1b67c39a.zip", "author": "Mikaela Angelina Uy;Kiyohiro Nakayama;Guandao Yang;Rahul Krishna Thomas;Leonidas Guibas;Ke Li", "authorids": "~Mikaela_Angelina_Uy1;~Kiyohiro_Nakayama1;~Guandao_Yang1;~Rahul_Krishna_Thomas1;~Leonidas_Guibas1;~Ke_Li1", "gender": "F;M;M;M;M;M", "homepage": "http://mikacuy.github.io;https://georgenakayama.github.io/;http://www.guandaoyang.com;;http://geometry.stanford.edu/;http://www.sfu.ca/~keli/", "dblp": "218/5350;346/0509;209/9624;;g/LeonidasJGuibas;75/6627-11", "google_scholar": "PcX1zXwAAAAJ;Dh06_JMAAAAJ;_kElCmMAAAAJ;;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ;vQc8tI4AAAAJ", "orcid": ";;0000-0002-2992-5803;;;", "linkedin": ";;guandao-yang-349b83a6/;rahul-thomas-3b27821b3/;;", "or_profile": "~Mikaela_Angelina_Uy1;~Kiyohiro_Nakayama1;~Guandao_Yang1;~Rahul_Krishna_Thomas1;~Leonidas_Guibas1;~Ke_Li1", "aff": "Stanford University;Stanford University;Cornell University;Stanford University;Stanford University;Simon Fraser University", "aff_domain": "stanford.edu;stanford.edu;cornell.edu;stanford.edu;stanford.edu;sfu.ca", "position": "PhD student;Undergrad student;PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nuy2023nerf,\ntitle={Ne{RF} Revisited: Fixing Quadrature Instability in Volume Rendering},\nauthor={Mikaela Angelina Uy and Kiyohiro Nakayama and Guandao Yang and Rahul Krishna Thomas and Leonidas Guibas and Ke Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gJHAT79cZU}\n}", "github": "", "project": "", "reviewers": "HvcE;am3W;tVLL;WoBC", "pdf_size": 47271369, "rating": "3;5;6;7", "confidence": "4;5;4;5", "soundness": "3;4;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "110;124;69;102", "wc_strengths": "229;42;42;112", "wc_weaknesses": "414;102;31;347", "wc_questions": "98;238;18;35", "wc_limitations": "23;38;28;41", "wc_review": "874;544;188;637", "wc_reply_reviewers": "101;0;4;53", "wc_reply_authors": "413;41;11;41", "reply_reviewers": "1;0;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.25, 20.216020874544032 ], "wc_strengths_avg": [ 106.25, 76.4145764890443 ], "wc_weaknesses_avg": [ 223.5, 160.74902799084043 ], "wc_questions_avg": [ 97.25, 86.554534832093 ], "wc_limitations_avg": [ 32.5, 7.297259759663212 ], "wc_review_avg": [ 560.75, 246.55767580831872 ], "wc_reply_reviewers_avg": [ 39.5, 41.18555572042218 ], "wc_reply_authors_avg": [ 126.5, 165.86364882034883 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.50709255283711, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16438712412035079394&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;cornell.edu;stanford.edu;stanford.edu;sfu.ca", "author_num": 6, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Stanford University;Cornell University;Simon Fraser 
University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.cornell.edu;https://www.sfu.ca", "aff_unique_abbr": "Stanford;Cornell;SFU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Pengi: An Audio Language Model for Audio Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70860", "id": "gJLAfO4KUq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a2e5889b4bbef997ddb13b55d5acf77-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gJLAfO4KUq", "openreview": "https://openreview.net/forum?id=gJLAfO4KUq", "poster": "/media/PosterPDFs/NeurIPS%202023/70860.png?t=1697170530.5439065", "slides": "https://nips.cc/virtual/2023/poster/70860", "video": "https://nips.cc/virtual/2023/poster/70860", "author_site": "Soham Deshmukh, Benjamin Elizalde, Rita Singh, Huaming Wang", "tldr": "", "abstract": "In the domain of audio processing, Transfer Learning has facilitated the rise of Self-Supervised Learning and Zero-Shot Learning techniques. These approaches have led to the development of versatile models capable of tackling a wide array of tasks, while delivering state-of-the-art performance. However, current models inherently lack the capacity to produce the requisite language for open-ended tasks, such as Audio Captioning or Audio Question Answering. We introduce Pengi, a novel Audio Language Model that leverages Transfer Learning by framing all audio tasks as text-generation tasks. It takes as input, an audio recording, and text, and generates free-form text as output. The input audio is represented as a sequence of continuous embeddings by an audio encoder. A text encoder does the same for the corresponding text input. Both sequences are combined as a prefix to prompt a pre-trained frozen language model. The unified architecture of Pengi enables open-ended tasks and close-ended tasks without any additional fine-tuning or task-specific extensions. When evaluated on 21 downstream tasks, our approach yields state-of-the-art performance in several of them. 
Our results show that connecting language models with audio models is a major step towards general-purpose audio understanding.", "keywords": "audio language model;audio representation learning;audio and speech processing;multi-task and transfer learning", "primary_area": "", "supplementary_material": "/attachment/0220c2d2b7cfe130e2d7e248e06424f630cc7091.zip", "author": "Soham Deshmukh;Benjamin Elizalde;Rita Singh;Huaming Wang", "authorids": "~Soham_Deshmukh1;~Benjamin_Elizalde1;~Rita_Singh1;~Huaming_Wang2", "gender": "M;;F;", "homepage": "https://soham97.github.io;;http://mlsp.cs.cmu.edu/people/rsingh/index.html;", "dblp": "241/9651;;;", "google_scholar": "MasiEogAAAAJ;;;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;", "linkedin": "sdeshmuk;;;huaming-wang-5533588/", "or_profile": "~Soham_Deshmukh1;~Benjamin_Elizalde1;~Rita_Singh1;~Huaming_Wang2", "aff": "Microsoft;;School of Computer Science, Carnegie Mellon University;Microsoft", "aff_domain": "microsoft.com;;cs.cmu.edu;microsoft.com", "position": "Researcher;;Research Professor;Principal Researcher", "bibtex": "@inproceedings{\ndeshmukh2023pengi,\ntitle={Pengi: An Audio Language Model for Audio Tasks},\nauthor={Soham Deshmukh and Benjamin Elizalde and Rita Singh and Huaming Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gJLAfO4KUq}\n}", "github": "", "project": "", "reviewers": "o28f;4yQP;sUJx;iz2N;vWxh", "pdf_size": 4012865, "rating": "4;5;5;6;7", "confidence": "4;4;5;4;5", "soundness": "3;4;2;3;4", "novelty": "2;4;2;3;3", "presentation": "3;2;4;3;4", "wc_summary": "40;89;120;133;54", "wc_strengths": "50;110;56;48;54", "wc_weaknesses": "73;1;311;125;75", "wc_questions": "51;29;44;455;191", "wc_limitations": "7;1;100;87;11", "wc_review": "221;230;631;848;385", "wc_reply_reviewers": "0;0;32;51;8", "wc_reply_authors": "0;0;0;232;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 87.2, 36.07436763132515 ], "wc_strengths_avg": [ 63.6, 23.37177785278647 ], "wc_weaknesses_avg": [ 117.0, 104.74349621814235 ], "wc_questions_avg": [ 154.0, 161.4335776720568 ], "wc_limitations_avg": [ 41.2, 43.0181357104187 ], "wc_review_avg": [ 463.0, 243.0580177653064 ], "wc_reply_reviewers_avg": [ 18.2, 20.163333057805694 ], "wc_reply_authors_avg": [ 46.4, 92.8 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4803844614152615, "gs_citation": 170, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10580047223748383516&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "microsoft.com;;cs.cmu.edu;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Microsoft;Carnegie Mellon University", "aff_unique_dep": "Microsoft Corporation;School of Computer Science", "aff_unique_url": "https://www.microsoft.com;https://www.cmu.edu", "aff_unique_abbr": "Microsoft;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fed-GraB: Federated Long-tailed Learning with Self-Adjusting Gradient Balancer", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/70859", "id": "gJewjFjfN2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4b8ddb9b1aa3cb11462d64a70b84db2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gJewjFjfN2", "openreview": "https://openreview.net/forum?id=gJewjFjfN2", "poster": "/media/PosterPDFs/NeurIPS%202023/70859.png?t=1702024677.1548135", "slides": "https://nips.cc/virtual/2023/poster/70859", "video": "https://nips.cc/virtual/2023/poster/70859", "author_site": "Zikai Xiao, Zihan Chen, Songshang Liu, Hualiang Wang, YANG FENG, Jin Hao, Joey Tianyi Zhou, Jian Wu, Howard Yang, Zuozhu Liu", "tldr": "", "abstract": "Data privacy and long-tailed distribution are the norms rather than the exception in many real-world tasks. This paper investigates a federated long-tailed learning (Fed-LT) task in which each client holds a locally heterogeneous dataset; if the datasets can be globally aggregated, they jointly exhibit a long-tailed distribution. Under such a setting, existing federated optimization and/or centralized long-tailed learning methods hardly apply due to challenges in (a) characterizing the global long-tailed distribution under privacy constraints and (b) adjusting the local learning strategy to cope with the head-tail imbalance. In response, we propose a method termed $\\texttt{Fed-GraB}$, comprised of a Self-adjusting Gradient Balancer (SGB) module that re-weights clients' gradients in a closed-loop manner, based on the feedback of global long-tailed distribution evaluated by a Direct Prior Analyzer (DPA) module. Using $\\texttt{Fed-GraB}$, clients can effectively alleviate the distribution drift caused by data heterogeneity during the model training process and obtain a global model with better performance on the minority classes while maintaining the performance of the majority classes. 
Extensive experiments demonstrate that $\\texttt{Fed-GraB}$ achieves state-of-the-art performance on representative datasets such as CIFAR-10-LT, CIFAR-100-LT, ImageNet-LT, and iNaturalist.", "keywords": "Federated learning;Long-tailed learning;Data heterogeneity", "primary_area": "", "supplementary_material": "/attachment/6a8c86483cb174de25d3ea2a0b6b5d8c0f64ca5e.zip", "author": "Zikai Xiao;Zihan Chen;Songshang Liu;Hualiang Wang;YANG FENG;Jin Hao;Joey Tianyi Zhou;Jian Wu;Howard Hao Yang;Zuozhu Liu", "authorids": "~Zikai_Xiao1;~Zihan_Chen1;~Songshang_Liu1;~Hualiang_Wang1;~YANG_FENG6;~Jin_Hao1;~Joey_Tianyi_Zhou1;~Jian_Wu6;~Howard_Hao_Yang1;~Zuozhu_Liu1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": ";https://www.linkedin.com/in/zihan-chen-961217144/;https://github.com/SongshangL;https://github.com/SiLangWHL;;;https://joeyzhouty.github.io/;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ;https://person.zju.edu.cn/en/howardyang;https://person.zju.edu.cn/en/lzz", "dblp": ";139/3503-1;;;;86/1845;123/5110;96/2744-1;87/763;173/9297", "google_scholar": "d5G1eV0AAAAJ;;;4lzd8NsAAAAJ;;RBcwDr8AAAAJ;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ;https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.com.sg/citations?user=q0z9D9cAAAAJ;h602wLIAAAAJ", "orcid": "0000-0002-8507-5241;;;0009-0006-0157-8885;;0000-0002-6685-2017;0000-0002-4675-7055;;;0000-0002-7816-502X", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/%E6%B4%8B-%E5%86%AF-797451b0;;;;;", "or_profile": "~Zikai_Xiao1;~Zihan_Chen1;~Songshang_Liu1;~Hualiang_Wang1;~YANG_FENG6;~Jin_Hao1;~Joey_Tianyi_Zhou1;~Jian_Wu6;~Howard_Hao_Yang1;~Zuozhu_Liu1", "aff": "Zhejiang University;Singapore University of Technology and Design;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;;Stanford University;A*STAR Centre for Frontier AI Research;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;sutd.edu.sg;ust.hk;ust.hk;;stanford.edu;cfar.a-star.edu.sg;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Postdoc;MS student;PhD student;;Postdoc;Principal Researcher;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nxiao2023fedgrab,\ntitle={Fed-GraB: Federated Long-tailed Learning with Self-Adjusting Gradient Balancer},\nauthor={Zikai Xiao and Zihan Chen and Songshang Liu and Hualiang Wang and YANG FENG and Jin Hao and Joey Tianyi Zhou and Jian Wu and Howard Hao Yang and Zuozhu Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gJewjFjfN2}\n}", "github": "", "project": "", "reviewers": "TCDG;vVFc;XuVs;nXLC", "pdf_size": 0, "rating": "5;5;5;7", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;2;3;4", "wc_summary": "90;44;83;21", "wc_strengths": "56;21;87;82", "wc_weaknesses": "59;143;174;94", "wc_questions": "136;5;84;3", "wc_limitations": "9;7;1;1", "wc_review": "350;220;429;201", "wc_reply_reviewers": "17;13;73;112", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 59.5, 28.30635970943632 ], "wc_strengths_avg": [ 61.5, 26.177280225416848 ], "wc_weaknesses_avg": [ 117.5, 44.206899913927465 ], 
"wc_questions_avg": [ 57.0, 56.10258461069329 ], "wc_limitations_avg": [ 4.5, 3.570714214271425 ], "wc_review_avg": [ 300.0, 93.99734038790672 ], "wc_reply_reviewers_avg": [ 53.75, 41.154434754956846 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3098493178934857052&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;sutd.edu.sg;ust.hk;ust.hk;;stanford.edu;cfar.a-star.edu.sg;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;2;3;4;0;0;0", "aff_unique_norm": "Zhejiang University;Singapore University of Technology and Design;Hong Kong University of Science and Technology;Stanford University;A*STAR", "aff_unique_dep": ";;;;Centre for Frontier AI Research", "aff_unique_url": "https://www.zju.edu.cn;https://www.sutd.edu.sg;https://www.ust.hk;https://www.stanford.edu;https://www.a-star.edu.sg", "aff_unique_abbr": "ZJU;SUTD;HKUST;Stanford;A*STAR", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Hong Kong SAR;Stanford", "aff_country_unique_index": "0;1;0;0;2;1;0;0;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "Labeling Neural Representations with Inverse Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70858", "id": "gLfgyIWiWW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e52bbb99690d1e05c7ef7b4c8b3569a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gLfgyIWiWW", "openreview": "https://openreview.net/forum?id=gLfgyIWiWW", "poster": "/media/PosterPDFs/NeurIPS%202023/70858.png?t=1702502999.0414789", "slides": "https://nips.cc/virtual/2023/poster/70858", "video": "https://nips.cc/virtual/2023/poster/70858", "author_site": "Kirill Bykov, Laura Kopf, Shinichi Nakajima, Marius Kloft, Marina H\u00f6hne", "tldr": "", "abstract": "Deep Neural Networks (DNNs) demonstrate remarkable capabilities in learning complex hierarchical data representations, but the nature of these representations remains largely unknown. Existing global explainability methods, such as Network Dissection, face limitations such as reliance on segmentation masks, lack of statistical significance testing, and high computational demands. We propose Inverse Recognition (INVERT), a scalable approach for connecting learned representations with human-understandable concepts by leveraging their capacity to discriminate between these concepts. In contrast to prior work, INVERT is capable of handling diverse types of neurons, exhibits less computational complexity, and does not rely on the availability of segmentation masks. Moreover, INVERT provides an interpretable metric assessing the alignment between the representation and its corresponding explanation and delivering a measure of statistical significance. 
We demonstrate the applicability of INVERT in various scenarios, including the identification of representations affected by spurious correlations, and the interpretation of the hierarchical structure of decision-making within the models.", "keywords": "Explainable AI;Mechanistic Interpretability;Machine Learning;Deep Neural Networks", "primary_area": "", "supplementary_material": "/attachment/561ce91192cb34be03ff4a2167880193e1778290.zip", "author": "Kirill Bykov;Laura Kopf;Shinichi Nakajima;Marius Kloft;Marina MC H\u00f6hne", "authorids": "~Kirill_Bykov1;laura.kopf@gmx.de;~Shinichi_Nakajima2;~Marius_Kloft1;~Marina_MC_H\u00f6hne1", "gender": "M;;M;M;", "homepage": "https://www.linkedin.com/in/bykovkirill/;;https://web.ml.tu-berlin.de/author/dr.-shinichi-nakajima/;http://ml.informatik.uni-kl.de/;", "dblp": ";;97/6115.html;73/2217;", "google_scholar": "tI39EK8AAAAJ;;hXSvID4AAAAJ;https://scholar.google.de/citations?user=l-BJCdAAAAAJ;", "orcid": ";;0000-0003-3970-4569;;", "linkedin": ";;;;", "or_profile": "~Kirill_Bykov1;laura.kopf@gmx.de;~Shinichi_Nakajima2;~Marius_Kloft1;~Marina_MC_H\u00f6hne1", "aff": "TU Berlin;;BIFOLD, TU Berlin;RPTU Kaiserslautern-Landau;", "aff_domain": "tu-berlin.de;;tu-berlin.de;uni-kl.de;", "position": "PhD student;;Postdoc;Professor;", "bibtex": "@inproceedings{\nbykov2023labeling,\ntitle={Labeling Neural Representations with Inverse Recognition},\nauthor={Kirill Bykov and Laura Kopf and Shinichi Nakajima and Marius Kloft and Marina MC H{\\\"o}hne},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gLfgyIWiWW}\n}", "github": "", "project": "", "reviewers": "UsfE;LJJZ;bbMD;6wCX", "pdf_size": 3864230, "rating": "3;5;5;5", "confidence": "5;3;4;2", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "1;3;3;3", "wc_summary": "80;80;50;68", "wc_strengths": "30;63;126;55", "wc_weaknesses": "47;177;114;2", "wc_questions": "706;284;33;89", "wc_limitations": "1;24;1;7", "wc_review": "864;628;324;221", "wc_reply_reviewers": "118;28;0;56", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 69.5, 12.278029157808675 ], "wc_strengths_avg": [ 68.5, 35.358874416474286 ], "wc_weaknesses_avg": [ 85.0, 66.404066140561 ], "wc_questions_avg": [ 278.0, 264.0861601826192 ], "wc_limitations_avg": [ 8.25, 9.41740410091868 ], "wc_review_avg": [ 509.25, 253.6507194943472 ], "wc_reply_reviewers_avg": [ 50.5, 43.71212646394591 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1570697245699093104&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "tu-berlin.de;;tu-berlin.de;uni-kl.de;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Rheinland-Pfalz Technical University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-berlin.de;https://www.rptu.de", "aff_unique_abbr": "TU Berlin;RPTU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berlin;;Kaiserslautern-Landau", "aff_country_unique_index": "0;0;0", "aff_country_unique": 
"Germany" }, { "title": "Triangulation Residual Loss for Data-efficient 3D Pose Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70857", "id": "gLwjBDsE3G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29e8437db7b549160ce03d336ff66f65-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gLwjBDsE3G", "openreview": "https://openreview.net/forum?id=gLwjBDsE3G", "poster": "/media/PosterPDFs/NeurIPS%202023/70857.png?t=1699780820.1858208", "slides": "https://nips.cc/virtual/2023/poster/70857", "video": "https://nips.cc/virtual/2023/poster/70857", "author_site": "Jiachen Zhao, Tao Yu, Liang An, Yipeng Huang, Fang Deng, Qionghai Dai", "tldr": "", "abstract": "This paper presents Triangulation Residual loss (TR loss) for multiview 3D pose estimation in a data-efficient manner. Existing 3D supervised models usually require large-scale 3D annotated datasets, but the amount of existing data is still insufficient to train supervised models to achieve ideal performance, especially for animal pose estimation. To employ unlabeled multiview data for training, previous epipolar-based consistency provides a self-supervised loss that considers only the local consistency in pairwise views, resulting in limited performance and heavy calculations. In contrast, TR loss enables self-supervision with global multiview geometric consistency. Starting from initial 2D keypoint estimates, the TR loss can fine-tune the corresponding 2D detector without 3D supervision by simply minimizing the smallest singular value of the triangulation matrix in an end-to-end fashion. Our method achieves the state-of-the-art 25.8mm MPJPE and competitive 28.7mm MPJPE with only 5\\% 2D labeled training data on the Human3.6M dataset. 
Experiments on animals such as mice demonstrate our TR loss's data-efficient training ability.", "keywords": "3D pose estimation;triangulation;animal pose estimation", "primary_area": "", "supplementary_material": "/attachment/b20066386f6a94e4d93ef4dd4d0bf998b20f2a83.pdf", "author": "Jiachen Zhao;Tao Yu;Liang An;Yipeng Huang;Fang Deng;Qionghai Dai", "authorids": "~Jiachen_Zhao3;~Tao_Yu2;~Liang_An1;hyp744009246@163.com;dengfang@bit.edu.cn;~Qionghai_Dai1", "gender": "M;M;M;;;M", "homepage": "https://www.researchgate.net/profile/Jiachen-Zhao-4;https://ytrock.com;https://anl13.github.io/;;;https://www.researchgate.net/profile/Qionghai-Dai", "dblp": ";67/1014-7.html;187/9352-1;;;39/4543", "google_scholar": ";M3nYyZtiWUIC;s0T1w0gAAAAJ;;;CHAajY4AAAAJ", "orcid": ";0000-0002-3818-5069;0000-0002-1028-3759;;;", "linkedin": ";;;;;", "or_profile": "~Jiachen_Zhao3;~Tao_Yu2;~Liang_An1;hyp744009246@163.com;dengfang@bit.edu.cn;~Qionghai_Dai1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;tsinghua.edu.cn", "position": "Postdoc;Researcher;PhD student;;;Full Professor", "bibtex": "@inproceedings{\nzhao2023triangulation,\ntitle={Triangulation Residual Loss for Data-efficient 3D Pose Estimation},\nauthor={Jiachen Zhao and Tao Yu and Liang An and Yipeng Huang and Fang Deng and Qionghai Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gLwjBDsE3G}\n}", "github": "", "project": "", "reviewers": "iymk;y7FT;3C6c;ZQc8;4Fs8", "pdf_size": 2667593, "rating": "5;5;6;6;7", "confidence": "2;3;5;5;3", "soundness": "3;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;3;4;3", "wc_summary": "77;49;87;54;151", "wc_strengths": "64;50;46;85;99", "wc_weaknesses": "77;396;65;91;116", "wc_questions": "142;99;23;22;151", "wc_limitations": "18;6;37;8;164", "wc_review": "378;600;258;260;681", "wc_reply_reviewers": "0;115;35;0;0", "wc_reply_authors": "0;53;16;0;0", "reply_reviewers": "0;2;1;0;0", "reply_authors": "1;3;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 1.2000000000000002 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 83.6, 36.52725010180756 ], "wc_strengths_avg": [ 68.8, 20.350921355064003 ], "wc_weaknesses_avg": [ 149.0, 124.66114069749241 ], "wc_questions_avg": [ 87.4, 55.830457637386424 ], "wc_limitations_avg": [ 46.6, 59.71800398539791 ], "wc_review_avg": [ 435.4, 174.89608343241994 ], "wc_reply_reviewers_avg": [ 30.0, 44.609416046390926 ], "wc_reply_authors_avg": [ 13.8, 20.556264252047356 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3563483225498992, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12992514090964752377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "One Fits All: Power General Time Series Analysis by 
Pretrained LM", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70856", "id": "gMS6FVZvmF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86c17de05579cde52025f9984e6e2ebb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gMS6FVZvmF", "openreview": "https://openreview.net/forum?id=gMS6FVZvmF", "poster": "/media/PosterPDFs/NeurIPS%202023/70856.png?t=1701397655.7994776", "slides": "https://nips.cc/virtual/2023/poster/70856", "video": "https://nips.cc/virtual/2023/poster/70856", "author_site": "Tian Zhou, Peisong Niu, xue wang, Liang Sun, Rong Jin", "tldr": "", "abstract": "Although we have witnessed great success of pre-trained models in natural language processing (NLP) and computer vision (CV), limited progress has been made for general time series analysis. Unlike NLP and CV where a unified model can be used to perform different tasks, specially designed approach still dominates in each time series analysis task such as classification, anomaly detection, forecasting, and few-shot learning. The main challenge that blocks the development of pre-trained model for time series analysis is the lack of a large amount of data for training. In this work, we address this challenge by leveraging language or CV models, pre-trained from billions of tokens, for time series analysis. Specifically, we refrain from altering the self-attention and feedforward layers of the residual blocks in the pre-trained language or image model. This model, known as the Frozen Pretrained Transformer (FPT), is evaluated through fine-tuning on all major types of tasks involving time series. Our results demonstrate that pre-trained models on natural language or images can lead to a comparable or state-of-the-art performance in all main time series analysis tasks, as illustrated in Figure1. We also found both theoretically and empirically that the self-attention module behaviors similarly to principle component analysis (PCA), an observation that helps explains how transformer bridges the domain gap and a crucial step towards understanding the universality of a pre-trained transformer. 
\nThe code is publicly available at https://anonymous.4open.science/r/Pretrained-LM-for-TSForcasting-C561.", "keywords": "general time series analysis;time series forecasting;cross modality knowledge transfer; pretrained language model;", "primary_area": "", "supplementary_material": "/attachment/383a3761223cb1f1dcd6d3b510c4b29b06184b7f.pdf", "author": "Tian Zhou;Peisong Niu;Xue Wang;Liang Sun;Rong Jin", "authorids": "~Tian_Zhou2;~Peisong_Niu1;~Xue_Wang9;~Liang_Sun2;~Rong_Jin1", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=9o5r8bUAAAAJ&hl=en;https://dblp.org/pid/300/5732;https://www.linkedin.com/in/liang-sun-a0a87621/;https://www.cse.msu.edu/~rongjin/;https://www.linkedin.com/in/xue-wang-98739572/", "dblp": "31/4578-4.html;300/5732;18/5837-1;j/RongJin;", "google_scholar": "9o5r8bUAAAAJ;;D_cOMBgAAAAJ;;", "orcid": "0000-0003-1789-5413;0009-0007-7023-0900;0009-0002-5835-7259;;", "linkedin": ";;;;", "or_profile": "~Tian_Zhou2;~Peisong_Niu1;~Liang_Sun2;~Rong_Jin3;~xue_wang1", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Twitter;Alibaba Group US", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;twitter.com;alibaba-inc.com", "position": "Researcher;Researcher;Staff Software Engineer;Researcher;Researcher", "bibtex": "@inproceedings{\nzhou2023one,\ntitle={One Fits All: Power General Time Series Analysis by Pretrained {LM}},\nauthor={Tian Zhou and Peisong Niu and Xue Wang and Liang Sun and Rong Jin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gMS6FVZvmF}\n}", "github": "", "project": "", "reviewers": "idRZ;kdgy;GLop;qAFk;qj2Q", "pdf_size": 1986844, "rating": "4;6;6;7;8", "confidence": "4;3;2;4;3", "soundness": "2;3;3;2;4", "novelty": "2;3;2;3;4", "presentation": "3;3;3;3;2", "wc_summary": "40;110;82;73;66", "wc_strengths": "33;89;32;67;113", "wc_weaknesses": "117;472;73;281;132", "wc_questions": "121;126;20;2;1", "wc_limitations": "5;22;1;2;1", "wc_review": "316;819;208;425;313", "wc_reply_reviewers": "0;291;18;679;18", "wc_reply_authors": "449;493;39;2137;42", "reply_reviewers": "0;1;1;7;1", "reply_authors": "2;3;2;9;2", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 74.2, 22.719154913860685 ], "wc_strengths_avg": [ 66.8, 31.562002471326178 ], "wc_weaknesses_avg": [ 215.0, 146.32976457303553 ], "wc_questions_avg": [ 54.0, 57.16992216192008 ], "wc_limitations_avg": [ 6.2, 8.034923770640267 ], "wc_review_avg": [ 416.2, 212.7734945899042 ], "wc_reply_reviewers_avg": [ 201.2, 262.2833582215997 ], "wc_reply_authors_avg": [ 632.0, 776.8634371625428 ], "reply_reviewers_avg": [ 2.0, 2.5298221281347035 ], "reply_authors_avg": [ 3.6, 2.7276363393971716 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2417468892076141, "gs_citation": 465, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10743807738082159898&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;twitter.com;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Alibaba Group;Twitter, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://twitter.com", "aff_unique_abbr": "Alibaba;Twitter", "aff_campus_unique_index": "", "aff_campus_unique": 
"", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "SiT Dataset: Socially Interactive Pedestrian Trajectory Dataset for Social Navigation Robots", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73508", "id": "gMYsxTin4x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4d6a000c216974f59e597bc878cd6325-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=gMYsxTin4x", "openreview": "https://openreview.net/forum?id=gMYsxTin4x", "poster": "/media/PosterPDFs/NeurIPS%202023/73508.png?t=1697476690.6657348", "slides": "https://nips.cc/virtual/2023/poster/73508", "video": "https://nips.cc/virtual/2023/poster/73508", "author_site": "Jong Wook Bae, Jungho Kim, Junyong Yun, Changwon Kang, Jeongseon Choi, Chanhyeok Kim, Junho Lee, Jungwook Choi, Jun Won Choi", "tldr": "", "abstract": "To ensure secure and dependable mobility in environments shared by humans and robots, social navigation robots should possess the capability to accurately perceive and predict the trajectories of nearby pedestrians. In this paper, we present a novel dataset of pedestrian trajectories, referred to as Social Interactive Pedestrian Trajectory (SiT) dataset, which can be used to train pedestrian detection, tracking, and trajectory prediction models needed to design social navigation robots. Our dataset includes sequential raw data captured by two 3D LiDARs and five cameras covering a 360-degree view, two inertial measurement units (IMUs), and real-time kinematic positioning (RTK), as well as annotations including 2D & 3D boxes, object classes, and object IDs. Thus far, various human trajectory datasets have been introduced to support the development of pedestrian motion forecasting models. Our SiT dataset differs from these datasets in the following three respects. First, whereas the pedestrian trajectory data in other datasets were obtained from static scenes, our data was collected while the robot navigated in a crowded environment, capturing human-robot interactive scenarios in motion. Second, unlike many autonomous driving datasets where pedestrians are usually at a distance from vehicles and found on pedestrian paths, our dataset offers a distinctive view of navigation robots interacting closely with humans in crowded settings.Third, our dataset has been carefully organized to facilitate the training and evaluation of end-to-end prediction models encompassing 3D detection, 3D multi-object tracking, and trajectory prediction. This design allows for an end-to-end unified modular approach across different tasks. We introduce a comprehensive benchmark for assessing models across all aforementioned tasks and present the performance of multiple baseline models as part of our evaluation. Our dataset provides a strong foundation for future research in pedestrian trajectory prediction, which could expedite the development of safe and agile social navigation robots. 
The SiT dataset, development kit, and trained models are publicly available at: https://spalaboratory.github.io/SiT/", "keywords": "robotics;human interaction", "primary_area": "", "supplementary_material": "/attachment/d6488634f32970aa81875ad8da8d8fd31ecc733f.pdf", "author": "Jongwook Bae;Jungho Kim;Junyong Yun;Changwon Kang;Jeongseon Choi;Chanhyeok Kim;Junho Lee;Jungwook Choi;Jun Won Choi", "authorids": "~Jongwook_Bae1;~Jungho_Kim4;~Junyong_Yun1;~Changwon_Kang1;~Jeongseon_Choi1;~Chanhyeok_Kim2;~Junho_Lee3;~Jungwook_Choi1;~Jun_Won_Choi1", "gender": "M;M;M;M;M;;M;M;M", "homepage": "https://www.spa.hanyang.ac.kr/;https://www.spa.snu.ac.kr/;https://spa.snu.ac.kr/;https://www.spa.hanyang.ac.kr;https://www.spa.hanyang.ac.kr/;;https://github.com/jhlee-ai;;https://www.spa.snu.ac.kr/", "dblp": ";88/6832-6;334/0395;;;;;97/4140;", "google_scholar": ";9wVmZ5kAAAAJ;https://scholar.google.co.kr/citations?hl=ko;;;;Q80l6r4AAAAJ;YPT98zwAAAAJ;IHH2PyYAAAAJ", "orcid": ";0009-0007-0704-3619;;;;;;;0000-0002-3733-0148", "linkedin": ";jungho-kim-97a302193/;;;;%EC%B0%AC%ED%98%81-%EA%B9%80-98b7781a8/;junho-lee-35b359229?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;jungwook-choi-5854996b/;", "or_profile": "~Jongwook_Bae1;~Jungho_Kim4;~Junyong_Yun1;~Changwon_Kang1;~Jeongseon_Choi1;~Chanhyeok_Kim2;~Junho_Lee3;~Jungwook_Choi1;~Jun_Won_Choi1", "aff": "Hanyang University;Hanyang University;Hanyang University;Hanyang University;Hanyang University;Kwangwoon University;Hanyang University;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;kw.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "position": "MS student;MS student;PhD student;PhD student;MS student;Undergrad student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbae2023sit,\ntitle={SiT Dataset: Socially Interactive Pedestrian Trajectory Dataset for Social Navigation Robots},\nauthor={Jongwook Bae and Jungho Kim and Junyong Yun and Changwon Kang and Jeongseon Choi and Chanhyeok Kim and Junho Lee and Jungwook Choi and Jun Won Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=gMYsxTin4x}\n}", "github": "", "project": "", "reviewers": "YC2B;UimK;RCVX;qffA;b9eM", "pdf_size": 5837109, "rating": "6;6;7;7;7", "confidence": "4;4;4;5;3", "wc_summary_and_contributions": "44;54;93;57;44", "wc_strengths": "49;32;56;139;27", "wc_improvement": "151;39;50;50;137", "wc_limitations": "17;27;27;54;42", "wc_correctness": "1;1;17;18;64", "wc_clarity": "1;5;9;5;6", "wc_relation_to_prior_work": "1;7;18;27;12", "wc_documentation": "1;4;34;25;22", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "266;170;305;376;355", "wc_reply_reviewers": "0;35;0;0;0", "wc_reply_authors": "582;597;345;222;547", "reply_reviewers": "0;1;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 58.4, 18.07318455613177 ], "wc_strengths_avg": [ 60.6, 40.618222511577244 ], "wc_improvement_avg": [ 85.4, 48.21866858385868 ], "wc_limitations_avg": [ 33.4, 13.032267646115928 ], "wc_correctness_avg": [ 20.2, 23.11190169587955 ], "wc_clarity_avg": [ 5.2, 2.5612496949731396 ], "wc_relation_to_prior_work_avg": [ 13.0, 8.966604708583958 ], "wc_documentation_avg": [ 17.2, 12.671227249165726 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], 
"wc_review_avg": [ 294.4, 73.10978046745866 ], "wc_reply_reviewers_avg": [ 7.0, 14.0 ], "wc_reply_authors_avg": [ 458.6, 149.05113216611272 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=239799703651133768&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;kw.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1;0;0;0", "aff_unique_norm": "Hanyang University;Kwangwoon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hanyang.ac.kr;http://www.kwangwoon.ac.kr", "aff_unique_abbr": "HYU;KWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "AdaVAE: Bayesian Structural Adaptation for Variational Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70855", "id": "gMjIUZBKH8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f76a3a0f44263b5e56fec69ee1220f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gMjIUZBKH8", "openreview": "https://openreview.net/forum?id=gMjIUZBKH8", "poster": "/media/PosterPDFs/NeurIPS%202023/70855.png?t=1697490132.9134285", "slides": "https://nips.cc/virtual/2023/poster/70855", "video": "https://nips.cc/virtual/2023/poster/70855", "author_site": "Paribesh Regmi, Rui Li", "tldr": "", "abstract": "The neural network structures of generative models and their corresponding inference models paired in variational autoencoders (VAEs) play a critical role in the models' generative performance. However, powerful VAE network structures are hand-crafted and fixed prior to training, resulting in a one-size-fits-all approach that requires heavy computation to tune for given data. Moreover, existing VAE regularization methods largely overlook the importance of network structures and fail to prevent overfitting in deep VAE models with cascades of hidden layers. To address these issues, we propose a Bayesian inference framework that automatically adapts VAE network structures to data and prevent overfitting as they grow deeper. We model the number of hidden layers with a beta process to infer the most plausible encoding/decoding network depths warranted by data and perform layer-wise dropout regularization with a conjugate Bernoulli process. We develop a scalable estimator that performs joint inference on both VAE network structures and latent variables. Our experiments show that the inference framework effectively prevents overfitting in both shallow and deep VAE models, yielding state-of-the-art performance. 
We demonstrate that our framework is compatible with different types of VAE backbone networks and can be applied to various VAE variants, further improving their performance.", "keywords": "nonparametric Bayes;variational autoencoders", "primary_area": "", "supplementary_material": "/attachment/346d84feebfe494804dcf8cc33dce89c75218633.zip", "author": "Paribesh Regmi;Rui Li", "authorids": "~Paribesh_Regmi1;~Rui_Li3", "gender": "M;M", "homepage": "https://regmiparibesh.com.np/;https://ruililuci.com", "dblp": ";96/4282-2", "google_scholar": "https://scholar.google.com/citations?hl=en;AHx53ngAAAAJ", "orcid": ";0000-0001-5096-1553", "linkedin": "paribeshregmi/;", "or_profile": "~Paribesh_Regmi1;~Rui_Li3", "aff": "Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "rit.edu;rit.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nregmi2023adavae,\ntitle={Ada{VAE}: Bayesian Structural Adaptation for Variational Autoencoders},\nauthor={Paribesh Regmi and Rui Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gMjIUZBKH8}\n}", "github": "", "project": "", "reviewers": "p4pj;uhJ9;kqwq;9dFM", "pdf_size": 5289832, "rating": "5;5;6;7", "confidence": "4;3;5;2", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "90;86;129;194", "wc_strengths": "60;64;90;374", "wc_weaknesses": "145;118;714;147", "wc_questions": "69;69;5;9", "wc_limitations": "17;1;8;17", "wc_review": "381;338;946;741", "wc_reply_reviewers": "14;23;276;13", "wc_reply_authors": "0;45;636;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;4;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.75, 43.36689405525833 ], "wc_strengths_avg": [ 147.0, 131.56367279762298 ], "wc_weaknesses_avg": [ 281.0, 250.2548700824821 ], "wc_questions_avg": [ 38.0, 31.0322412983658 ], "wc_limitations_avg": [ 10.75, 6.722164829874376 ], "wc_review_avg": [ 601.5, 253.07755728234773 ], "wc_reply_reviewers_avg": [ 81.5, 112.36213775111258 ], "wc_reply_authors_avg": [ 170.25, 269.5277119333001 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.40451991747794525, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10269494794498008302&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "rit.edu;rit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "OpenProteinSet: Training data for structural biology at scale", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73507", "id": "gO0kS0eE0F", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0eb82171240776fe19da498bef3b1abe-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=gO0kS0eE0F", "openreview": "https://openreview.net/forum?id=gO0kS0eE0F", "poster": 
"/media/PosterPDFs/NeurIPS%202023/73507.png?t=1699587590.5047016", "slides": "https://nips.cc/virtual/2023/poster/73507", "video": "https://nips.cc/virtual/2023/poster/73507", "author_site": "Gustaf Ahdritz, Nazim Bouatta, Sachin Kadyan, Lukas Jarosch, Dan Berenberg, Ian Fisk, Andrew Watkins, Stephen Ra, Richard Bonneau, Mohammed AlQuraishi", "tldr": "", "abstract": "Multiple sequence alignments (MSAs) of proteins encode rich biological information and have been workhorses in bioinformatic methods for tasks like protein design and protein structure prediction for decades. Recent breakthroughs like AlphaFold2 that use transformers to attend directly over large quantities of raw MSAs have reaffirmed their importance. Generation of MSAs is highly computationally intensive, however, and no datasets comparable to those used to train AlphaFold2 have been made available to the research community, hindering progress in machine learning for proteins. To remedy this problem, we introduce OpenProteinSet, an open-source corpus of more than 16 million MSAs, associated structural homologs from the Protein Data Bank, and AlphaFold2 protein structure predictions. We have previously demonstrated the utility of OpenProteinSet by successfully retraining AlphaFold2 on it. We expect OpenProteinSet to be broadly useful as training and validation data for 1) diverse tasks focused on protein structure, function, and design and 2) large-scale multimodal machine learning research.", "keywords": "openproteinset;multiple sequence alignment;structural template;protein folding;alphafold 2;openfold", "primary_area": "", "supplementary_material": "/attachment/a15ec31d11ac71f4c1bb55d2856a239814847ee7.pdf", "author": "Gustaf Ahdritz;Nazim Bouatta;Sachin Kadyan;Lukas Jarosch;Dan Berenberg;Ian Fisk;Andrew Martin Watkins;Stephen Ra;Richard Bonneau;Mohammed AlQuraishi", "authorids": "~Gustaf_Ahdritz2;~Nazim_Bouatta1;~Sachin_Kadyan1;~Lukas_Jarosch1;~Dan_Berenberg1;~Ian_Fisk1;~Andrew_Martin_Watkins1;~Stephen_Ra1;~Richard_Bonneau1;~Mohammed_AlQuraishi1", "gender": "M;M;;M;M;M;M;M;M;M", "homepage": "https://gahdritz.github.io/;https://scholar.harvard.edu/nazimbouatta/home;;;;;;https://www.stephenra.com;https://as.nyu.edu/content/nyu-as/as/faculty/richard-bonneau.html;https://aqlab.io", "dblp": ";;;;220/3757.html;;;255/5897;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;C9xtOq8AAAAJ;KYpqau8AAAAJ;zglcuwEAAAAJ;bxl__-MAAAAJ;https://scholar.google.com.tw/citations?user=Wq8XTykAAAAJ;", "orcid": "0000-0001-8283-5324;;;0009-0002-3816-2454;0000-0003-4631-0947;;;;;", "linkedin": ";;;lukas-jarosch-56013b277/;daniel-j-berenberg;ian-fisk-ba2828/;;;;", "or_profile": "~Gustaf_Ahdritz2;~Nazim_Bouatta1;~Sachin_Kadyan1;~Lukas_Jarosch1;~Dan_Berenberg1;~Ian_Fisk1;~Andrew_Martin_Watkins1;~Stephen_Ra1;~Richard_Bonneau1;~Mohammed_AlQuraishi1", "aff": "Harvard University;Harvard University;;Ruprecht-Karls-Universit\u00e4t Heidelberg;Genentech;Simons Foundation;Prescient Design, Genentech;Prescient Design, Genentech;New York University;Columbia University", "aff_domain": "harvard.edu;harvard.edu;;uni-heidelberg.de;gene.com;simonsfoundation.org;gene.com;gene.com;nyu.edu;columbia.edu", "position": "PhD student;Principal Researcher;;MS student;Researcher;Researcher;Researcher;Director of Frontier Research;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nahdritz2023openproteinset,\ntitle={OpenProteinSet: Training data for structural biology at scale},\nauthor={Gustaf Ahdritz and Nazim Bouatta and Sachin Kadyan and Lukas 
Jarosch and Dan Berenberg and Ian Fisk and Andrew Martin Watkins and Stephen Ra and Richard Bonneau and Mohammed AlQuraishi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=gO0kS0eE0F}\n}", "github": "", "project": "", "reviewers": "yCQz;S8Br;GyWH;UMX4", "pdf_size": 2342104, "rating": "6;6;6;7", "confidence": "4;4;3;4", "wc_summary_and_contributions": "67;77;57;189", "wc_strengths": "73;8;59;50", "wc_improvement": "369;228;38;32", "wc_limitations": "87;55;1;98", "wc_correctness": "30;12;6;4", "wc_clarity": "6;5;1;46", "wc_relation_to_prior_work": "17;6;1;6", "wc_documentation": "18;23;1;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "668;415;165;432", "wc_reply_reviewers": "83;16;4;240", "wc_reply_authors": "800;550;337;641", "reply_reviewers": "2;1;1;3", "reply_authors": "3;2;1;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 97.5, 53.298686662993866 ], "wc_strengths_avg": [ 47.5, 24.23324163210527 ], "wc_improvement_avg": [ 166.75, 140.8818210416092 ], "wc_limitations_avg": [ 60.25, 37.67874069020885 ], "wc_correctness_avg": [ 13.0, 10.246950765959598 ], "wc_clarity_avg": [ 14.5, 18.282505298782223 ], "wc_relation_to_prior_work_avg": [ 7.5, 5.852349955359813 ], "wc_documentation_avg": [ 12.0, 8.860022573334675 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 420.0, 177.97331260613205 ], "wc_reply_reviewers_avg": [ 85.75, 94.00631627715235 ], "wc_reply_authors_avg": [ 582.0, 167.37233941126593 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15125101728281548754&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "harvard.edu;harvard.edu;;uni-heidelberg.de;gene.com;simonsfoundation.org;gene.com;gene.com;nyu.edu;columbia.edu", "author_num": 10, "aff_unique_index": "0;0;1;2;3;2;2;4;5", "aff_unique_norm": "Harvard University;Ruprecht-Karls-Universit\u00e4t Heidelberg;Genentech;Simons Foundation;New York University;Columbia University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.harvard.edu;https://www.uni-heidelberg.de/;https://www.genentech.com;https://www.simonsfoundation.org;https://www.nyu.edu;https://www.columbia.edu", "aff_unique_abbr": "Harvard;Uni Heidelberg;Genentech;Simons Foundation;NYU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Content-based Unrestricted Adversarial Attack", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70854", "id": "gO60SSGOMy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a24cd16bc361afa78e57d31d34f3d936-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gO60SSGOMy", "openreview": "https://openreview.net/forum?id=gO60SSGOMy", "poster": "/media/PosterPDFs/NeurIPS%202023/70854.png?t=1701775904.241985", "slides": "https://nips.cc/virtual/2023/poster/70854", "video": "https://nips.cc/virtual/2023/poster/70854", "author_site": "Zhaoyu Chen, Bo Li, Shuang Wu, Kaixun Jiang, Shouhong Ding, Wenqiang Zhang", "tldr": "", "abstract": "Unrestricted adversarial attacks typically 
manipulate the semantic content of an image (e.g., color or texture) to create adversarial examples that are both effective and photorealistic, demonstrating their ability to deceive human perception and deep neural networks with stealth and success. However, current works usually sacrifice unrestricted degrees and subjectively select some image content to guarantee the photorealism of unrestricted adversarial examples, which limits their attack performance. To ensure the photorealism of adversarial examples and boost attack performance, we propose a novel unrestricted attack framework called Content-based Unrestricted Adversarial Attack. By leveraging a low-dimensional manifold that represents natural images, we map the images onto the manifold and optimize them along its adversarial direction. Therefore, within this framework, we implement Adversarial Content Attack (ACA) based on Stable Diffusion and can generate highly transferable unrestricted adversarial examples with various adversarial contents. Extensive experimentation and visualization demonstrate the efficacy of ACA, particularly in surpassing state-of-the-art attacks by an average of 13.3-50.4\% and 16.8-48.0\% in normally trained models and defense methods, respectively.", "keywords": "unrestricted attack;adversarial example;diffusion model;black-box attack;adversarial transferability", "primary_area": "", "supplementary_material": "", "author": "Zhaoyu Chen;Bo Li;Shuang Wu;Kaixun Jiang;Shouhong Ding;Wenqiang Zhang", "authorids": "~Zhaoyu_Chen1;~Bo_Li20;~Shuang_Wu7;~Kaixun_Jiang1;~Shouhong_Ding3;~Wenqiang_Zhang1", "gender": ";M;M;M;M;M", "homepage": "https://www.fudanroilab.com/2020/05/01/ZhaoyuChen.html;https://libraboli.github.io/;;http://www.fudanroilab.com/2018/09/02/KaixunJiang.html;;https://www.fudanroilab.com/2021/07/01/WenqiangZhang.html", "dblp": "119/8788-1;50/3402-115;85/3231-1;334/1058;119/6735;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?hl=zh-CN;Na9u1wMAAAAJ;https://scholar.google.com/citations?hl=en;OGf40fkAAAAJ;vL-VEJYAAAAJ", "orcid": "0000-0002-7112-2596;;;;0000-0002-3175-3553;0000-0002-3339-8751", "linkedin": ";;;;;", "or_profile": "~Zhaoyu_Chen1;~Bo_Li20;~Shuang_Wu7;~Kaixun_Jiang1;~Shouhong_Ding3;~Wenqiang_Zhang1", "aff": "Fudan University;Tencent Youtu Lab;Tencent YouTu Lab;Fudan University;Tencent Youtu Lab;Fudan University", "aff_domain": "fudan.edu.cn;tencent.com;tencent.com;fudan.edu;tencent.com;fudan.edu.cn", "position": "PhD student;Researcher;Researcher;PhD student;researcher;Full Professor", "bibtex": "@inproceedings{\nchen2023contentbased,\ntitle={Content-based Unrestricted Adversarial Attack},\nauthor={Zhaoyu Chen and Bo Li and Shuang Wu and Kaixun Jiang and Shouhong Ding and Wenqiang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gO60SSGOMy}\n}", "github": "", "project": "", "reviewers": "frod;QZtm;Rhy1;oMF6", "pdf_size": 8799133, "rating": "5;6;6;6", "confidence": "4;5;3;4", "soundness": "3;3;2;3", "novelty": "3;3;2;2", "presentation": "3;3;2;3", "wc_summary": "176;47;55;65", "wc_strengths": "199;84;52;24", "wc_weaknesses": "372;81;104;161", "wc_questions": "68;1;309;31", "wc_limitations": "1;1;32;15", "wc_review": "816;214;552;296", "wc_reply_reviewers": "124;11;114;45", "wc_reply_authors": "104;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 
0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.75, 52.49464258379135 ], "wc_strengths_avg": [ 89.75, 66.55214121273634 ], "wc_weaknesses_avg": [ 179.5, 114.8923409109589 ], "wc_questions_avg": [ 102.25, 121.70327645548414 ], "wc_limitations_avg": [ 12.25, 12.754901018824098 ], "wc_review_avg": [ 469.5, 235.7175216228102 ], "wc_reply_reviewers_avg": [ 73.5, 47.193749586147526 ], "wc_reply_authors_avg": [ 26.0, 45.033320996790806 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7478355653647319063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "fudan.edu.cn;tencent.com;tencent.com;fudan.edu;tencent.com;fudan.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Fudan University;Tencent", "aff_unique_dep": ";Youtu Lab", "aff_unique_url": "https://www.fudan.edu.cn;https://www.tencent.com", "aff_unique_abbr": "Fudan;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "gORnZ5qIsa", "title": "Interpretable factorization of clinical questionnaires to identify latent factors of psychopathology", "track": "main", "status": "Reject", "tldr": "", "abstract": "Psychiatry research seeks to understand the manifestations of psychopathology in behavior, as measured in questionnaire data, by identifying a small number of latent factors that explain them. While factor analysis is the traditional tool for this purpose, the resulting factors may not be interpretable, and may also be subject to confounding variables. Moreover, missing data are common, and explicit imputation is often required. To overcome these limitations, we introduce interpretability constrained questionnaire factorization (ICQF), a non-negative matrix factorization method with regularization tailored for questionnaire data. Our method aims to promote factor interpretability and solution stability. We provide an optimization procedure with theoretical convergence guarantees, and an automated procedure to detect latent dimensionality accurately. We validate these procedures using realistic synthetic data. We demonstrate the effectiveness of our method in a widely used general-purpose questionnaire, in two independent datasets (the Healthy Brain Network and Adolescent Brain Cognitive Development studies). Specifically, we show that ICQF improves interpretability, as defined by domain experts, while preserving diagnostic information across a range of disorders, and outperforms competing methods for smaller dataset sizes. 
This suggests that the regularization in our method matches domain characteristics.", "keywords": "Psychopathology;interpretable factorization;latent constructs;factor analysis;Healthy Brain Network Study", "primary_area": "", "supplementary_material": "/attachment/639d6dd74ea20b951908761fdeea07a389c3cba3.zip", "author": "Ka Chun Lam;Bridget Wilson Mahony;Armin Raznahan;Francisco Pereira", "authorids": "~Ka_Chun_Lam1;~Bridget_Wilson_Mahony1;~Armin_Raznahan1;~Francisco_Pereira1", "gender": "M;;M;M", "homepage": ";;https://www.nimh.nih.gov/research/research-conducted-at-nimh/principal-investigators/armin-raznahan;http://www.franciscopereira.org", "dblp": ";;;73/5236", "google_scholar": "NtJPo8oAAAAJ;;;HpbSzssAAAAJ", "orcid": ";0000-0002-8081-2791;;", "linkedin": ";;;francisco-pereira-35735a7/", "or_profile": "~Ka_Chun_Lam1;~Bridget_Wilson_Mahony1;~Armin_Raznahan1;~Francisco_Pereira1", "aff": "National Institute of Health;;;National Institute of Mental Health", "aff_domain": "nih.gov;;;nih.gov", "position": "Researcher;;;Staff Scientist", "bibtex": "@misc{\nlam2023interpretable,\ntitle={Interpretable factorization of clinical questionnaires to identify latent factors of psychopathology},\nauthor={Ka Chun Lam and Bridget Wilson Mahony and Armin Raznahan and Francisco Pereira},\nyear={2023},\nurl={https://openreview.net/forum?id=gORnZ5qIsa}\n}", "github": "", "project": "", "reviewers": "kCjw;LcU6;FBZh;gpT4", "site": "https://openreview.net/forum?id=gORnZ5qIsa", "pdf_size": 2604482, "rating": "3;3;4;6", "confidence": "3;4;4;3", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "144;63;123;102", "wc_strengths": "20;41;41;68", "wc_weaknesses": "302;98;73;24", "wc_questions": "119;5;14;103", "wc_limitations": "27;7;1;70", "wc_review": "612;214;252;367", "wc_reply_reviewers": "0;78;44;62", "wc_reply_authors": "0;55;47;43", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 4.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 108.0, 29.924906014890006 ], "wc_strengths_avg": [ 42.5, 17.03672503740082 ], "wc_weaknesses_avg": [ 124.25, 106.01975051847651 ], "wc_questions_avg": [ 60.25, 51.16334136860102 ], "wc_limitations_avg": [ 26.25, 27.03123193641015 ], "wc_review_avg": [ 361.25, 155.3437720026136 ], "wc_reply_reviewers_avg": [ 46.0, 29.154759474226502 ], "wc_reply_authors_avg": [ 36.25, 21.370248009791556 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15494006025426129490&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National Institutes of Health;National Institute of Mental Health", "aff_unique_dep": ";", "aff_unique_url": "https://www.nih.gov;https://www.nimh.nih.gov", "aff_unique_abbr": "NIH;NIMH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Partial Matrix Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70853", "id": "gPylY8sCbw", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/5ff7b1f30e0caf3cc0b2fbfd4d7ebdd4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gPylY8sCbw", "openreview": "https://openreview.net/forum?id=gPylY8sCbw", "poster": "/media/PosterPDFs/NeurIPS%202023/70853.png?t=1702340928.3783667", "slides": "https://nips.cc/virtual/2023/poster/70853", "video": "https://nips.cc/virtual/2023/poster/70853", "author_site": "Elad Hazan, Adam Tauman Kalai, Varun Kanade, Clara Mohri, Y. Jennifer Sun", "tldr": "", "abstract": "The matrix completion problem involves reconstructing a low-rank matrix by using a given set of revealed (and potentially noisy) entries. Although existing methods address the completion of the entire matrix, the accuracy of the completed entries can vary significantly across the matrix, due to differences in the sampling distribution. For instance, users may rate movies primarily from their country or favorite genres, leading to inaccurate predictions for the majority of completed entries.\n\nWe propose a novel formulation of the problem as Partial Matrix Completion, where the objective is to complete a substantial subset of the entries with high confidence. Our algorithm efficiently handles the unknown and arbitrarily complex nature of the sampling distribution, ensuring high accuracy for all completed entries and sufficient coverage across the matrix. Additionally, we introduce an online version of the problem and present a low-regret efficient algorithm based on iterative gradient updates. Finally, we conduct a preliminary empirical evaluation of our methods.", "keywords": "matrix completion;online learning", "primary_area": "", "supplementary_material": "/attachment/e237d047b0f6a931fca3ed6cd60e57c6abd7ff61.pdf", "author": "Elad Hazan;Adam Tauman Kalai;Varun Kanade;Clara Mohri;Y. Jennifer Sun", "authorids": "~Elad_Hazan1;~Adam_Tauman_Kalai1;~Varun_Kanade1;~Clara_Mohri1;~Y._Jennifer_Sun1", "gender": "M;;M;F;", "homepage": "https://www.ehazan.com;;;;https://orfe.princeton.edu/people/jennifer-sun", "dblp": "72/739;;31/6692;;", "google_scholar": "LnhCGNMAAAAJ;;;;", "orcid": ";;;;", "linkedin": ";;;clara-mohri-6b364a192/;", "or_profile": "~Elad_Hazan1;~Adam_Tauman_Kalai1;~Varun_Kanade1;~Clara_Mohri1;~Y._Jennifer_Sun1", "aff": "Princeton University;;University of Oxford;University of California, Berkeley;Princeton University", "aff_domain": "princeton.edu;;ox.ac.uk;berkeley.edu;princeton.edu", "position": "Full Professor;;Associate Professor;Undergrad student;PhD student", "bibtex": "@inproceedings{\nhazan2023partial,\ntitle={Partial Matrix Completion},\nauthor={Elad Hazan and Adam Tauman Kalai and Varun Kanade and Clara Mohri and Y. 
Jennifer Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gPylY8sCbw}\n}", "github": "", "project": "", "reviewers": "Hbwc;wLHP;PBAT;n6bT", "pdf_size": 4932816, "rating": "5;7;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "3;4;4;4", "presentation": "2;3;3;3", "wc_summary": "49;742;89;122", "wc_strengths": "86;158;69;47", "wc_weaknesses": "158;1064;25;83", "wc_questions": "79;485;167;85", "wc_limitations": "6;48;54;5", "wc_review": "378;2497;404;342", "wc_reply_reviewers": "50;221;82;26", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 250.5, 284.9425380668881 ], "wc_strengths_avg": [ 90.0, 41.62331077653482 ], "wc_weaknesses_avg": [ 332.5, 424.9555859145753 ], "wc_questions_avg": [ 204.0, 165.91865476793137 ], "wc_limitations_avg": [ 28.25, 22.851422275210794 ], "wc_review_avg": [ 905.25, 919.2609463585408 ], "wc_reply_reviewers_avg": [ 94.75, 75.54923891079248 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6930149444048913441&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "princeton.edu;;ox.ac.uk;berkeley.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Princeton University;University of Oxford;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.ox.ac.uk;https://www.berkeley.edu", "aff_unique_abbr": "Princeton;Oxford;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Optimistic Rates for Multi-Task Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70852", "id": "gQ4h6WvME0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/06e3c330d140f3a25671acf2dc2d6357-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gQ4h6WvME0", "openreview": "https://openreview.net/forum?id=gQ4h6WvME0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70852", "video": "https://nips.cc/virtual/2023/poster/70852", "author_site": "Austin Watkins, Enayat Ullah, Thanh Nguyen-Tang, Raman Arora", "tldr": "", "abstract": "We study the problem of transfer learning via Multi-Task Representation Learning (MTRL), wherein multiple source tasks are used to learn a good common representation, and a predictor is trained on top of it for the target task. Under standard regularity assumptions on the loss function and task diversity, we provide new statistical rates on the excess risk of the target task, which demonstrate the benefit of representation learning. Importantly, our rates are optimistic, i.e., they interpolate between the standard $O(m^{-1/2})$ rate and the fast $O(m^{-1})$ rate, depending on the difficulty of the learning task, where $m$ is the number of samples for the target task. 
Besides the main result, we make several new contributions, including giving optimistic rates for excess risk of source tasks (multi-task learning (MTL)), a local Rademacher complexity theorem for MTRL and MTL, as well as a chain rule for local Rademacher complexity for composite predictor classes.", "keywords": "Learning Theory;Multi-task and Transfer Learning;Classification", "primary_area": "", "supplementary_material": "/attachment/aee0cc19b9dddf047de468499be738c997bd3c6b.pdf", "author": "Austin Watkins;Enayat Ullah;Thanh Nguyen-Tang;Raman Arora", "authorids": "~Austin_Watkins1;~Enayat_Ullah1;~Thanh_Nguyen-Tang1;~Raman_Arora1", "gender": "M;;M;M", "homepage": "http://austinwatkins.com;https://enayatullah.github.io;http://www.cs.jhu.edu/~raman/Home.html;https://thanhnguyentang.github.io/", "dblp": "334/0263;223/5999;;287/5102.html", "google_scholar": ";;Spe0xdkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-1917-2190", "linkedin": "austin-watkins-6599a858/;;;thanhnguyentang/", "or_profile": "~Austin_Watkins1;~Enayat_Ullah1;~Raman_Arora1;~Thanh_Tang_Nguyen2", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu;jhu.edu;jhu.edu", "position": "PhD student;PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nwatkins2023optimistic,\ntitle={Optimistic Rates for Multi-Task Representation Learning},\nauthor={Austin Watkins and Enayat Ullah and Thanh Nguyen-Tang and Raman Arora},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gQ4h6WvME0}\n}", "github": "", "project": "", "reviewers": "BPxZ;JPW9;Sb8D;9335", "pdf_size": 603741, "rating": "5;6;6;8", "confidence": "2;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "2;2;3;3", "wc_summary": "39;138;162;109", "wc_strengths": "79;44;23;94", "wc_weaknesses": "50;383;141;10", "wc_questions": "61;4;228;1", "wc_limitations": "5;1;13;1", "wc_review": "234;570;567;215", "wc_reply_reviewers": "0;74;116;13", "wc_reply_authors": "0;0;720;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 112.0, 46.13566949768909 ], "wc_strengths_avg": [ 60.0, 28.026772914483036 ], "wc_weaknesses_avg": [ 146.0, 144.8326620621191 ], "wc_questions_avg": [ 73.5, 92.34852462275724 ], "wc_limitations_avg": [ 5.0, 4.898979485566356 ], "wc_review_avg": [ 396.5, 172.13439516842647 ], "wc_reply_reviewers_avg": [ 50.75, 46.90082621873521 ], "wc_reply_authors_avg": [ 180.0, 311.7691453623979 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14982346962624582642&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "jhu.edu;jhu.edu;jhu.edu;jhu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": 
"gSyjaunurQ", "title": "On Representation of Natural Image Patches", "track": "main", "status": "Reject", "tldr": "", "abstract": "To optimize survival, organisms need to accurately and efficiently relay new information throughout their systems for processing and responses. \nFurthermore, they benefit from predicting environmental occurrences, or in mathematical terms, understanding the probability distribution of their environment, \nbased on both personal experiences and inherited evolutionary memory.\nThese twin objectives of information transmission and learning environmental probabilistic distributions form the core of an organism's information processing system. \nWhile the early vision neuroscience field has primarily focused on the former, employing information theory as a guiding framework, \nthe latter is largely explored by the machine learning community via probabilistic generative models. \nHowever, the relationship between these two objectives has not been thoroughly investigated.\nIn this paper, we study a biologically inspired information processing model and prove that these two objectives can be achieved independently.\nBy evenly partitioning the input space to model input probability, our model bypasses the often intractable normalization factor computation. \nWhen applied to image patches, this model produces a sparse, nonlinear binary population code similar to early visual systems, \nwith features like edge-detection and orientation-selective units. \nOur results not only offer potential new insights into the functioning of neurons in early vision systems, \nbut also present a novel approach to represent natural image patches.", "keywords": "neural coding;sparse binary code;natural image;edge detection;generative model", "primary_area": "", "supplementary_material": "/attachment/b1ba21a2534ed9e0f26b95e24ee4f0085b75283f.zip", "author": "Cheng Guo", "authorids": "~Cheng_Guo3", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nguo2023on,\ntitle={On Representation of Natural Image Patches},\nauthor={Cheng Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=gSyjaunurQ}\n}", "github": "", "project": "", "reviewers": "1uCk;Q2NW;aZBn;KcKg", "site": "https://openreview.net/forum?id=gSyjaunurQ", "pdf_size": 5429237, "rating": "3;3;3;4", "confidence": "3;4;3;4", "soundness": "3;3;2;3", "novelty": "3;2;2;2", "presentation": "3;3;1;3", "wc_summary": "101;99;63;139", "wc_strengths": "29;29;72;41", "wc_weaknesses": "157;117;283;126", "wc_questions": "136;32;2;160", "wc_limitations": "20;4;2;81", "wc_review": "443;281;422;547", "wc_reply_reviewers": "109;46;57;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 100.5, 26.88401011754013 ], "wc_strengths_avg": [ 42.75, 17.583728273605686 ], "wc_weaknesses_avg": [ 170.75, 66.48449067263734 ], "wc_questions_avg": [ 82.5, 66.89357218746805 ], "wc_limitations_avg": [ 26.75, 32.08874413248359 ], "wc_review_avg": [ 423.25, 94.79022892682558 ], "wc_reply_reviewers_avg": [ 53.0, 38.76209488662861 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 
], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YzmEbukMfa8J:scholar.google.com/&scioq=On+Representation+of+Natural+Image+Patches&hl=en&as_sdt=0,33", "gs_version_total": 0 }, { "title": "Rethinking Conditional Diffusion Sampling with Progressive Guidance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70851", "id": "gThGBHhqcU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/83ca9e252329e7b0704ead93893e6b1b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gThGBHhqcU", "openreview": "https://openreview.net/forum?id=gThGBHhqcU", "poster": "/media/PosterPDFs/NeurIPS%202023/70851.png?t=1702339866.6710002", "slides": "https://nips.cc/virtual/2023/poster/70851", "video": "https://nips.cc/virtual/2023/poster/70851", "author_site": "Anh-Dung Dinh, Daochang Liu, Chang Xu", "tldr": "", "abstract": "This paper tackles two critical challenges encountered in classifier guidance for diffusion generative models, i.e., the lack of diversity and the presence of adversarial effects. These issues often result in a scarcity of diverse samples or the generation of non-robust features. The underlying cause lies in the mechanism of classifier guidance, where discriminative gradients push samples to be recognized as conditions aggressively. This inadvertently suppresses information with common features among relevant classes, resulting in a limited pool of features with less diversity or the absence of robust features for image construction.\tWe propose a generalized classifier guidance method called Progressive Guidance, which mitigates the problems by allowing relevant classes' gradients to contribute to shared information construction when the image is noisy in early sampling steps. In the later sampling stage, we progressively enhance gradients to refine the details in the image toward the primary condition. This helps to attain a high level of diversity and robustness compared to the vanilla classifier guidance. 
Experimental results demonstrate that our proposed method further improves the image quality while offering a significant level of diversity as well as robust features.", "keywords": "Diffusion model;conditional generative model;guidance diffusion;generative models;classifier guidance", "primary_area": "", "supplementary_material": "/attachment/862df4bcf7f6161e537c195a6be0424fa97700c7.pdf", "author": "Anh-Dung Dinh;Daochang Liu;Chang Xu", "authorids": "~Anh-Dung_Dinh2;~Daochang_Liu1;~Chang_Xu4", "gender": "M;M;", "homepage": ";https://finspire13.github.io;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": ";222/2701;97/2966-2", "google_scholar": "ZJbv3YoAAAAJ;https://scholar.google.com/citations?hl=en;N4F_3eoAAAAJ", "orcid": ";;0000-0002-4756-0609", "linkedin": ";;", "or_profile": "~Anh-Dung_Dinh2;~Daochang_Liu1;~Charles_Xu1", "aff": "University of Sydney, University of Sydney;University of Sydney;University of Sydney", "aff_domain": "uni.sydney.edu.au;usyd.edu.au;sydney.edu.au", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ndinh2023rethinking,\ntitle={Rethinking Conditional Diffusion Sampling with Progressive Guidance},\nauthor={Anh-Dung Dinh and Daochang Liu and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gThGBHhqcU}\n}", "github": "", "project": "", "reviewers": "vxij;cJ7t;xoqA;mBBP;zHLG;zai6", "pdf_size": 1109064, "rating": "5;5;6;7;7;7", "confidence": "4;4;4;5;3;4", "soundness": "3;3;3;3;3;3", "novelty": "2;3;3;4;3;2", "presentation": "2;3;3;3;4;2", "wc_summary": "80;20;71;60;134;74", "wc_strengths": "12;61;54;134;56;93", "wc_weaknesses": "104;101;46;304;86;146", "wc_questions": "2;2;5;18;5;175", "wc_limitations": "80;2;28;23;5;75", "wc_review": "278;186;204;539;286;563", "wc_reply_reviewers": "430;39;9;11;19;292", "wc_reply_authors": "1033;668;185;218;169;814", "reply_reviewers": "2;1;1;1;1;2", "reply_authors": "5;5;4;3;4;4", "rating_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 73.16666666666667, 33.54805839720419 ], "wc_strengths_avg": [ 68.33333333333333, 37.65043898224237 ], "wc_weaknesses_avg": [ 131.16666666666666, 82.6970710216242 ], "wc_questions_avg": [ 34.5, 63.068084903010444 ], "wc_limitations_avg": [ 35.5, 31.10600156454271 ], "wc_review_avg": [ 342.6666666666667, 151.7999414434084 ], "wc_reply_reviewers_avg": [ 133.33333333333334, 166.12311365035538 ], "wc_reply_authors_avg": [ 514.5, 341.06634252004403 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.166666666666667, 0.6871842709362768 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5616435835610189031&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "uni.sydney.edu.au;usyd.edu.au;sydney.edu.au", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "BiSLS/SPS: Auto-tune Step Sizes for 
Stable Bi-level Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70850", "id": "gUEekxYr6D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9cf5fff2f85310e6ece5bc3a8489b6fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gUEekxYr6D", "openreview": "https://openreview.net/forum?id=gUEekxYr6D", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70850", "video": "https://nips.cc/virtual/2023/poster/70850", "author_site": "Chen Fan, Gaspard Chon\u00e9-Ducasse, Mark Schmidt, Christos Thrampoulidis", "tldr": "", "abstract": "The popularity of bi-level optimization (BO) in deep learning has spurred a growing interest in studying gradient-based BO algorithms.\nHowever, existing algorithms involve two coupled learning rates that can be affected by approximation errors when computing hypergradients, making careful fine-tuning necessary to ensure fast convergence. To alleviate this issue, we investigate the use of recently proposed adaptive step-size methods, namely stochastic line search (SLS) and stochastic Polyak step size (SPS), for computing both the upper and lower-level learning rates. First, we revisit the use of SLS and SPS in single-level optimization without the additional interpolation condition that is typically assumed in prior works. For such settings, we investigate new variants of SLS and SPS that improve upon existing suggestions in the literature and are simpler to implement. Importantly, these two variants can be seen as special instances of general family of methods with an envelope-type step-size. This unified envelope strategy allows for the extension of the algorithms and their convergence guarantees to BO settings. Finally, our extensive experiments demonstrate that the new algorithms, which are available in both SGD and Adam versions, can find large learning rates with minimal tuning and converge faster than corresponding vanilla SGD or Adam BO algorithms that require fine-tuning.", "keywords": "Adaptive step sizes;bi-level optimization;convergence rates;line searches", "primary_area": "", "supplementary_material": "/attachment/e9a8f05a713360b0b67e0566e52b48c194aaf2aa.zip", "author": "Chen Fan;Gaspard Chon\u00e9-Ducasse;Mark Schmidt;Christos Thrampoulidis", "authorids": "~Chen_Fan1;~Gaspard_Chon\u00e9-Ducasse1;~Mark_Schmidt1;~Christos_Thrampoulidis1", "gender": "M;;;", "homepage": ";;;https://sites.google.com/view/cthrampo/home", "dblp": ";;35/2638;127/6532", "google_scholar": ";;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": "chen-fan-a6b31a162/;gaspard-chon\u00e9-ducasse-155326249;;", "or_profile": "~Chen_Fan1;~Gaspard_Chon\u00e9-Ducasse1;~Mark_Schmidt1;~Christos_Thrampoulidis1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia", "aff_domain": "ubc.ca;cs.ubc.ca;ubc.ca;ubc.ca", "position": "PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nfan2023bislssps,\ntitle={Bi{SLS}/{SPS}: Auto-tune Step Sizes for Stable Bi-level Optimization},\nauthor={Chen Fan and Gaspard Chon{\\'e}-Ducasse and Mark Schmidt and Christos Thrampoulidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gUEekxYr6D}\n}", "github": "", "project": "", "reviewers": "75AD;TsPX;ysEm;evaJ", "pdf_size": 1471782, "rating": "5;6;7;8", "confidence": "4;3;3;4", "soundness": 
"3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "109;38;48;81", "wc_strengths": "38;31;57;103", "wc_weaknesses": "308;44;95;4", "wc_questions": "4;14;130;85", "wc_limitations": "4;1;10;108", "wc_review": "463;128;340;381", "wc_reply_reviewers": "12;0;18;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.0, 28.044607324760317 ], "wc_strengths_avg": [ 57.25, 28.07467720206236 ], "wc_weaknesses_avg": [ 112.75, 117.25053304782882 ], "wc_questions_avg": [ 58.25, 51.87665660005471 ], "wc_limitations_avg": [ 30.75, 44.71786555729153 ], "wc_review_avg": [ 328.0, 123.67093433786292 ], "wc_reply_reviewers_avg": [ 11.0, 6.708203932499369 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15105594130772540424&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ubc.ca;cs.ubc.ca;ubc.ca;ubc.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Diffusion-TTA: Test-time Adaptation of Discriminative Models via Generative Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70849", "id": "gUTVpByfVX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38e511a690709603d4cc3a1c52b4a9fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gUTVpByfVX", "openreview": "https://openreview.net/forum?id=gUTVpByfVX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70849", "video": "https://nips.cc/virtual/2023/poster/70849", "author_site": "Mihir Prabhudesai, Tsung-Wei Ke, Alex Li, Deepak Pathak, Katerina Fragkiadaki", "tldr": "", "abstract": "The advancements in generative modeling, particularly the advent of diffusion models, have sparked a fundamental question: how can these models be effectively used for discriminative tasks? In this work, we find that generative models can be great test-time adapters for discriminative models. Our method, Diffusion-TTA, adapts pre-trained discriminative models such as image classifiers, segmenters and depth predictors, to each unlabelled example in the test set using generative feedback from a diffusion model. We achieve this by modulating the conditioning of the diffusion model using the output of the discriminative model. We then maximize the image likelihood objective by backpropagating the gradients to discriminative model\u2019s parameters. We show Diffusion-TTA significantly enhances the accuracy of various large-scale pre-trained discriminative models, such as, ImageNet classifiers, CLIP models, image pixel labellers and image depth predictors. 
Diffusion-TTA outperforms existing test-time adaptation methods, including TTT-MAE and TENT, and particularly shines in online adaptation setups, where the discriminative model is continually adapted to each example in the test set. We provide access to code, results, and visualizations on our website: diffusion-tta.github.io/", "keywords": "test-time adaptation;diffusion models;generative models;classification;segmentation;depth prediction", "primary_area": "", "supplementary_material": "", "author": "Mihir Prabhudesai;Tsung-Wei Ke;Alexander Cong Li;Deepak Pathak;Katerina Fragkiadaki", "authorids": "~Mihir_Prabhudesai1;~Tsung-Wei_Ke2;~Alexander_Cong_Li2;~Deepak_Pathak1;~Katerina_Fragkiadaki1", "gender": "M;;M;M;F", "homepage": "https://mihirp1998.github.io/;https://twke18.github.io/;http://alexanderli.com/;https://www.cs.cmu.edu/~dpathak/;https://www.cs.cmu.edu/~katef/", "dblp": "249/9214;173/4984;243/3349.html;155/9860;21/8780", "google_scholar": ";WTEFsHMAAAAJ;bOitqMUAAAAJ;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ;FWp7728AAAAJ", "orcid": ";;;;", "linkedin": ";;;pathak22/;", "or_profile": "~Mihir_Prabhudesai1;~Tsung-Wei_Ke2;~Alexander_Cong_Li2;~Deepak_Pathak1;~Katerina_Fragkiadaki1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Postdoc;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nprabhudesai2023testtime,\ntitle={Test-time Adaptation of Discriminative Models via Diffusion Generative Feedback},\nauthor={Mihir Prabhudesai and Tsung-Wei Ke and Alexander Cong Li and Deepak Pathak and Katerina Fragkiadaki},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gUTVpByfVX}\n}", "github": "", "project": "", "reviewers": "VyeN;6xj6;UVNj;4gPG;qddk", "pdf_size": 11263174, "rating": "6;6;6;7;7", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "104;84;91;40;127", "wc_strengths": "120;65;76;98;118", "wc_weaknesses": "177;111;361;1172;87", "wc_questions": "88;25;110;64;78", "wc_limitations": "47;1;6;100;64", "wc_review": "536;286;644;1474;474", "wc_reply_reviewers": "15;151;45;257;77", "wc_reply_authors": "24;367;30;115;41", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 89.2, 28.63145123810527 ], "wc_strengths_avg": [ 95.4, 22.01454064930722 ], "wc_weaknesses_avg": [ 381.6, 406.70165969663805 ], "wc_questions_avg": [ 73.0, 28.29840984931839 ], "wc_limitations_avg": [ 43.6, 36.978912909927466 ], "wc_review_avg": [ 682.8, 412.3350094280135 ], "wc_reply_reviewers_avg": [ 109.0, 86.74560507599217 ], "wc_reply_authors_avg": [ 115.4, 129.98861488607378 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10749303752212523814&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
"cs.cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Provably Robust Estimators for Inverse Problems via Jittering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70848", "id": "gUlcyeHzw1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3411e30afa6caeefa4d6d39a5ea84cd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gUlcyeHzw1", "openreview": "https://openreview.net/forum?id=gUlcyeHzw1", "poster": "/media/PosterPDFs/NeurIPS%202023/70848.png?t=1702225724.4809003", "slides": "https://nips.cc/virtual/2023/poster/70848", "video": "https://nips.cc/virtual/2023/poster/70848", "author_site": "Anselm Krainovic, Mahdi Soltanolkotabi, Reinhard Heckel", "tldr": "", "abstract": "Deep neural networks provide excellent performance for inverse problems such as denoising. However, neural networks can be sensitive to adversarial or worst-case perturbations. This raises the question of whether such networks can be trained efficiently to be worst-case robust. In this paper, we investigate whether jittering, a simple regularization technique that adds isotropic Gaussian noise during training, is effective for learning worst-case robust estimators for inverse problems. While well studied for prediction in classification tasks, the effectiveness of jittering for inverse problems has not been systematically investigated. In this paper, we present a novel analytical characterization of the optimal $\\ell_2$-worst-case robust estimator for linear denoising and show that jittering yields optimal robust denoisers. Furthermore, we examine jittering empirically via training deep neural networks (U-nets) for natural image denoising, deconvolution, and accelerated magnetic resonance imaging (MRI). The results show that jittering significantly enhances the worst-case robustness, but can be suboptimal for inverse problems beyond denoising. 
Moreover, our results imply that training on real data, which often contains slight noise, somewhat enhances robustness.", "keywords": "Adversarial Training;Jittering;Denoising;Deconvolution;Compressive Sensing;Inverse Problems;Robustness", "primary_area": "", "supplementary_material": "", "author": "Anselm Krainovic;Mahdi Soltanolkotabi;Reinhard Heckel", "authorids": "~Anselm_Krainovic1;~Mahdi_Soltanolkotabi1;~Reinhard_Heckel1", "gender": "M;M;M", "homepage": "https://www.ce.cit.tum.de/mli/people/anselm-krainovic/;http://www-bcf.usc.edu/~soltanol/;", "dblp": "353/0357.html;75/6691;81/9668", "google_scholar": ";narJyMAAAAAJ;ZWV0I7cAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Anselm_Krainovic1;~Mahdi_Soltanolkotabi1;~Reinhard_Heckel1", "aff": "Technische Universit\u00e4t M\u00fcnchen;University of Southern California;Technical University Munich", "aff_domain": "tum.de;usc.edu;tum.de", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkrainovic2023learning,\ntitle={Learning Provably Robust Estimators for Inverse Problems via Jittering},\nauthor={Anselm Krainovic and Mahdi Soltanolkotabi and Reinhard Heckel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gUlcyeHzw1}\n}", "github": "", "project": "", "reviewers": "uuzi;yCWN;koTN", "pdf_size": 2109619, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;4;3", "wc_summary": "58;137;83", "wc_strengths": "26;23;83", "wc_weaknesses": "119;17;175", "wc_questions": "526;327;84", "wc_limitations": "52;17;11", "wc_review": "781;521;436", "wc_reply_reviewers": "88;78;42", "wc_reply_authors": "28;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 92.66666666666667, 32.96799795087486 ], "wc_strengths_avg": [ 44.0, 27.60434748368452 ], "wc_weaknesses_avg": [ 103.66666666666667, 65.40812046085885 ], "wc_questions_avg": [ 312.3333333333333, 180.74352608661317 ], "wc_limitations_avg": [ 26.666666666666668, 18.080068829760823 ], "wc_review_avg": [ 579.3333333333334, 146.76133308500877 ], "wc_reply_reviewers_avg": [ 69.33333333333333, 19.754043186705406 ], "wc_reply_authors_avg": [ 9.333333333333334, 13.199326582148887 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17635695085900623712&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tum.de;usc.edu;tum.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;University of Southern California;Technical University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tum.de;https://www.usc.edu;https://www.tum.de", "aff_unique_abbr": "TUM;USC;TUM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Global Convergence Analysis of Local SGD for Two-layer Neural Network without Overparameterization", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70847", "id": "gVLKXT9JwG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4dade38eae8c007f3a564b8ea820664a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gVLKXT9JwG", "openreview": "https://openreview.net/forum?id=gVLKXT9JwG", "poster": "/media/PosterPDFs/NeurIPS%202023/70847.png?t=1701721948.7630124", "slides": "https://nips.cc/virtual/2023/poster/70847", "video": "https://nips.cc/virtual/2023/poster/70847", "author_site": "Yajie Bao, Amarda Shehu, Mingrui Liu", "tldr": "", "abstract": "Local SGD, a cornerstone algorithm in federated learning, is widely used in training deep neural networks and shown to have strong empirical performance. A theoretical understanding of such performance on nonconvex loss landscapes is currently lacking. Analysis of the global convergence of SGD is challenging, as the noise depends on the model parameters. Indeed, many works narrow their focus to GD and rely on injecting noise to enable convergence to the local or global optimum. When expanding the focus to local SGD, existing analyses in the nonconvex case can only guarantee finding stationary points or assume the neural network is overparameterized so as to guarantee convergence to the global minimum through neural tangent kernel analysis. In this work, we provide the first global convergence analysis of the vanilla local SGD for two-layer neural networks \\emph{without overparameterization} and \\textit{without injecting noise}, when the input data is Gaussian. The main technical ingredients of our proof are \\textit{a self-correction mechanism} and \\textit{a new exact recursive characterization of the direction of global model parameters}. The self-correction mechanism guarantees the algorithm reaches a good region even if the initialization is in a bad region. A good (bad) region means updating the model by gradient descent will move closer to (away from) the optimal solution. The main difficulty in establishing a self-correction mechanism is to cope with the gradient dependency between two layers. To address this challenge, we divide the landscape of the objective into several regions to carefully control the interference of two layers during the correction process. As a result, we show that local SGD can correct the two layers and enter the good region in polynomial time. After that, we establish a new exact recursive characterization of the direction of global parameters, which is the key to showing convergence to the global minimum with linear speedup in the number of machines and reduced communication rounds. 
Experiments on synthetic data confirm theoretical results.", "keywords": "convolutional neural network;gaussian input;local SGD;global convergence;non-convex optimization", "primary_area": "", "supplementary_material": "/attachment/7cc6ae0e5371e8df0bcb3daff731ecf546e0ffc6.zip", "author": "Yajie Bao;Amarda Shehu;Mingrui Liu", "authorids": "~Yajie_Bao2;~Amarda_Shehu1;~Mingrui_Liu2", "gender": "M;F;", "homepage": "https://yajiebao.github.io/;https://cs.gmu.edu/~ashehu/;https://mingrliu.github.io", "dblp": "254/8290;53/3810;", "google_scholar": "1n_aUsIAAAAJ;https://scholar.google.com.tw/citations?user=HkB_Gz0AAAAJ;KFoEnFQAAAAJ", "orcid": "0000-0003-3843-7016;0000-0001-5230-4610;", "linkedin": ";;mingrui-liu-447a2aab/", "or_profile": "~Yajie_Bao2;~Amarda_Shehu1;~Mingrui_Liu2", "aff": "Shanghai Jiaotong University;George Mason University;George Mason University", "aff_domain": "sjtu.edu.cn;gmu.edu;gmu.edu", "position": "PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nbao2023global,\ntitle={Global Convergence Analysis of Local {SGD} for Two-layer Neural Network without Overparameterization},\nauthor={Yajie Bao and Amarda Shehu and Mingrui Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gVLKXT9JwG}\n}", "github": "", "project": "", "reviewers": "coFq;GGhs;p3Vw;bCYH", "pdf_size": 779394, "rating": "3;5;5;7", "confidence": "3;3;2;4", "soundness": "2;2;3;3", "novelty": "2;2;3;4", "presentation": "1;3;2;4", "wc_summary": "52;40;49;36", "wc_strengths": "11;57;57;52", "wc_weaknesses": "154;56;68;26", "wc_questions": "47;83;3;109", "wc_limitations": "29;48;9;7", "wc_review": "293;284;186;230", "wc_reply_reviewers": "148;12;203;13", "wc_reply_authors": "360;0;267;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 44.25, 6.49519052838329 ], "wc_strengths_avg": [ 44.25, 19.30511590226798 ], "wc_weaknesses_avg": [ 76.0, 47.56048780237646 ], "wc_questions_avg": [ 60.5, 39.834030677299026 ], "wc_limitations_avg": [ 23.25, 16.67895380412093 ], "wc_review_avg": [ 248.25, 43.268782048955345 ], "wc_reply_reviewers_avg": [ 94.0, 83.78842402146014 ], "wc_reply_authors_avg": [ 156.75, 160.16144198901307 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=838876029021480538&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;gmu.edu;gmu.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;George Mason University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.gmu.edu", "aff_unique_abbr": "SJTU;GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Differentiable sorting for censored time-to-event data.", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70846", "id": "gYWjI7wLhc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1a25d7e93f06cb422b3a74a0aa3bf3f-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=gYWjI7wLhc", "openreview": "https://openreview.net/forum?id=gYWjI7wLhc", "poster": "/media/PosterPDFs/NeurIPS%202023/70846.png?t=1702160970.30636", "slides": "https://nips.cc/virtual/2023/poster/70846", "video": "https://nips.cc/virtual/2023/poster/70846", "author_site": "Andre Vauvelle, Benjamin Wild, Roland Eils, Spiros Denaxas", "tldr": "", "abstract": "Survival analysis is a crucial semi-supervised task in machine learning with significant real-world applications, especially in healthcare. The most common approach to survival analysis, Cox\u2019s partial likelihood, can be interpreted as a ranking model optimized on a lower bound of the concordance index. We follow these connections further, with listwise ranking losses that allow for a relaxation of the pairwise independence assumption. Given the inherent transitivity of ranking, we explore differentiable sorting networks as a means to introduce a stronger transitive inductive bias during optimization. Despite their potential, current differentiable sorting methods cannot account for censoring, a crucial aspect of many real-world datasets. We propose a novel method, Diffsurv, to overcome this limitation by extending differentiable sorting methods to handle censored tasks. Diffsurv predicts matrices of possible permutations that accommodate the label uncertainty introduced by censored samples. Our experiments reveal that Diffsurv outperforms established baselines in various simulated and real-world risk prediction scenarios. Furthermore, we demonstrate the algorithmic advantages of Diffsurv by presenting a novel method for top-k risk prediction that surpasses current methods.", "keywords": "Survival Analysis;Censored Data;Semi-supervised Learning;Time-to-event-data;Algorithmic Supervision;Sorting;Risk Prediction;Weakly-supervised Learning;Machine Learning;Cox's Partial Likelihood;Differentiable Sorting Networks;Transitive Inductive Bias;Ranking Losses;Listwise Ranking;Healthcare Applications;Deep Learning;Neural Networks;Top-k Risk Prediction", "primary_area": "", "supplementary_material": "", "author": "Andre Vauvelle;Benjamin Wild;Roland Eils;Spiros Denaxas", "authorids": "~Andre_Vauvelle1;~Benjamin_Wild1;roland.eils@bih-charite.de;s.denaxas@ucl.ac.uk", "gender": ";M;;", "homepage": ";;;", "dblp": "345/8340;131/9500;;", "google_scholar": ";;;", "orcid": ";0000-0002-7492-8448;;", "linkedin": "andre-vauvelle/;;;", "or_profile": "~Andre_Vauvelle1;~Benjamin_Wild1;roland.eils@bih-charite.de;s.denaxas@ucl.ac.uk", "aff": "University College London, University of London;Berlin Institute of Health;;", "aff_domain": "ucl.ac.uk;charite.de;;", "position": "PhD student;Postdoc;;", "bibtex": "@inproceedings{\nvauvelle2023differentiable,\ntitle={Differentiable sorting for censored time-to-event data.},\nauthor={Andre Vauvelle and Benjamin Wild and Roland Eils and Spiros Denaxas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gYWjI7wLhc}\n}", "github": "", "project": "", "reviewers": "e7KQ;Rdgo;GX1f;RSfV", "pdf_size": 478061, "rating": "5;5;5;7", "confidence": "5;4;4;3", "soundness": "3;3;2;4", "novelty": "3;3;2;4", "presentation": "3;3;2;3", "wc_summary": "40;70;84;204", "wc_strengths": "47;18;27;72", "wc_weaknesses": "197;248;167;112", "wc_questions": "209;204;130;35", "wc_limitations": "62;34;33;17", "wc_review": "555;574;441;440", "wc_reply_reviewers": "480;68;250;109", "wc_reply_authors": "293;392;1204;29", "reply_reviewers": 
"3;1;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.5, 62.39190652640774 ], "wc_strengths_avg": [ 41.0, 20.74849392124643 ], "wc_weaknesses_avg": [ 181.0, 49.24936547814601 ], "wc_questions_avg": [ 144.5, 70.53545207907865 ], "wc_limitations_avg": [ 36.5, 16.194134740701646 ], "wc_review_avg": [ 502.5, 62.36385170914318 ], "wc_reply_reviewers_avg": [ 226.75, 161.04560689444466 ], "wc_reply_authors_avg": [ 479.5, 438.8305481618161 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9282767310020228335&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucl.ac.uk;charite.de;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University College London;Berlin Institute of Health", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.bih.org/", "aff_unique_abbr": "UCL;BIH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Best Arm Identification with Fixed Budget: A Large Deviation Perspective", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70845", "id": "gYetLsNO8x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/364d565b4b726c607aa40e1632045873-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gYetLsNO8x", "openreview": "https://openreview.net/forum?id=gYetLsNO8x", "poster": "/media/PosterPDFs/NeurIPS%202023/70845.png?t=1701375520.8748662", "slides": "https://nips.cc/virtual/2023/poster/70845", "video": "https://nips.cc/virtual/2023/poster/70845", "author_site": "Po-An Wang, Ruo-Chun Tzeng, Alexandre Proutiere", "tldr": "", "abstract": "We consider the problem of identifying the best arm in stochastic Multi-Armed Bandits (MABs) using a fixed sampling budget. Characterizing the minimal instance-specific error probability for this problem constitutes one of the important remaining open problems in MABs. When arms are selected using a static sampling strategy, the error probability decays exponentially with the number of samples at a rate that can be explicitly derived via Large Deviation techniques. Analyzing the performance of algorithms with adaptive sampling strategies is however much more challenging. In this paper, we establish a connection between the Large Deviation Principle (LDP) satisfied by the empirical proportions of arm draws and that satisfied by the empirical arm rewards. This connection holds for any adaptive algorithm, and is leveraged (i) to improve error probability upper bounds of some existing algorithms, such as the celebrated SR (Successive Rejects) algorithm \\cite{audibert2010best}, and (ii) to devise and analyze new algorithms. In particular, we present CR (Continuous Rejects), a truly adaptive algorithm that can reject arms in {\\it any} round based on the observed empirical gaps between the rewards of various arms. Applying our Large Deviation results, we prove that CR enjoys better performance guarantees than existing algorithms, including SR. 
Extensive numerical experiments confirm this observation.", "keywords": "Best arm identification;Large deviation", "primary_area": "", "supplementary_material": "/attachment/2819381dc0bccc8343d9282643a8814d2c1f9c00.pdf", "author": "Po-An Wang;Ruo-Chun Tzeng;Alexandre Proutiere", "authorids": "~Po-An_Wang1;~Ruo-Chun_Tzeng1;~Alexandre_Proutiere1", "gender": ";F;M", "homepage": ";https://rctzeng.github.io/;https://people.kth.se/~alepro/", "dblp": "203/4451;242/3884;p/AlexandreProutiere", "google_scholar": "https://scholar.google.com.tw/citations?user=kzXIxFYAAAAJ;jntcHQ0AAAAJ;g5sya5cAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Po-An_Wang1;~Ruo-Chun_Tzeng1;~Alexandre_Proutiere1", "aff": "KTH Royal Institute of Technology, Stockholm, Sweden;KTH Royal Institute of Technology, Stockholm, Sweden;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;kth.se;kth.se", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023best,\ntitle={Best Arm Identification with Fixed Budget: A Large Deviation Perspective},\nauthor={Po-An Wang and Ruo-Chun Tzeng and Alexandre Proutiere},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gYetLsNO8x}\n}", "github": "", "project": "", "reviewers": "64fe;zb7g;vyXZ;ooBf", "pdf_size": 412619, "rating": "6;6;7;8", "confidence": "3;1;4;3", "soundness": "4;3;4;4", "novelty": "3;2;4;4", "presentation": "3;3;4;4", "wc_summary": "74;106;158;94", "wc_strengths": "70;19;49;136", "wc_weaknesses": "51;43;26;19", "wc_questions": "25;66;26;50", "wc_limitations": "1;0;23;9", "wc_review": "221;234;282;308", "wc_reply_reviewers": "18;19;16;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 108.0, 31.04834939252005 ], "wc_strengths_avg": [ 68.5, 42.97964634568321 ], "wc_weaknesses_avg": [ 34.75, 12.813567028739499 ], "wc_questions_avg": [ 41.75, 17.210098779495716 ], "wc_limitations_avg": [ 8.25, 9.202581159652981 ], "wc_review_avg": [ 261.25, 35.280128968018246 ], "wc_reply_reviewers_avg": [ 17.75, 1.0897247358851685 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.48420012470625223, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7200800306980351126&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "kth.se;kth.se;kth.se", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stockholm", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "id": "gZiLCwFT61", "title": "Towards Skilled Population Curriculum for Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in multi-agent reinforcement learning (MARL) allow agents to coordinate their behaviors in complex environments. However, common MARL algorithms still suffer from scalability and sparse reward issues. 
One promising approach to resolving them is automatic curriculum learning (ACL). ACL involves a student (curriculum learner) training on tasks of increasing difficulty controlled by a teacher (curriculum generator). Despite its success, ACL's applicability is limited by (1) the lack of a general student framework for dealing with the varying number of agents across tasks and the sparse reward problem, and (2) the non-stationarity of the teacher's task due to ever-changing student strategies. To remedy these issues, we introduce a novel automatic curriculum learning framework, Skilled Population Curriculum (SPC), which adapts curriculum learning to multi-agent coordination. Specifically, we endow the student with population-invariant communication and a hierarchical skill set, allowing it to learn cooperation and behavior skills from distinct tasks with varying numbers of agents. In addition, we model the teacher as a contextual bandit conditioned on student policies, enabling a team of agents to change its size while still retaining previously acquired skills. We also analyze the inherent non-stationarity of this multi-agent automatic curriculum teaching problem and provide a corresponding regret bound. Empirical results show that our method improves the performance, scalability and sample efficiency in several MARL environments. The source code and a video demonstration can be found at https://sites.google.com/view/marl-spc/.", "keywords": "multi-agent reinforcement learning;multi-agent curriculum;skill learning", "primary_area": "", "supplementary_material": "", "author": "Rundong Wang;Longtao Zheng;Wei Qiu;Bowei He;Bo An;Zinovi Rabinovich;Yujing Hu;Yingfeng Chen;Tangjie Lv;Changjie Fan", "authorids": "~Rundong_Wang1;~Longtao_Zheng1;~Wei_Qiu3;~Bowei_He1;~Bo_An2;~Zinovi_Rabinovich1;~Yujing_Hu2;~Yingfeng_Chen2;~Tangjie_Lv1;~Changjie_Fan1", "gender": "M;M;M;M;M;M;;M;M;M", "homepage": ";https://ltzheng.github.io/;;;https://personal.ntu.edu.sg/boan/;http://zinovi.zinovi.net;;;;http://chyf.ink/", "dblp": "254/1228;293/7155;11/5166-1;179/0894;42/6178-1.html;93/4009;https://dblp.uni-trier.de/pid/160/1923.html;;71/882;37/1835", "google_scholar": "JEVpgE8AAAAJ;https://scholar.google.com/citations?hl=en;gszGlZIAAAAJ;1cH0A9cAAAAJ;PEEpuNwAAAAJ;https://scholar.google.com.tw/citations?user=JwJRnmAAAAAJ;IR5WY-wAAAAJ;EIuWpJcAAAAJ;;SSBrkpMAAAAJ", "orcid": ";;;0000-0002-0360-2950;0000-0002-7064-7438;;;0000-0001-9858-809X;0000-0001-5420-0516;", "linkedin": ";longtaozheng;;;;;;;;", "or_profile": "~Rundong_Wang1;~Longtao_Zheng1;~Wei_Qiu3;~Bowei_He1;~Bo_An2;~Zinovi_Rabinovich1;~Yujing_Hu2;~Tangjie_Lv1;~Changjie_Fan1;~Charles_Chen1", "aff": "Nanyang Technological University;Nanyang Technological University, Singapore;Nanyang Technological University;City University of Hong Kong;Nanyang Technological University;Nanyang Technological University;NetEase, Inc.;NetEase, Inc.;Netease, Fuxi AI Lab;Fuxi AI Lab in Netease", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;my.cityu.edu.hk;ntu.edu.sg;ntu.edu.sg;corp.netease.com;netease.com;corp.netease.com;corp.netease.com", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Assistant Professor;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@misc{\nwang2023towards,\ntitle={Towards Skilled Population Curriculum for Multi-Agent Reinforcement Learning},\nauthor={Rundong Wang and Longtao Zheng and Wei Qiu and Bowei He and Bo An and Zinovi Rabinovich and Yujing Hu and Yingfeng Chen and Tangjie Lv and Changjie 
Fan},\nyear={2023},\nurl={https://openreview.net/forum?id=gZiLCwFT61}\n}", "github": "", "project": "", "reviewers": "iWBE;p3Bh;reFd;k74L;oZVt", "site": "https://openreview.net/forum?id=gZiLCwFT61", "pdf_size": 3544284, "rating": "5;5;5;6;7", "confidence": "2;3;4;4;4", "soundness": "3;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;4;2", "wc_summary": "16;141;97;109;96", "wc_strengths": "52;39;71;47;58", "wc_weaknesses": "165;189;73;735;258", "wc_questions": "76;166;98;37;244", "wc_limitations": "39;13;17;111;1", "wc_review": "348;548;356;1039;657", "wc_reply_reviewers": "27;33;12;0;164", "wc_reply_authors": "0;0;0;0;659", "reply_reviewers": "1;1;1;0;2", "reply_authors": "1;1;1;1;3", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 91.8, 41.24754538151331 ], "wc_strengths_avg": [ 53.4, 10.781465577554842 ], "wc_weaknesses_avg": [ 284.0, 233.14544816487413 ], "wc_questions_avg": [ 124.2, 73.09281770461445 ], "wc_limitations_avg": [ 36.2, 39.3669912490147 ], "wc_review_avg": [ 589.6, 253.44553655568686 ], "wc_reply_reviewers_avg": [ 47.2, 59.52948849099914 ], "wc_reply_authors_avg": [ 131.8, 263.6 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5625, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4727147527686440420&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0;0;2;2;3;3", "aff_unique_norm": "Nanyang Technological University;City University of Hong Kong;NetEase, Inc.;Netease", "aff_unique_dep": ";;;Fuxi AI Lab", "aff_unique_url": "https://www.ntu.edu.sg;https://www.cityu.edu.hk;https://www.163.com;https://www.netease.com", "aff_unique_abbr": "NTU;CityU;NetEase;Netease", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "On Private and Robust Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70844", "id": "gaXAjtHic2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d13e085b79d454da5910e4ca82a3d9d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gaXAjtHic2", "openreview": "https://openreview.net/forum?id=gaXAjtHic2", "poster": "/media/PosterPDFs/NeurIPS%202023/70844.png?t=1699633557.4901803", "slides": "https://nips.cc/virtual/2023/poster/70844", "video": "https://nips.cc/virtual/2023/poster/70844", "author_site": "Yulian Wu, Xingyu Zhou, Youming Tao, Di Wang", "tldr": "", "abstract": "We study private and robust multi-armed bandits (MABs), where the agent receives Huber's contaminated heavy-tailed rewards and meanwhile needs to ensure differential privacy. We consider both the finite $k$-th raw moment and the finite $k$-th central moment settings for heavy-tailed rewards distributions with $k\\ge 2$. We first present its minimax lower bound, characterizing the information-theoretic limit of regret with respect to privacy budget, contamination level, and heavy-tailedness. Then, we propose a meta-algorithm that builds on a private and robust mean estimation sub-routine \\texttt{PRM} that essentially relies on reward truncation and the Laplace mechanism. 
For the above two different heavy-tailed settings, we give corresponding schemes of \\texttt{PRM}, which enable us to achieve nearly-optimal regrets. Moreover, our two proposed truncation-based or histogram-based \\texttt{PRM} schemes achieve the optimal trade-off between estimation accuracy, privacy and robustness. Finally, we support our theoretical results and show the effectiveness of our algorithms with experimental studies.", "keywords": "Bandits;privacy;robustness", "primary_area": "", "supplementary_material": "/attachment/d0a3c3027cee288bc217d8b870d2fe149d45b5a4.pdf", "author": "Yulian Wu;Xingyu Zhou;Youming Tao;Di Wang", "authorids": "~Yulian_Wu1;~Xingyu_Zhou2;~Youming_Tao1;~Di_Wang1", "gender": "F;M;;", "homepage": "https://cemse.kaust.edu.sa/part/people/person/yulian-wu;http://xingyuzhou.org;https://youmingtao.github.io/;", "dblp": "182/8539;07/10352-1;279/3128;", "google_scholar": "10E7OtIAAAAJ;AsTyRmwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0002-7187-2856;;0000-0001-6750-1190;", "linkedin": ";;;", "or_profile": "~Yulian_Wu1;~Xingyu_Zhou2;~Youming_Tao1;~Di_Wang1", "aff": "KAUST;Wayne State University;Shandong University;", "aff_domain": "kaust.edu.sa;wayne.edu;sdu.edu.cn;", "position": "PhD student;Assistant Professor;PhD student;", "bibtex": "@inproceedings{\nwu2023on,\ntitle={On Private and Robust Bandits},\nauthor={Yulian Wu and Xingyu Zhou and Youming Tao and Di Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gaXAjtHic2}\n}", "github": "", "project": "", "reviewers": "Ssa1;xHN8;nGB6;kQhg", "pdf_size": 662808, "rating": "6;6;7;8", "confidence": "1;3;3;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;4;4", "wc_summary": "128;36;70;71", "wc_strengths": "75;36;49;44", "wc_weaknesses": "196;134;127;52", "wc_questions": "35;17;2;6", "wc_limitations": "13;1;5;1", "wc_review": "447;224;253;174", "wc_reply_reviewers": "281;0;23;17", "wc_reply_authors": "517;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 76.25, 33.03312731183652 ], "wc_strengths_avg": [ 51.0, 14.611639196202457 ], "wc_weaknesses_avg": [ 127.25, 51.07531204016281 ], "wc_questions_avg": [ 15.0, 12.786711852544421 ], "wc_limitations_avg": [ 5.0, 4.898979485566356 ], "wc_review_avg": [ 274.5, 103.52415177145862 ], "wc_reply_reviewers_avg": [ 80.25, 116.20967042376465 ], "wc_reply_authors_avg": [ 129.25, 223.8675668782774 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4654460595539963018&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "kaust.edu.sa;wayne.edu;sdu.edu.cn;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "King Abdullah University of Science and Technology;Wayne State University;Shandong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaust.edu.sa;https://wayne.edu;http://www.sdu.edu.cn", "aff_unique_abbr": "KAUST;WSU;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", 
"aff_country_unique": "Saudi Arabia;United States;China" }, { "title": "Semi-Implicit Denoising Diffusion Models (SIDDMs)", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70843", "id": "gaktiSjatl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3882ca2c952276247fe9a993193b00e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gaktiSjatl", "openreview": "https://openreview.net/forum?id=gaktiSjatl", "poster": "/media/PosterPDFs/NeurIPS%202023/70843.png?t=1701983220.166153", "slides": "https://nips.cc/virtual/2023/poster/70843", "video": "https://nips.cc/virtual/2023/poster/70843", "author_site": "yanwu xu, Mingming Gong, Shaoan Xie, Wei Wei, Matthias Grundmann, Kayhan Batmanghelich, Tingbo Hou", "tldr": "", "abstract": "Despite the proliferation of generative models, achieving fast sampling during inference without compromising sample diversity and quality remains challenging. Existing models such as Denoising Diffusion Probabilistic Models (DDPM) deliver high-quality, diverse samples but are slowed by an inherently high number of iterative steps. The Denoising Diffusion Generative Adversarial Networks (DDGAN) attempted to circumvent this limitation by integrating a GAN model for larger jumps in the diffusion process. However, DDGAN encountered scalability limitations when applied to large datasets. To address these limitations, we introduce a novel approach that tackles the problem by matching implicit and explicit factors. More specifically, our approach involves utilizing an implicit model to match the marginal distributions of noisy data and the explicit conditional distribution of the forward diffusion. This combination allows us to effectively match the joint denoising distributions. Unlike DDPM but similar to DDGAN, we do not enforce a parametric distribution for the reverse step, enabling us to take large steps during inference. Similar to the DDPM but unlike DDGAN, we take advantage of the exact form of the diffusion process. 
We demonstrate that our proposed method obtains comparable generative performance to diffusion-based models and vastly superior results to models with a small number of sampling steps.", "keywords": "Diffusion Model;GAN;Semi-implicit Modeling", "primary_area": "", "supplementary_material": "/attachment/9ac80c290cb41140e32b3e078dccba52fd2b172d.zip", "author": "yanwu xu;Mingming Gong;Shaoan Xie;Wei Wei;Matthias Grundmann;kayhan Batmanghelich;Tingbo Hou", "authorids": "~yanwu_xu4;~Mingming_Gong1;~Shaoan_Xie4;~Wei_Wei15;~Matthias_Grundmann3;~kayhan_Batmanghelich1;~Tingbo_Hou2", "gender": "M;M;;;M;M;M", "homepage": "https://xuyanwu.github.io/;https://mingming-gong.github.io/;https://shaoan.net;;http://batman-lab.com;https://research.google/people/106438/;http://www.weiwei.one", "dblp": "59/9180-3;98/8479;205/9276.html;67/6472;38/193;35/3986;", "google_scholar": "NOEyacoAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;mChB-hQAAAAJ;_8SObXwAAAAJ;PvHFAfIAAAAJ;u-UDZcsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-7147-5589;;;0000-0001-9893-9136;0009-0006-9667-9821;", "linkedin": ";;;;;tingbo-hou/;", "or_profile": "~yanwu_xu4;~Mingming_Gong1;~Shaoan_Xie4;~Matthias_Grundmann3;~kayhan_Batmanghelich1;~Tingbo_Hou2;~wei_wei3", "aff": "Boston University, Boston University;University of Melbourne;Carnegie Mellon University;Google;University of Pittsburgh;Google;Google", "aff_domain": "bu.edu;unimelb.edu.au;cmu.edu;google.com;pitt.edu;google.com;google.com", "position": "PhD student;Assistant Professor;PhD student;Research Scientist;Assistant Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nxu2023semiimplicit,\ntitle={Semi-Implicit Denoising Diffusion Models ({SIDDM}s)},\nauthor={yanwu xu and Mingming Gong and Shaoan Xie and Wei Wei and Matthias Grundmann and kayhan Batmanghelich and Tingbo Hou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gaktiSjatl}\n}", "github": "", "project": "", "reviewers": "3HLp;SF8Z;g6aX;6duC;azcW", "pdf_size": 3625603, "rating": "5;5;5;6;6", "confidence": "3;4;2;4;4", "soundness": "3;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;3;3;2", "wc_summary": "44;67;100;68;180", "wc_strengths": "58;40;51;63;350", "wc_weaknesses": "34;97;41;52;263", "wc_questions": "24;4;5;47;112", "wc_limitations": "19;4;1;21;31", "wc_review": "179;212;198;251;936", "wc_reply_reviewers": "0;45;1;103;27", "wc_reply_authors": "0;57;0;155;0", "reply_reviewers": "0;1;1;2;1", "reply_authors": "1;3;1;3;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 91.8, 47.566374677917175 ], "wc_strengths_avg": [ 112.4, 119.0505774870496 ], "wc_weaknesses_avg": [ 97.4, 85.65652339431013 ], "wc_questions_avg": [ 38.4, 39.992999387392786 ], "wc_limitations_avg": [ 15.2, 11.178550889985695 ], "wc_review_avg": [ 355.2, 291.35915980109496 ], "wc_reply_reviewers_avg": [ 35.2, 37.86502343852437 ], "wc_reply_authors_avg": [ 42.4, 60.47346525543248 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16859180387007076410&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 6, "email": "bu.edu;unimelb.edu.au;cmu.edu;google.com;pitt.edu;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;2;3;4;3;3", "aff_unique_norm": "Boston University;University of Melbourne;Carnegie Mellon University;Google;University of Pittsburgh", "aff_unique_dep": ";;;Google;", "aff_unique_url": "https://www.bu.edu;https://www.unimelb.edu.au;https://www.cmu.edu;https://www.google.com;https://www.pitt.edu", "aff_unique_abbr": "BU;UniMelb;CMU;Google;Pitt", "aff_campus_unique_index": "0;2;2;2", "aff_campus_unique": "Boston;;Mountain View", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;Australia" }, { "title": "Slot-guided Volumetric Object Radiance Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70842", "id": "ganlU27uvj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf66f995883298c4db2f0dcba28fb211-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ganlU27uvj", "openreview": "https://openreview.net/forum?id=ganlU27uvj", "poster": "/media/PosterPDFs/NeurIPS%202023/70842.png?t=1701433922.5302033", "slides": "https://nips.cc/virtual/2023/poster/70842", "video": "https://nips.cc/virtual/2023/poster/70842", "author_site": "DI QI, Tong Yang, Xiangyu Zhang", "tldr": "", "abstract": "We present a novel framework for 3D object-centric representation learning. Our approach effectively decomposes complex scenes into individual objects from a single image in an unsupervised fashion. This method, called \\underline{s}lot-guided \\underline{V}olumetric \\underline{O}bject \\underline{R}adiance \\underline{F}ields~(sVORF), composes volumetric object radiance fields with object slots as a guidance to implement unsupervised 3D scene decomposition. Specifically, sVORF obtains object slots from a single image via a transformer module, maps these slots to volumetric object radiance fields with a hypernetwork and composes object radiance fields with the guidance of object slots at a 3D location. Moreover, sVORF significantly reduces memory requirement due to small-sized pixel rendering during training. We demonstrate the effectiveness of our approach by showing top results in scene decomposition and generation tasks of complex synthetic datasets (e.g., Room-Diverse). Furthermore, we also confirm the potential of sVORF to segment objects in real-world scenes (e.g., the LLFF dataset). 
We hope our approach can provide preliminary understanding of the physical world and help ease future research in 3D object-centric representation learning.", "keywords": "3D object-centric representation learning;NeRF;3D-aware slot", "primary_area": "", "supplementary_material": "/attachment/ca7c5f6acd00adda8f55427eab4e4d49add346f1.pdf", "author": "DI QI;Tong Yang;Xiangyu Zhang", "authorids": "~DI_QI3;~Tong_Yang2;~Xiangyu_Zhang1", "gender": ";M;M", "homepage": ";;", "dblp": ";;95/3760-5.html", "google_scholar": ";yu7ijD0AAAAJ;yuB-cfoAAAAJ", "orcid": ";;0000-0003-2138-4608", "linkedin": ";;", "or_profile": "~DI_QI3;~Tong_Yang2;~Xiangyu_Zhang1", "aff": ";Megvii Technology Inc.;MEGVII Technology", "aff_domain": ";megvii.com;megvii.com", "position": ";Researcher;Principal Researcher", "bibtex": "@inproceedings{\nqi2023slotguided,\ntitle={Slot-guided Volumetric Object Radiance Fields},\nauthor={DI QI and Tong Yang and Xiangyu Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ganlU27uvj}\n}", "github": "", "project": "", "reviewers": "6zB4;thZt;uYMS;JyjT;3Ksj", "pdf_size": 2306174, "rating": "4;5;5;6;7", "confidence": "2;3;3;5;4", "soundness": "2;3;3;3;2", "novelty": "2;3;3;2;3", "presentation": "3;4;3;3;3", "wc_summary": "80;143;57;330;83", "wc_strengths": "57;104;40;76;71", "wc_weaknesses": "182;152;340;386;225", "wc_questions": "25;44;5;264;137", "wc_limitations": "12;1;7;45;33", "wc_review": "356;444;449;1101;549", "wc_reply_reviewers": "0;54;165;149;125", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 138.6, 99.8370672646187 ], "wc_strengths_avg": [ 69.6, 21.265935201631738 ], "wc_weaknesses_avg": [ 257.0, 90.77885216282479 ], "wc_questions_avg": [ 95.0, 95.83944908021958 ], "wc_limitations_avg": [ 19.6, 16.65653025092561 ], "wc_review_avg": [ 579.8, 267.66949770192343 ], "wc_reply_reviewers_avg": [ 98.6, 62.220896811280376 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8076923076923078, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6236377588686782709&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": ";megvii.com;megvii.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Megvii Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.megvii.com", "aff_unique_abbr": "Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Object-Centric Slot Diffusion", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70841", "id": "gbOukzirpK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b3ceb8a495a63ced4a48f8429ccdcd8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gbOukzirpK", "openreview": "https://openreview.net/forum?id=gbOukzirpK", "poster": "/media/PosterPDFs/NeurIPS%202023/70841.png?t=1702074246.3217368", "slides": "https://nips.cc/virtual/2023/poster/70841", "video": "https://nips.cc/virtual/2023/poster/70841", 
"author_site": "Jindong Jiang, Fei Deng, Gautam Singh, Sungjin Ahn", "tldr": "", "abstract": "The recent success of transformer-based image generative models in object-centric learning highlights the importance of powerful image generators for handling complex scenes. However, despite the high expressiveness of diffusion models in image generation, their integration into object-centric learning remains largely unexplored in this domain. In this paper, we explore the feasibility and potential of integrating diffusion models into object-centric learning and investigate the pros and cons of this approach. We introduce Latent Slot Diffusion (LSD), a novel model that serves dual purposes: it is the first object-centric learning model to replace conventional slot decoders with a latent diffusion model conditioned on object slots, and it is also the first unsupervised compositional conditional diffusion model that operates without the need for supervised annotations like text. Through experiments on various object-centric tasks, including the first application of the FFHQ dataset in this field, we demonstrate that LSD significantly outperforms state-of-the-art transformer-based decoders, particularly in more complex scenes, and exhibits superior unsupervised compositional generation quality. In addition, we conduct a preliminary investigation into the integration of pre-trained diffusion models in LSD and demonstrate its effectiveness in real-world image segmentation and generation. Project page is available at https://latentslotdiffusion.github.io", "keywords": "Object-Centric Representation Learning;Diffusion Models;Unsupervised Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Jindong Jiang;Fei Deng;Gautam Singh;Sungjin Ahn", "authorids": "~Jindong_Jiang1;~Fei_Deng1;~Gautam_Singh3;~Sungjin_Ahn1", "gender": "M;M;M;", "homepage": "https://www.jindongjiang.me;;https://singhgautam.github.io;", "dblp": "200/8116;46/10037-1;35/2642;", "google_scholar": "6oo8xOQAAAAJ;https://scholar.google.com/citations?hl=en;lXpFxDwAAAAJ;", "orcid": ";;;", "linkedin": ";;gautam-singh-61302463/;", "or_profile": "~Jindong_Jiang1;~Fei_Deng1;~Gautam_Singh3;~Sungjin_Ahn1", "aff": "Rutgers University;Rutgers University;Rutgers University;", "aff_domain": "rutgers.edu;rutgers.edu;rutgers.edu;", "position": "PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\njiang2023objectcentric,\ntitle={Object-Centric Slot Diffusion},\nauthor={Jindong Jiang and Fei Deng and Gautam Singh and Sungjin Ahn},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gbOukzirpK}\n}", "github": "", "project": "", "reviewers": "QNuS;ZjQw;TTEN;uRVS", "pdf_size": 42884172, "rating": "7;7;7;8", "confidence": "5;4;3;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "75;115;62;125", "wc_strengths": "75;89;51;138", "wc_weaknesses": "100;141;14;193", "wc_questions": "171;105;9;123", "wc_limitations": "60;64;16;77", "wc_review": "481;514;152;656", "wc_reply_reviewers": "20;23;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.25, 26.394838510587633 ], "wc_strengths_avg": [ 88.25, 31.77558024647229 ], "wc_weaknesses_avg": [ 
112.0, 65.47900426854397 ], "wc_questions_avg": [ 102.0, 58.86425061104575 ], "wc_limitations_avg": [ 54.25, 22.960564017462637 ], "wc_review_avg": [ 450.75, 184.59059428909157 ], "wc_reply_reviewers_avg": [ 13.75, 8.898735865278843 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14381008946298252893&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "rutgers.edu;rutgers.edu;rutgers.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Synthetic Combinations: A Causal Inference Framework for Combinatorial Interventions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70840", "id": "gbhixjg2dX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d17b7f7d52c83ab6e97e2dc0bda2e71-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gbhixjg2dX", "openreview": "https://openreview.net/forum?id=gbhixjg2dX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70840", "video": "https://nips.cc/virtual/2023/poster/70840", "author_site": "Abhineet Agarwal, Anish Agarwal, Suhas Vijaykumar", "tldr": "", "abstract": "We consider a setting where there are $N$ heterogeneous units and $p$ interventions. Our goal is to learn unit-specific potential outcomes for any combination of these $p$ interventions, i.e., $N \\times 2^p$ causal parameters. Choosing a combination of interventions is a problem that naturally arises in a variety of applications such as factorial design experiments and recommendation engines (e.g., showing a set of movies that maximizes engagement for a given user). Running $N \\times 2^p$ experiments to estimate the various parameters is likely expensive and/or infeasible as $N$ and $p$ grow. Further, with observational data there is likely confounding, i.e., whether or not a unit is seen under a combination is correlated with its potential outcome under that combination. We study this problem under a novel model that imposes latent structure across both units and combinations of interventions. Specifically, we assume latent similarity in potential outcomes across units (i.e., the matrix of potential outcomes is approximately rank $r$) and regularity in how combinations of interventions interact (i.e., the coefficients in the Fourier expansion of the potential outcomes are approximately $s$-sparse). We establish identification for all $N \\times 2^p$ parameters despite unobserved confounding. We propose an estimation procedure, Synthetic Combinations, and establish finite-sample consistency under precise conditions on the observation pattern. We show that Synthetic Combinations is able to consistently estimate unit-specific potential outcomes given a total of $\\text{poly}(r) \\times \\left( N + s^2p\\right)$ observations. 
In comparison, previous methods that do not exploit structure across both units and combinations have poorer sample complexity scaling as $\\min(N \\times s^2p, \\ \\ r \\times (N + 2^p))$.", "keywords": "Causal Inference;Matrix Completion;Combinatorial Learning;Ranking", "primary_area": "", "supplementary_material": "", "author": "Abhineet Agarwal;Anish Agarwal;Suhas Vijaykumar", "authorids": "~Abhineet_Agarwal1;~Anish_Agarwal1;~Suhas_Vijaykumar1", "gender": "M;;M", "homepage": ";https://sites.google.com/view/anishagarwal;https://sites.google.com/view/suhasv/home", "dblp": "304/4687;;183/8660", "google_scholar": ";;QYoK7RIAAAAJ", "orcid": ";;", "linkedin": "abhineet-agarwal-126171185/;;suhas-vijaykumar-a04618ab/", "or_profile": "~Abhineet_Agarwal1;~Anish_Agarwal1;~Suhas_Vijaykumar1", "aff": "University of California, Berkeley;Columbia University;Massachusetts Institute of Technology", "aff_domain": "berkeley.edu;columbia.edu;mit.edu", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nagarwal2023synthetic,\ntitle={Synthetic Combinations: A Causal Inference Framework for Combinatorial Interventions},\nauthor={Abhineet Agarwal and Anish Agarwal and Suhas Vijaykumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gbhixjg2dX}\n}", "github": "", "project": "", "reviewers": "z6nm;rsAG;qb2F;Wabz", "pdf_size": 722605, "rating": "7;7;7;7", "confidence": "4;3;3;1", "soundness": "4;3;3;4", "novelty": "4;4;3;4", "presentation": "4;2;4;4", "wc_summary": "65;54;170;71", "wc_strengths": "49;105;141;25", "wc_weaknesses": "158;182;70;1", "wc_questions": "2;343;24;1", "wc_limitations": "1;1;1;1", "wc_review": "275;685;406;99", "wc_reply_reviewers": "25;69;0;0", "wc_reply_authors": "78;42;78;78", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 90.0, 46.58862522118462 ], "wc_strengths_avg": [ 80.0, 45.639894828976104 ], "wc_weaknesses_avg": [ 102.75, 72.03948570055175 ], "wc_questions_avg": [ 92.5, 144.918080307462 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 366.25, 213.8520224360761 ], "wc_reply_reviewers_avg": [ 23.5, 28.182441342083905 ], "wc_reply_authors_avg": [ 69.0, 15.588457268119896 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4555540623595180037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "berkeley.edu;columbia.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Columbia University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.columbia.edu;https://web.mit.edu", "aff_unique_abbr": "UC Berkeley;Columbia;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Optimal Caching and Model Selection for Large Model Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70839", "id": "gd20oaZqqF", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b914a8fcea5c176cf1ed75c762ce27fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gd20oaZqqF", "openreview": "https://openreview.net/forum?id=gd20oaZqqF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70839", "video": "https://nips.cc/virtual/2023/poster/70839", "author_site": "Banghua Zhu, Ying Sheng, Lianmin Zheng, Clark Barrett, Michael Jordan, Jiantao Jiao", "tldr": "", "abstract": "Large Language Models (LLMs) and other large foundation models have achieved impressive results, but their size exacerbates existing resource consumption and latency challenges. In particular, the large-scale deployment of these models is hindered by the significant resource requirements during inference. In this paper, we study two approaches for mitigating these challenges: employing a cache to store previous queries and learning a model selector to choose from an ensemble of models for query processing.\n\nTheoretically, we provide an optimal algorithm for jointly optimizing both approaches to reduce the inference cost in both offline and online tabular settings. \nBy combining a caching algorithm, namely Greedy Dual Size with Frequency (GDSF) or Least Expected Cost (LEC), with a model selector, we achieve optimal rates in both offline and online settings. Empirically, simulations show that our caching and model selection algorithm greatly improves over the baselines, with up to $50\\times$ improvement over the baseline when the ratio between the maximum cost and minimum cost is $100$. Experiments on real datasets show a $4.3\\times$ improvement in FLOPs over the baseline when the ratio for FLOPs is $10$, and a $1.8\\times$ improvement in latency when the ratio for average latency is $1.85$.", "keywords": "caching;model selection;large language models;foundation models;inference;bandit;regret", "primary_area": "", "supplementary_material": "/attachment/17b5b9cba89a98ea4d47fe96f5cd519212fde9c7.zip", "author": "Banghua Zhu;Ying Sheng;Lianmin Zheng;Clark Barrett;Michael Jordan;Jiantao Jiao", "authorids": "~Banghua_Zhu1;~Ying_Sheng1;~Lianmin_Zheng2;~Clark_Barrett1;~Michael_Jordan1;~Jiantao_Jiao1", "gender": "M;F;M;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~banghua/;https://sites.google.com/view/yingsheng;http://lmzheng.net/;http://theory.stanford.edu/~barrett;http://www.cs.berkeley.edu/~jordan/;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en", "dblp": "204/5394;262/6232.html;211/7027;b/ClarkWBarrett;j/MichaelIJordan;43/8919", "google_scholar": "https://scholar.google.com/citations?hl=en;xMhGYpgAAAAJ;_7Q8uIYAAAAJ;https://scholar.google.com.tw/citations?user=BtwmZfQAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;aO8KpGcAAAAJ", "orcid": ";0000-0002-1883-2126;;0000-0002-9522-3084;0000-0001-8935-817X;", "linkedin": ";;;clark-barrett-a5b157/;;", "or_profile": "~Banghua_Zhu1;~Ying_Sheng1;~Lianmin_Zheng2;~Clark_Barrett1;~Michael_Jordan1;~Jiantao_Jiao1", "aff": "University of California, Berkeley;Stanford University;University of California, Berkeley;Stanford University;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;stanford.edu;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;PhD student;Professor (Research);Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023towards,\ntitle={Towards Optimal Caching and Model Selection for Large Model Inference},\nauthor={Banghua Zhu and 
Ying Sheng and Lianmin Zheng and Clark Barrett and Michael Jordan and Jiantao Jiao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gd20oaZqqF}\n}", "github": "", "project": "", "reviewers": "FCsT;13j8;rcfj;puCe;X4Ge", "pdf_size": 1756555, "rating": "5;6;7;7;8", "confidence": "3;3;3;3;1", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;4;3", "wc_summary": "57;38;89;526;40", "wc_strengths": "21;44;146;50;58", "wc_weaknesses": "340;107;130;82;36", "wc_questions": "43;233;65;155;29", "wc_limitations": "18;4;100;15;9", "wc_review": "479;426;530;828;172", "wc_reply_reviewers": "85;57;19;34;22", "wc_reply_authors": "34;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 150.0, 188.886209131318 ], "wc_strengths_avg": [ 63.8, 42.90640977756121 ], "wc_weaknesses_avg": [ 139.0, 105.22737286466862 ], "wc_questions_avg": [ 105.0, 77.59381418644143 ], "wc_limitations_avg": [ 29.2, 35.72897983430257 ], "wc_review_avg": [ 487.0, 210.304541082688 ], "wc_reply_reviewers_avg": [ 43.4, 24.727312834192073 ], "wc_reply_authors_avg": [ 6.8, 13.6 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6864064729836441, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1739851536693835609&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "berkeley.edu;stanford.edu;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu", "author_num": 6, "aff_unique_index": "0;1;0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.stanford.edu", "aff_unique_abbr": "UC Berkeley;Stanford", "aff_campus_unique_index": "0;1;0;1;0;0", "aff_campus_unique": "Berkeley;Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Finding Safe Zones of Markov Decision Processes Policies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70838", "id": "gdVcFOvxT3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dfaa29ed28dfa175bcc5e2a54aa199f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gdVcFOvxT3", "openreview": "https://openreview.net/forum?id=gdVcFOvxT3", "poster": "/media/PosterPDFs/NeurIPS%202023/70838.png?t=1702014658.5170026", "slides": "https://nips.cc/virtual/2023/poster/70838", "video": "https://nips.cc/virtual/2023/poster/70838", "author_site": "Lee Cohen, Yishay Mansour, Michal Moshkovitz", "tldr": "", "abstract": "Given a policy of a Markov Decision Process, we define a SafeZone as a subset of states, such that most of the policy's trajectories are confined to this subset. The quality of a SafeZone is parameterized by the number of states and the escape probability, i.e., the probability that a random trajectory will leave the subset. SafeZones are especially interesting when they have a small number of states and low escape probability. We study the complexity of finding optimal SafeZones, and show that in general, the problem is computationally hard. 
For this reason, we concentrate on finding approximate SafeZones. Our main result is a bi-criteria approximation learning algorithm with a factor of almost $2$ approximation for both the escape probability and SafeZone size, using polynomial sample complexity.", "keywords": "Theoretical guarantees;algorithms;learning theory;MDP;computational complexity;Interpretability", "primary_area": "", "supplementary_material": "/attachment/2ab537c8815ed7016623954ea03b231429d69853.zip", "author": "Lee Cohen;Yishay Mansour;Michal Moshkovitz", "authorids": "~Lee_Cohen1;~Yishay_Mansour2;~Michal_Moshkovitz2", "gender": "F;F;M", "homepage": "https://sites.google.com/view/leecohen;https://sites.google.com/view/michal-moshkovitz;https://www.cs.tau.ac.il/~mansour/", "dblp": "162/2494.html;87/8727;m/YishayMansour", "google_scholar": ";G3eFbR0AAAAJ;OEJUgwkAAAAJ", "orcid": ";0009-0006-9456-9944;0000-0001-6891-2645", "linkedin": ";;", "or_profile": "~Lee_Cohen1;~Michal_Moshkovitz2;~Yishay_Mansour1", "aff": "Toyota Technological Institute at Chicago;Bosch Center for AI;School of Computer Science, Tel Aviv University", "aff_domain": "ttic.edu;bosch.com;cs.tau.ac.il", "position": "Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\ncohen2023finding,\ntitle={Finding Safe Zones of Markov Decision Processes Policies},\nauthor={Lee Cohen and Yishay Mansour and Michal Moshkovitz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gdVcFOvxT3}\n}", "github": "", "project": "", "reviewers": "N6Et;EfcG;JjfH;YYwR", "pdf_size": 2441631, "rating": "5;5;5;6", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;1;2;2", "wc_summary": "134;198;94;61", "wc_strengths": "113;113;58;78", "wc_weaknesses": "334;338;363;31", "wc_questions": "35;39;2;3", "wc_limitations": "1;1;8;1", "wc_review": "617;689;525;174", "wc_reply_reviewers": "15;16;80;0", "wc_reply_authors": "0;0;39;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 121.75, 51.050832510351874 ], "wc_strengths_avg": [ 90.5, 23.58495283014151 ], "wc_weaknesses_avg": [ 266.5, 136.41939011738765 ], "wc_questions_avg": [ 19.75, 17.311484627264065 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 501.25, 197.6769776681139 ], "wc_reply_reviewers_avg": [ 27.75, 30.825111516424396 ], "wc_reply_authors_avg": [ 9.75, 16.887495373796554 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ljqfScoJ_PMJ:scholar.google.com/&scioq=Finding+Safe+Zones+of+Markov+Decision+Processes+Policies&hl=en&as_sdt=0,7", "gs_version_total": 5, "email": "ttic.edu;bosch.com;cs.tau.ac.il", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Toyota Technological Institute at Chicago;Bosch Center for AI;Tel Aviv University", "aff_unique_dep": ";Center for AI;School of Computer Science", "aff_unique_url": "https://www.tti-chicago.org;https://www.bosch-ai.com;https://www.tau.ac.il", "aff_unique_abbr": "TTI Chicago;BCAI;TAU", "aff_campus_unique_index": "0;2", "aff_campus_unique":
"Chicago;;Tel Aviv", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;Germany;Israel" }, { "title": "xTrimoGene: An Efficient and Scalable Representation Learner for Single-Cell RNA-Seq Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70837", "id": "gdwcoBCMVi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/db68f1c25678f72561ab7c97ce15d912-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gdwcoBCMVi", "openreview": "https://openreview.net/forum?id=gdwcoBCMVi", "poster": "/media/PosterPDFs/NeurIPS%202023/70837.png?t=1699462184.3030171", "slides": "https://nips.cc/virtual/2023/poster/70837", "video": "https://nips.cc/virtual/2023/poster/70837", "author_site": "Jing Gong, Minsheng Hao, Xingyi Cheng, Xin Zeng, Chiming Liu, Jianzhu Ma, Xuegong Zhang, Taifeng Wang, Le Song", "tldr": "", "abstract": "Advances in high-throughput sequencing technology have led to significant progress in measuring gene expressions at the single-cell level. The amount of publicly available single-cell RNA-seq (scRNA-seq) data is already surpassing 50M records for humans with each record measuring 20,000 genes. This highlights the need for unsupervised representation learning to fully ingest these data, yet classical transformer architectures are prohibitive to train on such data in terms of both computation and memory. To address this challenge, we propose a novel asymmetric encoder-decoder transformer for scRNA-seq data, called xTrimoGene$^\\alpha$ (or xTrimoGene for short), which leverages the sparse characteristic of the data to scale up the pre-training. This scalable design of xTrimoGene reduces FLOPs by one to two orders of magnitude compared to classical transformers while maintaining high accuracy, enabling us to train the largest transformer models over the largest scRNA-seq dataset today. Our experiments also show that the performance of xTrimoGene improves as we scale up the model sizes, and it also leads to SOTA performance over various downstream tasks, such as cell type annotation, perturb-seq effect prediction, and drug combination prediction. 
\nThe xTrimoGene model is now available for use as a service via the following link: https://api.biomap.com/xTrimoGene/apply.", "keywords": "pre-train;encoder-decoder;scRNA-seq;scalable", "primary_area": "", "supplementary_material": "/attachment/d2f0df05fb8591e56c949dcd921619faba2ce4cb.pdf", "author": "Jing Gong;Minsheng Hao;Xingyi Cheng;Xin Zeng;Chiming Liu;Jianzhu Ma;Xuegong Zhang;Taifeng Wang;Le Song", "authorids": "~Jing_Gong1;~Minsheng_Hao1;~Xingyi_Cheng3;~Xin_Zeng5;~Chiming_Liu1;~Jianzhu_Ma2;zhangxg@tsinghua.edu.cn;~Taifeng_Wang2;~Le_Song1", "gender": "M;M;M;Not Specified;;M;;M;M", "homepage": "https://github.com/Tsinghua-gongjing;;;https://github.com/isZengxin/isZengxin;;https://majianzhu.com/;;https://scholar.google.com/citations?user=aMNBEk0AAAAJ&hl=zh-CN;http://www.cc.gatech.edu/~lsong", "dblp": "14/1793;309/3607;206/6376;;;24/9080.html;;01/1483;94/3481", "google_scholar": "zxgmfL8AAAAJ;UA6g8C8AAAAJ;shO7XmIAAAAJ;;;;;aMNBEk0AAAAJ;Xl4E0CsAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Jing_Gong1;~Minsheng_Hao1;~Xingyi_Cheng3;~Xin_Zeng5;~Chiming_Liu1;~Jianzhu_Ma2;zhangxg@tsinghua.edu.cn;~Taifeng_Wang2;~Le_Song1", "aff": ";Tsinghua University;BioMap;Biomap;;Tsinghua University;;BioMap;College of Computing, Georgia Institute of Technology", "aff_domain": ";tsinghua.edu.cn;biomap.com;biomap.com;;tsinghua.edu.cn;;biomap.com;cc.gatech.edu", "position": ";PhD student;Principal Researcher;Researcher;;Associate Professor;;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\ngong2023xtrimogene,\ntitle={xTrimoGene: An Efficient and Scalable Representation Learner for Single-Cell {RNA}-Seq Data},\nauthor={Jing Gong and Minsheng Hao and Xingyi Cheng and Xin Zeng and Chiming Liu and Jianzhu Ma and Xuegong Zhang and Taifeng Wang and Le Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gdwcoBCMVi}\n}", "github": "", "project": "", "reviewers": "MmMn;N2nG;TD4Y;9yec", "pdf_size": 1164701, "rating": "5;7;7;7", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "48;30;46;49", "wc_strengths": "67;56;59;101", "wc_weaknesses": "1090;81;42;416", "wc_questions": "246;5;29;3", "wc_limitations": "28;8;33;31", "wc_review": "1479;180;209;600", "wc_reply_reviewers": "845;24;25;133", "wc_reply_authors": "838;99;198;297", "reply_reviewers": "3;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 43.25, 7.725768570181222 ], "wc_strengths_avg": [ 70.75, 17.92170471802278 ], "wc_weaknesses_avg": [ 407.25, 420.1400808063901 ], "wc_questions_avg": [ 70.75, 101.69654615570776 ], "wc_limitations_avg": [ 25.0, 9.974968671630002 ], "wc_review_avg": [ 617.0, 524.5869803950533 ], "wc_reply_reviewers_avg": [ 256.75, 342.50282845547423 ], "wc_reply_authors_avg": [ 358.0, 285.8329931970765 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13528688777787083218&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": ";tsinghua.edu.cn;biomap.com;biomap.com;;tsinghua.edu.cn;;biomap.com;cc.gatech.edu", "author_num": 9, "aff_unique_index": 
"0;1;1;0;1;2", "aff_unique_norm": "Tsinghua University;BioMap;Georgia Institute of Technology", "aff_unique_dep": ";;College of Computing", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.gatech.edu", "aff_unique_abbr": "THU;;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "0;0;2", "aff_country_unique": "China;;United States" }, { "title": "How do Minimum-Norm Shallow Denoisers Look in Function Space?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70836", "id": "gdzxWGGxWE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b39cef2ef90591cffdc9c674cd55bebe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gdzxWGGxWE", "openreview": "https://openreview.net/forum?id=gdzxWGGxWE", "poster": "/media/PosterPDFs/NeurIPS%202023/70836.png?t=1701692899.4166253", "slides": "https://nips.cc/virtual/2023/poster/70836", "video": "https://nips.cc/virtual/2023/poster/70836", "author_site": "Chen Zeno, Greg Ongie, Yaniv Blumenfeld, Nir Weinberger, Daniel Soudry", "tldr": "", "abstract": "Neural network (NN) denoisers are an essential building block in many common tasks, ranging from image reconstruction to image generation. However, the success of these models is not well understood from a theoretical perspective. In this paper, we aim to characterize the functions realized by shallow ReLU NN denoisers --- in the common theoretical setting of interpolation (i.e., zero training loss) with a minimal representation cost (i.e., minimal $\\ell^2$ norm weights). First, for univariate data, we derive a closed form for the NN denoiser function, find it is contractive toward the clean data points, and prove it generalizes better than the empirical MMSE estimator at a low noise level. Next, for multivariate data, we find the NN denoiser functions in a closed form under various geometric assumptions on the training data: data contained in a low-dimensional subspace, data contained in a union of one-sided rays, or several types of simplexes. These functions decompose into a sum of simple rank-one piecewise linear interpolations aligned with edges and/or faces connecting training samples. 
\nWe empirically verify this alignment phenomenon on synthetic data and real images.", "keywords": "Denoiser;Denoising;Neural network;Function space", "primary_area": "", "supplementary_material": "/attachment/a062736d6e72a3ca2e475ab5ed2f80977d425e45.pdf", "author": "Chen Zeno;Greg Ongie;Yaniv Blumenfeld;Nir Weinberger;Daniel Soudry", "authorids": "~Chen_Zeno1;~Greg_Ongie1;~Yaniv_Blumenfeld1;~Nir_Weinberger1;~Daniel_Soudry1", "gender": "M;;M;M;M", "homepage": ";;;https://sites.google.com/view/nir-weinberger/home;https://soudry.github.io/", "dblp": ";;;82/11151.html;126/1779", "google_scholar": "zYd6mEUAAAAJ;;https://scholar.google.com/citations?hl=en;zRkNfH8AAAAJ;https://scholar.google.co.il/citations?user=AEBWEm8AAAAJ", "orcid": ";;;;0000-0001-9368-6352", "linkedin": "chen-zeno-471b28200/;;yaniv-blumenfeld-6103b516b/;;daniel-soudry-2aa3a88/", "or_profile": "~Chen_Zeno1;~Greg_Ongie1;~Yaniv_Blumenfeld1;~Nir_Weinberger1;~Daniel_Soudry1", "aff": "Technion - Israel Institute of Technology, Technion;;;Electrical Engineering Department, Technion \u2013 Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion", "aff_domain": "campus.technion.ac.il;;;ee.technion.ac.il;technion.ac.il", "position": "PhD student;;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzeno2023how,\ntitle={How do Minimum-Norm Shallow Denoisers Look in Function Space?},\nauthor={Chen Zeno and Greg Ongie and Yaniv Blumenfeld and Nir Weinberger and Daniel Soudry},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gdzxWGGxWE}\n}", "github": "", "project": "", "reviewers": "qSF7;7Tj7;4XGM;kiMs", "pdf_size": 1938856, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "2;3;2;3", "presentation": "2;4;3;4", "wc_summary": "120;101;120;115", "wc_strengths": "106;70;75;91", "wc_weaknesses": "124;82;334;147", "wc_questions": "6;86;90;84", "wc_limitations": "9;7;1;7", "wc_review": "365;346;620;444", "wc_reply_reviewers": "63;14;55;24", "wc_reply_authors": "266;0;427;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 114.0, 7.7781745930520225 ], "wc_strengths_avg": [ 85.5, 14.150971698084906 ], "wc_weaknesses_avg": [ 171.75, 96.53075934643837 ], "wc_questions_avg": [ 66.5, 34.99642838919423 ], "wc_limitations_avg": [ 6.0, 3.0 ], "wc_review_avg": [ 443.75, 108.19051483378753 ], "wc_reply_reviewers_avg": [ 39.0, 20.506096654409877 ], "wc_reply_authors_avg": [ 173.25, 182.36141998789108 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7037362232890507998&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "campus.technion.ac.il;;;ee.technion.ac.il;technion.ac.il", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technion - Israel Institute of Technology;Technion \u2013 Israel Institute of Technology", "aff_unique_dep": ";Electrical Engineering Department", "aff_unique_url": "https://www.technion.ac.il;https://www.technion.ac.il", "aff_unique_abbr": "Technion;Technion", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Combating Representation Learning Disparity with Geometric Harmonization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70835", "id": "geLARFEK8O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/40bb79c081828bebdc39d65a82367246-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=geLARFEK8O", "openreview": "https://openreview.net/forum?id=geLARFEK8O", "poster": "/media/PosterPDFs/NeurIPS%202023/70835.png?t=1699436032.259549", "slides": "https://nips.cc/virtual/2023/poster/70835", "video": "https://nips.cc/virtual/2023/poster/70835", "author_site": "Zhihan Zhou, Jiangchao Yao, Feng Hong, Ya Zhang, Bo Han, Yanfeng Wang", "tldr": "", "abstract": "Self-supervised learning (SSL) as an effective paradigm of representation learning has achieved tremendous success on various curated datasets in diverse scenarios. Nevertheless, when facing the long-tailed distribution in real-world applications, it is still hard for existing methods to capture transferable and robust representation. The attribution is that the vanilla SSL methods that pursue the sample-level uniformity easily leads to representation learning disparity, where head classes with the huge sample number dominate the feature regime but tail classes with the small sample number passively collapse. To address this problem, we propose a novel Geometric Harmonization (GH) method to encourage the category-level uniformity in representation learning, which is more benign to the minority and almost does not hurt the majority under long-tailed distribution. Specially, GH measures the population statistics of the embedding space on top of self-supervised learning, and then infer an fine-grained instance-wise calibration to constrain the space expansion of head classes and avoid the passive collapse of tail classes. Our proposal does not alter the setting of SSL and can be easily integrated into existing methods in a low-cost manner. 
Extensive results on a range of benchmark datasets show the effectiveness of GH, with high tolerance to distribution skewness.", "keywords": "Long-tailed learning;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/b8192be41af790ee55193fd5c634371b15e7569a.pdf", "author": "Zhihan Zhou;Jiangchao Yao;Feng Hong;Ya Zhang;Bo Han;Yanfeng Wang", "authorids": "~Zhihan_Zhou2;~Jiangchao_Yao1;~Feng_Hong1;~Ya_Zhang1;~Bo_Han1;~Yanfeng_Wang1", "gender": ";M;M;F;M;M", "homepage": ";https://sunarker.github.io/;;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/;https://bhanml.github.io/", "dblp": "226/5688-2;166/5900;68/1260-4;85/3714-2;55/5407-1.html;241/0472-3", "google_scholar": ";w8oDh9QAAAAJ;DCTAaNQAAAAJ;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;nTNjqHwAAAAJ", "orcid": "0000-0002-9475-465X;;;0000-0002-5390-9053;0000-0002-3196-2347;", "linkedin": ";;;;;", "or_profile": "~Zhihan_Zhou2;~Jiangchao_Yao1;~Feng_Hong1;~Ya_Zhang1;~Yanfeng_Wang1;~bo_han2", "aff": "Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;RIKEN", "aff_domain": "sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;riken.jp", "position": "PhD student;Researcher;PhD student;Professor;Full Professor;Adjunct Scientist", "bibtex": "@inproceedings{\nzhou2023combating,\ntitle={Combating Representation Learning Disparity with Geometric Harmonization},\nauthor={Zhihan Zhou and Jiangchao Yao and Feng Hong and Ya Zhang and Bo Han and Yanfeng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=geLARFEK8O}\n}", "github": "", "project": "", "reviewers": "R4di;G4d9;krnL;pT3P;TBJn;M9kH", "pdf_size": 2456355, "rating": "6;6;6;7;7;7", "confidence": "2;4;4;3;4;4", "soundness": "2;2;3;3;3;3", "novelty": "3;2;3;3;3;3", "presentation": "2;3;3;3;3;4", "wc_summary": "107;74;95;65;91;89", "wc_strengths": "49;41;62;103;103;105", "wc_weaknesses": "1041;154;244;141;103;128", "wc_questions": "34;67;5;64;4;4", "wc_limitations": "38;5;1;13;1;34", "wc_review": "1269;341;407;386;302;360", "wc_reply_reviewers": "111;33;67;24;15;34", "wc_reply_authors": "70;41;58;45;41;50", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 86.83333333333333, 13.764890926637314 ], "wc_strengths_avg": [ 77.16666666666667, 27.20549372624744 ], "wc_weaknesses_avg": [ 301.8333333333333, 333.4633496436386 ], "wc_questions_avg": [ 29.666666666666668, 27.438820836342234 ], "wc_limitations_avg": [ 15.333333333333334, 15.195028426721974 ], "wc_review_avg": [ 510.8333333333333, 340.6823626521077 ], "wc_reply_reviewers_avg": [ 47.333333333333336, 32.6938662273046 ], "wc_reply_authors_avg": [ 50.833333333333336, 10.382945418115014 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.21821789023599236, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10058718690149638057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;riken.jp", 
"author_num": 6, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.riken.jp", "aff_unique_abbr": "SJTU;Shanghai AI Lab;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Japan" }, { "title": "Learning From Biased Soft Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70834", "id": "gevmGxsTSI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bad8ddaed5feb552f9e8f2e37c0531a1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gevmGxsTSI", "openreview": "https://openreview.net/forum?id=gevmGxsTSI", "poster": "/media/PosterPDFs/NeurIPS%202023/70834.png?t=1699552614.492435", "slides": "https://nips.cc/virtual/2023/poster/70834", "video": "https://nips.cc/virtual/2023/poster/70834", "author_site": "Hua Yuan, Yu Shi, Ning Xu, Xu Yang, Xin Geng, Yong Rui", "tldr": "", "abstract": "Since the advent of knowledge distillation, many researchers have been intrigued by the $\\textit{dark knowledge}$ hidden in the soft labels generated by the teacher model. This prompts us to scrutinize the circumstances under which these soft labels are effective. Predominant existing theories implicitly require that the soft labels are close to the ground-truth labels. In this paper, however, we investigate whether biased soft labels are still effective. Here, bias refers to the discrepancy between the soft labels and the ground-truth labels. We present two indicators to measure the effectiveness of the soft labels. Based on the two indicators, we propose moderate conditions to ensure that, the biased soft label learning problem is both $\\textit{classifier-consistent}$ and $\\textit{Empirical Risk Minimization}$ (ERM) $\\textit{learnable}$, which can be applicable even for large-biased soft labels. We further design a heuristic method to train Skillful but Bad Teachers (SBTs), and these teachers with accuracy less than 30\\% can teach students to achieve accuracy over 90\\% on CIFAR-10, which is comparable to models trained on the original data. The proposed indicators adequately measure the effectiveness of the soft labels generated in this process. Moreover, our theoretical framework can be adapted to elucidate the effectiveness of soft labels in three weakly-supervised learning paradigms, namely incomplete supervision, partial label learning and learning with additive noise. 
Experimental results demonstrate that our indicators can measure the effectiveness of biased soft labels generated by teachers or in these weakly-supervised learning paradigms.", "keywords": "Soft label; knowledge distillation; weakly-supervised learning; Machine learning.", "primary_area": "", "supplementary_material": "", "author": "Hua Yuan;Yu Shi;Ning Xu;Xu Yang;Xin Geng;Yong Rui", "authorids": "~Hua_Yuan1;~Yu_Shi5;~Ning_Xu5;~Xu_Yang5;~Xin_Geng1;~Yong_Rui2", "gender": "M;;M;M;M;M", "homepage": "http://palm.seu.edu.cn/homepage/yuanhua/demo/index.html;;http://palm.seu.edu.cn/xuning/;;http://palm.seu.edu.cn/xgeng/index.htm;", "dblp": ";;04/5856-9;63/1534-21.html;;r/YongRui", "google_scholar": ";;;SqdxMH0AAAAJ;ZOCxkIcAAAAJ;rCGsLtcAAAAJ", "orcid": ";;;0000-0002-8276-2679;;", "linkedin": ";;;;;", "or_profile": "~Hua_Yuan1;~Yu_Shi5;~Ning_Xu5;~Xu_Yang5;~Xin_Geng1;~Yong_Rui2", "aff": "Southeast University;;Southeast University;Southeast University;Southeast University, China;Lenovo", "aff_domain": "seu.edu.cn;;seu.edu.cn;seu.edu.cn;seu.edu.cn;lenovo.com", "position": "PhD student;;Assistant Professor;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nyuan2023learning,\ntitle={Learning From Biased Soft Labels},\nauthor={Hua Yuan and Yu Shi and Ning Xu and Xu Yang and Xin Geng and Yong Rui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gevmGxsTSI}\n}", "github": "", "project": "", "reviewers": "vNqZ;mHw2;HDpR;M44N;XfEm", "pdf_size": 679341, "rating": "5;5;6;6;6", "confidence": "3;3;3;4;5", "soundness": "3;2;4;3;3", "novelty": "2;3;3;3;4", "presentation": "3;2;3;3;3", "wc_summary": "86;16;70;90;269", "wc_strengths": "23;77;27;115;93", "wc_weaknesses": "132;374;251;83;42", "wc_questions": "267;3;27;11;48", "wc_limitations": "82;22;1;10;67", "wc_review": "590;492;376;309;519", "wc_reply_reviewers": "84;19;213;25;0", "wc_reply_authors": "32;32;110;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 106.2, 85.58130637002452 ], "wc_strengths_avg": [ 67.0, 36.3758161420469 ], "wc_weaknesses_avg": [ 176.4, 121.1356264688469 ], "wc_questions_avg": [ 71.2, 99.10075680841192 ], "wc_limitations_avg": [ 36.4, 32.165820368832506 ], "wc_review_avg": [ 457.2, 101.21541384591579 ], "wc_reply_reviewers_avg": [ 68.2, 77.65925572653913 ], "wc_reply_authors_avg": [ 34.8, 40.23133107417651 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15467611333049345805&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "seu.edu.cn;;seu.edu.cn;seu.edu.cn;seu.edu.cn;lenovo.com", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Southeast University;Lenovo Group Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.lenovo.com", "aff_unique_abbr": "SEU;Lenovo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning to Configure Separators in Branch-and-Cut", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/70833", "id": "gf5xJVQS5p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bcdec1c2d60f94a93b6e36f937aa0530-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gf5xJVQS5p", "openreview": "https://openreview.net/forum?id=gf5xJVQS5p", "poster": "/media/PosterPDFs/NeurIPS%202023/70833.png?t=1701722989.5642345", "slides": "https://nips.cc/virtual/2023/poster/70833", "video": "https://nips.cc/virtual/2023/poster/70833", "author_site": "Sirui Li, Wenbin Ouyang, Max Paulus, Cathy Wu", "tldr": "", "abstract": "Cutting planes are crucial in solving mixed integer linear programs (MILP) as they facilitate bound improvements on the optimal solution. Modern MILP solvers rely on a variety of separators to generate a diverse set of cutting planes by invoking the separators frequently during the solving process. This work identifies that MILP solvers can be drastically accelerated by appropriately selecting separators to activate. As the combinatorial separator selection space imposes challenges for machine learning, we *learn to separate* by proposing a novel data-driven strategy to restrict the selection space and a learning-guided algorithm on the restricted space. Our method predicts instance-aware separator configurations which can dynamically adapt during the solve, effectively accelerating the open source MILP solver SCIP by improving the relative solve time up to 72% and 37% on synthetic and real-world MILP benchmarks. Our work complements recent work on learning to select cutting planes and highlights the importance of separator management.", "keywords": "Combinatorial Optimization;Branch-and-Cut;Learning Guided Optimization;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/d0966c872dfeedd02732cec2554d38bbc5cc3b07.pdf", "author": "Sirui Li;Wenbin Ouyang;Max B. Paulus;Cathy Wu", "authorids": "~Sirui_Li1;~Wenbin_Ouyang1;~Max_B._Paulus1;~Cathy_Wu1", "gender": ";M;F;M", "homepage": "https://siruil.github.io/;;http://wucathy.com;https://ml.inf.ethz.ch/people/person-detail.MjIyMDk5.TGlzdC8xODA3LC0xNzg2MjE4NDI4.html", "dblp": ";169/1321;155/3740;267/5373", "google_scholar": "Q4VMj_sAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0001-8594-303X;", "linkedin": ";wenbin-ouyang-996180208/;cathywu/;", "or_profile": "~Sirui_Li1;~Wenbin_Ouyang1;~Cathy_Wu1;~Max_B_Paulus1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;ethz.ch", "position": "PhD student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning to Configure Separators in Branch-and-Cut},\nauthor={Sirui Li and Wenbin Ouyang and Max B. 
Paulus and Cathy Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gf5xJVQS5p}\n}", "github": "", "project": "", "reviewers": "wvgK;LDTP;JiCg;TfAQ", "pdf_size": 873362, "rating": "4;5;6;8", "confidence": "4;3;4;4", "soundness": "4;3;3;3", "novelty": "2;2;2;4", "presentation": "3;1;3;4", "wc_summary": "41;71;43;181", "wc_strengths": "60;93;33;87", "wc_weaknesses": "179;261;207;273", "wc_questions": "258;142;2;344", "wc_limitations": "1;97;1;11", "wc_review": "539;664;286;896", "wc_reply_reviewers": "108;68;156;187", "wc_reply_authors": "56;436;521;292", "reply_reviewers": "1;1;2;2", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 84.0, 57.245087125446844 ], "wc_strengths_avg": [ 68.25, 23.84716964337697 ], "wc_weaknesses_avg": [ 230.0, 38.535697735995385 ], "wc_questions_avg": [ 186.5, 128.39295151993352 ], "wc_limitations_avg": [ 27.5, 40.33298897924626 ], "wc_review_avg": [ 596.25, 220.21168792777553 ], "wc_reply_reviewers_avg": [ 129.75, 45.42232380669223 ], "wc_reply_authors_avg": [ 326.25, 176.19644576437972 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13759965396779585514&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.ethz.ch", "aff_unique_abbr": "MIT;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Reducing Shape-Radiance Ambiguity in Radiance Fields with a Closed-Form Color Estimation Method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70832", "id": "gh9JNeqjzo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b87738474533cab76c7bee4e08443aca-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gh9JNeqjzo", "openreview": "https://openreview.net/forum?id=gh9JNeqjzo", "poster": "/media/PosterPDFs/NeurIPS%202023/70832.png?t=1698913595.5740855", "slides": "https://nips.cc/virtual/2023/poster/70832", "video": "https://nips.cc/virtual/2023/poster/70832", "author_site": "Qihang Fang, Qihang Fang, Yafei Song, Keqiang Li, Liefeng Bo", "tldr": "", "abstract": "A neural radiance field (NeRF) enables the synthesis of cutting-edge realistic novel view images of a 3D scene. It includes density and color fields to model the shape and radiance of a scene, respectively. Supervised by the photometric loss in an end-to-end training manner, NeRF inherently suffers from the shape-radiance ambiguity problem, i.e., it can perfectly fit training views but does not guarantee decoupling the two fields correctly. To deal with this issue, existing works have incorporated prior knowledge to provide an independent supervision signal for the density field, including total variation loss, sparsity loss, distortion loss, etc. 
These losses are based on general assumptions about the density field, e.g., it should be smooth, sparse, or compact, which are not adaptive to a specific scene. In this paper, we propose a more adaptive method to reduce the shape-radiance ambiguity. The key is a rendering method that is only based on the density field. Specifically, we first estimate the color field based on the density field and posed images in a closed form. Then NeRF's rendering process can proceed. We address the problems in estimating the color field, including occlusion and non-uniformly distributed views. Afterwards, it is applied to regularize NeRF's density field. As our regularization is guided by photometric loss, it is more adaptive compared to existing ones. Experimental results show that our method improves the density field of NeRF both qualitatively and quantitatively. Our code is available at https://github.com/qihangGH/Closed-form-color-field.", "keywords": "Neural radiance field;Novel-view synthesis;Regularization", "primary_area": "", "supplementary_material": "", "author": "Qihang Fang;Yafei Song;Keqiang Li;Liefeng Bo", "authorids": "~Qihang_Fang2;~Yafei_Song1;~Keqiang_Li1;~Liefeng_Bo1", "gender": "M;M;M;M", "homepage": ";https://uestcjay.github.io/;https://research.cs.washington.edu/istc/lfb/;", "dblp": ";49/8134;17/6808;", "google_scholar": "VMO6UOgAAAAJ;;FJwtMf0AAAAJ;TjhAoVoAAAAJ", "orcid": ";;;0000-0003-1438-0094", "linkedin": ";;;", "or_profile": "~Yafei_Song1;~Keqiang_Li1;~Liefeng_Bo1;~Qihang_Fang1", "aff": "Alibaba Group;Institute of Automation, Chinese Academy of Sciences;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;ia.ac.cn;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;MS student;Principal Researcher;Intern", "bibtex": "@inproceedings{\nfang2023reducing,\ntitle={Reducing Shape-Radiance Ambiguity in Radiance Fields with a Closed-Form Color Estimation Method},\nauthor={Qihang Fang and Yafei Song and Keqiang Li and Liefeng Bo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gh9JNeqjzo}\n}", "github": "", "project": "", "reviewers": "hBrk;LJiP;Siz9;qn5t", "pdf_size": 2184434, "rating": "4;6;6;7", "confidence": "4;5;5;4", "soundness": "2;2;3;2", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "117;93;130;104", "wc_strengths": "53;40;46;65", "wc_weaknesses": "325;285;285;476", "wc_questions": "29;174;35;102", "wc_limitations": "11;42;8;33", "wc_review": "535;634;504;780", "wc_reply_reviewers": "0;115;16;233", "wc_reply_authors": "0;0;0;284", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 111.0, 13.874436925511608 ], "wc_strengths_avg": [ 51.0, 9.300537618869138 ], "wc_weaknesses_avg": [ 342.75, 78.64596302417563 ], "wc_questions_avg": [ 85.0, 58.834513680322026 ], "wc_limitations_avg": [ 23.5, 14.396180048887969 ], "wc_review_avg": [ 613.25, 107.5810740790405 ], "wc_reply_reviewers_avg": [ 91.0, 93.06718003678849 ], "wc_reply_authors_avg": [ 71.0, 122.97560733739029 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=4479755148296732830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "alibaba-inc.com;ia.ac.cn;alibaba-inc.com;alibaba-inc.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Alibaba Group;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "https://www.alibaba.com;http://www.ia.cas.cn", "aff_unique_abbr": "Alibaba;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Hierarchical Semi-Implicit Variational Inference with Application to Diffusion Model Acceleration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70831", "id": "ghIBaprxsV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bbb3c3aa33616c55521e2f826c132bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ghIBaprxsV", "openreview": "https://openreview.net/forum?id=ghIBaprxsV", "poster": "/media/PosterPDFs/NeurIPS%202023/70831.png?t=1701488025.0988364", "slides": "https://nips.cc/virtual/2023/poster/70831", "video": "https://nips.cc/virtual/2023/poster/70831", "author_site": "Longlin Yu, Tianyu Xie, Yu Zhu, Tong Yang, Xiangyu Zhang, Cheng Zhang", "tldr": "", "abstract": "Semi-implicit variational inference (SIVI) has been introduced to expand the analytical variational families by defining expressive semi-implicit distributions in a hierarchical manner. However, the single-layer architecture commonly used in current SIVI methods can be insufficient when the target posterior has complicated structures. In this paper, we propose hierarchical semi-implicit variational inference, called HSIVI, which generalizes SIVI to allow more expressive multi-layer construction of semi-implicit distributions. By introducing auxiliary distributions that interpolate between a simple base distribution and the target distribution, the conditional layers can be trained by progressively matching these auxiliary distributions one layer after another. Moreover, given pre-trained score networks, HSIVI can be used to accelerate the sampling process of diffusion models with the score matching objective. We show that HSIVI significantly enhances the expressiveness of SIVI on several Bayesian inference problems with complicated target distributions. 
When used for diffusion model acceleration, we show that HSIVI can produce high quality samples comparable to or better than the existing fast diffusion model based samplers with a small number of function evaluations on various datasets.", "keywords": "Hierarchical semi-implicit variational inference;Score based training;Diffusion model", "primary_area": "", "supplementary_material": "/attachment/b034820136ac32db4ef48abda736d357a2e012ab.zip", "author": "Longlin Yu;Tianyu Xie;Yu Zhu;Tong Yang;Xiangyu Zhang;Cheng Zhang", "authorids": "~Longlin_Yu1;~Tianyu_Xie1;~Yu_Zhu13;~Tong_Yang2;~Xiangyu_Zhang1;~Cheng_Zhang3", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/longinYu;https://tyuxie.github.io;https://github.com/zhuyu-cs;;;https://zcrabbit.github.io", "dblp": ";345/3987-1;;;95/3760-5.html;", "google_scholar": ";qbJJQ_AAAAAJ;;yu7ijD0AAAAJ;yuB-cfoAAAAJ;PddDrLgAAAAJ", "orcid": ";;0000-0001-8836-7939;;0000-0003-2138-4608;", "linkedin": ";;;;;", "or_profile": "~Longlin_Yu1;~Tianyu_Xie1;~Yu_Zhu13;~Tong_Yang2;~Xiangyu_Zhang1;~Cheng_Zhang3", "aff": "Peking University;Peking University;Institute of automation, Chinese academy of science;Megvii Technology Inc.;MEGVII Technology;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;ia.ac.cn;megvii.com;megvii.com;pku.edu.cn", "position": "PhD student;PhD student;PhD student;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyu2023hierarchical,\ntitle={Hierarchical Semi-Implicit Variational Inference with Application to Diffusion Model Acceleration},\nauthor={Longlin Yu and Tianyu Xie and Yu Zhu and Tong Yang and Xiangyu Zhang and Cheng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ghIBaprxsV}\n}", "github": "", "project": "", "reviewers": "AGbR;MCSt;EivT;PvNK;T15h;i7TH", "pdf_size": 38740412, "rating": "3;6;6;6;6;7", "confidence": "2;3;4;4;2;4", "soundness": "4;2;3;3;3;3", "novelty": "1;2;2;3;3;3", "presentation": "3;4;3;3;3;3", "wc_summary": "37;272;56;36;71;81", "wc_strengths": "8;72;43;48;114;52", "wc_weaknesses": "204;345;36;76;93;44", "wc_questions": "39;299;118;5;91;201", "wc_limitations": "29;88;17;11;1;1", "wc_review": "317;1076;270;176;370;379", "wc_reply_reviewers": "76;0;28;0;8;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;0;1;0;1;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.1666666666666665, 0.8975274678557507 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 92.16666666666667, 82.07601490217604 ], "wc_strengths_avg": [ 56.166666666666664, 32.07499891746772 ], "wc_weaknesses_avg": [ 133.0, 109.66919956548116 ], "wc_questions_avg": [ 125.5, 99.20643460313785 ], "wc_limitations_avg": [ 24.5, 29.9874973947477 ], "wc_review_avg": [ 431.3333333333333, 296.19569356911467 ], "wc_reply_reviewers_avg": [ 18.666666666666668, 27.487370837451067 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6451791670811048, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11392180956287393227&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;ia.ac.cn;megvii.com;megvii.com;pku.edu.cn", "author_num": 6, 
"aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Peking University;Chinese Academy of Sciences;Megvii Technology", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "http://www.pku.edu.cn;http://www.ia.cas.cn;https://www.megvii.com", "aff_unique_abbr": "Peking U;CAS;Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Scaling Laws for Hyperparameter Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70830", "id": "ghzEUGfRMD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/945c781d7194ea81026148838af95af7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ghzEUGfRMD", "openreview": "https://openreview.net/forum?id=ghzEUGfRMD", "poster": "/media/PosterPDFs/NeurIPS%202023/70830.png?t=1702079531.025543", "slides": "https://nips.cc/virtual/2023/poster/70830", "video": "https://nips.cc/virtual/2023/poster/70830", "author_site": "Arlind Kadra, Maciej Janowski, Martin Wistuba, Josif Grabocka", "tldr": "", "abstract": "Hyperparameter optimization is an important subfield of machine learning that focuses on tuning the hyperparameters of a chosen algorithm to achieve peak performance. Recently, there has been a stream of methods that tackle the issue of hyperparameter optimization, however, most of the methods do not exploit the dominant power law nature of learning curves for Bayesian optimization. In this work, we propose Deep Power Laws (DPL), an ensemble of neural network models conditioned to yield predictions that follow a power-law scaling pattern. Our method dynamically decides which configurations to pause and train incrementally by making use of gray-box evaluations. We compare our method against 7 state-of-the-art competitors on 3 benchmarks related to tabular, image, and NLP datasets covering 59 diverse tasks. 
Our method achieves the best results across all benchmarks by obtaining the best any-time results compared to all competitors.", "keywords": "hyperparameter optimization;multi-fidelity hyperparameter optimization;multi-fidelity hpo;power laws;deep neural networks;deep power laws;deep ensemble;deep learning;large language models;scaling laws;llm", "primary_area": "", "supplementary_material": "/attachment/07ad251fc3eed70f72831f8571d8907a4c7c76a9.pdf", "author": "Arlind Kadra;Maciej Janowski;Martin Wistuba;Josif Grabocka", "authorids": "~Arlind_Kadra1;~Maciej_Janowski1;~Martin_Wistuba1;~Josif_Grabocka1", "gender": "M;M;M;M", "homepage": ";https://github.com/worstseed;;https://www.utn.de/departments/department-engineering/machine-learning-lab/", "dblp": "252/5295;219/8260;https://dblp.uni-trier.de/pers/hd/w/Wistuba:Martin;117/4936", "google_scholar": "bMa0KUcAAAAJ;;https://scholar.google.co.uk/citations?user=pTULHVsAAAAJ;KRy27XcAAAAJ", "orcid": "0000-0001-9308-6576;;;", "linkedin": ";;https://linkedin.com/in/wistuba/;", "or_profile": "~Arlind_Kadra1;~Maciej_Janowski1;~Martin_Wistuba1;~Josif_Grabocka1", "aff": "Universit\u00e4t Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg;Amazon;Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;uni-freiburg.de;amazon.com;uni-freiburg.de", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkadra2023scaling,\ntitle={Scaling Laws for Hyperparameter Optimization},\nauthor={Arlind Kadra and Maciej Janowski and Martin Wistuba and Josif Grabocka},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ghzEUGfRMD}\n}", "github": "", "project": "", "reviewers": "ALT2;sM9u;wp1o;rbxf;7buU", "pdf_size": 8395219, "rating": "5;6;7;7;7", "confidence": "1;3;3;3;4", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;3", "presentation": "3;2;4;4;3", "wc_summary": "22;80;102;99;70", "wc_strengths": "90;88;14;68;184", "wc_weaknesses": "165;91;187;173;596", "wc_questions": "23;168;4;98;272", "wc_limitations": "1;28;5;68;14", "wc_review": "301;455;312;506;1136", "wc_reply_reviewers": "0;16;0;31;32", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 74.6, 28.855502074994295 ], "wc_strengths_avg": [ 88.8, 54.94142335251245 ], "wc_weaknesses_avg": [ 242.4, 179.90619778095473 ], "wc_questions_avg": [ 113.0, 98.50076141837687 ], "wc_limitations_avg": [ 23.2, 24.243762084297064 ], "wc_review_avg": [ 542.0, 307.4677218831271 ], "wc_reply_reviewers_avg": [ 15.8, 14.09113196304683 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8675276172357091, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9513320358291647849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "uni-freiburg.de;uni-freiburg.de;amazon.com;uni-freiburg.de", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": 
"https://www.uni-freiburg.de;https://www.uni-freiburg.de;https://www.amazon.com", "aff_unique_abbr": "Uni Freiburg;Albert-Ludwigs-Universit\u00e4t;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "SimMTM: A Simple Pre-Training Framework for Masked Time-Series Modeling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70829", "id": "ginTcBUnL8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f9bfdfe3685e4ccdbc0e7fb29cccf2a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ginTcBUnL8", "openreview": "https://openreview.net/forum?id=ginTcBUnL8", "poster": "/media/PosterPDFs/NeurIPS%202023/70829.png?t=1697798641.4338367", "slides": "https://nips.cc/virtual/2023/poster/70829", "video": "https://nips.cc/virtual/2023/poster/70829", "author_site": "Jiaxiang Dong, Jiaxiang Dong, Haixu Wu, Haoran Zhang, Li Zhang, Jianmin Wang, Mingsheng Long", "tldr": "", "abstract": "Time series analysis is widely used in extensive areas. Recently, to reduce labeling expenses and benefit various tasks, self-supervised pre-training has attracted immense interest. One mainstream paradigm is masked modeling, which successfully pre-trains deep models by learning to reconstruct the masked content based on the unmasked part. However, since the semantic information of time series is mainly contained in temporal variations, the standard way of randomly masking a portion of time points will seriously ruin vital temporal variations of time series, making the reconstruction task too difficult to guide representation learning. We thus present SimMTM, a Simple pre-training framework for Masked Time-series Modeling. By relating masked modeling to manifold learning, SimMTM proposes to recover masked time points by the weighted aggregation of multiple neighbors outside the manifold, which eases the reconstruction task by assembling ruined but complementary temporal variations from multiple masked series. SimMTM further learns to uncover the local structure of the manifold, which is helpful for masked modeling. 
Experimentally, SimMTM achieves state-of-the-art fine-tuning performance compared to the most advanced time series pre-training methods in two canonical time series analysis tasks: forecasting and classification, covering both in- and cross-domain settings.", "keywords": "Time-series analysis;pre-training;masked time-series modeling", "primary_area": "", "supplementary_material": "/attachment/729113596cb5c91f613b9f361a53bfda34cf3227.pdf", "author": "Jiaxiang Dong;Haixu Wu;Haoran Zhang;Li Zhang;Jianmin Wang;Mingsheng Long", "authorids": "~Jiaxiang_Dong1;~Haixu_Wu1;~Haoran_Zhang9;lizhang@tsinghua.edu.cn;~Jianmin_Wang1;~Mingsheng_Long5", "gender": ";M;M;;M;", "homepage": ";;https://www.thss.tsinghua.edu.cn/;;https://www.thss.tsinghua.edu.cn/en/faculty/jianminwang.htm;", "dblp": ";286/8115;;;06/3456-1.html;", "google_scholar": ";oLL_x0wAAAAJ;;;https://scholar.google.com.tw/citations?user=MiovcboAAAAJ;", "orcid": ";;0009-0004-3245-459X;;0000-0001-6841-7943;", "linkedin": ";;;;;", "or_profile": "~Jiaxiang_Dong1;~Haixu_Wu1;~Haoran_Zhang9;lizhang@tsinghua.edu.cn;~Jianmin_Wang1;~Mingsheng_Long5", "aff": ";Tsinghua University;Tsinghua University;;Tsinghua University;", "aff_domain": ";tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;", "position": ";PhD student;Undergrad student;;Full Professor;", "bibtex": "@inproceedings{\ndong2023simmtm,\ntitle={Sim{MTM}: A Simple Pre-Training Framework for Masked Time-Series Modeling},\nauthor={Jiaxiang Dong and Haixu Wu and Haoran Zhang and Li Zhang and Jianmin Wang and Mingsheng Long},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ginTcBUnL8}\n}", "github": "", "project": "", "reviewers": "ZNHw;xbHA;5Qee;L1Yx", "pdf_size": 2743504, "rating": "5;5;7;8", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "46;53;108;92", "wc_strengths": "46;23;45;74", "wc_weaknesses": "319;74;81;19", "wc_questions": "554;58;33;32", "wc_limitations": "76;16;1;9", "wc_review": "1041;224;268;226", "wc_reply_reviewers": "345;13;0;6", "wc_reply_authors": "616;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.75, 25.9939896899264 ], "wc_strengths_avg": [ 47.0, 18.096961070853858 ], "wc_weaknesses_avg": [ 123.25, 115.53868399804456 ], "wc_questions_avg": [ 169.25, 222.37960225704154 ], "wc_limitations_avg": [ 25.5, 29.635283025474887 ], "wc_review_avg": [ 439.75, 347.57616071876964 ], "wc_reply_reviewers_avg": [ 91.0, 146.71911940848065 ], "wc_reply_authors_avg": [ 154.0, 266.73582436560713 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3960822226035154817&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "China" }, { "title": "Creating Multi-Level Skill Hierarchies in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70828", "id": "gjBk6IQofa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97b73904e88cc1dc0a3485595eda3753-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gjBk6IQofa", "openreview": "https://openreview.net/forum?id=gjBk6IQofa", "poster": "/media/PosterPDFs/NeurIPS%202023/70828.png?t=1702071766.991021", "slides": "https://nips.cc/virtual/2023/poster/70828", "video": "https://nips.cc/virtual/2023/poster/70828", "author_site": "Joshua B. Evans, \u00d6zg\u00fcr \u015eim\u015fek", "tldr": "", "abstract": "What is a useful skill hierarchy for an autonomous agent? We propose an answer based on a graphical representation of how the interaction between an agent and its environment may unfold. Our approach uses modularity maximisation as a central organising principle to expose the structure of the interaction graph at multiple levels of abstraction. The result is a collection of skills that operate at varying time scales, organised into a hierarchy, where skills that operate over longer time scales are composed of skills that operate over shorter time scales. The entire skill hierarchy is generated automatically, with no human input, including the skills themselves (their behaviour, when they can be called, and when they terminate) as well as the dependency structure between them. In a wide range of environments, this approach generates skill hierarchies that are intuitively appealing and that considerably improve the learning performance of the agent.", "keywords": "Reinforcement Learning;Hierarchical Reinforcement Learning;RL;HRL;Skill Discovery;Skill Hierarchies;Graph-Based;Graphs;Graph Clustering;Graph Partitioning", "primary_area": "", "supplementary_material": "/attachment/67b840a00b31cfbe2018181c468f40ff3464d612.pdf", "author": "Joshua Benjamin Evans;\u00d6zg\u00fcr \u015eim\u015fek", "authorids": "~Joshua_Benjamin_Evans1;~\u00d6zg\u00fcr_\u015eim\u015fek1", "gender": "M;F", "homepage": "https://people.bath.ac.uk/jbe25/;https://researchportal.bath.ac.uk/en/persons/\u00f6zg\u00fcr-\u015fim\u015fek", "dblp": "349/7622;", "google_scholar": "-8FHC20AAAAJ;https://scholar.google.co.uk/citations?user=z1BYZG0AAAAJ", "orcid": "0009-0003-8593-600X;0000-0001-5449-0437", "linkedin": "jb-evans/;", "or_profile": "~Joshua_Benjamin_Evans1;~\u00d6zg\u00fcr_\u015eim\u015fek1", "aff": "University of Bath;University of Bath", "aff_domain": "bath.ac.uk;bath.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nevans2023creating,\ntitle={Creating Multi-Level Skill Hierarchies in Reinforcement Learning},\nauthor={Joshua Benjamin Evans and {\\\"O}zg{\\\"u}r {\\c{S}}im{\\c{s}}ek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gjBk6IQofa}\n}", "github": "", "project": "", "reviewers": "UjdQ;7zy2;4HvS;cG6b", "pdf_size": 6011286, "rating": "2;5;7;7", "confidence": "4;4;4;4", "soundness": "1;3;3;3", "novelty": "1;2;3;3", "presentation": "2;3;3;4", "wc_summary": "52;148;58;207", "wc_strengths": "57;35;106;155", "wc_weaknesses": "213;163;51;170", "wc_questions": "143;179;19;108", "wc_limitations": "378;4;13;11", "wc_review": "843;529;247;651", "wc_reply_reviewers": "1262;0;33;56", "wc_reply_authors": "1995;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "5;1;1;1", 
"rating_avg": [ 5.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.25, 64.7393813686847 ], "wc_strengths_avg": [ 88.25, 46.32156625158523 ], "wc_weaknesses_avg": [ 149.25, 59.868084151741485 ], "wc_questions_avg": [ 112.25, 59.402756670040155 ], "wc_limitations_avg": [ 101.5, 159.6723207071282 ], "wc_review_avg": [ 567.5, 216.2608378787061 ], "wc_reply_reviewers_avg": [ 337.75, 533.9870667909477 ], "wc_reply_authors_avg": [ 498.75, 863.8603402749776 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1271991674156595498&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "bath.ac.uk;bath.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Bath", "aff_unique_dep": "", "aff_unique_url": "https://www.bath.ac.uk", "aff_unique_abbr": "Bath", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Structure of universal formulas", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70827", "id": "gmVoaAxB1R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ac04e54e0a2d1927d60709019e4e7870-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gmVoaAxB1R", "openreview": "https://openreview.net/forum?id=gmVoaAxB1R", "poster": "/media/PosterPDFs/NeurIPS%202023/70827.png?t=1702242730.5412235", "slides": "https://nips.cc/virtual/2023/poster/70827", "video": "https://nips.cc/virtual/2023/poster/70827", "tldr": "", "abstract": "By universal formulas we understand parameterized analytic expressions that have a fixed complexity, but nevertheless can approximate any continuous function on a compact set. There exist various examples of such formulas, including some in the form of neural networks. In this paper we analyze the essential structural elements of these highly expressive models. We introduce a hierarchy of expressiveness classes connecting the global approximability property to the weaker property of infinite VC dimension, and prove a series of classification results for several increasingly complex functional families. In particular, we introduce a general family of polynomially-exponentially-algebraic functions that, as we prove, is subject to polynomial constraints. As a consequence, we show that fixed-size neural networks with not more than one layer of neurons having transcendental activations (e.g., sine or standard sigmoid) cannot in general approximate functions on arbitrary finite sets. 
On the other hand, we give examples of functional families, including two-hidden-layer neural networks, that approximate functions on arbitrary finite sets, but fail to do that on the whole domain of definition.", "keywords": "VC-dimension;neural networks;activation functions;approximation;polynomials;algebraic functions", "primary_area": "", "supplementary_material": "", "author": "Dmitry Yarotsky", "authorids": "~Dmitry_Yarotsky1", "gender": "M", "homepage": "http://yarotsky.info", "dblp": "132/4661", "google_scholar": "https://scholar.google.ru/citations?user=wNSSr_gAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Dmitry_Yarotsky1", "aff": "Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru", "position": "Associate Professor", "bibtex": "@inproceedings{\nyarotsky2023structure,\ntitle={Structure of universal formulas},\nauthor={Dmitry Yarotsky},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gmVoaAxB1R}\n}", "github": "", "project": "", "reviewers": "FCbX;2cHj;BXX4;Lm8e", "pdf_size": 457389, "rating": "5;6;6;7", "confidence": "5;4;3;3", "soundness": "3;3;4;4", "novelty": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "102;40;383;81", "wc_strengths": "12;7;117;85", "wc_weaknesses": "826;61;94;63", "wc_questions": "68;21;129;58", "wc_limitations": "10;1;1;50", "wc_review": "1018;130;724;337", "wc_reply_reviewers": "862;26;0;16", "wc_reply_authors": "625;451;0;0", "reply_reviewers": "3;1;0;1", "reply_authors": "4;3;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 151.5, 135.5036899866568 ], "wc_strengths_avg": [ 55.25, 47.16129239111244 ], "wc_weaknesses_avg": [ 261.0, 326.46515893736654 ], "wc_questions_avg": [ 69.0, 38.8136573901507 ], "wc_limitations_avg": [ 15.5, 20.254629100529094 ], "wc_review_avg": [ 552.25, 343.16495668992775 ], "wc_reply_reviewers_avg": [ 226.0, 367.31185660144433 ], "wc_reply_authors_avg": [ 269.0, 275.94474084497426 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4472115378458667934&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "skoltech.ru", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Skolkovo Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.skoltech.ru", "aff_unique_abbr": "Skoltech", "aff_country_unique_index": "0", "aff_country_unique": "Russian Federation" }, { "title": "Coop: Memory is not a Commodity", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70826", "id": "gmmXyAq8TI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c534edc7ac1d6438216311be6d42eb2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gmmXyAq8TI", "openreview": "https://openreview.net/forum?id=gmmXyAq8TI", "poster": "/media/PosterPDFs/NeurIPS%202023/70826.png?t=1702311912.9908643", "slides": "https://nips.cc/virtual/2023/poster/70826", "video": "https://nips.cc/virtual/2023/poster/70826", "author_site": "Jianhao Zhang, Shihan Ma, Peihong Liu, Jinhui Yuan", "tldr": "", "abstract": "Tensor rematerialization allows 
the training of deep neural networks (DNNs) under limited memory budgets by checkpointing the models and recomputing the evicted tensors as needed. However, the existing tensor rematerialization techniques overlook the memory system in deep learning frameworks and implicitly assume that free memory blocks at different addresses are identical. Under this flawed assumption, discontiguous tensors are evicted, among which some are not used to allocate the new tensor. This leads to severe memory fragmentation and increases the cost of potential rematerializations.\n\nTo address this issue, we propose to evict tensors within a sliding window to ensure all evictions are contiguous and are immediately used. Furthermore, we propose cheap tensor partitioning and recomputable in-place to further reduce the rematerialization cost by optimizing the tensor allocation.\n\nWe named our method Coop as it is a co-optimization of tensor allocation and tensor rematerialization. We evaluated Coop on eight representative DNNs. The experimental results demonstrate that Coop achieves up to $2\\times$ memory saving and hugely reduces compute overhead, search latency, and memory fragmentation compared to the state-of-the-art baselines.", "keywords": "Tensor Rematerialization; Gradient Checkpointing; Activation Recomputing; Deep Learning; Deep Learning Frameworks; Memory Allocator", "primary_area": "", "supplementary_material": "/attachment/99c1f92127183d6bdc4d3c7a1a77b9ab4aade21a.pdf", "author": "Jianhao Zhang;Shihan Ma;Peihong Liu;Jinhui Yuan", "authorids": "~Jianhao_Zhang1;mmasss@sjtu.edu.cn;liupeihong@oneflow.org;yuanjinhui@oneflow.org", "gender": ";;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jianhao_Zhang1;mmasss@sjtu.edu.cn;liupeihong@oneflow.org;yuanjinhui@oneflow.org", "aff": ";;;", "aff_domain": ";;;", "position": ";;;", "bibtex": "@inproceedings{\nzhang2023coop,\ntitle={Coop: Memory is not a Commodity},\nauthor={Jianhao Zhang and Shihan Ma and Peihong Liu and Jinhui Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gmmXyAq8TI}\n}", "github": "", "project": "", "reviewers": "bdP2;BV8L;UBH8;ks3C", "pdf_size": 598407, "rating": "6;6;7;7", "confidence": "2;3;5;3", "soundness": "3;2;3;3", "novelty": "2;3;3;2", "presentation": "3;2;3;3", "wc_summary": "88;18;43;65", "wc_strengths": "54;5;41;45", "wc_weaknesses": "14;5;362;399", "wc_questions": "63;303;8;21", "wc_limitations": "10;5;24;16", "wc_review": "229;336;478;546", "wc_reply_reviewers": "9;35;210;19", "wc_reply_authors": "0;0;113;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.5, 25.947061490658243 ], "wc_strengths_avg": [ 36.25, 18.64638034579366 ], "wc_weaknesses_avg": [ 195.0, 185.98790283241541 ], "wc_questions_avg": [ 98.75, 119.66280750508906 ], "wc_limitations_avg": [ 13.75, 7.084313657652377 ], "wc_review_avg": [ 397.25, 123.19369910835537 ], "wc_reply_reviewers_avg": [ 68.25, 82.36314406334911 ], "wc_reply_authors_avg": [ 28.25, 48.93043531382078 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6882472016116854,
"gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4134385884323711215&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";;;", "author_num": 4 }, { "title": "Contrastive Retrospection: honing in on critical steps for rapid learning and generalization in RL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70825", "id": "gpJw8f4tIU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6357d6d068622c962391081d296bed69-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gpJw8f4tIU", "openreview": "https://openreview.net/forum?id=gpJw8f4tIU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70825", "video": "https://nips.cc/virtual/2023/poster/70825", "author_site": "Chen Sun, Wannan Yang, Thomas Jiralerspong, Dane Malenfant, Benjamin Alsbury-Nealy, Yoshua Bengio, Blake Richards", "tldr": "", "abstract": "In real life, success is often contingent upon multiple critical steps that are distant in time from each other and from the final reward. These critical steps are challenging to identify with traditional reinforcement learning (RL) methods that rely on the Bellman equation for credit assignment. Here, we present a new RL algorithm that uses offline contrastive learning to hone in on these critical steps. This algorithm, which we call Contrastive Retrospection (ConSpec), can be added to any existing RL algorithm. ConSpec learns a set of prototypes for the critical steps in a task by a novel contrastive loss and delivers an intrinsic reward when the current state matches one of the prototypes. The prototypes in ConSpec provide two key benefits for credit assignment: (i) They enable rapid identification of all the critical steps. (ii) They do so in a readily interpretable manner, enabling out-of-distribution generalization when sensory features are altered. Distinct from other contemporary RL approaches to credit assignment, ConSpec takes advantage of the fact that it is easier to retrospectively identify the small set of steps that success is contingent upon (and ignoring other states) than it is to prospectively predict reward at every taken step. ConSpec greatly improves learning in a diverse set of RL tasks. 
The code is available at the link: https://github.com/sunchipsster1/ConSpec", "keywords": "Reinforcement learning;long term credit assignment;rapid credit assignment;contrastive learning;few-shot learning in RL", "primary_area": "", "supplementary_material": "", "author": "Chen Sun;Wannan Yang;Thomas Jiralerspong;Dane Malenfant;Benjamin Alsbury-Nealy;Yoshua Bengio;Blake Aaron Richards", "authorids": "~Chen_Sun7;~Wannan_Yang1;~Thomas_Jiralerspong1;~Dane_Malenfant1;~Benjamin_Alsbury-Nealy1;~Yoshua_Bengio1;~Blake_Aaron_Richards1", "gender": ";F;M;M;M;M;M", "homepage": ";https://scholar.google.com/citations?view_op=list_works&hl=en&user=45hU7qkAAAAJ&gmla=AJsN-F5_MZFI15RWdesbUvEXWqHbYW3zFkclZyLISUNOLxhy5TMvyDrS2Sx-HeDai8IPIvbcGO5na_kESVOPgBfiOuvhAxEIqKc8xuoaKdboKhvk8wwpT7M;https://superkaiba.github.io/;;https://www.silicolabs.ca;http://yoshuabengio.org;http://linclab.org", "dblp": "01/6072-7;;330/4595;362/1058;331/2100;56/953;70/10850", "google_scholar": "Xvl3OLEAAAAJ;;https://scholar.google.ca/citations?user=ifQsJnIAAAAJ;https://scholar.google.ca/citations?user=34eM-bQAAAAJ;gnHRSVkAAAAJ;kukA0LcAAAAJ;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ", "orcid": "0000-0002-2419-794X;;;;0000-0003-1032-0342;;0000-0001-9662-2151", "linkedin": ";;thomas-jiralerspong/;danemalenfant/;benjamin-alsbury-nealy-a1855344/;yoshuabengio/?originalSubdomain=ca;", "or_profile": "~Chen_Sun7;~Wannan_Yang1;~Thomas_Jiralerspong1;~Dane_Malenfant1;~Benjamin_Alsbury-Nealy1;~Yoshua_Bengio1;~Blake_Aaron_Richards1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;New York University;McGill University;McGill University;University of Toronto;University of Montreal;Mila - Quebec Artificial Intelligence Institute", "aff_domain": "mila.umontreal.ca;nyu.edu;mcgill.ca;mcgill.ca;utoronto.ca;umontreal.ca;mila.quebec", "position": "Postdoc;PhD student;Undergrad student;Assistant Program Coordinator;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsun2023contrastive,\ntitle={Contrastive Retrospection: honing in on critical steps for rapid learning and generalization in {RL}},\nauthor={Chen Sun and Wannan Yang and Thomas Jiralerspong and Dane Malenfant and Benjamin Alsbury-Nealy and Yoshua Bengio and Blake Aaron Richards},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gpJw8f4tIU}\n}", "github": "", "project": "", "reviewers": "viiH;fj2y;LNh7;t9VM", "pdf_size": 21296460, "rating": "6;6;6;6", "confidence": "3;4;4;4", "soundness": "3;4;3;2", "novelty": "3;3;3;2", "presentation": "3;4;3;3", "wc_summary": "44;98;57;55", "wc_strengths": "23;66;238;61", "wc_weaknesses": "124;160;291;59", "wc_questions": "118;51;7;143", "wc_limitations": "4;31;2;50", "wc_review": "313;406;595;368", "wc_reply_reviewers": "25;0;196;118", "wc_reply_authors": "139;183;889;366", "reply_reviewers": "1;0;2;1", "reply_authors": "3;3;5;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 20.524375751773793 ], "wc_strengths_avg": [ 97.0, 83.08730348254178 ], "wc_weaknesses_avg": [ 158.5, 84.63007739568717 ], "wc_questions_avg": [ 79.75, 53.8115926172047 ], "wc_limitations_avg": [ 21.75, 19.929563467371782 ], "wc_review_avg": [ 420.5, 106.03419259842553 ], "wc_reply_reviewers_avg": [ 84.75, 77.83757126221244 ], 
"wc_reply_authors_avg": [ 394.25, 298.05819482107853 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3618946423287078477&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mila.umontreal.ca;nyu.edu;mcgill.ca;mcgill.ca;utoronto.ca;umontreal.ca;mila.quebec", "author_num": 7, "aff_unique_index": "0;1;2;2;3;0;4", "aff_unique_norm": "University of Montreal;New York University;McGill University;University of Toronto;Quebec Artificial Intelligence Institute", "aff_unique_dep": "Montreal Institute for Learning Algorithms;;;;Artificial Intelligence", "aff_unique_url": "https://www.umontreal.ca;https://www.nyu.edu;https://www.mcgill.ca;https://www.utoronto.ca;https://mila.quebec", "aff_unique_abbr": "UM;NYU;McGill;U of T;Mila", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Spectral Evolution and Invariance in Linear-width Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70824", "id": "gpqBGyKeKH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/41ed4bd197d0a5fa036d361c1fc606ad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gpqBGyKeKH", "openreview": "https://openreview.net/forum?id=gpqBGyKeKH", "poster": "/media/PosterPDFs/NeurIPS%202023/70824.png?t=1701923755.864069", "slides": "https://nips.cc/virtual/2023/poster/70824", "video": "https://nips.cc/virtual/2023/poster/70824", "author_site": "Zhichao Wang, Andrew Engel, Anand D Sarwate, Ioana Dumitriu, Tony Chiang", "tldr": "", "abstract": "We investigate the spectral properties of linear-width feed-forward neural networks, where the sample size is asymptotically proportional to network width. Empirically, we show that the spectra of weight in this high dimensional regime are invariant when trained by gradient descent for small constant learning rates; we provide a theoretical justification for this observation and prove the invariance of the bulk spectra for both conjugate and neural tangent kernels. We demonstrate similar characteristics when training with stochastic gradient descent with small learning rates. When the learning rate is large, we exhibit the emergence of an outlier whose corresponding eigenvector is aligned with the training data structure. We also show that after adaptive gradient training, where a lower test error and feature learning emerge, both weight and kernel matrices exhibit heavy tail behavior. Simple examples are provided to explain when heavy tails can have better generalizations. We exhibit different spectral properties such as invariant bulk, spike, and heavy-tailed distribution from a two-layer neural network using different training strategies, and then correlate them to the feature learning. Analogous phenomena also appear when we train conventional neural networks with real-world data. 
We conclude that monitoring the evolution of the spectra during training is an essential step toward understanding the training dynamics and feature learning.", "keywords": "Random matrix theory;Heavy tails;Feature learning;Linear-width neural networks;Spike phase transition", "primary_area": "", "supplementary_material": "/attachment/d42865fc5483562e663156911f55ad279fba5876.pdf", "author": "Zhichao Wang;Andrew William Engel;Anand Sarwate;Ioana Dumitriu;Tony Chiang", "authorids": "~Zhichao_Wang3;~Andrew_William_Engel1;~Anand_Sarwate1;~Ioana_Dumitriu3;~Tony_Chiang1", "gender": "M;M;M;F;M", "homepage": "https://mathweb.ucsd.edu/~zhw036/;;https://adsarwate.github.io/;https://math.ucsd.edu/people/profiles/ioana-dumitriu;https://www.pnnl.gov/people/tony-chiang", "dblp": "02/10606;97/5194;32/4477;57/6529;", "google_scholar": "IjXnDdoAAAAJ;;jgr1-eEAAAAJ;kSgtqZkAAAAJ;ifJp-DkAAAAJ", "orcid": "0000-0003-3886-5053;0000-0003-2348-483X;0000-0001-6123-5282;0000-0002-0721-7849;", "linkedin": ";andrew-engel-281149162/;anand-sarwate-a0bab3/;;eis2dot71828/", "or_profile": "~Zhichao_Wang3;~Andrew_William_Engel1;~Anand_Sarwate1;~Ioana_Dumitriu3;~Tony_Chiang1", "aff": "University of California, San Diego;Pacific Northwest National Laboratory;Rutgers University;University of California, San Diego;Pacific Northwest National Laboratory", "aff_domain": "ucsd.edu;pnnl.gov;rutgers.edu;ucsd.edu;pnnl.gov", "position": "PhD student;Researcher;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nwang2023spectral,\ntitle={Spectral Evolution and Invariance in Linear-width Neural Networks},\nauthor={Zhichao Wang and Andrew William Engel and Anand Sarwate and Ioana Dumitriu and Tony Chiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gpqBGyKeKH}\n}", "github": "", "project": "", "reviewers": "zoyL;m9fj;jctx;C57a;DCEt", "pdf_size": 11431820, "rating": "4;5;6;7;7", "confidence": "4;3;3;2;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;4;2;3;3", "wc_summary": "66;76;362;93;113", "wc_strengths": "39;46;60;228;147", "wc_weaknesses": "325;17;137;137;148", "wc_questions": "278;52;61;54;90", "wc_limitations": "24;3;20;55;5", "wc_review": "732;194;640;567;503", "wc_reply_reviewers": "113;0;109;0;16", "wc_reply_authors": "219;0;98;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 142.0, 111.15214797744576 ], "wc_strengths_avg": [ 104.0, 73.14369419163897 ], "wc_weaknesses_avg": [ 152.8, 98.60709913591414 ], "wc_questions_avg": [ 107.0, 86.57944328765345 ], "wc_limitations_avg": [ 21.4, 18.682612237050794 ], "wc_review_avg": [ 527.2, 183.22598069051236 ], "wc_reply_reviewers_avg": [ 47.6, 52.10988389931415 ], "wc_reply_authors_avg": [ 63.4, 86.56465791534094 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8134892168199606, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16687814376404125408&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucsd.edu;pnnl.gov;rutgers.edu;ucsd.edu;pnnl.gov", "author_num": 5, "aff_unique_index": "0;1;2;0;1", 
"aff_unique_norm": "University of California, San Diego;Pacific Northwest National Laboratory;Rutgers University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.pnnl.gov;https://www.rutgers.edu", "aff_unique_abbr": "UCSD;PNNL;Rutgers", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Relation Graph: A Unified Framework for Identifying Label Noise and Outlier Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70823", "id": "gpyeRyc858", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/886ed40d7882c9f891824e42a452c228-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gpyeRyc858", "openreview": "https://openreview.net/forum?id=gpyeRyc858", "poster": "/media/PosterPDFs/NeurIPS%202023/70823.png?t=1698135014.7682805", "slides": "https://nips.cc/virtual/2023/poster/70823", "video": "https://nips.cc/virtual/2023/poster/70823", "author_site": "Jang-Hyun Kim, Sangdoo Yun, Hyun Oh Song", "tldr": "", "abstract": "Diagnosing and cleaning data is a crucial step for building robust machine learning systems. However, identifying problems within large-scale datasets with real-world distributions is challenging due to the presence of complex issues such as label errors, under-representation, and outliers. In this paper, we propose a unified approach for identifying the problematic data by utilizing a largely ignored source of information: a relational structure of data in the feature-embedded space. To this end, we present scalable and effective algorithms for detecting label errors and outlier data based on the relational graph structure of data. We further introduce a visualization tool that provides contextual information of a data point in the feature-embedded space, serving as an effective tool for interactively diagnosing data. We evaluate the label error and outlier/out-of-distribution (OOD) detection performances of our approach on the large-scale image, speech, and language domain tasks, including ImageNet, ESC-50, and SST2. Our approach achieves state-of-the-art detection performance on all tasks considered and demonstrates its effectiveness in debugging large-scale real-world datasets across various domains. 
We release codes at https://github.com/snu-mllab/Neural-Relation-Graph.", "keywords": "Dataset cleaning;Label error detection;Outlier detection;Neural Networks;Robustness", "primary_area": "", "supplementary_material": "/attachment/ae68a23347dd044871e3f48279985e55cd689b9d.zip", "author": "Jang-Hyun Kim;Sangdoo Yun;Hyun Oh Song", "authorids": "~Jang-Hyun_Kim1;~Sangdoo_Yun1;~Hyun_Oh_Song1", "gender": "M;M;M", "homepage": "https://sangdooyun.github.io/;https://mllab.snu.ac.kr/hyunoh;https://janghyun1230.github.io/", "dblp": "124/3009.html;05/10781;", "google_scholar": "o0qtjzYAAAAJ;ScoZZPsAAAAJ;8JKsHJcAAAAJ", "orcid": ";;", "linkedin": ";hyun-oh-song-5a39b03;", "or_profile": "~Sangdoo_Yun1;~Hyun_Oh_Song1;~JangHyun_Kim1", "aff": "NAVER;Seoul National University;Seoul National University", "aff_domain": "navercorp.com;snu.ac.kr;snu.ac.kr", "position": "Research Scientist;Associate Professor;PhD student", "bibtex": "@inproceedings{\nkim2023neural,\ntitle={Neural Relation Graph: A Unified Framework for Identifying Label Noise and Outlier Data},\nauthor={Jang-Hyun Kim and Sangdoo Yun and Hyun Oh Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gpyeRyc858}\n}", "github": "", "project": "", "reviewers": "nM5G;Yk3p;6KY3;Cygf", "pdf_size": 12950784, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "64;114;70;173", "wc_strengths": "107;340;74;206", "wc_weaknesses": "208;255;84;87", "wc_questions": "130;338;98;117", "wc_limitations": "30;180;32;9", "wc_review": "539;1227;358;592", "wc_reply_reviewers": "24;0;8;9", "wc_reply_authors": "0;0;0;6", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 105.25, 43.61980628109208 ], "wc_strengths_avg": [ 181.75, 103.47554058810226 ], "wc_weaknesses_avg": [ 158.5, 74.87489565935968 ], "wc_questions_avg": [ 170.75, 97.23007507967893 ], "wc_limitations_avg": [ 62.75, 68.29119635794939 ], "wc_review_avg": [ 679.0, 328.0678283526137 ], "wc_reply_reviewers_avg": [ 10.25, 8.671072598012312 ], "wc_reply_authors_avg": [ 1.5, 2.598076211353316 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11425278531322745199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "navercorp.com;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "NAVER Corporation;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.naver.com;https://www.snu.ac.kr", "aff_unique_abbr": "NAVER;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Diffusion with Forward Models: Solving Stochastic Inverse Problems Without Direct Supervision", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70822", "id": "gq4xkwQZ1l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28e4ee96c94e31b2d040b4521d2b299e-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=gq4xkwQZ1l", "openreview": "https://openreview.net/forum?id=gq4xkwQZ1l", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70822", "video": "https://nips.cc/virtual/2023/poster/70822", "author_site": "Ayush Tewari, Tianwei Yin, George Cazenavette, Semon Rezchikov, Josh Tenenbaum, Fredo Durand, Bill Freeman, Vincent Sitzmann", "tldr": "", "abstract": "Denoising diffusion models are a powerful type of generative models used to capture complex distributions of real-world signals. However, their applicability is limited to scenarios where training samples are readily available, which is not always the case in real-world applications. For example, in inverse graphics, the goal is to generate samples from a distribution of 3D scenes that align with a given image, but ground-truth 3D scenes are unavailable and only 2D images are accessible. To address this limitation, we propose a novel class of denoising diffusion probabilistic models that learn to sample from distributions of signals that are never directly observed. Instead, these signals are measured indirectly through a known differentiable forward model, which produces partial observations of the unknown signal. Our approach involves integrating the forward model directly into the denoising process. A key contribution of our work is the integration of a differentiable forward model into the denoising process. This integration effectively connects the generative modeling of observations with the generative modeling of the underlying signals, allowing for end-to-end training of a conditional generative model over signals. During inference, our approach enables sampling from the distribution of underlying signals that are consistent with a given partial observation. We demonstrate the effectiveness of our method on three challenging computer vision tasks. For instance, in the context of inverse graphics, our model enables direct sampling from the distribution of 3D scenes that align with a single 2D input image.", "keywords": "3D generative models;neural rendering;neural scene representations;NeRF;diffusion models;differentiable rendering;inverse graphics;inverse problems", "primary_area": "", "supplementary_material": "/attachment/bf55b6a53a6a50e538b7a9670be112ad05161a83.pdf", "author": "Ayush Tewari;Tianwei Yin;George Cazenavette;Semon Rezchikov;Joshua B. Tenenbaum;Fredo Durand;William T. 
Freeman;Vincent Sitzmann", "authorids": "~Ayush_Tewari2;~Tianwei_Yin1;~George_Cazenavette1;~Semon_Rezchikov1;~Joshua_B._Tenenbaum1;~Fredo_Durand1;~William_T._Freeman1;~Vincent_Sitzmann1", "gender": ";M;M;M;;M;M;M", "homepage": "https://ayushtewari.com;https://tianweiy.github.io;https://georgecazenavette.github.io/;https://www.rezchikov.me/;;http://people.csail.mit.edu/fredo/;https://billf.mit.edu/;https://vsitzmann.github.io", "dblp": "198/1021;267/9373;202/5478;294/5525;t/JoshuaBTenenbaum;87/2617;86/6650;192/1958", "google_scholar": "pDnzpeoAAAAJ;BHlY8ewAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com.tw/citations?user=NJ9c4ygAAAAJ;https://scholar.google.com.tw/citations?user=0zZnyMEAAAAJ;X44QVV4AAAAJ", "orcid": ";;;;;0000-0001-9919-069X;;0000-0002-0107-5704", "linkedin": ";;;;;;;vincentsitzmann/", "or_profile": "~Ayush_Tewari2;~Tianwei_Yin1;~George_Cazenavette1;~Semon_Rezchikov1;~Joshua_B._Tenenbaum1;~Fredo_Durand1;~William_T._Freeman1;~Vincent_Sitzmann1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Princeton University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;princeton.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "Postdoc;PhD student;PhD student;Postdoc;Professor;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\ntewari2023diffusion,\ntitle={Diffusion with Forward Models: Solving Stochastic Inverse Problems Without Direct Supervision},\nauthor={Ayush Tewari and Tianwei Yin and George Cazenavette and Semon Rezchikov and Joshua B. Tenenbaum and Fredo Durand and William T. 
Freeman and Vincent Sitzmann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gq4xkwQZ1l}\n}", "github": "", "project": "", "reviewers": "zgt6;bLEB;5usH;LcVC", "pdf_size": 9561152, "rating": "5;6;6;8", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "1;3;4;4", "wc_summary": "89;102;189;51", "wc_strengths": "28;65;69;82", "wc_weaknesses": "223;163;34;52", "wc_questions": "48;30;334;109", "wc_limitations": "17;12;30;1", "wc_review": "405;372;656;295", "wc_reply_reviewers": "90;213;35;16", "wc_reply_authors": "56;345;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 107.75, 50.514230668198834 ], "wc_strengths_avg": [ 61.0, 20.062402647738878 ], "wc_weaknesses_avg": [ 118.0, 78.20166238642246 ], "wc_questions_avg": [ 130.25, 121.22370848971747 ], "wc_limitations_avg": [ 15.0, 10.41633332799983 ], "wc_review_avg": [ 432.0, 135.34585327966278 ], "wc_reply_reviewers_avg": [ 88.5, 76.84562446880109 ], "wc_reply_authors_avg": [ 100.25, 143.1439397948792 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12413855601201766378&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "mit.edu;mit.edu;mit.edu;princeton.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.princeton.edu", "aff_unique_abbr": "MIT;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Flow-Based Feature Fusion for Vehicle-Infrastructure Cooperative 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70821", "id": "gsglrhvQxX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6ca5d2665de83394f437dad0c3746907-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gsglrhvQxX", "openreview": "https://openreview.net/forum?id=gsglrhvQxX", "poster": "/media/PosterPDFs/NeurIPS%202023/70821.png?t=1699943268.7273602", "slides": "https://nips.cc/virtual/2023/poster/70821", "video": "https://nips.cc/virtual/2023/poster/70821", "author_site": "Haibao Yu, Yingjuan Tang, Enze Xie, Jilei Mao, Ping Luo, Zaiqing Nie", "tldr": "", "abstract": "Cooperatively utilizing both ego-vehicle and infrastructure sensor data can significantly enhance autonomous driving perception abilities. However, the uncertain temporal asynchrony and limited communication conditions that are present in traffic environments can lead to fusion misalignment and constrain the exploitation of infrastructure data. To address these issues in vehicle-infrastructure cooperative 3D (VIC3D) object detection, we propose the Feature Flow Net (FFNet), a novel cooperative detection framework. 
FFNet is a flow-based feature fusion framework that uses a feature flow prediction module to predict future features and compensate for asynchrony. Instead of transmitting feature maps extracted from still-images, FFNet transmits feature flow, leveraging the temporal coherence of sequential infrastructure frames. Furthermore, we introduce a self-supervised training approach that enables FFNet to generate feature flow with feature prediction ability from raw infrastructure sequences. Experimental results demonstrate that our proposed method outperforms existing cooperative detection methods while only requiring about 1/100 of the transmission cost of raw data and covers all latency in one model on the DAIR-V2X dataset. The code is available https://github.com/haibao-yu/FFNet-VIC3D.", "keywords": "vehicle-infrastructure cooperative autonomous driving;3D object detection;feature flow;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/92034365bff00e410c0ff0404afff296d175fcce.pdf", "author": "Haibao Yu;Yingjuan Tang;Enze Xie;Jilei Mao;Ping Luo;Zaiqing Nie", "authorids": "~Haibao_Yu2;~Yingjuan_Tang1;~Enze_Xie1;~Jilei_Mao1;~Ping_Luo2;~Zaiqing_Nie2", "gender": "M;F;M;M;M;", "homepage": ";;https://xieenze.github.io/;;https://air.tsinghua.edu.cn/en/info/1046/1192.htm;http://luoping.me/", "dblp": "246/4643;342/9319;218/5441;;n/ZaiqingNie;54/4989-2.html", "google_scholar": "JW4F5HoAAAAJ;;42MVVPgAAAAJ;;;https://scholar.google.com.hk/citations?hl=en", "orcid": ";;;;0000-0002-1134-2343;0000-0002-6685-7950", "linkedin": ";https://www.linkedin.cn/incareer/in/tang-yingjuan-b340391a4;;https://www.linkedin.cn/in/jilei-mao-15978a1b7;;", "or_profile": "~Haibao_Yu2;~Yingjuan_Tang1;~Enze_Xie1;~Jilei_Mao1;~Zaiqing_Nie2;~Luo_Ping2", "aff": "The University of Hong Kong;Beijing Institute of Technology;Huawei Noah's Ark Lab;;Tsinghua University;The University of Hong Kong", "aff_domain": "hku.hk;bit.edu.cn;huawei.com;;tsinghua.edu.cn;hku.hk", "position": "PhD student;PhD student;Researcher;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyu2023flowbased,\ntitle={Flow-Based Feature Fusion for Vehicle-Infrastructure Cooperative 3D Object Detection},\nauthor={Haibao Yu and Yingjuan Tang and Enze Xie and Jilei Mao and Ping Luo and Zaiqing Nie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gsglrhvQxX}\n}", "github": "", "project": "", "reviewers": "Wmh8;NKqy;enaU;CL6E", "pdf_size": 2457226, "rating": "5;5;6;6", "confidence": "3;5;5;4", "soundness": "2;3;3;2", "novelty": "2;2;3;2", "presentation": "3;3;3;2", "wc_summary": "49;59;91;79", "wc_strengths": "44;18;65;70", "wc_weaknesses": "98;314;85;88", "wc_questions": "21;22;46;37", "wc_limitations": "1;1;1;2", "wc_review": "213;414;288;276", "wc_reply_reviewers": "15;30;0;20", "wc_reply_authors": "26;25;0;17", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.5, 16.454482671904334 ], "wc_strengths_avg": [ 49.25, 20.51066795596867 ], "wc_weaknesses_avg": [ 146.25, 96.97003403113769 ], "wc_questions_avg": [ 31.5, 10.5 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 297.75, 72.9121903387904 ], "wc_reply_reviewers_avg": [ 16.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 17.0, 
10.41633332799983 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4230500553651429653&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hku.hk;bit.edu.cn;huawei.com;;tsinghua.edu.cn;hku.hk", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Hong Kong;Beijing Institute of Technology;Huawei;Tsinghua University", "aff_unique_dep": ";;Noah's Ark Lab;", "aff_unique_url": "https://www.hku.hk;http://www.bit.edu.cn/;https://www.huawei.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HKU;BIT;Huawei;THU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "NVFi: Neural Velocity Fields for 3D Physics Learning from Dynamic Videos", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70820", "id": "gsi9lJ3994", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d0942e288ce41db8d4ebd041e7d1100-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gsi9lJ3994", "openreview": "https://openreview.net/forum?id=gsi9lJ3994", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70820", "video": "https://nips.cc/virtual/2023/poster/70820", "author_site": "Jinxi Li, Ziyang Song, Bo Yang", "tldr": "", "abstract": "In this paper, we aim to model 3D scene dynamics from multi-view videos. Unlike the majority of existing works which usually focus on the common task of novel view synthesis within the training time period, we propose to simultaneously learn the geometry, appearance, and physical velocity of 3D scenes only from video frames, such that multiple desirable applications can be supported, including future frame extrapolation, unsupervised 3D semantic scene decomposition, and dynamic motion transfer. Our method consists of three major components, 1) the keyframe dynamic radiance field, 2) the interframe velocity field, and 3) a joint keyframe and interframe optimization module which is the core of our framework to effectively train both networks. To validate our method, we further introduce two dynamic 3D datasets: 1) Dynamic Object dataset, and 2) Dynamic Indoor Scene dataset. 
We conduct extensive experiments on multiple datasets, demonstrating the superior performance of our method over all baselines, particularly in the critical tasks of future frame extrapolation and unsupervised 3D semantic scene decomposition.", "keywords": "Physics Learning;Velocity Field;Dynamic Radiance Field;Future Frame Extrapolation", "primary_area": "", "supplementary_material": "", "author": "Jinxi Li;Ziyang Song;Bo Yang", "authorids": "~Jinxi_Li2;~Ziyang_Song1;~Bo_Yang7", "gender": "M;M;M", "homepage": ";https://szy-young.github.io/;https://yang7879.github.io/", "dblp": "198/4279;;46/999-27", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;7YcpCEwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-2419-4140", "linkedin": "jinxi-leo-li;;", "or_profile": "~Jinxi_Li2;~Ziyang_Song1;~Bo_Yang7", "aff": "Hong Kong Polytechnic University;The Hong Kong Polytechnic University;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2023nvfi,\ntitle={{NVF}i: Neural Velocity Fields for 3D Physics Learning from Dynamic Videos},\nauthor={Jinxi Li and Ziyang Song and Bo Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gsi9lJ3994}\n}", "github": "", "project": "", "reviewers": "3vYE;hQnr;e7NB;8aAM;c2wX", "pdf_size": 17754493, "rating": "4;6;6;7;7", "confidence": "5;5;4;3;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;1;3;3;4", "wc_summary": "80;95;91;72;58", "wc_strengths": "82;65;68;77;121", "wc_weaknesses": "162;197;70;146;138", "wc_questions": "168;26;26;26;37", "wc_limitations": "27;11;4;20;4", "wc_review": "519;394;259;341;358", "wc_reply_reviewers": "326;107;0;25;0", "wc_reply_authors": "766;735;0;27;0", "reply_reviewers": "2;2;0;1;0", "reply_authors": "3;3;1;2;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 79.2, 13.347658970770867 ], "wc_strengths_avg": [ 82.6, 20.14547095503106 ], "wc_weaknesses_avg": [ 142.6, 41.56729483620506 ], "wc_questions_avg": [ 56.6, 55.862688800307495 ], "wc_limitations_avg": [ 13.2, 9.064215354899728 ], "wc_review_avg": [ 374.2, 84.84668526230121 ], "wc_reply_reviewers_avg": [ 91.6, 123.61003195533928 ], "wc_reply_authors_avg": [ 305.6, 363.52529485580504 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8164965809277261, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8643204646481160064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Use perturbations when learning from explanations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70819", "id": "guyhQMSp2F", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/54f82cdae821aad5c2888d61a6515170-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=guyhQMSp2F", "openreview": "https://openreview.net/forum?id=guyhQMSp2F", "poster": "/media/PosterPDFs/NeurIPS%202023/70819.png?t=1701967071.3922513", "slides": "https://nips.cc/virtual/2023/poster/70819", "video": "https://nips.cc/virtual/2023/poster/70819", "author_site": "Juyeon Heo, Vihari Piratla, Matthew Wicker, Adrian Weller", "tldr": "", "abstract": "Machine learning from explanations (MLX) is an approach to learning that uses human-provided explanations of relevant or irrelevant features for each input to ensure that model predictions are right for the right reasons. Existing MLX approaches rely on local model interpretation methods and require strong model smoothing to align model and human explanations, leading to sub-optimal performance. We recast MLX as a robustness problem, where human explanations specify a lower dimensional manifold from which perturbations can be drawn, and show both theoretically and empirically how this approach alleviates the need for strong model smoothing. We consider various approaches to achieving robustness, leading to improved performance over prior MLX methods. Finally, we show how to combine robustness with an earlier MLX method, yielding state-of-the-art results on both synthetic and real-world benchmarks.", "keywords": "Learning from explanation;Robustness;Interpretability;Shortcuts;Explanations", "primary_area": "", "supplementary_material": "/attachment/5b03a52a5955474339e4fe1dfea25183ebf31eab.zip", "author": "Juyeon Heo;Vihari Piratla;Matthew Robert Wicker;Adrian Weller", "authorids": "~Juyeon_Heo1;~Vihari_Piratla1;~Matthew_Robert_Wicker1;~Adrian_Weller1", "gender": "F;M;M;M", "homepage": "https://sites.google.com/view/juyeonheo/%ED%99%88;https://vihari.github.io/;https://www.matthewwicker.org;http://mlg.eng.cam.ac.uk/adrian/", "dblp": ";161/3626;207/7909.html;73/8324", "google_scholar": ";https://scholar.google.co.in/citations?user=DQddccYAAAAJ;_0qEDNIAAAAJ;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Juyeon_Heo1;~Vihari_Piratla1;~Matthew_Robert_Wicker1;~Adrian_Weller1", "aff": "University of Cambridge;University of Cambridge;Alan Turing Institute;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;turing.ac.uk;cam.ac.uk", "position": "PhD student;Postdoc;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nheo2023use,\ntitle={Use perturbations when learning from explanations},\nauthor={Juyeon Heo and Vihari Piratla and Matthew Robert Wicker and Adrian Weller},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=guyhQMSp2F}\n}", "github": "", "project": "", "reviewers": "Hnkb;haST;WA6d;k7a7", "pdf_size": 3538300, "rating": "5;5;7;7", "confidence": "4;1;4;3", "soundness": "2;2;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "93;143;152;55", "wc_strengths": "46;32;62;32", "wc_weaknesses": "99;39;139;30", "wc_questions": "94;73;82;44", "wc_limitations": "51;1;35;42", "wc_review": "383;288;470;203", "wc_reply_reviewers": "270;60;97;11", "wc_reply_authors": "816;447;22;20", "reply_reviewers": "2;2;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], 
"wc_summary_avg": [ 110.75, 39.25796097608738 ], "wc_strengths_avg": [ 43.0, 12.36931687685298 ], "wc_weaknesses_avg": [ 76.75, 44.66752175798429 ], "wc_questions_avg": [ 73.25, 18.45772196128222 ], "wc_limitations_avg": [ 32.25, 18.91262805640718 ], "wc_review_avg": [ 336.0, 100.19730535298841 ], "wc_reply_reviewers_avg": [ 109.5, 97.55639394729594 ], "wc_reply_authors_avg": [ 326.25, 331.96112347683123 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13742854270939507266&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cam.ac.uk;cam.ac.uk;turing.ac.uk;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Cambridge;Alan Turing Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "Cambridge;ATI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Composable Coresets for Determinant Maximization: Greedy is Almost Optimal", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70818", "id": "gwvwbsnTps", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81c565e605161fcf25d08aa230431eba-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gwvwbsnTps", "openreview": "https://openreview.net/forum?id=gwvwbsnTps", "poster": "/media/PosterPDFs/NeurIPS%202023/70818.png?t=1699570192.0961611", "slides": "https://nips.cc/virtual/2023/poster/70818", "video": "https://nips.cc/virtual/2023/poster/70818", "author_site": "Siddharth Gollapudi, Sepideh Mahabadi, Varun Sivashankar", "tldr": "", "abstract": "Given a set of $n$ vectors in $\\mathbb{R}^d$, the goal of the \\emph{determinant maximization} problem is to pick $k$ vectors with the maximum volume.\n Determinant maximization is the MAP-inference task for determinantal point processes (DPP) and has recently received considerable attention for modeling diversity.\n As most applications for the problem use large amounts of data, this problem has been studied in the relevant \\textit{composable coreset} setting.\nIn particular, [Indyk-Mahabadi-OveisGharan-Rezaei--SODA'20, ICML'19] showed that one can get composable coresets with optimal approximation factor of $\\tilde O(k)^k$ for the problem, and that a local search algorithm achieves an almost optimal approximation guarantee of $O(k)^{2k}$.\nIn this work, we show that the widely-used Greedy algorithm also provides composable coresets with an almost optimal approximation factor of $O(k)^{3k}$, which improves over the previously known guarantee of $C^{k^2}$, and supports the prior experimental results showing the practicality of the greedy algorithm as a coreset.\nOur main result follows by showing a local optimality property for Greedy:\nswapping a single point from the greedy solution with a vector that was not picked by the greedy algorithm can increase the volume by a factor of at most $(1+\\sqrt{k})$. This is tight up to the additive constant $1$. 
Finally, our experiments show that the local optimality of the greedy algorithm is even lower than the theoretical bound on real data sets.", "keywords": "Determinant Maximization;Composable Coresets;Greedy Algorithm;DPP", "primary_area": "", "supplementary_material": "/attachment/a1c8fd71ca1bbc26a2967dee3bd90fccb11a71d9.zip", "author": "Siddharth Gollapudi;Sepideh Mahabadi;Varun Sivashankar", "authorids": "sgollapu@berkeley.edu;~Sepideh_Mahabadi1;~Varun_Sivashankar1", "gender": ";F;M", "homepage": ";https://www.mit.edu/~mahabadi/;https://varunsivashankar.com/", "dblp": ";130/0388;307/3293.html", "google_scholar": ";NirVdpMAAAAJ;-UbYZ1UAAAAJ", "orcid": ";;0000-0003-0785-4474", "linkedin": ";;varun-sivashankar", "or_profile": "sgollapu@berkeley.edu;~Sepideh_Mahabadi1;~Varun_Sivashankar1", "aff": ";Microsoft Research;Microsoft Research", "aff_domain": ";microsoft.com;research.microsoft.com", "position": ";Researcher;Research Fellow", "bibtex": "@inproceedings{\ngollapudi2023composable,\ntitle={Composable Coresets for Determinant Maximization: Greedy is Almost Optimal},\nauthor={Siddharth Gollapudi and Sepideh Mahabadi and Varun Sivashankar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gwvwbsnTps}\n}", "github": "", "project": "", "reviewers": "KpLA;vec7;3pA1;rYtN", "pdf_size": 536371, "rating": "5;6;6;6", "confidence": "5;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;2", "wc_summary": "62;194;116;200", "wc_strengths": "25;47;77;92", "wc_weaknesses": "47;43;54;23", "wc_questions": "39;46;4;188", "wc_limitations": "21;1;4;1", "wc_review": "194;331;255;504", "wc_reply_reviewers": "0;143;9;51", "wc_reply_authors": "0;344;0;25", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 143.0, 57.3149195236284 ], "wc_strengths_avg": [ 60.25, 26.013217794036937 ], "wc_weaknesses_avg": [ 41.75, 11.519006033508273 ], "wc_questions_avg": [ 69.25, 70.38243744003186 ], "wc_limitations_avg": [ 6.75, 8.317902379807062 ], "wc_review_avg": [ 321.0, 116.26908445498313 ], "wc_reply_reviewers_avg": [ 50.75, 56.63203598670985 ], "wc_reply_authors_avg": [ 92.25, 145.70582520956395 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12092894926173233041&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";microsoft.com;research.microsoft.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Emergent Communication for Rules Reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70817", "id": "gx20B4ItIw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d8ace30c68b085556ccce04ed4ae4ebb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gx20B4ItIw", "openreview": 
"https://openreview.net/forum?id=gx20B4ItIw", "poster": "/media/PosterPDFs/NeurIPS%202023/70817.png?t=1699551464.5901532", "slides": "https://nips.cc/virtual/2023/poster/70817", "video": "https://nips.cc/virtual/2023/poster/70817", "author_site": "Yuxuan Guo, Yifan Hao, Rui Zhang, Enshuai Zhou, Zidong Du, xishan zhang, Xinkai Song, Yuanbo Wen, Yongwei Zhao, Xuehai Zhou, Jiaming Guo, Qi Yi, Shaohui Peng, Di Huang, Ruizhi Chen, Qi Guo, Yunji Chen", "tldr": "", "abstract": "Research on emergent communication between deep-learning-based agents has received extensive attention due to its inspiration for linguistics and artificial intelligence. \n However, previous attempts have hovered around emerging communication under perception-oriented environmental settings,\n that forces agents to describe low-level perceptual features intra image or symbol contexts.\n In this work, inspired by the classic human reasoning test (namely Raven's Progressive Matrix), we propose the Reasoning Game, a cognition-oriented environment that encourages agents to reason and communicate high-level rules, rather than perceived low-level contexts.\n Moreover, we propose 1) an unbiased dataset (namely rule-RAVEN) as a benchmark to avoid overfitting, 2) and a two-stage curriculum agent training method as a baseline for more stable convergence in the Reasoning Game,\n where contexts and semantics are bilaterally drifting.\n Experimental results show that, in the Reasoning Game, a semantically stable and compositional language emerges to solve reasoning problems.\n The emerged language helps agents apply the extracted rules to the generalization of unseen context attributes, and to the transfer between different context attributes or even tasks.", "keywords": "Emergent communication;Multi-agent communication;Raven's Progressive Matrices;Representation learning", "primary_area": "", "supplementary_material": "/attachment/09e2ef8fe0b1dd9f569831e9c173e322f9aab4a5.zip", "author": "Yuxuan Guo;Yifan Hao;Rui Zhang;Enshuai Zhou;Zidong Du;Xishan Zhang;Xinkai Song;Yuanbo Wen;Yongwei Zhao;Xuehai Zhou;Jiaming Guo;Qi Yi;Shaohui Peng;Di Huang;Ruizhi Chen;Qi Guo;Yunji Chen", "authorids": "~Yuxuan_Guo2;~Yifan_Hao3;~Rui_Zhang1;~Enshuai_Zhou1;~Zidong_Du1;~Xishan_Zhang1;~Xinkai_Song1;~Yuanbo_Wen1;~Yongwei_Zhao1;~Xuehai_Zhou1;~Jiaming_Guo2;~Qi_Yi1;~Shaohui_Peng2;~Di_Huang5;~Ruizhi_Chen3;~Qi_Guo4;~Yunji_Chen1", "gender": "M;M;F;M;;;;M;;M;M;M;;M;M;M;M", "homepage": ";https://www.ict.ac.cn/sourcedb_2018_ict_cas/cn/jssrck/202311/t20231108_6923467.html;;;https://zidongdu.github.io/;;;;;https://staff.ustc.edu.cn/~xhzhou;;;;;;http://novel.ict.ac.cn/qguo;", "dblp": ";;60/2536-40;;44/11216;133/6391;;262/3144;;https://dblp.uni-trier.de/pid/53/2969.html;63/8512;295/8813;;;120/4143;67/398-1;48/474", "google_scholar": ";;dse6jAsAAAAJ;;https://scholar.google.com.sg/citations?user=8N9ym9YAAAAJ;;;;;;;veu6_ykAAAAJ;;;x_wFaYgAAAAJ;;", "orcid": "0000-0003-1462-8336;;;0000-0001-6434-8602;0000-0002-7603-4210;;;0000-0002-7775-2724;;0000-0002-8360-3143;;;;0000-0002-2370-0072;0000-0001-7219-4658;;", "linkedin": ";;;;;;;;;;;;;;;;", "or_profile": "~Yuxuan_Guo2;~Yifan_Hao3;~Rui_Zhang1;~Enshuai_Zhou1;~Zidong_Du1;~Xishan_Zhang1;~Xinkai_Song1;~Yuanbo_Wen1;~Yongwei_Zhao1;~Xuehai_Zhou1;~Jiaming_Guo2;~Qi_Yi1;~Shaohui_Peng2;~Di_Huang5;~Ruizhi_Chen3;~Qi_Guo4;~Yunji_Chen1", "aff": "University of Science and Technology of China;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, CAS;University of Science and Technology of 
China;Institute of Computing Technology, Chinese Academy of Sciences;, Cambricon Technologies;;Institute of Computing Technology, Chinese Academy of Sciences;;University of Science and Technology of China;Institute of Computing Technology, Chinese Academy of Sciences;University of Science and Technology of China;;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ustc.edu.cn;ict.ac.cn;ict.ac.cn;ustc.edu.cn;ict.ac.cn;cambricon.com;;ict.ac.cn;;ustc.edu.cn;ict.ac.cn;ustc.edu.cn;;ict.ac.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;Associate Professor;Assistant Professor;MS student;Full Professor;Researcher;;Postdoc;;Full Professor;PhD student;PhD student;;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023emergent,\ntitle={Emergent Communication for Rules Reasoning},\nauthor={Yuxuan Guo and Yifan Hao and Rui Zhang and Enshuai Zhou and Zidong Du and Xishan Zhang and Xinkai Song and Yuanbo Wen and Yongwei Zhao and Xuehai Zhou and Jiaming Guo and Qi Yi and Shaohui Peng and Di Huang and Ruizhi Chen and Qi Guo and Yunji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gx20B4ItIw}\n}", "github": "", "project": "", "reviewers": "9tUw;Ntdv;3wvB;LuwD;M8eN", "pdf_size": 1247933, "rating": "6;6;6;6;7", "confidence": "4;4;5;4;4", "soundness": "3;3;3;4;3", "novelty": "2;3;3;2;3", "presentation": "4;3;3;3;4", "wc_summary": "96;50;65;108;104", "wc_strengths": "69;64;135;106;61", "wc_weaknesses": "95;165;244;452;44", "wc_questions": "116;19;20;69;159", "wc_limitations": "1;108;7;1;1", "wc_review": "377;406;471;736;369", "wc_reply_reviewers": "0;0;21;62;65", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 84.6, 22.957351763650788 ], "wc_strengths_avg": [ 87.0, 28.962044126753206 ], "wc_weaknesses_avg": [ 200.0, 142.84677105206123 ], "wc_questions_avg": [ 76.6, 54.628197846899546 ], "wc_limitations_avg": [ 23.6, 42.26393261399133 ], "wc_review_avg": [ 471.8, 136.88447684087484 ], "wc_reply_reviewers_avg": [ 29.6, 28.73743203558731 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Pp9ZteHFuVwJ:scholar.google.com/&scioq=Emergent+Communication+for+Rules+Reasoning&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "ustc.edu.cn;ict.ac.cn;ict.ac.cn;ustc.edu.cn;ict.ac.cn;cambricon.com;;ict.ac.cn;;ustc.edu.cn;ict.ac.cn;ustc.edu.cn;;ict.ac.cn;iscas.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 17, "aff_unique_index": "0;1;1;0;1;2;1;0;1;0;1;1;1;1", "aff_unique_norm": "University of Science and Technology of China;Chinese Academy of Sciences;Cambricon Technologies", "aff_unique_dep": ";Institute of Computing Technology;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ict.ac.cn;https://www.cambricon.com", 
"aff_unique_abbr": "USTC;CAS;Cambricon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Query-based Temporal Fusion with Explicit Motion for 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70816", "id": "gySmwdmVDF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef0dcb44a47185f5bacac62571f6e920-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gySmwdmVDF", "openreview": "https://openreview.net/forum?id=gySmwdmVDF", "poster": "/media/PosterPDFs/NeurIPS%202023/70816.png?t=1699257760.8058789", "slides": "https://nips.cc/virtual/2023/poster/70816", "video": "https://nips.cc/virtual/2023/poster/70816", "author_site": "Jinghua Hou, Zhe Liu, dingkang liang, Zhikang Zou, Xiaoqing Ye, Xiang Bai", "tldr": "", "abstract": "Effectively utilizing temporal information to improve 3D detection performance is vital for autonomous driving vehicles. Existing methods either conduct temporal fusion based on the dense BEV features or sparse 3D proposal features. However, the former does not pay more attention to foreground objects, leading to more computation costs and sub-optimal performance. The latter implements time-consuming operations to generate sparse 3D proposal features, and the performance is limited by the quality of 3D proposals. In this paper, we propose a simple and effective Query-based Temporal Fusion Network (QTNet). The main idea is to exploit the object queries in previous frames to enhance the representation of current object queries by the proposed Motion-guided Temporal Modeling (MTM) module, which utilizes the spatial position information of object queries along the temporal dimension to construct their relevance between adjacent frames reliably. Experimental results show our proposed QTNet outperforms BEV-based or proposal-based manners on the nuScenes dataset. Besides, the MTM is a plug-and-play module, which can be integrated into some advanced LiDAR-only or multi-modality 3D detectors and even brings new SOTA performance with negligible computation cost and latency on the nuScenes dataset. These experiments powerfully illustrate the superiority and generalization of our method. 
The code is available at https://github.com/AlmoonYsl/QTNet.", "keywords": "3D Object Detection;Temporal;LiDAR-only;Multi-modality;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/f17ca82d5dc4a59cd84ce19b8778cbc25c82c47b.pdf", "author": "Jinghua Hou;Zhe Liu;dingkang liang;Zhikang Zou;Xiaoqing Ye;Xiang Bai", "authorids": "~Jinghua_Hou1;~Zhe_Liu12;~dingkang_liang1;~Zhikang_Zou2;~Xiaoqing_Ye1;~Xiang_Bai1", "gender": "M;M;M;M;F;M", "homepage": "https://github.com/AlmoonYsl;https://github.com/happinesslz;https://dk-liang.github.io/;https://bigteacher-777.github.io/;https://shuluoshu.github.io/;http://mclab.eic.hust.edu.cn/~xbai", "dblp": "https://dblp.uni-trier.de/pid/175/1330;70/1220-33;255/6274;229/8175;177/0181;59/2741", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;yprv7EsAAAAJ;Tre69v0AAAAJ;T-YePFgAAAAJ;bmN_nycAAAAJ;UeltiQ4AAAAJ", "orcid": "0009-0007-6555-4038;;;;0000-0003-3268-880X;", "linkedin": ";;;;;", "or_profile": "~Jinghua_Hou1;~Zhe_Liu12;~dingkang_liang1;~Zhikang_Zou2;~Xiaoqing_Ye1;~Xiang_Bai1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Baidu;Baidu Inc.;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;baidu.com;baidu.com;hust.edu.cn", "position": "MS student;PhD student;PhD student;Researcher;Researcher and Developer;Full Professor", "bibtex": "@inproceedings{\nhou2023querybased,\ntitle={Query-based Temporal Fusion with Explicit Motion for 3D Object Detection},\nauthor={Jinghua Hou and Zhe Liu and dingkang liang and Zhikang Zou and Xiaoqing Ye and Xiang Bai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gySmwdmVDF}\n}", "github": "", "project": "", "reviewers": "LNvD;BRpP;T8P4;ycEE;DMZS", "pdf_size": 0, "rating": "4;4;4;5;6", "confidence": "4;4;4;5;5", "soundness": "2;2;3;2;4", "novelty": "4;2;1;2;3", "presentation": "2;3;3;2;4", "wc_summary": "33;54;84;56;59", "wc_strengths": "89;38;47;26;53", "wc_weaknesses": "395;105;126;284;139", "wc_questions": "271;54;4;2;18", "wc_limitations": "24;65;1;2;5", "wc_review": "812;316;262;370;274", "wc_reply_reviewers": "194;0;0;23;33", "wc_reply_authors": "524;0;0;0;0", "reply_reviewers": "2;0;0;1;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 4.6, 0.8 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 57.2, 16.240689640529432 ], "wc_strengths_avg": [ 50.6, 21.247117451550928 ], "wc_weaknesses_avg": [ 209.8, 112.0917481351772 ], "wc_questions_avg": [ 69.8, 102.31402640889468 ], "wc_limitations_avg": [ 19.4, 24.286621831782202 ], "wc_review_avg": [ 406.8, 206.1013342994169 ], "wc_reply_reviewers_avg": [ 50.0, 73.14916267463353 ], "wc_reply_authors_avg": [ 104.8, 209.6 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9185586535436916, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14222351481394628247&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;baidu.com;baidu.com;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Huazhong University of Science and 
Technology;Baidu", "aff_unique_dep": ";Baidu, Inc.", "aff_unique_url": "http://www.hust.edu.cn;https://www.baidu.com", "aff_unique_abbr": "HUST;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Voicebox: Text-Guided Multilingual Universal Speech Generation at Scale", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70815", "id": "gzCS252hCO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d8911db9ecedf866015091b28946e15-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=gzCS252hCO", "openreview": "https://openreview.net/forum?id=gzCS252hCO", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70815", "video": "https://nips.cc/virtual/2023/poster/70815", "author_site": "Matthew Le, Apoorv Vyas, Bowen Shi, Brian Karrer, Leda Sari, Rashel Moritz, Mary Williamson, Vimal Manohar, Yossi Adi, Jay Mahadeokar, Wei-Ning Hsu", "tldr": "", "abstract": "Large-scale generative models such as GPT and DALL-E have revolutionized the research community. These models not only generate high fidelity outputs, but are also generalists which can solve tasks not explicitly taught. In contrast, speech generative models are still primitive in terms of scale and task generalization. In this paper, we present Voicebox, the most versatile text-guided generative model for speech at scale. Voicebox is a non-autoregressive flow-matching model trained to infill speech, given audio context and text, trained on over 50K hours of speech that are not filtered or enhanced. Similar to GPT, Voicebox can perform many different tasks through in-context learning, but is more flexible as it can also condition on future context. Voicebox can be used for mono or cross-lingual zero-shot text-to-speech synthesis, noise removal, content editing, style conversion, and diverse sample generation. In particular, Voicebox outperforms the state-of-the-art zero-shot TTS model VALL-E on both intelligibility (5.9\\% vs 1.9\\% word error rates) and audio similarity (0.580 vs 0.681) while being up to 20 times faster. 
Audio samples can be found in \\url{https://voicebox.metademolab.com}.", "keywords": "speech generation;flow-matching;diffusion;in-context learning;text-to-speech", "primary_area": "", "supplementary_material": "/attachment/8e15795cbb5feb07a419a544e66299b621b42157.pdf", "author": "Matthew Le;Apoorv Vyas;Bowen Shi;Brian Karrer;Leda Sari;Rashel Moritz;Mary Williamson;Vimal Manohar;Yossi Adi;Jay Mahadeokar;Wei-Ning Hsu", "authorids": "~Matthew_Le2;~Apoorv_Vyas1;~Bowen_Shi1;briankarrer@meta.com;~Leda_Sari1;~Rashel_Moritz1;~Mary_Williamson2;~Vimal_Manohar2;~Yossi_Adi1;~Jay_Mahadeokar1;~Wei-Ning_Hsu2", "gender": ";;M;;;;;M;M;M;", "homepage": ";https://apoorv2904.github.io/;;;;;;https://vimalmanohar.github.io/;http://adiyoss.github.io/;;", "dblp": ";162/6169;;;;;;;171/0957.html;125/3716;", "google_scholar": ";https://scholar.google.com/citations?hl=en;xqyoorYAAAAJ;;;;;8t3Ex0QAAAAJ;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ;;", "orcid": ";;;;;;;;0000-0003-2237-3898;;", "linkedin": ";;;;;rashelmoritz/;;;yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic;;", "or_profile": "~Matthew_Le2;~Apoorv_Vyas1;~Bowen_Shi1;briankarrer@meta.com;~Leda_Sari1;~Rashel_Moritz1;~Mary_Williamson2;~Vimal_Manohar2;~Yossi_Adi1;~Jay_Mahadeokar1;~Wei-Ning_Hsu2", "aff": ";Meta ;Toyota Technological Institute at Chicago;;;;;Meta Platforms Inc.;Meta;;", "aff_domain": ";meta.com;ttic.edu;;;;;meta.com;meta.com;;", "position": ";Researcher;PhD student;;;;;Researcher;Research Scientist;;", "bibtex": "@inproceedings{\nle2023voicebox,\ntitle={Voicebox: Text-Guided Multilingual Universal Speech Generation at Scale},\nauthor={Matthew Le and Apoorv Vyas and Bowen Shi and Brian Karrer and Leda Sari and Rashel Moritz and Mary Williamson and Vimal Manohar and Yossi Adi and Jay Mahadeokar and Wei-Ning Hsu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=gzCS252hCO}\n}", "github": "", "project": "", "reviewers": "MQeA;pReF;F9C6;K4NY;Peh7", "pdf_size": 1742635, "rating": "5;5;6;7;8", "confidence": "5;4;5;5;4", "soundness": "2;3;3;4;4", "novelty": "1;3;3;4;4", "presentation": "3;3;3;4;4", "wc_summary": "33;57;48;139;69", "wc_strengths": "27;52;30;88;70", "wc_weaknesses": "476;88;120;41;55", "wc_questions": "170;19;36;126;71", "wc_limitations": "12;13;1;19;8", "wc_review": "718;229;235;413;273", "wc_reply_reviewers": "767;0;0;64;0", "wc_reply_authors": "1327;0;0;0;0", "reply_reviewers": "3;0;0;1;0", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 1.0954451150103321 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 69.2, 36.82607771674849 ], "wc_strengths_avg": [ 53.4, 23.320377355437454 ], "wc_weaknesses_avg": [ 156.0, 162.32436662436112 ], "wc_questions_avg": [ 84.4, 56.31553959610083 ], "wc_limitations_avg": [ 10.6, 5.9531504264548865 ], "wc_review_avg": [ 373.6, 184.60942554485132 ], "wc_reply_reviewers_avg": [ 166.2, 301.420901730454 ], "wc_reply_authors_avg": [ 265.4, 530.8 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.21004201260420144, "gs_citation": 299, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5278109333565780800&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": 
";meta.com;ttic.edu;;;;;meta.com;meta.com;;", "author_num": 11, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Meta;Toyota Technological Institute at Chicago", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.tti-chicago.org", "aff_unique_abbr": "Meta;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Resilient Constrained Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70814", "id": "h0RVoZuUl6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e32349fe7e3cd4f9ef598c2b7b7a31f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=h0RVoZuUl6", "openreview": "https://openreview.net/forum?id=h0RVoZuUl6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70814", "video": "https://nips.cc/virtual/2023/poster/70814", "author_site": "Ignacio Hounie, Alejandro Ribeiro, Luiz F. O. Chamon", "tldr": "", "abstract": "When deploying machine learning solutions, they must satisfy multiple requirements beyond accuracy, such as fairness, robustness, or safety. These requirements are imposed during training either implicitly, using penalties, or explicitly, using constrained optimization methods based on Lagrangian duality. Either way, specifying requirements is hindered by the presence of compromises and limited prior knowledge about the data. Furthermore, their impact on performance can often only be evaluated by actually solving the learning problem. This paper presents a constrained learning approach that adapts the requirements while simultaneously solving the learning task. To do so, it relaxes the learning constraints in a way that contemplates how much they affect the task at hand by balancing the performance gains obtained from the relaxation against a user-defined cost of that relaxation. We call this approach resilient constrained learning after the term used to describe ecological systems that adapt to disruptions by modifying their operation. We show conditions under which this balance can be achieved and introduce a practical algorithm to compute it, for which we derive approximation and generalization guarantees. We showcase the advantages of this resilient learning method in image classification tasks involving multiple potential invariances and in federated learning under distribution shift.", "keywords": "Constrained Learning;Relaxation;Lagrangian duality;Primal-Dual;Machine Learning;Federated Learning;Invariance", "primary_area": "", "supplementary_material": "/attachment/17e255f7b894c1e995d7b4142721956f1f0e2271.zip", "author": "Ignacio Hounie;Alejandro Ribeiro;Luiz F. O. 
Chamon", "authorids": "~Ignacio_Hounie1;~Alejandro_Ribeiro1;~Luiz_F._O._Chamon1", "gender": ";M;M", "homepage": ";https://alelab.seas.upenn.edu;https://www.luizchamon.com", "dblp": ";32/15;120/6982", "google_scholar": "V0h3OSYAAAAJ;7mrPM4kAAAAJ;https://scholar.google.ca/citations?user=FIm-l-sAAAAJ", "orcid": ";0000-0003-4230-9906;0000-0001-7731-6650", "linkedin": ";;luiz-chamon-abb07a18", "or_profile": "~Ignacio_Hounie1;~Alejandro_Ribeiro1;~Luiz_F._O._Chamon1", "aff": "University of Pennsylvania;University of Pennsylvania;Universit\u00e4t Stuttgart", "aff_domain": "upenn.edu;upenn.edu;uni-stuttgart.de", "position": "PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nhounie2023resilient,\ntitle={Resilient Constrained Learning},\nauthor={Ignacio Hounie and Alejandro Ribeiro and Luiz F. O. Chamon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=h0RVoZuUl6}\n}", "github": "", "project": "", "reviewers": "69dA;iyNP;4YtT;tedN", "pdf_size": 735322, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;3", "wc_summary": "106;107;87;76", "wc_strengths": "86;97;55;60", "wc_weaknesses": "96;121;41;53", "wc_questions": "23;2;49;14", "wc_limitations": "6;1;67;9", "wc_review": "317;328;299;212", "wc_reply_reviewers": "22;0;25;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 13.095800853708795 ], "wc_strengths_avg": [ 74.5, 17.528548142958105 ], "wc_weaknesses_avg": [ 77.75, 32.275183965393595 ], "wc_questions_avg": [ 22.0, 17.277152543170995 ], "wc_limitations_avg": [ 20.75, 26.85493436968335 ], "wc_review_avg": [ 289.0, 45.645372164108814 ], "wc_reply_reviewers_avg": [ 15.5, 9.656603957913983 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5511817433657885097&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 8, "email": "upenn.edu;upenn.edu;uni-stuttgart.de", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Pennsylvania;University of Stuttgart", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.uni-stuttgart.de", "aff_unique_abbr": "UPenn;Uni Stuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Germany" }, { "title": "Improved Convergence in High Probability of Clipped Gradient Methods with Heavy Tailed Noise", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70813", "id": "h1FhXVM0cB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c454d34f3a4c8d6b4ca85a918e5d7ba-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=h1FhXVM0cB", "openreview": "https://openreview.net/forum?id=h1FhXVM0cB", "poster": "/media/PosterPDFs/NeurIPS%202023/70813.png?t=1701887076.380204", "slides": "https://nips.cc/virtual/2023/poster/70813", "video": "https://nips.cc/virtual/2023/poster/70813", "author_site": "Ta Duy Nguyen, Thien H 
Nguyen, Alina Ene, Huy Nguyen", "tldr": "", "abstract": "In this work, we study the convergence in high probability of clipped gradient methods when the noise distribution has heavy tails, i.e., with bounded $p$th moments, for some $1>.", "keywords": "decision making;artificial intelligence;reinforcement learning;curiosity;Gumbel MuZero;Java", "primary_area": "", "supplementary_material": "/attachment/3b9ed138fb875a10ae3c2d37b8a8af1fce7eb548.zip", "author": "Matthias Unverzagt", "authorids": "~Matthias_Unverzagt1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "https://scholar.google.com/citations?hl=de", "orcid": "0009-0006-6685-2208", "linkedin": "matthias-unverzagt-b101993/", "or_profile": "~Matthias_Unverzagt1", "aff": "ENPASOS - Enterprise Patterns & Solutions GmbH", "aff_domain": "enpasos.com", "position": "Researcher", "bibtex": "@misc{\nunverzagt2023agents,\ntitle={Agents Explore the Environment Beyond Good Actions to Improve Their Model for Better Decisions},\nauthor={Matthias Unverzagt},\nyear={2023},\nurl={https://openreview.net/forum?id=iQhtJD1l7C}\n}", "github": "", "project": "", "reviewers": "qakF;8jEf;8rVX;tFVD", "site": "https://openreview.net/forum?id=iQhtJD1l7C", "pdf_size": 835107, "rating": "1;3;3;4", "confidence": "5;4;3;5", "soundness": "1;1;2;3", "novelty": "1;1;2;2", "presentation": "1;2;2;2", "wc_summary": "118;71;34;48", "wc_strengths": "67;22;18;97", "wc_weaknesses": "322;121;44;406", "wc_questions": "66;1;4;82", "wc_limitations": "10;1;24;1", "wc_review": "583;216;124;634", "wc_reply_reviewers": "42;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 2.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "novelty_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 31.877695964420013 ], "wc_strengths_avg": [ 51.0, 32.79481666361317 ], "wc_weaknesses_avg": [ 223.25, 146.4024846100639 ], "wc_questions_avg": [ 38.25, 36.21032311371993 ], "wc_limitations_avg": [ 9.0, 9.40744386111339 ], "wc_review_avg": [ 389.25, 222.38185065333008 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.20751433915982243, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9UnX_fHw6PcJ:scholar.google.com/&scioq=Agents+Explore+the+Environment+Beyond+Good+Actions+to+Improve+Their+Model+for+Better+Decisions&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "ENPASOS - Enterprise Patterns & Solutions GmbH", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Uncertainty-Aware Alignment Network for Cross-Domain Video-Text Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70738", "id": "iQlK3VJxV7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/78526d7ad4a2532bd91416e948b9644c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iQlK3VJxV7", "openreview": "https://openreview.net/forum?id=iQlK3VJxV7", "poster": "/media/PosterPDFs/NeurIPS%202023/70738.png?t=1697375859.0915961", "slides": "https://nips.cc/virtual/2023/poster/70738", "video": 
"https://nips.cc/virtual/2023/poster/70738", "author_site": "Xiaoshuai Hao, Wanqian Zhang", "tldr": "", "abstract": "Video-text retrieval is an important but challenging research task in the multimedia community. In this paper, we address the challenge task of Unsupervised Domain Adaptation Video-text Retrieval (UDAVR), assuming that training (source) data and testing (target) data are from different domains. Previous approaches are mostly derived from classification based domain adaptation methods, which are neither multi-modal nor suitable for retrieval task. In addition, as to the pairwise misalignment issue in target domain, i.e., no pairwise annotations between target videos and texts, the existing method assumes that a video corresponds to a text. Yet we empirically find that in the real scene, one text usually corresponds to multiple videos and vice versa. To tackle this one-to-many issue, we propose a novel method named Uncertainty-aware Alignment Network (UAN). Specifically, we first introduce the multimodal mutual information module to balance the minimization of domain shift in a smooth manner. To tackle the multimodal uncertainties pairwise misalignment in target domain, we propose the Uncertainty-aware Alignment Mechanism (UAM) to fully exploit the semantic information of both modalities in target domain. Extensive experiments in the context of domain-adaptive video-text retrieval demonstrate that our proposed method consistently outperforms multiple baselines, showing a superior generalization ability for target data.", "keywords": "video-text retrieval; cross-domain;Unsupervised Domain Adaptation Video-text Retrieval;", "primary_area": "", "supplementary_material": "", "author": "Xiaoshuai Hao;Wanqian Zhang", "authorids": "~Xiaoshuai_Hao1;~Wanqian_Zhang1", "gender": "M;M", "homepage": "https://github.com/haoshuai714;https://people.ucas.ac.cn/~wanqian", "dblp": "271/8403;226/6532", "google_scholar": "https://scholar.google.com.hk/citations?user=ui0lvY4AAAAJ;", "orcid": ";0000-0001-5734-4072", "linkedin": ";", "or_profile": "~Xiaoshuai_Hao1;~Wanqian_Zhang1", "aff": "Institute of Information Engineering\uff0cChinese Academy of Sciences;IIE, CAS", "aff_domain": "iie.ac.cn;iie.ac.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nhao2023uncertaintyaware,\ntitle={Uncertainty-Aware Alignment Network for Cross-Domain Video-Text Retrieval},\nauthor={Xiaoshuai Hao and Wanqian Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iQlK3VJxV7}\n}", "github": "", "project": "", "reviewers": "dcEM;muWC;NzVt;FYKf;qpQ6", "pdf_size": 8233883, "rating": "5;5;5;6;6", "confidence": "4;4;5;5;5", "soundness": "3;3;3;3;3", "novelty": "3;2;4;3;3", "presentation": "2;2;4;4;3", "wc_summary": "59;49;46;113;78", "wc_strengths": "39;70;41;94;48", "wc_weaknesses": "83;291;24;255;91", "wc_questions": "1;85;314;78;104", "wc_limitations": "1;26;1;41;13", "wc_review": "183;521;426;581;334", "wc_reply_reviewers": "0;462;257;33;22", "wc_reply_authors": "0;660;310;33;34", "reply_reviewers": "0;3;1;1;1", "reply_authors": "1;5;2;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 69.0, 24.68197723035981 ], "wc_strengths_avg": [ 58.4, 20.924626639440906 ], "wc_weaknesses_avg": [ 148.8, 104.63727825206463 ], 
"wc_questions_avg": [ 116.4, 104.85914361656783 ], "wc_limitations_avg": [ 16.4, 15.383107618423528 ], "wc_review_avg": [ 409.0, 140.7963067697445 ], "wc_reply_reviewers_avg": [ 154.8, 179.58329543696428 ], "wc_reply_authors_avg": [ 207.4, 252.5356212497556 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6503250758776203332&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "iie.ac.cn;iie.ac.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese Academy of Sciences;Institute of Electrical Engineers, Chinese Academy of Sciences", "aff_unique_dep": "Institute of Information Engineering;", "aff_unique_url": "http://www.cas.cn;http://www.iie.cas.cn", "aff_unique_abbr": "CAS;IIE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "iRiEpc7jpa", "title": "Low-Rank Learning by Design: the Role of Network Architecture and Activation Linearity in Gradient Rank Collapse", "track": "main", "status": "Reject", "tldr": "", "abstract": "Our understanding of learning dynamics of deep neural networks (DNNs) remains incomplete. Recent research has begun to uncover the mathematical principles underlying these networks, including the phenomenon of ``Neural Collapse'', where linear classifiers within DNNs converge to specific geometrical structures during late-stage training. However, the role of geometric constraints in learning extends beyond this terminal phase. For instance, gradients in fully-connected layers naturally develop a low-rank structure due to the accumulation of rank-one outer products over a training batch. Despite the attention given to methods that exploit this structure for memory saving or regularization, the emergence of low-rank learning as an inherent aspect of certain DNN architectures has been under-explored. In this paper, we conduct a comprehensive study of gradient rank in DNNs, examining how architectural choices and structure of the data affect gradient rank bounds. Our theoretical analysis provides these bounds for training fully-connected, recurrent, and convolutional neural networks. We also demonstrate, both theoretically and empirically, how design choices like activation function linearity, bottleneck layer introduction, convolutional stride, and sequence truncation influence these bounds. Our findings not only contribute to the understanding of learning dynamics in DNNs, but also provide practical guidance for deep learning engineers to make informed design decisions.", "keywords": "deep learning theory;learning dynamics;neural collapse;back-propagation;low-rank;gradient;rank;theory;dynamics", "primary_area": "", "supplementary_material": "/attachment/a7c5f57407e4c172515b63c9ef4a3cc5e30cd7dd.zip", "author": "Bradley Thomas Baker;Barak A. 
Pearlmutter;Robyn Miller;Vince Calhoun;Sergey Plis", "authorids": "~Bradley_Thomas_Baker1;~Barak_A._Pearlmutter1;~Robyn_Miller1;~Vince_Calhoun1;~Sergey_Plis1", "gender": ";M;;;M", "homepage": "https://bbradt.com;http://barak.pearlmutter.net;;;", "dblp": "171/0946;36/3031;;48/3821.html;07/227", "google_scholar": ";AxFrw0sAAAAJ;zPpJc94AAAAJ;WNOoGKIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-0521-4553;;;0000-0003-0040-0365", "linkedin": ";barak-pearlmutter-172ba5/;;;sergeyplis/", "or_profile": "~Bradley_Thomas_Baker1;~Barak_A._Pearlmutter1;~Robyn_Miller1;~Vince_Calhoun1;~Sergey_Plis1", "aff": "Georgia Institute of Technology;Maynooth University;Georgia State University;Emory University;Georgia State University", "aff_domain": "gatech.edu;mu.ie;gsu.edu;emory.edu;gsu.edu", "position": "PhD student;Full Professor;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nbaker2023lowrank,\ntitle={Low-Rank Learning by Design: the Role of Network Architecture and Activation Linearity in Gradient Rank Collapse},\nauthor={Bradley Thomas Baker and Barak A. Pearlmutter and Robyn Miller and Vince Calhoun and Sergey Plis},\nyear={2023},\nurl={https://openreview.net/forum?id=iRiEpc7jpa}\n}", "github": "", "project": "", "reviewers": "TrYs;nQBm;ciGq;DvNk", "site": "https://openreview.net/forum?id=iRiEpc7jpa", "pdf_size": 3193798, "rating": "3;3;5;6", "confidence": "4;3;2;3", "soundness": "2;2;3;3", "novelty": "1;2;3;3", "presentation": "1;2;3;3", "wc_summary": "59;138;77;145", "wc_strengths": "63;27;87;48", "wc_weaknesses": "203;213;51;45", "wc_questions": "121;41;50;4", "wc_limitations": "110;8;13;1", "wc_review": "556;427;278;243", "wc_reply_reviewers": "80;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 104.75, 37.37897136091361 ], "wc_strengths_avg": [ 56.25, 21.878928218722233 ], "wc_weaknesses_avg": [ 128.0, 80.10617953691214 ], "wc_questions_avg": [ 54.0, 42.3497343557194 ], "wc_limitations_avg": [ 33.0, 44.65982534672521 ], "wc_review_avg": [ 376.0, 124.79382997568429 ], "wc_reply_reviewers_avg": [ 20.0, 34.64101615137755 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1100095102688186930&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Georgia Institute of Technology;Maynooth University;Georgia State University;Emory University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.gatech.edu;https://www.maynoothuniversity.ie;https://www.gsu.edu;https://www.emory.edu", "aff_unique_abbr": "Georgia Tech;MU;GSU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Ireland" }, { "title": "A Trichotomy for Transductive Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70737", "id": "iSd8g75QvP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3e32af2df2cd13dfbcbe6e8d38111068-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=iSd8g75QvP", "openreview": "https://openreview.net/forum?id=iSd8g75QvP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70737", "video": "https://nips.cc/virtual/2023/poster/70737", "author_site": "Steve Hanneke, Shay Moran, Jonathan Shafer", "tldr": "", "abstract": "We present new upper and lower bounds on the number of learner mistakes in the `transductive' online learning setting of Ben-David, Kushilevitz and Mansour (1997).\n This setting is similar to standard online learning, except that the adversary fixes a sequence of instances $x_1,\\dots,x_n$ to be labeled at the start of the game, and this sequence is known to the learner.\n Qualitatively, we prove a \\emph{trichotomy}, stating that the minimal number of mistakes made by the learner as $n$ grows can take only one of precisely three possible values: $n$, $\\Theta\\left(\\log (n)\\right)$, or $\\Theta(1)$.\n Furthermore, this behavior is determined by a combination of the VC dimension and the Littlestone dimension.\n Quantitatively, we show a variety of bounds relating the number of mistakes to well-known combinatorial dimensions.\n In particular, we improve the known lower bound on the constant in the $\\Theta(1)$ case from $\\Omega\\left(\\sqrt{\\log(d)}\\right)$ to $\\Omega(\\log(d))$ where $d$ is the Littlestone dimension.\n Finally, we extend our results to cover multiclass classification and the agnostic setting.", "keywords": "Online Learning;Transductive Online Learning;Offline Learning;Mistake Bound", "primary_area": "", "supplementary_material": "/attachment/9f8964d8cb51aec2b5e0786c288b5ca181eeefb5.pdf", "author": "Steve Hanneke;Shay Moran;Jonathan Shafer", "authorids": "~Steve_Hanneke1;~Shay_Moran1;~Jonathan_Shafer1", "gender": "M;M;", "homepage": "http://www.stevehanneke.com;http://www.cs.technion.ac.il/~shaymrn/;", "dblp": "40/154;119/5111;", "google_scholar": "fEhNO7YAAAAJ;kALYnggAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Steve_Hanneke1;~Shay_Moran1;~Jonathan_Shafer1", "aff": "Purdue University;Google;", "aff_domain": "purdue.edu;google.com;", "position": "Assistant Professor;Visiting Faculty;", "bibtex": "@inproceedings{\nhanneke2023a,\ntitle={A Trichotomy for Transductive Online Learning},\nauthor={Steve Hanneke and Shay Moran and Jonathan Shafer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iSd8g75QvP}\n}", "github": "", "project": "", "reviewers": "GnLU;gZ3U;e1Y3;Hibd;a8Lo", "pdf_size": 415688, "rating": "5;5;6;7;7", "confidence": "3;4;3;4;4", "soundness": "4;4;3;4;3", "novelty": "2;3;1;2;3", "presentation": "4;4;2;3;3", "wc_summary": "158;187;62;83;91", "wc_strengths": "71;76;10;44;174", "wc_weaknesses": "165;141;149;369;243", "wc_questions": "71;21;20;143;102", "wc_limitations": "1;1;7;1;2", "wc_review": "466;426;248;640;612", "wc_reply_reviewers": "62;50;49;194;487", "wc_reply_authors": "45;24;375;185;118", "reply_reviewers": "1;1;2;2;2", "reply_authors": "3;3;4;4;3", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 116.2, 47.82217059063714 ], "wc_strengths_avg": [ 75.0, 54.779558231150425 ], "wc_weaknesses_avg": [ 213.4, 85.81281955512242 ], "wc_questions_avg": [ 71.4, 47.424044534392046 ], "wc_limitations_avg": [ 2.4, 2.33238075793812 ], "wc_review_avg": [ 478.4, 
141.38401606970993 ], "wc_reply_reviewers_avg": [ 168.4, 168.37885853039865 ], "wc_reply_authors_avg": [ 149.4, 126.31167800326304 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 3.4, 0.4898979485566356 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.45643546458763845, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12545170525774618431&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "purdue.edu;google.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Purdue University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.purdue.edu;https://www.google.com", "aff_unique_abbr": "Purdue;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Training from Mean Field Perspective", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70736", "id": "iT9MOAZqsb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/edcd1aa172dceda2ea9d45a48f25d3e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iT9MOAZqsb", "openreview": "https://openreview.net/forum?id=iT9MOAZqsb", "poster": "/media/PosterPDFs/NeurIPS%202023/70736.png?t=1701184767.4500334", "slides": "https://nips.cc/virtual/2023/poster/70736", "video": "https://nips.cc/virtual/2023/poster/70736", "author_site": "Soichiro Kumano, Hiroshi Kera, Toshihiko Yamasaki", "tldr": "", "abstract": "Although adversarial training is known to be effective against adversarial examples, training dynamics are not well understood. In this study, we present the first theoretical analysis of adversarial training in random deep neural networks without any assumptions on data distributions. We introduce a new theoretical framework based on mean field theory, which addresses the limitations of existing mean field-based approaches. Based on the framework, we derive the (empirically tight) upper bounds of $\\ell_q$ norm-based adversarial loss with $\\ell_p$ norm-based adversarial examples for various values of $p$ and $q$. Moreover, we prove that networks without shortcuts are generally not adversarially trainable and that adversarial training reduces network capacity. We also show that the network width alleviates these issues. 
Furthermore, the various impacts of input and output dimensions on the upper bounds and time evolution of weight variance are presented.", "keywords": "adversarial training; mean field theory", "primary_area": "", "supplementary_material": "", "author": "Soichiro Kumano;Hiroshi Kera;Toshihiko Yamasaki", "authorids": "~Soichiro_Kumano1;~Hiroshi_Kera1;~Toshihiko_Yamasaki1", "gender": "M;M;M", "homepage": "https://s-kumano.github.io/;;http://www.cvm.t.u-tokyo.ac.jp/en/", "dblp": "280/3037;190/2671;81/881", "google_scholar": ";https://scholar.google.co.jp/citations?user=M4Krt5gAAAAJ;rE9iY5MAAAAJ", "orcid": ";;0000-0002-1784-2314", "linkedin": ";;", "or_profile": "~Soichiro_Kumano1;~Hiroshi_Kera1;~Toshihiko_Yamasaki1", "aff": "The University of Tokyo;Chiba University;The University of Tokyo", "aff_domain": "g.ecc.u-tokyo.ac.jp;chiba-u.jp;u-tokyo.ac.jp", "position": "MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkumano2023adversarial,\ntitle={Adversarial Training from Mean Field Perspective},\nauthor={Soichiro Kumano and Hiroshi Kera and Toshihiko Yamasaki},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iT9MOAZqsb}\n}", "github": "", "project": "", "reviewers": "CfQ9;55dm;e8jY;QtQE", "pdf_size": 2823545, "rating": "6;6;7;8", "confidence": "3;2;3;4", "soundness": "3;3;4;4", "novelty": "3;2;4;4", "presentation": "2;3;4;4", "wc_summary": "54;43;82;78", "wc_strengths": "106;33;49;53", "wc_weaknesses": "858;30;56;262", "wc_questions": "2;21;59;3", "wc_limitations": "2;4;1;51", "wc_review": "1022;131;247;447", "wc_reply_reviewers": "22;0;0;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 64.25, 16.284578594486256 ], "wc_strengths_avg": [ 60.25, 27.453369556395078 ], "wc_weaknesses_avg": [ 301.5, 333.62965995246884 ], "wc_questions_avg": [ 21.25, 23.069189409253198 ], "wc_limitations_avg": [ 14.5, 21.10094784600919 ], "wc_review_avg": [ 461.75, 342.6407557486412 ], "wc_reply_reviewers_avg": [ 11.25, 11.255554184490428 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13628209105902401386&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "g.ecc.u-tokyo.ac.jp;chiba-u.jp;u-tokyo.ac.jp", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Tokyo;Chiba University", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.chiba-u.ac.jp", "aff_unique_abbr": "UTokyo;Chiba U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "On the Identifiability and Interpretability of Gaussian Process Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70735", "id": "iVYInarGXg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dea2b4f9012686bcc1f59a62bcd28158-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iVYInarGXg", "openreview": 
"https://openreview.net/forum?id=iVYInarGXg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70735", "video": "https://nips.cc/virtual/2023/poster/70735", "author_site": "Jiawen Chen, Wancen Mu, Yun Li, Didong Li", "tldr": "", "abstract": "In this paper, we critically examine the prevalent practice of using additive mixtures of Mat\\'ern kernels in single-output Gaussian process (GP) models and explore the properties of multiplicative mixtures of Mat\\'ern kernels for multi-output GP models. For the single-output case, we derive a series of theoretical results showing that the smoothness of a mixture of Mat\\'ern kernels is determined by the least smooth component and that a GP with such a kernel is effectively equivalent to the least smooth kernel component. Furthermore, we demonstrate that none of the mixing weights or parameters within individual kernel components are identifiable. We then turn our attention to multi-output GP models and analyze the identifiability of the covariance matrix $A$ in the multiplicative kernel $K(x,y) = AK_0(x,y)$, where $K_0$ is a standard single output kernel such as Mat\\'ern. We show that $A$ is identifiable up to a multiplicative constant, suggesting that multiplicative mixtures are well suited for multi-output tasks. Our findings are supported by extensive simulations and real applications for both single- and multi-output settings. This work provides insight into kernel selection and interpretation for GP models, emphasizing the importance of choosing appropriate kernel structures for different tasks.", "keywords": "Gaussian process;Identifiability;Interpretability;Mixture kernel;Separable kernel", "primary_area": "", "supplementary_material": "/attachment/451b3472e3b356a69c1a434c1575d7ea2d3bbe9c.pdf", "author": "Jiawen Chen;Wancen Mu;Yun Li;Didong Li", "authorids": "~Jiawen_Chen4;~Wancen_Mu1;~Yun_Li7;~Didong_Li1", "gender": "F;F;Not Specified;", "homepage": "https://github.com/JiawenChenn;;https://yunliweb.its.unc.edu;https://sites.google.com/view/didongli/", "dblp": "04/6087;321/4392;;211/6299", "google_scholar": "_5IUKh0AAAAJ;mw7wEcsAAAAJ;https://scholar.google.com/citations?hl=en;YBVhMxoAAAAJ", "orcid": "0000-0002-6193-534X;0000-0002-5061-7581;0000-0002-9275-4189;0000-0001-9146-705X", "linkedin": ";wancen-mu/;;", "or_profile": "~Jiawen_Chen4;~Wancen_Mu1;~Yun_Li7;~Didong_Li1", "aff": "University of North Carolina at Chapel Hill;University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "unc.edu;unc.edu;unc.edu;unc.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023on,\ntitle={On the Identifiability and Interpretability of Gaussian Process Models},\nauthor={Jiawen Chen and Wancen Mu and Yun Li and Didong Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iVYInarGXg}\n}", "github": "", "project": "", "reviewers": "LXB2;j39y;dNVU;PY7Z", "pdf_size": 1346746, "rating": "3;5;6;7", "confidence": "3;3;3;3", "soundness": "2;2;3;3", "novelty": "1;3;3;3", "presentation": "1;3;3;3", "wc_summary": "117;112;59;108", "wc_strengths": "65;159;118;42", "wc_weaknesses": "429;1474;72;45", "wc_questions": "19;332;281;136", "wc_limitations": "19;2;15;39", "wc_review": "649;2079;545;370", "wc_reply_reviewers": "438;2576;5;85", "wc_reply_authors": "1920;4544;758;708", "reply_reviewers": "1;7;1;1", "reply_authors": 
"4;10;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 99.0, 23.313086453749534 ], "wc_strengths_avg": [ 96.0, 45.63441683641854 ], "wc_weaknesses_avg": [ 505.0, 579.6175463182598 ], "wc_questions_avg": [ 192.0, 123.07111765154325 ], "wc_limitations_avg": [ 18.75, 13.273563952458284 ], "wc_review_avg": [ 910.75, 681.8182950757482 ], "wc_reply_reviewers_avg": [ 776.0, 1051.9227633243802 ], "wc_reply_authors_avg": [ 1982.5, 1556.353028718099 ], "reply_reviewers_avg": [ 2.5, 2.598076211353316 ], "reply_authors_avg": [ 4.5, 3.278719262151 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9402829647354479105&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "unc.edu;unc.edu;unc.edu;unc.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Provable benefits of annealing for estimating normalizing constants: Importance Sampling, Noise-Contrastive Estimation, and beyond", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70734", "id": "iWGC0Nsq9i", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/90080022263cddafddd4a0726f1fb186-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iWGC0Nsq9i", "openreview": "https://openreview.net/forum?id=iWGC0Nsq9i", "poster": "/media/PosterPDFs/NeurIPS%202023/70734.png?t=1702229402.182562", "slides": "https://nips.cc/virtual/2023/poster/70734", "video": "https://nips.cc/virtual/2023/poster/70734", "author_site": "Omar Chehab, Aapo Hyvarinen, Andrej Risteski", "tldr": "", "abstract": "Recent research has developed several Monte Carlo methods for estimating the normalization constant (partition function) based on the idea of annealing. This means sampling successively from a path of distributions which interpolate between a tractable \"proposal\" distribution and the unnormalized \"target\" distribution. Prominent estimators in this family include annealed importance sampling and annealed noise-contrastive estimation (NCE). Such methods hinge on a number of design choices: which estimator to use, which path of distributions to use and whether to use a path at all; so far, there is no definitive theory on which choices are efficient. Here, we evaluate each design choice by the asymptotic estimation error it produces. First, we show that using NCE is more efficient than the importance sampling estimator, but in the limit of infinitesimal path steps, the difference vanishes. Second, we find that using the geometric path brings down the estimation error from an exponential to a polynomial function of the parameter distance between the target and proposal distributions. Third, we find that the arithmetic path, while rarely used, can offer optimality properties over the universally-used geometric path. In fact, in a particular limit, the optimal path is arithmetic. 
Based on this theory, we finally propose a two-step estimator to approximate the optimal path in an efficient way.", "keywords": "noise-contrastive estimation;annealed importance sampling", "primary_area": "", "supplementary_material": "/attachment/09061c11b23ead1fe381c0c1f1f847595f60a4a0.pdf", "author": "Omar Chehab;Aapo Hyvarinen;Andrej Risteski", "authorids": "~Omar_Chehab1;~Aapo_Hyvarinen1;~Andrej_Risteski2", "gender": ";;M", "homepage": ";https://www.cs.helsinki.fi/u/ahyvarin/;", "dblp": "271/4406;56/3623;63/11143", "google_scholar": "P0nFnfAAAAAJ;https://scholar.google.co.jp/citations?user=UnrY-40AAAAJ;", "orcid": ";0000-0002-5806-4432;", "linkedin": ";;", "or_profile": "~Omar_Chehab1;~Aapo_Hyvarinen1;~Andrej_Risteski2", "aff": "INRIA;University of Helsinki;Carnegie Mellon University", "aff_domain": "inria.fr;helsinki.fi;cmu.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchehab2023provable,\ntitle={Provable benefits of annealing for estimating normalizing constants: Importance Sampling, Noise-Contrastive Estimation, and beyond},\nauthor={Omar Chehab and Aapo Hyvarinen and Andrej Risteski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iWGC0Nsq9i}\n}", "github": "", "project": "", "reviewers": "EpxH;q5r2;peRm;6Yhf", "pdf_size": 525972, "rating": "5;6;8;8", "confidence": "2;3;3;3", "soundness": "2;4;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "59;368;85;380", "wc_strengths": "67;56;60;183", "wc_weaknesses": "238;97;212;120", "wc_questions": "365;42;131;86", "wc_limitations": "1;2;1;117", "wc_review": "730;565;489;886", "wc_reply_reviewers": "467;64;296;0", "wc_reply_authors": "28;112;453;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 223.0, 151.33902338788894 ], "wc_strengths_avg": [ 91.5, 52.974050251042726 ], "wc_weaknesses_avg": [ 166.75, 59.52887954598172 ], "wc_questions_avg": [ 156.0, 124.70164393463304 ], "wc_limitations_avg": [ 30.25, 50.08679965819338 ], "wc_review_avg": [ 667.5, 153.3109585124299 ], "wc_reply_reviewers_avg": [ 206.75, 186.29194158631768 ], "wc_reply_authors_avg": [ 148.25, 180.7102307563133 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14544472219933691141&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "inria.fr;helsinki.fi;cmu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "INRIA;University of Helsinki;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.inria.fr;https://www.helsinki.fi;https://www.cmu.edu", "aff_unique_abbr": "INRIA;UH;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "France;Finland;United States" }, { "title": "PCF-GAN: generating sequential data via the characteristic function of measures on the path space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70733", "id": "iWWLgcUTZU", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d0e867582cdc156fd280d5a6aa1be08-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iWWLgcUTZU", "openreview": "https://openreview.net/forum?id=iWWLgcUTZU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70733", "video": "https://nips.cc/virtual/2023/poster/70733", "author_site": "Hang Lou, Siran Li, Hao Ni", "tldr": "", "abstract": "Generating high-fidelity time series data using generative adversarial networks (GANs) remains a challenging task, as it is difficult to capture the temporal dependence of joint probability distributions induced by time-series data. Towards this goal, a key step is the development of an effective discriminator to distinguish between time series distributions. We propose the so-called PCF-GAN, a novel GAN that incorporates the path characteristic function (PCF) as the principled representation of time series distribution into the discriminator to enhance its generative performance. On the one hand, we establish theoretical foundations of the PCF distance by proving its characteristicity, boundedness, differentiability with respect to generator parameters, and weak continuity, which ensure the stability and feasibility of training the PCF-GAN. On the other hand, we design efficient initialisation and optimisation schemes for PCFs to strengthen the discriminative power and accelerate training efficiency. To further boost the capabilities of complex time series generation, we integrate the auto-encoder structure via sequential embedding into the PCF-GAN, which provides additional reconstruction functionality. Extensive numerical experiments on various datasets demonstrate the consistently superior performance of PCF-GAN over state-of-the-art baselines, in both generation and reconstruction quality.", "keywords": "Generative adversarial networks;time series generation;rough path theory;Lie group", "primary_area": "", "supplementary_material": "", "author": "Hang Lou;Siran Li;Hao Ni", "authorids": "~Hang_Lou1;~Siran_Li1;~Hao_Ni2", "gender": "M;M;F", "homepage": "https://hanglou.github.io/;;https://iris.ucl.ac.uk/iris/browse/profile?upi=HNIXX56", "dblp": ";;", "google_scholar": "7QLW6sMAAAAJ;;https://scholar.google.co.uk/citations?user=VTTtSLcAAAAJ", "orcid": "0000-0001-7058-0029;0000-0003-4283-273X;0000-0001-5485-4376", "linkedin": ";;", "or_profile": "~Hang_Lou1;~Siran_Li1;~Hao_Ni2", "aff": "University College London, University of London;Shanghai Jiaotong University;University College London", "aff_domain": "ucl.ac.uk;sjtu.edu.cn;ucl.ac.uk", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlou2023pcfgan,\ntitle={{PCF}-{GAN}: generating sequential data via the characteristic function of measures on the path space},\nauthor={Hang Lou and Siran Li and Hao Ni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iWWLgcUTZU}\n}", "github": "", "project": "", "reviewers": "9Utd;EJ4R;BgyV;cYLd;EqQ9", "pdf_size": 4948463, "rating": "4;5;5;5;7", "confidence": "3;2;4;3;2", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "69;77;106;55;126", "wc_strengths": "28;59;36;79;41", "wc_weaknesses": "123;73;273;180;97", "wc_questions": "3;1;49;20;17", "wc_limitations": "3;25;4;9;43", "wc_review": "226;235;468;343;324", "wc_reply_reviewers": "0;17;55;0;0", "wc_reply_authors": "0;7;140;0;0", "reply_reviewers": "0;1;1;0;0", 
"reply_authors": "1;2;2;1;1", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 86.6, 25.803875677889938 ], "wc_strengths_avg": [ 48.6, 18.2931681236466 ], "wc_weaknesses_avg": [ 149.2, 71.40420155705125 ], "wc_questions_avg": [ 18.0, 17.204650534085253 ], "wc_limitations_avg": [ 16.8, 15.289211882893113 ], "wc_review_avg": [ 319.2, 87.75739285097296 ], "wc_reply_reviewers_avg": [ 14.4, 21.341040274550817 ], "wc_reply_authors_avg": [ 29.4, 55.36641581319853 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49099025303098287, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14107938343514502724&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;sjtu.edu.cn;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University College London;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.sjtu.edu.cn", "aff_unique_abbr": "UCL;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Similarity-based cooperative equilibrium", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70732", "id": "ia4AL3QnOv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4d0b6303d4a4811445f69f357bf6def5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ia4AL3QnOv", "openreview": "https://openreview.net/forum?id=ia4AL3QnOv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70732", "video": "https://nips.cc/virtual/2023/poster/70732", "author_site": "Caspar Oesterheld, Johannes Treutlein, Roger Grosse, Vincent Conitzer, Jakob Foerster", "tldr": "", "abstract": "As machine learning agents act more autonomously in the world, they will increasingly interact with each other. Unfortunately, in many social dilemmas like the one-shot Prisoner\u2019s Dilemma, standard game theory predicts that ML agents will fail to cooperate with each other. Prior work has shown that one way to enable cooperative outcomes in the one-shot Prisoner\u2019s Dilemma is to make the agents mutually transparent to each other, i.e., to allow them to access one another\u2019s source code (Rubinstein, 1998; Tennenholtz, 2004) \u2013 or weights in the case of ML agents. However, full transparency is often unrealistic, whereas partial transparency is commonplace. Moreover, it is challenging for agents to learn their way to cooperation in the full transparency setting. In this paper, we introduce a more realistic setting in which agents only observe a single number indicating how similar they are to each other. We prove that this allows for the same set of cooperative outcomes as the full transparency setting. 
We also demonstrate experimentally that cooperation can be learned using simple ML methods.", "keywords": "Program equilibrium;multi-agent learning;game theory;opponent shaping;superrationality;decision theory;cooperative AI;Newcomb's problem", "primary_area": "", "supplementary_material": "/attachment/a9762d5d7aeb23f902b4585b2d7a2e8983a38a93.zip", "author": "Caspar Oesterheld;Johannes Treutlein;Roger Baker Grosse;Vincent Conitzer;Jakob Nicolaus Foerster", "authorids": "~Caspar_Oesterheld1;~Johannes_Treutlein1;~Roger_Baker_Grosse1;~Vincent_Conitzer2;~Jakob_Nicolaus_Foerster1", "gender": "M;;M;M;M", "homepage": "https://www.andrew.cmu.edu/user/coesterh/;;http://www.cs.toronto.edu/~rgrosse/;https://www.cs.cmu.edu/~conitzer/;https://www.jakobfoerster.com", "dblp": "162/0000;;26/7058;c/VincentConitzer;176/5095", "google_scholar": "xeEcRjkAAAAJ;;xgQd1qgAAAAJ;juRk4lQAAAAJ;6z4lQzMAAAAJ", "orcid": "0000-0003-4222-7855;;;0000-0003-1899-7884;", "linkedin": ";;;vincent-conitzer-2563082/;", "or_profile": "~Caspar_Oesterheld1;~Johannes_Treutlein1;~Roger_Baker_Grosse1;~Vincent_Conitzer2;~Jakob_Nicolaus_Foerster1", "aff": "School of Computer Science, Carnegie Mellon University;;Vector Institute;University of Oxford;University of Oxford, University of Oxford", "aff_domain": "cs.cmu.edu;;vectorinstitute.ai;oxford.ac.uk;eng.ox.ac.uk", "position": "PhD student;;Faculty Member;Full Professor;Associate Professor", "bibtex": "@inproceedings{\noesterheld2023similaritybased,\ntitle={Similarity-based cooperative equilibrium},\nauthor={Caspar Oesterheld and Johannes Treutlein and Roger Baker Grosse and Vincent Conitzer and Jakob Nicolaus Foerster},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ia4AL3QnOv}\n}", "github": "", "project": "", "reviewers": "xnVF;EVhh;2ynN;ty7R", "pdf_size": 1667302, "rating": "4;6;6;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "3;4;4;4", "wc_summary": "69;234;127;86", "wc_strengths": "43;129;127;72", "wc_weaknesses": "132;66;157;38", "wc_questions": "24;144;8;23", "wc_limitations": "79;57;1;33", "wc_review": "347;630;420;252", "wc_reply_reviewers": "17;126;4;2", "wc_reply_authors": "0;771;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 129.0, 64.1833311693932 ], "wc_strengths_avg": [ 92.75, 36.71767285654144 ], "wc_weaknesses_avg": [ 98.25, 48.11639533464659 ], "wc_questions_avg": [ 49.75, 54.78309502027062 ], "wc_limitations_avg": [ 42.5, 28.96118091514916 ], "wc_review_avg": [ 412.25, 139.11573419279358 ], "wc_reply_reviewers_avg": [ 37.25, 51.56246212119821 ], "wc_reply_authors_avg": [ 192.75, 333.8527931589011 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13768034560462152950&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "cs.cmu.edu;;vectorinstitute.ai;oxford.ac.uk;eng.ox.ac.uk", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Carnegie Mellon University;Vector Institute;University of Oxford", "aff_unique_dep": "School of Computer Science;;", 
"aff_unique_url": "https://www.cmu.edu;https://vectorinstitute.ai/;https://www.ox.ac.uk", "aff_unique_abbr": "CMU;Vector Institute;Oxford", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "DELIFFAS: Deformable Light Fields for Fast Avatar Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70731", "id": "iajxrSgOSX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/805c06617d2b643278936daadfde4280-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iajxrSgOSX", "openreview": "https://openreview.net/forum?id=iajxrSgOSX", "poster": "/media/PosterPDFs/NeurIPS%202023/70731.png?t=1702207671.5144916", "slides": "https://nips.cc/virtual/2023/poster/70731", "video": "https://nips.cc/virtual/2023/poster/70731", "author_site": "Youngjoong Kwon, Lingjie Liu, Henry Fuchs, Marc Habermann, Christian Theobalt", "tldr": "", "abstract": "Generating controllable and photorealistic digital human avatars is a long-standing and important problem in Vision and Graphics. Recent methods have shown great progress in terms of either photorealism or inference speed while the combination of the two desired properties still remains unsolved. To this end, we propose a novel method, called DELIFFAS, which parameterizes the appearance of the human as a surface light field that is attached to a controllable and deforming human mesh model. At the core, we represent the light field around the human with a deformable two-surface parameterization, which enables fast and accurate inference of the human appearance. This allows perceptual supervision on the full image compared to previous approaches that could only supervise individual pixels or small patches due to their slow runtime. Our carefully designed human representation and supervision strategy leads to state-of-the-art synthesis results and inference time. 
The video results and code are available at https://vcai.mpi-inf.mpg.de/projects/DELIFFAS.", "keywords": "DELIFFAS: Avatar Modeling;Avatar Synthesis;Animatable Human;Light Fields;Human Performance Capture", "primary_area": "", "supplementary_material": "/attachment/0d762066f2aff2fb0f75902be1079ad31c7b8996.zip", "author": "YoungJoong Kwon;Lingjie Liu;Henry Fuchs;Marc Habermann;Christian Theobalt", "authorids": "~YoungJoong_Kwon1;~Lingjie_Liu1;~Henry_Fuchs1;~Marc_Habermann1;~Christian_Theobalt2", "gender": ";F;M;M;M", "homepage": ";https://lingjie0206.github.io/;http://www.cs.unc.edu/~fuchs/;https://people.mpi-inf.mpg.de/~mhaberma/;https://www.mpi-inf.mpg.de/~theobalt/", "dblp": ";204/0052;f/HenryFuchs;227/2744;55/3346", "google_scholar": ";https://scholar.google.de/citations?user=HZPnJ9gAAAAJ;https://scholar.google.com.tw/citations?user=guhwcP8AAAAJ;oWstvNcAAAAJ;https://scholar.google.com.tw/citations?user=eIWg8NMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~YoungJoong_Kwon1;~Lingjie_Liu1;~Henry_Fuchs1;~Marc_Habermann1;~Christian_Theobalt2", "aff": ";Saarland Informatics Campus, Max-Planck Institute;University of North Carolina, Chapel Hill;Saarland Informatics Campus, Max-Planck Institute;Max-Planck-Institute for Informatics, Saarland Informatics Campus", "aff_domain": ";mpi-inf.mpg.de;cs.unc.edu;mpi-inf.mpg.de;mpi-inf.mpg.de", "position": ";Postdoc;Full Professor;Principal Researcher;Director", "bibtex": "@inproceedings{\nkwon2023deliffas,\ntitle={{DELIFFAS}: Deformable Light Fields for Fast Avatar Synthesis},\nauthor={YoungJoong Kwon and Lingjie Liu and Henry Fuchs and Marc Habermann and Christian Theobalt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iajxrSgOSX}\n}", "github": "", "project": "", "reviewers": "LHHA;dLaT;qBLT;cQc3;QJHp", "pdf_size": 20051515, "rating": "5;5;6;6;7", "confidence": "4;5;4;3;5", "soundness": "3;3;3;2;3", "novelty": "3;2;3;2;3", "presentation": "3;3;2;2;3", "wc_summary": "43;84;84;96;123", "wc_strengths": "58;87;103;86;110", "wc_weaknesses": "148;398;183;138;93", "wc_questions": "17;1;107;102;112", "wc_limitations": "1;13;30;6;61", "wc_review": "267;583;507;428;499", "wc_reply_reviewers": "260;37;106;0;17", "wc_reply_authors": "468;0;43;0;0", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;1;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 86.0, 25.791471458604295 ], "wc_strengths_avg": [ 88.8, 17.948816116947658 ], "wc_weaknesses_avg": [ 192.0, 106.92988356862641 ], "wc_questions_avg": [ 67.8, 48.379334431139085 ], "wc_limitations_avg": [ 22.2, 21.738445206591937 ], "wc_review_avg": [ 456.8, 106.84643185432071 ], "wc_reply_reviewers_avg": [ 84.0, 95.09363806270112 ], "wc_reply_authors_avg": [ 102.2, 183.65663614473615 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0714285714285715, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18053954461040766563&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": ";mpi-inf.mpg.de;cs.unc.edu;mpi-inf.mpg.de;mpi-inf.mpg.de", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Max-Planck Institute;University of North 
Carolina;Max-Planck-Institute for Informatics", "aff_unique_dep": "Informatics;;", "aff_unique_url": "https://www.mpi-sws.org;https://www.unc.edu;https://mpi-inf.mpg.de", "aff_unique_abbr": "MPI-SWS;UNC;MPII", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Saarland;Chapel Hill", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;United States" }, { "title": "Interpretable Prototype-based Graph Information Bottleneck", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70730", "id": "icWwBKyVMs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f224f056694bcfe465c5d84579785761-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=icWwBKyVMs", "openreview": "https://openreview.net/forum?id=icWwBKyVMs", "poster": "/media/PosterPDFs/NeurIPS%202023/70730.png?t=1701952512.922482", "slides": "https://nips.cc/virtual/2023/poster/70730", "video": "https://nips.cc/virtual/2023/poster/70730", "author_site": "Sangwoo Seo, Sungwon Kim, Chanyoung Park", "tldr": "", "abstract": "The success of Graph Neural Networks (GNNs) has led to a need for understanding their decision-making process and providing explanations for their predictions, which has given rise to explainable AI (XAI) that offers transparent explanations for black-box models. Recently, the use of prototypes has successfully improved the explainability of models by learning prototypes to imply training graphs that affect the prediction. However, these approaches tend to provide prototypes with excessive information from the entire graph, leading to the exclusion of key substructures or the inclusion of irrelevant substructures, which can limit both the interpretability and the performance of the model in downstream tasks. In this work, we propose a novel framework of explainable GNNs, called interpretable Prototype-based Graph Information Bottleneck (PGIB) that incorporates prototype learning within the information bottleneck framework to provide prototypes with the key subgraph from the input graph that is important for the model prediction. This is the first work that incorporates prototype learning into the process of identifying the key subgraphs that have a critical impact on the prediction performance. 
Extensive experiments, including qualitative analysis, demonstrate that PGIB outperforms state-of-the-art methods in terms of both prediction performance and explainability.", "keywords": "Graph neural network;Explainable AI;Interpretability", "primary_area": "", "supplementary_material": "/attachment/85346a771732cbd3c5fa1771e000f05ac0fc3f00.pdf", "author": "Sangwoo Seo;Sungwon Kim;Chanyoung Park", "authorids": "~Sangwoo_Seo1;~Sungwon_Kim3;~Chanyoung_Park1", "gender": "M;M;M", "homepage": "https://github.com/tkddn8974;https://sung-won-kim.github.io;https://dsail.kaist.ac.kr/", "dblp": "234/8568;59/5163-2;170/5430.html", "google_scholar": ";https://scholar.google.co.kr/citations?hl=ko;lWk2LtQAAAAJ", "orcid": ";0000-0001-8605-2618;0000-0002-5957-5816", "linkedin": ";sungwon-kim/;", "or_profile": "~Sangwoo_Seo1;~Sungwon_Kim3;~Chanyoung_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nseo2023interpretable,\ntitle={Interpretable Prototype-based Graph Information Bottleneck},\nauthor={Sangwoo Seo and Sungwon Kim and Chanyoung Park},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=icWwBKyVMs}\n}", "github": "", "project": "", "reviewers": "qf5C;DMtB;Q5w4;fJjc", "pdf_size": 1431737, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "3;2;2;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "36;86;80;36", "wc_strengths": "38;74;59;54", "wc_weaknesses": "133;185;109;120", "wc_questions": "99;84;124;3", "wc_limitations": "4;9;16;11", "wc_review": "310;438;388;224", "wc_reply_reviewers": "126;26;74;15", "wc_reply_authors": "485;0;494;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 23.595550427993835 ], "wc_strengths_avg": [ 56.25, 12.852528934026953 ], "wc_weaknesses_avg": [ 136.75, 29.123658767400773 ], "wc_questions_avg": [ 77.5, 45.32383478921438 ], "wc_limitations_avg": [ 10.0, 4.301162633521313 ], "wc_review_avg": [ 340.0, 81.03085831953157 ], "wc_reply_reviewers_avg": [ 60.25, 43.968028156832325 ], "wc_reply_authors_avg": [ 244.75, 244.77068349784048 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11439946990543128822&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.edu;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "EvoPrompting: Language Models for Code-Level Neural Architecture Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70729", "id": "ifbF4WdT8f", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/184c1e18d00d7752805324da48ad25be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ifbF4WdT8f", "openreview": "https://openreview.net/forum?id=ifbF4WdT8f", "poster": "/media/PosterPDFs/NeurIPS%202023/70729.png?t=1699629833.4495702", "slides": "https://nips.cc/virtual/2023/poster/70729", "video": "https://nips.cc/virtual/2023/poster/70729", "author_site": "Angelica Chen, David Dohan, David Dohan, David So", "tldr": "", "abstract": "Given the recent impressive accomplishments of language models (LMs) for code generation, we explore the use of LMs as general adaptive mutation and crossover operators for an evolutionary neural architecture search (NAS) algorithm.\nWhile NAS still proves too difficult a task for LMs to succeed at solely through prompting, we find that the combination of evolutionary prompt engineering with soft prompt-tuning, a method we term EvoPrompting, consistently finds diverse and high performing models. We first demonstrate that EvoPrompting is effective on the computationally efficient MNIST-1D dataset, where EvoPrompting produces convolutional architecture variants that outperform both those designed by human experts and naive few-shot prompting in terms of accuracy and model size. We then apply our method to searching for graph neural networks on the CLRS Algorithmic Reasoning Benchmark, where EvoPrompting is able to design *novel* architectures that outperform current state-of-the-art models on 21 out of 30 algorithmic reasoning tasks while maintaining similar model size. EvoPrompting is successful at designing accurate and efficient neural network architectures across a variety of machine learning tasks, while also being general enough for easy adaptation to other tasks beyond neural network design.", "keywords": "language models;evolution;prompting;neural architecture search;code generation", "primary_area": "", "supplementary_material": "", "author": "Angelica Chen;David Dohan;David So", "authorids": "~Angelica_Chen1;~David_Dohan1;~David_So1", "gender": "F;M;M", "homepage": ";https://www.davidrso.com/;http://www.ddohan.com", "dblp": "241/5892;;172/2151.html", "google_scholar": "QbW4GSwAAAAJ;;iZ5cY0AAAAAJ", "orcid": ";;", "linkedin": ";;ddohan", "or_profile": "~Angelica_Chen1;~David_So1;~David_Dohan2", "aff": "New York University;Google DeepMind;Research, Google", "aff_domain": "nyu.edu;google.com;research.google.com", "position": "PhD student;Research Engineer;Researcher", "bibtex": "@inproceedings{\nchen2023evoprompting,\ntitle={EvoPrompting: Language Models for Code-Level Neural Architecture Search},\nauthor={Angelica Chen and David Dohan and David So},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ifbF4WdT8f}\n}", "github": "", "project": "", "reviewers": "ziXe;xBLj;HTgv;VcYp;fsDm", "pdf_size": 1045964, "rating": "4;4;7;7;7", "confidence": "5;4;4;4;3", "soundness": "1;3;2;4;3", "novelty": "1;2;2;3;4", "presentation": "2;4;2;4;4", "wc_summary": "100;47;165;59;90", "wc_strengths": "9;54;94;184;67", "wc_weaknesses": "104;55;151;43;199", "wc_questions": "121;61;70;22;5", "wc_limitations": "2;27;1;1;1", "wc_review": "336;244;481;309;362", "wc_reply_reviewers": "0;72;31;0;18", "wc_reply_authors": "170;277;0;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 1.019803902718557 ], 
"novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 3.2, 0.9797958971132712 ], "wc_summary_avg": [ 92.2, 41.25724178856362 ], "wc_strengths_avg": [ 81.6, 58.112305065278555 ], "wc_weaknesses_avg": [ 110.4, 58.568250784874905 ], "wc_questions_avg": [ 55.8, 40.50382697968181 ], "wc_limitations_avg": [ 6.4, 10.307278981380103 ], "wc_review_avg": [ 346.4, 77.91431190737681 ], "wc_reply_reviewers_avg": [ 24.2, 26.611275805567832 ], "wc_reply_authors_avg": [ 89.4, 114.60122163397735 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11979584976547827&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nyu.edu;google.com;research.google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.nyu.edu;https://deepmind.com", "aff_unique_abbr": "NYU;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Maximum Independent Set: Self-Training through Dynamic Programming", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70728", "id": "igE3Zbxvws", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7fe3170d88a8310ca86df2843f54236c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=igE3Zbxvws", "openreview": "https://openreview.net/forum?id=igE3Zbxvws", "poster": "/media/PosterPDFs/NeurIPS%202023/70728.png?t=1701816196.8997672", "slides": "https://nips.cc/virtual/2023/poster/70728", "video": "https://nips.cc/virtual/2023/poster/70728", "author_site": "Lorenzo Brusca, Lars C.P.M. Quaedvlieg, Stratis Skoulakis, Grigorios Chrysos, Volkan Cevher", "tldr": "", "abstract": "This work presents a graph neural network (GNN) framework for solving the maximum independent set (MIS) problem, inspired by dynamic programming (DP). Specifically, given a graph, we propose a DP-like recursive algorithm based on GNNs that firstly constructs two smaller sub-graphs, predicts the one with the larger MIS, and then uses it in the next recursive call. To train our algorithm, we require annotated comparisons of different graphs concerning their MIS size. Annotating the comparisons with the output of our algorithm leads to a self-training process that results in more accurate self-annotation of the comparisons and vice versa. We provide numerical evidence showing the superiority of our method vs prior methods in multiple synthetic and real-world datasets.", "keywords": "Maximum Independent Set;Combinatorial Optimization;Graph Neural Networks;Dynamic Programming", "primary_area": "", "supplementary_material": "/attachment/f833260461256ee8653eb4c4e684d38c2c92320c.zip", "author": "Lorenzo Brusca;Lars C.P.M. 
Quaedvlieg;Stratis Skoulakis;Grigorios Chrysos;Volkan Cevher", "authorids": "~Lorenzo_Brusca1;~Lars_C.P.M._Quaedvlieg1;~Stratis_Skoulakis2;~Grigorios_Chrysos1;~Volkan_Cevher1", "gender": "M;M;M;M;M", "homepage": ";https://lars-quaedvlieg.github.io/;http://www.corelab.ntua.gr/~sskoul/;https://grigorisg9gr.github.io/;http://lions.epfl.ch", "dblp": ";;183/0979.html;75/6117-2;70/5301", "google_scholar": ";f_-rgVcAAAAJ;Juo2Tk8AAAAJ;1bU041kAAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;;;", "linkedin": "lorenzo-brusca-5125a6125/;lars-quaedvlieg/;;;", "or_profile": "~Lorenzo_Brusca1;~Lars_C.P.M._Quaedvlieg1;~Stratis_Skoulakis2;~Grigorios_Chrysos1;~Volkan_Cevher1", "aff": "Polytechnic Institute of Turin;EPFL - EPF Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Amazon Development Center Germany", "aff_domain": "polito.it;epfl.ch;epfl.ch;epfl.ch;amazon.de", "position": "MS student;MS student;Postdoc;Postdoc;Amazon Scholar", "bibtex": "@inproceedings{\nbrusca2023maximum,\ntitle={Maximum Independent Set: Self-Training through Dynamic Programming},\nauthor={Lorenzo Brusca and Lars C.P.M. Quaedvlieg and Stratis Skoulakis and Grigorios Chrysos and Volkan Cevher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=igE3Zbxvws}\n}", "github": "", "project": "", "reviewers": "qbTV;v2kw;T1ds;b8RC;rVy4;YVwa", "pdf_size": 573316, "rating": "5;6;6;6;6;7", "confidence": "4;5;1;4;3;4", "soundness": "2;3;2;3;3;3", "novelty": "3;3;3;3;3;3", "presentation": "3;3;3;4;2;4", "wc_summary": "76;332;38;198;143;35", "wc_strengths": "31;26;25;94;98;28", "wc_weaknesses": "1172;60;70;187;208;123", "wc_questions": "1;35;3;223;207;1", "wc_limitations": "9;31;9;1;34;4", "wc_review": "1289;484;145;703;690;191", "wc_reply_reviewers": "0;85;12;519;56;22", "wc_reply_authors": "285;166;225;841;144;27", "reply_reviewers": "0;1;1;2;1;1", "reply_authors": "3;3;3;4;3;2", "rating_avg": [ 6.0, 0.5773502691896257 ], "confidence_avg": [ 3.5, 1.2583057392117916 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 137.0, 104.64861840145494 ], "wc_strengths_avg": [ 50.333333333333336, 32.36596291716895 ], "wc_weaknesses_avg": [ 303.3333333333333, 392.29651483993024 ], "wc_questions_avg": [ 78.33333333333333, 97.46566346952939 ], "wc_limitations_avg": [ 14.666666666666666, 12.944325225965066 ], "wc_review_avg": [ 583.6666666666666, 382.8819081068673 ], "wc_reply_reviewers_avg": [ 115.66666666666667, 182.60948009953432 ], "wc_reply_authors_avg": [ 281.3333333333333, 262.4440681152632 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 3.0, 0.5773502691896257 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7711964954655770495&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "polito.it;epfl.ch;epfl.ch;epfl.ch;amazon.de", "author_num": 5, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Polytechnic Institute of Turin;EPFL;Swiss Federal Institute of Technology Lausanne;Amazon", "aff_unique_dep": ";;;Development Center", "aff_unique_url": "https://www.polito.it;https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "Polito;EPFL;EPFL;Amazon", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": 
";Lausanne", "aff_country_unique_index": "0;1;1;1;2", "aff_country_unique": "Italy;Switzerland;Germany" }, { "title": "RVD: A Handheld Device-Based Fundus Video Dataset for Retinal Vessel Segmentation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73498", "id": "igEYxgQP7t", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a71ee306d6991f2f87dd414e0bdf851-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=igEYxgQP7t", "openreview": "https://openreview.net/forum?id=igEYxgQP7t", "poster": "/media/PosterPDFs/NeurIPS%202023/73498.png?t=1699404501.7329495", "slides": "https://nips.cc/virtual/2023/poster/73498", "video": "https://nips.cc/virtual/2023/poster/73498", "author_site": "MD WAHIDUZZAMAN KHAN, Hongwei Sheng, Hu Zhang, Heming Du, Sen Wang, Minas Coroneo, Farshid Hajati, Sahar Shariflou, Michael Kalloniatis, Jack Phu, Ashish Agar, Zi Huang, S.Mojtaba Golzan, Xin Yu", "tldr": "", "abstract": "Retinal vessel segmentation is generally grounded in image-based datasets collected with bench-top devices. \nThe static images naturally lose the dynamic characteristics of retina fluctuation, resulting in diminished dataset richness, and the usage of bench-top devices further restricts dataset scalability due to its limited accessibility. Considering these limitations, we introduce the first video-based retinal dataset by employing handheld devices for data acquisition. The dataset comprises 635 smartphone-based fundus videos collected from four different clinics, involving 415 patients from 50 to 75 years old. It delivers comprehensive and precise annotations of retinal structures in both spatial and temporal dimensions, aiming to advance the landscape of vasculature segmentation. Specifically, the dataset provides three levels of spatial annotations: binary vessel masks for overall retinal structure delineation, general vein-artery masks for distinguishing the vein and artery, and fine-grained vein-artery masks for further characterizing the granularities of each artery and vein. In addition, the dataset offers temporal annotations that capture the vessel pulsation characteristics, assisting in detecting ocular diseases that require fine-grained recognition of hemodynamic fluctuation. In application, our dataset exhibits a significant domain shift with respect to data captured by bench-top devices, thus posing great challenges to existing methods. Thanks to rich annotations and data scales, our dataset potentially paves the path for more advanced retinal analysis and accurate disease diagnosis. In the experiments, we provide evaluation metrics and benchmark results on our dataset, reflecting both the potential and challenges it offers for vessel segmentation tasks. 
We hope this challenging dataset would significantly contribute to the development of eye disease diagnosis and early prevention.", "keywords": "Retinal Vessel Segmentation;Retinal Vessel Dataset;Handheld", "primary_area": "", "supplementary_material": "/attachment/6a4be079a64e5c2d78c8218f2cf11949e3e2c522.zip", "author": "MD WAHIDUZZAMAN KHAN;Hongwei Sheng;Hu Zhang;Heming Du;Sen Wang;Minas Theodore Coroneo;Farshid Hajati;Sahar Shariflou;Michael Kalloniatis;Jack Phu;Ashish Agar;Zi Huang;Mojtaba Golzan;Xin Yu", "authorids": "~MD_WAHIDUZZAMAN_KHAN1;~Hongwei_Sheng2;~Hu_Zhang1;~Heming_Du2;~Sen_Wang3;~Minas_Theodore_Coroneo1;~Farshid_Hajati1;~Sahar_Shariflou1;~Michael_Kalloniatis1;~Jack_Phu1;~Ashish_Agar1;~Zi_Huang1;~Mojtaba_Golzan1;~Xin_Yu1", "gender": "M;M;M;M;M;M;M;F;M;M;M;F;M;M", "homepage": ";https://orcid.org/0000-0001-8990-2235;https://huzhangcs.github.io/;;https://csenw.github.io/;;;;https://www.deakin.edu.au/about-deakin/people/michael-kalloniatis;https://research.unsw.edu.au/people/dr-jack-phu;;https://staff.itee.uq.edu.au/huang/;https://profiles.uts.edu.au/mojtaba.golzan;https://sites.google.com/view/xinyus-homepage/Home", "dblp": ";340/0211.html;69/5169-5;244/8133;69/6403-1;;14/9455.html;;;;;70/6862;;54/1184-2", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;Ha3UZTwAAAAJ;L6BLX7gAAAAJ;44dNbycAAAAJ;https://scholar.google.com.au/citations?user=xx79uaoAAAAJ;;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.au/citations?user=iAWMsgEAAAAJ;;oxdtuSEAAAAJ", "orcid": "0009-0001-9654-0057;0000-0001-8990-2235;0009-0009-9892-9515;0000-0002-7391-0449;0000-0002-5414-8276;0000-0002-3061-2011;0000-0002-8573-5297;0000-0002-9029-246X;0000-0002-5264-4639;;;;;0000-0002-0269-5649", "linkedin": ";https://au.linkedin.com/in/hongwei-sheng-6a78ba287;hu-zhang-336891138/?originalSubdomain=au;;;https://www.linkedin.com/feed/?trk=homepage-basic_sign-in-submit;farshid-hajati-phd-93637016a;;;;dr-ashish-agar/;;;", "or_profile": "~MD_WAHIDUZZAMAN_KHAN1;~Hongwei_Sheng2;~Hu_Zhang1;~Heming_Du2;~Sen_Wang3;~Minas_Theodore_Coroneo1;~Farshid_Hajati1;~Sahar_Shariflou1;~Michael_Kalloniatis1;~Jack_Phu1;~Ashish_Agar1;~Zi_Huang1;~Mojtaba_Golzan1;~Xin_Yu1", "aff": "University of Technology Sydney;University of Technology Sydney;University of Queensland;Australian National University;The University of Queensland;University of New South Wales;Victoria University;University of Technology Sydney;;University of New South Wales;University of New South Wales;University of Queensland;;University of Queensland", "aff_domain": "uts.edu.au;uts.edu.au;uq.edu.au;anu.edu.au;uq.edu.au;unsw.edu.au;vu.edu.au;uts.edu.au;;unsw.edu.au;unsw.edu.au;uq.edu.au;;uq.edu.au", "position": "PhD student;PhD student;Postdoc;PhD student;Lecturer;Full Professor;Lecturer;Postdoc;;Lecturer;Associate Professor;Full Professor;;Senior Lecturer", "bibtex": "@inproceedings{\nkhan2023rvd,\ntitle={{RVD}: A Handheld Device-Based Fundus Video Dataset for Retinal Vessel Segmentation},\nauthor={MD WAHIDUZZAMAN KHAN and Hongwei Sheng and Hu Zhang and Heming Du and Sen Wang and Minas Theodore Coroneo and Farshid Hajati and Sahar Shariflou and Michael Kalloniatis and Jack Phu and Ashish Agar and Zi Huang and Mojtaba Golzan and Xin Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=igEYxgQP7t}\n}", "github": "", "project": "", "reviewers": "1P9Y;LKzk;CCXZ;iQd3;sT8p", "pdf_size": 14784244, "rating": "6;7;8;8;9", 
"confidence": "4;4;4;4;4", "wc_summary_and_contributions": "54;39;59;128;46", "wc_strengths": "126;43;66;243;54", "wc_improvement": "60;80;168;276;27", "wc_limitations": "11;16;227;12;23", "wc_correctness": "23;10;9;1;30", "wc_clarity": "9;24;1;1;15", "wc_relation_to_prior_work": "15;11;1;1;6", "wc_documentation": "17;23;63;7;6", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "316;247;595;670;208", "wc_reply_reviewers": "0;0;62;25;0", "wc_reply_authors": "659;750;1069;731;203", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;2;3;1;1", "rating_avg": [ 7.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 65.2, 32.13347164562211 ], "wc_strengths_avg": [ 106.4, 74.08265653984068 ], "wc_improvement_avg": [ 122.2, 89.9608803869771 ], "wc_limitations_avg": [ 57.8, 84.70513561762355 ], "wc_correctness_avg": [ 14.6, 10.44222198576529 ], "wc_clarity_avg": [ 10.0, 8.763560920082657 ], "wc_relation_to_prior_work_avg": [ 6.8, 5.528109984434101 ], "wc_documentation_avg": [ 23.2, 20.88444397153058 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 407.2, 188.67686662651573 ], "wc_reply_reviewers_avg": [ 17.4, 24.311314238436392 ], "wc_reply_authors_avg": [ 682.4, 278.1234258382418 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13932702874124527088&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uts.edu.au;uts.edu.au;uq.edu.au;anu.edu.au;uq.edu.au;unsw.edu.au;vu.edu.au;uts.edu.au;;unsw.edu.au;unsw.edu.au;uq.edu.au;;uq.edu.au", "author_num": 14, "aff_unique_index": "0;0;1;2;1;3;4;0;3;3;1;1", "aff_unique_norm": "University of Technology Sydney;University of Queensland;Australian National University;University of New South Wales;Victoria University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uts.edu.au;https://www.uq.edu.au;https://www.anu.edu.au;https://www.unsw.edu.au;https://www.vu.edu.au", "aff_unique_abbr": "UTS;UQ;ANU;UNSW;VU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "GNNEvaluator: Evaluating GNN Performance On Unseen Graphs Without Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70727", "id": "ihlT8yvQ2I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a55f024db3f771194bdadc8f3a35381-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ihlT8yvQ2I", "openreview": "https://openreview.net/forum?id=ihlT8yvQ2I", "poster": "/media/PosterPDFs/NeurIPS%202023/70727.png?t=1699489725.6430392", "slides": "https://nips.cc/virtual/2023/poster/70727", "video": "https://nips.cc/virtual/2023/poster/70727", "author_site": "Xin Zheng, Miao Zhang, Chunyang Chen, Soheila Molaei, Chuan Zhou, Shirui Pan", "tldr": "", "abstract": "Evaluating the performance of graph neural networks (GNNs) is an essential task for practical GNN model deployment and serving, as deployed GNNs face significant performance uncertainty when inferring on unseen and unlabeled test graphs, due to mismatched training-test graph distributions. 
In this paper, we study a *new* problem, **GNN model evaluation**, that aims to assess the performance of a specific GNN model trained on labeled and observed graphs, by precisely estimating its performance (e.g., node classification accuracy) on unseen graphs without labels. Concretely, we propose a two-stage GNN model evaluation framework, including (1) DiscGraph set construction and (2) GNNEvaluator training and inference. The DiscGraph set captures wide-range and diverse graph data distribution discrepancies through a discrepancy measurement function, which exploits the GNN outputs of latent node embeddings and node class predictions. Under the effective training supervision from the DiscGraph set, GNNEvaluator learns to precisely estimate node classification accuracy of the to-be-evaluated GNN model and makes an accurate inference for evaluating GNN model performance. Extensive experiments on real-world unseen and unlabeled test graphs demonstrate the effectiveness of our proposed method for GNN model evaluation.", "keywords": "graph neural networks;GNN model evaluation;node classification accuracy", "primary_area": "", "supplementary_material": "/attachment/10be5fc5b04b8af7ef2a856959b4ddcb8600b580.pdf", "author": "Xin Zheng;Miao Zhang;Chunyang Chen;Soheila Molaei;Chuan Zhou;Shirui Pan", "authorids": "~Xin_Zheng4;~Miao_Zhang4;~Chunyang_Chen1;~Soheila_Molaei1;~Chuan_Zhou3;~Shirui_Pan1", "gender": "F;M;;F;M;", "homepage": ";https://sites.google.com/view/miaozhang;https://chunyang-chen.github.io/;https://www.researchgate.net/profile/Soheila-Molaei-2;http://www.chuanzhou.online/;", "dblp": ";60/7041-1.html;180/7246.html;236/6149;https://dblp.uni-trier.de/pid/52/564-1;91/8171", "google_scholar": "WAl7OtMAAAAJ;6EUV_UMAAAAJ;3tyGlPsAAAAJ;iAq1AngAAAAJ;4oBUWVEAAAAJ;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ", "orcid": "0000-0003-0915-7787;0000-0002-1262-4174;;;0000-0001-9958-8673;0000-0003-0794-527X", "linkedin": ";miao-zhang-71b13a177/;;;;", "or_profile": "~Xin_Zheng4;~Miao_Zhang4;~Chunyang_Chen1;~Soheila_Molaei1;~Chuan_Zhou3;~Shirui_Pan1", "aff": "Monash University;Harbin Institute of Technology (Shenzhen);Monash University;University of Oxford;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Griffith University", "aff_domain": "monash.edu;hit.edu.cn;monash.edu;ox.ac.uk;amss.ac.cn;griffith.edu.au", "position": "PhD student;Full Professor;Associate Professor;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2023gnnevaluator,\ntitle={{GNNE}valuator: Evaluating {GNN} Performance On Unseen Graphs Without Labels},\nauthor={Xin Zheng and Miao Zhang and Chunyang Chen and Soheila Molaei and Chuan Zhou and Shirui Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ihlT8yvQ2I}\n}", "github": "", "project": "", "reviewers": "ZAqu;jBkz;3iA3;nFGD", "pdf_size": 15133023, "rating": "6;6;7;7", "confidence": "4;4;3;4", "soundness": "3;2;4;3", "novelty": "4;3;4;3", "presentation": "3;3;4;3", "wc_summary": "89;111;113;84", "wc_strengths": "121;41;64;191", "wc_weaknesses": "167;95;67;2", "wc_questions": "4;216;48;120", "wc_limitations": "7;1;1;1", "wc_review": "388;464;293;398", "wc_reply_reviewers": "25;0;0;0", "wc_reply_authors": "144;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], 
"novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 12.891373084353738 ], "wc_strengths_avg": [ 104.25, 57.93692691194451 ], "wc_weaknesses_avg": [ 82.75, 59.19617808608931 ], "wc_questions_avg": [ 97.0, 80.21845174272562 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 385.75, 60.993339800342135 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 36.0, 62.353829072479584 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5819711477809901178&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "monash.edu;hit.edu.cn;monash.edu;ox.ac.uk;amss.ac.cn;griffith.edu.au", "author_num": 6, "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "Monash University;Harbin Institute of Technology;University of Oxford;Chinese Academy of Sciences;Griffith University", "aff_unique_dep": ";;;Academy of Mathematics and Systems Science;", "aff_unique_url": "https://www.monash.edu;http://en.hhit.edu.cn/;https://www.ox.ac.uk;http://www.cas.cn;https://www.griffith.edu.au", "aff_unique_abbr": "Monash;HIT;Oxford;CAS;Griffith", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;0;2;1;0", "aff_country_unique": "Australia;China;United Kingdom" }, { "title": "Frequency-domain MLPs are More Effective Learners in Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70726", "id": "iif9mGCTfy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f1d16af76939f476b5f040fd1398c0a3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iif9mGCTfy", "openreview": "https://openreview.net/forum?id=iif9mGCTfy", "poster": "/media/PosterPDFs/NeurIPS%202023/70726.png?t=1701931946.5661523", "slides": "https://nips.cc/virtual/2023/poster/70726", "video": "https://nips.cc/virtual/2023/poster/70726", "author_site": "Kun Yi, Qi Zhang, Wei Fan, Shoujin Wang, Pengyang Wang, Hui He, Ning An, Defu Lian, Longbing Cao, Zhendong Niu", "tldr": "", "abstract": "Time series forecasting has played the key role in different industrial, including finance, traffic, energy, and healthcare domains. While existing literatures have designed many sophisticated architectures based on RNNs, GNNs, or Transformers, another kind of approaches based on multi-layer perceptrons (MLPs) are proposed with simple structure, low complexity, and superior performance. However, most MLP-based forecasting methods suffer from the point-wise mappings and information bottleneck, which largely hinders the forecasting performance. To overcome this problem, we explore a novel direction of applying MLPs in the frequency domain for time series forecasting. We investigate the learned patterns of frequency-domain MLPs and discover their two inherent characteristic benefiting forecasting, (i) global view: frequency spectrum makes MLPs own a complete view for signals and learn global dependencies more easily, and (ii) energy compaction: frequency-domain MLPs concentrate on smaller key part of frequency components with compact signal energy. Then, we propose FreTS, a simple yet effective architecture built upon Frequency-domain MLPs for Time Series forecasting. 
FreTS mainly involves two stages: (i) Domain Conversion, which transforms time-domain signals into complex numbers in the frequency domain; and (ii) Frequency Learning, which applies our redesigned MLPs to learn the real and imaginary parts of the frequency components. Operating these stages on both inter-series and intra-series scales further contributes to channel-wise and time-wise dependency learning. Extensive experiments on 13 real-world benchmarks (including 7 benchmarks for short-term forecasting and 6 benchmarks for long-term forecasting) demonstrate our consistent superiority over state-of-the-art methods. Code is available at this repository: https://github.com/aikunyi/FreTS.", "keywords": "time series forecasting;multi-layer perceptrons;frequency domain", "primary_area": "", "supplementary_material": "/attachment/fdb666dbde2dd02c1a3f3e19cbdba672a9fc8d36.zip", "author": "Kun Yi;Qi Zhang;Wei Fan;Shoujin Wang;Pengyang Wang;Hui He;Ning An;Defu Lian;Longbing Cao;Zhendong Niu", "authorids": "~Kun_Yi2;~Qi_Zhang25;~Wei_Fan6;~Shoujin_Wang1;~Pengyang_Wang1;~Hui_He2;~Ning_An1;~Defu_Lian1;~Longbing_Cao1;~Zhendong_Niu2", "gender": ";M;M;M;M;F;M;M;M;M", "homepage": "https://github.com/aikunyi;https://sites.google.com/view/qizhang-bit-uts/home;https://weifan.site/;https://shoujinwang1.github.io/;https://pengyangwang.com/;https://www.researchgate.net/profile/Hui_He43;;https://faculty.ustc.edu.cn/liandefu/en/index.htm;https://www.datasciences.org;", "dblp": "202/8470-1;52/323-20;54/3488-10;16/8492;219/1752;https://dblp.uni-trier.de/pid/53/1151;98/6171-1.html;87/10734;14/2589;https://dblp.uni-trier.de/pid/06/3613.html", "google_scholar": "MhMZcIEAAAAJ;8UAk1p4AAAAJ;cQ8zLJ4AAAAJ;BQ0mBRIAAAAJ;o26vQZwAAAAJ;1IqAdRwAAAAJ;tr5oJtQAAAAJ;QW0ad4sAAAAJ;cDs3DM8AAAAJ;", "orcid": "0000-0002-9980-6033;0000-0002-1037-1361;0000-0001-7656-445X;0000-0003-1133-9379;0000-0003-3961-5523;0000-0001-5515-2739;0000-0003-3317-5299;0000-0002-3507-9607;0000-0003-1562-9429;", "linkedin": ";;;;;;ningan/;;;", "or_profile": "~Kun_Yi2;~Qi_Zhang25;~Wei_Fan6;~Shoujin_Wang1;~Pengyang_Wang1;~Hui_He2;~Ning_An1;~Defu_Lian1;~Longbing_Cao1;~Zhendong_Niu2", "aff": "Beijing Institute of Technology;Tongji University;University of Central Florida;University of Technology Sydney;University of Macau;Beijing Institute of Technology;Hefei University of Technology;University of Science and Technology of China;University of Technology Sydney;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;tongji.edu.cn;ucf.edu;uts.edu.au;um.edu.mo;bit.edu.cn;hfut.edu.cn;ustc.edu.cn;uts.edu.au;bit.edu.cn", "position": "PhD student;Researcher;PhD student;Lecturer;Assistant Professor;PhD student;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyi2023frequencydomain,\ntitle={Frequency-domain {MLP}s are More Effective Learners in Time Series Forecasting},\nauthor={Kun Yi and Qi Zhang and Wei Fan and Shoujin Wang and Pengyang Wang and Hui He and Ning An and Defu Lian and Longbing Cao and Zhendong Niu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iif9mGCTfy}\n}", "github": "", "project": "", "reviewers": "bjea;u93d;BMVc;WVuT;aNLj", "pdf_size": 3047910, "rating": "5;6;6;6;7", "confidence": "3;4;4;5;4", "soundness": "3;3;4;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;4;3;3", "wc_summary": "59;67;67;31;84", "wc_strengths": "29;47;58;44;214", "wc_weaknesses": "253;158;115;96;58", "wc_questions": "40;145;3;4;83", "wc_limitations":
"26;1;3;1;1", "wc_review": "407;418;246;176;440", "wc_reply_reviewers": "24;80;0;22;22", "wc_reply_authors": "13;470;0;34;26", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;3;1;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 61.6, 17.338973441354593 ], "wc_strengths_avg": [ 78.4, 68.42981806201153 ], "wc_weaknesses_avg": [ 136.0, 66.78023659736465 ], "wc_questions_avg": [ 55.0, 53.691712582110846 ], "wc_limitations_avg": [ 6.4, 9.830564581955608 ], "wc_review_avg": [ 337.4, 106.08600284674694 ], "wc_reply_reviewers_avg": [ 29.6, 26.695317941541735 ], "wc_reply_authors_avg": [ 108.6, 181.06971033278867 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 208, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2998083700892478954&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "bit.edu.cn;tongji.edu.cn;ucf.edu;uts.edu.au;um.edu.mo;bit.edu.cn;hfut.edu.cn;ustc.edu.cn;uts.edu.au;bit.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;3;4;0;5;6;3;0", "aff_unique_norm": "Beijing Institute of Technology;Tongji University;University of Central Florida;University of Technology Sydney;University of Macau;Hefei University of Technology;University of Science and Technology of China", "aff_unique_dep": ";;;;;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.tongji.edu.cn;https://www.ucf.edu;https://www.uts.edu.au;https://www.um.edu.mo;http://www.hfut.edu.cn/;http://www.ustc.edu.cn", "aff_unique_abbr": "BIT;Tongji;UCF;UTS;UM;HUT;USTC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Macau SAR", "aff_country_unique_index": "0;0;1;2;0;0;0;0;2;0", "aff_country_unique": "China;United States;Australia" }, { "title": "Semi-Supervised Contrastive Learning for Deep Regression with Ordinal Rankings from Spectral Seriation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70725", "id": "ij3svnPLzG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2d4051f03a7038a2771dfbbe5c7b54e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ij3svnPLzG", "openreview": "https://openreview.net/forum?id=ij3svnPLzG", "poster": "/media/PosterPDFs/NeurIPS%202023/70725.png?t=1698480548.1192663", "slides": "https://nips.cc/virtual/2023/poster/70725", "video": "https://nips.cc/virtual/2023/poster/70725", "author_site": "Weihang Dai, Yao DU, Hanru Bai, Kwang-Ting Cheng, Xiaomeng Li", "tldr": "", "abstract": "Contrastive learning methods can be applied to deep regression by enforcing label distance relationships in feature space. However, these methods are limited to labeled data only unlike for classification, where unlabeled data can be used for contrastive pretraining. In this work, we extend contrastive regression methods to allow unlabeled data to be used in a semi-supervised setting, thereby reducing the reliance on manual annotations. We observe that the feature similarity matrix between unlabeled samples still reflect inter-sample relationships, and that an accurate ordinal relationship can be recovered through spectral seriation algorithms if the level of error is within certain bounds. 
By using the recovered ordinal relationship for contrastive learning on unlabeled samples, we can allow more data to be used for feature representation learning, thereby achieving more robust results. The ordinal rankings can also be used to supervise predictions on unlabeled samples, which can serve as an additional training signal. We provide theoretical guarantees and empirical support through experiments on different datasets, demonstrating that our method can surpass existing state-of-the-art semi-supervised deep regression methods. To the best of our knowledge, this work is the first to explore using unlabeled data to perform contrastive learning for regression.", "keywords": "Semi-supervised learning;deep regression;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/35110b623ab83fc97f045d64e86a800d75acaaf3.pdf", "author": "Weihang Dai;Yao DU;Hanru Bai;Kwang-Ting Cheng;Xiaomeng Li", "authorids": "~Weihang_Dai1;~Yao_DU4;2019310030117@cau.edu.cn;~Kwang-Ting_Cheng1;~Xiaomeng_Li1", "gender": "M;;;;F", "homepage": ";;;;https://xmengli.github.io/", "dblp": "152/9822;;;;02/9850-1", "google_scholar": ";;;;uVTzPpoAAAAJ", "orcid": ";;;;", "linkedin": "weihang-dai-89122120;;;;", "or_profile": "~Weihang_Dai1;~Yao_DU4;2019310030117@cau.edu.cn;~Kwang-Ting_Cheng1;~Xiaomeng_Li1", "aff": "Hong Kong University of Science and Technology;;;;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;;;;ust.hk", "position": "PhD student;;;;Assistant Professor", "bibtex": "@inproceedings{\ndai2023semisupervised,\ntitle={Semi-Supervised Contrastive Learning for Deep Regression with Ordinal Rankings from Spectral Seriation},\nauthor={Weihang Dai and Yao DU and Hanru Bai and Kwang-Ting Cheng and Xiaomeng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ij3svnPLzG}\n}", "github": "", "project": "", "reviewers": "w9AQ;kMK1;jwFg;H1Yh", "pdf_size": 647039, "rating": "5;5;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;2", "wc_summary": "48;37;59;81", "wc_strengths": "45;52;51;72", "wc_weaknesses": "84;28;35;294", "wc_questions": "48;86;41;54", "wc_limitations": "1;39;1;19", "wc_review": "226;242;187;520", "wc_reply_reviewers": "18;18;0;63", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 56.25, 16.269219403523945 ], "wc_strengths_avg": [ 55.0, 10.173494974687902 ], "wc_weaknesses_avg": [ 110.25, 108.2598147975508 ], "wc_questions_avg": [ 57.25, 17.224619008848933 ], "wc_limitations_avg": [ 15.0, 15.684387141358123 ], "wc_review_avg": [ 293.75, 132.14835413277004 ], "wc_reply_reviewers_avg": [ 24.75, 23.27418097377435 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11934077428682851541&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ust.hk;;;;ust.hk", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", 
"aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "AIMS: All-Inclusive Multi-Level Segmentation for Anything", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70724", "id": "ikkdTD3hQJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3da292ced54290c19fc55d9dba3da793-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ikkdTD3hQJ", "openreview": "https://openreview.net/forum?id=ikkdTD3hQJ", "poster": "/media/PosterPDFs/NeurIPS%202023/70724.png?t=1697748785.8368454", "slides": "https://nips.cc/virtual/2023/poster/70724", "video": "https://nips.cc/virtual/2023/poster/70724", "author_site": "Lu Qi, Jason Kuen, Weidong Guo, Jiuxiang Gu, Zhe Lin, Bo Du, Yu Xu, Ming-Hsuan Yang", "tldr": "", "abstract": "Despite the progress of image segmentation for accurate visual entity segmentation, completing the diverse requirements of image editing applications for different-level region-of-interest selections remains unsolved. In this paper, we propose a new task, All-Inclusive Multi-Level Segmentation (AIMS), which segments visual regions into three levels: part, entity, and relation (two entities with some semantic relationships). We also build a unified AIMS model through multi-dataset multi-task training to address the two major challenges of annotation inconsistency and task correlation. Specifically, we propose task complementarity, association, and prompt mask encoder for three-level predictions. Extensive experiments demonstrate the effectiveness and generalization capacity of our method compared to other state-of-the-art methods on a single dataset or the concurrent work on segment anything. 
We will make our code and trained models publicly available.", "keywords": "Image Segmentation", "primary_area": "", "supplementary_material": "/attachment/edcbf718425fd75760154c79c08ed4d82a1dd350.pdf", "author": "Lu Qi;Jason Kuen;Weidong Guo;Jiuxiang Gu;Zhe Lin;Bo Du;Yu Xu;Ming-Hsuan Yang", "authorids": "~Lu_Qi1;~Jason_Kuen1;~Weidong_Guo1;~Jiuxiang_Gu2;~Zhe_Lin1;~Bo_Du3;~Yu_Xu1;~Ming-Hsuan_Yang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://www.luqi.info;http://jasonkuen.com/;https://scholar.google.com/citations?user=FfvgqZYAAAAJ&hl=en;http://gujiuxiang.com;https://sites.google.com/site/zhelin625/;;https://faculty.ucmerced.edu/mhyang/;", "dblp": ";165/1403;;173/4935.html;42/1680-1;27/0;79/3711.html;70/6443-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=SSI90d4AAAAJ;e6u7GlQAAAAJ;FfvgqZYAAAAJ;https://scholar.google.com.sg/citations?user=zPxKV9EAAAAJ;R0bnqaAAAAAJ;;p9-ohHsAAAAJ;Shy1gnMAAAAJ", "orcid": ";;;;0000-0003-1154-9907;;0000-0003-4848-2304;", "linkedin": ";;;;;xy2020/;minghsuanyang/;", "or_profile": "~Lu_Qi1;~Jason_Kuen1;~Weidong_Guo1;~Jiuxiang_Gu2;~Zhe_Lin1;~Yu_Xu1;~Ming-Hsuan_Yang1;~Bo_Du1", "aff": "University of California, Merced;Adobe Research;Tencent;Adobe Systems;Adobe Research;;University of California at Merced;Wuhan University", "aff_domain": "ucmerced.edu;adobe.com;tencent.com;adobe.com;adobe.com;;umcerced.edu;whu.edu.cn", "position": "Postdoc;Researcher;Researcher;Researcher;Principal Researcher;;Professor;Full Professor", "bibtex": "@inproceedings{\nqi2023aims,\ntitle={{AIMS}: All-Inclusive Multi-Level Segmentation for Anything},\nauthor={Lu Qi and Jason Kuen and Weidong Guo and Jiuxiang Gu and Zhe Lin and Bo Du and Yu Xu and Ming-Hsuan Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ikkdTD3hQJ}\n}", "github": "", "project": "", "reviewers": "Tt8E;u59B;mZLg;b8VN;S47w", "pdf_size": 5770890, "rating": "6;6;6;7;8", "confidence": "4;4;5;4;5", "soundness": "2;3;3;3;3", "novelty": "1;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "90;35;77;135;64", "wc_strengths": "40;24;142;143;149", "wc_weaknesses": "182;108;117;57;322", "wc_questions": "4;57;54;47;144", "wc_limitations": "11;47;20;3;1", "wc_review": "327;271;410;385;680", "wc_reply_reviewers": "19;39;0;28;20", "wc_reply_authors": "310;40;40;0;40", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;2;2;1;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.2, 32.90835760107149 ], "wc_strengths_avg": [ 99.6, 55.478284039793444 ], "wc_weaknesses_avg": [ 157.2, 91.4885785221303 ], "wc_questions_avg": [ 61.2, 45.604385754003964 ], "wc_limitations_avg": [ 16.4, 16.70449041425688 ], "wc_review_avg": [ 414.6, 141.15891753622935 ], "wc_reply_reviewers_avg": [ 21.2, 12.796874618437114 ], "wc_reply_authors_avg": [ 86.0, 113.06635220082056 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4082482904638631, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11467112810279029561&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ucmerced.edu;adobe.com;tencent.com;adobe.com;adobe.com;;umcerced.edu;whu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;1;1;0;3", 
"aff_unique_norm": "University of California, Merced;Adobe;Tencent;Wuhan University", "aff_unique_dep": ";Adobe Research;Tencent Holdings Limited;", "aff_unique_url": "https://www.ucmerced.edu;https://research.adobe.com;https://www.tencent.com;http://www.whu.edu.cn/", "aff_unique_abbr": "UC Merced;Adobe;Tencent;WHU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Merced;", "aff_country_unique_index": "0;0;1;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "History Filtering in Imperfect Information Games: Algorithms and Complexity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70723", "id": "inIONNg8Sq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/87ee1bbac4635e7c948f3eea83c1f262-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=inIONNg8Sq", "openreview": "https://openreview.net/forum?id=inIONNg8Sq", "poster": "/media/PosterPDFs/NeurIPS%202023/70723.png?t=1701896407.2189555", "slides": "https://nips.cc/virtual/2023/poster/70723", "video": "https://nips.cc/virtual/2023/poster/70723", "author_site": "Christopher Solinas, Doug Rebstock, Nathan Sturtevant, Michael Buro", "tldr": "", "abstract": "Historically applied exclusively to perfect information games, depth-limited search with value functions has been key to recent advances in AI for imperfect information games. Most prominent approaches with strong theoretical guarantees require *subgame decomposition* - a process in which a subgame is computed from public information and player beliefs. However, subgame decomposition can itself require non-trivial computations, and its tractability depends on the existence of efficient algorithms for either full enumeration or generation of the histories that form the root of the subgame. Despite this, no formal analysis of the tractability of such computations has been established in prior work, and application domains have often consisted of games, such as poker, for which enumeration is trivial on modern hardware.\n\nApplying these ideas to more complex domains requires understanding their cost. In this work, we introduce and analyze the computational aspects and tractability of filtering histories for subgame decomposition. We show that constructing a single history from the root of the subgame is generally intractable, and then provide a necessary and sufficient condition for efficient enumeration. We also introduce a novel Markov Chain Monte Carlo-based generation algorithm for trick-taking card games - a domain where enumeration is often prohibitively expensive. Our experiments demonstrate its improved scalability in the trick-taking card game *Oh Hell*.\nThese contributions clarify when and how depth-limited search via subgame decomposition can be an effective tool for sequential decision-making in imperfect information settings.", "keywords": "search;game theory;multi-agent;learning;markov chain monte carlo;complexity", "primary_area": "", "supplementary_material": "/attachment/7aa0efc52c84daf1ec05bb1fd75b0d062488cf19.zip", "author": "Christopher Solinas;Doug Rebstock;Nathan R. 
Sturtevant;Michael Buro", "authorids": "~Christopher_Solinas1;~Doug_Rebstock1;~Nathan_R._Sturtevant1;~Michael_Buro1", "gender": "M;M;M;M", "homepage": ";;https://movingai.com/;https://skatgame.net/mburo/", "dblp": "200/8108;238/0247;18/991;26/2020", "google_scholar": "https://scholar.google.ca/citations?user=bcodZL8AAAAJ;https://scholar.google.ca/citations?user=cWfQJDwAAAAJ;https://scholar.google.ca/citations?user=3utEUeoAAAAJ;", "orcid": ";0000-0002-9121-1377;0000-0003-4318-2791;", "linkedin": ";;;", "or_profile": "~Christopher_Solinas1;~Doug_Rebstock1;~Nathan_R._Sturtevant1;~Michael_Buro1", "aff": "University of Alberta;University of Alberta;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsolinas2023history,\ntitle={History Filtering in Imperfect Information Games: Algorithms and Complexity},\nauthor={Christopher Solinas and Doug Rebstock and Nathan R. Sturtevant and Michael Buro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=inIONNg8Sq}\n}", "github": "", "project": "", "reviewers": "KG55;684B;r9WW;8L8P", "pdf_size": 1748433, "rating": "3;5;6;7", "confidence": "4;5;4;4", "soundness": "2;4;4;3", "novelty": "3;3;3;3", "presentation": "4;2;4;1", "wc_summary": "99;117;54;68", "wc_strengths": "58;37;71;77", "wc_weaknesses": "418;362;61;1197", "wc_questions": "105;107;179;84", "wc_limitations": "1;8;146;110", "wc_review": "681;631;511;1536", "wc_reply_reviewers": "0;85;8;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 1.299038105676658 ], "wc_summary_avg": [ 84.5, 24.84451649760969 ], "wc_strengths_avg": [ 60.75, 15.335824073065002 ], "wc_weaknesses_avg": [ 509.5, 419.50476755336166 ], "wc_questions_avg": [ 118.75, 35.93309755643117 ], "wc_limitations_avg": [ 66.25, 63.0966520506437 ], "wc_review_avg": [ 839.75, 406.6997510449201 ], "wc_reply_reviewers_avg": [ 30.0, 33.23401871576773 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.09759000729485331, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10631287385434568126&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Generalized Belief Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70722", "id": "iohoef1bfM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/847bb9bb1351f557a52d2ecdacb7e2d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iohoef1bfM", "openreview": "https://openreview.net/forum?id=iohoef1bfM", "poster": "/media/PosterPDFs/NeurIPS%202023/70722.png?t=1701745671.9763372", "slides": 
"https://nips.cc/virtual/2023/poster/70722", "video": "https://nips.cc/virtual/2023/poster/70722", "author_site": "Junqi Wang, PEI WANG, Patrick Shafto", "tldr": "", "abstract": "Human learners have ability to adopt appropriate learning approaches depending on constraints such as prior on the hypothesis, urgency of decision, and drift of the environment. However, existing learning models are typically considered individually rather than in relation to one and other. To build agents that have the ability to move between different modes of learning over time, it is important to understand how learning models are related as points in a broader space of possibilities. We introduce a mathematical framework, Generalized Belief Transport (GBT), that unifies and generalizes prior models, including Bayesian inference, cooperative communication and classification, as parameterizations of three learning constraints within Unbalanced Optimal Transport (UOT). We visualize the space of learning models encoded by GBT as a cube which includes classic learning models as special points. We derive critical properties of this parameterized space including proving continuity and differentiability which is the basis for model interpolation, and study limiting behavior of the parameters, which allows attaching learning models on the boundaries. Moreover, we investigate the long-run behavior of GBT, explore convergence properties of models in GBT mathematical and computationally, document the ability to learn in the presence of distribution drift, and formulate conjectures about general behavior. We conclude with open questions and implications for more unified models of learning.", "keywords": "Bayesian theory;Belief transport;Unbalanced optimal transport;parametrization;asymptotic behavior;environment drift detection", "primary_area": "", "supplementary_material": "/attachment/9e40b9f79eec0aefc478468680e76416082756a7.zip", "author": "Junqi Wang;PEI WANG;Patrick Shafto", "authorids": "~Junqi_Wang1;~PEI_WANG1;~Patrick_Shafto1", "gender": "M;F;", "homepage": ";;http://www.shaftolab.com", "dblp": "213/0790;83/4555;03/5979", "google_scholar": "https://scholar.google.com/citations?pli=1;SO-bdTIAAAAJ;HUi6F7wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junqi_Wang1;~PEI_WANG1;~Patrick_Shafto1", "aff": "Beijing Institute for General Artificial Intelligence;Rutgers University;Rutgers University", "aff_domain": "bigai.ai;rutgers.edu;rutgers.edu", "position": "Researcher;Postdoc;Professor", "bibtex": "@inproceedings{\nwang2023generalized,\ntitle={Generalized Belief Transport},\nauthor={Junqi Wang and PEI WANG and Patrick Shafto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iohoef1bfM}\n}", "github": "", "project": "", "reviewers": "1jqk;mBxm;vUNg;7Pt6;NpBr", "pdf_size": 1331438, "rating": "4;5;5;6;7", "confidence": "2;1;1;3;2", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "1;3;2;3;3", "wc_summary": "47;47;29;81;135", "wc_strengths": "12;21;25;77;120", "wc_weaknesses": "42;41;49;131;185", "wc_questions": "19;64;1;2;51", "wc_limitations": "20;1;1;1;43", "wc_review": "140;174;105;292;534", "wc_reply_reviewers": "584;0;0;17;15", "wc_reply_authors": "580;0;0;25;2", "reply_reviewers": "3;0;0;1;1", "reply_authors": "4;1;1;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 1.8, 0.7483314773547883 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 
2.4, 0.8 ], "wc_summary_avg": [ 67.8, 37.57871738098574 ], "wc_strengths_avg": [ 51.0, 41.31343607108951 ], "wc_weaknesses_avg": [ 89.6, 58.465716449899084 ], "wc_questions_avg": [ 27.4, 25.726251184344754 ], "wc_limitations_avg": [ 13.2, 16.61806246227279 ], "wc_review_avg": [ 249.0, 155.76649190374675 ], "wc_reply_reviewers_avg": [ 123.2, 230.5119519677884 ], "wc_reply_authors_avg": [ 121.4, 229.49474939527485 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3668996928526714, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hMSp-7ukZCcJ:scholar.google.com/&scioq=Generalized+Belief+Transport&hl=en&as_sdt=0,33", "gs_version_total": 7, "email": "bigai.ai;rutgers.edu;rutgers.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Beijing Institute for General Artificial Intelligence;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bigaiai.org/;https://www.rutgers.edu", "aff_unique_abbr": "BIGAI;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Adaptive recurrent vision performs zero-shot computation scaling to unseen difficulty levels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70721", "id": "iqezE0EyXq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a40e042c66e84659249f3254460c123-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iqezE0EyXq", "openreview": "https://openreview.net/forum?id=iqezE0EyXq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70721", "video": "https://nips.cc/virtual/2023/poster/70721", "author_site": "Vijay Veerabadran, Srinivas Ravishankar, Yuan Tang, Ritik Raina, Virginia de Sa", "tldr": "", "abstract": "Humans solving algorithmic (or) reasoning problems typically exhibit solution times that grow as a function of problem difficulty. \nAdaptive recurrent neural networks have been shown to exhibit this property for various language-processing tasks. However, little work has been performed to assess whether such adaptive computation can also enable vision models to extrapolate solutions beyond their training distribution's difficulty level, with prior work focusing on very simple tasks. In this study, we investigate a critical functional role of such adaptive processing using recurrent neural networks: to dynamically scale computational resources conditional on input requirements that allow for zero-shot generalization to novel difficulty levels not seen during training using two challenging visual reasoning tasks: PathFinder and Mazes. We combine convolutional recurrent neural networks (ConvRNNs) with a learnable halting mechanism based on Graves (2016). We explore various implementations of such adaptive ConvRNNs (AdRNNs) ranging from tying weights across layers to more sophisticated biologically inspired recurrent networks that possess lateral connections and gating. We show that 1) AdRNNs learn to dynamically halt processing early (or late) to solve easier (or harder) problems, 2) these RNNs zero-shot generalize to more difficult problem settings not shown during training by dynamically increasing the number of recurrent iterations at test time. 
Our study provides modeling evidence supporting the hypothesis that recurrent processing enables the functional advantage of adaptively allocating compute resources conditional on input requirements and hence allowing generalization to harder difficulty levels of a visual reasoning problem without training.", "keywords": "cognitive science;recurrent neural networks;adaptive computation time;visual reasoning", "primary_area": "", "supplementary_material": "/attachment/baf0e0e6fa2d60f28498b800a719465a0d86f268.pdf", "author": "Vijay Veerabadran;Srinivas Ravishankar;Yuan Tang;Ritik Raina;Virginia R. de Sa", "authorids": "~Vijay_Veerabadran1;~Srinivas_Ravishankar1;~Yuan_Tang2;~Ritik_Raina1;~Virginia_R._de_Sa2", "gender": "M;M;M;M;F", "homepage": "https://vijayvee.github.io;https://srinivas-r.github.io/;;https://rainarit.github.io;http://cogsci.ucsd.edu/~desa", "dblp": "220/4325;209/9805.html;;;94/4658", "google_scholar": "https://scholar.google.co.in/citations?user=I6b38LoAAAAJ;KzNCmhwAAAAJ;;4lUt1VsAAAAJ;UVzXKcIAAAAJ", "orcid": ";;;;0000-0002-0989-3576", "linkedin": "vijayvee/;;hiyuantang/;;virginia-de-sa-0537471b1/", "or_profile": "~Vijay_Veerabadran1;~Srinivas_Ravishankar1;~Yuan_Tang2;~Ritik_Raina1;~Virginia_de_Sa1", "aff": "University of California, San Diego;International Business Machines;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ibm.com;ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nveerabadran2023adaptive,\ntitle={Adaptive recurrent vision performs zero-shot computation scaling to unseen difficulty levels},\nauthor={Vijay Veerabadran and Srinivas Ravishankar and Yuan Tang and Ritik Raina and Virginia R. 
de Sa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iqezE0EyXq}\n}", "github": "", "project": "", "reviewers": "cX6r;qD61;1Kd6;ikdz;T9g1", "pdf_size": 1379643, "rating": "4;5;5;6;6", "confidence": "4;4;3;4;3", "soundness": "3;3;3;3;4", "novelty": "1;3;4;2;4", "presentation": "3;1;2;3;4", "wc_summary": "51;110;126;137;74", "wc_strengths": "80;72;46;109;97", "wc_weaknesses": "191;183;447;284;218", "wc_questions": "104;27;184;7;59", "wc_limitations": "1;16;14;7;47", "wc_review": "427;408;817;544;495", "wc_reply_reviewers": "706;36;91;69;49", "wc_reply_authors": "769;0;210;37;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;1;2;2;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 1.16619037896906 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 99.6, 32.314702536152176 ], "wc_strengths_avg": [ 80.8, 21.664717861075413 ], "wc_weaknesses_avg": [ 264.6, 97.87052671769986 ], "wc_questions_avg": [ 76.2, 63.08533902579901 ], "wc_limitations_avg": [ 17.0, 15.912259424732868 ], "wc_review_avg": [ 538.2, 147.61219461819542 ], "wc_reply_reviewers_avg": [ 190.2, 258.5709960533083 ], "wc_reply_authors_avg": [ 203.2, 293.38670726534286 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16875479122947751364&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucsd.edu;ibm.com;ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of California, San Diego;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.ibm.com", "aff_unique_abbr": "UCSD;IBM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Punctuation-level Attack: Single-shot and Single Punctuation Can Fool Text Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70720", "id": "ir6WWkFR80", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a9f4e15ad0d680429a3e0570a96f763-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ir6WWkFR80", "openreview": "https://openreview.net/forum?id=ir6WWkFR80", "poster": "/media/PosterPDFs/NeurIPS%202023/70720.png?t=1699610992.1871073", "slides": "https://nips.cc/virtual/2023/poster/70720", "video": "https://nips.cc/virtual/2023/poster/70720", "author_site": "wenqiang wang, Chongyang Du, Tao Wang, Kaihao Zhang, Wenhan Luo, Lin Ma, Wei Liu, Xiaochun Cao", "tldr": "", "abstract": "Adversarial attacks have attracted increasing attention in various fields, including natural language processing. Current textual attack models primarily focus on fooling models by adding character-/word-/sentence-level perturbations, ignoring their influence on human perception. In this paper, for the first time in the community, we propose a novel mode of textual attack, the punctuation-level attack. 
With various types of perturbations, including insertion, displacement, deletion, and replacement, the punctuation-level attack achieves promising fooling rates against SOTA models on typical textual tasks, while maintaining minimal influence on human perception and understanding of the text through the mere single-shot perturbation of a single punctuation mark. Furthermore, we propose a search method named Text Position Punctuation Embedding and Paraphrase (TPPEP) to accelerate the pursuit of the optimal position at which to deploy the attack, without exhaustive search, and we present a mathematical interpretation of TPPEP. Thanks to the integrated Text Position Punctuation Embedding (TPPE), the punctuation attack can be applied at a constant time cost. Experimental results on public datasets and SOTA models demonstrate the effectiveness of the punctuation attack and the proposed TPPE. We additionally apply the single punctuation attack to summarization, semantic-similarity-scoring, and text-to-image tasks, and achieve encouraging results.", "keywords": "Punctuation-level Attack;Textual Adversarial attack;Natural Language Processing", "primary_area": "", "supplementary_material": "/attachment/4c081ad24e1ba29c81d38137533713bdf01485ee.pdf", "author": "Wenqiang Wang;Chongyang Du;Tao Wang;Kaihao Zhang;Wenhan Luo;Lin Ma;Wei Liu;Xiaochun Cao", "authorids": "~Wenqiang_Wang1;~Chongyang_Du1;~Tao_Wang6;~Kaihao_Zhang2;~Wenhan_Luo1;~Lin_Ma2;~Wei_Liu3;~Xiaochun_Cao3", "gender": "M;;M;M;M;M;M;M", "homepage": "https://github.com/zjwwq/wwq;https://duchongyang.github.io/;https://taowangzj.github.io/;https://zhangkaihao.github.io/;https://whluo.github.io/;http://forestlinma.com;https://sites.google.com/view/cuweiliu;https://scst.sysu.edu.cn/members/caoxiaochun.htm", "dblp": ";;12/5838-52.html;179/6089;64/9877;74/3608-2;49/3283-5;39/3695", "google_scholar": ";;https://scholar.google.com.hk/citations?user=TsDufoMAAAAJ;https://scholar.google.com.au/citations?user=eqwDXdMAAAAJ;g20Q12MAAAAJ;DAn1pA4AAAAJ;AjxoEpIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-0202-0174;;0000-0002-5697-4168;;0000-0002-3865-8145;0000-0001-7141-708X", "linkedin": ";;;;wenhan-luo-a1843480/;;;", "or_profile": "~Wenqiang_Wang1;~Chongyang_Du1;~Tao_Wang6;~Kaihao_Zhang2;~Wenhan_Luo1;~Lin_Ma2;~Wei_Liu3;~Xiaochun_Cao3", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Nanyang Technological University;Australian National University;Sun Yat-sen University;Meituan;Tencent;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;ntu.edu.sg;anu.edu.au;sysu.edu.cn;meituan.com;tencent.com;sysu.edu.cn", "position": "PhD student;MS student;Intern;Research Fellow;Associate Professor;Principal Researcher and Research Manager ;Distinguished Scientist;Full Professor", "bibtex": "@inproceedings{\nwang2023punctuationlevel,\ntitle={Punctuation-level Attack: Single-shot and Single Punctuation Can Fool Text Models},\nauthor={Wenqiang Wang and Chongyang Du and Tao Wang and Kaihao Zhang and Wenhan Luo and Lin Ma and Wei Liu and Xiaochun Cao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ir6WWkFR80}\n}", "github": "", "project": "", "reviewers": "rnAT;LLfX;AjFS;y7qU", "pdf_size": 556342, "rating": "4;5;7;7", "confidence": "5;3;4;4", "soundness": "3;2;3;3", "novelty": "2;3;4;3", "presentation": "3;2;3;3", "wc_summary": "94;109;80;133", "wc_strengths": "83;70;88;87", "wc_weaknesses": "274;321;131;106", "wc_questions": "154;32;75;40", "wc_limitations": "4;47;23;49",
"wc_review": "609;579;397;415", "wc_reply_reviewers": "0;74;44;0", "wc_reply_authors": "0;144;33;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 19.63415391607186 ], "wc_strengths_avg": [ 82.0, 7.176350047203662 ], "wc_weaknesses_avg": [ 208.0, 91.45764046814242 ], "wc_questions_avg": [ 75.25, 48.25647624930772 ], "wc_limitations_avg": [ 30.75, 18.525320510047862 ], "wc_review_avg": [ 500.0, 94.81033698917012 ], "wc_reply_reviewers_avg": [ 29.5, 31.34884367883447 ], "wc_reply_authors_avg": [ 44.25, 59.14547742642712 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14057020651942824895&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "sysu.edu.cn;sysu.edu.cn;ntu.edu.sg;anu.edu.au;sysu.edu.cn;meituan.com;tencent.com;sysu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;0;3;4;0", "aff_unique_norm": "Sun Yat-sen University;Nanyang Technological University;Australian National University;Meituan;Tencent", "aff_unique_dep": ";;;;Tencent Holdings Limited", "aff_unique_url": "http://www.sysu.edu.cn;https://www.ntu.edu.sg;https://www.anu.edu.au;https://www.meituan.com;https://www.tencent.com", "aff_unique_abbr": "SYSU;NTU;ANU;Meituan;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;0;0;0", "aff_country_unique": "China;Singapore;Australia" }, { "title": "Improving Compositional Generalization using Iterated Learning and Simplicial Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70719", "id": "irRHgjePdR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/be7430d22a4dae8516894e32f2fcc6db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=irRHgjePdR", "openreview": "https://openreview.net/forum?id=irRHgjePdR", "poster": "/media/PosterPDFs/NeurIPS%202023/70719.png?t=1701578671.234087", "slides": "https://nips.cc/virtual/2023/poster/70719", "video": "https://nips.cc/virtual/2023/poster/70719", "author_site": "Yi Ren, Samuel Lavoie, Michael Galkin, Danica J. Sutherland, Aaron Courville", "tldr": "", "abstract": "Compositional generalization, the ability of an agent to generalize to unseen combinations of latent factors, is easy for humans but hard for deep neural networks. A line of research in cognitive science has hypothesized a process, \"iterated learning,\" to help explain how human language developed this ability; the theory rests on simultaneous pressures towards compressibility (when an ignorant agent learns from an informed one) and expressivity (when it uses the representation for downstream tasks). Inspired by this process, we propose to improve the compositional generalization of deep networks by using iterated learning on models with simplicial embeddings, which can approximately discretize representations. This approach is further motivated by an analysis of compositionality based on Kolmogorov complexity. 
We show that this combination of changes improves compositional generalization over other approaches, demonstrating these improvements both on vision tasks with well-understood latent factors and on real molecular graph prediction tasks where the latent structure is unknown.", "keywords": "compositional generalization;systematic generalization;iterated learning;representation learning;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/f09f49dc081ab78176b51e8a0ad07b6502b5d690.zip", "author": "Yi Ren;Samuel Lavoie;Mikhail Galkin;Danica J. Sutherland;Aaron Courville", "authorids": "~Yi_Ren6;~Samuel_Lavoie1;~Mikhail_Galkin1;~Danica_J._Sutherland1;~Aaron_Courville3", "gender": "M;M;;F;M", "homepage": "https://joshua-ren.github.io/;https://migalkin.github.io/;;http://www.djsutherland.ml;http://example.com", "dblp": ";160/8154;56/1688;92/10966;225/6508", "google_scholar": "5QNce38AAAAJ;yfYRbG4AAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;https://scholar.google.co.uk/citations?user=uO_NqicAAAAJ;", "orcid": ";;;0000-0002-1525-3532;", "linkedin": ";;;;", "or_profile": "~Yi_Ren6;~Mikhail_Galkin1;~Aaron_Courville3;~Danica_J._Sutherland2;~Samuel_Lavoie-Marchildon1", "aff": "University of British Columbia;Intel;Universit\u00e9 de Montr\u00e9al;University of British Columbia;University of Montreal", "aff_domain": "ubc.ca;intel.com; ;cs.ubc.ca;umontreal.ca", "position": "PhD student;Researcher;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nren2023improving,\ntitle={Improving Compositional Generalization using Iterated Learning and Simplicial Embeddings},\nauthor={Yi Ren and Samuel Lavoie and Mikhail Galkin and Danica J. Sutherland and Aaron Courville},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=irRHgjePdR}\n}", "github": "", "project": "", "reviewers": "eFnA;7owJ;7GSE;r5b6", "pdf_size": 4362162, "rating": "4;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "67;72;117;83", "wc_strengths": "33;29;57;124", "wc_weaknesses": "4;99;227;339", "wc_questions": "234;8;90;3", "wc_limitations": "16;20;11;9", "wc_review": "354;228;502;558", "wc_reply_reviewers": "12;9;95;10", "wc_reply_authors": "0;0;217;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.75, 19.49839737004044 ], "wc_strengths_avg": [ 60.75, 38.05505879643336 ], "wc_weaknesses_avg": [ 167.25, 126.86286887817097 ], "wc_questions_avg": [ 83.75, 93.37123486384873 ], "wc_limitations_avg": [ 14.0, 4.301162633521313 ], "wc_review_avg": [ 410.5, 129.06103207397655 ], "wc_reply_reviewers_avg": [ 31.5, 36.67764987018661 ], "wc_reply_authors_avg": [ 54.25, 93.96375631061159 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12390267246817978257&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ubc.ca;intel.com; ;cs.ubc.ca;umontreal.ca", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of British Columbia;Intel;Universit\u00e9 de 
Montr\u00e9al;University of Montreal", "aff_unique_dep": ";Intel Corporation;;", "aff_unique_url": "https://www.ubc.ca;https://www.intel.com;https://www.umontreal.ca;https://wwwumontreal.ca", "aff_unique_abbr": "UBC;Intel;UdeM;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Saddle-to-Saddle Dynamics in Diagonal Linear Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70718", "id": "iuqCXg1Gng", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/17a9ab4190289f0e1504bbb98d1d111a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iuqCXg1Gng", "openreview": "https://openreview.net/forum?id=iuqCXg1Gng", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70718", "video": "https://nips.cc/virtual/2023/poster/70718", "author_site": "Scott Pesme, Nicolas Flammarion", "tldr": "", "abstract": "In this paper we fully describe the trajectory of gradient flow over $2$-layer diagonal linear networks for the regression setting in the limit of vanishing initialisation. We show that the limiting flow successively jumps from a saddle of the training loss to another until reaching the minimum $\\ell_1$-norm solution. We explicitly characterise the visited saddles as well as the jump times through a recursive algorithm reminiscent of the LARS algorithm used for computing the Lasso path. Starting from the zero vector, coordinates are successively activated until the minimum $\\ell_1$-norm solution is recovered, revealing an incremental learning. Our proof leverages a convenient arc-length time-reparametrisation which enables to keep track of the transitions between the jumps. Our analysis requires negligible assumptions on the data, applies to both under and overparametrised settings and covers complex cases where there is no monotonicity of the number of active coordinates. 
We provide numerical experiments to support our findings.", "keywords": "gradient flow;saddle-to-saddle;diagonal linear network;incremental learning", "primary_area": "", "supplementary_material": "/attachment/5339d46d2e70903a601815bf36f393ec74f3db3b.pdf", "author": "Scott Pesme;Nicolas Flammarion", "authorids": "~Scott_Pesme1;~Nicolas_Flammarion1", "gender": "M;M", "homepage": "https://scottpesme.github.io/;", "dblp": "268/7836;164/7417", "google_scholar": "BwCLRb0AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Scott_Pesme1;~Nicolas_Flammarion1", "aff": "Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\npesme2023saddletosaddle,\ntitle={Saddle-to-Saddle Dynamics in Diagonal Linear Networks},\nauthor={Scott Pesme and Nicolas Flammarion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iuqCXg1Gng}\n}", "github": "", "project": "", "reviewers": "BqoH;8NQr;tpj2;FL6c;P2pV", "pdf_size": 4364734, "rating": "5;6;7;7;8", "confidence": "3;3;3;4;5", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;2", "presentation": "3;3;3;3;4", "wc_summary": "51;64;167;88;42", "wc_strengths": "64;56;212;92;50", "wc_weaknesses": "45;46;149;28;119", "wc_questions": "66;1;138;34;51", "wc_limitations": "25;1;5;22;124", "wc_review": "251;168;671;264;386", "wc_reply_reviewers": "9;9;54;13;16", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 82.4, 45.04486652216876 ], "wc_strengths_avg": [ 94.8, 60.34036791402585 ], "wc_weaknesses_avg": [ 77.4, 47.60924280011183 ], "wc_questions_avg": [ 58.0, 45.47086979594738 ], "wc_limitations_avg": [ 35.4, 45.26632302275059 ], "wc_review_avg": [ 348.0, 175.8624462470598 ], "wc_reply_reviewers_avg": [ 20.2, 17.104385402580238 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7844645405527363, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14122871202869043326&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Patch Diffusion: Faster and More Data-Efficient Training of Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70717", "id": "iv2sTQtbst", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e4667dd0a5a54b74019b72b677ed8ec1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iv2sTQtbst", "openreview": "https://openreview.net/forum?id=iv2sTQtbst", "poster": "/media/PosterPDFs/NeurIPS%202023/70717.png?t=1701900696.8738196", "slides": "https://nips.cc/virtual/2023/poster/70717", "video": "https://nips.cc/virtual/2023/poster/70717", 
"author_site": "Zhendong Wang, Yifan Jiang, Huangjie Zheng, Peihao Wang, Pengcheng He, Zhangyang \"Atlas\" Wang, Weizhu Chen, Mingyuan Zhou", "tldr": "", "abstract": "Diffusion models are powerful, but they require a lot of time and data to train. We propose Patch Diffusion, a generic patch-wise training framework, to significantly reduce the training time costs while improving data efficiency, which thus helps democratize diffusion model training to broader users. At the core of our innovations is a new conditional score function at the patch level, where the patch location in the original image is included as additional coordinate channels, while the patch size is randomized and diversified throughout training to encode the cross-region dependency at multiple scales. Sampling with our method is as easy as in the original diffusion model. Through Patch Diffusion, we could achieve $\\mathbf{\\ge 2\\times}$ faster training, while maintaining comparable or better generation quality. Patch Diffusion meanwhile improves the performance of diffusion models trained on relatively small datasets, $e.g.$, as few as 5,000 images to train from scratch. We achieve outstanding FID scores in line with state-of-the-art benchmarks: 1.77 on CelebA-64$\\times$64, 1.93 on AFHQv2-Wild-64$\\times$64, and 2.72 on ImageNet-256$\\times$256. We share our code and pre-trained models at https://github.com/Zhendong-Wang/Patch-Diffusion.", "keywords": "diffusion models;training efficiency;data efficiency", "primary_area": "", "supplementary_material": "/attachment/a295a4f2d83e3369bb36afa951c907289ff015d3.zip", "author": "Zhendong Wang;Yifan Jiang;Huangjie Zheng;Peihao Wang;Pengcheng He;Zhangyang Wang;Weizhu Chen;Mingyuan Zhou", "authorids": "~Zhendong_Wang1;~Yifan_Jiang2;~Huangjie_Zheng1;~Peihao_Wang1;~Pengcheng_He2;~Zhangyang_Wang1;~Weizhu_Chen1;~Mingyuan_Zhou1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://zhendong-wang.github.io/;https://yifanjiang19.github.io/;;https://peihaowang.github.io/;;https://vita-group.github.io;https://www.microsoft.com/en-us/research/people/wzchen/;http://mingyuanzhou.github.io", "dblp": ";81/7246-1;192/2170;239/4075;116/8665;119/4026;79/2536;", "google_scholar": "lRiIjhcAAAAJ;PMeFEOIAAAAJ;Vl5wCXsAAAAJ;fqf2tBsAAAAJ;https://scholar.google.com/citations?hl=en;pxFyKAIAAAAJ;LG_E-4EAAAAJ;LXwCIisAAAAJ", "orcid": ";;0000-0003-0508-5034;;;;;", "linkedin": ";;;peihao-wang-25a411162/;;;;", "or_profile": "~Zhendong_Wang1;~Yifan_Jiang2;~Huangjie_Zheng1;~Peihao_Wang1;~Pengcheng_He2;~Zhangyang_Wang1;~Weizhu_Chen1;~Mingyuan_Zhou1", "aff": "University of Texas at Austin;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin;Microsoft;University of Texas, Austin;Microsoft GenAI;Google", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;microsoft.com;utexas.edu;microsoft.com;google.com", "position": "PhD student;PhD student;PhD student;PhD student;Principal Researcher;Assistant Professor;Vice President;Researcher", "bibtex": "@inproceedings{\nwang2023patch,\ntitle={Patch Diffusion: Faster and More Data-Efficient Training of Diffusion Models},\nauthor={Zhendong Wang and Yifan Jiang and Huangjie Zheng and Peihao Wang and Pengcheng He and Zhangyang Wang and Weizhu Chen and Mingyuan Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iv2sTQtbst}\n}", "github": "", "project": "", "reviewers": "NLEo;Vqdw;ExTh;qnyP;nnk4", "pdf_size": 12639845, "rating": "5;6;6;6;7", "confidence": 
"5;5;4;4;3", "soundness": "2;3;4;3;2", "novelty": "2;2;3;3;3", "presentation": "2;3;4;3;3", "wc_summary": "112;140;47;37;81", "wc_strengths": "78;176;44;28;102", "wc_weaknesses": "273;251;62;54;91", "wc_questions": "265;175;33;13;30", "wc_limitations": "37;11;8;1;6", "wc_review": "765;753;194;133;310", "wc_reply_reviewers": "99;214;23;11;0", "wc_reply_authors": "97;248;16;23;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "3;3;2;2;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 83.4, 38.74325747791479 ], "wc_strengths_avg": [ 85.6, 52.052281410136096 ], "wc_weaknesses_avg": [ 146.2, 95.60209202731916 ], "wc_questions_avg": [ 103.2, 99.75650354738782 ], "wc_limitations_avg": [ 12.6, 12.626955294131676 ], "wc_review_avg": [ 431.0, 273.8079619003071 ], "wc_reply_reviewers_avg": [ 69.4, 80.20623417166523 ], "wc_reply_authors_avg": [ 76.8, 91.87687413054495 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8451542547285165, "gs_citation": 231, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9727285603589752900&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;microsoft.com;utexas.edu;microsoft.com;google.com", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;1;2", "aff_unique_norm": "University of Texas at Austin;Microsoft;Google", "aff_unique_dep": ";Microsoft Corporation;Google", "aff_unique_url": "https://www.utexas.edu;https://www.microsoft.com;https://www.google.com", "aff_unique_abbr": "UT Austin;Microsoft;Google", "aff_campus_unique_index": "0;0;0;0;0;2", "aff_campus_unique": "Austin;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distilling Out-of-Distribution Robustness from Vision-Language Foundation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70716", "id": "iwp3H8uSeK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67f30132d98e758f7b4e28c36091d86e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iwp3H8uSeK", "openreview": "https://openreview.net/forum?id=iwp3H8uSeK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70716", "video": "https://nips.cc/virtual/2023/poster/70716", "author_site": "Andy Zhou, Jindong Wang, Yu-Xiong Wang, Haohan Wang", "tldr": "", "abstract": "We propose a conceptually simple and lightweight framework for improving the robustness of vision models through the combination of knowledge distillation and data augmentation. We address the conjecture that larger models do not make for better teachers by showing strong gains in out-of-distribution robustness when distilling from pretrained foundation models. Following this finding, we propose Discrete Adversarial Distillation (DAD), which leverages a robust teacher to generate adversarial examples and a VQGAN to discretize them, creating more informative samples than standard data augmentation techniques. 
We provide a theoretical framework for the use of a robust teacher in the knowledge distillation with data augmentation setting and demonstrate strong gains in out-of-distribution robustness and clean accuracy across different student architectures. Notably, our method adds minor computational overhead compared to similar techniques and can be easily combined with other data augmentations for further improvements.", "keywords": "robustness;knowledge distillation;adversarial training;data augmentation;generalization", "primary_area": "", "supplementary_material": "", "author": "Andy Zhou;Jindong Wang;Yu-Xiong Wang;Haohan Wang", "authorids": "~Andy_Zhou2;~Jindong_Wang1;~Yu-Xiong_Wang1;~Haohan_Wang1", "gender": "M;;M;M", "homepage": "https://www.andyzhou.ai;https://yxw.cs.illinois.edu/;http://cs.cmu.edu/~haohanw;https://jd92.wang/", "dblp": ";35/10700;132/4066;19/2969-1", "google_scholar": "https://scholar.google.com/citations?hl=en;T_Q-xDkAAAAJ;nZxJGeUAAAAJ;hBZ_tKsAAAAJ", "orcid": ";;;0000-0002-4833-0880", "linkedin": "andy-zhou-679376206/;;haohanwang/;jindong-wang/", "or_profile": "~Andy_Zhou2;~Yu-Xiong_Wang1;~Haohan_Wang1;~Jindong_Wang4", "aff": "Department of Computer Science;Department of Computer Science, University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;Microsoft Research", "aff_domain": "cs.illinois.edu;cs.illinois.edu;illinois.edu;microsoft.com", "position": "Undergrad student;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzhou2023distilling,\ntitle={Distilling Out-of-Distribution Robustness from Vision-Language Foundation Models},\nauthor={Andy Zhou and Jindong Wang and Yu-Xiong Wang and Haohan Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iwp3H8uSeK}\n}", "github": "", "project": "", "reviewers": "ktZ5;qyW3;gGrQ;GKv1;Y3Hr", "pdf_size": 1859000, "rating": "5;5;5;5;6", "confidence": "3;4;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "1;3;2;3;3", "wc_summary": "78;59;57;61;142", "wc_strengths": "37;58;132;41;46", "wc_weaknesses": "163;108;66;87;280", "wc_questions": "107;3;72;55;215", "wc_limitations": "8;3;1;3;7", "wc_review": "393;231;328;247;690", "wc_reply_reviewers": "328;0;30;0;0", "wc_reply_authors": "1098;254;0;0;158", "reply_reviewers": "2;0;1;0;0", "reply_authors": "5;2;1;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 79.4, 32.178253526255894 ], "wc_strengths_avg": [ 62.8, 35.31232079600546 ], "wc_weaknesses_avg": [ 140.8, 76.72652735527654 ], "wc_questions_avg": [ 90.4, 70.75478782386391 ], "wc_limitations_avg": [ 4.4, 2.65329983228432 ], "wc_review_avg": [ 377.8, 166.6366106232361 ], "wc_reply_reviewers_avg": [ 71.6, 128.72544426025493 ], "wc_reply_authors_avg": [ 302.0, 409.64960637110346 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.2, 1.4696938456699071 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10197625638893270419&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "cs.illinois.edu;cs.illinois.edu;illinois.edu;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Unknown 
Institution;University of Illinois Urbana-Champaign;Microsoft", "aff_unique_dep": "Department of Computer Science;Department of Computer Science;Microsoft Research", "aff_unique_url": ";https://illinois.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": ";UIUC;MSR", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";United States" }, { "title": "Open Visual Knowledge Extraction via Relation-Oriented Multimodality Model Prompting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70715", "id": "ixVAXsdtJO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/49d1cf22327c51331cbd52bcb76a09a6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ixVAXsdtJO", "openreview": "https://openreview.net/forum?id=ixVAXsdtJO", "poster": "/media/PosterPDFs/NeurIPS%202023/70715.png?t=1701922665.4268515", "slides": "https://nips.cc/virtual/2023/poster/70715", "video": "https://nips.cc/virtual/2023/poster/70715", "author_site": "Hejie Cui, Xinyu Fang, Zihan Zhang, Ran Xu, Xuan Kan, Xin Liu, Yue Yu, Manling Li, Yangqiu Song, Carl Yang", "tldr": "", "abstract": "Images contain rich relational knowledge that can help machines understand the world. Existing methods for visual knowledge extraction often rely on a pre-defined format (e.g., sub-verb-obj tuples) or vocabulary (e.g., relation types), restricting the expressiveness of the extracted knowledge. In this work, we take a first step toward a new paradigm of open visual knowledge extraction. To achieve this, we present OpenVik, which consists of an open relational region detector to detect regions potentially containing relational knowledge and a visual knowledge generator that generates format-free knowledge by prompting a large multimodality model with the detected region of interest. We also explore two data enhancement techniques for diversifying the generated format-free visual knowledge. Extensive knowledge quality evaluations highlight the correctness and uniqueness of the open visual knowledge extracted by OpenVik. 
Moreover, integrating our extracted knowledge across various visual reasoning applications shows consistent improvements, indicating the real-world applicability of OpenVik.", "keywords": "Visual Knowledge Extraction;Multimodality;Large Model Prompting", "primary_area": "", "supplementary_material": "/attachment/1dce6f9767808f7f1010933f81ac056b22bc5efa.pdf", "author": "Hejie Cui;Xinyu Fang;Zihan Zhang;Ran Xu;Xuan Kan;Xin Liu;Yue Yu;Manling Li;Yangqiu Song;Carl Yang", "authorids": "~Hejie_Cui1;~Xinyu_Fang1;~Zihan_Zhang8;~Ran_Xu4;~Xuan_Kan1;~Xin_Liu9;~Yue_Yu2;~Manling_Li1;~Yangqiu_Song1;~Carl_Yang1", "gender": "F;M;M;F;;M;M;F;M;M", "homepage": "https://hejiecui.com/;https://github.com/FangXinyu-0913;;https://ritaranx.github.io/;http://kanxuan.live;https://www.cse.ust.hk/~xliucr/;https://yueyu1030.github.io;https://limanling.github.io/;https://www.cse.ust.hk/~yqsong/;https://cs.emory.edu/~jyang71/", "dblp": "221/7865;143/0236;;71/1270-2;211/5244;76/1820-39.html;;178/3620;86/2159;305/0254", "google_scholar": "r0Vh6GEAAAAJ;QZk6nZ8AAAAJ;;mcC5NzwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=WvC4upQAAAAJ;zQ3Jh6UAAAAJ;6U4SXnUAAAAJ;MdQZ-q8AAAAJ;mOINlwcAAAAJ", "orcid": "0000-0001-6388-2619;0009-0003-3764-1266;0009-0003-0481-6090;;;0000-0001-9610-9526;0000-0002-3683-5208;;0000-0002-7818-6090;0000-0001-9145-4531", "linkedin": "hejie-cui-b1071b13b/;;;ran-rita-xu-4568a9159/;xuan-kan-90077782/;xin-liu-179830143;;;yqsong/;", "or_profile": "~Hejie_Cui1;~Xinyu_Fang1;~Zihan_Zhang8;~Ran_Xu4;~Xuan_Kan1;~Xin_Liu9;~Yue_Yu2;~Manling_Li1;~Yangqiu_Song1;~Carl_Yang1", "aff": "Emory University;;Tongji University;Emory University;Emory University;Hong Kong University of Science and Technology;Google;University of Illinois, Urbana Champaign;Hong Kong University of Science and Technology;Emory University", "aff_domain": "emory.edu;;tongji.edu.cn;emory.edu;emory.edu;ust.hk;google.com;illinois.edu;ust.hk;emory.edu", "position": "PhD student;;Undergrad student;PhD student;PhD student;PhD student;Research Intern;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ncui2023open,\ntitle={Open Visual Knowledge Extraction via Relation-Oriented Multimodality Model Prompting},\nauthor={Hejie Cui and Xinyu Fang and Zihan Zhang and Ran Xu and Xuan Kan and Xin Liu and Yue Yu and Manling Li and Yangqiu Song and Carl Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ixVAXsdtJO}\n}", "github": "", "project": "", "reviewers": "nGQ3;PUUs;hCst;QgT9;GNm1", "pdf_size": 7387274, "rating": "4;4;5;7;7", "confidence": "4;4;4;4;4", "soundness": "3;2;3;3;2", "novelty": "2;2;2;3;2", "presentation": "3;3;3;3;4", "wc_summary": "92;54;92;48;132", "wc_strengths": "19;27;144;43;108", "wc_weaknesses": "50;163;124;20;266", "wc_questions": "99;52;149;20;22", "wc_limitations": "14;1;1;1;1", "wc_review": "274;297;510;132;529", "wc_reply_reviewers": "0;84;88;26;41", "wc_reply_authors": "52;381;209;78;41", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 83.6, 30.42104534693047 ], "wc_strengths_avg": [ 68.2, 49.15851909893137 ], "wc_weaknesses_avg": [ 124.6, 87.14952667685579 ], "wc_questions_avg": [ 68.4, 49.39068738132726 ], "wc_limitations_avg": [ 
3.6, 5.2 ], "wc_review_avg": [ 348.4, 150.82254473386928 ], "wc_reply_reviewers_avg": [ 47.8, 33.860891896109294 ], "wc_reply_authors_avg": [ 152.2, 129.21826496281398 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1276218350728559611&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "emory.edu;;tongji.edu.cn;emory.edu;emory.edu;ust.hk;google.com;illinois.edu;ust.hk;emory.edu", "author_num": 10, "aff_unique_index": "0;1;0;0;2;3;4;2;0", "aff_unique_norm": "Emory University;Tongji University;Hong Kong University of Science and Technology;Google;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;Google;", "aff_unique_url": "https://www.emory.edu;https://www.tongji.edu.cn;https://www.ust.hk;https://www.google.com;https://illinois.edu", "aff_unique_abbr": "Emory;Tongji;HKUST;Google;UIUC", "aff_campus_unique_index": "1;2;3;1", "aff_campus_unique": ";Hong Kong SAR;Mountain View;Urbana-Champaign", "aff_country_unique_index": "0;1;0;0;1;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Non-adversarial training of Neural SDEs with signature kernel scores", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70714", "id": "ixcsBZw5pl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2460396f2d0d421885997dd1612ac56b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ixcsBZw5pl", "openreview": "https://openreview.net/forum?id=ixcsBZw5pl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70714", "video": "https://nips.cc/virtual/2023/poster/70714", "author_site": "Zacharia Issa, Blanka Horvath, Maud Lemercier, Cristopher Salvi", "tldr": "", "abstract": "Neural SDEs are continuous-time generative models for sequential data. State-of-the-art performance for irregular time series generation has previously been obtained by training these models adversarially as GANs. However, as is typical for GAN architectures, training is notoriously unstable, often suffers from mode collapse, and requires specialised techniques such as weight clipping and gradient penalty to mitigate these issues. In this paper, we introduce a novel class of scoring rules on pathspace based on signature kernels and use them as an objective for training Neural SDEs non-adversarially. By showing strict properness of such kernel scores and consistency of the corresponding estimators, we provide existence and uniqueness guarantees for the minimiser. With this formulation, evaluating the generator-discriminator pair amounts to solving a system of linear path-dependent PDEs, which allows for memory-efficient adjoint-based backpropagation. Moreover, because the proposed kernel scores are well-defined for paths with values in infinite-dimensional spaces of functions, our framework can be easily extended to generate spatiotemporal data. 
Our procedure significantly outperforms alternative ways of training Neural SDEs on a variety of tasks including the simulation of rough volatility models, the conditional probabilistic forecasts of real-world forex pairs where the conditioning variable is an observed past trajectory, and the mesh-free generation of limit order book dynamics.", "keywords": "Neural SDEs;score-based generative models;signature kernels;time series", "primary_area": "", "supplementary_material": "/attachment/2a51b0c41f2ad9d16a19a413dabf744c5b72d391.zip", "author": "Zacharia Issa;Blanka Horvath;Maud Lemercier;Cristopher Salvi", "authorids": "zacharia.issa@kcl.ac.uk;~Blanka_Horvath1;~Maud_Lemercier1;~Cristopher_Salvi1", "gender": ";F;F;M", "homepage": ";https://www.turing.ac.uk/people/researchers/blanka-horvath;https://warwick.ac.uk/fac/sci/statistics/staff/research_students/mlemercier;https://www.maths.ox.ac.uk/people/cristopher.salvi", "dblp": ";;267/2274;", "google_scholar": ";https://scholar.google.co.uk/citations?hl=en;zKSZJbsAAAAJ;FVxJ4iIAAAAJ", "orcid": ";;;", "linkedin": ";;;cristopher-salvi/", "or_profile": "zacharia.issa@kcl.ac.uk;~Blanka_Horvath1;~Maud_Lemercier1;~Cristopher_Salvi1", "aff": ";;University of Oxford;Imperial College London", "aff_domain": ";;ox.ac.uk;ic.ac.uk", "position": ";;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nissa2023nonadversarial,\ntitle={Non-adversarial training of Neural {SDE}s with signature kernel scores},\nauthor={Zacharia Issa and Blanka Horvath and Maud Lemercier and Cristopher Salvi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ixcsBZw5pl}\n}", "github": "", "project": "", "reviewers": "dxEe;xQXq;kdBJ;YPT1;obEQ", "pdf_size": 4483089, "rating": "5;5;6;6;7", "confidence": "1;3;4;3;1", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;4", "presentation": "3;2;3;3;3", "wc_summary": "27;78;73;46;95", "wc_strengths": "26;30;76;83;54", "wc_weaknesses": "42;362;105;205;53", "wc_questions": "70;29;58;60;29", "wc_limitations": "1;9;39;57;18", "wc_review": "166;508;351;451;249", "wc_reply_reviewers": "26;94;111;0;17", "wc_reply_authors": "0;0;223;0;0", "reply_reviewers": "1;1;2;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.4, 1.2000000000000002 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 63.8, 24.21074141780875 ], "wc_strengths_avg": [ 53.8, 23.172397372736384 ], "wc_weaknesses_avg": [ 153.4, 119.1714730965427 ], "wc_questions_avg": [ 49.2, 16.98705389406886 ], "wc_limitations_avg": [ 24.8, 20.49780476051033 ], "wc_review_avg": [ 345.0, 125.77599134970076 ], "wc_reply_reviewers_avg": [ 49.6, 44.31974729169831 ], "wc_reply_authors_avg": [ 44.6, 89.20000000000002 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1336306209562122, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7958524786287041276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";;ox.ac.uk;ic.ac.uk", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.imperial.ac.uk", "aff_unique_abbr": "Oxford;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "GPEX, A Framework For Interpreting Artificial Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70713", "id": "iy4Of0w8ML", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca8c6f28d8ba1e732e3f217ab05c4ec0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iy4Of0w8ML", "openreview": "https://openreview.net/forum?id=iy4Of0w8ML", "poster": "/media/PosterPDFs/NeurIPS%202023/70713.png?t=1697470665.1723468", "slides": "https://nips.cc/virtual/2023/poster/70713", "video": "https://nips.cc/virtual/2023/poster/70713", "author_site": "Amir Hossein Hosseini Akbarnejad, Gilbert Bigras, Nilanjan Ray", "tldr": "", "abstract": "The analogy between Gaussian processes (GPs) and deep artificial neural networks (ANNs) has received a lot of interest, and has shown promise to unbox the blackbox of deep ANNs. Existing theoretical works put strict assumptions on the ANN (e.g. requiring all intermediate layers to be wide, or using specific activation functions). Accommodating those theoretical assumptions is hard in recent deep architectures, and those theoretical conditions need refinement as new deep architectures emerge. In this paper we derive an evidence lower-bound that encourages the GP's posterior to match the ANN's output without any requirement on the ANN. Using our method we find out that on 5 datasets, only a subset of those theoretical assumptions are sufficient. Indeed, in our experiments we used a normal ResNet-18 or feed-forward backbone with a single wide layer in the end. One limitation of training GPs is the lack of scalability with respect to the number of inducing points. We use novel computational techniques that allow us to train GPs with hundreds of thousands of inducing points and with GPU acceleration. As shown in our experiments, doing so has been essential to get a close match between the GPs and the ANNs on 5 datasets. We implement our method as a publicly available tool called GPEX: https://github.com/amirakbarnejad/gpex. On 5 datasets (4 image datasets, and 1 biological dataset) and ANNs with 2 types of functionality (classifier or attention-mechanism) we were able to find GPs whose outputs closely match those of the corresponding ANNs. After matching the GPs to the ANNs, we used the GPs' kernel functions to explain the ANNs' decisions. 
We provide more than 200 explanations (around 30 in the paper and the rest in the supplementary) which are highly interpretable by humans and show the ability of the obtained GPs to unbox the ANNs' decisions.", "keywords": "Gaussian processes;Explainable AI", "primary_area": "", "supplementary_material": "/attachment/03a1134c1d66630d2c7e22164e1c2948c4d19643.pdf", "author": "Amir Akbarnejad;Gilbert Bigras;Nilanjan Ray", "authorids": "~Amir_Akbarnejad2;~Gilbert_Bigras1;~Nilanjan_Ray1", "gender": "M;M;M", "homepage": "https://amirakbarnejad.github.io/;;https://webdocs.cs.ualberta.ca/~nray1/", "dblp": ";185/7871;19/6409", "google_scholar": "nDc-pEAAAAAJ;;https://scholar.google.com.tw/citations?user=E3wuLqAAAAAJ", "orcid": ";;0000-0002-7588-5400", "linkedin": ";;", "or_profile": "~Amir_Akbarnejad2;~Gilbert_Bigras1;~Nilanjan_Ray1", "aff": "University of Alberta;;University of Alberta", "aff_domain": "ualberta.ca;;ualberta.ca", "position": "PhD student;;Professor", "bibtex": "@inproceedings{\nakbarnejad2023gpex,\ntitle={{GPEX}, A Framework For Interpreting Artificial Neural Networks},\nauthor={Amir Akbarnejad and Gilbert Bigras and Nilanjan Ray},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iy4Of0w8ML}\n}", "github": "", "project": "", "reviewers": "ycEM;efq3;6BV2;nub9", "pdf_size": 3246401, "rating": "5;5;5;5", "confidence": "3;4;3;4", "soundness": "3;2;2;2", "novelty": "3;2;3;2", "presentation": "3;2;2;3", "wc_summary": "90;112;31;155", "wc_strengths": "37;45;27;34", "wc_weaknesses": "90;1097;47;451", "wc_questions": "97;118;1;314", "wc_limitations": "53;105;1;5", "wc_review": "367;1477;107;959", "wc_reply_reviewers": "0;1460;12;84", "wc_reply_authors": "698;1626;0;0", "reply_reviewers": "0;4;1;1", "reply_authors": "2;5;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 97.0, 44.704585894514224 ], "wc_strengths_avg": [ 35.75, 6.456585785072479 ], "wc_weaknesses_avg": [ 421.25, 420.5094380629286 ], "wc_questions_avg": [ 132.5, 113.6936673698232 ], "wc_limitations_avg": [ 41.0, 42.23742416388575 ], "wc_review_avg": [ 727.5, 531.5832484192857 ], "wc_reply_reviewers_avg": [ 389.0, 619.1760654288892 ], "wc_reply_authors_avg": [ 581.0, 667.2398369402115 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13750159095130931874&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ualberta.ca;;ualberta.ca", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Near-Optimal Algorithms for Gaussians with Huber Contamination: Mean Estimation and Linear Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70712", "id": "iyweRIXAeH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86c283920335ed1fec3edee227e05fbf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=iyweRIXAeH", "openreview": 
"https://openreview.net/forum?id=iyweRIXAeH", "poster": "/media/PosterPDFs/NeurIPS%202023/70712.png?t=1702002518.5967813", "slides": "https://nips.cc/virtual/2023/poster/70712", "video": "https://nips.cc/virtual/2023/poster/70712", "author_site": "Ilias Diakonikolas, Daniel Kane, Ankit Pensia, Ankit Pensia, Thanasis Pittas", "tldr": "", "abstract": "We study the fundamental problems of Gaussian mean \nestimation and linear regression with Gaussian covariates \nin the presence of Huber contamination. Our main \ncontribution is the design of the first sample near-optimal \nand almost linear-time algorithms with optimal error \nguarantees for both these problems. Specifically, for \nGaussian robust mean estimation on $\\mathbb R^d$ with \ncontamination parameter $\\epsilon \\in (0, \\epsilon_0)$ for a small \nabsolute constant $\\epsilon_0$, we give an \nalgorithm with sample complexity $n = \\tilde{O}(d/\\epsilon^2)$ \nand almost linear runtime that approximates the target \nmean within $\\ell_2$-error $O(\\epsilon)$. \nThis improves on \nprior work that achieved this error guarantee with \npolynomially suboptimal sample and time complexity. \nFor robust linear \nregression, we give the first algorithm with sample \ncomplexity $n = \\tilde{O}(d/\\epsilon^2)$ and almost linear \nruntime that approximates the target regressor within \n$\\ell_2$-error $O(\\epsilon)$. This is the first polynomial \nsample and time algorithm achieving the optimal error \nguarantee, answering an open question in the literature. \nAt the technical level, we develop a methodology that \nyields almost-linear time algorithms for multi-directional \nfiltering that may be of broader interest.", "keywords": "robust statistics;high-dimensional inference;regression;nearly linear time algorithms", "primary_area": "", "supplementary_material": "", "author": "Ilias Diakonikolas;Daniel Kane;Ankit Pensia;Thanasis Pittas", "authorids": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Ankit_Pensia1;~Thanasis_Pittas1", "gender": "M;M;M;M", "homepage": "http://www.iliasdiakonikolas.org/;http://cseweb.ucsd.edu/~dakane/;https://ankitp.net/;https://thanasispittas.github.io/", "dblp": "d/IliasDiakonikolas;52/6817;213/7640;284/9676", "google_scholar": "Vb3FLmkAAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;u1Qs7YIAAAAJ;pkIOtwcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Ankit_Pensia1;~Thanasis_Pittas1", "aff": "University of Wisconsin, Madison;University of California, San Diego;University of Wisconsin, Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;ucsd.edu;wisc.edu;wisc.edu", "position": "Associate Professor;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2023nearoptimal,\ntitle={Near-Optimal Algorithms for Gaussians with Huber Contamination: Mean Estimation and Linear Regression},\nauthor={Ilias Diakonikolas and Daniel Kane and Ankit Pensia and Thanasis Pittas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=iyweRIXAeH}\n}", "github": "", "project": "", "reviewers": "rxyU;7D1F;qzWP;bgYs;BEmV", "pdf_size": 596631, "rating": "5;6;6;6;8", "confidence": "3;3;3;3;3", "soundness": "4;3;2;3;4", "novelty": "3;2;2;3;4", "presentation": "2;3;2;3;4", "wc_summary": "84;292;72;61;131", "wc_strengths": "92;34;32;55;199", "wc_weaknesses": "85;1;137;44;78", "wc_questions": "1;72;38;79;15", "wc_limitations": "1;1;11;10;5", "wc_review": 
"263;400;290;249;428", "wc_reply_reviewers": "8;0;0;12;10", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 128.0, 85.40023419171636 ], "wc_strengths_avg": [ 82.4, 62.16301151006119 ], "wc_weaknesses_avg": [ 69.0, 45.18849411078001 ], "wc_questions_avg": [ 41.0, 30.62678566222711 ], "wc_limitations_avg": [ 5.6, 4.2708313008125245 ], "wc_review_avg": [ 326.0, 73.58532462386776 ], "wc_reply_reviewers_avg": [ 6.0, 5.059644256269407 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5762981068932437851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "email": "wisc.edu;ucsd.edu;wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Wisconsin;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu", "aff_unique_abbr": "UW;UCSD", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Madison;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Privacy Amplification via Compression: Achieving the Optimal Privacy-Accuracy-Communication Trade-off in Distributed Mean Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70711", "id": "izNfcaHJk0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da8860a2fe8ddb7589136853bcc313fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=izNfcaHJk0", "openreview": "https://openreview.net/forum?id=izNfcaHJk0", "poster": "/media/PosterPDFs/NeurIPS%202023/70711.png?t=1701936991.9665341", "slides": "https://nips.cc/virtual/2023/poster/70711", "video": "https://nips.cc/virtual/2023/poster/70711", "author_site": "Wei-Ning Chen, Dan Song, Ayfer Ozgur, Peter Kairouz", "tldr": "", "abstract": "Privacy and communication constraints are two major bottlenecks in federated learning (FL) and analytics (FA). We study the optimal accuracy of mean and frequency estimation (canonical models for FL and FA respectively) under joint communication and $(\\varepsilon, \\delta)$-differential privacy (DP) constraints. We consider both the central and the multi-message shuffled DP models. We show that in order to achieve the optimal $\\ell_2$ error under $(\\varepsilon, \\delta)$-DP, it is sufficient for each client to send $\\Theta\\left( n \\min\\left(\\varepsilon, \\varepsilon^2\\right)\\right)$ bits for FL %{\\color{blue}(assuming the dimension $d \\gg n \\min\\left(\\varepsilon, \\varepsilon^2\\right)$)} \nand $\\Theta\\left(\\log\\left( n\\min\\left(\\varepsilon, \\varepsilon^2\\right) \\right)\\right)$ bits for FA to the server, where $n$ is the number of participating clients. 
Without compression, each client needs $O(d)$ bits and $O\\left(\\log d\\right)$ bits for the mean and frequency estimation problems respectively (where $d$ corresponds to the number of trainable parameters in FL or the domain size in FA), meaning that we can get significant savings in the regime $ n \\min\\left(\\varepsilon, \\varepsilon^2\\right) = o(d)$, which is often the relevant regime in practice. \n\nWe propose two different ways to leverage compression for privacy amplification and achieve the optimal privacy-communication-accuracy trade-offs. In both cases, each client communicates only partial information about its sample and we show that privacy is amplified by randomly selecting the part contributed by each client. In the first method, the random selection is revealed to the server, which results in a central DP guarantee with optimal privacy-communication-accuracy trade-offs. In the second method, the random data parts from the clients are shuffled by a secure shuffler resulting in a multi-message shuffling scheme with the same optimal trade-offs. As a result, we establish the optimal three-way trade-offs between privacy, communication, and accuracy for both the central DP and multi-message shuffling frameworks.", "keywords": "Differential Privacy;Federated Learning;Communication", "primary_area": "", "supplementary_material": "/attachment/a142c60d04c08298e555d7fc43adcb932aeed7ef.zip", "author": "Wei-Ning Chen;Dan Song;Ayfer Ozgur;Peter Kairouz", "authorids": "~Wei-Ning_Chen1;~Dan_Song2;~Ayfer_Ozgur1;~Peter_Kairouz1", "gender": ";M;;M", "homepage": "https://web.stanford.edu/~wnchen/index.html;;;https://kairouzp.github.io/", "dblp": "51/2118;;12/4534;129/1254", "google_scholar": "-TqCZLIAAAAJ;;;m8NUgw0AAAAJ", "orcid": "0000-0001-7355-9487;0000-0001-8486-8285;;", "linkedin": ";;;kayrouzp", "or_profile": "~Wei-Ning_Chen1;~Dan_Song2;~Ayfer_Ozgur1;~Peter_Kairouz1", "aff": "Stanford University;Stanford University;Stanford University;Google", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;google.com", "position": "PhD student;PhD student;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nchen2023privacy,\ntitle={Privacy Amplification via Compression: Achieving the Optimal Privacy-Accuracy-Communication Trade-off in Distributed Mean Estimation},\nauthor={Wei-Ning Chen and Dan Song and Ayfer Ozgur and Peter Kairouz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=izNfcaHJk0}\n}", "github": "", "project": "", "reviewers": "WRY9;rG5G;mo9D;B1Jv", "pdf_size": 777452, "rating": "6;6;7;8", "confidence": "3;3;4;3", "soundness": "3;4;4;3", "novelty": "3;4;3;4", "presentation": "3;3;3;3", "wc_summary": "87;162;65;120", "wc_strengths": "57;51;28;50", "wc_weaknesses": "293;189;58;18", "wc_questions": "138;223;9;1", "wc_limitations": "1;14;1;1", "wc_review": "576;639;161;190", "wc_reply_reviewers": "116;410;215;0", "wc_reply_authors": "122;641;355;0", "reply_reviewers": "2;2;3;0", "reply_authors": "2;3;3;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 108.5, 36.568429006453094 ], "wc_strengths_avg": [ 46.5, 11.01135777277262 ], "wc_weaknesses_avg": [ 139.5, 108.87722443192608 ], "wc_questions_avg": [ 92.75, 92.7964843084047 ], "wc_limitations_avg": [ 4.25, 5.629165124598851 ], "wc_review_avg": [ 391.5, 217.38732713753117 ], 
"wc_reply_reviewers_avg": [ 185.25, 150.42502285191782 ], "wc_reply_authors_avg": [ 279.5, 244.5960956352329 ], "reply_reviewers_avg": [ 1.75, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4203623026360354127&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Versatile Energy-Based Probabilistic Models for High Energy Physics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70710", "id": "j0U6XJubbP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d4c0a618d0acd7921493e4f30395c22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j0U6XJubbP", "openreview": "https://openreview.net/forum?id=j0U6XJubbP", "poster": "/media/PosterPDFs/NeurIPS%202023/70710.png?t=1702555200.3669088", "slides": "https://nips.cc/virtual/2023/poster/70710", "video": "https://nips.cc/virtual/2023/poster/70710", "author_site": "Taoli Cheng, Aaron Courville", "tldr": "", "abstract": "As a classical generative modeling approach, energy-based models have the natural advantage of flexibility in the form of the energy function. Recently, energy-based models have achieved great success in modeling high-dimensional data in computer vision and natural language processing. In line with these advancements, we build a multi-purpose energy-based probabilistic model for High Energy Physics events at the Large Hadron Collider. This framework builds on a powerful generative model and describes higher-order inter-particle interactions. It suits different encoding architectures and builds on implicit generation. 
As for applications, it can serve as a powerful parameterized event generator for physics simulation, a generic anomalous signal detector free from spurious correlations, and an augmented event classifier for particle identification.", "keywords": "Generative modeling;Energy-based models;Out-of-distribution detection;Sciences;Application;Physics", "primary_area": "", "supplementary_material": "/attachment/c1c8558c5e0c27b16d0cd1fe1a774746dea537b7.pdf", "author": "Taoli Cheng;Aaron Courville", "authorids": "~Taoli_Cheng1;~Aaron_Courville3", "gender": ";", "homepage": "https://taolicheng.github.io/;", "dblp": "311/5441;56/1688", "google_scholar": ";https://scholar.google.ca/citations?user=km6CP8cAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Taoli_Cheng1;~Aaron_Courville3", "aff": ";Universit\u00e9 de Montr\u00e9al", "aff_domain": "; ", "position": ";Assistant Professor", "bibtex": "@inproceedings{\ncheng2023versatile,\ntitle={Versatile Energy-Based Probabilistic Models for High Energy Physics},\nauthor={Taoli Cheng and Aaron Courville},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j0U6XJubbP}\n}", "github": "", "project": "", "reviewers": "PfZ9;bfxx;npbJ;Xiwj", "pdf_size": 2874261, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "2;3;2;3", "wc_summary": "84;45;143;88", "wc_strengths": "48;20;74;120", "wc_weaknesses": "84;32;302;39", "wc_questions": "17;106;208;2", "wc_limitations": "4;1;47;1", "wc_review": "237;204;774;250", "wc_reply_reviewers": "205;18;12;0", "wc_reply_authors": "91;0;28;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.0, 34.90701935141412 ], "wc_strengths_avg": [ 65.5, 36.806928695559485 ], "wc_weaknesses_avg": [ 114.25, 110.2188164516386 ], "wc_questions_avg": [ 83.25, 82.26595589914457 ], "wc_limitations_avg": [ 13.25, 19.524023663169434 ], "wc_review_avg": [ 366.25, 236.01099021020187 ], "wc_reply_reviewers_avg": [ 58.75, 84.68581640392918 ], "wc_reply_authors_avg": [ 29.75, 37.164331017791774 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9901021004119316105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "; ", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": "", "aff_unique_url": "https://www.umontreal.ca", "aff_unique_abbr": "UdeM", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "j2EaW49Rk7", "title": "Orthogonal Gradient Boosting for Interpretable Additive Rule Ensembles", "track": "main", "status": "Reject", "tldr": "", "abstract": "Gradient boosting of decision rules is an efficient approach for finding interpretable yet accurate machine learning models. However, in practice, interpretability requires limiting the number and size of the generated rules, and existing boosting variants are not designed for this purpose. 
Through their strictly greedy approach, they can increase accuracy only by adding further rules, even when the same gains can be achieved, in a more interpretable form, by altering already discovered rules. Here we address this shortcoming by adopting a weight correction step in each boosting round to maximise the predictive gain per added rule. This leads to a new objective function for rule selection that, based on orthogonal projections, anticipates the subsequent weight correction. This approach not only correctly approximates the ideal update of adding the risk gradient itself to the model, but also favours the inclusion of more general and thus shorter rules. Additionally, we derive a fast incremental algorithm for rule evaluation, as is necessary to enable efficient single-rule optimisation through either the greedy or the branch-and-bound approach. As we demonstrate on a range of classification, regression, and Poisson regression tasks, the resulting rule learner significantly improves the comprehensibility/accuracy trade-off of the fitted ensemble. At the same time, it has comparable computational cost to previous branch-and-bound rule learners.", "keywords": "Rule Learning;Gradient Boosting;Branch-and-bound", "primary_area": "", "supplementary_material": "/attachment/8515907fb2ae944ea8921e0a2ea1ab21cd336471.zip", "author": "Fan Yang;Pierre Le Bodic;Michael Kamp;Mario Boley", "authorids": "~Fan_Yang32;~Pierre_Le_Bodic1;~Michael_Kamp1;~Mario_Boley2", "gender": "M;;M;M", "homepage": ";;http://michaelkamp.org;https://marioboley.github.io/", "dblp": ";25/7443;133/7744;41/5449", "google_scholar": "https://scholar.google.com.au/citations?user=GcrIymIAAAAJ;wQcDC-YAAAAJ;https://scholar.google.de/citations?user=8R5jbvQAAAAJ;https://scholar.google.de/citations?hl=en", "orcid": ";0000-0003-0842-9533;0000-0001-6231-0694;0000-0002-0704-4968", "linkedin": ";pierre-le-bodic-19993b210/;michael-kamp-29096a95/;", "or_profile": "~Fan_Yang32;~Pierre_Le_Bodic1;~Michael_Kamp1;~Mario_Boley2", "aff": "Monash University;Monash University;Institute for AI in Medicine IKIM;Monash University", "aff_domain": "monash.edu;monash.edu;uk-essen.de;monash.edu", "position": "PhD student;Assistant Professor;Research Group Leader;Assistant Professor", "bibtex": "@misc{\nyang2023orthogonal,\ntitle={Orthogonal Gradient Boosting for Interpretable Additive Rule Ensembles},\nauthor={Fan Yang and Pierre Le Bodic and Michael Kamp and Mario Boley},\nyear={2023},\nurl={https://openreview.net/forum?id=j2EaW49Rk7}\n}", "github": "", "project": "", "reviewers": "vDxD;9SvB;8EKB;crnT;KXDe", "site": "https://openreview.net/forum?id=j2EaW49Rk7", "pdf_size": 638662, "rating": "3;5;5;6;7", "confidence": "5;4;3;3;4", "soundness": "2;3;3;3;2", "novelty": "2;3;3;3;2", "presentation": "2;2;3;3;4", "wc_summary": "22;202;79;73;149", "wc_strengths": "29;127;30;165;135", "wc_weaknesses": "30;351;71;189;629", "wc_questions": "1;75;172;91;89", "wc_limitations": "448;9;1;138;11", "wc_review": "530;764;353;656;1013", "wc_reply_reviewers": "746;32;22;11;0", "wc_reply_authors": "797;0;0;0;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 105.0, 63.141111805225606 ], "wc_strengths_avg": [ 97.2, 56.71119818871754 ], "wc_weaknesses_avg": [ 254.0, 218.13940496847422 ], "wc_questions_avg": [ 85.6, 
54.360279616646565 ], "wc_limitations_avg": [ 121.4, 171.03286233937618 ], "wc_review_avg": [ 663.2, 222.05350706530172 ], "wc_reply_reviewers_avg": [ 162.2, 292.0961485538623 ], "wc_reply_authors_avg": [ 159.4, 318.79999999999995 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5640760748177661, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3hbvNJoSpCcJ:scholar.google.com/&scioq=Orthogonal+Gradient+Boosting+for+Interpretable+Additive+Rule+Ensembles&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Monash University;Institute for AI in Medicine", "aff_unique_dep": ";AI in Medicine", "aff_unique_url": "https://www.monash.edu;", "aff_unique_abbr": "Monash;IKIM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;Unknown" }, { "title": "Active Vision Reinforcement Learning under Limited Visual Observability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70709", "id": "j2oYaFpbrB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/20e6b4dd2b1f82bc599c593882f67f75-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j2oYaFpbrB", "openreview": "https://openreview.net/forum?id=j2oYaFpbrB", "poster": "/media/PosterPDFs/NeurIPS%202023/70709.png?t=1701963979.783298", "slides": "https://nips.cc/virtual/2023/poster/70709", "video": "https://nips.cc/virtual/2023/poster/70709", "author_site": "Jinghuan Shang, Michael S Ryoo", "tldr": "", "abstract": "In this work, we investigate Active Vision Reinforcement Learning (ActiveVision-RL), where an embodied agent simultaneously learns an action policy for the task while also controlling its visual observations in partially observable environments. We denote the former as the motor policy and the latter as the sensory policy. For example, humans solve real-world tasks by hand manipulation (motor policy) together with eye movements (sensory policy). ActiveVision-RL poses challenges in coordinating the two policies given their mutual influence. We propose SUGARL, Sensorimotor Understanding Guided Active Reinforcement Learning, a framework that models motor and sensory policies separately, but jointly learns them using an intrinsic sensorimotor reward. This learnable reward is assigned by a sensorimotor reward module and, inspired by the sensorimotor stage in humans, incentivizes the sensory policy to select observations that are optimal for inferring its own motor action. Through a series of experiments, we show the effectiveness of our method across a range of observability conditions and its adaptability to existing RL algorithms. 
The sensory policies learned through our method are observed to exhibit effective active vision strategies.", "keywords": "Reinforcement Learning;Active Reinforcement Learning;Visual Reinforcement Learning;Active Vision;Active Perception;Partial Observability;Sensorimotor", "primary_area": "", "supplementary_material": "/attachment/51cc3f7add8ecb543174e09cb9f1634e6403ac0f.pdf", "author": "Jinghuan Shang;Michael S Ryoo", "authorids": "~Jinghuan_Shang1;~Michael_S_Ryoo1", "gender": "M;M", "homepage": "https://www.cs.stonybrook.edu/~jishang;http://michaelryoo.com/", "dblp": "218/7364;r/MichaelSRyoo", "google_scholar": "gMvLIDUAAAAJ;vcw0TJIAAAAJ", "orcid": "0000-0001-7301-5981;", "linkedin": ";", "or_profile": "~Jinghuan_Shang1;~Michael_S_Ryoo1", "aff": "Department of Computer Science, State University of New York, Stony Brook;Google DeepMind", "aff_domain": "cs.stonybrook.edu;google.com", "position": "PhD student;Research Scientist", "bibtex": "@inproceedings{\nshang2023active,\ntitle={Active Vision Reinforcement Learning under Limited Visual Observability},\nauthor={Jinghuan Shang and Michael S Ryoo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j2oYaFpbrB}\n}", "github": "", "project": "", "reviewers": "Rg6w;LTdh;Wwp7;3Naa;SANf", "pdf_size": 13683132, "rating": "5;5;6;7;7", "confidence": "3;3;5;5;4", "soundness": "3;2;2;3;3", "novelty": "3;2;3;3;2", "presentation": "3;3;3;4;2", "wc_summary": "103;81;140;75;96", "wc_strengths": "100;85;117;59;51", "wc_weaknesses": "248;151;577;228;22", "wc_questions": "5;46;102;6;215", "wc_limitations": "26;43;171;31;45", "wc_review": "482;406;1107;399;429", "wc_reply_reviewers": "280;46;378;26;23", "wc_reply_authors": "568;0;251;0;0", "reply_reviewers": "3;1;3;1;1", "reply_authors": "4;1;4;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 99.0, 22.829805080201627 ], "wc_strengths_avg": [ 82.4, 24.686838598735157 ], "wc_weaknesses_avg": [ 245.2, 183.89497002365235 ], "wc_questions_avg": [ 74.8, 78.52490050932887 ], "wc_limitations_avg": [ 63.2, 54.370580280147834 ], "wc_review_avg": [ 564.6, 272.7582079424925 ], "wc_reply_reviewers_avg": [ 150.6, 149.13296081014417 ], "wc_reply_authors_avg": [ 163.8, 224.26448671156118 ], "reply_reviewers_avg": [ 1.8, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.75, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12566214061036414785&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.stonybrook.edu;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "State University of New York;Google", "aff_unique_dep": "Department of Computer Science;Google DeepMind", "aff_unique_url": "https://www.stonybrook.edu;https://deepmind.com", "aff_unique_abbr": "SUNY Stony Brook;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "MetaBox: A Benchmark Platform for Meta-Black-Box Optimization with Reinforcement Learning", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73497", "id": "j2wasUypqN", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/232eee8ef411a0a316efa298d7be3c2b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=j2wasUypqN", "openreview": "https://openreview.net/forum?id=j2wasUypqN", "poster": "/media/PosterPDFs/NeurIPS%202023/73497.png?t=1699442958.064804", "slides": "https://nips.cc/virtual/2023/poster/73497", "video": "https://nips.cc/virtual/2023/poster/73497", "author_site": "Zeyuan Ma, Hongshu Guo, Jiacheng Chen, Zhenrui Li, Guojun Peng, Yue-Jiao Gong, Yining Ma, Zhiguang Cao", "tldr": "", "abstract": "Recently, Meta-Black-Box Optimization with Reinforcement Learning (MetaBBO-RL) has showcased the power of leveraging RL at the meta-level to mitigate manual fine-tuning of low-level black-box optimizers. However, this field is hindered by the lack of a unified benchmark. To fill this gap, we introduce MetaBox, the first benchmark platform expressly tailored for developing and evaluating MetaBBO-RL methods. MetaBox offers a flexible algorithmic template that allows users to effortlessly implement their unique designs within the platform. Moreover, it provides a broad spectrum of over 300 problem instances, collected from synthetic to realistic scenarios, and an extensive library of 19 baseline methods, including both traditional black-box optimizers and recent MetaBBO-RL methods. Besides, MetaBox introduces three standardized performance metrics, enabling a more thorough assessment of the methods. In a bid to illustrate the utility of MetaBox for facilitating rigorous evaluation and in-depth analysis, we carry out a wide-ranging benchmarking study on existing MetaBBO-RL methods. Our MetaBox is open-source and accessible at: https://github.com/GMC-DRL/MetaBox.", "keywords": "Black-Box Optimization;Meta-Black-Box Optimization;Reinforcement Learning;Benchmark Platform", "primary_area": "", "supplementary_material": "", "author": "Zeyuan Ma;Hongshu Guo;Jiacheng Chen;Zhenrui Li;Guojun Peng;Yue-Jiao Gong;Yining Ma;Zhiguang Cao", "authorids": "~Zeyuan_Ma1;~Hongshu_Guo1;~Jiacheng_Chen4;~Zhenrui_Li1;~Guojun_Peng3;~Yue-Jiao_Gong1;~Yining_Ma1;~Zhiguang_Cao1", "gender": "M;M;M;;M;F;M;M", "homepage": "https://metaevo.github.io/;https://orcid.org/0000-0001-8063-8984;https://jc-chen1.github.io/;https://github.com/Anchorrrr;https://github.com/PGJ-0419;https://www.gongyuejiao.com/;https://yining043.github.io/;https://zhiguangcaosg.github.io/", "dblp": "295/6478;;;;;65/7184;160/6245-1;178/8621", "google_scholar": "Jcy8wPgAAAAJ;;knvJGuQAAAAJ;;;Mi0Zu3IAAAAJ;4_VyBTsAAAAJ;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ", "orcid": "0000-0001-6216-9379;0000-0001-8063-8984;0000-0002-7539-6156;;;0000-0002-5648-1160;0000-0002-6639-8547;0000-0002-4499-759X", "linkedin": ";;;;;;yiningma/;", "or_profile": "~Zeyuan_Ma1;~Hongshu_Guo1;~Jiacheng_Chen4;~Zhenrui_Li1;~Guojun_Peng3;~Yue-Jiao_Gong1;~Yining_Ma1;~Zhiguang_Cao1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;National University of Singapore;Institute for Infocomm Research, A*STAR", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;u.nus.edu;i2r.a-star.edu.sg", "position": "PhD student;PhD student;Undergrad student;Undergrad student;scut;Full Professor;PhD student;Scientist ", "bibtex": "@inproceedings{\nma2023metabox,\ntitle={MetaBox: A Benchmark Platform for Meta-Black-Box 
Optimization with Reinforcement Learning},\nauthor={Zeyuan Ma and Hongshu Guo and Jiacheng Chen and Zhenrui Li and Guojun Peng and Yue-Jiao Gong and Yining Ma and Zhiguang Cao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=j2wasUypqN}\n}", "github": "", "project": "", "reviewers": "t7Zi;D9Jb;qtiV;nMBF", "pdf_size": 1240516, "rating": "7;7;8;9", "confidence": "3;1;4;4", "wc_summary_and_contributions": "49;50;89;35", "wc_strengths": "39;40;73;105", "wc_improvement": "73;90;91;10", "wc_limitations": "1;27;127;38", "wc_correctness": "1;4;107;13", "wc_clarity": "1;4;6;21", "wc_relation_to_prior_work": "1;5;20;71", "wc_documentation": "16;3;37;35", "wc_additional_feedback": "1;1;1;1", "wc_review": "182;224;551;329", "wc_reply_reviewers": "13;15;33;18", "wc_reply_authors": "421;242;711;347", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "wc_summary_and_contributions_avg": [ 55.75, 20.09197601033806 ], "wc_strengths_avg": [ 64.25, 27.215574585152524 ], "wc_improvement_avg": [ 66.0, 33.1134413795969 ], "wc_limitations_avg": [ 48.25, 47.409782745758285 ], "wc_correctness_avg": [ 31.25, 43.95665478627781 ], "wc_clarity_avg": [ 8.0, 7.713624310270756 ], "wc_relation_to_prior_work_avg": [ 24.25, 27.904972675134445 ], "wc_documentation_avg": [ 22.75, 14.042346669983617 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 321.5, 142.9099366734168 ], "wc_reply_reviewers_avg": [ 19.75, 7.854139036202504 ], "wc_reply_authors_avg": [ 430.25, 174.12262202252757 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7385489458759963, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8067903546030985204&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;u.nus.edu;i2r.a-star.edu.sg", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;2", "aff_unique_norm": "South China University of Technology;National University of Singapore;Institute for Infocomm Research", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scut.edu.cn;https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "SCUT;NUS;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Resolving the Tug-of-War: A Separation of Communication and Learning in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70708", "id": "j4QVhftpYM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0aa800df4298539770b57824afc77a89-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j4QVhftpYM", "openreview": "https://openreview.net/forum?id=j4QVhftpYM", "poster": "/media/PosterPDFs/NeurIPS%202023/70708.png?t=1702144478.154445", "slides": "https://nips.cc/virtual/2023/poster/70708", "video": "https://nips.cc/virtual/2023/poster/70708", "author_site": "Junyi Li, Heng Huang", "tldr": "", "abstract": "Federated learning (FL) is a promising privacy-preserving machine learning paradigm over distributed data. 
In this paradigm, each client trains the parameters of a model locally and the server aggregates the parameters from clients periodically. Therefore, we perform learning and communication over the same set of parameters. However, we find that learning and communication have fundamentally divergent requirements for parameter selection, akin to two opposing teams in a tug-of-war game. To mitigate this discrepancy, we introduce FedSep, a novel two-layer federated learning framework. FedSep consists of separated communication and learning layers for each client, and the two layers are connected through decode/encode operations. In particular, the decoding operation is formulated as a minimization problem. We view FedSep as a federated bilevel optimization problem and propose an efficient algorithm to solve it. Theoretically, we demonstrate that its convergence matches that of standard FL algorithms. The separation of communication and learning in FedSep offers innovative solutions to various challenging problems in FL, such as Communication-Efficient FL and Heterogeneous-Model FL. Empirical validation shows the superior performance of FedSep over various baselines in these tasks.", "keywords": "Federated Learning", "primary_area": "", "supplementary_material": "/attachment/084f7140e6ac4b217bca44ae763bf464af1b3f65.pdf", "author": "Junyi Li;Heng Huang", "authorids": "~Junyi_Li1;~Heng_Huang1", "gender": "M;M", "homepage": ";https://www.cs.umd.edu/~heng/", "dblp": ";03/281", "google_scholar": "MzvZSs0AAAAJ;4OqLaDwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Junyi_Li1;~Heng_Huang1", "aff": "University of Pittsburgh;University of Pittsburgh", "aff_domain": "pitt.edu;pitt.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023resolving,\ntitle={Resolving the Tug-of-War: A Separation of Communication and Learning in Federated Learning},\nauthor={Junyi Li and Heng Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j4QVhftpYM}\n}", "github": "", "project": "", "reviewers": "Dwjg;G158;WQf3;A2g4", "pdf_size": 1067458, "rating": "3;6;6;7", "confidence": "3;3;4;4", "soundness": "3;4;3;3", "novelty": "2;4;3;4", "presentation": "2;4;3;3", "wc_summary": "97;66;48;41", "wc_strengths": "56;48;37;28", "wc_weaknesses": "728;53;120;94", "wc_questions": "7;43;195;2", "wc_limitations": "9;1;1;1", "wc_review": "897;211;401;166", "wc_reply_reviewers": "417;51;219;111", "wc_reply_authors": "1014;158;776;402", "reply_reviewers": "1;1;2;2", "reply_authors": "4;2;4;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 63.0, 21.644860821913362 ], "wc_strengths_avg": [ 42.25, 10.638961415476606 ], "wc_weaknesses_avg": [ 248.75, 277.72412120663915 ], "wc_questions_avg": [ 61.75, 78.54099248163344 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 418.75, 289.8623595777831 ], "wc_reply_reviewers_avg": [ 199.5, 139.25785435658557 ], "wc_reply_authors_avg": [ 587.5, 330.2707222870353 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5284588633838027048&as_sdt=5,48&sciodt=0,48&hl=en", 
"gs_version_total": 6, "email": "pitt.edu;pitt.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Pittsburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.pitt.edu", "aff_unique_abbr": "Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AndroidInTheWild: A Large-Scale Dataset For Android Device Control", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73496", "id": "j4b3l5kOil", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bbbb6308b402fe909c39dd29950c32e0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=j4b3l5kOil", "openreview": "https://openreview.net/forum?id=j4b3l5kOil", "poster": "/media/PosterPDFs/NeurIPS%202023/73496.png?t=1699937131.0177693", "slides": "https://nips.cc/virtual/2023/poster/73496", "video": "https://nips.cc/virtual/2023/poster/73496", "author_site": "Christopher Rawles, Alice Li, Daniel Rodriguez, Oriana Riva, Timothy Lillicrap", "tldr": "", "abstract": "There is a growing interest in device-control systems that can interpret human natural language instructions and execute them on a digital device by directly controlling its user interface. We present a dataset for device-control research, Android in the Wild (AitW), which is orders of magnitude larger than current datasets. The dataset contains human demonstrations of device interactions, including the screens and actions, and corresponding natural language instructions. It consists of 715k episodes spanning 30k unique instructions, four versions of Android (v10\u201313), and eight device types (Pixel 2 XL to Pixel 6) with varying screen resolutions. It contains multi-step tasks that require semantic understanding of language and visual context. This dataset poses a new challenge: actions available through the user interface must be inferred from their visual appearance, and, instead of simple UI element-based actions, the action space consists of precise gestures (e.g., horizontal scrolls to operate carousel widgets). We organize our dataset to encourage robustness analysis of device-control systems, i.e., how well a system performs in the presence of new task descriptions, new applications, or new platform versions. We develop two agents and report performance across the dataset. 
The dataset is available at https://github.com/google-research/google-research/tree/master/android_in_the_wild.", "keywords": "UI Automation;Device-control;Dataset;LLM", "primary_area": "", "supplementary_material": "", "author": "Christopher Rawles;Alice Li;Daniel Rodriguez;Oriana Riva;Timothy P Lillicrap", "authorids": "~Christopher_Rawles1;~Alice_Li2;~Daniel_Rodriguez2;~Oriana_Riva3;~Timothy_P_Lillicrap1", "gender": "M;F;M;;M", "homepage": ";;http://www.myowndomain.net;;http://contrastiveconvergence.net/~timothylillicrap/index.php", "dblp": "352/5268;;;53/3712;37/10849", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;Eo7-pZ4AAAAJ;https://scholar.google.co.uk/citations?user=htPVdRMAAAAJ", "orcid": ";;;;", "linkedin": "chris-rawles-50854826/;alice-li-0a7b3b165;;;", "or_profile": "~Christopher_Rawles1;~Alice_Li2;~Daniel_Rodriguez2;~Oriana_Riva3;~Timothy_P_Lillicrap1", "aff": "Google;Research, Google;Research, Google;Microsoft;Google DeepMind", "aff_domain": "google.com;research.google.com;research.google.com;microsoft.com;deepmind.com", "position": "Researcher;Researcher;Researcher;Principal Researcher;Research Scientist", "bibtex": "@inproceedings{\nrawles2023androidinthewild,\ntitle={AndroidInTheWild: A Large-Scale Dataset For Android Device Control},\nauthor={Christopher Rawles and Alice Li and Daniel Rodriguez and Oriana Riva and Timothy P Lillicrap},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=j4b3l5kOil}\n}", "github": "", "project": "", "reviewers": "t95d;2zAP;YSqi", "pdf_size": 4411148, "rating": "7;8;8", "confidence": "4;4;4", "wc_summary_and_contributions": "96;47;80", "wc_strengths": "76;40;62", "wc_improvement": "57;35;112", "wc_limitations": "99;21;155", "wc_correctness": "1;45;11", "wc_clarity": "1;8;15", "wc_relation_to_prior_work": "87;15;88", "wc_documentation": "13;14;36", "wc_additional_feedback": "1;1;1", "wc_review": "431;226;560", "wc_reply_reviewers": "20;0;29", "wc_reply_authors": "193;128;436", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 74.33333333333333, 20.401524997465806 ], "wc_strengths_avg": [ 59.333333333333336, 14.817407180595247 ], "wc_improvement_avg": [ 68.0, 32.38312317653544 ], "wc_limitations_avg": [ 91.66666666666667, 54.950482759986336 ], "wc_correctness_avg": [ 19.0, 18.83259585576738 ], "wc_clarity_avg": [ 8.0, 5.715476066494082 ], "wc_relation_to_prior_work_avg": [ 63.333333333333336, 34.179265969622904 ], "wc_documentation_avg": [ 21.0, 10.614455552060438 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 405.6666666666667, 137.5265630907555 ], "wc_reply_reviewers_avg": [ 16.333333333333332, 12.119772641798562 ], "wc_reply_authors_avg": [ 252.33333333333334, 132.55522957955634 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4835623251963195406&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "google.com;research.google.com;research.google.com;microsoft.com;deepmind.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Google;Microsoft", "aff_unique_dep": "Google;Microsoft Corporation", 
"aff_unique_url": "https://www.google.com;https://www.microsoft.com", "aff_unique_abbr": "Google;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "What You See is What You Read? Improving Text-Image Alignment Evaluation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70707", "id": "j5AoleAIru", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/056e8e9c8ca9929cb6cf198952bf1dbb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j5AoleAIru", "openreview": "https://openreview.net/forum?id=j5AoleAIru", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70707", "video": "https://nips.cc/virtual/2023/poster/70707", "author_site": "Michal Yarom, Yonatan Bitton, Soravit Changpinyo, Roee Aharoni, Jonathan Herzig, Oran Lang, Eran Ofek, Idan Szpektor", "tldr": "", "abstract": "Automatically determining whether a text and a corresponding image are semantically aligned is a significant challenge for vision-language models, with applications in generative text-to-image and image-to-text tasks. In this work, we study methods for automatic text-image alignment evaluation. We first introduce SeeTRUE: a comprehensive evaluation set, spanning multiple datasets from both text-to-image and image-to-text generation tasks, with human judgements for whether a given text-image pair is semantically aligned. We then describe two automatic methods to determine alignment: the first involving a pipeline based on question generation and visual question answering models, and the second employing an end-to-end classification approach by finetuning multimodal pretrained models. Both methods surpass prior approaches in various text-image alignment tasks, with significant improvements in challenging cases that involve complex composition or unnatural images. 
Finally, we demonstrate how our approaches can localize specific misalignments between an image and a given text, and how they can be used to automatically re-rank candidates in text-to-image generation.", "keywords": "Vision-and-language;Image-text alignment;Text-to-image generation;Image-to-text generation;Multi-modal models;Synthetic images;Meta-evaluation;Visual-question-answering", "primary_area": "", "supplementary_material": "/attachment/506e50a53f461ecdcae8132ce4be98985b505e82.zip", "author": "Michal Yarom;Yonatan Bitton;Soravit Changpinyo;Roee Aharoni;Jonathan Herzig;Oran Lang;Eran Ofek;Idan Szpektor", "authorids": "~Michal_Yarom1;~Yonatan_Bitton1;~Soravit_Changpinyo1;~Roee_Aharoni1;~Jonathan_Herzig2;~Oran_Lang1;eranofek@google.com;~Idan_Szpektor1", "gender": "F;M;M;M;M;M;;", "homepage": ";https://yonatanbitton.github.io/;https://schangpi.github.io/;http://www.roeeaharoni.com;https://jonathanherzig.github.io/;;;", "dblp": "181/6577;277/7042;139/1319;148/9506;133/3687.html;218/5554;;15/6513", "google_scholar": "GMVxiYgAAAAJ;P9Fpf4sAAAAJ;2TWx9x0AAAAJ;https://scholar.google.co.il/citations?user=wV0mHWgAAAAJ;https://scholar.google.co.il/citations?view_op=list_works;gypv57sAAAAJ;;XI2CP68AAAAJ", "orcid": ";;0000-0002-4013-1190;;;;;", "linkedin": ";yonatanbitton/;soravit-changpinyo-b6a35944;roeeaharoni;;;;", "or_profile": "~Michal_Yarom1;~Yonatan_Bitton1;~Soravit_Changpinyo1;~Roee_Aharoni1;~Jonathan_Herzig2;~Oran_Lang1;eranofek@google.com;~Idan_Szpektor1", "aff": "Research, Google;Hebrew University of Jerusalem;Google;Google;Research, Google;Google;;Google", "aff_domain": "research.google.com;huji.ac.il;google.com;google.com;research.google.com;google.com;;google.com", "position": "Researcher;PhD student;Researcher;Researcher;Researcher;Researcher;;Researcher", "bibtex": "@inproceedings{\nyarom2023what,\ntitle={What You See is What You Read? 
Improving Text-Image Alignment Evaluation},\nauthor={Michal Yarom and Yonatan Bitton and Soravit Changpinyo and Roee Aharoni and Jonathan Herzig and Oran Lang and Eran Ofek and Idan Szpektor},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j5AoleAIru}\n}", "github": "", "project": "", "reviewers": "UX4d;prMg;j32y;xP6K", "pdf_size": 8475408, "rating": "6;6;6;7", "confidence": "5;4;3;3", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "108;58;101;176", "wc_strengths": "46;49;82;78", "wc_weaknesses": "66;214;64;130", "wc_questions": "26;63;116;1", "wc_limitations": "30;4;28;6", "wc_review": "276;388;391;391", "wc_reply_reviewers": "17;0;25;55", "wc_reply_authors": "0;0;0;139", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 110.75, 42.2573958023918 ], "wc_strengths_avg": [ 63.75, 16.345871038277526 ], "wc_weaknesses_avg": [ 118.5, 61.194362485444685 ], "wc_questions_avg": [ 51.5, 43.281058212571466 ], "wc_limitations_avg": [ 17.0, 12.041594578792296 ], "wc_review_avg": [ 361.5, 49.378639106399035 ], "wc_reply_reviewers_avg": [ 24.25, 19.917015338649513 ], "wc_reply_authors_avg": [ 34.75, 60.188765563018485 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7026611812654396896&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "research.google.com;huji.ac.il;google.com;google.com;research.google.com;google.com;;google.com", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Google;Hebrew University of Jerusalem", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.huji.ac.il", "aff_unique_abbr": "Google;HUJI", "aff_campus_unique_index": "0;1;0;0;0;0;0", "aff_campus_unique": "Mountain View;Jerusalem", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Scaling Data-Constrained Language Models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70706", "id": "j5BuTrEj35", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d89448b63ce1e2e8dc7af72c984c196-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j5BuTrEj35", "openreview": "https://openreview.net/forum?id=j5BuTrEj35", "poster": "/media/PosterPDFs/NeurIPS%202023/70706.png?t=1699221010.9315512", "slides": "https://nips.cc/virtual/2023/poster/70706", "video": "https://nips.cc/virtual/2023/poster/70706", "author_site": "Niklas Muennighoff, Alexander Rush, Boaz Barak, Teven Le Scao, Nouamane Tazi, Aleksandra Piktus, Sampo Pyysalo, Thomas Wolf, Colin Raffel", "tldr": "", "abstract": "The current trend of scaling language models involves increasing both parameter count and training dataset size. Extrapolating this trend suggests that training dataset size may soon be limited by the amount of text data available on the internet. Motivated by this limit, we investigate scaling language models in data-constrained regimes. 
Specifically, we run a large set of experiments varying the extent of data repetition and compute budget, ranging up to 900 billion training tokens and 9 billion parameter models. We find that with constrained data for a fixed compute budget, training with up to 4 epochs of repeated data yields negligible changes to loss compared to having unique data. However, with more repetition, the value of adding compute eventually decays to zero. We propose and empirically validate a scaling law for compute optimality that accounts for the decreasing value of repeated tokens and excess parameters. Finally, we experiment with approaches mitigating data scarcity, including augmenting the training dataset with code data or removing commonly used filters. Models and datasets from our 400 training runs are freely available at https://github.com/huggingface/datablations.", "keywords": "large language models;scaling laws;data engineering", "primary_area": "", "supplementary_material": "/attachment/5520ed6bfca03ba7cee444d619bced7f0ab65067.pdf", "author": "Niklas Muennighoff;Alexander M Rush;Boaz Barak;Teven Le Scao;Nouamane Tazi;Aleksandra Piktus;Sampo Pyysalo;Thomas Wolf;Colin Raffel", "authorids": "~Niklas_Muennighoff1;~Alexander_M_Rush1;~Boaz_Barak2;~Teven_Le_Scao1;~Nouamane_Tazi1;~Aleksandra_Piktus1;~Sampo_Pyysalo2;~Thomas_Wolf1;~Colin_Raffel1", "gender": "M;M;M;;;F;M;M;", "homepage": "https://muennighoff.github.io/;http://rush.seas.harvard.edu/;https://boazbarak.org;;;;;https://thomwolf.io;http://colinraffel.com", "dblp": "281/6745;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=;b/BBarak;;;241/7090;;;149/0082", "google_scholar": "Me0IoRMAAAAJ;LIjnUGgAAAAJ;I0fbJ6cAAAAJ;;q2bZs1IAAAAJ;bXvehs4AAAAJ;GUHpTS0AAAAJ;D2H5EFEAAAAJ;I66ZBYwAAAAJ", "orcid": ";0000-0002-9900-1606;0000-0002-4053-8927;0000-0002-7052-3048;;;;;", "linkedin": "niklasmuennighoff/;sasha-rush-a69b6917/;;;nouamanetazi/;;;;", "or_profile": "~Niklas_Muennighoff1;~Alexander_M_Rush1;~Boaz_Barak2;~Teven_Le_Scao1;~Nouamane_Tazi1;~Aleksandra_Piktus1;~Sampo_Pyysalo2;~Thomas_Wolf1;~Colin_Raffel1", "aff": "Hugging Face;School of Engineering and Applied Sciences, Harvard University;Harvard University;;Hugging Face;Hugging Face;University of Turku;Hugging Face;University of North Carolina, Chapel Hill", "aff_domain": "gmail.com;seas.harvard.edu;fas.harvard.edu;;huggingface.co;huggingface.co;utu.fi;huggingface.co;unc.edu", "position": "Researcher;Assistant Professor;Full Professor;;Researcher;Researcher;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nmuennighoff2023scaling,\ntitle={Scaling Data-Constrained Language Models},\nauthor={Niklas Muennighoff and Alexander M Rush and Boaz Barak and Teven Le Scao and Nouamane Tazi and Aleksandra Piktus and Sampo Pyysalo and Thomas Wolf and Colin Raffel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j5BuTrEj35}\n}", "github": "", "project": "", "reviewers": "r7xA;sNww;v1HT;ATmT", "pdf_size": 1598846, "rating": "7;7;8;8", "confidence": "4;4;3;4", "soundness": "4;4;4;3", "novelty": "4;4;4;4", "presentation": "4;3;4;3", "wc_summary": "218;95;108;350", "wc_strengths": "42;57;34;133", "wc_weaknesses": "90;430;47;88", "wc_questions": "90;46;45;1", "wc_limitations": "1;5;8;2", "wc_review": "441;633;242;574", "wc_reply_reviewers": "11;0;17;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.75, 
0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 192.75, 102.59477325868019 ], "wc_strengths_avg": [ 66.5, 39.271490931717885 ], "wc_weaknesses_avg": [ 163.75, 154.67445652078433 ], "wc_questions_avg": [ 45.5, 31.468237955119125 ], "wc_limitations_avg": [ 4.0, 2.7386127875258306 ], "wc_review_avg": [ 472.5, 150.15408752345039 ], "wc_reply_reviewers_avg": [ 14.25, 10.473180032826706 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 272, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11809663609430955246&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "gmail.com;seas.harvard.edu;fas.harvard.edu;;huggingface.co;huggingface.co;utu.fi;huggingface.co;unc.edu", "author_num": 9, "aff_unique_index": "0;1;1;0;0;2;0;3", "aff_unique_norm": "Hugging Face;Harvard University;University of Turku;University of North Carolina", "aff_unique_dep": ";School of Engineering and Applied Sciences;;", "aff_unique_url": "https://huggingface.co;https://www.harvard.edu;https://www.utu.fi;https://www.unc.edu", "aff_unique_abbr": "Hugging Face;Harvard;UTU;UNC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "United States;Finland" }, { "title": "DynPoint: Dynamic Neural Point For View Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70705", "id": "j7U4pFkCYB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbdc7a9779ce0278c6e43b62c7e97759-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j7U4pFkCYB", "openreview": "https://openreview.net/forum?id=j7U4pFkCYB", "poster": "/media/PosterPDFs/NeurIPS%202023/70705.png?t=1702226038.3265893", "slides": "https://nips.cc/virtual/2023/poster/70705", "video": "https://nips.cc/virtual/2023/poster/70705", "author_site": "Kaichen Zhou, Jia-Xing Zhong, Sangyun Shin, Kai Lu, Yiyuan Yang, Andrew Markham, Niki Trigoni", "tldr": "", "abstract": "The introduction of neural radiance fields has greatly improved the effectiveness of view synthesis for monocular videos. However, existing algorithms face difficulties when dealing with uncontrolled or lengthy scenarios, and require extensive training time specific to each new scenario.\nTo tackle these limitations, we propose DynPoint, an algorithm designed to facilitate the rapid synthesis of novel views for unconstrained monocular videos. \nRather than encoding the entirety of the scenario information into a latent representation, DynPoint concentrates on predicting the explicit 3D correspondence between neighboring frames to realize information aggregation.\nSpecifically, this correspondence prediction is achieved through the estimation of consistent depth and scene flow information across frames.\nSubsequently, the acquired correspondence is utilized to aggregate information from multiple reference frames to a target frame, by constructing hierarchical neural point clouds. \nThe resulting framework enables swift and accurate view synthesis for desired views of target frames. 
\nThe experimental results demonstrate that our proposed method considerably accelerates training, typically by an order of magnitude, while yielding outcomes comparable to those of prior approaches. Furthermore, our method exhibits strong robustness in handling long-duration videos without learning a canonical representation of video content.", "keywords": "View Synthesis;Monocular Video", "primary_area": "", "supplementary_material": "/attachment/4eccd872d31982e2607613467f54e51b0142ebae.pdf", "author": "Kaichen Zhou;Jia-Xing Zhong;Sangyun Shin;Kai Lu;Yiyuan Yang;Andrew Markham;Niki Trigoni", "authorids": "~Kaichen_Zhou1;~Jia-Xing_Zhong1;~Sangyun_Shin2;~Kai_Lu5;~Yiyuan_Yang1;~Andrew_Markham2;~Niki_Trigoni1", "gender": "M;M;;M;M;M;F", "homepage": "http://zalex97.github.io/;;;https://www.cs.ox.ac.uk/people/kai.lu/;https://yyysjz1997.github.io/;;https://www.cs.ox.ac.uk/people/niki.trigoni/", "dblp": ";208/4752;;;228/1875.html;83/7169;t/NikiTrigoni", "google_scholar": ";dIckm98AAAAJ;;;FUuGvZIAAAAJ;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ;", "orcid": ";;;;0000-0002-5320-095X;;", "linkedin": ";;;;yiyuan-yang-8154941ab/;;", "or_profile": "~Kaichen_Zhou1;~Jia-Xing_Zhong1;~Sangyun_Shin2;~Kai_Lu5;~Yiyuan_Yang1;~Andrew_Markham2;~Niki_Trigoni1", "aff": "Department of Computer Science, University of Oxford;Department of Computer Science, University of Oxford;;University of Oxford;Department of Computer Science, University of Oxford;University of Oxford;University of Oxford", "aff_domain": "cs.ox.ac.uk;cs.ox.ac.uk;;ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023dynpoint,\ntitle={DynPoint: Dynamic Neural Point For View Synthesis},\nauthor={Kaichen Zhou and Jia-Xing Zhong and Sangyun Shin and Kai Lu and Yiyuan Yang and Andrew Markham and Niki Trigoni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j7U4pFkCYB}\n}", "github": "", "project": "", "reviewers": "nc3b;J3AP;BEKz;Rm9i", "pdf_size": 25839752, "rating": "3;6;7;7", "confidence": "4;5;4;4", "soundness": "2;3;4;4", "novelty": "1;3;3;3", "presentation": "2;3;3;4", "wc_summary": "61;72;85;187", "wc_strengths": "20;63;93;279", "wc_weaknesses": "329;76;138;147", "wc_questions": "36;70;149;31", "wc_limitations": "4;21;64;128", "wc_review": "450;302;529;772", "wc_reply_reviewers": "847;5;49;17", "wc_reply_authors": "183;31;65;66", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.25, 50.231339818882 ], "wc_strengths_avg": [ 113.75, 98.87207644223925 ], "wc_weaknesses_avg": [ 172.5, 94.39941737108339 ], "wc_questions_avg": [ 71.5, 47.193749586147526 ], "wc_limitations_avg": [ 54.25, 47.8663503935698 ], "wc_review_avg": [ 513.25, 170.1666462618336 ], "wc_reply_reviewers_avg": [ 229.5, 356.8763791567046 ], "wc_reply_authors_avg": [ 86.25, 57.60805065266486 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.08804509063256237, "gs_citation": 18, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=7954568707547305883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.ox.ac.uk;cs.ox.ac.uk;;ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning from Both Structural and Textual Knowledge for Inductive Knowledge Graph Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70704", "id": "j7x9wW3tCf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/544242770e8333875325d013328b2079-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j7x9wW3tCf", "openreview": "https://openreview.net/forum?id=j7x9wW3tCf", "poster": "/media/PosterPDFs/NeurIPS%202023/70704.png?t=1701431266.4532523", "slides": "https://nips.cc/virtual/2023/poster/70704", "video": "https://nips.cc/virtual/2023/poster/70704", "author_site": "Kunxun Qi, Jianfeng Du, Hai Wan", "tldr": "", "abstract": "Learning rule-based systems plays a pivotal role in knowledge graph completion (KGC). Existing rule-based systems restrict the input of the system to structural knowledge only, which may omit some useful knowledge for reasoning, e.g., textual knowledge. In this paper, we propose a two-stage framework that imposes both structural and textual knowledge to learn rule-based systems. In the first stage, we compute a set of triples with confidence scores (called \\emph{soft triples}) from a text corpus by distant supervision, where a textual entailment model with multi-instance learning is exploited to estimate whether a given triple is entailed by a set of sentences. In the second stage, these soft triples are used to learn a rule-based model for KGC. To mitigate the negative impact of noise from soft triples, we propose a new formalism for rules to be learnt, named \\emph{text enhanced rules} or \\emph{TE-rules} for short. To effectively learn TE-rules, we propose a neural model that simulates the inference of TE-rules. We theoretically show that any set of TE-rules can always be interpreted by a certain parameter assignment of the neural model. We introduce three new datasets to evaluate the effectiveness of our method. 
Experimental results demonstrate that the introduction of soft triples and TE-rules results in significant performance improvements in inductive link prediction.", "keywords": "Knowledge graph completion; Neural approximate rule learning; Neural rule-based system", "primary_area": "", "supplementary_material": "", "author": "Kunxun Qi;Jianfeng Du;Hai Wan", "authorids": "~Kunxun_Qi1;~Jianfeng_Du3;~Hai_Wan3", "gender": "M;M;M", "homepage": "https://github.com/qikunxun;https://sist.gdufs.edu.cn/info/1585/9980.htm;", "dblp": "177/5669;https://dblp.uni-trier.de/pid/72/5841;https://dblp.uni-trier.de/pid/54/977.html", "google_scholar": ";;", "orcid": "0000-0002-2356-4103;0000-0002-7541-1387;0000-0001-5357-9130", "linkedin": ";;", "or_profile": "~Kunxun_Qi1;~Jianfeng_Du3;~Hai_Wan3", "aff": "SUN YAT-SEN UNIVERSITY;Guangdong University of Foreign Studies;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;gdufs.edu.cn;sysu.edu.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqi2023learning,\ntitle={Learning from Both Structural and Textual Knowledge for Inductive Knowledge Graph Completion},\nauthor={Kunxun Qi and Jianfeng Du and Hai Wan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j7x9wW3tCf}\n}", "github": "", "project": "", "reviewers": "KZDY;DS3c;PLPA;Vj83;4GPq", "pdf_size": 569589, "rating": "5;5;6;6;7", "confidence": "4;4;3;3;5", "soundness": "2;2;3;3;4", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "49;153;60;136;81", "wc_strengths": "40;68;17;79;50", "wc_weaknesses": "162;99;156;128;86", "wc_questions": "162;63;9;165;59", "wc_limitations": "162;1;1;28;8", "wc_review": "575;384;243;536;284", "wc_reply_reviewers": "13;21;143;34;82", "wc_reply_authors": "52;43;376;41;258", "reply_reviewers": "1;1;3;1;2", "reply_authors": "2;2;4;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 95.8, 41.42173342582369 ], "wc_strengths_avg": [ 50.8, 21.68317319951118 ], "wc_weaknesses_avg": [ 126.2, 30.095846889562686 ], "wc_questions_avg": [ 91.6, 61.72066104636275 ], "wc_limitations_avg": [ 40.0, 61.796440026914176 ], "wc_review_avg": [ 404.4, 132.20075642748796 ], "wc_reply_reviewers_avg": [ 58.6, 48.51638898351773 ], "wc_reply_authors_avg": [ 154.0, 138.27074889505732 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2857142857142857, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6243473093119141637&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sysu.edu.cn;gdufs.edu.cn;sysu.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Sun Yat-sen University;Guangdong University of Foreign Studies", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;http://www.gdufs.edu.cn", "aff_unique_abbr": "SYSU;GDUFS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "On Masked Pre-training and the Marginal Likelihood", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70703", "id": "j9wGUcS30B", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc0e3f908a2116ba529ad0a1530a3675-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=j9wGUcS30B", "openreview": "https://openreview.net/forum?id=j9wGUcS30B", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70703", "video": "https://nips.cc/virtual/2023/poster/70703", "author_site": "Pablo Moreno-Mu\u00f1oz, Pol Garcia Recasens, S\u00f8ren Hauberg", "tldr": "", "abstract": "Masked pre-training removes random input dimensions and learns a model that can predict the missing values. Empirical results indicate that this intuitive form of self-supervised learning yields models that generalize very well to new domains. A theoretical understanding is, however, lacking. This paper shows that masked pre-training with a suitable cumulative scoring function corresponds to maximizing the model's marginal likelihood, which is de facto the Bayesian model selection measure of generalization. Beyond shedding light on the success of masked pre-training, this insight also suggests that Bayesian models can be trained with appropriately designed self-supervision. Empirically, we confirm the developed theory and explore the main learning principles of masked pre-training in large language models.", "keywords": "Marginal likelihood;masked pre-training;Bayesian inference", "primary_area": "", "supplementary_material": "/attachment/946ded774de43c026194b3ce4ff709744be9fb05.zip", "author": "Pablo Moreno-Mu\u00f1oz;Pol G. Recasens;S\u00f8ren Hauberg", "authorids": "~Pablo_Moreno-Mu\u00f1oz1;~Pol_G._Recasens1;~S\u00f8ren_Hauberg1", "gender": "M;M;M", "homepage": "https://pmorenoz.github.io/;https://www.lop1498.github.io;http://www2.compute.dtu.dk/~sohau/", "dblp": "220/5334;;39/7226", "google_scholar": "8vL8iawAAAAJ;xU2bqnkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7249-2986;;", "linkedin": ";pol-garcia-recasens-854438168/;", "or_profile": "~Pablo_Moreno-Mu\u00f1oz1;~Pol_G._Recasens1;~S\u00f8ren_Hauberg1", "aff": "Technical University of Denmark;Universidad Polit\u00e9cnica de Cataluna;Technical University of Denmark", "aff_domain": "dtu.dk;upc.edu;dtu.dk", "position": "Postdoc;MS student;Professor", "bibtex": "@inproceedings{\nmoreno-mu{\\~n}oz2023on,\ntitle={On Masked Pre-training and the Marginal Likelihood},\nauthor={Pablo Moreno-Mu{\\~n}oz and Pol G. 
Recasens and S{\\o}ren Hauberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=j9wGUcS30B}\n}", "github": "", "project": "", "reviewers": "vzMk;mVT4;LKWQ;EYbT;Gnha", "pdf_size": 3347953, "rating": "4;4;6;6;7", "confidence": "4;4;2;4;2", "soundness": "3;3;3;4;4", "novelty": "2;2;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "72;61;174;76;34", "wc_strengths": "20;39;49;86;100", "wc_weaknesses": "273;90;63;117;135", "wc_questions": "35;24;100;768;34", "wc_limitations": "8;3;1;79;48", "wc_review": "408;217;387;1126;351", "wc_reply_reviewers": "526;147;0;0;36", "wc_reply_authors": "1003;695;0;1277;24", "reply_reviewers": "2;1;0;0;1", "reply_authors": "3;2;1;3;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 83.4, 47.613443479756846 ], "wc_strengths_avg": [ 58.8, 29.768439663509408 ], "wc_weaknesses_avg": [ 135.6, 72.90432085960337 ], "wc_questions_avg": [ 192.2, 289.16320651147856 ], "wc_limitations_avg": [ 27.8, 30.837639338963676 ], "wc_review_avg": [ 497.8, 321.05289283854773 ], "wc_reply_reviewers_avg": [ 141.8, 199.52182837975397 ], "wc_reply_authors_avg": [ 599.8, 514.1087044585026 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.748455199183749, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6566271683247369875&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "dtu.dk;upc.edu;dtu.dk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technical University of Denmark;Universitat Polit\u00e8cnica de Catalunya", "aff_unique_dep": ";", "aff_unique_url": "https://www.tek.dk;https://www.upc.edu", "aff_unique_abbr": "DTU;UPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Denmark;Spain" }, { "title": "Jailbroken: How Does LLM Safety Training Fail?", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70702", "id": "jA235JGM09", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd6613131889a4b656206c50a8bd7790-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jA235JGM09", "openreview": "https://openreview.net/forum?id=jA235JGM09", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70702", "video": "https://nips.cc/virtual/2023/poster/70702", "author_site": "Alexander Wei, Nika Haghtalab, Jacob Steinhardt", "tldr": "", "abstract": "Large language models trained for safety and harmlessness remain susceptible to adversarial misuse, as evidenced by the prevalence of \u201cjailbreak\u201d attacks on early releases of ChatGPT that elicit undesired behavior. Going beyond recognition of the issue, we investigate why such attacks succeed and how they can be created. We hypothesize two failure modes of safety training: competing objectives and mismatched generalization. Competing objectives arise when a model\u2019s capabilities and safety goals conflict, while mismatched generalization occurs when safety training fails to generalize to a domain for which capabilities exist. 
We use these failure modes to guide jailbreak design and then evaluate state-of-the-art models, including OpenAI\u2019s GPT-4 and Anthropic\u2019s Claude v1.3, against both existing and newly designed attacks. We find that vulnerabilities persist despite the extensive red-teaming and safety-training efforts behind these models. Notably, new attacks utilizing our failure modes succeed on every prompt in a collection of unsafe requests from the models\u2019 red-teaming evaluation sets and outperform existing ad hoc jailbreaks. Our analysis emphasizes the need for safety-capability parity\u2014that safety mechanisms should be as sophisticated as the underlying model\u2014and argues against the idea that scaling alone can resolve these safety failure modes.", "keywords": "red teaming;safety;RLHF;large language models", "primary_area": "", "supplementary_material": "", "author": "Alexander Wei;Nika Haghtalab;Jacob Steinhardt", "authorids": "~Alexander_Wei2;~Nika_Haghtalab2;~Jacob_Steinhardt1", "gender": ";F;", "homepage": "https://www.alexwei.org;https://people.eecs.berkeley.edu/~nika/;", "dblp": "223/5928;;35/10625", "google_scholar": "d5wGxRsAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alexander_Wei2;~Nika_Haghtalab2;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwei2023jailbroken,\ntitle={Jailbroken: How Does {LLM} Safety Training Fail?},\nauthor={Alexander Wei and Nika Haghtalab and Jacob Steinhardt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jA235JGM09}\n}", "github": "", "project": "", "reviewers": "VdLs;mfj9;qvjk;Q2oq;Nf9i;XG6N;PgD3", "pdf_size": 341955, "rating": "5;5;6;6;6;6;8", "confidence": "4;4;3;4;4;3;4", "soundness": "3;3;2;3;2;3;4", "novelty": "2;3;3;3;2;2;3", "presentation": "3;4;3;2;3;3;4", "wc_summary": "70;115;50;83;71;81;60", "wc_strengths": "17;133;17;33;111;47;57", "wc_weaknesses": "136;204;245;72;54;111;348", "wc_questions": "1;23;13;82;21;39;7", "wc_limitations": "5;40;15;21;1;42;7", "wc_review": "229;515;340;291;258;320;479", "wc_reply_reviewers": "13;9;133;0;0;7;13", "wc_reply_authors": "0;0;0;0;0;0;0", "reply_reviewers": "1;1;1;0;0;1;1", "reply_authors": "1;1;1;1;1;1;1", "rating_avg": [ 6.0, 0.9258200997725514 ], "confidence_avg": [ 3.7142857142857144, 0.45175395145262565 ], "soundness_avg": [ 2.857142857142857, 0.6388765649999399 ], "novelty_avg": [ 2.5714285714285716, 0.4948716593053935 ], "presentation_avg": [ 3.142857142857143, 0.6388765649999398 ], "wc_summary_avg": [ 75.71428571428571, 19.225832961440826 ], "wc_strengths_avg": [ 59.285714285714285, 42.31080336784748 ], "wc_weaknesses_avg": [ 167.14285714285714, 97.17793484329744 ], "wc_questions_avg": [ 26.571428571428573, 25.33127453145134 ], "wc_limitations_avg": [ 18.714285714285715, 15.368932351749521 ], "wc_review_avg": [ 347.42857142857144, 100.99909072130605 ], "wc_reply_reviewers_avg": [ 25.0, 44.3718057200419 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.7142857142857143, 0.45175395145262565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 948, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14029412962367612376&as_sdt=80005&sciodt=0,11&hl=en", 
"gs_version_total": 8, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Hierarchical Adaptive Value Estimation for Multi-modal Visual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70701", "id": "jB4wsc1DQW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9213010cbcd6ba8e1f1cf1533835d51c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jB4wsc1DQW", "openreview": "https://openreview.net/forum?id=jB4wsc1DQW", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70701", "video": "https://nips.cc/virtual/2023/poster/70701", "author_site": "Yangru Huang, Peixi Peng, Yifan Zhao, Haoran Xu, Mengyue Geng, Yonghong Tian", "tldr": "", "abstract": "Integrating RGB frames with alternative modality inputs is gaining increasing traction in many vision-based reinforcement learning (RL) applications. Existing multi-modal vision-based RL methods usually follow a Global Value Estimation (GVE) pipeline, which uses a fused modality feature to obtain a unified global environmental description. However, such a feature-level fusion paradigm with a single critic may fall short in policy learning as it tends to overlook the distinct values of each modality. To remedy this, this paper proposes a Local modality-customized Value Estimation (LVE) paradigm, which dynamically estimates the contribution and adjusts the importance weight of each modality from a value-level perspective. Furthermore, a task-contextual re-fusion process is developed to achieve a task-level re-balance of estimations from both feature and value levels. To this end, a Hierarchical Adaptive Value Estimation (HAVE) framework is formed, which adaptively coordinates the contributions of individual modalities as well as their collective efficacy. Agents trained by HAVE are able to exploit the unique characteristics of various modalities while capturing their intricate interactions, achieving substantially improved performance. 
We specifically highlight the potency of our approach within the challenging landscape of autonomous driving, utilizing the CARLA benchmark with neuromorphic event and depth data to demonstrate HAVE's capability and the effectiveness of its distinct components.", "keywords": "vision-based reinforcement learning;multi-modal;event camera", "primary_area": "", "supplementary_material": "/attachment/abebc5906927294694c6c002c53ffab67ed01052.pdf", "author": "Yangru Huang;Peixi Peng;Yifan Zhao;Haoran Xu;Mengyue Geng;Yonghong Tian", "authorids": "~Yangru_Huang1;~Peixi_Peng2;~Yifan_Zhao2;~Haoran_Xu5;~Mengyue_Geng1;~Yonghong_Tian1", "gender": "F;M;M;M;;M", "homepage": ";;https://zhao1f.github.io/;https://github.com/kyoran;;http://www.pkuml.org", "dblp": "241/9375;119/8511;13/7050-2.html;140/8357-4;;86/5857", "google_scholar": ";CFMuFGoAAAAJ;bUzykm0AAAAJ;UOwYW7gAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-9330-2475;;0000-0002-2978-5935", "linkedin": ";;;;;", "or_profile": "~Yangru_Huang1;~Peixi_Peng2;~Yifan_Zhao2;~Haoran_Xu5;~Mengyue_Geng1;~Yonghong_Tian1", "aff": "Peking University;;Peking University;SUN YAT-SEN UNIVERSITY;;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;sysu.edu.cn;;pku.edu.cn", "position": "PhD student;;Postdoc;PhD student;;Full Professor", "bibtex": "@inproceedings{\nhuang2023hierarchical,\ntitle={Hierarchical Adaptive Value Estimation for Multi-modal Visual Reinforcement Learning},\nauthor={Yangru Huang and Peixi Peng and Yifan Zhao and Haoran Xu and Mengyue Geng and Yonghong Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jB4wsc1DQW}\n}", "github": "", "project": "", "reviewers": "HRjH;9NFJ;VLp8;P2RM", "pdf_size": 2440221, "rating": "3;5;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "74;66;82;87", "wc_strengths": "68;15;74;141", "wc_weaknesses": "152;164;93;124", "wc_questions": "55;153;8;206", "wc_limitations": "53;31;41;58", "wc_review": "402;429;298;616", "wc_reply_reviewers": "0;20;14;0", "wc_reply_authors": "1401;32;35;0", "reply_reviewers": "0;1;1;0", "reply_authors": "4;2;2;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 7.980444849756184 ], "wc_strengths_avg": [ 74.5, 44.73533279187716 ], "wc_weaknesses_avg": [ 133.25, 27.39867697535777 ], "wc_questions_avg": [ 105.5, 78.12329998150359 ], "wc_limitations_avg": [ 45.75, 10.520812706250407 ], "wc_review_avg": [ 436.25, 114.72657713014888 ], "wc_reply_reviewers_avg": [ 8.5, 8.760707733967616 ], "wc_reply_authors_avg": [ 367.0, 597.1377563008389 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5364406791011353963&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;;pku.edu.cn;sysu.edu.cn;;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Peking University;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "Peking U;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "China" }, { "title": "Learning Repeatable Speech Embeddings Using An Intra-class Correlation Regularizer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70700", "id": "jCPRG3FuHV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0aa7e9e67515fa0c607c2959ccda6a0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jCPRG3FuHV", "openreview": "https://openreview.net/forum?id=jCPRG3FuHV", "poster": "/media/PosterPDFs/NeurIPS%202023/70700.png?t=1699465553.0593388", "slides": "https://nips.cc/virtual/2023/poster/70700", "video": "https://nips.cc/virtual/2023/poster/70700", "author_site": "Jianwei Zhang, Suren Jayasuriya, Visar Berisha", "tldr": "", "abstract": "A good supervised embedding for a specific machine learning task is only sensitive to changes in the label of interest and is invariant to other confounding factors. We leverage the concept of repeatability from measurement theory to describe this property and propose to use the intra-class correlation coefficient (ICC) to evaluate the repeatability of embeddings. We then propose a novel regularizer, the ICC regularizer, as a complementary component for contrastive losses to guide deep neural networks to produce embeddings with higher repeatability. We use simulated data to explain why the ICC regularizer works better on minimizing the intra-class variance than the contrastive loss alone. We implement the ICC regularizer and apply it to three speech tasks: speaker verification, voice style conversion, and a clinical application for detecting dysphonic voice. The experimental results demonstrate that adding an ICC regularizer can improve the repeatability of learned embeddings compared to only using the contrastive loss; further, these embeddings lead to improved performance in these downstream tasks.", "keywords": "repeatability;embeddings;metric learning;intra-class correlation;intra-class variance", "primary_area": "", "supplementary_material": "", "author": "Jianwei Zhang;Suren Jayasuriya;Visar Berisha", "authorids": "~Jianwei_Zhang7;~Suren_Jayasuriya3;~Visar_Berisha1", "gender": "M;M;M", "homepage": ";https://sites.google.com/asu.edu/imaging-lyceum;http://www.public.asu.edu/~visar/", "dblp": ";153/9770;", "google_scholar": "PQ6OA3IAAAAJ;DEfu2GoAAAAJ;MQBn718AAAAJ", "orcid": "0000-0001-6419-2038;;", "linkedin": "jianwei-zhang-95764310b/;suren-jayasuriya-4112926b/;", "or_profile": "~Jianwei_Zhang7;~Suren_Jayasuriya3;~Visar_Berisha1", "aff": "Arizona State University;Arizona State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;asu.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023learning,\ntitle={Learning Repeatable Speech Embeddings Using An Intra-class Correlation Regularizer},\nauthor={Jianwei Zhang and Suren Jayasuriya and Visar Berisha},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jCPRG3FuHV}\n}", "github": "", "project": "", "reviewers": "89AX;Qvpp;Yotw;efaw;wtbk;hWT5", "pdf_size": 10148189, "rating": "5;5;6;6;6;7", "confidence": "2;4;4;4;4;2", "soundness": "3;2;3;3;3;3", "novelty": "3;2;2;3;3;3", "presentation": "3;3;3;3;3;4", "wc_summary": "72;111;46;91;122;100", "wc_strengths": "95;57;25;80;36;111", "wc_weaknesses": "18;38;29;156;102;66", "wc_questions": "173;190;45;59;8;84", "wc_limitations": "18;32;8;10;11;1", "wc_review": "376;428;153;396;279;362", 
"wc_reply_reviewers": "0;195;0;128;49;0", "wc_reply_authors": "0;424;0;0;0;0", "reply_reviewers": "0;2;0;1;1;0", "reply_authors": "1;2;1;1;1;1", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 90.33333333333333, 25.249862485874168 ], "wc_strengths_avg": [ 67.33333333333333, 30.858098162754978 ], "wc_weaknesses_avg": [ 68.16666666666667, 48.00144673745667 ], "wc_questions_avg": [ 93.16666666666667, 66.5517760010121 ], "wc_limitations_avg": [ 13.333333333333334, 9.72396809720988 ], "wc_review_avg": [ 332.3333333333333, 92.19303420300015 ], "wc_reply_reviewers_avg": [ 62.0, 74.99555542386051 ], "wc_reply_authors_avg": [ 70.66666666666667, 158.01547040998514 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1714985851425088, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11660874156963949129&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "asu.edu;asu.edu;asu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Surprising Effectiveness of Diffusion Models for Optical Flow and Monocular Depth Estimation", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70699", "id": "jDIlzSU8wJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7c119415672ae2186e17d492e1d5da2f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jDIlzSU8wJ", "openreview": "https://openreview.net/forum?id=jDIlzSU8wJ", "poster": "/media/PosterPDFs/NeurIPS%202023/70699.png?t=1702169786.0551796", "slides": "https://nips.cc/virtual/2023/poster/70699", "video": "https://nips.cc/virtual/2023/poster/70699", "author_site": "Saurabh Saxena, Charles Herrmann, Junhwa Hur, Abhishek Kar, Mohammad Norouzi, Deqing Sun, David Fleet", "tldr": "", "abstract": "Denoising diffusion probabilistic models have transformed image generation with their impressive fidelity and diversity.\nWe show that they also excel in estimating optical flow and monocular depth, surprisingly without task-specific architectures and loss functions that are predominant for these tasks. \nCompared to the point estimates of conventional regression-based methods, diffusion models also enable Monte Carlo inference, e.g., capturing uncertainty and ambiguity in flow and depth.\nWith self-supervised pre-training, the combined use of synthetic and real data for supervised training, and technical innovations (infilling and step-unrolled denoising diffusion training) to handle noisy-incomplete training data, one can train state-of-the-art diffusion models for depth and optical flow estimation, with additional zero-shot coarse-to-fine refinement for high resolution estimates. \nExtensive experiments focus on quantitative performance against benchmarks, ablations, and the model's ability to capture uncertainty and multimodality, and impute missing values. 
Our model obtains a state-of-the-art relative depth error of 0.074 on the indoor NYU benchmark and an Fl-all score of 3.26\\% on the KITTI optical flow benchmark, about 25\\% better than the best published method.", "keywords": "Monocular depth;optical flow;diffusion;depth;flow", "primary_area": "", "supplementary_material": "", "author": "Saurabh Saxena;Charles Herrmann;Junhwa Hur;Abhishek Kar;Mohammad Norouzi;Deqing Sun;David J. Fleet", "authorids": "~Saurabh_Saxena1;~Charles_Herrmann1;~Junhwa_Hur1;~Abhishek_Kar1;~Mohammad_Norouzi1;~Deqing_Sun2;~David_J._Fleet1", "gender": "M;Unspecified;M;M;M;M;M", "homepage": ";https://scholar.google.com/citations?user=LQvi5XAAAAAJ&hl=en&oi=ao;https://hurjunhwa.github.io/;https://abhishekkar.info;https://norouzi.github.io/;https://deqings.github.io/;http://www.cs.toronto.edu/~fleet/index.html", "dblp": ";26/11300;135/9099;46/11300;https://dblp.org/pers/hd/n/Norouzi_0002:Mohammad;69/4250;07/2099", "google_scholar": "WTz38osAAAAJ;LQvi5XAAAAAJ;z4dNJdkAAAAJ;TIpmrtoAAAAJ;Lncr-VoAAAAJ;t4rgICIAAAAJ;https://scholar.google.com.tw/citations?user=njOmQFsAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;abhishekkar/;;;", "or_profile": "~Saurabh_Saxena1;~Charles_Herrmann1;~Junhwa_Hur1;~Abhishek_Kar1;~Mohammad_Norouzi1;~Deqing_Sun2;~David_J._Fleet1", "aff": "Google;Google;Google;Google;Google Brain;Google DeepMind;Department of Computer Science, University of Toronto", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;cs.toronto.edu", "position": "Researcher;Researcher;Researcher;Researcher;Research Scientist;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nsaxena2023the,\ntitle={The Surprising Effectiveness of Diffusion Models for Optical Flow and Monocular Depth Estimation},\nauthor={Saurabh Saxena and Charles Herrmann and Junhwa Hur and Abhishek Kar and Mohammad Norouzi and Deqing Sun and David J. 
Fleet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jDIlzSU8wJ}\n}", "github": "", "project": "", "reviewers": "8jCd;LqWs;F9qi;xrrD", "pdf_size": 25180618, "rating": "6;7;7;8", "confidence": "3;4;5;3", "soundness": "3;4;3;4", "novelty": "2;3;4;3", "presentation": "3;4;3;4", "wc_summary": "81;177;83;70", "wc_strengths": "78;245;195;166", "wc_weaknesses": "147;294;14;108", "wc_questions": "40;56;195;11", "wc_limitations": "1;1;8;49", "wc_review": "347;773;495;404", "wc_reply_reviewers": "21;60;138;37", "wc_reply_authors": "0;0;357;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 102.75, 43.15307057440988 ], "wc_strengths_avg": [ 171.0, 60.67536567668958 ], "wc_weaknesses_avg": [ 140.75, 100.82503409372099 ], "wc_questions_avg": [ 75.5, 70.85372255569922 ], "wc_limitations_avg": [ 14.75, 19.97967717456916 ], "wc_review_avg": [ 504.75, 163.62208744543017 ], "wc_reply_reviewers_avg": [ 64.0, 44.91658936295141 ], "wc_reply_authors_avg": [ 89.25, 154.5855345755223 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3658414862660221835&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;google.com;google.com;google.com;cs.toronto.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Google;University of Toronto", "aff_unique_dep": "Google;Department of Computer Science", "aff_unique_url": "https://www.google.com;https://www.utoronto.ca", "aff_unique_abbr": "Google;U of T", "aff_campus_unique_index": "0;0;0;0;0;2", "aff_campus_unique": "Mountain View;;Toronto", "aff_country_unique_index": "0;0;0;0;0;1;2", "aff_country_unique": "United States;United Kingdom;Canada" }, { "title": "Gradient Flossing: Improving Gradient Descent through Dynamic Control of Jacobians", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70698", "id": "jEQRoJzDx8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/214ce905bf2072535e34b3cf873cbbc8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jEQRoJzDx8", "openreview": "https://openreview.net/forum?id=jEQRoJzDx8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70698", "video": "https://nips.cc/virtual/2023/poster/70698", "tldr": "", "abstract": "Training recurrent neural networks (RNNs) remains a challenge due to the instability of gradients across long time horizons, which can lead to exploding and vanishing gradients. Recent research has linked these problems to the values of Lyapunov exponents for the forward-dynamics, which describe the growth or shrinkage of infinitesimal perturbations. Here, we propose gradient flossing, a novel approach to tackling gradient instability by pushing Lyapunov exponents of the forward dynamics toward zero during learning. We achieve this by regularizing Lyapunov exponents through backpropagation using differentiable linear algebra. This enables us to \"floss\" the gradients, stabilizing them and thus improving network training. 
We show that gradient flossing controls not only the gradient norm but also the condition number of the long-term Jacobian, facilitating multidimensional error feedback propagation. We find that applying gradient flossing before training enhances both the success rate and convergence speed for tasks involving long time horizons.\nFor challenging tasks, we show that gradient flossing during training can further increase the time horizon that can be bridged by backpropagation through time. Moreover, we demonstrate the effectiveness of our approach on various RNN architectures and tasks of variable temporal complexity. Additionally, we provide a simple implementation of our gradient flossing algorithm that can be used in practice. Our results indicate that gradient flossing via regularizing Lyapunov exponents can significantly enhance the effectiveness of RNN training and mitigate the exploding and vanishing gradients problem.", "keywords": "exploding/vanishing gradients;Lyapunov exponents;Lyapunov spectrum;chaos;RNN;condition number;Jacobian", "primary_area": "", "supplementary_material": "/attachment/d8d053a76e1f20ba234f091ddf6e37bf18587ecb.pdf", "author": "Rainer Engelken", "authorids": "~Rainer_Engelken1", "gender": "M", "homepage": "https://ctn.zuckermaninstitute.columbia.edu/people/rainer-engelken", "dblp": "312/6447", "google_scholar": "HvZqeGQAAAAJ", "orcid": "0000-0001-7118-2129", "linkedin": "", "or_profile": "~Rainer_Engelken1", "aff": "Center for Theoretical Neuroscience, Columbia University", "aff_domain": "ctn.zuckermaninstitute.columbia.edu", "position": "Postdoc", "bibtex": "@inproceedings{\nengelken2023gradient,\ntitle={Gradient Flossing: Improving Gradient Descent through Dynamic Control of Jacobians},\nauthor={Rainer Engelken},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jEQRoJzDx8}\n}", "github": "", "project": "", "reviewers": "5Z2M;ZKKk;DGA6;tH3Y", "pdf_size": 3530457, "rating": "4;5;6;7", "confidence": "4;3;4;3", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "47;101;111;116", "wc_strengths": "42;21;175;70", "wc_weaknesses": "141;186;39;44", "wc_questions": "49;40;383;216", "wc_limitations": "4;2;19;21", "wc_review": "283;350;727;467", "wc_reply_reviewers": "108;89;0;87", "wc_reply_authors": "292;214;163;166", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.75, 27.52612395525385 ], "wc_strengths_avg": [ 77.0, 59.19037083850717 ], "wc_weaknesses_avg": [ 102.5, 63.065442200939174 ], "wc_questions_avg": [ 172.0, 140.54358754493214 ], "wc_limitations_avg": [ 11.5, 8.558621384311845 ], "wc_review_avg": [ 456.75, 169.35521102109612 ], "wc_reply_reviewers_avg": [ 71.0, 41.803109932156964 ], "wc_reply_authors_avg": [ 208.75, 52.15062319857741 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11227443782596580429&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "ctn.zuckermaninstitute.columbia.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Columbia 
University", "aff_unique_dep": "Center for Theoretical Neuroscience", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Revisiting Evaluation Metrics for Semantic Segmentation: Optimization and Evaluation of Fine-grained Intersection over Union", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73495", "id": "jGyMUum1Lq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd3611971089d466ab4ca96a20f7ab13-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=jGyMUum1Lq", "openreview": "https://openreview.net/forum?id=jGyMUum1Lq", "poster": "/media/PosterPDFs/NeurIPS%202023/73495.png?t=1701773778.4203331", "slides": "https://nips.cc/virtual/2023/poster/73495", "video": "https://nips.cc/virtual/2023/poster/73495", "author_site": "Zifu Wang, Maxim Berman, Amal Rannen-Triki, Philip Torr, Devis Tuia, Tinne Tuytelaars, Luc V Gool, Jiaqian Yu, Matthew Blaschko", "tldr": "", "abstract": "Semantic segmentation datasets often exhibit two types of imbalance: \\textit{class imbalance}, where some classes appear more frequently than others and \\textit{size imbalance}, where some objects occupy more pixels than others. This causes traditional evaluation metrics to be biased towards \\textit{majority classes} (e.g. overall pixel-wise accuracy) and \\textit{large objects} (e.g. mean pixel-wise accuracy and per-dataset mean intersection over union). To address these shortcomings, we propose the use of fine-grained mIoUs along with corresponding worst-case metrics, thereby offering a more holistic evaluation of segmentation techniques. These fine-grained metrics offer less bias towards large objects, richer statistical information, and valuable insights into model and dataset auditing. Furthermore, we undertake an extensive benchmark study, where we train and evaluate 15 modern neural networks with the proposed metrics on 12 diverse natural and aerial segmentation datasets. Our benchmark study highlights the necessity of not basing evaluations on a single metric and confirms that fine-grained mIoUs reduce the bias towards large objects. Moreover, we identify the crucial role played by architecture designs and loss functions, which lead to best practices in optimizing fine-grained metrics. The code is available at \\href{https://github.com/zifuwanggg/JDTLosses}{https://github.com/zifuwanggg/JDTLosses}.", "keywords": "Semantic Segmentation", "primary_area": "", "supplementary_material": "", "author": "Zifu Wang;Maxim Berman;Amal Rannen-Triki;Philip Torr;Devis Tuia;Tinne Tuytelaars;Luc Van Gool;Jiaqian Yu;Matthew B. 
Blaschko", "authorids": "~Zifu_Wang1;~Maxim_Berman1;~Amal_Rannen-Triki1;~Philip_Torr1;~Devis_Tuia1;~Tinne_Tuytelaars1;~Luc_Van_Gool1;~Jiaqian_Yu1;~Matthew_B._Blaschko1", "gender": "M;M;F;;;;;F;", "homepage": "https://zifuwang.com;http://bmax.im;http://amal.rannen.triki.me;http://www.robots.ox.ac.uk/~tvg/;;;;;", "dblp": ";190/2143;180/5447;;99/606;;61/5017;164/7325;", "google_scholar": "https://scholar.google.com/citations?hl=en;;sSwp5n0AAAAJ;;p3iJiLIAAAAJ;;https://scholar.google.be/citations?user=TwMib_QAAAAJ;8f7l1dIAAAAJ;", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Zifu_Wang1;~Maxim_Berman1;~Amal_Rannen-Triki1;~Philip_Torr1;~Devis_Tuia1;~Tinne_Tuytelaars1;~Luc_Van_Gool1;~Jiaqian_Yu1;~Matthew_B._Blaschko1", "aff": "KU Leuven;Google;Google DeepMind;University of Oxford;EPFL - EPF Lausanne;;KU Leuven;Samsung R&D Institute China - Beijing;", "aff_domain": "kuleuven.be;google.com;google.com;ox.ac.uk;epfl.ch;;kuleuven.be;samsung.com;", "position": "PhD student;Researcher;Researcher;Full Professor;Associate Professor;;Emeritus;Researcher;", "bibtex": "@inproceedings{\nwang2023revisiting,\ntitle={Revisiting Evaluation Metrics for Semantic Segmentation: Optimization and Evaluation of Fine-grained Intersection over Union},\nauthor={Zifu Wang and Maxim Berman and Amal Rannen-Triki and Philip Torr and Devis Tuia and Tinne Tuytelaars and Luc Van Gool and Jiaqian Yu and Matthew B. Blaschko},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=jGyMUum1Lq}\n}", "github": "", "project": "", "reviewers": "QtsN;gpCb;NFd4;ahvw", "pdf_size": 35717246, "rating": "5;7;7;7", "confidence": "5;4;4;4", "wc_summary_and_contributions": "80;56;99;89", "wc_strengths": "16;84;105;60", "wc_improvement": "81;32;207;49", "wc_limitations": "39;40;56;18", "wc_correctness": "15;1;11;5", "wc_clarity": "3;1;34;5", "wc_relation_to_prior_work": "7;1;34;1", "wc_documentation": "7;19;27;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "249;235;574;236", "wc_reply_reviewers": "80;21;203;0", "wc_reply_authors": "1360;130;1126;156", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;4;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 81.0, 15.921683328090658 ], "wc_strengths_avg": [ 66.25, 33.09361720936531 ], "wc_improvement_avg": [ 92.25, 68.54697294556486 ], "wc_limitations_avg": [ 38.25, 13.497684986693088 ], "wc_correctness_avg": [ 8.0, 5.385164807134504 ], "wc_clarity_avg": [ 10.75, 13.497684986693088 ], "wc_relation_to_prior_work_avg": [ 10.75, 13.645054048995188 ], "wc_documentation_avg": [ 15.25, 8.257572282456872 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 323.5, 144.73164823216794 ], "wc_reply_reviewers_avg": [ 76.0, 78.97151385151484 ], "wc_reply_authors_avg": [ 693.0, 556.2634268042435 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9171865020646572626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "kuleuven.be;google.com;google.com;ox.ac.uk;epfl.ch;;kuleuven.be;samsung.com;", "author_num": 9, "aff_unique_index": "0;1;1;2;3;0;4", "aff_unique_norm": "Katholieke Universiteit Leuven;Google;University of Oxford;EPFL;Samsung", "aff_unique_dep": 
";Google;;;Samsung R&D Institute China", "aff_unique_url": "https://www.kuleuven.be;https://www.google.com;https://www.ox.ac.uk;https://www.epfl.ch;https://www.samsung.com/cn", "aff_unique_abbr": "KU Leuven;Google;Oxford;EPFL;SRC", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Mountain View;Lausanne;Beijing", "aff_country_unique_index": "0;1;2;2;3;0;4", "aff_country_unique": "Belgium;United States;United Kingdom;Switzerland;China" }, { "title": "SAMRS: Scaling-up Remote Sensing Segmentation Dataset with Segment Anything Model", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73494", "id": "jHrgq55ftl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1be3843e534ee06d3a70c7f62b983b31-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=jHrgq55ftl", "openreview": "https://openreview.net/forum?id=jHrgq55ftl", "poster": "/media/PosterPDFs/NeurIPS%202023/73494.png?t=1697164815.3165767", "slides": "https://nips.cc/virtual/2023/poster/73494", "video": "https://nips.cc/virtual/2023/poster/73494", "author_site": "Di Wang, Jing Zhang, Bo Du, Minqiang Xu, Lin Liu, Dacheng Tao, Liangpei Zhang", "tldr": "", "abstract": "The success of the Segment Anything Model (SAM) demonstrates the significance of data-centric machine learning. However, due to the difficulties and high costs associated with annotating Remote Sensing (RS) images, a large amount of valuable RS data remains unlabeled, particularly at the pixel level. In this study, we leverage SAM and existing RS object detection datasets to develop an efficient pipeline for generating a large-scale RS segmentation dataset, dubbed SAMRS. SAMRS totally possesses 105,090 images and 1,668,241 instances, surpassing existing high-resolution RS segmentation datasets in size by several orders of magnitude. It provides object category, location, and instance information that can be used for semantic segmentation, instance segmentation, and object detection, either individually or in combination. We also provide a comprehensive analysis of SAMRS from various aspects. Moreover, preliminary experiments highlight the importance of conducting segmentation pre-training with SAMRS to address task discrepancies and alleviate the limitations posed by limited training data during fine-tuning. 
The code and dataset will be available at https://github.com/ViTAE-Transformer/SAMRS", "keywords": "Remote Sensing;Segmentation;Segment Anything Model;Pre-training", "primary_area": "", "supplementary_material": "/attachment/96c1e18e557eac0a3dd272f10e61f547e619cbd5.pdf", "author": "Di Wang;Jing Zhang;Bo Du;Minqiang Xu;Lin Liu;Dacheng Tao;Liangpei Zhang", "authorids": "~Di_Wang13;~Jing_Zhang17;~Bo_Du3;~Minqiang_Xu1;~Lin_Liu14;~Dacheng_Tao1;~Liangpei_Zhang1", "gender": "M;M;;M;;;M", "homepage": "https://dotwang.github.io/;;;;;;http://www.lmars.whu.edu.cn/prof_web/zhangliangpei/rs/xueshu.htm", "dblp": "18/5410-23;05/3499-37.html;;86/5698;;;12/4846.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;;;;vzj2hcYAAAAJ", "orcid": "0000-0001-6360-4360;0000-0001-6595-7661;;0009-0001-8931-5775;0009-0008-2405-2434;;", "linkedin": ";;;;;;", "or_profile": "~Di_Wang13;~Jing_Zhang17;~Bo_Du3;~Minqiang_Xu1;~Lin_Liu14;~Dacheng_Tao1;~Liangpei_Zhang1", "aff": "Wuhan University;The University of Sydney;;IFLYTEK CO.LTD.;University of Science and Technology of China;;Wuhan University", "aff_domain": "whu.edu.cn;sydney.edu.au;;iflytek.com;ustc.edu.cn;;whu.edu.cn", "position": "PhD student;Research Fellow;;Principal Researcher;PhD student;;Full Professor", "bibtex": "@inproceedings{\nwang2023samrs,\ntitle={{SAMRS}: Scaling-up Remote Sensing Segmentation Dataset with Segment Anything Model},\nauthor={Di Wang and Jing Zhang and Bo Du and Minqiang Xu and Lin Liu and Dacheng Tao and Liangpei Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=jHrgq55ftl}\n}", "github": "", "project": "", "reviewers": "ETXB;MpE3;55Yp;rfwi", "pdf_size": 8923338, "rating": "4;5;6;8", "confidence": "4;5;5;4", "wc_summary_and_contributions": "69;67;45;59", "wc_strengths": "11;30;77;188", "wc_improvement": "101;144;96;151", "wc_limitations": "9;29;218;28", "wc_correctness": "1;23;13;22", "wc_clarity": "1;5;14;8", "wc_relation_to_prior_work": "1;8;11;4", "wc_documentation": "5;9;17;57", "wc_additional_feedback": "1;1;1;1", "wc_review": "199;316;492;518", "wc_reply_reviewers": "308;0;28;46", "wc_reply_authors": "751;1165;2628;1836", "reply_reviewers": "2;0;1;1", "reply_authors": "4;4;6;5", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 60.0, 9.433981132056603 ], "wc_strengths_avg": [ 76.5, 68.7113527737593 ], "wc_improvement_avg": [ 123.0, 24.68805379125702 ], "wc_limitations_avg": [ 71.0, 85.24376810066528 ], "wc_correctness_avg": [ 14.75, 8.842369591913696 ], "wc_clarity_avg": [ 7.0, 4.743416490252569 ], "wc_relation_to_prior_work_avg": [ 6.0, 3.8078865529319543 ], "wc_documentation_avg": [ 22.0, 20.663978319771825 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 381.25, 130.80400414360412 ], "wc_reply_reviewers_avg": [ 95.5, 123.7770172528002 ], "wc_reply_authors_avg": [ 1595.0, 711.0566081543719 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.75, 0.82915619758885 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.16903085094570333, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5928342352678973236&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "whu.edu.cn;sydney.edu.au;;iflytek.com;ustc.edu.cn;;whu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;0", 
"aff_unique_norm": "Wuhan University;University of Sydney;iFLYTEK;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.sydney.edu.au;https://www.iflytek.com;http://www.ustc.edu.cn", "aff_unique_abbr": "WHU;USYD;iFLYTEK;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "CluB: Cluster Meets BEV for LiDAR-Based 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70697", "id": "jIhX7SpfCz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f2fc4053a66edfa430bcdf9a6ff3b17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jIhX7SpfCz", "openreview": "https://openreview.net/forum?id=jIhX7SpfCz", "poster": "/media/PosterPDFs/NeurIPS%202023/70697.png?t=1698112563.1925676", "slides": "https://nips.cc/virtual/2023/poster/70697", "video": "https://nips.cc/virtual/2023/poster/70697", "author_site": "Yingjie Wang, Jiajun Deng, Yuenan Hou, Yao Li, Yu Zhang, Jianmin Ji, Wanli Ouyang, Yanyong Zhang", "tldr": "", "abstract": "Currently, LiDAR-based 3D detectors are broadly categorized into two groups, namely, BEV-based detectors and cluster-based detectors.\nBEV-based detectors capture the contextual information from the Bird's Eye View (BEV) and fill their center voxels via feature diffusion with a stack of convolution layers, which, however, weakens the capability of presenting an object with the center point.\nOn the other hand, cluster-based detectors exploit the voting mechanism and aggregate the foreground points into object-centric clusters for further prediction.\nIn this paper, we explore how to effectively combine these two complementary representations into a unified framework.\nSpecifically, we propose a new 3D object detection framework, referred to as CluB, which incorporates an auxiliary cluster-based branch into the BEV-based detector by enriching the object representation at both feature and query levels.\nTechnically, CluB is comprised of two steps.\nFirst, we construct a cluster feature diffusion module to establish the association between cluster features and BEV features in a subtle and adaptive fashion. 
\nBased on that, an imitation loss is introduced to distill object-centric knowledge from the cluster features to the BEV features.\nSecond, we design a cluster query generation module to leverage the voting centers directly from the cluster branch, thus enriching the diversity of object queries.\nMeanwhile, a direction loss is employed to encourage a more accurate voting center for each cluster.\nExtensive experiments are conducted on Waymo and nuScenes datasets, and our CluB achieves state-of-the-art performance on both benchmarks.", "keywords": "3D object detection ; Point clouds", "primary_area": "", "supplementary_material": "/attachment/5f4382825d8ce0285584f7cc90666111515e0503.pdf", "author": "Yingjie Wang;Jiajun Deng;Yuenan Hou;Yao Li;Yu Zhang;Jianmin Ji;Wanli Ouyang;Yanyong Zhang", "authorids": "~Yingjie_Wang2;~Jiajun_Deng1;~Yuenan_Hou1;~Yao_Li8;~Yu_Zhang51;~Jianmin_Ji1;~Wanli_Ouyang1;~Yanyong_Zhang2", "gender": ";M;M;;F;M;;F", "homepage": ";https://dengjiajun.com/;https://cardwing.github.io/;;http://staff.ustc.edu.cn/~yuzhang/;http://staff.ustc.edu.cn/~jianmin/;;http://staff.ustc.edu.cn/~yanyongz/", "dblp": ";;210/3047;;;16/1844;;44/2799", "google_scholar": ";FAAHjxsAAAAJ;https://scholar.google.com.hk/citations?user=o9mX9sUAAAAJ;ck-BIHQAAAAJ;mXuodZgAAAAJ;dbpeb5sAAAAJ;;qfbPQ1YAAAAJ", "orcid": ";;0000-0002-2844-7416;0000-0002-6063-3331;0000-0001-6638-6442;0000-0002-1515-0402;;", "linkedin": ";%E5%AE%B6%E4%BF%8A-%E9%82%93-77519a160/;yuenan-hou-859589136/;;;;;", "or_profile": "~Yingjie_Wang2;~Jiajun_Deng1;~Yuenan_Hou1;~Yao_Li8;~Yu_Zhang51;~Jianmin_Ji1;~Wanli_Ouyang1;~Yanyong_Zhang2", "aff": ";University of Sydney;Shanghai AI Laboratory;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": ";usyd.edu.au;pjlab.org.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn", "position": ";Postdoc;Researcher;PhD student;Full Professor;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nwang2023club,\ntitle={CluB: Cluster Meets {BEV} for Li{DAR}-Based 3D Object Detection},\nauthor={Yingjie Wang and Jiajun Deng and Yuenan Hou and Yao Li and Yu Zhang and Jianmin Ji and Wanli Ouyang and Yanyong Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jIhX7SpfCz}\n}", "github": "", "project": "", "reviewers": "KK49;oemX;Z2j1;UKd9;BdkH", "pdf_size": 823853, "rating": "5;5;5;6;6", "confidence": "4;5;4;5;4", "soundness": "2;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "76;69;161;65;41", "wc_strengths": "63;86;43;97;47", "wc_weaknesses": "68;167;57;229;67", "wc_questions": "97;109;84;46;21", "wc_limitations": "19;6;1;6;13", "wc_review": "323;437;346;443;189", "wc_reply_reviewers": "18;0;0;190;15", "wc_reply_authors": "0;0;0;217;0", "reply_reviewers": "1;0;0;2;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 82.4, 41.02487050558478 ], "wc_strengths_avg": [ 67.2, 21.226398658274558 ], "wc_weaknesses_avg": [ 117.6, 68.61953074744828 ], "wc_questions_avg": [ 71.4, 32.90349525506371 ], "wc_limitations_avg": [ 9.0, 6.29285308902091 ], "wc_review_avg": [ 347.6, 92.57991142791182 ], "wc_reply_reviewers_avg": 
[ 44.6, 73.07968253899301 ], "wc_reply_authors_avg": [ 43.4, 86.8 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.1666666666666666, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13796646678591967357&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";usyd.edu.au;pjlab.org.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;2;2;2", "aff_unique_norm": "University of Sydney;Shanghai AI Laboratory;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sydney.edu.au;https://www.shanghai-ai-lab.com;http://www.ustc.edu.cn", "aff_unique_abbr": "USYD;SAIL;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Australia;China" }, { "title": "Cola: A Benchmark for Compositional Text-to-image Retrieval", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73493", "id": "jKFKwW8JGG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/917cd410aa55b61594fa2a6f6e5a9e94-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=jKFKwW8JGG", "openreview": "https://openreview.net/forum?id=jKFKwW8JGG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73493", "video": "https://nips.cc/virtual/2023/poster/73493", "author_site": "Arijit Ray, Filip Radenovic, Abhimanyu Dubey, Bryan Plummer, Ranjay Krishna, Kate Saenko", "tldr": "", "abstract": "Compositional reasoning is a hallmark of human visual intelligence. Yet, despite the size of large vision-language models, they struggle to represent simple compositions by combining objects with their attributes. To measure this lack of compositional capability, we design Cola, a text-to-image retrieval benchmark to Compose Objects Localized with Attributes. To solve Cola, a model must retrieve images with the correct configuration of attributes and objects and avoid choosing a distractor image with the same objects and attributes but in the wrong configuration. Cola contains about 1.2k composed queries of 168 objects and 197 attributes on around 30K images. Our human evaluation finds that Cola is 83.33% accurate, similar to contemporary compositionality benchmarks. Using Cola as a testbed, we explore empirical modeling designs to adapt pre-trained vision-language models to reason compositionally. We explore 6 adaptation strategies on 2 seminal vision-language models, using compositionality-centric test benchmarks - Cola and CREPE. We find the optimal adaptation strategy is to train a multi-modal attention layer that jointly attends over the frozen pre-trained image and language features. Surprisingly, training multimodal layers on CLIP performs better than tuning a larger FLAVA model with already pre-trained multimodal layers. Furthermore, our adaptation strategy improves CLIP and FLAVA to comparable levels, suggesting that training multimodal layers using contrastive attribute-object data is key, as opposed to using them pre-trained. Lastly, we show that Cola is harder than a closely related contemporary benchmark, CREPE, since simpler fine-tuning strategies without multimodal layers suffice on CREPE, but not on Cola. 
However, we still see a significant gap between our best adaptation and human accuracy, suggesting considerable room for further research. \nProject page: https://cs-people.bu.edu/array/research/cola/", "keywords": "compositionality;vision-language", "primary_area": "", "supplementary_material": "/attachment/9c0b514efcff07dfc79ff12e88e280903bc487d6.pdf", "author": "Arijit Ray;Filip Radenovic;Abhimanyu Dubey;Bryan A. Plummer;Ranjay Krishna;Kate Saenko", "authorids": "~Arijit_Ray1;~Filip_Radenovic1;~Abhimanyu_Dubey1;~Bryan_A._Plummer1;~Ranjay_Krishna1;~Kate_Saenko1", "gender": "M;M;M;M;F;M", "homepage": "https://arijitray1993.github.io/;https://filipradenovic.github.io;;http://ranjaykrishna.com;http://ai.bu.edu;http://bryanplummer.com/", "dblp": "164/9384;128/0945;172/0866;167/3785;88/2754;163/2330", "google_scholar": "VE-ZVW0AAAAJ;LV6XQ00AAAAJ;KJNUEgkAAAAJ;IcqahyAAAAAJ;https://scholar.google.com.tw/citations?user=9xDADY4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-4175-0655;;;0000-0001-8784-2531;0000-0002-5704-7614;", "linkedin": ";;;ranjay-krishna-1a344444/;;", "or_profile": "~Arijit_Ray1;~Filip_Radenovic1;~Abhimanyu_Dubey1;~Ranjay_Krishna1;~Kate_Saenko1;~Bryan_Allen_Plummer1", "aff": "Google;Meta Facebook;Harvard University;University of Washington;Boston University, Boston University;Boston University", "aff_domain": "google.com;fb.com;harvard.edu;cs.washington.edu;bu.edu;bu.edu", "position": "Intern;Research Scientist;Collaborator;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nray2023cola,\ntitle={Cola: A Benchmark for Compositional Text-to-image Retrieval},\nauthor={Arijit Ray and Filip Radenovic and Abhimanyu Dubey and Bryan A. Plummer and Ranjay Krishna and Kate Saenko},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=jKFKwW8JGG}\n}", "github": "", "project": "", "reviewers": "H1og;dcEa;wAA7;QhKB;pbZZ", "pdf_size": 7496250, "rating": "6;6;7;7;7", "confidence": "5;3;2;4;4", "wc_summary_and_contributions": "127;47;47;424;61", "wc_strengths": "36;36;91;74;40", "wc_improvement": "464;83;93;315;49", "wc_limitations": "34;15;110;36;10", "wc_correctness": "51;3;17;16;7", "wc_clarity": "374;1;9;19;6", "wc_relation_to_prior_work": "111;1;6;22;9", "wc_documentation": "30;21;22;31;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1228;208;396;938;190", "wc_reply_reviewers": "37;0;0;59;0", "wc_reply_authors": "2692;217;493;576;205", "reply_reviewers": "1;0;0;1;0", "reply_authors": "5;1;1;2;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "wc_summary_and_contributions_avg": [ 141.2, 144.46923547939195 ], "wc_strengths_avg": [ 55.4, 22.817537115122658 ], "wc_improvement_avg": [ 200.8, 161.7756471166164 ], "wc_limitations_avg": [ 41.0, 35.977770914830174 ], "wc_correctness_avg": [ 18.8, 16.951696080333672 ], "wc_clarity_avg": [ 81.8, 146.2181931224702 ], "wc_relation_to_prior_work_avg": [ 29.8, 41.18931900383885 ], "wc_documentation_avg": [ 22.2, 8.611620056644394 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 592.0, 417.53754322216344 ], "wc_reply_reviewers_avg": [ 19.2, 24.522642598219303 ], "wc_reply_authors_avg": [ 836.6, 939.2875172171724 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.32025630761017415, 
"gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16797324624295158290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;fb.com;harvard.edu;cs.washington.edu;bu.edu;bu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;4", "aff_unique_norm": "Google;Meta;Harvard University;University of Washington;Boston University", "aff_unique_dep": "Google;Meta Platforms, Inc.;;;", "aff_unique_url": "https://www.google.com;https://meta.com;https://www.harvard.edu;https://www.washington.edu;https://www.bu.edu", "aff_unique_abbr": "Google;Meta;Harvard;UW;BU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Boston", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Partitioned Learned Bloom Filter", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70696", "id": "jL2eJxPK88", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7b2e844c52349134268e819a9b56b9e8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jL2eJxPK88", "openreview": "https://openreview.net/forum?id=jL2eJxPK88", "poster": "/media/PosterPDFs/NeurIPS%202023/70696.png?t=1701581659.6857893", "slides": "https://nips.cc/virtual/2023/poster/70696", "video": "https://nips.cc/virtual/2023/poster/70696", "author_site": "Atsuki Sato, Yusuke Matsui", "tldr": "", "abstract": "A Bloom filter is a memory-efficient data structure for approximate membership queries used in numerous fields of computer science.\nRecently, learned Bloom filters that achieve better memory efficiency using machine learning models have attracted attention.\nOne such filter, the partitioned learned Bloom filter (PLBF), achieves excellent memory efficiency.\nHowever, PLBF requires a $\\mathcal{O}(N^3k)$ time complexity to construct the data structure, where $N$ and $k$ are the hyperparameters of PLBF.\nOne can improve memory efficiency by increasing $N$, but the construction time becomes extremely long.\nThus, we propose two methods that can reduce the construction time while maintaining the memory efficiency of PLBF.\nFirst, we propose fast PLBF, which can construct the same data structure as PLBF with a smaller time complexity $\\mathcal{O}(N^2k)$.\nSecond, we propose fast PLBF++, which can construct the data structure with even smaller time complexity $\\mathcal{O}(Nk\\log N + Nk^2)$.\nFast PLBF++ does not necessarily construct the same data structure as PLBF.\nStill, it is almost as memory efficient as PLBF, and it is proved that fast PLBF++ has the same data structure as PLBF when the distribution satisfies a certain constraint.\nOur experimental results from real-world datasets show that (i) fast PLBF and fast PLBF++ can construct the data structure up to 233 and 761 times faster than PLBF, (ii) fast PLBF can achieve the same memory efficiency as PLBF, and (iii) fast PLBF++ can achieve almost the same memory efficiency as PLBF.\nThe codes are available at [this https URL](https://github.com/atsukisato/FastPLBF).", "keywords": "optimization;data structures;algorithms;theory;learned algorithms", "primary_area": "", "supplementary_material": "/attachment/f2410d7c42981afb06f283cd835c3374a15dee28.zip", "author": "Atsuki Sato;Yusuke Matsui", "authorids": "~Atsuki_Sato1;~Yusuke_Matsui1", "gender": "M;M", "homepage": "http://www.hal.t.u-tokyo.ac.jp/lab/ja/index_1.xhtml;http://yusukematsui.me/", "dblp": ";56/10540", "google_scholar": "SRNxWjMAAAAJ;kPVJu5UAAAAJ", "orcid": ";", 
"linkedin": ";", "or_profile": "~Atsuki_Sato1;~Yusuke_Matsui1", "aff": "The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "Undergrad student;Lecturer", "bibtex": "@inproceedings{\nsato2023fast,\ntitle={Fast Partitioned Learned Bloom Filter},\nauthor={Atsuki Sato and Yusuke Matsui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jL2eJxPK88}\n}", "github": "", "project": "", "reviewers": "SSjS;aq8D;YWKv;cwdf", "pdf_size": 2121199, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "1;3;3;3", "wc_summary": "53;158;97;119", "wc_strengths": "22;34;33;116", "wc_weaknesses": "141;19;20;125", "wc_questions": "99;86;5;112", "wc_limitations": "1;1;15;3", "wc_review": "316;298;170;475", "wc_reply_reviewers": "300;0;0;44", "wc_reply_authors": "1139;0;0;252", "reply_reviewers": "4;0;0;1", "reply_authors": "5;1;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 106.75, 37.94980237102691 ], "wc_strengths_avg": [ 51.25, 37.67874069020885 ], "wc_weaknesses_avg": [ 76.25, 57.03233731840209 ], "wc_questions_avg": [ 75.5, 41.728287767412645 ], "wc_limitations_avg": [ 5.0, 5.830951894845301 ], "wc_review_avg": [ 314.75, 108.29906509291759 ], "wc_reply_reviewers_avg": [ 86.0, 124.85191228010886 ], "wc_reply_authors_avg": [ 347.75, 468.2693535776178 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17639309676260392&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "jOuPR9IH00", "title": "Pessimistic Nonlinear Least-Squares Value Iteration for Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline reinforcement learning, where the agent aims to learn the optimal policy based on the data collected by a behavior policy, has attracted increasing attention in recent years. While offline RL with linear function approximation has been extensively studied with optimal results being achieved under various assumptions, the theoretical understanding of offline RL with non-linear function approximation is still limited. Specifically, most existing works on offline RL with non-linear function approximation either have a poor dependency on the function class complexity or require an inefficient planning phase. \n In this paper, we propose an oracle-efficient algorithm VAPVI for offline RL with non-linear function approximation. Our algorithm enjoys a regret bound that has a tight dependence on the function class complexity and achieves minimax optimal instance-dependent regret when specialized to linear function approximation. 
In our theoretical analysis, we introduce a new coverage assumption for general function approximation, bridging the minimum-eigenvalue assumption and the uncertainty measure widely used in online nonlinear RL. Our algorithmic design includes 1) a variance-based weighted regression scheme for general function classes; 2) a variance estimation subroutine and 3) a pessimistic value iteration planning phase. To the best of our knowledge, this is the first statistically optimal algorithm for nonlinear offline RL. ", "keywords": "Offline Reinforcement Learning;Markov Decision Process;Nonlinear Function Approximation;Generalized Eluder Dimension", "primary_area": "", "supplementary_material": "/attachment/a2cf5c70e668bf7acd4a9a82c8c7c65d7708bdb5.pdf", "author": "Qiwei Di;Heyang Zhao;Jiafan He;Quanquan Gu", "authorids": "~Qiwei_Di1;~Heyang_Zhao1;~Jiafan_He1;~Quanquan_Gu1", "gender": "M;M;M;M", "homepage": "https://qiwei-di1234.github.io/;https://web.cs.ucla.edu/~hyzhao/;https://sites.google.com/g.ucla.edu/jiafan-he-homepage;http://web.cs.ucla.edu/~qgu/", "dblp": "354/3878;;214/5785;50/4597", "google_scholar": "SewL0pkAAAAJ;zHQ1ap0AAAAJ;F3AXNBwAAAAJ;GU9HgNAAAAAJ", "orcid": ";;;", "linkedin": "qiwei-di-00776a253/;;;", "or_profile": "~Qiwei_Di1;~Heyang_Zhao1;~Jiafan_He1;~Quanquan_Gu1", "aff": "University of California, Los Angeles;Computer Science Department, University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\ndi2023pessimistic,\ntitle={Pessimistic Nonlinear Least-Squares Value Iteration for Offline Reinforcement Learning},\nauthor={Qiwei Di and Heyang Zhao and Jiafan He and Quanquan Gu},\nyear={2023},\nurl={https://openreview.net/forum?id=jOuPR9IH00}\n}", "github": "", "project": "", "reviewers": "gXdR;vaFF;17B8", "site": "https://openreview.net/forum?id=jOuPR9IH00", "pdf_size": 409014, "rating": "3;4;5", "confidence": "3;3;5", "soundness": "1;3;3", "novelty": "1;2;2", "presentation": "2;3;3", "wc_summary": "90;74;67", "wc_strengths": "58;38;26", "wc_weaknesses": "241;244;310", "wc_questions": "89;30;5", "wc_limitations": "1;1;1", "wc_review": "479;387;409", "wc_reply_reviewers": "40;0;15", "wc_reply_authors": "75;0;6", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 4.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 9.626352718795768 ], "wc_strengths_avg": [ 40.666666666666664, 13.199326582148887 ], "wc_weaknesses_avg": [ 265.0, 31.843366656181317 ], "wc_questions_avg": [ 41.333333333333336, 35.216788925485844 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 425.0, 39.22584182228174 ], "wc_reply_reviewers_avg": [ 18.333333333333332, 16.49915822768611 ], "wc_reply_authors_avg": [ 27.0, 34.02939905434711 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13706149271000326202&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", 
"aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "IEBins: Iterative Elastic Bins for Monocular Depth Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70695", "id": "jOuxQGRVoQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a61023ce36d21010f1423304f8ec49af-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jOuxQGRVoQ", "openreview": "https://openreview.net/forum?id=jOuxQGRVoQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70695.png?t=1701662442.5228624", "slides": "https://nips.cc/virtual/2023/poster/70695", "video": "https://nips.cc/virtual/2023/poster/70695", "author_site": "Shuwei Shao, Zhongcai Pei, Xingming Wu, Zhong Liu, Weihai Chen, Zhengguo Li", "tldr": "", "abstract": "Monocular depth estimation (MDE) is a fundamental topic of geometric computer vision and a core technique for many downstream applications. Recently, several methods reframe the MDE as a classification-regression problem where a linear combination of probabilistic distribution and bin centers is used to predict depth. In this paper, we propose a novel concept of iterative elastic bins (IEBins) for the classification-regression-based MDE. The proposed IEBins aims to search for high-quality depth by progressively optimizing the search range, which involves multiple stages and each stage performs a finer-grained depth search in the target bin on top of its previous stage. To alleviate the possible error accumulation during the iterative process, we utilize a novel elastic target bin to replace the original target bin, the width of which is adjusted elastically based on the depth uncertainty. Furthermore, we develop a dedicated framework composed of a feature extractor and an iterative optimizer that has powerful temporal context modeling capabilities benefiting from the GRU-based architecture. Extensive experiments on the KITTI, NYU-Depth-v2 and SUN RGB-D datasets demonstrate that the proposed method surpasses prior state-of-the-art competitors. 
The source code is publicly available at https://github.com/ShuweiShao/IEBins.", "keywords": "Monocular depth estimation;Iterative refinement;Deep learning", "primary_area": "", "supplementary_material": "/attachment/f1abd098bf839fad6899d53befd7516212b998d2.pdf", "author": "Shuwei Shao;Zhongcai Pei;Xingming Wu;Zhong Liu;Weihai Chen;Zhengguo Li", "authorids": "~Shuwei_Shao1;~Zhongcai_Pei1;~Xingming_Wu1;~Zhong_Liu4;~Weihai_Chen2;~Zhengguo_Li2", "gender": "M;M;M;M;M;M", "homepage": ";https://shi.buaa.edu.cn/peizhongcai/zh_CN/index.htm;http://dept3.buaa.edu.cn/szjs/zzjs/dgdzjxsyzx1/js/wxm.htm;http://irmct.buaa.edu.cn/;https://www.researchgate.net/profile/Weihai-Chen-2;", "dblp": "304/4196;69/8488;35/5537;30/2371;45/1415;46/4171", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;;;https://scholar.google.com.hk/citations?user=5PoZrcYAAAAJ;LiUX7WQAAAAJ", "orcid": ";;;;0000-0001-7912-4505;", "linkedin": ";;;;;", "or_profile": "~Shuwei_Shao1;~Zhongcai_Pei1;~Xingming_Wu1;~Zhong_Liu4;~Weihai_Chen2;~Zhengguo_Li2", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;Beihang University;Institute for Infocomm Researcher", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;a-star.edu.sg", "position": "PhD student;Full Professor;Full Professor;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nshao2023iebins,\ntitle={{IEB}ins: Iterative Elastic Bins for Monocular Depth Estimation},\nauthor={Shuwei Shao and Zhongcai Pei and Xingming Wu and Zhong Liu and Weihai Chen and Zhengguo Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jOuxQGRVoQ}\n}", "github": "", "project": "", "reviewers": "LZ7t;4WPj;TtXH;s85F;iovA;jxnL", "pdf_size": 2600358, "rating": "4;4;5;5;7;8", "confidence": "4;4;4;4;5;2", "soundness": "2;2;3;3;3;4", "novelty": "2;2;2;3;4;4", "presentation": "2;2;3;3;2;4", "wc_summary": "88;66;99;56;180;88", "wc_strengths": "25;13;77;30;174;60", "wc_weaknesses": "91;108;257;87;440;3", "wc_questions": "201;21;94;40;79;1", "wc_limitations": "25;35;9;62;20;7", "wc_review": "430;243;536;275;893;159", "wc_reply_reviewers": "125;0;294;32;155;0", "wc_reply_authors": "281;0;639;293;17;0", "reply_reviewers": "2;0;3;1;1;0", "reply_authors": "2;1;4;2;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.8333333333333335, 0.8975274678557507 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.8333333333333335, 0.8975274678557507 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 96.16666666666667, 40.19293746695086 ], "wc_strengths_avg": [ 63.166666666666664, 54.0968781190026 ], "wc_weaknesses_avg": [ 164.33333333333334, 144.41798903029897 ], "wc_questions_avg": [ 72.66666666666667, 65.62181209188162 ], "wc_limitations_avg": [ 26.333333333333332, 18.544241394268166 ], "wc_review_avg": [ 422.6666666666667, 244.17116036820474 ], "wc_reply_reviewers_avg": [ 101.0, 104.80458005259122 ], "wc_reply_authors_avg": [ 205.0, 231.39792566053828 ], "reply_reviewers_avg": [ 1.1666666666666667, 1.0671873729054748 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.433289122413121, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3654899670765014389&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": 
"buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;a-star.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Beihang University;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "BUAA;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Hokoff: Real Game Dataset from Honor of Kings and its Offline Reinforcement Learning Benchmarks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73492", "id": "jP3BduIxy6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/464fefa022aaefc85d901317bbf13f85-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=jP3BduIxy6", "openreview": "https://openreview.net/forum?id=jP3BduIxy6", "poster": "/media/PosterPDFs/NeurIPS%202023/73492.png?t=1701421020.7798083", "slides": "https://nips.cc/virtual/2023/poster/73492", "video": "https://nips.cc/virtual/2023/poster/73492", "author_site": "Yun Qu, Boyuan Wang, Jianzhun Shao, Yuhang Jiang, Chen Chen, Zhenbin Ye, Liu Linc, Yang Feng, Lin Lai, Hongyang Qin, Minwen Deng, Juchao Zhuo, Deheng Ye, Qiang Fu, YANG GUANG, Wei Yang, Lanxiao Huang, Xiangyang Ji", "tldr": "", "abstract": "The advancement of Offline Reinforcement Learning (RL) and Offline Multi-Agent Reinforcement Learning (MARL) critically depends on the availability of high-quality, pre-collected offline datasets that represent real-world complexities and practical applications. However, existing datasets often fall short in their simplicity and lack of realism. To address this gap, we propose Hokoff, a comprehensive set of pre-collected datasets that covers both offline RL and offline MARL, accompanied by a robust framework, to facilitate further research. This data is derived from Honor of Kings, a recognized Multiplayer Online Battle Arena (MOBA) game known for its intricate nature, closely resembling real-life situations. Utilizing this framework, we benchmark a variety of offline RL and offline MARL algorithms. We also introduce a novel baseline algorithm tailored for the inherent hierarchical action space of the game. 
We reveal the incompetency of current offline RL approaches in handling task complexity, generalization and multi-task learning.", "keywords": "offline reinforcement learning;offline multi-agent reinforcement learning;datasets and benchmarks", "primary_area": "", "supplementary_material": "/attachment/b725b4bdde4ec2bc98f9ae06b31d5fc98fdd79f1.pdf", "author": "Yun Qu;Boyuan Wang;Jianzhun Shao;Yuhang Jiang;Chen Chen;Zhenbin Ye;Lin Liu;Yang Jun Feng;Lin Lai;Hongyang Qin;Minwen Deng;Juchao Zhuo;Deheng Ye;QIANG FU;YANG GUANG;Yang Wei;Lanxiao Huang;Xiangyang Ji", "authorids": "~Yun_Qu2;~Boyuan_Wang1;~Jianzhun_Shao1;~Yuhang_Jiang3;~Chen_Chen3;~Zhenbin_Ye1;~Lin_Liu15;~Yang_Jun_Feng1;~Lin_Lai1;~Hongyang_Qin1;~Minwen_Deng2;~Juchao_Zhuo1;~Deheng_Ye1;~QIANG_FU8;~YANG_GUANG1;~Yang_Wei2;~Lanxiao_Huang1;~Xiangyang_Ji1", "gender": "M;M;M;;F;Not Specified;;M;M;;M;M;M;M;M;M;M;", "homepage": "https://github.com/cloud-qu;https://github.com/BoyuanWang-hub;https://github.com/qyz55;;;https://ieeexplore.ieee.org/author/37086201790;;https://github.com/;https://lailin.xyz/;;;https://aiarena.tencent.com/aiarena/zh;http://yedeheng.github.io/;;https://iwiki.woa.com/space/~mikoyang;;;", "dblp": "80/10774-2;;263/2309;239/4567;;;;;;;256/8604.html;;159/9503;;;03/1094-32.html;255/6012.html;", "google_scholar": "l9Ky9goAAAAJ;;;https://scholar.google.com/citations?hl=en;l8_g4oAAAAAJ;;;;;;;;jz5XKuQAAAAJ;gANaxT0AAAAJ;;;;", "orcid": "0009-0000-1803-8435;;;;;;;;;;;;0000-0002-1754-1837;;;;;", "linkedin": ";;;;;;;;;;;;;;;;;", "or_profile": "~Yun_Qu2;~Boyuan_Wang1;~Jianzhun_Shao1;~Yuhang_Jiang3;~Chen_Chen3;~Zhenbin_Ye1;~Lin_Liu15;~Yang_Jun_Feng1;~Lin_Lai1;~Hongyang_Qin1;~Minwen_Deng2;~Juchao_Zhuo1;~Deheng_Ye1;~QIANG_FU8;~YANG_GUANG1;~Yang_Wei2;~Lanxiao_Huang1;~Xiangyang_Ji1", "aff": "Tencent TiMi Studio;Tsinghua University;Tsinghua University;Tsinghua University;Qiyuan Lab;;;;;;Tencent AI Lab;;Tencent;Tencent AI Lab;TianMei studio;Tencent AI Lab;Tencent TiMi L1 Studio;", "aff_domain": "tencent.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;qiyuanlab.com;;;;;;tencent.com;;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;", "position": "Intern;MS student;PhD student;PhD student;Researcher;;;;;;Researcher;;Team Manager;Principal Researcher;Undergrad student;Researcher;Researcher;", "bibtex": "@inproceedings{\nqu2023hokoff,\ntitle={Hokoff: Real Game Dataset from Honor of Kings and its Offline Reinforcement Learning Benchmarks},\nauthor={Yun Qu and Boyuan Wang and Jianzhun Shao and Yuhang Jiang and Chen Chen and Zhenbin Ye and Lin Liu and Yang Jun Feng and Lin Lai and Hongyang Qin and Minwen Deng and Juchao Zhuo and Deheng Ye and QIANG FU and YANG GUANG and Yang Wei and Lanxiao Huang and Xiangyang Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=jP3BduIxy6}\n}", "github": "", "project": "", "reviewers": "DxzA;KDwt;D3sf", "pdf_size": 8335956, "rating": "6;6;7", "confidence": "3;2;3", "wc_summary_and_contributions": "128;156;42", "wc_strengths": "148;24;88", "wc_improvement": "139;39;844", "wc_limitations": "165;4;132", "wc_correctness": "23;2;7", "wc_clarity": "70;1;99", "wc_relation_to_prior_work": "102;5;31", "wc_documentation": "50;13;28", "wc_additional_feedback": "1;1;1", "wc_review": "826;245;1272", "wc_reply_reviewers": "32;133;21", "wc_reply_authors": "939;854;1779", "reply_reviewers": "1;1;1", "reply_authors": "3;3;4", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 
2.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 108.66666666666667, 48.50658603621666 ], "wc_strengths_avg": [ 86.66666666666667, 50.63156678946007 ], "wc_improvement_avg": [ 340.6666666666667, 358.24417495458164 ], "wc_limitations_avg": [ 100.33333333333333, 69.43742186714276 ], "wc_correctness_avg": [ 10.666666666666666, 8.9566858950296 ], "wc_clarity_avg": [ 56.666666666666664, 41.10420362390634 ], "wc_relation_to_prior_work_avg": [ 46.0, 40.995934757810645 ], "wc_documentation_avg": [ 30.333333333333332, 15.195028426721974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 781.0, 420.4767135843157 ], "wc_reply_reviewers_avg": [ 62.0, 50.40502620440412 ], "wc_reply_authors_avg": [ 1190.6666666666667, 417.45924618125565 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=68975355759795101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tencent.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;qiyuanlab.com;;;;;;tencent.com;;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;", "author_num": 18, "aff_unique_index": "0;1;1;1;2;0;0;0;3;0;0", "aff_unique_norm": "Tencent;Tsinghua University;Qiyuan Lab;TianMei studio", "aff_unique_dep": "TiMi Studio;;;", "aff_unique_url": "https://timi.qq.com;https://www.tsinghua.edu.cn;;", "aff_unique_abbr": "Tencent;THU;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Is Learning in Games Good for the Learners?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70694", "id": "jR2FkqW6GB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a9ea92ef18aae17627d133534209e640-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jR2FkqW6GB", "openreview": "https://openreview.net/forum?id=jR2FkqW6GB", "poster": "/media/PosterPDFs/NeurIPS%202023/70694.png?t=1702332674.5629904", "slides": "https://nips.cc/virtual/2023/poster/70694", "video": "https://nips.cc/virtual/2023/poster/70694", "author_site": "William Brown, Jon Schneider, Kiran Vodrahalli", "tldr": "", "abstract": "We consider a number of questions related to tradeoffs between reward and regret in repeated gameplay between two agents. To facilitate this, we introduce a notion of generalized equilibrium which allows for asymmetric regret constraints, and yields polytopes of feasible values for each agent and pair of regret constraints, where we show that any such equilibrium is reachable by a pair of algorithms which maintain their regret guarantees against arbitrary opponents. As a central example, we highlight the case where one agent is no-swap and the other's regret is unconstrained. We show that this captures an extension of Stackelberg equilibria with a matching optimal value, and that there exists a wide class of games where a player can significantly increase their utility by deviating from a no-swap-regret algorithm against a no-swap learner (in fact, almost any game without pure Nash equilibria is of this form). Additionally, we make use of generalized equilibria to consider tradeoffs in terms of the opponent's algorithm choice. 
We give a tight characterization for the maximal reward obtainable against some no-regret learner, yet we also show a class of games in which this is bounded away from the value obtainable against the class of common \"mean-based\" no-regret algorithms. Finally, we consider the question of learning reward-optimal strategies via repeated play with a no-regret agent when the game is initially unknown. Again we show tradeoffs depending on the opponent's learning algorithm: the Stackelberg strategy is learnable in exponential time with any no-regret agent (and in polynomial time with any no-adaptive-regret agent) for any game where it is learnable via queries, and there are games where it is learnable in polynomial time against any no-swap-regret agent but requires exponential time against a mean-based no-regret agent.", "keywords": "learning in games;correlated equilibria;Stackelberg equilibria;swap regret;dynamic regret", "primary_area": "", "supplementary_material": "/attachment/d1e5a75623f806cbc8456f6545d5b2713a0619d4.pdf", "author": "William Brown;Jon Schneider;Kiran Vodrahalli", "authorids": "~William_Brown7;~Jon_Schneider1;~Kiran_Vodrahalli1", "gender": "M;M;M", "homepage": "https://willcb.com;https://jschnei.github.io;https://kiranvodrahalli.github.io", "dblp": "36/3720-4.html;146/0503;188/5863", "google_scholar": "JUJdJMoAAAAJ;Jc97EyAAAAAJ;7oBE9-oAAAAJ", "orcid": ";;", "linkedin": "willcb/;;", "or_profile": "~William_Brown7;~Jon_Schneider1;~Kiran_Vodrahalli1", "aff": "Columbia University;Google;Google", "aff_domain": "columbia.edu;google.com;google.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nbrown2023is,\ntitle={Is Learning in Games Good for the Learners?},\nauthor={William Brown and Jon Schneider and Kiran Vodrahalli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jR2FkqW6GB}\n}", "github": "", "project": "", "reviewers": "hPuK;VEjN;vLcT;TzAU;o15k", "pdf_size": 372567, "rating": "6;7;7;7;8", "confidence": "5;2;4;2;5", "soundness": "3;3;4;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;2;3;4", "wc_summary": "86;87;428;154;166", "wc_strengths": "30;68;100;66;89", "wc_weaknesses": "12;114;76;23;158", "wc_questions": "3;135;14;1;1", "wc_limitations": "3;1;10;1;1", "wc_review": "134;405;628;245;415", "wc_reply_reviewers": "0;0;10;0;5", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 1.3564659966250538 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 184.2, 126.31136132589181 ], "wc_strengths_avg": [ 70.6, 23.996666435153028 ], "wc_weaknesses_avg": [ 76.6, 54.90209467770788 ], "wc_questions_avg": [ 30.8, 52.323608438256635 ], "wc_limitations_avg": [ 3.2, 3.4871191548325386 ], "wc_review_avg": [ 365.4, 167.94594368427005 ], "wc_reply_reviewers_avg": [ 3.0, 4.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17605485344877156621&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "columbia.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Columbia 
University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.columbia.edu;https://www.google.com", "aff_unique_abbr": "Columbia;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Better with Less: Effective Augmentation for Sample-Efficient Visual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70693", "id": "jRL6ErxMVB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bc26087d3f82e62044fc77752e86737e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jRL6ErxMVB", "openreview": "https://openreview.net/forum?id=jRL6ErxMVB", "poster": "/media/PosterPDFs/NeurIPS%202023/70693.png?t=1699606545.89875", "slides": "https://nips.cc/virtual/2023/poster/70693", "video": "https://nips.cc/virtual/2023/poster/70693", "author_site": "Guozheng Ma, Linrui Zhang, Haoyu Wang, Lu Li, Zilin Wang, Zhen Wang, Li Shen, Xueqian Wang, Dacheng Tao", "tldr": "", "abstract": "Data augmentation (DA) is a crucial technique for enhancing the sample efficiency of visual reinforcement learning (RL) algorithms.\nNotably, employing simple observation transformations alone can yield outstanding performance without extra auxiliary representation tasks or pre-trained encoders. However, it remains unclear which attributes of DA account for its effectiveness in achieving sample-efficient visual RL. To investigate this issue and further explore the potential of DA, this work conducts comprehensive experiments to assess the impact of DA's attributes on its efficacy and provides the following insights and improvements: (1) For individual DA operations, we reveal that both ample spatial diversity and slight hardness are indispensable. Building on this finding, we introduce Random PadResize (Rand PR), a new DA operation that offers abundant spatial diversity with minimal hardness. (2) For multi-type DA fusion schemes, the increased DA hardness and unstable data distribution result in the current fusion schemes being unable to achieve higher sample efficiency than their corresponding individual operations. Taking the non-stationary nature of RL into account, we propose a RL-tailored multi-type DA fusion scheme called Cycling Augmentation (CycAug), which performs periodic cycles of different DA operations to increase type diversity while maintaining data distribution consistency. 
Extensive evaluations on the DeepMind Control suite and CARLA driving simulator demonstrate that our methods achieve superior sample efficiency compared with the prior state-of-the-art methods.", "keywords": "Data Augmentation;Visual Reinforcement Learning;Sample Efficiency", "primary_area": "", "supplementary_material": "", "author": "Guozheng Ma;Linrui Zhang;Haoyu Wang;Lu Li;Zilin Wang;Zhen Wang;Li Shen;Xueqian Wang;Dacheng Tao", "authorids": "~Guozheng_Ma2;~Linrui_Zhang1;~Haoyu_Wang12;~Lu_Li5;~Zilin_Wang1;~Zhen_Wang9;~Li_Shen1;~Xueqian_Wang1;~Dacheng_Tao1", "gender": "M;M;M;M;;;M;M;", "homepage": "https://guozheng-ma.github.io/;;https://github.com/Harry-mic;https://github.com/lilucse;;;https://sites.google.com/site/mathshenli/home;;", "dblp": ";;;;;;91/3680-8;43/3563-1;", "google_scholar": "jDvVglUAAAAJ;;;QPsrZx8AAAAJ;;;yVhgENIAAAAJ;h9dN_ykAAAAJ;", "orcid": ";;;;;;;0000-0003-3542-0593;", "linkedin": ";%E9%BA%9F%E7%9D%BF-%E5%BC%A0-bb5312222/;;;;;;;", "or_profile": "~Guozheng_Ma2;~Linrui_Zhang1;~Haoyu_Wang12;~Lu_Li5;~Zilin_Wang1;~Zhen_Wang9;~Li_Shen1;~Xueqian_Wang1;~Dacheng_Tao1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;;;JD Explore Academy;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;;;jd.com;tsinghua.edu.cn;", "position": "MS student;MS student;MS student;MS student;;;Researcher;Full Professor;", "bibtex": "@inproceedings{\nma2023learning,\ntitle={Learning Better with Less: Effective Augmentation for Sample-Efficient Visual Reinforcement Learning},\nauthor={Guozheng Ma and Linrui Zhang and Haoyu Wang and Lu Li and Zilin Wang and Zhen Wang and Li Shen and Xueqian Wang and Dacheng Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jRL6ErxMVB}\n}", "github": "", "project": "", "reviewers": "HheB;oGs4;2eeL;EBtK;RHoY", "pdf_size": 4199245, "rating": "5;6;6;6;6", "confidence": "4;5;3;4;3", "soundness": "3;4;3;3;3", "novelty": "2;3;4;3;3", "presentation": "3;3;3;3;3", "wc_summary": "58;121;55;70;103", "wc_strengths": "37;113;141;110;82", "wc_weaknesses": "123;24;180;117;90", "wc_questions": "4;58;5;64;17", "wc_limitations": "11;38;5;25;37", "wc_review": "233;354;386;386;329", "wc_reply_reviewers": "64;0;0;0;0", "wc_reply_authors": "308;0;0;0;113", "reply_reviewers": "1;0;0;0;0", "reply_authors": "2;1;1;1;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.4, 26.112066176386733 ], "wc_strengths_avg": [ 96.6, 35.17157943567505 ], "wc_weaknesses_avg": [ 106.8, 50.720410092979336 ], "wc_questions_avg": [ 29.6, 26.11206617638673 ], "wc_limitations_avg": [ 23.2, 13.362634470792052 ], "wc_review_avg": [ 337.6, 56.51406904479627 ], "wc_reply_reviewers_avg": [ 12.8, 25.600000000000005 ], "wc_reply_authors_avg": [ 84.2, 120.153901309945 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1700185672155716016&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;;;jd.com;tsinghua.edu.cn;", 
"author_num": 9, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Tsinghua University;JD", "aff_unique_dep": ";JD Explore Academy", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "When are ensembles really effective?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70692", "id": "jS4DUGOtBD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/30b6fa308e62ed52180c31ae3ba6bb0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jS4DUGOtBD", "openreview": "https://openreview.net/forum?id=jS4DUGOtBD", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70692", "video": "https://nips.cc/virtual/2023/poster/70692", "author_site": "Ryan Theisen, Hyunsuk Kim, Yaoqing Yang, Liam Hodgkinson, Michael Mahoney", "tldr": "", "abstract": "Ensembling has a long history in statistical data analysis, with many impactful applications. \nHowever, in many modern machine learning settings, the benefits of ensembling are less ubiquitous and less obvious. \nWe study, both theoretically and empirically, the fundamental question of when ensembling yields significant performance improvements in classification tasks. \nTheoretically, we prove new results relating the \\emph{ensemble improvement rate} (a measure of how much ensembling decreases the error rate versus a single model, on a relative scale) to the \\emph{disagreement-error ratio}. \nWe show that ensembling improves performance significantly whenever the disagreement rate is large relative to the average error rate; and that, conversely, one classifier is often enough whenever the disagreement rate is low relative to the average error rate. \nOn the way to proving these results, we derive, under a mild condition called \\emph{competence}, improved upper and lower bounds on the average test error rate of the majority vote classifier.\nTo complement this theory, we study ensembling empirically in a variety of settings, verifying the predictions made by our theory, and identifying practical scenarios where ensembling does and does not result in large performance improvements. \nPerhaps most notably, we demonstrate a distinct difference in behavior between interpolating models (popular in current practice) and non-interpolating models (such as tree-based methods, where ensembling is popular), demonstrating that ensembling helps considerably more in the latter case than in the former.", "keywords": "Ensembling;theory;deep learning", "primary_area": "", "supplementary_material": "/attachment/8ed732469ddd606e556aff847707550457536a4a.pdf", "author": "Ryan Theisen;Hyunsuk Kim;Yaoqing Yang;Liam Hodgkinson;Michael W. 
Mahoney", "authorids": "~Ryan_Theisen1;~Hyunsuk_Kim1;~Yaoqing_Yang1;~Liam_Hodgkinson1;~Michael_W._Mahoney1", "gender": ";;M;M;", "homepage": "http://ryantheisen.com;https://statistics.berkeley.edu/people/hyunsuk-kim;https://sites.google.com/site/yangyaoqingcmu/;http://www.liamhodgkinson.com;", "dblp": "251/5575;;04/4176;238/1555;", "google_scholar": "T1phq10AAAAJ;;LYvugWgAAAAJ;;", "orcid": ";;0000-0001-9908-5531;;", "linkedin": ";;;;", "or_profile": "~Ryan_Theisen1;~Hyunsuk_Kim1;~Yaoqing_Yang1;~Liam_Hodgkinson1;~Michael_W._Mahoney1", "aff": "University of California, Berkeley;University of California, Berkeley;Dartmouth College;University of Melbourne;", "aff_domain": "berkeley.edu;berkeley.edu;dartmouth.edu;unimelb.edu;", "position": "PhD student;PhD student;Assistant Professor;Lecturer;", "bibtex": "@inproceedings{\ntheisen2023when,\ntitle={When are ensembles really effective?},\nauthor={Ryan Theisen and Hyunsuk Kim and Yaoqing Yang and Liam Hodgkinson and Michael W. Mahoney},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jS4DUGOtBD}\n}", "github": "", "project": "", "reviewers": "GNtk;E93x;sRJm;G3j5;un8p", "pdf_size": 1486780, "rating": "4;7;7;7;9", "confidence": "2;4;4;4;5", "soundness": "2;4;3;4;3", "novelty": "2;4;3;3;4", "presentation": "3;4;3;4;4", "wc_summary": "9;69;118;103;182", "wc_strengths": "4;135;59;92;148", "wc_weaknesses": "20;42;22;69;111", "wc_questions": "13;204;24;19;193", "wc_limitations": "2;5;2;10;60", "wc_review": "48;455;225;293;694", "wc_reply_reviewers": "0;98;19;41;11", "wc_reply_authors": "0;41;0;182;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;1;2;1", "rating_avg": [ 6.8, 1.6 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 96.2, 56.968061227322806 ], "wc_strengths_avg": [ 87.6, 52.38549417539173 ], "wc_weaknesses_avg": [ 52.8, 34.03174988154444 ], "wc_questions_avg": [ 90.6, 88.23740703352519 ], "wc_limitations_avg": [ 15.8, 22.292599668948437 ], "wc_review_avg": [ 343.0, 218.72082662608972 ], "wc_reply_reviewers_avg": [ 33.8, 34.798850555729565 ], "wc_reply_authors_avg": [ 44.6, 70.5112756940335 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9951052080056662, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12934810017125247201&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;dartmouth.edu;unimelb.edu;", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of California, Berkeley;Dartmouth College;University of Melbourne", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.dartmouth.edu;https://www.unimelb.edu.au", "aff_unique_abbr": "UC Berkeley;Dartmouth;UniMelb", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Australia" }, { "title": "FELM: Benchmarking Factuality Evaluation of Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73491", "id": "jSO7Vgolc6", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b8a7960d343e023a6a0afe37eee6022-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=jSO7Vgolc6", "openreview": "https://openreview.net/forum?id=jSO7Vgolc6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73491", "video": "https://nips.cc/virtual/2023/poster/73491", "author_site": "shiqi chen, Yiran Zhao, Jinghan Zhang, I-Chun Chern, Siyang Gao, Pengfei Liu, Junxian He", "tldr": "", "abstract": "Assessing factuality of text generated by large language models (LLMs) is an emerging yet crucial research area, aimed at alerting users to potential errors and guiding the development of more reliable LLMs. Nonetheless, the evaluators assessing factuality necessitate suitable evaluation themselves to gauge progress and foster advancements. This direction remains under-explored, resulting in substantial impediments to the progress of factuality evaluators. To mitigate this issue, we introduce a benchmark for Factuality Evaluation of large Language Models, referred to as FELM. In this benchmark, we collect responses generated from LLMs and annotate factuality labels in a fine-grained manner. Contrary to previous studies that primarily concentrate on the factuality of world knowledge (e.g. information from Wikipedia), FELM focuses on factuality across diverse domains, spanning from world knowledge to math and reasoning. Our annotation is based on text segments, which can help pinpoint specific factual errors. The factuality annotations are further supplemented by predefined error types and reference links that either support or contradict the statement. In our experiments, we investigate the performance of several LLM-based factuality evaluators on FELM, including both vanilla LLMs and those augmented with retrieval mechanisms and chain-of-thought processes. 
Our findings reveal that while retrieval aids factuality evaluation, current LLMs are far from satisfactory at faithfully detecting factual errors.", "keywords": "factuality;LLM", "primary_area": "", "supplementary_material": "/attachment/c0471818a954d38b54b5293a2e4c7b4cd8becbb2.zip", "author": "Shiqi Chen;Yiran Zhao;Jinghan Zhang;I-Chun Chern;Siyang Gao;Pengfei Liu;Junxian He", "authorids": "~Shiqi_Chen3;~Yiran_Zhao2;~Jinghan_Zhang1;~I-Chun_Chern1;~Siyang_Gao1;~Pengfei_Liu1;~Junxian_He1", "gender": "F;M;F;M;M;M;M", "homepage": ";https://zhaoyiran924.github.io/;https://jinghan23.github.io/;;https://www.cityu.edu.hk/stfprofile/siyangao.htm;http://pfliu.com/;https://jxhe.github.io", "dblp": ";;;284/9591;136/9876;34/3381-3;188/6127.html", "google_scholar": "4Tg7zOMAAAAJ;D_HwSlEAAAAJ;HqF5d38AAAAJ;zmit6DkAAAAJ;NK6nQ9YAAAAJ;oIz_CYEAAAAJ;BIFGeoUAAAAJ", "orcid": ";;0009-0002-1489-6162;;0000-0002-3574-6393;;", "linkedin": ";;;;;;", "or_profile": "~Shiqi_Chen3;~Yiran_Zhao2;~Jinghan_Zhang1;~I-Chun_Chern1;~Siyang_Gao1;~Pengfei_Liu1;~Junxian_He1", "aff": "City University of Hong Kong;National University of Singapore;Southeast University;Carnegie Mellon University;City University of Hong Kong;Carnegie Mellon University;Hong Kong University of Science and Technology", "aff_domain": "cityu.edu.hk;u.nus.edu;seu.edu.cn;andrew.cmu.edu;cityu.edu.hk;cmu.edu;ust.hk", "position": "PhD student;PhD student;Undergrad student;MS student;Associate Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nchen2023felm,\ntitle={{FELM}: Benchmarking Factuality Evaluation of Large Language Models},\nauthor={Shiqi Chen and Yiran Zhao and Jinghan Zhang and I-Chun Chern and Siyang Gao and Pengfei Liu and Junxian He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=jSO7Vgolc6}\n}", "github": "", "project": "", "reviewers": "Bttr;x7TP;hoDi;jaKw;jaFr", "pdf_size": 2524536, "rating": "6;6;6;7;8", "confidence": "4;3;3;4;4", "wc_summary_and_contributions": "55;167;32;91;124", "wc_strengths": "16;47;48;112;94", "wc_improvement": "881;357;117;112;12", "wc_limitations": "6;42;1;7;1", "wc_correctness": "7;47;14;8;1", "wc_clarity": "9;8;17;4;1", "wc_relation_to_prior_work": "25;11;1;14;1", "wc_documentation": "5;9;1;9;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "1005;689;232;358;236", "wc_reply_reviewers": "21;23;0;12;0", "wc_reply_authors": "1963;992;509;406;24", "reply_reviewers": "1;1;0;1;0", "reply_authors": "3;3;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 93.8, 48.172191148005716 ], "wc_strengths_avg": [ 63.4, 34.788503848254244 ], "wc_improvement_avg": [ 295.8, 313.8467141774787 ], "wc_limitations_avg": [ 11.4, 15.49967741599805 ], "wc_correctness_avg": [ 15.4, 16.32911510156016 ], "wc_clarity_avg": [ 7.8, 5.418486873657626 ], "wc_relation_to_prior_work_avg": [ 10.4, 8.97997772825746 ], "wc_documentation_avg": [ 5.0, 3.5777087639996634 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 504.0, 300.78231330980884 ], "wc_reply_reviewers_avg": [ 11.2, 9.867117106835208 ], "wc_reply_authors_avg": [ 778.8, 667.7243143693362 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 89, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=189532262303458891&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cityu.edu.hk;u.nus.edu;seu.edu.cn;andrew.cmu.edu;cityu.edu.hk;cmu.edu;ust.hk", "author_num": 7, "aff_unique_index": "0;1;2;3;0;3;4", "aff_unique_norm": "City University of Hong Kong;National University of Singapore;Southeast University;Carnegie Mellon University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.nus.edu.sg;https://www.seu.edu.cn/;https://www.cmu.edu;https://www.ust.hk", "aff_unique_abbr": "CityU;NUS;SEU;CMU;HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;2;0;2;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "Spuriosity Rankings: Sorting Data to Measure and Mitigate Biases", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70691", "id": "jSuhnO9QJv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81cca94f16f20d5548c76c3344b27dea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jSuhnO9QJv", "openreview": "https://openreview.net/forum?id=jSuhnO9QJv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70691", "video": "https://nips.cc/virtual/2023/poster/70691", "author_site": "Mazda Moayeri, Wenxiao Wang, Sahil Singla, Soheil Feizi", "tldr": "", "abstract": "We present a simple but effective method to measure and mitigate model biases caused by reliance on spurious cues. Instead of requiring costly changes to one's data or model training, our method better utilizes the data one already has by sorting them. Specifically, we rank images within their classes based on spuriosity (the degree to which common spurious cues are present), proxied via deep neural features of an interpretable network. With spuriosity rankings, it is easy to identify minority subpopulations (i.e. low spuriosity images) and assess model bias as the gap in accuracy between high and low spuriosity images. One can even efficiently remove a model's bias at little cost to accuracy by finetuning its classification head on low spuriosity images, resulting in fairer treatment of samples regardless of spuriosity. We demonstrate our method on ImageNet, annotating $5000$ class-feature dependencies ($630$ of which we find to be spurious) and generating a dataset of $325k$ soft segmentations for these features along the way. Having computed spuriosity rankings via the identified spurious neural features, we assess biases for $89$ diverse models and find that class-wise biases are highly correlated across models. 
Our results suggest that model bias due to spurious feature reliance is influenced far more by what the model is trained on than how it is trained.", "keywords": "spurious correlations;interpretability;bias;distributional robustness", "primary_area": "", "supplementary_material": "/attachment/fa7eb8210b6ab8b3f0a465d182826b2cd43660ad.zip", "author": "Mazda Moayeri;Wenxiao Wang;Sahil Singla;Soheil Feizi", "authorids": "~Mazda_Moayeri1;~Wenxiao_Wang1;~Sahil_Singla1;~Soheil_Feizi2", "gender": ";M;M;M", "homepage": "https://www.cs.umd.edu/people/mmoayeri;https://wangwenxiao.github.io;https://singlasahil14.github.io/;https://www.cs.umd.edu/~sfeizi/", "dblp": "261/8493;243/5853-2;55/8911-2;57/2132", "google_scholar": "4f4m6O0AAAAJ;hn0u5VgAAAAJ;jjjbOI4AAAAJ;lptAmrMAAAAJ", "orcid": ";;;", "linkedin": ";wenxiaowang/;;", "or_profile": "~Mazda_Moayeri1;~Wenxiao_Wang1;~Sahil_Singla1;~Soheil_Feizi2", "aff": "University of Maryland, College Park;Sony AI;Google;University of Maryland, College Park", "aff_domain": "umd.edu;sony.com;google.com;umd.edu", "position": "PhD student;Intern;Researcher;Associate Professor", "bibtex": "@inproceedings{\nmoayeri2023spuriosity,\ntitle={Spuriosity Rankings: Sorting Data to Measure and Mitigate Biases},\nauthor={Mazda Moayeri and Wenxiao Wang and Sahil Singla and Soheil Feizi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jSuhnO9QJv}\n}", "github": "", "project": "", "reviewers": "y13p;ck2C;4unL;9RXj", "pdf_size": 13106271, "rating": "6;6;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;2;3", "wc_summary": "168;105;142;46", "wc_strengths": "128;137;156;54", "wc_weaknesses": "89;125;144;45", "wc_questions": "1;74;42;65", "wc_limitations": "7;62;59;61", "wc_review": "393;503;543;271", "wc_reply_reviewers": "0;36;143;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 115.25, 45.82234716816676 ], "wc_strengths_avg": [ 118.75, 38.72579889427718 ], "wc_weaknesses_avg": [ 100.75, 37.764897722620674 ], "wc_questions_avg": [ 45.5, 28.217902119044926 ], "wc_limitations_avg": [ 47.25, 23.263436977368585 ], "wc_review_avg": [ 427.5, 105.73906562855565 ], "wc_reply_reviewers_avg": [ 44.75, 58.59767486854747 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1783446786859474224&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "umd.edu;sony.com;google.com;umd.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Maryland;Sony;Google", "aff_unique_dep": ";Sony AI;Google", "aff_unique_url": "https://www.umd.edu;https://www.sony.com;https://www.google.com", "aff_unique_abbr": "UMD;Sony AI;Google", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "College Park;;Mountain View", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Japan" }, { "title": "SPRING: Studying Papers and Reasoning to play Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70690", "id": 
"jU9qiRMDtR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46c2a9a6f2b2be68682013eb1173c801-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jU9qiRMDtR", "openreview": "https://openreview.net/forum?id=jU9qiRMDtR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70690", "video": "https://nips.cc/virtual/2023/poster/70690", "author_site": "Yue Wu, So Yeon Min, Shrimai Prabhumoye, Yonatan Bisk, Russ Salakhutdinov, Amos Azaria, Tom Mitchell, Yuanzhi Li", "tldr": "", "abstract": "Open-world survival games pose significant challenges for AI algorithms due to their multi-tasking, deep exploration, and goal prioritization requirements. Despite reinforcement learning (RL) being popular for solving games, its high sample complexity limits its effectiveness in complex open-world games like Crafter or Minecraft. We propose a novel approach, SPRING, to read Crafter's original academic paper and use the knowledge learned to reason and play the game through a large language model (LLM).\nPrompted with the LaTeX source as game context and a description of the agent's current observation, our SPRING framework employs a directed acyclic graph (DAG) with game-related questions as nodes and dependencies as edges. We identify the optimal action to take in the environment by traversing the DAG and calculating LLM responses for each node in topological order, with the LLM's answer to final node directly translating to environment actions.\nIn our experiments, we study the quality of in-context \"reasoning\" induced by different forms of prompts under the setting of the Crafter environment. Our experiments suggest that LLMs, when prompted with consistent chain-of-thought, have great potential in completing sophisticated high-level trajectories. Quantitatively, SPRING with GPT-4 outperforms all state-of-the-art RL baselines, trained for 1M steps, without any training. \nFinally, we show the potential of Crafter as a test bed for LLMs. 
Code at github.com/holmeswww/SPRING", "keywords": "Games;Instruction Manual;Crafter;Open-world games;Large Language Models;Language Models;Zero-shot;In-context prompting", "primary_area": "", "supplementary_material": "", "author": "Yue Wu;So Yeon Min;Shrimai Prabhumoye;Yonatan Bisk;Ruslan Salakhutdinov;Amos Azaria;Tom Mitchell;Yuanzhi Li", "authorids": "~Yue_Wu17;~So_Yeon_Min2;~Shrimai_Prabhumoye1;~Yonatan_Bisk1;~Ruslan_Salakhutdinov1;~Amos_Azaria1;~Tom_Mitchell2;~Yuanzhi_Li1", "gender": "M;F;F;M;Not Specified;M;M;M", "homepage": "https://www.yuewu.ml;;https://www.cs.cmu.edu/~sprabhum/;http://www.YonatanBisk.com;http://azariaa.com;http://www.cs.cmu.edu/~tom;;https://www.cs.cmu.edu/~rsalakhu/", "dblp": "41/5979;78/84;203/8169;38/9282;18/9923;;73/3628;", "google_scholar": "LcrSIhgAAAAJ;dkRTvvcAAAAJ;A6JZ81cAAAAJ;bWoGh8UAAAAJ;https://scholar.google.com.tw/citations?user=sdfKs_sAAAAJ;;;", "orcid": ";;;0000-0002-2111-9081;0000-0002-5057-1309;;;", "linkedin": ";;;yonatanbisk/;;;;", "or_profile": "~Yue_Wu17;~So_Yeon_Min2;~Shrimai_Prabhumoye1;~Yonatan_Bisk1;~Amos_Azaria1;~Tom_Mitchell2;~Yuanzhi_Li1;~Russ_Salakhutdinov1", "aff": "Microsoft Research;Carnegie Mellon University;NVIDIA;Meta;Ariel University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "research.microsoft.com;andrew.cmu.edu;nvidia.com;meta.com;ariel.ac.il;cs.cmu.edu;andrew.cmu.edu;cs.cmu.edu", "position": "Intern;PhD student;Researcher;Visiting Professor;Associate Professor;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023spring,\ntitle={{SPRING}: Studying Papers and Reasoning to play Games},\nauthor={Yue Wu and So Yeon Min and Shrimai Prabhumoye and Yonatan Bisk and Ruslan Salakhutdinov and Amos Azaria and Tom Mitchell and Yuanzhi Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jU9qiRMDtR}\n}", "github": "", "project": "", "reviewers": "AJEx;QUqN;FDDh;XAmG", "pdf_size": 3248191, "rating": "5;6;7;8", "confidence": "4;3;3;4", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "2;3;4;3", "wc_summary": "28;95;197;82", "wc_strengths": "24;77;39;104", "wc_weaknesses": "226;136;46;506", "wc_questions": "9;40;39;82", "wc_limitations": "1;6;1;1", "wc_review": "288;354;322;775", "wc_reply_reviewers": "13;11;0;195", "wc_reply_authors": "32;0;0;134", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 100.5, 61.11669166438903 ], "wc_strengths_avg": [ 61.0, 31.45631892005166 ], "wc_weaknesses_avg": [ 228.5, 172.39127008059313 ], "wc_questions_avg": [ 42.5, 25.985572920372565 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 434.75, 197.82489100211833 ], "wc_reply_reviewers_avg": [ 54.75, 81.12451848855561 ], "wc_reply_authors_avg": [ 41.5, 54.97954164959908 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4486300248896724513&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": 
"research.microsoft.com;andrew.cmu.edu;nvidia.com;meta.com;ariel.ac.il;cs.cmu.edu;andrew.cmu.edu;cs.cmu.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;1;1;1", "aff_unique_norm": "Microsoft;Carnegie Mellon University;NVIDIA;Meta;Ariel University", "aff_unique_dep": "Microsoft Research;;NVIDIA Corporation;Meta Platforms, Inc.;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cmu.edu;https://www.nvidia.com;https://meta.com;https://www.ariel.ac.il", "aff_unique_abbr": "MSR;CMU;NVIDIA;Meta;Ariel U", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Israel" }, { "title": "RAPHAEL: Text-to-Image Generation via Large Mixture of Diffusion Paths", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70689", "id": "jUdZCcoOu3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/821655c7dc4836838cd8524d07f9d6fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jUdZCcoOu3", "openreview": "https://openreview.net/forum?id=jUdZCcoOu3", "poster": "/media/PosterPDFs/NeurIPS%202023/70689.png?t=1702753189.8115768", "slides": "https://nips.cc/virtual/2023/poster/70689", "video": "https://nips.cc/virtual/2023/poster/70689", "author_site": "Zeyue Xue, Guanglu Song, Qiushan Guo, Boxiao Liu, Zhuofan Zong, Yu Liu, Ping Luo", "tldr": "", "abstract": "Text-to-image generation has recently witnessed remarkable achievements. We introduce a text-conditional image diffusion model, termed RAPHAEL, to generate highly artistic images, which accurately portray the text prompts, encompassing multiple nouns, adjectives, and verbs. This is achieved by stacking tens of mixture-of-experts (MoEs) layers, i.e., space-MoE and time-MoE layers, enabling billions of diffusion paths (routes) from the network input to the output. Each path intuitively functions as a \"painter\" for depicting a particular textual concept onto a specified image region at a diffusion timestep. Comprehensive experiments reveal that RAPHAEL outperforms recent cutting-edge models, such as Stable Diffusion, ERNIE-ViLG 2.0, DeepFloyd, and DALL-E 2, in terms of both image quality and aesthetic appeal. Firstly, RAPHAEL exhibits superior performance in switching images across diverse styles, such as Japanese comics, realism, cyberpunk, and ink illustration. Secondly, a single model with three billion parameters, trained on 1,000 A100 GPUs for two months, achieves a state-of-the-art zero-shot FID score of 6.61 on the COCO dataset. Furthermore, RAPHAEL significantly surpasses its counterparts in human evaluation on the ViLG-300 benchmark. We believe that RAPHAEL holds the potential to propel the frontiers of image generation research in both academia and industry, paving the way for future breakthroughs in this rapidly evolving field. 
More details can be found on a webpage: https://raphael-painter.github.io/.", "keywords": "Diffusion Model;Text-to-Image Generation", "primary_area": "", "supplementary_material": "/attachment/b67de78331e02c2401e5fb2c118445759343a8be.pdf", "author": "Zeyue Xue;Guanglu Song;Qiushan Guo;Boxiao Liu;Zhuofan Zong;Yu Liu;Ping Luo", "authorids": "~Zeyue_Xue1;~Guanglu_Song2;~Qiushan_Guo1;~Boxiao_Liu1;~Zhuofan_Zong1;~Yu_Liu2;~Ping_Luo2", "gender": ";M;M;M;M;M;", "homepage": ";;https://guoqiushan.github.io/;;https://zongzhuofan.github.io/;http://liuyu.us;", "dblp": ";207/4745;231/1814;188/2274;266/4989;97/2274-15;", "google_scholar": ";Bd3v08QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;-zEM0ycAAAAJ;vls0YhoAAAAJ;;", "orcid": ";;;0000-0002-9792-1361;;;", "linkedin": ";;;;;;", "or_profile": "~Zeyue_Xue1;~Guanglu_Song2;~Qiushan_Guo1;~Boxiao_Liu1;~Zhuofan_Zong1;~Yu_Liu2;~Ping_Luo2", "aff": ";Sensetime;The University of Hong Kong;Sensetime Research;Beihang University;SenseTime;", "aff_domain": ";sensetime.com;hku.hk;sensetime.com;buaa.edu.cn;sensetime.com;", "position": ";Computer Vision Researcher;PhD student;Researcher;MS student;Principal Researcher;", "bibtex": "@inproceedings{\nxue2023raphael,\ntitle={{RAPHAEL}: Text-to-Image Generation via Large Mixture of Diffusion Paths},\nauthor={Zeyue Xue and Guanglu Song and Qiushan Guo and Boxiao Liu and Zhuofan Zong and Yu Liu and Ping Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jUdZCcoOu3}\n}", "github": "", "project": "", "reviewers": "fnVX;8jJJ;NLKN;PszJ", "pdf_size": 3756055, "rating": "5;6;7;7", "confidence": "4;4;4;5", "soundness": "3;3;2;4", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "96;54;102;144", "wc_strengths": "67;40;97;294", "wc_weaknesses": "116;56;627;278", "wc_questions": "139;53;32;52", "wc_limitations": "20;4;136;33", "wc_review": "438;207;994;801", "wc_reply_reviewers": "161;19;605;56", "wc_reply_authors": "26;11;534;21", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 31.89043743820395 ], "wc_strengths_avg": [ 124.5, 99.91621490028533 ], "wc_weaknesses_avg": [ 269.25, 221.9362239473313 ], "wc_questions_avg": [ 69.0, 41.27347816697788 ], "wc_limitations_avg": [ 48.25, 51.69320554966581 ], "wc_review_avg": [ 610.0, 306.5656536535037 ], "wc_reply_reviewers_avg": [ 210.25, 233.78555879266796 ], "wc_reply_authors_avg": [ 148.0, 222.9226323189281 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6511449396589151082&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";sensetime.com;hku.hk;sensetime.com;buaa.edu.cn;sensetime.com;", "author_num": 7, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "SenseTime;University of Hong Kong;Beihang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sensetime.com;https://www.hku.hk;http://www.buaa.edu.cn/", "aff_unique_abbr": "SenseTime;HKU;BUAA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "China" }, { "title": "Scalable 3D Captioning with Pretrained Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73490", "id": "jUpVFjRdUV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee4814f9bce0cae7991d3341bb081b55-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=jUpVFjRdUV", "openreview": "https://openreview.net/forum?id=jUpVFjRdUV", "poster": "/media/PosterPDFs/NeurIPS%202023/73490.png?t=1697043906.4281673", "slides": "https://nips.cc/virtual/2023/poster/73490", "video": "https://nips.cc/virtual/2023/poster/73490", "author_site": "Tiange Luo, Chris Rockwell, Honglak Lee, Justin Johnson", "tldr": "", "abstract": "We introduce Cap3D, an automatic approach for generating descriptive text for 3D objects. This approach utilizes pretrained models from image captioning, image-text alignment, and LLM to consolidate captions from multiple views of a 3D asset, completely side-stepping the time-consuming and costly process of manual annotation. We apply Cap3D to the recently introduced large-scale 3D dataset, Objaverse, resulting in 660k 3D-text pairs. Our evaluation, conducted using 41k human annotations from the same dataset, demonstrates that Cap3D surpasses human-authored descriptions in terms of quality, cost, and speed. Through effective prompt engineering, Cap3D rivals human performance in generating geometric descriptions on 17k collected annotations from the ABO dataset. Finally, we finetune Text-to-3D models on Cap3D and human captions, and show Cap3D outperforms; and benchmark the SOTA including Point\u00b7E, Shape\u00b7E, and DreamFusion.", "keywords": "3D Captioning;Text-to-3D;Multi-Modal Learning", "primary_area": "", "supplementary_material": "", "author": "Tiange Luo;Chris Rockwell;Honglak Lee;Justin Johnson", "authorids": "~Tiange_Luo1;~Chris_Rockwell1;~Honglak_Lee2;~Justin_Johnson1", "gender": "M;;M;M", "homepage": "https://tiangeluo.github.io/;https://crockwell.github.io/;http://cs.stanford.edu/people/jcjohns/;http://web.eecs.umich.edu/~honglak", "dblp": "227/2386.html;61/4160-1;04/3396;58/2562", "google_scholar": "https://scholar.google.com/citations?hl=en;pB9hZ_MAAAAJ;mS5k4CYAAAAJ;fmSHtE8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tiange_Luo1;~Chris_Rockwell1;~Justin_Johnson1;~Honglak_Lee1", "aff": "Peking University;University of Michigan;Meta Facebook;University of Michigan", "aff_domain": "pku.edu;umich.edu;meta.com;umich.edu", "position": "MS student;PhD student;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nluo2023scalable,\ntitle={Scalable 3D Captioning with Pretrained Models},\nauthor={Tiange Luo and Chris Rockwell and Honglak Lee and Justin Johnson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=jUpVFjRdUV}\n}", "github": "", "project": "", "reviewers": "bhAA;Dwxs;UuMn;WJz6", "pdf_size": 8590918, "rating": "6;7;7;7", "confidence": "5;4;4;4", "wc_summary_and_contributions": "174;34;122;82", "wc_strengths": "51;50;81;115", "wc_improvement": "132;29;47;194", "wc_limitations": "16;43;132;106", "wc_correctness": "1;9;14;17", "wc_clarity": "1;1;16;20", "wc_relation_to_prior_work": "1;28;19;18", "wc_documentation": "1;1;33;21", "wc_additional_feedback": "1;1;1;1", "wc_review": "378;196;465;574", "wc_reply_reviewers": "0;0;0;199", "wc_reply_authors": "741;384;1994;1912", 
"reply_reviewers": "0;0;0;3", "reply_authors": "2;2;4;5", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 103.0, 51.487862647424 ], "wc_strengths_avg": [ 74.25, 26.621185172715357 ], "wc_improvement_avg": [ 100.5, 66.53758336459177 ], "wc_limitations_avg": [ 74.25, 46.67105634116288 ], "wc_correctness_avg": [ 10.25, 6.057020719792859 ], "wc_clarity_avg": [ 9.5, 8.616843969807043 ], "wc_relation_to_prior_work_avg": [ 16.5, 9.759610647971568 ], "wc_documentation_avg": [ 14.0, 13.674794331177344 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 403.25, 138.34625943624206 ], "wc_reply_reviewers_avg": [ 49.75, 86.16952767655164 ], "wc_reply_authors_avg": [ 1257.75, 707.2087297962321 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1117781495679162693&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "pku.edu;umich.edu;meta.com;umich.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Peking University;University of Michigan;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "http://www.pku.edu.cn;https://www.umich.edu;https://meta.com", "aff_unique_abbr": "Peking U;UM;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Beyond Deep Ensembles: A Large-Scale Evaluation of Bayesian Deep Learning under Distribution Shift", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70688", "id": "jX49iKr6vb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d97b7e62022c859347397f6c1e8d0f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jX49iKr6vb", "openreview": "https://openreview.net/forum?id=jX49iKr6vb", "poster": "/media/PosterPDFs/NeurIPS%202023/70688.png?t=1701943536.101334", "slides": "https://nips.cc/virtual/2023/poster/70688", "video": "https://nips.cc/virtual/2023/poster/70688", "author_site": "Florian Seligmann, Philipp Becker, Michael Volpp, Gerhard Neumann", "tldr": "", "abstract": "Bayesian deep learning (BDL) is a promising approach to achieve well-calibrated predictions on distribution-shifted data. Nevertheless, there exists no large-scale survey that evaluates recent SOTA methods on diverse, realistic, and challenging benchmark tasks in a systematic manner. To provide a clear picture of the current state of BDL research, we evaluate modern BDL algorithms on real-world datasets from the WILDS collection containing challenging classification and regression tasks, with a focus on generalization capability and calibration under distribution shift. We compare the algorithms on a wide range of large, convolutional and transformer-based neural network architectures. In particular, we investigate a signed version of the expected calibration error that reveals whether the methods are over- or underconfident, providing further insight into the behavior of the methods. Further, we provide the first systematic evaluation of BDL for fine-tuning large pre-trained models, where training from scratch is prohibitively expensive. 
Finally, given the recent success of Deep Ensembles, we extend popular single-mode posterior approximations to multiple modes by the use of ensembles. While we find that ensembling single-mode approximations generally improves the generalization capability and calibration of the models by a significant margin, we also identify a failure mode of ensembles when finetuning large transformer-based language models.\n In this setting, variational inference based approaches such as last-layer Bayes By Backprop outperform other methods in terms of accuracy by a large margin, while modern approximate inference algorithms such as SWAG achieve the best calibration.", "keywords": "bayesian deep learning;distribution shift;calibration", "primary_area": "", "supplementary_material": "/attachment/db3351b7dc4a2123848a1620e5e20c8b577a5131.pdf", "author": "Florian Seligmann;Philipp Becker;Michael Volpp;Gerhard Neumann", "authorids": "~Florian_Seligmann1;~Philipp_Becker1;~Michael_Volpp1;~Gerhard_Neumann2", "gender": "M;M;M;M", "homepage": "https://github.com/Feuermagier;;;https://alr.anthropomatik.kit.edu/", "dblp": ";66/1316;239/6069;60/4878", "google_scholar": ";https://scholar.google.de/citations?user=jXx-LuQAAAAJ;https://scholar.google.de/citations?user=K4fbAT8AAAAJ;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Florian_Seligmann1;~Philipp_Becker1;~Michael_Volpp1;~Gerhard_Neumann1", "aff": "Karlsruhe Institute of Technology;FZI Forschungszentrum Informatik ;Bosch Center for Artificial Intelligence ;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;fzi.de;bosch.com;kit.edu", "position": "MS student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nseligmann2023beyond,\ntitle={Beyond Deep Ensembles: A Large-Scale Evaluation of Bayesian Deep Learning under Distribution Shift},\nauthor={Florian Seligmann and Philipp Becker and Michael Volpp and Gerhard Neumann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jX49iKr6vb}\n}", "github": "", "project": "", "reviewers": "eT5S;hcj9;CBPk;1hhR;Sjhm", "pdf_size": 633951, "rating": "4;6;6;7;8", "confidence": "4;4;3;3;4", "soundness": "2;3;3;3;4", "novelty": "1;3;2;2;3", "presentation": "1;4;3;3;4", "wc_summary": "25;85;155;107;65", "wc_strengths": "57;159;54;113;141", "wc_weaknesses": "910;293;307;147;118", "wc_questions": "104;52;346;200;68", "wc_limitations": "6;7;5;1;21", "wc_review": "1102;596;867;568;413", "wc_reply_reviewers": "31;69;28;0;43", "wc_reply_authors": "66;23;23;0;27", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 87.4, 43.25551987897036 ], "wc_strengths_avg": [ 104.8, 42.85043757069465 ], "wc_weaknesses_avg": [ 355.0, 287.6129343405821 ], "wc_questions_avg": [ 154.0, 108.8852607105296 ], "wc_limitations_avg": [ 8.0, 6.81175454637056 ], "wc_review_avg": [ 709.2, 244.81780980966235 ], "wc_reply_reviewers_avg": [ 34.2, 22.39106964841117 ], "wc_reply_authors_avg": [ 27.8, 21.348536249588637 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.18463723646899913, "gs_citation": 21, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17172845891162654665&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "kit.edu;fzi.de;bosch.com;kit.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Karlsruhe Institute of Technology;FZI Forschungszentrum Informatik;Bosch Center for Artificial Intelligence", "aff_unique_dep": ";;Center for Artificial Intelligence", "aff_unique_url": "https://www.kit.edu;https://www.fzi.de;https://www.bosch-ai.com", "aff_unique_abbr": "KIT;FZI;BCAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Moral Responsibility for AI Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70687", "id": "jYIknUIgkd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d5b7fd8c669fac58d6702188ed63afa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jYIknUIgkd", "openreview": "https://openreview.net/forum?id=jYIknUIgkd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70687", "video": "https://nips.cc/virtual/2023/poster/70687", "tldr": "", "abstract": "As more and more decisions that have a significant ethical dimension are being outsourced to AI systems, it is important to have a definition of _moral responsibility_ that can be applied to AI systems. Moral responsibility for an outcome of an agent who performs some action is commonly taken to involve both a _causal condition_ and an _epistemic condition_: the action should cause the outcome, and the agent should have been aware - in some form or other - of the possible moral consequences of their action. This paper presents a formal definition of both conditions within the framework of causal models. I compare my approach to the existing approaches of Braham and van Hees (BvH) and of Halpern and Kleiman-Weiner (HK). 
I then generalize my definition into a _degree of responsibility_.", "keywords": "responsibility;causation;causal models", "primary_area": "", "supplementary_material": "", "author": "Sander Beckers", "authorids": "~Sander_Beckers1", "gender": "M", "homepage": "http://sanderbeckers.com", "dblp": "37/11297.html", "google_scholar": "", "orcid": "0000-0002-9202-0644", "linkedin": "", "or_profile": "~Sander_Beckers1", "aff": "University of Amsterdam", "aff_domain": "uva.nl", "position": "Postdoc", "bibtex": "@inproceedings{\nbeckers2023moral,\ntitle={Moral Responsibility for {AI} Systems},\nauthor={Sander Beckers},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jYIknUIgkd}\n}", "github": "", "project": "", "reviewers": "7Pui;UYHL;ZjtP;ir7c", "pdf_size": 273229, "rating": "3;6;7;7", "confidence": "3;3;1;2", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "113;68;72;43", "wc_strengths": "57;44;151;58", "wc_weaknesses": "535;32;14;75", "wc_questions": "74;28;80;17", "wc_limitations": "18;26;1;34", "wc_review": "797;198;318;227", "wc_reply_reviewers": "1442;596;0;0", "wc_reply_authors": "1396;430;0;0", "reply_reviewers": "2;2;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 25.10975905897944 ], "wc_strengths_avg": [ 77.5, 42.79310692155922 ], "wc_weaknesses_avg": [ 164.0, 215.34042815969323 ], "wc_questions_avg": [ 49.75, 27.60774347895894 ], "wc_limitations_avg": [ 19.75, 12.214233500306108 ], "wc_review_avg": [ 385.0, 241.9535079307593 ], "wc_reply_reviewers_avg": [ 509.5, 590.8085561330337 ], "wc_reply_authors_avg": [ 456.5, 570.1199435206595 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.6897007348075542, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6317218164450032182&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "uva.nl", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "title": "Design from Policies: Conservative Test-Time Adaptation for Offline Policy Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70686", "id": "jZYf1GxH1V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31610e68fe41a62e460e044216a10766-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jZYf1GxH1V", "openreview": "https://openreview.net/forum?id=jZYf1GxH1V", "poster": "/media/PosterPDFs/NeurIPS%202023/70686.png?t=1701913967.9554508", "slides": "https://nips.cc/virtual/2023/poster/70686", "video": "https://nips.cc/virtual/2023/poster/70686", "author_site": "Jinxin Liu, Hongyin Zhang, Zifeng Zhuang, Yachen Kang, Donglin Wang, Bin Wang", "tldr": "", "abstract": "In this work, we decouple the iterative bi-level offline RL (value estimation and policy extraction) from the offline training phase, forming a non-iterative bi-level paradigm and avoiding the iterative error propagation over two levels. 
Specifically, this non-iterative paradigm allows us to conduct inner-level optimization (value estimation) in training, while performing outer-level optimization (policy extraction) in testing. Naturally, such a paradigm raises three core questions that are not fully answered by prior non-iterative offline RL counterparts like reward-conditioned policy: (q1) What information should we transfer from the inner-level to the outer-level? (q2) What should we pay attention to when exploiting the transferred information for safe/confident outer-level optimization? (q3) What are the benefits of concurrently conducting outer-level optimization during testing? Motivated by model-based optimization (MBO), we propose DROP (design from policies), which fully answers the above questions. Specifically, in the inner-level, DROP decomposes offline data into multiple subsets, and learns an MBO score model (a1). To ensure safe exploitation of the score model in the outer-level, we explicitly learn a behavior embedding and introduce a conservative regularization (a2). During testing, we show that DROP permits deployment adaptation, enabling adaptive inference across states (a3). Empirically, we evaluate DROP on various tasks, showing that DROP achieves comparable or better performance compared to prior methods.", "keywords": "offline reinforcement learning;test-time adaptation", "primary_area": "", "supplementary_material": "/attachment/5a02b103d53ef7f89d22b37b8ab646354d9b08f8.zip", "author": "Jinxin Liu;Hongyin Zhang;Zifeng Zhuang;Yachen Kang;Donglin Wang;Bin Wang", "authorids": "~Jinxin_Liu1;~Hongyin_Zhang1;~Zifeng_Zhuang1;~Yachen_Kang1;~Donglin_Wang1;~Bin_Wang12", "gender": ";;M;M;M;M", "homepage": ";;;;https://milab.westlake.edu.cn/;http://binwang.top", "dblp": ";;276/5034;247/6551.html;;13/1898-34", "google_scholar": ";;;LCTdGEcAAAAJ;https://scholar.google.ca/citations?user=-fo6wdwAAAAJ;KWZG_YsAAAAJ", "orcid": ";;;;0000-0002-8188-3735;0000-0002-0267-3749", "linkedin": ";;;;;", "or_profile": "~Jinxin_Liu1;~Hongyin_Zhang1;~Zifeng_Zhuang1;~Yachen_Kang1;~Donglin_Wang1;~Bin_Wang12", "aff": ";;Zhejiang University;Zhejiang University;Westlake University;Huawei Noah's Ark Lab", "aff_domain": ";;zju.edu.cn;zju.edu.cn;westlake.edu.cn;huawei.com", "position": ";;PhD student;PhD student;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\nliu2023design,\ntitle={Design from Policies: Conservative Test-Time Adaptation for Offline Policy Optimization},\nauthor={Jinxin Liu and Hongyin Zhang and Zifeng Zhuang and Yachen Kang and Donglin Wang and Bin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jZYf1GxH1V}\n}", "github": "", "project": "", "reviewers": "YuS3;SsTF;7FJj;hhjh", "pdf_size": 1187839, "rating": "3;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;2;2;2", "wc_summary": "99;52;81;76", "wc_strengths": "51;93;38;223", "wc_weaknesses": "1304;229;53;544", "wc_questions": "55;163;273;86", "wc_limitations": "15;18;1;26", "wc_review": "1524;555;446;955", "wc_reply_reviewers": "2312;180;692;140", "wc_reply_authors": "2125;419;1054;462", "reply_reviewers": "3;1;3;2", "reply_authors": "5;2;4;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 77.0, 16.777961735562517 ], 
"wc_strengths_avg": [ 101.25, 73.17231375322227 ], "wc_weaknesses_avg": [ 532.5, 478.89899770202067 ], "wc_questions_avg": [ 144.25, 84.09332613233941 ], "wc_limitations_avg": [ 15.0, 9.027735042633894 ], "wc_review_avg": [ 870.0, 422.4754430733223 ], "wc_reply_reviewers_avg": [ 831.0, 882.3213700234172 ], "wc_reply_authors_avg": [ 1015.0, 688.2307025990631 ], "reply_reviewers_avg": [ 2.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15239852686055745245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;zju.edu.cn;zju.edu.cn;westlake.edu.cn;huawei.com", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Zhejiang University;Westlake University;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "https://www.zju.edu.cn;https://www.westlake.edu.cn;https://www.huawei.com", "aff_unique_abbr": "ZJU;WU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Generator Born from Classifier", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70685", "id": "jcJVgIFY2r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a97f0218b49bc17ea3f121a0e724f028-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jcJVgIFY2r", "openreview": "https://openreview.net/forum?id=jcJVgIFY2r", "poster": "/media/PosterPDFs/NeurIPS%202023/70685.png?t=1701741624.2008216", "slides": "https://nips.cc/virtual/2023/poster/70685", "video": "https://nips.cc/virtual/2023/poster/70685", "author_site": "Runpeng Yu, Xinchao Wang", "tldr": "", "abstract": "In this paper, we make a bold attempt toward an ambitious task: given a pre-trained classifier, we aim to reconstruct an image generator, without relying on any data samples. From a black-box perspective, this challenge seems intractable, since it inevitably involves identifying the inverse function for a classifier, which is, by nature, an information extraction process. As such, we resort to leveraging the knowledge encapsulated within the parameters of the neural network. Grounded on the theory of Maximum-Margin Bias of gradient descent, we propose a novel learning paradigm, in which the generator is trained to ensure that the convergence conditions of the network parameters are satisfied over the generated distribution of the samples. 
Empirical validation from various image generation tasks substantiates the efficacy of our strategy.", "keywords": "Generative Model", "primary_area": "", "supplementary_material": "/attachment/c871a401245235107c2b02aeefadd02edcc257ad.zip", "author": "Runpeng Yu;Xinchao Wang", "authorids": "~Runpeng_Yu2;~Xinchao_Wang1", "gender": ";M", "homepage": "https://yu-rp.github.io/;https://sites.google.com/site/sitexinchaowang/", "dblp": "290/7625-1;", "google_scholar": ";https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~R_Yu1;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyu2023generator,\ntitle={Generator Born from Classifier},\nauthor={Runpeng Yu and Xinchao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jcJVgIFY2r}\n}", "github": "", "project": "", "reviewers": "KxBC;gUqt;LZcP;5Ctm", "pdf_size": 919088, "rating": "3;6;7;8", "confidence": "4;3;2;4", "soundness": "2;3;2;4", "novelty": "1;3;3;3", "presentation": "1;1;2;3", "wc_summary": "60;94;46;121", "wc_strengths": "25;101;16;117", "wc_weaknesses": "212;233;43;44", "wc_questions": "693;56;54;105", "wc_limitations": "1;6;29;57", "wc_review": "991;490;188;444", "wc_reply_reviewers": "365;161;122;14", "wc_reply_authors": "1110;57;820;0", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;4;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 80.25, 29.294837429144405 ], "wc_strengths_avg": [ 64.75, 44.72345581459465 ], "wc_weaknesses_avg": [ 133.0, 89.80812880803163 ], "wc_questions_avg": [ 227.0, 269.8193840330972 ], "wc_limitations_avg": [ 23.25, 22.16275028059469 ], "wc_review_avg": [ 528.25, 290.8903358655973 ], "wc_reply_reviewers_avg": [ 165.5, 127.14656896668505 ], "wc_reply_authors_avg": [ 496.75, 479.767326419797 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3223291856101521, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7139763476829307929&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "u.nus.edu;nus.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Interaction Measures, Partition Lattices and Kernel Tests for High-Order Interactions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70684", "id": "jcRB6xHdJ2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74f11936d6144eae43730e1a49365479-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jcRB6xHdJ2", "openreview": "https://openreview.net/forum?id=jcRB6xHdJ2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70684", "video": "https://nips.cc/virtual/2023/poster/70684", "author_site": "Zhaolu Liu, Robert Peach, Pedro A.M Mediano, Mauricio 
Barahona", "tldr": "", "abstract": "Models that rely solely on pairwise relationships often fail to capture the complete statistical structure of the complex multivariate data found in diverse domains, such as socio-economic, ecological, or biomedical systems. Non-trivial dependencies between groups of more than two variables can play a significant role in the analysis and modelling of such systems, yet extracting such high-order interactions from data remains challenging. Here, we introduce a hierarchy of $d$-order ($d \\geq 2$) interaction measures, increasingly inclusive of possible factorisations of the joint probability distribution, and define non-parametric, kernel-based tests to establish systematically the statistical significance of $d$-order interactions. We also establish mathematical links with lattice theory, which elucidate the derivation of the interaction measures and their composite permutation tests; clarify the connection of simplicial complexes with kernel matrix centring; and provide a means to enhance computational efficiency. We illustrate our results numerically with validations on synthetic data, and through an application to neuroimaging data.", "keywords": "High-order interactions; Lattice theory; Kernel tests", "primary_area": "", "supplementary_material": "", "author": "Zhaolu Liu;Robert Peach;Pedro A. M. Mediano;Mauricio Barahona", "authorids": "~Zhaolu_Liu1;~Robert_Peach1;~Pedro_A._M._Mediano1;~Mauricio_Barahona1", "gender": "Not Specified;M;Not Specified;Not Specified", "homepage": "https://timliuzhaolu.github.io;;https://www.doc.ic.ac.uk/~pam213;https://www.imperial.ac.uk/people/m.barahona", "dblp": "308/3103;236/4823.html;190/7253;80/8051.html", "google_scholar": "qLUZHCQAAAAJ;;I9-416MAAAAJ;https://scholar.google.co.uk/citations?user=weulBoAAAAAJ", "orcid": "0000-0002-8721-7506;0000-0002-8738-5825;0000-0003-1789-5894;0000-0002-1089-5675", "linkedin": ";;;", "or_profile": "~Zhaolu_Liu1;~Robert_Peach1;~Pedro_A._M._Mediano1;~Mauricio_Barahona1", "aff": "Imperial College London;Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;Imperial College London;Imperial College London", "aff_domain": "ic.ac.uk;uni-wuerzburg.de;ic.ac.uk;imperial.ac.uk", "position": "PhD student;Postdoc;Lecturer;Full Professor", "bibtex": "@inproceedings{\nliu2023interaction,\ntitle={Interaction Measures, Partition Lattices and Kernel Tests for High-Order Interactions},\nauthor={Zhaolu Liu and Robert Peach and Pedro A. M. 
Mediano and Mauricio Barahona},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jcRB6xHdJ2}\n}", "github": "", "project": "", "reviewers": "SriA;6qiu;KEns;vitW", "pdf_size": 6610261, "rating": "5;6;6;8", "confidence": "2;2;2;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "70;79;102;52", "wc_strengths": "41;51;72;45", "wc_weaknesses": "120;58;50;90", "wc_questions": "63;39;5;21", "wc_limitations": "3;8;14;15", "wc_review": "297;235;243;223", "wc_reply_reviewers": "154;0;0;71", "wc_reply_authors": "723;0;0;514", "reply_reviewers": "2;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.75, 18.005207580030838 ], "wc_strengths_avg": [ 52.25, 11.94518731540029 ], "wc_weaknesses_avg": [ 79.5, 27.76238462380348 ], "wc_questions_avg": [ 32.0, 21.563858652847824 ], "wc_limitations_avg": [ 10.0, 4.847679857416329 ], "wc_review_avg": [ 249.5, 28.33284313301438 ], "wc_reply_reviewers_avg": [ 56.25, 63.44436539205038 ], "wc_reply_authors_avg": [ 309.25, 317.95548037421844 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15023847006596309741&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ic.ac.uk;uni-wuerzburg.de;ic.ac.uk;imperial.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Imperial College London;University of W\u00fcrzburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.uni-wuerzburg.de", "aff_unique_abbr": "ICL;JMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";W\u00fcrzburg", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "MKOR: Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70683", "id": "jcnvDO96N5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/39bc6e3cbf5a1991d33dc10ebff9a9cf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jcnvDO96N5", "openreview": "https://openreview.net/forum?id=jcnvDO96N5", "poster": "/media/PosterPDFs/NeurIPS%202023/70683.png?t=1701380323.9763834", "slides": "https://nips.cc/virtual/2023/poster/70683", "video": "https://nips.cc/virtual/2023/poster/70683", "author_site": "Mohammad Mozaffari, Sikan Li, Zhao Zhang, Maryam Mehri Dehnavi", "tldr": "", "abstract": "This work proposes a Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 updates, called MKOR, that improves the training time and convergence properties of deep neural networks (DNNs). Second-order techniques, while enjoying higher convergence rates than first-order counterparts, have cubic complexity with respect to the model size and/or the training batch size. Hence, they exhibit poor scalability and performance in transformer models, e.g. large language models (LLMs), because the batch sizes in these models scale with the attention-mechanism sequence length, leading to large model sizes and batch sizes.
MKOR's complexity is quadratic with respect to the model size, alleviating the computation bottlenecks in second-order methods. Because of their high computation complexity, state-of-the-art implementations of second-order methods can only afford to update the second-order information infrequently, and thus do not fully exploit the promise of better convergence from these updates. By reducing the communication complexity of the second-order updates as well as achieving a linear communication complexity, MKOR increases the frequency of second-order updates. We also propose a hybrid version of MKOR (called MKOR-H) that falls back mid-training to a first-order optimizer if the second-order updates no longer accelerate convergence. Our experiments show that MKOR outperforms state-of-the-art first-order methods, e.g. the LAMB optimizer, and the best implementations of second-order methods, i.e. KAISA/KFAC, by up to 2.57x and 1.85x respectively on BERT-Large-Uncased on 64 GPUs.", "keywords": "machine learning;deep learning;optimizers;distributed training;second-order optimization;", "primary_area": "", "supplementary_material": "/attachment/2b8ce94cac270f7afcfbcd31dcbb0b24fc3dc5b9.pdf", "author": "Mohammad Mozaffari;Sikan Li;Zhao Zhang;Maryam Mehri Dehnavi", "authorids": "~Mohammad_Mozaffari1;sli@tacc.utexas.edu;~Zhao_Zhang1;~Maryam_Mehri_Dehnavi2", "gender": "M;;;F", "homepage": "https://www.cs.toronto.edu/~mmozaffari/;;https://zhaozhang.github.io/;https://www.cs.toronto.edu/~mmehride/", "dblp": ";;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;0bd5fscAAAAJ;", "orcid": "0009-0002-4319-2324;;0000-0001-5921-0035;", "linkedin": "mohammad-mozaffari-7804b7187/;;;", "or_profile": "~Mohammad_Mozaffari1;sli@tacc.utexas.edu;~Zhao_Zhang1;~Maryam_Mehri_Dehnavi2", "aff": "University of Toronto;;Texas Advanced Computing Center;Department of Computer Science", "aff_domain": "utoronto.ca;;utexas.edu;cs.toronto.edu", "position": "PhD student;;Researcher;Associate Professor", "bibtex": "@inproceedings{\nmozaffari2023mkor,\ntitle={{MKOR}: Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates},\nauthor={Mohammad Mozaffari and Sikan Li and Zhao Zhang and Maryam Mehri Dehnavi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jcnvDO96N5}\n}", "github": "", "project": "", "reviewers": "dQpV;KDBs;zw2W;Ar2i", "pdf_size": 749672, "rating": "5;5;7;7", "confidence": "4;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "76;80;56;72", "wc_strengths": "36;56;33;100", "wc_weaknesses": "113;57;28;63", "wc_questions": "1;322;84;4", "wc_limitations": "1;2;14;4", "wc_review": "227;517;215;243", "wc_reply_reviewers": "22;39;16;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 71.0, 9.1104335791443 ], "wc_strengths_avg": [ 56.25, 26.76167969317322 ], "wc_weaknesses_avg": [ 65.25, 30.58083550199373 ], "wc_questions_avg": [ 102.75, 130.88807241303542 ], "wc_limitations_avg": [ 5.25, 5.165994579942956 ], "wc_review_avg": [ 300.5, 125.39039038140044 ], "wc_reply_reviewers_avg": [ 19.25, 13.953046262375826 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ],
"replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2642606276805884331&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "utoronto.ca;;utexas.edu;cs.toronto.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Toronto;University of Texas at Austin;Unknown Institution", "aff_unique_dep": ";Texas Advanced Computing Center;Department of Computer Science", "aff_unique_url": "https://www.utoronto.ca;https://www.tacc.utexas.edu;", "aff_unique_abbr": "U of T;TACC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States;" }, { "title": "From ViT Features to Training-free Video Object Segmentation via Streaming-data Mixture Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70682", "id": "jfsjKBDB1z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/240cc9ac4789351653d13cfcba4ee85c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jfsjKBDB1z", "openreview": "https://openreview.net/forum?id=jfsjKBDB1z", "poster": "/media/PosterPDFs/NeurIPS%202023/70682.png?t=1701199533.9377942", "slides": "https://nips.cc/virtual/2023/poster/70682", "video": "https://nips.cc/virtual/2023/poster/70682", "author_site": "Roy Uziel, Or Dinari, Oren Freifeld", "tldr": "", "abstract": "In the task of semi-supervised video object segmentation, the input is the binary mask of an object in the first frame, and the desired output consists of the corresponding masks of that object in the subsequent frames. Existing leading solutions have two main drawbacks: 1) an expensive and typically-supervised training on videos; 2) a large memory footprint during inference. Here we present a training-free solution, with a low-memory footprint, that yields state-of-the-art results. The proposed method combines pre-trained deep learning-based features (trained on still images) with more classical methods for streaming-data clustering. Designed to adapt to temporal concept drifts and generalize to diverse video content without relying on annotated images or videos, the method eliminates the need for additional training or fine-tuning, ensuring fast inference and immediate applicability to new videos. Concretely, we represent an object via a dynamic ensemble of temporally- and spatially-coherent mixtures over a representation built from pre-trained ViT features and positional embeddings. A convolutional conditional random field further improves spatial coherence and helps reject outliers. We demonstrate the efficacy of the method on key benchmarks: the DAVIS-2017 and YouTube-VOS 2018 validation datasets. Moreover, by the virtue of the low-memory footprint of the compact cluster-based representation, the method scales gracefully to high-resolution ViT features. 
Our code is available at https://github.com/BGU-CS-VIL/Training-Free-VOS", "keywords": "Unsupervised;video segmentation;clustering", "primary_area": "", "supplementary_material": "/attachment/64d3a6d2603b65b2c72c7f298d9fe2180b7ad800.zip", "author": "Roy Uziel;Or Dinari;Oren Freifeld", "authorids": "~Roy_Uziel1;~Or_Dinari1;~Oren_Freifeld1", "gender": ";M;M", "homepage": "https://uzielroy.wixsite.com/uzielroy;;https://www.cs.bgu.ac.il/~orenfr/", "dblp": "259/5174;244/3676;96/5159", "google_scholar": "WScNlk4AAAAJ;D1zF9dwAAAAJ;fxzlm6IAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Roy_Uziel1;~Or_Dinari1;~Oren_Freifeld1", "aff": "Ben Gurion University of the Negev;Apple;Ben-Gurion University", "aff_domain": "bgu.ac.il;apple.com;bgu.ac.il", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nuziel2023from,\ntitle={From ViT Features to Training-free Video Object Segmentation via Streaming-data Mixture Models},\nauthor={Roy Uziel and Or Dinari and Oren Freifeld},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jfsjKBDB1z}\n}", "github": "", "project": "", "reviewers": "5uUQ;sD4x;RHPb;XSWn;He22", "pdf_size": 7747280, "rating": "4;5;5;6;7", "confidence": "4;5;3;4;3", "soundness": "3;3;4;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "41;49;43;96;49", "wc_strengths": "47;38;37;76;53", "wc_weaknesses": "18;191;98;158;162", "wc_questions": "140;50;46;4;49", "wc_limitations": "3;8;21;9;32", "wc_review": "249;336;245;343;345", "wc_reply_reviewers": "63;9;0;11;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 55.6, 20.450916849862747 ], "wc_strengths_avg": [ 50.2, 14.190137420053409 ], "wc_weaknesses_avg": [ 125.4, 61.62986289129646 ], "wc_questions_avg": [ 57.8, 44.561867106305144 ], "wc_limitations_avg": [ 14.6, 10.518555033843764 ], "wc_review_avg": [ 303.6, 46.327529612531684 ], "wc_reply_reviewers_avg": [ 18.8, 22.471315048301026 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4193139346887674, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1541722178820770378&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "bgu.ac.il;apple.com;bgu.ac.il", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ben Gurion University of the Negev;Apple;Ben-Gurion University of the Negev", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.bgu.ac.il;https://www.apple.com;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;Apple;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "A Toolkit for Reliable Benchmarking and Research in Multi-Objective Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73489", "id": "jfwRLudQyj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4aa8891583f07ae200ba07843954caeb-Abstract-Datasets_and_Benchmarks.html", "pdf": 
"https://openreview.net/pdf?id=jfwRLudQyj", "openreview": "https://openreview.net/forum?id=jfwRLudQyj", "poster": "/media/PosterPDFs/NeurIPS%202023/73489.png?t=1701436834.400159", "slides": "https://nips.cc/virtual/2023/poster/73489", "video": "https://nips.cc/virtual/2023/poster/73489", "author_site": "Florian Felten, Lucas N. Alegre, Ann Nowe, Ana Bazzan, El Ghazali Talbi, Gr\u00e9goire Danoy, Bruno C. da Silva", "tldr": "", "abstract": "Multi-objective reinforcement learning algorithms (MORL) extend standard reinforcement learning (RL) to scenarios where agents must optimize multiple---potentially conflicting---objectives, each represented by a distinct reward function. To facilitate and accelerate research and benchmarking in multi-objective RL problems, we introduce a comprehensive collection of software libraries that includes: \n(i) MO-Gymnasium, an easy-to-use and flexible API enabling the rapid construction of novel MORL environments. It also includes more than 20 environments under this API. This allows researchers to effortlessly evaluate any algorithms on any existing domains; (ii) MORL-Baselines, a collection of reliable and efficient implementations of state-of-the-art MORL algorithms, designed to provide a solid foundation for advancing research. Notably, all algorithms are inherently compatible with MO-Gymnasium; and\n(iii) a thorough and robust set of benchmark results and comparisons of MORL-Baselines algorithms, tested across various challenging MO-Gymnasium environments. These benchmarks were constructed to serve as guidelines for the research community, underscoring the properties, advantages, and limitations of each particular state-of-the-art method.", "keywords": "Multi-Objective;Reinforcement Learning;Benchmarking", "primary_area": "", "supplementary_material": "", "author": "Florian Felten;Lucas Nunes Alegre;Ann Nowe;Ana L. C. 
Bazzan;El Ghazali Talbi;Gr\u00e9goire Danoy;Bruno Castro da Silva", "authorids": "~Florian_Felten1;~Lucas_Nunes_Alegre1;~Ann_Nowe1;~Ana_L._C._Bazzan2;~El_Ghazali_Talbi1;~Gr\u00e9goire_Danoy1;~Bruno_Castro_da_Silva1", "gender": "M;M;F;;M;M;M", "homepage": "https://ffelten.github.io/;http://www.inf.ufrgs.br/~lnalegre;https://ai.vub.ac.be/team/ann-nowe/?utm_source=www.google.com&utm_medium=organic&utm_campaign=Google&referrer-analytics=1;http://www.inf.ufrgs.br/~bazzan;https://pro.univ-lille.fr/el-ghazali-talbi;https://www.uni.lu/fstm-en/people/gregoire-danoy/;https://people.cs.umass.edu/~bsilva/", "dblp": "315/7185;250/5118;95/232.html;b/AnaLCBazzan;74/3045.html;76/5031.html;75/3139", "google_scholar": "fqzUV0AAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.be/citations?user=LH5QKbgAAAAJ;K6Z40w0AAAAJ;hoQmzocAAAAJ;https://scholar.google.com/citations?hl=fr;eskJDVUAAAAJ", "orcid": ";0000-0001-5465-4390;;0000-0002-2803-9607;;0000-0001-9419-4210;", "linkedin": "florian-felten/;lucas-alegre-b80628127;;;;gr\u00e9goire-danoy-927986/;", "or_profile": "~Florian_Felten1;~Lucas_Nunes_Alegre1;~Ann_Nowe1;~Ana_L._C._Bazzan2;~El_Ghazali_Talbi1;~Gr\u00e9goire_Danoy1;~Bruno_Castro_da_Silva1", "aff": "University of Luxemburg;Vrije Universiteit Brussel;Vrije Universiteit Brussel;UFRGS;University of Lille;University of Luxemburg;University of Massachusetts, Amherst", "aff_domain": "uni.lu;vub.be;vub.be;inf.ufrgs.br;univ-lille.fr;uni.lu;umass.edu", "position": "PhD student;PhD student;Full Professor;Full Professor;Full Professor;Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nfelten2023a,\ntitle={A Toolkit for Reliable Benchmarking and Research in Multi-Objective Reinforcement Learning},\nauthor={Florian Felten and Lucas Nunes Alegre and Ann Nowe and Ana L. C. 
Bazzan and El Ghazali Talbi and Gr{\\'e}goire Danoy and Bruno Castro da Silva},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=jfwRLudQyj}\n}", "github": "", "project": "", "reviewers": "humL;JBtw;LKgu;WQpx;QTsf;SyY8", "pdf_size": 33519948, "rating": "6;7;7;7;7;7", "confidence": "4;3;3;4;4;3", "wc_summary_and_contributions": "34;31;51;71;55;50", "wc_strengths": "33;95;136;126;26;50", "wc_improvement": "19;179;56;95;82;70", "wc_limitations": "26;21;16;12;10;17", "wc_correctness": "8;11;94;10;1;1", "wc_clarity": "4;6;46;11;5;1", "wc_relation_to_prior_work": "33;8;19;17;2;1", "wc_documentation": "9;7;43;7;11;1", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "167;359;462;350;193;192", "wc_reply_reviewers": "13;10;13;13;5;13", "wc_reply_authors": "194;330;348;243;188;728", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.833333333333333, 0.372677996249965 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 48.666666666666664, 13.374935098492585 ], "wc_strengths_avg": [ 77.66666666666667, 43.72133981885226 ], "wc_improvement_avg": [ 83.5, 48.91063279083598 ], "wc_limitations_avg": [ 17.0, 5.354126134736337 ], "wc_correctness_avg": [ 20.833333333333332, 32.96167808565308 ], "wc_clarity_avg": [ 12.166666666666666, 15.420945352200544 ], "wc_relation_to_prior_work_avg": [ 13.333333333333334, 11.115554667022044 ], "wc_documentation_avg": [ 13.0, 13.759844960366863 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 287.1666666666667, 109.57404903635816 ], "wc_reply_reviewers_avg": [ 11.166666666666666, 2.9674156357941426 ], "wc_reply_authors_avg": [ 338.5, 184.62732733807312 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.44721359549995787, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9400648890794899818&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "uni.lu;vub.be;vub.be;inf.ufrgs.br;univ-lille.fr;uni.lu;umass.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;3;0;4", "aff_unique_norm": "University of Luxembourg;Vrije Universiteit Brussel;Universidade Federal do Rio Grande do Sul;University of Lille;University of Massachusetts Amherst", "aff_unique_dep": ";;;;", "aff_unique_url": "https://wwwen.uniluxembourg.lu;https://www.vub.be;https://www.ufrgs.br;https://www.univ-lille.fr;https://www.umass.edu", "aff_unique_abbr": "Uni Lu;VUB;UFRGS;ULille;UMass Amherst", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Brussels;Amherst", "aff_country_unique_index": "0;1;1;2;3;0;4", "aff_country_unique": "Luxembourg;Belgium;Brazil;France;United States" }, { "title": "Debiasing Scores and Prompts of 2D Diffusion for View-consistent Text-to-3D Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70681", "id": "jgIrJeHHlz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/27725882a88f202e07319abbb3be7693-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jgIrJeHHlz", "openreview": "https://openreview.net/forum?id=jgIrJeHHlz", "poster": "/media/PosterPDFs/NeurIPS%202023/70681.png?t=1702452940.8671184", "slides": "https://nips.cc/virtual/2023/poster/70681", "video": "https://nips.cc/virtual/2023/poster/70681", "author_site": "Susung Hong, Donghoon Ahn, Seungryong Kim", "tldr": 
"", "abstract": "Existing score-distilling text-to-3D generation techniques, despite their considerable promise, often encounter the view inconsistency problem. One of the most notable issues is the Janus problem, where the most canonical view of an object (\\textit{e.g}., face or head) appears in other views. In this work, we explore existing frameworks for score-distilling text-to-3D generation and identify the main causes of the view inconsistency problem---the embedded bias of 2D diffusion models. Based on these findings, we propose two approaches to debias the score-distillation frameworks for view-consistent text-to-3D generation. Our first approach, called score debiasing, involves cutting off the score estimated by 2D diffusion models and gradually increasing the truncation value throughout the optimization process. Our second approach, called prompt debiasing, identifies conflicting words between user prompts and view prompts using a language model, and adjusts the discrepancy between view prompts and the viewing direction of an object. Our experimental results show that our methods improve the realism of the generated 3D objects by significantly reducing artifacts and achieve a good trade-off between faithfulness to the 2D diffusion models and 3D consistency with little overhead. Our project page is available at~\\url{https://susunghong.github.io/Debiased-Score-Distillation-Sampling/}.", "keywords": "Text-to-3D;Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/cc2e8bd2f51ca1c6209ed86dbcba74b759def060.pdf", "author": "Susung Hong;Donghoon Ahn;Seungryong Kim", "authorids": "~Susung_Hong1;~Donghoon_Ahn1;~Seungryong_Kim1", "gender": "M;M;M", "homepage": "https://susunghong.github.io/;https://sunovivid.github.io/;https://cvlab.korea.ac.kr/members/faculty", "dblp": "330/5127;211/5274;141/9955", "google_scholar": "HigIHvUAAAAJ;b_m86AoAAAAJ;cIK1hS8AAAAJ", "orcid": ";0009-0007-2602-6689;", "linkedin": ";donghoon-ahn-622290246/;", "or_profile": "~Susung_Hong1;~Donghoon_Ahn1;~Seungryong_Kim1", "aff": "Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhong2023debiasing,\ntitle={Debiasing Scores and Prompts of 2D Diffusion for View-consistent Text-to-3D Generation},\nauthor={Susung Hong and Donghoon Ahn and Seungryong Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jgIrJeHHlz}\n}", "github": "", "project": "", "reviewers": "nz3q;9gp3;gQ7C;8eVk", "pdf_size": 19537178, "rating": "5;6;6;6", "confidence": "5;5;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "52;82;158;103", "wc_strengths": "83;24;46;93", "wc_weaknesses": "94;10;27;224", "wc_questions": "2;138;29;21", "wc_limitations": "2;63;15;13", "wc_review": "233;317;275;454", "wc_reply_reviewers": "16;48;16;46", "wc_reply_authors": "16;24;14;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.75, 38.71288545174591 ], "wc_strengths_avg": [ 61.5, 27.84331158465171 ], "wc_weaknesses_avg": [ 88.75, 84.164645190246 ], "wc_questions_avg": [ 47.5, 53.16248677404021 ], "wc_limitations_avg": [ 23.25, 23.47738273317535 ], "wc_review_avg": 
[ 319.75, 83.00414146294148 ], "wc_reply_reviewers_avg": [ 31.5, 15.5161206491829 ], "wc_reply_authors_avg": [ 19.5, 4.55521678957215 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7711602318761469491&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Finite-Time Analysis of Single-Timescale Actor-Critic", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70680", "id": "jh3UNSQK0l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/160adf2dc118a920e7858484b92a37d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jh3UNSQK0l", "openreview": "https://openreview.net/forum?id=jh3UNSQK0l", "poster": "/media/PosterPDFs/NeurIPS%202023/70680.png?t=1699596657.4295902", "slides": "https://nips.cc/virtual/2023/poster/70680", "video": "https://nips.cc/virtual/2023/poster/70680", "author_site": "Xuyang Chen, Lin Zhao", "tldr": "", "abstract": "Actor-critic methods have achieved significant success in many challenging applications. However, their finite-time convergence is still poorly understood in the most practical single-timescale form. Existing works on analyzing single-timescale actor-critic have been limited to i.i.d. sampling or the tabular setting for simplicity. We investigate the more practical online single-timescale actor-critic algorithm on a continuous state space, where the critic assumes linear function approximation and updates with a single Markovian sample per actor step. Previous analyses have been unable to establish convergence for such a challenging scenario. We demonstrate that the online single-timescale actor-critic method provably finds an $\\epsilon$-approximate stationary point with $\\widetilde{\\mathcal{O}}(\\epsilon^{-2})$ sample complexity under standard assumptions, which can be further improved to $\\mathcal{O}(\\epsilon^{-2})$ under i.i.d. sampling. Our novel framework systematically evaluates and controls the error propagation between the actor and critic.
It offers a promising approach for analyzing other single-timescale reinforcement learning algorithms as well.", "keywords": "Finite-time analysis;single-timescale actor-critic", "primary_area": "", "supplementary_material": "/attachment/d12c4a7cdbb44fab74751c9309948a0819fa3165.pdf", "author": "Xuyang Chen;Lin Zhao", "authorids": "~Xuyang_Chen1;~Lin_Zhao3", "gender": "M;M", "homepage": ";https://sites.google.com/view/lzhao", "dblp": ";", "google_scholar": "n7GqLNQAAAAJ;091lFhYAAAAJ", "orcid": ";0000-0002-1078-887X", "linkedin": ";", "or_profile": "~Xuyang_Chen1;~Lin_Zhao3", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023finitetime,\ntitle={Finite-Time Analysis of Single-Timescale Actor-Critic},\nauthor={Xuyang Chen and Lin Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jh3UNSQK0l}\n}", "github": "", "project": "", "reviewers": "JDcB;2T31;9Zts;fvvv;bt4a", "pdf_size": 397980, "rating": "6;6;6;6;6", "confidence": "4;3;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "81;43;24;164;11", "wc_strengths": "37;70;38;149;24", "wc_weaknesses": "100;174;52;499;184", "wc_questions": "183;76;8;57;2", "wc_limitations": "1;24;1;18;2", "wc_review": "402;387;123;887;223", "wc_reply_reviewers": "82;35;0;262;182", "wc_reply_authors": "154;6;0;781;59", "reply_reviewers": "1;1;0;3;2", "reply_authors": "2;2;1;4;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.6, 55.02217734695711 ], "wc_strengths_avg": [ 63.6, 45.31048443793114 ], "wc_weaknesses_avg": [ 201.8, 156.34628233507826 ], "wc_questions_avg": [ 65.2, 65.30972362519995 ], "wc_limitations_avg": [ 9.2, 9.826494797230598 ], "wc_review_avg": [ 404.4, 262.79391164941393 ], "wc_reply_reviewers_avg": [ 112.2, 96.71897435353624 ], "wc_reply_authors_avg": [ 200.0, 295.7005241794475 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12248482281077201202&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "u.nus.edu;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Adaptive Online Replanning with Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70679", "id": "jhs8F63xI6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/893a5db6100028ec814cfd99fe92c31b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jhs8F63xI6", "openreview": "https://openreview.net/forum?id=jhs8F63xI6", "poster": "/media/PosterPDFs/NeurIPS%202023/70679.png?t=1701779250.4193563", "slides": "https://nips.cc/virtual/2023/poster/70679", "video": "https://nips.cc/virtual/2023/poster/70679", "author_site": "Siyuan Zhou, Yilun Du, Shun Zhang, Mengdi Xu, Yikang Shen, 
Wei Xiao, Dit-Yan Yeung, Chuang Gan", "tldr": "", "abstract": "Diffusion models have emerged as a promising approach to data-driven planning, and have demonstrated impressive performance in robotic control, reinforcement learning, and video planning. Given an effective planner, an important question to consider is replanning -- when given plans should be regenerated due to both action execution errors and external environment changes. Direct plan execution, without replanning, is problematic as errors from individual actions rapidly accumulate and environments are partially observable and stochastic. Simultaneously, replanning at each timestep incurs a substantial computational cost, and may prevent successful task execution, as different generated plans prevent consistent progress to any particular goal. In this paper, we explore how we may effectively replan with diffusion models. We propose a principled approach to determine when to replan, based on the diffusion model's estimated likelihood of existing generated plans. We further present an approach to replan existing trajectories to ensure that new plans follow the same goal state as the original trajectory, which may efficiently bootstrap off previously generated plans. We illustrate how a combination of our proposed additions significantly improves the performance of diffusion planners, leading to 38\\% gains over past diffusion planning approaches on Maze2D, and further enables handling of stochastic and long-horizon robotic control tasks.", "keywords": "Decision making;Robotics;Planning-based", "primary_area": "", "supplementary_material": "/attachment/64d4092143e24a116339eb9095b15fc8b2df2a73.pdf", "author": "Siyuan Zhou;Yilun Du;Shun Zhang;Mengdi Xu;Yikang Shen;Wei Xiao;Dit-Yan Yeung;Chuang Gan", "authorids": "~Siyuan_Zhou2;~Yilun_Du1;~Shun_Zhang6;~Mengdi_Xu3;~Yikang_Shen1;~Wei_Xiao2;~Dit-Yan_Yeung2;~Chuang_Gan1", "gender": ";;;F;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=WjUmtm0AAAAJ&hl=zh-CN;https://yilundu.github.io;https://shunzh.github.io/;https://mxu34.github.io/;;;https://cse.hkust.edu.hk/faculty/dyyeung/;http://people.csail.mit.edu/ganchuang/", "dblp": ";204/4379;;;152/8226;20/4794-3;41/5668;139/6993", "google_scholar": "WjUmtm0AAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;qff5rRYAAAAJ;BxdZJNQAAAAJ;nEsOOx8AAAAJ;PTeSCbIAAAAJ", "orcid": ";;;0000-0001-9332-4175;;;0000-0003-3716-8125;", "linkedin": ";;;;;;;", "or_profile": "~Siyuan_Zhou2;~Yilun_Du1;~Shun_Zhang6;~Mengdi_Xu3;~Yikang_Shen1;~Wei_Xiao2;~Dit-Yan_Yeung2;~Chuang_Gan1", "aff": "Hong Kong University of Science and Technology;Massachusetts Institute of Technology;MIT-IBM Watson AI Lab;Carnegie Mellon University;International Business Machines;Massachusetts Institute of Technology;Hong Kong University of Science and Technology;MIT-IBM Watson AI Lab", "aff_domain": "hkust.edu;mit.edu;ibm.com;cmu.edu;ibm.com;mit.edu;ust.hk;ibm.com", "position": "PhD student;PhD student;Researcher;PhD student;Researcher;Postdoc;Chair Professor;PhD student", "bibtex": "@inproceedings{\nzhou2023adaptive,\ntitle={Adaptive Online Replanning with Diffusion Models},\nauthor={Siyuan Zhou and Yilun Du and Shun Zhang and Mengdi Xu and Yikang Shen and Wei Xiao and Dit-Yan Yeung and Chuang Gan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jhs8F63xI6}\n}", "github": "", "project": "", "reviewers": "bzBf;rt81;kWoD;XopQ;G16t", "pdf_size": 1263861, "rating": "5;5;5;6;6", "confidence": "4;4;4;4;4",
"soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "1;3;3;3;3", "wc_summary": "134;85;34;28;108", "wc_strengths": "155;51;43;71;113", "wc_weaknesses": "961;223;119;228;308", "wc_questions": "44;71;101;42;67", "wc_limitations": "9;6;1;27;36", "wc_review": "1303;436;298;396;632", "wc_reply_reviewers": "25;85;0;147;87", "wc_reply_authors": "194;63;63;64;259", "reply_reviewers": "1;1;0;1;2", "reply_authors": "5;2;2;2;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 77.8, 41.28147284194207 ], "wc_strengths_avg": [ 86.6, 41.92183202103649 ], "wc_weaknesses_avg": [ 367.8, 302.61354893659336 ], "wc_questions_avg": [ 65.0, 21.475567512873788 ], "wc_limitations_avg": [ 15.8, 13.377593206552515 ], "wc_review_avg": [ 613.0, 361.69158132309354 ], "wc_reply_reviewers_avg": [ 68.8, 51.69293955657775 ], "wc_reply_authors_avg": [ 128.6, 82.53629504648242 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4541690294214833114&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "hkust.edu;mit.edu;ibm.com;cmu.edu;ibm.com;mit.edu;ust.hk;ibm.com", "author_num": 8, "aff_unique_index": "0;1;1;2;3;1;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Massachusetts Institute of Technology;Carnegie Mellon University;International Business Machines Corporation", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ust.hk;https://web.mit.edu;https://www.cmu.edu;https://www.ibm.com", "aff_unique_abbr": "HKUST;MIT;CMU;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Statistical Guarantees for Variational Autoencoders using PAC-Bayesian Theory", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70678", "id": "jkPDRHff3s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b29500824d22ee9bbd25e4cd97c49b55-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jkPDRHff3s", "openreview": "https://openreview.net/forum?id=jkPDRHff3s", "poster": "/media/PosterPDFs/NeurIPS%202023/70678.png?t=1701985569.1143532", "slides": "https://nips.cc/virtual/2023/poster/70678", "video": "https://nips.cc/virtual/2023/poster/70678", "author_site": "Sokhna Diarra Mbacke, Florence Clerc, Pascal Germain", "tldr": "", "abstract": "Since their inception, Variational Autoencoders (VAEs) have become central in machine learning. Despite their widespread use, numerous questions regarding their theoretical properties remain open. Using PAC-Bayesian theory, this work develops statistical guarantees for VAEs. First, we derive the first PAC-Bayesian bound for posterior distributions conditioned on individual samples from the data-generating distribution. Then, we utilize this result to develop generalization guarantees for the VAE's reconstruction loss, as well as upper bounds on the distance between the input and the regenerated distributions. 
More importantly, we provide upper bounds on the Wasserstein distance between the input distribution and the distribution defined by the VAE's generative model.", "keywords": "Variational Autoencoders;PAC-Bayes;Statistical Learning Theory", "primary_area": "", "supplementary_material": "/attachment/a4935b0470ca10942aa700f90ede026078140cbf.pdf", "author": "Sokhna Diarra Mbacke;Florence Clerc;Pascal Germain", "authorids": "~Sokhna_Diarra_Mbacke1;~Florence_Clerc1;~Pascal_Germain1", "gender": "F;;M", "homepage": "https://diarra2339.github.io/;;http://www.pascalgermain.info/", "dblp": "340/7531;164/4772;31/6421", "google_scholar": "https://scholar.google.com/citations?hl=en;0FdXRFAAAAAJ;mgOIj_4AAAAJ", "orcid": ";;0000-0003-3998-9533", "linkedin": ";;germainml/", "or_profile": "~Sokhna_Diarra_Mbacke1;~Florence_Clerc1;~Pascal_Germain1", "aff": "Universit\u00e9 Laval;McGill University;Universit\u00e9 Laval", "aff_domain": "ulaval.ca;mcgill.ca;ift.ulaval.ca", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nmbacke2023statistical,\ntitle={Statistical Guarantees for Variational Autoencoders using {PAC}-Bayesian Theory},\nauthor={Sokhna Diarra Mbacke and Florence Clerc and Pascal Germain},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jkPDRHff3s}\n}", "github": "", "project": "", "reviewers": "sohu;Hutj;3pjC;8cep", "pdf_size": 341336, "rating": "6;7;7;7", "confidence": "3;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "133;76;42;55", "wc_strengths": "110;43;24;56", "wc_weaknesses": "83;118;48;51", "wc_questions": "46;25;23;78", "wc_limitations": "93;11;17;9", "wc_review": "465;273;154;249", "wc_reply_reviewers": "361;12;19;25", "wc_reply_authors": "1116;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 76.5, 34.80301711058971 ], "wc_strengths_avg": [ 58.25, 31.971667144520318 ], "wc_weaknesses_avg": [ 75.0, 28.36370920736567 ], "wc_questions_avg": [ 43.0, 22.124646889837585 ], "wc_limitations_avg": [ 32.5, 35.05353049266222 ], "wc_review_avg": [ 285.25, 112.91672816726492 ], "wc_reply_reviewers_avg": [ 104.25, 148.30606022681607 ], "wc_reply_authors_avg": [ 279.0, 483.2421753117168 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11237268741601224516&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ulaval.ca;mcgill.ca;ift.ulaval.ca", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Universit\u00e9 Laval;McGill University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ulaval.ca;https://www.mcgill.ca", "aff_unique_abbr": "ULaval;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Energy-based learning algorithms for analog computing: a comparative study", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70677", "id": "jl5a3t78Uh", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/a52b0d191b619477cc798d544f4f0e4b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jl5a3t78Uh", "openreview": "https://openreview.net/forum?id=jl5a3t78Uh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70677", "video": "https://nips.cc/virtual/2023/poster/70677", "author_site": "Benjamin Scellier, Maxence Ernoult, Jack Kendall, Suhas Kumar", "tldr": "", "abstract": "Energy-based learning algorithms have recently gained a surge of interest due to their compatibility with analog (post-digital) hardware. Existing algorithms include contrastive learning (CL), equilibrium propagation (EP) and coupled learning (CpL), all consisting in contrasting two states, and differing in the type of perturbation used to obtain the second state from the first one. However, these algorithms have never been explicitly compared on equal footing with same models and datasets, making it difficult to assess their scalability and decide which one to select in practice. In this work, we carry out a comparison of seven learning algorithms, namely CL and different variants of EP and CpL depending on the signs of the perturbations. Specifically, using these learning algorithms, we train deep convolutional Hopfield networks (DCHNs) on five vision tasks (MNIST, F-MNIST, SVHN, CIFAR-10 and CIFAR-100). We find that, while all algorithms yield comparable performance on MNIST, important differences in performance arise as the difficulty of the task increases. Our key findings reveal that negative perturbations are better than positive ones, and highlight the centered variant of EP (which uses two perturbations of opposite sign) as the best-performing algorithm. We also endorse these findings with theoretical arguments. Additionally, we establish new SOTA results with DCHNs on all five datasets, both in performance and speed. In particular, our DCHN simulations are 13.5 times faster with respect to Laborieux et al. 
(2021), which we achieve thanks to the use of a novel energy minimisation algorithm based on asynchronous updates, combined with reduced precision (16 bits).", "keywords": "energy-based learning algorithm;contrastive learning;equilibrium propagation;coupled learning;convolutional Hopfield network", "primary_area": "", "supplementary_material": "/attachment/8b7c63b44f1ef7ba7f8bbd9666d52f974d0a2a6c.zip", "author": "Benjamin Scellier;Maxence Ernoult;Jack Kendall;Suhas Kumar", "authorids": "~Benjamin_Scellier1;~Maxence_Ernoult1;jack@rain.ai;suhas@rain.ai", "gender": ";M;;", "homepage": ";;;", "dblp": ";241/9703;;", "google_scholar": ";https://scholar.google.com/citations?hl=fr;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Benjamin_Scellier1;~Maxence_Ernoult1;jack@rain.ai;suhas@rain.ai", "aff": ";Rain AI;;", "aff_domain": ";rain.ai;;", "position": ";Researcher;;", "bibtex": "@inproceedings{\nscellier2023energybased,\ntitle={Energy-based learning algorithms for analog computing: a comparative study},\nauthor={Benjamin Scellier and Maxence Ernoult and Jack Kendall and Suhas Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jl5a3t78Uh}\n}", "github": "", "project": "", "reviewers": "AFqD;5nrK;JWNX;dBi8", "pdf_size": 525530, "rating": "5;5;6;6", "confidence": "3;2;4;3", "soundness": "3;3;4;3", "novelty": "2;2;3;2", "presentation": "3;2;4;3", "wc_summary": "56;46;60;132", "wc_strengths": "20;47;33;70", "wc_weaknesses": "24;141;57;43", "wc_questions": "91;25;2;26", "wc_limitations": "2;10;2;4", "wc_review": "193;269;154;275", "wc_reply_reviewers": "47;75;66;85", "wc_reply_authors": "418;343;178;20", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.5, 34.15772240650714 ], "wc_strengths_avg": [ 42.5, 18.527007313648905 ], "wc_weaknesses_avg": [ 66.25, 44.71786555729153 ], "wc_questions_avg": [ 36.0, 33.17378483079674 ], "wc_limitations_avg": [ 4.5, 3.278719262151 ], "wc_review_avg": [ 222.75, 51.187767093320254 ], "wc_reply_reviewers_avg": [ 68.25, 13.988834833537782 ], "wc_reply_authors_avg": [ 239.75, 153.73414552401817 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17069839011119489761&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";rain.ai;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Rain AI", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "title": "D-CIPHER: Discovery of Closed-form Partial Differential Equations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70676", "id": "jnCPN1vpSR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/57c30b677add9aa78e1745f0643104d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jnCPN1vpSR", "openreview": "https://openreview.net/forum?id=jnCPN1vpSR", "poster": "/media/PosterPDFs/NeurIPS%202023/70676.png?t=1702269697.3067162", "slides": "https://nips.cc/virtual/2023/poster/70676", "video": "https://nips.cc/virtual/2023/poster/70676", "author_site": "Krzysztof 
Kacprzyk, Zhaozhi Qian, Mihaela van der Schaar", "tldr": "", "abstract": "Closed-form differential equations, including partial differential equations and higher-order ordinary differential equations, are one of the most important tools used by scientists to model and better understand natural phenomena. Discovering these equations directly from data is challenging because it requires modeling relationships between various derivatives that are not observed in the data (equation-data mismatch) and it involves searching across a huge space of possible equations. Current approaches make strong assumptions about the form of the equation and thus fail to discover many well-known phenomena. Moreover, many of them resolve the equation-data mismatch by estimating the derivatives, which makes them inadequate for noisy and infrequent observations. To this end, we propose D-CIPHER, which is robust to measurement artifacts and can uncover a new and very general class of differential equations. We further design a novel optimization procedure, CoLLie, to help D-CIPHER search through this class efficiently. Finally, we demonstrate empirically that it can discover many well-known equations that are beyond the capabilities of current methods.", "keywords": "differential equations;symbolic regression", "primary_area": "", "supplementary_material": "", "author": "Krzysztof Kacprzyk;Zhaozhi Qian;Mihaela van der Schaar", "authorids": "~Krzysztof_Kacprzyk1;~Zhaozhi_Qian1;~Mihaela_van_der_Schaar2", "gender": ";;F", "homepage": ";;https://www.vanderschaar-lab.com", "dblp": ";194/2443;", "google_scholar": ";PuTDB5gAAAAJ;DZ3S--MAAAAJ", "orcid": ";0000-0002-4561-0342;", "linkedin": ";;", "or_profile": "~Krzysztof_Kacprzyk1;~Zhaozhi_Qian1;~Mihaela_van_der_Schaar2", "aff": ";University of Cambridge;University of California, Los Angeles", "aff_domain": ";cam.ac.uk;ucla.edu", "position": ";Postdoc;Full Professor", "bibtex": "@inproceedings{\nkacprzyk2023dcipher,\ntitle={D-{CIPHER}: Discovery of Closed-form Partial Differential Equations},\nauthor={Krzysztof Kacprzyk and Zhaozhi Qian and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jnCPN1vpSR}\n}", "github": "", "project": "", "reviewers": "ubhm;Znhz;kC9e", "pdf_size": 1109426, "rating": "6;6;7", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;4;4", "wc_summary": "83;73;112", "wc_strengths": "43;84;65", "wc_weaknesses": "94;124;171", "wc_questions": "113;2;6", "wc_limitations": "85;38;73", "wc_review": "418;321;427", "wc_reply_reviewers": "14;0;32", "wc_reply_authors": "52;0;48", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 89.33333333333333, 16.539514973407034 ], "wc_strengths_avg": [ 64.0, 16.753109164172084 ], "wc_weaknesses_avg": [ 129.66666666666666, 31.689465477067017 ], "wc_questions_avg": [ 40.333333333333336, 51.40903509003927 ], "wc_limitations_avg": [ 65.33333333333333, 19.93879523831757 ], "wc_review_avg": [ 388.6666666666667, 47.98842453018111 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 13.097921802925667 ], "wc_reply_authors_avg": [ 33.333333333333336, 23.6267268622258 ], "reply_reviewers_avg": [ 
0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13041852764656385921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";cam.ac.uk;ucla.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Learning Reliable Logical Rules with SATNet", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70675", "id": "jnIBiP2di1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ff46d83d1dcc063e075058b29d55efe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jnIBiP2di1", "openreview": "https://openreview.net/forum?id=jnIBiP2di1", "poster": "/media/PosterPDFs/NeurIPS%202023/70675.png?t=1700929524.1040163", "slides": "https://nips.cc/virtual/2023/poster/70675", "video": "https://nips.cc/virtual/2023/poster/70675", "author_site": "Zhaoyu Li, Jinpei Guo, Yuhe Jiang, Xujie Si", "tldr": "", "abstract": "Bridging logical reasoning and deep learning is crucial for advanced AI systems. In this work, we present a new framework that addresses this goal by generating interpretable and verifiable logical rules through differentiable learning, without relying on pre-specified logical structures. Our approach builds upon SATNet, a differentiable MaxSAT solver that learns the underlying rules from input-output examples. Despite its efficacy, the learned weights in SATNet are not straightforwardly interpretable, failing to produce human-readable rules. To address this, we propose a novel specification method called ``maximum equality'', which enables the interchangeability between the learned weights of SATNet and a set of propositional logical rules in weighted MaxSAT form. With the decoded weighted MaxSAT formula, we further introduce several effective verification techniques to validate it against the ground truth rules. Experiments on stream transformations and Sudoku problems show that our decoded rules are highly reliable: using exact solvers on them could achieve 100% accuracy, whereas the original SATNet fails to give correct solutions in many cases. 
Furthermore, we formally verify that our decoded logical rules are functionally equivalent to the ground truth ones.", "keywords": "Logical Reasoning;Rule Learning;Interpretation;SATNet", "primary_area": "", "supplementary_material": "/attachment/487bda67cef3e8ab408275aa5996a940be97803e.zip", "author": "Zhaoyu Li;Jinpei Guo;Yuhe Jiang;Xujie Si", "authorids": "~Zhaoyu_Li3;~Jinpei_Guo1;yuhe.jiang@mail.mcgill.ca;~Xujie_Si1", "gender": "M;M;;M", "homepage": "https://www.zhaoyu-li.com/;https://jp-guo.github.io/;;https://xujie.si", "dblp": ";;;142/8449", "google_scholar": ";;;Ru-jrx4AAAAJ", "orcid": ";;;", "linkedin": "zhaoyu-li-9171892a5/;;;", "or_profile": "~Zhaoyu_Li3;~Jinpei_Guo1;yuhe.jiang@mail.mcgill.ca;~Xujie_Si1", "aff": "McGill University;Shanghai Jiaotong University;;University of Toronto", "aff_domain": "cs.mcgill.ca;sjtu.edu.cn;;toronto.edu", "position": "PhD student;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning Reliable Logical Rules with {SATN}et},\nauthor={Zhaoyu Li and Jinpei Guo and Yuhe Jiang and Xujie Si},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jnIBiP2di1}\n}", "github": "", "project": "", "reviewers": "ASVp;cdcd;fiQw;un3V;sq5Z", "pdf_size": 401503, "rating": "4;6;7;7;8", "confidence": "3;4;3;2;3", "soundness": "3;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;4;3;3;4", "wc_summary": "142;71;75;154;77", "wc_strengths": "75;74;97;69;90", "wc_weaknesses": "167;113;96;1;32", "wc_questions": "50;197;49;1;48", "wc_limitations": "27;71;31;10;10", "wc_review": "461;526;348;235;257", "wc_reply_reviewers": "7;320;4;0;10", "wc_reply_authors": "77;541;0;0;0", "reply_reviewers": "1;2;1;0;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 103.8, 36.339510178316935 ], "wc_strengths_avg": [ 81.0, 10.639548862616309 ], "wc_weaknesses_avg": [ 81.8, 59.06403304888686 ], "wc_questions_avg": [ 69.0, 66.64833081180653 ], "wc_limitations_avg": [ 29.8, 22.319498202244603 ], "wc_review_avg": [ 365.4, 113.1275386455482 ], "wc_reply_reviewers_avg": [ 68.2, 125.94347938658835 ], "wc_reply_authors_avg": [ 123.6, 210.8199231571817 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.23312620206007845, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12879401074991554685&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.mcgill.ca;sjtu.edu.cn;;toronto.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "McGill University;Shanghai Jiao Tong University;University of Toronto", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mcgill.ca;https://www.sjtu.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "McGill;SJTU;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;China" }, { "title": "Implicit Differentiable Outlier Detection Enable Robust Deep Multimodal Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70674", "id": "jooPcatnVF", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/2cf153951b5e9b39564fc4a0ef6adc1a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jooPcatnVF", "openreview": "https://openreview.net/forum?id=jooPcatnVF", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70674", "video": "https://nips.cc/virtual/2023/poster/70674", "author_site": "Zhu Wang, Sourav Medya, Sathya Ravi", "tldr": "", "abstract": "Deep network models are often purely inductive during both training and inference on unseen data. When these models are used for prediction, but they may fail to capture important semantic information and implicit dependencies within datasets. Recent advancements have shown that combining multiple modalities in large-scale vision and language settings can improve understanding and generalization performance. However, as the model size increases, fine-tuning and deployment become computationally expensive, even for a small number of downstream tasks. Moreover, it is still unclear how domain or prior modal knowledge can be specified in a backpropagation friendly manner, especially in large-scale and noisy settings. To address these challenges, we propose a simplified alternative of combining features from pretrained deep networks and freely available semantic explicit knowledge. In order to remove irrelevant explicit knowledge that does not correspond well to the images, we introduce an implicit Differentiable Out-of-Distribution (OOD) detection layer. This layer addresses outlier detection by solving for fixed points of a differentiable function and using the last iterate of fixed point solver to backpropagate. In practice, we apply our model on several vision and language downstream tasks including visual question answering, visual reasoning, and image-text retrieval on different datasets. Our experiments show that it is possible to design models that perform similarly to state-of-the-art results but with significantly fewer samples and less training time. Our models and code are available here: https://github.com/ellenzhuwang/implicit_vkood", "keywords": "Implicit layer;Out-of-distribution detection;multimodal learning", "primary_area": "", "supplementary_material": "/attachment/2fd1321d635e3f5562d96f45628d4f75afe60466.zip", "author": "Zhu Wang;Sourav Medya;Sathya N. Ravi", "authorids": "~Zhu_Wang2;~Sourav_Medya1;~Sathya_N._Ravi1", "gender": "F;M;M", "homepage": ";https://souravmedya.github.io/;http://sathyaravi.com", "dblp": ";178/3021;159/2123", "google_scholar": "mMyQX4oAAAAJ;RCFhOM4AAAAJ;FW-0thoAAAAJ", "orcid": ";0000-0003-0996-2807;0000-0003-3881-6323", "linkedin": ";sourav-medya-35987a49/;sathya-narayanan-ravi-74a5a128/", "or_profile": "~Zhu_Wang2;~Sourav_Medya1;~Sathya_N._Ravi1", "aff": "University of Illinois at Chicago;University of Illinois at Chicago;University of Illinois, Chicago", "aff_domain": "cs.uic.edu;uic.edu;uic.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023implicit,\ntitle={Implicit Differentiable Outlier Detection Enable Robust Deep Multimodal Analysis},\nauthor={Zhu Wang and Sourav Medya and Sathya N. 
Ravi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jooPcatnVF}\n}", "github": "", "project": "", "reviewers": "uxWt;NYTC;MwwQ;oiir;h1Px", "pdf_size": 15901606, "rating": "4;5;5;5;6", "confidence": "3;4;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "2;4;3;3;3", "wc_summary": "81;61;35;85;62", "wc_strengths": "25;45;87;75;38", "wc_weaknesses": "252;329;137;305;27", "wc_questions": "263;28;5;140;39", "wc_limitations": "2;4;12;43;13", "wc_review": "623;467;276;648;179", "wc_reply_reviewers": "0;149;12;521;9", "wc_reply_authors": "42;363;0;665;0", "reply_reviewers": "0;2;1;2;1", "reply_authors": "2;3;1;3;1", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 64.8, 17.780888616714297 ], "wc_strengths_avg": [ 54.0, 23.27230113246217 ], "wc_weaknesses_avg": [ 210.0, 112.93183784920885 ], "wc_questions_avg": [ 95.0, 95.88951976102497 ], "wc_limitations_avg": [ 14.8, 14.74313399518569 ], "wc_review_avg": [ 438.6, 185.73594159451207 ], "wc_reply_reviewers_avg": [ 138.2, 199.18373427566817 ], "wc_reply_authors_avg": [ 214.0, 263.3545139161279 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15223622056650442502&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "cs.uic.edu;uic.edu;uic.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Domain Re-Modulation for Few-Shot Generative Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70673", "id": "jown9RvYn7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2e20d7402c9985eae4ba924c65370a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jown9RvYn7", "openreview": "https://openreview.net/forum?id=jown9RvYn7", "poster": "/media/PosterPDFs/NeurIPS%202023/70673.png?t=1697113226.6973784", "slides": "https://nips.cc/virtual/2023/poster/70673", "video": "https://nips.cc/virtual/2023/poster/70673", "author_site": "Yi Wu, Ziqiang Li, Chaoyue Wang, Heliang Zheng, Shanshan Zhao, Bin Li, Dacheng Tao", "tldr": "", "abstract": "In this study, we delve into the task of few-shot Generative Domain Adaptation (GDA), which involves transferring a pre-trained generator from one domain to a new domain using only a few reference images. Inspired by the way human brains acquire knowledge in new domains, we present an innovative generator structure called $\\textbf{Domain Re-Modulation (DoRM)}$. DoRM not only meets the criteria of $\\textit{high quality}$, $\\textit{large synthesis diversity}$, and $\\textit{cross-domain consistency}$, which were achieved by previous research in GDA, but also incorporates $\\textit{memory}$ and $\\textit{domain association}$, akin to how human brains operate. 
Specifically, DoRM freezes the source generator and introduces new mapping and affine modules (M\\&A modules) to capture the attributes of the target domain during GDA. This process resembles the formation of new synapses in human brains. Consequently, a linearly combinable domain shift occurs in the style space. By incorporating multiple new M\\&A modules, the generator gains the capability to perform high-fidelity multi-domain and hybrid-domain generation. Moreover, to maintain cross-domain consistency more effectively, we introduce a similarity-based structure loss. This loss aligns the auto-correlation map of the target image with its corresponding auto-correlation map of the source image during training. Through extensive experiments, we demonstrate the superior performance of our DoRM and similarity-based structure loss in few-shot GDA, both quantitatively and qualitatively. Code will be available at https://github.com/wuyi2020/DoRM.", "keywords": "StyleGAN;Few-Shot Generative Domain Adaptation", "primary_area": "", "supplementary_material": "/attachment/d34e0aba0c86e63e4d0e542d8c2277761dead696.zip", "author": "Yi Wu;Ziqiang Li;Chaoyue Wang;Heliang Zheng;Shanshan Zhao;Bin Li;Dacheng Tao", "authorids": "~Yi_Wu11;~Ziqiang_Li4;~Chaoyue_Wang2;~Heliang_Zheng1;~Shanshan_Zhao2;~Bin_Li8;~Dacheng_Tao1", "gender": "M;M;M;M;M;M;", "homepage": "https://scholar.google.com/citations?user=OnnO94cAAAAJ&hl=zh-CN&authuser=1;https://iceli1007.github.io/;;;https://sshan-zhao.github.io/;http://staff.ustc.edu.cn/~binli;", "dblp": "44/3684-18;17/616-1.html;174/7172;208/4220;;89/6764-25;", "google_scholar": "OnnO94cAAAAJ;https://scholar.google.com.hk/citations?user=mj5a8WgAAAAJ;https://scholar.google.com.au/citations?user=ioj1BycAAAAJ;VRgciTQAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;", "orcid": "0000-0001-7384-5029;;;;0000-0003-0682-8645;0000-0002-2332-3959;", "linkedin": ";;;;;;", "or_profile": "~Yi_Wu11;~Ziqiang_Li4;~Chaoyue_Wang2;~Heliang_Zheng1;~Shanshan_Zhao2;~Bin_Li8;~Dacheng_Tao1", "aff": "University of Science and Technology of China;University of Science and Technology of China;JD.com;USTC;JD Explore Academy;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;ustc.edu.cn;jd.com;ustc.edu;jd.com;ustc.edu.cn;", "position": "PhD student;PhD student;Researcher;Researcher;Researcher;Full Professor;", "bibtex": "@inproceedings{\nwu2023domain,\ntitle={Domain Re-Modulation for Few-Shot Generative Domain Adaptation},\nauthor={Yi Wu and Ziqiang Li and Chaoyue Wang and Heliang Zheng and Shanshan Zhao and Bin Li and Dacheng Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jown9RvYn7}\n}", "github": "", "project": "", "reviewers": "URf4;cgji;eDDg;Jqfj;18N9", "pdf_size": 8319900, "rating": "3;3;7;7;8", "confidence": "5;4;4;5;4", "soundness": "1;2;4;4;4", "novelty": "2;2;3;4;4", "presentation": "2;2;4;3;3", "wc_summary": "107;115;99;52;149", "wc_strengths": "42;122;114;67;171", "wc_weaknesses": "408;389;104;204;5", "wc_questions": "42;4;2;81;262", "wc_limitations": "34;7;1;29;6", "wc_review": "633;637;320;433;593", "wc_reply_reviewers": "241;0;46;38;20", "wc_reply_authors": "1041;0;87;22;22", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;1;2;2;2", "rating_avg": [ 5.6, 2.1540659228538015 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 1.2649110640673518 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], 
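A minimal sketch of how linearly combinable style-space shifts from multiple new mapping modules could look (module shapes and the mixing interface are assumptions for illustration, not the released DoRM code):

```python
import torch
import torch.nn as nn

class DoRMStyleMixer(nn.Module):
    """Frozen source mapping plus new per-domain mapping modules; each module
    contributes a style-space shift, and shifts combine linearly to yield
    multi-domain and hybrid-domain styles."""
    def __init__(self, source_mapping, domain_mappings):
        super().__init__()
        self.source = source_mapping
        for p in self.source.parameters():             # the source generator stays frozen
            p.requires_grad_(False)
        self.domains = nn.ModuleList(domain_mappings)  # new trainable M&A-style modules

    def forward(self, z, alphas):
        w = self.source(z)
        for a, m in zip(alphas, self.domains):
            w = w + a * m(z)                           # linearly combinable domain shift
        return w

# toy usage: a 70/30 hybrid of two target domains in style space
src = nn.Sequential(nn.Linear(512, 512), nn.LeakyReLU(), nn.Linear(512, 512))
mixer = DoRMStyleMixer(src, [nn.Linear(512, 512) for _ in range(2)])
w = mixer(torch.randn(4, 512), alphas=[0.7, 0.3])
```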
"wc_summary_avg": [ 104.4, 31.251239975399375 ], "wc_strengths_avg": [ 103.2, 44.99511084551298 ], "wc_weaknesses_avg": [ 222.0, 157.36708677483995 ], "wc_questions_avg": [ 78.2, 96.34604299087742 ], "wc_limitations_avg": [ 15.4, 13.395521639712282 ], "wc_review_avg": [ 523.2, 125.94030331867556 ], "wc_reply_reviewers_avg": [ 69.0, 87.44827042314787 ], "wc_reply_authors_avg": [ 234.4, 404.3516291546258 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.22742941307367098, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8459397657276099150&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;ustc.edu.cn;jd.com;ustc.edu;jd.com;ustc.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "University of Science and Technology of China;JD.com;JD", "aff_unique_dep": ";;JD Explore Academy", "aff_unique_url": "http://www.ustc.edu.cn;https://www.jd.com;", "aff_unique_abbr": "USTC;JD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Fine-grained Expressivity of Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70672", "id": "jt10uWlEbc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9200d97ca2bf3a26db7b591844014f00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jt10uWlEbc", "openreview": "https://openreview.net/forum?id=jt10uWlEbc", "poster": "/media/PosterPDFs/NeurIPS%202023/70672.png?t=1701509797.9415464", "slides": "https://nips.cc/virtual/2023/poster/70672", "video": "https://nips.cc/virtual/2023/poster/70672", "author_site": "Jan B\u00f6ker, Ron Levie, Ningyuan Huang, Soledad Villar, Christopher Morris", "tldr": "", "abstract": "Numerous recent works have analyzed the expressive power of message-passing graph neural networks (MPNNs), primarily utilizing combinatorial techniques such as the $1$-dimensional Weisfeiler--Leman test ($1$-WL) for the graph isomorphism problem. However, the graph isomorphism objective is inherently binary, not giving insights into the degree of similarity between two given graphs. This work resolves this issue by considering continuous extensions of both $1$-WL and MPNNs to graphons. Concretely, we show that the continuous variant of $1$-WL delivers an accurate topological characterization of the expressive power of MPNNs on graphons, revealing which graphs these networks can distinguish and the level of difficulty in separating them. We identify the finest topology where MPNNs separate points and prove a universal approximation theorem. Consequently, we provide a theoretical framework for graph and graphon similarity combining various topological variants of classical characterizations of the $1$-WL. In particular, we characterize the expressive power of MPNNs in terms of the tree distance, which is a graph distance based on the concept of fractional isomorphisms, and substructure counts via tree homomorphisms, showing that these concepts have the same expressive power as the $1$-WL and MPNNs on graphons. Empirically, we validate our theoretical findings by showing that randomly initialized MPNNs, without training, exhibit competitive performance compared to their trained counterparts. 
Moreover, we evaluate different MPNN architectures based on their ability to preserve graph distances, highlighting the significance of our continuous $1$-WL test in understanding MPNNs' expressivity.", "keywords": "graphons;universal approximation;weisfeiler-leman;graph metric;tree homomorphisms;tree distance;optimal transport;GNNs", "primary_area": "", "supplementary_material": "", "author": "Jan B\u00f6ker;Ron Levie;Ningyuan Teresa Huang;Soledad Villar;Christopher Morris", "authorids": "~Jan_B\u00f6ker1;~Ron_Levie1;~Ningyuan_Teresa_Huang1;~Soledad_Villar2;~Christopher_Morris1", "gender": ";;;;M", "homepage": "https://www.lics.rwth-aachen.de/cms/LICS/Der-Lehrstuhl/Team/Wissenschaftliche-Mitarbeiterinnen-und-M/~rrqo/JanBoeker/;;https://nhuang37.github.io/;;http://christophermorris.info", "dblp": "238/1535;;277/6356;;156/7303", "google_scholar": ";;cUQa7_kAAAAJ;;", "orcid": "0000-0003-4584-121X;;;;", "linkedin": ";;;;", "or_profile": "~Jan_B\u00f6ker1;~Ron_Levie1;~Ningyuan_Teresa_Huang1;~Soledad_Villar2;~Christopher_Morris1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;;Johns Hopkins University;;Rheinisch Westf\u00e4lische Technische Hochschule Aachen", "aff_domain": "rwth-aachen.de;;jhu.edu;;rwth-aachen.de", "position": "PhD student;;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nb{\\\"o}ker2023finegrained,\ntitle={Fine-grained Expressivity of Graph Neural Networks},\nauthor={Jan B{\\\"o}ker and Ron Levie and Ningyuan Teresa Huang and Soledad Villar and Christopher Morris},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jt10uWlEbc}\n}", "github": "", "project": "", "reviewers": "xsds;oM2o;Svzd;7umZ;yRbN", "pdf_size": 1187172, "rating": "6;6;6;7;8", "confidence": "2;3;3;2;3", "soundness": "3;3;4;3;3", "novelty": "3;3;2;2;4", "presentation": "2;2;4;3;4", "wc_summary": "50;120;49;40;46", "wc_strengths": "32;44;110;15;60", "wc_weaknesses": "65;109;355;40;61", "wc_questions": "28;53;245;10;88", "wc_limitations": "1;1;12;1;26", "wc_review": "176;327;771;106;281", "wc_reply_reviewers": "0;55;260;12;44", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 61.0, 29.70521839677332 ], "wc_strengths_avg": [ 52.2, 32.43701589234127 ], "wc_weaknesses_avg": [ 126.0, 116.68076105339732 ], "wc_questions_avg": [ 84.8, 84.26956746062008 ], "wc_limitations_avg": [ 8.2, 9.867117106835208 ], "wc_review_avg": [ 332.2, 232.705307202049 ], "wc_reply_reviewers_avg": [ 74.2, 95.05451067676906 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1020620726159658, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12308613621023502701&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "rwth-aachen.de;;jhu.edu;;rwth-aachen.de", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "RWTH Aachen University;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rwth-aachen.de;https://www.jhu.edu", "aff_unique_abbr": "RWTH;JHU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen;", 
"aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Simple and Controllable Music Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70671", "id": "jtiQ26sCJi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/94b472a1842cd7c56dcb125fb2765fbd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jtiQ26sCJi", "openreview": "https://openreview.net/forum?id=jtiQ26sCJi", "poster": "/media/PosterPDFs/NeurIPS%202023/70671.png?t=1701430088.952955", "slides": "https://nips.cc/virtual/2023/poster/70671", "video": "https://nips.cc/virtual/2023/poster/70671", "author_site": "Jade Copet, Felix Kreuk, Itai Gat, Tal Remez, David Kant, Gabriel Synnaeve, Yossi Adi, Alexandre Defossez", "tldr": "", "abstract": "We tackle the task of conditional music generation. We introduce MusicGen, a single Language Model (LM) that operates over several streams of compressed discrete music representation, i.e., tokens. Unlike prior work, MusicGen is comprised of a single-stage transformer LM together with efficient token interleaving patterns, which eliminates the need for cascading several models, e.g., hierarchically or upsampling. Following this approach, we demonstrate how MusicGen can generate high-quality samples, both mono and stereo, while being conditioned on textual description or melodic features, allowing better controls over the generated output. We conduct extensive empirical evaluation, considering both automatic and human studies, showing the proposed approach is superior to the evaluated baselines on a standard text-to-music benchmark. Through ablation studies, we shed light over the importance of each of the components comprising MusicGen. 
Music samples, code, and models are available at https://github.com/facebookresearch/audiocraft", "keywords": "Music generation;Generative AI;Transformer;Language Models", "primary_area": "", "supplementary_material": "/attachment/c1986cd749318b23a25809c4c08590109eddcb2c.pdf", "author": "Jade Copet;Felix Kreuk;Itai Gat;Tal Remez;David Kant;Gabriel Synnaeve;Yossi Adi;Alexandre D\u00e9fossez", "authorids": "~Jade_Copet1;~Felix_Kreuk1;~Itai_Gat1;~Tal_Remez2;~David_Kant1;~Gabriel_Synnaeve1;~Yossi_Adi1;~Alexandre_D\u00e9fossez1", "gender": ";M;M;M;;M;M;M", "homepage": ";https://scholar.google.co.il/citations?user=UiERcYsAAAAJ&hl=en;https://www.linkedin.com/in/itaigat/;https://talremez.github.io/;https://davidkantportfolio.com/;;http://adiyoss.github.io/;https://ai.honu.io/", "dblp": ";213/7459;221/4128;170/0030;;http://dblp.uni-trier.de/pers/hd/s/Synnaeve:Gabriel;171/0957.html;156/0054", "google_scholar": "GRMLwjAAAAAJ;;TnJqhXIAAAAJ;https://scholar.google.co.il/citations?user=XqHYn7EAAAAJ;;wN9rBkcAAAAJ;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ;https://scholar.google.fr/citations?user=DubNUU0AAAAJ", "orcid": ";;;;;;0000-0003-2237-3898;", "linkedin": "jadecopet/?locale=en_US;;;;;;yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic;", "or_profile": "~Jade_Copet1;~Felix_Kreuk1;~Itai_Gat1;~Tal_Remez2;~David_Kant1;~Gabriel_Synnaeve1;~Yossi_Adi1;~Alexandre_D\u00e9fossez1", "aff": "Facebook AI Research;Meta Facebook;Technion;Meta;Meta Ai;Meta Facebook;Meta;Meta", "aff_domain": "facebook.com;fb.com;technion.ac.il;meta.com;ai.meta.com;fb.com;meta.com;meta.com", "position": "Research Engineering Manager;Researcher;PhD student;Researcher;Researcher;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\ncopet2023simple,\ntitle={Simple and Controllable Music Generation},\nauthor={Jade Copet and Felix Kreuk and Itai Gat and Tal Remez and David Kant and Gabriel Synnaeve and Yossi Adi and Alexandre D{\\'e}fossez},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jtiQ26sCJi}\n}", "github": "", "project": "", "reviewers": "VPXA;A118;7q8g;gnNB", "pdf_size": 653899, "rating": "5;5;7;8", "confidence": "4;4;3;4", "soundness": "3;3;4;3", "novelty": "3;2;3;3", "presentation": "4;2;3;4", "wc_summary": "53;67;86;80", "wc_strengths": "72;55;109;116", "wc_weaknesses": "256;98;136;328", "wc_questions": "87;35;82;21", "wc_limitations": "21;1;59;16", "wc_review": "489;256;472;561", "wc_reply_reviewers": "37;0;85;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 71.5, 12.698425099200294 ], "wc_strengths_avg": [ 88.0, 25.347583711273153 ], "wc_weaknesses_avg": [ 204.5, 92.11270270706424 ], "wc_questions_avg": [ 56.25, 28.734778579275673 ], "wc_limitations_avg": [ 24.25, 21.370248009791556 ], "wc_review_avg": [ 444.5, 113.84309377384295 ], "wc_reply_reviewers_avg": [ 30.5, 34.90343822605446 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 552, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16654940440682509571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "facebook.com;fb.com;technion.ac.il;meta.com;ai.meta.com;fb.com;meta.com;meta.com", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;0;0", "aff_unique_norm": "Meta;Technion - Israel Institute of Technology", "aff_unique_dep": "Facebook AI Research;", "aff_unique_url": "https://research.facebook.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "FAIR;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Deep Reinforcement Learning with Plasticity Injection", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70670", "id": "jucDLW6G9l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/75101364dc3aa7772d27528ea504472b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jucDLW6G9l", "openreview": "https://openreview.net/forum?id=jucDLW6G9l", "poster": "/media/PosterPDFs/NeurIPS%202023/70670.png?t=1701380557.0954864", "slides": "https://nips.cc/virtual/2023/poster/70670", "video": "https://nips.cc/virtual/2023/poster/70670", "author_site": "Evgenii Nikishin, Junhyuk Oh, Georg Ostrovski, Clare Lyle, Razvan Pascanu, Will Dabney, Andre Barreto", "tldr": "", "abstract": "A growing body of evidence suggests that neural networks employed in deep reinforcement learning (RL) gradually lose their plasticity, the ability to learn from new data; however, the analysis and mitigation of this phenomenon is hampered by the complex relationship between plasticity, exploration, and performance in RL. This paper introduces plasticity injection, a minimalistic intervention that increases the network plasticity without changing the number of trainable parameters or biasing the predictions. The applications of this intervention are two-fold: first, as a diagnostic tool \u2014 if injection increases the performance, we may conclude that an agent's network was losing its plasticity. This tool allows us to identify a subset of Atari environments where the lack of plasticity causes performance plateaus, motivating future studies on understanding and combating plasticity loss. Second, plasticity injection can be used to improve the computational efficiency of RL training if the agent has to re-learn from scratch due to exhausted plasticity or by growing the agent's network dynamically without compromising performance. 
The results on Atari show that plasticity injection attains stronger performance compared to alternative methods while being computationally efficient.", "keywords": "deep reinforcement learning;continual learning;loss of plasticity", "primary_area": "", "supplementary_material": "/attachment/4c6e6562a1d839129838aeafbea2b76740e4b843.zip", "author": "Evgenii Nikishin;Junhyuk Oh;Georg Ostrovski;Clare Lyle;Razvan Pascanu;Will Dabney;Andre Barreto", "authorids": "~Evgenii_Nikishin1;~Junhyuk_Oh2;~Georg_Ostrovski1;~Clare_Lyle1;~Razvan_Pascanu1;~Will_Dabney1;~Andre_Barreto1", "gender": "M;M;;M;M;M;M", "homepage": "http://evgenii-nikishin.github.io/;http://ostrovski.co.uk/;;https://razp.info;;https://sites.google.com/corp/view/andrebarreto/about;https://junhyuk.com/", "dblp": "294/4770;133/8425;192/1910;65/8368.html;https://dblp.uni-trier.de/pers/hd/d/Dabney:Will;72/953;167/4825", "google_scholar": "ez9FSEAAAAAJ;;;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.co.uk/citations?user=dR-7QW8AAAAJ;https://scholar.google.co.uk/citations?user=H-xtdV4AAAAJ;LNUeOu4AAAAJ", "orcid": ";0000-0001-7707-2633;;;;;", "linkedin": ";georg-ostrovski-5690a538;;;;;", "or_profile": "~Evgenii_Nikishin1;~Georg_Ostrovski1;~Clare_Lyle1;~Razvan_Pascanu1;~Will_Dabney1;~Andre_Barreto1;~Junhyuk_Oh1", "aff": "University of Montreal;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "umontreal.ca;deepmind.com;google.com;google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Researcher;Research Scientist;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nnikishin2023deep,\ntitle={Deep Reinforcement Learning with Plasticity Injection},\nauthor={Evgenii Nikishin and Junhyuk Oh and Georg Ostrovski and Clare Lyle and Razvan Pascanu and Will Dabney and Andre Barreto},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jucDLW6G9l}\n}", "github": "", "project": "", "reviewers": "Foz7;x17L;P5kx;227D", "pdf_size": 2165841, "rating": "5;7;7;7", "confidence": "3;4;4;4", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "3;4;4;3", "wc_summary": "146;165;59;82", "wc_strengths": "15;143;166;71", "wc_weaknesses": "67;128;166;112", "wc_questions": "276;149;113;98", "wc_limitations": "12;6;25;1", "wc_review": "516;591;529;364", "wc_reply_reviewers": "54;36;17;209", "wc_reply_authors": "63;15;13;296", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 113.0, 43.78926809162263 ], "wc_strengths_avg": [ 98.75, 59.71756441784946 ], "wc_weaknesses_avg": [ 118.25, 35.499119707395565 ], "wc_questions_avg": [ 159.0, 70.04641318440224 ], "wc_limitations_avg": [ 11.0, 8.972179222463181 ], "wc_review_avg": [ 500.0, 83.47754189001974 ], "wc_reply_reviewers_avg": [ 79.0, 76.18726927774745 ], "wc_reply_authors_avg": [ 96.75, 116.76552359322507 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4327396931034750894&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": 
"umontreal.ca;deepmind.com;google.com;google.com;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "University of Montreal;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://wwwumontreal.ca;https://deepmind.com", "aff_unique_abbr": "UM;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Improving Language Plasticity via Pretraining with Active Forgetting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70669", "id": "jvEbQBxd8X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6450ea28ebbc8437bc38775157818172-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jvEbQBxd8X", "openreview": "https://openreview.net/forum?id=jvEbQBxd8X", "poster": "/media/PosterPDFs/NeurIPS%202023/70669.png?t=1701951284.8183005", "slides": "https://nips.cc/virtual/2023/poster/70669", "video": "https://nips.cc/virtual/2023/poster/70669", "author_site": "Yihong Chen, Kelly Marchisio, Roberta Raileanu, David Adelani, Pontus Lars Erik Saito Stenetorp, Sebastian Riedel, Mikel Artetxe", "tldr": "", "abstract": "Pretrained language models (PLMs) are today the primary model for natural language processing. Despite their impressive downstream performance, it can be difficult to apply PLMs to new languages, a barrier to making their capabilities universally accessible. While prior work has shown it possible to address this issue by learning a new embedding layer for the new language, doing so is both data and compute inefficient. We propose to use an active forgetting mechanism during pretraining, as a simple way of creating PLMs that can quickly adapt to new languages. Concretely, by resetting the embedding layer every K updates during pretraining, we encourage the PLM to improve its ability of learning new embeddings within limited number of updates, similar to a meta-learning effect. Experiments with RoBERTa show that models pretrained with our forgetting mechanism not only demonstrate faster convergence during language adaptation, but also outperform standard ones in a low-data regime, particularly for languages that are distant from English. 
Code will be available at https://github.com/facebookresearch/language-model-plasticity.", "keywords": "plasticity;continual learning;meta-learning;embeddings;cross-lingual transfer;forgetting", "primary_area": "", "supplementary_material": "/attachment/b56b3373f4f1c3c540d52a667bc014176032ffab.pdf", "author": "Yihong Chen;Kelly Marchisio;Roberta Raileanu;David Ifeoluwa Adelani;Pontus Stenetorp;Sebastian Riedel;Mikel Artetxe", "authorids": "~Yihong_Chen3;~Kelly_Marchisio1;~Roberta_Raileanu2;~David_Ifeoluwa_Adelani1;~Pontus_Stenetorp1;~Sebastian_Riedel1;~Mikel_Artetxe1", "gender": ";;;M;Not Specified;M;M", "homepage": ";http://kellymarchisio.github.io/;;https://dadelani.github.io/;https://pontus.stenetorp.se;https://www.riedelcastro.org/;http://www.mikelartetxe.com", "dblp": ";247/6476;;230/6973;44/8358.html;18/3348-1.html;168/0354", "google_scholar": ";BE9tLKwAAAAJ;;https://scholar.google.ca/citations?user=W9sTkS0AAAAJ;;https://scholar.google.com.tw/citations?user=AcCtcrsAAAAJ;N5InzP8AAAAJ", "orcid": ";;;0000-0002-0193-2083;;;", "linkedin": ";kelly-marchisio;;david-adelani-7557b337/;;;artetxem", "or_profile": "~Yihong_Chen3;~Kelly_Marchisio1;~Roberta_Raileanu2;~David_Ifeoluwa_Adelani1;~Pontus_Stenetorp1;~Sebastian_Riedel1;~Mikel_Artetxe1", "aff": ";Johns Hopkins University;;University College London, University of London;University College London;University College London;Facebook AI Research", "aff_domain": ";jhu.edu;;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;fb.com", "position": ";PhD student;;Postdoc;Associate Professor;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nchen2023improving,\ntitle={Improving Language Plasticity via Pretraining with Active Forgetting},\nauthor={Yihong Chen and Kelly Marchisio and Roberta Raileanu and David Ifeoluwa Adelani and Pontus Stenetorp and Sebastian Riedel and Mikel Artetxe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jvEbQBxd8X}\n}", "github": "", "project": "", "reviewers": "ar7w;1FBf;TTi1;LV8K;Divy", "pdf_size": 4999160, "rating": "4;6;6;7;7", "confidence": "4;4;4;4;3", "soundness": "3;3;2;4;3", "novelty": "2;2;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "39;80;114;55;30", "wc_strengths": "43;42;71;83;15", "wc_weaknesses": "122;94;194;49;7", "wc_questions": "46;95;19;271;11", "wc_limitations": "6;1;1;29;1", "wc_review": "256;312;399;487;64", "wc_reply_reviewers": "0;60;18;16;4", "wc_reply_authors": "0;213;0;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 63.6, 30.388155587333692 ], "wc_strengths_avg": [ 50.8, 23.93658288060349 ], "wc_weaknesses_avg": [ 93.2, 63.835413369069684 ], "wc_questions_avg": [ 88.4, 95.90745539320704 ], "wc_limitations_avg": [ 7.6, 10.873821775254548 ], "wc_review_avg": [ 303.6, 143.16507954106686 ], "wc_reply_reviewers_avg": [ 19.6, 21.331666601557412 ], "wc_reply_authors_avg": [ 42.6, 85.20000000000002 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4564354645876385, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10372560598329817447&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, 
"email": ";jhu.edu;;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;fb.com", "author_num": 7, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Johns Hopkins University;University College London;Meta", "aff_unique_dep": ";;Facebook AI Research", "aff_unique_url": "https://www.jhu.edu;https://www.ucl.ac.uk;https://research.facebook.com", "aff_unique_abbr": "JHU;UCL;FAIR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Auxiliary Losses for Learning Generalizable Concept-based Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70668", "id": "jvYXln6Gzn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/555479a201da27c97aaeed842d16ca49-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jvYXln6Gzn", "openreview": "https://openreview.net/forum?id=jvYXln6Gzn", "poster": "/media/PosterPDFs/NeurIPS%202023/70668.png?t=1701669912.8333611", "slides": "https://nips.cc/virtual/2023/poster/70668", "video": "https://nips.cc/virtual/2023/poster/70668", "author_site": "Ivaxi Sheth, Samira Ebrahimi Kahou", "tldr": "", "abstract": "The increasing use of neural networks in various applications has lead to increasing apprehensions, underscoring the necessity to understand their operations beyond mere final predictions. As a solution to enhance model transparency, Concept Bottleneck Models (CBMs) have gained popularity since their introduction. CBMs essentially limit the latent space of a model to human-understandable high-level concepts. While beneficial, CBMs have been reported to often learn irrelevant concept representations that consecutively damage model performance. To overcome the performance trade-off, we propose a cooperative-Concept Bottleneck Model (coop-CBM). The concept representation of our model is particularly meaningful when fine-grained concept labels are absent. Furthermore, we introduce the concept orthogonal loss (COL) to encourage the separation between the concept representations and to reduce the intra-concept distance. This paper presents extensive experiments on real-world datasets for image classification tasks, namely CUB, AwA2, CelebA and TIL. We also study the performance of coop-CBM models under various distributional shift settings. 
We show that our proposed method achieves higher accuracy in all distributional shift settings even compared to the black-box models with the highest concept accuracy.", "keywords": "Interpretability;concept bottleneck models;explainability", "primary_area": "", "supplementary_material": "", "author": "Ivaxi Sheth;Samira Ebrahimi Kahou", "authorids": "~Ivaxi_Sheth1;~Samira_Ebrahimi_Kahou1", "gender": "F;F", "homepage": ";https://saebrahimi.github.io", "dblp": "291/2912.html;20/11069", "google_scholar": "Isz5M1UAAAAJ;https://scholar.google.ca/citations?user=F99FuaAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ivaxi_Sheth1;~Samira_Ebrahimi_Kahou1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;\u00c9cole de technologie sup\u00e9rieure", "aff_domain": "mila.umontreal.ca;etsmtl.ca", "position": "Researcher;Associate Professor", "bibtex": "@inproceedings{\nsheth2023auxiliary,\ntitle={Auxiliary Losses for Learning Generalizable Concept-based Models},\nauthor={Ivaxi Sheth and Samira Ebrahimi Kahou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jvYXln6Gzn}\n}", "github": "", "project": "", "reviewers": "VWed;K2hb;1MWB;Mi12;G8F4", "pdf_size": 925545, "rating": "6;6;6;6;6", "confidence": "3;4;5;3;4", "soundness": "3;2;3;3;2", "novelty": "3;2;3;2;2", "presentation": "3;3;4;3;2", "wc_summary": "110;126;72;30;70", "wc_strengths": "85;208;38;26;54", "wc_weaknesses": "198;723;107;160;238", "wc_questions": "506;1095;348;81;118", "wc_limitations": "21;39;1;21;15", "wc_review": "920;2191;566;318;495", "wc_reply_reviewers": "609;1518;832;119;89", "wc_reply_authors": "845;930;1207;435;55", "reply_reviewers": "3;2;3;2;1", "reply_authors": "4;3;3;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 81.6, 33.6666006600013 ], "wc_strengths_avg": [ 82.2, 65.9405793119836 ], "wc_weaknesses_avg": [ 285.2, 223.12812462798138 ], "wc_questions_avg": [ 429.6, 367.11556763504325 ], "wc_limitations_avg": [ 19.4, 12.2245654319489 ], "wc_review_avg": [ 898.0, 675.4799774974829 ], "wc_reply_reviewers_avg": [ 633.4, 526.0291246689674 ], "wc_reply_authors_avg": [ 694.4, 404.24428258170826 ], "reply_reviewers_avg": [ 2.2, 0.7483314773547882 ], "reply_authors_avg": [ 3.0, 0.6324555320336759 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1580700940114444087&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "mila.umontreal.ca;etsmtl.ca", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Montreal;\u00c9cole de technologie sup\u00e9rieure", "aff_unique_dep": "Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://www.mila.quebec;https://www.etsmtl.ca", "aff_unique_abbr": "MILA;ETS", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Ess-InfoGAIL: Semi-supervised Imitation Learning from Imbalanced Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70667", "id": "jxhUNLoi4m", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/bcf26768143c94bd36e363cd4bf5daf0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jxhUNLoi4m", "openreview": "https://openreview.net/forum?id=jxhUNLoi4m", "poster": "/media/PosterPDFs/NeurIPS%202023/70667.png?t=1697703066.5818036", "slides": "https://nips.cc/virtual/2023/poster/70667", "video": "https://nips.cc/virtual/2023/poster/70667", "author_site": "Huiqiao Fu, Kaiqiang Tang, Yuanyang Lu, Yuanyang Lu, Yiming Qi, Guizhou Deng, Flood Sung, Chunlin Chen", "tldr": "", "abstract": "Imitation learning aims to reproduce expert behaviors without relying on an explicit reward signal. However, real-world demonstrations often present challenges, such as multi-modal, data imbalance, and expensive labeling processes. In this work, we propose a novel semi-supervised imitation learning architecture that learns disentangled behavior representations from imbalanced demonstrations using limited labeled data. Specifically, our method consists of three key components. First, we adapt the concept of semi-supervised generative adversarial networks to the imitation learning context. Second, we employ a learnable latent distribution to align the generated and expert data distributions. Finally, we utilize a regularized information maximization approach in conjunction with an approximate label prior to further improve the semi-supervised learning performance. Experimental results demonstrate the efficiency of our method in learning multi-modal behaviors from imbalanced demonstrations compared to baseline methods.", "keywords": "Generative adversarial imitation learning;semi-supervised learning;multi-modal behaviors;imbalanced data", "primary_area": "", "supplementary_material": "/attachment/601c355c7e100074230a2cca39d822a8e21181d1.pdf", "author": "Huiqiao Fu;Kaiqiang Tang;Yuanyang Lu;Yiming Qi;Guizhou Deng;Flood Sung;Chunlin Chen", "authorids": "~Huiqiao_Fu1;~Kaiqiang_Tang1;522022150069@smail.nju.edu.cn;502022150005@smail.nju.edu.cn;gzdeng@mails.swust.edu.cn;~Flood_Sung1;~Chunlin_Chen1", "gender": "M;M;;;;M;M", "homepage": ";https://scholar.google.com/citations?user=gkKO99wAAAAJ&hl=zh-CN;;;;;https://sme.nju.edu.cn/ccl/list.htm", "dblp": "243/7065;;;;;202/2496;68/6992.html", "google_scholar": ";gkKO99wAAAAJ;;;;;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Huiqiao_Fu1;~Kaiqiang_Tang1;522022150069@smail.nju.edu.cn;502022150005@smail.nju.edu.cn;gzdeng@mails.swust.edu.cn;~Flood_Sung1;~Chunlin_Chen1", "aff": "Nanjing University;Nanjing University;;;;Bytadance AI Lab;Nanjing University", "aff_domain": "nju.edu.cn;smail.nju.edu.cn;;;;bytedance.com;nju.edu.cn", "position": "PhD student;PhD student;;;;Researcher;Full Professor", "bibtex": "@inproceedings{\nfu2023essinfogail,\ntitle={Ess-Info{GAIL}: Semi-supervised Imitation Learning from Imbalanced Demonstrations},\nauthor={Huiqiao Fu and Kaiqiang Tang and Yuanyang Lu and Yiming Qi and Guizhou Deng and Flood Sung and Chunlin Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jxhUNLoi4m}\n}", "github": "", "project": "", "reviewers": "iRnn;18Ku;dvkr;fzCM", "pdf_size": 1912524, "rating": "4;5;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "88;123;48;462", "wc_strengths": "35;50;48;69", "wc_weaknesses": "123;177;203;160", "wc_questions": "56;57;4;58", "wc_limitations": "14;12;16;46", "wc_review": "316;419;319;795", 
"wc_reply_reviewers": "0;55;0;65", "wc_reply_authors": "25;44;24;46", "reply_reviewers": "0;2;0;1", "reply_authors": "2;3;2;3", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 180.25, 164.81865034030585 ], "wc_strengths_avg": [ 50.5, 12.134661099511597 ], "wc_weaknesses_avg": [ 165.75, 29.046299247924853 ], "wc_questions_avg": [ 43.75, 22.960564017462637 ], "wc_limitations_avg": [ 22.0, 13.92838827718412 ], "wc_review_avg": [ 462.25, 196.53418913766632 ], "wc_reply_reviewers_avg": [ 30.0, 30.20761493398643 ], "wc_reply_authors_avg": [ 34.75, 10.280442597476044 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4093025539641290357&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nju.edu.cn;smail.nju.edu.cn;;;;bytedance.com;nju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nanjing University;ByteDance", "aff_unique_dep": ";AI Lab", "aff_unique_url": "https://www.nju.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "Nanjing U;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Generalized test utilities for long-tail performance in extreme multi-label classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70666", "id": "jze2r6RDFz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46994b3d6dd0fd5fca5f780af6259db5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jze2r6RDFz", "openreview": "https://openreview.net/forum?id=jze2r6RDFz", "poster": "/media/PosterPDFs/NeurIPS%202023/70666.png?t=1702171943.4034529", "slides": "https://nips.cc/virtual/2023/poster/70666", "video": "https://nips.cc/virtual/2023/poster/70666", "author_site": "Erik Schultheis, Marek Wydmuch, Wojciech Kotlowski, Rohit Babbar, Krzysztof Dembczynski", "tldr": "", "abstract": "Extreme multi-label classification (XMLC) is the task of selecting a small subset of relevant labels from a very large set of possible labels. \nAs such, it is characterized by long-tail labels, i.e., most labels have very few positive instances. With standard performance measures such as precision@k, a classifier can ignore tail labels and still report good performance. However, it is often argued that correct predictions in the tail are more \"interesting\" or \"rewarding,\" but the community has not yet settled on a metric capturing this intuitive concept. The existing propensity-scored metrics fall short on this goal by confounding the problems of long-tail and missing labels. In this paper, we analyze generalized metrics budgeted \"at k\" as an alternative solution. To tackle the challenging problem of optimizing these metrics, we formulate it in the expected test utility (ETU) framework, which aims to optimize the expected performance on a given test set. We derive optimal prediction rules and construct their computationally efficient approximations with provable regret guarantees and being robust against model misspecification. 
Our algorithm, based on block coordinate descent, scales effortlessly to XMLC problems and obtains promising results in terms of long-tail performance.", "keywords": "extreme multi-label classification;long-tail labels performance;complex performance measures", "primary_area": "", "supplementary_material": "", "author": "Erik Schultheis;Marek Wydmuch;Wojciech Kotlowski;Rohit Babbar;Krzysztof Dembczynski", "authorids": "~Erik_Schultheis1;~Marek_Wydmuch1;~Wojciech_Kotlowski1;~Rohit_Babbar1;~Krzysztof_Dembczynski1", "gender": ";M;M;;", "homepage": "https://www.aalto.fi/en/people/erik-schultheis;https://mwydmuch.pl;;;https://research.yahoo.com/researchers/kdembczynski", "dblp": "268/7969;180/5883;63/4977;;91/3569", "google_scholar": "MGxmO7EAAAAJ;lMXyoEAAAAAJ;;;https://scholar.google.pl/citations?user=SetMoyoAAAAJ", "orcid": "0000-0003-1685-8397;0000-0002-6598-6304;;;0000-0001-7477-6758", "linkedin": ";marekwydmuch;;;krzysztof-dembczynski-36155344/", "or_profile": "~Erik_Schultheis1;~Marek_Wydmuch1;~Wojciech_Kotlowski1;~Rohit_Babbar1;~Krzysztof_Dembczynski1", "aff": "Aalto University;Poznan University of Technology;Poznan University of Technology;;Yahoo Research", "aff_domain": "aalto.fi;put.poznan.pl;put.poznan.pl;;yahooinc.com", "position": "PhD student;PhD student;Assistant Professor;;Senior Research Scientist", "bibtex": "@inproceedings{\nschultheis2023generalized,\ntitle={Generalized test utilities for long-tail performance in extreme multi-label classification},\nauthor={Erik Schultheis and Marek Wydmuch and Wojciech Kotlowski and Rohit Babbar and Krzysztof Dembczynski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jze2r6RDFz}\n}", "github": "", "project": "", "reviewers": "m6Nv;UXPM;ELgR;4ZEa;4FGQ", "pdf_size": 991160, "rating": "5;5;5;6;7", "confidence": "5;2;3;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "43;39;97;87;53", "wc_strengths": "46;7;42;22;146", "wc_weaknesses": "58;48;138;90;26", "wc_questions": "29;23;25;210;40", "wc_limitations": "1;1;1;12;19", "wc_review": "177;118;303;421;284", "wc_reply_reviewers": "0;9;85;11;11", "wc_reply_authors": "0;0;76;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 63.8, 23.68459414893994 ], "wc_strengths_avg": [ 52.6, 48.775403637489255 ], "wc_weaknesses_avg": [ 72.0, 38.90501253052105 ], "wc_questions_avg": [ 65.4, 72.53854147968514 ], "wc_limitations_avg": [ 6.8, 7.4404300950953095 ], "wc_review_avg": [ 260.6, 105.25891886201377 ], "wc_reply_reviewers_avg": [ 23.2, 31.166648841349627 ], "wc_reply_authors_avg": [ 15.2, 30.400000000000002 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.15309310892394862, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4902452323564791420&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 13, "email": "aalto.fi;put.poznan.pl;put.poznan.pl;;yahooinc.com", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Aalto University;Poznan University of Technology;Yahoo", "aff_unique_dep": ";;Yahoo Research", "aff_unique_url": 
"https://www.aalto.fi;https://www.put.poznan.pl/;https://research.yahoo.com", "aff_unique_abbr": "Aalto;PUT;Yahoo Research", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Finland;Poland;United States" }, { "title": "Metropolis Sampling for Constrained Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70665", "id": "jzseUq55eP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c47bfcc8e2eccdc540fad1e25f13aa4d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=jzseUq55eP", "openreview": "https://openreview.net/forum?id=jzseUq55eP", "poster": "/media/PosterPDFs/NeurIPS%202023/70665.png?t=1702061096.0423274", "slides": "https://nips.cc/virtual/2023/poster/70665", "video": "https://nips.cc/virtual/2023/poster/70665", "author_site": "Nic Fishman, Leo Klarner, Emile Mathieu, Michael Hutchinson, Valentin De Bortoli", "tldr": "", "abstract": "Denoising diffusion models have recently emerged as the predominant paradigm for generative modelling on image domains. In addition, their extension to Riemannian manifolds has facilitated a range of applications across the natural sciences. While many of these problems stand to benefit from the ability to specify arbitrary, domain-informed constraints, this setting is not covered by the existing (Riemannian) diffusion model methodology. Recent work has attempted to address this issue by constructing novel noising processes based on the reflected Brownian motion and logarithmic barrier methods. However, the associated samplers are either computationally burdensome or only apply to convex subsets of Euclidean space. In this paper, we introduce an alternative, simple noising scheme based on Metropolis sampling that affords substantial gains in computational efficiency and empirical performance compared to the earlier samplers. Of independent interest, we prove that this new process corresponds to a valid discretisation of the reflected Brownian motion. 
We demonstrate the scalability and flexibility of our approach on a range of problem settings with convex and non-convex constraints, including applications from geospatial modelling, robotics and protein design.", "keywords": "diffusion model;generative modelling;manifold;constraints;proteins;robotics", "primary_area": "", "supplementary_material": "", "author": "Nic Fishman;Leo Klarner;Emile Mathieu;Michael John Hutchinson;Valentin De Bortoli", "authorids": "~Nic_Fishman1;~Leo_Klarner2;~Emile_Mathieu1;~Michael_John_Hutchinson1;~Valentin_De_Bortoli1", "gender": "M;;M;M;", "homepage": "https://njw.fish;;http://emilemathieu.fr;https://mjhutchinson.github.io;https://vdeborto.github.io/", "dblp": ";;223/6084.html;352/6313.html;224/9338", "google_scholar": "saYhrnwAAAAJ;;g9BjTqgAAAAJ;ot1m2GUAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Nic_Fishman1;~Leo_Klarner2;~Emile_Mathieu1;~Michael_John_Hutchinson1;~Valentin_De_Bortoli1", "aff": "Oxford, University of Oxford;;University of Cambridge;University of Oxford;University of Oxford", "aff_domain": "stats.ox.ac.uk;;cam.ac.uk;ox.ac.uk;ox.ac.uk", "position": "MS student;;Postdoc;PhD student;Postdoc", "bibtex": "@inproceedings{\nfishman2023metropolis,\ntitle={Metropolis Sampling for Constrained Diffusion Models},\nauthor={Nic Fishman and Leo Klarner and Emile Mathieu and Michael John Hutchinson and Valentin De Bortoli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=jzseUq55eP}\n}", "github": "", "project": "", "reviewers": "vCsC;fvg9;u5aU;a8R9", "pdf_size": 14241082, "rating": "3;3;5;6", "confidence": "4;5;3;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;2", "wc_summary": "62;47;61;81", "wc_strengths": "40;32;42;68", "wc_weaknesses": "497;197;167;196", "wc_questions": "2;8;69;154", "wc_limitations": "20;1;16;1", "wc_review": "621;285;355;500", "wc_reply_reviewers": "287;0;47;38", "wc_reply_authors": "1058;36;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.75, 12.090802289343747 ], "wc_strengths_avg": [ 45.5, 13.518505834595775 ], "wc_weaknesses_avg": [ 264.25, 134.91733580233492 ], "wc_questions_avg": [ 58.25, 61.18159445454164 ], "wc_limitations_avg": [ 9.5, 8.616843969807043 ], "wc_review_avg": [ 440.25, 130.01033612755563 ], "wc_reply_reviewers_avg": [ 93.0, 113.38650713378554 ], "wc_reply_authors_avg": [ 273.5, 453.1696702119417 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9059692142538020771&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stats.ox.ac.uk;;cam.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "Oxford;Cambridge", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Oxford;Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Lookaround Optimizer: $k$ steps
around, 1 step average", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70664", "id": "k1Xy5zCNOJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5b4b967d4222d87fa5b28b6ec7144058-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=k1Xy5zCNOJ", "openreview": "https://openreview.net/forum?id=k1Xy5zCNOJ", "poster": "/media/PosterPDFs/NeurIPS%202023/70664.png?t=1702106112.162707", "slides": "https://nips.cc/virtual/2023/poster/70664", "video": "https://nips.cc/virtual/2023/poster/70664", "author_site": "Jiangtao Zhang, Shunyu Liu, Jie Song, Tongtian Zhu, Zhengqi Xu, Mingli Song", "tldr": "", "abstract": "Weight Average (WA) is an active research topic due to its simplicity in ensembling deep networks and its effectiveness in promoting generalization. Existing weight average approaches, however, are often carried out along only one training trajectory in a post-hoc manner (i.e., the weights are averaged after the entire training process is finished), which significantly degrades the diversity between networks and thus impairs the effectiveness of WA. In this paper, inspired by weight average, we propose Lookaround, a straightforward yet effective SGD-based optimizer leading to flatter minima with better generalization. Specifically, Lookaround iterates two steps during the whole training period: the around step and the average step. In each iteration, 1) the around step starts from a common point and trains multiple networks simultaneously, each on data transformed by a different data augmentation, and 2) the average step averages these trained networks to get the averaged network, which serves as the starting point for the next iteration. The around step improves the functional diversity while the average step guarantees the weight locality of these networks during the whole training, which is essential for WA to work. We theoretically explain the superiority of Lookaround by convergence analysis, and conduct extensive experiments to evaluate Lookaround on popular benchmarks including CIFAR and ImageNet with both CNNs and ViTs, demonstrating clear superiority over the state of the art.
Our code is available at https://github.com/Ardcy/Lookaround.", "keywords": "Deep Learning;Computer Vision;Mode Connectivity;Weight Average", "primary_area": "", "supplementary_material": "/attachment/8b81d9bb0790fff27f3a80e4a2b08521dc1aa2a9.zip", "author": "Jiangtao Zhang;Shunyu Liu;Jie Song;Tongtian Zhu;Zhengqi Xu;Mingli Song", "authorids": "~Jiangtao_Zhang1;~Shunyu_Liu1;~Jie_Song3;~Tongtian_Zhu1;~Zhengqi_Xu2;~Mingli_Song1", "gender": "M;;M;M;M;M", "homepage": "https://github.com/Ardcy;https://liushunyu.github.io/;https://person.zju.edu.cn/en/NB20021;https://raiden-zhu.github.io;https://github.com/hongyaohongyao;https://person.zju.edu.cn/msong", "dblp": ";235/0752-1;09/4756-11.html;323/5165;;71/5333", "google_scholar": ";4U-X6d4AAAAJ;4OjO-WYAAAAJ;QvBDUsIAAAAJ;;7oLbhAwAAAAJ", "orcid": ";0000-0003-0584-9129;0000-0003-3671-6521;;;0000-0003-2621-6048", "linkedin": ";;;;;", "or_profile": "~Jiangtao_Zhang1;~Shunyu_Liu1;~Jie_Song3;~Tongtian_Zhu1;~Zhengqi_Xu2;~Mingli_Song1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "MS student;PhD student;Assistant Professor;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nzhang2023lookaround,\ntitle={Lookaround Optimizer: \\$k\\$ steps around, 1 step average},\nauthor={Jiangtao Zhang and Shunyu Liu and Jie Song and Tongtian Zhu and Zhengqi Xu and Mingli Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=k1Xy5zCNOJ}\n}", "github": "", "project": "", "reviewers": "d83u;vFuf;HsP8;V9QB;vocv;3YPD", "pdf_size": 830138, "rating": "5;5;5;6;6;8", "confidence": "3;4;2;2;3;4", "soundness": "1;3;2;4;3;3", "novelty": "2;3;3;2;2;3", "presentation": "2;2;3;3;3;4", "wc_summary": "54;121;100;45;114;103", "wc_strengths": "93;46;48;22;42;166", "wc_weaknesses": "282;583;196;94;162;144", "wc_questions": "310;107;18;19;2;161", "wc_limitations": "49;33;8;19;6;36", "wc_review": "788;890;370;199;326;610", "wc_reply_reviewers": "723;927;101;83;51;30", "wc_reply_authors": "874;1928;314;259;103;27", "reply_reviewers": "3;3;2;2;1;1", "reply_authors": "4;6;4;2;2;2", "rating_avg": [ 5.833333333333333, 1.0671873729054748 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 89.5, 29.227555491350966 ], "wc_strengths_avg": [ 69.5, 48.11704479703632 ], "wc_weaknesses_avg": [ 243.5, 162.21975835267418 ], "wc_questions_avg": [ 102.83333333333333, 108.5593795528 ], "wc_limitations_avg": [ 25.166666666666668, 15.528647361856374 ], "wc_review_avg": [ 530.5, 251.4423393146031 ], "wc_reply_reviewers_avg": [ 319.1666666666667, 363.19068300580375 ], "wc_reply_authors_avg": [ 584.1666666666666, 659.6174185760175 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.49071198499986 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.38254602783800296, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3708517492267790983&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", 
"aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "L-C2ST: Local Diagnostics for Posterior Approximations in Simulation-Based Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70663", "id": "k2UVKezeWn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b0313c2f4501a81d0e0d4a1e8fbf4995-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=k2UVKezeWn", "openreview": "https://openreview.net/forum?id=k2UVKezeWn", "poster": "/media/PosterPDFs/NeurIPS%202023/70663.png?t=1701797054.9423912", "slides": "https://nips.cc/virtual/2023/poster/70663", "video": "https://nips.cc/virtual/2023/poster/70663", "author_site": "Julia Linhart, Alexandre Gramfort, Pedro Rodrigues", "tldr": "", "abstract": "Many recent works in simulation-based inference (SBI) rely on deep generative models to approximate complex, high-dimensional posterior distributions. However, evaluating whether or not these approximations can be trusted remains a challenge. Most approaches evaluate the posterior estimator only in expectation over the observation space. This limits their interpretability and is not sufficient to identify for which observations the approximation can be trusted or should be improved. Building upon the well-known classifier two-sample test (C2ST), we introduce $\\ell$-C2ST, a new method that allows for a local evaluation of the posterior estimator at any given observation. It offers theoretically grounded and easy to interpret -- e.g. graphical -- diagnostics, and unlike C2ST, does not require access to samples from the true posterior. In the case of normalizing flow-based posterior estimators, $\\ell$-C2ST can be specialized to offer better statistical power, while being computationally more efficient. On standard SBI benchmarks, $\\ell$-C2ST provides comparable results to C2ST and outperforms alternative local approaches such as coverage tests based on highest predictive density (HPD). We further highlight the importance of local evaluation and the benefit of interpretability of $\\ell$-C2ST on a challenging application from computational neuroscience.", "keywords": "machine learning;calibration;simulation-based inference;neuroscience;normalizing flows;classifier two-sample tests", "primary_area": "", "supplementary_material": "/attachment/dfd3b21ab3fd09d369d46c357046e644bfa892b0.zip", "author": "Julia Linhart;Alexandre Gramfort;Pedro L. C. Rodrigues", "authorids": "~Julia_Linhart1;~Alexandre_Gramfort1;~Pedro_L._C._Rodrigues1", "gender": "F;M;M", "homepage": ";http://alexandre.gramfort.net;https://plcrodrigues.github.io/", "dblp": ";15/7980;https://dblp.org/rec/journals/corr/abs-2211-09602", "google_scholar": "cGG7WiMAAAAJ;fhxshS0AAAAJ;8Jiux08AAAAJ", "orcid": ";0000-0001-9791-4404;", "linkedin": "julia-linhart-70817315b/;alexandregramfort/;", "or_profile": "~Julia_Linhart1;~Alexandre_Gramfort1;~Pedro_L._C._Rodrigues1", "aff": "Ecole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es;Meta;Inria", "aff_domain": "ensta.fr;meta.com;inria.fr", "position": "Assistant Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nlinhart2023lcst,\ntitle={L-C2{ST}: Local Diagnostics for Posterior Approximations in Simulation-Based Inference},\nauthor={Julia Linhart and Alexandre Gramfort and Pedro L. C. 
Rodrigues},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=k2UVKezeWn}\n}", "github": "", "project": "", "reviewers": "ukES;Ppab;Esye", "pdf_size": 5574974, "rating": "5;6;7", "confidence": "4;4;2", "soundness": "3;2;4", "novelty": "3;3;3", "presentation": "2;4;4", "wc_summary": "40;184;105", "wc_strengths": "58;79;94", "wc_weaknesses": "316;549;116", "wc_questions": "28;185;3", "wc_limitations": "7;28;1", "wc_review": "449;1025;319", "wc_reply_reviewers": "382;22;0", "wc_reply_authors": "860;0;0", "reply_reviewers": "3;1;0", "reply_authors": "4;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 109.66666666666667, 58.88029287366775 ], "wc_strengths_avg": [ 77.0, 14.7648230602334 ], "wc_weaknesses_avg": [ 327.0, 176.94255188243068 ], "wc_questions_avg": [ 72.0, 80.5522604690065 ], "wc_limitations_avg": [ 12.0, 11.575836902790225 ], "wc_review_avg": [ 597.6666666666666, 306.7956250593472 ], "wc_reply_reviewers_avg": [ 134.66666666666666, 175.12154509241734 ], "wc_reply_authors_avg": [ 286.6666666666667, 405.4078878802872 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17854327922214168347&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 10, "email": "ensta.fr;meta.com;inria.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ecole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es;Meta;INRIA", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.ensae.fr;https://meta.com;https://www.inria.fr", "aff_unique_abbr": "ENSTA;Meta;Inria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;United States" }, { "title": "Streaming Algorithms and Lower Bounds for Estimating Correlation Clustering Cost", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70662", "id": "k4ZCORSFEd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee1a1ecc92f35702b5c29dad3dc909ea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=k4ZCORSFEd", "openreview": "https://openreview.net/forum?id=k4ZCORSFEd", "poster": "/media/PosterPDFs/NeurIPS%202023/70662.png?t=1702181677.6630402", "slides": "https://nips.cc/virtual/2023/poster/70662", "video": "https://nips.cc/virtual/2023/poster/70662", "author_site": "Sepehr Assadi, Vihan Shah, Chen Wang", "tldr": "", "abstract": "Correlation clustering is a fundamental optimization problem at the intersection of machine learning and theoretical computer science. \nMotivated by applications to big data processing, recent years have witnessed a flurry of results on this problem in the streaming model. \nIn this model, the algorithm needs to process the input $n$-vertex graph by making one or few passes over the stream of its edges and using a limited memory, much smaller than the input size. 
\n\nAll previous work on streaming correlation clustering has focused on semi-streaming algorithms with $\\Omega(n)$ memory, whereas in this work, we study streaming algorithms with a much smaller memory requirement of only $\\text{polylog}{(n)}$ bits. This stringent memory requirement is in the same spirit as classical streaming algorithms that, instead of recovering a full solution to the problem---which can be prohibitively large to store in such small memory, as is the case for our problem---aim to learn certain statistical properties of their inputs. In our case, this translates to determining the ``(correlation) clusterability'' of input graphs, or more precisely, estimating the cost of the optimal correlation clustering solution. \n\nAs our main result, we present two novel algorithms that, in only $\\text{polylog}{(n)}$ space, are able to estimate the optimal correlation clustering cost up to some constant multiplicative factor plus some extra additive error. One of the algorithms outputs a $3$-multiplicative approximation plus an $o(n^2)$ additive error, and the other further reduces the additive error at the cost of increasing the multiplicative factor to some large constant. We then present new lower bounds that justify this mix of both multiplicative and additive error approximation in our algorithms.", "keywords": "Correlation Clustering;Graph Streaming Algorithms;Large-scale Clustering;Graph Learning", "primary_area": "", "supplementary_material": "/attachment/6755f0bcba7c3c6abd84bf027da743655ae21830.pdf", "author": "Sepehr Assadi;Vihan Shah;Chen Wang", "authorids": "~Sepehr_Assadi1;~Vihan_Shah1;~Chen_Wang14", "gender": ";M;M", "homepage": "https://www.cs.rutgers.edu/~sa1497/;https://student.cs.uwaterloo.ca/~v46shah/;https://sites.google.com/view/chen-wang/home", "dblp": "125/2903;312/0996;82/4206-27", "google_scholar": "QSVAzVIAAAAJ;https://scholar.google.ca/citations?hl=en;DnrU0k0AAAAJ", "orcid": ";0009-0004-3024-9226;0000-0003-4044-9438", "linkedin": ";vihan-shah/;", "or_profile": "~Sepehr_Assadi1;~Vihan_Shah1;~Chen_Wang14", "aff": "University of Pennsylvania;Rutgers University, New Brunswick ;Google", "aff_domain": "upenn.edu;rutgers.edu;google.com", "position": "PhD student;MS student;Intern", "bibtex": "@inproceedings{\nassadi2023streaming,\ntitle={Streaming Algorithms and Lower Bounds for Estimating Correlation Clustering Cost},\nauthor={Sepehr Assadi and Vihan Shah and Chen Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=k4ZCORSFEd}\n}", "github": "", "project": "", "reviewers": "YPdG;E8E2;nMDo;DYBZ", "pdf_size": 2028307, "rating": "3;7;7;8", "confidence": "3;3;3;4", "soundness": "2;4;3;3", "novelty": "1;3;4;4", "presentation": "4;3;4;4", "wc_summary": "101;315;97;356", "wc_strengths": "24;91;79;109", "wc_weaknesses": "503;19;66;108", "wc_questions": "204;63;13;2", "wc_limitations": "10;6;11;17", "wc_review": "842;494;266;592", "wc_reply_reviewers": "0;0;17;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 217.25, 119.1435583655281 ], "wc_strengths_avg": [ 75.75, 31.728339067779768 ], "wc_weaknesses_avg": [ 174.0, 192.5396063151683 ], "wc_questions_avg": [ 70.5, 80.43164800002546 ],
"wc_limitations_avg": [ 11.0, 3.9370039370059056 ], "wc_review_avg": [ 548.5, 206.64643718196547 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5261522196019801, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1916688278369549454&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "upenn.edu;rutgers.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Pennsylvania;Rutgers University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.upenn.edu;https://www.rutgers.edu;https://www.google.com", "aff_unique_abbr": "UPenn;Rutgers;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";New Brunswick;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "BenchCLAMP: A Benchmark for Evaluating Language Models on Syntactic and Semantic Parsing", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73488", "id": "k4juAEW1tG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c1535a02f0ce079433344e14d910597-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=k4juAEW1tG", "openreview": "https://openreview.net/forum?id=k4juAEW1tG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73488", "video": "https://nips.cc/virtual/2023/poster/73488", "author_site": "Subhro Roy, Samuel Thomson, Tongfei Chen, Richard Shin, Adam Pauls, Jason Eisner, Benjamin Van Durme", "tldr": "", "abstract": "Recent work has shown that generation from a prompted or fine-tuned language model can perform well at semantic parsing when the output is constrained to be a valid semantic representation. We introduce BenchCLAMP, a Benchmark to evaluate Constrained LAnguage Model Parsing, that includes context-free grammars for seven semantic parsing datasets and two syntactic parsing datasets with varied output meaning representations, as well as a constrained decoding interface to generate only valid outputs covered by these grammars. We provide low, medium, and high resource splits for each dataset, allowing accurate comparison of various language models under different data regimes. Our benchmark supports evaluation of language models using prompt-based learning as well as fine-tuning. We benchmark seven language models, including two GPT-3 variants available only through an API. 
Our experiments show that encoder-decoder pretrained language models can match or even surpass state-of-the-art methods for both syntactic and semantic parsing when the model output is constrained to be valid.", "keywords": "Syntactic parsing;semantic parsing;benchmark;constrained decoding;GPT-3", "primary_area": "", "supplementary_material": "", "author": "Subhro Roy;Sam Thomson;Tongfei Chen;Richard Shin;Adam Pauls;Jason Eisner;Benjamin Van Durme", "authorids": "~Subhro_Roy1;~Sam_Thomson2;~Tongfei_Chen1;~Richard_Shin1;~Adam_Pauls1;~Jason_Eisner1;~Benjamin_Van_Durme2", "gender": "M;;M;M;M;M;", "homepage": "https://sroy9.github.io/;;http://cs.jhu.edu/~tongfei;https://rshin.github.io;;http://cs.jhu.edu/~jason;", "dblp": "47/9962;;137/9630;13/8735;24/5967;37/3263;", "google_scholar": "l2pAq_0AAAAJ;;_OS1gScAAAAJ;xPnkc80AAAAJ;;tjb2UccAAAAJ;", "orcid": ";;;;;0000-0002-8861-0772;", "linkedin": ";;;;;;", "or_profile": "~Subhro_Roy1;~Sam_Thomson2;~Tongfei_Chen1;~Richard_Shin1;~Adam_Pauls1;~Jason_Eisner1;~Benjamin_Van_Durme2", "aff": "Microsoft Semantic Machines;;Microsoft;Microsoft;Microsoft;Microsoft;", "aff_domain": "microsoft.com;;microsoft.com;microsoft.com;microsoft.com;microsoft.com;", "position": "Senior Researcher;;Researcher;Researcher;Principal Researcher;Director of Research ;", "bibtex": "@inproceedings{\nroy2023benchclamp,\ntitle={Bench{CLAMP}: A Benchmark for Evaluating Language Models on Syntactic and Semantic Parsing},\nauthor={Subhro Roy and Sam Thomson and Tongfei Chen and Richard Shin and Adam Pauls and Jason Eisner and Benjamin Van Durme},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=k4juAEW1tG}\n}", "github": "", "project": "", "reviewers": "ZUCA;1bZU;T3oy;pzWP", "pdf_size": 217610, "rating": "6;6;6;8", "confidence": "3;3;4;4", "wc_summary_and_contributions": "75;54;90;45", "wc_strengths": "88;57;86;36", "wc_improvement": "153;66;64;6", "wc_limitations": "10;1;9;3", "wc_correctness": "14;1;14;69", "wc_clarity": "5;1;6;128", "wc_relation_to_prior_work": "5;1;1;3", "wc_documentation": "5;1;1;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "356;183;272;294", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;125;0;55", "reply_reviewers": "0;0;0;0", "reply_authors": "0;1;0;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 66.0, 17.621010186706094 ], "wc_strengths_avg": [ 66.75, 21.579793789561567 ], "wc_improvement_avg": [ 72.25, 52.48035346679746 ], "wc_limitations_avg": [ 5.75, 3.832427429188973 ], "wc_correctness_avg": [ 24.5, 26.23451924468981 ], "wc_clarity_avg": [ 35.0, 53.72615750265414 ], "wc_relation_to_prior_work_avg": [ 2.5, 1.6583123951777 ], "wc_documentation_avg": [ 2.5, 1.6583123951777 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 276.25, 62.02570031849701 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 45.0, 51.35659646043534 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9502307350898382361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "microsoft.com;;microsoft.com;microsoft.com;microsoft.com;microsoft.com;", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft",
"aff_unique_dep": "Semantic Machines", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "L2T-DLN: Learning to Teach with Dynamic Loss Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70661", "id": "k6yNi6DEqK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8667f264f88c7938a73a53ab01eb1327-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=k6yNi6DEqK", "openreview": "https://openreview.net/forum?id=k6yNi6DEqK", "poster": "/media/PosterPDFs/NeurIPS%202023/70661.png?t=1697531769.1287503", "slides": "https://nips.cc/virtual/2023/poster/70661", "video": "https://nips.cc/virtual/2023/poster/70661", "author_site": "Zhaoyang Hai, Liyuan Pan, Xiabi Liu, Zhengzheng Liu, Mirna Yunita", "tldr": "", "abstract": "With the concept of teaching being introduced to the machine learning community, a teacher model start using dynamic loss functions to teach the training of a student model. The dynamic intends to set adaptive loss functions to different phases of student model learning. In existing works, the teacher model 1) merely determines the loss function based on the present states of the student model, e.g., disregards the experience of the teacher; 2) only utilizes the states of the student model, e.g., training iteration number and loss/accuracy from training/validation sets, while ignoring the states of the loss function. In this paper, we first formulate the loss adjustment as a temporal task by designing a teacher model with memory units, and, therefore, enables the student learning to be guided by the experience of the teacher model. Then, with a Dynamic Loss Network, we can additionally use the states of the loss to assist the teacher learning in enhancing the interactions between the teacher and the student model. 
\n Extensive experiments demonstrate that our approach can enhance student learning and improve the performance of various deep models on real-world tasks, including classification, object detection, and semantic segmentation scenarios.", "keywords": "Learning to teach;dynamic loss function;optimization", "primary_area": "", "supplementary_material": "/attachment/1d422939b579e7f5f3d988dc4b369c9032890029.pdf", "author": "Zhaoyang Hai;Liyuan Pan;Xiabi Liu;Zhengzheng Liu;Mirna Yunita", "authorids": "~Zhaoyang_Hai2;~Liyuan_Pan1;~Xiabi_Liu1;~Zhengzheng_Liu1;~Mirna_Yunita1", "gender": "M;F;M;F;F", "homepage": "https://github.com/1017137588qqcom;https://scholar.google.com/citations?hl=en&user=kAt6-AIAAAAJ;;https://blog.csdn.net/loveliuzz;", "dblp": "257/2945;199/2150;;;359/6325.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;;qFtyfAIAAAAJ", "orcid": ";;;;0000-0001-7569-1772", "linkedin": ";;;;", "or_profile": "~Zhaoyang_Hai2;~Liyuan_Pan1;~Xiabi_Liu1;~Zhengzheng_Liu1;~Mirna_Yunita1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "PhD student;Associate Professor;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nhai2023ltdln,\ntitle={L2T-{DLN}: Learning to Teach with Dynamic Loss Network},\nauthor={Zhaoyang Hai and Liyuan Pan and Xiabi Liu and Zhengzheng Liu and Mirna Yunita},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=k6yNi6DEqK}\n}", "github": "", "project": "", "reviewers": "9Vkw;HMfR;wSe9;79Na", "pdf_size": 9881894, "rating": "3;5;5;7", "confidence": "3;3;3;4", "soundness": "2;2;3;4", "novelty": "1;2;2;4", "presentation": "1;3;2;3", "wc_summary": "82;95;70;68", "wc_strengths": "23;66;51;109", "wc_weaknesses": "353;62;87;67", "wc_questions": "2;81;155;29", "wc_limitations": "2;13;57;38", "wc_review": "462;317;420;311", "wc_reply_reviewers": "0;38;141;0", "wc_reply_authors": "0;52;191;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;2;3;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 1.0897247358851685 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 78.75, 10.80219885023415 ], "wc_strengths_avg": [ 62.25, 31.09159854365806 ], "wc_weaknesses_avg": [ 142.25, 122.03559931429845 ], "wc_questions_avg": [ 66.75, 58.32827359008665 ], "wc_limitations_avg": [ 27.5, 21.453437952924933 ], "wc_review_avg": [ 377.5, 65.24760532004221 ], "wc_reply_reviewers_avg": [ 44.75, 57.69477879323223 ], "wc_reply_authors_avg": [ 60.75, 78.13889876367595 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8147451930370669495&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "",
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "PDF: Point Diffusion Implicit Function for Large-scale Scene Neural Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70660", "id": "k8U8ZijXHh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0073cc73e1873b35345209b50a3dab66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=k8U8ZijXHh", "openreview": "https://openreview.net/forum?id=k8U8ZijXHh", "poster": "/media/PosterPDFs/NeurIPS%202023/70660.png?t=1702145214.2185407", "slides": "https://nips.cc/virtual/2023/poster/70660", "video": "https://nips.cc/virtual/2023/poster/70660", "author_site": "Yuhan Ding, Fukun Yin, Jiayuan Fan, Hui Li, Xin Chen, Wen Liu, Chongshan Lu, Gang Yu, Tao Chen", "tldr": "", "abstract": "Recent advances in implicit neural representations have achieved impressive results by sampling and fusing individual points along sampling rays in the sampling space. However, due to the explosively growing sampling space, finely representing and synthesizing detailed textures remains a challenge for unbounded large-scale outdoor scenes. To alleviate the dilemma of using individual points to perceive the entire colossal space, we explore learning the surface distribution of the scene to provide structural priors and reduce the samplable space and propose a Point Diffusion implicit Function, PDF, for large-scale scene neural representation. The core of our method is a large-scale point cloud super-resolution diffusion module that enhances the sparse point cloud reconstructed from several training images into a dense point cloud as an explicit prior. Then in the rendering stage, only sampling points with prior points within the sampling radius are retained. That is, the sampling space is reduced from the unbounded space to the scene surface. Meanwhile, to fill in the background of the scene that cannot be provided by point clouds, the region sampling based on Mip-NeRF 360 is employed to model the background representation. 
Extensive experiments have demonstrated the effectiveness of our method for large-scale scene novel view synthesis, which outperforms relevant state-of-the-art baselines.", "keywords": "implicit neural representation; diffusion; point cloud; volume rendering", "primary_area": "", "supplementary_material": "/attachment/419a58ba47a61846c040427c7963902a1b23905b.pdf", "author": "Yuhan Ding;Fukun Yin;Jiayuan Fan;Hui Li;Xin Chen;Wen Liu;Chongshan Lu;Gang YU;Tao Chen", "authorids": "~Yuhan_Ding1;~Fukun_Yin1;~Jiayuan_Fan2;~Hui_Li15;~Xin_Chen16;~Wen_Liu2;~Chongshan_Lu1;~Gang_YU2;~Tao_Chen6", "gender": "M;M;F;;M;M;M;M;M", "homepage": "https://github.com/YuhanDing;https://fukunyin.github.io/;http://faet.fudan.edu.cn/e4/39/c23898a255033/page.htm;https://github.com/spicyduck;https://chenxin.tech/;https://github.com/StevenLiuWen;https://github.com/luchongshan;https://skicyyu.org/;https://eetchen.github.io/", "dblp": ";272/0842;76/10698;;24/1518-40;61/372-3;;;69/510-3", "google_scholar": ";HGFT79EAAAAJ;https://scholar.google.com.hk/citations?user=gsLd2ccAAAAJ;;7qeAJZ4AAAAJ;A6K6bkoAAAAJ;;https://scholar.google.com.sg/citations?user=BJdigYsAAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ", "orcid": ";;0000-0002-4983-1353;;0000-0002-9347-1367;;;0000-0001-5570-2710;", "linkedin": ";;;;xin-chen-cs/;;;;", "or_profile": "~Yuhan_Ding1;~Fukun_Yin1;~Jiayuan_Fan2;~Hui_Li15;~Xin_Chen16;~Wen_Liu2;~Chongshan_Lu1;~Gang_YU2;~Tao_Chen6", "aff": "Fudan University;Tencent PCG ;Fudan University;Fudan University;Tencent;Tencent PCG;Fudan University;Tencent;Fudan University", "aff_domain": "fudan.edu.cn;tencent.com;fudan.edu;fudan.edu.cn;tencent.com;tencent.com;fudan.edu.cn;tencent.com;fudan.edu.cn", "position": "MS student;Intern;Assistant Professor;MS student;Researcher;Researcher;MS student;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nding2023pdf,\ntitle={{PDF}: Point Diffusion Implicit Function for Large-scale Scene Neural Representation},\nauthor={Yuhan Ding and Fukun Yin and Jiayuan Fan and Hui Li and Xin Chen and Wen Liu and Chongshan Lu and Gang YU and Tao Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=k8U8ZijXHh}\n}", "github": "", "project": "", "reviewers": "rzgR;yNkC;v7wN;H9co;DZNn", "pdf_size": 20871239, "rating": "4;4;6;6;6", "confidence": "4;4;5;4;4", "soundness": "2;2;3;3;3", "novelty": "1;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "53;82;51;115;168", "wc_strengths": "65;40;46;138;133", "wc_weaknesses": "187;152;279;153;215", "wc_questions": "95;29;4;98;49", "wc_limitations": "11;18;8;2;43", "wc_review": "411;321;388;506;608", "wc_reply_reviewers": "0;117;79;194;213", "wc_reply_authors": "0;397;268;221;193", "reply_reviewers": "0;2;2;2;2", "reply_authors": "1;3;3;3;3", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 93.8, 43.796803536331275 ], "wc_strengths_avg": [ 84.4, 42.56101502549017 ], "wc_weaknesses_avg": [ 197.2, 47.135549217124854 ], "wc_questions_avg": [ 55.0, 36.774991502378356 ], "wc_limitations_avg": [ 16.4, 14.263239463740346 ], "wc_review_avg": [ 446.8, 100.05478499302271 ], "wc_reply_reviewers_avg": [ 120.6, 77.73441966079119 ], "wc_reply_authors_avg": [ 215.8, 128.58833539633366 ], "reply_reviewers_avg": [ 1.6, 0.8000000000000002 ], "reply_authors_avg": [ 2.6,
0.8000000000000002 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12511787400762390367&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;tencent.com;fudan.edu;fudan.edu.cn;tencent.com;tencent.com;fudan.edu.cn;tencent.com;fudan.edu.cn", "author_num": 9, "aff_unique_index": "0;1;0;0;1;1;0;1;0", "aff_unique_norm": "Fudan University;Tencent", "aff_unique_dep": ";PCG (Platform and Content Group)", "aff_unique_url": "https://www.fudan.edu.cn;https://www.tencent.com", "aff_unique_abbr": "Fudan;Tencent PCG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Open Compound Domain Adaptation with Object Style Compensation for Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70659", "id": "k9zSU3pdi4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c74a3a6f44a44b204e26b1a6d7fe4a66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=k9zSU3pdi4", "openreview": "https://openreview.net/forum?id=k9zSU3pdi4", "poster": "/media/PosterPDFs/NeurIPS%202023/70659.png?t=1697113269.3928685", "slides": "https://nips.cc/virtual/2023/poster/70659", "video": "https://nips.cc/virtual/2023/poster/70659", "author_site": "Tingliang Feng, Hao Shi, Xueyang Liu, Wei Feng, Liang Wan, Yanlin Zhou, Di Lin", "tldr": "", "abstract": "Many methods of semantic image segmentation have built on the success of open compound domain adaptation. They minimize the style gap between the images of the source and target domains, making it easier to predict accurate pseudo annotations for the target domain's images, which are used to train the segmentation network. The existing methods globally adapt the scene style of the images, whereas the object styles of different categories or instances are adapted improperly. This paper proposes the Object Style Compensation, where we construct the Object-Level Discrepancy Memory with multiple sets of discrepancy features. The discrepancy features in a set capture the style changes of the same category's object instances adapted from target to source domains. We learn the discrepancy features from the images of the source and target domains, storing the discrepancy features in memory. With this memory, we select appropriate discrepancy features for compensating the style information of the object instances of various categories, adapting the object styles to a unified style of the source domain.
Our method enables a more accurate computation of the pseudo annotations for the target domain's images, thus yielding state-of-the-art results on different datasets.", "keywords": "Object Style Compensation;Open Compound Domain Adaptation;Semantic Segmentation", "primary_area": "", "supplementary_material": "/attachment/ef6de5e63c82bf56b6bc2c9defcf2975d542e16f.pdf", "author": "Tingliang Feng;Hao Shi;Xueyang Liu;Wei Feng;Liang Wan;Yanlin Zhou;Di Lin", "authorids": "~Tingliang_Feng1;~Hao_Shi3;~Xueyang_Liu2;~Wei_Feng1;~Liang_Wan1;~Yanlin_Zhou3;~Di_Lin3", "gender": "M;M;M;M;F;M;M", "homepage": ";https://shihao1895.github.io;https://github.com/liuxuey;;http://cic.tju.edu.cn/faculty/lwan/index.html;;https://dilincv.github.io/", "dblp": "235/0433;;;17/1152-5;;;20/3191-2.html", "google_scholar": "JXKFHZ8AAAAJ;tSK_CIAAAAAJ;;https://scholar.google.co.jp/citations?user=7ory1i8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;rW0r-hMAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Tingliang_Feng1;~Hao_Shi3;~Xueyang_Liu2;~Wei_Feng1;~Liang_Wan1;~Yanlin_Zhou3;~Di_Lin3", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University;DUNHUANG ACADEMY;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;dha.ac.cn;tju.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Full Professor;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nfeng2023open,\ntitle={Open Compound Domain Adaptation with Object Style Compensation for Semantic Segmentation},\nauthor={Tingliang Feng and Hao Shi and Xueyang Liu and Wei Feng and Liang Wan and Yanlin Zhou and Di Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=k9zSU3pdi4}\n}", "github": "", "project": "", "reviewers": "okqW;CD2C;uGsB;XmuL;LZYW", "pdf_size": 1386331, "rating": "3;5;5;6;6", "confidence": "4;3;5;5;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;4;2;3;2", "wc_summary": "140;54;99;118;83", "wc_strengths": "25;72;33;34;20", "wc_weaknesses": "378;62;245;161;50", "wc_questions": "4;2;75;14;57", "wc_limitations": "6;1;10;11;9", "wc_review": "553;191;462;338;219", "wc_reply_reviewers": "40;19;22;0;0", "wc_reply_authors": "604;28;100;98;98", "reply_reviewers": "1;1;1;0;0", "reply_authors": "4;2;3;3;3", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 98.8, 29.403401163810965 ], "wc_strengths_avg": [ 36.8, 18.345571672749806 ], "wc_weaknesses_avg": [ 179.2, 122.15629332948834 ], "wc_questions_avg": [ 30.4, 29.897157055479376 ], "wc_limitations_avg": [ 7.4, 3.6110940170535577 ], "wc_review_avg": [ 352.6, 138.78414895080778 ], "wc_reply_reviewers_avg": [ 16.2, 15.051910177781425 ], "wc_reply_authors_avg": [ 185.6, 210.9839804345344 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.0, 0.6324555320336759 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13204173754768719064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;dha.ac.cn;tju.edu.cn", "author_num": 7, "aff_unique_index":
"0;0;0;0;0;1;0", "aff_unique_norm": "Tianjin University;Dunhuang Academy", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;", "aff_unique_abbr": "TJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Discovering General Reinforcement Learning Algorithms with Adversarial Environment Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70658", "id": "kAU6Cdq1gV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fce2d8a485746f76aac7b5650db2679d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kAU6Cdq1gV", "openreview": "https://openreview.net/forum?id=kAU6Cdq1gV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70658", "video": "https://nips.cc/virtual/2023/poster/70658", "author_site": "Matthew T Jackson, Minqi Jiang, Jack Parker-Holder, Risto Vuorio, Chris Lu, Greg Farquhar, Shimon Whiteson, Jakob Foerster", "tldr": "", "abstract": "The past decade has seen vast progress in deep reinforcement learning (RL) on the back of algorithms manually designed by human researchers. Recently, it has been shown that it is possible to meta-learn update rules, with the hope of discovering algorithms that can perform well on a wide range of RL tasks. Despite impressive initial results from algorithms such as Learned Policy Gradient (LPG), there remains a generalization gap when these algorithms are applied to unseen environments. In this work, we examine how characteristics of the meta-training distribution impact the generalization performance of these algorithms. Motivated by this analysis and building on ideas from Unsupervised Environment Design (UED), we propose a novel approach for automatically generating curricula to maximize the regret of a meta-learned optimizer, in addition to a novel approximation of regret, which we name algorithmic regret (AR). The result is our method, General RL Optimizers Obtained Via Environment Design (GROOVE). In a series of experiments, we show that GROOVE achieves superior generalization to LPG, and evaluate AR against baseline metrics from UED, identifying it as a critical component of environment design in this setting. 
We believe this approach is a step towards the discovery of truly general RL algorithms, capable of solving a wide range of real-world environments.", "keywords": "Reinforcement Learning;Meta-Learning;Meta-RL;Meta-Optimization;Policy Meta-Optimization;Environment Design;Unsupervised Environment Design;Auto-Curricula", "primary_area": "", "supplementary_material": "/attachment/1eebf043a8ce5c0574b550245fb23bb95085bf8b.pdf", "author": "Matthew Thomas Jackson;Minqi Jiang;Jack Parker-Holder;Risto Vuorio;Chris Lu;Gregory Farquhar;Shimon Whiteson;Jakob Nicolaus Foerster", "authorids": "~Matthew_Thomas_Jackson1;~Minqi_Jiang1;~Jack_Parker-Holder1;~Risto_Vuorio1;~Chris_Lu1;~Gregory_Farquhar1;~Shimon_Whiteson1;~Jakob_Nicolaus_Foerster1", "gender": "M;M;M;;M;;M;", "homepage": "https://matthewtjackson.com;https://twitter.com/minqijiang;https://jparkerholder.github.io/;;https://greg-farquhar.github.io/;;https://www.jakobfoerster.com;https://vuoristo.github.io/", "dblp": "331/5748;270/7949;237/9793.html;77/9579;195/5653;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;176/5095;222/2614", "google_scholar": "SdGawnwAAAAJ;;;4WLoIRsAAAAJ;6Z-RC-QAAAAJ;;6z4lQzMAAAAJ;qCk3GFAAAAAJ", "orcid": ";;;;;;;", "linkedin": "matthew-t-jackson/;minqi-jiang-585a6536/;;;;;;", "or_profile": "~Matthew_Thomas_Jackson1;~Minqi_Jiang1;~Jack_Parker-Holder1;~Chris_Lu1;~Gregory_Farquhar1;~Shimon_Whiteson1;~Jakob_Nicolaus_Foerster1;~Risto_Ilkka_Antero_Vuorio1", "aff": "University of Oxford;University College London;Google DeepMind;University of Oxford;Google DeepMind;University of Oxford;University of Oxford, University of Oxford;Department of Computer Science, University of Oxford", "aff_domain": "oxford.ac.uk;ucl.ac.uk;google.com;ox.ac.uk;google.com;ox.ac.uk;eng.ox.ac.uk;cs.ox.ac.uk", "position": "PhD student;PhD;Researcher;PhD student;Research Scientist;Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\njackson2023discovering,\ntitle={Discovering General Reinforcement Learning Algorithms with Adversarial Environment Design},\nauthor={Matthew Thomas Jackson and Minqi Jiang and Jack Parker-Holder and Risto Vuorio and Chris Lu and Gregory Farquhar and Shimon Whiteson and Jakob Nicolaus Foerster},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kAU6Cdq1gV}\n}", "github": "", "project": "", "reviewers": "4F7T;qWd8;8DkV;f1yc;TbjH", "pdf_size": 1166152, "rating": "4;5;5;6;7", "confidence": "4;2;5;3;2", "soundness": "2;3;2;3;3", "novelty": "3;2;3;2;2", "presentation": "1;3;2;3;3", "wc_summary": "81;76;48;110;91", "wc_strengths": "20;47;37;26;40", "wc_weaknesses": "356;151;91;69;58", "wc_questions": "3;11;63;8;21", "wc_limitations": "1;1;3;1;5", "wc_review": "461;286;242;214;215", "wc_reply_reviewers": "185;43;108;22;0", "wc_reply_authors": "173;0;141;0;0", "reply_reviewers": "1;1;3;1;0", "reply_authors": "2;1;3;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 81.2, 20.272148381461694 ], "wc_strengths_avg": [ 34.0, 9.736529155710468 ], "wc_weaknesses_avg": [ 145.0, 110.28871202439532 ], "wc_questions_avg": [ 21.2, 21.710826792179056 ], "wc_limitations_avg": [ 2.2, 1.6 ], "wc_review_avg": [ 283.6, 92.47399634491849 ], "wc_reply_reviewers_avg": [ 71.6, 67.21487930510624 ], "wc_reply_authors_avg": [ 62.8, 77.5768006558662 ], "reply_reviewers_avg": [ 
1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5717718748968658, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6505493538064914072&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "oxford.ac.uk;ucl.ac.uk;google.com;ox.ac.uk;google.com;ox.ac.uk;eng.ox.ac.uk;cs.ox.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;0;2;0;0;0", "aff_unique_norm": "University of Oxford;University College London;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://www.ucl.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;UCL;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "SAME: Uncovering GNN Black Box with Structure-aware Shapley-based Multipiece Explanations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70657", "id": "kBBsj9KRgh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/14cdc9013d80338bf81483a7736ea05c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kBBsj9KRgh", "openreview": "https://openreview.net/forum?id=kBBsj9KRgh", "poster": "/media/PosterPDFs/NeurIPS%202023/70657.png?t=1700470630.960106", "slides": "https://nips.cc/virtual/2023/poster/70657", "video": "https://nips.cc/virtual/2023/poster/70657", "author_site": "Ziyuan Ye, Rihan Huang, Qilin Wu, Quanying Liu", "tldr": "", "abstract": "Post-hoc explanation techniques on graph neural networks (GNNs) provide economical solutions for opening the black-box graph models without model retraining. Many GNN explanation variants have achieved state-of-the-art explanation results on a diverse set of benchmarks, yet they rarely provide theoretical analysis of their inherent properties and explanatory capability. In this work, we propose the $\\underline{\\text{S}}$tructure-$\\underline{\\text{A}}$ware Shapley-based $\\underline{\\text{M}}$ultipiece $\\underline{\\text{E}}$xplanation (SAME) method to address the challenges of structure-aware feature interactions in GNN explanation. Specifically, SAME leverages an expansion-based Monte Carlo tree search to explore multi-grained structure-aware connected substructures. Afterward, the explanation results are encouraged to be informative of the graph properties by optimizing the combination of distinct single substructures. By considering fair feature interactions in the process of investigating multiple connected important substructures, the explanation provided by SAME has the potential to be as explainable as the theoretically optimal explanation obtained by the Shapley value within polynomial time. Extensive experiments on real-world and synthetic benchmarks show that SAME improves the previous state-of-the-art fidelity performance by 12.9\\% on BBBP, 7.01\\% on MUTAG, 42.3\\% on Graph-SST2, 38.9\\% on Graph-SST5, 11.3\\% on BA-2Motifs and 18.2\\% on BA-Shapes under the same testing condition.
Code is available at https://github.com/same2023neurips/same.", "keywords": "GNN explainability;Shapley value;Monte Carlo tree search;structure awareness;multi-grained explanation", "primary_area": "", "supplementary_material": "", "author": "Ziyuan Ye;Rihan Huang;Qilin Wu;Quanying Liu", "authorids": "~Ziyuan_Ye1;~Rihan_Huang1;kyrinwu@gmail.com;~Quanying_Liu1", "gender": "M;M;;F", "homepage": "https://voldet.github.io/;;;", "dblp": "251/5430;;;", "google_scholar": "CmzVixkAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8370-7524;;;0000-0002-2501-7656", "linkedin": ";rihan-huang-98750a276/;;", "or_profile": "~Ziyuan_Ye1;~Rihan_Huang1;kyrinwu@gmail.com;~Quanying_Liu1", "aff": "Southern University of Science and Technology;Southern University of Science and Technology;;Southern University of Science and Technology", "aff_domain": "sustech.edu.cn;sustech.edu.cn;;sustech.edu.cn", "position": "MS student;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\nye2023same,\ntitle={{SAME}: Uncovering {GNN} Black Box with Structure-aware Shapley-based Multipiece Explanations},\nauthor={Ziyuan Ye and Rihan Huang and Qilin Wu and Quanying Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kBBsj9KRgh}\n}", "github": "", "project": "", "reviewers": "dDYx;xcjE;8xQs;qWxr", "pdf_size": 4396380, "rating": "5;6;6;6", "confidence": "4;3;4;2", "soundness": "1;2;3;3", "novelty": "2;3;2;2", "presentation": "2;2;3;2", "wc_summary": "98;60;77;57", "wc_strengths": "40;202;96;63", "wc_weaknesses": "672;260;163;69", "wc_questions": "100;14;310;14", "wc_limitations": "1;26;191;1", "wc_review": "911;562;837;204", "wc_reply_reviewers": "268;16;0;27", "wc_reply_authors": "714;0;251;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 16.32482771731451 ], "wc_strengths_avg": [ 100.25, 62.02570031849701 ], "wc_weaknesses_avg": [ 291.0, 230.10323769995068 ], "wc_questions_avg": [ 109.5, 120.9659042871172 ], "wc_limitations_avg": [ 54.75, 79.32330994102553 ], "wc_review_avg": [ 628.5, 277.444138521613 ], "wc_reply_reviewers_avg": [ 77.75, 110.25963676704181 ], "wc_reply_authors_avg": [ 241.25, 291.54362881050923 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6217552675332162173&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sustech.edu.cn;sustech.edu.cn;;sustech.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Southern University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.sustech.edu.cn", "aff_unique_abbr": "SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Coherent Soft Imitation Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70656", "id": "kCCD8d2aEu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f0435cffef91068ced08d7c7d8e643e-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=kCCD8d2aEu", "openreview": "https://openreview.net/forum?id=kCCD8d2aEu", "poster": "/media/PosterPDFs/NeurIPS%202023/70656.png?t=1701593722.961266", "slides": "https://nips.cc/virtual/2023/poster/70656", "video": "https://nips.cc/virtual/2023/poster/70656", "author_site": "Joe Watson, Sandy Huang, Nicolas Heess", "tldr": "", "abstract": "Imitation learning methods seek to learn from an expert either through behavioral cloning (BC) for the policy or inverse reinforcement learning (IRL) for the reward.\nSuch methods enable agents to learn complex tasks from humans that are difficult to capture with hand-designed reward functions.\nChoosing between BC or IRL for imitation depends on the quality and state-action coverage of the demonstrations, as well as additional access to the Markov decision process. \nHybrid strategies that combine BC and IRL are rare, as initial policy optimization against inaccurate rewards diminishes the benefit of pretraining the policy with BC.\nOur work derives an imitation method that captures the strengths of both BC and IRL.\nIn the entropy-regularized (`soft') reinforcement learning setting, we show that the behavioral-cloned policy can be used as both a shaped reward and a critic hypothesis space by inverting the regularized policy update. \nThis coherency facilitates fine-tuning cloned policies using the reward estimate and additional interactions with the environment.\nThis approach conveniently achieves imitation learning through initial behavioral cloning and subsequent refinement via RL with online or offline data sources.\nThe simplicity of the approach enables graceful scaling to high-dimensional and vision-based tasks, with stable learning and minimal hyperparameter tuning, in contrast to adversarial approaches.\nFor the open-source implementation and simulation results, see https://joemwatson.github.io/csil/.", "keywords": "imitation learning;inverse reinforcement learning;behavioral cloning;learning from demonstration", "primary_area": "", "supplementary_material": "/attachment/d25f87dd545064dd40dddd90b5c7a63db3b9d0fd.zip", "author": "Joe Watson;Sandy Huang;Nicolas Heess", "authorids": "~Joe_Watson1;~Sandy_Huang1;~Nicolas_Heess1", "gender": "M;F;", "homepage": "http://joemwatson.github.io/;https://shhuang.github.io/;", "dblp": "143/2943;153/7841;76/9181", "google_scholar": "https://scholar.google.co.uk/citations?user=xLtXIZAAAAAJ;eurA6WgAAAAJ;79k7bGEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Joe_Watson1;~Sandy_Huang1;~Nicolas_Heess1", "aff": "TU Darmstadt;Google DeepMind;Google DeepMind", "aff_domain": "tu-darmstadt.de;deepmind.com;google.com", "position": "PhD student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nwatson2023coherent,\ntitle={Coherent Soft Imitation Learning},\nauthor={Joe Watson and Sandy Huang and Nicolas Heess},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kCCD8d2aEu}\n}", "github": "", "project": "", "reviewers": "BA8W;nMWn;qELY;tsdg", "pdf_size": 18629244, "rating": "6;7;7;7", "confidence": "4;3;3;3", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "96;113;135;72", "wc_strengths": "42;28;42;49", "wc_weaknesses": "481;27;268;106", "wc_questions": "88;401;57;42", "wc_limitations": "9;35;28;25", "wc_review": "716;604;530;294", "wc_reply_reviewers": "507;18;14;21", "wc_reply_authors": "1761;18;17;21", "reply_reviewers": "4;1;1;1", 
"reply_authors": "6;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.0, 23.075961518428652 ], "wc_strengths_avg": [ 40.25, 7.628073151196179 ], "wc_weaknesses_avg": [ 220.5, 173.68721887346805 ], "wc_questions_avg": [ 147.0, 147.5821804961561 ], "wc_limitations_avg": [ 24.25, 9.522998477370455 ], "wc_review_avg": [ 536.0, 154.6156525064652 ], "wc_reply_reviewers_avg": [ 140.0, 211.90210003678587 ], "wc_reply_authors_avg": [ 454.25, 754.4539001821119 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5373918021379123580&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tu-darmstadt.de;deepmind.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.tu-darmstadt.de;https://deepmind.com", "aff_unique_abbr": "TU Darmstadt;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Can Pre-Trained Text-to-Image Models Generate Visual Goals for Reinforcement Learning?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70655", "id": "kChEBODIx9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7866ff509c822c2e58d20d00154a15a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kChEBODIx9", "openreview": "https://openreview.net/forum?id=kChEBODIx9", "poster": "/media/PosterPDFs/NeurIPS%202023/70655.png?t=1701744573.1663618", "slides": "https://nips.cc/virtual/2023/poster/70655", "video": "https://nips.cc/virtual/2023/poster/70655", "author_site": "Jialu Gao, Kaizhe Hu, Guowei Xu, Huazhe Xu", "tldr": "", "abstract": "Pre-trained text-to-image generative models can produce diverse, semantically rich, and realistic images from natural language descriptions. Compared with language, images usually convey information with more details and less ambiguity. In this study, we propose Learning from the Void (LfVoid), a method that leverages the power of pre-trained text-to-image models and advanced image editing techniques to guide robot learning. Given natural language instructions, LfVoid can edit the original observations to obtain goal images, such as \"wiping\" a stain off a table. Subsequently, LfVoid trains an ensembled goal discriminator on the generated image to provide reward signals for a reinforcement learning agent, guiding it to achieve the goal. The ability of LfVoid to learn with zero in-domain training on expert demonstrations or true goal observations (the void) is attributed to the utilization of knowledge from web-scale generative models. We evaluate LfVoid across three simulated tasks and validate its feasibility in the corresponding real-world scenarios. In addition, we offer insights into the key considerations for the effective integration of visual generative models into robot learning workflows. 
We posit that our work represents an initial step towards the broader application of pre-trained visual generative models in the robotics field. Our project page: https://lfvoid-rl.github.io/.", "keywords": "Visual Reinforcement Learning;Large Generative Models;Image Editing;Robotics", "primary_area": "", "supplementary_material": "/attachment/39b3ddc8b00e238754edff0009bc8b3a44269652.pdf", "author": "Jialu Gao;Kaizhe Hu;Guowei Xu;Huazhe Xu", "authorids": "~Jialu_Gao1;~Kaizhe_Hu1;~Guowei_Xu2;~Huazhe_Xu1", "gender": "F;M;M;M", "homepage": "https://gaojl19.github.io;https://hukz18.github.io/;https://xugw-kevin.github.io/;http://hxu.rocks", "dblp": "256/3849;330/4940;11/7718-1;164/9006", "google_scholar": "PokBJE0AAAAJ;mPpYLhcAAAAJ;7xKdIM4AAAAJ;t9HPFawAAAAJ", "orcid": ";;;", "linkedin": "danikagao/;%E5%BC%80%E5%93%B2-%E8%83%A1-40137718a/?miniProfileUrn=urn%3Ali%3Afs_miniProfile%3AACoAACyMbIEBJhMDJ4b7wLQyHotP_JGOnWDoEDU;;", "or_profile": "~Jialu_Gao1;~Kaizhe_Hu1;~Guowei_Xu2;~Huazhe_Xu1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ngao2023can,\ntitle={Can Pre-Trained Text-to-Image Models Generate Visual Goals for Reinforcement Learning?},\nauthor={Jialu Gao and Kaizhe Hu and Guowei Xu and Huazhe Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kChEBODIx9}\n}", "github": "", "project": "", "reviewers": "WQFw;NX6j;pQmz;jYPU", "pdf_size": 11385508, "rating": "5;5;6;7", "confidence": "4;2;3;4", "soundness": "3;2;3;3", "novelty": "1;2;4;3", "presentation": "2;2;4;4", "wc_summary": "41;72;73;55", "wc_strengths": "48;33;98;57", "wc_weaknesses": "227;247;228;20", "wc_questions": "17;102;84;206", "wc_limitations": "8;2;11;28", "wc_review": "341;456;494;366", "wc_reply_reviewers": "12;31;13;17", "wc_reply_authors": "84;22;7;11", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 60.25, 13.216939887886303 ], "wc_strengths_avg": [ 59.0, 24.093567606313517 ], "wc_weaknesses_avg": [ 180.5, 93.0067201873069 ], "wc_questions_avg": [ 102.25, 67.75830207435838 ], "wc_limitations_avg": [ 12.25, 9.65336728815391 ], "wc_review_avg": [ 414.25, 62.84256121451448 ], "wc_reply_reviewers_avg": [ 18.25, 7.595228765481656 ], "wc_reply_authors_avg": [ 31.0, 31.088583113419627 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8795614338823957697&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Limits, approximation and size transferability for GNNs on sparse 
graphs via graphops", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70654", "id": "kDQwossJuI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8154c89c8d3612d39fd1ed6a20f4bab1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kDQwossJuI", "openreview": "https://openreview.net/forum?id=kDQwossJuI", "poster": "/media/PosterPDFs/NeurIPS%202023/70654.png?t=1702396185.0622878", "slides": "https://nips.cc/virtual/2023/poster/70654", "video": "https://nips.cc/virtual/2023/poster/70654", "author_site": "Thien Le, Stefanie Jegelka", "tldr": "", "abstract": "Can graph neural networks generalize to graphs that are different from the graphs they were trained on, e.g., in size? In this work, we study this question from a theoretical perspective. While recent work established such transferability and approximation results via graph limits, e.g., via graphons, these only apply nontrivially to dense graphs. To include frequently encountered sparse graphs such as bounded-degree or power law graphs, we take a perspective of taking limits of operators derived from graphs, such as the aggregation operation that makes up GNNs. This leads to the recently introduced limit notion of graphops (Backhausz and Szegedy, 2022). We demonstrate how the operator perspective allows us to develop quantitative bounds on the distance between a finite GNN and its limit on an infinite graph, as well as the distance between the GNN on graphs of different sizes that share structural properties, under a regularity assumption verified for various graph sequences. Our results hold for dense and sparse graphs, and various notions of graph limits.", "keywords": "graph neural networks;convolution;graph limits;size transferability", "primary_area": "", "supplementary_material": "", "author": "Thien Le;Stefanie Jegelka", "authorids": "~Thien_Le1;~Stefanie_Jegelka3", "gender": "M;F", "homepage": "https://steven-le-thien.github.io;http://people.csail.mit.edu/stefje/", "dblp": "194/5549;38/7003", "google_scholar": "WhFGh74AAAAJ;gTWUZlsAAAAJ", "orcid": "0000-0001-5476-8451;", "linkedin": ";", "or_profile": "~Thien_Le1;~Stefanie_Jegelka3", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nle2023limits,\ntitle={Limits, approximation and size transferability for {GNN}s on sparse graphs via graphops},\nauthor={Thien Le and Stefanie Jegelka},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kDQwossJuI}\n}", "github": "", "project": "", "reviewers": "E8cZ;jR1r;3WrU;QqZq", "pdf_size": 661022, "rating": "3;5;6;6", "confidence": "2;1;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;2;3", "wc_summary": "125;70;105;40", "wc_strengths": "96;53;75;36", "wc_weaknesses": "198;58;227;147", "wc_questions": "248;65;749;22", "wc_limitations": "127;47;6;1", "wc_review": "794;293;1162;246", "wc_reply_reviewers": "206;36;105;38", "wc_reply_authors": "453;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.0, 32.59601202601324 ], "wc_strengths_avg": [ 65.0, 22.616365755797283 ], "wc_weaknesses_avg": [ 157.5, 64.18917354196111 
], "wc_questions_avg": [ 271.0, 288.7256483238024 ], "wc_limitations_avg": [ 45.25, 50.45976119642264 ], "wc_review_avg": [ 623.75, 377.75281269634513 ], "wc_reply_reviewers_avg": [ 96.25, 69.18227735482549 ], "wc_reply_authors_avg": [ 113.25, 196.15475395717536 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4923659639173309, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16732665148219987617&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "kJHKkRAZ0W", "title": "NTKCPL: Active Learning on Top of Self-Supervised Model by Estimating True Coverage", "track": "main", "status": "Reject", "tldr": "", "abstract": "High annotation cost has driven extensive research in active learning and self-supervised learning. Recent research has shown that in the context of supervised learning, when we have different numbers of labels, we need to apply different active learning strategies to ensure that it outperforms the random baseline. This number of annotations that change the suitable active learning strategy is called the phase transition point. We found, however, when combining active learning with self-supervised models to achieve improved performance, the phase transition point occurs earlier. It becomes challenging to determine which strategy should be used for previously unseen datasets. We argue that existing active learning algorithms are heavily influenced by the phase transition because the empirical risk over the entire active learning pool estimated by these algorithms is inaccurate and influenced by the number of labeled samples. To address this issue, we propose a novel active learning strategy, neural tangent kernel clustering-pseudo-labels (NTKCPL). It estimates empirical risk based on pseudo-labels and the model prediction with NTK approximation. We analyze the factors affecting this approximation error and design a pseudo-label clustering generation method to reduce the approximation error. Finally, our method was validated on five datasets, empirically demonstrating that it outperforms the baseline methods in most cases and is valid over a longer range of training budgets.", "keywords": "active learning;low budget;neural tangent kernel;pseudo-label", "primary_area": "", "supplementary_material": "/attachment/f21a29362b6274514805073a6337c0fb1f987f55.zip", "author": "Ziting Wen;Oscar Pizarro;Stefan B. 
Williams", "authorids": "~Ziting_Wen1;~Oscar__Pizarro1;~Stefan_B._Williams1", "gender": ";;M", "homepage": "https://zitingw.github.io/;;", "dblp": "315/4143;60/931.html;38/1594", "google_scholar": "AC1AmJ4AAAAJ;Jt1tl0YAAAAJ;vxN3VO0AAAAJ", "orcid": "0000-0002-6251-5726;0000-0001-6612-2738;0000-0001-9416-5639", "linkedin": ";;stefan-williams-88216b23", "or_profile": "~Ziting_Wen1;~Oscar__Pizarro1;~Stefan_B._Williams1", "aff": "University of Sydney;University of Sydney;University of Sydney", "aff_domain": "sydney.edu.au;usyd.edu.au;usyd.edu.au", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nwen2023ntkcpl,\ntitle={{NTKCPL}: Active Learning on Top of Self-Supervised Model by Estimating True Coverage},\nauthor={Ziting Wen and Oscar Pizarro and Stefan B. Williams},\nyear={2023},\nurl={https://openreview.net/forum?id=kJHKkRAZ0W}\n}", "github": "", "project": "", "reviewers": "YYhz;d28k;G9yf;UZLm;Pudn;VAm9", "site": "https://openreview.net/forum?id=kJHKkRAZ0W", "pdf_size": 1318512, "rating": "4;5;5;6;6;7", "confidence": "3;3;3;2;3;2", "soundness": "2;3;3;3;3;3", "novelty": "2;3;3;3;4;3", "presentation": "1;3;2;2;2;3", "wc_summary": "201;33;135;111;73;50", "wc_strengths": "47;88;53;142;47;48", "wc_weaknesses": "519;49;428;192;74;47", "wc_questions": "6;73;35;69;34;42", "wc_limitations": "15;63;9;11;9;18", "wc_review": "788;306;660;525;237;205", "wc_reply_reviewers": "74;52;459;14;0;0", "wc_reply_authors": "138;25;1518;0;0;0", "reply_reviewers": "1;2;2;1;0;0", "reply_authors": "2;2;6;1;1;1", "rating_avg": [ 5.5, 0.9574271077563381 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 100.5, 56.662009612555515 ], "wc_strengths_avg": [ 70.83333333333333, 34.963870240896135 ], "wc_weaknesses_avg": [ 218.16666666666666, 188.8186225514375 ], "wc_questions_avg": [ 43.166666666666664, 22.69667131737359 ], "wc_limitations_avg": [ 20.833333333333332, 19.134756044666183 ], "wc_review_avg": [ 453.5, 219.85809817546712 ], "wc_reply_reviewers_avg": [ 99.83333333333333, 162.91962912907567 ], "wc_reply_authors_avg": [ 280.1666666666667, 555.7326745437554 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.1666666666666665, 1.7716909687891083 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7385489458759964, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12032848778651175667&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "On the Identifiability of Sparse ICA without Assuming Non-Gaussianity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70653", "id": "kJIibP5bq2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/95b7a93e60fdfd10cc202f44fd6adf5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kJIibP5bq2", "openreview": "https://openreview.net/forum?id=kJIibP5bq2", "poster": "/media/PosterPDFs/NeurIPS%202023/70653.png?t=1701917019.5689137", "slides": "https://nips.cc/virtual/2023/poster/70653", "video": 
"https://nips.cc/virtual/2023/poster/70653", "author_site": "Ignavier Ng, Yujia Zheng, Xinshuai Dong, Kun Zhang", "tldr": "", "abstract": "Independent component analysis (ICA) is a fundamental statistical tool used to reveal hidden generative processes from observed data. However, traditional ICA approaches struggle with the rotational invariance inherent in Gaussian distributions, often necessitating the assumption of non-Gaussianity in the underlying sources. This may limit their applicability in broader contexts. To accommodate Gaussian sources, we develop an identifiability theory that relies on second-order statistics without imposing further preconditions on the distribution of sources, by introducing novel assumptions on the connective structure from sources to observed variables. Different from recent work that focuses on potentially restrictive connective structures, our proposed assumption of structural variability is both considerably less restrictive and provably necessary. Furthermore, we propose two estimation methods based on second-order statistics and sparsity constraint. Experimental results are provided to validate our identifiability theory and estimation methods.", "keywords": "independent component analysis;second-order statistics;sparsity", "primary_area": "", "supplementary_material": "", "author": "Ignavier Ng;Yujia Zheng;Xinshuai Dong;Kun Zhang", "authorids": "~Ignavier_Ng1;~Yujia_Zheng1;~Xinshuai_Dong1;~Kun_Zhang1", "gender": "M;M;M;M", "homepage": "https://ignavierng.github.io/;https://yjzheng.com;https://dongxinshuai.github.io/;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "251/3037;245/6109-1.html;279/6151.html;96/3115-1", "google_scholar": ";https://scholar.google.co.uk/citations?user=ioiW248AAAAJ;A7JyL1sAAAAJ;RGoypN4AAAAJ", "orcid": ";0009-0003-5225-6366;;", "linkedin": ";;;", "or_profile": "~Ignavier_Ng1;~Yujia_Zheng1;~Xinshuai_Dong1;~Kun_Zhang1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nng2023on,\ntitle={On the Identifiability of Sparse {ICA} without Assuming Non-Gaussianity},\nauthor={Ignavier Ng and Yujia Zheng and Xinshuai Dong and Kun Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kJIibP5bq2}\n}", "github": "", "project": "", "reviewers": "ouDH;6AWD;zk7j;vUDk", "pdf_size": 606305, "rating": "4;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "126;60;126;186", "wc_strengths": "118;69;81;100", "wc_weaknesses": "552;220;216;66", "wc_questions": "146;106;71;87", "wc_limitations": "61;2;38;1", "wc_review": "1003;457;532;440", "wc_reply_reviewers": "575;99;25;0", "wc_reply_authors": "721;401;24;0", "reply_reviewers": "4;2;1;0", "reply_authors": "6;7;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.5, 44.572973874310875 ], "wc_strengths_avg": [ 92.0, 18.641351882307248 ], "wc_weaknesses_avg": [ 263.5, 177.75474677206233 ], "wc_questions_avg": [ 102.5, 28.00446392988089 ], "wc_limitations_avg": [ 25.5, 25.342651794948374 ], "wc_review_avg": [ 608.0, 230.66534200004995 ], "wc_reply_reviewers_avg": 
[ 174.75, 233.9341520599333 ], "wc_reply_authors_avg": [ 286.5, 297.0223055597003 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 4.0, 2.5495097567963922 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4511586963599728936&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "When Do Graph Neural Networks Help with Node Classification? Investigating the Homophily Principle on Node Distinguishability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70652", "id": "kJmYu3Ti2z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5ba11de4c74548071899cf41dec078bf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kJmYu3Ti2z", "openreview": "https://openreview.net/forum?id=kJmYu3Ti2z", "poster": "/media/PosterPDFs/NeurIPS%202023/70652.png?t=1697583473.3488278", "slides": "https://nips.cc/virtual/2023/poster/70652", "video": "https://nips.cc/virtual/2023/poster/70652", "author_site": "Sitao Luan, Chenqing Hua, Minkai Xu, Qincheng Lu, Jiaqi Zhu, Xiao-Wen Chang, Jie Fu, Jure Leskovec, Doina Precup", "tldr": "", "abstract": "The homophily principle, i.e., that nodes with the same labels are more likely to be connected, has been believed to be the main reason for the performance superiority of Graph Neural Networks (GNNs) over Neural Networks on node classification tasks. Recent research suggests that, even in the absence of homophily, the advantage of GNNs still exists as long as nodes from the same class share similar neighborhood patterns. However, this argument only considers intra-class Node Distinguishability (ND) but neglects inter-class ND, which provides an incomplete understanding of homophily on GNNs. In this paper, we first demonstrate this deficiency with examples and argue that an ideal situation for ND is to have smaller intra-class ND than inter-class ND. To formulate this idea and study ND deeply, we propose the Contextual Stochastic Block Model for Homophily (CSBM-H) and define two metrics, Probabilistic Bayes Error (PBE) and negative generalized Jeffreys divergence, to quantify ND. With the metrics, we visualize and analyze how graph filters, node degree distributions and class variances influence ND, and investigate the combined effect of intra- and inter-class ND. Besides, we discovered the mid-homophily pitfall, which occurs widely in graph datasets. Furthermore, we verified that, in real-world tasks, the superiority of GNNs is indeed closely related to both intra- and inter-class ND regardless of homophily levels. Grounded in this observation, we propose a new hypothesis-testing based performance metric beyond homophily, which is non-linear, feature-based and can provide a statistical threshold value for GNNs' superiority. 
Experiments indicate that it is significantly more effective than the existing homophily metrics at revealing the advantages and disadvantages of graph-aware models on both synthetic and benchmark real-world datasets.", "keywords": "Graph Neural Networks;Homophily;Heterophily;Low-pass filter;High-pass filter;Node Distinguishability;Metrics", "primary_area": "", "supplementary_material": "/attachment/f5c26aca53a504596df3546580eac289d086714e.pdf", "author": "Sitao Luan;Chenqing Hua;Minkai Xu;Qincheng Lu;Jiaqi Zhu;Xiao-Wen Chang;Jie Fu;Jure Leskovec;Doina Precup", "authorids": "~Sitao_Luan1;~Chenqing_Hua1;~Minkai_Xu1;~Qincheng_Lu1;~Jiaqi_Zhu1;~Xiao-Wen_Chang1;~Jie_Fu2;~Jure_Leskovec1;~Doina_Precup1", "gender": "M;Non-Binary;M;;;M;;F;M", "homepage": ";https://willhua127.github.io/;https://minkaixu.com;https://github.com/wzzlcss;https://github.com/jzhu1905;https://www.cs.mcgill.ca/~chang;http://cs.stanford.edu/~jure/;http://cs.mcgill.ca/~dprecup/;https://bigaidream.github.io/", "dblp": "249/2879;272/8791;257/3355;;;;l/JureLeskovec;p/DoinaPrecup;", "google_scholar": "Ouoi7yYAAAAJ;Lxe71v4AAAAJ;https://scholar.google.com/citations?hl=en;;;;Q_kKkIUAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;66osleIAAAAJ", "orcid": ";;;;;;0000-0002-5411-923X;;0000-0002-4494-843X", "linkedin": ";willhua/;;;;;leskovec/;;", "or_profile": "~Sitao_Luan1;~Chenqing_Hua1;~Minkai_Xu1;~Qincheng_Lu1;~Jiaqi_Zhu1;~Xiao-Wen_Chang1;~Jure_Leskovec1;~Doina_Precup1;~Jie_Fu1", "aff": "McGill University;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Stanford University;McGill University;;McGill University;Kumo.AI;McGill University;Beijing Academy of Artificial Intelligence", "aff_domain": "mcgill.ca;mila.umontreal.ca;stanford.edu;mcgill.ca;;mcgill.ca;kumo.ai;mcgill.ca;baai.ac.cn", "position": "PhD student;MS student;PhD student;PhD student;;Associate Professor;Chief Scientist;Associate Professor;Researcher", "bibtex": "@inproceedings{\nluan2023when,\ntitle={When Do Graph Neural Networks Help with Node Classification? 
Investigating the Homophily Principle on Node Distinguishability},\nauthor={Sitao Luan and Chenqing Hua and Minkai Xu and Qincheng Lu and Jiaqi Zhu and Xiao-Wen Chang and Jie Fu and Jure Leskovec and Doina Precup},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kJmYu3Ti2z}\n}", "github": "", "project": "", "reviewers": "GCuM;7wh2;NHN6;RYrF;FryU", "pdf_size": 3263353, "rating": "3;5;6;7;8", "confidence": "2;3;4;3;1", "soundness": "3;3;4;3;4", "novelty": "2;2;3;3;4", "presentation": "4;2;3;3;4", "wc_summary": "65;163;115;54;119", "wc_strengths": "20;34;62;35;49", "wc_weaknesses": "41;328;232;81;1", "wc_questions": "89;53;52;69;1", "wc_limitations": "7;4;13;1;85", "wc_review": "222;582;474;240;255", "wc_reply_reviewers": "138;38;18;12;0", "wc_reply_authors": "818;351;17;10;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;3;2;2;1", "rating_avg": [ 5.8, 1.7204650534085253 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 103.2, 39.61009972216682 ], "wc_strengths_avg": [ 40.0, 14.324803663575986 ], "wc_weaknesses_avg": [ 136.6, 123.5744310122446 ], "wc_questions_avg": [ 52.8, 29.178073959739017 ], "wc_limitations_avg": [ 22.0, 31.74901573277509 ], "wc_review_avg": [ 354.6, 146.0158895463093 ], "wc_reply_reviewers_avg": [ 41.2, 49.93756101372993 ], "wc_reply_authors_avg": [ 239.2, 318.3177029321492 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.15958626340564358, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5497721246052170047&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "mcgill.ca;mila.umontreal.ca;stanford.edu;mcgill.ca;;mcgill.ca;kumo.ai;mcgill.ca;baai.ac.cn", "author_num": 9, "aff_unique_index": "0;1;2;0;0;3;0;4", "aff_unique_norm": "McGill University;University of Montreal;Stanford University;Kumo.AI;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;;;", "aff_unique_url": "https://www.mcgill.ca;https://www.mila.quebec;https://www.stanford.edu;https://www.kumo.ai;https://www.baaic.cn", "aff_unique_abbr": "McGill;MILA;Stanford;Kumo.AI;BAAI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Montreal;Stanford", "aff_country_unique_index": "0;0;1;0;0;1;0;2", "aff_country_unique": "Canada;United States;China" }, { "title": "On Learning Necessary and Sufficient Causal Graphs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70651", "id": "kKFDMtpeDW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/837b396039248acb08c385bebb6291b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kKFDMtpeDW", "openreview": "https://openreview.net/forum?id=kKFDMtpeDW", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70651", "video": "https://nips.cc/virtual/2023/poster/70651", "author_site": "Hengrui Cai, Yixin Wang, Michael Jordan, Rui Song", "tldr": "", "abstract": "The causal revolution has stimulated interest in understanding complex relationships in various fields. Most of the existing methods aim to discover causal relationships among all variables within a complex large-scale graph. 
However, in practice, only a small subset of variables in the graph are relevant to the outcomes of interest. Consequently, causal estimation with the full causal graph---particularly given limited data---could lead to numerous *falsely discovered, spurious* variables that exhibit high correlation with, but exert no causal impact on, the target outcome. In this paper, we propose learning a class of *necessary and sufficient causal graphs (NSCG)* that exclusively comprises causally relevant variables for an outcome of interest, which we term *causal features*. The key idea is to employ *probabilities of causation* to systematically evaluate the importance of features in the causal graph, allowing us to identify a subgraph relevant to the outcome of interest. To learn NSCG from data, we develop a *necessary and sufficient causal structural learning (NSCSL)* algorithm, by establishing theoretical properties and relationships between probabilities of causation and natural causal effects of features. Across empirical studies of simulated and real data, we demonstrate that NSCSL outperforms existing algorithms and can reveal crucial yeast genes for target heritable traits of interest.", "keywords": "Causal structural learning;Necessity and sufficiency;Natural causal effects;Probabilities of causation;Variable selection", "primary_area": "", "supplementary_material": "/attachment/3862d1632492abcc872010e8103cc5fd7c1ac2f3.zip", "author": "Hengrui Cai;Yixin Wang;Michael Jordan;Rui Song", "authorids": "~Hengrui_Cai1;~Yixin_Wang1;~Michael_Jordan1;~Rui_Song2", "gender": "F;;M;", "homepage": "https://hengruicai.github.io/;;http://www.cs.berkeley.edu/~jordan/;https://song-ray.github.io/", "dblp": "277/5831;;j/MichaelIJordan;01/2743-6.html", "google_scholar": ";gFLW9qcAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;", "orcid": ";0000-0002-6617-4842;0000-0001-8935-817X;0000-0003-1875-2115", "linkedin": "hengrui-cai-b1a6a5b9/;;;", "or_profile": "~Hengrui_Cai1;~Yixin_Wang1;~Michael_Jordan1;~Rui_Song2", "aff": "University of California, Irvine;University of Michigan - Ann Arbor;University of California, Berkeley;North Carolina State University", "aff_domain": "uci.edu;umich.edu;berkeley.edu;ncsu.edu", "position": "Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncai2023on,\ntitle={On Learning Necessary and Sufficient Causal Graphs},\nauthor={Hengrui Cai and Yixin Wang and Michael Jordan and Rui Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kKFDMtpeDW}\n}", "github": "", "project": "", "reviewers": "hThF;Qe9n;LXJs;hnpE;vBqs", "pdf_size": 677087, "rating": "6;6;6;6;6", "confidence": "3;4;2;3;4", "soundness": "3;3;2;3;3", "novelty": "3;3;2;2;2", "presentation": "3;2;3;3;2", "wc_summary": "56;36;81;132;81", "wc_strengths": "39;38;22;137;59", "wc_weaknesses": "198;95;252;178;370", "wc_questions": "465;111;139;75;88", "wc_limitations": "21;3;36;1;24", "wc_review": "779;283;530;523;622", "wc_reply_reviewers": "135;28;33;45;202", "wc_reply_authors": "583;73;142;138;562", "reply_reviewers": "2;1;1;1;2", "reply_authors": "3;2;3;3;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 77.2, 32.183225444321145 ], "wc_strengths_avg": [ 59.0, 40.728368491752775 ], "wc_weaknesses_avg": 
[ 218.6, 90.98043745773045 ], "wc_questions_avg": [ 175.6, 146.32785107422305 ], "wc_limitations_avg": [ 17.0, 13.251415018781957 ], "wc_review_avg": [ 547.4, 161.25086046282047 ], "wc_reply_reviewers_avg": [ 88.6, 68.81453334870476 ], "wc_reply_authors_avg": [ 299.6, 224.26288145834567 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 3.0, 0.6324555320336759 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13870260457378811668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uci.edu;umich.edu;berkeley.edu;ncsu.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of California, Irvine;University of Michigan;University of California, Berkeley;North Carolina State University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uci.edu;https://www.umich.edu;https://www.berkeley.edu;https://www.ncsu.edu", "aff_unique_abbr": "UCI;UM;UC Berkeley;NCSU", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Irvine;Ann Arbor;Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ConDaFormer: Disassembled Transformer with Local Structure Enhancement for 3D Point Cloud Understanding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70650", "id": "kKXJkiniOx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b4f1272c73d5afd222b6dd3391c3f77-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kKXJkiniOx", "openreview": "https://openreview.net/forum?id=kKXJkiniOx", "poster": "/media/PosterPDFs/NeurIPS%202023/70650.png?t=1699777604.979557", "slides": "https://nips.cc/virtual/2023/poster/70650", "video": "https://nips.cc/virtual/2023/poster/70650", "author_site": "Lunhao Duan, Shanshan Zhao, Nan Xue, Mingming Gong, Gui-Song Xia, Dacheng Tao", "tldr": "", "abstract": "Transformers have been recently explored for 3D point cloud understanding with impressive progress achieved. A large number of points, over 0.1 million, make the global self-attention infeasible for point cloud data. Thus, most methods propose to apply the transformer in a local region, e.g., spherical or cubic window. However, it still contains a large number of Query-Key pairs, which requires high computational costs. In addition, previous methods usually learn the query, key, and value using a linear projection without modeling the local 3D geometric structure. In this paper, we attempt to reduce the costs and model the local geometry prior by developing a new transformer block, named ConDaFormer. Technically, ConDaFormer disassembles the cubic window into three orthogonal 2D planes, leading to fewer points when modeling the attention in a similar range. The disassembling operation is beneficial to enlarging the range of attention without increasing the computational complexity, but ignores some contexts. To provide a remedy, we develop a local structure enhancement strategy that introduces a depth-wise convolution before and after the attention. This scheme can also capture the local geometric information. Taking advantage of these designs, ConDaFormer captures both long-range contextual information and local priors. The effectiveness is demonstrated by experimental results on several 3D point cloud understanding benchmarks. 
Our code will be available.", "keywords": "Point Cloud;Transformer;3D Segmentation;3D object detection", "primary_area": "", "supplementary_material": "/attachment/d1fac67e8d5371c8adeab5290acd7e6c4d57cb45.pdf", "author": "Lunhao Duan;Shanshan Zhao;Nan Xue;Mingming Gong;Gui-Song Xia;Dacheng Tao", "authorids": "~Lunhao_Duan1;~Shanshan_Zhao2;~Nan_Xue1;~Mingming_Gong1;~Gui-Song_Xia3;~Dacheng_Tao1", "gender": "M;M;M;M;;", "homepage": ";https://sshan-zhao.github.io/;https://xuenan.net;https://mingming-gong.github.io/;;", "dblp": "261/9477;;153/8762-1;98/8479;;", "google_scholar": ";https://scholar.google.com.hk/citations?hl=zh-CN;CKTrWqYAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;;", "orcid": "0000-0003-4536-8039;0000-0003-0682-8645;;0000-0001-7147-5589;;", "linkedin": ";;;;;", "or_profile": "~Lunhao_Duan1;~Shanshan_Zhao2;~Nan_Xue1;~Mingming_Gong1;~Gui-Song_Xia3;~Dacheng_Tao1", "aff": "Wuhan University;JD Explore Academy;Wuhan University;University of Melbourne;;", "aff_domain": "whu.edu.cn;jd.com;whu.edu.cn;unimelb.edu.au;;", "position": "PhD student;Researcher;Research Associate Professor;Assistant Professor;;", "bibtex": "@inproceedings{\nduan2023condaformer,\ntitle={ConDaFormer: Disassembled Transformer with Local Structure Enhancement for 3D Point Cloud Understanding},\nauthor={Lunhao Duan and Shanshan Zhao and Nan Xue and Mingming Gong and Gui-Song Xia and Dacheng Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kKXJkiniOx}\n}", "github": "", "project": "", "reviewers": "Kwdg;teZK;JCcK;32ZL;BFnK", "pdf_size": 13831261, "rating": "5;5;5;6;6", "confidence": "5;5;5;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "59;70;54;54;203", "wc_strengths": "79;48;88;110;118", "wc_weaknesses": "34;66;119;97;62", "wc_questions": "15;5;5;4;48", "wc_limitations": "7;5;1;1;43", "wc_review": "194;194;267;266;474", "wc_reply_reviewers": "23;22;61;21;19", "wc_reply_authors": "35;35;42;35;35", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.0, 57.79619364629474 ], "wc_strengths_avg": [ 88.6, 24.751565607047972 ], "wc_weaknesses_avg": [ 75.6, 29.493050028777965 ], "wc_questions_avg": [ 15.4, 16.788091017146648 ], "wc_limitations_avg": [ 11.4, 15.969971822141703 ], "wc_review_avg": [ 279.0, 102.75018248158979 ], "wc_reply_reviewers_avg": [ 29.2, 15.954936540143304 ], "wc_reply_authors_avg": [ 36.4, 2.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4517950755713343068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "whu.edu.cn;jd.com;whu.edu.cn;unimelb.edu.au;;", "author_num": 6, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Wuhan University;JD;University of Melbourne", "aff_unique_dep": ";JD Explore Academy;", "aff_unique_url": "http://www.whu.edu.cn/;;https://www.unimelb.edu.au", "aff_unique_abbr": "WHU;;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2", "aff_country_unique": "China;;Australia" }, { "title": "On the choice of Perception Loss 
Function for Learned Video Compression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70649", "id": "kLIieSS2P3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96d328a1f6d8396d8c8a62f2beee252a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kLIieSS2P3", "openreview": "https://openreview.net/forum?id=kLIieSS2P3", "poster": "/media/PosterPDFs/NeurIPS%202023/70649.png?t=1699687097.1213682", "slides": "https://nips.cc/virtual/2023/poster/70649", "video": "https://nips.cc/virtual/2023/poster/70649", "author_site": "Sadaf Salehkalaibar, Truong Buu Phan, Jun Chen, Wei Yu, Ashish Khisti", "tldr": "", "abstract": "We study causal, low-latency, sequential video compression when the output is subjected to both a mean squared-error (MSE) distortion loss and a perception loss to target realism. Motivated by prior approaches, we consider two different perception loss functions (PLFs). The first, PLF-JD, considers the joint distribution (JD) of all the video frames up to the current one, while the second metric, PLF-FMD, considers the framewise marginal distributions (FMD) between the source and reconstruction. Using information-theoretic analysis and deep-learning based experiments, we demonstrate that the choice of PLF can have a significant effect on the reconstruction, especially at low bit rates. In particular, while the reconstruction based on PLF-JD can better preserve the temporal correlation across frames, it also imposes a significant penalty in distortion compared to PLF-FMD and further makes it more difficult to recover from errors made in the earlier output frames. Although the choice of PLF decisively affects reconstruction quality, we also demonstrate that it may not be essential to commit to a particular PLF during encoding and the choice of PLF can be delegated to the decoder. In particular, encoded representations generated by training a system to minimize the MSE (without requiring either PLF) can be {\\em near universal} and can generate close to optimal reconstructions for either choice of PLF at the decoder. 
We validate our results using (one-shot) information-theoretic analysis, a detailed study of the rate-distortion-perception tradeoff of the Gauss-Markov source model, as well as deep-learning based experiments on the moving MNIST and KTH datasets.", "keywords": "Video Compression;Information Theory;Neural Compression", "primary_area": "", "supplementary_material": "/attachment/7d704447f10ec10bb26885f927ad184fd091874d.pdf", "author": "Sadaf Salehkalaibar;Truong Buu Phan;Jun Chen;Wei Yu;Ashish J Khisti", "authorids": "~Sadaf_Salehkalaibar1;~Truong_Buu_Phan1;~Jun_Chen8;~Wei_Yu20;~Ashish_J_Khisti1", "gender": "F;M;M;M;M", "homepage": "https://sadafsaleh.com/;https://truongbuu.github.io/;https://www.ece.mcmaster.ca/~junchen/;https://www.comm.utoronto.ca/~weiyu/;https://www.comm.utoronto.ca/~akhisti/", "dblp": "13/8823.html;228/4651;85/5901-5.html;82/2790-1;84/5679.html", "google_scholar": "aPV4dVkAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=XI79Mw0AAAAJ;https://scholar.google.ca/citations?user=agJYLEQAAAAJ;https://scholar.google.ca/citations?user=jiGeAg4AAAAJ", "orcid": ";;;0000-0002-7453-422X;", "linkedin": ";;;;", "or_profile": "~Sadaf_Salehkalaibar1;~Truong_Buu_Phan1;~Jun_Chen8;~Wei_Yu20;~Ashish_J_Khisti1", "aff": "University of Toronto;University of Toronto;McMaster University;University of Toronto;Toronto University", "aff_domain": "utoronto.ca;utoronto.ca;mcmaster.ca;utoronto.ca;utoronto.ca", "position": "Researcher;PhD student;Full Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nsalehkalaibar2023on,\ntitle={On the choice of Perception Loss Function for Learned Video Compression},\nauthor={Sadaf Salehkalaibar and Truong Buu Phan and Jun Chen and Wei Yu and Ashish J Khisti},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kLIieSS2P3}\n}", "github": "", "project": "", "reviewers": "1Qbf;QNA6;tXFk;pcBB", "pdf_size": 3403301, "rating": "4;5;6;7", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "38;48;303;141", "wc_strengths": "29;33;28;141", "wc_weaknesses": "109;74;196;97", "wc_questions": "26;33;1;4", "wc_limitations": "4;1;5;26", "wc_review": "206;189;533;409", "wc_reply_reviewers": "0;10;20;17", "wc_reply_authors": "0;147;147;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 132.5, 106.31674374246043 ], "wc_strengths_avg": [ 57.75, 48.100805606559234 ], "wc_weaknesses_avg": [ 119.0, 46.20064934608604 ], "wc_questions_avg": [ 16.0, 13.765899897936205 ], "wc_limitations_avg": [ 9.0, 9.924716620639604 ], "wc_review_avg": [ 334.25, 143.73130313192044 ], "wc_reply_reviewers_avg": [ 11.75, 7.693341276714559 ], "wc_reply_authors_avg": [ 73.5, 73.5 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5214484291084824441&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "utoronto.ca;utoronto.ca;mcmaster.ca;utoronto.ca;utoronto.ca", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of 
Toronto;McMaster University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.mcmaster.ca", "aff_unique_abbr": "U of T;McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "The RefinedWeb Dataset for Falcon LLM: Outperforming Curated Corpora with Web Data Only", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73487", "id": "kM5eGcdCzq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa3ed726cc5073b9c31e3e49a807789c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=kM5eGcdCzq", "openreview": "https://openreview.net/forum?id=kM5eGcdCzq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73487", "video": "https://nips.cc/virtual/2023/poster/73487", "author_site": "Guilherme Penedo, Quentin Malartic, Daniel Hesslow, Ruxandra Cojocaru, Hamza Alobeidli, Alessandro Cappelli, Baptiste Pannier, Ebtesam Almazrouei, Julien Launay", "tldr": "", "abstract": "Large language models are commonly trained on a mixture of filtered web data and curated ``high-quality'' corpora, such as social media conversations, books, or technical papers. This curation process is believed to be necessary to produce performant models with broad zero-shot generalization abilities. However, as larger models requiring pretraining on trillions of tokens are considered, it is unclear how scalable curation is, and whether we will run out of unique high-quality data soon. At variance with previous beliefs, we show that properly filtered and deduplicated web data alone can lead to powerful models, even significantly outperforming models trained on The Pile. Despite extensive filtering, the high-quality data we extract from the web is still plentiful, and we are able to obtain five trillion tokens from CommonCrawl. 
We publicly release an extract of 500 billion tokens from our RefinedWeb dataset, and 1.3B/7.5B-parameter language models trained on it.", "keywords": "web data;crawl;curated;deduplication;NLP;LLM", "primary_area": "", "supplementary_material": "/attachment/fde1335864d9d0a56bfd1aea81838641876c8604.pdf", "author": "Guilherme Penedo;Quentin Malartic;Daniel Hesslow;Ruxandra Cojocaru;Hamza Alobeidli;Alessandro Cappelli;Baptiste Pannier;Ebtesam Almazrouei;Julien Launay", "authorids": "~Guilherme_Penedo1;~Quentin_Malartic1;~Daniel_Hesslow1;~Ruxandra_Cojocaru1;~Hamza_Alobeidli1;~Alessandro_Cappelli2;~Baptiste_Pannier1;~Ebtesam_Almazrouei1;~Julien_Launay1", "gender": "M;;;F;M;M;;M;M", "homepage": "https://github.com/guipenedo;;;;;;;https://lolo.science;", "dblp": ";;;;;;;242/9294;", "google_scholar": "L-jmoJYAAAAJ;;xslrgtIAAAAJ;https://scholar.google.es/citations?user=m6hjTrQAAAAJ;;;;https://scholar.google.fr/citations?user=Mm2mi0UAAAAJ;oQSxClYAAAAJ", "orcid": ";0000-0001-7570-3219;;0000-0003-0496-5355;;;;;", "linkedin": ";;;elena-ruxandra-cojocaru-032123ab/;hamza-alobeidli-5942341a4;https://linkedin.com/in/baptiste-pannier-b30758154;dr-ebtesam-almazrouei-90b956a9;julien-launay-400a7512a/;alessandro-cappelli-aa8060172/", "or_profile": "~Guilherme_Penedo1;~Quentin_Malartic1;~Daniel_Hesslow1;~Ruxandra_Cojocaru1;~Hamza_Alobeidli1;~Baptiste_Pannier1;~Ebtesam_Almazrouei1;~Julien_Launay1;~alessandro_cappelli1", "aff": "LightOn;;Lighton;Technology Innovation Institute;Mohamed bin Zayed University of Artificial Intelligence;;;LightOn;Lighton", "aff_domain": "lighton.ai;;lighton.ai;tii.ae;mbzuai.ac.ae;;;lighton.ai;lighton.ai", "position": "Researcher;;PhD student;Researcher;MS student;;;Researcher;Researcher", "bibtex": "@inproceedings{\npenedo2023the,\ntitle={The RefinedWeb Dataset for Falcon {LLM}: Outperforming Curated Corpora with Web Data Only},\nauthor={Guilherme Penedo and Quentin Malartic and Daniel Hesslow and Ruxandra Cojocaru and Hamza Alobeidli and Alessandro Cappelli and Baptiste Pannier and Ebtesam Almazrouei and Julien Launay},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=kM5eGcdCzq}\n}", "github": "", "project": "", "reviewers": "TYb3;om9q;2Ywv;GCg1", "pdf_size": 1397342, "rating": "7;7;7;7", "confidence": "3;4;3;4", "wc_summary_and_contributions": "117;29;53;64", "wc_strengths": "106;116;73;142", "wc_improvement": "134;210;91;125", "wc_limitations": "1;1;58;10", "wc_correctness": "13;7;17;26", "wc_clarity": "23;1;4;1", "wc_relation_to_prior_work": "18;1;5;1", "wc_documentation": "22;1;7;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "435;367;309;389", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "219;1015;621;1274", "reply_reviewers": "0;0;0;0", "reply_authors": "1;3;1;3", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 65.75, 32.18209906143476 ], "wc_strengths_avg": [ 109.25, 24.71209218176397 ], "wc_improvement_avg": [ 140.0, 43.47988040461933 ], "wc_limitations_avg": [ 17.5, 23.66960075708925 ], "wc_correctness_avg": [ 15.75, 6.905613658466566 ], "wc_clarity_avg": [ 7.25, 9.175374651751284 ], "wc_relation_to_prior_work_avg": [ 6.25, 6.977642868476432 ], "wc_documentation_avg": [ 12.25, 8.584142356694699 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 375.0, 45.32107677449864 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 782.25, 399.76328933507637 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 147, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6580304766386941221&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "lighton.ai;;lighton.ai;tii.ae;mbzuai.ac.ae;;;lighton.ai;lighton.ai", "author_num": 9, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "LightOn;Technology Innovation Institute;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": ";;https://mbzuai.ac.ae", "aff_unique_abbr": ";;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United Arab Emirates" }, { "title": "Point Cloud Completion with Pretrained Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70648", "id": "kMmAYbT0VL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/284afdc2309f9667d2d4fb9290235b0c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kMmAYbT0VL", "openreview": "https://openreview.net/forum?id=kMmAYbT0VL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70648", "video": "https://nips.cc/virtual/2023/poster/70648", "author_site": "Yoni Kasten, Ohad Rahamim, Gal Chechik", "tldr": "", "abstract": "Point cloud data collected in real-world applications are often incomplete. This is because they are observed from partial viewpoints, which capture only a specific perspective or angle, or due to occlusion and low resolution. Existing completion approaches rely on datasets of specific predefined objects to guide the completion of incomplete, and possibly noisy, point clouds. However, these approaches perform poorly with Out-Of-Distribution (OOD) objects, which are either absent from the dataset or poorly represented. In recent years, the field of text-guided image generation has made significant progress, leading to major breakthroughs in text guided shape generation. We describe an approach called SDS-Complete that uses a pre-trained text-to-image diffusion model and leverages the text semantic of a given incomplete point cloud of an object, to obtain a complete surface representation. SDS-Complete can complete a variety of objects at test time optimization without the need for an expensive collection of 3D information. 
We evaluate SDS-Complete on incomplete scanned objects, captured by real-world depth sensors and LiDAR scanners, and demonstrate that it is effective in handling objects which are typically absent from common datasets.", "keywords": "Point Cloud;Text;3D", "primary_area": "", "supplementary_material": "/attachment/dd5d6973257d5898a1e7e340421c67f56b540c2a.zip", "author": "Yoni Kasten;Ohad Rahamim;Gal Chechik", "authorids": "~Yoni_Kasten1;~Ohad_Rahamim1;~Gal_Chechik1", "gender": ";M;", "homepage": "https://ykasten.github.io/;https://ohad204.github.io/ohadrahamim.github.io/;https://chechiklab.biu.ac.il/~gal/", "dblp": "183/6527;209/3695;c/GalChechik", "google_scholar": "https://scholar.google.co.il/citations?user=kc4-e8oAAAAJ;LO-hhSwAAAAJ;Wk2gAZUAAAAJ", "orcid": ";0000-0002-5141-5885;0000-0001-9164-5303", "linkedin": "yoni-kasten-788a87b3;;", "or_profile": "~Yoni_Kasten1;~Ohad_Rahamim1;~Gal_Chechik1", "aff": "NVIDIA;Bar-Ilan University;NVIDIA", "aff_domain": "nvidia.com;biu.ac.il;nvidia.com", "position": "Researcher;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nkasten2023point,\ntitle={Point Cloud Completion with Pretrained Text-to-Image Diffusion Models},\nauthor={Yoni Kasten and Ohad Rahamim and Gal Chechik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kMmAYbT0VL}\n}", "github": "", "project": "", "reviewers": "MGrG;BnfZ;zxYf;Fuyp;adEs", "pdf_size": 3902337, "rating": "4;4;5;6;6", "confidence": "3;5;4;5;2", "soundness": "2;1;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "68;47;93;114;48", "wc_strengths": "19;19;30;80;26", "wc_weaknesses": "160;267;98;75;53", "wc_questions": "81;2;29;185;41", "wc_limitations": "1;8;42;9;33", "wc_review": "329;343;292;463;201", "wc_reply_reviewers": "95;132;44;130;32", "wc_reply_authors": "255;380;305;300;114", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 74.0, 26.084478143140988 ], "wc_strengths_avg": [ 34.8, 22.990432792794486 ], "wc_weaknesses_avg": [ 130.6, 77.00025973982166 ], "wc_questions_avg": [ 67.6, 63.973744614490094 ], "wc_limitations_avg": [ 18.6, 15.932357013323546 ], "wc_review_avg": [ 325.6, 84.6725457276442 ], "wc_reply_reviewers_avg": [ 86.6, 41.97904239022134 ], "wc_reply_authors_avg": [ 270.8, 88.0713347236205 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1917412472118426, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=698319296166159224&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nvidia.com;biu.ac.il;nvidia.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "NVIDIA;Bar-Ilan University", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.biu.ac.il", "aff_unique_abbr": "NVIDIA;BIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "Abide by the law and follow the flow: conservation laws for gradient flows", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70647", "id": "kMueEV8Eyy", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c7bee9b76be21146fd592fc2b46614d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kMueEV8Eyy", "openreview": "https://openreview.net/forum?id=kMueEV8Eyy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70647", "video": "https://nips.cc/virtual/2023/poster/70647", "author_site": "Sibylle Marcotte, Remi Gribonval, Gabriel Peyr\u00e9", "tldr": "", "abstract": "Understanding the geometric properties of gradient descent dynamics is a key ingredient in deciphering the recent success of very large machine learning models. A striking observation is that trained over-parameterized models retain some properties of the optimization initialization. This \"implicit bias\" is believed to be responsible for some favorable properties of the trained models and could explain their good generalization properties. The purpose of this article is threefold. First, we rigorously expose the definition and basic properties of \"conservation laws\", that define quantities conserved during gradient flows of a given model (e.g. of a ReLU network with a given architecture) with any training data and any loss. Then we explain how to find the maximal number of independent conservation laws\nby performing finite-dimensional algebraic manipulations on the Lie algebra generated by the Jacobian of the model. Finally, we provide algorithms to: a) compute a family of polynomial laws; b) compute the maximal number of (not necessarily polynomial) independent conservation laws. We provide showcase examples that we fully work out theoretically. Besides, applying the two algorithms confirms for a number of ReLU network architectures that all known laws are recovered by the algorithm, and that there are no other independent laws. 
Such computational tools pave the way to understanding desirable properties of optimization initialization in large machine learning models.", "keywords": "Implicit bias;conservation laws;gradient flow;linear neural network;matrix factorization", "primary_area": "", "supplementary_material": "/attachment/bce115bec8e52f5168e05eb8ae1397c1d50bbcb9.pdf", "author": "Sibylle Marcotte;R\u00e9mi Gribonval;Gabriel Peyr\u00e9", "authorids": "~Sibylle_Marcotte1;~R\u00e9mi_Gribonval1;~Gabriel_Peyr\u00e92", "gender": "F;;M", "homepage": ";;http://gpeyre.com/", "dblp": "291/4809;;65/1759", "google_scholar": ";;https://scholar.google.fr/citations?user=KqA1dYcAAAAJ", "orcid": "0000-0002-2238-2973;;", "linkedin": ";;", "or_profile": "~Sibylle_Marcotte1;~R\u00e9mi_Gribonval1;~Gabriel_Peyr\u00e92", "aff": "Ecole Normale Sup\u00e9rieure de Paris;;CNRS", "aff_domain": "ens.fr;;cnrs.fr", "position": "PhD student;;Researcher", "bibtex": "@inproceedings{\nmarcotte2023abide,\ntitle={Abide by the law and follow the flow: conservation laws for gradient flows},\nauthor={Sibylle Marcotte and R{\\'e}mi Gribonval and Gabriel Peyr{\\'e}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kMueEV8Eyy}\n}", "github": "", "project": "", "reviewers": "pz1Q;7TLM;Tpzp;wzMU;n1Hr", "pdf_size": 646791, "rating": "7;7;7;7;8", "confidence": "3;4;3;4;3", "soundness": "3;3;4;4;4", "novelty": "2;4;3;4;3", "presentation": "3;3;3;3;4", "wc_summary": "256;62;155;177;122", "wc_strengths": "93;87;100;178;110", "wc_weaknesses": "55;1;134;262;126", "wc_questions": "281;15;35;137;165", "wc_limitations": "56;4;10;19;15", "wc_review": "741;169;434;773;538", "wc_reply_reviewers": "62;0;0;28;14", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 154.4, 63.908058959727455 ], "wc_strengths_avg": [ 113.6, 33.097431924546655 ], "wc_weaknesses_avg": [ 115.6, 87.9604456559879 ], "wc_questions_avg": [ 126.6, 96.19480235438918 ], "wc_limitations_avg": [ 20.8, 18.301912468373356 ], "wc_review_avg": [ 531.0, 220.52936312427875 ], "wc_reply_reviewers_avg": [ 20.8, 23.068593368473945 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16493535231258320289&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 19, "email": "ens.fr;;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Paris;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.ens.fr;https://www.cnrs.fr", "aff_unique_abbr": "ENS Paris;CNRS", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Online Ad Allocation with Predictions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70646", "id": "kPfd3pcwHV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3815d62554efad0878fad6c1c30ffda0-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=kPfd3pcwHV", "openreview": "https://openreview.net/forum?id=kPfd3pcwHV", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70646", "video": "https://nips.cc/virtual/2023/poster/70646", "author_site": "Fabian Spaeh, Alina Ene", "tldr": "", "abstract": "Display Ads and the generalized assignment problem are two well-studied online packing problems with important applications in ad allocation and other areas. In both problems, ad impressions arrive online and have to be allocated immediately to budget-constrained advertisers. Worst-case algorithms that achieve the ideal competitive ratio are known for both problems, but might act overly conservative given the predictable and usually tame nature of real-world input. Given this discrepancy, we develop an algorithm for both problems that incorporate machine-learned predictions and can thus improve the performance beyond the worst-case. Our algorithm is based on the work of Feldman et al. (2009) and similar in nature to Mahdian et al. (2007) who were the first to develop a learning-augmented algorithm for the related, but more structured Ad Words problem. We use a novel analysis to show that our algorithm is able to capitalize on a good prediction, while being robust against poor predictions. We experimentally evaluate our algorithm on synthetic and real-world data on a wide range of predictions. Our algorithm is consistently outperforming the worst-case algorithm without predictions.", "keywords": "Learning Augmented Algorithms;Display Ads;Generalized Assignment Problem", "primary_area": "", "supplementary_material": "/attachment/457e724e0b0909422e8ab0a8882b3fede8b6b1c2.zip", "author": "Fabian Christian Spaeh;Alina Ene", "authorids": "~Fabian_Christian_Spaeh1;~Alina_Ene1", "gender": "M;", "homepage": "https://cs-people.bu.edu/fspaeh/;", "dblp": "249/3514.html;", "google_scholar": ";", "orcid": "0000-0003-4816-471X;", "linkedin": "fabian-spaeh;", "or_profile": "~Fabian_Christian_Spaeh1;~Alina_Ene1", "aff": "Boston University, Boston University;", "aff_domain": "bu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nspaeh2023online,\ntitle={Online Ad Allocation with Predictions},\nauthor={Fabian Christian Spaeh and Alina Ene},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kPfd3pcwHV}\n}", "github": "", "project": "", "reviewers": "2o36;9H3m;eG65;3Ccp", "pdf_size": 666137, "rating": "4;6;7;7", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "240;159;85;353", "wc_strengths": "64;135;83;69", "wc_weaknesses": "137;170;150;116", "wc_questions": "7;62;2;115", "wc_limitations": "7;9;1;23", "wc_review": "455;535;321;676", "wc_reply_reviewers": "0;0;27;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 209.25, 99.46450371866338 ], "wc_strengths_avg": [ 87.75, 28.154706533721853 ], "wc_weaknesses_avg": [ 143.25, 19.638928178492836 ], "wc_questions_avg": [ 46.5, 46.02445002387318 ], "wc_limitations_avg": [ 10.0, 8.06225774829855 ], "wc_review_avg": [ 496.75, 128.67084945705457 ], "wc_reply_reviewers_avg": [ 10.0, 11.157956802210698 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KE5MUiy8fYAJ:scholar.google.com/&scioq=Online+Ad+Allocation+with+Predictions&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "bu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "0", "aff_campus_unique": "Boston", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Subclass-Dominant Label Noise: A Counterexample for the Success of Early Stopping", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70645", "id": "kR21XsZeAr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d763b4a2dde0ae7b77498516ce9f439e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kR21XsZeAr", "openreview": "https://openreview.net/forum?id=kR21XsZeAr", "poster": "/media/PosterPDFs/NeurIPS%202023/70645.png?t=1698028435.7702556", "slides": "https://nips.cc/virtual/2023/poster/70645", "video": "https://nips.cc/virtual/2023/poster/70645", "author_site": "Yingbin Bai, Zhongyi Han, Erkun Yang, Jun Yu, Bo Han, Dadong Wang, Tongliang Liu", "tldr": "", "abstract": "In this paper, we empirically investigate a previously overlooked and widespread type of label noise, subclass-dominant label noise (SDN). Our findings reveal that, during the early stages of training, deep neural networks can rapidly memorize mislabeled examples in SDN. This phenomenon poses challenges in effectively selecting confident examples using conventional early stopping techniques. To address this issue, we delve into the properties of SDN and observe that long-trained representations are superior at capturing the high-level semantics of mislabeled examples, leading to a clustering effect where similar examples are grouped together. Based on this observation, we propose a novel method called NoiseCluster that leverages the geometric structures of long-trained representations to identify and correct SDN. Our experiments demonstrate that NoiseCluster outperforms state-of-the-art baselines on both synthetic and real-world datasets, highlighting the importance of addressing SDN in learning with noisy labels. 
The code is available at https://github.com/tmllab/2023_NeurIPS_SDN.", "keywords": "learning with noisy labels;weakly supervised learning", "primary_area": "", "supplementary_material": "", "author": "Yingbin Bai;Zhongyi Han;Erkun Yang;Jun Yu;Bo Han;Dadong Wang;Tongliang Liu", "authorids": "~Yingbin_Bai1;~Zhongyi_Han1;~Erkun_Yang2;~Jun_Yu3;~Bo_Han1;~Dadong_Wang1;~Tongliang_Liu1", "gender": "M;M;M;M;;;M", "homepage": "https://bybeye.github.io/;https://zhyhan.github.io/;;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;;;https://tongliang-liu.github.io/", "dblp": "296/1646;181/7439;184/3481;50/5754-1.html;;;150/6667", "google_scholar": "EWMII50AAAAJ;https://scholar.google.com.sg/citations?user=0J-PErUAAAAJ;jo8L49AAAAAJ;efZyqyQAAAAJ;;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;;0000-0002-3197-8103;;;", "linkedin": ";;;;;;", "or_profile": "~Yingbin_Bai1;~Zhongyi_Han1;~Erkun_Yang2;~Jun_Yu3;~Bo_Han1;~Dadong_Wang1;~Tongliang_Liu1", "aff": "University of Sydney;Shandong University;Xidian University;University of Science and Technology of China;;;University of Sydney", "aff_domain": "sydney.edu.au;sdu.edu.cn;xidian.edu;ustc.edu.cn;;;sydney.edu.au", "position": "PhD student;PhD student;Associate Professor;Associate Professor;;;Lecturer", "bibtex": "@inproceedings{\nbai2023subclassdominant,\ntitle={Subclass-Dominant Label Noise: A Counterexample for the Success of Early Stopping},\nauthor={Yingbin Bai and Zhongyi Han and Erkun Yang and Jun Yu and Bo Han and Dadong Wang and Tongliang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kR21XsZeAr}\n}", "github": "", "project": "", "reviewers": "tXVq;t1fi;7AfS;Agy8", "pdf_size": 7678295, "rating": "5;5;5;6", "confidence": "4;3;4;3", "soundness": "3;2;2;3", "novelty": "2;2;3;2", "presentation": "3;2;2;3", "wc_summary": "58;83;90;87", "wc_strengths": "16;48;37;91", "wc_weaknesses": "109;64;239;67", "wc_questions": "1;69;255;33", "wc_limitations": "13;10;8;5", "wc_review": "197;274;629;283", "wc_reply_reviewers": "24;54;29;0", "wc_reply_authors": "31;126;239;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 12.658988901172163 ], "wc_strengths_avg": [ 48.0, 27.358728040608906 ], "wc_weaknesses_avg": [ 119.75, 71.1103895362696 ], "wc_questions_avg": [ 89.5, 98.53298939948995 ], "wc_limitations_avg": [ 9.0, 2.9154759474226504 ], "wc_review_avg": [ 345.75, 166.91521051120537 ], "wc_reply_reviewers_avg": [ 26.75, 19.17517926904466 ], "wc_reply_authors_avg": [ 99.0, 93.21212367498126 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13382772945438911770&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "sydney.edu.au;sdu.edu.cn;xidian.edu;ustc.edu.cn;;;sydney.edu.au", "author_num": 7, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Sydney;Shandong University;Xidian University;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sydney.edu.au;http://www.sdu.edu.cn;http://www.xidian.edu.cn/;http://www.ustc.edu.cn", 
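A minimal sketch of the clustering intuition behind NoiseCluster as summarized above (an assumed simplification; the released code at the linked repository is the authoritative implementation): cluster long-trained representations and flag examples whose given label disagrees with their cluster's majority label.

```python
import numpy as np
from sklearn.cluster import KMeans

def flag_suspect_labels(features, labels, n_clusters):
    """features: (n, d) long-trained representations; labels: (n,) int labels."""
    labels = np.asarray(labels)
    clusters = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(features)
    suspects = np.zeros(len(labels), dtype=bool)
    for c in range(n_clusters):
        idx = np.where(clusters == c)[0]
        if idx.size == 0:
            continue
        majority = np.bincount(labels[idx]).argmax()  # cluster's dominant label
        suspects[idx] = labels[idx] != majority       # disagreement = suspect
    return suspects
```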
"aff_unique_abbr": "USYD;SDU;Xidian;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Australia;China" }, { "title": "NuTrea: Neural Tree Search for Context-guided Multi-hop KGQA", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70644", "id": "kR5ycmBclj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/707a2d58641b2192203b4bf4c532cfe1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kR5ycmBclj", "openreview": "https://openreview.net/forum?id=kR5ycmBclj", "poster": "/media/PosterPDFs/NeurIPS%202023/70644.png?t=1699418433.0629296", "slides": "https://nips.cc/virtual/2023/poster/70644", "video": "https://nips.cc/virtual/2023/poster/70644", "author_site": "Hyeong Kyu Choi, Seunghun Lee, Jaewon Chu, Hyunwoo Kim", "tldr": "", "abstract": "Multi-hop Knowledge Graph Question Answering (KGQA) is a task that involves retrieving nodes from a knowledge graph (KG) to answer natural language questions. Recent GNN-based approaches formulate this task as a KG path searching problem, where messages are sequentially propagated from the seed node towards the answer nodes. However, these messages are past-oriented, and they do not consider the full KG context. To make matters worse, KG nodes often represent pronoun entities and are sometimes encrypted, being uninformative in selecting between paths. To address these problems, we propose Neural Tree Search (NuTrea), a tree search-based GNN model that incorporates the broader KG context. Our model adopts a message-passing scheme that probes the unreached subtree regions to boost the past-oriented embeddings. In addition, we introduce the Relation Frequency-Inverse Entity Frequency (RF-IEF) node embedding that considers the global KG context to better characterize ambiguous KG nodes. The general effectiveness of our approach is demonstrated through experiments on three major multi-hop KGQA benchmark datasets, and our extensive analyses further validate its expressiveness and robustness. Overall, NuTrea provides a powerful means to query the KG with complex natural language questions. Code is available at https://github.com/mlvlab/NuTrea.", "keywords": "Knowledge Graph Question Answering;Knowledge Graph;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/d22239b64b543db80b6dcafdeae6fbb94722897d.pdf", "author": "Hyeong Kyu Choi;Seunghun Lee;Jaewon Chu;Hyunwoo J. 
Kim", "authorids": "~Hyeong_Kyu_Choi1;~Seunghun_Lee2;~Jaewon_Chu1;~Hyunwoo_J._Kim3", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/froilanchoi;https://github.com/llsshh319;https://github.com/allonsy07;https://hyunwoojkim.com/publications", "dblp": "225/4796;77/7676-1;355/0102;150/4259", "google_scholar": "https://scholar.google.co.kr/citations?hl=en;LPuuGcAAAAAJ;X3RX138AAAAJ;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": "0000-0003-2090-9273;0000-0001-9377-2832;;0000-0002-2181-9264", "linkedin": "https://linkedin.com/in/hyeonggyufroilanchoi;;jaewon-chu-64b003262;", "or_profile": "~Hyeong_Kyu_Choi1;~Seunghun_Lee2;~Jaewon_Chu1;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "MS student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchoi2023nutrea,\ntitle={NuTrea: Neural Tree Search for Context-guided Multi-hop {KGQA}},\nauthor={Hyeong Kyu Choi and Seunghun Lee and Jaewon Chu and Hyunwoo J. Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kR5ycmBclj}\n}", "github": "", "project": "", "reviewers": "jrbk;5UYV;QeEo;E45E;jk9R", "pdf_size": 1238783, "rating": "4;5;6;6;7", "confidence": "4;4;4;3;4", "soundness": "3;3;4;2;3", "novelty": "2;2;3;2;3", "presentation": "3;3;3;1;3", "wc_summary": "59;40;98;48;32", "wc_strengths": "42;63;94;23;117", "wc_weaknesses": "219;109;98;158;102", "wc_questions": "2;46;54;29;47", "wc_limitations": "7;1;34;5;4", "wc_review": "329;259;378;263;302", "wc_reply_reviewers": "62;104;0;249;0", "wc_reply_authors": "416;202;0;1060;0", "reply_reviewers": "1;2;0;5;0", "reply_authors": "2;3;1;7;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 55.4, 23.09632005320328 ], "wc_strengths_avg": [ 67.8, 34.06699282296575 ], "wc_weaknesses_avg": [ 137.2, 46.24889187861694 ], "wc_questions_avg": [ 35.6, 18.7040102651811 ], "wc_limitations_avg": [ 10.2, 12.056533498481228 ], "wc_review_avg": [ 306.2, 44.24206143479302 ], "wc_reply_reviewers_avg": [ 83.0, 91.88688698612006 ], "wc_reply_authors_avg": [ 335.6, 393.5284487810252 ], "reply_reviewers_avg": [ 1.6, 1.8547236990991407 ], "reply_authors_avg": [ 2.8, 2.2271057451320084 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1961161351381841, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14138835157059561017&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Inferring Hybrid Neural Fluid Fields from Videos", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70643", "id": "kRdaTkaBwC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c8e1620b29d546c2999a9339ab29aa82-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kRdaTkaBwC", "openreview": 
"https://openreview.net/forum?id=kRdaTkaBwC", "poster": "/media/PosterPDFs/NeurIPS%202023/70643.png?t=1701983822.355365", "slides": "https://nips.cc/virtual/2023/poster/70643", "video": "https://nips.cc/virtual/2023/poster/70643", "author_site": "Hong-Xing Yu, Yang Zheng, Yuan Gao, Yitong Deng, Bo Zhu, Jiajun Wu", "tldr": "", "abstract": "We study recovering fluid density and velocity from sparse multiview videos. Existing neural dynamic reconstruction methods predominantly rely on optical flows; therefore, they cannot accurately estimate the density and uncover the underlying velocity due to the inherent visual ambiguities of fluid velocity, as fluids are often shapeless and lack stable visual features. The challenge is further pronounced by the turbulent nature of fluid flows, which calls for properly designed fluid velocity representations. To address these challenges, we propose hybrid neural fluid fields (HyFluid), a neural approach to jointly infer fluid density and velocity fields. Specifically, to deal with visual ambiguities of fluid velocity, we introduce a set of physics-based losses that enforce inferring a physically plausible velocity field, which is divergence-free and drives the transport of density. To deal with the turbulent nature of fluid velocity, we design a hybrid neural velocity representation that includes a base neural velocity field that captures most irrotational energy and a vortex particle-based velocity that models residual turbulent velocity. We show that our method enables recovering vortical flow details. Our approach opens up possibilities for various learning and reconstruction applications centered around 3D incompressible flow, including fluid re-simulation and editing, future prediction, and neural dynamic scene composition. 
Project website: https://kovenyu.com/HyFluid/", "keywords": "neural scene representations;fluid dynamics;flow reconstruction;physics-based learning", "primary_area": "", "supplementary_material": "", "author": "Hong-Xing Yu;Yang Zheng;Yuan Gao;Yitong Deng;Bo Zhu;Jiajun Wu", "authorids": "~Hong-Xing_Yu1;~Yang_Zheng2;~Yuan_Gao11;~Yitong_Deng1;~Bo_Zhu2;~Jiajun_Wu1", "gender": "M;M;M;M;M;M", "homepage": "https://kovenyu.com;https://y-zheng18.github.io/;https://www.linkedin.com/in/rab0na/;https://faculty.cc.gatech.edu/~bozhu/;https://jiajunwu.com;https://yitongdeng.github.io", "dblp": "205/2676.html;;;;117/4768;272/8948", "google_scholar": "kNKncZcAAAAJ;Q7Ouk0QAAAAJ;;atNjbs0AAAAJ;2efgcS0AAAAJ;oNqBXkgAAAAJ", "orcid": ";;;;0000-0002-4176-343X;", "linkedin": ";;rab0na/;;jiajunwu/;", "or_profile": "~Hong-Xing_Yu1;~Yang_Zheng2;~Yuan_Gao11;~Bo_Zhu2;~Jiajun_Wu1;~Yitong_Deng2", "aff": "Stanford University;Stanford University;Computer Science Department, Stanford University;Dartmouth College;Stanford University;Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu;cs.stanford.edu;dartmouth.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;MS student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nyu2023inferring,\ntitle={Inferring Hybrid Neural Fluid Fields from Videos},\nauthor={Hong-Xing Yu and Yang Zheng and Yuan Gao and Yitong Deng and Bo Zhu and Jiajun Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kRdaTkaBwC}\n}", "github": "", "project": "", "reviewers": "3dGa;qUcR;5YF8;MRuw;4cV8", "pdf_size": 5385847, "rating": "5;5;5;6;7", "confidence": "5;5;4;5;4", "soundness": "2;3;3;3;2", "novelty": "2;3;3;3;2", "presentation": "2;2;3;3;3", "wc_summary": "72;42;78;102;86", "wc_strengths": "93;38;35;94;93", "wc_weaknesses": "45;179;74;409;78", "wc_questions": "23;8;68;156;352", "wc_limitations": "26;1;1;1;11", "wc_review": "259;268;256;762;620", "wc_reply_reviewers": "19;322;0;40;38", "wc_reply_authors": "120;453;43;119;71", "reply_reviewers": "1;2;0;1;1", "reply_authors": "4;4;2;4;3", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 76.0, 19.75854245636555 ], "wc_strengths_avg": [ 70.6, 27.86108397029807 ], "wc_weaknesses_avg": [ 157.0, 133.9119113447344 ], "wc_questions_avg": [ 121.4, 126.3069277593276 ], "wc_limitations_avg": [ 8.0, 9.797958971132712 ], "wc_review_avg": [ 433.0, 215.42516101885593 ], "wc_reply_reviewers_avg": [ 83.8, 119.9806651090083 ], "wc_reply_authors_avg": [ 161.2, 148.817203306607 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.4, 0.8 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4082482904638631, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1151219845853932059&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.stanford.edu;stanford.edu;cs.stanford.edu;dartmouth.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Stanford University;Dartmouth College", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.dartmouth.edu", "aff_unique_abbr": "Stanford;Dartmouth", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", 
"aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Fractional Graph Laplacian Approach to Oversmoothing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70642", "id": "kS7ED7eE74", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a514213ba899f2911723a38be8d4096-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kS7ED7eE74", "openreview": "https://openreview.net/forum?id=kS7ED7eE74", "poster": "/media/PosterPDFs/NeurIPS%202023/70642.png?t=1702331369.199643", "slides": "https://nips.cc/virtual/2023/poster/70642", "video": "https://nips.cc/virtual/2023/poster/70642", "author_site": "Sohir Maskey, Raffaele Paolino, Aras Bacho, Gitta Kutyniok", "tldr": "", "abstract": "Graph neural networks (GNNs) have shown state-of-the-art performances in various applications. However, GNNs often struggle to capture long-range dependencies in graphs due to oversmoothing. In this paper, we generalize the concept of oversmoothing from undirected to directed graphs. To this aim, we extend the notion of Dirichlet energy by considering a directed symmetrically normalized Laplacian. As vanilla graph convolutional networks are prone to oversmooth, we adopt a neural graph ODE framework. Specifically, we propose fractional graph Laplacian neural ODEs, which describe non-local dynamics. We prove that our approach allows propagating information between distant nodes while maintaining a low probability of long-distance jumps. Moreover, we show that our method is more flexible with respect to the convergence of the graph\u2019s Dirichlet energy, thereby mitigating oversmoothing. We conduct extensive experiments on synthetic and real-world graphs, both directed and undirected, demonstrating our method\u2019s versatility across diverse graph homophily levels. 
Our\ncode is available at https://github.com/RPaolino/fLode", "keywords": "Graph Neural Networks;Graph Neural ODE;Fractional Laplacian;Oversmoothing", "primary_area": "", "supplementary_material": "/attachment/28826ccf223cb69d1b3e8f09f152392dc123151e.pdf", "author": "Sohir Maskey;Raffaele Paolino;Aras Bacho;Gitta Kutyniok", "authorids": "~Sohir_Maskey1;~Raffaele_Paolino1;~Aras_Bacho1;~Gitta_Kutyniok2", "gender": ";M;;F", "homepage": "https://www.sohirmaskey.com/;;;https://www.ai.math.lmu.de/kutyniok", "dblp": "302/4278;331/2660;;13/2736", "google_scholar": "3KpzqLMAAAAJ;uF27REUAAAAJ;;https://scholar.google.de/citations?user=JHs9LssAAAAJ", "orcid": "0000-0002-9691-6712;;;0000-0001-9738-2487", "linkedin": ";raffaele-paolino-717653202;;gitta-kutyniok-2606b215/?originalSubdomain=de", "or_profile": "~Sohir_Maskey1;~Raffaele_Paolino1;~Aras_Bacho1;~Gitta_Kutyniok2", "aff": "University of Munich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;;LMU Munich", "aff_domain": "math.lmu;lmu.de;;uni-muenchen.de", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nmaskey2023a,\ntitle={A Fractional Graph Laplacian Approach to Oversmoothing},\nauthor={Sohir Maskey and Raffaele Paolino and Aras Bacho and Gitta Kutyniok},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kS7ED7eE74}\n}", "github": "", "project": "", "reviewers": "PWaE;R3EG;dNLj;ZW5p", "pdf_size": 1044645, "rating": "3;5;6;7", "confidence": "5;4;2;3", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "2;3;4;4", "wc_summary": "88;51;77;84", "wc_strengths": "38;31;19;74", "wc_weaknesses": "543;155;23;4", "wc_questions": "346;192;109;113", "wc_limitations": "6;16;24;1", "wc_review": "1021;445;252;276", "wc_reply_reviewers": "1098;31;45;20", "wc_reply_authors": "2123;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "5;1;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 75.0, 14.404860290887934 ], "wc_strengths_avg": [ 40.5, 20.5 ], "wc_weaknesses_avg": [ 181.25, 216.80218518271442 ], "wc_questions_avg": [ 190.0, 95.95571895410924 ], "wc_limitations_avg": [ 11.75, 8.898735865278843 ], "wc_review_avg": [ 498.5, 310.69961377510595 ], "wc_reply_reviewers_avg": [ 298.5, 461.676564274168 ], "wc_reply_authors_avg": [ 530.75, 919.2859661171816 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8315218406202999, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17331319869177923306&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "math.lmu;lmu.de;;uni-muenchen.de", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Munich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-muenchen.de;https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Bayesian Active Causal Discovery with Multi-Fidelity Experiments", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70641", "id": "kS8rIH43Zc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c9d9659d1d960b53e8121469ef1f2df5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kS8rIH43Zc", "openreview": "https://openreview.net/forum?id=kS8rIH43Zc", "poster": "/media/PosterPDFs/NeurIPS%202023/70641.png?t=1699020321.4200697", "slides": "https://nips.cc/virtual/2023/poster/70641", "video": "https://nips.cc/virtual/2023/poster/70641", "author_site": "Zeyu Zhang, Chaozhuo Li, Chaozhuo Li, Xu Chen, Xing Xie", "tldr": "", "abstract": "This paper studies the problem of active causal discovery when the experiments can be done based on multi-fidelity oracles, where higher fidelity experiments are more precise and expensive, while the lower ones are cheaper but less accurate. In this paper, we formally define the task of multi-fidelity active causal discovery, and design a probabilistic model for solving this problem. In specific, we first introduce a mutual-information based acquisition function to determine which variable should be intervened at which fidelity, and then a cascading model is proposed to capture the correlations between different fidelity oracles. Beyond the above basic framework, we also extend it to the batch intervention scenario. We find that the theoretical foundations behind the widely used and efficient greedy method do not hold in our problem. To solve this problem, we introduce a new concept called $\\epsilon$-submodular, and design a constraint based fidelity model to theoretically validate the greedy method. We conduct extensive experiments to demonstrate the effectiveness of our model.", "keywords": "Causal Discovery;Active Learning;Multi-fidelity", "primary_area": "", "supplementary_material": "/attachment/f224b716545bc9ff11ac25383ca104940a76160a.pdf", "author": "Zeyu Zhang;Chaozhuo Li;Xu Chen;Xing Xie", "authorids": "~Zeyu_Zhang6;~Chaozhuo_Li1;~Xu_Chen13;~Xing_Xie3", "gender": "M;;M;M", "homepage": "https://zeyu-zhang.cn;https://scss.bupt.edu.cn/info/1063/5534.htm;https://gsai.ruc.edu.cn/chenxu;http://research.microsoft.com/en-us/people/xingx/", "dblp": "44/8352-200.html;316/1269.html;83/6331-17;08/6809-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;loPoqy0AAAAJ;5EQfAFIAAAAJ", "orcid": "0000-0003-0048-1687;0000-0002-8179-7503;0000-0003-0144-1775;0000-0002-8608-8482", "linkedin": ";;;xingx/", "or_profile": "~Zeyu_Zhang6;~Chaozhuo_Li1;~Xu_Chen13;~Xing_Xie3", "aff": "Renmin University of China;Beijing University of Posts and Telecommunications;Renmin University of China;Microsoft Research Asia", "aff_domain": "ruc.edu.cn;bupt.edu.cn;ruc.edu.cn;microsoft.com", "position": "MS student;Associate Professor;Associate Professor;Senior Principal Researcher", "bibtex": "@inproceedings{\nzhang2023bayesian,\ntitle={Bayesian Active Causal Discovery with Multi-Fidelity Experiments},\nauthor={Zeyu Zhang and Chaozhuo Li and Xu Chen and Xing Xie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kS8rIH43Zc}\n}", "github": "", "project": "", "reviewers": "ug4Y;am1k;HBUA;Xcc7;yEiY", "pdf_size": 2572538, "rating": "5;5;6;6;7", "confidence": "4;4;3;2;3", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "1;3;2;2;2", "wc_summary": "49;116;155;85;112", "wc_strengths": "9;179;27;77;57", "wc_weaknesses": "182;610;27;51;358", "wc_questions": "572;10;87;26;33", 
"wc_limitations": "69;17;51;8;1", "wc_review": "881;932;347;247;561", "wc_reply_reviewers": "38;262;106;32;87", "wc_reply_authors": "132;140;854;46;77", "reply_reviewers": "1;1;2;1;1", "reply_authors": "3;3;5;3;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 103.4, 35.19431772317799 ], "wc_strengths_avg": [ 69.8, 59.445437167204005 ], "wc_weaknesses_avg": [ 245.6, 216.8507320716257 ], "wc_questions_avg": [ 145.6, 214.76554658510753 ], "wc_limitations_avg": [ 29.2, 26.278508329050947 ], "wc_review_avg": [ 593.6, 275.3612899446834 ], "wc_reply_reviewers_avg": [ 105.0, 83.41702464125653 ], "wc_reply_authors_avg": [ 249.8, 304.1002466292982 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 3.4, 0.8 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6428571428571428, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5524765833606862284&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 4, "email": "ruc.edu.cn;bupt.edu.cn;ruc.edu.cn;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Renmin University of China;Beijing University of Posts and Telecommunications;Microsoft", "aff_unique_dep": ";;Research", "aff_unique_url": "http://www.ruc.edu.cn;http://www.bupt.edu.cn/;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "RUC;BUPT;MSR Asia", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Asia", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Efficient Pre-Trained Language Model via Feature Correlation Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70640", "id": "kVfHQV668B", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34260a400e39a802961470b3d3de99cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kVfHQV668B", "openreview": "https://openreview.net/forum?id=kVfHQV668B", "poster": "/media/PosterPDFs/NeurIPS%202023/70640.png?t=1702221979.486298", "slides": "https://nips.cc/virtual/2023/poster/70640", "video": "https://nips.cc/virtual/2023/poster/70640", "author_site": "Kun Huang, Xin Guo, Meng Wang", "tldr": "", "abstract": "Knowledge Distillation (KD) has emerged as a promising approach for compressing large Pre-trained Language Models (PLMs). The performance of KD relies on how to effectively formulate and transfer the knowledge from the teacher model to the student model. Prior arts mainly focus on directly aligning output features from the transformer block, which may impose overly strict constraints on the student model's learning process and complicate the training process by introducing extra parameters and computational cost. Moreover, our analysis indicates that the different relations within self-attention, as adopted in other works, involves more computation complexities and can easily be constrained by the number of heads, potentially leading to suboptimal solutions.\n To address these issues, we propose a novel approach that builds relationships directly from output features. Specifically, we introduce token-level and sequence-level relations concurrently\n to fully exploit the knowledge from the teacher model. 
Furthermore, we propose a correlation-based distillation loss to alleviate the exact-match properties inherent in traditional KL divergence or MSE loss functions. Our method, dubbed FCD, offers a simple yet effective way to compress various architectures (BERT, RoBERTa, and GPT) and model sizes (base-size and large-size).\n Extensive experimental results demonstrate that our distilled, smaller language models significantly surpass existing KD methods across various NLP tasks.", "keywords": "Knowledge Distillation; Pre-Trained Language Model", "primary_area": "", "supplementary_material": "/attachment/7ae13fcbcd34b6f4d3447655704b5251de161ab5.zip", "author": "Kun Huang;Xin Guo;Meng Wang", "authorids": "~Kun_Huang11;bangzhu.gx@antgroup.com;~Meng_Wang16", "gender": "M;;", "homepage": "https://www.linkedin.com/feed/;;", "dblp": "10/4151;;", "google_scholar": ";;cbIkfv4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kun_Huang11;bangzhu.gx@antgroup.com;~Meng_Wang16", "aff": "Shanghai Jiaotong University;;Ant Group", "aff_domain": "sjtu.edu.cn;;antgroup.com", "position": "MS student;;Researcher", "bibtex": "@inproceedings{\nhuang2023towards,\ntitle={Towards Efficient Pre-Trained Language Model via Feature Correlation Distillation},\nauthor={Kun Huang and Xin Guo and Meng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kVfHQV668B}\n}", "github": "", "project": "", "reviewers": "R5h8;XAJW;qBd9;kWDa;NpzH", "pdf_size": 802087, "rating": "4;5;6;7;8", "confidence": "3;3;4;3;5", "soundness": "2;2;3;3;4", "novelty": "3;2;3;3;4", "presentation": "3;3;2;3;4", "wc_summary": "90;67;134;93;69", "wc_strengths": "125;18;33;51;59", "wc_weaknesses": "135;200;34;101;17", "wc_questions": "274;17;45;26;41", "wc_limitations": "1;6;1;1;8", "wc_review": "625;308;247;272;194", "wc_reply_reviewers": "329;231;5;29;5", "wc_reply_authors": "1143;466;10;10;10", "reply_reviewers": "3;1;1;1;1", "reply_authors": "4;2;2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 90.6, 24.137936945812086 ], "wc_strengths_avg": [ 57.2, 36.77172827050695 ], "wc_weaknesses_avg": [ 97.4, 66.98835719735185 ], "wc_questions_avg": [ 80.6, 97.22674529161202 ], "wc_limitations_avg": [ 3.4, 3.0066592756745814 ], "wc_review_avg": [ 329.2, 152.4826547512864 ], "wc_reply_reviewers_avg": [ 119.8, 134.7091682106307 ], "wc_reply_authors_avg": [ 327.8, 444.216343688523 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9835067168369930867&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;;antgroup.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "SJTU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "DaTaSeg: Taming a Universal Multi-Dataset Multi-Task Segmentation Model", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70639", "id": "kXOXrVnwbb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d4eed238cf5807c6b75face996302892-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kXOXrVnwbb", "openreview": "https://openreview.net/forum?id=kXOXrVnwbb", "poster": "/media/PosterPDFs/NeurIPS%202023/70639.png?t=1702109289.1215546", "slides": "https://nips.cc/virtual/2023/poster/70639", "video": "https://nips.cc/virtual/2023/poster/70639", "author_site": "Xiuye Gu, Yin Cui, Jonathan Huang, Abdullah Rashwan, Xuan Yang, Xingyi Zhou, Golnaz Ghiasi, Weicheng Kuo, Huizhong Chen, Liang-Chieh Chen, David Ross", "tldr": "", "abstract": "Observing the close relationship among panoptic, semantic and instance segmentation tasks, we propose to train a universal multi-dataset multi-task segmentation model: DaTaSeg. We use a shared representation (mask proposals with class predictions) for all tasks. To tackle task discrepancy, we adopt different merge operations and post-processing for different tasks. We also leverage weak-supervision, allowing our segmentation model to benefit from cheaper bounding box annotations. To share knowledge across datasets, we use text embeddings from the same semantic embedding space as classifiers and share all network parameters among datasets. We train DaTaSeg on ADE semantic, COCO panoptic, and Objects365 detection datasets. DaTaSeg improves performance on all datasets, especially small-scale datasets, achieving 54.0 mIoU on ADE semantic and 53.5 PQ on COCO panoptic. DaTaSeg also enables weakly-supervised knowledge transfer on ADE panoptic and Objects365 instance segmentation. Experiments show DaTaSeg scales with the number of training datasets and enables open-vocabulary segmentation through direct transfer. 
In addition, we annotate an Objects365 instance segmentation set of 1,000 images and release it as a public evaluation benchmark on https://laoreja.github.io/dataseg.", "keywords": "universal segmentation;multi-task segmentation;multi-dataset segmentation;panoptic segmentation;semantic segmentation;instance segmentation;weakly-supervised segmentation", "primary_area": "", "supplementary_material": "/attachment/c1641f0cf68d878bb69fcc94a101ef49a1d8c0e9.pdf", "author": "Xiuye Gu;Yin Cui;Jonathan Huang;Abdullah Rashwan;Xuan Yang;Xingyi Zhou;Golnaz Ghiasi;Weicheng Kuo;Huizhong Chen;Liang-Chieh Chen;David A Ross", "authorids": "~Xiuye_Gu1;~Yin_Cui1;~Jonathan_Huang1;~Abdullah_Rashwan1;~Xuan_Yang6;~Xingyi_Zhou2;~Golnaz_Ghiasi2;~Weicheng_Kuo1;~Huizhong_Chen2;~Liang-Chieh_Chen1;~David_A_Ross1", "gender": "F;M;;M;F;M;F;M;M;;M", "homepage": "https://laoreja.github.io/;https://ycui.me/;http://jonathan-huang.org/;;;http://xingyizhou.xyz;;https://weichengkuo.github.io/;https://huizhongchen.github.io/;http://liangchiehchen.com/;http://www.cs.toronto.edu/~dross/", "dblp": "199/1920;47/8023.html;55/2421;;;182/2328;17/8614;163/2203;05/10534;138/2443;68/2171", "google_scholar": "qCrypnoAAAAJ;iP5m52IAAAAJ;-pu6i_4AAAAJ;;HaA2AWIAAAAJ;47n-0mwAAAAJ;9pNIbGkAAAAJ;;WghqyVMAAAAJ;ACjYGPUAAAAJ;RqOzJR0AAAAJ", "orcid": ";0000-0003-2882-2033;;;;0000-0002-0914-8525;;;;;", "linkedin": "xiuyegu/;;;abdullah-rashwan-84655245/;xuan-yang-3607484a/;xingyi-zhou-21925290/;;;;;", "or_profile": "~Xiuye_Gu1;~Yin_Cui1;~Jonathan_Huang1;~Abdullah_Rashwan1;~Xuan_Yang6;~Xingyi_Zhou2;~Golnaz_Ghiasi2;~Weicheng_Kuo1;~Huizhong_Chen2;~Liang-Chieh_Chen1;~David_Alexander_Ross1", "aff": "Google;Google;Google;Google;Google;Google;Research, Google;Google Deepmind;Google;Google;Research, Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;research.google.com;google.com;google.com;google.com;research.google.com", "position": "Researcher;Research Scientist;Research Scientist;Researcher;Researcher;Researcher;Researcher;Research Scientist;Researcher;Research Scientist;Software Engineer", "bibtex": "@inproceedings{\ngu2023dataseg,\ntitle={DaTaSeg: Taming a Universal Multi-Dataset Multi-Task Segmentation Model},\nauthor={Xiuye Gu and Yin Cui and Jonathan Huang and Abdullah Rashwan and Xuan Yang and Xingyi Zhou and Golnaz Ghiasi and Weicheng Kuo and Huizhong Chen and Liang-Chieh Chen and David A Ross},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kXOXrVnwbb}\n}", "github": "", "project": "", "reviewers": "t7xz;8rwi;LhuR;EhPn;7Lho", "pdf_size": 11065645, "rating": "4;5;6;6;7", "confidence": "5;5;4;5;4", "soundness": "3;2;3;3;4", "novelty": "2;2;3;4;4", "presentation": "3;2;3;3;3", "wc_summary": "190;42;91;66;64", "wc_strengths": "115;20;31;65;95", "wc_weaknesses": "341;101;35;191;25", "wc_questions": "134;55;46;1;17", "wc_limitations": "7;27;9;1;27", "wc_review": "787;245;212;324;228", "wc_reply_reviewers": "409;154;0;0;0", "wc_reply_authors": "631;311;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 90.6, 52.067648304873536 ], "wc_strengths_avg": [ 65.2, 36.27891949879434 ], "wc_weaknesses_avg": [ 138.6, 117.2716504531253 ], "wc_questions_avg": [ 50.6, 
46.01130295916428 ], "wc_limitations_avg": [ 14.2, 10.777754868245983 ], "wc_review_avg": [ 359.2, 217.33605315271555 ], "wc_reply_reviewers_avg": [ 112.6, 159.75180750151154 ], "wc_reply_authors_avg": [ 188.4, 251.9560279096335 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.7205766921228923, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12287707951771126963&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com;google.com;google.com;google.com;research.google.com;google.com;google.com;google.com;research.google.com", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;0;1;0;0;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google;DeepMind", "aff_unique_url": "https://www.google.com;https://deepmind.com", "aff_unique_abbr": "Google;DeepMind", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "DESSERT: An Efficient Algorithm for Vector Set Search with Vector Set Queries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70638", "id": "kXfrlWXLwH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6cc45de2e2dea14b96c1eba88fd8ef7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kXfrlWXLwH", "openreview": "https://openreview.net/forum?id=kXfrlWXLwH", "poster": "/media/PosterPDFs/NeurIPS%202023/70638.png?t=1702134861.19364", "slides": "https://nips.cc/virtual/2023/poster/70638", "video": "https://nips.cc/virtual/2023/poster/70638", "author_site": "Joshua Engels, Benjamin Coleman, Vihan Lakshman, Anshumali Shrivastava", "tldr": "", "abstract": "We study the problem of $\\text{\\emph{vector set search}}$ with $\\text{\\emph{vector set queries}}$. This task is analogous to traditional near-neighbor search, with the exception that both the query and each element in the collection are $\\text{\\textit{sets}}$ of vectors. We identify this problem as a core subroutine for semantic search applications and find that existing solutions are unacceptably slow. Towards this end, we present a new approximate search algorithm, DESSERT ($\\text{\\bf D}$ESSERT $\\text{\\bf E}$fficiently $\\text{\\bf S}$earches $\\text{\\bf S}$ets of $\\text{\\bf E}$mbeddings via $\\text{\\bf R}$etrieval $\\text{\\bf T}$ables). DESSERT is a general tool with strong theoretical guarantees and excellent empirical performance. 
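A toy rendition of set-to-set scoring in the spirit of the abstract (a simplification for intuition, not DESSERT's actual retrieval tables): estimate pairwise similarity from signed-random-projection hash agreement, then average each query vector's best match over the target set.

```python
import numpy as np

rng = np.random.default_rng(0)

def srp_hashes(X, planes):
    """X: (n, d); planes: (L, d) random hyperplanes -> (n, L) hash bits."""
    return (X @ planes.T) > 0

def set_score(Q, T, planes):
    hq, ht = srp_hashes(Q, planes), srp_hashes(T, planes)
    # fraction of agreeing bits estimates angular similarity per pair
    agree = (hq[:, None, :] == ht[None, :, :]).mean(axis=2)  # (|Q|, |T|)
    return agree.max(axis=1).mean()  # avg over query vectors of best target match

planes = rng.standard_normal((64, 128))  # 64 hyperplanes for d=128 vectors
Q, T = rng.standard_normal((4, 128)), rng.standard_normal((10, 128))
print(set_score(Q, T, planes))
```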
When we integrate DESSERT into ColBERT, a state-of-the-art semantic search model, we find a 2-5x speedup on the MS MARCO and LoTTE retrieval benchmarks with minimal loss in recall, underscoring the effectiveness and practical applicability of our proposal.", "keywords": "Embedding Based Retrieval;Passage Ranking;Locality Sensitive Hashing;Randomized Algorithms", "primary_area": "", "supplementary_material": "", "author": "Joshua Engels;Benjamin Coleman;Vihan Lakshman;Anshumali Shrivastava", "authorids": "~Joshua_Engels1;~Benjamin_Coleman1;~Vihan_Lakshman1;~Anshumali_Shrivastava1", "gender": "M;M;;M", "homepage": "https://www.joshengels.com/;https://randorithms.com/research;;https://www.cs.rice.edu/~as143/", "dblp": "295/9447;217/2220;244/2300;63/9828", "google_scholar": "yVPnVK8AAAAJ;fInuVkEAAAAJ;EO6fMUUAAAAJ;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Joshua_Engels1;~Benjamin_Coleman1;~Vihan_Lakshman1;~Anshumali_Shrivastava1", "aff": "ThirdAI;Google DeepMind;ThirdAI Corp;ThirdAI Corp.", "aff_domain": "thirdai.com;google.com;thirdai.com;thirdai.com", "position": "Researcher;Researcher;Researcher;CEO", "bibtex": "@inproceedings{\nengels2023dessert,\ntitle={{DESSERT}: An Efficient Algorithm for Vector Set Search with Vector Set Queries},\nauthor={Joshua Engels and Benjamin Coleman and Vihan Lakshman and Anshumali Shrivastava},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kXfrlWXLwH}\n}", "github": "", "project": "", "reviewers": "FWNJ;PFCR;iVRP;Uhsc;hUSH;yuRv", "pdf_size": 1254401, "rating": "5;6;6;7;7;7", "confidence": "3;3;4;2;4;3", "soundness": "3;3;3;4;4;3", "novelty": "2;3;3;3;3;3", "presentation": "2;3;3;4;3;3", "wc_summary": "66;47;53;118;91;96", "wc_strengths": "46;113;57;143;100;46", "wc_weaknesses": "244;90;163;155;50;102", "wc_questions": "16;32;198;288;32;1", "wc_limitations": "12;2;11;6;6;1", "wc_review": "384;284;482;710;279;246", "wc_reply_reviewers": "0;102;44;109;38;0", "wc_reply_authors": "0;138;0;0;0;0", "reply_reviewers": "0;1;1;1;1;0", "reply_authors": "1;2;1;1;1;1", "rating_avg": [ 6.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 78.5, 25.237207980810133 ], "wc_strengths_avg": [ 84.16666666666667, 36.9568066201012 ], "wc_weaknesses_avg": [ 134.0, 62.44731112439243 ], "wc_questions_avg": [ 94.5, 108.68264197500292 ], "wc_limitations_avg": [ 6.333333333333333, 4.109609335312651 ], "wc_review_avg": [ 397.5, 160.65257545399015 ], "wc_reply_reviewers_avg": [ 48.833333333333336, 43.5063852274072 ], "wc_reply_authors_avg": [ 23.0, 51.42956348249516 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.10846522890932805, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12603166016798514568&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "thirdai.com;google.com;thirdai.com;thirdai.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "ThirdAI;Google;ThirdAI Corporation;ThirdAI Corp.", "aff_unique_dep": ";Google DeepMind;;", "aff_unique_url": 
";https://deepmind.com;;", "aff_unique_abbr": ";DeepMind;ThirdAI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2;2", "aff_country_unique": ";United Kingdom;United States" }, { "title": "DecodingTrust: A Comprehensive Assessment of Trustworthiness in GPT Models", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73486", "id": "kaHpo8OZw2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/63cb9921eecf51bfad27a99b2c53dd6d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=kaHpo8OZw2", "openreview": "https://openreview.net/forum?id=kaHpo8OZw2", "poster": "/media/PosterPDFs/NeurIPS%202023/73486.png?t=1702252557.5787435", "slides": "https://nips.cc/virtual/2023/poster/73486", "video": "https://nips.cc/virtual/2023/poster/73486", "author_site": "Boxin Wang, Weixin Chen, Hengzhi Pei, Chulin Xie, Mintong Kang, Chenhui Zhang, Chejian Xu, Zidi Xiong, Ritik Dutta, Rylan Schaeffer, Sang Truong, Simran Arora, Mantas Mazeika, Dan Hendrycks, Zinan Lin, Yu Cheng, Sanmi Koyejo, Dawn Song, Bo Li", "tldr": "", "abstract": "Generative Pre-trained Transformer (GPT) models have exhibited exciting progress in capabilities, capturing the interest of practitioners and the public alike. Yet, while the literature on the trustworthiness of GPT models remains limited, practitioners have proposed employing capable GPT models for sensitive applications to healthcare and finance \u2013 where mistakes can be costly. To this end, this work proposes a comprehensive trustworthiness evaluation for large language models with a focus on GPT-4 and GPT-3.5, considering diverse perspectives \u2013 including toxicity, stereotype bias, adversarial robustness, out-of-distribution robustness, robustness on adversarial demonstrations, privacy, machine ethics, and fairness. Based on our evaluations, we discover previously unpublished vulnerabilities to trustworthiness threats. For instance, we find that GPT models can be easily misled to generate toxic and biased outputs and leak private information in both training data and conversation history. We also find that although GPT-4 is usually more trustworthy than GPT-3.5 on standard benchmarks, GPT-4 is more vulnerable given jailbreaking system or user prompts, potentially due to the reason that GPT-4 follows the (misleading) instructions more precisely. Our work illustrates a comprehensive trustworthiness evaluation of GPT models and sheds light on the trustworthiness gaps. Our benchmark is publicly available at https://decodingtrust.github.io/.", "keywords": "trustworthiness evaluation;GPT models;GPT-3.5;GPT-4;toxicity;stereotypes;bias;adversarial robustness;out-of-distribution robustness;privacy;ethics;fairness", "primary_area": "", "supplementary_material": "/attachment/86d4ad242da9ee3171f5f302ad989f043972b204.zip", "author": "Boxin Wang;Weixin Chen;Hengzhi Pei;Chulin Xie;Mintong Kang;Chenhui Zhang;Chejian Xu;Zidi Xiong;Ritik Dutta;Rylan Schaeffer;Sang T. 
Truong;Simran Arora;Mantas Mazeika;Dan Hendrycks;Zinan Lin;Yu Cheng;Sanmi Koyejo;Dawn Song;Bo Li", "authorids": "~Boxin_Wang1;~Weixin_Chen1;~Hengzhi_Pei1;~Chulin_Xie1;~Mintong_Kang1;~Chenhui_Zhang2;~Chejian_Xu1;~Zidi_Xiong2;~Ritik_Dutta1;~Rylan_Schaeffer2;~Sang_T._Truong1;~Simran_Arora1;~Mantas_Mazeika3;~Dan_Hendrycks1;~Zinan_Lin1;~Yu_Cheng1;~Sanmi_Koyejo1;~Dawn_Song1;~Bo_Li19", "gender": ";F;M;F;M;M;;M;M;M;M;;M;;M;M;F;F;M", "homepage": "https://wbx.life;https://chenweixin107.github.io/;;;https://kangmintong.github.io/;https://www.danielz.ch/;https://xuchejian.com/;https://polaris-73.github.io/;;https://rylanschaeffer.github.io;https://cs.stanford.edu/~sttruong;https://scholar.google.com/citations?user=rGRsWH8AAAAJ&hl=en;https://github.com/mmazeika;;https://zinanlin.me/;https://ych133.github.io;;http://boli.cs.illinois.edu/;https://cs.stanford.edu/~sanmi/", "dblp": "236/6319;72/8212;243/7002;245/4284;303/0335.html;;305/4129.html;314/6808;;280/1341;301/9134;243/2342;215/4447;182/2504;64/237-1;96/3060-1.html;s/DXSong;50/3402-26;14/8885", "google_scholar": "YOf2ATIAAAAJ;ZlBEHxwAAAAJ;Qgc5qxYAAAAJ;WeJnzAgAAAAJ;oHXw2SAAAAAJ;UYxdrBsAAAAJ;YbDy6k0AAAAJ;XL6QafwAAAAJ;;6tMEGz8AAAAJ;oXPm0dAAAAAJ;;;;67nE-wQ_g_cC;https://scholar.google.com/citations?hl=en;;K8vJkTcAAAAJ;EaaOeJwAAAAJ", "orcid": ";;;;;0000-0003-3915-6099;;;;;;;;;;;;;0000-0002-4023-419X", "linkedin": ";weixin-chen-0250872aa/;;;;danielz02/;;https://www.linkedin.com/public-profile/settings;;rylanschaeffer/;sangttruong/;;;;;chengyu05/;;;sanmi-koyejo-984754/", "or_profile": "~Boxin_Wang1;~Weixin_Chen1;~Hengzhi_Pei1;~Chulin_Xie1;~Mintong_Kang1;~Chenhui_Zhang2;~Chejian_Xu1;~Zidi_Xiong2;~Ritik_Dutta1;~Rylan_Schaeffer2;~Sang_T._Truong1;~Simran_Arora1;~Mantas_Mazeika3;~Dan_Hendrycks1;~Zinan_Lin1;~Yu_Cheng1;~Dawn_Song1;~Bo_Li19;~Oluwasanmi_O_Koyejo1", "aff": "Department of Computer Science, University of Illinois, Urbana Champaign;Tsinghua University;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois at Urbana-Champaign;IIT Gandhinagar, Dhirubhai Ambani Institute Of Information and Communication Technology;Massachusetts Institute of Technology;Stanford University;The Wharton School, University of Pennsylvania;University of Illinois, Urbana-Champaign;Center for AI Safety;Carnegie Mellon University;Microsoft Research;University of California, Berkeley;University of Illinois, Urbana Champaign;Google", "aff_domain": "cs.illinois.edu;tsinghua.edu.cn;illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu;cs.illinois.edu;iitgn.ac.in;mit.edu;stanford.edu;wharton.upenn.edu;uiuc.edu;safe.ai;cmu.edu;microsoft.com;berkeley.edu;illinois.edu;google.com", "position": "PhD student;MS student;MS student;PhD student;PhD student;Undergrad student;PhD student;Undergrad student;Undergrad student;Researcher;PhD student;Undergrad student;PhD student;Executive and Research Director;PhD student;Principal Researcher;Full Professor;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nwang2023decodingtrust,\ntitle={DecodingTrust: A Comprehensive Assessment of Trustworthiness in {GPT} Models},\nauthor={Boxin Wang and Weixin Chen and Hengzhi Pei and Chulin Xie and Mintong Kang and Chenhui Zhang and Chejian Xu and Zidi Xiong and Ritik Dutta and Rylan Schaeffer and Sang T. 
Truong and Simran Arora and Mantas Mazeika and Dan Hendrycks and Zinan Lin and Yu Cheng and Sanmi Koyejo and Dawn Song and Bo Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=kaHpo8OZw2}\n}", "github": "", "project": "", "reviewers": "imyr;qCen;UhDT;JNyJ;j1j9", "pdf_size": 42206195, "rating": "7;7;7;7;10", "confidence": "4;4;4;4;4", "wc_summary_and_contributions": "129;116;58;60;71", "wc_strengths": "56;123;122;39;52", "wc_improvement": "21;298;97;122;47", "wc_limitations": "188;128;5;14;45", "wc_correctness": "22;31;4;8;6", "wc_clarity": "6;6;5;55;8", "wc_relation_to_prior_work": "1;24;5;22;6", "wc_documentation": "7;209;19;16;17", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "431;936;316;337;253", "wc_reply_reviewers": "0;149;0;39;60", "wc_reply_authors": "940;2984;1502;874;1482", "reply_reviewers": "0;1;0;1;1", "reply_authors": "2;6;4;2;3", "rating_avg": [ 7.6, 1.2 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 86.8, 29.768439663509408 ], "wc_strengths_avg": [ 78.4, 36.44502709561347 ], "wc_improvement_avg": [ 117.0, 97.26458759486928 ], "wc_limitations_avg": [ 76.0, 70.84348946798146 ], "wc_correctness_avg": [ 14.2, 10.514751542475931 ], "wc_clarity_avg": [ 16.0, 19.524343778985248 ], "wc_relation_to_prior_work_avg": [ 11.6, 9.478396488858229 ], "wc_documentation_avg": [ 53.6, 77.80899690909787 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 454.6, 247.38035491930236 ], "wc_reply_reviewers_avg": [ 49.6, 54.81094781154582 ], "wc_reply_authors_avg": [ 1556.4, 760.546540324785 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.4, 1.4966629547095764 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 19, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 473, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12930725443717311591&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.illinois.edu;tsinghua.edu.cn;illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu;cs.illinois.edu;iitgn.ac.in;mit.edu;stanford.edu;wharton.upenn.edu;uiuc.edu;safe.ai;cmu.edu;microsoft.com;berkeley.edu;illinois.edu;google.com", "author_num": 19, "aff_unique_index": "0;1;0;0;0;2;0;0;3;4;5;6;2;7;8;9;10;0;11", "aff_unique_norm": "University of Illinois Urbana-Champaign;Tsinghua University;University of Illinois;Indian Institute of Technology Gandhinagar;Massachusetts Institute of Technology;Stanford University;University of Pennsylvania;Center for AI Safety;Carnegie Mellon University;Microsoft;University of California, Berkeley;Google", "aff_unique_dep": "Department of Computer Science;;;Dhirubhai Ambani Institute Of Information and Communication Technology;;;The Wharton School;;;Microsoft Research;;Google", "aff_unique_url": "https://illinois.edu;https://www.tsinghua.edu.cn;https://illinois.edu;https://www.iitgn.ac.in;https://web.mit.edu;https://www.stanford.edu;https://www.wharton.upenn.edu;https://www.centerforaisafety.org;https://www.cmu.edu;https://www.microsoft.com/en-us/research;https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UIUC;THU;UIUC;IITGN;MIT;Stanford;UPenn Wharton;;CMU;MSR;UC Berkeley;Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;2;3;0;4;0;5", "aff_campus_unique": "Urbana-Champaign;;Gandhinagar;Stanford;Berkeley;Mountain View", "aff_country_unique_index": "0;1;0;0;0;0;0;0;2;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;China;India" }, 
{ "title": "Prompt Pre-Training with Twenty-Thousand Classes for Open-Vocabulary Visual Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70637", "id": "kdFR6IUEW6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29962c2c9daf1fbd92530a7c958dfc2b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kdFR6IUEW6", "openreview": "https://openreview.net/forum?id=kdFR6IUEW6", "poster": "/media/PosterPDFs/NeurIPS%202023/70637.png?t=1701414196.1863694", "slides": "https://nips.cc/virtual/2023/poster/70637", "video": "https://nips.cc/virtual/2023/poster/70637", "author_site": "Shuhuai Ren, Aston Zhang, Yi Zhu, Shuai Zhang, Shuai Zheng, Mu Li, Alexander Smola, Xu Sun", "tldr": "", "abstract": "This work proposes POMP, a prompt pre-training method for vision-language models. Being memory and computation efficient, POMP enables the learned prompt to condense semantic information for a rich set of visual concepts with over twenty-thousand classes. Once pre-trained, the prompt with a strong transferable ability can be directly plugged into a variety of visual recognition tasks including image classification, semantic segmentation, and object detection, to boost recognition performances in a zero-shot manner. Empirical evaluation shows that POMP achieves state-of-the-art performances on 21 datasets, e.g., 67.0% average accuracy on 10 classification datasets (+3.1% compared to CoOp) and 84.4 hIoU on open-vocabulary Pascal VOC segmentation (+6.9 compared to ZSSeg).", "keywords": "Prompt Pre-Training;CLIP;Open-Vocabulary Visual Recognition", "primary_area": "", "supplementary_material": "/attachment/4a3cbfa317551e26497e322e9e453cab2e7ae13f.zip", "author": "Shuhuai Ren;Aston Zhang;Yi Zhu;Shuai Zhang;Shuai Zheng;Mu Li;Alex Smola;Xu Sun", "authorids": "~Shuhuai_Ren1;~Aston_Zhang2;~Yi_Zhu1;~Shuai_Zhang7;~Shuai_Zheng1;~Mu_Li4;~Alex_Smola1;~Xu_Sun1", "gender": "M;;M;;;;M;M", "homepage": "https://renshuhuai-andy.github.io/;;https://bryanyzhu.github.io/;;http://www.cse.ust.hk/~szhengac/;https://github.com/mli;http://alex.smola.org;https://xusun.org/", "dblp": "50/9511.html;;;;13/8659-4;;s/AlexanderJSmola;37/1971-1", "google_scholar": "https://scholar.google.com.hk/citations?user=3X8yS-cAAAAJ;;IXw4UiwAAAAJ;;82FZpFYAAAAJ;;Tb0ZrYwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-6482-6712;;;;;", "linkedin": "shuhuai-ren-69580817a/;;yi-zhu-546a437a/;;;;smola;", "or_profile": "~Shuhuai_Ren1;~Aston_Zhang2;~Yi_Zhu1;~Shuai_Zhang7;~Shuai_Zheng1;~Mu_Li4;~Alex_Smola1;~Xu_Sun1", "aff": "Peking University;;Amazon;;Amazon Web Services;Amazon;Boson AI;Peking University", "aff_domain": "pku.edu.cn;;amazon.com;;amazon.com;amazon.com;boson.ai;pku.edu.cn", "position": "PhD student;;Applied Scientist;;Senior Applied Scientist;Researcher;CEO;Associate Professor", "bibtex": "@inproceedings{\nren2023prompt,\ntitle={Prompt Pre-Training with Twenty-Thousand Classes for Open-Vocabulary Visual Recognition},\nauthor={Shuhuai Ren and Aston Zhang and Yi Zhu and Shuai Zhang and Shuai Zheng and Mu Li and Alex Smola and Xu Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kdFR6IUEW6}\n}", "github": "", "project": "", "reviewers": "Mx1X;9929;xVVp;i3py", "pdf_size": 4132118, "rating": "5;5;6;7", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;3;3;3", "wc_summary": "104;96;82;97", "wc_strengths": "37;73;42;173", 
"wc_weaknesses": "131;144;101;105", "wc_questions": "4;2;1;55", "wc_limitations": "1;2;1;29", "wc_review": "277;317;227;459", "wc_reply_reviewers": "17;11;13;16", "wc_reply_authors": "34;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 7.980444849756184 ], "wc_strengths_avg": [ 81.25, 54.73744148204225 ], "wc_weaknesses_avg": [ 120.25, 17.907749719046222 ], "wc_questions_avg": [ 15.5, 22.830900113661748 ], "wc_limitations_avg": [ 8.25, 11.986972094736853 ], "wc_review_avg": [ 320.0, 86.35392289873113 ], "wc_reply_reviewers_avg": [ 14.25, 2.384848003542364 ], "wc_reply_authors_avg": [ 8.5, 14.722431864335457 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1277533519363785163&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;;amazon.com;;amazon.com;amazon.com;boson.ai;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;1;2;0", "aff_unique_norm": "Peking University;Amazon;Boson AI", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "http://www.pku.edu.cn;https://www.amazon.com;https://www.boson.ai", "aff_unique_abbr": "Peking U;Amazon;Boson AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "TextDiffuser: Diffusion Models as Text Painters", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70636", "id": "ke3RgcDmfO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1df4afb0b4ebf492a41218ce16b6d8df-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ke3RgcDmfO", "openreview": "https://openreview.net/forum?id=ke3RgcDmfO", "poster": "/media/PosterPDFs/NeurIPS%202023/70636.png?t=1697114918.0557044", "slides": "https://nips.cc/virtual/2023/poster/70636", "video": "https://nips.cc/virtual/2023/poster/70636", "author_site": "Jingye Chen, Yupan Huang, Yupan Huang, Tengchao Lv, Lei Cui, Qifeng Chen, Furu Wei", "tldr": "", "abstract": "Diffusion models have gained increasing attention for their impressive generation abilities but currently struggle with rendering accurate and coherent text. To address this issue, we introduce TextDiffuser, focusing on generating images with visually appealing text that is coherent with backgrounds. TextDiffuser consists of two stages: first, a Transformer model generates the layout of keywords extracted from text prompts, and then diffusion models generate images conditioned on the text prompt and the generated layout. Additionally, we contribute the first large-scale text images dataset with OCR annotations, MARIO-10M, containing 10 million image-text pairs with text recognition, detection, and character-level segmentation annotations. We further collect the MARIO-Eval benchmark to serve as a comprehensive tool for evaluating text rendering quality. 
Through experiments and user studies, we demonstrate that TextDiffuser is flexible and controllable: it can create high-quality text images using text prompts alone or together with text template images, and can conduct text inpainting to reconstruct incomplete images with text. We will make the code, model, and dataset publicly available.", "keywords": "Diffusion Model; Text Rendering", "primary_area": "", "supplementary_material": "", "author": "Jingye Chen;Yupan Huang;Tengchao Lv;Lei Cui;Qifeng Chen;Furu Wei", "authorids": "~Jingye_Chen2;~Yupan_Huang1;~Tengchao_Lv1;~Lei_Cui2;~Qifeng_Chen1;~Furu_Wei1", "gender": "M;F;M;M;M;M", "homepage": "https://jingyechen.github.io/;https://hypjudy.github.io/website/;;https://www.microsoft.com/en-us/research/people/lecu/;http://cqf.io/;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "233/5857;239/4895;254/8010.html;47/5523-1.html;117/4819;72/5870", "google_scholar": "zfjjlw8AAAAJ;ZbCCBogAAAAJ;0LTZGhUAAAAJ;ajJQoUcAAAAJ;lLMX9hcAAAAJ;G-V1VpwAAAAJ", "orcid": ";;0000-0001-7548-9566;;;", "linkedin": ";;;;;", "or_profile": "~Jingye_Chen2;~Yupan_Huang1;~Tengchao_Lv1;~Lei_Cui2;~Qifeng_Chen1;~Furu_Wei1", "aff": "Hong Kong University of Science and Technology;SUN YAT-SEN UNIVERSITY;Microsoft;Microsoft Research Asia;Hong Kong University of Science and Technology;Microsoft Research", "aff_domain": "hkust.edu;sysu.edu.cn;microsoft.com;microsoft.com;hkust.edu;microsoft.com", "position": "PhD student;PhD student;Researcher;Principal Researcher;Assistant Professor;Distinguished Scientist", "bibtex": "@inproceedings{\nchen2023textdiffuser,\ntitle={TextDiffuser: Diffusion Models as Text Painters},\nauthor={Jingye Chen and Yupan Huang and Tengchao Lv and Lei Cui and Qifeng Chen and Furu Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ke3RgcDmfO}\n}", "github": "", "project": "", "reviewers": "AzyM;cdWT;V9VR;sG87", "pdf_size": 31167546, "rating": "3;6;7;7", "confidence": "5;5;4;4", "soundness": "3;3;2;4", "novelty": "3;3;2;3", "presentation": "3;3;4;3", "wc_summary": "61;47;163;75", "wc_strengths": "58;27;33;48", "wc_weaknesses": "165;209;156;135", "wc_questions": "52;3;66;53", "wc_limitations": "7;25;1;15", "wc_review": "343;311;419;326", "wc_reply_reviewers": "33;24;28;23", "wc_reply_authors": "381;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.5, 45.26311964502668 ], "wc_strengths_avg": [ 41.5, 12.216791722870616 ], "wc_weaknesses_avg": [ 166.25, 26.975683494584526 ], "wc_questions_avg": [ 43.5, 24.026027553467927 ], "wc_limitations_avg": [ 12.0, 9.0 ], "wc_review_avg": [ 349.75, 41.55342946135734 ], "wc_reply_reviewers_avg": [ 27.0, 3.9370039370059056 ], "wc_reply_authors_avg": [ 95.25, 164.97783942093557 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7624928516630233, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=545976320334168988&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "hkust.edu;sysu.edu.cn;microsoft.com;microsoft.com;hkust.edu;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "Hong
Kong University of Science and Technology;Sun Yat-sen University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.ust.hk;http://www.sysu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "HKUST;SYSU;Microsoft", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Hong Kong SAR;;Asia", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United States" }, { "title": "End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70635", "id": "kfWzpZvEUh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2561721d0ca69bab22b749cfc4f48f6c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kfWzpZvEUh", "openreview": "https://openreview.net/forum?id=kfWzpZvEUh", "poster": "/media/PosterPDFs/NeurIPS%202023/70635.png?t=1698848107.1457996", "slides": "https://nips.cc/virtual/2023/poster/70635", "video": "https://nips.cc/virtual/2023/poster/70635", "author_site": "Alexandre Maraval, Matthieu Zimmer, Antoine Grosnit, Haitham Bou Ammar", "tldr": "", "abstract": "Meta-Bayesian optimisation (meta-BO) aims to improve the sample efficiency of Bayesian optimisation by leveraging data from related tasks. While previous methods successfully meta-learn either a surrogate model or an acquisition function independently, joint training of both components remains an open challenge. This paper proposes the first end-to-end differentiable meta-BO framework that generalises neural processes to learn acquisition functions via transformer architectures. We enable this end-to-end framework with reinforcement learning (RL) to tackle the lack of labelled acquisition data. Early on, we notice that training transformer-based neural processes from scratch with RL is challenging due to insufficient supervision, especially when rewards are sparse. We formalise this claim with a combinatorial analysis showing that the widely used notion of regret as a reward signal exhibits a logarithmic sparsity pattern in trajectory lengths. To tackle this problem, we augment the RL objective with an auxiliary task that guides part of the architecture to learn a valid probabilistic model as an inductive bias. 
We demonstrate that our method achieves state-of-the-art regret results against various baselines in experiments on standard hyperparameter optimisation tasks and also outperforms others in the real-world problems of mixed-integer programming tuning, antibody design, and logic synthesis for electronic design automation.", "keywords": "meta-learning;bayesian optimisation;neural process;transformer;end-to-end;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/523e06f8b4c21bfbbfa78cf875ac039ab674939e.zip", "author": "Alexandre Max Maraval;Matthieu Zimmer;Antoine Grosnit;Haitham Bou Ammar", "authorids": "~Alexandre_Max_Maraval1;~Matthieu_Zimmer1;~Antoine_Grosnit2;~Haitham_Bou_Ammar1", "gender": "Not Specified;M;M;M", "homepage": ";https://matthieu-zimmer.net/;;", "dblp": ";216/6664;;281/6860", "google_scholar": "IKZtDmoAAAAJ;https://scholar.google.fr/citations?user=6z-GF2sAAAAJ;https://scholar.google.co.uk/citations?user=AE5suDoAAAAJ;TIfomt8AAAAJ", "orcid": ";0000-0002-8029-308X;;", "linkedin": ";;;antoine-grosnit-087950158/", "or_profile": "~Alexandre_Max_Maraval1;~Matthieu_Zimmer1;~Haitham_Bou_Ammar1;~Antoine_Grosnit1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei R&D UK;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;huawei.com;huawei.com", "position": "Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nmaraval2023endtoend,\ntitle={End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes},\nauthor={Alexandre Max Maraval and Matthieu Zimmer and Antoine Grosnit and Haitham Bou Ammar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kfWzpZvEUh}\n}", "github": "", "project": "", "reviewers": "6XK8;P5Ji;Mquj;K11E;DqJs;dCjX", "pdf_size": 2692707, "rating": "5;5;6;6;6;7", "confidence": "5;2;3;3;2;4", "soundness": "3;3;3;3;3;4", "novelty": "3;2;3;3;3;3", "presentation": "3;4;3;3;2;4", "wc_summary": "44;89;62;66;33;119", "wc_strengths": "43;54;48;246;40;99", "wc_weaknesses": "162;44;110;19;126;53", "wc_questions": "54;23;4;65;11;11", "wc_limitations": "18;15;1;1;1;5", "wc_review": "321;225;225;397;211;287", "wc_reply_reviewers": "25;45;4;17;18;19", "wc_reply_authors": "271;0;0;0;0;784", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;1;1;1;1;2", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 3.1666666666666665, 1.0671873729054748 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 68.83333333333333, 28.51559027775664 ], "wc_strengths_avg": [ 88.33333333333333, 73.22264373508756 ], "wc_weaknesses_avg": [ 85.66666666666667, 50.486521853747156 ], "wc_questions_avg": [ 28.0, 23.18045153428495 ], "wc_limitations_avg": [ 6.833333333333333, 7.033649282003064 ], "wc_review_avg": [ 277.6666666666667, 66.07739569794062 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 12.310790208412925 ], "wc_reply_authors_avg": [ 175.83333333333334, 289.4226532637385 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.03787770095392849, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11294143449380718869&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": 
"huawei.com;huawei.com;huawei.com;huawei.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Mind2Web: Towards a Generalist Agent for the Web", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73485", "id": "kiYqbO3wqw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5950bf290a1570ea401bf98882128160-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=kiYqbO3wqw", "openreview": "https://openreview.net/forum?id=kiYqbO3wqw", "poster": "/media/PosterPDFs/NeurIPS%202023/73485.png?t=1701672092.931609", "slides": "https://nips.cc/virtual/2023/poster/73485", "video": "https://nips.cc/virtual/2023/poster/73485", "author_site": "Xiang Deng, Yu Gu, Boyuan Zheng, Shijie Chen, Sam Stevens, Boshi Wang, Huan Sun, Yu Su", "tldr": "", "abstract": "We introduce Mind2Web, the first dataset for developing and evaluating generalist agents for the web that can follow language instructions to complete complex tasks on any website. Existing datasets for web agents either use simulated websites or only cover a limited set of websites and tasks, thus not suitable for generalist web agents. With over 2,000 open-ended tasks collected from 137 websites spanning 31 domains and crowdsourced action sequences for the tasks, Mind2Web provides three necessary ingredients for building generalist web agents: 1) diverse domains, websites, and tasks, 2) use of real-world websites instead of simulated and simplified ones, and 3) a broad spectrum of user interaction patterns. Based on Mind2Web, we conduct an initial exploration of using large language models (LLMs) for building generalist web agents. While the raw HTML of real-world websites are often too large to be fed to LLMs, we show that first filtering it with a small LM significantly improves the effectiveness and efficiency of LLMs. Our solution demonstrates a decent level of performance, even on websites or entire domains the model has never seen before, but there is still a substantial room to improve towards truly generalizable agents. 
We open-source our dataset, model implementation, and trained models (https://osu-nlp-group.github.io/Mind2Web) to facilitate further research on building a generalist agent for the web.", "keywords": "Web Agent;Web Automation;Benchmark;Language Grounding;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Xiang Deng;Yu Gu;Boyuan Zheng;Shijie Chen;Samuel Stevens;Boshi Wang;Huan Sun;Yu Su", "authorids": "~Xiang_Deng2;~Yu_Gu5;~Boyuan_Zheng1;~Shijie_Chen1;~Samuel_Stevens1;~Boshi_Wang2;~Huan_Sun1;~Yu_Su2", "gender": "M;M;M;M;M;M;F;M", "homepage": "https://xiang-deng.github.io/;http://entslscheia.github.io;https://boyuanzheng010.github.io/;https://chensj98.github.io/;https://samuelstevens.me;https://boshi-wang.github.io/;https://u.osu.edu/ihudas/people/;http://ysu1989.github.io", "dblp": "95/4545-1;15/4208-16;;;279/6356;216/7905;33/2952-1.html;38/1070-1", "google_scholar": "d-qpndsAAAAJ;c5RwjjcAAAAJ;amEL4n8AAAAJ;KXSlX3sAAAAJ;uR-A0LAAAAAJ;https://scholar.google.com/citations?hl=en;wIFkulcAAAAJ;rIh5OqoAAAAJ", "orcid": ";;;;0009-0000-9493-7766;;;", "linkedin": ";;boyuan-zheng-602238183/;;;;huan-sun-81527924/?originalSubdomain=cn;", "or_profile": "~Xiang_Deng2;~Yu_Gu5;~Boyuan_Zheng1;~Shijie_Chen1;~Samuel_Stevens1;~Boshi_Wang2;~Huan_Sun1;~Yu_Su2", "aff": "Ohio State University;Ohio State University;Johns Hopkins University;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University;The Ohio State University, Columbus;Microsoft", "aff_domain": "osu.edu;osu.edu;jhu.edu;osu.edu;osu.edu;osu.edu;osu.edu;microsoft.com", "position": "PhD student;PhD student;MS student;PhD student;PhD student;PhD student;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\ndeng2023mindweb,\ntitle={Mind2Web: Towards a Generalist Agent for the Web},\nauthor={Xiang Deng and Yu Gu and Boyuan Zheng and Shijie Chen and Samuel Stevens and Boshi Wang and Huan Sun and Yu Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=kiYqbO3wqw}\n}", "github": "", "project": "", "reviewers": "6KDX;39YG;6BP7;17i2", "pdf_size": 7563781, "rating": "7;7;7;8", "confidence": "5;4;4;3", "wc_summary_and_contributions": "125;128;110;45", "wc_strengths": "82;98;61;96", "wc_improvement": "111;74;12;142", "wc_limitations": "14;68;1;1", "wc_correctness": "14;92;1;13", "wc_clarity": "5;31;1;15", "wc_relation_to_prior_work": "24;42;1;12", "wc_documentation": "61;69;1;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "437;603;189;336", "wc_reply_reviewers": "128;22;0;22", "wc_reply_authors": "403;563;42;426", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 102.0, 33.60803475361212 ], "wc_strengths_avg": [ 84.25, 14.771171246722448 ], "wc_improvement_avg": [ 84.75, 48.41164632606497 ], "wc_limitations_avg": [ 21.0, 27.649593125396983 ], "wc_correctness_avg": [ 30.0, 36.15936946352909 ], "wc_clarity_avg": [ 13.0, 11.575836902790225 ], "wc_relation_to_prior_work_avg": [ 19.75, 15.20485119953497 ], "wc_documentation_avg": [ 35.5, 29.845435161846776 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 391.25, 150.73880555450876 ], "wc_reply_reviewers_avg": [ 43.0, 49.889878733065686 ], "wc_reply_authors_avg": [ 358.5, 192.69730148603534 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], 
"replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 428, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17415845647081249140&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "osu.edu;osu.edu;jhu.edu;osu.edu;osu.edu;osu.edu;osu.edu;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;0;2", "aff_unique_norm": "Ohio State University;Johns Hopkins University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://www.jhu.edu;https://www.microsoft.com", "aff_unique_abbr": "OSU;JHU;Microsoft", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On permutation symmetries in Bayesian neural network posteriors: a variational perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70634", "id": "kj33zJ9Vue", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d9dc5573f7368201d6409e07e882aa77-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kj33zJ9Vue", "openreview": "https://openreview.net/forum?id=kj33zJ9Vue", "poster": "/media/PosterPDFs/NeurIPS%202023/70634.png?t=1699546426.4614177", "slides": "https://nips.cc/virtual/2023/poster/70634", "video": "https://nips.cc/virtual/2023/poster/70634", "author_site": "Simone Rossi, Simone Rossi, Ankit Singh, Thomas Hannagan", "tldr": "", "abstract": "The elusive nature of gradient-based optimization in neural networks is tied to their loss landscape geometry, which is poorly understood. However recent work has brought solid evidence that there is essentially no loss barrier between the local solutions of gradient descent, once accounting for weight-permutations that leave the network's computation unchanged. This raises questions for approximate inference in Bayesian neural networks (BNNs), where we are interested in marginalizing over multiple points in the loss landscape.\nIn this work, we first extend the formalism of marginalized loss barrier and solution interpolation to BNNs, before proposing a matching algorithm to search for linearly connected solutions. This is achieved by aligning the distributions of two independent approximate Bayesian solutions with respect to permutation matrices. Building on the work of Ainsworth et al. (2023), we frame the problem as a combinatorial optimization one, using an approximation to the sum of bilinear assignment problem. 
We then experiment on a variety of architectures and datasets, finding nearly zero marginalized loss barriers for linearly connected solutions.", "keywords": "Bayesian deep learning;approximate inference;permutation symmetries", "primary_area": "", "supplementary_material": "/attachment/195f35be63ca4eb68091eed6d8ebec0013d16dbb.pdf", "author": "Simone Rossi;Ankit Singh;Thomas Hannagan", "authorids": "~Simone_Rossi1;~Ankit_Singh1;~Thomas_Hannagan1", "gender": ";;M", "homepage": ";;", "dblp": "86/5740-1.html;;98/8826", "google_scholar": ";;u6OFo3YAAAAJ", "orcid": "0000-0003-2908-3703;;", "linkedin": ";;", "or_profile": "~Simone_Rossi1;~Ankit_Singh1;~Thomas_Hannagan1", "aff": "Stellantis;;Stellantis", "aff_domain": "stellantis.com;;stellantis.com", "position": "Researcher;;Researcher", "bibtex": "@inproceedings{\nrossi2023on,\ntitle={On permutation symmetries in Bayesian neural network posteriors: a variational perspective},\nauthor={Simone Rossi and Ankit Singh and Thomas Hannagan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kj33zJ9Vue}\n}", "github": "", "project": "", "reviewers": "bvHV;2rWU;hffp;T6Qt;Kavz;Kqgj", "pdf_size": 1325746, "rating": "6;6;7;7;7;7", "confidence": "4;4;4;5;2;4", "soundness": "3;3;3;4;4;3", "novelty": "2;3;2;3;3;3", "presentation": "3;3;3;3;4;3", "wc_summary": "113;178;26;78;94;134", "wc_strengths": "96;111;15;100;82;41", "wc_weaknesses": "382;853;107;361;136;63", "wc_questions": "149;0;1;230;51;46", "wc_limitations": "9;0;1;6;103;35", "wc_review": "749;1142;150;775;466;319", "wc_reply_reviewers": "507;312;0;0;31;28", "wc_reply_authors": "486;206;0;0;0;0", "reply_reviewers": "4;1;0;0;1;1", "reply_authors": "4;2;1;1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.8333333333333335, 0.8975274678557507 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 103.83333333333333, 47.1112748948935 ], "wc_strengths_avg": [ 74.16666666666667, 34.55149136514306 ], "wc_weaknesses_avg": [ 317.0, 269.2068597441999 ], "wc_questions_avg": [ 79.5, 83.56384784502606 ], "wc_limitations_avg": [ 25.666666666666668, 36.51331568376422 ], "wc_review_avg": [ 600.1666666666666, 328.12772750189146 ], "wc_reply_reviewers_avg": [ 146.33333333333334, 194.78934490594932 ], "wc_reply_authors_avg": [ 115.33333333333333, 182.03540559157412 ], "reply_reviewers_avg": [ 1.1666666666666667, 1.3437096247164249 ], "reply_authors_avg": [ 1.6666666666666667, 1.1055415967851332 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13130643285972254, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15927947193510727346&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "stellantis.com;;stellantis.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stellantis", "aff_unique_dep": "", "aff_unique_url": "https://www.stellantis.com", "aff_unique_abbr": "Stellantis", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "Inverse Dynamics Pretraining Learns Good Representations for Multitask Imitation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70633", "id": "kjMGHTo8Cs", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/d36dfcdb14473a8526111c221660f2ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kjMGHTo8Cs", "openreview": "https://openreview.net/forum?id=kjMGHTo8Cs", "poster": "/media/PosterPDFs/NeurIPS%202023/70633.png?t=1701717140.8933563", "slides": "https://nips.cc/virtual/2023/poster/70633", "video": "https://nips.cc/virtual/2023/poster/70633", "author_site": "David Brandfonbrener, David Brandfonbrener, Ofir Nachum, Joan Bruna", "tldr": "", "abstract": "In recent years, domains such as natural language processing and image recognition have popularized the paradigm of using large datasets to pretrain representations that can be effectively transferred to downstream tasks. In this work we evaluate how such a paradigm should be done in imitation learning, where both pretraining and finetuning data are trajectories collected by experts interacting with an unknown environment. Namely, we consider a setting where the pretraining corpus consists of multitask demonstrations and the task for each demonstration is set by an unobserved latent context variable. The goal is to use the pretraining corpus to learn a low dimensional representation of the high dimensional (e.g., visual) observation space which can be transferred to a novel context for finetuning on a limited dataset of demonstrations. Among a variety of possible pretraining objectives, we argue that inverse dynamics modeling -- i.e., predicting an action given the observations appearing before and after it in the demonstration -- is well-suited to this setting. We provide empirical evidence of this claim through evaluations on a variety of simulated visuomotor manipulation problems. While previous work has attempted various theoretical explanations regarding the benefit of inverse dynamics modeling, we find that these arguments are insufficient to explain the empirical advantages often observed in our settings, and so we derive a novel analysis using a simple but general environment model.", "keywords": "representation learning;imitation learning", "primary_area": "", "supplementary_material": "/attachment/ad18fb879bf02b129b5453faadf9e4beccdd5eb7.zip", "author": "David Brandfonbrener;Ofir Nachum;Joan Bruna", "authorids": "~David_Brandfonbrener1;~Ofir_Nachum1;~Joan_Bruna1", "gender": "M;M;M", "homepage": "https://davidbrandfonbrener.github.io;https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;http://cims.nyu.edu/~bruna", "dblp": "214/9461;;44/8776", "google_scholar": "https://scholar.google.com/citations?hl=en;C-ZlBWMAAAAJ;L4bNmsMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~David_Brandfonbrener1;~Ofir_Nachum1;~Joan_Bruna1", "aff": "New York University;OpenAI;New York University", "aff_domain": "nyu.edu;openai.com;nyu.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nbrandfonbrener2023inverse,\ntitle={Inverse Dynamics Pretraining Learns Good Representations for Multitask Imitation},\nauthor={David Brandfonbrener and Ofir Nachum and Joan Bruna},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kjMGHTo8Cs}\n}", "github": "", "project": "", "reviewers": "Rymd;8bvo;t1mu;JZDX", "pdf_size": 2860335, "rating": "6;6;6;6", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "77;125;104;78", "wc_strengths": "28;143;191;34", "wc_weaknesses": "69;161;208;312", "wc_questions": 
"116;95;33;36", "wc_limitations": "24;44;33;16", "wc_review": "314;568;569;476", "wc_reply_reviewers": "48;0;23;95", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.0, 19.937402037376884 ], "wc_strengths_avg": [ 99.0, 70.11775809308224 ], "wc_weaknesses_avg": [ 187.5, 87.5571242104262 ], "wc_questions_avg": [ 70.0, 36.28360511305347 ], "wc_limitations_avg": [ 29.25, 10.425329730996522 ], "wc_review_avg": [ 481.75, 103.95281381473038 ], "wc_reply_reviewers_avg": [ 41.5, 35.245567097154215 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17918648956995723868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "nyu.edu;openai.com;nyu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "New York University;OpenAI", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://openai.com", "aff_unique_abbr": "NYU;OpenAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Offline Minimax Soft-Q-learning Under Realizability and Partial Coverage", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70632", "id": "kjkLJ7NJJZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a095b46705d7e6f81fc50270fe770c2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kjkLJ7NJJZ", "openreview": "https://openreview.net/forum?id=kjkLJ7NJJZ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70632", "video": "https://nips.cc/virtual/2023/poster/70632", "author_site": "Masatoshi Uehara, Nathan Kallus, Jason Lee, Wen Sun", "tldr": "", "abstract": "We consider offline reinforcement learning (RL) where we only have only access to offline data. In contrast to numerous offline RL algorithms that necessitate the uniform coverage of the offline data over state and action space, we propose value-based algorithms with PAC guarantees under partial coverage, specifically, coverage of offline data against a single policy, and realizability of soft Q-function (a.k.a., entropy-regularized Q-function) and another function, which is defined as a solution to a saddle point of certain minimax optimization problem). Furthermore, we show the analogous result for Q-functions instead of soft Q-functions. To attain these guarantees, we use novel algorithms with minimax loss functions to accurately estimate soft Q-functions and Q-functions with \n-convergence guarantees measured on the offline data. We introduce these loss functions by casting the estimation problems into nonlinear convex optimization problems and taking the Lagrange functions.", "keywords": "Reinforcement learning theory;PAC RL;Offline Reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/d7faa369560ffd9e78a1240c0a4bcd137b438b93.pdf", "author": "Masatoshi Uehara;Nathan Kallus;Jason D. 
Lee;Wen Sun", "authorids": "~Masatoshi_Uehara1;~Nathan_Kallus1;~Jason_D._Lee1;~Wen_Sun1", "gender": "M;;M;", "homepage": "https://www.masatoshiuehara.com/;http://nathankallus.com/;https://jasondlee88.github.io/;https://wensun.github.io", "dblp": "225/6517;142/2900;88/3262;", "google_scholar": "https://scholar.google.co.jp/citations?user=xuLKJboAAAAJ;K2WfIlsAAAAJ;GR_DsT0AAAAJ;iOLC30YAAAAJ", "orcid": "0000-0001-9017-3105;0000-0003-1672-0507;;", "linkedin": ";;;", "or_profile": "~Masatoshi_Uehara1;~Nathan_Kallus1;~Jason_D._Lee1;~Wen_Sun1", "aff": "Cornell University;Cornell University;Princeton University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;princeton.edu;cornell.edu", "position": "PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nuehara2023offline,\ntitle={Offline Minimax Soft-Q-learning Under Realizability and Partial Coverage},\nauthor={Masatoshi Uehara and Nathan Kallus and Jason D. Lee and Wen Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kjkLJ7NJJZ}\n}", "github": "", "project": "", "reviewers": "oZk7;CJ1Z;4xjy;3Bh2;6n4K", "pdf_size": 362816, "rating": "5;6;6;7;7", "confidence": "4;4;4;4;2", "soundness": "2;4;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "72;40;55;31;80", "wc_strengths": "44;85;47;25;64", "wc_weaknesses": "170;181;47;112;31", "wc_questions": "4;9;264;1;26", "wc_limitations": "4;1;1;1;11", "wc_review": "294;316;414;170;212", "wc_reply_reviewers": "225;0;10;0;0", "wc_reply_authors": "226;0;0;0;0", "reply_reviewers": "2;0;1;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 55.6, 18.51053753946654 ], "wc_strengths_avg": [ 53.0, 20.228692493584454 ], "wc_weaknesses_avg": [ 108.2, 61.38208207612381 ], "wc_questions_avg": [ 60.8, 101.96744578540742 ], "wc_limitations_avg": [ 3.6, 3.8781438859330635 ], "wc_review_avg": [ 281.2, 85.04681063978826 ], "wc_reply_reviewers_avg": [ 47.0, 89.08422980528034 ], "wc_reply_authors_avg": [ 45.2, 90.4 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10316420212683686788&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "cornell.edu;cornell.edu;princeton.edu;cornell.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Cornell University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.princeton.edu", "aff_unique_abbr": "Cornell;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Accountability in Offline Reinforcement Learning: Explaining Decisions with a Corpus of Examples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70631", "id": "kmbG9iBRIb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/096b1019463f34eb241e87cfce8dfe16-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kmbG9iBRIb", "openreview": 
"https://openreview.net/forum?id=kmbG9iBRIb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70631", "video": "https://nips.cc/virtual/2023/poster/70631", "author_site": "Hao Sun, Alihan H\u00fcy\u00fck, Daniel Jarrett, Mihaela van der Schaar", "tldr": "", "abstract": "Learning controllers with offline data in decision-making systems is an essential area of research due to its potential to reduce the risk of applications in real-world systems. However, in responsibility-sensitive settings such as healthcare, decision accountability is of paramount importance, yet has not been adequately addressed by the literature.\nThis paper introduces the Accountable Offline Controller (AOC) that employs the offline dataset as the Decision Corpus and performs accountable control based on a tailored selection of examples, referred to as the Corpus Subset. AOC operates effectively in low-data scenarios, can be extended to the strictly offline imitation setting, and displays qualities of both conservation and adaptability.\nWe assess AOC's performance in both simulated and real-world healthcare scenarios, emphasizing its capability to manage offline control tasks with high levels of performance while maintaining accountability.", "keywords": "Accountability;Reinforcement Learning;Batched Control;Accountable Decision-Making;Offline RL;Interpretability in RL", "primary_area": "", "supplementary_material": "/attachment/e28b2a5ee09ae1bb6516a15d09cdeb7508e0877d.zip", "author": "Hao Sun;Alihan H\u00fcy\u00fck;Daniel Jarrett;Mihaela van der Schaar", "authorids": "~Hao_Sun1;~Alihan_H\u00fcy\u00fck1;~Daniel_Jarrett1;~Mihaela_van_der_Schaar2", "gender": "M;;;F", "homepage": "https://holarissun.github.io;;https://danieljarrett.github.io;https://www.vanderschaar-lab.com", "dblp": "SunLLZL19;227/2296;230/8183;", "google_scholar": "7ZNoHJkAAAAJ;EMq6KwMAAAAJ;Pczk-PQAAAAJ;DZ3S--MAAAAJ", "orcid": ";;0000-0002-2204-6515;", "linkedin": ";;danjarrett/;", "or_profile": "~Hao_Sun1;~Alihan_H\u00fcy\u00fck1;~Daniel_Jarrett1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;Ph.D.;Full Professor", "bibtex": "@inproceedings{\nsun2023accountability,\ntitle={Accountability in Offline Reinforcement Learning: Explaining Decisions with a Corpus of Examples},\nauthor={Hao Sun and Alihan H{\\\"u}y{\\\"u}k and Daniel Jarrett and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kmbG9iBRIb}\n}", "github": "", "project": "", "reviewers": "qevo;fQBY;fhj3;izcS;9Kjz", "pdf_size": 5258610, "rating": "4;5;5;6;7", "confidence": "3;3;4;3;3", "soundness": "3;2;2;3;4", "novelty": "2;2;3;3;3", "presentation": "2;2;2;2;3", "wc_summary": "109;58;607;85;37", "wc_strengths": "40;27;59;112;45", "wc_weaknesses": "246;165;800;74;11", "wc_questions": "81;32;395;52;23", "wc_limitations": "7;15;70;12;31", "wc_review": "483;297;1931;335;147", "wc_reply_reviewers": "86;47;121;75;20", "wc_reply_authors": "221;957;724;272;69", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;4;3;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 179.2, 215.27879598325515 
], "wc_strengths_avg": [ 56.6, 29.533709553660884 ], "wc_weaknesses_avg": [ 259.2, 281.9272246520367 ], "wc_questions_avg": [ 116.6, 140.61664197384317 ], "wc_limitations_avg": [ 27.0, 22.952124084711635 ], "wc_review_avg": [ 638.6, 654.987511331323 ], "wc_reply_reviewers_avg": [ 69.8, 34.38255371551101 ], "wc_reply_authors_avg": [ 448.6, 335.0824376179689 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1961161351381841, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6083720065173493338&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Understanding Multi-phase Optimization Dynamics and Rich Nonlinear Behaviors of ReLU Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70630", "id": "konBXvt2iS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7016d7b7b6e3c05b2128ac5b3aae492d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=konBXvt2iS", "openreview": "https://openreview.net/forum?id=konBXvt2iS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70630", "video": "https://nips.cc/virtual/2023/poster/70630", "author_site": "Mingze Wang, Chao Ma", "tldr": "", "abstract": "The training process of ReLU neural networks often exhibits complicated nonlinear phenomena. \nThe nonlinearity of models and non-convexity of loss pose significant challenges for theoretical analysis. Therefore, most previous theoretical works on the optimization dynamics of neural networks focus either on local analysis (like the end of training) or approximate linear models (like Neural Tangent Kernel). \nIn this work, we conduct a complete theoretical characterization of the training process of a two-layer ReLU network trained by Gradient Flow on a linearly separable data. In this specific setting, our analysis captures the whole optimization process starting from random initialization to final convergence. 
\nDespite the relatively simple model and data that we studied, we reveal four different phases from the whole training process showing a general simplifying-to-complicating learning trend.\nSpecific nonlinear behaviors can also be precisely identified and captured theoretically, such as\ninitial condensation, saddle-to-plateau dynamics, plateau escape, changes of activation patterns, \nlearning with increasing complexity, etc.", "keywords": "non-convex optimization;training dynamics;neural network", "primary_area": "", "supplementary_material": "/attachment/4b6d4f2df39e64c66ed574998bd1e68cc6eccaaf.zip", "author": "Mingze Wang;Chao Ma", "authorids": "~Mingze_Wang2;~Chao_Ma8", "gender": ";M", "homepage": "https://wmz9.github.io/;", "dblp": "296/7556;", "google_scholar": "CkU47X0AAAAJ;n2BTRgUAAAAJ", "orcid": ";", "linkedin": ";chao-ma-9b593a129/", "or_profile": "~Mingze_Wang2;~Chao_Ma8", "aff": "Peking University;Stanford University", "aff_domain": "pku.edu.cn;stanford.edu", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\nwang2023understanding,\ntitle={Understanding Multi-phase Optimization Dynamics and Rich Nonlinear Behaviors of Re{LU} Networks},\nauthor={Mingze Wang and Chao Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=konBXvt2iS}\n}", "github": "", "project": "", "reviewers": "vhgo;yJvt;79Zb;w3Qj", "pdf_size": 1417354, "rating": "5;7;7;8", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;4;3", "wc_summary": "126;86;248;116", "wc_strengths": "120;35;52;106", "wc_weaknesses": "133;51;122;88", "wc_questions": "131;143;76;32", "wc_limitations": "4;43;227;1", "wc_review": "514;358;725;343", "wc_reply_reviewers": "0;35;38;35", "wc_reply_authors": "0;206;24;23", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 144.0, 61.82232606429493 ], "wc_strengths_avg": [ 78.25, 35.611620294504995 ], "wc_weaknesses_avg": [ 98.5, 32.05074102107469 ], "wc_questions_avg": [ 95.5, 44.52246623896749 ], "wc_limitations_avg": [ 68.75, 92.85573487943542 ], "wc_review_avg": [ 485.0, 153.89444434416728 ], "wc_reply_reviewers_avg": [ 27.0, 15.636495771111889 ], "wc_reply_authors_avg": [ 63.25, 82.9740170173772 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8341918670886190832&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;stanford.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.stanford.edu", "aff_unique_abbr": "Peking U;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Easy Learning from Label Proportions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70629", "id": "kqBUgrkm1c", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3085fd61063840fdb2e6eafac58589f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kqBUgrkm1c", "openreview": "https://openreview.net/forum?id=kqBUgrkm1c", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70629", "video": "https://nips.cc/virtual/2023/poster/70629", "author_site": "R\u00f3bert Busa-Fekete, Heejin Choi, Travis Dick, Claudio Gentile, Andres Munoz Medina", "tldr": "", "abstract": "We consider the problem of Learning from Label Proportions (LLP), a weakly supervised classification setup where instances are grouped into i.i.d. \u201cbags\u201d, and only the frequency of class labels at each bag is available. Albeit, the objective of the learner is to achieve low task loss at an individual instance level. Here we propose EASYLLP, a flexible and simple-to-implement debiasing approach based on aggregate labels, which operates on arbitrary loss functions. Our technique allows us to accurately estimate the expected loss of an arbitrary model at an individual level. We elucidate the differences between our method and standard methods based on label proportion matching, in terms of applicability and optimality conditions. We showcase the flexibility of our approach compared to alternatives by applying our method to popular learning frameworks, like Empirical Risk Minimization (ERM) and Stochastic Gradient Descent (SGD) with provable guarantees on instance level performance. Finally, we validate our theoretical results on multiple datasets, empirically illustrating the conditions under which our algorithm is expected to perform better or worse than previous LLP approaches", "keywords": "learning with partial information;unbiased loss;classification;proportion matching", "primary_area": "", "supplementary_material": "/attachment/bad4fd9872f8792d39d360d97f4251f91de4a796.pdf", "author": "Robert Istvan Busa-Fekete;Heejin Choi;Travis Dick;Claudio Gentile;Andres Munoz medina", "authorids": "~Robert_Istvan_Busa-Fekete1;~Heejin_Choi1;~Travis_Dick1;~Claudio_Gentile1;~Andres_Munoz_medina1", "gender": "M;;M;M;", "homepage": ";;https://www.cis.upenn.edu/~tbd/;https://sites.google.com/corp/view/cgentile;https://ammedina-ml.com", "dblp": "69/4876;;135/8679;56/5759;10/11472", "google_scholar": "UNtKl1MAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=0SiZNDEAAAAJ;", "orcid": ";;;0000-0003-1551-2167;", "linkedin": ";heejin-choi-1134a326/;;;", "or_profile": "~Robert_Istvan_Busa-Fekete1;~Heejin_Choi1;~Travis_Dick1;~Claudio_Gentile1;~Andres_Munoz_medina1", "aff": "Google Research;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Software Engineer;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbusa-fekete2023easy,\ntitle={Easy Learning from Label Proportions},\nauthor={Robert Istvan Busa-Fekete and Heejin Choi and Travis Dick and Claudio Gentile and Andres Munoz medina},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kqBUgrkm1c}\n}", "github": "", "project": "", "reviewers": "YhoX;z2CW;c8Fj;xq8A", "pdf_size": 480482, "rating": "6;6;6;6", "confidence": "4;4;2;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;2", "wc_summary": "100;114;60;29", "wc_strengths": "84;40;64;77", "wc_weaknesses": "64;110;151;102", "wc_questions": "3;5;52;357", "wc_limitations": "1;1;1;2", 
"wc_review": "252;270;328;567", "wc_reply_reviewers": "27;20;0;11", "wc_reply_authors": "240;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 33.48413803579241 ], "wc_strengths_avg": [ 66.25, 16.768646337734005 ], "wc_weaknesses_avg": [ 106.75, 30.898017735770686 ], "wc_questions_avg": [ 104.25, 147.2368415173322 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 354.25, 126.00074404542221 ], "wc_reply_reviewers_avg": [ 14.5, 10.111874208078342 ], "wc_reply_authors_avg": [ 60.0, 103.92304845413264 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11330942247381631355&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "google.com;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SpatialRank: Urban Event Ranking with NDCG Optimization on Spatiotemporal Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70628", "id": "ks7Mf5lzSx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1f3cbee17170c3ffff3e413d2df54f6b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ks7Mf5lzSx", "openreview": "https://openreview.net/forum?id=ks7Mf5lzSx", "poster": "/media/PosterPDFs/NeurIPS%202023/70628.png?t=1697421147.659981", "slides": "https://nips.cc/virtual/2023/poster/70628", "video": "https://nips.cc/virtual/2023/poster/70628", "author_site": "BANG AN, Xun Zhou, Xun Zhou, YONGJIAN ZHONG, Tianbao Yang", "tldr": "", "abstract": "The problem of urban event ranking aims at predicting the top-$k$ most risky locations of future events such as traffic accidents and crimes. This problem is of fundamental importance to public safety and urban administration especially when limited resources are available. The problem is, however, challenging due to complex and dynamic spatio-temporal correlations between locations, uneven distribution of urban events in space, and the difficulty to correctly rank nearby locations with similar features. Prior works on event forecasting mostly aim at accurately predicting the actual risk score or counts of events for all the locations. Rankings obtained as such usually have low quality due to prediction errors. Learning-to-rank methods directly optimize measures such as Normalized Discounted Cumulative Gain (NDCG), but cannot handle the spatiotemporal autocorrelation existing among locations. Due to the common assumption that items are independent. In this paper, we bridge the gap by proposing a novel spatial event ranking approach named SpatialRank. SpatialRank features adaptive graph convolution layers that dynamically learn the spatiotemporal dependencies across locations from data. 
In addition, the model optimizes, through surrogates, a hybrid NDCG loss with a spatial component to better rank neighboring spatial locations. We design an importance-sampling algorithm with spatial filtering to effectively evaluate the loss during training. Comprehensive experiments on three real-world datasets demonstrate that SpatialRank can effectively identify the top riskiest locations of crimes and traffic accidents and outperform state-of-the-art methods in terms of NDCG by up to 12.7%.", "keywords": "urban event;NDCG optimization;ranking;traffic accident;crime;spatiotemporal data", "primary_area": "", "supplementary_material": "/attachment/da6b8cc28cab9c3d681e0637d664bf008f828faa.zip", "author": "BANG AN;Xun Zhou;Yongjian Zhong;Tianbao Yang", "authorids": "~BANG_AN2;~Xun_Zhou1;~Yongjian_Zhong2;~Tianbao_Yang1", "gender": "M;M;M;", "homepage": ";https://xunzhou2023.github.io/;https://people.tamu.edu/~tianbao-yang/publications.html;", "dblp": "188/0741-2;16/1951;56/7047;232/5618", "google_scholar": "axn3xyIAAAAJ;WEoQSX0AAAAJ;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-0164-9763;0000-0003-4930-6572;;", "linkedin": ";;;", "or_profile": "~BANG_AN2;~Xun_Zhou1;~Tianbao_Yang1;~YONGJIAN_ZHONG1", "aff": "University of Iowa;University of Iowa;Texas A&M University - College Station;University of Iowa", "aff_domain": "uiowa.edu;iowa.edu;tamu.edu;uiowa.edu", "position": "PhD student;Associate Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nan2023spatialrank,\ntitle={SpatialRank: Urban Event Ranking with {NDCG} Optimization on Spatiotemporal Data},\nauthor={BANG AN and Xun Zhou and Yongjian Zhong and Tianbao Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ks7Mf5lzSx}\n}", "github": "", "project": "", "reviewers": "VBY2;yV4P;JmsG;RZQE", "pdf_size": 970668, "rating": "5;5;7;8", "confidence": "2;4;4;4", "soundness": "3;2;4;3", "novelty": "3;2;3;3", "presentation": "2;1;3;3", "wc_summary": "63;111;209;107", "wc_strengths": "98;27;130;105", "wc_weaknesses": "332;325;116;87", "wc_questions": "17;84;88;53", "wc_limitations": "40;9;9;1", "wc_review": "550;556;552;353", "wc_reply_reviewers": "277;87;23;31", "wc_reply_authors": "820;519;24;4", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 122.5, 53.37368265353253 ], "wc_strengths_avg": [ 90.0, 38.268786236304905 ], "wc_weaknesses_avg": [ 215.0, 113.98903456034708 ], "wc_questions_avg": [ 60.5, 28.53506614676055 ], "wc_limitations_avg": [ 14.75, 14.939461168328663 ], "wc_review_avg": [ 502.75, 86.48518659284953 ], "wc_reply_reviewers_avg": [ 104.5, 102.59995126704496 ], "wc_reply_authors_avg": [ 341.75, 344.6667774822517 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5752146738254619526&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "uiowa.edu;iowa.edu;tamu.edu;uiowa.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Iowa;Texas A&M
University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uiowa.edu;https://www.tamu.edu", "aff_unique_abbr": "UIowa;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Last-layer Retraining for Group Robustness with Fewer Annotations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70627", "id": "kshC3NOP6h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/265bee74aee86df77e8e36d25e786ab5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kshC3NOP6h", "openreview": "https://openreview.net/forum?id=kshC3NOP6h", "poster": "/media/PosterPDFs/NeurIPS%202023/70627.png?t=1699913722.3118243", "slides": "https://nips.cc/virtual/2023/poster/70627", "video": "https://nips.cc/virtual/2023/poster/70627", "author_site": "Tyler LaBonte, Vidya Muthukumar, Abhishek Kumar", "tldr": "", "abstract": "Empirical risk minimization (ERM) of neural networks is prone to over-reliance on spurious correlations and poor generalization on minority groups. The recent deep feature reweighting (DFR) technique achieves state-of-the-art group robustness via simple last-layer retraining, but it requires held-out group and class annotations to construct a group-balanced reweighting dataset. In this work, we examine this impractical requirement and find that last-layer retraining can be surprisingly effective with no group annotations (other than for model selection) and only a handful of class annotations. We first show that last-layer retraining can greatly improve worst-group accuracy even when the reweighting dataset has only a small proportion of worst-group data. This implies a \"free lunch\" where holding out a subset of training data to retrain the last layer can substantially outperform ERM on the entire dataset with no additional data, annotations, or computation for training. To further improve group robustness, we introduce a lightweight method called selective last-layer finetuning (SELF), which constructs the reweighting dataset using misclassifications or disagreements. 
Our experiments present the first evidence that model disagreement upsamples worst-group data, enabling SELF to nearly match DFR on four well-established benchmarks across vision and language tasks with no group annotations and less than 3% of the held-out class annotations.", "keywords": "spurious correlations;group robustness;last-layer retraining;distribution shift", "primary_area": "", "supplementary_material": "", "author": "Tyler LaBonte;Vidya Muthukumar;Abhishek Kumar", "authorids": "~Tyler_LaBonte1;~Vidya_Muthukumar3;~Abhishek_Kumar1", "gender": "M;F;", "homepage": "https://tyler-labonte.com;https://vmuthukumar.ece.gatech.edu;http://inductivebias.ml", "dblp": "251/5689.html;149/0019;67/6188-1", "google_scholar": "0_bKeg4AAAAJ;K2OEs2YAAAAJ;6vghMS0AAAAJ", "orcid": "0000-0002-3781-7212;;", "linkedin": "https://linkedin.com/in/tmlabonte;;", "or_profile": "~Tyler_LaBonte1;~Vidya_Muthukumar3;~Abhishek_Kumar1", "aff": "Google;Georgia Institute of Technology;Google DeepMind", "aff_domain": "google.com;gatech.edu;google.com", "position": "Intern;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nlabonte2023towards,\ntitle={Towards Last-Layer Retraining for Group Robustness with Fewer Annotations},\nauthor={Tyler LaBonte and Vidya Muthukumar and Abhishek Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kshC3NOP6h}\n}", "github": "", "project": "", "reviewers": "x5jy;HP8n;Ep6f;KCny", "pdf_size": 636786, "rating": "3;6;6;7", "confidence": "4;3;5;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;4;3", "wc_summary": "24;40;81;101", "wc_strengths": "39;72;191;129", "wc_weaknesses": "191;169;399;240", "wc_questions": "6;49;151;3", "wc_limitations": "6;7;7;73", "wc_review": "266;337;829;546", "wc_reply_reviewers": "63;19;69;0", "wc_reply_authors": "0;0;325;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.5, 30.858548248418945 ], "wc_strengths_avg": [ 107.75, 57.850561795024944 ], "wc_weaknesses_avg": [ 249.75, 89.9204509552749 ], "wc_questions_avg": [ 52.25, 59.8472012712374 ], "wc_limitations_avg": [ 23.25, 28.726077003308337 ], "wc_review_avg": [ 494.5, 218.83841070525074 ], "wc_reply_reviewers_avg": [ 37.75, 29.11507341567251 ], "wc_reply_authors_avg": [ 81.25, 140.72912811497127 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12904691645468364442&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "google.com;gatech.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;Georgia Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.gatech.edu", "aff_unique_abbr": "Google;Georgia Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Multi-task learning with summary statistics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70626", "id": "ktTSji9ZIs", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a924b7178e5975dfed1de235f0b72973-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ktTSji9ZIs", "openreview": "https://openreview.net/forum?id=ktTSji9ZIs", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70626", "video": "https://nips.cc/virtual/2023/poster/70626", "author_site": "Parker Knight, Rui Duan", "tldr": "", "abstract": "Multi-task learning has emerged as a powerful machine learning paradigm for integrating data from multiple sources, leveraging similarities between tasks to improve overall model performance. However, the application of multi-task learning to real-world settings is hindered by data-sharing constraints, especially in healthcare settings. To address this challenge, we propose a flexible multi-task learning framework utilizing summary statistics from various sources. Additionally, we present an adaptive parameter selection approach based on a variant of Lepski's method, allowing for data-driven tuning parameter selection when only summary statistics are accessible. Our systematic non-asymptotic analysis characterizes the performance of the proposed methods under various regimes of the source datasets' sample complexity and overlap. We demonstrate our theoretical findings and the performance of the method through extensive simulations. This work offers a more flexible tool for training related models across various domains, with practical implications in genetic risk prediction and many other fields.", "keywords": "multi-task learning;genetic risk prediction;summary statistics", "primary_area": "", "supplementary_material": "/attachment/4113ee36ed3a8e7fcec9a5a88a128be614415095.pdf", "author": "Parker Knight;Rui Duan", "authorids": "~Parker_Knight1;rduan@hsph.harvard.edu", "gender": "M;", "homepage": "https://pknight24.github.io;", "dblp": "309/3679;", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Parker_Knight1;rduan@hsph.harvard.edu", "aff": "Harvard University;", "aff_domain": "harvard.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nknight2023multitask,\ntitle={Multi-task learning with summary statistics},\nauthor={Parker Knight and Rui Duan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ktTSji9ZIs}\n}", "github": "", "project": "", "reviewers": "TqYc;LHqp;wP1d;PbVj;Z4Yp", "pdf_size": 808240, "rating": "5;5;6;6;7", "confidence": "4;3;3;2;2", "soundness": "3;3;3;3;3", "novelty": "2;3;2;2;3", "presentation": "3;3;4;2;3", "wc_summary": "73;111;180;77;33", "wc_strengths": "87;268;131;32;13", "wc_weaknesses": "61;212;100;440;12", "wc_questions": "128;52;51;99;63", "wc_limitations": "1;1;1;13;1", "wc_review": "350;644;463;661;122", "wc_reply_reviewers": "48;0;34;0;0", "wc_reply_authors": "231;0;0;0;0", "reply_reviewers": "1;0;1;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 94.8, 49.26012586260819 ], "wc_strengths_avg": [ 106.2, 90.94481843403724 ], "wc_weaknesses_avg": [ 165.0, 152.50180326802698 ], "wc_questions_avg": [ 78.6, 30.229786635039286 ], "wc_limitations_avg": [ 3.4, 4.8 ], "wc_review_avg": [ 448.0, 199.9449924354196 ], "wc_reply_reviewers_avg": [ 16.4, 20.567936211491904 ], "wc_reply_authors_avg": [ 46.2, 92.4 ], 
"reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3108614112242156687&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "harvard.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "DDCoT: Duty-Distinct Chain-of-Thought Prompting for Multimodal Reasoning in Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70625", "id": "ktYjrgOENR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/108030643e640ac050e0ed5e6aace48f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ktYjrgOENR", "openreview": "https://openreview.net/forum?id=ktYjrgOENR", "poster": "/media/PosterPDFs/NeurIPS%202023/70625.png?t=1702207945.5568275", "slides": "https://nips.cc/virtual/2023/poster/70625", "video": "https://nips.cc/virtual/2023/poster/70625", "author_site": "Ge Zheng, Bin Yang, Jiajin Tang, Hong-Yu Zhou, Sibei Yang", "tldr": "", "abstract": "A long-standing goal of AI systems is to perform complex multimodal reasoning like humans. Recently, large language models (LLMs) have made remarkable strides in such multi-step reasoning on the language modality solely by leveraging the chain of thought (CoT) to mimic human thinking. However, the transfer of these advancements to multimodal contexts introduces heightened challenges, including but not limited to the impractical need for labor-intensive annotation and the limitations in terms of flexibility, generalizability, and explainability. To evoke CoT reasoning in multimodality, this work first conducts an in-depth analysis of these challenges posed by multimodality and presents two key insights: \u201ckeeping critical thinking\u201d and \u201cletting everyone do their jobs\u201d in multimodal CoT reasoning. Furthermore, this study proposes a novel DDCoT prompting that maintains a critical attitude through negative-space prompting and incorporates multimodality into reasoning by first dividing the reasoning responsibility of LLMs into reasoning and recognition and then integrating the visual recognition capability of visual models into the joint reasoning process. 
The rationales generated by DDCoT not only improve the reasoning abilities of both large and small language models in zero-shot prompting and fine-tuning learning, significantly outperforming state-of-the-art methods, but also exhibit impressive generalizability and explainability.", "keywords": "Chain-of-Thought Reasoning;Multimodal Science Question Answering;Vision and Language", "primary_area": "", "supplementary_material": "/attachment/8df4ebb4b68cce20eb5730c09a28098640218c4c.pdf", "author": "Ge Zheng;Bin Yang;Jiajin Tang;Hong-Yu Zhou;Sibei Yang", "authorids": "~Ge_Zheng1;~Bin_Yang9;~Jiajin_Tang1;~Hong-Yu_Zhou2;~Sibei_Yang1", "gender": ";M;;F;M", "homepage": ";https://github.com/YangBin55;http://toneyaya.github.io;https://sibeiyang.github.io/;https://zhouhy.org", "dblp": "248/2063.html;;355/1821;215/4885;", "google_scholar": ";;;user=4pg3rtYAAAAJ;aJnvh8gAAAAJ", "orcid": "0000-0001-8770-2555;;0009-0002-2906-8941;;0000-0002-1256-7050", "linkedin": ";;;;", "or_profile": "~Ge_Zheng1;~Bin_Yang9;~Jiajin_Tang1;~Sibei_Yang1;~Hongyu_Zhou2", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;University of Hong Kong", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;hku.hk", "position": "Undergrad student;MS student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nzheng2023ddcot,\ntitle={{DDC}oT: Duty-Distinct Chain-of-Thought Prompting for Multimodal Reasoning in Language Models},\nauthor={Ge Zheng and Bin Yang and Jiajin Tang and Hong-Yu Zhou and Sibei Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ktYjrgOENR}\n}", "github": "", "project": "", "reviewers": "ndag;ka68;aDZq;kQGJ;oVaJ", "pdf_size": 4476266, "rating": "5;5;6;6;6", "confidence": "4;4;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;2", "presentation": "3;2;4;3;1", "wc_summary": "76;86;47;86;246", "wc_strengths": "47;44;54;123;122", "wc_weaknesses": "312;38;116;100;299", "wc_questions": "7;56;2;19;14", "wc_limitations": "1;12;2;17;8", "wc_review": "443;236;221;345;689", "wc_reply_reviewers": "19;137;0;0;12", "wc_reply_authors": "0;973;0;0;21", "reply_reviewers": "1;1;0;0;1", "reply_authors": "1;3;1;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 108.2, 70.3659008327187 ], "wc_strengths_avg": [ 78.0, 36.48013157871007 ], "wc_weaknesses_avg": [ 173.0, 111.35528725660043 ], "wc_questions_avg": [ 19.6, 19.106019993708788 ], "wc_limitations_avg": [ 8.0, 6.0332412515993425 ], "wc_review_avg": [ 386.8, 171.1495252695724 ], "wc_reply_reviewers_avg": [ 33.6, 52.20957766540541 ], "wc_reply_authors_avg": [ 198.8, 387.1854336103051 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18426395033030088175&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;hku.hk", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "ShanghaiTech University;University of Hong Kong", "aff_unique_dep": ";",
"aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.hku.hk", "aff_unique_abbr": "ShanghaiTech;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Computing Optimal Nash Equilibria in Multiplayer Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70624", "id": "kupNhxLc6k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42cac45fb00f7038c892f1a1bfc216d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kupNhxLc6k", "openreview": "https://openreview.net/forum?id=kupNhxLc6k", "poster": "/media/PosterPDFs/NeurIPS%202023/70624.png?t=1698634430.1629207", "slides": "https://nips.cc/virtual/2023/poster/70624", "video": "https://nips.cc/virtual/2023/poster/70624", "author_site": "Youzhi Zhang, Bo An, Venkatramanan Subrahmanian, Venkatramanan Subrahmanian", "tldr": "", "abstract": "Designing efficient algorithms to compute a Nash Equilibrium (NE) in multiplayer games is still an open challenge. In this paper, we focus on computing an NE that optimizes a given objective function. For example, when there is a team of players independently playing against an adversary in a game (e.g., several groups in a forest trying to interdict illegal loggers in green security games), these team members may need to find an NE minimizing the adversary\u2019s utility. Finding an optimal NE in multiplayer games can be formulated as a mixed-integer bilinear program by introducing auxiliary variables to represent bilinear terms, leading to a huge number of bilinear terms, making it hard to solve. To overcome this challenge, we first propose a general framework for this formulation based on a set of correlation plans. We then develop a novel algorithm called CRM based on this framework, which uses correlation plans with their relations to strictly reduce the feasible solution space after the convex relaxation of bilinear terms while minimizing the number of correlation plans to significantly reduce the number of bilinear terms. 
We show that our techniques can significantly reduce the time complexity and CRM can be several orders of magnitude faster than the state-of-the-art baseline.", "keywords": "Algorithmic game theory;Optimal Nash equilibrium", "primary_area": "", "supplementary_material": "/attachment/006d84ddc1099dbe2d221dcfe02916cde837d205.pdf", "author": "Youzhi Zhang;Bo An;Venkatramanan Siva Subrahmanian", "authorids": "~Youzhi_Zhang2;~Bo_An2;~Venkatramanan_Siva_Subrahmanian2", "gender": ";M;M", "homepage": "https://youzhi333.github.io/index.html;https://personal.ntu.edu.sg/boan/;https://vssubrah.github.io/", "dblp": "131/9490-1;42/6178-1.html;s/VSSubrahmanian.html", "google_scholar": "i2j5DmwAAAAJ;PEEpuNwAAAAJ;PLt9wB8AAAAJ", "orcid": "0000-0002-2984-734X;0000-0002-7064-7438;0000-0001-7191-0296", "linkedin": ";;v-s-subrahmanian-8500577/", "or_profile": "~Youzhi_Zhang2;~Bo_An2;~VS_Subrahmanian1", "aff": "Centre for Artificial Intelligence and Robotics, Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Nanyang Technological University;Northwestern University", "aff_domain": "cair-cas.org.hk;ntu.edu.sg;northwestern.edu", "position": "Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023computing,\ntitle={Computing Optimal Nash Equilibria in Multiplayer Games},\nauthor={Youzhi Zhang and Bo An and Venkatramanan Siva Subrahmanian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kupNhxLc6k}\n}", "github": "", "project": "", "reviewers": "oqCe;WEGb;CmiG;PoVn", "pdf_size": 464148, "rating": "5;5;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "78;126;72;75", "wc_strengths": "34;23;33;83", "wc_weaknesses": "100;54;49;40", "wc_questions": "15;10;1;104", "wc_limitations": "1;8;1;6", "wc_review": "228;221;156;308", "wc_reply_reviewers": "19;24;4;5", "wc_reply_authors": "22;22;8;8", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.75, 22.18529918662356 ], "wc_strengths_avg": [ 43.25, 23.34925052330374 ], "wc_weaknesses_avg": [ 60.75, 23.209642392764263 ], "wc_questions_avg": [ 32.5, 41.58425182686349 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 228.25, 53.92761352034781 ], "wc_reply_reviewers_avg": [ 13.0, 8.689073598491383 ], "wc_reply_authors_avg": [ 15.0, 7.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5326890672703246146&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cair-cas.org.hk;ntu.edu.sg;northwestern.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Nanyang Technological University;Northwestern University", "aff_unique_dep": "Centre for Artificial Intelligence and Robotics;;", "aff_unique_url": ";https://www.ntu.edu.sg;https://www.northwestern.edu", "aff_unique_abbr": ";NTU;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": 
"China;Singapore;United States" }, { "title": "PRIOR: Personalized Prior for Reactivating the Information Overlooked in Federated Learning.", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70623", "id": "kuxu4lCRr5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a3674849d6d6d23ac088b9a2552f323-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kuxu4lCRr5", "openreview": "https://openreview.net/forum?id=kuxu4lCRr5", "poster": "/media/PosterPDFs/NeurIPS%202023/70623.png?t=1697622724.060322", "slides": "https://nips.cc/virtual/2023/poster/70623", "video": "https://nips.cc/virtual/2023/poster/70623", "author_site": "Mingjia Shi, Yuhao Zhou, Kai Wang, Huaizheng Zhang, Shudong Huang, Qing Ye, Jiancheng Lv", "tldr": "", "abstract": "Classical federated learning (FL) enables training machine learning models without sharing data for privacy preservation, but heterogeneous data characteristic degrades the performance of the localized model. Personalized FL (PFL) addresses this by synthesizing personalized models from a global model via training on local data. Such a global model may overlook the specific information that the clients have been sampled. In this paper, we propose a novel scheme to inject personalized prior knowledge into the global model in each client, which attempts to mitigate the introduced incomplete information problem in PFL. At the heart of our proposed approach is a framework, the $\\textit{PFL with Bregman Divergence}$ (pFedBreD), decoupling the personalized prior from the local objective function regularized by Bregman divergence for greater adaptability in personalized scenarios. We also relax the mirror descent (RMD) to extract the prior explicitly to provide optional strategies. Additionally, our pFedBreD is backed up by a convergence analysis. Sufficient experiments demonstrate that our method reaches the $\\textit{state-of-the-art}$ performances on 5 datasets and outperforms other methods by up to 3.5% across 8 benchmarks. Extensive analyses verify the robustness and necessity of proposed designs. 
The code will be made public.", "keywords": "Federated Learning;Personalized Federated Learning;Expectation Maximization;Relaxed Mirror Descent", "primary_area": "", "supplementary_material": "/attachment/1c498b364af1c00d9794f776faff5353f7657ffd.pdf", "author": "Mingjia Shi;Yuhao Zhou;Kai Wang;Huaizheng Zhang;Shudong Huang;Qing Ye;Jiancheng Lv", "authorids": "~Mingjia_Shi1;~Yuhao_Zhou4;~Kai_Wang8;~Huaizheng_Zhang1;~Shudong_Huang1;~Qing_Ye1;~Jiancheng_Lv2", "gender": "M;M;M;;M;M;M", "homepage": ";https://github.com/Soptq;https://kaiwang960112.github.io/;;https://huangsd.github.io/;;https://cs.scu.edu.cn/info/1303/13767.htm", "dblp": "271/0186;;78/2022-36;;48/2141;;", "google_scholar": "B6f3ImkAAAAJ;;i2II0XIAAAAJ;;xa2bfaAAAAAJ;jLoTsBYAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-9988-3741;0000-0001-8074-6416;0000-0002-1154-5175;;;0000-0003-3956-3348;", "linkedin": ";;;;;;", "or_profile": "~Mingjia_Shi1;~Yuhao_Zhou4;~Kai_Wang8;~Huaizheng_Zhang1;~Shudong_Huang1;~Qing_Ye1;~Jiancheng_Lv2", "aff": "Sichuan University;Sichuan University;National University of Singapore;;Sichuan University;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;u.nus.edu;;scu.edu.cn;scu.edu.cn;scu.edu.cn", "position": "MS student;PhD student;PhD student;;Associate Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nshi2023prior,\ntitle={{PRIOR}: Personalized Prior for Reactivating the Information Overlooked in Federated Learning.},\nauthor={Mingjia Shi and Yuhao Zhou and Kai Wang and Huaizheng Zhang and Shudong Huang and Qing Ye and Jiancheng Lv},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kuxu4lCRr5}\n}", "github": "", "project": "", "reviewers": "HD18;JezB;jGgs;Q28a;qY3t", "pdf_size": 0, "rating": "5;6;6;6;7", "confidence": "3;3;4;3;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;2;2;2", "wc_summary": "38;59;36;89;122", "wc_strengths": "47;39;47;77;85", "wc_weaknesses": "167;50;76;66;155", "wc_questions": "40;484;80;104;69", "wc_limitations": "7;17;10;10;8", "wc_review": "299;649;249;346;439", "wc_reply_reviewers": "18;70;20;28;32", "wc_reply_authors": "32;76;19;56;24", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 68.8, 32.737745799000876 ], "wc_strengths_avg": [ 59.0, 18.373894524569362 ], "wc_weaknesses_avg": [ 102.8, 48.387601717795434 ], "wc_questions_avg": [ 155.4, 165.5821246391047 ], "wc_limitations_avg": [ 10.4, 3.4985711369071804 ], "wc_review_avg": [ 396.4, 140.9788636640259 ], "wc_reply_reviewers_avg": [ 33.6, 18.906083676954356 ], "wc_reply_authors_avg": [ 41.4, 21.4625254804741 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13922129340700634901&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "scu.edu.cn;scu.edu.cn;u.nus.edu;;scu.edu.cn;scu.edu.cn;scu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Sichuan University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.scu.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "SCU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Minimum-Risk Recalibration of Classifiers", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70622", "id": "kvXcHfBghm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbd6b295535e44f2b8ec0c3f1da7c509-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kvXcHfBghm", "openreview": "https://openreview.net/forum?id=kvXcHfBghm", "poster": "/media/PosterPDFs/NeurIPS%202023/70622.png?t=1702183942.980414", "slides": "https://nips.cc/virtual/2023/poster/70622", "video": "https://nips.cc/virtual/2023/poster/70622", "author_site": "Zeyu Sun, Dogyoon Song, Alfred Hero", "tldr": "", "abstract": "Recalibrating probabilistic classifiers is vital for enhancing the reliability and accuracy of predictive models. Despite the development of numerous recalibration algorithms, there is still a lack of a comprehensive theory that integrates calibration and sharpness (which is essential for maintaining predictive power). In this paper, we introduce the concept of minimum-risk recalibration within the framework of mean-squared-error (MSE) decomposition, offering a principled approach for evaluating and recalibrating probabilistic classifiers. Using this framework, we analyze the uniform-mass binning (UMB) recalibration method and establish a finite-sample risk upper bound of order $\\tilde{O}(B/n + 1/B^2)$ where $B$ is the number of bins and $n$ is the sample size. By balancing calibration and sharpness, we further determine that the optimal number of bins for UMB scales with $n^{1/3}$, resulting in a risk bound of approximately $O(n^{-2/3})$. Additionally, we tackle the challenge of label shift by proposing a two-stage approach that adjusts the recalibration function using limited labeled data from the target domain. Our results show that transferring a calibrated classifier requires significantly fewer target samples compared to recalibrating from scratch. 
We validate our theoretical findings through numerical simulations, which confirm the tightness of the proposed bounds, the optimal number of bins, and the effectiveness of label shift adaptation.", "keywords": "probability calibration;optimal number of bins;label shift adaptation", "primary_area": "", "supplementary_material": "/attachment/d9ca762fce274fad3dcf0fe9b7c215507b394fe0.pdf", "author": "Zeyu Sun;Dogyoon Song;Alfred Hero", "authorids": "~Zeyu_Sun1;~Dogyoon_Song1;~Alfred_Hero1", "gender": ";;M", "homepage": ";https://dogyoons.github.io/;http://web.eecs.umich.edu/~hero/", "dblp": ";191/6676;h/AlfredOHeroIII", "google_scholar": ";CT84_rEAAAAJ;DSiNzkIAAAAJ", "orcid": ";0000-0001-5489-8213;0000-0002-2531-9670", "linkedin": ";dogyoons/;", "or_profile": "~Zeyu_Sun1;~Dogyoon_Song1;~Alfred_Hero1", "aff": ";University of Michigan - Ann Arbor;University of Michigan", "aff_domain": ";umich.edu;umich.edu", "position": ";Postdoc;Researcher", "bibtex": "@inproceedings{\nsun2023minimumrisk,\ntitle={Minimum-Risk Recalibration of Classifiers},\nauthor={Zeyu Sun and Dogyoon Song and Alfred Hero},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kvXcHfBghm}\n}", "github": "", "project": "", "reviewers": "GPZN;kAN5;a48G;USpM;yKq4", "pdf_size": 634517, "rating": "6;6;6;7;8", "confidence": "4;1;3;3;4", "soundness": "4;3;3;4;4", "novelty": "2;3;2;4;3", "presentation": "3;4;3;3;4", "wc_summary": "96;54;100;121;197", "wc_strengths": "58;101;47;148;171", "wc_weaknesses": "73;53;66;45;17", "wc_questions": "185;1;132;6;177", "wc_limitations": "1;1;1;13;23", "wc_review": "413;210;346;333;585", "wc_reply_reviewers": "0;28;47;44;71", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 113.6, 47.02595028279599 ], "wc_strengths_avg": [ 105.0, 48.567478831003775 ], "wc_weaknesses_avg": [ 50.8, 19.518196638009364 ], "wc_questions_avg": [ 100.2, 81.01209786198602 ], "wc_limitations_avg": [ 7.8, 8.908422980528034 ], "wc_review_avg": [ 377.4, 122.75601818240929 ], "wc_reply_reviewers_avg": [ 38.0, 23.45207879911715 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4564354645876385, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15340188694655117499&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PAD: A Dataset and Benchmark for Pose-agnostic Anomaly Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73484", "id": "kxFKgqwFNk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8bc5aef775aacc1650a9790f1428bcea-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=kxFKgqwFNk", "openreview": 
"https://openreview.net/forum?id=kxFKgqwFNk", "poster": "/media/PosterPDFs/NeurIPS%202023/73484.png?t=1699859275.873461", "slides": "https://nips.cc/virtual/2023/poster/73484", "video": "https://nips.cc/virtual/2023/poster/73484", "author_site": "Qiang Zhou, Weize Li, Lihan Jiang, Guoliang Wang, Guyue Zhou, Shanghang Zhang, Hao Zhao", "tldr": "", "abstract": "Object anomaly detection is an important problem in the field of machine vision and has seen remarkable progress recently. However, two significant challenges hinder its research and application. First, existing datasets lack comprehensive visual information from various pose angles. They usually have an unrealistic assumption that the anomaly-free training dataset is pose-aligned, and the testing samples have the same pose as the training data. However, in practice, anomaly may exist in any regions on a object, the training and query samples may have different poses, calling for the study on pose-agnostic anomaly detection. Second, the absence of a consensus on experimental protocols for pose-agnostic anomaly detection leads to unfair comparisons of different methods, hindering the research on pose-agnostic anomaly detection. To address these issues, we develop Multi-pose Anomaly Detection (MAD) dataset and Pose-agnostic Anomaly Detection (PAD) benchmark, which takes the first step to address the pose-agnostic anomaly detection problem. Specifically, we build MAD using 20 complex-shaped LEGO toys including 4K views with various poses, and high-quality and diverse 3D anomalies in both simulated and real environments. Additionally, we propose a novel method OmniposeAD, trained using MAD, specifically designed for pose-agnostic anomaly detection. Through comprehensive evaluations, we demonstrate the relevance of our dataset and method. Furthermore, we provide an open-source benchmark library, including dataset and baseline methods that cover 8 anomaly detection paradigms, to facilitate future research and application in this domain. 
Code, data, and models are publicly available at https://github.com/EricLee0224/PAD.", "keywords": "Anomaly Detection;Neural Radiance Field", "primary_area": "", "supplementary_material": "/attachment/e06bed0b3a59e84a18273d866177b4a92addc9ec.pdf", "author": "Qiang Zhou;Weize Li;Lihan Jiang;Guoliang Wang;Guyue Zhou;Shanghang Zhang;Hao Zhao", "authorids": "~Qiang_Zhou6;~Weize_Li1;~Lihan_Jiang2;~Guoliang_Wang1;~Guyue_Zhou2;~Shanghang_Zhang4;~Hao_Zhao1", "gender": "M;M;M;M;M;M;F", "homepage": ";https://ericlee0224.github.io/;https://github.com/jianglh-WHU;https://github.com/Cross-ZBuild/Guoliang_Wang;https://air.tsinghua.edu.cn/en/info/1046/1196.htm;https://sites.google.com/view/fromandto;https://www.shanghangzhang.com/", "dblp": ";166/7043-1;358/4168;;133/4199;08/3737-2.html;95/11531", "google_scholar": "https://scholar.google.com.hk/citations?user=CMYTxUEAAAAJ;CyPiUucAAAAJ;JM2zk1AAAAAJ;;;ygQznUQAAAAJ;voqw10cAAAAJ", "orcid": ";;0009-0001-2899-273X;;;;", "linkedin": ";;;;;;", "or_profile": "~Qiang_Zhou6;~Weize_Li1;~Lihan_Jiang2;~Guoliang_Wang1;~Guyue_Zhou2;~Hao_Zhao1;~Shanghang_Zhang1", "aff": "Tsinghua University;Tsinghua University;Wuhan University;Changchun University of Technology;Tsinghua University;Peking University;Peking University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;whu.edu.cn;ccut.edu.cn;tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Researcher;Intern;Undergrad student;MS student;Associate Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023pad,\ntitle={{PAD}: A Dataset and Benchmark for Pose-agnostic Anomaly Detection},\nauthor={Qiang Zhou and Weize Li and Lihan Jiang and Guoliang Wang and Guyue Zhou and Shanghang Zhang and Hao Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=kxFKgqwFNk}\n}", "github": "", "project": "", "reviewers": "upSJ;9dF6;ryha;pCfQ", "pdf_size": 5253316, "rating": "6;6;8;8", "confidence": "4;5;5;4", "wc_summary_and_contributions": "258;28;133;82", "wc_strengths": "103;55;110;146", "wc_improvement": "100;73;148;120", "wc_limitations": "29;4;35;10", "wc_correctness": "247;1;12;24", "wc_clarity": "137;1;8;17", "wc_relation_to_prior_work": "63;1;20;15", "wc_documentation": "142;1;45;79", "wc_additional_feedback": "1;1;1;1", "wc_review": "1080;165;512;494", "wc_reply_reviewers": "436;30;147;85", "wc_reply_authors": "3609;1141;1226;610", "reply_reviewers": "2;1;1;1", "reply_authors": "6;3;3;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 125.25, 85.16271191078874 ], "wc_strengths_avg": [ 103.5, 32.407560846197605 ], "wc_improvement_avg": [ 110.25, 27.444261695297982 ], "wc_limitations_avg": [ 19.5, 12.854960132182441 ], "wc_correctness_avg": [ 71.0, 101.9387070743984 ], "wc_clarity_avg": [ 40.75, 55.858638544096294 ], "wc_relation_to_prior_work_avg": [ 24.75, 23.155722834755128 ], "wc_documentation_avg": [ 66.75, 51.49939320030868 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 562.75, 329.03447767673225 ], "wc_reply_reviewers_avg": [ 174.5, 156.54791598740624 ], "wc_reply_authors_avg": [ 1646.5, 1157.377315312513 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5167265228552112743&as_sdt=400005&sciodt=0,14&hl=en", 
"gs_version_total": 8, "email": "tsinghua.edu.cn;tsinghua.edu.cn;whu.edu.cn;ccut.edu.cn;tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;0;3;3", "aff_unique_norm": "Tsinghua University;Wuhan University;Changchun University of Technology;Peking University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.whu.edu.cn/;http://www.ccut.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "THU;WHU;CUT;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Look Beneath the Surface: Exploiting Fundamental Symmetry for Sample-Efficient Offline RL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70621", "id": "kyXMU3H7RB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/181a027913d36bc0a8857c0da661d621-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=kyXMU3H7RB", "openreview": "https://openreview.net/forum?id=kyXMU3H7RB", "poster": "/media/PosterPDFs/NeurIPS%202023/70621.png?t=1699845402.0040102", "slides": "https://nips.cc/virtual/2023/poster/70621", "video": "https://nips.cc/virtual/2023/poster/70621", "author_site": "Peng Cheng, Xianyuan Zhan, zhihao wu, Wenjia Zhang, Youfang Lin, Shou cheng Song, Han Wang, Li Jiang", "tldr": "", "abstract": "Offline reinforcement learning (RL) offers an appealing approach to real-world tasks by learning policies from pre-collected datasets without interacting with the environment. However, the performance of existing offline RL algorithms heavily depends on the scale and state-action space coverage of datasets. Real-world data collection is often expensive and uncontrollable, leading to small and narrowly covered datasets and posing significant challenges for practical deployments of offline RL. In this paper, we provide a new insight that leveraging the fundamental symmetry of system dynamics can substantially enhance offline RL performance under small datasets. Specifically, we propose a Time-reversal symmetry (T-symmetry) enforced Dynamics Model (TDM), which establishes consistency between a pair of forward and reverse latent dynamics. TDM provides both well-behaved representations for small datasets and a new reliability measure for OOD samples based on compliance with the T-symmetry. These can be readily used to construct a new offline RL algorithm (TSRL) with less conservative policy constraints and a reliable latent space data augmentation procedure. Based on extensive experiments, we find TSRL achieves great performance on small benchmark datasets with as few as 1% of the original samples, which significantly outperforms the recent offline RL algorithms in terms of data efficiency and generalizability. 
Code is available at:\nhttps://github.com/pcheng2/TSRL", "keywords": "sample efficiency; offline reinforcement learning; fundamental symmetry", "primary_area": "", "supplementary_material": "/attachment/6a31da3c69dcd8261b60baa14040d99947b2afb4.pdf", "author": "Peng Cheng;Xianyuan Zhan;Zhihao Wu;Wenjia Zhang;Youfang Lin;Shou cheng Song;Han Wang;Li Jiang", "authorids": "~Peng_Cheng1;~Xianyuan_Zhan1;~Zhihao_Wu2;~Wenjia_Zhang2;~Youfang_Lin1;~Shou_cheng_Song1;~Han_Wang21;~Li_Jiang4", "gender": "M;M;M;;M;M;M;", "homepage": ";http://zhanxianyuan.xyz/;;;https://faculty.bjtu.edu.cn/7443/;https://github.com/Song413026/My_repository;https://louieworth.github.io/;https://github.com/QQQQQAQQQQQ", "dblp": "76/185-13;181/5081;;;12/4988;;45/4954-8.html;", "google_scholar": ";pDMnGloAAAAJ;yETXy34AAAAJ;;e8xT-e0AAAAJ;;;", "orcid": ";0000-0002-3683-0554;;0000-0001-5212-6500;0000-0002-5143-3645;;;", "linkedin": ";;;;youfang-lin-a1625091/;;;", "or_profile": "~Peng_Cheng1;~Xianyuan_Zhan1;~Zhihao_Wu2;~Wenjia_Zhang2;~Youfang_Lin1;~Shou_cheng_Song1;~Li_Jiang4;~H_A1", "aff": "Beijing Jiaotong University;Tsinghua University;Beijing Jiaotong University;Tsinghua University;Beijing Jiaotong University;Beijing Jiaotong University;Tsinghua University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;tsinghua.edu.cn;bjtu.edu.cn;tsinghua.edu.cn;bjtu.edu.cn;bjtu.edu.cn;tsinghua.edu.cn;bjtu.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;PhD student;Full Professor;MS student;MS student;Undergrad student", "bibtex": "@inproceedings{\ncheng2023look,\ntitle={Look Beneath the Surface: Exploiting Fundamental Symmetry for Sample-Efficient Offline {RL}},\nauthor={Peng Cheng and Xianyuan Zhan and Zhihao Wu and Wenjia Zhang and Youfang Lin and Shou cheng Song and Han Wang and Li Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=kyXMU3H7RB}\n}", "github": "", "project": "", "reviewers": "xNUN;dZPC;5ovg;P7X7", "pdf_size": 18967650, "rating": "5;5;5;7", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "63;78;40;102", "wc_strengths": "32;46;35;35", "wc_weaknesses": "41;71;82;136", "wc_questions": "49;68;54;95", "wc_limitations": "21;1;7;34", "wc_review": "206;264;218;402", "wc_reply_reviewers": "0;28;21;332", "wc_reply_authors": "0;0;0;642", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;4", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 22.554101622543072 ], "wc_strengths_avg": [ 37.0, 5.338539126015656 ], "wc_weaknesses_avg": [ 82.5, 34.34020966738555 ], "wc_questions_avg": [ 66.5, 17.867568385205637 ], "wc_limitations_avg": [ 15.75, 12.794041581923986 ], "wc_review_avg": [ 272.5, 77.83797273824646 ], "wc_reply_reviewers_avg": [ 95.25, 137.07548103143756 ], "wc_reply_authors_avg": [ 160.5, 277.9941546148048 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11799403449594778681&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": 
"bjtu.edu.cn;tsinghua.edu.cn;bjtu.edu.cn;tsinghua.edu.cn;bjtu.edu.cn;bjtu.edu.cn;tsinghua.edu.cn;bjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;1;0;0;1;0", "aff_unique_norm": "Beijing Jiao Tong University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.tsinghua.edu.cn", "aff_unique_abbr": "BJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "l0zLcLGdcL", "title": "Memory-Assisted Sub-Prototype Mining for Universal Domain Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Universal domain adaptation aims to align the classes and reduce the feature gap between the same category of the source and target domains. The target private category is set as the unknown class during the adaptation process, as it is not included in the source domain. However, most existing methods overlook the intra-class structure within a category, especially in cases where there exists significant concept shift between the samples belonging to the same category. When samples with large concept shift are forced to be pushed together, it may negatively affect the adaptation performance. Moreover, from the interpretability aspect, it is unreasonable to align visual features with significant differences, such as fighter jets and civil aircraft, into the same category. Unfortunately, due to such semantic ambiguity and annotation cost, categories are not always classified in detail, making it difficult for the model to perform precise adaptation. To address these issues, we propose a novel Memory-Assisted Sub-Prototype Mining (MemSPM) method that can learn the differences between samples belonging to the same category and mine sub-classes when there exists significant concept shift between them. By doing so, our model learns a more reasonable feature space that enhances the transferability and reflects the inherent differences among samples annotated as the same category. We evaluate the effectiveness of our MemSPM method over multiple scenarios, including UniDA, OSDA, and PDA. 
Our method achieves state-of-the-art performance on four benchmarks in most cases.", "keywords": "Transfer Learning;Universal Domain Adaptation;Memory-Assisted Network;Sub-Prototype Mining", "primary_area": "", "supplementary_material": "/attachment/7f957b412b98214ff1f2faa2bc838fe2b69a7713.pdf", "author": "Yuxiang Lai;Xinghong Liu;Tao Zhou;Yi Zhou", "authorids": "~Yuxiang_Lai1;~Xinghong_Liu1;~Tao_Zhou5;~Yi_Zhou8", "gender": "M;;M;M", "homepage": ";;https://taozh2017.github.io/;https://cse.seu.edu.cn/2021/0303/c23024a362239/page.htm", "dblp": ";;98/4450-2;01/1901-7", "google_scholar": "0hFskFkAAAAJ;;LPPsgWUAAAAJ;https://scholar.google.co.uk/citations?user=EnDCJKMAAAAJ", "orcid": ";;0000-0002-3733-7286;", "linkedin": "yuxiang-lai-335528276/;;;", "or_profile": "~Yuxiang_Lai1;~Xinghong_Liu1;~Tao_Zhou5;~Yi_Zhou8", "aff": "Southeast University;;Nanjing University of Science and Technology;Southeast University", "aff_domain": "seu.edu.cn;;njust.edu.cn;seu.edu.cn", "position": "Undergrad student;;Full Professor;Associate Professor", "bibtex": "@misc{\nlai2023memoryassisted,\ntitle={Memory-Assisted Sub-Prototype Mining for Universal Domain Adaptation},\nauthor={Yuxiang Lai and Xinghong Liu and Tao Zhou and Yi Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=l0zLcLGdcL}\n}", "github": "", "project": "", "reviewers": "QTwQ;YkYx;pJBT;EAMn;S9DQ", "site": "https://openreview.net/forum?id=l0zLcLGdcL", "pdf_size": 1617109, "rating": "3;5;6;6;6", "confidence": "5;5;5;5;5", "soundness": "1;3;3;3;2", "novelty": "2;2;2;3;2", "presentation": "2;3;2;2;3", "wc_summary": "80;32;56;175;95", "wc_strengths": "39;24;48;109;151", "wc_weaknesses": "355;90;53;805;344", "wc_questions": "12;4;153;70;13", "wc_limitations": "19;6;1;16;92", "wc_review": "505;156;311;1175;695", "wc_reply_reviewers": "0;0;23;100;31", "wc_reply_authors": "0;0;10;48;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 87.6, 48.66456616471578 ], "wc_strengths_avg": [ 74.2, 48.07244532993927 ], "wc_weaknesses_avg": [ 329.4, 268.6161573695819 ], "wc_questions_avg": [ 50.4, 56.45741758174916 ], "wc_limitations_avg": [ 26.8, 33.246954747766 ], "wc_review_avg": [ 568.4, 353.366438700678 ], "wc_reply_reviewers_avg": [ 30.8, 36.73363581242674 ], "wc_reply_authors_avg": [ 11.6, 18.60752535938081 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4357270388731761345&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Southeast University;Nanjing University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;http://www.nust.edu.cn/", "aff_unique_abbr": "SEU;NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Feature Likelihood Divergence: Evaluating the Generalization of Generative Models Using Samples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70620", "id": "l2VKZkolT7", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/68b138608ef80b08d65b1bd9594d9559-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l2VKZkolT7", "openreview": "https://openreview.net/forum?id=l2VKZkolT7", "poster": "/media/PosterPDFs/NeurIPS%202023/70620.png?t=1702044517.610758", "slides": "https://nips.cc/virtual/2023/poster/70620", "video": "https://nips.cc/virtual/2023/poster/70620", "author_site": "Marco Jiralerspong, Joey Bose, Ian Gemp, Chongli Qin, Yoram Bachrach, Gauthier Gidel", "tldr": "", "abstract": "The past few years have seen impressive progress in the development of deep generative models capable of producing high-dimensional, complex, and photo-realistic data. However, current methods for evaluating such models remain incomplete: standard likelihood-based metrics do not always apply and rarely correlate with perceptual fidelity, while sample-based metrics, such as FID, are insensitive to overfitting, i.e., inability to generalize beyond the training set. To address these limitations, we propose a new metric called the Feature Likelihood Divergence (FLD), a parametric sample-based score that uses density estimation to provide a comprehensive trichotomic evaluation accounting for novelty (i.e., different from the training samples), fidelity, and diversity of generated samples. We empirically demonstrate the ability of FLD to identify specific overfitting problem cases, where previously proposed metrics fail. We also extensively evaluate FLD on various image datasets and model classes, demonstrating its ability to match intuitions of previous metrics like FID while offering a more comprehensive evaluation of generative models.", "keywords": "Generative model;FID;Evaluation;Precision;Recall;Likelihood;Overfitting;Memorization;Generalization;Diffusion;GANs", "primary_area": "", "supplementary_material": "", "author": "Marco Jiralerspong;Joey Bose;Ian Gemp;Chongli Qin;Yoram Bachrach;Gauthier Gidel", "authorids": "~Marco_Jiralerspong1;~Joey_Bose1;~Ian_Gemp1;~Chongli_Qin1;~Yoram_Bachrach2;~Gauthier_Gidel1", "gender": ";M;M;;M;M", "homepage": "https://marcojira.github.io/;https://joeybose.github.io/;https://imgemp.github.io/;https://www.chongliqin.com;https://gauthiergidel.github.io/;https://sites.google.com/view/yoram-bachrach", "dblp": "319/6631;174/3372;66/10996;;188/6326;70/2671", "google_scholar": "https://scholar.google.ca/citations?user=q2_P1YcAAAAJ;ybPyI7IAAAAJ;5vo3MeEAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=bDrXQPUAAAAJ;0W63ivcAAAAJ", "orcid": ";;;;;0000-0002-4382-7636", "linkedin": ";;;;;yoram-bachrach-0a03731/", "or_profile": "~Marco_Jiralerspong1;~Joey_Bose1;~Ian_Gemp1;~Chongli_Qin1;~Gauthier_Gidel1;~Yoram_Bachrach1", "aff": "Universit\u00e9 de Montr\u00e9al;McGill University and Mila;Google DeepMind;Google;Mila - Quebec Artificial Intelligence Institute;Google DeepMind", "aff_domain": "umontreal.ca;mcgill.ca;google.com;google.com;mila.quebec;google.com", "position": "MS student;PhD student;Research Scientist;Research Scientist;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\njiralerspong2023feature,\ntitle={Feature Likelihood Score: Evaluating the Generalization of Generative Models Using Samples},\nauthor={Marco Jiralerspong and Joey Bose and Ian Gemp and Chongli Qin and Yoram Bachrach and Gauthier Gidel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l2VKZkolT7}\n}", "github": "", 
"project": "", "reviewers": "SDnT;euCm;V3Uo;HK8L;cRFc", "pdf_size": 6968576, "rating": "6;6;7;7;7", "confidence": "4;5;4;4;4", "soundness": "3;3;2;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "158;185;102;124;73", "wc_strengths": "94;52;141;136;67", "wc_weaknesses": "123;111;123;355;125", "wc_questions": "52;52;102;225;70", "wc_limitations": "111;12;60;13;37", "wc_review": "538;412;528;853;372", "wc_reply_reviewers": "54;182;60;148;14", "wc_reply_authors": "0;772;0;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 128.4, 39.661568299803776 ], "wc_strengths_avg": [ 98.0, 35.73793502708292 ], "wc_weaknesses_avg": [ 167.4, 93.93103853359655 ], "wc_questions_avg": [ 100.2, 65.0181513117683 ], "wc_limitations_avg": [ 46.6, 36.729279873147526 ], "wc_review_avg": [ 540.6, 168.95159069982148 ], "wc_reply_reviewers_avg": [ 91.6, 62.908187066549615 ], "wc_reply_authors_avg": [ 154.4, 308.79999999999995 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18349114133030485561&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umontreal.ca;mcgill.ca;google.com;google.com;mila.quebec;google.com", "author_num": 6, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;McGill University;Google;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;Google DeepMind;Artificial Intelligence", "aff_unique_url": "https://www.umontreal.ca;https://www.mcgill.ca;https://deepmind.com;https://mila.quebec", "aff_unique_abbr": "UdeM;McGill;DeepMind;Mila", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;2;0;1", "aff_country_unique": "Canada;United Kingdom;United States" }, { "title": "Scan and Snap: Understanding Training Dynamics and Token Composition in 1-layer Transformer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70619", "id": "l3HUgVHqGQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e359ebe56ba306b674e8952349c6049e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l3HUgVHqGQ", "openreview": "https://openreview.net/forum?id=l3HUgVHqGQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70619.png?t=1702243917.0224617", "slides": "https://nips.cc/virtual/2023/poster/70619", "video": "https://nips.cc/virtual/2023/poster/70619", "author_site": "Yuandong Tian, Yiping Wang, Beidi Chen, Simon Du", "tldr": "", "abstract": "Transformer architecture has shown impressive performance in multiple research domains and has become the backbone of many neural network models. However, there is limited understanding on how it works. In particular, with a simple predictive loss, how the representation emerges from the gradient \\emph{training dynamics} remains a mystery. In this paper, for 1-layer transformer with one self-attention layer plus one decoder layer, we analyze its SGD training dynamics for the task of next token prediction in a mathematically rigorous manner. 
We open the black box of the dynamic process of how the self-attention layer combines input tokens, and reveal the nature of the underlying inductive bias. More specifically, under the assumptions of (a) no positional encoding, (b) a long input sequence, and (c) a decoder layer that learns faster than the self-attention layer, we prove that self-attention acts as a \\emph{discriminative scanning algorithm}: \n starting from uniform attention, it gradually attends more to distinct key tokens for a specific next token to be predicted, and pays less attention to common key tokens that occur across different next tokens. Among distinct tokens, it progressively drops attention weights, following the order of low to high co-occurrence between the key and the query token in the training set. Interestingly, this procedure does not lead to winner-takes-all, but stops due to a \\emph{phase transition} that is controllable by the learning rate of the decoder layer, leaving an (almost) fixed token combination. We verify this \\textbf{\\emph{scan and snap}} dynamics on synthetic and real-world data (WikiText-103).", "keywords": "transformer;training dynamics;theoretical analysis;self-attention;interpretability;neural network understanding", "primary_area": "", "supplementary_material": "/attachment/ab82bb9f238416dd29d2f20acd6cbea7bec1bd0b.pdf", "author": "Yuandong Tian;Yiping Wang;Beidi Chen;Simon Shaolei Du", "authorids": "~Yuandong_Tian1;~Yiping_Wang2;~Beidi_Chen1;~Simon_Shaolei_Du1", "gender": "M;M;F;M", "homepage": "http://yuandong-tian.com;https://ypwang61.github.io;https://www.andrew.cmu.edu/user/beidic/;http://simonshaoleidu.com", "dblp": "t/YuandongTian;13/1444-3;192/1339;176/5602", "google_scholar": "0mgEF28AAAAJ;IuMFxFUAAAAJ;;OttawxUAAAAJ", "orcid": "0000-0003-4202-4847;;;", "linkedin": "yuandongtian;yiping-wang-323647294/;;", "or_profile": "~Yuandong_Tian1;~Yiping_Wang2;~Beidi_Chen1;~Simon_Shaolei_Du1", "aff": "Meta AI (FAIR);Zhejiang University;Meta Facebook;Meta Facebook", "aff_domain": "meta.com;zju.edu.cn;fb.com;fb.com", "position": "Research Scientist;Undergrad student;Researcher;Visiting Professor", "bibtex": "@inproceedings{\ntian2023scan,\ntitle={Scan and Snap: Understanding Training Dynamics and Token Composition in 1-layer Transformer},\nauthor={Yuandong Tian and Yiping Wang and Beidi Chen and Simon Shaolei Du},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l3HUgVHqGQ}\n}", "github": "", "project": "", "reviewers": "TXYd;sxfK;ZSxf;zgiA;nH6i", "pdf_size": 941906, "rating": "5;5;6;6;7", "confidence": "4;2;4;1;3", "soundness": "3;3;2;3;4", "novelty": "3;2;2;3;4", "presentation": "2;2;3;3;4", "wc_summary": "99;50;46;105;64", "wc_strengths": "31;41;31;73;49", "wc_weaknesses": "240;82;146;80;69", "wc_questions": "109;110;56;1;75", "wc_limitations": "10;7;42;1;20", "wc_review": "489;290;321;260;277", "wc_reply_reviewers": "1020;51;208;15;96", "wc_reply_authors": "1468;57;110;25;133", "reply_reviewers": "3;1;1;1;1", "reply_authors": "5;2;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 72.8, 24.652788888886384 ], "wc_strengths_avg": [ 45.0, 15.543487382180357 ], "wc_weaknesses_avg": [ 123.4, 64.28561269833243 ], "wc_questions_avg": [ 70.2, 40.25617964983761 ], "wc_limitations_avg": [ 16.0, 14.38054240979804 ],
"wc_review_avg": [ 327.4, 83.23124413343825 ], "wc_reply_reviewers_avg": [ 278.0, 376.63935004192007 ], "wc_reply_authors_avg": [ 358.6, 556.0066906072265 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.04583492485141057, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10559864520549789725&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "meta.com;zju.edu.cn;fb.com;fb.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Meta;Zhejiang University", "aff_unique_dep": "Facebook AI Research (FAIR);", "aff_unique_url": "https://ai.facebook.com;https://www.zju.edu.cn", "aff_unique_abbr": "Meta AI;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "BIRD: Generalizable Backdoor Detection and Removal for Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70618", "id": "l3yxZS3QdT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/802e90325f4c8546e13e5763b2ecab88-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l3yxZS3QdT", "openreview": "https://openreview.net/forum?id=l3yxZS3QdT", "poster": "/media/PosterPDFs/NeurIPS%202023/70618.png?t=1701555296.6597326", "slides": "https://nips.cc/virtual/2023/poster/70618", "video": "https://nips.cc/virtual/2023/poster/70618", "author_site": "Xuan Chen, Wenbo Guo, Wenbo Guo, Guanhong Tao, Xiangyu Zhang, Dawn Song", "tldr": "", "abstract": "Backdoor attacks pose a severe threat to the supply chain management of deep reinforcement learning (DRL) policies. Despite initial defenses proposed in recent studies, these methods have very limited generalizability and scalability. To address this issue, we propose BIRD, a technique to detect and remove backdoors from a pretrained DRL policy in a clean environment without requiring any knowledge about the attack specifications and accessing its training process. By analyzing the unique properties and behaviors of backdoor attacks, we formulate trigger restoration as an optimization problem and design a novel metric to detect backdoored policies. We also design a finetuning method to remove the backdoor, while maintaining the agent's performance in the clean environment. We evaluate BIRD against three backdoor attacks in ten different single-agent or multi-agent environments. 
Our results verify the effectiveness, efficiency, and generalizability of BIRD, as well as its robustness to different attack variations and adaptations.", "keywords": "Backdoor Defense;Deep Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/f14705f63ad94ce2578e1ae79508f343a5725407.zip", "author": "Xuan Chen;Wenbo Guo;Guanhong Tao;Xiangyu Zhang;Dawn Song", "authorids": "~Xuan_Chen3;~Wenbo_Guo1;~Guanhong_Tao1;~Xiangyu_Zhang3;~Dawn_Song1", "gender": ";M;;M;F", "homepage": ";https://henrygwb.github.io/;;https://www.cs.purdue.edu/homes/xyzhang;", "dblp": ";144/1238-2.html;;;s/DXSong", "google_scholar": ";KyPheRMAAAAJ;;PXbu1wIAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xuan_Chen3;~Wenbo_Guo1;~Guanhong_Tao1;~Xiangyu_Zhang3;~Dawn_Song1", "aff": ";University of California, Berkeley;;Purdue University;University of California, Berkeley", "aff_domain": ";berkeley.edu;;cs.purdue.edu;berkeley.edu", "position": ";Postdoc;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023bird,\ntitle={{BIRD}: Generalizable Backdoor Detection and Removal for Deep Reinforcement Learning},\nauthor={Xuan Chen and Wenbo Guo and Guanhong Tao and Xiangyu Zhang and Dawn Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l3yxZS3QdT}\n}", "github": "", "project": "", "reviewers": "M2QE;jZ8t;Mdit;V9nG", "pdf_size": 554188, "rating": "5;5;5;7", "confidence": "5;3;3;2", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "80;100;88;176", "wc_strengths": "24;167;32;130", "wc_weaknesses": "270;134;49;152", "wc_questions": "176;32;5;14", "wc_limitations": "8;54;5;1", "wc_review": "558;487;179;473", "wc_reply_reviewers": "152;31;10;8", "wc_reply_authors": "107;107;101;63", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;3;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.0, 38.19685850956856 ], "wc_strengths_avg": [ 88.25, 61.71861550618257 ], "wc_weaknesses_avg": [ 151.25, 78.82694653479862 ], "wc_questions_avg": [ 56.75, 69.53191713163099 ], "wc_limitations_avg": [ 17.0, 21.50581316760657 ], "wc_review_avg": [ 424.25, 145.21600290601583 ], "wc_reply_reviewers_avg": [ 50.25, 59.43220928082684 ], "wc_reply_authors_avg": [ 94.5, 18.350749303502567 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7089992926900472404&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";berkeley.edu;;cs.purdue.edu;berkeley.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.purdue.edu", "aff_unique_abbr": "UC Berkeley;Purdue", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FOCAL: Contrastive Learning for Multimodal Time-Series Sensing Signals in Factorized Orthogonal Latent Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70617", "id": "l4CZCKXoSn",
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/93e98ddf39a9beb0a97fbbe56a986c80-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l4CZCKXoSn", "openreview": "https://openreview.net/forum?id=l4CZCKXoSn", "poster": "/media/PosterPDFs/NeurIPS%202023/70617.png?t=1699556415.4960556", "slides": "https://nips.cc/virtual/2023/poster/70617", "video": "https://nips.cc/virtual/2023/poster/70617", "author_site": "Shengzhong Liu, Tomoyoshi Kimura, Dongxin Liu, Ruijie Wang, Jinyang Li, Suhas Diggavi, Mani Srivastava, Tarek Abdelzaher", "tldr": "", "abstract": "This paper proposes a novel contrastive learning framework, called FOCAL, for extracting comprehensive features from multimodal time-series sensing signals through self-supervised training. Existing multimodal contrastive frameworks mostly rely on the shared information between sensory modalities, but do not explicitly consider the exclusive modality information that could be critical to understanding the underlying sensing physics. Besides, contrastive frameworks for time series have not handled the temporal information locality appropriately. FOCAL solves these challenges by making the following contributions: First, given multimodal time series, it encodes each modality into a factorized latent space consisting of shared features and private features that are orthogonal to each other. The shared space emphasizes feature patterns consistent across sensory modalities through a modal-matching objective. In contrast, the private space extracts modality-exclusive information through a transformation-invariant objective. Second, we propose a temporal structural constraint for modality features, such that the average distance between temporally neighboring samples is no larger than that of temporally distant samples. Extensive evaluations are performed on four multimodal sensing datasets with two backbone encoders and two classifiers to demonstrate the superiority of FOCAL. It consistently outperforms the state-of-the-art baselines in downstream tasks with a clear margin, under different ratios of available labels. 
The code and self-collected dataset are available at https://github.com/tomoyoshki/focal.", "keywords": "Multimodal Time Series; Contrastive Learning; Factorized Latent Space", "primary_area": "", "supplementary_material": "/attachment/95e4e19c55e3b8ee3e19d867ee9a476c43b0119f.pdf", "author": "Shengzhong Liu;Tomoyoshi Kimura;Dongxin Liu;Ruijie Wang;Jinyang Li;Suhas Diggavi;Mani Srivastava;Tarek Abdelzaher", "authorids": "~Shengzhong_Liu1;~Tomoyoshi_Kimura1;~Dongxin_Liu1;~Ruijie_Wang2;~Jinyang_Li2;~Suhas_Diggavi1;~Mani_Srivastava1;~Tarek_Abdelzaher1", "gender": "M;M;M;M;;;M;M", "homepage": "https://liushengzhong1023.github.io/;https://www.tomoyoshikimura.com/;https://scholar.google.com/citations?user=Aa8c9EAAAAAJ&hl=en;https://wjerry5.github.io;;https://www.ee.ucla.edu/suhas-diggavi/;http://abdelzaher.cs.illinois.edu/;https://samueli.ucla.edu/people/mani-srivastava/", "dblp": "166/5424;;132/9305;57/5759-4;79/572-4;d/SNDiggavi.html#j15;a/TarekFAbdelzaher;s/ManiBSrivastava.html", "google_scholar": "REzrIucAAAAJ;8uuJfmoAAAAJ;;S1TuNNIAAAAJ;VbeL3UUAAAAJ;;https://scholar.google.com.tw/citations?user=cA28Zs0AAAAJ;X2Qs7XYAAAAJ", "orcid": ";0009-0008-4297-5865;;;0000-0001-9285-9872;;0000-0003-3883-7220;0000-0002-3782-9192", "linkedin": ";tomoyoshi-kimura/;dongxin-liu-b85b7960/;;;;tarek-abdelzaher-0216071/;msrivastava/", "or_profile": "~Shengzhong_Liu1;~Tomoyoshi_Kimura1;~Dongxin_Liu1;~Ruijie_Wang2;~Jinyang_Li2;~Suhas_Diggavi1;~Tarek_Abdelzaher1;~Mani_Srivastava2", "aff": "University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois at Urbana Champaign;;University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign;University of California, Los Angeles;University of Illinois, Urbana Champaign;University of California, Los Angeles", "aff_domain": "illinois.edu;cs.illinois.edu;;uiuc.edu;uiuc.edu;ucla.edu;illinois.edu;ucla.edu", "position": "Postdoc;Undergrad student;;PhD student;PhD student;Professor;Full Professor;Distinguished Professor", "bibtex": "@inproceedings{\nliu2023focal,\ntitle={{FOCAL}: Contrastive Learning for Multimodal Time-Series Sensing Signals in Factorized Orthogonal Latent Space},\nauthor={Shengzhong Liu and Tomoyoshi Kimura and Dongxin Liu and Ruijie Wang and Jinyang Li and Suhas Diggavi and Mani Srivastava and Tarek Abdelzaher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l4CZCKXoSn}\n}", "github": "", "project": "", "reviewers": "8mBK;Q2hY;oGNZ;WTA9;zwVA", "pdf_size": 998192, "rating": "5;5;6;7;7", "confidence": "4;5;3;2;3", "soundness": "3;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;3;4;3", "wc_summary": "74;77;50;106;118", "wc_strengths": "51;19;86;50;51", "wc_weaknesses": "167;368;73;37;54", "wc_questions": "2;6;2;54;2", "wc_limitations": "14;7;9;1;21", "wc_review": "308;477;220;248;246", "wc_reply_reviewers": "20;20;7;5;51", "wc_reply_authors": "117;245;0;0;473", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 85.0, 24.24871130596428 ], "wc_strengths_avg": [ 51.4, 21.209431864149497 ], "wc_weaknesses_avg": [ 139.8, 122.64322239732613 ], "wc_questions_avg": [ 13.2, 20.458738964071074 ], "wc_limitations_avg": [ 10.4, 6.740919818541087 ], "wc_review_avg": [ 299.8, 
93.18025541926788 ], "wc_reply_reviewers_avg": [ 20.6, 16.451139778143034 ], "wc_reply_authors_avg": [ 167.0, 177.76276325485043 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8770580193070292, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12530178897226143848&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "illinois.edu;cs.illinois.edu;;uiuc.edu;uiuc.edu;ucla.edu;illinois.edu;ucla.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;2;0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://illinois.edu;https://www.ucla.edu", "aff_unique_abbr": "UIUC;UIUC;UCLA", "aff_campus_unique_index": "0;0;0;0;1;0;1", "aff_campus_unique": "Urbana-Champaign;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Relative Entropic Optimal Transport: a (Prior-aware) Matching Perspective to (Unbalanced) Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70616", "id": "l61Kp1zBwC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4621451c25a7aa175dc00e5dd4a243a3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l61Kp1zBwC", "openreview": "https://openreview.net/forum?id=l61Kp1zBwC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70616", "video": "https://nips.cc/virtual/2023/poster/70616", "author_site": "Liangliang Shi, Haoyu Zhen, Gu Zhang, Junchi Yan", "tldr": "", "abstract": "Classification is a fundamental problem in machine learning, and considerable efforts have been recently devoted to the demanding long-tailed setting due to its prevalence in nature. Departing from the Bayesian framework, this paper rethinks classification from a matching perspective by studying the matching probability between samples and labels with an optimal transport (OT) formulation. Specifically, we first propose a new variant of optimal transport, called Relative Entropic Optimal Transport (RE-OT), which guides the coupling solution to a known prior information matrix. We give some theoretical results and their proofs for RE-OT, and surprisingly find that RE-OT can help to deblur barycenter images. Then we adopt inverse RE-OT for training on long-tailed data and find that the loss derived from RE-OT has a similar form to the Softmax-based cross-entropy loss, indicating a close connection between optimal transport and classification and the potential for transferring concepts between these two academic fields, such as barycentric projection in OT, which can map the labels back to the feature space. We further derive an epoch-varying RE-OT loss, and conduct experiments on unbalanced image classification, molecule classification, instance segmentation and representation learning.
Experimental results show its effectiveness.", "keywords": "Optimal Transport; Unbalanced Classification", "primary_area": "", "supplementary_material": "/attachment/58c8dc18df84f07f46ad4f50e2d96fbbfde0f9d6.pdf", "author": "Liangliang Shi;Haoyu Zhen;Gu Zhang;Junchi Yan", "authorids": "~Liangliang_Shi1;~Haoyu_Zhen1;~Gu_Zhang1;~Junchi_Yan2", "gender": "M;M;M;M", "homepage": ";https://haoyuzhen.com;https://www.gu-zhang.com/;http://thinklab.sjtu.edu.cn/", "dblp": "89/8730;353/0317;;60/7949.html", "google_scholar": "Qf1k8lUAAAAJ;_btLQY0AAAAJ;ctFTmmgAAAAJ;ga230VoAAAAJ", "orcid": "0000-0001-7033-4207;;;0000-0001-9639-7679", "linkedin": ";;;", "or_profile": "~Liangliang_Shi1;~Haoyu_Zhen1;~Gu_Zhang1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nshi2023relative,\ntitle={Relative Entropic Optimal Transport: a (Prior-aware) Matching Perspective to (Unbalanced) Classification},\nauthor={Liangliang Shi and Haoyu Zhen and Gu Zhang and Junchi Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l61Kp1zBwC}\n}", "github": "", "project": "", "reviewers": "HboX;pKsm;RCtv;46Rh;Cbtk", "pdf_size": 2055913, "rating": "5;5;5;6;6", "confidence": "2;3;4;3;4", "soundness": "4;2;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;3;3;3;2", "wc_summary": "50;44;191;60;63", "wc_strengths": "69;29;32;37;69", "wc_weaknesses": "198;54;107;235;35", "wc_questions": "17;60;47;85;250", "wc_limitations": "40;21;3;9;1", "wc_review": "374;208;380;426;418", "wc_reply_reviewers": "191;0;29;16;10", "wc_reply_authors": "426;48;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 81.6, 55.123860532440936 ], "wc_strengths_avg": [ 47.2, 17.982213434391216 ], "wc_weaknesses_avg": [ 125.8, 78.60127225433442 ], "wc_questions_avg": [ 91.8, 82.0814229896144 ], "wc_limitations_avg": [ 14.8, 14.4 ], "wc_review_avg": [ 361.2, 79.26260152177696 ], "wc_reply_reviewers_avg": [ 49.2, 71.5189485381322 ], "wc_reply_authors_avg": [ 94.8, 166.64021123366354 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.32732683535398854, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3367935988313968669&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Fine-Grained Visual Prompting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70615", "id": "l6R4Go3noz", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e9fa6e716940a7cfc60c46e6f702f52-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l6R4Go3noz", "openreview": "https://openreview.net/forum?id=l6R4Go3noz", "poster": "/media/PosterPDFs/NeurIPS%202023/70615.png?t=1697474830.1365113", "slides": "https://nips.cc/virtual/2023/poster/70615", "video": "https://nips.cc/virtual/2023/poster/70615", "author_site": "Lingfeng Yang, Yueze Wang, Yueze Wang, Xiang Li, Xinlong Wang, Jian Yang", "tldr": "", "abstract": "Vision-Language Models (VLMs), such as CLIP, have demonstrated impressive zero-shot transfer capabilities in image-level visual perception. However, these models have shown limited performance in instance-level tasks that demand precise localization and recognition. Previous works have suggested that incorporating visual prompts, such as colorful boxes or circles, can improve the ability of models to recognize objects of interest. Nonetheless, compared to language prompting, visual prompting designs are rarely explored. Existing approaches, which employ coarse visual cues such as colorful boxes or circles, often result in sub-optimal performance due to the inclusion of irrelevant and noisy pixels. In this paper, we carefully study the visual prompting designs by exploring more fine-grained markings, such as segmentation masks and their variations. In addition, we introduce a new zero-shot framework that leverages pixel-level annotations acquired from a generalist segmentation model for fine-grained visual prompting. Consequently, our investigation reveals that a straightforward application of blur outside the target mask, referred to as the Blur Reverse Mask, exhibits exceptional effectiveness. This proposed prompting strategy leverages the precise mask annotations to reduce focus on weakly related regions while retaining spatial coherence between the target and the surrounding background. Our **F**ine-**G**rained **V**isual **P**rompting (**FGVP**) demonstrates superior performance in zero-shot comprehension of referring expressions on the RefCOCO, RefCOCO+, and RefCOCOg benchmarks. It outperforms prior methods by an average margin of 3.0\\% to 4.6\\%, with a maximum improvement of 12.5\\% on the RefCOCO+ testA subset. The part detection experiments conducted on the PACO dataset further validate the preponderance of FGVP over existing visual prompting techniques. 
Code is available at https://github.com/ylingfeng/FGVP.", "keywords": "visual prompting;zero-shot;visual language model;referring expression comprehension", "primary_area": "", "supplementary_material": "/attachment/f9149ad7507019ee4ece5126c38f72ac3f475c4d.pdf", "author": "Lingfeng Yang;Yueze Wang;Xiang Li;Xinlong Wang;Jian Yang", "authorids": "~Lingfeng_Yang1;~Yueze_Wang1;~Xiang_Li20;~Xinlong_Wang2;~Jian_Yang1", "gender": "M;M;M;M;M", "homepage": ";https://yuezewang.github.io/;http://implus.github.io/;;", "dblp": "45/7593;;40/1491-41;;y/JianYang3.html", "google_scholar": "RLhH0jwAAAAJ;;oamjJdYAAAAJ;DPz0DjYAAAAJ;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ", "orcid": "0000-0002-2725-8947;;;;", "linkedin": ";;;;", "or_profile": "~Lingfeng_Yang1;~Yueze_Wang1;~Xiang_Li20;~Xinlong_Wang2;~Jian_Yang1", "aff": "Beijing Academy of Artificial Intelligence;Tianjin University;Nankai University;Beijing Academy of Artificial Intelligence;Nanjing University of Science and Technology", "aff_domain": "baai.ac.cn;tju.edu.cn;nankai.edu.cn;baai.ac.cn;njust.edu.cn", "position": "PhD student;MS student;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nyang2023finegrained,\ntitle={Fine-Grained Visual Prompting},\nauthor={Lingfeng Yang and Yueze Wang and Xiang Li and Xinlong Wang and Jian Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l6R4Go3noz}\n}", "github": "", "project": "", "reviewers": "Z75J;Xx88;TfGG;Rf2R;jMVP", "pdf_size": 2486591, "rating": "6;6;7;7;7", "confidence": "2;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "24;93;53;262;40", "wc_strengths": "8;117;41;44;24", "wc_weaknesses": "53;249;70;112;73", "wc_questions": "3;42;37;28;84", "wc_limitations": "3;6;15;38;12", "wc_review": "91;507;216;484;233", "wc_reply_reviewers": "18;161;0;68;20", "wc_reply_authors": "26;397;0;26;26", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.4, 86.85758458534292 ], "wc_strengths_avg": [ 46.8, 37.40267370122088 ], "wc_weaknesses_avg": [ 111.4, 71.46075846224976 ], "wc_questions_avg": [ 38.8, 26.286118009321957 ], "wc_limitations_avg": [ 14.8, 12.351518125315609 ], "wc_review_avg": [ 306.2, 162.3199310004782 ], "wc_reply_reviewers_avg": [ 53.4, 58.32872362738618 ], "wc_reply_authors_avg": [ 95.0, 151.33538911966363 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6123724356957944, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4133783827854296544&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "baai.ac.cn;tju.edu.cn;nankai.edu.cn;baai.ac.cn;njust.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Beijing Academy of Artificial Intelligence;Tianjin University;Nankai University;Nanjing University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.baaic.cn;http://www.tju.edu.cn;http://www.nankai.edu.cn;http://www.nust.edu.cn/", "aff_unique_abbr": "BAAI;TJU;NKU;NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": 
"China" }, { "title": "Practical Contextual Bandits with Feedback Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70614", "id": "l6pYRbuHpO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/618c95f4557c15b253fb0e6f548ea0c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l6pYRbuHpO", "openreview": "https://openreview.net/forum?id=l6pYRbuHpO", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70614", "video": "https://nips.cc/virtual/2023/poster/70614", "author_site": "Mengxiao Zhang, Yuheng Zhang, Olga Vrousgou, Haipeng Luo, Paul Mineiro", "tldr": "", "abstract": "While contextual bandit has a mature theory, effectively leveraging different feedback patterns to enhance the pace of learning remains unclear. Bandits with feedback graphs, which interpolates between the full information and bandit regimes, provides a promising framework to mitigate the statistical complexity of learning. In this paper, we propose and analyze an approach to contextual bandits with feedback graphs based upon reduction to regression. The resulting algorithms are computationally practical and achieve established minimax rates, thereby reducing the statistical complexity in real-world applications.", "keywords": "Online learning with feedback graphs;Contextual Bandits;Practical algorithms", "primary_area": "", "supplementary_material": "/attachment/e3fb0119e858b2d188eb5ac8a56a603a9b08da3a.zip", "author": "Mengxiao Zhang;Yuheng Zhang;Olga Vrousgou;Haipeng Luo;Paul Mineiro", "authorids": "~Mengxiao_Zhang2;~Yuheng_Zhang1;~Olga_Vrousgou1;~Haipeng_Luo1;~Paul_Mineiro1", "gender": ";M;F;M;", "homepage": ";;https://www.microsoft.com/en-us/research/people/olvrousg/;https://haipeng-luo.net/;", "dblp": ";;;62/2576;35/5613", "google_scholar": ";IoEBLNYAAAAJ;;ct2hw4UAAAAJ;", "orcid": ";;;;", "linkedin": ";;olga-vrousgou-5a175a105;;", "or_profile": "~Mengxiao_Zhang2;~Yuheng_Zhang1;~Olga_Vrousgou1;~Haipeng_Luo1;~Paul_Mineiro1", "aff": ";University of Illinois, Urbana Champaign;;University of Southern California;", "aff_domain": ";cs.illinois.edu;;usc.edu;", "position": ";PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nzhang2023practical,\ntitle={Practical Contextual Bandits with Feedback Graphs},\nauthor={Mengxiao Zhang and Yuheng Zhang and Olga Vrousgou and Haipeng Luo and Paul Mineiro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l6pYRbuHpO}\n}", "github": "", "project": "", "reviewers": "towH;o1h9;WhSo;n8Cc;q6E5", "pdf_size": 2719684, "rating": "5;6;6;6;6", "confidence": "3;4;4;3;2", "soundness": "3;3;4;3;3", "novelty": "3;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "90;147;94;189;98", "wc_strengths": "22;71;105;99;73", "wc_weaknesses": "126;71;72;227;74", "wc_questions": "2;1;175;69;176", "wc_limitations": "5;2;1;28;3", "wc_review": "245;292;447;612;424", "wc_reply_reviewers": "0;0;33;50;16", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.6, 38.69160115580641 ], "wc_strengths_avg": [ 74.0, 29.32575659723036 ], "wc_weaknesses_avg": [ 114.0, 60.20963378065009 ], "wc_questions_avg": [ 84.6, 78.20639360052348 ], "wc_limitations_avg": [ 7.8, 
10.186265262597475 ], "wc_review_avg": [ 404.0, 129.10305960743145 ], "wc_reply_reviewers_avg": [ 19.8, 19.415457759218555 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1336306209562122, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13603106929721880199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";cs.illinois.edu;;usc.edu;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.usc.edu", "aff_unique_abbr": "UIUC;USC", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generative Category-level Object Pose Estimation via Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70613", "id": "l6ypbj6Nv5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab59d149fc0c2c9039d3e3049f7914b1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l6ypbj6Nv5", "openreview": "https://openreview.net/forum?id=l6ypbj6Nv5", "poster": "/media/PosterPDFs/NeurIPS%202023/70613.png?t=1701992479.072002", "slides": "https://nips.cc/virtual/2023/poster/70613", "video": "https://nips.cc/virtual/2023/poster/70613", "author_site": "Jiyao Zhang, Mingdong Wu, Hao Dong", "tldr": "", "abstract": "Object pose estimation plays a vital role in embodied AI and computer vision, enabling intelligent agents to comprehend and interact with their surroundings. Despite the practicality of category-level pose estimation, current approaches encounter challenges with partially observed point clouds, known as the multi-hypothesis issue. In this study, we propose a novel solution by reframing category-level object pose estimation as conditional generative modeling, departing from traditional point-to-point regression. Leveraging score-based diffusion models, we estimate object poses by sampling candidates from the diffusion model and aggregating them through a two-step process: filtering out outliers via likelihood estimation and subsequently mean-pooling the remaining candidates. To avoid the costly integration process when estimating the likelihood, we introduce an alternative method that distils an energy-based model from the original score-based model, enabling end-to-end likelihood estimation. Our approach achieves state-of-the-art performance on the REAL275 dataset, surpassing 50% and 60% on the strict 5\u00b02cm and 5\u00b05cm metrics, respectively. Furthermore, our method demonstrates strong generalization to novel categories without the need for fine-tuning and can readily adapt to object pose tracking tasks, yielding comparable results to the current state-of-the-art baselines.
Our checkpoints and demonstrations can be found at https://sites.google.com/view/genpose.", "keywords": "Category-Level Object Pose Estimation;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/171a2cfda3d14376e848a2b2c596ea39c6603b9f.zip", "author": "Jiyao Zhang;Mingdong Wu;Hao Dong", "authorids": "~Jiyao_Zhang1;~Mingdong_Wu1;~Hao_Dong3", "gender": "M;M;M", "homepage": "https://jiyao06.github.io;https://aaronanima.github.io/;https://zsdonghao.github.io", "dblp": ";315/5136;14/1525-3.html", "google_scholar": "nf1Q7P4AAAAJ;https://scholar.google.com/citations?hl=en;xLFL4sMAAAAJ", "orcid": ";;0000-0003-2261-9122", "linkedin": ";;", "or_profile": "~Jiyao_Zhang1;~Mingdong_Wu1;~Hao_Dong3", "aff": "Peking University;Center on Frontiers of Computing Studies,Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023generative,\ntitle={Generative Category-level Object Pose Estimation via Diffusion Models},\nauthor={Jiyao Zhang and Mingdong Wu and Hao Dong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l6ypbj6Nv5}\n}", "github": "", "project": "", "reviewers": "W8Ay;sHLy;sAr7;4bwh;7PCs", "pdf_size": 5550062, "rating": "5;6;7;7;7", "confidence": "4;4;4;4;4", "soundness": "2;3;3;4;4", "novelty": "3;3;3;3;3", "presentation": "2;3;4;4;3", "wc_summary": "60;63;15;72;72", "wc_strengths": "46;38;107;74;124", "wc_weaknesses": "570;60;55;137;199", "wc_questions": "2;25;38;83;8", "wc_limitations": "1;7;17;12;9", "wc_review": "679;193;232;378;412", "wc_reply_reviewers": "424;148;60;0;40", "wc_reply_authors": "1593;517;18;0;25", "reply_reviewers": "1;2;1;0;1", "reply_authors": "5;4;2;1;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 56.4, 21.247117451550928 ], "wc_strengths_avg": [ 77.8, 33.45683786612238 ], "wc_weaknesses_avg": [ 204.2, 190.47666523750357 ], "wc_questions_avg": [ 31.2, 28.84024965217881 ], "wc_limitations_avg": [ 9.2, 5.30659966456864 ], "wc_review_avg": [ 378.8, 171.63612673327256 ], "wc_reply_reviewers_avg": [ 134.4, 152.6847733076223 ], "wc_reply_authors_avg": [ 430.6, 612.993507306562 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 1.469693845669907 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3049502381159421123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "TradeMaster: A Holistic Quantitative Trading Platform Empowered by Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73483", "id": "l7Ggnzaws5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b8f6f7f2ba4137124ac976286eacb611-Abstract-Datasets_and_Benchmarks.html", "pdf": 
"https://openreview.net/pdf?id=l7Ggnzaws5", "openreview": "https://openreview.net/forum?id=l7Ggnzaws5", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73483", "video": "https://nips.cc/virtual/2023/poster/73483", "author_site": "Shuo Sun, Molei Qin, Wentao Zhang, Haochong Xia, Chuqiao Zong, Jie Ying, Yonggang Xie, Lingxuan Zhao, Xinrun Wang, Bo An", "tldr": "", "abstract": "The financial markets, which involve over \\$90 trillion market capitals, attract the attention of innumerable profit-seeking investors globally. Recent explosion of reinforcement learning in financial trading (RLFT) research has shown stellar performance on many quantitative trading tasks. However, it is still challenging to deploy reinforcement learning (RL) methods into real-world financial markets due to the highly composite nature of this domain, which entails design choices and interactions between components that collect financial data, conduct feature engineering, build market environments, make investment decisions, evaluate model behaviors and offers user interfaces. Despite the availability of abundant financial data and advanced RL techniques, a remarkable gap still exists between the potential and realized utilization of RL in financial trading. In particular, orchestrating an RLFT project lifecycle poses challenges in engineering (i.e. hard to build), benchmarking (i.e. hard to compare) and usability (i.e. hard to optimize, maintain and use). To overcome these challenges, we introduce TradeMaster, a holistic open-source RLFT platform that serves as a i) software toolkit, ii) empirical benchmark, and iii) user interface. Our ultimate goal is to provide infrastructures for transparent and reproducible RLFT research and facilitate their real-world deployment with industry impact. 
TradeMaster will be updated continuously and welcomes contributions from both RL and finance communities.", "keywords": "quantitative trading;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/1ce3c389f4a1d8d9f054a5d9771bce071d5e71df.pdf", "author": "Shuo Sun;Molei Qin;Wentao Zhang;Haochong Xia;Chuqiao Zong;Jie Ying;Yonggang Xie;Lingxuan Zhao;Xinrun Wang;Bo An", "authorids": "~Shuo_Sun2;~Molei_Qin1;~Wentao_Zhang9;~Haochong_Xia1;~Chuqiao_Zong1;~Jie_Ying1;~Yonggang_Xie2;~Lingxuan_Zhao1;~Xinrun_Wang1;~Bo_An2", "gender": "M;M;M;M;;;M;M;M;M", "homepage": ";;;https://github.com/ZONG0004;;https://www.linkedin.com/in/yonggang-xie-883991205;http://zlxor.com;https://rainwangphy.github.io/;https://personal.ntu.edu.sg/boan/;https://github.com/qinmoelei", "dblp": "04/4493;41/3249-7;356/9950;369/7684;;;;199/6413;42/6178-1.html;339/6915", "google_scholar": "kGgWv8IAAAAJ;Zvtt7ZcAAAAJ;gC_mxt4AAAAJ;;;;;ROANfPUAAAAJ;PEEpuNwAAAAJ;", "orcid": ";0009-0008-2767-6998;0009-0004-2947-5947;0009-0007-0107-4211;0000-0002-2218-9751;;;;0000-0002-7064-7438;", "linkedin": ";;haochong-xia-b21415203/;;;;;;;", "or_profile": "~Shuo_Sun2;~Wentao_Zhang9;~Haochong_Xia1;~Chuqiao_Zong1;~Jie_Ying1;~Yonggang_Xie2;~Lingxuan_Zhao1;~Xinrun_Wang1;~Bo_An2;~Qin_Molei1", "aff": ";Nanyang Technological University;National Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;School of Computer Science and Engineering, Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;ntu.edu;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;scse.ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": ";Researcher;PhD student;PhD student;MS student;MS student;Undergrad student;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nsun2023trademaster,\ntitle={TradeMaster: A Holistic Quantitative Trading Platform Empowered by Reinforcement Learning},\nauthor={Shuo Sun and Molei Qin and Wentao Zhang and Haochong Xia and Chuqiao Zong and Jie Ying and Yonggang Xie and Lingxuan Zhao and Xinrun Wang and Bo An},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=l7Ggnzaws5}\n}", "github": "", "project": "", "reviewers": "fVic;DHLR;k4uA;Vtxm", "pdf_size": 490932, "rating": "6;6;7;7", "confidence": "4;3;3;5", "wc_summary_and_contributions": "59;60;121;51", "wc_strengths": "113;56;92;39", "wc_improvement": "90;99;116;101", "wc_limitations": "90;4;54;35", "wc_correctness": "17;15;5;1", "wc_clarity": "1;5;6;1", "wc_relation_to_prior_work": "155;14;12;1", "wc_documentation": "3;1;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "529;255;412;231", "wc_reply_reviewers": "67;0;0;146", "wc_reply_authors": "1775;504;607;2127", "reply_reviewers": "2;0;0;2", "reply_authors": "6;1;1;6", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 72.75, 28.07467720206236 ], "wc_strengths_avg": [ 75.0, 29.111853256019273 ], "wc_improvement_avg": [ 101.5, 9.340770846134703 ], "wc_limitations_avg": [ 45.75, 31.163881337214722 ], "wc_correctness_avg": [ 9.5, 6.689544080129826 ], "wc_clarity_avg": [ 3.25, 2.277608394786075 ], "wc_relation_to_prior_work_avg": [ 45.5, 63.413326675076746 ], "wc_documentation_avg": [ 2.5, 1.6583123951777 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 356.75, 
121.33502173733683 ], "wc_reply_reviewers_avg": [ 53.25, 60.13058705850127 ], "wc_reply_authors_avg": [ 1253.25, 709.696546067402 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 3.5, 2.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6335014780895104839&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";ntu.edu.sg;ntu.edu;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;scse.ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 10, "aff_unique_index": "0;1;0;0;0;0;0;0;0", "aff_unique_norm": "Nanyang Technological University;National Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ntu.edu", "aff_unique_abbr": "NTU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0", "aff_country_unique": "Singapore;United States" }, { "title": "Visual Instruction Inversion: Image Editing via Image Prompting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70612", "id": "l9BsCh8ikK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e75f7539cbde5de895fab238ff42519-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l9BsCh8ikK", "openreview": "https://openreview.net/forum?id=l9BsCh8ikK", "poster": "/media/PosterPDFs/NeurIPS%202023/70612.png?t=1702527232.3909361", "slides": "https://nips.cc/virtual/2023/poster/70612", "video": "https://nips.cc/virtual/2023/poster/70612", "author_site": "Thao Nguyen, Yuheng Li, Utkarsh Ojha, Yong Jae Lee", "tldr": "", "abstract": "Text-conditioned image editing has emerged as a powerful tool for editing images.\nHowever, in many situations, language can be ambiguous and ineffective in describing specific image edits.\nWhen faced with such challenges, visual prompts can be a more informative and intuitive way to convey ideas.\nWe present a method for image editing via visual prompting.\nGiven pairs of examples that represent the \"before\" and \"after\" images of an edit, our goal is to learn a text-based editing direction that can be used to perform the same edit on new images.\nWe leverage the rich, pretrained editing capabilities of text-to-image diffusion models by inverting visual prompts into editing instructions.\nOur results show that with just one example pair, we can achieve competitive results compared to state-of-the-art text-conditioned image editing frameworks.", "keywords": "image editing;diffusion models;visual prompting", "primary_area": "", "supplementary_material": "/attachment/0257cd38bdc754e574b50d2c304b53e6a794a873.zip", "author": "Thao Nguyen;Yuheng Li;Utkarsh Ojha;Yong Jae Lee", "authorids": "~Thao_Nguyen4;~Yuheng_Li1;~Utkarsh_Ojha1;~Yong_Jae_Lee2", "gender": "F;M;M;M", "homepage": "https://thaoshibe.github.io/;;https://utkarshojha.github.io/;https://pages.cs.wisc.edu/~yongjaelee/", "dblp": ";39/3954;194/5532;15/5471", "google_scholar": "P_6-46UAAAAJ;ZphbAXEAAAAJ;QGdSgfoAAAAJ;4GTpCxcAAAAJ", "orcid": ";;;", "linkedin": ";;utkarsh-ojha-16a20b11b/;", "or_profile": "~Thao_Nguyen4;~Yuheng_Li1;~Utkarsh_Ojha1;~Yong_Jae_Lee1", "aff": "Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "cs.wisc.edu;wisc.edu;wisc.edu;cs.wisc.edu", "position": "PhD student;PhD student;PhD student;Associate Professor",
"bibtex": "@inproceedings{\nnguyen2023visual,\ntitle={Visual Instruction Inversion: Image Editing via Image Prompting},\nauthor={Thao Nguyen and Yuheng Li and Utkarsh Ojha and Yong Jae Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l9BsCh8ikK}\n}", "github": "", "project": "", "reviewers": "h5yQ;vo15;VA7A;uc51;G8X8", "pdf_size": 26634926, "rating": "3;5;5;6;7", "confidence": "4;5;4;5;4", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "66;56;30;60;120", "wc_strengths": "87;20;59;80;152", "wc_weaknesses": "160;148;97;134;175", "wc_questions": "31;8;18;168;209", "wc_limitations": "33;22;19;50;25", "wc_review": "377;254;223;492;681", "wc_reply_reviewers": "548;70;19;155;51", "wc_reply_authors": "2431;92;14;22;8", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;2;2;2;2", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 66.4, 29.486267990371385 ], "wc_strengths_avg": [ 79.6, 43.065531460786595 ], "wc_weaknesses_avg": [ 142.8, 26.588719412562916 ], "wc_questions_avg": [ 86.8, 84.35970602129906 ], "wc_limitations_avg": [ 29.8, 11.124747188138704 ], "wc_review_avg": [ 405.4, 167.60262527776825 ], "wc_reply_reviewers_avg": [ 168.6, 194.9672793060415 ], "wc_reply_authors_avg": [ 513.4, 959.2779784817329 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.18463723646899913, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1260405035214767511&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.wisc.edu;wisc.edu;wisc.edu;cs.wisc.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Globally solving the Gromov-Wasserstein problem for point clouds in low dimensional Euclidean spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70611", "id": "l9MbuqzlZt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/188409d2ad91db4fb13644d024d99074-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=l9MbuqzlZt", "openreview": "https://openreview.net/forum?id=l9MbuqzlZt", "poster": "/media/PosterPDFs/NeurIPS%202023/70611.png?t=1699531164.269138", "slides": "https://nips.cc/virtual/2023/poster/70611", "video": "https://nips.cc/virtual/2023/poster/70611", "author_site": "Martin Ryner, Jan Kronqvist, Johan Karlsson", "tldr": "", "abstract": "This paper presents a framework for computing the Gromov-Wasserstein problem between two sets of points in low dimensional spaces, where the discrepancy is the squared Euclidean norm.\nThe Gromov-Wasserstein problem is a generalization of the optimal transport problem that finds the assignment between two sets preserving pairwise distances as much as possible. 
This can be used to quantify the similarity between two formations or shapes, a common problem in AI and machine learning.\nThe problem can be formulated as a Quadratic Assignment Problem (QAP), which is in general computationally intractable even for small problems. Our framework addresses this challenge by reformulating the QAP as an optimization problem with a low-dimensional domain, leveraging the fact that the problem can be expressed as a concave quadratic optimization problem with low rank. The method scales well with the number of points, and it can be used to find the global solution for large-scale problems with thousands of points.\nWe compare the computational complexity of our approach with state-of-the-art methods on synthetic problems and apply it to a near-symmetrical problem which is of particular interest in computational biology.", "keywords": "Gromov-Wasserstein problem;QAP;Global optimization", "primary_area": "", "supplementary_material": "", "author": "Martin Ryner;Jan Kronqvist;Johan Karlsson", "authorids": "~Martin_Ryner1;~Jan_Kronqvist1;~Johan_Karlsson2", "gender": "M;M;", "homepage": "https://www.kth.se/profile/martinrr;https://www.kth.se/profile/jankr;", "dblp": "325/5435;;", "google_scholar": "jDfUkZkAAAAJ;hEdm6bAAAAAJ;", "orcid": "0000-0002-3316-770X;0000-0003-0299-5745;", "linkedin": ";;", "or_profile": "~Martin_Ryner1;~Jan_Kronqvist1;~Johan_Karlsson2", "aff": "KTH Royal Institute of Technology;KTH Royal Institute of Technology, Stockholm, Sweden;", "aff_domain": "kth.se;kth.se;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nryner2023globally,\ntitle={Globally solving the Gromov-Wasserstein problem for point clouds in low dimensional Euclidean spaces},\nauthor={Martin Ryner and Jan Kronqvist and Johan Karlsson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=l9MbuqzlZt}\n}", "github": "", "project": "", "reviewers": "xsEn;yHuE;G7H7;pM3y;FaYe", "pdf_size": 715658, "rating": "6;6;6;6;8", "confidence": "4;4;3;4;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;4;4", "presentation": "3;3;3;3;4", "wc_summary": "449;344;49;159;74", "wc_strengths": "145;18;62;99;123", "wc_weaknesses": "759;509;72;99;25", "wc_questions": "94;89;81;226;27", "wc_limitations": "33;19;17;53;1", "wc_review": "1480;979;281;636;250", "wc_reply_reviewers": "53;284;88;540;21", "wc_reply_authors": "2;47;14;117;0", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;3;2;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 215.0, 156.18578680533003 ], "wc_strengths_avg": [ 89.4, 45.08037266926705 ], "wc_weaknesses_avg": [ 292.8, 290.55560569364343 ], "wc_questions_avg": [ 103.4, 65.8258307961244 ], "wc_limitations_avg": [ 24.6, 17.453939383417143 ], "wc_review_avg": [ 725.2, 461.56750318886185 ], "wc_reply_reviewers_avg": [ 197.2, 194.3248826064228 ], "wc_reply_authors_avg": [ 36.0, 43.858864554386265 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8552652835839811796&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kth.se;kth.se;", "author_num": 3, 
"aff_unique_index": "0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "title": "Unsupervised Learning for Solving the Travelling Salesman Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70610", "id": "lAEc7aIW20", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/93b8618a9061f8a55825c13ecf28392b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lAEc7aIW20", "openreview": "https://openreview.net/forum?id=lAEc7aIW20", "poster": "/media/PosterPDFs/NeurIPS%202023/70610.png?t=1700457867.704287", "slides": "https://nips.cc/virtual/2023/poster/70610", "video": "https://nips.cc/virtual/2023/poster/70610", "author_site": "Yimeng Min, Yiwei Bai, Carla Gomes", "tldr": "", "abstract": "We propose UTSP, an Unsupervised Learning (UL) framework for solving the Travelling Salesman Problem (TSP). We train a Graph Neural Network (GNN) using a surrogate loss. The GNN outputs a heat map representing the probability for each edge to be part of the optimal path. We then apply local search to generate our final prediction based on the heat map. Our loss function consists of two parts: one pushes the model to find the shortest path and the other serves as a surrogate for the constraint that the route should form a Hamiltonian Cycle. \nExperimental results show that UTSP \noutperforms the existing data-driven TSP heuristics.\nOur approach is parameter efficient as well as data efficient: the model takes $\\sim$ 10\\% of the number of parameters and $\\sim$ 0.2\\% of training samples compared with Reinforcement Learning or Supervised Learning methods.", "keywords": "Combinatorial Optimization;Graph Neural Network;Travelling Salesman Problem", "primary_area": "", "supplementary_material": "", "author": "Yimeng Min;Yiwei Bai;Carla P Gomes", "authorids": "~Yimeng_Min1;~Yiwei_Bai1;~Carla_P_Gomes1", "gender": "M;;", "homepage": ";;", "dblp": "251/3350;206/6703;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yimeng_Min1;~Yiwei_Bai1;~Carla_P_Gomes1", "aff": "Cornell University;Cornell University;", "aff_domain": "cornell.edu;cornell.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nmin2023unsupervised,\ntitle={Unsupervised Learning for Solving the Travelling Salesman Problem},\nauthor={Yimeng Min and Yiwei Bai and Carla P Gomes},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lAEc7aIW20}\n}", "github": "", "project": "", "reviewers": "vHRb;su5g;X3D7;wEAA", "pdf_size": 859948, "rating": "4;6;6;8", "confidence": "5;4;4;3", "soundness": "2;2;3;4", "novelty": "1;3;3;4", "presentation": "3;2;3;2", "wc_summary": "74;103;100;193", "wc_strengths": "60;74;41;60", "wc_weaknesses": "429;427;316;385", "wc_questions": "166;69;5;95", "wc_limitations": "9;17;41;38", "wc_review": "738;690;503;771", "wc_reply_reviewers": "47;83;178;130", "wc_reply_authors": "0;1843;815;151", "reply_reviewers": "1;3;2;2", "reply_authors": "1;5;4;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 117.5, 45.0249930594109 
], "wc_strengths_avg": [ 58.75, 11.734031702701335 ], "wc_weaknesses_avg": [ 389.25, 45.79505977722925 ], "wc_questions_avg": [ 83.75, 57.68611184678683 ], "wc_limitations_avg": [ 26.25, 13.589977924926883 ], "wc_review_avg": [ 675.5, 103.67376717376484 ], "wc_reply_reviewers_avg": [ 109.5, 49.29756586282937 ], "wc_reply_authors_avg": [ 702.25, 726.4734595978025 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 1.479019945774904 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9405171939322190134&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cornell.edu;cornell.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DrugCLIP: Contrastive Protein-Molecule Representation Learning for Virtual Screening", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70609", "id": "lAbCgNcxm7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8bd31288ad8e9a31d519fdeede7ee47d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lAbCgNcxm7", "openreview": "https://openreview.net/forum?id=lAbCgNcxm7", "poster": "/media/PosterPDFs/NeurIPS%202023/70609.png?t=1701861100.6151557", "slides": "https://nips.cc/virtual/2023/poster/70609", "video": "https://nips.cc/virtual/2023/poster/70609", "author_site": "Bowen Gao, Bo Qiang, Haichuan Tan, Yinjun Jia, Minsi Ren, Minsi Lu, Jingjing Liu, Wei-Ying Ma, Yanyan Lan", "tldr": "", "abstract": "Virtual screening, which identifies potential drugs from vast compound databases to bind with a particular protein pocket, is a critical step in AI-assisted drug discovery. Traditional docking methods are highly time-consuming, and can only work with a restricted search library in real-life applications. Recent supervised learning approaches using scoring functions for binding-affinity prediction, although promising, have not yet surpassed docking methods due to their strong dependency on limited data with reliable binding-affinity labels. In this paper, we propose a novel contrastive learning framework, DrugCLIP, by reformulating virtual screening as a dense retrieval task and employing contrastive learning to align representations of binding protein pockets and molecules from a large quantity of pairwise data without explicit binding-affinity scores. We also introduce a biological-knowledge inspired data augmentation strategy to learn better protein-molecule representations. 
Extensive experiments show that DrugCLIP significantly outperforms traditional docking and supervised learning methods on diverse virtual screening benchmarks with greatly reduced computation time, especially in the zero-shot setting.", "keywords": "Application;Drug Discovery;Representation Learning;Dataset Augmentation", "primary_area": "", "supplementary_material": "/attachment/87ea6a675185d105e245ff204e97df07e8b5b002.zip", "author": "Bowen Gao;Bo Qiang;Haichuan Tan;Yinjun Jia;Minsi Ren;Minsi Lu;Jingjing Liu;Wei-Ying Ma;Yanyan Lan", "authorids": "~Bowen_Gao1;~Bo_Qiang1;~Haichuan_Tan1;~Yinjun_Jia1;~Minsi_Ren1;~Minsi_Lu1;~Jingjing_Liu2;~Wei-Ying_Ma2;~Yanyan_Lan2", "gender": "M;M;M;M;F;;M;;M", "homepage": "https://www.linkedin.com/in/bgao/;;https://github.com/thchuan2001;;;https://air.tsinghua.edu.cn/en/info/1046/1194.htm#:~:text=Jingjing%20Liu%20is%20Professor%2C%20Principal,CVPR%2C%20ACL%2C%20etc.);https://air.tsinghua.edu.cn/en/info/1046/1189.htm;;https://github.com/EBGU", "dblp": ";;;313/3162;;30/3008-1;m/WYMa.html;00/6040.html;", "google_scholar": "cTGzVe8AAAAJ;7FQInvgAAAAJ;;SodlECMAAAAJ;https://scholar.google.com.hk/citations?user=KBhKa_4AAAAJ;BzJ_GboAAAAJ;SToCbu8AAAAJ;;", "orcid": ";0000-0001-7428-4104;;;;;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAADIyJsgBrvXiwCdovg-un2CwBcF8p4v5aas;minsi-lu-72957b263/;jingjing-liu-65703431/;wei-ying-ma-16a0171/;;", "or_profile": "~Bowen_Gao1;~Bo_Qiang1;~Haichuan_Tan1;~Minsi_Ren1;~Minsi_Lu1;~Jingjing_Liu2;~Wei-Ying_Ma2;~Yanyan_Lan2;~Yinjun_Harold_Jia1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Institute of Automation, Chinese Academy of Sciences;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ia.ac.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "Researcher;Intern;PhD student;MS student;Undergrad student;Full Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ngao2023drugclip,\ntitle={Drug{CLIP}: Contrastive Protein-Molecule Representation Learning for Virtual Screening},\nauthor={Bowen Gao and Bo Qiang and Haichuan Tan and Yinjun Jia and Minsi Ren and Minsi Lu and Jingjing Liu and Wei-Ying Ma and Yanyan Lan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lAbCgNcxm7}\n}", "github": "", "project": "", "reviewers": "8ynk;Bb5s;GQNT;cZpn", "pdf_size": 5148016, "rating": "4;4;7;8", "confidence": "4;5;4;5", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "3;2;3;3", "wc_summary": "134;67;58;141", "wc_strengths": "145;51;47;157", "wc_weaknesses": "203;358;84;8", "wc_questions": "210;185;75;2", "wc_limitations": "29;7;4;2", "wc_review": "721;668;268;310", "wc_reply_reviewers": "0;127;47;0", "wc_reply_authors": "0;1006;0;0", "reply_reviewers": "0;2;1;0", "reply_authors": "1;3;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 37.71604433129222 ], "wc_strengths_avg": [ 100.0, 51.19570294468082 ], "wc_weaknesses_avg": [ 163.25, 132.18429369633898 ], "wc_questions_avg": [ 118.0, 84.05058000989642 ], "wc_limitations_avg": [ 10.5, 10.828203913853857 ], "wc_review_avg": [ 491.75, 204.15481258104106 ], "wc_reply_reviewers_avg": 
[ 43.5, 51.88689622631132 ], "wc_reply_authors_avg": [ 251.5, 435.6107781035726 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.14002800840280097, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13215166916177032709&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ia.ac.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;0;0;0;0;0", "aff_unique_norm": "Tsinghua University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "THU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Normalization Layers Are All That Sharpness-Aware Minimization Needs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70608", "id": "lArwl3y9x6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/da909fc3893d272f26fd9db82e09d954-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lArwl3y9x6", "openreview": "https://openreview.net/forum?id=lArwl3y9x6", "poster": "/media/PosterPDFs/NeurIPS%202023/70608.png?t=1701962438.773245", "slides": "https://nips.cc/virtual/2023/poster/70608", "video": "https://nips.cc/virtual/2023/poster/70608", "author_site": "Maximilian Mueller, Tiffany Vlaar, David Rolnick, Matthias Hein", "tldr": "", "abstract": "Sharpness-aware minimization (SAM) was proposed to reduce sharpness of minima and has been shown to enhance generalization performance in various settings. In this work we show that perturbing only the affine normalization parameters (typically comprising 0.1% of the total parameters) in the adversarial step of SAM can outperform perturbing all of the parameters. This finding generalizes\nto different SAM variants and both ResNet (Batch Normalization) and Vision Transformer (Layer Normalization) architectures. We consider alternative sparse perturbation approaches and find that these do not achieve similar performance enhancement at such extreme sparsity levels, showing that this behaviour is unique to the normalization layers. 
Although our findings reaffirm the effectiveness\nof SAM in improving generalization performance, they cast doubt on whether this is solely caused by reduced sharpness.", "keywords": "sharpness-aware minimization;flatness;generalization;normalization layers", "primary_area": "", "supplementary_material": "/attachment/6b42797c322e12f5cc94da352385892dbeb0c9fb.pdf", "author": "Maximilian Mueller;Tiffany Joyce Vlaar;David Rolnick;Matthias Hein", "authorids": "~Maximilian_Mueller1;~Tiffany_Joyce_Vlaar1;~David_Rolnick1;~Matthias_Hein2", "gender": ";;M;M", "homepage": "https://mueller-mp.github.io/;;http://www.davidrolnick.com/;https://uni-tuebingen.de/de/164260", "dblp": "150/1874;248/2062.html;37/10718;97/1213-1", "google_scholar": "ii_xC_sAAAAJ;E1pdMzIAAAAJ;P_luG3cAAAAJ;0ZAb3tsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Maximilian_Mueller1;~Tiffany_Joyce_Vlaar1;~David_Rolnick1;~Matthias_Hein2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;McGill University;McGill University;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;cs.mcgill.ca;cs.mcgill.ca;uni-tuebingen.de", "position": "PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmueller2023normalization,\ntitle={Normalization Layers Are All That Sharpness-Aware Minimization Needs},\nauthor={Maximilian Mueller and Tiffany Joyce Vlaar and David Rolnick and Matthias Hein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lArwl3y9x6}\n}", "github": "", "project": "", "reviewers": "sERj;Ftd6;wExQ;dJUQ", "pdf_size": 739391, "rating": "3;4;7;9", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;2;3;4", "wc_summary": "38;64;83;133", "wc_strengths": "34;45;101;48", "wc_weaknesses": "131;120;93;99", "wc_questions": "46;45;34;2", "wc_limitations": "13;1;17;1", "wc_review": "262;275;328;283", "wc_reply_reviewers": "82;273;88;437", "wc_reply_authors": "180;919;20;717", "reply_reviewers": "1;1;1;4", "reply_authors": "2;3;2;4", "rating_avg": [ 5.75, 2.384848003542364 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 79.5, 34.77427209878016 ], "wc_strengths_avg": [ 57.0, 25.93260495977988 ], "wc_weaknesses_avg": [ 110.75, 15.400892831261439 ], "wc_questions_avg": [ 31.75, 17.80975856096876 ], "wc_limitations_avg": [ 8.0, 7.14142842854285 ], "wc_review_avg": [ 287.0, 24.829418035870273 ], "wc_reply_reviewers_avg": [ 220.0, 146.94046413428808 ], "wc_reply_authors_avg": [ 459.0, 370.380210054479 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=801189192470296136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "uni-tuebingen.de;cs.mcgill.ca;cs.mcgill.ca;uni-tuebingen.de", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;McGill University;University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mcgill.ca;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;McGill;Uni T\u00fcbingen", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", 
"aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Germany;Canada" }, { "title": "Adversarial Learning for Feature Shift Detection and Correction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70607", "id": "lBhRTO2uWf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3cd64ddad0a28da0f28a0e03a73ea7d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lBhRTO2uWf", "openreview": "https://openreview.net/forum?id=lBhRTO2uWf", "poster": "/media/PosterPDFs/NeurIPS%202023/70607.png?t=1701376748.4713726", "slides": "https://nips.cc/virtual/2023/poster/70607", "video": "https://nips.cc/virtual/2023/poster/70607", "author_site": "M\u00edriam Barrab\u00e9s, Daniel Mas Montserrat, Margarita Geleta, Xavier Gir\u00f3-i-Nieto, Alexander Ioannidis", "tldr": "", "abstract": "Data shift is a phenomenon present in many real-world applications, and while there are multiple methods attempting to detect shifts, the task of localizing and correcting the features originating such shifts has not been studied in depth. Feature shifts can occur in many datasets, including in multi-sensor data, where some sensors are malfunctioning, or in tabular and structured data, including biomedical, financial, and survey data, where faulty standardization and data processing pipelines can lead to erroneous features. In this work, we explore using the principles of adversarial learning, where the information from several discriminators trained to distinguish between two distributions is used to both detect the corrupted features and fix them in order to remove the distribution shift between datasets. We show that mainstream supervised classifiers, such as random forest or gradient boosting trees, combined with simple iterative heuristics, can localize and correct feature shifts, outperforming current statistical and neural network-based techniques. 
The code is available at https://github.com/AI-sandbox/DataFix.", "keywords": "feature shift detection;distribution shift;shift;data-centric AI", "primary_area": "", "supplementary_material": "/attachment/29acbd819ba1c8dd001d3c88f60a1e10ca88741a.zip", "author": "M\u00edriam Barrab\u00e9s;Daniel Mas Montserrat;Margarita Geleta;Xavier Gir\u00f3-i-Nieto;Alexander G Ioannidis", "authorids": "~M\u00edriam_Barrab\u00e9s1;~Daniel_Mas_Montserrat1;~Margarita_Geleta1;~Xavier_Gir\u00f3-i-Nieto1;~Alexander_G_Ioannidis1", "gender": "F;M;F;;M", "homepage": ";https://dmasmont.github.io/;https://margaritageleta.github.io;https://ai-page.org/;https://imatge.upc.edu/web/people/xavier-giro", "dblp": "329/2368;222/5714;266/1286;328/5925;12/7205", "google_scholar": "xNVHPQwAAAAJ;;jv-xu10AAAAJ;9Q8pE2YAAAAJ;M3ZUEc8AAAAJ", "orcid": "0009-0007-7379-1658;0000-0002-7946-7724;0000-0001-5823-9776;0000-0002-4735-7803;0000-0002-9935-5332", "linkedin": "m%C3%ADriam-barrab%C3%A9s-torrella/;;margarita-geleta/;alexgioannidis;xaviergiro/", "or_profile": "~M\u00edriam_Barrab\u00e9s1;~Daniel_Mas_Montserrat1;~Margarita_Geleta1;~Alexander_G_Ioannidis1;~Xavier_Giro-i-Nieto1", "aff": "Munster Technological University;Stanford University;Amazon;Stanford University;Amazon", "aff_domain": "mtu.ie;stanford.edu;amazon.com;stanford.edu;amazon.com", "position": "MS student;Postdoc;Researcher;Adjunct Professor;Researcher", "bibtex": "@inproceedings{\nbarrab{\\'e}s2023adversarial,\ntitle={Adversarial Learning for Feature Shift Detection and Correction},\nauthor={M{\\'\\i}riam Barrab{\\'e}s and Daniel Mas Montserrat and Margarita Geleta and Xavier Gir{\\'o}-i-Nieto and Alexander G Ioannidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lBhRTO2uWf}\n}", "github": "", "project": "", "reviewers": "5LeU;MQtc;kRpA;dDZx", "pdf_size": 7253683, "rating": "5;6;7;7", "confidence": "3;2;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "100;102;381;160", "wc_strengths": "38;29;93;10", "wc_weaknesses": "339;207;414;14", "wc_questions": "126;115;198;94", "wc_limitations": "38;1;47;1", "wc_review": "641;454;1133;279", "wc_reply_reviewers": "137;187;462;0", "wc_reply_authors": "134;390;648;0", "reply_reviewers": "1;4;2;0", "reply_authors": "2;4;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 185.75, 115.27440088762118 ], "wc_strengths_avg": [ 42.5, 30.858548248418945 ], "wc_weaknesses_avg": [ 243.5, 151.81650107942812 ], "wc_questions_avg": [ 133.25, 39.111219617905036 ], "wc_limitations_avg": [ 21.75, 20.99255820523073 ], "wc_review_avg": [ 626.75, 319.08648905900105 ], "wc_reply_reviewers_avg": [ 196.5, 167.87867643033167 ], "wc_reply_authors_avg": [ 293.0, 248.276056034407 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6475037227320845724&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "mtu.ie;stanford.edu;amazon.com;stanford.edu;amazon.com", "author_num": 5, "aff_unique_index": "0;1;2;1;2", "aff_unique_norm": "Munster Technological University;Stanford University;Amazon", 
"aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.mtu.ie;https://www.stanford.edu;https://www.amazon.com", "aff_unique_abbr": "MTU;Stanford;Amazon", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Ireland;United States" }, { "title": "Team-PSRO for Learning Approximate TMECor in Large Team Games via Cooperative Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70606", "id": "lCThtrJxoH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e4ccc9ca6ae2225c4cbb7782ab48daf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lCThtrJxoH", "openreview": "https://openreview.net/forum?id=lCThtrJxoH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70606", "video": "https://nips.cc/virtual/2023/poster/70606", "author_site": "Stephen McAleer, Gabriele Farina, Gaoyue Zhou, Mingzhi Wang, Yaodong Yang, Tuomas Sandholm", "tldr": "", "abstract": "Recent algorithms have achieved superhuman performance at a number of two-player zero-sum games such as poker and go. However, many real-world situations are multi-player games. Zero-sum two-team games, such as bridge and football, involve two teams where each member of the team shares the same reward with every other member of that team, and each team has the negative of the reward of the other team. A popular solution concept in this setting, called TMECor, assumes that teams can jointly correlate their strategies before play, but are not able to communicate during play. This setting is harder than two-player zero-sum games because each player on a team has different information and must use their public actions to signal to other members of the team. Prior works either have game-theoretic guarantees but only work in very small games, or are able to scale to large games but do not have game-theoretic guarantees. In this paper we introduce two algorithms: Team-PSRO, an extension of PSRO from two-player games to team games, and Team-PSRO Mix-and-Match which improves upon Team PSRO by better using population policies. In Team-PSRO, in every iteration both teams learn a joint best response to the opponent's meta-strategy via reinforcement learning. As the reinforcement learning joint best response approaches the optimal best response, Team-PSRO is guaranteed to converge to a TMECor. 
In experiments on Kuhn poker and Liar's Dice, we show that a tabular version of Team-PSRO converges to TMECor, and a version of Team PSRO using deep cooperative reinforcement learning beats self-play reinforcement learning in the large game of Google Research Football.", "keywords": "PSRO;team games;TMECor;populations;equilibrium;game theory;RL", "primary_area": "", "supplementary_material": "", "author": "Stephen Marcus McAleer;Gabriele Farina;Gaoyue Zhou;Mingzhi Wang;Yaodong Yang;Tuomas Sandholm", "authorids": "~Stephen_Marcus_McAleer1;~Gabriele_Farina1;~Gaoyue_Zhou1;mzwang_beijing@outlook.com;~Yaodong_Yang1;~Tuomas_Sandholm1", "gender": "M;M;F;;M;M", "homepage": "https://www.andrew.cmu.edu/user/smcaleer/;http://www.cs.cmu.edu/~gfarina/about/;https://gaoyuezhou.github.io/;;https://www.yangyaodong.com;http://www.cs.cmu.edu/~sandholm", "dblp": ";;;;170/1496-1;s/TuomasSandholm", "google_scholar": "iEFL4-YAAAAJ;sktDNcEAAAAJ;-1iyBukAAAAJ;;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;0DpK1EMAAAAJ", "orcid": ";;;;0000-0001-8132-5613;", "linkedin": "stephen-mcaleer/;;gaoyue-zhou/;;yaodong-yang;", "or_profile": "~Stephen_Marcus_McAleer1;~Gabriele_Farina1;~Gaoyue_Zhou1;mzwang_beijing@outlook.com;~Yaodong_Yang1;~Tuomas_Sandholm1", "aff": "Carnegie Mellon University;FAIR, Meta AI;Carnegie Mellon University;;Peking University;Carnegie Mellon University", "aff_domain": "cmu.edu;meta.com;cmu.edu;;pku.edu.cn;cmu.edu", "position": "Postdoc;Researcher;MS student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmcaleer2023teampsro,\ntitle={Team-{PSRO} for Learning Approximate {TMEC}or in Large Team Games via Cooperative Reinforcement Learning},\nauthor={Stephen Marcus McAleer and Gabriele Farina and Gaoyue Zhou and Mingzhi Wang and Yaodong Yang and Tuomas Sandholm},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lCThtrJxoH}\n}", "github": "", "project": "", "reviewers": "MAu9;eV3g;Gu1R;kqkC", "pdf_size": 613712, "rating": "4;5;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "64;75;69;54", "wc_strengths": "26;63;47;69", "wc_weaknesses": "290;148;62;341", "wc_questions": "174;289;17;127", "wc_limitations": "6;11;1;11", "wc_review": "560;586;196;602", "wc_reply_reviewers": "897;1024;0;86", "wc_reply_authors": "832;1319;0;224", "reply_reviewers": "3;3;0;1", "reply_authors": "4;4;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 7.697402159170326 ], "wc_strengths_avg": [ 51.25, 16.64894891577243 ], "wc_weaknesses_avg": [ 210.25, 111.02786812327795 ], "wc_questions_avg": [ 151.75, 97.59962858535887 ], "wc_limitations_avg": [ 7.25, 4.14578098794425 ], "wc_review_avg": [ 486.0, 168.10116001979284 ], "wc_reply_reviewers_avg": [ 501.75, 461.9439224624565 ], "wc_reply_authors_avg": [ 593.75, 517.6883111487066 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8916661022815108477&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;meta.com;cmu.edu;;pku.edu.cn;cmu.edu", "author_num": 6, 
"aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Carnegie Mellon University;Meta;Peking University", "aff_unique_dep": ";Meta AI;", "aff_unique_url": "https://www.cmu.edu;https://meta.ai;http://www.pku.edu.cn", "aff_unique_abbr": "CMU;Meta AI;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Interactive Visual Reasoning under Uncertainty", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73482", "id": "lCuoehPWrB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/844f722dbbcb27933ff5baf58a1f00c8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=lCuoehPWrB", "openreview": "https://openreview.net/forum?id=lCuoehPWrB", "poster": "/media/PosterPDFs/NeurIPS%202023/73482.png?t=1701403187.8272092", "slides": "https://nips.cc/virtual/2023/poster/73482", "video": "https://nips.cc/virtual/2023/poster/73482", "author_site": "Manjie Xu, Guangyuan Jiang, Wei Liang, Chi Zhang, Yixin Zhu", "tldr": "", "abstract": "One of the fundamental cognitive abilities of humans is to quickly resolve uncertainty by generating hypotheses and testing them via active trials. Encountering a novel phenomenon accompanied by ambiguous cause-effect relationships, humans make hypotheses against data, conduct inferences from observation, test their theory via experimentation, and correct the proposition if inconsistency arises. These iterative processes persist until the underlying mechanism becomes clear. In this work, we devise the **IVRE** (pronounced as *\"ivory\"*) environment for evaluating artificial agents' reasoning ability under uncertainty. **IVRE** is an interactive environment featuring rich scenarios centered around *Blicket* detection. Agents in **IVRE** are placed into environments with various ambiguous action-effect pairs and asked to determine each object's role. They are encouraged to propose effective and efficient experiments to validate their hypotheses based on observations and actively gather new information. The game ends when all uncertainties are resolved or the maximum number of trials is consumed. By evaluating modern artificial agents in **IVRE**, we notice a clear failure of today's learning methods compared to humans. 
Such inefficacy in interactive reasoning ability under uncertainty calls for future research in building human-like intelligence.", "keywords": "visual reasoning;uncertainty;few-shot;interactive", "primary_area": "", "supplementary_material": "/attachment/8253078adb1e9965b59b03cc9f9406b80bef1032.zip", "author": "Manjie Xu;Guangyuan Jiang;Wei Liang;Chi Zhang;Yixin Zhu", "authorids": "~Manjie_Xu1;~Guangyuan_Jiang1;~Wei_Liang1;~Chi_Zhang12;~Yixin_Zhu1", "gender": "M;M;F;;M", "homepage": "https://mjtsu.github.io;https://jiang.gy/;https://liangwei-bit.github.io/web/;;https://yzhu.io/", "dblp": "322/5851;322/5214;;;91/1103-1.html", "google_scholar": "j-WwUGEAAAAJ;3L79mEAAAAAJ;3p6YfBEAAAAJ;;qG9l6JEAAAAJ", "orcid": ";;;;0000-0001-7024-1545", "linkedin": ";;;;", "or_profile": "~Manjie_Xu1;~Guangyuan_Jiang1;~Wei_Liang1;~Chi_Zhang12;~Yixin_Zhu1", "aff": "Tencent AI Lab;Peking University;Beijing Institute of Technology;;Peking University", "aff_domain": "tencent.com;pku.edu.cn;bit.edu.cn;;pku.edu.cn", "position": "Intern;Undergrad student;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nxu2023interactive,\ntitle={Interactive Visual Reasoning under Uncertainty},\nauthor={Manjie Xu and Guangyuan Jiang and Wei Liang and Chi Zhang and Yixin Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=lCuoehPWrB}\n}", "github": "", "project": "", "reviewers": "pTW5;7bBX;KESZ;svXQ;hE8F", "pdf_size": 4952592, "rating": "7;7;7;7;7", "confidence": "4;4;3;4;3", "wc_summary_and_contributions": "41;86;78;58;59", "wc_strengths": "35;155;103;45;37", "wc_improvement": "12;262;124;32;119", "wc_limitations": "85;67;6;2;17", "wc_correctness": "15;18;1;4;7", "wc_clarity": "1;8;29;4;16", "wc_relation_to_prior_work": "109;14;10;5;1", "wc_documentation": "11;52;21;4;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "310;663;373;155;258", "wc_reply_reviewers": "58;147;59;0;0", "wc_reply_authors": "474;1077;605;162;1001", "reply_reviewers": "1;1;1;0;0", "reply_authors": "2;3;2;1;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 64.4, 15.932357013323546 ], "wc_strengths_avg": [ 75.0, 47.17626521885767 ], "wc_improvement_avg": [ 109.8, 88.39547499730968 ], "wc_limitations_avg": [ 35.4, 33.99176370828675 ], "wc_correctness_avg": [ 9.0, 6.48074069840786 ], "wc_clarity_avg": [ 11.6, 10.05186549850325 ], "wc_relation_to_prior_work_avg": [ 27.8, 40.838217394984326 ], "wc_documentation_avg": [ 17.8, 18.432579851990337 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 351.8, 171.21378449178675 ], "wc_reply_reviewers_avg": [ 52.8, 53.879123972091456 ], "wc_reply_authors_avg": [ 663.8, 339.32957430792857 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2248305426440527854&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "tencent.com;pku.edu.cn;bit.edu.cn;;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Tencent;Peking University;Beijing Institute of Technology", "aff_unique_dep": "Tencent AI Lab;;", "aff_unique_url": "https://ai.tencent.com;http://www.pku.edu.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "Tencent AI Lab;Peking U;BIT", "aff_campus_unique_index": 
"", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "An information-theoretic quantification of the content of communication between brain regions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70605", "id": "lD8xaUWw24", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca9eaef07eca2a50fc626cb929617b1c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lD8xaUWw24", "openreview": "https://openreview.net/forum?id=lD8xaUWw24", "poster": "/media/PosterPDFs/NeurIPS%202023/70605.png?t=1702043027.1289756", "slides": "https://nips.cc/virtual/2023/poster/70605", "video": "https://nips.cc/virtual/2023/poster/70605", "author_site": "Marco Celotto, Jan B\u00edm, Alejandro Tlaie, Vito De Feo, Alessandro Toso, Stefan Lemke, Daniel Chicharro, Hamed Nili, Malte Bieler, Ileana Hanganu-Opatz, Tobias Donner, Andrea Brovelli, Stefano Panzeri", "tldr": "", "abstract": "Quantifying the amount, content and direction of communication between brain regions is key to understanding brain function. Traditional methods to analyze brain activity based on the Wiener-Granger causality principle quantify the overall information propagated by neural activity between simultaneously recorded brain regions, but do not reveal the information flow about specific features of interest (such as sensory stimuli). Here, we develop a new information theoretic measure termed Feature-specific Information Transfer (FIT), quantifying how much information about a specific feature flows between two regions. FIT merges the Wiener-Granger causality principle with information-content specificity. We first derive FIT and prove analytically its key properties. We then illustrate and test them with simulations of neural activity, demonstrating that FIT identifies, within the total information propagated between regions, the information that is transmitted about specific features. We then analyze three neural datasets obtained with different recording methods, magneto- and electro-encephalography, and spiking activity, to demonstrate the ability of FIT to uncover the content and direction of information flow between brain regions beyond what can be discerned with traditional analytical methods. FIT can improve our understanding of how brain regions communicate by uncovering previously unaddressed feature-specific information flow.", "keywords": "Information transmission; Brain data analysis; Sensory processing; Partial information decomposition", "primary_area": "", "supplementary_material": "/attachment/1a871df78881c2cedbf6d834283d0d082a2aba7f.zip", "author": "Marco Celotto;Jan B\u00edm;Alejandro Tlaie;Vito De Feo;Alessandro Toso;Stefan M Lemke;Daniel Chicharro;Hamed Nili;Malte Bieler;Ileana Livia Hanganu-Opatz;Tobias H. 
Donner;Andrea Brovelli;Stefano Panzeri", "authorids": "~Marco_Celotto1;~Jan_B\u00edm1;~Alejandro_Tlaie1;~Vito_De_Feo1;~Alessandro_Toso1;~Stefan_M_Lemke1;~Daniel_Chicharro1;~Hamed_Nili1;~Malte_Bieler1;~Ileana_Livia_Hanganu-Opatz1;~Tobias_H._Donner1;~Andrea_Brovelli1;~Stefano_Panzeri1", "gender": "M;M;M;M;M;;M;M;;F;M;M;M", "homepage": ";;https://alejandrotlaie.net;https://www.essex.ac.uk/people/defeo60201/vito-de-feo;;;;https://www.uke.de/allgemein/arztprofile-und-wissenschaftlerprofile/wissenschaftlerprofilseite_hamed_nili.html;;http://www.opatzlab.com;https://tobiasdonner.net;https://brovelli.github.io/;https://www.uke.de/english/departments-institutes/institutes/department-of-excellence-for-neural-information-processing/team/index.html", "dblp": "327/1630;117/7283;;56/316;311/6386;327/1837;119/4993;;;;125/4685;13/11370;18/2874", "google_scholar": "https://scholar.google.com/citations?hl=en;xDXKucIAAAAJ;8K-5SF8AAAAJ;https://scholar.google.it/citations?user=56FwOjUAAAAJ;;sngRCNkAAAAJ;;https://scholar.google.co.uk/citations?user=QBgOje0AAAAJ;;;https://scholar.google.de/citations?user=ssqr44UAAAAJ;https://scholar.google.fr/citations?user=vsskO0AAAAAJ;https://scholar.google.it/citations?user=C-HCQ9cAAAAJ", "orcid": "0000-0002-0890-0703;0000-0003-2780-5610;;0000-0002-5596-2050;0000-0003-2289-3455;0000-0002-1721-5425;0000-0002-4038-258X;;;;0000-0002-7559-6019;0000-0002-5342-1330;0000-0003-1700-8909", "linkedin": ";jan-bim-jr/;;vitod/;;;;;;;https://linkedin.com/in/tobias-h-donner-51216213?original_referer=https%3A%2F%2Fwww.google.com%2F;;", "or_profile": "~Marco_Celotto1;~Jan_B\u00edm1;~Alejandro_Tlaie1;~Vito_De_Feo1;~Alessandro_Toso1;~Stefan_M_Lemke1;~Daniel_Chicharro1;~Hamed_Nili1;~Malte_Bieler1;~Ileana_Livia_Hanganu-Opatz1;~Tobias_H._Donner1;~Andrea_Brovelli1;~Stefano_Panzeri1", "aff": "University of Bologna;;Ernst Strungmann Institute for Neuroscience;University of Essex;University Medical Center Hamburg-Eppendorf;University of North Carolina at Chapel Hill;City, University of London;Universit\u00e4t Hamburg;;University Medical Center Hamburg-Eppendorf;University Medical Center Hamburg-Eppendorf;CNRS;University Medical Center Hamburg-Eppendorf", "aff_domain": "unibo.it;;esi-frankfurt.de;essex.ac.uk;uke.de;unc.edu;city.ac.uk;uni-hamburg.de;;uke.de;uke.de;cnrs.fr;uke.de", "position": "PhD student;;Postdoc;Lecturer;Postdoc;Postdoc;Lecturer;Postdoc;;Full Professor;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ncelotto2023an,\ntitle={An information-theoretic quantification of the content of communication between brain regions},\nauthor={Marco Celotto and Jan B{\\'\\i}m and Alejandro Tlaie and Vito De Feo and Alessandro Toso and Stefan M Lemke and Daniel Chicharro and Hamed Nili and Malte Bieler and Ileana Livia Hanganu-Opatz and Tobias H. 
Donner and Andrea Brovelli and Stefano Panzeri},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lD8xaUWw24}\n}", "github": "", "project": "", "reviewers": "bTa8;sTPd;TXME;LrX8", "pdf_size": 16265206, "rating": "6;6;7;7", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "54;226;145;109", "wc_strengths": "94;112;67;53", "wc_weaknesses": "67;557;34;139", "wc_questions": "11;250;81;70", "wc_limitations": "8;64;15;29", "wc_review": "234;1209;342;400", "wc_reply_reviewers": "21;485;30;25", "wc_reply_authors": "117;1057;167;110", "reply_reviewers": "1;1;1;1", "reply_authors": "3;5;3;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 133.5, 62.46799180380301 ], "wc_strengths_avg": [ 81.5, 22.96192500641007 ], "wc_weaknesses_avg": [ 199.25, 210.00758914858292 ], "wc_questions_avg": [ 103.0, 88.94661320140301 ], "wc_limitations_avg": [ 29.0, 21.575449010391416 ], "wc_review_avg": [ 546.25, 387.24822465700214 ], "wc_reply_reviewers_avg": [ 140.25, 199.06704272681603 ], "wc_reply_authors_avg": [ 362.75, 401.4276865140221 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6432022206233859943&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "email": "unibo.it;;esi-frankfurt.de;essex.ac.uk;uke.de;unc.edu;city.ac.uk;uni-hamburg.de;;uke.de;uke.de;cnrs.fr;uke.de", "author_num": 13, "aff_unique_index": "0;1;2;3;4;5;6;3;3;7;3", "aff_unique_norm": "University of Bologna;Ernst Strungmann Institute for Neuroscience;University of Essex;University Medical Center Hamburg-Eppendorf;University of North Carolina;City, University of London;University of Hamburg;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.unibo.it;https://www.ernst-strungmann-institute.org;https://www.essex.ac.uk;https://www.uke.de;https://www.unc.edu;https://www.city.ac.uk;https://www.uni-hamburg.de;https://www.cnrs.fr", "aff_unique_abbr": "Unibo;;Essex;UMCH;UNC;City, University of London;UHH;CNRS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;1;2;1;3;2;1;1;1;4;1", "aff_country_unique": "Italy;Germany;United Kingdom;United States;France" }, { "title": "AutoGO: Automated Computation Graph Optimization for Neural Network Evolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70604", "id": "lDI3ZuyzM9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb5d9195b201ec7ba66c8e20b396d349-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lDI3ZuyzM9", "openreview": "https://openreview.net/forum?id=lDI3ZuyzM9", "poster": "/media/PosterPDFs/NeurIPS%202023/70604.png?t=1701195382.229938", "slides": "https://nips.cc/virtual/2023/poster/70604", "video": "https://nips.cc/virtual/2023/poster/70604", "author_site": "Mohammad Salameh, Keith Mills, Negar Hassanpour, Fred Han, Shuting Zhang, Wei Lu, Shangling Jui, CHUNHUA ZHOU, Fengyu Sun, Di Niu", "tldr": "", "abstract": "Optimizing Deep Neural Networks (DNNs) to obtain high-quality models for efficient real-world 
deployment has posed multi-faceted challenges to machine learning engineers. Existing methods either search for neural architectures in heuristic design spaces or apply low-level adjustments to computation primitives to improve inference efficiency on hardware. We present Automated Graph Optimization (AutoGO), a framework to evolve neural networks in a low-level Computation Graph (CG) of primitive operations to improve both their performance and hardware friendliness. Through a tokenization scheme, AutoGO performs variable-sized segment mutations, making both primitive changes and larger-grained changes to CGs. We introduce our segmentation and mutation algorithms, an efficient frequent segment mining technique, and a pretrained context-aware predictor to estimate the impact of segment replacements. Extensive experimental results show that AutoGO can automatically evolve several typical large convolutional networks to achieve significant task performance improvement and FLOPs reduction on a range of CV tasks, from Classification and Semantic Segmentation to Human Pose Estimation and Super Resolution, without introducing any new primitive operations. We also demonstrate the lightweight deployment results of AutoGO-optimized super-resolution and denoising U-Nets on a cycle simulator for a Neural Processing Unit (NPU), achieving PSNR improvement and latency/power reduction simultaneously. Code is available at https://github.com/Ascend-Research/AutoGO.", "keywords": "Neural Architecture Search;Optimization Framework;Performance Prediction", "primary_area": "", "supplementary_material": "/attachment/c6dd670cef7ca9f6608b201ee27d4d7225c3b212.zip", "author": "Mohammad Salameh;Keith G. Mills;Negar Hassanpour;Fred X. Han;Shuting Zhang;Wei Lu;SHANGLING JUI;CHUNHUA ZHOU;Fengyu Sun;Di Niu", "authorids": "~Mohammad_Salameh1;~Keith_G._Mills1;~Negar_Hassanpour1;~Fred_X._Han1;~Shuting_Zhang1;~Wei_Lu17;~SHANGLING_JUI1;~CHUNHUA_ZHOU1;~Fengyu_Sun1;~Di_Niu1", "gender": "M;M;F;;F;M;M;M;M;M", "homepage": ";https://kgmills.github.io/;http://webdocs.cs.ualberta.ca/~hassanpo/;;https://github.com/stzhang1994;;;;https://github.com/GideonsunDzgg;https://www.ualberta.ca/~dniu", "dblp": "91/9402;299/5864;165/8146;;;;;;;82/4953", "google_scholar": "https://scholar.google.ca/citations?hl=en;CBOD_ngAAAAJ;https://scholar.google.ca/citations?user=g7GMn3gAAAAJ;;;;;;;https://scholar.google.ca/citations?user=3kC5OogAAAAJ", "orcid": ";0000-0001-6054-1798;;;;;0000-0002-1047-4264;;;0000-0002-5250-7327", "linkedin": "mohammadsalameh;kgmills/;;;;wei-lu-8969221a0/;;chunhua-zhou-6002b218a/;;", "or_profile": "~Mohammad_Salameh1;~Keith_G._Mills1;~Negar_Hassanpour1;~Fred_X._Han1;~Shuting_Zhang1;~Wei_Lu17;~SHANGLING_JUI1;~CHUNHUA_ZHOU1;~Fengyu_Sun1;~Di_Niu1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;;Huawei Technologies Ltd.;;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;University of Alberta", "aff_domain": "huawei.com;huawei.com;huawei.com;;huawei.com;;huawei.com;huawei.com;huawei.com;ualberta.ca", "position": "Principal Researcher;Research Intern;Researcher;;Researcher;;Principal Researcher;Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nsalameh2023autogo,\ntitle={Auto{GO}: Automated Computation Graph Optimization for Neural Network Evolution},\nauthor={Mohammad Salameh and Keith G. Mills and Negar Hassanpour and Fred X. 
Han and Shuting Zhang and Wei Lu and SHANGLING JUI and CHUNHUA ZHOU and Fengyu Sun and Di Niu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lDI3ZuyzM9}\n}", "github": "", "project": "", "reviewers": "AwFS;MAXz;A8eP;sA4y", "pdf_size": 753363, "rating": "3;4;7;7", "confidence": "5;4;4;4", "soundness": "1;3;3;4", "novelty": "1;2;3;4", "presentation": "2;2;3;4", "wc_summary": "87;100;74;148", "wc_strengths": "12;120;33;86", "wc_weaknesses": "352;737;72;138", "wc_questions": "1;88;112;101", "wc_limitations": "34;45;6;8", "wc_review": "486;1090;297;481", "wc_reply_reviewers": "0;861;41;20", "wc_reply_authors": "93;1657;371;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;4;2;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 102.25, 27.9676152004421 ], "wc_strengths_avg": [ 62.75, 42.65779530167962 ], "wc_weaknesses_avg": [ 324.75, 259.543228576667 ], "wc_questions_avg": [ 75.5, 43.84347157787577 ], "wc_limitations_avg": [ 23.25, 16.723860200324566 ], "wc_review_avg": [ 588.5, 299.38979608530417 ], "wc_reply_reviewers_avg": [ 230.5, 364.30790548655403 ], "wc_reply_authors_avg": [ 530.25, 664.6951839001092 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.7276068751089989, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14350776300835692004&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "huawei.com;huawei.com;huawei.com;;huawei.com;;huawei.com;huawei.com;huawei.com;ualberta.ca", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;1", "aff_unique_norm": "Huawei;University of Alberta", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.ualberta.ca", "aff_unique_abbr": "Huawei;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;Canada" }, { "id": "lENeWLXn4W", "title": "A New Linear Scaling Rule for Differentially Private Hyperparameter Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "A major direction in differentially private (DP) machine learning is DP fine-tuning: pretraining a model on a source of public data and transferring the extracted features to downstream tasks.\nThis is an important setting because many industry deployments fine-tune publicly available feature extractors on proprietary data for downstream tasks.\nIn this paper we propose a new linear scaling rule, a hyperparameter optimization algorithm that privately selects hyperparameters to optimize the privacy-utility tradeoff.\nA key insight into the design of our method is that our new linear scaling rule jointly increases the step size and number of steps as $\\varepsilon$ increases.\nOur work is the first to obtain state-of-the-art performance on a suite of 16 benchmark tasks across computer vision and natural language processing for a wide range of $\\varepsilon \\in [0.01,8.0]$ while accounting for the privacy cost of hyperparameter tuning.", "keywords": "Differential privacy;deep learning", "primary_area": "", "supplementary_material": 
"/attachment/ec451f12154690d6214f5f388033238b8eaa4484.pdf", "author": "Ashwinee Panda;Xinyu Tang;Vikash Sehwag;Saeed Mahloujifar;Prateek Mittal", "authorids": "~Ashwinee_Panda1;~Xinyu_Tang1;~Vikash_Sehwag1;~Saeed_Mahloujifar1;~Prateek_Mittal1", "gender": "M;;M;M;", "homepage": "https://kiddyboots216.github.io/;;https://vsehwag.github.io/;https://www.cs.virginia.edu/~sm5fd/;http://www.princeton.edu/~pmittal/", "dblp": "270/1582.html;65/5518;187/5613;208/0825;", "google_scholar": "FM7JCgQAAAAJ;uwcdL7gAAAAJ;JAkeEG8AAAAJ;kW-hl3YAAAAJ;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ", "orcid": ";;;;0000-0002-4057-0118", "linkedin": "https://linkedin.com/in/ashwineepanda;;;;", "or_profile": "~Ashwinee_Panda1;~Xinyu_Tang1;~Vikash_Sehwag1;~Saeed_Mahloujifar1;~Prateek_Mittal1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@misc{\npanda2023a,\ntitle={A New Linear Scaling Rule for Differentially Private Hyperparameter Optimization},\nauthor={Ashwinee Panda and Xinyu Tang and Vikash Sehwag and Saeed Mahloujifar and Prateek Mittal},\nyear={2023},\nurl={https://openreview.net/forum?id=lENeWLXn4W}\n}", "github": "", "project": "", "reviewers": "KKeT;kJWb;tXcJ;BCCV", "site": "https://openreview.net/forum?id=lENeWLXn4W", "pdf_size": 383634, "rating": "3;5;5;6", "confidence": "4;3;3;4", "soundness": "1;2;2;3", "novelty": "3;2;3;4", "presentation": "3;2;1;3", "wc_summary": "24;97;142;119", "wc_strengths": "47;19;127;108", "wc_weaknesses": "351;226;698;173", "wc_questions": "35;177;218;48", "wc_limitations": "5;16;53;44", "wc_review": "462;535;1238;492", "wc_reply_reviewers": "80;423;183;121", "wc_reply_authors": "561;1649;115;945", "reply_reviewers": "1;5;1;1", "reply_authors": "3;6;2;3", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 95.5, 44.24081825644729 ], "wc_strengths_avg": [ 75.25, 43.91113184603649 ], "wc_weaknesses_avg": [ 362.0, 204.47126937543084 ], "wc_questions_avg": [ 119.5, 79.46854723725608 ], "wc_limitations_avg": [ 29.5, 19.653244007033546 ], "wc_review_avg": [ 681.75, 322.1974355888017 ], "wc_reply_reviewers_avg": [ 201.75, 132.89728176302177 ], "wc_reply_authors_avg": [ 817.5, 562.7937011019225 ], "reply_reviewers_avg": [ 2.0, 1.7320508075688772 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vBWXHH-o5bYJ:scholar.google.com/&scioq=A+New+Linear+Scaling+Rule+for+Differentially+Private+Hyperparameter+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The CLIP Model is Secretly an Image-to-Prompt Converter", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70603", "id": "lHa7gFbmvS", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b00ef390dcd5f147fd7c5c2bb35f09be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lHa7gFbmvS", "openreview": "https://openreview.net/forum?id=lHa7gFbmvS", "poster": "/media/PosterPDFs/NeurIPS%202023/70603.png?t=1699330183.4786546", "slides": "https://nips.cc/virtual/2023/poster/70603", "video": "https://nips.cc/virtual/2023/poster/70603", "author_site": "Yuxuan Ding, Chunna Tian, Haoxuan Ding, Lingqiao Liu", "tldr": "", "abstract": "The Stable Diffusion model is a prominent text-to-image generation model that relies on a text prompt as its input, which is encoded using the Contrastive Language-Image Pre-Training (CLIP). However, text prompts have limitations when it comes to incorporating implicit information from reference images. Existing methods have attempted to address this limitation by employing expensive training procedures involving millions of training samples for image-to-image generation. In contrast, this paper demonstrates that the CLIP model, as utilized in Stable Diffusion, inherently possesses the ability to instantaneously convert images into text prompts. Such an image-to-prompt conversion can be achieved by utilizing a linear projection matrix that is calculated in a closed form. Moreover, the paper showcases that this capability can be further enhanced by either utilizing a small amount of similar-domain training data (approximately 100 images) or incorporating several online training steps (around 30 iterations) on the reference images. By leveraging these approaches, the proposed method offers a simple and flexible solution to bridge the gap between images and text prompts. This methodology can be applied to various tasks such as image variation and image editing, facilitating more effective and seamless interaction between images and textual prompts.", "keywords": "Diffusion Model;CLIP model;Image Variation;Customized Generation", "primary_area": "", "supplementary_material": "/attachment/db5ec3a27d0d996c2fbdc269468d13d84175b96e.pdf", "author": "Yuxuan Ding;Chunna Tian;Haoxuan Ding;Lingqiao Liu", "authorids": "~Yuxuan_Ding1;~Chunna_Tian1;~Haoxuan_Ding1;~Lingqiao_Liu3", "gender": "M;F;M;M", "homepage": ";;;https://sites.google.com/site/lingqiaoliu83/", "dblp": ";23/3700;254/9727;45/7776", "google_scholar": "uOii3uEAAAAJ;;;Y2xu62UAAAAJ", "orcid": ";;0000-0001-5444-7332;", "linkedin": "yuxuan-ding-007563318/;;;", "or_profile": "~Yuxuan_Ding1;~Chunna_Tian1;~Haoxuan_Ding1;~Lingqiao_Liu3", "aff": "Xidian University;Xidian University ;Northwest Polytechnical University Xi'an;The University of Adelaide", "aff_domain": "xidian.edu.cn;xidian.edu.cn;nwpu.edu.cn;adelaide.edu.au", "position": "PhD student;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nding2023the,\ntitle={The {CLIP} Model is Secretly an Image-to-Prompt Converter},\nauthor={Yuxuan Ding and Chunna Tian and Haoxuan Ding and Lingqiao Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lHa7gFbmvS}\n}", "github": "", "project": "", "reviewers": "hEWJ;aD4y;rWok;1WWU", "pdf_size": 10401351, "rating": "5;5;5;7", "confidence": "5;3;4;3", "soundness": "3;3;3;4", "novelty": "2;2;2;4", "presentation": "2;2;3;4", "wc_summary": "37;78;87;100", "wc_strengths": "46;147;75;178", "wc_weaknesses": "145;532;115;34", "wc_questions": "16;2;52;28", "wc_limitations": "8;29;2;28", "wc_review": "252;788;331;368", "wc_reply_reviewers": 
"12;255;18;17", "wc_reply_authors": "85;328;93;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.5, 23.56374333589636 ], "wc_strengths_avg": [ 111.5, 53.16248677404021 ], "wc_weaknesses_avg": [ 206.5, 192.26349107409862 ], "wc_questions_avg": [ 24.5, 18.350749303502567 ], "wc_limitations_avg": [ 16.75, 11.94518731540029 ], "wc_review_avg": [ 434.75, 208.2082791341401 ], "wc_reply_reviewers_avg": [ 75.5, 103.65929770165337 ], "wc_reply_authors_avg": [ 126.5, 121.91082806707531 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13264900683062250627&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "xidian.edu.cn;xidian.edu.cn;nwpu.edu.cn;adelaide.edu.au", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Xidian University;Northwest Polytechnical University;University of Adelaide", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.nwpu.edu.cn;https://www.adelaide.edu.au", "aff_unique_abbr": "Xidian;NWPU;Adelaide", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;Australia" }, { "title": "Gold-YOLO: Efficient Object Detector via Gather-and-Distribute Mechanism", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70602", "id": "lJDoPAjkCV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a0673542a242759ea637972f053b2e0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lJDoPAjkCV", "openreview": "https://openreview.net/forum?id=lJDoPAjkCV", "poster": "/media/PosterPDFs/NeurIPS%202023/70602.png?t=1698075176.0605502", "slides": "https://nips.cc/virtual/2023/poster/70602", "video": "https://nips.cc/virtual/2023/poster/70602", "author_site": "Chengcheng Wang, Wei He, Ying Nie, Jianyuan Guo, Chuanjian Liu, Yunhe Wang, Kai Han", "tldr": "", "abstract": "In the past years, YOLO-series models have emerged as the leading approaches in the area of real-time object detection. Many studies pushed up the baseline to a higher level by modifying the architecture, augmenting data and designing new losses. However, we find previous models still suffer from information fusion problem, although Feature Pyramid Network (FPN) and Path Aggregation Network (PANet) have alleviated this. Therefore, this study provides an advanced Gatherand-Distribute mechanism (GD) mechanism, which is realized with convolution and self-attention operations. This new designed model named as Gold-YOLO, which boosts the multi-scale feature fusion capabilities and achieves an ideal balance between latency and accuracy across all model scales. Additionally, we implement MAE-style pretraining in the YOLO-series for the first time, allowing YOLOseries models could be to benefit from unsupervised pretraining. Gold-YOLO-N attains an outstanding 39.9% AP on the COCO val2017 datasets and 1030 FPS on a T4 GPU, which outperforms the previous SOTA model YOLOv6-3.0-N with similar FPS by +2.4%. 
The PyTorch code is available at https://github.com/huawei-noah/Efficient-Computing/tree/master/Detection/Gold-YOLO, and the MindSpore code is available at https://gitee.com/mindspore/models/tree/master/research/cv/Gold_YOLO.", "keywords": "YOLO;object detection;computer vision", "primary_area": "", "supplementary_material": "/attachment/ff7c9d0b907107f56441ffb9c474dab569eba821.pdf", "author": "Chengcheng Wang;Wei He;Ying Nie;Jianyuan Guo;Chuanjian Liu;Yunhe Wang;Kai Han", "authorids": "~Chengcheng_Wang1;~Wei_He10;~Ying_Nie1;~Jianyuan_Guo1;~Chuanjian_Liu1;~Yunhe_Wang1;~Kai_Han2", "gender": "M;;M;M;M;M;M", "homepage": ";;;https://ggjy.github.io/;;https://www.wangyunhe.site/;https://iamhankai.github.io", "dblp": ";;;190/0258;239/4010;63/8217-1;51/4757-2", "google_scholar": "OfmE9XUAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;BHfo1zkAAAAJ;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ;vThoBVcAAAAJ", "orcid": ";;;;;0000-0002-0142-509X;0000-0002-9761-2702", "linkedin": ";;;;;;", "or_profile": "~Chengcheng_Wang1;~Wei_He10;~Ying_Nie1;~Jianyuan_Guo1;~Chuanjian_Liu1;~Yunhe_Wang1;~Kai_Han2", "aff": "Huawei Technologies Ltd.;;Huawei Noah's Ark Lab;University of Sydney;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Institute of Software, Chinese Academy of Sciences", "aff_domain": "huawei.com;;huawei.com;usyd.edu.au;huawei.com;huawei.com;ios.ac.cn", "position": "Researcher;;Researcher;PhD student;Researcher;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nwang2023goldyolo,\ntitle={Gold-{YOLO}: Efficient Object Detector via Gather-and-Distribute Mechanism},\nauthor={Chengcheng Wang and Wei He and Ying Nie and Jianyuan Guo and Chuanjian Liu and Yunhe Wang and Kai Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lJDoPAjkCV}\n}", "github": "", "project": "", "reviewers": "c8e6;YJPg;CoJm;3APV;bFVa", "pdf_size": 34980227, "rating": "5;5;5;6;6", "confidence": "4;3;5;4;3", "soundness": "3;2;3;3;2", "novelty": "2;3;2;3;3", "presentation": "3;2;4;3;2", "wc_summary": "35;61;51;71;104", "wc_strengths": "37;41;68;86;26", "wc_weaknesses": "137;125;472;40;217", "wc_questions": "5;64;93;32;27", "wc_limitations": "1;1;42;19;11", "wc_review": "215;292;726;248;385", "wc_reply_reviewers": "0;0;156;0;0", "wc_reply_authors": "38;38;74;38;38", "reply_reviewers": "0;0;1;0;0", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 64.4, 23.09632005320328 ], "wc_strengths_avg": [ 51.6, 22.059918404200864 ], "wc_weaknesses_avg": [ 198.2, 147.95323585511738 ], "wc_questions_avg": [ 44.2, 30.837639338963676 ], "wc_limitations_avg": [ 14.8, 15.184202316881846 ], "wc_review_avg": [ 373.2, 185.41995577607068 ], "wc_reply_reviewers_avg": [ 31.2, 62.39999999999999 ], "wc_reply_authors_avg": [ 45.2, 14.4 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.32732683535398854, "gs_citation": 359, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8480207964516979155&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "huawei.com;;huawei.com;usyd.edu.au;huawei.com;huawei.com;ios.ac.cn", 
"author_num": 7, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Huawei;University of Sydney;Chinese Academy of Sciences", "aff_unique_dep": "Huawei Technologies;;Institute of Software", "aff_unique_url": "https://www.huawei.com;https://www.sydney.edu.au;http://www.ios.ac.cn", "aff_unique_abbr": "Huawei;USYD;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Unlimiformer: Long-Range Transformers with Unlimited Length Input", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70601", "id": "lJWUJWLCJo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f9806a5adc72b5b834b27e4c7c0df9b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lJWUJWLCJo", "openreview": "https://openreview.net/forum?id=lJWUJWLCJo", "poster": "/media/PosterPDFs/NeurIPS%202023/70601.png?t=1702278516.0184972", "slides": "https://nips.cc/virtual/2023/poster/70601", "video": "https://nips.cc/virtual/2023/poster/70601", "author_site": "Amanda Bertsch, Uri Alon, Graham Neubig, Matthew Gormley", "tldr": "", "abstract": "Since the proposal of transformers, these models have been limited to bounded input lengths, because of their need to attend to every token in the input. In this work, we propose Unlimiformer: a general approach that wraps any existing pretrained encoder-decoder transformer, and offloads the cross-attention computation to a single $k$-nearest-neighbor ($k$NN) index, while the returned $k$NN distances are the attention dot-product scores. This $k$NN index can be kept on either the GPU or CPU memory and queried in sub-linear time; this way, we can index practically unlimited input sequences, while every attention head in every decoder layer retrieves its top-$k$ keys, instead of attending to every key. We evaluate Unlimiformer on several long-document and book-summarization benchmarks, showing that it can process even **500k** token-long inputs from the BookSum dataset, without any input truncation at test time. We demonstrate that Unlimiformer improves pretrained models such as BART and Longformer by extending them to unlimited inputs without additional learned weights and without modifying their code. Our code and models are publicly available at https://github.com/abertsch72/unlimiformer , and support LLaMA-2 as well.", "keywords": "retrieval augmentation;summarization;long-context;generation;long-input;encoder-decoder;transformers;language models;natural language generation;natural language processing;deep learning;neural networks", "primary_area": "", "supplementary_material": "", "author": "Amanda Bertsch;Uri Alon;Graham Neubig;Matthew R. 
Gormley", "authorids": "~Amanda_Bertsch1;~Uri_Alon1;~Graham_Neubig1;~Matthew_R._Gormley1", "gender": "F;M;M;M", "homepage": "https://www.cs.cmu.edu/~abertsch/;https://urialon.ml/;http://phontron.com;http://www.cs.cmu.edu/~mgormley/", "dblp": "305/7615;40/2257-2;03/8155;116/0475", "google_scholar": "G1Jw4CYAAAAJ;https://scholar.google.co.il/citations?user=QBn7vq8AAAAJ;wlosgkoAAAAJ;GU0SZmYAAAAJ", "orcid": "0000-0002-1368-1111;;;", "linkedin": "amandabertsch;https://linkedin.com/in/urialon1/;;", "or_profile": "~Amanda_Bertsch1;~Uri_Alon1;~Graham_Neubig1;~Matthew_R._Gormley1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;3M", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;mmm.com", "position": "PhD student;Postdoc;Associate Professor;consultant", "bibtex": "@inproceedings{\nbertsch2023unlimiformer,\ntitle={Unlimiformer: Long-Range Transformers with Unlimited Length Input},\nauthor={Amanda Bertsch and Uri Alon and Graham Neubig and Matthew R. Gormley},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lJWUJWLCJo}\n}", "github": "", "project": "", "reviewers": "4zy3;VEz6;cHBL;ym6S;EQX8", "pdf_size": 372775, "rating": "5;6;7;7;7", "confidence": "4;3;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "58;31;161;76;144", "wc_strengths": "73;38;80;33;45", "wc_weaknesses": "139;122;47;253;102", "wc_questions": "22;16;672;124;196", "wc_limitations": "1;1;39;4;13", "wc_review": "293;208;999;490;500", "wc_reply_reviewers": "53;18;157;272;0", "wc_reply_authors": "32;25;89;671;0", "reply_reviewers": "1;1;2;3;0", "reply_authors": "2;2;3;4;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 94.0, 50.15575739633487 ], "wc_strengths_avg": [ 53.8, 19.051509126575773 ], "wc_weaknesses_avg": [ 132.6, 67.69519923894161 ], "wc_questions_avg": [ 206.0, 242.46071846796133 ], "wc_limitations_avg": [ 11.6, 14.38888459888396 ], "wc_review_avg": [ 498.0, 274.661245901201 ], "wc_reply_reviewers_avg": [ 100.0, 101.75067567343227 ], "wc_reply_authors_avg": [ 163.4, 255.46553583604972 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.10206207261596584, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3043498043356524323&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cmu.edu;cmu.edu;cmu.edu;mmm.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;3M Company", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.3m.com", "aff_unique_abbr": "CMU;3M", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PDP: Parameter-free Differentiable Pruning is All You Need", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70600", "id": "lLztVBaBVU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8f9f4eb32b9081a90f2a0b2627eb2a24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lLztVBaBVU", "openreview": "https://openreview.net/forum?id=lLztVBaBVU", 
"poster": "/media/PosterPDFs/NeurIPS%202023/70600.png?t=1697000886.2061157", "slides": "https://nips.cc/virtual/2023/poster/70600", "video": "https://nips.cc/virtual/2023/poster/70600", "author_site": "Minsik Cho, Saurabh Adya, Devang Naik", "tldr": "", "abstract": "DNN pruning is a popular way to reduce the size of a model, improve the inference\nlatency, and minimize the power consumption on DNN accelerators. However,\nexisting approaches might be too complex, expensive or ineffective to apply to\na variety of vision/language tasks, DNN architectures and to honor structured\npruning constraints. In this paper, we propose an efficient yet effective train-time\npruning scheme, Parameter-free Differentiable Pruning (PDP), which offers state-\nof-the-art qualities in model size, accuracy, and training cost. PDP uses a dynamic\nfunction of weights during training to generate soft pruning masks for the weights\nin a parameter-free manner for a given pruning target. While differentiable, the\nsimplicity and efficiency of PDP make it universal enough to deliver state-of-the-art\nrandom/structured/channel pruning results on various vision and natural language\ntasks. For example, for MobileNet-v1, PDP can achieve 68.2% top-1 ImageNet1k\naccuracy at 86.6% sparsity, which is 1.7% higher accuracy than those from the\nstate-of-the-art algorithms. Also, PDP yields over 83.1% accuracy on Multi-Genre\nNatural Language Inference with 90% sparsity for BERT, while the next best from\nthe existing techniques shows 81.5% accuracy. In addition, PDP can be applied to\nstructured pruning, such as N:M pruning and channel pruning. For 1:4 structured\npruning of ResNet18, PDP improved the top-1 ImageNet1k accuracy by over 3.6%\nover the state-of-the-art. For channel pruning of ResNet50, PDP reduced the top-1\nImageNet1k accuracy by 0.6% from the state-of-the-art.", "keywords": "pruning;cnn;transformers", "primary_area": "", "supplementary_material": "", "author": "Minsik Cho;Saurabh Adya;Devang Naik", "authorids": "~Minsik_Cho1;~Saurabh_Adya1;~Devang_Naik1", "gender": "M;M;M", "homepage": ";;", "dblp": ";https://dblp.uni-trier.de/pid/230/3574.html;66/9317", "google_scholar": "_AZys7EAAAAJ;greEG1EAAAAJ;wIQcv5sAAAAJ", "orcid": ";;", "linkedin": ";saurabh-adya-b126a3;https://linkedin.com/in/denaik", "or_profile": "~Minsik_Cho1;~Saurabh_Adya1;~Devang_Naik1", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@inproceedings{\ncho2023pdp,\ntitle={{PDP}: Parameter-free Differentiable Pruning is All You Need},\nauthor={Minsik Cho and Saurabh Adya and Devang Naik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lLztVBaBVU}\n}", "github": "", "project": "", "reviewers": "tv9j;qvwL;v4x5;8xNe;R2q9", "pdf_size": 8652984, "rating": "5;5;6;7;7", "confidence": "4;2;4;4;4", "soundness": "3;2;3;3;3", "novelty": "3;2;3;2;3", "presentation": "1;3;3;3;3", "wc_summary": "80;94;39;49;108", "wc_strengths": "63;49;46;42;85", "wc_weaknesses": "328;56;133;129;51", "wc_questions": "23;5;4;5;47", "wc_limitations": "10;14;50;3;8", "wc_review": "504;218;272;228;299", "wc_reply_reviewers": "44;0;14;0;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 74.0, 
26.237377917772193 ], "wc_strengths_avg": [ 57.0, 15.684387141358123 ], "wc_weaknesses_avg": [ 139.4, 100.48800923493312 ], "wc_questions_avg": [ 16.8, 16.690116836020053 ], "wc_limitations_avg": [ 17.0, 16.87601848778319 ], "wc_review_avg": [ 304.2, 104.13529660974707 ], "wc_reply_reviewers_avg": [ 14.0, 16.099689437998485 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5590169943749473, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15939468504750396916&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";;", "author_num": 3 }, { "title": "On the Interplay between Social Welfare and Tractability of Equilibria", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70599", "id": "lM0xyViO90", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c6d29852a049218d70108bbf5c48dfe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lM0xyViO90", "openreview": "https://openreview.net/forum?id=lM0xyViO90", "poster": "/media/PosterPDFs/NeurIPS%202023/70599.png?t=1701644670.2521014", "slides": "https://nips.cc/virtual/2023/poster/70599", "video": "https://nips.cc/virtual/2023/poster/70599", "author_site": "Ioannis Anagnostides, Tuomas Sandholm", "tldr": "", "abstract": "Computational tractability and social welfare (aka. efficiency) of equilibria are two fundamental but in general orthogonal considerations in algorithmic game theory. Nevertheless, we show that when (approximate) full efficiency can be guaranteed via a smoothness argument a la Roughgarden, Nash equilibria are approachable under a family of no-regret learning algorithms, thereby enabling fast and decentralized computation. We leverage this connection to obtain new convergence results in large games---wherein the number of players $n \\gg 1$---under the well-documented property of full efficiency via smoothness in the limit. Surprisingly, our framework unifies equilibrium computation in disparate classes of problems including games with vanishing strategic sensitivity and two-player zero-sum games, illuminating en route an immediate but overlooked equivalence between smoothness and a well-studied condition in the optimization literature known as the Minty property. Finally, we establish that a family of no-regret dynamics attains a welfare bound that improves over the smoothness framework while at the same time guaranteeing convergence to the set of coarse correlated equilibria. 
We show this by employing the clairvoyant mirror descent algorithm recently introduced by Piliouras et al.", "keywords": "learning in games;optimistic gradient descent;Nash equilibrium;price of anarchy;smooth games;social welfare", "primary_area": "", "supplementary_material": "", "author": "Ioannis Anagnostides;Tuomas Sandholm", "authorids": "~Ioannis_Anagnostides1;~Tuomas_Sandholm1", "gender": "M;M", "homepage": ";http://www.cs.cmu.edu/~sandholm", "dblp": "273/7648;s/TuomasSandholm", "google_scholar": "QVwDo_sAAAAJ;0DpK1EMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ioannis_Anagnostides1;~Tuomas_Sandholm1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nanagnostides2023on,\ntitle={On the Interplay between Social Welfare and Tractability of Equilibria},\nauthor={Ioannis Anagnostides and Tuomas Sandholm},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lM0xyViO90}\n}", "github": "", "project": "", "reviewers": "Qpnv;Q8mF;vrCA;SmPo;9HSU", "pdf_size": 593171, "rating": "4;6;7;7;8", "confidence": "3;3;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "99;60;222;129;167", "wc_strengths": "47;73;91;53;66", "wc_weaknesses": "336;49;107;54;72", "wc_questions": "69;25;29;56;5", "wc_limitations": "6;6;1;14;3", "wc_review": "557;213;450;306;313", "wc_reply_reviewers": "0;16;22;5;5", "wc_reply_authors": "43;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 135.4, 55.765939425423475 ], "wc_strengths_avg": [ 66.0, 15.517731793016658 ], "wc_weaknesses_avg": [ 123.6, 108.13066170148039 ], "wc_questions_avg": [ 36.8, 22.87706274852609 ], "wc_limitations_avg": [ 6.0, 4.427188724235731 ], "wc_review_avg": [ 367.8, 121.11052803121618 ], "wc_reply_reviewers_avg": [ 9.6, 8.114185110040317 ], "wc_reply_authors_avg": [ 8.6, 17.2 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.22116293423234576, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7387033137043337205&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Hypothesis Selection with Memory Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70598", "id": "lM1UnEssuX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9dd67d30e0edd53581363c1b49006e1d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lM1UnEssuX", "openreview": "https://openreview.net/forum?id=lM1UnEssuX", "poster": "/media/PosterPDFs/NeurIPS%202023/70598.png?t=1702173787.5701618", "slides": "https://nips.cc/virtual/2023/poster/70598", "video": 
"https://nips.cc/virtual/2023/poster/70598", "author_site": "Maryam Aliakbarpour, Mark Bun, Adam Smith", "tldr": "", "abstract": "Hypothesis selection is a fundamental problem in learning theory and statistics. \nGiven a dataset and a finite set of candidate distributions, the goal is to select a distribution that matches the data as well as possible. \nMore specifically, suppose we have sample access to an unknown distribution $P$ over a domain $\\mathcal{X}$ that we know is well-approximated by one of a \na class of $n$ distributions (a.k.a. hypotheses), $\\mathcal{H} \\coloneqq \\{H_1, H_2, \\ldots, H_n\\}$. The goal is to design an algorithm that outputs a distribution $\\hat{H} \\in \\mathcal{H}$ whose total variation distance from $P$ is nearly minimal.\n\nIn this work, we study the hypothesis selection problem under memory constraints. We consider a model where samples from $P$ are presented in a stream and we access each sample $x$ via ``PDF-comparison'' queries that allow us to compare the probability densities of any pair of hypotheses\nat the domain point $x$ (i.e., is $H_i(x) < H_j(x)$?). This model allows us to study how much memory is needed at any point in time to store information about the portion of the stream seen so far.\n\nOur main result is an algorithm that achieves a nearly optimal tradeoff between memory usage and the number of samples required. In particular, given $b$ bits of memory (for $b$ roughly between $\\log n$ and $n$), our algorithm solves the hypothesis selection problem with $s$ samples, where $b \\cdot s = O(n \\log n)$. This result is optimal up to an $O(\\log n)$ factor, for all $b$.", "keywords": "Hypothesis selection;memory constrained algorithms;density estimation;limited space", "primary_area": "", "supplementary_material": "/attachment/1bf898f8e86406271474dae69d41db4eb834f629.pdf", "author": "Maryam Aliakbarpour;Mark Bun;Adam Smith", "authorids": "~Maryam_Aliakbarpour1;~Mark_Bun1;~Adam_Smith1", "gender": "F;;M", "homepage": "https://maryamaliakbarpour.com;https://cs-people.bu.edu/mbun/;http://cs-people.bu.edu/ads22", "dblp": "175/1689;126/4933;04/5072", "google_scholar": "Q0crxvwAAAAJ;oDwLyYUAAAAJ;fkGi-JMAAAAJ", "orcid": "0000-0001-5064-3221;;", "linkedin": ";;", "or_profile": "~Maryam_Aliakbarpour1;~Mark_Bun1;~Adam_Smith1", "aff": "Northeastern University;Boston University;Google", "aff_domain": "northeastern.edu;bu.edu;google.com", "position": "Postdoc;Assistant Professor;Researcher", "bibtex": "@inproceedings{\naliakbarpour2023hypothesis,\ntitle={Hypothesis Selection with Memory Constraints},\nauthor={Maryam Aliakbarpour and Mark Bun and Adam Smith},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lM1UnEssuX}\n}", "github": "", "project": "", "reviewers": "Awx8;5jdF;5dQu;8owE", "pdf_size": 492641, "rating": "6;6;6;7", "confidence": "2;3;3;4", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "3;4;2;3", "wc_summary": "167;131;169;41", "wc_strengths": "36;18;52;106", "wc_weaknesses": "108;58;178;395", "wc_questions": "24;29;3;54", "wc_limitations": "4;13;6;18", "wc_review": "339;249;408;614", "wc_reply_reviewers": "0;0;57;34", "wc_reply_authors": "0;0;0;294", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 127.0, 
51.90375708944392 ], "wc_strengths_avg": [ 53.0, 32.87856444554719 ], "wc_weaknesses_avg": [ 184.75, 128.65336179051056 ], "wc_questions_avg": [ 27.5, 18.145247311624054 ], "wc_limitations_avg": [ 10.25, 5.584576975922169 ], "wc_review_avg": [ 402.5, 134.49628247650566 ], "wc_reply_reviewers_avg": [ 22.75, 24.159625411003375 ], "wc_reply_authors_avg": [ 73.5, 127.30573435631248 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3085957977480129027&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "northeastern.edu;bu.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Northeastern University;Boston University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.northeastern.edu;https://www.bu.edu;https://www.google.com", "aff_unique_abbr": "NEU;BU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Energy-Based Cross Attention for Bayesian Context Update in Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70597", "id": "lOCHMGO6ow", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0878b7efa656b3bbd407c9248d13751-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lOCHMGO6ow", "openreview": "https://openreview.net/forum?id=lOCHMGO6ow", "poster": "/media/PosterPDFs/NeurIPS%202023/70597.png?t=1702022464.2981806", "slides": "https://nips.cc/virtual/2023/poster/70597", "video": "https://nips.cc/virtual/2023/poster/70597", "author_site": "Geon Yeong Park, Jeongsol Kim, Beomsu Kim, Sang Wan Lee, Jong Chul Ye", "tldr": "", "abstract": "Despite the remarkable performance of text-to-image diffusion models in image generation tasks, recent studies have raised the issue that generated images sometimes cannot capture the intended semantic contents of the text prompts, which phenomenon is often called semantic misalignment. To address this, here we present a novel energy-based model (EBM) framework for adaptive context control by modeling the posterior of context vectors. Specifically, we first formulate EBMs of latent image representations and text embeddings in each cross-attention layer of the denoising autoencoder. Then, we obtain the gradient of the log posterior of context vectors, which can be updated and transferred to the subsequent cross-attention layer, thereby implicitly minimizing a nested hierarchy of energy functions. \nOur latent EBMs further allow zero-shot compositional generation as a linear combination of cross-attention outputs from different contexts. \nUsing extensive experiments, we demonstrate that the proposed method is highly effective in handling various image generation tasks, including multi-concept generation, text-guided image inpainting, and real and synthetic image editing. 
Code: https://github.com/EnergyAttention/Energy-Based-CrossAttention.", "keywords": "Diffusion model;Energy-based model;Text-to-image generation", "primary_area": "", "supplementary_material": "/attachment/a99f0cfea8cd482e3d18089e07d95600773008ba.pdf", "author": "Geon Yeong Park;Jeongsol Kim;Beomsu Kim;Sang Wan Lee;Jong Chul Ye", "authorids": "~Geon_Yeong_Park1;~Jeongsol_Kim1;~Beomsu_Kim1;~Sang_Wan_Lee1;~Jong_Chul_Ye1", "gender": "M;M;M;M;M", "homepage": "https://geonyeong-park.github.io/;https://bispl.weebly.com/;;https://aibrain.kaist.ac.kr/sang-wan-lee;https://bispl.weebly.com/", "dblp": "289/5924;282/3103;;77/6650;15/5613", "google_scholar": "HGF4a14AAAAJ;ZaVNwcQAAAAJ;https://scholar.google.co.kr/citations?user=TofIFUgAAAAJ;0rMoHW4AAAAJ;HNMjoNEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Geon_Yeong_Park1;~Jeongsol_Kim1;~Beomsu_Kim1;~Sang_Wan_Lee1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npark2023energybased,\ntitle={Energy-Based Cross Attention for Bayesian Context Update in Text-to-Image Diffusion Models},\nauthor={Geon Yeong Park and Jeongsol Kim and Beomsu Kim and Sang Wan Lee and Jong Chul Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lOCHMGO6ow}\n}", "github": "", "project": "", "reviewers": "SVoL;tGPJ;jCTY;MvGR;nFGp;5Nym;2y1g", "pdf_size": 11363735, "rating": "3;5;5;5;5;6;6", "confidence": "4;2;3;3;4;3;4", "soundness": "2;3;2;2;2;4;3", "novelty": "2;3;3;2;2;3;4", "presentation": "2;3;2;2;2;3;4", "wc_summary": "35;51;51;54;47;56;73", "wc_strengths": "37;36;55;20;47;47;65", "wc_weaknesses": "219;59;142;255;127;165;106", "wc_questions": "29;47;55;4;104;52;292", "wc_limitations": "1;1;8;10;4;5;7", "wc_review": "321;194;311;343;329;325;543", "wc_reply_reviewers": "139;90;272;38;84;22;60", "wc_reply_authors": "620;86;778;43;67;67;50", "reply_reviewers": "1;1;3;1;1;1;1", "reply_authors": "3;2;4;2;2;2;2", "rating_avg": [ 5.0, 0.9258200997725514 ], "confidence_avg": [ 3.2857142857142856, 0.6998542122237652 ], "soundness_avg": [ 2.5714285714285716, 0.7284313590846836 ], "novelty_avg": [ 2.7142857142857144, 0.6998542122237652 ], "presentation_avg": [ 2.5714285714285716, 0.7284313590846836 ], "wc_summary_avg": [ 52.42857142857143, 10.526933386823165 ], "wc_strengths_avg": [ 43.857142857142854, 13.46348046095247 ], "wc_weaknesses_avg": [ 153.28571428571428, 61.812421973290135 ], "wc_questions_avg": [ 83.28571428571429, 89.69745064973266 ], "wc_limitations_avg": [ 5.142857142857143, 3.1815796359028696 ], "wc_review_avg": [ 338.0, 95.65861920690382 ], "wc_reply_reviewers_avg": [ 100.71428571428571, 78.36310043766824 ], "wc_reply_authors_avg": [ 244.42857142857142, 290.85826550442897 ], "reply_reviewers_avg": [ 1.2857142857142858, 0.6998542122237652 ], "reply_authors_avg": [ 2.4285714285714284, 0.7284313590846836 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2204792759220492, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=630942755220512950&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": 
"kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Functional-Group-Based Diffusion for Pocket-Specific Molecule Generation and Elaboration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70596", "id": "lRG11M91dx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cdd4ce9330025967dd1ed0bed3010f5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lRG11M91dx", "openreview": "https://openreview.net/forum?id=lRG11M91dx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70596", "video": "https://nips.cc/virtual/2023/poster/70596", "author_site": "Haitao Lin, Yufei Huang, Odin Zhang, Yunfan Liu, Lirong Wu, Siyuan Li, Zhiyuan Chen, Stan Z. Li", "tldr": "", "abstract": "In recent years, AI-assisted drug design methods have been proposed to generate molecules given the pockets' structures of target proteins. Most of them are {\\em atom-level-based} methods, which consider atoms as basic components and generate atom positions and types. In this way, however, it is hard to generate realistic fragments with complicated structures. To solve this, we propose \\textsc{D3FG}, a {\\em functional-group-based} diffusion model for pocket-specific molecule generation and elaboration. \\textsc{D3FG} decomposes molecules into two categories of components: functional groups defined as rigid bodies and linkers as mass points. And the two kinds of components can together form complicated fragments that enhance ligand-protein interactions.\n To be specific, in the diffusion process, \\textsc{D3FG} diffuses the data distribution of the positions, orientations, and types of the components into a prior distribution; In the generative process, the noise is gradually removed from the three variables by denoisers parameterized with designed equivariant graph neural networks. In the experiments, our method can generate molecules with more realistic 3D structures, competitive affinities toward the protein targets, and better drug properties. Besides, \\textsc{D3FG} as a solution to a new task of molecule elaboration, could generate molecules with high affinities based on existing ligands and the hotspots of target proteins.", "keywords": "sturcture-based drug design; molecule generation; diffusion model", "primary_area": "", "supplementary_material": "/attachment/915215500b231d9301a3ffaadda08b3f7a9f0bb2.pdf", "author": "Haitao Lin;Yufei Huang;Odin Zhang;Yunfan Liu;Lirong Wu;Siyuan Li;Zhiyuan Chen;Stan Z. 
Li", "authorids": "~Haitao_Lin2;~Yufei_Huang4;~Odin_Zhang1;~Yunfan_Liu2;~Lirong_Wu1;~Siyuan_Li6;~Zhiyuan_Chen5;~Stan_Z._Li2", "gender": "M;M;;M;;M;M;M", "homepage": ";https://2021.igem.org/Team:ZJU-China;https://haotianzhangai4science.github.io/;https://github.com/XYxiyang;;https://lupin1998.github.io/;https://zyc.ai;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "34/1040;68/1946-2;;170/8550-2;15/10330;63/9705-2;192/0196-8;l/StanZLi", "google_scholar": "o5A23qIAAAAJ;qmTjdwIAAAAJ;ypnp3YwAAAAJ;;Tk7TrCoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;CKiY8PIAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0009-0007-8184-4529;;0009-0002-1639-5855;;0000-0001-6806-2468;0000-0003-3210-0324;", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;%E9%99%9F%E5%8E%9F-%E9%99%88-0b473aa9;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Haitao_Lin2;~Yufei_Huang4;~Odin_Zhang1;~Yunfan_Liu2;~Lirong_Wu1;~Siyuan_Li6;~Zhiyuan_Chen5;~Stan_Z._Li1", "aff": "Westlake University;Zhejiang University;;Tongji University;Westlake University;Alibaba Group;DP Technology;Westlake University", "aff_domain": "westlake.edu.cn;zju.edu.cn;;tongji.edu.cn;westlake.edu.cn;alibaba-inc.com;dp.tech;westlake.edu.cn", "position": "PhD student;PhD student;;Undergrad student;PhD student;Intern;Researcher;Chair Professor", "bibtex": "@inproceedings{\nlin2023functionalgroupbased,\ntitle={Functional-Group-Based Diffusion for Pocket-Specific Molecule Generation and Elaboration},\nauthor={Haitao Lin and Yufei Huang and Odin Zhang and Yunfan Liu and Lirong Wu and Siyuan Li and Zhiyuan Chen and Stan Z. Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lRG11M91dx}\n}", "github": "", "project": "", "reviewers": "sFPS;6gZx;6wRC;i3fG;Y6qT", "pdf_size": 16978817, "rating": "3;4;5;6;7", "confidence": "4;5;4;4;4", "soundness": "2;3;2;3;4", "novelty": "2;1;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "108;31;160;97;77", "wc_strengths": "59;21;262;59;55", "wc_weaknesses": "277;194;242;119;58", "wc_questions": "9;23;207;73;327", "wc_limitations": "1;8;87;1;6", "wc_review": "454;277;958;349;523", "wc_reply_reviewers": "0;169;0;62;33", "wc_reply_authors": "51;541;51;25;201", "reply_reviewers": "0;2;0;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 94.6, 41.99333280414881 ], "wc_strengths_avg": [ 91.2, 86.58498715135322 ], "wc_weaknesses_avg": [ 178.0, 80.04248871693083 ], "wc_questions_avg": [ 127.8, 121.69864419951439 ], "wc_limitations_avg": [ 20.6, 33.31426121047862 ], "wc_review_avg": [ 512.2, 238.40922800932015 ], "wc_reply_reviewers_avg": [ 52.8, 62.53766864858331 ], "wc_reply_authors_avg": [ 173.8, 193.84364833545618 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.35355339059327384, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11853113174866087278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "westlake.edu.cn;zju.edu.cn;;tongji.edu.cn;westlake.edu.cn;alibaba-inc.com;dp.tech;westlake.edu.cn", 
"author_num": 8, "aff_unique_index": "0;1;2;0;3;4;0", "aff_unique_norm": "Westlake University;Zhejiang University;Tongji University;Alibaba Group;DP Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.westlake.edu.cn;https://www.zju.edu.cn;https://www.tongji.edu.cn;https://www.alibaba.com;", "aff_unique_abbr": "WU;ZJU;Tongji;Alibaba;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Social Motion Prediction with Cognitive Hierarchies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70595", "id": "lRu0dN7BY6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f4b52b45a677d855dee0ca9ba1ddf638-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lRu0dN7BY6", "openreview": "https://openreview.net/forum?id=lRu0dN7BY6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70595", "video": "https://nips.cc/virtual/2023/poster/70595", "author_site": "Wentao Zhu, Jason Qin, Yuke Lou, Hang Ye, Xiaoxuan Ma, Hai Ci, Yizhou Wang", "tldr": "", "abstract": "Humans exhibit a remarkable capacity for anticipating the actions of others and planning their own actions accordingly. In this study, we strive to replicate this ability by addressing the social motion prediction problem. We introduce a new benchmark, a novel formulation, and a cognition-inspired framework. We present Wusi, a 3D multi-person motion dataset under the context of team sports, which features intense and strategic human interactions and diverse pose distributions. By reformulating the problem from a multi-agent reinforcement learning perspective, we incorporate behavioral cloning and generative adversarial imitation learning to boost learning efficiency and generalization. Furthermore, we take into account the cognitive aspects of the human social action planning process and develop a cognitive hierarchy framework to predict strategic human social interactions. 
We conduct comprehensive experiments to validate the effectiveness of our proposed dataset and approach.", "keywords": "multi-person motion prediction", "primary_area": "", "supplementary_material": "", "author": "Wentao Zhu;Jason Qin;Yuke Lou;Hang Ye;Xiaoxuan Ma;Hai Ci;Yizhou Wang", "authorids": "~Wentao_Zhu3;~Jason_Qin1;~Yuke_Lou1;~Hang_Ye1;~Xiaoxuan_Ma2;~Hai_Ci1;~Yizhou_Wang1", "gender": "M;M;M;M;F;M;M", "homepage": "https://wentao.live;https://github.com/Asonin;https://thorin666.github.io/;https://alvinyh.github.io/;https://shirleymaxx.github.io/;;https://cfcs.pku.edu.cn/wangyizhou/", "dblp": "117/0354-4;;330/4468;40/11094;;227/4707;71/3387-1", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;mjP_5SEAAAAJ;GMrjppAAAAAJ;831z_VcAAAAJ", "orcid": ";;;;0000-0003-0571-2659;;", "linkedin": ";;;;;;", "or_profile": "~Wentao_Zhu3;~Jason_Qin1;~Yuke_Lou1;~Hang_Ye1;~Xiaoxuan_Ma2;~Hai_Ci1;~Yizhou_Wang1", "aff": "Peking University;Peking University;Peking University;Peking University;Peking University;National University of Singapore;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;nus.edu.sg;pku.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhu2023social,\ntitle={Social Motion Prediction with Cognitive Hierarchies},\nauthor={Wentao Zhu and Jason Qin and Yuke Lou and Hang Ye and Xiaoxuan Ma and Hai Ci and Yizhou Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lRu0dN7BY6}\n}", "github": "", "project": "", "reviewers": "Nb5i;EKS7;yiis;8iac;X2WW", "pdf_size": 1251158, "rating": "4;4;5;5;5", "confidence": "4;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;2;2;3", "presentation": "3;3;2;4;2", "wc_summary": "49;1;94;80;50", "wc_strengths": "25;1;41;59;52", "wc_weaknesses": "642;1;195;283;117", "wc_questions": "177;1;273;18;37", "wc_limitations": "40;1;3;1;71", "wc_review": "933;5;606;441;327", "wc_reply_reviewers": "0;0;1161;0;0", "wc_reply_authors": "43;0;2734;0;0", "reply_reviewers": "0;0;8;0;0", "reply_authors": "2;1;9;1;1", "rating_avg": [ 4.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 54.8, 32.008748804037936 ], "wc_strengths_avg": [ 35.6, 20.76150283577757 ], "wc_weaknesses_avg": [ 247.6, 217.91704843816143 ], "wc_questions_avg": [ 101.2, 106.15535784876805 ], "wc_limitations_avg": [ 23.2, 28.145337091603643 ], "wc_review_avg": [ 462.4, 306.62393905238383 ], "wc_reply_reviewers_avg": [ 232.2, 464.4 ], "wc_reply_authors_avg": [ 555.4, 1089.4272990888378 ], "reply_reviewers_avg": [ 1.6, 3.2000000000000006 ], "reply_authors_avg": [ 2.8, 3.124099870362662 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8896847002545562591&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;nus.edu.sg;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Peking University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "Peking U;NUS", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Extensible Prompts for Language Models on Zero-shot Language Style Customization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70594", "id": "lRxpVfDMzz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6fcbfb3721c1781728b10c6685cc2f6c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lRxpVfDMzz", "openreview": "https://openreview.net/forum?id=lRxpVfDMzz", "poster": "/media/PosterPDFs/NeurIPS%202023/70594.png?t=1702173590.7980795", "slides": "https://nips.cc/virtual/2023/poster/70594", "video": "https://nips.cc/virtual/2023/poster/70594", "author_site": "Tao Ge, Hu Jing, Li Dong, Shaoguang Mao, Yan Xia, Xun Wang, Si-Qing Chen, Furu Wei", "tldr": "", "abstract": "We propose eXtensible Prompt (X-Prompt) for prompting a large language model (LLM) beyond natural language (NL). X-Prompt instructs an LLM with not only NL but also an extensible vocabulary of imaginary words. Registering new imaginary words allows us to instruct the LLM to comprehend concepts that are difficult to describe with NL words, thereby making a prompt more descriptive. Also, these imaginary words are designed to be out-of-distribution (OOD) robust so that they can be (re)used like NL words in various prompts, distinguishing X-Prompt from soft prompt that is for fitting in-distribution data. We propose context-augmented learning (CAL) to learn imaginary words for general usability, enabling them to work properly in OOD (unseen) prompts. We experiment X-Prompt for zero-shot language style customization as a case study. The promising results of X-Prompt demonstrate its potential to facilitate advanced interaction beyond the natural language interface, bridging the communication gap between humans and LLMs.", "keywords": "large language model;prompt;imaginary words;OOD robustness;natural language;zero-shot", "primary_area": "", "supplementary_material": "", "author": "Tao Ge;Jing Hu;Li Dong;Shaoguang Mao;Yan Xia;Xun Wang;Si-Qing Chen;Furu Wei", "authorids": "~Tao_Ge1;v-hjing@microsoft.com;~Li_Dong1;shamao@microsoft.com;yanxia@microsoft.com;xunwang@microsoft.com;~Si-Qing_Chen1;~Furu_Wei1", "gender": "M;;M;;;;F;M", "homepage": "https://getao.github.io/;;http://dong.li;;;;;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "136/7923;;85/5090-4;;;;;72/5870", "google_scholar": "LYbs7Q8AAAAJ;;wEfQgPgAAAAJ;;;;;G-V1VpwAAAAJ", "orcid": ";;;;;;0000-0002-6945-4540;", "linkedin": ";;;;;;si-qing-chen-seattle/;", "or_profile": "~Tao_Ge1;v-hjing@microsoft.com;~Li_Dong1;shamao@microsoft.com;yanxia@microsoft.com;xunwang@microsoft.com;~Si-Qing_Chen1;~Furu_Wei1", "aff": "Microsoft Research;;Microsoft Research;;;;Microsoft;Microsoft Research", "aff_domain": "microsoft.com;;microsoft.com;;;;microsoft.com;microsoft.com", "position": "Principal Researcher;;Principal Researcher;;;;Partner Applied Science Manager;Distinguished Scientist", "bibtex": "@inproceedings{\nge2023extensible,\ntitle={Extensible Prompts for Language Models on Zero-shot Language Style Customization},\nauthor={Tao Ge and Jing Hu and Li Dong and Shaoguang Mao and Yan Xia and Xun Wang and Si-Qing Chen and Furu Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lRxpVfDMzz}\n}", "github": "", "project": "", "reviewers": "X5KZ;fphF;KFc6;kqay;GeEV", 
"pdf_size": 1387025, "rating": "5;5;6;6;7", "confidence": "4;4;3;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "4;1;3;3;3", "wc_summary": "113;28;60;42;79", "wc_strengths": "163;67;41;48;108", "wc_weaknesses": "209;381;102;66;220", "wc_questions": "177;27;24;26;154", "wc_limitations": "57;13;11;1;7", "wc_review": "719;516;238;183;568", "wc_reply_reviewers": "81;0;17;0;183", "wc_reply_authors": "292;0;22;0;46", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 64.4, 29.736173257499026 ], "wc_strengths_avg": [ 85.4, 45.257485568687976 ], "wc_weaknesses_avg": [ 195.6, 110.18638754401562 ], "wc_questions_avg": [ 81.6, 68.89586344621861 ], "wc_limitations_avg": [ 17.8, 20.023985617254127 ], "wc_review_avg": [ 444.8, 203.34148617534984 ], "wc_reply_reviewers_avg": [ 56.2, 70.0668252456182 ], "wc_reply_authors_avg": [ 72.0, 111.3049864112116 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.13363062095621217, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10095897245226366141&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "microsoft.com;;microsoft.com;;;;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Masked Space-Time Hash Encoding for Efficient Dynamic Scene Reconstruction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70593", "id": "lSLYXuLqRQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df31126302921ca9351fab73923a172f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lSLYXuLqRQ", "openreview": "https://openreview.net/forum?id=lSLYXuLqRQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70593", "video": "https://nips.cc/virtual/2023/poster/70593", "author_site": "Feng Wang, Zilong Chen, Guokang Wang, Yafei Song, Huaping Liu", "tldr": "", "abstract": "In this paper, we propose the Masked Space-Time Hash encoding (MSTH), a novel method for efficiently reconstructing dynamic 3D scenes from multi-view or monocular videos. Based on the observation that dynamic scenes often contain substantial static areas that result in redundancy in storage and computations, MSTH represents a dynamic scene as a weighted combination of a 3D hash encoding and a 4D hash encoding. The weights for the two components are represented by a learnable mask which is guided by an uncertainty-based objective to reflect the spatial and temporal importance of each 3D position. 
With this design, our method can reduce the hash collision rate by avoiding redundant queries and modifications on static areas, making it feasible to represent a large number of space-time voxels by hash tables with small size. Besides, without the requirement to fit large numbers of temporally redundant features independently, our method is easier to optimize and converges rapidly with only twenty minutes of training for a 300-frame dynamic scene. We evaluate our method extensively on dynamic scenes. As a result, MSTH obtains consistently better results than previous state-of-the-art methods with only 20 minutes of training time and 130 MB of memory storage.", "keywords": "NeRF;Dynamic Scenes", "primary_area": "", "supplementary_material": "/attachment/db7e4738174a93a631717addb9722530b5543e71.zip", "author": "Feng Wang;Zilong Chen;Guokang Wang;Yafei Song;Huaping Liu", "authorids": "~Feng_Wang12;~Zilong_Chen1;~Guokang_Wang2;~Yafei_Song1;~Huaping_Liu3", "gender": "M;M;M;M;M", "homepage": ";https://heheyas.github.io/;https://github.com/Hymwgk;;https://sites.google.com/site/thuliuhuaping/", "dblp": "90/4225-34;;;;69/1097-1", "google_scholar": "bKG4Un8AAAAJ;2pbka1gAAAAJ;;VMO6UOgAAAAJ;https://scholar.google.com.hk/citations?user=HXnkIkwAAAAJ", "orcid": ";;0009-0006-2972-0863;;", "linkedin": ";https://www.linkedin.cn/incareer/in/zilong-chen-99671523b;;;", "or_profile": "~Feng_Wang12;~Zilong_Chen1;~Guokang_Wang2;~Yafei_Song1;~Huaping_Liu3", "aff": "Tsinghua University;Tsinghua University;Zhengzhou University;Alibaba Group;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;zzu.edu.cn;alibaba-inc.com;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2023masked,\ntitle={Masked Space-Time Hash Encoding for Efficient Dynamic Scene Reconstruction},\nauthor={Feng Wang and Zilong Chen and Guokang Wang and Yafei Song and Huaping Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lSLYXuLqRQ}\n}", "github": "", "project": "", "reviewers": "r2SF;hvP1;ohyg;Vvsv", "pdf_size": 9922984, "rating": "5;6;7;7", "confidence": "4;4;5;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "46;139;174;138", "wc_strengths": "66;30;72;168", "wc_weaknesses": "111;19;21;336", "wc_questions": "9;131;134;4", "wc_limitations": "31;17;34;11", "wc_review": "263;336;435;657", "wc_reply_reviewers": "31;15;24;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 124.25, 47.446680600438214 ], "wc_strengths_avg": [ 84.0, 51.088159097779204 ], "wc_weaknesses_avg": [ 121.75, 129.1576072091768 ], "wc_questions_avg": [ 69.5, 63.0337211340089 ], "wc_limitations_avg": [ 23.25, 9.54921462739214 ], "wc_review_avg": [ 422.75, 148.3818974807911 ], "wc_reply_reviewers_avg": [ 21.5, 6.5 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2014160932447642467&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 
7, "email": "tsinghua.edu.cn;cs.tsinghua.edu.cn;zzu.edu.cn;alibaba-inc.com;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tsinghua University;Zhengzhou University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.zzu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "THU;ZZU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "lSZXSDwvGv", "title": "Improving Language Model Negotiation with Self-Play and In-Context Learning from AI Feedback", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study whether multiple large language models (LLMs) can autonomously\nimprove each other in a negotiation game by playing, reflecting, and criticizing.\nWe are interested in this question because if LLMs were able to improve each other, it\nwould imply the possibility of creating strong AI agents with minimal human\nintervention. We ask two LLMs to bargain with each other, playing the roles of a\nbuyer and a seller, respectively. They aim to reach a deal with the buyer targeting\na lower price and the seller a higher one. A third language model, playing the\ncritic, provides feedback to a player to improve the player\u2019s negotiation strategies.\nWe let the two agents play multiple rounds, using previous negotiation history\nand AI feedback as in-context demonstrations to improve the\nmodel\u2019s negotiation strategy iteratively. We use different LLMs (GPT and Claude)\nfor different roles and use the deal price as the evaluation metric. Our experiments\nreveal multiple intriguing findings: (1) Only a subset of the language models we\nconsider can self-play and improve the deal price from AI feedback, weaker models\neither do not understand the game\u2019s rules or cannot incorporate AI feedback for\nfurther improvement. (2) Models\u2019 abilities to learn from the feedback differ when\nplaying different roles. For example, it is harder for Claude-instant to improve\nas the buyer than as the seller. (3) When unrolling the game to multiple rounds,\nstronger agents can consistently improve their performance by meaningfully using\nprevious experiences and iterative AI feedback, yet have a higher risk of breaking\nthe deal. 
We hope our work provides insightful initial explorations of having\nmodels autonomously improve each other with game playing and AI feedback.", "keywords": "large language models;negotiation game;in-context learning;self-play;AI feedback", "primary_area": "", "supplementary_material": "", "author": "Yao Fu;Hao Peng;Tushar Khot;Mirella Lapata", "authorids": "~Yao_Fu3;~Hao_Peng4;~Tushar_Khot1;~Mirella_Lapata1", "gender": "M;M;F;M", "homepage": "https://franxyao.github.io/;https://allenai.org/team/tushark/;https://homepages.inf.ed.ac.uk/mlap/;https://haopeng-nlp.github.io/", "dblp": ";83/8117;59/6701;", "google_scholar": "liSP4cEAAAAJ;_8mkIjgAAAAJ;j67B9Q4AAAAJ;6Y37nm0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yao_Fu3;~Tushar_Khot1;~Mirella_Lapata1;~Hao_Peng1", "aff": "University of Edinburgh;Allen Institute for Artificial Intelligence;Edinburgh University, University of Edinburgh;Allen Institute for Artificial Intelligence", "aff_domain": "ed.ac.uk;allenai.org;inf.ed.ac.uk;allenai.org", "position": "PhD student;Lead Research Scientist;Full Professor;Researcher", "bibtex": "@misc{\nfu2023improving,\ntitle={Improving Language Model Negotiation with Self-Play and In-Context Learning from {AI} Feedback},\nauthor={Yao Fu and Hao Peng and Tushar Khot and Mirella Lapata},\nyear={2023},\nurl={https://openreview.net/forum?id=lSZXSDwvGv}\n}", "github": "", "project": "", "reviewers": "nXyn;SEMS;Tqe8;sQTf", "site": "https://openreview.net/forum?id=lSZXSDwvGv", "pdf_size": 2504214, "rating": "4;4;5;5", "confidence": "4;4;3;4", "soundness": "3;2;3;2", "novelty": "3;2;2;1", "presentation": "3;2;3;3", "wc_summary": "100;111;52;94", "wc_strengths": "31;129;69;67", "wc_weaknesses": "254;803;124;281", "wc_questions": "95;4;75;163", "wc_limitations": "10;4;15;12", "wc_review": "490;1051;335;617", "wc_reply_reviewers": "272;0;0;16", "wc_reply_authors": "1117;0;0;0", "reply_reviewers": "4;0;0;1", "reply_authors": "5;1;1;1", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 22.353690970396812 ], "wc_strengths_avg": [ 74.0, 35.17101079013795 ], "wc_weaknesses_avg": [ 365.5, 259.4710966562557 ], "wc_questions_avg": [ 84.25, 56.66292879828927 ], "wc_limitations_avg": [ 10.25, 4.02336923485777 ], "wc_review_avg": [ 623.25, 266.38916550790873 ], "wc_reply_reviewers_avg": [ 72.0, 115.65465835840769 ], "wc_reply_authors_avg": [ 279.25, 483.675188013609 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 150, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5996322299549159114&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Edinburgh;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://allenai.org", "aff_unique_abbr": "Edinburgh;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Rewarded soups: towards Pareto-optimal alignment by interpolating weights fine-tuned on diverse rewards", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70592", "id": "lSbbC2VyCu", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/e12a3b98b67e8395f639fde4c2b03168-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lSbbC2VyCu", "openreview": "https://openreview.net/forum?id=lSbbC2VyCu", "poster": "/media/PosterPDFs/NeurIPS%202023/70592.png?t=1699825446.3630633", "slides": "https://nips.cc/virtual/2023/poster/70592", "video": "https://nips.cc/virtual/2023/poster/70592", "author_site": "Alexandre Rame, Guillaume Couairon, Corentin Dancette, Jean-Baptiste Gaya, Mustafa Shukor, Laure Soulier, Matthieu Cord", "tldr": "", "abstract": "Foundation models are first pre-trained on vast unsupervised datasets and then fine-tuned on labeled data. Reinforcement learning, notably from human feedback (RLHF), can further align the network with the intended usage. Yet the imperfections in the proxy reward may hinder the training and lead to suboptimal results; the diversity of objectives in real-world tasks and human opinions exacerbate the issue. This paper proposes embracing the heterogeneity of diverse rewards by following a multi-policy strategy. Rather than focusing on a single a priori reward, we aim for Pareto-optimal generalization across the entire space of preferences. To this end, we propose rewarded soup, first specializing multiple networks independently (one for each proxy reward) and then interpolating their weights linearly. This succeeds empirically because we show that the weights remain linearly connected when fine-tuned on diverse rewards from a shared pre-trained initialization. We demonstrate the effectiveness of our approach for text-to-text (summarization, Q&A, helpful assistant, review), text-image (image captioning, text-to-image generation, visual grounding), and control (locomotion) tasks. We hope to enhance the alignment of deep models, and how they interact with the world in all its diversity.", "keywords": "Deep learning;Foundation models;Fine-tuning;Reward optimization;Linear mode connectivity;Weight averaging;Model soups;Robustness;Generalization;Alignment;Multi objective learning.", "primary_area": "", "supplementary_material": "/attachment/e163deb5d7a029286cbdc295d1e4736fc4c59f97.pdf", "author": "Alexandre Rame;Guillaume Couairon;Corentin Dancette;Jean-Baptiste Gaya;Mustafa Shukor;Laure Soulier;Matthieu Cord", "authorids": "~Alexandre_Rame1;~Guillaume_Couairon1;~Corentin_Dancette1;~Jean-Baptiste_Gaya1;~Mustafa_Shukor1;~Laure_Soulier1;~Matthieu_Cord1", "gender": "M;;M;;M;;M", "homepage": "https://alexrame.github.io/;;https://cdancette.fr;https://twitter.com/jb_gaya;https://twitter.com/MustafaShukor1;;https://cord.isir.upmc.fr/", "dblp": ";;;304/2605;;;68/3117", "google_scholar": "7znwivwAAAAJ;;https://scholar.google.fr/citations?user=2zReQdQAAAAJ;;lhp9mRgAAAAJ;;SpAotDcAAAAJ", "orcid": ";;;;;;", "linkedin": "alexandre-ram%C3%A9-05259587;;;;;;", "or_profile": "~Alexandre_Rame1;~Guillaume_Couairon1;~Corentin_Dancette1;~Jean-Baptiste_Gaya1;~Mustafa_Shukor1;~Laure_Soulier1;~Matthieu_Cord1", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);;Sorbonne Universite;Meta Facebook;Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);;Sorbonne Universit\u00e9", "aff_domain": "isir.upmc.fr;;sorbonne-universite.fr;fb.com;isir.upmc.fr;;isir.upmc.fr", "position": "PhD student;;PhD student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nrame2023rewarded,\ntitle={Rewarded soups: towards 
Pareto-optimal alignment by interpolating weights fine-tuned on diverse rewards},\nauthor={Alexandre Rame and Guillaume Couairon and Corentin Dancette and Jean-Baptiste Gaya and Mustafa Shukor and Laure Soulier and Matthieu Cord},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lSbbC2VyCu}\n}", "github": "", "project": "", "reviewers": "bXWy;bJvT;DNE5;stHc;QPfR;ntSF;TSwH", "pdf_size": 6759398, "rating": "4;4;5;5;6;6;7", "confidence": "4;3;4;3;3;3;3", "soundness": "2;3;3;3;3;3;4", "novelty": "2;2;3;3;2;3;3", "presentation": "3;2;3;3;2;3;4", "wc_summary": "121;55;55;100;122;63;84", "wc_strengths": "28;55;44;63;77;101;171", "wc_weaknesses": "70;210;84;78;145;75;102", "wc_questions": "106;2;37;31;73;117;104", "wc_limitations": "1;2;9;4;7;14;7", "wc_review": "326;324;229;276;424;370;468", "wc_reply_reviewers": "520;0;71;0;0;32;21", "wc_reply_authors": "1032;35;35;0;0;0;0", "reply_reviewers": "2;0;1;0;0;1;1", "reply_authors": "4;2;2;1;1;1;1", "rating_avg": [ 5.285714285714286, 1.0301575072754257 ], "confidence_avg": [ 3.2857142857142856, 0.4517539514526256 ], "soundness_avg": [ 3.0, 0.5345224838248488 ], "novelty_avg": [ 2.5714285714285716, 0.49487165930539345 ], "presentation_avg": [ 2.857142857142857, 0.6388765649999399 ], "wc_summary_avg": [ 85.71428571428571, 27.18042512920064 ], "wc_strengths_avg": [ 77.0, 44.04867437603218 ], "wc_weaknesses_avg": [ 109.14285714285714, 47.52271207817696 ], "wc_questions_avg": [ 67.14285714285714, 41.13888869750015 ], "wc_limitations_avg": [ 6.285714285714286, 4.130523512800274 ], "wc_review_avg": [ 345.2857142857143, 76.54450486148431 ], "wc_reply_reviewers_avg": [ 92.0, 176.33490862560367 ], "wc_reply_authors_avg": [ 157.42857142857142, 357.36889989511525 ], "reply_reviewers_avg": [ 0.7142857142857143, 0.6998542122237652 ], "reply_authors_avg": [ 1.7142857142857142, 1.0301575072754254 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4823819106188661, "gs_citation": 134, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11519722875319146614&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "isir.upmc.fr;;sorbonne-universite.fr;fb.com;isir.upmc.fr;;isir.upmc.fr", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;Sorbonne University;Meta;Sorbonne Universit\u00e9", "aff_unique_dep": "Facult\u00e9 des Sciences;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.upmc.fr;https://www.sorbonne-universite.fr;https://meta.com;https://www.sorbonne-universite.fr", "aff_unique_abbr": "UPMC;Sorbonne;Meta;Sorbonne U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "France;United States" }, { "title": "Unconstrained Dynamic Regret via Sparse Coding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70591", "id": "lT9n36RH1w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec2833cda146c277cdaa39066764f25c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lT9n36RH1w", "openreview": "https://openreview.net/forum?id=lT9n36RH1w", "poster": "/media/PosterPDFs/NeurIPS%202023/70591.png?t=1701446267.7072265", "slides": "https://nips.cc/virtual/2023/poster/70591", "video": "https://nips.cc/virtual/2023/poster/70591", "author_site": "Zhiyu Zhang, Ashok Cutkosky, Yannis Paschalidis", "tldr": "", 
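
To make the rewarded-soup recipe in the preceding record concrete: the method fine-tunes one network per proxy reward from a shared pre-trained initialization, then interpolates their weights linearly. Below is a minimal, hypothetical sketch; the helper name `rewarded_soup`, the toy model, and the two-expert setup are illustrative assumptions, not the authors' released code.

```python
import copy
import torch.nn as nn

def rewarded_soup(models, lambdas):
    """Linearly interpolate the state dicts of reward-specialized models.
    `lambdas` are non-negative preference weights summing to 1."""
    assert abs(sum(lambdas) - 1.0) < 1e-6
    soup = copy.deepcopy(models[0])
    state = {k: sum(l * m.state_dict()[k].float() for l, m in zip(lambdas, models))
             for k in models[0].state_dict()}
    soup.load_state_dict(state)
    return soup

# Toy usage: two "experts" fine-tuned from a shared base on different proxy rewards.
base = nn.Linear(16, 4)
expert_a, expert_b = copy.deepcopy(base), copy.deepcopy(base)
# ... fine-tune expert_a on reward A and expert_b on reward B ...
policy = rewarded_soup([expert_a, expert_b], lambdas=[0.3, 0.7])
```

Varying `lambdas` then traces out an approximation of the preference front at inference time, without any retraining, which is what the linear mode connectivity claim in the abstract licenses.
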
"abstract": "Motivated by the challenge of nonstationarity in sequential decision making, we study Online Convex Optimization (OCO) under the coupling of two problem structures: the domain is unbounded, and the comparator sequence $u_1,\\ldots,u_T$ is arbitrarily time-varying. As no algorithm can guarantee low regret simultaneously against all comparator sequences, handling this setting requires moving from minimax optimality to comparator adaptivity. That is, sensible regret bounds should depend on certain complexity measures of the comparator relative to one's prior knowledge. This paper achieves a new type of such adaptive regret bounds leveraging a sparse coding framework. The complexity of the comparator is measured by its energy and its sparsity on a user-specified dictionary, which offers considerable versatility. For example, equipped with a wavelet dictionary, our framework improves the state-of-the-art bound (Jacobsen & Cutkosky, 2022) by adapting to both ($i$) the magnitude of the comparator average $||\\bar u||=||\\sum_{t=1}^Tu_t/T||$, rather than the maximum $\\max_t||u_t||$; and ($ii$) the comparator variability $\\sum_{t=1}^T||u_t-\\bar u||$, rather than the uncentered sum $\\sum_{t=1}^T||u_t||$. Furthermore, our proof is simpler due to decoupling function approximation from regret minimization.", "keywords": "Dynamic online learning;parameter-free online learning;time series forecasting;wavelet", "primary_area": "", "supplementary_material": "", "author": "Zhiyu Zhang;Ashok Cutkosky;Ioannis Paschalidis", "authorids": "~Zhiyu_Zhang1;~Ashok_Cutkosky1;~Ioannis_Paschalidis1", "gender": ";;M", "homepage": "https://zhiyuzz.github.io/;http://www.cs.stanford.edu/~ashokc;http://sites.bu.edu/paschalidis/", "dblp": "45/6271-3;191/6725;44/2060", "google_scholar": "5KHfVTQAAAAJ;h4AbGp0AAAAJ;Es_hZ0QAAAAJ", "orcid": ";;0000-0002-3343-2913", "linkedin": ";;yannis-paschalidis-75a921/", "or_profile": "~Zhiyu_Zhang1;~Ashok_Cutkosky1;~Ioannis_Paschalidis1", "aff": "Boston University;Boston University;Boston University", "aff_domain": "bu.edu;bu.edu;bu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023unconstrained,\ntitle={Unconstrained Dynamic Regret via Sparse Coding},\nauthor={Zhiyu Zhang and Ashok Cutkosky and Ioannis Paschalidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lT9n36RH1w}\n}", "github": "", "project": "", "reviewers": "pdGo;4s3X;45WG;qkG5", "pdf_size": 1067787, "rating": "4;6;7;7", "confidence": "4;2;4;3", "soundness": "3;2;4;3", "novelty": "2;3;4;3", "presentation": "3;2;4;3", "wc_summary": "124;101;94;100", "wc_strengths": "49;65;94;82", "wc_weaknesses": "156;303;158;151", "wc_questions": "78;71;3;55", "wc_limitations": "1;9;1;30", "wc_review": "408;549;350;418", "wc_reply_reviewers": "85;79;36;0", "wc_reply_authors": "0;6;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 104.75, 11.431863365173676 ], "wc_strengths_avg": [ 72.5, 17.03672503740082 ], "wc_weaknesses_avg": [ 192.0, 64.13657302974646 ], "wc_questions_avg": [ 51.75, 29.354514133264068 ], "wc_limitations_avg": [ 10.25, 11.861176164276458 ], "wc_review_avg": [ 431.25, 72.77147449378774 ], "wc_reply_reviewers_avg": [ 
50.0, 34.50362299817223 ], "wc_reply_authors_avg": [ 1.5, 2.598076211353316 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.24618298195866545, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10534465860176834417&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "bu.edu;bu.edu;bu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Not All Out-of-Distribution Data Are Harmful to Open-Set Active Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70590", "id": "lV3LIGlc1w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c8d9636f74d0207ff4f65956010f450-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lV3LIGlc1w", "openreview": "https://openreview.net/forum?id=lV3LIGlc1w", "poster": "/media/PosterPDFs/NeurIPS%202023/70590.png?t=1702000739.04317", "slides": "https://nips.cc/virtual/2023/poster/70590", "video": "https://nips.cc/virtual/2023/poster/70590", "author_site": "Yang Yang, Yuxuan Zhang, XIN SONG, Yi Xu", "tldr": "", "abstract": "Active learning (AL) methods have been proven to be an effective way to reduce the labeling effort by intelligently selecting valuable instances for annotation. Despite their great success with in-distribution (ID) scenarios, AL methods suffer from performance degradation in many real-world applications because out-of-distribution (OOD) instances are inevitably contained in unlabeled data, which may lead to inefficient sampling. Therefore, several attempts have explored open-set AL by strategically selecting pure ID instances while filtering OOD instances. However, concentrating solely on selecting pseudo-ID instances may constrain the training of both the ID classifier and the OOD detector. To address this issue, we propose a simple yet effective sampling scheme, Progressive Active Learning (PAL), which employs a progressive sampling mechanism to leverage the active selection of valuable OOD instances. The proposed PAL measures unlabeled instances by synergistically evaluating instances' informativeness and representativeness, and thus it can balance the pseudo-ID and pseudo-OOD instances in each round to enhance both the capacity of the ID classifier and the OOD detector. \nExtensive experiments on various open-set AL scenarios demonstrate the effectiveness of the proposed PAL, compared with the state-of-the-art methods. 
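
To make the sampling scheme above concrete, here is a rough, hypothetical sketch of one PAL-style query round. The entropy-based informativeness, centroid-based representativeness, the OOD threshold `tau`, and the `ood_ratio` budget split are illustrative stand-ins; the paper's exact scoring functions are not specified in this abstract.

```python
import numpy as np

def pal_round(probs, feats, ood_scores, budget, ood_ratio=0.5, tau=0.5):
    """Select `budget` instances, balancing pseudo-ID and pseudo-OOD pools."""
    entropy = -(probs * np.log(probs + 1e-12)).sum(axis=1)   # informativeness
    centroid = feats.mean(axis=0)
    rep = -np.linalg.norm(feats - centroid, axis=1)          # representativeness (toy)
    value = entropy + rep                                    # combined instance value
    is_ood = ood_scores > tau                                # pseudo-OOD split
    n_ood = int(budget * ood_ratio)
    ood_pool, id_pool = np.where(is_ood)[0], np.where(~is_ood)[0]
    pick = lambda pool, k: pool[np.argsort(-value[pool])[:k]]
    return np.concatenate([pick(id_pool, budget - n_ood), pick(ood_pool, n_ood)])

# Toy usage: 100 unlabeled instances, a 10-way classifier, query budget of 8.
rng = np.random.default_rng(0)
p = rng.dirichlet(np.ones(10), size=100)
queried = pal_round(p, rng.normal(size=(100, 32)), rng.random(100), budget=8)
```
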
The code is available at \\url{https://github.com/njustkmg/PAL}.", "keywords": "Out-of-Distribution;Active Learning", "primary_area": "", "supplementary_material": "/attachment/af2c5f103876d8b1011a50d70b620938cf814c20.pdf", "author": "Yang Yang;Yuxuan Zhang;XIN SONG;Yi Xu", "authorids": "~Yang_Yang17;~Yuxuan_Zhang7;~XIN_SONG1;~Yi_Xu8", "gender": "M;M;M;M", "homepage": "http://www.njustkmg.cn/;https://github.com/Zechun-zyx;;https://yxu71.github.io", "dblp": "48/450-74;;;14/5580", "google_scholar": "_6NJip0AAAAJ;5hPP6JIAAAAJ;https://scholar.google.com/citations?view_op=list_works;D4jEMqEAAAAJ", "orcid": "0000-0002-5245-3584;;0000-0001-5571-3436;0009-0000-9900-6143", "linkedin": ";;;", "or_profile": "~Yang_Yang17;~Yuxuan_Zhang7;~XIN_SONG1;~YI_XU3", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Baidu;Dalian University of Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;baidu.com;dlut.edu.cn", "position": "Full Professor;MS student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nyang2023not,\ntitle={Not All Out-of-Distribution Data Are Harmful to Open-Set Active Learning},\nauthor={Yang Yang and Yuxuan Zhang and XIN SONG and Yi Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lV3LIGlc1w}\n}", "github": "", "project": "", "reviewers": "ACvU;k1sx;d3fV;oop5", "pdf_size": 2682693, "rating": "5;6;6;7", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;2;3;3", "wc_summary": "42;58;69;79", "wc_strengths": "19;45;59;123", "wc_weaknesses": "156;231;143;75", "wc_questions": "1;46;27;68", "wc_limitations": "1;3;1;10", "wc_review": "219;383;299;355", "wc_reply_reviewers": "18;34;20;86", "wc_reply_authors": "27;27;27;259", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.0, 13.729530217745982 ], "wc_strengths_avg": [ 61.5, 38.29817228014935 ], "wc_weaknesses_avg": [ 151.25, 55.37316588384666 ], "wc_questions_avg": [ 35.5, 24.642443060703215 ], "wc_limitations_avg": [ 3.75, 3.6996621467371855 ], "wc_review_avg": [ 314.0, 62.63385665915839 ], "wc_reply_reviewers_avg": [ 39.5, 27.54541704167864 ], "wc_reply_authors_avg": [ 85.0, 100.45894683899488 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3711596324602421905&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "njust.edu.cn;njust.edu.cn;baidu.com;dlut.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Nanjing University of Science and Technology;Baidu;Dalian University of Technology", "aff_unique_dep": ";Baidu, Inc.;", "aff_unique_url": "http://www.nust.edu.cn/;https://www.baidu.com;http://www.dlut.edu.cn/", "aff_unique_abbr": "NUST;Baidu;DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Entropy-based Training Methods for Scalable Neural Implicit Samplers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70589", "id": "lXOoR4KYcJ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/1646e34971facbcda3727d1dc28ab635-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lXOoR4KYcJ", "openreview": "https://openreview.net/forum?id=lXOoR4KYcJ", "poster": "/media/PosterPDFs/NeurIPS%202023/70589.png?t=1702430003.3485968", "slides": "https://nips.cc/virtual/2023/poster/70589", "video": "https://nips.cc/virtual/2023/poster/70589", "author_site": "Weijian Luo, Boya Zhang, Zhihua Zhang", "tldr": "", "abstract": "Efficiently sampling from un-normalized target distributions is a fundamental problem in scientific computing and machine learning. Traditional approaches such as Markov Chain Monte Carlo (MCMC) guarantee asymptotically unbiased samples from such distributions but suffer from computational inefficiency, particularly when dealing with high-dimensional targets, as they require numerous iterations to generate a batch of samples. In this paper, we introduce an efficient and scalable neural implicit sampler that overcomes these limitations. The implicit sampler can generate large batches of samples with low computational costs by leveraging a neural transformation that directly maps easily sampled latent vectors to target samples without the need for iterative procedures. To train the neural implicit samplers, we introduce two novel methods: the KL training method and the Fisher training method. The former method minimizes the Kullback-Leibler divergence, while the latter minimizes the Fisher divergence between the sampler and the target distributions. By employing the two training methods, we effectively optimize the neural implicit samplers to learn and generate from the desired target distribution. To demonstrate the effectiveness, efficiency, and scalability of our proposed samplers, we evaluate them on three sampling benchmarks with different scales. These benchmarks include sampling from 2D targets, Bayesian inference, and sampling from high-dimensional energy-based models (EBMs). Notably, in the experiment involving high-dimensional EBMs, our sampler produces samples that are comparable to those generated by MCMC-based methods while being more than 100 times more efficient, showcasing the efficiency of our neural sampler. 
Besides the theoretical contributions and strong empirical performances, the proposed neural samplers and corresponding training methods will shed light on further research on developing efficient samplers for various applications beyond the ones explored in this study.", "keywords": "implicit sampler;learning to sample;generative models", "primary_area": "", "supplementary_material": "", "author": "Weijian Luo;Boya Zhang;Zhihua Zhang", "authorids": "~Weijian_Luo1;~Boya_Zhang1;~Zhihua_Zhang1", "gender": ";F;M", "homepage": ";;http://www.math.pku.edu.cn/teachers/zhzhang/", "dblp": ";;52/5331", "google_scholar": ";;", "orcid": ";;", "linkedin": ";%E5%8D%9A%E9%9B%85-%E5%BC%A0-790ab7239/;", "or_profile": "~Weijian_Luo1;~Boya_Zhang1;~Zhihua_Zhang1", "aff": ";Peking University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nluo2023entropybased,\ntitle={Entropy-based Training Methods for Scalable Neural Implicit Samplers},\nauthor={Weijian Luo and Boya Zhang and Zhihua Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lXOoR4KYcJ}\n}", "github": "", "project": "", "reviewers": "wbpF;Dhq1;cgAz;MaqB", "pdf_size": 2904703, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;3;3;2", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "92;134;116;38", "wc_strengths": "64;201;83;46", "wc_weaknesses": "98;625;128;58", "wc_questions": "246;4;113;2", "wc_limitations": "1;33;28;2", "wc_review": "501;997;468;146", "wc_reply_reviewers": "72;335;0;10", "wc_reply_authors": "888;1492;223;0", "reply_reviewers": "1;4;0;1", "reply_authors": "3;6;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 36.124783736376884 ], "wc_strengths_avg": [ 98.5, 60.60734278946735 ], "wc_weaknesses_avg": [ 227.25, 230.97984219407545 ], "wc_questions_avg": [ 91.25, 99.99843748779278 ], "wc_limitations_avg": [ 16.0, 14.611639196202457 ], "wc_review_avg": [ 528.0, 304.22606725920116 ], "wc_reply_reviewers_avg": [ 104.25, 136.04847481688282 ], "wc_reply_authors_avg": [ 650.75, 585.327846168282 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.0, 1.8708286933869707 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1121211779583957548&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "DoReMi: Optimizing Data Mixtures Speeds Up Language Model Pretraining", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70588", "id": "lXuByUeHhd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dcba6be91359358c2355cd920da3fcbd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lXuByUeHhd", "openreview": "https://openreview.net/forum?id=lXuByUeHhd", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70588.png?t=1701377400.2247705", "slides": "https://nips.cc/virtual/2023/poster/70588", "video": "https://nips.cc/virtual/2023/poster/70588", "author_site": "Sang Michael Xie, Hieu Pham, Xuanyi Dong, Nan Du, Hanxiao Liu, Yifeng Lu, Percy Liang, Quoc V Le, Tengyu Ma, Adams Wei Yu", "tldr": "", "abstract": "The mixture proportions of pretraining data domains (e.g., Wikipedia, books, web text) greatly affect language model (LM) performance. In this paper, we propose Domain Reweighting with Minimax Optimization (DoReMi), which first trains a small proxy model using group distributionally robust optimization (Group DRO) over domains to produce domain weights (mixture proportions) without knowledge of downstream tasks. We then resample a dataset with these domain weights and train a larger, full-sized model. In our experiments, we use DoReMi on a 280M-parameter proxy model to set the domain weights for training an 8B-parameter model (30x larger) more efficiently. On The Pile, DoReMi improves perplexity across all domains, even when it downweights a domain. DoReMi improves average few-shot downstream accuracy by 6.5% points over a baseline model trained using The Pile's default domain weights and reaches the baseline accuracy with 2.6x fewer training steps. On the GLaM dataset, DoReMi, which has no knowledge of downstream tasks, even matches the performance of using domain weights tuned on downstream tasks.", "keywords": "language models;pretraining;domain reweighting;data curation", "primary_area": "", "supplementary_material": "", "author": "Sang Michael Xie;Hieu Pham;Xuanyi Dong;Nan Du;Hanxiao Liu;Yifeng Lu;Percy Liang;Quoc V Le;Tengyu Ma;Adams Wei Yu", "authorids": "~Sang_Michael_Xie1;~Hieu_Pham1;~Xuanyi_Dong1;~Nan_Du1;~Hanxiao_Liu1;~Yifeng_Lu1;~Percy_Liang1;~Quoc_V_Le1;~Tengyu_Ma1;~Adams_Wei_Yu1", "gender": ";M;M;M;M;M;;M;M;M", "homepage": "https://cs.stanford.edu/~eix/;;https://xuanyidong.com/;;https://quark0.github.io/;;https://cs.stanford.edu/~pliang/;;http://ai.stanford.edu/~tengyuma/;https://adamsyu.github.io/", "dblp": "220/3987;;198/1522;;157/6334;69/8051;04/1701;29/6166;54/9061;65/10635", "google_scholar": "EBNa5IEAAAAJ;GpcGdRkAAAAJ;7zp9arUAAAAJ;v474hP4AAAAJ;IMkVH_8AAAAJ;CM4o-cgAAAAJ;pouyVyUAAAAJ;;i38QlUwAAAAJ;-hW6cvgAAAAJ", "orcid": ";;0000-0001-9272-1590;;;;;;;", "linkedin": ";;;dunangatech/;;;;;;", "or_profile": "~Sang_Michael_Xie1;~Hieu_Pham1;~Xuanyi_Dong1;~Nan_Du1;~Hanxiao_Liu1;~Yifeng_Lu1;~Percy_Liang1;~Quoc_V_Le1;~Tengyu_Ma1;~Adams_Wei_Yu1", "aff": "Stanford University;Carnegie Mellon University;Google Brain;Google Brain;Google Brain;Google Deepmind;Stanford University;Google;Facebook AI Research;Google Brain", "aff_domain": "stanford.edu;cmu.edu;google.com;google.com;google.com;google.com;stanford.edu;google.com;fb.com;google.com", "position": "PhD student;PhD student;Researcher;Research Scientist;Research Scientist;Researcher;Associate Professor;Scientist;Visiting Scientist;Research Scientist", "bibtex": "@inproceedings{\nxie2023doremi,\ntitle={DoReMi: Optimizing Data Mixtures Speeds Up Language Model Pretraining},\nauthor={Sang Michael Xie and Hieu Pham and Xuanyi Dong and Nan Du and Hanxiao Liu and Yifeng Lu and Percy Liang and Quoc V Le and Tengyu Ma and Adams Wei Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lXuByUeHhd}\n}", "github": "", "project": "", "reviewers": "ajJF;5e3R;MeFt;uiyj;VPSZ", "pdf_size": 7172522, "rating": "6;7;7;8;8", 
"confidence": "3;5;4;4;5", "soundness": "3;4;2;4;3", "novelty": "3;4;3;4;4", "presentation": "3;4;4;4;4", "wc_summary": "54;92;146;131;110", "wc_strengths": "134;77;187;88;144", "wc_weaknesses": "385;221;143;40;251", "wc_questions": "38;180;96;52;168", "wc_limitations": "6;1;35;1;19", "wc_review": "617;571;607;312;692", "wc_reply_reviewers": "69;14;10;9;36", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 106.6, 32.05994385522221 ], "wc_strengths_avg": [ 126.0, 39.88483421051164 ], "wc_weaknesses_avg": [ 208.0, 114.71355630438802 ], "wc_questions_avg": [ 106.8, 58.23538443249087 ], "wc_limitations_avg": [ 12.4, 13.078226179417452 ], "wc_review_avg": [ 559.8, 130.0052306640006 ], "wc_reply_reviewers_avg": [ 27.6, 22.913751329714653 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6428571428571428, "gs_citation": 163, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12833954223004559003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stanford.edu;cmu.edu;google.com;google.com;google.com;google.com;stanford.edu;google.com;fb.com;google.com", "author_num": 10, "aff_unique_index": "0;1;2;2;2;3;0;2;4;2", "aff_unique_norm": "Stanford University;Carnegie Mellon University;Google;DeepMind;Meta", "aff_unique_dep": ";;Google Brain;DeepMind;Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://www.cmu.edu;https://brain.google.com;https://deepmind.com;https://research.facebook.com", "aff_unique_abbr": "Stanford;CMU;Google Brain;DeepMind;FAIR", "aff_campus_unique_index": "0;2;2;2;0;2;2", "aff_campus_unique": "Stanford;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Lift Yourself Up: Retrieval-augmented Text Generation with Self-Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70587", "id": "lYNSvp51a7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/887262aeb3eafb01ef0fd0e3a87a8831-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lYNSvp51a7", "openreview": "https://openreview.net/forum?id=lYNSvp51a7", "poster": "/media/PosterPDFs/NeurIPS%202023/70587.png?t=1702142720.6989512", "slides": "https://nips.cc/virtual/2023/poster/70587", "video": "https://nips.cc/virtual/2023/poster/70587", "author_site": "Xin Cheng, Di Luo, Xiuying Chen, Lemao Liu, Dongyan Zhao, Rui Yan", "tldr": "", "abstract": "With direct access to human-written reference as memory, retrieval-augmented generation has achieved much progress in a wide range of text generation tasks. Since better memory would typically prompt better generation (we define this as primal problem). The traditional approach for memory retrieval involves selecting memory that exhibits the highest similarity to the input. However, this method is constrained by the quality of the fixed corpus from which memory is retrieved. 
In this paper, by exploring the duality of the primal problem: better generation also prompts better memory, we propose a novel framework, selfmem, which addresses this limitation by iteratively employing a retrieval-augmented generator to create an unbounded memory pool and using a memory selector to choose one output as memory for the subsequent generation round. This enables the model to leverage its own output, referred to as self-memory, for improved generation. We evaluate the effectiveness of selfmem on three distinct text generation tasks: neural machine translation, abstractive text summarization, and dialogue generation, under two generation paradigms: fine-tuned small model and few-shot LLM. Our approach achieves state-of-the-art results in four directions in JRC-Acquis translation dataset, 50.3 ROUGE-1 in XSum, and 62.9 ROUGE-1 in BigPatent, demonstrating the potential of self-memory in enhancing retrieval-augmented generation models. Furthermore, we conduct thorough analyses of each component in the selfmem framework to identify current system bottlenecks and provide insights for future research.", "keywords": "natural language processing;retrieval-augmented text generation;self memory", "primary_area": "", "supplementary_material": "", "author": "Xin Cheng;Di Luo;Xiuying Chen;Lemao Liu;Dongyan Zhao;Rui Yan", "authorids": "~Xin_Cheng2;~Di_Luo2;~Xiuying_Chen1;~Lemao_Liu3;~Dongyan_Zhao2;~Rui_Yan2", "gender": ";;F;;M;M", "homepage": ";;https://iriscxy.github.io/;;https://www.wict.pku.edu.cn/zhaodongyan/en/;https://gsai.ruc.edu.cn/english/ruiyan", "dblp": ";;33/11343.html;;63/1870;19/2405-1", "google_scholar": ";;COUnAF4AAAAJ;;lhR8-68AAAAJ;eLw6g-UAAAAJ", "orcid": ";;;;;0000-0002-3356-6823", "linkedin": ";;;;;", "or_profile": "~Xin_Cheng2;~Di_Luo2;~Xiuying_Chen1;~Lemao_Liu3;~Dongyan_Zhao2;~Rui_Yan2", "aff": ";;King Abdullah University of Science and Technology;;Peking University;Renmin University of China", "aff_domain": ";;kaust.edu.sa;;pku.edu.cn;ruc.edu.cn", "position": ";;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ncheng2023lift,\ntitle={Lift Yourself Up: Retrieval-augmented Text Generation with Self-Memory},\nauthor={Xin Cheng and Di Luo and Xiuying Chen and Lemao Liu and Dongyan Zhao and Rui Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lYNSvp51a7}\n}", "github": "", "project": "", "reviewers": "Mggy;dohg;Z8iX;Ysdi;LVYL", "pdf_size": 675591, "rating": "5;5;5;6;6", "confidence": "4;4;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;2;3;2", "wc_summary": "78;83;123;76;62", "wc_strengths": "48;123;62;56;43", "wc_weaknesses": "90;129;285;124;154", "wc_questions": "33;141;56;49;94", "wc_limitations": "30;7;9;4;1", "wc_review": "279;483;535;309;354", "wc_reply_reviewers": "4;217;0;26;0", "wc_reply_authors": "41;423;68;68;68", "reply_reviewers": "1;2;0;1;0", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 84.4, 20.51925924588897 ], "wc_strengths_avg": [ 66.4, 29.042038495945842 ], "wc_weaknesses_avg": [ 156.4, 67.45843164497674 ], "wc_questions_avg": [ 74.6, 38.774218238411976 ], "wc_limitations_avg": [ 10.2, 10.264501936285072 ], "wc_review_avg": [ 392.0, 99.83185864241935 ], "wc_reply_reviewers_avg": [ 49.4, 
84.35543847316544 ], "wc_reply_authors_avg": [ 133.6, 145.07735867460502 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17034340209508969759&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": ";;kaust.edu.sa;;pku.edu.cn;ruc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "King Abdullah University of Science and Technology;Peking University;Renmin University of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaust.edu.sa;http://www.pku.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "KAUST;Peking U;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Saudi Arabia;China" }, { "title": "A Single 2D Pose with Context is Worth Hundreds for 3D Human Pose Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70586", "id": "lclQ2RvWYu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5752f9fd2d5c40174738d6f02c202e72-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lclQ2RvWYu", "openreview": "https://openreview.net/forum?id=lclQ2RvWYu", "poster": "/media/PosterPDFs/NeurIPS%202023/70586.png?t=1701897607.685142", "slides": "https://nips.cc/virtual/2023/poster/70586", "video": "https://nips.cc/virtual/2023/poster/70586", "author_site": "Qitao Zhao, Ce Zheng, Mengyuan Liu, Chen Chen", "tldr": "", "abstract": "The dominant paradigm in 3D human pose estimation that lifts a 2D pose sequence to 3D heavily relies on long-term temporal clues (i.e., using a daunting number of video frames) for improved accuracy, which incurs performance saturation, intractable computation and the non-causal problem. This can be attributed to these methods' inherent inability to perceive spatial context, as plain 2D joint coordinates carry no visual cues. To address this issue, we propose a straightforward yet powerful solution: leveraging the $\\textit{readily available}$ intermediate visual representations produced by off-the-shelf (pre-trained) 2D pose detectors -- no finetuning on the 3D task is even needed. The key observation is that, while the pose detector learns to localize 2D joints, such representations (e.g., feature maps) implicitly encode the joint-centric spatial context thanks to the regional operations in backbone networks. We design a simple baseline named $\\textbf{Context-Aware PoseFormer}$ to showcase its effectiveness. $\\textit{Without access to any temporal information}$, the proposed method significantly outperforms its context-agnostic counterpart, PoseFormer, and other state-of-the-art methods using up to $\\textit{hundreds of}$ video frames regarding both speed and precision. 
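
The key operation implied by the abstract above, sampling the 2D detector's intermediate feature map at the predicted joint locations so that each joint token carries visual context, might look roughly like this. The function name, shapes, and normalization are illustrative assumptions, not the authors' exact implementation.

```python
import torch
import torch.nn.functional as F

def joint_context_features(feat_map, joints_xy, img_size):
    """feat_map: (B, C, H, W) detector features; joints_xy: (B, J, 2) in pixels."""
    B, J, _ = joints_xy.shape
    # grid_sample expects (x, y) coordinates normalized to [-1, 1]
    grid = 2.0 * joints_xy / torch.tensor(img_size, dtype=feat_map.dtype) - 1.0
    grid = grid.view(B, J, 1, 2)
    sampled = F.grid_sample(feat_map, grid, align_corners=False)  # (B, C, J, 1)
    return sampled.squeeze(-1).transpose(1, 2)                    # (B, J, C)

# Toy usage: 17 COCO joints, a 256-channel stride-4 feature map from a 256x256 crop.
feats = torch.randn(1, 256, 64, 64)
joints = torch.rand(1, 17, 2) * 256
tokens = joint_context_features(feats, joints, img_size=(256, 256))
print(tokens.shape)  # torch.Size([1, 17, 256]) -- one context-aware token per joint
```

These joint-centric tokens would then be fed to the lifting transformer in place of plain coordinates.
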
$\\textit{Project page:}$ https://qitaozhao.github.io/ContextAware-PoseFormer", "keywords": "Human Pose Estimation; 2D-to-3D Lifting; Context-Aware", "primary_area": "", "supplementary_material": "/attachment/176c359fc4e3edfb7a0fd62c3405912eed163764.zip", "author": "Qitao Zhao;Ce Zheng;Mengyuan Liu;Chen Chen", "authorids": "~Qitao_Zhao1;~Ce_Zheng3;~Mengyuan_Liu2;~Chen_Chen18", "gender": "M;M;;M", "homepage": "https://qitaozhao.github.io;;https://www.semanticscholar.org/author/Mengyuan-Liu/47842072;https://www.crcv.ucf.edu/chenchen/", "dblp": ";;;65/4423-1", "google_scholar": "r9nmsasAAAAJ;YFKLC58AAAAJ;woX_4AcAAAAJ;TuEwcZ0AAAAJ", "orcid": ";0000-0002-9033-0622;0000-0002-6332-8316;0000-0003-3957-7061", "linkedin": ";;;dennychen/", "or_profile": "~Qitao_Zhao1;~Ce_Zheng3;~Mengyuan_Liu2;~Chen_Chen18", "aff": "Shandong University;University of Central Florida;SUN YAT-SEN UNIVERSITY;University of Central Florida", "aff_domain": "sdu.edu.cn;ucf.edu;sysu.edu.cn;ucf.edu", "position": "Undergrad student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023a,\ntitle={A Single 2D Pose with Context is Worth Hundreds for 3D Human Pose Estimation},\nauthor={Qitao Zhao and Ce Zheng and Mengyuan Liu and Chen Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lclQ2RvWYu}\n}", "github": "", "project": "", "reviewers": "jUmb;4XvM;THhA;dcLQ;ez57", "pdf_size": 5110321, "rating": "4;5;5;7;7", "confidence": "5;4;4;5;5", "soundness": "2;3;3;4;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "48;47;52;110;61", "wc_strengths": "64;44;54;51;60", "wc_weaknesses": "283;103;178;124;10", "wc_questions": "2;8;4;41;33", "wc_limitations": "1;36;4;31;10", "wc_review": "398;238;292;357;174", "wc_reply_reviewers": "201;8;0;50;11", "wc_reply_authors": "1179;19;31;38;27", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 63.6, 23.72003372678884 ], "wc_strengths_avg": [ 54.6, 6.974238309665077 ], "wc_weaknesses_avg": [ 139.6, 89.90795292964911 ], "wc_questions_avg": [ 17.6, 16.156732342896568 ], "wc_limitations_avg": [ 16.4, 14.347125147568763 ], "wc_review_avg": [ 291.8, 80.37512052868101 ], "wc_reply_reviewers_avg": [ 54.0, 75.50629112862053 ], "wc_reply_authors_avg": [ 258.8, 460.14102186177666 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14041823852222185051&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "sdu.edu.cn;ucf.edu;sysu.edu.cn;ucf.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Shandong University;University of Central Florida;Sun Yat-sen University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.ucf.edu;http://www.sysu.edu.cn", "aff_unique_abbr": "SDU;UCF;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "A Tale of Two Features: Stable Diffusion Complements DINO for Zero-Shot Semantic Correspondence", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/70585", "id": "lds9D17HRd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e9bdc23f169a05ea9b72ccef4574551-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lds9D17HRd", "openreview": "https://openreview.net/forum?id=lds9D17HRd", "poster": "/media/PosterPDFs/NeurIPS%202023/70585.png?t=1702233082.3567994", "slides": "https://nips.cc/virtual/2023/poster/70585", "video": "https://nips.cc/virtual/2023/poster/70585", "author_site": "Junyi Zhang, Charles Herrmann, Junhwa Hur, Luisa Polania Cabrera, Varun Jampani, Deqing Sun, Ming-Hsuan Yang", "tldr": "", "abstract": "Text-to-image diffusion models have made significant advances in generating and editing high-quality images. As a result, numerous approaches have explored the ability of diffusion model features to understand and process single images for downstream tasks, e.g., classification, semantic segmentation, and stylization. However, significantly less is known about what these features reveal across multiple, different images and objects. In this work, we exploit Stable Diffusion (SD) features for semantic and dense correspondence and discover that with simple post-processing, SD features can perform quantitatively similar to SOTA representations. Interestingly, the qualitative analysis reveals that SD features have very different properties compared to existing representation learning features, such as the recently released DINOv2: while DINOv2 provides sparse but accurate matches, SD features provide high-quality spatial information but sometimes inaccurate semantic matches. We demonstrate that a simple fusion of these two features works surprisingly well, and a zero-shot evaluation using nearest neighbors on these fused features provides a significant performance gain over state-of-the-art methods on benchmark datasets, e.g., SPair-71k, PF-Pascal, and TSS. We also show that these correspondences can enable interesting applications such as instance swapping in two images. 
Project page: https://sd-complements-dino.github.io/.", "keywords": "Semantic Correspondence;Diffusion Models;Vision Transformer;Representation", "primary_area": "", "supplementary_material": "/attachment/c9f71ae0ee4e5a9256d1dcf2ac448f77d30ecbff.pdf", "author": "Junyi Zhang;Charles Herrmann;Junhwa Hur;Luisa Polania Cabrera;Varun Jampani;Deqing Sun;Ming-Hsuan Yang", "authorids": "~Junyi_Zhang3;~Charles_Herrmann1;~Junhwa_Hur1;~Luisa_Polania_Cabrera1;~Varun_Jampani2;~Deqing_Sun2;~Ming-Hsuan_Yang1", "gender": "M;Unspecified;M;;M;M;M", "homepage": "https://www.junyi42.com/;https://scholar.google.com/citations?user=LQvi5XAAAAAJ&hl=en&oi=ao;https://hurjunhwa.github.io/;;https://deqings.github.io/;https://faculty.ucmerced.edu/mhyang/;https://varunjampani.github.io/", "dblp": "00/1627-4;26/11300;135/9099;42/8759.html;69/4250;79/3711.html;124/2785", "google_scholar": "LTi1tYsAAAAJ;LQvi5XAAAAAJ;z4dNJdkAAAAJ;HGLobX4AAAAJ;t4rgICIAAAAJ;p9-ohHsAAAAJ;1Cv6Sf4AAAAJ", "orcid": "0000-0002-9291-3098;;;;;0000-0003-4848-2304;", "linkedin": ";;;;;minghsuanyang/;", "or_profile": "~Junyi_Zhang3;~Charles_Herrmann1;~Junhwa_Hur1;~Luisa_Polania_Cabrera1;~Deqing_Sun2;~Ming-Hsuan_Yang1;~Varun_Jampani1", "aff": "University of California, Merced;Google;Google;Google DeepMind;Google DeepMind;University of California at Merced;Google Research", "aff_domain": "ucmerced.edu;google.com;google.com;google.com;google.com;umcerced.edu;google.com", "position": "Visiting Student;Researcher;Researcher;Researcher;Research Scientist;Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023a,\ntitle={A Tale of Two Features: Stable Diffusion Complements {DINO} for Zero-Shot Semantic Correspondence},\nauthor={Junyi Zhang and Charles Herrmann and Junhwa Hur and Luisa Polania Cabrera and Varun Jampani and Deqing Sun and Ming-Hsuan Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lds9D17HRd}\n}", "github": "", "project": "", "reviewers": "fh35;fsqe;YopU;NMVR;Pbqb", "pdf_size": 3727854, "rating": "6;6;7;7;7", "confidence": "3;2;4;5;5", "soundness": "1;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "2;3;4;3;4", "wc_summary": "125;69;83;57;78", "wc_strengths": "86;87;159;28;58", "wc_weaknesses": "161;142;30;291;166", "wc_questions": "43;80;112;46;107", "wc_limitations": "9;34;8;4;1", "wc_review": "424;412;392;426;410", "wc_reply_reviewers": "23;0;17;18;18", "wc_reply_authors": "118;0;15;12;12", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;1;2;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 82.4, 23.06165648863932 ], "wc_strengths_avg": [ 83.6, 43.47229002479626 ], "wc_weaknesses_avg": [ 158.0, 82.94817659237603 ], "wc_questions_avg": [ 77.6, 29.15201536772372 ], "wc_limitations_avg": [ 11.2, 11.754148203932091 ], "wc_review_avg": [ 412.8, 12.172099243762352 ], "wc_reply_reviewers_avg": [ 15.2, 7.88416133777081 ], "wc_reply_authors_avg": [ 31.4, 43.605504239717256 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.910182054618206, "gs_citation": 165, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4088751988372467068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": 
"ucmerced.edu;google.com;google.com;google.com;google.com;umcerced.edu;google.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "University of California, Merced;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucmerced.edu;https://www.google.com", "aff_unique_abbr": "UC Merced;Google", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Merced;Mountain View;", "aff_country_unique_index": "0;0;0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "ldulVsMDDk", "title": "Towards a Better Theoretical Understanding of Independent Subnetwork Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern advancements in large-scale machine learning would be impossible without the paradigm of data-parallel distributed computing. Since distributed computing with large-scale models imparts excessive pressure on communication channels, a lot of recent research was directed towards co-designing communication compression strategies and training algorithms with the goal of reducing communication costs. While pure data parallelism allows better data scaling, it suffers from poor model scaling properties. Indeed, compute nodes are severely limited by memory constraints, preventing further increases in model size. For this reason, the latest achievements in training giant neural network models rely on some form of model parallelism as well. In this work, we take a closer theoretical look at Independent Subnetwork Training (IST), which is a recently proposed and highly effective technique for solving the aforementioned problems. We identify fundamental differences between IST and alternative approaches, such as distributed methods with compressed communication, and provide a precise analysis of its optimization performance on a quadratic model.", "keywords": "Optimization;Distributed Training;Federated Learning;Independent Subnetwork Training", "primary_area": "", "supplementary_material": "/attachment/795e827c68db7e143de2be641487878ccd81701f.pdf", "author": "Egor Shulgin;Peter Richt\u00e1rik", "authorids": "~Egor_Shulgin1;~Peter_Richt\u00e1rik1", "gender": ";M", "homepage": "https://shulgin-egor.github.io/;https://richtarik.org", "dblp": "234/8612;62/8001", "google_scholar": "cND99UYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6500-7746;0000-0003-4380-5848", "linkedin": "egor-shulgin-a34373127/;richtarik/", "or_profile": "~Egor_Shulgin1;~Peter_Richtarik1", "aff": "KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa", "position": "PhD student;Full Professor", "bibtex": "@misc{\nshulgin2023towards,\ntitle={Towards a Better Theoretical Understanding of Independent Subnetwork Training},\nauthor={Egor Shulgin and Peter Richt{\\'a}rik},\nyear={2023},\nurl={https://openreview.net/forum?id=ldulVsMDDk}\n}", "github": "", "project": "", "reviewers": "wecA;87cd;ijy7;LM8J", "site": "https://openreview.net/forum?id=ldulVsMDDk", "pdf_size": 382659, "rating": "4;5;6;7", "confidence": "3;3;2;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "82;40;53;82", "wc_strengths": "13;99;26;66", "wc_weaknesses": "150;233;56;158", "wc_questions": "114;191;57;39", "wc_limitations": "8;64;15;42", "wc_review": "367;627;207;387", "wc_reply_reviewers": "32;70;0;38", "wc_reply_authors": "18;0;0;1", "reply_reviewers": "2;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 
3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 18.335416548308903 ], "wc_strengths_avg": [ 51.0, 33.904277016329374 ], "wc_weaknesses_avg": [ 149.25, 62.822667087604614 ], "wc_questions_avg": [ 100.25, 59.259492910419006 ], "wc_limitations_avg": [ 32.25, 22.29770167528483 ], "wc_review_avg": [ 397.0, 150.0 ], "wc_reply_reviewers_avg": [ 35.0, 24.839484696748443 ], "wc_reply_authors_avg": [ 4.75, 7.660776723022281 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17068243367403856419&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Toward Re-Identifying Any Animal", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70584", "id": "leS8668NJm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7df69dbf39705c7a39b40f2d70e806c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=leS8668NJm", "openreview": "https://openreview.net/forum?id=leS8668NJm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70584", "video": "https://nips.cc/virtual/2023/poster/70584", "author_site": "Bingliang Jiao, Lingqiao Liu, Liying Gao, Ruiqi Wu, Guosheng Lin, PENG WANG, Yanning Zhang", "tldr": "", "abstract": "The current state of re-identification (ReID) models poses limitations to their applicability in the open world, as they are primarily designed and trained for specific categories like person or vehicle. In light of the importance of ReID technology for tracking wildlife populations and migration patterns, we propose a new task called ``Re-identify Any Animal in the Wild'' (ReID-AW). This task aims to develop a ReID model capable of handling any unseen wildlife category it encounters. To address this challenge, we have created a comprehensive dataset called Wildlife-71, which includes ReID data from 71 different wildlife categories. This dataset is the first of its kind to encompass multiple object categories in the realm of ReID. Furthermore, we have developed a universal re-identification model named UniReID specifically for the ReID-AW task. To enhance the model's adaptability to the target category, we employ a dynamic prompting mechanism using category-specific visual prompts. These prompts are generated based on knowledge gained from a set of pre-selected images within the target category. Additionally, we leverage explicit semantic knowledge derived from the large-scale pre-trained language model, GPT-4. This allows UniReID to focus on regions that are particularly useful for distinguishing individuals within the target category. Extensive experiments have demonstrated the remarkable generalization capability of our UniReID model. 
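
One plausible reading of the dynamic prompting mechanism described above, sketched as code: features of a few pre-selected reference images of the target category are pooled into category-specific visual prompt tokens, which are prepended to the query image's token sequence. The module name `DynamicPrompter`, the mean pooling, and the linear projection are assumptions for illustration only, not the paper's architecture.

```python
import torch
import torch.nn as nn

class DynamicPrompter(nn.Module):
    def __init__(self, dim=768, n_prompts=4):
        super().__init__()
        self.proj = nn.Linear(dim, n_prompts * dim)
        self.n_prompts, self.dim = n_prompts, dim

    def forward(self, support_feats, patch_tokens):
        """support_feats: (K, D) features of K reference images of the target category;
        patch_tokens: (B, N, D) tokens of the query image."""
        proto = support_feats.mean(dim=0)                          # category prototype
        prompts = self.proj(proto).view(self.n_prompts, self.dim)  # category-specific prompts
        prompts = prompts.unsqueeze(0).expand(patch_tokens.size(0), -1, -1)
        return torch.cat([prompts, patch_tokens], dim=1)           # prepend to the sequence

# Toy usage: 5 reference images, a query batch of 2 images with 196 patch tokens each.
prompter = DynamicPrompter()
seq = prompter(torch.randn(5, 768), torch.randn(2, 196, 768))
print(seq.shape)  # torch.Size([2, 200, 768])
```
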
It showcases promising performance in handling arbitrary wildlife categories, offering significant advancements in the field of ReID for wildlife conservation and research purposes.", "keywords": "Re-identification;Category-generalizable", "primary_area": "", "supplementary_material": "/attachment/a7febcc068e8001dd52aac14aa8aa1c27c617dfc.pdf", "author": "Bingliang Jiao;Lingqiao Liu;Liying Gao;Ruiqi Wu;Guosheng Lin;PENG WANG;Yanning Zhang", "authorids": "~Bingliang_Jiao1;~Lingqiao_Liu3;~Liying_Gao1;~Ruiqi_Wu2;~Guosheng_Lin2;~PENG_WANG15;~Yanning_Zhang1", "gender": "M;M;F;M;M;M;F", "homepage": ";https://sites.google.com/site/lingqiaoliu83/;;;https://guosheng.github.io/;https://wangpengnorman.github.io/;http://teacher.nwpu.edu.cn/ynzhang", "dblp": ";45/7776;;;126/4778;95/4442-15.html;14/6655", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;Y2xu62UAAAAJ;https://scholar.google.com/citations?hl=zh-CN;cRmHbWkAAAAJ;https://scholar.google.com.au/citations?user=ZudEhvcAAAAJ;https://scholar.google.com.au/citations?user=aPLp7pAAAAAJ;", "orcid": ";;0000-0003-4204-2092;0009-0003-5171-4548;0000-0002-0329-7458;0000-0001-7689-3405;", "linkedin": ";;;ruiqi-wu-832331220;;;", "or_profile": "~Bingliang_Jiao1;~Lingqiao_Liu3;~Liying_Gao1;~Ruiqi_Wu2;~Guosheng_Lin2;~PENG_WANG15;~Yanning_Zhang1", "aff": "Northwest Polytechnical University Xi'an;The University of Adelaide;Northwestern Polytechnical University;Northwestern Polytechnical University, Northwest Polytechnical University Xi'an;Nanyang Technological University;Northwestern Polytechnical University;Northwestern Polytechnical University", "aff_domain": "nwpu.edu.cn;adelaide.edu.au;nwpu.edu.cn;mai.nwpu.edu.cn;ntu.edu.sg;nwpu.edu.cn;nwpu.edu.cn", "position": "PhD student;Assistant Professor;PhD student;MS student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiao2023toward,\ntitle={Toward Re-Identifying Any Animal},\nauthor={Bingliang Jiao and Lingqiao Liu and Liying Gao and Ruiqi Wu and Guosheng Lin and PENG WANG and Yanning Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=leS8668NJm}\n}", "github": "", "project": "", "reviewers": "ABAL;LLxx;5HdD;9xKB", "pdf_size": 1007914, "rating": "2;4;5;7", "confidence": "5;5;4;5", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "97;72;122;76", "wc_strengths": "83;57;46;76", "wc_weaknesses": "639;311;81;42", "wc_questions": "89;76;204;55", "wc_limitations": "1;31;20;2", "wc_review": "909;547;473;251", "wc_reply_reviewers": "1320;228;153;0", "wc_reply_authors": "1034;909;40;0", "reply_reviewers": "9;1;1;0", "reply_authors": "8;5;2;1", "rating_avg": [ 4.5, 1.8027756377319946 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.75, 19.879323429131084 ], "wc_strengths_avg": [ 65.5, 14.739402972983676 ], "wc_weaknesses_avg": [ 268.25, 237.45249524904978 ], "wc_questions_avg": [ 106.0, 57.86622503671723 ], "wc_limitations_avg": [ 13.5, 12.619429464123963 ], "wc_review_avg": [ 545.0, 236.70656940608978 ], "wc_reply_reviewers_avg": [ 425.25, 523.0780892945144 ], "wc_reply_authors_avg": [ 495.75, 478.0075182463138 ], "reply_reviewers_avg": [ 2.75, 3.6314597615834874 ], "reply_authors_avg": [ 4.0, 2.7386127875258306 ], "replies_avg": [ 49, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.16012815380508713, 
"gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4529526587798102684&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "nwpu.edu.cn;adelaide.edu.au;nwpu.edu.cn;mai.nwpu.edu.cn;ntu.edu.sg;nwpu.edu.cn;nwpu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;3;2;2", "aff_unique_norm": "Northwest Polytechnical University;University of Adelaide;Northwestern Polytechnical University;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nwpu.edu.cn;https://www.adelaide.edu.au;https://www.nwpu.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "NWPU;Adelaide;NWPU;NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;1;0;0;2;0;0", "aff_country_unique": "China;Australia;Singapore" }, { "title": "CMMA: Benchmarking Multi-Affection Detection in Chinese Multi-Modal Conversations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73481", "id": "lh2f1AD4ax", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3be60b4a739b95a07a944a1a2c41e05e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=lh2f1AD4ax", "openreview": "https://openreview.net/forum?id=lh2f1AD4ax", "poster": "/media/PosterPDFs/NeurIPS%202023/73481.png?t=1697812600.8588452", "slides": "https://nips.cc/virtual/2023/poster/73481", "video": "https://nips.cc/virtual/2023/poster/73481", "author_site": "Yazhou Zhang, Yang Yu, Qing Guo, Benyou Wang, Dongming Zhao, Sagar Uprety, Dawei Song, Qiuchi Li, Jing Qin", "tldr": "", "abstract": "Human communication has a multi-modal and multi-affection nature. The inter-relatedness of different emotions and sentiments poses a challenge to jointly detect multiple human affections with multi-modal clues. Recent advances in this field employed multi-task learning paradigms to render the inter-relatedness across tasks, but the scarcity of publicly available resources sets a limit to the potential of works. To fill this gap, we build the first Chinese Multi-modal Multi-Affection conversation (CMMA) dataset, which contains 3,000 multi-party conversations and 21,795 multi-modal utterances collected from various styles of TV-series. CMMA contains a wide variety of affection labels, including sentiment, emotion, sarcasm and humor, as well as the novel inter-correlations values between certain pairs of tasks. Moreover, it provides the topic and speaker information in conversations, which promotes better modeling of conversational context. On the dataset, we empirically analyze the influence of different data modalities and conversational contexts on different affection analysis tasks, and exhibit the practical benefit of inter-task correlations. 
The full dataset will be publicly available for research\\footnote{https://github.com/annoymity2022/Chinese-Dataset}", "keywords": "multi-modal affection detection;conversation dataset;multi-task learning;deep learning;multi-modal fusion", "primary_area": "", "supplementary_material": "/attachment/bf16651e0a6b0cd12b188ae826801046741a8ba9.pdf", "author": "Yazhou Zhang;Yang Yu;Qing Guo;Benyou Wang;Dongming Zhao;Sagar Uprety;Dawei Song;Qiuchi Li;Jing Qin", "authorids": "~Yazhou_Zhang1;~Yang_Yu19;~Qing_Guo3;~Benyou_Wang2;~Dongming_Zhao1;~Sagar_Uprety1;~Dawei_Song1;~Qiuchi_Li1;~Jing_Qin3", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://yzzhang2008.github.io/;https://outlook.live.com/mail/0/inbox/id/AQQkADAwATNiZmYAZC0zYzIzLTQ4OABmLTAwAi0wMAoAEACVQJJq8MBYTYzK1sCGeERN;https://tsingqguo.github.io;https://wabyking.github.io/old.html;https://xueshu.baidu.com/scholarID/CN-BH749U3J;http://sagaruprety.com;;;https://harry-qinjing.github.io/", "dblp": ";;25/3038-5;169/1793;;220/3397.html;47/6784-1.html;166/3079;00/1015-1", "google_scholar": "https://scholar.google.com/citations?hl=en;;Rj2x4QUAAAAJ;Jk4vJU8AAAAJ;;kz6M6T0AAAAJ;https://scholar.google.com/citations?hl=en;;X3Wi7wkAAAAJ", "orcid": "0000-0002-5699-0176;;0000-0003-0974-9299;0000-0002-1501-9914;0000-0003-4592-9545;0000-0001-7858-6265;;;0000-0002-7059-0929", "linkedin": ";;;;;upretysagar/;;;", "or_profile": "~Yazhou_Zhang1;~Yang_Yu19;~Qing_Guo3;~Benyou_Wang2;~Dongming_Zhao1;~Sagar_Uprety1;~Dawei_Song1;~Qiuchi_Li1;~Jing_Qin3", "aff": "Hong Kong Polytechnic University;Zhengzhou University of Light Industry; Agency for Science, Technology and Research (A*STAR));The Chinese University of Hong Kong, Shenzhen;Tianjin University;Bravura Solutions;Open University;Copenhagen University;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;zzuli.edu;cfar.a-star.edu.sg;cuhk.edu.cn;tju.edu;bravurasolutions.com;open.ac.uk;ku.dk;polyu.edu.hk", "position": "Postdoc;MS student;Researcher;Assistant Professor;Researcher;Researcher;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023cmma,\ntitle={{CMMA}: Benchmarking Multi-Affection Detection in Chinese Multi-Modal Conversations},\nauthor={Yazhou Zhang and Yang Yu and Qing Guo and Benyou Wang and Dongming Zhao and Sagar Uprety and Dawei Song and Qiuchi Li and Jing Qin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=lh2f1AD4ax}\n}", "github": "", "project": "", "reviewers": "LR2c;GqNB;hf9n;4J35", "pdf_size": 1509213, "rating": "6;7;8;9", "confidence": "4;5;4;4", "wc_summary_and_contributions": "59;69;64;159", "wc_strengths": "64;7;70;76", "wc_improvement": "136;8;86;57", "wc_limitations": "29;5;41;139", "wc_correctness": "64;4;17;20", "wc_clarity": "12;7;8;23", "wc_relation_to_prior_work": "17;17;17;27", "wc_documentation": "39;4;83;91", "wc_additional_feedback": "1;1;1;1", "wc_review": "421;122;387;593", "wc_reply_reviewers": "0;0;23;31", "wc_reply_authors": "683;163;737;569", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 87.75, 41.287861412284364 ], "wc_strengths_avg": [ 54.25, 27.60774347895894 ], "wc_improvement_avg": [ 71.75, 46.40245144386232 ], "wc_limitations_avg": [ 53.5, 51.03675146401855 ], "wc_correctness_avg": [ 26.25, 22.609455986378798 ], "wc_clarity_avg": [ 12.5, 6.34428877022476 ], 
"wc_relation_to_prior_work_avg": [ 19.5, 4.330127018922194 ], "wc_documentation_avg": [ 54.25, 35.12388788275011 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 380.75, 168.56805005694287 ], "wc_reply_reviewers_avg": [ 13.5, 13.793114224133722 ], "wc_reply_authors_avg": [ 538.0, 224.8399430706208 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4297256325933629438&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "polyu.edu.hk;zzuli.edu;cfar.a-star.edu.sg;cuhk.edu.cn;tju.edu;bravurasolutions.com;open.ac.uk;ku.dk;polyu.edu.hk", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;6;7;0", "aff_unique_norm": "Hong Kong Polytechnic University;Zhengzhou University of Light Industry;Agency for Science, Technology and Research;Chinese University of Hong Kong;Tianjin University;Bravura Solutions;Open University;University of Copenhagen", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.polyu.edu.hk;http://www.zuli.edu.cn/;https://www.a-star.edu.sg;https://www.cuhk.edu.cn;http://www.tju.edu.cn;https://www.bravurasolutions.com;https://www.open.ac.uk;https://www.ku.dk", "aff_unique_abbr": "PolyU;;A*STAR;CUHK;TJU;;OU;UCPH", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Hong Kong SAR;;Shenzhen", "aff_country_unique_index": "0;0;1;0;0;2;3;4;0", "aff_country_unique": "China;Singapore;Australia;United Kingdom;Denmark" }, { "title": "Transformers as Statisticians: Provable In-Context Learning with In-Context Algorithm Selection", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70583", "id": "liMSqUuVg9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2e63e36c57e153b9015fece2352a9f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=liMSqUuVg9", "openreview": "https://openreview.net/forum?id=liMSqUuVg9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70583", "video": "https://nips.cc/virtual/2023/poster/70583", "author_site": "Yu Bai, Fan Chen, Huan Wang, Caiming Xiong, Song Mei", "tldr": "", "abstract": "Neural sequence models based on the transformer architecture have demonstrated remarkable \\emph{in-context learning} (ICL) abilities, where they can perform new tasks when prompted with training and test examples, without any parameter update to the model. This work first provides a comprehensive statistical theory for transformers to perform ICL. Concretely, we show that transformers can implement a broad class of standard machine learning algorithms in context, such as least squares, ridge regression, Lasso, learning generalized linear models, and gradient descent on two-layer neural networks, with near-optimal predictive power on various in-context data distributions. 
Using an efficient implementation of in-context gradient descent as the underlying mechanism, our transformer constructions admit mild size bounds, and can be learned with polynomially many pretraining sequences.\n \nBuilding on these ``base'' ICL algorithms, intriguingly, we show that transformers can implement more complex ICL procedures involving \\emph{in-context algorithm selection}, akin to what a statistician can do in real life---A \\emph{single} transformer can adaptively select different base ICL algorithms---or even perform qualitatively different tasks---on different input sequences, without any explicit prompting of the right algorithm or task. We both establish this in theory by explicit constructions, and also observe this phenomenon experimentally. In theory, we construct two general mechanisms for algorithm selection with concrete examples: pre-ICL testing, and post-ICL validation. As an example, we use the post-ICL validation mechanism to construct a transformer that can perform nearly Bayes-optimal ICL on a challenging task---noisy linear models with mixed noise levels. Experimentally, we demonstrate the strong in-context algorithm selection capabilities of standard transformer architectures.", "keywords": "in-context learning;transformers;deep learning theory;learning theory", "primary_area": "", "supplementary_material": "", "author": "Yu Bai;Fan Chen;Huan Wang;Caiming Xiong;Song Mei", "authorids": "~Yu_Bai1;~Fan_Chen4;~Huan_Wang1;~Caiming_Xiong1;~Song_Mei1", "gender": ";M;M;M;M", "homepage": "https://yubai.org;https://sites.google.com/view/chen-fan;http://www.cs.yale.edu/homes/wang-huan/;http://cmxiong.com/;https://www.stat.berkeley.edu/~songmei/", "dblp": "03/6325-17.html;;70/6155-16.html;80/7282;https://dblp.org/pers/hd/m/Mei:Song", "google_scholar": "owqhKD8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;7NpTttkAAAAJ;vaSdahkAAAAJ;https://scholar.google.com.hk/citations?hl=en", "orcid": ";;;;", "linkedin": ";;huanwangyale/;caiming-xiong-150a1417;", "or_profile": "~Yu_Bai1;~Fan_Chen4;~Huan_Wang1;~Caiming_Xiong1;~Song_Mei1", "aff": "Salesforce Research;Peking University;Salesforce.com;Salesforce Research;University of California, Berkeley", "aff_domain": "salesforce.com;pku.edu.cn;salesforce.com;salesforce.com;berkeley.edu", "position": "Research Scientist;Undergrad student;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nbai2023transformers,\ntitle={Transformers as Statisticians: Provable In-Context Learning with In-Context Algorithm Selection},\nauthor={Yu Bai and Fan Chen and Huan Wang and Caiming Xiong and Song Mei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=liMSqUuVg9}\n}", "github": "", "project": "", "reviewers": "oV7P;RiCR;pWto;Utbg", "pdf_size": 1980621, "rating": "4;7;7;9", "confidence": "4;3;3;3", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;3;4", "wc_summary": "68;71;101;105", "wc_strengths": "59;234;96;34", "wc_weaknesses": "376;262;31;7", "wc_questions": "8;148;44;1", "wc_limitations": "8;43;1;1", "wc_review": "519;758;273;148", "wc_reply_reviewers": "777;76;57;0", "wc_reply_authors": "2107;20;39;0", "reply_reviewers": "4;1;1;0", "reply_authors": "7;2;2;1", "rating_avg": [ 6.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 
16.843025262701474 ], "wc_strengths_avg": [ 105.75, 77.26051708343661 ], "wc_weaknesses_avg": [ 169.0, 155.55224202820094 ], "wc_questions_avg": [ 50.25, 58.74680842394759 ], "wc_limitations_avg": [ 13.25, 17.41228014936585 ], "wc_review_avg": [ 424.5, 234.284549213131 ], "wc_reply_reviewers_avg": [ 227.5, 318.48430102596893 ], "wc_reply_authors_avg": [ 541.5, 903.9470393778608 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.0, 2.345207879911715 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8892972917998875, "gs_citation": 227, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5025827678267881928&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "salesforce.com;pku.edu.cn;salesforce.com;salesforce.com;berkeley.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Salesforce;Peking University;University of California, Berkeley", "aff_unique_dep": "Salesforce Research;;", "aff_unique_url": "https://research.salesforce.com;http://www.pku.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "Salesforce;Peking U;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Nominality Score Conditioned Time Series Anomaly Detection by Point/Sequential Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70582", "id": "ljgM3vNqfQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f1cf02ce09757f57c3b93c0db83181e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ljgM3vNqfQ", "openreview": "https://openreview.net/forum?id=ljgM3vNqfQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70582.png?t=1698892484.9514565", "slides": "https://nips.cc/virtual/2023/poster/70582", "video": "https://nips.cc/virtual/2023/poster/70582", "author_site": "Chih-Yu (Andrew) Lai, Fan-Keng Sun, Zhengqi Gao, Jeffrey H Lang, Duane Boning", "tldr": "", "abstract": "Time series anomaly detection is challenging due to the complexity and variety of patterns that can occur. One major difficulty arises from modeling time-dependent relationships to find contextual anomalies while maintaining detection accuracy for point anomalies. In this paper, we propose a framework for unsupervised time series anomaly detection that utilizes point-based and sequence-based reconstruction models. The point-based model attempts to quantify point anomalies, and the sequence-based model attempts to quantify both point and contextual anomalies. Under the formulation that the observed time point is a two-stage deviated value from a nominal time point, we introduce a nominality score calculated from the ratio of a combined value of the reconstruction errors. We derive an induced anomaly score by further integrating the nominality score and anomaly score, then theoretically prove the superiority of the induced anomaly score over the original anomaly score under certain conditions. 
Extensive studies conducted on several public datasets show that the proposed framework outperforms most state-of-the-art baselines for time series anomaly detection.", "keywords": "time series;anomaly detection;point anomalies;contextual anomalies;nominality score;induced anomaly score", "primary_area": "", "supplementary_material": "/attachment/d710f23f4f4f064221a397631b3546ecd5693ea0.zip", "author": "Chih-Yu Lai;Fan-Keng Sun;Zhengqi Gao;Jeffrey Lang;Duane S Boning", "authorids": "~Chih-Yu_Lai1;~Fan-Keng_Sun1;~Zhengqi_Gao1;~Jeffrey_Lang1;~Duane_S_Boning1", "gender": "M;;M;M;M", "homepage": "https://chihyulai.com/;https://daikon-sun.github.io/;http://zhengqigao.github.io/;https://www.rle.mit.edu/people/principal-investigators/;https://boning.mit.edu/", "dblp": "350/0338.html;https://dblp.org/pers/hd/s/Sun:Fan=Keng;256/9403;;26/1132", "google_scholar": "KJvqi-4AAAAJ;sfEwE4gAAAAJ;igvvVY4AAAAJ;;https://scholar.google.com.tw/citations?user=oIdI_PcAAAAJ", "orcid": "0000-0001-5789-4234;;;0000-0002-5765-4369;0000-0002-0417-445X", "linkedin": "chih-yu-lai/;fan-keng-sun/;zhengqi-gao-729b51146/;;", "or_profile": "~Chih-Yu_Lai1;~Fan-Keng_Sun1;~Zhengqi_Gao1;~Jeffrey_Lang1;~Duane_S_Boning1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Ph.D.;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlai2023nominality,\ntitle={Nominality Score Conditioned Time Series Anomaly Detection by Point/Sequential Reconstruction},\nauthor={Chih-Yu Lai and Fan-Keng Sun and Zhengqi Gao and Jeffrey Lang and Duane S Boning},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ljgM3vNqfQ}\n}", "github": "", "project": "", "reviewers": "UPqe;gJSP;m3iW;EBhj", "pdf_size": 4703108, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "1;3;3;4", "wc_summary": "69;143;45;79", "wc_strengths": "94;67;47;70", "wc_weaknesses": "184;120;100;233", "wc_questions": "70;83;32;143", "wc_limitations": "1;1;75;11", "wc_review": "418;414;299;536", "wc_reply_reviewers": "65;140;0;215", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 84.0, 36.235341863986875 ], "wc_strengths_avg": [ 69.5, 16.680827317612277 ], "wc_weaknesses_avg": [ 159.25, 52.68479382136747 ], "wc_questions_avg": [ 82.0, 39.89360851063739 ], "wc_limitations_avg": [ 22.0, 30.870698080866262 ], "wc_review_avg": [ 416.75, 83.80744298688512 ], "wc_reply_reviewers_avg": [ 105.0, 80.54501846793507 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13930312516456956883&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": 
"https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Theoretical Analysis of the Test Error of Finite-Rank Kernel Ridge Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70581", "id": "lk6KDG6qI7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f580c1ace3b857a390575ca42de7938-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lk6KDG6qI7", "openreview": "https://openreview.net/forum?id=lk6KDG6qI7", "poster": "/media/PosterPDFs/NeurIPS%202023/70581.png?t=1697108171.1560893", "slides": "https://nips.cc/virtual/2023/poster/70581", "video": "https://nips.cc/virtual/2023/poster/70581", "author_site": "Tin Sum Cheng, Aurelien Lucchi, Anastasis Kratsios, Ivan Dokmani\u0107, David Belius", "tldr": "", "abstract": "Existing statistical learning guarantees for general kernel regressors often yield loose bounds when used with finite-rank kernels. Yet, finite-rank kernels naturally appear in a number of machine learning problems, e.g. when fine-tuning a pre-trained deep neural network's last layer to adapt it to a novel task when performing transfer learning. We address this gap for finite-rank kernel ridge regression (KRR) by deriving sharp non-asymptotic upper and lower bounds for the KRR test error of any finite-rank KRR. Our bounds are tighter than previously derived bounds on finite-rank KRR and, unlike comparable results, they also remain valid for any regularization parameters.", "keywords": "Kernel;regression;bias-variance;generalization", "primary_area": "", "supplementary_material": "/attachment/05f29087c407a142aeee020be355f19046e6f486.zip", "author": "Tin Sum Cheng;Aurelien Lucchi;Anastasis Kratsios;Ivan Dokmani\u0107;David Belius", "authorids": "~Tin_Sum_Cheng1;~Aurelien_Lucchi1;~Anastasis_Kratsios1;~Ivan_Dokmani\u01071;~David_Belius1", "gender": "M;M;Non-Binary;;M", "homepage": ";http://people.inf.ethz.ch/alucchi/;https://anastasiskratsios.github.io/;https://davidbelius.github.io/;http://dokmanic.ece.illinois.edu", "dblp": ";14/5780;;;52/8859", "google_scholar": "5wfAh9kAAAAJ;https://scholar.google.ch/citations?user=V1ONSgIAAAAJ;https://scholar.google.ca/citations?user=9D-bHFgAAAAJ;;0SQnwL4AAAAJ", "orcid": "0000-0002-3000-311X;;0000-0001-6791-3371;0000-0003-3706-043X;", "linkedin": "tin-sum-cheng;;anastasiskratsios/;;", "or_profile": "~Tin_Sum_Cheng1;~Aurelien_Lucchi1;~Anastasis_Kratsios1;~David_Belius1;~Ivan_Dokmanic1", "aff": "University of Basel;University of Basel;McMaster University;University of Basel;University of Basel", "aff_domain": "unibas.ch;unibas.ch;mcmaster.ca;unibas.ch;unibas.ch", "position": "PhD student;Assistant Professor;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ncheng2023a,\ntitle={A Theoretical Analysis of the Test Error of Finite-Rank Kernel Ridge Regression},\nauthor={Tin Sum Cheng and Aurelien Lucchi and Anastasis Kratsios and Ivan Dokmani{\\'c} and David Belius},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lk6KDG6qI7}\n}", "github": "", "project": "", "reviewers": "Gn3m;BvqB;S6gY;cigT;MpJJ", "pdf_size": 622474, "rating": "4;6;6;7;7", "confidence": "4;2;3;3;4", "soundness": "2;4;3;3;4", "novelty": "1;3;3;3;3", "presentation": "2;4;2;2;3", "wc_summary": "36;32;74;55;83", "wc_strengths": 
"13;75;62;14;31", "wc_weaknesses": "187;52;25;68;73", "wc_questions": "44;40;53;139;78", "wc_limitations": "1;1;7;29;9", "wc_review": "281;200;221;305;274", "wc_reply_reviewers": "169;34;12;14;13", "wc_reply_authors": "367;19;0;56;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 56.0, 20.149441679609886 ], "wc_strengths_avg": [ 39.0, 25.25866188063018 ], "wc_weaknesses_avg": [ 81.0, 55.58057214530991 ], "wc_questions_avg": [ 70.8, 36.56993300513415 ], "wc_limitations_avg": [ 9.4, 10.307278981380101 ], "wc_review_avg": [ 256.2, 39.27034504559388 ], "wc_reply_reviewers_avg": [ 48.4, 60.849322099757195 ], "wc_reply_authors_avg": [ 88.4, 140.7928975481363 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.24397501823713333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12638340221176981992&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "unibas.ch;unibas.ch;mcmaster.ca;unibas.ch;unibas.ch", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Basel;McMaster University", "aff_unique_dep": ";", "aff_unique_url": "https://www.unibas.ch;https://www.mcmaster.ca", "aff_unique_abbr": "UniBas;McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Switzerland;Canada" }, { "title": "Do SSL Models Have D\u00e9j\u00e0 Vu? A Case of Unintended Memorization in Self-supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70580", "id": "lkBygTc0SI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/854b6ec839294bf332db0d86e2f83c3f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lkBygTc0SI", "openreview": "https://openreview.net/forum?id=lkBygTc0SI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70580", "video": "https://nips.cc/virtual/2023/poster/70580", "author_site": "Casey Meehan, Florian Bordes, Pascal Vincent, Kamalika Chaudhuri, Chuan Guo", "tldr": "", "abstract": "Self-supervised learning (SSL) algorithms can produce useful image representations by learning to associate different parts of natural images with one another. However, when taken to the extreme, SSL models can unintendedly memorize specific parts in individual training samples rather than learning semantically meaningful associations. In this work, we perform a systematic study of the unintended memorization of image-specific information in SSL models -- which we refer to as d\u00e9j\u00e0 vu memorization. Concretely, we show that given the trained model and a crop of a training image containing only the background (e.g., water, sky, grass), it is possible to infer the foreground object with high accuracy or even visually reconstruct it. Furthermore, we show that d\u00e9j\u00e0 vu memorization is common to different SSL algorithms, is exacerbated by certain design choices, and cannot be detected by conventional techniques for evaluating representation quality. 
Our study of d\u00e9j\u00e0 vu memorization reveals previously unknown privacy risks in SSL models, as well as suggests potential practical mitigation strategies.", "keywords": "self-supervised learning;privacy;data reconstruction;memorization", "primary_area": "", "supplementary_material": "", "author": "Casey Meehan;Florian Bordes;Pascal Vincent;Kamalika Chaudhuri;Chuan Guo", "authorids": "~Casey_Meehan1;~Florian_Bordes1;~Pascal_Vincent1;~Kamalika_Chaudhuri1;~Chuan_Guo1", "gender": "M;M;M;F;M", "homepage": "https://casey-meehan.github.io/;;http://www.iro.umontreal.ca/~vincentp;http://cseweb.ucsd.edu/users/kamalika;https://sites.google.com/view/chuanguo", "dblp": "255/5544;194/9862;43/861;56/6435;", "google_scholar": "s-lqUEUAAAAJ;OADfWhUAAAAJ;WBCKQMsAAAAJ;I-DJ7EsAAAAJ;0gp5M-kAAAAJ", "orcid": ";;;;", "linkedin": "casey-meehan-ucsd/;florianbordes;;;", "or_profile": "~Casey_Meehan1;~Florian_Bordes1;~Pascal_Vincent1;~Kamalika_Chaudhuri1;~Chuan_Guo1", "aff": "University of California, San Diego;University of Montreal;Facebook A.I. Research;University of California, San Diego;Meta", "aff_domain": "ucsd.edu;umontreal.ca;fb.com;ucsd.edu;meta.com", "position": "PhD student;PhD student;Research Scientist;Associate Professor;Researcher", "bibtex": "@inproceedings{\nmeehan2023do,\ntitle={Do {SSL} Models Have D\\'ej\\`a Vu? A Case of Unintended Memorization in Self-supervised Learning},\nauthor={Casey Meehan and Florian Bordes and Pascal Vincent and Kamalika Chaudhuri and Chuan Guo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lkBygTc0SI}\n}", "github": "", "project": "", "reviewers": "mdS9;9ALm;2FNq;H9Sq", "pdf_size": 27784203, "rating": "6;6;6;8", "confidence": "3;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "56;205;107;318", "wc_strengths": "55;195;130;68", "wc_weaknesses": "95;74;277;437", "wc_questions": "76;112;41;66", "wc_limitations": "51;31;9;9", "wc_review": "333;617;564;898", "wc_reply_reviewers": "232;0;146;220", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 171.5, 100.10619361458112 ], "wc_strengths_avg": [ 112.0, 55.673153314681215 ], "wc_weaknesses_avg": [ 220.75, 147.71319338501894 ], "wc_questions_avg": [ 73.75, 25.498774480354932 ], "wc_limitations_avg": [ 25.0, 17.4928556845359 ], "wc_review_avg": [ 603.0, 201.02363045174565 ], "wc_reply_reviewers_avg": [ 149.5, 92.38371068538002 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7642009905455218176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ucsd.edu;umontreal.ca;fb.com;ucsd.edu;meta.com", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of California, San Diego;University of Montreal;Meta", "aff_unique_dep": ";;Facebook A.I. 
Research", "aff_unique_url": "https://www.ucsd.edu;https://wwwumontreal.ca;https://research.facebook.com", "aff_unique_abbr": "UCSD;UM;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Single-Pass Pivot Algorithm for Correlation Clustering. Keep it simple!", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70579", "id": "lkEiOZlmPm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/149ad6e32c08b73a3ecc3d11977fcc47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lkEiOZlmPm", "openreview": "https://openreview.net/forum?id=lkEiOZlmPm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70579", "video": "https://nips.cc/virtual/2023/poster/70579", "author_site": "Konstantin Makarychev, Sayak Chakrabarty", "tldr": "", "abstract": "We show that a simple single-pass semi-streaming variant of the Pivot algorithm for Correlation Clustering gives a (3+eps)-approximation using O(n/eps) words of memory. This is a slight improvement over the recent results of Cambus, Kuhn, Lindy, Pai, and Uitto, who gave a (3+eps)-approximation using O(n log n) words of memory, and Behnezhad, Charikar, Ma, and Tan, who gave a 5-approximation using O(n) words of memory. One of the main contributions of our paper is that the algorithm and its analysis are simple and easy to understand.", "keywords": "correlation clustering;Pivot algorithm;streaming", "primary_area": "", "supplementary_material": "/attachment/6eb91c22c9f95f627f3d2fdba8d4b2d3b11dd277.zip", "author": "Konstantin Makarychev;Sayak Chakrabarty", "authorids": "~Konstantin_Makarychev1;~Sayak_Chakrabarty1", "gender": "M;M", "homepage": "http://konstantin.makarychev.net/;https://hellokayas.github.io/", "dblp": "37/1011;336/3841", "google_scholar": "https://scholar.google.com.tw/citations?user=-E3hYj8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9587-3677;0009-0004-6179-389X", "linkedin": "konstantin-makarychev-143b3a132/;sayak-chakrabarty-cs/", "or_profile": "~Konstantin_Makarychev1;~Sayak_Chakrabarty1", "aff": "Northwestern University;Northwestern University", "aff_domain": "northwestern.edu;northwestern.edu", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nmakarychev2023singlepass,\ntitle={Single-Pass Pivot Algorithm for Correlation Clustering. 
Keep it simple!},\nauthor={Konstantin Makarychev and Sayak Chakrabarty},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lkEiOZlmPm}\n}", "github": "", "project": "", "reviewers": "NEv2;tNUH;mSWo;mH8W;aXJT", "pdf_size": 280931, "rating": "4;4;7;7;8", "confidence": "4;4;4;5;4", "soundness": "4;3;4;4;4", "novelty": "2;2;3;3;4", "presentation": "3;3;4;4;3", "wc_summary": "66;215;196;96;300", "wc_strengths": "29;25;59;25;143", "wc_weaknesses": "68;123;19;19;139", "wc_questions": "13;35;177;22;1", "wc_limitations": "1;1;2;1;1", "wc_review": "177;399;453;163;584", "wc_reply_reviewers": "0;0;0;0;40", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;1", "reply_authors": "0;1;1;1;1", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 174.6, 84.60165483015093 ], "wc_strengths_avg": [ 56.2, 45.23007848766128 ], "wc_weaknesses_avg": [ 73.6, 50.42063069815767 ], "wc_questions_avg": [ 49.6, 64.66405493007689 ], "wc_limitations_avg": [ 1.2, 0.4 ], "wc_review_avg": [ 355.2, 162.80589669910609 ], "wc_reply_reviewers_avg": [ 8.0, 16.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 0.8, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.29880715233359845, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=911532311344889822&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "northwestern.edu;northwestern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A General Framework for Robust G-Invariance in G-Equivariant Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70578", "id": "llP6lmMiXE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d42523d621194ba54dda098669645f91-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=llP6lmMiXE", "openreview": "https://openreview.net/forum?id=llP6lmMiXE", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70578", "video": "https://nips.cc/virtual/2023/poster/70578", "author_site": "Sophia Sanborn, Nina Miolane", "tldr": "", "abstract": "We introduce a general method for achieving robust group-invariance in group-equivariant convolutional neural networks ($G$-CNNs), which we call the $G$-triple-correlation ($G$-TC) layer. The approach leverages the theory of the triple-correlation on groups, which is the unique, lowest-degree polynomial invariant map that is also \\textit{complete}. Many commonly used invariant maps\\textemdash such as the \\texttt{max}\\textemdash are incomplete: they remove both group and signal structure. A complete invariant, by contrast, removes only the variation due to the actions of the group, while preserving all information about the structure of the signal. 
The completeness of the triple correlation endows the $G$-TC layer with strong robustness, which can be observed in its resistance to invariance-based adversarial attacks. In addition, we observe that it yields measurable improvements in classification accuracy over standard Max $G$-Pooling in $G$-CNN architectures. We provide a general and efficient implementation of the method for any discretized group, which requires only a table defining the group's product structure. We demonstrate the benefits of this method for $G$-CNNs defined on both commutative and non-commutative groups\\textemdash $SO(2)$, $O(2)$, $SO(3)$, and $O(3)$ (discretized as the cyclic $C8$, dihedral $D16$, chiral octahedral $O$ and full octahedral $O_h$ groups)\\textemdash acting on $\\mathbb{R}^2$ and $\\mathbb{R}^3$ on both $G$-MNIST and $G$-ModelNet10 datasets.", "keywords": "equivariance;group-equivariant cnns;invariance;pooling;convolutional neural networks", "primary_area": "", "supplementary_material": "", "author": "Sophia Sanborn;Nina Miolane", "authorids": "~Sophia_Sanborn1;~Nina_Miolane2", "gender": "F;", "homepage": "https://sophiasanborn.com;https://www.ece.ucsb.edu/people/faculty/nina-miolane", "dblp": "212/5137;", "google_scholar": "tCUvC4oAAAAJ;", "orcid": "0000-0002-1957-7067;", "linkedin": "sophia-sanborn;", "or_profile": "~Sophia_Sanborn1;~Nina_Miolane2", "aff": "University of California, Santa Barbara;University of California, Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nsanborn2023a,\ntitle={A General Framework for Robust G-Invariance in G-Equivariant Networks},\nauthor={Sophia Sanborn and Nina Miolane},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=llP6lmMiXE}\n}", "github": "", "project": "", "reviewers": "Axj4;E2w2;NbDh;aA5W", "pdf_size": 3154441, "rating": "3;5;6;7", "confidence": "4;4;3;5", "soundness": "2;2;4;3", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "93;44;95;245", "wc_strengths": "15;63;87;126", "wc_weaknesses": "284;53;48;283", "wc_questions": "6;46;75;23", "wc_limitations": "1;1;19;1", "wc_review": "399;207;324;678", "wc_reply_reviewers": "266;32;26;67", "wc_reply_authors": "1472;121;55;52", "reply_reviewers": "1;1;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.25, 75.42007358787181 ], "wc_strengths_avg": [ 72.75, 40.21426985536353 ], "wc_weaknesses_avg": [ 167.0, 116.51394766293004 ], "wc_questions_avg": [ 37.5, 25.889186931999237 ], "wc_limitations_avg": [ 5.5, 7.794228634059948 ], "wc_review_avg": [ 402.0, 173.41712718183288 ], "wc_reply_reviewers_avg": [ 97.75, 98.393025667473 ], "wc_reply_authors_avg": [ 425.0, 605.1144519840853 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.23904572186687872, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1842724388834709845&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucsb.edu;ucsb.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", 
"aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Flexible Attention-Based Multi-Policy Fusion for Efficient Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70577", "id": "lmXNcKhj4c", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c23b3c72127e15fedc276722faee927-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lmXNcKhj4c", "openreview": "https://openreview.net/forum?id=lmXNcKhj4c", "poster": "/media/PosterPDFs/NeurIPS%202023/70577.png?t=1701978513.617019", "slides": "https://nips.cc/virtual/2023/poster/70577", "video": "https://nips.cc/virtual/2023/poster/70577", "author_site": "Zih-Yun Chiu, Yi-Lin Tuan, William Yang Wang, Michael Yip", "tldr": "", "abstract": "Reinforcement learning (RL) agents have long sought to approach the efficiency of human learning. Humans are great observers who can learn by aggregating external knowledge from various sources, including observations from others' policies of attempting a task. Prior studies in RL have incorporated external knowledge policies to help agents improve sample efficiency. However, it remains non-trivial to perform arbitrary combinations and replacements of those policies, an essential feature for generalization and transferability. In this work, we present Knowledge-Grounded RL (KGRL), an RL paradigm fusing multiple knowledge policies and aiming for human-like efficiency and flexibility. We propose a new actor architecture for KGRL, Knowledge-Inclusive Attention Network (KIAN), which allows free knowledge rearrangement due to embedding-based attentive action prediction. KIAN also addresses entropy imbalance, a problem arising in maximum entropy KGRL that hinders an agent from efficiently exploring the environment, through a new design of policy distributions. The experimental results demonstrate that KIAN outperforms alternative methods incorporating external knowledge policies and achieves efficient and flexible learning. Our implementation is available at https://github.com/Pascalson/KGRL.git .", "keywords": "Reinforcement Learning;Deep Reinforcement Learning;Sample Efficiency;Generalizability;Multi-Policy Decision Making;Multi-Policy Continuous Control", "primary_area": "", "supplementary_material": "", "author": "Zih-Yun Chiu;Yi-Lin Tuan;William Yang Wang;Michael C. Yip", "authorids": "~Zih-Yun_Chiu1;~Yi-Lin_Tuan1;~William_Yang_Wang2;~Michael_C._Yip1", "gender": "F;;;", "homepage": "https://sarahchiu.github.io/;;;http://www.ucsdarclab.com", "dblp": "216/8771;;;", "google_scholar": "6ZEE3pUAAAAJ;;;gSYxbCYAAAAJ", "orcid": ";;;", "linkedin": "zihyun-chiu/;;;michael-yip-43913421/", "or_profile": "~Zih-Yun_Chiu1;~Yi-Lin_Tuan1;~William_Yang_Wang2;~Michael_C._Yip1", "aff": "University of California, San Diego, University of California, San Diego;;;University of California, San Diego", "aff_domain": "eng.ucsd.edu;;;ucsd.edu", "position": "PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nchiu2023flexible,\ntitle={Flexible Attention-Based Multi-Policy Fusion for Efficient Deep Reinforcement Learning},\nauthor={Zih-Yun Chiu and Yi-Lin Tuan and William Yang Wang and Michael C. 
Yip},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lmXNcKhj4c}\n}", "github": "", "project": "", "reviewers": "CuzY;7i4w;K32E;2LpZ", "pdf_size": 2403115, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "2;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "64;301;94;99", "wc_strengths": "231;53;38;71", "wc_weaknesses": "319;167;112;37", "wc_questions": "6;162;147;14", "wc_limitations": "25;31;29;23", "wc_review": "645;714;420;244", "wc_reply_reviewers": "16;99;39;85", "wc_reply_authors": "0;0;66;0", "reply_reviewers": "1;2;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 139.5, 94.1979299135602 ], "wc_strengths_avg": [ 98.25, 77.52862374632997 ], "wc_weaknesses_avg": [ 158.75, 103.3885269263471 ], "wc_questions_avg": [ 82.25, 72.49956896423592 ], "wc_limitations_avg": [ 27.0, 3.1622776601683795 ], "wc_review_avg": [ 505.75, 186.16172404659343 ], "wc_reply_reviewers_avg": [ 59.75, 33.625697018797986 ], "wc_reply_authors_avg": [ 16.5, 28.578838324886476 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6095274893443629261&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "eng.ucsd.edu;;;ucsd.edu", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Sample Complexity for Quadratic Bandits: Hessian Dependent Bounds and Optimal Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70576", "id": "lnTpBUge5G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e60a9023d2c63f7f0856910129ae753-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lnTpBUge5G", "openreview": "https://openreview.net/forum?id=lnTpBUge5G", "poster": "/media/PosterPDFs/NeurIPS%202023/70576.png?t=1702202020.3735836", "slides": "https://nips.cc/virtual/2023/poster/70576", "video": "https://nips.cc/virtual/2023/poster/70576", "author_site": "Qian Yu, Yining Wang, Baihe Huang, Qi Lei, Jason Lee", "tldr": "", "abstract": "In stochastic zeroth-order optimization, a problem of practical relevance is understanding how to fully exploit the local geometry of the underlying objective function. We consider a fundamental setting in which the objective function is quadratic, and provide the first tight characterization of the optimal Hessian-dependent sample complexity. Our contribution is twofold. First, from an information-theoretic point of view, we prove tight lower bounds on Hessian-dependent complexities by introducing a concept called \\emph{energy allocation}, which captures the interaction between the searching algorithm and the geometry of objective functions. A matching upper bound is obtained by solving the optimal energy spectrum. 
Then, algorithmically, we show the existence of a Hessian-independent algorithm that universally achieves the asymptotic optimal sample complexities for all Hessian instances. The optimal sample complexities achieved by our algorithm remain valid for heavy-tailed noise distributions, which are enabled by a truncation method.", "keywords": "optimization;quadratic bandits;sample complexity;optimality", "primary_area": "", "supplementary_material": "/attachment/1156dd2d8c95e0efafd1f1e649de574918f8a4b5.pdf", "author": "Qian Yu;Yining Wang;Baihe Huang;Qi Lei;Jason D. Lee", "authorids": "~Qian_Yu5;~Yining_Wang1;~Baihe_Huang1;~Qi_Lei1;~Jason_D._Lee1", "gender": ";M;;F;M", "homepage": "https://scholar.princeton.edu/qyu;https://yining-wang.com;;https://cecilialeiqi.github.io/;https://jasondlee88.github.io/", "dblp": "16/3790-1;04/7235;279/4131;;88/3262", "google_scholar": "SxUNhucAAAAJ;HpQGq54AAAAJ;chICXXMAAAAJ;kGOgaowAAAAJ;GR_DsT0AAAAJ", "orcid": "0000-0002-2034-5941;;;;", "linkedin": ";;;;", "or_profile": "~Qian_Yu5;~Yining_Wang1;~Baihe_Huang1;~Qi_Lei1;~Jason_D._Lee1", "aff": "University of California, Santa Barbara;University of Texas at Dallas;University of California, Berkeley;New York University;Princeton University", "aff_domain": "ucsb.edu;cs.utdallas.edu;berkeley.edu;nyu.edu;princeton.edu", "position": "Assistant Professor;Associate Professor;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyu2023sample,\ntitle={Sample Complexity for Quadratic Bandits: Hessian Dependent Bounds and Optimal Algorithms},\nauthor={Qian Yu and Yining Wang and Baihe Huang and Qi Lei and Jason D. Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lnTpBUge5G}\n}", "github": "", "project": "", "reviewers": "q1aQ;tYpf;gm1W;Cwak", "pdf_size": 362659, "rating": "4;5;6;7", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;1;3", "wc_summary": "36;107;44;79", "wc_strengths": "44;33;38;6", "wc_weaknesses": "89;34;308;2", "wc_questions": "39;111;54;125", "wc_limitations": "13;39;1;27", "wc_review": "221;324;445;239", "wc_reply_reviewers": "0;24;32;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 66.5, 28.429737951659 ], "wc_strengths_avg": [ 30.25, 14.53229162933362 ], "wc_weaknesses_avg": [ 108.25, 119.44951862607066 ], "wc_questions_avg": [ 82.25, 36.47858961089368 ], "wc_limitations_avg": [ 20.0, 14.317821063276353 ], "wc_review_avg": [ 307.25, 88.53353884263296 ], "wc_reply_reviewers_avg": [ 19.25, 11.818946653572814 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2931267056113688005&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ucsb.edu;cs.utdallas.edu;berkeley.edu;nyu.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of California, Santa Barbara;University of Texas at Dallas;University of California, Berkeley;New York University;Princeton 
University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ucsb.edu;https://www.utdallas.edu;https://www.berkeley.edu;https://www.nyu.edu;https://www.princeton.edu", "aff_unique_abbr": "UCSB;UT Dallas;UC Berkeley;NYU;Princeton", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Santa Barbara;Dallas;Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LargeST: A Benchmark Dataset for Large-Scale Traffic Forecasting", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73480", "id": "loOw3oyhFW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee57cd73a76bd927ffca3dda1dc3b9d4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=loOw3oyhFW", "openreview": "https://openreview.net/forum?id=loOw3oyhFW", "poster": "/media/PosterPDFs/NeurIPS%202023/73480.png?t=1700017211.6347127", "slides": "https://nips.cc/virtual/2023/poster/73480", "video": "https://nips.cc/virtual/2023/poster/73480", "author_site": "Xu Liu, Yutong Xia, Yuxuan Liang, Junfeng Hu, Yiwei Wang, LEI BAI, Chao Huang, Zhenguang Liu, Bryan Hooi, Roger Zimmermann", "tldr": "", "abstract": "Road traffic forecasting plays a critical role in smart city initiatives and has experienced significant advancements thanks to the power of deep learning in capturing non-linear patterns of traffic data. However, the promising results achieved on current public datasets may not be applicable to practical scenarios due to limitations within these datasets. First, the limited sizes of them may not reflect the real-world scale of traffic networks. Second, the temporal coverage of these datasets is typically short, posing hurdles in studying long-term patterns and acquiring sufficient samples for training deep models. Third, these datasets often lack adequate metadata for sensors, which compromises the reliability and interpretability of the data. To mitigate these limitations, we introduce the LargeST benchmark dataset. It encompasses a total number of 8,600 sensors in California with a 5-year time coverage and includes comprehensive metadata. Using LargeST, we perform in-depth data analysis to extract data insights, benchmark well-known baselines in terms of their performance and efficiency, and identify challenges as well as opportunities for future research. 
We release the datasets and baseline implementations at: https://github.com/liuxu77/LargeST.", "keywords": "Traffic Forecasting Benchmark Dataset", "primary_area": "", "supplementary_material": "", "author": "Xu Liu;Yutong Xia;Yuxuan Liang;Junfeng Hu;Yiwei Wang;LEI BAI;Chao Huang;Zhenguang Liu;Bryan Hooi;Roger Zimmermann", "authorids": "~Xu_Liu9;~Yutong_Xia1;~Yuxuan_Liang1;~Junfeng_Hu4;~Yiwei_Wang2;~LEI_BAI1;~Chao_Huang7;~Zhenguang_Liu1;~Bryan_Hooi1;~Roger_Zimmermann1", "gender": ";F;M;M;M;M;M;M;;M", "homepage": ";https://yutong-xia.github.io/;https://yuxuanliang.com;https://github.com/p0werHu;;http://leibai.site/;;https://sites.google.com/view/zhenguangliu/;http://bhooi.github.io;https://www.comp.nus.edu.sg/cs/bio/rogerz/", "dblp": "93/3167-14;307/5917;183/0977;;50/5889-1;119/1223-1;;145/1147;169/9975;79/1490", "google_scholar": "JTzLTycAAAAJ;V7b4y2oAAAAJ;n9cODgcAAAAJ;kLMHzqEAAAAJ;https://scholar.google.com.hk/citations?user=Sh9QvBkAAAAJ;https://scholar.google.com.au/citations?user=sakOO04AAAAJ;Zkv9FqwAAAAJ;OP2ySB8AAAAJ;;https://scholar.google.com.tw/citations?user=IDREwXEAAAAJ", "orcid": "0000-0003-2708-0584;0000-0001-9026-0049;0000-0003-2817-7337;;;0000-0003-3378-7201;;;0000-0002-5645-1754;0000-0002-7410-2590", "linkedin": "liuxu-187825160/;yutong-xia/;yoshall/;;;lei-bai-641370153/;;;;roger-zimmermann-76b56b6/", "or_profile": "~Xu_Liu9;~Yutong_Xia1;~Yuxuan_Liang1;~Junfeng_Hu4;~Yiwei_Wang2;~LEI_BAI1;~Chao_Huang7;~Zhenguang_Liu1;~Bryan_Hooi1;~Roger_Zimmermann1", "aff": "National University of Singapore;National University of Singapore;The Hong Kong University of Science and Technology (Guangzhou);National University of Singapore;National University of Singapore;Shanghai AI Laboratory;University of Hong Kong;Zhejiang Gongshang University;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;u.nus.edu;hkust-gz.edu.cn;nus.edu.sg;u.nus.edu;pjlab.org.cn;hku.hk;zjgsu.edu.cn;nus.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;Assistant Professor;PhD student;PhD student;Researcher;Assistant Professor;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023largest,\ntitle={Large{ST}: A Benchmark Dataset for Large-Scale Traffic Forecasting},\nauthor={Xu Liu and Yutong Xia and Yuxuan Liang and Junfeng Hu and Yiwei Wang and LEI BAI and Chao Huang and Zhenguang Liu and Bryan Hooi and Roger Zimmermann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=loOw3oyhFW}\n}", "github": "", "project": "", "reviewers": "4UAT;4GMz;nPQ9;M7Lz;G2Gc", "pdf_size": 2875718, "rating": "6;6;6;7;8", "confidence": "4;4;3;3;4", "wc_summary_and_contributions": "57;41;199;55;76", "wc_strengths": "41;55;57;66;65", "wc_improvement": "23;79;185;31;41", "wc_limitations": "130;190;28;20;5", "wc_correctness": "1;51;51;1;8", "wc_clarity": "1;6;10;9;4", "wc_relation_to_prior_work": "1;32;18;1;9", "wc_documentation": "1;1;13;1;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "256;456;562;185;214", "wc_reply_reviewers": "12;34;0;0;0", "wc_reply_authors": "452;988;530;87;221", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;4;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 85.6, 57.78442696782586 ], "wc_strengths_avg": [ 56.8, 8.997777503361595 ], "wc_improvement_avg": [ 71.8, 59.767549723909546 ], "wc_limitations_avg": [ 74.6, 72.6404845798815 ], 
"wc_correctness_avg": [ 22.4, 23.491274976041638 ], "wc_clarity_avg": [ 6.0, 3.286335345030997 ], "wc_relation_to_prior_work_avg": [ 12.2, 11.720068259186888 ], "wc_documentation_avg": [ 4.2, 4.664761515876241 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 334.6, 148.02107957990307 ], "wc_reply_reviewers_avg": [ 9.2, 13.242356285797479 ], "wc_reply_authors_avg": [ 455.6, 309.8067784926599 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.1020620726159658, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16370877294228551866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "nus.edu.sg;u.nus.edu;hkust-gz.edu.cn;nus.edu.sg;u.nus.edu;pjlab.org.cn;hku.hk;zjgsu.edu.cn;nus.edu.sg;nus.edu.sg", "author_num": 10, "aff_unique_index": "0;0;1;0;0;2;3;4;0;0", "aff_unique_norm": "National University of Singapore;Hong Kong University of Science and Technology;Shanghai AI Laboratory;University of Hong Kong;Zhejiang Gongshang University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk;https://www.shanghai-ai-lab.com;https://www.hku.hk;http://www.hzic.edu.cn", "aff_unique_abbr": "NUS;HKUST;SAIL;HKU;ZJGSU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;1;1;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Robust Learning for Smoothed Online Convex Optimization with Feedback Delay", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70575", "id": "loixpHDZKj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/36848567d39a5128e671ad04a6075374-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=loixpHDZKj", "openreview": "https://openreview.net/forum?id=loixpHDZKj", "poster": "/media/PosterPDFs/NeurIPS%202023/70575.png?t=1701988115.4909878", "slides": "https://nips.cc/virtual/2023/poster/70575", "video": "https://nips.cc/virtual/2023/poster/70575", "author_site": "Pengfei Li, Jianyi Yang, Adam Wierman, Shaolei Ren", "tldr": "", "abstract": "We study a general form of Smoothed Online Convex Optimization, a.k.a. SOCO, including multi-step switching costs and feedback delay. We propose a novel machine learning (ML) augmented online algorithm, Robustness-Constrained Learning (RCL), which combines untrusted ML predictions with a trusted expert online algorithm via constrained projection to robustify the ML prediction. Specifically, we prove that RCL is able to guarantee $(1+\\lambda)$-competitiveness against any given expert for any $\\lambda>0$, while also explicitly training the ML model in a robustification-aware manner to improve the average-case performance. Importantly, RCL is the first ML-augmented algorithm with a provable robustness guarantee in the case of multi-step switching cost and feedback delay. 
We demonstrate the improvement of RCL in both robustness and average performance using battery management as a case study.", "keywords": "Online optimization;competitive algorithm;switching cost", "primary_area": "", "supplementary_material": "/attachment/48ab096ac8a093fd89768a26c807e97733dadebc.pdf", "author": "Pengfei Li;Jianyi Yang;Adam Wierman;Shaolei Ren", "authorids": "~Pengfei_Li2;~Jianyi_Yang1;~Adam_Wierman1;~Shaolei_Ren1", "gender": "M;M;M;", "homepage": "https://www.cs.ucr.edu/~pli081/;https://jyang-ai.github.io;https://adamwierman.com/;", "dblp": ";124/1315;56/4447;", "google_scholar": "irA8gqoAAAAJ;n7UUdJQAAAAJ;4OvOdSgAAAAJ;", "orcid": "0000-0003-3257-9929;;0000-0002-5923-0199;", "linkedin": ";jianyi-yang-b7a9181a6/;adam-wierman-a529474/;", "or_profile": "~Pengfei_Li2;~Jianyi_Yang1;~Adam_Wierman1;~Shaolei_Ren1", "aff": "University of California, Riverside;University of California, Riverside;California Institute of Technology;", "aff_domain": "ucr.edu;ucr.edu;caltech.edu;", "position": "PhD student;PhD student;Professor;", "bibtex": "@inproceedings{\nli2023robust,\ntitle={Robust Learning for Smoothed Online Convex Optimization with Feedback Delay},\nauthor={Pengfei Li and Jianyi Yang and Adam Wierman and Shaolei Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=loixpHDZKj}\n}", "github": "", "project": "", "reviewers": "WzoZ;rcGc;axLK;6WgB;atug;6vwR", "pdf_size": 1285660, "rating": "5;5;5;5;6;6", "confidence": "2;4;3;4;3;2", "soundness": "3;3;2;3;3;2", "novelty": "2;2;2;3;3;3", "presentation": "2;3;3;3;2;3", "wc_summary": "151;57;140;65;174;107", "wc_strengths": "29;50;59;88;77;89", "wc_weaknesses": "62;101;27;63;64;32", "wc_questions": "1;2;233;23;109;48", "wc_limitations": "1;10;2;1;6;10", "wc_review": "244;220;461;240;430;286", "wc_reply_reviewers": "5;19;17;18;11;15", "wc_reply_authors": "40;37;100;45;37;37", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;3;2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 115.66666666666667, 43.44984337013068 ], "wc_strengths_avg": [ 65.33333333333333, 21.62303298696914 ], "wc_weaknesses_avg": [ 58.166666666666664, 24.368125811304314 ], "wc_questions_avg": [ 69.33333333333333, 81.81822671154774 ], "wc_limitations_avg": [ 5.0, 3.9157800414902435 ], "wc_review_avg": [ 313.5, 95.7944848795239 ], "wc_reply_reviewers_avg": [ 14.166666666666666, 4.844813951249544 ], "wc_reply_authors_avg": [ 49.333333333333336, 22.83759084394752 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4330127018922193, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1232506682962968920&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "ucr.edu;ucr.edu;caltech.edu;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Riverside;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucr.edu;https://www.caltech.edu", "aff_unique_abbr": "UCR;Caltech", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Riverside;Pasadena", "aff_country_unique_index": "0;0;0", "aff_country_unique": 
"United States" }, { "title": "A Computation and Communication Efficient Method for Distributed Nonconvex Problems in the Partial Participation Setting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70574", "id": "loxinzXlCx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/778ff1fcfb6d6707fc015908a1845b62-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=loxinzXlCx", "openreview": "https://openreview.net/forum?id=loxinzXlCx", "poster": "/media/PosterPDFs/NeurIPS%202023/70574.png?t=1699514907.8884082", "slides": "https://nips.cc/virtual/2023/poster/70574", "video": "https://nips.cc/virtual/2023/poster/70574", "author_site": "Alexander Tyurin, Peter Richtarik", "tldr": "", "abstract": "We present a new method that includes three key components of distributed optimization and federated learning: variance reduction of stochastic gradients, partial participation, and compressed communication. We prove that the new method has optimal oracle complexity and state-of-the-art communication complexity in the partial participation setting. Regardless of the communication compression feature, our method successfully combines variance reduction and partial participation: we get the optimal oracle complexity, never need the participation of all nodes, and do not require the bounded gradients (dissimilarity) assumption.", "keywords": "Nonconvex Optimization;Partial Participation;Variance Reduction;Compressed Communication;Distributed Optimization", "primary_area": "", "supplementary_material": "/attachment/eeea76533cfaf698d91fd740f406020d8c8acafa.pdf", "author": "Alexander Tyurin;Peter Richt\u00e1rik", "authorids": "~Alexander_Tyurin1;~Peter_Richt\u00e1rik1", "gender": "M;M", "homepage": "https://k3nfalt.github.io/;https://richtarik.org", "dblp": "203/8919;62/8001", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-4380-5848", "linkedin": ";richtarik/", "or_profile": "~Alexander_Tyurin1;~Peter_Richtarik1", "aff": "KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\ntyurin2023a,\ntitle={A Computation and Communication Efficient Method for Distributed Nonconvex Problems in the Partial Participation Setting},\nauthor={Alexander Tyurin and Peter Richt{\\'a}rik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=loxinzXlCx}\n}", "github": "", "project": "", "reviewers": "ZASs;MQ7w;wGcA;QePY", "pdf_size": 1362887, "rating": "5;6;6;7", "confidence": "4;4;3;2", "soundness": "4;3;3;4", "novelty": "2;3;3;2", "presentation": "4;2;3;3", "wc_summary": "143;77;80;46", "wc_strengths": "36;32;94;111", "wc_weaknesses": "110;379;99;117", "wc_questions": "1;109;2;149", "wc_limitations": "1;1;1;31", "wc_review": "291;598;276;454", "wc_reply_reviewers": "13;54;10;18", "wc_reply_authors": "0;7;3;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.5, 35.23137805990563 ], "wc_strengths_avg": [ 68.25, 34.802119188348286 ], "wc_weaknesses_avg": [ 176.25, 117.23347431514601 ], "wc_questions_avg": [ 65.25, 65.30074655009696 ], "wc_limitations_avg": [ 8.5, 12.99038105676658 ], 
"wc_review_avg": [ 404.75, 131.61188206237307 ], "wc_reply_reviewers_avg": [ 23.75, 17.69710428290459 ], "wc_reply_authors_avg": [ 2.5, 2.8722813232690143 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6008471627894662321&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "kaust.edu.sa;kaust.edu.sa", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "ProtoDiff: Learning to Learn Prototypical Networks by Task-Guided Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70573", "id": "lp9GR2t3hn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/911dd89c81efc624c4e1c39381179505-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lp9GR2t3hn", "openreview": "https://openreview.net/forum?id=lp9GR2t3hn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70573", "video": "https://nips.cc/virtual/2023/poster/70573", "author_site": "Yingjun Du, Zehao Xiao, Shengcai Liao, Cees Snoek", "tldr": "", "abstract": "Prototype-based meta-learning has emerged as a powerful technique for addressing few-shot learning challenges. However, estimating a deterministic prototype using a simple average function from a limited number of examples remains a fragile process. To overcome this limitation, we introduce ProtoDiff, a novel framework that leverages a task-guided diffusion model during the meta-training phase to gradually generate prototypes, thereby providing efficient class representations. Specifically, a set of prototypes is optimized to achieve per-task prototype overfitting, enabling accurately obtaining the overfitted prototypes for individual tasks.\nFurthermore, we introduce a task-guided diffusion process within the prototype space, enabling the meta-learning of a generative process that transitions from a vanilla prototype to an overfitted prototype. ProtoDiff gradually generates task-specific prototypes from random noise during the meta-test stage, conditioned on the limited samples available for the new task. Furthermore, to expedite training and enhance ProtoDiff's performance, we propose the utilization of residual prototype learning, which leverages the sparsity of the residual prototype. We conduct thorough ablation studies to demonstrate its ability to accurately capture the underlying prototype distribution and enhance generalization. The new state-of-the-art performance on within-domain, cross-domain, and few-task few-shot classi\ufb01cation further substantiates the bene\ufb01t of ProtoDiff.", "keywords": "Meta-learning;few-shot learning;diffusion model;prototype", "primary_area": "", "supplementary_material": "/attachment/d752cc4ad9bd0b5aa235425051341a616b374cc0.zip", "author": "Yingjun Du;Zehao Xiao;Shengcai Liao;Cees G. M. 
Snoek", "authorids": "~Yingjun_Du1;~Zehao_Xiao1;~Shengcai_Liao2;~Cees_G._M._Snoek1", "gender": "M;M;M;M", "homepage": "https://yingjundu.github.io/;https://zzzx1224.github.io/;https://shengcailiao.github.io/;http://www.ceessnoek.info", "dblp": "263/6794;225/5426;16/8313;s/CeesSnoek", "google_scholar": "oAeW6rAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;CnqsHlAAAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": ";;;0000-0001-9092-1556", "linkedin": "%E8%8B%B1%E5%86%9B-%E6%9D%9C-a938a0174/;;;cgmsnoek/", "or_profile": "~Yingjun_Du1;~Zehao_Xiao1;~Shengcai_Liao2;~Cees_Snoek1", "aff": "University of Amsterdam;University of Amsterdam;Inception Institute of Artificial Intelligence;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;inceptioniai.org;uva.nl", "position": "PhD student;PhD student;Lead Scientist;Full Professor", "bibtex": "@inproceedings{\ndu2023protodiff,\ntitle={ProtoDiff: Learning to Learn Prototypical Networks by Task-Guided Diffusion},\nauthor={Yingjun Du and Zehao Xiao and Shengcai Liao and Cees G. M. Snoek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lp9GR2t3hn}\n}", "github": "", "project": "", "reviewers": "ZGD4;BNd7;8tyC;zHCT", "pdf_size": 2576008, "rating": "6;7;7;7", "confidence": "5;4;3;5", "soundness": "3;3;3;3", "novelty": "2;2;4;3", "presentation": "3;3;3;2", "wc_summary": "42;158;91;91", "wc_strengths": "78;87;82;24", "wc_weaknesses": "210;133;90;316", "wc_questions": "3;45;58;3", "wc_limitations": "40;14;35;8", "wc_review": "373;437;356;442", "wc_reply_reviewers": "86;40;48;566", "wc_reply_authors": "56;47;41;778", "reply_reviewers": "1;1;1;3", "reply_authors": "2;2;2;4", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 41.2583324917525 ], "wc_strengths_avg": [ 67.75, 25.459526704163217 ], "wc_weaknesses_avg": [ 187.25, 85.87017817612818 ], "wc_questions_avg": [ 27.25, 24.681724007856502 ], "wc_limitations_avg": [ 24.25, 13.534677683639163 ], "wc_review_avg": [ 402.0, 38.019731719200756 ], "wc_reply_reviewers_avg": [ 185.0, 220.6558406206371 ], "wc_reply_authors_avg": [ 230.5, 316.144349941605 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11377172858517799316&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 7, "email": "uva.nl;uva.nl;inceptioniai.org;uva.nl", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Amsterdam;Inception Institute of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.inceptioniai.org", "aff_unique_abbr": "UvA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Netherlands;United Arab Emirates" }, { "title": "SPA: A Graph Spectral Alignment Perspective for Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70572", "id": "lpx9LZPVtZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/754e80f98b2a141942f45a0eeb258a3c-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=lpx9LZPVtZ", "openreview": "https://openreview.net/forum?id=lpx9LZPVtZ", "poster": "/media/PosterPDFs/NeurIPS%202023/70572.png?t=1699866427.9658678", "slides": "https://nips.cc/virtual/2023/poster/70572", "video": "https://nips.cc/virtual/2023/poster/70572", "author_site": "Zhiqing Xiao, Haobo Wang, Ying Jin, Lei Feng, Gang Chen, Fei Huang, Junbo Zhao", "tldr": "", "abstract": "Unsupervised domain adaptation (UDA) is a pivotal form in machine learning to extend the in-domain model to the distinctive target domains where the data distributions differ. Most prior works focus on capturing the inter-domain transferability but largely overlook rich intra-domain structures, which empirically results in even worse discriminability. In this work, we introduce a novel graph SPectral Alignment (SPA) framework to tackle the tradeoff. The core of our method is briefly condensed as follows: (i)-by casting the DA problem to graph primitives, SPA composes a coarse graph alignment mechanism with a novel spectral regularizer towards aligning the domain graphs in eigenspaces; (ii)-we further develop a fine-grained message propagation module --- upon a novel neighbor-aware self-training mechanism --- in order for enhanced discriminability in the target domain. On standardized benchmarks, the extensive experiments of SPA demonstrate that its performance has surpassed the existing cutting-edge DA methods. Coupled with dense model analysis, we conclude that our approach indeed possesses superior efficacy, robustness, discriminability, and transferability. Code and data are available at: https://github.com/CrownX/SPA.", "keywords": "Domain Adaptation;Self-training;Graph Spectra", "primary_area": "", "supplementary_material": "/attachment/a9bbdf4540db29f81501bdd85570396355ba7d23.zip", "author": "Zhiqing Xiao;Haobo Wang;Ying Jin;Lei Feng;Gang Chen;Fei Huang;Junbo Zhao", "authorids": "~Zhiqing_Xiao1;~Haobo_Wang1;~Ying_Jin1;~Lei_Feng1;~Gang_Chen6;~Fei_Huang1;~Junbo_Zhao1", "gender": "Not Specified;M;F;M;M;M;M", "homepage": "https://github.com/CrownX;https://hbzju.github.io/;https://jin-ying.github.io/;https://lfeng1995.github.io/;;http://jakezhao.net/;https://sites.google.com/view/fei-huang", "dblp": "156/5282;;46/176/;76/847-6;67/6383-1;191/6665;h/FeiHuang.html", "google_scholar": "cq3BcWsAAAAJ;DnN-rggAAAAJ;RSqGfysAAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;;8ipao8MAAAAJ;9r98PpoAAAAJ", "orcid": "0009-0007-4889-644X;0000-0001-8586-3048;;0000-0003-2839-5799;0000-0002-7483-0045;;", "linkedin": ";;;;;;fei-huang-cas-cmu", "or_profile": "~Zhiqing_Xiao1;~Haobo_Wang1;~Ying_Jin1;~Lei_Feng1;~Gang_Chen6;~Junbo_Zhao1;~Fei_Huang2", "aff": "Zhejiang University;Zhejiang University;The Chinese University of Hong Kong;Nanyang Technological University;College of Computer Science and Technology, Zhejiang University;Zhejiang University;Alibaba Group US", "aff_domain": "zju.edu.cn;zju.edu.cn;ie.cuhk.edu;ntu.edu.sg;cs.zju.edu.cn;zju.edu.cn;alibaba-inc.com", "position": "PhD student;PhD student;PhD student;Visiting Professor;Full Professor;Assistant Professor;Senior Research Director", "bibtex": "@inproceedings{\nxiao2023spa,\ntitle={{SPA}: A Graph Spectral Alignment Perspective for Domain Adaptation},\nauthor={Zhiqing Xiao and Haobo Wang and Ying Jin and Lei Feng and Gang Chen and Fei Huang and Junbo Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lpx9LZPVtZ}\n}", "github": "", "project": "", 
"reviewers": "54MB;Yk67;pjo2;Ecsg;AnYf", "pdf_size": 3760954, "rating": "3;6;6;7;8", "confidence": "5;4;4;5;5", "soundness": "2;4;2;4;3", "novelty": "2;3;3;3;4", "presentation": "3;3;2;3;4", "wc_summary": "38;69;34;63;107", "wc_strengths": "18;29;23;56;92", "wc_weaknesses": "109;112;10;150;116", "wc_questions": "20;15;101;2;77", "wc_limitations": "17;61;1;4;16", "wc_review": "202;286;169;275;408", "wc_reply_reviewers": "0;39;13;18;4", "wc_reply_authors": "90;203;5;48;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 62.2, 26.209921785461322 ], "wc_strengths_avg": [ 43.6, 27.528893911670334 ], "wc_weaknesses_avg": [ 99.4, 47.07270971592776 ], "wc_questions_avg": [ 43.0, 38.76596445337069 ], "wc_limitations_avg": [ 19.8, 21.55365398256175 ], "wc_review_avg": [ 268.0, 82.61961026293939 ], "wc_reply_reviewers_avg": [ 14.8, 13.67333170810977 ], "wc_reply_authors_avg": [ 69.2, 74.42418961601128 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18310533353686377330&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;ie.cuhk.edu;ntu.edu.sg;cs.zju.edu.cn;zju.edu.cn;alibaba-inc.com", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0;3", "aff_unique_norm": "Zhejiang University;Chinese University of Hong Kong;Nanyang Technological University;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.cuhk.edu.hk;https://www.ntu.edu.sg;https://www.alibaba.com", "aff_unique_abbr": "ZJU;CUHK;NTU;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0;2", "aff_country_unique": "China;Singapore;United States" }, { "title": "K-Nearest-Neighbor Local Sampling Based Conditional Independence Testing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70571", "id": "luyXPdkNSN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/48db67447e92539501bd71645ff33b72-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=luyXPdkNSN", "openreview": "https://openreview.net/forum?id=luyXPdkNSN", "poster": "/media/PosterPDFs/NeurIPS%202023/70571.png?t=1701926572.2985156", "slides": "https://nips.cc/virtual/2023/poster/70571", "video": "https://nips.cc/virtual/2023/poster/70571", "author_site": "Shuai Li, Yingjie Zhang, Hongtu Zhu, Christina Wang, Hai Shu, Ziqi Chen, Zhuoran Sun, Yanfeng Yang", "tldr": "", "abstract": "Conditional independence (CI) testing is a fundamental task in statistics and machine learning, but its effectiveness is hindered by the challenges posed by high-dimensional conditioning variables and limited data samples. This article introduces a novel testing approach to address these challenges and enhance control of the type I error while achieving high power under alternative hypotheses. The proposed approach incorporates a computationally efficient classifier-based conditional mutual information (CMI) estimator, capable of capturing intricate dependence structures among variables. 
To approximate a distribution encoding the null hypothesis, a $k$-nearest-neighbor local sampling strategy is employed. An important advantage of this approach is its ability to operate without assumptions about distribution forms or feature dependencies. Furthermore, it eliminates the need to derive asymptotic null distributions for the estimated CMI and avoids dataset splitting, making it particularly suitable for small datasets. The method presented in this article demonstrates asymptotic control of the type I error and consistency against all alternative hypotheses. Extensive analyses using both synthetic and real data highlight the computational efficiency of the proposed test. Moreover, it outperforms existing state-of-the-art methods in terms of type I and II errors, even in scenarios with high-dimensional conditioning sets. Additionally, the proposed approach exhibits robustness in the presence of heavy-tailed data.", "keywords": "Conditional Independence testing;causal inference;conditional mutual information;k-nearest neighbor;conditional randomization test;conditional permutation test", "primary_area": "", "supplementary_material": "/attachment/3f1c71ea5d7808d8950bcaf3546b259780a56d74.zip", "author": "Shuai Li;Yingjie Zhang;Hongtu Zhu;Christina Dan Wang;Hai Shu;Ziqi Chen;Zhuoran Sun;Yanfeng Yang", "authorids": "~Shuai_Li22;~Yingjie_Zhang3;~Hongtu_Zhu3;~Christina_Dan_Wang1;~Hai_Shu1;~Ziqi_Chen2;~Zhuoran_Sun1;~Yanfeng_Yang1", "gender": ";M;;F;M;M;M;M", "homepage": ";https://www.researchgate.net/profile/Yingjie-Zhang-34;;https://shanghai.nyu.edu/academics/faculty/directory/christina-dan-wang;https://wp.nyu.edu/haishu/;https://faculty.ecnu.edu.cn/_s35/czq2/main.psp;https://www.researchgate.net/profile/Zhuoran-Sun-2;https://www.researchgate.net/profile/Yanfeng-Yang-6", "dblp": ";04/376-4;;246/4851;220/1462;76/9998-2.html;;152/7883-1", "google_scholar": ";VGfIFIkAAAAJ;;;wDtkk1QAAAAJ;b0q985EAAAAJ;;", "orcid": ";0009-0000-3250-1756;;0000-0003-4204-6843;0000-0002-6968-4063;0000-0002-4128-2986;;0009-0006-1059-3040", "linkedin": ";;;;;;;", "or_profile": "~Shuai_Li22;~Yingjie_Zhang3;~Hongtu_Zhu3;~Christina_Dan_Wang1;~Hai_Shu1;~Ziqi_Chen2;~Zhuoran_Sun1;~Yanfeng_Yang1", "aff": ";East China Normal University;;New York University Shanghai;New York University;East China Normal University;Central China Normal University;East China Normal University", "aff_domain": ";ecnu.edu.cn;;nyu.edu;nyu.edu;ecnu.edu.cn;ccnu.edu.cn;ecnu.edu.cn", "position": ";Undergrad student;;Assistant Professor;Assistant Professor;Full Professor;Undergrad student;MS student", "bibtex": "@inproceedings{\nli2023knearestneighbor,\ntitle={K-Nearest-Neighbor Local Sampling Based Conditional Independence Testing},\nauthor={Shuai Li and Yingjie Zhang and Hongtu Zhu and Christina Dan Wang and Hai Shu and Ziqi Chen and Zhuoran Sun and Yanfeng Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=luyXPdkNSN}\n}", "github": "", "project": "", "reviewers": "xTib;BxQV;P7Sn;671y", "pdf_size": 522107, "rating": "5;5;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "56;91;84;101", "wc_strengths": "37;54;70;77", "wc_weaknesses": "108;48;145;131", "wc_questions": "43;116;41;66", "wc_limitations": "1;26;11;11", "wc_review": "245;335;351;386", "wc_reply_reviewers": "103;28;22;16", "wc_reply_authors": "22;9;13;9", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 
1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 83.0, 16.718253497300488 ], "wc_strengths_avg": [ 59.5, 15.435349040433131 ], "wc_weaknesses_avg": [ 108.0, 37.074249823833256 ], "wc_questions_avg": [ 66.5, 30.220026472523152 ], "wc_limitations_avg": [ 12.25, 8.926785535678562 ], "wc_review_avg": [ 329.25, 52.02102940157951 ], "wc_reply_reviewers_avg": [ 42.25, 35.329697139941636 ], "wc_reply_authors_avg": [ 13.25, 5.3091901453988255 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15934075708109697760&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 5, "email": ";ecnu.edu.cn;;nyu.edu;nyu.edu;ecnu.edu.cn;ccnu.edu.cn;ecnu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;0;2;0", "aff_unique_norm": "East China Normal University;New York University;Central China Normal University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.nyu.edu;http://www.ccnu.edu.cn", "aff_unique_abbr": "ECNU;NYU;CCNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "H-InDex: Visual Reinforcement Learning with Hand-Informed Representations for Dexterous Manipulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70570", "id": "lvvaNwnP6M", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb4b1f7feadcd124a59de6ff7b9196f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lvvaNwnP6M", "openreview": "https://openreview.net/forum?id=lvvaNwnP6M", "poster": "/media/PosterPDFs/NeurIPS%202023/70570.png?t=1697723497.4439397", "slides": "https://nips.cc/virtual/2023/poster/70570", "video": "https://nips.cc/virtual/2023/poster/70570", "author_site": "Yanjie Ze, Yuyao Liu, Ruizhe Shi, Jiaxin Qin, Zhecheng Yuan, Jiashun Wang, Huazhe Xu", "tldr": "", "abstract": "Human hands possess remarkable dexterity and have long served as a source of inspiration for robotic manipulation. In this work, we propose a human $\\textbf{H}$and-$\\textbf{In}$formed visual representation learning framework to solve difficult $\\textbf{Dex}$terous manipulation tasks ($\\textbf{H-InDex}$) with reinforcement learning. Our framework consists of three stages: $\\textit{(i)}$ pre-training representations with 3D human hand pose estimation, $\\textit{(ii)}$ offline adapting representations with self-supervised keypoint detection, and $\\textit{(iii)}$ reinforcement learning with exponential moving average BatchNorm. The last two stages only modify $0.36$% of the parameters of the pre-trained representation in total, ensuring the knowledge from pre-training is maintained to the full extent. We empirically study $\\textbf{12}$ challenging dexterous manipulation tasks and find that $\\textbf{H-InDex}$ largely surpasses strong baseline methods and the recent visual foundation models for motor control. 
Code and videos are available at https://yanjieze.com/H-InDex .", "keywords": "Visual Reinforcement Learning;Representation Learning;Dexterous Manipulation", "primary_area": "", "supplementary_material": "/attachment/f65cd06609629b8e30695dd3aad5a9bf92ec6405.pdf", "author": "Yanjie Ze;Yuyao Liu;Ruizhe Shi;Jiaxin Qin;Zhecheng Yuan;Jiashun Wang;Huazhe Xu", "authorids": "~Yanjie_Ze1;~Yuyao_Liu1;~Ruizhe_Shi1;~Jiaxin_Qin2;~Zhecheng_Yuan1;~Jiashun_Wang1;~Huazhe_Xu1", "gender": "M;M;M;M;M;M;F", "homepage": "http://yanjieze.com;;http://srzer.github.io;http://www.github.com;https://jiashunwang.github.io/;http://hxu.rocks;https://github.com/JiaxinQin0814", "dblp": "312/5407;;304/0634.html;314/5755;260/6495;164/9006;348/8439", "google_scholar": "BO_b2O8AAAAJ;https://scholar.google.com/citations?hl=en;0tlXSPkAAAAJ;;gdO9Gb0AAAAJ;t9HPFawAAAAJ;", "orcid": ";;;;;;", "linkedin": "yanjie-ze-a71a0a247/;;;;;;", "or_profile": "~Yanjie_Ze1;~Yuyao_Liu1;~Ruizhe_Shi1;~Zhecheng_Yuan1;~Jiashun_Wang1;~Huazhe_Xu1;~JIAXIN_QIN1", "aff": "Shanghai Jiaotong University;Tsinghua University;Tsinghua University;Tsinghua University;Boston Dynamics AI Institute;Tsinghua University;Renmin University of China", "aff_domain": "sjtu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;theaiinstitute.com;tsinghua.edu.cn;ruc.edu.cn", "position": "Undergrad student;Undergrad student;Undergrad student;MS student;Intern;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nze2023hindex,\ntitle={H-InDex: Visual Reinforcement Learning with Hand-Informed Representations for Dexterous Manipulation},\nauthor={Yanjie Ze and Yuyao Liu and Ruizhe Shi and Jiaxin Qin and Zhecheng Yuan and Jiashun Wang and Huazhe Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lvvaNwnP6M}\n}", "github": "", "project": "", "reviewers": "s3to;di2q;rofX;GX9c", "pdf_size": 6272621, "rating": "4;5;6;6", "confidence": "4;3;4;4", "soundness": "3;2;3;3", "novelty": "1;3;4;2", "presentation": "3;3;3;3", "wc_summary": "68;57;241;37", "wc_strengths": "86;38;85;35", "wc_weaknesses": "146;68;354;51", "wc_questions": "22;101;194;29", "wc_limitations": "8;10;19;6", "wc_review": "330;274;893;158", "wc_reply_reviewers": "91;28;277;10", "wc_reply_authors": "262;0;330;79", "reply_reviewers": "1;1;2;1", "reply_authors": "3;1;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.75, 81.73241401059924 ], "wc_strengths_avg": [ 61.0, 24.525496936861444 ], "wc_weaknesses_avg": [ 154.75, 120.48521693552284 ], "wc_questions_avg": [ 86.5, 69.34154598795732 ], "wc_limitations_avg": [ 10.75, 4.968651728587948 ], "wc_review_avg": [ 413.75, 283.56337475068955 ], "wc_reply_reviewers_avg": [ 101.5, 105.6941341797169 ], "wc_reply_authors_avg": [ 167.75, 133.43982726307763 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15446668096337882337&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;theaiinstitute.com;tsinghua.edu.cn;ruc.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;2;1;3", 
"aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University;Boston Dynamics AI Institute;Renmin University of China", "aff_unique_dep": ";;AI Institute;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn;https://www.bostondynamics.com/;http://www.ruc.edu.cn", "aff_unique_abbr": "SJTU;THU;BD AI;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "CARE: Modeling Interacting Dynamics Under Temporal Environmental Variation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70569", "id": "lwg3ohkFRv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c7ca207a051228f978971447a56464a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lwg3ohkFRv", "openreview": "https://openreview.net/forum?id=lwg3ohkFRv", "poster": "/media/PosterPDFs/NeurIPS%202023/70569.png?t=1701504665.467413", "slides": "https://nips.cc/virtual/2023/poster/70569", "video": "https://nips.cc/virtual/2023/poster/70569", "author_site": "Xiao Luo, Haixin Wang, Zijie Huang, Huiyu Jiang, Abhijeet Gangan, Song Jiang, Yizhou Sun", "tldr": "", "abstract": "Modeling interacting dynamical systems, such as fluid dynamics and intermolecular interactions, is a fundamental research problem for understanding and simulating complex real-world systems. Many of these systems can be naturally represented by dynamic graphs, and graph neural network-based approaches have been proposed and shown promising performance. However, most of these approaches assume the underlying dynamics does not change over time, which is unfortunately untrue. For example, a molecular dynamics can be affected by the environment temperature over the time. In this paper, we take an attempt to provide a probabilistic view for time-varying dynamics and propose a model Context-attended Graph ODE (CARE) for modeling time-varying interacting dynamical systems. In our CARE, we explicitly use a context variable to model time-varying environment and construct an encoder to initialize the context variable from historical trajectories. Furthermore, we employ a neural ODE model to depict the dynamic evolution of the context variable inferred from system states. This context variable is incorporated into a coupled ODE to simultaneously drive the evolution of systems. 
Comprehensive experiments on four datasets demonstrate the effectiveness of our proposed CARE compared with several state-of-the-art approaches.", "keywords": "Dynamical System;Distribution Shift;Neural ODE;Graph Neural Network", "primary_area": "", "supplementary_material": "", "author": "Xiao Luo;Haixin Wang;Zijie Huang;Huiyu Jiang;Abhijeet Sadashiv Gangan;Song Jiang;Yizhou Sun", "authorids": "~Xiao_Luo3;~Haixin_Wang3;~Zijie_Huang1;~Huiyu_Jiang1;~Abhijeet_Sadashiv_Gangan1;~Song_Jiang1;~Yizhou_Sun1", "gender": "M;;F;M;;M;F", "homepage": "http://luoxiao12.github.io;https://willdreamer.github.io/;https://zijieh.github.io/;;;https://songjiang0909.github.io/;http://web.cs.ucla.edu/~yzsun/", "dblp": "50/1585-1;81/5956-3;246/8147-2;;;08/237-2;37/3868", "google_scholar": "https://scholar.google.com.hk/citations?;RGZUJOkAAAAJ;SejA1zsAAAAJ;;;SjbhMQEAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ", "orcid": ";0000-0002-5714-0149;;;;;", "linkedin": "%E9%9C%84-%E7%BD%97-303548214/;;zijie-huang-62514a177/;huiyu-jiang/;;;", "or_profile": "~Xiao_Luo3;~Haixin_Wang3;~Zijie_Huang1;~Huiyu_Jiang1;~Abhijeet_Sadashiv_Gangan1;~Song_Jiang1;~Yizhou_Sun1", "aff": "University of California, Los Angeles;Peking University;University of California, Los Angeles;University of California, Santa Barbara;;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;pku.edu.cn;cs.ucla.edu;ucsb.edu;;ucla.edu;ucla.edu", "position": "Postdoc;MS student;PhD student;PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\nluo2023care,\ntitle={{CARE}: Modeling Interacting Dynamics Under Temporal Environmental Variation},\nauthor={Xiao Luo and Haixin Wang and Zijie Huang and Huiyu Jiang and Abhijeet Sadashiv Gangan and Song Jiang and Yizhou Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lwg3ohkFRv}\n}", "github": "", "project": "", "reviewers": "QmzE;P3pg;ZaqZ", "pdf_size": 7121216, "rating": "6;7;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "4;3;4", "wc_summary": "66;63;77", "wc_strengths": "78;62;144", "wc_weaknesses": "163;128;8", "wc_questions": "129;131;84", "wc_limitations": "15;1;4", "wc_review": "451;385;317", "wc_reply_reviewers": "48;22;15", "wc_reply_authors": "35;31;31", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.66666666666667, 6.018490028422596 ], "wc_strengths_avg": [ 94.66666666666667, 35.490217744549774 ], "wc_weaknesses_avg": [ 99.66666666666667, 66.37435917246626 ], "wc_questions_avg": [ 114.66666666666667, 21.69997439834639 ], "wc_limitations_avg": [ 6.666666666666667, 6.018490028422596 ], "wc_review_avg": [ 384.3333333333333, 54.70730197047639 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 14.197026292697903 ], "wc_reply_authors_avg": [ 32.333333333333336, 1.8856180831641267 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5469534348490982506&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, 
"email": "cs.ucla.edu;pku.edu.cn;cs.ucla.edu;ucsb.edu;;ucla.edu;ucla.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "University of California, Los Angeles;Peking University;University of California, Santa Barbara", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;http://www.pku.edu.cn;https://www.ucsb.edu", "aff_unique_abbr": "UCLA;Peking U;UCSB", "aff_campus_unique_index": "0;0;2;0;0", "aff_campus_unique": "Los Angeles;;Santa Barbara", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Spontaneous symmetry breaking in generative diffusion models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70568", "id": "lxGFGMMSVl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0da30e312b75a3fffd9e9191f8bc1b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lxGFGMMSVl", "openreview": "https://openreview.net/forum?id=lxGFGMMSVl", "poster": "/media/PosterPDFs/NeurIPS%202023/70568.png?t=1697378506.7533357", "slides": "https://nips.cc/virtual/2023/poster/70568", "video": "https://nips.cc/virtual/2023/poster/70568", "author_site": "Gabriel Raya, Luca Ambrogioni", "tldr": "", "abstract": "Generative diffusion models have recently emerged as a leading approach for generating high-dimensional data. In this paper, we show that the dynamics of these models exhibit a spontaneous symmetry breaking that divides the generative dynamics into two distinct phases: 1) A linear steady-state dynamics around a central fixed-point and 2) an attractor dynamics directed towards the data manifold. These two \"phases'' are separated by the change in stability of the central fixed-point, with the resulting window of instability being responsible for the diversity of the generated samples. Using both theoretical and empirical evidence, we show that an accurate simulation of the early dynamics does not significantly contribute to the final generation, since early fluctuations are reverted to the central fixed point. To leverage this insight, we propose a Gaussian late initialization scheme, which significantly improves model performance, achieving up to 3x FID improvements on fast samplers, while also increasing sample diversity (e.g., racial composition of generated CelebA images). 
Our work offers a new way to understand the generative dynamics of diffusion models that has the potential to bring about higher performance and less biased fast samplers.", "keywords": "generative models;diffusion models;score-based generative models;symmetry-breaking", "primary_area": "", "supplementary_material": "/attachment/21e2682457d342291bf046e72127a8872ff44200.pdf", "author": "Gabriel Raya;Luca Ambrogioni", "authorids": "~Gabriel_Raya1;~Luca_Ambrogioni1", "gender": "M;M", "homepage": "https://gabrielraya.com/;https://scholar.google.nl/citations?user=J9IABpQAAAAJ&hl=en", "dblp": ";151/9813", "google_scholar": "6bTCWLcAAAAJ;https://scholar.google.nl/citations?user=J9IABpQAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Gabriel_Raya1;~Luca_Ambrogioni1", "aff": "Eindhoven University of Technology;Radboud University Nijmegen", "aff_domain": "tue.nl;ru.nl", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nraya2023spontaneous,\ntitle={Spontaneous symmetry breaking in generative diffusion models},\nauthor={Gabriel Raya and Luca Ambrogioni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lxGFGMMSVl}\n}", "github": "", "project": "", "reviewers": "FFjV;Z6e7;kBdj;J7iR;sN4H", "pdf_size": 2897005, "rating": "4;5;6;7;7", "confidence": "4;4;4;4;3", "soundness": "2;3;4;3;4", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "62;30;80;155;73", "wc_strengths": "62;23;68;88;156", "wc_weaknesses": "465;54;167;154;37", "wc_questions": "3;109;6;138;50", "wc_limitations": "3;49;2;6;24", "wc_review": "595;265;323;541;340", "wc_reply_reviewers": "0;59;15;24;18", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 80.0, 41.22620525830627 ], "wc_strengths_avg": [ 79.4, 43.7154434954056 ], "wc_weaknesses_avg": [ 175.4, 153.81105291883287 ], "wc_questions_avg": [ 61.2, 54.30064456339354 ], "wc_limitations_avg": [ 16.8, 17.971087891388212 ], "wc_review_avg": [ 412.8, 130.2618900523096 ], "wc_reply_reviewers_avg": [ 23.2, 19.56936381183609 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5144957554275266, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9646136037338390977&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "tue.nl;ru.nl", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Eindhoven University of Technology;Radboud University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tue.nl;https://www.ru.nl/", "aff_unique_abbr": "TU/e;RU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Nijmegen", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "DiffComplete: Diffusion-based Generative 3D Shape Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70567", "id": "lzqaQRsITh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef7bd1f9cbf8a5ab7ddcaccd50699c90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=lzqaQRsITh", "openreview": 
"https://openreview.net/forum?id=lzqaQRsITh", "poster": "/media/PosterPDFs/NeurIPS%202023/70567.png?t=1700161668.5659897", "slides": "https://nips.cc/virtual/2023/poster/70567", "video": "https://nips.cc/virtual/2023/poster/70567", "author_site": "Ruihang Chu, Enze Xie, Shentong Mo, Shentong Mo, Zhenguo Li, Matthias Niessner, Chi-Wing Fu, Jiaya Jia", "tldr": "", "abstract": "We introduce a new diffusion-based approach for shape completion on 3D range scans. Compared with prior deterministic and probabilistic methods, we strike a balance between realism, multi-modality, and high fidelity. We propose DiffComplete by casting shape completion as a generative task conditioned on the incomplete shape. Our key designs are two-fold. First, we devise a hierarchical feature aggregation mechanism to inject conditional features in a spatially-consistent manner. So, we can capture both local details and broader contexts of the conditional inputs to control the shape completion. Second, we propose an occupancy-aware fusion strategy in our model to enable the completion of multiple partial shapes and introduce higher flexibility on the input conditions. DiffComplete sets a new SOTA performance (e.g., 40% decrease on $l_1$ error) on two large-scale 3D shape completion benchmarks. Our completed shapes not only have a realistic outlook compared with the deterministic methods but also exhibit high similarity to the ground truths compared with the probabilistic alternatives. Further, DiffComplete has strong generalizability on objects of entirely unseen classes for both synthetic and real data, eliminating the need for model re-training in various applications.", "keywords": "3d shape completion;conditional generation;diffusion models", "primary_area": "", "supplementary_material": "/attachment/bca4ca62dea5e71ab98371d78215d71a89f69264.pdf", "author": "Ruihang Chu;Enze Xie;Shentong Mo;Zhenguo Li;Matthias Nie\u00dfner;Chi-Wing Fu;Jiaya Jia", "authorids": "~Ruihang_Chu1;~Enze_Xie1;~Shentong_Mo1;~Zhenguo_Li1;~Matthias_Nie\u00dfner2;~Chi-Wing_Fu2;~Jiaya_Jia1", "gender": "M;M;;M;;;M", "homepage": "https://ruihang-chu.github.io/;https://xieenze.github.io/;;http://www.ee.columbia.edu/~zgli/;;;https://jiaya.me", "dblp": "250/9173.html;218/5441;;23/6479;;;31/5649", "google_scholar": "https://scholar.google.com.hk/citations?user=62zPPxkAAAAJ;42MVVPgAAAAJ;;XboZC1AAAAAJ;;;https://scholar.google.com.tw/citations?user=XPAkzTEAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Ruihang_Chu1;~Enze_Xie1;~Shentong_Mo1;~Zhenguo_Li1;~Matthias_Nie\u00dfner2;~Chi-Wing_Fu2;~Jiaya_Jia1", "aff": "The Chinese University of Hong Kong;Huawei Noah's Ark Lab;;Huawei Noah's Ark Lab;;;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "cuhk.edu.hk;huawei.com;;huawei.com;;;cse.ust.hk", "position": "PhD student;Researcher;;Principal Researcher;;;Full Professor", "bibtex": "@inproceedings{\nchu2023diffcomplete,\ntitle={DiffComplete: Diffusion-based Generative 3D Shape Completion},\nauthor={Ruihang Chu and Enze Xie and Shentong Mo and Zhenguo Li and Matthias Nie{\\ss}ner and Chi-Wing Fu and Jiaya Jia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=lzqaQRsITh}\n}", "github": "", "project": "", "reviewers": "hw6P;DbPU;Ct9H;9z1Q;KuxT", "pdf_size": 18379806, "rating": "5;5;5;8;8", "confidence": "4;4;4;5;3", "soundness": "3;3;2;4;4", "novelty": "3;2;2;3;3", "presentation": "2;3;2;4;4", 
"wc_summary": "55;65;109;56;120", "wc_strengths": "53;70;125;93;49", "wc_weaknesses": "106;245;240;74;32", "wc_questions": "38;117;83;71;42", "wc_limitations": "5;9;16;49;13", "wc_review": "257;506;573;343;256", "wc_reply_reviewers": "0;69;84;12;23", "wc_reply_authors": "0;107;236;23;20", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;3;2;2", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 81.0, 27.79208520424475 ], "wc_strengths_avg": [ 78.0, 28.156704352604905 ], "wc_weaknesses_avg": [ 139.4, 87.40617827133273 ], "wc_questions_avg": [ 70.2, 28.937173324289986 ], "wc_limitations_avg": [ 18.4, 15.742934923323542 ], "wc_review_avg": [ 387.0, 130.1952380081545 ], "wc_reply_reviewers_avg": [ 37.6, 32.92779980502797 ], "wc_reply_authors_avg": [ 77.2, 87.49262826089979 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6736360131276610450&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "cuhk.edu.hk;huawei.com;;huawei.com;;;cse.ust.hk", "author_num": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Chinese University of Hong Kong;Huawei;Hong Kong University of Science and Technology", "aff_unique_dep": ";Noah's Ark Lab;Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.huawei.com;https://www.ust.hk", "aff_unique_abbr": "CUHK;Huawei;HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "StyleTTS 2: Towards Human-Level Text-to-Speech through Style Diffusion and Adversarial Training with Large Speech Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70566", "id": "m0RbqrUM26", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3eaad2a0b62b5ed7a2e66c2188bb1449-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m0RbqrUM26", "openreview": "https://openreview.net/forum?id=m0RbqrUM26", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70566", "video": "https://nips.cc/virtual/2023/poster/70566", "author_site": "Yinghao Aaron Li, Cong Han, Vinay Raghavan, Gavin Mischler, Nima Mesgarani", "tldr": "", "abstract": "In this paper, we present StyleTTS 2, a text-to-speech (TTS) model that leverages style diffusion and adversarial training with large speech language models (SLMs) to achieve human-level TTS synthesis. StyleTTS 2 differs from its predecessor by modeling styles as a latent random variable through diffusion models to generate the most suitable style for the text without requiring reference speech, achieving efficient latent diffusion while benefiting from the diverse speech synthesis offered by diffusion models. Furthermore, we employ large pre-trained SLMs, such as WavLM, as discriminators with our novel differentiable duration modeling for end-to-end training, resulting in improved speech naturalness. StyleTTS 2 surpasses human recordings on the single-speaker LJSpeech dataset and matches it on the multispeaker VCTK dataset as judged by native English speakers. 
Moreover, when trained on the LibriTTS dataset, our model outperforms previous publicly available models for zero-shot speaker adaptation. This work achieves the first human-level TTS on both single and multispeaker datasets, showcasing the potential of style diffusion and adversarial training with large SLMs. The audio demos and source code are available at https://styletts2.github.io/.", "keywords": "Speech Processing;Text-to-Speech;Diffusion Model;Large Language Model;Self-Supervised Speech Model;WavLM", "primary_area": "", "supplementary_material": "", "author": "Yinghao Aaron Li;Cong Han;Vinay S Raghavan;Gavin Mischler;Nima Mesgarani", "authorids": "~Yinghao_Aaron_Li1;~Cong_Han1;~Vinay_S_Raghavan1;~Gavin_Mischler1;~Nima_Mesgarani1", "gender": ";M;M;;M", "homepage": ";;;;http://nima.ee.columbia.edu/", "dblp": ";;277/3468;266/1797;", "google_scholar": ";-NweZ-gAAAAJ;uYoQgFgAAAAJ;v8hF5dMAAAAJ;", "orcid": ";;0000-0002-4387-0781;0000-0003-4776-3518;", "linkedin": ";;vinaysraghavan/;;", "or_profile": "~Yinghao_Aaron_Li1;~Cong_Han1;~Vinay_S_Raghavan1;~Gavin_Mischler1;~Nima_Mesgarani1", "aff": ";Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": ";columbia.edu;columbia.edu;columbia.edu;ee.columbia.edu", "position": ";PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023styletts,\ntitle={Style{TTS} 2: Towards Human-Level Text-to-Speech through Style Diffusion and Adversarial Training with Large Speech Language Models},\nauthor={Yinghao Aaron Li and Cong Han and Vinay S Raghavan and Gavin Mischler and Nima Mesgarani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m0RbqrUM26}\n}", "github": "", "project": "", "reviewers": "JmdN;mqLA;4B59;ogQb;5CmP", "pdf_size": 3595606, "rating": "5;6;7;7;7", "confidence": "4;5;4;4;4", "soundness": "2;4;4;3;3", "novelty": "2;4;4;4;3", "presentation": "1;4;3;3;3", "wc_summary": "61;45;69;91;57", "wc_strengths": "94;29;65;38;75", "wc_weaknesses": "133;14;6;391;99", "wc_questions": "1;292;43;200;11", "wc_limitations": "1;25;1;39;1", "wc_review": "290;405;184;759;243", "wc_reply_reviewers": "0;0;0;34;28", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 64.6, 15.304901175767194 ], "wc_strengths_avg": [ 60.2, 23.87802336877992 ], "wc_weaknesses_avg": [ 128.6, 139.93798626534542 ], "wc_questions_avg": [ 109.4, 116.09754519368616 ], "wc_limitations_avg": [ 13.4, 15.818975946628152 ], "wc_review_avg": [ 376.2, 204.68453776482482 ], "wc_reply_reviewers_avg": [ 12.4, 15.304901175767194 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9743856097026037407&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";columbia.edu;columbia.edu;columbia.edu;ee.columbia.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learn to Categorize or Categorize to Learn? Self-Coding for Generalized Category Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70565", "id": "m0vfXMrLwF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6789e468c65a7816760a00a487d3c4e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m0vfXMrLwF", "openreview": "https://openreview.net/forum?id=m0vfXMrLwF", "poster": "/media/PosterPDFs/NeurIPS%202023/70565.png?t=1702476743.690879", "slides": "https://nips.cc/virtual/2023/poster/70565", "video": "https://nips.cc/virtual/2023/poster/70565", "author_site": "Sarah Rastegar, Hazel Doughty, Cees Snoek", "tldr": "", "abstract": "In the quest for unveiling novel categories at test time, we confront the inherent limitations of traditional supervised recognition models that are restricted by a predefined category set. While strides have been made in the realms of self-supervised and open-world learning towards test-time category discovery, a crucial yet often overlooked question persists: what exactly delineates a category? In this paper, we conceptualize a category through the lens of optimization, viewing it as an optimal solution to a well-defined problem. Harnessing this unique conceptualization, we propose a novel, efficient and self-supervised method capable of discovering previously unknown categories at test time. A salient feature of our approach is the assignment of minimum length category codes to individual data instances, which encapsulates the implicit category hierarchy prevalent in real-world datasets. This mechanism affords us enhanced control over category granularity, thereby equipping our model to handle fine-grained categories adeptly. Experimental evaluations, bolstered by state-of-the-art benchmark comparisons, testify to the efficacy of our solution in managing unknown categories at test time. Furthermore, we fortify our proposition with a theoretical foundation, providing proof of its optimality. Our code is available at: https://github.com/SarahRastegar/InfoSieve.", "keywords": "Generalized category discovery;Open world learning;Open-set recognition", "primary_area": "", "supplementary_material": "/attachment/e7b7f7143646be08e1d8d4b81b717a51d48d6a10.pdf", "author": "Sarah Rastegar;Hazel Doughty;Cees G. M. Snoek", "authorids": "~Sarah_Rastegar1;~Hazel_Doughty1;~Cees_G._M._Snoek1", "gender": "F;F;M", "homepage": "https://sarahrastegar.github.io/;https://hazeldoughty.github.io/;http://www.ceessnoek.info", "dblp": "191/4660;198/0823;s/CeesSnoek", "google_scholar": "e_HGE3gAAAAJ;b3koBVwAAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": "0000-0002-4542-7388;;0000-0001-9092-1556", "linkedin": "sarah-rastegar;;cgmsnoek/", "or_profile": "~Sarah_Rastegar1;~Hazel_Doughty1;~Cees_Snoek1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nrastegar2023learn,\ntitle={Learn to Categorize or Categorize to Learn? Self-Coding for Generalized Category Discovery},\nauthor={Sarah Rastegar and Hazel Doughty and Cees G. M. 
Snoek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m0vfXMrLwF}\n}", "github": "", "project": "", "reviewers": "GHXK;xjo1;ApGA;Sjrc", "pdf_size": 4327101, "rating": "5;5;6;7", "confidence": "3;5;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "57;76;68;38", "wc_strengths": "30;35;31;55", "wc_weaknesses": "112;221;33;100", "wc_questions": "31;24;27;47", "wc_limitations": "68;1;8;6", "wc_review": "298;357;167;246", "wc_reply_reviewers": "99;0;27;24", "wc_reply_authors": "1284;0;106;68", "reply_reviewers": "3;0;1;1", "reply_authors": "6;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.75, 14.254385290148432 ], "wc_strengths_avg": [ 37.75, 10.133484099755622 ], "wc_weaknesses_avg": [ 116.5, 67.4258852370512 ], "wc_questions_avg": [ 32.25, 8.870597499605086 ], "wc_limitations_avg": [ 20.75, 27.39867697535777 ], "wc_review_avg": [ 267.0, 69.82478070141 ], "wc_reply_reviewers_avg": [ 37.5, 37.016888037759195 ], "wc_reply_authors_avg": [ 364.5, 532.229978486744 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14149812305678598790&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "uva.nl;uva.nl;uva.nl", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Efficient Hyper-parameter Optimization with Cubic Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70564", "id": "m11TbsaQQI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7500454af92cf3934eb1cc2d59abbdf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m11TbsaQQI", "openreview": "https://openreview.net/forum?id=m11TbsaQQI", "poster": "/media/PosterPDFs/NeurIPS%202023/70564.png?t=1701571939.4774015", "slides": "https://nips.cc/virtual/2023/poster/70564", "video": "https://nips.cc/virtual/2023/poster/70564", "author_site": "Zhenqian Shen, Hansi Yang, Yong Li, James Kwok, Quanming Yao", "tldr": "", "abstract": "As hyper-parameters are ubiquitous and can significantly affect the model performance, hyper-parameter optimization is extremely important in machine learning. In this paper, we consider a sub-class of hyper-parameter optimization problems, where the hyper-gradients are not available. Such problems frequently appear when the performance metric is non-differentiable or the hyper-parameter is not continuous. However, existing algorithms, like Bayesian optimization and reinforcement learning, often get trapped in local optima with poor performance. To address the above limitations, we propose to use cubic regularization to accelerate convergence and avoid saddle points. First, we adopt stochastic relaxation, which allows obtaining gradient and Hessian information without hyper-gradients. 
Then, we exploit the rich curvature information by cubic regularization. Theoretically, we prove that the proposed method can converge to approximate second-order stationary points, and the convergence is also guaranteed when the lower-level problem is inexactly solved. Experiments on synthetic and real-world data demonstrate the effectiveness of our proposed method.", "keywords": "hyper-parameter optimization;cubic regularization", "primary_area": "", "supplementary_material": "/attachment/714e74d2f65f3d956b3538358567706dbfe0ed42.pdf", "author": "Zhenqian Shen;Hansi Yang;Yong Li;James Kwok;quanming yao", "authorids": "~Zhenqian_Shen1;~Hansi_Yang1;~Yong_Li7;~James_Kwok1;~quanming_yao1", "gender": "M;M;M;;M", "homepage": ";https://www.linkedin.com/in/%E7%80%9A%E6%80%9D-%E6%9D%A8-6463a4a1;http://fi.ee.tsinghua.edu.cn/~liyong/;;https://lars-group.github.io/", "dblp": ";252/5354;;;158/1014", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/schhp?hl=en", "orcid": "0000-0003-4921-7000;0000-0002-0479-9898;;;", "linkedin": ";%E7%80%9A%E6%80%9D-%E6%9D%A8-6463a4a1;;;", "or_profile": "~Zhenqian_Shen1;~Hansi_Yang1;~Yong_Li7;~James_Kwok1;~quanming_yao1", "aff": "Tsinghua University;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Tsinghua University;;Department of Electronic Engineering", "aff_domain": "tsinghua.edu.cn;cse.ust.hk;tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;PhD student;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nshen2023efficient,\ntitle={Efficient Hyper-parameter Optimization with Cubic Regularization},\nauthor={Zhenqian Shen and Hansi Yang and Yong Li and James Kwok and quanming yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m11TbsaQQI}\n}", "github": "", "project": "", "reviewers": "yqty;EwuD;S7HE;gbiM", "pdf_size": 2928299, "rating": "5;6;6;7", "confidence": "4;3;4;2", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "4;2;3;3", "wc_summary": "65;69;81;75", "wc_strengths": "60;32;66;44", "wc_weaknesses": "116;43;222;82", "wc_questions": "4;335;845;38", "wc_limitations": "4;1;4;14", "wc_review": "249;480;1218;253", "wc_reply_reviewers": "32;25;25;24", "wc_reply_authors": "28;24;1112;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;4;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.5, 6.06217782649107 ], "wc_strengths_avg": [ 50.5, 13.369741957120938 ], "wc_weaknesses_avg": [ 115.75, 66.55965369501257 ], "wc_questions_avg": [ 305.5, 337.04191134041474 ], "wc_limitations_avg": [ 5.75, 4.9180788932265 ], "wc_review_avg": [ 550.0, 396.8419080691957 ], "wc_reply_reviewers_avg": [ 26.5, 3.2015621187164243 ], "wc_reply_authors_avg": [ 297.5, 470.25392077047053 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2684107237995524843&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;cse.ust.hk;tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Tsinghua University;Hong Kong 
University of Science and Technology;Institution Name Not Provided", "aff_unique_dep": ";Department of Computer Science and Engineering;Department of Electronic Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ust.hk;", "aff_unique_abbr": "THU;HKUST;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "Learning List-Level Domain-Invariant Representations for Ranking", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70563", "id": "m21rQusNgb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cc473bb3ec4176a5e640c3a6b5fb5239-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m21rQusNgb", "openreview": "https://openreview.net/forum?id=m21rQusNgb", "poster": "/media/PosterPDFs/NeurIPS%202023/70563.png?t=1700867614.8337116", "slides": "https://nips.cc/virtual/2023/poster/70563", "video": "https://nips.cc/virtual/2023/poster/70563", "author_site": "Ruicheng Xian, Honglei Zhuang, Zhen Qin, Hamed Zamani, Jing Lu, Ji Ma, Kai Hui, Han Zhao, Xuanhui Wang, Michael Bendersky", "tldr": "", "abstract": "Domain adaptation aims to transfer the knowledge learned on (data-rich) source domains to (low-resource) target domains, and a popular method is invariant representation learning, which matches and aligns the data distributions on the feature space. Although this method is studied extensively and applied to classification and regression problems, its adoption in ranking problems is sporadic, and the few existing implementations lack theoretical justifications. This paper revisits invariant representation learning for ranking. Upon reviewing prior work, we found that these methods implement what we call item-level alignment, which aligns the distributions of the items being ranked from all lists in aggregate but ignores their list structure. However, the list structure should be leveraged, because it is intrinsic to ranking problems where the data and the metrics are defined and computed on lists, not the items by themselves. To close this discrepancy, we propose list-level alignment\u2014learning domain-invariant representations at the higher level of lists. 
The benefits are twofold: it leads to the first domain adaptation generalization bound for ranking, in turn providing theoretical support for the proposed method, and it achieves better empirical transfer performance for unsupervised domain adaptation on ranking tasks, including passage reranking.", "keywords": "learning to rank;domain adaptation;text ranking", "primary_area": "", "supplementary_material": "/attachment/1429dc0e02f2edcb4f23b6050f3a01f4f660f617.zip", "author": "Ruicheng Xian;Honglei Zhuang;Zhen Qin;Hamed Zamani;Jing Lu;Ji Ma;Kai Hui;Han Zhao;Xuanhui Wang;Michael Bendersky", "authorids": "~Ruicheng_Xian1;~Honglei_Zhuang1;~Zhen_Qin5;~Hamed_Zamani1;~Jing_Lu4;~Ji_Ma3;~Kai_Hui1;~Han_Zhao1;~Xuanhui_Wang1;~Michael_Bendersky1", "gender": "M;M;M;M;;M;M;M;M;", "homepage": "https://rxian.github.io;https://hongleizhuang.github.io/;http://alumni.cs.ucr.edu/~zqin001/;https://groups.cs.umass.edu/zamani/;;https://research.google/people/JiMa/;https://khui.github.io/;https://hanzhaoml.github.io/;;http://bendersky.github.io/", "dblp": "243/3086.html;10/9988;;150/5324;;253/2346;37/10077;03/3520-2;67/2661;80/4305", "google_scholar": "Nmk26z4AAAAJ;FxEDj4wAAAAJ;Kv1yk3YAAAAJ;d2uzDIAAAAAJ;;https://scholar.google.co.uk/citations?user=LOccM9MAAAAJ;VorTj3AAAAAJ;x942ipYAAAAJ;;C9mxM5IAAAAJ", "orcid": ";0000-0001-8134-1509;0000-0001-6739-134X;;;;0000-0002-3110-7404;0000-0002-8579-1600;;0000-0002-2941-6240", "linkedin": ";;;;;;;;;", "or_profile": "~Ruicheng_Xian1;~Honglei_Zhuang1;~Zhen_Qin5;~Hamed_Zamani1;~Jing_Lu4;~Ji_Ma3;~Kai_Hui1;~Han_Zhao1;~Xuanhui_Wang1;~Michael_Bendersky1", "aff": "University of Illinois Urbana-Champaign;Google DeepMind;Google Deepmind;Google;;Research, Google;Google;University of Illinois, Urbana Champaign;Google;Google", "aff_domain": "illinois.edu;google.com;google.com;google.com;;research.google.com;google.com;illinois.edu;google.com;google.com", "position": "PhD student;Research Scientist;Researcher;Visiting Faculty Researcher;;Researcher;Software Engineer;Assistant Professor;Software Engineer;Researcher", "bibtex": "@inproceedings{\nxian2023learning,\ntitle={Learning List-Level Domain-Invariant Representations for Ranking},\nauthor={Ruicheng Xian and Honglei Zhuang and Zhen Qin and Hamed Zamani and Jing Lu and Ji Ma and Kai Hui and Han Zhao and Xuanhui Wang and Michael Bendersky},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m21rQusNgb}\n}", "github": "", "project": "", "reviewers": "sWYy;D69g;nxwQ;3wjd", "pdf_size": 597304, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "76;118;41;83", "wc_strengths": "52;167;44;82", "wc_weaknesses": "94;175;94;84", "wc_questions": "5;19;94;26", "wc_limitations": "1;7;1;43", "wc_review": "228;486;274;318", "wc_reply_reviewers": "0;0;61;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.5, 27.33587386567329 ], "wc_strengths_avg": [ 86.25, 48.72563493685844 ], "wc_weaknesses_avg": [ 111.75, 36.74489760497367 ], "wc_questions_avg": [ 36.0, 34.32928778754374 ], "wc_limitations_avg": [ 13.0, 17.4928556845359 ], "wc_review_avg": [ 326.5, 97.43074463432987 ], "wc_reply_reviewers_avg": [ 18.75, 
25.053692342646823 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15700370554328215255&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "illinois.edu;google.com;google.com;google.com;;research.google.com;google.com;illinois.edu;google.com;google.com", "author_num": 10, "aff_unique_index": "0;1;2;1;1;1;0;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google;DeepMind", "aff_unique_dep": ";Google DeepMind;DeepMind", "aff_unique_url": "https://illinois.edu;https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "UIUC;DeepMind;DeepMind", "aff_campus_unique_index": "0;2;2;2;0;2;2", "aff_campus_unique": "Urbana-Champaign;;Mountain View", "aff_country_unique_index": "0;1;1;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Better with Less: A Data-Active Perspective on Pre-Training Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70562", "id": "m2WR1yJ8N9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b29adb4bf2364acec8fb402ef731bb3b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m2WR1yJ8N9", "openreview": "https://openreview.net/forum?id=m2WR1yJ8N9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70562", "video": "https://nips.cc/virtual/2023/poster/70562", "author_site": "Jiarong Xu, Renhong Huang, XIN JIANG, Yuxuan Cao, Carl Yang, Chunping Wang, YANG YANG", "tldr": "", "abstract": "Pre-training on graph neural networks (GNNs) aims to learn transferable knowledge for downstream tasks with unlabeled data, and it has recently become an active research area. The success of graph pre-training models is often attributed to the massive amount of input data. In this paper, however, we identify the curse of big data phenomenon in graph pre-training: more training data do not necessarily lead to better downstream performance. Motivated by this observation, we propose a better-with-less framework for graph pre-training: fewer, but carefully chosen data are fed into a GNN model to enhance pre-training. The proposed pre-training pipeline is called the data-active graph pre-training (APT) framework, and is composed of a graph selector and a pre-training model. The graph selector chooses the most representative and instructive data points based on the inherent properties of graphs as well as predictive uncertainty. The proposed predictive uncertainty, as feedback from the pre-training model, measures the confidence level of the model in the data. When fed with the chosen data, on the other hand, the pre-training model grasps an initial understanding of the new, unseen data, and at the same time attempts to remember the knowledge learned from previous data. Therefore, the integration and interaction between these two components form a unified framework (APT), in which graph pre-training is performed in a progressive and iterative way. 
Experimental results show that the proposed APT is able to obtain an efficient pre-training model with fewer training data and better downstream performance.", "keywords": "graph neural networks;pre-training", "primary_area": "", "supplementary_material": "", "author": "Jiarong Xu;Renhong Huang;XIN JIANG;Yuxuan Cao;Carl Yang;Chunping Wang;Yang Yang", "authorids": "~Jiarong_Xu2;~Renhong_Huang1;~XIN_JIANG5;~Yuxuan_Cao1;~Carl_Yang1;~Chunping_Wang1;~Yang_Yang35", "gender": "F;M;M;F;M;F;M", "homepage": "https://galina0217.github.io/;https://github.com/renH2;https://jiangxjames.github.io/;https://scholar.google.com/citations?user=rwPrfJ0AAAAJ&hl=zh-CN;https://cs.emory.edu/~jyang71/;;http://yangy.org", "dblp": ";325/0914;;;305/0254;54/2715-1;", "google_scholar": ";;zs_h9Y4AAAAJ;;mOINlwcAAAAJ;Rmy5RogAAAAJ;", "orcid": "0000-0003-2973-1889;0000-0002-7808-9768;0000-0003-1231-8529;0009-0000-2867-8938;0000-0001-9145-4531;0000-0003-1854-8667;0000-0002-5058-4417", "linkedin": ";;;;;https://linkedin.com/in/chunping-wang-7b94a15/;", "or_profile": "~Jiarong_Xu2;~Renhong_Huang1;~XIN_JIANG5;~Yuxuan_Cao1;~Carl_Yang1;~Chunping_Wang1;~Yang_Yang35", "aff": "Fudan University;Zhejiang University;Lehigh University;Zhejiang University;Emory University;Finvolution Group;Zhejiang University", "aff_domain": "fudan.edu.cn;zju.edu.cn;lehigh.edu;zju.edu.cn;emory.edu;xinye.com;zju.edu.cn", "position": "Assistant Professor;MS student;Postdoc;MS student;Assistant Professor;Principal Scientist;Associate Professor", "bibtex": "@inproceedings{\nxu2023better,\ntitle={Better with Less: A Data-Active Perspective on Pre-Training Graph Neural Networks},\nauthor={Jiarong Xu and Renhong Huang and XIN JIANG and Yuxuan Cao and Carl Yang and Chunping Wang and Yang Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m2WR1yJ8N9}\n}", "github": "", "project": "", "reviewers": "gzMz;KR8J;16eq;i2Gc", "pdf_size": 6972388, "rating": "6;6;6;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "2;2;3;3", "wc_summary": "34;73;56;80", "wc_strengths": "35;20;95;142", "wc_weaknesses": "232;38;115;64", "wc_questions": "3;308;56;93", "wc_limitations": "20;2;1;1", "wc_review": "324;441;323;380", "wc_reply_reviewers": "38;33;23;35", "wc_reply_authors": "39;18;24;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.75, 17.73943347460679 ], "wc_strengths_avg": [ 73.0, 48.728841562261664 ], "wc_weaknesses_avg": [ 112.25, 74.4794434726791 ], "wc_questions_avg": [ 115.0, 115.92885749458587 ], "wc_limitations_avg": [ 6.0, 8.093207028119323 ], "wc_review_avg": [ 367.0, 48.55409354524086 ], "wc_reply_reviewers_avg": [ 32.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 26.25, 7.75806032459145 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3796274986501709788&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 14, "email": "fudan.edu.cn;zju.edu.cn;lehigh.edu;zju.edu.cn;emory.edu;xinye.com;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;1;3;4;1", "aff_unique_norm": "Fudan University;Zhejiang 
University;Lehigh University;Emory University;FinVolution Group", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.zju.edu.cn;https://www.lehigh.edu;https://www.emory.edu;https://www.finvolutiongroup.com", "aff_unique_abbr": "Fudan;ZJU;Lehigh;Emory;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "m2getD1hpk", "title": "FITS: Modeling Time Series with 10k Parameters", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we introduce FITS, a lightweight yet powerful model for time series analysis. Unlike existing models that directly process raw time-domain data, FITS operates on the principle that time series can be manipulated through interpolation in the complex frequency domain. By discarding high-frequency components with negligible impact on time series data, FITS achieves performance comparable to state-of-the-art models for time series forecasting and anomaly detection tasks, while having a remarkably compact size of only approximately $10k$ parameters. Such a lightweight model can be easily trained and deployed on edge devices, creating opportunities for various applications.\nThe anonymous code repo is available at: \\url{https://anonymous.4open.science/r/FITS}", "keywords": "Time series analysis;Time series forecasting;Complex-valued neural network", "primary_area": "", "supplementary_material": "/attachment/ceee38c5443b65dcc04fc3dfa3cf14dda836a068.pdf", "author": "Zhijian Xu;Ailing Zeng;Qiang Xu", "authorids": "~Zhijian_Xu1;~Ailing_Zeng1;~Qiang_Xu1", "gender": "M;F;M", "homepage": "http://notfornow.com;https://ailingzeng.site/;https://github.com/cure-lab", "dblp": "72/8350;226/4720;43/1230-1", "google_scholar": ";Tn7fzS8AAAAJ;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ", "orcid": ";;", "linkedin": ";%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;", "or_profile": "~Zhijian_Xu1;~Ailing_Zeng1;~Qiang_Xu1", "aff": "The Chinese University of Hong Kong;International Digital Economy Academy;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;idea.edu.cn;cuhk.edu.hk", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nxu2023fits,\ntitle={{FITS}: Modeling Time Series with 10k Parameters},\nauthor={Zhijian Xu and Ailing Zeng and Qiang Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=m2getD1hpk}\n}", "github": "", "project": "", "reviewers": "pZTr;o3Wj;dtUP;S88N;P7AV", "site": "https://openreview.net/forum?id=m2getD1hpk", "pdf_size": 1074047, "rating": "3;5;6;6;7", "confidence": "4;3;5;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "80;94;61;75;42", "wc_strengths": "43;106;41;62;40", "wc_weaknesses": "78;149;140;59;301", "wc_questions": "1;57;50;44;54", "wc_limitations": "92;78;1;2;8", "wc_review": "294;484;293;242;445", "wc_reply_reviewers": "324;148;87;16;28", "wc_reply_authors": "739;339;536;17;17", "reply_reviewers": "1;2;2;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 70.4, 17.692936443677176 ], "wc_strengths_avg": [ 58.4, 25.12846990964631 ], "wc_weaknesses_avg": [ 145.4, 85.16008454669358 ], "wc_questions_avg": [ 41.2, 20.565991344936428 ], "wc_limitations_avg": [ 36.2, 40.16167327191435 ], 
"wc_review_avg": [ 351.6, 94.88645846484103 ], "wc_reply_reviewers_avg": [ 120.6, 112.04213493146227 ], "wc_reply_authors_avg": [ 329.6, 284.8631952358886 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.23312620206007845, "gs_citation": 123, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10969437811591379516&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;International Digital Economy Academy", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;", "aff_unique_abbr": "CUHK;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "A Comprehensive Study on Text-attributed Graphs: Benchmarking and Rethinking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73479", "id": "m2mbfoSuJ1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/37d00f567a18b478065f1a91b95622a0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=m2mbfoSuJ1", "openreview": "https://openreview.net/forum?id=m2mbfoSuJ1", "poster": "/media/PosterPDFs/NeurIPS%202023/73479.png?t=1701583905.6890128", "slides": "https://nips.cc/virtual/2023/poster/73479", "video": "https://nips.cc/virtual/2023/poster/73479", "author_site": "Hao Yan, Chaozhuo Li, Ruosong Long, Chao Yan, Jianan Zhao, Wenwen Zhuang, Jun Yin, Peiyan Zhang, Weihao Han, Hao Sun, Weiwei Deng, Qi Zhang, Lichao Sun, Xing Xie, Senzhang Wang", "tldr": "", "abstract": "Text-attributed graphs (TAGs) are prevalent in various real-world scenarios, where each node is associated with a text description. The cornerstone of representation learning on TAGs lies in the seamless integration of textual semantics within individual nodes and the topological connections across nodes. Recent advancements in pre-trained language models (PLMs) and graph neural networks (GNNs) have facilitated effective learning on TAGs, garnering increased research interest. However, the absence of meaningful benchmark datasets and standardized evaluation procedures for TAGs has impeded progress in this field. In this paper, we propose CS-TAG, a comprehensive and diverse collection of challenging benchmark datasets for TAGs. The CS-TAG datasets are notably large in scale and encompass a wide range of domains, spanning from citation networks to purchase graphs. In addition to building the datasets, we conduct extensive benchmark experiments over CS-TAG with various learning paradigms, including PLMs, GNNs, PLM-GNN co-training methods, and the proposed novel topological pre-training of language models. In a nutshell, we provide an overview of the CS-TAG datasets, standardized evaluation procedures, and present baseline experiments. 
The entire CS-TAG project is publicly accessible at \\url{https://github.com/sktsherlock/TAG-Benchmark}.", "keywords": "Graph Representation Learning;Pretrained Language Models;Graph Neural Networks;Text-attributed Graphs", "primary_area": "", "supplementary_material": "", "author": "Hao Yan;Chaozhuo Li;Ruosong Long;Chao Yan;Jianan Zhao;Wenwen Zhuang;Jun Yin;Peiyan Zhang;Weihao Han;Hao Sun;Weiwei Deng;Qi Zhang;Lichao Sun;Xing Xie;Senzhang Wang", "authorids": "~Hao_Yan6;~Chaozhuo_Li1;~Ruosong_Long1;~Chao_Yan4;~Jianan_Zhao2;~Wenwen_Zhuang2;~Jun_Yin11;~Peiyan_Zhang1;~Weihao_Han1;~Hao_Sun6;~Weiwei_Deng2;~Qi_Zhang19;~Lichao_Sun1;~Xing_Xie3;~Senzhang_Wang2", "gender": "M;;M;M;M;;M;M;M;M;M;M;M;M;M", "homepage": "https://sktsherlock.github.io/;https://scss.bupt.edu.cn/info/1063/5534.htm;https://github.com/RobertLoong;;https://andyjzhao.github.io/;;https://esperanto-mega.github.io/;https://peiyance.github.io/;;;;;https://lichao-sun.github.io/;http://research.microsoft.com/en-us/people/xingx/;https://senzhangwangcsu.github.io/index.html", "dblp": ";316/1269.html;;;135/9355-2;;58/5423-5;277/0918;234/8823;;311/3565.html;;121/0780-1.html;08/6809-1;118/5055", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?view_op=new_articles;;https://scholar.google.com/citations?hl=en;A1_FpIcAAAAJ;;OjWD_SsAAAAJ;;;WhGUE7AAAAAJ;5EQfAFIAAAAJ;zdWyGRMAAAAJ", "orcid": ";0000-0002-8179-7503;;0000-0001-5929-8233;0000-0002-9743-7588;;;0000-0002-8691-1846;;0009-0004-5027-7478;0009-0001-4793-9715;;;0000-0002-8608-8482;0000-0002-3615-4859", "linkedin": ";;;;;;;;;;;qizhang07/;lichao-sun-b273a290/;xingx/;", "or_profile": "~Hao_Yan6;~Chaozhuo_Li1;~Ruosong_Long1;~Chao_Yan4;~Jianan_Zhao2;~Wenwen_Zhuang2;~Jun_Yin11;~Peiyan_Zhang1;~Weihao_Han1;~Hao_Sun6;~Weiwei_Deng2;~Qi_Zhang19;~Lichao_Sun1;~Xing_Xie3;~Senzhang_Wang2", "aff": "Central South University;Beijing University of Posts and Telecommunications;University of Birmingham;Peking University;Universit\u00e9 de Montr\u00e9al;;Central South University;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Microsoft;Microsoft;Microsoft;Microsoft;Lehigh University;Microsoft Research Asia;Central South University", "aff_domain": "csu.edu.cn;bupt.edu.cn;bham.ac.uk;pku.edu.cn;umontreal.ca;;csu.edu.cn;cse.ust.hk;microsoft.com;microsoft.com;microsoft.com;microsoft.com;lehigh.edu;microsoft.com;csu.edu.cn", "position": "MS student;Associate Professor;MS student;MS student;PhD student;;MS student;PhD student;Researcher;Researcher;Researcher;Researcher;Assistant Professor;Senior Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nyan2023a,\ntitle={A Comprehensive Study on Text-attributed Graphs: Benchmarking and Rethinking},\nauthor={Hao Yan and Chaozhuo Li and Ruosong Long and Chao Yan and Jianan Zhao and Wenwen Zhuang and Jun Yin and Peiyan Zhang and Weihao Han and Hao Sun and Weiwei Deng and Qi Zhang and Lichao Sun and Xing Xie and Senzhang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=m2mbfoSuJ1}\n}", "github": "", "project": "", "reviewers": "fbKv;ZLyP;pXJE;ghFj", "pdf_size": 845370, "rating": "6;6;6;8", "confidence": "3;4;5;5", "wc_summary_and_contributions": "44;38;89;102", "wc_strengths": "29;35;71;37", "wc_improvement": "83;31;135;128", "wc_limitations": "1;47;1;152", "wc_correctness": "1;9;17;225", "wc_clarity": "1;11;9;509", "wc_relation_to_prior_work": 
"3;17;1;17", "wc_documentation": "3;5;1;91", "wc_additional_feedback": "1;1;1;1", "wc_review": "166;194;325;1262", "wc_reply_reviewers": "17;0;0;18", "wc_reply_authors": "1225;1767;1768;3904", "reply_reviewers": "1;0;0;1", "reply_authors": "4;4;4;7", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 68.25, 27.716195626384224 ], "wc_strengths_avg": [ 43.0, 16.431676725154983 ], "wc_improvement_avg": [ 94.25, 41.61354947610213 ], "wc_limitations_avg": [ 50.25, 61.67404235170579 ], "wc_correctness_avg": [ 63.0, 93.70165420097983 ], "wc_clarity_avg": [ 132.5, 217.40457676875158 ], "wc_relation_to_prior_work_avg": [ 9.5, 7.533259586659682 ], "wc_documentation_avg": [ 25.0, 38.13135192987524 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 486.75, 451.59681963007665 ], "wc_reply_reviewers_avg": [ 8.75, 8.757139944068497 ], "wc_reply_authors_avg": [ 2166.0, 1027.5857628441531 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.75, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3184889112262870391&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "csu.edu.cn;bupt.edu.cn;bham.ac.uk;pku.edu.cn;umontreal.ca;;csu.edu.cn;cse.ust.hk;microsoft.com;microsoft.com;microsoft.com;microsoft.com;lehigh.edu;microsoft.com;csu.edu.cn", "author_num": 15, "aff_unique_index": "0;1;2;3;4;0;5;6;6;6;6;7;6;0", "aff_unique_norm": "Central South University;Beijing University of Posts and Telecommunications;University of Birmingham;Peking University;Universit\u00e9 de Montr\u00e9al;Hong Kong University of Science and Technology;Microsoft;Lehigh University", "aff_unique_dep": ";;;;;Department of Computer Science and Engineering;Microsoft Corporation;", "aff_unique_url": "https://www.csu.edu.cn;http://www.bupt.edu.cn/;https://www.birmingham.ac.uk;http://www.pku.edu.cn;https://www.umontreal.ca;https://www.ust.hk;https://www.microsoft.com;https://www.lehigh.edu", "aff_unique_abbr": "CSU;BUPT;Birmingham;Peking U;UdeM;HKUST;Microsoft;Lehigh", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Beijing;Hong Kong SAR;Asia", "aff_country_unique_index": "0;0;1;0;2;0;0;3;3;3;3;3;0;0", "aff_country_unique": "China;United Kingdom;Canada;United States" }, { "title": "Equivariant Adaptation of Large Pretrained Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70561", "id": "m6dRQJw280", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d5856318032ef3630cb580f4e24f823-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m6dRQJw280", "openreview": "https://openreview.net/forum?id=m6dRQJw280", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70561", "video": "https://nips.cc/virtual/2023/poster/70561", "author_site": "Arnab Kumar Mondal, Siba Smarak Panigrahi, Oumar Kaba, Sai Rajeswar Mudumba, Siamak Ravanbakhsh", "tldr": "", "abstract": "Equivariant networks are specifically designed to ensure consistent behavior with respect to a set of input transformations, leading to higher sample efficiency and more accurate and robust predictions. However, redesigning each component of prevalent deep neural network architectures to achieve chosen equivariance is a difficult problem and can result in a computationally expensive network during both training and inference. 
A recently proposed alternative towards equivariance that removes the architectural constraints is to use a simple canonicalization network that transforms the input to a canonical form before feeding it to an unconstrained prediction network. We show here that this approach can effectively be used to make a large pretrained network equivariant. However, we observe that the produced canonical orientations can be misaligned with those of the training distribution, hindering performance. Using dataset-dependent priors to inform the canonicalization function, we are able to make large pretrained models equivariant while maintaining their performance. This significantly improves the robustness of these models to deterministic transformations of the data, such as rotations. We believe this equivariant adaptation of large pretrained models can help their domain-specific applications with known symmetry priors.", "keywords": "deep learning;large pretrained models;symmetry;equivariance;group theory;computer vision;point clouds;foundation models", "primary_area": "", "supplementary_material": "", "author": "Arnab Kumar Mondal;Siba Smarak Panigrahi;S\u00e9kou-Oumar Kaba;Sai Rajeswar;Siamak Ravanbakhsh", "authorids": "~Arnab_Kumar_Mondal1;~Siba_Smarak_Panigrahi1;~S\u00e9kou-Oumar_Kaba1;~Sai_Rajeswar2;~Siamak_Ravanbakhsh1", "gender": "M;M;M;;", "homepage": "https://arnab39.github.io;https://sibasmarak.github.io/;https://oumarkaba.github.io;;", "dblp": ";304/7822;279/3144;;", "google_scholar": "NhWR4yIAAAAJ;https://scholar.google.co.in/citations?hl=en;https://scholar.google.ca/citations?user=jKqh8jAAAAAJ;;", "orcid": ";;0000-0002-7258-4696;;", "linkedin": "arnab-mondal-01b522a9/;siba-smarak-panigrahi-42b38a213/;oumar-kaba/;;", "or_profile": "~Arnab_Kumar_Mondal1;~Siba_Smarak_Panigrahi1;~S\u00e9kou-Oumar_Kaba1;~Sai_Rajeswar2;~Siamak_Ravanbakhsh1", "aff": "ServiceNow Inc;Mila - Quebec AI Institute;McGill University;;", "aff_domain": "servicenow.com;mila.quebec;mcgill.ca;;", "position": "Intern;Researcher;PhD student;;", "bibtex": "@inproceedings{\nmondal2023equivariant,\ntitle={Equivariant Adaptation of Large Pretrained Models},\nauthor={Arnab Kumar Mondal and Siba Smarak Panigrahi and S{\\'e}kou-Oumar Kaba and Sai Rajeswar and Siamak Ravanbakhsh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m6dRQJw280}\n}", "github": "", "project": "", "reviewers": "45sc;qZrQ;vNov;nPdb", "pdf_size": 13873202, "rating": "3;5;6;7", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "88;84;75;149", "wc_strengths": "64;36;58;37", "wc_weaknesses": "219;218;401;158", "wc_questions": "129;34;8;40", "wc_limitations": "1;1;9;9", "wc_review": "501;373;551;393", "wc_reply_reviewers": "0;43;30;54", "wc_reply_authors": "0;106;83;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 29.248931604419333 ], "wc_strengths_avg": [ 48.75, 12.43734296383275 ], "wc_weaknesses_avg": [ 249.0, 91.16742839413646 ], "wc_questions_avg": [ 52.75, 45.63647116068463 ], "wc_limitations_avg": [ 5.0, 4.0 ], "wc_review_avg": [ 454.5, 73.99155357201253 ], "wc_reply_reviewers_avg": [ 31.75, 20.20365066021485 ], "wc_reply_authors_avg": [ 47.25, 47.94462952198087 ], 
"reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8783100656536799, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7852419609056361464&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "servicenow.com;mila.quebec;mcgill.ca;;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "ServiceNow;Quebec AI Institute;McGill University", "aff_unique_dep": ";AI Institute;", "aff_unique_url": "https://www.servicenow.com;https://mila.quebec;https://www.mcgill.ca", "aff_unique_abbr": "ServiceNow;Mila;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Canada" }, { "title": "Characterizing Graph Datasets for Node Classification: Homophily-Heterophily Dichotomy and Beyond", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70560", "id": "m7PIJWOdlY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/01b681025fdbda8e935a66cc5bb6e9de-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m7PIJWOdlY", "openreview": "https://openreview.net/forum?id=m7PIJWOdlY", "poster": "/media/PosterPDFs/NeurIPS%202023/70560.png?t=1701257817.8780048", "slides": "https://nips.cc/virtual/2023/poster/70560", "video": "https://nips.cc/virtual/2023/poster/70560", "author_site": "Oleg Platonov, Denis Kuznedelev, Artem Babenko, Liudmila Prokhorenkova", "tldr": "", "abstract": "Homophily is a graph property describing the tendency of edges to connect similar nodes; the opposite is called heterophily. It is often believed that heterophilous graphs are challenging for standard message-passing graph neural networks (GNNs), and much effort has been put into developing efficient methods for this setting. However, there is no universally agreed-upon measure of homophily in the literature. In this work, we show that commonly used homophily measures have critical drawbacks preventing the comparison of homophily levels across different datasets. For this, we formalize desirable properties for a proper homophily measure and verify which measures satisfy which properties. In particular, we show that a measure that we call adjusted homophily satisfies more desirable properties than other popular homophily measures while being rarely used in graph machine learning literature. Then, we go beyond the homophily-heterophily dichotomy and propose a new characteristic that allows one to further distinguish different sorts of heterophily. The proposed label informativeness (LI) characterizes how much information a neighbor's label provides about a node's label. We prove that this measure satisfies important desirable properties. 
We also observe empirically that LI better agrees with GNN performance compared to homophily measures, which confirms that it is a useful characteristic of the graph structure.", "keywords": "graph characteristics;homophily;heterophily;label informativeness;constant baseline;GNN", "primary_area": "", "supplementary_material": "/attachment/03efe18eea9652276c277fe3a768179090e4d3fd.zip", "author": "Oleg Platonov;Denis Kuznedelev;Artem Babenko;Liudmila Prokhorenkova", "authorids": "~Oleg_Platonov1;~Denis_Kuznedelev1;~Artem_Babenko1;~Liudmila_Prokhorenkova1", "gender": "M;M;M;F", "homepage": "https://t.me/Oleg_Platonov;https://github.com/Godofnothing;;", "dblp": "329/5448;322/8616;117/4834;45/11468", "google_scholar": "q024V_QAAAAJ;;q885d1wAAAAJ;https://scholar.google.ru/citations?user=6JyZlSEAAAAJ", "orcid": ";0009-0005-2420-9620;0000-0002-1830-8252;", "linkedin": ";;;", "or_profile": "~Oleg_Platonov1;~Denis_Kuznedelev1;~Artem_Babenko1;~Liudmila_Prokhorenkova1", "aff": "Higher School of Economics;;Yandex;Yandex", "aff_domain": "hse.ru;;yandex-team.ru;yandex-team.ru", "position": "MS student;;Researcher;Researcher", "bibtex": "@inproceedings{\nplatonov2023characterizing,\ntitle={Characterizing Graph Datasets for Node Classification: Homophily-Heterophily Dichotomy and Beyond},\nauthor={Oleg Platonov and Denis Kuznedelev and Artem Babenko and Liudmila Prokhorenkova},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m7PIJWOdlY}\n}", "github": "", "project": "", "reviewers": "oQPA;kh3x;QJti;eRJB", "pdf_size": 862318, "rating": "3;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;4;4;3", "wc_summary": "52;104;165;59", "wc_strengths": "76;72;59;111", "wc_weaknesses": "227;29;178;44", "wc_questions": "81;27;119;34", "wc_limitations": "48;12;11;9", "wc_review": "484;244;532;257", "wc_reply_reviewers": "24;19;114;25", "wc_reply_authors": "69;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.0, 45.07216435894775 ], "wc_strengths_avg": [ 79.5, 19.241881404893856 ], "wc_weaknesses_avg": [ 119.5, 84.9543995329259 ], "wc_questions_avg": [ 65.25, 37.33882028131044 ], "wc_limitations_avg": [ 20.0, 16.20185174601965 ], "wc_review_avg": [ 379.25, 129.94494026317454 ], "wc_reply_reviewers_avg": [ 45.5, 39.613760235554516 ], "wc_reply_authors_avg": [ 17.25, 29.877876430563134 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.33333333333333337, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2618458520747056829&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hse.ru;;yandex-team.ru;yandex-team.ru", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Higher School of Economics;Yandex", "aff_unique_dep": ";", "aff_unique_url": "https://www.hse.ru;https://yandex.com", "aff_unique_abbr": "HSE;Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Learning Motion Refinement for Unsupervised Face Animation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70559", "id": "m9uHv1Pxq7", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df2df463f98abc4de7734dbd0b0dc49d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=m9uHv1Pxq7", "openreview": "https://openreview.net/forum?id=m9uHv1Pxq7", "poster": "/media/PosterPDFs/NeurIPS%202023/70559.png?t=1701615425.1215925", "slides": "https://nips.cc/virtual/2023/poster/70559", "video": "https://nips.cc/virtual/2023/poster/70559", "author_site": "Jiale Tao, Shuhang Gu, Wen Li, Lixin Duan", "tldr": "", "abstract": "Unsupervised face animation aims to generate a human face video based on the\nappearance of a source image, mimicking the motion from a driving video. Existing\nmethods typically adopted a prior-based motion model (e.g., the local affine motion\nmodel or the local thin-plate-spline motion model). While it is able to capture\nthe coarse facial motion, artifacts can often be observed around the tiny motion\nin local areas (e.g., lips and eyes), due to the limited ability of these methods\nto model the finer facial motions. In this work, we design a new unsupervised\nface animation approach to learn simultaneously the coarse and finer motions. In\nparticular, while exploiting the local affine motion model to learn the global coarse\nfacial motion, we design a novel motion refinement module to compensate for\nthe local affine motion model for modeling finer face motions in local areas. The\nmotion refinement is learned from the dense correlation between the source and\ndriving images. Specifically, we first construct a structure correlation volume based\non the keypoint features of the source and driving images. Then, we train a model\nto generate the tiny facial motions iteratively from low to high resolution. The\nlearned motion refinements are combined with the coarse motion to generate the\nnew image. 
Extensive experiments on widely used benchmarks demonstrate that our method achieves the best results among state-of-the-art baselines.", "keywords": "Face animation;Motion refinement;Structure correlation", "primary_area": "", "supplementary_material": "/attachment/30754f876d56915038cccda0ca30ccbed08248fe.zip", "author": "Jiale Tao;Shuhang Gu;Wen Li;Lixin Duan", "authorids": "~Jiale_Tao1;~Shuhang_Gu3;~Wen_Li2;~Lixin_Duan1", "gender": "M;M;M;M", "homepage": ";;http://wenli-vision.github.io;http://lxduan.info/", "dblp": "304/1144;126/1028;06/721-1;54/7057.html", "google_scholar": "WF5DPWkAAAAJ;-kSTt40AAAAJ;https://scholar.google.ch/citations?user=yjG4Eg4AAAAJ;inRIcS0AAAAJ", "orcid": ";;0000-0002-5559-8594;0000-0002-0723-4016", "linkedin": ";;;lxduan/", "or_profile": "~Jiale_Tao1;~Shuhang_Gu3;~Wen_Li2;~Lixin_Duan1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntao2023learning,\ntitle={Learning Motion Refinement for Unsupervised Face Animation},\nauthor={Jiale Tao and Shuhang Gu and Wen Li and Lixin Duan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=m9uHv1Pxq7}\n}", "github": "", "project": "", "reviewers": "pDRj;ELv3;zuqN;mUhK;2aeH;SbHP", "pdf_size": 2248815, "rating": "4;4;5;5;5;6", "confidence": "3;4;5;2;5;3", "soundness": "3;2;3;3;3;3", "novelty": "2;2;2;2;3;3", "presentation": "3;3;3;3;3;4", "wc_summary": "128;140;69;163;29;46", "wc_strengths": "169;264;57;84;23;51", "wc_weaknesses": "197;733;79;153;149;69", "wc_questions": "68;37;115;72;1;1", "wc_limitations": "10;7;98;38;8;82", "wc_review": "572;1181;418;510;210;249", "wc_reply_reviewers": "44;0;141;49;0;44", "wc_reply_authors": "45;43;581;89;39;25", "reply_reviewers": "1;0;2;1;0;1", "reply_authors": "2;2;3;3;2;2", "rating_avg": [ 4.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 3.6666666666666665, 1.1055415967851334 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 95.83333333333333, 50.27728667654566 ], "wc_strengths_avg": [ 108.0, 83.37465642108118 ], "wc_weaknesses_avg": [ 230.0, 229.24150293231517 ], "wc_questions_avg": [ 49.0, 40.8207463593371 ], "wc_limitations_avg": [ 40.5, 36.840874039577294 ], "wc_review_avg": [ 523.3333333333334, 321.3521778706692 ], "wc_reply_reviewers_avg": [ 46.333333333333336, 47.03426646841877 ], "wc_reply_authors_avg": [ 137.0, 199.53612872526784 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.07312724241271304, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5070335291774396024&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", 
"aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Provably Fast Convergence of Independent Natural Policy Gradient for Markov Potential Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70558", "id": "mA7nTGXjD3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8936fa1691764912d9519e1b5673ea66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mA7nTGXjD3", "openreview": "https://openreview.net/forum?id=mA7nTGXjD3", "poster": "/media/PosterPDFs/NeurIPS%202023/70558.png?t=1702084667.232974", "slides": "https://nips.cc/virtual/2023/poster/70558", "video": "https://nips.cc/virtual/2023/poster/70558", "author_site": "Youbang Sun, Tao Liu, Ruida Zhou, P. R. Kumar, Shahin Shahrampour", "tldr": "", "abstract": "This work studies an independent natural policy gradient (NPG) algorithm for the multi-agent reinforcement learning problem in Markov potential games. It is shown that, under mild technical assumptions and the introduction of the \\textit{suboptimality gap}, the independent NPG method with an oracle providing exact policy evaluation asymptotically reaches an $\\epsilon$-Nash Equilibrium (NE) within $\\mathcal{O}(1/\\epsilon)$ iterations. This improves upon the previous best result of $\\mathcal{O}(1/\\epsilon^2)$ iterations and is of the same order, $\\mathcal{O}(1/\\epsilon)$, that is achievable for the single-agent case. Empirical results for a synthetic potential game and a congestion game are presented to verify the theoretical bounds.", "keywords": "Multi Agent Reinforcement Learning;Markov Potential Games;Natural Policy Gradient;Nash Equilibrium", "primary_area": "", "supplementary_material": "/attachment/639685e8965d436a9b7e31af07f3d94efea1d09d.zip", "author": "Youbang Sun;Tao Liu;Ruida Zhou;Panganamala Kumar;Shahin Shahrampour", "authorids": "~Youbang_Sun1;~Tao_Liu8;~Ruida_Zhou1;~Panganamala_Kumar1;~Shahin_Shahrampour2", "gender": "M;M;M;M;", "homepage": ";;https://sites.google.com/view/ruida-zhou;https://cesg.tamu.edu/faculty/p-r-kumar/;", "dblp": ";43/656-35.html;215/2026;https://dblp.org/pers/k/Kumar:P=_R=.html;127/7489", "google_scholar": "TUR1VtcAAAAJ;XQjEQ4MAAAAJ;kXbo1twAAAAJ;qGUpTVwAAAAJ;nr4EJS8AAAAJ", "orcid": ";0000-0001-7879-5315;;0000-0003-0389-5367;", "linkedin": "sun-yb/;tao-liu-a19661174/;;;shahin-shahrampour-425a8823/", "or_profile": "~Youbang_Sun1;~Tao_Liu8;~Ruida_Zhou1;~Panganamala_Kumar1;~Shahin_Shahrampour2", "aff": "Northeastern University;Texas A&M University - College Station;Texas A&M University;Texas A&M;Northeastern University", "aff_domain": "northeastern.edu;tamu.edu;tamu.edu;tamu.edu;northeastern.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsun2023provably,\ntitle={Provably Fast Convergence of Independent Natural Policy Gradient for Markov Potential Games},\nauthor={Youbang Sun and Tao Liu and Ruida Zhou and Panganamala Kumar and Shahin Shahrampour},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mA7nTGXjD3}\n}", "github": "", "project": "", "reviewers": "XDXN;tti4;gegG;Qt2L", "pdf_size": 499023, "rating": "6;6;6;7", "confidence": "5;3;4;4", "soundness": "3;3;2;4", "novelty": "2;3;2;2", "presentation": "2;3;2;4", "wc_summary": "99;47;25;58", "wc_strengths": "125;75;39;77", "wc_weaknesses": 
"1218;97;26;257", "wc_questions": "299;104;111;302", "wc_limitations": "200;8;1;25", "wc_review": "1941;331;202;719", "wc_reply_reviewers": "186;41;48;0", "wc_reply_authors": "389;190;32;0", "reply_reviewers": "1;2;1;0", "reply_authors": "3;3;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 57.25, 26.873546472321067 ], "wc_strengths_avg": [ 79.0, 30.56141357987225 ], "wc_weaknesses_avg": [ 399.5, 479.91066877076196 ], "wc_questions_avg": [ 204.0, 96.53755745822451 ], "wc_limitations_avg": [ 58.5, 82.1599050632363 ], "wc_review_avg": [ 798.25, 686.6576202883065 ], "wc_reply_reviewers_avg": [ 68.75, 70.13335511723362 ], "wc_reply_authors_avg": [ 152.75, 154.2033965254981 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8388659628131654988&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "northeastern.edu;tamu.edu;tamu.edu;tamu.edu;northeastern.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Northeastern University;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.tamu.edu", "aff_unique_abbr": "NEU;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Theoretically Guaranteed Bidirectional Data Rectification for Robust Sequential Recommendation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70557", "id": "mHsxsrLl0y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/08309150af77fc7c79ade0bf8bb6a562-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mHsxsrLl0y", "openreview": "https://openreview.net/forum?id=mHsxsrLl0y", "poster": "/media/PosterPDFs/NeurIPS%202023/70557.png?t=1698192214.2982178", "slides": "https://nips.cc/virtual/2023/poster/70557", "video": "https://nips.cc/virtual/2023/poster/70557", "author_site": "Yatong Sun, Bin Wang, Zhu Sun, Xiaochun Yang, Yan Wang", "tldr": "", "abstract": "Sequential recommender systems (SRSs) are typically trained to predict the next item as the target given its preceding (and succeeding) items as the input. Such a paradigm assumes that every input-target pair is reliable for training. However, users can be induced to click on items that are inconsistent with their true preferences, resulting in unreliable instances, i.e., mismatched input-target pairs. Current studies on mitigating this issue suffer from two limitations: (i) they discriminate instance reliability according to models trained with unreliable data, yet without theoretical guarantees that such a seemingly contradictory solution can be effective; and (ii) most methods can only tackle either unreliable input or targets but fail to handle both simultaneously. To fill the gap, we theoretically unveil the relationship between SRS predictions and instance reliability, whereby two error-bounded strategies are proposed to rectify unreliable targets and input, respectively. 
On this basis, we devise a model-agnostic Bidirectional Data Rectification (BirDRec) framework, which can be flexibly implemented with most existing SRSs for robust training against unreliable data. Additionally, a rectification sampling strategy is devised and a self-ensemble mechanism is adopted to reduce the (time and space) complexity of BirDRec. Extensive experiments on four real-world datasets verify the generality, effectiveness, and efficiency of our proposed BirDRec.", "keywords": "recommender systems;sequential recommendation", "primary_area": "", "supplementary_material": "/attachment/8b5cba73c824e9e71587259cf54d46d5917a2b54.pdf", "author": "yatong sun;Bin Wang;Zhu Sun;Xiaochun Yang;Yan Wang", "authorids": "~yatong_sun1;~Bin_Wang15;~Zhu_Sun1;~Xiaochun_Yang3;~Yan_Wang16", "gender": "M;M;F;F;M", "homepage": ";;https://sites.google.com/view/zhusun/home;;http://web.science.mq.edu.au/~yanwang/", "dblp": "https://dblp.uni-trier.de/pid/256/1865;13/1898-15;163/5129-1.html;86/2859-1.html;59/2227-2", "google_scholar": ";;https://scholar.google.com.sg/citations?user=kJy0fd8AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-2694-1023;0000-0002-3350-7022;0000-0002-6184-4771;0000-0002-5344-1884", "linkedin": ";;;;yan-wang-967884/", "or_profile": "~yatong_sun1;~Bin_Wang15;~Zhu_Sun1;~Xiaochun_Yang3;~Yan_Wang16", "aff": "Northeastern University;Northeastern University;Institute of High Performance Computing, Singapore, A*STAR;Northeastern University;Macquarie University", "aff_domain": "neu.edu.cn;neu.edu.cn;ihpc.a-star.edu.sg;neu.edu.cn;mq.edu.au", "position": "PhD student;Full Professor;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023theoretically,\ntitle={Theoretically Guaranteed Bidirectional Data Rectification for Robust Sequential Recommendation},\nauthor={yatong sun and Bin Wang and Zhu Sun and Xiaochun Yang and Yan Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mHsxsrLl0y}\n}", "github": "", "project": "", "reviewers": "1jEu;E67v;rz4h;cUYC;Uqk2", "pdf_size": 0, "rating": "5;5;6;7;8", "confidence": "3;3;4;2;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "88;49;68;148;164", "wc_strengths": "78;42;41;77;81", "wc_weaknesses": "160;119;101;114;37", "wc_questions": "165;6;66;8;20", "wc_limitations": "60;1;34;7;37", "wc_review": "551;217;310;354;339", "wc_reply_reviewers": "47;34;12;9;12", "wc_reply_authors": "44;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.4, 44.96932287682348 ], "wc_strengths_avg": [ 63.8, 18.258148865643527 ], "wc_weaknesses_avg": [ 106.2, 39.83666652720832 ], "wc_questions_avg": [ 53.0, 60.05997002996255 ], "wc_limitations_avg": [ 27.8, 21.497906874856447 ], "wc_review_avg": [ 354.2, 109.30580954368345 ], "wc_reply_reviewers_avg": [ 22.8, 15.065191668213185 ], "wc_reply_authors_avg": [ 8.8, 17.6 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.18333969940564224, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10576216083576474270&as_sdt=5,44&sciodt=0,44&hl=en", 
"gs_version_total": 6, "email": "neu.edu.cn;neu.edu.cn;ihpc.a-star.edu.sg;neu.edu.cn;mq.edu.au", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Northeastern University;Institute of High Performance Computing;Macquarie University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;https://www.ihpc.a-star.edu.sg;https://www.mq.edu.au", "aff_unique_abbr": "NEU;IHPC;MQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "United States;Singapore;Australia" }, { "title": "Efficient Testable Learning of Halfspaces with Adversarial Label Noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70556", "id": "mIm0hsUUt1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7c319b62e2257b34cb0e1040ced2e007-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mIm0hsUUt1", "openreview": "https://openreview.net/forum?id=mIm0hsUUt1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70556", "video": "https://nips.cc/virtual/2023/poster/70556", "author_site": "Ilias Diakonikolas, Daniel Kane, Vasilis Kontonis, Sihan Liu, Nikos Zarifis", "tldr": "", "abstract": "We give the first polynomial-time algorithm for the testable learning \nof halfspaces in the presence of adversarial label noise under the Gaussian distribution. In the recently introduced testable learning \nmodel, one is required to produce a tester-learner such that if the data passes the tester, then one can trust the output of the robust learner on the data. Our tester-learner runs in time $\\text{poly}(d/\\epsilon)$ and outputs a halfspace with misclassification error $O(\\text{opt})+\\epsilon$, where $\\text{opt}$ is the 0-1 error of the best fitting halfspace. At a technical level, our algorithm employs an iterative soft localization technique enhanced with appropriate testers to ensure that the data distribution is sufficiently similar to a Gaussian. 
Finally, our algorithm can be readily adapted to yield an efficient and testable active learner requiring only $d ~ \\text{polylog}(1/\\epsilon)$ labeled examples.", "keywords": "Machine Learning", "primary_area": "", "supplementary_material": "", "author": "Ilias Diakonikolas;Daniel Kane;Vasilis Kontonis;Sihan Liu;Nikos Zarifis", "authorids": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Vasilis_Kontonis1;~Sihan_Liu2;~Nikos_Zarifis1", "gender": "M;M;M;M;M", "homepage": "http://www.iliasdiakonikolas.org/;http://cseweb.ucsd.edu/~dakane/;http://vkonton.github.io/;https://lteins.github.io/;https://nikoszarifis.github.io/", "dblp": "d/IliasDiakonikolas;52/6817;203/8777;;241/9782", "google_scholar": "Vb3FLmkAAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;7_44KWAAAAAJ;eq7JPDgAAAAJ;P1ha1IkAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Vasilis_Kontonis1;~Sihan_Liu2;~NIKOLAOS_ZARIFIS1", "aff": "University of Wisconsin, Madison;University of California, San Diego;, University of Texas at Austin;Computer Science and Engineering Department, University of California, San Diego;University of Wisconsin, Madison", "aff_domain": "wisc.edu;ucsd.edu;cs.utexas.edu;cse.ucsd.edu;wisc.edu", "position": "Associate Professor;Assistant Professor;Postdoc;PhD student;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2023efficient,\ntitle={Efficient Testable Learning of Halfspaces with Adversarial Label Noise},\nauthor={Ilias Diakonikolas and Daniel Kane and Vasilis Kontonis and Sihan Liu and Nikos Zarifis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mIm0hsUUt1}\n}", "github": "", "project": "", "reviewers": "1kSW;8SDJ;fkEL;XEN1", "pdf_size": 394787, "rating": "4;7;7;8", "confidence": "4;3;4;3", "soundness": "2;4;3;4", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "411;147;216;231", "wc_strengths": "57;142;125;56", "wc_weaknesses": "23;46;10;28", "wc_questions": "14;49;49;26", "wc_limitations": "24;1;1;47", "wc_review": "529;385;401;388", "wc_reply_reviewers": "0;0;12;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 251.25, 97.5201902172058 ], "wc_strengths_avg": [ 95.0, 38.96793553679743 ], "wc_weaknesses_avg": [ 26.75, 12.910751333675357 ], "wc_questions_avg": [ 34.5, 15.107944929738128 ], "wc_limitations_avg": [ 18.25, 19.070592544543548 ], "wc_review_avg": [ 425.75, 59.914000867910666 ], "wc_reply_reviewers_avg": [ 6.5, 6.5383484153110105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9765047950014997585&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "wisc.edu;ucsd.edu;cs.utexas.edu;cse.ucsd.edu;wisc.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "University of Wisconsin;University of California, San Diego;University of Texas at Austin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu;https://www.utexas.edu", "aff_unique_abbr": "UW;UCSD;UT Austin", "aff_campus_unique_index": "0;1;2;1;0", 
"aff_campus_unique": "Madison;San Diego;Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Provable Adversarial Robustness for Group Equivariant Tasks: Graphs, Point Clouds, Molecules, and More", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70555", "id": "mLe63bAYc7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/00db17c36b5435195760520efa96d99c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mLe63bAYc7", "openreview": "https://openreview.net/forum?id=mLe63bAYc7", "poster": "/media/PosterPDFs/NeurIPS%202023/70555.png?t=1701529700.1135688", "slides": "https://nips.cc/virtual/2023/poster/70555", "video": "https://nips.cc/virtual/2023/poster/70555", "author_site": "Jan Schuchardt, Yan Scholten, Stephan G\u00fcnnemann", "tldr": "", "abstract": "A machine learning model is traditionally considered robust if its prediction remains (almost) constant under input perturbations with small norm. However, real-world tasks like molecular property prediction or point cloud segmentation have inherent equivariances, such as rotation or permutation equivariance. In such tasks, even perturbations with large norm do not necessarily change an input's semantic content. Furthermore, there are perturbations for which a model's prediction explicitly needs to change. For the first time, we propose a sound notion of adversarial robustness that accounts for task equivariance. We then demonstrate that provable robustness can be achieved by (1) choosing a model that matches the task's equivariances (2) certifying traditional adversarial robustness. Certification methods are, however, unavailable for many models, such as those with continuous equivariances. We close this gap by developing the framework of equivariance-preserving randomized smoothing, which enables architecture-agnostic certification. We additionally derive the first architecture-specific graph edit distance certificates, i.e. sound robustness guarantees for isomorphism equivariant tasks like node classification. 
Overall, a sound notion of robustness is an important prerequisite for future work at the intersection of robust and geometric machine learning.", "keywords": "Adversarial robustness;Geometric machine learning;Equivariances;Robustness Certification;Graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Jan Schuchardt;Yan Scholten;Stephan G\u00fcnnemann", "authorids": "~Jan_Schuchardt1;~Yan_Scholten1;~Stephan_G\u00fcnnemann1", "gender": ";;M", "homepage": "https://www.cs.cit.tum.de/daml/team/jan-schuchardt/;;http://www.daml.in.tum.de", "dblp": "241/5487;240/9194;43/3011", "google_scholar": "O-cixlwAAAAJ;8G2bJ7sAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jan_Schuchardt1;~Yan_Scholten1;~Stephan_G\u00fcnnemann1", "aff": "Department of Informatics, Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich", "aff_domain": "in.tum.de;tum.de;tum.de", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nschuchardt2023provable,\ntitle={(Provable) Adversarial Robustness for Group Equivariant Tasks: Graphs, Point Clouds, Molecules, and More},\nauthor={Jan Schuchardt and Yan Scholten and Stephan G{\\\"u}nnemann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mLe63bAYc7}\n}", "github": "", "project": "", "reviewers": "t1fA;nMMh;jBnt;y2Vs;4EgB;kw3s", "pdf_size": 1311089, "rating": "4;5;5;6;7;7", "confidence": "3;3;1;2;4;4", "soundness": "4;3;2;3;4;4", "novelty": "2;2;2;3;4;3", "presentation": "3;2;2;2;3;4", "wc_summary": "287;51;27;105;137;60", "wc_strengths": "49;30;31;80;79;206", "wc_weaknesses": "196;244;52;184;35;147", "wc_questions": "2;36;19;50;99;37", "wc_limitations": "11;1;28;6;7;64", "wc_review": "545;362;157;425;357;514", "wc_reply_reviewers": "0;33;0;33;24;65", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;1;0;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 5.666666666666667, 1.1055415967851332 ], "confidence_avg": [ 2.8333333333333335, 1.0671873729054748 ], "soundness_avg": [ 3.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999299 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 111.16666666666667, 86.54943994940439 ], "wc_strengths_avg": [ 79.16666666666667, 60.18697718129913 ], "wc_weaknesses_avg": [ 143.0, 75.99561390852325 ], "wc_questions_avg": [ 40.5, 30.24758943563382 ], "wc_limitations_avg": [ 19.5, 21.623675296612586 ], "wc_review_avg": [ 393.3333333333333, 126.96543711665086 ], "wc_reply_reviewers_avg": [ 25.833333333333332, 22.26669161675249 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5179697702828122, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12541016976376027112&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "in.tum.de;tum.de;tum.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": "Department of Informatics;;", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM;TUM", "aff_campus_unique_index": "0", "aff_campus_unique": "Munich;", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "Germany" }, { "title": "Slimmed Asymmetrical Contrastive Learning and Cross Distillation for Lightweight Model Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70554", "id": "mOVEJletyD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8393d955a00c463a982cefe77d0404e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mOVEJletyD", "openreview": "https://openreview.net/forum?id=mOVEJletyD", "poster": "/media/PosterPDFs/NeurIPS%202023/70554.png?t=1701751245.6404185", "slides": "https://nips.cc/virtual/2023/poster/70554", "video": "https://nips.cc/virtual/2023/poster/70554", "author_site": "Jian Meng, Li Yang, Kyungmin Lee, Jinwoo Shin, Deliang Fan, Jae-sun Seo", "tldr": "", "abstract": "Contrastive learning (CL) has been widely investigated with various learning mechanisms and achieves strong capability in learning representations of data in a self-supervised manner using unlabeled data. A common fashion of contrastive learning on this line is employing mega-sized encoders to achieve comparable performance as the supervised learning counterpart. Despite the success of the labelless training, current contrastive learning algorithms *failed* to achieve good performance with lightweight (compact) models, e.g., MobileNet, while the requirements of the heavy encoders impede the energy-efficient computation, especially for resource-constrained AI applications. Motivated by this, we propose a new self-supervised CL scheme, named SACL-XD, consisting of two technical components, **S**limmed **A**symmetrical **C**ontrastive **L**earning (SACL) and **Cross**-**D**istillation (XD), which collectively enable efficient CL with compact models. While relevant prior works employed a strong pre-trained model as the teacher of unsupervised knowledge distillation to a lightweight encoder, our proposed method trains CL models from scratch and outperforms them even without such an expensive requirement. 
Compared to the SoTA lightweight CL training (distillation) algorithms, SACL-XD achieves 1.79% ImageNet-1K accuracy improvement on MobileNet-V3 with 64$\\times$ training FLOPs reduction.", "keywords": "Contrastive Learning;Self-supervised Learning;Energy-efficient contrastive learning", "primary_area": "", "supplementary_material": "/attachment/1f23abf074c290357fcce7bcd1f027b1f3b18c3c.pdf", "author": "Jian Meng;Li Yang;Kyungmin Lee;Jinwoo Shin;Deliang Fan;Jae-sun Seo", "authorids": "~Jian_Meng1;~Li_Yang6;~Kyungmin_Lee1;~Jinwoo_Shin1;~Deliang_Fan1;~Jae-sun_Seo1", "gender": "M;M;M;M;M;M", "homepage": "https://mengjian0502.github.io/;https://lyang-666.github.io/;https://kyungmnlee.github.io/;https://sites.google.com/site/mijirim/;https://faculty.engineering.asu.edu/dfan/;https://seo.ece.cornell.edu/", "dblp": ";;57/5118;31/7062;129/1701;60/2321", "google_scholar": "ei2__0AAAAAJ;qpUT1I8AAAAJ;6dpime0AAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;sAflhJUAAAAJ;0eA8Fr8AAAAJ", "orcid": ";0000-0002-2839-6196;;;0000-0002-7989-6297;", "linkedin": "jian-meng/;li-yang-268710139/;;;;", "or_profile": "~Jian_Meng1;~Li_Yang6;~Kyungmin_Lee1;~Jinwoo_Shin1;~Deliang_Fan1;~Jae-sun_Seo1", "aff": "Arizona State University;Arizona State University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Arizona State University;Cornell Tech", "aff_domain": "asu.edu;asu.edu;kaist.ac.kr;kaist.ac.kr;asu.edu;cornell.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nmeng2023slimmed,\ntitle={Slimmed Asymmetrical Contrastive Learning and Cross Distillation for Lightweight Model Training},\nauthor={Jian Meng and Li Yang and Kyungmin Lee and Jinwoo Shin and Deliang Fan and Jae-sun Seo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mOVEJletyD}\n}", "github": "", "project": "", "reviewers": "8Hzi;J8Hb;UkVS;CDjk", "pdf_size": 646720, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "102;65;59;76", "wc_strengths": "95;21;106;87", "wc_weaknesses": "93;156;54;135", "wc_questions": "123;4;69;75", "wc_limitations": "28;9;7;4", "wc_review": "441;255;295;377", "wc_reply_reviewers": "48;31;0;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 16.469669092000604 ], "wc_strengths_avg": [ 77.25, 33.16907445196504 ], "wc_weaknesses_avg": [ 109.5, 39.2587569849072 ], "wc_questions_avg": [ 67.75, 42.34014052881733 ], "wc_limitations_avg": [ 12.0, 9.40744386111339 ], "wc_review_avg": [ 342.0, 72.11795892841117 ], "wc_reply_reviewers_avg": [ 26.5, 17.211914478058507 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6400658663624892647&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "asu.edu;asu.edu;kaist.ac.kr;kaist.ac.kr;asu.edu;cornell.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;0;2", 
"aff_unique_norm": "Arizona State University;Korea Advanced Institute of Science and Technology;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.asu.edu;https://www.kaist.ac.kr;https://tech.cornell.edu", "aff_unique_abbr": "ASU;KAIST;Cornell Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York City", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Scaling Open-Vocabulary Object Detection", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70553", "id": "mQPNcBWjGc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6d58fc68c0f3c36ae6e0e64478a69c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mQPNcBWjGc", "openreview": "https://openreview.net/forum?id=mQPNcBWjGc", "poster": "/media/PosterPDFs/NeurIPS%202023/70553.png?t=1701429362.8216121", "slides": "https://nips.cc/virtual/2023/poster/70553", "video": "https://nips.cc/virtual/2023/poster/70553", "author_site": "Matthias Minderer, Alexey Gritsenko, Neil Houlsby", "tldr": "", "abstract": "Open-vocabulary object detection has benefited greatly from pretrained vision-language models, but is still limited by the amount of available detection training data. While detection training data can be expanded by using Web image-text pairs as weak supervision, this has not been done at scales comparable to image-level pretraining. Here, we scale up detection data with self-training, which uses an existing detector to generate pseudo-box annotations on image-text pairs. Major challenges in scaling self-training are the choice of label space, pseudo-annotation filtering, and training efficiency. We present the OWLv2 model and OWL-ST self-training recipe, which address these challenges. OWLv2 surpasses the performance of previous state-of-the-art open-vocabulary detectors already at comparable training scales (~10M examples). However, with OWL-ST, we can scale to over 1B examples, yielding further large improvement: With an L/14 architecture, OWL-ST improves AP on LVIS rare classes, for which the model has seen no human box annotations, from 31.2% to 44.6% (43% relative improvement). OWL-ST unlocks Web-scale training for open-world localization, similar to what has been seen for image classification and language modelling. Code and checkpoints are available on GitHub.", "keywords": "object detection;open-vocabulary object detection;vision transformers;vision-language models;scaling;self-training", "primary_area": "", "supplementary_material": "/attachment/53bdfa634616b3ce18b1a6fead17bb0a4675c405.pdf", "author": "Matthias Minderer;Alexey A. Gritsenko;Neil Houlsby", "authorids": "~Matthias_Minderer1;~Alexey_A._Gritsenko1;~Neil_Houlsby1", "gender": "M;M;Not Specified", "homepage": "https://mjlm.github.io/;https://neilhoulsby.github.io/;", "dblp": "243/3155;91/10669;30/11478", "google_scholar": "57BFBY0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.nl/citations?user=zTy9cUwAAAAJ", "orcid": "0000-0002-6428-8256;;", "linkedin": ";;agritsenko/", "or_profile": "~Matthias_Minderer1;~Neil_Houlsby1;~Alexey_Alexeevich_Gritsenko1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nminderer2023scaling,\ntitle={Scaling Open-Vocabulary Object Detection},\nauthor={Matthias Minderer and Alexey A. 
Gritsenko and Neil Houlsby},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mQPNcBWjGc}\n}", "github": "", "project": "", "reviewers": "F7mV;tv8z;EGtY;5UCC", "pdf_size": 2199599, "rating": "6;7;7;7", "confidence": "5;4;4;4", "soundness": "3;4;4;3", "novelty": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "40;147;94;57", "wc_strengths": "40;31;60;74", "wc_weaknesses": "43;66;51;2", "wc_questions": "18;80;3;125", "wc_limitations": "48;33;8;26", "wc_review": "189;357;216;284", "wc_reply_reviewers": "0;25;12;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.5, 41.027429848821875 ], "wc_strengths_avg": [ 51.25, 16.813313177360374 ], "wc_weaknesses_avg": [ 40.5, 23.71181140275875 ], "wc_questions_avg": [ 56.5, 48.96171974103851 ], "wc_limitations_avg": [ 28.75, 14.376630342329875 ], "wc_review_avg": [ 261.5, 65.10184329187615 ], "wc_reply_reviewers_avg": [ 19.75, 15.594470173750693 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 209, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16992784699932392868&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Rise of AI Language Pathologists: Exploring Two-level Prompt Learning for Few-shot Weakly-supervised Whole Slide Image Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70552", "id": "mSDfBXr8Py", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d599b81036fd1a3b3949b7d444f31082-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mSDfBXr8Py", "openreview": "https://openreview.net/forum?id=mSDfBXr8Py", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70552", "video": "https://nips.cc/virtual/2023/poster/70552", "author_site": "Linhao Qu, xiaoyuan luo, Kexue Fu, Manning Wang, Zhijian Song", "tldr": "", "abstract": "This paper introduces the novel concept of few-shot weakly supervised learning for pathology Whole Slide Image (WSI) classification, denoted as FSWC. A solution is proposed based on prompt learning and the utilization of a large language model, GPT-4. Since a WSI is too large and needs to be divided into patches for processing, WSI classification is commonly approached as a Multiple Instance Learning (MIL) problem. In this context, each WSI is considered a bag, and the obtained patches are treated as instances. The objective of FSWC is to classify both bags and instances with only a limited number of labeled bags. Unlike conventional few-shot learning problems, FSWC poses additional challenges due to its weak bag labels within the MIL framework. 
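To make the bag/instance formulation above concrete, here is a minimal attention-based MIL pooling module in the style of Ilse et al. (2018): it aggregates per-patch instance features into one bag feature, with the attention weights doubling as instance-level scores. This is a generic stand-in, not the paper's prompt-guided pooling, and the feature dimensions are assumptions.

```python
import torch
import torch.nn as nn

class AttentionMILPooling(nn.Module):
    """Aggregate patch-level (instance) features into a single bag feature."""
    def __init__(self, dim=512, hidden=128):
        super().__init__()
        self.score = nn.Sequential(nn.Linear(dim, hidden), nn.Tanh(),
                                   nn.Linear(hidden, 1))

    def forward(self, instances):                  # instances: (num_patches, dim)
        attn = torch.softmax(self.score(instances), dim=0)  # (num_patches, 1)
        bag = (attn * instances).sum(dim=0)                 # (dim,) bag feature
        return bag, attn.squeeze(-1)  # attention weights double as instance scores

# e.g. bag_feat, inst_scores = AttentionMILPooling()(torch.randn(1000, 512))
```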
Drawing inspiration from the recent achievements of vision-language models (V-L models) in downstream few-shot classification tasks, we propose a two-level prompt learning MIL framework tailored for pathology, incorporating language prior knowledge. Specifically, we leverage CLIP to extract instance features for each patch, and introduce a prompt-guided pooling strategy to aggregate these instance features into a bag feature. Subsequently, we employ a small number of labeled bags to facilitate few-shot prompt learning based on the bag features. Our approach incorporates the utilization of GPT-4 in a question-and-answer mode to obtain language prior knowledge at both the instance and bag levels, which are then integrated into the instance and bag level language prompts. Additionally, a learnable component of the language prompts is trained using the available few-shot labeled data. We conduct extensive experiments on three real WSI datasets encompassing breast cancer, lung cancer, and cervical cancer, demonstrating the notable performance of the proposed method in bag and instance classification. All codes will be made publicly accessible.", "keywords": "multiple instance learning;whole slide image classification;prompt learning;vision-language model;few-shot learning", "primary_area": "", "supplementary_material": "/attachment/04517bafa439a78077abd140ce81955927743299.pdf", "author": "Linhao Qu;xiaoyuan Luo;Kexue Fu;Manning Wang;Zhijian Song", "authorids": "~Linhao_Qu1;~xiaoyuan_Luo1;~Kexue_Fu1;~Manning_Wang1;~Zhijian_Song1", "gender": "M;M;M;M;M", "homepage": "https://linhao-qu.com/;;https://kexuefu.me/;http://www.fudanmiccai.org/nd.jsp?id=58#_np=117_394;https://miccai.fudan.edu.cn/34225/list.htm", "dblp": "308/1001.html;;;23/5931;", "google_scholar": "C8gTFhUAAAAJ;;wRs-_DwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0001-8815-7050;0000-0002-8456-5847;0000-0003-1204-0942;0000-0002-9255-3897;", "linkedin": ";;;;", "or_profile": "~Linhao_Qu1;~xiaoyuan_Luo1;~Kexue_Fu1;~Manning_Wang1;~Zhijian_Song1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqu2023the,\ntitle={The Rise of {AI} Language Pathologists: Exploring Two-level Prompt Learning for Few-shot Weakly-supervised Whole Slide Image Classification},\nauthor={Linhao Qu and xiaoyuan Luo and Kexue Fu and Manning Wang and Zhijian Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mSDfBXr8Py}\n}", "github": "", "project": "", "reviewers": "r2fX;5SeZ;VPd6;7zsM", "pdf_size": 512439, "rating": "4;4;5;6", "confidence": "5;4;5;4", "soundness": "3;1;2;2", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "119;89;60;56", "wc_strengths": "46;98;86;83", "wc_weaknesses": "74;420;573;130", "wc_questions": "109;42;65;43", "wc_limitations": "1;19;6;27", "wc_review": "349;668;790;339", "wc_reply_reviewers": "28;0;77;95", "wc_reply_authors": "81;0;32;31", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 25.367301787931645 ], "wc_strengths_avg": [ 78.25, 19.447043477094404 
], "wc_weaknesses_avg": [ 299.25, 205.4888987269142 ], "wc_questions_avg": [ 64.75, 27.151197027018902 ], "wc_limitations_avg": [ 13.25, 10.304731922762475 ], "wc_review_avg": [ 536.5, 197.30496699272425 ], "wc_reply_reviewers_avg": [ 50.0, 37.87479372881125 ], "wc_reply_authors_avg": [ 36.0, 28.991378028648448 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9746650575821325817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "InfoPrompt: Information-Theoretic Soft Prompt Tuning for Natural Language Understanding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70551", "id": "mSNfjOcDUv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c01c0da4fe2ef2df9863f55261e2e924-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mSNfjOcDUv", "openreview": "https://openreview.net/forum?id=mSNfjOcDUv", "poster": "/media/PosterPDFs/NeurIPS%202023/70551.png?t=1702272675.6431801", "slides": "https://nips.cc/virtual/2023/poster/70551", "video": "https://nips.cc/virtual/2023/poster/70551", "author_site": "Junda Wu, Junda Wu, Tong Yu, Rui Wang, Rui Wang, Zhao Song, Ruiyi Zhang, Handong Zhao, Chaochao Lu, Shuai Li, Ricardo Henao", "tldr": "", "abstract": "Soft prompt tuning achieves superior performances across a wide range of few-shot tasks. However, the performances of prompt tuning can be highly sensitive to the initialization of the prompts. We have also empirically observed that conventional prompt tuning methods cannot encode and learn sufficient task-relevant information from prompt tokens. In this work, we develop an information-theoretic framework that formulates soft prompt tuning as maximizing the mutual information between prompts and other model parameters (or encoded representations). This novel view helps us to develop a more efficient, accurate and robust soft prompt tuning method, InfoPrompt. With this framework, we develop two novel mutual information based loss functions, to (i) explore proper prompt initialization for the downstream tasks and learn sufficient task-relevant information from prompt tokens and (ii) encourage the output representation from the pretrained language model to be more aware of the task-relevant information captured in the learnt prompts. Extensive experiments validate that InfoPrompt can significantly accelerate the convergence of the prompt tuning and outperform traditional prompt tuning methods. 
Finally, we provide a formal theoretical result to show that a gradient descent type algorithm can be used to train our mutual information loss.", "keywords": "soft prompt tuning", "primary_area": "", "supplementary_material": "", "author": "Junda Wu;Tong Yu;Rui Wang;Zhao Song;Ruiyi Zhang;Handong Zhao;Chaochao Lu;Shuai Li;Ricardo Henao", "authorids": "~Junda_Wu1;~Tong_Yu3;~Rui_Wang25;~Zhao_Song3;~Ruiyi_Zhang3;~Handong_Zhao3;~Chaochao_Lu1;~Shuai_Li3;~Ricardo_Henao1", "gender": "M;;;M;;;;F;M", "homepage": "https://scholar.google.com/citations?user=_iKeQFwAAAAJ&hl=en;https://www.linkedin.com/in/tong-yu-42790744;;https://www.youtube.com/@zhaosong2031;;;https://causallu.com/;http://shuaili8.github.io;http://rhenaog.github.io", "dblp": "295/8249;32/1593-1;;76/4051-2;;;142/2790;57/2281-10;27/3207", "google_scholar": "_iKeQFwAAAAJ;https://scholar.google.com/citations?hl=en;;yDZct7UAAAAJ;;;C_Qxt0IAAAAJ;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ;p_mm4-YAAAAJ", "orcid": ";0000-0002-5991-2050;;;;;;;0000-0003-4980-845X", "linkedin": ";tong-yu-42790744;;;;;;;", "or_profile": "~Junda_Wu1;~Tong_Yu3;~Rui_Wang25;~Zhao_Song3;~Ruiyi_Zhang3;~Handong_Zhao3;~Chaochao_Lu1;~Shuai_Li3;~Ricardo_Henao1", "aff": "New York University;Adobe Research;;Adobe;;;Shanghai AI Laboratory ;John Hopcroft Center, Shanghai Jiao Tong University;King Abdullah University of Science and Technology", "aff_domain": "nyu.edu;adobe.com;;adobe.com;;;pjlab.org.cn;sjtu.edu.cn;kaust.edu.sa", "position": "MS student;Senior Research Scientist;;Researcher;;;Research Scientist;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023infoprompt,\ntitle={InfoPrompt: Information-Theoretic Soft Prompt Tuning for Natural Language Understanding},\nauthor={Junda Wu and Tong Yu and Rui Wang and Zhao Song and Ruiyi Zhang and Handong Zhao and Chaochao Lu and Shuai Li and Ricardo Henao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mSNfjOcDUv}\n}", "github": "", "project": "", "reviewers": "7rsH;kCoa;WbYf;ysu9", "pdf_size": 674279, "rating": "4;5;6;7", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;3;3;4", "wc_summary": "79;70;90;87", "wc_strengths": "43;31;53;53", "wc_weaknesses": "188;53;176;124", "wc_questions": "1;106;125;43", "wc_limitations": "20;1;44;14", "wc_review": "331;261;488;321", "wc_reply_reviewers": "767;0;167;120", "wc_reply_authors": "1179;0;51;40", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.5, 7.762087348130012 ], "wc_strengths_avg": [ 45.0, 9.055385138137417 ], "wc_weaknesses_avg": [ 135.25, 53.23239145482758 ], "wc_questions_avg": [ 68.75, 49.50946879133324 ], "wc_limitations_avg": [ 19.75, 15.594470173750693 ], "wc_review_avg": [ 350.25, 83.91476330181716 ], "wc_reply_reviewers_avg": [ 263.5, 297.005471330075 ], "wc_reply_authors_avg": [ 317.5, 497.74918382655335 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17817433328016643878&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 7, "email": "nyu.edu;adobe.com;;adobe.com;;;pjlab.org.cn;sjtu.edu.cn;kaust.edu.sa", "author_num": 9, "aff_unique_index": "0;1;1;2;3;4", "aff_unique_norm": "New York University;Adobe;Shanghai AI Laboratory;Shanghai Jiao Tong University;King Abdullah University of Science and Technology", "aff_unique_dep": ";Adobe Research;;John Hopcroft Center;", "aff_unique_url": "https://www.nyu.edu;https://research.adobe.com;https://www.shanghai-ai-lab.com;https://www.sjtu.edu.cn;https://www.kast.kau.edu.sa", "aff_unique_abbr": "NYU;Adobe;SAIL;SJTU;KAUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;1;1;2", "aff_country_unique": "United States;China;Saudi Arabia" }, { "title": "Hierarchical Gaussian Mixture based Task Generative Model for Robust Meta-Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70550", "id": "mVTyeQIiE4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/982ca2640e64bf7a1908b028ebc8734a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mVTyeQIiE4", "openreview": "https://openreview.net/forum?id=mVTyeQIiE4", "poster": "/media/PosterPDFs/NeurIPS%202023/70550.png?t=1701986446.0141807", "slides": "https://nips.cc/virtual/2023/poster/70550", "video": "https://nips.cc/virtual/2023/poster/70550", "author_site": "Yizhou Zhang, Jingchao Ni, Wei Cheng, Zhengzhang Chen, Liang Tong, Haifeng Chen, Yan Liu", "tldr": "", "abstract": "Meta-learning enables quick adaptation of machine learning models to new tasks with limited data. While tasks could come from varying distributions in reality, most of the existing meta-learning methods consider both training and testing tasks as from the same uni-component distribution, overlooking two critical needs of a practical solution: (1) the various sources of tasks may compose a multi-component mixture distribution, and (2) novel tasks may come from a distribution that is unseen during meta-training. In this paper, we demonstrate these two challenges can be solved jointly by modeling the density of task instances. We develop a meta-training framework underlain by a novel Hierarchical Gaussian Mixture based Task Generative Model (HTGM). HTGM extends the widely used empirical process of sampling tasks to a theoretical model, which learns task embeddings, fits the mixture distribution of tasks, and enables density-based scoring of novel tasks. The framework is agnostic to the encoder and scales well with large backbone networks. The model parameters are learned end-to-end by maximum likelihood estimation via an Expectation-Maximization (EM) algorithm. 
Extensive experiments on benchmark datasets indicate the effectiveness of our method for both sample classification and novel task detection.", "keywords": "Few-Shot Learning;Meta Learning;Task Representation", "primary_area": "", "supplementary_material": "/attachment/b8af8baa7c3570228d46ea4d7b03a1bcbf238a80.pdf", "author": "Yizhou Zhang;Jingchao Ni;Wei Cheng;Zhengzhang Chen;Liang Tong;Haifeng Chen;Yan Liu", "authorids": "~Yizhou_Zhang3;~Jingchao_Ni1;~Wei_Cheng1;~Zhengzhang_Chen1;~Liang_Tong1;~Haifeng_Chen1;~Yan_Liu1", "gender": ";M;M;M;M;;F", "homepage": "https://yizhouzhang1997.netlify.app/;;https://chengw07.github.io/;https://zhengzhangchen.github.io/;;https://haifengchen.gitlab.io/intro/;http://www-bcf.usc.edu/~liu32/", "dblp": ";151/3208;89/2506-2.html;14/3744;71/6379;08/57-1.html;150/4295", "google_scholar": "k127fcwAAAAJ;rH9MTZMAAAAJ;PRrGVmoAAAAJ;2t7wQ24AAAAJ;;QzakB68AAAAJ;UUKLPMYAAAAJ", "orcid": ";;;0000-0002-6803-0535;;;0000-0002-7055-9518", "linkedin": ";jingchao-ni-930a3871/;wei-cheng-ml/;;;;", "or_profile": "~Yizhou_Zhang3;~Jingchao_Ni1;~Wei_Cheng1;~Zhengzhang_Chen1;~Liang_Tong1;~Haifeng_Chen1;~Yan_Liu1", "aff": "University of Southern California;Amazon;NEC-Labs;NEC Labs America;Stellar Cyber Inc.;NEC-Labs;University of Southern California", "aff_domain": "usc.edu;amazon.com;nec-labs.com;nec-labs.com;stellarcyber.ai;nec-labs.com;usc.edu", "position": "PhD student;Applied Scientist;Principal Researcher;Senior Research Scientist;Researcher;Researcher;Professor", "bibtex": "@inproceedings{\nzhang2023hierarchical,\ntitle={Hierarchical Gaussian Mixture based Task Generative Model for Robust Meta-Learning},\nauthor={Yizhou Zhang and Jingchao Ni and Wei Cheng and Zhengzhang Chen and Liang Tong and Haifeng Chen and Yan Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mVTyeQIiE4}\n}", "github": "", "project": "", "reviewers": "KXFG;XMuD;dA8b;k7fv", "pdf_size": 939125, "rating": "5;5;6;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "54;55;59;75", "wc_strengths": "31;31;80;82", "wc_weaknesses": "267;79;148;130", "wc_questions": "68;1;255;145", "wc_limitations": "4;15;14;8", "wc_review": "424;181;556;440", "wc_reply_reviewers": "43;22;50;83", "wc_reply_authors": "102;82;22;33", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.75, 8.437268515343103 ], "wc_strengths_avg": [ 56.0, 25.0099980007996 ], "wc_weaknesses_avg": [ 156.0, 68.9021044671351 ], "wc_questions_avg": [ 117.25, 94.45203809341544 ], "wc_limitations_avg": [ 10.25, 4.493050188902857 ], "wc_review_avg": [ 400.25, 136.44847928797154 ], "wc_reply_reviewers_avg": [ 49.5, 21.914607000811127 ], "wc_reply_authors_avg": [ 59.75, 33.24436042398771 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3654565701645929376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "usc.edu;amazon.com;nec-labs.com;nec-labs.com;stellarcyber.ai;nec-labs.com;usc.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;4;2;0", "aff_unique_norm": "University of Southern 
California;Amazon;NEC Laboratories;NEC Labs America;Stellar Cyber Inc.", "aff_unique_dep": ";Amazon.com, Inc.;;;", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com;https://www.nec-labs.com;https://www.nec-labs.com;https://www.stellarcyber.com", "aff_unique_abbr": "USC;Amazon;NEC-Labs;NEC LA;Stellar Cyber", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reining Generalization in Offline Reinforcement Learning via Representation Distinction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70549", "id": "mVywRIDNIl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/802a4350ca4fced76b13b8b320af1543-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mVywRIDNIl", "openreview": "https://openreview.net/forum?id=mVywRIDNIl", "poster": "/media/PosterPDFs/NeurIPS%202023/70549.png?t=1701938830.8887892", "slides": "https://nips.cc/virtual/2023/poster/70549", "video": "https://nips.cc/virtual/2023/poster/70549", "author_site": "Yi Ma, Hongyao Tang, Dong Li, Zhaopeng Meng", "tldr": "", "abstract": "Offline Reinforcement Learning (RL) aims to address the challenge of distribution shift between the dataset and the learned policy, where the value of out-of-distribution (OOD) data may be erroneously estimated due to overgeneralization. It has been observed that a considerable portion of the benefits derived from the conservative terms designed by existing offline RL approaches originates from their impact on the learned representation. This observation prompts us to scrutinize the learning dynamics of offline RL, formalize the process of generalization, and delve into the prevalent overgeneralization issue in offline RL. We then investigate the potential to rein the generalization from the representation perspective to enhance offline RL. Finally, we present Representation Distinction (RD), an innovative plug-in method for improving offline RL algorithm performance by explicitly differentiating between the representations of in-sample and OOD state-action pairs generated by the learning policy. Considering scenarios in which the learning policy mirrors the behavioral policy and similar samples may be erroneously distinguished, we suggest a dynamic adjustment mechanism for RD based on an OOD data generator to prevent data representation collapse and further enhance policy performance. We demonstrate the efficacy of our approach by applying RD to specially-designed backbone algorithms and widely-used offline RL algorithms. 
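Purely as an illustration of the "representation distinction" idea (not the authors' RD objective, which also includes the dynamic adjustment mechanism described above), a toy auxiliary loss that pushes embeddings of in-sample pairs away from embeddings of policy-generated OOD pairs might look like this; the margin is an assumption.

```python
import torch
import torch.nn.functional as F

def distinction_loss(phi_in, phi_ood, margin=0.5):
    """Toy loss: keep in-sample and OOD state-action embeddings distinguishable.

    phi_in:  (B, d) embeddings of (s, a) pairs from the offline dataset
    phi_ood: (B, d) embeddings of (s, a ~ pi) pairs from the learning policy
    """
    phi_in = F.normalize(phi_in, dim=1)
    phi_ood = F.normalize(phi_ood, dim=1)
    cross_sim = phi_in @ phi_ood.t()            # (B, B) cross-group cosine sims
    return F.relu(cross_sim - margin).mean()    # penalize similarity above margin
```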
The proposed RD method significantly improves their performance across various continuous control tasks on D4RL datasets, surpassing several state-of-the-art offline RL algorithms.", "keywords": "Offline Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/ec312beee7a08985d10b454710f5ff344cf9f032.pdf", "author": "Yi Ma;Hongyao Tang;Dong Li;Zhaopeng Meng", "authorids": "~Yi_Ma5;~Hongyao_Tang1;~Dong_Li18;~Zhaopeng_Meng1", "gender": ";M;;", "homepage": "https://mayi1996.top/;https://bluecontra.github.io/;https://github.com/dongleecsu;http://cic.tju.edu.cn/info/1104/1205.htm", "dblp": "69/1112-5.html;220/4275;47/4826-16;67/8175", "google_scholar": "TdVWzqgAAAAJ;yIqzRH4AAAAJ;;", "orcid": "0000-0001-9375-6605;;;", "linkedin": ";;;", "or_profile": "~Yi_Ma5;~Hongyao_Tang1;~Dong_Li18;~Zhaopeng_Meng1", "aff": "Tianjin University;College of Intelligence and Computing, Tianjin University;Institute of Automation, Chinese Academy of Sciences;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;ia.ac.cn;tju.edu.cn", "position": "PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nma2023reining,\ntitle={Reining Generalization in Offline Reinforcement Learning via Representation Distinction},\nauthor={Yi Ma and Hongyao Tang and Dong Li and Zhaopeng Meng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mVywRIDNIl}\n}", "github": "", "project": "", "reviewers": "XJcM;yLhs;S7qm;xZbD", "pdf_size": 632286, "rating": "6;6;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "26;53;30;105", "wc_strengths": "30;150;61;101", "wc_weaknesses": "153;169;50;116", "wc_questions": "13;88;56;276", "wc_limitations": "10;9;14;7", "wc_review": "232;469;211;605", "wc_reply_reviewers": "43;49;42;36", "wc_reply_authors": "6;92;20;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 53.5, 31.468237955119125 ], "wc_strengths_avg": [ 85.5, 44.947191235938206 ], "wc_weaknesses_avg": [ 122.0, 45.79847159021794 ], "wc_questions_avg": [ 108.25, 100.43996963360752 ], "wc_limitations_avg": [ 10.0, 2.5495097567963922 ], "wc_review_avg": [ 379.25, 165.08236580567896 ], "wc_reply_reviewers_avg": [ 42.5, 4.6097722286464435 ], "wc_reply_authors_avg": [ 33.25, 34.28829975370607 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11876922388220100002&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "tju.edu.cn;tju.edu.cn;ia.ac.cn;tju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tianjin University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.tju.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "TJU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "mWMJN0vbDF", "title": "Towards Faithful Sign Language Translation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Sign language translation (SLT) aims to translate perceived visual signals into 
spoken language. Recent works have achieved impressive performance by improving visual representations and adopting advanced machine translation techniques, but the faithfulness (i.e., whether the SLT model captures correct visual signals) in SLT has not received enough attention. In this paper, we explore the association among SLT-relevant tasks and find that the imprecise glosses and limited corpora may hinder faithfulness in SLT. To improve faithfulness in SLT, we first integrate SLT subtasks into a single framework named MonoSLT, which can share the acquired knowledge among SLT subtasks based on their monotonically aligned nature. We further propose two kinds of constraints: the alignment constraint aligns the visual and linguistic embeddings through a shared translation module and synthetic code-switching corpora; the consistency constraint integrates the advantages of subtasks by regularizing the prediction consistency. Experimental results show that the proposed MonoSLT is competitive against previous SLT methods by increasing the utilization of visual signals, especially when glosses are imprecise.", "keywords": "Sign Language Recognition; Sign Language Translation;", "primary_area": "", "supplementary_material": "/attachment/f05a369e2054e6c883bcd28b856030027b5e3acd.pdf", "author": "Yuecong Min;Xilin CHEN", "authorids": "~Yuecong_Min1;~Xilin_CHEN2", "gender": "M;M", "homepage": "https://ycmin95.github.io/;http://vipl.ict.ac.cn/people/_xlchen/", "dblp": "263/3327;c/XilinChen", "google_scholar": "qc2906sAAAAJ;vVx2v20AAAAJ", "orcid": "0000-0002-0696-2468;0000-0003-3024-4404", "linkedin": ";", "or_profile": "~Yuecong_Min1;~Xilin_Chen4", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology", "aff_domain": "ucas.ac.cn;ict.ac.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nmin2023towards,\ntitle={Towards Faithful Sign Language Translation},\nauthor={Yuecong Min and Xilin CHEN},\nyear={2023},\nurl={https://openreview.net/forum?id=mWMJN0vbDF}\n}", "github": "", "project": "", "reviewers": "ujKb;yVLq;mHke;T3bw;6xPF", "site": "https://openreview.net/forum?id=mWMJN0vbDF", "pdf_size": 839636, "rating": "4;5;5;5;6", "confidence": "4;4;5;5;5", "soundness": "3;3;2;3;3", "novelty": "3;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "99;81;82;72;75", "wc_strengths": "19;74;118;34;78", "wc_weaknesses": "177;41;88;148;255", "wc_questions": "1;82;64;5;28", "wc_limitations": "2;37;83;13;10", "wc_review": "298;315;435;272;446", "wc_reply_reviewers": "0;34;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.8, 9.368030742904295 ], "wc_strengths_avg": [ 64.6, 35.029130734290284 ], "wc_weaknesses_avg": [ 141.8, 73.68419097744102 ], "wc_questions_avg": [ 36.0, 32.09361307176243 ], "wc_limitations_avg": [ 29.0, 29.414282245195107 ], "wc_review_avg": [ 353.2, 72.6674617693504 ], "wc_reply_reviewers_avg": [ 6.8, 13.6 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:zr6aKZqA7e4J:scholar.google.com/&scioq=Towards+Faithful+Sign+Language+Translation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "UCAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Class-Conditional Conformal Prediction with Many Classes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70548", "id": "mYz6ApeU4J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cb931eddd563f8d473c355518ce8601c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mYz6ApeU4J", "openreview": "https://openreview.net/forum?id=mYz6ApeU4J", "poster": "/media/PosterPDFs/NeurIPS%202023/70548.png?t=1701380149.1980631", "slides": "https://nips.cc/virtual/2023/poster/70548", "video": "https://nips.cc/virtual/2023/poster/70548", "author_site": "Tiffany Ding, Anastasios Angelopoulos, Stephen Bates, Michael Jordan, Ryan Tibshirani", "tldr": "", "abstract": "Standard conformal prediction methods provide a marginal coverage guarantee,\nwhich means that for a random test point, the conformal prediction set contains \nthe true label with a user-specified probability. In many classification\nproblems, we would like to obtain a stronger guarantee--that for test points\nof a specific class, the prediction set contains the true label with the\nsame user-chosen probability. For the latter goal, existing conformal prediction\nmethods do not work well when there is a limited amount of labeled data per\nclass, as is often the case in real applications where the number of classes is\nlarge. We propose a method called clustered conformal prediction that\nclusters together classes having \"similar\" conformal scores and performs \nconformal prediction at the cluster level. 
Based on empirical evaluation across four image data sets with many (up to 1000) classes, we find that clustered conformal typically outperforms existing methods in terms of class-conditional coverage and set size metrics.", "keywords": "conformal prediction;uncertainty quantification;class imbalance", "primary_area": "", "supplementary_material": "/attachment/efd5bb5e9fdc3e471f4d10ab34793dcd2894c7ee.zip", "author": "Tiffany Ding;Anastasios Nikolas Angelopoulos;Stephen Bates;Michael Jordan;Ryan Tibshirani", "authorids": "~Tiffany_Ding1;~Anastasios_Nikolas_Angelopoulos1;~Stephen_Bates1;~Michael_Jordan1;~Ryan_Tibshirani1", "gender": "F;M;;M;", "homepage": "https://tiffanyding.github.io;http://angelopoulos.ai;https://stephenbates19.github.io/;http://www.cs.berkeley.edu/~jordan/;https://www.stat.berkeley.edu/~ryantibs/", "dblp": ";;;j/MichaelIJordan;", "google_scholar": "U9EvD0wAAAAJ;nfX25MMAAAAJ;;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;", "orcid": ";;0000-0002-3273-8179;0000-0001-8935-817X;", "linkedin": "tiffany-ding-904980149/;anastasiosa/;;;", "or_profile": "~Tiffany_Ding1;~Anastasios_Nikolas_Angelopoulos1;~Stephen_Bates1;~Michael_Jordan1;~Ryan_Tibshirani1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nding2023classconditional,\ntitle={Class-Conditional Conformal Prediction with Many Classes},\nauthor={Tiffany Ding and Anastasios Nikolas Angelopoulos and Stephen Bates and Michael Jordan and Ryan Tibshirani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mYz6ApeU4J}\n}", "github": "", "project": "", "reviewers": "omUB;MBi5;xNf7;aaQt", "pdf_size": 837458, "rating": "4;5;6;7", "confidence": "3;4;3;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "23;84;89;142", "wc_strengths": "61;44;89;90", "wc_weaknesses": "196;143;294;132", "wc_questions": "202;262;55;126", "wc_limitations": "6;40;16;64", "wc_review": "488;573;543;554", "wc_reply_reviewers": "218;73;44;27", "wc_reply_authors": "386;63;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 42.15744299646268 ], "wc_strengths_avg": [ 71.0, 19.45507645834372 ], "wc_weaknesses_avg": [ 191.25, 64.0678351436975 ], "wc_questions_avg": [ 161.25, 78.01081655770564 ], "wc_limitations_avg": [ 31.5, 22.46664193866097 ], "wc_review_avg": [ 539.5, 31.610915836147488 ], "wc_reply_reviewers_avg": [ 90.5, 75.42711713966006 ], "wc_reply_authors_avg": [ 112.25, 160.128659208775 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4665192811569552689&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm":
"University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards the Difficulty for a Deep Neural Network to Learn Concepts of Different Complexities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70547", "id": "mZ3hnyL9bS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8143b8c73073a9a23b9c18e400066471-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mZ3hnyL9bS", "openreview": "https://openreview.net/forum?id=mZ3hnyL9bS", "poster": "/media/PosterPDFs/NeurIPS%202023/70547.png?t=1699595403.1510599", "slides": "https://nips.cc/virtual/2023/poster/70547", "video": "https://nips.cc/virtual/2023/poster/70547", "author_site": "Dongrui Liu, Huiqi Deng, Xu Cheng, Xu Cheng, Qihan Ren, Kangrui Wang, Quanshi Zhang", "tldr": "", "abstract": "This paper theoretically explains the intuition that simple concepts are more likely to be learned by deep neural networks (DNNs) than complex concepts. In fact, recent studies have observed [24, 15] and proved [26] the emergence of interactive concepts in a DNN, i.e., it is proven that a DNN usually only encodes a small number of interactive concepts, and can be considered to use their interaction effects to compute inference scores. Each interactive concept is encoded by the DNN to represent the collaboration between a set of input variables. Therefore, in this study, we aim to theoretically explain that interactive concepts involving more input variables (i.e., more complex concepts) are more difficult to learn. 
Our finding clarifies the exact conceptual complexity that boosts the learning difficulty.", "keywords": "representation complexity;deep learning", "primary_area": "", "supplementary_material": "/attachment/cb430a604dfd782463339f3a0c62d45cd1ba0523.pdf", "author": "Dongrui Liu;Huiqi Deng;Xu Cheng;Qihan Ren;Kangrui Wang;Quanshi Zhang", "authorids": "~Dongrui_Liu1;~Huiqi_Deng1;~Xu_Cheng1;~Qihan_Ren1;~Kangrui_Wang2;~Quanshi_Zhang1", "gender": "M;F;F;M;M;M", "homepage": "https://shenqildr.github.io/;;https://cx1208.github.io/ChengXuSJTU.github.io/;https://nebularaid2000.github.io/;https://jameskrw.github.io/;http://qszhang.com", "dblp": "199/9200.html;229/1317;30/828-5;268/5838;216/9159;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;QEjqzXgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;ybTy_DwAAAAJ;;iFFhHK0AAAAJ", "orcid": "0000-0003-0087-1124;;0009-0001-5086-5673;;;", "linkedin": ";;;;wang-kangrui-8b9a37257/;", "or_profile": "~Dongrui_Liu1;~Huiqi_Deng1;~Xu_Cheng1;~Qihan_Ren1;~Kangrui_Wang2;~Quanshi_Zhang1", "aff": "Shanghai Jiao Tong University,;Shanghai jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;edu.cn;sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn", "position": "PhD student;Postdoc;PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nliu2023towards,\ntitle={Towards the Difficulty for a Deep Neural Network to Learn Concepts of Different Complexities},\nauthor={Dongrui Liu and Huiqi Deng and Xu Cheng and Qihan Ren and Kangrui Wang and Quanshi Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mZ3hnyL9bS}\n}", "github": "", "project": "", "reviewers": "jTKY;DenZ;B4Wy;otLW", "pdf_size": 1357995, "rating": "6;6;6;8", "confidence": "3;2;4;3", "soundness": "3;4;2;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "91;71;123;32", "wc_strengths": "101;54;20;47", "wc_weaknesses": "133;49;56;41", "wc_questions": "2;25;15;1", "wc_limitations": "41;24;2;1", "wc_review": "368;223;216;122", "wc_reply_reviewers": "0;13;33;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 32.98768709685479 ], "wc_strengths_avg": [ 55.5, 29.176188921790317 ], "wc_weaknesses_avg": [ 69.75, 36.9010501205589 ], "wc_questions_avg": [ 10.75, 9.908960591303208 ], "wc_limitations_avg": [ 17.0, 16.62828914831589 ], "wc_review_avg": [ 232.25, 87.93854388150852 ], "wc_reply_reviewers_avg": [ 18.75, 13.160072188251856 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7974751641512584420&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;edu.cn;sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "maSAKOKXTi", "title": "Generative Evolutionary Strategy For Black-Box Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": " Numerous scientific and technological challenges arise in the context of optimization, particularly, black-box optimization within high-dimensional spaces presents significant challenges. Recent investigations into neural network-based black-box optimization have shown promising results. However, the effectiveness of these methods in navigating high-dimensional search spaces remains limited. In this study, we propose a black-box optimization method that combines an evolutionary strategy (ES) with a generative surrogate neural network (GSN) model. This integrated model is designed to function in a complementary manner, where ES addresses the instability inherent in surrogate neural network learning associated with GSN models, and GSN improves the mutation efficiency of ES. Based on our experimental findings, this approach outperforms both classical optimization techniques and standalone GSN model", "keywords": "Non-convex optimization;black-box optimization", "primary_area": "", "supplementary_material": "/attachment/1257b05c748de22ded737d411a018f98ed434bee.pdf", "author": "Changhwi Park", "authorids": "~Changhwi_Park1", "gender": "M", "homepage": "https://www.facebook.com/changhwi.park.1", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Changhwi_Park1", "aff": "Samsung", "aff_domain": "samsung.com", "position": "Researcher", "bibtex": "@misc{\npark2023generative,\ntitle={Generative Evolutionary Strategy For Black-Box Optimization},\nauthor={Changhwi Park},\nyear={2023},\nurl={https://openreview.net/forum?id=maSAKOKXTi}\n}", "github": "", "project": "", "reviewers": "Hd6y;baKP;GwEM;7diD", "site": "https://openreview.net/forum?id=maSAKOKXTi", "pdf_size": 2094718, "rating": "3;4;4;7", "confidence": "4;2;3;4", "soundness": "2;2;2;4", "novelty": "2;2;2;4", "presentation": "2;2;2;3", "wc_summary": "53;77;112;155", "wc_strengths": "72;19;72;58", "wc_weaknesses": "173;17;96;97", "wc_questions": "123;79;69;32", "wc_limitations": "21;12;16;1", "wc_review": "442;204;365;343", "wc_reply_reviewers": "58;53;0;14", "wc_reply_authors": "60;5;0;5", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 38.42118556213486 ], "wc_strengths_avg": [ 55.25, 21.695333599647643 ], "wc_weaknesses_avg": [ 95.75, 55.160561092142636 ], "wc_questions_avg": [ 75.75, 32.41431011143072 ], "wc_limitations_avg": [ 12.5, 7.365459931328117 ], "wc_review_avg": [ 338.5, 85.91420138719792 ], "wc_reply_reviewers_avg": [ 31.25, 24.81305100143874 ], "wc_reply_authors_avg": [ 17.5, 24.62214450449026 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.3015113445777637, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EBQn9vM2YWAJ:scholar.google.com/&scioq=Generative+Evolutionary+Strategy+For+Black-Box+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Samsung", 
"aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Bridging Discrete and Backpropagation: Straight-Through and Beyond", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70546", "id": "mayAyPrhJI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/28b5dfc51e5ae12d84fb7c6172a00df4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mayAyPrhJI", "openreview": "https://openreview.net/forum?id=mayAyPrhJI", "poster": "/media/PosterPDFs/NeurIPS%202023/70546.png?t=1701664895.6712952", "slides": "https://nips.cc/virtual/2023/poster/70546", "video": "https://nips.cc/virtual/2023/poster/70546", "author_site": "Liyuan Liu, Chengyu Dong, Xiaodong Liu, Bin Yu, Jianfeng Gao", "tldr": "", "abstract": "Backpropagation, the cornerstone of deep learning, is limited to computing gradients for continuous variables. This limitation poses challenges for problems involving discrete latent variables. To address this issue, we propose a novel approach to approximate the gradient of parameters involved in generating discrete latent variables. First, we examine the widely used Straight-Through (ST) heuristic and demonstrate that it works as a first-order approximation of the gradient. Guided by our findings, we propose ReinMax, which achieves second-order accuracy by integrating Heun\u2019s method, a second-order numerical method for solving ODEs. ReinMax does not require Hessian or other second-order derivatives, thus having negligible computation overheads. Extensive experimental results on various tasks demonstrate the superiority of ReinMax over the state of the art.", "keywords": "discrete random variables;back-propagation;straight through", "primary_area": "", "supplementary_material": "/attachment/9b106468ce664fbec1e37ba725e427c6f82b6989.zip", "author": "Liyuan Liu;Chengyu Dong;Xiaodong Liu;Bin Yu;Jianfeng Gao", "authorids": "~Liyuan_Liu3;~Chengyu_Dong1;~Xiaodong_Liu1;~Bin_Yu5;~Jianfeng_Gao1", "gender": ";;M;M;M", "homepage": "https://www.chengyu-dong.me/;;https://binyu.stat.berkeley.edu;https://www.microsoft.com/en-us/research/people/jfgao/;https://liyuanlucasliu.github.io/", "dblp": "14/3155;65/622;27/116;92/5339;06/1624", "google_scholar": "Ppfi7j0AAAAJ;NIewcxMAAAAJ;https://scholar.google.com.hk/citations?user=z1iJa3UAAAAJ;https://scholar.google.com/citations?hl=en;RmvbkzYAAAAJ", "orcid": ";;0000-0003-3097-1433;;", "linkedin": ";;bin-yu-b665063/;;", "or_profile": "~Chengyu_Dong1;~Xiaodong_Liu1;~Bin_Yu5;~Jianfeng_Gao1;~Liyuan_Liu1", "aff": "University of California, San Diego;Microsoft Research;University of California, Berkeley;Microsoft Research;University of Illinois, Urbana Champaign", "aff_domain": "ucsd.edu;microsoft.com;berkeley.edu;microsoft.com;illinois.edu", "position": "PhD student;Researcher;Full Professor;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nliu2023bridging,\ntitle={Bridging Discrete and Backpropagation: Straight-Through and Beyond},\nauthor={Liyuan Liu and Chengyu Dong and Xiaodong Liu and Bin Yu and Jianfeng Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mayAyPrhJI}\n}", "github": "", "project": "", "reviewers": "A6oc;fihA;txhR;ZCX1;GD4h;WPn8", "pdf_size": 4912187, "rating": "7;7;7;7;7;8", "confidence": "3;4;4;2;3;4", "soundness": "3;3;4;3;4;4", "novelty": "3;3;3;3;3;4", 
"presentation": "3;3;4;3;4;3", "wc_summary": "158;87;61;89;59;141", "wc_strengths": "78;104;135;85;313;26", "wc_weaknesses": "135;159;183;317;321;63", "wc_questions": "101;15;55;47;43;30", "wc_limitations": "47;5;11;2;21;30", "wc_review": "519;370;445;540;757;290", "wc_reply_reviewers": "88;0;96;83;57;0", "wc_reply_authors": "25;0;22;20;35;0", "reply_reviewers": "1;0;1;1;1;0", "reply_authors": "2;1;2;2;2;1", "rating_avg": [ 7.166666666666667, 0.37267799624996495 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999298 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 99.16666666666667, 37.71125679275207 ], "wc_strengths_avg": [ 123.5, 90.8125358453703 ], "wc_weaknesses_avg": [ 196.33333333333334, 94.17477841167926 ], "wc_questions_avg": [ 48.5, 26.75661911876511 ], "wc_limitations_avg": [ 19.333333333333332, 15.58489297008128 ], "wc_review_avg": [ 486.8333333333333, 147.75815450330387 ], "wc_reply_reviewers_avg": [ 54.0, 40.00416644967538 ], "wc_reply_authors_avg": [ 17.0, 12.909944487358056 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.39999999999999997, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9905135602869020838&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ucsd.edu;microsoft.com;berkeley.edu;microsoft.com;illinois.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "University of California, San Diego;Microsoft;University of California, Berkeley;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Microsoft Research;;", "aff_unique_url": "https://www.ucsd.edu;https://www.microsoft.com/en-us/research;https://www.berkeley.edu;https://illinois.edu", "aff_unique_abbr": "UCSD;MSR;UC Berkeley;UIUC", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "San Diego;;Berkeley;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SEENN: Towards Temporal Spiking Early Exit Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70545", "id": "mbaN0Y0QTw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c801e68207da477bbc44182b9fac1129-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mbaN0Y0QTw", "openreview": "https://openreview.net/forum?id=mbaN0Y0QTw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70545", "video": "https://nips.cc/virtual/2023/poster/70545", "author_site": "Yuhang Li, Tamar Geller, Youngeun Kim, Priyadarshini Panda", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) have recently become more popular as a biologically plausible substitute for traditional Artificial Neural Networks (ANNs). SNNs are cost-efficient and deployment-friendly because they process input in both spatial and temporal manner using binary spikes. However, we observe that the information capacity in SNNs is affected by the number of timesteps, leading to an accuracy-efficiency tradeoff. In this work, we study a fine-grained adjustment of the number of timesteps in SNNs. Specifically, we treat the number of timesteps as a variable conditioned on different input samples to reduce redundant timesteps for certain data. 
\nWe call our method Spiking Early-Exit Neural Networks (**SEENNs**). To determine the appropriate number of timesteps, we propose SEENN-I which uses a confidence score thresholding to filter out the uncertain predictions, and SEENN-II which determines the number of timesteps by reinforcement learning. \nMoreover, we demonstrate that SEENN is compatible with both the directly trained SNN and the ANN-SNN conversion. \nBy dynamically adjusting the number of timesteps, our SEENN achieves a remarkable reduction in the average number of timesteps during inference. For example, our SEENN-II ResNet-19 can achieve **96.1**\\% accuracy with an average of **1.08** timesteps on the CIFAR-10 test dataset. Code is shared at https://github.com/Intelligent-Computing-Lab-Yale/SEENN.", "keywords": "Spiking Neural Networks;ANN-SNN Conversion;Conditional Computing", "primary_area": "", "supplementary_material": "/attachment/ce240bf272ea88f846dea3a7392f96dd383dae9e.zip", "author": "Yuhang Li;Tamar Geller;Youngeun Kim;Priyadarshini Panda", "authorids": "~Yuhang_Li1;~Tamar_Geller1;~Youngeun_Kim1;~Priyadarshini_Panda1", "gender": "M;F;M;F", "homepage": ";;https://youngryan1993.github.io/homepage/;https://intelligentcomputinglab.yale.edu/", "dblp": ";;58/2943;168/8446", "google_scholar": "3UzXL-AAAAAJ;;bh5Ve0EAAAAJ;qA5WsYUAAAAJ", "orcid": ";;;", "linkedin": ";tamar-geller-276081181/;youngeun-kim-3b97b6179/;", "or_profile": "~Yuhang_Li1;~Tamar_Geller1;~Youngeun_Kim1;~Priyadarshini_Panda1", "aff": "Yale University;Yale University;Yale University;Yale University", "aff_domain": "yale.edu;yale.edu;yale.edu;yale.edu", "position": "PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2023seenn,\ntitle={{SEENN}: Towards Temporal Spiking Early Exit Neural Networks},\nauthor={Yuhang Li and Tamar Geller and Youngeun Kim and Priyadarshini Panda},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mbaN0Y0QTw}\n}", "github": "", "project": "", "reviewers": "Sp8z;ZThW;DV4t;J7UY", "pdf_size": 1287115, "rating": "5;6;7;7", "confidence": "4;3;5;5", "soundness": "2;2;4;3", "novelty": "2;2;4;3", "presentation": "3;2;4;3", "wc_summary": "65;45;82;93", "wc_strengths": "48;40;44;61", "wc_weaknesses": "217;111;26;42", "wc_questions": "124;2;21;29", "wc_limitations": "9;1;7;2", "wc_review": "463;199;180;227", "wc_reply_reviewers": "100;0;0;32", "wc_reply_authors": "474;54;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.25, 18.14352501582865 ], "wc_strengths_avg": [ 48.25, 7.8859051477937525 ], "wc_weaknesses_avg": [ 99.0, 75.24293986813646 ], "wc_questions_avg": [ 44.0, 47.217581471312144 ], "wc_limitations_avg": [ 4.75, 3.344772040064913 ], "wc_review_avg": [ 267.25, 114.24617061416107 ], "wc_reply_reviewers_avg": [ 33.0, 40.82891132518721 ], "wc_reply_authors_avg": [ 132.0, 198.68064827758138 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11100614982402938956&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": 
"yale.edu;yale.edu;yale.edu;yale.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Lighting Simulation for Urban Scenes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70544", "id": "mcx8IGneYw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3d7259031023c5aa463187c4a31c95c8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mcx8IGneYw", "openreview": "https://openreview.net/forum?id=mcx8IGneYw", "poster": "/media/PosterPDFs/NeurIPS%202023/70544.png?t=1699832687.6344917", "slides": "https://nips.cc/virtual/2023/poster/70544", "video": "https://nips.cc/virtual/2023/poster/70544", "author_site": "Ava Pun, Gary Sun, Jingkang Wang, Yun Chen, Ze Yang, Sivabalan Manivasagam, Wei-Chiu Ma, Raquel Urtasun", "tldr": "", "abstract": "Different outdoor illumination conditions drastically alter the appearance of urban scenes, and they can harm the performance of image-based robot perception systems if not seen during training. Camera simulation provides a cost-effective solution to create a large dataset of images captured under different lighting conditions. Towards this goal, we propose LightSim, a neural lighting camera simulation system that enables diverse, realistic, and controllable data generation. LightSim automatically builds lighting-aware digital twins at scale from collected raw sensor data and decomposes the scene into dynamic actors and static background with accurate geometry, appearance, and estimated scene lighting. These digital twins enable actor insertion, modification, removal, and rendering from a new viewpoint, all in a lighting-aware manner. LightSim then combines physically-based and learnable deferred rendering to perform realistic relighting of modified scenes, such as altering the sun location and modifying the shadows or changing the sun brightness, producing spatially- and temporally-consistent camera videos. Our experiments show that LightSim generates more realistic relighting results than prior work. Importantly, training perception models on data generated by LightSim can significantly improve their performance. 
Our project page is available at https://waabi.ai/lightsim/.", "keywords": "Scene Relighting;Lighting Estimation;Camera Simulation;Self-Driving;Lighting Simulation;Scene Editing", "primary_area": "", "supplementary_material": "/attachment/cdc1bf42af62266ab947bf363959ca9a00546730.zip", "author": "Ava Pun;Gary Sun;Jingkang Wang;Yun Chen;Ze Yang;Sivabalan Manivasagam;Wei-Chiu Ma;Raquel Urtasun", "authorids": "~Ava_Pun1;~Gary_Sun1;~Jingkang_Wang1;~Yun_Chen3;~Ze_Yang5;~Sivabalan_Manivasagam1;~Wei-Chiu_Ma1;~Raquel_Urtasun1", "gender": "F;;M;;;;M;F", "homepage": "https://avapun.com;https://github.com/fruithead123;http://www.cs.toronto.edu/~wangjk/;;;;https://www.cs.cornell.edu/~weichiu/;http://www.cs.toronto.edu/~urtasun/", "dblp": "283/5875;;223/9910;;;;151/4277;u/RaquelUrtasun", "google_scholar": "GCmF-F0AAAAJ;;c0BTYC4AAAAJ;;;;SVIdh6AAAAAJ;https://scholar.google.ca/citations?user=jyxO2akAAAAJ", "orcid": "0009-0008-4148-3164;;;;;;;", "linkedin": "https://linkedin.com/in/avapun;garysun3698/;;;;;;", "or_profile": "~Ava_Pun1;~Gary_Sun1;~Jingkang_Wang1;~Yun_Chen3;~Ze_Yang5;~Sivabalan_Manivasagam1;~Wei-Chiu_Ma1;~Raquel_Urtasun1", "aff": "University of Waterloo;University of Waterloo;University of Toronto;;;;Massachusetts Institute of Technology;Department of Computer Science, University of Toronto", "aff_domain": "uwaterloo.ca;cs.uwaterloo.ca;toronto.edu;;;;mit.edu;cs.toronto.edu", "position": "Undergrad student;Undergrad student;PhD student;;;;PhD student;Full Professor", "bibtex": "@inproceedings{\npun2023neural,\ntitle={Neural Lighting Simulation for Urban Scenes},\nauthor={Ava Pun and Gary Sun and Jingkang Wang and Yun Chen and Ze Yang and Sivabalan Manivasagam and Wei-Chiu Ma and Raquel Urtasun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mcx8IGneYw}\n}", "github": "", "project": "", "reviewers": "G49E;25CV;5Bwf;ENz6;6GXP", "pdf_size": 48023545, "rating": "4;5;5;6;7", "confidence": "3;4;3;4;5", "soundness": "3;3;3;3;4", "novelty": "2;3;2;2;3", "presentation": "2;3;4;2;4", "wc_summary": "34;98;172;106;257", "wc_strengths": "34;117;117;78;77", "wc_weaknesses": "128;206;383;118;79", "wc_questions": "3;27;238;183;73", "wc_limitations": "3;10;34;50;8", "wc_review": "202;458;944;535;494", "wc_reply_reviewers": "115;202;250;115;123", "wc_reply_authors": "398;265;603;256;258", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;4;4;4;3", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 133.4, 75.6983487270363 ], "wc_strengths_avg": [ 84.6, 30.858386218336175 ], "wc_weaknesses_avg": [ 182.8, 108.25414541716174 ], "wc_questions_avg": [ 104.8, 90.86781608468422 ], "wc_limitations_avg": [ 21.0, 18.022208521710095 ], "wc_review_avg": [ 526.6, 238.92559511278822 ], "wc_reply_reviewers_avg": [ 161.0, 55.27748185292091 ], "wc_reply_authors_avg": [ 356.0, 134.65362973199052 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.8, 0.39999999999999997 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8910421112136307, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12923691923447379377&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "uwaterloo.ca;cs.uwaterloo.ca;toronto.edu;;;;mit.edu;cs.toronto.edu", "author_num": 8, "aff_unique_index": 
"0;0;1;2;1", "aff_unique_norm": "University of Waterloo;University of Toronto;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://uwaterloo.ca;https://www.utoronto.ca;https://web.mit.edu", "aff_unique_abbr": "UW;U of T;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Large Language Models Are Zero-Shot Time Series Forecasters", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70543", "id": "md68e8iZK1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3eb7ca52e8207697361b2c0fb3926511-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=md68e8iZK1", "openreview": "https://openreview.net/forum?id=md68e8iZK1", "poster": "/media/PosterPDFs/NeurIPS%202023/70543.png?t=1702441946.321654", "slides": "https://nips.cc/virtual/2023/poster/70543", "video": "https://nips.cc/virtual/2023/poster/70543", "author_site": "Nate Gruver, Marc Finzi, Shikai Qiu, Andrew Wilson", "tldr": "", "abstract": "By encoding time series as a string of numerical digits, we can frame time series forecasting as next-token prediction in text. Developing this approach, we find that large language models (LLMs) such as GPT-3 and LLaMA-2 can surprisingly zero-shot extrapolate time series at a level comparable to or exceeding the performance of purpose-built time series models trained on the downstream tasks. To facilitate this performance, we propose procedures for effectively tokenizing time series data and converting discrete distributions over tokens into highly flexible densities over continuous values. We argue the success of LLMs for time series stems from their ability to naturally represent multimodal distributions, in conjunction with biases for simplicity, and repetition, which align with the salient features in many time series, such as repeated seasonal trends. We also show how LLMs can naturally handle missing data without imputation through non-numerical text, accommodate textual side information, and answer questions to help explain predictions. 
While we find that increasing model size generally improves performance on time series, we show GPT-4 can perform worse than GPT-3 because of how it tokenizes numbers, and poor uncertainty calibration, which is likely the result of alignment interventions such as RLHF.", "keywords": "large language models;time series;probabilistic forecasting", "primary_area": "", "supplementary_material": "/attachment/db40c53559a410108168af3004cb3879ba53ced1.pdf", "author": "Nate Gruver;Marc Anton Finzi;Shikai Qiu;Andrew Gordon Wilson", "authorids": "~Nate_Gruver1;~Marc_Anton_Finzi1;~Shikai_Qiu1;~Andrew_Gordon_Wilson1", "gender": "M;M;M;Not Specified", "homepage": "https://ngruver.github.io/;https://mfinzi.github.io;https://shikaiqiu.github.io/;https://cims.nyu.edu/~andrewgw", "dblp": "223/5568;222/3062;;65/10453", "google_scholar": "R5QNdhcAAAAJ;ysMAhlwAAAAJ;pK0OAsQAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Nate_Gruver1;~Marc_Anton_Finzi1;~Shikai_Qiu1;~Andrew_Gordon_Wilson1", "aff": "New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ngruver2023large,\ntitle={Large Language Models Are Zero-Shot Time Series Forecasters},\nauthor={Nate Gruver and Marc Anton Finzi and Shikai Qiu and Andrew Gordon Wilson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=md68e8iZK1}\n}", "github": "", "project": "", "reviewers": "AFnc;Zuxw;duBe;kUEg;s41P;URMj;z6DB;eErN", "pdf_size": 1284787, "rating": "2;3;3;4;5;6;7;7", "confidence": "4;4;3;3;3;2;4;4", "soundness": "2;2;2;2;3;3;3;3", "novelty": "2;1;1;2;3;2;3;4", "presentation": "3;2;2;3;2;3;3;3", "wc_summary": "50;94;368;166;57;65;45;220", "wc_strengths": "147;30;15;50;72;149;70;68", "wc_weaknesses": "224;120;7;108;221;66;314;11", "wc_questions": "646;20;7;57;200;28;9;61", "wc_limitations": "44;16;15;4;45;59;6;15", "wc_review": "1111;280;412;385;595;367;444;375", "wc_reply_reviewers": "0;0;88;282;268;109;37;13", "wc_reply_authors": "0;0;0;1154;1092;177;0;0", "reply_reviewers": "0;0;1;1;1;1;1;1", "reply_authors": "1;1;1;3;3;2;1;1", "rating_avg": [ 4.625, 1.79843682124227 ], "confidence_avg": [ 3.375, 0.6959705453537527 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.9682458365518543 ], "presentation_avg": [ 2.625, 0.4841229182759271 ], "wc_summary_avg": [ 133.125, 106.32313659312351 ], "wc_strengths_avg": [ 75.125, 46.08806108961409 ], "wc_weaknesses_avg": [ 133.875, 102.97018682609058 ], "wc_questions_avg": [ 128.5, 204.17578210943628 ], "wc_limitations_avg": [ 25.5, 19.35846068260594 ], "wc_review_avg": [ 496.125, 246.99313629127428 ], "wc_reply_reviewers_avg": [ 99.625, 107.8829661021609 ], "wc_reply_authors_avg": [ 302.875, 477.184827268219 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.625, 0.8569568250501305 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.08738402006519946, "gs_citation": 438, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13126498984912232673&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", 
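One plausible rendering of the digit-level serialization the abstract above describes (values rescaled, truncated to fixed precision, digit-separated so the tokenizer emits one token per digit, timesteps joined with commas); the scale constant and separators here are assumptions rather than the paper's exact specification.

def serialize_series(values, precision=2, scale=None):
    # Rescale so magnitudes fit a fixed digit budget, drop the decimal point,
    # and space-separate digits so a subword tokenizer sees one token per digit.
    scale = scale or max(abs(v) for v in values) or 1.0
    tokens = []
    for v in values:
        s = f"{abs(v) / scale * 100:.{precision}f}".replace(".", "")
        tokens.append(("-" if v < 0 else "") + " ".join(s))
    return " , ".join(tokens)

print(serialize_series([0.64, 0.70, 0.81]))
# -> "7 9 0 1 , 8 6 4 2 , 1 0 0 0 0"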
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beyond Average Return in Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70542", "id": "mgNu8nDFwa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b0a34e3c64f7e842f20ec10479c32b35-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mgNu8nDFwa", "openreview": "https://openreview.net/forum?id=mgNu8nDFwa", "poster": "/media/PosterPDFs/NeurIPS%202023/70542.png?t=1701799719.500346", "slides": "https://nips.cc/virtual/2023/poster/70542", "video": "https://nips.cc/virtual/2023/poster/70542", "author_site": "Alexandre Marthe, Aur\u00e9lien Garivier, Claire Vernade", "tldr": "", "abstract": "What are the functionals of the reward that can be computed and optimized exactly in Markov Decision Processes?\nIn the finite-horizon, undiscounted setting, Dynamic Programming (DP) can only handle these operations efficiently for certain classes of statistics. We summarize the characterization of these classes for policy evaluation, and give a new answer for the planning problem. Interestingly, we prove that only generalized means can be optimized exactly, even in the more general framework of Distributional Reinforcement Learning (DistRL).\nDistRL permits, however, to evaluate other functionals approximately. We provide error bounds on the resulting estimators, and discuss the potential of this approach as well as its limitations.\nThese results contribute to advancing the theory of Markov Decision Processes by examining overall characteristics of the return, and particularly risk-conscious strategies.", "keywords": "Markov Decision Process;Dynamic Programming;statistical functionnals;Distributionnal Reinforcement Learning;Policy Evaluation;Planning", "primary_area": "", "supplementary_material": "", "author": "Alexandre Marthe;Aur\u00e9lien Garivier;Claire Vernade", "authorids": "~Alexandre_Marthe1;~Aur\u00e9lien_Garivier1;~Claire_Vernade1", "gender": "M;;F", "homepage": "https://perso.ens-lyon.fr/alexandre.marthe/;;https://www.cvernade.com", "dblp": ";62/6263;168/8721", "google_scholar": ";;tE2hCaYAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alexandre_Marthe1;~Aur\u00e9lien_Garivier1;~Claire_Vernade1", "aff": "ENS de Lyon;ENS Lyon;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "ens-lyon.fr;ens-lyon.fr;uni-tuebingen.de", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmarthe2023beyond,\ntitle={Beyond Average Return in Markov Decision Processes},\nauthor={Alexandre Marthe and Aur{\\'e}lien Garivier and Claire Vernade},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mgNu8nDFwa}\n}", "github": "", "project": "", "reviewers": "3T3p;kE1T;RipE;3Rfs", "pdf_size": 547132, "rating": "6;6;7;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;3;4", "wc_summary": "60;218;221;3", "wc_strengths": "23;189;33;67", "wc_weaknesses": "108;193;218;39", "wc_questions": "87;106;353;94", "wc_limitations": "2;17;1;37", "wc_review": "280;723;826;240", "wc_reply_reviewers": "27;62;0;46", "wc_reply_authors": "57;81;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], 
"novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 125.5, 96.14182232514631 ], "wc_strengths_avg": [ 78.0, 66.12866246946176 ], "wc_weaknesses_avg": [ 139.5, 70.91720524668185 ], "wc_questions_avg": [ 160.0, 111.63556780883053 ], "wc_limitations_avg": [ 14.25, 14.58380951603524 ], "wc_review_avg": [ 517.25, 260.1993226355518 ], "wc_reply_reviewers_avg": [ 33.75, 23.09085316743407 ], "wc_reply_authors_avg": [ 34.5, 35.52815784698103 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11493508505146554532&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "ens-lyon.fr;ens-lyon.fr;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "\u00c9cole Normale Sup\u00e9rieure de Lyon;Ecole Normale Sup\u00e9rieure de Lyon;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ens-lyon.fr;https://www.ens-lyon.fr;https://www.uni-tuebingen.de/", "aff_unique_abbr": "ENS de Lyon;ENS Lyon;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;Germany" }, { "title": "Memory-Constrained Algorithms for Convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70541", "id": "mkKQr56xdB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1395b425d06a50e42fafe91cf04f3a98-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mkKQr56xdB", "openreview": "https://openreview.net/forum?id=mkKQr56xdB", "poster": "/media/PosterPDFs/NeurIPS%202023/70541.png?t=1701833734.3570282", "slides": "https://nips.cc/virtual/2023/poster/70541", "video": "https://nips.cc/virtual/2023/poster/70541", "author_site": "Moise Blanchard, Junhui Zhang, Patrick Jaillet", "tldr": "", "abstract": "We propose a family of recursive cutting-plane algorithms to solve feasibility problems with constrained memory, which can also be used for first-order convex optimization. Precisely, in order to find a point within a ball of radius $\\epsilon$ with a separation oracle in dimension $d$---or to minimize $1$-Lipschitz convex functions to accuracy $\\epsilon$ over the unit ball---our algorithms use $\\mathcal O(\\frac{d^2}{p}\\ln \\frac{1}{\\epsilon})$ bits of memory, and make $\\mathcal O((C\\frac{d}{p}\\ln \\frac{1}{\\epsilon})^p)$ oracle calls. The family is parametrized by $p\\in[d]$ and provides an oracle-complexity/memory trade-off in the sub-polynomial regime $\\ln\\frac{1}{\\epsilon}\\gg\\ln d$. While several works gave lower-bound trade-offs (impossibility results)---we explicit here their dependence with $\\ln\\frac{1}{\\epsilon}$, showing that these also hold in any sub-polynomial regime---to the best of our knowledge this is the first class of algorithms that provides a positive trade-off between gradient descent and cutting-plane methods in any regime with $\\epsilon\\leq 1/\\sqrt d$. The algorithms divide the $d$ variables into $p$ blocks and optimize over blocks sequentially, with approximate separation vectors constructed using a variant of Vaidya's method. 
In the regime $\\epsilon \\leq d^{-\\Omega(d)}$, our algorithm with $p=d$ achieves the information-theoretic optimal memory usage and improves the oracle-complexity of gradient descent.", "keywords": "Convex optimization;feasibility problem;first-order methods;memory constraints;cutting planes;oracle complexity", "primary_area": "", "supplementary_material": "/attachment/550644c96c64e5d0bdd40a09af3ddb8cc0edafb5.pdf", "author": "Moise Blanchard;Junhui Zhang;Patrick Jaillet", "authorids": "~Moise_Blanchard1;~Junhui_Zhang1;~Patrick_Jaillet1", "gender": "M;;M", "homepage": "https://moiseb.github.io/;;http://web.mit.edu/jaillet/www/", "dblp": "304/2559;;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;ND0FM6EAAAAJ", "orcid": ";;0000-0002-8585-6566", "linkedin": ";;patrick-jaillet-1260445/", "or_profile": "~Moise_Blanchard1;~Junhui_Zhang1;~Patrick_Jaillet1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nblanchard2023memoryconstrained,\ntitle={Memory-Constrained Algorithms for Convex Optimization},\nauthor={Moise Blanchard and Junhui Zhang and Patrick Jaillet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mkKQr56xdB}\n}", "github": "", "project": "", "reviewers": "MixU;U1uo;6ZsG;snNf;H4w2;LsGk", "pdf_size": 624138, "rating": "6;6;6;6;7;7", "confidence": "4;2;2;3;3;3", "soundness": "3;3;4;4;3;4", "novelty": "3;3;4;3;3;3", "presentation": "3;3;4;3;2;4", "wc_summary": "115;164;90;143;205;55", "wc_strengths": "23;30;35;34;60;95", "wc_weaknesses": "48;18;33;337;64;169", "wc_questions": "160;38;1;102;19;51", "wc_limitations": "15;9;2;9;1;31", "wc_review": "361;259;161;625;349;401", "wc_reply_reviewers": "0;67;0;462;9;44", "wc_reply_authors": "0;0;0;291;0;0", "reply_reviewers": "0;1;0;1;1;1", "reply_authors": "1;1;1;2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 128.66666666666666, 48.97164712588522 ], "wc_strengths_avg": [ 46.166666666666664, 24.653712814818697 ], "wc_weaknesses_avg": [ 111.5, 112.06359801469878 ], "wc_questions_avg": [ 61.833333333333336, 53.96114857520691 ], "wc_limitations_avg": [ 11.166666666666666, 10.040196987886022 ], "wc_review_avg": [ 359.3333333333333, 142.4940544568634 ], "wc_reply_reviewers_avg": [ 97.0, 165.07372090473194 ], "wc_reply_authors_avg": [ 48.5, 108.4492969087398 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17149858514250882, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15277479786322434434&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mit.edu;;mit.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": 
"FIND: A Function Description Benchmark for Evaluating Interpretability Methods", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73478", "id": "mkSDXjX6EM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef0164c1112f56246224af540857348f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=mkSDXjX6EM", "openreview": "https://openreview.net/forum?id=mkSDXjX6EM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73478", "video": "https://nips.cc/virtual/2023/poster/73478", "author_site": "Sarah Schwettmann, Tamar Shaham, Joanna Materzynska, Neil Chowdhury, Shuang Li, Jacob Andreas, David Bau, Antonio Torralba", "tldr": "", "abstract": "Labeling neural network submodules with human-legible descriptions is useful for many downstream tasks: such descriptions can surface failures, guide interventions, and perhaps even explain important model behaviors. To date, most mechanistic descriptions of trained networks have involved small models, narrowly delimited phenomena, and large amounts of human labor. Labeling all human-interpretable sub-computations in models of increasing size and complexity will almost certainly require tools that can generate and validate descriptions automatically. Recently, techniques that use learned models in-the-loop for labeling have begun to gain traction, but methods for evaluating their efficacy are limited and ad-hoc. How should we validate and compare open-ended labeling tools? This paper introduces FIND (Function INterpretation and Description), a benchmark suite for evaluating the building blocks of automated interpretability methods. FIND contains functions that resemble components of trained neural networks, and accompanying descriptions of the kind we seek to generate. The functions are procedurally constructed across textual and numeric domains, and involve a range of real-world complexities, including noise, composition, approximation, and bias. We evaluate methods that use pretrained language models (LMs) to produce code-based and natural language descriptions of function behavior. Additionally, we introduce a new interactive method in which an Automated Interpretability Agent (AIA) generates function descriptions. We find that an AIA, built with an off-the-shelf LM augmented with black-box access to functions, can sometimes infer function structure\u2014acting as a scientist by forming hypotheses, proposing experiments, and updating descriptions in light of new data. However, FIND also reveals that LM-based descriptions capture global function behavior while missing local details. 
These results suggest that FIND will be useful for characterizing the performance of more sophisticated interpretability methods before they are applied to real-world models.", "keywords": "Interpretability;Explainability;Dataset;Benchmark;LLMs;LM Agents;Language Model", "primary_area": "", "supplementary_material": "", "author": "Sarah Schwettmann;Tamar Rott Shaham;Joanna Materzynska;Neil Chowdhury;Shuang Li;Jacob Andreas;David Bau;Antonio Torralba", "authorids": "~Sarah_Schwettmann2;~Tamar_Rott_Shaham1;~Joanna_Materzynska1;~Neil_Chowdhury1;~Shuang_Li5;~Jacob_Andreas1;~David_Bau1;~Antonio_Torralba1", "gender": "F;F;F;M;;M;M;M", "homepage": ";https://tamarott.github.io/;https://joaanna.github.io/;;;http://web.mit.edu/jda/www;https://baulab.info/;http://web.mit.edu/torralba/www//", "dblp": ";185/7904;191/4638;;;97/8154;47/3614;t/AntonioBTorralba", "google_scholar": ";https://scholar.google.co.il/citations?user=YRJ-ePMAAAAJ;kxqgE9cAAAAJ;-KN22JEAAAAJ;;dnZ8udEAAAAJ;CYI6cKgAAAAJ;https://scholar.google.com.tw/citations?user=8cxDHS4AAAAJ", "orcid": "0000-0001-6385-1396;;;0000-0002-0590-993X;;;0000-0003-1744-6765;", "linkedin": ";;;;;;david-bau-4b8130/;", "or_profile": "~Sarah_Schwettmann2;~Tamar_Rott_Shaham1;~Joanna_Materzynska1;~Neil_Chowdhury1;~Shuang_Li5;~Jacob_Andreas1;~David_Bau1;~Antonio_Torralba1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Microsoft;Northeastern University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;;microsoft.com;northeastern.edu;mit.edu", "position": "Postdoc;Postdoc;PhD student;Undergrad student;;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nschwettmann2023find,\ntitle={{FIND}: A Function Description Benchmark for Evaluating Interpretability Methods},\nauthor={Sarah Schwettmann and Tamar Rott Shaham and Joanna Materzynska and Neil Chowdhury and Shuang Li and Jacob Andreas and David Bau and Antonio Torralba},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=mkSDXjX6EM}\n}", "github": "", "project": "", "reviewers": "FzK6;fw4P;gaMy;C5TS;riiN", "pdf_size": 10595838, "rating": "4;6;7;7;8", "confidence": "3;4;4;3;3", "wc_summary_and_contributions": "192;45;119;173;169", "wc_strengths": "51;93;82;121;16", "wc_improvement": "134;686;44;25;11", "wc_limitations": "56;21;7;131;10", "wc_correctness": "47;14;1;12;7", "wc_clarity": "17;1;5;22;9", "wc_relation_to_prior_work": "1;34;12;1;24", "wc_documentation": "18;6;11;5;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "517;901;282;491;254", "wc_reply_reviewers": "94;43;0;19;67", "wc_reply_authors": "999;1169;75;268;49", "reply_reviewers": "1;1;0;1;1", "reply_authors": "4;3;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 139.6, 53.10216568088349 ], "wc_strengths_avg": [ 72.6, 36.10318545502599 ], "wc_improvement_avg": [ 180.0, 256.60631325047325 ], "wc_limitations_avg": [ 45.0, 46.39396512478751 ], "wc_correctness_avg": [ 16.2, 16.04244370412438 ], "wc_clarity_avg": [ 10.8, 7.704544113703289 ], "wc_relation_to_prior_work_avg": [ 14.4, 12.970736293672768 ], "wc_documentation_avg": [ 9.4, 4.758150901348127 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 489.0, 231.7783423877218 ], "wc_reply_reviewers_avg": [ 44.6, 
33.434114314573975 ], "wc_reply_authors_avg": [ 512.0, 476.1705576786536 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.06019292654288467, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15037132774725035935&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;mit.edu;mit.edu;;microsoft.com;northeastern.edu;mit.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft;Northeastern University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com;https://www.northeastern.edu", "aff_unique_abbr": "MIT;Microsoft;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Mean Estimation Without Moments for Symmetric Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70540", "id": "mkve1raJUc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c59ace4fc4872a14df13d91762ad4f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mkve1raJUc", "openreview": "https://openreview.net/forum?id=mkve1raJUc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70540", "video": "https://nips.cc/virtual/2023/poster/70540", "author_site": "Gleb Novikov, David Steurer, Stefan Tiegel", "tldr": "", "abstract": "We study the problem of robustly estimating the mean or location parameter without moment assumptions.\nKnown computationally efficient algorithms rely on strong distributional assumptions, such as sub-Gaussianity, or (certifiably) bounded moments.\nMoreover, the guarantees that they achieve in the heavy-tailed setting are weaker than those for sub-Gaussian distributions with known covariance.\nIn this work, we show that such a tradeoff, between error guarantees and heavy-tails, is not necessary for symmetric distributions.\nWe show that for a large class of symmetric distributions, the same error as in the Gaussian setting can be achieved efficiently.\nThe distributions we study include products of arbitrary symmetric one-dimensional distributions, such as product Cauchy distributions, as well as elliptical distributions, \na vast generalization of the Gaussian distribution.\n\nFor product distributions and elliptical distributions with known scatter (covariance) matrix, we show that given an $\\varepsilon$-corrupted sample, we can with probability at least $1-\\delta$ estimate its location up to error $O(\\varepsilon \\sqrt{\\log(1/\\varepsilon)})$ using $\\tfrac{d\\log(d) + \\log(1/\\delta)}{\\varepsilon^2 \\log(1/\\varepsilon)}$ samples.\nThis result matches the best-known guarantees for the Gaussian distribution and known SQ lower bounds (up to the $\\log(d)$ factor).\nFor elliptical distributions with unknown scatter (covariance) matrix, we propose a sequence of efficient algorithms that approaches this optimal error.\nSpecifically, for every $k \\in \\mathbb{N}$, we design an estimator using time and \nsamples $\\tilde{O}({d^k})$ achieving error $O(\\varepsilon^{1-\\frac{1}{2k}})$.\nThis matches the error and running time guarantees when assuming certifiably bounded moments of order up to $k$.\nFor unknown covariance, such error bounds of $o(\\sqrt{\\varepsilon})$ are not even
known for (general) sub-Gaussian distributions.\n\nOur algorithms are based on a generalization of the well-known filtering technique [DK22].\nMore specifically, we show how this machinery can be combined with Huber-loss-based \ntechniques to work with projections of the noise that behave more nicely than the initial noise.\nMoreover, we show how sum-of-squares proofs can be used to obtain algorithmic guarantees even for distributions without a first moment.\nWe believe that this approach may find other applications in future works.", "keywords": "Robust Mean Estimation;Unbounded First Moment;Symmetric Distributions (Spherical;Elliptical;Product);Filtering Algorithm;Huber Loss", "primary_area": "", "supplementary_material": "/attachment/ef5f5f0d40592496b1376a087f7246f972b4268c.pdf", "author": "Gleb Novikov;David Steurer;Stefan Tiegel", "authorids": "~Gleb_Novikov1;~David_Steurer1;~Stefan_Tiegel1", "gender": ";;", "homepage": ";;https://stefantiegel.com", "dblp": "200/9864;;218/5553", "google_scholar": ";;https://scholar.google.ch/citations?user=WvpFkwsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gleb_Novikov1;~David_Steurer1;~Stefan_Tiegel1", "aff": "Department of Computer Science, Swiss Federal Institute of Technology;;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;;ethz.ch", "position": "PhD student;;PhD student", "bibtex": "@inproceedings{\nnovikov2023robust,\ntitle={Robust Mean Estimation Without Moments for Symmetric Distributions},\nauthor={Gleb Novikov and David Steurer and Stefan Tiegel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mkve1raJUc}\n}", "github": "", "project": "", "reviewers": "DMaa;QF3Z;371F;6FtY", "pdf_size": 571456, "rating": "5;6;7;7", "confidence": "3;4;3;4", "soundness": "3;4;4;4", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "96;42;63;187", "wc_strengths": "101;73;52;115", "wc_weaknesses": "132;61;134;96", "wc_questions": "612;161;5;49", "wc_limitations": "1;1;5;3", "wc_review": "942;338;259;450", "wc_reply_reviewers": "240;10;29;10", "wc_reply_authors": "402;0;20;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.0, 55.41209254305417 ], "wc_strengths_avg": [ 85.25, 24.437420076595647 ], "wc_weaknesses_avg": [ 105.75, 29.93639089803579 ], "wc_questions_avg": [ 206.75, 240.7845250426198 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 497.25, 265.59308631814946 ], "wc_reply_reviewers_avg": [ 72.25, 97.16062731374268 ], "wc_reply_authors_avg": [ 105.5, 171.3789660372591 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15987753612589742559&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "inf.ethz.ch;;ethz.ch", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", 
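The filtering technique of [DK22] that the robust-mean-estimation abstract above generalizes follows an estimate-project-prune template. A basic bounded-covariance sketch is below; the spectral threshold is illustrative, and the paper's Huber-loss and sum-of-squares extensions are not captured.

import numpy as np

def filter_mean(X, eps, spectral_bound=2.0, max_iter=50):
    X = X.copy()
    for _ in range(max_iter):
        mu = X.mean(axis=0)
        evals, evecs = np.linalg.eigh(np.cov(X, rowvar=False))
        if evals[-1] <= spectral_bound:   # covariance looks clean: trust the mean
            return mu
        v = evecs[:, -1]                  # top eigenvector: the direction that
        scores = ((X - mu) @ v) ** 2      # corruptions inflate the most
        X = X[scores < np.quantile(scores, 1.0 - eps)]  # prune outlying points
    return X.mean(axis=0)

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(size=(900, 5)), rng.normal(10.0, 1.0, size=(100, 5))])
print(filter_mean(X, eps=0.1))   # close to 0 despite 10% corruption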
"aff_country_unique": "Switzerland" }, { "title": "Unleashing the Power of Randomization in Auditing Differentially Private ML", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70539", "id": "mlbes5TAAg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d09ef5264966e17adffd3157265c9946-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mlbes5TAAg", "openreview": "https://openreview.net/forum?id=mlbes5TAAg", "poster": "/media/PosterPDFs/NeurIPS%202023/70539.png?t=1699496559.04448", "slides": "https://nips.cc/virtual/2023/poster/70539", "video": "https://nips.cc/virtual/2023/poster/70539", "author_site": "Krishna Pillutla, Galen Andrew, Peter Kairouz, H. Brendan McMahan, Alina Oprea, Sewoong Oh", "tldr": "", "abstract": "We present a rigorous methodology for auditing differentially private machine learning by adding multiple carefully designed examples called canaries. We take a first principles approach based on three key components. First, we introduce Lifted Differential Privacy (LiDP) that expands the definition of differential privacy to handle randomized datasets. This gives us the freedom to design randomized canaries. Second, we audit LiDP by trying to distinguish between the model trained with $K$ canaries versus $K-1$ canaries in the dataset, leaving one canary out. By drawing the canaries i.i.d., LiDP can leverage the symmetry in the design and reuse each privately trained model to run multiple statistical tests, one for each canary. Third, we introduce novel confidence intervals that take advantage of the multiple test statistics by adapting to the empirical higher-order correlations. Together, this new recipe demonstrates significant improvements in sample complexity, both theoretically and empirically, using synthetic and real data. 
Further, recent advances in designing stronger canaries can be readily incorporated in the new framework.", "keywords": "Differential privacy auditing;multiple canaries;randomization;lifting;adaptive confidence intervals", "primary_area": "", "supplementary_material": "", "author": "Krishna Pillutla;Galen Andrew;Peter Kairouz;Hugh Brendan McMahan;Alina Oprea;Sewoong Oh", "authorids": "~Krishna_Pillutla1;~Galen_Andrew1;~Peter_Kairouz1;~Hugh_Brendan_McMahan1;~Alina_Oprea1;~Sewoong_Oh1", "gender": "M;M;M;M;F;M", "homepage": "https://krishnap25.github.io;;https://kairouzp.github.io/;;http://www.ccs.neu.edu/home/alina/;https://homes.cs.washington.edu/~sewoong/", "dblp": "173/5185.html;31/1971;129/1254;;35/3425;80/4366", "google_scholar": "IL7N6sMAAAAJ;;m8NUgw0AAAAJ;;https://scholar.google.com.tw/citations?user=16J3izoAAAAJ;55TAOdgAAAAJ", "orcid": ";;;;0000-0002-4979-5292;", "linkedin": ";;kayrouzp;;alina-oprea-9588bb1;", "or_profile": "~Krishna_Pillutla1;~Galen_Andrew1;~Peter_Kairouz1;~Hugh_Brendan_McMahan1;~Alina_Oprea1;~Sewoong_Oh1", "aff": "Google;Google;Google;Google;Google Research;University of Washington", "aff_domain": "google.com;google.com;google.com;google.com;google.com;uw.edu", "position": "Visiting Researcher;Researcher;Research Scientist;Research Scientist;Visiting Faculty;Associate Professor", "bibtex": "@inproceedings{\npillutla2023unleashing,\ntitle={Unleashing the Power of Randomization in Auditing Differentially Private {ML}},\nauthor={Krishna Pillutla and Galen Andrew and Peter Kairouz and Hugh Brendan McMahan and Alina Oprea and Sewoong Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mlbes5TAAg}\n}", "github": "", "project": "", "reviewers": "W8eW;jDkd;1JBM;2DXh", "pdf_size": 1214162, "rating": "5;7;7;8", "confidence": "3;4;5;4", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "69;132;475;282", "wc_strengths": "49;78;105;119", "wc_weaknesses": "117;104;58;55", "wc_questions": "63;4;357;184", "wc_limitations": "63;8;144;1", "wc_review": "361;326;1139;641", "wc_reply_reviewers": "146;20;0;16", "wc_reply_authors": "313;0;36;0", "reply_reviewers": "1;1;0;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 239.5, 156.43928534738325 ], "wc_strengths_avg": [ 87.75, 26.789690181112583 ], "wc_weaknesses_avg": [ 83.5, 27.408940147331492 ], "wc_questions_avg": [ 152.0, 134.9759237790207 ], "wc_limitations_avg": [ 54.0, 57.24071977185472 ], "wc_review_avg": [ 616.75, 325.2986128159787 ], "wc_reply_reviewers_avg": [ 45.5, 58.504273348192264 ], "wc_reply_authors_avg": [ 87.25, 131.16282819457652 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7998359361741544937&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;google.com;google.com;uw.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Google;University of Washington", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.washington.edu", "aff_unique_abbr": "Google;UW", 
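The leave-one-out canary comparison described in the LiDP abstract can be schematized as follows; train, score, and make_canary are hypothetical placeholders (score should be larger when the canary was in the training set, e.g., a loss gap), and the paper's adaptive higher-order confidence intervals are omitted.

import numpy as np

def audit_lidp(train, score, dataset, make_canary, K=16, trials=20):
    stats_in, stats_out = [], []
    for _ in range(trials):
        canaries = [make_canary() for _ in range(K)]
        model_in = train(dataset + canaries)        # all K canaries included
        model_out = train(dataset + canaries[:-1])  # leave the last canary out
        # Because the canaries are drawn i.i.d., they are exchangeable, so one
        # privately trained model yields one test statistic per canary.
        stats_in.extend(score(model_in, c) for c in canaries)
        stats_out.append(score(model_out, canaries[-1]))
    # A large gap between the two score populations witnesses privacy leakage.
    return np.mean(stats_in) - np.mean(stats_out)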
"aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Language Quantized AutoEncoders: Towards Unsupervised Text-Image Alignment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70538", "id": "mlxRLIy7kc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0df1738319f8c6e15b58cb16ea3cfa57-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mlxRLIy7kc", "openreview": "https://openreview.net/forum?id=mlxRLIy7kc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70538", "video": "https://nips.cc/virtual/2023/poster/70538", "author_site": "Hao Liu, Wilson Yan, Pieter Abbeel", "tldr": "", "abstract": "Recent progress in scaling up large language models has shown impressive capabilities in performing few-shot learning across a wide range of natural language tasks. However, a key limitation is that these language models fundamentally lack grounding to visual perception - a crucial attribute needed to extend to real world tasks such as in visual-question answering and robotics. While prior works have largely connected image to text through pretraining or fine-tuning, learning such alignments are generally costly due to a combination of curating massive datasets and large computational burdens. In order to resolve these limitations, we propose a simple yet effective approach called Language-Quantized AutoEncoder (LQAE), a modification of VQ-VAE that learns to align text-image data in an unsupervised manner by leveraging pretrained language model denoisers (e.g., BERT). Our main idea is to encode images as sequences of text tokens by directly quantizing image embeddings using a pretrained language codebook. We then feed a masked version of the quantized embeddings into a BERT to reconstruct the original input. \nBy doing so, LQAE learns to represent similar images with similar clusters of text tokens, thereby aligning these two modalities without the use of aligned text-image pairs. 
We show LQAE learns text-aligned image tokens that enable few-shot multi-modal learning with large language models, outperforming baseline methods in tasks such as image classification and VQA while requiring as few as 1-10 image-text pairs.", "keywords": "Large Language Model;VQVAE;Vector Quantization;Multimodal", "primary_area": "", "supplementary_material": "/attachment/8cf6c82a0c1e1f2713f8449b71ff74e986bbe1d5.pdf", "author": "Hao Liu;Wilson Yan;Pieter Abbeel", "authorids": "~Hao_Liu1;~Wilson_Yan1;~Pieter_Abbeel2", "gender": "M;M;M", "homepage": "https://wilson1yan.github.io/;https://people.eecs.berkeley.edu/~pabbeel/;https://haoliu.ai", "dblp": ";;09/3214-55", "google_scholar": "tR2Qw0YAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;wtK4Yh4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Wilson_Yan1;~Pieter_Abbeel2;~Hao_Liu10", "aff": "University of California, Berkeley;Covariant;University of California, Berkeley", "aff_domain": "berkeley.edu;covariant.ai;berkeley.edu", "position": "PhD student;Founder;PhD student", "bibtex": "@inproceedings{\nliu2023language,\ntitle={Language Quantized AutoEncoders: Towards Unsupervised Text-Image Alignment},\nauthor={Hao Liu and Wilson Yan and Pieter Abbeel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mlxRLIy7kc}\n}", "github": "", "project": "", "reviewers": "Vpea;wNXC;Z64R;FHFJ", "pdf_size": 962469, "rating": "5;6;6;6", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "45;78;142;79", "wc_strengths": "48;58;111;55", "wc_weaknesses": "96;130;39;77", "wc_questions": "33;192;93;2", "wc_limitations": "4;76;6;8", "wc_review": "226;534;391;221", "wc_reply_reviewers": "140;404;30;15", "wc_reply_authors": "820;1162;41;33", "reply_reviewers": "2;2;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.0, 35.106979363083916 ], "wc_strengths_avg": [ 68.0, 25.089838580588754 ], "wc_weaknesses_avg": [ 85.5, 32.882366094914765 ], "wc_questions_avg": [ 80.0, 72.46723397508697 ], "wc_limitations_avg": [ 23.5, 30.343862641397518 ], "wc_review_avg": [ 343.0, 129.76709906598052 ], "wc_reply_reviewers_avg": [ 147.25, 155.89319260314096 ], "wc_reply_authors_avg": [ 514.0, 492.09501115130195 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5060984023238945473&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "berkeley.edu;covariant.ai;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;Covariant", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "A Causal Framework for Decomposing Spurious Variations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70537", "id": "mm9svgvwvk", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cda6dae05ae5e42ea78be85d5a26f77-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mm9svgvwvk", "openreview": "https://openreview.net/forum?id=mm9svgvwvk", "poster": "/media/PosterPDFs/NeurIPS%202023/70537.png?t=1702485921.1404607", "slides": "https://nips.cc/virtual/2023/poster/70537", "video": "https://nips.cc/virtual/2023/poster/70537", "author_site": "Drago Plecko, Elias Bareinboim", "tldr": "", "abstract": "One of the fundamental challenges found throughout the data sciences is to explain why things happen in specific ways, or through which mechanisms a certain variable $X$ exerts influences over another variable $Y$. In statistics and machine learning, significant efforts have been put into developing machinery to estimate correlations across variables efficiently. In causal inference, a large body of literature is concerned with the decomposition of causal effects under the rubric of mediation analysis. However, many variations are spurious in nature, including different phenomena throughout the applied sciences. Despite the statistical power to estimate correlations and the identification power to decompose causal effects, there is still little understanding of the properties of spurious associations and how they can be decomposed in terms of the underlying causal mechanisms. In this manuscript, we develop formal tools for decomposing spurious variations in both Markovian and Semi-Markovian models. We prove the first results that allow a non-parametric decomposition of spurious effects and provide sufficient conditions for the identification of such decompositions. The described approach has several applications, ranging from explainable and fair AI to questions in epidemiology and medicine, and we empirically demonstrate its use.", "keywords": "Causal Inference;Confounding;Fair and Explainable AI", "primary_area": "", "supplementary_material": "/attachment/0d900bea92a2f0076dfe8bef2d9752069a2d49f0.zip", "author": "Drago Plecko;Elias Bareinboim", "authorids": "~Drago_Plecko1;~Elias_Bareinboim2", "gender": "M;M", "homepage": "https://people.math.ethz.ch/~pleckod/;https://causalai.net", "dblp": "254/3058;85/9005", "google_scholar": ";r5U-D7YAAAAJ", "orcid": "0000-0002-5433-196X;", "linkedin": ";", "or_profile": "~Drago_Plecko1;~Elias_Bareinboim2", "aff": "Columbia University;Columbia University", "aff_domain": "cs.columbia.edu;columbia.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nplecko2023a,\ntitle={A Causal Framework for Decomposing Spurious Variations},\nauthor={Drago Plecko and Elias Bareinboim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mm9svgvwvk}\n}", "github": "", "project": "", "reviewers": "dHJ7;1quh;VtmC;EguC;G3e3", "pdf_size": 570748, "rating": "5;7;7;8;8", "confidence": "2;2;3;3;3", "soundness": "3;4;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "23;41;29;80;97", "wc_strengths": "35;28;25;83;81", "wc_weaknesses": "87;13;125;142;200", "wc_questions": "10;33;4;53;34", "wc_limitations": "8;23;4;44;45", "wc_review": "163;138;187;402;457", "wc_reply_reviewers": "11;0;12;56;0", "wc_reply_authors": "0;0;25;72;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;2;2;1", "rating_avg": [ 7.0, 1.0954451150103321 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], 
"presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 54.0, 29.257477676655586 ], "wc_strengths_avg": [ 50.4, 26.012304780622575 ], "wc_weaknesses_avg": [ 113.4, 62.01483693439821 ], "wc_questions_avg": [ 26.8, 17.769637024992942 ], "wc_limitations_avg": [ 24.8, 17.29045979724079 ], "wc_review_avg": [ 269.4, 132.78042024334763 ], "wc_reply_reviewers_avg": [ 15.8, 20.7499397589487 ], "wc_reply_authors_avg": [ 19.4, 28.025702488965376 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.74535599249993, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5413296321287660118&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimizing Solution-Samplers for Combinatorial Problems: The Landscape of Policy-Gradient Method", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70536", "id": "mmTy1iyU5G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2d950a2cfd8a75124c178a89545b97fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mmTy1iyU5G", "openreview": "https://openreview.net/forum?id=mmTy1iyU5G", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70536", "video": "https://nips.cc/virtual/2023/poster/70536", "author_site": "Constantine Caramanis, Dimitris Fotakis, Alkis Kalavasis, Vasilis Kontonis, Christos Tzamos", "tldr": "", "abstract": "Deep Neural Networks and Reinforcement Learning methods have empirically shown great promise in tackling challenging combinatorial problems. In those methods a deep neural network is used as a solution generator which is then trained by gradient-based methods (e.g., policy gradient) to successively obtain better solution distributions.\nIn this work we introduce a novel theoretical framework for analyzing the effectiveness of such methods. We ask whether there exist generative models that (i) are expressive enough to generate approximately optimal solutions; (ii) have a tractable, i.e, polynomial in the size of the input, number of parameters; (iii) their optimization landscape is benign in the sense that it does not contain sub-optimal stationary points. Our main contribution is a positive answer to this question. Our result holds for a broad class of combinatorial problems including Max- and Min-Cut, Max-$k$-CSP, Maximum-Weight-Bipartite-Matching, and the Traveling Salesman Problem. 
As a byproduct of our analysis we introduce a novel regularization process over vanilla gradient descent and provide theoretical and experimental evidence that it helps address vanishing-gradient issues and escape bad stationary points.", "keywords": "Policy Gradient;Combinatorial Optimization;Gradient Descent", "primary_area": "", "supplementary_material": "/attachment/dda6ab3226bdb4f4616896a90218a06d87097c28.zip", "author": "Constantine Caramanis;Dimitris Fotakis;Alkis Kalavasis;Vasilis Kontonis;Christos Tzamos", "authorids": "~Constantine_Caramanis1;~Dimitris_Fotakis1;~Alkis_Kalavasis1;~Vasilis_Kontonis1;~Christos_Tzamos1", "gender": "M;M;M;M;", "homepage": "http://users.ece.utexas.edu/~cmcaram/constantine_caramanis/Home.html;http://www.softlab.ntua.gr/~fotakis/;https://alkisk.github.io/;http://vkonton.github.io/;https://tzamos.com", "dblp": "96/5760;95/4731;269/9425;203/8777;79/8819", "google_scholar": "47YTUrEAAAAJ;zFDLf0UAAAAJ;NgVIFJwAAAAJ;7_44KWAAAAAJ;wB01auEAAAAJ", "orcid": ";0000-0001-6864-8960;;;", "linkedin": ";;;;", "or_profile": "~Constantine_Caramanis1;~Dimitris_Fotakis1;~Alkis_Kalavasis1;~Vasilis_Kontonis1;~Christos_Tzamos1", "aff": "University of Texas, Austin;National Technical University of Athens;National Technical University of Athens;, University of Texas at Austin;University of Wisconsin, Madison", "aff_domain": "utexas.edu;ntua.gr;ntua.gr;cs.utexas.edu;wisc.edu", "position": "Full Professor;Full Professor;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ncaramanis2023optimizing,\ntitle={Optimizing Solution-Samplers for Combinatorial Problems: The Landscape of Policy-Gradient Method},\nauthor={Constantine Caramanis and Dimitris Fotakis and Alkis Kalavasis and Vasilis Kontonis and Christos Tzamos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mmTy1iyU5G}\n}", "github": "", "project": "", "reviewers": "m1vy;VMa2;jXn4;dPn4", "pdf_size": 1917492, "rating": "6;7;7;10", "confidence": "4;4;4;3", "soundness": "3;4;3;3", "novelty": "4;4;4;4", "presentation": "3;4;4;4", "wc_summary": "91;80;256;141", "wc_strengths": "44;65;169;97", "wc_weaknesses": "646;88;96;5", "wc_questions": "283;37;175;1", "wc_limitations": "88;24;1;5", "wc_review": "1152;294;697;249", "wc_reply_reviewers": "33;0;20;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 142.0, 69.71728623519421 ], "wc_strengths_avg": [ 93.75, 47.36757857437933 ], "wc_weaknesses_avg": [ 208.75, 254.94840164237155 ], "wc_questions_avg": [ 124.0, 112.44998888394787 ], "wc_limitations_avg": [ 29.5, 34.87477598494362 ], "wc_review_avg": [ 598.0, 364.3260901994256 ], "wc_reply_reviewers_avg": [ 17.75, 11.755317945508747 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13305506768451509278&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "utexas.edu;ntua.gr;ntua.gr;cs.utexas.edu;wisc.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "University of Texas at Austin;National 
Technical University of Athens;University of Wisconsin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.ntua.gr;https://www.wisc.edu", "aff_unique_abbr": "UT Austin;NTUA;UW", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Austin;;Madison", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;Greece" }, { "title": "Transient Neural Radiance Fields for Lidar View Synthesis and 3D Reconstruction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70535", "id": "mmmd2vp0n0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e261e92e1cfb820da930ad8c38d0aead-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mmmd2vp0n0", "openreview": "https://openreview.net/forum?id=mmmd2vp0n0", "poster": "/media/PosterPDFs/NeurIPS%202023/70535.png?t=1702333193.5536785", "slides": "https://nips.cc/virtual/2023/poster/70535", "video": "https://nips.cc/virtual/2023/poster/70535", "author_site": "Anagh Malik, Parsa Mirdehghan, Sotiris Nousias, Kyros Kutulakos, David Lindell", "tldr": "", "abstract": "Neural radiance fields (NeRFs) have become a ubiquitous tool for modeling scene appearance and geometry from multiview imagery. Recent work has also begun to explore how to use additional supervision from lidar or depth sensor measurements in the NeRF framework. However, previous lidar-supervised NeRFs focus on rendering conventional camera imagery and use lidar-derived point cloud data as auxiliary supervision; thus, they fail to incorporate the underlying image formation model of the lidar. Here, we propose a novel method for rendering transient NeRFs that take as input the raw, time-resolved photon count histograms measured by a single-photon lidar system, and we seek to render such histograms from novel views. Different from conventional NeRFs, the approach relies on a time-resolved version of the volume rendering equation to render the lidar measurements and capture transient light transport phenomena at picosecond timescales. We evaluate our method on a first-of-its-kind dataset of simulated and captured transient multiview scans from a prototype single-photon lidar. Overall, our work brings NeRFs to a new dimension of imaging at transient timescales, newly enabling rendering of transient imagery from novel views. Additionally, we show that our approach recovers improved geometry and conventional appearance compared to point cloud-based supervision when training on few input viewpoints. Transient NeRFs may be especially useful for applications which seek to simulate raw lidar measurements for downstream tasks in autonomous driving, robotics, and remote sensing.", "keywords": "neural radiance fields;3D reconstruction;single-photon lidar;computational imaging", "primary_area": "", "supplementary_material": "/attachment/5ef701db8fa940b8c206172914a0f3cbfe723e97.pdf", "author": "Anagh Malik;Parsa Mirdehghan;Sotiris Nousias;Kyros Kutulakos;David B. 
Lindell", "authorids": "~Anagh_Malik2;~Parsa_Mirdehghan1;~Sotiris_Nousias2;~Kyros_Kutulakos1;~David_B._Lindell1", "gender": "M;M;M;M;M", "homepage": "https://anaghmalik.github.io;https://www.cs.toronto.edu/~parsa/;;https://www.davidlindell.com/;http://www.cs.toronto.edu/~kyros/", "dblp": "307/3214;230/1343;205/2687;170/2608;64/4875", "google_scholar": ";WwLfOqIAAAAJ;https://scholar.google.ca/citations?user=BIFpV5UAAAAJ;_m-BTtAAAAAJ;https://scholar.google.ca/citations?user=bQJWJPYAAAAJ", "orcid": ";;;;0000-0002-5165-902X", "linkedin": "anaghmalik/;;;;kyros-kutulakos-10b123210", "or_profile": "~Anagh_Malik2;~Parsa_Mirdehghan1;~Sotiris_Nousias2;~David_B._Lindell1;~Kiriakos_N_Kutulakos1", "aff": "University of Toronto;Department of Computer Science, University of Toronto;University of Toronto;University of Toronto;University of Toronto", "aff_domain": "utoronto.ca;cs.toronto.edu;utoronto.ca;cs.toronto.edu;cs.toronto.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmalik2023transient,\ntitle={Transient Neural Radiance Fields for Lidar View Synthesis and 3D Reconstruction},\nauthor={Anagh Malik and Parsa Mirdehghan and Sotiris Nousias and Kyros Kutulakos and David B. Lindell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mmmd2vp0n0}\n}", "github": "", "project": "", "reviewers": "tsWB;Zi1e;kaKw;vJ74", "pdf_size": 2625724, "rating": "4;5;7;9", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;4;3;4", "wc_summary": "119;301;70;183", "wc_strengths": "103;91;71;113", "wc_weaknesses": "292;76;20;52", "wc_questions": "142;55;21;29", "wc_limitations": "25;1;1;7", "wc_review": "681;524;183;384", "wc_reply_reviewers": "526;29;8;0", "wc_reply_authors": "352;15;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 168.25, 86.48518659284953 ], "wc_strengths_avg": [ 94.5, 15.644487847162015 ], "wc_weaknesses_avg": [ 110.0, 106.9392350823588 ], "wc_questions_avg": [ 61.75, 48.0071609241788 ], "wc_limitations_avg": [ 8.5, 9.836157786453 ], "wc_review_avg": [ 443.0, 183.22527118277108 ], "wc_reply_reviewers_avg": [ 140.75, 222.67619428219083 ], "wc_reply_authors_avg": [ 91.75, 150.38014330356253 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12762793492943666813&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "utoronto.ca;cs.toronto.edu;utoronto.ca;cs.toronto.edu;cs.toronto.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Efficient Sampling of Stochastic Differential Equations with Positive Semi-Definite Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70534", "id": "mookk2nLO9", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/cab5ae2704d3e01f06a92512a5376b87-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mookk2nLO9", "openreview": "https://openreview.net/forum?id=mookk2nLO9", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70534", "video": "https://nips.cc/virtual/2023/poster/70534", "author_site": "Anant Raj, Umut Simsekli, Alessandro Rudi", "tldr": "", "abstract": "This paper deals with the problem of efficient sampling from a stochastic differential equation, given the drift function and the diffusion matrix. The proposed approach leverages a recent model for probabilities (Rudi and Ciliberto, 2021) (the positive semi-definite -- PSD model) from which it is possible to obtain independent and identically distributed (i.i.d.) samples at precision $\\varepsilon$ with a cost that is $m^2 d \\log(1/\\varepsilon)$ where $m$ is the dimension of the model, $d$ the dimension of the space. The proposed approach consists in: first, computing the PSD model that satisfies the Fokker-Planck equation (or its fractional variant) associated with the SDE, up to error $\\varepsilon$, and then sampling from the resulting PSD model. Assuming some regularity of the Fokker-Planck solution (i.e. $\\beta$-times differentiability plus some geometric condition on its zeros) We obtain an algorithm that: (a) in the preparatory phase obtains a PSD model with L2 distance $\\varepsilon$ from the solution of the equation, with a model of dimension $m = \\varepsilon^{-(d+1)/(\\beta-2s)} (\\log(1/\\varepsilon))^{d+1}$ where $1/2\\leq s\\leq1$ is the fractional power to the Laplacian, and total computational complexity of $O(m^{3.5} \\log(1/\\varepsilon))$ and then (b) for Fokker-Planck equation, it is able to produce i.i.d.\\ samples with error $\\varepsilon$ in Wasserstein-1 distance, with a cost that is $O(d \\varepsilon^{-2(d+1)/\\beta-2} \\log(1/\\varepsilon)^{2d+3})$ per sample. This means that, if the probability associated with the SDE is somewhat regular, i.e. $\\beta \\geq 4d+2$, then the algorithm requires $O(\\varepsilon^{-0.88} \\log(1/\\varepsilon)^{4.5d})$ in the preparatory phase, and $O(\\varepsilon^{-1/2}\\log(1/\\varepsilon)^{2d+2})$ for each sample. 
Our results suggest that as the true solution gets smoother, we can circumvent the curse of dimensionality without requiring any sort of convexity.", "keywords": "Kernel Methods;Sampling;Fokker-Planck Equation;Fractional Fokker-Planck Equation;Stochastic Differential Equations;Partial Differential Equations", "primary_area": "", "supplementary_material": "/attachment/2464cc017a20571db1fc48ffed3b444a1bd886ce.pdf", "author": "Anant Raj;Umut Simsekli;Alessandro Rudi", "authorids": "~Anant_Raj2;~Umut_Simsekli1;~Alessandro_Rudi1", "gender": ";M;", "homepage": ";https://www.di.ens.fr/~simsekli/;http://www.di.ens.fr/~rudi/", "dblp": ";https://dblp.org/pers/s/Simsekli:Umut.html;63/9170", "google_scholar": ";https://scholar.google.fr/citations?user=CuArAkgAAAAJ;EL-7KFsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Anant_Raj2;~Umut_Simsekli1;~Alessandro_Rudi1", "aff": ";INRIA;\u00c9cole Normale Sup\u00e9rieure, Paris", "aff_domain": ";inria.fr;ens.fr", "position": ";Research Faculty;Associate Professor", "bibtex": "@inproceedings{\nraj2023efficient,\ntitle={Efficient Sampling of Stochastic Differential Equations with Positive Semi-Definite Models},\nauthor={Anant Raj and Umut Simsekli and Alessandro Rudi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mookk2nLO9}\n}", "github": "", "project": "", "reviewers": "biEz;jfmV;T6Bn;oceV", "pdf_size": 611264, "rating": "3;6;7;7", "confidence": "2;2;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "1;3;3;2", "wc_summary": "45;114;90;312", "wc_strengths": "24;94;36;60", "wc_weaknesses": "103;152;16;587", "wc_questions": "28;24;197;38", "wc_limitations": "1;68;1;17", "wc_review": "201;452;340;1014", "wc_reply_reviewers": "263;0;62;28", "wc_reply_authors": "689;125;133;35", "reply_reviewers": "1;0;1;1", "reply_authors": "4;3;4;3", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 140.25, 102.20659225314188 ], "wc_strengths_avg": [ 53.5, 26.7348087705897 ], "wc_weaknesses_avg": [ 214.5, 220.5090701082384 ], "wc_questions_avg": [ 71.75, 72.49267204345554 ], "wc_limitations_avg": [ 21.75, 27.48977082479954 ], "wc_review_avg": [ 501.75, 308.8238778009239 ], "wc_reply_reviewers_avg": [ 88.25, 103.25302659002301 ], "wc_reply_authors_avg": [ 245.5, 258.93000984822135 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6897007348075542, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9400083474807839732&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";inria.fr;ens.fr", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "INRIA;\u00c9cole Normale Sup\u00e9rieure", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.ens.fr", "aff_unique_abbr": "INRIA;ENS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "SUPA: A Lightweight Diagnostic Simulator for Machine Learning in Particle Physics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73477", "id": "msWIK6SKBK", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/cca79c22037280d066fbd8bc35ac2e72-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=msWIK6SKBK", "openreview": "https://openreview.net/forum?id=msWIK6SKBK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73477", "video": "https://nips.cc/virtual/2023/poster/73477", "author_site": "Atul Kumar Sinha, Daniele Paliotta, B\u00e1lint M\u00e1t\u00e9, John Raine, Tobias Golling, Fran\u00e7ois Fleuret", "tldr": "", "abstract": "Deep learning methods have gained popularity in high energy physics for fast modeling of particle showers in detectors. Detailed simulation frameworks such as the gold standard \\textsc{Geant4} are computationally intensive, and current deep generative architectures work on discretized, lower resolution versions of the detailed simulation. The development of models that work at higher spatial resolutions is currently hindered by the complexity of the full simulation data, and by the lack of simpler, more interpretable benchmarks. Our contribution is \\textsc{SUPA}, the SUrrogate PArticle propagation simulator, an algorithm and software package for generating data by simulating simplified particle propagation, scattering and shower development in matter. The generation is extremely fast and easy to use compared to \\textsc{Geant4}, but still exhibits the key characteristics and challenges of the detailed simulation. The proposed simulator generates thousands of particle showers per second on a desktop machine, a speed up of up to 6 orders of magnitudes over \\textsc{Geant4}, and stores detailed geometric information about the shower propagation. \\textsc{\\textsc{SUPA}} provides much greater flexibility for setting initial conditions and defining multiple benchmarks for the development of models. 
Moreover, interpreting particle showers as point clouds creates a connection to geometric machine learning and provides challenging and fundamentally new datasets for the field.", "keywords": "HEP;Point Clouds;Generative Models;Simulation;Synthetic dataset;Calorimeter", "primary_area": "", "supplementary_material": "/attachment/e0885bfa416caa4ce49c6a9ab7bd5e9c05339e01.pdf", "author": "Atul Kumar Sinha;Daniele Paliotta;B\u00e1lint M\u00e1t\u00e9;John Andrew Raine;Tobias Golling;Fran\u00e7ois Fleuret", "authorids": "~Atul_Kumar_Sinha1;~Daniele_Paliotta1;~B\u00e1lint_M\u00e1t\u00e91;~John_Andrew_Raine1;~Tobias_Golling1;~Fran\u00e7ois_Fleuret2", "gender": "M;M;;M;M;M", "homepage": ";https://danielepaliotta.com;https://balintmate.github.io;;https://www.unige.ch/dpnc/en/groups/tobias-golling/home/;https://fleuret.org/francois/", "dblp": "166/1635;314/5880;301/7700;;;90/5265", "google_scholar": ";_xugfIEAAAAJ;;;;https://scholar.google.ch/citations?user=Bj1tRlsAAAAJ", "orcid": ";;;0000-0002-5987-4648;;0000-0001-9457-7393", "linkedin": "atulkumarin/;;;;;francois-fleuret/", "or_profile": "~Atul_Kumar_Sinha1;~Daniele_Paliotta1;~B\u00e1lint_M\u00e1t\u00e91;~John_Andrew_Raine1;~Tobias_Golling1;~Francois_Fleuret1", "aff": "University of Geneva;University of Geneva;Microsoft;University of Geneva, Switzerland;University of Genoa;University of Geneva", "aff_domain": "unige.ch;unige.ch;microsoft.com;unige.ch;unige.ch;unige.ch", "position": "PhD student;PhD student;Intern;Ma\u00eetre Assistant;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsinha2023supa,\ntitle={{SUPA}: A Lightweight Diagnostic Simulator for Machine Learning in Particle Physics},\nauthor={Atul Kumar Sinha and Daniele Paliotta and B{\\'a}lint M{\\'a}t{\\'e} and John Andrew Raine and Tobias Golling and Fran{\\c{c}}ois Fleuret},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=msWIK6SKBK}\n}", "github": "", "project": "", "reviewers": "q153;XfFU;hC4c;Y4mj;fTbf", "pdf_size": 1858531, "rating": "4;5;5;7;9", "confidence": "4;3;4;3;4", "wc_summary_and_contributions": "66;56;80;37;37", "wc_strengths": "22;27;42;89;80", "wc_improvement": "1;144;39;118;51", "wc_limitations": "134;32;159;17;25", "wc_correctness": "14;32;25;16;11", "wc_clarity": "1;5;76;15;5", "wc_relation_to_prior_work": "1;18;8;34;1", "wc_documentation": "19;17;4;15;29", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "259;332;434;342;240", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "305;259;415;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;0;0", "rating_avg": [ 6.0, 1.7888543819998317 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 55.2, 16.702095676890373 ], "wc_strengths_avg": [ 52.0, 27.488179277645873 ], "wc_improvement_avg": [ 70.6, 52.65206548655048 ], "wc_limitations_avg": [ 73.4, 60.39403944099119 ], "wc_correctness_avg": [ 19.6, 7.761443164772902 ], "wc_clarity_avg": [ 20.4, 28.18226392609366 ], "wc_relation_to_prior_work_avg": [ 12.4, 12.467557900406959 ], "wc_documentation_avg": [ 16.8, 8.009993757800315 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 321.4, 68.89876631696681 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 195.8, 167.71571184596868 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.6, 0.48989794855663565 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=17253983988536741852&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "unige.ch;unige.ch;microsoft.com;unige.ch;unige.ch;unige.ch", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "University of Geneva;Microsoft;University of Genoa", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.unige.ch;https://www.microsoft.com;https://www.unige.it", "aff_unique_abbr": "UNIGE;Microsoft;UniGe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;0", "aff_country_unique": "Switzerland;United States;Italy" }, { "id": "muFvu66v7u", "title": "DP-SGD Without Clipping: The Lipschitz Neural Network Way", "track": "main", "status": "Reject", "tldr": "", "abstract": "State-of-the-art approaches for training Differentially Private (DP) Deep Neural Networks (DNN) faces difficulties to estimate tight bounds on the sensitivity of the network's layers, and instead rely on a process of per-sample gradient clipping. This clipping process not only biases the direction of gradients but also proves costly both in memory consumption and in computation cost. To provide sensitivity bounds and bypass the drawbacks of the clipping process, our theoretical analysis of Lipschitz constrained networks reveals an unexplored link between the Lipschitz constant with respect to their input and the one with respect to their parameters. By bounding the Lipschitz constant of each layer with respect to its parameters we guarantee DP training of these networks. This analysis not only allows computing the aforementioned sensitivities at scale but also provides leads on to how maximize the gradient to noise ratio for fixed privacy guarantees. 
To facilitate the application of Lipschitz networks and foster robust and certifiable learning under privacy guarantees, we provide a Python package that implements building blocks allowing the construction and private training of such networks.", "keywords": "privacy;lipschitz;dpsgd;gnp;neural;framework", "primary_area": "", "supplementary_material": "/attachment/1425a6443b0737b30a34799f53e86d9541b8672d.zip", "author": "Louis B\u00e9thune;Thomas Massena;Thibaut Boissin;Corentin Friedrich;Yannick Prudent;Franck Mamalet;Aur\u00e9lien Bellet;Mathieu Serrurier;David Vigouroux", "authorids": "~Louis_B\u00e9thune1;~Thomas_Massena1;~Thibaut_Boissin1;~Corentin_Friedrich1;~Yannick_Prudent1;~Franck_Mamalet2;~Aur\u00e9lien_Bellet1;~Mathieu_Serrurier1;~David_Vigouroux1", "gender": "M;M;M;M;M;M;;M;", "homepage": "https://louis-bethune.fr/;;;;;https://www.researchgate.net/profile/Franck-Mamalet;http://researchers.lille.inria.fr/abellet/;;", "dblp": "270/0797;;;258/6442;;15/6625;61/8017;30/2092;", "google_scholar": "1zvpCDcAAAAJ;n09aacYAAAAJ;zC-MstIAAAAJ;;;https://scholar.google.fr/citations?user=5C5p0osAAAAJ;https://scholar.google.fr/citations?user=j8svx3IAAAAJ;https://scholar.google.com/scholar?scilib=1;", "orcid": "0000-0003-1498-8251;;;;;;0000-0003-3440-1251;;", "linkedin": ";thomas-mass%C3%A9na-9240b5223/;;corentin-friedrich/;yannick-prudent/;franck-mamalet-0453a91b;;;", "or_profile": "~Louis_B\u00e9thune1;~Thomas_Massena1;~Thibaut_Boissin1;~Corentin_Friedrich1;~Yannick_Prudent1;~Franck_Mamalet2;~Aur\u00e9lien_Bellet1;~Mathieu_Serrurier1;~David_Vigouroux1", "aff": "Institut de Recherche en Informatique de Toulouse;Grenoble INP - Phelma, UGA;IRT Saint exup\u00e9ry;IRT Saint Exup\u00e9ry;IRT Saint-Exup\u00e9ry;IRT Saint Exupery;INRIA;university Paul Sabatier;", "aff_domain": "irit.fr;phelma.grenoble-inp.fr;irt-saintexupery.com;irt-saintexupery.com;irt-saintexupery.com;irt-saintexupery.com;inria.fr;irit.fr;", "position": "PhD student;MS student;Researcher;Researcher;Researcher;Researcher;Tenured researcher;Assistant Professor;", "bibtex": "@misc{\nb{\\'e}thune2023dpsgd,\ntitle={{DP}-{SGD} Without Clipping: The Lipschitz Neural Network Way},\nauthor={Louis B{\\'e}thune and Thomas Massena and Thibaut Boissin and Corentin Friedrich and Yannick Prudent and Franck Mamalet and Aur{\\'e}lien Bellet and Mathieu Serrurier and David Vigouroux},\nyear={2023},\nurl={https://openreview.net/forum?id=muFvu66v7u}\n}", "github": "", "project": "", "reviewers": "mBgX;Coyf;vJyY;6vik", "site": "https://openreview.net/forum?id=muFvu66v7u", "pdf_size": 4758530, "rating": "3;4;5;6", "confidence": "4;3;4;3", "soundness": "3;2;3;3", "novelty": "2;2;4;3", "presentation": "4;3;3;3", "wc_summary": "62;123;46;90", "wc_strengths": "19;16;84;38", "wc_weaknesses": "138;61;43;166", "wc_questions": "110;129;272;182", "wc_limitations": "11;5;9;1", "wc_review": "340;334;454;477", "wc_reply_reviewers": "830;129;120;121", "wc_reply_authors": "1238;330;221;276", "reply_reviewers": "5;1;1;3", "reply_authors": "5;2;2;3", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 29.277764600460877 ], "wc_strengths_avg": [ 39.25, 27.178806081209675 ], "wc_weaknesses_avg": [ 102.0, 51.36633138545131 ], "wc_questions_avg": [ 173.25, 62.822667087604614 ], "wc_limitations_avg": [ 6.5, 3.840572873934304 ], "wc_review_avg": [ 401.25, 64.79728003550767 ], 
"wc_reply_reviewers_avg": [ 300.0, 306.01552248211203 ], "wc_reply_authors_avg": [ 516.25, 418.48080899845337 ], "reply_reviewers_avg": [ 2.5, 1.6583123951777 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1811895221198899715&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 32, "aff_unique_index": "0;1;2;2;3;4;5;6", "aff_unique_norm": "Institut de Recherche en Informatique de Toulouse;Grenoble INP;IRT Saint Exup\u00e9ry;IRT Saint-Exup\u00e9ry;IRT Saint Exupery;INRIA;Paul Sabatier University", "aff_unique_dep": "Informatique;Phelma;;;;;", "aff_unique_url": "https://www.irit.fr;https://www.grenoble-inp.fr;;https://www.irt-saintexupery.com;;https://www.inria.fr;https://www.univ-toulouse1.fr", "aff_unique_abbr": "IRIT;GINP;;;;INRIA;UT1", "aff_campus_unique_index": "1", "aff_campus_unique": ";Grenoble", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "France" }, { "title": "Reliable Off-Policy Learning for Dosage Combinations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70533", "id": "muVKSb8gi5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d69103d7895f4e2083f24b664003d386-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=muVKSb8gi5", "openreview": "https://openreview.net/forum?id=muVKSb8gi5", "poster": "/media/PosterPDFs/NeurIPS%202023/70533.png?t=1701190332.0669198", "slides": "https://nips.cc/virtual/2023/poster/70533", "video": "https://nips.cc/virtual/2023/poster/70533", "author_site": "Jonas Schweisthal, Dennis Frauen, Valentyn Melnychuk, Stefan Feuerriegel", "tldr": "", "abstract": "Decision-making in personalized medicine such as cancer therapy or critical care must often make choices for dosage combinations, i.e., multiple continuous treatments. Existing work for this task has modeled the effect of multiple treatments independently, while estimating the joint effect has received little attention but comes with non-trivial challenges. In this paper, we propose a novel method for reliable off-policy learning for dosage combinations. Our method proceeds along three steps: (1) We develop a tailored neural network that estimates the individualized dose-response function while accounting for the joint effect of multiple dependent dosages. (2) We estimate the generalized propensity score using conditional normalizing flows in order to detect regions with limited overlap in the shared covariate-treatment space. (3) We present a gradient-based learning algorithm to find the optimal, individualized dosage combinations. Here, we ensure reliable estimation of the policy value by avoiding regions with limited overlap. We finally perform an extensive evaluation of our method to show its effectiveness. 
To the best of our knowledge, ours is the first work to provide a method for reliable off-policy learning for optimal dosage combinations.", "keywords": "off-policy learning;causal inference;reliable machine learning;medicine;dosaging;normalizing flows", "primary_area": "", "supplementary_material": "/attachment/55505dfba05eb7f0a5f4c707a9b1af6f152f6d47.zip", "author": "Jonas Schweisthal;Dennis Frauen;Valentyn Melnychuk;Stefan Feuerriegel", "authorids": "~Jonas_Schweisthal1;~Dennis_Frauen1;~Valentyn_Melnychuk1;~Stefan_Feuerriegel1", "gender": "M;M;M;M", "homepage": "https://www.som.lmu.de/ai/en/institute/contact-page/jonas-schweisthal-0f01481a.html;https://www.ai.bwl.uni-muenchen.de/team/research_team/dennis_frauen/index.html;https://valentyn1997.github.io/;http://www.ai.bwl.lmu.de", "dblp": "329/4240;315/0115;254/1513;125/0630", "google_scholar": "https://scholar.google.de/citations?user=GHpjcEsAAAAJ;ieyW4WQAAAAJ;EMExrOMAAAAJ;https://scholar.google.de/citations?hl=de", "orcid": "0000-0003-3725-3821;;0000-0002-2401-6803;0000-0001-7856-8729", "linkedin": ";dennis-frauen-6b5746171/;valentyn-melnychuk/;", "or_profile": "~Jonas_Schweisthal1;~Dennis_Frauen1;~Valentyn_Melnychuk1;~Stefan_Feuerriegel1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;LMU Munich", "aff_domain": "lmu.de;lmu.de;lmu.de;lmu.de", "position": "PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nschweisthal2023reliable,\ntitle={Reliable Off-Policy Learning for Dosage Combinations},\nauthor={Jonas Schweisthal and Dennis Frauen and Valentyn Melnychuk and Stefan Feuerriegel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=muVKSb8gi5}\n}", "github": "", "project": "", "reviewers": "LeHo;5Gdr;aVCP;bKrg", "pdf_size": 2100121, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "124;190;123;112", "wc_strengths": "99;20;50;171", "wc_weaknesses": "181;61;232;284", "wc_questions": "129;120;318;193", "wc_limitations": "50;3;56;6", "wc_review": "583;394;779;766", "wc_reply_reviewers": "118;198;353;34", "wc_reply_authors": "457;648;936;45", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 137.25, 30.81700017847292 ], "wc_strengths_avg": [ 85.0, 57.10078808562978 ], "wc_weaknesses_avg": [ 189.5, 82.64532654663542 ], "wc_questions_avg": [ 190.0, 79.07907434966599 ], "wc_limitations_avg": [ 28.75, 24.365703355331238 ], "wc_review_avg": [ 630.5, 157.0039808412513 ], "wc_reply_reviewers_avg": [ 175.75, 117.62307384182748 ], "wc_reply_authors_avg": [ 521.5, 323.66070197044314 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10586345917328177787&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "lmu.de;lmu.de;lmu.de;lmu.de", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of 
Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Thinker: Learning to Plan and Act", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70532", "id": "mumEBl0arj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4761fab863f0900d90cf601fce6d5155-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mumEBl0arj", "openreview": "https://openreview.net/forum?id=mumEBl0arj", "poster": "/media/PosterPDFs/NeurIPS%202023/70532.png?t=1699536681.112018", "slides": "https://nips.cc/virtual/2023/poster/70532", "video": "https://nips.cc/virtual/2023/poster/70532", "author_site": "Stephen Chung, Ivan Anokhin, David Krueger", "tldr": "", "abstract": "We propose the Thinker algorithm, a novel approach that enables reinforcement learning agents to autonomously interact with and utilize a learned world model. The Thinker algorithm wraps the environment with a world model and introduces new actions designed for interacting with the world model. These model-interaction actions enable agents to perform planning by proposing alternative plans to the world model before selecting a final action to execute in the environment. This approach eliminates the need for handcrafted planning algorithms by enabling the agent to learn how to plan autonomously and allows for easy interpretation of the agent's plan with visualization. We demonstrate the algorithm's effectiveness through experimental results in the game of Sokoban and the Atari 2600 benchmark, where the Thinker algorithm achieves state-of-the-art performance and competitive results, respectively. Visualizations of agents trained with the Thinker algorithm demonstrate that they have learned to plan effectively with the world model to select better actions. 
Thinker is the first work showing that an RL agent can learn to plan with a learned world model in complex environments.", "keywords": "Reinforcement learning;model-based reinforcement learning;planning;Monte Carlo Tree Search;Markov Decision Process", "primary_area": "", "supplementary_material": "", "author": "Stephen Chung;Ivan Anokhin;David Krueger", "authorids": "~Stephen_Chung1;~Ivan_Anokhin1;~David_Krueger1", "gender": "M;M;M", "homepage": "https://stephen-c.com;;https://mila.umontreal.ca/en/person/david-scott-krueger/", "dblp": "274/1299;261/3500;142/2741.html", "google_scholar": "QPyTwPIAAAAJ;CJ-86AYAAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ", "orcid": ";;", "linkedin": ";ivan-anokhin-914384131/;", "or_profile": "~Stephen_Chung1;~Ivan_Anokhin1;~David_Krueger1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchung2023thinker,\ntitle={Thinker: Learning to Plan and Act},\nauthor={Stephen Chung and Ivan Anokhin and David Krueger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mumEBl0arj}\n}", "github": "", "project": "", "reviewers": "sy9V;aBLR;9fjR;WUsk;U2R9;fWg3", "pdf_size": 5185857, "rating": "5;5;6;6;7;8", "confidence": "3;3;4;3;3;3", "soundness": "1;2;3;3;4;3", "novelty": "1;2;3;3;3;3", "presentation": "2;2;1;4;3;4", "wc_summary": "66;206;57;57;46;179", "wc_strengths": "44;56;62;110;92;103", "wc_weaknesses": "91;67;470;195;99;534", "wc_questions": "7;237;120;111;178;58", "wc_limitations": "27;95;85;27;70;38", "wc_review": "235;661;794;500;485;912", "wc_reply_reviewers": "22;124;219;55;16;240", "wc_reply_authors": "0;260;327;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;2;2;1;1;1", "rating_avg": [ 6.166666666666667, 1.0671873729054748 ], "confidence_avg": [ 3.1666666666666665, 0.3726779962499649 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.5, 0.7637626158259734 ], "presentation_avg": [ 2.6666666666666665, 1.1055415967851334 ], "wc_summary_avg": [ 101.83333333333333, 64.84190174742118 ], "wc_strengths_avg": [ 77.83333333333333, 24.96942574874765 ], "wc_weaknesses_avg": [ 242.66666666666666, 188.56004054117324 ], "wc_questions_avg": [ 118.5, 74.98166442537803 ], "wc_limitations_avg": [ 57.0, 27.562051205718827 ], "wc_review_avg": [ 597.8333333333334, 221.78173704994038 ], "wc_reply_reviewers_avg": [ 112.66666666666667, 89.94380961961134 ], "wc_reply_authors_avg": [ 97.83333333333333, 139.70256102003125 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.06984302957695786, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9388827538302684385&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Optimality in Mean Estimation: Beyond Worst-Case, Beyond Sub-Gaussian, and Beyond $1+\\alpha$ Moments", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/70531", "id": "mvSDs51eqQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0cddb777d3441326544e21b67f41bdc8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=mvSDs51eqQ", "openreview": "https://openreview.net/forum?id=mvSDs51eqQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70531.png?t=1702277087.8093898", "slides": "https://nips.cc/virtual/2023/poster/70531", "video": "https://nips.cc/virtual/2023/poster/70531", "author_site": "Trung Dang, Jasper Lee, Maoyuan 'Raymond' Song, Paul Valiant", "tldr": "", "abstract": "There is growing interest in improving our algorithmic understanding of fundamental statistical problems such as mean estimation, driven by the goal of understanding the fundamental limits of what we can extract from limited and valuable data.\nThe state of the art results for mean estimation in $\\mathbb{R}$ are 1) the optimal sub-Gaussian mean estimator by [Lee and Valiant, 2022], attaining the optimal sub-Gaussian error constant for all distributions with finite but unknown variance, and 2) the analysis of the median-of-means algorithm by [Bubeck, Cesa-Bianchi and Lugosi, 2013] and a matching lower bound by [Devroye, Lerasle, Lugosi, and Oliveira, 2016], characterizing the big-O optimal errors for distributions that have tails heavy enough that only a $1+\\alpha$ moment exists for some $\\alpha \\in (0,1)$.\nBoth of these results, however, are optimal only in the worst case.\nMotivated by the recent effort in the community to go \"beyond the worst-case analysis\" of algorithms, we initiate the fine-grained study of the mean estimation problem:\nIs it possible for algorithms to leverage *beneficial* features/quirks of their input distribution to *beat* the sub-Gaussian rate, without explicit knowledge of these features?\n\nWe resolve this question, finding an unexpectedly nuanced answer: \"Yes in limited regimes, but in general no\".\nGiven a distribution $p$, assuming *only* that it has a finite mean and absent any additional assumptions,\nwe show how to construct a distribution $q_{n,\\delta}$ such that the means of $p$ and $q$ are well-separated, yet $p$ and $q$ are impossible to distinguish with $n$ samples with probability $1-\\delta$, and $q$ further preserves the finiteness of moments of $p$.\nMoreover, the variance of $q$ is at most twice the variance of $p$ if it exists.\nThe main consequence of our result is that, no reasonable estimator can asymptotically achieve better than the sub-Gaussian error rate for any distribution, up to constant factors, which matches the worst-case result of [Lee and Valiant, 2022].\nMore generally, we introduce a new definitional framework to analyze the fine-grained optimality of algorithms, which we call \"neighborhood optimality\", interpolating between the unattainably strong \"instance optimality\" and the trivially weak admissibility/Pareto optimality definitions.\nAs an application of the new framework, we show that the median-of-means algorithm is neighborhood optimal, up to constant factors.\nIt is an open question to find a neighborhood-optimal estimator *without* constant factor slackness.", "keywords": "mean estimation;instance optimality", "primary_area": "", "supplementary_material": "", "author": "Trung Dang;Jasper C.H. 
Lee;Maoyuan Song;Paul Valiant", "authorids": "~Trung_Dang2;~Jasper_C.H._Lee1;~Maoyuan_Song1;~Paul_Valiant1", "gender": "M;M;M;M", "homepage": "https://kuroni.github.io/;https://jasperchlee.github.io/;https://maoyuans.github.io;https://www.cs.purdue.edu/homes/pvaliant/", "dblp": "267/3239;150/4950;329/6071;", "google_scholar": "CA5tZl4AAAAJ;z0Y4snAAAAAJ;1W8rVegAAAAJ;abUcBIkAAAAJ", "orcid": ";;0009-0007-9389-5075;", "linkedin": ";;;", "or_profile": "~Trung_Dang2;~Jasper_C.H._Lee1;~Maoyuan_Song1;~Paul_Valiant1", "aff": "Purdue University;University of Wisconsin - Madison;Computer Science Department, Purdue University;Purdue University", "aff_domain": "purdue.edu;wisc.edu;cs.purdue.edu;purdue.edu", "position": "Undergrad student;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndang2023optimality,\ntitle={Optimality in Mean Estimation: Beyond Worst-Case, Beyond Sub-Gaussian, and Beyond \\$1+{\\textbackslash}alpha\\$ Moments},\nauthor={Trung Dang and Jasper C.H. Lee and Maoyuan Song and Paul Valiant},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=mvSDs51eqQ}\n}", "github": "", "project": "", "reviewers": "iso5;bzea;Z2th;x13e", "pdf_size": 433983, "rating": "6;7;7;7", "confidence": "3;4;4;3", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "wc_summary": "75;90;211;148", "wc_strengths": "37;87;135;190", "wc_weaknesses": "48;117;76;69", "wc_questions": "1;36;26;191", "wc_limitations": "1;5;1;3", "wc_review": "162;335;449;601", "wc_reply_reviewers": "35;30;99;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 131.0, 53.63301222195151 ], "wc_strengths_avg": [ 112.25, 56.70703219178376 ], "wc_weaknesses_avg": [ 77.5, 25.024987512484397 ], "wc_questions_avg": [ 63.5, 74.70776398741967 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 386.75, 160.44372066241795 ], "wc_reply_reviewers_avg": [ 44.5, 32.407560846197605 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5150569427720556754&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "purdue.edu;wisc.edu;cs.purdue.edu;purdue.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Purdue University;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.wisc.edu", "aff_unique_abbr": "Purdue;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "2Direction: Theoretically Faster Distributed Training with Bidirectional Communication Compression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70530", "id": "n18MhTsSGb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2717ad172c5495837582d70a8519abfb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=n18MhTsSGb", "openreview": "https://openreview.net/forum?id=n18MhTsSGb", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70530.png?t=1699514484.581832", "slides": "https://nips.cc/virtual/2023/poster/70530", "video": "https://nips.cc/virtual/2023/poster/70530", "author_site": "Alexander Tyurin, Peter Richtarik", "tldr": "", "abstract": "We consider distributed convex optimization problems in the regime when the communication between the server and the workers is expensive in both uplink and downlink directions. We develop a new and provably accelerated method, which we call 2Direction, based on fast bidirectional compressed communication and a new bespoke error-feedback mechanism which may be of independent interest. Indeed, we find that the EF and EF21-P mechanisms (Seide et al., 2014; Gruntkowska et al., 2023) that have considerable success in the design of efficient non-accelerated methods are not appropriate for accelerated methods. In particular, we prove that 2Direction improves the previous state-of-the-art communication complexity $\\widetilde{\\Theta}\\left(K \\times \\left(\\frac{L}{\\alpha \\mu} + \\frac{L_{\\max} \\omega}{n \\mu} + \\omega\\right)\\right)$ (Gruntkowska et al., 2023) to $\\widetilde{\\Theta}(K \\times (\\sqrt{\\frac{L (\\omega + 1)}{\\alpha \\mu}} + \\sqrt{\\frac{L_{\\max} \\omega^2}{n \\mu}} + \\frac{1}{\\alpha} + \\omega))$ in the $\\mu$--strongly-convex setting, where $L$ and $L_{\\max}$ are smoothness constants, $n$ is \\# of workers, $\\omega$ and $\\alpha$ are compression errors of the Rand$K$ and Top$K$ sparsifiers (as examples), $K$ is \\# of coordinates/bits that the server and workers send to each other. Moreover, our method is the first that improves upon the communication complexity of the vanilla accelerated gradient descent method (AGD). We obtain similar improvements in the general convex regime as well. 
Finally, our theoretical findings are corroborated by experimental evidence.", "keywords": "convex optimization;accelerated method;communication compression;bidirectional compression;distributed optimization", "primary_area": "", "supplementary_material": "/attachment/b8bdcc437ec912d129fc901044d6eac6f6dffe40.zip", "author": "Alexander Tyurin;Peter Richt\u00e1rik", "authorids": "~Alexander_Tyurin1;~Peter_Richt\u00e1rik1", "gender": "M;M", "homepage": "https://k3nfalt.github.io/;https://richtarik.org", "dblp": "203/8919;62/8001", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-4380-5848", "linkedin": ";richtarik/", "or_profile": "~Alexander_Tyurin1;~Peter_Richtarik1", "aff": "KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\ntyurin2023direction,\ntitle={2Direction: Theoretically Faster Distributed Training with Bidirectional Communication Compression},\nauthor={Alexander Tyurin and Peter Richt{\\'a}rik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=n18MhTsSGb}\n}", "github": "", "project": "", "reviewers": "EaSZ;AqDA;gU3b;M4eW", "pdf_size": 1048992, "rating": "4;6;6;7", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "35;59;50;46", "wc_strengths": "12;67;38;131", "wc_weaknesses": "150;148;94;31", "wc_questions": "2;238;2;149", "wc_limitations": "1;59;1;1", "wc_review": "200;571;185;358", "wc_reply_reviewers": "0;1286;0;0", "wc_reply_authors": "390;2351;323;0", "reply_reviewers": "0;3;0;0", "reply_authors": "2;6;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 47.5, 8.616843969807043 ], "wc_strengths_avg": [ 62.0, 44.33395989532178 ], "wc_weaknesses_avg": [ 105.75, 48.65375114007141 ], "wc_questions_avg": [ 97.75, 100.7878340872548 ], "wc_limitations_avg": [ 15.5, 25.11473670974872 ], "wc_review_avg": [ 328.5, 155.54822403357744 ], "wc_reply_reviewers_avg": [ 321.5, 556.854334633394 ], "wc_reply_authors_avg": [ 766.0, 926.9042561127875 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8366731725843309710&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "kaust.edu.sa;kaust.edu.sa", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "AirDelhi: Fine-Grained Spatio-Temporal Particulate Matter Dataset From Delhi For ML based Modeling", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73476", "id": "n2wW7goGky", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee799aff607fcf39c01df6391e96f92c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=n2wW7goGky", "openreview": 
"https://openreview.net/forum?id=n2wW7goGky", "poster": "/media/PosterPDFs/NeurIPS%202023/73476.png?t=1698582840.8308084", "slides": "https://nips.cc/virtual/2023/poster/73476", "video": "https://nips.cc/virtual/2023/poster/73476", "author_site": "Sachin Chauhan, Zeel Bharatkumar Patel, Sayan Ranu, Rijurekha Sen, Nipun Batra", "tldr": "", "abstract": "Air pollution poses serious health concerns in developing countries, such as India, necessitating large-scale measurement for correlation analysis, policy recommendations, and informed decision-making. However, fine-grained data collection is costly. Specifically, static sensors for pollution measurement cost several thousand dollars per unit, leading to inadequate deployment and coverage. To complement the existing sparse static sensor network, we propose a mobile sensor network utilizing lower-cost PM2.5 sensors mounted on public buses in the Delhi-NCR region of India. Through this exercise, we introduce a novel dataset AirDelhi comprising PM2.5 and PM10 measurements. This dataset is made publicly available, at https://www.cse.iitd.ac.in/pollutiondata, serving as a valuable resource for machine learning (ML) researchers and environmentalists. We present three key contributions with the release of this dataset. Firstly, through in-depth statistical analysis, we demonstrate that the released dataset significantly differs from existing pollution datasets, highlighting its uniqueness and potential for new insights. Secondly, the dataset quality been validated against existing expensive sensors. Thirdly, we conduct a benchmarking exercise (https://github.com/sachin-iitd/DelhiPMDatasetBenchmark), evaluating state-of-the-art methods for interpolation, feature imputation, and forecasting on this dataset, which is the largest publicly available PM dataset to date. The results of the benchmarking exercise underscore the substantial disparities in accuracy between the proposed dataset and other publicly available datasets. 
This finding highlights the complexity and richness of our dataset, emphasizing its value for advancing research in the field of air pollution.", "keywords": "PM2.5 Dataset;PM10 Dataset;Machine Learning (ML) Modeling;Mobile Sensing;Low Cost Sensing", "primary_area": "", "supplementary_material": "/attachment/f136b4c0e9e812ed010a6259d2ee267091c2b87c.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nchauhan2023airdelhi,\ntitle={AirDelhi: Fine-Grained Spatio-Temporal Particulate Matter Dataset From Delhi For {ML} based Modeling},\nauthor={Sachin Chauhan and Zeel B Patel and Sayan Ranu and Rijurekha Sen and Nipun Batra},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=n2wW7goGky}\n}", "github": "", "project": "", "reviewers": "5SWu;8Gda;npPd;Q5NU", "pdf_size": 0, "rating": "4;6;6;8", "confidence": "5;4;3;4", "wc_summary_and_contributions": "46;20;167;109", "wc_strengths": "45;45;30;95", "wc_improvement": "675;148;2;390", "wc_limitations": "24;48;121;74", "wc_correctness": "9;3;2;137", "wc_clarity": "229;3;1;20", "wc_relation_to_prior_work": "130;28;1;32", "wc_documentation": "8;9;2;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "1167;305;327;883", "wc_reply_reviewers": "0;0;0;46", "wc_reply_authors": "2102;636;972;1166", "reply_reviewers": "0;0;0;1", "reply_authors": "4;1;2;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 85.5, 57.107355042936454 ], "wc_strengths_avg": [ 53.75, 24.5903944661325 ], "wc_improvement_avg": [ 303.75, 255.23359398793883 ], "wc_limitations_avg": [ 66.75, 35.967867604293694 ], "wc_correctness_avg": [ 37.75, 57.36451429237417 ], "wc_clarity_avg": [ 63.25, 95.98014117514101 ], "wc_relation_to_prior_work_avg": [ 47.75, 48.961081483153535 ], "wc_documentation_avg": [ 11.0, 8.514693182963201 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 670.5, 368.52781441839636 ], "wc_reply_reviewers_avg": [ 11.5, 19.91858428704209 ], "wc_reply_authors_avg": [ 1219.0, 543.9200308868942 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17466801784643622230&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "Fast Optimal Transport through Sliced Generalized Wasserstein Geodesics", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70529", "id": "n3XuYdvhNW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f1346bac8b02f76a631400e2799b24b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=n3XuYdvhNW", "openreview": "https://openreview.net/forum?id=n3XuYdvhNW", "poster": "/media/PosterPDFs/NeurIPS%202023/70529.png?t=1701845945.1742337", "slides": "https://nips.cc/virtual/2023/poster/70529", "video": "https://nips.cc/virtual/2023/poster/70529", "author_site": "Guillaume Mahey, Laetitia Chapel, Gilles Gasso, Cl\u00e9ment Bonet, Nicolas Courty", "tldr": "", "abstract": "Wasserstein distance (WD) and the associated optimal transport plan have been proven useful in many 
applications where probability measures are at stake. In this paper, we propose a new proxy of the squared WD, coined $\\textnormal{min-SWGG}$, which is based on the transport map induced by an optimal one-dimensional projection of the two input distributions. We draw connections between $\\textnormal{min-SWGG}$ and Wasserstein generalized geodesics in which the pivot measure is supported on a line. We notably provide a new closed form for the exact Wasserstein distance in the particular case where one of the distributions is supported on a line, allowing us to derive a fast computational scheme that is amenable to gradient descent optimization. We show that $\\textnormal{min-SWGG}$ is an upper bound of WD and that it has a complexity similar to that of Sliced-Wasserstein, with the additional feature of providing an associated transport plan. We also investigate some theoretical properties, such as metricity, weak convergence, and computational and topological properties. Empirical evidence supports the benefits of $\\textnormal{min-SWGG}$ in various contexts, including gradient flows, shape matching and image colorization, among others.", "keywords": "Optimal Transport;Wasserstein distance;Generalized Geodesics;Sliced Wasserstein", "primary_area": "", "supplementary_material": "", "author": "Guillaume Mahey;Laetitia Chapel;Gilles Gasso;Cl\u00e9ment Bonet;Nicolas Courty", "authorids": "~Guillaume_Mahey1;~Laetitia_Chapel1;~Gilles_Gasso1;~Cl\u00e9ment_Bonet1;~Nicolas_Courty1", "gender": "M;;M;M;M", "homepage": "https://github.com/MaheyG/;;http://asi.insa-rouen.fr/enseignants/~gasso/;https://clbonet.github.io;http://people.irisa.fr/Nicolas.Courty/", "dblp": ";;;304/8220;74/4219", "google_scholar": ";;https://scholar.google.fr/citations?user=wPTfsEQAAAAJ;wjCPk5kAAAAJ;https://scholar.google.fr/citations?user=ibEREjcAAAAJ", "orcid": ";;;0000-0002-3390-1169;0000-0003-1353-0126", "linkedin": ";;;cl\u00e9ment-bonet-2840a9153;", "or_profile": "~Guillaume_Mahey1;~Laetitia_Chapel1;~Gilles_Gasso1;~Cl\u00e9ment_Bonet1;~Nicolas_Courty1", "aff": "Institut National des Sciences Appliqu\u00e9es de Rouen;;INSA Rouen Normandy;Universit\u00e9 Bretagne Sud;IRISA", "aff_domain": "insa-rouen.fr;;insa-rouen.fr;univ-ubs.fr;irisa.fr", "position": "PhD student;;Full Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nmahey2023fast,\ntitle={Fast Optimal Transport through Sliced Generalized Wasserstein Geodesics},\nauthor={Guillaume Mahey and Laetitia Chapel and Gilles Gasso and Cl{\\'e}ment Bonet and Nicolas Courty},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=n3XuYdvhNW}\n}", "github": "", "project": "", "reviewers": "CJnA;rywq;5WTB;LrDA", "pdf_size": 15893216, "rating": "6;6;7;8", "confidence": "3;2;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;2", "wc_summary": "223;90;386;88", "wc_strengths": "95;39;15;44", "wc_weaknesses": "179;107;441;117", "wc_questions": "86;2;256;32", "wc_limitations": "90;23;23;21", "wc_review": "673;261;1121;302", "wc_reply_reviewers": "0;17;168;41", "wc_reply_authors": "0;0;414;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 196.75, 122.19528427889514 ], "wc_strengths_avg": [ 48.25, 29.13224158900238 ], "wc_weaknesses_avg": [ 211.0, 135.6244815658294 
], "wc_questions_avg": [ 94.0, 98.25477087653302 ], "wc_limitations_avg": [ 39.25, 29.31190031369512 ], "wc_review_avg": [ 589.25, 346.4219789505279 ], "wc_reply_reviewers_avg": [ 56.5, 66.00189391222042 ], "wc_reply_authors_avg": [ 103.5, 179.2672585833788 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14155561732076945417&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "insa-rouen.fr;;insa-rouen.fr;univ-ubs.fr;irisa.fr", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Institut National des Sciences Appliqu\u00e9es;INSA Rouen Normandy;Universit\u00e9 Bretagne Sud;Institut de Recherche en Informatique et Automatique", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.insa-rouen.fr;https://www.insa-rouen.fr;https://www.univ-ubs.fr;https://www.irisa.fr", "aff_unique_abbr": "INSA Rouen;INSA Rouen;UBS;IRISA", "aff_campus_unique_index": "0", "aff_campus_unique": "Rouen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "DRAUC: An Instance-wise Distributionally Robust AUC Optimization Framework", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70528", "id": "n3ZVdny7OH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c086821724b99f4c756648bb0f165db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=n3ZVdny7OH", "openreview": "https://openreview.net/forum?id=n3ZVdny7OH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70528", "video": "https://nips.cc/virtual/2023/poster/70528", "author_site": "Siran Dai, Qianqian Xu, Zhiyong Yang, Xiaochun Cao, Qingming Huang", "tldr": "", "abstract": "The Area Under the ROC Curve (AUC) is a widely employed metric in long-tailed classification scenarios. Nevertheless, most existing methods primarily assume that training and testing examples are drawn i.i.d. from the same distribution, which is often unachievable in practice. Distributionally Robust Optimization (DRO) enhances model performance by optimizing it for the local worst-case scenario, but directly integrating AUC optimization with DRO results in an intractable optimization problem. To tackle this challenge, methodically we propose an instance-wise surrogate loss of Distributionally Robust AUC (DRAUC) and build our optimization framework on top of it. Moreover, we highlight that conventional DRAUC may induce label bias, hence introducing distribution-aware DRAUC as a more suitable metric for robust AUC learning. Theoretically, we affirm that the generalization gap between the training loss and testing error diminishes if the training set is sufficiently large. Empirically, experiments on corrupted benchmark datasets demonstrate the effectiveness of our proposed method. 
Code is available at: https://github.com/EldercatSAM/DRAUC.", "keywords": "Robust Learning AUC", "primary_area": "", "supplementary_material": "/attachment/7e82e2b04c826b452fb1e1c48c0fcc02b67851b8.pdf", "author": "Siran Dai;Qianqian Xu;Zhiyong Yang;Xiaochun Cao;Qingming Huang", "authorids": "~Siran_Dai1;~Qianqian_Xu2;~Zhiyong_Yang1;~Xiaochun_Cao3;~Qingming_Huang2", "gender": ";F;M;M;", "homepage": ";http://vipl.ict.ac.cn/people/~qianqianxu;https://joshuaas.github.io/;https://scst.sysu.edu.cn/members/caoxiaochun.htm;https://qmhuang-ucas.github.io/", "dblp": "360/0801;07/7627;01/452-1.html;39/3695;68/4388", "google_scholar": ";https://scholar.google.com.hk/citations?user=MjifS2MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ", "orcid": ";;0000-0002-4409-4999;0000-0001-7141-708X;", "linkedin": ";;;;", "or_profile": "~Siran_Dai1;~Qianqian_Xu2;~Zhiyong_Yang1;~Xiaochun_Cao3;~Qingming_Huang2", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;SUN YAT-SEN UNIVERSITY;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.ac.cn;ucas.ac.cn;sysu.edu.cn;ucas.ac.cn", "position": "PhD student;Full Professor;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndai2023drauc,\ntitle={{DRAUC}: An Instance-wise Distributionally Robust {AUC} Optimization Framework},\nauthor={Siran Dai and Qianqian Xu and Zhiyong Yang and Xiaochun Cao and Qingming Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=n3ZVdny7OH}\n}", "github": "", "project": "", "reviewers": "2tJ9;u8fo;9FMm;uBXL", "pdf_size": 1103539, "rating": "4;5;5;6", "confidence": "3;1;3;1", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "53;89;43;48", "wc_strengths": "19;51;68;25", "wc_weaknesses": "261;90;81;76", "wc_questions": "4;27;4;43", "wc_limitations": "4;8;16;15", "wc_review": "341;265;212;207", "wc_reply_reviewers": "65;0;83;9", "wc_reply_authors": "562;38;846;26", "reply_reviewers": "1;0;1;1", "reply_authors": "4;3;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 2.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.25, 18.102140757380052 ], "wc_strengths_avg": [ 40.75, 19.803724397193573 ], "wc_weaknesses_avg": [ 127.0, 77.52741450609584 ], "wc_questions_avg": [ 19.5, 16.5 ], "wc_limitations_avg": [ 10.75, 4.968651728587948 ], "wc_review_avg": [ 256.25, 53.9507877606991 ], "wc_reply_reviewers_avg": [ 39.25, 35.470938809115275 ], "wc_reply_authors_avg": [ 368.0, 350.70785562915466 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1181344239812282863&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 7, "email": "ucas.ac.cn;ict.ac.cn;ucas.ac.cn;sysu.edu.cn;ucas.ac.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Sun Yat-sen University", "aff_unique_dep": ";Institute of Computing Technology;", "aff_unique_url": 
"http://www.ucas.ac.cn;http://www.ict.ac.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "UCAS;CAS;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Robust Distributed Learning: Tight Error Bounds and Breakdown Point under Data Heterogeneity", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70527", "id": "n3fPDW87is", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8f182e220092f7f1fc44f3313023f5a0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=n3fPDW87is", "openreview": "https://openreview.net/forum?id=n3fPDW87is", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70527", "video": "https://nips.cc/virtual/2023/poster/70527", "author_site": "Youssef Allouah, Rachid Guerraoui, Nirupam Gupta, Rafael Pinot, Rafael Pinot, Geovani Rizk", "tldr": "", "abstract": "The theory underlying robust distributed learning algorithms, designed to resist adversarial machines, matches empirical observations when data is homogeneous. Under data heterogeneity however, which is the norm in practical scenarios, established lower bounds on the learning error are essentially vacuous and greatly mismatch empirical observations. This is because the heterogeneity model considered is too restrictive and does not cover basic learning tasks such as least-squares regression. We consider in this paper a more realistic heterogeneity model, namely $(G,B)$-gradient dissimilarity, and show that it covers a larger class of learning problems than existing theory. Notably, we show that the breakdown point under heterogeneity is lower than the classical fraction $\\frac{1}{2}$. We also prove a new lower bound on the learning error of any distributed learning algorithm. 
We derive a matching upper bound for a robust variant of distributed gradient descent, and empirically show that our analysis reduces the gap between theory and practice.", "keywords": "Optimization;Byzantine resilience;Distributed machine learning;federated learning", "primary_area": "", "supplementary_material": "", "author": "Youssef Allouah;Rachid Guerraoui;Nirupam Gupta;Rafael Pinot;Geovani Rizk", "authorids": "~Youssef_Allouah1;~Rachid_Guerraoui1;~Nirupam_Gupta1;~Rafael_Pinot1;~Geovani_Rizk1", "gender": "M;M;;;M", "homepage": "https://youssefallouah.com/;https://lpdwww.epfl.ch/rachid/;;;", "dblp": "312/3936;g/RachidGuerraoui;;;259/2889", "google_scholar": "kVZu88cAAAAJ;;;;", "orcid": "0000-0003-1048-7548;;;;", "linkedin": ";;;;", "or_profile": "~Youssef_Allouah1;~Rachid_Guerraoui1;~Nirupam_Gupta1;~Rafael_Pinot1;~Geovani_Rizk1", "aff": "EPFL - Swiss Federal Institute of Technology Lausanne;;;;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;;;;epfl.ch", "position": "PhD student;;;;Postdoc", "bibtex": "@inproceedings{\nallouah2023robust,\ntitle={Robust Distributed Learning: Tight Error Bounds and Breakdown Point under Data Heterogeneity},\nauthor={Youssef Allouah and Rachid Guerraoui and Nirupam Gupta and Rafael Pinot and Geovani Rizk},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=n3fPDW87is}\n}", "github": "", "project": "", "reviewers": "UheS;YywB;VidP;GcGL", "pdf_size": 637632, "rating": "6;7;7;8", "confidence": "2;3;3;4", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "173;115;94;159", "wc_strengths": "62;64;110;72", "wc_weaknesses": "49;55;84;17", "wc_questions": "22;104;76;147", "wc_limitations": "9;10;6;44", "wc_review": "315;348;370;439", "wc_reply_reviewers": "16;0;29;33", "wc_reply_authors": "36;0;0;17", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 135.25, 32.01854931129766 ], "wc_strengths_avg": [ 77.0, 19.4164878389476 ], "wc_weaknesses_avg": [ 51.25, 23.79469478686373 ], "wc_questions_avg": [ 87.25, 45.372761652780184 ], "wc_limitations_avg": [ 17.25, 15.514106484100203 ], "wc_review_avg": [ 368.0, 45.42576361493552 ], "wc_reply_reviewers_avg": [ 19.5, 12.893796958227627 ], "wc_reply_authors_avg": [ 13.25, 14.85555451674558 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=43307885292524023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "epfl.ch;;;;epfl.ch", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "REASONER: An Explainable Recommendation Dataset with Comprehensive Labeling Ground Truths", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73475", "id": "n4OwK8cpx2", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ebf43d20e5933ab6d98225bbb908ade-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=n4OwK8cpx2", "openreview": "https://openreview.net/forum?id=n4OwK8cpx2", "poster": "/media/PosterPDFs/NeurIPS%202023/73475.png?t=1701608717.1668952", "slides": "https://nips.cc/virtual/2023/poster/73475", "video": "https://nips.cc/virtual/2023/poster/73475", "author_site": "Xu Chen, Jingsen Zhang, Lei Wang, Quanyu Dai, Zhenhua Dong, Ruiming Tang, Rui Zhang, Li Chen, Xin Zhao, Ji-Rong Wen", "tldr": "", "abstract": "Explainable recommendation has attracted much attention from the industry and academic communities. It has shown great potential to improve the recommendation persuasiveness, informativeness and user satisfaction. In the past few years, while a lot of promising explainable recommender models have been proposed, the datasets used to evaluate them still suffer from several limitations, for example, the explanation ground truths are not labeled by the real users, the explanations are mostly single-modal and around only one aspect. To bridge these gaps, in this paper, we build a new explainable recommendation dataset, which, to our knowledge, is the first contribution that provides a large amount of real user labeled multi-modal and multi-aspect explaination ground truths. In specific, we firstly develop a video recommendation platform, where a series of questions around the recommendation explainability are carefully designed. Then, we recruit about 3000 high-quality labelers with different backgrounds to use the system, and collect their behaviors and feedback to our questions. In this paper, we detail the construction process of our dataset and also provide extensive analysis on its characteristics. In addition, we develop a library, where ten well-known explainable recommender models are implemented in a unified framework. Based on this library, we build several benchmarks for different explainable recommendation tasks. At last, we present many new opportunities brought by our dataset, which are expected to promote the field of explainable recommendation. 
Our dataset, library and the related documents have been released at https://reasoner2023.github.io/.", "keywords": "Explainable Recommendation;Recommendation Dataset;Labeling Ground Truths", "primary_area": "", "supplementary_material": "/attachment/46e97dfc7d7d4d9ba3f2b6522fc9e40d58ed679a.pdf", "author": "Xu Chen;Jingsen Zhang;Lei Wang;Quanyu Dai;Zhenhua Dong;Ruiming Tang;Rui Zhang;Li Chen;Xin Zhao;Ji-Rong Wen", "authorids": "~Xu_Chen13;~Jingsen_Zhang1;~Lei_Wang46;~Quanyu_Dai1;~Zhenhua_Dong1;~Ruiming_Tang2;~Rui_Zhang11;~Li_Chen20;~Xin_Zhao10;~Ji-Rong_Wen1", "gender": "M;M;M;M;;M;;;M;M", "homepage": "https://gsai.ruc.edu.cn/chenxu;;https://paitesanshi.github.io/;;;https://scholar.google.com/citations?user=fUtHww0AAAAJ&hl=en;;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "83/6331-17;297/0200;181/2817-198;210/1089;;24/10003.html;;;https://dblp.uni-trier.de/pid/52/8700.html;w/JRWen", "google_scholar": "loPoqy0AAAAJ;gCRR0RYAAAAJ;E6NenUwAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.sg/citations?user=fUtHww0AAAAJ;;;JNhNacoAAAAJ;tbxCHJgAAAAJ", "orcid": "0000-0003-0144-1775;0000-0003-2997-3386;0009-0002-7769-6918;0000-0001-7578-2738;;0000-0002-9224-2431;;;0000-0002-8333-6196;0000-0002-9777-9676", "linkedin": ";;;;;;;;;", "or_profile": "~Xu_Chen13;~Jingsen_Zhang1;~Lei_Wang46;~Quanyu_Dai1;~Zhenhua_Dong1;~Ruiming_Tang2;~Rui_Zhang11;~Li_Chen20;~Xin_Zhao10;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Huawei Technologies Ltd.;;Huawei Technologies Ltd.;;;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;huawei.com;;huawei.com;;;ruc.edu.cn;ruc.edu.cn", "position": "Associate Professor;PhD student;PhD student;Researcher;;Principal Researcher;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023reasoner,\ntitle={{REASONER}: An Explainable Recommendation Dataset with Comprehensive Labeling Ground Truths},\nauthor={Xu Chen and Jingsen Zhang and Lei Wang and Quanyu Dai and Zhenhua Dong and Ruiming Tang and Rui Zhang and Li Chen and Xin Zhao and Ji-Rong Wen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=n4OwK8cpx2}\n}", "github": "", "project": "", "reviewers": "ZxYM;pC4V;1MRi;zpBA;VntB", "pdf_size": 2950639, "rating": "6;6;6;6;6", "confidence": "5;3;5;4;5", "wc_summary_and_contributions": "88;69;63;64;88", "wc_strengths": "104;51;103;94;45", "wc_improvement": "204;82;113;21;113", "wc_limitations": "158;15;115;1;174", "wc_correctness": "59;11;36;10;8", "wc_clarity": "5;7;34;6;1", "wc_relation_to_prior_work": "17;31;33;1;1", "wc_documentation": "11;17;27;1;8", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "647;284;525;199;439", "wc_reply_reviewers": "0;0;27;0;30", "wc_reply_authors": "785;411;833;705;869", "reply_reviewers": "0;0;1;0;1", "reply_authors": "3;3;3;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "wc_summary_and_contributions_avg": [ 74.4, 11.288932633336069 ], "wc_strengths_avg": [ 79.4, 25.943014474035202 ], "wc_improvement_avg": [ 106.6, 59.16282616643664 ], "wc_limitations_avg": [ 92.6, 71.85708037486633 ], "wc_correctness_avg": [ 24.8, 19.933890739140715 ], "wc_clarity_avg": [ 10.6, 11.876026271442816 ], "wc_relation_to_prior_work_avg": [ 16.6, 13.879481258317977 ], "wc_documentation_avg": [ 
12.8, 8.772684879784524 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 418.8, 161.39690207683665 ], "wc_reply_reviewers_avg": [ 11.4, 13.994284547628721 ], "wc_reply_authors_avg": [ 720.6, 164.25541086977927 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3093453683746447297&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;huawei.com;;huawei.com;;;ruc.edu.cn;ruc.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;1;1;0;0", "aff_unique_norm": "Renmin University of China;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.ruc.edu.cn;https://www.huawei.com", "aff_unique_abbr": "RUC;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AbdomenAtlas-8K: Annotating 8,000 CT Volumes for Multi-Organ Segmentation in Three Weeks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73474", "id": "n581purqB4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7331077e0449e94a91370c46b4f80f57-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=n581purqB4", "openreview": "https://openreview.net/forum?id=n581purqB4", "poster": "/media/PosterPDFs/NeurIPS%202023/73474.png?t=1702362576.823318", "slides": "https://nips.cc/virtual/2023/poster/73474", "video": "https://nips.cc/virtual/2023/poster/73474", "author_site": "Chongyu Qu, Tiezheng Zhang, Hualin Qiao, Jie Liu, Yucheng Tang, Alan Yuille, Zongwei Zhou", "tldr": "", "abstract": "Annotating medical images, particularly for organ segmentation, is laborious and time-consuming. For example, annotating an abdominal organ takes an estimated 30-60 minutes per CT volume, depending on the expertise of the annotator and the size, visibility, and complexity of the organ. Therefore, publicly available datasets for multi-organ segmentation are often limited in data size and organ diversity. This paper proposes an active learning procedure to expedite the annotation process for organ segmentation and creates the largest multi-organ dataset (by far) with the spleen, liver, kidneys, stomach, gallbladder, pancreas, aorta, and IVC annotated in 8,448 CT volumes, equating to 3.2 million slices. Conventional annotation methods would take an experienced annotator up to 1,600 weeks (or roughly 30.8 years) to complete this task. In contrast, our annotation procedure has accomplished this task in three weeks (based on an 8-hour workday, five days a week) while maintaining a similar or even better annotation quality. This achievement is attributed to three unique properties of our method: (1) label bias reduction using multiple pre-trained segmentation models, (2) effective error detection in the model predictions, and (3) attention guidance for annotators to make corrections on the most salient errors. Furthermore, we summarize the taxonomy of common errors made by AI algorithms and annotators. This allows for continuous improvement of AI and annotations, significantly reducing the annotation costs required to create large-scale datasets for a wider variety of medical imaging tasks. 
Code and dataset are available at https://github.com/MrGiovanni/AbdomenAtlas", "keywords": "Organ Segmentation;Active Learning;Interactive Segmentation;Medical Image Analysis", "primary_area": "", "supplementary_material": "/attachment/2f781c9096b0e616371d1cfa2b4a5b50664e11c9.pdf", "author": "Chongyu Qu;Tiezheng Zhang;Hualin Qiao;Jie Liu;Yucheng Tang;Alan Yuille;Zongwei Zhou", "authorids": "~Chongyu_Qu1;~Tiezheng_Zhang1;~Hualin_Qiao1;~Jie_Liu17;~Yucheng_Tang1;~Alan_Yuille1;~Zongwei_Zhou1", "gender": "M;M;F;M;M;M;M", "homepage": ";;;https://ljwztc.github.io;https://tangy5.github.io/;;https://www.zongweiz.com/", "dblp": ";347/8124;;03/2134-44;201/0160;y/AlanLYuille;", "google_scholar": "https://scholar.google.com/citations?hl=en;hUMQfb4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;0xheliUAAAAJ;;JVOeczAAAAAJ", "orcid": ";;;0000-0002-1327-1315;;;0000-0002-3154-9851", "linkedin": "chongyu-qu-307014279/;;;;;;", "or_profile": "~Chongyu_Qu1;~Tiezheng_Zhang1;~Hualin_Qiao1;~Jie_Liu17;~Yucheng_Tang1;~Alan_Yuille1;~Zongwei_Zhou1", "aff": "Johns Hopkins University;Johns Hopkins University;;City University of Hong Kong;NVIDIA;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jh.edu;jh.edu;;cityu.edu.hk;nvidia.com;johnshopkins.edu;jhu.edu", "position": "Intern;MS student;;PhD student;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nqu2023abdomenatlask,\ntitle={AbdomenAtlas-8K: Annotating 8,000 {CT} Volumes for Multi-Organ Segmentation in Three Weeks},\nauthor={Chongyu Qu and Tiezheng Zhang and Hualin Qiao and Jie Liu and Yucheng Tang and Alan Yuille and Zongwei Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=n581purqB4}\n}", "github": "", "project": "", "reviewers": "3MZ3;R6jP;pW4Y;Tr7Z;nhxF", "pdf_size": 8947110, "rating": "5;7;7;7;9", "confidence": "5;3;5;4;4", "wc_summary_and_contributions": "75;140;14;118;111", "wc_strengths": "73;30;51;63;19", "wc_improvement": "210;37;52;392;55", "wc_limitations": "1;53;122;66;33", "wc_correctness": "1;80;140;74;18", "wc_clarity": "1;5;6;51;1", "wc_relation_to_prior_work": "2;16;37;9;20", "wc_documentation": "1;16;53;31;21", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "365;378;476;805;279", "wc_reply_reviewers": "0;0;309;296;0", "wc_reply_authors": "1226;737;1937;3237;397", "reply_reviewers": "0;0;2;1;0", "reply_authors": "3;1;4;6;1", "rating_avg": [ 7.0, 1.2649110640673518 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 91.6, 44.07538995856985 ], "wc_strengths_avg": [ 47.2, 20.1037309970065 ], "wc_improvement_avg": [ 149.2, 136.7909353721949 ], "wc_limitations_avg": [ 55.0, 40.0349847008838 ], "wc_correctness_avg": [ 62.6, 49.41092996493792 ], "wc_clarity_avg": [ 12.8, 19.20833152566875 ], "wc_relation_to_prior_work_avg": [ 16.8, 11.82201336490532 ], "wc_documentation_avg": [ 24.4, 17.26962651593832 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 460.6, 183.18799087276435 ], "wc_reply_reviewers_avg": [ 121.0, 148.25113827556265 ], "wc_reply_authors_avg": [ 1506.8, 1008.1429263750255 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 3.0, 1.8973665961010275 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 43, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1225743625922414254&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "jh.edu;jh.edu;;cityu.edu.hk;nvidia.com;johnshopkins.edu;jhu.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Johns Hopkins University;City University of Hong Kong;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.jhu.edu;https://www.cityu.edu.hk;https://www.nvidia.com", "aff_unique_abbr": "JHU;CityU;NVIDIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Learning with Explanation Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70526", "id": "n6ztJ3Lrdj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c537882044c8b5352c363e840872ddb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=n6ztJ3Lrdj", "openreview": "https://openreview.net/forum?id=n6ztJ3Lrdj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70526", "video": "https://nips.cc/virtual/2023/poster/70526", "author_site": "Rattana Pukdee, Dylan Sam, J. Zico Kolter, Maria-Florina Balcan, Pradeep Ravikumar", "tldr": "", "abstract": "As larger deep learning models are hard to interpret, there has been a recent focus on generating explanations of these black-box models. \nIn contrast, we may have apriori explanations of how models should behave. In this paper, we formalize this notion as learning from explanation constraints and provide a learning theoretic framework to analyze how such explanations can improve the learning of our models. One may naturally ask, \"When would these explanations be helpful?\"\nOur first key contribution addresses this question via a class of models that satisfies these explanation constraints in expectation over new data. We provide a characterization of the benefits of these models (in terms of the reduction of their Rademacher complexities) for a canonical class of explanations given by gradient information in the settings of both linear models and two layer neural networks. In addition, we provide an algorithmic solution for our framework, via a variational approximation that achieves better performance and satisfies these constraints more frequently, when compared to simpler augmented Lagrangian methods to incorporate these explanations. 
We demonstrate the benefits of our approach through a large array of synthetic and real-world experiments.", "keywords": "Interpretable ML;Semi-supervised learning;Learning theory", "primary_area": "", "supplementary_material": "", "author": "Rattana Pukdee;Dylan Sam;J Zico Kolter;Nina Balcan;Pradeep Kumar Ravikumar", "authorids": "~Rattana_Pukdee1;~Dylan_Sam1;~J_Zico_Kolter1;~Nina_Balcan1;~Pradeep_Kumar_Ravikumar1", "gender": "M;M;F;M;M", "homepage": ";https://dsam99.github.io/;http://www.cs.cmu.edu/~ninamf/;http://www.cs.cmu.edu/~pradeepr/;http://www.zicokolter.com", "dblp": ";289/8487.html;b/MariaFlorinaBalcan;94/3594;67/2526", "google_scholar": "KhnQ8zoAAAAJ;43ffAwcAAAAJ;https://scholar.google.com.tw/citations?user=LWlN_BUAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;UXh1I6UAAAAJ", "orcid": ";;;;", "linkedin": "rattana-pukdee/;;;;", "or_profile": "~Rattana_Pukdee1;~Dylan_Sam1;~Nina_Balcan1;~Pradeep_Kumar_Ravikumar1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\npukdee2023learning,\ntitle={Learning with Explanation Constraints},\nauthor={Rattana Pukdee and Dylan Sam and J Zico Kolter and Nina Balcan and Pradeep Kumar Ravikumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=n6ztJ3Lrdj}\n}", "github": "", "project": "", "reviewers": "w12b;3fR6;9kMr;o4F1;zYz5", "pdf_size": 1134834, "rating": "4;6;7;7;7", "confidence": "3;4;3;3;2", "soundness": "2;3;3;4;3", "novelty": "2;2;4;3;3", "presentation": "3;2;3;4;3", "wc_summary": "103;148;125;200;179", "wc_strengths": "28;113;32;77;128", "wc_weaknesses": "327;228;47;598;81", "wc_questions": "4;3;8;4;43", "wc_limitations": "1;1;22;4;73", "wc_review": "463;493;234;883;504", "wc_reply_reviewers": "400;245;8;90;25", "wc_reply_authors": "1077;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 151.0, 35.139721114431175 ], "wc_strengths_avg": [ 75.6, 40.775482829759355 ], "wc_weaknesses_avg": [ 256.2, 198.5420862185144 ], "wc_questions_avg": [ 12.4, 15.396103403134186 ], "wc_limitations_avg": [ 20.2, 27.53470537340104 ], "wc_review_avg": [ 515.4, 208.65914789435905 ], "wc_reply_reviewers_avg": [ 153.6, 148.89539952597596 ], "wc_reply_authors_avg": [ 215.4, 430.80000000000007 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2711630722733202, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15912400968164435058&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "andrew.cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { 
"title": "Clifford Group Equivariant Neural Networks", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70525", "id": "n84bzMrGUD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c6e0125e14ea3d1a3de3c33fd2d49fc4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=n84bzMrGUD", "openreview": "https://openreview.net/forum?id=n84bzMrGUD", "poster": "/media/PosterPDFs/NeurIPS%202023/70525.png?t=1702141159.9715858", "slides": "https://nips.cc/virtual/2023/poster/70525", "video": "https://nips.cc/virtual/2023/poster/70525", "author_site": "David Ruhe, Johannes Brandstetter, Patrick Forr\u00e9", "tldr": "", "abstract": "We introduce Clifford Group Equivariant Neural Networks: a novel approach for constructing $\\mathrm{O}(n)$- and $\\mathrm{E}(n)$-equivariant models. We identify and study the *Clifford group*: a subgroup inside the Clifford algebra tailored to achieve several favorable properties. Primarily, the group's action forms an orthogonal automorphism that extends beyond the typical vector space to the entire Clifford algebra while respecting the multivector grading. This leads to several non-equivalent subrepresentations corresponding to the multivector decomposition. Furthermore, we prove that the action respects not just the vector space structure of the Clifford algebra but also its multiplicative structure, i.e., the geometric product. These findings imply that every polynomial in multivectors, including their grade projections, constitutes an equivariant map with respect to the Clifford group, allowing us to parameterize equivariant neural network layers. An advantage worth mentioning is that we obtain expressive layers that can elegantly generalize to inner-product spaces of any dimension. 
We demonstrate, notably from a single core implementation, state-of-the-art performance on several distinct tasks, including a three-dimensional $n$-body experiment, a four-dimensional Lorentz-equivariant high-energy physics experiment, and a five-dimensional convex hull experiment.", "keywords": "Clifford algebras;geometric deep learning;Clifford group equivariance;E(n)-equivariant neural networks;O(n)-equivariant neural networks", "primary_area": "", "supplementary_material": "/attachment/cf361e5c1c85cdca4f889260cb513999b0a99d93.pdf", "author": "David Ruhe;Johannes Brandstetter;Patrick Forr\u00e9", "authorids": "~David_Ruhe1;~Johannes_Brandstetter1;~Patrick_Forr\u00e91", "gender": ";M;", "homepage": ";;", "dblp": "243/3507;251/8691;", "google_scholar": ";KiRvOHcAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~David_Ruhe1;~Johannes_Brandstetter1;~Patrick_Forr\u00e91", "aff": "University of Amsterdam;Microsoft;", "aff_domain": "uva.nl;microsoft.com;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nruhe2023clifford,\ntitle={Clifford Group Equivariant Neural Networks},\nauthor={David Ruhe and Johannes Brandstetter and Patrick Forr{\\'e}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=n84bzMrGUD}\n}", "github": "", "project": "", "reviewers": "gQKE;w3C8;5NZ6;W8CU;jDZi", "pdf_size": 746557, "rating": "7;7;8;8;8", "confidence": "4;2;4;1;4", "soundness": "3;3;4;3;4", "novelty": "4;3;4;3;3", "presentation": "2;2;4;3;2", "wc_summary": "196;43;106;125;208", "wc_strengths": "305;27;72;78;97", "wc_weaknesses": "349;143;108;290;194", "wc_questions": "105;143;364;24;221", "wc_limitations": "1;1;11;18;55", "wc_review": "956;357;661;535;775", "wc_reply_reviewers": "66;0;239;63;44", "wc_reply_authors": "73;0;369;0;0", "reply_reviewers": "1;0;2;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 7.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 1.2649110640673518 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 135.6, 60.750637856733654 ], "wc_strengths_avg": [ 115.8, 97.34762452160813 ], "wc_weaknesses_avg": [ 216.8, 90.15409031208733 ], "wc_questions_avg": [ 171.4, 115.32319801323582 ], "wc_limitations_avg": [ 17.2, 19.963967541548445 ], "wc_review_avg": [ 656.8, 204.07096804788281 ], "wc_reply_reviewers_avg": [ 82.4, 81.77187780649285 ], "wc_reply_authors_avg": [ 88.4, 143.12036892070955 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13632650489436171884&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "uva.nl;microsoft.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Amsterdam;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.uva.nl;https://www.microsoft.com", "aff_unique_abbr": "UvA;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Netherlands;United States" }, { "title": "Environment-Aware Dynamic Graph Learning for Out-of-Distribution Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70524", "id": "n8JWIzYPRz", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bf12308ece130daa083fb21f7faf1b6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=n8JWIzYPRz", "openreview": "https://openreview.net/forum?id=n8JWIzYPRz", "poster": "/media/PosterPDFs/NeurIPS%202023/70524.png?t=1700533953.949009", "slides": "https://nips.cc/virtual/2023/poster/70524", "video": "https://nips.cc/virtual/2023/poster/70524", "author_site": "Haonan Yuan, Qingyun Sun, Xingcheng Fu, Ziwei Zhang, Cheng Ji, Hao Peng, Jianxin Li", "tldr": "", "abstract": "Dynamic graph neural networks (DGNNs) are increasingly pervasive in exploiting spatio-temporal patterns on dynamic graphs. However, existing works fail to generalize under distribution shifts, which are common in real-world scenarios. As the generation of dynamic graphs is heavily influenced by latent environments, investigating their impacts on the out-of-distribution (OOD) generalization is critical. However, it remains unexplored with the following two major challenges: **(1)** How to properly model and infer the complex environments on dynamic graphs with distribution shifts? **(2)** How to discover invariant patterns given inferred spatio-temporal environments? To solve these challenges, we propose a novel **E**nvironment-**A**ware dynamic **G**raph **LE**arning (**EAGLE**) framework for OOD generalization by modeling complex coupled environments and exploiting spatio-temporal invariant patterns. Specifically, we first design the environment-aware EA-DGNN to model environments by multi-channel environments disentangling. Then, we propose an environment instantiation mechanism for environment diversification with inferred distributions. Finally, we discriminate spatio-temporal invariant patterns for out-of-distribution prediction by the invariant pattern recognition mechanism and perform fine-grained causal interventions node-wisely with a mixture of instantiated environment samples. Experiments on real-world and synthetic dynamic graph datasets demonstrate the superiority of our method against state-of-the-art baselines under distribution shifts. 
To the best of our knowledge, we are the first to study OOD generalization on dynamic graphs from the environment learning perspective.", "keywords": "dynamic graph learning;out-of-distribution generalization;invariant learning;link prediction", "primary_area": "", "supplementary_material": "/attachment/515e66b33c50f337bf494a0feda5642fb7c748e1.pdf", "author": "Haonan Yuan;Qingyun Sun;Xingcheng Fu;Ziwei Zhang;Cheng Ji;Hao Peng;Jianxin Li", "authorids": "~Haonan_Yuan2;~Qingyun_Sun2;~Xingcheng_Fu1;~Ziwei_Zhang1;~Cheng_Ji1;~Hao_Peng10;~Jianxin_Li3", "gender": "M;F;M;;M;M;M", "homepage": ";https://sunqysunqy.github.io/;https://fuxingcheng.github.io/;;https://scholar.google.com/citations?hl=en&user=fRAeIZAAAAAJ;https://dblp.org/pid/69/7742-2.html;http://myjianxin.github.io", "dblp": "258/2050;;236/7003;;32/598-1.html;;l/JianxinLi-2.html", "google_scholar": "4UL1RIsAAAAJ;e2oYBzUAAAAJ;gN4tbgMAAAAJ;;https://scholar.google.com/citations?hl=en;;EY2lqD0AAAAJ", "orcid": "0000-0001-9205-8610;;0000-0002-4643-8126;;0000-0003-2513-3822;;0000-0001-5152-0055", "linkedin": ";;;;;;", "or_profile": "~Haonan_Yuan2;~Qingyun_Sun2;~Xingcheng_Fu1;~Ziwei_Zhang1;~Cheng_Ji1;~Hao_Peng10;~Jianxin_Li3", "aff": "Beihang University;Beihang University;Beihang University;;Beihang University;Wuhan University;Beihang University ", "aff_domain": "buaa.edu.cn;buaa.edu.cn;act.buaa.edu.cn;;buaa.edu.cn;whu.edu.cn;buaa.edu.cn", "position": "PhD student;Assistant Professor;PhD student;;PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nyuan2023environmentaware,\ntitle={Environment-Aware Dynamic Graph Learning for Out-of-Distribution Generalization},\nauthor={Haonan Yuan and Qingyun Sun and Xingcheng Fu and Ziwei Zhang and Cheng Ji and Hao Peng and Jianxin Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=n8JWIzYPRz}\n}", "github": "", "project": "", "reviewers": "aDm2;iWRC;YPMZ;oZr8", "pdf_size": 2817890, "rating": "4;5;7;7", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "81;128;81;90", "wc_strengths": "21;96;155;44", "wc_weaknesses": "357;162;69;100", "wc_questions": "4;95;58;54", "wc_limitations": "4;35;1;1", "wc_review": "467;516;364;289", "wc_reply_reviewers": "194;141;104;0", "wc_reply_authors": "1062;512;71;0", "reply_reviewers": "2;2;1;0", "reply_authors": "5;4;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 95.0, 19.403607911932255 ], "wc_strengths_avg": [ 79.0, 51.60910772334666 ], "wc_weaknesses_avg": [ 172.0, 111.93524913984871 ], "wc_questions_avg": [ 52.75, 32.36800117399899 ], "wc_limitations_avg": [ 10.25, 14.341809509263467 ], "wc_review_avg": [ 409.0, 88.37137545608306 ], "wc_reply_reviewers_avg": [ 109.75, 70.98019089858803 ], "wc_reply_authors_avg": [ 411.25, 423.8286062785286 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15709963190907109186&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;buaa.edu.cn;act.buaa.edu.cn;;buaa.edu.cn;whu.edu.cn;buaa.edu.cn", "author_num": 7, 
"aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Beihang University;Wuhan University", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.whu.edu.cn/", "aff_unique_abbr": "BUAA;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SMPLer-X: Scaling Up Expressive Human Pose and Shape Estimation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73473", "id": "n8hpztIuet", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2614947a25d7c435bcd56c51958ddcb1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=n8hpztIuet", "openreview": "https://openreview.net/forum?id=n8hpztIuet", "poster": "/media/PosterPDFs/NeurIPS%202023/73473.png?t=1701680803.5875423", "slides": "https://nips.cc/virtual/2023/poster/73473", "video": "https://nips.cc/virtual/2023/poster/73473", "author_site": "Zhongang Cai, Wanqi Yin, Ailing Zeng, CHEN WEI, Qingping SUN, Wang Yanjun, Hui En Pang, Haiyi Mei, Mingyuan Zhang, Lei Zhang, Chen Change Loy, Lei Yang, Ziwei Liu", "tldr": "", "abstract": "Expressive human pose and shape estimation (EHPS) unifies body, hands, and face motion capture with numerous applications. Despite encouraging progress, current state-of-the-art methods still depend largely on a confined set of training datasets. In this work, we investigate scaling up EHPS towards the first generalist foundation model (dubbed SMPLer-X), with up to ViT-Huge as the backbone and training with up to 4.5M instances from diverse data sources. With big data and the large model, SMPLer-X exhibits strong performance across diverse test benchmarks and excellent transferability to even unseen environments. 1) For the data scaling, we perform a systematic investigation on 32 EHPS datasets, including a wide range of scenarios that a model trained on any single dataset cannot handle. More importantly, capitalizing on insights obtained from the extensive benchmarking process, we optimize our training scheme and select datasets that lead to a significant leap in EHPS capabilities. 2) For the model scaling, we take advantage of vision transformers to study the scaling law of model sizes in EHPS. Moreover, our finetuning strategy turn SMPLer-X into specialist models, allowing them to achieve further performance boosts. 
Notably, our foundation model SMPLer-X consistently delivers state-of-the-art results on seven benchmarks such as AGORA (107.2 mm NMVE), UBody (57.4 mm PVE), EgoBody (63.6 mm PVE), and EHF (62.3 mm PVE without finetuning).", "keywords": "Expressive Human Pose and Shape Estimation;Benchmarks;Datasets;Foundation Models", "primary_area": "", "supplementary_material": "/attachment/e99eeadb206abb553ea4d1d3c0ad4f665dee737c.pdf", "author": "Zhongang Cai;Wanqi Yin;Ailing Zeng;CHEN WEI;Qingping SUN;Yanjun Wang;Hui En Pang;Haiyi Mei;Mingyuan Zhang;Lei Zhang;Chen Change Loy;Lei Yang;Ziwei Liu", "authorids": "~Zhongang_Cai1;~Wanqi_Yin1;~Ailing_Zeng1;~CHEN_WEI4;~Qingping_SUN1;~Yanjun_Wang1;~Hui_En_Pang1;~Haiyi_Mei1;~Mingyuan_Zhang1;~Lei_Zhang23;~Chen_Change_Loy2;~Lei_Yang7;~Ziwei_Liu1", "gender": "M;;F;M;Not Specified;M;F;M;M;M;M;M;M", "homepage": "https://caizhongang.com;;https://ailingzeng.site/;;;https://github.com/WYJSJTU;;;https://mingyuan-zhang.github.io/;https://www.mmlab-ntu.com/person/ccloy/index.html;https://www.yanglei.me;https://liuziwei7.github.io/;https://www.leizhang.org/", "dblp": "232/3190;;226/4720;;;;;;;01/5855;50/2484-45;05/6300-2;z/LeiZhang", "google_scholar": "WrDKqIAAAAAJ;zlIJwBEAAAAJ;Tn7fzS8AAAAJ;;;X-WP6DYAAAAJ;;TOZ9wR4AAAAJ;2QLD4fAAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ;jZH2IPYAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;fIlGZToAAAAJ", "orcid": "0000-0002-1810-3855;;;;0000-0003-3998-5356;;0000-0002-2353-9071;;;0000-0001-5345-1591;0000-0002-0571-5924;;", "linkedin": "caizhongang/;yinwanqi;%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;chen-wei-005833211/;;;;;;;;;", "or_profile": "~Zhongang_Cai1;~Wanqi_Yin1;~Ailing_Zeng1;~CHEN_WEI4;~Qingping_SUN1;~Yanjun_Wang1;~Hui_En_Pang1;~Haiyi_Mei1;~Mingyuan_Zhang1;~Chen_Change_Loy2;~Lei_Yang7;~Ziwei_Liu1;~Lei_Zhang1", "aff": "Nanyang Technological University;The University of Tokyo;International Digital Economy Academy;SenseTime International PTE. 
LTD.;City University of Hong Kong;SenseTime;Nanyang Technological University;SenseTime;Nanyang Technological University;Nanyang Technological University;Sensetime Ltd.;Nanyang Technological University;International Digital Economy Academy", "aff_domain": "ntu.edu.sg;u-tokyo.ac.jp;idea.edu.cn;sensetime.com;cityu.edu;sensetime.com;ntu.edu.sg;sensetime.com;ntu.edu.sg;ntu.edu.sg;sensetime.com;ntu.edu.sg;idea.edu.cn", "position": "PhD student;PhD student;Researcher;Researcher;PhD student;Intern;PhD student;Researcher;PhD student;Full Professor;Researcher;Assistant Professor;Chief Scientist", "bibtex": "@inproceedings{\ncai2023smplerx,\ntitle={{SMPL}er-X: Scaling Up Expressive Human Pose and Shape Estimation},\nauthor={Zhongang Cai and Wanqi Yin and Ailing Zeng and CHEN WEI and Qingping SUN and Yanjun Wang and Hui En Pang and Haiyi Mei and Mingyuan Zhang and Lei Zhang and Chen Change Loy and Lei Yang and Ziwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=n8hpztIuet}\n}", "github": "", "project": "", "reviewers": "5yuA;uVnR;6mpM;nrrt;er9Y", "pdf_size": 3013673, "rating": "4;6;7;7;7", "confidence": "5;4;4;4;4", "wc_summary_and_contributions": "56;41;71;83;34", "wc_strengths": "54;13;37;61;55", "wc_improvement": "131;49;27;28;43", "wc_limitations": "9;40;1;11;30", "wc_correctness": "1;5;6;20;1", "wc_clarity": "2;14;1;146;1", "wc_relation_to_prior_work": "1;24;1;7;1", "wc_documentation": "2;5;1;10;9", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "257;192;146;367;175", "wc_reply_reviewers": "80;0;48;0;0", "wc_reply_authors": "1617;346;306;926;278", "reply_reviewers": "1;0;1;0;0", "reply_authors": "4;2;2;3;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 57.0, 18.20988742414406 ], "wc_strengths_avg": [ 44.0, 17.435595774162696 ], "wc_improvement_avg": [ 55.6, 38.64505142964621 ], "wc_limitations_avg": [ 18.2, 14.46927779814874 ], "wc_correctness_avg": [ 6.6, 7.002856560004639 ], "wc_clarity_avg": [ 32.8, 56.81337870607591 ], "wc_relation_to_prior_work_avg": [ 6.8, 8.908422980528034 ], "wc_documentation_avg": [ 5.4, 3.6110940170535577 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 227.4, 78.7263615315734 ], "wc_reply_reviewers_avg": [ 25.6, 32.9460164511584 ], "wc_reply_authors_avg": [ 694.6, 519.7028381681208 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.9432422182837988, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8342626543629394820&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "ntu.edu.sg;u-tokyo.ac.jp;idea.edu.cn;sensetime.com;cityu.edu;sensetime.com;ntu.edu.sg;sensetime.com;ntu.edu.sg;ntu.edu.sg;sensetime.com;ntu.edu.sg;idea.edu.cn", "author_num": 13, "aff_unique_index": "0;1;2;3;4;3;0;3;0;0;3;0;2", "aff_unique_norm": "Nanyang Technological University;University of Tokyo;International Digital Economy Academy;SenseTime;City University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.u-tokyo.ac.jp;;https://www.sensetime.com;https://www.cityu.edu.hk", "aff_unique_abbr": "NTU;UTokyo;;SenseTime;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;3;3;0;3;0;0;3;0", "aff_country_unique": 
"Singapore;Japan;;China" }, { "title": "Deconstructing Data Reconstruction: Multiclass, Weight Decay and General Losses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70523", "id": "nA9Fh3HFHJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a1d20cc72a21ef971d7e49a90d8fa56f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nA9Fh3HFHJ", "openreview": "https://openreview.net/forum?id=nA9Fh3HFHJ", "poster": "/media/PosterPDFs/NeurIPS%202023/70523.png?t=1701376722.2692254", "slides": "https://nips.cc/virtual/2023/poster/70523", "video": "https://nips.cc/virtual/2023/poster/70523", "author_site": "Gon Buzaglo, Niv Haim, Gilad Yehudai, Gal Vardi, Yakir Oz, Yaniv Nikankin, Michal Irani", "tldr": "", "abstract": "Memorization of training data is an active research area, yet our understanding of the inner workings of neural networks is still in its infancy.\nRecently, Haim et al. 2022 proposed a scheme to reconstruct training samples from multilayer perceptron binary classifiers, effectively demonstrating that a large portion of training samples are encoded in the parameters of such networks.\nIn this work, we extend their findings in several directions, including reconstruction from multiclass and convolutional neural networks. \nWe derive a more general reconstruction scheme which is applicable to a wider range of loss functions such as regression losses. \nMoreover, we study the various factors that contribute to networks' susceptibility to such reconstruction schemes. \nIntriguingly, we observe that using weight decay during training increases reconstructability both in terms of quantity and quality. \nAdditionally, we examine the influence of the number of neurons relative to the number of training samples on the reconstructability.\nCode: https://github.com/gonbuzaglo/decoreco", "keywords": "memorization;data reconstruction;implicit bias", "primary_area": "", "supplementary_material": "/attachment/b91021246bbf1bf26f7a923874fbe1bb32a64494.pdf", "author": "Gon Buzaglo;Niv Haim;Gilad Yehudai;Gal Vardi;Yakir Oz;Yaniv Nikankin;michal Irani", "authorids": "~Gon_Buzaglo1;~Niv_Haim1;~Gilad_Yehudai2;~Gal_Vardi1;~Yakir_Oz1;~Yaniv_Nikankin1;~michal_Irani1", "gender": ";;M;M;;M;F", "homepage": "https://www.buzaglo.me/;https://nivha.github.io/;;https://sites.google.com/view/galvardi/home;;;http://www.weizmann.ac.il/math/irani/", "dblp": "346/4869.html;232/3047;239/4344;https://dblp.uni-trier.de/pid/167/9638.html;;334/2234;04/3190", "google_scholar": "YZHL8N0AAAAJ;https://scholar.google.co.il/citations?user=f7SCiakAAAAJ;opVT1qkAAAAJ;https://scholar.google.co.il/citations?hl=en;;iNg1bngAAAAJ;https://scholar.google.co.uk/citations?user=5hJNWakAAAAJ", "orcid": ";0000-0002-6537-1297;;;;0000-0003-3425-2540;", "linkedin": "gonbuzaglo;niv-haim-736b3b5b/;;;yakir-oz-443aab172;yaniv-nikankin-79628aa2/;", "or_profile": "~Gon_Buzaglo1;~Niv_Haim1;~Gilad_Yehudai2;~Gal_Vardi1;~Yakir_Oz1;~Yaniv_Nikankin1;~michal_Irani1", "aff": "Technion - Israel Institute of Technology, Technion;Weizmann Institute of Science;Weizmann Institute of Science;Toyota Technological Institute at Chicago;Weizmann Institute of Science;Weizmann Institute of Science;Google", "aff_domain": "technion.ac.il;weizmann.ac.il;weizmann.ac.il;ttic.edu;weizmann.ac.il;weizmann.ac.il;google.com", "position": "Undergrad student;PhD student;PhD student;Postdoc;MS student;MS student;Researcher", "bibtex": "@inproceedings{\nbuzaglo2023deconstructing,\ntitle={Deconstructing Data 
Reconstruction: Multiclass, Weight Decay and General Losses},\nauthor={Gon Buzaglo and Niv Haim and Gilad Yehudai and Gal Vardi and Yakir Oz and Yaniv Nikankin and michal Irani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nA9Fh3HFHJ}\n}", "github": "", "project": "", "reviewers": "ZiTy;FQAU;nrvg;Poqu", "pdf_size": 5788917, "rating": "4;5;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "82;42;70;60", "wc_strengths": "62;35;96;93", "wc_weaknesses": "223;137;161;103", "wc_questions": "87;2;69;16", "wc_limitations": "20;5;110;40", "wc_review": "474;221;506;312", "wc_reply_reviewers": "671;44;90;90", "wc_reply_authors": "801;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 14.654350889752845 ], "wc_strengths_avg": [ 71.5, 24.924887161229034 ], "wc_weaknesses_avg": [ 156.0, 43.829214001622255 ], "wc_questions_avg": [ 43.5, 35.42950747611375 ], "wc_limitations_avg": [ 43.75, 40.21426985536353 ], "wc_review_avg": [ 378.25, 116.83829637580308 ], "wc_reply_reviewers_avg": [ 223.75, 258.9018877876328 ], "wc_reply_authors_avg": [ 200.25, 346.84317421566766 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9254074112549543966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "technion.ac.il;weizmann.ac.il;weizmann.ac.il;ttic.edu;weizmann.ac.il;weizmann.ac.il;google.com", "author_num": 7, "aff_unique_index": "0;1;1;2;1;1;3", "aff_unique_norm": "Technion - Israel Institute of Technology;Weizmann Institute of Science;Toyota Technological Institute at Chicago;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.technion.ac.il;https://www.weizmann.org.il;https://www.tti-chicago.org;https://www.google.com", "aff_unique_abbr": "Technion;Weizmann;TTI Chicago;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "Training Transitive and Commutative Multimodal Transformers with LoReTTa", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70522", "id": "nArzDm353Y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/860a092bb4d9d81d3133a01c50c01578-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nArzDm353Y", "openreview": "https://openreview.net/forum?id=nArzDm353Y", "poster": "/media/PosterPDFs/NeurIPS%202023/70522.png?t=1701446824.8727643", "slides": "https://nips.cc/virtual/2023/poster/70522", "video": "https://nips.cc/virtual/2023/poster/70522", "author_site": "Manuel Tran, Yashin Dicente Cid, Amal Lahiani, Fabian Theis, Tingying Peng, Eldad Klaiman", "tldr": "", "abstract": "Training multimodal foundation models is challenging due to the limited availability of multimodal datasets. While many public datasets pair images with text, few combine images with audio or text with audio. 
Even rarer are datasets that align all three modalities at once. Critical domains such as healthcare, infrastructure, or transportation are particularly affected by missing modalities. This makes it difficult to integrate all modalities into a large pre-trained neural network that can be used out-of-the-box or fine-tuned for different downstream tasks. We introduce LoReTTa ($\\textbf{L}$inking m$\\textbf{O}$dalities with a t$\\textbf{R}$ansitive and commutativ$\\textbf{E}$ pre-$\\textbf{T}$raining s$\\textbf{T}$r$\\textbf{A}$tegy) to address this understudied problem. Our self-supervised framework unifies causal modeling and masked modeling with the rules of commutativity and transitivity. This allows us to transition within and between modalities. As a result, our pre-trained models are better at exploring the true underlying joint probability distribution. Given a dataset containing only the disjoint combinations $(A, B)$ and $(B, C)$, LoReTTa can model the relation $A \\leftrightarrow C$ with $A \\leftrightarrow B \\leftrightarrow C$. In particular, we show that a transformer pre-trained with LoReTTa can handle any mixture of modalities at inference time, including the never-seen pair $(A, C)$ and the triplet $(A, B, C)$. We extensively evaluate our approach on a synthetic, medical, and reinforcement learning dataset. Across different domains, our universal multimodal transformer consistently outperforms strong baselines such as GPT, BERT, and CLIP on tasks involving the missing modality tuple.", "keywords": "generative pre-training;causal modeling;masked modeling;commutative modeling;transitive modeling;multimodal learning", "primary_area": "", "supplementary_material": "/attachment/b6904ce08fffb40ac281644e4061a01147cc4097.pdf", "author": "Manuel Tran;Yashin Dicente Cid;Amal Lahiani;Fabian J Theis;Tingying Peng;Eldad Klaiman", "authorids": "~Manuel_Tran2;~Yashin_Dicente_Cid3;~Amal_Lahiani1;~Fabian_J_Theis1;~Tingying_Peng1;~Eldad_Klaiman1", "gender": ";M;;F;M;", "homepage": "https://github.com/manuel-tran;;;https://www.helmholtz.ai/themenmenue/our-research/research-groups/peng-group/index.html;;https://www.helmholtz-munich.de/en/icb/pi/fabian-theis", "dblp": "315/9601;159/1631;220/4029;02/11511;220/3930;t/FabianJTheis", "google_scholar": "https://scholar.google.de/citations?user=pqxK6yEAAAAJ;;fKR6eloAAAAJ;https://scholar.google.de/citations?user=jUiKc6QAAAAJ;;sqWpn2AAAAAJ", "orcid": "0009-0004-9431-754X;0000-0001-7742-5363;;0000-0002-7881-1749;;0000-0002-2419-1943", "linkedin": ";;amal-lahiani/;;eldadk/;", "or_profile": "~Manuel_Tran2;~Yashin_Dicente_Cid3;~Amal_Lahiani1;~Tingying_Peng1;~Eldad_Klaiman1;~Fabian_J._Theis1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Roche Diagnostics;Roche Diagnostics GmbH;Helmholtz Zentrum Muenchen;Roche Diagnostics GmbH;Technical University Munich", "aff_domain": "tum.de;roche.com;roche.com;helmholtz-muenchen.de;roche.com;tum.de", "position": "PhD student;Senior Software Engineer;Principal Software Engineer;Principal Researcher;Senior SW Engineering Manager ;Full Professor", "bibtex": "@inproceedings{\ntran2023training,\ntitle={Training Transitive and Commutative Multimodal Transformers with LoRe{TT}a},\nauthor={Manuel Tran and Yashin Dicente Cid and Amal Lahiani and Fabian J Theis and Tingying Peng and Eldad Klaiman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nArzDm353Y}\n}", "github": "", "project": "", "reviewers": "a22R;u4o4;SR2A;FSDR;QEx4;7Goz", 
"pdf_size": 1722480, "rating": "4;5;6;6;6;7", "confidence": "3;3;4;2;4;3", "soundness": "2;1;4;2;2;4", "novelty": "2;2;4;2;3;3", "presentation": "2;2;4;3;4;2", "wc_summary": "76;135;155;63;224;87", "wc_strengths": "24;225;64;34;68;83", "wc_weaknesses": "289;294;182;116;654;100", "wc_questions": "65;11;76;5;20;2", "wc_limitations": "10;17;14;1;34;13", "wc_review": "464;682;491;219;1000;285", "wc_reply_reviewers": "0;181;444;0;912;162", "wc_reply_authors": "0;711;1655;0;1717;219", "reply_reviewers": "0;2;3;0;4;1", "reply_authors": "1;2;5;1;5;3", "rating_avg": [ 5.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 2.8333333333333335, 0.8975274678557507 ], "wc_summary_avg": [ 123.33333333333333, 55.48773638281606 ], "wc_strengths_avg": [ 83.0, 66.62331924083838 ], "wc_weaknesses_avg": [ 272.5, 186.50982994648476 ], "wc_questions_avg": [ 29.833333333333332, 29.469852768926796 ], "wc_limitations_avg": [ 14.833333333333334, 9.92331709773613 ], "wc_review_avg": [ 523.5, 260.24779858178755 ], "wc_reply_reviewers_avg": [ 283.1666666666667, 318.0107003790211 ], "wc_reply_authors_avg": [ 717.0, 725.2656984397742 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.4907119849998596 ], "reply_authors_avg": [ 2.8333333333333335, 1.674979270186815 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.08574929257125441, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14256302617605614703&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "tum.de;roche.com;roche.com;helmholtz-muenchen.de;roche.com;tum.de", "author_num": 6, "aff_unique_index": "0;1;1;2;1;3", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Roche Diagnostics;Helmholtz Zentrum M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tum.de;https://www.roche-diagnostics.com;https://www.helmholtz-muenchen.de;https://www.tum.de", "aff_unique_abbr": "TUM;;HMGU;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "Germany;Switzerland" }, { "title": "Predicting Global Label Relationship Matrix for Graph Neural Networks under Heterophily", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70521", "id": "nBFMCyEi0j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/23aa2163dea287441ebebc1295d5b3fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nBFMCyEi0j", "openreview": "https://openreview.net/forum?id=nBFMCyEi0j", "poster": "/media/PosterPDFs/NeurIPS%202023/70521.png?t=1699538331.9989963", "slides": "https://nips.cc/virtual/2023/poster/70521", "video": "https://nips.cc/virtual/2023/poster/70521", "author_site": "Langzhang Liang, Xiangjing Hu, Zenglin Xu, Zixing Song, Irwin King", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have been shown to achieve remarkable performance on node classification tasks by exploiting both graph structures and node features. The majority of existing GNNs rely on the implicit homophily assumption. Recent studies have demonstrated that GNNs may struggle to model heterophilous graphs where nodes with different labels are more likely connected. 
To address this issue, we propose a generic GNN applicable to both homophilous and heterophilous graphs, namely Low-Rank Graph Neural Network (LRGNN). Our analysis demonstrates that a signed graph's global label relationship matrix has a low rank. This insight inspires us to predict the label relationship matrix by solving a robust low-rank matrix approximation problem, as prior research has proven that low-rank approximation could achieve perfect recovery under certain conditions. The experimental results reveal that the solution bears a strong resemblance to the label relationship matrix, presenting two advantages for graph modeling: a block diagonal structure and varying distributions of within-class and between-class entries.", "keywords": "graph neural networks;heterophily problem;global label relationship matrix", "primary_area": "", "supplementary_material": "/attachment/d695f9a5f6a63bd2227cc0b3196df7f7ebe866e4.pdf", "author": "Langzhang Liang;Xiangjing Hu;Zenglin Xu;Zixing Song;Irwin King", "authorids": "~Langzhang_Liang1;~Xiangjing_Hu1;~Zenglin_Xu1;~Zixing_Song2;~Irwin_King1", "gender": "M;;M;;M", "homepage": "https://orcid.org/0000-0001-8919-0215;https://github.com/Starry-Hu;https://faculty.fudan.edu.cn/xuzenglin/en/index.htm;;https://www.cse.cuhk.edu.hk/irwin.king/", "dblp": "304/3069;;68/1538;;k/IrwinKing", "google_scholar": "Gq2LVnIAAAAJ;;gF0H9nEAAAAJ;;MXvC7tkAAAAJ", "orcid": "0000-0001-8919-0215;;0000-0001-5550-6461;;0000-0001-8106-6447", "linkedin": ";;;;irwinking/", "or_profile": "~Langzhang_Liang1;~Xiangjing_Hu1;~Zenglin_Xu1;~Zixing_Song2;~Irwin_King1", "aff": "Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology;Harbin Institute of Technology Shenzhen;;The Chinese University of Hong Kong", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;;cuhk.edu.hk", "position": "MS student;MS student;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nliang2023predicting,\ntitle={Predicting Global Label Relationship Matrix for Graph Neural Networks under Heterophily},\nauthor={Langzhang Liang and Xiangjing Hu and Zenglin Xu and Zixing Song and Irwin King},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nBFMCyEi0j}\n}", "github": "", "project": "", "reviewers": "B8tM;jP2E;rmTT;mJPn;JZ4m", "pdf_size": 2686517, "rating": "5;5;5;6;7", "confidence": "4;4;5;4;2", "soundness": "3;3;2;3;3", "novelty": "3;2;2;2;3", "presentation": "3;3;3;3;4", "wc_summary": "69;70;33;134;130", "wc_strengths": "98;67;16;27;75", "wc_weaknesses": "149;176;343;145;11", "wc_questions": "1;182;4;117;126", "wc_limitations": "1;1;4;1;1", "wc_review": "318;496;400;424;343", "wc_reply_reviewers": "0;67;20;286;0", "wc_reply_authors": "0;117;33;709;0", "reply_reviewers": "0;1;1;2;0", "reply_authors": "1;2;2;3;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 87.2, 38.95330537964654 ], "wc_strengths_avg": [ 56.6, 30.611109094575458 ], "wc_weaknesses_avg": [ 164.8, 105.99698108908574 ], "wc_questions_avg": [ 86.0, 71.73004949113029 ], "wc_limitations_avg": [ 1.6, 1.2000000000000002 ], "wc_review_avg": [ 396.2, 62.7579477038566 ], "wc_reply_reviewers_avg": [ 74.6, 108.49811058262719 ], "wc_reply_authors_avg": [ 171.8, 271.9863231855602 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 
0.7483314773547883 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.867527617235709, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10353823195653528426&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;;cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://en.hhit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "HIT;CUHK", "aff_campus_unique_index": "0;1;0;2", "aff_campus_unique": "Shenzhen;Harbin;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "The Equivalence of Dynamic and Strategic Stability under Regularized Learning in Games", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70520", "id": "nCLdsEzZBV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f7e8bc4c853e3e58bc487e213c79c587-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nCLdsEzZBV", "openreview": "https://openreview.net/forum?id=nCLdsEzZBV", "poster": "/media/PosterPDFs/NeurIPS%202023/70520.png?t=1702391972.156183", "slides": "https://nips.cc/virtual/2023/poster/70520", "video": "https://nips.cc/virtual/2023/poster/70520", "author_site": "Victor Boone, Panayotis Mertikopoulos", "tldr": "", "abstract": "In this paper, we examine the long-run behavior of regularized, no-regret learning in finite N-player games. A well-known result in the field states that the empirical frequencies of play under no-regret learning converge to the game\u2019s set of coarse correlated equilibria; however, our understanding of how the players' _actual strategies_ evolve over time is much more limited \u2013 and, in many cases, non-existent. This issue is exacerbated further by a series of recent results showing that _only_ strict Nash equilibria are stable and attracting under regularized learning, thus making the relation between learning and _pointwise_ solution concepts particularly elusive. In lieu of this, we take a more general approach and instead seek to characterize the _setwise_ rationality properties of the players' day-to-day trajectory of play. To do so, we focus on one of the most stringent criteria of setwise strategic stability, namely that any unilateral deviation from the set in question incurs a cost to the deviator \u2013 a property known as _closedness under better replies_ (club). 
In so doing, we obtain a remarkable equivalence between strategic and dynamic stability: _a product of pure strategies is closed under better replies if and only if its span is stable and attracting under regularized learning._ In addition, we estimate the rate of convergence to such sets, and we show that methods based on entropic regularization (like the exponential weights algorithm) converge at a geometric rate, while projection-based methods converge within a finite number of iterations, even with bandit, payoff-based feedback.", "keywords": "Regularized learning;dynamic stability;strategic stability;Nash equilibrium", "primary_area": "", "supplementary_material": "/attachment/ff055ed9385c11c9f3d5eab88b6c80e779402653.pdf", "author": "Victor Boone;Panayotis Mertikopoulos", "authorids": "~Victor_Boone1;~Panayotis_Mertikopoulos1", "gender": "M;M", "homepage": "https://victor-boone.github.io/;http://polaris.imag.fr/panayotis.mertikopoulos/", "dblp": "249/9447.html;49/6721", "google_scholar": ";xsusqPYAAAAJ", "orcid": ";0000-0003-2026-9616", "linkedin": ";", "or_profile": "~Victor_Boone1;~Panayotis_Mertikopoulos1", "aff": "Universit\u00e9 Grenoble Alpes;French National Center for Scientific Research", "aff_domain": "univ-grenoble-alpes.fr;imag.fr", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nboone2023the,\ntitle={The Equivalence of Dynamic and Strategic Stability under Regularized Learning in Games},\nauthor={Victor Boone and Panayotis Mertikopoulos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nCLdsEzZBV}\n}", "github": "", "project": "", "reviewers": "7Hjy;vfB1;nBFX", "pdf_size": 3028497, "rating": "6;7;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "29;79;72", "wc_strengths": "34;80;87", "wc_weaknesses": "50;70;9", "wc_questions": "58;106;30", "wc_limitations": "1;11;21", "wc_review": "172;346;219", "wc_reply_reviewers": "12;112;15", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.0, 22.105806175452337 ], "wc_strengths_avg": [ 67.0, 23.50886357667394 ], "wc_weaknesses_avg": [ 43.0, 25.39028685672272 ], "wc_questions_avg": [ 64.66666666666667, 31.382939455839523 ], "wc_limitations_avg": [ 11.0, 8.16496580927726 ], "wc_review_avg": [ 245.66666666666666, 73.49527573630536 ], "wc_reply_reviewers_avg": [ 46.333333333333336, 46.44949467492144 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2200132025952660063&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 12, "email": "univ-grenoble-alpes.fr;imag.fr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Grenoble Alpes;French National Center for Scientific Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-grenoble-alpes.fr;https://www.cnrs.fr", "aff_unique_abbr": "UGA;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "FouriDown: Factoring Down-Sampling into Shuffling 
and Superposing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70519", "id": "nCwStXFDQu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2dae7d1ccf1edf76f8ce7c282bdf4730-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nCwStXFDQu", "openreview": "https://openreview.net/forum?id=nCwStXFDQu", "poster": "/media/PosterPDFs/NeurIPS%202023/70519.png?t=1699864397.3198793", "slides": "https://nips.cc/virtual/2023/poster/70519", "video": "https://nips.cc/virtual/2023/poster/70519", "author_site": "Qi Zhu, man zhou, Jie Huang, Naishan Zheng, Hongzhi Gao, Chongyi Li, Yuan Xu, Feng Zhao", "tldr": "", "abstract": "Spatial down-sampling techniques, such as strided convolution, Gaussian, and Nearest down-sampling, are essential in deep neural networks. In this study, we revisit the working mechanism of the spatial down-sampling family and analyze the biased effects caused by the static weighting strategy employed in previous approaches. To overcome this limitation, we propose a novel down-sampling paradigm in the Fourier domain, abbreviated as FouriDown, which unifies existing down-sampling techniques. Drawing inspiration from the signal sampling theorem, we parameterize the non-parameter static weighting down-sampling operator as a learnable and context-adaptive operator within a unified Fourier function. Specifically, we organize the corresponding frequency positions of the 2D plane in a physically-closed manner within a single channel dimension. We then perform point-wise channel shuffling based on an indicator that determines whether a channel's signal frequency bin is susceptible to aliasing, ensuring the consistency of the weighting parameter learning. FouriDown, as a generic operator, comprises four key components: 2D discrete Fourier transform, context shuffling rules, Fourier weighting-adaptively superposing rules, and 2D inverse Fourier transform. These components can be easily integrated into existing image restoration networks. To demonstrate the efficacy of FouriDown, we conduct extensive experiments on image de-blurring and low-light image enhancement. The results consistently show that FouriDown can provide significant performance improvements. 
We will make the code publicly available to facilitate further exploration and application of FouriDown.", "keywords": "Image restoration;Down-Sampling;Fourier transform", "primary_area": "", "supplementary_material": "", "author": "Qi Zhu;Man Zhou;Jie Huang;Naishan Zheng;Hongzhi Gao;Chongyi Li;Yuan Xu;Feng Zhao", "authorids": "~Qi_Zhu9;~Man_Zhou4;~Jie_Huang4;~Naishan_Zheng1;~Hongzhi_Gao1;~Chongyi_Li1;~Yuan_Xu3;~Feng_Zhao6", "gender": "M;;M;M;M;;M;M", "homepage": ";;;;https://github.com/Irvingao;;;https://bivlab123.github.io/", "dblp": ";;;324/4929;;;89/3127;181/2734-4", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;aL_WRTkAAAAJ;;;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-1545-1854;;0000-0002-3518-3404;0000-0002-7451-8780;;;;0000-0001-6767-8105", "linkedin": ";;;;;;;", "or_profile": "~Qi_Zhu9;~Man_Zhou4;~Jie_Huang4;~Naishan_Zheng1;~Hongzhi_Gao1;~Chongyi_Li1;~Yuan_Xu3;~Feng_Zhao6", "aff": "University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;Nanyang Technological University;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ntu.edu.sg;ustc.edu.cn", "position": "PhD student;;PhD student;PhD student;MS student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhu2023fouridown,\ntitle={FouriDown: Factoring Down-Sampling into Shuffling and Superposing},\nauthor={Qi Zhu and Man Zhou and Jie Huang and Naishan Zheng and Hongzhi Gao and Chongyi Li and Yuan Xu and Feng Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nCwStXFDQu}\n}", "github": "", "project": "", "reviewers": "e5Db;k1g9;fgKa;V6WG;rAiK", "pdf_size": 46804119, "rating": "2;6;6;7;8", "confidence": "4;4;4;5;5", "soundness": "3;3;3;4;4", "novelty": "1;3;3;4;4", "presentation": "2;3;3;4;4", "wc_summary": "73;82;94;42;59", "wc_strengths": "24;86;52;106;61", "wc_weaknesses": "148;235;21;117;89", "wc_questions": "4;41;50;4;98", "wc_limitations": "4;43;1;17;7", "wc_review": "253;487;218;286;314", "wc_reply_reviewers": "176;186;0;13;9", "wc_reply_authors": "411;697;0;0;0", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 5.8, 2.039607805437114 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 1.0954451150103321 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 70.0, 18.077610461562667 ], "wc_strengths_avg": [ 65.8, 28.23047998175022 ], "wc_weaknesses_avg": [ 122.0, 70.37044834303673 ], "wc_questions_avg": [ 39.4, 34.8 ], "wc_limitations_avg": [ 14.4, 15.278743403827423 ], "wc_review_avg": [ 311.6, 93.40364018602274 ], "wc_reply_reviewers_avg": [ 76.8, 85.24177379665443 ], "wc_reply_authors_avg": [ 221.6, 286.0759339755793 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6805446536716203, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8678180654468392918&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ntu.edu.sg;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Science and Technology of 
China;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "USTC;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Private Distribution Learning with Public Data: The View from Sample Compression", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70518", "id": "nDIrJmKPd5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1687466683649e8bdcdec0e3f5c8de64-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nDIrJmKPd5", "openreview": "https://openreview.net/forum?id=nDIrJmKPd5", "poster": "/media/PosterPDFs/NeurIPS%202023/70518.png?t=1702139738.1395063", "slides": "https://nips.cc/virtual/2023/poster/70518", "video": "https://nips.cc/virtual/2023/poster/70518", "author_site": "Shai Ben-David, Alex Bie, Cl\u00e9ment L Canonne, Gautam Kamath, Vikrant Singhal", "tldr": "", "abstract": "We study the problem of private distribution learning with access to public data. In this setup, which we refer to as *public-private learning*, the learner is given public and private samples drawn from an unknown distribution $p$ belonging to a class $\\mathcal Q$, with the goal of outputting an estimate of $p$ while adhering to privacy constraints (here, pure differential privacy) only with respect to the private samples. \n \nWe show that the public-private learnability of a class $\\mathcal Q$ is connected to the existence of a sample compression scheme for $\\mathcal Q$, as well as to an intermediate notion we refer to as \\emph{list learning}. Leveraging this connection: (1) approximately recovers previous results on Gaussians over $\\mathbb R^d$; and (2) leads to new ones, including sample complexity upper bounds for arbitrary $k$-mixtures of Gaussians over $\\mathbb R^d$, results for agnostic and distribution-shift resistant learners, as well as closure properties for public-private learnability under taking mixtures and products of distributions. 
Finally, via the connection to list learning, we show that for Gaussians in $\\mathbb R^d$, at least $d$ public samples are necessary for private learnability, which is close to the known upper bound of $d+1$ public samples.", "keywords": "differential privacy;distribution learning;gaussians;mixture of gaussians;compression schemes;robust compression schemes;privacy", "primary_area": "", "supplementary_material": "", "author": "Shai Ben-David;Alex Bie;Clement Louis Canonne;Gautam Kamath;Vikrant Singhal", "authorids": "~Shai_Ben-David2;~Alex_Bie1;~Clement_Louis_Canonne1;~Gautam_Kamath1;~Vikrant_Singhal2", "gender": "M;;M;M;M", "homepage": "https://cs.uwaterloo.ca/~shai/;;https://ccanonne.github.io/;http://www.gautamkamath.com/;https://www.vikrantsinghal.com/", "dblp": "15/6319;;28/9840L;73/11140;181/0834", "google_scholar": "https://scholar.google.com.tw/citations?user=kezPqwoAAAAJ;;u_OXsBIAAAAJ;MK6zHkYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-7153-5211;;", "linkedin": ";;;;vikrantsinghal/", "or_profile": "~Shai_Ben-David2;~Alex_Bie1;~Clement_Louis_Canonne1;~Gautam_Kamath1;~Vikrant_Singhal2", "aff": "University of Waterloo;;University of Sydney;University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;;sydney.edu.au;uwaterloo.ca;uwaterloo.ca", "position": "Full Professor;;Lecturer;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nben-david2023private,\ntitle={Private Distribution Learning with Public Data: The View from Sample Compression},\nauthor={Shai Ben-David and Alex Bie and Clement Louis Canonne and Gautam Kamath and Vikrant Singhal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nDIrJmKPd5}\n}", "github": "", "project": "", "reviewers": "rJcv;cweb;PbeY;7w9P", "pdf_size": 526528, "rating": "6;7;7;8", "confidence": "4;3;3;3", "soundness": "4;4;4;3", "novelty": "3;4;3;3", "presentation": "3;4;3;4", "wc_summary": "202;435;487;49", "wc_strengths": "138;245;52;39", "wc_weaknesses": "154;103;117;4", "wc_questions": "177;41;178;154", "wc_limitations": "12;28;10;1", "wc_review": "683;852;844;247", "wc_reply_reviewers": "764;24;10;0", "wc_reply_authors": "123;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 293.25, 177.21226678760135 ], "wc_strengths_avg": [ 118.5, 82.34834546000302 ], "wc_weaknesses_avg": [ 94.5, 55.472966389043954 ], "wc_questions_avg": [ 137.5, 56.53538714822779 ], "wc_limitations_avg": [ 12.75, 9.730750228014282 ], "wc_review_avg": [ 656.5, 245.85005592840528 ], "wc_reply_reviewers_avg": [ 199.5, 326.0256891718811 ], "wc_reply_authors_avg": [ 30.75, 53.26056233274298 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7937097520194482709&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uwaterloo.ca;;sydney.edu.au;uwaterloo.ca;uwaterloo.ca", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Waterloo;University of Sydney", "aff_unique_dep": ";", "aff_unique_url": 
"https://uwaterloo.ca;https://www.sydney.edu.au", "aff_unique_abbr": "UW;USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;Australia" }, { "title": "Contrastive Training of Complex-Valued Autoencoders for Object Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70517", "id": "nF6X3u0FaA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2439ec22091b9d6cfbebf3284b40116e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nF6X3u0FaA", "openreview": "https://openreview.net/forum?id=nF6X3u0FaA", "poster": "/media/PosterPDFs/NeurIPS%202023/70517.png?t=1699451757.7305546", "slides": "https://nips.cc/virtual/2023/poster/70517", "video": "https://nips.cc/virtual/2023/poster/70517", "author_site": "Aleksandar Stani\u0107, Anand Gopalakrishnan, Kazuki Irie, J\u00fcrgen Schmidhuber", "tldr": "", "abstract": "Current state-of-the-art object-centric models use slots and attention-based routing for binding. However, this class of models has several conceptual limitations: the number of slots is hardwired; all slots have equal capacity; training has high computational cost; there are no object-level relational factors within slots. Synchrony-based models in principle can address these limitations by using complex-valued activations which store binding information in their phase components. However, working examples of such synchrony-based models have been developed only very recently, and are still limited to toy grayscale datasets and simultaneous storage of less than three objects in practice. Here we introduce architectural modifications and a novel contrastive learning method that greatly improve the state-of-the-art synchrony-based model. 
For the first time, we obtain a class of synchrony-based models capable of discovering objects in an unsupervised manner in multi-object color datasets and simultaneously representing more than three objects.", "keywords": "object-centric learning;complex-valued networks;unsupervised learning;temporal correlation hypothesis", "primary_area": "", "supplementary_material": "/attachment/6b026fc53842d016ac45d6caa6edb6362ede9cee.pdf", "author": "Aleksandar Stani\u0107;Anand Gopalakrishnan;Kazuki Irie;J\u00fcrgen Schmidhuber", "authorids": "~Aleksandar_Stani\u01071;~Anand_Gopalakrishnan1;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1", "gender": "M;;M;M", "homepage": "https://agopal42.github.io/;https://sites.harvard.edu/kazuki-irie/;http://people.idsia.ch/~juergen/;http://astanic.github.io/", "dblp": "191/1040;148/9667;s/JurgenSchmidhuber;180/5949", "google_scholar": "SsbgJ1UAAAAJ;https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ;tx0opKcAAAAJ", "orcid": ";0000-0003-0923-691X;;", "linkedin": ";;;", "or_profile": "~Anand_Gopalakrishnan1;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1;~Aleksandar_Stanic1", "aff": "Dalle Molle Institute for Artificial Intelligence Research;The Swiss AI Lab IDSIA, Dalle Molle Institute for Artificial Intelligence Research;IDSIA;The Swiss AI Lab - IDSIA", "aff_domain": "idsia.ch;idsia.ch;idsia.ch;idsia.ch", "position": "PhD student;Postdoc;Scientific Director;PhD student", "bibtex": "@inproceedings{\nstani{\\'c}2023contrastive,\ntitle={Contrastive Training of Complex-Valued Autoencoders for Object Discovery},\nauthor={Aleksandar Stani{\\'c} and Anand Gopalakrishnan and Kazuki Irie and J{\\\"u}rgen Schmidhuber},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nF6X3u0FaA}\n}", "github": "", "project": "", "reviewers": "bBXA;QpC7;BSdH;pFdh", "pdf_size": 6067624, "rating": "5;6;7;8", "confidence": "4;4;3;4", "soundness": "2;1;3;4", "novelty": "3;3;3;4", "presentation": "4;3;2;4", "wc_summary": "125;81;33;132", "wc_strengths": "61;124;31;94", "wc_weaknesses": "217;481;81;211", "wc_questions": "45;91;130;18", "wc_limitations": "25;4;17;37", "wc_review": "473;781;292;492", "wc_reply_reviewers": "213;959;55;21", "wc_reply_authors": "560;2524;0;0", "reply_reviewers": "1;4;1;1", "reply_authors": "2;7;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 92.75, 39.65081966365891 ], "wc_strengths_avg": [ 77.5, 34.88911004883902 ], "wc_weaknesses_avg": [ 247.5, 145.3504385958295 ], "wc_questions_avg": [ 71.0, 42.91270208225066 ], "wc_limitations_avg": [ 20.75, 12.007809958522827 ], "wc_review_avg": [ 509.5, 175.11210694866304 ], "wc_reply_reviewers_avg": [ 312.0, 380.5062417359274 ], "wc_reply_authors_avg": [ 771.0, 1037.5948149446392 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=119964014399174303&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "idsia.ch;idsia.ch;idsia.ch;idsia.ch", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Dalle Molle Institute for Artificial Intelligence 
Research;IDSIA;Institute of Digital Technologies", "aff_unique_dep": "Artificial Intelligence Research;Swiss AI Lab;", "aff_unique_url": "http://www.dallemolle.ch/;https://www.idsia.ch/;https://www.idsia.ch", "aff_unique_abbr": "DMI;IDSIA;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Label Correction of Crowdsourced Noisy Annotations with an Instance-Dependent Noise Transition Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70516", "id": "nFEQNYsjQO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/015a8c69bedcb0a7b2ed2e1678f34399-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nFEQNYsjQO", "openreview": "https://openreview.net/forum?id=nFEQNYsjQO", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70516", "video": "https://nips.cc/virtual/2023/poster/70516", "author_site": "Hui GUO, Boyu Wang, Grace Yi", "tldr": "", "abstract": "The predictive ability of supervised learning algorithms hinges on the quality of annotated examples, whose labels often come from multiple crowdsourced annotators with diverse expertise. To aggregate noisy crowdsourced annotations, many existing methods employ an annotator-specific instance-independent noise transition matrix to characterize the labeling skills of each annotator. Learning an instance-dependent noise transition model, however, is challenging and remains relatively less explored. To address this problem, in this paper, we formulate the noise transition model in a Bayesian framework and subsequently design a new label correction algorithm. Specifically, we approximate the instance-dependent noise transition matrices using a Bayesian network with a hierarchical spike and slab prior. To theoretically characterize the distance between the noise transition model and the true instance-dependent noise transition matrix, we provide a posterior-concentration theorem that ensures the posterior consistency in terms of the Hellinger distance. We further formulate the label correction process as a hypothesis testing problem and propose a novel algorithm to infer the true label from the noisy annotations based on the pairwise likelihood ratio test. Moreover, we establish an information-theoretic bound on the Bayes error for the proposed method. 
We validate the effectiveness of our approach through experiments on benchmark and real-world datasets.", "keywords": "Noisy Label;Instance-Dependent Transition Matrix;Label Correction;Crowdsourcing", "primary_area": "", "supplementary_material": "", "author": "Hui Guo;Boyu Wang;Grace Yi", "authorids": "~Hui_Guo5;~Boyu_Wang3;~Grace_Yi1", "gender": "F;M;F", "homepage": "https://github.com/hguo1728;https://sites.google.com/site/borriewang/;http://fisher.stats.uwo.ca/faculty/yyi/", "dblp": ";41/6565-4.html;", "google_scholar": ";qAZM5KcAAAAJ;", "orcid": ";0000-0002-7413-4162;", "linkedin": ";;", "or_profile": "~Hui_Guo5;~Boyu_Wang3;~Grace_Yi1", "aff": "University of Western Ontario;University of Western Ontario;University of Western Ontario", "aff_domain": "uwo.ca;uwo.ca;uwo.ca", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023label,\ntitle={Label Correction of Crowdsourced Noisy Annotations with an Instance-Dependent Noise Transition Model},\nauthor={Hui Guo and Boyu Wang and Grace Yi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nFEQNYsjQO}\n}", "github": "", "project": "", "reviewers": "i77Y;wDkZ;ysQF;uiTb;PGxM", "pdf_size": 2263008, "rating": "3;5;6;6;6", "confidence": "4;3;3;3;2", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "71;62;68;91;111", "wc_strengths": "46;60;77;99;118", "wc_weaknesses": "477;109;84;70;189", "wc_questions": "2;65;134;44;26", "wc_limitations": "31;40;134;1;1", "wc_review": "627;336;497;305;445", "wc_reply_reviewers": "460;59;0;0;0", "wc_reply_authors": "1876;697;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "4;2;1;1;1", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 80.6, 18.05103875127412 ], "wc_strengths_avg": [ 80.0, 25.96150997149434 ], "wc_weaknesses_avg": [ 185.8, 151.31344950135795 ], "wc_questions_avg": [ 54.2, 44.96398558846847 ], "wc_limitations_avg": [ 41.4, 48.88598981303334 ], "wc_review_avg": [ 442.0, 115.98620607641237 ], "wc_reply_reviewers_avg": [ 103.8, 179.55990643793507 ], "wc_reply_authors_avg": [ 514.6, 732.2730638225061 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8134892168199606, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15974392709863555842&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uwo.ca;uwo.ca;uwo.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Western Ontario", "aff_unique_dep": "", "aff_unique_url": "https://www.uwo.ca", "aff_unique_abbr": "UWO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Regret-Optimal Model-Free Reinforcement Learning for Discounted MDPs with Short Burn-In Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70515", "id": "nFsbQHFmj2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff887781480973bd3cb6026feb378d1e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nFsbQHFmj2", "openreview": 
"https://openreview.net/forum?id=nFsbQHFmj2", "poster": "/media/PosterPDFs/NeurIPS%202023/70515.png?t=1701917470.549807", "slides": "https://nips.cc/virtual/2023/poster/70515", "video": "https://nips.cc/virtual/2023/poster/70515", "author_site": "Xiang Ji, Gen Li", "tldr": "", "abstract": "A crucial problem in reinforcement learning is learning the optimal policy. We study this in tabular infinite-horizon discounted Markov decision processes under the online setting. The existing algorithms either fail to achieve regret optimality or have to incur a high memory and computational cost. In addition, existing optimal algorithms all require a long burn-in time in order to achieve optimal sample efficiency, i.e., their optimality is not guaranteed unless sample size surpasses a high threshold. We address both open problems by introducing a model-free algorithm that employs variance reduction and a novel technique that switches the execution policy in a slow-yet-adaptive manner. This is the first regret-optimal model-free algorithm in the discounted setting, with the additional benefit of a low burn-in time.", "keywords": "reinforcement learning theory;regret minimization;minimax optimality", "primary_area": "", "supplementary_material": "", "author": "Xiang Ji;Gen Li", "authorids": "~Xiang_Ji3;~Gen_Li2", "gender": ";M", "homepage": ";", "dblp": ";28/538-5.html", "google_scholar": "oCcK0LoAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";0000-0002-3078-9191", "linkedin": ";", "or_profile": "~Xiang_Ji3;~Gen_Li2", "aff": "Princeton University;The Wharton School, University of Pennsylvania", "aff_domain": "princeton.edu;wharton.upenn.edu", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\nji2023regretoptimal,\ntitle={Regret-Optimal Model-Free Reinforcement Learning for Discounted {MDP}s with Short Burn-In Time},\nauthor={Xiang Ji and Gen Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nFsbQHFmj2}\n}", "github": "", "project": "", "reviewers": "Th7P;wb6F;zms9;pMQU;an4o", "pdf_size": 378365, "rating": "6;6;6;7;8", "confidence": "3;3;4;4;4", "soundness": "3;3;3;4;4", "novelty": "3;3;2;3;4", "presentation": "3;3;2;4;3", "wc_summary": "54;70;81;26;22", "wc_strengths": "70;44;28;138;6", "wc_weaknesses": "47;124;108;73;8", "wc_questions": "30;87;132;273;36", "wc_limitations": "68;16;1;4;8", "wc_review": "269;341;350;514;80", "wc_reply_reviewers": "14;13;1838;16;15", "wc_reply_authors": "0;79;2944;0;15", "reply_reviewers": "1;1;6;1;1", "reply_authors": "1;3;11;1;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 50.6, 23.388886249669948 ], "wc_strengths_avg": [ 57.2, 45.4770271675711 ], "wc_weaknesses_avg": [ 72.0, 41.76601489249364 ], "wc_questions_avg": [ 111.6, 88.820267957263 ], "wc_limitations_avg": [ 19.4, 24.816123790793757 ], "wc_review_avg": [ 310.8, 140.53810871076925 ], "wc_reply_reviewers_avg": [ 379.2, 729.4006854946053 ], "wc_reply_authors_avg": [ 607.6, 1168.5643499610965 ], "reply_reviewers_avg": [ 2.0, 2.0 ], "reply_authors_avg": [ 3.6, 3.773592452822642 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=14759349054911337407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "princeton.edu;wharton.upenn.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Princeton University;University of Pennsylvania", "aff_unique_dep": ";The Wharton School", "aff_unique_url": "https://www.princeton.edu;https://www.wharton.upenn.edu", "aff_unique_abbr": "Princeton;UPenn Wharton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "What Truly Matters in Trajectory Prediction for Autonomous Driving?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70514", "id": "nG35q8pNL9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e197fe307eb3467035f892dc100d570a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nG35q8pNL9", "openreview": "https://openreview.net/forum?id=nG35q8pNL9", "poster": "/media/PosterPDFs/NeurIPS%202023/70514.png?t=1701838485.9851322", "slides": "https://nips.cc/virtual/2023/poster/70514", "video": "https://nips.cc/virtual/2023/poster/70514", "author_site": "Tran Phong, Haoran Wu, Cunjun Yu, Panpan Cai, Sifa Zheng, David Hsu", "tldr": "", "abstract": "Trajectory prediction plays a vital role in the performance of autonomous driving systems, and prediction accuracy, such as average displacement error (ADE) or final displacement error (FDE), is widely used as a performance metric. However, a significant disparity exists between the accuracy of predictors on fixed datasets and driving performance when the predictors are used downstream for vehicle control, because of a dynamics gap. In the real world, the prediction algorithm influences the behavior of the ego vehicle, which, in turn, influences the behaviors of other vehicles nearby. This interaction results in predictor-specific dynamics that directly impacts prediction results. In fixed datasets, since other vehicles' responses are predetermined, this interaction effect is lost, leading to a significant dynamics gap. This paper studies the overlooked significance of this dynamics gap. We also examine several other factors contributing to the disparity between prediction performance and driving performance. The findings highlight the trade-off between the predictor's computational efficiency and prediction accuracy in determining real-world driving performance. In summary, an interactive, task-driven evaluation protocol for trajectory prediction is crucial to capture its effectiveness for autonomous driving. 
Source code along with experimental settings is available online (https://whatmatters23.github.io/).", "keywords": "trajectory prediction; autonomous driving", "primary_area": "", "supplementary_material": "/attachment/05c0573f95dda09cd5474a6a4cb29adec0e5a90c.pdf", "author": "Tran Phong;Haoran Wu;Cunjun Yu;Panpan Cai;Sifa Zheng;David Hsu", "authorids": "~Tran_Phong1;~Haoran_Wu9;~Cunjun_Yu1;~Panpan_Cai1;~Sifa_Zheng1;~David_Hsu1", "gender": "M;M;Unspecified;F;M;M", "homepage": "https://tpvt99.github.io;https://wuhaoran111.github.io/;;https://cindycia.github.io/;http://www.svm.tsinghua.edu.cn/essay/80/1835.html;http://www.comp.nus.edu.sg/~dyhsu/", "dblp": "294/7874.html;19/4036;232/3014;215/4265;;29/331", "google_scholar": "-MjdFtAAAAAJ;5hmsPUYAAAAJ;4xwyGM8AAAAJ;https://scholar.google.com.sg/citations?user=MZfL0qUAAAAJ;;S9LHLKEAAAAJ", "orcid": ";;;;0000-0001-5160-1365;0000-0002-2309-4535", "linkedin": ";;;;;david-hsu-a86200a1/", "or_profile": "~Tran_Phong1;~Haoran_Wu9;~Cunjun_Yu1;~Panpan_Cai1;~Sifa_Zheng1;~David_Hsu1", "aff": "Singapore Management University;Tsinghua University;National University of Singapore;Shanghai Jiaotong University;Tsinghua University;National University of Singapore", "aff_domain": "smu.edu.sg;mail.tsinghua.edu.cn;u.nus.edu;sjtu.edu.cn;tsinghua.edu.cn;nus.edu.sg", "position": "Researcher;PhD student;PhD student;Associate Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nphong2023what,\ntitle={What Truly Matters in Trajectory Prediction for Autonomous Driving?},\nauthor={Tran Phong and Haoran Wu and Cunjun Yu and Panpan Cai and Sifa Zheng and David Hsu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nG35q8pNL9}\n}", "github": "", "project": "", "reviewers": "RCyQ;Bibu;Aygk;tS95;1Qcn", "pdf_size": 847310, "rating": "4;4;4;5;7", "confidence": "3;5;3;3;4", "soundness": "3;2;2;3;3", "novelty": "2;2;3;2;4", "presentation": "3;2;2;3;3", "wc_summary": "382;66;70;62;95", "wc_strengths": "56;52;71;16;75", "wc_weaknesses": "292;330;364;56;81", "wc_questions": "124;118;6;31;424", "wc_limitations": "58;48;6;2;6", "wc_review": "912;614;517;167;681", "wc_reply_reviewers": "732;573;294;0;362", "wc_reply_authors": "782;914;702;0;421", "reply_reviewers": "2;2;1;0;2", "reply_authors": "3;3;2;1;3", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 135.0, 124.03547879538338 ], "wc_strengths_avg": [ 54.0, 20.890189084831185 ], "wc_weaknesses_avg": [ 224.6, 129.71599747139902 ], "wc_questions_avg": [ 140.6, 149.15173482061815 ], "wc_limitations_avg": [ 24.0, 23.933240482642546 ], "wc_review_avg": [ 578.2, 243.35274808392856 ], "wc_reply_reviewers_avg": [ 392.2, 250.01151973459142 ], "wc_reply_authors_avg": [ 563.8, 324.8423617695205 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.12862393885688164, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4000477621479237605&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "smu.edu.sg;mail.tsinghua.edu.cn;u.nus.edu;sjtu.edu.cn;tsinghua.edu.cn;nus.edu.sg", "author_num": 6, "aff_unique_index": "0;1;2;3;1;2", "aff_unique_norm": "Singapore Management University;Tsinghua University;National University of Singapore;Shanghai Jiao Tong 
University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.smu.edu.sg;https://www.tsinghua.edu.cn;https://www.nus.edu.sg;https://www.sjtu.edu.cn", "aff_unique_abbr": "SMU;THU;NUS;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "$H$-Consistency Bounds: Characterization and Extensions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70513", "id": "nI7EmXq2PL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0e441913d4fa486c3eec967d79750b13-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nI7EmXq2PL", "openreview": "https://openreview.net/forum?id=nI7EmXq2PL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70513", "video": "https://nips.cc/virtual/2023/poster/70513", "author_site": "Anqi Mao, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "A series of recent publications by Awasthi et al. have introduced the key notion of *$H$-consistency bounds* for surrogate loss functions. These are upper bounds on the zero-one estimation error of any predictor in a hypothesis set, expressed in terms of its surrogate loss estimation error. They are both non-asymptotic and hypothesis set-specific and thus stronger and more informative than Bayes-consistency. However, determining if they hold and deriving these bounds have required a specific proof and analysis for each surrogate loss. Can we derive more general tools and characterizations? This paper provides both a general characterization and an extension of $H$-consistency bounds for multi-class classification. We present new and tight $H$-consistency bounds for both the family of constrained losses and that of comp-sum losses, which covers the familiar cross-entropy, or logistic loss applied to the outputs of a neural network. We further extend our analysis beyond the completeness assumptions adopted in previous studies and cover more realistic bounded hypothesis sets. Our characterizations are based on error transformations, which are explicitly defined for each formulation. We illustrate the application of our general results through several special examples. A by-product of our analysis is the observation that a recently derived multi-class $H$-consistency bound for cross-entropy reduces to an excess bound and is not significant. 
Instead, we prove a much stronger and more significant guarantee.", "keywords": "consistency;H-consistency;characterization;learning theory", "primary_area": "", "supplementary_material": "/attachment/e3ef412ffc8746969636c883299976ed3bfbb60f.pdf", "author": "Anqi Mao;Mehryar Mohri;Yutao Zhong", "authorids": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": "F;M;", "homepage": "https://anqi-mao.github.io;https://cs.nyu.edu/~mohri/;", "dblp": "241/6864;03/5448;51/3178-2", "google_scholar": "nkjIZ-oAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;", "linkedin": ";mehryar-mohri-3737b981/;", "or_profile": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Courant Institute of Mathematical Sciences, NYU;Google Research;Google", "aff_domain": "cims.nyu.edu;google.com;google.com", "position": "PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nmao2023hconsistency,\ntitle={\\$H\\$-Consistency Bounds: Characterization and Extensions},\nauthor={Anqi Mao and Mehryar Mohri and Yutao Zhong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nI7EmXq2PL}\n}", "github": "", "project": "", "reviewers": "VFQY;3BV1;PRWE;C2H4;AHgy;x2ot", "pdf_size": 481547, "rating": "4;5;6;6;7;7", "confidence": "3;1;3;4;3;2", "soundness": "3;2;3;3;3;3", "novelty": "2;2;3;3;3;3", "presentation": "1;2;2;3;3;3", "wc_summary": "37;34;75;79;53;141", "wc_strengths": "13;16;95;38;210;46", "wc_weaknesses": "221;20;215;30;13;3", "wc_questions": "10;101;144;140;27;18", "wc_limitations": "10;8;11;14;16;5", "wc_review": "291;179;540;301;319;213", "wc_reply_reviewers": "85;0;0;0;23;5", "wc_reply_authors": "421;0;0;0;0;0", "reply_reviewers": "2;0;0;0;1;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 5.833333333333333, 1.0671873729054746 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 69.83333333333333, 36.10363170405745 ], "wc_strengths_avg": [ 69.66666666666667, 68.29999186595037 ], "wc_weaknesses_avg": [ 83.66666666666667, 95.34440495149965 ], "wc_questions_avg": [ 73.33333333333333, 56.89659236037095 ], "wc_limitations_avg": [ 10.666666666666666, 3.636237371545238 ], "wc_review_avg": [ 307.1666666666667, 115.43023385963008 ], "wc_reply_reviewers_avg": [ 18.833333333333332, 30.68885067179212 ], "wc_reply_authors_avg": [ 70.16666666666667, 156.89743642123526 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.11043152607484659, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9927993322177290800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cims.nyu.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": "Courant Institute of Mathematical Sciences;Google Research", "aff_unique_url": "https://www.courant.nyu.edu;https://research.google", "aff_unique_abbr": "NYU;Google Research", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "New York;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PromptRestorer: A Prompting Image Restoration Method with Degradation Perception", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70512", "id": "nIaNgaQvsV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c364d98a5cdc426fd8c76fbb2c10e34-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nIaNgaQvsV", "openreview": "https://openreview.net/forum?id=nIaNgaQvsV", "poster": "/media/PosterPDFs/NeurIPS%202023/70512.png?t=1699532193.8812706", "slides": "https://nips.cc/virtual/2023/poster/70512", "video": "https://nips.cc/virtual/2023/poster/70512", "author_site": "Cong Wang, Jinshan Pan, Wei Wang, Jiangxin Dong, Mengzhu Wang, Yakun Ju, Junyang Chen", "tldr": "", "abstract": "We show that raw degradation features can effectively guide deep restoration models, providing accurate degradation priors to facilitate better restoration. While networks that do not consider them for restoration forget gradually degradation during the learning process, model capacity is severely hindered. To address this, we propose a Prompting image Restorer, termed as PromptRestorer. Specifically, PromptRestorer contains two branches: a restoration branch and a prompting branch. The former is used to restore images, while the latter perceives degradation priors to prompt the restoration branch with reliable perceived content to guide the restoration process for better recovery. To better perceive the degradation which is extracted by a pre-trained model from given degradation observations, we propose a prompting degradation perception modulator, which adequately considers the characters of the self-attention mechanism and pixel-wise modulation, to better perceive the degradation priors from global and local perspectives. To control the propagation of the perceived content for the restoration branch, we propose gated degradation perception propagation, enabling the restoration branch to adaptively learn more useful features for better recovery. 
Extensive experimental results show that our PromptRestorer achieves state-of-the-art results on 4 image restoration tasks, including image deraining, deblurring, dehazing, and desnowing.", "keywords": "Degradation Vanishing;Prompting Learning;Image Restoration", "primary_area": "", "supplementary_material": "/attachment/d545ba641d27fe3390f1277336c8875de5c4cfcc.pdf", "author": "Cong Wang;Jinshan Pan;Wei Wang;Jiangxin Dong;Mengzhu Wang;Yakun Ju;Junyang Chen", "authorids": "~Cong_Wang11;~Jinshan_Pan1;~Wei_Wang83;~Jiangxin_Dong1;~Mengzhu_Wang1;~Yakun_Ju1;~Junyang_Chen1", "gender": ";;M;;;M;M", "homepage": ";https://jspan.github.io/;;;;https://juyakun.github.io/;https://csse.szu.edu.cn/pages/user/index?id=1101", "dblp": ";06/10816;35/7092-335.html;;;221/9647;196/7893.html", "google_scholar": ";https://scholar.google.it/citations?user=CMsNjGIAAAAJ;;;;hE10pMYAAAAJ;Q0u3dRQAAAAJ", "orcid": ";;;;;0000-0003-4065-4108;0000-0002-1139-8654", "linkedin": ";;;;;;", "or_profile": "~Cong_Wang11;~Jinshan_Pan1;~Wei_Wang83;~Jiangxin_Dong1;~Mengzhu_Wang1;~Yakun_Ju1;~Junyang_Chen1", "aff": ";Nanjing University of Science and Technology;Dalian University of Technology;;;Hong Kong Polytechnic University;Shenzhen University", "aff_domain": ";njust.edu.cn;dlut.edu.cn;;;polyu.edu.hk;szu.edu", "position": ";Full Professor;PhD student;;;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nwang2023promptrestorer,\ntitle={PromptRestorer: A Prompting Image Restoration Method with Degradation Perception},\nauthor={Cong Wang and Jinshan Pan and Wei Wang and Jiangxin Dong and Mengzhu Wang and Yakun Ju and Junyang Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nIaNgaQvsV}\n}", "github": "", "project": "", "reviewers": "TXGv;NMEt;zfG4;Hjzg;24Ac;T7gZ", "pdf_size": 4387180, "rating": "3;4;6;7;7;8", "confidence": "4;5;4;5;5;5", "soundness": "2;2;3;4;4;3", "novelty": "2;2;3;4;4;4", "presentation": "3;3;2;4;3;4", "wc_summary": "60;114;73;100;61;75", "wc_strengths": "22;117;62;91;67;94", "wc_weaknesses": "222;290;142;43;83;84", "wc_questions": "4;3;67;2;2;7", "wc_limitations": "1;11;30;11;2;16", "wc_review": "309;535;374;247;215;276", "wc_reply_reviewers": "213;680;57;13;11;22", "wc_reply_authors": "514;2050;0;0;0;0", "reply_reviewers": "3;3;1;1;1;1", "reply_authors": "3;6;1;1;1;1", "rating_avg": [ 5.833333333333333, 1.7716909687891083 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.1666666666666665, 0.8975274678557507 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 80.5, 19.956202043475106 ], "wc_strengths_avg": [ 75.5, 30.059662894539144 ], "wc_weaknesses_avg": [ 144.0, 86.53130454735249 ], "wc_questions_avg": [ 14.166666666666666, 23.688370329950708 ], "wc_limitations_avg": [ 11.833333333333334, 9.685326817178424 ], "wc_review_avg": [ 326.0, 105.937088248954 ], "wc_reply_reviewers_avg": [ 166.0, 240.2970384059418 ], "wc_reply_authors_avg": [ 427.3333333333333, 749.5569061489299 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.1666666666666665, 1.8633899812498247 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5321520841901913, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13792761849795000557&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": 
";njust.edu.cn;dlut.edu.cn;;;polyu.edu.hk;szu.edu", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Nanjing University of Science and Technology;Dalian University of Technology;Hong Kong Polytechnic University;Shenzhen University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nust.edu.cn/;http://www.dlut.edu.cn/;https://www.polyu.edu.hk;https://www.szu.edu.cn", "aff_unique_abbr": "NUST;DUT;PolyU;SZU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Evaluating Robustness and Uncertainty of Graph Models Under Structural Distributional Shifts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70511", "id": "nJFJcgjnGo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eec7fee9a8595ca964b9a11562767345-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nJFJcgjnGo", "openreview": "https://openreview.net/forum?id=nJFJcgjnGo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70511", "video": "https://nips.cc/virtual/2023/poster/70511", "author_site": "Gleb Bazhenov, Denis Kuznedelev, Andrey Malinin, Artem Babenko, Liudmila Prokhorenkova", "tldr": "", "abstract": "In reliable decision-making systems based on machine learning, models have to be robust to distributional shifts or provide the uncertainty of their predictions. In node-level problems of graph learning, distributional shifts can be especially complex since the samples are interdependent. To evaluate the performance of graph models, it is important to test them on diverse and meaningful distributional shifts. However, most graph benchmarks considering distributional shifts for node-level problems focus mainly on node features, while structural properties are also essential for graph problems. In this work, we propose a general approach for inducing diverse distributional shifts based on graph structure. We use this approach to create data splits according to several structural node properties: popularity, locality, and density. In our experiments, we thoroughly evaluate the proposed distributional shifts and show that they can be quite challenging for existing graph models. We also reveal that simple models often outperform more sophisticated methods on the considered structural shifts. 
Finally, our experiments provide evidence that there is a trade-off between the quality of learned representations for the base classification task under structural distributional shift and the ability to separate the nodes from different distributions using these representations.", "keywords": "graph;distributional shift;structural shift;uncertainty;robustness;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Gleb Bazhenov;Denis Kuznedelev;Andrey Malinin;Artem Babenko;Liudmila Prokhorenkova", "authorids": "~Gleb_Bazhenov1;~Denis_Kuznedelev1;~Andrey_Malinin1;~Artem_Babenko1;~Liudmila_Prokhorenkova1", "gender": "M;M;M;M;F", "homepage": ";https://github.com/Godofnothing;;;", "dblp": "322/8649.html;322/8616;174/5705;117/4834;45/11468", "google_scholar": "DLt-B68AAAAJ;;;q885d1wAAAAJ;https://scholar.google.ru/citations?user=6JyZlSEAAAAJ", "orcid": ";0009-0005-2420-9620;;0000-0002-1830-8252;", "linkedin": "bazhenov-gleb/;;;;", "or_profile": "~Gleb_Bazhenov1;~Denis_Kuznedelev1;~Andrey_Malinin1;~Artem_Babenko1;~Liudmila_Prokhorenkova1", "aff": "Skolkovo Institute of Science and Technology;;Yandex;Yandex;Yandex", "aff_domain": "skoltech.ru;;yandex.ru;yandex-team.ru;yandex-team.ru", "position": "MS student;;Principal Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nbazhenov2023evaluating,\ntitle={Evaluating Robustness and Uncertainty of Graph Models Under Structural Distributional Shifts},\nauthor={Gleb Bazhenov and Denis Kuznedelev and Andrey Malinin and Artem Babenko and Liudmila Prokhorenkova},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nJFJcgjnGo}\n}", "github": "", "project": "", "reviewers": "2ynW;gSyv;E4Gb;AZW5", "pdf_size": 16673848, "rating": "4;6;7;7", "confidence": "4;4;5;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "51;71;34;135", "wc_strengths": "53;59;22;241", "wc_weaknesses": "221;121;92;161", "wc_questions": "52;6;36;344", "wc_limitations": "1;8;21;166", "wc_review": "378;265;205;1047", "wc_reply_reviewers": "223;67;143;0", "wc_reply_authors": "641;31;42;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 38.25163395202877 ], "wc_strengths_avg": [ 93.75, 86.1666263700744 ], "wc_weaknesses_avg": [ 148.75, 48.37548449369784 ], "wc_questions_avg": [ 109.5, 136.39189858638966 ], "wc_limitations_avg": [ 49.0, 67.9301111437336 ], "wc_review_avg": [ 473.75, 336.7442464244935 ], "wc_reply_reviewers_avg": [ 108.25, 83.35878777909382 ], "wc_reply_authors_avg": [ 178.5, 267.46822241156053 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11074659852284538603&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "skoltech.ru;;yandex.ru;yandex-team.ru;yandex-team.ru", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Yandex", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;https://yandex.com", "aff_unique_abbr": "Skoltech;Yandex", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "A Framework for Fast and Stable Representations of Multiparameter Persistent Homology Decompositions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70510", "id": "nKCUDd9GYu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/702b67152ec4435795f681865b67999c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nKCUDd9GYu", "openreview": "https://openreview.net/forum?id=nKCUDd9GYu", "poster": "/media/PosterPDFs/NeurIPS%202023/70510.png?t=1699883427.336034", "slides": "https://nips.cc/virtual/2023/poster/70510", "video": "https://nips.cc/virtual/2023/poster/70510", "author_site": "David Loiseaux, Mathieu Carri\u00e8re, Andrew Blumberg", "tldr": "", "abstract": "Topological data analysis (TDA) is an area of data science that focuses on using invariants from algebraic topology to provide multiscale shape descriptors for geometric data sets such as point clouds. One of the most important such descriptors is persistent homology, which encodes the change in shape as a filtration parameter changes; a typical parameter is the feature scale. For many data sets, it is useful to simultaneously vary multiple filtration parameters, for example feature scale and density. While the theoretical properties of single parameter persistent homology are well understood, less is known about the multiparameter case. A central question is the problem of representing multiparameter persistent homology by elements of a vector space for integration with standard machine learning algorithms. Existing approaches to this problem either ignore most of the multiparameter information to reduce to the one-parameter case or are heuristic and potentially unstable in the face of noise. In this article, we introduce a new general representation framework that leverages recent results on decompositions of multiparameter persistent homology. This framework is rich in information, fast to compute, and encompasses previous approaches. Moreover, we establish theoretical stability guarantees under this framework as well as efficient algorithms for practical computation, making this framework an applicable and versatile tool for analyzing geometric and point cloud data. 
We validate our stability results and algorithms with numerical experiments that demonstrate statistical convergence, prediction accuracy, and fast running times on several real data sets.", "keywords": "Topological Data Analysis;Multiparameter Persistent Homology;Kernel Methods;Convergence Rate;Statistical Learning", "primary_area": "", "supplementary_material": "/attachment/e8008b538ace8e9ea8a2d325844b8c83a3a318a8.zip", "author": "David Loiseaux;Mathieu Carri\u00e8re;Andrew Blumberg", "authorids": "~David_Loiseaux1;~Mathieu_Carri\u00e8re1;~Andrew_Blumberg1", "gender": "M;;M", "homepage": "https://davidlapous.github.io/;https://mathieucarriere.github.io/website/;https://ajblumberg.github.io", "dblp": "322/2006;167/1015;93/1054", "google_scholar": "oAjKKKcAAAAJ;;", "orcid": "0009-0003-5559-3712;;", "linkedin": "david-loiseaux/;;", "or_profile": "~David_Loiseaux1;~Mathieu_Carri\u00e8re1;~Andrew_J._Blumberg1", "aff": "INRIA;INRIA;Columbia University", "aff_domain": "inria.fr;inria.fr;columbia.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nloiseaux2023a,\ntitle={A Framework for Fast and Stable Representations of Multiparameter Persistent Homology Decompositions},\nauthor={David Loiseaux and Mathieu Carri{\\`e}re and Andrew Blumberg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nKCUDd9GYu}\n}", "github": "", "project": "", "reviewers": "BEmH;pDMj;HVag;MKJT", "pdf_size": 2962623, "rating": "6;6;7;7", "confidence": "3;2;5;3", "soundness": "3;3;4;3", "novelty": "3;3;3;4", "presentation": "4;3;4;4", "wc_summary": "67;80;105;61", "wc_strengths": "104;101;43;52", "wc_weaknesses": "238;38;90;24", "wc_questions": "293;53;44;387", "wc_limitations": "21;1;39;13", "wc_review": "723;273;321;537", "wc_reply_reviewers": "93;8;18;6", "wc_reply_authors": "30;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 16.90229274388537 ], "wc_strengths_avg": [ 75.0, 27.703790354390136 ], "wc_weaknesses_avg": [ 97.5, 84.76290462224617 ], "wc_questions_avg": [ 194.25, 149.52487251290336 ], "wc_limitations_avg": [ 18.5, 13.811227316933133 ], "wc_review_avg": [ 463.5, 179.81865865365586 ], "wc_reply_reviewers_avg": [ 31.25, 35.94005425705421 ], "wc_reply_authors_avg": [ 7.5, 12.99038105676658 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6514882558332699177&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "inria.fr;inria.fr;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "INRIA;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.columbia.edu", "aff_unique_abbr": "INRIA;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;United States" }, { "title": "Provably Efficient Algorithm for Nonstationary Low-Rank MDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70509", "id": "nMB41QjLDY", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/145c28cd4b1df9b426990fd68045f4f7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nMB41QjLDY", "openreview": "https://openreview.net/forum?id=nMB41QjLDY", "poster": "/media/PosterPDFs/NeurIPS%202023/70509.png?t=1701737259.6236155", "slides": "https://nips.cc/virtual/2023/poster/70509", "video": "https://nips.cc/virtual/2023/poster/70509", "author_site": "Yuan Cheng, Jing Yang, Yingbin Liang", "tldr": "", "abstract": "Reinforcement learning (RL) under changing environment models many real-world applications via nonstationary Markov Decision Processes (MDPs), and hence gains considerable interest. However, theoretical studies on nonstationary MDPs in the literature have mainly focused on tabular and linear (mixture) MDPs, which do not capture the nature of unknown representation in deep RL. In this paper, we make the first effort to investigate nonstationary RL under episodic low-rank MDPs, where both transition kernels and rewards may vary over time, and the low-rank model contains unknown representation in addition to the linear state embedding function. We first propose a parameter-dependent policy optimization algorithm called PORTAL,\nand further improve PORTAL to its parameter-free version of Ada-PORTAL, which is able to tune its hyper-parameters adaptively without any prior knowledge of nonstationarity. For both algorithms, we provide upper bounds on the average dynamic suboptimality gap, which show that as long as the nonstationarity is not significantly large, PORTAL and Ada-PORTAL are sample-efficient and can achieve arbitrarily small average dynamic suboptimality gap with polynomial sample complexity.", "keywords": "Reinforcement Learning;Nonstationary Environment;Representation Learning;Policy Optimization;Statistical Complexity", "primary_area": "", "supplementary_material": "/attachment/be0f60ccd2e94439ef15131ba2778fcd93d8d3fc.pdf", "author": "Yuan Cheng;Jing Yang;Yingbin Liang", "authorids": "~Yuan_Cheng6;~Jing_Yang3;~Yingbin_Liang1", "gender": ";;F", "homepage": ";http://www.ee.psu.edu/yang;https://sites.google.com/view/yingbinliang/home", "dblp": ";;51/332", "google_scholar": "5v47GU0AAAAJ;https://scholar.google.com/citations?hl=en;lGgLAiIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuan_Cheng6;~Jing_Yang3;~Yingbin_Liang1", "aff": "University of Science and Technology of China;Pennsylvania State University;The Ohio State University", "aff_domain": "ustc.edu.cn;psu.edu;osu.edu", "position": "MS student;Associate Professor;Professor", "bibtex": "@inproceedings{\ncheng2023provably,\ntitle={Provably Efficient Algorithm for Nonstationary Low-Rank {MDP}s},\nauthor={Yuan Cheng and Jing Yang and Yingbin Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nMB41QjLDY}\n}", "github": "", "project": "", "reviewers": "ga5z;FzWn;5jBS;niTs", "pdf_size": 556020, "rating": "3;6;7;7", "confidence": "3;2;3;3", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;2", "wc_summary": "19;189;49;82", "wc_strengths": "9;115;65;33", "wc_weaknesses": "122;105;26;50", "wc_questions": "4;77;2;248", "wc_limitations": "4;1;14;61", "wc_review": "158;487;156;474", "wc_reply_reviewers": "0;47;0;55", "wc_reply_authors": "128;0;0;52", "reply_reviewers": "0;1;0;1", "reply_authors": "3;1;1;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 
], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.75, 64.18089669052623 ], "wc_strengths_avg": [ 55.5, 39.68311983702894 ], "wc_weaknesses_avg": [ 75.75, 39.15593824696326 ], "wc_questions_avg": [ 82.75, 100.07840676189845 ], "wc_limitations_avg": [ 20.0, 24.155744658362327 ], "wc_review_avg": [ 318.75, 161.81683317875184 ], "wc_reply_reviewers_avg": [ 25.5, 25.656383221335 ], "wc_reply_authors_avg": [ 45.0, 52.41183072551463 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.08804509063256237, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8949627156735365886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ustc.edu.cn;psu.edu;osu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Science and Technology of China;Pennsylvania State University;Ohio State University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.psu.edu;https://www.osu.edu", "aff_unique_abbr": "USTC;PSU;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "GPT-ST: Generative Pre-Training of Spatio-Temporal Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70508", "id": "nMH5cUaSj8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/de7858e3e7f9f0f7b2c7bfdc86f6d928-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nMH5cUaSj8", "openreview": "https://openreview.net/forum?id=nMH5cUaSj8", "poster": "/media/PosterPDFs/NeurIPS%202023/70508.png?t=1699508437.5875752", "slides": "https://nips.cc/virtual/2023/poster/70508", "video": "https://nips.cc/virtual/2023/poster/70508", "author_site": "Zhonghang Li, Lianghao Xia, Yong Xu, Chao Huang", "tldr": "", "abstract": "In recent years, there has been a rapid development of spatio-temporal prediction techniques in response to the increasing demands of traffic management and travel planning. While advanced end-to-end models have achieved notable success in improving predictive performance, their integration and expansion pose significant challenges. This work aims to address these challenges by introducing a spatio-temporal pre-training framework that seamlessly integrates with downstream baselines and enhances their performance. The framework is built upon two key designs: (i) We propose a spatio-temporal mask autoencoder as a pre-training model for learning spatio-temporal dependencies. The model incorporates customized parameter learners and hierarchical spatial pattern encoding networks. These modules are specifically designed to capture spatio-temporal customized representations and intra- and inter-cluster region semantic relationships, which have often been neglected in existing approaches. (ii) We introduce an adaptive mask strategy as part of the pre-training mechanism. This strategy guides the mask autoencoder in learning robust spatio-temporal representations and facilitates the modeling of different relationships, ranging from intra-cluster to inter-cluster, in an easy-to-hard training manner. Extensive experiments conducted on representative benchmarks demonstrate the effectiveness of our proposed method. 
We have made our model implementation publicly available at https://github.com/HKUDS/GPT-ST.", "keywords": "Spatial Temporal Prediction;Deep Neural Networks;Pre-training Model", "primary_area": "", "supplementary_material": "/attachment/fad2b93de4bb008b6c92b1c62a9483d22a9fda5b.pdf", "author": "Zhonghang Li;Lianghao Xia;Yong Xu;Chao Huang", "authorids": "~Zhonghang_Li1;~Lianghao_Xia1;~Yong_Xu2;~Chao_Huang7", "gender": "M;M;M;M", "homepage": "https://github.com/LZH-YS1998;https://akaxlh.github.io/;;", "dblp": "258/0356;270/6586;07/4630-7;", "google_scholar": "__9uvQkAAAAJ;fDDjoUEAAAAJ;;Zkv9FqwAAAAJ", "orcid": "0000-0002-3977-1334;0000-0003-0725-2211;;", "linkedin": ";;;", "or_profile": "~Zhonghang_Li1;~Lianghao_Xia1;~Yong_Xu2;~Chao_Huang7", "aff": "South China University of Technology;University of Hong Kong;South China University of Technology;University of Hong Kong", "aff_domain": "scut.edu.cn;hku.hk;scut.edu.cn;hku.hk", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2023generative,\ntitle={Generative Pre-Training of Spatio-Temporal Graph Neural Networks},\nauthor={Zhonghang Li and Lianghao Xia and Yong Xu and Chao Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nMH5cUaSj8}\n}", "github": "", "project": "", "reviewers": "8n4n;H5wT;fFpq;83m7", "pdf_size": 3090104, "rating": "4;5;6;7", "confidence": "1;5;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "1;2;2;3", "wc_summary": "46;122;78;63", "wc_strengths": "7;44;106;37", "wc_weaknesses": "92;165;384;53", "wc_questions": "143;7;23;2", "wc_limitations": "1;1;8;1", "wc_review": "289;339;599;156", "wc_reply_reviewers": "598;10;20;17", "wc_reply_authors": "1325;5;8;22", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.25, 28.207933281259724 ], "wc_strengths_avg": [ 48.5, 35.98958182585621 ], "wc_weaknesses_avg": [ 173.5, 128.0087887607722 ], "wc_questions_avg": [ 43.75, 57.824627106450066 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 345.75, 160.78459969785663 ], "wc_reply_reviewers_avg": [ 161.25, 252.18383671440958 ], "wc_reply_authors_avg": [ 340.0, 568.7262082935865 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5962847939999439, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5931783457040982287&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "scut.edu.cn;hku.hk;scut.edu.cn;hku.hk", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "South China University of Technology;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.scut.edu.cn;https://www.hku.hk", "aff_unique_abbr": "SCUT;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Memory Efficient Optimizers with 4-bit States", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70507", "id": "nN8TnHB5nw", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3122aaa22b2fe83f9cead1a696f65ceb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nN8TnHB5nw", "openreview": "https://openreview.net/forum?id=nN8TnHB5nw", "poster": "/media/PosterPDFs/NeurIPS%202023/70507.png?t=1701592673.1757753", "slides": "https://nips.cc/virtual/2023/poster/70507", "video": "https://nips.cc/virtual/2023/poster/70507", "author_site": "Bingrui Li, Jianfei Chen, Jun Zhu", "tldr": "", "abstract": "Optimizer states are a major source of memory consumption for training neural networks, limiting the maximum trainable model within given memory budget. Compressing the optimizer states from 32-bit floating points to lower bitwidth is promising to reduce the training memory footprint, while the current lowest achievable bitwidth is 8-bit. In this work, we push optimizer states bitwidth down to 4-bit through a detailed empirical analysis of first and second moments. Specifically, we find that moments have complicated outlier patterns, that current block-wise quantization cannot accurately approximate. We use a smaller block size and propose to utilize both row-wise and column-wise information for better quantization. We further identify a zero point problem of quantizing the second moment, and solve this problem with a linear quantizer that excludes the zero point. Our 4-bit optimizers are evaluated on a wide variety of benchmarks including natural language understanding, machine translation, image classification, and instruction tuning. On all the tasks our optimizers can achieve comparable accuracy with their full-precision counterparts, while enjoying better memory efficiency.", "keywords": "memory efficiency;optimizer;Adam;quantization", "primary_area": "", "supplementary_material": "/attachment/1f68049af56e9ad919599339264ef9c1249e5e49.zip", "author": "Bingrui Li;Jianfei Chen;Jun Zhu", "authorids": "~Bingrui_Li1;~Jianfei_Chen1;~Jun_Zhu2", "gender": "M;M;M", "homepage": "https://bingrui-li.github.io;http://ml.cs.tsinghua.edu.cn/~jianfei;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": ";48/6809-1;50/2644-1", "google_scholar": "w-dkujgAAAAJ;di5RZ1MAAAAJ;axsP38wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Bingrui_Li1;~Jianfei_Chen1;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nli2023memory,\ntitle={Memory Efficient Optimizers with 4-bit States},\nauthor={Bingrui Li and Jianfei Chen and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nN8TnHB5nw}\n}", "github": "", "project": "", "reviewers": "rPDD;f2sV;d65g;qs7T", "pdf_size": 12764498, "rating": "5;6;7;8", "confidence": "5;3;3;5", "soundness": "3;2;3;4", "novelty": "3;2;4;4", "presentation": "3;3;2;4", "wc_summary": "54;52;63;102", "wc_strengths": "23;26;57;63", "wc_weaknesses": "38;98;52;10", "wc_questions": "19;1;94;106", "wc_limitations": "15;1;10;6", "wc_review": "149;178;276;287", "wc_reply_reviewers": "62;78;251;0", "wc_reply_authors": "835;219;950;0", "reply_reviewers": "1;2;2;0", "reply_authors": "3;3;4;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.75, 20.20365066021485 
], "wc_strengths_avg": [ 42.25, 17.907749719046222 ], "wc_weaknesses_avg": [ 49.5, 31.823733281939127 ], "wc_questions_avg": [ 55.0, 45.645372164108814 ], "wc_limitations_avg": [ 8.0, 5.1478150704935 ], "wc_review_avg": [ 222.5, 60.01041576259908 ], "wc_reply_reviewers_avg": [ 97.75, 93.15142242606926 ], "wc_reply_authors_avg": [ 501.0, 401.1489748210757 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4080914880108526887&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Eliminating Domain Bias for Federated Learning in Representation Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70506", "id": "nO5i1XdUS0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e0d3c6ad1a4d85bef3cfe63af58bc76-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nO5i1XdUS0", "openreview": "https://openreview.net/forum?id=nO5i1XdUS0", "poster": "/media/PosterPDFs/NeurIPS%202023/70506.png?t=1701398175.305869", "slides": "https://nips.cc/virtual/2023/poster/70506", "video": "https://nips.cc/virtual/2023/poster/70506", "author_site": "Jianqing Zhang, Yang Hua, Jian Cao, Hao Wang, Tao Song, Zhengui XUE, Ruhui Ma, Haibing Guan", "tldr": "", "abstract": "Recently, federated learning (FL) is popular for its privacy-preserving and collaborative learning abilities. However, under statistically heterogeneous scenarios, we observe that biased data domains on clients cause a representation bias phenomenon and further degenerate generic representations during local training, i.e., the representation degeneration phenomenon. To address these issues, we propose a general framework Domain Bias Eliminator (DBE) for FL. Our theoretical analysis reveals that DBE can promote bi-directional knowledge transfer between server and client, as it reduces the domain discrepancy between server and client in representation space. Besides, extensive experiments on four datasets show that DBE can greatly improve existing FL methods in both generalization and personalization abilities. The DBE-equipped FL method can outperform ten state-of-the-art personalized FL methods by a large margin. 
Our code is public at https://github.com/TsingZ0/DBE.", "keywords": "Federated Learning;Personalized Federated Learning;Representation;Knowledge Transfer", "primary_area": "", "supplementary_material": "/attachment/52913fd34ab0abb268c0cc00a1bce0ead6f1fc11.zip", "author": "Jianqing Zhang;Yang Hua;Jian Cao;Hao Wang;Tao Song;Zhengui XUE;Ruhui Ma;Haibing Guan", "authorids": "~Jianqing_Zhang1;~Yang_Hua2;~Jian_Cao1;~Hao_Wang29;~Tao_Song2;~Zhengui_XUE1;~Ruhui_Ma1;~Haibing_Guan1", "gender": "M;M;M;M;M;F;M;M", "homepage": ";https://pure.qub.ac.uk/en/persons/yang-hua;https://www.cs.sjtu.edu.cn/en/PeopleDetail.aspx?id=182;https://www.haow.us;https://www.cs.sjtu.edu.cn/PeopleDetail.aspx?id=424;;https://www.cs.sjtu.edu.cn/PeopleDetail.aspx?id=328;http://www.cs.sjtu.edu.cn/~hbguan/", "dblp": "29/2597;;50/2102;w/HaoWang-22;30/982-3;81/8134;01/5518.html;96/5680.html", "google_scholar": "https://scholar.google.com/citations?hl=en;N0tFi8MAAAAJ;;r-Ik__gAAAAJ;https://scholar.google.com.hk/citations?user=tIjK-3QAAAAJ;9a1vZQsAAAAJ;;", "orcid": ";0000-0001-5536-503X;;0000-0002-1444-2657;;;;", "linkedin": ";;;haowanguoft/;;;;", "or_profile": "~Jianqing_Zhang1;~Yang_Hua2;~Jian_Cao1;~Hao_Wang29;~Tao_Song2;~Zhengui_XUE1;~Ruhui_Ma1;~Haibing_Guan1", "aff": "Shanghai Jiaotong University;Queen's University Belfast;Shanghai Jiaotong University;Louisiana State University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;qub.ac.uk;sjtu.edu.cn;lsu.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor;Assistant Professor;Adjunct researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023eliminating,\ntitle={Eliminating Domain Bias for Federated Learning in Representation Space},\nauthor={Jianqing Zhang and Yang Hua and Jian Cao and Hao Wang and Tao Song and Zhengui XUE and Ruhui Ma and Haibing Guan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nO5i1XdUS0}\n}", "github": "", "project": "", "reviewers": "xd4W;FLKZ;8bi2;dEaC", "pdf_size": 47405914, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "119;57;129;80", "wc_strengths": "102;50;69;20", "wc_weaknesses": "108;31;52;98", "wc_questions": "30;203;71;13", "wc_limitations": "1;10;11;6", "wc_review": "360;351;332;217", "wc_reply_reviewers": "41;53;159;0", "wc_reply_authors": "18;29;359;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.25, 29.13224158900238 ], "wc_strengths_avg": [ 60.25, 29.768901558505647 ], "wc_weaknesses_avg": [ 72.25, 31.830606340439072 ], "wc_questions_avg": [ 79.25, 74.49286878621335 ], "wc_limitations_avg": [ 7.0, 3.9370039370059056 ], "wc_review_avg": [ 315.0, 57.47608198198621 ], "wc_reply_reviewers_avg": [ 63.25, 58.67015851350668 ], "wc_reply_authors_avg": [ 101.5, 149.02768199230638 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 39, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5237553789383122849&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;qub.ac.uk;sjtu.edu.cn;lsu.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Queen's University Belfast;Louisiana State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.qub.ac.uk;https://www.lsu.edu", "aff_unique_abbr": "SJTU;QUB;LSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;0;0;0", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "Multiclass Boosting: Simple and Intuitive Weak Learning Criteria", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70505", "id": "nQ84YY9Iut", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/050f8591be3874b52fdac4e1060eeb29-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nQ84YY9Iut", "openreview": "https://openreview.net/forum?id=nQ84YY9Iut", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70505", "video": "https://nips.cc/virtual/2023/poster/70505", "author_site": "Nataly Brukhim, Amit Daniely, Yishay Mansour, Shay Moran", "tldr": "", "abstract": "We study a generalization of boosting to the multiclass setting.\nWe introduce a weak learning condition for multiclass classification that captures the original notion of weak learnability as being \u201cslightly better than random guessing\u201d. We give a simple and efficient boosting algorithm, that does not require realizability assumptions and its sample and oracle complexity bounds are independent of the number of classes. \n\nIn addition, we utilize our new boosting technique in several theoretical applications within the context of List PAC Learning. \nFirst, we establish an equivalence to weak PAC learning. Furthermore, we present a new result on boosting for list learners, as well as provide a novel proof for the characterization of multiclass PAC learning and List PAC learning. 
Notably, our technique gives rise to simplified algorithms and analysis compared to previous works.", "keywords": "Boosting;Multiclass classification;PAC Learning;List PAC Learning", "primary_area": "", "supplementary_material": "/attachment/77d183ef0a2d77574046ada78a8e083803e449e2.pdf", "author": "Nataly Brukhim;Amit Daniely;Yishay Mansour;Shay Moran", "authorids": "~Nataly_Brukhim1;~Amit_Daniely2;~Yishay_Mansour2;~Shay_Moran1", "gender": ";M;M;M", "homepage": "https://www.cs.princeton.edu/~nbrukhim/;https://www.cs.huji.ac.il/~amitd/;http://www.cs.technion.ac.il/~shaymrn/;https://www.cs.tau.ac.il/~mansour/", "dblp": "215/3691;19/7805;119/5111;m/YishayMansour", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=jUtYwE0AAAAJ;kALYnggAAAAJ;OEJUgwkAAAAJ", "orcid": ";;;0000-0001-6891-2645", "linkedin": ";;;", "or_profile": "~Nataly_Brukhim1;~Amit_Daniely2;~Shay_Moran1;~Yishay_Mansour1", "aff": "Princeton University;Google;Google;School of Computer Science, Tel Aviv University", "aff_domain": "princeton.edu;google.com;google.com;cs.tau.ac.il", "position": "PhD student;Researcher;Visiting Faculty;Full Professor", "bibtex": "@inproceedings{\nbrukhim2023multiclass,\ntitle={Multiclass Boosting: Simple and Intuitive Weak Learning Criteria},\nauthor={Nataly Brukhim and Amit Daniely and Yishay Mansour and Shay Moran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nQ84YY9Iut}\n}", "github": "", "project": "", "reviewers": "Ut8K;Zpmo;bGbo;Rb54", "pdf_size": 425787, "rating": "4;4;7;8", "confidence": "4;1;3;3", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "2;2;4;3", "wc_summary": "95;69;143;215", "wc_strengths": "97;37;46;1", "wc_weaknesses": "204;44;47;1", "wc_questions": "27;17;58;1", "wc_limitations": "7;1;59;1", "wc_review": "430;168;353;219", "wc_reply_reviewers": "156;0;196;100", "wc_reply_authors": "164;0;50;129", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 130.5, 55.54052574472085 ], "wc_strengths_avg": [ 45.25, 34.29559009552103 ], "wc_weaknesses_avg": [ 74.0, 77.2301754497554 ], "wc_questions_avg": [ 25.75, 20.801141795584204 ], "wc_limitations_avg": [ 17.0, 24.372115213907882 ], "wc_review_avg": [ 292.5, 104.2461030446702 ], "wc_reply_reviewers_avg": [ 113.0, 73.61385739111897 ], "wc_reply_authors_avg": [ 85.75, 64.46850006010688 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.22487239817113241, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2852801364681040717&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "princeton.edu;google.com;google.com;cs.tau.ac.il", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Princeton University;Google;Tel Aviv University", "aff_unique_dep": ";Google;School of Computer Science", "aff_unique_url": "https://www.princeton.edu;https://www.google.com;https://www.tau.ac.il", "aff_unique_abbr": "Princeton;Google;TAU", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Mountain View;Tel Aviv", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": 
"United States;Israel" }, { "title": "Interpretability at Scale: Identifying Causal Mechanisms in Alpaca", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70504", "id": "nRfClnMhVX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f6a8b109d4d4fd64c75e94aaf85d9697-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nRfClnMhVX", "openreview": "https://openreview.net/forum?id=nRfClnMhVX", "poster": "/media/PosterPDFs/NeurIPS%202023/70504.png?t=1702054811.9243937", "slides": "https://nips.cc/virtual/2023/poster/70504", "video": "https://nips.cc/virtual/2023/poster/70504", "author_site": "Zhengxuan Wu, Atticus Geiger, Thomas Icard, Christopher Potts, Noah Goodman", "tldr": "", "abstract": "Obtaining human-interpretable explanations of large, general-purpose language models is an urgent goal for AI safety. However, it is just as important that our interpretability methods are faithful to the causal dynamics underlying model behavior and able to robustly generalize to unseen inputs. Distributed Alignment Search (DAS) is a powerful gradient descent method grounded in a theory of causal abstraction that uncovered perfect alignments between interpretable symbolic algorithms and small deep learning models fine-tuned for specific tasks. In the present paper, we scale DAS significantly by replacing the remaining brute-force search steps with learned parameters -- an approach we call Boundless DAS. This enables us to efficiently search for interpretable causal structure in large language models while they follow instructions. We apply Boundless DAS to the Alpaca model (7B parameters), which, off the shelf, solves a simple numerical reasoning problem. With Boundless DAS, we discover that Alpaca does this by implementing a causal model with two interpretable boolean variables. Furthermore, we find that the alignment of neural representations with these variables is robust to changes in inputs and instructions. 
These findings mark a first step toward deeply understanding the inner-workings of our largest and most widely deployed language models.", "keywords": "Mechanistic Interpretability", "primary_area": "", "supplementary_material": "/attachment/b37b952b334bfdfb4e705bb97d0a7ecf237079ae.zip", "author": "Zhengxuan Wu;Atticus Geiger;Thomas Icard;Christopher Potts;Noah Goodman", "authorids": "~Zhengxuan_Wu1;~Atticus_Geiger1;~Thomas_Icard1;~Christopher_Potts1;~Noah_Goodman1", "gender": "M;M;M;;", "homepage": "https://cs.stanford.edu/~wuzhengx/;https://atticusg.github.io/;http://web.stanford.edu/~cgpotts/;https://cocolab.stanford.edu/;https://web.stanford.edu/~icard/", "dblp": "234/4650;229/4086;13/2617;96/1216;149/3822", "google_scholar": "CBvE6lwAAAAJ;;3j08YoAAAAAJ;OUpIbcQAAAAJ;", "orcid": ";;0000-0002-7978-6055;;", "linkedin": ";;;;", "or_profile": "~Zhengxuan_Wu1;~Atticus_Geiger1;~Christopher_Potts1;~Noah_Goodman1;~Thomas_F_Icard1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023interpretability,\ntitle={Interpretability at Scale: Identifying Causal Mechanisms in Alpaca},\nauthor={Zhengxuan Wu and Atticus Geiger and Thomas Icard and Christopher Potts and Noah Goodman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nRfClnMhVX}\n}", "github": "", "project": "", "reviewers": "8NXa;ExBe;SQSS;iGjk;AVCz;6cS3", "pdf_size": 3484715, "rating": "4;5;6;6;7;8", "confidence": "5;4;2;2;3;3", "soundness": "3;2;3;3;3;4", "novelty": "1;3;2;3;4;4", "presentation": "2;2;3;1;3;4", "wc_summary": "44;92;105;88;205;69", "wc_strengths": "59;88;63;25;137;87", "wc_weaknesses": "105;478;95;51;177;49", "wc_questions": "4;286;62;160;353;91", "wc_limitations": "9;59;86;5;7;1", "wc_review": "221;1003;411;329;879;297", "wc_reply_reviewers": "87;1151;186;39;303;35", "wc_reply_authors": "0;1129;119;0;132;0", "reply_reviewers": "1;6;2;1;1;1", "reply_authors": "1;6;2;1;2;1", "rating_avg": [ 6.0, 1.2909944487358056 ], "confidence_avg": [ 3.1666666666666665, 1.0671873729054748 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.8333333333333335, 1.0671873729054748 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 100.5, 50.585735802364944 ], "wc_strengths_avg": [ 76.5, 34.26246731726521 ], "wc_weaknesses_avg": [ 159.16666666666666, 148.80682854702007 ], "wc_questions_avg": [ 159.33333333333334, 123.70888767137505 ], "wc_limitations_avg": [ 27.833333333333332, 32.62114174716895 ], "wc_review_avg": [ 523.3333333333334, 302.65363408064707 ], "wc_reply_reviewers_avg": [ 300.1666666666667, 391.77392148477105 ], "wc_reply_authors_avg": [ 230.0, 405.9609997688612 ], "reply_reviewers_avg": [ 2.0, 1.8257418583505538 ], "reply_authors_avg": [ 2.1666666666666665, 1.7716909687891083 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6048583789091336, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12524499658927280166&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": 
"https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Differentiable Clustering with Perturbed Spanning Forests", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70503", "id": "nRfcVBsF9n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/637a456d89289769ac1ab29617ef7213-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nRfcVBsF9n", "openreview": "https://openreview.net/forum?id=nRfcVBsF9n", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70503", "video": "https://nips.cc/virtual/2023/poster/70503", "author_site": "Lawrence Stewart, Francis Bach, Felipe Llinares-Lopez, Quentin Berthet", "tldr": "", "abstract": "We introduce a differentiable clustering method based on stochastic perturbations of minimum-weight spanning forests. This allows us to include clustering in end-to-end trainable pipelines, with efficient gradients. We show that our method performs well even in difficult settings, such as data sets with high noise and challenging geometries. We also formulate an ad hoc loss to efficiently learn from partial clustering data using this operation. We demonstrate its performance on several data sets for supervised and semi-supervised tasks.", "keywords": "Structured learning;Clustering;Differentiable;weakly supervised;semi-supervised;representation learning", "primary_area": "", "supplementary_material": "/attachment/f9664334a07f65bc766ea77f9a9d158723e4cab9.pdf", "author": "Lawrence Stewart;Francis Bach;Felipe Llinares-L\u00f3pez;Quentin Berthet", "authorids": "lawrence.stewart@ens.fr;~Francis_Bach1;~Felipe_Llinares-L\u00f3pez1;~Quentin_Berthet2", "gender": ";M;;M", "homepage": ";http://www.di.ens.fr/~fbach;;http://q-berthet.github.io/", "dblp": ";b/FrancisRBach;;129/1262", "google_scholar": ";https://scholar.google.fr/citations?user=6PJWcFEAAAAJ;;bHwGZjcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "lawrence.stewart@ens.fr;~Francis_Bach1;~Felipe_Llinares-L\u00f3pez1;~Quentin_Berthet2", "aff": ";Ecole Normale Superieure;;Google", "aff_domain": ";ens.fr;;google.com", "position": ";Faculty;;Researcher", "bibtex": "@inproceedings{\nstewart2023differentiable,\ntitle={Differentiable Clustering with Perturbed Spanning Forests},\nauthor={Lawrence Stewart and Francis Bach and Felipe Llinares-L{\\'o}pez and Quentin Berthet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nRfcVBsF9n}\n}", "github": "", "project": "", "reviewers": "cJCp;sM5n;mX78;ukVi", "pdf_size": 1399160, "rating": "5;6;6;8", "confidence": "2;4;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "113;28;84;19", "wc_strengths": "60;88;102;55", "wc_weaknesses": "63;82;61;383", "wc_questions": "89;2;4;2", "wc_limitations": "21;2;25;5", "wc_review": "346;202;276;464", "wc_reply_reviewers": "0;120;54;49", "wc_reply_authors": "0;170;39;36", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.0, 39.00640972968417 ], "wc_strengths_avg": [ 76.25, 19.472737352514155 ], "wc_weaknesses_avg": [ 147.25, 136.3568388457286 ], "wc_questions_avg": 
[ 24.25, 37.392345473371954 ], "wc_limitations_avg": [ 13.25, 9.908960591303208 ], "wc_review_avg": [ 322.0, 96.5090669315583 ], "wc_reply_reviewers_avg": [ 55.75, 42.67537346058028 ], "wc_reply_authors_avg": [ 61.25, 64.63503307030948 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3458572319330373, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18046602092684920819&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 14, "email": ";ens.fr;;google.com", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Normale Superieure;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ens.fr;https://www.google.com", "aff_unique_abbr": "ENS;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "France;United States" }, { "title": "Shape Non-rigid Kinematics (SNK): A Zero-Shot Method for Non-Rigid Shape Matching via Unsupervised Functional Map Regularized Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70502", "id": "nSgMh5v5Ne", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dd9b76f050a86a3ded6135ad3556e786-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nSgMh5v5Ne", "openreview": "https://openreview.net/forum?id=nSgMh5v5Ne", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70502", "video": "https://nips.cc/virtual/2023/poster/70502", "author_site": "Souhaib Attaiki, Maks Ovsjanikov", "tldr": "", "abstract": "We present Shape Non-rigid Kinematics (SNK), a novel zero-shot method for non-rigid shape matching that eliminates the need for extensive training or ground truth data. SNK operates on a single pair of shapes, and employs a reconstruction-based strategy using an encoder-decoder architecture, which deforms the source shape to closely match the target shape. During the process, an unsupervised functional map is predicted and converted into a point-to-point map, serving as a supervisory mechanism for the reconstruction. To aid in training, we have designed a new decoder architecture that generates smooth, realistic deformations. SNK demonstrates competitive results on traditional benchmarks, simplifying the shape-matching process without compromising accuracy. 
Our code can be found online: https://github.com/pvnieo/SNK", "keywords": "shape matching", "primary_area": "", "supplementary_material": "", "author": "Souhaib Attaiki;Maks Ovsjanikov", "authorids": "~Souhaib_Attaiki2;~Maks_Ovsjanikov1", "gender": "M;M", "homepage": "http://www.lix.polytechnique.fr/~maks/;", "dblp": "94/5668;280/0782", "google_scholar": "https://scholar.google.com/citations?hl=en;REUg_ToAAAAJ", "orcid": "0000-0002-5867-4046;", "linkedin": ";souhaib-attaiki-595121102/", "or_profile": "~Maks_Ovsjanikov1;~SOUHAIB_ATTAIKI1", "aff": "\u00c9cole Polytechnique;Ecole polytechnique", "aff_domain": "polytechnique.edu;polytechnique.edu", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nattaiki2023shape,\ntitle={Shape Non-rigid Kinematics ({SNK}): A Zero-Shot Method for Non-Rigid Shape Matching via Unsupervised Functional Map Regularized Reconstruction},\nauthor={Souhaib Attaiki and Maks Ovsjanikov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nSgMh5v5Ne}\n}", "github": "", "project": "", "reviewers": "j61z;E6YV;nxSf;osbV", "pdf_size": 5592091, "rating": "5;5;6;7", "confidence": "5;4;5;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "4;2;4;3", "wc_summary": "81;70;94;102", "wc_strengths": "54;71;104;99", "wc_weaknesses": "375;164;157;94", "wc_questions": "34;122;93;23", "wc_limitations": "1;55;71;6", "wc_review": "545;482;519;324", "wc_reply_reviewers": "0;66;57;14", "wc_reply_authors": "0;98;295;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 86.75, 12.234684303242156 ], "wc_strengths_avg": [ 82.0, 20.481699148264042 ], "wc_weaknesses_avg": [ 197.5, 106.04362309917556 ], "wc_questions_avg": [ 68.0, 40.99390198553927 ], "wc_limitations_avg": [ 33.25, 30.334592464709328 ], "wc_review_avg": [ 467.5, 85.82103471760288 ], "wc_reply_reviewers_avg": [ 34.25, 27.87808278917329 ], "wc_reply_authors_avg": [ 98.25, 120.4333321800904 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14865291930086580936&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "polytechnique.edu;polytechnique.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Ecole Polytechnique", "aff_unique_dep": "", "aff_unique_url": "https://www.polytechnique.edu", "aff_unique_abbr": "X", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Robust Matrix Sensing in the Semi-Random Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70501", "id": "nSr2epejn2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c836d71b4702d9046b14ce1228c4c11b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nSr2epejn2", "openreview": "https://openreview.net/forum?id=nSr2epejn2", "poster": "/media/PosterPDFs/NeurIPS%202023/70501.png?t=1702155424.009197", "slides": "https://nips.cc/virtual/2023/poster/70501", "video": "https://nips.cc/virtual/2023/poster/70501", 
"author_site": "Xing Gao, Yu Cheng", "tldr": "", "abstract": "Low-rank matrix recovery is a fundamental problem in machine learning with numerous applications. In practice, the problem can be solved by convex optimization namely nuclear norm minimization, or by non-convex optimization as it is well-known that for low-rank matrix problems like matrix sensing and matrix completion, all local optima of the natural non-convex objectives are also globally optimal under certain ideal assumptions.\n\nIn this paper, we study new approaches for matrix sensing in a semi-random model where an adversary can add any number of arbitrary sensing matrices. More precisely, the problem is to recover a low-rank matrix $X^\\star$ from linear measurements $b_i = \\langle A_i, X^\\star \\rangle$, where an unknown subset of the sensing matrices satisfies the Restricted Isometry Property (RIP) and the rest of the $A_i$'s are chosen adversarially.\n\nIt is known that in the semi-random model, existing non-convex objectives can have bad local optima. To fix this, we present a descent-style algorithm that provably recovers the ground-truth matrix $X^\\star$. For the closely-related problem of semi-random matrix completion, prior work [CG18] showed that all bad local optima can be eliminated by reweighting the input data. However, the analogous approach for matrix sensing requires reweighting a set of matrices to satisfy RIP, which is a condition that is NP-hard to check. Instead, we build on the framework proposed in [KLL$^+$23] for semi-random sparse linear regression, where the algorithm in each iteration reweights the input based on the current solution, and then takes a weighted gradient step that is guaranteed to work well locally. Our analysis crucially exploits the connection between sparsity in vector problems and low-rankness in matrix problems, which may have other applications in obtaining robust algorithms for sparse and low-rank problems.", "keywords": "Matrix sensing;Optimization;Low-rank matrix recovery;Semi-random;Adversarial input;Robustness", "primary_area": "", "supplementary_material": "", "author": "Xing Gao;Yu Cheng", "authorids": "~Xing_Gao2;~Yu_Cheng2", "gender": ";M", "homepage": "https://xgao27.github.io;https://cs.brown.edu/people/ycheng79/", "dblp": ";96/3060-2", "google_scholar": ";lVoOIv4AAAAJ", "orcid": ";0000-0002-0019-2570", "linkedin": ";yu-cheng-40401632/", "or_profile": "~Xing_Gao2;~Yu_Cheng2", "aff": "University of Illinois, Chicago;Brown University", "aff_domain": "uic.edu;brown.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngao2023robust,\ntitle={Robust Matrix Sensing in the Semi-Random Model},\nauthor={Xing Gao and Yu Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nSr2epejn2}\n}", "github": "", "project": "", "reviewers": "qNDU;kSWp;PQDV;ZDRS", "pdf_size": 396882, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "4;3;3;3", "wc_summary": "98;41;65;58", "wc_strengths": "39;58;57;26", "wc_weaknesses": "89;29;54;942", "wc_questions": "207;15;161;2", "wc_limitations": "22;4;51;1", "wc_review": "455;147;388;1029", "wc_reply_reviewers": "69;5;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], 
"presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 20.694202086574876 ], "wc_strengths_avg": [ 45.0, 13.322912594474229 ], "wc_weaknesses_avg": [ 278.5, 383.66424123183543 ], "wc_questions_avg": [ 96.25, 89.36267397521182 ], "wc_limitations_avg": [ 19.5, 19.880895352071043 ], "wc_review_avg": [ 504.75, 323.6235274203654 ], "wc_reply_reviewers_avg": [ 18.5, 29.227555491350966 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12141634090551944782&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uic.edu;brown.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Illinois at Chicago;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uic.edu;https://www.brown.edu", "aff_unique_abbr": "UIC;Brown", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "RegBN: Batch Normalization of Multimodal Data with Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70500", "id": "nUbdkXqC8R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4488bf8354049b1cd592b6418dc30466-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nUbdkXqC8R", "openreview": "https://openreview.net/forum?id=nUbdkXqC8R", "poster": "/media/PosterPDFs/NeurIPS%202023/70500.png?t=1702483413.2216191", "slides": "https://nips.cc/virtual/2023/poster/70500", "video": "https://nips.cc/virtual/2023/poster/70500", "author_site": "Morteza Ghahremani Boozandani, Christian Wachinger", "tldr": "", "abstract": "Recent years have witnessed a surge of interest in integrating high-dimensional data captured by multisource sensors, driven by the impressive success of neural networks in integrating multimodal data. However, the integration of heterogeneous multimodal data poses a significant challenge, as confounding effects and dependencies among such heterogeneous data sources introduce unwanted variability and bias, leading to suboptimal performance of multimodal models. Therefore, it becomes crucial to normalize the low- or high-level features extracted from data modalities before their fusion takes place. This paper introduces RegBN, a novel approach for multimodal Batch Normalization with REGularization. RegBN uses the Frobenius norm as a regularizer term to address the side effects of confounders and underlying dependencies among different data sources. The proposed method generalizes well across multiple modalities and eliminates the need for learnable parameters, simplifying training and inference. We validate the effectiveness of RegBN on eight databases from five research areas, encompassing diverse modalities such as language, audio, image, video, depth, tabular, and 3D MRI. The proposed method demonstrates broad applicability across different architectures such as multilayer perceptrons, convolutional neural networks, and vision transformers, enabling effective normalization of both low- and high-level features in multimodal neural networks. 
RegBN is available at https://mogvision.github.io/RegBN.", "keywords": "Multimodal Data;Multimodality;Batch Normalization;Heterogeneous data;Regularization;Confounder;Confounding Effect Removal;Data Dependency", "primary_area": "", "supplementary_material": "/attachment/1a7a762545b24ac6b1198df50877b23c10655c10.pdf", "author": "MORTEZA GHAHREMANI;Christian Wachinger", "authorids": "~MORTEZA_GHAHREMANI3;~Christian_Wachinger1", "gender": "M;M", "homepage": "https://mogvision.github.io/;https://ai-med.de/people/christian-wachinger/", "dblp": "152/6299;79/5985", "google_scholar": "yhXUlXsAAAAJ;https://scholar.google.de/citations?user=UOIBNdUAAAAJ", "orcid": "0000-0001-6423-6475;0000-0002-3652-1874", "linkedin": "morteza-ghahremani-3a040421a/;", "or_profile": "~MORTEZA_GHAHREMANI3;~Christian_Wachinger1", "aff": "Technische Universit\u00e4t M\u00fcnchen;LMU", "aff_domain": "tum.de;uni-muenchen.de", "position": "AI Scientist;Professor", "bibtex": "@inproceedings{\nghahremani2023regbn,\ntitle={Reg{BN}: Batch Normalization of Multimodal Data with Regularization},\nauthor={MORTEZA GHAHREMANI and Christian Wachinger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nUbdkXqC8R}\n}", "github": "", "project": "", "reviewers": "6Kg3;JscA;eB1v;Fqi7;saMu", "pdf_size": 1522986, "rating": "5;5;5;6;7", "confidence": "3;3;4;3;4", "soundness": "2;3;3;4;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "60;53;68;150;128", "wc_strengths": "69;34;24;42;120", "wc_weaknesses": "294;124;58;395;102", "wc_questions": "9;41;2;117;29", "wc_limitations": "9;1;2;22;11", "wc_review": "441;253;154;726;390", "wc_reply_reviewers": "33;23;0;12;40", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.8, 39.448193875005224 ], "wc_strengths_avg": [ 57.8, 34.504492461127434 ], "wc_weaknesses_avg": [ 194.6, 128.26472625004897 ], "wc_questions_avg": [ 39.6, 41.122256747411136 ], "wc_limitations_avg": [ 9.0, 7.563068160475615 ], "wc_review_avg": [ 392.8, 194.8962801081642 ], "wc_reply_reviewers_avg": [ 21.6, 14.347125147568763 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17741631790069945145&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tum.de;uni-muenchen.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.lmu.de", "aff_unique_abbr": "TUM;LMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "FedGame: A Game-Theoretic Defense against Backdoor Attacks in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70499", "id": "nX0zYBGEka", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a6678e2be4ce7aef9d2192e03cd586b7-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=nX0zYBGEka", "openreview": "https://openreview.net/forum?id=nX0zYBGEka", "poster": "/media/PosterPDFs/NeurIPS%202023/70499.png?t=1702332259.160826", "slides": "https://nips.cc/virtual/2023/poster/70499", "video": "https://nips.cc/virtual/2023/poster/70499", "author_site": "Jinyuan Jia, Zhuowen Yuan, Dinuka Sahabandu, Luyao Niu, Arezoo Rajabi, Bhaskar Ramasubramanian, Bo Li, Radha Poovendran", "tldr": "", "abstract": "Federated learning (FL) provides a distributed training paradigm where multiple clients can jointly train a global model without sharing their local data. However, recent studies have shown that FL offers an additional surface for backdoor attacks. For instance, an attacker can compromise a subset of clients and thus corrupt the global model to misclassify an input with a backdoor trigger as the adversarial target. Existing defenses for FL against backdoor attacks usually detect and exclude the corrupted information from the compromised clients based on a static attacker model. However, such defenses are inadequate against dynamic attackers who strategically adapt their attack strategies. To bridge this gap, we model the strategic interactions between the defender and dynamic attackers as a minimax game. Based on the analysis of the game, we design an interactive defense mechanism FedGame. We prove that under mild assumptions, the global model trained with FedGame under backdoor attacks is close to that trained without attacks. Empirically, we compare FedGame with multiple state-of-the-art baselines on several benchmark datasets under various attacks. We show that FedGame can effectively defend against strategic attackers and achieves significantly higher robustness than baselines. Our code is available at: https://github.com/AI-secure/FedGame.", "keywords": "backdoor defense;federated learning;game theory", "primary_area": "", "supplementary_material": "/attachment/e197e688e5d999ecb92a50c51459070bbda4e4db.zip", "author": "Jinyuan Jia;Zhuowen Yuan;Dinuka Sahabandu;Luyao Niu;Arezoo Rajabi;Bhaskar Ramasubramanian;Bo Li;Radha Poovendran", "authorids": "~Jinyuan_Jia2;~Zhuowen_Yuan1;~Dinuka_Sahabandu1;~Luyao_Niu1;~Arezoo_Rajabi1;~Bhaskar_Ramasubramanian1;~Bo_Li19;~Radha_Poovendran1", "gender": ";M;M;M;F;M;F;Not Specified", "homepage": "https://jinyuan-jia.github.io/;;;;;https://sites.google.com/view/rbhaskar;http://boli.cs.illinois.edu/;https://people.ece.uw.edu/radha/index.html", "dblp": "24/5124-1.html;304/3576;;181/8375;;173/4698;50/3402-26;29/5044", "google_scholar": "iyg4ytkAAAAJ;F-r0bYQAAAAJ;;nSFafMoAAAAJ;;ANJ9dgkAAAAJ;K8vJkTcAAAAJ;EEoNZ7NbVzMC", "orcid": "0000-0002-9785-7769;;0000-0001-7776-7865;0000-0001-8591-5522;;0000-0002-2166-7838;;", "linkedin": ";;;;arezoo-rajabi/;;;", "or_profile": "~Jinyuan_Jia2;~Zhuowen_Yuan1;~Dinuka_Sahabandu1;~Luyao_Niu1;~Arezoo_Rajabi1;~Bhaskar_Ramasubramanian1;~Bo_Li19;~Radha_Poovendran1", "aff": "University of Illinois Urbana-Champaign;University of Illinois Urbana-Champaign;University of Washington, Seattle;University of Washington;University of Washington;Western Washington University;University of Illinois, Urbana Champaign;University of Washington, Seattle", "aff_domain": "cs.illinois.edu;illinois.edu;uw.edu;uw.edu;uw.edu;wwu.edu;illinois.edu;uw.edu", "position": "Postdoc;PhD student;PhD student;Postdoc;Postdoc;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\njia2023fedgame,\ntitle={FedGame: A Game-Theoretic Defense against Backdoor Attacks in Federated 
Learning},\nauthor={Jinyuan Jia and Zhuowen Yuan and Dinuka Sahabandu and Luyao Niu and Arezoo Rajabi and Bhaskar Ramasubramanian and Bo Li and Radha Poovendran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nX0zYBGEka}\n}", "github": "", "project": "", "reviewers": "89HC;cSQx;8T34;Mtx6", "pdf_size": 419547, "rating": "5;6;6;6", "confidence": "4;4;3;2", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "84;70;113;224", "wc_strengths": "58;25;75;49", "wc_weaknesses": "293;188;121;161", "wc_questions": "263;5;267;37", "wc_limitations": "152;5;3;27", "wc_review": "850;293;579;498", "wc_reply_reviewers": "30;9;0;27", "wc_reply_authors": "15;14;0;18", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.75, 60.478818606186415 ], "wc_strengths_avg": [ 51.75, 18.046814123273947 ], "wc_weaknesses_avg": [ 190.75, 63.664648746380436 ], "wc_questions_avg": [ 143.0, 122.53162856993292 ], "wc_limitations_avg": [ 46.75, 61.4913611818766 ], "wc_review_avg": [ 555.0, 199.68349956869247 ], "wc_reply_reviewers_avg": [ 16.5, 12.459935794377111 ], "wc_reply_authors_avg": [ 11.75, 6.94172168845741 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=122131288221597484&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.illinois.edu;illinois.edu;uw.edu;uw.edu;uw.edu;wwu.edu;illinois.edu;uw.edu", "author_num": 8, "aff_unique_index": "0;0;1;1;1;2;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Washington;Western Washington University", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.washington.edu;https://www.wwu.edu", "aff_unique_abbr": "UIUC;UW;WWU", "aff_campus_unique_index": "0;0;1;0;1", "aff_campus_unique": "Urbana-Champaign;Seattle;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Aggregating Capacity in FL through Successive Layer Training for Computationally-Constrained Devices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70498", "id": "nXNsqB4Yr1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f43166f50f26e8d8f3edc5545b0749f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nXNsqB4Yr1", "openreview": "https://openreview.net/forum?id=nXNsqB4Yr1", "poster": "/media/PosterPDFs/NeurIPS%202023/70498.png?t=1698425027.9380279", "slides": "https://nips.cc/virtual/2023/poster/70498", "video": "https://nips.cc/virtual/2023/poster/70498", "author_site": "Kilian Pfeiffer, Ramin Khalili, Joerg Henkel", "tldr": "", "abstract": "Federated learning (FL) is usually performed on resource-constrained edge devices, e.g., with limited memory for the computation. If the required memory to train a model exceeds this limit, the device will be excluded from the training. This can lead to a lower accuracy as valuable data and computation resources are excluded from training, also causing bias and unfairness. 
The FL training process should be adjusted to such constraints. The state-of-the-art techniques propose training subsets of the FL model at constrained devices, reducing their resource requirements for training. However, these techniques largely limit the co-adaptation among parameters of the model and are highly inefficient, as we show: it is actually better to train a smaller (less accurate) model by the system where all the devices can train the model end-to-end than applying such techniques. We propose a new method that enables successive freezing and training of the parameters of the FL model at devices, reducing the training\u2019s resource requirements at the devices while still allowing enough co-adaptation between parameters. We show through extensive experimental evaluation that our technique greatly improves the accuracy of the trained model (by 52.4 p.p.) compared with the state of the art, efficiently aggregating the computation capacity available on distributed devices.", "keywords": "Federated Learning;Memory;Resource Constraints", "primary_area": "", "supplementary_material": "/attachment/cd838067e800bd9e1bdeaece61ece99fc24b1112.zip", "author": "Kilian Pfeiffer;Ramin Khalili;Joerg Henkel", "authorids": "~Kilian_Pfeiffer1;~Ramin_Khalili1;~Joerg_Henkel1", "gender": "M;M;", "homepage": ";;https://ces.itec.kit.edu/~henkel", "dblp": "242/8984;90/4201;h/JorgHenkel.html", "google_scholar": "https://scholar.google.de/citations?user=WZHlr-gAAAAJ;_neUydcAAAAJ;AmnIAhEAAAAJ", "orcid": "0000-0003-3872-0495;0000-0003-2463-7033;0000-0001-9602-2922", "linkedin": ";ramin-khalili-96a3b58a/;", "or_profile": "~Kilian_Pfeiffer1;~Ramin_Khalili1;~Joerg_Henkel1", "aff": "Karlsruhe Institute of Technology;Huawei Technologies Ltd., Munich research center;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;huawei.com;kit.edu", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\npfeiffer2023aggregating,\ntitle={Aggregating Capacity in {FL} through Successive Layer Training for Computationally-Constrained Devices},\nauthor={Kilian Pfeiffer and Ramin Khalili and Joerg Henkel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nXNsqB4Yr1}\n}", "github": "", "project": "", "reviewers": "tzDU;cUHB;AkhZ;iW6b;akhJ", "pdf_size": 570368, "rating": "5;5;6;6;7", "confidence": "3;3;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;2;4", "wc_summary": "101;55;124;90;168", "wc_strengths": "43;48;109;40;81", "wc_weaknesses": "158;44;114;151;179", "wc_questions": "4;53;67;9;66", "wc_limitations": "17;37;6;5;15", "wc_review": "323;237;420;295;509", "wc_reply_reviewers": "11;0;0;11;19", "wc_reply_authors": "0;357;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 107.6, 37.515863311404686 ], "wc_strengths_avg": [ 64.2, 26.783577057592588 ], "wc_weaknesses_avg": [ 129.2, 47.48641911115219 ], "wc_questions_avg": [ 39.8, 27.679595372765117 ], "wc_limitations_avg": [ 16.0, 11.523888232710346 ], "wc_review_avg": [ 356.8, 96.40829839801137 ], "wc_reply_reviewers_avg": [ 8.2, 7.30479294709987 ], "wc_reply_authors_avg": [ 71.4, 142.80000000000004 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ],
"reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8728715609439696, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16324488223793453400&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "kit.edu;huawei.com;kit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Karlsruhe Institute of Technology;Huawei", "aff_unique_dep": ";Huawei Technologies Ltd.", "aff_unique_url": "https://www.kit.edu;https://www.huawei.com", "aff_unique_abbr": "KIT;Huawei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Mitigating Source Bias for Fairer Weak Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70497", "id": "nXPqMyWUnx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a5181cfe76f67b37a7e1bb19837abdf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nXPqMyWUnx", "openreview": "https://openreview.net/forum?id=nXPqMyWUnx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70497", "video": "https://nips.cc/virtual/2023/poster/70497", "author_site": "Changho Shin, Sonia Cromp, Dyah Adila, Frederic Sala", "tldr": "", "abstract": "Weak supervision enables efficient development of training sets by reducing the need for ground truth labels. However, the techniques that make weak supervision attractive---such as integrating any source of signal to estimate unknown labels---also entail the danger that the produced pseudolabels are highly biased. Surprisingly, given everyday use and the potential for increased bias, weak supervision has not been studied from the point of view of fairness. We begin such a study, starting with the observation that even when a fair model can be built from a dataset with access to ground-truth labels, the corresponding dataset labeled via weak supervision can be arbitrarily unfair. To address this, we propose and empirically validate a model for source unfairness in weak supervision, then introduce a simple counterfactual fairness-based technique that can mitigate these biases. Theoretically, we show that it is possible for our approach to simultaneously improve both accuracy and fairness---in contrast to standard fairness approaches that suffer from tradeoffs. Empirically, we show that our technique improves accuracy on weak supervision baselines by as much as 32\\% while reducing demographic parity gap by 82.5\\%. 
A simple extension of our method aimed at maximizing performance produces state-of-the-art performance in five out of ten datasets in the WRENCH benchmark.", "keywords": "Weak supervision;fairness", "primary_area": "", "supplementary_material": "", "author": "Changho Shin;Sonia Cromp;Dyah Adila;Frederic Sala", "authorids": "~Changho_Shin2;~Sonia_Cromp1;~Dyah_Adila1;~Frederic_Sala1", "gender": ";F;F;M", "homepage": ";;;https://pages.cs.wisc.edu/~fredsala/", "dblp": ";;;133/3602", "google_scholar": "VpvIQAcAAAAJ;;;9KhIkNkAAAAJ", "orcid": ";;;", "linkedin": ";sonia-cromp;dyahadila/;", "or_profile": "~Changho_Shin2;~Sonia_Cromp1;~Dyah_Adila1;~Frederic_Sala1", "aff": "University of Wisconsin, Madison;Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin, Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;cs.wisc.edu;wisc.edu;wisc.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshin2023mitigating,\ntitle={Mitigating Source Bias for Fairer Weak Supervision},\nauthor={Changho Shin and Sonia Cromp and Dyah Adila and Frederic Sala},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nXPqMyWUnx}\n}", "github": "", "project": "", "reviewers": "wmSM;6e6W;GLUc;UYjh", "pdf_size": 1332683, "rating": "5;7;7;7", "confidence": "4;4;4;4", "soundness": "2;4;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "93;47;108;113", "wc_strengths": "72;66;22;30", "wc_weaknesses": "253;85;33;26", "wc_questions": "30;44;2;33", "wc_limitations": "32;3;7;18", "wc_review": "480;245;172;220", "wc_reply_reviewers": "0;23;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.25, 26.03243169586737 ], "wc_strengths_avg": [ 47.5, 21.788758569500924 ], "wc_weaknesses_avg": [ 99.25, 91.64708124103025 ], "wc_questions_avg": [ 27.25, 15.481844205391036 ], "wc_limitations_avg": [ 15.0, 11.247221879201993 ], "wc_review_avg": [ 279.25, 118.83470663068093 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7018389137298406264&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "wisc.edu;cs.wisc.edu;wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UW-Madison", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Regret Matching+: (In)Stability and Fast Convergence in Games", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70496", "id": "nYgs0qZJ97", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c209cd57e13f3344a4cad4ce84d0ee1b-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=nYgs0qZJ97", "openreview": "https://openreview.net/forum?id=nYgs0qZJ97", "poster": "/media/PosterPDFs/NeurIPS%202023/70496.png?t=1701378034.7007327", "slides": "https://nips.cc/virtual/2023/poster/70496", "video": "https://nips.cc/virtual/2023/poster/70496", "author_site": "Gabriele Farina, Julien Grand-Cl\u00e9ment, Christian Kroer, Chung-Wei Lee, Haipeng Luo", "tldr": "", "abstract": "Regret Matching$^+$ (RM$^+$) and its variants are important algorithms for solving large-scale games.\nHowever, a theoretical understanding of their success in practice is still a mystery.\nMoreover, recent advances on fast convergence in games are limited to no-regret algorithms such as online mirror descent, which satisfy stability.\nIn this paper, we first give counterexamples showing that RM+ and its predictive version can be unstable, which might cause other players to suffer large regret. \nWe then provide two fixes: restarting and chopping off the positive orthant that RM$^+$ works in.\nWe show that these fixes are sufficient to get $O(T^{1/4})$ individual regret and $O(1)$ social regret in normal-form games via RM$^+$ with predictions.\nWe also apply our stabilizing techniques to clairvoyant updates in the uncoupled learning setting for RM$^+$ and prove desirable results akin to recent works for Clairvoyant online mirror descent. \nOur experiments show the advantages of our algorithms over vanilla RM$^+$-based algorithms in matrix and extensive-form games.", "keywords": "Regret Matching;Predictive algorithms;Extensive-Form Games", "primary_area": "", "supplementary_material": "/attachment/5310f9bc1398110e7aad9f34c6c8949b0048fec9.pdf", "author": "Gabriele Farina;Julien Grand-Cl\u00e9ment;Christian Kroer;Chung-Wei Lee;Haipeng Luo", "authorids": "~Gabriele_Farina1;~Julien_Grand-Cl\u00e9ment1;~Christian_Kroer1;~Chung-Wei_Lee1;~Haipeng_Luo1", "gender": "M;M;M;;M", "homepage": "http://www.cs.cmu.edu/~gfarina/about/;https://julien-grand-clement.fr/;http://www.columbia.edu/~ck2945/;https://chungwei.net/;https://haipeng-luo.net/", "dblp": ";197/0112;64/10660;80/2550;62/2576", "google_scholar": "sktDNcEAAAAJ;https://scholar.google.fr/citations?user=K_ZLzdoAAAAJ;https://scholar.google.ch/citations?user=ckHwjPAAAAAJ;VVVc6BIAAAAJ;ct2hw4UAAAAJ", "orcid": ";;0000-0002-9009-8683;;", "linkedin": ";;;;", "or_profile": "~Gabriele_Farina1;~Julien_Grand-Cl\u00e9ment1;~Christian_Kroer1;~Chung-Wei_Lee1;~Haipeng_Luo1", "aff": "FAIR, Meta AI;HEC Paris;Columbia University;University of Southern California;University of Southern California", "aff_domain": "meta.com;hec.fr;columbia.edu;usc.edu;usc.edu", "position": "Researcher;Assistant Professor;Assistant Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfarina2023regret,\ntitle={Regret Matching+: (In)Stability and Fast Convergence in Games},\nauthor={Gabriele Farina and Julien Grand-Cl{\\'e}ment and Christian Kroer and Chung-Wei Lee and Haipeng Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nYgs0qZJ97}\n}", "github": "", "project": "", "reviewers": "NRiL;QXqy;GXrh;jyGc;zLtq", "pdf_size": 541113, "rating": "6;7;7;7;8", "confidence": "3;4;3;3;3", "soundness": "3;3;3;3;4", "novelty": "2;4;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "68;29;159;182;90", "wc_strengths": "29;88;74;84;150", "wc_weaknesses": "52;15;284;144;43", "wc_questions": "91;19;150;84;27", "wc_limitations": "1;7;41;1;7", "wc_review": "241;158;708;495;317", 
"wc_reply_reviewers": "11;7;16;26;5", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 105.6, 56.94418319723271 ], "wc_strengths_avg": [ 85.0, 38.709172039711724 ], "wc_weaknesses_avg": [ 107.6, 98.26616915296943 ], "wc_questions_avg": [ 74.2, 47.74683235566523 ], "wc_limitations_avg": [ 11.4, 15.041276541570532 ], "wc_review_avg": [ 383.8, 196.62695644290483 ], "wc_reply_reviewers_avg": [ 13.0, 7.5099933422074345 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8761248706788084750&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;hec.fr;columbia.edu;usc.edu;usc.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Meta;HEC Paris;Columbia University;University of Southern California", "aff_unique_dep": "Meta AI;;;", "aff_unique_url": "https://meta.ai;https://www.hec.edu;https://www.columbia.edu;https://www.usc.edu", "aff_unique_abbr": "Meta AI;HEC;Columbia;USC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;France" }, { "title": "Uncertainty Quantification via Neural Posterior Principal Components", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70495", "id": "nZ0jnXizyR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74fc5575632191d96881d8015f79dde3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nZ0jnXizyR", "openreview": "https://openreview.net/forum?id=nZ0jnXizyR", "poster": "/media/PosterPDFs/NeurIPS%202023/70495.png?t=1699563973.575239", "slides": "https://nips.cc/virtual/2023/poster/70495", "video": "https://nips.cc/virtual/2023/poster/70495", "author_site": "Elias Nehme, Omer Yair, Tomer Michaeli", "tldr": "", "abstract": "Uncertainty quantification is crucial for the deployment of image restoration models in safety-critical domains, like autonomous driving and biological imaging. To date, methods for uncertainty visualization have mainly focused on per-pixel estimates. Yet, a heatmap of per-pixel variances is typically of little practical use, as it does not capture the strong correlations between pixels. A more natural measure of uncertainty corresponds to the variances along the principal components (PCs) of the posterior distribution. Theoretically, the PCs can be computed by applying PCA on samples generated from a conditional generative model for the input image. However, this requires generating a very large number of samples at test time, which is painfully slow with the current state-of-the-art (diffusion) models. In this work, we present a method for predicting the PCs of the posterior distribution for any input image, in a single forward pass of a neural network. Our method can either wrap around a pre-trained model that was trained to minimize the mean square error (MSE), or can be trained from scratch to output both a predicted image and the posterior PCs. 
We showcase our method on multiple inverse problems in imaging, including denoising, inpainting, super-resolution, and biological image-to-image translation. Our method reliably conveys instance-adaptive uncertainty directions, achieving uncertainty quantification comparable with posterior samplers while being orders of magnitude faster. Code and examples are available on our [webpage](https://eliasnehme.github.io/NPPC/).", "keywords": "Uncertainty Quantification;Inverse Problems;Probabilistic Modelling;Principal Components Analysis;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/23bf337371c484a8df210bfe6564138b5459d55b.zip", "author": "Elias Nehme;Omer Yair;Tomer Michaeli", "authorids": "~Elias_Nehme1;~Omer_Yair1;~Tomer_Michaeli1", "gender": "M;M;M", "homepage": "https://eliasnehme.github.io/;;https://tomer.net.technion.ac.il/", "dblp": "275/8151;166/1235;70/3188.html", "google_scholar": "https://scholar.google.co.il/citations?user=jWLfyAIAAAAJ;EF3AXOkAAAAJ;n2EbR2cAAAAJ", "orcid": "0000-0003-1759-1751;;", "linkedin": "elias-nehme-2a010571/;yairomer/?originalSubdomain=il;", "or_profile": "~Elias_Nehme1;~Omer_Yair1;~Tomer_Michaeli1", "aff": "Technion - Israel Institute of Technology;Technion, Technion;Technion, Technion", "aff_domain": "ee.technion.ac.il;technion.ac.il;technion.ac.il", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nnehme2023uncertainty,\ntitle={Uncertainty Quantification via Neural Posterior Principal Components},\nauthor={Elias Nehme and Omer Yair and Tomer Michaeli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nZ0jnXizyR}\n}", "github": "", "project": "", "reviewers": "Ch4c;Dscw;aQW2;2qC5", "pdf_size": 3543282, "rating": "4;5;6;7", "confidence": "4;2;4;5", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "50;76;89;100", "wc_strengths": "12;76;77;220", "wc_weaknesses": "186;63;32;200", "wc_questions": "86;67;59;11", "wc_limitations": "1;28;1;1", "wc_review": "335;310;258;532", "wc_reply_reviewers": "0;0;21;118", "wc_reply_authors": "0;0;0;264", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.75, 18.64638034579366 ], "wc_strengths_avg": [ 96.25, 76.14583048335608 ], "wc_weaknesses_avg": [ 120.25, 73.73728703986878 ], "wc_questions_avg": [ 55.75, 27.63489641739227 ], "wc_limitations_avg": [ 7.75, 11.691342951089922 ], "wc_review_avg": [ 358.75, 103.81082554338926 ], "wc_reply_reviewers_avg": [ 34.75, 48.823022233368555 ], "wc_reply_authors_avg": [ 66.0, 114.3153532995459 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5129891760425771, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4830853271221779592&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ee.technion.ac.il;technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Generating Behaviorally Diverse Policies with Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70494", "id": "nafgeYknRT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/180d4373aca26bd86bf45fc50d1a709f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nafgeYknRT", "openreview": "https://openreview.net/forum?id=nafgeYknRT", "poster": "/media/PosterPDFs/NeurIPS%202023/70494.png?t=1701911063.902209", "slides": "https://nips.cc/virtual/2023/poster/70494", "video": "https://nips.cc/virtual/2023/poster/70494", "author_site": "Shashank Hegde, Sumeet Batra, K.R. Zentner, Gaurav Sukhatme", "tldr": "", "abstract": "Recent progress in Quality Diversity Reinforcement Learning (QD-RL) has enabled learning a collection of behaviorally diverse, high performing policies. However, these methods typically involve storing thousands of policies, which results in high space-complexity and poor scaling to additional behaviors. Condensing the archive into a single model while retaining the performance and coverage of the\noriginal collection of policies has proved challenging. In this work, we propose using diffusion models to distill the archive into a single generative model over policy parameters. We show that our method achieves a compression ratio of 13x while recovering 98% of the original rewards and 89% of the original humanoid archive coverage. Further, the conditioning mechanism of diffusion models allows\nfor flexibly selecting and sequencing behaviors, including using language. Project website: https://sites.google.com/view/policydiffusion/home.", "keywords": "Latent Diffusion;Quality Diversity;Reinforcement Learning;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Shashank Hegde;Sumeet Batra;K.R. Zentner;Gaurav S. Sukhatme", "authorids": "~Shashank_Hegde1;~Sumeet_Batra1;~K.R._Zentner1;~Gaurav_S._Sukhatme1", "gender": "M;M;Not Specified;M", "homepage": "https://hegde95.github.io/;https://sumeetbatra.github.io/;https://zentner.io/;http://www-robotics.usc.edu/~gaurav/", "dblp": "125/2982.html;255/5461;295/9732;s/GauravSSukhatme", "google_scholar": "QbCHQHUAAAAJ;https://scholar.google.com/citations?hl=ja;IjVj4hsAAAAJ;https://scholar.google.com.tw/citations?user=lRUi-A8AAAAJ", "orcid": ";;;0000-0003-2408-474X", "linkedin": "karkala-shashank-hegde/;sumeetbatra/;zentnerkyle/;gaurav-sukhatme-9b6420b/", "or_profile": "~Shashank_Hegde1;~Sumeet_Batra1;~K.R._Zentner1;~Gaurav_S._Sukhatme1", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhegde2023generating,\ntitle={Generating Behaviorally Diverse Policies with Latent Diffusion Models},\nauthor={Shashank Hegde and Sumeet Batra and K.R. Zentner and Gaurav S. 
Sukhatme},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nafgeYknRT}\n}", "github": "", "project": "", "reviewers": "GgS4;s86A;mRc8;Usyj", "pdf_size": 2039104, "rating": "3;5;6;7", "confidence": "4;3;3;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "183;52;33;67", "wc_strengths": "57;40;25;52", "wc_weaknesses": "210;190;42;75", "wc_questions": "81;111;26;106", "wc_limitations": "7;11;1;55", "wc_review": "538;404;127;355", "wc_reply_reviewers": "33;14;0;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.75, 58.554995517035096 ], "wc_strengths_avg": [ 43.5, 12.338962679253067 ], "wc_weaknesses_avg": [ 129.25, 72.0533656396424 ], "wc_questions_avg": [ 81.0, 33.726843908080106 ], "wc_limitations_avg": [ 18.5, 21.37171027316251 ], "wc_review_avg": [ 356.0, 148.2143717727805 ], "wc_reply_reviewers_avg": [ 16.75, 11.861176164276458 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.16903085094570333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=773943476336754545&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "usc.edu;usc.edu;usc.edu;usc.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Curves for Deep Structured Gaussian Feature Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70493", "id": "nbG6zfJtIe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/85d456fd41f3eec83bd3b0c337037a0e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nbG6zfJtIe", "openreview": "https://openreview.net/forum?id=nbG6zfJtIe", "poster": "/media/PosterPDFs/NeurIPS%202023/70493.png?t=1702046924.9954233", "slides": "https://nips.cc/virtual/2023/poster/70493", "video": "https://nips.cc/virtual/2023/poster/70493", "author_site": "Jacob Zavatone-Veth, Cengiz Pehlevan", "tldr": "", "abstract": "In recent years, significant attention in deep learning theory has been devoted to analyzing when models that interpolate their training data can still generalize well to unseen examples. Many insights have been gained from studying models with multiple layers of Gaussian random features, for which one can compute precise generalization asymptotics. However, few works have considered the effect of weight anisotropy; most assume that the random features are generated using independent and identically distributed Gaussian weights, and allow only for structure in the input data. Here, we use the replica trick from statistical physics to derive learning curves for models with many layers of structured Gaussian features. 
We show that allowing correlations between the rows of the first layer of features can aid generalization, while structure in later layers is generally detrimental. Our results shed light on how weight structure affects generalization in a simple class of solvable models.", "keywords": "random feature models;generalization;deep networks;ridge regression", "primary_area": "", "supplementary_material": "/attachment/02f14f79bc82891db62cb33314512f734342184f.zip", "author": "Jacob A Zavatone-Veth;Cengiz Pehlevan", "authorids": "~Jacob_A_Zavatone-Veth1;~Cengiz_Pehlevan2", "gender": "M;", "homepage": "https://jzv.io;https://pehlevan.seas.harvard.edu/", "dblp": "270/9915;145/3480", "google_scholar": "i_HogJkAAAAJ;veDLTPEAAAAJ", "orcid": "0000-0002-4060-1738;0000-0001-9767-6063", "linkedin": ";", "or_profile": "~Jacob_A_Zavatone-Veth1;~Cengiz_Pehlevan2", "aff": "Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;seas.harvard.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzavatone-veth2023learning,\ntitle={Learning Curves for Deep Structured Gaussian Feature Models},\nauthor={Jacob A Zavatone-Veth and Cengiz Pehlevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nbG6zfJtIe}\n}", "github": "", "project": "", "reviewers": "nhrG;bdDi;cGDX;hUXg;VEZW", "pdf_size": 10145726, "rating": "5;5;6;6;7", "confidence": "3;4;1;3;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;2;3", "presentation": "1;2;2;3;3", "wc_summary": "75;54;47;147;82", "wc_strengths": "40;78;23;69;80", "wc_weaknesses": "48;211;74;369;101", "wc_questions": "374;25;36;106;95", "wc_limitations": "7;54;1;2;14", "wc_review": "544;422;181;693;372", "wc_reply_reviewers": "45;13;9;20;25", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 81.0, 35.43444651747787 ], "wc_strengths_avg": [ 58.0, 22.60088493842664 ], "wc_weaknesses_avg": [ 160.6, 118.06879350615895 ], "wc_questions_avg": [ 127.2, 127.40392458633289 ], "wc_limitations_avg": [ 15.6, 19.744366285095097 ], "wc_review_avg": [ 442.4, 171.36230624031646 ], "wc_reply_reviewers_avg": [ 22.4, 12.57934815481311 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17437183018524858476&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 12, "email": "harvard.edu;seas.harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Symbolic Discovery of Optimization Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70492", "id": "ne6zeqLFCZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a39b4925e35cf447ccba8757137d84f-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=ne6zeqLFCZ", "openreview": "https://openreview.net/forum?id=ne6zeqLFCZ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70492", "video": "https://nips.cc/virtual/2023/poster/70492", "author_site": "Xiangning Chen, Chen Liang, Da Huang, Esteban Real, Kaiyuan Wang, Hieu Pham, Xuanyi Dong, Thang Luong, Cho-Jui Hsieh, Yifeng Lu, Quoc V Le", "tldr": "", "abstract": "We present a method to formulate algorithm discovery as program search, and apply it to discover optimization algorithms for deep neural network training. We leverage efficient search techniques to explore an infinite and sparse program space. To bridge the large generalization gap between proxy and target tasks, we also introduce program selection and simplification strategies.\nOur method discovers a simple and effective optimization algorithm, $\\textbf{Lion}$ ($\\textit{Evo$\\textbf{L}$ved S$\\textbf{i}$gn M$\\textbf{o}$me$\\textbf{n}$tum}$). It is more memory-efficient than Adam as it only keeps track of the momentum. Different from adaptive optimizers, its update has the same magnitude for each parameter calculated through the sign operation.\nWe compare Lion with widely used optimizers, such as Adam and Adafactor, for training a variety of models on different tasks. On image classification, Lion boosts the accuracy of ViT by up to 2\\% on ImageNet and saves up to 5x the pre-training compute on JFT. On vision-language contrastive learning, we achieve 88.3\\% $\\textit{zero-shot}$ and 91.1\\% $\\textit{fine-tuning}$ accuracy on ImageNet, surpassing the previous best results by 2\\% and 0.1\\%, respectively. On diffusion models, Lion outperforms Adam by achieving a better FID score and reducing the training compute by up to 2.3x. For autoregressive, masked language modeling, and fine-tuning, Lion exhibits a similar or better performance compared to Adam. Our analysis of Lion reveals that its performance gain grows with the training batch size. It also requires a smaller learning rate than Adam due to the larger norm of the update produced by the sign function. 
Additionally, we examine the limitations of Lion and identify scenarios where its improvements are small or not statistically significant.", "keywords": "AutoML", "primary_area": "", "supplementary_material": "", "author": "Xiangning Chen;Chen Liang;Da Huang;Esteban Real;Kaiyuan Wang;Hieu Pham;Xuanyi Dong;Thang Luong;Cho-Jui Hsieh;Yifeng Lu;Quoc V Le", "authorids": "~Xiangning_Chen1;~Chen_Liang1;~Da_Huang2;~Esteban_Real1;~Kaiyuan_Wang1;~Hieu_Pham1;~Xuanyi_Dong1;~Thang_Luong1;~Cho-Jui_Hsieh1;~Yifeng_Lu1;~Quoc_V_Le1", "gender": "M;;M;M;M;M;;M;M;M;M", "homepage": ";;https://www.estebanreal.com/;http://kaiyuanw.github.io/;;https://xuanyidong.com/;http://www.thangluong.com;http://web.cs.ucla.edu/~chohsieh/index.html;;;http://crazydonkey200.github.io/", "dblp": "56/7393;;156/0082;193/4231;;198/1522;153/2222;14/2770;69/8051;29/6166;35/3221", "google_scholar": "vNcBx1sAAAAJ;ZjuMpLoAAAAJ;ipTsozQAAAAJ;OjcQcisAAAAJ;GpcGdRkAAAAJ;7zp9arUAAAAJ;Bmbkv6sAAAAJ;Wy89g4IAAAAJ;CM4o-cgAAAAJ;;ILQ8_ekAAAAJ", "orcid": ";;;;;0000-0001-9272-1590;;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Xiangning_Chen1;~Da_Huang2;~Esteban_Real1;~Kaiyuan_Wang1;~Hieu_Pham1;~Xuanyi_Dong1;~Thang_Luong1;~Cho-Jui_Hsieh1;~Yifeng_Lu1;~Quoc_V_Le1;~Chen_Liang2", "aff": "University of California, Los Angeles;Google;Google;Google;Carnegie Mellon University;Google Brain;Google;Amazon;Google Deepmind;Google;Google Brain", "aff_domain": "cs.ucla.edu;google.com;google.com;google.com;cmu.edu;google.com;google.com;amazon.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Engineer/Researcher;Researcher;PhD student;Researcher;Research Scientist;visiting scholar;Researcher;Scientist;Researcher", "bibtex": "@inproceedings{\nchen2023symbolic,\ntitle={Symbolic Discovery of Optimization Algorithms},\nauthor={Xiangning Chen and Chen Liang and Da Huang and Esteban Real and Kaiyuan Wang and Hieu Pham and Xuanyi Dong and Thang Luong and Cho-Jui Hsieh and Yifeng Lu and Quoc V Le},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ne6zeqLFCZ}\n}", "github": "", "project": "", "reviewers": "Y4eN;weMp;Gizf;hEv6", "pdf_size": 1324488, "rating": "6;6;6;6", "confidence": "5;3;4;5", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "125;114;50;146", "wc_strengths": "123;23;70;559", "wc_weaknesses": "164;24;124;689", "wc_questions": "215;42;2;2", "wc_limitations": "36;59;5;3", "wc_review": "663;262;251;1399", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 108.75, 35.81462690019261 ], "wc_strengths_avg": [ 193.75, 213.8239638113558 ], "wc_weaknesses_avg": [ 250.25, 258.39347418230204 ], "wc_questions_avg": [ 65.25, 87.98685981440637 ], "wc_limitations_avg": [ 25.75, 23.23117517475171 ], "wc_review_avg": [ 643.75, 466.57227467992567 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 518, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12397282142807393966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
"cs.ucla.edu;google.com;google.com;google.com;cmu.edu;google.com;google.com;amazon.com;google.com;google.com;google.com", "author_num": 11, "aff_unique_index": "0;1;1;1;2;1;1;3;4;1;1", "aff_unique_norm": "University of California, Los Angeles;Google;Carnegie Mellon University;Amazon;DeepMind", "aff_unique_dep": ";Google;;Amazon.com, Inc.;DeepMind", "aff_unique_url": "https://www.ucla.edu;https://www.google.com;https://www.cmu.edu;https://www.amazon.com;https://deepmind.com", "aff_unique_abbr": "UCLA;Google;CMU;Amazon;DeepMind", "aff_campus_unique_index": "0;1;1;1;1;1;1;1", "aff_campus_unique": "Los Angeles;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Post-processing Private Synthetic Data for Improving Utility on Selected Measures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70491", "id": "neu9JlNweE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca6980a3dba7fb3e4e66925656dba68b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=neu9JlNweE", "openreview": "https://openreview.net/forum?id=neu9JlNweE", "poster": "/media/PosterPDFs/NeurIPS%202023/70491.png?t=1701482209.3640704", "slides": "https://nips.cc/virtual/2023/poster/70491", "video": "https://nips.cc/virtual/2023/poster/70491", "author_site": "Hao Wang, Shivchander Sudalairaj, John Henning, Kristjan Greenewald, Akash Srivastava", "tldr": "", "abstract": "Existing private synthetic data generation algorithms are agnostic to downstream tasks. However, end users may have specific requirements that the synthetic data must satisfy. Failure to meet these requirements could significantly reduce the utility of the data for downstream use. We introduce a post-processing technique that improves the utility of the synthetic data with respect to measures selected by the end user, while preserving strong privacy guarantees and dataset quality. Our technique involves resampling from the synthetic data to filter out samples that do not meet the selected utility measures, using an efficient stochastic first-order algorithm to find optimal resampling weights. 
Through comprehensive numerical experiments, we demonstrate that our approach consistently improves the utility of synthetic data across multiple benchmark datasets and state-of-the-art synthetic data generation algorithms.", "keywords": "differential privacy;synthetic data", "primary_area": "", "supplementary_material": "/attachment/9bb5d812764e3581a1033d3cd1b9fd44f08acb56.pdf", "author": "Hao Wang;Shivchander Sudalairaj;John Henning;Kristjan Greenewald;Akash Srivastava", "authorids": "~Hao_Wang22;~Shivchander_Sudalairaj1;~John_Henning1;~Kristjan_Greenewald1;~Akash_Srivastava1", "gender": "M;M;M;;M", "homepage": "https://haowang94.github.io;;https://John.henning.ai;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald;http://akashgit.github.io", "dblp": ";314/2623;;146/0563;24/9528", "google_scholar": "A3WtYhAAAAAJ;O71amfMAAAAJ;;L3zNUG4AAAAJ;https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ", "orcid": ";;;;", "linkedin": ";shivchanders/;;;https://uk.linkedin.com/in/akash-srivastava-aa97361b", "or_profile": "~Hao_Wang22;~Shivchander_Sudalairaj1;~John_Henning1;~Kristjan_Greenewald1;~Akash_Srivastava1", "aff": "MIT-IBM Watson AI Lab;MIT-IBM Watson AI Lab;MIT-IBM Watson AI Lab;MIT-IBM Watson AI Lab, IBM Research;MIT-IBM Watson AI Research Lab", "aff_domain": "ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": "Researcher;Researcher;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nwang2023postprocessing,\ntitle={Post-processing Private Synthetic Data for Improving Utility on Selected Measures},\nauthor={Hao Wang and Shivchander Sudalairaj and John Henning and Kristjan Greenewald and Akash Srivastava},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=neu9JlNweE}\n}", "github": "", "project": "", "reviewers": "5bmq;SorM;rPbc;zgZz", "pdf_size": 541960, "rating": "6;6;7;7", "confidence": "3;5;4;4", "soundness": "3;2;4;4", "novelty": "2;1;3;3", "presentation": "2;3;4;3", "wc_summary": "101;193;147;50", "wc_strengths": "39;56;89;64", "wc_weaknesses": "102;284;34;69", "wc_questions": "172;55;18;61", "wc_limitations": "45;1;38;4", "wc_review": "459;589;326;248", "wc_reply_reviewers": "22;20;29;22", "wc_reply_authors": "56;38;57;52", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.75, 53.124264700793745 ], "wc_strengths_avg": [ 62.0, 18.01388353465182 ], "wc_weaknesses_avg": [ 122.25, 96.43229490165626 ], "wc_questions_avg": [ 76.5, 57.543461835381436 ], "wc_limitations_avg": [ 22.0, 19.685019685029527 ], "wc_review_avg": [ 405.5, 130.05864062029866 ], "wc_reply_reviewers_avg": [ 23.25, 3.418698582794336 ], "wc_reply_authors_avg": [ 50.75, 7.595228765481656 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10865880910266826118&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;IBM", "aff_unique_dep": "IBM Watson AI Lab;AI Lab", "aff_unique_url": 
"https://www.mitibmwatsonailab.org;https://www.ibmwatsonai.org/", "aff_unique_abbr": "MIT-IBM AI Lab;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beyond Pretrained Features: Noisy Image Modeling Provides Adversarial Defense", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70490", "id": "niHkj9ixUZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8629b0fff229b8a27efb1422e990605f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=niHkj9ixUZ", "openreview": "https://openreview.net/forum?id=niHkj9ixUZ", "poster": "/media/PosterPDFs/NeurIPS%202023/70490.png?t=1701393475.453823", "slides": "https://nips.cc/virtual/2023/poster/70490", "video": "https://nips.cc/virtual/2023/poster/70490", "author_site": "Zunzhi You, Daochang Liu, Bohyung Han, Chang Xu", "tldr": "", "abstract": "Recent advancements in masked image modeling (MIM) have made it a prevailing framework for self-supervised visual representation learning. The MIM pretrained models, like most deep neural network methods, remain vulnerable to adversarial attacks, limiting their practical application, and this issue has received little research attention. In this paper, we investigate how this powerful self-supervised learning paradigm can provide adversarial robustness to downstream classifiers. During the exploration, we find that noisy image modeling (NIM), a simple variant of MIM that adopts denoising as the pre-text task, reconstructs noisy images surprisingly well despite severe corruption. Motivated by this observation, we propose an adversarial defense method, referred to as De^3, by exploiting the pretrained decoder for denoising. Through De^3, NIM is able to enhance adversarial robustness beyond providing pretrained features. Furthermore, we incorporate a simple modification, sampling the noise scale hyperparameter from random distributions, and enable the defense to achieve a better and tunable trade-off between accuracy and robustness. Experimental results demonstrate that, in terms of adversarial robustness, NIM is superior to MIM thanks to its effective denoising capability. Moreover, the defense provided by NIM achieves performance on par with adversarial training while offering the extra tunability advantage. 
Source code and models are available at https://github.com/youzunzhi/NIM-AdvDef.", "keywords": "self-supervised learning;adversarial robustness", "primary_area": "", "supplementary_material": "/attachment/e7b922bd2f747977f9905649002d1d0b44fe12ac.pdf", "author": "Zunzhi You;Daochang Liu;Bohyung Han;Chang Xu", "authorids": "~Zunzhi_You1;~Daochang_Liu1;~Bohyung_Han1;~Chang_Xu4", "gender": "M;M;Not Specified;", "homepage": "https://youzunzhi.github.io;https://finspire13.github.io;http://cvlab.snu.ac.kr/~bhhan;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": "278/0958;222/2701;73/4880.html;97/2966-2", "google_scholar": "akqjKT0AAAAJ;https://scholar.google.com/citations?hl=en;9aaeCToAAAAJ;N4F_3eoAAAAJ", "orcid": ";;;0000-0002-4756-0609", "linkedin": ";;;", "or_profile": "~Zunzhi_You1;~Daochang_Liu1;~Bohyung_Han1;~Charles_Xu1", "aff": "University of Sydney;University of Sydney;Seoul National University;University of Sydney", "aff_domain": "usyd.edu.au;usyd.edu.au;snu.ac.kr;sydney.edu.au", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyou2023beyond,\ntitle={Beyond Pretrained Features: Noisy Image Modeling Provides Adversarial Defense},\nauthor={Zunzhi You and Daochang Liu and Bohyung Han and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=niHkj9ixUZ}\n}", "github": "", "project": "", "reviewers": "qarJ;ovhG;XF5y;judd;P9xe", "pdf_size": 901341, "rating": "3;5;7;8;8", "confidence": "5;3;3;5;4", "soundness": "2;2;3;4;4", "novelty": "2;3;3;4;4", "presentation": "2;3;4;3;4", "wc_summary": "35;164;59;51;104", "wc_strengths": "6;62;77;74;116", "wc_weaknesses": "117;188;307;79;160", "wc_questions": "2;30;27;14;14", "wc_limitations": "2;1;7;8;1", "wc_review": "162;445;477;226;395", "wc_reply_reviewers": "510;0;53;20;0", "wc_reply_authors": "2319;62;62;0;0", "reply_reviewers": "2;0;1;1;0", "reply_authors": "6;2;2;1;1", "rating_avg": [ 6.2, 1.9390719429665317 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 82.6, 46.70160596810349 ], "wc_strengths_avg": [ 67.0, 35.485208185947 ], "wc_weaknesses_avg": [ 170.2, 77.82390378283526 ], "wc_questions_avg": [ 17.4, 10.11137972781163 ], "wc_limitations_avg": [ 3.8, 3.059411708155671 ], "wc_review_avg": [ 341.0, 124.49417657063321 ], "wc_reply_reviewers_avg": [ 116.6, 197.65181506882246 ], "wc_reply_authors_avg": [ 488.6, 915.6199211463237 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.11531640100361064, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13373829687301592515&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "usyd.edu.au;usyd.edu.au;snu.ac.kr;sydney.edu.au", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Sydney;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://www.snu.ac.kr", "aff_unique_abbr": "USYD;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;South Korea" }, { "title": "Practical Sharpness-Aware Minimization Cannot Converge All the Way to Optima", "status": 
"Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70489", "id": "nijJN0LHqM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5305b7891e1098dd9773d35cd9333180-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nijJN0LHqM", "openreview": "https://openreview.net/forum?id=nijJN0LHqM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70489", "video": "https://nips.cc/virtual/2023/poster/70489", "author_site": "Dongkuk Si, Chulhee Yun", "tldr": "", "abstract": "Sharpness-Aware Minimization (SAM) is an optimizer that takes a descent step based on the gradient at a perturbation $y_t = x_t + \\rho \\frac{\\nabla f(x_t)}{\\lVert \\nabla f(x_t) \\rVert}$ of the current point $x_t$. \nExisting studies prove convergence of SAM for smooth functions, but they do so by assuming decaying perturbation size $\\rho$ and/or no gradient normalization in $y_t$, which is detached from practice. To address this gap, we study deterministic/stochastic versions of SAM with practical configurations (i.e., constant $\\rho$ and gradient normalization in $y_t$) and explore their convergence properties on smooth functions with (non)convexity assumptions.\nPerhaps surprisingly, in many scenarios, we find out that SAM has limited capability to converge to global minima or stationary points.\nFor smooth strongly convex functions, we show that while deterministic SAM enjoys tight global convergence rates of $\\tilde \\Theta(\\frac{1}{T^2})$, the convergence bound of stochastic SAM suffers an inevitable additive term $\\mathcal O(\\rho^2)$, indicating convergence only up to neighborhoods of optima.\nIn fact, such $\\mathcal O(\\rho^2)$ factors arise for stochastic SAM in all the settings we consider, and also for deterministic SAM in nonconvex cases; importantly, we prove by examples that such terms are unavoidable.\nOur results highlight vastly different characteristics of SAM with vs. 
without decaying perturbation size or gradient normalization, and suggest that the intuitions gained from one version may not apply to the other.", "keywords": "Sharpness-Aware Minimization;convex optimization", "primary_area": "", "supplementary_material": "", "author": "Dongkuk Si;Chulhee Yun", "authorids": "~Dongkuk_Si1;~Chulhee_Yun1", "gender": "M;M", "homepage": "https://github.com/parroteffect;https://chulheeyun.github.io/", "dblp": ";138/0148.html", "google_scholar": ";Ukl64ggAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dongkuk_Si1;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nsi2023practical,\ntitle={Practical Sharpness-Aware Minimization Cannot Converge All the Way to Optima},\nauthor={Dongkuk Si and Chulhee Yun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nijJN0LHqM}\n}", "github": "", "project": "", "reviewers": "NH1R;y8UR;T74n;AJAE", "pdf_size": 1089533, "rating": "5;6;7;7", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "65;199;62;125", "wc_strengths": "29;106;175;47", "wc_weaknesses": "54;440;4;67", "wc_questions": "146;44;113;103", "wc_limitations": "1;34;1;32", "wc_review": "295;823;355;374", "wc_reply_reviewers": "22;171;24;0", "wc_reply_authors": "35;373;39;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 112.75, 55.778019864459154 ], "wc_strengths_avg": [ 89.25, 57.115562677785114 ], "wc_weaknesses_avg": [ 141.25, 174.0795436000451 ], "wc_questions_avg": [ 101.5, 36.81372026839993 ], "wc_limitations_avg": [ 17.0, 16.015617378046965 ], "wc_review_avg": [ 461.75, 210.59602916484442 ], "wc_reply_reviewers_avg": [ 54.25, 68.06017558014378 ], "wc_reply_authors_avg": [ 111.75, 151.59382408264526 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7755150125149553610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "nkfSodI4ow", "title": "XYZ Data Efficiency: Improving Deep Learning Model Quality and Training Efficiency via Efficient Data Sampling and Routing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances on deep learning models come at the price of formidable training cost. The increasing model size is one of the root causes, but another less-emphasized fact is that data scale is actually increasing at a similar speed as model scale, and the training cost is proportional to both of them. 
Compared to the rapidly evolving model architecture, how to efficiently use the training data (especially for the expensive foundation model pretraining) is both less explored and difficult to realize due to the lack of a convenient framework that focuses on data efficiency capabilities. To this end, we present XYZ Data Efficiency, a framework that makes better use of data, increases training efficiency, and improves model quality. Specifically, we propose and combine two data efficiency techniques: efficient data sampling via a general curriculum learning library, and efficient data routing via a novel random layerwise token dropping technique. For GPT-3 1.3B language model pretraining, our work achieves 12.5x less data/time/cost (`$`3.7K if rented on Azure), while still maintaining 95% of model quality compared to baseline with full data and cost (`$`46.3K). For GPT-3 1.3B and BERT-large pretraining, our work can also achieve the same model quality with up to 2x less data/time/cost, or achieve better model quality under the same data/time/cost. XYZ Data Efficiency is easy to use and tune, enabling us to easily apply it and verify its benefit on additional tasks including GPT-3 MoE model pretraining and small-scale GPT-2/ViT finetuning.", "keywords": "data efficiency;training efficiency;foundation model;pretraining;finetuning;language model;GPT-3;BERT;ViT", "primary_area": "", "supplementary_material": "/attachment/0430723085565eaa418dd41d88b6ec6d24aabfb9.zip", "author": "Conglong Li;Zhewei Yao;Xiaoxia Wu;Minjia Zhang;Connor Holmes;Cheng Li;Yuxiong He", "authorids": "~Conglong_Li1;~Zhewei_Yao1;~Xiaoxia_Wu1;~Minjia_Zhang1;~Connor_Holmes1;~Cheng_Li10;~Yuxiong_He1", "gender": ";M;F;M;M;F;", "homepage": ";;https://sites.google.com/view/xwu/home;https://minjiazhang.github.io/;;https://chengli.netlify.app/;", "dblp": "158/7995;195/2887;63/1016;58/9033;;;https://dblp.org/pers/hd/h/He:Yuxiong", "google_scholar": ";gpSeMjYAAAAJ;Ry0Bdt8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;da9Vl6QAAAAJ;SB3_eb0AAAAJ", "orcid": ";;;0000-0002-8165-166X;;;", "linkedin": ";;;minjia-zhang-05857226/;;;", "or_profile": "~Conglong_Li1;~Zhewei_Yao1;~Xiaoxia_Wu1;~Minjia_Zhang1;~Connor_Holmes1;~Cheng_Li10;~Yuxiong_He1", "aff": "Microsoft;Microsoft;Microsoft;Microsoft ;Colorado School of Mines;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;mines.edu;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Researcher;Principal Researcher;PhD student;Researcher;Researcher", "bibtex": "@misc{\nli2023xyz,\ntitle={{XYZ} Data Efficiency: Improving Deep Learning Model Quality and Training Efficiency via Efficient Data Sampling and Routing},\nauthor={Conglong Li and Zhewei Yao and Xiaoxia Wu and Minjia Zhang and Connor Holmes and Cheng Li and Yuxiong He},\nyear={2023},\nurl={https://openreview.net/forum?id=nkfSodI4ow}\n}", "github": "", "project": "", "reviewers": "Na2w;8PyZ;NZ9R;6DjP;sHP9", "site": "https://openreview.net/forum?id=nkfSodI4ow", "pdf_size": 621413, "rating": "3;4;5;6;7", "confidence": "4;5;3;3;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;2;3", "presentation": "2;3;3;3;3", "wc_summary": "100;49;94;89;43", "wc_strengths": "66;39;86;96;110", "wc_weaknesses": "424;218;93;115;92", "wc_questions": "102;6;70;202;4", "wc_limitations": "1;1;19;8;15", "wc_review": "693;313;362;510;264", "wc_reply_reviewers": "404;0;0;0;16", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;0;1", "reply_authors": "1;1;1;1;1", 
"rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 75.0, 24.008331887076203 ], "wc_strengths_avg": [ 79.4, 24.767720928660353 ], "wc_weaknesses_avg": [ 188.4, 126.62164112030771 ], "wc_questions_avg": [ 76.8, 73.03259546257411 ], "wc_limitations_avg": [ 8.8, 7.277362159464101 ], "wc_review_avg": [ 428.4, 155.83658107132612 ], "wc_reply_reviewers_avg": [ 84.0, 160.11995503371838 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.37796447300922725, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WM7CZnim1UcJ:scholar.google.com/&scioq=XYZ+Data+Efficiency:+Improving+Deep+Learning+Model+Quality+and+Training+Efficiency+via+Efficient+Data+Sampling+and+Routing&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Microsoft;Colorado School of Mines", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.mines.edu", "aff_unique_abbr": "Microsoft;CSM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Joint Feature and Differentiable $ k $-NN Graph Learning using Dirichlet Energy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70488", "id": "noMktb4ait", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4d689f0f30199661a10aa2200488aebb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=noMktb4ait", "openreview": "https://openreview.net/forum?id=noMktb4ait", "poster": "/media/PosterPDFs/NeurIPS%202023/70488.png?t=1701357957.7725656", "slides": "https://nips.cc/virtual/2023/poster/70488", "video": "https://nips.cc/virtual/2023/poster/70488", "author_site": "Lei Xu, Lei Chen, Rong Wang, Feiping Nie, Xuelong Li", "tldr": "", "abstract": "Feature selection (FS) plays an important role in machine learning, which extracts important features and accelerates the learning process. In this paper, we propose a deep FS method that simultaneously conducts feature selection and differentiable $ k $-NN graph learning based on the Dirichlet Energy. The Dirichlet Energy identifies important features by measuring their smoothness on the graph structure, and facilitates the learning of a new graph that reflects the inherent structure in new feature subspace. We employ Optimal Transport theory to address the non-differentiability issue of learning $ k $-NN graphs in neural networks, which theoretically makes our method applicable to other graph neural networks for dynamic graph learning. Furthermore, the proposed framework is interpretable, since all modules are designed algorithmically. 
We validate the effectiveness of our model with extensive experiments on both synthetic and real-world datasets.", "keywords": "Feature Selection;Differential k-NN Graph;Dirichlet Energy", "primary_area": "", "supplementary_material": "/attachment/5b31ea511ca568759e9ab3f693e33b374cb8ba88.zip", "author": "Lei Xu;Lei Chen;Rong Wang;Feiping Nie;Xuelong Li", "authorids": "~Lei_Xu6;~Lei_Chen12;~Rong_Wang2;~Feiping_Nie2;~Xuelong_Li2", "gender": "M;M;M;M;M", "homepage": "http://faculty.cs.njupt.edu.cn/~chenlei/;https://www.researchgate.net/profile/Rong-Wang-40;https://dblp.org/pid/80/5755.html;;https://solerxl.github.io/", "dblp": "c/LeiChen0011;66/4610-1.html;;l/XuelongLi;274/3257", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;ahUibskAAAAJ;6l4c0oEAAAAJ", "orcid": "0000-0002-6071-8888;;;;0000-0002-5072-5080", "linkedin": ";;;;%E7%A3%8A-%E8%AE%B8-91710416a/", "or_profile": "~Lei_Chen12;~Rong_Wang2;~Feiping_Nie2;~Xuelong_Li2;~Xu_Lei2", "aff": "Nanjing University of Posts and Telecommunications;Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University;Northwestern Polytechnical University", "aff_domain": "njupt.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "Full Professor;Full Professor;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nxu2023joint,\ntitle={Joint Feature and Differentiable \\$ k \\$-{NN} Graph Learning using Dirichlet Energy},\nauthor={Lei Xu and Lei Chen and Rong Wang and Feiping Nie and Xuelong Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=noMktb4ait}\n}", "github": "", "project": "", "reviewers": "JXCq;f5Nx;EQBi;MNAm", "pdf_size": 2005079, "rating": "4;5;7;8", "confidence": "3;4;3;3", "soundness": "2;2;4;4", "novelty": "2;2;3;4", "presentation": "2;3;4;2", "wc_summary": "113;77;134;26", "wc_strengths": "58;25;62;86", "wc_weaknesses": "180;228;49;69", "wc_questions": "180;78;31;19", "wc_limitations": "180;14;4;12", "wc_review": "711;422;280;212", "wc_reply_reviewers": "35;181;21;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 87.5, 40.94203219186854 ], "wc_strengths_avg": [ 57.75, 21.72987574745884 ], "wc_weaknesses_avg": [ 131.5, 74.79471906491794 ], "wc_questions_avg": [ 77.0, 63.42318188170631 ], "wc_limitations_avg": [ 52.5, 73.70719096533254 ], "wc_review_avg": [ 406.25, 191.56770996177826 ], "wc_reply_reviewers_avg": [ 68.25, 65.36579763148309 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.36514837167011077, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12111917026898967865&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "njupt.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;2;2", "aff_unique_norm": "Nanjing University of Posts and Telecommunications;Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.njupt.edu.cn;http://www.nwpu.edu.cn;https://www.nwpu.edu.cn", 
"aff_unique_abbr": "NJUPT;NWPU;NWPU", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Nanjing;Xi'an;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Marginal Density Ratio for Off-Policy Evaluation in Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70487", "id": "noyleECBam", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a51f974947c42b40a40a882a7d9b2479-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=noyleECBam", "openreview": "https://openreview.net/forum?id=noyleECBam", "poster": "/media/PosterPDFs/NeurIPS%202023/70487.png?t=1701776798.7430937", "slides": "https://nips.cc/virtual/2023/poster/70487", "video": "https://nips.cc/virtual/2023/poster/70487", "author_site": "Muhammad Faaiz Taufiq, Arnaud Doucet, Rob Cornish, Jean-Francois Ton", "tldr": "", "abstract": "Off-Policy Evaluation (OPE) in contextual bandits is crucial for assessing new policies using existing data without costly experimentation. However, current OPE methods, such as Inverse Probability Weighting (IPW) and Doubly Robust (DR) estimators, suffer from high variance, particularly in cases of low overlap between target and behaviour policies or large action and context spaces. In this paper, we introduce a new OPE estimator for contextual bandits, the Marginal Ratio (MR) estimator, which focuses on the shift in the marginal distribution of outcomes $Y$ instead of the policies themselves. Through rigorous theoretical analysis, we demonstrate the benefits of the MR estimator compared to conventional methods like IPW and DR in terms of variance reduction. Additionally, we establish a connection between the MR estimator and the state-of-the-art Marginalized Inverse Propensity Score (MIPS) estimator, proving that MR achieves lower variance among a generalized family of MIPS estimators. We further illustrate the utility of the MR estimator in causal inference settings, where it exhibits enhanced performance in estimating Average Treatment Effects (ATE). 
Our experiments on synthetic and real-world datasets corroborate our theoretical findings and highlight the practical advantages of the MR estimator in OPE for contextual bandits.", "keywords": "contextual bandits;variance reduction;off-policy evaluation", "primary_area": "", "supplementary_material": "", "author": "Muhammad Faaiz Taufiq;Arnaud Doucet;Rob Cornish;Jean-Francois Ton", "authorids": "~Muhammad_Faaiz_Taufiq1;~Arnaud_Doucet2;~Rob_Cornish1;~Jean-Francois_Ton2", "gender": "M;;Not Specified;", "homepage": "https://faaizt.github.io/;https://www.stats.ox.ac.uk/~doucet/;https://savior287.github.io/JFT-webpage/;https://jrmcornish.github.io", "dblp": "322/2165;68/1628;;", "google_scholar": "oDL6ahoAAAAJ;W4SZGV8AAAAJ;WWVOu4kAAAAJ;", "orcid": ";0000-0002-7662-419X;;", "linkedin": "muhammadftaufiq/;;;", "or_profile": "~Muhammad_Faaiz_Taufiq1;~Arnaud_Doucet2;~Jean-Francois_Ton2;~Robert_Cornish1", "aff": "University of Oxford;University of Oxford;Bytedance;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;bytedance.com;ox.ac.uk", "position": "PhD student;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\ntaufiq2023marginal,\ntitle={Marginal Density Ratio for Off-Policy Evaluation in Contextual Bandits},\nauthor={Muhammad Faaiz Taufiq and Arnaud Doucet and Rob Cornish and Jean-Francois Ton},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=noyleECBam}\n}", "github": "", "project": "", "reviewers": "jTTf;KWAf;LKfa;ceqD;2CGe", "pdf_size": 17296326, "rating": "5;7;7;7;7", "confidence": "4;4;4;3;5", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "125;67;140;184;133", "wc_strengths": "112;84;57;26;117", "wc_weaknesses": "242;251;149;168;294", "wc_questions": "142;66;106;17;54", "wc_limitations": "15;25;1;9;8", "wc_review": "636;493;453;404;606", "wc_reply_reviewers": "328;348;14;42;241", "wc_reply_authors": "293;667;0;0;521", "reply_reviewers": "1;2;1;1;3", "reply_authors": "2;2;1;1;3", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 129.8, 37.46678529044092 ], "wc_strengths_avg": [ 79.2, 34.20760149440472 ], "wc_weaknesses_avg": [ 220.8, 54.15311625382236 ], "wc_questions_avg": [ 77.0, 43.16480047446067 ], "wc_limitations_avg": [ 11.6, 8.039900496896712 ], "wc_review_avg": [ 518.4, 88.89679409292553 ], "wc_reply_reviewers_avg": [ 194.6, 140.9845381593315 ], "wc_reply_authors_avg": [ 296.2, 269.631897222862 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6411252154731576156&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;ox.ac.uk;bytedance.com;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Oxford;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.bytedance.com", "aff_unique_abbr": "Oxford;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Truncated Affinity Maximization: One-class Homophily Modeling for Graph Anomaly Detection", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70486", "id": "nq4OhifyEe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b905031125e56a557db38dff4fa8d21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nq4OhifyEe", "openreview": "https://openreview.net/forum?id=nq4OhifyEe", "poster": "/media/PosterPDFs/NeurIPS%202023/70486.png?t=1701938970.2041726", "slides": "https://nips.cc/virtual/2023/poster/70486", "video": "https://nips.cc/virtual/2023/poster/70486", "author_site": "Hezhe Qiao, Guansong Pang", "tldr": "", "abstract": "We reveal a one-class homophily phenomenon, which is one prevalent property we find empirically in real-world graph anomaly detection (GAD) datasets, i.e., normal nodes tend to have strong connection/affinity with each other, while the homophily in abnormal nodes is significantly weaker than normal nodes. However, this anomaly-discriminative property is ignored by existing GAD methods that are typically built using a conventional anomaly detection objective, such as data reconstruction.\nIn this work, we explore this property to introduce a novel unsupervised anomaly scoring measure for GAD -- local node affinity-- that assigns a larger anomaly score to nodes that are less affiliated with their neighbors, with the affinity defined as similarity on node attributes/representations. We further propose Truncated Affinity Maximization (TAM) that learns tailored node representations for our anomaly measure by maximizing the local affinity of nodes to their neighbors. Optimizing on the original graph structure can be biased by non-homophily edges(i.e., edges connecting normal and abnormal nodes). Thus, TAM is instead optimized on truncated graphs where non-homophily edges are removed iteratively to mitigate this bias. The learned representations result in significantly stronger local affinity for normal nodes than abnormal nodes. Extensive empirical results on 10 real-world GAD datasets show that TAM substantially outperforms seven competing models, achieving over 10% increase in AUROC/AUPRC compared to the best contenders on challenging datasets. 
Our code is available at https://github.com/mala-lab/TAM-master/.", "keywords": "Anomaly Detection;Graph Neural Network;Graph Anomaly Detection;One-Class Homophily;Local Node Affinity", "primary_area": "", "supplementary_material": "/attachment/65ddb058652a9752e5355eda413e2d9a271ce152.zip", "author": "Hezhe Qiao;Guansong Pang", "authorids": "~Hezhe_Qiao1;~Guansong_Pang1", "gender": "M;", "homepage": "https://hezheqiao2022.github.io/;http://guansongpang.com/", "dblp": "300/2321;07/11150", "google_scholar": "bMjKCuEAAAAJ;https://scholar.google.com.tw/citations?hl=en", "orcid": "0000-0003-3511-0528;0000-0002-9877-2716", "linkedin": "hezhe-qiao-83761b247/;guansong-pang-5587b21b/", "or_profile": "~Hezhe_Qiao1;~Guansong_Pang1", "aff": "Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqiao2023truncated,\ntitle={Truncated Affinity Maximization: One-class Homophily Modeling for Graph Anomaly Detection},\nauthor={Hezhe Qiao and Guansong Pang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nq4OhifyEe}\n}", "github": "", "project": "", "reviewers": "NSnj;oFzr;zqNH;uzaW", "pdf_size": 2444214, "rating": "4;5;6;7", "confidence": "4;3;4;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "160;81;124;78", "wc_strengths": "125;54;82;47", "wc_weaknesses": "393;166;43;104", "wc_questions": "45;4;227;2", "wc_limitations": "95;1;12;2", "wc_review": "818;306;488;233", "wc_reply_reviewers": "0;0;34;31", "wc_reply_authors": "0;0;25;23", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 110.75, 33.75925798947601 ], "wc_strengths_avg": [ 77.0, 30.651264247988205 ], "wc_weaknesses_avg": [ 176.5, 132.34519258363713 ], "wc_questions_avg": [ 69.5, 92.5378301020723 ], "wc_limitations_avg": [ 27.5, 39.207779840230685 ], "wc_review_avg": [ 461.25, 225.93514002916854 ], "wc_reply_reviewers_avg": [ 16.25, 16.284578594486256 ], "wc_reply_authors_avg": [ 12.0, 12.020815280171307 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9316940146277290212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "smu.edu.sg;smu.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "CLIP4HOI: Towards Adapting CLIP for Practical Zero-Shot HOI Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70485", "id": "nqIIWnwe73", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8fd5bc08e744fe0dfe798c61d1575a22-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nqIIWnwe73", "openreview": "https://openreview.net/forum?id=nqIIWnwe73", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70485.png?t=1701494860.7724395", "slides": "https://nips.cc/virtual/2023/poster/70485", "video": "https://nips.cc/virtual/2023/poster/70485", "author_site": "Yunyao Mao, Jiajun Deng, Wengang Zhou, Li Li, Yao Fang, Houqiang Li", "tldr": "", "abstract": "Zero-shot Human-Object Interaction (HOI) detection aims to identify both seen and unseen HOI categories. A strong zero-shot HOI detector is supposed to be not only capable of discriminating novel interactions but also robust to positional distribution discrepancy between seen and unseen categories when locating human-object pairs. However, top-performing zero-shot HOI detectors rely on seen and predefined unseen categories to distill knowledge from CLIP and jointly locate human-object pairs without considering the potential positional distribution discrepancy, leading to impaired transferability. In this paper, we introduce CLIP4HOI, a novel framework for zero-shot HOI detection. CLIP4HOI is developed on the vision-language model CLIP and ameliorates the above issues in the following two aspects. First, to avoid the model from overfitting to the joint positional distribution of seen human-object pairs, we seek to tackle the problem of zero-shot HOI detection in a disentangled two-stage paradigm. To be specific, humans and objects are independently identified and all feasible human-object pairs are processed by Human-Object interactor for pairwise proposal generation. Second, to facilitate better transferability, the CLIP model is elaborately adapted into a fine-grained HOI classifier for proposal discrimination, avoiding data-sensitive knowledge distillation. Finally, experiments on prevalent benchmarks show that our CLIP4HOI outperforms previous approaches on both rare and unseen categories, and sets a series of state-of-the-art records under a variety of zero-shot settings.", "keywords": "human-object interaction detection;zero-shot learning;CLIP model adaptatiion", "primary_area": "", "supplementary_material": "/attachment/a2e124e71d3053af50e49d59068756a9fc988e00.zip", "author": "Yunyao Mao;Jiajun Deng;Wengang Zhou;Li Li;Yao Fang;Houqiang Li", "authorids": "~Yunyao_Mao1;~Jiajun_Deng1;~Wengang_Zhou1;~Li_Li1;~Yao_Fang1;~Houqiang_Li1", "gender": "M;M;M;M;M;M", "homepage": "http://home.ustc.edu.cn/~myy2016/;https://dengjiajun.com/;http://staff.ustc.edu.cn/~zhwg/index.html;https://faculty.ustc.edu.cn/lil1/en;https://www.linkedin.com/in/%E8%80%80-%E6%96%B9-210369180;https://staff.ustc.edu.cn/~lihq/", "dblp": "299/1533;;22/4544-1;53/2189-40;;59/7017.html", "google_scholar": "uQJ7Df0AAAAJ;FAAHjxsAAAAJ;8s1JF8YAAAAJ;dEm6VKAAAAAJ;;7sFMIKoAAAAJ", "orcid": ";;0000-0003-1690-9836;0000-0002-7163-6263;;0000-0003-2188-3028", "linkedin": ";%E5%AE%B6%E4%BF%8A-%E9%82%93-77519a160/;;;;", "or_profile": "~Yunyao_Mao1;~Jiajun_Deng1;~Wengang_Zhou1;~Li_Li1;~Yao_Fang1;~Houqiang_Li1", "aff": "University of Science and Technology of China;University of Sydney;University of Science and Technology of China;University of Science and Technology of China;Merchants Union Consumer Finance Company Limited;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;usyd.edu.au;ustc.edu.cn;ustc.edu.cn;mucfc.com;ustc.edu.cn", "position": "PhD student;Postdoc;Full Professor;Professor;Researcher;Professor", "bibtex": "@inproceedings{\nmao2023cliphoi,\ntitle={{CLIP}4{HOI}: Towards Adapting {CLIP} for Practical Zero-Shot {HOI} Detection},\nauthor={Yunyao Mao and Jiajun Deng and Wengang Zhou and Li Li and Yao Fang and Houqiang 
Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nqIIWnwe73}\n}", "github": "", "project": "", "reviewers": "2e3Y;Rad1;baYx;dBok", "pdf_size": 2397134, "rating": "3;5;5;6", "confidence": "5;4;5;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "64;51;58;117", "wc_strengths": "40;37;59;67", "wc_weaknesses": "34;123;63;148", "wc_questions": "232;39;23;6", "wc_limitations": "74;9;73;2", "wc_review": "444;259;276;340", "wc_reply_reviewers": "0;0;0;18", "wc_reply_authors": "0;0;0;5", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 26.100766272276374 ], "wc_strengths_avg": [ 50.75, 12.616952880945542 ], "wc_weaknesses_avg": [ 92.0, 45.557655778145566 ], "wc_questions_avg": [ 75.0, 91.39201278011114 ], "wc_limitations_avg": [ 39.5, 34.09178786746157 ], "wc_review_avg": [ 329.75, 72.54782905090958 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 1.25, 2.165063509461097 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11265336223197669494&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;usyd.edu.au;ustc.edu.cn;ustc.edu.cn;mucfc.com;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "University of Science and Technology of China;University of Sydney;Merchants Union Consumer Finance Company Limited", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.sydney.edu.au;", "aff_unique_abbr": "USTC;USYD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Leave No Stone Unturned: Mine Extra Knowledge for Imbalanced Facial Expression Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70484", "id": "nrQif5tH7O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2e6744370a8616c90d1e3b7a41993b7c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nrQif5tH7O", "openreview": "https://openreview.net/forum?id=nrQif5tH7O", "poster": "/media/PosterPDFs/NeurIPS%202023/70484.png?t=1702393523.715473", "slides": "https://nips.cc/virtual/2023/poster/70484", "video": "https://nips.cc/virtual/2023/poster/70484", "author_site": "Yuhang Zhang, Yaqi Li, lixiong Qin, Xuannan Liu, Weihong Deng", "tldr": "", "abstract": "Facial expression data is characterized by a significant imbalance, with most collected data showing happy or neutral expressions and fewer instances of fear or disgust. This imbalance poses challenges to facial expression recognition (FER) models, hindering their ability to fully understand various human emotional states. Existing FER methods typically report overall accuracy on highly imbalanced test sets but exhibit low performance in terms of the mean accuracy across all expression classes. In this paper, our aim is to address the imbalanced FER problem. 
Existing methods primarily focus on learning knowledge of minor classes solely from minor-class samples. However, we propose a novel approach to extract extra knowledge related to the minor classes from both major and minor class samples. Our motivation stems from the belief that FER resembles a distribution learning task, wherein a sample may contain information about multiple classes. For instance, a sample from the major class surprise might also contain useful features of the minor class fear. Inspired by that, we propose a novel method that leverages re-balanced attention maps to regularize the model, enabling it to extract transformation invariant information about the minor classes from all training samples. Additionally, we introduce re-balanced smooth labels to regulate the cross-entropy loss, guiding the model to pay more attention to the minor classes by utilizing the extra information regarding the label distribution of the imbalanced training data. Extensive experiments on different datasets and backbones show that the two proposed modules work together to regularize the model and achieve state-of-the-art performance under the imbalanced FER task. Code is available at https://github.com/zyh-uaiaaaa.", "keywords": "Facial expression recognition;imbalanced learning", "primary_area": "", "supplementary_material": "/attachment/9a253f1e181cc238be51f449b54ad013483eb712.pdf", "author": "Yuhang Zhang;Yaqi Li;lixiong Qin;Xuannan Liu;Weihong Deng", "authorids": "~Yuhang_Zhang6;~Yaqi_Li1;~lixiong_Qin1;~Xuannan_Liu1;~Weihong_Deng1", "gender": "M;F;M;M;M", "homepage": ";https://github.com/AdventureStory;https://lxq1000.github.io/;;http://whdeng.cn", "dblp": ";;354/8450;296/7706;39/232", "google_scholar": "https://scholar.google.com.au/citations?hl=en;;;ddbkQoIAAAAJ;1rhBlUEAAAAJ", "orcid": "0000-0003-4161-5020;;;;", "linkedin": "zhang-yuhang-1431931a0;;;;", "or_profile": "~Yuhang_Zhang6;~Yaqi_Li1;~lixiong_Qin1;~Xuannan_Liu1;~Weihong_Deng1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;Undergrad student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023leave,\ntitle={Leave No Stone Unturned: Mine Extra Knowledge for Imbalanced Facial Expression Recognition},\nauthor={Yuhang Zhang and Yaqi Li and lixiong Qin and Xuannan Liu and Weihong Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nrQif5tH7O}\n}", "github": "", "project": "", "reviewers": "N9Q5;Ey1e;ss3S;Ux1i;31BV", "pdf_size": 3563137, "rating": "6;6;7;7;7", "confidence": "5;5;4;5;4", "soundness": "3;2;3;4;4", "novelty": "3;3;3;4;3", "presentation": "3;3;3;4;4", "wc_summary": "90;58;136;145;71", "wc_strengths": "42;30;119;162;105", "wc_weaknesses": "269;200;134;175;83", "wc_questions": "5;34;122;74;80", "wc_limitations": "37;34;30;55;15", "wc_review": "443;356;541;611;354", "wc_reply_reviewers": "46;21;48;211;166", "wc_reply_authors": "136;30;79;58;497", "reply_reviewers": "1;1;1;1;2", "reply_authors": "3;2;2;2;3", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], 
"presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 100.0, 34.71599055190562 ], "wc_strengths_avg": [ 91.6, 49.27717524371704 ], "wc_weaknesses_avg": [ 172.2, 62.55685414085334 ], "wc_questions_avg": [ 63.0, 40.23928428786974 ], "wc_limitations_avg": [ 34.2, 12.859237924542807 ], "wc_review_avg": [ 461.0, 101.68382368892311 ], "wc_reply_reviewers_avg": [ 98.4, 75.53171519302339 ], "wc_reply_authors_avg": [ 160.0, 172.0523176246109 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666666, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11003014687962061135&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Scale-Invariant Sorting Criterion to Find a Causal Order in Additive Noise Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70483", "id": "nrbR2F29vU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/027e86facfe7c1ea52ca1fca7bc1402b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nrbR2F29vU", "openreview": "https://openreview.net/forum?id=nrbR2F29vU", "poster": "/media/PosterPDFs/NeurIPS%202023/70483.png?t=1701339315.2593381", "slides": "https://nips.cc/virtual/2023/poster/70483", "video": "https://nips.cc/virtual/2023/poster/70483", "author_site": "Alexander Reisach, Myriam Tami, Christof Seiler, Antoine Chambaz, Sebastian Weichwald", "tldr": "", "abstract": "Additive Noise Models (ANMs) are a common model class for causal discovery from observational data. Due to a lack of real-world data for which an underlying ANM is known, ANMs with randomly sampled parameters are commonly used to simulate data for the evaluation of causal discovery algorithms. While some parameters may be fixed by explicit assumptions, fully specifying an ANM requires choosing all parameters. Reisach et al. (2021) show that, for many ANM parameter choices, sorting the variables by increasing variance yields an ordering close to a causal order and introduce \u2018var-sortability\u2019 to quantify this alignment. Since increasing variances may be unrealistic and cannot be exploited when data scales are arbitrary, ANM data are often rescaled to unit variance in causal discovery benchmarking.\n\nWe show that synthetic ANM data are characterized by another pattern that is scale-invariant and thus persists even after standardization: the explainable fraction of a variable\u2019s variance, as captured by the coefficient of determination $R^2$, tends to increase along the causal order. The result is high \u2018$R^2$-sortability\u2019, meaning that sorting the variables by increasing $R^2$ yields an ordering close to a causal order. We propose a computationally efficient baseline algorithm termed \u2018$R^2$-SortnRegress\u2019 that exploits high $R^2$-sortability and that can match and exceed the performance of established causal discovery algorithms. 
We show analytically that sufficiently high edge weights lead to a relative decrease of the noise contributions along causal chains, resulting in increasingly deterministic relationships and high $R^2$. We characterize $R^2$-sortability on synthetic data with different simulation parameters and find high values in common settings. Our findings reveal high $R^2$-sortability as an assumption about the data generating process relevant to causal discovery and implicit in many ANM sampling schemes. It should be made explicit, as its prevalence in real-world data is an open question. For causal discovery benchmarking, we provide implementations of $R^2$-sortability, the $R^2$-SortnRegress algorithm, and ANM simulation procedures in our library CausalDisco at https://causaldisco.github.io/CausalDisco/.", "keywords": "Causal Discovery;Directed Acyclic Graph;Varsortability;Additive Noise Model;Structural Causal Model;Simulation;Benchmark", "primary_area": "", "supplementary_material": "/attachment/ea0e216a149c614390e7522d2579185649899710.zip", "author": "Alexander Gilbert Reisach;Myriam Tami;Christof Seiler;Antoine Chambaz;Sebastian Weichwald", "authorids": "~Alexander_Gilbert_Reisach1;~Myriam_Tami1;~Christof_Seiler2;~Antoine_Chambaz2;~Sebastian_Weichwald1", "gender": ";;;M;", "homepage": "https://scriddie.github.io/;https://myriamtami.github.io/;https://christofseiler.github.io/;https://helios2.mi.parisdescartes.fr/~chambaz/;https://sweichwald.de", "dblp": ";228/8539;83/7425;36/7367;158/0010", "google_scholar": ";kavk5oUAAAAJ;fxMt84EAAAAJ;;", "orcid": ";;0000-0001-8802-3642;0000-0002-5592-6471;", "linkedin": "alexander-reisach-2033a9175/;;;;", "or_profile": "~Alexander_Gilbert_Reisach1;~Myriam_Tami1;~Christof_Seiler2;~Antoine_Chambaz2;~Sebastian_Weichwald1", "aff": "Universit\u00e9 Paris Cit\u00e9;CentraleSupelec;Maastricht University;Universit\u00e9 Paris Cit\u00e9;University of Copenhagen", "aff_domain": "u-paris.fr;centralesupelec.fr;maastrichtuniversity.nl;u-paris.fr;math.ku.dk", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nreisach2023a,\ntitle={A Scale-Invariant Sorting Criterion to Find a Causal Order in Additive Noise Models},\nauthor={Alexander Gilbert Reisach and Myriam Tami and Christof Seiler and Antoine Chambaz and Sebastian Weichwald},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nrbR2F29vU}\n}", "github": "", "project": "", "reviewers": "UFwV;iqwe;zF5c;j7gp", "pdf_size": 1206147, "rating": "6;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "123;59;67;221", "wc_strengths": "35;103;90;29", "wc_weaknesses": "432;242;301;46", "wc_questions": "44;41;109;46", "wc_limitations": "1;17;10;1", "wc_review": "635;462;577;343", "wc_reply_reviewers": "36;29;237;28", "wc_reply_authors": "44;0;526;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 117.5, 64.64325177464389 ], "wc_strengths_avg": [ 64.25, 32.64486942844159 ], "wc_weaknesses_avg": [ 255.25, 139.00966693003764 ], "wc_questions_avg": [ 60.0, 28.34607556611673 ], "wc_limitations_avg": [ 7.25, 6.722164829874376 ], "wc_review_avg": [ 504.25, 111.99860490202545 ], 
"wc_reply_reviewers_avg": [ 82.5, 89.25385145751414 ], "wc_reply_authors_avg": [ 142.5, 222.14128387132365 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1107502724565402975&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "email": "u-paris.fr;centralesupelec.fr;maastrichtuniversity.nl;u-paris.fr;math.ku.dk", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Universit\u00e9 Paris Cit\u00e9;CentraleSup\u00e9lec;Maastricht University;University of Copenhagen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.universite-paris.fr;https://www.centralesupelec.fr;https://www.maastrichtuniversity.nl;https://www.ku.dk", "aff_unique_abbr": "UPC;CS;MU;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "France;Netherlands;Denmark" }, { "title": "State-Action Similarity-Based Representations for Off-Policy Evaluation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70482", "id": "nvX3MiQM0G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/83dc5747870ea454cab25e30bef4eb8a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nvX3MiQM0G", "openreview": "https://openreview.net/forum?id=nvX3MiQM0G", "poster": "/media/PosterPDFs/NeurIPS%202023/70482.png?t=1701538796.9145095", "slides": "https://nips.cc/virtual/2023/poster/70482", "video": "https://nips.cc/virtual/2023/poster/70482", "author_site": "Brahma Pavse, Josiah Hanna", "tldr": "", "abstract": "In reinforcement learning, off-policy evaluation (OPE) is the problem of estimating the expected return of an evaluation policy given a fixed dataset that was collected by running one or more different policies. One of the more empirically successful algorithms for OPE has been the fitted q-evaluation (FQE) algorithm that uses temporal difference updates to learn an action-value function, which is then used to estimate the expected return of the evaluation policy. Typically, the original fixed dataset is fed directly into FQE to learn the action-value function of the evaluation policy. Instead, in this paper, we seek to enhance the data-efficiency of FQE by first transforming the fixed dataset using a learned encoder, and then feeding the transformed dataset into FQE. To learn such an encoder, we introduce an OPE-tailored state-action behavioral similarity metric, and use this metric and the fixed dataset to learn an encoder that models this metric. Theoretically, we show that this metric allows us to bound the error in the resulting OPE estimate. Empirically, we show that other state-action similarity metrics lead to representations that cannot represent the action-value function of the evaluation policy, and that our state-action representation method boosts the data-efficiency of FQE and lowers OPE error relative to other OPE-based representation learning methods on challenging OPE tasks. We also empirically show that the learned representations significantly mitigate divergence of FQE under varying distribution shifts. 
Our code is available here: https://github.com/Badger-RL/ROPE.", "keywords": "reinforcement learning;off-policy evaluation;off-policy RL;representation learning;behavioral similarity metrics", "primary_area": "", "supplementary_material": "", "author": "Brahma S Pavse;Josiah P. Hanna", "authorids": "~Brahma_S_Pavse1;~Josiah_P._Hanna1", "gender": "M;M", "homepage": "https://brahmasp.github.io/;https://pages.cs.wisc.edu/~jphanna/", "dblp": "243/3510;135/6336", "google_scholar": "2Dc_GnUAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Brahma_S_Pavse1;~Josiah_Hanna2", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\npavse2023stateaction,\ntitle={State-Action Similarity-Based Representations for Off-Policy Evaluation},\nauthor={Brahma S Pavse and Josiah P. Hanna},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nvX3MiQM0G}\n}", "github": "", "project": "", "reviewers": "KriT;y8hg;h5zX;BFUR", "pdf_size": 9709209, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;2;2;3", "wc_summary": "73;68;74;36", "wc_strengths": "135;38;93;10", "wc_weaknesses": "137;38;88;15", "wc_questions": "72;220;123;85", "wc_limitations": "7;12;26;1", "wc_review": "424;376;404;147", "wc_reply_reviewers": "11;0;52;0", "wc_reply_authors": "7;45;14;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.75, 15.610493265749165 ], "wc_strengths_avg": [ 69.0, 48.40970976983853 ], "wc_weaknesses_avg": [ 69.5, 47.06644239795483 ], "wc_questions_avg": [ 125.0, 57.96119391454941 ], "wc_limitations_avg": [ 11.5, 9.233092656309694 ], "wc_review_avg": [ 337.75, 111.44140837229222 ], "wc_reply_reviewers_avg": [ 15.75, 21.405314760591587 ], "wc_reply_authors_avg": [ 16.5, 17.18284027743958 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5362825991665336528&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "wisc.edu;wisc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Variational Gaussian Processes with Decoupled Conditionals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70481", "id": "nwK8UkK3uB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/90bfd7201f6717b215e5dcfd987064da-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nwK8UkK3uB", "openreview": "https://openreview.net/forum?id=nwK8UkK3uB", "poster": "/media/PosterPDFs/NeurIPS%202023/70481.png?t=1697408642.2523708", "slides": "https://nips.cc/virtual/2023/poster/70481", "video": "https://nips.cc/virtual/2023/poster/70481", "author_site": "Xinran Zhu, Kaiwen Wu, 
Natalie Maus, Jacob Gardner, David Bindel", "tldr": "", "abstract": "Variational Gaussian processes (GPs) approximate exact GP inference by using a small set of inducing points to form a sparse approximation of the true posterior, with the fidelity of the model increasing with additional inducing points. Although the approximation error can in principle be reduced by using more inducing points, doing so introduces optimization challenges and computational complexity that grow with scale. To achieve scalability, inducing point methods typically introduce conditional independencies and then approximations to the training and test conditional distributions. In this paper, we consider an alternative approach to modifying the training and test conditionals, in which we make them more flexible. In particular, we investigate decoupling the parametric form of the predictive mean and covariance in the conditionals, learning independent parameters for each. We derive new evidence lower bounds (ELBOs) under these more flexible conditionals, and provide two concrete examples of applying the decoupled conditionals. Empirically, we find this additional flexibility leads to improved model performance on a variety of regression tasks and Bayesian optimization (BO) applications.", "keywords": "Gaussian processes;variational inference;variational Gaussian processes;Bayesian optimization", "primary_area": "", "supplementary_material": "", "author": "Xinran Zhu;Kaiwen Wu;Natalie Maus;Jacob R. Gardner;David Bindel", "authorids": "~Xinran_Zhu1;~Kaiwen_Wu2;~Natalie_Maus1;~Jacob_R._Gardner1;~David_Bindel1", "gender": "F;;F;;M", "homepage": "https://xinranzhu.com/;;https://sites.google.com/seas.upenn.edu/natalie-maus/;;http://www.cs.cornell.edu/~bindel/", "dblp": ";;264/7932;;96/6719", "google_scholar": "https://scholar.google.com/citations?hl=en;;hNRd6lsAAAAJ;;04TuVhEAAAAJ", "orcid": "0000-0003-4988-0734;;;;0000-0002-8733-5799", "linkedin": ";;natalie-maus-14b936178/;;david-bindel-5333a81/", "or_profile": "~Xinran_Zhu1;~Kaiwen_Wu2;~Natalie_Maus1;~Jacob_R._Gardner1;~David_Bindel1", "aff": "Cornell University;;University of Pennsylvania;;Cornell University", "aff_domain": "cornell.edu;;upenn.edu;;cs.cornell.edu", "position": "PhD student;;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nzhu2023variational,\ntitle={Variational Gaussian Processes with Decoupled Conditionals},\nauthor={Xinran Zhu and Kaiwen Wu and Natalie Maus and Jacob R.
Gardner and David Bindel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nwK8UkK3uB}\n}", "github": "", "project": "", "reviewers": "TCkp;3gEr;2Aqn;u1uu", "pdf_size": 962127, "rating": "3;6;6;6", "confidence": "3;5;3;3", "soundness": "3;4;3;3", "novelty": "2;3;3;2", "presentation": "1;3;2;3", "wc_summary": "68;81;104;122", "wc_strengths": "10;147;47;28", "wc_weaknesses": "133;286;171;122", "wc_questions": "1;161;38;40", "wc_limitations": "9;1;11;31", "wc_review": "221;676;371;343", "wc_reply_reviewers": "0;53;21;56", "wc_reply_authors": "53;5;35;53", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 93.75, 20.78911975048487 ], "wc_strengths_avg": [ 58.0, 53.02357966037374 ], "wc_weaknesses_avg": [ 178.0, 64.94998075442363 ], "wc_questions_avg": [ 60.0, 60.34484236453021 ], "wc_limitations_avg": [ 13.0, 11.045361017187261 ], "wc_review_avg": [ 402.75, 167.538614951897 ], "wc_reply_reviewers_avg": [ 32.5, 23.243278598338918 ], "wc_reply_authors_avg": [ 36.5, 19.615045245933032 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17089467120802000670&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cornell.edu;;upenn.edu;;cs.cornell.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Cornell University;University of Pennsylvania", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.upenn.edu", "aff_unique_abbr": "Cornell;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Individual Arbitrariness and Group Fairness", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70480", "id": "nzkWhoXUpv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d891d240b5784656a0356bf4b00f5cdd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=nzkWhoXUpv", "openreview": "https://openreview.net/forum?id=nzkWhoXUpv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70480", "video": "https://nips.cc/virtual/2023/poster/70480", "author_site": "Carol Long, Hsiang Hsu, Hsiang Hsu, Wael Alghamdi, Flavio Calmon", "tldr": "", "abstract": "Machine learning tasks may admit multiple competing models that achieve similar performance yet produce conflicting outputs for individual samples---a phenomenon known as predictive multiplicity. We demonstrate that fairness interventions in machine learning optimized solely for group fairness and accuracy can exacerbate predictive multiplicity. Consequently, state-of-the-art fairness interventions can mask high predictive multiplicity behind favorable group fairness and accuracy metrics. 
We argue that a third axis of ``arbitrariness'' should be considered when deploying models to aid decision-making in applications of individual-level impact.\nTo address this challenge, we propose an ensemble algorithm applicable to any fairness intervention that provably ensures more consistent predictions.", "keywords": "predictive multiplicity;fairness in machine learning;Rashomon effect", "primary_area": "", "supplementary_material": "", "author": "Carol Xuan Long;Hsiang Hsu;Wael Alghamdi;Flavio Calmon", "authorids": "~Carol_Xuan_Long1;~Hsiang_Hsu1;~Wael_Alghamdi1;~Flavio_Calmon1", "gender": "F;M;;", "homepage": ";https://hsianghsu.github.io;;http://people.seas.harvard.edu/~flavio/", "dblp": ";;;89/4611", "google_scholar": "DGQASc8AAAAJ;https://scholar.google.com.tw/citations?user=JRl3iYIAAAAJ;;P8N_YH4AAAAJ", "orcid": ";0000-0001-8084-3929;0000-0001-6631-2160;", "linkedin": "carol-xuan-long;;;", "or_profile": "~Carol_Xuan_Long1;~Hsiang_Hsu1;~Wael_Alghamdi1;~Flavio_Calmon1", "aff": "Harvard University, Harvard University;Harvard University;Harvard University;Harvard University", "aff_domain": "g.harvard.edu;harvard.edu;harvard.edu;harvard.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlong2023individual,\ntitle={Individual Arbitrariness and Group Fairness},\nauthor={Carol Xuan Long and Hsiang Hsu and Wael Alghamdi and Flavio Calmon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=nzkWhoXUpv}\n}", "github": "", "project": "", "reviewers": "gGVC;APJf;47CU;MNFT", "pdf_size": 3284543, "rating": "7;7;7;8", "confidence": "3;3;4;4", "soundness": "3;3;4;4", "novelty": "4;4;4;4", "presentation": "4;2;4;3", "wc_summary": "49;76;202;29", "wc_strengths": "54;73;194;42", "wc_weaknesses": "164;461;96;126", "wc_questions": "89;31;139;5", "wc_limitations": "1;37;8;3", "wc_review": "357;678;639;205", "wc_reply_reviewers": "23;63;25;26", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 89.0, 67.3386961560736 ], "wc_strengths_avg": [ 90.75, 60.627448404167566 ], "wc_weaknesses_avg": [ 211.75, 145.9081474764175 ], "wc_questions_avg": [ 66.0, 51.97114584074513 ], "wc_limitations_avg": [ 12.25, 14.515078366994786 ], "wc_review_avg": [ 469.75, 196.73506931912266 ], "wc_reply_reviewers_avg": [ 34.25, 16.63392617513977 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17316225766641527029&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "g.harvard.edu;harvard.edu;harvard.edu;harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Active Observing in Continuous-time Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70479", "id": "o0ggjFD24U", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9050e8d5b5de08d16e65dc79ad5c0146-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o0ggjFD24U", "openreview": "https://openreview.net/forum?id=o0ggjFD24U", "poster": "/media/PosterPDFs/NeurIPS%202023/70479.png?t=1702277196.034263", "slides": "https://nips.cc/virtual/2023/poster/70479", "video": "https://nips.cc/virtual/2023/poster/70479", "author_site": "Samuel Holt, Alihan H\u00fcy\u00fck, Mihaela van der Schaar", "tldr": "", "abstract": "The control of continuous-time environments while actively deciding when to take costly observations in time is a crucial yet unexplored problem, particularly relevant to real-world scenarios such as medicine, low-power systems, and resource management. Existing approaches either rely on continuous-time control methods that take regular, expensive observations in time or discrete-time control with costly observation methods, which are inapplicable to continuous-time settings due to the compounding discretization errors introduced by time discretization. In this work, we are the first to formalize the continuous-time control problem with costly observations. Our key theoretical contribution shows that observing at regular time intervals is not optimal in certain environments, while irregular observation policies yield higher expected utility. This perspective paves the way for the development of novel methods that can take irregular observations in continuous-time control with costly observations. We empirically validate our theoretical findings in various continuous-time environments, including a cancer simulation, by constructing a simple initial method to solve this new problem, with a heuristic threshold on the variance of reward rollouts in an offline continuous-time model-based model predictive control (MPC) planner. 
Although determining the optimal method remains an open problem, our work offers valuable insights and understanding of this unique problem, laying the foundation for future research in this area.", "keywords": "Sensing;Model-Based Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/945223615893ea8430ce44107235ffbed71d7436.pdf", "author": "Samuel Holt;Alihan H\u00fcy\u00fck;Mihaela van der Schaar", "authorids": "~Samuel_Holt1;~Alihan_H\u00fcy\u00fck1;~Mihaela_van_der_Schaar2", "gender": ";;F", "homepage": "https://samholt.github.io/;;https://www.vanderschaar-lab.com", "dblp": "322/3656;227/2296;", "google_scholar": "Ey5aInIAAAAJ;EMq6KwMAAAAJ;DZ3S--MAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Samuel_Holt1;~Alihan_H\u00fcy\u00fck1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nholt2023active,\ntitle={Active Observing in Continuous-time Control},\nauthor={Samuel Holt and Alihan H{\\\"u}y{\\\"u}k and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o0ggjFD24U}\n}", "github": "", "project": "", "reviewers": "wAUT;mUFv;KW9G;U2ft", "pdf_size": 1663282, "rating": "4;6;6;6", "confidence": "3;3;4;3", "soundness": "2;3;3;2", "novelty": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "162;153;64;91", "wc_strengths": "145;25;45;24", "wc_weaknesses": "136;421;231;358", "wc_questions": "136;81;44;74", "wc_limitations": "136;8;2;26", "wc_review": "715;688;386;573", "wc_reply_reviewers": "12;58;13;35", "wc_reply_authors": "598;56;67;524", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 117.5, 41.246211947280685 ], "wc_strengths_avg": [ 59.75, 49.92682144899673 ], "wc_weaknesses_avg": [ 286.5, 110.60402343495466 ], "wc_questions_avg": [ 83.75, 33.214266513051285 ], "wc_limitations_avg": [ 43.0, 54.415071441651165 ], "wc_review_avg": [ 590.5, 129.5501833267711 ], "wc_reply_reviewers_avg": [ 29.5, 18.848076824970764 ], "wc_reply_authors_avg": [ 311.25, 251.14674495203 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1526805131414978045&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Identifiability Guarantees for Causal Disentanglement from Soft Interventions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70478", "id": "o16sYKHk3S", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d3a4cdf6f70559e8c6fe02170fba568-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o16sYKHk3S", "openreview": "https://openreview.net/forum?id=o16sYKHk3S", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70478", "video": "https://nips.cc/virtual/2023/poster/70478", "author_site": "Jiaqi Zhang, Kristjan Greenewald, Chandler Squires, Akash Srivastava, Karthikeyan Shanmugam, Caroline Uhler", "tldr": "", "abstract": "Causal disentanglement aims to uncover a representation of data using latent variables that are interrelated through a causal model. Such a representation is identifiable if the latent model that explains the data is unique. In this paper, we focus on the scenario where unpaired observational and interventional data are available, with each intervention changing the mechanism of a latent variable. When the causal variables are fully observed, statistically consistent algorithms have been developed to identify the causal model under faithfulness assumptions. We here show that identifiability can still be achieved with unobserved causal variables, given a generalized notion of faithfulness. Our results guarantee that we can recover the latent causal model up to an equivalence class and predict the effect of unseen combinations of interventions, in the limit of infinite data. We implement our causal disentanglement framework by developing an autoencoding variational Bayes algorithm and apply it to the problem of predicting combinatorial perturbation effects in genomics.", "keywords": "Causality;Identifiability;Disentanglement", "primary_area": "", "supplementary_material": "/attachment/14d49e74c3781e1f63c7e2664c52936c5f4957a6.zip", "author": "Jiaqi Zhang;Kristjan Greenewald;Chandler Squires;Akash Srivastava;Karthikeyan Shanmugam;Caroline Uhler", "authorids": "~Jiaqi_Zhang2;~Kristjan_Greenewald1;~Chandler_Squires1;~Akash_Srivastava1;~Karthikeyan_Shanmugam1;~Caroline_Uhler1", "gender": "F;;M;M;M;F", "homepage": ";https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald;https://chandlersquires.com;http://akashgit.github.io;https://sites.google.com/corp/view/karthikeyan-shanmugam/;https://www.carolineuhler.com/", "dblp": ";146/0563;231/7704;24/9528;;66/10813", "google_scholar": ";L3zNUG4AAAAJ;https://scholar.google.com.tr/citations?user=Nh3BtpUAAAAJ;https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ;https://scholar.google.ca/citations?user=m4DyPcUAAAAJ;https://scholar.google.com.tw/citations?user=dIJFcaoAAAAJ", "orcid": "0000-0001-9039-6843;;;;0009-0008-2879-5868;", "linkedin": "vicky-jiaqi-zhang-34b490180/;;chandler-squires-749885a0/;https://uk.linkedin.com/in/akash-srivastava-aa97361b;;", "or_profile": "~Jiaqi_Zhang2;~Kristjan_Greenewald1;~Chandler_Squires1;~Akash_Srivastava1;~Karthikeyan_Shanmugam1;~Caroline_Uhler1", "aff": "Microsoft Research;MIT-IBM Watson AI Lab, IBM Research;Massachusetts Institute of Technology;MIT-IBM Watson AI Research Lab;Google Research;Electrical Engineering & Computer Science, Massachusetts Institute of Technology", "aff_domain": "research.microsoft.com;ibm.com;mit.edu;ibm.com;google.com;eecs.mit.edu", "position": "Intern;Research Scientist;PhD student;Research Scientist;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023identifiability,\ntitle={Identifiability Guarantees for Causal Disentanglement from Soft Interventions},\nauthor={Jiaqi Zhang and Kristjan Greenewald and Chandler Squires and Akash Srivastava and 
Karthikeyan Shanmugam and Caroline Uhler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o16sYKHk3S}\n}", "github": "", "project": "", "reviewers": "jsuJ;YmWw;8AvV;xRy1", "pdf_size": 3547157, "rating": "5;5;6;6", "confidence": "4;4;3;3", "soundness": "3;3;3;2", "novelty": "2;2;2;2", "presentation": "2;3;4;3", "wc_summary": "111;61;96;491", "wc_strengths": "207;39;72;78", "wc_weaknesses": "673;75;140;231", "wc_questions": "253;63;92;70", "wc_limitations": "12;1;17;4", "wc_review": "1256;239;417;874", "wc_reply_reviewers": "63;34;39;41", "wc_reply_authors": "413;590;27;43", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 189.75, 174.87048779025008 ], "wc_strengths_avg": [ 99.0, 64.0975818576645 ], "wc_weaknesses_avg": [ 279.75, 233.70641304850835 ], "wc_questions_avg": [ 119.5, 77.81548689046416 ], "wc_limitations_avg": [ 8.5, 6.34428877022476 ], "wc_review_avg": [ 696.5, 397.48364746238303 ], "wc_reply_reviewers_avg": [ 44.25, 11.121488209767612 ], "wc_reply_authors_avg": [ 268.25, 241.56507922297047 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5261106792462174728&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "research.microsoft.com;ibm.com;mit.edu;ibm.com;google.com;eecs.mit.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "Microsoft;IBM;Massachusetts Institute of Technology;Google", "aff_unique_dep": "Microsoft Research;AI Lab;;Google Research", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.ibmwatsonai.org/;https://web.mit.edu;https://research.google", "aff_unique_abbr": "MSR;MIT-IBM AI Lab;MIT;Google Research", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Cambridge", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Causal de Finetti: On the Identification of Invariant Causal Structure in Exchangeable Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70477", "id": "o4RtDFMSNL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7279908471a7dd4898d2715f7c6a7413-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o4RtDFMSNL", "openreview": "https://openreview.net/forum?id=o4RtDFMSNL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70477", "video": "https://nips.cc/virtual/2023/poster/70477", "author_site": "Siyuan Guo, Viktor Toth, Bernhard Sch\u00f6lkopf, Ferenc Huszar", "tldr": "", "abstract": "Constraint-based causal discovery methods leverage conditional independence tests to infer causal relationships in a wide variety of applications. Just as the majority of machine learning methods, existing work focuses on studying $\\textit{independent and identically distributed}$ data. However, it is known that even with infinite $i.i.d.\\$ data, constraint-based methods can only identify causal structures up to broad Markov equivalence classes, posing a fundamental limitation for causal discovery. 
In this work, we observe that exchangeable data contains richer conditional independence structure than $i.i.d.\\$ data, and show how the richer structure can be leveraged for causal discovery. We first present causal de Finetti theorems, which state that exchangeable distributions with certain non-trivial conditional independences can always be represented as $\\textit{independent causal mechanism (ICM)}$ generative processes. We then present our main identifiability theorem, which shows that given data from an ICM generative process, its unique causal structure can be identified through performing conditional independence tests. We finally develop a causal discovery algorithm and demonstrate its applicability to inferring causal relationships from multi-environment data.", "keywords": "Independent Causal Mechanism;Causal Discovery;Exchangeable;Bayesian Statistics", "primary_area": "", "supplementary_material": "/attachment/9444ecda9aab9dcb9e29674dd6e8b266c9315ba2.pdf", "author": "Siyuan Guo;Viktor T\u00f3th;Bernhard Sch\u00f6lkopf;Ferenc Husz\u00e1r", "authorids": "~Siyuan_Guo1;toth.viktor7400@gmail.com;~Bernhard_Sch\u00f6lkopf1;~Ferenc_Husz\u00e1r1", "gender": "F;;;", "homepage": "https://siyuanguo.com/;;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Siyuan_Guo1;toth.viktor7400@gmail.com;~Bernhard_Sch\u00f6lkopf1;~Ferenc_Husz\u00e1r1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;;;", "aff_domain": "tuebingen.mpg.de;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nguo2023causal,\ntitle={Causal de Finetti: On the Identification of Invariant Causal Structure in Exchangeable Data},\nauthor={Siyuan Guo and Viktor T{\\'o}th and Bernhard Sch{\\\"o}lkopf and Ferenc Husz{\\'a}r},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o4RtDFMSNL}\n}", "github": "", "project": "", "reviewers": "ikix;3rag;idLT;TMn5", "pdf_size": 1149111, "rating": "6;7;7;8", "confidence": "4;2;4;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "82;32;26;106", "wc_strengths": "82;39;60;84", "wc_weaknesses": "276;52;278;42", "wc_questions": "302;53;1;38", "wc_limitations": "5;6;67;1", "wc_review": "747;182;432;271", "wc_reply_reviewers": "94;65;91;0", "wc_reply_authors": "82;13;39;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.5, 33.65635155509284 ], "wc_strengths_avg": [ 66.25, 18.335416548308903 ], "wc_weaknesses_avg": [ 162.0, 115.05650785592269 ], "wc_questions_avg": [ 98.5, 119.00525198494392 ], "wc_limitations_avg": [ 19.75, 27.343874999714288 ], "wc_review_avg": [ 408.0, 215.25682335294275 ], "wc_reply_reviewers_avg": [ 62.5, 37.80542289143186 ], "wc_reply_authors_avg": [ 33.5, 31.32491021535417 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17929044021779513396&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "email": "tuebingen.mpg.de;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Max Planck Institute 
for Intelligent Systems", "aff_unique_dep": "Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Certifiably Robust Graph Contrastive Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70476", "id": "o50nH0sV9x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/37050ebbbd7096719ab96cec19a4c69f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o50nH0sV9x", "openreview": "https://openreview.net/forum?id=o50nH0sV9x", "poster": "/media/PosterPDFs/NeurIPS%202023/70476.png?t=1702141428.1814775", "slides": "https://nips.cc/virtual/2023/poster/70476", "video": "https://nips.cc/virtual/2023/poster/70476", "author_site": "Minhua Lin, Teng Xiao, Enyan Dai, Xiang Zhang, Suhang Wang", "tldr": "", "abstract": "Graph Contrastive Learning (GCL) has emerged as a popular unsupervised graph representation learning method. However, it has been shown that GCL is vulnerable to adversarial attacks on both the graph structure and node attributes. Although empirical approaches have been proposed to enhance the robustness of GCL, the certifiable robustness of GCL is still remain unexplored. In this paper, we develop the first certifiably robust framework in GCL. Specifically, we first propose a unified criteria to evaluate and certify the robustness of GCL. We then introduce a novel technique, RES (Randomized Edgedrop Smoothing), to ensure certifiable robustness for any GCL model, and this certified robustness can be provably preserved in downstream tasks. Furthermore, an effective training method is proposed for robust GCL. Extensive experiments on real-world datasets demonstrate the effectiveness of our proposed method in providing effective certifiable robustness and enhancing the robustness of any GCL model. 
The source code of RES is available at https://github.com/ventr1c/RES-GCL.", "keywords": "Certifiable Robustness;Graph Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/10cf846d123ea1a83031a250cef90c30bbf31e8a.zip", "author": "Minhua Lin;Teng Xiao;Enyan Dai;Xiang Zhang;Suhang Wang", "authorids": "~Minhua_Lin1;~Teng_Xiao2;~Enyan_Dai1;~Xiang_Zhang4;~Suhang_Wang1", "gender": "M;M;M;M;", "homepage": "https://ventr1c.github.io/;https://enyandai.github.io/;https://faculty.ist.psu.edu/szw494/;https://tengxiao1.github.io/;https://faculty.ist.psu.edu/xzz89/", "dblp": "274/1711;250/2886;136/9440;;91/4353-1", "google_scholar": "qLjk9gIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;cdT_WMMAAAAJ;ld3OKXwAAAAJ;", "orcid": "0000-0003-1591-7172;0000-0001-9715-0280;0000-0003-3448-4878;;0000-0003-0940-6595", "linkedin": ";;;;", "or_profile": "~Minhua_Lin1;~Enyan_Dai1;~Suhang_Wang1;~Teng_Xiao1;~Xiang_Zhang24", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;The Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;psu.edu;psu.edu", "position": "PhD student;PhD student;Assistant Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlin2023certifiably,\ntitle={Certifiably Robust Graph Contrastive Learning},\nauthor={Minhua Lin and Teng Xiao and Enyan Dai and Xiang Zhang and Suhang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o50nH0sV9x}\n}", "github": "", "project": "", "reviewers": "A65y;vCmM;F8a7;ssR8;661C", "pdf_size": 1632866, "rating": "4;5;5;6;7", "confidence": "3;3;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "4;2;3;3;3", "wc_summary": "80;51;105;84;70", "wc_strengths": "29;45;101;44;74", "wc_weaknesses": "160;206;131;68;80", "wc_questions": "7;61;361;208;6", "wc_limitations": "39;1;1;19;11", "wc_review": "315;364;699;423;241", "wc_reply_reviewers": "21;16;71;90;0", "wc_reply_authors": "46;40;617;260;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 78.0, 17.67484087622856 ], "wc_strengths_avg": [ 58.6, 25.726251184344754 ], "wc_weaknesses_avg": [ 129.0, 51.0215640685387 ], "wc_questions_avg": [ 128.6, 137.62354449729887 ], "wc_limitations_avg": [ 14.2, 14.119490075778232 ], "wc_review_avg": [ 408.4, 157.0854544507543 ], "wc_reply_reviewers_avg": [ 39.6, 34.63293230438335 ], "wc_reply_authors_avg": [ 192.6, 230.88144143694186 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7844645405527363, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15315490124033705496&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "psu.edu;psu.edu;psu.edu;psu.edu;psu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Arbitrarily Scalable Environment Generators via 
Neural Cellular Automata", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70475", "id": "o6Dnt1uEyZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2fbf1c9bc92e7ef2f6cab2e8a3e09af-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o6Dnt1uEyZ", "openreview": "https://openreview.net/forum?id=o6Dnt1uEyZ", "poster": "/media/PosterPDFs/NeurIPS%202023/70475.png?t=1702152613.9233925", "slides": "https://nips.cc/virtual/2023/poster/70475", "video": "https://nips.cc/virtual/2023/poster/70475", "author_site": "Yulun Zhang, Matthew Fontaine, Varun Bhatt, Stefanos Nikolaidis, Jiaoyang Li", "tldr": "", "abstract": "We study the problem of generating arbitrarily large environments to improve the throughput of multi-robot systems. Prior work proposes Quality Diversity (QD) algorithms as an effective method for optimizing the environments of automated warehouses. However, these approaches optimize only relatively small environments, falling short when it comes to replicating real-world warehouse sizes. The challenge arises from the exponential increase in the search space as the environment size increases. Additionally, the previous methods have only been tested with up to 350 robots in simulations, while practical warehouses could host thousands of robots. In this paper, instead of optimizing environments, we propose to optimize Neural Cellular Automata (NCA) environment generators via QD algorithms. We train a collection of NCA generators with QD algorithms in small environments and then generate arbitrarily large environments from the generators at test time. We show that NCA environment generators maintain consistent, regularized patterns regardless of environment size, significantly enhancing the scalability of multi-robot systems in two different domains with up to 2,350 robots. Additionally, we demonstrate that our method scales a single-agent reinforcement learning policy to arbitrarily large environments with similar patterns. 
We include the source code at https://github.com/lunjohnzhang/warehouse_env_gen_nca_public.", "keywords": "Multi-robot systems;quality diversity;automatic environment generation;neural cellular automata", "primary_area": "", "supplementary_material": "/attachment/268353c448088f825fb91cbb077070d9965f2567.zip", "author": "Yulun Zhang;Matthew Christopher Fontaine;Varun Bhatt;Stefanos Nikolaidis;Jiaoyang Li", "authorids": "~Yulun_Zhang2;~Matthew_Christopher_Fontaine1;~Varun_Bhatt1;~Stefanos_Nikolaidis1;jiaoyangli@cmu.edu", "gender": "M;M;;;", "homepage": "https://yulunzhang.net/;;;http://stefanosnikolaidis.net/;", "dblp": "166/2763;239/8516;226/9861;62/6555;", "google_scholar": "Eed2gcMAAAAJ;RqSvzikAAAAJ;OgAUSRMAAAAJ;;", "orcid": "0000-0003-3199-8697;;;;", "linkedin": "yulun-zhang/;;varun-bhatt-049a49168/;;", "or_profile": "~Yulun_Zhang2;~Matthew_Christopher_Fontaine1;~Varun_Bhatt1;~Stefanos_Nikolaidis1;jiaoyangli@cmu.edu", "aff": "Carnegie Mellon University;University of Southern California;University of Southern California;University of Southern California;", "aff_domain": "cmu.edu;usc.edu;usc.edu;usc.edu;", "position": "PhD student;PhD student;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nzhang2023arbitrarily,\ntitle={Arbitrarily Scalable Environment Generators via Neural Cellular Automata},\nauthor={Yulun Zhang and Matthew Christopher Fontaine and Varun Bhatt and Stefanos Nikolaidis and Jiaoyang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o6Dnt1uEyZ}\n}", "github": "", "project": "", "reviewers": "ShTu;6eLV;YTho;v7gi;fmHH", "pdf_size": 8335929, "rating": "4;6;6;7;8", "confidence": "3;3;4;4;4", "soundness": "2;3;2;3;4", "novelty": "2;2;2;3;2", "presentation": "3;2;3;3;3", "wc_summary": "133;48;280;52;101", "wc_strengths": "93;31;276;71;43", "wc_weaknesses": "157;110;378;79;325", "wc_questions": "384;81;457;177;39", "wc_limitations": "159;1;354;25;43", "wc_review": "926;271;1745;404;551", "wc_reply_reviewers": "87;58;0;168;5", "wc_reply_authors": "30;47;0;27;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 122.8, 84.73346446357543 ], "wc_strengths_avg": [ 102.8, 89.26006945997744 ], "wc_weaknesses_avg": [ 209.8, 119.51468529013495 ], "wc_questions_avg": [ 227.6, 165.35247201055085 ], "wc_limitations_avg": [ 116.4, 130.63475800873212 ], "wc_review_avg": [ 779.4, 530.1768006995402 ], "wc_reply_reviewers_avg": [ 63.6, 61.57467011685893 ], "wc_reply_authors_avg": [ 20.8, 18.301912468373352 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7385489458759964, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8997871844429837456&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "cmu.edu;usc.edu;usc.edu;usc.edu;", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Carnegie Mellon University;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.usc.edu", "aff_unique_abbr": "CMU;USC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Recursion in Recursion: Two-Level Nested Recursion for Length Generalization with Scalability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70474", "id": "o6yTKfdnbA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/db178cd03313e23cffb8937e93f0d464-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o6yTKfdnbA", "openreview": "https://openreview.net/forum?id=o6yTKfdnbA", "poster": "/media/PosterPDFs/NeurIPS%202023/70474.png?t=1702065146.5817342", "slides": "https://nips.cc/virtual/2023/poster/70474", "video": "https://nips.cc/virtual/2023/poster/70474", "author_site": "Jishnu Ray Chowdhury, Cornelia Caragea", "tldr": "", "abstract": "Binary Balanced Tree Recursive Neural Networks (BBT-RvNNs) enforce sequence composition according to a preset balanced binary tree structure. Thus, their non-linear recursion depth (which is the tree depth) is just $\\log_2 n$ ($n$ being the sequence length). Such logarithmic scaling makes BBT-RvNNs efficient and scalable on long sequence tasks such as Long Range Arena (LRA). However, such computational efficiency comes at a cost because BBT-RvNNs cannot solve simple arithmetic tasks like ListOps. On the flip side, RvNN models (e.g., Beam Tree RvNN) that do succeed on ListOps (and other structure-sensitive tasks like formal logical inference) are generally several times more expensive (in time and space) than even Recurrent Neural Networks. In this paper, we introduce a novel framework --- Recursion in Recursion (RIR) to strike a balance between the two sides - getting some of the benefits from both worlds. In RIR, we use a form of two-level nested recursion - where the outer recursion is a $k$-ary balanced tree model with another recursive model (inner recursion) implementing its cell function. For the inner recursion, we choose Beam Tree RvNNs. To adjust Beam Tree RvNNs within RIR we also propose a novel strategy of beam alignment. Overall, this entails that the total recursive depth in RIR is upper-bounded by $k \\log_k n$. Our best RIR-based model is the first model that demonstrates high ($\\geq 90\\%$) length-generalization performance on ListOps while at the same time being scalable enough to be trainable on long sequence inputs from LRA (it can reduce the memory usage of the original Beam Tree RvNN by hundreds of times). Moreover, in terms of accuracy in the LRA language tasks, it performs competitively with Structured State Space Models (SSMs) without any special initialization - outperforming Transformers by a large margin. On the other hand, while SSMs can marginally outperform RIR on LRA, they (SSMs) fail to length-generalize on ListOps. 
Our code is available at: https://github.com/JRC1995/BeamRecursionFamily/", "keywords": "Recursive Neural Networks;Long Range Arena;RvNN;Long Range Sequence Modeling;Length Generalization;LRA;Structured Encoding;Inductive Bias;Hierarchical Model;Recursive Models", "primary_area": "", "supplementary_material": "", "author": "Jishnu Ray Chowdhury;Cornelia Caragea", "authorids": "~Jishnu_Ray_Chowdhury2;~Cornelia_Caragea2", "gender": ";", "homepage": ";https://www.cs.uic.edu/~cornelia/", "dblp": ";69/6680.html", "google_scholar": ";vkX6VV4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jishnu_Ray_Chowdhury2;~Cornelia_Caragea2", "aff": ";University of Illinois at Chicago", "aff_domain": ";uic.edu", "position": ";Full Professor", "bibtex": "@inproceedings{\nchowdhury2023recursion,\ntitle={Recursion in Recursion: Two-Level Nested Recursion for Length Generalization with Scalability},\nauthor={Jishnu Ray Chowdhury and Cornelia Caragea},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o6yTKfdnbA}\n}", "github": "", "project": "", "reviewers": "3jTN;yLki;ozsz;619V;jzGF;UxTS;CUBJ;8Jr4", "pdf_size": 1636670, "rating": "4;4;5;5;6;6;7;7", "confidence": "3;2;4;4;3;2;3;4", "soundness": "3;2;2;3;3;3;3;3", "novelty": "2;2;2;3;2;3;3;3", "presentation": "2;2;2;3;4;3;3;4", "wc_summary": "80;217;63;35;98;164;98;99", "wc_strengths": "73;11;15;7;115;41;61;91", "wc_weaknesses": "50;299;67;286;376;23;176;95", "wc_questions": "23;32;321;4;116;6;39;92", "wc_limitations": "43;100;69;1;13;2;6;57", "wc_review": "269;659;535;333;718;236;380;434", "wc_reply_reviewers": "0;161;0;20;28;13;14;37", "wc_reply_authors": "0;727;0;0;0;0;11;0", "reply_reviewers": "0;1;0;1;1;1;1;1", "reply_authors": "1;2;1;1;1;1;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.125, 0.7806247497997998 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.875, 0.7806247497997998 ], "wc_summary_avg": [ 106.75, 54.1104195141749 ], "wc_strengths_avg": [ 51.75, 37.429099641856205 ], "wc_weaknesses_avg": [ 171.5, 124.89695752899668 ], "wc_questions_avg": [ 79.125, 98.74264213094564 ], "wc_limitations_avg": [ 36.375, 34.43086369814153 ], "wc_review_avg": [ 445.5, 165.8199324568672 ], "wc_reply_reviewers_avg": [ 34.125, 49.39999367408866 ], "wc_reply_authors_avg": [ 92.25, 239.93996644994348 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2148344622118299, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13360208435841693539&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";uic.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Label-Retrieval-Augmented Diffusion Models for Learning from Noisy Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70473", "id": "o778eWSr1S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d191ba4c8923ed8fd8935b7c98658b5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o778eWSr1S", "openreview": 
"https://openreview.net/forum?id=o778eWSr1S", "poster": "/media/PosterPDFs/NeurIPS%202023/70473.png?t=1698084855.3603463", "slides": "https://nips.cc/virtual/2023/poster/70473", "video": "https://nips.cc/virtual/2023/poster/70473", "author_site": "Jian Chen, Ruiyi Zhang, Tong Yu, Rohan Sharma, Zhiqiang Xu, Tong Sun, Changyou Chen", "tldr": "", "abstract": "Learning from noisy labels is an important and long-standing problem in machine learning for real applications. One of the main research lines focuses on learning a label corrector to purify potential noisy labels. However, these methods typically rely on strict assumptions and are limited to certain types of label noise. In this paper, we reformulate the label-noise problem from a generative-model perspective, *i.e.*, labels are generated by gradually refining an initial random guess. This new perspective immediately enables existing powerful diffusion models to seamlessly learn the stochastic generative process. Once the generative uncertainty is modeled, we can perform classification inference using maximum likelihood estimation of labels. To mitigate the impact of noisy labels, we propose the **L**abel-**R**etrieval-**A**ugmented (LRA) diffusion model, which leverages neighbor consistency to effectively construct pseudo-clean labels for diffusion training. Our model is flexible and general, allowing easy incorporation of different types of conditional information, *e.g.*, use of pre-trained models, to further boost model performance. Extensive experiments are conducted for evaluation. Our model achieves new state-of-the-art (SOTA) results on all the standard real-world benchmark datasets. Remarkably, by incorporating conditional information from the powerful CLIP model, our method can boost the current SOTA accuracy by 10-20 absolute points in many cases. 
Code is available: https://anonymous.4open.science/r/LRA-diffusion-5F2F", "keywords": "diffusion model;label noise;retrieval augmented learning", "primary_area": "", "supplementary_material": "/attachment/8b256afa88d341979d9acf579a62856c23fb07e3.pdf", "author": "Jian Chen;Ruiyi Zhang;Tong Yu;Rohan Sharma;zhiqiang xu;Tong Sun;Changyou Chen", "authorids": "~Jian_Chen9;~Ruiyi_Zhang3;~Tong_Yu3;~Rohan_Sharma1;~zhiqiang_xu1;~Tong_Sun1;~Changyou_Chen1", "gender": "M;;M;M;F;M;M", "homepage": "https://puar-playground.github.io/CV/;https://www.linkedin.com/in/tong-yu-42790744;;https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;https://research.adobe.com/person/tong-sun/;https://www.cse.buffalo.edu/~changyou/;http://zhangry868.github.io/", "dblp": "49/6002-43;32/1593-1;;72/51-3.html;;65/2802;", "google_scholar": "uBGjz-EAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;LtEcKBcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-1999-1137;0000-0002-5991-2050;;0000-0002-5693-8933;;;", "linkedin": "jian-chen-1a0b9a11b/;tong-yu-42790744;rs1561/;;tong-sun/?trk=hb_tab_pro_top;;", "or_profile": "~Jian_Chen9;~Tong_Yu3;~Rohan_Sharma1;~zhiqiang_xu1;~Tong_Sun1;~Changyou_Chen1;~RUIYI_ZHANG1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Adobe Research;State University of New York at Buffalo;Mohamed bin Zayed University of Artificial Intelligence;Adobe Systems;State University of New York, Buffalo;Adobe Systems", "aff_domain": "mbzuai.ac.ae;adobe.com;buffalo.edu;mbzuai.ac.ae;adobe.com;buffalo.edu;adobe.com", "position": "Researcher;Senior Research Scientist;PhD student;Assistant Professor;Director, Document Intelligence Lab;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nchen2023labelretrievalaugmented,\ntitle={Label-Retrieval-Augmented Diffusion Models for Learning from Noisy Labels},\nauthor={Jian Chen and Ruiyi Zhang and Tong Yu and Rohan Sharma and zhiqiang xu and Tong Sun and Changyou Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o778eWSr1S}\n}", "github": "", "project": "", "reviewers": "21mt;ZUT9;3Gk3;poWF;KNdw", "pdf_size": 4130366, "rating": "3;5;5;6;7", "confidence": "4;3;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;4;3;3", "wc_summary": "48;63;83;77;159", "wc_strengths": "60;34;42;33;76", "wc_weaknesses": "213;271;129;25;275", "wc_questions": "31;4;109;118;3", "wc_limitations": "10;5;6;7;38", "wc_review": "362;377;369;260;551", "wc_reply_reviewers": "0;87;11;183;231", "wc_reply_authors": "51;163;0;607;0", "reply_reviewers": "0;2;1;4;1", "reply_authors": "2;2;1;4;1", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 86.0, 38.449967490233334 ], "wc_strengths_avg": [ 49.0, 16.61324772583615 ], "wc_weaknesses_avg": [ 182.6, 94.85483646077306 ], "wc_questions_avg": [ 53.0, 50.48960289010005 ], "wc_limitations_avg": [ 13.2, 12.512393855693642 ], "wc_review_avg": [ 383.8, 93.8326169303617 ], "wc_reply_reviewers_avg": [ 102.4, 91.77276284388523 ], "wc_reply_authors_avg": [ 164.2, 229.26438886141912 ], "reply_reviewers_avg": [ 1.6, 1.3564659966250536 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 
0.1230914909793327, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10026396317687230853&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "mbzuai.ac.ae;adobe.com;buffalo.edu;mbzuai.ac.ae;adobe.com;buffalo.edu;adobe.com", "author_num": 7, "aff_unique_index": "0;1;2;0;1;2;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Adobe;State University of New York at Buffalo", "aff_unique_dep": ";Adobe Research;", "aff_unique_url": "https://mbzuai.ac.ae;https://research.adobe.com;https://www.buffalo.edu", "aff_unique_abbr": "MBZUAI;Adobe;SUNY Buffalo", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;1;1;0;1;1;1", "aff_country_unique": "United Arab Emirates;United States" }, { "title": "Regression with Cost-based Rejection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70472", "id": "o7HckkxOZH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8ddcba644f602835a52b962d9a119eea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o7HckkxOZH", "openreview": "https://openreview.net/forum?id=o7HckkxOZH", "poster": "/media/PosterPDFs/NeurIPS%202023/70472.png?t=1699846509.3704107", "slides": "https://nips.cc/virtual/2023/poster/70472", "video": "https://nips.cc/virtual/2023/poster/70472", "author_site": "Xin Cheng, Yuzhou Cao, Haobo Wang, Hongxin Wei, Bo An, Lei Feng", "tldr": "", "abstract": "Learning with rejection is an important framework in which a model can refrain from making predictions, avoiding critical mispredictions by balancing prediction against rejection. Previous studies on cost-based rejection focused only on the classification setting, which cannot handle the continuous and infinite target space in the regression setting. In this paper, we investigate a novel regression problem called regression with cost-based rejection, where the model can refuse to make predictions on some examples, given certain rejection costs. To solve this problem, we first formulate the expected risk for this problem and then derive the Bayes optimal solution, which shows that, when the mean squared error is used as the evaluation metric, the optimal model should refuse to predict on examples whose variance is larger than the rejection cost. Furthermore, we propose to train the model with a surrogate loss function that treats rejection as binary classification, and we provide conditions for model consistency, which implies that the Bayes optimal solution can be recovered by our proposed surrogate loss.
Extensive experiments demonstrate the effectiveness of our proposed method.", "keywords": "regression;rejection costs;surrogate loss", "primary_area": "", "supplementary_material": "/attachment/75c8d30e76a4780c6baaa8106f5005d917b1e690.zip", "author": "Xin Cheng;Yuzhou Cao;Haobo Wang;Hongxin Wei;Bo An;Lei Feng", "authorids": "~Xin_Cheng4;~Yuzhou_Cao1;~Haobo_Wang1;~Hongxin_Wei1;~Bo_An2;~Lei_Feng1", "gender": "M;M;M;M;M;M", "homepage": ";https://yzcao-nkg.github.io/;https://hbzju.github.io/;https://hongxin001.github.io/;https://personal.ntu.edu.sg/boan/;https://lfeng1995.github.io/", "dblp": "96/4269-7.html;256/5052;;150/6350;42/6178-1.html;76/847-6", "google_scholar": "N2E4jI4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;DnN-rggAAAAJ;cABH034AAAAJ;PEEpuNwAAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ", "orcid": ";;0000-0001-8586-3048;;0000-0002-7064-7438;0000-0003-2839-5799", "linkedin": ";;;;;", "or_profile": "~Xin_Cheng4;~Yuzhou_Cao1;~Haobo_Wang1;~Hongxin_Wei1;~Bo_An2;~Lei_Feng1", "aff": "Chongqing University;Nanyang Technological University;Zhejiang University;Southern University of Science and Technology;Nanyang Technological University;Nanyang Technological University", "aff_domain": "cqu.edu.cn;ntu.edu;zju.edu.cn;sustech.edu.cn;ntu.edu.sg;ntu.edu.sg", "position": "MS student;PhD student;PhD student;Assistant Professor;Full Professor;Visiting Professor", "bibtex": "@inproceedings{\ncheng2023regression,\ntitle={Regression with Cost-based Rejection},\nauthor={Xin Cheng and Yuzhou Cao and Haobo Wang and Hongxin Wei and Bo An and Lei Feng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o7HckkxOZH}\n}", "github": "", "project": "", "reviewers": "4Fnw;1UF6;J2H6;mRfi", "pdf_size": 545165, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "117;50;63;120", "wc_strengths": "101;22;40;102", "wc_weaknesses": "79;132;461;127", "wc_questions": "100;2;30;27", "wc_limitations": "11;1;1;1", "wc_review": "408;207;595;377", "wc_reply_reviewers": "50;0;383;10", "wc_reply_authors": "29;0;675;24", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.5, 31.35681744055031 ], "wc_strengths_avg": [ 66.25, 35.82160660830276 ], "wc_weaknesses_avg": [ 199.75, 152.2454843336905 ], "wc_questions_avg": [ 39.75, 36.44430682562093 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 396.75, 137.68147115716044 ], "wc_reply_reviewers_avg": [ 110.75, 158.29304311939927 ], "wc_reply_authors_avg": [ 182.0, 284.8446945266841 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7579144987789359249&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cqu.edu.cn;ntu.edu;zju.edu.cn;sustech.edu.cn;ntu.edu.sg;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;1;2;3;1;1", "aff_unique_norm": "Chongqing University;Nanyang Technological University;Zhejiang University;Southern University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.cqu.edu.cn;https://www.ntu.edu.sg;https://www.zju.edu.cn;https://www.sustech.edu.cn", "aff_unique_abbr": "CQU;NTU;ZJU;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Recovering Unbalanced Communities in the Stochastic Block Model with Application to Clustering with a Faulty Oracle", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70471", "id": "o7W0Zet6p3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/41623b137cd34807f56028aa9f6f84a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o7W0Zet6p3", "openreview": "https://openreview.net/forum?id=o7W0Zet6p3", "poster": "/media/PosterPDFs/NeurIPS%202023/70471.png?t=1701574976.3248472", "slides": "https://nips.cc/virtual/2023/poster/70471", "video": "https://nips.cc/virtual/2023/poster/70471", "author_site": "Chandra Sekhar Mukherjee, Pan Peng, Jiapeng Zhang", "tldr": "", "abstract": "The stochastic block model (SBM) is a fundamental model for studying graph clustering or community detection in networks. It has received great attention in the last decade and the balanced case, i.e., assuming all clusters have large size, has been well studied. \nHowever, our understanding of SBM with unbalanced communities (arguably, more relevant in practice) is still limited. In this paper, we provide a simple SVD-based algorithm for recovering the communities in the SBM with communities of varying sizes.\nWe improve upon a result of Ailon, Chen and Xu [ICML 2013; JMLR 2015] by removing the assumption that there is a large interval such that the sizes of clusters do not fall in, and also remove the dependency of the size of the recoverable clusters on the number of underlying clusters. We further complement our theoretical improvements with experimental comparisons.\nUnder the planted clique conjecture, the size of the clusters that can be recovered by our algorithm is nearly optimal (up to poly-logarithmic factors) when the probability parameters are constant. \n\nAs a byproduct, we obtain an efficient clustering algorithm with sublinear query complexity in a faulty oracle model, which is capable of detecting all clusters larger than $\\tilde{\\Omega}({\\sqrt{n}})$, even in the presence of $\\Omega(n)$ small clusters in the graph. 
In contrast, previous efficient algorithms that use a sublinear number of queries are incapable of recovering any large clusters if there are more than $\\tilde{\\Omega}(n^{2/5})$ small clusters.", "keywords": "SBM;Unbalanced SBM;Spectral algorithms;Small cluster barrier", "primary_area": "", "supplementary_material": "/attachment/d2e7cd052d1b16003c5edb2da7e1a71089b1cd2b.zip", "author": "Chandra Sekhar Mukherjee;Pan Peng;Jiapeng Zhang", "authorids": "~Chandra_Sekhar_Mukherjee1;~Pan_Peng1;~Jiapeng_Zhang2", "gender": "M;M;M", "homepage": "https://sites.google.com/view/chandra-mukherjee/home;http://staff.ustc.edu.cn/~ppeng/;https://sites.google.com/site/jiapeng0708/home", "dblp": "263/7714;08/9919-1;38/9461", "google_scholar": "NYQMTH4AAAAJ;;9eQOP14AAAAJ", "orcid": ";0000-0003-2700-5699;", "linkedin": ";;", "or_profile": "~Chandra_Sekhar_Mukherjee1;~Pan_Peng1;~Jiapeng_Zhang2", "aff": "University of Southern California;University of Science and Technology of China;University of Southern California", "aff_domain": "usc.edu;ustc.edu.cn;usc.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmukherjee2023recovering,\ntitle={Recovering Unbalanced Communities in the Stochastic Block Model with Application to Clustering with a Faulty Oracle},\nauthor={Chandra Sekhar Mukherjee and Pan Peng and Jiapeng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o7W0Zet6p3}\n}", "github": "", "project": "", "reviewers": "tK15;Ajy4;5GRr;MiyQ", "pdf_size": 381825, "rating": "5;5;7;7", "confidence": "1;4;3;3", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "4;3;4;3", "wc_summary": "64;56;303;61", "wc_strengths": "30;98;59;96", "wc_weaknesses": "2;97;41;70", "wc_questions": "82;43;38;60", "wc_limitations": "2;1;1;1", "wc_review": "180;295;442;288", "wc_reply_reviewers": "7;17;32;15", "wc_reply_authors": "0;7;24;10", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 121.0, 105.11660192376844 ], "wc_strengths_avg": [ 70.75, 28.19020219863632 ], "wc_weaknesses_avg": [ 52.5, 35.245567097154215 ], "wc_questions_avg": [ 55.75, 17.210098779495716 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 301.25, 93.17557351580939 ], "wc_reply_reviewers_avg": [ 17.75, 9.03811374126261 ], "wc_reply_authors_avg": [ 10.25, 8.728545125048045 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.22941573387056177, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15636600427735056740&as_sdt=4000005&sciodt=0,18&hl=en", "gs_version_total": 6, "email": "usc.edu;ustc.edu.cn;usc.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Southern California;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;http://www.ustc.edu.cn", "aff_unique_abbr": "USC;USTC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "o8FCeFpipg", "title": "Towards the Universal Learning Principle for Graph Neural Networks", "track": "main", 
"status": "Reject", "tldr": "", "abstract": "Graph neural networks (GNNs) are currently highly regarded in graph representation learning tasks due to their significant performance. Although various propagation mechanisms and graph filters were proposed, few works have investigated their rationale from the perspective of learning. In this paper, we elucidate the criterion for the graph filter formed by power series, and further establish a scalable regularized learning framework that theoretically realizes GNN with infinite depth. Following the framework, we introduce Adaptive Power GNN (APGNN), a deep GNN that employs exponentially decaying weights to aggregate graph information of varying orders, thus facilitating more effective mining of deeper neighbor information. Moreover, the multiple $P$-hop message passing strategy is proposed to efficiently perceive the higher-order neighborhoods. Different from other GNNs, the proposed APGNN can be seamlessly extended to an infinite-depth network. To clarify the learning guarantee, we theoretically analyze the generalization of the proposed learning framework via uniform convergence. Experimental results show that APGNN obtains superior performance compared to state-of-the-art GNNs, highlighting the effectiveness of our framework.", "keywords": "Graph Neural Network;Graph Filter;Learning Framework", "primary_area": "", "supplementary_material": "/attachment/54b9bfb1f2a4140445fbeb331129e603aa84209b.pdf", "author": "Foping Chen;Junhong Zhang;Guangfei Liang;Richard Yi Da Xu;Zhihui Lai", "authorids": "~Foping_Chen1;~Junhong_Zhang1;~Guangfei_Liang1;~Richard_Yi_Da_Xu1;~Zhihui_Lai1", "gender": "M;M;M;M;M", "homepage": ";;;https://www.math.hkbu.edu.hk/people/xu-yi-da/;https://www.scholat.com/laizhihui.cn", "dblp": "https://dblp.org/;;368/5290;38/3064;61/7577-1", "google_scholar": ";ribcEAIAAAAJ;0Tz_-WQAAAAJ;ykOUWa4AAAAJ;CkK6ULsAAAAJ", "orcid": ";;0000-0002-5515-7414;0000-0003-2080-4762;0000-0002-4388-3080", "linkedin": ";;;richard-xu-0221a943/;", "or_profile": "~Foping_Chen1;~Junhong_Zhang1;~Guangfei_Liang1;~Richard_Yi_Da_Xu1;~Zhihui_Lai1", "aff": "Shenzhen University;Shenzhen University;Shenzhen University;Hong Kong Baptist University;Shenzhen University", "aff_domain": "szu.edu.cn;szu.edu.cn;szu.edu.cn;hkbu.edu.hk;szu.edu.cn", "position": "MS student;PhD student;MS student;Full Professor;Full Professor", "bibtex": "@misc{\nchen2023towards,\ntitle={Towards the Universal Learning Principle for Graph Neural Networks},\nauthor={Foping Chen and Junhong Zhang and Guangfei Liang and Richard Yi Da Xu and Zhihui Lai},\nyear={2023},\nurl={https://openreview.net/forum?id=o8FCeFpipg}\n}", "github": "", "project": "", "reviewers": "yz22;8PH8;qgt5;3RkM;UcBg", "site": "https://openreview.net/forum?id=o8FCeFpipg", "pdf_size": 488980, "rating": "3;3;3;5;6", "confidence": "5;4;4;4;4", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;1;3;4", "wc_summary": "37;44;222;53;137", "wc_strengths": "4;36;159;76;82", "wc_weaknesses": "687;234;441;153;235", "wc_questions": "38;5;145;123;88", "wc_limitations": "8;1;99;30;10", "wc_review": "774;320;1066;435;552", "wc_reply_reviewers": "22;18;21;0;51", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 98.6, 
71.49433543994937 ], "wc_strengths_avg": [ 71.4, 52.15975460064973 ], "wc_weaknesses_avg": [ 350.0, 193.56652603174962 ], "wc_questions_avg": [ 79.8, 52.01307527920263 ], "wc_limitations_avg": [ 29.6, 36.014441547801354 ], "wc_review_avg": [ 629.4, 264.9676206633558 ], "wc_reply_reviewers_avg": [ 22.4, 16.37803407005859 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.39528470752104744, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VevL8YYxxNoJ:scholar.google.com/&scioq=Towards+the+Universal+Learning+Principle+for+Graph+Neural+Networks&hl=en&as_sdt=0,48", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Shenzhen University;Hong Kong Baptist University", "aff_unique_dep": ";", "aff_unique_url": "https://www.szu.edu.cn;https://www.hkbu.edu.hk", "aff_unique_abbr": "SZU;HKBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Distributional Policy Evaluation: a Maximum Entropy approach to Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70470", "id": "o91in9tDEs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a98af4fea6a24b73af7b588ca95f755-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=o91in9tDEs", "openreview": "https://openreview.net/forum?id=o91in9tDEs", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70470", "video": "https://nips.cc/virtual/2023/poster/70470", "author_site": "Riccardo Zamboni, Alberto Maria Metelli, Marcello Restelli", "tldr": "", "abstract": "The Maximum Entropy (Max-Ent) framework has been effectively employed in a variety of Reinforcement Learning (RL) tasks. In this paper, we first propose a novel Max-Ent framework for policy evaluation in a distributional RL setting, named *Distributional Maximum Entropy Policy Evaluation* (D-Max-Ent PE). We derive a generalization-error bound that depends on the complexity of the representation employed, showing that this framework can explicitly take into account the features used to represent the state space while evaluating a policy. Then, we exploit these favorable properties to drive the representation learning of the state space in a Structural Risk Minimization fashion. We employ state-aggregation functions as feature functions and we specialize the D-Max-Ent approach into an algorithm, named *D-Max-Ent Progressive Factorization*, which constructs a progressively finer-grained representation of the state space by balancing the trade-off between preserving information (bias) and reducing the effective number of states, i.e., the complexity of the representation space (variance). 
Finally, we report the results of some illustrative numerical simulations, showing that the proposed algorithm matches the expected theoretical behavior and highlighting the relationship between aggregations and sample regimes.", "keywords": "reinforcement learning;distributional reinforcement learning;maximum entropy estimation;representation learning", "primary_area": "", "supplementary_material": "/attachment/1e4a6a3d09bd3da4b62864fa41cb33eb4f8b96e2.pdf", "author": "Riccardo Zamboni;Alberto Maria Metelli;Marcello Restelli", "authorids": "~Riccardo_Zamboni1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "gender": ";M;M", "homepage": "https://ricczamboni.github.io;https://albertometelli.github.io/;http://home.deib.polimi.it/restelli/", "dblp": "275/1582;209/4941;64/1011", "google_scholar": "jXy474MAAAAJ;R31IsPwAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";0000-0002-3424-5212;0000-0002-6322-1076", "linkedin": "riccardo-zamboni-rz95/;;", "or_profile": "~Riccardo_Zamboni1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "aff": "Polytechnic Institute of Milan;Politecnico di Milano;Politecnico di Milano", "aff_domain": "polimi.it;polimi.it;polimi.it", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nzamboni2023distributional,\ntitle={Distributional Policy Evaluation: a Maximum Entropy approach to Representation Learning},\nauthor={Riccardo Zamboni and Alberto Maria Metelli and Marcello Restelli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=o91in9tDEs}\n}", "github": "", "project": "", "reviewers": "wzKJ;HRB1;Tf1V;SYd5;W4d8", "pdf_size": 464081, "rating": "2;5;5;6;6", "confidence": "4;1;4;3;4", "soundness": "1;3;3;3;4", "novelty": "1;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "63;55;89;71;103", "wc_strengths": "29;44;53;161;52", "wc_weaknesses": "56;48;29;162;124", "wc_questions": "356;73;1;160;45", "wc_limitations": "1;38;1;55;13", "wc_review": "505;258;173;609;337", "wc_reply_reviewers": "393;13;0;190;22", "wc_reply_authors": "871;0;0;20;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;1;1;2;1", "rating_avg": [ 4.8, 1.469693845669907 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 76.2, 17.508854902591434 ], "wc_strengths_avg": [ 67.8, 47.38522976624678 ], "wc_weaknesses_avg": [ 83.8, 50.574301774715586 ], "wc_questions_avg": [ 127.0, 125.73464120917512 ], "wc_limitations_avg": [ 21.6, 21.481154531356083 ], "wc_review_avg": [ 376.4, 159.73928759074894 ], "wc_reply_reviewers_avg": [ 123.6, 151.536926192925 ], "wc_reply_authors_avg": [ 178.2, 346.48659425726703 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.21004201260420147, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MvCoN1CUCXsJ:scholar.google.com/&scioq=Distributional+Policy+Evaluation:+a+Maximum+Entropy+approach+to+Representation+Learning&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "polimi.it;polimi.it;polimi.it", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di 
Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "id": "oAHjj0of5z", "title": "Indeterminate Probability Neural Network", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a new general model called IPNN - Indeterminate Probability Neural Network, which combines neural network and probability theory together. In the classical probability theory, the calculation of probability is based on the occurrence of events, which is hardly used in current neural networks. In this paper, we propose a new general probability theory, which is an extension of classical probability theory, and makes classical probability theory a special case to our theory. With this new theory, some intractable probability problems have now become tractable (analytical solution). Besides, for our proposed neural network framework, the output of neural network is defined as probability events, and based on the statistical analysis of these events, the inference model for classification task is deduced. IPNN shows new property: It can perform unsupervised clustering while doing classification. Besides, IPNN is capable of making very large classification with very small neural network, e.g. model with 100 output nodes can classify 10 billion categories. Theoretical advantages are reflected in experimental results.", "keywords": "Indeterminate Probability;Discrete Random Variable;Unsupervised Clustering;Classification;IPNN", "primary_area": "", "supplementary_material": "/attachment/f7079e531c335c97a3df16bff98e27b2b9607032.zip", "author": "Tao Yang;Chuang Liu;Xiaofeng Ma;Weijia Lu;Ning Wu;Bingyang Li;ZHIFEI YANG;Peng Liu;Lin Sun;xiaodong Zhang;Can Zhang", "authorids": "~Tao_Yang19;~Chuang_Liu5;~Xiaofeng_Ma1;~Weijia_Lu1;~Ning_Wu3;~Bingyang_Li1;~ZHIFEI_YANG2;~Peng_Liu17;~Lin_Sun11;~xiaodong_Zhang6;~Can_Zhang4", "gender": "M;;;M;M;Not Specified;;M;M;M;F", "homepage": "https://github.com/Starfruit007;https://www.researchgate.net/profile/Chuang-Liu-15;;https://alfredlu.users.sourceforge.net;https://github.com/uaeswuning;https://github.com/alostbear;https://github.com/fzy9012;https://github.com/Lllllp93;https://github.com/sunlin-ai;https://github.com/Small-Step-AI;", "dblp": "67/1120;52/1800-3;;10/4203;60/467;94/8403;56/6850;21/6121;95/6619;37/4356;35/1714", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;-AXrsOwAAAAJ;;PcVWy2EAAAAJ;;;;;;;", "orcid": ";;;0000-0002-7899-6034;;;;;;;0000-0002-7083-5228", "linkedin": "https://www.linkedin.com/mwlite/in/tao-yang-754b40217;chuang-liu-4b94a159;https://www.linkedin.com/mwlite/in/xiaofeng-ma0124;;;;;;;;", "or_profile": "~Tao_Yang19;~Chuang_Liu5;~Xiaofeng_Ma1;~Weijia_Lu1;~Ning_Wu3;~Bingyang_Li1;~ZHIFEI_YANG2;~Peng_Liu17;~Lin_Sun11;~xiaodong_Zhang6;~Can_Zhang4", "aff": "United Automotive Electronic Systems Co., Ltd.;United Automotive Electronic Systems Co., Ltd.;UAES;UAES;UAES;UAES;United Automotive Electronic Systems Co., Ltd.;UAES;UAES;United Automotive Electronic Systems Co., Ltd.;University of Technology Sydney", "aff_domain": "uaes.com;uaes.com;uaes.com;uaes.com;uaes.com;uaes.com;uaes.com;uaes.com;uaes.com;uaes.com;uts.edu.au", "position": "Researcher;Researcher;Researcher;Principal Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;PhD student", "bibtex": "@misc{\nyang2023indeterminate,\ntitle={Indeterminate Probability Neural Network},\nauthor={Tao Yang and Chuang Liu and Xiaofeng Ma and Weijia Lu and Ning Wu and 
Bingyang Li and ZHIFEI YANG and Peng Liu and Lin Sun and xiaodong Zhang and Can Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=oAHjj0of5z}\n}", "github": "", "project": "", "reviewers": "Xo5z;1Vya;zXvC", "site": "https://openreview.net/forum?id=oAHjj0of5z", "pdf_size": 882074, "rating": "3;3;4", "confidence": "3;2;4", "soundness": "2;2;2", "novelty": "2;2;2", "presentation": "3;1;2", "wc_summary": "68;141;43", "wc_strengths": "54;56;45", "wc_weaknesses": "604;172;208", "wc_questions": "2;253;35", "wc_limitations": "1;12;1", "wc_review": "729;634;332", "wc_reply_reviewers": "90;22;0", "wc_reply_authors": "77;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 3.3333333333333335, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 84.0, 41.57723736212721 ], "wc_strengths_avg": [ 51.666666666666664, 4.784233364802441 ], "wc_weaknesses_avg": [ 328.0, 195.71407716360108 ], "wc_questions_avg": [ 96.66666666666667, 111.36227168819589 ], "wc_limitations_avg": [ 4.666666666666667, 5.185449728701348 ], "wc_review_avg": [ 565.0, 169.2591701110066 ], "wc_reply_reviewers_avg": [ 37.333333333333336, 38.30868772948971 ], "wc_reply_authors_avg": [ 25.666666666666668, 36.29814810090944 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TiRGWJ4_RbEJ:scholar.google.com/&scioq=Indeterminate+Probability+Neural+Network&hl=en&as_sdt=0,10", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;1", "aff_unique_norm": "United Automotive Electronic Systems;University of Technology Sydney", "aff_unique_dep": ";", "aff_unique_url": ";https://www.uts.edu.au", "aff_unique_abbr": "UAES;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;1", "aff_country_unique": "China;Australia" }, { "title": "Unified Off-Policy Learning to Rank: a Reinforcement Learning Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70469", "id": "oDcWnfZyZW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f1b6e97a5eb3b10e6b0c99b022988eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oDcWnfZyZW", "openreview": "https://openreview.net/forum?id=oDcWnfZyZW", "poster": "/media/PosterPDFs/NeurIPS%202023/70469.png?t=1701715693.0508687", "slides": "https://nips.cc/virtual/2023/poster/70469", "video": "https://nips.cc/virtual/2023/poster/70469", "author_site": "Zeyu Zhang, Yi Su, Hui Yuan, Yiran Wu, Rishab Balasubramanian, Qingyun Wu, Huazheng Wang, Mengdi Wang", "tldr": "", "abstract": "Off-policy Learning to Rank (LTR) aims to optimize a ranker from data collected by a deployed logging policy. However, existing off-policy learning to rank methods often make strong assumptions about how users generate the click data, i.e., the click model, and hence need to tailor their methods specifically under different click models.
In this paper, we unified the ranking process under general stochastic click models as a Markov Decision Process (MDP), and the optimal ranking could be learned with offline reinforcement learning (RL) directly. Building upon this, we leverage offline RL techniques for off-policy LTR and propose the Click Model-Agnostic Unified Off-policy Learning to Rank (CUOLR) method, which could be easily applied to a wide range of click models. Through a dedicated formulation of the MDP, we show that offline RL algorithms can adapt to various click models without complex debiasing techniques and prior knowledge of the model. Results on various large-scale datasets demonstrate that CUOLR consistently outperforms the state-of-the-art off-policy learning to rank algorithms while maintaining consistency and robustness under different click models.", "keywords": "learning to rank;off-policy learning;reinforcement learning;click model", "primary_area": "", "supplementary_material": "", "author": "Zeyu Zhang;Yi Su;Hui Yuan;Yiran Wu;Rishab Balasubramanian;Qingyun Wu;Huazheng Wang;Mengdi Wang", "authorids": "~Zeyu_Zhang9;~Yi_Su2;~Hui_Yuan2;~Yiran_Wu2;~Rishab_Balasubramanian1;~Qingyun_Wu2;~Huazheng_Wang1;~Mengdi_Wang1", "gender": "M;F;F;M;M;;F;F", "homepage": ";https://www.yisu.moe/;;https://github.com/kevin666aa;https://rishabbala.github.io/;https://huazhengwang.github.io/;http://mwang.princeton.edu;https://qingyun-wu.github.io/", "dblp": ";;21/780-2;;277/9164.html;163/2233;;183/0579", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;2Gx5IQ4AAAAJ;K5x0hwcAAAAJ;w3PrbKwAAAAJ;;Y54J21sAAAAJ", "orcid": ";;;;;;;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAEJn1OABMzkqdSKulrnJvRHyVLvpjVKyjU8;;;;rishab-bala-b01110142/;;;", "or_profile": "~Zeyu_Zhang9;~Yi_Su2;~Hui_Yuan2;~Yiran_Wu2;~Rishab_Balasubramanian1;~Huazheng_Wang1;~Mengdi_Wang1;~Qingyun_Wu1", "aff": "University of Science and Technology of China;Google;Princeton University;Pennsylvania State University;Oregon State University;Oregon State University;Princeton University;Pennsylvania State University", "aff_domain": "ustc.edu.cn;google.com;princeton.edu;psu.edu;oregonstate.edu;oregonstate.edu;princeton.edu;psu.edu", "position": "Undergrad student;Researcher;PhD student;PhD student;MS student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023unified,\ntitle={Unified Off-Policy Learning to Rank: a Reinforcement Learning Perspective},\nauthor={Zeyu Zhang and Yi Su and Hui Yuan and Yiran Wu and Rishab Balasubramanian and Qingyun Wu and Huazheng Wang and Mengdi Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oDcWnfZyZW}\n}", "github": "", "project": "", "reviewers": "ynTd;XmTH;79J3;atvb;fhCJ", "pdf_size": 829813, "rating": "5;5;6;6;6", "confidence": "3;3;4;2;2", "soundness": "3;3;3;2;2", "novelty": "2;3;3;2;3", "presentation": "2;2;3;2;3", "wc_summary": "92;164;83;95;186", "wc_strengths": "57;100;96;66;212", "wc_weaknesses": "93;212;132;152;161", "wc_questions": "46;147;80;96;15", "wc_limitations": "57;89;34;26;35", "wc_review": "345;712;425;435;609", "wc_reply_reviewers": "0;102;22;63;30", "wc_reply_authors": "0;452;78;110;16", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;3;3;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], 
"presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 124.0, 42.40283009422838 ], "wc_strengths_avg": [ 106.2, 55.44871504372306 ], "wc_weaknesses_avg": [ 150.0, 38.83812559843742 ], "wc_questions_avg": [ 76.8, 44.888305826796355 ], "wc_limitations_avg": [ 48.2, 22.85082055419455 ], "wc_review_avg": [ 505.2, 134.5843973126157 ], "wc_reply_reviewers_avg": [ 43.4, 35.60674093482862 ], "wc_reply_authors_avg": [ 131.2, 165.33408601979207 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.21821789023599236, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2856298703919708158&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "ustc.edu.cn;google.com;princeton.edu;psu.edu;oregonstate.edu;oregonstate.edu;princeton.edu;psu.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;4;2;3", "aff_unique_norm": "University of Science and Technology of China;Google;Princeton University;Pennsylvania State University;Oregon State University", "aff_unique_dep": ";Google;;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.google.com;https://www.princeton.edu;https://www.psu.edu;https://oregonstate.edu", "aff_unique_abbr": "USTC;Google;Princeton;PSU;OSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Directional diffusion models for graph representation learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70468", "id": "oDtyJt5JLk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6751ee6546b31ceb7d4ee12276b9f4d9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oDtyJt5JLk", "openreview": "https://openreview.net/forum?id=oDtyJt5JLk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70468", "video": "https://nips.cc/virtual/2023/poster/70468", "author_site": "Run Yang, Yuling Yang, Fan Zhou, Qiang Sun", "tldr": "", "abstract": "Diffusion models have achieved remarkable success in diverse domains such as image synthesis, super-resolution, and 3D molecule generation. Surprisingly, the application of diffusion models in graph learning has garnered little attention. In this paper, we aim to bridge this gap by exploring the use of diffusion models for unsupervised graph representation learning. Our investigation commences with the identification of anisotropic structures within graphs and the recognition of a crucial limitation in the vanilla forward diffusion process when dealing with these anisotropic structures. The original forward diffusion process continually adds isotropic Gaussian noise to the data, which may excessively dilute anisotropic signals, leading to rapid signal-to-noise conversion. This rapid conversion poses challenges for training denoising neural networks and obstructs the acquisition of semantically meaningful representations during the reverse process. To overcome this challenge, we introduce a novel class of models termed {\\it directional diffusion models}. These models adopt data-dependent, anisotropic, and directional noises in the forward diffusion process. In order to assess the effectiveness of our proposed models, we conduct extensive experiments on 12 publicly available datasets, with a particular focus on two distinct graph representation learning tasks. 
The experimental results unequivocally establish the superiority of our models over state-of-the-art baselines, underscoring their effectiveness in capturing meaningful graph representations. Our research not only sheds light on the intricacies of the forward process in diffusion models but also underscores the vast potential of these models in addressing a wide spectrum of graph-related tasks. Our code is available at \\url{https://github.com/statsle/DDM}.", "keywords": "diffusion models;graph representation learning;unsupervised learning", "primary_area": "", "supplementary_material": "/attachment/e5869c594d7a34a6c4754cc698127a337bd46d8c.pdf", "author": "Run Yang;Yuling Yang;Fan Zhou;Qiang Sun", "authorids": "~Run_Yang2;~Yuling_Yang1;~Fan_Zhou7;~Qiang_Sun2", "gender": "M;F;;M", "homepage": "https://github.com/luckyyangrun;;;https://sites.google.com/view/qsun", "dblp": ";;;73/2066-7", "google_scholar": "5YNYqc0AAAAJ;;4QJkjl0AAAAJ;f0V2fAYAAAAJ", "orcid": ";;;", "linkedin": ";yuling-yang-5a0649177;;", "or_profile": "~Run_Yang2;~Yuling_Yang1;~Fan_Zhou7;~Qiang_Sun2", "aff": "Baidu;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;University of Toronto", "aff_domain": "baidu.com;sufe.edu;shufe.edu;utoronto.ca", "position": "Researcher;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyang2023directional,\ntitle={Directional diffusion models for graph representation learning},\nauthor={Run Yang and Yuling Yang and Fan Zhou and Qiang Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oDtyJt5JLk}\n}", "github": "", "project": "", "reviewers": "tkhv;jRHd;8dwQ;BUxE;ejpR;emkS", "pdf_size": 1169890, "rating": "5;5;6;6;7;7", "confidence": "5;3;4;3;2;4", "soundness": "2;3;3;2;4;3", "novelty": "2;3;4;2;4;3", "presentation": "2;3;2;3;2;4", "wc_summary": "67;36;102;103;61;80", "wc_strengths": "49;54;32;87;55;208", "wc_weaknesses": "118;19;78;232;16;261", "wc_questions": "86;34;18;122;52;205", "wc_limitations": "60;16;26;134;1;16", "wc_review": "380;159;256;678;185;770", "wc_reply_reviewers": "19;11;40;57;0;61", "wc_reply_authors": "35;0;0;208;34;106", "reply_reviewers": "1;1;1;1;0;1", "reply_authors": "2;1;1;2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 74.83333333333333, 23.518904925376283 ], "wc_strengths_avg": [ 80.83333333333333, 59.15351966611023 ], "wc_weaknesses_avg": [ 120.66666666666667, 95.92300152842498 ], "wc_questions_avg": [ 86.16666666666667, 63.16754616801961 ], "wc_limitations_avg": [ 42.166666666666664, 44.8643015721359 ], "wc_review_avg": [ 404.6666666666667, 237.87088000752752 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 22.954060400915758 ], "wc_reply_authors_avg": [ 63.833333333333336, 73.52191661145825 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8821071832551667645&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "baidu.com;sufe.edu;shufe.edu;utoronto.ca", "author_num": 4, "aff_unique_index": "0;1;1;2", 
"aff_unique_norm": "Baidu;Shanghai University of Finance and Economics;University of Toronto", "aff_unique_dep": "Baidu, Inc.;;", "aff_unique_url": "https://www.baidu.com;http://www.sufe.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "Baidu;SUFE;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;Canada" }, { "title": "Gradient Informed Proximal Policy Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70467", "id": "oFaLc6fHSt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1bd8cfc0e4c53869b7f1d0ed4b1e78e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oFaLc6fHSt", "openreview": "https://openreview.net/forum?id=oFaLc6fHSt", "poster": "/media/PosterPDFs/NeurIPS%202023/70467.png?t=1701557979.570726", "slides": "https://nips.cc/virtual/2023/poster/70467", "video": "https://nips.cc/virtual/2023/poster/70467", "author_site": "Sanghyun Son, Laura Zheng, Ryan Sullivan, Yi-Ling Qiao, Ming Lin", "tldr": "", "abstract": "We introduce a novel policy learning method that integrates analytical gradients from differentiable environments with the Proximal Policy Optimization (PPO) algorithm. To incorporate analytical gradients into the PPO framework, we introduce the concept of an \u03b1-policy that stands as a locally superior policy. By adaptively modifying the \u03b1 value, we can effectively manage the influence of analytical policy gradients during learning. To this end, we suggest metrics for assessing the variance and bias of analytical gradients, reducing dependence on these gradients when high variance or bias is detected. Our proposed approach outperforms baseline algorithms in various scenarios, such as function optimization, physics simulations, and traffic control environments. 
Our code can be found online: https://github.com/SonSang/gippo.", "keywords": "Reinforcement Learning;Analytic Gradient-Based Policy Learning;Proximal Policy Optimization;Differentiable Programming", "primary_area": "", "supplementary_material": "/attachment/3b8ab5428a6a8a7c67f6a4cadd658e1d82074932.zip", "author": "Sanghyun Son;Laura Yu Zheng;Ryan Sullivan;Yi-Ling Qiao;Ming Lin", "authorids": "~Sanghyun_Son3;~Laura_Yu_Zheng1;~Ryan_Sullivan2;~Yi-Ling_Qiao1;~Ming_Lin2", "gender": "M;F;M;;F", "homepage": "https://sanghyun.phd.sh/;https://laurayuzheng.github.io/;https://ryannavillus.github.io/;;http://www.cs.umd.edu/~lin", "dblp": "68/6424-3;285/3002;;226/5117;l/MingCLin.html", "google_scholar": "WzuMJR8AAAAJ;kGUj-dIAAAAJ;https://scholar.google.com/citations?hl=en;ghpLm2cAAAAJ;ugFNit4AAAAJ", "orcid": ";;;;0000-0003-3736-6949", "linkedin": ";;ryan-navillus/;;mlin2/", "or_profile": "~Sanghyun_Son3;~Laura_Yu_Zheng1;~Ryan_Sullivan2;~Yi-Ling_Qiao1;~Ming_Lin2", "aff": "University of Maryland, College Park;Kitware Inc.;University of Maryland, College Park;University of Maryland, College Park;Amazon", "aff_domain": "umd.edu;kitware.com;umd.edu;umd.edu;amazon.com", "position": "PhD student;Intern;PhD student;PhD student;Amazon Scholar", "bibtex": "@inproceedings{\nson2023gradient,\ntitle={Gradient Informed Proximal Policy Optimization},\nauthor={Sanghyun Son and Laura Yu Zheng and Ryan Sullivan and Yi-Ling Qiao and Ming Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oFaLc6fHSt}\n}", "github": "", "project": "", "reviewers": "8kSu;39SH;DV21;WNVi;xDc8", "pdf_size": 5640475, "rating": "4;5;6;7;7", "confidence": "4;3;4;4;4", "soundness": "2;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "1;3;3;4;3", "wc_summary": "35;66;78;41;110", "wc_strengths": "15;34;112;74;107", "wc_weaknesses": "325;566;102;64;68", "wc_questions": "37;80;54;297;54", "wc_limitations": "0;9;9;45;12", "wc_review": "412;755;355;521;351", "wc_reply_reviewers": "0;240;22;65;40", "wc_reply_authors": "22;964;14;18;15", "reply_reviewers": "0;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 66.0, 27.077666073722085 ], "wc_strengths_avg": [ 68.4, 38.6191662261111 ], "wc_weaknesses_avg": [ 225.0, 195.94897294959216 ], "wc_questions_avg": [ 104.4, 97.27610189558379 ], "wc_limitations_avg": [ 15.0, 15.53061492665374 ], "wc_review_avg": [ 478.8, 151.12167283351516 ], "wc_reply_reviewers_avg": [ 73.4, 85.98976683303658 ], "wc_reply_authors_avg": [ 206.6, 378.7102322356765 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10474221749176940694&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "umd.edu;kitware.com;umd.edu;umd.edu;amazon.com", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Maryland;Kitware Inc.;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.umd.edu;https://www.kitware.com;https://www.amazon.com", "aff_unique_abbr": "UMD;Kitware;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique":
"College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generate What You Prefer: Reshaping Sequential Recommendation via Guided Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70466", "id": "oFpBnt6bgC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c5e2bcbf21bdf40d75fddad0bd43dc9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oFpBnt6bgC", "openreview": "https://openreview.net/forum?id=oFpBnt6bgC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70466", "video": "https://nips.cc/virtual/2023/poster/70466", "author_site": "Zhengyi Yang, Jiancan Wu, Zhicai Wang, Xiang Wang, Yancheng Yuan, Xiangnan He", "tldr": "", "abstract": "Sequential recommendation aims to recommend the next item that matches a user\u2019s\ninterest, based on the sequence of items he/she interacted with before. Scrutinizing\nprevious studies, we can summarize a common learning-to-classify paradigm\u2014\ngiven a positive item, a recommender model performs negative sampling to add\nnegative items and learns to classify whether the user prefers them or not, based on\nhis/her historical interaction sequence. Although effective, we reveal two inherent\nlimitations: (1) it may differ from human behavior in that a user could imagine\nan oracle item in mind and select potential items matching the oracle; and (2)\nthe classification is limited in the candidate pool with noisy or easy supervision\nfrom negative samples, which dilutes the preference signals towards the oracle\nitem. Yet, generating the oracle item from the historical interaction sequence is\nmostly unexplored. To bridge the gap, we reshape sequential recommendation\nas a learning-to-generate paradigm, which is achieved via a guided diffusion\nmodel, termed DreamRec. Specifically, for a sequence of historical items, it\napplies a Transformer encoder to create guidance representations. Noising target\nitems explores the underlying distribution of item space; then, with the guidance of\nhistorical interactions, the denoising process generates an oracle item to recover\nthe positive item, so as to cast off negative sampling and depict the true preference\nof the user directly. We evaluate the effectiveness of DreamRec through extensive\nexperiments and comparisons with existing methods. 
Codes and data are open-sourced\nat https://github.com/YangZhengyi98/DreamRec.", "keywords": "Sequential Recommendation;Recommendation System;Generative Model;Diffusion Model", "primary_area": "", "supplementary_material": "", "author": "Zhengyi Yang;Jiancan Wu;Zhicai Wang;Xiang Wang;Yancheng Yuan;Xiangnan He", "authorids": "~Zhengyi_Yang1;~Jiancan_Wu1;~Zhicai_Wang1;~Xiang_Wang6;~Yancheng_Yuan1;~Xiangnan_He1", "gender": ";M;M;M;;M", "homepage": "https://github.com/YangZhengyi98;https://wujcan.github.io/;;https://github.com/xiangwang1223;;http://staff.ustc.edu.cn/~hexn", "dblp": ";257/4945;250/1975;31/2864-10;;59/1007", "google_scholar": ";z9zW1UgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ", "orcid": ";0000-0002-6941-5218;;0000-0002-6148-6329;;0000-0001-8472-7992", "linkedin": ";;;;;", "or_profile": "~Zhengyi_Yang1;~Jiancan_Wu1;~Zhicai_Wang1;~Xiang_Wang6;~Yancheng_Yuan1;~Xiangnan_He1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn", "position": "PhD student;Postdoc;PhD student;Full Professor;;Professor", "bibtex": "@inproceedings{\nyang2023generate,\ntitle={Generate What You Prefer: Reshaping Sequential Recommendation via Guided Diffusion},\nauthor={Zhengyi Yang and Jiancan Wu and Zhicai Wang and Xiang Wang and Yancheng Yuan and Xiangnan He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oFpBnt6bgC}\n}", "github": "", "project": "", "reviewers": "RbjC;b6yV;VUDN", "pdf_size": 1284046, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "78;172;121", "wc_strengths": "33;51;51", "wc_weaknesses": "42;212;140", "wc_questions": "45;144;73", "wc_limitations": "13;69;75", "wc_review": "211;648;460", "wc_reply_reviewers": "0;53;201", "wc_reply_authors": "153;174;824", "reply_reviewers": "0;1;1", "reply_authors": "2;3;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.66666666666667, 38.42163742245016 ], "wc_strengths_avg": [ 45.0, 8.48528137423857 ], "wc_weaknesses_avg": [ 131.33333333333334, 69.67224858020747 ], "wc_questions_avg": [ 87.33333333333333, 41.66799997866735 ], "wc_limitations_avg": [ 52.333333333333336, 27.920522121829233 ], "wc_review_avg": [ 439.6666666666667, 178.982929043216 ], "wc_reply_reviewers_avg": [ 84.66666666666667, 85.05815004388991 ], "wc_reply_authors_avg": [ 383.6666666666667, 311.48069317731756 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15649965598499064835&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and 
Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "UniT: A Unified Look at Certified Robust Training against Text Adversarial Perturbation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70465", "id": "oGxE2Nvlda", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/46b065f7d301a15a23909f6cad409a97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oGxE2Nvlda", "openreview": "https://openreview.net/forum?id=oGxE2Nvlda", "poster": "/media/PosterPDFs/NeurIPS%202023/70465.png?t=1700329028.042474", "slides": "https://nips.cc/virtual/2023/poster/70465", "video": "https://nips.cc/virtual/2023/poster/70465", "author_site": "Muchao Ye, Ziyi Yin, Tianrong Zhang, Tianyu Du, Tianyu Du, Jinghui Chen, Ting Wang, Fenglong Ma", "tldr": "", "abstract": "Recent years have witnessed a surge of certified robust training pipelines against text adversarial perturbation constructed by synonym substitutions. Given a base model, existing pipelines provide prediction certificates either in the discrete word space or the continuous latent space. However, they are isolated from each other with a structural gap. We observe that existing training frameworks need unification to provide stronger certified robustness. Additionally, they mainly focus on building the certification process but neglect to improve the robustness of the base model. To mitigate the aforementioned limitations, we propose a unified framework named UniT that enables us to train flexibly in either fashion by working in the word embedding space. It can provide a stronger robustness guarantee obtained directly from the word embedding space without extra modules. In addition, we introduce the decoupled regularization (DR) loss to improve the robustness of the base model, which includes two separate robustness regularization terms for the feature extraction and classifier modules. 
Experimental results on widely used text classification datasets further demonstrate the effectiveness of the designed unified framework and the proposed DR loss for improving the certified robust accuracy.", "keywords": "certified robust training;text adversarial defense", "primary_area": "", "supplementary_material": "/attachment/8c656f9bcfaabde0c4d674edaade8138f86aaa80.zip", "author": "Muchao Ye;Ziyi Yin;Tianrong Zhang;Tianyu Du;Jinghui Chen;Ting Wang;Fenglong Ma", "authorids": "~Muchao_Ye1;~Ziyi_Yin1;~Tianrong_Zhang1;~Tianyu_Du2;~Jinghui_Chen1;~Ting_Wang1;~Fenglong_Ma1", "gender": ";M;M;F;M;M;M", "homepage": "https://sites.google.com/view/mcye;https://ericyinyzy.github.io/;https://zhangtianrong.github.io/profile/#en;https://tydusky.github.io/;https://jinghuichen.github.io/;https://alps-lab.github.io/;https://fenglong-ma.github.io/", "dblp": "251/3433.html;358/6428;;128/2982.html;67/5633;12/2633-6.html;85/10856", "google_scholar": ";wvbK37AAAAAJ;;kBqTzrwAAAAJ;mKia7Y4AAAAJ;cwcBTegAAAAJ;DLJIxNMAAAAJ", "orcid": "0009-0006-9112-8895;0009-0002-3502-3205;;0000-0003-0896-0690;;;0000-0002-4999-0303", "linkedin": ";%E6%A2%93%E8%AF%91-%E6%AE%B7-ab816a249/?locale=en_US&trk=eml-email_network_conversations_01-header-0-profile_glimmer;;;;;fenglong-ma-69805832/", "or_profile": "~Muchao_Ye1;~Ziyi_Yin1;~Tianrong_Zhang1;~Tianyu_Du2;~Jinghui_Chen1;~Ting_Wang1;~Fenglong_Ma1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nye2023unit,\ntitle={UniT: A Unified Look at Certified Robust Training against Text Adversarial Perturbation},\nauthor={Muchao Ye and Ziyi Yin and Tianrong Zhang and Tianyu Du and Jinghui Chen and Ting Wang and Fenglong Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oGxE2Nvlda}\n}", "github": "", "project": "", "reviewers": "3SMy;ZzE2;yTLz;Wx5f;FiMX", "pdf_size": 645241, "rating": "5;5;6;6;6", "confidence": "2;4;4;4;3", "soundness": "3;2;4;3;3", "novelty": "3;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "92;75;129;55;120", "wc_strengths": "65;106;30;47;111", "wc_weaknesses": "221;4;49;288;80", "wc_questions": "53;213;285;133;4", "wc_limitations": "4;2;32;13;1", "wc_review": "435;400;525;536;316", "wc_reply_reviewers": "53;42;35;0;34", "wc_reply_authors": "31;26;23;0;25", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 94.2, 27.520174418051933 ], "wc_strengths_avg": [ 71.8, 31.983745871926885 ], "wc_weaknesses_avg": [ 128.4, 107.86027999221956 ], "wc_questions_avg": [ 137.6, 102.4101557463907 ], "wc_limitations_avg": [ 10.4, 11.60344776348823 ], "wc_review_avg": [ 442.4, 81.74741586129802 ], "wc_reply_reviewers_avg": [ 32.8, 17.747112441183216 ], "wc_reply_authors_avg": [ 21.0, 10.825894882179487 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 
0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15824139394556506791&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LightZero: A Unified Benchmark for Monte Carlo Tree Search in General Sequential Decision Scenarios", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73472", "id": "oIUXpBnyjv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/765043fe026f7d704c96cec027f13843-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=oIUXpBnyjv", "openreview": "https://openreview.net/forum?id=oIUXpBnyjv", "poster": "/media/PosterPDFs/NeurIPS%202023/73472.png?t=1702046054.8563359", "slides": "https://nips.cc/virtual/2023/poster/73472", "video": "https://nips.cc/virtual/2023/poster/73472", "author_site": "Yazhe Niu, YUAN PU, Zhenjie Yang, Xueyan Li, Tong Zhou, Jiyuan Ren, Shuai Hu, Hongsheng Li, Yu Liu", "tldr": "", "abstract": "Building agents based on tree-search planning capabilities with learned models has achieved remarkable success in classic decision-making problems, such as Go and Atari.\nHowever, it has been deemed challenging or even infeasible to extend Monte Carlo Tree Search (MCTS) based algorithms to diverse real-world applications, especially when these environments involve complex action spaces and significant simulation costs, or inherent stochasticity.\nIn this work, we introduce LightZero, the first unified benchmark for deploying MCTS/MuZero in general sequential decision scenarios. 
\nSpecifically, we summarize the most critical challenges in designing a general MCTS-style decision-making solver, then decompose the tightly-coupled algorithm and system design of tree-search RL methods into distinct sub-modules.\nBy incorporating more appropriate exploration and optimization strategies, we can significantly enhance these sub-modules and construct powerful LightZero agents to tackle tasks across a wide range of domains, such as board games, Atari, MuJoCo, MiniGrid and GoBigger.\nDetailed benchmark results reveal the significant potential of such methods in building scalable and efficient decision intelligence.\nThe code is available as part of OpenDILab at https://github.com/opendilab/LightZero.", "keywords": "Reinforcement Learning;Monte Carlo Tree Search;Model-Based RL;Algorithm Benchmark", "primary_area": "", "supplementary_material": "", "author": "Yazhe Niu;Yuan Pu;Zhenjie Yang;Xueyan Li;Tong Zhou;Jiyuan Ren;Shuai Hu;Hongsheng Li;Yu Liu", "authorids": "~Yazhe_Niu1;~Yuan_Pu1;~Zhenjie_Yang1;~Xueyan_Li1;~Tong_Zhou5;~Jiyuan_Ren1;~Shuai_Hu1;~Hongsheng_Li3;~Yu_Liu2", "gender": "M;M;M;Not Specified;M;F;M;M;M", "homepage": "https://github.com/PaParaZz1;https://github.com/puyuan1996;https://github.com/jayyoung0802;https://github.com/karroyan;https://github.com/timothijoe;https://github.com/nighood;;http://www.ee.cuhk.edu.hk/~hsli;http://liuyu.us", "dblp": "252/5570.html;;;;;;;27/7402-1;97/2274-15", "google_scholar": "P3BUrBQAAAAJ;NdowrLgAAAAJ;jVlRiUEAAAAJ;;fG5PC9UAAAAJ;;;BN2Ze-QAAAAJ;", "orcid": ";;;;;;0000-0001-7563-5367;;", "linkedin": ";;;;;;;;", "or_profile": "~Yazhe_Niu1;~Yuan_Pu1;~Zhenjie_Yang1;~Xueyan_Li1;~Tong_Zhou5;~Jiyuan_Ren1;~Shuai_Hu1;~Hongsheng_Li3;~Yu_Liu2", "aff": "The Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai Jiaotong University;Chinese University of Hong Kong;Tsinghua University;;The Chinese University of Hong Kong;SenseTime", "aff_domain": "cuhk.edu.hk;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;cuhk.hk;mail.tsinghua.edu.cn;;cuhk.edu.hk;sensetime.com", "position": "PhD student;Researcher;PhD student;MS student;PhD student;MS student;;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nniu2023lightzero,\ntitle={LightZero: A Unified Benchmark for Monte Carlo Tree Search in General Sequential Decision Scenarios},\nauthor={Yazhe Niu and Yuan Pu and Zhenjie Yang and Xueyan Li and Tong Zhou and Jiyuan Ren and Shuai Hu and Hongsheng Li and Yu Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=oIUXpBnyjv}\n}", "github": "", "project": "", "reviewers": "AvYb;UFjT;dyMN;8SGB;xvyT", "pdf_size": 14260266, "rating": "6;7;7;7;9", "confidence": "3;4;3;5;5", "wc_summary_and_contributions": "87;64;60;41;74", "wc_strengths": "82;26;54;67;70", "wc_improvement": "214;21;103;212;119", "wc_limitations": "12;30;66;2;7", "wc_correctness": "1;1;4;6;1", "wc_clarity": "1;5;82;17;1", "wc_relation_to_prior_work": "1;1;5;17;1", "wc_documentation": "1;4;4;22;18", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "400;153;379;385;292", "wc_reply_reviewers": "107;0;55;20;0", "wc_reply_authors": "1290;677;399;1222;751", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "wc_summary_and_contributions_avg": [ 65.2, 15.27612516314265 ], "wc_strengths_avg": [ 59.8, 19.103926298015285 ],
"wc_improvement_avg": [ 133.8, 72.71698563609468 ], "wc_limitations_avg": [ 23.4, 23.303218661807215 ], "wc_correctness_avg": [ 2.6, 2.0591260281974 ], "wc_clarity_avg": [ 21.2, 30.96062014882777 ], "wc_relation_to_prior_work_avg": [ 5.0, 6.196773353931867 ], "wc_documentation_avg": [ 9.8, 8.49470423263812 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 321.8, 92.48005190309962 ], "wc_reply_reviewers_avg": [ 36.4, 40.61822251157724 ], "wc_reply_authors_avg": [ 867.8, 338.6829786097908 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6846531968814576, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1680296543039835610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cuhk.edu.hk;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;cuhk.hk;mail.tsinghua.edu.cn;;cuhk.edu.hk;sensetime.com", "author_num": 9, "aff_unique_index": "0;1;2;2;0;3;0;4", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Shanghai Jiao Tong University;Tsinghua University;SenseTime", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.shailab.org/;https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn;https://www.sensetime.com", "aff_unique_abbr": "CUHK;Shanghai AI Lab;SJTU;THU;SenseTime", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Worst-case Performance of Popular Approximate Nearest Neighbor Search Implementations: Guarantees and Limitations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70464", "id": "oKqaWlEfjY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d0ac28b79816b51124fcc804b2496a36-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oKqaWlEfjY", "openreview": "https://openreview.net/forum?id=oKqaWlEfjY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70464", "video": "https://nips.cc/virtual/2023/poster/70464", "author_site": "Piotr Indyk, Haike Xu", "tldr": "", "abstract": "Graph-based approaches to nearest neighbor search are popular and powerful tools for handling large datasets in practice, but they have limited theoretical guarantees. \nWe study the worst-case performance of recent graph-based approximate nearest neighbor search algorithms, such as HNSW, NSG and DiskANN. For DiskANN, we show that its \"slow preprocessing'' version provably supports approximate nearest neighbor search query with constant approximation ratio and poly-logarithmic query time, on data sets with bounded \"intrinsic'' dimension. \nFor the other data structure variants studied, including DiskANN with \"fast preprocessing'', HNSW and NSG, we present a family of instances on which the empirical query time required to achieve a \"reasonable'' accuracy is linear in instance size. 
For example, for DiskANN, we show that the query procedure can take at least $0.1 n$ steps on instances of size $n$ before it encounters any of the $5$ nearest neighbors of the query.", "keywords": "Nearest neighbor search; graph-based algorithms; worst-case analysis", "primary_area": "", "supplementary_material": "/attachment/af4b653daf3c40aceba4368d358ee5192b65bdd4.zip", "author": "Piotr Indyk;Haike Xu", "authorids": "~Piotr_Indyk1;~Haike_Xu1", "gender": ";M", "homepage": "https://people.csail.mit.edu/indyk/;https://www.haikexu.com/", "dblp": "i/PiotrIndyk;285/4898", "google_scholar": "oOwNKsAAAAAJ;bvDRaVcAAAAJ", "orcid": ";0009-0006-5526-6924", "linkedin": ";", "or_profile": "~Piotr_Indyk1;~Haike_Xu1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nindyk2023worstcase,\ntitle={Worst-case Performance of Popular Approximate Nearest Neighbor Search Implementations: Guarantees and Limitations},\nauthor={Piotr Indyk and Haike Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oKqaWlEfjY}\n}", "github": "", "project": "", "reviewers": "GZP3;ZVW4;EYFR", "pdf_size": 943313, "rating": "5;5;6", "confidence": "4;2;4", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "2;2;2", "wc_summary": "58;83;81", "wc_strengths": "110;87;169", "wc_weaknesses": "478;48;428", "wc_questions": "193;85;564", "wc_limitations": "14;6;25", "wc_review": "853;309;1267", "wc_reply_reviewers": "114;22;62", "wc_reply_authors": "0;0;103", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 74.0, 11.343133018115703 ], "wc_strengths_avg": [ 122.0, 34.53500639447844 ], "wc_weaknesses_avg": [ 318.0, 192.00694431886225 ], "wc_questions_avg": [ 280.6666666666667, 205.1411438227078 ], "wc_limitations_avg": [ 15.0, 7.788880963698615 ], "wc_review_avg": [ 809.6666666666666, 392.3003384596495 ], "wc_reply_reviewers_avg": [ 66.0, 37.66519171153476 ], "wc_reply_authors_avg": [ 34.333333333333336, 48.55466564147626 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17059671982688120543&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "When Demonstrations meet Generative World Models: A Maximum Likelihood Framework for Offline Inverse Reinforcement Learning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70463", "id": "oML3v2cFg2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce9d3c592712d23f2ec3671941d67fa1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oML3v2cFg2", 
"openreview": "https://openreview.net/forum?id=oML3v2cFg2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70463", "video": "https://nips.cc/virtual/2023/poster/70463", "author_site": "Siliang Zeng, Chenliang Li, Alfredo Garcia, Mingyi Hong", "tldr": "", "abstract": "Offline inverse reinforcement learning (Offline IRL) aims to recover the structure of rewards and environment dynamics that underlie observed actions in a fixed, finite set of demonstrations from an expert agent. Accurate models of expertise in executing a task has applications in safety-sensitive applications such as clinical decision making and autonomous driving. However, the structure of an expert's preferences implicit in observed actions is closely linked to the expert's model of the environment dynamics (i.e. the ``world''). Thus, inaccurate models of the world obtained from finite data with limited coverage could compound inaccuracy in estimated rewards. To address this issue, we propose a bi-level optimization formulation of the estimation task wherein the upper level is likelihood maximization based upon a conservative model of the expert's policy (lower level). The policy model is conservative in that it maximizes reward subject to a penalty that is increasing in the uncertainty of the estimated model of the world. We propose a new algorithmic framework to solve the bi-level optimization problem formulation and provide statistical and computational guarantees of performance for the associated optimal reward estimator. Finally, we demonstrate that the proposed algorithm outperforms the state-of-the-art offline IRL and imitation learning benchmarks by a large margin, over the continuous control tasks in MuJoCo and different datasets in the D4RL benchmark.", "keywords": "Inverse Reinforcement Learning;Model-based Offline Inverse Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/3c367c7acabb836bd0d9b99d66f5b70d651fd421.pdf", "author": "Siliang Zeng;Chenliang Li;Alfredo Garcia;Mingyi Hong", "authorids": "~Siliang_Zeng1;~Chenliang_Li3;~Alfredo_Garcia1;~Mingyi_Hong1", "gender": "M;M;M;M", "homepage": "https://siliangzeng.github.io/index.html;;https://agarcia.engr.tamu.edu;http://people.ece.umn.edu/~mhong/mingyi.html", "dblp": "38/9;;;57/8053", "google_scholar": "IfqsDyYAAAAJ;;;qRnP-p0AAAAJ", "orcid": ";;;", "linkedin": ";https://www.linkedin.cn/incareer/in/%E7%90%9B%E8%89%AF-%E6%9D%8E-5a333a23b;;", "or_profile": "~Siliang_Zeng1;~Chenliang_Li3;~Alfredo_Garcia1;~Mingyi_Hong1", "aff": "University of Minnesota, Twin Cities;The Chinese University of Hong Kong;Texas A&M University - College Station;University of Minnesota, Minneapolis", "aff_domain": "umn.edu;cuhk.edu.hk;tamu.edu;umn.edu", "position": "PhD student;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzeng2023when,\ntitle={When Demonstrations meet Generative World Models: A Maximum Likelihood Framework for Offline Inverse Reinforcement Learning},\nauthor={Siliang Zeng and Chenliang Li and Alfredo Garcia and Mingyi Hong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oML3v2cFg2}\n}", "github": "", "project": "", "reviewers": "1VXD;2Qwv;VBZK;VRGm;5szs", "pdf_size": 2178481, "rating": "6;7;7;8;8", "confidence": "4;3;3;3;3", "soundness": "3;3;2;4;4", "novelty": "2;3;3;4;4", "presentation": "2;3;3;3;4", "wc_summary": "86;262;121;208;67", "wc_strengths": "35;207;68;53;34", "wc_weaknesses": "95;239;24;275;126", 
"wc_questions": "169;60;112;117;22", "wc_limitations": "7;25;18;35;1", "wc_review": "392;793;343;688;250", "wc_reply_reviewers": "12;20;26;73;16", "wc_reply_authors": "18;22;17;24;21", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 148.8, 74.467173976189 ], "wc_strengths_avg": [ 79.4, 65.02491830060227 ], "wc_weaknesses_avg": [ 151.8, 92.74351729366317 ], "wc_questions_avg": [ 96.0, 50.592489561198704 ], "wc_limitations_avg": [ 17.2, 12.204917041913886 ], "wc_review_avg": [ 493.2, 209.65438225803913 ], "wc_reply_reviewers_avg": [ 29.4, 22.28542124349459 ], "wc_reply_authors_avg": [ 20.4, 2.576819745345025 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8017837257372733, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2232423337960987212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "umn.edu;cuhk.edu.hk;tamu.edu;umn.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Minnesota;Chinese University of Hong Kong;Texas A&M University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.minnesota.edu;https://www.cuhk.edu.hk;https://www.tamu.edu", "aff_unique_abbr": "UMN;CUHK;TAMU", "aff_campus_unique_index": "0;1;2;3", "aff_campus_unique": "Twin Cities;Hong Kong SAR;College Station;Minneapolis", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Unbiased constrained sampling with Self-Concordant Barrier Hamiltonian Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70462", "id": "oMm1dfo3tK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6745cb9889cc213bda803535f2d3902e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oMm1dfo3tK", "openreview": "https://openreview.net/forum?id=oMm1dfo3tK", "poster": "/media/PosterPDFs/NeurIPS%202023/70462.png?t=1701781665.8166625", "slides": "https://nips.cc/virtual/2023/poster/70462", "video": "https://nips.cc/virtual/2023/poster/70462", "author_site": "Maxence Noble, Valentin De Bortoli, Alain Durmus", "tldr": "", "abstract": "In this paper, we propose Barrier Hamiltonian Monte Carlo (BHMC), a version of the\n HMC algorithm which aims at sampling from a Gibbs distribution $\\pi$ on a manifold\n $\\mathsf{M}$, endowed with a Hessian metric $\\mathfrak{g}$ derived from a self-concordant\n barrier. Our method relies on Hamiltonian\n dynamics which comprises $\\mathfrak{g}$. Therefore, it incorporates the constraints defining\n $\\mathsf{M}$ and is able to exploit its underlying geometry. However, \n the corresponding Hamiltonian dynamics is defined via non separable Ordinary Differential Equations (ODEs) in contrast to the Euclidean case. It implies unavoidable bias in existing generalization of HMC to Riemannian manifolds. In this paper, we propose a new filter step, called ``involution checking step'', to address this problem. 
This step is implemented in two versions of BHMC, coined continuous BHMC (c-bHMC) and numerical BHMC (n-BHMC) respectively.\n Our main results establish that these two new algorithms generate reversible Markov\n chains with respect to $\\pi$ and do not suffer from any bias in comparison to previous implementations. Our conclusions are supported by numerical experiments where\n we consider target distributions defined on polytopes.", "keywords": "Hamiltonian Monte Carlo;Riemannian manifold;self-concordant barrier;constrained sampling", "primary_area": "", "supplementary_material": "/attachment/2d42a47b35f62334fb2767329701563681bf184a.pdf", "author": "Maxence Noble;Valentin De Bortoli;Alain Durmus", "authorids": "~Maxence_Noble1;~Valentin_De_Bortoli1;~Alain_Durmus1", "gender": "M;;M", "homepage": "https://maxencenoble.github.io/;https://vdeborto.github.io/;", "dblp": "306/7678;224/9338;01/11275", "google_scholar": "4eGHx3gAAAAJ;;", "orcid": ";;", "linkedin": "maxence-noble-393588172/;;", "or_profile": "~Maxence_Noble1;~Valentin_De_Bortoli1;~Alain_Durmus1", "aff": "\u00c9cole Polytechnique;University of Oxford;\u00c9cole Polytechnique", "aff_domain": "polytechnique.fr;ox.ac.uk;polytechnique.fr", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nnoble2023unbiased,\ntitle={Unbiased constrained sampling with Self-Concordant Barrier Hamiltonian Monte Carlo},\nauthor={Maxence Noble and Valentin De Bortoli and Alain Durmus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oMm1dfo3tK}\n}", "github": "", "project": "", "reviewers": "SWNi;iZM7;prii;nmMp", "pdf_size": 2914780, "rating": "6;6;6;7", "confidence": "2;3;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "84;56;56;58", "wc_strengths": "62;167;28;20", "wc_weaknesses": "116;128;194;13", "wc_questions": "105;238;34;189", "wc_limitations": "1;7;14;1", "wc_review": "368;596;326;281", "wc_reply_reviewers": "0;78;12;73", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.5, 11.863810517704673 ], "wc_strengths_avg": [ 69.25, 58.59767486854747 ], "wc_weaknesses_avg": [ 112.75, 64.79728003550767 ], "wc_questions_avg": [ 141.5, 78.1936698205168 ], "wc_limitations_avg": [ 5.75, 5.356071321407137 ], "wc_review_avg": [ 392.75, 121.31235510037715 ], "wc_reply_reviewers_avg": [ 40.75, 35.0526389876711 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5787134501568465686&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "polytechnique.fr;ox.ac.uk;polytechnique.fr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ecole Polytechnique;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.ox.ac.uk", "aff_unique_abbr": "X;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;United Kingdom" }, { "title": "Particle-based Variational Inference with Generalized 
Wasserstein Gradient Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70461", "id": "oNuam8eFz2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1c1f61023dca672117b58f813a12d99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oNuam8eFz2", "openreview": "https://openreview.net/forum?id=oNuam8eFz2", "poster": "/media/PosterPDFs/NeurIPS%202023/70461.png?t=1701493320.41877", "slides": "https://nips.cc/virtual/2023/poster/70461", "video": "https://nips.cc/virtual/2023/poster/70461", "author_site": "Ziheng Cheng, Shiyue Zhang, Longlin Yu, Cheng Zhang", "tldr": "", "abstract": "Particle-based variational inference methods (ParVIs) such as Stein variational gradient descent (SVGD) update the particles based on the kernelized Wasserstein gradient flow for the Kullback-Leibler (KL) divergence. However, the design of kernels is often non-trivial and can be restrictive for the flexibility of the method. Recent works show that functional gradient flow approximations with quadratic form regularization terms can improve performance. In this paper, we propose a ParVI framework, called generalized Wasserstein gradient descent (GWG), based on a generalized Wasserstein gradient flow of the KL divergence, which can be viewed as a functional gradient method with a broader class of regularizers induced by convex functions. We show that GWG exhibits strong convergence guarantees. We also provide an adaptive version that automatically chooses Wasserstein metric to accelerate convergence. In experiments, we demonstrate the effectiveness and efficiency of the proposed framework on both simulated and real data problems.", "keywords": "Particle-based VI;generalized Wasserstein gradient flow", "primary_area": "", "supplementary_material": "/attachment/11f68bcefd4cc291541323e9315d03c132f1cb34.pdf", "author": "Ziheng Cheng;Shiyue Zhang;Longlin Yu;Cheng Zhang", "authorids": "~Ziheng_Cheng4;~Shiyue_Zhang3;~Longlin_Yu1;~Cheng_Zhang3", "gender": "M;M;M;M", "homepage": "https://alexczh1.github.io/;https://github.com/ShiyueZhang66;https://github.com/longinYu;https://zcrabbit.github.io", "dblp": ";;;", "google_scholar": "M8Hz2NSNe3QC;nu6YfFkAAAAJ;;PddDrLgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ziheng_Cheng4;~Shiyue_Zhang3;~Longlin_Yu1;~Cheng_Zhang3", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncheng2023particlebased,\ntitle={Particle-based Variational Inference with Generalized Wasserstein Gradient Flow},\nauthor={Ziheng Cheng and Shiyue Zhang and Longlin Yu and Cheng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oNuam8eFz2}\n}", "github": "", "project": "", "reviewers": "t31F;QMGR;GLuk;Yghj", "pdf_size": 1059698, "rating": "4;5;6;7", "confidence": "3;5;5;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;3;3", "wc_summary": "48;64;108;144", "wc_strengths": "38;28;52;43", "wc_weaknesses": "132;47;63;38", "wc_questions": "26;18;106;157", "wc_limitations": "1;17;34;6", "wc_review": "245;174;363;388", "wc_reply_reviewers": "10;27;163;24", "wc_reply_authors": "16;13;88;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 1.0 ], 
"soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.0, 37.66961640367472 ], "wc_strengths_avg": [ 40.25, 8.671072598012312 ], "wc_weaknesses_avg": [ 70.0, 36.89850945499018 ], "wc_questions_avg": [ 76.75, 57.71210878143338 ], "wc_limitations_avg": [ 14.5, 12.658988901172163 ], "wc_review_avg": [ 292.5, 87.16220511207825 ], "wc_reply_reviewers_avg": [ 56.0, 62.10877554742164 ], "wc_reply_authors_avg": [ 29.25, 34.4483308739335 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3348265229871808639&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural Fields with Hard Constraints of Arbitrary Differential Order", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70460", "id": "oO1IreC6Sd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/47547ee84e3fbbcbbbbad7c1fd9a973b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oO1IreC6Sd", "openreview": "https://openreview.net/forum?id=oO1IreC6Sd", "poster": "/media/PosterPDFs/NeurIPS%202023/70460.png?t=1701996110.2368946", "slides": "https://nips.cc/virtual/2023/poster/70460", "video": "https://nips.cc/virtual/2023/poster/70460", "author_site": "Fangcheng Zhong, Kyle Fogarty, Param Hanji, Tianhao Wu, Alejandro Sztrajman, Andrew Spielberg, Andrea Tagliasacchi, Petra Bosilj, Cengiz Oztireli", "tldr": "", "abstract": "While deep learning techniques have become extremely popular for solving a broad range of optimization problems, methods to enforce hard constraints during optimization, particularly on deep neural networks, remain underdeveloped. Inspired by the rich literature on meshless interpolation and its extension to spectral collocation methods in scientific computing, we develop a series of approaches for enforcing hard constraints on neural fields, which we refer to as Constrained Neural Fields (CNF). The constraints can be specified as a linear operator applied to the neural field and its derivatives. We also design specific model representations and training strategies for problems where standard models may encounter difficulties, such as conditioning of the system, memory consumption, and capacity of the network when being constrained. Our approaches are demonstrated in a wide range of real-world applications. 
Additionally, we develop a framework that enables highly efficient model and constraint specification, which can be readily applied to any downstream task where hard constraints need to be explicitly satisfied during optimization.", "keywords": "neural fields;constrained optimization", "primary_area": "", "supplementary_material": "", "author": "Fangcheng Zhong;Kyle Thomas Fogarty;Param Hanji;Tianhao Walter Wu;Alejandro Sztrajman;Andrew Everett Spielberg;Andrea Tagliasacchi;Petra Bosilj;Cengiz Oztireli", "authorids": "~Fangcheng_Zhong1;~Kyle_Thomas_Fogarty1;~Param_Hanji1;~Tianhao_Walter_Wu1;~Alejandro_Sztrajman1;~Andrew_Everett_Spielberg1;~Andrea_Tagliasacchi2;~Petra_Bosilj1;~Cengiz_Oztireli1", "gender": ";M;M;M;;M;M;F;", "homepage": "https://www.cl.cam.ac.uk/~fz261/;https://kyle-fogarty.github.io;https://www.cl.cam.ac.uk/~pmh64/;https://chikayan.github.io/;;http://www.andrewspielberg.com;http://taiya.github.io;;", "dblp": "253/0188;;274/7377;17/1976-3;;;46/5514;;", "google_scholar": ";yEwwq4EAAAAJ;https://scholar.google.com/citations?view_op=list_works;HwE5K78AAAAJ;;8JeQMMUAAAAJ;1RmD-YsAAAAJ;pycrLqgAAAAJ;", "orcid": ";0000-0002-1888-4006;0000-0002-7985-4177;0000-0002-3807-5839;;;;;", "linkedin": "fangcheng-zhong-125b9a85/;kylefogarty/;;;;;;;", "or_profile": "~Fangcheng_Zhong1;~Kyle_Thomas_Fogarty1;~Param_Hanji1;~Tianhao_Walter_Wu1;~Alejandro_Sztrajman1;~Andrew_Everett_Spielberg1;~Andrea_Tagliasacchi2;~Petra_Bosilj1;~Cengiz_Oztireli1", "aff": "University of Cambridge;Computer Laboratory, University of Cambridge;University of Cambridge;University of Cambridge;;School of Engineering and Applied Sciences, Harvard University;Google DeepMind;University of Lincoln;", "aff_domain": "cam.ac.uk;cl.cam.ac.uk;cam.ac.uk;cam.ac.uk;;seas.harvard.edu;google.com;lincoln.ac.uk;", "position": "Researcher;PhD student;Postdoc;PhD student;;Postdoc;Researcher;Lecturer;", "bibtex": "@inproceedings{\nzhong2023neural,\ntitle={Neural Fields with Hard Constraints of Arbitrary Differential Order},\nauthor={Fangcheng Zhong and Kyle Thomas Fogarty and Param Hanji and Tianhao Walter Wu and Alejandro Sztrajman and Andrew Everett Spielberg and Andrea Tagliasacchi and Petra Bosilj and Cengiz Oztireli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oO1IreC6Sd}\n}", "github": "", "project": "", "reviewers": "t12b;o12o;RAef;rctK", "pdf_size": 40629042, "rating": "4;6;6;7", "confidence": "4;4;3;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "65;61;57;172", "wc_strengths": "49;66;126;92", "wc_weaknesses": "185;339;75;61", "wc_questions": "134;16;44;1", "wc_limitations": "40;29;13;1", "wc_review": "473;511;315;327", "wc_reply_reviewers": "323;83;12;0", "wc_reply_authors": "603;376;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.75, 48.14755964740061 ], "wc_strengths_avg": [ 83.25, 29.046299247924853 ], "wc_weaknesses_avg": [ 165.0, 111.34630662936243 ], "wc_questions_avg": [ 48.75, 51.58185242893086 ], "wc_limitations_avg": [ 20.75, 14.905955185763842 ], "wc_review_avg": [ 406.5, 86.6530438011268 ], "wc_reply_reviewers_avg": [ 104.5, 130.0778612985315 ], "wc_reply_authors_avg": [ 244.75, 257.5726839165986 ], "reply_reviewers_avg": [ 
1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6802005468099916987&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cl.cam.ac.uk;cam.ac.uk;cam.ac.uk;;seas.harvard.edu;google.com;lincoln.ac.uk;", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;3", "aff_unique_norm": "University of Cambridge;Harvard University;Google;University of Lincoln", "aff_unique_dep": ";School of Engineering and Applied Sciences;Google DeepMind;", "aff_unique_url": "https://www.cam.ac.uk;https://www.harvard.edu;https://deepmind.com;https://www.lincoln.ac.uk", "aff_unique_abbr": "Cambridge;Harvard;DeepMind;UoL", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Activity Grammars for Temporal Action Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70459", "id": "oOXZ5JEjPb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee6c4b99b4c0d3d60efd22c1ecdd9891-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oOXZ5JEjPb", "openreview": "https://openreview.net/forum?id=oOXZ5JEjPb", "poster": "/media/PosterPDFs/NeurIPS%202023/70459.png?t=1701430268.1996157", "slides": "https://nips.cc/virtual/2023/poster/70459", "video": "https://nips.cc/virtual/2023/poster/70459", "author_site": "Dayoung Gong, Joonseok Lee, Deunsol Jung, Suha Kwak, Minsu Cho", "tldr": "", "abstract": "Sequence prediction on temporal data requires the ability to understand compositional structures of multi-level semantics beyond individual and contextual properties of parts. For this reason, the task of temporal action segmentation, which aims to translate an untrimmed activity video into a sequence of action segments, remains challenging. \nThis paper addresses the problem by introducing an effective activity grammar to guide neural predictions for temporal action segmentation. \nWe propose a novel grammar induction algorithm, dubbed KARI, that extracts a powerful context-free grammar from action sequence data. We also develop an efficient generalized parser, dubbed BEP, that transforms frame-level probability distributions into a reliable sequence of actions according to the induced grammar with recursive rules. \nOur approach can be combined with any neural network for temporal action segmentation to enhance the sequence prediction and discover its compositional structure.
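KARI and BEP themselves are not reproduced here, but the broader idea of grammar-guided decoding can be illustrated with a much simpler sketch: restrict frame-level predictions to the action transitions a grammar allows (reduced here to a bigram relation, far weaker than a context-free grammar) and decode with dynamic programming. All names below are illustrative.

```python
import numpy as np

def grammar_guided_decode(log_probs, allowed):
    """Viterbi decoding of frame-wise action log-probs under transition rules.

    log_probs: (T, A) per-frame action log-probabilities from any segmentation
               network; allowed: set of (prev, next) action pairs permitted by
               the (bigram-reduced) grammar. Self-transitions are always kept
               so the path can stay inside one segment.
    """
    T, A = log_probs.shape
    score = np.full((T, A), -np.inf)
    back = np.zeros((T, A), dtype=int)
    score[0] = log_probs[0]
    for t in range(1, T):
        for a in range(A):
            prevs = [p for p in range(A) if p == a or (p, a) in allowed]
            best = max(prevs, key=lambda p: score[t - 1, p])
            score[t, a] = score[t - 1, best] + log_probs[t, a]
            back[t, a] = best
    path = [int(np.argmax(score[-1]))]
    for t in range(T - 1, 0, -1):  # follow back-pointers to recover the path
        path.append(int(back[t, path[-1]]))
    return path[::-1]

# Toy usage: 3 actions where the grammar only allows the order 0 -> 1 -> 2.
rng = np.random.default_rng(0)
frame_log_probs = np.log(rng.dirichlet(np.ones(3), size=20))
print(grammar_guided_decode(frame_log_probs, allowed={(0, 1), (1, 2)}))
```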
\nExperimental results demonstrate that our method significantly improves temporal action segmentation in terms of both performance and interpretability on two standard benchmarks, Breakfast and 50 Salads.", "keywords": "neuro-symbolic approach;Temporal action segmentation;grammar", "primary_area": "", "supplementary_material": "", "author": "Dayoung Gong;Joonseok Lee;Deunsol Jung;Suha Kwak;Minsu Cho", "authorids": "~Dayoung_Gong1;~Joonseok_Lee3;~Deunsol_Jung1;~Suha_Kwak3;~Minsu_Cho1", "gender": "F;M;M;M;M", "homepage": "https://gongda0e.github.io/;https://jsleeo424.github.io;https://hesedjds.github.io;https://suhakwak.github.io/;http://cvlab.postech.ac.kr/~mcho/", "dblp": "321/1839;;225/4579;65/6173;", "google_scholar": "https://scholar.google.com/citations?hl=ko;;T5B5N7cAAAAJ;-gscDIEAAAAJ;5TyoF5QAAAAJ", "orcid": ";;;;", "linkedin": "dayoung-gong-11120717a/;jameslee0424/;deunsol-jung-27b737146/;;minsu-cho-062b3750/", "or_profile": "~Dayoung_Gong1;~Joonseok_Lee3;~Deunsol_Jung1;~Suha_Kwak3;~Minsu_Cho1", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology;Pohang University of Science and Technology;POSTECH;POSTECH", "aff_domain": "postech.ac.kr;postech.edu;postech.ac.kr;postech.ac.kr;postech.ac.kr", "position": "PhD student;MS student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ngong2023activity,\ntitle={Activity Grammars for Temporal Action Segmentation},\nauthor={Dayoung Gong and Joonseok Lee and Deunsol Jung and Suha Kwak and Minsu Cho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oOXZ5JEjPb}\n}", "github": "", "project": "", "reviewers": "rBwB;FXdk;CeJ9;YXrU", "pdf_size": 11381558, "rating": "5;5;5;5", "confidence": "5;3;3;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;2", "wc_summary": "46;104;74;37", "wc_strengths": "43;64;86;35", "wc_weaknesses": "130;102;67;263", "wc_questions": "3;13;18;89", "wc_limitations": "7;7;6;9", "wc_review": "229;290;251;433", "wc_reply_reviewers": "18;0;0;0", "wc_reply_authors": "304;269;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.25, 26.20472285676763 ], "wc_strengths_avg": [ 57.0, 19.81161275615895 ], "wc_weaknesses_avg": [ 140.5, 74.16367035145981 ], "wc_questions_avg": [ 30.75, 34.06152521540984 ], "wc_limitations_avg": [ 7.25, 1.0897247358851685 ], "wc_review_avg": [ 300.75, 79.41780341963633 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 143.25, 143.78347436336347 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7801762928218220035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "postech.ac.kr;postech.edu;postech.ac.kr;postech.ac.kr;postech.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "South Korea" }, { "title": "SoundCam: A Dataset for Finding Humans Using Room Acoustics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73471", "id": "oQSfcVTNr1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4289154c9209b679ac761a50d5fec3a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=oQSfcVTNr1", "openreview": "https://openreview.net/forum?id=oQSfcVTNr1", "poster": "/media/PosterPDFs/NeurIPS%202023/73471.png?t=1702348224.845735", "slides": "https://nips.cc/virtual/2023/poster/73471", "video": "https://nips.cc/virtual/2023/poster/73471", "author_site": "Mason Wang, Samuel Clarke, Jui-Hsien Wang, Ruohan Gao, Jiajun Wu", "tldr": "", "abstract": "A room\u2019s acoustic properties are a product of the room\u2019s geometry, the objects within the room, and their specific positions. A room\u2019s acoustic properties can be characterized by its impulse response (RIR) between a source and listener location, or roughly inferred from recordings of natural signals present in the room. Variations in the positions of objects in a room can effect measurable changes in the room\u2019s acoustic properties, as characterized by the RIR. Existing datasets of RIRs either do not systematically vary positions of objects in an environment, or they consist of only simulated RIRs. We present SoundCam, the largest dataset of unique RIRs from in-the-wild rooms publicly released to date. It includes 5,000 10-channel real-world measurements of room impulse responses and 2,000 10-channel recordings of music in three different rooms, including a controlled acoustic lab, an in-the-wild living room, and a conference room, with different humans in positions throughout each room. 
We show that these measurements can be used for interesting tasks, such as detecting and identifying humans, and tracking their positions.", "keywords": "audio learning;acoustics", "primary_area": "", "supplementary_material": "/attachment/541cf8714940620151db1101c18cbd81239d36c9.zip", "author": "Mason Long Wang;Samuel Clarke;Jui-Hsien Wang;Ruohan Gao;Jiajun Wu", "authorids": "~Mason_Long_Wang1;~Samuel_Clarke1;~Jui-Hsien_Wang1;~Ruohan_Gao2;~Jiajun_Wu1", "gender": "M;;M;M;M", "homepage": "https://masonlwang.com/t;;http://juiwang.com/;https://ruohangao.github.io/;https://jiajunwu.com", "dblp": ";;204/0029;176/5787;117/4768", "google_scholar": "UA0JP1gAAAAJ;;DSUfEqMAAAAJ;i02oEgMAAAAJ;2efgcS0AAAAJ", "orcid": ";;0000-0002-9210-8718;0000-0002-8346-1114;0000-0002-4176-343X", "linkedin": "mason-wang-3b5288104/;;;;jiajunwu/", "or_profile": "~Mason_Long_Wang1;~Samuel_Clarke1;~Jui-Hsien_Wang1;~Ruohan_Gao2;~Jiajun_Wu1", "aff": "Stanford University;;Adobe Research;Stanford University;Stanford University", "aff_domain": "stanford.edu;;adobe.com;cs.stanford.edu;stanford.edu", "position": "MS student;;Researcher;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nwang2023soundcam,\ntitle={SoundCam: A Dataset for Finding Humans Using Room Acoustics},\nauthor={Mason Long Wang and Samuel Clarke and Jui-Hsien Wang and Ruohan Gao and Jiajun Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=oQSfcVTNr1}\n}", "github": "", "project": "", "reviewers": "m2YB;DvPP;JEJV;TBmK;pmuj", "pdf_size": 9912205, "rating": "5;6;7;7;8", "confidence": "3;3;4;3;4", "wc_summary_and_contributions": "134;53;66;83;76", "wc_strengths": "21;28;98;83;127", "wc_improvement": "7;302;11;49;124", "wc_limitations": "105;10;62;43;36", "wc_correctness": "3;35;1;45;37", "wc_clarity": "3;17;1;16;26", "wc_relation_to_prior_work": "14;33;1;20;45", "wc_documentation": "8;87;1;90;45", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "296;566;242;430;517", "wc_reply_reviewers": "0;31;0;68;57", "wc_reply_authors": "855;908;408;256;247", "reply_reviewers": "0;1;0;1;2", "reply_authors": "3;3;1;2;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 82.4, 27.702707448911923 ], "wc_strengths_avg": [ 71.4, 40.88324840322745 ], "wc_improvement_avg": [ 98.6, 110.03744817106585 ], "wc_limitations_avg": [ 51.2, 31.644272783554374 ], "wc_correctness_avg": [ 24.2, 18.443427013437606 ], "wc_clarity_avg": [ 12.6, 9.350935782048767 ], "wc_relation_to_prior_work_avg": [ 22.6, 15.21315220458929 ], "wc_documentation_avg": [ 46.2, 37.64784190362045 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 410.2, 124.42250600273248 ], "wc_reply_reviewers_avg": [ 31.2, 28.166646942793882 ], "wc_reply_authors_avg": [ 534.8, 289.2897509418541 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7205766921228922, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8942901567299590964&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "stanford.edu;;adobe.com;cs.stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Stanford University;Adobe", "aff_unique_dep": ";Adobe Research", "aff_unique_url": "https://www.stanford.edu;https://research.adobe.com", 
"aff_unique_abbr": "Stanford;Adobe", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Analyzing the Sample Complexity of Self-Supervised Image Reconstruction Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70458", "id": "oRn953uhFq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cfaea3a519edf73c3a0480ae8f00bc4e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oRn953uhFq", "openreview": "https://openreview.net/forum?id=oRn953uhFq", "poster": "/media/PosterPDFs/NeurIPS%202023/70458.png?t=1701941459.7596245", "slides": "https://nips.cc/virtual/2023/poster/70458", "video": "https://nips.cc/virtual/2023/poster/70458", "author_site": "Tobit Klug, Dogukan Atik, Reinhard Heckel", "tldr": "", "abstract": "Supervised training of deep neural networks on pairs of clean image and noisy measurement achieves state-of-the-art performance for many image reconstruction tasks, but such training pairs are difficult to collect. Self-supervised methods enable training based on noisy measurements only, without clean images. In this work, we investigate the cost of self-supervised training in terms of sample complexity for a class of self-supervised methods that enable the computation of unbiased estimates of gradients of the supervised loss, including noise2noise methods. We analytically show that a model trained with such self-supervised training is as good as the same model trained in a supervised fashion, but self-supervised training requires more examples than supervised training. We then study self-supervised denoising and accelerated MRI empirically and characterize the cost of self-supervised training in terms of the number of additional samples required, and find that the performance gap between self-supervised and supervised training vanishes as a function of the training examples, at a problem-dependent rate, as predicted by our theory.", "keywords": "image reconstruction;denoising;accelerated MRI;self-supervised;sample complexity", "primary_area": "", "supplementary_material": "", "author": "Tobit Klug;Dogukan Atik;Reinhard Heckel", "authorids": "~Tobit_Klug1;~Dogukan_Atik1;~Reinhard_Heckel1", "gender": "M;;M", "homepage": "https://www.ce.cit.tum.de/mli/people/tobit-klug/;;", "dblp": "330/2923;;81/9668", "google_scholar": ";;ZWV0I7cAAAAJ", "orcid": ";;", "linkedin": ";do%C4%9Fukan-atik-715959157/;", "or_profile": "~Tobit_Klug1;~Dogukan_Atik1;~Reinhard_Heckel1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich", "aff_domain": "tum.de;tum.de;tum.de", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nklug2023analyzing,\ntitle={Analyzing the Sample Complexity of Self-Supervised Image Reconstruction Methods},\nauthor={Tobit Klug and Dogukan Atik and Reinhard Heckel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oRn953uhFq}\n}", "github": "", "project": "", "reviewers": "X8db;8mky;RTiS", "pdf_size": 942733, "rating": "5;5;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "3;3;4", "wc_summary": "66;13;80", "wc_strengths": "71;84;41", "wc_weaknesses": "72;131;62", "wc_questions": "60;17;129", "wc_limitations": "43;8;52", "wc_review": "312;253;364", "wc_reply_reviewers": "12;15;13", 
"wc_reply_authors": "0;28;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 53.0, 28.855964143772194 ], "wc_strengths_avg": [ 65.33333333333333, 18.00617178142601 ], "wc_weaknesses_avg": [ 88.33333333333333, 30.44484995674784 ], "wc_questions_avg": [ 68.66666666666667, 46.13265895460852 ], "wc_limitations_avg": [ 34.333333333333336, 18.979521127315678 ], "wc_review_avg": [ 309.6666666666667, 45.345586579315764 ], "wc_reply_reviewers_avg": [ 13.333333333333334, 1.247219128924647 ], "wc_reply_authors_avg": [ 9.333333333333334, 13.199326582148887 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3875852035061531913&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tum.de;tum.de;tum.de", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Neural Harmonics: Bridging Spectral Embedding and Matrix Completion in Self-Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70457", "id": "oSYjkJKHZx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bede8c7d5ed2348494d2b0621d613592-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oSYjkJKHZx", "openreview": "https://openreview.net/forum?id=oSYjkJKHZx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70457", "video": "https://nips.cc/virtual/2023/poster/70457", "author_site": "Marina Munkhoeva, Ivan Oseledets", "tldr": "", "abstract": "Self-supervised methods received tremendous attention thanks to their seemingly heuristic approach to learning representations that respect the semantics of the data without any apparent supervision in the form of labels. A growing body of literature is already being published in an attempt to build a coherent and theoretically grounded understanding of the workings of a zoo of losses used in modern self-supervised representation learning methods. 
\nIn this paper, we attempt to provide an understanding from the perspective of a Laplace operator and connect the inductive bias stemming from the augmentation process to a low-rank matrix completion problem.\nTo this end, we leverage the results from low-rank matrix completion to provide theoretical analysis on the convergence of modern SSL methods and a key property that affects their downstream performance.", "keywords": "unsupervised learning;self-supervised learning;representation learning;matrix completion", "primary_area": "", "supplementary_material": "", "author": "Marina Munkhoeva;Ivan Oseledets", "authorids": "~Marina_Munkhoeva1;~Ivan_Oseledets1", "gender": ";M", "homepage": ";http://oseledets.github.io", "dblp": ";56/7175", "google_scholar": ";https://scholar.google.ru/citations?user=5kMqBQEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Marina_Munkhoeva1;~Ivan_Oseledets1", "aff": ";Institute of Numerical Mathematics", "aff_domain": ";inm.ras.ru", "position": ";Researcher", "bibtex": "@inproceedings{\nmunkhoeva2023neural,\ntitle={Neural Harmonics: Bridging Spectral Embedding and Matrix Completion in Self-Supervised Learning},\nauthor={Marina Munkhoeva and Ivan Oseledets},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oSYjkJKHZx}\n}", "github": "", "project": "", "reviewers": "2Q8M;JcEa;LpdS;1mcz", "pdf_size": 386761, "rating": "5;6;7;8", "confidence": "3;4;3;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;3;3", "wc_summary": "80;48;193;78", "wc_strengths": "56;63;47;80", "wc_weaknesses": "44;22;18;17", "wc_questions": "45;14;94;1", "wc_limitations": "51;6;11;33", "wc_review": "276;153;363;209", "wc_reply_reviewers": "0;13;13;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.75, 55.30992225631853 ], "wc_strengths_avg": [ 61.5, 12.093386622447824 ], "wc_weaknesses_avg": [ 25.25, 10.985786271359915 ], "wc_questions_avg": [ 38.5, 35.80851853958776 ], "wc_limitations_avg": [ 25.25, 18.005207580030838 ], "wc_review_avg": [ 250.25, 78.31786194732335 ], "wc_reply_reviewers_avg": [ 8.75, 5.3091901453988255 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1360936380804970482&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";inm.ras.ru", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Institute of Numerical Mathematics", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "title": "Softmax Output Approximation for Activation Memory-Efficient Training of Attention-based Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70456", "id": "oScaeIibRx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/311257424b6d80e930fc93b224f0a63e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oScaeIibRx", "openreview": "https://openreview.net/forum?id=oScaeIibRx", "poster": "/media/PosterPDFs/NeurIPS%202023/70456.png?t=1701410696.444472", "slides": 
"https://nips.cc/virtual/2023/poster/70456", "video": "https://nips.cc/virtual/2023/poster/70456", "author_site": "Changhyeon Lee, Seulki Lee", "tldr": "", "abstract": "In this paper, we propose to approximate the softmax output, which is the key product of the attention mechanism, to reduce its activation memory usage when training attention-based networks (aka Transformers). During the forward pass of the network, the proposed softmax output approximation method stores only a small fraction of the entire softmax output required for back-propagation and evicts the rest of the softmax output from memory. Then, during the backward pass, the evicted softmax activation output is approximated to compose the gradient to perform back-propagation for model training. Considering most attention-based models heavily rely on the softmax-based attention module that usually takes one of the biggest portions of the network, approximating the softmax activation output can be a simple yet effective way to decrease the training memory requirement of many attention-based networks. The experiment with various attention-based models and relevant tasks, i.e., machine translation, text classification, and sentiment analysis, shows that it curtails the activation memory usage of the softmax-based attention module by up to 84% (6.2\u00d7 less memory) in model training while achieving comparable or better performance, e.g., up to 5.4% higher classification accuracy.", "keywords": "Memory efficient;Activation saving memory;NLP;Transformer", "primary_area": "", "supplementary_material": "", "author": "Changhyeon Lee;Seulki Lee", "authorids": "~Changhyeon_Lee1;~Seulki_Lee1", "gender": "M;M", "homepage": "https://github.com/2changhyeon;https://sites.google.com/view/seulkilee", "dblp": ";19/1764-2", "google_scholar": "https://scholar.google.co.kr/citations?user=hKS77mYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0004-7162-0845", "linkedin": ";seulki-lee-0b9997a5/", "or_profile": "~Changhyeon_Lee1;~Seulki_Lee1", "aff": "Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;unist.ac.kr", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nlee2023softmax,\ntitle={Softmax Output Approximation for Activation Memory-Efficient Training of Attention-based Networks},\nauthor={Changhyeon Lee and Seulki Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oScaeIibRx}\n}", "github": "", "project": "", "reviewers": "UEEi;jkF6;fWPe;8HU2;s2ms;r1xy", "pdf_size": 931063, "rating": "4;5;5;7;7;8", "confidence": "4;3;4;5;4;3", "soundness": "2;3;3;3;4;4", "novelty": "3;3;3;3;3;4", "presentation": "2;3;3;3;4;4", "wc_summary": "111;146;55;69;56;27", "wc_strengths": "62;29;23;78;43;22", "wc_weaknesses": "626;64;33;214;44;6", "wc_questions": "18;182;117;197;80;28", "wc_limitations": "9;10;18;31;26;1", "wc_review": "826;431;246;589;249;84", "wc_reply_reviewers": "650;0;0;89;0;10", "wc_reply_authors": "453;0;0;103;0;43", "reply_reviewers": "1;0;0;1;0;1", "reply_authors": "2;1;1;2;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 77.33333333333333, 39.592367390136644 ], 
"wc_strengths_avg": [ 42.833333333333336, 20.907866674744437 ], "wc_weaknesses_avg": [ 164.5, 216.97446086271688 ], "wc_questions_avg": [ 103.66666666666667, 69.10057082896557 ], "wc_limitations_avg": [ 15.833333333333334, 10.318537794776072 ], "wc_review_avg": [ 404.1666666666667, 246.45041196061237 ], "wc_reply_reviewers_avg": [ 124.83333333333333, 237.00240270699553 ], "wc_reply_authors_avg": [ 99.83333333333333, 162.2070864324025 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3510650466495127852&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "unist.ac.kr;unist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.unist.ac.kr", "aff_unique_abbr": "UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Learning Cuts via Enumeration Oracles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70455", "id": "oU4QHdcIWW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa0126bb7ebad258bf4ffdbbac2dd787-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oU4QHdcIWW", "openreview": "https://openreview.net/forum?id=oU4QHdcIWW", "poster": "/media/PosterPDFs/NeurIPS%202023/70455.png?t=1702240014.6745172", "slides": "https://nips.cc/virtual/2023/poster/70455", "video": "https://nips.cc/virtual/2023/poster/70455", "author_site": "Daniel Thuerck, Boro Sofranac, Marc E Pfetsch, Sebastian Pokutta", "tldr": "", "abstract": "Cutting-planes are one of the most important building blocks for solving large-scale integer programming (IP) problems to (near) optimality. The majority of cutting plane approaches rely on explicit rules to derive valid inequalities that can separate the target point from the feasible set. Local cuts, on the other hand, seek to directly derive the facets of the underlying polyhedron and use them as cutting planes. However, current approaches rely on solving Linear Programming (LP) problems in order to derive such a hyperplane. In this paper, we present a novel generic approach for learning the facets of the underlying polyhedron by accessing it implicitly via an enumeration oracle in a reduced dimension. This is achieved by embedding the oracle in a variant of the Frank-Wolfe algorithm which is capable of generating strong cutting planes, effectively turning the enumeration oracle into a separation oracle. 
We demonstrate the effectiveness of our approach with a case study targeting the multidimensional knapsack problem (MKP).", "keywords": "Integer Programming;Cutting Planes;Optimization", "primary_area": "", "supplementary_material": "/attachment/7147be49306d95181a983ccdba26e7e9f9999696.zip", "author": "Daniel Thuerck;Boro Sofranac;Marc Pfetsch;Sebastian Pokutta", "authorids": "~Daniel_Thuerck2;~Boro_Sofranac1;~Marc_Pfetsch1;~Sebastian_Pokutta1", "gender": "M;M;M;M", "homepage": "https://www.culip.org;http://www.sofranac.me/;https://www2.mathematik.tu-darmstadt.de/~pfetsch/index.en.html;http://www.pokutta.com", "dblp": ";;48/1761;75/7718", "google_scholar": ";;;", "orcid": ";0000-0003-2252-9469;0000-0002-0947-7193;", "linkedin": ";;;", "or_profile": "~Daniel_Thuerck2;~Boro_Sofranac1;~Marc_Pfetsch1;~Sebastian_Pokutta1", "aff": ";Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Darmstadt;TU Berlin", "aff_domain": ";tu-berlin.de;tu-darmstadt.de;tu-berlin.de", "position": ";PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nthuerck2023learning,\ntitle={Learning Cuts via Enumeration Oracles},\nauthor={Daniel Thuerck and Boro Sofranac and Marc Pfetsch and Sebastian Pokutta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oU4QHdcIWW}\n}", "github": "", "project": "", "reviewers": "bBeY;cm37;avng;5Xhs;XrPs;kHh3", "pdf_size": 465678, "rating": "3;5;6;6;7;7", "confidence": "4;3;3;4;4;4", "soundness": "2;2;4;3;3;3", "novelty": "2;2;2;3;4;3", "presentation": "2;3;4;4;4;4", "wc_summary": "117;163;69;249;61;56", "wc_strengths": "5;103;24;44;57;90", "wc_weaknesses": "5;165;31;204;179;93", "wc_questions": "10;102;46;255;67;394", "wc_limitations": "5;34;1;1;1;1", "wc_review": "142;567;171;753;365;634", "wc_reply_reviewers": "245;103;12;94;38;0", "wc_reply_authors": "158;25;0;51;20;0", "reply_reviewers": "2;1;1;1;1;0", "reply_authors": "2;2;1;2;2;1", "rating_avg": [ 5.666666666666667, 1.3743685418725535 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999299 ], "presentation_avg": [ 3.5, 0.7637626158259734 ], "wc_summary_avg": [ 119.16666666666667, 69.12890535481922 ], "wc_strengths_avg": [ 53.833333333333336, 34.41616222390611 ], "wc_weaknesses_avg": [ 112.83333333333333, 75.4197513181321 ], "wc_questions_avg": [ 145.66666666666666, 135.43592170797558 ], "wc_limitations_avg": [ 7.166666666666667, 12.088791870525727 ], "wc_review_avg": [ 438.6666666666667, 230.40302852947823 ], "wc_reply_reviewers_avg": [ 82.0, 82.37515806762781 ], "wc_reply_authors_avg": [ 42.333333333333336, 54.5242046149129 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.08574929257125441, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=517902807673181510&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";tu-berlin.de;tu-darmstadt.de;tu-berlin.de", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-berlin.de;https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Berlin;TUD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berlin", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Follow-ups Also Matter: Improving Contextual Bandits via Post-serving Contexts", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70454", "id": "oaCDiKoJ2w", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29f47df77b7e536ebd0fe5e0cc964a32-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oaCDiKoJ2w", "openreview": "https://openreview.net/forum?id=oaCDiKoJ2w", "poster": "/media/PosterPDFs/NeurIPS%202023/70454.png?t=1697784149.3531094", "slides": "https://nips.cc/virtual/2023/poster/70454", "video": "https://nips.cc/virtual/2023/poster/70454", "author_site": "Chaoqi Wang, Ziyu Ye, Zhe Feng, Ashwinkumar Badanidiyuru Varadaraja, Haifeng Xu", "tldr": "", "abstract": "Standard contextual bandit problem assumes that all the relevant contexts are observed before the algorithm chooses an arm. This modeling paradigm, while useful, often falls short when dealing with problems in which additional valuable contexts can be observed after arm selection. For example, content recommendation platforms like Youtube, Instagram, Tiktok receive much additional features about a user's reward after the user clicks a content (e.g., how long the user stayed, what is the user's watch speed, etc.). To improve online learning efficiency in these applications, we study a novel contextual bandit problem with post-serving contexts and design a new algorithm, poLinUCB, that achieves tight regret under standard assumptions. Core to our technical proof is a robustified and generalized version of the well-known Elliptical Potential Lemma (EPL), which can accommodate noise in data. Such robustification is necessary for tackling our problem, though we believe it could also be of general interest.\nExtensive empirical tests on both synthetic and real-world datasets demonstrate the significant benefit of utilitzing post-serving contexts as well as the superior performance of our algorithm over the state-of-the-art approaches.", "keywords": "linear stochastic bandits;online learning;partial information;contextual bandits", "primary_area": "", "supplementary_material": "", "author": "Chaoqi Wang;Ziyu Ye;Zhe Feng;Ashwinkumar Badanidiyuru;Haifeng Xu", "authorids": "~Chaoqi_Wang1;~Ziyu_Ye1;~Zhe_Feng3;~Ashwinkumar_Badanidiyuru1;~Haifeng_Xu1", "gender": "M;;M;M;M", "homepage": "https://alecwangcq.github.io;https://hazelye-bot.github.io/;https://scholar.harvard.edu/zfeng/home;https://sites.google.com/site/ashwinkumarbv/home;http://www.haifeng-xu.com/", "dblp": "210/1073;;36/1508-4;15/2638;04/1895", "google_scholar": "https://scholar.google.ca/citations?user=yN2iRpwAAAAJ;S2da4LUAAAAJ;MKbTrgIAAAAJ;HhValEMAAAAJ;nLgg388AAAAJ", "orcid": ";0000-0002-0078-6758;;;", "linkedin": ";;;ashwinkumar-badanidiyuru-varadaraja-42b9832b/;", "or_profile": "~Chaoqi_Wang1;~Ziyu_Ye1;~Zhe_Feng3;~Ashwinkumar_Badanidiyuru1;~Haifeng_Xu1", "aff": "University of Chicago;University of Chicago;Google;Google;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu;google.com;google.com;cs.uchicago.edu", "position": "PhD student;PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwang2023followups,\ntitle={Follow-ups Also Matter: Improving Contextual Bandits via Post-serving Contexts},\nauthor={Chaoqi Wang and Ziyu Ye and Zhe Feng and Ashwinkumar Badanidiyuru and Haifeng Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oaCDiKoJ2w}\n}", "github": "", "project": "", "reviewers": "8bhs;BiVL;uygu;3eSw;FTfP", "pdf_size": 2712938, "rating": "6;6;7;7;8", "confidence": "4;3;3;2;3", "soundness": "3;3;3;3;4", "novelty": "2;2;4;3;4", "presentation": "3;1;3;4;4", "wc_summary": "119;63;130;51;196", "wc_strengths": "66;63;61;122;158", "wc_weaknesses": "352;74;8;50;103", "wc_questions": "60;64;111;91;33", "wc_limitations": "8;3;67;30;6", "wc_review": "605;267;377;344;496", "wc_reply_reviewers": "42;13;42;0;20", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 111.8, 52.05919707410017 ], "wc_strengths_avg": [ 94.0, 39.27849284277593 ], "wc_weaknesses_avg": [ 117.4, 121.35831244706725 ], "wc_questions_avg": [ 71.8, 26.873034811870433 ], "wc_limitations_avg": [ 22.8, 24.07820591323199 ], "wc_review_avg": [ 417.8, 119.17281569217033 ], "wc_reply_reviewers_avg": [ 23.4, 16.487571076419957 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12568018752701522438&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "uchicago.edu;uchicago.edu;google.com;google.com;cs.uchicago.edu", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Chicago;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.uchicago.edu;https://www.google.com", "aff_unique_abbr": "UChicago;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "TFLEX: Temporal Feature-Logic Embedding Framework for Complex Reasoning over Temporal Knowledge Graph", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70453", "id": "oaGdsgB18L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e71a42c64851834013e2658b69d7fe93-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oaGdsgB18L", "openreview": "https://openreview.net/forum?id=oaGdsgB18L", "poster": "/media/PosterPDFs/NeurIPS%202023/70453.png?t=1701020182.3172169", "slides": "https://nips.cc/virtual/2023/poster/70453", "video": "https://nips.cc/virtual/2023/poster/70453", "author_site": "Xueyuan Lin, Haihong E, Chengjin Xu, Gengxian Zhou, Haoran Luo, Tianyi Hu, Fenglong Su, Ningyuan Li, Mingzhi Sun", "tldr": "", "abstract": "Multi-hop logical reasoning over knowledge graphs plays a fundamental role in many artificial intelligence tasks. Recent complex query embedding methods for reasoning focus on static knowledge graphs (KGs), while temporal knowledge graphs (TKGs) have not been fully explored. Reasoning over TKGs poses two challenges: 1. the query should be answered with entities or timestamps; 2. the operators should consider both set logic on the entity set and temporal logic on the timestamp set.\n\nTo bridge this gap, we introduce the multi-hop logical reasoning problem on TKGs and then propose the first temporal complex query embedding framework, the Temporal Feature-Logic Embedding framework (TFLEX), to answer temporal complex queries. 
Specifically, we utilize fuzzy logic to compute the logic part of the Temporal Feature-Logic embedding, thus naturally modeling all first-order logic operations on the entity set. In addition, we further extend fuzzy logic to the timestamp set to cope with three extra temporal operators (**After**, **Before** and **Between**).\n\nExperiments on numerous query patterns demonstrate the effectiveness of our method.", "keywords": "Temporal Knowledge Graph Reasoning;Temporal Knowledge Graph Embedding;Temporal Knowledge Graph;Temporal Logic;Knowledge Graph Reasoning;Knowledge Graph Embedding;Knowledge Graph;Machine Learning", "primary_area": "", "supplementary_material": "/attachment/d164eff5237ebda69cfee2a1b6c2f87938828637.zip", "author": "Xueyuan Lin;Haihong E;Chengjin Xu;Gengxian Zhou;Haoran Luo;Tianyi Hu;Fenglong Su;Ningyuan Li;Mingzhi Sun", "authorids": "~Xueyuan_Lin1;~Haihong_E1;~Chengjin_Xu1;~Gengxian_Zhou1;~Haoran_Luo1;~Tianyi_Hu2;~Fenglong_Su1;~Ningyuan_Li2;~Mingzhi_Sun1", "gender": "M;F;M;M;M;M;M;M;M", "homepage": "https://github.com/LinXueyuanStdio;https://teacher.bupt.edu.cn/ehaihong/zh_CN/index.htm;https://soledad921.github.io/chengjin_xu/;;https://lhrlab.github.io/;https://github.com/TianYi2000;;;http://none", "dblp": "184/6525;43/10222.html;247/6268.html;;227/5902-1.html;;205/0212.html;183/6738;299/7222", "google_scholar": "2_ssNsIAAAAJ;https://scholar.google.com.hk/citations?user=J4akh64AAAAJ;https://scholar.google.de/citations?user=sIts5VgAAAAJ;;https://scholar.google.com.hk/citations?user=Q9Nv9mcAAAAJ;;;D5Oz9T8AAAAJ;", "orcid": "0000-0002-8489-7796;;;0000-0002-4660-8279;0000-0003-2727-0361;;;0009-0003-4981-8268;", "linkedin": ";;;;haoran-luo-88a96b255/;;;https://www.linkedin.cn/incareer/in/%E6%B3%9E%E5%8E%9F-%E6%9D%8E-18950823a;", "or_profile": "~Xueyuan_Lin1;~Haihong_E1;~Chengjin_Xu1;~Gengxian_Zhou1;~Haoran_Luo1;~Tianyi_Hu2;~Fenglong_Su1;~Ningyuan_Li2;~Mingzhi_Sun1", "aff": "Beijing University of Post and Telecommunication;Beijing University of Post and Telecommunication;University of Bonn;;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;uni-bonn.de;;bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn;bupt.edu.cn", "position": "MS student;Full Professor;PhD student;;PhD student;MS student;;MS student;PhD student", "bibtex": "@inproceedings{\nlin2023tflex,\ntitle={{TFLEX}: Temporal Feature-Logic Embedding Framework for Complex Reasoning over Temporal Knowledge Graph},\nauthor={Xueyuan Lin and Haihong E and Chengjin Xu and Gengxian Zhou and Haoran Luo and Tianyi Hu and Fenglong Su and Ningyuan Li and Mingzhi Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oaGdsgB18L}\n}", "github": "", "project": "", "reviewers": "Zrqw;NJDL;SQqW;BpBt;AEhV", "pdf_size": 5150664, "rating": "5;6;6;7;7", "confidence": "3;3;3;4;3", "soundness": "2;2;3;4;3", "novelty": "3;3;3;4;3", "presentation": "2;2;2;3;3", "wc_summary": "181;132;192;107;73", "wc_strengths": "34;114;21;117;23", "wc_weaknesses": "462;455;27;75;50", "wc_questions": "5;134;52;95;22", "wc_limitations": "12;27;22;32;1", "wc_review": "694;862;314;426;169", "wc_reply_reviewers": "150;25;0;0;0", "wc_reply_authors": "239;766;0;0;0", "reply_reviewers": "2;1;0;0;0", "reply_authors": "3;3;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 
0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 137.0, 44.68109219793089 ], "wc_strengths_avg": [ 61.8, 44.079019952807485 ], "wc_weaknesses_avg": [ 213.8, 200.38502938093953 ], "wc_questions_avg": [ 61.6, 47.37341026356452 ], "wc_limitations_avg": [ 18.8, 11.08873302050329 ], "wc_review_avg": [ 493.0, 252.2173665709798 ], "wc_reply_reviewers_avg": [ 35.0, 58.309518948453004 ], "wc_reply_authors_avg": [ 201.0, 297.2783207702842 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14389841437173672665&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "bupt.edu.cn;bupt.edu.cn;uni-bonn.de;;bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn;bupt.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;University of Bonn", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.uni-bonn.de/", "aff_unique_abbr": "BUPT;UBonn", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Germany" }, { "title": "FGPrompt: Fine-grained Goal Prompting for Image-goal Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70452", "id": "oaJEB5Qcia", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/27c4e15d9af120d7fef04432c7db577f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oaJEB5Qcia", "openreview": "https://openreview.net/forum?id=oaJEB5Qcia", "poster": "/media/PosterPDFs/NeurIPS%202023/70452.png?t=1702003229.95901", "slides": "https://nips.cc/virtual/2023/poster/70452", "video": "https://nips.cc/virtual/2023/poster/70452", "author_site": "Xinyu Sun, Peihao Chen, Jugang Fan, Jian Chen, Thomas Li, Mingkui Tan", "tldr": "", "abstract": "Learning to navigate to an image-specified goal is an important but challenging task for autonomous systems like household robots. The agent is required to understand and reason about the location of the navigation goal from a picture taken at the goal position. Existing methods try to solve this problem by learning a navigation policy, which captures semantic features of the goal image and the observation image independently and finally fuses them to predict a sequence of navigation actions. However, these methods suffer from two major limitations. 1) They may miss detailed information in the goal image, and thus fail to infer the goal location. 2) More critically, it is hard to focus on the goal-relevant regions in the observation image, because they attempt to understand the observation without goal conditioning. In this paper, we aim to overcome these limitations by designing a Fine-grained Goal Prompting (FGPrompt) method for image-goal navigation. In particular, we leverage fine-grained and high-resolution feature maps in the goal image as prompts to perform conditioned embedding, which preserves detailed information in the goal image and guides the observation encoder to pay attention to goal-relevant regions. 
Compared with existing methods on the image-goal navigation benchmark, our method brings significant performance improvement on 3 benchmark datasets (\\textit{i.e.,} Gibson, MP3D, and HM3D). Especially on Gibson, we surpass the state-of-the-art success rate by 8\\% with only 1/50 of the model size.", "keywords": "Visual Navigation;Image-Goal Navigation;Embodied AI", "primary_area": "", "supplementary_material": "/attachment/bcd7e29891682dc70e985e8c28853e7632133ca2.pdf", "author": "Xinyu Sun;Peihao Chen;Jugang Fan;Jian Chen;Thomas H. Li;Mingkui Tan", "authorids": "~Xinyu_Sun1;~Peihao_Chen1;~Jugang_Fan1;~Jian_Chen7;~Thomas_H._Li3;~Mingkui_Tan2", "gender": "M;M;;;M;M", "homepage": "https://github.com/XinyuSun;https://peihaochen.github.io/;https://github.com/felixfjg;;http://pku.edu.cn;https://tanmingkui.github.io/", "dblp": ";249/8975;;;213/4037;49/2007", "google_scholar": "ALq8sMgAAAAJ;KkpEXpsAAAAJ;;;;https://scholar.google.com.sg/citations?user=EVsoTGkAAAAJ", "orcid": ";0000-0002-6847-1621;;0000-0003-4769-1526;;0000-0001-8856-756X", "linkedin": ";;;;;", "or_profile": "~Xinyu_Sun1;~Peihao_Chen1;~Jugang_Fan1;~Jian_Chen7;~Thomas_H._Li3;~Mingkui_Tan1", "aff": "South China University of Technology;South China University of Technology;Hunan University;South China University of Technology;AIIT, Peking University;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;hnu.edu.cn;scut.edu.cn;aiit.org.cn;scut.edu.cn", "position": "MS student;PhD student;Undergrad student;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nsun2023fgprompt,\ntitle={{FGP}rompt: Fine-grained Goal Prompting for Image-goal Navigation},\nauthor={Xinyu Sun and Peihao Chen and Jugang Fan and Jian Chen and Thomas H. Li and Mingkui Tan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oaJEB5Qcia}\n}", "github": "", "project": "", "reviewers": "CU6j;mycq;9djz;o71w", "pdf_size": 1473607, "rating": "5;7;7;7", "confidence": "4;4;5;3", "soundness": "3;3;4;3", "novelty": "3;3;2;3", "presentation": "3;3;2;2", "wc_summary": "68;77;87;60", "wc_strengths": "54;29;59;27", "wc_weaknesses": "64;92;140;99", "wc_questions": "166;57;111;170", "wc_limitations": "7;4;59;1", "wc_review": "359;259;456;357", "wc_reply_reviewers": "32;59;59;11", "wc_reply_authors": "18;6;0;5", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.0, 10.074720839804943 ], "wc_strengths_avg": [ 42.25, 14.376630342329875 ], "wc_weaknesses_avg": [ 98.75, 27.178806081209675 ], "wc_questions_avg": [ 126.0, 46.15733961137708 ], "wc_limitations_avg": [ 17.75, 23.909987452945266 ], "wc_review_avg": [ 357.75, 69.6540558761656 ], "wc_reply_reviewers_avg": [ 40.25, 20.16649448962313 ], "wc_reply_authors_avg": [ 7.25, 6.609652033201143 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10155596198166524680&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "scut.edu.cn;scut.edu.cn;hnu.edu.cn;scut.edu.cn;aiit.org.cn;scut.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "South China 
University of Technology;Hunan University;Peking University", "aff_unique_dep": ";;AIIT", "aff_unique_url": "https://www.scut.edu.cn;http://www.hunu.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "SCUT;HNU;PKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SQ Lower Bounds for Learning Mixtures of Linear Classifiers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70451", "id": "obCNIzeSrg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/571fcbd2ad4273d9df51d7abc1172112-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=obCNIzeSrg", "openreview": "https://openreview.net/forum?id=obCNIzeSrg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70451", "video": "https://nips.cc/virtual/2023/poster/70451", "author_site": "Ilias Diakonikolas, Daniel Kane, Yuxin Sun", "tldr": "", "abstract": "We study the problem of learning mixtures of linear classifiers under Gaussian covariates.\nGiven sample access to a mixture of $r$ distributions on $\\mathbb{R}^n$ of the form $(\\mathbf{x},y_{\\ell})$, $\\ell \\in [r]$,\nwhere $\\mathbf{x}\\sim\\mathcal{N}(0,\\mathbf{I}_n)$ and\n\n$y_\\ell=\\mathrm{sign}(\\langle\\mathbf{v}_{\\ell},\\mathbf{x}\\rangle)$\n\nfor an unknown unit vector $\\mathbf{v}_{\\ell}$,\n\nthe goal is to learn the underlying distribution in total variation distance. Our main result is a Statistical Query (SQ) lower bound suggesting that known algorithms for this problem are essentially best possible,\neven for the special case of uniform mixtures.\nIn particular, we show that the complexity of any SQ algorithm for the problem is $n^{\\mathrm{poly}(1/\\Delta) \\log(r)}$,\nwhere $\\Delta$ is a lower bound on the pairwise $\\ell_2$-separation between the $\\mathbf{v}_{\\ell}$'s.\nThe key technical ingredient underlying our result is a new construction of spherical designs on the unit sphere that may be of independent interest.", "keywords": "mixtures models;linear classifier;Statistical Query model;spherical designs", "primary_area": "", "supplementary_material": "/attachment/3b3f4f664e4c1146aceffe9199049be7c6b6d5e7.pdf", "author": "Ilias Diakonikolas;Daniel Kane;Yuxin Sun", "authorids": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Yuxin_Sun2", "gender": "M;M;M", "homepage": "http://www.iliasdiakonikolas.org/;http://cseweb.ucsd.edu/~dakane/;https://pages.cs.wisc.edu/~yxsun/", "dblp": "d/IliasDiakonikolas;52/6817;", "google_scholar": "Vb3FLmkAAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;8VuomNgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Yuxin_Sun2", "aff": "University of Wisconsin, Madison;University of California, San Diego;Department of Computer Science, University of Wisconsin, Madison", "aff_domain": "wisc.edu;ucsd.edu;cs.wisc.edu", "position": "Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2023sq,\ntitle={{SQ} Lower Bounds for Learning Mixtures of Linear Classifiers},\nauthor={Ilias Diakonikolas and Daniel Kane and Yuxin Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=obCNIzeSrg}\n}", "github": "", "project": "", "reviewers": "n8Pi;TFYT;Jk98;Azis;vDTe", "pdf_size": 360816, "rating": "5;5;6;7;7", "confidence": "3;2;2;3;3", "soundness": "3;3;4;4;3", "novelty": "2;3;3;3;3", "presentation": "4;2;4;3;3", 
"wc_summary": "55;60;96;170;190", "wc_strengths": "25;53;59;60;23", "wc_weaknesses": "37;160;44;27;16", "wc_questions": "43;47;22;93;88", "wc_limitations": "1;1;1;54;1", "wc_review": "161;321;222;404;318", "wc_reply_reviewers": "26;51;0;115;15", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 114.2, 55.91565076076643 ], "wc_strengths_avg": [ 44.0, 16.516658257650064 ], "wc_weaknesses_avg": [ 56.8, 52.45722066598648 ], "wc_questions_avg": [ 58.6, 27.441574298862665 ], "wc_limitations_avg": [ 11.6, 21.200000000000003 ], "wc_review_avg": [ 285.2, 84.72402256739231 ], "wc_reply_reviewers_avg": [ 41.4, 40.39108812597155 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.45643546458763845, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1491779993555941619&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "wisc.edu;ucsd.edu;cs.wisc.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Wisconsin;University of California, San Diego;University of Wisconsin-Madison", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UCSD;UW-Madison", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Madison;San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FedGCN: Convergence-Communication Tradeoffs in Federated Training of Graph Convolutional Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70450", "id": "ody3RBUuJS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fc07feae9af49dd3f1a1e049b77f4e17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ody3RBUuJS", "openreview": "https://openreview.net/forum?id=ody3RBUuJS", "poster": "/media/PosterPDFs/NeurIPS%202023/70450.png?t=1701976797.9404993", "slides": "https://nips.cc/virtual/2023/poster/70450", "video": "https://nips.cc/virtual/2023/poster/70450", "author_site": "Yuhang Yao, Weizhao Jin, Srivatsan Ravi, Carlee Joe-Wong", "tldr": "", "abstract": "Methods for training models on graphs distributed across multiple clients have recently grown in popularity, due to the size of these graphs as well as regulations on keeping data where it is generated. However, the cross-client edges naturally exist among clients. Thus, distributed methods for training a model on a single graph incur either significant communication overhead between clients or a loss of available information to the training. We introduce the Federated Graph Convolutional Network (FedGCN) algorithm, which uses federated learning to train GCN models for semi-supervised node classification with fast convergence and little communication. Compared to prior methods that require extra communication among clients at each training round, FedGCN clients only communicate with the central server in one pre-training step, greatly reducing communication costs and allowing the use of homomorphic encryption to further enhance privacy. 
We theoretically analyze the tradeoff between FedGCN's convergence rate and communication cost under different data distributions. Experimental results show that our FedGCN algorithm achieves better model accuracy with 51.7\\% faster convergence on average and at least 100$\\times$ less communication compared to prior work.", "keywords": "Federated Graph Learning;", "primary_area": "", "supplementary_material": "/attachment/dea06a862fd7ddac8c24d6bce51a582b5943eaa8.zip", "author": "Yuhang Yao;Weizhao Jin;Srivatsan Ravi;Carlee Joe-Wong", "authorids": "~Yuhang_Yao2;~Weizhao_Jin1;~Srivatsan_Ravi1;~Carlee_Joe-Wong1", "gender": ";;;F", "homepage": "https://www.andrew.cmu.edu/user/yuhangya/;https://weizhaojin.netlify.app/;https://sites.usc.edu/srivatsr/;https://www.andrew.cmu.edu/user/cjoewong/", "dblp": "203/0159;266/5162;;40/9937.html", "google_scholar": "oQIV0BoAAAAJ;I4E46yEAAAAJ;;XEztdZgAAAAJ", "orcid": "0000-0002-7045-0002;0000-0003-3269-5823;;", "linkedin": "yuhang-yao/;;;", "or_profile": "~Yuhang_Yao2;~Weizhao_Jin1;~Srivatsan_Ravi1;~Carlee_Joe-Wong1", "aff": "Carnegie Mellon University;University of Southern California;University of Southern California;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;usc.edu;usc.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyao2023fedgcn,\ntitle={Fed{GCN}: Convergence-Communication Tradeoffs in Federated Training of Graph Convolutional Networks},\nauthor={Yuhang Yao and Weizhao Jin and Srivatsan Ravi and Carlee Joe-Wong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ody3RBUuJS}\n}", "github": "", "project": "", "reviewers": "BmP4;d6pK;PXRJ;z8YN", "pdf_size": 697434, "rating": "6;6;6;7", "confidence": "3;3;4;3", "soundness": "2;3;2;4", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "144;78;90;125", "wc_strengths": "28;29;54;82", "wc_weaknesses": "24;222;324;79", "wc_questions": "48;119;6;83", "wc_limitations": "26;2;1;1", "wc_review": "270;450;475;370", "wc_reply_reviewers": "13;20;428;93", "wc_reply_authors": "36;26;1639;113", "reply_reviewers": "1;1;3;1", "reply_authors": "2;2;5;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.25, 26.47050245084139 ], "wc_strengths_avg": [ 48.25, 22.094965489902897 ], "wc_weaknesses_avg": [ 162.25, 118.08550927188314 ], "wc_questions_avg": [ 64.0, 41.850925915683156 ], "wc_limitations_avg": [ 7.5, 10.688779163215974 ], "wc_review_avg": [ 391.25, 80.02929151254558 ], "wc_reply_reviewers_avg": [ 138.5, 170.05366799925253 ], "wc_reply_authors_avg": [ 453.5, 685.2760392717668 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3835880075013408732&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": "andrew.cmu.edu;usc.edu;usc.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.usc.edu", "aff_unique_abbr": "CMU;USC", "aff_campus_unique_index": 
"1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Demystifying Structural Disparity in Graph Neural Networks: Can One Size Fit All?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70449", "id": "oef30oScVB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74f1edadbdf495e7258ee8db7b1d3acd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oef30oScVB", "openreview": "https://openreview.net/forum?id=oef30oScVB", "poster": "/media/PosterPDFs/NeurIPS%202023/70449.png?t=1699921881.0836067", "slides": "https://nips.cc/virtual/2023/poster/70449", "video": "https://nips.cc/virtual/2023/poster/70449", "author_site": "Haitao Mao, Zhikai Chen, Wei Jin, Haoyu Han, Yao Ma, Tong Zhao, Neil Shah, Jiliang Tang", "tldr": "", "abstract": "Recent studies on Graph Neural Networks(GNNs) provide both empirical and theoretical evidence supporting their effectiveness in capturing structural patterns on both homophilic and certain heterophilic graphs. Notably, most real-world homophilic and heterophilic graphs are comprised of a mixture of nodes in both homophilic and heterophilic structural patterns, exhibiting a structural disparity. However, the analysis of GNN performance with respect to nodes exhibiting different structural patterns, e.g., homophilic nodes in heterophilic graphs, remains rather limited. In the present study, we provide evidence that Graph Neural Networks(GNNs) on node classification typically perform admirably on homophilic nodes within homophilic graphs and heterophilic nodes within heterophilic graphs while struggling on the opposite node set, exhibiting a performance disparity. We theoretically and empirically identify effects of GNNs on testing nodes exhibiting distinct structural patterns. We then propose a rigorous, non-i.i.d PAC-Bayesian generalization bound for GNNs, revealing reasons for the performance disparity, namely the aggregated feature distance and homophily ratio difference between training and testing nodes. 
Furthermore, we demonstrate the practical implications of our new findings via (1) elucidating the effectiveness of deeper GNNs; and (2) revealing an over-looked distribution shift factor on graph out-of-distribution problem and proposing a new scenario accordingly.", "keywords": "Graph Neural Network", "primary_area": "", "supplementary_material": "/attachment/f1de23435a7fbecca590df2989d146b8e7bb0334.zip", "author": "Haitao Mao;Zhikai Chen;Wei Jin;Haoyu Han;Yao Ma;Tong Zhao;Neil Shah;Jiliang Tang", "authorids": "~Haitao_Mao1;~Zhikai_Chen3;~Wei_Jin4;~Haoyu_Han1;~Yao_Ma3;~Tong_Zhao3;~Neil_Shah2;~Jiliang_Tang1", "gender": "M;;M;M;M;M;M;M", "homepage": "http://currytang.github.io;http://www.cs.emory.edu/~wjin30/;https://cse.msu.edu/~hanhaoy1/;https://yaoma24.github.io/;https://tzhao.io/;http://nshah.net;https://www.cse.msu.edu/~tangjili/;", "dblp": "92/40;66/2173-9;257/5633-1;212/7871.html;94/6503-3;71/7771;64/10812;", "google_scholar": "6hUny38AAAAJ;eWow24EAAAAJ;;wf9TTOIAAAAJ;05cRc-MAAAAJ;Qut69OgAAAAJ;WtzKMWAAAAAJ;3GmlKM4AAAAJ", "orcid": "0009-0009-7305-8629;;0000-0002-2529-6042;;0000-0001-7660-1732;0000-0003-3261-8430;0000-0001-7125-3898;", "linkedin": ";;;;;;;", "or_profile": "~Zhikai_Chen3;~Wei_Jin4;~Haoyu_Han1;~Yao_Ma3;~Tong_Zhao3;~Neil_Shah2;~Jiliang_Tang1;~Mao_Haitao1", "aff": "Michigan State University;Michigan State University;Michigan State University;New Jersey Institute of Technology;Snap Inc.;Snap Inc.;Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu;njit.edu;snap.com;snap.com;msu.edu;msu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Researcher;Research Scientist;Full Professor;PhD student", "bibtex": "@inproceedings{\nmao2023demystifying,\ntitle={Demystifying Structural Disparity in Graph Neural Networks: Can One Size Fit All?},\nauthor={Haitao Mao and Zhikai Chen and Wei Jin and Haoyu Han and Yao Ma and Tong Zhao and Neil Shah and Jiliang Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oef30oScVB}\n}", "github": "", "project": "", "reviewers": "itvA;GxBV;pgwY;JCYy", "pdf_size": 1642206, "rating": "5;5;7;8", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;4", "presentation": "2;3;4;3", "wc_summary": "72;123;132;60", "wc_strengths": "50;75;74;79", "wc_weaknesses": "11;71;30;49", "wc_questions": "233;4;32;34", "wc_limitations": "1;1;1;1", "wc_review": "367;274;269;223", "wc_reply_reviewers": "21;12;0;38", "wc_reply_authors": "74;50;0;38", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 96.75, 31.20396609407208 ], "wc_strengths_avg": [ 69.5, 11.412712210513327 ], "wc_weaknesses_avg": [ 40.25, 22.26404051379713 ], "wc_questions_avg": [ 75.75, 91.55974825216592 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 283.25, 52.27989575353034 ], "wc_reply_reviewers_avg": [ 17.75, 13.863170633011771 ], "wc_reply_authors_avg": [ 40.5, 26.7348087705897 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 67, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6632988786894265203&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "msu.edu;msu.edu;msu.edu;njit.edu;snap.com;snap.com;msu.edu;msu.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;2;2;0;0", "aff_unique_norm": "Michigan State University;New Jersey Institute of Technology;Snap Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.msu.edu;https://www.njit.edu;https://www.snapinc.com", "aff_unique_abbr": "MSU;NJIT;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online (Multinomial) Logistic Bandit: Improved Regret and Constant Computation Cost", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70448", "id": "ofa1U5BJVJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5ef04392708bb2340cb9b7da41225660-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ofa1U5BJVJ", "openreview": "https://openreview.net/forum?id=ofa1U5BJVJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70448", "video": "https://nips.cc/virtual/2023/poster/70448", "author_site": "Yu-Jie Zhang, Masashi Sugiyama", "tldr": "", "abstract": "This paper investigates the logistic bandit problem, a variant of the generalized linear bandit model that utilizes a logistic model to depict the feedback from an action. While most existing research focuses on the binary logistic bandit problem, the multinomial case, which considers more than two possible feedback values, offers increased practical relevance and adaptability for use in complex decision-making problems such as reinforcement learning. In this paper, we provide an algorithm that enjoys both statistical and computational efficiency for the logistic bandit problem. In the binary case, our method improves the state-of-the-art binary logistic bandit method by reducing the per-round computation cost from $\\mathcal{O}(\\log T)$ to $\\mathcal{O}(1)$ with respect to the time horizon $T$, while still preserving the minimax optimal guarantee up to logarithmic factors. In the multinomial case, with $K+1$ potential feedback values, our algorithm achieves an $\\tilde{\\mathcal{O}}(K\\sqrt{T})$ regret bound with $\\mathcal{O}(1)$ computational cost per round. 
The result not only improves the $\\tilde{\\mathcal{O}}(K\\sqrt{\\kappa T})$ bound for the best-known tractable algorithm\u2014where the large constant $\\kappa$ increases exponentially with the diameter of the parameter domain\u2014but also reduces the $\\mathcal{O}(T)$ computational complexity demanded by the previous method.", "keywords": "Logistic Bandit;Generalized Linear Bandit;Regret Bound;Computation Cost", "primary_area": "", "supplementary_material": "", "author": "Yu-Jie Zhang;Masashi Sugiyama", "authorids": "~Yu-Jie_Zhang1;~Masashi_Sugiyama1", "gender": "M;M", "homepage": "https://yujie-zhang96.github.io/;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "234/6681;35/1228", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": ";0000-0001-6658-6743", "linkedin": ";", "or_profile": "~Yu-Jie_Zhang1;~Masashi_Sugiyama1", "aff": "The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023online,\ntitle={Online (Multinomial) Logistic Bandit: Improved Regret and Constant Computation Cost},\nauthor={Yu-Jie Zhang and Masashi Sugiyama},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ofa1U5BJVJ}\n}", "github": "", "project": "", "reviewers": "6tgu;Ru1r;XVWd;oJs3;Rks7", "pdf_size": 3227844, "rating": "6;6;6;7;7", "confidence": "4;4;3;3;2", "soundness": "2;2;2;3;4", "novelty": "3;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "242;50;387;47;148", "wc_strengths": "145;84;79;49;170", "wc_weaknesses": "91;137;159;52;253", "wc_questions": "314;150;127;55;156", "wc_limitations": "7;11;75;13;3", "wc_review": "799;432;827;216;730", "wc_reply_reviewers": "49;98;60;24;0", "wc_reply_authors": "98;133;84;73;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 174.8, 128.195787762313 ], "wc_strengths_avg": [ 105.4, 44.89365211252032 ], "wc_weaknesses_avg": [ 138.4, 68.22199058954524 ], "wc_questions_avg": [ 160.4, 84.78820672711507 ], "wc_limitations_avg": [ 21.8, 26.820887382784335 ], "wc_review_avg": [ 600.8, 238.20445000041457 ], "wc_reply_reviewers_avg": [ 46.2, 33.19277029715959 ], "wc_reply_authors_avg": [ 77.6, 43.747457068954304 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7637626158259732, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7275095335757658019&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 4, "email": "u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Learning Efficient Coding of Natural Images with Maximum Manifold Capacity Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70447", "id": "og9V7NgOrQ", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/4bc6e94f2308c888fb69626138a2633e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=og9V7NgOrQ", "openreview": "https://openreview.net/forum?id=og9V7NgOrQ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70447", "video": "https://nips.cc/virtual/2023/poster/70447", "author_site": "Thomas Yerxa, Yilun Kuang, Eero Simoncelli, SueYeon Chung", "tldr": "", "abstract": "The efficient coding hypothesis proposes that the response properties of sensory systems are adapted to the statistics of their inputs such that they capture maximal information about the environment, subject to biological constraints. While elegant, information theoretic properties are notoriously difficult to measure in practical settings or to employ as objective functions in optimization. This difficulty has necessitated that computational models designed to test the hypothesis employ several different information metrics ranging from approximations and lower bounds to proxy measures like reconstruction error. Recent theoretical advances have characterized a novel and ecologically relevant efficiency metric, the ``manifold capacity,\u201d which is the number of object categories that may be represented in a linearly separable fashion. However, calculating manifold capacity is a computationally intensive iterative procedure that until now has precluded its use as an objective. Here we outline the simplifying assumptions that allow manifold capacity to be optimized directly, yielding Maximum Manifold Capacity Representations (MMCR). The resulting method is closely related to and inspired by advances in the field of self supervised learning (SSL), and we demonstrate that MMCRs are competitive with state of the art results on standard SSL benchmarks. Empirical analyses reveal differences between MMCRs and representations learned by other SSL frameworks, and suggest a mechanism by which manifold compression gives rise to class separability. 
Finally we evaluate a set of SSL methods on a suite of neural predicitivity benchmarks, and find MMCRs are higly competitive as models of the ventral stream.", "keywords": "computational neuroscience;theoretical neuroscience;efficient coding;representation geometry;neural manifolds;self-supervised learning;statistical physics of learning", "primary_area": "", "supplementary_material": "/attachment/3ed6f7a369fc840d1242fe6b3fcc1573d8322010.zip", "author": "Thomas Edward Yerxa;Yilun Kuang;Eero P Simoncelli;SueYeon Chung", "authorids": "~Thomas_Edward_Yerxa1;~Yilun_Kuang1;~Eero_P_Simoncelli1;~SueYeon_Chung1", "gender": "M;M;F;M", "homepage": ";https://yilunkuang.github.io/;https://sites.google.com/site/sueyeonchung/;https://www.cns.nyu.edu/~eero/", "dblp": "311/8930;;173/5418;30/5604", "google_scholar": "n4Uu99gAAAAJ;XvIasgEAAAAJ;h7yVv0QAAAAJ;MplR7_cAAAAJ", "orcid": ";;;0000-0002-1206-527X", "linkedin": ";yilun-mark-kuang/;;eero-simoncelli-445782123", "or_profile": "~Thomas_Edward_Yerxa1;~Yilun_Kuang1;~SueYeon_Chung1;~Eero_Peter_Simoncelli1", "aff": "New York University;New York University;Flatiron Institute / Simons Foundation;New York University", "aff_domain": "nyu.edu;nyu.edu;simonsfoundation.org;nyu.edu", "position": "PhD student;Undergrad student;Principal Investigator;Full Professor", "bibtex": "@inproceedings{\nyerxa2023learning,\ntitle={Learning Efficient Coding of Natural Images with Maximum Manifold Capacity Representations},\nauthor={Thomas Edward Yerxa and Yilun Kuang and Eero P Simoncelli and SueYeon Chung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=og9V7NgOrQ}\n}", "github": "", "project": "", "reviewers": "eLMY;9aX7;g5Gb;1Hrd;moiT", "pdf_size": 1130094, "rating": "5;6;6;7;7", "confidence": "4;1;4;3;4", "soundness": "2;3;2;3;3", "novelty": "3;3;2;3;3", "presentation": "2;4;2;3;3", "wc_summary": "102;49;74;96;46", "wc_strengths": "62;45;39;90;60", "wc_weaknesses": "493;25;272;116;929", "wc_questions": "66;1;31;45;80", "wc_limitations": "11;1;1;1;1", "wc_review": "734;121;417;348;1116", "wc_reply_reviewers": "69;0;563;0;1081", "wc_reply_authors": "0;0;466;0;760", "reply_reviewers": "1;0;4;0;3", "reply_authors": "1;1;3;1;4", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 73.4, 23.130931671681537 ], "wc_strengths_avg": [ 59.2, 17.70197729068705 ], "wc_weaknesses_avg": [ 367.0, 322.71039648576556 ], "wc_questions_avg": [ 44.6, 27.557938965024217 ], "wc_limitations_avg": [ 3.0, 4.0 ], "wc_review_avg": [ 547.2, 345.4581884975373 ], "wc_reply_reviewers_avg": [ 342.6, 425.06874738093836 ], "wc_reply_authors_avg": [ 245.2, 314.36946416597146 ], "reply_reviewers_avg": [ 1.6, 1.624807680927192 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.04583492485141061, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2582991262278743378&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "nyu.edu;nyu.edu;simonsfoundation.org;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "New York University;Flatiron Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://flatironinstitute.org", "aff_unique_abbr": "NYU;Flatiron", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Disentangling Cognitive Diagnosis with Limited Exercise Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70446", "id": "ogPBujRhiN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a14ae9951e8153a8fc814b5f506b5b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ogPBujRhiN", "openreview": "https://openreview.net/forum?id=ogPBujRhiN", "poster": "/media/PosterPDFs/NeurIPS%202023/70446.png?t=1698029000.0004303", "slides": "https://nips.cc/virtual/2023/poster/70446", "video": "https://nips.cc/virtual/2023/poster/70446", "author_site": "Xiangzhi Chen, Le Wu, Fei Liu, Lei Chen, Kun Zhang, Richang Hong, Meng Wang", "tldr": "", "abstract": "Cognitive diagnosis is an important task in intelligence education, which aims at measuring students\u2019 proficiency in specific knowledge concepts. Given a fully labeled exercise-concept matrix, most existing models focused on mining students' response records for cognitive diagnosis. Despite their success, due to the huge cost of labeling exercises, a more practical scenario is that limited exercises are labeled with concepts. Performing cognitive diagnosis with limited exercise labels is under-explored and remains pretty much open.\n In this paper, we propose Disentanglement based Cognitive Diagnosis (DCD) to address the challenges of limited exercise labels. Specifically, we utilize students' response records to model student proficiency, exercise difficulty and exercise label distribution. \n Then, we introduce two novel modules - group-based disentanglement and limited-labeled alignment modules - to disentangle the factors relevant to concepts and align them with real limited labels. 
\n Particularly, we introduce the tree-like structure of concepts with negligible cost for group-based disentangling, as concepts of different levels exhibit different independence relationships.\nExtensive experiments on widely used benchmarks demonstrate the superiority of our proposed model.", "keywords": "Intelligent Education System;Cognitive Diagnosis;Disentangled Representation Learning;Interpretability", "primary_area": "", "supplementary_material": "/attachment/1d2870f9c166bc20c22a97d9be0e5b4c63a6fcfb.zip", "author": "Xiangzhi Chen;Le Wu;Fei Liu;Lei Chen;Kun Zhang;Richang Hong;Meng Wang", "authorids": "~Xiangzhi_Chen1;~Le_Wu1;~Fei_Liu12;~Lei_Chen15;~Kun_Zhang5;~Richang_Hong1;~Meng_Wang3", "gender": "M;F;F;M;M;M;M", "homepage": "https://kervias.com/;http://le-wu.com/;https://faculty.hfut.edu.cn/feiliu/zh_CN/index.htm;;https://zhangkunzk.github.io/;https://sites.google.com/site/homeofrichanghong/;https://sites.google.com/view/meng-wang/home", "dblp": "98/8590-1.html;121/4234;64/1350-38.html;09/3666-51;96/3115-15;59/1501;93/6765-2", "google_scholar": "OJJsTUsAAAAJ;4EzlnxwAAAAJ;RieYF9oAAAAJ;l4QgYBIAAAAJ;40coXH4AAAAJ;https://scholar.google.com/scholar?hl=en;rHagaaIAAAAJ", "orcid": "0009-0001-6904-6715;0000-0003-4556-0581;0000-0003-0022-4103;0000-0002-3193-7256;0000-0002-0743-9003;;0000-0002-3094-7735", "linkedin": ";;;;;;", "or_profile": "~Xiangzhi_Chen1;~Le_Wu1;~Fei_Liu12;~Lei_Chen15;~Kun_Zhang5;~Richang_Hong1;~Meng_Wang3", "aff": "Hefei University of Technology;Hefei University of Technology;Hefei University of Technology;Tsinghua University;Hefei University of Technology;Hefei University of Technology;Hefei University of Technology", "aff_domain": "hfut.edu.cn;hfut.edu;hfut.edu.cn;tsinghua.edu.cn;hfut.edu.cn;hfut.edu;hfut.edu.cn", "position": "PhD student;Full Professor;PhD student;Postdoc;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023disentangling,\ntitle={Disentangling Cognitive Diagnosis with Limited Exercise Labels},\nauthor={Xiangzhi Chen and Le Wu and Fei Liu and Lei Chen and Kun Zhang and Richang Hong and Meng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ogPBujRhiN}\n}", "github": "", "project": "", "reviewers": "GRC1;zBu1;d3Wh;B6tk;xiTN", "pdf_size": 1378121, "rating": "5;6;6;7;7", "confidence": "5;5;4;4;3", "soundness": "3;4;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;4;2;3;3", "wc_summary": "106;52;54;84;66", "wc_strengths": "128;25;66;50;64", "wc_weaknesses": "104;104;257;131;93", "wc_questions": "57;29;74;272;26", "wc_limitations": "10;6;1;93;60", "wc_review": "405;216;452;630;309", "wc_reply_reviewers": "24;19;106;62;59", "wc_reply_authors": "45;19;230;478;33", "reply_reviewers": "2;1;2;2;1", "reply_authors": "3;2;3;3;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.4, 20.293841430345317 ], "wc_strengths_avg": [ 66.6, 34.00941046239996 ], "wc_weaknesses_avg": [ 137.8, 60.90451543194479 ], "wc_questions_avg": [ 91.6, 91.94911636334523 ], "wc_limitations_avg": [ 34.0, 36.3483149540663 ], "wc_review_avg": [ 402.4, 139.8121597000776 ], "wc_reply_reviewers_avg": [ 54.0, 31.36239786751007 ], "wc_reply_authors_avg": [ 161.0, 176.21237186985482 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], 
"replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=524245042120318919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hfut.edu.cn;hfut.edu;hfut.edu.cn;tsinghua.edu.cn;hfut.edu.cn;hfut.edu;hfut.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Hefei University of Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hfut.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HUT;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Successor-Predecessor Intrinsic Exploration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70445", "id": "ohKbQp0jIY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6f2b968c4ee8ba260cd7077e39590dd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ohKbQp0jIY", "openreview": "https://openreview.net/forum?id=ohKbQp0jIY", "poster": "/media/PosterPDFs/NeurIPS%202023/70445.png?t=1701725369.8308895", "slides": "https://nips.cc/virtual/2023/poster/70445", "video": "https://nips.cc/virtual/2023/poster/70445", "author_site": "Changmin Yu, Neil Burgess, Maneesh Sahani, Samuel J Gershman", "tldr": "", "abstract": "Exploration is essential in reinforcement learning, particularly in environments where external rewards are sparse. Here we focus on exploration with intrinsic rewards, where the agent transiently augments the external rewards with self-generated intrinsic rewards. Although the study of intrinsic rewards has a long history, existing methods focus on composing the intrinsic reward based on measures of future prospects of states, ignoring the information contained in the retrospective structure of transition sequences. Here we argue that the agent can utilise retrospective information to generate explorative behaviour with structure-awareness, facilitating efficient exploration based on global instead of local information. We propose Successor-Predecessor Intrinsic Exploration (SPIE), an exploration algorithm based on a novel intrinsic reward combining prospective and retrospective information. We show that SPIE yields more efficient and ethologically plausible exploratory behaviour in environments with sparse rewards and bottleneck states than competing methods. 
We also implement SPIE in deep reinforcement learning agents, and show that the resulting agent achieves stronger empirical performance than existing methods on sparse-reward Atari games.", "keywords": "Exploration;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/cd99fb45a122776b6abc0b00db1fed6fb070ec77.zip", "author": "Changmin Yu;Neil Burgess;Maneesh Sahani;Samuel Gershman", "authorids": "~Changmin_Yu1;~Neil_Burgess1;~Maneesh_Sahani1;~Samuel_Gershman1", "gender": "M;M;;M", "homepage": "https://changmin-yu.github.io;https://www.ucl.ac.uk/icn/people/neil-burgess;http://www.gatsby.ucl.ac.uk/~maneesh;http://gershmanlab.com/", "dblp": "266/9733;54/4203;44/3197;44/10432", "google_scholar": ";;https://scholar.google.co.uk/citations?user=rwxX83UAAAAJ;0HuMHFwAAAAJ", "orcid": ";0000-0003-0646-6584;0000-0001-5560-3341;", "linkedin": ";;;", "or_profile": "~Changmin_Yu1;~Neil_Burgess1;~Maneesh_Sahani1;~Samuel_Gershman1", "aff": "University College London;University College London;University College London;Harvard University", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;harvard.edu", "position": "PhD student;Full Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nyu2023successorpredecessor,\ntitle={Successor-Predecessor Intrinsic Exploration},\nauthor={Changmin Yu and Neil Burgess and Maneesh Sahani and Samuel Gershman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ohKbQp0jIY}\n}", "github": "", "project": "", "reviewers": "7ZrB;pKLV;dj2L;WHHE", "pdf_size": 4857095, "rating": "3;4;5;7", "confidence": "4;3;3;4", "soundness": "2;3;2;4", "novelty": "2;3;2;4", "presentation": "2;3;2;2", "wc_summary": "122;85;60;96", "wc_strengths": "46;41;23;96", "wc_weaknesses": "86;61;73;246", "wc_questions": "580;216;130;109", "wc_limitations": "21;7;9;20", "wc_review": "855;410;295;567", "wc_reply_reviewers": "207;274;431;35", "wc_reply_authors": "490;329;1101;40", "reply_reviewers": "1;1;4;1", "reply_authors": "2;3;5;2", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 22.26404051379713 ], "wc_strengths_avg": [ 51.5, 27.07858932810201 ], "wc_weaknesses_avg": [ 116.5, 75.2877812131557 ], "wc_questions_avg": [ 258.75, 189.75691686997868 ], "wc_limitations_avg": [ 14.25, 6.299801584177076 ], "wc_review_avg": [ 531.75, 210.12302943751786 ], "wc_reply_reviewers_avg": [ 236.75, 142.04642726939667 ], "wc_reply_authors_avg": [ 490.0, 387.86015521061194 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.16903085094570333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=813201179898469195&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University College London;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.harvard.edu", "aff_unique_abbr": "UCL;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "DISCS: A Benchmark for Discrete Sampling", "status": 
"Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73470", "id": "oi1MUMk5NF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f9ad87c1ebbae8a3555adb31dbcacf44-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=oi1MUMk5NF", "openreview": "https://openreview.net/forum?id=oi1MUMk5NF", "poster": "/media/PosterPDFs/NeurIPS%202023/73470.png?t=1702057434.7748227", "slides": "https://nips.cc/virtual/2023/poster/73470", "video": "https://nips.cc/virtual/2023/poster/73470", "author_site": "Katayoon Goshvadi, Haoran Sun, Xingchao Liu, Azade Nova, Ruqi Zhang, Will Grathwohl, Dale Schuurmans, Hanjun Dai", "tldr": "", "abstract": "Sampling in discrete spaces, with critical applications in simulation and optimization, has recently been boosted by significant advances in gradient-based approaches that exploit modern accelerators like GPUs. However, two key challenges are hindering further advancement in research on discrete sampling. First, since there is no consensus on experimental settings and evaluation setups, the empirical results in different research papers are often not comparable. Second, implementing samplers and target distributions often requires a nontrivial amount of effort in terms of calibration and parallelism. To tackle these challenges, we propose DISCS (DISCrete Sampling), a tailored package and benchmark that supports unified and efficient experiment implementation and evaluations for discrete sampling in three types of tasks: sampling from classical graphical models and energy based generative models, and sampling for solving combinatorial optimization. Throughout the comprehensive evaluations in DISCS, we gained new insights into scalability, design principles for proposal distributions, and lessons for adaptive sampling design. 
DISCS efficiently implements representative discrete samplers in existing research works as baselines and offers a simple interface that researchers can conveniently add new discrete samplers and directly compare their performance with the benchmark result in a calibrated setup.", "keywords": "discrete;MCMC;sampling;benchmark;combinatorial optimization;language model", "primary_area": "", "supplementary_material": "/attachment/484b66513851210f4e0cfad1d542d634c12c5eef.pdf", "author": "Katayoon Goshvadi;Haoran Sun;Xingchao Liu;Azade Nova;Ruqi Zhang;Will Sussman Grathwohl;Dale Schuurmans;Hanjun Dai", "authorids": "~Katayoon_Goshvadi1;~Haoran_Sun2;~Xingchao_Liu1;~Azade_Nova1;~Ruqi_Zhang1;~Will_Sussman_Grathwohl2;~Dale_Schuurmans1;~Hanjun_Dai1", "gender": "F;M;M;;F;M;;M", "homepage": ";;;;https://ruqizhang.github.io/;http://www.cs.toronto.edu/~wgrathwohl/;;https://hanjun-dai.github.io", "dblp": ";;228/7309;;;192/1565;;144/7311", "google_scholar": ";p7of_yoAAAAJ;VOTVE0UAAAAJ;;4ojpmc8AAAAJ;;;obpl7GQAAAAJ", "orcid": ";;;;;;;", "linkedin": "katayoon-goshvadi/;;;;;will-grathwohl-b44a383b/;;hanjun-dai", "or_profile": "~Katayoon_Goshvadi1;~Haoran_Sun2;~Xingchao_Liu1;~Azade_Nova1;~Ruqi_Zhang1;~Will_Sussman_Grathwohl2;~Dale_Schuurmans1;~Hanjun_Dai1", "aff": ";Georgia Institute of Technology;University of Texas, Austin;;Purdue University;Google DeepMind;;Google Research", "aff_domain": ";gatech.edu;utexas.edu;;purdue.edu;deepmind.com;;google.com", "position": ";PhD student;PhD student;;Assistant Professor;Senior Research Scientist;;Researcher", "bibtex": "@inproceedings{\ngoshvadi2023discs,\ntitle={{DISCS}: A Benchmark for Discrete Sampling},\nauthor={Katayoon Goshvadi and Haoran Sun and Xingchao Liu and Azade Nova and Ruqi Zhang and Will Sussman Grathwohl and Dale Schuurmans and Hanjun Dai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=oi1MUMk5NF}\n}", "github": "", "project": "", "reviewers": "2XnF;GKfw;q6pU;Edfj", "pdf_size": 7270908, "rating": "5;6;6;7", "confidence": "3;3;3;3", "wc_summary_and_contributions": "74;72;134;17", "wc_strengths": "28;66;87;74", "wc_improvement": "87;38;63;42", "wc_limitations": "35;1;37;22", "wc_correctness": "17;1;11;2", "wc_clarity": "19;1;69;10", "wc_relation_to_prior_work": "6;35;29;4", "wc_documentation": "45;1;21;48", "wc_additional_feedback": "1;1;1;1", "wc_review": "312;216;452;220", "wc_reply_reviewers": "0;11;0;9", "wc_reply_authors": "729;162;579;223", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 74.25, 41.390669238368204 ], "wc_strengths_avg": [ 63.75, 21.958768180387533 ], "wc_improvement_avg": [ 57.5, 19.5 ], "wc_limitations_avg": [ 23.75, 14.341809509263467 ], "wc_correctness_avg": [ 7.75, 6.609652033201143 ], "wc_clarity_avg": [ 24.75, 26.328454189336675 ], "wc_relation_to_prior_work_avg": [ 18.5, 13.683932183404009 ], "wc_documentation_avg": [ 28.75, 19.13602623325961 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.0, 95.79144011862438 ], "wc_reply_reviewers_avg": [ 5.0, 5.049752469181039 ], "wc_reply_authors_avg": [ 423.25, 237.7460567496336 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=63853189901274995&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";gatech.edu;utexas.edu;;purdue.edu;deepmind.com;;google.com", "author_num": 8, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Georgia Institute of Technology;University of Texas at Austin;Purdue University;Google", "aff_unique_dep": ";;;Google DeepMind", "aff_unique_url": "https://www.gatech.edu;https://www.utexas.edu;https://www.purdue.edu;https://deepmind.com", "aff_unique_abbr": "Georgia Tech;UT Austin;Purdue;DeepMind", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Mountain View", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Multi-Fidelity Multi-Armed Bandits Revisited", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70444", "id": "oi45JlpSOT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/64602b87c31db70a3ef060f6c5d5b01d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oi45JlpSOT", "openreview": "https://openreview.net/forum?id=oi45JlpSOT", "poster": "/media/PosterPDFs/NeurIPS%202023/70444.png?t=1701568422.87881", "slides": "https://nips.cc/virtual/2023/poster/70444", "video": "https://nips.cc/virtual/2023/poster/70444", "author_site": "Xuchuang Wang, Qingyun Wu, Wei Chen, John C.S. Lui", "tldr": "", "abstract": "We study the multi-fidelity multi-armed bandit ($\\texttt{MF-MAB}$), an extension of the canonical multi-armed bandit (MAB) problem.\n$\\texttt{MF-MAB}$ allows each arm to be pulled with different costs (fidelities) and observation accuracy.\nWe study both the best arm identification with fixed confidence ($\\texttt{BAI}$) and the regret minimization objectives.\nFor $\\texttt{BAI}$, we present (a) a cost complexity lower bound, (b) an algorithmic framework with two alternative fidelity selection procedures,\nand (c) both procedures' cost complexity upper bounds.\nFrom both cost complexity bounds of $\\texttt{MF-MAB}$,\none can recover the standard sample complexity bounds of the classic (single-fidelity) MAB.\nFor regret minimization of $\\texttt{MF-MAB}$, we propose a new regret definition, prove its problem-independent regret lower bound $\\Omega(K^{1/3}\\Lambda^{2/3})$ and problem-dependent lower bound $\\Omega(K\\log \\Lambda)$, where $K$ is the number of arms and $\\Lambda$ is the decision budget in terms of cost, and devise an elimination-based algorithm whose worst-cost regret upper bound matches its corresponding lower bound up to some logarithmic terms and, whose problem-dependent bound matches its corresponding lower bound in terms of $\\Lambda$.", "keywords": "Multi-fidelity;multi-armed bandits", "primary_area": "", "supplementary_material": "/attachment/338b8e7fe4f4d60a61f298ba88e9716ed00762d8.zip", "author": "Xuchuang Wang;Qingyun Wu;Wei Chen;John C.S. 
Lui", "authorids": "~Xuchuang_Wang1;~Qingyun_Wu2;~Wei_Chen10;~John_C.S._Lui2", "gender": "M;M;M;F", "homepage": "https://xuchuangw.com;https://www.microsoft.com/en-us/research/people/weic/;http://www.cse.cuhk.edu.hk/~cslui/Index.html;https://qingyun-wu.github.io/", "dblp": "319/5123;c/WeiChen13;l/JohnCSLui;183/0579", "google_scholar": "QJ66dEcAAAAJ;hlEPkxAAAAAJ;https://scholar.google.com.tw/citations?user=7LVjQ7MAAAAJ;Y54J21sAAAAJ", "orcid": ";;0000-0001-7466-0384;", "linkedin": ";;;", "or_profile": "~Xuchuang_Wang1;~Wei_Chen10;~John_C.S._Lui2;~Qingyun_Wu1", "aff": "The Chinese University of Hong Kong;Microsoft Research;The Chinese University of Hong Kong;Pennsylvania State University", "aff_domain": "cuhk.edu.hk;microsoft.com;cse.cuhk.edu.hk;psu.edu", "position": "PhD student;Pricipal Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023multifidelity,\ntitle={Multi-Fidelity Multi-Armed Bandits Revisited},\nauthor={Xuchuang Wang and Qingyun Wu and Wei Chen and John C.S. Lui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oi45JlpSOT}\n}", "github": "", "project": "", "reviewers": "rkrs;Rfmo;QHcL;3tC7;zHKa;ZToP", "pdf_size": 764705, "rating": "3;4;5;6;7;7", "confidence": "4;2;2;3;4;3", "soundness": "2;3;3;3;4;4", "novelty": "2;2;3;3;3;3", "presentation": "1;3;3;3;3;3", "wc_summary": "223;86;53;57;47;81", "wc_strengths": "38;55;19;37;79;62", "wc_weaknesses": "344;102;128;1;169;119", "wc_questions": "3;30;4;1;147;9", "wc_limitations": "3;1;6;1;6;1", "wc_review": "611;274;210;97;448;272", "wc_reply_reviewers": "320;0;0;0;39;13", "wc_reply_authors": "790;37;43;0;0;0", "reply_reviewers": "2;0;0;0;1;1", "reply_authors": "3;2;2;1;1;1", "rating_avg": [ 5.333333333333333, 1.4907119849998596 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 91.16666666666667, 60.669642784143335 ], "wc_strengths_avg": [ 48.333333333333336, 19.439364415764445 ], "wc_weaknesses_avg": [ 143.83333333333334, 103.05244080348392 ], "wc_questions_avg": [ 32.333333333333336, 52.19088894518744 ], "wc_limitations_avg": [ 3.0, 2.23606797749979 ], "wc_review_avg": [ 318.6666666666667, 166.98469657093995 ], "wc_reply_reviewers_avg": [ 62.0, 116.2081465876353 ], "wc_reply_authors_avg": [ 145.0, 289.0121104729004 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.6666666666666667, 0.74535599249993 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13693063937629155, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5317696053961929757&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "cuhk.edu.hk;microsoft.com;cse.cuhk.edu.hk;psu.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Chinese University of Hong Kong;Microsoft;Pennsylvania State University", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.microsoft.com/en-us/research;https://www.psu.edu", "aff_unique_abbr": "CUHK;MSR;PSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Explore In-Context Learning for 3D Point Cloud Understanding", "status": 
"Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70443", "id": "ooXpTZYwXa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8407d254b5baacf69ee977aa34f0e521-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ooXpTZYwXa", "openreview": "https://openreview.net/forum?id=ooXpTZYwXa", "poster": "/media/PosterPDFs/NeurIPS%202023/70443.png?t=1698200643.064123", "slides": "https://nips.cc/virtual/2023/poster/70443", "video": "https://nips.cc/virtual/2023/poster/70443", "author_site": "Zhongbin Fang, Xiangtai Li, Xia Li, Xia Li, Joachim M Buhmann, Chen Change Loy, Mengyuan Liu", "tldr": "", "abstract": "With the rise of large-scale models trained on broad data, in-context learning has become a new learning paradigm that has demonstrated significant potential in natural language processing and computer vision tasks. Meanwhile, in-context learning is still largely unexplored in the 3D point cloud domain. Although masked modeling has been successfully applied for in-context learning in 2D vision, directly extending it to 3D point clouds remains a formidable challenge. In the case of point clouds, the tokens themselves are the point cloud positions (coordinates) that are masked during inference. Moreover, position embedding in previous works may inadvertently introduce information leakage. To address these challenges, we introduce a novel framework, named Point-In-Context, designed especially for in-context learning in 3D point clouds, where both inputs and outputs are modeled as coordinates for each task. Additionally, we propose the Joint Sampling module, carefully designed to work in tandem with the general point sampling operator, effectively resolving the aforementioned technical issues. We conduct extensive experiments to validate the versatility and adaptability of our proposed methods in handling a wide range of tasks. Furthermore, with a more effective prompt selection strategy, our framework surpasses the results of individually trained models.", "keywords": "In-context learning;Point cloud;Prompt tuning", "primary_area": "", "supplementary_material": "/attachment/a7933bba53708db58d3169c75a373857b4f446b9.zip", "author": "Zhongbin Fang;Xiangtai Li;Xia Li;Joachim M. Buhmann;Chen Change Loy;Mengyuan Liu", "authorids": "~Zhongbin_Fang1;~Xiangtai_Li1;~Xia_Li3;~Joachim_M._Buhmann1;~Chen_Change_Loy2;~Mengyuan_Liu2", "gender": ";;;M;M;", "homepage": ";;;https://ise.ethz.ch;https://www.mmlab-ntu.com/person/ccloy/index.html;https://www.semanticscholar.org/author/Mengyuan-Liu/47842072", "dblp": ";;;b/JMBuhmann;01/5855;", "google_scholar": ";;;https://scholar.google.ch/citations?user=zQWbCzYAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ;woX_4AcAAAAJ", "orcid": ";;;;0000-0001-5345-1591;0000-0002-6332-8316", "linkedin": ";;;;;", "or_profile": "~Zhongbin_Fang1;~Xiangtai_Li1;~Xia_Li3;~Joachim_M._Buhmann1;~Chen_Change_Loy2;~Mengyuan_Liu2", "aff": ";;;Department of Computer Science, ETHZ - ETH Zurich;Nanyang Technological University;SUN YAT-SEN UNIVERSITY", "aff_domain": ";;;inf.ethz.ch;ntu.edu.sg;sysu.edu.cn", "position": ";;;Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nfang2023explore,\ntitle={Explore In-Context Learning for 3D Point Cloud Understanding},\nauthor={Zhongbin Fang and Xiangtai Li and Xia Li and Joachim M. 
Buhmann and Chen Change Loy and Mengyuan Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ooXpTZYwXa}\n}", "github": "", "project": "", "reviewers": "uxmh;kyHc;GuqQ;Uvps", "pdf_size": 0, "rating": "5;6;6;7", "confidence": "4;5;5;5", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "71;38;68;74", "wc_strengths": "33;37;73;117", "wc_weaknesses": "121;69;334;56", "wc_questions": "2;87;95;5", "wc_limitations": "2;5;16;1", "wc_review": "229;236;586;253", "wc_reply_reviewers": "0;18;97;15", "wc_reply_authors": "120;0;141;0", "reply_reviewers": "0;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.75, 14.446020213193666 ], "wc_strengths_avg": [ 65.0, 33.823069050575526 ], "wc_weaknesses_avg": [ 145.0, 111.79669046979879 ], "wc_questions_avg": [ 47.25, 43.85416171813115 ], "wc_limitations_avg": [ 6.0, 5.958187643906492 ], "wc_review_avg": [ 326.0, 150.36455699399377 ], "wc_reply_reviewers_avg": [ 32.5, 37.85828839237189 ], "wc_reply_authors_avg": [ 65.25, 65.67105526790323 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6233865000547078496&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";;;inf.ethz.ch;ntu.edu.sg;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "ETH Zurich;Nanyang Technological University;Sun Yat-sen University", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.ethz.ch;https://www.ntu.edu.sg;http://www.sysu.edu.cn", "aff_unique_abbr": "ETHZ;NTU;SYSU", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Switzerland;Singapore;China" }, { "title": "Sample-Conditioned Hypothesis Stability Sharpens Information-Theoretic Generalization Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70442", "id": "oqDSDKLd3S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9b912f91a5e299472764377db6ca2431-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oqDSDKLd3S", "openreview": "https://openreview.net/forum?id=oqDSDKLd3S", "poster": "/media/PosterPDFs/NeurIPS%202023/70442.png?t=1701035034.8165858", "slides": "https://nips.cc/virtual/2023/poster/70442", "video": "https://nips.cc/virtual/2023/poster/70442", "author_site": "Ziqiao Wang, Yongyi Mao", "tldr": "", "abstract": "We present new information-theoretic generalization guarantees through a novel construction of the \"neighboring-hypothesis\" matrix and a new family of stability notions termed sample-conditioned hypothesis (SCH) stability. Our approach yields sharper bounds that improve upon previous information-theoretic bounds in various learning scenarios. Notably, these bounds address the limitations of existing information-theoretic bounds in the context of stochastic convex optimization (SCO) problems, as explored in the recent work by Haghifam et al.
(2023).", "keywords": "generalization;information-theoretic bounds;stability", "primary_area": "", "supplementary_material": "/attachment/88e5ef9464a4eb26f33d1ad6b584fce782ffe629.pdf", "author": "Ziqiao Wang;Yongyi Mao", "authorids": "~Ziqiao_Wang1;~Yongyi_Mao2", "gender": "M;M", "homepage": "https://ziqiaowanggeothe.github.io;http://www.eecs.uottawa.ca/~yymao", "dblp": "222/9220;86/2933", "google_scholar": "iBL7APIAAAAJ;https://scholar.google.ca/citations?user=jM5l70wAAAAJ", "orcid": "0000-0003-0504-4830;0000-0001-5298-5778", "linkedin": "ziqiao-wang-987565155/?locale=en_US;", "or_profile": "~Ziqiao_Wang1;~Yongyi_Mao1", "aff": "University of Ottawa;University of Ottawa", "aff_domain": "uottawa.ca;eecs.uottawa.ca", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023sampleconditioned,\ntitle={Sample-Conditioned Hypothesis Stability Sharpens Information-Theoretic Generalization Bounds},\nauthor={Ziqiao Wang and Yongyi Mao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oqDSDKLd3S}\n}", "github": "", "project": "", "reviewers": "Ptk3;s7qd;uPAq;LJ4V", "pdf_size": 426646, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "42;295;82;251", "wc_strengths": "40;125;31;124", "wc_weaknesses": "287;512;244;36", "wc_questions": "5;146;174;161", "wc_limitations": "1;46;12;5", "wc_review": "375;1124;543;577", "wc_reply_reviewers": "436;502;91;5", "wc_reply_authors": "1062;609;138;0", "reply_reviewers": "7;2;2;1", "reply_authors": "8;2;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 167.5, 107.57439286372943 ], "wc_strengths_avg": [ 80.0, 44.615019892408434 ], "wc_weaknesses_avg": [ 269.75, 169.03013784529668 ], "wc_questions_avg": [ 121.5, 67.98713113523765 ], "wc_limitations_avg": [ 16.0, 17.76231966833161 ], "wc_review_avg": [ 654.75, 281.5087698456302 ], "wc_reply_reviewers_avg": [ 258.5, 213.96086090684904 ], "wc_reply_authors_avg": [ 452.25, 418.2250440851193 ], "reply_reviewers_avg": [ 3.0, 2.345207879911715 ], "reply_authors_avg": [ 3.5, 2.692582403567252 ], "replies_avg": [ 45, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15582677809484153676&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "uottawa.ca;eecs.uottawa.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Ottawa", "aff_unique_dep": "", "aff_unique_url": "https://www.uottawa.ca", "aff_unique_abbr": "U Ottawa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Bypassing the Simulator: Near-Optimal Adversarial Linear Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70441", "id": "orh4e0AO9R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a3a661eb3308d0bb686f6a4bac521032-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=orh4e0AO9R", "openreview": "https://openreview.net/forum?id=orh4e0AO9R", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70441", "video": "https://nips.cc/virtual/2023/poster/70441", "author_site": "Haolin 
Liu, Chen-Yu Wei, Julian Zimmert", "tldr": "", "abstract": "We consider the adversarial linear contextual bandit problem, \nwhere the loss vectors are selected fully adversarially and the per-round action set (i.e. the context) is drawn from a fixed distribution. Existing methods for this problem either require access to a simulator to generate free i.i.d. contexts, achieve a sub-optimal regret no better than $\\tilde{\\mathcal{O}}(T^{\\frac{5}{6}})$, or are computationally inefficient. \nWe greatly improve these results by achieving a regret of $\\tilde{\\mathcal{O}}(\\sqrt{T})$ without a simulator, while maintaining computational efficiency when the action set in each round is small. \nIn the special case of sleeping bandits with adversarial loss and stochastic arm availability, our result answers affirmatively the open question by [SGV20] on whether there exists a polynomial-time algorithm with $poly(d)\\sqrt{T}$ regret. Our approach naturally handles the case where the loss is linear up to an additive misspecification error, and our regret shows near-optimal dependence on the magnitude of the error.", "keywords": "adversarial linear contextual bandits;log-determinant barrier", "primary_area": "", "supplementary_material": "/attachment/8cdc74c296413fbe39f78030308153d923eb0df6.pdf", "author": "Haolin Liu;Chen-Yu Wei;Julian Zimmert", "authorids": "~Haolin_Liu8;~Chen-Yu_Wei1;~Julian_Zimmert1", "gender": "M;M;", "homepage": "https://liuhl2000.github.io/;https://bahh723.github.io/;", "dblp": ";183/1729;190/7636", "google_scholar": ";2L2cR-kAAAAJ;", "orcid": "0000-0002-8247-9742;;", "linkedin": ";;", "or_profile": "~Haolin_Liu8;~Chen-Yu_Wei1;~Julian_Zimmert1", "aff": "University of Virginia, Charlottesville;Massachusetts Institute of Technology;Google", "aff_domain": "virginia.edu;mit.edu;google.com", "position": "PhD student;Postdoc;Postdoc", "bibtex": "@inproceedings{\nliu2023bypassing,\ntitle={Bypassing the Simulator: Near-Optimal Adversarial Linear Contextual Bandits},\nauthor={Haolin Liu and Chen-Yu Wei and Julian Zimmert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=orh4e0AO9R}\n}", "github": "", "project": "", "reviewers": "f8CU;6s4U;V7BF;He1a", "pdf_size": 548469, "rating": "6;7;7;8", "confidence": "2;3;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;4", "presentation": "3;3;4;3", "wc_summary": "46;149;126;130", "wc_strengths": "30;227;104;48", "wc_weaknesses": "9;40;195;10", "wc_questions": "279;39;128;20", "wc_limitations": "6;55;1;1", "wc_review": "370;510;554;209", "wc_reply_reviewers": "34;0;21;0", "wc_reply_authors": "282;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 112.75, 39.5055375865207 ], "wc_strengths_avg": [ 102.25, 77.02069527081666 ], "wc_weaknesses_avg": [ 63.5, 76.93666226188917 ], "wc_questions_avg": [ 116.5, 102.29491678475524 ], "wc_limitations_avg": [ 15.75, 22.75274708689041 ], "wc_review_avg": [ 410.75, 134.8469039318293 ], "wc_reply_reviewers_avg": [ 13.75, 14.49784466739798 ], "wc_reply_authors_avg": [ 70.5, 122.10958193360585 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224418, 
"gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15486254481508953513&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "virginia.edu;mit.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Virginia;Massachusetts Institute of Technology;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.virginia.edu;https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "UVA;MIT;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Charlottesville;;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Model and Feature Diversity for Bayesian Neural Networks in Mutual Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70440", "id": "os2BdbiGwX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff521f7570d6ed23217ba5780753a1f7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=os2BdbiGwX", "openreview": "https://openreview.net/forum?id=os2BdbiGwX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70440", "video": "https://nips.cc/virtual/2023/poster/70440", "author_site": "Van Cuong Pham, Cuong C Nguyen, Cuong Nguyen, Trung Le, Dinh Phung, Gustavo Carneiro, Thanh-Toan Do", "tldr": "", "abstract": "Bayesian Neural Networks (BNNs) offer probability distributions for model parameters, enabling uncertainty quantification in predictions. However, they often underperform compared to deterministic neural networks. Utilizing mutual learning can effectively enhance the performance of peer BNNs. In this paper, we propose a novel approach to improve BNNs performance through deep mutual learning. The proposed approaches aim to increase diversity in both network parameter distributions and feature distributions, promoting peer networks to acquire distinct features that capture different characteristics of the input, which enhances the effectiveness of mutual learning. Experimental results demonstrate significant improvements in the classification accuracy, negative log-likelihood, and expected calibration error when compared to traditional mutual learning for BNNs.", "keywords": "Bayesian Neural Networks;Deep Mutual Learning", "primary_area": "", "supplementary_material": "/attachment/87672fee9e9e1e1d22c8260e729c725784a6214d.pdf", "author": "Cuong Pham;Cuong C. Nguyen;Trung Le;Dinh Phung;Gustavo Carneiro;Thanh-Toan Do", "authorids": "~Cuong_Pham3;~Cuong_C._Nguyen1;~Trung_Le2;~Dinh_Phung2;~Gustavo_Carneiro1;~Thanh-Toan_Do4", "gender": ";;M;;M;", "homepage": ";;;;https://cs.adelaide.edu.au/~carneiro/;", "dblp": ";;;;53/3609;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.au/citations?user=E0TtOWAAAAAJ;", "orcid": ";;;;0000-0002-5571-6220;", "linkedin": ";;;;gustavo-carneiro-3578812/;", "or_profile": "~Cuong_Pham3;~Cuong_C._Nguyen1;~Trung_Le2;~Dinh_Phung2;~Gustavo_Carneiro1;~Thanh-Toan_Do4", "aff": ";;Monash University;;University of Surrey;", "aff_domain": ";;monash.edu;;surrey.ac.uk;", "position": ";;Assistant Professor;;Full Professor;", "bibtex": "@inproceedings{\npham2023model,\ntitle={Model and Feature Diversity for Bayesian Neural Networks in Mutual Learning},\nauthor={Cuong Pham and Cuong C. 
Nguyen and Trung Le and Dinh Phung and Gustavo Carneiro and Thanh-Toan Do},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=os2BdbiGwX}\n}", "github": "", "project": "", "reviewers": "tB5V;5v8y;phoh;xPiJ;fyNb", "pdf_size": 286683, "rating": "4;5;5;5;6", "confidence": "4;2;3;4;3", "soundness": "2;3;2;3;3", "novelty": "2;2;2;3;2", "presentation": "3;3;3;3;3", "wc_summary": "253;117;66;62;29", "wc_strengths": "42;41;127;67;44", "wc_weaknesses": "62;181;151;168;34", "wc_questions": "242;241;10;27;30", "wc_limitations": "4;5;14;7;1", "wc_review": "603;585;368;331;138", "wc_reply_reviewers": "0;12;30;19;24", "wc_reply_authors": "107;26;27;73;27", "reply_reviewers": "0;1;1;1;2", "reply_authors": "4;2;2;3;2", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.4, 78.98506187881353 ], "wc_strengths_avg": [ 64.2, 32.83534680797509 ], "wc_weaknesses_avg": [ 119.2, 59.56979100181567 ], "wc_questions_avg": [ 110.0, 107.58624447391033 ], "wc_limitations_avg": [ 6.2, 4.354308211415448 ], "wc_review_avg": [ 405.0, 173.05374887589116 ], "wc_reply_reviewers_avg": [ 17.0, 10.353743284435827 ], "wc_reply_authors_avg": [ 52.0, 32.83900120283807 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12765554235881168687&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": ";;monash.edu;;surrey.ac.uk;", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Monash University;University of Surrey", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.surrey.ac.uk", "aff_unique_abbr": "Monash;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;United Kingdom" }, { "title": "Linear Time Algorithms for k-means with Multi-Swap Local Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70439", "id": "oss2jXD1Zs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8eec8d7bcecf034304174e6b57dbc19a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oss2jXD1Zs", "openreview": "https://openreview.net/forum?id=oss2jXD1Zs", "poster": "/media/PosterPDFs/NeurIPS%202023/70439.png?t=1701390487.1013162", "slides": "https://nips.cc/virtual/2023/poster/70439", "video": "https://nips.cc/virtual/2023/poster/70439", "author_site": "Junyu Huang, Qilong Feng, Ziyun Huang, Jinhui Xu, Jianxin Wang", "tldr": "", "abstract": "Local search methods have been widely used to solve clustering problems. In practice, local search algorithms for clustering problems mainly adopt the single-swap strategy, which enables them to handle large-scale datasets and achieve linear running time in the data size. However, compared with multi-swap local search algorithms, there is a considerable gap in the approximation ratios of the single-swap local search algorithms.
Although current multi-swap local search algorithms provide small constant approximation ratios, they tend to have large polynomial running times and thus cannot be used to handle large-scale datasets. In this paper, we propose a multi-swap local search algorithm for the $k$-means problem with linear running time in the data size. Given a swap size $t$, our proposed algorithm can achieve a $(50(1+\\frac{1}{t})+\\epsilon)$-approximation, which improves upon the current best approximation ratio of 509 (ICML 2019), with linear running time in the data size. Our proposed method, compared with previous multi-swap local search algorithms, is the first one to achieve linear running time in the data size. To obtain a more practical algorithm for the problem with better clustering quality and running time, we propose a sampling-based method that accelerates the clustering cost updates during swaps. In addition, a recombination mechanism is proposed to find potentially better solutions. Empirical experiments show that our proposed algorithms achieve better performance compared with the branch-and-bound solver (NeurIPS 2022) and other existing state-of-the-art local search algorithms on both small and large datasets.", "keywords": "Approximation Algorithms;k-means Clustering;Local Search", "primary_area": "", "supplementary_material": "/attachment/5fb5da2e65ce6db82ee57cb892c6af4ccf815da1.zip", "author": "Junyu Huang;Qilong Feng;Ziyun Huang;Jinhui Xu;Jianxin Wang", "authorids": "~Junyu_Huang1;~Qilong_Feng1;~Ziyun_Huang1;~Jinhui_Xu1;~Jianxin_Wang1", "gender": "M;M;M;M;", "homepage": ";;;https://www.cse.buffalo.edu/~jinhui/;https://faculty.csu.edu.cn/wangjianxin1/zh_CN/index/106082/list/", "dblp": "277/9525;75/6154;;24/6437-1.html;75/2669-1.html", "google_scholar": ";;1MPrmtEAAAAJ;https://scholar.google.com/citations?hl=en;7pgY2F0AAAAJ", "orcid": ";;;;0000-0003-1516-0480", "linkedin": ";;;;", "or_profile": "~Junyu_Huang1;~Qilong_Feng1;~Ziyun_Huang1;~Jinhui_Xu1;~Jianxin_Wang1", "aff": "Central South University;Central South University, China;Pennsylvania State University, Erie;University at Buffalo, State University of New York;Central South University", "aff_domain": "csu.edu.cn;csu.edu.cn;psu.edu;buffalo.edu;csu.edu.cn", "position": "PhD student;Full Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023linear,\ntitle={Linear Time Algorithms for k-means with Multi-Swap Local Search},\nauthor={Junyu Huang and Qilong Feng and Ziyun Huang and Jinhui Xu and Jianxin Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oss2jXD1Zs}\n}", "github": "", "project": "", "reviewers": "hjYC;dZuQ;BGQc;bF7j", "pdf_size": 377121, "rating": "6;6;6;7", "confidence": "5;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;4", "wc_summary": "405;412;78;215", "wc_strengths": "123;27;69;55", "wc_weaknesses": "193;91;122;49", "wc_questions": "680;30;246;110", "wc_limitations": "1;3;10;1", "wc_review": "1402;563;525;430", "wc_reply_reviewers": "6;0;0;31", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 277.5, 139.68983499167004 ], "wc_strengths_avg": [ 68.5, 34.910600109422354 ], "wc_weaknesses_avg": [
113.75, 52.58029573899333 ], "wc_questions_avg": [ 266.5, 250.91183710618358 ], "wc_limitations_avg": [ 3.75, 3.6996621467371855 ], "wc_review_avg": [ 730.0, 390.9916878911878 ], "wc_reply_reviewers_avg": [ 9.25, 12.794041581923986 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13271518383905762875&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "csu.edu.cn;csu.edu.cn;psu.edu;buffalo.edu;csu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Central South University;Pennsylvania State University;University at Buffalo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.csu.edu.cn;https://www.psu.edu;https://www.buffalo.edu", "aff_unique_abbr": "CSU;PSU;UB", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Erie;Buffalo", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "On the Properties of Kullback-Leibler Divergence Between Multivariate Gaussian Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70438", "id": "ouLe91yibj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b5b4d92374323c53c24bbbc8ee0e715c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ouLe91yibj", "openreview": "https://openreview.net/forum?id=ouLe91yibj", "poster": "/media/PosterPDFs/NeurIPS%202023/70438.png?t=1699797461.4327173", "slides": "https://nips.cc/virtual/2023/poster/70438", "video": "https://nips.cc/virtual/2023/poster/70438", "author_site": "Yufeng Zhang, Jialu Pan, Li Ken Li, Wanwei Liu, Zhenbang Chen, Xinwang Liu, J Wang", "tldr": "", "abstract": "Kullback-Leibler (KL) divergence is one of the most important measures to calculate the difference between probability distributions. In this paper, we theoretically study several properties of KL divergence between multivariate Gaussian distributions. Firstly, for any two $n$-dimensional Gaussian distributions $\\mathcal{N}_1$ and $\\mathcal{N}_2$, we prove that when $KL(\\mathcal{N}_2||\\mathcal{N}_1)\\leq \\varepsilon\\ (\\varepsilon>0)$ the supremum of $KL(\\mathcal{N}_1||\\mathcal{N}_2)$ is $(1/2)\\left((-W_{0}(-e^{-(1+2\\varepsilon)}))^{-1}+\\log(-W_{0}(-e^{-(1+2\\varepsilon)})) -1 \\right)$, where $W_0$ is the principal branch of Lambert $W$ function.\tFor small $\\varepsilon$, the supremum is $\\varepsilon + 2\\varepsilon^{1.5} + O(\\varepsilon^2)$. This quantifies the approximate symmetry of small KL divergence between Gaussian distributions. We further derive the infimum of $KL(\\mathcal{N}_1||\\mathcal{N}_2)$ when $KL(\\mathcal{N}_2||\\mathcal{N}_1)\\geq M\\ (M>0)$. We give the conditions when the supremum and infimum can be attained. Secondly, for any three $n$-dimensional Gaussian distributions $\\mathcal{N}_1$, $\\mathcal{N}_2$, and $\\mathcal{N}_3$, we theoretically show that an upper bound of $KL(\\mathcal{N}_1||\\mathcal{N}_3)$ is $3\\varepsilon_1+3\\varepsilon_2+2\\sqrt{\\varepsilon_1\\varepsilon_2}+o(\\varepsilon_1)+o(\\varepsilon_2)$ when $KL(\\mathcal{N}_1||\\mathcal{N}_2)\\leq \\varepsilon_1$ and $KL(\\mathcal{N}_2||\\mathcal{N}_3)\\leq \\varepsilon_2$ ($\\varepsilon_1,\\varepsilon_2\\ge 0$). This reveals that KL divergence between Gaussian distributions follows a relaxed triangle inequality. 
Note that all these bounds in the theorems presented in this work are independent of the dimension $n$. Finally, we discuss several applications of our theories in deep learning, reinforcement learning, and sample complexity research.", "keywords": "Kullback-Leibler divergence;statistical divergence;multivariate Gaussian distribution;mathematical optimization;Lambert $W$ function;machine learning;flow-based model;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/c0f1aca7173eafb0e510573ea86f0979f99e2a4c.pdf", "author": "Yufeng Zhang;Jialu Pan;Kenli Li;Wanwei Liu;Zhenbang Chen;Xinwang Liu;J Wang", "authorids": "~Yufeng_Zhang5;~Jialu_Pan1;~Kenli_Li1;~Wanwei_Liu1;~Zhenbang_Chen2;~Xinwang_Liu1;~J_Wang1", "gender": ";F;M;;M;M;", "homepage": ";;http://csee.hnu.edu.cn/people/likenli;;https://zbchen.github.io/;https://xinwangliu.github.io/;", "dblp": ";;l/KenliLi.html;04/5600;;45/6569-2.html;", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works;;;A56vWC4AAAAJ;", "orcid": ";0009-0004-1181-8550;0000-0002-2635-7716;;;;", "linkedin": ";;;;;;", "or_profile": "~Yufeng_Zhang5;~Jialu_Pan1;~Kenli_Li1;~Wanwei_Liu1;~Zhenbang_Chen2;~Xinwang_Liu1;~J_Wang1", "aff": ";Hunan University;Hunan University;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;", "aff_domain": ";hnu.edu.cn;hnu.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;", "position": ";PhD student;Full Professor;Full Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nzhang2023on,\ntitle={On the Properties of Kullback-Leibler Divergence Between Multivariate Gaussian Distributions},\nauthor={Yufeng Zhang and Jialu Pan and Kenli Li and Wanwei Liu and Zhenbang Chen and Xinwang Liu and J Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ouLe91yibj}\n}", "github": "", "project": "", "reviewers": "8dDm;ZfdJ;odZe;rMwu;azx2", "pdf_size": 931059, "rating": "6;6;7;7;7", "confidence": "3;3;4;3;4", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;4", "presentation": "3;2;3;3;4", "wc_summary": "103;131;172;238;153", "wc_strengths": "60;34;57;44;54", "wc_weaknesses": "53;95;45;1;14", "wc_questions": "28;44;2;1;32", "wc_limitations": "2;10;8;1;20", "wc_review": "246;314;284;285;273", "wc_reply_reviewers": "11;17;34;0;9", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 159.4, 45.53064901799666 ], "wc_strengths_avg": [ 49.8, 9.558242516278817 ], "wc_weaknesses_avg": [ 41.6, 32.87308929808697 ], "wc_questions_avg": [ 21.4, 17.083325203250098 ], "wc_limitations_avg": [ 8.2, 6.823488843692792 ], "wc_review_avg": [ 280.4, 21.914378841299612 ], "wc_reply_reviewers_avg": [ 14.2, 11.303096920755832 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15587144903224758000&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";hnu.edu.cn;hnu.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;", "author_num": 7,
"aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Hunan University;National University of Defense Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.hunu.edu.cn/;http://www.nudt.edu.cn/", "aff_unique_abbr": "HNU;NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MIMONets: Multiple-Input-Multiple-Output Neural Networks Exploiting Computation in Superposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70437", "id": "ox7aynitoW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7c7a12559be4501f70d221352514397c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ox7aynitoW", "openreview": "https://openreview.net/forum?id=ox7aynitoW", "poster": "/media/PosterPDFs/NeurIPS%202023/70437.png?t=1699694088.1849563", "slides": "https://nips.cc/virtual/2023/poster/70437", "video": "https://nips.cc/virtual/2023/poster/70437", "author_site": "Nicolas Menet, Michael Hersche, Geethan Karunaratne, Luca Benini, Abu Sebastian, Abbas Rahimi", "tldr": "", "abstract": "With the advent of deep learning, progressively larger neural networks have been designed to solve complex tasks. We take advantage of these capacity-rich models to lower the cost of inference by exploiting computation in superposition. To reduce the computational burden per input, we propose Multiple-Input-Multiple-Output Neural Networks (MIMONets) capable of handling many inputs at once. MIMONets augment various deep neural network architectures with variable binding mechanisms to represent an arbitrary number of inputs in a compositional data structure via fixed-width distributed representations. Accordingly, MIMONets adapt nonlinear neural transformations to process the data structure holistically, leading to a speedup nearly proportional to the number of superposed input items in the data structure. After processing in superposition, an unbinding mechanism recovers each transformed input of interest. MIMONets also provide a dynamic trade-off between accuracy and throughput by an instantaneous on-demand switching between a set of accuracy-throughput operating points, yet within a single set of fixed parameters. We apply the concept of MIMONets to both CNN and Transformer architectures resulting in MIMOConv and MIMOFormer, respectively. Empirical evaluations show that MIMOConv achieves $\\approx 2$\u2013$4\\times$ speedup at an accuracy delta within [+0.68, -3.18]% compared to WideResNet CNNs on CIFAR10 and CIFAR100. \nSimilarly, MIMOFormer can handle $2$\u2013$4$ inputs at once while maintaining a high average accuracy within a [-1.07, -3.43]% delta on the long range arena benchmark. \nFinally, we provide mathematical bounds on the interference between superposition channels in MIMOFormer. 
Our code is available at https://github.com/IBM/multiple-input-multiple-output-nets.", "keywords": "Computation in superposition;Vector-symbolic architectures;Convolutional neural networks;Transformers", "primary_area": "", "supplementary_material": "/attachment/bf3b32b743bf02853508523f2c209fa22da5acc6.zip", "author": "Nicolas Menet;Michael Hersche;Geethan Karunaratne;Luca Benini;Abu Sebastian;Abbas Rahimi", "authorids": "menetn@student.ethz.ch;~Michael_Hersche1;~Geethan_Karunaratne1;~Luca_Benini2;~Abu_Sebastian1;~Abbas_Rahimi1", "gender": ";M;M;M;M;M", "homepage": ";https://research.ibm.com/people/michael-hersche--1;https://researcher.watson.ibm.com/researcher/view.php?person=zurich-KAR;https://ee.ethz.ch/the-department/people-a-z/person-detail.luca-benini.html;https://researcher.watson.ibm.com/researcher/view.php?person=zurich-ASE;https://research.ibm.com/people/abbas-rahimi", "dblp": ";231/2484;174/2075;b/LucaBenini.html;;48/9350", "google_scholar": ";uhC6m3EAAAAJ;CrCeq1QAAAAJ;8riq3sYAAAAJ;https://scholar.google.ch/citations?user=sUCQ7KMAAAAJ;yx0pEmYAAAAJ", "orcid": ";0000-0003-3065-7639;0000-0002-0805-4789;0000-0001-8068-3806;;0000-0003-3141-4970", "linkedin": ";;https://linkedin.com/in/geethan09;lubenini/;;", "or_profile": "menetn@student.ethz.ch;~Michael_Hersche1;~Geethan_Karunaratne1;~Luca_Benini2;~Abu_Sebastian1;~Abbas_Rahimi1", "aff": ";International Business Machines;Swiss Federal Institute of Technology;University of Bologna;;IBM Research - Zurich", "aff_domain": ";ibm.com;ethz.ch;unibo.it;;zurich.ibm.com", "position": ";PhD student;PhD student;Full Professor;;Principal Researcher", "bibtex": "@inproceedings{\nmenet2023mimonets,\ntitle={{MIMON}ets: Multiple-Input-Multiple-Output Neural Networks Exploiting Computation in Superposition},\nauthor={Nicolas Menet and Michael Hersche and Geethan Karunaratne and Luca Benini and Abu Sebastian and Abbas Rahimi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ox7aynitoW}\n}", "github": "", "project": "", "reviewers": "3r5d;zbz3;A964;b2eJ", "pdf_size": 2244296, "rating": "5;6;6;7", "confidence": "2;2;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "66;105;66;61", "wc_strengths": "30;42;20;23", "wc_weaknesses": "74;53;54;523", "wc_questions": "18;43;15;2", "wc_limitations": "2;44;18;11", "wc_review": "190;287;173;620", "wc_reply_reviewers": "15;87;19;169", "wc_reply_authors": "16;256;0;567", "reply_reviewers": "1;1;1;3", "reply_authors": "2;2;1;4", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.5, 17.727097901235837 ], "wc_strengths_avg": [ 28.75, 8.46684711093805 ], "wc_weaknesses_avg": [ 176.0, 200.51558542916308 ], "wc_questions_avg": [ 19.5, 14.84082207965583 ], "wc_limitations_avg": [ 18.75, 15.642490210960657 ], "wc_review_avg": [ 317.5, 179.98124902333575 ], "wc_reply_reviewers_avg": [ 72.5, 62.63186090162099 ], "wc_reply_authors_avg": [ 209.75, 229.83730658881294 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11429985299105797738&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": 
";ibm.com;ethz.ch;unibo.it;;zurich.ibm.com", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "International Business Machines Corporation;Swiss Federal Institute of Technology;University of Bologna;IBM", "aff_unique_dep": ";;;Research", "aff_unique_url": "https://www.ibm.com;https://www.ethz.ch;https://www.unibo.it;https://www.ibm.com/research", "aff_unique_abbr": "IBM;ETH Zurich;Unibo;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "United States;Switzerland;Italy" }, { "id": "oxj8mx4Sv2", "title": "Deep Discriminative to Kernel Generative Networks for Calibrated Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "The fight between discriminative versus generative goes deep, in both the study of artificial and natural intelligence. In our view, both camps have complementary values. So, we sought to synergistically combine them. Here, we propose a methodology to convert deep discriminative networks to kernel generative networks. We leveraged the fact that deep models, including both random forests and deep networks, learn internal representations which are unions of polytopes with affine activation functions to conceptualize them both as generalized partitioning rules. We replace the affine function in each polytope populated by the training data with Gaussian kernel that results in a generative model. Theoretically, we derive the conditions under which our generative models are a consistent estimator of the corresponding class conditional density. Moreover, our proposed models obtain well calibrated posteriors for in-distribution, and extrapolate beyond the training data to handle out-of-distribution inputs reasonably. We believe this approach may be an important step in unifying the thinking and the approaches across the discriminative and the generative divide.", "keywords": "Generative models;Posterior Calibration;OOD Calibration;Random Forest;ReLU-net;Polytopes", "primary_area": "", "supplementary_material": "/attachment/521f6180247b00581119c9860d50bddbbcbbe845.zip", "author": "Jayanta Dey;Haoyin Xu;Ashwin De Silva;Will LeVine;Tyler M. 
Tomita;Ali Geisa;Tiffany Chu;Jacob Maxwell Desman;Joshua T Vogelstein", "authorids": "~Jayanta_Dey1;~Haoyin_Xu1;~Ashwin_De_Silva1;~Will_LeVine1;~Tyler_M._Tomita1;~Ali_Geisa1;tiffanyformosa@gmail.com;~Jacob_Maxwell_Desman1;~Joshua_T_Vogelstein1", "gender": "M;M;M;;M;M;;M;M", "homepage": ";;https://laknath1996.github.io;;https://tyler-tomita.github.io;;;;https://neurodata.io/", "dblp": "236/4496;300/8957;270/4302;;;;;;04/700", "google_scholar": "o0AWbc4AAAAJ;7gDN3c8AAAAJ;xqhwEGIAAAAJ;x0BaVhQAAAAJ;;;;c0Xt7BAAAAAJ;DWPfdT4AAAAJ", "orcid": ";0000-0001-8235-4950;0000-0002-6406-7090;;;;;0000-0002-5411-6637;0000-0003-2487-6237", "linkedin": "jayanta-dey-22431575/;haoyinxu/;ashwin-de-silva-6852b14b/;https://www.linkedin.com/mwlite/in/will-levine-63b986123;;ali-geisa-26256211a;;jacobdesman/;jovo1/", "or_profile": "~Jayanta_Dey1;~Haoyin_Xu1;~Ashwin_De_Silva1;~Will_LeVine1;~Tyler_M._Tomita1;~Ali_Geisa1;tiffanyformosa@gmail.com;~Jacob_Maxwell_Desman1;~Joshua_T_Vogelstein1", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;;Johns Hopkins University;Johns Hopkins University;;Icahn School of Medicine at Mount Sinai;Johns Hopkins University", "aff_domain": "jhmi.edu;jhu.edu;jhu.edu;;jh.edu;jhu.edu;;icahn.mssm.edu;jhu.edu", "position": "PhD student;Research Assistant;PhD student;;Postdoc;Researcher;;Medical Student;Associate Professor", "bibtex": "@misc{\ndey2023deep,\ntitle={Deep Discriminative to Kernel Generative Networks for Calibrated Inference},\nauthor={Jayanta Dey and Haoyin Xu and Ashwin De Silva and Will LeVine and Tyler M. Tomita and Ali Geisa and Tiffany Chu and Jacob Maxwell Desman and Joshua T Vogelstein},\nyear={2023},\nurl={https://openreview.net/forum?id=oxj8mx4Sv2}\n}", "github": "", "project": "", "reviewers": "Up6z;fq3V;Quey;c3Ag", "site": "https://openreview.net/forum?id=oxj8mx4Sv2", "pdf_size": 4032465, "rating": "4;4;4;4", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;2", "presentation": "3;3;2;3", "wc_summary": "172;58;84;26", "wc_strengths": "105;71;85;38", "wc_weaknesses": "123;125;113;323", "wc_questions": "297;76;13;51", "wc_limitations": "14;11;42;41", "wc_review": "711;341;337;479", "wc_reply_reviewers": "294;191;75;64", "wc_reply_authors": "0;730;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 54.26785420486054 ], "wc_strengths_avg": [ 74.75, 24.416951079117148 ], "wc_weaknesses_avg": [ 171.0, 87.87491109526086 ], "wc_questions_avg": [ 109.25, 110.69411682650528 ], "wc_limitations_avg": [ 27.0, 14.543039572248986 ], "wc_review_avg": [ 467.0, 152.0328911781921 ], "wc_reply_reviewers_avg": [ 156.0, 93.93348710656919 ], "wc_reply_authors_avg": [ 182.5, 316.09927238132013 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A7zzZ2_fkEAJ:scholar.google.com/&scioq=Deep+Discriminative+to+Kernel+Generative+Networks+for+Calibrated+Inference&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Johns Hopkins University;Icahn School of Medicine at Mount Sinai", "aff_unique_dep": ";School of Medicine", "aff_unique_url": 
"https://www.jhu.edu;https://icahn.mssm.edu", "aff_unique_abbr": "JHU;ISMMS", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Language-based Action Concept Spaces Improve Video Self-Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70436", "id": "oyFyOPZUCs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ed67dff7cb96e7e86c4d91c0d5db49bb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oyFyOPZUCs", "openreview": "https://openreview.net/forum?id=oyFyOPZUCs", "poster": "/media/PosterPDFs/NeurIPS%202023/70436.png?t=1701721643.6580348", "slides": "https://nips.cc/virtual/2023/poster/70436", "video": "https://nips.cc/virtual/2023/poster/70436", "author_site": "Kanchana Ranasinghe, Michael S Ryoo", "tldr": "", "abstract": "Recent contrastive language image pre-training has led to learning highly transferable and robust image representations. However, adapting these models to video domain with minimal supervision remains an open problem. We explore a simple step in that direction, using language tied self-supervised learning to adapt an image CLIP model to the video domain. A backbone modified for temporal modeling is trained under self-distillation settings with train objectives operating in an action concept space. Feature vectors of various action concepts extracted from a language encoder using relevant textual prompts construct this space. A large language model aware of actions and their attributes generates the relevant textual prompts.\nWe introduce two train objectives, concept distillation and concept alignment, that retain generality of original representations while enforcing relations between actions and their attributes. 
Our approach improves zero-shot and linear probing performance on three action recognition benchmarks.", "keywords": "self-supervised learning for videos;zero-shot action recognition", "primary_area": "", "supplementary_material": "", "author": "Kanchana Ranasinghe;Michael S Ryoo", "authorids": "~Kanchana_Ranasinghe1;~Michael_S_Ryoo1", "gender": "M;M", "homepage": "http://michaelryoo.com/;http://kahnchana.github.io/", "dblp": "r/MichaelSRyoo;211/4048", "google_scholar": "vcw0TJIAAAAJ;K2WBZTwAAAAJ", "orcid": ";0000-0003-2374-7804", "linkedin": ";", "or_profile": "~Michael_S_Ryoo1;~Kanchana_Nisal_Ranasinghe1", "aff": "Google DeepMind;Meta AI", "aff_domain": "google.com;facebook.com", "position": "Research Scientist;Intern", "bibtex": "@inproceedings{\nranasinghe2023languagebased,\ntitle={Language-based Action Concept Spaces Improve Video Self-Supervised Learning},\nauthor={Kanchana Ranasinghe and Michael S Ryoo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oyFyOPZUCs}\n}", "github": "", "project": "", "reviewers": "vhEc;mCow;WHFS;aWeV;Cb1q", "pdf_size": 468709, "rating": "5;5;5;6;6", "confidence": "4;4;4;4;4", "soundness": "3;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;2;3", "wc_summary": "51;54;98;52;92", "wc_strengths": "42;50;74;41;74", "wc_weaknesses": "106;126;167;113;131", "wc_questions": "7;63;67;21;35", "wc_limitations": "4;6;15;25;14", "wc_review": "210;299;421;252;346", "wc_reply_reviewers": "255;96;38;48;45", "wc_reply_authors": "717;353;24;23;21", "reply_reviewers": "4;2;1;1;1", "reply_authors": "6;3;2;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 69.4, 21.010473578670233 ], "wc_strengths_avg": [ 56.2, 14.864723340849638 ], "wc_weaknesses_avg": [ 128.6, 21.17167919651155 ], "wc_questions_avg": [ 38.6, 23.337523433303716 ], "wc_limitations_avg": [ 12.8, 7.467261881037788 ], "wc_review_avg": [ 305.6, 73.4917682465186 ], "wc_reply_reviewers_avg": [ 96.4, 81.91361303226712 ], "wc_reply_authors_avg": [ 227.6, 276.12866566149916 ], "reply_reviewers_avg": [ 1.8, 1.1661903789690604 ], "reply_authors_avg": [ 3.0, 1.5491933384829668 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6206320190242490265&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;facebook.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Meta", "aff_unique_dep": "Google DeepMind;Meta AI", "aff_unique_url": "https://deepmind.com;https://meta.com", "aff_unique_abbr": "DeepMind;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Temperature Balancing, Layer-wise Weight Analysis, and Neural Network Training", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70435", "id": "oyV9FslE3j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c8a4dd7d9e13583d714ce8580da7bbc7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=oyV9FslE3j", "openreview": "https://openreview.net/forum?id=oyV9FslE3j", "poster": "/media/PosterPDFs/NeurIPS%202023/70435.png?t=1701658723.0549757", "slides": 
"https://nips.cc/virtual/2023/poster/70435", "video": "https://nips.cc/virtual/2023/poster/70435", "author_site": "Yefan Zhou, TIANYU PANG, Keqin Liu, charles martin, Michael Mahoney, Yaoqing Yang", "tldr": "", "abstract": "Regularization in modern machine learning is crucial, and it can take various forms in algorithmic design: training set, model family, error function, regularization terms, and optimizations. \nIn particular, the learning rate, which can be interpreted as a temperature-like parameter within the statistical mechanics of learning, plays a crucial role in neural network training. \nIndeed, many widely adopted training strategies basically just define the decay of the learning rate over time. \nThis process can be interpreted as decreasing a temperature, using either a global learning rate (for the entire model) or a learning rate that varies for each parameter. \nThis paper proposes TempBalance, a straightforward yet effective layer-wise learning rate method. TempBalance is based on Heavy-Tailed Self-Regularization (HT-SR) Theory, an approach which characterizes the implicit self-regularization of different layers in trained models. \nWe demonstrate the efficacy of using HT-SR-motivated metrics to guide the scheduling and balancing of temperature across all network layers during model training, resulting in improved performance during testing. \nWe implement TempBalance on CIFAR10, CIFAR100, SVHN, and TinyImageNet datasets using ResNets, VGGs and WideResNets with various depths and widths. \nOur results show that TempBalance significantly outperforms ordinary SGD and carefully-tuned spectral norm regularization. \nWe also show that TempBalance outperforms a number of state-of-the-art optimizers and learning rate schedulers.", "keywords": "Heavy-tail self-regularization;learning rate schedule", "primary_area": "", "supplementary_material": "/attachment/ff00e503c95378736143ea1656bbaaf77cc0d133.zip", "author": "Yefan Zhou;Tianyu Pang;Keqin Liu;charles h martin;Michael W. Mahoney;Yaoqing Yang", "authorids": "~Yefan_Zhou1;~Tianyu_Pang2;~Keqin_Liu1;~charles_h_martin1;~Michael_W._Mahoney1;~Yaoqing_Yang1", "gender": "M;M;M;M;;M", "homepage": "https://yefanzhou.github.io/;https://tdcsz327.github.io;https://scholar.xjtlu.edu.cn/en/persons/kevin-liu;https://calculationconsulting.com/;;https://sites.google.com/site/yangyaoqingcmu/", "dblp": "237/4333;;66/3620;207/8239.html;;04/4176", "google_scholar": "TAeVaicAAAAJ;https://scholar.google.com/citations?hl=en;VXtqaQ4AAAAJ;;;LYvugWgAAAAJ", "orcid": ";0009-0006-3728-1389;0000-0002-4783-346X;;;0000-0001-9908-5531", "linkedin": "yefan-zhou/;;;charlesmartin14/;;", "or_profile": "~Yefan_Zhou1;~Tianyu_Pang2;~Keqin_Liu1;~charles_h_martin1;~Michael_W._Mahoney1;~Yaoqing_Yang1", "aff": "International Computer Science Institute;Nanjing University;Nanjing University;;;Dartmouth College", "aff_domain": "icsi.berkeley.edu;nju.edu.cn;nju.edu.cn;;;dartmouth.edu", "position": "Researcher;MS student;Lecturer;;;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023temperature,\ntitle={Temperature Balancing, Layer-wise Weight Analysis, and Neural Network Training},\nauthor={Yefan Zhou and Tianyu Pang and Keqin Liu and charles h martin and Michael W. 
Mahoney and Yaoqing Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=oyV9FslE3j}\n}", "github": "", "project": "", "reviewers": "MZpk;Gitq;QLkn;zFJE", "pdf_size": 8220058, "rating": "6;7;7;9", "confidence": "2;4;2;3", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "89;104;109;149", "wc_strengths": "56;102;101;154", "wc_weaknesses": "154;59;452;58", "wc_questions": "29;117;123;219", "wc_limitations": "6;15;99;8", "wc_review": "334;397;884;588", "wc_reply_reviewers": "15;0;181;15", "wc_reply_authors": "20;0;70;20", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.75, 22.18529918662356 ], "wc_strengths_avg": [ 103.25, 34.694199803425356 ], "wc_weaknesses_avg": [ 180.75, 161.3867637075606 ], "wc_questions_avg": [ 122.0, 67.23838189605696 ], "wc_limitations_avg": [ 32.0, 38.82653731663435 ], "wc_review_avg": [ 550.75, 213.9291646784047 ], "wc_reply_reviewers_avg": [ 52.75, 74.29796430589468 ], "wc_reply_authors_avg": [ 27.5, 25.8602010819715 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3458572319330373, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15292835631478108726&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "icsi.berkeley.edu;nju.edu.cn;nju.edu.cn;;;dartmouth.edu", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "International Computer Science Institute;Nanjing University;Dartmouth College", "aff_unique_dep": ";;", "aff_unique_url": "https://www.icsi.berkeley.edu/;https://www.nju.edu.cn;https://www.dartmouth.edu", "aff_unique_abbr": "ICSI;Nanjing U;Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "SynMob: Creating High-Fidelity Synthetic GPS Trajectory Dataset for Urban Mobility Analysis", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73469", "id": "oz4AGs0phP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4786c0d1b9687a841bc579b0b8b01b8e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=oz4AGs0phP", "openreview": "https://openreview.net/forum?id=oz4AGs0phP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73469", "video": "https://nips.cc/virtual/2023/poster/73469", "author_site": "Yuanshao Zhu, Yongchao Ye, Ying Wu, Xiangyu Zhao, James Yu", "tldr": "", "abstract": "Urban mobility analysis has been extensively studied in the past decade using a vast amount of GPS trajectory data, which reveals hidden patterns in movement and human activity within urban landscapes. Despite its significant value, the availability of such datasets often faces limitations due to privacy concerns, proprietary barriers, and quality inconsistencies. To address these challenges, this paper presents a synthetic trajectory dataset with high fidelity, offering a general solution to these data accessibility issues. 
Specifically, the proposed dataset adopts a diffusion model as its synthesizer, with the primary aim of accurately emulating the spatial-temporal behavior of the original trajectory data. These synthesized data can retain the geo-distribution and statistical properties characteristic of real-world datasets. Through rigorous analysis and case studies, we validate the high similarity and utility between the proposed synthetic trajectory dataset and real-world counterparts. Such validation underscores the practicality of synthetic datasets for urban mobility analysis and advocates for their wider acceptance within the research community. Finally, we publicly release the trajectory synthesizer and datasets, aiming to enhance the quality and availability of synthetic trajectory datasets and encourage continued contributions to this rapidly evolving field. The dataset is publicly available online at https://github.com/Applied-Machine-Learning-Lab/SynMob.", "keywords": "Synthetic dataset;urban mobility analysis;spatial-temporal data mining", "primary_area": "", "supplementary_material": "/attachment/4a1bf236575f9fad27eb6f36b2f1e2ea62005d7e.pdf", "author": "Yuanshao Zhu;Yongchao Ye;Ying Wu;Xiangyu Zhao;James Yu", "authorids": "~Yuanshao_Zhu1;~Yongchao_Ye1;~Ying_Wu9;~Xiangyu_Zhao1;~James_Yu1", "gender": ";M;F;M;", "homepage": ";;;https://zhaoxyai.github.io/;", "dblp": ";259/1930;;08/890-1.html;", "google_scholar": ";u6IHWCkAAAAJ;wlRWmIcAAAAJ;;", "orcid": ";0000-0001-9782-218X;;0000-0003-2926-4416;", "linkedin": ";;;;", "or_profile": "~Yuanshao_Zhu1;~Yongchao_Ye1;~Ying_Wu9;~Xiangyu_Zhao1;~James_Yu1", "aff": ";Southern University of Science and Technology;University of Leeds;City University of Hong Kong;", "aff_domain": ";sustech.edu.cn;leeds.ac.uk;cityu.edu.hk;", "position": ";MS student;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nzhu2023synmob,\ntitle={SynMob: Creating High-Fidelity Synthetic {GPS} Trajectory Dataset for Urban Mobility Analysis},\nauthor={Yuanshao Zhu and Yongchao Ye and Ying Wu and Xiangyu Zhao and James Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=oz4AGs0phP}\n}", "github": "", "project": "", "reviewers": "jX7K;ra5b;ihi1;ot2s;MCwX", "pdf_size": 6630700, "rating": "5;6;6;7;7", "confidence": "3;3;3;2;3", "wc_summary_and_contributions": "70;83;68;74;28", "wc_strengths": "29;26;106;88;26", "wc_improvement": "71;95;61;33;214", "wc_limitations": "9;47;16;21;8", "wc_correctness": "8;23;1;49;4", "wc_clarity": "1;1;1;13;8", "wc_relation_to_prior_work": "6;1;1;15;21", "wc_documentation": "23;18;11;45;36", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "218;295;266;339;346", "wc_reply_reviewers": "0;50;0;51;0", "wc_reply_authors": "709;1201;654;394;578", "reply_reviewers": "0;2;0;1;0", "reply_authors": "1;2;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 64.6, 19.01157542130583 ], "wc_strengths_avg": [ 55.0, 34.77930419085465 ], "wc_improvement_avg": [ 94.8, 62.828019227093264 ], "wc_limitations_avg": [ 20.2, 14.218298069740976 ], "wc_correctness_avg": [ 17.0, 17.69745744450315 ], "wc_clarity_avg": [ 4.8, 4.915282290977803 ], "wc_relation_to_prior_work_avg": [ 8.8, 7.95989949685296 ], "wc_documentation_avg": [ 26.6, 12.306096050332128 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 292.8, 47.50326304581613 ],
"wc_reply_reviewers_avg": [ 20.2, 24.741867350707384 ], "wc_reply_authors_avg": [ 707.2, 268.87127031350894 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18311016548287054407&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 6, "email": ";sustech.edu.cn;leeds.ac.uk;cityu.edu.hk;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Southern University of Science and Technology;University of Leeds;City University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sustech.edu.cn;https://www.leeds.ac.uk;https://www.cityu.edu.hk", "aff_unique_abbr": "SUSTech;Leeds;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "FlowPG: Action-constrained Policy Gradient with Normalizing Flows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70434", "id": "p1gzxzJ4Y5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3fd9fe8ec6d7238bf71784797399bb61-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=p1gzxzJ4Y5", "openreview": "https://openreview.net/forum?id=p1gzxzJ4Y5", "poster": "/media/PosterPDFs/NeurIPS%202023/70434.png?t=1702113114.7174165", "slides": "https://nips.cc/virtual/2023/poster/70434", "video": "https://nips.cc/virtual/2023/poster/70434", "author_site": "Janaka Brahmanage, Jiajing LING, Akshat Kumar", "tldr": "", "abstract": "Action-constrained reinforcement learning (ACRL) is a popular approach for solving safety-critical and resource-allocation related decision making problems. A major challenge in ACRL is to ensure agent taking a valid action satisfying constraints in each RL step. Commonly used approach of using a projection layer on top of the policy network requires solving an optimization program which can result in longer training time, slow convergence, and zero gradient problem. To address this, first we use a normalizing flow model to learn an invertible, differentiable mapping between the feasible action space and the support of a simple distribution on a latent variable, such as Gaussian. Second, learning the flow model requires sampling from the feasible action space, which is also challenging. We develop multiple methods, based on Hamiltonian Monte-Carlo and probabilistic sentential decision diagrams for such action sampling for convex and non-convex constraints. Third, we integrate the learned normalizing flow with the DDPG algorithm. By design, a well-trained normalizing flow will transform policy output into a valid action without requiring an optimization solver. 
Empirically, our approach results in significantly fewer constraint violations (up to an order of magnitude for several instances) and is multiple times faster on a variety of continuous control tasks.", "keywords": "action-constrained reinforcement learning;decision making", "primary_area": "", "supplementary_material": "", "author": "Janaka Chathuranga Brahmanage;Jiajing Ling;Akshat Kumar", "authorids": "~Janaka_Chathuranga_Brahmanage1;jjling.2018@smu.edu.sg;~Akshat_Kumar2", "gender": "M;;M", "homepage": ";;http://www.smu.edu.sg/faculty/profile/102291/Akshat-KUMAR", "dblp": ";;73/193", "google_scholar": ";;https://scholar.google.com.tw/citations?user=zsYC3R0AAAAJ", "orcid": "0000-0002-7267-6136;;", "linkedin": ";;", "or_profile": "~Janaka_Chathuranga_Brahmanage1;jjling.2018@smu.edu.sg;~Akshat_Kumar2", "aff": "Singapore Management University;;Singapore Management University", "aff_domain": "smu.edu.sg;;smu.edu.sg", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nbrahmanage2023flowpg,\ntitle={Flow{PG}: Action-constrained Policy Gradient with Normalizing Flows},\nauthor={Janaka Chathuranga Brahmanage and Jiajing Ling and Akshat Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p1gzxzJ4Y5}\n}", "github": "", "project": "", "reviewers": "JVCX;RnL4;NbGB;Vb4Z", "pdf_size": 2688863, "rating": "5;5;5;6", "confidence": "3;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "50;29;48;264", "wc_strengths": "63;40;8;189", "wc_weaknesses": "146;58;23;357", "wc_questions": "182;22;117;157", "wc_limitations": "4;5;2;100", "wc_review": "445;154;198;1067", "wc_reply_reviewers": "79;5;11;152", "wc_reply_authors": "445;30;21;210", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.75, 96.3337298146397 ], "wc_strengths_avg": [ 75.0, 68.65493427278189 ], "wc_weaknesses_avg": [ 146.0, 129.80177194476198 ], "wc_questions_avg": [ 119.5, 60.878978309429606 ], "wc_limitations_avg": [ 27.75, 41.72753886823425 ], "wc_review_avg": [ 466.0, 364.2835434108985 ], "wc_reply_reviewers_avg": [ 61.75, 59.66311674728366 ], "wc_reply_authors_avg": [ 176.5, 172.37821788149452 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18277863460830033317&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "smu.edu.sg;;smu.edu.sg", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70433", "id": "p40XRfBX96", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0764db1151b936aca59249e2c1386101-Abstract-Conference.html", "pdf":
"https://openreview.net/pdf?id=p40XRfBX96", "openreview": "https://openreview.net/forum?id=p40XRfBX96", "poster": "/media/PosterPDFs/NeurIPS%202023/70433.png?t=1701988518.0359366", "slides": "https://nips.cc/virtual/2023/poster/70433", "video": "https://nips.cc/virtual/2023/poster/70433", "author_site": "Zhiqing Sun, Yikang Shen, Qinhong Zhou, Hongxin Zhang, Zhenfang Chen, David Cox, Yiming Yang, Chuang Gan", "tldr": "", "abstract": "Recent AI-assistant agents, such as ChatGPT, predominantly rely on supervised fine-tuning (SFT) with human annotations and reinforcement learning from human feedback (RLHF) to align the output of large language models (LLMs) with human intentions, ensuring they are helpful, ethical, and reliable. However, this dependence can significantly constrain the true potential of AI-assistant agents due to the high cost of obtaining human supervision and the related issues on quality, reliability, diversity, self-consistency, and undesirable biases. To address these challenges, we propose a novel approach called SELF-ALIGN, which combines principle-driven reasoning and the generative power of LLMs for the self-alignment of AI agents with minimal human supervision. Our approach encompasses four stages: first, we use an LLM to generate synthetic prompts, and a topic-guided method to augment the prompt diversity; second, we use a small set of human-written principles for AI models to follow, and guide the LLM through in-context learning from demonstrations (of principles application) to produce helpful, ethical, and reliable responses to user's queries; third, we fine-tune the original LLM with the high-quality self-aligned responses so that the resulting model can generate desirable responses for each query directly without the principle set and the demonstrations anymore; and finally, we offer a refinement step to address the issues of overly-brief or indirect responses. Applying SELF-ALIGN to the LLaMA-65b base language model, we develop an AI assistant named Dromedary. With fewer than 300 lines of human annotations (including < 200 seed prompts, 16 generic principles, and 5 exemplars for in-context learning). 
Dromedary significantly surpasses the performance of several state-of-the-art AI systems, including Text-Davinci-003 and Alpaca, on benchmark datasets with various settings.", "keywords": "AI Alignment;Large Language Models;In Context Learning;Neural Symbolics", "primary_area": "", "supplementary_material": "", "author": "Zhiqing Sun;Yikang Shen;Qinhong Zhou;Hongxin Zhang;Zhenfang Chen;David Daniel Cox;Yiming Yang;Chuang Gan", "authorids": "~Zhiqing_Sun1;~Yikang_Shen1;~Qinhong_Zhou1;~Hongxin_Zhang1;~Zhenfang_Chen1;~David_Daniel_Cox1;~Yiming_Yang1;~Chuang_Gan1", "gender": "M;M;M;M;M;;F;M", "homepage": "https://www.cs.cmu.edu/~zhiqings/;;https://zhouqqhh.github.io/;https://icefoxzhx.github.io/;https://zfchenunique.github.io;;http://www.cs.cmu.edu/~yiming/;http://people.csail.mit.edu/ganchuang/", "dblp": "211/7692;152/8226;337/9618;284/2962-5;207/5321;48/7659;25/1666;139/6993", "google_scholar": "https://scholar.google.com/citations?hl=en;qff5rRYAAAAJ;sQW6Ni4AAAAJ;https://scholar.google.com/citations?hl=en;QSRdIzAAAAAJ;;MlZq4XwAAAAJ;PTeSCbIAAAAJ", "orcid": ";;;0000-0002-6041-2440;;;0000-0001-8322-607X;", "linkedin": "zhiqing-sun-5781b3100/;;https://www.linkedin.cn/incareer/in/%E6%B2%81%E6%B3%93-%E5%91%A8-9500b7127;;\u632f\u65b9-\u9648-512011bb/;;yiming-yang-24100924/;", "or_profile": "~Zhiqing_Sun1;~Yikang_Shen1;~Qinhong_Zhou1;~Hongxin_Zhang1;~Zhenfang_Chen1;~David_Daniel_Cox1;~Yiming_Yang1;~Chuang_Gan1", "aff": "Carnegie Mellon University;International Business Machines;Department of Computer Science and Technology, Tsinghua University;Shanghai Jiaotong University;MIT-IBM Watson AI lab;International Business Machines;School of Computer Science, Carnegie Mellon University;MIT-IBM Watson AI Lab", "aff_domain": "cs.cmu.edu;ibm.com;mails.tsinghua.edu.cn;sjtu.edu.cn;ibm.com;ibm.com;cs.cmu.edu;ibm.com", "position": "PhD student;Researcher;MS student;Undergrad student;Researcher;IBM Director, MIT-IBM Watson AI Lab;Full Professor;PhD student", "bibtex": "@inproceedings{\nsun2023principledriven,\ntitle={Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision},\nauthor={Zhiqing Sun and Yikang Shen and Qinhong Zhou and Hongxin Zhang and Zhenfang Chen and David Daniel Cox and Yiming Yang and Chuang Gan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p40XRfBX96}\n}", "github": "", "project": "", "reviewers": "FPHi;VSmd;PEmx;Yvp3;EoHL", "pdf_size": 1541064, "rating": "6;6;7;7;7", "confidence": "4;4;4;5;4", "soundness": "3;3;3;4;3", "novelty": "2;2;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "80;64;102;55;48", "wc_strengths": "18;43;84;146;30", "wc_weaknesses": "196;189;91;151;41", "wc_questions": "97;4;97;8;9", "wc_limitations": "1;1;3;87;18", "wc_review": "392;301;377;447;146", "wc_reply_reviewers": "0;0;0;13;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 69.8, 19.332873557751316 ], "wc_strengths_avg": [ 64.2, 46.55491381154088 ], "wc_weaknesses_avg": [ 133.6, 59.422554640473 ], "wc_questions_avg": [ 43.0, 44.122556589572184 ], "wc_limitations_avg": [ 22.0, 33.117970952339455 ], "wc_review_avg": [ 332.6, 104.31222363654223 ], "wc_reply_reviewers_avg": [ 2.6, 5.2 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 356, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8543141659827671365&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.cmu.edu;ibm.com;mails.tsinghua.edu.cn;sjtu.edu.cn;ibm.com;ibm.com;cs.cmu.edu;ibm.com", "author_num": 8, "aff_unique_index": "0;1;2;3;4;1;0;4", "aff_unique_norm": "Carnegie Mellon University;International Business Machines Corporation;Tsinghua University;Shanghai Jiao Tong University;Massachusetts Institute of Technology", "aff_unique_dep": ";;Department of Computer Science and Technology;;IBM Watson AI lab", "aff_unique_url": "https://www.cmu.edu;https://www.ibm.com;https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "CMU;IBM;THU;SJTU;MIT-IBM AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;1;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "How does GPT-2 compute greater-than?: Interpreting mathematical abilities in a pre-trained language model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70432", "id": "p4PckNQR8k", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efbba7719cc5172d175240f24be11280-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=p4PckNQR8k", "openreview": "https://openreview.net/forum?id=p4PckNQR8k", "poster": "/media/PosterPDFs/NeurIPS%202023/70432.png?t=1699047396.9160943", "slides": "https://nips.cc/virtual/2023/poster/70432", "video": "https://nips.cc/virtual/2023/poster/70432", "author_site": "Michael Hanna, Ollie Liu, Alexandre Variengien", "tldr": "", "abstract": "Pre-trained language models can be surprisingly adept at tasks they were not explicitly trained on, but how they implement these capabilities is poorly understood. In this paper, we investigate the basic mathematical abilities often acquired by pre-trained language models. Concretely, we use mechanistic interpretability techniques to explain the (limited) mathematical abilities of GPT-2 small. As a case study, we examine its ability to take in sentences such as \"The war lasted from the year 1732 to the year 17\", and predict valid two-digit end years (years > 32). We first identify a circuit, a small subset of GPT-2 small's computational graph that computes this task's output. Then, we explain the role of each circuit component, showing that GPT-2 small's final multi-layer perceptrons boost the probability of end years greater than the start year. Finally, we find related tasks that activate our circuit. 
Our results suggest that GPT-2 small computes greater-than using a complex but general mechanism that activates across diverse contexts.", "keywords": "interpretability;language models;NLP", "primary_area": "", "supplementary_material": "/attachment/6a9b252fa1174c07353e34cea35d3043f17ea205.zip", "author": "Michael Hanna;Ollie Liu;Alexandre Variengien", "authorids": "~Michael_Hanna1;~Ollie_Liu1;~Alexandre_Variengien1", "gender": "M;M;M", "homepage": "http://hannamw.github.io;https://ollieliu.com;https://avariengien.github.io/", "dblp": "306/9666;;", "google_scholar": "0wOdTeYAAAAJ;https://scholar.google.com/citations?view_op=list_works;BQOwWG8AAAAJ", "orcid": ";;", "linkedin": "michael-hanna-a29279140/;oliu/;", "or_profile": "~Michael_Hanna1;~Ollie_Liu1;~Alexandre_Variengien1", "aff": "University of Amsterdam;University of Southern California;EPFL - EPF Lausanne", "aff_domain": "uva.nl;usc.edu;epfl.ch", "position": "PhD student;PhD;MS student", "bibtex": "@inproceedings{\nhanna2023how,\ntitle={How does {GPT}-2 compute greater-than?: Interpreting mathematical abilities in a pre-trained language model},\nauthor={Michael Hanna and Ollie Liu and Alexandre Variengien},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p4PckNQR8k}\n}", "github": "", "project": "", "reviewers": "GMC8;WeSY;BwzU;Nf2K;eiM2", "pdf_size": 1621539, "rating": "4;6;6;6;7", "confidence": "5;4;4;3;3", "soundness": "3;4;4;2;3", "novelty": "2;4;3;3;2", "presentation": "1;3;2;4;4", "wc_summary": "136;131;100;82;89", "wc_strengths": "34;15;40;41;53", "wc_weaknesses": "257;28;89;376;226", "wc_questions": "69;48;282;42;208", "wc_limitations": "1;6;2;1;1", "wc_review": "497;228;513;542;577", "wc_reply_reviewers": "0;13;123;81;20", "wc_reply_authors": "33;0;762;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 107.6, 21.969069165533618 ], "wc_strengths_avg": [ 36.6, 12.435433245367852 ], "wc_weaknesses_avg": [ 195.2, 123.84732536474093 ], "wc_questions_avg": [ 129.8, 97.34146084788331 ], "wc_limitations_avg": [ 2.2, 1.9390719429665317 ], "wc_review_avg": [ 471.4, 124.71182782719528 ], "wc_reply_reviewers_avg": [ 47.4, 46.957853443274004 ], "wc_reply_authors_avg": [ 159.0, 301.7707739327982 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 182, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9195897995277413274&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uva.nl;usc.edu;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Amsterdam;University of Southern California;EPFL", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uva.nl;https://www.usc.edu;https://www.epfl.ch", "aff_unique_abbr": "UvA;USC;EPFL", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Lausanne", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Netherlands;United States;Switzerland" }, { "title": "Riemannian stochastic optimization methods avoid strict saddle points", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70431", "id": "p4SjKPchJy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e809ba53f34d9170386ebfc8b60300f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=p4SjKPchJy", "openreview": "https://openreview.net/forum?id=p4SjKPchJy", "poster": "/media/PosterPDFs/NeurIPS%202023/70431.png?t=1702269894.1108692", "slides": "https://nips.cc/virtual/2023/poster/70431", "video": "https://nips.cc/virtual/2023/poster/70431", "author_site": "Ya-Ping Hsieh, Mohammad Reza Karimi Jaghargh, Andreas Krause, Panayotis Mertikopoulos", "tldr": "", "abstract": "Many modern machine learning applications - from online principal component analysis to covariance matrix identification and dictionary learning - can be formulated as minimization problems on Riemannian manifolds, typically solved with a Riemannian stochastic gradient method (or some variant thereof). However, in many cases of interest, the resulting minimization problem is _not_ geodesically convex, so the convergence of the chosen solver to a desirable solution - i.e., a local minimizer - is by no means guaranteed. In this paper, we study precisely this question, that is, whether stochastic Riemannian optimization algorithms are guaranteed to avoid saddle points with probability $1$. For generality, we study a family of retraction-based methods which, in addition to having a potentially much lower per-iteration cost relative to Riemannian gradient descent, include other widely used algorithms, such as natural policy gradient methods and mirror descent in ordinary convex spaces. In this general setting, we show that, under mild assumptions for the ambient manifold and the oracle providing gradient information, the policies under study avoid strict saddle points / submanifolds with probability $1$, from any initial condition. 
This result provides an important sanity check for the use of gradient methods on manifolds as it shows that, almost always, the end state of a stochastic Riemannian algorithm can only be a local minimizer.", "keywords": "Riemannian optimization;saddle points;stochastic approximation", "primary_area": "", "supplementary_material": "/attachment/cb0df49202605216936884549e71d945a722bb97.pdf", "author": "Ya-Ping Hsieh;Mohammad Reza Karimi Jaghargh;Andreas Krause;Panayotis Mertikopoulos", "authorids": "~Ya-Ping_Hsieh1;~Mohammad_Reza_Karimi_Jaghargh1;~Andreas_Krause1;~Panayotis_Mertikopoulos1", "gender": "M;M;M;M", "homepage": ";http://moreka.github.io;https://las.inf.ethz.ch/krausea;http://polaris.imag.fr/panayotis.mertikopoulos/", "dblp": "122/5313;https://dblp.uni-trier.de/pers/hd/k/Karimi:Mohammad_Reza;87/1831-1.html;49/6721", "google_scholar": ";CEZbTgMAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ;xsusqPYAAAAJ", "orcid": ";;0000-0001-7260-9673;0000-0003-2026-9616", "linkedin": ";;krausea/;", "or_profile": "~Ya-Ping_Hsieh1;~Mohammad_Reza_Karimi_Jaghargh1;~Andreas_Krause1;~Panayotis_Mertikopoulos1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETH Zurich;French National Center for Scientific Research", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch;imag.fr", "position": "Postdoc;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nhsieh2023riemannian,\ntitle={Riemannian stochastic optimization methods avoid strict saddle points},\nauthor={Ya-Ping Hsieh and Mohammad Reza Karimi Jaghargh and Andreas Krause and Panayotis Mertikopoulos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p4SjKPchJy}\n}", "github": "", "project": "", "reviewers": "MfPv;SLQm;5i8d;K9Za", "pdf_size": 5049003, "rating": "6;7;7;8", "confidence": "4;3;4;3", "soundness": "4;4;3;4", "novelty": "4;3;3;3", "presentation": "4;4;3;4", "wc_summary": "49;27;144;86", "wc_strengths": "33;70;164;283", "wc_weaknesses": "312;72;21;263", "wc_questions": "2;47;173;226", "wc_limitations": "1;1;9;14", "wc_review": "397;217;511;872", "wc_reply_reviewers": "0;36;20;18", "wc_reply_authors": "0;9;8;9", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.5, 44.30857704779064 ], "wc_strengths_avg": [ 137.5, 96.62944685757029 ], "wc_weaknesses_avg": [ 167.0, 123.06705489285099 ], "wc_questions_avg": [ 112.0, 90.88729284118875 ], "wc_limitations_avg": [ 6.25, 5.539629951540085 ], "wc_review_avg": [ 499.25, 239.37457571763966 ], "wc_reply_reviewers_avg": [ 18.5, 12.757350822173073 ], "wc_reply_authors_avg": [ 6.5, 3.774917217635375 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11675182556158215073&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 13, "email": "inf.ethz.ch;ethz.ch;ethz.ch;imag.fr", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology;French National Center for Scientific Research", "aff_unique_dep": "Department of Computer 
Science;;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch;https://www.cnrs.fr", "aff_unique_abbr": "ETHZ;ETH Zurich;CNRS", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Switzerland;France" }, { "title": "DIN-SQL: Decomposed In-Context Learning of Text-to-SQL with Self-Correction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70430", "id": "p53QDxSIc5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/72223cc66f63ca1aa59edaec1b3670e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=p53QDxSIc5", "openreview": "https://openreview.net/forum?id=p53QDxSIc5", "poster": "/media/PosterPDFs/NeurIPS%202023/70430.png?t=1699239429.0143461", "slides": "https://nips.cc/virtual/2023/poster/70430", "video": "https://nips.cc/virtual/2023/poster/70430", "author_site": "Mohammadreza Pourreza, Davood Rafiei", "tldr": "", "abstract": "There is currently a significant gap between the performance of fine-tuned models and prompting approaches using Large Language Models (LLMs) on the challenging task of text-to-SQL, as evaluated on datasets such as Spider. To improve the performance of LLMs in the reasoning process, we study how decomposing the task into smaller sub-tasks can be effective. In particular, we show that breaking down the generation problem into sub-problems and feeding the solutions of those sub-problems into LLMs can be an effective approach for significantly improving their performance. Our experiments with three LLMs show that this approach consistently improves their simple few-shot performance by roughly 10%, pushing the accuracy of LLMs towards SOTA or surpassing it. On the holdout test set of Spider, the SOTA, in terms of execution accuracy, was 79.9 and the new SOTA at the time of this writing using our approach is 85.3. Our approach with in-context learning beats many heavily fine-tuned models by at least 5%. 
Additionally, when evaluated on the BIRD benchmark, our approach achieved an execution accuracy of 55.9%, setting a new SOTA on its holdout test set.", "keywords": "In-Context Learning;Text-to-SQL;Task Decomposition;Spider Challenge;Natural Language Interfaces to Databases", "primary_area": "", "supplementary_material": "/attachment/5120151dc1707ec5c4dc67728c3a79624a510783.pdf", "author": "Mohammadreza Pourreza;Davood Rafiei", "authorids": "pourreza@ualberta.ca;~Davood_Rafiei2", "gender": ";M", "homepage": ";https://webdocs.cs.ualberta.ca/~drafiei/", "dblp": ";r/DRafiei", "google_scholar": ";https://scholar.google.com.tw/citations?user=lNxSDIwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "pourreza@ualberta.ca;~Davood_Rafiei2", "aff": ";University of Alberta", "aff_domain": ";ualberta.ca", "position": ";Full Professor", "bibtex": "@inproceedings{\npourreza2023dinsql,\ntitle={{DIN}-{SQL}: Decomposed In-Context Learning of Text-to-{SQL} with Self-Correction},\nauthor={Mohammadreza Pourreza and Davood Rafiei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p53QDxSIc5}\n}", "github": "", "project": "", "reviewers": "XH1V;VYLB;Qajd;DUDq;kanX", "pdf_size": 1187150, "rating": "4;5;6;6;6", "confidence": "4;4;5;4;4", "soundness": "3;3;4;4;3", "novelty": "2;2;2;3;3", "presentation": "3;4;4;4;3", "wc_summary": "65;176;66;87;125", "wc_strengths": "50;48;16;74;119", "wc_weaknesses": "106;181;180;164;168", "wc_questions": "42;135;39;54;116", "wc_limitations": "1;4;4;15;5", "wc_review": "264;544;305;394;533", "wc_reply_reviewers": "27;67;14;24;24", "wc_reply_authors": "0;135;0;128;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;2;1;2;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 103.8, 42.13976744121875 ], "wc_strengths_avg": [ 61.4, 34.1970758983864 ], "wc_weaknesses_avg": [ 159.8, 27.701263509089255 ], "wc_questions_avg": [ 77.2, 40.20646714149354 ], "wc_limitations_avg": [ 5.8, 4.791659420284375 ], "wc_review_avg": [ 408.0, 114.59668407070075 ], "wc_reply_reviewers_avg": [ 31.2, 18.432579851990333 ], "wc_reply_authors_avg": [ 52.6, 64.45959975054143 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.37500000000000006, "gs_citation": 361, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9281613595484623187&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";ualberta.ca", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Modality-Independent Teachers Meet Weakly-Supervised Audio-Visual Event Parser", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70429", "id": "p8gTWkFIvx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e95e9f0c127aa1cfa2628adb2f3cb107-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=p8gTWkFIvx", "openreview": "https://openreview.net/forum?id=p8gTWkFIvx", "poster": "/media/PosterPDFs/NeurIPS%202023/70429.png?t=1700711601.0714993", "slides": 
"https://nips.cc/virtual/2023/poster/70429", "video": "https://nips.cc/virtual/2023/poster/70429", "author_site": "Yung-Hsuan Lai, Yen-Chun Chen, Frank Wang", "tldr": "", "abstract": "Audio-visual learning has been a major pillar of multi-modal machine learning, where the community mostly focused on its $\\textit{modality-aligned}$ setting, $\\textit{i.e.}$, the audio and visual modality are $\\textit{both}$ assumed to signal the prediction target.\nWith the Look, Listen, and Parse dataset (LLP), we investigate the under-explored $\\textit{unaligned}$ setting, where the goal is to recognize audio and visual events in a video with only weak labels observed.\nSuch weak video-level labels only tell what events happen without knowing the modality they are perceived (audio, visual, or both).\nTo enhance learning in this challenging setting, we incorporate large-scale contrastively pre-trained models as the modality teachers. A simple, effective, and generic method, termed $\\textbf{V}$isual-$\\textbf{A}$udio $\\textbf{L}$abel Elab$\\textbf{or}$ation (VALOR), is innovated to harvest modality labels for the training events.\nEmpirical studies show that the harvested labels significantly improve an attentional baseline by $\\textbf{8.0}$ in average F-score (Type@AV).\nSurprisingly, we found that modality-independent teachers outperform their modality-fused counterparts since they are noise-proof from the other potentially unaligned modality.\nMoreover, our best model achieves the new state-of-the-art on all metrics of LLP by a substantial margin ($\\textbf{+5.4}$ F-score for Type@AV). VALOR is further generalized to Audio-Visual Event Localization and achieves the new state-of-the-art as well.", "keywords": "Audio-Visual Video Parsing;Audio-Visual Learning", "primary_area": "", "supplementary_material": "/attachment/80d33b93c95eb1b8fbb33848d832706eb29beaa6.zip", "author": "Yung-Hsuan Lai;Yen-Chun Chen;Yu-Chiang Frank Wang", "authorids": "~Yung-Hsuan_Lai1;~Yen-Chun_Chen1;~Yu-Chiang_Frank_Wang2", "gender": "M;M;M", "homepage": "https://franklin905.github.io/;;http://vllab.ee.ntu.edu.tw/ycwang.html", "dblp": "348/6382;160/0623-1;30/1690", "google_scholar": "rU7n-9YAAAAJ;Gptgy4YAAAAJ;HSGvdtoAAAAJ", "orcid": ";;0000-0002-2333-157X", "linkedin": "yung-hsuan-lai-1629a7212/;;", "or_profile": "~Yung-Hsuan_Lai1;~Yen-Chun_Chen1;~Yu-Chiang_Frank_Wang2", "aff": "National Taiwan University;Microsoft;National Taiwan University", "aff_domain": "ntu.edu.tw;microsoft.com;ntu.edu.tw", "position": "MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nlai2023modalityindependent,\ntitle={Modality-Independent Teachers Meet Weakly-Supervised Audio-Visual Event Parser},\nauthor={Yung-Hsuan Lai and Yen-Chun Chen and Yu-Chiang Frank Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p8gTWkFIvx}\n}", "github": "", "project": "", "reviewers": "uN7g;Qd1s;82wA;veqt;t3gt;JWv6", "pdf_size": 1453654, "rating": "6;6;6;6;7;8", "confidence": "2;4;4;5;4;3", "soundness": "3;3;2;3;3;4", "novelty": "3;3;2;3;3;3", "presentation": "3;3;3;3;4;3", "wc_summary": "135;59;69;91;157;106", "wc_strengths": "105;73;103;94;172;39", "wc_weaknesses": "235;103;385;196;201;203", "wc_questions": "232;3;9;5;49;2", "wc_limitations": "66;1;39;8;44;7", "wc_review": "773;239;605;394;623;357", "wc_reply_reviewers": "216;37;20;13;66;11", "wc_reply_authors": "46;25;26;25;27;19", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;2;2", "rating_avg": [ 
6.5, 0.7637626158259734 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 102.83333333333333, 34.604511587684954 ], "wc_strengths_avg": [ 97.66666666666667, 40.14833606625421 ], "wc_weaknesses_avg": [ 220.5, 84.0272773171506 ], "wc_questions_avg": [ 50.0, 83.01003955345803 ], "wc_limitations_avg": [ 27.5, 23.7679756535273 ], "wc_review_avg": [ 498.5, 182.79109205137252 ], "wc_reply_reviewers_avg": [ 60.5, 72.00636545935829 ], "wc_reply_authors_avg": [ 28.0, 8.445906306213285 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.23145502494313783, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16666862516762678515&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ntu.edu.tw;microsoft.com;ntu.edu.tw", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National Taiwan University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.ntu.edu.tw;https://www.microsoft.com", "aff_unique_abbr": "NTU;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "From Trainable Negative Depth to Edge Heterophily in Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70428", "id": "p8lowHbuv8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/de2d52c5cf2bea853ef39bb2e1535dde-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=p8lowHbuv8", "openreview": "https://openreview.net/forum?id=p8lowHbuv8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70428", "video": "https://nips.cc/virtual/2023/poster/70428", "author_site": "Yuchen Yan, Yuzhong Chen, Huiyuan Chen, Minghua Xu, Mahashweta Das, Hao Yang, Hanghang Tong", "tldr": "", "abstract": "Finding the proper depth $d$ of a graph convolutional network (GCN) that provides strong representation ability has drawn significant attention, yet largely remains an open problem for the graph learning community. Although noteworthy progress has been made, the depth or the number of layers of a corresponding GCN is realized by a series of graph convolution operations, which naturally makes $d$ a positive integer ($d \\in \\mathbb{N}+$). An interesting question is whether breaking the constraint of $\\mathbb{N}+$ by making $d$ a real number ($d \\in \\mathbb{R}$) can bring new insights into graph learning mechanisms. In this work, by redefining GCN's depth $d$ as a trainable parameter continuously adjustable within $(-\\infty,+\\infty)$, we open a new door of controlling its signal processing capability to model graph homophily/heterophily (nodes with similar/dissimilar labels/attributes tend to be inter-connected). A simple and powerful GCN model, TEDGCN, is proposed to retain the simplicity of GCN and meanwhile automatically search for the optimal $d$ without prior knowledge of whether the input graph is homophilic or heterophilic. Negative-valued $d$ intrinsically enables high-pass frequency filtering functionality via augmented topology for graph heterophily.
Extensive experiments demonstrate the superiority of TEDGCN on node classification tasks for a variety of homophilic and heterophilic graphs.", "keywords": "graph convolutional network", "primary_area": "", "supplementary_material": "", "author": "Yuchen Yan;Yuzhong Chen;Huiyuan Chen;Minghua Xu;Mahashweta Das;Hao Yang;Hanghang Tong", "authorids": "~Yuchen_Yan1;~Yuzhong_Chen2;~Huiyuan_Chen1;~Minghua_Xu2;~Mahashweta_Das2;~Hao_Yang8;~Hanghang_Tong3", "gender": ";M;M;;F;;", "homepage": ";https://usa.visa.com/about-visa/visa-research/yuzhong-chen.html;;;;;http://tonghanghang.org", "dblp": ";;204/5464;33/2798-3.html;;;58/1757", "google_scholar": ";Kc8V0tUAAAAJ;j3y4dJwAAAAJ;;;https://scholar.google.com/citations?hl=en;RaINcuUAAAAJ", "orcid": ";;0000-0002-6360-558X;;;;0000-0003-4405-3887", "linkedin": ";;;;mahashwetadas/;;htong/", "or_profile": "~Yuchen_Yan1;~Yuzhong_Chen2;~Huiyuan_Chen1;~Minghua_Xu2;~Mahashweta_Das2;~Hao_Yang8;~Hanghang_Tong3", "aff": ";VISA;;;VISA;Visa Research;University of Illinois, Urbana Champaign", "aff_domain": ";visa.com;;;visa.com;visa.com;illinois.edu", "position": ";Researcher;;;Principal Researcher;Vice President;Associate Professor", "bibtex": "@inproceedings{\nyan2023from,\ntitle={From Trainable Negative Depth to Edge Heterophily in Graphs},\nauthor={Yuchen Yan and Yuzhong Chen and Huiyuan Chen and Minghua Xu and Mahashweta Das and Hao Yang and Hanghang Tong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p8lowHbuv8}\n}", "github": "", "project": "", "reviewers": "iB1Z;yXi3;jrHF;wZLL", "pdf_size": 959190, "rating": "6;6;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;4;4;4", "wc_summary": "163;126;115;74", "wc_strengths": "57;178;70;122", "wc_weaknesses": "286;111;107;112", "wc_questions": "107;22;158;108", "wc_limitations": "36;19;31;23", "wc_review": "649;456;481;439", "wc_reply_reviewers": "214;19;36;30", "wc_reply_authors": "478;9;13;180", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.5, 31.7214438511238 ], "wc_strengths_avg": [ 106.75, 47.7879430400598 ], "wc_weaknesses_avg": [ 154.0, 76.2331948694268 ], "wc_questions_avg": [ 98.75, 48.874200760728556 ], "wc_limitations_avg": [ 27.25, 6.6473679001541655 ], "wc_review_avg": [ 506.25, 83.75970093069817 ], "wc_reply_reviewers_avg": [ 74.75, 80.62684106425105 ], "wc_reply_authors_avg": [ 170.0, 190.74459363242775 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15891800476715076607&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";visa.com;;;visa.com;visa.com;illinois.edu", "author_num": 7, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "VISA;Visa Inc.;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.visa.com;https://www.visa.com/;https://illinois.edu", "aff_unique_abbr": "VISA;Visa;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United 
States" }, { "title": "Demystifying the Optimal Performance of Multi-Class Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70427", "id": "p9k5MS0JAL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/647e122fc406573c51276692f20379b5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=p9k5MS0JAL", "openreview": "https://openreview.net/forum?id=p9k5MS0JAL", "poster": "/media/PosterPDFs/NeurIPS%202023/70427.png?t=1701140325.0816963", "slides": "https://nips.cc/virtual/2023/poster/70427", "video": "https://nips.cc/virtual/2023/poster/70427", "author_site": "Minoh Jeong, Martina Cardone, Alex Dytso", "tldr": "", "abstract": "Classification is a fundamental task in science and engineering on which machine learning methods have shown outstanding performances. However, it is challenging to determine whether such methods have achieved the Bayes error rate, that is, the lowest error rate attained by any classifier. This is mainly due to the fact that the Bayes error rate is not known in general and hence, effectively estimating it is paramount. Inspired by the work by Ishida et al. (2023), we propose an estimator for the Bayes error rate of supervised multi-class classification problems. We analyze several theoretical aspects of such estimator, including its consistency, unbiasedness, convergence rate, variance, and robustness. We also propose a denoising method that reduces the noise that potentially corrupts the data labels, and we improve the robustness of the proposed estimator to outliers by incorporating the median-of-means estimator. Our analysis demonstrates the consistency, asymptotic unbiasedness, convergence rate, and robustness of the proposed estimators. Finally, we validate the effectiveness of our theoretical results via experiments both on synthetic data under various noise settings and on real data.", "keywords": "Bayes error;estimation;classification;minimum error probability", "primary_area": "", "supplementary_material": "", "author": "Minoh Jeong;Martina Cardone;Alex Dytso", "authorids": "~Minoh_Jeong1;~Martina_Cardone1;~Alex_Dytso1", "gender": "M;F;", "homepage": "https://minosota.github.io;https://mcardone.umn.edu/;", "dblp": "230/0986;;", "google_scholar": "O_jelIsAAAAJ;;oVxK8g0AAAAJ", "orcid": "0000-0002-4854-917X;;", "linkedin": "minoh-jeong-a149b6163/;;", "or_profile": "~Minoh_Jeong1;~Martina_Cardone1;~Alex_Dytso1", "aff": "University of Minnesota - Twin Cities;University of Minnesota, Minneapolis;New Jersey Institute of Technology", "aff_domain": "umn.edu;umn.edu;njit.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njeong2023demystifying,\ntitle={Demystifying the Optimal Performance of Multi-Class Classification},\nauthor={Minoh Jeong and Martina Cardone and Alex Dytso},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=p9k5MS0JAL}\n}", "github": "", "project": "", "reviewers": "7qAC;HZ6f;iHwx;2nax", "pdf_size": 4284492, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;2;3;3", "wc_summary": "91;47;109;123", "wc_strengths": "24;168;97;72", "wc_weaknesses": "122;306;144;188", "wc_questions": "25;9;378;275", "wc_limitations": "24;2;20;14", "wc_review": "286;532;748;672", "wc_reply_reviewers": "92;32;124;413", "wc_reply_authors": "107;24;47;436", "reply_reviewers": "1;1;1;2", "reply_authors": 
"2;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.5, 28.613807855648993 ], "wc_strengths_avg": [ 90.25, 51.99218691303531 ], "wc_weaknesses_avg": [ 190.0, 71.06335201775947 ], "wc_questions_avg": [ 171.75, 159.07761470426945 ], "wc_limitations_avg": [ 15.0, 8.306623862918075 ], "wc_review_avg": [ 559.5, 175.88845897329364 ], "wc_reply_reviewers_avg": [ 165.25, 146.80152417464882 ], "wc_reply_authors_avg": [ 153.5, 165.89228432932015 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9971785914240919429&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "umn.edu;umn.edu;njit.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Minnesota;New Jersey Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.njit.edu", "aff_unique_abbr": "UMN;NJIT", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Twin Cities;Minneapolis;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Space Translation via Semantic Alignment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70426", "id": "pBa70rGHlr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad5fa03c906ca15905144ca3fbf2a768-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pBa70rGHlr", "openreview": "https://openreview.net/forum?id=pBa70rGHlr", "poster": "/media/PosterPDFs/NeurIPS%202023/70426.png?t=1702485202.1371627", "slides": "https://nips.cc/virtual/2023/poster/70426", "video": "https://nips.cc/virtual/2023/poster/70426", "author_site": "Valentino Maiorca, Luca Moschella, Antonio Norelli, Marco Fumero, Francesco Locatello, Emanuele Rodol\u00e0", "tldr": "", "abstract": "While different neural models often exhibit latent spaces that are alike when exposed to semantically related data, this intrinsic similarity is not always immediately discernible. Towards a better understanding of this phenomenon, our work shows how representations learned from these neural modules can be translated between different pre-trained networks via simpler transformations than previously thought. An advantage of this approach is the ability to estimate these transformations using standard, well-understood algebraic procedures that have closed-form solutions. Our method directly estimates a transformation between two given latent spaces, thereby enabling effective stitching of encoders and decoders without additional training. We extensively validate the adaptability of this translation procedure in different experimental settings: across various trainings, domains, architectures (e.g., ResNet, CNN, ViT), and in multiple downstream tasks (classification, reconstruction). 
Notably, we show how it is possible to zero-shot stitch text encoders and vision decoders, or vice-versa, yielding surprisingly good classification performance in this multimodal setting.", "keywords": "latent space translation;relative representation;Procrustes analysis;zero-shot;stitching;latent communication;representation learning;manifold alignment;multimodal", "primary_area": "", "supplementary_material": "/attachment/6fbfe97a0885f0b92a40d6066b1113980de6669e.zip", "author": "Valentino Maiorca;Luca Moschella;Antonio Norelli;Marco Fumero;Francesco Locatello;Emanuele Rodol\u00e0", "authorids": "~Valentino_Maiorca1;~Luca_Moschella1;~Antonio_Norelli2;~Marco_Fumero1;~Francesco_Locatello1;~Emanuele_Rodol\u00e01", "gender": "M;M;M;;M;M", "homepage": "https://gladia.di.uniroma1.it/authors/maiorca/;https://luca.moschella.dev;https://phd.uniroma1.it/web/ANTONIO-NORELLI_nP1612487_EN.aspx;;https://twitter.com/FrancescoLocat8;", "dblp": "305/9789;205/3639;261/9526;273/9625;195/6074;54/8401", "google_scholar": "https://scholar.google.it/citations?user=2VUUfFEAAAAJ;4byA-nefJJMC;;VYEljYEAAAAJ;;-EH4wBYAAAAJ", "orcid": "0000-0001-5795-3695;0000-0002-0550-7498;;0000-0001-5614-5004;;0000-0003-0091-7241", "linkedin": "valentino-maiorca;lucamoschella/;;;;", "or_profile": "~Valentino_Maiorca1;~Luca_Moschella1;~Antonio_Norelli2;~Marco_Fumero1;~Francesco_Locatello1;~Emanuele_Rodol\u00e01", "aff": "University of Roma \"La Sapienza\";NVIDIA;Sapienza University of Rome;Sapienza University of Rome;Amazon;Sapienza University of Rome", "aff_domain": "uniroma1.it;nvidia.com;uniroma1.it;uniroma1.it;amazon.com;uniroma1.it", "position": "PhD student;Intern;PhD student;PhD student;Senior Applied Scientist;Full Professor", "bibtex": "@inproceedings{\nmaiorca2023latent,\ntitle={Latent Space Translation via Semantic Alignment},\nauthor={Valentino Maiorca and Luca Moschella and Antonio Norelli and Marco Fumero and Francesco Locatello and Emanuele Rodol{\\`a}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pBa70rGHlr}\n}", "github": "", "project": "", "reviewers": "NjdJ;rL3F;ivQN;zvRG", "pdf_size": 4858561, "rating": "5;6;6;8", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;1;4", "wc_summary": "80;41;44;34", "wc_strengths": "64;71;50;70", "wc_weaknesses": "51;564;170;159", "wc_questions": "66;1;1;7", "wc_limitations": "1;1;1;1", "wc_review": "262;678;266;271", "wc_reply_reviewers": "4;99;39;13", "wc_reply_authors": "0;572;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 49.75, 17.83781096435322 ], "wc_strengths_avg": [ 63.75, 8.37779804005802 ], "wc_weaknesses_avg": [ 236.0, 194.99615380822257 ], "wc_questions_avg": [ 18.75, 27.38955092731533 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 369.25, 178.28541022753376 ], "wc_reply_reviewers_avg": [ 38.75, 37.08352059877811 ], "wc_reply_authors_avg": [ 143.0, 247.68326548234944 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 21, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15397335032546820020&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "uniroma1.it;nvidia.com;uniroma1.it;uniroma1.it;amazon.com;uniroma1.it", "author_num": 6, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "University of Rome La Sapienza;NVIDIA;Sapienza University of Rome;Amazon", "aff_unique_dep": ";NVIDIA Corporation;;Amazon.com, Inc.", "aff_unique_url": "https://www.uniroma1.it;https://www.nvidia.com;https://www.uniroma1.it;https://www.amazon.com", "aff_unique_abbr": "La Sapienza;NVIDIA;Sapienza;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "Italy;United States" }, { "id": "pCucay08Co", "title": "Exponential Hardness of Optimization from the Locality in Quantum Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Quantum neural networks (QNNs) have become a leading paradigm for establishing near-term quantum applications in recent years. The trainability issue of QNNs has garnered extensive attention, spurring demand for a comprehensive analysis of QNNs in order to identify viable solutions. In this work, we propose a perspective that characterizes the trainability of QNNs based on their locality. We prove that the entire variation range of the loss function via adjusting any local quantum gate vanishes exponentially in the number of qubits with a high probability for a broad class of QNNs. This result reveals extra harsh constraints independent of gradients and unifies the restrictions on gradient-based and gradient-free optimizations naturally. We showcase the validity of our results with numerical simulations of representative models and examples. Our findings, as a fundamental property of random quantum circuits, deepen the understanding of the role of locality in QNNs and serve as a guideline for assessing the effectiveness of diverse training strategies for quantum neural networks.", "keywords": "quantum neural network;training landscape;quantum machine learning;quantum state learning;variational quantum algorithms", "primary_area": "", "supplementary_material": "/attachment/7587040ef1b49df894b086b735b6635916691ed3.zip", "author": "Hao-Kai Zhang;Chengkai Zhu;Geng Liu;Xin Wang", "authorids": "zhk20@mails.tsinghua.edu.cn;zhuchengkai7@gmail.com;luke97624@gmail.com;~Xin_Wang48", "gender": ";;;M", "homepage": ";;;https://www.xinwang.info/", "dblp": ";;;10/5630-22", "google_scholar": ";;;BFkAPOQAAAAJ", "orcid": ";;;0000-0002-0641-3186", "linkedin": ";;;", "or_profile": "zhk20@mails.tsinghua.edu.cn;zhuchengkai7@gmail.com;luke97624@gmail.com;~Xin_Wang48", "aff": ";;;Baidu", "aff_domain": ";;;baidu.com", "position": ";;;Researcher", "bibtex": "@misc{\nzhang2023exponential,\ntitle={Exponential Hardness of Optimization from the Locality in Quantum Neural Networks},\nauthor={Hao-Kai Zhang and Chengkai Zhu and Geng Liu and Xin Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=pCucay08Co}\n}", "github": "", "project": "", "reviewers": "n6Yp;T1RK;J4bd;dzQA", "site": "https://openreview.net/forum?id=pCucay08Co", "pdf_size": 987010, "rating": "4;5;6;7", "confidence": "5;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "105;70;94;53", "wc_strengths": "55;48;50;40", "wc_weaknesses": "233;61;201;140", "wc_questions": "196;96;75;25", "wc_limitations": "11;12;25;23", "wc_review": "600;287;445;281", "wc_reply_reviewers": "294;40;13;44", "wc_reply_authors": 
"523;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.5, 20.303940504246953 ], "wc_strengths_avg": [ 48.25, 5.402545696243577 ], "wc_weaknesses_avg": [ 158.75, 65.58343922058373 ], "wc_questions_avg": [ 98.0, 62.18118686548207 ], "wc_limitations_avg": [ 17.75, 6.299801584177076 ], "wc_review_avg": [ 403.25, 131.2561903302088 ], "wc_reply_reviewers_avg": [ 97.75, 113.93062582115486 ], "wc_reply_authors_avg": [ 130.75, 226.4656430896307 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40451991747794525, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15862668973835609916&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Baidu", "aff_unique_dep": "Baidu, Inc.", "aff_unique_url": "https://www.baidu.com", "aff_unique_abbr": "Baidu", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "FlatMatch: Bridging Labeled Data and Unlabeled Data with Cross-Sharpness for Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70425", "id": "pE3yaP0Eqg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3b11c5cc84b6da2838db348b37dbd1a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pE3yaP0Eqg", "openreview": "https://openreview.net/forum?id=pE3yaP0Eqg", "poster": "/media/PosterPDFs/NeurIPS%202023/70425.png?t=1701407607.4521787", "slides": "https://nips.cc/virtual/2023/poster/70425", "video": "https://nips.cc/virtual/2023/poster/70425", "author_site": "Zhuo Huang, Li Shen, Jun Yu, Bo Han, Tongliang Liu", "tldr": "", "abstract": "Semi-Supervised Learning (SSL) has been an effective way to leverage abundant unlabeled data with extremely scarce labeled data. However, most SSL methods are commonly based on instance-wise consistency between different data transformations. Therefore, the label guidance on labeled data is hard to be propagated to unlabeled data. Consequently, the learning process on labeled data is much faster than on unlabeled data which is likely to fall into a local minima that does not favor unlabeled data, leading to sub-optimal generalization performance. In this paper, we propose FlatMatch which minimizes a cross-sharpness measure to ensure consistent learning performance between the two datasets. Specifically, we increase the empirical risk on labeled data to obtain a worst-case model which is a failure case needing to be enhanced. Then, by leveraging the richness of unlabeled data, we penalize the prediction difference (i.e., cross-sharpness) between the worst-case model and the original model so that the learning direction is beneficial to generalization on unlabeled data. Therefore, we can calibrate the learning process without being limited to insufficient label information. As a result, the mismatched learning performance can be mitigated, further enabling the effective exploitation of unlabeled data and improving SSL performance. 
Through comprehensive validation, we show FlatMatch achieves state-of-the-art results in many SSL settings.", "keywords": "Semi-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/fd6189c2fdafc39b946cb1f2a4a67444cefb1710.pdf", "author": "Zhuo Huang;Li Shen;Jun Yu;Bo Han;Tongliang Liu", "authorids": "~Zhuo_Huang2;~Li_Shen1;~Jun_Yu3;~Bo_Han1;~Tongliang_Liu1", "gender": "M;M;M;M;M", "homepage": "https://zhuohuangai.github.io/;https://sites.google.com/site/mathshenli/home;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;https://tongliang-liu.github.io/;https://bhanml.github.io/", "dblp": ";91/3680-8;50/5754-1.html;150/6667;241/0472-3", "google_scholar": ";yVhgENIAAAAJ;efZyqyQAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ", "orcid": ";;0000-0002-3197-8103;;", "linkedin": ";;;;", "or_profile": "~Zhuo_Huang2;~Li_Shen1;~Jun_Yu3;~Tongliang_Liu1;~bo_han2", "aff": "University of Sydney;JD Explore Academy;University of Science and Technology of China;University of Sydney;RIKEN", "aff_domain": "uni.sydney.edu.au;jd.com;ustc.edu.cn;sydney.edu.au;riken.jp", "position": "PhD student;Researcher;Associate Professor;Lecturer;Adjunct Scientist", "bibtex": "@inproceedings{\nhuang2023flatmatch,\ntitle={FlatMatch: Bridging Labeled Data and Unlabeled Data with Cross-Sharpness for Semi-Supervised Learning},\nauthor={Zhuo Huang and Li Shen and Jun Yu and Bo Han and Tongliang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pE3yaP0Eqg}\n}", "github": "", "project": "", "reviewers": "yYJy;9MiC;Z9Pn;Y5aq;9rMs", "pdf_size": 3494347, "rating": "4;5;5;5;6", "confidence": "5;3;3;4;4", "soundness": "2;1;3;2;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "108;94;54;74;85", "wc_strengths": "81;93;50;12;102", "wc_weaknesses": "151;212;198;63;159", "wc_questions": "2;21;88;27;53", "wc_limitations": "19;11;9;16;25", "wc_review": "361;431;399;192;424", "wc_reply_reviewers": "0;492;100;0;0", "wc_reply_authors": "302;454;147;398;0", "reply_reviewers": "0;4;1;0;0", "reply_authors": "4;5;3;5;1", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.0, 18.286607121059937 ], "wc_strengths_avg": [ 67.6, 32.891336245278936 ], "wc_weaknesses_avg": [ 156.6, 52.11755942098594 ], "wc_questions_avg": [ 38.2, 29.768439663509408 ], "wc_limitations_avg": [ 16.0, 5.727128425310541 ], "wc_review_avg": [ 361.4, 88.17391904639376 ], "wc_reply_reviewers_avg": [ 118.4, 190.77274438451633 ], "wc_reply_authors_avg": [ 260.2, 166.62100707893947 ], "reply_reviewers_avg": [ 1.0, 1.5491933384829668 ], "reply_authors_avg": [ 3.6, 1.4966629547095764 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2990204346507762748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uni.sydney.edu.au;jd.com;ustc.edu.cn;sydney.edu.au;riken.jp", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Sydney;JD;University of Science and Technology of China;RIKEN", "aff_unique_dep": ";JD Explore Academy;;", "aff_unique_url": "https://www.sydney.edu.au;;http://www.ustc.edu.cn;https://www.riken.jp", "aff_unique_abbr": "USYD;;USTC;RIKEN", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0;3", "aff_country_unique": "Australia;;China;Japan" }, { "title": "Causal Effect Identification in Uncertain Causal Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70424", "id": "pH4Fv7C3yC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/017c897b4d85a744f345ccbf9d71e501-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pH4Fv7C3yC", "openreview": "https://openreview.net/forum?id=pH4Fv7C3yC", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70424", "video": "https://nips.cc/virtual/2023/poster/70424", "author_site": "Sina Akbari, Fateme Jamshidi, Ehsan Mokhtarian, Matthew Vowels, Jalal Etesami, Negar Kiyavash", "tldr": "", "abstract": "Causal identification is at the core of the causal inference literature, where complete algorithms have been proposed to identify causal queries of interest. The validity of these algorithms hinges on the restrictive assumption of having access to a correctly specified causal structure. In this work, we study the setting where a probabilistic model of the causal structure is available. Specifically, the edges in a causal graph exist with uncertainties which may, for example, represent degree of belief from domain experts. Alternatively, the uncertainty about an edge may reflect the confidence of a particular statistical test. The question that naturally arises in this setting is: Given such a probabilistic graph and a specific causal effect of interest, what is the subgraph which has the highest plausibility and for which the causal effect is identifiable? We show that answering this question reduces to solving an NP-hard combinatorial optimization problem which we call the edge ID problem. 
We propose efficient algorithms to approximate this problem and evaluate them against both real-world networks and randomly generated graphs.", "keywords": "Causal effect;identifiability;causal DAGs;probabilistic graphs", "primary_area": "", "supplementary_material": "", "author": "Sina Akbari;Fateme Jamshidi;Ehsan Mokhtarian;Matthew James Vowels;Jalal Etesami;Negar Kiyavash", "authorids": "~Sina_Akbari1;~Fateme_Jamshidi1;~Ehsan_Mokhtarian1;~Matthew_James_Vowels1;~Jalal_Etesami2;~Negar_Kiyavash1", "gender": "M;F;M;M;M;F", "homepage": "https://sinaakbarii.github.io;;;;https://www.cs.cit.tum.de/en/dss/members/prof-jalal-etesami/;https://people.epfl.ch/negar.kiyavash?lang=en", "dblp": ";304/8469;276/5445;254/1206;76/10800;85/4976", "google_scholar": "-kNnS1AAAAAJ;;https://scholar.google.com/scholar?hl=en;7qaOZXYAAAAJ;3Usg1G0AAAAJ;7tBDvOwAAAAJ", "orcid": ";;;0000-0002-8811-1156;;0000-0002-8545-7709", "linkedin": "sina-akbari/;fateme-jamshidi/;https://linkedin.com/in/ehsanmokhtarian/;matthew-vowels-5750b736/;;", "or_profile": "~Sina_Akbari1;~Fateme_Jamshidi1;~Ehsan_Mokhtarian1;~Matthew_James_Vowels1;~Jalal_Etesami2;~Negar_Kiyavash1", "aff": "Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;Universit\u00e9 de Lausanne;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;unil.ch;epfl.ch;epfl.ch", "position": "PhD student;PhD student;PhD student;Postdoc;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nakbari2023causal,\ntitle={Causal Effect Identification in Uncertain Causal Networks},\nauthor={Sina Akbari and Fateme Jamshidi and Ehsan Mokhtarian and Matthew James Vowels and Jalal Etesami and Negar Kiyavash},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pH4Fv7C3yC}\n}", "github": "", "project": "", "reviewers": "N9rZ;DoFR;eheu;NvDh;aizw", "pdf_size": 1402730, "rating": "6;6;6;7;7", "confidence": "3;3;4;3;3", "soundness": "3;3;3;3;4", "novelty": "3;2;2;2;4", "presentation": "2;2;4;3;4", "wc_summary": "117;103;90;86;102", "wc_strengths": "151;35;59;20;17", "wc_weaknesses": "166;60;89;62;92", "wc_questions": "203;107;146;39;20", "wc_limitations": "121;8;1;11;7", "wc_review": "758;313;385;218;238", "wc_reply_reviewers": "11;21;0;6;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 99.6, 10.9288608738514 ], "wc_strengths_avg": [ 56.4, 49.58064138350773 ], "wc_weaknesses_avg": [ 93.8, 38.452048059888824 ], "wc_questions_avg": [ 103.0, 67.60177512462228 ], "wc_limitations_avg": [ 29.6, 45.815281293472374 ], "wc_review_avg": [ 382.4, 196.86401397919326 ], "wc_reply_reviewers_avg": [ 9.8, 6.910861017268399 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8050057726576042074&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "epfl.ch;epfl.ch;epfl.ch;unil.ch;epfl.ch;epfl.ch", "author_num": 6, "aff_unique_index": 
"0;0;0;1;0;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Universit\u00e9 de Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.unil.ch", "aff_unique_abbr": "EPFL;UNIL", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "What Makes Good Examples for Visual In-Context Learning?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70423", "id": "pIXTMrBe7f", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/398ae57ed4fda79d0781c65c926d667b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pIXTMrBe7f", "openreview": "https://openreview.net/forum?id=pIXTMrBe7f", "poster": "/media/PosterPDFs/NeurIPS%202023/70423.png?t=1701417678.470897", "slides": "https://nips.cc/virtual/2023/poster/70423", "video": "https://nips.cc/virtual/2023/poster/70423", "author_site": "Yuanhan Zhang, Kaiyang Zhou, Ziwei Liu", "tldr": "", "abstract": "Large vision models with billions of parameters and trained on broad data have great potential in numerous downstream applications. However, these models are typically difficult to adapt due to their large parameter size and sometimes lack of accesss to their weights---entities able to develop large vision models often provide APIs only. In this paper, we study how to better utilize large vision models through the lens of in-context learning, a concept that has been well-known in natural language processing but has only been studied very recently in computer vision. In-context learning refers to the ability to perform inference on tasks never seen during training by simply conditioning on in-context examples (i.e., input-output pairs) without updating any internal model parameters. To demystify in-context learning in computer vision, we conduct an extensive research and identify a critical problem: downstream performance is highly sensitivie to the choice of visual in-context examples. To address this problem, we propose a prompt retrieval framework specifically for large vision models, allowing the selection of in-context examples to be fully automated. Concretely, we provide two implementations: (i) an unsupervised prompt retrieval method based on nearest example search using an off-the-shelf model, and (ii) a supervised prompt retrieval method, which trains a neural network to choose examples that directly maximize in-context learning performance. Both methods do not require access to the internal weights of large vision models. Our results demonstrate that our methods can bring non-trivial improvements to visual in-context learning in comparison to the commonly-used random selection. 
Code and models will be released.", "keywords": "computer vision;visual in-context learning;prompt learning", "primary_area": "", "supplementary_material": "/attachment/88f388cc781a64de58763bb031f039ddd53fa5b4.pdf", "author": "Yuanhan Zhang;Kaiyang Zhou;Ziwei Liu", "authorids": "~Yuanhan_Zhang1;~Kaiyang_Zhou1;~Ziwei_Liu1", "gender": "M;M;M", "homepage": "https://zhangyuanhan-ai.github.io/;https://kaiyangzhou.github.io/;https://liuziwei7.github.io/", "dblp": "10/2476;203/3155;05/6300-2", "google_scholar": "g6grFy0AAAAJ;https://scholar.google.co.uk/citations?user=gRIejugAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuanhan_Zhang1;~Kaiyang_Zhou1;~Ziwei_Liu1", "aff": "Nanyang Technological University;Hong Kong Baptist University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;hkbu.edu.hk;ntu.edu.sg", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023what,\ntitle={What Makes Good Examples for Visual In-Context Learning?},\nauthor={Yuanhan Zhang and Kaiyang Zhou and Ziwei Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pIXTMrBe7f}\n}", "github": "", "project": "", "reviewers": "QAph;mHCs;EQqT;vwKB", "pdf_size": 7678250, "rating": "4;5;6;7", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "58;72;45;83", "wc_strengths": "35;83;78;78", "wc_weaknesses": "143;82;60;56", "wc_questions": "6;62;161;83", "wc_limitations": "1;1;11;1", "wc_review": "243;300;355;301", "wc_reply_reviewers": "0;26;23;36", "wc_reply_authors": "205;231;22;143", "reply_reviewers": "0;1;1;1", "reply_authors": "4;4;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 14.326548781894402 ], "wc_strengths_avg": [ 68.5, 19.448650338776723 ], "wc_weaknesses_avg": [ 85.25, 34.78056210011563 ], "wc_questions_avg": [ 78.0, 55.57427462414602 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 299.75, 39.60665979352462 ], "wc_reply_reviewers_avg": [ 21.25, 13.179055353097201 ], "wc_reply_authors_avg": [ 150.25, 80.65164288469268 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 120, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15675269575816398724&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ntu.edu.sg;hkbu.edu.hk;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;Hong Kong Baptist University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.hkbu.edu.hk", "aff_unique_abbr": "NTU;HKBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Optimal Exploration for Model-Based RL in Nonlinear Systems", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70422", "id": "pJQu0zpKCS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31e018f43ab9c7065c058cc2c5848128-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=pJQu0zpKCS", "openreview": "https://openreview.net/forum?id=pJQu0zpKCS", "poster": "/media/PosterPDFs/NeurIPS%202023/70422.png?t=1701318116.5514832", "slides": "https://nips.cc/virtual/2023/poster/70422", "video": "https://nips.cc/virtual/2023/poster/70422", "author_site": "Andrew Wagenmaker, Guanya Shi, Kevin Jamieson", "tldr": "", "abstract": "Learning to control unknown nonlinear dynamical systems is a fundamental problem in reinforcement learning and control theory. A commonly applied approach is to first explore the environment (exploration), learn an accurate model of it (system identification), and then compute an optimal controller with the minimum cost on this estimated system (policy optimization). While existing work has shown that it is possible to learn a uniformly good model of the system (Mania et al., 2020), in practice, if we aim to learn a good controller with a low cost on the actual system, certain system parameters may be significantly more critical than others, and we therefore ought to focus our exploration on learning such parameters.\n\nIn this work, we consider the setting of nonlinear dynamical systems and seek to formally quantify, in such settings, (a) which parameters are most relevant to learning a good controller, and (b) how we can best explore so as to minimize uncertainty in such parameters. Inspired by recent work in linear systems (Wagenmaker et al., 2021), we show that minimizing the controller loss in nonlinear systems translates to estimating the system parameters in a particular, task-dependent metric. Motivated by this, we develop an algorithm able to efficiently explore the system to reduce uncertainty in this metric, and prove a lower bound showing that our approach learns a controller at a near-instance-optimal rate. Our algorithm relies on a general reduction from policy optimization to optimal experiment design in arbitrary systems, and may be of independent interest. 
We conclude with experiments demonstrating the effectiveness of our method in realistic nonlinear robotic systems.", "keywords": "reinforcement learning;control theory;system identification;experiment design;active learning", "primary_area": "", "supplementary_material": "", "author": "Andrew Wagenmaker;Guanya Shi;Kevin Jamieson", "authorids": "~Andrew_Wagenmaker1;~Guanya_Shi1;~Kevin_Jamieson1", "gender": "M;M;M", "homepage": "https://wagenmaker.github.io;http://guanyashi.github.io;", "dblp": "195/1036;230/4386;85/10260", "google_scholar": "ym8AZSIAAAAJ;joR1Z4UAAAAJ;", "orcid": ";0000-0002-9075-3705;", "linkedin": ";guanya-shi-b07b43126/;", "or_profile": "~Andrew_Wagenmaker1;~Guanya_Shi1;~Kevin_Jamieson1", "aff": "University of Washington, Seattle;University of Washington;University of Washington", "aff_domain": "uw.edu;uw.edu;washington.edu", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwagenmaker2023optimal,\ntitle={Optimal Exploration for Model-Based {RL} in Nonlinear Systems},\nauthor={Andrew Wagenmaker and Guanya Shi and Kevin Jamieson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pJQu0zpKCS}\n}", "github": "", "project": "", "reviewers": "oMB3;vdw9;DNZf;KGe1;srf1;o9V7", "pdf_size": 2087455, "rating": "4;6;6;6;7;8", "confidence": "2;2;3;2;3;3", "soundness": "3;3;3;3;3;4", "novelty": "2;3;3;3;3;4", "presentation": "3;3;3;4;3;4", "wc_summary": "99;122;153;101;126;80", "wc_strengths": "26;156;5;97;84;35", "wc_weaknesses": "24;352;130;165;48;141", "wc_questions": "17;64;5;33;1;20", "wc_limitations": "25;63;5;52;1;18", "wc_review": "191;757;298;448;260;294", "wc_reply_reviewers": "0;54;0;22;0;7", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;1;0;1;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.166666666666667, 1.2133516482134197 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 113.5, 23.371991785040485 ], "wc_strengths_avg": [ 67.16666666666667, 51.08625603384491 ], "wc_weaknesses_avg": [ 143.33333333333334, 106.08434799200535 ], "wc_questions_avg": [ 23.333333333333332, 20.949675149960893 ], "wc_limitations_avg": [ 27.333333333333332, 22.96857756926962 ], "wc_review_avg": [ 374.6666666666667, 187.44925239173995 ], "wc_reply_reviewers_avg": [ 13.833333333333334, 19.582446788443526 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.686802819743445, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15705315841918531754&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "uw.edu;uw.edu;washington.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Examples Exist in Two-Layer ReLU Networks for Low Dimensional Linear Subspaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70421", "id": "pJbEXBBN88", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ffd11b5bce666816802b86c77b54cf7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pJbEXBBN88", "openreview": "https://openreview.net/forum?id=pJbEXBBN88", "poster": "/media/PosterPDFs/NeurIPS%202023/70421.png?t=1701617650.418975", "slides": "https://nips.cc/virtual/2023/poster/70421", "video": "https://nips.cc/virtual/2023/poster/70421", "author_site": "Odelia Melamed, Gilad Yehudai, Gal Vardi", "tldr": "", "abstract": "Despite a great deal of research, it is still not well-understood why trained neural networks are highly vulnerable to adversarial examples.\nIn this work we focus on two-layer neural networks trained using data which lie on a low dimensional linear subspace.\nWe show that standard gradient methods lead to non-robust neural networks, namely, networks which have large gradients in directions orthogonal to the data subspace, and are susceptible to small adversarial $L_2$-perturbations in these directions.\nMoreover, we show that decreasing the initialization scale of the training algorithm, or adding $L_2$ regularization, can make the trained network more robust to adversarial perturbations orthogonal to the data.", "keywords": "Adversarial Examples;Robustness;Neural Networks;Classification", "primary_area": "", "supplementary_material": "/attachment/dad5f3fb36a9be3f694edff8034e6131c61f18bf.pdf", "author": "Odelia Melamed;Gilad Yehudai;Gal Vardi", "authorids": "~Odelia_Melamed1;~Gilad_Yehudai2;~Gal_Vardi1", "gender": "F;M;M", "homepage": ";;https://sites.google.com/view/galvardi/home", "dblp": "295/8736;239/4344;https://dblp.uni-trier.de/pid/167/9638.html", "google_scholar": ";opVT1qkAAAAJ;https://scholar.google.co.il/citations?hl=en", "orcid": ";;", "linkedin": "odelia-melamed-ba6397201/;;", "or_profile": "~Odelia_Melamed1;~Gilad_Yehudai2;~Gal_Vardi1", "aff": "Weizmann Institute, Technion;Weizmann Institute of Science;Toyota Technological Institute at Chicago", "aff_domain": "weizmann.ac.il;weizmann.ac.il;ttic.edu", "position": "PhD student;PhD student;Postdoc", "bibtex": "@inproceedings{\nmelamed2023adversarial,\ntitle={Adversarial Examples Exist in Two-Layer Re{LU} Networks for Low Dimensional Linear Subspaces},\nauthor={Odelia Melamed and Gilad Yehudai and Gal Vardi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pJbEXBBN88}\n}", "github": "", "project": "", "reviewers": "2uZr;X8ox;5bBJ;ZcHg", "pdf_size": 3051014, "rating": "6;6;6;6", "confidence": "4;4;3;4", "soundness": "4;3;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "250;36;64;81", "wc_strengths": "77;43;27;21", "wc_weaknesses": "40;333;158;17", "wc_questions": "19;9;88;77", "wc_limitations": "29;1;1;15", "wc_review": "415;422;338;211", "wc_reply_reviewers": "5;243;205;12", "wc_reply_authors": "0;770;355;0", "reply_reviewers": "1;3;2;1", "reply_authors": "1;3;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.75, 83.68504944134287 ], "wc_strengths_avg": [ 42.0, 21.748563170931547 ], "wc_weaknesses_avg": [ 137.0, 125.16588992213494 ], "wc_questions_avg": [ 48.25, 34.65093793824346 ], "wc_limitations_avg": [ 11.5, 11.6081867662439 ], "wc_review_avg": [ 346.5, 84.88963423174822 ], "wc_reply_reviewers_avg": [ 116.25, 108.61255682470605 ], 
"wc_reply_authors_avg": [ 281.25, 317.2218269602519 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14476576091880722857&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "weizmann.ac.il;weizmann.ac.il;ttic.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Weizmann Institute of Science;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.weizmann.org.il;https://www.tti-chicago.org", "aff_unique_abbr": "Weizmann;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "PID-Inspired Inductive Biases for Deep Reinforcement Learning in Partially Observable Control Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70420", "id": "pKnhUWqZTJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ba1c5356d9164bb64c446a4b690226b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pKnhUWqZTJ", "openreview": "https://openreview.net/forum?id=pKnhUWqZTJ", "poster": "/media/PosterPDFs/NeurIPS%202023/70420.png?t=1702056313.3390918", "slides": "https://nips.cc/virtual/2023/poster/70420", "video": "https://nips.cc/virtual/2023/poster/70420", "author_site": "Ian Char, Jeff Schneider", "tldr": "", "abstract": "Deep reinforcement learning (RL) has shown immense potential for learning to control systems through data alone. However, one challenge deep RL faces is that the full state of the system is often not observable. When this is the case, the policy needs to leverage the history of observations to infer the current state. At the same time, differences between the training and testing environments makes it critical for the policy not to overfit to the sequence of observations it sees at training time. As such, there is an important balancing act between having the history encoder be flexible enough to extract relevant information, yet be robust to changes in the environment. To strike this balance, we look to the PID controller for inspiration. We assert the PID controller's success shows that only summing and differencing are needed to accumulate information over time for many control tasks. Following this principle, we propose two architectures for encoding history: one that directly uses PID features and another that extends these core ideas and can be used in arbitrary control tasks. When compared with prior approaches, our encoders produce policies that are often more robust and achieve better performance on a variety of tracking tasks. 
Going beyond tracking tasks, our policies achieve 1.7x better performance on average over previous state-of-the-art methods on a suite of locomotion control tasks.", "keywords": "Reinforcement Learning;Control;POMDP", "primary_area": "", "supplementary_material": "/attachment/69c01264fd3cb2ba46b8316034596f2820724b66.zip", "author": "Ian Char;Jeff Schneider", "authorids": "~Ian_Char1;~Jeff_Schneider1", "gender": "M;", "homepage": "http://ianchar.com;https://www.cs.cmu.edu/~schneide", "dblp": "157/7519;38/247", "google_scholar": "3SDKldkAAAAJ;3bSbb20AAAAJ", "orcid": ";0000-0002-5080-9073", "linkedin": ";jeff-schneider-1593b322/", "or_profile": "~Ian_Char1;~Jeff_Schneider1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nchar2023pidinspired,\ntitle={{PID}-Inspired Inductive Biases for Deep Reinforcement Learning in Partially Observable Control Tasks},\nauthor={Ian Char and Jeff Schneider},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pKnhUWqZTJ}\n}", "github": "", "project": "", "reviewers": "EZtX;vEC9;RWrF;64AN;QQYm", "pdf_size": 11432463, "rating": "5;5;6;6;7", "confidence": "4;4;4;3;4", "soundness": "3;3;2;3;4", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "28;103;90;35;149", "wc_strengths": "14;128;89;43;224", "wc_weaknesses": "282;528;324;75;185", "wc_questions": "16;385;253;13;116", "wc_limitations": "10;1;83;3;13", "wc_review": "350;1145;839;169;687", "wc_reply_reviewers": "0;124;23;294;0", "wc_reply_authors": "0;18;0;242;0", "reply_reviewers": "0;1;1;2;0", "reply_authors": "1;2;1;2;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 81.0, 44.975548912714785 ], "wc_strengths_avg": [ 99.6, 73.3828317796472 ], "wc_weaknesses_avg": [ 278.8, 151.31873644727543 ], "wc_questions_avg": [ 156.6, 143.87299955168794 ], "wc_limitations_avg": [ 22.0, 30.815580474818255 ], "wc_review_avg": [ 638.0, 347.20483867596084 ], "wc_reply_reviewers_avg": [ 88.2, 112.64528396697308 ], "wc_reply_authors_avg": [ 52.0, 95.25544603853368 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=161418736132930104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cmu.edu;cs.cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Non-Smooth Weakly-Convex Finite-sum Coupled Compositional Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70419", "id": "pLOWV1UGF6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1160792eab11de2bbaf9e71fce191e8c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pLOWV1UGF6", "openreview": "https://openreview.net/forum?id=pLOWV1UGF6", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/70419", "video": "https://nips.cc/virtual/2023/poster/70419", "author_site": "Quanqi Hu, Dixian Zhu, Tianbao Yang", "tldr": "", "abstract": "This paper investigates new families of compositional optimization problems, called non-smooth weakly-convex finite-sum coupled compositional optimization (NSWC FCCO). There has been a growing interest in FCCO due to its wide-ranging applications in machine learning and AI, as well as its ability to address the shortcomings of stochastic algorithms based on empirical risk minimization. However, current research on FCCO presumes that both the inner and outer functions are smooth, limiting their potential to tackle a more diverse set of problems. Our research expands on this area by examining non-smooth weakly-convex FCCO, where the outer function is weakly convex and non-decreasing, and the inner function is weakly-convex. We analyze a single-loop algorithm and establish its complexity for finding an $\\epsilon$-stationary point of the Moreau envelop of the objective function. Additionally, we also extend the algorithm for solving novel non-smooth weakly-convex tri-level finite-sum coupled compositional optimization problems, which feature a nested arrangement of three functions. Lastly, we explore the applications of our algorithms in deep learning for two-way partial AUC maximization and multi-instance two-way partial AUC maximization, using empirical studies to showcase the effectiveness of the proposed algorithms.", "keywords": "non-smooth optimization;weakly-convex optimization;compositional optimization;AUC maximization", "primary_area": "", "supplementary_material": "/attachment/4f7a01d1ef0fa72226ef99e88c3868c9870286b5.zip", "author": "Quanqi Hu;Dixian Zhu;Tianbao Yang", "authorids": "~Quanqi_Hu1;~Dixian_Zhu1;~Tianbao_Yang1", "gender": "F;M;M", "homepage": "https://quanqihu.github.io/;;https://people.tamu.edu/~tianbao-yang/publications.html", "dblp": ";181/6721;56/7047", "google_scholar": "AGEYvcAAAAAJ;;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Quanqi_Hu1;~Dixian_Zhu1;~Tianbao_Yang1", "aff": "Texas A&M University - College Station;University of Iowa;Texas A&M University - College Station", "aff_domain": "tamu.edu;cs.uiowa.edu;tamu.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2023nonsmooth,\ntitle={Non-Smooth Weakly-Convex Finite-sum Coupled Compositional Optimization},\nauthor={Quanqi Hu and Dixian Zhu and Tianbao Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pLOWV1UGF6}\n}", "github": "", "project": "", "reviewers": "SCt1;FiRa;GoBV;mWSr;evM3", "pdf_size": 891872, "rating": "5;6;6;6;7", "confidence": "5;3;3;2;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;3;2;3;3", "wc_summary": "51;66;73;126;296", "wc_strengths": "20;38;19;73;116", "wc_weaknesses": "19;272;123;38;70", "wc_questions": "77;2;48;37;39", "wc_limitations": "1;1;1;1;3", "wc_review": "168;379;264;275;524", "wc_reply_reviewers": "10;27;0;18;12", "wc_reply_authors": "12;44;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 122.4, 90.40929155789243 ], 
"wc_strengths_avg": [ 53.2, 36.983239447079264 ], "wc_weaknesses_avg": [ 104.4, 90.90786544628578 ], "wc_questions_avg": [ 40.6, 24.021656895393374 ], "wc_limitations_avg": [ 1.4, 0.8 ], "wc_review_avg": [ 322.0, 121.1131702169504 ], "wc_reply_reviewers_avg": [ 13.4, 8.935323161475472 ], "wc_reply_authors_avg": [ 11.2, 17.045820602130014 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16697320977103816185&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tamu.edu;cs.uiowa.edu;tamu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Texas A&M University;University of Iowa", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.uiowa.edu", "aff_unique_abbr": "TAMU;UIowa", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "An active learning framework for multi-group mean estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70418", "id": "pLcSrn8NpJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67390075fe466276797f489115582cdc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pLcSrn8NpJ", "openreview": "https://openreview.net/forum?id=pLcSrn8NpJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70418", "video": "https://nips.cc/virtual/2023/poster/70418", "author_site": "Abdellah Aznag, Rachel Cummings, Adam N. Elmachtoub", "tldr": "", "abstract": "We consider a fundamental problem where there are multiple groups whose data distributions are unknown, and an analyst would like to learn the mean of each group. We consider an active learning framework to sequentially collect $T$ samples with bandit, each period observing a sample from a chosen group. After observing a sample, the analyst may update their estimate of the mean and variance of that group and choose the next group accordingly. The objective is to dynamically collect samples to minimize the $p$-norm of the vector of variances of our mean estimators after $T$ rounds. We propose an algorithm, Variance-UCB, that selects groups according to a an upper bound on the variance estimate adjusted to the $p$-norm chosen. We show that the regret of Variance-UCB is $O(T^{-2})$ for finite $p$, and prove that no algorithm can do better. When $p$ is infinite, we recover the $O(T^{-1.5})$ obtained in \\cite{activelearning, carpentier2011upper} and provide a new lower bound showing that no algorithm can do better.", "keywords": "Active learning;mean estimation;bandit feedback;data acquisition", "primary_area": "", "supplementary_material": "/attachment/f3d5ef65400b92c3622bc02ca1b4a08ee1557c00.zip", "author": "Abdellah Aznag;Rachel Cummings;Adam N. 
Elmachtoub", "authorids": "~Abdellah_Aznag1;~Rachel_Cummings1;~Adam_N._Elmachtoub1", "gender": "M;;M", "homepage": ";https://rachelcummings.com/;http://www.columbia.edu/~ae2516/", "dblp": "294/5386;56/9841;15/9298.html", "google_scholar": ";;Z-CFWPwAAAAJ", "orcid": ";;", "linkedin": "abdellah-aznag/;;", "or_profile": "~Abdellah_Aznag1;~Rachel_Cummings1;~Adam_N._Elmachtoub1", "aff": "Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\naznag2023an,\ntitle={An active learning framework for multi-group mean estimation},\nauthor={Abdellah Aznag and Rachel Cummings and Adam N. Elmachtoub},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pLcSrn8NpJ}\n}", "github": "", "project": "", "reviewers": "kPNK;kmyz;JGPv;m8Me;ZDPZ", "pdf_size": 673195, "rating": "5;5;7;7;7", "confidence": "4;3;3;4;3", "soundness": "2;2;4;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "137;41;59;70;148", "wc_strengths": "26;71;72;34;185", "wc_weaknesses": "414;53;44;57;152", "wc_questions": "17;78;8;145;158", "wc_limitations": "1;10;6;1;1", "wc_review": "595;253;189;307;644", "wc_reply_reviewers": "76;11;8;0;121", "wc_reply_authors": "162;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.0, 43.197222132910355 ], "wc_strengths_avg": [ 77.6, 56.873895593672856 ], "wc_weaknesses_avg": [ 144.0, 140.58022620553717 ], "wc_questions_avg": [ 81.2, 62.38397230058375 ], "wc_limitations_avg": [ 3.8, 3.655133376499413 ], "wc_review_avg": [ 397.6, 185.64008187888737 ], "wc_reply_reviewers_avg": [ 43.2, 47.47799490290212 ], "wc_reply_authors_avg": [ 32.4, 64.8 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15814423993719317052&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "columbia.edu;columbia.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Kernelized Cumulants: Beyond Kernel Mean Embeddings", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70417", "id": "pLsPFxqn7J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/243697ace81f57daef8737ff2c5cffd3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pLsPFxqn7J", "openreview": "https://openreview.net/forum?id=pLsPFxqn7J", "poster": "/media/PosterPDFs/NeurIPS%202023/70417.png?t=1701903574.8571432", "slides": "https://nips.cc/virtual/2023/poster/70417", "video": "https://nips.cc/virtual/2023/poster/70417", "author_site": "Patric Bonnier, Harald Oberhauser, Zoltan Szabo", "tldr": "", "abstract": "In $\\mathbb{R}^d$, it is well-known that 
cumulants provide an alternative to moments that can achieve the same goals with numerous benefits such as lower variance estimators. In this paper we extend cumulants to reproducing kernel Hilbert spaces (RKHS) using tools from tensor algebras and show that they are computationally tractable by a kernel trick. These kernelized cumulants provide a new set of all-purpose statistics; the classical maximum mean discrepancy and Hilbert-Schmidt independence criterion arise as the degree one objects in our general construction. We argue both theoretically and empirically (on synthetic, environmental, and traffic data analysis) that going beyond degree one has several advantages and can be achieved with the same computational complexity and minimal overhead in our experiments.", "keywords": "kernel;cumulant;mean embedding;Hilbert-Schmidt independence criterion;maximum mean discrepancy", "primary_area": "", "supplementary_material": "/attachment/a52db897c2393c0f1412faeb62a783d4a48d12e9.zip", "author": "Patric Bonnier;Harald Oberhauser;Zolt\u00e1n Szab\u00f3", "authorids": "~Patric_Bonnier1;~Harald_Oberhauser1;~Zolt\u00e1n_Szab\u00f31", "gender": ";;M", "homepage": "https://www.maths.ox.ac.uk/people/patric.bonnier;https://www.maths.ox.ac.uk/people/harald.oberhauser;https://zoltansz.github.io/", "dblp": "241/6943;175/1262;73/2909-1", "google_scholar": "zGQsYlgAAAAJ;pQ7hxSIAAAAJ;aFW7v7EAAAAJ", "orcid": ";;0000-0001-6183-7603", "linkedin": ";;zoltan-szabo-a8972159", "or_profile": "~Patric_Bonnier1;~Harald_Oberhauser1;~Zoltan_Szabo1", "aff": ";University of Oxford;London School of Economics", "aff_domain": ";oxford.ac.uk;lse.ac.uk", "position": ";Associate Professor;Professor of Data Science", "bibtex": "@inproceedings{\nbonnier2023kernelized,\ntitle={Kernelized Cumulants: Beyond Kernel Mean Embeddings},\nauthor={Patric Bonnier and Harald Oberhauser and Zolt{\\'a}n Szab{\\'o}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pLsPFxqn7J}\n}", "github": "", "project": "", "reviewers": "6dmY;h8uP;952h;Lvaf;Q1mu", "pdf_size": 546631, "rating": "6;7;7;7;7", "confidence": "3;3;3;3;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;4;3", "presentation": "3;3;3;3;3", "wc_summary": "40;70;16;21;151", "wc_strengths": "63;58;67;61;100", "wc_weaknesses": "35;309;39;36;217", "wc_questions": "16;104;24;4;49", "wc_limitations": "7;79;1;1;67", "wc_review": "161;620;147;123;584", "wc_reply_reviewers": "10;151;11;39;44", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.6, 49.47160801914569 ], "wc_strengths_avg": [ 69.8, 15.380507143784303 ], "wc_weaknesses_avg": [ 127.2, 114.6410048804528 ], "wc_questions_avg": [ 39.4, 35.50549253284623 ], "wc_limitations_avg": [ 31.0, 34.57166469813104 ], "wc_review_avg": [ 327.0, 225.15328112199475 ], "wc_reply_reviewers_avg": [ 51.0, 51.91146308860886 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16631653829927564576&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": 
";oxford.ac.uk;lse.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;London School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.lse.ac.uk", "aff_unique_abbr": "Oxford;LSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "PRODIGY: Enabling In-context Learning Over Graphs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70416", "id": "pLwYhNNnoR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/34dce0dc3121951dd0399ba02c0f0d06-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pLwYhNNnoR", "openreview": "https://openreview.net/forum?id=pLwYhNNnoR", "poster": "/media/PosterPDFs/NeurIPS%202023/70416.png?t=1701715522.8409512", "slides": "https://nips.cc/virtual/2023/poster/70416", "video": "https://nips.cc/virtual/2023/poster/70416", "author_site": "Qian Huang, Hongyu Ren, Peng Chen, Gregor Kr\u017emanc, Daniel Zeng, Percy Liang, Jure Leskovec", "tldr": "", "abstract": "In-context learning is the ability of a pretrained model to adapt to novel and diverse downstream tasks by conditioning on prompt examples, without optimizing any parameters. While large language models have demonstrated this ability, how in-context learning could be performed over graphs is unexplored. In this paper, we develop \\textbf{Pr}etraining \\textbf{O}ver \\textbf{D}iverse \\textbf{I}n-Context \\textbf{G}raph S\\textbf{y}stems (PRODIGY), the first pretraining framework that enables in-context learning over graphs. The key idea of our framework is to formulate in-context learning over graphs with a novel \\emph{prompt graph} representation, which connects prompt examples and queries. We then propose a graph neural network architecture over the prompt graph and a corresponding family of in-context pretraining objectives. With PRODIGY, the pretrained model can directly perform novel downstream classification tasks on unseen graphs via in-context learning. We provide empirical evidence of the effectiveness of our framework by showcasing its strong in-context learning performance on tasks involving citation networks and knowledge graphs. Our approach outperforms the in-context learning accuracy of contrastive pretraining baselines with hard-coded adaptation by 18\\% on average across all setups. 
Moreover, it also outperforms standard finetuning with limited data by 33\\% on average with in-context learning.", "keywords": "Graph Neural Network;in-context learning;pretraining", "primary_area": "", "supplementary_material": "/attachment/f61fcf787622b4159393caa645d94cb6fb678703.zip", "author": "Qian Huang;Hongyu Ren;Peng Chen;Gregor Kr\u017emanc;Daniel Zeng;Percy Liang;Jure Leskovec", "authorids": "~Qian_Huang2;~Hongyu_Ren1;~Peng_Chen7;~Gregor_Kr\u017emanc1;~Daniel_Zeng3;~Percy_Liang1;~Jure_Leskovec1", "gender": "F;;M;M;;;", "homepage": "https://q-hwang.github.io/;;;https://github.com/gregorkrz;https://danielzeng.com;https://cs.stanford.edu/~pliang/;http://cs.stanford.edu/~jure/", "dblp": "07/4378.html;30/10885;;;181/2891-3;04/1701;l/JureLeskovec", "google_scholar": "L3hkmG0AAAAJ;;;;PpLhFIgAAAAJ;pouyVyUAAAAJ;Q_kKkIUAAAAJ", "orcid": ";;;;;;0000-0002-5411-923X", "linkedin": "qian-huang-b20315149/;;;;daniel-zeng/;;leskovec/", "or_profile": "~Qian_Huang2;~Hongyu_Ren1;~Peng_Chen7;~Gregor_Kr\u017emanc1;~Daniel_Zeng3;~Percy_Liang1;~Jure_Leskovec1", "aff": "Stanford University;Computer Science Department, Stanford University;Stanford University;University of Ljubljana;Computer Science Department, Stanford University;Stanford University;Kumo.AI", "aff_domain": "stanford.edu;cs.stanford.edu;stanford.edu;uni-lj.si;cs.stanford.edu;stanford.edu;kumo.ai", "position": "PhD student;PhD student;MS student;Undergrad student;MS student;Associate Professor;Chief Scientist", "bibtex": "@inproceedings{\nhuang2023prodigy,\ntitle={{PRODIGY}: Enabling In-context Learning Over Graphs},\nauthor={Qian Huang and Hongyu Ren and Peng Chen and Gregor Kr{\\v{z}}manc and Daniel Zeng and Percy Liang and Jure Leskovec},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pLwYhNNnoR}\n}", "github": "", "project": "", "reviewers": "GRdC;FSU3;DFam;GCaE", "pdf_size": 524441, "rating": "6;6;6;8", "confidence": "3;4;2;4", "soundness": "3;2;2;4", "novelty": "3;3;2;4", "presentation": "3;3;2;4", "wc_summary": "80;245;97;41", "wc_strengths": "94;30;98;45", "wc_weaknesses": "74;157;93;94", "wc_questions": "200;152;2;46", "wc_limitations": "35;3;1;1", "wc_review": "483;587;291;227", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 115.75, 77.33490479725181 ], "wc_strengths_avg": [ 66.75, 29.76050234791073 ], "wc_weaknesses_avg": [ 104.5, 31.34086788842964 ], "wc_questions_avg": [ 100.0, 79.41032678436729 ], "wc_limitations_avg": [ 10.0, 14.45683229480096 ], "wc_review_avg": [ 397.0, 144.59598887936 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5444545322386353263&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "stanford.edu;cs.stanford.edu;stanford.edu;uni-lj.si;cs.stanford.edu;stanford.edu;kumo.ai", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;2", "aff_unique_norm": "Stanford University;University of Ljubljana;Kumo.AI", "aff_unique_dep": ";;", 
"aff_unique_url": "https://www.stanford.edu;https://www.uni-lj.si;https://www.kumo.ai", "aff_unique_abbr": "Stanford;UL;Kumo.AI", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Slovenia" }, { "title": "Statistical Knowledge Assessment for Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70415", "id": "pNtG6NAmx0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f0a4cd23e1c6eedd3edebba674ab877-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pNtG6NAmx0", "openreview": "https://openreview.net/forum?id=pNtG6NAmx0", "poster": "/media/PosterPDFs/NeurIPS%202023/70415.png?t=1700990614.4831073", "slides": "https://nips.cc/virtual/2023/poster/70415", "video": "https://nips.cc/virtual/2023/poster/70415", "author_site": "Qingxiu Dong, Jingjing Xu, Lingpeng Kong, Zhifang Sui, Lei Li", "tldr": "", "abstract": "Given varying prompts regarding a factoid question, can a large language model (LLM) reliably generate factually correct answers? Existing LLMs may generate distinct responses for different prompts. In this paper, we study the problem of quantifying knowledge contained in an LLM regarding a given set of facts. We propose KaRR, a statistical approach to assess factual knowledge for LLMs. The main idea is to estimate the ratio of LLM generating text corresponding to the answer entity given diverse prompts of the subject and the querying relation, versus it generating by random chances. Our assessment suite contains a comprehensive set of 994,123 entities and 600 relations, with 1,395,905 text aliases. We use our method to evaluate 20 LLMs of various sizes, including LLaMA, Alpaca, OPT, etc. Experiments show that our results have a strong correlation (0.43 Kendall's $\\tau$) with the results of human assessment on LLMs. 
Our results reveal that the knowledge in LLMs with the same backbone architecture adheres to the scaling law, while tuning on instruction-following data sometimes compromises the model's capability to generate factually correct text reliably.", "keywords": "Large Language Models;Knowledge Assessment;Evaluation", "primary_area": "", "supplementary_material": "/attachment/4c0e85e32ce9f19e23a53a8453c001fdf240f77a.zip", "author": "Qingxiu Dong;Jingjing Xu;Lingpeng Kong;Zhifang Sui;Lei Li", "authorids": "~Qingxiu_Dong1;~Jingjing_Xu1;~Lingpeng_Kong1;~Zhifang_Sui1;~Lei_Li11", "gender": "F;F;M;F;M", "homepage": "https://dqxiu.github.io/;;https://ikekonglp.github.io/;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024;https://www.cs.cmu.edu/~leili", "dblp": "284/0673;25/624;144/7656;;13/7007-5.html", "google_scholar": "ibcR7VkAAAAJ;;f1hBi5wAAAAJ;;BYXqAlwAAAAJ", "orcid": ";;;;0000-0003-3095-9776", "linkedin": "qingxiu-dong-a3758a199/;;;;", "or_profile": "~Qingxiu_Dong1;~Jingjing_Xu1;~Lingpeng_Kong1;~Zhifang_Sui1;~Lei_Li11", "aff": "Peking University;;Department of Computer Science, The University of Hong Kong;Peking University;Computer Science Department, UC Santa Barbara", "aff_domain": "pku.edu.cn;;cs.hku.hk;pku.edu.cn;cs.ucsb.edu", "position": "PhD student;;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndong2023statistical,\ntitle={Statistical Knowledge Assessment for Large Language Models},\nauthor={Qingxiu Dong and Jingjing Xu and Lingpeng Kong and Zhifang Sui and Lei Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pNtG6NAmx0}\n}", "github": "", "project": "", "reviewers": "e9uD;adWh;4epC;zX87;bA4R", "pdf_size": 961401, "rating": "4;5;6;7;7", "confidence": "3;3;3;4;3", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "86;47;79;112;191", "wc_strengths": "44;72;103;108;103", "wc_weaknesses": "179;79;55;77;67", "wc_questions": "44;95;1;137;19", "wc_limitations": "1;6;1;9;21", "wc_review": "354;299;239;443;401", "wc_reply_reviewers": "0;36;0;22;22", "wc_reply_authors": "0;255;122;57;196", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 103.0, 48.63332190998266 ], "wc_strengths_avg": [ 86.0, 24.584547992590792 ], "wc_weaknesses_avg": [ 91.4, 44.62107125562989 ], "wc_questions_avg": [ 59.2, 50.13741118167152 ], "wc_limitations_avg": [ 7.6, 7.364781055808788 ], "wc_review_avg": [ 347.2, 72.31707958705191 ], "wc_reply_reviewers_avg": [ 16.0, 14.028542333400146 ], "wc_reply_authors_avg": [ 126.0, 91.8629413855228 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5144957554275266, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14752333346054955069&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "pku.edu.cn;;cs.hku.hk;pku.edu.cn;cs.ucsb.edu", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Peking University;University of Hong Kong;University of California, Santa Barbara", "aff_unique_dep": ";Department of Computer Science;Computer Science Department", "aff_unique_url": 
"http://www.pku.edu.cn;https://www.hku.hk;https://www.ucsb.edu", "aff_unique_abbr": "Peking U;HKU;UCSB", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Santa Barbara", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "ESSEN: Improving Evolution State Estimation for Temporal Networks using Von Neumann Entropy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70414", "id": "pO7d6iFdnc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0147d967a5db3b8dde08d2a327b24568-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pO7d6iFdnc", "openreview": "https://openreview.net/forum?id=pO7d6iFdnc", "poster": "/media/PosterPDFs/NeurIPS%202023/70414.png?t=1701435282.0732918", "slides": "https://nips.cc/virtual/2023/poster/70414", "video": "https://nips.cc/virtual/2023/poster/70414", "author_site": "Qiyao Huang, Yingyue Zhang, Zhihong Zhang, Edwin Hancock", "tldr": "", "abstract": "Temporal networks are widely used as abstract graph representations for real-world dynamic systems. Indeed, recognizing the network evolution states is crucial in understanding and analyzing temporal networks. For instance, social networks will generate the clustering and formation of tightly-knit groups or communities over time, relying on the triadic closure theory. However, the existing methods often struggle to account for the time-varying nature of these network structures, hindering their performance when applied to networks with complex evolution states. To mitigate this problem, we propose a novel framework called ESSEN, an Evolution StateS awarE Network, to measure temporal network evolution using von Neumann entropy and thermodynamic temperature. The developed framework utilizes a von Neumann entropy aware attention mechanism and network evolution state contrastive learning in the graph encoding. In addition, it employs a unique decoder the so-called Mixture of Thermodynamic Experts (MoTE) for decoding. ESSEN extracts local and global network evolution information using thermodynamic features and adaptively recognizes the network evolution states. 
Moreover, the proposed method is evaluated on link prediction tasks under both transductive and inductive settings, with the corresponding results demonstrating its effectiveness compared to various state-of-the-art baselines.", "keywords": "Temporal Network;Graph Neural Network;Von Neumann Entropy", "primary_area": "", "supplementary_material": "/attachment/e8869050dec9c5ed6959f80b186331a458376bce.pdf", "author": "Qiyao Huang;Yingyue Zhang;Zhihong Zhang;Edwin Hancock", "authorids": "~Qiyao_Huang1;~Yingyue_Zhang1;~Zhihong_Zhang2;~Edwin_Hancock1", "gender": "M;F;M;M", "homepage": ";https://orcid.org/0000-0003-0252-1507;https://informatics.xmu.edu.cn/info/1019/15293.htm;https://pure.york.ac.uk/portal/en/persons/edwin-r-hancock", "dblp": ";;;h/EdwinRHancock", "google_scholar": ";;;EjDU2ncAAAAJ", "orcid": "0000-0002-3947-9544;0000-0003-0252-1507;;0000-0003-4496-2028", "linkedin": ";;;edwin-hancock-11913117/?originalSubdomain=uk", "or_profile": "~Qiyao_Huang1;~Yingyue_Zhang1;~Zhihong_Zhang2;~Edwin_Hancock1", "aff": "Xiamen University;Xiamen University;;Anhui University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;;ahu.edu.cn", "position": "MS student;MS student;;Honorary Professor", "bibtex": "@inproceedings{\nhuang2023essen,\ntitle={{ESSEN}: Improving Evolution State Estimation for Temporal Networks using Von Neumann Entropy},\nauthor={Qiyao Huang and Yingyue Zhang and Zhihong Zhang and Edwin Hancock},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pO7d6iFdnc}\n}", "github": "", "project": "", "reviewers": "jRZR;oGt7;w7vF", "pdf_size": 2649296, "rating": "6;6;6", "confidence": "3;5;2", "soundness": "3;2;2", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "100;76;162", "wc_strengths": "58;15;44", "wc_weaknesses": "34;22;45", "wc_questions": "141;9;30", "wc_limitations": "1;1;17", "wc_review": "334;123;298", "wc_reply_reviewers": "86;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 112.66666666666667, 36.23380864453651 ], "wc_strengths_avg": [ 39.0, 17.90716802475106 ], "wc_weaknesses_avg": [ 33.666666666666664, 9.392668535736913 ], "wc_questions_avg": [ 60.0, 57.91372894228103 ], "wc_limitations_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_review_avg": [ 251.66666666666666, 92.16048803883125 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 40.54078878802872 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12974351755562785492&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "xmu.edu.cn;xmu.edu.cn;;ahu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Xiamen University;Anhui University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;http://www.ahu.edu.cn/", "aff_unique_abbr": "XMU;AHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Leveraging Vision-Centric Multi-Modal 
Expertise for 3D Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70413", "id": "pQF9kbM8Ea", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/79206ac5b7e88eeeed74997f3b6f4c7f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pQF9kbM8Ea", "openreview": "https://openreview.net/forum?id=pQF9kbM8Ea", "poster": "/media/PosterPDFs/NeurIPS%202023/70413.png?t=1701839742.699822", "slides": "https://nips.cc/virtual/2023/poster/70413", "video": "https://nips.cc/virtual/2023/poster/70413", "author_site": "Linyan Huang, Zhiqi Li, Chonghao Sima, Wenhai Wang, Jingdong Wang, Yu Qiao, Hongyang Li", "tldr": "", "abstract": "Current research is primarily dedicated to advancing the accuracy of camera-only 3D object detectors (apprentice) through the knowledge transferred from LiDAR- or multi-modal-based counterparts (expert). However, the presence of the domain gap between LiDAR and camera features, coupled with the inherent incompatibility in temporal fusion, significantly hinders the effectiveness of distillation-based enhancements for apprentices. Motivated by the success of uni-modal distillation, an apprentice-friendly expert model would predominantly rely on camera features, while still achieving comparable performance to multi-modal models. To this end, we introduce VCD, a framework to improve the camera-only apprentice model, including an apprentice-friendly multi-modal expert and temporal-fusion-friendly distillation supervision. The multi-modal expert VCD-E adopts an identical structure as that of the camera-only apprentice in order to alleviate the feature disparity, and leverages LiDAR input as a depth prior to reconstruct the 3D scene, achieving the performance on par with other heterogeneous multi-modal experts. Additionally, a fine-grained trajectory-based distillation module is introduced with the purpose of individually rectifying the motion misalignment for each object in the scene. With those improvements, our camera-only apprentice VCD-A sets new state-of-the-art on nuScenes with a score of 63.1% NDS. 
The code will be released at https://github.com/OpenDriveLab/Birds-eye-view-Perception.", "keywords": "camera-only detection;multi-modal distillation;multi-view object detection", "primary_area": "", "supplementary_material": "/attachment/9849182a9bce88759923248cc73acef4c3ae8d3e.pdf", "author": "Linyan Huang;Zhiqi Li;Chonghao Sima;Wenhai Wang;Jingdong Wang;Yu Qiao;Hongyang Li", "authorids": "~Linyan_Huang3;~Zhiqi_Li2;~Chonghao_Sima1;~Wenhai_Wang2;~Jingdong_Wang1;~Yu_Qiao1;~Hongyang_Li1", "gender": "Not Specified;;;;M;;M", "homepage": ";;;;https://jingdongwang2017.github.io/;;https://datascience.hku.hk/people/hongyang-li/", "dblp": "237/8239;;317/0445;;49/3441;;95/8433-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;dgYJ6esAAAAJ;;z5SPCmgAAAAJ;;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ", "orcid": ";;;;0000-0002-4888-4445;;0000-0001-9110-5534", "linkedin": ";;;;;;hongyangli2020/", "or_profile": "~Linyan_Huang3;~Zhiqi_Li2;~Chonghao_Sima1;~Wenhai_Wang2;~Jingdong_Wang1;~Yu_Qiao1;~Hongyang_Li1", "aff": "Shanghai AI Laboratory;;Purdue University;;Baidu;;Shanghai AI Lab", "aff_domain": "pjlab.org.cn;;purdue.edu;;baidu.com;;pjlab.org.cn", "position": "Intern;;PhD student;;Chief Scientist for Computer Vision;;Researcher", "bibtex": "@inproceedings{\nhuang2023leveraging,\ntitle={Leveraging Vision-Centric Multi-Modal Expertise for 3D Object Detection},\nauthor={Linyan Huang and Zhiqi Li and Chonghao Sima and Wenhai Wang and Jingdong Wang and Yu Qiao and Hongyang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pQF9kbM8Ea}\n}", "github": "", "project": "", "reviewers": "bEge;xwmh;KRDk;oCet;LE1a", "pdf_size": 1902926, "rating": "5;5;5;6;6", "confidence": "4;5;3;5;5", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "3;3;3;3;2", "wc_summary": "35;62;63;94;74", "wc_strengths": "39;34;77;69;69", "wc_weaknesses": "182;59;36;35;193", "wc_questions": "6;21;54;82;36", "wc_limitations": "8;5;50;1;7", "wc_review": "270;181;280;281;379", "wc_reply_reviewers": "23;0;0;34;708", "wc_reply_authors": "0;0;0;12;702", "reply_reviewers": "1;0;0;1;3", "reply_authors": "1;1;1;2;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 65.6, 19.147845831842286 ], "wc_strengths_avg": [ 57.6, 17.54536975956905 ], "wc_weaknesses_avg": [ 101.0, 71.2320152740325 ], "wc_questions_avg": [ 39.8, 26.430285658690863 ], "wc_limitations_avg": [ 14.2, 18.059900331950892 ], "wc_review_avg": [ 278.2, 62.74838643343748 ], "wc_reply_reviewers_avg": [ 153.0, 277.81432648443456 ], "wc_reply_authors_avg": [ 142.8, 279.6386239416866 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8060342985706270545&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "pjlab.org.cn;;purdue.edu;;baidu.com;;pjlab.org.cn", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai AI Laboratory;Purdue University;Baidu;Shanghai AI Lab", "aff_unique_dep": ";;Baidu, Inc.;", "aff_unique_url": 
"https://www.shanghai-ai-lab.com;https://www.purdue.edu;https://www.baidu.com;https://www.shanghaiailab.com", "aff_unique_abbr": "SAIL;Purdue;Baidu;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Detecting Any Human-Object Interaction Relationship: Universal HOI Detector with Spatial Prompt Learning on Foundation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70412", "id": "pQvAL40Cdj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/02687e7b22abc64e651be8da74ec610e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pQvAL40Cdj", "openreview": "https://openreview.net/forum?id=pQvAL40Cdj", "poster": "/media/PosterPDFs/NeurIPS%202023/70412.png?t=1701778821.9243581", "slides": "https://nips.cc/virtual/2023/poster/70412", "video": "https://nips.cc/virtual/2023/poster/70412", "author_site": "Yichao Cao, Qingfei Tang, Xiu Su, Song Chen, Shan You, Xiaobo Lu, Chang Xu", "tldr": "", "abstract": "Human-object interaction (HOI) detection aims to comprehend the intricate relationships between humans and objects, predicting triplets, and serving as the foundation for numerous computer vision tasks. The complexity and diversity of human-object interactions in the real world, however, pose significant challenges for both annotation and recognition, particularly in recognizing interactions within an open world context. This study explores the universal interaction recognition in an open-world setting through the use of Vision-Language (VL) foundation models and large language models (LLMs). The proposed method is dubbed as UniHOI. We conduct a deep analysis of the three hierarchical features inherent in visual HOI detectors and propose a method for high-level relation extraction aimed at VL foundation models, which we call HO prompt-based learning. Our design includes an HO Prompt-guided Decoder (HOPD), facilitates the association of high-level relation representations in the foundation model with various HO pairs within the image. Furthermore, we utilize a LLM (i.e. GPT) for interaction interpretation, generating a richer linguistic understanding for complex HOIs. For open-category interaction recognition, our method supports either of two input types: interaction phrase or interpretive sentence. Our efficient architecture design and learning methods effectively unleash the potential of the VL foundation models and LLMs, allowing UniHOI to surpass all existing methods with a substantial margin, under both supervised and zero-shot settings. 
The code and pre-trained weights will be made publicly available.", "keywords": "Human-object interaction;Commonsense Knowledge;Foundation Models", "primary_area": "", "supplementary_material": "/attachment/5a39cbf89be81b4d75ddd9c2359797ff8f1cec1a.pdf", "author": "Yichao Cao;Qingfei Tang;Xiu Su;Song Chen;Shan You;Xiaobo Lu;Chang Xu", "authorids": "~Yichao_Cao1;~Qingfei_Tang1;~Xiu_Su1;~Song_Chen4;~Shan_You3;xblu@seu.edu.cn;~Chang_Xu4", "gender": "M;M;;M;M;;", "homepage": ";;https://xiusu.github.io/;https://bestsongc.github.io/;https://shanyou92.github.io/;;", "dblp": "160/6077;252/4285;189/3416;;179/2548;;", "google_scholar": "--8h8o0AAAAJ;;7OMxmYcAAAAJ;;https://scholar.google.com/citations?hl=en;;", "orcid": ";;;;0000-0003-1964-0430;;", "linkedin": ";;%E4%BF%AE-%E8%8B%8F-13896b198;;;;", "or_profile": "~Yichao_Cao1;~Qingfei_Tang1;~Xiu_Su1;~Song_Chen4;~Shan_You3;xblu@seu.edu.cn;~Chang_Xu4", "aff": "Southeast University;Nanjing Enbo Technology Co., Ltd., Nanjing, China ;University of Sydney;;SenseTime Research;;", "aff_domain": "seu.edu.cn;enbo-tech.com;sydney.edu.au;;sensetime.com;;", "position": "PhD student;Researcher;PhD student;;Researcher;;", "bibtex": "@inproceedings{\ncao2023detecting,\ntitle={Detecting Any Human-Object Interaction Relationship: Universal {HOI} Detector with Spatial Prompt Learning on Foundation Models},\nauthor={Yichao Cao and Qingfei Tang and Xiu Su and Song Chen and Shan You and Xiaobo Lu and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pQvAL40Cdj}\n}", "github": "", "project": "", "reviewers": "pXLy;djfM;WxZv;X6bU;WF8N", "pdf_size": 1728793, "rating": "4;5;5;6;7", "confidence": "5;5;4;4;5", "soundness": "3;3;3;3;4", "novelty": "2;3;2;2;3", "presentation": "3;2;3;2;3", "wc_summary": "52;88;68;130;66", "wc_strengths": "94;72;53;40;60", "wc_weaknesses": "433;96;147;263;42", "wc_questions": "45;179;34;4;6", "wc_limitations": "1;36;1;9;6", "wc_review": "625;471;303;446;180", "wc_reply_reviewers": "0;0;0;28;25", "wc_reply_authors": "145;89;83;86;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "3;2;2;2;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.8, 27.147007201531444 ], "wc_strengths_avg": [ 63.8, 18.312837027615355 ], "wc_weaknesses_avg": [ 196.2, 139.12210464192955 ], "wc_questions_avg": [ 53.6, 64.66714776453342 ], "wc_limitations_avg": [ 10.6, 13.062924634246343 ], "wc_review_avg": [ 405.0, 151.96446953153227 ], "wc_reply_reviewers_avg": [ 10.6, 13.016912076218384 ], "wc_reply_authors_avg": [ 80.6, 46.366367121007016 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.0800640769025435, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6291905955860543808&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "seu.edu.cn;enbo-tech.com;sydney.edu.au;;sensetime.com;;", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Southeast University;Nanjing Enbo Technology Co., Ltd.;University of Sydney;SenseTime", "aff_unique_dep": ";;;SenseTime Research", "aff_unique_url": "https://www.seu.edu.cn/;;https://www.sydney.edu.au;https://www.sensetime.com", "aff_unique_abbr": 
"SEU;;USYD;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "OpenIllumination: A Multi-Illumination Dataset for Inverse Rendering Evaluation on Real Objects", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73468", "id": "pRnrg2bWr0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/74a67268c5cc5910f64938cac4526a90-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pRnrg2bWr0", "openreview": "https://openreview.net/forum?id=pRnrg2bWr0", "poster": "/media/PosterPDFs/NeurIPS%202023/73468.png?t=1702067597.994183", "slides": "https://nips.cc/virtual/2023/poster/73468", "video": "https://nips.cc/virtual/2023/poster/73468", "author_site": "Isabella Liu, Linghao Chen, Ziyang Fu, Liwen Wu, Haian Jin, Zhong Li, Chin Ming Ryan Wong, Yi Xu, Ravi Ramamoorthi, Zexiang Xu, Hao Su", "tldr": "", "abstract": "We introduce OpenIllumination, a real-world dataset containing over 108K images of 64 objects with diverse materials, captured under 72 camera views and a large number of different illuminations. For each image in the dataset, we provide accurate camera parameters, illumination ground truth, and foreground segmentation masks. Our dataset enables the quantitative evaluation of most inverse rendering and material decomposition methods for real objects. We examine several state-of-the-art inverse rendering methods on our dataset and compare their performances. The dataset and code can be found on the project page: https://oppo-us-research.github.io/OpenIllumination.", "keywords": "Inverse Rendering; Neural Rendering", "primary_area": "", "supplementary_material": "/attachment/38c23d8947f06f63da37588c09dd21c5434167ff.pdf", "author": "Isabella Liu;Linghao Chen;Ziyang Fu;Liwen Wu;Haian Jin;Zhong Li;Chin Ming Ryan Wong;Yi Xu;Ravi Ramamoorthi;Zexiang Xu;Hao Su", "authorids": "~Isabella_Liu1;~Linghao_Chen2;~Ziyang_Fu1;~Liwen_Wu2;~Haian_Jin1;~Zhong_Li1;~Chin_Ming_Ryan_Wong1;~Yi_Xu7;~Ravi_Ramamoorthi3;~Zexiang_Xu1;~Hao_Su1", "gender": ";;M;M;M;M;M;M;M;M;M", "homepage": ";https://ootts.github.io/;http://fzy28.github.io;https://lwwu2.github.io/;https://haian-jin.github.io/;https://sites.google.com/site/lizhong19900216/;;https://www.linkedin.com/in/yi-xu-42654823;https://cseweb.ucsd.edu/~ravir/;https://cseweb.ucsd.edu/~zex014/;http://ai.ucsd.edu/~haosu", "dblp": ";262/3716;;82/3277;345/8396;70/3488;;;88/6919;154/0366;09/4945-1", "google_scholar": ";;;_7OXdUcAAAAJ;VZvmpKoAAAAJ;C-wK73YAAAAJ;;ldanjkUAAAAJ;q0MzO6cAAAAJ;_RRIYvEAAAAJ;1P8Zu04AAAAJ", "orcid": ";;;;;0000-0002-7416-1216;;0000-0003-2126-6054;;;", "linkedin": ";;;;;lizhong323/;chin-ming-ryan-wong/;yi-xu-42654823;;;", "or_profile": "~Isabella_Liu1;~Linghao_Chen2;~Ziyang_Fu1;~Liwen_Wu2;~Haian_Jin1;~Zhong_Li1;~Chin_Ming_Ryan_Wong1;~Yi_Xu7;~Ravi_Ramamoorthi3;~Zexiang_Xu1;~Hao_Su1", "aff": ";Zhejiang University;University of California, San Diego;Computer Science and Engineering Department, University of California, San Diego;Zhejiang University;OPPO US Research Center;Innopeak Technology, Inc.;OPPO US Research Center;NVIDIA;Adobe Research;University of California, San Diego", "aff_domain": ";zju.edu.cn;ucsd.edu;cse.ucsd.edu;zju.edu.cn;innopeaktech.com;innopeaktech.com;oppo.com;nvidia.com;adobe.com;ucsd.edu", "position": ";PhD student;MS student;PhD student;Undergrad student;Research Scientist;Camera Hardware Engineer;Principal 
Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nliu2023openillumination,\ntitle={OpenIllumination: A Multi-Illumination Dataset for Inverse Rendering Evaluation on Real Objects},\nauthor={Isabella Liu and Linghao Chen and Ziyang Fu and Liwen Wu and Haian Jin and Zhong Li and Chin Ming Ryan Wong and Yi Xu and Ravi Ramamoorthi and Zexiang Xu and Hao Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pRnrg2bWr0}\n}", "github": "", "project": "", "reviewers": "USxp;dCTX;rUYo", "pdf_size": 15531472, "rating": "5;6;7", "confidence": "4;4;4", "wc_summary_and_contributions": "39;64;34", "wc_strengths": "34;81;18", "wc_improvement": "207;194;35", "wc_limitations": "43;34;8", "wc_correctness": "33;48;8", "wc_clarity": "15;6;19", "wc_relation_to_prior_work": "162;57;15", "wc_documentation": "28;12;17", "wc_additional_feedback": "1;1;1", "wc_review": "562;497;155", "wc_reply_reviewers": "0;76;0", "wc_reply_authors": "527;738;234", "reply_reviewers": "0;3;0", "reply_authors": "1;3;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 45.666666666666664, 13.12334645668635 ], "wc_strengths_avg": [ 44.333333333333336, 26.737406173540638 ], "wc_improvement_avg": [ 145.33333333333334, 78.19775501182173 ], "wc_limitations_avg": [ 28.333333333333332, 14.83988619303471 ], "wc_correctness_avg": [ 29.666666666666668, 16.49915822768611 ], "wc_clarity_avg": [ 13.333333333333334, 5.436502143433364 ], "wc_relation_to_prior_work_avg": [ 78.0, 61.82232606429493 ], "wc_documentation_avg": [ 19.0, 6.683312551921141 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 404.6666666666667, 178.52419692828445 ], "wc_reply_reviewers_avg": [ 25.333333333333332, 35.82674358011841 ], "wc_reply_authors_avg": [ 499.6666666666667, 206.66290319153933 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1541579018097487790&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": ";zju.edu.cn;ucsd.edu;cse.ucsd.edu;zju.edu.cn;innopeaktech.com;innopeaktech.com;oppo.com;nvidia.com;adobe.com;ucsd.edu", "author_num": 11, "aff_unique_index": "0;1;1;0;2;3;2;4;5;1", "aff_unique_norm": "Zhejiang University;University of California, San Diego;OPPO;InnoPeak Technology;NVIDIA;Adobe", "aff_unique_dep": ";;US Research Center;;NVIDIA Corporation;Adobe Research", "aff_unique_url": "https://www.zju.edu.cn;https://www.ucsd.edu;https://www.oppo.com;;https://www.nvidia.com;https://research.adobe.com", "aff_unique_abbr": "ZJU;UCSD;OPPO;;NVIDIA;Adobe", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;1;0;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Parameter-efficient Tuning of Large-scale Multimodal Foundation Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70411", "id": "pT8DIhsJCw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32ebb6b560ee58abbdae834e5f37cb5d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pT8DIhsJCw", "openreview": "https://openreview.net/forum?id=pT8DIhsJCw", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70411.png?t=1699357640.4366517", "slides": "https://nips.cc/virtual/2023/poster/70411", "video": "https://nips.cc/virtual/2023/poster/70411", "author_site": "Haixin Wang, Xinlong Yang, Jianlong Chang, Dian Jin, Jinan Sun, Shikun Zhang, Xiao Luo, Qi Tian", "tldr": "", "abstract": "Driven by the progress of large-scale pre-training, parameter-efficient transfer learning has gained immense popularity across different subfields of Artificial Intelligence. The core is to adapt the model to downstream tasks with only a small set of parameters. Recently, researchers have leveraged such proven techniques in multimodal tasks and achieve promising results. However, two critical issues remain unresolved: how to further reduce the complexity with lightweight design and how to boost alignment between modalities under extremely low parameters. In this paper, we propose A gracefUl pRompt framewOrk for cRoss-modal trAnsfer (AURORA) to overcome these challenges. Considering the redundancy in existing architectures, we first utilize the mode approximation to generate 0.1M trainable parameters to implement the multimodal parameter-efficient tuning, which explores the low intrinsic dimension with only 0.04% parameters of the pre-trained model. Then, for better modality alignment, we propose the Informative Context Enhancement and Gated Query Transformation module under extremely few parameters scenes. A thorough evaluation on six cross-modal benchmarks shows that it not only outperforms the state-of-the-art but even outperforms the full fine-tuning approach. Our code is available at: https://github.com/WillDreamer/Aurora.", "keywords": "parameter-efficient transfer learning; multi-modal learning; prompt learning", "primary_area": "", "supplementary_material": "/attachment/e8efaea07ea3f19f811f8943e35e424491ff7789.zip", "author": "Haixin Wang;Xinlong Yang;Jianlong Chang;Dian Jin;Jinan Sun;Shikun Zhang;Xiao Luo;Qi Tian", "authorids": "~Haixin_Wang3;~Xinlong_Yang1;~Jianlong_Chang2;~Dian_Jin2;~Jinan_Sun1;~Shikun_Zhang2;~Xiao_Luo2;~Qi_Tian3", "gender": ";M;M;;M;M;M;M", "homepage": "https://willdreamer.github.io/;https://xinlong-yang.github.io/;https://jianlongchange.github.io/;https://www.google.com/?gws_rd=ssl;;;https://www.qitian1987.com/index.html;http://luoxiao12.github.io", "dblp": "81/5956-3;347/2205;92/2332;;16/10588;83/3715.html;78/1467-1.html;50/1585-1", "google_scholar": "RGZUJOkAAAAJ;;RDwnNsQAAAAJ;;;uiklLscAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?", "orcid": "0000-0002-5714-0149;;;;;;0000-0002-7252-5047;", "linkedin": ";;;;;;;%E9%9C%84-%E7%BD%97-303548214/", "or_profile": "~Haixin_Wang3;~Xinlong_Yang1;~Jianlong_Chang2;~Dian_Jin2;~Jinan_Sun1;~Shikun_Zhang2;~Qi_Tian3;~Xiao_Luo3", "aff": "Peking University;Peking University;Huawei Technologies Ltd.;University of Wisconsin - Madison;Peking University;Peking University;Huawei Technologies Ltd.;University of California, Los Angeles", "aff_domain": "pku.edu.cn;pku.edu.cn;huawei.com;wisc.edu;pku.edu.cn;pku.edu.cn;huawei.com;cs.ucla.edu", "position": "MS student;MS student;Principal Researcher;MS student;Associate Professor;Full Professor;Principal Researcher;Postdoc", "bibtex": "@inproceedings{\nwang2023parameterefficient,\ntitle={Parameter-efficient Tuning of Large-scale Multimodal Foundation Model},\nauthor={Haixin Wang and Xinlong Yang and Jianlong Chang and Dian Jin and Jinan Sun and Shikun Zhang and Xiao Luo and Qi Tian},\nbooktitle={Thirty-seventh Conference on Neural 
Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pT8DIhsJCw}\n}", "github": "", "project": "", "reviewers": "Srms;kJXf;ME39;37bN;NuU2", "pdf_size": 9528552, "rating": "5;5;5;5;6", "confidence": "4;4;4;1;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "169;101;123;116;77", "wc_strengths": "110;32;61;64;54", "wc_weaknesses": "240;214;194;32;165", "wc_questions": "117;12;6;43;25", "wc_limitations": "74;10;11;7;9", "wc_review": "710;369;395;262;330", "wc_reply_reviewers": "33;25;46;10;103", "wc_reply_authors": "285;280;294;231;340", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;3;3;3;3", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 1.2 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 117.2, 30.32094985319556 ], "wc_strengths_avg": [ 64.2, 25.49039034616771 ], "wc_weaknesses_avg": [ 169.0, 72.7681248899544 ], "wc_questions_avg": [ 40.6, 40.252204908551285 ], "wc_limitations_avg": [ 22.2, 25.93376177880872 ], "wc_review_avg": [ 413.2, 155.0218049178889 ], "wc_reply_reviewers_avg": [ 43.4, 32.00374978029919 ], "wc_reply_authors_avg": [ 286.0, 34.762048271067115 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11377972477387036218&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;huawei.com;wisc.edu;pku.edu.cn;pku.edu.cn;huawei.com;cs.ucla.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;0;0;1;3", "aff_unique_norm": "Peking University;Huawei;University of Wisconsin-Madison;University of California, Los Angeles", "aff_unique_dep": ";Huawei Technologies;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com;https://www.wisc.edu;https://www.ucla.edu", "aff_unique_abbr": "Peking U;Huawei;UW-Madison;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Madison;Los Angeles", "aff_country_unique_index": "0;0;0;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "CorresNeRF: Image Correspondence Priors for Neural Radiance Fields", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70410", "id": "pTCZWSDltG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f77492bb8070a5c825a87c0c5181da2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pTCZWSDltG", "openreview": "https://openreview.net/forum?id=pTCZWSDltG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70410", "video": "https://nips.cc/virtual/2023/poster/70410", "author_site": "Yixing Lao, Xiaogang Xu, zhipeng cai, Xihui Liu, Hengshuang Zhao", "tldr": "", "abstract": "Neural Radiance Fields (NeRFs) have achieved impressive results in novel view synthesis and surface reconstruction tasks. However, their performance suffers under challenging scenarios with sparse input views. We present CorresNeRF, a novel method that leverages image correspondence priors computed by off-the-shelf methods to supervise NeRF training. We design adaptive processes for augmentation and filtering to generate dense and high-quality correspondences. The correspondences are then used to regularize NeRF training via the correspondence pixel reprojection and depth loss terms. 
We evaluate our methods on novel view synthesis and surface reconstruction tasks with density-based and SDF-based NeRF models on different datasets. Our method outperforms previous methods in both photometric and geometric metrics. We show that this simple yet effective technique of using correspondence priors can be applied as a plug-and-play module across different NeRF variants. The project page is at https://yxlao.github.io/corres-nerf/.", "keywords": "Neural Radiance Fields;3D Reconstruction;Few View", "primary_area": "", "supplementary_material": "", "author": "Yixing Lao;Xiaogang Xu;zhipeng cai;Xihui Liu;Hengshuang Zhao", "authorids": "~Yixing_Lao1;~Xiaogang_Xu2;~zhipeng_cai3;~Xihui_Liu1;~Hengshuang_Zhao2", "gender": "M;M;M;F;M", "homepage": "https://github.com/yxlao/;https://xiaogang00.github.io;https://zhipengcai.github.io;https://xh-liu.github.io/;https://hszhao.github.io", "dblp": "213/7784;118/2268-2;;184/3911;185/7848", "google_scholar": "2w9VSWIAAAAJ;https://scholar.google.com.hk/citations?user=R65xDQwAAAAJ;;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ;4uE10I0AAAAJ", "orcid": ";0000-0002-7928-7336;;0000-0003-1831-9952;0000-0001-8277-2706", "linkedin": ";;;;hengshuang-zhao-347b8391/?originalSubdomain=hk", "or_profile": "~Yixing_Lao1;~Xiaogang_Xu2;~zhipeng_cai3;~Xihui_Liu1;~Hengshuang_Zhao2", "aff": "University of Hong Kong;The Chinese University of Hong Kong;Intel;University of Hong Kong;The University of Hong Kong", "aff_domain": "hku.hk;cuhk.edu.hk;intel.com;hku.hk;hku.hk", "position": "PhD student;Postdoc;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlao2023corresnerf,\ntitle={CorresNe{RF}: Image Correspondence Priors for Neural Radiance Fields},\nauthor={Yixing Lao and Xiaogang Xu and zhipeng cai and Xihui Liu and Hengshuang Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pTCZWSDltG}\n}", "github": "", "project": "", "reviewers": "7TAK;cKcF;1K2a;NREe;yxQE", "pdf_size": 8773431, "rating": "4;4;5;5;5", "confidence": "3;3;3;5;4", "soundness": "2;2;4;2;3", "novelty": "2;2;2;2;3", "presentation": "2;2;3;3;3", "wc_summary": "89;194;125;10;118", "wc_strengths": "127;289;50;19;62", "wc_weaknesses": "171;169;110;227;116", "wc_questions": "159;112;90;11;104", "wc_limitations": "8;4;30;12;12", "wc_review": "554;768;405;279;412", "wc_reply_reviewers": "0;72;35;98;0", "wc_reply_authors": "111;343;111;950;111", "reply_reviewers": "0;1;1;2;0", "reply_authors": "3;4;3;5;3", "rating_avg": [ 4.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 107.2, 59.57650543628755 ], "wc_strengths_avg": [ 109.4, 96.45019440104826 ], "wc_weaknesses_avg": [ 158.6, 42.70175640415743 ], "wc_questions_avg": [ 95.2, 48.05580089853878 ], "wc_limitations_avg": [ 13.2, 8.908422980528034 ], "wc_review_avg": [ 483.6, 166.73643872891134 ], "wc_reply_reviewers_avg": [ 41.0, 39.00769154923167 ], "wc_reply_authors_avg": [ 325.2, 325.06516269818883 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.6, 0.8 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6711961927203147239&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": 
"hku.hk;cuhk.edu.hk;intel.com;hku.hk;hku.hk", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Hong Kong;Chinese University of Hong Kong;Intel", "aff_unique_dep": ";;Intel Corporation", "aff_unique_url": "https://www.hku.hk;https://www.cuhk.edu.hk;https://www.intel.com", "aff_unique_abbr": "HKU;CUHK;Intel", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "DynaDojo: An Extensible Benchmarking Platform for Scalable Dynamical System Identification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73467", "id": "pTSNoBTk8E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32093649cbbcff773d9a991d8c30a7fe-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pTSNoBTk8E", "openreview": "https://openreview.net/forum?id=pTSNoBTk8E", "poster": "/media/PosterPDFs/NeurIPS%202023/73467.png?t=1702360097.2594523", "slides": "https://nips.cc/virtual/2023/poster/73467", "video": "https://nips.cc/virtual/2023/poster/73467", "author_site": "Logan M Bhamidipaty, Tommy Bruzzese, Caryn Tran, Rami Ratl Mrad, Maxinder S. Kanwal", "tldr": "", "abstract": "Modeling complex dynamical systems poses significant challenges, with traditional methods struggling to work across a variety of systems and scale to high-dimensional dynamics. In response, we present DynaDojo, a novel benchmarking platform designed for data-driven dynamical system identification. DynaDojo enables comprehensive evaluation of how an algorithm's performance scales across three key dimensions: (1) the number of training samples provided, (2) the complexity of the dynamical system being modeled, and (3) the training samples required to achieve a target error threshold. Furthermore, DynaDojo enables studying out-of-distribution generalization (by providing multiple test conditions for each system) and active learning (by supporting closed-loop control). Through its user-friendly and easily extensible API, DynaDojo accommodates a wide range of user-defined $\\texttt{Algorithms}$, $\\texttt{Systems}$, and $\\texttt{Challenges}$ (scaling metrics). The platform also prioritizes resource-efficient training for running on a cluster. To showcase its utility, in DynaDojo $\\texttt{0.9}$, we include implementations of 7 baseline algorithms and 20 dynamical systems, along with many demo notebooks. 
This work aspires to make DynaDojo a unifying benchmarking platform for system identification, paralleling the role of OpenAI\u2019s Gym in reinforcement learning.", "keywords": "system identification;benchmarking;scaling", "primary_area": "", "supplementary_material": "", "author": "Logan Mondal Bhamidipaty;Tommy Bruzzese;Caryn Tran;Rami Ratl Mrad;Max Kanwal", "authorids": "~Logan_Mondal_Bhamidipaty1;~Tommy_Bruzzese1;~Caryn_Tran1;~Rami_Ratl_Mrad1;~Max_Kanwal1", "gender": "M;;F;M;Not Specified", "homepage": "https://logan-mondal-bhamidipaty.com/;https://www.tommybruzzese.com;https://www.caryntran.com;;", "dblp": "399/2851;;;;", "google_scholar": "wJ_Eo0sAAAAJ;;uIez_QYAAAAJ;;o1qFlsgAAAAJ", "orcid": "0009-0001-3978-9462;;0000-0002-4645-6607;;", "linkedin": "logan-bhamidipaty/;;;rami-ratl-mrad/;mkanwal/", "or_profile": "~Logan_Mondal_Bhamidipaty1;~Tommy_Bruzzese1;~Caryn_Tran1;~Rami_Ratl_Mrad1;~Max_Kanwal1", "aff": "Stanford University;Stanford University;Northwestern University, Northwestern University;Stanford University;Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu;u.northwestern.edu;stanford.edu;stanford.edu", "position": "MS student;Undergrad student;PhD student;Intern;PhD student", "bibtex": "@inproceedings{\nbhamidipaty2023dynadojo,\ntitle={DynaDojo: An Extensible Platform for Benchmarking Scaling in Dynamical System Identification},\nauthor={Logan Mondal Bhamidipaty and Tommy Bruzzese and Caryn Tran and Rami Ratl Mrad and Max Kanwal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pTSNoBTk8E}\n}", "github": "", "project": "", "reviewers": "rZxW;j4gM;33kq;Nmnd", "pdf_size": 6099727, "rating": "3;6;6;7", "confidence": "4;3;2;4", "wc_summary_and_contributions": "95;128;53;71", "wc_strengths": "86;45;41;132", "wc_improvement": "230;97;73;180", "wc_limitations": "17;43;9;135", "wc_correctness": "22;6;10;24", "wc_clarity": "10;106;19;46", "wc_relation_to_prior_work": "25;73;29;70", "wc_documentation": "7;47;15;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "493;546;250;668", "wc_reply_reviewers": "214;249;0;19", "wc_reply_authors": "465;253;98;260", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 86.75, 28.092481200492063 ], "wc_strengths_avg": [ 76.0, 36.817115585010185 ], "wc_improvement_avg": [ 145.0, 63.12289600454022 ], "wc_limitations_avg": [ 51.0, 50.0999001995014 ], "wc_correctness_avg": [ 15.5, 7.664854858377946 ], "wc_clarity_avg": [ 45.25, 37.49249924984996 ], "wc_relation_to_prior_work_avg": [ 49.25, 22.320114247019436 ], "wc_documentation_avg": [ 19.5, 16.147755261955144 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 489.25, 152.0088402034566 ], "wc_reply_reviewers_avg": [ 120.5, 111.88945437350206 ], "wc_reply_authors_avg": [ 269.0, 130.37829573974344 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3015113445777637, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "cs.stanford.edu;stanford.edu;u.northwestern.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Stanford University;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.northwestern.edu", 
"aff_unique_abbr": "Stanford;NU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ToolQA: A Dataset for LLM Question Answering with External Tools", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73466", "id": "pV1xV2RK6I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9cb2a7495900f8b602cb10159246a016-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pV1xV2RK6I", "openreview": "https://openreview.net/forum?id=pV1xV2RK6I", "poster": "/media/PosterPDFs/NeurIPS%202023/73466.png?t=1699757746.0408652", "slides": "https://nips.cc/virtual/2023/poster/73466", "video": "https://nips.cc/virtual/2023/poster/73466", "author_site": "Yuchen Zhuang, Yue Yu, Kuan Wang, Haotian Sun, Chao Zhang", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated impressive performance in various NLP tasks, but they still suffer from challenges such as hallucination and weak numerical reasoning. To overcome these challenges, external tools can be used to enhance LLMs' question-answering abilities. However, current evaluation methods do not distinguish between questions that can be answered using LLMs' internal knowledge and those that require external information through tool use. To address this issue, we introduce a new dataset called ToolQA, which is designed to faithfully evaluate LLMs' ability to use external tools for question answering. Our development of ToolQA involved a scalable, automated process for dataset curation, along with 13 specialized tools designed for interaction with external knowledge in order to answer questions. Importantly, we strive to minimize the overlap between our benchmark data and LLMs' pre-training data, enabling a more precise evaluation of LLMs' tool-use reasoning abilities. We conducted an in-depth diagnosis of existing tool-use LLMs to highlight their strengths, weaknesses, and potential improvements. Our findings set a new benchmark for evaluating LLMs and suggest new directions for future advancements. 
Our data and code are freely available for the broader scientific community on GitHub.", "keywords": "Large Language Model;External Knowledge;Tool-Augmented LLMs", "primary_area": "", "supplementary_material": "/attachment/17a5a09f461d328fef43a6dcbcf854c90de9dbad.pdf", "author": "Yuchen Zhuang;Yue Yu;Kuan Wang;Haotian Sun;Chao Zhang", "authorids": "~Yuchen_Zhuang1;~Yue_Yu2;~Kuan_Wang1;~Haotian_Sun1;~Chao_Zhang15", "gender": "M;M;M;M;", "homepage": "https://night-chen.github.io/;https://yueyu1030.github.io;http://kuanwang.me;https://haotiansun.tech/;http://chaozhang.org/", "dblp": "191/5231.html;;;12/8162;94/3019-14", "google_scholar": "T-f6XlEAAAAJ;zQ3Jh6UAAAAJ;c1-_-dUAAAAJ;lcWkVCQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-3683-5208;;0000-0001-9013-7016;0000-0003-3009-598X", "linkedin": ";;;haotian-sun-159597218/;", "or_profile": "~Yuchen_Zhuang1;~Yue_Yu2;~Kuan_Wang1;~Haotian_Sun1;~Chao_Zhang15", "aff": "Adobe Systems;Google;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "adobe.com;google.com;gatech.edu;gatech.edu;gatech.edu", "position": "Intern;Research Intern;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhuang2023toolqa,\ntitle={Tool{QA}: A Dataset for {LLM} Question Answering with External Tools},\nauthor={Yuchen Zhuang and Yue Yu and Kuan Wang and Haotian Sun and Chao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pV1xV2RK6I}\n}", "github": "", "project": "", "reviewers": "3WCN;H5AJ;MPGH;sogC;HHeV", "pdf_size": 714910, "rating": "6;6;7;7;9", "confidence": "4;4;3;4;3", "wc_summary_and_contributions": "107;64;36;71;115", "wc_strengths": "107;13;165;85;67", "wc_improvement": "126;104;126;108;33", "wc_limitations": "76;1;98;22;18", "wc_correctness": "152;9;13;11;5", "wc_clarity": "65;1;33;1;5", "wc_relation_to_prior_work": "12;1;8;43;8", "wc_documentation": "117;1;9;1;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "763;195;489;343;256", "wc_reply_reviewers": "733;73;0;0;0", "wc_reply_authors": "2532;586;564;753;270", "reply_reviewers": "3;1;0;0;0", "reply_authors": "5;2;1;2;1", "rating_avg": [ 7.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 78.6, 29.04203849594584 ], "wc_strengths_avg": [ 87.4, 49.72564730599291 ], "wc_improvement_avg": [ 99.4, 34.40697603684462 ], "wc_limitations_avg": [ 43.0, 37.26660703632677 ], "wc_correctness_avg": [ 38.0, 57.06137047074842 ], "wc_clarity_avg": [ 21.0, 25.043961347997644 ], "wc_relation_to_prior_work_avg": [ 14.4, 14.732277488562318 ], "wc_documentation_avg": [ 26.4, 45.39427276650657 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 409.2, 202.6212229752846 ], "wc_reply_reviewers_avg": [ 161.2, 287.29455268069387 ], "wc_reply_authors_avg": [ 941.0, 810.5578326066561 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690604 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.74535599249993, "gs_citation": 242, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2694605361330155618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "adobe.com;google.com;gatech.edu;gatech.edu;gatech.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Adobe;Google;Georgia Institute of Technology", "aff_unique_dep": "Adobe 
Systems Incorporated;Google;", "aff_unique_url": "https://www.adobe.com;https://www.google.com;https://www.gatech.edu", "aff_unique_abbr": "Adobe;Google;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "RETVec: Resilient and Efficient Text Vectorizer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70409", "id": "pVlC0reMKq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf64451da212313c5ef1a00f49232c47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pVlC0reMKq", "openreview": "https://openreview.net/forum?id=pVlC0reMKq", "poster": "/media/PosterPDFs/NeurIPS%202023/70409.png?t=1702437523.483456", "slides": "https://nips.cc/virtual/2023/poster/70409", "video": "https://nips.cc/virtual/2023/poster/70409", "author_site": "Elie Bursztein, Marina Zhang, Owen Vallis, XINYU JIA, Alexey Kurakin", "tldr": "", "abstract": "This paper describes RETVec, an efficient, resilient, and multilingual text vectorizer designed for neural-based text processing. RETVec combines a novel character encoding with an optional small embedding model to embed words into a 256-dimensional vector space. The RETVec embedding model is pre-trained using pair-wise metric learning to be robust against typos and character-level adversarial attacks. In this paper, we evaluate and compare RETVec to state-of-the-art vectorizers and word embeddings on popular model architectures and datasets. These comparisons demonstrate that RETVec leads to competitive, multilingual models that are significantly more resilient to typos and adversarial text attacks. RETVec is available under the Apache 2 license at https://github.com/google-research/retvec.", "keywords": "language modeling;text embedding;adversarial text attack;text vectorization", "primary_area": "", "supplementary_material": "/attachment/aa6408bdec226a14d843ecc3d8dcd6af9eb863bd.zip", "author": "Elie Bursztein;Marina Zhang;Owen Skipper Vallis;Xinyu Jia;Alexey Kurakin", "authorids": "~Elie_Bursztein1;~Marina_Zhang1;~Owen_Skipper_Vallis1;~Xinyu_Jia1;~Alexey_Kurakin1", "gender": ";F;;;M", "homepage": "https://elie.net;https://marinazhang.github.io/;https://www.owenvallis.com;;http://kurakin.me", "dblp": "20/7004;340/6965;28/4883;;56/9834", "google_scholar": "Ayw4GsAAAAAJ;V9ehnpwAAAAJ;https://scholar.google.com/citations?hl=en;;nCh4qyMAAAAJ", "orcid": ";;;;", "linkedin": "bursztein;marina-zhang;;;", "or_profile": "~Elie_Bursztein1;~Marina_Zhang1;~Owen_Skipper_Vallis1;~Xinyu_Jia1;~Alexey_Kurakin1", "aff": "Google;Google;Google;;Research, Google", "aff_domain": "google.com;google.com;google.com;;research.google.com", "position": "Researcher;Researcher;Researcher;;Research Software Engineer", "bibtex": "@inproceedings{\nbursztein2023retvec,\ntitle={{RETV}ec: Resilient and Efficient Text Vectorizer},\nauthor={Elie Bursztein and Marina Zhang and Owen Skipper Vallis and Xinyu Jia and Alexey Kurakin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pVlC0reMKq}\n}", "github": "", "project": "", "reviewers": "LKoT;AFvP;aZHy", "pdf_size": 712786, "rating": "7;7;7", "confidence": "3;2;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;2", "wc_summary": "118;71;105", "wc_strengths": "105;156;78", "wc_weaknesses": "70;82;17", "wc_questions": "117;5;31", "wc_limitations": "57;1;36", 
"wc_review": "467;315;267", "wc_reply_reviewers": "14;15;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 98.0, 19.8158185969358 ], "wc_strengths_avg": [ 113.0, 32.341923257592455 ], "wc_weaknesses_avg": [ 56.333333333333336, 28.241026106633512 ], "wc_questions_avg": [ 51.0, 47.86090958879351 ], "wc_limitations_avg": [ 31.333333333333332, 23.098821518760552 ], "wc_review_avg": [ 349.6666666666667, 85.24995927011082 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 6.847546194724712 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8007667728812494810&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com;;research.google.com", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Principled Weight Initialisation for Input-Convex Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70408", "id": "pWZ97hUQtQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9062b7d6e522dadf4f7d85d49b60d81e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pWZ97hUQtQ", "openreview": "https://openreview.net/forum?id=pWZ97hUQtQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70408.png?t=1701700781.7880006", "slides": "https://nips.cc/virtual/2023/poster/70408", "video": "https://nips.cc/virtual/2023/poster/70408", "author_site": "Pieter-Jan Hoedt, G\u00fcnter Klambauer", "tldr": "", "abstract": "Input-Convex Neural Networks (ICNNs) are networks that guarantee convexity in their input-output mapping. \nThese networks have been successfully applied for energy-based modelling, optimal transport problems and learning invariances.\nThe convexity of ICNNs is achieved by using non-decreasing convex activation functions and non-negative weights. \nBecause of these peculiarities, previous initialisation strategies, which implicitly assume centred weights, are not effective for ICNNs. \nBy studying signal propagation through layers with non-negative weights, we are able to derive a principled weight initialisation for ICNNs. \nConcretely, we generalise signal propagation theory by removing the assumption that weights are sampled from a centred distribution. \nIn a set of experiments, we demonstrate that our principled initialisation effectively accelerates learning in ICNNs and leads to better generalisation. \nMoreover, we find that, in contrast to common belief, ICNNs can be trained without skip-connections when initialised correctly. 
\nFinally, we apply ICNNs to a real-world drug discovery task and show that they allow for more effective molecular latent space exploration.", "keywords": "initialization;signal propagation;input-convex networks", "primary_area": "", "supplementary_material": "/attachment/87bd3e32b3898b1bea073faed1b311fc3540bc3c.pdf", "author": "Pieter-Jan Hoedt;G\u00fcnter Klambauer", "authorids": "~Pieter-Jan_Hoedt1;~G\u00fcnter_Klambauer1", "gender": "M;M", "homepage": "https://www.jku.at/en/institute-for-machine-learning/about-us/team/pieter-jan-hoedt/;http://www.bioinf.jku.at/people/klambauer/", "dblp": "254/0837;119/4499", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.at/citations?user=rb2AvxIAAAAJ", "orcid": "0009-0006-7321-9718;0000-0003-2861-5552", "linkedin": "pieter-janhoedt;", "or_profile": "~Pieter-Jan_Hoedt1;~G\u00fcnter_Klambauer1", "aff": "Johannes Kepler University;Johannes Kepler Universit\u00e4t Linz", "aff_domain": "jku.at;jku.at", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nhoedt2023principled,\ntitle={Principled Weight Initialisation for Input-Convex Neural Networks},\nauthor={Pieter-Jan Hoedt and G{\\\"u}nter Klambauer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pWZ97hUQtQ}\n}", "github": "", "project": "", "reviewers": "f5Cq;Ryn9;YxjJ;kLTe", "pdf_size": 1198233, "rating": "3;6;6;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "87;60;38;64", "wc_strengths": "21;47;43;57", "wc_weaknesses": "275;33;70;267", "wc_questions": "56;1;3;12", "wc_limitations": "4;1;18;80", "wc_review": "443;142;172;480", "wc_reply_reviewers": "0;0;0;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.25, 17.383541066192468 ], "wc_strengths_avg": [ 42.0, 13.152946437965905 ], "wc_weaknesses_avg": [ 161.25, 110.56304762442106 ], "wc_questions_avg": [ 18.0, 22.327113561766108 ], "wc_limitations_avg": [ 25.75, 31.971667144520318 ], "wc_review_avg": [ 309.25, 153.17861306331247 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.33333333333333337, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18282423917724899865&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "jku.at;jku.at", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Johannes Kepler University;Johannes Kepler University Linz", "aff_unique_dep": ";", "aff_unique_url": "https://www.jku.at;https://www.jku.at", "aff_unique_abbr": "JKU;JKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Linz", "aff_country_unique_index": "0;0", "aff_country_unique": "Austria" }, { "title": "SubseasonalClimateUSA: A Dataset for Subseasonal Forecasting and Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73465", "id": "pWkrU6raMt", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/18ef499ee57c4822e1e3ea9b9948af18-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pWkrU6raMt", "openreview": "https://openreview.net/forum?id=pWkrU6raMt", "poster": "/media/PosterPDFs/NeurIPS%202023/73465.png?t=1702397662.0398777", "slides": "https://nips.cc/virtual/2023/poster/73465", "video": "https://nips.cc/virtual/2023/poster/73465", "author_site": "Soukayna Mouatadid, Paulo Orenstein, Genevieve Flaspohler, Miruna Oprescu, Judah Cohen, Franklyn Wang, Sean Knight, Maria Geogdzhayeva, Sam Levang, Ernest Fraenkel, Lester Mackey", "tldr": "", "abstract": "Subseasonal forecasting of the weather two to six weeks in advance is critical for resource allocation and advance disaster notice but poses many challenges for the forecasting community. At this forecast horizon, physics-based dynamical models have limited skill, and the targets for prediction depend in a complex manner on both local weather variables and global climate variables. Recently, machine learning methods have shown promise in advancing the state of the art but only at the cost of complex data curation, integrating expert knowledge with aggregation across multiple relevant data sources, file formats, and temporal and spatial resolutions. To streamline this process and accelerate future development, we introduce SubseasonalClimateUSA, a curated dataset for training and benchmarking subseasonal forecasting models in the United States. We use this dataset to benchmark a diverse suite of models, including operational dynamical models, classical meteorological baselines, and ten state-of-the-art machine learning and deep learning-based methods from the literature. Overall, our benchmarks suggest simple and effective ways to extend the accuracy of current operational models. 
SubseasonalClimateUSA is regularly updated and accessible via the https://github.com/microsoft/subseasonal_data/ Python package.", "keywords": "weather and climate prediction;subseasonal forecasting;deep learning;subseasonal benchmark dataset;bias correction;statistical postprocessing", "primary_area": "", "supplementary_material": "/attachment/ff484b4c25ad790a479a81b3069d949f2ca943cd.pdf", "author": "Soukayna Mouatadid;Paulo Orenstein;Genevieve Elaine Flaspohler;Miruna Oprescu;Judah Cohen;Franklyn Wang;Sean Edward Knight;Maria Geogdzhayeva;Samuel James Levang;Ernest Fraenkel;Lester Mackey", "authorids": "~Soukayna_Mouatadid1;~Paulo_Orenstein1;~Genevieve_Elaine_Flaspohler1;~Miruna_Oprescu1;~Judah_Cohen1;~Franklyn_Wang1;~Sean_Edward_Knight1;~Maria_Geogdzhayeva1;~Samuel_James_Levang1;~Ernest_Fraenkel1;~Lester_Mackey1", "gender": ";;F;F;M;M;M;;M;M;M", "homepage": "https://www.cs.toronto.edu/~soukayna/;;http://geflaspohler.com;https://mirunaoprescu.com;http://www.judahcohen.org/;;https://knightsean00.github.io/;https://www.mgeo.info/;;http://fraenkel.mit.edu;https://stanford.edu/~lmackey", "dblp": ";;172/5432;218/5317;;;;;;94/5033;05/2961", "google_scholar": ";;https://scholar.google.com/citations?hl=en;KLSUWBAAAAAJ;kc2doosAAAAJ;NWpgImoAAAAJ;;;;;erv7TP0AAAAJ", "orcid": ";;;;0000-0002-7762-4482;;;;;0000-0001-9249-8181;0000-0002-1102-0387", "linkedin": ";;;;;;knightsean00/;;slevang/;ernest-fraenkel-22982b162/;lester-mackey-5902909", "or_profile": "~Soukayna_Mouatadid1;~Paulo_Orenstein1;~Genevieve_Elaine_Flaspohler1;~Miruna_Oprescu1;~Judah_Cohen1;~Franklyn_Wang1;~Sean_Edward_Knight1;~Maria_Geogdzhayeva1;~Samuel_James_Levang1;~Ernest_Fraenkel1;~Lester_Mackey1", "aff": "Department of Computer Science, University of Toronto;;;Cornell University;Atmospheric Environmental Research;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Microsoft Research New England", "aff_domain": "cs.toronto.edu;;;cornell.edu;aer.com;;mit.edu;mit.edu;;mit.edu;microsoft.com", "position": "PhD student;;;PhD student;Principal Researcher;;Undergrad student;Undergrad student;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nmouatadid2023subseasonalclimateusa,\ntitle={SubseasonalClimate{USA}: A Dataset for Subseasonal Forecasting and Benchmarking},\nauthor={Soukayna Mouatadid and Paulo Orenstein and Genevieve Elaine Flaspohler and Miruna Oprescu and Judah Cohen and Franklyn Wang and Sean Edward Knight and Maria Geogdzhayeva and Samuel James Levang and Ernest Fraenkel and Lester Mackey},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pWkrU6raMt}\n}", "github": "", "project": "", "reviewers": "SYV5;kSL6;UtoY;sMGZ", "pdf_size": 12572756, "rating": "6;6;7;9", "confidence": "3;2;3;4", "wc_summary_and_contributions": "53;53;63;137", "wc_strengths": "49;63;49;73", "wc_improvement": "1;61;132;525", "wc_limitations": "71;15;10;43", "wc_correctness": "1;1;28;9", "wc_clarity": "1;6;3;13", "wc_relation_to_prior_work": "1;1;14;6", "wc_documentation": "19;1;44;22", "wc_additional_feedback": "1;1;1;1", "wc_review": "197;202;344;829", "wc_reply_reviewers": "258;17;49;0", "wc_reply_authors": "401;1114;637;714", "reply_reviewers": "1;1;1;0", "reply_authors": "2;6;7;6", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 76.5, 35.16745654721137 ], "wc_strengths_avg": [ 58.5, 
10.136567466356647 ], "wc_improvement_avg": [ 179.75, 204.6526019868792 ], "wc_limitations_avg": [ 34.75, 24.416951079117148 ], "wc_correctness_avg": [ 9.75, 11.031205736455105 ], "wc_clarity_avg": [ 5.75, 4.548351349665063 ], "wc_relation_to_prior_work_avg": [ 5.5, 5.315072906367325 ], "wc_documentation_avg": [ 21.5, 15.272524349301264 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 393.0, 258.55076870897136 ], "wc_reply_reviewers_avg": [ 81.0, 103.69426213634003 ], "wc_reply_authors_avg": [ 716.5, 256.84285078623464 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.25, 1.920286436967152 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16601312098800212509&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.toronto.edu;;;cornell.edu;aer.com;;mit.edu;mit.edu;;mit.edu;microsoft.com", "author_num": 11, "aff_unique_index": "0;1;2;3;3;3;4", "aff_unique_norm": "University of Toronto;Cornell University;Atmospheric Environmental Research;Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": "Department of Computer Science;;;;Microsoft Research", "aff_unique_url": "https://www.utoronto.ca;https://www.cornell.edu;https://www.aer.com;https://web.mit.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-new-england", "aff_unique_abbr": "U of T;Cornell;AER;MIT;MSR NE", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Toronto;;New England", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Renku: a platform for sustainable data science", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73464", "id": "pX5xlL1T4C", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/838694e9ab6b0a193b84daaafcac0eed-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pX5xlL1T4C", "openreview": "https://openreview.net/forum?id=pX5xlL1T4C", "poster": "/media/PosterPDFs/NeurIPS%202023/73464.png?t=1701962740.8779566", "slides": "https://nips.cc/virtual/2023/poster/73464", "video": "https://nips.cc/virtual/2023/poster/73464", "author_site": "Rok Ro\u0161kar, Chandrasekhar Ramakrishnan, Michele Volpi, Fernando Perez-Cruz, Lilian Gasser, Firat Ozdemir, Patrick Paitz, Mohammad Alisafaee, Philipp Fischer, Ralf Grubenmann, Eliza Harris, Tasko Olevski, Carl Remlinger, Luis Salamanca, Elisabet Capon Garcia, Lorenzo Cavazzi, Jakub Chrobasik, Darlin Cordoba Osnas, Alessandro Degano, Jimena Dupre, Wesley Johnson, Eike Kettner, Laura Kinkead, Sean Murphy, Flora Thiebaut, Olivier Verscheure", "tldr": "", "abstract": "Data and code working together is fundamental to machine learning (ML), but the context around datasets and interactions between datasets and code are in general captured only rudimentarily. Context such as how the dataset was prepared and created, what source data were used, what code was used in processing, how the dataset evolved, and where it has been used and reused can provide much insight, but this information is often poorly documented. That is unfortunate since it makes datasets into black-boxes with potentially hidden characteristics that have downstream consequences. 
We argue that making dataset preparation more accessible and dataset usage easier to record and document would have significant benefits for the ML community: it would allow for greater diversity in datasets by inviting modification to published sources, simplify use of alternative datasets and, in doing so, make results more transparent and robust, while allowing for all contributions to be adequately credited. We present a platform, Renku, designed to support and encourage such sustainable development and use of data, datasets, and code, and we demonstrate its benefits through a few illustrative projects which span the spectrum from dataset creation to dataset consumption and showcasing.", "keywords": "reproducibility;reusability;platforms;sustainability;community;dataset development", "primary_area": "", "supplementary_material": "/attachment/75a5e3981261ef16c63a182257ceb3be9a803295.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nro{\\v{s}}kar2023renku,\ntitle={Renku: a platform for sustainable data science},\nauthor={Rok Ro{\\v{s}}kar and Chandrasekhar Ramakrishnan and Michele Volpi and Fernando Perez-Cruz and Lilian Gasser and Firat Ozdemir and Patrick Paitz and Mohammad Alisafaee and Philipp Fischer and Ralf Grubenmann and Eliza Jean Harris and Tasko Olevski and Carl Remlinger and Luis Salamanca and Elisabet Capon Garcia and Lorenzo Cavazzi and Jakub Chrobasik and Darlin Andrea Cordoba Osnas and Alessandro Degano and Jimena Dupre and Wesley Johnson and Eike Kettner and Laura Kinkead and Sean Murphy and Flora Thiebaut and Olivier Verscheure},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pX5xlL1T4C}\n}", "github": "", "project": "", "reviewers": "G2Nn;oZQs;GuX6;1JZB", "pdf_size": 760186, "rating": "7;7;8;8", "confidence": "3;4;3;4", "wc_summary_and_contributions": "63;78;332;124", "wc_strengths": "79;71;67;102", "wc_improvement": "273;362;240;237", "wc_limitations": "85;95;28;153", "wc_correctness": "4;9;122;150", "wc_clarity": "9;6;2;34", "wc_relation_to_prior_work": "17;9;34;94", "wc_documentation": "8;16;39;45", "wc_additional_feedback": "1;1;1;1", "wc_review": "539;647;865;940", "wc_reply_reviewers": "117;14;200;97", "wc_reply_authors": "845;773;864;844", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;2", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 149.25, 107.87811409178416 ], "wc_strengths_avg": [ 79.75, 13.5531361684298 ], "wc_improvement_avg": [ 278.0, 50.51237472144821 ], "wc_limitations_avg": [ 90.25, 44.33607447666065 ], "wc_correctness_avg": [ 71.25, 65.52623520392423 ], "wc_clarity_avg": [ 12.75, 12.517487767120047 ], "wc_relation_to_prior_work_avg": [ 38.5, 33.29038900343461 ], "wc_documentation_avg": [ 27.0, 15.411035007422441 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 747.75, 161.58182911453875 ], "wc_reply_reviewers_avg": [ 107.0, 66.14000302388865 ], "wc_reply_authors_avg": [ 831.5, 34.70230539892127 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16408493030043065899&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "", "author_num": 1 }, { "title": "The Best of Both Worlds in Network Population Games: Reaching Consensus and Convergence to Equilibrium", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70407", "id": "pXtVyj4R33", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f39931608cdc52d7d9f8ba7003af9136-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pXtVyj4R33", "openreview": "https://openreview.net/forum?id=pXtVyj4R33", "poster": "/media/PosterPDFs/NeurIPS%202023/70407.png?t=1701750732.4584508", "slides": "https://nips.cc/virtual/2023/poster/70407", "video": "https://nips.cc/virtual/2023/poster/70407", "author_site": "Shuyue Hu, Harold Soh, Georgios Piliouras", "tldr": "", "abstract": "Reaching consensus and convergence to equilibrium are two major challenges of multi-agent systems. Although each has attracted significant attention, relatively few studies address both challenges at the same time. This paper examines the connection between the notions of consensus and equilibrium in a multi-agent system where multiple interacting sub-populations coexist. We argue that consensus can be seen as an intricate component of intra-population stability, whereas equilibrium can be seen as encoding inter-population stability. We show that smooth fictitious play, a well-known learning model in game theory, can achieve both consensus and convergence to equilibrium in diverse multi-agent settings. Moreover, we show that the consensus formation process plays a crucial role in the seminal thorny problem of equilibrium selection in multi-agent learning.", "keywords": "Multi-Agent Learning;Consensus Formation;Smooth Fictitious Play;Network Game;Population Game", "primary_area": "", "supplementary_material": "/attachment/9a6e08f0794361244f888de7f3449bf2fea8b735.zip", "author": "Shuyue Hu;Harold Soh;Georgios Piliouras", "authorids": "~Shuyue_Hu1;~Harold_Soh1;~Georgios_Piliouras1", "gender": ";M;", "homepage": ";http://www.haroldsoh.com;", "dblp": ";06/4578;62/1236", "google_scholar": ";https://scholar.google.com.sg/citations?user=lkgd1BsAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shuyue_Hu1;~Harold_Soh1;~Georgios_Piliouras1", "aff": ";National University of Singapore;Singapore University of Technology and Design", "aff_domain": ";nus.edu.sg;sutd.edu.sg", "position": ";Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhu2023the,\ntitle={The Best of Both Worlds in Network Population Games: Reaching Consensus and Convergence to Equilibrium},\nauthor={Shuyue Hu and Harold Soh and Georgios Piliouras},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pXtVyj4R33}\n}", "github": "", "project": "", "reviewers": "3rNt;2uya;3EKR;YUjL", "pdf_size": 1395940, "rating": "4;6;6;8", "confidence": "2;3;3;4", "soundness": "2;3;2;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "111;217;225;62", "wc_strengths": "42;18;95;15", "wc_weaknesses": "122;24;240;132", "wc_questions": "44;36;11;5", "wc_limitations": "77;9;1;1", "wc_review": "396;304;572;215", "wc_reply_reviewers": "46;190;60;0", "wc_reply_authors": "521;499;9;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 
0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 153.75, 69.5031474107468 ], "wc_strengths_avg": [ 42.5, 32.06633748964793 ], "wc_weaknesses_avg": [ 129.5, 76.49019545013596 ], "wc_questions_avg": [ 24.0, 16.38596960817394 ], "wc_limitations_avg": [ 22.0, 31.921779399024736 ], "wc_review_avg": [ 371.75, 132.14457045221343 ], "wc_reply_reviewers_avg": [ 74.0, 70.55494312945054 ], "wc_reply_authors_avg": [ 257.25, 252.88967456185316 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2626216318390364766&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";nus.edu.sg;sutd.edu.sg", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;Singapore University of Technology and Design", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.sutd.edu.sg", "aff_unique_abbr": "NUS;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Enhancing Robot Program Synthesis Through Environmental Context", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70406", "id": "pZ2Ww45GkL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c1e94af650f5c74b1f3da467c2308c2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pZ2Ww45GkL", "openreview": "https://openreview.net/forum?id=pZ2Ww45GkL", "poster": "/media/PosterPDFs/NeurIPS%202023/70406.png?t=1701680342.6698616", "slides": "https://nips.cc/virtual/2023/poster/70406", "video": "https://nips.cc/virtual/2023/poster/70406", "author_site": "Tianyi Chen, Qidi Wang, Zhen Dong, Liwei Shen, Xin Peng", "tldr": "", "abstract": "Program synthesis aims to automatically generate an executable program that conforms to the given specification. Recent advancements have demonstrated that deep neural methodologies and large-scale pretrained language models are highly proficient in capturing program semantics.\nFor robot programming, prior works have facilitated program synthesis by incorporating global environments. However, acquiring a comprehensive understanding of the entire environment is often excessively challenging in practice.\nIn this work, we present a framework that learns to synthesize a program by rectifying potentially erroneous code segments, with the aid of partially observed environments. To tackle the issue of inadequate attention to partial observations, we propose to first learn an environment embedding space that can implicitly evaluate the impacts of each program token based on the precondition. 
Furthermore, by employing a graph structure, the model can aggregate both environmental and syntactic information flow and furnish smooth program rectification guidance.\nExtensive experimental evaluations and ablation studies on the partially observed VizDoom domain authenticate that our method offers superior generalization capability across various tasks and greater robustness when encountering noise.", "keywords": "program synthesis;partial environment;robotic programming;domain-specific language", "primary_area": "", "supplementary_material": "/attachment/8cfd1bcffd8a7ea0e4f59488e5a9370768b3227c.pdf", "author": "Tianyi Chen;Qidi Wang;Zhen Dong;Liwei Shen;Xin Peng", "authorids": "~Tianyi_Chen9;~Qidi_Wang1;~Zhen_Dong5;~Liwei_Shen1;~Xin_Peng6", "gender": "M;M;M;M;M", "homepage": "https://github.com/ttyXCX;https://github.com/fudanwqd;https://zhendong2050.github.io/;;https://cspengxin.github.io", "dblp": ";;60/1749-4;39/2105;", "google_scholar": ";;jfYe73AAAAAJ;;wATYGXEAAAAJ", "orcid": ";;;;0000-0003-3376-2581", "linkedin": ";;;;", "or_profile": "~Tianyi_Chen9;~Qidi_Wang1;~Zhen_Dong5;~Liwei_Shen1;~Xin_Peng6", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;MS student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023enhancing,\ntitle={Enhancing Robot Program Synthesis Through Environmental Context},\nauthor={Tianyi Chen and Qidi Wang and Zhen Dong and Liwei Shen and Xin Peng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pZ2Ww45GkL}\n}", "github": "", "project": "", "reviewers": "mJeF;wkgC;2nvQ;emor;ivtK", "pdf_size": 491974, "rating": "3;5;6;6;7", "confidence": "4;3;2;3;3", "soundness": "3;2;4;3;4", "novelty": "2;2;3;3;3", "presentation": "3;1;4;3;4", "wc_summary": "141;48;90;70;106", "wc_strengths": "150;56;51;47;114", "wc_weaknesses": "237;187;21;99;89", "wc_questions": "67;3;12;2;97", "wc_limitations": "69;1;18;39;1", "wc_review": "664;295;192;257;407", "wc_reply_reviewers": "347;58;13;9;11", "wc_reply_authors": "453;31;8;8;8", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 91.0, 31.673332631726648 ], "wc_strengths_avg": [ 83.6, 41.224264699324834 ], "wc_weaknesses_avg": [ 126.6, 76.37172251560128 ], "wc_questions_avg": [ 36.2, 38.73706235635325 ], "wc_limitations_avg": [ 25.6, 25.811625287842688 ], "wc_review_avg": [ 363.0, 165.9144357794101 ], "wc_reply_reviewers_avg": [ 87.6, 130.9772499329559 ], "wc_reply_authors_avg": [ 101.6, 175.92566612066582 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6993786061802353, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17630134586050451683&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", 
"aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Kernel-Based Tests for Likelihood-Free Hypothesis Testing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70405", "id": "paTESG8iSE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32c6d65ec2591dfcfb3f0e345a51f585-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=paTESG8iSE", "openreview": "https://openreview.net/forum?id=paTESG8iSE", "poster": "/media/PosterPDFs/NeurIPS%202023/70405.png?t=1702259964.5336533", "slides": "https://nips.cc/virtual/2023/poster/70405", "video": "https://nips.cc/virtual/2023/poster/70405", "author_site": "Patrik Robert Gerber, Tianze Jiang, Yury Polyanskiy, Rui Sun", "tldr": "", "abstract": "Given $n$ observations from two balanced classes, consider the task of labeling an additional $m$ inputs that are known to all belong to \\emph{one} of the two classes. \nSpecial cases of this problem are well-known: with complete\nknowledge of class distributions ($n=\\infty$) the\nproblem is solved optimally by the likelihood-ratio test; when\n$m=1$ it corresponds to binary classification; and when $m\\approx n$ it is equivalent to two-sample testing. The intermediate settings occur in the field of likelihood-free inference, where labeled samples are obtained by running forward simulations and the unlabeled sample is collected experimentally. In recent work it was discovered that there is a fundamental trade-off\nbetween $m$ and $n$: increasing the data sample $m$ reduces the amount $n$ of training/simulation\ndata needed. In this work we (a) introduce a generalization where unlabeled samples \ncome from a mixture of the two classes -- a case often encountered in practice; (b) study the minimax sample complexity for non-parametric classes of densities under \\textit{maximum mean\ndiscrepancy} (MMD) separation; and (c) investigate the empirical performance of kernels parameterized by neural networks on two tasks: detection\nof the Higgs boson and detection of planted DDPM generated images amidst\nCIFAR-10 images. 
For both problems we confirm the existence of the theoretically predicted asymmetric $m$ vs $n$ trade-off.", "keywords": "Kernel methods;Maximum mean discrepancy;Likelihood-free inference;Hypothesis testing;Minimax statistics", "primary_area": "", "supplementary_material": "/attachment/182aa319582f5f5923076861603e82c9e6e566a1.pdf", "author": "Patrik Robert Gerber;Tianze Jiang;Yury Polyanskiy;Rui Sun", "authorids": "~Patrik_Robert_Gerber1;~Tianze_Jiang1;~Yury_Polyanskiy1;~Rui_Sun7", "gender": "M;M;M;M", "homepage": "https://patrikgerber.github.io/;https://petyrrrrr.github.io/;http://www.mit.edu/~ypol/;https://github.com/Sr-11", "dblp": "294/0178;;74/8860;", "google_scholar": "kB9AcDEAAAAJ;lQscqDAAAAAJ;;", "orcid": ";;;", "linkedin": "patrik-robert-gerber/;;;", "or_profile": "~Patrik_Robert_Gerber1;~Tianze_Jiang1;~Yury_Polyanskiy1;~Rui_Sun7", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Undergrad student;Full Professor;Undergrad student", "bibtex": "@inproceedings{\ngerber2023kernelbased,\ntitle={Kernel-Based Tests for Likelihood-Free Hypothesis Testing},\nauthor={Patrik Robert Gerber and Tianze Jiang and Yury Polyanskiy and Rui Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=paTESG8iSE}\n}", "github": "", "project": "", "reviewers": "m3Zx;92Pk;x91N;5XYH", "pdf_size": 986259, "rating": "5;5;6;7", "confidence": "2;2;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "255;145;121;149", "wc_strengths": "83;51;54;167", "wc_weaknesses": "537;140;363;136", "wc_questions": "185;2;79;3", "wc_limitations": "1;3;1;50", "wc_review": "1061;341;618;505", "wc_reply_reviewers": "143;0;12;59", "wc_reply_authors": "288;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 167.5, 51.6405848146591 ], "wc_strengths_avg": [ 88.75, 46.874166659259124 ], "wc_weaknesses_avg": [ 294.0, 167.6976445869172 ], "wc_questions_avg": [ 67.25, 74.81435356935192 ], "wc_limitations_avg": [ 13.75, 20.94486810653149 ], "wc_review_avg": [ 631.25, 266.94791158576237 ], "wc_reply_reviewers_avg": [ 53.5, 56.18051263561058 ], "wc_reply_authors_avg": [ 72.0, 124.70765814495917 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15104702750644602550&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Free-Bloom: Zero-Shot Text-to-Video Generator with LLM Director and LDM Animator", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70404", "id": "paa2OU5jN8", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/52f050499cf82fa8efb588e263f6f3a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=paa2OU5jN8", "openreview": "https://openreview.net/forum?id=paa2OU5jN8", "poster": "/media/PosterPDFs/NeurIPS%202023/70404.png?t=1702114556.7870243", "slides": "https://nips.cc/virtual/2023/poster/70404", "video": "https://nips.cc/virtual/2023/poster/70404", "author_site": "Hanzhuo Huang, Yufan Feng, Cheng Shi, Lan Xu, Jingyi Yu, Sibei Yang", "tldr": "", "abstract": "Text-to-video is a rapidly growing research area that aims to generate a semantic, identical, and temporal coherence sequence of frames that accurately align with the input text prompt. This study focuses on zero-shot text-to-video generation considering the data- and cost-efficient. To generate a semantic-coherent video, exhibiting a rich portrayal of temporal semantics such as the whole process of flower blooming rather than a set of ``moving images'', we propose a novel Free-Bloom pipeline that harnesses large language models (LLMs) as the director to generate a semantic-coherence prompt sequence, while pre-trained latent diffusion models (LDMs) as the animator to generate the high fidelity frames. Furthermore, to ensure temporal and identical coherence while maintaining semantic coherence, we propose a series of annotative modifications to adapting LDMs in the reverse process, including joint noise sampling, step-aware attention shift, and dual-path interpolation. Without any video data and training requirements, Free-Bloom generates vivid and high-quality videos, awe-inspiring in generating complex scenes with semantic meaningful frame sequences. In addition, Free-Bloom is naturally compatible with LDMs-based extensions.", "keywords": "Text-to-Video;Zero-Shot Generation;Large Language Model;Latent Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/f2d0c5a77c34a820786ffe50cd192e361bcd6be7.zip", "author": "Hanzhuo Huang;Yufan Feng;Cheng Shi;Lan Xu;Jingyi Yu;Sibei Yang", "authorids": "~Hanzhuo_Huang1;~Yufan_Feng1;~Cheng_Shi4;~Lan_Xu2;~Jingyi_Yu5;~Sibei_Yang1", "gender": ";F;M;M;M;F", "homepage": "https://github.com/JudgementH;https://github.com/JoycexxZ;https://github.com/ChengShiest;http://xu-lan.com/;;https://sibeiyang.github.io/", "dblp": "357/5702;;;;;215/4885", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works;aPS5pJkAAAAJ;R9L_AfQAAAAJ;user=4pg3rtYAAAAJ", "orcid": ";;0000-0002-6942-8481;0000-0002-8807-7787;;", "linkedin": ";;;;;", "or_profile": "~Hanzhuo_Huang1;~Yufan_Feng1;~Cheng_Shi4;~Lan_Xu2;~Jingyi_Yu5;~Sibei_Yang1", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cm;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;Undergrad student;MS student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023freebloom,\ntitle={Free-Bloom: Zero-Shot Text-to-Video Generator with {LLM} Director and {LDM} Animator},\nauthor={Hanzhuo Huang and Yufan Feng and Cheng Shi and Lan Xu and Jingyi Yu and Sibei Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=paa2OU5jN8}\n}", "github": "", "project": "", "reviewers": "yWMb;VS1k;6FyB;mgrm", "pdf_size": 18935190, "rating": 
"5;5;7;7", "confidence": "5;4;4;4", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "121;60;79;74", "wc_strengths": "100;36;65;60", "wc_weaknesses": "70;117;37;191", "wc_questions": "31;51;100;5", "wc_limitations": "57;4;5;1", "wc_review": "379;268;286;331", "wc_reply_reviewers": "0;177;24;92", "wc_reply_authors": "0;356;43;60", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.5, 22.74313083108832 ], "wc_strengths_avg": [ 65.25, 22.862359895688808 ], "wc_weaknesses_avg": [ 103.75, 57.84191819087607 ], "wc_questions_avg": [ 46.75, 34.802119188348286 ], "wc_limitations_avg": [ 16.75, 23.284920012746447 ], "wc_review_avg": [ 316.0, 43.00581356049435 ], "wc_reply_reviewers_avg": [ 73.25, 68.75090908489865 ], "wc_reply_authors_avg": [ 114.75, 140.99179940691587 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2181601531904898249&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cm;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Generalizable Agents via Saliency-guided Features Decorrelation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70403", "id": "pb1OwZNgr2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7bd4a7d0e6773072c2e3c77b11d93065-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pb1OwZNgr2", "openreview": "https://openreview.net/forum?id=pb1OwZNgr2", "poster": "/media/PosterPDFs/NeurIPS%202023/70403.png?t=1697279961.3526192", "slides": "https://nips.cc/virtual/2023/poster/70403", "video": "https://nips.cc/virtual/2023/poster/70403", "author_site": "Sili Huang, Yanchao Sun, Jifeng Hu, Siyuan Guo, Hechang Chen, Yi Chang, Lichao Sun, Bo Yang", "tldr": "", "abstract": "In visual-based Reinforcement Learning (RL), agents often struggle to generalize well to environmental variations in the state space that were not observed during training. The variations can arise in both task-irrelevant features, such as background noise, and task-relevant features, such as robot configurations, that are related to the optimal decisions. To achieve generalization in both situations, agents are required to accurately understand the impact of changed features on the decisions, i.e., establishing the true associations between changed features and decisions in the policy model. However, due to the inherent correlations among features in the state space, the associations between features and decisions become entangled, making it difficult for the policy to distinguish them. 
To this end, we propose Saliency-Guided Features Decorrelation (SGFD) to eliminate these correlations through sample reweighting. Concretely, SGFD consists of two core techniques: Random Fourier Functions (RFF) and the saliency map. RFF is utilized to estimate the complex non-linear correlations in high-dimensional images, while the saliency map is designed to identify the changed features. Under the guidance of the saliency map, SGFD employs sample reweighting to minimize the estimated correlations related to changed features, thereby achieving decorrelation in visual RL tasks. Our experimental results demonstrate that SGFD can generalize well on a wide range of test environments and significantly outperforms state-of-the-art methods in handling both task-irrelevant variations and task-relevant variations.", "keywords": "reinforcement learning;generalization", "primary_area": "", "supplementary_material": "/attachment/1cacb6a30beede257612bf61c67cadd9b97dfa03.pdf", "author": "Sili Huang;Yanchao Sun;Jifeng Hu;Siyuan Guo;Hechang Chen;Yi Chang;Lichao Sun;Bo Yang", "authorids": "~Sili_Huang1;~Yanchao_Sun1;~Jifeng_Hu1;~Siyuan_Guo2;~Hechang_Chen2;~Yi_Chang4;~Lichao_Sun1;~Bo_Yang6", "gender": "M;F;;M;M;M;M;", "homepage": ";https://ycsun2017.github.io/home/index.html;;;http://sai.jlu.edu.cn/info/1094/2387.htm;http://www.yichang-cs.com;https://lichao-sun.github.io/;http://ccst.jlu.edu.cn/info/1367/19045.htm", "dblp": "26/6752;132/6840;;244/5858;145/1142;02/5438.html;121/0780-1.html;46/999-2", "google_scholar": "ZMhi8A0AAAAJ;bloBY_QAAAAJ;;JE1Yco4AAAAJ;EezEcbgAAAAJ;https://scholar.google.com.hk/citations?user=drEkR50AAAAJ;WhGUE7AAAAAJ;", "orcid": "0000-0001-5387-7904;0000-0002-1137-9939;;0000-0002-9304-5405;;0000-0003-2697-8093;;0000-0003-1927-8419", "linkedin": ";;;;;;lichao-sun-b273a290/;", "or_profile": "~Sili_Huang1;~Yanchao_Sun1;~Jifeng_Hu1;~Siyuan_Guo2;~Hechang_Chen2;~Yi_Chang4;~Lichao_Sun1;~Bo_Yang6", "aff": "Jilin University;University of Maryland, College Park;;Jilin University;Jilin University;Jilin University, China;Lehigh University;Jilin University", "aff_domain": "jlu.edu.cn;umd.edu;;jlu.edu.cn;jlu.edu.cn;jlu.edu.cn;lehigh.edu;jlu.edu.cn", "position": "PhD student;PhD student;;PhD student;Associate Professor;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023learning,\ntitle={Learning Generalizable Agents via Saliency-guided Features Decorrelation},\nauthor={Sili Huang and Yanchao Sun and Jifeng Hu and Siyuan Guo and Hechang Chen and Yi Chang and Lichao Sun and Bo Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pb1OwZNgr2}\n}", "github": "", "project": "", "reviewers": "NDX8;NdCS;SPDk;jEbK;tXZg", "pdf_size": 2365246, "rating": "5;7;7;7;7", "confidence": "4;3;2;4;4", "soundness": "2;4;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;3;3;2", "wc_summary": "62;124;76;76;72", "wc_strengths": "28;77;39;28;62", "wc_weaknesses": "124;177;56;40;178", "wc_questions": "107;116;32;73;139", "wc_limitations": "5;23;28;16;27", "wc_review": "326;517;231;233;478", "wc_reply_reviewers": "277;26;45;16;13", "wc_reply_authors": "1072;0;90;0;0", "reply_reviewers": "3;1;1;1;1", "reply_authors": "4;1;2;1;1", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 82.0, 21.614809737770074 ], 
"wc_strengths_avg": [ 46.8, 19.548913013259842 ], "wc_weaknesses_avg": [ 115.0, 58.309518948453004 ], "wc_questions_avg": [ 93.4, 37.31273241133648 ], "wc_limitations_avg": [ 19.8, 8.518215775618742 ], "wc_review_avg": [ 357.0, 120.37773880581076 ], "wc_reply_reviewers_avg": [ 75.4, 101.41913034531503 ], "wc_reply_authors_avg": [ 232.4, 421.24463201327563 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.37500000000000017, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3051326154664448041&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "jlu.edu.cn;umd.edu;;jlu.edu.cn;jlu.edu.cn;jlu.edu.cn;lehigh.edu;jlu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;0", "aff_unique_norm": "Jilin University;University of Maryland;Lehigh University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.jlu.edu.cn;https://www/umd.edu;https://www.lehigh.edu", "aff_unique_abbr": "JLU;UMD;Lehigh", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Binarized Spectral Compressive Imaging", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70402", "id": "pcKwgdVAlq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/788e086c07b8d6fa6b279df56e512312-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pcKwgdVAlq", "openreview": "https://openreview.net/forum?id=pcKwgdVAlq", "poster": "/media/PosterPDFs/NeurIPS%202023/70402.png?t=1697254761.734118", "slides": "https://nips.cc/virtual/2023/poster/70402", "video": "https://nips.cc/virtual/2023/poster/70402", "author_site": "Yuanhao Cai, Yuxin Zheng, Jing Lin, Xin Yuan, Yulun Zhang, Haoqian Wang", "tldr": "", "abstract": "Existing deep learning models for hyperspectral image (HSI) reconstruction achieve good performance but require powerful hardwares with enormous memory and computational resources. Consequently, these methods can hardly be deployed on resource-limited mobile devices. In this paper, we propose a novel method, Binarized Spectral-Redistribution Network (BiSRNet), for efficient and practical HSI restoration from compressed measurement in snapshot compressive imaging (SCI) systems. Firstly, we redesign a compact and easy-to-deploy base model to be binarized. Then we present the basic unit, Binarized Spectral-Redistribution Convolution (BiSR-Conv). BiSR-Conv can adaptively redistribute the HSI representations before binarizing activation and uses a scalable hyperbolic tangent function to closer approximate the Sign function in backpropagation. Based on our BiSR-Conv, we customize four binarized convolutional modules to address the dimension mismatch and propagate full-precision information throughout the whole network. Finally, our BiSRNet is derived by using the proposed techniques to binarize the base model. Comprehensive quantitative and qualitative experiments manifest that our proposed BiSRNet outperforms state-of-the-art binarization algorithms. 
Code and models are publicly available at https://github.com/caiyuanhao1998/BiSCI", "keywords": "Applications;Computer Vision;Low-level Vision;Image Restoration;Snapshot Compressive Imaging;Hyperspectral Image Reconstruction", "primary_area": "", "supplementary_material": "/attachment/a3e19c4c434508b88394b873d5ece675638110c8.zip", "author": "Yuanhao Cai;Yuxin Zheng;Jing Lin;Xin Yuan;Yulun Zhang;Haoqian Wang", "authorids": "~Yuanhao_Cai1;~Yuxin_Zheng2;~Jing_Lin3;~Xin_Yuan4;~Yulun_Zhang1;~Haoqian_Wang1", "gender": "F;M;M;M;M;M", "homepage": "https://user.qzone.qq.com/1052427141/infocenter;https://jinglin7.github.io/;https://en.westlake.edu.cn/faculty/xin-yuan.html;http://yulunzhang.com/;;https://caiyuanhao1998.github.io", "dblp": ";;78/713-2;166/2763-1.html;;260/1004", "google_scholar": ";SvaU2GMAAAAJ;cS9CbWkAAAAJ;ORmLjWoAAAAJ;;3YozQwcAAAAJ", "orcid": ";;0000-0002-8311-7524;0000-0002-2288-5079;0000-0003-2792-8469;", "linkedin": ";;xin-yuan-0024bb31/;yulun-zhang-1116b5b9/;;", "or_profile": "~Yuxin_Zheng2;~Jing_Lin3;~Xin_Yuan4;~Yulun_Zhang1;~Haoqian_Wang1;~Cai_Yuanhao1", "aff": "Tsinghua University;Tsinghua University;Westlake University;Swiss Federal Institute of Technology;Tsinghua University;Shenzhen Internatioanl Graduate School, Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;westlake.edu.cn;ethz.ch;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;MS student;Associate Professor;Postdoc;Full Professor;MS student", "bibtex": "@inproceedings{\ncai2023binarized,\ntitle={Binarized Spectral Compressive Imaging},\nauthor={Yuanhao Cai and Yuxin Zheng and Jing Lin and Xin Yuan and Yulun Zhang and Haoqian Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pcKwgdVAlq}\n}", "github": "", "project": "", "reviewers": "aGHs;F7DC;kC2s;ZUfp;nobf", "pdf_size": 13810487, "rating": "6;6;7;7;7", "confidence": "4;3;4;5;4", "soundness": "3;3;3;4;4", "novelty": "4;3;2;4;4", "presentation": "4;3;3;4;4", "wc_summary": "86;98;85;73;88", "wc_strengths": "21;19;56;115;212", "wc_weaknesses": "24;33;62;125;79", "wc_questions": "30;19;113;4;15", "wc_limitations": "1;4;11;58;4", "wc_review": "162;173;327;375;398", "wc_reply_reviewers": "0;0;52;29;25", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 86.0, 7.974960814950754 ], "wc_strengths_avg": [ 84.6, 72.55508252355585 ], "wc_weaknesses_avg": [ 64.6, 36.081019941237805 ], "wc_questions_avg": [ 36.2, 39.29071137050079 ], "wc_limitations_avg": [ 15.6, 21.453204888780604 ], "wc_review_avg": [ 287.0, 100.28559218551786 ], "wc_reply_reviewers_avg": [ 21.2, 19.610201426808445 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4885143711692987304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;westlake.edu.cn;ethz.ch;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Tsinghua University;Westlake University;Swiss Federal Institute of 
Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.westlake.edu.cn;https://www.ethz.ch", "aff_unique_abbr": "THU;WU;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Switzerland" }, { "title": "Initialization-Dependent Sample Complexity of Linear Predictors and Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70401", "id": "pcpjtYNJCH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/18210aa6209b9adfc97b8c17c3741d95-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pcpjtYNJCH", "openreview": "https://openreview.net/forum?id=pcpjtYNJCH", "poster": "/media/PosterPDFs/NeurIPS%202023/70401.png?t=1699890993.2198765", "slides": "https://nips.cc/virtual/2023/poster/70401", "video": "https://nips.cc/virtual/2023/poster/70401", "author_site": "Roey Magen, Ohad Shamir", "tldr": "", "abstract": "We provide several new results on the sample complexity of vector-valued linear predictors (parameterized by a matrix), and more generally neural networks. Focusing on size-independent bounds, where only the Frobenius norm distance of the parameters from some fixed reference matrix $W_0$ is controlled, we show that the sample complexity behavior can be surprisingly different than what we may expect considering the well-studied setting of scalar-valued linear predictors. This also leads to new sample complexity bounds for feed-forward neural networks, tackling some open questions in the literature, and establishing a new convex linear prediction problem that is provably learnable without uniform convergence.", "keywords": "sample complexity; learning theory; neural networks; linear predictors", "primary_area": "", "supplementary_material": "/attachment/ae938aca181c97ff67cfef79ffe74980559f070b.pdf", "author": "Roey Magen;Ohad Shamir", "authorids": "~Roey_Magen1;~Ohad_Shamir1", "gender": "M;", "homepage": ";http://www.wisdom.weizmann.ac.il/~shamiro/", "dblp": "319/2818;12/5897", "google_scholar": ";all0DHsAAAAJ", "orcid": ";", "linkedin": "https://linkedin.com/in/roey-magen;", "or_profile": "~Roey_Magen1;~Ohad_Shamir1", "aff": "Weizmann Institute of Science;Weizmann Institute", "aff_domain": "weizmann.ac.il;weizmann.ac.il", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nmagen2023initializationdependent,\ntitle={Initialization-Dependent Sample Complexity of Linear Predictors and Neural Networks},\nauthor={Roey Magen and Ohad Shamir},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pcpjtYNJCH}\n}", "github": "", "project": "", "reviewers": "9YyA;Zmxy;xxjV;8WAD", "pdf_size": 476219, "rating": "3;6;7;7", "confidence": "5;3;3;2", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;2;2;4", "wc_summary": "387;196;184;84", "wc_strengths": "38;29;60;63", "wc_weaknesses": "545;386;142;20", "wc_questions": "92;71;56;130", "wc_limitations": "2;1;1;18", "wc_review": "1064;683;443;315", "wc_reply_reviewers": "0;60;129;54", "wc_reply_authors": "0;101;234;170", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 212.75, 
109.59784441310879 ], "wc_strengths_avg": [ 47.5, 14.396180048887969 ], "wc_weaknesses_avg": [ 273.25, 204.89189222612006 ], "wc_questions_avg": [ 87.25, 27.797257058925798 ], "wc_limitations_avg": [ 5.5, 7.22841614740048 ], "wc_review_avg": [ 626.25, 285.17659002800355 ], "wc_reply_reviewers_avg": [ 60.75, 45.811434162226355 ], "wc_reply_authors_avg": [ 126.25, 86.7478385897885 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9446104285307815, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15789874728777615500&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "weizmann.ac.il;weizmann.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Reducing Blackwell and Average Optimality to Discounted MDPs via the Blackwell Discount Factor", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70400", "id": "pcuC65JWAa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a4e720ce31ccd8ba747d8863e1580fa8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pcuC65JWAa", "openreview": "https://openreview.net/forum?id=pcuC65JWAa", "poster": "/media/PosterPDFs/NeurIPS%202023/70400.png?t=1701740539.7400382", "slides": "https://nips.cc/virtual/2023/poster/70400", "video": "https://nips.cc/virtual/2023/poster/70400", "author_site": "Julien Grand-Cl\u00e9ment, Marek Petrik", "tldr": "", "abstract": "We introduce the Blackwell discount factor for Markov Decision Processes (MDPs). Classical objectives for MDPs include discounted, average, and Blackwell optimality. Many existing approaches to computing average-optimal policies solve for discount-optimal policies with a discount factor close to $1$, but they only work under strong or hard-to-verify assumptions on the MDP structure such as unichain or ergodicity. We are the first to highlight the shortcomings of the classical definition of Blackwell optimality, which does not lead to simple algorithms for computing Blackwell-optimal policies and overlooks the pathological behaviors of optimal policies as regards the discount factors. To resolve this issue, in this paper, we show that when the discount factor is larger than the Blackwell discount factor $\\gamma_{\\sf bw}$, all discount-optimal policies become Blackwell- and average-optimal, and we derive a general upper bound on $\\gamma_{\\sf bw}$. Our upper bound on $\\gamma_{\\sf bw}$, parametrized by the bit-size of the rewards and transition probabilities of the MDP instance, provides the first reduction from average and Blackwell optimality to discounted optimality, without any assumptions, along with new polynomial-time algorithms. Our work brings new ideas from polynomials and algebraic numbers to the analysis of MDPs. 
Our results also apply to robust MDPs, enabling the first algorithms to compute robust Blackwell-optimal policies.", "keywords": "Markov Decision Process;Blackwell optimality;average optimality;robust optimization", "primary_area": "", "supplementary_material": "/attachment/4eb73394c22dcc4e0d34037e163206758f97363d.pdf", "author": "Julien Grand-Cl\u00e9ment;Marek Petrik", "authorids": "~Julien_Grand-Cl\u00e9ment1;~Marek_Petrik2", "gender": "M;", "homepage": "https://julien-grand-clement.fr/;", "dblp": "197/0112;", "google_scholar": "https://scholar.google.fr/citations?user=K_ZLzdoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Julien_Grand-Cl\u00e9ment1;~Marek_Petrik2", "aff": "HEC Paris;", "aff_domain": "hec.fr;", "position": "Assistant Professor;", "bibtex": "@inproceedings{\ngrand-cl{\\'e}ment2023reducing,\ntitle={Reducing Blackwell and Average Optimality to Discounted {MDP}s via the Blackwell Discount Factor},\nauthor={Julien Grand-Cl{\\'e}ment and Marek Petrik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pcuC65JWAa}\n}", "github": "", "project": "", "reviewers": "TMw8;ePug;uLuR;LSPb;MXst;mmfD", "pdf_size": 2585747, "rating": "5;5;5;7;7;7", "confidence": "2;3;3;3;4;4", "soundness": "3;3;3;3;3;4", "novelty": "2;3;2;3;3;4", "presentation": "2;3;3;4;3;3", "wc_summary": "109;41;168;72;82;42", "wc_strengths": "48;44;69;126;96;59", "wc_weaknesses": "107;92;291;175;247;11", "wc_questions": "55;92;143;2;59;27", "wc_limitations": "1;32;21;9;25;5", "wc_review": "320;301;692;384;509;144", "wc_reply_reviewers": "12;15;54;35;29;13", "wc_reply_authors": "0;11;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;2;1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 85.66666666666667, 43.63739476896188 ], "wc_strengths_avg": [ 73.66666666666667, 28.905977851571734 ], "wc_weaknesses_avg": [ 153.83333333333334, 95.1847618523516 ], "wc_questions_avg": [ 63.0, 45.34681172181054 ], "wc_limitations_avg": [ 15.5, 11.221259584675273 ], "wc_review_avg": [ 391.6666666666667, 172.5887855246942 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 15.051762539834183 ], "wc_reply_authors_avg": [ 1.8333333333333333, 4.099457958749615 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7276068751089989, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8888895906063744434&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "hec.fr;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "HEC Paris", "aff_unique_dep": "", "aff_unique_url": "https://www.hec.edu", "aff_unique_abbr": "HEC", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Recurrent Hypernetworks are Surprisingly Strong in Meta-RL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70399", "id": "pefAAzu8an", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c3fa3a7d50b34732c6d08f6f66380d75-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pefAAzu8an", "openreview": "https://openreview.net/forum?id=pefAAzu8an", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70399.png?t=1701381676.7034028", "slides": "https://nips.cc/virtual/2023/poster/70399", "video": "https://nips.cc/virtual/2023/poster/70399", "author_site": "Jacob Beck, Risto Vuorio, Zheng Xiong, Shimon Whiteson", "tldr": "", "abstract": "Deep reinforcement learning (RL) is notoriously impractical to deploy due to sample inefficiency. Meta-RL directly addresses this sample inefficiency by learning to perform few-shot learning when a distribution of related tasks is available for meta-training. While many specialized meta-RL methods have been proposed, recent work suggests that end-to-end learning in conjunction with an off-the-shelf sequential model, such as a recurrent network, is a surprisingly strong baseline. However, such claims have been controversial due to limited supporting evidence, particularly in the face of prior work establishing precisely the opposite. In this paper, we conduct an empirical investigation. While we likewise find that a recurrent network can achieve strong performance, we demonstrate that the use of hypernetworks is crucial to maximizing their potential. Surprisingly, when combined with hypernetworks, the recurrent baselines that are far simpler than existing specialized methods actually achieve the strongest performance of all methods evaluated. We provide code at https://github.com/jacooba/hyper.", "keywords": "meta-RL;RL;reinforcement learning;memory;rnn;recurrent;hypernetwork;few-shot;in-context", "primary_area": "", "supplementary_material": "", "author": "Jacob Beck;Risto Vuorio;Zheng Xiong;Shimon Whiteson", "authorids": "~Jacob_Beck1;~Risto_Vuorio1;~Zheng_Xiong1;~Shimon_Whiteson1", "gender": ";;M;", "homepage": ";;http://jakebeck.com;https://vuoristo.github.io/", "dblp": "217/1483;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;;222/2614", "google_scholar": "F5bted4AAAAJ;;https://scholar.google.ca/citations?user=PrS_dHMAAAAJ;qCk3GFAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zheng_Xiong1;~Shimon_Whiteson1;~Jacob_Austin_Beck1;~Risto_Ilkka_Antero_Vuorio1", "aff": "University of Oxford;University of Oxford;Department of Computer Science, University of Oxford;Department of Computer Science, University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk", "position": "PhD student;Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nbeck2023recurrent,\ntitle={Recurrent Hypernetworks are Surprisingly Strong in Meta-{RL}},\nauthor={Jacob Beck and Risto Vuorio and Zheng Xiong and Shimon Whiteson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pefAAzu8an}\n}", "github": "", "project": "", "reviewers": "YdBr;k7Er;rCPG;NnYJ;8GEx", "pdf_size": 12597763, "rating": "4;5;6;6;7", "confidence": "3;3;3;1;4", "soundness": "2;3;4;3;2", "novelty": "2;2;3;2;3", "presentation": "2;3;4;2;3", "wc_summary": "135;149;48;60;90", "wc_strengths": "71;54;62;55;159", "wc_weaknesses": "778;84;74;163;373", "wc_questions": "166;124;21;101;23", "wc_limitations": "76;12;13;96;5", "wc_review": "1226;423;218;475;650", "wc_reply_reviewers": "461;0;0;149;252", "wc_reply_authors": "598;57;63;368;126", "reply_reviewers": "2;0;0;2;2", "reply_authors": "4;2;2;3;3", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 96.4, 
39.91290518115663 ], "wc_strengths_avg": [ 80.2, 39.866778149230974 ], "wc_weaknesses_avg": [ 294.4, 264.63454045154424 ], "wc_questions_avg": [ 87.0, 57.02280245656118 ], "wc_limitations_avg": [ 40.4, 37.86607980765899 ], "wc_review_avg": [ 598.4, 342.70722198401364 ], "wc_reply_reviewers_avg": [ 172.4, 172.98392988945534 ], "wc_reply_authors_avg": [ 242.4, 210.87114548937225 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.12009611535381534, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8012227335711265742&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ox.ac.uk;ox.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "pgmNZQdH7R", "title": "Modulate Your Spectrum in Self-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Whitening loss provides a theoretical guarantee for avoiding feature collapse in self-supervised learning (SSL) with joint embedding architectures. One typical implementation of whitening loss is hard whitening, which applies a whitening transformation to the embedding and imposes the loss on the whitened output. In this paper, we propose the spectral transformation (ST) framework, which maps the spectrum of the embedding to a desired distribution during the forward pass and modulates the spectrum of the embedding via implicit gradient updates during the backward pass. We show that the whitening transformation is, by definition, a special instance of ST, and our empirical investigation shows that other instances can also avoid collapse. Furthermore, we propose a new instance of ST, called IterNorm with trace loss (INTL). We theoretically prove that INTL avoids collapse and modulates the spectrum of the embedding towards an equal-eigenvalue distribution during optimization. Moreover, INTL achieves 76.6\\% top-1 accuracy in linear evaluation on ImageNet using ResNet-50, exceeding the performance of the supervised baseline, and this result is obtained with a batch size of only 256. Comprehensive experiments show that INTL is a promising SSL method in practice. 
", "keywords": "self-supervised learning;whitening;dimensional collapse;spectral transformation;iterative normalization", "primary_area": "", "supplementary_material": "/attachment/4ad4ded597ebd20fb9201345599cdffda9127362.zip", "author": "Xi Weng;Yunhao Ni;Tengwei Song;Jie Luo;Rao Muhammad Anwer;Salman Khan;Fahad Khan;Lei Huang", "authorids": "~Xi_Weng1;~Yunhao_Ni1;~Tengwei_Song1;~Jie_Luo5;~Rao_Muhammad_Anwer2;~Salman_Khan4;~Fahad_Khan1;~Lei_Huang1", "gender": "M;;F;M;;M;M;M", "homepage": "https://winci-ai.github.io;https://github.com/Musicath;;https://www.linkedin.com/in/luo-jie-32847334;;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home;https://huangleibuaa.github.io/", "dblp": ";;258/5053;29/186-4;;32/11535-1;05/8618;18/1763-15", "google_scholar": "https://scholar.google.com.hk/citations?user=kT4c594AAAAJ;;https://scholar.google.com/citations?view_op=list_works;gwJTOVgAAAAJ;;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ;https://scholar.google.com.hk/citations?user=yTshbKkAAAAJ", "orcid": ";0009-0000-7637-1447;;0000-0002-4157-9931;;0000-0002-9502-1749;;", "linkedin": ";;;luo-jie-32847334;;;;", "or_profile": "~Xi_Weng1;~Yunhao_Ni1;~Tengwei_Song1;~Jie_Luo5;~Rao_Muhammad_Anwer2;~Salman_Khan4;~Fahad_Khan1;~Lei_Huang1", "aff": "Beihang University ;Beihang University;Beihang University;Beihang University;;Australian National University;Link\u00f6ping University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;;anu.edu.au;liu.se;buaa.edu.cn", "position": "MS student;Undergrad student;PhD student;Associate Professor;;Lecturer;Associate Professor;Associate Professor", "bibtex": "@misc{\nweng2023modulate,\ntitle={Modulate Your Spectrum in Self-Supervised Learning},\nauthor={Xi Weng and Yunhao Ni and Tengwei Song and Jie Luo and Rao Muhammad Anwer and Salman Khan and Fahad Khan and Lei Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=pgmNZQdH7R}\n}", "github": "", "project": "", "reviewers": "Ckne;Fnki;SEtq;tQK6;QdWo;TM8b", "site": "https://openreview.net/forum?id=pgmNZQdH7R", "pdf_size": 0, "rating": "3;5;5;5;6;6", "confidence": "3;5;4;3;2;3", "soundness": "3;2;3;3;3;3", "novelty": "2;2;3;4;3;2", "presentation": "3;3;2;2;4;4", "wc_summary": "56;97;47;109;59;62", "wc_strengths": "95;112;64;41;31;89", "wc_weaknesses": "331;177;134;113;108;191", "wc_questions": "38;102;4;47;4;5", "wc_limitations": "1;1;18;14;4;8", "wc_review": "521;489;267;324;206;355", "wc_reply_reviewers": "0;0;0;53;0;0", "wc_reply_authors": "56;0;0;48;0;0", "reply_reviewers": "0;0;0;1;0;0", "reply_authors": "2;1;1;2;1;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 71.66666666666667, 22.88861920013719 ], "wc_strengths_avg": [ 72.0, 29.223278392404914 ], "wc_weaknesses_avg": [ 175.66666666666666, 75.9466186797601 ], "wc_questions_avg": [ 33.333333333333336, 35.2309838384088 ], "wc_limitations_avg": [ 7.666666666666667, 6.446359868604572 ], "wc_review_avg": [ 360.3333333333333, 112.72927254661448 ], "wc_reply_reviewers_avg": [ 8.833333333333334, 19.751933801248143 ], "wc_reply_authors_avg": [ 17.333333333333332, 24.62158041682585 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 
0 ], "corr_rating_confidence": -0.1767766952966369, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6464286174143687705&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Beihang University;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "BUAA;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2;0", "aff_country_unique": "China;Australia;Sweden" }, { "title": "FedFed: Feature Distillation against Data Heterogeneity in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70398", "id": "phnGilhPH8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bdcdf38389d7fcefc73c4c3720217155-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=phnGilhPH8", "openreview": "https://openreview.net/forum?id=phnGilhPH8", "poster": "/media/PosterPDFs/NeurIPS%202023/70398.png?t=1702357965.1522229", "slides": "https://nips.cc/virtual/2023/poster/70398", "video": "https://nips.cc/virtual/2023/poster/70398", "author_site": "Zhiqin Yang, Yonggang Zhang, Yu Zheng, Xinmei Tian, Hao Peng, Tongliang Liu, Bo Han", "tldr": "", "abstract": "Federated learning (FL) typically faces data heterogeneity, i.e., distribution shifting among clients. \nSharing clients' information has shown great potentiality in mitigating data heterogeneity, yet incurs a dilemma in preserving privacy and promoting model performance. To alleviate the dilemma, we raise a fundamental question: Is it possible to share partial features in the data to tackle data heterogeneity?\nIn this work, we give an affirmative answer to this question by proposing a novel approach called **Fed**erated **Fe**ature **d**istillation (FedFed).\nSpecifically, FedFed partitions data into performance-sensitive features (i.e., greatly contributing to model performance) and performance-robust features (i.e., limitedly contributing to model performance).\nThe performance-sensitive features are globally shared to mitigate data heterogeneity, while the performance-robust features are kept locally.\nFedFed enables clients to train models over local and shared data. 
Comprehensive experiments demonstrate the efficacy of FedFed in promoting model performance.", "keywords": "Federated Learning;Data Heterogeneity", "primary_area": "", "supplementary_material": "", "author": "Zhiqin Yang;Yonggang Zhang;Yu Zheng;Xinmei Tian;Hao Peng;Tongliang Liu;Bo Han", "authorids": "~Zhiqin_Yang1;~Yonggang_Zhang1;~Yu_Zheng10;~Xinmei_Tian1;~Hao_Peng10;~Tongliang_Liu1;~Bo_Han1", "gender": "M;F;F;M;M;M;M", "homepage": "https://yonggangzhangben.github.io/index.html;https://yuzhengcuhk.github.io;https://faculty.ustc.edu.cn/tianxinmei1/zh_CN/index.htm;https://dblp.org/pid/69/7742-2.html;https://tongliang-liu.github.io/;https://bhanml.github.io/;https://visitworld123.github.io/", "dblp": "27/6859-3;;03/5204-1;;150/6667;241/0472-3;251/6782", "google_scholar": "XSbEr98AAAAJ;fH3uUgYAAAAJ;https://scholar.google.com.au/citations?hl=zh-CN;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ;DSjGPu0AAAAJ", "orcid": "0000-0002-4080-7592;;0000-0002-5952-8753;;;;", "linkedin": ";;;;;;", "or_profile": "~Yonggang_Zhang1;~Yu_Zheng10;~Xinmei_Tian1;~Hao_Peng10;~Tongliang_Liu1;~bo_han2;~Zhiqin_Brian_Yang1", "aff": "Hong Kong Baptist University;Chinese University of Hong Kong;University of Science and Technology of China;Wuhan University;University of Sydney;RIKEN;Beihang University", "aff_domain": "hkbu.edu.hk;cuhk.hk;ustc.edu.cn;whu.edu.cn;sydney.edu.au;riken.jp;buaa.edu.cn", "position": "Postdoc;PhD student;Full Professor;Undergrad student;Lecturer;Adjunct Scientist;MS student", "bibtex": "@inproceedings{\nyang2023fedfed,\ntitle={FedFed: Feature Distillation against Data Heterogeneity in Federated Learning},\nauthor={Zhiqin Yang and Yonggang Zhang and Yu Zheng and Xinmei Tian and Hao Peng and Tongliang Liu and Bo Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=phnGilhPH8}\n}", "github": "", "project": "", "reviewers": "ezSw;bAmN;xchj;LGae;wKGY", "pdf_size": 2611027, "rating": "5;5;5;6;7", "confidence": "4;3;2;3;4", "soundness": "2;3;3;2;2", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;2", "wc_summary": "87;54;54;65;40", "wc_strengths": "32;48;80;57;33", "wc_weaknesses": "231;319;65;29;41", "wc_questions": "149;104;2;466;39", "wc_limitations": "18;14;1;101;48", "wc_review": "517;539;202;718;201", "wc_reply_reviewers": "92;36;0;26;134", "wc_reply_authors": "46;15;0;29;627", "reply_reviewers": "1;1;0;1;3", "reply_authors": "2;2;1;2;4", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 60.0, 15.658863304850707 ], "wc_strengths_avg": [ 50.0, 17.69745744450315 ], "wc_weaknesses_avg": [ 137.0, 116.63961591157612 ], "wc_questions_avg": [ 152.0, 165.03211808614708 ], "wc_limitations_avg": [ 36.4, 35.78044158475409 ], "wc_review_avg": [ 435.4, 203.30922261422378 ], "wc_reply_reviewers_avg": [ 57.6, 48.586417855199 ], "wc_reply_authors_avg": [ 143.4, 242.27802211508993 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4677071733467427, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18238861602351103296&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": 
"hkbu.edu.hk;cuhk.hk;ustc.edu.cn;whu.edu.cn;sydney.edu.au;riken.jp;buaa.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Hong Kong Baptist University;Chinese University of Hong Kong;University of Science and Technology of China;Wuhan University;University of Sydney;RIKEN;Beihang University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.cuhk.edu.hk;http://www.ustc.edu.cn;http://www.whu.edu.cn/;https://www.sydney.edu.au;https://www.riken.jp;http://www.buaa.edu.cn/", "aff_unique_abbr": "HKBU;CUHK;USTC;WHU;USYD;RIKEN;BUAA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;2;0", "aff_country_unique": "China;Australia;Japan" }, { "title": "Learning Probabilistic Symmetrization for Architecture Agnostic Equivariance", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70397", "id": "phnN1eu5AX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3b5c7c9c5c7bd77eb73d0baec7a07165-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=phnN1eu5AX", "openreview": "https://openreview.net/forum?id=phnN1eu5AX", "poster": "/media/PosterPDFs/NeurIPS%202023/70397.png?t=1701961044.0791948", "slides": "https://nips.cc/virtual/2023/poster/70397", "video": "https://nips.cc/virtual/2023/poster/70397", "author_site": "Jinwoo Kim, Dat Nguyen, Ayhan Suleymanzade, Hyeokjun An, Seunghoon Hong", "tldr": "", "abstract": "We present a novel framework to overcome the limitations of equivariant architectures in learning functions with group symmetries. In contrary to equivariant architectures, we use an arbitrary base model such as an MLP or a transformer and symmetrize it to be equivariant to the given group by employing a small equivariant network that parameterizes the probabilistic distribution underlying the symmetrization. The distribution is end-to-end trained with the base model which can maximize performance while reducing sample complexity of symmetrization. We show that this approach ensures not only equivariance to given group but also universal approximation capability in expectation. We implement our method on various base models, including patch-based transformers that can be initialized from pretrained vision transformers, and test them for a wide range of symmetry groups including permutation and Euclidean groups and their combinations. Empirical tests show competitive results against tailored equivariant architectures, suggesting the potential for learning equivariant functions for diverse groups using a non-equivariant universal base architecture. We further show evidence of enhanced learning in symmetric modalities, like graphs, when pretrained from non-symmetric modalities, like vision. 
Code is available at https://github.com/jw9730/lps.", "keywords": "equivariant machine learning;transformers;graphs;general-purpose architectures", "primary_area": "", "supplementary_material": "/attachment/d3c130d4e77d4d71bd5ebab8a2ac4b6d12725aa6.pdf", "author": "Jinwoo Kim;Dat Tien Nguyen;Ayhan Suleymanzade;Hyeokjun An;Seunghoon Hong", "authorids": "~Jinwoo_Kim4;~Dat_Tien_Nguyen2;~Ayhan_Suleymanzade1;~Hyeokjun_An1;~Seunghoon_Hong2", "gender": "M;M;M;M;M", "homepage": "https://jw9730.github.io/;;;https://hyeokjun-an.github.io/hyeokjun-an/Hyeokjun-An;https://maga33.github.io/", "dblp": ";;;;142/3014.html", "google_scholar": "kSJAiE4AAAAJ;;;;hvr3ALkAAAAJ", "orcid": ";;;;", "linkedin": "jw9730/;tiendatnguyen-vision/;ayhansuleymanzade/;;seunghoon-hong-194489a4/", "or_profile": "~Jinwoo_Kim4;~Dat_Tien_Nguyen2;~Ayhan_Suleymanzade1;~Hyeokjun_An1;~Seunghoon_Hong1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.edu;", "position": "PhD student;Undergrad student;Undergrad student;MS student;", "bibtex": "@inproceedings{\nkim2023learning,\ntitle={Learning Probabilistic Symmetrization for Architecture Agnostic Equivariance},\nauthor={Jinwoo Kim and Dat Tien Nguyen and Ayhan Suleymanzade and Hyeokjun An and Seunghoon Hong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=phnN1eu5AX}\n}", "github": "", "project": "", "reviewers": "GZXj;wgQS;Ry8Q;SgC2;SdiQ", "pdf_size": 1382077, "rating": "5;6;6;6;7", "confidence": "3;3;3;3;4", "soundness": "3;3;4;3;3", "novelty": "2;2;3;3;2", "presentation": "3;3;3;3;4", "wc_summary": "36;58;95;57;53", "wc_strengths": "52;30;53;62;95", "wc_weaknesses": "322;221;227;179;115", "wc_questions": "81;13;47;188;335", "wc_limitations": "53;7;7;13;29", "wc_review": "544;329;429;499;627", "wc_reply_reviewers": "18;22;0;15;35", "wc_reply_authors": "0;121;86;79;373", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 59.8, 19.30181338631166 ], "wc_strengths_avg": [ 58.4, 21.114923632350653 ], "wc_weaknesses_avg": [ 212.8, 67.67687936067975 ], "wc_questions_avg": [ 132.8, 116.90064157223432 ], "wc_limitations_avg": [ 21.8, 17.55448660599335 ], "wc_review_avg": [ 485.6, 101.30271467241143 ], "wc_reply_reviewers_avg": [ 18.0, 11.29601699715435 ], "wc_reply_authors_avg": [ 131.8, 126.92580509888444 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7905694150420948, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7767318490197144793&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.edu;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" 
}, { "title": "Kernelized Reinforcement Learning with Order Optimal Regret Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70396", "id": "pirH9ycaNg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0d17d033059bacd127f25ab28784f829-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pirH9ycaNg", "openreview": "https://openreview.net/forum?id=pirH9ycaNg", "poster": "/media/PosterPDFs/NeurIPS%202023/70396.png?t=1702225669.4163928", "slides": "https://nips.cc/virtual/2023/poster/70396", "video": "https://nips.cc/virtual/2023/poster/70396", "author_site": "Sattar Vakili, Julia Olkhovskaya", "tldr": "", "abstract": "Modern reinforcement learning (RL) has shown empirical success in various real world settings with complex models and large state-action spaces. The existing analytical results, however, typically focus on settings with a small number of state-actions or simple models such as linearly modeled state-action value functions. To derive RL policies that efficiently handle large state-action spaces with more general value functions, some recent works have considered nonlinear function approximation using kernel ridge regression. We propose $\\pi$-KRVI, an optimistic modification of least-squares value iteration, when the action-value function is represented by an RKHS. We prove the first order-optimal regret guarantees under a general setting. Our results show a significant polynomial in the number of episodes improvement over the state of the art. In particular, with highly non-smooth kernels (such as Neural Tangent kernel or some Mat\u00e9rn kernels) the existing results lead to trivial (superlinear in the number of episodes) regret bounds. We show a sublinear regret bound that is order optimal in the cases where a lower bound on regret is known (which includes the kernels mentioned above).", "keywords": "Reinforcement Learning;Kernel ridge regression;Gaussian processes;LSVI", "primary_area": "", "supplementary_material": "", "author": "Sattar Vakili;Julia Olkhovskaya", "authorids": "~Sattar_Vakili1;~Julia_Olkhovskaya1", "gender": ";F", "homepage": "https://sattar-vakili.github.io/;https://sites.google.com/view/julia-olkhovskaya/home", "dblp": "140/5473;", "google_scholar": "N9xs8w0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Sattar_Vakili1;~Julia_Olkhovskaya1", "aff": "MediaTek Research;Vrije Universiteit Amsterdam", "aff_domain": "mtkresearch.com;vu.nl", "position": "Principal AI Research Manager;Postdoc", "bibtex": "@inproceedings{\nvakili2023kernelized,\ntitle={Kernelized Reinforcement Learning with Order Optimal Regret Bounds},\nauthor={Sattar Vakili and Julia Olkhovskaya},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pirH9ycaNg}\n}", "github": "", "project": "", "reviewers": "qSio;o5D2;2xmH;fuZF", "pdf_size": 453875, "rating": "5;6;6;7", "confidence": "3;4;3;4", "soundness": "3;1;3;4", "novelty": "2;2;3;4", "presentation": "3;2;3;4", "wc_summary": "69;86;109;92", "wc_strengths": "98;57;48;42", "wc_weaknesses": "166;418;298;37", "wc_questions": "138;16;11;31", "wc_limitations": "2;1;11;5", "wc_review": "473;578;477;207", "wc_reply_reviewers": "260;212;0;32", "wc_reply_authors": "559;400;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 
1.0897247358851685 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.0, 14.300349646075091 ], "wc_strengths_avg": [ 61.25, 21.878928218722233 ], "wc_weaknesses_avg": [ 229.75, 142.57695290614117 ], "wc_questions_avg": [ 49.0, 51.90857347298228 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 433.75, 137.5088633506946 ], "wc_reply_reviewers_avg": [ 126.0, 111.87493016757597 ], "wc_reply_authors_avg": [ 239.75, 246.25228425336485 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5153852524241743240&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mtkresearch.com;vu.nl", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "MediaTek Inc.;Vrije Universiteit Amsterdam", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.mediatek.com/;https://www.vu.nl", "aff_unique_abbr": "MediaTek;VU Amsterdam", "aff_campus_unique_index": "0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Netherlands" }, { "title": "Wasserstein Quantum Monte Carlo: A Novel Approach for Solving the Quantum Many-Body Schr\u00f6dinger Equation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70395", "id": "pjSzKhSrfs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c8450235f227f136242f774b2799581f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pjSzKhSrfs", "openreview": "https://openreview.net/forum?id=pjSzKhSrfs", "poster": "/media/PosterPDFs/NeurIPS%202023/70395.png?t=1702324664.5834334", "slides": "https://nips.cc/virtual/2023/poster/70395", "video": "https://nips.cc/virtual/2023/poster/70395", "author_site": "Kirill Neklyudov, Jannes Nys, Luca Thiede, Juan Carrasquilla, Qiang Liu, Max Welling, Alireza Makhzani", "tldr": "", "abstract": "Solving the quantum many-body Schr\u00f6dinger equation is a fundamental and challenging problem in the fields of quantum physics, quantum chemistry, and material sciences. One of the common computational approaches to this problem is Quantum Variational Monte Carlo (QVMC), in which ground-state solutions are obtained by minimizing the energy of the system within a restricted family of parameterized wave functions. Deep learning methods partially address the limitations of traditional QVMC by representing a rich family of wave functions in terms of neural networks. However, the optimization objective in QVMC remains notoriously hard to minimize and requires second-order optimization methods such as natural gradient. In this paper, we first reformulate energy functional minimization in the space of Born distributions corresponding to particle-permutation (anti-)symmetric wave functions, rather than the space of wave functions. We then interpret QVMC as the Fisher--Rao gradient flow in this distributional space, followed by a projection step onto the variational manifold. This perspective provides us with a principled framework to derive new QMC algorithms, by endowing the distributional space with better metrics, and following the projected gradient flow induced by those metrics. 
More specifically, we propose \"Wasserstein Quantum Monte Carlo\" (WQMC), which uses the gradient flow induced by the Wasserstein metric, rather than the Fisher--Rao metric, and corresponds to *transporting* the probability mass, rather than *teleporting* it. We demonstrate empirically that the dynamics of WQMC results in faster convergence to the ground state of molecular systems.", "keywords": "Quantum Monte Carlo;Schr\u00f6dinger equation;Wasserstein Fisher-Rao gradient flow", "primary_area": "", "supplementary_material": "/attachment/c313521f4260c070816a8548456b48ffcc1c3c05.zip", "author": "Kirill Neklyudov;Jannes Nys;Luca Thiede;Juan Felipe Carrasquilla Alvarez;qiang liu;Max Welling;Alireza Makhzani", "authorids": "~Kirill_Neklyudov1;jannes.nys@epfl.ch;~Luca_Thiede1;carrasqu@vectorinstitute.ai;~qiang_liu4;~Max_Welling1;~Alireza_Makhzani1", "gender": "M;;M;;;M;", "homepage": "https://necludov.github.io/;;;;;https://staff.fnwi.uva.nl/m.welling/;http://www.alireza.ai/", "dblp": "195/1093;;241/6690;;;16/2286;122/5126.html", "google_scholar": "https://scholar.google.ru/citations?user=eOttYWgAAAAJ;;https://scholar.google.com/citations?view_op=list_works;;;https://scholar.google.nl/citations?user=8200InoAAAAJ;B0KVWJEAAAAJ", "orcid": ";;;;;0000-0003-1484-2121;", "linkedin": ";;;;;;", "or_profile": "~Kirill_Neklyudov1;jannes.nys@epfl.ch;~Luca_Thiede1;carrasqu@vectorinstitute.ai;~qiang_liu4;~Max_Welling1;~Alireza_Makhzani1", "aff": "Vector Institute;;Vector Institute;;;University of Amsterdam;Vector Institute", "aff_domain": "vectorinstitute.ai;;vectorinstitute.ai;;;uva.nl;vectorinstitute.ai", "position": "Postdoc;;PhD student;;;Full Professor;Researcher", "bibtex": "@inproceedings{\nneklyudov2023wasserstein,\ntitle={Wasserstein Quantum Monte Carlo: A Novel Approach for Solving the Quantum Many-Body Schr\\\"odinger Equation},\nauthor={Kirill Neklyudov and Jannes Nys and Luca Thiede and Juan Felipe Carrasquilla Alvarez and qiang liu and Max Welling and Alireza Makhzani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pjSzKhSrfs}\n}", "github": "", "project": "", "reviewers": "n7Uw;Qbaw;Nk4c;f58M;wiqn", "pdf_size": 1388918, "rating": "5;6;7;7;8", "confidence": "1;3;3;3;4", "soundness": "3;2;4;3;3", "novelty": "3;3;3;3;4", "presentation": "3;3;4;3;4", "wc_summary": "59;66;117;98;50", "wc_strengths": "31;71;43;24;36", "wc_weaknesses": "23;208;72;19;30", "wc_questions": "46;29;192;40;56", "wc_limitations": "8;4;61;1;43", "wc_review": "167;378;485;182;215", "wc_reply_reviewers": "34;337;131;9;13", "wc_reply_authors": "0;450;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 78.0, 25.337718918639855 ], "wc_strengths_avg": [ 41.0, 16.235762994081924 ], "wc_weaknesses_avg": [ 70.4, 71.35432712877335 ], "wc_questions_avg": [ 72.6, 60.33771623122639 ], "wc_limitations_avg": [ 23.4, 24.137936945812083 ], "wc_review_avg": [ 285.4, 124.96495508741641 ], "wc_reply_reviewers_avg": [ 104.8, 124.27453480098005 ], "wc_reply_authors_avg": [ 90.0, 180.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9207368843792512, "gs_citation": 12, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=13092765661642914686&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "vectorinstitute.ai;;vectorinstitute.ai;;;uva.nl;vectorinstitute.ai", "author_num": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Vector Institute;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://vectorinstitute.ai/;https://www.uva.nl", "aff_unique_abbr": "Vector Institute;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;Netherlands" }, { "title": "TpuGraphs: A Performance Prediction Dataset on Large Tensor Computational Graphs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73463", "id": "plAix1NxhU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ded1a89e2b3b925444ada973af66336e-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=plAix1NxhU", "openreview": "https://openreview.net/forum?id=plAix1NxhU", "poster": "/media/PosterPDFs/NeurIPS%202023/73463.png?t=1702008901.5427425", "slides": "https://nips.cc/virtual/2023/poster/73463", "video": "https://nips.cc/virtual/2023/poster/73463", "author_site": "Mangpo Phothilimthana, Sami Abu-El-Haija, Kaidi Cao, Bahare Fatemi, Michael Burrows, Charith Mendis, Bryan Perozzi", "tldr": "", "abstract": "Precise hardware performance models play a crucial role in code optimizations. They can assist compilers in making heuristic decisions or aid autotuners in identifying the optimal configuration for a given program. For example, the autotuner for XLA, a machine learning compiler, discovered 10\u201320\\% speedup on state-of-the-art models serving substantial production traffic at Google. Although there exist a few datasets for program performance prediction, they target small sub-programs such as basic blocks or kernels. This paper introduces TpuGraphs, a performance prediction dataset on full tensor programs, represented as computational graphs, running on Tensor Processing Units (TPUs). Each graph in the dataset represents the main computation of a machine learning workload, e.g., a training epoch or an inference step. Each data sample contains a computational graph, a compilation configuration, and the execution time of the graph when compiled with the configuration. The graphs in the dataset are collected from open-source machine learning programs, featuring popular model architectures (e.g., ResNet, EfficientNet, Mask R-CNN, and Transformer). TpuGraphs provides 25x more graphs than the largest graph property prediction dataset (with comparable graph sizes), and 770x larger graphs on average compared to existing performance prediction datasets on machine learning programs. 
This graph-level prediction task on large graphs introduces new challenges in learning, ranging from scalability and training efficiency to model quality.", "keywords": "graph dataset;graph neural networks;compilers;performance prediction", "primary_area": "", "supplementary_material": "", "author": "Phitchaya Mangpo Phothilimthana;Sami Abu-El-Haija;Kaidi Cao;Bahare Fatemi;Michael Burrows;Charith Mendis;Bryan Perozzi", "authorids": "~Phitchaya_Mangpo_Phothilimthana1;~Sami_Abu-El-Haija1;~Kaidi_Cao1;~Bahare_Fatemi1;~Michael_Burrows1;~Charith_Mendis1;~Bryan_Perozzi1", "gender": "F;M;M;F;;M;", "homepage": "https://mangpo.net/;http://www.haija.org;https://ai.stanford.edu/~kaidicao/;;https://en.wikipedia.org/wiki/Michael_Burrows;https://charithmendis.com;http://www.perozzi.net/", "dblp": "127/3128;127/6620;203/8207;;;163/3175;91/10813", "google_scholar": "7Fxbm0AAAAAJ;t80qlTcAAAAJ;https://scholar.google.com.hk/citations?user=4Zw1PJ8AAAAJ;;;utZ3JYUAAAAJ;rZgbMs4AAAAJ", "orcid": ";;;;;0000-0002-8140-2321;", "linkedin": ";samihaija/;;bahare-fatemi-b0049179/;;charith-mendis-36650728/;", "or_profile": "~Phitchaya_Mangpo_Phothilimthana1;~Sami_Abu-El-Haija1;~Kaidi_Cao1;~Bahare_Fatemi1;~Michael_Burrows1;~Charith_Mendis1;~Bryan_Perozzi1", "aff": "Google;Research, Google;Stanford University;Google;Google;University of Illinois, Urbana Champaign;Google", "aff_domain": "google.com;research.google.com;stanford.edu;google.com;google.com;illinois.edu;google.com", "position": "Researcher;Research Scientist;PhD student;Researcher;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nphothilimthana2023tpugraphs,\ntitle={TpuGraphs: A Performance Prediction Dataset on Large Tensor Computational Graphs},\nauthor={Phitchaya Mangpo Phothilimthana and Sami Abu-El-Haija and Kaidi Cao and Bahare Fatemi and Michael Burrows and Charith Mendis and Bryan Perozzi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=plAix1NxhU}\n}", "github": "", "project": "", "reviewers": "2Nc2;fGu5;jdZ8;cC99;h3jB", "pdf_size": 570681, "rating": "5;5;7;7;8", "confidence": "4;3;4;3;3", "wc_summary_and_contributions": "87;79;82;114;127", "wc_strengths": "42;40;296;81;126", "wc_improvement": "44;6;332;157;99", "wc_limitations": "31;176;270;183;121", "wc_correctness": "26;5;13;8;16", "wc_clarity": "6;5;39;11;57", "wc_relation_to_prior_work": "62;9;60;23;11", "wc_documentation": "19;10;9;56;14", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "318;331;1102;634;572", "wc_reply_reviewers": "88;0;0;0;259", "wc_reply_authors": "523;855;482;780;675", "reply_reviewers": "1;0;0;0;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 97.8, 19.156200040717888 ], "wc_strengths_avg": [ 117.0, 94.83881062096889 ], "wc_improvement_avg": [ 127.6, 114.22539122279248 ], "wc_limitations_avg": [ 156.2, 78.70806820142393 ], "wc_correctness_avg": [ 13.6, 7.28285658241325 ], "wc_clarity_avg": [ 23.6, 20.819221887476967 ], "wc_relation_to_prior_work_avg": [ 33.0, 23.366642891095847 ], "wc_documentation_avg": [ 21.6, 17.556765077883796 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 591.4, 284.76629013982676 ], "wc_reply_reviewers_avg": [ 69.4, 100.74045860526941 ], "wc_reply_authors_avg": [ 663.0, 143.56740577164442 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ],
"replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6985686274156473689&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;research.google.com;stanford.edu;google.com;google.com;illinois.edu;google.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2;0", "aff_unique_norm": "Google;Stanford University;University of Illinois Urbana-Champaign", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.stanford.edu;https://illinois.edu", "aff_unique_abbr": "Google;Stanford;UIUC", "aff_campus_unique_index": "0;0;1;0;0;2;0", "aff_campus_unique": "Mountain View;Stanford;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70394", "id": "ppJuFSOAnM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a87980b9853e84dfb295855b425c262-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ppJuFSOAnM", "openreview": "https://openreview.net/forum?id=ppJuFSOAnM", "poster": "/media/PosterPDFs/NeurIPS%202023/70394.png?t=1701261035.429292", "slides": "https://nips.cc/virtual/2023/poster/70394", "video": "https://nips.cc/virtual/2023/poster/70394", "author_site": "Zhengyi Wang, Cheng Lu, Yikai Wang, Fan Bao, Chongxuan LI, Hang Su, Jun Zhu", "tldr": "", "abstract": "Score distillation sampling (SDS) has shown great promise in text-to-3D generation by distilling pretrained large-scale text-to-image diffusion models, but suffers from over-saturation, over-smoothing, and low-diversity problems. In this work, we propose to model the 3D parameter as a random variable instead of a constant as in SDS and present *variational score distillation* (VSD), a principled particle-based variational framework to explain and address the aforementioned issues in text-to-3D generation. We show that SDS is a special case of VSD and leads to poor samples with both small and large CFG weights. In comparison, VSD works well with various CFG weights as ancestral sampling from diffusion models and simultaneously improves the diversity and sample quality with a common CFG weight (i.e., 7.5). We further present various improvements in the design space for text-to-3D such as distillation time schedule and density initialization, which are orthogonal to the distillation algorithm yet not well explored. Our overall approach, dubbed *ProlificDreamer*, can generate high rendering resolution (i.e., 512$\\times$512) and high-fidelity NeRF with rich structure and complex effects (e.g., smoke and drops). 
Further, initialized from NeRF, meshes fine-tuned by VSD are meticulously detailed and photo-realistic.", "keywords": "diffusion models;text to 3D", "primary_area": "", "supplementary_material": "/attachment/79a6d817a5ae7c4cd256cc63e8dbc119cf0bbd0d.zip", "author": "Zhengyi Wang;Cheng Lu;Yikai Wang;Fan Bao;Chongxuan Li;Hang Su;Jun Zhu", "authorids": "~Zhengyi_Wang1;~Cheng_Lu5;~Yikai_Wang2;~Fan_Bao1;~Chongxuan_Li1;~Hang_Su3;~Jun_Zhu2", "gender": "M;M;M;M;M;M;M", "homepage": "https://thuwzy.github.io;https://luchengthu.github.io/;https://yikaiw.github.io/;https://baofff.github.io/;http://ml.cs.tsinghua.edu.cn/~chongxuan;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": ";91/1482-11;85/9555-1;71/3877;161/9965;50/2644-1;26/5371-6", "google_scholar": "dtuPuRQAAAAJ;vPE9VRoAAAAJ;MnW5aegAAAAJ;;UKMcQn4AAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": ";;;;0000-0002-0912-9076;;", "linkedin": ";;;;;;", "or_profile": "~Zhengyi_Wang1;~Cheng_Lu5;~Yikai_Wang2;~Fan_Bao1;~Chongxuan_Li1;~Jun_Zhu2;~Hang_Su2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Renmin University of China;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ruc.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Postdoc;PhD student;Assistant Professor;Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023prolificdreamer,\ntitle={ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation},\nauthor={Zhengyi Wang and Cheng Lu and Yikai Wang and Fan Bao and Chongxuan Li and Hang Su and Jun Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ppJuFSOAnM}\n}", "github": "", "project": "", "reviewers": "wz2D;eq7N;Gck3;krEH;W2L2", "pdf_size": 15518119, "rating": "6;7;7;7;7", "confidence": "3;5;3;4;4", "soundness": "3;3;4;3;3", "novelty": "4;3;4;4;3", "presentation": "4;2;3;3;3", "wc_summary": "21;61;80;106;110", "wc_strengths": "199;63;56;132;108", "wc_weaknesses": "386;235;54;482;335", "wc_questions": "147;6;5;83;177", "wc_limitations": "67;6;6;27;34", "wc_review": "820;371;201;830;764", "wc_reply_reviewers": "0;0;0;21;200", "wc_reply_authors": "0;0;0;44;170", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 75.6, 32.62269148920733 ], "wc_strengths_avg": [ 111.6, 52.00230764110377 ], "wc_weaknesses_avg": [ 298.4, 145.9268309804609 ], "wc_questions_avg": [ 83.6, 70.63030511048356 ], "wc_limitations_avg": [ 28.0, 22.47665455533808 ], "wc_review_avg": [ 597.2, 260.6909281121996 ], "wc_reply_reviewers_avg": [ 44.2, 78.32343199834901 ], "wc_reply_authors_avg": [ 42.8, 65.84345069936721 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 887, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8357509567185503962&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ruc.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", 
"aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Distributional Pareto-Optimal Multi-Objective Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70393", "id": "prIwYTU9PV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32285dd184dbfc33cb2d1f0db53c23c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=prIwYTU9PV", "openreview": "https://openreview.net/forum?id=prIwYTU9PV", "poster": "/media/PosterPDFs/NeurIPS%202023/70393.png?t=1702050756.088681", "slides": "https://nips.cc/virtual/2023/poster/70393", "video": "https://nips.cc/virtual/2023/poster/70393", "author_site": "Xin-Qiang Cai, Pushi Zhang, Li Zhao, Jiang Bian, Masashi Sugiyama, Ashley Llorens", "tldr": "", "abstract": "Multi-objective reinforcement learning (MORL) has been proposed to learn control policies over multiple competing objectives with each possible preference over returns. However, current MORL algorithms fail to account for distributional preferences over the multi-variate returns, which are particularly important in real-world scenarios such as autonomous driving. To address this issue, we extend the concept of Pareto-optimality in MORL into distributional Pareto-optimality, which captures the optimality of return distributions, rather than the expectations. Our proposed method, called Distributional Pareto-Optimal Multi-Objective Reinforcement Learning~(DPMORL), is capable of learning distributional Pareto-optimal policies that balance multiple objectives while considering the return uncertainty. 
We evaluated our method on several benchmark problems and demonstrated its effectiveness in discovering distributional Pareto-optimal policies and satisfying diverse distributional preferences compared to existing MORL methods.", "keywords": "Multi-Objective Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Xin-Qiang Cai;Pushi Zhang;Li Zhao;Jiang Bian;Masashi Sugiyama;Ashley Juan Llorens", "authorids": "~Xin-Qiang_Cai1;~Pushi_Zhang1;~Li_Zhao1;~Jiang_Bian1;~Masashi_Sugiyama1;~Ashley_Juan_Llorens1", "gender": "M;;F;M;M;M", "homepage": "https://caixq1996.github.io/;https://zpschang.github.io/;https://www.microsoft.com/en-us/research/people/lizo/;https://sites.google.com/view/jiangbian;http://www.ms.k.u-tokyo.ac.jp/sugi/;", "dblp": "248/8034.html;288/4226;97/4708-7;09/851-2.html;35/1228;", "google_scholar": "rtMUMooAAAAJ;_DLMSkIAAAAJ;b-LJkLQAAAAJ;pZBEnY8AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;", "orcid": ";;;0000-0002-9472-600X;0000-0001-6658-6743;", "linkedin": ";;;jbian/;;ashley-soulstice-llorens/", "or_profile": "~Xin-Qiang_Cai1;~Pushi_Zhang1;~Li_Zhao1;~Jiang_Bian1;~Masashi_Sugiyama1;~Ashley_Juan_Llorens1", "aff": "The University of Tokyo;Microsoft;Microsoft;Microsoft;The University of Tokyo;", "aff_domain": "u-tokyo.ac.jp;microsoft.com;microsoft.com;microsoft.com;u-tokyo.ac.jp;", "position": "PhD student;Researcher;Researcher;Partner Research Manager;Full Professor;", "bibtex": "@inproceedings{\ncai2023distributional,\ntitle={Distributional Pareto-Optimal Multi-Objective Reinforcement Learning},\nauthor={Xin-Qiang Cai and Pushi Zhang and Li Zhao and Jiang Bian and Masashi Sugiyama and Ashley Juan Llorens},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=prIwYTU9PV}\n}", "github": "", "project": "", "reviewers": "GTy6;JBHP;y77J;1W6z", "pdf_size": 5527677, "rating": "5;5;5;8", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "94;112;103;123", "wc_strengths": "158;29;152;155", "wc_weaknesses": "301;50;286;35", "wc_questions": "216;91;221;26", "wc_limitations": "51;9;18;50", "wc_review": "820;291;780;389", "wc_reply_reviewers": "515;4;31;71", "wc_reply_authors": "159;77;153;39", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 108.0, 10.747092630102339 ], "wc_strengths_avg": [ 123.5, 54.60082416960389 ], "wc_weaknesses_avg": [ 168.0, 125.72390385284733 ], "wc_questions_avg": [ 138.5, 83.25412902673357 ], "wc_limitations_avg": [ 32.0, 18.774983355518586 ], "wc_review_avg": [ 570.0, 233.02467680483971 ], "wc_reply_reviewers_avg": [ 155.25, 209.06503174849686 ], "wc_reply_authors_avg": [ 107.0, 50.85272854036448 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3353642595963310528&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "u-tokyo.ac.jp;microsoft.com;microsoft.com;microsoft.com;u-tokyo.ac.jp;", "author_num": 6, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Tokyo;Microsoft", "aff_unique_dep": ";Microsoft Corporation", 
"aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.microsoft.com", "aff_unique_abbr": "UTokyo;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Japan;United States" }, { "title": "Label Poisoning is All You Need", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70392", "id": "prftZp6mDH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0c9b65fb3e41aaa86576df3ec33ad2e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=prftZp6mDH", "openreview": "https://openreview.net/forum?id=prftZp6mDH", "poster": "/media/PosterPDFs/NeurIPS%202023/70392.png?t=1702059141.72644", "slides": "https://nips.cc/virtual/2023/poster/70392", "video": "https://nips.cc/virtual/2023/poster/70392", "author_site": "Rishi Jha, Jonathan Hayase, Sewoong Oh", "tldr": "", "abstract": "In a backdoor attack, an adversary injects corrupted data into a model's training dataset in order to gain control over its predictions on images with a specific attacker-defined trigger. A typical corrupted training example requires altering both the image, by applying the trigger, and the label. Models trained on clean images, therefore, were considered safe from backdoor attacks. However, in some common machine learning scenarios, the training labels are provided by potentially malicious third-parties. This includes crowd-sourced annotation and knowledge distillation. We, hence, investigate a fundamental question: can we launch a successful backdoor attack by only corrupting labels? We introduce a novel approach to design label-only backdoor attacks, which we call FLIP, and demonstrate its strengths on three datasets (CIFAR-10, CIFAR-100, and Tiny-ImageNet) and four architectures (ResNet-32, ResNet-18, VGG-19, and Vision Transformer). With only 2% of CIFAR-10 labels corrupted, FLIP achieves a near-perfect attack success rate of 99.4% while suffering only a 1.8% drop in the clean test accuracy. 
Our approach builds upon the recent advances in trajectory matching, originally introduced for dataset distillation.", "keywords": "security;backdoor attack", "primary_area": "", "supplementary_material": "", "author": "Rishi Dev Jha;Jonathan Hayase;Sewoong Oh", "authorids": "~Rishi_Dev_Jha1;~Jonathan_Hayase2;~Sewoong_Oh1", "gender": "M;M;M", "homepage": "http://rishijha.com/;https://jhayase.github.io/;https://homes.cs.washington.edu/~sewoong/", "dblp": "359/6028.html;244/9599;80/4366", "google_scholar": "v8oRh6YAAAAJ;Zw-l1d8AAAAJ;55TAOdgAAAAJ", "orcid": ";0000-0002-3757-6586;", "linkedin": ";jonathan-hayase-5ab849128;", "or_profile": "~Rishi_Dev_Jha1;~Jonathan_Hayase2;~Sewoong_Oh1", "aff": "University of Washington;University of Washington;University of Washington", "aff_domain": "uw.edu;washington.edu;uw.edu", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\njha2023label,\ntitle={Label Poisoning is All You Need},\nauthor={Rishi Dev Jha and Jonathan Hayase and Sewoong Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=prftZp6mDH}\n}", "github": "", "project": "", "reviewers": "TrAf;QpmH;ewXv;uwV8;tDVF", "pdf_size": 1105395, "rating": "4;4;5;5;5", "confidence": "4;5;4;3;4", "soundness": "2;3;3;3;2", "novelty": "2;4;3;3;2", "presentation": "3;3;3;2;3", "wc_summary": "46;115;77;240;81", "wc_strengths": "39;35;70;52;25", "wc_weaknesses": "220;41;311;153;118", "wc_questions": "145;22;175;35;44", "wc_limitations": "5;9;12;47;59", "wc_review": "455;222;645;527;327", "wc_reply_reviewers": "51;0;77;16;42", "wc_reply_authors": "75;0;76;7;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;3;2;1", "rating_avg": [ 4.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 111.8, 67.72709945066302 ], "wc_strengths_avg": [ 44.2, 15.535765188750762 ], "wc_weaknesses_avg": [ 168.6, 91.65718738866036 ], "wc_questions_avg": [ 84.2, 63.00285707807226 ], "wc_limitations_avg": [ 26.4, 22.159422375143265 ], "wc_review_avg": [ 435.2, 148.3218122866627 ], "wc_reply_reviewers_avg": [ 37.2, 26.94735608552349 ], "wc_reply_authors_avg": [ 31.6, 35.93661085856595 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=898094630380926700&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "uw.edu;washington.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Self-Predictive Universal AI", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70391", "id": "psXVkKO9No", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/56a225639da77e8f7c0409f6d5ba996b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=psXVkKO9No", "openreview": "https://openreview.net/forum?id=psXVkKO9No", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/70391", "video": "https://nips.cc/virtual/2023/poster/70391", "author_site": "Elliot Catt, Jordi Grau-Moya, Marcus Hutter, Matthew Aitchison, Tim Genewein, Gr\u00e9goire Del\u00e9tang, Kevin Li, Joel Veness", "tldr": "", "abstract": "Reinforcement Learning (RL) algorithms typically utilize learning and/or planning techniques to derive effective policies. The integration of both approaches has proven to be highly successful in addressing complex sequential decision-making challenges, as evidenced by algorithms such as AlphaZero and MuZero, which consolidate the planning process into a parametric search-policy. AIXI, the most potent theoretical universal agent, leverages planning through comprehensive search as its primary means to find an optimal policy. Here we define an alternative universal agent, which we call Self-AIXI, that on the contrary to AIXI, maximally exploits learning to obtain good policies. It does so by self-predicting its own stream of action data, which is generated, similarly to other TD(0) agents, by taking an action maximization step over the current on-policy (universal mixture-policy) Q-value estimates. We prove that Self-AIXI converges to AIXI, and inherits a series of properties like maximal Legg-Hutter intelligence and the self-optimizing property.", "keywords": "General Reinforcement Learning;Reinforcement Learning;Self-Modeling;Bayes-optimality;Policy Distillation;Uncertainty;Universal AI", "primary_area": "", "supplementary_material": "/attachment/9d7423761dc657860b3dafd21a63d6df26759f72.pdf", "author": "Elliot Catt;Jordi Grau-Moya;Marcus Hutter;Matthew Aitchison;Tim Genewein;Gregoire Deletang;Li Kevin Wenliang;Joel Veness", "authorids": "~Elliot_Catt1;~Jordi_Grau-Moya2;~Marcus_Hutter1;~Matthew_Aitchison1;~Tim_Genewein1;~Gregoire_Deletang1;~Li_Kevin_Wenliang1;~Joel_Veness2", "gender": "M;;;M;M;;;", "homepage": ";;http://www.hutter1.net/;;http://tim.inversetemperature.net/;;https://kevin-w-li.github.io/;", "dblp": "204/2511;116/3023;h/MarcusHutter;;116/3039;;255/7009;", "google_scholar": "d1JYeMIAAAAJ;;https://scholar.google.com.tw/citations?user=7hmCntEAAAAJ;81URpqMAAAAJ;https://scholar.google.de/citations?user=peNTK9oAAAAJ;;https://scholar.google.co.uk/citations?user=MW45NMEAAAAJ;", "orcid": "0000-0001-9411-927X;;0000-0002-3263-4097;;;;;", "linkedin": ";jordi-g-9a1b02104;hutter1/;;;;;", "or_profile": "~Elliot_Catt1;~Jordi_Grau-Moya2;~Marcus_Hutter1;~Matthew_Aitchison1;~Tim_Genewein1;~Gregoire_Deletang1;~Li_Kevin_Wenliang1;~Joel_Veness2", "aff": "Google DeepMind;Google DeepMind;Australian National University;Australian National University;Google DeepMind;;Google DeepMind;", "aff_domain": "deepmind.com;deepmind.com;anu.edu.au;anu.edu.au;google.com;;deepmind.com;", "position": "Researcher;Researcher;Full Professor;PhD student;Researcher;;Researcher;", "bibtex": "@inproceedings{\ncatt2023selfpredictive,\ntitle={Self-Predictive Universal {AI}},\nauthor={Elliot Catt and Jordi Grau-Moya and Marcus Hutter and Matthew Aitchison and Tim Genewein and Gregoire Deletang and Li Kevin Wenliang and Joel Veness},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=psXVkKO9No}\n}", "github": "", "project": "", "reviewers": "xEuK;sXfu;mKJS;dcXk;AePK", "pdf_size": 402370, "rating": "5;5;5;6;6", "confidence": "2;2;3;3;1", "soundness": "3;2;2;3;3", "novelty": "1;3;2;3;2", "presentation": "3;3;3;3;3", "wc_summary": "37;52;69;83;84", "wc_strengths": 
"18;113;65;111;27", "wc_weaknesses": "40;185;95;58;54", "wc_questions": "83;157;7;70;28", "wc_limitations": "5;14;1;49;12", "wc_review": "183;521;237;371;205", "wc_reply_reviewers": "57;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 2.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.0, 18.18790807102345 ], "wc_strengths_avg": [ 66.8, 40.14174884082655 ], "wc_weaknesses_avg": [ 86.4, 52.54559924484637 ], "wc_questions_avg": [ 69.0, 51.896049946021904 ], "wc_limitations_avg": [ 16.2, 17.057549648176316 ], "wc_review_avg": [ 303.4, 126.89144967254492 ], "wc_reply_reviewers_avg": [ 11.4, 22.8 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.21821789023599236, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7363189540056117921&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "deepmind.com;deepmind.com;anu.edu.au;anu.edu.au;google.com;;deepmind.com;", "author_num": 8, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Google;Australian National University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.anu.edu.au", "aff_unique_abbr": "DeepMind;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "United Kingdom;Australia" }, { "title": "Are These the Same Apple? Comparing Images Based on Object Intrinsics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73462", "id": "pu3sNlrgQr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/803c6ab3d62346e004ef70211d2d15b8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pu3sNlrgQr", "openreview": "https://openreview.net/forum?id=pu3sNlrgQr", "poster": "/media/PosterPDFs/NeurIPS%202023/73462.png?t=1702066580.4109848", "slides": "https://nips.cc/virtual/2023/poster/73462", "video": "https://nips.cc/virtual/2023/poster/73462", "author_site": "Klemen Kotar, Stephen Tian, Hong-Xing Yu, Dan Yamins, Jiajun Wu", "tldr": "", "abstract": "The human visual system can effortlessly recognize an object under different extrinsic factors such as lighting, object poses, and background, yet current computer vision systems often struggle with these variations. An important step to understanding and improving artificial vision systems is to measure image similarity purely based on intrinsic object properties that define object identity. This problem has been studied in the computer vision literature as re-identification, though mostly restricted to specific object categories such as people and cars. We propose to extend it to general object categories, exploring an image similarity metric based on object intrinsics. To benchmark such measurements, we collect the Common paired objects Under differenT Extrinsics (CUTE) dataset of 18, 000 images of 180 objects under different extrinsic factors such as lighting, poses, and imaging conditions. 
While existing methods such as LPIPS and CLIP scores do not measure object intrinsics well, we find that combining deep features learned from contrastive self-supervised learning with foreground filtering is a simple yet effective approach to approximating the similarity. We conduct an extensive survey of pre-trained features and foreground extraction methods to arrive at a strong baseline that best measures intrinsic object-centric image similarity among current methods. Finally, we demonstrate that our approach can aid in downstream applications such as acting as an analog for human subjects and improving generalizable re-identification. Please see our project website at https://s-tian.github.io/projects/cute/ for visualizations of the data and demos of our metric.", "keywords": "Object-centric;intrinsic;dataset;metric;image similarity", "primary_area": "", "supplementary_material": "/attachment/e800b4d1f348d2b6a0d163a446d1bf82abac4943.pdf", "author": "Klemen Kotar;Stephen Tian;Hong-Xing Yu;Daniel LK Yamins;Jiajun Wu", "authorids": "~Klemen_Kotar1;~Stephen_Tian1;~Hong-Xing_Yu1;~Daniel_LK_Yamins1;~Jiajun_Wu1", "gender": "M;M;M;M;M", "homepage": ";http://s-tian.github.io;https://kovenyu.com;https://Neuroailab.stanford.edu;https://jiajunwu.com", "dblp": "274/1138;237/9780;205/2676.html;;117/4768", "google_scholar": "uXxhUDoAAAAJ;l19pn2sAAAAJ;kNKncZcAAAAJ;;2efgcS0AAAAJ", "orcid": ";;;;0000-0002-4176-343X", "linkedin": "klemen-kotar-06028ba8/;;;;jiajunwu/", "or_profile": "~Klemen_Kotar1;~Stephen_Tian1;~Hong-Xing_Yu1;~Daniel_LK_Yamins1;~Jiajun_Wu1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nkotar2023are,\ntitle={Are These the Same Apple? 
Comparing Images Based on Object Intrinsics},\nauthor={Klemen Kotar and Stephen Tian and Hong-Xing Yu and Daniel LK Yamins and Jiajun Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pu3sNlrgQr}\n}", "github": "", "project": "", "reviewers": "Ewhk;xype;ieDf;iSG4", "pdf_size": 16841288, "rating": "5;6;6;8", "confidence": "3;5;3;3", "wc_summary_and_contributions": "102;82;113;149", "wc_strengths": "116;92;201;69", "wc_improvement": "500;184;156;456", "wc_limitations": "84;252;10;7", "wc_correctness": "80;14;27;23", "wc_clarity": "183;72;25;36", "wc_relation_to_prior_work": "62;76;68;10", "wc_documentation": "20;65;19;41", "wc_additional_feedback": "1;1;1;1", "wc_review": "1148;838;620;792", "wc_reply_reviewers": "351;61;96;279", "wc_reply_authors": "1609;427;432;1054", "reply_reviewers": "2;1;1;2", "reply_authors": "4;2;2;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 111.5, 24.336187047275914 ], "wc_strengths_avg": [ 119.5, 49.90240475167504 ], "wc_improvement_avg": [ 324.0, 155.09996776273036 ], "wc_limitations_avg": [ 88.25, 99.44439400991892 ], "wc_correctness_avg": [ 36.0, 25.836021365527625 ], "wc_clarity_avg": [ 79.0, 62.50999920012797 ], "wc_relation_to_prior_work_avg": [ 54.0, 25.88435821108957 ], "wc_documentation_avg": [ 36.25, 18.779976038323372 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 849.5, 190.53280557426325 ], "wc_reply_reviewers_avg": [ 196.75, 121.59024426326316 ], "wc_reply_authors_avg": [ 880.5, 491.840675422438 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10111233190590818233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GraphPatcher: Mitigating Degree Bias for Graph Neural Networks via Test-time Augmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70390", "id": "puupdGOWUp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ae9bbdcea94d808882f3535e8ca00542-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=puupdGOWUp", "openreview": "https://openreview.net/forum?id=puupdGOWUp", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70390", "video": "https://nips.cc/virtual/2023/poster/70390", "author_site": "Mingxuan Ju, Tong Zhao, Wenhao Yu, Neil Shah, Yanfang Ye", "tldr": "", "abstract": "Recent studies have shown that graph neural networks (GNNs) exhibit strong biases towards the node degree: they usually perform satisfactorily on high-degree nodes with rich neighbor information but struggle with low-degree nodes. Existing works tackle this problem by deriving either designated GNN architectures or training strategies specifically for low-degree nodes. 
Though effective, these approaches unintentionally create an artificial out-of-distribution scenario, where models mainly or even only observe low-degree nodes during training, leading to downgraded performance for the high-degree nodes on which GNNs originally perform well. In light of this, we propose a test-time augmentation framework, namely GraphPatcher, to enhance test-time generalization of any GNNs on low-degree nodes. Specifically, GraphPatcher iteratively generates virtual nodes to patch artificially created low-degree nodes via corruptions, aiming at progressively reconstructing the target GNN's predictions over a sequence of increasingly corrupted nodes. Through this scheme, GraphPatcher not only learns how to enhance low-degree nodes (when the neighborhoods are heavily corrupted) but also preserves the original superior performance of GNNs on high-degree nodes (when lightly corrupted). Additionally, GraphPatcher is model-agnostic and can also mitigate the degree bias for either self-supervised or supervised GNNs. Comprehensive experiments are conducted over seven benchmark datasets and GraphPatcher consistently enhances common GNNs' overall performance by up to 3.6% and low-degree performance by up to 6.5%, significantly outperforming state-of-the-art baselines. The source code is publicly available at https://github.com/jumxglhf/GraphPatcher.", "keywords": "Graph neural network;Test-time Augmentation", "primary_area": "", "supplementary_material": "/attachment/006338594a91bc51de8e5a4eaa3a2e00c648c2b8.zip", "author": "Mingxuan Ju;Tong Zhao;Wenhao Yu;Neil Shah;Yanfang Ye", "authorids": "~Mingxuan_Ju1;~Tong_Zhao3;~Wenhao_Yu2;~Neil_Shah2;~Yanfang_Ye1", "gender": "M;M;M;M;", "homepage": "https://jumxglhf.github.io;https://tzhao.io/;https://wyu97.github.io/;http://nshah.net;http://yes-lab.org/", "dblp": "234/2715;94/6503-3;159/8117-2.html;71/7771;", "google_scholar": "qNoO67AAAAAJ;05cRc-MAAAAJ;z4qSdX8AAAAJ;Qut69OgAAAAJ;egjr888AAAAJ", "orcid": "0009-0008-9054-3856;0000-0001-7660-1732;0000-0002-4075-5980;0000-0003-3261-8430;", "linkedin": ";;;;", "or_profile": "~Mingxuan_Ju1;~Tong_Zhao3;~Wenhao_Yu2;~Neil_Shah2;~Yanfang_Ye1", "aff": "University of Notre Dame;Snap Inc.;University of Notre Dame;Snap Inc.;University of Notre Dame", "aff_domain": "nd.edu;snap.com;nd.edu;snap.com;nd.edu", "position": "PhD student;Researcher;PhD student;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nju2023graphpatcher,\ntitle={GraphPatcher: Mitigating Degree Bias for Graph Neural Networks via Test-time Augmentation},\nauthor={Mingxuan Ju and Tong Zhao and Wenhao Yu and Neil Shah and Yanfang Ye},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=puupdGOWUp}\n}", "github": "", "project": "", "reviewers": "LJuX;xNZm;Cs3J;ZZgb", "pdf_size": 646317, "rating": "5;5;6;6", "confidence": "3;4;3;4", "soundness": "2;2;2;3", "novelty": "3;2;2;3", "presentation": "3;3;3;4", "wc_summary": "156;106;61;101", "wc_strengths": "84;61;24;66", "wc_weaknesses": "113;204;78;169", "wc_questions": "19;6;48;149", "wc_limitations": "1;30;11;5", "wc_review": "373;407;222;490", "wc_reply_reviewers": "15;15;19;52", "wc_reply_authors": "21;37;20;37", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.0, 33.726843908080106 ],
"wc_strengths_avg": [ 58.75, 21.810261346439663 ], "wc_weaknesses_avg": [ 141.0, 48.74935897014442 ], "wc_questions_avg": [ 55.5, 56.082528473669946 ], "wc_limitations_avg": [ 11.75, 11.121488209767612 ], "wc_review_avg": [ 373.0, 97.01288574204975 ], "wc_reply_reviewers_avg": [ 25.25, 15.530212490497354 ], "wc_reply_authors_avg": [ 28.75, 8.257572282456872 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16074852272943935971&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "nd.edu;snap.com;nd.edu;snap.com;nd.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Notre Dame;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;https://www.snapinc.com", "aff_unique_abbr": "Notre Dame;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Most Neural Networks Are Almost Learnable", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70389", "id": "pvPujuvjQd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4f7b1f51ef415f09e171f632172284ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pvPujuvjQd", "openreview": "https://openreview.net/forum?id=pvPujuvjQd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70389", "video": "https://nips.cc/virtual/2023/poster/70389", "author_site": "Amit Daniely, Nati Srebro, Gal Vardi", "tldr": "", "abstract": "We present a PTAS for learning random constant-depth networks. We show that for any fixed $\\epsilon>0$ and depth $i$, there is a poly-time algorithm that for any distribution on $\\sqrt{d} \\cdot \\mathbb{S}^{d-1}$ learns random Xavier networks of depth $i$, up to an additive error of $\\epsilon$. The algorithm runs in time and sample complexity of $(\\bar{d})^{\\mathrm{poly}(\\epsilon^{-1})}$, where $\\bar d$ is the size of the network. 
For some cases of sigmoid and ReLU-like activations the bound can be improved to $(\\bar{d})^{\\mathrm{polylog}(\\epsilon^{-1})}$, resulting in a quasi-poly-time algorithm for learning constant depth random networks.", "keywords": "learning neural networks;computational complexity;random networks", "primary_area": "", "supplementary_material": "", "author": "Amit Daniely;Nathan Srebro;Gal Vardi", "authorids": "~Amit_Daniely2;~Nathan_Srebro1;~Gal_Vardi1", "gender": "M;M;M", "homepage": "https://www.cs.huji.ac.il/~amitd/;http://ttic.uchicago.edu/~nati/;https://sites.google.com/view/galvardi/home", "dblp": "19/7805;50/3633;https://dblp.uni-trier.de/pid/167/9638.html", "google_scholar": "https://scholar.google.com.tw/citations?user=jUtYwE0AAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ;https://scholar.google.co.il/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Amit_Daniely2;~Nathan_Srebro1;~Gal_Vardi1", "aff": "Google;University of Chicago;Toyota Technological Institute at Chicago", "aff_domain": "google.com;uchicago.edu;ttic.edu", "position": "Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\ndaniely2023most,\ntitle={Most Neural Networks Are Almost Learnable},\nauthor={Amit Daniely and Nathan Srebro and Gal Vardi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pvPujuvjQd}\n}", "github": "", "project": "", "reviewers": "PwvA;JZMD;cQDt;mRGU", "pdf_size": 366670, "rating": "4;6;7;7", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;2;2;4", "wc_summary": "49;66;96;245", "wc_strengths": "51;39;72;121", "wc_weaknesses": "343;71;186;22", "wc_questions": "109;313;6;159", "wc_limitations": "139;26;146;17", "wc_review": "691;515;506;564", "wc_reply_reviewers": "227;69;18;9", "wc_reply_authors": "159;7;0;0", "reply_reviewers": "4;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 114.0, 77.48225603323641 ], "wc_strengths_avg": [ 70.75, 31.32391259086259 ], "wc_weaknesses_avg": [ 155.5, 123.54047919609184 ], "wc_questions_avg": [ 146.75, 110.70766685284268 ], "wc_limitations_avg": [ 82.0, 60.63414879422321 ], "wc_review_avg": [ 569.0, 73.81395532011545 ], "wc_reply_reviewers_avg": [ 80.75, 87.48249824964991 ], "wc_reply_authors_avg": [ 41.5, 67.89882178653765 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=199387861101002013&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "google.com;uchicago.edu;ttic.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "Google;UChicago;TTI Chicago", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Flow-Attention-based Spatio-Temporal Aggregation Network for 3D Mask Detection", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/70388", "id": "pvSKVt3EsM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/456f9445d0fa1a932d19584ab788c787-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pvSKVt3EsM", "openreview": "https://openreview.net/forum?id=pvSKVt3EsM", "poster": "/media/PosterPDFs/NeurIPS%202023/70388.png?t=1699780571.8615441", "slides": "https://nips.cc/virtual/2023/poster/70388", "video": "https://nips.cc/virtual/2023/poster/70388", "author_site": "Yuxin Cao, Yian Li, Yumeng Zhu, Derui Wang, Minhui Xue", "tldr": "", "abstract": "Anti-spoofing detection has become a necessity for face recognition systems due to the security threat posed by spoofing attacks. Despite great success in traditional attacks, most deep-learning-based methods perform poorly in 3D masks, which can highly simulate real faces in appearance and structure, suffering generalizability insufficiency while focusing only on the spatial domain with single frame input. This has been mitigated by the recent introduction of a biomedical technology called rPPG (remote photoplethysmography). However, rPPG-based methods are sensitive to noisy interference and require at least one second (> 25 frames) of observation time, which induces high computational overhead. To address these challenges, we propose a novel 3D mask detection framework, called FASTEN (Flow-Attention-based Spatio-Temporal aggrEgation Network). We tailor the network for focusing more on fine-grained details in large movements, which can eliminate redundant spatio-temporal feature interference and quickly capture splicing traces of 3D masks in fewer frames. Our proposed network contains three key modules: 1) a facial optical flow network to obtain non-RGB inter-frame flow information; 2) flow attention to assign different significance to each frame; 3) spatio-temporal aggregation to aggregate high-level spatial features and temporal transition features. Through extensive experiments, FASTEN only requires five frames of input and outperforms eight competitors for both intra-dataset and cross-dataset evaluations in terms of multiple detection metrics. 
Moreover, FASTEN has been deployed in real-world mobile devices for practical 3D mask detection.", "keywords": "3D mask detection;spatio-temporal aggregation;optical flow;deep learning", "primary_area": "", "supplementary_material": "", "author": "Yuxin Cao;Yian Li;Yumeng Zhu;Derui Wang;Minhui Xue", "authorids": "~Yuxin_Cao1;~Yian_Li2;~Yumeng_Zhu2;~Derui_Wang1;~Minhui_Xue2", "gender": ";M;;;", "homepage": ";https://github.com/LeeeeTX;https://github.com/SanerZ;;", "dblp": "151/7989;263/9744.html;185/7126;;", "google_scholar": "https://scholar.google.com/citations?hl=en;G4qoGK8AAAAJ;;;", "orcid": "0009-0002-5766-0846;0009-0004-3577-0862;;;", "linkedin": ";;;;", "or_profile": "~Yuxin_Cao1;~Yian_Li2;~Yumeng_Zhu2;~Derui_Wang1;~Minhui_Xue2", "aff": "Tsinghua University;ShanghaiTech University;Pingan Technology;;", "aff_domain": "mails.tsinghua.edu.cn;shanghaitech.edu.cn;pingan.com.cn;;", "position": "MS student;Undergrad student;Researcher;;", "bibtex": "@inproceedings{\ncao2023flowattentionbased,\ntitle={Flow-Attention-based Spatio-Temporal Aggregation Network for 3D Mask Detection},\nauthor={Yuxin Cao and Yian Li and Yumeng Zhu and Derui Wang and Minhui Xue},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pvSKVt3EsM}\n}", "github": "", "project": "", "reviewers": "iaxD;bEHS;VV4D;jUZR;U1im", "pdf_size": 1357122, "rating": "3;3;5;6;7", "confidence": "5;5;1;4;4", "soundness": "3;2;3;3;4", "novelty": "3;2;2;2;3", "presentation": "3;3;2;3;4", "wc_summary": "88;30;68;107;43", "wc_strengths": "74;25;66;80;88", "wc_weaknesses": "125;231;56;33;44", "wc_questions": "1;8;3;62;58", "wc_limitations": "2;8;1;39;13", "wc_review": "290;302;194;321;246", "wc_reply_reviewers": "0;0;20;7;39", "wc_reply_authors": "0;0;0;0;31", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 4.8, 1.6 ], "confidence_avg": [ 3.8, 1.469693845669907 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 67.2, 28.23756363427978 ], "wc_strengths_avg": [ 66.6, 22.01454064930722 ], "wc_weaknesses_avg": [ 97.8, 73.92266228972005 ], "wc_questions_avg": [ 26.4, 27.557938965024217 ], "wc_limitations_avg": [ 12.6, 13.893883546366725 ], "wc_review_avg": [ 270.6, 45.55260695064553 ], "wc_reply_reviewers_avg": [ 13.2, 14.824304368165137 ], "wc_reply_authors_avg": [ 6.2, 12.4 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3572172541558801, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9407744856925682937&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mails.tsinghua.edu.cn;shanghaitech.edu.cn;pingan.com.cn;;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;ShanghaiTech University;PingAn Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.shanghaitech.edu.cn;https://www.pingan.com", "aff_unique_abbr": "THU;ShanghaiTech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "ChessGPT: Bridging Policy Learning and Language Modeling", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73461", "id": "pvdm4B6JMK", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/16b14e3f288f076e0ca73bdad6405f77-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pvdm4B6JMK", "openreview": "https://openreview.net/forum?id=pvdm4B6JMK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73461", "video": "https://nips.cc/virtual/2023/poster/73461", "author_site": "Xidong Feng, Yicheng Luo, Ziyan Wang, Hongrui Tang, Mengyue Yang, Kun Shao, David Mguni, Yali Du, Jun Wang", "tldr": "", "abstract": "When solving decision-making tasks, humans typically depend on information from two key sources: (1) Historical policy data, which provides interaction replay from the environment, and (2) Analytical insights in natural language form, exposing the invaluable thought process or strategic considerations. Despite this, the majority of preceding research focuses on only one source: they either use historical replay exclusively to directly learn policy or value functions, or engaged in language model training utilizing mere language corpus. In this paper, we argue that a powerful autonomous agent should cover both sources. Thus, we propose ChessGPT, a GPT model bridging policy learning and language modeling by integrating data from these two sources in Chess games. Specifically, we build a large-scale game and language dataset related to chess. Leveraging the dataset, we showcase two model examples ChessCLIP and ChessGPT, integrating policy learning and language modeling. Finally, we propose a full evaluation framework for evaluating language model's chess ability. Experimental results validate our model and dataset's effectiveness. We open source our code, model, and dataset at https://github.com/waterhorse1/ChessGPT.", "keywords": "Chess;Dataset;large language model;policy learning;language modeling", "primary_area": "", "supplementary_material": "/attachment/5d385de528afcfae2ba22e75b2c52887425ea20f.pdf", "author": "Xidong Feng;Yicheng Luo;Ziyan Wang;Hongrui Tang;Mengyue Yang;Kun Shao;David Henry Mguni;Yali Du;Jun Wang", "authorids": "~Xidong_Feng1;~Yicheng_Luo1;~Ziyan_Wang3;~Hongrui_Tang1;~Mengyue_Yang1;~Kun_Shao1;~David_Henry_Mguni1;~Yali_Du1;~Jun_Wang2", "gender": ";M;M;M;F;;M;;M", "homepage": "https://waterhorse1.github.io/;https://luoyicheng.net/;https://ziyan-wang98.github.io/;;https://ymy4323460.github.io/;;;;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": ";;;;262/3824.html;;217/2369;;w/JunWang12", "google_scholar": "JfOLNu8AAAAJ;635-7jQAAAAJ;1Yu8JFIAAAAJ;;kJJkqdcAAAAJ;;K-_yzBsAAAAJ;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": ";0000-0003-0547-411X;;;;;;;", "linkedin": ";yichengluo/;;hongrui-tang-235532229/;;;;;", "or_profile": "~Xidong_Feng1;~Yicheng_Luo1;~Ziyan_Wang3;~Hongrui_Tang1;~Mengyue_Yang1;~Kun_Shao1;~David_Henry_Mguni1;~Yali_Du1;~Jun_Wang2", "aff": "University College London;University College London, University of London;King's College London;University College London, University of London;University College London;;Queen Mary University, London;;University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;kcl.ac.uk;ucl.ac.uk;ucl.ac.uk;;qmul.ac.uk;;ucl.ac.uk", "position": "PhD student;PhD student;PhD student;Undergrad student;PhD student;;Lecturer;;Professor", "bibtex": "@inproceedings{\nfeng2023chessgpt,\ntitle={Chess{GPT}: Bridging Policy Learning and Language Modeling},\nauthor={Xidong Feng and Yicheng Luo and Ziyan Wang and Hongrui Tang and Mengyue Yang and Kun Shao and David Henry Mguni and Yali Du and Jun 
Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pvdm4B6JMK}\n}", "github": "", "project": "", "reviewers": "HpTJ;y51X;jbSR;9skW", "pdf_size": 2218626, "rating": "6;7;8;8", "confidence": "4;4;4;2", "wc_summary_and_contributions": "54;99;185;142", "wc_strengths": "452;32;162;87", "wc_improvement": "11;228;45;178", "wc_limitations": "11;7;9;63", "wc_correctness": "1;16;21;1", "wc_clarity": "1;1;19;1", "wc_relation_to_prior_work": "1;23;10;12", "wc_documentation": "1;1;9;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "533;408;461;486", "wc_reply_reviewers": "23;22;20;82", "wc_reply_authors": "752;606;127;801", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 120.0, 48.74935897014442 ], "wc_strengths_avg": [ 183.25, 161.8786196506506 ], "wc_improvement_avg": [ 115.5, 90.07358103239817 ], "wc_limitations_avg": [ 22.5, 23.425413550244958 ], "wc_correctness_avg": [ 9.75, 8.926785535678562 ], "wc_clarity_avg": [ 5.5, 7.794228634059948 ], "wc_relation_to_prior_work_avg": [ 11.5, 7.826237921249264 ], "wc_documentation_avg": [ 3.0, 3.4641016151377544 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 472.0, 45.09434554353794 ], "wc_reply_reviewers_avg": [ 36.75, 26.14741861063918 ], "wc_reply_authors_avg": [ 571.5, 266.4681031568319 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11832492475153297831&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ucl.ac.uk;ucl.ac.uk;kcl.ac.uk;ucl.ac.uk;ucl.ac.uk;;qmul.ac.uk;;ucl.ac.uk", "author_num": 9, "aff_unique_index": "0;0;1;0;0;2;0", "aff_unique_norm": "University College London;King's College London;Queen Mary University of London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.kcl.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "UCL;KCL;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Unified Enhancement of Privacy Bounds for Mixture Mechanisms via $f$-Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70387", "id": "pw5hEuEroL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/acb3e20075b0a2dfa3565f06681578e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pw5hEuEroL", "openreview": "https://openreview.net/forum?id=pw5hEuEroL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70387", "video": "https://nips.cc/virtual/2023/poster/70387", "author_site": "Chendi Wang, Buxin Su, Jiayuan Ye, Reza Shokri, Weijie Su", "tldr": "", "abstract": "Differentially private (DP) machine learning algorithms incur many sources of randomness, such as random initialization, random batch subsampling, and shuffling. However, such randomness is difficult to take into account when proving differential privacy bounds because it induces mixture distributions for the algorithm's output that are difficult to analyze. 
\nThis paper focuses on improving privacy bounds for shuffling models and one-iteration differentially private gradient descent (DP-GD) with random initializations using $f$-DP. \nWe derive a closed-form expression of the trade-off function for shuffling models that outperforms the most up-to-date results based on $(\\epsilon,\\delta)$-DP.\nMoreover, we investigate the effects of random initialization on the privacy of one-iteration DP-GD. \nOur numerical computations of the trade-off function indicate that random initialization can enhance the privacy of DP-GD.\nOur analysis of $f$-DP guarantees for these mixture mechanisms relies on an inequality for trade-off functions introduced in this paper. This inequality implies the joint convexity of $F$-divergences. \nFinally, we study an $f$-DP analog of the advanced joint convexity of the hockey-stick divergence related to $(\\epsilon,\\delta)$-DP and apply it to analyze the privacy of mixture mechanisms.", "keywords": "Differential privacy;$f$-DP;mixture mechanisms;shuffling;differentially private gradient descent", "primary_area": "", "supplementary_material": "/attachment/3ad11784542068d4e7b23594475935ec37d19a25.pdf", "author": "Chendi Wang;Buxin Su;Jiayuan Ye;Reza Shokri;Weijie J Su", "authorids": "~Chendi_Wang2;~Buxin_Su1;~Jiayuan_Ye1;~Reza_Shokri1;~Weijie_J_Su1", "gender": "M;M;;;M", "homepage": ";https://www.math.upenn.edu/people/buxin-su;;;http://stat.wharton.upenn.edu/~suw/", "dblp": ";;;;228/9127", "google_scholar": ";;;;Uhf4nBkAAAAJ", "orcid": "0000-0001-5321-1846;;;;", "linkedin": ";;;;", "or_profile": "~Chendi_Wang2;~Buxin_Su1;~Jiayuan_Ye1;~Reza_Shokri1;~Weijie_J_Su1", "aff": "Shenzhen Research Institute of Big Data;University of Pennsylvania;;;University of Pennsylvania", "aff_domain": "cuhk.edu.cn;upenn.edu;;;upenn.edu", "position": "Postdoc;MS student;;;Associate Professor", "bibtex": "@inproceedings{\nwang2023unified,\ntitle={Unified Enhancement of Privacy Bounds for Mixture Mechanisms via \\$f\\$-Differential Privacy},\nauthor={Chendi Wang and Buxin Su and Jiayuan Ye and Reza Shokri and Weijie J Su},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pw5hEuEroL}\n}", "github": "", "project": "", "reviewers": "s9n5;jEfn;oyTp;xB9B;wgR7", "pdf_size": 3375718, "rating": "3;5;6;7;8", "confidence": "4;2;3;4;5", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "47;114;67;48;143", "wc_strengths": "54;15;60;105;89", "wc_weaknesses": "271;57;94;645;216", "wc_questions": "96;25;1;53;20", "wc_limitations": "301;12;21;15;1", "wc_review": "769;223;243;866;469", "wc_reply_reviewers": "849;24;0;17;20", "wc_reply_authors": "3479;0;0;28;23", "reply_reviewers": "3;1;0;1;1", "reply_authors": "8;1;1;2;2", "rating_avg": [ 5.8, 1.7204650534085253 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.8, 38.300913827218274 ], "wc_strengths_avg": [ 64.6, 31.052214091751978 ], "wc_weaknesses_avg": [ 256.6, 209.27933486132832 ], "wc_questions_avg": [ 39.0, 33.00303016391071 ], "wc_limitations_avg": [ 70.0, 115.68232362811528 ], "wc_review_avg": [ 514.0, 264.2256611307842 ], "wc_reply_reviewers_avg": [ 182.0, 333.60035971203627 ], "wc_reply_authors_avg": [ 706.0, 1386.5477993924335 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.8, 
2.638181191654584 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.41036467732879783, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15440923409149019011&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "cuhk.edu.cn;upenn.edu;;;upenn.edu", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Shenzhen Research Institute of Big Data;University of Pennsylvania", "aff_unique_dep": ";", "aff_unique_url": "http://www.sribd.cn;https://www.upenn.edu", "aff_unique_abbr": ";UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Evaluating Self-Supervised Learning for Molecular Graph Embeddings", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73460", "id": "pyhv4qYCEJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6dc15cc2442a40904e704d624d1fbe8-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=pyhv4qYCEJ", "openreview": "https://openreview.net/forum?id=pyhv4qYCEJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73460", "video": "https://nips.cc/virtual/2023/poster/73460", "author_site": "Hanchen Wang, Jean Kaddour, Shengchao Liu, Jian Tang, Joan Lasenby, Qi Liu", "tldr": "", "abstract": "Graph Self-Supervised Learning (GSSL) provides a robust pathway for acquiring embeddings without expert labelling, a capability that carries profound implications for molecular graphs due to the staggering number of potential molecules and the high cost of obtaining labels. However, GSSL methods are designed not for optimisation within a specific domain but rather for transferability across a variety of downstream tasks. This broad applicability complicates their evaluation. Addressing this challenge, we present \"Molecular Graph Representation Evaluation\" (MOLGRAPHEVAL), generating detailed profiles of molecular graph embeddings with interpretable and diversified attributes. MOLGRAPHEVAL offers a suite of probing tasks grouped into three categories: (i) generic graph, (ii) molecular substructure, and (iii) embedding space properties. By leveraging MOLGRAPHEVAL to benchmark existing GSSL methods against both current downstream datasets and our suite of tasks, we uncover significant inconsistencies between inferences drawn solely from existing datasets and those derived from more nuanced probing. 
These findings suggest that current evaluation methodologies fail to capture the entirety of the landscape.", "keywords": "molecular graphs;pre-training;probe models", "primary_area": "", "supplementary_material": "", "author": "Hanchen Wang;Jean Kaddour;Shengchao Liu;Jian Tang;Joan Lasenby;Qi Liu", "authorids": "~Hanchen_Wang1;~Jean_Kaddour1;~Shengchao_Liu1;~Jian_Tang1;~Joan_Lasenby1;~Qi_Liu5", "gender": "M;M;M;;;M", "homepage": "https://www.hanchenw.com/;https://jeankaddour.com/;https://chao1224.github.io/;http://www.jian-tang.com;;http://leuchine.github.io/", "dblp": ";;;181/2667-5;;", "google_scholar": "Yu_0vEEAAAAJ;z90bmSMAAAAJ;F1ws3XUAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ;;Y-OeKMwAAAAJ", "orcid": "0000-0002-1691-024X;;0000-0003-2030-2367;;;0000-0003-4608-5778", "linkedin": "hanchenwang/;;;;;", "or_profile": "~Hanchen_Wang1;~Jean_Kaddour1;~Shengchao_Liu1;~Jian_Tang1;~Joan_Lasenby1;~Qi_Liu5", "aff": "Genentech;University College London;MILA-UdeM;Mila, HEC Montreal;;University of Hong Kong", "aff_domain": "gene.com;ucl.ac.uk;mila.quebec;hec.ca;;hku.hk", "position": "Joint PostDoc;PhD student;PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nwang2023evaluating,\ntitle={Evaluating Self-Supervised Learning for Molecular Graph Embeddings},\nauthor={Hanchen Wang and Jean Kaddour and Shengchao Liu and Jian Tang and Joan Lasenby and Qi Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=pyhv4qYCEJ}\n}", "github": "", "project": "", "reviewers": "rFyK;qGTN;Naos;cQn4;xXbt", "pdf_size": 3913466, "rating": "6;6;6;7;7", "confidence": "4;5;4;4;4", "wc_summary_and_contributions": "83;86;97;15;97", "wc_strengths": "38;78;93;35;116", "wc_improvement": "132;355;86;77;129", "wc_limitations": "9;1;6;1;116", "wc_correctness": "14;45;13;1;242", "wc_clarity": "4;28;85;1;27", "wc_relation_to_prior_work": "31;1;8;1;45", "wc_documentation": "9;1;36;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "321;596;425;133;774", "wc_reply_reviewers": "11;20;0;64;156", "wc_reply_authors": "140;1020;633;169;539", "reply_reviewers": "1;1;0;4;1", "reply_authors": "1;4;1;4;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 75.6, 30.825963083089555 ], "wc_strengths_avg": [ 72.0, 31.426103799230347 ], "wc_improvement_avg": [ 155.8, 102.02627112660738 ], "wc_limitations_avg": [ 26.6, 44.80446406330512 ], "wc_correctness_avg": [ 63.0, 90.67524469225324 ], "wc_clarity_avg": [ 29.0, 30.166206257996713 ], "wc_relation_to_prior_work_avg": [ 17.2, 17.735839421916292 ], "wc_documentation_avg": [ 9.6, 13.558761005342634 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 449.8, 220.92840469256097 ], "wc_reply_reviewers_avg": [ 50.2, 57.188810094283305 ], "wc_reply_authors_avg": [ 500.2, 325.1986469836552 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250536 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.40824829046386313, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15061609449538523749&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "gene.com;ucl.ac.uk;mila.quebec;hec.ca;;hku.hk", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Genentech;University College London;Mila;HEC Montreal;University of Hong Kong", 
"aff_unique_dep": ";;Montreal Institute for Learning Algorithms;HEC Business School;", "aff_unique_url": "https://www.genentech.com;https://www.ucl.ac.uk;https://mila.quebec;https://www.hec.ca;https://www.hku.hk", "aff_unique_abbr": "Genentech;UCL;MILA;HEC;HKU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Montreal;Hong Kong SAR", "aff_country_unique_index": "0;1;2;2;3", "aff_country_unique": "United States;United Kingdom;Canada;China" }, { "title": "StateMask: Explaining Deep Reinforcement Learning through State Mask", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70386", "id": "pzc6LnUxYN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c4bf73386022473a652a18941e9ea6f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=pzc6LnUxYN", "openreview": "https://openreview.net/forum?id=pzc6LnUxYN", "poster": "/media/PosterPDFs/NeurIPS%202023/70386.png?t=1699596994.6211433", "slides": "https://nips.cc/virtual/2023/poster/70386", "video": "https://nips.cc/virtual/2023/poster/70386", "author_site": "Zelei Cheng, Xian Wu, Jiahao Yu, Wenhai Sun, Wenbo Guo, Wenbo Guo, Xinyu Xing", "tldr": "", "abstract": "Despite the promising performance of deep reinforcement learning (DRL) agents in many challenging scenarios, the black-box nature of these agents greatly limits their applications in critical domains. Prior research has proposed several explanation techniques to understand the deep learning-based policies in RL. Most existing methods explain why an agent takes individual actions rather than pinpointing the critical steps to its final reward. To fill this gap, we propose StateMask, a novel method to identify the states most critical to the agent's final reward. The high-level idea of StateMask is to learn a mask net that blinds a target agent and forces it to take random actions at some steps without compromising the agent's performance. Through careful design, we can theoretically ensure that the masked agent performs similarly to the original agent. We evaluate StateMask in various popular RL environments and show its superiority over existing explainers in explanation fidelity. 
We also show that StateMask has better utilities, such as launching adversarial attacks and patching policy errors.", "keywords": "deep reinforcement learning;interpretation;explanation", "primary_area": "", "supplementary_material": "/attachment/d21aed40768c0a4177fb9b115e154fc1d1db4e73.zip", "author": "Zelei Cheng;Xian Wu;Jiahao Yu;Wenhai Sun;Wenbo Guo;Xinyu Xing", "authorids": "~Zelei_Cheng1;~Xian_Wu8;~Jiahao_Yu1;~Wenhai_Sun1;~Wenbo_Guo1;~Xinyu_Xing3", "gender": ";M;M;;M;M", "homepage": ";https://nuwuxian.github.io/;https://sherdencooper.github.io/;;https://henrygwb.github.io/;http://xinyuxing.org/", "dblp": "258/0335;03/5595-7.html;238/6241-1;;144/1238-2.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;ptWUm0EAAAAJ;mB4eowUAAAAJ;;KyPheRMAAAAJ;71rdofMAAAAJ", "orcid": "0000-0001-7478-933X;;;;;", "linkedin": ";;;;;", "or_profile": "~Zelei_Cheng1;~Xian_Wu8;~Jiahao_Yu1;~Wenhai_Sun1;~Wenbo_Guo1;~Xinyu_Xing3", "aff": "Northwestern University;Northwestern University;Northwestern University;;University of California, Berkeley;Northwestern University", "aff_domain": "northwestern.edu;northwestern.edu;northwestern.edu;;berkeley.edu;northwestern.edu", "position": "Researcher;PhD student;PhD student;;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ncheng2023statemask,\ntitle={StateMask: Explaining Deep Reinforcement Learning through State Mask},\nauthor={Zelei Cheng and Xian Wu and Jiahao Yu and Wenhai Sun and Wenbo Guo and Xinyu Xing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=pzc6LnUxYN}\n}", "github": "", "project": "", "reviewers": "AA7o;qciU;oH2u;ENqW", "pdf_size": 31928837, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;2", "presentation": "3;2;3;4", "wc_summary": "67;53;103;105", "wc_strengths": "56;29;133;57", "wc_weaknesses": "64;215;247;65", "wc_questions": "43;110;12;100", "wc_limitations": "1;16;49;1", "wc_review": "231;423;544;328", "wc_reply_reviewers": "21;229;25;86", "wc_reply_authors": "49;1040;67;90", "reply_reviewers": "1;2;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.0, 22.561028345356956 ], "wc_strengths_avg": [ 68.75, 38.758063677124014 ], "wc_weaknesses_avg": [ 147.75, 84.01599550085686 ], "wc_questions_avg": [ 66.25, 40.42508503392417 ], "wc_limitations_avg": [ 16.75, 19.60070151805797 ], "wc_review_avg": [ 381.5, 115.80263382151547 ], "wc_reply_reviewers_avg": [ 90.25, 84.14682109265922 ], "wc_reply_authors_avg": [ 311.5, 420.85062670738654 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7774961599417450297&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "northwestern.edu;northwestern.edu;northwestern.edu;;berkeley.edu;northwestern.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Northwestern University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.berkeley.edu", "aff_unique_abbr": "NU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Consistency of Maximum Likelihood Estimation of Probabilistic Principal Component Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70385", "id": "q0RfX96un8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5b0c0b2c2efdd736a53688ebfdc3bcdb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q0RfX96un8", "openreview": "https://openreview.net/forum?id=q0RfX96un8", "poster": "/media/PosterPDFs/NeurIPS%202023/70385.png?t=1699901853.477561", "slides": "https://nips.cc/virtual/2023/poster/70385", "video": "https://nips.cc/virtual/2023/poster/70385", "author_site": "Arghya Datta, Sayak Chakrabarty", "tldr": "", "abstract": "Probabilistic principal component analysis (PPCA) is currently one of the most used statistical tools to reduce the ambient dimension of the data. From multidimensional scaling to the imputation of missing data, PPCA has a broad spectrum of applications ranging from science and engineering to quantitative finance.\\\\\n\nDespite this wide applicability in various fields, hardly any theoretical guarantees exist to justify the soundness of the maximal likelihood (ML) solution for this model. In fact, it is well known that the maximum likelihood estimation (MLE) can only recover the true model parameters up to a rotation. The main obstruction is posed by the inherent identifiability nature of the PPCA model resulting from the rotational symmetry of the parameterization. To resolve this ambiguity, we propose a novel approach using quotient topological spaces and in particular, we show that the maximum likelihood solution is consistent in an appropriate quotient Euclidean space. Furthermore, our consistency results encompass a more general class of estimators beyond the MLE. 
Strong consistency of the ML estimate and consequently strong covariance estimation of the PPCA model have also been established under a compactness assumption.", "keywords": "maximum likelihood estimate;non-identifiability;Redner approach;quotient topological spaces;consistency", "primary_area": "", "supplementary_material": "", "author": "Arghya Datta;Sayak Chakrabarty", "authorids": "~Arghya_Datta1;~Sayak_Chakrabarty1", "gender": "M;M", "homepage": ";https://hellokayas.github.io/", "dblp": ";336/3841", "google_scholar": "https://scholar.google.ca/citations?user=yUJuOS4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-1860-397X;0009-0004-6179-389X", "linkedin": ";sayak-chakrabarty-cs/", "or_profile": "~Arghya_Datta1;~Sayak_Chakrabarty1", "aff": "Universit\u00e9 de Montr\u00e9al;Northwestern University", "aff_domain": "umontreal.ca;northwestern.edu", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\ndatta2023on,\ntitle={On the Consistency of Maximum Likelihood Estimation of Probabilistic Principal Component Analysis},\nauthor={Arghya Datta and Sayak Chakrabarty},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q0RfX96un8}\n}", "github": "", "project": "", "reviewers": "QWak;ZSYo;MhxU;gPBN", "pdf_size": 303272, "rating": "3;5;6;7", "confidence": "3;4;2;5", "soundness": "3;3;3;3", "novelty": "1;2;3;3", "presentation": "3;4;3;4", "wc_summary": "49;139;60;75", "wc_strengths": "17;16;20;60", "wc_weaknesses": "79;26;15;41", "wc_questions": "15;89;48;129", "wc_limitations": "2;16;1;8", "wc_review": "162;286;144;313", "wc_reply_reviewers": "90;0;0;29", "wc_reply_authors": "431;0;41;19", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 80.75, 34.873879910328306 ], "wc_strengths_avg": [ 28.25, 18.38987493160299 ], "wc_weaknesses_avg": [ 40.25, 24.200981385059574 ], "wc_questions_avg": [ 70.25, 42.868257487329714 ], "wc_limitations_avg": [ 6.75, 5.973901572674261 ], "wc_review_avg": [ 226.25, 74.14302057510201 ], "wc_reply_reviewers_avg": [ 29.75, 36.74489760497367 ], "wc_reply_authors_avg": [ 122.75, 178.55863882769717 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3779644730092272, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18376031589052984577&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "umontreal.ca;northwestern.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umontreal.ca;https://www.northwestern.edu", "aff_unique_abbr": "UdeM;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "title": "SPQR: Controlling Q-ensemble Independence with Spiked Random Model for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70384", "id": "q0sdoFIfNg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cdcaf772b4f8eda0385d0930517de64a-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=q0sdoFIfNg", "openreview": "https://openreview.net/forum?id=q0sdoFIfNg", "poster": "/media/PosterPDFs/NeurIPS%202023/70384.png?t=1701887061.4109771", "slides": "https://nips.cc/virtual/2023/poster/70384", "video": "https://nips.cc/virtual/2023/poster/70384", "author_site": "Dohyeok Lee, Seungyub Han, Taehyun Cho, Jungwoo Lee", "tldr": "", "abstract": "Alleviating overestimation bias is a critical challenge for deep reinforcement learning to achieve successful performance on more complex tasks or offline datasets containing out-of-distribution data. \nIn order to overcome overestimation bias, ensemble methods for Q-learning have been investigated to exploit the diversity of multiple Q-functions. \nSince network initialization has been the predominant approach to promote diversity in Q-functions, heuristically designed diversity injection methods have been studied in the literature. \nHowever, previous studies have not attempted to approach guaranteed independence over an ensemble from a theoretical perspective. \nBy introducing a novel regularization loss for Q-ensemble independence based on random matrix theory, we propose spiked Wishart Q-ensemble independence regularization (SPQR) for reinforcement learning. \nSpecifically, we modify the intractable hypothesis testing criterion for the Q-ensemble independence into a tractable KL divergence between the spectral distribution of the Q-ensemble and the target Wigner's semicircle distribution. \nWe implement SPQR in several online and offline ensemble Q-learning algorithms. \nIn the experiments, SPQR outperforms the baseline algorithms in both online and offline RL benchmarks.", "keywords": "Deep Reinforcement Learning;Ensemble Q-learning", "primary_area": "", "supplementary_material": "", "author": "Dohyeok Lee;Seungyub Han;Taehyun Cho;Jungwoo Lee", "authorids": "~Dohyeok_Lee1;~Seungyub_Han1;~Taehyun_Cho1;~Jungwoo_Lee1", "gender": ";M;M;M", "homepage": "https://dohyeoklee.github.io/;;;https://cml.snu.ac.kr", "dblp": "366/4271.html;347/8731;274/0287;34/516-1", "google_scholar": "RcFFkMYAAAAJ;ot1-XNAAAAAJ;https://scholar.google.com/citations?view_op=list_works;j98IWfoAAAAJ", "orcid": ";0009-0001-8704-8968;0000-0003-1047-9847;0000-0002-6804-980X", "linkedin": "dohyeoklee/;;;", "or_profile": "~Dohyeok_Lee1;~Seungyub_Han1;~Taehyun_Cho1;~Jungwoo_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2023spqr,\ntitle={{SPQR}: Controlling Q-ensemble Independence with Spiked Random Model for Reinforcement Learning},\nauthor={Dohyeok Lee and Seungyub Han and Taehyun Cho and Jungwoo Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q0sdoFIfNg}\n}", "github": "", "project": "", "reviewers": "JH5V;ppMG;KBA2;ve3n;BeK9", "pdf_size": 8207703, "rating": "5;6;6;7;7", "confidence": "3;2;3;4;4", "soundness": "3;3;3;4;4", "novelty": "2;3;3;4;3", "presentation": "3;2;3;3;3", "wc_summary": "47;100;101;79;133", "wc_strengths": "48;85;152;19;190", "wc_weaknesses": "167;144;121;96;98", "wc_questions": "3;53;8;84;39", "wc_limitations": "18;1;1;19;32", "wc_review": "283;383;383;297;492", "wc_reply_reviewers": "11;21;45;12;16", "wc_reply_authors": "16;17;17;17;17", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", 
"rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 92.0, 28.35489375751565 ], "wc_strengths_avg": [ 98.8, 63.697409680457184 ], "wc_weaknesses_avg": [ 125.2, 27.24261367783936 ], "wc_questions_avg": [ 37.4, 29.883774861954773 ], "wc_limitations_avg": [ 14.2, 11.855800268223145 ], "wc_review_avg": [ 367.6, 74.95491978516154 ], "wc_reply_reviewers_avg": [ 21.0, 12.505998560690786 ], "wc_reply_authors_avg": [ 16.8, 0.39999999999999997 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6428571428571428, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2347929337731029853&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Learning Linear Causal Representations from Interventions under General Nonlinear Mixing", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70383", "id": "q131tA7HCT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e5de4cb639ef718f44060dc257cb04f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q131tA7HCT", "openreview": "https://openreview.net/forum?id=q131tA7HCT", "poster": "/media/PosterPDFs/NeurIPS%202023/70383.png?t=1701715143.865961", "slides": "https://nips.cc/virtual/2023/poster/70383", "video": "https://nips.cc/virtual/2023/poster/70383", "author_site": "Simon Buchholz, Goutham Rajendran, Elan Rosenfeld, Bryon Aragam, Bernhard Sch\u00f6lkopf, Pradeep Ravikumar", "tldr": "", "abstract": "We study the problem of learning causal representations from unknown, latent interventions in a general setting, where the latent distribution is Gaussian but the mixing function is completely general. We prove strong identifiability results given unknown single-node interventions, i.e., without having access to the intervention targets. This generalizes prior works which have focused on weaker classes, such as linear maps or paired counterfactual data. This is also the first instance of identifiability from non-paired interventions for deep neural network embeddings and general causal structures. Our proof relies on carefully uncovering the high-dimensional geometric structure present in the data distribution after a non-linear density transformation, which we capture by analyzing quadratic forms of precision matrices of the latent distributions. 
Finally, we propose a contrastive algorithm to identify the latent variables in practice and evaluate its performance on various tasks.", "keywords": "Causal Representation Learning;Interventional data;Gaussian Structural Causal models", "primary_area": "", "supplementary_material": "/attachment/39869436e8494110dc7f0246282a394ae8ef16ca.pdf", "author": "Simon Buchholz;Goutham Rajendran;Elan Rosenfeld;Bryon Aragam;Bernhard Sch\u00f6lkopf;Pradeep Kumar Ravikumar", "authorids": "~Simon_Buchholz1;~Goutham_Rajendran1;~Elan_Rosenfeld1;~Bryon_Aragam1;~Bernhard_Sch\u00f6lkopf1;~Pradeep_Kumar_Ravikumar1", "gender": ";M;M;;;M", "homepage": "https://www.is.mpg.de/person/sbuchholz;https://gouthamrdn.github.io/;;http://bryonaragam.com/;;http://www.cs.cmu.edu/~pradeepr/", "dblp": "207/9068;274/1323;236/4508;140/7564;;94/3594", "google_scholar": ";YVrGTe8AAAAJ;f0j0K8QAAAAJ;u-W3_9QAAAAJ;;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Simon_Buchholz1;~Goutham_Rajendran1;~Elan_Rosenfeld1;~Bryon_Aragam1;~Bernhard_Sch\u00f6lkopf1;~Pradeep_Kumar_Ravikumar1", "aff": "Max-Planck Institute;Carnegie Mellon University;Carnegie Mellon University;Booth School of Business;;Carnegie Mellon University", "aff_domain": "mpg.de;cmu.edu;andrew.cmu.edu;chicagobooth.edu;;cmu.edu", "position": "Postdoc;Postdoc;PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nbuchholz2023learning,\ntitle={Learning Linear Causal Representations from Interventions under General Nonlinear Mixing},\nauthor={Simon Buchholz and Goutham Rajendran and Elan Rosenfeld and Bryon Aragam and Bernhard Sch{\\\"o}lkopf and Pradeep Kumar Ravikumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q131tA7HCT}\n}", "github": "", "project": "", "reviewers": "RqDB;LWnm;uV6M;2jDD", "pdf_size": 643231, "rating": "7;7;8;8", "confidence": "3;4;4;5", "soundness": "2;4;4;4", "novelty": "2;3;4;4", "presentation": "3;4;4;4", "wc_summary": "57;133;106;76", "wc_strengths": "26;59;114;135", "wc_weaknesses": "176;370;251;91", "wc_questions": "31;18;26;232", "wc_limitations": "5;25;41;10", "wc_review": "295;605;538;544", "wc_reply_reviewers": "66;336;28;57", "wc_reply_authors": "0;774;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 28.956864471140516 ], "wc_strengths_avg": [ 83.5, 43.26950427263987 ], "wc_weaknesses_avg": [ 222.0, 102.49634139812015 ], "wc_questions_avg": [ 76.75, 89.75348182661216 ], "wc_limitations_avg": [ 20.25, 14.060138690638865 ], "wc_review_avg": [ 495.5, 118.68972154319009 ], "wc_reply_reviewers_avg": [ 121.75, 124.49171659190823 ], "wc_reply_authors_avg": [ 193.5, 335.15183126457777 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13741908409976149849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "email": "mpg.de;cmu.edu;andrew.cmu.edu;chicagobooth.edu;;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Max-Planck-Gesellschaft zur F\u00f6rderung der 
Wissenschaften e.V.;Carnegie Mellon University;University of Chicago Booth School of Business", "aff_unique_dep": ";;Booth School of Business", "aff_unique_url": "https://www.mpg.de;https://www.cmu.edu;https://www.chicagobooth.edu", "aff_unique_abbr": "MPG;CMU;Booth", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Learning to Search Feasible and Infeasible Regions of Routing Problems with Flexible Neural k-Opt", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70382", "id": "q1JukwH2yP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bae70d354793a95fa18751888cea07d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q1JukwH2yP", "openreview": "https://openreview.net/forum?id=q1JukwH2yP", "poster": "/media/PosterPDFs/NeurIPS%202023/70382.png?t=1699455393.4515402", "slides": "https://nips.cc/virtual/2023/poster/70382", "video": "https://nips.cc/virtual/2023/poster/70382", "author_site": "Yining Ma, Zhiguang Cao, Yeow Meng Chee", "tldr": "", "abstract": "In this paper, we present Neural k-Opt (NeuOpt), a novel learning-to-search (L2S) solver for routing problems. It learns to perform flexible k-opt exchanges based on a tailored action factorization method and a customized recurrent dual-stream decoder. As a pioneering work to circumvent the pure feasibility masking scheme and enable the autonomous exploration of both feasible and infeasible regions, we then propose the Guided Infeasible Region Exploration (GIRE) scheme, which supplements the NeuOpt policy network with feasibility-related features and leverages reward shaping to steer reinforcement learning more effectively. Additionally, we equip NeuOpt with Dynamic Data Augmentation (D2A) for more diverse searches during inference. Extensive experiments on the Traveling Salesman Problem (TSP) and Capacitated Vehicle Routing Problem (CVRP) demonstrate that our NeuOpt not only significantly outstrips existing (masking-based) L2S solvers, but also showcases superiority over the learning-to-construct (L2C) and learning-to-predict (L2P) solvers. Notably, we offer fresh perspectives on how neural solvers can handle VRP constraints. 
Our code is available: https://github.com/yining043/NeuOpt.", "keywords": "learning to optimize;vehicle routing problem;combinatorial optimization", "primary_area": "", "supplementary_material": "", "author": "Yining Ma;Zhiguang Cao;Yeow Meng Chee", "authorids": "~Yining_Ma1;~Zhiguang_Cao1;~Yeow_Meng_Chee2", "gender": "M;M;M", "homepage": "https://yining043.github.io/;https://zhiguangcaosg.github.io/;", "dblp": "160/6245-1;178/8621;c/YeowMengChee.html", "google_scholar": "4_VyBTsAAAAJ;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;https://scholar.google.com.sg/citations?user=99AJNXEAAAAJ", "orcid": "0000-0002-6639-8547;0000-0002-4499-759X;0000-0001-7823-8068", "linkedin": "yiningma/;;", "or_profile": "~Yining_Ma1;~Zhiguang_Cao1;~Yeow_Meng_Chee2", "aff": "National University of Singapore;Institute for Infocomm Research, A*STAR;National University of Singapore", "aff_domain": "u.nus.edu;i2r.a-star.edu.sg;nus.edu.sg", "position": "PhD student;Scientist ;Full Professor", "bibtex": "@inproceedings{\nma2023learning,\ntitle={Learning to Search Feasible and Infeasible Regions of Routing Problems with Flexible Neural k-Opt},\nauthor={Yining Ma and Zhiguang Cao and Yeow Meng Chee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q1JukwH2yP}\n}", "github": "", "project": "", "reviewers": "2dzf;vydT;NeSp;8kc7", "pdf_size": 2535924, "rating": "5;6;6;7", "confidence": "4;2;3;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "109;135;176;87", "wc_strengths": "59;71;143;122", "wc_weaknesses": "65;46;87;206", "wc_questions": "42;112;69;108", "wc_limitations": "52;12;2;20", "wc_review": "327;376;477;543", "wc_reply_reviewers": "277;12;24;55", "wc_reply_authors": "1327;28;62;33", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.75, 33.12382073372575 ], "wc_strengths_avg": [ 98.75, 34.816483165305485 ], "wc_weaknesses_avg": [ 101.0, 62.333778964538965 ], "wc_questions_avg": [ 82.75, 28.908260065247788 ], "wc_limitations_avg": [ 21.5, 18.728320800328042 ], "wc_review_avg": [ 430.75, 84.41082572750962 ], "wc_reply_reviewers_avg": [ 92.0, 107.95600955944973 ], "wc_reply_authors_avg": [ 362.5, 557.0056103846711 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3051444634716723703&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": "u.nus.edu;i2r.a-star.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "NUS;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "q1NaqDadKM", "title": "LVLM-eHub: A Comprehensive Evaluation Benchmark for Large Vision-Language Models", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Large Vision-Language Models (LVLM) have recently played a 
dominant role in multimodal vision-language learning. Despite their great success, a holistic evaluation of their efficacy is still lacking. This paper presents a comprehensive evaluation of publicly available large multimodal models by building an LVLM evaluation Hub (LVLM-eHub). Our LVLM-eHub consists of $8$ representative LVLMs such as InstructBLIP and MiniGPT-4, which are thoroughly evaluated by a quantitative capability evaluation and an online arena platform. The former evaluates $6$ categories of multimodal capabilities of LVLMs such as visual question answering and embodied artificial intelligence on $40$ standard text-related visual benchmarks, while the latter provides the user-level evaluation of LVLMs in an open-world question-answering scenario. The study reveals several innovative findings. First, instruction-tuned LVLMs with massive in-domain data such as InstructBLIP heavily overfit many existing tasks, generalizing poorly in the open-world scenario. Second, instruction-tuned LVLMs with moderate instruction-following data may result in object hallucination issues (i.e., generating objects that are inconsistent with target images in the descriptions). This either makes current evaluation metrics such as CIDEr for image captioning ineffective or leads to wrong answers. Third, employing a multi-turn reasoning evaluation framework can mitigate the issue of object hallucination, shedding light on developing an effective metric for LVLM evaluation. The findings provide a foundational framework for the conception and assessment of innovative strategies aimed at enhancing zero-shot multimodal techniques. The evaluation pipeline will be available at [vlarena page](https://github.com/OpenGVLab/Multi-Modality-Arena/tree/main/LVLM_evaluation).", "keywords": "Multi-modality Evaluation; Large Visual-Language Models", "primary_area": "", "supplementary_material": "/attachment/b67ff35b21edaf4630ca1078ce131fa5139b299c.pdf", "author": "Peng Xu;Wenqi Shao;Kaipeng Zhang;Peng Gao;Shuo Liu;Fanqing Meng;Siyuan Huang;Meng Lei;Ping Luo;Yu Qiao", "authorids": "~Peng_Xu11;~Wenqi_Shao2;~Kaipeng_Zhang1;~Peng_Gao3;~Shuo_Liu5;~Fanqing_Meng1;~Siyuan_Huang4;~Meng_Lei1;~Ping_Luo2;~Yu_Qiao1", "gender": "M;M;M;;F;M;M;;;", "homepage": ";https://wqshao126.github.io/;http://kpzhang93.github.io/;;;https://github.com/FanqingM;https://siyuanhuang95.github.io/;https://www.https.com;;", "dblp": ";227/3122;179/2126;;07/6773;;62/885-4.html;;;", "google_scholar": ";Bs9mrwwAAAAJ;4OqZBmYAAAAJ;;https://scholar.google.com.tw/citations?hl=zh-CN;iUIC-JEAAAAJ;QNkS4KEAAAAJ;MBpW398AAAAJ;;", "orcid": ";;;;;0000-0002-0920-3539;0009-0005-6363-833X;0000-0001-8368-3211;;", "linkedin": "https://www.linkedin.cn/incareer/in/peng-xu-250466206;;;;;;siyuan-huang-979672149/;;;", "or_profile": "~Peng_Xu11;~Wenqi_Shao2;~Kaipeng_Zhang1;~Peng_Gao3;~Shuo_Liu5;~Fanqing_Meng1;~Siyuan_Huang4;~Meng_Lei1;~Ping_Luo2;~Yu_Qiao1", "aff": "University of Hong Kong;Shanghai AI Laboratory;Shanghai AI Laboratory;;Shanghai AI lab;Tongji University;Shanghai Jiaotong University;Peking University;;", "aff_domain": "hku.hk;pjlab.org.cn;pjlab.org.cn;;pjlab.org;tongji.edu.cn;sjtu.edu.cn;pku.edu.cn;;", "position": "PhD student;Researcher;Researcher;;Researcher;Undergrad student;PhD student;MS student;;", "bibtex": "@misc{\nxu2023lvlmehub,\ntitle={{LVLM}-eHub: A Comprehensive Evaluation Benchmark for Large Vision-Language Models},\nauthor={Peng Xu and Wenqi Shao and Kaipeng Zhang and Peng Gao and Shuo Liu and Fanqing Meng and Siyuan Huang and Meng Lei and Ping Luo and Yu 
Qiao},\nyear={2023},\nurl={https://openreview.net/forum?id=q1NaqDadKM}\n}", "github": "", "project": "", "reviewers": "r23C;Z9KL;5Q2T;ayea;j6vp", "site": "https://openreview.net/forum?id=q1NaqDadKM", "pdf_size": 4747670, "rating": "5;6;6;8;8", "confidence": "4;3;4;4;4", "wc_summary_and_contributions": "33;66;47;85;41", "wc_strengths": "31;96;26;81;49", "wc_improvement": "110;692;45;224;54", "wc_limitations": "29;24;113;36;74", "wc_correctness": "5;77;17;4;1", "wc_clarity": "8;77;1;11;136", "wc_relation_to_prior_work": "5;8;1;1;1", "wc_documentation": "8;27;1;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "230;1068;252;444;358", "wc_reply_reviewers": "0;367;0;0;29", "wc_reply_authors": "998;3913;561;658;812", "reply_reviewers": "0;1;0;0;1", "reply_authors": "2;6;1;1;2", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 54.4, 18.778711350888802 ], "wc_strengths_avg": [ 56.6, 27.55793896502422 ], "wc_improvement_avg": [ 225.0, 242.0561918233037 ], "wc_limitations_avg": [ 55.2, 33.83134641127958 ], "wc_correctness_avg": [ 20.8, 28.624465060503752 ], "wc_clarity_avg": [ 46.6, 52.446544214085264 ], "wc_relation_to_prior_work_avg": [ 3.2, 2.85657137141714 ], "wc_documentation_avg": [ 7.6, 10.071742649611338 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 470.4, 308.5278593579517 ], "wc_reply_reviewers_avg": [ 79.2, 144.33765967341995 ], "wc_reply_authors_avg": [ 1388.4, 1270.9310917591085 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.24999999999999997, "gs_citation": 207, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5567829722392728085&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;2;3;4;5", "aff_unique_norm": "University of Hong Kong;Shanghai AI Laboratory;Shanghai AI Lab;Tongji University;Shanghai Jiao Tong University;Peking University", "aff_unique_dep": ";;AI Research;;;", "aff_unique_url": "https://www.hku.hk;https://www.shanghai-ai-lab.com;https://www.shanghaiailab.com;https://www.tongji.edu.cn;https://www.sjtu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "HKU;SAIL;Shanghai AI Lab;Tongji;SJTU;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "IMP-MARL: a Suite of Environments for Large-scale Infrastructure Management Planning via MARL", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73459", "id": "q3FJk2Nvkk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a7a7c0c92f195cce85f99768621ac6c0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=q3FJk2Nvkk", "openreview": "https://openreview.net/forum?id=q3FJk2Nvkk", "poster": "/media/PosterPDFs/NeurIPS%202023/73459.png?t=1700475721.9568927", "slides": "https://nips.cc/virtual/2023/poster/73459", "video": "https://nips.cc/virtual/2023/poster/73459", "author_site": "Pascal Leroy, Pablo G. 
Morato, Jonathan Pisane, Athanasios Kolios, Damien Ernst", "tldr": "", "abstract": "We introduce IMP-MARL, an open-source suite of multi-agent reinforcement learning (MARL) environments for large-scale Infrastructure Management Planning (IMP), offering a platform for benchmarking the scalability of cooperative MARL methods in real-world engineering applications.\nIn IMP, a multi-component engineering system is subject to a risk of failure due to its components' damage condition.\nSpecifically, each agent plans inspections and repairs for a specific system component, aiming to minimise maintenance costs while cooperating to minimise system failure risk.\nWith IMP-MARL, we release several environments including one related to offshore wind structural systems, in an effort to meet today's needs to improve management strategies to support sustainable and reliable energy systems.\nSupported by IMP practical engineering environments featuring up to 100 agents, we conduct a benchmark campaign, where the scalability and performance of state-of-the-art cooperative MARL methods are compared against expert-based heuristic policies. \nThe results reveal that centralised training with decentralised execution methods scale better with the number of agents than fully centralised or decentralised RL approaches, while also outperforming expert-based heuristic policies in most IMP environments.\nBased on our findings, we additionally outline remaining cooperation and scalability challenges that future MARL methods should still address.\nThrough IMP-MARL, we encourage the implementation of new environments and the further development of MARL methods.", "keywords": "cooperative multi-agent reinforcement learning;infrastructure management planning;centralised training decentralised execution;benchmark", "primary_area": "", "supplementary_material": "/attachment/7dc1a51d07a836c26dce5e4c10ddc579e8b82f7b.pdf", "author": "Pascal Leroy;Pablo G. Morato;Jonathan Pisane;Athanasios Kolios;Damien Ernst", "authorids": "~Pascal_Leroy1;~Pablo_G._Morato1;~Jonathan_Pisane1;~Athanasios_Kolios1;~Damien_Ernst1", "gender": "M;M;M;M;M", "homepage": "https://paleroy.github.io/;https://moratodpg.github.io/;;https://orbit.dtu.dk/en/persons/athanasios-kolios;http://www.damien-ernst.be", "dblp": ";281/2561;;;", "google_scholar": "https://scholar.google.be/citations?user=5v2aNJkAAAAJ;-px7BBgAAAAJ;3gp_OsAAAAAJ;;https://scholar.google.be/citations?user=91ZxYSsAAAAJ", "orcid": "0009-0006-2418-8273;0000-0002-2744-0650;;;", "linkedin": "pascal-leroy-1b1a1613a/;pablomorato;jpisane/;;", "or_profile": "~Pascal_Leroy1;~Pablo_G._Morato1;~Jonathan_Pisane1;~Athanasios_Kolios1;~Damien_Ernst1", "aff": "University of Li\u00e8ge;University of Li\u00e8ge;Thales;Technical University of Denmark;University of Li\u00e8ge", "aff_domain": "uliege.be;uliege.be;thalesgroup.com;dtu.dk;uliege.be", "position": "PhD student;Postdoc;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nleroy2023impmarl,\ntitle={{IMP}-{MARL}: a Suite of Environments for Large-scale Infrastructure Management Planning via {MARL}},\nauthor={Pascal Leroy and Pablo G. 
Morato and Jonathan Pisane and Athanasios Kolios and Damien Ernst},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=q3FJk2Nvkk}\n}", "github": "", "project": "", "reviewers": "y9Yt;FPDs;3EU2;quZJ", "pdf_size": 4431446, "rating": "4;8;8;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "34;71;68;40", "wc_strengths": "88;115;108;26", "wc_improvement": "63;89;152;28", "wc_limitations": "2;210;10;22", "wc_correctness": "1;45;10;8", "wc_clarity": "1;15;21;6", "wc_relation_to_prior_work": "1;11;46;13", "wc_documentation": "1;41;11;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "192;598;427;153", "wc_reply_reviewers": "0;27;16;0", "wc_reply_authors": "407;756;683;285", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 53.25, 16.422164899914993 ], "wc_strengths_avg": [ 84.25, 35.05977039285911 ], "wc_improvement_avg": [ 83.0, 45.33762234612662 ], "wc_limitations_avg": [ 61.0, 86.31917515824627 ], "wc_correctness_avg": [ 16.0, 17.073371078963874 ], "wc_clarity_avg": [ 10.75, 7.75806032459145 ], "wc_relation_to_prior_work_avg": [ 17.75, 16.931848688197046 ], "wc_documentation_avg": [ 15.5, 15.190457530963313 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 342.5, 180.95648648224798 ], "wc_reply_reviewers_avg": [ 10.75, 11.431863365173676 ], "wc_reply_authors_avg": [ 532.75, 193.3964516220502 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.22549380840084865, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13504183353405499435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "uliege.be;uliege.be;thalesgroup.com;dtu.dk;uliege.be", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Li\u00e8ge;Thales Group;Technical University of Denmark", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ulg.ac.be;https://www.thalesgroup.com;https://www.dtu.dk", "aff_unique_abbr": "ULi\u00e8ge;Thales;DTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Belgium;France;Denmark" }, { "title": "Brain Dissection: fMRI-trained Networks Reveal Spatial Selectivity in the Processing of Natural Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70381", "id": "q3fA5tTod3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/90e06fe49254204248cb12562528b952-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q3fA5tTod3", "openreview": "https://openreview.net/forum?id=q3fA5tTod3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70381", "video": "https://nips.cc/virtual/2023/poster/70381", "author_site": "Gabriel Sarch, Michael Tarr, Katerina Fragkiadaki, Leila Wehbe", "tldr": "", "abstract": "The alignment between deep neural network (DNN) features and cortical responses currently provides the most accurate quantitative explanation for higher visual areas. At the same time, these model features have been critiqued as uninterpretable explanations, trading one black box (the human brain) for another (a neural network). 
In this paper, we train networks to directly predict, from scratch, brain responses to images from a large-scale dataset of natural scenes (Allen et al., 2021). We then use \"network dissection\" (Bau et al., 2017), an explainable AI technique used for enhancing neural network interpretability by identifying and localizing the most significant features in images for individual units of a trained network, which has been used to study category selectivity in the human brain (Khosla & Wehbe, 2022). We adapt this approach to create a hypothesis-neutral model that is then used to explore the tuning properties of specific visual regions beyond category selectivity, which we call \"brain dissection\". We use brain dissection to examine a range of ecologically important, intermediate properties, including depth, surface normals, curvature, and object relations across sub-regions of the parietal, lateral, and ventral visual streams, and scene-selective regions. Our findings reveal distinct preferences in brain regions for interpreting visual scenes, with ventro-lateral areas favoring closer and curvier features, medial and parietal areas opting for more varied and flatter 3D elements, and the parietal region uniquely preferring spatial relations. Scene-selective regions exhibit varied preferences, as the retrosplenial complex prefers distant and outdoor features, while the occipital and parahippocampal place areas favor proximity, verticality, and in the case of the OPA, indoor elements. Such findings show the potential of using explainable AI to uncover spatial feature selectivity across the visual cortex, contributing to a deeper, more fine-grained understanding of the functional characteristics of human visual cortex when viewing natural scenes.", "keywords": "Computational Neuroscience;Deep Neural Networks;Visual Neuroscience;Visual Streams;Scene Perception;Brain Imaging", "primary_area": "", "supplementary_material": "", "author": "Gabriel Herbert Sarch;Michael J. Tarr;Katerina Fragkiadaki;Leila Wehbe", "authorids": "~Gabriel_Herbert_Sarch1;~Michael_J._Tarr1;~Katerina_Fragkiadaki1;~Leila_Wehbe1", "gender": "M;F;F;M", "homepage": "https://gabesarch.me/;https://www.cs.cmu.edu/~katef/;http://www.cs.cmu.edu/~lwehbe/;https://tarrlab.org", "dblp": "280/0151;21/8780;125/4359;36/1880", "google_scholar": "9rYWAhsAAAAJ;FWp7728AAAAJ;YezyUawAAAAJ;O8ALPlkAAAAJ", "orcid": ";;0000-0001-8545-2062;0000-0003-4724-1744", "linkedin": ";;;michael-tarr-ab078046/", "or_profile": "~Gabriel_Herbert_Sarch1;~Katerina_Fragkiadaki1;~Leila_Wehbe1;~Michael_Tarr1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsarch2023brain,\ntitle={Brain Dissection: f{MRI}-trained Networks Reveal Spatial Selectivity in the Processing of Natural Images},\nauthor={Gabriel Herbert Sarch and Michael J. 
Tarr and Katerina Fragkiadaki and Leila Wehbe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q3fA5tTod3}\n}", "github": "", "project": "", "reviewers": "c89B;HWnv;MstC;9bSv", "pdf_size": 49244931, "rating": "5;5;6;6", "confidence": "4;4;4;5", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "4;4;3;3", "wc_summary": "57;47;67;101", "wc_strengths": "32;25;187;23", "wc_weaknesses": "117;97;8;23", "wc_questions": "108;108;77;299", "wc_limitations": "95;7;33;14", "wc_review": "409;284;372;460", "wc_reply_reviewers": "55;7;26;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 68.0, 20.322401432901575 ], "wc_strengths_avg": [ 66.75, 69.50674427708437 ], "wc_weaknesses_avg": [ 61.25, 46.59600304747179 ], "wc_questions_avg": [ 148.0, 88.09370011527498 ], "wc_limitations_avg": [ 37.25, 34.67257561820293 ], "wc_review_avg": [ 381.25, 64.25486362914484 ], "wc_reply_reviewers_avg": [ 25.5, 18.33712082089225 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3844397718091979107&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Data Subset Selection to Generalize Training Across Models: Transductive and Inductive Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70380", "id": "q3fCWoC9l0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f25eb6e9dc26c933a5d7516abf1eb8c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q3fCWoC9l0", "openreview": "https://openreview.net/forum?id=q3fCWoC9l0", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70380", "video": "https://nips.cc/virtual/2023/poster/70380", "author_site": "Eeshaan Jain, Tushar Nandy, Gaurav Aggarwal, Ashish Tendulkar, Rishabh Iyer, Abir De", "tldr": "", "abstract": "Existing subset selection methods for efficient learning predominantly employ discrete combinatorial and model-specific approaches, which lack generalizability--- for each new model, the algorithm has to be executed from the beginning. Therefore, for an unseen architecture, one cannot use the subset chosen for a different model. In this work, we propose $\\texttt{SubSelNet}$, a non-adaptive subset selection framework, which tackles these problems. Here, we first introduce an attention-based neural gadget that leverages the graph structure of architectures and acts as a surrogate to trained deep neural networks for quick model prediction. Then, we use these predictions to build subset samplers. This naturally provides us two variants of $\\texttt{SubSelNet}$. 
The first variant is transductive (called Transductive-$\\texttt{SubSelNet}$), which computes the subset separately for each model by solving a small optimization problem. Such an optimization is still super fast, thanks to the replacement of explicit model training by the model approximator. The second variant is inductive (called Inductive-$\\texttt{SubSelNet}$), which computes the subset using a trained subset selector, without any optimization. \nOur experiments show that our model outperforms several methods across several real datasets.", "keywords": "Data Subset Selection;Efficient Learning", "primary_area": "", "supplementary_material": "", "author": "Eeshaan Jain;Tushar Nandy;Gaurav Aggarwal;Ashish V. Tendulkar;Rishabh K Iyer;Abir De", "authorids": "~Eeshaan_Jain1;~Tushar_Nandy1;~Gaurav_Aggarwal4;~Ashish_V._Tendulkar1;~Rishabh_K_Iyer2;~Abir_De1", "gender": "M;M;;;M;M", "homepage": "https://eeshaanjain.github.io;;;;https://www.rishiyer.com;", "dblp": ";;14/5218;08/1521;37/10544.html;118/7174", "google_scholar": "r5rqqJEAAAAJ;;https://scholar.google.co.in/citations?user=9XiIwDQAAAAJ;;l_XxJ1kAAAAJ;https://scholar.google.co.in/citations?user=_9ZKKbIAAAAJ", "orcid": ";;;;;", "linkedin": "eeshaanjain/;tushar-nandy/;;;rishabh-iyer-36893717/;", "or_profile": "~Eeshaan_Jain1;~Tushar_Nandy1;~Gaurav_Aggarwal4;~Ashish_V._Tendulkar1;~Rishabh_K_Iyer2;~Abir_De1", "aff": "Indian Institute of Technology, Bombay;Indian Institute of Technology, Bombay;Google;Google;Microsoft;Indian Institute of Technology Bombay,", "aff_domain": "iitb.ac.in;iitb.ac.in;google.com;google.com;microsoft.com;iitb.ac.in", "position": "Undergrad student;Undergrad student;Researcher;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\njain2023efficient,\ntitle={Efficient Data Subset Selection to Generalize Training Across Models: Transductive and Inductive Networks},\nauthor={Eeshaan Jain and Tushar Nandy and Gaurav Aggarwal and Ashish V. 
Tendulkar and Rishabh K Iyer and Abir De},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q3fCWoC9l0}\n}", "github": "", "project": "", "reviewers": "X3mh;rt8F;U6Yc;VPxr;KRgz;awrD", "pdf_size": 1746826, "rating": "5;5;6;6;6;9", "confidence": "3;3;2;3;4;2", "soundness": "2;2;3;3;2;4", "novelty": "3;3;3;2;3;4", "presentation": "1;3;3;3;3;3", "wc_summary": "17;41;75;148;188;180", "wc_strengths": "46;167;56;76;200;74", "wc_weaknesses": "167;316;117;796;196;50", "wc_questions": "18;423;2;53;35;217", "wc_limitations": "14;28;35;9;27;9", "wc_review": "262;975;285;1082;646;530", "wc_reply_reviewers": "423;63;13;253;69;15", "wc_reply_authors": "765;192;0;965;0;0", "reply_reviewers": "2;1;1;2;1;1", "reply_authors": "3;2;1;3;1;1", "rating_avg": [ 6.166666666666667, 1.3437096247164249 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 2.6666666666666665, 0.7453559924999298 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 108.16666666666667, 67.13522837444502 ], "wc_strengths_avg": [ 103.16666666666667, 58.49905032467527 ], "wc_weaknesses_avg": [ 273.6666666666667, 247.18593990939064 ], "wc_questions_avg": [ 124.66666666666667, 151.17833472058385 ], "wc_limitations_avg": [ 20.333333333333332, 10.126971687308874 ], "wc_review_avg": [ 630.0, 313.05856747047613 ], "wc_reply_reviewers_avg": [ 139.33333333333334, 150.32150729538188 ], "wc_reply_authors_avg": [ 320.3333333333333, 395.3128156564397 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.8333333333333333, 0.8975274678557508 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5114083119567587, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=304134140646087627&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "iitb.ac.in;iitb.ac.in;google.com;google.com;microsoft.com;iitb.ac.in", "author_num": 6, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Indian Institute of Technology Bombay;Google;Microsoft", "aff_unique_dep": ";Google;Microsoft Corporation", "aff_unique_url": "https://www.iitb.ac.in;https://www.google.com;https://www.microsoft.com", "aff_unique_abbr": "IIT Bombay;Google;Microsoft", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Bombay;Mountain View;", "aff_country_unique_index": "0;0;1;1;1;0", "aff_country_unique": "India;United States" }, { "title": "Discriminative Feature Attributions: Bridging Post Hoc Explainability and Inherent Interpretability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70379", "id": "q4HlFS7B7Y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/89beb2a345269f3f9afe48cee35403aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q4HlFS7B7Y", "openreview": "https://openreview.net/forum?id=q4HlFS7B7Y", "poster": "/media/PosterPDFs/NeurIPS%202023/70379.png?t=1701797659.8398728", "slides": "https://nips.cc/virtual/2023/poster/70379", "video": "https://nips.cc/virtual/2023/poster/70379", "author_site": "Usha Bhalla, Suraj Srinivas, Himabindu Lakkaraju", "tldr": "", "abstract": "With the increased deployment of machine learning models in various real-world applications, researchers and practitioners alike have emphasized the need for explanations of model behaviour. 
To this end, two broad strategies have been outlined in prior literature to explain models. Post hoc explanation methods explain the behaviour of complex black-box models by identifying features critical to model predictions; however, prior work has shown that these explanations may not be faithful, in that they incorrectly attribute high importance to features that are unimportant or non-discriminative for the underlying task. Inherently interpretable models, on the other hand, circumvent these issues by explicitly encoding explanations into model architecture, meaning their explanations are naturally faithful, but they often exhibit poor predictive performance due to their limited expressive power. In this work, we identify a key reason for the lack of faithfulness of feature attributions: the lack of robustness of the underlying black-box models, especially the erasure of unimportant distractor features in the input. To address this issue, we propose Distractor Erasure Tuning (DiET), a method that adapts black-box models to be robust to distractor erasure, thus providing discriminative and faithful attributions. This strategy naturally combines the ease-of-use of post hoc explanations with the faithfulness of inherently interpretable models. We perform extensive experiments on semi-synthetic and real-world datasets, and show that DiET produces models that (1) closely approximate the original black-box models they are intended to explain, and (2) yield explanations that match approximate ground truths available by construction.", "keywords": "Machine Learning Explainability;Machine Learning Interpretability", "primary_area": "", "supplementary_material": "", "author": "Usha Bhalla;Suraj Srinivas;Himabindu Lakkaraju", "authorids": "~Usha_Bhalla1;~Suraj_Srinivas1;~Himabindu_Lakkaraju1", "gender": ";M;", "homepage": ";https://suraj-srinivas.github.io/;", "dblp": ";144/0584;", "google_scholar": ";https://scholar.google.co.in/citations?user=J2JWgKgAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Usha_Bhalla1;~Suraj_Srinivas1;~Himabindu_Lakkaraju1", "aff": ";School of Engineering and Applied Sciences, Harvard University;", "aff_domain": ";seas.harvard.edu;", "position": ";Postdoc;", "bibtex": "@inproceedings{\nbhalla2023discriminative,\ntitle={Discriminative Feature Attributions: Bridging Post Hoc Explainability and Inherent Interpretability},\nauthor={Usha Bhalla and Suraj Srinivas and Himabindu Lakkaraju},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q4HlFS7B7Y}\n}", "github": "", "project": "", "reviewers": "2ozo;qGoL;QRQ5;dxrg", "pdf_size": 1214250, "rating": "3;5;5;7", "confidence": "5;4;4;4", "soundness": "2;2;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "41;158;158;128", "wc_strengths": "28;78;48;129", "wc_weaknesses": "565;158;164;305", "wc_questions": "3;214;253;35", "wc_limitations": "11;5;11;29", "wc_review": "648;613;634;626", "wc_reply_reviewers": "88;532;445;44", "wc_reply_authors": "16;945;1471;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;3;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 121.25, 47.92376758978784 ], "wc_strengths_avg": [ 70.75, 38.04848880047669 ], "wc_weaknesses_avg": [ 298.0, 164.99545448284326 ], "wc_questions_avg": [ 126.25, 
108.72298515033516 ], "wc_limitations_avg": [ 14.0, 9.0 ], "wc_review_avg": [ 630.25, 12.695963925594622 ], "wc_reply_reviewers_avg": [ 277.25, 214.04365792987187 ], "wc_reply_authors_avg": [ 608.0, 628.1850841909572 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15777621156131520626&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";seas.harvard.edu;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "School of Engineering and Applied Sciences", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "rPPG-Toolbox: Deep Remote PPG Toolbox", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73458", "id": "q4XNX15kSe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d7d0d548a6317407e02230f15ce75817-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=q4XNX15kSe", "openreview": "https://openreview.net/forum?id=q4XNX15kSe", "poster": "/media/PosterPDFs/NeurIPS%202023/73458.png?t=1702170924.7002192", "slides": "https://nips.cc/virtual/2023/poster/73458", "video": "https://nips.cc/virtual/2023/poster/73458", "author_site": "Xin Liu, Girish Narayanswamy, Akshay Paruchuri, Xiaoyu Zhang, Jiankai Tang, Yuzhe Zhang, Roni Sengupta, Shwetak Patel, Yuntao Wang, Daniel McDuff", "tldr": "", "abstract": "Camera-based physiological measurement is a fast growing field of computer vision. Remote photoplethysmography (rPPG) utilizes imaging devices (e.g., cameras) to measure the peripheral blood volume pulse (BVP) via photoplethysmography, and enables cardiac measurement via webcams and smartphones. However, the task is non-trivial with important pre-processing, modeling and post-processing steps required to obtain state-of-the-art results. Replication of results and benchmarking of new models is critical for scientific progress; however, as with many other applications of deep learning, reliable codebases are not easy to find or use. 
We present a comprehensive toolbox, rPPG-Toolbox, containing unsupervised and supervised rPPG models with support for public benchmark datasets, data augmentation and systematic evaluation: https://github.com/ubicomplab/rPPG-Toolbox.", "keywords": "rPPG;Computer Vision;Physiological Sensing;Mobile Health;Machine Learning for Healthcare", "primary_area": "", "supplementary_material": "", "author": "Xin Liu;Girish Narayanswamy;Akshay Paruchuri;Xiaoyu Zhang;Jiankai Tang;Yuzhe Zhang;Roni Sengupta;Shwetak Patel;Yuntao Wang;Daniel McDuff", "authorids": "~Xin_Liu8;~Girish_Narayanswamy1;~Akshay_Paruchuri1;~Xiaoyu_Zhang11;~Jiankai_Tang1;~Yuzhe_Zhang4;~Roni_Sengupta1;~Shwetak_Patel1;~Yuntao_Wang1;~Daniel_McDuff1", "gender": "M;M;M;F;M;M;F;M;M;M", "homepage": "https://homes.cs.washington.edu/~xliu0/;https://girishvn.github.io/;https://www.cs.unc.edu/~akshaypa/;https://xiaoyu-sz.github.io/;https://github.com/McJackTang;https://github.com/zyzzzz-123;https://www.cs.unc.edu/~ronisen/;http://abstract.cs.washington.edu/~shwetak/;https://pi.cs.tsinghua.edu.cn/lab/people/YuntaoWang/;http://alumni.media.mit.edu/~djmcduff/", "dblp": "76/1820-61;;316/6004;;;;54/10603;p/ShwetakNPatel;52/4107-1.html;63/9606", "google_scholar": "p9F83HoAAAAJ;uNbzzSgAAAAJ;https://scholar.google.com/citations?hl=en;rTuQ8tgAAAAJ;_jENFHIAAAAJ;;Id8SJl8AAAAJ;https://scholar.google.com.tw/citations?user=z4S5rC0AAAAJ;kHpwoAUAAAAJ;m7Jr-b4AAAAJ", "orcid": ";0000-0001-5647-3577;0000-0003-4664-3186;0000-0002-0985-6636;0009-0009-5388-4552;;0009-0001-5357-0563;;0000-0002-4249-8893;", "linkedin": ";;akshayparuchuri/;%E6%BD%87%E5%AE%87-%E5%BC%A0-038a23277/;jack-tang-941879223/;;;;;", "or_profile": "~Xin_Liu8;~Girish_Narayanswamy1;~Akshay_Paruchuri1;~Xiaoyu_Zhang11;~Jiankai_Tang1;~Yuzhe_Zhang4;~Roni_Sengupta1;~Shwetak_Patel1;~Yuntao_Wang1;~Daniel_McDuff1", "aff": "Department of Computer Science, University of Washington;University of Washington;Kitware;Tsinghua University;Tsinghua University;Shanghai Jiaotong University;University of North Carolina at Chapel Hill;University of Washington;Tsinghua University;Google", "aff_domain": "cs.washington.edu;uw.edu;kitware.com;tsinghua.edu.cn;tsinghua.edu.cn;sjtu.edu.cn;cs.unc.edu;u.washington.edu;tsinghua.edu.cn;google.com", "position": "PhD student;PhD student;Intern;PhD student;Undergrad student;Undergrad student;Assistant Professor;Full Professor;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nliu2023rppgtoolbox,\ntitle={r{PPG}-Toolbox: Deep Remote {PPG} Toolbox},\nauthor={Xin Liu and Girish Narayanswamy and Akshay Paruchuri and Xiaoyu Zhang and Jiankai Tang and Yuzhe Zhang and Roni Sengupta and Shwetak Patel and Yuntao Wang and Daniel McDuff},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=q4XNX15kSe}\n}", "github": "", "project": "", "reviewers": "zpiW;fuy4;fJMT;Up8R", "pdf_size": 1754124, "rating": "5;6;8;8", "confidence": "2;3;4;3", "wc_summary_and_contributions": "73;28;68;65", "wc_strengths": "78;36;76;174", "wc_improvement": "72;126;393;183", "wc_limitations": "12;15;239;49", "wc_correctness": "8;11;473;32", "wc_clarity": "10;10;8;19", "wc_relation_to_prior_work": "14;1;24;67", "wc_documentation": "17;32;19;35", "wc_additional_feedback": "1;1;1;1", "wc_review": "285;260;1301;625", "wc_reply_reviewers": "0;71;140;0", "wc_reply_authors": "552;951;1145;711", "reply_reviewers": "0;1;1;0", "reply_authors": "3;4;5;3", "rating_avg": [ 6.75, 1.299038105676658 ], 
"confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 58.5, 17.839562774911272 ], "wc_strengths_avg": [ 91.0, 50.76416058598822 ], "wc_improvement_avg": [ 193.5, 121.68504427414241 ], "wc_limitations_avg": [ 78.75, 93.65461814560989 ], "wc_correctness_avg": [ 131.0, 197.67017984511472 ], "wc_clarity_avg": [ 11.75, 4.264680527307995 ], "wc_relation_to_prior_work_avg": [ 26.5, 24.763884994079586 ], "wc_documentation_avg": [ 25.75, 7.854139036202504 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 617.75, 419.997247014787 ], "wc_reply_reviewers_avg": [ 52.75, 58.11787590750371 ], "wc_reply_authors_avg": [ 839.75, 226.34638830783229 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11800859537406070637&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.washington.edu;uw.edu;kitware.com;tsinghua.edu.cn;tsinghua.edu.cn;sjtu.edu.cn;cs.unc.edu;u.washington.edu;tsinghua.edu.cn;google.com", "author_num": 10, "aff_unique_index": "0;0;1;2;2;3;4;0;2;5", "aff_unique_norm": "University of Washington;Kitware;Tsinghua University;Shanghai Jiao Tong University;University of North Carolina;Google", "aff_unique_dep": "Department of Computer Science;;;;;Google", "aff_unique_url": "https://www.washington.edu;https://www.kitware.com;https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn;https://www.unc.edu;https://www.google.com", "aff_unique_abbr": "UW;Kitware;THU;SJTU;UNC;Google", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Seattle;;Chapel Hill;Mountain View", "aff_country_unique_index": "0;0;0;1;1;1;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Diff-Foley: Synchronized Video-to-Audio Synthesis with Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70378", "id": "q5FAZAIooz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98c50f47a37f63477c01558600dd225a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q5FAZAIooz", "openreview": "https://openreview.net/forum?id=q5FAZAIooz", "poster": "/media/PosterPDFs/NeurIPS%202023/70378.png?t=1701943727.9210045", "slides": "https://nips.cc/virtual/2023/poster/70378", "video": "https://nips.cc/virtual/2023/poster/70378", "author_site": "Simian Luo, Chuanhao Yan, Chenxu Hu, Hang Zhao", "tldr": "", "abstract": "The Video-to-Audio (V2A) model has recently gained attention for its practical application in generating audio directly from silent videos, particularly in video/film production. However, previous methods in V2A have limited generation quality in terms of temporal synchronization and audio-visual relevance. We present Diff-Foley, a synchronized Video-to-Audio synthesis method with a latent diffusion model (LDM) that generates high-quality audio with improved synchronization and audio-visual relevance. We adopt contrastive audio-visual pretraining (CAVP) to learn more temporally and semantically aligned features, then train an LDM with CAVP-aligned visual features on spectrogram latent space. The CAVP-aligned features enable LDM to capture the subtler audio-visual correlation via a cross-attention module. We further significantly improve sample quality with `double guidance'. 
Diff-Foley achieves state-of-the-art V2A performance on the current large-scale V2A dataset. Furthermore, we demonstrate Diff-Foley's practical applicability and adaptability via customized downstream finetuning. Project Page: https://diff-foley.github.io/", "keywords": "Video-to-Audio Generation; Contrastive Audio-Visual Pretraining; Latent Diffusion Model;", "primary_area": "", "supplementary_material": "", "author": "Simian Luo;Chuanhao Yan;Chenxu Hu;Hang Zhao", "authorids": "~Simian_Luo1;~Chuanhao_Yan1;~Chenxu_Hu1;~Hang_Zhao1", "gender": "M;M;M;M", "homepage": "https://github.com/luosiallen;;https://huchenxucs.github.io/;http://www.mit.edu/~hangzhao/", "dblp": "317/0715;331/1540;222/6365;", "google_scholar": ";;4LzKZggAAAAJ;DmahiOYAAAAJ", "orcid": ";0000-0003-1610-7411;;", "linkedin": ";;;", "or_profile": "~Simian_Luo1;~Chuanhao_Yan1;~Chenxu_Hu1;~Hang_Zhao1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nluo2023difffoley,\ntitle={Diff-Foley: Synchronized Video-to-Audio Synthesis with Latent Diffusion Models},\nauthor={Simian Luo and Chuanhao Yan and Chenxu Hu and Hang Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q5FAZAIooz}\n}", "github": "", "project": "", "reviewers": "tzzm;55K5;MA6i;k7sd", "pdf_size": 19054153, "rating": "5;6;7;7", "confidence": "3;2;4;5", "soundness": "2;3;3;3", "novelty": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "81;86;145;104", "wc_strengths": "55;103;133;71", "wc_weaknesses": "147;70;321;276", "wc_questions": "21;91;46;10", "wc_limitations": "1;1;84;28", "wc_review": "305;351;729;489", "wc_reply_reviewers": "10;77;44;75", "wc_reply_authors": "88;85;29;58", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.0, 25.16942589730644 ], "wc_strengths_avg": [ 90.5, 30.012497396917837 ], "wc_weaknesses_avg": [ 203.5, 100.09620372421723 ], "wc_questions_avg": [ 42.0, 31.15284898689043 ], "wc_limitations_avg": [ 28.5, 33.88583774971485 ], "wc_review_avg": [ 468.5, 164.93862494879724 ], "wc_reply_reviewers_avg": [ 51.5, 27.299267389437397 ], "wc_reply_authors_avg": [ 65.0, 23.843238035132728 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2361987756066029516&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Predict, Refine, Synthesize: Self-Guiding Diffusion Models for Probabilistic Time Series Forecasting", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70377", "id": "q6X038vKgU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a1a10c2c2c9b9af1514687bc24b8f3d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q6X038vKgU", "openreview": "https://openreview.net/forum?id=q6X038vKgU", "poster": "/media/PosterPDFs/NeurIPS%202023/70377.png?t=1702030996.3869164", "slides": "https://nips.cc/virtual/2023/poster/70377", "video": "https://nips.cc/virtual/2023/poster/70377", "author_site": "Marcel Kollovieh, Abdul Fatir Ansari, Michael Bohlke-Schneider, Jasper Zschiegner, Hao Wang, Yuyang (Bernie) Wang", "tldr": "", "abstract": "Diffusion models have achieved state-of-the-art performance in generative modeling tasks across various domains. Prior works on time series diffusion models have primarily focused on developing conditional models tailored to specific forecasting or imputation tasks. In this work, we explore the potential of task-agnostic, unconditional diffusion models for several time series applications. We propose TSDiff, an unconditionally-trained diffusion model for time series. Our proposed self-guidance mechanism enables conditioning TSDiff for downstream tasks during inference, without requiring auxiliary networks or altering the training procedure. We demonstrate the effectiveness of our method on three different time series tasks: forecasting, refinement, and synthetic data generation. First, we show that TSDiff is competitive with several task-specific conditional forecasting methods (*predict*). Second, we leverage the learned implicit probability density of TSDiff to iteratively refine the predictions of base forecasters with reduced computational overhead over reverse diffusion (*refine*). Notably, the generative performance of the model remains intact \u2014 downstream forecasters trained on synthetic samples from TSDiff outperform forecasters that are trained on samples from other state-of-the-art generative time series models, occasionally even outperforming models trained on real data (*synthesize*).\n\nOur code is available at https://github.com/amazon-science/unconditional-time-series-diffusion", "keywords": "diffusion models;time series forecasting;generative modeling;deep learning", "primary_area": "", "supplementary_material": "", "author": "Marcel Kollovieh;Abdul Fatir Ansari;Michael Bohlke-Schneider;Jasper Zschiegner;Hao Wang;Bernie Wang", "authorids": "~Marcel_Kollovieh1;~Abdul_Fatir_Ansari2;~Michael_Bohlke-Schneider1;~Jasper_Zschiegner1;~Hao_Wang3;~Bernie_Wang1", "gender": "M;M;M;;M;M", "homepage": "https://marcelkollovieh.de;https://abdulfatir.com;;;http://web.mit.edu/~ywang02/www/;http://www.wanghao.in", "dblp": "299/1647;202/5475.html;242/8809;242/9118;43/8355-1;w/HaoWang-14", "google_scholar": "4oq7nmIAAAAJ;https://scholar.google.com.sg/citations?user=BZ0EoqIAAAAJ;https://scholar.google.de/citations?user=19k2WQEAAAAJ;U3m0jOYAAAAJ;IKUm624AAAAJ;NrOA9QoAAAAJ", "orcid": ";;0000-0002-4969-2218;;0000-0002-0291-7184;", "linkedin": "marcel-kollovieh-8a2ab21b7/;abdulfatir/;michael-bohlke-schneider-16a4ab93/;;;", "or_profile": "~Marcel_Kollovieh1;~Abdul_Fatir_Ansari2;~Michael_Bohlke-Schneider1;~Jasper_Zschiegner1;~Bernie_Wang1;~Hao_Wang4", "aff": "Amazon;AWS AI Labs (Amazon);Amazon Development Center Germany;Amazon;Amazon;Rutgers University", "aff_domain": "amazon.com;amazon.de;amazon.de;amazon.com;amazon.com;cs.rutgers.edu", "position": "Intern;Scientist;Researcher;Researcher;Principal Researcher;Assistant Professor", "bibtex": 
"@inproceedings{\nkollovieh2023predict,\ntitle={Predict, Refine, Synthesize: Self-Guiding Diffusion Models for Probabilistic Time Series Forecasting},\nauthor={Marcel Kollovieh and Abdul Fatir Ansari and Michael Bohlke-Schneider and Jasper Zschiegner and Hao Wang and Bernie Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q6X038vKgU}\n}", "github": "", "project": "", "reviewers": "SUq5;8t5R;D1QU;PT4A", "pdf_size": 1643865, "rating": "4;5;7;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "129;40;60;90", "wc_strengths": "50;62;36;51", "wc_weaknesses": "177;182;51;23", "wc_questions": "115;2;183;220", "wc_limitations": "7;8;77;14", "wc_review": "478;294;407;398", "wc_reply_reviewers": "0;20;22;119", "wc_reply_authors": "0;18;26;553", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.75, 33.54381463101655 ], "wc_strengths_avg": [ 49.75, 9.229707470987366 ], "wc_weaknesses_avg": [ 108.25, 71.95614984141383 ], "wc_questions_avg": [ 130.0, 82.94275134090984 ], "wc_limitations_avg": [ 26.5, 29.278831943914703 ], "wc_review_avg": [ 394.25, 65.65201824772792 ], "wc_reply_reviewers_avg": [ 40.25, 46.27296727031886 ], "wc_reply_authors_avg": [ 149.25, 233.2952796350582 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3600104704271895443&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "amazon.com;amazon.de;amazon.de;amazon.com;amazon.com;cs.rutgers.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Amazon;Rutgers University", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.rutgers.edu", "aff_unique_abbr": "Amazon;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Triple Eagle: Simple, Fast and Practical Budget-Feasible Mechanisms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70376", "id": "q6bVqOgGxP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6af779991368999ab3da0d366c208fba-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q6bVqOgGxP", "openreview": "https://openreview.net/forum?id=q6bVqOgGxP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70376", "video": "https://nips.cc/virtual/2023/poster/70376", "author_site": "Kai Han, You Wu, He Huang, Shuang Cui", "tldr": "", "abstract": "We revisit the classical problem of designing Budget-Feasible Mechanisms (BFMs) for submodular valuation functions, which has been extensively studied since the seminal paper of Singer [FOCS\u201910] due to its wide applications in crowdsourcing and social marketing. 
We propose TripleEagle, a novel algorithmic framework for designing BFMs, based on which we present several simple yet effective BFMs that\nachieve better approximation ratios than the state-of-the-art work for both monotone and non-monotone submodular valuation functions. Moreover, our BFMs are the first in the literature to achieve linear complexities while ensuring obvious strategyproofness, making them more practical than the previous BFMs. We conduct extensive experiments to evaluate the empirical performance of our BFMs, and the experimental results strongly demonstrate the efficiency and effectiveness of our approach.", "keywords": "mechanism design;budget-feasible;truthful", "primary_area": "", "supplementary_material": "", "author": "Kai Han;You Wu;He Huang;Shuang Cui", "authorids": "~Kai_Han5;~You_Wu7;~He_Huang5;~Shuang_Cui1", "gender": "M;M;M;M", "homepage": ";;http://home.ustc.edu.cn/~huang83/;https://scholar.google.com/citations?user=2e_9NCQAAAAJ&hl=zh-CN", "dblp": "51/4757-3;;75/4947-1;86/9266", "google_scholar": "n3GdeCUAAAAJ;;VlD077cAAAAJ;2e_9NCQAAAAJ", "orcid": "0000-0002-6302-5366;0009-0002-1198-831X;;", "linkedin": ";;;", "or_profile": "~Kai_Han5;~You_Wu7;~He_Huang5;~Shuang_Cui1", "aff": "Soochow University;Soochow University;Soochow University, China;", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;", "position": "Full Professor;Undergrad student;Full Professor;", "bibtex": "@inproceedings{\nhan2023triple,\ntitle={Triple Eagle: Simple, Fast and Practical Budget-Feasible Mechanisms},\nauthor={Kai Han and You Wu and He Huang and Shuang Cui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q6bVqOgGxP}\n}", "github": "", "project": "", "reviewers": "LiBq;QWwV;kDRB;Mo9x", "pdf_size": 459042, "rating": "7;7;7;7", "confidence": "3;3;4;4", "soundness": "4;4;3;3", "novelty": "3;4;4;2", "presentation": "4;3;4;2", "wc_summary": "89;222;170;114", "wc_strengths": "46;31;22;43", "wc_weaknesses": "31;14;1;88", "wc_questions": "164;20;58;11", "wc_limitations": "1;3;1;24", "wc_review": "331;290;252;280", "wc_reply_reviewers": "10;5;8;9", "wc_reply_authors": "4;4;6;4", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 148.75, 51.46540099911785 ], "wc_strengths_avg": [ 35.5, 9.604686356149273 ], "wc_weaknesses_avg": [ 33.5, 33.21520736048475 ], "wc_questions_avg": [ 63.25, 60.78394113579672 ], "wc_limitations_avg": [ 7.25, 9.705024471890836 ], "wc_review_avg": [ 288.25, 28.340562803162538 ], "wc_reply_reviewers_avg": [ 8.0, 1.8708286933869707 ], "wc_reply_authors_avg": [ 4.5, 0.8660254037844386 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=716452005717744138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "suda.edu.cn;suda.edu.cn;suda.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Soochow University", "aff_unique_dep": "", "aff_unique_url": "https://www.soochow.edu.cn", "aff_unique_abbr": "Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning from Active Human 
Involvement through Proxy Value Propagation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70375", "id": "q8SukwaEBy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f57ffe47d0b528fbb97901d16bd4eba2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q8SukwaEBy", "openreview": "https://openreview.net/forum?id=q8SukwaEBy", "poster": "/media/PosterPDFs/NeurIPS%202023/70375.png?t=1701737880.9824455", "slides": "https://nips.cc/virtual/2023/poster/70375", "video": "https://nips.cc/virtual/2023/poster/70375", "author_site": "Zhenghao (Mark) Peng, Wenjie Mo, Chenda Duan, Quanyi Li, Bolei Zhou", "tldr": "", "abstract": "Learning from active human involvement enables the human subject to actively intervene and demonstrate to the AI agent during training. The interaction and corrective feedback from humans bring safety and AI alignment to the learning process. In this work, we propose a new reward-free active human involvement method called Proxy Value Propagation for policy optimization. Our key insight is that a proxy value function can be designed to express human intents, wherein state-action pairs in the human demonstration are labeled with high values, while those agents\u2019 actions that are intervened upon receive low values. Through the TD-learning framework, labeled values of demonstrated state-action pairs are further propagated to other unlabeled data generated from agents\u2019 exploration. The proxy value function thus induces a policy that faithfully emulates human behaviors. Human-in-the-loop experiments show the generality and efficiency of our method. With minimal modification to existing reinforcement learning algorithms, our method can learn to solve continuous and discrete control tasks with various human control devices, including the challenging task of driving in Grand Theft Auto V. 
Demo video and code are available at: https://metadriverse.github.io/pvp.", "keywords": "Machine Learning;Human-in-the-loop Reinforcement Learning;Safety;Sample Efficiency;Reward-free", "primary_area": "", "supplementary_material": "", "author": "Zhenghao Peng;Wenjie Mo;Chenda Duan;Quanyi Li;Bolei Zhou", "authorids": "~Zhenghao_Peng1;~Wenjie_Mo1;~Chenda_Duan1;~Quanyi_Li1;~Bolei_Zhou5", "gender": "M;M;M;M;M", "homepage": "https://pengzhenghao.github.io;https://wenjie-mo.github.io/;https://chendaduan.com/;https://quanyili.github.io;https://boleizhou.github.io/", "dblp": "220/3963;344/2097;349/8266;270/7691;46/8066", "google_scholar": "JZ8ws6IAAAAJ;;DooYOyoAAAAJ;Ty49X3UAAAAJ;9D4aG8AAAAAJ", "orcid": ";;0009-0003-8652-3960;;", "linkedin": ";wenjie-mo;chenda-d/;https://www.linkedin.com/mwlite/in/quanyi-li-2b7985183;", "or_profile": "~Zhenghao_Peng1;~Wenjie_Mo1;~Chenda_Duan1;~Quanyi_Li1;~Bolei_Zhou5", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Shanghai Artificial Intelligence Laboratory;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;ucla.edu;pjlab.org.cn;ucla.edu", "position": "PhD student;Undergrad student;MS student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\npeng2023learning,\ntitle={Learning from Active Human Involvement through Proxy Value Propagation},\nauthor={Zhenghao Peng and Wenjie Mo and Chenda Duan and Quanyi Li and Bolei Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q8SukwaEBy}\n}", "github": "", "project": "", "reviewers": "pvGG;C5fY;iaMb;sDoy;e73G", "pdf_size": 4215519, "rating": "5;5;7;8;8", "confidence": "3;3;4;5;4", "soundness": "3;2;3;4;4", "novelty": "3;2;3;4;3", "presentation": "3;3;4;3;4", "wc_summary": "88;70;104;94;99", "wc_strengths": "51;63;75;180;120", "wc_weaknesses": "264;706;378;372;226", "wc_questions": "49;29;128;147;158", "wc_limitations": "1;55;74;39;33", "wc_review": "453;923;759;832;636", "wc_reply_reviewers": "31;308;129;112;0", "wc_reply_authors": "0;386;0;0;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.6, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.0, 11.764352935882194 ], "wc_strengths_avg": [ 97.8, 47.26690173895471 ], "wc_weaknesses_avg": [ 389.2, 169.17021014351195 ], "wc_questions_avg": [ 102.2, 52.86738124779778 ], "wc_limitations_avg": [ 40.4, 24.294855422496344 ], "wc_review_avg": [ 720.6, 163.4730558838367 ], "wc_reply_reviewers_avg": [ 116.0, 107.45231500530828 ], "wc_reply_authors_avg": [ 77.2, 154.39999999999998 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9063269671749656, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4265918704798171104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.ucla.edu;ucla.edu;ucla.edu;pjlab.org.cn;ucla.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;http://www.shailab.org/", "aff_unique_abbr": "UCLA;Shanghai AI Lab", 
"aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Deep Contract Design via Discontinuous Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70374", "id": "q8mH2d6uw2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf7700139af1fa346d2f57f1f5c26c18-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q8mH2d6uw2", "openreview": "https://openreview.net/forum?id=q8mH2d6uw2", "poster": "/media/PosterPDFs/NeurIPS%202023/70374.png?t=1702147993.1736143", "slides": "https://nips.cc/virtual/2023/poster/70374", "video": "https://nips.cc/virtual/2023/poster/70374", "author_site": "Tonghan Wang, Paul Duetting, Dmitry Ivanov, Inbal Talgam-Cohen, David Parkes", "tldr": "", "abstract": "Contract design involves a principal who establishes contractual agreements about payments for outcomes that arise from the actions of an agent. In this paper, we initiate the study of deep learning for the automated design of optimal contracts. We introduce a novel representation: the Discontinuous ReLU (DeLU) network, which models the principal's utility as a discontinuous piecewise affine function of the design of a contract where each piece corresponds to the agent taking a particular action. DeLU networks implicitly learn closed-form expressions for the incentive compatibility constraints of the agent and the utility maximization objective of the principal, and support parallel inference on each piece through linear programming or interior-point methods that solve for optimal contracts. We provide empirical results that demonstrate success in approximating the principal's utility function with a small number of training samples and scaling to find approximately optimal contracts on problems with a large number of actions and outcomes.", "keywords": "Automated contract design;discontinuous neural networks", "primary_area": "", "supplementary_material": "/attachment/bf1bad6e08116e96e0eedea39d34bde730e924de.zip", "author": "Tonghan Wang;Paul Duetting;Dmitry Ivanov;Inbal Talgam-Cohen;David C. Parkes", "authorids": "~Tonghan_Wang1;~Paul_Duetting1;~Dmitry_Ivanov1;~Inbal_Talgam-Cohen2;~David_C._Parkes1", "gender": "M;;Not Specified;F;M", "homepage": "https://tonghanwang.github.io/;http://paulduetting.com/;;http://www.inbaltalgam.com/;https://parkes.seas.harvard.edu/", "dblp": "175/6039-1.html;https://dblp.org/pers/d/D=uuml=tting:Paul.html;122/1439;07/8319;p/DavidCParkes.html", "google_scholar": "-AR1yc4AAAAJ;Oqky1hIAAAAJ;https://scholar.google.ru/citations?user=G9szMAwAAAAJ;R1YK5BsAAAAJ;JUn8PgwAAAAJ", "orcid": ";;0000-0002-6974-8397;;0000-0002-2701-3464", "linkedin": ";;;;", "or_profile": "~Tonghan_Wang1;~Paul_Duetting1;~Dmitry_Ivanov1;~Inbal_Talgam-Cohen2;~David_C._Parkes1", "aff": "Tsinghua University;Google;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion, Technion;Google", "aff_domain": "tsinghua.edu.cn;google.com;campus.technion.ac.il;technion.ac.il;deepmind.com", "position": "MS student;Researcher;Postdoc;Assistant Professor;Senior Research Scientist", "bibtex": "@inproceedings{\nwang2023deep,\ntitle={Deep Contract Design via Discontinuous Networks},\nauthor={Tonghan Wang and Paul Duetting and Dmitry Ivanov and Inbal Talgam-Cohen and David C. 
Parkes},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q8mH2d6uw2}\n}", "github": "", "project": "", "reviewers": "GXTm;aB6m;GYpK;1ATX;z4Vn", "pdf_size": 1823264, "rating": "6;6;7;8;9", "confidence": "4;4;4;4;3", "soundness": "3;3;4;4;4", "novelty": "3;3;3;4;4", "presentation": "3;3;4;4;4", "wc_summary": "139;83;239;91;184", "wc_strengths": "108;11;45;132;212", "wc_weaknesses": "97;214;159;290;7", "wc_questions": "49;85;133;12;163", "wc_limitations": "9;2;35;1;9", "wc_review": "402;395;611;526;575", "wc_reply_reviewers": "0;62;73;382;31", "wc_reply_authors": "0;96;99;720;110", "reply_reviewers": "0;1;1;2;1", "reply_authors": "1;2;2;4;2", "rating_avg": [ 7.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 147.2, 58.52999231163455 ], "wc_strengths_avg": [ 101.6, 70.0930809709489 ], "wc_weaknesses_avg": [ 153.4, 96.93007789123044 ], "wc_questions_avg": [ 88.4, 54.69040135160831 ], "wc_limitations_avg": [ 11.2, 12.367699866992245 ], "wc_review_avg": [ 501.8, 88.58306835959117 ], "wc_reply_reviewers_avg": [ 109.6, 138.56204386483333 ], "wc_reply_authors_avg": [ 205.0, 260.5348345231401 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7717436331412897, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2484110038559429646&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;google.com;campus.technion.ac.il;technion.ac.il;deepmind.com", "author_num": 5, "aff_unique_index": "0;1;2;2;1", "aff_unique_norm": "Tsinghua University;Google;Technion - Israel Institute of Technology", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.google.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "THU;Google;Technion", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;2;1", "aff_country_unique": "China;United States;Israel" }, { "title": "Trust Region-Based Safe Distributional Reinforcement Learning for Multiple Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70373", "id": "q9WMXjUxxT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f20f2b0315c72201e23512fdbd1ee91-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=q9WMXjUxxT", "openreview": "https://openreview.net/forum?id=q9WMXjUxxT", "poster": "/media/PosterPDFs/NeurIPS%202023/70373.png?t=1701406932.49708", "slides": "https://nips.cc/virtual/2023/poster/70373", "video": "https://nips.cc/virtual/2023/poster/70373", "author_site": "Dohyeong Kim, Kyungjae Lee, Songhwai Oh", "tldr": "", "abstract": "In safety-critical robotic tasks, potential failures must be reduced, and multiple constraints must be met, such as avoiding collisions, limiting energy consumption, and maintaining balance.\nThus, applying safe reinforcement learning (RL) in such robotic tasks requires handling multiple constraints and using risk-averse constraints rather than risk-neutral constraints.\nTo this end, we propose a trust region-based safe RL algorithm for multiple constraints called a safe distributional actor-critic (SDAC).\nOur main contributions 
are as follows: 1) introducing a gradient integration method to manage infeasibility issues in multi-constrained problems, ensuring theoretical convergence, and 2) developing a TD($\\lambda$) target distribution to estimate risk-averse constraints with low biases. \nWe evaluate SDAC through extensive experiments involving multi- and single-constrained robotic tasks.\nWhile maintaining high scores, SDAC requires 1.93 times fewer steps to satisfy all constraints in multi-constrained tasks and incurs 1.78 times fewer constraint violations in single-constrained tasks compared to safe RL baselines.\nCode is available at: https://github.com/rllab-snu/Safe-Distributional-Actor-Critic.", "keywords": "Reinforcement learning;Safety;Multiple Constraints;Distributional Critic", "primary_area": "", "supplementary_material": "/attachment/c29d388294366221d21f58152127e70d0d2f5249.zip", "author": "Dohyeong Kim;Kyungjae Lee;Songhwai Oh", "authorids": "~Dohyeong_Kim1;~Kyungjae_Lee1;~Songhwai_Oh1", "gender": "M;M;", "homepage": "https://dobro12.github.io/;https://sites.google.com/view/kyungjaelee;https://rllab.snu.ac.kr/", "dblp": "126/4248;13/7265-1;17/3173", "google_scholar": ";https://scholar.google.co.kr/citations?user=OZZJagIAAAAJ;VEzNY_oAAAAJ", "orcid": "0000-0003-0788-6089;0000-0003-0147-2715;0000-0002-9781-2018", "linkedin": ";;", "or_profile": "~Dohyeong_Kim1;~Kyungjae_Lee1;~Songhwai_Oh1", "aff": "Seoul National University;ChungAng University;Seoul National University", "aff_domain": "snu.ac.kr;cau.ac.kr;snu.ac.kr", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkim2023trust,\ntitle={Trust Region-Based Safe Distributional Reinforcement Learning for Multiple Constraints},\nauthor={Dohyeong Kim and Kyungjae Lee and Songhwai Oh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=q9WMXjUxxT}\n}", "github": "", "project": "", "reviewers": "Cyuw;HJro;dXag;A8gA", "pdf_size": 3612171, "rating": "6;6;6;6", "confidence": "3;3;3;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "64;40;63;62", "wc_strengths": "121;16;57;51", "wc_weaknesses": "119;97;87;73", "wc_questions": "213;122;95;129", "wc_limitations": "6;33;9;12", "wc_review": "523;308;311;327", "wc_reply_reviewers": "72;10;16;25", "wc_reply_authors": "19;11;10;14", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.25, 9.98436277385793 ], "wc_strengths_avg": [ 61.25, 37.88386859865291 ], "wc_weaknesses_avg": [ 94.0, 16.76305461424021 ], "wc_questions_avg": [ 139.75, 44.15526582413472 ], "wc_limitations_avg": [ 15.0, 10.606601717798213 ], "wc_review_avg": [ 367.25, 90.21190331658012 ], "wc_reply_reviewers_avg": [ 30.75, 24.40671014290947 ], "wc_reply_authors_avg": [ 13.5, 3.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15879278810641354652&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "snu.ac.kr;cau.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;Chungang University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.snu.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr": "SNU;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Exploring Why Object Recognition Performance Degrades Across Income Levels and Geographies with Factor Annotations", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73457", "id": "q9hc7R8N7P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e3378a8e80af4ffc456c4fa13d46550-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=q9hc7R8N7P", "openreview": "https://openreview.net/forum?id=q9hc7R8N7P", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73457", "video": "https://nips.cc/virtual/2023/poster/73457", "author_site": "Laura Gustafson, Megan Richards, Melissa Hall, Caner Hazirbas, Diane Bouchacourt, Mark Ibrahim", "tldr": "", "abstract": "Despite impressive advances in object-recognition, deep learning systems\u2019 performance degrades significantly across geographies and lower income levels---raising pressing concerns of inequity. Addressing such performance gaps remains a challenge, as little is understood about why performance degrades across incomes or geographies.\nWe take a step in this direction by annotating images from Dollar Street, a popular benchmark of geographically and economically diverse images, labeling each image with factors such as color, shape, and background. These annotations unlock a new granular view into how objects differ across incomes/regions. We then use these object differences to pinpoint model vulnerabilities across incomes and regions.\nWe study a range of modern vision models, finding that performance disparities are most associated with differences in _texture, occlusion_, and images with _darker lighting_.\nWe illustrate how insights from our factor labels can surface mitigations to improve models' performance disparities.\nAs an example, we show that mitigating a model's vulnerability to texture \ncan improve performance on the lower income level.\n**We release all the factor annotations along with an interactive dashboard\nto facilitate research into more equitable vision systems.**", "keywords": "computer vision;fairness;robustness;machine learning;image classification;object recognition", "primary_area": "", "supplementary_material": "/attachment/80adc7d2754cc5aacd0964a6b637283c5e31cc2c.pdf", "author": "Laura Gustafson;Megan Richards;Melissa Hall;Caner Hazirbas;Diane Bouchacourt;Mark Ibrahim", "authorids": "~Laura_Gustafson1;~Megan_Richards1;~Melissa_Hall1;~Caner_Hazirbas2;~Diane_Bouchacourt3;~Mark_Ibrahim1", "gender": ";;F;M;;", "homepage": ";;;https://hazirbas.com;;https://markibrahim.me/", "dblp": ";;287/5067;161/7891;;180/5660", "google_scholar": "c8IpF9gAAAAJ;;DcfHZoUAAAAJ;JEiXKpcAAAAJ;;AqYyoCMAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Laura_Gustafson1;~Megan_Richards1;~Melissa_Hall1;~Caner_Hazirbas2;~Diane_Bouchacourt3;~Mark_Ibrahim1", "aff": "Meta Facebook;;Research, Facebook;Meta AI;;Facebook AI Research (FAIR) Meta", "aff_domain": "fb.com;;research.facebook.com;meta.com;;ai.facebook.com", "position": "Research Engineer;;Researcher;Researcher;;Researcher", "bibtex": "@inproceedings{\ngustafson2023exploring,\ntitle={Exploring Why Object Recognition Performance Degrades Across Income Levels and Geographies with Factor Annotations},\nauthor={Laura Gustafson and Megan 
Richards and Melissa Hall and Caner Hazirbas and Diane Bouchacourt and Mark Ibrahim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=q9hc7R8N7P}\n}", "github": "", "project": "", "reviewers": "uwZ7;cjSi;3kzU", "pdf_size": 33984362, "rating": "7;7;9", "confidence": "3;4;5", "wc_summary_and_contributions": "57;162;31", "wc_strengths": "72;45;30", "wc_improvement": "213;41;28", "wc_limitations": "37;99;21", "wc_correctness": "14;14;5", "wc_clarity": "25;5;26", "wc_relation_to_prior_work": "3;9;7", "wc_documentation": "28;20;31", "wc_additional_feedback": "1;1;1", "wc_review": "450;396;180", "wc_reply_reviewers": "16;19;15", "wc_reply_authors": "883;822;600", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 7.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 83.33333333333333, 56.629399510226925 ], "wc_strengths_avg": [ 49.0, 17.378147196982766 ], "wc_improvement_avg": [ 94.0, 84.31290925277497 ], "wc_limitations_avg": [ 52.333333333333336, 33.6386021641143 ], "wc_correctness_avg": [ 11.0, 4.242640687119285 ], "wc_clarity_avg": [ 18.666666666666668, 9.672412085697939 ], "wc_relation_to_prior_work_avg": [ 6.333333333333333, 2.494438257849294 ], "wc_documentation_avg": [ 26.333333333333332, 4.642796092394706 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 342.0, 116.65333257134148 ], "wc_reply_reviewers_avg": [ 16.666666666666668, 1.699673171197595 ], "wc_reply_authors_avg": [ 768.3333333333334, 121.60683460325009 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14717345380851256916&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "fb.com;;research.facebook.com;meta.com;;ai.facebook.com", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Complex-valued Neurons Can Learn More but Slower than Real-valued Neurons via Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70372", "id": "qA0uHmaVKk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ac4365b98bc242acd5ab974a05c68a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qA0uHmaVKk", "openreview": "https://openreview.net/forum?id=qA0uHmaVKk", "poster": "/media/PosterPDFs/NeurIPS%202023/70372.png?t=1702005867.2707195", "slides": "https://nips.cc/virtual/2023/poster/70372", "video": "https://nips.cc/virtual/2023/poster/70372", "author_site": "Jin-Hui Wu, Shao-Qun Zhang, Yuan Jiang, Zhi-Hua Zhou", "tldr": "", "abstract": "Complex-valued neural networks potentially possess better representations and performance than real-valued counterparts when dealing with some complicated tasks such as acoustic analysis, radar image classification, etc. Despite empirical successes, it remains unknown theoretically when and to what extent complex-valued neural networks outperform real-valued ones. 
We take one step in this direction by comparing the learnability of real-valued neurons and complex-valued neurons via gradient descent. We show that a complex-valued neuron can efficiently learn functions expressed by any one real-valued neuron and any one complex-valued neuron with convergence rate $O(t^{-3})$ and $O(t^{-1})$ where $t$ is the iteration index of gradient descent, respectively, whereas a two-layer real-valued neural network with finite width cannot learn a single non-degenerate complex-valued neuron. We prove that a complex-valued neuron learns a real-valued neuron with rate $\\Omega (t^{-3})$, exponentially slower than the $O(\\mathrm{e}^{- c t})$ rate of learning one real-valued neuron using a real-valued neuron with a constant $c$. We further verify and extend these results via simulation experiments in more general settings.", "keywords": "Complex-valued Neural Networks; Learning Neurons; Real-valued Neural Networks; Convergence Rate", "primary_area": "", "supplementary_material": "/attachment/e0e8195000a1f4fde79c9bc8cf119b08fbfc8b4f.pdf", "author": "Jin-Hui Wu;Shao-Qun Zhang;Yuan Jiang;Zhi-Hua Zhou", "authorids": "~Jin-Hui_Wu1;~Shao-Qun_Zhang1;~Yuan_Jiang1;~Zhi-Hua_Zhou2", "gender": "M;M;F;M", "homepage": "http://www.lamda.nju.edu.cn/wujh/;http://www.lamda.nju.edu.cn/zhangsq/;http://lamda.nju.edu.cn/jiangy;https://cs.nju.edu.cn/zhouzh/", "dblp": "306/0797;249/2560;;z/ZhiHuaZhou", "google_scholar": "sq2aNZ4AAAAJ;TCh08AUAAAAJ;;https://scholar.google.com.tw/citations?user=rSVIHasAAAAJ", "orcid": ";0000-0002-0614-8984;;0000-0003-0746-1494", "linkedin": ";;;", "or_profile": "~Jin-Hui_Wu1;~Shao-Qun_Zhang1;~Yuan_Jiang1;~Zhi-hua_Zhou1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023complexvalued,\ntitle={Complex-valued Neurons Can Learn More but Slower than Real-valued Neurons via Gradient Descent},\nauthor={Jin-Hui Wu and Shao-Qun Zhang and Yuan Jiang and Zhi-Hua Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qA0uHmaVKk}\n}", "github": "", "project": "", "reviewers": "Jsv8;D3zC;4m8w;cJ2H;qTS2", "pdf_size": 713347, "rating": "5;6;6;7;7", "confidence": "3;2;2;3;3", "soundness": "2;3;2;4;4", "novelty": "3;3;3;3;3", "presentation": "4;3;2;3;4", "wc_summary": "31;58;122;152;57", "wc_strengths": "77;53;37;71;88", "wc_weaknesses": "283;39;248;136;80", "wc_questions": "100;23;123;53;85", "wc_limitations": "8;1;23;1;10", "wc_review": "499;174;553;413;320", "wc_reply_reviewers": "658;0;0;236;22", "wc_reply_authors": "233;0;0;16;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 84.0, 45.34754679141971 ], "wc_strengths_avg": [ 65.2, 18.093092604637828 ], "wc_weaknesses_avg": [ 157.2, 94.2876450018771 ], "wc_questions_avg": [ 76.8, 35.21590549737434 ], "wc_limitations_avg": [ 8.6, 8.06473806146238 ], "wc_review_avg": [ 391.8, 134.550213675044 ], "wc_reply_reviewers_avg": [ 183.2, 253.5085008436601 ], "wc_reply_authors_avg": [ 49.8, 91.80936771375784 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 
0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6876942895779486086&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "VLATTACK: Multimodal Adversarial Attacks on Vision-Language Tasks via Pre-trained Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70371", "id": "qBAED3u1XZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a5e3cf29c269b041ccd644b6beaf5c42-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qBAED3u1XZ", "openreview": "https://openreview.net/forum?id=qBAED3u1XZ", "poster": "/media/PosterPDFs/NeurIPS%202023/70371.png?t=1699477837.312502", "slides": "https://nips.cc/virtual/2023/poster/70371", "video": "https://nips.cc/virtual/2023/poster/70371", "author_site": "Ziyi Yin, Muchao Ye, Tianrong Zhang, Tianyu Du, Tianyu Du, Jinguo Zhu, Han Liu, Jinghui Chen, Ting Wang, Fenglong Ma", "tldr": "", "abstract": "Vision-Language (VL) pre-trained models have shown their superiority on many multimodal tasks. However, the adversarial robustness of such models has not been fully explored. Existing approaches mainly focus on exploring the adversarial robustness under the white-box setting, which is unrealistic. In this paper, we aim to investigate a new yet practical task to craft image and text perturbations using pre-trained VL models to attack black-box fine-tuned models on different downstream tasks. Towards this end, we propose VLATTACK to generate adversarial samples by fusing perturbations of images and texts from both single-modal and multi-modal levels. At the single-modal level, we propose a new block-wise similarity attack (BSA) strategy to learn image perturbations for disrupting universal representations. Besides, we adopt an existing text attack strategy to generate text perturbations independent of the image-modal attack. At the multi-modal level, we design a novel iterative cross-search attack (ICSA) method to update adversarial image-text pairs periodically, starting with the outputs from the single-modal level. We conduct extensive experiments to attack three widely-used VL pretrained models for six tasks on eight datasets. 
Experimental results show that the proposed VLATTACK framework achieves the highest attack success rates on all tasks compared with state-of-the-art baselines, which reveals a significant blind spot in the deployment of pre-trained VL models.", "keywords": "vision-language;adversarial attacks;pre-trained model;fine-tuned model", "primary_area": "", "supplementary_material": "/attachment/9252c8343e53db9dc59e6d12911e7cf62c0ef6d7.zip", "author": "Ziyi Yin;Muchao Ye;Tianrong Zhang;Tianyu Du;Jinguo Zhu;Han Liu;Jinghui Chen;Ting Wang;Fenglong Ma", "authorids": "~Ziyi_Yin1;~Muchao_Ye1;~Tianrong_Zhang1;~Tianyu_Du2;~Jinguo_Zhu1;~Han_Liu3;~Jinghui_Chen1;~Ting_Wang1;~Fenglong_Ma1", "gender": "M;;M;F;M;M;M;M;M", "homepage": "https://ericyinyzy.github.io/;https://sites.google.com/view/mcye;https://zhangtianrong.github.io/profile/#en;https://tydusky.github.io/;https://lechatelia.github.io/;http://faculty.dlut.edu.cn/liuhan/zh_CN/index.htm;https://jinghuichen.github.io/;https://alps-lab.github.io/;https://fenglong-ma.github.io/", "dblp": "358/6428;251/3433.html;;128/2982.html;244/7280;35/2899-8;67/5633;12/2633-6.html;85/10856", "google_scholar": "wvbK37AAAAAJ;;;kBqTzrwAAAAJ;YfHg5lQAAAAJ;https://scholar.google.com.hk/citations?user=idpbcG0AAAAJ;mKia7Y4AAAAJ;cwcBTegAAAAJ;DLJIxNMAAAAJ", "orcid": "0009-0002-3502-3205;0009-0006-9112-8895;;0000-0003-0896-0690;;0000-0001-6921-2050;;;0000-0002-4999-0303", "linkedin": "%E6%A2%93%E8%AF%91-%E6%AE%B7-ab816a249/?locale=en_US&trk=eml-email_network_conversations_01-header-0-profile_glimmer;;;;;;;;fenglong-ma-69805832/", "or_profile": "~Ziyi_Yin1;~Muchao_Ye1;~Tianrong_Zhang1;~Tianyu_Du2;~Jinguo_Zhu1;~Han_Liu3;~Jinghui_Chen1;~Ting_Wang1;~Fenglong_Ma1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Xi'an Jiaotong University;Dalian University of Technology;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;psu.edu;xjtu.edu.cn;dlut.edu.cn;psu.edu;psu.edu;psu.edu", "position": "PhD student;PhD student;PhD student;Postdoc;PhD student;Associate Professor;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyin2023vlattack,\ntitle={{VLATTACK}: Multimodal Adversarial Attacks on Vision-Language Tasks via Pre-trained Models},\nauthor={Ziyi Yin and Muchao Ye and Tianrong Zhang and Tianyu Du and Jinguo Zhu and Han Liu and Jinghui Chen and Ting Wang and Fenglong Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qBAED3u1XZ}\n}", "github": "", "project": "", "reviewers": "zqVz;AgmN;pd9a;NrLG;3TAC", "pdf_size": 38909691, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;4", "soundness": "2;2;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;4;3;3", "wc_summary": "70;50;66;87;116", "wc_strengths": "63;36;49;48;74", "wc_weaknesses": "221;118;332;118;171", "wc_questions": "176;41;6;11;33", "wc_limitations": "15;9;1;4;32", "wc_review": "545;254;454;268;426", "wc_reply_reviewers": "32;14;221;19;15", "wc_reply_authors": "23;23;822;24;21", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;4;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 77.8, 22.435685859808252 ], "wc_strengths_avg": [ 54.0, 13.16054710108968 ], "wc_weaknesses_avg": 
[ 192.0, 79.79223019818409 ], "wc_questions_avg": [ 53.4, 62.682054848257806 ], "wc_limitations_avg": [ 12.2, 10.979981785048643 ], "wc_review_avg": [ 389.4, 112.06712274347014 ], "wc_reply_reviewers_avg": [ 60.2, 80.65581194185575 ], "wc_reply_authors_avg": [ 182.6, 319.70148576445496 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=841180732953431533&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "psu.edu;psu.edu;psu.edu;psu.edu;xjtu.edu.cn;dlut.edu.cn;psu.edu;psu.edu;psu.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;0;0;0", "aff_unique_norm": "Pennsylvania State University;Xi'an Jiao Tong University;Dalian University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.psu.edu;https://www.xjtu.edu.cn;http://www.dlut.edu.cn/", "aff_unique_abbr": "PSU;XJTU;DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Gradient Descent with Linearly Correlated Noise: Theory and Applications to Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70370", "id": "qCglMj6A4z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/70255afc962aca0930327c090eb7d8c5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qCglMj6A4z", "openreview": "https://openreview.net/forum?id=qCglMj6A4z", "poster": "/media/PosterPDFs/NeurIPS%202023/70370.png?t=1702304416.4301538", "slides": "https://nips.cc/virtual/2023/poster/70370", "video": "https://nips.cc/virtual/2023/poster/70370", "author_site": "Anastasiia Koloskova, Ryan McKenna, Zachary Charles, John Rush, H. Brendan McMahan", "tldr": "", "abstract": "We study gradient descent under linearly correlated noise. Our work is motivated by recent practical methods for optimization with differential privacy (DP), such as DP-FTRL, which achieve strong performance in settings where privacy amplification techniques are infeasible (such as in federated learning). These methods inject privacy noise through a matrix factorization mechanism, making the noise *linearly correlated* over iterations. We propose a simplified setting that distills key facets of these methods and isolates the impact of linearly correlated noise. We analyze the behavior of gradient descent in this setting, for both convex and non-convex functions. Our analysis is demonstrably tighter than prior work and recovers multiple important special cases exactly (including anticorrelated perturbed gradient descent). 
We use our results to develop new, effective matrix factorizations for differentially private optimization, and highlight the benefits of these factorizations theoretically and empirically.", "keywords": "optimization;machine learning;differential privacy", "primary_area": "", "supplementary_material": "/attachment/cf98b59bb52a110e64ebb55c5c0a6416aabdf650.pdf", "author": "Anastasia Koloskova;Ryan McKenna;Zachary Charles;J Keith Rush;Hugh Brendan McMahan", "authorids": "~Anastasia_Koloskova2;~Ryan_McKenna2;~Zachary_Charles1;~J_Keith_Rush1;~Hugh_Brendan_McMahan1", "gender": ";;;;M", "homepage": ";;;https://www.jkrush.com;", "dblp": ";;;249/8135;", "google_scholar": ";;;OrUyRAcAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Anastasia_Koloskova2;~Ryan_McKenna2;~Zachary_Charles1;~J_Keith_Rush1;~Hugh_Brendan_McMahan1", "aff": ";;;Google;Google", "aff_domain": ";;;google.com;google.com", "position": ";;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nkoloskova2023gradient,\ntitle={Gradient Descent with Linearly Correlated Noise: Theory and Applications to Differential Privacy},\nauthor={Anastasia Koloskova and Ryan McKenna and Zachary Charles and J Keith Rush and Hugh Brendan McMahan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qCglMj6A4z}\n}", "github": "", "project": "", "reviewers": "ptMd;xFGx;2HBd;dQeP;WD4o", "pdf_size": 557398, "rating": "5;6;7;7;8", "confidence": "2;4;2;4;5", "soundness": "3;4;3;4;4", "novelty": "3;3;3;3;4", "presentation": "3;4;3;3;4", "wc_summary": "36;78;112;294;63", "wc_strengths": "33;110;52;124;77", "wc_weaknesses": "210;71;23;271;102", "wc_questions": "10;32;107;70;66", "wc_limitations": "4;27;10;43;27", "wc_review": "293;318;304;802;335", "wc_reply_reviewers": "0;14;8;68;15", "wc_reply_authors": "0;0;0;56;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 1.2 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 116.6, 92.03390679526758 ], "wc_strengths_avg": [ 79.2, 34.16079624364748 ], "wc_weaknesses_avg": [ 135.4, 91.48682965323479 ], "wc_questions_avg": [ 57.0, 33.41855771872868 ], "wc_limitations_avg": [ 22.2, 13.847743498491008 ], "wc_review_avg": [ 410.4, 196.3044574124592 ], "wc_reply_reviewers_avg": [ 21.0, 24.099792530227308 ], "wc_reply_authors_avg": [ 11.2, 22.4 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6210344279375829, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8701630122974604304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Temporal Graph Benchmark for Machine Learning on Temporal Graphs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73456", "id": "qG7IkQ7IBO", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/066b98e63313162f6562b35962671288-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qG7IkQ7IBO", "openreview": "https://openreview.net/forum?id=qG7IkQ7IBO", "poster": "/media/PosterPDFs/NeurIPS%202023/73456.png?t=1699982233.5942144", "slides": "https://nips.cc/virtual/2023/poster/73456", "video": "https://nips.cc/virtual/2023/poster/73456", "author_site": "Shenyang Huang, Farimah Poursafaei, Jacob Danovitch, Matthias Fey, Weihua Hu, Emanuele Rossi, Jure Leskovec, Michael Bronstein, Guillaume Rabusseau, Reihaneh Rabbany", "tldr": "", "abstract": "We present the Temporal Graph Benchmark (TGB), a collection of challenging and diverse benchmark datasets for realistic, reproducible, and robust evaluation of machine learning models on temporal graphs. TGB datasets are of large scale, spanning years in duration, incorporate both node and edge-level prediction tasks and cover a diverse set of domains including social, trade, transaction, and transportation networks. For both tasks, we design evaluation protocols based on realistic use-cases. We extensively benchmark each dataset and find that the performance of common models can vary drastically across datasets. In addition, on dynamic node property prediction tasks, we show that simple methods often achieve superior performance compared to existing temporal graph models. We believe that these findings open up opportunities for future research on temporal graphs. Finally, TGB provides an automated machine learning pipeline for reproducible and accessible temporal graph research, including data loading, experiment setup and performance evaluation. TGB will be maintained and updated on a regular basis and welcomes community feedback. TGB datasets, data loaders, example codes, evaluation setup, and leaderboards are publicly available at https://tgb.complexdatalab.com/.", "keywords": "Temporal Graph Learning; Graph Datasets; Benchmark Evaluation", "primary_area": "", "supplementary_material": "", "author": "Shenyang Huang;Farimah Poursafaei;Jacob Danovitch;Matthias Fey;Weihua Hu;Emanuele Rossi;Jure Leskovec;Michael M. 
Bronstein;Guillaume Rabusseau;Reihaneh Rabbany", "authorids": "~Shenyang_Huang1;~Farimah_Poursafaei1;~Jacob_Danovitch1;~Matthias_Fey2;~Weihua_Hu1;~Emanuele_Rossi1;~Jure_Leskovec1;~Michael_M._Bronstein1;~Guillaume_Rabusseau1;~Reihaneh_Rabbany1", "gender": "F;;M;M;M;;M;M;F;M", "homepage": ";http://jacobdanovitch.me/;http://rusty1s.github.io;http://web.stanford.edu/~weihuahu/;https://www.emanuelerossi.co.uk/;http://cs.stanford.edu/~jure/;http://www.inf.usi.ch/bronstein/;https://www-labs.iro.umontreal.ca/~grabus/;http://www.reirab.com/;https://shenyanghuang.github.io/", "dblp": "277/0215;251/8947;180/9174;42/1232;;l/JureLeskovec;07/2668;143/7327;94/9024;249/2209", "google_scholar": "https://scholar.google.ca/citations?user=gZ7HEsMAAAAJ;;https://scholar.google.de/citations?user=5HaSBN0AAAAJ;wAFMjfkAAAAJ;DHlkBOYAAAAJ;Q_kKkIUAAAAJ;UU3N6-UAAAAJ;https://scholar.google.fr/citations?user=t2i4V4EAAAAJ;https://scholar.google.ca/citations?user=Foh_c-QAAAAJ;ljIXv6kAAAAJ", "orcid": ";;;;;0000-0002-5411-923X;;;;", "linkedin": "farimah-poursafaei-133195167/?originalSubdomain=ca;;;weihua-hu-a8284228/;;leskovec/;mbronstein/;;;", "or_profile": "~Farimah_Poursafaei1;~Jacob_Danovitch1;~Matthias_Fey2;~Weihua_Hu1;~Emanuele_Rossi1;~Jure_Leskovec1;~Michael_M._Bronstein1;~Guillaume_Rabusseau1;~Reihaneh_Rabbany1;~Andy_Huang1", "aff": "Mila, Quebec AI Institute;McGill University;TU Dortmund University;;Imperial College London;Kumo.AI;Twitter;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;McGill University, Mila", "aff_domain": "mila.quebec;mcgill.ca;udo.edu;;ic.ac.uk;kumo.ai;twitter.com;umontreal.ca;mila.umontreal.ca;mcgill.ca", "position": "Postdoc;MS student;PhD student;;PhD student;Chief Scientist;Head of Graph ML;Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhuang2023temporal,\ntitle={Temporal Graph Benchmark for Machine Learning on Temporal Graphs},\nauthor={Shenyang Huang and Farimah Poursafaei and Jacob Danovitch and Matthias Fey and Weihua Hu and Emanuele Rossi and Jure Leskovec and Michael M. 
Bronstein and Guillaume Rabusseau and Reihaneh Rabbany},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qG7IkQ7IBO}\n}", "github": "", "project": "", "reviewers": "WDxN;yfpt;87xo;j6Gx;12NE", "pdf_size": 534957, "rating": "6;6;7;7;9", "confidence": "5;3;4;4;5", "wc_summary_and_contributions": "61;95;29;61;109", "wc_strengths": "144;93;32;61;235", "wc_improvement": "238;174;145;198;72", "wc_limitations": "19;1;1;11;8", "wc_correctness": "1;13;1;1;12", "wc_clarity": "5;7;1;1;4", "wc_relation_to_prior_work": "18;31;1;1;6", "wc_documentation": "1;25;1;1;15", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "488;440;212;336;462", "wc_reply_reviewers": "18;0;11;335;0", "wc_reply_authors": "783;643;357;1081;277", "reply_reviewers": "1;0;1;2;0", "reply_authors": "1;2;1;3;1", "rating_avg": [ 7.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 71.0, 28.22764602300376 ], "wc_strengths_avg": [ 113.0, 71.42828571371429 ], "wc_improvement_avg": [ 165.4, 55.76235289153427 ], "wc_limitations_avg": [ 8.0, 6.752777206453653 ], "wc_correctness_avg": [ 5.6, 5.642694391866353 ], "wc_clarity_avg": [ 3.6, 2.33238075793812 ], "wc_relation_to_prior_work_avg": [ 11.4, 11.60344776348823 ], "wc_documentation_avg": [ 8.6, 9.830564581955606 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 387.6, 101.84223092607506 ], "wc_reply_reviewers_avg": [ 72.8, 131.27893966665025 ], "wc_reply_authors_avg": [ 628.2, 291.924921854918 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.48795003647426666, "gs_citation": 120, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6910742723391791342&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "mila.quebec;mcgill.ca;udo.edu;;ic.ac.uk;kumo.ai;twitter.com;umontreal.ca;mila.umontreal.ca;mcgill.ca", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;6;7;1", "aff_unique_norm": "Quebec AI Institute;McGill University;Technische Universit\u00e4t Dortmund;Imperial College London;Kumo.AI;Twitter, Inc.;Universit\u00e9 de Montr\u00e9al;University of Montreal", "aff_unique_dep": "AI Institute;;;;;;;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.mila.quebec;https://www.mcgill.ca;https://www.tu-dortmund.de;https://www.imperial.ac.uk;https://www.kumo.ai;https://twitter.com;https://www.umontreal.ca;https://www.umontreal.ca", "aff_unique_abbr": "Mila;McGill;TU Dortmund;ICL;Kumo.AI;Twitter;UdeM;UM", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Quebec;;Dortmund;Montreal", "aff_country_unique_index": "0;0;1;2;3;3;0;0;0", "aff_country_unique": "Canada;Germany;United Kingdom;United States" }, { "title": "Towards Revealing the Mystery behind Chain of Thought: A Theoretical Perspective", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70369", "id": "qHrADgAdYu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dfc310e81992d2e4cedc09ac47eff13e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qHrADgAdYu", "openreview": "https://openreview.net/forum?id=qHrADgAdYu", "poster": "/media/PosterPDFs/NeurIPS%202023/70369.png?t=1701768154.957121", "slides": "https://nips.cc/virtual/2023/poster/70369", "video": "https://nips.cc/virtual/2023/poster/70369", "author_site": 
"Guhao Feng, Bohang Zhang, Yuntian Gu, Haotian Ye, Di He, Liwei Wang", "tldr": "", "abstract": "Recent studies have discovered that Chain-of-Thought prompting (CoT) can dramatically improve the performance of Large Language Models (LLMs), particularly when dealing with complex tasks involving mathematics or reasoning. Despite the enormous empirical success, the underlying mechanisms behind CoT and how it unlocks the potential of LLMs remain elusive. In this paper, we take a first step towards theoretically answering these questions. Specifically, we examine the expressivity of LLMs with CoT in solving fundamental mathematical and decision-making problems. By using circuit complexity theory, we first give impossibility results showing that bounded-depth Transformers are unable to directly produce correct answers for basic arithmetic/equation tasks unless the model size grows super-polynomially with respect to the input length. In contrast, we then prove by construction that autoregressive Transformers of constant size suffice to solve both tasks by generating CoT derivations using a commonly used math language format. Moreover, we show LLMs with CoT can handle a general class of decision-making problems known as Dynamic Programming, thus justifying their power in tackling complex real-world tasks. Finally, an extensive set of experiments show that, while Transformers always fail to directly predict the answers, they can consistently learn to generate correct solutions step-by-step given sufficient CoT demonstrations.", "keywords": "Chain-of-Thought Prompting;Large Language Models;Theory;Circuit Complexity;Dynamic Programming", "primary_area": "", "supplementary_material": "", "author": "Guhao Feng;Bohang Zhang;Yuntian Gu;Haotian Ye;Di He;Liwei Wang", "authorids": "~Guhao_Feng1;~Bohang_Zhang1;~Yuntian_Gu1;~Haotian_Ye1;~Di_He1;~Liwei_Wang1", "gender": "M;M;;M;M;M", "homepage": ";https://zbh2047.github.io;https://github.com/guyuntian;https://haotianye.com;https://dihe-pku.github.io/;http://www.liweiwang-pku.com/", "dblp": ";276/0156.html;;284/0539;74/184;", "google_scholar": "wmDqYvUAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;qdyC5XsAAAAJ;VU4chlsAAAAJ;https://scholar.google.co.jp/citations?user=orVoz4IAAAAJ;VZHxoh8AAAAJ", "orcid": ";;;;;", "linkedin": ";zhangbohang;;;;", "or_profile": "~Guhao_Feng1;~Bohang_Zhang1;~Yuntian_Gu1;~Haotian_Ye1;~Di_He1;~Liwei_Wang1", "aff": "Peking University;Peking University;Peking University;Peking University;Microsoft;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;microsoft.com;pku.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Undergrad student;Senior Researcher;Full Professor", "bibtex": "@inproceedings{\nfeng2023towards,\ntitle={Towards Revealing the Mystery behind Chain of Thought: A Theoretical Perspective},\nauthor={Guhao Feng and Bohang Zhang and Yuntian Gu and Haotian Ye and Di He and Liwei Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qHrADgAdYu}\n}", "github": "", "project": "", "reviewers": "7MZe;P6n5;QoTB;C8fV", "pdf_size": 0, "rating": "7;8;8;9", "confidence": "4;2;3;2", "soundness": "4;4;3;4", "novelty": "3;4;3;4", "presentation": "3;4;3;4", "wc_summary": "62;133;47;72", "wc_strengths": "66;66;61;54", "wc_weaknesses": "31;45;259;9", "wc_questions": "153;47;128;59", "wc_limitations": "45;5;35;7", "wc_review": "357;296;530;201", "wc_reply_reviewers": "0;11;234;15", "wc_reply_authors": 
"0;14;391;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;3;1", "rating_avg": [ 8.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 78.5, 32.69938837348491 ], "wc_strengths_avg": [ 61.75, 4.9180788932265 ], "wc_weaknesses_avg": [ 86.0, 100.70253224224304 ], "wc_questions_avg": [ 96.75, 44.83511458667191 ], "wc_limitations_avg": [ 23.0, 17.378147196982766 ], "wc_review_avg": [ 346.0, 119.89787320882719 ], "wc_reply_reviewers_avg": [ 65.0, 97.72665961752709 ], "wc_reply_authors_avg": [ 101.25, 167.38484847799097 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 247, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3110079060146457333&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;microsoft.com;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "Peking U;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Don't be so Monotone: Relaxing Stochastic Line Search in Over-Parameterized Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70368", "id": "qHrZszJSXj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6d0bf1265ea9635fb4f9d56f16d7efb2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qHrZszJSXj", "openreview": "https://openreview.net/forum?id=qHrZszJSXj", "poster": "/media/PosterPDFs/NeurIPS%202023/70368.png?t=1702132519.2306693", "slides": "https://nips.cc/virtual/2023/poster/70368", "video": "https://nips.cc/virtual/2023/poster/70368", "author_site": "Leonardo Galli, Holger Rauhut, Mark Schmidt", "tldr": "", "abstract": "Recent works have shown that line search methods can speed up Stochastic Gradient Descent (SGD) and Adam in modern over-parameterized settings. However, existing line searches may take steps that are smaller than necessary since they require a monotone decrease of the (mini-)batch objective function. We explore nonmonotone line search methods to relax this condition and possibly accept larger step sizes. Despite the lack of a monotonic decrease, we prove the same fast rates of convergence as in the monotone case. Our experiments show that nonmonotone methods improve the speed of convergence and generalization properties of SGD/Adam even beyond the previous monotone line searches. We propose a POlyak NOnmonotone Stochastic (PoNoS) method, obtained by combining a nonmonotone line search with a Polyak initial step size. Furthermore, we develop a new resetting technique that in the majority of the iterations reduces the amount of backtracks to zero while still maintaining a large initial step size. 
To the best of our knowledge, a first runtime comparison shows that the epoch-wise advantage of line-search-based methods gets reflected in the overall computational time.", "keywords": "line search;nonmonotone;stochastic gradient descent;over-parametrized models;Polyak step size;optimization", "primary_area": "", "supplementary_material": "/attachment/e24734354554312bf376e75e2c41fb189364b020.pdf", "author": "Leonardo Galli;Holger Rauhut;Mark Schmidt", "authorids": "~Leonardo_Galli1;~Holger_Rauhut1;~Mark_Schmidt1", "gender": "M;M;", "homepage": "https://www.mathc.rwth-aachen.de/en/~galli;https://www.mathc.rwth-aachen.de/~rauhut/home/;", "dblp": "216/7831;52/5600;35/2638", "google_scholar": "nXtiLYgAAAAJ;https://scholar.google.de/citations?user=KA2BM_UAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8045-7101;0000-0003-4750-5092;", "linkedin": "leonardo-galli-0a48b5135;;", "or_profile": "~Leonardo_Galli1;~Holger_Rauhut1;~Mark_Schmidt1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;University of British Columbia", "aff_domain": "rwth-aachen.de;rwth-aachen.de;ubc.ca", "position": "Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngalli2023dont,\ntitle={Don't be so Monotone: Relaxing Stochastic Line Search in Over-Parameterized Models},\nauthor={Leonardo Galli and Holger Rauhut and Mark Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qHrZszJSXj}\n}", "github": "", "project": "", "reviewers": "qQnN;CWjK;hZgC;TC8b;9zy1;WR5e", "pdf_size": 1210417, "rating": "5;5;5;5;6;8", "confidence": "4;4;2;3;3;3", "soundness": "3;3;2;3;3;3", "novelty": "2;2;3;2;3;3", "presentation": "4;3;2;3;3;3", "wc_summary": "76;56;38;43;122;119", "wc_strengths": "75;35;16;81;236;46", "wc_weaknesses": "280;65;91;56;65;55", "wc_questions": "161;125;49;79;7;160", "wc_limitations": "52;1;2;11;52;1", "wc_review": "644;282;196;270;482;381", "wc_reply_reviewers": "237;61;0;6;17;15", "wc_reply_authors": "429;0;0;0;0;0", "reply_reviewers": "1;1;0;1;1;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 5.666666666666667, 1.1055415967851332 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 75.66666666666667, 33.9050963065371 ], "wc_strengths_avg": [ 81.5, 72.60107896351587 ], "wc_weaknesses_avg": [ 102.0, 80.48602363143554 ], "wc_questions_avg": [ 96.83333333333333, 57.12097299202417 ], "wc_limitations_avg": [ 19.833333333333332, 23.003019125516744 ], "wc_review_avg": [ 375.8333333333333, 150.3090335128117 ], "wc_reply_reviewers_avg": [ 56.0, 83.29065573840401 ], "wc_reply_authors_avg": [ 71.5, 159.87886039123495 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.14625448482542613, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4836604558639051613&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "rwth-aachen.de;rwth-aachen.de;ubc.ca", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "RWTH Aachen University;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.rwth-aachen.de;https://www.ubc.ca", "aff_unique_abbr": "RWTH;UBC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;Canada" }, { "title": "Sketching Algorithms for Sparse Dictionary Learning: PTAS and Turnstile Streaming", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70367", "id": "qHzEFxtheD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9768645621c2cd6c5b851a06205b92cf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qHzEFxtheD", "openreview": "https://openreview.net/forum?id=qHzEFxtheD", "poster": "/media/PosterPDFs/NeurIPS%202023/70367.png?t=1699497589.4726655", "slides": "https://nips.cc/virtual/2023/poster/70367", "video": "https://nips.cc/virtual/2023/poster/70367", "author_site": "Gregory Dexter, Petros Drineas, David Woodruff, Taisuke Yasuda", "tldr": "", "abstract": "Sketching algorithms have recently proven to be a powerful approach both for designing low-space streaming algorithms as well as fast polynomial time approximation schemes (PTAS). In this work, we develop new techniques to extend the applicability of sketching-based approaches to the sparse dictionary learning and the Euclidean $k$-means clustering problems. In particular, we initiate the study of the challenging setting where the dictionary/clustering assignment for each of the $n$ input points must be output, which has surprisingly received little attention in prior work. On the fast algorithms front, we obtain a new approach for designing PTAS's for the $k$-means clustering problem, which generalizes to the first PTAS for the sparse dictionary learning problem. On the streaming algorithms front, we obtain new upper bounds and lower bounds for dictionary learning and $k$-means clustering. In particular, given a design matrix $\\mathbf A\\in\\mathbb R^{n\\times d}$ in a turnstile stream, we show an $\\tilde O(nr/\\epsilon^2 + dk/\\epsilon)$ space upper bound for $r$-sparse dictionary learning of size $k$, an $\\tilde O(n/\\epsilon^2 + dk/\\epsilon)$ space upper bound for $k$-means clustering, as well as an $\\tilde O(n)$ space upper bound for $k$-means clustering on random order row insertion streams with a natural \"bounded sensitivity\" assumption. 
On the lower bounds side, we obtain a general $\\tilde\\Omega(n/\\epsilon + dk/\\epsilon)$ lower bound for $k$-means clustering, as well as an $\\tilde\\Omega(n/\\epsilon^2)$ lower bound for algorithms which can estimate the cost of a single fixed set of candidate centers.", "keywords": "dictionary learning;k means clustering;sketching;ptas;streaming", "primary_area": "", "supplementary_material": "/attachment/c14a6cabe90bff0eb973d6713927ecd230be405c.pdf", "author": "Gregory Dexter;Petros Drineas;David Woodruff;Taisuke Yasuda", "authorids": "~Gregory_Dexter1;~Petros_Drineas1;~David_Woodruff1;~Taisuke_Yasuda1", "gender": ";;M;M", "homepage": "https://www.gregorydexter.com/;https://www.cs.purdue.edu/homes/pdrineas/;http://www.cs.cmu.edu/~dwoodruf/;https://taisukeyasuda.github.io/", "dblp": "252/2283;67/1567;w/DPWoodruff;177/9741-2", "google_scholar": "nEIGQ00AAAAJ;https://scholar.google.co.uk/citations?user=Yw2PquQAAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;c62WqiEAAAAJ", "orcid": ";;;", "linkedin": "gregorydexter1;;;taisukeyasuda/", "or_profile": "~Gregory_Dexter1;~Petros_Drineas1;~David_Woodruff1;~Taisuke_Yasuda1", "aff": "Purdue University;Purdue University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "purdue.edu;purdue.edu;cmu.edu;cs.cmu.edu", "position": "PhD student;Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ndexter2023sketching,\ntitle={Sketching Algorithms for Sparse Dictionary Learning: {PTAS} and Turnstile Streaming},\nauthor={Gregory Dexter and Petros Drineas and David Woodruff and Taisuke Yasuda},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qHzEFxtheD}\n}", "github": "", "project": "", "reviewers": "ZwUH;NUiP;MGS2;rhYv", "pdf_size": 634927, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "3;4;3;4", "novelty": "3;3;4;3", "presentation": "3;3;2;4", "wc_summary": "275;161;159;217", "wc_strengths": "71;39;123;128", "wc_weaknesses": "93;68;201;26", "wc_questions": "49;70;434;143", "wc_limitations": "1;10;8;7", "wc_review": "489;348;925;521", "wc_reply_reviewers": "0;0;173;0", "wc_reply_authors": "0;0;36;0", "reply_reviewers": "0;0;3;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 203.0, 47.644516998286385 ], "wc_strengths_avg": [ 90.25, 37.06329046374593 ], "wc_weaknesses_avg": [ 97.0, 64.64131805586888 ], "wc_questions_avg": [ 174.0, 154.11197227989783 ], "wc_limitations_avg": [ 6.5, 3.3541019662496847 ], "wc_review_avg": [ 570.75, 214.6326804100438 ], "wc_reply_reviewers_avg": [ 43.25, 74.91119742735394 ], "wc_reply_authors_avg": [ 9.0, 15.588457268119896 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14953791734587667304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "purdue.edu;purdue.edu;cmu.edu;cs.cmu.edu", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Purdue University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.cmu.edu", 
"aff_unique_abbr": "Purdue;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Is Heterogeneity Notorious? Taming Heterogeneity to Handle Test-Time Shift in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70366", "id": "qJJmu4qsLO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/565f995643da6329cec701f26f8579f5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qJJmu4qsLO", "openreview": "https://openreview.net/forum?id=qJJmu4qsLO", "poster": "/media/PosterPDFs/NeurIPS%202023/70366.png?t=1701389940.2285948", "slides": "https://nips.cc/virtual/2023/poster/70366", "video": "https://nips.cc/virtual/2023/poster/70366", "author_site": "Yue Tan, Chen Chen, Weiming Zhuang, Xin Dong, Lingjuan Lyu, Guodong Long", "tldr": "", "abstract": "Federated learning (FL) is an effective machine learning paradigm where multiple clients can train models based on heterogeneous data in a decentralized manner without accessing their private data. However, existing FL systems undergo performance deterioration due to feature-level test-time shifts, which are well investigated in centralized settings but rarely studied in FL. The common non-IID issue in FL usually refers to inter-client heterogeneity during training phase, while the test-time shift refers to the intra-client heterogeneity during test phase. Although the former is always deemed to be notorious for FL, there is still a wealth of useful information delivered by heterogeneous data sources, which may potentially help alleviate the latter issue. To explore the possibility of using inter-client heterogeneity in handling intra-client heterogeneity, we firstly propose a contrastive learning-based FL framework, namely FedICON, to capture invariant knowledge among heterogeneous clients and consistently tune the model to adapt to test data. In FedICON, each client performs sample-wise supervised contrastive learning during the local training phase, which enhances sample-wise invariance encoding ability. Through global aggregation, the invariance extraction ability can be mutually boosted among inter-client heterogeneity. During the test phase, our test-time adaptation procedure leverages unsupervised contrastive learning to guide the model to smoothly generalize to test data under intra-client heterogeneity. 
Extensive experiments validate the effectiveness of the proposed FedICON in taming heterogeneity to handle test-time shift problems.", "keywords": "Federated Learning;Test-Time Shift;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/c67ac2b1440f6b0b8c86000431511085a36b2a80.pdf", "author": "Yue Tan;Chen Chen;Weiming Zhuang;Xin Dong;Lingjuan Lyu;Guodong Long", "authorids": "~Yue_Tan2;~Chen_Chen20;~Weiming_Zhuang1;~Xin_Dong1;~Lingjuan_Lyu1;~Guodong_Long2", "gender": ";M;;M;F;M", "homepage": "https://yuetan031.github.io/;https://cc233.github.io/;https://weiming.me/;http://www.simonxin.com/;https://sites.google.com/view/lingjuan-lyu;https://www.uts.edu.au/staff/guodong.long", "dblp": "28/4386;65/4423-43;274/0724;25/7257-9;178/9876;34/10089", "google_scholar": "WOtb3AcAAAAJ;;lLuLAzEAAAAJ;O8nBN64AAAAJ;;https://scholar.google.com.au/citations?user=Pl8m7hMAAAAJ", "orcid": ";0000-0001-7359-8515;;;;0000-0003-3740-9515", "linkedin": ";;;;;", "or_profile": "~Yue_Tan2;~Chen_Chen20;~Weiming_Zhuang1;~Xin_Dong1;~Lingjuan_Lyu1;~Guodong_Long2", "aff": "University of Technology Sydney;Zhejiang University;Sony Research;Harvard University;Sony;University of Technology Sydney", "aff_domain": "student.uts.edu.au;zju.edu.cn;sony.com;g.harvard.edu;sony.com;uts.edu.au", "position": "PhD student;PhD student;Researcher;PhD student;scientist;Associate Professor", "bibtex": "@inproceedings{\ntan2023is,\ntitle={Is Heterogeneity Notorious? Taming Heterogeneity to Handle Test-Time Shift in Federated Learning},\nauthor={Yue Tan and Chen Chen and Weiming Zhuang and Xin Dong and Lingjuan Lyu and Guodong Long},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qJJmu4qsLO}\n}", "github": "", "project": "", "reviewers": "v7Ae;pJGu;CniS;9n3S", "pdf_size": 1080782, "rating": "4;4;8;8", "confidence": "4;4;4;5", "soundness": "2;2;4;4", "novelty": "2;2;4;4", "presentation": "3;3;4;3", "wc_summary": "69;49;60;101", "wc_strengths": "54;48;102;144", "wc_weaknesses": "432;149;60;190", "wc_questions": "44;106;55;26", "wc_limitations": "1;5;7;9", "wc_review": "600;357;284;470", "wc_reply_reviewers": "508;601;44;16", "wc_reply_authors": "203;685;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "2;4;1;1", "rating_avg": [ 6.0, 2.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 19.382659776202026 ], "wc_strengths_avg": [ 87.0, 39.0 ], "wc_weaknesses_avg": [ 207.75, 137.73593394608395 ], "wc_questions_avg": [ 57.75, 29.71847068743612 ], "wc_limitations_avg": [ 5.5, 2.958039891549808 ], "wc_review_avg": [ 427.75, 119.5039225297647 ], "wc_reply_reviewers_avg": [ 292.25, 264.4885394492548 ], "wc_reply_authors_avg": [ 222.0, 279.86514609718733 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7537118860595675485&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "student.uts.edu.au;zju.edu.cn;sony.com;g.harvard.edu;sony.com;uts.edu.au", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "University of Technology Sydney;Zhejiang University;Sony;Harvard University;Sony Corporation", "aff_unique_dep": ";;Research;;", "aff_unique_url": 
"https://www.uts.edu.au;https://www.zju.edu.cn;https://www.sony.com;https://www.harvard.edu;https://www.sony.com", "aff_unique_abbr": "UTS;ZJU;Sony;Harvard;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;2;0", "aff_country_unique": "Australia;China;Japan;United States" }, { "title": "VaRT: Variational Regression Trees", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70365", "id": "qJRlz3SucN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8eff4196f50c43eda7bcf0f0cf87a0d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qJRlz3SucN", "openreview": "https://openreview.net/forum?id=qJRlz3SucN", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70365", "video": "https://nips.cc/virtual/2023/poster/70365", "tldr": "", "abstract": "Decision trees are a well-established tool in machine learning for classification and regression tasks. In this paper, we introduce a novel non-parametric Bayesian model that uses variational inference to approximate a posterior distribution over the space of stochastic decision trees. We evaluate the model's performance on 18 datasets and demonstrate its competitiveness with other state-of-the-art methods in regression tasks. We also explore its application to causal inference problems. We provide a fully vectorized implementation of our algorithm in PyTorch.", "keywords": "Probabilistic Machine Learning;Variational Inference;Bayesian Inference;Bayesian Nonparametrics", "primary_area": "", "supplementary_material": "/attachment/911cd426477928d06238a900fed1a2a64ec71dcc.zip", "author": "Sebastian Salazar", "authorids": "~Sebastian_Salazar1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "sebastian-salazar-866782181/", "or_profile": "~Sebastian_Salazar1", "aff": "Columbia University", "aff_domain": "columbia.edu", "position": "PhD student", "bibtex": "@inproceedings{\nsalazar2023vart,\ntitle={Va{RT}: Variational Regression Trees},\nauthor={Sebastian Salazar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qJRlz3SucN}\n}", "github": "", "project": "", "reviewers": "MwKc;2ow5;AUTX;Ya4w", "pdf_size": 780711, "rating": "5;6;7;8", "confidence": "4;4;3;5", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "2;2;3;4", "wc_summary": "67;29;45;106", "wc_strengths": "35;26;24;65", "wc_weaknesses": "128;112;132;189", "wc_questions": "211;63;17;185", "wc_limitations": "18;9;10;6", "wc_review": "459;239;228;551", "wc_reply_reviewers": "97;191;38;28", "wc_reply_authors": "101;448;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 61.75, 28.89095879336648 ], "wc_strengths_avg": [ 37.5, 16.408839081421938 ], "wc_weaknesses_avg": [ 140.25, 29.123658767400773 ], "wc_questions_avg": [ 119.0, 81.17881497040962 ], "wc_limitations_avg": [ 10.75, 4.437059837324712 ], "wc_review_avg": [ 369.25, 139.64665230502305 ], "wc_reply_reviewers_avg": [ 88.5, 64.7861867993479 ], "wc_reply_authors_avg": [ 137.25, 184.0888032988427 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 
], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1005256193810302383&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "columbia.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "TriRE: A Multi-Mechanism Learning Paradigm for Continual Knowledge Retention and Promotion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70364", "id": "qL3zPoWJda", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e991e5587c1daa49bbf9a818b3f02f9a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qL3zPoWJda", "openreview": "https://openreview.net/forum?id=qL3zPoWJda", "poster": "/media/PosterPDFs/NeurIPS%202023/70364.png?t=1702446255.1189976", "slides": "https://nips.cc/virtual/2023/poster/70364", "video": "https://nips.cc/virtual/2023/poster/70364", "author_site": "Preetha Vijayan, Prashant Bhat, Bahram Zonooz, Elahe Arani", "tldr": "", "abstract": "Continual learning (CL) has remained a persistent challenge for deep neural networks due to catastrophic forgetting (CF) of previously learned tasks. Several techniques such as weight regularization, experience rehearsal, and parameter isolation have been proposed to alleviate CF. Despite their relative success, these research directions have predominantly remained orthogonal and suffer from several shortcomings, while missing out on the advantages of competing strategies. On the contrary, the brain continually learns, accommodates, and transfers knowledge across tasks by simultaneously leveraging several neurophysiological processes, including neurogenesis, active forgetting, neuromodulation, metaplasticity, experience rehearsal, and context-dependent gating, rarely resulting in CF. Inspired by how the brain exploits multiple mechanisms concurrently, we propose TriRE, a novel CL paradigm that encompasses retaining the most prominent neurons for each task, revising and solidifying the extracted knowledge of current and past tasks, and actively promoting less active neurons for subsequent tasks through rewinding and relearning. 
Across CL settings, TriRE significantly reduces task interference and surpasses different CL approaches considered in isolation.", "keywords": "Continual Learning;Catastrophic Forgetting;Experience Replay;Lifelong Learning;Bio-Inspired;Active Forgetting;Scalable Neurogenesis", "primary_area": "", "supplementary_material": "/attachment/df2001a9397417849c90abc4dcca70abdb16ce8e.zip", "author": "Preetha Vijayan;Prashant Shivaram Bhat;Bahram Zonooz;Elahe Arani", "authorids": "~Preetha_Vijayan1;~Prashant_Shivaram_Bhat1;~Bahram_Zonooz1;~Elahe_Arani1", "gender": "F;M;M;F", "homepage": "https://www.linkedin.com/in/preetha-vijayan/;;https://sites.google.com/view/bahramzonooz;https://sites.google.com/view/elahe-arani", "dblp": ";340/2336;250/9573;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;e_I_v6cAAAAJ", "orcid": ";;;0000-0002-0952-7007", "linkedin": ";prashant-s-bhat/;;elahe-arani-630870b2/", "or_profile": "~Preetha_Vijayan1;~Prashant_Shivaram_Bhat1;~Bahram_Zonooz1;~Elahe_Arani1", "aff": "NavInfo Europe;NavInfo Europe B.V;Eindhoven University of Technology;Advanced Research Lab, NavInfo Europe", "aff_domain": "navinfo.eu;navinfo.eu;tue.nl;navinfo.eu", "position": "Researcher;Researcher;Assistant Professor;Sr. AI Manager & Sr. Research Scientist", "bibtex": "@inproceedings{\nvijayan2023trire,\ntitle={Tri{RE}: A Multi-Mechanism Learning Paradigm for Continual Knowledge Retention and Promotion},\nauthor={Preetha Vijayan and Prashant Shivaram Bhat and Bahram Zonooz and Elahe Arani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qL3zPoWJda}\n}", "github": "", "project": "", "reviewers": "6DDf;SoXi;tURo;3Tuq", "pdf_size": 626392, "rating": "4;5;7;8", "confidence": "3;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "3;2;3;4", "wc_summary": "46;144;121;81", "wc_strengths": "23;47;59;68", "wc_weaknesses": "182;377;315;123", "wc_questions": "2;9;70;128", "wc_limitations": "2;14;114;32", "wc_review": "255;591;679;432", "wc_reply_reviewers": "78;81;80;91", "wc_reply_authors": "1147;113;384;68", "reply_reviewers": "1;1;1;1", "reply_authors": "6;3;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.0, 37.54330832518626 ], "wc_strengths_avg": [ 49.25, 16.887495373796554 ], "wc_weaknesses_avg": [ 249.25, 101.3715319998667 ], "wc_questions_avg": [ 52.25, 51.109563684304725 ], "wc_limitations_avg": [ 40.5, 43.75785643744447 ], "wc_review_avg": [ 489.25, 161.63906551326014 ], "wc_reply_reviewers_avg": [ 82.5, 5.024937810560445 ], "wc_reply_authors_avg": [ 428.0, 432.35459983675435 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7302967433402215, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=587511416614489769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "navinfo.eu;navinfo.eu;tue.nl;navinfo.eu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "NavInfo;NavInfo Europe;Eindhoven University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.navinfo.com;https://www.navinfo.com;https://www.tue.nl", "aff_unique_abbr": "NavInfo;NavInfo;TU/e", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Unknown;Netherlands" }, { "id": "qO9VagA7kF", "title": "From Comprehensive Study to Low-Rank Compensation: Exploring Post-Training Quantization in LLMs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Post-training quantization (PTQ) has recently been demonstrated as a viable method to reduce memory consumption and compute cost for large language models. However, a comprehensive study on the effect of different quantization schemes, model families, and quantization bit precision has been lacking. In this work, we provide an extensive analysis of these components. We examine the impact of PTQ on weight-only, activation-only, and weight-and-activation quantization using various methods such as round-to-nearest (RTN), GPTQ, ZeroQuant, and their variants, applied to two different model families (OPT and BLOOM) with sizes ranging from 125M to 176B. We contribute by: (1) conducting a sensitivity analysis, revealing that activation quantization is generally more sensitive to weight quantization, and smaller models typically perform better than larger models in terms of activation quantization; (2) evaluating and comparing existing PTQ methods to optimize model size reduction and minimize accuracy impact, discovering that current methods can hardly achieve original model quality for quantization with either INT4-weight or INT4-weight-and-INT8-activation; (3) based on these insights, we optimize existing methods and introduce a technique called Low Rank Compensation (LoRC), which uses low-rank matrix to enhance model quality recovery with a negligible increase in model size", "keywords": "Understanding Quantization;Int8;Empirical Investigation;Post-Training Quantization", "primary_area": "", "supplementary_material": "/attachment/76acdaf732725f20cc12da08e0e5d7221429ac09.pdf", "author": "Zhewei Yao;Xiaoxia Wu;Cheng Li;stephen youn;Yuxiong He", "authorids": "~Zhewei_Yao1;~Xiaoxia_Wu1;~Cheng_Li10;sanghonline@gmail.com;~Yuxiong_He1", "gender": "M;F;F;;", "homepage": ";https://sites.google.com/view/xwu/home;https://chengli.netlify.app/;;", "dblp": "195/2887;63/1016;;;https://dblp.org/pers/hd/h/He:Yuxiong", "google_scholar": "gpSeMjYAAAAJ;Ry0Bdt8AAAAJ;da9Vl6QAAAAJ;;SB3_eb0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhewei_Yao1;~Xiaoxia_Wu1;~Cheng_Li10;sanghonline@gmail.com;~Yuxiong_He1", "aff": "Microsoft;Microsoft;Microsoft;;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;;microsoft.com", "position": "Researcher;Researcher;Researcher;;Researcher", "bibtex": "@misc{\nyao2023from,\ntitle={From Comprehensive Study to Low-Rank Compensation: Exploring Post-Training Quantization in {LLM}s},\nauthor={Zhewei Yao and Xiaoxia Wu and Cheng Li and stephen youn and Yuxiong He},\nyear={2023},\nurl={https://openreview.net/forum?id=qO9VagA7kF}\n}", "github": "", "project": "", "reviewers": "LU91;iFFk;wnse;x1uC", "site": "https://openreview.net/forum?id=qO9VagA7kF", "pdf_size": 369917, "rating": "4;5;6;7", "confidence": "3;2;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "29;38;48;74", "wc_strengths": "8;20;55;80", "wc_weaknesses": "88;64;91;29", "wc_questions": "12;7;92;10", "wc_limitations": "1;1;11;2", "wc_review": "138;130;297;195", "wc_reply_reviewers": "0;29;14;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 
1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 47.25, 16.843025262701474 ], "wc_strengths_avg": [ 40.75, 28.49012986983387 ], "wc_weaknesses_avg": [ 68.0, 24.829418035870273 ], "wc_questions_avg": [ 30.25, 35.695763053897586 ], "wc_limitations_avg": [ 3.75, 4.205650960315181 ], "wc_review_avg": [ 190.0, 66.66708333203125 ], "wc_reply_reviewers_avg": [ 10.75, 11.986972094736853 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6000000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6C_-R2taNWcJ:scholar.google.com/&scioq=From+Comprehensive+Study+to+Low-Rank+Compensation:+Exploring+Post-Training+Quantization+in+LLMs&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Read and Reap the Rewards: Learning to Play Atari with the Help of Instruction Manuals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70363", "id": "qP0Drg2HuH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/034d7bfeace2a9a258648b16fc626298-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qP0Drg2HuH", "openreview": "https://openreview.net/forum?id=qP0Drg2HuH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70363", "video": "https://nips.cc/virtual/2023/poster/70363", "author_site": "Yue Wu, Yewen Fan, Paul Pu Liang, Amos Azaria, Yuanzhi Li, Tom Mitchell", "tldr": "", "abstract": "High sample complexity has long been a challenge for RL. On the other hand, humans learn to perform tasks not only from interaction or demonstrations, but also by reading unstructured text documents, e.g., instruction manuals. Instruction manuals and wiki pages are among the most abundant data that could inform agents of valuable features and policies or task-specific environmental dynamics and reward structures. Therefore, we hypothesize that the ability to utilize human-written instruction manuals to assist learning policies for specific tasks should lead to a more efficient and better-performing agent. We propose the Read and Reward framework. Read and Reward speeds up RL algorithms on Atari games by reading manuals released by the Atari game developers. Our framework consists of a QA Extraction module that extracts and summarizes relevant information from the manual and a Reasoning module that evaluates object-agent interactions based on information from the manual. An auxiliary reward is then provided to a standard A2C RL agent, when interaction is detected. Experimentally, various RL algorithms obtain significant improvement in performance and training speed when assisted by our design. 
Code at github.com/Holmeswww/RnR", "keywords": "Games;Instruction Manual;Atari Games;Large Language Models;Language Models;Zero-shot;In-context prompting", "primary_area": "", "supplementary_material": "", "author": "Yue Wu;Yewen Fan;Paul Pu Liang;Amos Azaria;Yuanzhi Li;Tom Mitchell", "authorids": "~Yue_Wu17;~Yewen_Fan1;~Paul_Pu_Liang1;~Amos_Azaria1;~Yuanzhi_Li1;~Tom_Mitchell2", "gender": "M;M;M;Not Specified;M;M", "homepage": "https://www.yuewu.ml;https://tofuwen.github.io/;https://pliang279.github.io/;http://azariaa.com;;http://www.cs.cmu.edu/~tom", "dblp": "41/5979;200/1168;207/9749;18/9923;73/3628;", "google_scholar": "LcrSIhgAAAAJ;Q9_yaekAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=sdfKs_sAAAAJ;;", "orcid": ";;;0000-0002-5057-1309;;", "linkedin": ";yewen-fan;;;;", "or_profile": "~Yue_Wu17;~Yewen_Fan1;~Paul_Pu_Liang1;~Amos_Azaria1;~Yuanzhi_Li1;~Tom_Mitchell2", "aff": "Microsoft Research;Carnegie Mellon University;Carnegie Mellon University;Ariel University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "research.microsoft.com;cmu.edu;cs.cmu.edu;ariel.ac.il;andrew.cmu.edu;cs.cmu.edu", "position": "Intern;PhD student;PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023read,\ntitle={Read and Reap the Rewards: Learning to Play Atari with the Help of Instruction Manuals},\nauthor={Yue Wu and Yewen Fan and Paul Pu Liang and Amos Azaria and Yuanzhi Li and Tom Mitchell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qP0Drg2HuH}\n}", "github": "", "project": "", "reviewers": "LzHL;7mzB;qAA2;3Ei6", "pdf_size": 3967228, "rating": "5;6;6;6", "confidence": "4;2;4;4", "soundness": "2;3;2;1", "novelty": "2;2;2;2", "presentation": "2;3;3;2", "wc_summary": "77;101;63;161", "wc_strengths": "51;120;138;49", "wc_weaknesses": "105;101;367;74", "wc_questions": "57;56;63;22", "wc_limitations": "11;1;114;8", "wc_review": "301;379;745;314", "wc_reply_reviewers": "47;11;29;29", "wc_reply_authors": "30;0;0;65", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.5, 37.47999466382032 ], "wc_strengths_avg": [ 89.5, 40.0156219494337 ], "wc_weaknesses_avg": [ 161.75, 119.09948572517011 ], "wc_questions_avg": [ 49.5, 16.101242188104617 ], "wc_limitations_avg": [ 33.5, 46.61812952060604 ], "wc_review_avg": [ 434.75, 181.54389964964398 ], "wc_reply_reviewers_avg": [ 29.0, 12.727922061357855 ], "wc_reply_authors_avg": [ 23.75, 26.78035660703569 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8634829825588138333&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "research.microsoft.com;cmu.edu;cs.cmu.edu;ariel.ac.il;andrew.cmu.edu;cs.cmu.edu", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "Microsoft;Carnegie Mellon University;Ariel University", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cmu.edu;https://www.ariel.ac.il", 
"aff_unique_abbr": "MSR;CMU;Ariel U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Monitor-Guided Decoding of Code LMs with Static Analysis of Repository Context", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70362", "id": "qPUbKxKvXq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/662b1774ba8845fc1fa3d1fc0177ceeb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qPUbKxKvXq", "openreview": "https://openreview.net/forum?id=qPUbKxKvXq", "poster": "/media/PosterPDFs/NeurIPS%202023/70362.png?t=1702168775.9405863", "slides": "https://nips.cc/virtual/2023/poster/70362", "video": "https://nips.cc/virtual/2023/poster/70362", "author_site": "Lakshya A Agrawal, Aditya Kanade, Navin Goyal, Shuvendu Lahiri, Sriram Rajamani", "tldr": "", "abstract": "Language models of code (LMs) work well when the surrounding code provides sufficient context. This is not true when it becomes necessary to use types, functionality or APIs defined elsewhere in the repository or a linked library, especially those not seen during training. LMs suffer from limited awareness of such global context and end up hallucinating.\n\nIntegrated development environments (IDEs) assist developers in understanding repository context using static analysis. We extend this assistance, enjoyed by developers, to LMs. We propose monitor-guided decoding (MGD) where a monitor uses static analysis to guide the decoding. We construct a repository-level dataset PragmaticCode for method-completion in Java and evaluate MGD on it. On models of varying parameter scale, by monitoring for type-consistent object dereferences, MGD consistently improves compilation rates and agreement with ground truth. Further, LMs with fewer parameters, when augmented with MGD, can outperform larger LMs. With MGD, SantaCoder-1.1B achieves better compilation rate and next-identifier match than the much larger text-davinci-003 model.\n\nWe also conduct a generalizability study to evaluate the ability of MGD to generalize to multiple programming languages (Java, C# and Rust), coding scenarios (e.g., correct number of arguments to method calls), and to enforce richer semantic constraints (e.g., stateful API protocols). 
Our data and implementation are available at https://github.com/microsoft/monitors4codegen.", "keywords": "Language models;code generation;correctness;program analysis", "primary_area": "", "supplementary_material": "/attachment/ae6dfd06faaa4121f8e943fc6d4b2bf8d8675724.zip", "author": "Lakshya Agrawal;Aditya Kanade;Navin Goyal;Shuvendu K Lahiri;Sriram Rajamani", "authorids": "t-lakagrawal@microsoft.com;~Aditya_Kanade2;~Navin_Goyal1;~Shuvendu_K_Lahiri1;sriram@microsoft.com", "gender": ";;;M;", "homepage": ";https://www.microsoft.com/en-us/research/people/kanadeaditya/;;https://www.microsoft.com/en-us/research/people/shuvendu/;", "dblp": ";61/2636-1;20/6275;32/2903.html;", "google_scholar": ";FCCMbWYAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;", "linkedin": ";;;shuvendu-lahiri-9a35151/;", "or_profile": "t-lakagrawal@microsoft.com;~Aditya_Kanade2;~Navin_Goyal1;~Shuvendu_K_Lahiri1;sriram@microsoft.com", "aff": ";Microsoft;Microsoft;Microsoft Research;", "aff_domain": ";microsoft.com;microsoft.com;research.microsoft.com;", "position": ";Principal Researcher;Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nagrawal2023monitorguided,\ntitle={Monitor-Guided Decoding of Code {LM}s with Static Analysis of Repository Context},\nauthor={Lakshya Agrawal and Aditya Kanade and Navin Goyal and Shuvendu K Lahiri and Sriram Rajamani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qPUbKxKvXq}\n}", "github": "", "project": "", "reviewers": "hw3j;fL4k;LEhe;27c5", "pdf_size": 2623480, "rating": "5;6;6;7", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "106;72;180;46", "wc_strengths": "63;52;108;95", "wc_weaknesses": "194;279;325;105", "wc_questions": "17;191;352;72", "wc_limitations": "13;9;55;69", "wc_review": "393;603;1020;387", "wc_reply_reviewers": "233;140;175;25", "wc_reply_authors": "557;148;54;21", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.0, 50.32891812864648 ], "wc_strengths_avg": [ 79.5, 22.808989455914087 ], "wc_weaknesses_avg": [ 225.75, 84.07548691503368 ], "wc_questions_avg": [ 158.0, 128.45427201926762 ], "wc_limitations_avg": [ 36.5, 26.014419078657127 ], "wc_review_avg": [ 600.75, 257.20845145523504 ], "wc_reply_reviewers_avg": [ 143.25, 75.92224641039014 ], "wc_reply_authors_avg": [ 195.0, 214.1319686548461 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9872060343364786537&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";microsoft.com;microsoft.com;research.microsoft.com;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Statistical Insights into HSIC in High Dimensions", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70361", "id": "qPyvuFT0U9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3cfc102893d47c46295cb437949dccb5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qPyvuFT0U9", "openreview": "https://openreview.net/forum?id=qPyvuFT0U9", "poster": "/media/PosterPDFs/NeurIPS%202023/70361.png?t=1698070956.9795034", "slides": "https://nips.cc/virtual/2023/poster/70361", "video": "https://nips.cc/virtual/2023/poster/70361", "author_site": "Tao Zhang, Yaowu Zhang, Tingyou Zhou", "tldr": "", "abstract": "Measuring the nonlinear dependence between random vectors and testing for their statistical independence is a fundamental problem in statistics. One of the most popular dependence measures is the Hilbert-Schmidt independence criterion (HSIC), which has attracted increasing attention in recent years. However, most existing works have focused on either fixed or very high-dimensional covariates. In this work, we bridge the gap between these two scenarios and provide statistical insights into the performance of HSIC when the dimensions grow at different rates. We first show that, under the null hypothesis, the rescaled HSIC converges in distribution to a standard normal distribution. Then we provide a general condition for the HSIC based tests to have nontrivial power in high dimensions. By decomposing this condition, we illustrate how the ability of HSIC to measure nonlinear dependence changes with increasing dimensions. Moreover, we demonstrate that, depending on the sample size, the covariate dimensions and the dependence structures within covariates, the HSIC can capture different types of associations between random vectors. We also conduct extensive numerical studies to validate our theoretical results.", "keywords": "High dimensionality; Independence test; Kernel method; Nonlinear dependency.", "primary_area": "", "supplementary_material": "/attachment/cbe765a775ed4865ad7d32bc8f9c48d76017a831.pdf", "author": "Tao Zhang;Yaowu Zhang;Tingyou Zhou", "authorids": "~Tao_Zhang2;~Yaowu_Zhang1;~Tingyou_Zhou1", "gender": "M;;F", "homepage": ";;https://ds.zufe.edu.cn/info/1092/4616.htm", "dblp": ";;197/8093", "google_scholar": ";;", "orcid": "0009-0003-6672-452X;;", "linkedin": ";;", "or_profile": "~Tao_Zhang2;~Yaowu_Zhang1;~Tingyou_Zhou1", "aff": "Shanghai University of Finance and Economics;;Zhejiang University of Finance and Economics", "aff_domain": "sufe.edu;;zufe.edu.cn", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nzhang2023statistical,\ntitle={Statistical Insights into {HSIC} in High Dimensions},\nauthor={Tao Zhang and Yaowu Zhang and Tingyou Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qPyvuFT0U9}\n}", "github": "", "project": "", "reviewers": "V7Sb;WrHD;56Ec;TY3X;Uk8i", "pdf_size": 357381, "rating": "4;6;7;8;8", "confidence": "3;2;3;4;3", "soundness": "2;3;3;4;3", "novelty": "2;2;3;4;3", "presentation": "2;2;3;4;4", "wc_summary": "45;69;51;118;81", "wc_strengths": "89;24;45;68;94", "wc_weaknesses": "34;111;163;1;89", "wc_questions": "60;55;121;31;14", "wc_limitations": "23;6;26;14;1", "wc_review": "251;265;406;232;279", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.4966629547095764 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 
2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 72.8, 25.97229292919668 ], "wc_strengths_avg": [ 64.0, 26.465071320516028 ], "wc_weaknesses_avg": [ 79.6, 57.073987069417186 ], "wc_questions_avg": [ 56.2, 36.41647978594307 ], "wc_limitations_avg": [ 14.0, 9.57078889120432 ], "wc_review_avg": [ 286.6, 61.691490499095586 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13828816561218593776&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sufe.edu;;zufe.edu.cn", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Shanghai University of Finance and Economics;Zhejiang University of Finance and Economics", "aff_unique_dep": ";", "aff_unique_url": "http://www.sufe.edu.cn;http://www.zufe.edu.cn", "aff_unique_abbr": "SUFE;ZUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Complex Query Answering on Eventuality Knowledge Graph with Implicit Logical Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70360", "id": "qQnO1HLQHe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6174c67b136621f3f2e4a6b1d3286f6b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qQnO1HLQHe", "openreview": "https://openreview.net/forum?id=qQnO1HLQHe", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70360", "video": "https://nips.cc/virtual/2023/poster/70360", "author_site": "Jiaxin Bai, Xin Liu, Weiqi Wang, Chen Luo, Yangqiu Song", "tldr": "", "abstract": "Querying knowledge graphs (KGs) using deep learning approaches can naturally leverage the reasoning and generalization ability to learn to infer better answers. Traditional neural complex query answering (CQA) approaches mostly work on entity-centric KGs. However, in the real world, we also need to make logical inferences about events, states, and activities (i.e., eventualities or situations) to push learning systems from System I to System II, as proposed by Yoshua Bengio. Querying logically from an EVentuality-centric KG (EVKG) can naturally provide references for this kind of intuitive and logical inference. Thus, in this paper, we propose a new framework to leverage neural methods to answer complex logical queries based on an EVKG, which can satisfy not only traditional first-order logic constraints but also implicit logical constraints over eventualities concerning their occurrence and order. For instance, if we know that *Food is bad* happens before *PersonX adds soy sauce*, then *PersonX adds soy sauce* is unlikely to be the cause of *Food is bad* due to the implicit temporal constraint. To facilitate consistent reasoning on EVKGs, we propose Complex Eventuality Query Answering (CEQA), a more rigorous definition of CQA that considers the implicit logical constraints governing the temporal order and occurrence of eventualities. In this manner, we propose to leverage theorem provers for constructing benchmark datasets to ensure the answers satisfy implicit logical constraints. 
We also propose a Memory-Enhanced Query Encoding (MEQE) approach to significantly improve the performance of state-of-the-art neural query encoders on the CEQA task.", "keywords": "Knowledge Graph;Complex Query Answering;Eventuality Graph", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Bai;Xin Liu;Weiqi Wang;Chen Luo;Yangqiu Song", "authorids": "~Jiaxin_Bai1;~Xin_Liu9;~Weiqi_Wang1;~Chen_Luo3;~Yangqiu_Song1", "gender": "M;M;M;M;M", "homepage": "http://bjx.fun/;https://www.cse.ust.hk/~xliucr/;https://mighty-weaver.github.io/;https://chen-luo.com/;https://www.cse.ust.hk/~yqsong/", "dblp": "250/9281;76/1820-39.html;51/5775-1;46/4719-3.html;86/2159", "google_scholar": "BHQiBpQAAAAJ;https://scholar.google.com.hk/citations?user=WvC4upQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4EoNAFcAAAAJ;MdQZ-q8AAAAJ", "orcid": "0000-0002-8985-6467;0000-0001-9610-9526;0000-0002-1617-9805;0000-0001-5339-5817;0000-0002-7818-6090", "linkedin": ";xin-liu-179830143;weiqi-wang-a49b5019a/;chen-luo-a7a45b84/;yqsong/", "or_profile": "~Jiaxin_Bai1;~Xin_Liu9;~Weiqi_Wang1;~Chen_Luo3;~Yangqiu_Song1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Amazon;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk;amazon.com;ust.hk", "position": "PhD student;PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nbai2023complex,\ntitle={Complex Query Answering on Eventuality Knowledge Graph with Implicit Logical Constraints},\nauthor={Jiaxin Bai and Xin Liu and Weiqi Wang and Chen Luo and Yangqiu Song},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qQnO1HLQHe}\n}", "github": "", "project": "", "reviewers": "DAya;YBho;ktzN;Ne3e;dKK5", "pdf_size": 1191565, "rating": "3;5;5;6;6", "confidence": "4;4;3;3;3", "soundness": "1;3;3;3;4", "novelty": "1;3;3;3;3", "presentation": "2;2;3;4;3", "wc_summary": "236;156;91;46;52", "wc_strengths": "2;107;45;45;29", "wc_weaknesses": "2;232;101;211;123", "wc_questions": "2;40;73;3;5", "wc_limitations": "2;12;7;34;1", "wc_review": "244;547;317;339;210", "wc_reply_reviewers": "0;55;152;0;0", "wc_reply_authors": "0;409;810;0;0", "reply_reviewers": "0;2;1;0;0", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 116.2, 71.5832382614813 ], "wc_strengths_avg": [ 45.6, 34.4882588716798 ], "wc_weaknesses_avg": [ 133.8, 82.6641397463253 ], "wc_questions_avg": [ 24.6, 28.075612192791095 ], "wc_limitations_avg": [ 11.2, 12.056533498481228 ], "wc_review_avg": [ 331.4, 117.57993026022766 ], "wc_reply_reviewers_avg": [ 41.4, 59.26077961012662 ], "wc_reply_authors_avg": [ 243.8, 324.40369911577767 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.74535599249993, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=686037623346610203&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ust.hk;ust.hk;ust.hk;amazon.com;ust.hk", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and 
Technology;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ust.hk;https://www.amazon.com", "aff_unique_abbr": "HKUST;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Provably Efficient Offline Goal-Conditioned Reinforcement Learning with General Function Approximation and Single-Policy Concentrability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70359", "id": "qS9aHF8bXz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0cfc9404f89400c5ed897035e0d3748c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qS9aHF8bXz", "openreview": "https://openreview.net/forum?id=qS9aHF8bXz", "poster": "/media/PosterPDFs/NeurIPS%202023/70359.png?t=1701999764.5835164", "slides": "https://nips.cc/virtual/2023/poster/70359", "video": "https://nips.cc/virtual/2023/poster/70359", "author_site": "Hanlin Zhu, Amy Zhang", "tldr": "", "abstract": "Goal-conditioned reinforcement learning (GCRL) refers to learning general-purpose skills that aim to reach diverse goals. In particular, offline GCRL only requires purely pre-collected datasets to perform training tasks without additional interactions with the environment. Although offline GCRL has become increasingly prevalent and many previous works have demonstrated its empirical success, the theoretical understanding of efficient offline GCRL algorithms is not well established, especially when the state space is huge and the offline dataset only covers the policy we aim to learn. In this paper, we provide a rigorous theoretical analysis of an existing empirically successful offline GCRL algorithm. We prove that under slight modification, this algorithm enjoys an $\\tilde{O}(\\text{poly}(1/\\epsilon))$ sample complexity (where $\\epsilon$ is the desired suboptimality of the learned policy) with general function approximation thanks to the property of (semi-)strong convexity of the objective functions. We only require nearly minimal assumptions on the dataset (single-policy concentrability) and the function class (realizability). Moreover, this algorithm consists of two uninterleaved optimization steps, which we refer to as $V$-learning and policy learning, and is computationally stable since it does not involve minimax optimization. 
We also empirically validate our theory by showing that the modified algorithm outperforms the previous algorithm in various real-world environments.\nTo the best of our knowledge, this is the first algorithm that is both provably efficient with general function approximation and single-policy concentrability, and empirically successful without requiring solving minimax optimization problems.", "keywords": "offline goal-conditioned RL;provably efficient algorithm;single-policy concentrability;general function approximation", "primary_area": "", "supplementary_material": "/attachment/ea97a13dfb6b2bc85c0b629c995bb5967c7cc931.pdf", "author": "Hanlin Zhu;Amy Zhang", "authorids": "~Hanlin_Zhu2;~Amy_Zhang1", "gender": "M;F", "homepage": "https://hanlinzhu.com/;", "dblp": ";43/2754", "google_scholar": "yDVn5LEAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Hanlin_Zhu2;~Amy_Zhang2", "aff": "Electrical Engineering & Computer Science Department, University of California Berkeley;Meta Facebook", "aff_domain": "eecs.berkeley.edu;facebook.com", "position": "PhD student;Research Scientist", "bibtex": "@inproceedings{\nzhu2023provably,\ntitle={Provably Efficient Offline Goal-Conditioned Reinforcement Learning with General Function Approximation and Single-Policy Concentrability},\nauthor={Hanlin Zhu and Amy Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qS9aHF8bXz}\n}", "github": "", "project": "", "reviewers": "vRJf;quHd;fB34;39fV", "pdf_size": 426729, "rating": "6;6;6;7", "confidence": "4;4;2;1", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "102;56;42;164", "wc_strengths": "48;29;28;39", "wc_weaknesses": "116;211;12;38", "wc_questions": "210;8;1;27", "wc_limitations": "6;12;1;13", "wc_review": "482;316;84;281", "wc_reply_reviewers": "71;188;113;6", "wc_reply_authors": "26;142;105;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.0, 47.634021455258214 ], "wc_strengths_avg": [ 36.0, 8.154753215150045 ], "wc_weaknesses_avg": [ 94.25, 77.51249899209805 ], "wc_questions_avg": [ 61.5, 86.26268022731499 ], "wc_limitations_avg": [ 8.0, 4.847679857416329 ], "wc_review_avg": [ 290.75, 141.46974058080406 ], "wc_reply_reviewers_avg": [ 94.5, 66.08517231573207 ], "wc_reply_authors_avg": [ 72.0, 53.27757501988994 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4443937938352737629&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "eecs.berkeley.edu;facebook.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Berkeley;Meta", "aff_unique_dep": "Electrical Engineering & Computer Science Department;Meta Platforms, Inc.", "aff_unique_url": "https://www.berkeley.edu;https://meta.com", "aff_unique_abbr": "UC Berkeley;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "The Double-Edged Sword of Implicit Bias: Generalization vs. 
Robustness in ReLU Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70358", "id": "qSCziWQBPD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c26c389d60ec419fd24b5fee5b35796-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qSCziWQBPD", "openreview": "https://openreview.net/forum?id=qSCziWQBPD", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70358", "video": "https://nips.cc/virtual/2023/poster/70358", "author_site": "Spencer Frei, Spencer Frei, Gal Vardi, Peter Bartlett, Nati Srebro", "tldr": "", "abstract": "In this work, we study the implications of the implicit bias of gradient flow on generalization and adversarial robustness in ReLU networks. We focus on a setting where the data consists of clusters and the correlations between cluster means are small, and show that in two-layer ReLU networks gradient flow is biased towards solutions that generalize well, but are vulnerable to adversarial examples. Our results hold even in cases where the network is highly overparameterized. Despite the potential for harmful overfitting in such settings, we prove that the implicit bias of gradient flow prevents it. However, the implicit bias also leads to non-robust solutions (susceptible to small adversarial $\\ell_2$-perturbations), even though robust networks that fit the data exist.", "keywords": "adversarial robustness;neural networks;implicit bias;generalization", "primary_area": "", "supplementary_material": "/attachment/103a671b5ae2ddcc673bd0a047073641b4da5d79.pdf", "author": "Spencer Frei;Gal Vardi;Peter Bartlett;Nathan Srebro", "authorids": "~Spencer_Frei1;~Gal_Vardi1;~Peter_Bartlett1;~Nathan_Srebro1", "gender": "M;M;M;M", "homepage": "http://spencerfrei.github.io/;https://sites.google.com/view/galvardi/home;https://www.stat.berkeley.edu/~bartlett/;http://ttic.uchicago.edu/~nati/", "dblp": "250/2714;https://dblp.uni-trier.de/pid/167/9638.html;https://dblp.org/pers/hd/b/Bartlett:Peter_L=;50/3633", "google_scholar": "c7N8SoEAAAAJ;https://scholar.google.co.il/citations?hl=en;yQNhFGUAAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Spencer_Frei1;~Gal_Vardi1;~Peter_Bartlett1;~Nathan_Srebro1", "aff": "University of California, Berkeley;Toyota Technological Institute at Chicago;University of California, Berkeley;University of Chicago", "aff_domain": "berkeley.edu;ttic.edu;berkeley;uchicago.edu", "position": "Postdoc;Postdoc;Professor;Full Professor", "bibtex": "@inproceedings{\nfrei2023the,\ntitle={The Double-Edged Sword of Implicit Bias: Generalization vs. 
Robustness in Re{LU} Networks},\nauthor={Spencer Frei and Gal Vardi and Peter Bartlett and Nathan Srebro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qSCziWQBPD}\n}", "github": "", "project": "", "reviewers": "ZRWm;kUQf;rYUh;8zLE;DAg1", "pdf_size": 396898, "rating": "5;5;7;7;7", "confidence": "2;1;4;3;4", "soundness": "4;3;3;4;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "166;50;33;62;111", "wc_strengths": "126;21;166;98;51", "wc_weaknesses": "213;6;50;99;64", "wc_questions": "144;1;26;34;50", "wc_limitations": "254;1;1;17;42", "wc_review": "903;79;276;310;318", "wc_reply_reviewers": "396;0;0;17;17", "wc_reply_authors": "417;0;0;0;0", "reply_reviewers": "2;0;0;1;1", "reply_authors": "3;0;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 84.4, 48.35948717676812 ], "wc_strengths_avg": [ 92.4, 51.74785019689224 ], "wc_weaknesses_avg": [ 86.4, 69.96742099005793 ], "wc_questions_avg": [ 51.0, 49.120260585628 ], "wc_limitations_avg": [ 63.0, 96.67057463364951 ], "wc_review_avg": [ 377.2, 277.0020938549021 ], "wc_reply_reviewers_avg": [ 86.0, 155.18633960500517 ], "wc_reply_authors_avg": [ 83.4, 166.8 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.9797958971132712 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9101820546182064, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6246456797668643610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "berkeley.edu;ttic.edu;berkeley;uchicago.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of California, Berkeley;Toyota Technological Institute at Chicago;University of Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.tti-chicago.org;https://www.uchicago.edu", "aff_unique_abbr": "UC Berkeley;TTI Chicago;UChicago", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Berkeley;Chicago;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Alleviating the Semantic Gap for Generalized fMRI-to-Image Reconstruction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70357", "id": "qSS9izTOpo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3106c718fe84b91fc301fe2f5b738448-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qSS9izTOpo", "openreview": "https://openreview.net/forum?id=qSS9izTOpo", "poster": "/media/PosterPDFs/NeurIPS%202023/70357.png?t=1697534394.1567144", "slides": "https://nips.cc/virtual/2023/poster/70357", "video": "https://nips.cc/virtual/2023/poster/70357", "author_site": "Tao Fang, Qian Zheng, Gang Pan", "tldr": "", "abstract": "Although existing fMRI-to-image reconstruction methods could predict high-quality images, they do not explicitly consider the semantic gap between training and testing data, resulting in reconstruction with unstable and uncertain semantics. This paper addresses the problem of generalized fMRI-to-image reconstruction by explicitly alleviating the semantic gap. 
Specifically, we leverage the pre-trained CLIP model to map the training data to a compact feature representation, which essentially extends the sparse semantics of training data to dense ones, thus alleviating the semantic gap for instances near known concepts (i.e., inside the training super-classes). Inspired by the robust low-level representation in fMRI data, which could help alleviate the semantic gap for instances that are far from the known concepts (i.e., outside the training super-classes), we leverage structural information as a general cue to guide image reconstruction. Further, we quantify the semantic uncertainty based on probability density estimation and achieve Generalized fMRI-to-image reconstruction by adaptively integrating Expanded Semantics and Structural information (GESS) within a diffusion process. Experimental results demonstrate that the proposed GESS model outperforms state-of-the-art methods, and we propose a generalized scenario split strategy to evaluate the advantage of GESS in closing the semantic gap.", "keywords": "fMRI;image reconstruction;brain decoding", "primary_area": "", "supplementary_material": "/attachment/f443f723a61076c7fa9d1a3f15bb7af145158f41.pdf", "author": "Tao Fang;Qian Zheng;Gang Pan", "authorids": "~Tao_Fang4;~Qian_Zheng5;~Gang_Pan1", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": "https://scholar.google.com.hk/citations?user=Gi9QuygAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tao_Fang4;~Qian_Zheng5;~Gang_Pan1", "aff": "Zhejiang University;;", "aff_domain": "zju.edu.cn;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nfang2023alleviating,\ntitle={Alleviating the Semantic Gap for Generalized f{MRI}-to-Image Reconstruction},\nauthor={Tao Fang and Qian Zheng and Gang Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qSS9izTOpo}\n}", "github": "", "project": "", "reviewers": "AK4m;qNjJ;NuyD;ciXJ", "pdf_size": 10270737, "rating": "6;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;2;2", "wc_summary": "76;68;74;84", "wc_strengths": "63;6;64;17", "wc_weaknesses": "50;6;64;54", "wc_questions": "7;144;56;185", "wc_limitations": "23;6;11;26", "wc_review": "219;230;269;366", "wc_reply_reviewers": "0;33;24;34", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.5, 5.722761571129799 ], "wc_strengths_avg": [ 37.5, 26.291633650269812 ], "wc_weaknesses_avg": [ 43.5, 22.242976419535225 ], "wc_questions_avg": [ 98.0, 70.23175919767353 ], "wc_limitations_avg": [ 16.5, 8.261355820929152 ], "wc_review_avg": [ 271.0, 57.90941201566461 ], "wc_reply_reviewers_avg": [ 22.75, 13.699908758820257 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2182636023682017431&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", 
"aff_unique_abbr": "ZJU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Multi-resolution Spectral Coherence for Graph Generation with Score-based Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70356", "id": "qUlpDjYnsp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/427f20d90386fd27804f1831d6a3d48f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qUlpDjYnsp", "openreview": "https://openreview.net/forum?id=qUlpDjYnsp", "poster": "/media/PosterPDFs/NeurIPS%202023/70356.png?t=1701154320.2851446", "slides": "https://nips.cc/virtual/2023/poster/70356", "video": "https://nips.cc/virtual/2023/poster/70356", "author_site": "Hyuna Cho, Minjae Jeong, Sooyeon Jeon, Sungsoo Ahn, Won Hwa Kim", "tldr": "", "abstract": "Successful graph generation depends on the accurate estimation of the joint distribution of graph components such as nodes and edges from training data. While recent deep neural networks have demonstrated sampling of realistic graphs together with diffusion models, however, they still suffer from oversmoothing problems which are inherited from conventional graph convolution and thus high-frequency characteristics of nodes and edges become intractable. To overcome such issues and generate graphs with high fidelity, this paper introduces a novel approach that captures the dependency between nodes and edges at multiple resolutions in the spectral space. By modeling the joint distribution of node and edge signals in a shared graph wavelet space, together with a score-based diffusion model, we propose a Wavelet Graph Diffusion Model (Wave-GD) which lets us sample synthetic graphs with real-like frequency characteristics of nodes and edges. 
Experimental results on four representative benchmark datasets validate the superiority of the Wave-GD over existing approaches, highlighting its potential for a wide range of applications that involve graph data.", "keywords": "graph wavelet transform;multi-scale wavelet filtering;graph generation;diffusion model", "primary_area": "", "supplementary_material": "", "author": "Hyuna Cho;Minjae Jeong;Sooyeon Jeon;Sungsoo Ahn;Won Hwa Kim", "authorids": "~Hyuna_Cho1;~Minjae_Jeong1;~Sooyeon_Jeon1;~Sungsoo_Ahn1;~Won_Hwa_Kim4", "gender": "F;M;;M;M", "homepage": "https://sites.google.com/view/hyunacho;https://minjaetidtid.github.io/;https://sooyeon-j.github.io/;https://sungsooahn.super.site/;https://wwplato.github.io/", "dblp": "302/4777;;;90/5164;12/10278", "google_scholar": ";oWIp_2QAAAAJ;;XTenHs0AAAAJ;aWPSHNwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Hyuna_Cho1;~Minjae_Jeong1;~Sooyeon_Jeon1;~Sungsoo_Ahn1;~Won_Hwa_Kim1", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology;Pohang University of Science and Technology;Pohang University of Science and Technology;University of Texas, Arlington", "aff_domain": "postech.ac.kr;postech.ac.kr;postech.edu;postech.ac.kr;uta.edu", "position": "PhD student;PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncho2023multiresolution,\ntitle={Multi-resolution Spectral Coherence for Graph Generation with Score-based Diffusion},\nauthor={Hyuna Cho and Minjae Jeong and Sooyeon Jeon and Sungsoo Ahn and Won Hwa Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qUlpDjYnsp}\n}", "github": "", "project": "", "reviewers": "ZNjR;nurq;jRF4;6cNy", "pdf_size": 2361023, "rating": "6;6;6;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;2;3", "wc_summary": "45;157;77;164", "wc_strengths": "37;75;75;79", "wc_weaknesses": "251;299;157;74", "wc_questions": "53;105;87;29", "wc_limitations": "40;3;6;10", "wc_review": "426;639;402;356", "wc_reply_reviewers": "11;21;29;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.75, 51.080206538345166 ], "wc_strengths_avg": [ 66.5, 17.109938632268673 ], "wc_weaknesses_avg": [ 195.25, 86.6555681996258 ], "wc_questions_avg": [ 68.5, 29.47456530637899 ], "wc_limitations_avg": [ 14.75, 14.788086421170252 ], "wc_review_avg": [ 455.75, 108.74827584840139 ], "wc_reply_reviewers_avg": [ 18.5, 7.123903424387503 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14721523331514034535&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "postech.ac.kr;postech.ac.kr;postech.edu;postech.ac.kr;uta.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Pohang University of Science and Technology;University of Texas at Arlington", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.uta.edu", "aff_unique_abbr": "POSTECH;UTA", "aff_campus_unique_index": "0;0;0;0;1", "aff_campus_unique": 
"Pohang;Arlington", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "LambdaBeam: Neural Program Search with Higher-Order Functions and Lambdas", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70355", "id": "qVMPXrX4FR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a10da26f47120217c1b7c2aeb2979048-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qVMPXrX4FR", "openreview": "https://openreview.net/forum?id=qVMPXrX4FR", "poster": "/media/PosterPDFs/NeurIPS%202023/70355.png?t=1702370164.0338056", "slides": "https://nips.cc/virtual/2023/poster/70355", "video": "https://nips.cc/virtual/2023/poster/70355", "author_site": "Kensen Shi, Hanjun Dai, Wen-Ding Li, Kevin Ellis, Charles Sutton", "tldr": "", "abstract": "Search is an important technique in program synthesis that allows for adaptive strategies such as focusing on particular search directions based on execution results. Several prior works have demonstrated that neural models are effective at guiding program synthesis searches. However, a common drawback of those approaches is the inability to handle iterative loops, higher-order functions, or lambda functions, thus limiting prior neural searches from synthesizing longer and more general programs. We address this gap by designing a search algorithm called LambdaBeam that can construct arbitrary lambda functions that compose operations within a given DSL. We create semantic vector representations of the execution behavior of the lambda functions and train a neural policy network to choose which lambdas to construct during search, and pass them as arguments to higher-order functions to perform looping computations. Our experiments show that LambdaBeam outperforms neural, symbolic, and LLM-based techniques in an integer list manipulation domain.", "keywords": "Program Synthesis;Programming By Example;Lambdas;Functional Programming", "primary_area": "", "supplementary_material": "", "author": "Kensen Shi;Hanjun Dai;Wen-Ding Li;Kevin Ellis;Charles Sutton", "authorids": "~Kensen_Shi1;~Hanjun_Dai1;~Wen-Ding_Li1;~Kevin_Ellis1;~Charles_Sutton1", "gender": "M;M;;M;M", "homepage": ";https://hanjun-dai.github.io;https://www.cs.cornell.edu/~wdli/;https://www.cs.cornell.edu/~ellisk/;http://homepages.inf.ed.ac.uk/csutton/", "dblp": "135/8307;144/7311;132/0674;;59/5879", "google_scholar": "LAL4SIMAAAAJ;obpl7GQAAAAJ;2G2mr9QAAAAJ;L7XI6asAAAAJ;https://scholar.google.co.uk/citations?user=hYtGXD0AAAAJ", "orcid": "0000-0001-7140-7869;;;;0000-0002-0041-3820", "linkedin": ";hanjun-dai;;;charles-sutton-772aa126", "or_profile": "~Kensen_Shi1;~Hanjun_Dai1;~Wen-Ding_Li1;~Kevin_Ellis1;~Charles_Sutton1", "aff": "Google;Google Research;Cornell University;Cornell University;University of Edinburgh", "aff_domain": "google.com;google.com;cornell.edu;cornell.edu;ed.ac.uk", "position": "Software Engineer;Researcher;PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nshi2023lambdabeam,\ntitle={LambdaBeam: Neural Program Search with Higher-Order Functions and Lambdas},\nauthor={Kensen Shi and Hanjun Dai and Wen-Ding Li and Kevin Ellis and Charles Sutton},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qVMPXrX4FR}\n}", "github": "", "project": "", "reviewers": "WhTE;JwfZ;c5as;ErYC", "pdf_size": 797502, "rating": "6;7;7;7", "confidence": "4;4;2;4", "soundness": "3;3;3;3", "novelty": 
"3;4;3;3", "presentation": "3;3;2;3", "wc_summary": "82;115;99;120", "wc_strengths": "88;60;87;50", "wc_weaknesses": "103;257;147;124", "wc_questions": "43;107;58;63", "wc_limitations": "10;16;25;25", "wc_review": "326;555;416;382", "wc_reply_reviewers": "99;139;329;20", "wc_reply_authors": "203;29;44;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 14.882876066137216 ], "wc_strengths_avg": [ 71.25, 16.63392617513977 ], "wc_weaknesses_avg": [ 157.75, 59.37749994737064 ], "wc_questions_avg": [ 67.75, 23.826193569263218 ], "wc_limitations_avg": [ 19.0, 6.363961030678928 ], "wc_review_avg": [ 419.75, 84.44043758768662 ], "wc_reply_reviewers_avg": [ 146.75, 113.60100131600953 ], "wc_reply_authors_avg": [ 69.0, 78.96518220076491 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8520906870986074273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "google.com;google.com;cornell.edu;cornell.edu;ed.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Google;Cornell University;University of Edinburgh", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.cornell.edu;https://www.ed.ac.uk", "aff_unique_abbr": "Google;Cornell;Edinburgh", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Stable Bias: Evaluating Societal Representations in Diffusion Models", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73455", "id": "qVXYU3F017", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b01153e7112b347d8ed54f317840d8af-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qVXYU3F017", "openreview": "https://openreview.net/forum?id=qVXYU3F017", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73455", "video": "https://nips.cc/virtual/2023/poster/73455", "author_site": "Sasha Luccioni, Christopher Akiki, Margaret Mitchell, Yacine Jernite", "tldr": "", "abstract": "As machine learning-enabled Text-to-Image (TTI) systems are becoming increasingly prevalent and seeing growing adoption as commercial services, characterizing the social biases they exhibit is a necessary first step to lowering their risk of discriminatory outcomes. This evaluation, however, is made more difficult by the synthetic nature of these systems\u2019 outputs: common definitions of diversity are grounded in social categories of people living in the world, whereas the artificial depictions of fictive humans created by these systems have no inherent gender or ethnicity. To address this need, we propose a new method for exploring the social biases in TTI systems. Our approach relies on characterizing the variation in generated images triggered by enumerating gender and ethnicity markers in the prompts, and comparing it to the variation engendered by spanning different professions. 
This allows us to (1) identify specific bias trends, (2) provide targeted scores to directly compare models in terms of diversity and representation, and (3) jointly model interdependent social variables to support a multidimensional analysis. We leverage this method to analyze images generated by 3 popular TTI systems (Dall\u00b7E 2, Stable Diffusion v1.4 and 2) and find that while all of their outputs show correlations with US labor demographics, they also consistently under-represent marginalized identities to different extents. We also release the datasets and low-code interactive bias exploration platforms developed for this work, as well as the necessary tools to similarly evaluate additional TTI systems.", "keywords": "text-to-image models;diffusion models;bias and fairness;data exploration", "primary_area": "", "supplementary_material": "/attachment/a34e9b27113d2dd05c25d91ae19c3dba51c466cf.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nluccioni2023stable,\ntitle={Stable Bias: Evaluating Societal Representations in Diffusion Models},\nauthor={Sasha Luccioni and Christopher Akiki and Margaret Mitchell and Yacine Jernite},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qVXYU3F017}\n}", "github": "", "project": "", "reviewers": "JWck;75aP;MKkt;M7wi;Uoyq", "pdf_size": 1661848, "rating": "5;7;8;8;8", "confidence": "3;3;5;3;5", "wc_summary_and_contributions": "117;157;51;34;77", "wc_strengths": "102;148;155;81;79", "wc_improvement": "194;234;1003;20;357", "wc_limitations": "109;46;18;166;40", "wc_correctness": "80;18;46;13;10", "wc_clarity": "180;31;134;23;5", "wc_relation_to_prior_work": "1;32;211;35;31", "wc_documentation": "76;28;43;20;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "860;695;1662;393;601", "wc_reply_reviewers": "0;20;0;9;22", "wc_reply_authors": "732;439;797;54;445", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "wc_summary_and_contributions_avg": [ 87.2, 44.73209138862166 ], "wc_strengths_avg": [ 113.0, 32.526911934581186 ], "wc_improvement_avg": [ 361.6, 338.37174822966534 ], "wc_limitations_avg": [ 75.8, 54.31169303197977 ], "wc_correctness_avg": [ 33.4, 26.575176386996944 ], "wc_clarity_avg": [ 74.6, 69.34724219462515 ], "wc_relation_to_prior_work_avg": [ 62.0, 75.5142370682509 ], "wc_documentation_avg": [ 33.6, 25.160286166894046 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 842.2, 436.8099815709344 ], "wc_reply_reviewers_avg": [ 10.2, 9.431860898041277 ], "wc_reply_authors_avg": [ 493.4, 263.6198778544592 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": 0.560112033611204, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2996823709795268401&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "", "author_num": 1 }, { "title": "CoPriv: Network/Protocol Co-Optimization for Communication-Efficient Private Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70354", "id": "qVeDwgYsho", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f96839fc751b67492e17e70f5c9730e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qVeDwgYsho", "openreview": "https://openreview.net/forum?id=qVeDwgYsho", "poster": "/media/PosterPDFs/NeurIPS%202023/70354.png?t=1701757390.8460145", "slides": "https://nips.cc/virtual/2023/poster/70354", "video": "https://nips.cc/virtual/2023/poster/70354", "author_site": "Wenxuan Zeng, Meng Li, Haichuan Yang, Wen-jie Lu, Runsheng Wang, Ru Huang", "tldr": "", "abstract": "Deep neural network (DNN) inference based on secure 2-party computation (2PC) can offer cryptographically-secure privacy protection but suffers from orders of magnitude latency overhead due to enormous communication. Previous works heavily rely on a proxy metric of ReLU counts to approximate the communication overhead and focus on reducing the ReLUs to improve the communication efficiency. However, we observe these works achieve limited communication reduction for state-of-the-art (SOTA) 2PC protocols due to the ignorance of other linear and non-linear operations, which now contribute to the majority of communication. In this work, we present CoPriv, a framework that jointly optimizes the 2PC inference protocol and the DNN architecture. CoPriv features a new 2PC protocol for convolution based on Winograd transformation and develops DNN-aware optimization to significantly reduce the inference communication. CoPriv further develops a 2PC-aware network optimization algorithm that is compatible with the proposed protocol and simultaneously reduces the communication for all the linear and non-linear operations. We compare CoPriv with the SOTA 2PC protocol, CrypTFlow2, and demonstrate 2.1\u00d7 communication reduction for both ResNet-18 and ResNet-32 on CIFAR-100. We also compare CoPriv with SOTA network optimization methods, including SNL, MetaPruning, etc. CoPriv achieves 9.98\u00d7 and 3.88\u00d7 online and total communication reduction with a higher accuracy compare to SNL, respectively. 
CoPriv also achieves 3.87\u00d7 online communication reduction with more than 3% higher accuracy compared to MetaPruning.", "keywords": "Private Inference;Network/Protocol Co-Optimization;Winograd Convolution;Structural Re-parameterization", "primary_area": "", "supplementary_material": "/attachment/61313e13369a6e3db04d85a9ff4f3a181edfea6c.pdf", "author": "Wenxuan Zeng;Meng Li;Haichuan Yang;Wen-jie Lu;Runsheng Wang;Ru Huang", "authorids": "~Wenxuan_Zeng1;~Meng_Li1;~Haichuan_Yang1;~Wen-jie_Lu1;~Runsheng_Wang3;~Ru_Huang2", "gender": "M;M;M;M;M;F", "homepage": "https://xuanland.cn;https://mengli.me;https://hyang1990.github.io/;https://fionser.github.io;;http://www.aais.pku.edu.cn/en/duiwu/showproduct.php?id=107&lang=cn", "dblp": "326/3742;70/1726-4;39/5066;231/4234;;", "google_scholar": "P1c6nDYAAAAJ;lvdRkEkAAAAJ;4KNoCFIAAAAJ;;TZ_39qQAAAAJ;", "orcid": ";;;;;", "linkedin": ";;haichuan-yang-5470b1161/;;;", "or_profile": "~Wenxuan_Zeng1;~Meng_Li1;~Haichuan_Yang1;~Wen-jie_Lu1;~Runsheng_Wang3;~Ru_Huang2", "aff": "University of Electronic Science and Technology of China;Peking University;Meta Facebook;Alibaba Group;Peking University;Peking University", "aff_domain": "uestc.edu.cn;pku.edu.cn;fb.com;alibaba-inc.com;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Assistant Professor;Research Scientist;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzeng2023copriv,\ntitle={CoPriv: Network/Protocol Co-Optimization for Communication-Efficient Private Inference},\nauthor={Wenxuan Zeng and Meng Li and Haichuan Yang and Wen-jie Lu and Runsheng Wang and Ru Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qVeDwgYsho}\n}", "github": "", "project": "", "reviewers": "P7t8;vLvc;AJR8;yVeD", "pdf_size": 3081920, "rating": "6;6;7;7", "confidence": "4;4;2;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "34;45;28;65", "wc_strengths": "24;47;19;64", "wc_weaknesses": "125;283;62;9", "wc_questions": "6;6;1;30", "wc_limitations": "6;30;1;56", "wc_review": "195;411;111;224", "wc_reply_reviewers": "0;20;44;21", "wc_reply_authors": "0;0;186;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 43.0, 14.089002803605371 ], "wc_strengths_avg": [ 38.5, 18.117670931993437 ], "wc_weaknesses_avg": [ 119.75, 102.808985502241 ], "wc_questions_avg": [ 10.75, 11.299889379989523 ], "wc_limitations_avg": [ 23.25, 21.856063231972954 ], "wc_review_avg": [ 235.25, 109.62749427036997 ], "wc_reply_reviewers_avg": [ 21.25, 15.578430601315397 ], "wc_reply_authors_avg": [ 46.5, 80.5403625519528 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8041003677871627176&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "uestc.edu.cn;pku.edu.cn;fb.com;alibaba-inc.com;pku.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;1;1", "aff_unique_norm": "University of Electronic Science and Technology of China;Peking University;Meta;Alibaba Group", "aff_unique_dep": ";;Meta Platforms, Inc.;", 
"aff_unique_url": "https://www.uestc.edu.cn;http://www.pku.edu.cn;https://meta.com;https://www.alibaba.com", "aff_unique_abbr": "UESTC;Peking U;Meta;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "StressID: a Multimodal Dataset for Stress Identification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73454", "id": "qWsQi9DGJb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f09bfe6730e9627a9f800d01a8ad5cd-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qWsQi9DGJb", "openreview": "https://openreview.net/forum?id=qWsQi9DGJb", "poster": "/media/PosterPDFs/NeurIPS%202023/73454.png?t=1699615500.3207722", "slides": "https://nips.cc/virtual/2023/poster/73454", "video": "https://nips.cc/virtual/2023/poster/73454", "author_site": "Hava Chaptoukaev, Valeriya Strizhkova, Michele Panariello, Bianca Dalpaos, Aglind Reka, Valeria Manera, Susanne Th\u00fcmmler, Esma ISMAILOVA, Nicholas W., francois bremond, Massimiliano Todisco, Maria A Zuluaga, Laura M. Ferrari", "tldr": "", "abstract": "StressID is a new dataset specifically designed for stress identification from\nunimodal and multimodal data. It contains videos of facial expressions, audio\nrecordings, and physiological signals. The video and audio recordings are acquired\nusing an RGB camera with an integrated microphone. The physiological data\nis composed of electrocardiography (ECG), electrodermal activity (EDA), and\nrespiration signals that are recorded and monitored using a wearable device. This\nexperimental setup ensures a synchronized and high-quality multimodal data col-\nlection. Different stress-inducing stimuli, such as emotional video clips, cognitive\ntasks including mathematical or comprehension exercises, and public speaking\nscenarios, are designed to trigger a diverse range of emotional responses. The\nfinal dataset consists of recordings from 65 participants who performed 11 tasks,\nas well as their ratings of perceived relaxation, stress, arousal, and valence levels.\nStressID is one of the largest datasets for stress identification that features three\ndifferent sources of data and varied classes of stimuli, representing more than\n39 hours of annotated data in total. StressID offers baseline models for stress\nclassification including a cleaning, feature extraction, and classification phase for\neach modality. Additionally, we provide multimodal predictive models combining\nvideo, audio, and physiological inputs. The data and the code for the baselines are\navailable at https://project.inria.fr/stressid/.", "keywords": "Stress identification;dataset", "primary_area": "", "supplementary_material": "/attachment/278c94c999e4d8580a18350ffa22697bd7539881.pdf", "author": "Hava Chaptoukaev;Valeriya Strizhkova;Michele Panariello;Bianca Dalpaos;Aglind Reka;Valeria Manera;Susanne Thummler;Esma ISMAILOVA;Nicholas Evans;Francois Bremond;Massimiliano Todisco;Maria A Zuluaga;Laura M. 
Ferrari", "authorids": "~Hava_Chaptoukaev1;~Valeriya_Strizhkova1;~Michele_Panariello1;~Bianca_Dalpaos1;~Aglind_Reka1;~Valeria_Manera1;~Susanne_Thummler1;~Esma_ISMAILOVA1;~Nicholas_Evans2;~Francois_Bremond1;~Massimiliano_Todisco1;~Maria_A_Zuluaga1;~Laura_M._Ferrari1", "gender": "F;F;M;F;M;F;;F;M;M;;F;F", "homepage": "https://www.eurecom.fr/fr/people/chaptoukaev-hava;;;;;;;https://www.mines-stetienne.fr/en/research/centres-and-departments/department-of-bioelectronics-bel/people/;http://www.eurecom.fr/~evans/;http://www-sop.inria.fr/members/Francois.Bremond/;https://www.massimilianotodisco.eu;http://www.eurecom.fr/~zuluaga/;", "dblp": "369/7128;;;;;;;;;90/6418;;69/9122;290/1716", "google_scholar": "b8P1k_oAAAAJ;6n5PrUAAAAAJ;aj-ZQiIAAAAJ;;;;OEQW4C0AAAAJ;https://scholar.google.fr/citations?user=IOsWduoAAAAJ;https://scholar.google.com.tw/citations?user=-_Ch8uoAAAAJ;h-oGBzsAAAAJ;;https://scholar.google.fr/citations?user=0SaJdxQAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": "0009-0000-0859-4059;;0009-0007-4154-5460;;;0000-0003-4490-4485;0000-0001-9993-6981;;;0000-0003-2988-2142;;0000-0002-1147-766X;0000-0001-8521-9666", "linkedin": ";valeriya-strizhkova-5187bb87;michele-panariello/;bianca-d\u2019alpaos-09995423b/;aglind-reka;;;;;francois-bremond-05263a5/;;mariazuluaga/;", "or_profile": "~Hava_Chaptoukaev1;~Valeriya_Strizhkova1;~Michele_Panariello1;~Bianca_Dalpaos1;~Aglind_Reka1;~Valeria_Manera1;~Susanne_Thummler1;~Esma_ISMAILOVA1;~Nicholas_Evans2;~Francois_Bremond1;~Massimiliano_Todisco1;~Maria_A_Zuluaga1;~Laura_M._Ferrari1", "aff": "Eurecom;INRIA;Eurecom;Polytechnic Institute of Turin;Universit\u00e9 de Nice-Sophia Antipolis;INRIA;INRIA;Ecole des Mines;EURECOM;inria;Eurecom;Eurecom;INRIA", "aff_domain": "eurecom.fr;inria.fr;eurecom.fr;polito.it;unice.fr;inria.fr;inria.fr;mines.org;eurecom.fr;inria.fr;eurecom.fr;eurecom.fr;inria.fr", "position": "PhD student;PhD student;PhD student;MS student;MS student;Lecturer;Assistant Professor;Associate Professor;Associate Professor;Researcher;Associate Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nchaptoukaev2023stressid,\ntitle={Stress{ID}: a Multimodal Dataset for Stress Identification},\nauthor={Hava Chaptoukaev and Valeriya Strizhkova and Michele Panariello and Bianca Dalpaos and Aglind Reka and Valeria Manera and Susanne Thummler and Esma ISMAILOVA and Nicholas Evans and Francois Bremond and Massimiliano Todisco and Maria A Zuluaga and Laura M. 
Ferrari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qWsQi9DGJb}\n}", "github": "", "project": "", "reviewers": "jqtB;QzsT;wbdk;9RXE;qPpi", "pdf_size": 2507538, "rating": "5;5;5;6;7", "confidence": "3;4;4;4;3", "wc_summary_and_contributions": "121;64;139;84;69", "wc_strengths": "35;38;44;118;120", "wc_improvement": "36;359;70;290;126", "wc_limitations": "137;9;1;22;96", "wc_correctness": "32;27;135;20;71", "wc_clarity": "5;16;19;8;109", "wc_relation_to_prior_work": "12;22;173;77;32", "wc_documentation": "1;15;1;1;66", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "380;551;583;621;690", "wc_reply_reviewers": "22;63;17;54;73", "wc_reply_authors": "772;516;892;1321;416", "reply_reviewers": "1;1;1;1;2", "reply_authors": "1;1;2;3;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 95.4, 29.560784833965418 ], "wc_strengths_avg": [ 71.0, 39.30394382247155 ], "wc_improvement_avg": [ 176.2, 126.34935694335765 ], "wc_limitations_avg": [ 53.0, 53.862788639282314 ], "wc_correctness_avg": [ 57.0, 42.83456548162943 ], "wc_clarity_avg": [ 31.4, 39.13361726188879 ], "wc_relation_to_prior_work_avg": [ 63.2, 59.226345489148656 ], "wc_documentation_avg": [ 16.8, 25.190474390134064 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 565.0, 103.44660458420083 ], "wc_reply_reviewers_avg": [ 45.8, 22.355312567709717 ], "wc_reply_authors_avg": [ 783.4, 318.5665393603038 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9559961448416352520&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "email": "eurecom.fr;inria.fr;eurecom.fr;polito.it;unice.fr;inria.fr;inria.fr;mines.org;eurecom.fr;inria.fr;eurecom.fr;eurecom.fr;inria.fr", "author_num": 13, "aff_unique_index": "0;1;0;2;3;1;1;4;0;1;0;0;1", "aff_unique_norm": "EURECOM;INRIA;Polytechnic Institute of Turin;Universit\u00e9 de Nice-Sophia Antipolis;Ecole des Mines", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.eurecom.fr;https://www.inria.fr;https://www.polito.it;https://www.unice.fr;https://www.mines-paris.psl.eu", "aff_unique_abbr": ";INRIA;Polito;UNICA;Mines ParisTech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Sophia Antipolis", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0;0;0", "aff_country_unique": "France;Italy" }, { "title": "Are GATs Out of Balance?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70353", "id": "qY7UqLoora", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/25d463c05b414125f598cdf8022b3b46-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qY7UqLoora", "openreview": "https://openreview.net/forum?id=qY7UqLoora", "poster": "/media/PosterPDFs/NeurIPS%202023/70353.png?t=1701322786.5766408", "slides": "https://nips.cc/virtual/2023/poster/70353", "video": "https://nips.cc/virtual/2023/poster/70353", "author_site": "Nimrah Mustafa, Aleksandar Bojchevski, Rebekka Burkholz", "tldr": "", "abstract": "While the expressive power and computational capabilities of graph neural networks (GNNs) have been theoretically studied, their optimization and learning dynamics, in general, remain largely unexplored. 
Our study examines the Graph Attention Network (GAT), a popular GNN architecture in which a node's neighborhood aggregation is weighted by parameterized attention coefficients. We derive a conservation law of GAT gradient flow dynamics, which explains why a large portion of parameters in GATs with standard initialization struggle to change during training. This effect is amplified in deeper GATs, which perform significantly worse than their shallow counterparts. To alleviate this problem, we devise an initialization scheme that balances the GAT network. Our approach i) allows more effective propagation of gradients and in turn enables trainability of deeper networks, and ii) attains a considerable speedup in training and convergence time in comparison to the standard initialization. Our main theorem serves as a stepping stone to studying the learning dynamics of positive homogeneous models with attention mechanisms.", "keywords": "graph attention networks;gradient flow;conservation law", "primary_area": "", "supplementary_material": "/attachment/f9613348d88994501f475879d6f583182450a110.zip", "author": "Nimrah Mustafa;Aleksandar Bojchevski;Rebekka Burkholz", "authorids": "~Nimrah_Mustafa1;~Aleksandar_Bojchevski1;~Rebekka_Burkholz1", "gender": "F;M;F", "homepage": "https://cispa.de/en/people/c01nimu;https://abojchevski.github.io/;https://sites.google.com/view/rebekkaburkholz/startseite", "dblp": ";203/8114;194/3172", "google_scholar": ";https://scholar.google.de/citations?user=F1APiN4AAAAJ;https://scholar.google.ch/citations?user=vkWBb2wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nimrah_Mustafa1;~Aleksandar_Bojchevski1;~Rebekka_Burkholz1", "aff": "CISPA, saarland university, saarland informatics campus;CISPA Helmholtz Center for Information Security;Helmholtz Center CISPA for Information Security", "aff_domain": "cispa.saarland;cispa.de;cispa.saarland", "position": "PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nmustafa2023are,\ntitle={Are {GAT}s Out of Balance?},\nauthor={Nimrah Mustafa and Aleksandar Bojchevski and Rebekka Burkholz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qY7UqLoora}\n}", "github": "", "project": "", "reviewers": "hDWR;d9X8;yMby;cXS5", "pdf_size": 1890039, "rating": "6;6;7;7", "confidence": "3;3;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "56;64;93;72", "wc_strengths": "80;32;49;16", "wc_weaknesses": "65;34;117;38", "wc_questions": "23;20;100;104", "wc_limitations": "61;17;1;15", "wc_review": "285;167;360;245", "wc_reply_reviewers": "35;0;26;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 13.77270852083932 ], "wc_strengths_avg": [ 44.25, 23.710493457539005 ], "wc_weaknesses_avg": [ 63.5, 33.10966626228661 ], "wc_questions_avg": [ 61.75, 40.28880117352712 ], "wc_limitations_avg": [ 23.5, 22.511108368980857 ], "wc_review_avg": [ 264.25, 69.68993829815032 ], "wc_reply_reviewers_avg": [ 15.25, 15.578430601315397 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, 
"gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2464939898382247677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cispa.saarland;cispa.de;cispa.saarland", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Saarland University;CISPA Helmholtz Center for Information Security;Helmholtz Center CISPA", "aff_unique_dep": "CISPA;;Information Security", "aff_unique_url": "https://www.uni-saarland.de;https://www.cispa.de/;https://www.cispa.de/", "aff_unique_abbr": "Saarland U;CISPA;CISPA", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Holistic Evaluation of Text-to-Image Models", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73453", "id": "qY9LR74O3Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dd83eada2c3c74db3c7fe1c087513756-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qY9LR74O3Z", "openreview": "https://openreview.net/forum?id=qY9LR74O3Z", "poster": "/media/PosterPDFs/NeurIPS%202023/73453.png?t=1701416413.7275698", "slides": "https://nips.cc/virtual/2023/poster/73453", "video": "https://nips.cc/virtual/2023/poster/73453", "author_site": "Tony Lee, Michihiro Yasunaga, Chenlin Meng, Yifan Mai, Joon Sung Park, Agrim Gupta, Yunzhi Zhang, Deepak Narayanan, Hannah Teufel, Marco Bellagente, Minguk Kang, Taesung Park, Jure Leskovec, Jun-Yan Zhu, Fei-Fei Li, Jiajun Wu, Stefano Ermon, Percy Liang", "tldr": "", "abstract": "The stunning qualitative improvement of text-to-image models has led to their widespread attention and adoption. However, we lack a comprehensive quantitative understanding of their capabilities and risks. To fill this gap, we introduce a new benchmark, Holistic Evaluation of Text-to-Image Models (HEIM). Whereas previous evaluations focus mostly on image-text alignment and image quality, we identify 12 aspects, including text-image alignment, image quality, aesthetics, originality, reasoning, knowledge, bias, toxicity, fairness, robustness, multilinguality, and efficiency. We curate 62 scenarios encompassing these aspects and evaluate 26 state-of-the-art text-to-image models on this benchmark. Our results reveal that no single model excels in all aspects, with different models demonstrating different strengths. 
We release the generated images and human evaluation results for full transparency at https://crfm.stanford.edu/heim/latest and the code at https://github.com/stanford-crfm/helm, which is integrated with the HELM codebase", "keywords": "text-to-image;image generation;multimodal;holistic evaluation;benchmarking;human evaluation", "primary_area": "", "supplementary_material": "/attachment/9355314b2125fa9944d9a2972b430ec13d6e16d8.pdf", "author": "Tony Lee;Michihiro Yasunaga;Chenlin Meng;Yifan Mai;Joon Sung Park;Agrim Gupta;Yunzhi Zhang;Deepak Narayanan;Hannah Benita Teufel;Marco Bellagente;Minguk Kang;Taesung Park;Jure Leskovec;Jun-Yan Zhu;Li Fei-Fei;Jiajun Wu;Stefano Ermon;Percy Liang", "authorids": "~Tony_Lee1;~Michihiro_Yasunaga1;~Chenlin_Meng1;~Yifan_Mai1;~Joon_Sung_Park1;~Agrim_Gupta1;~Yunzhi_Zhang1;~Deepak_Narayanan2;~Hannah_Benita_Teufel1;~Marco_Bellagente1;~Minguk_Kang1;~Taesung_Park2;~Jure_Leskovec1;~Jun-Yan_Zhu1;~Li_Fei-Fei1;~Jiajun_Wu1;~Stefano_Ermon1;~Percy_Liang1", "gender": "M;;F;Non-Binary;M;;F;M;F;Not Specified;M;;M;F;M;M;;M", "homepage": ";;https://chenlin9.github.io/;https://yifanmai.com/;http://www.joonsungpark.com/;;https://cs.stanford.edu/~yzzhang/;https://deepakn94.github.io/;;https://marcobellagente93.github.io;;http://cs.stanford.edu/~jure/;https://www.cs.cmu.edu/~junyanz/;https://profiles.stanford.edu/fei-fei-li;https://jiajunwu.com;http://cs.stanford.edu/~ermon/;https://cs.stanford.edu/~pliang/;https://taesung.me", "dblp": "46/4265;202/1809;227/2517;156/8369;;200/8282;58/10932;;;294/7150;268/5657;l/JureLeskovec;117/4782.html;79/2528;117/4768;47/8135;04/1701;55/4543", "google_scholar": "OYNdx48AAAAJ;SieJYoEAAAAJ;nEFU7wIAAAAJ;QLbLGIMAAAAJ;https://scholar.google.com/citations?hl=en;AxzVaI8AAAAJ;https://scholar.google.com/citations?hl=en;sTzb6LAAAAAJ;;;https://scholar.google.com/citations?hl=ko;Q_kKkIUAAAAJ;UdpacsMAAAAJ;rDfyQnIAAAAJ;2efgcS0AAAAJ;;pouyVyUAAAAJ;hHkuxSUAAAAJ", "orcid": ";;;0009-0004-7270-2607;;;;;;;;0000-0002-5411-923X;0000-0001-8504-3410;;0000-0002-4176-343X;;;", "linkedin": "tonyhlee/;;;yifan-mai;;;;;hannah-t-3a565a69/;marco-bellagente-025884128/;;leskovec/;jun-yan-zhu-99b18814;fei-fei-li-4541247/;jiajunwu/;;;", "or_profile": "~Tony_Lee1;~Michihiro_Yasunaga1;~Chenlin_Meng1;~Yifan_Mai1;~Joon_Sung_Park1;~Agrim_Gupta1;~Yunzhi_Zhang1;~Deepak_Narayanan2;~Hannah_Benita_Teufel1;~Marco_Bellagente1;~Minguk_Kang1;~Jure_Leskovec1;~Jun-Yan_Zhu1;~Li_Fei-Fei1;~Jiajun_Wu1;~Stefano_Ermon1;~Percy_Liang1;~Taesung_Park1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Microsoft Research;Aleph Alpha GmbH;Aleph-Alpha gmbh;POSTECH;Kumo.AI;Carnegie Mellon University;Stanford University;Stanford University;Stanford University;Stanford University;Adobe Systems", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;microsoft.com;aleph-alpha.com;aleph-alpha.com;postech.ac.kr;kumo.ai;cmu.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;adobe.com", "position": "Researcher;PhD student;PhD student;Researcher;PhD student;PhD student;PhD student;Researcher;Researcher;Researcher;PhD student;Chief Scientist;Assistant Professor;Full Professor;Assistant Professor;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nlee2023holistic,\ntitle={Holistic Evaluation of Text-to-Image Models},\nauthor={Tony Lee and Michihiro Yasunaga and Chenlin Meng and Yifan Mai and Joon Sung Park and Agrim Gupta and Yunzhi Zhang 
and Deepak Narayanan and Hannah Benita Teufel and Marco Bellagente and Minguk Kang and Taesung Park and Jure Leskovec and Jun-Yan Zhu and Li Fei-Fei and Jiajun Wu and Stefano Ermon and Percy Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qY9LR74O3Z}\n}", "github": "", "project": "", "reviewers": "9fSw;9koT;uZiQ;oyyP", "pdf_size": 10857034, "rating": "8;9;9;9", "confidence": "4;3;3;4", "wc_summary_and_contributions": "93;65;30;48", "wc_strengths": "96;52;78;20", "wc_improvement": "56;1;25;4", "wc_limitations": "5;1;1;45", "wc_correctness": "7;12;7;1", "wc_clarity": "10;6;1;2", "wc_relation_to_prior_work": "15;11;1;1", "wc_documentation": "15;15;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "298;164;145;123", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "86;8;64;287", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 8.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 59.0, 23.205602771744587 ], "wc_strengths_avg": [ 61.5, 28.613807855648993 ], "wc_improvement_avg": [ 21.5, 21.96019125599775 ], "wc_limitations_avg": [ 13.0, 18.547236990991408 ], "wc_correctness_avg": [ 6.75, 3.897114317029974 ], "wc_clarity_avg": [ 4.75, 3.5619517121937516 ], "wc_relation_to_prior_work_avg": [ 7.0, 6.164414002968976 ], "wc_documentation_avg": [ 8.0, 7.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 182.5, 68.24404735945839 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 111.25, 105.37878107095375 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 148, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16959181306121021072&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;microsoft.com;aleph-alpha.com;aleph-alpha.com;postech.ac.kr;kumo.ai;cmu.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;adobe.com", "author_num": 18, "aff_unique_index": "0;0;0;0;0;0;0;1;2;3;4;5;6;0;0;0;0;7", "aff_unique_norm": "Stanford University;Microsoft;Aleph Alpha;Aleph-Alpha GmbH;Pohang University of Science and Technology;Kumo.AI;Carnegie Mellon University;Adobe", "aff_unique_dep": ";Microsoft Research;;;;;;Adobe Systems Incorporated", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com/en-us/research;https://www.aleph-alpha.com;https://www.aleph-alpha.com;https://www.postech.ac.kr;https://www.kumo.ai;https://www.cmu.edu;https://www.adobe.com", "aff_unique_abbr": "Stanford;MSR;Aleph Alpha;Aleph-Alpha;POSTECH;Kumo.AI;CMU;Adobe", "aff_campus_unique_index": "0;0;0;0;0;0;0;2;0;0;0;0", "aff_campus_unique": "Stanford;;Pohang", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;1;2;0;0;0;0;0;0;0", "aff_country_unique": "United States;Germany;South Korea" }, { "title": "Multi-Objective Intrinsic Reward Learning for Conversational Recommender Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70352", "id": "qYAp31KwU2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/396ea38391e8b96a3add6126006f1a53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qYAp31KwU2", "openreview": "https://openreview.net/forum?id=qYAp31KwU2", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/70352", "video": "https://nips.cc/virtual/2023/poster/70352", "author_site": "Zhendong Chu, Nan Wang, Hongning Wang", "tldr": "", "abstract": "Conversational Recommender Systems (CRS) actively elicit user preferences to generate adaptive recommendations. Mainstream reinforcement learning-based CRS solutions heavily rely on handcrafted reward functions, which may not be aligned with user intent in CRS tasks. Therefore, the design of task-specific rewards is critical to facilitate CRS policy learning, which remains largely under-explored in the literature. In this work, we propose a novel approach to address this challenge by learning intrinsic rewards from interactions with users. Specifically, we formulate intrinsic reward learning as a multi-objective bi-level optimization problem. The inner level optimizes the CRS policy augmented by the learned intrinsic rewards, while the outer level drives the intrinsic rewards to optimize two CRS-specific objectives: maximizing the success rate and minimizing the number of turns to reach a successful recommendation}in conversations. To evaluate the effectiveness of our approach, we conduct extensive experiments on three public CRS benchmarks. The results show that our algorithm significantly improves CRS performance by exploiting informative learned intrinsic rewards.", "keywords": "Conversational Recommendation;Reinforcement Learning;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/5e5f579a2fea9f358edee6613204210f28203c00.zip", "author": "Zhendong Chu;Nan Wang;Hongning Wang", "authorids": "~Zhendong_Chu1;~Nan_Wang6;~Hongning_Wang1", "gender": "M;M;M", "homepage": "https://zdchu.github.io/;http://www.cs.virginia.edu/~nw6a/;http://www.cs.virginia.edu/~hw5x/", "dblp": "236/6321;84/864;05/6545", "google_scholar": ";https://scholar.google.com/citations?hl=en;qkdvKNoAAAAJ", "orcid": ";;0000-0002-6524-9195", "linkedin": ";https://www.linkedin.com/public-profile/in/nan-nolen-wang-493341163?challengeId=AQEquDuYuK0KdAAAAXd-p60BoYifuxHUM8sbuGC1zveND5ifUDR5jduLsQ3NFivCjMxOS21SsmFG6K4n20UdyeCKLgXz2EFH-w&submissionId=b5d1bff9-5998-6116-18d7-1a300fe1552b;", "or_profile": "~Zhendong_Chu1;~Nan_Wang6;~Hongning_Wang1", "aff": "University of Virginia;University of Virginia;University of Virginia", "aff_domain": "virginia.edu;virginia.edu;virginia.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchu2023multiobjective,\ntitle={Multi-Objective Intrinsic Reward Learning for Conversational Recommender Systems},\nauthor={Zhendong Chu and Nan Wang and Hongning Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qYAp31KwU2}\n}", "github": "", "project": "", "reviewers": "5ZDc;vRxN;Kjom;reFg", "pdf_size": 703722, "rating": "5;5;5;6", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "86;89;95;100", "wc_strengths": "142;56;32;171", "wc_weaknesses": "156;91;284;246", "wc_questions": "4;3;52;172", "wc_limitations": "4;3;7;1", "wc_review": "392;242;470;690", "wc_reply_reviewers": "0;17;17;38", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.5, 
5.408326913195984 ], "wc_strengths_avg": [ 100.25, 57.803005977198104 ], "wc_weaknesses_avg": [ 194.25, 75.59224497261607 ], "wc_questions_avg": [ 57.75, 68.87080295742166 ], "wc_limitations_avg": [ 3.75, 2.165063509461097 ], "wc_review_avg": [ 448.5, 161.7243024409133 ], "wc_reply_reviewers_avg": [ 18.0, 13.47219358530748 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17705097842270230771&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "virginia.edu;virginia.edu;virginia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "One Risk to Rule Them All: A Risk-Sensitive Perspective on Model-Based Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70351", "id": "qZjl2TKvUY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f49287371916715b9209fa41a275851e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qZjl2TKvUY", "openreview": "https://openreview.net/forum?id=qZjl2TKvUY", "poster": "/media/PosterPDFs/NeurIPS%202023/70351.png?t=1701520538.2319252", "slides": "https://nips.cc/virtual/2023/poster/70351", "video": "https://nips.cc/virtual/2023/poster/70351", "author_site": "Marc Rigter, Bruno Lacerda, Nick Hawes", "tldr": "", "abstract": "Offline reinforcement learning (RL) is suitable for safety-critical domains where online exploration is not feasible. In such domains, decision-making should take into consideration the risk of catastrophic outcomes. In other words, decision-making should be *risk-averse*. An additional challenge of offline RL is avoiding *distributional shift*, i.e. ensuring that state-action pairs visited by the policy remain near those in the dataset. Previous offline RL algorithms that consider risk combine offline RL techniques (to avoid distributional shift), with risk-sensitive RL algorithms (to achieve risk-aversion). In this work, we propose risk-aversion as a mechanism to jointly address *both* of these issues. We propose a model-based approach, and use an ensemble of models to estimate epistemic uncertainty, in addition to aleatoric uncertainty. We train a policy that is risk-averse, and avoids high uncertainty actions. Risk-aversion to epistemic uncertainty prevents distributional shift, as areas not covered by the dataset have high epistemic uncertainty. Risk-aversion to aleatoric uncertainty discourages actions that are risky due to environment stochasticity. Thus, by considering epistemic uncertainty via a model ensemble and introducing risk-aversion, our algorithm (1R2R) avoids distributional shift in addition to achieving risk-aversion to aleatoric risk. 
Our experiments show that 1R2R achieves strong performance on deterministic benchmarks, and outperforms existing approaches for risk-sensitive objectives in stochastic domains.", "keywords": "offline reinforcement learning;model-based reinforcement learning;risk;uncertainty", "primary_area": "", "supplementary_material": "", "author": "Marc Rigter;Bruno Lacerda;Nick Hawes", "authorids": "~Marc_Rigter1;~Bruno_Lacerda1;~Nick_Hawes1", "gender": ";M;M", "homepage": ";https://bfalacerda.github.io/;https://www.robots.ox.ac.uk/~nickh/", "dblp": "226/6276;87/10333;35/1190", "google_scholar": "0PthAD8AAAAJ;https://scholar.google.co.uk/citations?user=k9XjG_MAAAAJ;bRsi4zoAAAAJ", "orcid": ";0000-0003-0862-331X;0000-0002-7556-6098", "linkedin": "marc-rigter-791157a0;;", "or_profile": "~Marc_Rigter1;~Bruno_Lacerda1;~Nick_Hawes1", "aff": "University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "Postdoc;Senior Researcher;Associate Professor", "bibtex": "@inproceedings{\nrigter2023one,\ntitle={One Risk to Rule Them All: A Risk-Sensitive Perspective on Model-Based Offline Reinforcement Learning},\nauthor={Marc Rigter and Bruno Lacerda and Nick Hawes},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qZjl2TKvUY}\n}", "github": "", "project": "", "reviewers": "unPm;8yT3;RPWe;uPGp", "pdf_size": 1887887, "rating": "6;6;6;7", "confidence": "4;4;5;2", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "73;124;68;54", "wc_strengths": "22;65;126;66", "wc_weaknesses": "143;111;315;18", "wc_questions": "176;106;1;217", "wc_limitations": "3;18;76;6", "wc_review": "417;424;586;361", "wc_reply_reviewers": "0;69;0;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.75, 26.479945241635225 ], "wc_strengths_avg": [ 69.75, 37.016043818863196 ], "wc_weaknesses_avg": [ 146.75, 107.44388070057782 ], "wc_questions_avg": [ 125.0, 81.85658189785352 ], "wc_limitations_avg": [ 25.75, 29.549746191803408 ], "wc_review_avg": [ 447.0, 83.88384826651672 ], "wc_reply_reviewers_avg": [ 21.25, 28.331740151286155 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10513184958330821384&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Multi-Head Adapter Routing for Cross-Task Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70350", "id": "qcQhBli5Ho", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b295b3a940706f431076c86b78907757-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qcQhBli5Ho", "openreview": 
"https://openreview.net/forum?id=qcQhBli5Ho", "poster": "/media/PosterPDFs/NeurIPS%202023/70350.png?t=1702068285.4783912", "slides": "https://nips.cc/virtual/2023/poster/70350", "video": "https://nips.cc/virtual/2023/poster/70350", "author_site": "Lucas Page-Caccia, Edoardo Maria Ponti, Zhan Su, Matheus Pereira, Nicolas Le Roux, Alessandro Sordoni", "tldr": "", "abstract": "Parameter-efficient fine-tuning (PEFT) for cross-task generalization consists in pre-training adapters on a multi-task training set before few-shot adaptation to test tasks. Polytropon [Ponti et al., 2023] ($\\texttt{Poly}$) jointly learns an inventory of adapters and a *routing* function that selects a (variable-size) subset of adapters for each task during both pre-training and few-shot adaptation. In this paper, we investigate the role that adapter routing plays in its success and design new variants based on our findings.\nFirst, we build on the intuition that finer-grained routing provides more expressivity. Hence,\nwe propose $\\texttt{MHR}$ (Multi-Head Routing) which combines *subsets* of adapter parameters and outperforms $\\texttt{Poly}$ under a comparable parameter budget; by only fine-tuning the routing function and not the adapters ($\\texttt{MHR}$-$z$) we achieve competitive performance with extreme parameter efficiency. Second, we find that $\\texttt{Poly}$/$\\texttt{MHR}$ performance is a result of better multi-task optimization, rather than modular inductive biases that facilitate adapter recombination and local adaptation, as previously hypothesized. In fact, we find that $\\texttt{MHR}$ exhibits high gradient alignment between training tasks. We find that routing is most beneficial during multi-task pre-training rather than during few-shot adaptation and propose $\\texttt{MHR}$-$\\mu$, which discards routing and fine-tunes the average of the pre-trained adapters on each downstream tasks. This establishes $\\texttt{MHR}$-$\\mu$ as an effective method for single-adapter fine-tuning. We also show that $\\texttt{MHR}$-$\\mu$ can be used as an effective zero-shot transfer method by training the average of the pre-trained adapters for a few additional steps on the multi-task training set: this yields gains up to 3\\% on absolute accuracy w.r.t. the baselines. 
Code is available at .", "keywords": "Parameter Efficient Finetuning;Multitask Learning;Transfer Learning;Natural Language Processing", "primary_area": "", "supplementary_material": "/attachment/88efded9e24b0f6286a045b7639505881cd0a4b1.zip", "author": "Lucas Caccia;Edoardo Ponti;Zhan Su;Matheus Pereira;Nicolas Le Roux;Alessandro Sordoni", "authorids": "~Lucas_Caccia1;~Edoardo_Ponti1;~Zhan_Su1;~Matheus_Pereira1;~Nicolas_Le_Roux2;~Alessandro_Sordoni2", "gender": "M;;M;;M;M", "homepage": "https://www.cs.mcgill.ca/~lpagec/;https://ducdauge.github.io/;https://shuishen112.github.io/zhansu.github.io/;;;http://nicolas.le-roux.name", "dblp": ";178/8829;02/6524;;57/7642;http://dblp.uni-trier.de/pers/hd/r/Roux:Nicolas_Le", "google_scholar": "fuvIITUAAAAJ;https://scholar.google.ca/citations?user=tklL2q0AAAAJ;VzEpVpoAAAAJ;;;https://scholar.google.fr/citations?user=LmKtwk8AAAAJ", "orcid": ";0000-0002-6308-1050;0000-0001-5189-9165;;;", "linkedin": ";edoardo-maria-ponti/;;matheper;;", "or_profile": "~Lucas_Caccia1;~Edoardo_Ponti1;~Zhan_Su1;~Matheus_Pereira1;~Alessandro_Sordoni1;~Nicolas_Le_Roux1", "aff": "McGill University;University of Edinburgh;University of Copenhagen;Microsoft;Microsoft;Microsoft", "aff_domain": "mcgill.ca;ed.ac.uk;ku.dk;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;Assistant Professor;PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ncaccia2023multihead,\ntitle={Multi-Head Adapter Routing for Cross-Task Generalization},\nauthor={Lucas Caccia and Edoardo Ponti and Zhan Su and Matheus Pereira and Nicolas Le Roux and Alessandro Sordoni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qcQhBli5Ho}\n}", "github": "", "project": "", "reviewers": "m1AN;5omF;HX6c;1NFM", "pdf_size": 537232, "rating": "5;5;6;7", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;2;4;3", "wc_summary": "53;95;44;70", "wc_strengths": "85;29;39;57", "wc_weaknesses": "115;100;38;229", "wc_questions": "3;146;67;99", "wc_limitations": "1;8;117;60", "wc_review": "257;378;305;515", "wc_reply_reviewers": "36;44;18;198", "wc_reply_authors": "0;0;0;155", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.5, 19.42292459955503 ], "wc_strengths_avg": [ 52.5, 21.277922830953212 ], "wc_weaknesses_avg": [ 120.5, 68.97282073396738 ], "wc_questions_avg": [ 78.75, 51.982569193913456 ], "wc_limitations_avg": [ 46.5, 46.650294747193186 ], "wc_review_avg": [ 363.75, 97.37395699056293 ], "wc_reply_reviewers_avg": [ 74.0, 72.20803279414278 ], "wc_reply_authors_avg": [ 38.75, 67.11696879329399 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13649912441764730618&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mcgill.ca;ed.ac.uk;ku.dk;microsoft.com;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;2;3;3;3", "aff_unique_norm": "McGill University;University of Edinburgh;University of Copenhagen;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": 
"https://www.mcgill.ca;https://www.ed.ac.uk;https://www.ku.dk;https://www.microsoft.com", "aff_unique_abbr": "McGill;Edinburgh;UCPH;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;3;3", "aff_country_unique": "Canada;United Kingdom;Denmark;United States" }, { "title": "Parsel\ud83d\udc0d: Algorithmic Reasoning with Language Models by Composing Decompositions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70349", "id": "qd9qcbVAwQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6445dd88ebb9a6a3afa0b126ad87fe41-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qd9qcbVAwQ", "openreview": "https://openreview.net/forum?id=qd9qcbVAwQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70349.png?t=1701393593.731503", "slides": "https://nips.cc/virtual/2023/poster/70349", "video": "https://nips.cc/virtual/2023/poster/70349", "author_site": "Eric Zelikman, Qian Huang, Gabriel Poesia, Noah Goodman, Nick Haber", "tldr": "", "abstract": "Despite recent success in large language model (LLM) reasoning, LLMs struggle with hierarchical multi-step reasoning tasks like generating complex programs. For these tasks, humans often start with a high-level algorithmic design and implement each part gradually. We introduce Parsel, a framework enabling automatic implementation and validation of complex algorithms with code LLMs. With Parsel, we automatically decompose algorithmic tasks into hierarchical natural language function descriptions and then search over combinations of possible function implementations using tests. We show that Parsel can be used across domains requiring hierarchical reasoning, including program synthesis and robotic planning. We find that, using Parsel, LLMs solve more competition-level problems in the APPS dataset, resulting in pass rates over 75\\% higher than prior results from directly sampling AlphaCode and Codex, while often using a smaller sample budget. Moreover, with automatically generated tests, we find that Parsel can improve the state-of-the-art pass@1 performance on HumanEval from 67\\% to 85\\%. We also find that LLM-generated robotic plans using Parsel are more than twice as likely to be considered accurate than directly generated plans. Lastly, we explore how Parsel addresses LLM limitations and discuss how Parsel may be useful for human programmers. 
We release our code at https://github.com/ezelikman/parsel.", "keywords": "reasoning;language models;code synthesis;decomposition", "primary_area": "", "supplementary_material": "/attachment/64363b03300c24ccfa1bc3607fd44766b3cbba71.pdf", "author": "Eric Zelikman;Qian Huang;Gabriel Poesia;Noah Goodman;Nick Haber", "authorids": "~Eric_Zelikman1;~Qian_Huang2;~Gabriel_Poesia1;~Noah_Goodman1;~Nick_Haber1", "gender": "M;F;M;;", "homepage": "https://zelikman.me;https://q-hwang.github.io/;https://gpoesia.com;https://cocolab.stanford.edu/;", "dblp": "217/2378;07/4378.html;150/2695.html;96/1216;179/4983", "google_scholar": "V5B8dSUAAAAJ;L3hkmG0AAAAJ;as5iYn4AAAAJ;OUpIbcQAAAAJ;euNCoVYAAAAJ", "orcid": ";;;;0000-0001-8804-7804", "linkedin": "ericzelikman/;qian-huang-b20315149/;;;", "or_profile": "~Eric_Zelikman1;~Qian_Huang2;~Gabriel_Poesia1;~Noah_Goodman1;~Nick_Haber1", "aff": "Google;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "google.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Research Intern;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzelikman2023parsel,\ntitle={Parsel\ud83d\udc0d: Algorithmic Reasoning with Language Models by Composing Decompositions},\nauthor={Eric Zelikman and Qian Huang and Gabriel Poesia and Noah Goodman and Nick Haber},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qd9qcbVAwQ}\n}", "github": "", "project": "", "reviewers": "AJpS;9Jji;fDT5;LfRB;PK3i", "pdf_size": 1469375, "rating": "6;7;7;7;8", "confidence": "4;4;5;4;4", "soundness": "1;3;3;4;4", "novelty": "2;4;4;3;4", "presentation": "3;3;3;4;4", "wc_summary": "91;157;96;60;54", "wc_strengths": "44;154;112;135;18", "wc_weaknesses": "679;453;236;96;10", "wc_questions": "25;1209;63;7;261", "wc_limitations": "9;122;7;9;1", "wc_review": "848;2095;514;307;344", "wc_reply_reviewers": "255;653;53;18;11", "wc_reply_authors": "230;957;341;0;0", "reply_reviewers": "2;3;2;1;1", "reply_authors": "2;4;2;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 1.0954451150103321 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.6, 36.63113429857175 ], "wc_strengths_avg": [ 92.6, 52.67105466952413 ], "wc_weaknesses_avg": [ 294.8, 243.60903103128177 ], "wc_questions_avg": [ 313.0, 457.0776739242467 ], "wc_limitations_avg": [ 29.6, 46.29298002937379 ], "wc_review_avg": [ 821.6, 664.7972924132589 ], "wc_reply_reviewers_avg": [ 198.0, 244.4045826084282 ], "wc_reply_authors_avg": [ 305.6, 351.58873702096884 ], "reply_reviewers_avg": [ 1.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5668483125178056605&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "google.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.stanford.edu", "aff_unique_abbr": "Google;Stanford", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Mountain View;Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { 
"title": "Cross-Domain Policy Adaptation via Value-Guided Data Filtering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70348", "id": "qdM260dXsa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e8ad87f1076fb0f75d89a45828f186b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qdM260dXsa", "openreview": "https://openreview.net/forum?id=qdM260dXsa", "poster": "/media/PosterPDFs/NeurIPS%202023/70348.png?t=1699500802.6831195", "slides": "https://nips.cc/virtual/2023/poster/70348", "video": "https://nips.cc/virtual/2023/poster/70348", "author_site": "Kang Xu, Chenjia Bai, Xiaoteng Ma, Dong Wang, Bin Zhao, Zhen Wang, Xuelong Li, Wei Li", "tldr": "", "abstract": "Generalizing policies across different domains with dynamics mismatch poses a significant challenge in reinforcement learning. For example, a robot learns the policy in a simulator, but when it is deployed in the real world, the dynamics of the environment may be different. Given the source and target domain with dynamics mismatch, we consider the online dynamics adaptation problem, in which case the agent can access sufficient source domain data while online interactions with the target domain are limited. Existing research has attempted to solve the problem from the dynamics discrepancy perspective. In this work, we reveal the limitations of these methods and explore the problem from the value difference perspective via a novel insight on the value consistency across domains. Specifically, we present the Value-Guided Data Filtering (VGDF) algorithm, which selectively shares transitions from the source domain based on the proximity of paired value targets across the two domains. Empirical results on various environments with kinematic and morphology shifts demonstrate that our method achieves superior performance compared to prior approaches.", "keywords": "Reinforcement Learning; Domain Adaptation; Online Dynamics Adaptation", "primary_area": "", "supplementary_material": "/attachment/25a478efb95f4bfea6610841d40fd6b7cfd670af.zip", "author": "Kang Xu;Chenjia Bai;Xiaoteng Ma;Dong Wang;Bin Zhao;Zhen Wang;Xuelong Li;Wei Li", "authorids": "~Kang_Xu2;~Chenjia_Bai2;~Xiaoteng_Ma1;~Dong_Wang1;~Bin_Zhao7;~Zhen_Wang11;~Xuelong_Li2;~Wei_Li34", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://kangxu023.github.io/;https://baichenjia.github.io/;https://xtma.github.io/;https://redwang.github.io/;https://iopen.nwpu.edu.cn/info/1347/2105.htm;http://iopen.nwpu.edu.cn/info/1015/1351.htm?ivk_sa=1024320u;;", "dblp": "295/1622;247/1943;238/3249;40/3934-28;73/4325-1.html;;l/XuelongLi;64/6025-55", "google_scholar": "7FTLsHUAAAAJ;Rm_1y2kAAAAJ;CeDFnNMAAAAJ;dasL9V4AAAAJ;https://scholar.google.com.hk/citations?user=DQB0hqwAAAAJ;https://scholar.google.co.uk/citations?hl=zh-CN;ahUibskAAAAJ;https://scholar.google.co.uk/citations?user=AG3OXS0AAAAJ", "orcid": "0000-0001-6040-3002;;0000-0002-7250-6268;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Kang_Xu2;~Chenjia_Bai2;~Xiaoteng_Ma1;~Dong_Wang1;~Bin_Zhao7;~Zhen_Wang11;~Xuelong_Li2;~Wei_Li34", "aff": "Fudan University;Shanghai AI Laboratory;Department of Automation, Tsinghua University;Shanghai AI Laboratory;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University;Northwestern Polytechnical University;Fudan University, China", "aff_domain": "fudan.edu.cn;pjlab.org.cn;tsinghua.edu.cn;pjlab.org.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;fudan.edu.cn", "position": "MS student;Researcher;PhD student;Researcher;Associate 
Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2023crossdomain,\ntitle={Cross-Domain Policy Adaptation via Value-Guided Data Filtering},\nauthor={Kang Xu and Chenjia Bai and Xiaoteng Ma and Dong Wang and Bin Zhao and Zhen Wang and Xuelong Li and Wei Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qdM260dXsa}\n}", "github": "", "project": "", "reviewers": "Xugz;zSdL;uy5c;mz61;aEHs", "pdf_size": 2147136, "rating": "4;5;6;7;7", "confidence": "1;4;4;3;3", "soundness": "3;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "1;3;3;4;3", "wc_summary": "117;105;276;93;404", "wc_strengths": "51;42;71;63;165", "wc_weaknesses": "126;67;387;79;84", "wc_questions": "62;206;167;124;39", "wc_limitations": "9;9;4;29;3", "wc_review": "365;429;905;388;695", "wc_reply_reviewers": "148;312;26;40;24", "wc_reply_authors": "488;862;43;26;37", "reply_reviewers": "2;2;1;1;1", "reply_authors": "2;4;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 199.0, 122.27019260637483 ], "wc_strengths_avg": [ 78.4, 44.42341724811364 ], "wc_weaknesses_avg": [ 148.6, 120.84800370713619 ], "wc_questions_avg": [ 119.6, 62.522316015963455 ], "wc_limitations_avg": [ 10.8, 9.431860898041277 ], "wc_review_avg": [ 556.4, 210.69181284520764 ], "wc_reply_reviewers_avg": [ 110.0, 110.99549540409286 ], "wc_reply_authors_avg": [ 291.2, 334.99098495332674 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.46966821831386213, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15154635028224128717&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;pjlab.org.cn;tsinghua.edu.cn;pjlab.org.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;fudan.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;1;3;4;4;0", "aff_unique_norm": "Fudan University;Shanghai AI Laboratory;Tsinghua University;Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";;Department of Automation;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.shanghai-ai-lab.com;https://www.tsinghua.edu.cn;http://www.nwpu.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "Fudan;SAIL;THU;NWPU;NWPU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ANTN: Bridging Autoregressive Neural Networks and Tensor Networks for Quantum Many-Body Simulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70347", "id": "qdsDy0zbn4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/01772a8b0420baec00c4d59fe2fbace6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qdsDy0zbn4", "openreview": "https://openreview.net/forum?id=qdsDy0zbn4", "poster": "/media/PosterPDFs/NeurIPS%202023/70347.png?t=1701980681.2584765", "slides": "https://nips.cc/virtual/2023/poster/70347", "video": "https://nips.cc/virtual/2023/poster/70347", "author_site": "Zhuo Chen, Laker Newhouse, Eddie Chen, Di Luo, Marin Soljacic", "tldr": "", "abstract": "Quantum many-body physics simulation has important 
impacts on our understanding of fundamental science and has applications to quantum materials design and quantum technology. However, due to the exponentially growing size of the Hilbert space with respect to the particle number, a direct simulation is intractable. While tensor networks and neural networks are the two state-of-the-art methods for representing quantum states in approximate simulations, each has its own limitations in terms of expressivity and inductive bias. To address these challenges, we develop a novel architecture, Autoregressive Neural TensorNet (ANTN), which bridges tensor networks and autoregressive neural networks. We show that Autoregressive Neural TensorNet parameterizes normalized wavefunctions, allows for exact sampling, generalizes the expressivity of tensor networks and autoregressive neural networks, and inherits a variety of symmetries from autoregressive neural networks. We demonstrate our approach on quantum state learning as well as finding the ground state of the challenging 2D $J_1$-$J_2$ Heisenberg model with different system sizes and coupling parameters, outperforming both tensor networks and autoregressive neural networks. Our work opens up new opportunities for quantum many-body physics simulation, quantum technology design, and generative modeling in artificial intelligence.", "keywords": "Autoregressive neural network;tensor network;quantum many-body physics;variational Monte Carlo", "primary_area": "", "supplementary_material": "", "author": "Zhuo Chen;Laker Newhouse;Eddie Chen;Di Luo;Marin Soljacic", "authorids": "~Zhuo_Chen8;lakern@mit.edu;ezchen@mit.edu;~Di_Luo1;~Marin_Soljacic1", "gender": ";;;M;", "homepage": ";;;;https://www.rle.mit.edu/marin/", "dblp": ";;;;131/2044", "google_scholar": ";;;OxZytTQAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhuo_Chen8;lakern@mit.edu;ezchen@mit.edu;~Di_Luo1;~Marin_Soljacic1", "aff": ";;;Massachusetts Institute of Technology;", "aff_domain": ";;;mit.edu;", "position": ";;;Postdoc;", "bibtex": "@inproceedings{\nchen2023antn,\ntitle={{ANTN}: Bridging Autoregressive Neural Networks and Tensor Networks for Quantum Many-Body Simulation},\nauthor={Zhuo Chen and Laker Newhouse and Eddie Chen and Di Luo and Marin Soljacic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qdsDy0zbn4}\n}", "github": "", "project": "", "reviewers": "2bnN;dUKK;wT6D;Vtcz", "pdf_size": 571079, "rating": "3;5;7;7", "confidence": "5;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "97;54;181;249", "wc_strengths": "141;77;39;111", "wc_weaknesses": "480;297;166;230", "wc_questions": "249;4;7;457", "wc_limitations": "1;6;5;200", "wc_review": "968;438;398;1247", "wc_reply_reviewers": "572;0;18;379", "wc_reply_authors": "2814;951;951;2286", "reply_reviewers": "1;0;1;1", "reply_authors": "6;4;4;6", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 145.25, 75.32720292165374 ], "wc_strengths_avg": [ 92.0, 38.06573262134856 ], "wc_weaknesses_avg": [ 293.25, 117.34857263725026 ], "wc_questions_avg": [ 179.25, 188.67481946460157 ], "wc_limitations_avg": [ 53.0, 84.89110671913755 ], "wc_review_avg": [ 762.75, 358.86304839032954 ], "wc_reply_reviewers_avg": [ 242.25, 243.10941466755253 ], "wc_reply_authors_avg": [
1750.5, 821.0044153352648 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.0, 1.0 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1936385189231197085&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";;;mit.edu;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "VTaC: A Benchmark Dataset of Ventricular Tachycardia Alarms from ICU Monitors", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73452", "id": "qf4CWnrvZa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7a53bf4e02022aad32a4019d41b3b476-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qf4CWnrvZa", "openreview": "https://openreview.net/forum?id=qf4CWnrvZa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73452", "video": "https://nips.cc/virtual/2023/poster/73452", "author_site": "Li-wei Lehman, Benjamin Moody, Harsh Deep, Feng Wu, Hasan Saeed, Lucas McCullum, Diane Perry, Tristan Struja, Qiao Li, Gari Clifford, Roger Mark", "tldr": "", "abstract": "False arrhythmia alarms in intensive care units (ICUs) are a continuing problem despite considerable effort from industrial and academic algorithm developers. Of all life-threatening arrhythmias, ventricular tachycardia (VT) stands out as the most challenging arrhythmia to detect reliably. We introduce a new annotated VT alarm database, VTaC (Ventricular Tachycardia annotated alarms from ICUs) consisting of over 5,000 waveform recordings with VT alarms triggered by bedside monitors in the ICUs. Each VT alarm in the dataset has been labeled by at least two independent human expert annotators. The dataset encompasses data collected from ICUs in three major US hospitals and includes data from three leading bedside monitor manufacturers, providing a diverse and representative collection of alarm waveform data. Each waveform recording comprises at least two electrocardiogram (ECG) leads and one or more pulsatile waveforms, such as photoplethysmogram (PPG or PLETH) and arterial blood pressure (ABP) waveforms. We demonstrate the utility of this new benchmark dataset for the task of false arrhythmia alarm reduction, and present performance of multiple machine learning approaches, including conventional supervised machine learning, deep learning, contrastive learning and generative approaches for the task of VT false alarm reduction.", "keywords": "arrhythmia;false alarm;ICU;machine learning;deep learning;contrastive learning;physiological waveforms.", "primary_area": "", "supplementary_material": "", "author": "Li-wei H. 
Lehman;Benjamin E Moody;Harsh Deep;Feng Wu;Hasan Saeed;Lucas McCullum;Diane Perry;Tristan Struja;Qiao Li;Gari Clifford;Roger Mark", "authorids": "~Li-wei_H._Lehman1;~Benjamin_E_Moody1;~Harsh_Deep1;~Feng_Wu6;~Hasan_Saeed1;~Lucas_McCullum1;~Diane_Perry1;~Tristan_Struja1;qiaoli@dbmi.emory.edu;~Gari_Clifford1;~Roger_Mark1", "gender": ";M;M;;M;;F;M;;M;M", "homepage": "http://web.mit.edu/lilehman/www/;https://physionet.org;;;;;;;;http://www.bmi.emory.edu/gariclifford;https://imes.mit.edu/people/faculty/mark-roger/", "dblp": "87/2340.html;;;;;;;;;72/5989;86/6416.html", "google_scholar": "it0h7rMAAAAJ;;Cg--3_YAAAAJ;;;https://scholar.google.ca/citations?user=4916fzwAAAAJ;;https://scholar.google.com/citations?hl=de;;https://scholar.google.com.tw/citations?user=VwYoZ6gAAAAJ;", "orcid": ";;0000-0002-6015-308X;;;0000-0001-9788-7987;;0000-0003-0199-0184;;;", "linkedin": ";;harsh-deep-harker/;;hasan-saeed/;https://www.linkedin.com/mwlite/in/lucas-mccullum;diane-perry-32106b98/;tristan-struja;;;", "or_profile": "~Li-wei_H._Lehman1;~Benjamin_E_Moody1;~Harsh_Deep1;~Feng_Wu6;~Hasan_Saeed1;~Lucas_McCullum1;~Diane_Perry1;~Tristan_Struja1;qiaoli@dbmi.emory.edu;~Gari_Clifford1;~Roger_Mark1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;MD Anderson Cancer Center;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Georgia Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;;mit.edu;mdanderson.edu;mit.edu;mit.edu;;gatech.edu;mit.edu", "position": "Researcher;Researcher;Undergrad student;;Undergrad student;PhD student;Researcher;Postdoc;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlehman2023vtac,\ntitle={{VT}aC: A Benchmark Dataset of Ventricular Tachycardia Alarms from {ICU} Monitors},\nauthor={Li-wei H. 
Lehman and Benjamin E Moody and Harsh Deep and Feng Wu and Hasan Saeed and Lucas McCullum and Diane Perry and Tristan Struja and Qiao Li and Gari Clifford and Roger Mark},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qf4CWnrvZa}\n}", "github": "", "project": "", "reviewers": "uV7r;2kri;sRHE;pRjp", "pdf_size": 539192, "rating": "6;6;7;7", "confidence": "4;4;5;4", "wc_summary_and_contributions": "79;100;168;116", "wc_strengths": "33;99;43;96", "wc_improvement": "35;145;42;315", "wc_limitations": "66;12;29;35", "wc_correctness": "36;16;25;9", "wc_clarity": "6;9;3;16", "wc_relation_to_prior_work": "25;28;18;9", "wc_documentation": "77;7;61;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "358;417;390;621", "wc_reply_reviewers": "0;0;0;14", "wc_reply_authors": "492;254;434;771", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 115.75, 32.89661836724255 ], "wc_strengths_avg": [ 67.75, 29.978117018918983 ], "wc_improvement_avg": [ 134.25, 113.07823619070118 ], "wc_limitations_avg": [ 35.5, 19.525624189766635 ], "wc_correctness_avg": [ 21.5, 10.111874208078342 ], "wc_clarity_avg": [ 8.5, 4.8218253804964775 ], "wc_relation_to_prior_work_avg": [ 20.0, 7.314369419163897 ], "wc_documentation_avg": [ 42.25, 27.99441908666797 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 446.5, 102.8895038378551 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 487.75, 185.59145319760822 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ey4c3UXNleoJ:scholar.google.com/&scioq=VTaC:+A+Benchmark+Dataset+of+Ventricular+Tachycardia+Alarms+from+ICU+Monitors&hl=en&as_sdt=0,39", "gs_version_total": 4, "email": "mit.edu;mit.edu;mit.edu;;mit.edu;mdanderson.edu;mit.edu;mit.edu;;gatech.edu;mit.edu", "author_num": 11, "aff_unique_index": "0;0;0;0;1;0;0;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;MD Anderson Cancer Center;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.mdanderson.org;https://www.gatech.edu", "aff_unique_abbr": "MIT;MD Anderson;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Affinity-Aware Graph Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70346", "id": "qgiG7WZohZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d642b0633afad94f660554e05b40608e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qgiG7WZohZ", "openreview": "https://openreview.net/forum?id=qgiG7WZohZ", "poster": "/media/PosterPDFs/NeurIPS%202023/70346.png?t=1702305322.528568", "slides": "https://nips.cc/virtual/2023/poster/70346", "video": "https://nips.cc/virtual/2023/poster/70346", "author_site": "Ameya Velingker, Ali Sinop, Ira Ktena, Petar Veli\u010dkovi\u0107, Sreenivas Gollapudi", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as a powerful technique for learning on relational data. 
Owing to the relatively limited number of message passing steps they perform\u2014and hence a smaller receptive field\u2014there has been significant interest in improving their expressivity by incorporating structural aspects of the underlying graph. In this paper, we explore the use of affinity measures as features in graph neural networks, in particular measures arising from random walks, including effective resistance, hitting and commute times. We propose message passing networks based on these features and evaluate their performance on a variety of node and graph property prediction tasks. Our architecture has low computational complexity, while our features are invariant to the permutations of the underlying graph. The measures we compute allow the network to exploit the connectivity properties of the graph, thereby allowing us to outperform relevant benchmarks for a wide variety of tasks, often with significantly fewer message passing steps. On one of the largest publicly available graph regression datasets, OGB-LSC-PCQM4Mv1, we obtain the best known single-model validation MAE at the time of writing.", "keywords": "graph neural networks;message passing;effective resistance;hitting time", "primary_area": "", "supplementary_material": "/attachment/8f60e48c200473c02a84d829dad27e3b697ae026.zip", "author": "Ameya Velingker;Ali Kemal Sinop;Ira Ktena;Petar Veli\u010dkovi\u0107;Sreenivas Gollapudi", "authorids": "~Ameya_Velingker1;~Ali_Kemal_Sinop1;~Ira_Ktena1;~Petar_Veli\u010dkovi\u01071;~Sreenivas_Gollapudi2", "gender": "M;M;F;M;M", "homepage": "http://www.ameyavelingker.com;;https://sites.google.com/corp/view/sk1712/home;https://petar-v.com;https://www.sreenivasgollapudi.com", "dblp": "117/3666.html;29/2539;190/7605;184/4786.html;https://dblp.uni-trier.de/pers/g/Gollapudi:Sreenivas.html", "google_scholar": "6dFFudUAAAAJ;;https://scholar.google.co.uk/citations?user=asj41ygAAAAJ;https://scholar.google.co.uk/citations?user=kcTK_FAAAAAJ;Ysd-WJgAAAAJ", "orcid": ";;;0000-0002-2820-4692;", "linkedin": "ameya-velingker-5811b711;;;petarvelickovic;", "or_profile": "~Ameya_Velingker1;~Ali_Kemal_Sinop1;~Ira_Ktena1;~Petar_Veli\u010dkovi\u01071;~Sreenivas_Gollapudi2", "aff": "Google;Google;Google;Google DeepMind;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Researcher;Researcher;Senior Staff Research Scientist;Researcher", "bibtex": "@inproceedings{\nvelingker2023affinityaware,\ntitle={Affinity-Aware Graph Networks},\nauthor={Ameya Velingker and Ali Kemal Sinop and Ira Ktena and Petar Veli{\\v{c}}kovi{\\'c} and Sreenivas Gollapudi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qgiG7WZohZ}\n}", "github": "", "project": "", "reviewers": "YKRe;8Ddc;2Foc;vQdB", "pdf_size": 408043, "rating": "4;5;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "118;57;84;93", "wc_strengths": "131;46;99;66", "wc_weaknesses": "343;99;60;25", "wc_questions": "148;44;420;397", "wc_limitations": "135;21;4;1", "wc_review": "875;267;667;582", "wc_reply_reviewers": "11;11;12;56", "wc_reply_authors": "0;0;18;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 
21.805962487356524 ], "wc_strengths_avg": [ 85.5, 32.37668914512415 ], "wc_weaknesses_avg": [ 131.75, 124.74248474357083 ], "wc_questions_avg": [ 252.25, 160.72394812223845 ], "wc_limitations_avg": [ 40.25, 55.23302906775981 ], "wc_review_avg": [ 597.75, 218.6931354661138 ], "wc_reply_reviewers_avg": [ 22.5, 19.345542122153102 ], "wc_reply_authors_avg": [ 4.5, 7.794228634059948 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10593787060225164238&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Convolutional Visual Prompt for Robust Visual Perception", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70345", "id": "qgmrC8jhCo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58be158bf831a706b1a66cffbc401cac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qgmrC8jhCo", "openreview": "https://openreview.net/forum?id=qgmrC8jhCo", "poster": "/media/PosterPDFs/NeurIPS%202023/70345.png?t=1699850939.6924675", "slides": "https://nips.cc/virtual/2023/poster/70345", "video": "https://nips.cc/virtual/2023/poster/70345", "author_site": "Yun-Yun Tsai, Chengzhi Mao, Junfeng Yang", "tldr": "", "abstract": "Vision models are often vulnerable to out-of-distribution (OOD) samples without adaptation. While visual prompts offer a lightweight method of input-space adaptation for large-scale vision models, they rely on a high-dimensional additive vector and labeled data. This leads to overfitting when adapting models in a self-supervised test-time setting without labels. We introduce convolutional visual prompts (CVP) for label-free test-time adaptation for robust visual perception. The structured nature of CVP demands fewer trainable parameters (less than 1\\% of those of standard visual prompts), combating overfitting.
Extensive experiments and analysis on a wide variety of OOD visual perception tasks show that our approach is effective, improving robustness by up to 5.87\\% over several large-scale models.", "keywords": "self-supervised learning;representation learning;visual prompts;domain generalization;input adaptation", "primary_area": "", "supplementary_material": "/attachment/c9a732b3998527c488fd0a5babc4f35dc47fd0e4.zip", "author": "Yun-Yun Tsai;Chengzhi Mao;Junfeng Yang", "authorids": "~Yun-Yun_Tsai1;~Chengzhi_Mao2;~Junfeng_Yang1", "gender": "F;M;M", "homepage": "https://yunyuntsai.github.io/;http://www.cs.columbia.edu/~mcz/;https://www.cs.columbia.edu/~junfeng/", "dblp": "241/1190;;71/3724.html", "google_scholar": "v1SiKHcAAAAJ;pTTEiHUAAAAJ;JJ9AvbAAAAAJ", "orcid": ";;0009-0000-2277-6545", "linkedin": ";;", "or_profile": "~Yun-Yun_Tsai1;~Chengzhi_Mao2;~Junfeng_Yang1", "aff": "Columbia University;Columbia University;, Columbia University", "aff_domain": "columbia.edu;columbia.edu;cs.columbia.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntsai2023convolutional,\ntitle={Convolutional Visual Prompt for Robust Visual Perception},\nauthor={Yun-Yun Tsai and Chengzhi Mao and Junfeng Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qgmrC8jhCo}\n}", "github": "", "project": "", "reviewers": "QKSZ;zs9s;HEpT;uZKT;XDjm", "pdf_size": 26614915, "rating": "3;4;6;6;7", "confidence": "4;4;4;4;4", "soundness": "2;2;2;3;3", "novelty": "2;1;4;3;3", "presentation": "2;2;3;3;4", "wc_summary": "56;184;56;68;52", "wc_strengths": "47;20;77;74;114", "wc_weaknesses": "601;151;208;117;45", "wc_questions": "88;58;320;82;58", "wc_limitations": "27;3;31;32;37", "wc_review": "819;416;692;373;306", "wc_reply_reviewers": "284;311;324;41;74", "wc_reply_authors": "340;334;658;43;240", "reply_reviewers": "1;1;2;1;1", "reply_authors": "4;5;4;2;3", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 83.2, 50.684908996662905 ], "wc_strengths_avg": [ 66.4, 31.512537187602014 ], "wc_weaknesses_avg": [ 224.4, 195.5521413843377 ], "wc_questions_avg": [ 121.2, 100.14868945722655 ], "wc_limitations_avg": [ 26.0, 11.933147112141038 ], "wc_review_avg": [ 521.2, 198.59446115136242 ], "wc_reply_reviewers_avg": [ 206.8, 123.02747660583793 ], "wc_reply_authors_avg": [ 323.0, 198.95929231880578 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 3.6, 1.019803902718557 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6445514516024173350&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "columbia.edu;columbia.edu;cs.columbia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Self-Guidance for Controllable Image Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70344", "id": "qgv56R2YJ7", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3469b211b829b39d2b0cfd3b880a869c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qgv56R2YJ7", "openreview": "https://openreview.net/forum?id=qgv56R2YJ7", "poster": "/media/PosterPDFs/NeurIPS%202023/70344.png?t=1701200025.1579902", "slides": "https://nips.cc/virtual/2023/poster/70344", "video": "https://nips.cc/virtual/2023/poster/70344", "author_site": "Dave Epstein, Allan Jabri, Ben Poole, Alexei Efros, Aleksander Holynski", "tldr": "", "abstract": "Large-scale generative models are capable of producing high-quality images from detailed prompts. However, many aspects of an image are difficult or impossible to convey through text. We introduce self-guidance, a method that provides precise control over properties of the generated image by guiding the internal representations of diffusion models. We demonstrate that the size, location, and appearance of objects can be extracted from these representations, and show how to use them to steer the sampling process. Self-guidance operates similarly to standard classifier guidance, but uses signals present in the pretrained model itself, requiring no additional models or training. We demonstrate the flexibility and effectiveness of self-guided generation through a wide range of challenging image manipulations, such as modifying the position or size of a single object (keeping the rest of the image unchanged), merging the appearance of objects in one image with the layout of another, composing objects from multiple images into one, and more. We also propose a new method for reconstruction using self-guidance, which allows extending our approach to editing real images.", "keywords": "generative models;image editing;diffusion;guidance", "primary_area": "", "supplementary_material": "", "author": "Dave Epstein;Allan Jabri;Ben Poole;Alexei A Efros;Aleksander Holynski", "authorids": "~Dave_Epstein1;~Allan_Jabri2;~Ben_Poole1;~Alexei_A_Efros1;~Aleksander_Holynski1", "gender": ";;M;;M", "homepage": "https://dave.ml;http://ajabri.github.io;https://cs.stanford.edu/~poole;https://holynski.org;http://www.eecs.berkeley.edu/~efros/", "dblp": "https://dblp.org/pers/hd/e/Epstein:Dave;172/0858;16/10397;230/7958;40/6158", "google_scholar": "https://scholar.google.com/citations?hl=en;;i5FMLA4AAAAJ;ypBMJMgAAAAJ;https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ", "orcid": ";;;;0000-0001-5720-8070", "linkedin": ";;;;alexei-efros-890736a3/", "or_profile": "~Dave_Epstein1;~Allan_Jabri2;~Ben_Poole1;~Aleksander_Holynski1;~Alyosha_Efros1", "aff": "University of California, Berkeley;University of California, Berkeley;Google;Google DeepMind;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;google.com;google.com;berkeley.edu", "position": "PhD student;PhD student;Research Scientist;Researcher;Professor", "bibtex": "@inproceedings{\nepstein2023diffusion,\ntitle={Diffusion Self-Guidance for Controllable Image Generation},\nauthor={Dave Epstein and Allan Jabri and Ben Poole and Alexei A Efros and Aleksander Holynski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qgv56R2YJ7}\n}", "github": "", "project": "", "reviewers": "hErC;jH2E;EBx5;qibF", "pdf_size": 27204189, "rating": "5;6;6;7", "confidence": "4;5;4;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "59;101;105;198", "wc_strengths": "109;185;50;211", "wc_weaknesses": "255;245;143;326", 
"wc_questions": "88;8;76;274", "wc_limitations": "30;16;23;21", "wc_review": "541;555;397;1030", "wc_reply_reviewers": "384;28;0;78", "wc_reply_authors": "310;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.75, 50.79062413477511 ], "wc_strengths_avg": [ 138.75, 63.483757765274106 ], "wc_weaknesses_avg": [ 242.25, 65.25862012025691 ], "wc_questions_avg": [ 111.5, 98.65470085099848 ], "wc_limitations_avg": [ 22.5, 5.024937810560445 ], "wc_review_avg": [ 630.75, 238.6591450164858 ], "wc_reply_reviewers_avg": [ 122.5, 153.54071121367127 ], "wc_reply_authors_avg": [ 77.5, 134.23393758658798 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 217, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12151353437875610702&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;google.com;google.com;berkeley.edu", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Berkeley;Mountain View;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73451", "id": "qi0Zrm6E5E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbc4b67c6430c22460623186c3d3fdc2-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qi0Zrm6E5E", "openreview": "https://openreview.net/forum?id=qi0Zrm6E5E", "poster": "/media/PosterPDFs/NeurIPS%202023/73451.png?t=1697462794.8118775", "slides": "https://nips.cc/virtual/2023/poster/73451", "video": "https://nips.cc/virtual/2023/poster/73451", "author_site": "Kamil Dreczkowski, Antoine Grosnit, Haitham Bou Ammar", "tldr": "", "abstract": "This paper introduces a modular framework for Mixed-variable and Combinatorial Bayesian Optimization (MCBO) to address the lack of systematic benchmarking and standardized evaluation in the field. Current MCBO papers often introduce non-diverse or non-standard benchmarks to evaluate their methods, impeding the proper assessment of different MCBO primitives and their combinations. Additionally, papers introducing a solution for a single MCBO primitive often omit benchmarking against baselines that utilize the same methods for the remaining primitives. This omission is primarily due to the significant implementation overhead involved, resulting in a lack of controlled assessments and an inability to showcase the merits of a contribution effectively.\nTo overcome these challenges, our proposed framework enables an effortless combination of Bayesian Optimization components, and provides a diverse set of synthetic and real-world benchmarking tasks. 
\nLeveraging this flexibility, we implement 47 novel MCBO algorithms and benchmark them against seven existing MCBO solvers and five standard black-box optimization algorithms on ten tasks, conducting over 4000 experiments. \nOur findings reveal a superior combination of MCBO primitives outperforming existing approaches and illustrate the significance of model fit and the use of a trust region. We make our MCBO library available under the MIT license at \\url{https://github.com/huawei-noah/HEBO/tree/master/MCBO}.", "keywords": "Bayesian Optimisation;Combinatorial Optimisation;Mixed Space Optimisation;Library;Benchmarks", "primary_area": "", "supplementary_material": "", "author": "Kamil Dreczkowski;Antoine Grosnit;Haitham Bou Ammar", "authorids": "~Kamil_Dreczkowski1;~Antoine_Grosnit1;~Haitham_Bou_Ammar1", "gender": "M;M;M", "homepage": ";;", "dblp": ";281/6860;", "google_scholar": "5hrKfnwAAAAJ;TIfomt8AAAAJ;https://scholar.google.co.uk/citations?user=AE5suDoAAAAJ", "orcid": "0000-0001-8278-6550;;", "linkedin": "kamil-dreczkowski/;antoine-grosnit-087950158/;", "or_profile": "~Kamil_Dreczkowski1;~Antoine_Grosnit1;~Haitham_Bou_Ammar1", "aff": "Imperial College London;Huawei Technologies Ltd.;Huawei R&D UK", "aff_domain": "ic.ac.uk;huawei.com;huawei.com", "position": "PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ndreczkowski2023framework,\ntitle={Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization},\nauthor={Kamil Dreczkowski and Antoine Grosnit and Haitham Bou Ammar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qi0Zrm6E5E}\n}", "github": "", "project": "", "reviewers": "X2sz;fKag;Vgsh;m29E;MaRf", "pdf_size": 1121898, "rating": "7;7;7;7;8", "confidence": "3;3;4;3;4", "wc_summary_and_contributions": "65;87;116;197;76", "wc_strengths": "101;97;42;73;104", "wc_improvement": "89;1;412;136;7", "wc_limitations": "66;1;22;89;1", "wc_correctness": "22;10;33;124;1", "wc_clarity": "4;24;5;207;1", "wc_relation_to_prior_work": "46;37;24;60;1", "wc_documentation": "12;8;16;74;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "406;266;671;961;193", "wc_reply_reviewers": "11;0;277;34;0", "wc_reply_authors": "506;107;1901;1983;190", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;4;4;1", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 108.2, 47.53693301002916 ], "wc_strengths_avg": [ 83.4, 23.414525406251563 ], "wc_improvement_avg": [ 129.0, 150.33695487138218 ], "wc_limitations_avg": [ 35.8, 35.650525942824466 ], "wc_correctness_avg": [ 38.0, 44.339598554790726 ], "wc_clarity_avg": [ 48.2, 79.8132821026676 ], "wc_relation_to_prior_work_avg": [ 33.6, 20.085815890822058 ], "wc_documentation_avg": [ 22.2, 26.369679558159213 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 499.4, 282.7016802213952 ], "wc_reply_reviewers_avg": [ 64.4, 107.02261443265157 ], "wc_reply_authors_avg": [ 937.4, 831.3941544177467 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3168584267364444820&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ic.ac.uk;huawei.com;huawei.com", "author_num": 3, 
"aff_unique_index": "0;1;1", "aff_unique_norm": "Imperial College London;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.imperial.ac.uk;https://www.huawei.com", "aff_unique_abbr": "ICL;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Transformers learn through gradual rank increase", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70343", "id": "qieeNlO3C7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4d69c1c057a8bd570ba4a7b71aae8331-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qieeNlO3C7", "openreview": "https://openreview.net/forum?id=qieeNlO3C7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70343", "video": "https://nips.cc/virtual/2023/poster/70343", "author_site": "Enric Boix-Adsera, Etai Littwin, Emmanuel Abbe, Samy Bengio, Joshua Susskind", "tldr": "", "abstract": "We identify incremental learning dynamics in transformers, where the difference between trained and initial weights progressively increases in rank. We rigorously prove this occurs under the simplifying assumptions of diagonal weight matrices and small initialization. Our experiments support the theory and also show that phenomenon can occur in practice without the simplifying assumptions.", "keywords": "transformers;low-rank bias;incremental learning", "primary_area": "", "supplementary_material": "/attachment/b992f4326972ee88479fac25e37c23c735a9f9b2.pdf", "author": "Enric Boix-Adser\u00e0;Etai Littwin;Emmanuel Abbe;Samy Bengio;Joshua M. Susskind", "authorids": "~Enric_Boix-Adser\u00e01;~Etai_Littwin1;~Emmanuel_Abbe1;~Samy_Bengio1;~Joshua_M._Susskind1", "gender": "M;;M;M;M", "homepage": ";;http://bengio.abracadoudou.com;http://www.apple.com;http://web.mit.edu/eboix/www/", "dblp": ";84/5016;b/SamyBengio;132/7797;", "google_scholar": "NOVS7vwAAAAJ;;Vs-MdPcAAAAJ;Sv2TGqsAAAAJ;", "orcid": ";;;;", "linkedin": ";;bengio;joshua-susskind-8ab2ab5/;", "or_profile": "~Etai_Littwin1;~Emmanuel_Abbe1;~Samy_Bengio1;~Joshua_M._Susskind1;~Enric_Boix_Adser\u00e01", "aff": "Apple;Swiss Federal Institute of Technology Lausanne;Apple;Apple;Apple", "aff_domain": "apple.com;epfl.ch;apple.com;apple.com;apple.com", "position": "Researcher;Full Professor;Senior Director;Researcher;Intern", "bibtex": "@inproceedings{\nabbe2023transformers,\ntitle={Transformers learn through gradual rank increase},\nauthor={Emmanuel Abbe and Samy Bengio and Enric Boix-Adser{\\`a} and Etai Littwin and Joshua M. 
Susskind},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qieeNlO3C7}\n}", "github": "", "project": "", "reviewers": "ZvU1;yLKb;1Eo1;ryvs", "pdf_size": 2031425, "rating": "4;6;6;6", "confidence": "3;3;3;3", "soundness": "2;4;4;3", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "61;142;34;114", "wc_strengths": "74;139;41;31", "wc_weaknesses": "126;212;23;15", "wc_questions": "101;65;10;28", "wc_limitations": "40;56;31;4", "wc_review": "402;614;139;192", "wc_reply_reviewers": "50;101;0;26", "wc_reply_authors": "55;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 87.75, 42.5345447842104 ], "wc_strengths_avg": [ 71.25, 42.22780482099442 ], "wc_weaknesses_avg": [ 94.0, 80.97839217964258 ], "wc_questions_avg": [ 51.0, 35.02142201567492 ], "wc_limitations_avg": [ 32.75, 18.859679212542297 ], "wc_review_avg": [ 336.75, 187.87146536927847 ], "wc_reply_reviewers_avg": [ 44.25, 37.23153904957462 ], "wc_reply_authors_avg": [ 13.75, 23.81569860407206 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9943701636676547451&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "apple.com;epfl.ch;apple.com;apple.com;apple.com", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Apple;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.epfl.ch", "aff_unique_abbr": "Apple;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Switzerland" }, { "title": "High-Fidelity Audio Compression with Improved RVQGAN", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70342", "id": "qjnl1QUnFA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58d0e78cf042af5876e12661087bea12-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qjnl1QUnFA", "openreview": "https://openreview.net/forum?id=qjnl1QUnFA", "poster": "/media/PosterPDFs/NeurIPS%202023/70342.png?t=1698803620.9353921", "slides": "https://nips.cc/virtual/2023/poster/70342", "video": "https://nips.cc/virtual/2023/poster/70342", "author_site": "Rithesh Kumar, Prem Seetharaman, Alejandro Luebs, Ishaan Kumar, Kundan Kumar", "tldr": "", "abstract": "Language models have been successfully used to model natural signals, such as images, speech, and music. A key component of these models is a high-quality neural compression model that can compress high-dimensional natural signals into lower-dimensional discrete tokens. To that end, we introduce a high-fidelity universal neural audio compression algorithm that achieves ~90x compression of 44.1 kHz audio into tokens at just 8 kbps bandwidth. We achieve this by combining advances in high-fidelity audio generation with better vector quantization techniques from the image domain, along with improved adversarial and reconstruction losses. We compress all domains (speech, environment, music, etc.) 
with a single universal model, making it widely applicable to generative modeling of all audio. We compare with competing audio compression algorithms, and find our method outperforms them significantly. We provide thorough ablations for every design choice, as well as open-source code and trained model weights. We hope our work can lay the foundation for the next generation of high-fidelity audio modeling.", "keywords": "audio generation;audio compression;GAN;audio;speech", "primary_area": "", "supplementary_material": "/attachment/9dc3a69306fcabed0fe426ac50b9c29269ab1a48.zip", "author": "Rithesh Kumar;Prem Seetharaman;Alejandro Luebs;Ishaan Kumar;Kundan Kumar", "authorids": "~Rithesh_Kumar1;~Prem_Seetharaman1;alejandro@descript.com;~Ishaan_Kumar2;~Kundan_Kumar1", "gender": "M;M;;M;M", "homepage": ";https://pseeth.github.io;;;http://kundan2510.github.io", "dblp": "192/1862;;;;", "google_scholar": "https://scholar.google.ca/citations?user=hJjeVsQAAAAJ;XHD-48cAAAAJ;;;", "orcid": ";;;;", "linkedin": "rithesh-kumar-b0479488/;;;;https://ca.linkedin.com/in/kundan-kumar-50a0a361", "or_profile": "~Rithesh_Kumar1;~Prem_Seetharaman1;alejandro@descript.com;~Ishaan_Kumar2;~Kundan_Kumar1", "aff": "Descript Inc.;Descript;;University of Montreal;University of Montreal", "aff_domain": "descript.com;descript.com;;umontreal.ca;umontreal.ca", "position": "Researcher;Research Scientist;;MS student;PhD student", "bibtex": "@inproceedings{\nkumar2023highfidelity,\ntitle={High-Fidelity Audio Compression with Improved {RVQGAN}},\nauthor={Rithesh Kumar and Prem Seetharaman and Alejandro Luebs and Ishaan Kumar and Kundan Kumar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qjnl1QUnFA}\n}", "github": "", "project": "", "reviewers": "5w24;ZhX4;B66v;d8D3", "pdf_size": 345048, "rating": "7;7;7;7", "confidence": "5;4;4;5", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "191;143;66;20", "wc_strengths": "46;151;54;51", "wc_weaknesses": "193;70;31;92", "wc_questions": "150;30;36;32", "wc_limitations": "5;7;16;5", "wc_review": "585;401;203;200", "wc_reply_reviewers": "123;0;360;0", "wc_reply_authors": "175;0;364;0", "reply_reviewers": "2;0;2;0", "reply_authors": "4;1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.0, 66.30610831590103 ], "wc_strengths_avg": [ 75.5, 43.68352092036538 ], "wc_weaknesses_avg": [ 96.5, 59.84354601792912 ], "wc_questions_avg": [ 62.0, 50.85272854036448 ], "wc_limitations_avg": [ 8.25, 4.548351349665063 ], "wc_review_avg": [ 347.25, 159.61261698249297 ], "wc_reply_reviewers_avg": [ 120.75, 146.97512544645096 ], "wc_reply_authors_avg": [ 134.75, 150.4084023583789 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 303, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10772995201378011347&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "descript.com;descript.com;;umontreal.ca;umontreal.ca", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Descript Inc.;Descript;University of Montreal", "aff_unique_dep": ";;", "aff_unique_url": "https://www.descript.com;https://www.descript.com;https://www.umontreal.ca", "aff_unique_abbr": 
";Descript;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;Canada" }, { "title": "Rank-1 Matrix Completion with Gradient Descent and Small Random Initialization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70341", "id": "qjqJL2lfkH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/21c426323068204f4199c490d730e88e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qjqJL2lfkH", "openreview": "https://openreview.net/forum?id=qjqJL2lfkH", "poster": "/media/PosterPDFs/NeurIPS%202023/70341.png?t=1702147913.4830997", "slides": "https://nips.cc/virtual/2023/poster/70341", "video": "https://nips.cc/virtual/2023/poster/70341", "author_site": "Daesung Kim, Hye Won Chung", "tldr": "", "abstract": "The nonconvex formulation of the matrix completion problem has received significant attention in recent years due to its affordable complexity compared to the convex formulation. Gradient Descent (GD) is a simple yet efficient baseline algorithm for solving nonconvex optimization problems. The success of GD has been witnessed in many different problems in both theory and practice when it is combined with random initialization. However, previous works on matrix completion require either careful initialization or regularizers to prove the convergence of GD. In this paper, we study the rank-1 symmetric matrix completion and prove that GD converges to the ground truth when small random initialization is used. We show that in a logarithmic number of iterations, the trajectory enters the region where local convergence occurs. We provide an upper bound on the initialization size that is sufficient to guarantee the convergence, and show that a larger initialization can be used as more samples are available. 
We observe that the implicit regularization effect of GD plays a critical role in the analysis, and for the entire trajectory, it prevents each entry from becoming much larger than the others.", "keywords": "Matrix completion;gradient descent;random initialization", "primary_area": "", "supplementary_material": "", "author": "Daesung Kim;Hye Won Chung", "authorids": "~Daesung_Kim1;~Hye_Won_Chung2", "gender": "M;F", "homepage": ";https://iids.kaist.ac.kr/", "dblp": "129/1057;https://dblp.uni-trier.de/pers/hd/c/Chung:Hye_Won", "google_scholar": "AvyPJgYAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Daesung_Kim1;~Hye_Won_Chung2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nkim2023rank,\ntitle={Rank-1 Matrix Completion with Gradient Descent and Small Random Initialization},\nauthor={Daesung Kim and Hye Won Chung},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qjqJL2lfkH}\n}", "github": "", "project": "", "reviewers": "KM6V;ewei;irzy;zH8c;kLXw;96zD", "pdf_size": 1841715, "rating": "4;4;5;6;6;8", "confidence": "4;5;2;4;3;4", "soundness": "3;3;3;3;3;4", "novelty": "2;2;2;3;3;3", "presentation": "3;3;2;3;3;4", "wc_summary": "18;62;195;24;75;290", "wc_strengths": "50;39;74;34;47;136", "wc_weaknesses": "428;140;221;48;76;101", "wc_questions": "7;19;95;353;130;37", "wc_limitations": "9;4;1;7;5;1", "wc_review": "512;264;586;466;333;565", "wc_reply_reviewers": "0;0;112;57;51;0", "wc_reply_authors": "0;36;300;0;0;0", "reply_reviewers": "0;0;2;1;1;0", "reply_authors": "1;2;2;1;1;1", "rating_avg": [ 5.5, 1.3844373104863459 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 110.66666666666667, 99.17268889277038 ], "wc_strengths_avg": [ 63.333333333333336, 34.86003761074021 ], "wc_weaknesses_avg": [ 169.0, 128.15355372885034 ], "wc_questions_avg": [ 106.83333333333333, 118.20380234530904 ], "wc_limitations_avg": [ 4.5, 2.9297326385411577 ], "wc_review_avg": [ 454.3333333333333, 118.31690026741272 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 41.48761528724232 ], "wc_reply_authors_avg": [ 56.0, 109.90905331227269 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.12768847961381222, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11665059671613022858&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "qkhpbRNSSE", "title": "ProofNet: Autoformalizing and Formally Proving Undergraduate-Level Mathematics", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "We introduce ProofNet, a benchmark for autoformalization and formal proving of undergraduate-level mathematics. 
The ProofNet benchmark consists of 371 examples, each comprising a formal theorem statement in Lean 3, a natural language theorem statement, and a natural language proof. The problems are primarily drawn from popular undergraduate pure mathematics textbooks and cover topics such as real and complex analysis, linear algebra, abstract algebra, and topology. We intend for ProofNet to be a challenging benchmark that will drive progress in autoformalization and automatic theorem proving. We report baseline results on statement autoformalization via in-context learning. Moreover, we demonstrate improvements over our baselines by applying prompt retrieval and distilled backtranslation.", "keywords": "autoformalization;theorem proving;in context learning", "primary_area": "", "supplementary_material": "", "author": "Zhangir Azerbayev;Bartosz Piotrowski;Hailey Schoelkopf;Edward W. Ayers;Dragomir Radev;Jeremy Avigad", "authorids": "~Zhangir_Azerbayev1;~Bartosz_Piotrowski1;~Hailey_Schoelkopf1;contact@edayers.com;~Dragomir_Radev2;avigad@cmu.edu", "gender": "M;;F;;;", "homepage": ";;;;;", "dblp": ";;;;;", "google_scholar": ";;XLahYIYAAAAJ;;;", "orcid": ";;;;;", "linkedin": "zhangir-azerbayev-314ab21b8/;;;;;", "or_profile": "~Zhangir_Azerbayev1;~Bartosz_Piotrowski1;~Hailey_Schoelkopf1;contact@edayers.com;~Dragomir_Radev2;avigad@cmu.edu", "aff": "Yale University;;Yale University;;;", "aff_domain": "yale.edu;;yale.edu;;;", "position": "Undergrad student;;Undergrad student;;;", "bibtex": "@misc{\nazerbayev2023proofnet,\ntitle={ProofNet: Autoformalizing and Formally Proving Undergraduate-Level Mathematics},\nauthor={Zhangir Azerbayev and Bartosz Piotrowski and Hailey Schoelkopf and Edward W. Ayers and Dragomir Radev and Jeremy Avigad},\nyear={2023},\nurl={https://openreview.net/forum?id=qkhpbRNSSE}\n}", "github": "", "project": "", "reviewers": "bFjQ;u8Jf;8y1v;YvZf", "site": "https://openreview.net/forum?id=qkhpbRNSSE", "pdf_size": 298078, "rating": "4;5;5;7", "confidence": "3;3;4;4", "wc_summary_and_contributions": "93;99;58;68", "wc_strengths": "38;20;66;114", "wc_improvement": "123;137;16;52", "wc_limitations": "16;18;3;17", "wc_correctness": "1;1;18;4", "wc_clarity": "1;1;415;6", "wc_relation_to_prior_work": "1;6;50;44", "wc_documentation": "1;1;118;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "275;284;745;314", "wc_reply_reviewers": "0;0;324;0", "wc_reply_authors": "0;0;299;0", "reply_reviewers": "0;0;3;0", "reply_authors": "0;0;2;0", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 79.5, 17.00735135169495 ], "wc_strengths_avg": [ 59.5, 35.47886694921358 ], "wc_improvement_avg": [ 82.0, 49.90490957811666 ], "wc_limitations_avg": [ 13.5, 6.103277807866851 ], "wc_correctness_avg": [ 6.0, 7.035623639735144 ], "wc_clarity_avg": [ 105.75, 178.55723872192917 ], "wc_relation_to_prior_work_avg": [ 25.25, 21.924586655168667 ], "wc_documentation_avg": [ 32.0, 49.73429400323282 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 404.5, 197.11735083447118 ], "wc_reply_reviewers_avg": [ 81.0, 140.29611541307906 ], "wc_reply_authors_avg": [ 74.75, 129.4707978657736 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 0.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14477851218864564414&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, 
"aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stability and Generalization of the Decentralized Stochastic Gradient Descent Ascent Algorithm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70340", "id": "ql6LVyi2Dg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/64e2449d74f84e5b1a5c96ba7b3d308e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ql6LVyi2Dg", "openreview": "https://openreview.net/forum?id=ql6LVyi2Dg", "poster": "/media/PosterPDFs/NeurIPS%202023/70340.png?t=1697443482.2220187", "slides": "https://nips.cc/virtual/2023/poster/70340", "video": "https://nips.cc/virtual/2023/poster/70340", "author_site": "Miaoxi Zhu, Li Shen, Bo Du, Dacheng Tao", "tldr": "", "abstract": "The growing size of available data has attracted increasing interest in solving minimax problems in a decentralized manner for various machine learning tasks. Previous theoretical research has primarily focused on the convergence rate and communication complexity of decentralized minimax algorithms, with little attention given to their generalization. In this paper, we investigate the primal-dual generalization bound of the decentralized stochastic gradient descent ascent (D-SGDA) algorithm using the approach of algorithmic stability under both convex-concave and nonconvex-nonconcave settings. Our theory refines the algorithmic stability in a decentralized manner and demonstrates that the decentralized structure does not destroy the stability and generalization of D-SGDA, implying that it can generalize as well as the vanilla SGDA in certain situations. Our results analyze the impact of different topologies on the generalization bound of the D-SGDA algorithm beyond trivial factors such as sample sizes, learning rates, and iterations. We also evaluate the optimization error and balance it with the generalization gap to obtain the optimal population risk of D-SGDA in the convex-concave setting. 
Additionally, we perform several numerical experiments which validate our theoretical findings.", "keywords": "decentralized algorithm;minimax problem;algorithmic stability;generalization analysis", "primary_area": "", "supplementary_material": "/attachment/df5727ba5bd9584bb076dcedc3e655d4704ca00d.zip", "author": "Miaoxi Zhu;Li Shen;Bo Du;Dacheng Tao", "authorids": "~Miaoxi_Zhu2;~Li_Shen1;~Bo_Du3;~Dacheng_Tao1", "gender": ";M;;", "homepage": ";https://sites.google.com/site/mathshenli/home;;", "dblp": "344/1052;91/3680-8;;", "google_scholar": ";yVhgENIAAAAJ;;", "orcid": ";;;", "linkedin": "https://www.linkedin.cn/incareer/in/%E5%98%BB-%E5%98%BB-0a078423a;;;", "or_profile": "~Miaoxi_Zhu2;~Li_Shen1;~Bo_Du3;~Dacheng_Tao1", "aff": "Wuhan University;JD Explore Academy;;", "aff_domain": "whu.edu.cn;jd.com;;", "position": "MS student;Researcher;;", "bibtex": "@inproceedings{\nzhu2023stability,\ntitle={Stability and Generalization of the Decentralized Stochastic Gradient Descent Ascent Algorithm},\nauthor={Miaoxi Zhu and Li Shen and Bo Du and Dacheng Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ql6LVyi2Dg}\n}", "github": "", "project": "", "reviewers": "Z6QM;CPGp;5Agt;ipno;FcPE", "pdf_size": 899857, "rating": "5;5;5;6;6", "confidence": "4;1;3;4;4", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "203;45;71;188;39", "wc_strengths": "150;28;31;130;59", "wc_weaknesses": "576;14;105;71;223", "wc_questions": "9;14;152;1;57", "wc_limitations": "19;2;10;52;1", "wc_review": "957;103;369;442;379", "wc_reply_reviewers": "10;0;15;0;12", "wc_reply_authors": "0;0;36;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 109.2, 71.43780511745864 ], "wc_strengths_avg": [ 79.6, 50.88260999595048 ], "wc_weaknesses_avg": [ 197.8, 201.06854552614638 ], "wc_questions_avg": [ 46.6, 56.166181995930614 ], "wc_limitations_avg": [ 16.8, 18.755265927200288 ], "wc_review_avg": [ 450.0, 278.970966231255 ], "wc_reply_reviewers_avg": [ 7.4, 6.248199740725324 ], "wc_reply_authors_avg": [ 7.2, 14.400000000000002 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5601120336112039, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10659830152467662264&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "whu.edu.cn;jd.com;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Wuhan University;JD", "aff_unique_dep": ";JD Explore Academy", "aff_unique_url": "http://www.whu.edu.cn/;", "aff_unique_abbr": "WHU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "China;" }, { "title": "Bayesian nonparametric (non-)renewal processes for analyzing neural spike train variability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70339", "id": "qlJoo2y3gY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d6db7eb6245ec0c6e45f445956994143-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qlJoo2y3gY", "openreview": 
"https://openreview.net/forum?id=qlJoo2y3gY", "poster": "/media/PosterPDFs/NeurIPS%202023/70339.png?t=1699279776.601749", "slides": "https://nips.cc/virtual/2023/poster/70339", "video": "https://nips.cc/virtual/2023/poster/70339", "author_site": "David Liu, Mate Lengyel", "tldr": "", "abstract": "Neural spiking activity is generally variable, non-stationary, and exhibits complex dependencies on covariates, such as sensory input or behavior. These dependencies have been proposed to be signatures of specific computations, and so characterizing them with quantitative rigor is critical for understanding neural computations. Approaches based on point processes provide a principled statistical framework for modeling neural spiking activity. However, currently, they only allow the instantaneous mean, but not the instantaneous variability, of responses to depend on covariates. To resolve this limitation, we propose a scalable Bayesian approach generalizing modulated renewal processes using sparse variational Gaussian processes. We leverage pathwise conditioning for computing nonparametric priors over conditional interspike interval distributions and rely on automatic relevance determination to detect lagging interspike interval dependencies beyond renewal order. After systematically validating our method on synthetic data, we apply it to two foundational datasets of animal navigation: head direction cells in freely moving mice and hippocampal place cells in rats running along a linear track. Our model exhibits competitive or better predictive power compared to state-of-the-art baselines, and outperforms them in terms of capturing interspike interval statistics. These results confirm the importance of modeling covariate-dependent spiking variability, and further analyses of our fitted models reveal rich patterns of variability modulation beyond the temporal resolution of flexible count-based approaches.", "keywords": "Gaussian processes;renewal processes;point processes;neural data analysis;Bayesian machine learning;non-stationary time series", "primary_area": "", "supplementary_material": "/attachment/e55b7357606306ccad4f7a9acf0e896eb8f26cd1.pdf", "author": "David Liu;M\u00e1t\u00e9 Lengyel", "authorids": "~David_Liu4;~M\u00e1t\u00e9_Lengyel1", "gender": "M;M", "homepage": "https://davindicode.github.io/;http://lengyellab.org", "dblp": ";48/6327", "google_scholar": "rixiw8sAAAAJ;WvgoL14AAAAJ", "orcid": "0000-0002-8336-3805;0000-0001-7266-0049", "linkedin": "david-liu-143b8410b/;", "or_profile": "~David_Liu4;~M\u00e1t\u00e9_Lengyel1", "aff": "University of Cambridge;Central European University", "aff_domain": "cam.ac.uk;ceu.edu", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nliu2023bayesian,\ntitle={Bayesian nonparametric (non-)renewal processes for analyzing neural spike train variability},\nauthor={David Liu and M{\\'a}t{\\'e} Lengyel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qlJoo2y3gY}\n}", "github": "", "project": "", "reviewers": "C2R4;hah4;3xxY;yMLW;Mj1g", "pdf_size": 1580886, "rating": "5;6;6;7;8", "confidence": "5;3;2;3;4", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "65;45;78;131;89", "wc_strengths": "33;19;92;106;74", "wc_weaknesses": "176;18;86;18;41", "wc_questions": "410;131;30;54;172", "wc_limitations": "15;13;24;1;1", "wc_review": "699;226;310;310;377", "wc_reply_reviewers": "0;19;23;17;25", "wc_reply_authors": 
"0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 81.6, 28.716545753276108 ], "wc_strengths_avg": [ 64.8, 33.55830746625938 ], "wc_weaknesses_avg": [ 67.8, 59.52612871672405 ], "wc_questions_avg": [ 159.4, 135.36557908124206 ], "wc_limitations_avg": [ 10.8, 8.818163074019441 ], "wc_review_avg": [ 384.4, 164.43187039014063 ], "wc_reply_reviewers_avg": [ 16.8, 8.863407922464136 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.15384615384615383, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15603916355568123685&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;ceu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;Central European University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ceu.edu", "aff_unique_abbr": "Cambridge;CEU", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Hungary" }, { "title": "Aligning Synthetic Medical Images with Clinical Knowledge using Human Feedback", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70338", "id": "qlnlamFQEa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b1d1e5affe5fdb70372cd90dd8afd49-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qlnlamFQEa", "openreview": "https://openreview.net/forum?id=qlnlamFQEa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70338", "video": "https://nips.cc/virtual/2023/poster/70338", "author_site": "Shenghuan Sun, Greg Goldgof, Atul Butte, Ahmed Alaa", "tldr": "", "abstract": "Generative models capable of precisely capturing nuanced clinical features in medical images hold great promise for facilitating clinical data sharing, enhancing rare disease datasets, and efficiently synthesizing (annotated) medical images at scale. Despite their potential, assessing the quality of synthetic medical images remains a challenge. While modern generative models can synthesize visually-realistic medical images, the clinical plausibility of these images may be called into question. Domain-agnostic scores, such as FID score, precision, and recall, cannot incorporate clinical knowledge and are, therefore, not suitable for assessing clinical sensibility. Additionally, there are numerous unpredictable ways in which generative models may fail to synthesize clinically plausible images, making it challenging to anticipate potential failures and design automated scores for their detection. To address these challenges, this paper introduces a pathologist-in-the-loop framework for generating clinically-plausible synthetic medical images. 
Our framework comprises three steps: (1) pretraining a conditional diffusion model to generate medical images conditioned on a clinical concept, (2) expert pathologist evaluation of the generated images to assess whether they satisfy clinical desiderata, and (3) training a reward model that predicts human feedback on new samples, which we use to incorporate expert knowledge into the finetuning objective of the diffusion model. Our results show that human feedback significantly improves the quality of synthetic images in terms of fidelity, diversity, utility in downstream applications, and plausibility as evaluated by experts. We also demonstrate that human feedback can teach the model new clinical concepts not annotated in the original training data. Our results demonstrate the value of incorporating human feedback in clinical applications where generative models may struggle to capture extensive domain knowledge from raw data alone.", "keywords": "Synthetic clinical data;Machine learning for healthcare", "primary_area": "", "supplementary_material": "", "author": "Shenghuan Sun;Gregory Goldgof;Atul Butte;Ahmed Alaa", "authorids": "shenghuan.sun@ucsf.edu;goldgofg@mskcc.org;atul.butte@ucsf.edu;~Ahmed_Alaa1", "gender": ";;;M", "homepage": ";;;https://alaalab.berkeley.edu/", "dblp": ";;;140/7324", "google_scholar": ";;;https://scholar.google.com.eg/citations?user=_pv1sEcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "shenghuan.sun@ucsf.edu;goldgofg@mskcc.org;atul.butte@ucsf.edu;~Ahmed_Alaa1", "aff": ";;;University of California, Berkeley", "aff_domain": ";;;berkeley.edu", "position": ";;;Assistant Professor", "bibtex": "@inproceedings{\nsun2023aligning,\ntitle={Aligning Synthetic Medical Images with Clinical Knowledge using Human Feedback},\nauthor={Shenghuan Sun and Gregory Goldgof and Atul Butte and Ahmed Alaa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qlnlamFQEa}\n}", "github": "", "project": "", "reviewers": "Mvby;ZdBm;n6aW;uSrF;q99y", "pdf_size": 36654290, "rating": "6;6;7;7;8", "confidence": "5;4;4;4;5", "soundness": "4;3;3;3;4", "novelty": "4;2;3;3;4", "presentation": "3;4;3;4;4", "wc_summary": "86;111;31;105;69", "wc_strengths": "51;28;91;58;46", "wc_weaknesses": "106;40;49;74;89", "wc_questions": "4;73;24;95;165", "wc_limitations": "47;9;20;143;7", "wc_review": "294;261;215;475;376", "wc_reply_reviewers": "20;11;40;35;64", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.4, 28.786107760515314 ], "wc_strengths_avg": [ 54.8, 20.643643089338664 ], "wc_weaknesses_avg": [ 71.6, 24.499795917517353 ], "wc_questions_avg": [ 72.2, 56.75702599678739 ], "wc_limitations_avg": [ 45.2, 50.936823615141144 ], "wc_review_avg": [ 324.2, 91.92910311756555 ], "wc_reply_reviewers_avg": [ 34.0, 18.231840280125315 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2182178902359924, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16765094997256599467&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";;;berkeley.edu", "author_num": 
4, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "COOM: A Game Benchmark for Continual Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73450", "id": "qmCxdPkNsa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d61d9f4fe4357296cb658795fd7999f0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qmCxdPkNsa", "openreview": "https://openreview.net/forum?id=qmCxdPkNsa", "poster": "/media/PosterPDFs/NeurIPS%202023/73450.png?t=1701060532.7609744", "slides": "https://nips.cc/virtual/2023/poster/73450", "video": "https://nips.cc/virtual/2023/poster/73450", "author_site": "Tristan Tomilin, Meng Fang, Yudi Zhang, Mykola Pechenizkiy", "tldr": "", "abstract": "The advancement of continual reinforcement learning (RL) has been facing various obstacles, including standardized metrics and evaluation protocols, demanding computational requirements, and a lack of widely accepted standard benchmarks. In response to these challenges, we present COOM ($\\textbf{C}$ontinual D$\\textbf{OOM}$), a continual RL benchmark tailored for embodied pixel-based RL. COOM presents a meticulously crafted suite of task sequences set within visually distinct 3D environments, serving as a robust evaluation framework to assess crucial aspects of continual RL, such as catastrophic forgetting, knowledge transfer, and sample-efficient learning. Following an in-depth empirical evaluation of popular continual learning (CL) methods, we pinpoint their limitations, provide valuable insight into the benchmark and highlight unique algorithmic challenges. This makes our work the first to benchmark image-based CRL in 3D environments with embodied perception. The primary objective of the COOM benchmark is to offer the research community a valuable and cost-effective challenge. It seeks to deepen our comprehension of the capabilities and limitations of current and forthcoming CL methods in an RL setting. 
The code and environments are open-sourced and accessible on GitHub.", "keywords": "benchmark;vizdoom;vision-based learning;embodied AI;continual learning;reinforcement learning;deep learning;simulation environment", "primary_area": "", "supplementary_material": "", "author": "Tristan Tomilin;Meng Fang;Yudi Zhang;Mykola Pechenizkiy", "authorids": "~Tristan_Tomilin1;~Meng_Fang1;~Yudi_Zhang3;~Mykola_Pechenizkiy1", "gender": "M;M;F;M", "homepage": "https://ttomilin.github.io/;;https://github.com/ReedZyd;http://www.win.tue.nl/~mpechen/", "dblp": "330/1961;67/463;344/3890;37/4649", "google_scholar": "https://scholar.google.pl/citations?user=CRzm2AMAAAAJ;IcNYP1oAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=F0uFT_kAAAAJ", "orcid": "0009-0009-5805-3395;;;0000-0003-4955-0743", "linkedin": "tristan-tomilin-272463155/;;;mpechen/", "or_profile": "~Tristan_Tomilin1;~Meng_Fang1;~Yudi_Zhang3;~Mykola_Pechenizkiy1", "aff": "Eindhoven University of Technology;Eindhoven University of Technology;Eindhoven University of Technology;Eindhoven University of Technology", "aff_domain": "tue.nl;tue.nl;tue.nl;tue.nl", "position": "PhD student;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\ntomilin2023coom,\ntitle={{COOM}: A Game Benchmark for Continual Reinforcement Learning},\nauthor={Tristan Tomilin and Meng Fang and Yudi Zhang and Mykola Pechenizkiy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qmCxdPkNsa}\n}", "github": "", "project": "", "reviewers": "eYzv;vkRe;5vZ2;TUS9;u5Cf", "pdf_size": 21067976, "rating": "6;6;7;7;7", "confidence": "4;4;4;4;3", "wc_summary_and_contributions": "84;61;33;96;92", "wc_strengths": "68;52;33;57;18", "wc_improvement": "146;178;325;75;54", "wc_limitations": "14;1;36;17;11", "wc_correctness": "1;7;15;6;10", "wc_clarity": "5;5;9;11;10", "wc_relation_to_prior_work": "20;19;10;11;15", "wc_documentation": "1;22;10;19;9", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "340;346;472;293;220", "wc_reply_reviewers": "17;0;309;0;0", "wc_reply_authors": "1132;1677;1227;853;59", "reply_reviewers": "1;0;1;0;0", "reply_authors": "2;4;2;2;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 73.2, 23.472537144501445 ], "wc_strengths_avg": [ 45.6, 17.850490189347745 ], "wc_improvement_avg": [ 155.6, 96.03041184958023 ], "wc_limitations_avg": [ 15.8, 11.443775600735973 ], "wc_correctness_avg": [ 7.8, 4.621688003316537 ], "wc_clarity_avg": [ 8.0, 2.5298221281347035 ], "wc_relation_to_prior_work_avg": [ 15.0, 4.049691346263317 ], "wc_documentation_avg": [ 12.2, 7.520638270785267 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 334.2, 82.3417269675588 ], "wc_reply_reviewers_avg": [ 65.2, 122.07768018765755 ], "wc_reply_authors_avg": [ 989.6, 535.5055928746216 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7764836526195731128&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tue.nl;tue.nl;tue.nl;tue.nl", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Eindhoven University of Technology", "aff_unique_dep": "", "aff_unique_url": 
"https://www.tue.nl", "aff_unique_abbr": "TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "qoiOpVrIEa", "title": "Using persistent homology to understand dimensionality reduction in resting-state fMRI", "track": "main", "status": "Reject", "tldr": "", "abstract": "Evaluating the success of a manifold learning method remains a challenging problem, especially for methods adapted to a specific application domain. The present work investigates shared geometric structure across different dimensionality reduction (DR) algorithms within the scope of neuroimaging applications. We examine reduced-dimension embeddings produced by a representative assay of dimension reductions for brain data (\u201cbrain representations\u201d) through the lens of persistent homology, making statistical claims about topological differences using a recent topological boostrap method. We cluster these methods based on their induced topologies, finding feature type and number --- rather than reduction algorithm --- as the main drivers of observed topological differences.", "keywords": "persistent homology;neuroimaging;neuroscience;manifold learning;dimensionality reduction;topological data analysis", "primary_area": "", "supplementary_material": "/attachment/b0474b0014123a848c596685318d3f59ced5cfc5.pdf", "author": "Ty O Easley;Kevin Freese;Elizabeth Munch;Janine Diane Bijsterbosch", "authorids": "~Ty_O_Easley1;~Kevin_Freese1;~Elizabeth_Munch1;~Janine_Diane_Bijsterbosch1", "gender": "Non-Binary;Non-Binary;F;F", "homepage": ";;https://elizabethmunch.com/;https://www.mir.wustl.edu/research/research-centers/computational-imaging-research-center-circ/labs/bijsterbosch-lab", "dblp": ";;;", "google_scholar": "gat6STsAAAAJ;;hQD-jzkAAAAJ;OTLnkfsAAAAJ", "orcid": "0000-0002-8215-2598;0009-0008-9676-0392;0000-0002-9459-9493;0000-0002-1385-9178", "linkedin": ";kelsey-freese-04a684b3/;;janine-bijsterbosch-7b910381/", "or_profile": "~Ty_O_Easley1;~Kevin_Freese1;~Elizabeth_Munch1;~Janine_Diane_Bijsterbosch1", "aff": "Washington University, Saint Louis;International Business Machines;Michigan State University;Washington University, Saint Louis", "aff_domain": "wustl.edu;ibm.com;msu.edu;wustl.edu", "position": "PhD student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@misc{\neasley2023using,\ntitle={Using persistent homology to understand dimensionality reduction in resting-state f{MRI}},\nauthor={Ty O Easley and Kevin Freese and Elizabeth Munch and Janine Diane Bijsterbosch},\nyear={2023},\nurl={https://openreview.net/forum?id=qoiOpVrIEa}\n}", "github": "", "project": "", "reviewers": "MybT;af6F;Qnet;SXyw;R8ML", "site": "https://openreview.net/forum?id=qoiOpVrIEa", "pdf_size": 3286489, "rating": "3;3;3;3;8", "confidence": "3;4;3;3;4", "soundness": "2;3;2;3;4", "novelty": "1;2;2;1;4", "presentation": "2;1;1;3;4", "wc_summary": "132;32;67;111;76", "wc_strengths": "26;17;31;101;124", "wc_weaknesses": "190;90;165;293;76", "wc_questions": "144;108;105;207;44", "wc_limitations": "5;7;6;4;106", "wc_review": "497;254;374;716;426", "wc_reply_reviewers": "63;74;0;43;88", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.0, 2.0 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.0, 1.0954451150103321 ], "presentation_avg": [ 2.2, 1.16619037896906 ], "wc_summary_avg": [ 83.6, 34.897564384925204 ], 
"wc_strengths_avg": [ 59.8, 43.869807385034186 ], "wc_weaknesses_avg": [ 162.8, 78.14192216729762 ], "wc_questions_avg": [ 121.6, 53.43631723837263 ], "wc_limitations_avg": [ 25.6, 40.21243588742169 ], "wc_review_avg": [ 453.4, 153.4113424750595 ], "wc_reply_reviewers_avg": [ 53.6, 30.571882506643256 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6123724356957946, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k5gPFZ1aQfAJ:scholar.google.com/&scioq=Using+persistent+homology+to+understand+dimensionality+reduction+in+resting-state+fMRI&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Washington University in St. Louis;International Business Machines Corporation;Michigan State University", "aff_unique_dep": ";;", "aff_unique_url": "https://wustl.edu;https://www.ibm.com;https://www.msu.edu", "aff_unique_abbr": "WUSTL;IBM;MSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Saint Louis;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Lower Bounds on Adaptive Sensing for Matrix Recovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70337", "id": "qptO6YDZEP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e258bb98cc032ab6ae9053db453431f7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qptO6YDZEP", "openreview": "https://openreview.net/forum?id=qptO6YDZEP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70337", "video": "https://nips.cc/virtual/2023/poster/70337", "author_site": "Praneeth Kacham, David Woodruff", "tldr": "", "abstract": "We study lower bounds on adaptive sensing algorithms for recovering low rank matrices using linear measurements. Given an $n \\times n$ matrix $A$, a general linear measurement $S(A)$, for an $n \\times n$ matrix $S$, is just the inner product of $S$ and $A$, each treated as $n^2$-dimensional vectors. By performing as few linear measurements as possible on a rank-$r$ matrix $A$, we hope to construct a matrix $\\hat{A}$ that satisfies $|A - \\hat{A}|\\_F^2 \\le c |A|\\_F^2$, for a small constant $c$. Here $|A|\\_F$ denotes the Frobenius norm $(\\sum_{i,j} A_{i,j}^2)^{1/2}$. It is commonly assumed that when measuring $A$ with $S$, the response is corrupted with an independent Gaussian random variable of mean $0$ and variance $\\sigma^2$. Cand\u00e8s and Plan (IEEE Trans. Inform. Theory 2011) study non-adaptive algorithms for low rank matrix recovery using random linear measurements. They use the restricted isometry property (RIP) of Random Gaussian Matrices to give tractable algorithms to estimate $A$ from the measurements.\n\nAt the edge of the noise level where recovery is information-theoretically feasible, it is known that their non-adaptive algorithms need to perform $\\Omega(n^2)$ measurements, which amounts to reading the entire matrix. An important question is whether adaptivity helps in decreasing the overall number of measurements. While for the related problem of sparse recovery, adaptive algorithms have been extensively studied, as far as we are aware adaptive algorithms and lower bounds on them seem largely unexplored for matrix recovery. 
We show that any adaptive algorithm that uses $k$ linear measurements in each round and outputs an approximation as in (1) with probability $\\ge 9/10$ must run for $t = \\Omega(\\log(n^2/k)/\\log\\log n)$ rounds. Our lower bound shows that any adaptive algorithm which uses $n^{2-\\beta}$ ($\\beta > 0$ is an arbitrary constant) linear measurements in each round must run for $\\Omega(\\log n/\\log\\log n)$ rounds. Our techniques also readily extend to obtain lower bounds on adaptive algorithms for tensor recovery. \n\nOur hard distribution also allows us to give a measurement-vs-rounds trade-off for many sensing problems in numerical linear algebra, such as spectral norm low rank approximation, Frobenius norm low rank approximation, singular vector approximation, and more.", "keywords": "Compressed Sensing;Matrix Recovery;Low rank approximation", "primary_area": "", "supplementary_material": "", "author": "Praneeth Kacham;David Woodruff", "authorids": "~Praneeth_Kacham1;~David_Woodruff1", "gender": "M;M", "homepage": "https://www.praneethkacham.com;http://www.cs.cmu.edu/~dwoodruf/", "dblp": "255/5684;w/DPWoodruff", "google_scholar": "hKhPmTkAAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Praneeth_Kacham1;~David_Woodruff1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkacham2023lower,\ntitle={Lower Bounds on Adaptive Sensing for Matrix Recovery},\nauthor={Praneeth Kacham and David Woodruff},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qptO6YDZEP}\n}", "github": "", "project": "", "reviewers": "cbbb;m7zx;Txfq;NmzP", "pdf_size": 461393, "rating": "5;6;7;7", "confidence": "2;2;3;2", "soundness": "4;2;3;4", "novelty": "2;3;3;3", "presentation": "4;2;3;3", "wc_summary": "102;95;335;70", "wc_strengths": "28;45;88;33", "wc_weaknesses": "141;31;39;41", "wc_questions": "85;50;57;45", "wc_limitations": "12;2;1;1", "wc_review": "368;223;520;190", "wc_reply_reviewers": "0;12;11;7", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 150.5, 107.18325428909127 ], "wc_strengths_avg": [ 48.5, 23.62731470142132 ], "wc_weaknesses_avg": [ 63.0, 45.18849411078001 ], "wc_questions_avg": [ 59.25, 15.465687828221544 ], "wc_limitations_avg": [ 4.0, 4.636809247747852 ], "wc_review_avg": [ 325.25, 130.8651500591353 ], "wc_reply_reviewers_avg": [ 7.5, 4.716990566028302 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17111194964449976078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United 
States" }, { "title": "PHOTOSWAP: Personalized Subject Swapping in Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70336", "id": "qqcIM8NiiB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e9a0a72da9b76c3ebc8cc33ff10ac29-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qqcIM8NiiB", "openreview": "https://openreview.net/forum?id=qqcIM8NiiB", "poster": "/media/PosterPDFs/NeurIPS%202023/70336.png?t=1699865510.0308316", "slides": "https://nips.cc/virtual/2023/poster/70336", "video": "https://nips.cc/virtual/2023/poster/70336", "author_site": "Jing Gu, Yilin Wang, Nanxuan Zhao, Tsu-Jui Fu, Wei Xiong, Qing Liu, Zhifei Zhang, HE Zhang, Jianming Zhang, HyunJoon Jung, Xin Eric Wang", "tldr": "", "abstract": "In an era where images and visual content dominate our digital landscape, the ability to manipulate and personalize these images has become a necessity.\nEnvision seamlessly substituting a tabby cat lounging on a sunlit window sill in a photograph with your own playful puppy, all while preserving the original charm and composition of the image. \nWe present \\emph{Photoswap}, a novel approach that enables this immersive image editing experience through personalized subject swapping in existing images.\n\\emph{Photoswap} first learns the visual concept of the subject from reference images and then swaps it into the target image using pre-trained diffusion models in a training-free manner. We establish that a well-conceptualized visual subject can be seamlessly transferred to any image with appropriate self-attention and cross-attention manipulation, maintaining the pose of the swapped subject and the overall coherence of the image. \nComprehensive experiments underscore the efficacy and controllability of \\emph{Photoswap} in personalized subject swapping. 
Furthermore, \\emph{Photoswap} significantly outperforms baseline methods in human ratings across subject swapping, background preservation, and overall quality, revealing its vast application potential, from entertainment to professional editing.", "keywords": "image editing;diffusion model;text to image generation", "primary_area": "", "supplementary_material": "/attachment/176fbd6a4d2685594804aaea17c52bef7175f60c.pdf", "author": "Jing Gu;Yilin Wang;Nanxuan Zhao;Tsu-Jui Fu;Wei Xiong;Qing Liu;Zhifei Zhang;HE Zhang;Jianming Zhang;HyunJoon Jung;Xin Eric Wang", "authorids": "~Jing_Gu2;~Yilin_Wang4;~Nanxuan_Zhao1;~Tsu-Jui_Fu2;~Wei_Xiong5;~Qing_Liu1;~Zhifei_Zhang2;~HE_Zhang2;~Jianming_Zhang1;~HyunJoon_Jung1;~Xin_Eric_Wang2", "gender": "M;M;F;M;M;F;M;M;M;M;M", "homepage": "https://g-jing.github.io/;https://www.yilinwang.org;http://nxzhao.com;https://tsujuifu.github.io;https://wxiong.me;https://qliu24.github.io/;https://zzutk.github.io/;https://scholar.google.com/citations?user=HZLiJt0AAAAJ&hl=en;https://jimmie33.github.io/;;https://eric-xw.github.io", "dblp": ";47/3464-2.html;224/0709;218/5366.html;;53/4481-17;66/9500;24/2058-4;;40/10919.html;10/5630-61", "google_scholar": "B3YeB3YAAAAJ;fYqdLx4AAAAJ;;https://scholar.google.com.tw/citations?user=7QRDcC0AAAAJ;uO0k6DMAAAAJ;1ytghtEAAAAJ;HuerflQAAAAJ;HZLiJt0AAAAJ;TkVHKDgAAAAJ;dX3FbO4AAAAJ;YjqluE0AAAAJ", "orcid": ";;;;;;0000-0003-0466-9548;;0000-0002-9954-6294;;0000-0003-2605-5504", "linkedin": ";;;tsujuifu1996;;;zhifei-zhang-108b0899/;;;hyunvincero/;", "or_profile": "~Jing_Gu2;~Yilin_Wang4;~Nanxuan_Zhao1;~Tsu-Jui_Fu2;~Wei_Xiong5;~Qing_Liu1;~Zhifei_Zhang2;~HE_Zhang2;~Jianming_Zhang1;~HyunJoon_Jung1;~Xin_Eric_Wang2", "aff": "University of California, Santa Cruz;Adobe Systems;Adobe Research;UC Santa Barbara;Adobe Systems;Adobe Systems;Adobe Research;Adobe Systems;Adobe Systems;Adobe Systems;University of California, Santa Cruz", "aff_domain": "ucsc.edu;adobe.com;adobe.com;ucsb.edu;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;ucsc.edu", "position": "PhD student;research scientist;Researcher;PhD student;Researcher;Researcher;Research Engineer;Researcher;Research Scientist;Director of Applied Research;Assistant Professor", "bibtex": "@inproceedings{\ngu2023photoswap,\ntitle={{PHOTOSWAP}: Personalized Subject Swapping in Images},\nauthor={Jing Gu and Yilin Wang and Nanxuan Zhao and Tsu-Jui Fu and Wei Xiong and Qing Liu and Zhifei Zhang and HE Zhang and Jianming Zhang and HyunJoon Jung and Xin Eric Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qqcIM8NiiB}\n}", "github": "", "project": "", "reviewers": "p4oH;Ep7B;z7Bi;TtiF", "pdf_size": 10503575, "rating": "3;5;5;7", "confidence": "4;3;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "1;2;3;3", "wc_summary": "74;26;58;96", "wc_strengths": "11;23;98;106", "wc_weaknesses": "222;93;173;235", "wc_questions": "6;19;3;53", "wc_limitations": "20;2;5;28", "wc_review": "333;163;337;518", "wc_reply_reviewers": "224;17;0;60", "wc_reply_authors": "1558;112;92;308", "reply_reviewers": "1;1;0;1", "reply_authors": "5;3;2;4", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 63.5, 25.509802037648196 ], "wc_strengths_avg": [ 59.5, 42.80478945164898 ], "wc_weaknesses_avg": [ 180.75, 55.68830667204741 ], "wc_questions_avg": [ 20.25, 
19.84155991851447 ], "wc_limitations_avg": [ 13.75, 10.685855136581255 ], "wc_review_avg": [ 337.75, 125.54954201429808 ], "wc_reply_reviewers_avg": [ 75.25, 88.62103305649286 ], "wc_reply_authors_avg": [ 517.5, 606.6323021402668 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10854374199500034033&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "ucsc.edu;adobe.com;adobe.com;ucsb.edu;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;ucsc.edu", "author_num": 11, "aff_unique_index": "0;1;1;2;1;1;1;1;1;1;0", "aff_unique_norm": "University of California, Santa Cruz;Adobe;University of California, Santa Barbara", "aff_unique_dep": ";Adobe Systems Incorporated;", "aff_unique_url": "https://www.ucsc.edu;https://www.adobe.com;https://www.ucsb.edu", "aff_unique_abbr": "UCSC;Adobe;UCSB", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Santa Cruz;;Santa Barbara", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "TabMT: Generating tabular data with masked transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70335", "id": "qs4swxtIAQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/90debc7cedb5cac83145fc8d18378dc5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qs4swxtIAQ", "openreview": "https://openreview.net/forum?id=qs4swxtIAQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70335.png?t=1699498409.667649", "slides": "https://nips.cc/virtual/2023/poster/70335", "video": "https://nips.cc/virtual/2023/poster/70335", "author_site": "Manbir Gulati, Paul Roysdon", "tldr": "", "abstract": "Autoregressive and Masked Transformers are incredibly effective as generative models and classifiers.\n While these models are most prevalent in NLP, they also exhibit strong performance in other domains, such as vision. \n This work contributes to the exploration of transformer-based models in synthetic data generation for diverse application domains. \n In this paper, we present TabMT, a novel Masked Transformer design for generating synthetic tabular data. \n TabMT effectively addresses the unique challenges posed by heterogeneous data fields and is natively able to handle missing data. \n Our design leverages improved masking techniques to allow for generation and demonstrates state-of-the-art performance from extremely small to extremely large tabular datasets. 
\n We evaluate TabMT for privacy-focused applications and find that it is able to generate high quality data with superior privacy tradeoffs.", "keywords": "Tabular Data;Deep Learning;Generative Modeling;Transformers;Masked Transformers;Synthetic data", "primary_area": "", "supplementary_material": "/attachment/bc19f59e8b85a022be5056960477c8cc2ea82adc.zip", "author": "Manbir S Gulati;Paul F Roysdon", "authorids": "~Manbir_S_Gulati1;roysdonp@leidos.com", "gender": "M;", "homepage": ";", "dblp": "324/7962;", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Manbir_S_Gulati1;roysdonp@leidos.com", "aff": "Leidos Inc;", "aff_domain": "leidos.com;", "position": "Researcher;", "bibtex": "@inproceedings{\ngulati2023tabmt,\ntitle={Tab{MT}: Generating tabular data with masked transformers},\nauthor={Manbir S Gulati and Paul F Roysdon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qs4swxtIAQ}\n}", "github": "", "project": "", "reviewers": "EYXT;gJJU;sX8C;aH7d", "pdf_size": 440624, "rating": "4;4;6;7", "confidence": "2;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "107;31;45;76", "wc_strengths": "55;46;52;42", "wc_weaknesses": "116;119;64;32", "wc_questions": "48;2;104;19", "wc_limitations": "4;1;1;20", "wc_review": "330;199;266;189", "wc_reply_reviewers": "0;0;241;0", "wc_reply_authors": "0;0;574;0", "reply_reviewers": "0;0;3;0", "reply_authors": "1;1;4;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 29.328953271468794 ], "wc_strengths_avg": [ 48.75, 5.0682837331783235 ], "wc_weaknesses_avg": [ 82.75, 36.56073713698891 ], "wc_questions_avg": [ 43.25, 38.7387080321479 ], "wc_limitations_avg": [ 6.5, 7.88986691902975 ], "wc_review_avg": [ 246.0, 56.819890883386954 ], "wc_reply_reviewers_avg": [ 60.25, 104.35606115602485 ], "wc_reply_authors_avg": [ 143.5, 248.5492908861339 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2321058102443704208&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "leidos.com;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Leidos Inc", "aff_unique_dep": "", "aff_unique_url": "https://www.leidos.com", "aff_unique_abbr": "Leidos", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Performance Scaling via Optimal Transport: Enabling Data Selection from Partially Revealed Sources", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70334", "id": "quMBEd27x9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c142c14699223f7417cad706fd6f652e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=quMBEd27x9", "openreview": "https://openreview.net/forum?id=quMBEd27x9", "poster": "/media/PosterPDFs/NeurIPS%202023/70334.png?t=1702369813.9331112", "slides": "https://nips.cc/virtual/2023/poster/70334", "video": "https://nips.cc/virtual/2023/poster/70334", "author_site": "Feiyang Kang, Hoang Anh Just, Anit Kumar Sahu, Ruoxi Jia", "tldr": "", "abstract": "Traditionally, data 
selection has been studied in settings where all samples from prospective sources are fully revealed to a machine learning developer. However, in practical data exchange scenarios, data providers often reveal only a limited subset of samples before an acquisition decision is made. Recently, there have been efforts to fit scaling functions that predict model performance at any *size and data source composition* using the limited available samples. However, these scaling functions are usually black-box, computationally expensive to fit, highly susceptible to overfitting, and/or difficult to optimize for data selection. This paper proposes a framework called **, which predicts model performance and supports data selection decisions based on partial samples of prospective data sources. Our approach distinguishes itself from existing work by introducing a novel *two-stage* performance inference process. In the first stage, we leverage the Optimal Transport distance to predict the model's performance for any data mixture ratio within the range of disclosed data sizes. In the second stage, we extrapolate the performance to larger undisclosed data sizes based on a novel parameter-free mapping technique inspired by neural scaling laws. We further derive an efficient gradient-based method to select data sources based on the projected model performance. Evaluation over a diverse range of applications (e.g., vision, text, fine-tuning, noisy data sources, etc.) demonstrates that ** significantly improves existing performance scaling approaches in terms of both the accuracy of performance inference and the computation costs associated with constructing the performance predictor. Also, ** outperforms a range of other off-the-shelf solutions by a wide margin in terms of data selection effectiveness.
We provide **, an open-source toolkit.", "keywords": "data-centric AI;data acquisition;data valuation;performance prediction;data markets;optimal transport;scaling laws", "primary_area": "", "supplementary_material": "/attachment/834cdb76bffc116b9ed35ec4b52fbe1cd682a4d8.pdf", "author": "Feiyang Kang;Hoang Anh Just;Anit Kumar Sahu;Ruoxi Jia", "authorids": "~Feiyang_Kang1;~Hoang_Anh_Just1;~Anit_Kumar_Sahu1;~Ruoxi_Jia1", "gender": "M;;;", "homepage": ";https://justhoanganh.com;;https://ruoxijia.info/", "dblp": "218/1175;307/2901;;147/5355-1", "google_scholar": "_6mV_iEAAAAJ;XcBDQhAAAAAJ;;JCrug-YAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Feiyang_Kang1;~Hoang_Anh_Just1;~Anit_Kumar_Sahu1;~Ruoxi_Jia1", "aff": "Virginia Tech;Virginia Polytechnic Institute and State University;;Virginia Tech", "aff_domain": "vt.edu;vt.edu;;vt.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nkang2023performance,\ntitle={Performance Scaling via Optimal Transport: Enabling Data Selection from Partially Revealed Sources},\nauthor={Feiyang Kang and Hoang Anh Just and Anit Kumar Sahu and Ruoxi Jia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=quMBEd27x9}\n}", "github": "", "project": "", "reviewers": "d24K;WiXX;WNMz;Paf5", "pdf_size": 2660941, "rating": "5;6;6;6", "confidence": "3;3;4;2", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;4;2", "wc_summary": "79;334;109;149", "wc_strengths": "19;59;39;69", "wc_weaknesses": "585;143;138;91", "wc_questions": "103;1;54;23", "wc_limitations": "7;14;1;0", "wc_review": "793;551;341;332", "wc_reply_reviewers": "225;38;5;55", "wc_reply_authors": "3673;116;141;113", "reply_reviewers": "1;1;1;2", "reply_authors": "8;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 167.75, 99.14478049801714 ], "wc_strengths_avg": [ 46.5, 19.20286436967152 ], "wc_weaknesses_avg": [ 239.25, 200.64692247826778 ], "wc_questions_avg": [ 45.25, 38.290827883450106 ], "wc_limitations_avg": [ 5.5, 5.5901699437494745 ], "wc_review_avg": [ 504.25, 188.33663345191238 ], "wc_reply_reviewers_avg": [ 80.75, 85.2008656059315 ], "wc_reply_authors_avg": [ 1010.75, 1537.089193085424 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 2.598076211353316 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16450272954413647133&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "vt.edu;vt.edu;;vt.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Virginia Tech", "aff_unique_dep": "", "aff_unique_url": "https://www.vt.edu", "aff_unique_abbr": "VT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On the Robustness of Mechanism Design under Total Variation Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70333", "id": "qumBHr77ht", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/058983528186511a74968e88a6d0ad63-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qumBHr77ht", "openreview": "https://openreview.net/forum?id=qumBHr77ht",
"poster": "/media/PosterPDFs/NeurIPS%202023/70333.png?t=1701813321.9126604", "slides": "https://nips.cc/virtual/2023/poster/70333", "video": "https://nips.cc/virtual/2023/poster/70333", "author_site": "Anuran Makur, Marios Mertzanidis, Alexandros Psomas, Athina Terzoglou", "tldr": "", "abstract": "We study the problem of designing mechanisms when agents' valuation functions are drawn from unknown and correlated prior distributions. In particular, we are given a prior distribution $D$, and we are interested in designing a (truthful) mechanism that has good performance for all \"true distributions\" that are close to $D$ in Total Variation (TV) distance. We show that DSIC and BIC mechanisms in this setting are strongly robust with respect to TV distance, for any bounded objective function $\\mathcal{O}$, extending a recent result of Brustle et al. ([BCD20], EC 2020). At the heart of our result is a fundamental duality property of total variation distance. As direct applications of our result, we (i) demonstrate how to find approximately revenue-optimal and approximately BIC mechanisms for weakly dependent prior distributions; (ii) show how to find correlation-robust mechanisms when only ``noisy'' versions of marginals are accessible, extending recent results of Bei et. al. ([BGLT19], SODA 2019); (iii) prove that prophet-inequality type guarantees are preserved for correlated priors, recovering a variant of a result of D{\\\"u}tting and Kesselheim ([DK19], EC 2019) as a special case; (iv) give a new necessary condition for a correlated distribution to witness an infinite separation in revenue between simple and optimal mechanisms, complementing recent results of Psomas et al. ([PSCW22], NeurIPS 2022); (v) give a new condition for simple mechanisms to approximate revenue-optimal mechanisms for the case of a single agent whose type is drawn from a correlated distribution that can be captured by a Markov Random Field, complementing recent results of Cai and Oikonomou ([CO21], EC 2021).", "keywords": "mechanism design;revenue maximization;correlated distributions;total variation distance", "primary_area": "", "supplementary_material": "/attachment/38c7986543ff483df9f1c67c8086c8b06ce0b589.pdf", "author": "Anuran Makur;Marios Mertzanidis;Alexandros Psomas;Athina Terzoglou", "authorids": "~Anuran_Makur1;~Marios_Mertzanidis1;~Alexandros_Psomas1;~Athina_Terzoglou1", "gender": ";M;;F", "homepage": ";https://mertzanidismarios.com/;https://www.alexpsomas.com/;https://sites.google.com/view/athinaterzoglou", "dblp": ";;19/10537;358/9405", "google_scholar": ";;FrTxJzcAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Anuran_Makur1;~Marios_Mertzanidis1;~Alexandros_Psomas1;~Athina_Terzoglou1", "aff": ";Purdue University;Purdue University;Purdue University", "aff_domain": ";purdue.edu;purdue.edu;purdue.edu", "position": ";PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nmakur2023on,\ntitle={On the Robustness of Mechanism Design under Total Variation Distance},\nauthor={Anuran Makur and Marios Mertzanidis and Alexandros Psomas and Athina Terzoglou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qumBHr77ht}\n}", "github": "", "project": "", "reviewers": "acoM;JheU;XhLk;K563;yCAy", "pdf_size": 314627, "rating": "5;6;6;7;7", "confidence": "3;3;4;3;3", "soundness": "3;4;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "121;82;186;159;162", "wc_strengths": 
"37;70;52;63;107", "wc_weaknesses": "149;61;63;36;48", "wc_questions": "86;61;25;6;115", "wc_limitations": "35;1;2;12;37", "wc_review": "428;275;328;276;469", "wc_reply_reviewers": "0;10;17;0;107", "wc_reply_authors": "0;0;0;0;131", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 142.0, 36.51301137950689 ], "wc_strengths_avg": [ 65.8, 23.421357774475844 ], "wc_weaknesses_avg": [ 71.4, 40.00299988750844 ], "wc_questions_avg": [ 58.6, 39.5808034279245 ], "wc_limitations_avg": [ 17.4, 15.679285698015711 ], "wc_review_avg": [ 355.2, 79.61758599706475 ], "wc_reply_reviewers_avg": [ 26.8, 40.61231340369569 ], "wc_reply_authors_avg": [ 26.2, 52.4 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5318067766605277733&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";purdue.edu;purdue.edu;purdue.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multimodal Deep Learning Model Unveils Behavioral Dynamics of V1 Activity in Freely Moving Mice", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70332", "id": "qv5UZJTNda", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31a19921acd38cdf7a8c86ec032cef2d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qv5UZJTNda", "openreview": "https://openreview.net/forum?id=qv5UZJTNda", "poster": "/media/PosterPDFs/NeurIPS%202023/70332.png?t=1698429962.7887812", "slides": "https://nips.cc/virtual/2023/poster/70332", "video": "https://nips.cc/virtual/2023/poster/70332", "author_site": "Aiwen Xu, Yuchen Hou, Cristopher Niell, Michael Beyeler", "tldr": "", "abstract": "Despite their immense success as a model of macaque visual cortex, deep convolutional neural networks (CNNs) have struggled to predict activity in visual cortex of the mouse, which is thought to be strongly dependent on the animal\u2019s behavioral state. Furthermore, most computational models focus on predicting neural responses to static images presented under head fixation, which are dramatically different from the dynamic, continuous visual stimuli that arise during movement in the real world. Consequently, it is still unknown how natural visual input and different behavioral variables may integrate over time to generate responses in primary visual cortex (V1). To address this, we introduce a multimodal recurrent neural network that integrates gaze-contingent visual input with behavioral and temporal dynamics to explain V1 activity in freely moving mice. We show that the model achieves state-of-the-art predictions of V1 activity during free exploration and demonstrate the importance of each component in an extensive ablation study. 
Analyzing our model using maximally activating stimuli and saliency maps, we reveal new insights into cortical function, including the prevalence of mixed selectivity for behavioral variables in mouse V1. In summary, our model offers a comprehensive deep-learning framework for exploring the computational principles underlying V1 neurons in freely-moving animals engaged in natural behavior.", "keywords": "neuroscience;cognitive science;multimodal learning;representation learning;network architecture;computational biology;visual perception", "primary_area": "", "supplementary_material": "/attachment/494349ee9e77be21785b1d3ec3311b45007301f3.pdf", "author": "Aiwen Xu;Yuchen Hou;Cris M. Niell;Michael Beyeler", "authorids": "~Aiwen_Xu1;yuchenhou@ucsb.edu;cniell@uoregon.edu;~Michael_Beyeler1", "gender": ";;;M", "homepage": ";;;", "dblp": ";;;136/0857", "google_scholar": ";;;dK-0kG4AAAAJ", "orcid": ";;;0000-0001-5233-844X", "linkedin": ";;;", "or_profile": "~Aiwen_Xu1;yuchenhou@ucsb.edu;cniell@uoregon.edu;~Michael_Beyeler1", "aff": ";;;University of California, Santa Barbara", "aff_domain": ";;;ucsb.edu", "position": ";;;Assistant Professor", "bibtex": "@inproceedings{\nxu2023multimodal,\ntitle={Multimodal Deep Learning Model Unveils Behavioral Dynamics of V1 Activity in Freely Moving Mice},\nauthor={Aiwen Xu and Yuchen Hou and Cris M. Niell and Michael Beyeler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qv5UZJTNda}\n}", "github": "", "project": "", "reviewers": "riS4;ZQ1m;NhDi;xdTa", "pdf_size": 1684972, "rating": "5;6;7;8", "confidence": "5;4;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;4;4", "wc_summary": "74;149;68;154", "wc_strengths": "122;181;38;40", "wc_weaknesses": "366;219;32;103", "wc_questions": "88;11;40;210", "wc_limitations": "11;8;1;8", "wc_review": "661;568;179;515", "wc_reply_reviewers": "194;0;276;21", "wc_reply_authors": "0;0;313;0", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 111.25, 40.34460930533347 ], "wc_strengths_avg": [ 95.25, 59.997395776816845 ], "wc_weaknesses_avg": [ 180.0, 126.44168616401792 ], "wc_questions_avg": [ 87.25, 76.01767886485354 ], "wc_limitations_avg": [ 7.0, 3.6742346141747673 ], "wc_review_avg": [ 480.75, 181.88509422159916 ], "wc_reply_reviewers_avg": [ 122.75, 116.17094085871905 ], "wc_reply_authors_avg": [ 78.25, 135.53297569226464 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9438798074485388, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10466266600843421970&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "email": ";;;ucsb.edu", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Reinforcement Learning with Simple Sequence Priors", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70331", "id": "qxF8Pge6vM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c3909e3abe8ebdb20c42a42ce0bc907d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qxF8Pge6vM", "openreview": "https://openreview.net/forum?id=qxF8Pge6vM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70331", "video": "https://nips.cc/virtual/2023/poster/70331", "author_site": "Tankred Saanum, No\u00e9mi \u00c9ltet\u0151, Peter Dayan, Marcel Binz, Eric Schulz", "tldr": "", "abstract": "In reinforcement learning (RL), simplicity is typically quantified on an action-by-action basis -- but this timescale ignores temporal regularities, like repetitions, often present in sequential strategies. We therefore propose an RL algorithm that learns to solve tasks with sequences of actions that are compressible. We explore two possible sources of simple action sequences: Sequences that can be learned by autoregressive models, and sequences that are compressible with off-the-shelf data compression algorithms. Distilling these preferences into sequence priors, we derive a novel information-theoretic objective that incentivizes agents to learn policies that maximize rewards while conforming to these priors. We show that the resulting RL algorithm leads to faster learning, and attains higher returns than state-of-the-art model-free approaches in a series of continuous control tasks from the DeepMind Control Suite. These priors also produce a powerful information-regularized agent that is robust to noisy observations and can perform open-loop control.", "keywords": "Deep Reinforcement Learning;Compression;Sequence learning;Information bottleneck;Mutual information", "primary_area": "", "supplementary_material": "/attachment/0c500fd009e5916852912b6ac57707ea5f8d2476.zip", "author": "Tankred Saanum;Noemi Elteto;Peter Dayan;Marcel Binz;Eric Schulz", "authorids": "~Tankred_Saanum1;~Noemi_Elteto1;~Peter_Dayan1;~Marcel_Binz1;~Eric_Schulz1", "gender": "M;F;;M;M", "homepage": ";https://noemielteto.github.io;https://www.mpg.de/12309370/biological-cybernetics-dayan;;https://cpilab.org", "dblp": ";;22/522;212/5102;124/0016", "google_scholar": "https://scholar.google.com/citations?hl=no;L9n83LoAAAAJ;;https://scholar.google.de/citations?user=Lvm9Q8QAAAAJ;", "orcid": ";;0000-0003-3476-1839;;", "linkedin": ";;;;", "or_profile": "~Tankred_Saanum1;~Noemi_Elteto1;~Peter_Dayan1;~Marcel_Binz1;~Eric_Schulz1", "aff": "Max Planck Institute for Biological Cybernetics, Max-Planck Institute;Max Planck Institute for Biological Cybernetics;Max-Planck Institute;Max Planck Institute for Biological Cybernetics, Max-Planck Institute;Max Planck Institute for Biological Cybernetics", "aff_domain": "tuebingen.mpg.de;tuebingen.mpg.de;mpg.de;tuebingen.mpg.de;tuebingen.mpg.de", "position": "PhD student;PhD student;Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nsaanum2023reinforcement,\ntitle={Reinforcement Learning with Simple Sequence Priors},\nauthor={Tankred Saanum and Noemi Elteto and Peter Dayan and Marcel Binz and Eric Schulz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qxF8Pge6vM}\n}", "github": "", "project": "", "reviewers": "GvQF;7uiK;teDV;6AwJ", "pdf_size": 4747600, "rating": "5;6;6;7", "confidence": "4;3;4;4", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "111;192;132;93", "wc_strengths": "18;19;90;85", 
"wc_weaknesses": "311;79;106;153", "wc_questions": "16;2;92;124", "wc_limitations": "7;37;4;11", "wc_review": "463;329;424;466", "wc_reply_reviewers": "144;121;40;16", "wc_reply_authors": "373;156;66;35", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 132.0, 37.28940868396816 ], "wc_strengths_avg": [ 53.0, 34.5470693402494 ], "wc_weaknesses_avg": [ 162.25, 89.87039278872659 ], "wc_questions_avg": [ 58.5, 51.01715397785337 ], "wc_limitations_avg": [ 14.75, 13.083864108129525 ], "wc_review_avg": [ 420.5, 55.36469994500106 ], "wc_reply_reviewers_avg": [ 80.25, 53.555461906326606 ], "wc_reply_authors_avg": [ 157.5, 132.11831818487548 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4661116639805229340&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tuebingen.mpg.de;tuebingen.mpg.de;mpg.de;tuebingen.mpg.de;tuebingen.mpg.de", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Max Planck Institute for Biological Cybernetics;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": "Biological Cybernetics;", "aff_unique_url": "https://www.biological-cybernetics.de;https://www.mpg.de", "aff_unique_abbr": "MPIBC;MPG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Diffusion Schr\u00f6dinger Bridge Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70330", "id": "qy07OHsJT5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c428adf74782c2092d254329b6b02482-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qy07OHsJT5", "openreview": "https://openreview.net/forum?id=qy07OHsJT5", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70330", "video": "https://nips.cc/virtual/2023/poster/70330", "author_site": "Yuyang Shi, Valentin De Bortoli, Andrew Campbell, Arnaud Doucet", "tldr": "", "abstract": "Solving transport problems, i.e. finding a map transporting one given distribution to another, has numerous applications in machine learning. Novel mass transport methods motivated by generative modeling have recently been proposed, e.g. Denoising Diffusion Models (DDMs) and Flow Matching Models (FMMs) implement such a transport through a Stochastic Differential Equation (SDE) or an Ordinary Differential Equation (ODE). However, while it is desirable in many applications to approximate the deterministic dynamic Optimal Transport (OT) map which admits attractive properties, DDMs and FMMs are not guaranteed to provide transports close to the OT map. In contrast, Schr\u00f6dinger bridges (SBs) compute stochastic dynamic mappings which recover entropy-regularized versions of OT. Unfortunately, existing numerical methods approximating SBs either scale poorly with dimension or accumulate errors across iterations. 
In this work, we introduce Iterative Markovian Fitting (IMF), a new methodology for solving SB problems, and Diffusion Schr\u00f6dinger Bridge Matching (DSBM), a novel numerical algorithm for computing IMF iterates. DSBM significantly improves over previous SB numerics and recovers as special/limiting cases various recent transport methods. We demonstrate the performance of DSBM on a variety of problems.", "keywords": "diffusion Schr\u00f6dinger bridge;bridge matching;optimal transport", "primary_area": "", "supplementary_material": "", "author": "Yuyang Shi;Valentin De Bortoli;Andrew Campbell;Arnaud Doucet", "authorids": "~Yuyang_Shi2;~Valentin_De_Bortoli1;~Andrew_Campbell4;~Arnaud_Doucet2", "gender": ";;;", "homepage": ";https://vdeborto.github.io/;;https://www.stats.ox.ac.uk/~doucet/", "dblp": "23/10173-2;224/9338;93/3398;68/1628", "google_scholar": ";;;W4SZGV8AAAAJ", "orcid": "0000-0003-4383-1747;;0000-0003-2086-0238;0000-0002-7662-419X", "linkedin": "yuyang-s-b2541616a/;;;", "or_profile": "~Yuyang_Shi2;~Valentin_De_Bortoli1;~Andrew_Campbell4;~Arnaud_Doucet2", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nshi2023diffusion,\ntitle={Diffusion Schr\\\"odinger Bridge Matching},\nauthor={Yuyang Shi and Valentin De Bortoli and Andrew Campbell and Arnaud Doucet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qy07OHsJT5}\n}", "github": "", "project": "", "reviewers": "J5AJ;Lk9k;Axbu;7Dnn;24jx", "pdf_size": 19509139, "rating": "5;6;6;7;7", "confidence": "4;4;3;3;2", "soundness": "3;3;3;3;3", "novelty": "3;3;2;4;3", "presentation": "4;3;2;3;3", "wc_summary": "103;117;52;105;89", "wc_strengths": "65;84;28;90;111", "wc_weaknesses": "93;146;137;288;29", "wc_questions": "57;55;325;109;85", "wc_limitations": "4;43;22;1;1", "wc_review": "322;445;564;593;315", "wc_reply_reviewers": "0;0;0;18;30", "wc_reply_authors": "0;0;0;12;24", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;2;3", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 93.2, 22.435685859808252 ], "wc_strengths_avg": [ 75.6, 27.96140196771256 ], "wc_weaknesses_avg": [ 138.6, 85.40398117183999 ], "wc_questions_avg": [ 126.2, 101.36350428038683 ], "wc_limitations_avg": [ 14.2, 16.387800340497197 ], "wc_review_avg": [ 447.8, 116.66601904582156 ], "wc_reply_reviewers_avg": [ 9.6, 12.354756169184402 ], "wc_reply_authors_avg": [ 7.2, 9.6 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18264375654715910946&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Landscape Surrogate: Learning 
Decision Losses for Mathematical Optimization Under Partial Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70329", "id": "qyEm4tF2p1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/574f145eac328cc4aaf9358e27120eb5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qyEm4tF2p1", "openreview": "https://openreview.net/forum?id=qyEm4tF2p1", "poster": "/media/PosterPDFs/NeurIPS%202023/70329.png?t=1699927076.3042555", "slides": "https://nips.cc/virtual/2023/poster/70329", "video": "https://nips.cc/virtual/2023/poster/70329", "author_site": "Arman Zharmagambetov, Brandon Amos, Aaron Ferber, Aaron Ferber, Taoan Huang, Bistra Dilkina, Yuandong Tian", "tldr": "", "abstract": "Recent works in learning-integrated optimization have shown promise in settings where the optimization problem is only partially observed or where general-purpose optimizers perform poorly without expert tuning. By learning an optimizer $\\mathbf{g}$ to tackle these challenging problems with $f$ as the objective, the optimization process can be substantially accelerated by leveraging past experience. The optimizer can be trained with supervision from known optimal solutions or implicitly by optimizing the compound function $f\\circ \\mathbf{g}$. The implicit approach may not require optimal solutions as labels and is capable of handling problem uncertainty; however, it is slow to train and deploy due to frequent calls to optimizer $\\mathbf{g}$ during both training and testing. The training is further challenged by sparse gradients of $\\mathbf{g}$, especially for combinatorial solvers. To address these challenges, we propose using a smooth and learnable **Landscape Surrogate** $\\mathcal{M}$ as a replacement for $f\\circ \\mathbf{g}$. This surrogate, learnable by neural networks, can be computed faster than the solver $\\mathbf{g}$, provides dense and smooth gradients during training, can generalize to unseen optimization problems, and is efficiently learned via alternating optimization. We test our approach on both synthetic problems, including shortest path and multidimensional knapsack, and real-world problems such as portfolio optimization, achieving comparable or superior objective values compared to state-of-the-art baselines while reducing the number of calls to $\\mathbf{g}$. 
Notably, our approach outperforms existing methods for computationally expensive high-dimensional problems.", "keywords": "learning surrogates;predict+optimize framework;combinatorial nonlinear optimization;argmin differentiation", "primary_area": "", "supplementary_material": "", "author": "Arman Zharmagambetov;Brandon Amos;Aaron M Ferber;Taoan Huang;Bistra Dilkina;Yuandong Tian", "authorids": "~Arman_Zharmagambetov1;~Brandon_Amos1;~Aaron_M_Ferber1;~Taoan_Huang2;~Bistra_Dilkina2;~Yuandong_Tian1", "gender": "M;;M;M;F;M", "homepage": "https://arman-z.github.io/;http://bamos.github.io;https://aaron-ferber.github.io/;;;http://yuandong-tian.com", "dblp": "252/5004;133/4801.html;163/7788;241/7690;30/5718;t/YuandongTian", "google_scholar": "D6QocXMAAAAJ;d8gdZR4AAAAJ;TuVq07oAAAAJ;;1jjyaBYAAAAJ;0mgEF28AAAAJ", "orcid": ";;;;0000-0002-6784-473X;0000-0003-4202-4847", "linkedin": ";bdamos;aaron-ferber-64a73980/;;;yuandongtian", "or_profile": "~Arman_Zharmagambetov1;~Brandon_Amos1;~Aaron_M_Ferber1;~Taoan_Huang2;~Bistra_Dilkina2;~Yuandong_Tian1", "aff": "Meta AI (FAIR);Meta;University of Southern California;University of Southern California;University of Southern California;Meta AI (FAIR)", "aff_domain": "meta.com;meta.com;usc.edu;usc.edu;usc.edu;meta.com", "position": "Postdoc;Research Scientist;PhD student;PhD student;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nzharmagambetov2023landscape,\ntitle={Landscape Surrogate: Learning Decision Losses for Mathematical Optimization Under Partial Information},\nauthor={Arman Zharmagambetov and Brandon Amos and Aaron M Ferber and Taoan Huang and Bistra Dilkina and Yuandong Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qyEm4tF2p1}\n}", "github": "", "project": "", "reviewers": "7fCr;kDSQ;Nect;1xim", "pdf_size": 1831988, "rating": "5;5;7;7", "confidence": "5;4;5;4", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;4;3", "wc_summary": "117;122;131;116", "wc_strengths": "20;29;62;167", "wc_weaknesses": "197;358;360;108", "wc_questions": "2;98;238;42", "wc_limitations": "19;14;50;85", "wc_review": "355;621;841;518", "wc_reply_reviewers": "42;17;96;231", "wc_reply_authors": "72;94;0;218", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 121.5, 5.937171043518958 ], "wc_strengths_avg": [ 69.5, 58.42302628245134 ], "wc_weaknesses_avg": [ 255.75, 107.94066657196443 ], "wc_questions_avg": [ 95.0, 89.32524839036273 ], "wc_limitations_avg": [ 42.0, 28.398943642325854 ], "wc_review_avg": [ 583.75, 176.22056491794595 ], "wc_reply_reviewers_avg": [ 96.5, 82.73602601043876 ], "wc_reply_authors_avg": [ 96.0, 78.54934754662193 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17610288690849751901&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "meta.com;meta.com;usc.edu;usc.edu;usc.edu;meta.com", "author_num": 6, "aff_unique_index": "0;0;1;1;1;0", "aff_unique_norm": "Meta;University of Southern California", "aff_unique_dep": "Facebook AI Research (FAIR);", "aff_unique_url": "https://ai.facebook.com;https://www.usc.edu", 
"aff_unique_abbr": "Meta AI;USC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Global Update Tracking: A Decentralized Learning Algorithm for Heterogeneous Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70328", "id": "qyixBZl8Ph", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98f8c89ae042c512e6c87e0e0c2a0f98-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=qyixBZl8Ph", "openreview": "https://openreview.net/forum?id=qyixBZl8Ph", "poster": "/media/PosterPDFs/NeurIPS%202023/70328.png?t=1701292802.8993154", "slides": "https://nips.cc/virtual/2023/poster/70328", "video": "https://nips.cc/virtual/2023/poster/70328", "author_site": "Sai Aparna Aketi, Abolfazl Hashemi, Kaushik Roy", "tldr": "", "abstract": "Decentralized learning enables the training of deep learning models over large distributed datasets generated at different locations, without the need for a central server. However, in practical scenarios, the data distribution across these devices can be significantly different, leading to a degradation in model performance. In this paper, we focus on designing a decentralized learning algorithm that is less susceptible to variations in data distribution across devices. We propose Global Update Tracking (GUT), a novel tracking-based method that aims to mitigate the impact of heterogeneous data in decentralized learning without introducing any communication overhead. We demonstrate the effectiveness of the proposed technique through an exhaustive set of experiments on various Computer Vision datasets (CIFAR-10, CIFAR-100, Fashion MNIST, and ImageNette), model architectures, and network topologies. 
Our experiments show that the proposed method achieves state-of-the-art performance for decentralized learning on heterogeneous data, improving test accuracy by 1-6% over existing techniques.", "keywords": "Federated Learning;Decentralized Learning;Non-IID Data;Heterogeneous data distribution;Peer-to-peer connectivity", "primary_area": "", "supplementary_material": "/attachment/2647bca474674e365aecf59d397e9e61ec8686df.zip", "author": "Sai Aparna Aketi;Abolfazl Hashemi;Kaushik Roy", "authorids": "~Sai_Aparna_Aketi1;~Abolfazl_Hashemi1;~Kaushik_Roy1", "gender": "F;M;M", "homepage": "https://aparna-aketi.github.io/;https://abolfazlh.github.io/;https://engineering.purdue.edu/NRL/Group", "dblp": "217/0935;176/5595;r/KaushikRoy", "google_scholar": "YGtRZCUAAAAJ;Se7mocgAAAAJ;to4P8KgAAAAJ", "orcid": ";0000-0002-8421-4270;", "linkedin": "sai-aparna-aketi;abolfazlh;", "or_profile": "~Sai_Aparna_Aketi1;~Abolfazl_Hashemi1;~Kaushik_Roy1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\naketi2023global,\ntitle={Global Update Tracking: A Decentralized Learning Algorithm for Heterogeneous Data},\nauthor={Sai Aparna Aketi and Abolfazl Hashemi and Kaushik Roy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=qyixBZl8Ph}\n}", "github": "", "project": "", "reviewers": "b7eV;tQhr;NsAw;LUfd", "pdf_size": 782819, "rating": "5;5;6;6", "confidence": "3;1;3;1", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;2;2", "wc_summary": "87;63;37;82", "wc_strengths": "51;46;62;85", "wc_weaknesses": "31;167;107;124", "wc_questions": "6;13;3;36", "wc_limitations": "17;9;68;24", "wc_review": "192;298;277;351", "wc_reply_reviewers": "0;42;0;34", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.25, 19.62619423117992 ], "wc_strengths_avg": [ 61.0, 15.016657417681207 ], "wc_weaknesses_avg": [ 107.25, 49.15473018947414 ], "wc_questions_avg": [ 14.5, 12.932517156377562 ], "wc_limitations_avg": [ 29.5, 22.85278976405288 ], "wc_review_avg": [ 279.5, 57.2647360947381 ], "wc_reply_reviewers_avg": [ 19.0, 19.209372712298546 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=280383483441261909&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Wyze Rule: Federated Rule Dataset for Rule Recommendation Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73449", "id": "qynH28Y4xE", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/02b9d1e6d1b5295a6f883969ddc1bbbd-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=qynH28Y4xE", "openreview": "https://openreview.net/forum?id=qynH28Y4xE", "poster": "/media/PosterPDFs/NeurIPS%202023/73449.png?t=1702197108.3485522", "slides": "https://nips.cc/virtual/2023/poster/73449", "video": "https://nips.cc/virtual/2023/poster/73449", "author_site": "Mohammad Mahdi Kamani, Yuhang Yao, Hanjia Lyu, Zhongwei Cheng, Lin Chen, Liangju Li, Carlee Joe-Wong, Jiebo Luo", "tldr": "", "abstract": "In the rapidly evolving landscape of smart home automation, the potential of IoT devices is vast. In this realm, rules are the main tool utilized for this automation, which are predefined conditions or triggers that establish connections between devices, enabling seamless automation of specific processes. However, one significant challenge researchers face is the lack of comprehensive datasets to explore and advance the field of smart home rule recommendations. These datasets are essential for developing and evaluating intelligent algorithms that can effectively recommend rules for automating processes while preserving the privacy of the users, as it involves personal information about users' daily lives. To bridge this gap, we present the Wyze Rule Dataset, a large-scale dataset designed specifically for smart home rule recommendation research. Wyze Rule encompasses over 1 million rules gathered from a diverse user base of 300,000 individuals from Wyze Labs, offering an extensive and varied collection of real-world data. With a focus on federated learning, our dataset is tailored to address the unique challenges of a cross-device federated learning setting in the recommendation domain, featuring a large-scale number of clients with widely heterogeneous data. To establish a benchmark for comparison and evaluation, we have meticulously implemented multiple baselines in both centralized and federated settings. Researchers can leverage these baselines to gauge the performance and effectiveness of their rule recommendation systems, driving advancements in the domain. 
The Wyze Rule Dataset is publicly accessible through [HuggingFace](https://huggingface.co/datasets/wyzelabs/RuleRecommendation)'s dataset API.", "keywords": "Rule Recommendation;Federated Learning;Graph Representation Learning", "primary_area": "", "supplementary_material": "/attachment/43c5e17a7a319a8ee8bb1b182c1476eeda290b38.zip", "author": "Mohammad Mahdi Kamani;Yuhang Yao;Hanjia Lyu;Zhongwei Cheng;Lin Chen;Liangju Li;Carlee Joe-Wong;Jiebo Luo", "authorids": "~Mohammad_Mahdi_Kamani2;~Yuhang_Yao2;~Hanjia_Lyu1;~Zhongwei_Cheng1;~Lin_Chen7;~Liangju_Li1;~Carlee_Joe-Wong1;~Jiebo_Luo1", "gender": ";;;;M;M;F;", "homepage": "https://mmkamani.com;https://www.andrew.cmu.edu/user/yuhangya/;https://brucelyu17.github.io/;;https://sites.google.com/site/gggchenlin;;https://www.andrew.cmu.edu/user/cjoewong/;", "dblp": "194/7523.html;203/0159;256/5541;;13/3479-21;;40/9937.html;", "google_scholar": "jUXXvNIAAAAJ;oQIV0BoAAAAJ;tPhwyYsAAAAJ;;https://scholar.google.com/citations?hl=en;;XEztdZgAAAAJ;", "orcid": "0000-0003-3930-4151;0000-0002-7045-0002;0000-0002-3876-0094;;;;;", "linkedin": "mm-kamani7/;yuhang-yao/;;;lin-chen-44346b15/;liangju-li-43a5766a/;;", "or_profile": "~Mohammad_Mahdi_Kamani2;~Yuhang_Yao2;~Hanjia_Lyu1;~Zhongwei_Cheng1;~Lin_Chen7;~Liangju_Li1;~Carlee_Joe-Wong1;~Jiebo_Luo1", "aff": "Wyze Labs;Carnegie Mellon University;University of Rochester;;Wyze Labs, Inc.;;Carnegie Mellon University;", "aff_domain": "wyze.com;andrew.cmu.edu;rochester.edu;;wyze.com;;cmu.edu;", "position": "Researcher;PhD student;PhD student;;Chief Scientist;;Assistant Professor;", "bibtex": "@inproceedings{\nkamani2023wyze,\ntitle={Wyze Rule: Federated Rule Dataset for Rule Recommendation Benchmarking},\nauthor={Mohammad Mahdi Kamani and Yuhang Yao and Hanjia Lyu and Zhongwei Cheng and Lin Chen and Liangju Li and Carlee Joe-Wong and Jiebo Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=qynH28Y4xE}\n}", "github": "", "project": "", "reviewers": "qebj;2wK7;e1DN;YUfZ;8wtf", "pdf_size": 3334550, "rating": "5;6;7;7;8", "confidence": "4;3;3;4;4", "wc_summary_and_contributions": "46;51;165;133;62", "wc_strengths": "13;19;148;82;82", "wc_improvement": "94;92;105;198;67", "wc_limitations": "19;1;79;66;1", "wc_correctness": "5;1;91;1;10", "wc_clarity": "40;7;9;1;5", "wc_relation_to_prior_work": "8;1;15;1;7", "wc_documentation": "7;1;15;1;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "233;174;628;484;239", "wc_reply_reviewers": "506;0;0;0;6", "wc_reply_authors": "1409;282;450;215;235", "reply_reviewers": "3;0;0;0;1", "reply_authors": "4;1;1;1;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 91.4, 48.384294972645826 ], "wc_strengths_avg": [ 68.8, 49.42630878388553 ], "wc_improvement_avg": [ 111.2, 45.14598542506299 ], "wc_limitations_avg": [ 33.2, 33.01151314314447 ], "wc_correctness_avg": [ 21.6, 34.85742388645495 ], "wc_clarity_avg": [ 12.4, 14.051334456200237 ], "wc_relation_to_prior_work_avg": [ 6.4, 5.2 ], "wc_documentation_avg": [ 5.6, 5.2 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 351.6, 174.47819347987303 ], "wc_reply_reviewers_avg": [ 102.4, 201.81337914023442 ], "wc_reply_authors_avg": [ 518.2, 453.01187622401244 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], 
"corr_rating_confidence": 0.08006407690254361, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13568385038340544692&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "wyze.com;andrew.cmu.edu;rochester.edu;;wyze.com;;cmu.edu;", "author_num": 8, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Wyze Labs;Carnegie Mellon University;University of Rochester", "aff_unique_dep": ";;", "aff_unique_url": "https://wyze.com;https://www.cmu.edu;https://www.rochester.edu", "aff_unique_abbr": "Wyze;CMU;U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Cambridge Law Corpus: A Dataset for Legal AI Research", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73448", "id": "r30thTMcaM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/819b8452be7d6af1351d4c4f9cbdbd9b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=r30thTMcaM", "openreview": "https://openreview.net/forum?id=r30thTMcaM", "poster": "/media/PosterPDFs/NeurIPS%202023/73448.png?t=1702268474.0745044", "slides": "https://nips.cc/virtual/2023/poster/73448", "video": "https://nips.cc/virtual/2023/poster/73448", "author_site": "Andreas \u00d6stling, Holli Sargeant, Huiyuan Xie, Ludwig Bull, Alexander Terenin, Leif Jonsson, M\u00e5ns Magnusson, Felix Steffek", "tldr": "", "abstract": "We introduce the Cambridge Law Corpus (CLC), a dataset for legal AI research. It consists of over 250 000 court cases from the UK. Most cases are from the 21st century, but the corpus includes cases as old as the 16th century. This paper presents the first release of the corpus, containing the raw text and meta-data. Together with the corpus, we provide annotations on case outcomes for 638 cases, done by legal experts. Using our annotated data, we have trained and evaluated case outcome extraction with GPT-3, GPT-4 and RoBERTa models to provide benchmarks. We include an extensive legal and ethical discussion to address the potentially sensitive nature of this material. 
As a consequence, the corpus will only be released for research purposes under certain restrictions.", "keywords": "Law;AI;Corpus;Legal Research;dataset", "primary_area": "", "supplementary_material": "", "author": "Andreas \u00d6stling;Holli Sargeant;Huiyuan Xie;Ludwig Konrad Bull;Alexander Terenin;Leif Jonsson;M\u00e5ns Magnusson;Felix Steffek", "authorids": "~Andreas_\u00d6stling1;~Holli_Sargeant1;~Huiyuan_Xie1;~Ludwig_Konrad_Bull1;~Alexander_Terenin1;~Leif_Jonsson3;~M\u00e5ns_Magnusson2;~Felix_Steffek1", "gender": "M;;;M;M;;M;", "homepage": ";;;https://www.courtcorrect.com;https://avt.im/;https://scholar.google.com/citations?user=JUrEbmcAAAAJ;http://www.mansmagnusson.com;https://www.law.cam.ac.uk/people/academic/f-steffek/6136", "dblp": ";;;;185/1040;;119/9862;", "google_scholar": "t1bu-QEAAAAJ;;;;6Qa-wXMAAAAJ;;https://scholar.google.se/citations?user=6AA-AAcAAAAJ;", "orcid": "0000-0003-2281-1341;;;;0000-0001-5292-3104;;;", "linkedin": ";;;ludwig-bull/;;;;felix-steffek-8b7663249/", "or_profile": "~Andreas_\u00d6stling1;~Holli_Sargeant1;~Huiyuan_Xie1;~Ludwig_Konrad_Bull1;~Alexander_Terenin1;~Leif_Jonsson3;~M\u00e5ns_Magnusson2;~Felix_Steffek1", "aff": "Uppsala University;;;HWZ - University of Applied Sciences in Business Administration Zurich;University of Cambridge;;Uppsala University;University of Cambridge", "aff_domain": "uu.se;;;fh-hwz.ch;cam.ac.uk;;statistik.uu.se;cam.ac.uk", "position": "PhD student;;;Lecturer;Postdoc;;Assistant Professor;Professor of Law", "bibtex": "@inproceedings{\n{\\\"o}stling2023the,\ntitle={The Cambridge Law Corpus: A Corpus for Legal {AI} Research},\nauthor={Andreas {\\\"O}stling and Holli Sargeant and Huiyuan Xie and Ludwig Konrad Bull and Alexander Terenin and Leif Jonsson and M{\\r{a}}ns Magnusson and Felix Steffek},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=r30thTMcaM}\n}", "github": "", "project": "", "reviewers": "JUfX;Z7DF;rSsA;Sw7Y;Hryg", "pdf_size": 259834, "rating": "4;5;6;7;9", "confidence": "3;4;5;3;5", "wc_summary_and_contributions": "17;115;134;139;175", "wc_strengths": "17;81;37;133;162", "wc_improvement": "88;517;83;381;151", "wc_limitations": "1;51;15;58;39", "wc_correctness": "1;58;14;27;135", "wc_clarity": "1;105;6;10;40", "wc_relation_to_prior_work": "26;156;23;15;48", "wc_documentation": "1;533;9;48;74", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "153;1617;322;812;825", "wc_reply_reviewers": "0;580;0;0;187", "wc_reply_authors": "615;3386;729;904;1029", "reply_reviewers": "0;1;0;0;1", "reply_authors": "1;5;1;2;2", "rating_avg": [ 6.2, 1.7204650534085253 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "wc_summary_and_contributions_avg": [ 116.0, 53.17142089506354 ], "wc_strengths_avg": [ 86.0, 55.08538826222431 ], "wc_improvement_avg": [ 244.0, 174.4729205349644 ], "wc_limitations_avg": [ 32.8, 21.6 ], "wc_correctness_avg": [ 47.0, 47.89572005931219 ], "wc_clarity_avg": [ 32.4, 38.76390073251142 ], "wc_relation_to_prior_work_avg": [ 53.6, 52.35494246009636 ], "wc_documentation_avg": [ 133.0, 201.74538408598102 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 745.8, 510.0319205696835 ], "wc_reply_reviewers_avg": [ 153.4, 225.2603826685909 ], "wc_reply_authors_avg": [ 1332.6, 1036.4945923640894 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 
0.5198752449100363, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12197845418503474631&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "uu.se;;;fh-hwz.ch;cam.ac.uk;;statistik.uu.se;cam.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Uppsala University;University of Applied Sciences in Business Administration Zurich;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uu.se;https://www.hwz.ch;https://www.cam.ac.uk", "aff_unique_abbr": "UU;HWZ;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;0;2", "aff_country_unique": "Sweden;Switzerland;United Kingdom" }, { "id": "r44obB8FY5", "title": "Deterministic Strided and Transposed Convolutions for Point Clouds Operating Directly on the Points", "track": "main", "status": "Reject", "tldr": "", "abstract": "The application of Convolutional Neural Networks (CNNs) to process point cloud data as geometric representations of real objects has gained considerable attention. However, point clouds are less structured than images, which makes it difficult to directly transfer important CNN operations (initially developed for use on images) to point clouds. For instance, the order of a set of points does not contain semantic information. Therefore, ideally, all operations must be invariant to the point order. Inspired by CNN-related operations applied to images, we transfer the concept of strided and transposed convolutions to point cloud CNNs, enabling deterministic network modules to operate directly on points. To this end, we propose a novel strided convolutional layer with an auxiliary loss, which, as we prove theoretically, enforces a uniform distribution of the selected points within the lower feature hierarchy. This loss ensures a learnable and deterministic selection, unlike the iterative Farthest Point Sampling (FPS), which is commonly used in point cloud CNNs. The high flexibility of the proposed operations is evaluated by deploying them in exemplary network architectures and comparing their performances with those of similar (already existing) structures. 
Notably, we develop a lightweight autoencoder architecture based on our proposed operators, which shows the best generalization performance.", "keywords": "Farthest Point Sampling;Strided Convolutions;Point Clouds;Autoencoder;Auxiliary Selection Loss", "primary_area": "", "supplementary_material": "/attachment/63ffed020a8e48432d4fdc86d52896f8db732a52.zip", "author": "Jonathan Heins;Konstantin Dietrich;Moritz Seiler;Lukas Felix Untersch\u00fctz;Pascal Kerschke", "authorids": "~Jonathan_Heins1;~Konstantin_Dietrich1;~Moritz_Seiler1;~Lukas_Felix_Untersch\u00fctz1;~Pascal_Kerschke1", "gender": "Not Specified;M;M;M;M", "homepage": ";;https://www.wi.uni-muenster.de/de/institut/statistik/personen/moritz-seiler;https://tu-dresden.de/bu/verkehr/ivw/bda/die-professur/beschaeftigte;https://tu-dresden.de/bu/verkehr/ivw/bda/die-professur/inhaber-in?set_language=en", "dblp": "299/8715;;261/3628;;160/8543", "google_scholar": ";;;;https://scholar.google.de/citations?user=xQKB_kEAAAAJ", "orcid": "0000-0002-3571-667X;0000-0002-5383-7475;0000-0002-1750-9060;;0000-0003-2862-1418", "linkedin": ";;;lukas-untersch%C3%BCtz-180b09271/;", "or_profile": "~Jonathan_Heins1;~Konstantin_Dietrich1;~Moritz_Seiler1;~Lukas_Felix_Untersch\u00fctz1;~Pascal_Kerschke1", "aff": "Technische Universit\u00e4t Dresden;Center for Scalable Data Analytics and Artificial Intelligence (ScaDS.AI) Dresden/Leipzig;University of M\u00fcnster;Technische Universit\u00e4t Dresden;Technische Universit\u00e4t Dresden", "aff_domain": "tu-dresden.de;tu-dresden.de;uni-muenster.de;tu-dresden.de;tu-dresden.de", "position": "PhD student;PhD student;PhD student;MS student;Full Professor", "bibtex": "@misc{\nheins2023deterministic,\ntitle={Deterministic Strided and Transposed Convolutions for Point Clouds Operating Directly on the Points},\nauthor={Jonathan Heins and Konstantin Dietrich and Moritz Seiler and Lukas Felix Untersch{\\\"u}tz and Pascal Kerschke},\nyear={2023},\nurl={https://openreview.net/forum?id=r44obB8FY5}\n}", "github": "", "project": "", "reviewers": "KAHP;eM7t;MuCB;Dxvh", "site": "https://openreview.net/forum?id=r44obB8FY5", "pdf_size": 450068, "rating": "3;3;3;4", "confidence": "4;4;4;4", "soundness": "2;2;2;2", "novelty": "2;2;1;1", "presentation": "1;1;1;2", "wc_summary": "74;132;23;71", "wc_strengths": "54;90;14;14", "wc_weaknesses": "332;252;226;17", "wc_questions": "179;90;31;157", "wc_limitations": "1;35;88;4", "wc_review": "640;599;382;263", "wc_reply_reviewers": "161;40;0;184", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 38.632887544163715 ], "wc_strengths_avg": [ 43.0, 31.670175244226233 ], "wc_weaknesses_avg": [ 206.75, 116.30858738717447 ], "wc_questions_avg": [ 114.25, 58.17806717311946 ], "wc_limitations_avg": [ 32.0, 34.96426747409418 ], "wc_review_avg": [ 471.0, 155.0241916605276 ], "wc_reply_reviewers_avg": [ 96.25, 77.97555706758368 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:GYHlQHGuRn0J:scholar.google.com/&scioq=Deterministic+Strided+and+Transposed+Convolutions+for+Point+Clouds+Operating+Directly+on+the+Points&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Technische Universit\u00e4t Dresden;Center for Scalable Data Analytics and Artificial Intelligence;University of M\u00fcnster", "aff_unique_dep": ";;", "aff_unique_url": "https://tu-dresden.de;;https://www.uni-muenster.de", "aff_unique_abbr": "TUD;ScaDS.AI;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Dresden/Lepizig", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Meta-Learning Adversarial Bandit Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70327", "id": "r6xGZ0XL2g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f627c706a7d9961cc1ff55f37f07f97-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=r6xGZ0XL2g", "openreview": "https://openreview.net/forum?id=r6xGZ0XL2g", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70327", "video": "https://nips.cc/virtual/2023/poster/70327", "author_site": "Misha Khodak, Ilya Osadchiy, Keegan Harris, Maria-Florina Balcan, Kfir Y. Levy, Ron Meir, Steven Wu", "tldr": "", "abstract": "We study online meta-learning with bandit feedback, with the goal of improving performance across multiple tasks if they are similar according to some natural similarity measure.\n As the first to target the adversarial online-within-online partial-information setting, we design meta-algorithms that combine outer learners to simultaneously tune the initialization and other hyperparameters of an inner learner for two important cases:\n multi-armed bandits (MAB) and bandit linear optimization (BLO).\n For MAB, the meta-learners initialize and set hyperparameters of the Tsallis-entropy generalization of Exp3, with the task-averaged regret improving if the entropy of the optima-in-hindsight is small.\n For BLO, we learn to initialize and tune online mirror descent (OMD) with self-concordant barrier regularizers, showing that task-averaged regret varies directly with an action space-dependent measure they induce.\n Our guarantees rely on proving that unregularized follow-the-leader combined with two levels of low-dimensional hyperparameter tuning is enough to learn a sequence of affine functions of non-Lipschitz and sometimes non-convex Bregman divergences bounding the regret of OMD.", "keywords": "online learning;multi-armed bandits;meta-learning;multi-task learning;bandit linear optimization", "primary_area": "", "supplementary_material": "/attachment/7ec5045fe6a67e55f276df3784dc70a5b0d8379e.pdf", "author": "Mikhail Khodak;Ilya Osadchiy;Keegan Harris;Nina Balcan;Kfir Yehuda Levy;Ron Meir;Steven Wu", "authorids": "~Mikhail_Khodak1;~Ilya_Osadchiy1;~Keegan_Harris1;~Nina_Balcan1;~Kfir_Yehuda_Levy1;~Ron_Meir1;~Steven_Wu1", "gender": ";M;M;F;M;M;", "homepage": ";;https://keeganharris.github.io/;http://www.cs.cmu.edu/~ninamf/;http://kfiryehud.wixsite.com/kfir-y-levy;https://ronmeir.net.technion.ac.il/;", "dblp": ";;294/5044;b/MariaFlorinaBalcan;83/11388;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;TnvQIrYAAAAJ;https://scholar.google.com.tw/citations?user=LWlN_BUAAAAJ;;https://scholar.google.co.il/citations?user=r3NAa9oAAAAJ;", "orcid": ";0000-0002-3828-7583;;;;;", "linkedin": ";ilya-osadchiy;;;;;", "or_profile": 
"~Mikhail_Khodak1;~Ilya_Osadchiy1;~Keegan_Harris1;~Nina_Balcan1;~Kfir_Yehuda_Levy1;~Ron_Meir1;~Steven_Wu1", "aff": ";Technion - Israel Institute of Technology, Technion;Carnegie Mellon University;Carnegie Mellon University;Technion - Israel Institute of Technology, Technion;Technion, Technion;", "aff_domain": ";technion.ac.il;cmu.edu;cmu.edu;technion.ac.il;technion.ac.il;", "position": ";MS student;PhD student;Full Professor;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nkhodak2023metalearning,\ntitle={Meta-Learning Adversarial Bandit Algorithms},\nauthor={Mikhail Khodak and Ilya Osadchiy and Keegan Harris and Nina Balcan and Kfir Yehuda Levy and Ron Meir and Steven Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=r6xGZ0XL2g}\n}", "github": "", "project": "", "reviewers": "fExn;zCMR;qSEU;yLU7;eUxe;WoUK", "pdf_size": 585131, "rating": "6;6;6;6;6;7", "confidence": "4;3;2;2;2;2", "soundness": "3;3;3;3;3;3", "novelty": "3;3;3;3;3;3", "presentation": "3;3;3;3;2;3", "wc_summary": "72;93;66;156;62;74", "wc_strengths": "70;66;22;49;54;59", "wc_weaknesses": "230;19;22;18;167;85", "wc_questions": "5;89;25;38;132;56", "wc_limitations": "2;1;22;1;31;3", "wc_review": "379;268;157;262;446;277", "wc_reply_reviewers": "11;12;17;9;16;5", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.166666666666667, 0.37267799624996495 ], "confidence_avg": [ 2.5, 0.7637626158259734 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 87.16666666666667, 32.29249998924759 ], "wc_strengths_avg": [ 53.333333333333336, 15.659572862061793 ], "wc_weaknesses_avg": [ 90.16666666666667, 82.05976819096892 ], "wc_questions_avg": [ 57.5, 42.30346715499018 ], "wc_limitations_avg": [ 10.0, 11.972189997378647 ], "wc_review_avg": [ 298.1666666666667, 92.17992671340593 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 4.068851871911234 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9188500282416497300&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";technion.ac.il;cmu.edu;cmu.edu;technion.ac.il;technion.ac.il;", "author_num": 7, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.technion.ac.il;https://www.cmu.edu", "aff_unique_abbr": "Technion;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Israel;United States" }, { "title": "Learning Adaptive Tensorial Density Fields for Clean Cryo-ET Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70326", "id": "r7g9nFsulw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e4be7e9867ef163563f4a5e90cec478f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=r7g9nFsulw", "openreview": "https://openreview.net/forum?id=r7g9nFsulw", "poster": "/media/PosterPDFs/NeurIPS%202023/70326.png?t=1702217713.9779394", "slides": "https://nips.cc/virtual/2023/poster/70326", "video": 
"https://nips.cc/virtual/2023/poster/70326", "author_site": "YUANHAO WANG, Ramzi Idoughi, Wolfgang Heidrich", "tldr": "", "abstract": "We present a novel learning-based framework for reconstructing 3D structures from tilt-series cryo-Electron Tomography (cryo-ET) data. Cryo-ET is a powerful imaging technique that can achieve near-atomic resolutions. Still, it suffers from challenges such as missing-wedge acquisition, large data size, and high noise levels. Our framework addresses these challenges by using an adaptive tensorial-based representation for the 3D density field of the scanned sample. First, we optimize a quadtree structure to partition the volume of interest. Then, we learn a vector-matrix factorization of the tensor representing the density field in each node. Moreover, we use a loss function that combines a differentiable tomographic formation model with three regularization terms: total variation, boundary consistency constraint, and an isotropic Fourier prior. Our framework allows us to query the density at any location using the learned representation and obtain a high-quality 3D tomogram. We demonstrate the superiority of our framework over existing methods using synthetic and real data. Thus, our framework boosts the quality of the reconstruction while reducing the computation time and the memory footprint. The code is available at https://github.com/yuanhaowang1213/adaptivetensordf.", "keywords": "Neural density fields;Coordinate-based representations;Quadtree structure;Cryo-electron microscope", "primary_area": "", "supplementary_material": "/attachment/ad275cf2f76d2a23cd09f4802a0f9c0e7117cfb3.pdf", "author": "YUANHAO WANG;Ramzi Idoughi;Wolfgang Heidrich", "authorids": "~YUANHAO_WANG2;~Ramzi_Idoughi1;~Wolfgang_Heidrich3", "gender": "M;M;M", "homepage": "https://yuanhaowang1213.github.io/;https://vccimaging.org/People/idoughr/;https://vccimaging.org/People/heidriw/", "dblp": "178/5079-3;204/0074;h/WolfgangHeidrich.html", "google_scholar": ";SHxaDHIAAAAJ;IQSbom0AAAAJ", "orcid": ";0000-0002-4661-8717;0000-0002-4227-8508", "linkedin": ";;", "or_profile": "~YUANHAO_WANG2;~Ramzi_Idoughi1;~Wolfgang_Heidrich1", "aff": "KAUST;King Abdullah University of Science and Technology;KAUST", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning Adaptive Tensorial Density Fields for Clean Cryo-{ET} Reconstruction},\nauthor={YUANHAO WANG and Ramzi Idoughi and Wolfgang Heidrich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=r7g9nFsulw}\n}", "github": "", "project": "", "reviewers": "tcrC;bANQ;9Yvf;6xkQ", "pdf_size": 22890981, "rating": "3;5;6;8", "confidence": "5;4;3;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "1;3;3;3", "wc_summary": "229;146;123;138", "wc_strengths": "142;65;24;216", "wc_weaknesses": "395;129;7;53", "wc_questions": "169;105;102;24", "wc_limitations": "125;36;9;5", "wc_review": "1060;481;265;436", "wc_reply_reviewers": "214;59;0;0", "wc_reply_authors": "345;246;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 159.0, 41.24924241728568 ], "wc_strengths_avg": [ 111.75, 73.60154550007765 ], 
"wc_weaknesses_avg": [ 146.0, 150.2165104108067 ], "wc_questions_avg": [ 100.0, 51.395525097035446 ], "wc_limitations_avg": [ 43.75, 48.40131713083849 ], "wc_review_avg": [ 560.5, 299.4332145904993 ], "wc_reply_reviewers_avg": [ 68.25, 87.52820973834665 ], "wc_reply_authors_avg": [ 147.75, 151.83934766719725 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5883484054145521, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7900200917982647501&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "TexQ: Zero-shot Network Quantization with Texture Feature Distribution Calibration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70325", "id": "r8LYNleLf9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0113ef4642264adc2e6924a3cbbdf532-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=r8LYNleLf9", "openreview": "https://openreview.net/forum?id=r8LYNleLf9", "poster": "/media/PosterPDFs/NeurIPS%202023/70325.png?t=1698129208.3044324", "slides": "https://nips.cc/virtual/2023/poster/70325", "video": "https://nips.cc/virtual/2023/poster/70325", "author_site": "Xinrui Chen, Yizhi Wang, Renao YAN, Yiqing Liu, Tian Guan, Yonghong He", "tldr": "", "abstract": "Quantization is an effective way to compress neural networks. By reducing the bit width of the parameters, the processing efficiency of neural network models at edge devices can be notably improved. Most conventional quantization methods utilize real datasets to optimize quantization parameters and fine-tune. Due to the inevitable privacy and security issues of real samples, the existing real-data-driven methods are no longer applicable. Thus, a natural method is to introduce synthetic samples for zero-shot quantization (ZSQ). However, the conventional synthetic samples fail to retain the detailed texture feature distributions, which severely limits the knowledge transfer and performance of the quantized model. In this paper, a novel ZSQ method, TexQ is proposed to address this issue. We first synthesize a calibration image and extract its calibration center for each class with a texture feature energy distribution calibration method. Then, the calibration centers are used to guide the generator to synthesize samples. Finally, we introduce the mixup knowledge distillation module to diversify synthetic samples for fine-tuning. Extensive experiments on CIFAR10/100 and ImageNet show that TexQ is observed to perform state-of-the-art in ultra-low bit width quantization. For example, when ResNet-18 is quantized to 3-bit, TexQ achieves a 12.18% top-1 accuracy increase on ImageNet compared to state-of-the-art methods. 
Code at https://github.com/dangsingrue/TexQ.", "keywords": "Zero-shot quantization;Texture feature calibration;Post-training quantization;low bit width;Neural network compression", "primary_area": "", "supplementary_material": "", "author": "Xinrui Chen;Yizhi Wang;Renao Yan;Yiqing Liu;Tian Guan;Yonghong He", "authorids": "~Xinrui_Chen1;~Yizhi_Wang3;~Renao_Yan1;~Yiqing_Liu1;~Tian_Guan1;~Yonghong_He1", "gender": "M;M;M;M;M;M", "homepage": ";https://github.com/yz-WangRainyfog;;;http://www.sigs.tsinghua.edu.cn;https://scholar.google.com/citations?hl=zh-CN&user=KR_wIQgAAAAJ", "dblp": ";;277/9821;159/3798;84/6828;171/2389", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=zh-CN;;;", "orcid": "0009-0002-8053-8494;;0000-0002-2399-199X;0000-0002-8157-2814;;", "linkedin": ";;;;;", "or_profile": "~Xinrui_Chen1;~Yizhi_Wang3;~Renao_Yan1;~Yiqing_Liu1;~Tian_Guan1;~Yonghong_He1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Biomedical Engineering, Tsinghua University;Graduate School at Shenzhen, Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;MS student;MS student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023texq,\ntitle={TexQ: Zero-shot Network Quantization with Texture Feature Distribution Calibration},\nauthor={Xinrui Chen and Yizhi Wang and Renao Yan and Yiqing Liu and Tian Guan and Yonghong He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=r8LYNleLf9}\n}", "github": "", "project": "", "reviewers": "F97y;zKQV;h8LC;yCFw;qhGS", "pdf_size": 1581323, "rating": "5;5;6;6;7", "confidence": "4;5;5;4;2", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;2;3;3", "wc_summary": "60;26;90;98;41", "wc_strengths": "71;12;30;38;32", "wc_weaknesses": "131;141;205;26;39", "wc_questions": "59;63;24;144;48", "wc_limitations": "57;5;9;33;1", "wc_review": "378;247;358;339;161", "wc_reply_reviewers": "155;226;13;162;28", "wc_reply_authors": "447;1205;0;443;0", "reply_reviewers": "1;2;1;3;1", "reply_authors": "2;4;1;3;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 63.0, 27.62607463973121 ], "wc_strengths_avg": [ 36.6, 19.26239860453521 ], "wc_weaknesses_avg": [ 108.4, 67.09873322202141 ], "wc_questions_avg": [ 67.6, 40.539363586519215 ], "wc_limitations_avg": [ 21.0, 21.166010488516726 ], "wc_review_avg": [ 296.6, 81.29108192169667 ], "wc_reply_reviewers_avg": [ 116.8, 82.56730587829544 ], "wc_reply_authors_avg": [ 419.0, 440.5174230379543 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7319250547113999, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=759506378488055982&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", 
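The TexQ entry above fine-tunes the quantized network with a mixup knowledge distillation module. A hedged sketch of that idea under standard distillation assumptions follows; the mixing coefficient, temperature, and function name are illustrative choices, not the paper's exact values.

```python
import torch
import torch.nn.functional as F

def mixup_kd_loss(student, teacher, x_a, x_b, lam=0.7, temperature=4.0):
    """Distill a full-precision teacher into a quantized student on a mixup of two synthetic samples."""
    x_mix = lam * x_a + (1.0 - lam) * x_b      # diversify synthetic data via mixup
    with torch.no_grad():
        t_logits = teacher(x_mix)              # teacher is frozen during distillation
    s_logits = student(x_mix)
    # Temperature-scaled KL divergence, the standard knowledge-distillation objective.
    return F.kl_div(F.log_softmax(s_logits / temperature, dim=1),
                    F.softmax(t_logits / temperature, dim=1),
                    reduction="batchmean") * temperature ** 2
```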
"aff_unique_abbr": "THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Grounding Neural Inference with Satisfiability Modulo Theories", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70324", "id": "r8snfquzs3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/47167991e38c65a72914763c11cd8d23-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=r8snfquzs3", "openreview": "https://openreview.net/forum?id=r8snfquzs3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70324", "video": "https://nips.cc/virtual/2023/poster/70324", "author_site": "Zifan Wang, Saranya Vijayakumar, Kaiji Lu, Vijay Ganesh, Somesh Jha, Matt Fredrikson", "tldr": "", "abstract": "Recent techniques that integrate solver layers into Deep Neural Networks (DNNs) have shown promise in bridging a long-standing gap between inductive learning and symbolic reasoning techniques. In this paper we present a set of techniques for integrating Satisfiability Modulo Theories (SMT) solvers into the forward and backward passes of a deep network layer, called SMTLayer.\nUsing this approach, one can encode rich domain knowledge into the network in the form of mathematical formulas.\nIn the forward pass, the solver uses symbols produced by prior layers, along with these formulas, to construct inferences; in the backward pass, the solver informs updates to the network, driving it towards representations that are compatible with the solver's theory.\nNotably, the solver need not be differentiable. We implement SMTLayer as a Pytorch module, and our empirical results show that it leads to models that 1) require fewer training samples than conventional models, 2) that are robust to certain types of covariate shift, and 3) that ultimately learn representations that are consistent with symbolic knowledge, and thus naturally interpretable.", "keywords": "Satisfiability Modulo Theories;Solver Layer;Combinatorial Problem;MAXSAT;SAT", "primary_area": "", "supplementary_material": "/attachment/9e98da082b1caf84fd08aaaf18efb1d5c65df37f.pdf", "author": "Zifan Wang;Saranya Vijayakumar;Kaiji Lu;Vijay Ganesh;Somesh Jha;Matt Fredrikson", "authorids": "~Zifan_Wang1;~Saranya_Vijayakumar1;~Kaiji_Lu1;~Vijay_Ganesh1;~Somesh_Jha1;~Matt_Fredrikson1", "gender": "M;F;M;M;M;M", "homepage": "https://www.zifanw.net;http://svijayakumar2.github.io;https://www.linkedin.com/in/calebkaijilu/;https://vganesh1.github.io/;;https://cs.cmu.edu/~mfredrik", "dblp": ";338/9494;224/0239;g/VijayGanesh.html;j/SomeshJha;38/2612", "google_scholar": "HJOP3wMAAAAJ;4Lazaq0AAAAJ;;YP23eR0AAAAJ;BaI7l8QAAAAJ;https://scholar.google.com.tw/citations?user=tMYCvLAAAAAJ", "orcid": ";;;0000-0002-6029-2047;;", "linkedin": "zifan-wang-sail/;;;ganeshvijay/;;", "or_profile": "~Zifan_Wang1;~Saranya_Vijayakumar1;~Kaiji_Lu1;~Vijay_Ganesh1;~Somesh_Jha1;~Matt_Fredrikson1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Pinterest Inc.;University of Waterloo;Department of Computer Science, University of Wisconsin, Madison;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;pinterest.com;uwaterloo.ca;cs.wisc.edu;cmu.edu", "position": "PhD student;PhD student;Researcher;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023grounding,\ntitle={Grounding Neural Inference with Satisfiability Modulo Theories},\nauthor={Zifan Wang and Saranya Vijayakumar and Kaiji Lu 
and Vijay Ganesh and Somesh Jha and Matt Fredrikson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=r8snfquzs3}\n}", "github": "", "project": "", "reviewers": "nvtS;tqWk;oM4L;DzNN", "pdf_size": 642336, "rating": "6;6;7;8", "confidence": "4;4;3;3", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "73;46;57;174", "wc_strengths": "31;22;39;66", "wc_weaknesses": "260;124;63;15", "wc_questions": "96;154;1;146", "wc_limitations": "18;1;23;37", "wc_review": "478;347;183;438", "wc_reply_reviewers": "13;14;108;211", "wc_reply_authors": "0;0;54;146", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.5, 50.855186559484764 ], "wc_strengths_avg": [ 39.5, 16.439282222773596 ], "wc_weaknesses_avg": [ 115.5, 91.93611912627159 ], "wc_questions_avg": [ 99.25, 60.923620214166526 ], "wc_limitations_avg": [ 19.75, 12.871965661856 ], "wc_review_avg": [ 361.5, 113.46475223610194 ], "wc_reply_reviewers_avg": [ 86.5, 81.57971561607701 ], "wc_reply_authors_avg": [ 50.0, 59.648973168026956 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9661351319137447126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cmu.edu;cmu.edu;pinterest.com;uwaterloo.ca;cs.wisc.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Carnegie Mellon University;Pinterest;University of Waterloo;University of Wisconsin-Madison", "aff_unique_dep": ";;;Department of Computer Science", "aff_unique_url": "https://www.cmu.edu;https://www.pinterest.com;https://uwaterloo.ca;https://www.wisc.edu", "aff_unique_abbr": "CMU;Pinterest;UW;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Learning to Group Auxiliary Datasets for Molecule", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70323", "id": "r9eZH6WNm2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8e2571d13f432b301d4c5e3cc70227a6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=r9eZH6WNm2", "openreview": "https://openreview.net/forum?id=r9eZH6WNm2", "poster": "/media/PosterPDFs/NeurIPS%202023/70323.png?t=1699547914.7833571", "slides": "https://nips.cc/virtual/2023/poster/70323", "video": "https://nips.cc/virtual/2023/poster/70323", "author_site": "Tinglin Huang, Ziniu Hu, Rex Ying", "tldr": "", "abstract": "The limited availability of annotations in small molecule datasets presents a challenge to machine learning models. To address this, one common strategy is to collaborate with additional auxiliary datasets. However, having more data does not always guarantee improvements. Negative transfer can occur when the knowledge in the target dataset differs or contradicts that of the auxiliary molecule datasets. In light of this, identifying the auxiliary molecule datasets that can benefit the target dataset when jointly trained remains a critical and unresolved problem. 
Through an empirical analysis, we observe that combining graph structure similarity and task similarity can serve as a more reliable indicator for identifying high-affinity auxiliary datasets. Motivated by this insight, we propose MolGroup, which separates the dataset affinity into task and structure affinity to predict the potential benefits of each auxiliary molecule dataset. MolGroup achieves this by utilizing a routing mechanism optimized through a bi-level optimization framework. Empowered by the meta gradient, the routing mechanism is optimized toward maximizing the target dataset's performance and quantifies the affinity as the gating score. As a result, MolGroup is capable of predicting the optimal combination of auxiliary datasets for each target dataset. Our extensive experiments demonstrate the efficiency and effectiveness of MolGroup, showing an average improvement of 4.41%/3.47% for GIN/Graphormer trained with the group of molecule datasets selected by MolGroup on 11 target molecule datasets.", "keywords": "molecule;routing mechanism;meta gradient", "primary_area": "", "supplementary_material": "/attachment/97116a125e6256a0e62521c4615c528797620982.zip", "author": "Tinglin Huang;Ziniu Hu;Zhitao Ying", "authorids": "~Tinglin_Huang1;~Ziniu_Hu1;~Zhitao_Ying1", "gender": "M;M;M", "homepage": "https://huangtinglin.github.io/;http://acbull.github.io;https://www.cs.yale.edu/homes/ying-rex", "dblp": ";180/5436;209/4936", "google_scholar": "izW2ygYAAAAJ;x6ct1CsAAAAJ;6fqNXooAAAAJ", "orcid": "0009-0005-5644-4879;;", "linkedin": ";;rex-ying-92770148/", "or_profile": "~Tinglin_Huang1;~Ziniu_Hu1;~Zhitao_Ying1", "aff": "Yale University;University of California, Los Angeles;Yale University", "aff_domain": "yale.edu;ucla.edu;yale.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023learning,\ntitle={Learning to Group Auxiliary Datasets for Molecule},\nauthor={Tinglin Huang and Ziniu Hu and Zhitao Ying},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=r9eZH6WNm2}\n}", "github": "", "project": "", "reviewers": "VvkN;ohh4;NGKd;B1V2;sjEd", "pdf_size": 10567967, "rating": "4;6;6;6;7", "confidence": "4;4;3;3;4", "soundness": "2;2;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;2;3", "wc_summary": "69;93;73;76;75", "wc_strengths": "30;88;73;175;44", "wc_weaknesses": "93;194;178;19;74", "wc_questions": "41;11;4;6;146", "wc_limitations": "10;6;1;1;1", "wc_review": "243;392;329;277;340", "wc_reply_reviewers": "118;0;14;0;19", "wc_reply_authors": "218;0;14;0;18", "reply_reviewers": "2;0;1;0;1", "reply_authors": "3;1;2;1;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 77.2, 8.255906976220118 ], "wc_strengths_avg": [ 82.0, 50.821255395749525 ], "wc_weaknesses_avg": [ 111.6, 65.62499523809507 ], "wc_questions_avg": [ 41.6, 53.88357820338215 ], "wc_limitations_avg": [ 3.8, 3.655133376499413 ], "wc_review_avg": [ 316.2, 51.71228093983092 ], "wc_reply_reviewers_avg": [ 30.2, 44.54391091945116 ], "wc_reply_authors_avg": [ 50.0, 84.3136999543965 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 1, 
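As a reading aid for the MolGroup entry above: the routing mechanism quantifies each auxiliary dataset's affinity as a gating score. The toy sketch below shows only the selection step; the additive combination of the two affinities and the 0.5 cutoff are illustrative assumptions, not the paper's exact formulation, and the affinity values would in practice come from the bi-level, meta-gradient optimization the abstract describes.

```python
import torch

def select_auxiliary(task_affinity, structure_affinity, threshold=0.5):
    """Gate auxiliary datasets; keep those whose gating score clears the threshold."""
    gate_scores = torch.sigmoid(task_affinity + structure_affinity)
    return gate_scores, gate_scores > threshold

task_aff = torch.tensor([1.2, -0.4, 0.8])      # learned task affinities (assumed values)
struct_aff = torch.tensor([0.5, -1.0, 0.1])    # graph-structure affinities (assumed values)
scores, keep = select_auxiliary(task_aff, struct_aff)
print(scores, keep)                            # here datasets 0 and 2 are kept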
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=1529338211400843168&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "yale.edu;ucla.edu;yale.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Yale University;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.yale.edu;https://www.ucla.edu", "aff_unique_abbr": "Yale;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Invariant Molecular Representation in Latent Discrete Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70322", "id": "r9fzp8eyhZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f780a86b7145988ac219d49d8e37a58f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=r9fzp8eyhZ", "openreview": "https://openreview.net/forum?id=r9fzp8eyhZ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70322", "video": "https://nips.cc/virtual/2023/poster/70322", "author_site": "Xiang Zhuang, Qiang Zhang, Keyan Ding, Yatao Bian, Xiao Wang, Jingsong Lv, Hongyang Chen, Huajun Chen", "tldr": "", "abstract": "Molecular representation learning lays the foundation for drug discovery. However, existing methods suffer from poor out-of-distribution (OOD) generalization, particularly when data for training and testing originate from different environments. To address this issue, we propose a new framework for learning molecular representations that exhibit invariance and robustness against distribution shifts. Specifically, we propose a strategy called ``first-encoding-then-separation'' to identify invariant molecule features in the latent space, which deviates from conventional practices. Prior to the separation step, we introduce a residual vector quantization module that mitigates the over-fitting to training data distributions while preserving the expressivity of encoders. Furthermore, we design a task-agnostic self-supervised learning objective to encourage precise invariance identification, which enables our method widely applicable to a variety of tasks, such as regression and multi-label classification. Extensive experiments on 18 real-world molecular datasets demonstrate that our model achieves stronger generalization against state-of-the-art baselines in the presence of various distribution shifts. 
Our code is available at https://github.com/HICAI-ZJU/iMoLD.", "keywords": "molecular representation learning;out-of-distribution", "primary_area": "", "supplementary_material": "/attachment/87958671afb3bf54f27b88d988dda3dcff2b88d2.zip", "author": "Xiang Zhuang;Qiang Zhang;Keyan Ding;Yatao Bian;Xiao Wang;Jingsong Lv;Hongyang Chen;Huajun Chen", "authorids": "~Xiang_Zhuang1;~Qiang_Zhang6;~Keyan_Ding1;~Yatao_Bian1;~Xiao_Wang2;~Jingsong_Lv1;~Hongyang_Chen2;~Huajun_Chen1", "gender": ";;M;;M;M;M;M", "homepage": ";https://qiangairesearcher.github.io;;;https://wangxiaocs.github.io/;;https://www.linkedin.com/in/hongyangchen/;", "dblp": ";72/3527-26;195/3500;;49/67-17;01/8009;13/3715;94/5089", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;A7u-ZowAAAAJ;;MnzarAQAAAAJ;;https://scholar.google.ca/citations?user=s-HDT8UAAAAJ;", "orcid": ";;;;0000-0002-4444-7811;;0000-0002-7626-0162;", "linkedin": ";;;;;;hongyangchen/;", "or_profile": "~Xiang_Zhuang1;~Qiang_Zhang6;~Keyan_Ding1;~Yatao_Bian1;~Xiao_Wang2;~Jingsong_Lv1;~Hongyang_Chen2;~Huajun_Chen1", "aff": ";Zhejiang University;Zhejiang University;;Beihang University;Zhejiang Lab;Zhejiang Lab, China;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn;;buaa.edu.cn;zhejianglab.com;zhejianglab.com;zju.edu.cn", "position": ";Principal Researcher;Researcher;;Associate Professor;Researcher;Senior Research Expert;Full Professor", "bibtex": "@inproceedings{\nzhuang2023learning,\ntitle={Learning Invariant Molecular Representation in Latent Discrete Space},\nauthor={Xiang Zhuang and Qiang Zhang and Keyan Ding and Yatao Bian and Xiao Wang and Jingsong Lv and Hongyang Chen and Huajun Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=r9fzp8eyhZ}\n}", "github": "", "project": "", "reviewers": "GuEJ;d9FY;gGdj;czws;zQUd", "pdf_size": 1485050, "rating": "5;5;6;6;7", "confidence": "2;4;3;3;3", "soundness": "3;3;4;2;3", "novelty": "2;2;4;2;3", "presentation": "3;3;2;2;3", "wc_summary": "47;106;54;71;189", "wc_strengths": "26;25;40;30;137", "wc_weaknesses": "45;161;340;30;48", "wc_questions": "26;6;140;340;15", "wc_limitations": "6;1;36;21;1", "wc_review": "150;299;610;492;390", "wc_reply_reviewers": "16;0;17;0;11", "wc_reply_authors": "28;28;13;0;11", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 93.4, 51.97153066824182 ], "wc_strengths_avg": [ 51.6, 43.028362739012046 ], "wc_weaknesses_avg": [ 124.8, 117.36677553720219 ], "wc_questions_avg": [ 105.4, 126.9576307277353 ], "wc_limitations_avg": [ 13.0, 13.638181696985855 ], "wc_review_avg": [ 388.2, 157.9042747996393 ], "wc_reply_reviewers_avg": [ 8.8, 7.467261881037787 ], "wc_reply_authors_avg": [ 16.0, 10.75174404457249 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18429650722400470117&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";zju.edu.cn;zju.edu.cn;;buaa.edu.cn;zhejianglab.com;zhejianglab.com;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Zhejiang University;Beihang University;Zhejiang Lab", 
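For the iMoLD entry above, the residual vector quantization module is the piece most easily illustrated in isolation. A minimal sketch, assuming a plain residual quantizer over a list of codebooks (codebook count and sizes are illustrative, and the straight-through gradient estimator used in practice is omitted):

```python
import torch

def residual_vq(z, codebooks):
    """Quantize z by snapping successive residuals to their nearest codewords and summing."""
    residual, quantized = z, torch.zeros_like(z)
    for C in codebooks:                     # C: (K, d) codewords
        dists = torch.cdist(residual, C)    # (n, K) pairwise distances
        q = C[dists.argmin(dim=-1)]         # nearest codeword per latent vector
        quantized = quantized + q
        residual = residual - q             # the next stage quantizes what remains
    return quantized

z = torch.randn(8, 16)                              # 8 latent vectors of dimension 16
books = [torch.randn(32, 16) for _ in range(3)]     # 3 stages of 32 codewords each
print(residual_vq(z, books).shape)                  # torch.Size([8, 16])
```

Each stage only has to encode the error left by the previous one, which is how a discrete bottleneck can stay expressive while still regularizing the encoder.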
"aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;http://www.buaa.edu.cn/;http://www.zhejianglab.com", "aff_unique_abbr": "ZJU;BUAA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning better with Dale\u2019s Law: A Spectral Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70321", "id": "rDiMgZulwi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/02dd0db10c40092de3d9ec2508d12f60-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rDiMgZulwi", "openreview": "https://openreview.net/forum?id=rDiMgZulwi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70321", "video": "https://nips.cc/virtual/2023/poster/70321", "author_site": "Pingsheng Li, Jonathan Cornford, Arna Ghosh, Blake Richards", "tldr": "", "abstract": "Most recurrent neural networks (RNNs) do not include a fundamental constraint of real neural circuits: Dale's Law, which implies that neurons must be excitatory (E) or inhibitory (I). Dale's Law is generally absent from RNNs because simply partitioning a standard network's units into E and I populations impairs learning. However, here we extend a recent feedforward bio-inspired EI network architecture, named Dale's ANNs, to recurrent networks, and demonstrate that good performance is possible while respecting Dale's Law. This begs the question: What makes some forms of EI network learn poorly and others learn well? And, why does the simple approach of incorporating Dale's Law impair learning? Historically the answer was thought to be the sign constraints on EI network parameters, and this was a motivation behind Dale's ANNs. However, here we show the spectral properties of the recurrent weight matrix at initialisation are more impactful on network performance than sign constraints. We find that simple EI partitioning results in a singular value distribution that is multimodal and dispersed, whereas standard RNNs have an unimodal, more clustered singular value distribution, as do recurrent Dale's ANNs. We also show that the spectral properties and performance of partitioned EI networks are worse for small networks with fewer I units, and we present normalised SVD entropy as a measure of spectrum pathology that correlates with performance. 
Overall, this work sheds light on a long-standing mystery in neuroscience-inspired AI and computational neuroscience, paving the way for greater alignment between neural networks and biology.", "keywords": "Dale's Law;RNNs;brain-inspired neural networks;DANNs;computational neuroscience;spectral properties;inhibition", "primary_area": "", "supplementary_material": "/attachment/a452eb4144ce8bd8042d072495ab990c31be3125.pdf", "author": "Pingsheng Li;Jonathan Cornford;Arna Ghosh;Blake Aaron Richards", "authorids": "~Pingsheng_Li1;~Jonathan_Cornford1;~Arna_Ghosh1;~Blake_Aaron_Richards1", "gender": "M;;M;M", "homepage": "https://linclab.mila.quebec/team/pingsheng;;https://arnaghosh.github.io/;http://linclab.org", "dblp": "318/3169;;190/7223;70/10850", "google_scholar": "Xd8kzPgAAAAJ;;https://scholar.google.ca/citations?user=YjS546oAAAAJ;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ", "orcid": ";;;0000-0001-9662-2151", "linkedin": ";;;", "or_profile": "~Pingsheng_Li1;~Jonathan_Cornford1;~Arna_Ghosh1;~Blake_Aaron_Richards1", "aff": "McGill University, McGill University;;McGill University;Mila - Quebec Artificial Intelligence Institute", "aff_domain": "mail.mcgill.ca;;mcgill.ca;mila.quebec", "position": "Undergrad student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning better with Dale{\\textquoteright}s Law: A Spectral Perspective},\nauthor={Pingsheng Li and Jonathan Cornford and Arna Ghosh and Blake Aaron Richards},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rDiMgZulwi}\n}", "github": "", "project": "", "reviewers": "gdj1;TD5G;SnGV;YJwG", "pdf_size": 12605401, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;4", "wc_summary": "95;94;67;69", "wc_strengths": "93;83;198;105", "wc_weaknesses": "236;75;310;85", "wc_questions": "135;68;396;98", "wc_limitations": "2;25;9;2", "wc_review": "561;345;980;359", "wc_reply_reviewers": "0;0;128;160", "wc_reply_authors": "0;0;458;197", "reply_reviewers": "0;0;2;2", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.25, 13.273563952458284 ], "wc_strengths_avg": [ 119.75, 45.84416538666616 ], "wc_weaknesses_avg": [ 176.5, 100.04623930963122 ], "wc_questions_avg": [ 174.25, 130.20824666663782 ], "wc_limitations_avg": [ 9.5, 9.394147114027968 ], "wc_review_avg": [ 561.25, 256.42774323383964 ], "wc_reply_reviewers_avg": [ 72.0, 72.8834686331544 ], "wc_reply_authors_avg": [ 163.75, 187.96060092476827 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5802143908980721721&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "mail.mcgill.ca;;mcgill.ca;mila.quebec", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "McGill University;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";Artificial Intelligence", "aff_unique_url": "https://www.mcgill.ca;https://mila.quebec", "aff_unique_abbr": "McGill;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "FLuID: Mitigating Stragglers in 
Federated Learning using Invariant Dropout", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70320", "id": "rG1M3kOVba", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e7feb9dbd9a94b6c552fc403fcebf2ef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rG1M3kOVba", "openreview": "https://openreview.net/forum?id=rG1M3kOVba", "poster": "/media/PosterPDFs/NeurIPS%202023/70320.png?t=1701381718.610842", "slides": "https://nips.cc/virtual/2023/poster/70320", "video": "https://nips.cc/virtual/2023/poster/70320", "author_site": "Irene Wang, Prashant Nair, Divya Mahajan", "tldr": "", "abstract": "Federated Learning (FL) allows machine learning models to train locally on individual mobile devices, synchronizing model updates via a shared server. This approach safeguards user privacy; however, it also generates a heterogeneous training environment due to the varying performance capabilities across devices. As a result, \u201cstraggler\u201d devices with lower performance often dictate the overall\ntraining time in FL. In this work, we aim to alleviate this performance bottleneck due to stragglers by dynamically balancing the training load across the system. We introduce Invariant Dropout, a method that extracts a sub-model based on the weight update threshold, thereby minimizing potential impacts on accuracy. Building on this dropout technique, we develop an adaptive training framework, Federated Learning using Invariant Dropout (FLuID). FLuID offers a lightweight sub-model extraction to regulate computational intensity, thereby reducing the load on straggler devices without affecting model quality. Our method leverages neuron updates from non-straggler devices to construct a tailored sub-model for each straggler based on client performance profiling. Furthermore, FLuID can dynamically adapt to changes in stragglers as runtime conditions shift. We evaluate FLuID using five real-world mobile clients. The evaluations show that Invariant Dropout maintains baseline model efficiency while alleviating the performance bottleneck of stragglers through a dynamic, runtime approach.", "keywords": "Federated Learning", "primary_area": "", "supplementary_material": "/attachment/132c26dcebd062031f0483b9175c4287f5b760b6.zip", "author": "Irene Wang;Prashant J. Nair;Divya Mahajan", "authorids": "~Irene_Wang1;~Prashant_J._Nair1;~Divya_Mahajan1", "gender": "F;M;", "homepage": ";https://prashantnair.bitbucket.io/;", "dblp": "216/9321;159/0073;", "google_scholar": "vNPoWx0AAAAJ;IweLDwwAAAAJ;", "orcid": "0000-0003-1912-5834;0000-0002-1732-4314;", "linkedin": "irenewang05/;prashant-nair-4b4458177?;", "or_profile": "~Irene_Wang1;~Prashant_J._Nair1;~Divya_Mahajan1", "aff": "University of British Columbia;University of British Columbia;", "aff_domain": "ubc.ca;ubc.ca;", "position": "Undergrad student;Assistant Professor;", "bibtex": "@inproceedings{\nwang2023fluid,\ntitle={{FL}u{ID}: Mitigating Stragglers in Federated Learning using Invariant Dropout},\nauthor={Irene Wang and Prashant J. 
Nair and Divya Mahajan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rG1M3kOVba}\n}", "github": "", "project": "", "reviewers": "DQm5;a2Bu;sKfw;yFRv", "pdf_size": 2038362, "rating": "5;5;5;6", "confidence": "4;3;3;4", "soundness": "2;3;2;3", "novelty": "2;2;1;3", "presentation": "3;2;2;4", "wc_summary": "107;78;52;203", "wc_strengths": "29;60;65;95", "wc_weaknesses": "178;51;193;62", "wc_questions": "66;1;125;79", "wc_limitations": "24;95;1;31", "wc_review": "404;285;436;470", "wc_reply_reviewers": "0;0;37;0", "wc_reply_authors": "0;0;20;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 110.0, 57.109543860899464 ], "wc_strengths_avg": [ 62.25, 23.40272420040026 ], "wc_weaknesses_avg": [ 121.0, 64.8344044470218 ], "wc_questions_avg": [ 67.75, 44.33607447666065 ], "wc_limitations_avg": [ 37.75, 34.86671048435743 ], "wc_review_avg": [ 398.75, 69.69711256572973 ], "wc_reply_reviewers_avg": [ 9.25, 16.021469970012117 ], "wc_reply_authors_avg": [ 5.0, 8.660254037844387 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1832788123362361104&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "ubc.ca;ubc.ca;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Formalizing locality for normative synaptic plasticity models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70319", "id": "rGN3X9jnEg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/120339238f293d4ae53a7167403abc4b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rGN3X9jnEg", "openreview": "https://openreview.net/forum?id=rGN3X9jnEg", "poster": "/media/PosterPDFs/NeurIPS%202023/70319.png?t=1701703629.4486597", "slides": "https://nips.cc/virtual/2023/poster/70319", "video": "https://nips.cc/virtual/2023/poster/70319", "author_site": "Colin Bredenberg, Ezekiel Williams, Cristina Savin, Blake Richards, Guillaume Lajoie", "tldr": "", "abstract": "In recent years, many researchers have proposed new models for synaptic plasticity in the brain based on principles of machine learning. The central motivation has been the development of learning algorithms that are able to learn difficult tasks while qualifying as \"biologically plausible\". However, the concept of a biologically plausible learning algorithm is only heuristically defined as an algorithm that is potentially implementable by biological neural networks. Further, claims that neural circuits could implement any given algorithm typically rest on an amorphous concept of \"locality\" (both in space and time). 
As a result, it is unclear what many proposed local learning algorithms actually predict biologically, and which of these are consequently good candidates for experimental investigation. Here, we address this lack of clarity by proposing formal and operational definitions of locality. Specifically, we define different classes of locality, each of which makes clear what quantities cannot be included in a learning rule if an algorithm is to qualify as local with respect to a given (biological) constraint. We subsequently use this framework to distill testable predictions from various classes of biologically plausible synaptic plasticity models that are robust to arbitrary choices about neural network architecture. Therefore, our framework can be used to guide claims of biological plausibility and to identify potential means of experimentally falsifying a proposed learning algorithm for the brain.", "keywords": "synaptic plasticity;computational neuroscience", "primary_area": "", "supplementary_material": "/attachment/79be0457b0ca1f80f9451c2318eb68d3e2828a63.pdf", "author": "Colin Bredenberg;Ezekiel Williams;Cristina Savin;Blake Aaron Richards;Guillaume Lajoie", "authorids": "~Colin_Bredenberg1;~Ezekiel_Williams1;~Cristina_Savin1;~Blake_Aaron_Richards1;~Guillaume_Lajoie1", "gender": "M;;F;M;M", "homepage": "http://www.colinbredenberg.com/;https://computationalcognition.ca/;http://csavin.wixsite.com/savinlab;http://linclab.org;https://dms.umontreal.ca/~lajoie/", "dblp": "280/1131;;23/10829;70/10850;31/10384", "google_scholar": "qIs3yv0AAAAJ;KwgL380AAAAJ;muNtwgcAAAAJ;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ;", "orcid": ";;0000-0002-3414-8244;0000-0001-9662-2151;", "linkedin": ";;cristina-savin-1889199b/;;", "or_profile": "~Colin_Bredenberg1;~Ezekiel_Williams1;~Cristina_Savin1;~Blake_Aaron_Richards1;~Guillaume_Lajoie1", "aff": "Mila- Quebec AI Institute;University of Montreal;New York University;Mila - Quebec Artificial Intelligence Institute;Mila - Quebec Artificial Intelligence Institute", "aff_domain": "mila.quebec;umontreal.ca;nyu.edu;mila.quebec;mila.quebec", "position": "Postdoc;PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbredenberg2023formalizing,\ntitle={Formalizing locality for normative synaptic plasticity models},\nauthor={Colin Bredenberg and Ezekiel Williams and Cristina Savin and Blake Aaron Richards and Guillaume Lajoie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rGN3X9jnEg}\n}", "github": "", "project": "", "reviewers": "p6bX;1DnT;GWia;X8HR", "pdf_size": 836210, "rating": "6;6;7;7", "confidence": "4;3;3;3", "soundness": "3;4;4;3", "novelty": "3;2;3;3", "presentation": "2;4;3;3", "wc_summary": "35;129;106;41", "wc_strengths": "97;107;74;27", "wc_weaknesses": "589;193;50;154", "wc_questions": "103;102;190;112", "wc_limitations": "11;43;1;3", "wc_review": "835;574;421;337", "wc_reply_reviewers": "249;54;132;32", "wc_reply_authors": "245;0;44;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.75, 40.62865368185365 ], "wc_strengths_avg": [ 76.25, 30.849432733844555 ], "wc_weaknesses_avg": [ 246.5, 204.5342269645841 ], "wc_questions_avg": [ 126.75, 36.72448093574639 ], "wc_limitations_avg": [ 
14.5, 16.874537030686206 ], "wc_review_avg": [ 541.75, 189.43254076319622 ], "wc_reply_reviewers_avg": [ 116.75, 84.91576708715526 ], "wc_reply_authors_avg": [ 72.25, 101.34193357144909 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5268429568978814383&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mila.quebec;umontreal.ca;nyu.edu;mila.quebec;mila.quebec", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Quebec AI Institute;University of Montreal;New York University;Quebec Artificial Intelligence Institute", "aff_unique_dep": "AI;;;Artificial Intelligence", "aff_unique_url": "https://mila.quebec;https://www.umontreal.ca;https://www.nyu.edu;https://mila.quebec", "aff_unique_abbr": "Mila;UM;NYU;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Adversarial Counterfactual Environment Model Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70318", "id": "rHAX0LRwk8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/df927a06a0d9f5f06d9cd4a91ce58e56-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rHAX0LRwk8", "openreview": "https://openreview.net/forum?id=rHAX0LRwk8", "poster": "/media/PosterPDFs/NeurIPS%202023/70318.png?t=1702123805.360974", "slides": "https://nips.cc/virtual/2023/poster/70318", "video": "https://nips.cc/virtual/2023/poster/70318", "author_site": "Xiong-Hui Chen, Yang Yu, Zhengmao Zhu, ZhiHua Yu, Chen Zhenjun, Chenghe Wang, Yinan Wu, Rong-Jun Qin, Hongqiu Wu, Ruijin Ding, Huang Fangsheng", "tldr": "", "abstract": "An accurate environment dynamics model is crucial for various downstream tasks in sequential decision-making, such as counterfactual prediction, off-policy evaluation, and offline reinforcement learning. \nCurrently, these models are typically learned through empirical risk minimization (ERM) by step-wise fitting of historical transition data. This approach was previously believed to be unreliable over long-horizon rollouts because of the compounding errors, which can lead to uncontrollable inaccuracies in predictions. In this paper, we find that the challenge extends beyond just long-term prediction errors: we reveal that even when planning with one step, learned dynamics models can also perform poorly due to the selection bias of behavior policies during data collection. \nThis issue will significantly mislead the policy optimization process even in identifying single-step optimal actions, further leading to a greater risk in sequential decision-making scenarios.\nTo tackle this problem, we introduce a novel model-learning objective called adversarial weighted empirical risk minimization (AWRM). AWRM incorporates an adversarial policy that exploits the model to generate a data distribution that weakens the model's prediction accuracy, and subsequently, the model is learned under this adversarial data distribution.\nWe implement a practical algorithm, GALILEO, for AWRM and evaluate it on two synthetic tasks, three continuous-control tasks, and \\textit{a real-world application}.
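The AWRM objective just described alternates between an adversary that reweights the data against the model and the model fitting under those weights. A minimal, hedged sketch of that loop on toy transition data follows; the softmax parameterization of the adversarial distribution, the optimizers, and the learning rates are our assumptions, not GALILEO's actual implementation.

```python
import torch

model = torch.nn.Linear(4, 4)                    # stand-in for a one-step dynamics model
phi = torch.zeros(256, requires_grad=True)       # adversary's logits over the samples
opt_model = torch.optim.Adam(model.parameters(), lr=1e-3)
opt_adv = torch.optim.Adam([phi], lr=1e-2)
x, y = torch.randn(256, 4), torch.randn(256, 4)  # toy (state-action, next-state) pairs

for step in range(200):
    # Adversary: ascend on the weighted loss to find where the model is weakest.
    per_sample = ((model(x) - y) ** 2).mean(dim=1)
    w = torch.softmax(phi, dim=0)                # adversarial data distribution
    opt_adv.zero_grad()
    (-(w * per_sample.detach()).sum()).backward()
    opt_adv.step()
    # Model: descend on the loss under the adversarial data distribution.
    per_sample = ((model(x) - y) ** 2).mean(dim=1)
    loss = (torch.softmax(phi, dim=0).detach() * per_sample).sum()
    opt_model.zero_grad()
    loss.backward()
    opt_model.step()
```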
The experiments demonstrate that GALILEO can accurately predict counterfactual actions and improve various downstream tasks, including offline policy evaluation and improvement, as well as online decision-making.", "keywords": "environment model learning;offline reinforcement learning;off-policy evaluation;individual treatment effects estimation;causal inference;adversarial learning", "primary_area": "", "supplementary_material": "", "author": "Xiong-Hui Chen;Yang Yu;Zhengmao Zhu;ZhiHua Yu;Chen Zhenjun;Chenghe Wang;Yinan Wu;Rong-Jun Qin;Hongqiu Wu;Ruijin Ding;Huang Fangsheng", "authorids": "~Xiong-Hui_Chen1;~Yang_Yu5;~Zhengmao_Zhu1;~ZhiHua_Yu2;~Chen_Zhenjun1;~Chenghe_Wang1;~Yinan_Wu2;~Rong-Jun_Qin1;~Hongqiu_Wu2;~Ruijin_Ding1;~Huang_Fangsheng1", "gender": "M;M;M;M;M;M;M;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/chenxh/;https://lamda.nju.edu.cn/zhuzm;https://github.com/Heisenberg106;https://https://cs.nju.edu.cn/chenzj/index.htm;https://github.com/DrZero0;http://www.yinan.com;http://www.lamda.nju.edu.cn/qinrj/;http://www.lamda.nju.edu.cn/wuhq/;;;http://www.lamda.nju.edu.cn/yuy", "dblp": "241/7938;;26/8877;322/3650;181/7478;150/4842;;195/2044;;195/1649;46/2181-1", "google_scholar": "H5pguCYAAAAJ;;;;P3QBHrUAAAAJ;;;;https://scholar.google.com.hk/citations?user=APiHnPoAAAAJ;;PG2lDSwAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;;;;;;https://www.linkedin.cn/incareer/in/\u65b9\u80dc-\u9ec4-6047a8b7;", "or_profile": "~Xiong-Hui_Chen1;~Zhengmao_Zhu1;~ZhiHua_Yu2;~Chen_Zhenjun1;~Chenghe_Wang1;~Yinan_Wu2;~Rong-Jun_Qin1;~Hongqiu_Wu2;~Ruijin_Ding1;~Huang_Fangsheng1;~Yang_Yu2", "aff": "Nanjing University;Nanjing University;Meituan;Meituan;Nanjing University;Meituan;Nanjing University;;;meituan;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;meituan.com;meituan.com.cn;nju.edu.cn;meituan.com;nju.edu.cn;;;meituan.com;nju.edu.cn", "position": "PhD student;PhD student;Researcher;Researcher;MS student;Researcher;PhD student;;;Researcher;Professor", "bibtex": "@inproceedings{\nchen2023adversarial,\ntitle={Adversarial Counterfactual Environment Model Learning},\nauthor={Xiong-Hui Chen and Yang Yu and Zhengmao Zhu and ZhiHua Yu and Chen Zhenjun and Chenghe Wang and Yinan Wu and Rong-Jun Qin and Hongqiu Wu and Ruijin Ding and Huang Fangsheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rHAX0LRwk8}\n}", "github": "", "project": "", "reviewers": "So7i;wiCk;h4bm;qQGz;E1XE", "pdf_size": 12507205, "rating": "5;6;6;8;8", "confidence": "2;2;3;3;5", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "60;110;61;166;211", "wc_strengths": "53;107;52;178;132", "wc_weaknesses": "157;69;16;62;246", "wc_questions": "30;17;147;49;178", "wc_limitations": "1;20;1;6;78", "wc_review": "301;323;277;461;845", "wc_reply_reviewers": "18;16;0;29;187", "wc_reply_authors": "17;26;0;0;184", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 121.6, 59.27090348560582 ], "wc_strengths_avg": [ 104.4, 48.11070566932063 ], "wc_weaknesses_avg": [ 110.0, 81.8852856134727 ], "wc_questions_avg": [ 84.2, 65.47488067954 ], "wc_limitations_avg": [ 21.2, 29.239699040858813 ], "wc_review_avg": [ 441.4, 211.6767346686924 ], "wc_reply_reviewers_avg": [ 50.0, 
69.12307863514182 ], "wc_reply_authors_avg": [ 45.4, 70.02171091882859 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.7607257743127308, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16988655499564427098&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;nju.edu.cn;meituan.com;meituan.com.cn;nju.edu.cn;meituan.com;nju.edu.cn;;;meituan.com;nju.edu.cn", "author_num": 11, "aff_unique_index": "0;0;1;1;0;1;0;1;0", "aff_unique_norm": "Nanjing University;Meituan", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.meituan.com", "aff_unique_abbr": "Nanjing U;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ARTIC3D: Learning Robust Articulated 3D Shapes from Noisy Web Image Collections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70317", "id": "rJc5Lsn5QU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96aca14d6c4dcd3adf54bc2c5ad7f138-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rJc5Lsn5QU", "openreview": "https://openreview.net/forum?id=rJc5Lsn5QU", "poster": "/media/PosterPDFs/NeurIPS%202023/70317.png?t=1699471114.918735", "slides": "https://nips.cc/virtual/2023/poster/70317", "video": "https://nips.cc/virtual/2023/poster/70317", "author_site": "Chun-Han Yao, Amit Raj, Wei-Chih Hung, Michael Rubinstein, Yuanzhen Li, Ming-Hsuan Yang, Varun Jampani", "tldr": "", "abstract": "Estimating 3D articulated shapes like animal bodies from monocular images is inherently challenging due to the ambiguities of camera viewpoint, pose, texture, lighting, etc. We propose ARTIC3D, a self-supervised framework to reconstruct per-instance 3D shapes from a sparse image collection in-the-wild. Specifically, ARTIC3D is built upon a skeleton-based surface representation and is further guided by 2D diffusion priors from Stable Diffusion. First, we enhance the input images with occlusions/truncation via 2D diffusion to obtain cleaner mask estimates and semantic features. Second, we perform diffusion-guided 3D optimization to estimate shape and texture that are of high-fidelity and faithful to input images. We also propose a novel technique to calculate more stable image-level gradients via diffusion models compared to existing alternatives. Finally, we produce realistic animations by fine-tuning the rendered shape and texture under rigid part transformations. 
Extensive evaluations on multiple existing datasets as well as newly introduced noisy web image collections with occlusions and truncation demonstrate that ARTIC3D outputs are more robust to noisy images, higher quality in terms of shape and texture details, and more realistic when animated.", "keywords": "3D articulated shape;animal body estimation;diffusion for 3D", "primary_area": "", "supplementary_material": "/attachment/af310ef84bcbef598eb9e0b923840bfc200d9181.zip", "author": "Chun-Han Yao;Amit Raj;Wei-Chih Hung;Michael Rubinstein;Yuanzhen Li;Ming-Hsuan Yang;Varun Jampani", "authorids": "~Chun-Han_Yao1;~Amit_Raj1;~Wei-Chih_Hung1;~Michael_Rubinstein1;~Yuanzhen_Li1;~Ming-Hsuan_Yang1;~Varun_Jampani2", "gender": "M;M;M;M;F;M;M", "homepage": "https://chhankyao.github.io/;https://amitraj93.github.io/;;http://people.csail.mit.edu/mrub/;http://people.csail.mit.edu/yzli/;https://faculty.ucmerced.edu/mhyang/;https://varunjampani.github.io/", "dblp": "184/9458;84/531;70/2879;16/1356;97/371;79/3711.html;124/2785", "google_scholar": "https://scholar.google.com/citations?hl=en;JVumcGEAAAAJ;AjaDLjYAAAAJ;ttBdcmsAAAAJ;k1eaag4AAAAJ;p9-ohHsAAAAJ;1Cv6Sf4AAAAJ", "orcid": ";;;;0000-0002-9831-8249;0000-0003-4848-2304;", "linkedin": ";;;;yuanzhen-yz-li-5561655/;minghsuanyang/;", "or_profile": "~Chun-Han_Yao1;~Amit_Raj1;~Wei-Chih_Hung1;~Michael_Rubinstein1;~Yuanzhen_Li1;~Ming-Hsuan_Yang1;~Varun_Jampani1", "aff": "University of California at Merced;Google DeepMind;Waymo;Google;Google;University of California at Merced;Google Research", "aff_domain": "ucmerced.edu;google.com;waymo.com;google.com;google.com;umcerced.edu;google.com", "position": "PhD student;Researcher;Researcher;Research Scientist;Software Engineer;Professor;Researcher", "bibtex": "@inproceedings{\nyao2023articd,\ntitle={{ARTIC}3D: Learning Robust Articulated 3D Shapes from Noisy Web Image Collections},\nauthor={Chun-Han Yao and Amit Raj and Wei-Chih Hung and Michael Rubinstein and Yuanzhen Li and Ming-Hsuan Yang and Varun Jampani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rJc5Lsn5QU}\n}", "github": "", "project": "", "reviewers": "atAr;GaMw;V4Hg;MsPL", "pdf_size": 6800423, "rating": "5;5;6;6", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "91;72;97;58", "wc_strengths": "185;42;101;62", "wc_weaknesses": "117;119;140;162", "wc_questions": "157;72;1;62", "wc_limitations": "1;31;9;21", "wc_review": "551;336;348;365", "wc_reply_reviewers": "0;0;13;20", "wc_reply_authors": "102;102;102;38", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 15.46770829825802 ], "wc_strengths_avg": [ 97.5, 54.79279149669234 ], "wc_weaknesses_avg": [ 134.5, 18.255136263528684 ], "wc_questions_avg": [ 73.0, 55.59226564909907 ], "wc_limitations_avg": [ 15.5, 11.434596626029272 ], "wc_review_avg": [ 400.0, 87.7866732482784 ], "wc_reply_reviewers_avg": [ 8.25, 8.613216588476108 ], "wc_reply_authors_avg": [ 86.0, 27.712812921102035 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 23, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=4168976719467486927&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucmerced.edu;google.com;waymo.com;google.com;google.com;umcerced.edu;google.com", "author_num": 7, "aff_unique_index": "0;1;2;1;1;0;1", "aff_unique_norm": "University of California, Merced;Google;Waymo", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.ucmerced.edu;https://deepmind.com;https://www.waymo.com", "aff_unique_abbr": "UC Merced;DeepMind;Waymo", "aff_campus_unique_index": "0;2;2;0;2", "aff_campus_unique": "Merced;;Mountain View", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Preconditioning Matters: Fast Global Convergence of Non-convex Matrix Factorization via Scaled Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70316", "id": "rLpLjCBW4J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f02f1185b97518ab5bd7ebde466992d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rLpLjCBW4J", "openreview": "https://openreview.net/forum?id=rLpLjCBW4J", "poster": "/media/PosterPDFs/NeurIPS%202023/70316.png?t=1699694394.714109", "slides": "https://nips.cc/virtual/2023/poster/70316", "video": "https://nips.cc/virtual/2023/poster/70316", "author_site": "Xixi Jia, Hailin Wang, Jiangjun Peng, Xiangchu Feng, Deyu Meng", "tldr": "", "abstract": "Low-rank matrix factorization (LRMF) is a canonical problem in non-convex optimization, the objective function to be minimized is non-convex and even non-smooth, which makes the global convergence guarantee of gradient-based algorithm quite challenging. Recent work made a breakthrough on proving that standard gradient descent converges to the $\\varepsilon$-global minima after $O( \\frac{d \\kappa^2}{\\tau^2} {\\rm ln} \\frac{d \\sigma_d}{\\tau} + \\frac{d \\kappa^2}{\\tau^2} {\\rm ln} \\frac{\\sigma_d}{\\varepsilon})$ iterations from small initialization with a very small learning rate (both are related to the small constant $\\tau$). While the dependence of the convergence on the \\textit{condition number} $\\kappa$ and \\textit{small learning rate} makes it not practical especially for ill-conditioned LRMF problem.\n\nIn this paper, we show that precondition helps in accelerating the convergence and prove that the scaled gradient descent (ScaledGD) and its variant, alternating scaled gradient descent (AltScaledGD) converge to an $\\varepsilon$-global minima after $O( {\\rm ln} \\frac{d}{\\delta} + {\\rm ln} \\frac{d}{\\varepsilon})$ iterations from general random initialization. Meanwhile, for small initialization as in gradient descent, both ScaledGD and AltScaledGD converge to $\\varepsilon$-global minima after only $O({\\rm ln} \\frac{d}{\\varepsilon})$ iterations. 
Furthermore, we prove that as a proximity to the alternating minimization, AltScaledGD converges faster than ScaledGD, its global convergence does not rely on small learning rate and small initialization, which certificates the advantages of AltScaledGD in LRMF.", "keywords": "Non-convex optimization;matrix factorization;low rank;scaled gradient descent", "primary_area": "", "supplementary_material": "/attachment/b1caad111c1ca308d632e6a72278457071644a83.zip", "author": "Xixi Jia;Hailin Wang;Jiangjun Peng;Xiangchu Feng;Deyu Meng", "authorids": "~Xixi_Jia2;~Hailin_Wang2;~Jiangjun_Peng1;~Xiangchu_Feng1;~Deyu_Meng1", "gender": "M;M;M;M;M", "homepage": ";https://github.com/wanghailin97;https://teacher.nwpu.edu.cn/pengjj;;http://dymeng.gr.xjtu.edu.cn", "dblp": "216/9686;255/2196;;80/4229;22/5614", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=FdYTyjgAAAAJ;3crYjMoAAAAJ;https://scholar.google.com.hk/citations?user=SuG38OIAAAAJ;an6w-64AAAAJ", "orcid": ";0000-0002-7797-2719;0000-0001-9645-5154;;0000-0002-1294-8283", "linkedin": ";;;;", "or_profile": "~Xixi_Jia2;~Hailin_Wang2;~Jiangjun_Peng1;~Xiangchu_Feng1;~Deyu_Meng1", "aff": "Xidian University;Xi'an Jiaotong University;Xi'an Jiaotong University;;Xi'an Jiaotong University", "aff_domain": "xidian.edu.cn;xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn", "position": "Associate Professor;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\njia2023preconditioning,\ntitle={Preconditioning Matters: Fast Global Convergence of Non-convex Matrix Factorization via Scaled Gradient Descent},\nauthor={Xixi Jia and Hailin Wang and Jiangjun Peng and Xiangchu Feng and Deyu Meng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rLpLjCBW4J}\n}", "github": "", "project": "", "reviewers": "beQc;Cp2y;eEnz;QnG3", "pdf_size": 742320, "rating": "4;6;6;7", "confidence": "4;4;2;2", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "1;3;2;3", "wc_summary": "88;98;114;249", "wc_strengths": "91;41;67;92", "wc_weaknesses": "148;120;81;270", "wc_questions": "1;88;179;20", "wc_limitations": "1;1;29;4", "wc_review": "329;348;470;635", "wc_reply_reviewers": "68;12;33;45", "wc_reply_authors": "561;110;36;66", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 137.25, 65.18195685924135 ], "wc_strengths_avg": [ 72.75, 20.8851023459307 ], "wc_weaknesses_avg": [ 154.75, 70.66602790591813 ], "wc_questions_avg": [ 72.0, 69.73162840490677 ], "wc_limitations_avg": [ 8.75, 11.755317945508747 ], "wc_review_avg": [ 445.5, 122.05429119863012 ], "wc_reply_reviewers_avg": [ 39.5, 20.254629100529094 ], "wc_reply_authors_avg": [ 193.25, 213.94552460848533 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9585921710538004493&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "xidian.edu.cn;xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Xidian University;Xi'an Jiao Tong University", "aff_unique_dep": ";", 
"aff_unique_url": "http://www.xidian.edu.cn/;https://www.xjtu.edu.cn", "aff_unique_abbr": "Xidian;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Continuous-time Analysis of Anchor Acceleration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70315", "id": "rN99gLCBe4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/678cffc05549fdabda971127602084c6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rN99gLCBe4", "openreview": "https://openreview.net/forum?id=rN99gLCBe4", "poster": "/media/PosterPDFs/NeurIPS%202023/70315.png?t=1701940038.8194964", "slides": "https://nips.cc/virtual/2023/poster/70315", "video": "https://nips.cc/virtual/2023/poster/70315", "author_site": "Jaewook Suh, Jisun Park, Ernest Ryu", "tldr": "", "abstract": "Recently, the anchor acceleration, an acceleration mechanism distinct from Nesterov's, has been discovered for minimax optimization and fixed-point problems, but its mechanism is not understood well, much less so than Nesterov acceleration. In this work, we analyze continuous-time models of anchor acceleration. We provide tight, unified analyses for characterizing the convergence rate as a function of the anchor coefficient $\\beta(t)$, thereby providing insight into the anchor acceleration mechanism and its accelerated $\\mathcal{O}(1/k^2)$-convergence rate. Finally, we present an adaptive method inspired by the continuous-time analyses and establish its effectiveness through theoretical analyses and experiments.", "keywords": "acceleration;convex optimization;continuous-time analysis;monotone operator;monotone inclusion;minimax optimization;fixed-point problem;anchor acceleration", "primary_area": "", "supplementary_material": "/attachment/88c4a30a23cd52ee23d4e4d6faa6c1e1342d7087.zip", "author": "Jaewook J. Suh;Jisun Park;Ernest K. Ryu", "authorids": "~Jaewook_J._Suh1;~Jisun_Park2;~Ernest_K._Ryu1", "gender": "M;F;M", "homepage": "https://jaewookjsuh.github.io/;https://jisunp515.github.io/;http://www.math.snu.ac.kr/~ernestryu/", "dblp": "323/9242;52/1945-3;165/5192", "google_scholar": "https://scholar.google.com/citations?hl=ko;ItMOLM4AAAAJ;CNOqUZoAAAAJ", "orcid": ";;0000-0001-6820-9095", "linkedin": ";;", "or_profile": "~Jaewook_J._Suh1;~Jisun_Park2;~Ernest_K._Ryu1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsuh2023continuoustime,\ntitle={Continuous-time Analysis of Anchor Acceleration},\nauthor={Jaewook J. Suh and Jisun Park and Ernest K. 
Ryu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rN99gLCBe4}\n}", "github": "", "project": "", "reviewers": "aLVj;MP6a;KkKu;mK3f", "pdf_size": 1457005, "rating": "6;6;8;8", "confidence": "2;3;3;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "73;79;117;120", "wc_strengths": "124;62;33;150", "wc_weaknesses": "146;69;160;81", "wc_questions": "76;165;67;104", "wc_limitations": "29;6;2;15", "wc_review": "448;381;379;470", "wc_reply_reviewers": "11;76;31;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.25, 21.3819433167334 ], "wc_strengths_avg": [ 92.25, 46.82080200082011 ], "wc_weaknesses_avg": [ 114.0, 39.54111783953509 ], "wc_questions_avg": [ 103.0, 38.30796261875591 ], "wc_limitations_avg": [ 13.0, 10.36822067666386 ], "wc_review_avg": [ 419.5, 40.26474885057648 ], "wc_reply_reviewers_avg": [ 32.25, 26.54595072699413 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3301472757708477595&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Efficient Learning of Linear Graph Neural Networks via Node Subsampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70314", "id": "rQI3FOzo1f", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ada418ae9b6677dcda32d9dca0f7441f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rQI3FOzo1f", "openreview": "https://openreview.net/forum?id=rQI3FOzo1f", "poster": "/media/PosterPDFs/NeurIPS%202023/70314.png?t=1701986669.8085122", "slides": "https://nips.cc/virtual/2023/poster/70314", "video": "https://nips.cc/virtual/2023/poster/70314", "author_site": "Seiyun Shin, Ilan Shomorony, Han Zhao", "tldr": "", "abstract": "Graph Neural Networks (GNNs) are a powerful class of machine learning models with applications in recommender systems, drug discovery, social network analysis, and computer vision. One challenge with their implementation is that GNNs often take large-scale graphs as inputs, which imposes significant computational/storage costs in the training and testing phases. In particular, the message passing operations of a GNN require multiplication of the graph adjacency matrix $A \\in \\mathbb{R}^{n \\times n}$ and the data matrix $X \\in \\mathbb{R}^{n \\times d}$, and the $O(n^2 d)$ time complexity can be prohibitive for large $n$. Thus, a natural question is whether it is possible to perform the GNN operations in (quasi-)linear time by avoiding the full computation of $A X$. 
To study this question, we consider the setting of a regression task on a two-layer Linear Graph Convolutional Network (GCN). We develop an efficient training algorithm based on (1) performing node subsampling, (2) estimating the leverage scores of $A X$ based on the subsampled graph, and (3) performing leverage score sampling on $A X$. We show that our proposed scheme learns the regression model observing only $O(nd\\epsilon^{-2}\\log n)$ entries of $A$ in time $O(nd^2 \\epsilon^{-2}\\log n)$, with the guarantee that the learned weights deviate by at most $\\epsilon$ under the $\\ell_2$ norm from the model learned using the entire adjacency matrix $A$. We present empirical results for regression problems on real-world graphs and show that our algorithm significantly outperforms other baseline sampling strategies that exploit the same number of observations.", "keywords": "Graph neural networks;Random sampling;Regression", "primary_area": "", "supplementary_material": "/attachment/9e327a074317394fca5f017e7d3a4b4765ee1dad.zip", "author": "Seiyun Shin;Ilan Shomorony;Han Zhao", "authorids": "~Seiyun_Shin1;~Ilan_Shomorony1;~Han_Zhao1", "gender": "M;M;M", "homepage": "https://seiyun-shin.github.io/;http://www.ilanshomorony.com;https://hanzhaoml.github.io/", "dblp": "180/8229;31/9223;03/3520-2", "google_scholar": ";fMAg4zEAAAAJ;x942ipYAAAAJ", "orcid": ";;0000-0002-8579-1600", "linkedin": ";;", "or_profile": "~Seiyun_Shin1;~Ilan_Shomorony1;~Han_Zhao1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshin2023efficient,\ntitle={Efficient Learning of Linear Graph Neural Networks via Node Subsampling},\nauthor={Seiyun Shin and Ilan Shomorony and Han Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rQI3FOzo1f}\n}", "github": "", "project": "", "reviewers": "S2HF;J15Y;XHGm;F1DR", "pdf_size": 1094894, "rating": "3;4;5;8", "confidence": "3;4;3;3", "soundness": "4;3;3;4", "novelty": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "33;52;65;72", "wc_strengths": "40;32;63;315", "wc_weaknesses": "232;55;223;71", "wc_questions": "1;60;84;60", "wc_limitations": "1;1;61;18", "wc_review": "307;200;496;536", "wc_reply_reviewers": "38;202;47;96", "wc_reply_authors": "200;1195;21;28", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;2;2", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 55.5, 14.84082207965583 ], "wc_strengths_avg": [ 112.5, 117.46595251390933 ], "wc_weaknesses_avg": [ 145.25, 82.5056816225404 ], "wc_questions_avg": [ 51.25, 30.621683493890405 ], "wc_limitations_avg": [ 20.25, 24.529319191530774 ], "wc_review_avg": [ 384.75, 137.32329554740522 ], "wc_reply_reviewers_avg": [ 95.75, 65.19346209551998 ], "wc_reply_authors_avg": [ 361.0, 486.81772769692765 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3086066999241838, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=34479558594359279&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 5, "email": "illinois.edu;illinois.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RD-Suite: A Benchmark for Ranking Distillation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73447", "id": "rR1c6rzXHa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/701eba0f98c6f28ffee0de5969d8d034-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=rR1c6rzXHa", "openreview": "https://openreview.net/forum?id=rR1c6rzXHa", "poster": "/media/PosterPDFs/NeurIPS%202023/73447.png?t=1701825836.5996664", "slides": "https://nips.cc/virtual/2023/poster/73447", "video": "https://nips.cc/virtual/2023/poster/73447", "author_site": "Zhen Qin, Rolf Jagerman, Rama Kumar Pasumarthi, Honglei Zhuang, He Zhang, Aijun Bai, Kai Hui, Le Yan, Xuanhui Wang", "tldr": "", "abstract": "The distillation of ranking models has become an important topic in both academia and industry. In recent years, several advanced methods have been proposed to tackle this problem, often leveraging ranking information from teacher rankers that is absent in traditional classification settings. To date, there is no well-established consensus on how to evaluate this class of models. Moreover, inconsistent benchmarking on a wide range of tasks and datasets make it difficult to assess or invigorate advances in this field. This paper first examines representative prior arts on ranking distillation, and raises three questions to be answered around methodology and reproducibility. To that end, we propose a systematic and unified benchmark, Ranking Distillation Suite (RD-Suite), which is a suite of tasks with 4 large real-world datasets, encompassing two major modalities (textual and numeric) and two applications (standard distillation and distillation transfer). RD-Suite consists of benchmark results that challenge some of the common wisdom in the field, and the release of datasets with teacher scores and evaluation scripts for future research. 
RD-Suite paves the way towards better understanding of ranking distillation, facilities more research in this direction, and presents new challenges.", "keywords": "Ranking;Distillation;Benchmark;LLM", "primary_area": "", "supplementary_material": "/attachment/9d0c8f20555511cd3a07531e1fb290440b9baf01.pdf", "author": "Zhen Qin;Rolf Jagerman;Rama Kumar Pasumarthi;Honglei Zhuang;He Zhang;Aijun Bai;Kai Hui;Le Yan;Xuanhui Wang", "authorids": "~Zhen_Qin5;~Rolf_Jagerman2;~Rama_Kumar_Pasumarthi1;~Honglei_Zhuang1;~He_Zhang11;~Aijun_Bai1;~Kai_Hui1;~Le_Yan1;~Xuanhui_Wang1", "gender": "M;;M;M;M;M;M;M;M", "homepage": "http://alumni.cs.ucr.edu/~zqin001/;https://www.jagerman.nl/;;https://hongleizhuang.github.io/;;https://aijunbai.github.io/;https://khui.github.io/;;", "dblp": ";144/7357;125/3197;10/9988;;117/3251;37/10077;67/2358;67/2661", "google_scholar": "Kv1yk3YAAAAJ;4yjQ964AAAAJ;https://scholar.google.co.in/citations?user=aMUoVMMAAAAJ;FxEDj4wAAAAJ;;;VorTj3AAAAAJ;X_knTr4AAAAJ;", "orcid": "0000-0001-6739-134X;0000-0002-5169-495X;;0000-0001-8134-1509;;;0000-0002-3110-7404;;", "linkedin": ";;;;he-zhang-bb8a72a7/;;;;", "or_profile": "~Zhen_Qin5;~Rolf_Jagerman2;~Rama_Kumar_Pasumarthi1;~Honglei_Zhuang1;~He_Zhang11;~Aijun_Bai1;~Kai_Hui1;~Le_Yan1;~Xuanhui_Wang1", "aff": "Google Deepmind;Google;Google;Google DeepMind;Google;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Research Scientist;Software Engineer;Researcher;Software Engineer;Software Engineer;Software Engineer", "bibtex": "@inproceedings{\nqin2023rdsuite,\ntitle={{RD}-Suite: A Benchmark for Ranking Distillation},\nauthor={Zhen Qin and Rolf Jagerman and Rama Kumar Pasumarthi and Honglei Zhuang and He Zhang and Aijun Bai and Kai Hui and Le Yan and Xuanhui Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=rR1c6rzXHa}\n}", "github": "", "project": "", "reviewers": "KNat;uUSL;rCz5;1Yku", "pdf_size": 260983, "rating": "5;6;7;8", "confidence": "3;1;3;3", "wc_summary_and_contributions": "31;145;59;193", "wc_strengths": "27;107;59;30", "wc_improvement": "91;166;52;23", "wc_limitations": "16;4;1;15", "wc_correctness": "14;4;11;1", "wc_clarity": "9;4;10;1", "wc_relation_to_prior_work": "1;3;11;1", "wc_documentation": "6;4;11;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "196;438;215;266", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "167;173;51;67", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 107.0, 65.0384501660364 ], "wc_strengths_avg": [ 55.75, 32.119892590106836 ], "wc_improvement_avg": [ 83.0, 53.65165421494476 ], "wc_limitations_avg": [ 9.0, 6.59545297913646 ], "wc_correctness_avg": [ 7.5, 5.220153254455275 ], "wc_clarity_avg": [ 6.0, 3.6742346141747673 ], "wc_relation_to_prior_work_avg": [ 4.0, 4.123105625617661 ], "wc_documentation_avg": [ 5.5, 3.640054944640259 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 278.75, 95.43944415177616 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 114.5, 55.827860428284374 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 7, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=18236420463804409800&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;1;1", "aff_unique_norm": "DeepMind;Google", "aff_unique_dep": "DeepMind;Google", "aff_unique_url": "https://deepmind.com;https://www.google.com", "aff_unique_abbr": "DeepMind;Google", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Homotopy-based training of NeuralODEs for accurate dynamics discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70313", "id": "rUFckPrzXR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cc56ae4929d792351a66c39aafb4a34d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rUFckPrzXR", "openreview": "https://openreview.net/forum?id=rUFckPrzXR", "poster": "/media/PosterPDFs/NeurIPS%202023/70313.png?t=1701954106.7886002", "slides": "https://nips.cc/virtual/2023/poster/70313", "video": "https://nips.cc/virtual/2023/poster/70313", "author_site": "Joon-Hyuk Ko, Hankyul Koh, Nojun Park, Wonho Jhe", "tldr": "", "abstract": "Neural Ordinary Differential Equations (NeuralODEs) present an attractive way to extract dynamical laws from time series data, as they bridge neural networks with the differential equation-based modeling paradigm of the physical sciences. However, these models often display long training times and suboptimal results, especially for longer duration data. While a common strategy in the literature imposes strong constraints to the NeuralODE architecture to inherently promote stable model dynamics, such methods are ill-suited for dynamics discovery as the unknown governing equation is not guaranteed to satisfy the assumed constraints. In this paper, we develop a new training method for NeuralODEs, based on synchronization and homotopy optimization, that does not require changes to the model architecture. We show that synchronizing the model dynamics and the training data tames the originally irregular loss landscape, which homotopy optimization can then leverage to enhance training. Through benchmark experiments, we demonstrate our method achieves competitive or better training loss while often requiring less than half the number of training epochs compared to other model-agnostic techniques. 
Furthermore, models trained with our method display better extrapolation capabilities, highlighting the effectiveness of our method.", "keywords": "neural ordinary differential equations;synchronization;homotopy optimization;loss landscape;dynamical systems", "primary_area": "", "supplementary_material": "/attachment/750389ac5abc835de0b5dd859eb63ab3c9389a2b.zip", "author": "Joon-Hyuk Ko;Hankyul Koh;Nojun Park;Wonho Jhe", "authorids": "~Joon-Hyuk_Ko1;~Hankyul_Koh1;~Nojun_Park1;~Wonho_Jhe1", "gender": "M;M;M;M", "homepage": "https://www.kias.re.kr/kias/people/faculty/viewMember.do?memberId=11024&menuNo=403021;;http://jhe.snu.ac.kr/?p=1278;http://jhe.snu.ac.kr/", "dblp": "281/7112;330/4266;281/6879;226/8082", "google_scholar": "t7lTWNQAAAAJ;https://scholar.google.com/citations?view_op=list_works;;z_c9ABQAAAAJ", "orcid": "0000-0001-9283-3859;;;0000-0002-4716-5449", "linkedin": "joon-hyuk-ko-577843271/;hankyul-koh-015177277/;;wonho-jhe-861457b3/", "or_profile": "~Joon-Hyuk_Ko1;~Hankyul_Koh1;~Nojun_Park1;~Wonho_Jhe1", "aff": "Seoul National University;Seoul National University;Massachusetts Institute of Technology;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;mit.edu;snu.ac.kr", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nko2023homotopybased,\ntitle={Homotopy-based training of Neural{ODE}s for accurate dynamics discovery},\nauthor={Joon-Hyuk Ko and Hankyul Koh and Nojun Park and Wonho Jhe},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rUFckPrzXR}\n}", "github": "", "project": "", "reviewers": "U4Km;bK9U;jYQA;9EGS;84Va", "pdf_size": 19997126, "rating": "6;6;6;6;7", "confidence": "4;3;4;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;4;4;4", "wc_summary": "74;96;33;83;186", "wc_strengths": "98;58;41;209;112", "wc_weaknesses": "97;43;71;2;88", "wc_questions": "28;93;113;2;45", "wc_limitations": "7;29;5;2;19", "wc_review": "304;319;263;298;450", "wc_reply_reviewers": "55;14;141;11;0", "wc_reply_authors": "0;0;173;0;0", "reply_reviewers": "1;1;2;1;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 94.4, 50.41666391184565 ], "wc_strengths_avg": [ 103.6, 58.66719696730022 ], "wc_weaknesses_avg": [ 60.2, 34.42905749508691 ], "wc_questions_avg": [ 56.2, 41.08235631022155 ], "wc_limitations_avg": [ 12.4, 10.11137972781163 ], "wc_review_avg": [ 326.8, 64.27876787867048 ], "wc_reply_reviewers_avg": [ 44.2, 51.87446385265105 ], "wc_reply_authors_avg": [ 34.6, 69.2 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18035061862011802574&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;mit.edu;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://web.mit.edu", "aff_unique_abbr": "SNU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": 
"South Korea;United States" }, { "title": "Locality Sensitive Hashing in Fourier Frequency Domain For Soft Set Containment Search", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70312", "id": "rUf0GV5CuU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b016cbec36ff7118db303229c9048733-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rUf0GV5CuU", "openreview": "https://openreview.net/forum?id=rUf0GV5CuU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70312", "video": "https://nips.cc/virtual/2023/poster/70312", "author_site": "Indradyumna Roy, Rishi Agarwal, Soumen Chakrabarti, Anirban Dasgupta, Abir De", "tldr": "", "abstract": "In many search applications related to passage retrieval, text entailment, and subgraph search, the query and each 'document' is a set of elements, with a document being relevant if it contains the query. These elements are not represented by atomic IDs, but by embedded representations, thereby extending set containment to *soft* set containment. Recent applications address soft set containment by encoding sets into fixed-size vectors and checking for elementwise *vector* *dominance*. This 0/1 property can be relaxed to an asymmetric *hinge* *distance* for scoring and ranking candidate documents. Here we focus on data-sensitive, trainable indices for fast retrieval of relevant documents. Existing LSH methods are designed for mostly symmetric or few simple asymmetric distance functions, which are not suitable for hinge distance. Instead, we transform hinge distance into a proposed *dominance* *similarity* measure, to which we then apply a Fourier transform, thereby expressing dominance similarity as an expectation of inner products of functions in the frequency domain. Next, we approximate the expectation with an importance-sampled estimate. The overall consequence is that now we can use a traditional LSH, but in the frequency domain. To ensure that the LSH uses hash bits efficiently, we learn hash functions that are sensitive to both corpus and query distributions, mapped to the frequency domain. Our experiments show that the proposed asymmetric dominance similarity is critical to the targeted applications, and that our LSH, which we call FourierHashNet, provides a better query time vs. retrieval quality trade-off, compared to several baselines. 
Both the Fourier transform and the trainable hash codes contribute to performance gains.", "keywords": "Locality sensitive hashing;Fourier transform;Order embeddings", "primary_area": "", "supplementary_material": "", "author": "Indradyumna Roy;Rishi Agarwal;Soumen Chakrabarti;Anirban Dasgupta;Abir De", "authorids": "~Indradyumna_Roy1;~Rishi_Agarwal1;~Soumen_Chakrabarti1;~Anirban_Dasgupta1;~Abir_De1", "gender": "M;M;Not Specified;M;M", "homepage": "https://indradyumna.github.io/;https://rishiagarwal2000.github.io;https://www.cse.iitb.ac.in/~soumen/;https://sites.google.com/site/anirbandasgupta;", "dblp": "124/9185.html;;c/SChakrabarti;54/385-1;118/7174", "google_scholar": "qb70i84AAAAJ;mKJs6cAAAAAJ;https://scholar.google.com.tw/citations?user=LfF2zfQAAAAJ;plJC8R0AAAAJ;https://scholar.google.co.in/citations?user=_9ZKKbIAAAAJ", "orcid": ";0000-0002-1284-2593;;;", "linkedin": ";rishi-agarwal-a473a2202/?originalSubdomain=in;;;", "or_profile": "~Indradyumna_Roy1;~Rishi_Agarwal1;~Soumen_Chakrabarti1;~Anirban_Dasgupta1;~Abir_De1", "aff": "Indian Institute of Technology Bombay;Stanford University;Indian Institute of Technology Bombay;IIT Gandhinagar;Indian Institute of Technology Bombay,", "aff_domain": "iitb.ac.in;stanford.edu;iitb.ac.in;iitgn.ac.in;iitb.ac.in", "position": "PhD student;MS student;Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nroy2023locality,\ntitle={Locality Sensitive Hashing in Fourier Frequency Domain For Soft Set Containment Search},\nauthor={Indradyumna Roy and Rishi Agarwal and Soumen Chakrabarti and Anirban Dasgupta and Abir De},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rUf0GV5CuU}\n}", "github": "", "project": "", "reviewers": "wHYk;AbHb;2JDE", "pdf_size": 4147653, "rating": "6;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "3;2;3", "wc_summary": "71;221;69", "wc_strengths": "88;227;63", "wc_weaknesses": "1;624;12", "wc_questions": "17;342;7", "wc_limitations": "7;4;11", "wc_review": "184;1418;162", "wc_reply_reviewers": "0;636;0", "wc_reply_authors": "0;656;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 120.33333333333333, 71.18676531178781 ], "wc_strengths_avg": [ 126.0, 72.1433757642839 ], "wc_weaknesses_avg": [ 212.33333333333334, 291.1269291716053 ], "wc_questions_avg": [ 122.0, 155.61705133649934 ], "wc_limitations_avg": [ 7.333333333333333, 2.8674417556808756 ], "wc_review_avg": [ 588.0, 586.9673471894895 ], "wc_reply_reviewers_avg": [ 212.0, 299.8132752230961 ], "wc_reply_authors_avg": [ 218.66666666666666, 309.2413656389168 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1323224082570810849&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "iitb.ac.in;stanford.edu;iitb.ac.in;iitgn.ac.in;iitb.ac.in", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Indian Institute of Technology Bombay;Stanford University;Indian 
Institute of Technology Gandhinagar", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitb.ac.in;https://www.stanford.edu;https://www.iitgn.ac.in", "aff_unique_abbr": "IIT Bombay;Stanford;IITGN", "aff_campus_unique_index": "0;1;0;2;0", "aff_campus_unique": "Bombay;Stanford;Gandhinagar", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "India;United States" }, { "title": "PRED: Pre-training via Semantic Rendering on LiDAR Point Clouds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70311", "id": "rUldfB4SPT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/903f778fe1341e5351b5b63e0e6b197f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rUldfB4SPT", "openreview": "https://openreview.net/forum?id=rUldfB4SPT", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70311", "video": "https://nips.cc/virtual/2023/poster/70311", "author_site": "Hao Yang, Haiyang Wang, Di Dai, Liwei Wang", "tldr": "", "abstract": "Pre-training is crucial in 3D-related fields such as autonomous driving where point cloud annotation is costly and challenging. Many recent studies on point cloud pre-training, however, have overlooked the issue of incompleteness, where only a fraction of the points are captured by LiDAR, leading to ambiguity during the training phase. On the other hand, images offer more comprehensive information and richer semantics that can bolster point cloud encoders in addressing the incompleteness issue inherent in point clouds. Yet, incorporating images into point cloud pre-training presents its own challenges due to occlusions, potentially causing misalignments between points and pixels. In this work, we propose PRED, a novel image-assisted pre-training framework for outdoor point clouds in an occlusion-aware manner. The main ingredient of our framework is a Birds-Eye-View (BEV) feature map conditioned semantic rendering, leveraging the semantics of images for supervision through neural rendering. We further enhance our model's performance by incorporating point-wise masking with a high mask ratio (95%). Extensive experiments demonstrate PRED's superiority over prior point cloud pre-training methods, providing significant improvements on various large-scale datasets for 3D perception tasks. 
Code will be available at https://github.com/PRED4pc/PRED.", "keywords": "Pre-Train;Autonomous Driving;LiDAR;3D Object Detection", "primary_area": "", "supplementary_material": "/attachment/acd5ac16cc6c58bc1d40de10047aafde8f4c0765.pdf", "author": "Hao Yang;Haiyang Wang;Di Dai;Liwei Wang", "authorids": "~Hao_Yang16;~Haiyang_Wang2;~Di_Dai1;~Liwei_Wang1", "gender": ";M;M;M", "homepage": "https://github.com/haoy945;https://scholar.google.com/citations?user=R3Av3IkAAAAJ&hl=en;https://github.com/Ericdd90/PKU;http://www.liweiwang-pku.com/", "dblp": ";22/3326;175/8881;", "google_scholar": ";R3Av3IkAAAAJ;;VZHxoh8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hao_Yang16;~Haiyang_Wang2;~Di_Dai1;~Liwei_Wang1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2023pred,\ntitle={{PRED}: Pre-training via Semantic Rendering on Li{DAR} Point Clouds},\nauthor={Hao Yang and Haiyang Wang and Di Dai and Liwei Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rUldfB4SPT}\n}", "github": "", "project": "", "reviewers": "sbFA;8P8s;4EsX;73N2;4GhG", "pdf_size": 3573389, "rating": "5;5;5;8;8", "confidence": "5;4;3;4;5", "soundness": "3;2;2;3;3", "novelty": "2;3;3;4;3", "presentation": "4;3;3;2;3", "wc_summary": "47;73;127;146;60", "wc_strengths": "56;49;155;78;54", "wc_weaknesses": "151;69;110;235;140", "wc_questions": "62;15;6;160;18", "wc_limitations": "5;34;6;8;3", "wc_review": "321;240;404;627;275", "wc_reply_reviewers": "55;76;14;24;168", "wc_reply_authors": "53;53;0;22;163", "reply_reviewers": "1;1;1;1;2", "reply_authors": "2;2;1;2;3", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 90.6, 38.836065712170175 ], "wc_strengths_avg": [ 78.4, 39.57069622839609 ], "wc_weaknesses_avg": [ 141.0, 54.92176253544673 ], "wc_questions_avg": [ 52.2, 57.27966480348851 ], "wc_limitations_avg": [ 11.2, 11.513470371699404 ], "wc_review_avg": [ 373.4, 138.183356450768 ], "wc_reply_reviewers_avg": [ 67.4, 54.92394741822551 ], "wc_reply_authors_avg": [ 58.2, 56.09777179175658 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.32732683535398854, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10055242312335436020&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Wasserstein Gradient Flows for Optimizing Gaussian Mixture Policies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70310", "id": "rW4mNcDxpS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/429b5216a4d08850c586fbf809e17877-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rW4mNcDxpS",
"openreview": "https://openreview.net/forum?id=rW4mNcDxpS", "poster": "/media/PosterPDFs/NeurIPS%202023/70310.png?t=1701713466.8607864", "slides": "https://nips.cc/virtual/2023/poster/70310", "video": "https://nips.cc/virtual/2023/poster/70310", "author_site": "Hanna Ziesche, Leonel Rozo", "tldr": "", "abstract": "Robots often rely on a repertoire of previously-learned motion policies for performing tasks of diverse complexities. When facing unseen task conditions or when new task requirements arise, robots must adapt their motion policies accordingly. In this context, policy optimization is the \\emph{de facto} paradigm to adapt robot policies as a function of task-specific objectives. Most commonly-used motion policies carry particular structures that are often overlooked in policy optimization algorithms. We instead propose to leverage the structure of probabilistic policies by casting the policy optimization as an optimal transport problem. Specifically, we focus on robot motion policies that build on Gaussian mixture models (GMMs) and formulate the policy optimization as a Wasserstein gradient flow over the space of GMMs. This naturally allows us to constrain the policy updates via the $L^2$-Wasserstein distance between GMMs to enhance the stability of the policy optimization process. Furthermore, we leverage the geometry of the Bures-Wasserstein manifold to optimize the Gaussian distributions of the GMM policy via Riemannian optimization. We evaluate our approach on common robotic settings: reaching motions, collision-avoidance behaviors, and multi-goal tasks. Our results show that our method outperforms common policy optimization baselines in terms of task success rate and low-variance solutions.", "keywords": "Policy optimization;robot learning;reinforcement learning;Gaussian mixture models;optimal transport;robotics", "primary_area": "", "supplementary_material": "/attachment/b758d8ef9eae92e2f5b0e345d7ad0c25d6c3e855.zip", "author": "Hanna Ziesche;Leonel Rozo", "authorids": "~Hanna_Ziesche1;~Leonel_Rozo1", "gender": "F;M", "homepage": ";https://leonelrozo.weebly.com/", "dblp": "284/0793;10/9515", "google_scholar": ";https://scholar.google.it/citations?user=vLWgi-YAAAAJ", "orcid": "0000-0003-2042-3660;0000-0001-5970-9135", "linkedin": ";leonelrozo/", "or_profile": "~Hanna_Carolin_Maria_Ziesche1;~Leonel_Dario_Rozo1", "aff": "Robert Bosch GmbH, Bosch;Robert Bosch GmbH, Bosch", "aff_domain": "de.bosch.com;de.bosch.com", "position": "Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nziesche2023wasserstein,\ntitle={Wasserstein Gradient Flows for Optimizing Gaussian Mixture Policies},\nauthor={Hanna Ziesche and Leonel Rozo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rW4mNcDxpS}\n}", "github": "", "project": "", "reviewers": "UKYq;uu8b;vzrR;zWwA", "pdf_size": 10230587, "rating": "4;5;7;7", "confidence": "4;3;3;5", "soundness": "4;3;3;4", "novelty": "1;2;3;3", "presentation": "4;3;3;4", "wc_summary": "49;80;105;70", "wc_strengths": "28;26;239;103", "wc_weaknesses": "461;115;83;146", "wc_questions": "170;2;1;28", "wc_limitations": "33;40;1;0", "wc_review": "741;263;429;347", "wc_reply_reviewers": "874;15;60;35", "wc_reply_authors": "1782;26;94;36", "reply_reviewers": "2;1;2;1", "reply_authors": "5;2;3;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [
3.5, 0.5 ], "wc_summary_avg": [ 76.0, 20.137030565602267 ], "wc_strengths_avg": [ 99.0, 86.58233076095838 ], "wc_weaknesses_avg": [ 201.25, 151.6119635780765 ], "wc_questions_avg": [ 50.25, 69.9799078307481 ], "wc_limitations_avg": [ 18.5, 18.172781845386247 ], "wc_review_avg": [ 445.0, 180.6931099959265 ], "wc_reply_reviewers_avg": [ 246.0, 362.92630105849315 ], "wc_reply_authors_avg": [ 484.5, 749.5617052651503 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6765568015249323471&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "de.bosch.com;de.bosch.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Robert Bosch GmbH", "aff_unique_dep": "", "aff_unique_url": "https://www.bosch.com", "aff_unique_abbr": "Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "OceanBench: The Sea Surface Height Edition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73446", "id": "rXi13M3PKc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f6ccbf94fa57c2ae372ece91b537574d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=rXi13M3PKc", "openreview": "https://openreview.net/forum?id=rXi13M3PKc", "poster": "/media/PosterPDFs/NeurIPS%202023/73446.png?t=1701429382.999261", "slides": "https://nips.cc/virtual/2023/poster/73446", "video": "https://nips.cc/virtual/2023/poster/73446", "author_site": "J. Emmanuel Johnson, Quentin Febvre, Anastasiia Gorbunova, Sam Metref, Maxime Ballarotta, Julien Le Sommer, ronan fablet", "tldr": "", "abstract": "The ocean is a crucial component of the Earth's system. \nIt profoundly influences human activities and plays a critical role in climate regulation. \nOur understanding has significantly improved over the last decades with the advent of satellite remote sensing data, allowing us to capture essential sea surface quantities over the globe, e.g., sea surface height (SSH). \nDespite their ever-increasing abundance, ocean satellite data presents challenges for information extraction due to their sparsity and irregular sampling, signal complexity, and noise. \nMachine learning (ML) techniques have demonstrated their capabilities in dealing with large-scale, complex signals. \nTherefore we see an opportunity for these ML models to harness the full extent of the information contained in ocean satellite data. \nHowever, data representation and relevant evaluation metrics can be the defining factors when determining the success of applied ML. \nThe processing steps from the raw observation data to an ML-ready state and from model outputs to interpretable quantities require domain expertise, which can be a significant barrier to entry for ML researchers. \nIn addition, imposing fixed processing steps, like committing to specific variables, regions, and geometries, will narrow the scope of ML models and their potential impact on real-world applications. \nOceanBench is a unifying framework that provides standardized processing steps that comply with domain-expert standards. \nIt is designed with a flexible and pedagogical abstraction: it a) provides plug-and-play data and pre-configured pipelines for ML researchers to benchmark their models w.r.t.
ML and domain-related baselines and b) provides a transparent and configurable framework for researchers to customize and extend the pipeline for their tasks. \nIn this work, we demonstrate the OceanBench framework through a first edition dedicated to SSH interpolation challenges. \nWe provide datasets and ML-ready benchmarking pipelines for the long-standing problem of interpolating observations from simulated ocean satellite data, multi-modal and multi-sensor fusion issues, and transfer-learning to real ocean satellite observations. \nThe OceanBench framework is available at https://github.com/jejjohnson/oceanbench and the dataset registry is available at https://github.com/quentinf00/oceanbench-data-registry.", "keywords": "Sea Surface Height;Ocean Satellite Data;Geoprocessing Pipeline;Interpolation;Inverse Problems", "primary_area": "", "supplementary_material": "/attachment/bd80d0838caad84eebe95272e86a00988228bc3a.pdf", "author": "Juan Emmanuel Johnson;Quentin Febvre;Anastasia Gorbunova;Sammy Metref;Maxime Ballarotta;Julien Le Sommer;Ronan Fablet", "authorids": "~Juan_Emmanuel_Johnson1;~Quentin_Febvre1;gorbunoa@univ-grenoble-alpes.fr;metrefs@univ-grenoble-alpes.fr;mballarotta@groupcls.com;julien.lesommer@univ-grenoble-alpes.fr;~Ronan_Fablet2", "gender": "M;M;;;;;", "homepage": "https://jejjohnson.netlify.app/;;;;;;", "dblp": ";;;;;;", "google_scholar": "h-wdX7gAAAAJ;;;;;;", "orcid": ";0000-0002-4989-3036;;;;;", "linkedin": "jejjohnson/;;;;;;", "or_profile": "~Juan_Emmanuel_Johnson1;~Quentin_Febvre1;gorbunoa@univ-grenoble-alpes.fr;metrefs@univ-grenoble-alpes.fr;mballarotta@groupcls.com;julien.lesommer@univ-grenoble-alpes.fr;~Ronan_Fablet2", "aff": "Universit\u00e9 Grenoble Alpes;IMT Atlantique;;;;;", "aff_domain": "univ-grenoble-alpes.fr;imt-atlantique.fr;;;;;", "position": "Postdoc;PhD student;;;;;", "bibtex": "@inproceedings{\njohnson2023oceanbench,\ntitle={OceanBench: The Sea Surface Height Edition},\nauthor={Juan Emmanuel Johnson and Quentin Febvre and Anastasia Gorbunova and Sammy Metref and Maxime Ballarotta and Julien Le Sommer and Ronan Fablet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=rXi13M3PKc}\n}", "github": "", "project": "", "reviewers": "DJRc;fpi1;t7YW;nYg2", "pdf_size": 4869128, "rating": "6;6;7;7", "confidence": "2;4;3;4", "wc_summary_and_contributions": "17;61;52;53", "wc_strengths": "10;123;36;95", "wc_improvement": "91;73;77;176", "wc_limitations": "9;67;9;71", "wc_correctness": "39;47;8;6", "wc_clarity": "28;36;10;21", "wc_relation_to_prior_work": "21;61;26;71", "wc_documentation": "22;26;9;68", "wc_additional_feedback": "1;1;1;1", "wc_review": "238;495;228;562", "wc_reply_reviewers": "21;0;23;33", "wc_reply_authors": "404;655;552;1110", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 45.75, 16.96135312998347 ], "wc_strengths_avg": [ 66.0, 45.07216435894775 ], "wc_improvement_avg": [ 104.25, 41.96054694591099 ], "wc_limitations_avg": [ 39.0, 30.033314835362415 ], "wc_correctness_avg": [ 25.0, 18.23458252881047 ], "wc_clarity_avg": [ 23.75, 9.54921462739214 ], "wc_relation_to_prior_work_avg": [ 44.75, 21.614520582238228 ], "wc_documentation_avg": [ 31.25, 22.128883839904805 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 380.75, 149.6786140368757 ], "wc_reply_reviewers_avg": [ 19.25, 12.007809958522827 ], 
"wc_reply_authors_avg": [ 680.25, 263.6687078513489 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5484530639000749358&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "univ-grenoble-alpes.fr;imt-atlantique.fr;;;;;", "author_num": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Grenoble Alpes;IMT Atlantique", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-grenoble-alpes.fr;https://www.imt-atlantique.fr", "aff_unique_abbr": "UGA;IMT Atlantique", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "On the impact of activation and normalization in obtaining isometric embeddings at initialization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70309", "id": "rY4sA9qYKy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7d535a224c8ae54ba75bac0457b6b279-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rY4sA9qYKy", "openreview": "https://openreview.net/forum?id=rY4sA9qYKy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70309", "video": "https://nips.cc/virtual/2023/poster/70309", "author_site": "Amir Joudaki, Hadi Daneshmand, Francis Bach", "tldr": "", "abstract": "In this paper, we explore the structure of the penultimate Gram matrix in deep neural networks, which contains the pairwise inner products of outputs corresponding to a batch of inputs. In several architectures it has been observed that this Gram matrix becomes degenerate with depth at initialization, which dramatically slows training. Normalization layers, such as batch or layer normalization, play a pivotal role in preventing the rank collapse issue. Despite promising advances, the existing theoretical results do not extend to layer normalization, which is widely used in transformers, and can not quantitatively characterize the role of non-linear activations. To bridge this gap, we prove that layer normalization, in conjunction with activation layers, biases the Gram matrix of a multilayer perceptron towards the identity matrix at an exponential rate with depth at initialization. 
We quantify this rate using the Hermite expansion of the activation function.", "keywords": "dynamical isometry;Lyapunov analysis;random neural networks", "primary_area": "", "supplementary_material": "/attachment/921350c854baf93294c1e85df929f37ad0231cee.zip", "author": "Amir Joudaki;Hadi Daneshmand;Francis Bach", "authorids": "~Amir_Joudaki1;~Hadi_Daneshmand1;~Francis_Bach1", "gender": ";;M", "homepage": ";;http://www.di.ens.fr/~fbach", "dblp": ";;b/FrancisRBach", "google_scholar": ";;https://scholar.google.fr/citations?user=6PJWcFEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Amir_Joudaki1;~Hadi_Daneshmand1;~Francis_Bach1", "aff": ";;Ecole Normale Superieure", "aff_domain": ";;ens.fr", "position": ";;Faculty", "bibtex": "@inproceedings{\njoudaki2023on,\ntitle={On the impact of activation and normalization in obtaining isometric embeddings at initialization},\nauthor={Amir Joudaki and Hadi Daneshmand and Francis Bach},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rY4sA9qYKy}\n}", "github": "", "project": "", "reviewers": "Qj2n;nfuS;D7v5;tNrQ;B3t5", "pdf_size": 785064, "rating": "5;5;6;6;7", "confidence": "3;4;3;3;3", "soundness": "3;2;3;2;4", "novelty": "2;2;3;2;4", "presentation": "2;2;2;2;2", "wc_summary": "13;32;40;89;46", "wc_strengths": "26;10;66;122;44", "wc_weaknesses": "53;63;31;181;252", "wc_questions": "108;2;38;100;26", "wc_limitations": "66;2;55;10;1", "wc_review": "266;109;230;502;369", "wc_reply_reviewers": "169;4;122;238;28", "wc_reply_authors": "252;27;112;538;44", "reply_reviewers": "1;1;2;2;1", "reply_authors": "2;2;3;3;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 44.0, 25.099800796022265 ], "wc_strengths_avg": [ 53.6, 38.95433223660752 ], "wc_weaknesses_avg": [ 116.0, 85.70180861568792 ], "wc_questions_avg": [ 54.8, 41.88746829303485 ], "wc_limitations_avg": [ 26.8, 27.909854890342945 ], "wc_review_avg": [ 295.2, 132.65504136669665 ], "wc_reply_reviewers_avg": [ 112.2, 87.11463711684736 ], "wc_reply_authors_avg": [ 194.6, 189.10906905804384 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7497660875798354010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";;ens.fr", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Ecole Normale Superieure", "aff_unique_dep": "", "aff_unique_url": "https://www.ens.fr", "aff_unique_abbr": "ENS", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Swarm Reinforcement Learning for Adaptive Mesh Refinement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70308", "id": "rZqRu8e4uc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e85454a113e8b41e017c81875ae68d47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rZqRu8e4uc", "openreview": "https://openreview.net/forum?id=rZqRu8e4uc", "poster": "/media/PosterPDFs/NeurIPS%202023/70308.png?t=1701442294.5344262", "slides": "https://nips.cc/virtual/2023/poster/70308", "video": "https://nips.cc/virtual/2023/poster/70308", "author_site": "Niklas Freymuth, Philipp 
Dahlinger, Tobias W\u00fcrth, Simon Reisch, Luise K\u00e4rger, Gerhard Neumann", "tldr": "", "abstract": "The Finite Element Method, an important technique in engineering, is aided by Adaptive Mesh Refinement (AMR), which dynamically refines mesh regions to allow for a favorable trade-off between computational speed and simulation accuracy. Classical methods for AMR depend on task-specific heuristics or expensive error estimators, hindering their use for complex simulations. Recent learned AMR methods tackle these problems, but so far scale only to simple toy examples. We formulate AMR as a novel Adaptive Swarm Markov Decision Process in which a mesh is modeled as a system of simple collaborating agents that may split into multiple new agents. This framework allows for a spatial reward formulation that simplifies the credit assignment problem, which we combine with Message Passing Networks to propagate information between neighboring mesh elements. We experimentally validate the effectiveness of our approach, Adaptive Swarm Mesh Refinement (ASMR), showing that it learns reliable, scalable, and efficient refinement strategies on a set of challenging problems. Our approach significantly speeds up computation, achieving up to 30-fold improvement compared to uniform refinements in complex simulations. Additionally, we outperform learned baselines and achieve a refinement quality that is on par with a traditional error-based AMR strategy without expensive oracle information about the error signal.", "keywords": "Adaptive Mesh Refinement;Finite Element Method;Swarm Reinforcement Learning;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/14c0af622e891ff16b068bfa52dcb8eeb927d15f.zip", "author": "Niklas Freymuth;Philipp Dahlinger;Tobias Daniel W\u00fcrth;Simon Reisch;Luise K\u00e4rger;Gerhard Neumann", "authorids": "~Niklas_Freymuth1;~Philipp_Dahlinger1;~Tobias_Daniel_W\u00fcrth1;simon.reisch@student.kit.edu;luise.kaerger@kit.edu;~Gerhard_Neumann2", "gender": "M;M;;;;", "homepage": ";https://alr.anthropomatik.kit.edu/21.php;https://www.fast.kit.edu/lbt/5192_15949.php;;;", "dblp": "255/7209;307/5318;344/3384;;;", "google_scholar": "FK1DbrcAAAAJ;K0jR3YUAAAAJ;;;;", "orcid": ";;0000-0003-0671-6133;;;", "linkedin": ";;;;;", "or_profile": "~Niklas_Freymuth1;~Philipp_Dahlinger1;~Tobias_Daniel_W\u00fcrth1;simon.reisch@student.kit.edu;luise.kaerger@kit.edu;~Gerhard_Neumann2", "aff": "Karlsruhe Institute of Technology;Karlsruhe Institute of Technology;Karlsruher Institut f\u00fcr Technologie;;;", "aff_domain": "kit.edu;kit.edu;kit.edu;;;", "position": "PhD student;PhD student;PhD student;;;", "bibtex": "@inproceedings{\nfreymuth2023swarm,\ntitle={Swarm Reinforcement Learning for Adaptive Mesh Refinement},\nauthor={Niklas Freymuth and Philipp Dahlinger and Tobias Daniel W{\\\"u}rth and Simon Reisch and Luise K{\\\"a}rger and Gerhard Neumann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rZqRu8e4uc}\n}", "github": "", "project": "", "reviewers": "qjoP;Tf6s;LCc4;ymhS;fwaD", "pdf_size": 16877547, "rating": "4;4;6;6;6", "confidence": "5;4;3;3;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;4;2;3", "wc_summary": "86;111;126;100;69", "wc_strengths": "166;43;279;74;71", "wc_weaknesses": "595;40;195;232;71", "wc_questions": "78;1;301;61;19", "wc_limitations": "25;70;5;10;18", "wc_review": "950;265;906;477;248", "wc_reply_reviewers": "398;0;0;0;0", "wc_reply_authors": 
"780;0;0;0;0", "reply_reviewers": "1;0;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 98.4, 19.703806738800502 ], "wc_strengths_avg": [ 126.6, 86.74698842034806 ], "wc_weaknesses_avg": [ 226.6, 197.87733574111007 ], "wc_questions_avg": [ 92.0, 108.11845355904791 ], "wc_limitations_avg": [ 25.6, 23.22584767021432 ], "wc_review_avg": [ 569.2, 304.1877052084781 ], "wc_reply_reviewers_avg": [ 79.6, 159.2 ], "wc_reply_authors_avg": [ 156.0, 312.0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9185586535436918, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11680650197277308639&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "kit.edu;kit.edu;kit.edu;;;", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "Karlsruhe Institute of Technology;Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.kit.edu", "aff_unique_abbr": "KIT;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Adaptive Test-Time Personalization for Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70307", "id": "rbw9xCU6Ci", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f555b62384279b98732204cb1a670a23-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rbw9xCU6Ci", "openreview": "https://openreview.net/forum?id=rbw9xCU6Ci", "poster": "/media/PosterPDFs/NeurIPS%202023/70307.png?t=1701709268.5614703", "slides": "https://nips.cc/virtual/2023/poster/70307", "video": "https://nips.cc/virtual/2023/poster/70307", "author_site": "Wenxuan Bao, Tianxin Wei, Haohan Wang, Jingrui He", "tldr": "", "abstract": "Personalized federated learning algorithms have shown promising results in adapting models to various distribution shifts. However, most of these methods require labeled data on testing clients for personalization, which is usually unavailable in real-world scenarios. In this paper, we introduce a novel setting called test-time personalized federated learning (TTPFL), where clients locally adapt a global model in an unsupervised way without relying on any labeled data during test-time. While traditional test-time adaptation (TTA) can be used in this scenario, most of them inherently assume training data come from a single domain, while they come from multiple clients (source domains) with different distributions. Overlooking these domain interrelationships can result in suboptimal generalization. Moreover, most TTA algorithms are designed for a specific kind of distribution shift and lack the flexibility to handle multiple kinds of distribution shifts in FL. In this paper, we find that this lack of flexibility partially results from their pre-defining which modules to adapt in the model. To tackle this challenge, we propose a novel algorithm called ATP to adaptively learns the adaptation rates for each module in the model from distribution shifts among source domains. Theoretical analysis proves the strong generalization of ATP. 
Extensive experiments demonstrate its superiority in handling various distribution shifts including label shift, image corruptions, and domain shift, outperforming existing TTA methods across multiple datasets and model architectures. Our code is available at https://github.com/baowenxuan/ATP.", "keywords": "federated learning;personalized federated learning;test-time adaptation", "primary_area": "", "supplementary_material": "", "author": "Wenxuan Bao;Tianxin Wei;Haohan Wang;Jingrui He", "authorids": "~Wenxuan_Bao1;~Tianxin_Wei1;~Haohan_Wang1;~Jingrui_He1", "gender": ";;M;F", "homepage": ";https://weitianxin.github.io/;http://cs.cmu.edu/~haohanw;https://www.hejingrui.org", "dblp": ";277/5800;132/4066;34/2685", "google_scholar": ";_LU2-kMAAAAJ;nZxJGeUAAAAJ;hXpZynkAAAAJ", "orcid": ";0000-0003-4450-2005;;0000-0002-6429-6272", "linkedin": ";tianxin-wei-7063a2180/;haohanwang/;", "or_profile": "~Wenxuan_Bao1;~Tianxin_Wei1;~Haohan_Wang1;~Jingrui_He1", "aff": ";University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": ";uiuc.edu;illinois.edu;illinois.edu", "position": ";PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbao2023adaptive,\ntitle={Adaptive Test-Time Personalization for Federated Learning},\nauthor={Wenxuan Bao and Tianxin Wei and Haohan Wang and Jingrui He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rbw9xCU6Ci}\n}", "github": "", "project": "", "reviewers": "h3BM;yJjv;2b9x;A9BH", "pdf_size": 1205195, "rating": "4;5;5;5", "confidence": "4;4;4;4", "soundness": "2;3;2;3", "novelty": "2;3;3;2", "presentation": "2;3;2;3", "wc_summary": "31;72;92;57", "wc_strengths": "18;55;53;49", "wc_weaknesses": "145;154;255;166", "wc_questions": "2;109;58;27", "wc_limitations": "29;13;2;15", "wc_review": "225;403;460;314", "wc_reply_reviewers": "219;172;0;18", "wc_reply_authors": "777;137;0;0", "reply_reviewers": "2;2;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.0, 22.259829289552066 ], "wc_strengths_avg": [ 43.75, 15.022899187573616 ], "wc_weaknesses_avg": [ 180.0, 43.93745554763043 ], "wc_questions_avg": [ 49.0, 39.91866731242415 ], "wc_limitations_avg": [ 14.75, 9.60143218483576 ], "wc_review_avg": [ 350.5, 89.20341921697845 ], "wc_reply_reviewers_avg": [ 102.25, 94.93254183892897 ], "wc_reply_authors_avg": [ 228.5, 321.57775109606075 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2404637854022569306&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";uiuc.edu;illinois.edu;illinois.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Illinois;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Why think step by step? 
Reasoning emerges from the locality of experience", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70306", "id": "rcXXNFVlEn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0af79ad53a336b4c4b4f7e2a68eb609-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rcXXNFVlEn", "openreview": "https://openreview.net/forum?id=rcXXNFVlEn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70306", "video": "https://nips.cc/virtual/2023/poster/70306", "author_site": "Ben Prystawski, Michael Li, Noah Goodman", "tldr": "", "abstract": "Humans have a powerful and mysterious capacity to reason. Working through a set of mental steps enables us to make inferences we would not be capable of making directly even though we get no additional data from the world. Similarly, when large language models generate intermediate steps (a chain of thought) before answering a question, they often produce better answers than they would directly. We investigate why and how chain-of-thought reasoning is useful in language models, testing the hypothesis that reasoning is effective when training data consists of overlapping local clusters of variables that influence each other strongly. These training conditions enable the chaining of accurate local inferences to estimate relationships between variables that were not seen together in training. We prove that there will exist a \"reasoning gap\", where reasoning through intermediate variables reduces bias, for the simple case of an autoregressive density estimator trained on local samples from a chain-structured probabilistic model. We then test our hypothesis experimentally in more complex models, training an autoregressive language model on samples from Bayes nets but only including a subset of variables in each sample. We test language models\u2019 ability to match conditional probabilities with and without intermediate reasoning steps, finding that intermediate steps are only helpful when the training data is locally structured with respect to dependencies between variables. The combination of locally structured observations and reasoning is much more data-efficient than training on all variables. Our results illustrate how the effectiveness of reasoning step by step is rooted in the local statistical structure of the training data.", "keywords": "chain-of-thought; language models; reasoning", "primary_area": "", "supplementary_material": "", "author": "Ben Prystawski;Michael Y. Li;Noah Goodman", "authorids": "~Ben_Prystawski1;~Michael_Y._Li1;~Noah_Goodman1", "gender": "M;;", "homepage": "https://benprystawski.com/;https://cocolab.stanford.edu/;https://michaelyli.github.io/", "dblp": "278/4830;96/1216;40/2032", "google_scholar": "Ns8jBNsAAAAJ;OUpIbcQAAAAJ;", "orcid": "0000-0003-3501-7091;;", "linkedin": ";;", "or_profile": "~Ben_Prystawski1;~Noah_Goodman1;~Michael_Yifan_Li1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nprystawski2023why,\ntitle={Why think step by step? Reasoning emerges from the locality of experience},\nauthor={Ben Prystawski and Michael Y. 
Li and Noah Goodman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rcXXNFVlEn}\n}", "github": "", "project": "", "reviewers": "tGUi;fiMT;jbx9;jyTS", "pdf_size": 661024, "rating": "7;7;7;8", "confidence": "4;3;3;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;2;4;3", "wc_summary": "236;208;409;378", "wc_strengths": "79;80;144;211", "wc_weaknesses": "55;157;143;333", "wc_questions": "16;323;57;172", "wc_limitations": "6;4;9;45", "wc_review": "392;772;762;1139", "wc_reply_reviewers": "37;3;11;76", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 307.75, 87.01257093087182 ], "wc_strengths_avg": [ 128.5, 54.42655601817921 ], "wc_weaknesses_avg": [ 172.0, 100.84145972763385 ], "wc_questions_avg": [ 142.0, 119.12388509446794 ], "wc_limitations_avg": [ 16.0, 16.837458240482736 ], "wc_review_avg": [ 766.25, 264.129111420911 ], "wc_reply_reviewers_avg": [ 31.75, 28.472574523565655 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9011512957639020699&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Parallel Spiking Neurons with High Efficiency and Ability to Learn Long-term Dependencies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70305", "id": "rfTFJvTkr2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a834ac3dfdb90da54292c2c932c997cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rfTFJvTkr2", "openreview": "https://openreview.net/forum?id=rfTFJvTkr2", "poster": "/media/PosterPDFs/NeurIPS%202023/70305.png?t=1702902904.9374099", "slides": "https://nips.cc/virtual/2023/poster/70305", "video": "https://nips.cc/virtual/2023/poster/70305", "author_site": "Wei Fang, Zhaofei Yu, Zhaokun Zhou, Ding Chen, Yanqi Chen, Zhengyu Ma, Timoth\u00e9e Masquelier, Yonghong Tian", "tldr": "", "abstract": "Vanilla spiking neurons in Spiking Neural Networks (SNNs) use charge-fire-reset neuronal dynamics, which can only be simulated serially and can hardly learn long-term dependencies. We find that when the reset is removed, the neuronal dynamics can be reformulated in a non-iterative form and parallelized. By rewriting the neuronal dynamics without reset in a general formulation, we propose the Parallel Spiking Neuron (PSN), which generates hidden states that are independent of their predecessors, resulting in parallelizable neuronal dynamics and extremely high simulation speed. The weights of inputs in the PSN are fully connected, which maximizes the utilization of temporal information. 
To avoid the use of future inputs for step-by-step inference, the weights of the PSN can be masked, resulting in the masked PSN. By sharing weights across time-steps based on the masked PSN, the sliding PSN is proposed to handle sequences of varying lengths. We evaluate the PSN family on simulation speed and temporal/static data classification, and the results show the overwhelming advantage of the PSN family in efficiency and accuracy. To the best of our knowledge, this is the first study on parallelizing spiking neurons, and it can serve as a cornerstone for spiking deep learning research. Our code is available at https://github.com/fangwei123456/Parallel-Spiking-Neuron.", "keywords": "Spiking Neural Network;SNN;deep learning;spiking neuron;neuromorphic computing", "primary_area": "", "supplementary_material": "/attachment/1f19dac4c548da479d2d41c7644fd06b60ffcf09.pdf", "author": "Wei Fang;Zhaofei Yu;Zhaokun Zhou;Ding Chen;Yanqi Chen;Zhengyu Ma;Timoth\u00e9e Masquelier;Yonghong Tian", "authorids": "~Wei_Fang2;~Zhaofei_Yu1;~Zhaokun_Zhou1;~Ding_Chen2;~Yanqi_Chen1;~Zhengyu_Ma1;~Timoth\u00e9e_Masquelier1;~Yonghong_Tian1", "gender": ";M;M;M;M;;;M", "homepage": "https://fangwei123456.github.io/;https://yuzhaofei.github.io;;https://lucifer2859.github.io/;;;;http://www.pkuml.org", "dblp": ";166/0573;;78/3806;284/9379;;07/7226;86/5857", "google_scholar": "https://scholar.google.com.hk/citations?user=e2lED2gAAAAJ;qaUgD50AAAAJ;;rEXkGekAAAAJ;QzFrppAAAAAJ;;fkzUZ-oAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4454-6630;0009-0002-7583-4619;0000-0002-7658-9259;;0000-0001-8629-9506;0000-0002-2978-5935", "linkedin": ";;;ding-chen-sjtu-pku/;;;;", "or_profile": "~Wei_Fang2;~Zhaofei_Yu1;~Zhaokun_Zhou1;~Ding_Chen2;~Yanqi_Chen1;~Zhengyu_Ma1;~Timoth\u00e9e_Masquelier1;~Yonghong_Tian1", "aff": "School of Computer Science, Peking University;Peking University;Peking University;Shanghai Jiaotong University;Peking University;;CNRS;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;sjtu.edu.cn;pku.edu.cn;;cnrs.fr;pku.edu.cn", "position": "PhD student;Assistant Professor;PhD student;PhD student;PhD student;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nfang2023parallel,\ntitle={Parallel Spiking Neurons with High Efficiency and Ability to Learn Long-term Dependencies},\nauthor={Wei Fang and Zhaofei Yu and Zhaokun Zhou and Ding Chen and Yanqi Chen and Zhengyu Ma and Timoth{\\'e}e Masquelier and Yonghong Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rfTFJvTkr2}\n}", "github": "", "project": "", "reviewers": "G2d4;P1JU;Xbwd;sjmV", "pdf_size": 0, "rating": "6;6;7;7", "confidence": "4;5;4;4", "soundness": "3;4;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "46;73;36;105", "wc_strengths": "22;43;80;107", "wc_weaknesses": "101;55;3;228", "wc_questions": "15;78;221;81", "wc_limitations": "2;70;28;130", "wc_review": "186;319;368;651", "wc_reply_reviewers": "0;13;13;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.0, 26.767517628648346 ], "wc_strengths_avg": [ 63.0, 32.810059433045836 ], "wc_weaknesses_avg": [ 96.75, 83.33179165240598 ], "wc_questions_avg": [ 98.75, 75.34047716865085 ], "wc_limitations_avg": [ 57.5,
48.381298039635105 ], "wc_review_avg": [ 381.0, 169.5125364095529 ], "wc_reply_reviewers_avg": [ 17.0, 15.378556499229699 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1355171113970382210&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;sjtu.edu.cn;pku.edu.cn;;cnrs.fr;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "Peking University;Shanghai Jiao Tong University;Centre National de la Recherche Scientifique", "aff_unique_dep": "School of Computer Science;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.sjtu.edu.cn;https://www.cnrs.fr", "aff_unique_abbr": "PKU;SJTU;CNRS", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;France" }, { "title": "Policy Optimization for Continuous Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70304", "id": "rfcak9EV99", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c53bc01e30711a08f6ac86919193022-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rfcak9EV99", "openreview": "https://openreview.net/forum?id=rfcak9EV99", "poster": "/media/PosterPDFs/NeurIPS%202023/70304.png?t=1699417913.7285476", "slides": "https://nips.cc/virtual/2023/poster/70304", "video": "https://nips.cc/virtual/2023/poster/70304", "author_site": "HANYANG ZHAO, Wenpin Tang, David Yao", "tldr": "", "abstract": "We study reinforcement learning (RL) in the setting of continuous time and space, for an infinite horizon with a discounted objective and the underlying dynamics driven by a stochastic differential equation. Built upon recent advances in the continuous approach to RL, we develop a notion of occupation time (specifically for a discounted objective), and show how it can be effectively used to derive performance difference and local approximation formulas. We further extend these results to illustrate their applications in the PG (policy gradient) and TRPO/PPO (trust region policy optimization/ proximal policy optimization) methods, which have been familiar and powerful tools in the discrete RL setting but under-developed in continuous RL. 
Through numerical experiments, we demonstrate the effectiveness and advantages of our approach.", "keywords": "exploratory stochastic control;occupation time;performance difference;policy optimization", "primary_area": "", "supplementary_material": "/attachment/6cf0bf52bdb10705adb6648c7b2019fabdc20baf.pdf", "author": "Hanyang Zhao;Wenpin Tang;David Yao", "authorids": "~Hanyang_Zhao1;~Wenpin_Tang1;~David_Yao1", "gender": "M;M;", "homepage": "https://hanyang1999.github.io/;http://www.columbia.edu/~wt2319/;http://www.columbia.edu/~yao", "dblp": "348/6982.html;240/4543;y/DavidDYao.html", "google_scholar": "ipCfUaQAAAAJ;-oBYNVQAAAAJ;", "orcid": ";;", "linkedin": "hanyang-zhao/;;", "or_profile": "~Hanyang_Zhao1;~Wenpin_Tang1;~David_Yao1", "aff": "Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2023policy,\ntitle={Policy Optimization for Continuous Reinforcement Learning},\nauthor={Hanyang Zhao and Wenpin Tang and David Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rfcak9EV99}\n}", "github": "", "project": "", "reviewers": "vuEi;9Un8;Mbd6", "pdf_size": 2055002, "rating": "5;5;7", "confidence": "3;3;3", "soundness": "2;2;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "211;51;108", "wc_strengths": "79;36;27", "wc_weaknesses": "153;42;221", "wc_questions": "66;181;40", "wc_limitations": "48;1;1", "wc_review": "557;311;397", "wc_reply_reviewers": "0;32;58", "wc_reply_authors": "0;0;161", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.33333333333333, 66.21345952464817 ], "wc_strengths_avg": [ 47.333333333333336, 22.691163233490013 ], "wc_weaknesses_avg": [ 138.66666666666666, 73.77593705869747 ], "wc_questions_avg": [ 95.66666666666667, 61.266267680964184 ], "wc_limitations_avg": [ 16.666666666666668, 22.15601247717849 ], "wc_review_avg": [ 421.6666666666667, 101.93243949902416 ], "wc_reply_reviewers_avg": [ 30.0, 23.72059583287626 ], "wc_reply_authors_avg": [ 53.666666666666664, 75.8961278473561 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16412450460191442652&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "columbia.edu;columbia.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Counterfactual Evaluation of Peer-Review Assignment Policies", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70303", "id": "rhIfzCZoXG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7d795e655c1463d7299688d489e8ef4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rhIfzCZoXG", 
"openreview": "https://openreview.net/forum?id=rhIfzCZoXG", "poster": "/media/PosterPDFs/NeurIPS%202023/70303.png?t=1702168584.7209468", "slides": "https://nips.cc/virtual/2023/poster/70303", "video": "https://nips.cc/virtual/2023/poster/70303", "author_site": "Martin Saveski, Steven Jecmen, Nihar Shah, Johan Ugander", "tldr": "", "abstract": "Peer review assignment algorithms aim to match research papers to suitable expert reviewers, working to maximize the quality of the resulting reviews. A key challenge in designing effective assignment policies is evaluating how changes to the assignment algorithm map to changes in review quality. In this work, we leverage recently proposed policies that introduce randomness in peer-review assignment\u2014in order to mitigate fraud\u2014as a valuable opportunity to evaluate counterfactual assignment policies. Specifically, we exploit how such randomized assignments provide a positive probability of observing the reviews of many assignment policies of interest. To address challenges in applying standard off-policy evaluation methods, such as violations of positivity, we introduce novel methods for partial identification based on monotonicity and Lipschitz smoothness assumptions for the mapping between reviewer-paper covariates and outcomes. We apply our methods to peer-review data from two computer science venues: the TPDP'21 workshop (95 papers and 35 reviewers) and the AAAI'22 conference (8,450 papers and 3,145 reviewers). We consider estimates of (i) the effect on review quality when changing weights in the assignment algorithm, e.g., weighting reviewers' bids vs. textual similarity (between the review's past papers and the submission), and (ii) the \"cost of randomization\", capturing the difference in expected quality between the perturbed and unperturbed optimal match. We find that placing higher weight on text similarity results in higher review quality and that introducing randomization in the reviewer-paper assignment only marginally reduces the review quality. 
Our methods for partial identification may be of independent interest, while our off-policy approach can likely find use in evaluating a broad class of algorithmic matching systems.", "keywords": "peer review;causal inference;counterfactual policy evaluation", "primary_area": "", "supplementary_material": "/attachment/ba1c808e7b364e6b9493db2fdd3864a06afee5b6.pdf", "author": "Martin Saveski;Steven Jecmen;Nihar B Shah;Johan Ugander", "authorids": "~Martin_Saveski1;~Steven_Jecmen1;~Nihar_B_Shah1;~Johan_Ugander1", "gender": "M;M;M;M", "homepage": "http://martinsaveski.com/;https://sjecmen.github.io/;https://cs.cmu.edu/~nihars;http://stanford.edu/~jugander/", "dblp": "138/9642;266/4836;74/7113;13/10542.html", "google_scholar": "M3D870YAAAAJ;SMn18BwAAAAJ;BF39lMQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-5158-9677;0000-0001-5655-4086", "linkedin": ";;;", "or_profile": "~Martin_Saveski1;~Steven_Jecmen1;~Nihar_B_Shah1;~Johan_Ugander1", "aff": "Stanford University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Stanford University", "aff_domain": "stanford.edu;cs.cmu.edu;cmu.edu;stanford.edu", "position": "Postdoc;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nsaveski2023counterfactual,\ntitle={Counterfactual Evaluation of Peer-Review Assignment Policies},\nauthor={Martin Saveski and Steven Jecmen and Nihar B Shah and Johan Ugander},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rhIfzCZoXG}\n}", "github": "", "project": "", "reviewers": "5Zed;LpsF;4Cne;rhmn", "pdf_size": 1015563, "rating": "4;5;6;8", "confidence": "2;3;3;3", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "65;88;98;89", "wc_strengths": "71;45;32;58", "wc_weaknesses": "63;24;130;73", "wc_questions": "4;23;85;183", "wc_limitations": "1;55;57;62", "wc_review": "204;235;402;465", "wc_reply_reviewers": "38;0;172;0", "wc_reply_authors": "0;0;54;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 12.186057606953941 ], "wc_strengths_avg": [ 51.5, 14.534441853748634 ], "wc_weaknesses_avg": [ 72.5, 37.911080174534725 ], "wc_questions_avg": [ 73.75, 69.82612333503845 ], "wc_limitations_avg": [ 43.75, 24.81305100143874 ], "wc_review_avg": [ 326.5, 109.84193188395768 ], "wc_reply_reviewers_avg": [ 52.5, 70.71598122065478 ], "wc_reply_authors_avg": [ 13.5, 23.382685902179844 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6831300510639732, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4061593727653767114&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "stanford.edu;cs.cmu.edu;cmu.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Stanford University;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://www.stanford.edu;https://www.cmu.edu", "aff_unique_abbr": "Stanford;CMU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Pittsburgh;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United 
States" }, { "title": "DreamHuman: Animatable 3D Avatars from Text", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70302", "id": "rheCTpRrxI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/21912f7057935149fa58408ee8cb460e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rheCTpRrxI", "openreview": "https://openreview.net/forum?id=rheCTpRrxI", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70302", "video": "https://nips.cc/virtual/2023/poster/70302", "author_site": "Nikos Kolotouros, Thiemo Alldieck, Andrei Zanfir, Eduard Bazavan, Mihai Fieraru, Cristian Sminchisescu", "tldr": "", "abstract": "We present \\emph{DreamHuman}, a method to generate realistic animatable 3D human avatar models entirely from textual descriptions. Recent text-to-3D methods have made considerable strides in generation, but are still lacking in important aspects. Control and often spatial resolution remain limited, existing methods produce fixed rather than 3D human models that can be placed in different poses (i.e. re-posable or animatable), and anthropometric consistency for complex structures like people remains a challenge. \\emph{DreamHuman} connects large text-to-image synthesis models, neural radiance fields, and statistical human body models in a novel optimization framework. This makes it possible to generate dynamic 3D human avatars with high-quality textures and learnt per-instance rigid and non rigid geometric deformations. We demonstrate that our method is capable to generate a wide variety of animatable, realistic 3D human models from text. These have diverse appearance, clothing, skin tones and body shapes, and outperform both generic text-to-3D approaches and previous text-based 3D avatar generators in visual fidelity.", "keywords": "text to 3d; 3d avatars", "primary_area": "", "supplementary_material": "/attachment/2bcc964526c62d30e01d9f306b41295f0ae96050.pdf", "author": "Nikos Kolotouros;Thiemo Alldieck;Andrei Zanfir;Eduard Gabriel Bazavan;Mihai Fieraru;Cristian Sminchisescu", "authorids": "~Nikos_Kolotouros1;~Thiemo_Alldieck1;~Andrei_Zanfir1;~Eduard_Gabriel_Bazavan2;~Mihai_Fieraru1;~Cristian_Sminchisescu1", "gender": "M;;M;;M;", "homepage": "https://www.nikoskolot.com;;;;https://mihaifieraru.github.io;http://www.maths.lth.se/sminchisescu/", "dblp": "241/5232;153/9814;65/10771;;218/6212;96/3826", "google_scholar": "397EbTsAAAAJ;tJlD24EAAAAJ;;;3mKhWKEAAAAJ;https://scholar.google.se/citations?hl=en", "orcid": ";0000-0002-9107-4173;;;;", "linkedin": ";thiemoalldieck/;;;;", "or_profile": "~Nikos_Kolotouros1;~Thiemo_Alldieck1;~Andrei_Zanfir1;~Eduard_Gabriel_Bazavan2;~Mihai_Fieraru1;~Cristian_Sminchisescu1", "aff": "Google Research;Google;Google;;imar;Lund University", "aff_domain": "research.google.com;google.com;google.com;;imar.ro;lth.se", "position": "Researcher;Researcher;Researcher;;PhD student;Professor", "bibtex": "@inproceedings{\nkolotouros2023dreamhuman,\ntitle={DreamHuman: Animatable 3D Avatars from Text},\nauthor={Nikos Kolotouros and Thiemo Alldieck and Andrei Zanfir and Eduard Gabriel Bazavan and Mihai Fieraru and Cristian Sminchisescu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rheCTpRrxI}\n}", "github": "", "project": "", "reviewers": "FnWG;hMhh;KMW9;urhH", "pdf_size": 11989381, "rating": "6;6;6;8", "confidence": "4;4;3;5", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", 
"wc_summary": "124;153;15;56", "wc_strengths": "41;99;30;81", "wc_weaknesses": "204;92;173;127", "wc_questions": "65;22;143;33", "wc_limitations": "1;50;18;2", "wc_review": "435;416;379;299", "wc_reply_reviewers": "55;47;26;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.0, 54.47476479985939 ], "wc_strengths_avg": [ 62.75, 28.252212302756046 ], "wc_weaknesses_avg": [ 149.0, 42.81938813201328 ], "wc_questions_avg": [ 65.75, 47.314770421085214 ], "wc_limitations_avg": [ 17.75, 19.803724397193573 ], "wc_review_avg": [ 382.25, 52.112258634605354 ], "wc_reply_reviewers_avg": [ 39.25, 12.132085558550928 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2717447275715002340&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "research.google.com;google.com;google.com;;imar.ro;lth.se", "author_num": 6, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Google;imar;Lund University", "aff_unique_dep": "Google Research;;", "aff_unique_url": "https://research.google;;https://www.lunduniversity.lu.se", "aff_unique_abbr": "Google Research;;LU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;2", "aff_country_unique": "United States;;Sweden" }, { "title": "Transformed Low-Rank Parameterization Can Help Robust Generalization for Tensor Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70301", "id": "rih3hsSWx8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/092c2d45005ea2db40fc24c470663416-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rih3hsSWx8", "openreview": "https://openreview.net/forum?id=rih3hsSWx8", "poster": "/media/PosterPDFs/NeurIPS%202023/70301.png?t=1701520768.2186372", "slides": "https://nips.cc/virtual/2023/poster/70301", "video": "https://nips.cc/virtual/2023/poster/70301", "author_site": "Andong Wang, Chao Li, Mingyuan Bai, Zhong Jin, Guoxu Zhou, Qibin Zhao", "tldr": "", "abstract": "Multi-channel learning has gained significant attention in recent applications, where neural networks with t-product layers (t-NNs) have shown promising performance through novel feature mapping in the transformed domain. \nHowever, despite the practical success of t-NNs, the theoretical analysis of their generalization remains unexplored. We address this gap by deriving upper bounds on the generalization error of t-NNs in both standard and adversarial settings. Notably, it reveals that t-NNs compressed with exact transformed low-rank parameterization can achieve tighter adversarial generalization bounds compared to non-compressed models. While exact transformed low-rank weights are rare in practice, the analysis demonstrates that through adversarial training with gradient flow, highly over-parameterized t-NNs with the ReLU activation can be implicitly regularized towards a transformed low-rank parameterization under certain conditions. 
Moreover, this paper establishes sharp adversarial generalization bounds for t-NNs with approximately transformed low-rank weights. Our analysis highlights the potential of transformed low-rank parameterization in enhancing the robust generalization of t-NNs, offering valuable insights for further research and development.", "keywords": "Tensor SVD; Tensor Neural Networks; Transformed Low-rankness; Adversarial Generalization; Implicit Bias.", "primary_area": "", "supplementary_material": "", "author": "Andong Wang;Chao Li;Mingyuan Bai;Zhong Jin;Guoxu Zhou;Qibin Zhao", "authorids": "~Andong_Wang1;~Chao_Li12;~Mingyuan_Bai1;~Zhong_Jin1;~Guoxu_Zhou1;~Qibin_Zhao1", "gender": "M;M;F;M;M;M", "homepage": "https://www.patternrecognition.asia/wad/;https://chaoliatriken.github.io;;https://www.patternrecognition.asia/jin/;https://teacher.gdut.edu.cn/gxzhou/zh_CN/index.htm;https://qibinzhao.github.io", "dblp": "190/5540;;205/2305;;33/7727;13/1193", "google_scholar": "vuPyxGwAAAAJ;i4JrumAAAAAJ;https://scholar.google.co.jp/citations?user=lo0_2rMAAAAJ;;BIUkSFEAAAAJ;https://scholar.google.co.jp/citations?hl=en", "orcid": ";;0000-0002-2454-4219;0000-0002-4293-0869;;0000-0002-4442-3182", "linkedin": ";;;;;", "or_profile": "~Andong_Wang1;~Chao_Li12;~Mingyuan_Bai1;~Zhong_Jin1;~Guoxu_Zhou1;~Qibin_Zhao1", "aff": "RIKEN AIP;RIKEN;RIKEN;Nanjing University of Science and Technology;Guangdong University of Technology;RIKEN", "aff_domain": "riken.jp;riken.jp;riken.jp;njust.edu.cn;gdut.edu.cn;riken.jp", "position": "Postdoc;Researcher;Postdoc;Full Professor;Full Professor;Team Leader", "bibtex": "@inproceedings{\nwang2023transformed,\ntitle={Transformed Low-Rank Parameterization Can Help Robust Generalization for Tensor Neural Networks},\nauthor={Andong Wang and Chao Li and Mingyuan Bai and Zhong Jin and Guoxu Zhou and Qibin Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rih3hsSWx8}\n}", "github": "", "project": "", "reviewers": "yC9Y;EpHo;pKvo;83W1", "pdf_size": 986428, "rating": "4;5;7;7", "confidence": "1;4;2;2", "soundness": "2;3;4;3", "novelty": "1;2;3;3", "presentation": "1;3;4;3", "wc_summary": "22;26;101;136", "wc_strengths": "17;31;72;60", "wc_weaknesses": "122;95;17;59", "wc_questions": "51;263;38;78", "wc_limitations": "4;34;7;31", "wc_review": "216;449;235;364", "wc_reply_reviewers": "19;13;25;33", "wc_reply_authors": "55;60;20;88", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 71.25, 48.86396934347434 ], "wc_strengths_avg": [ 45.0, 21.988633427296023 ], "wc_weaknesses_avg": [ 73.25, 39.42318480285427 ], "wc_questions_avg": [ 107.5, 90.92991806880725 ], "wc_limitations_avg": [ 19.0, 13.583077707206124 ], "wc_review_avg": [ 316.0, 95.59550198623364 ], "wc_reply_reviewers_avg": [ 22.5, 7.399324293474371 ], "wc_reply_authors_avg": [ 55.75, 24.1699710384601 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.044151078568834795, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9885558875461964474&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": 
"riken.jp;riken.jp;riken.jp;njust.edu.cn;gdut.edu.cn;riken.jp", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "RIKEN;Nanjing University of Science and Technology;Guangdong University of Technology", "aff_unique_dep": "Advanced Institute for Computational Science;;", "aff_unique_url": "https://www.aip.riken.jp;http://www.nust.edu.cn/;http://www.gdut.edu.cn", "aff_unique_abbr": "RIKEN AIP;NUST;GDUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;0", "aff_country_unique": "Japan;China" }, { "title": "False Discovery Proportion control for aggregated Knockoffs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70300", "id": "rlPUJ60bwM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f6712d5191d2501dfc7024389f7bfcdd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rlPUJ60bwM", "openreview": "https://openreview.net/forum?id=rlPUJ60bwM", "poster": "/media/PosterPDFs/NeurIPS%202023/70300.png?t=1702030238.656988", "slides": "https://nips.cc/virtual/2023/poster/70300", "video": "https://nips.cc/virtual/2023/poster/70300", "author_site": "Alexandre Blain, Bertrand Thirion, Olivier Grisel, Pierre Neuvial", "tldr": "", "abstract": "Controlled variable selection is an important analytical step in various scientific fields, such as brain imaging or genomics. In these high-dimensional data settings, considering too many variables leads to poor models and high costs, hence the need for statistical guarantees on false positives. Knockoffs are a popular statistical tool for conditional variable selection in high dimension. However, they control for the expected proportion of false discoveries (FDR) and not the actual proportion of false discoveries (FDP). We present a new method, KOPI, that controls the proportion of false discoveries for Knockoff-based inference. The proposed method also relies on a new type of aggregation to address the undesirable randomness associated with classical Knockoff inference. 
We demonstrate FDP control and substantial power gains over existing Knockoff-based methods in various simulation settings and achieve good sensitivity/specificity tradeoffs on brain imaging data.", "keywords": "Knockoffs;Derandomization of Knockoffs;False Discovery Proportion control;Controlled variable selection;Statistical inference;High-dimensional inference", "primary_area": "", "supplementary_material": "/attachment/40bad1cdf6ca9d06fb2c57fb22f277c48aa897ee.pdf", "author": "Alexandre Blain;Bertrand Thirion;Olivier Grisel;Pierre Neuvial", "authorids": "~Alexandre_Blain1;~Bertrand_Thirion1;~Olivier_Grisel1;~Pierre_Neuvial1", "gender": "Not Specified;M;M;M", "homepage": ";http://pages.saclay.inria.fr/bertrand.thirion;http://ogrisel.com;https://www.math.univ-toulouse.fr/~pneuvial/", "dblp": "327/8243;62/2019;86/10824;39/3907.html", "google_scholar": "5OwKvioAAAAJ;MeKi5_AAAAAJ;duoYY64AAAAJ;V2LO-5wAAAAJ", "orcid": ";0000-0001-5018-7895;;0000-0003-3584-9998", "linkedin": ";;oliviergrisel/;", "or_profile": "~Alexandre_Blain1;~Bertrand_Thirion1;~Olivier_Grisel1;~Pierre_Neuvial1", "aff": "INRIA;INRIA;Inria;Universit\u00e9 de Toulouse", "aff_domain": "inria.fr;inria.fr;inria.fr;univ-toulouse.fr", "position": "PhD student;Full Professor;Research Engineer;Researcher", "bibtex": "@inproceedings{\nblain2023false,\ntitle={False Discovery Proportion control for aggregated Knockoffs},\nauthor={Alexandre Blain and Bertrand Thirion and Olivier Grisel and Pierre Neuvial},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rlPUJ60bwM}\n}", "github": "", "project": "", "reviewers": "zfG6;DZHu;MbEh;DefH", "pdf_size": 484589, "rating": "5;6;6;7", "confidence": "1;4;2;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "22;50;84;115", "wc_strengths": "22;23;44;60", "wc_weaknesses": "22;83;70;316", "wc_questions": "22;1;26;59", "wc_limitations": "22;1;1;13", "wc_review": "110;158;225;563", "wc_reply_reviewers": "0;0;93;26", "wc_reply_authors": "0;0;35;27", "reply_reviewers": "0;0;1;1", "reply_authors": "0;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 35.01696017646306 ], "wc_strengths_avg": [ 37.25, 15.801503093060482 ], "wc_weaknesses_avg": [ 122.75, 113.86258164998719 ], "wc_questions_avg": [ 27.0, 20.77257807784099 ], "wc_limitations_avg": [ 9.25, 8.842369591913696 ], "wc_review_avg": [ 264.0, 177.39363009984322 ], "wc_reply_reviewers_avg": [ 29.75, 38.028772002261654 ], "wc_reply_authors_avg": [ 15.5, 15.75595125658873 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6324555320336759, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=555602577297889304&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": "inria.fr;inria.fr;inria.fr;univ-toulouse.fr", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "INRIA;Universit\u00e9 de Toulouse", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.univ-toulouse.fr", "aff_unique_abbr": "INRIA;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { 
"title": "Managing Temporal Resolution in Continuous Value Estimation: A Fundamental Trade-off", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70299", "id": "rmQgQCZWiP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c4d66eae503694424123b93ac0fbaf17-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rmQgQCZWiP", "openreview": "https://openreview.net/forum?id=rmQgQCZWiP", "poster": "/media/PosterPDFs/NeurIPS%202023/70299.png?t=1702361808.1201391", "slides": "https://nips.cc/virtual/2023/poster/70299", "video": "https://nips.cc/virtual/2023/poster/70299", "author_site": "Zichen (Vincent) Zhang, Johannes Kirschner, Junxi Zhang, Francesco Zanini, Alex Ayoub, Masood Dehghan, Dale Schuurmans", "tldr": "", "abstract": "A default assumption in reinforcement learning (RL) and optimal control is that observations arrive at discrete time points on a fixed clock cycle. Yet, many applications involve continuous-time systems where the time discretization, in principle, can be managed. The impact of time discretization on RL methods has not been fully characterized in existing theory, but a more detailed analysis of its effect could reveal opportunities for improving data-efficiency. We address this gap by analyzing Monte-Carlo policy evaluation for LQR systems and uncover a fundamental trade-off between approximation and statistical error in value estimation. Importantly, these two errors behave differently to time discretization, leading to an optimal choice of temporal resolution for a given data budget. These findings show that managing the temporal resolution can provably improve policy evaluation efficiency in LQR systems with finite data. Empirically, we demonstrate the trade-off in numerical simulations of LQR instances and standard RL benchmarks for non-linear continuous control.", "keywords": "Reinforcement Learning;Policy Evaluation;Temporal Discretization;Continuous Time;LQR", "primary_area": "", "supplementary_material": "/attachment/485c155e559001c16fd05b0bc7af6159676a0b1d.zip", "author": "Zichen Zhang;Johannes Kirschner;Junxi Zhang;Francesco Zanini;Alex Ayoub;Masood Dehghan;Dale Schuurmans", "authorids": "~Zichen_Zhang1;~Johannes_Kirschner1;~Junxi_Zhang1;~Francesco_Zanini1;~Alex_Ayoub1;~Masood_Dehghan1;~Dale_Schuurmans1", "gender": ";;M;;M;M;", "homepage": ";;https://junxi-zhang.github.io/;;;;", "dblp": "200/8127;223/0106;;;266/8071;97/11044;", "google_scholar": "https://scholar.google.ca/citations?user=nSh2eD4AAAAJ;https://scholar.google.ch/citations?user=IgO2ThIAAAAJ;https://scholar.google.ca/citations?hl=en;;eh0TSgYAAAAJ;GUd_h1MAAAAJ;", "orcid": ";0000-0002-7228-8280;0000-0001-5318-2045;;;;", "linkedin": ";;;;;;", "or_profile": "~Zichen_Zhang1;~Johannes_Kirschner1;~Junxi_Zhang1;~Francesco_Zanini1;~Alex_Ayoub1;~Masood_Dehghan1;~Dale_Schuurmans1", "aff": "University of Alberta;University of Alberta;University of Alberta;;Huawei Technologies Ltd.;;", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca;;huawei.com;;", "position": "PhD student;Postdoc;PhD student;;Intern;;", "bibtex": "@inproceedings{\nzhang2023managing,\ntitle={Managing Temporal Resolution in Continuous Value Estimation: A Fundamental Trade-off},\nauthor={Zichen Zhang and Johannes Kirschner and Junxi Zhang and Francesco Zanini and Alex Ayoub and Masood Dehghan and Dale Schuurmans},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rmQgQCZWiP}\n}", "github": "", 
"project": "", "reviewers": "sR6e;uBm7;cepd;37Kf;hJmD", "pdf_size": 670614, "rating": "6;6;6;7;7", "confidence": "3;3;4;3;3", "soundness": "4;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "84;121;81;60;76", "wc_strengths": "56;103;122;91;26", "wc_weaknesses": "106;132;91;94;48", "wc_questions": "1;120;21;11;8", "wc_limitations": "1;43;1;93;1", "wc_review": "248;519;316;349;159", "wc_reply_reviewers": "30;67;0;22;24", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 84.4, 20.08581589082206 ], "wc_strengths_avg": [ 79.6, 34.366262525913406 ], "wc_weaknesses_avg": [ 94.2, 27.249954128401757 ], "wc_questions_avg": [ 32.2, 44.36845726414206 ], "wc_limitations_avg": [ 27.8, 36.43295211755424 ], "wc_review_avg": [ 318.2, 119.62173715508399 ], "wc_reply_reviewers_avg": [ 28.6, 21.721878371816743 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6754799172119869639&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ualberta.ca;ualberta.ca;ualberta.ca;;huawei.com;;", "author_num": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Alberta;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com", "aff_unique_abbr": "UAlberta;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Canada;China" }, { "title": "AdaPlanner: Adaptive Planning from Feedback with Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70298", "id": "rnKgbKmelt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b5c8c1c117618267944b2617add0a766-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rnKgbKmelt", "openreview": "https://openreview.net/forum?id=rnKgbKmelt", "poster": "/media/PosterPDFs/NeurIPS%202023/70298.png?t=1699761749.7272918", "slides": "https://nips.cc/virtual/2023/poster/70298", "video": "https://nips.cc/virtual/2023/poster/70298", "author_site": "Haotian Sun, Yuchen Zhuang, Lingkai Kong, Bo Dai, Chao Zhang", "tldr": "", "abstract": "Large language models (LLMs) have recently demonstrated the potential in acting as autonomous agents for sequential decision-making tasks. However, most existing methods either take actions greedily without planning or rely on static plans that are not adaptable to environmental feedback. Consequently, the sequential decision-making performance of LLM agents degenerates with problem complexity and plan horizons increase. We propose a closed-loop approach, AdaPlanner, which allows the LLM agent to refine its self-generated plan adaptively in response to environmental feedback. In AdaPlanner, the LLM agent adaptively refines its plan from feedback with both in-plan and out-of-plan refinement strategies. To mitigate hallucination, we develop a code-style LLM prompt structure that facilitates plan generation across a variety of tasks, environments, and agent capabilities. 
Furthermore, we propose a skill discovery mechanism that leverages successful plans as few-shot exemplars, enabling the agent to plan and refine with fewer task demonstrations. Our experiments in the ALFWorld and MiniWoB++ environments demonstrate that AdaPlanner outperforms state-of-the-art baselines by 3.73% and 4.11% while utilizing 2x and 600x fewer samples, respectively. The implementation of AdaPlanner is available at https://github.com/haotiansun14/AdaPlanner.", "keywords": "Large language models;decision making;closed-loop planning", "primary_area": "", "supplementary_material": "", "author": "Haotian Sun;Yuchen Zhuang;Lingkai Kong;Bo Dai;Chao Zhang", "authorids": "~Haotian_Sun1;~Yuchen_Zhuang1;~Lingkai_Kong1;~Bo_Dai1;~Chao_Zhang15", "gender": "M;M;M;;", "homepage": "https://haotiansun.tech/;https://night-chen.github.io/;https://lingkai-kong.com/;https://bo-dai.github.io/;http://chaozhang.org/", "dblp": "12/8162;191/5231.html;20/10253;64/2903;94/3019-14", "google_scholar": "lcWkVCQAAAAJ;T-f6XlEAAAAJ;https://scholar.google.com/citations?hl=en;TIKl_foAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-9013-7016;;0000-0001-6480-513X;0009-0002-8070-574X;0000-0003-3009-598X", "linkedin": "haotian-sun-159597218/;;;;", "or_profile": "~Haotian_Sun1;~Yuchen_Zhuang1;~Lingkai_Kong1;~Bo_Dai1;~Chao_Zhang15", "aff": "Georgia Institute of Technology;Adobe Systems;Georgia Institute of Technology;Google Brain;Georgia Institute of Technology", "aff_domain": "gatech.edu;adobe.com;gatech.edu;google.com;gatech.edu", "position": "PhD student;Intern;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nsun2023adaplanner,\ntitle={AdaPlanner: Adaptive Planning from Feedback with Language Models},\nauthor={Haotian Sun and Yuchen Zhuang and Lingkai Kong and Bo Dai and Chao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rnKgbKmelt}\n}", "github": "", "project": "", "reviewers": "GDYQ;oy34;4ZaS;aJpk", "pdf_size": 615730, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;2", "wc_summary": "87;136;33;106", "wc_strengths": "91;16;23;36", "wc_weaknesses": "133;81;93;177", "wc_questions": "63;643;56;66", "wc_limitations": "29;89;17;22", "wc_review": "403;965;222;407", "wc_reply_reviewers": "30;88;13;43", "wc_reply_authors": "20;583;0;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.5, 37.51333096380539 ], "wc_strengths_avg": [ 41.5, 29.46608219631514 ], "wc_weaknesses_avg": [ 121.0, 37.62977544445356 ], "wc_questions_avg": [ 207.0, 251.75086891607742 ], "wc_limitations_avg": [ 39.25, 29.03769102390891 ], "wc_review_avg": [ 499.25, 279.08992726359725 ], "wc_reply_reviewers_avg": [ 43.5, 27.807373122968663 ], "wc_reply_authors_avg": [ 156.75, 246.26345140925804 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 114, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15076713477866623053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "gatech.edu;adobe.com;gatech.edu;google.com;gatech.edu", "author_num": 5, 
"aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Georgia Institute of Technology;Adobe;Google", "aff_unique_dep": ";Adobe Systems Incorporated;Google Brain", "aff_unique_url": "https://www.gatech.edu;https://www.adobe.com;https://brain.google.com", "aff_unique_abbr": "Georgia Tech;Adobe;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ConRad: Image Constrained Radiance Fields for 3D Generation from a Single Image", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70297", "id": "roGYQvarnC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b68d474baf8dff30f3280c199a32089-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=roGYQvarnC", "openreview": "https://openreview.net/forum?id=roGYQvarnC", "poster": "/media/PosterPDFs/NeurIPS%202023/70297.png?t=1702355102.229137", "slides": "https://nips.cc/virtual/2023/poster/70297", "video": "https://nips.cc/virtual/2023/poster/70297", "author_site": "Senthil Purushwalkam, Nikhil Naik", "tldr": "", "abstract": "We present a novel method for reconstructing 3D objects from a single RGB image. Our method leverages the latest image generation models to infer the hidden 3D structure while remaining faithful to the input image. While existing methods obtain impressive results in generating 3D models from text prompts, they do not provide an easy approach for conditioning on input RGB data. Naive extensions of these methods often lead to improper alignment in appearance between the input image and the 3D reconstructions. We address these challenges by introducing Image Constrained Radiance Fields (ConRad), a novel variant of neural radiance fields. ConRad is an efficient 3D representation that explicitly captures the appearance of an input image in one viewpoint. We propose a training algorithm that leverages the single RGB image in conjunction with pretrained Diffusion Models to optimize the parameters of a ConRad representation. Extensive experiments show that ConRad representations can simplify preservation of image details while producing a realistic 3D reconstruction. 
Compared to existing state-of-the-art baselines, we show that our 3D reconstructions remain more faithful to the input and produce more consistent 3D models while demonstrating significantly improved quantitative performance on a ShapeNet object benchmark.", "keywords": "3D;generation;diffusion;viewpoint", "primary_area": "", "supplementary_material": "/attachment/8b2c58afba46cce797602b7f806b99e934b9eeb2.zip", "author": "Senthil Purushwalkam;Nikhil Naik", "authorids": "~Senthil_Purushwalkam1;~Nikhil_Naik1", "gender": "M;M", "homepage": "https://www.senthilpurushwalkam.com/;mit.edu/~naik", "dblp": "130/6222;43/7797", "google_scholar": "T3Tt0S8AAAAJ;M1IgIyMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Senthil_Purushwalkam1;~Nikhil_Naik1", "aff": "Salesforce;MIT", "aff_domain": "salesforce.com; ", "position": "Researcher;Graduate Student", "bibtex": "@inproceedings{\npurushwalkam2023conrad,\ntitle={ConRad: Image Constrained Radiance Fields for 3D Generation from a Single Image},\nauthor={Senthil Purushwalkam and Nikhil Naik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=roGYQvarnC}\n}", "github": "", "project": "", "reviewers": "mCGc;ZVcc;yDN8;1JJW;GS6t", "pdf_size": 27205182, "rating": "5;6;6;7;7", "confidence": "4;4;4;4;5", "soundness": "3;2;4;4;4", "novelty": "2;3;3;4;4", "presentation": "2;3;3;4;4", "wc_summary": "50;73;98;176;69", "wc_strengths": "22;65;109;124;110", "wc_weaknesses": "143;208;287;411;128", "wc_questions": "4;98;8;103;5", "wc_limitations": "11;33;1;13;1", "wc_review": "230;477;503;827;313", "wc_reply_reviewers": "16;28;30;40;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 93.2, 44.13343403815298 ], "wc_strengths_avg": [ 86.0, 37.64571688784795 ], "wc_weaknesses_avg": [ 235.4, 104.23166505433942 ], "wc_questions_avg": [ 43.6, 46.504193359309 ], "wc_limitations_avg": [ 11.8, 11.702991070662236 ], "wc_review_avg": [ 470.0, 205.3465363720557 ], "wc_reply_reviewers_avg": [ 25.2, 10.087616170334792 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7410870971347040289&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "salesforce.com; ", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Salesforce;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.salesforce.com;https://web.mit.edu", "aff_unique_abbr": "Salesforce;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On the Role of Noise in the Sample Complexity of Learning Recurrent Neural Networks: Exponential Gaps for Long Sequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70296", "id": "rpuEARqB54", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe03053bd2cf5b5c56de1e463bc53e1a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rpuEARqB54", "openreview": 
"https://openreview.net/forum?id=rpuEARqB54", "poster": "/media/PosterPDFs/NeurIPS%202023/70296.png?t=1701972700.0169652", "slides": "https://nips.cc/virtual/2023/poster/70296", "video": "https://nips.cc/virtual/2023/poster/70296", "author_site": "Alireza F. Pour, Hassan Ashtiani", "tldr": "", "abstract": "We consider the class of noisy multi-layered sigmoid recurrent neural networks with $w$ (unbounded) weights for classification of sequences of length $T$, where independent noise distributed according to $\\mathcal{N}(0,\\sigma^2)$ is added to the output of each neuron in the network. Our main result shows that the sample complexity of PAC learning this class can be bounded by $O (w\\log(T/\\sigma))$. For the non-noisy version of the same class (i.e., $\\sigma=0$), we prove a lower bound of $\\Omega (wT)$ for the sample complexity. \n Our results indicate an exponential gap in the dependence of sample complexity on $T$ for noisy versus non-noisy networks. Moreover, given the mild logarithmic dependence of the upper bound on $1/\\sigma$, this gap still holds even for numerically negligible values of $\\sigma$.", "keywords": "PAC Learning;Recurrent Neural Networks;Noise;Sample Complexity", "primary_area": "", "supplementary_material": "/attachment/31f21c42c528d2a656cc04e86dfc6fd1bfed3b6b.pdf", "author": "Alireza Fathollah Pour;Hassan Ashtiani", "authorids": "~Alireza_Fathollah_Pour1;~Hassan_Ashtiani1", "gender": "M;M", "homepage": ";https://www.cas.mcmaster.ca/ashtiani/", "dblp": "322/4092;164/5733", "google_scholar": "JX7pP24AAAAJ;", "orcid": ";", "linkedin": "alirezapour/;", "or_profile": "~Alireza_Fathollah_Pour1;~Hassan_Ashtiani1", "aff": "McMaster University;McMaster University", "aff_domain": "mcmaster.ca;mcmaster.ca", "position": "Researcher;Assistant Professor", "bibtex": "@inproceedings{\npour2023on,\ntitle={On the Role of Noise in the Sample Complexity of Learning Recurrent Neural Networks: Exponential Gaps for Long Sequences},\nauthor={Alireza Fathollah Pour and Hassan Ashtiani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rpuEARqB54}\n}", "github": "", "project": "", "reviewers": "Ei2R;KR7t;hjDk;9rJ1", "pdf_size": 354521, "rating": "5;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "82;193;76;51", "wc_strengths": "27;104;31;32", "wc_weaknesses": "214;44;141;64", "wc_questions": "3;68;289;7", "wc_limitations": "1;11;1;8", "wc_review": "327;420;538;162", "wc_reply_reviewers": "70;40;11;4", "wc_reply_authors": "449;174;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 100.5, 54.655740778073806 ], "wc_strengths_avg": [ 48.5, 32.09750769140807 ], "wc_weaknesses_avg": [ 115.75, 67.2992384800898 ], "wc_questions_avg": [ 91.75, 116.75910028772918 ], "wc_limitations_avg": [ 5.25, 4.380353866983808 ], "wc_review_avg": [ 361.75, 137.4452163591007 ], "wc_reply_reviewers_avg": [ 31.25, 26.12828926661675 ], "wc_reply_authors_avg": [ 155.75, 183.60606607626013 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:YyGhvtj2YEAJ:scholar.google.com/&scioq=On+the+Role+of+Noise+in+the+Sample+Complexity+of+Learning+Recurrent+Neural+Networks:+Exponential+Gaps+for+Long+Sequences&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "mcmaster.ca;mcmaster.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "McMaster University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcmaster.ca", "aff_unique_abbr": "McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "PointGPT: Auto-regressively Generative Pre-training from Point Clouds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70295", "id": "rqE0fEQDqs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5ed5c3c846f684a54975ad7a2525199f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rqE0fEQDqs", "openreview": "https://openreview.net/forum?id=rqE0fEQDqs", "poster": "/media/PosterPDFs/NeurIPS%202023/70295.png?t=1697285956.5017045", "slides": "https://nips.cc/virtual/2023/poster/70295", "video": "https://nips.cc/virtual/2023/poster/70295", "author_site": "Guangyan Chen, Meiling Wang, Yi Yang, Kai Yu, Li Yuan, Yufeng Yue", "tldr": "", "abstract": "Large language models (LLMs) based on the generative pre-training transformer (GPT) have demonstrated remarkable effectiveness across a diverse range of downstream tasks. Inspired by the advancements of the GPT, we present PointGPT, a novel approach that extends the concept of GPT to point clouds, addressing the challenges associated with disorder properties, low information density, and task gaps. Specifically, a point cloud auto-regressive generation task is proposed to pre-train transformer models. Our method partitions the input point cloud into multiple point patches and arranges them in an ordered sequence based on their spatial proximity. Then, an extractor-generator based transformer decode, with a dual masking strategy, learns latent representations conditioned on the preceding point patches, aiming to predict the next one in an auto-regressive manner. To explore scalability and enhance performance, a larger pre-training dataset is collected. Additionally, a subsequent post-pre-training stage is introduced, incorporating a labeled hybrid dataset. Our scalable approach allows for learning high-capacity models that generalize well, achieving state-of-the-art performance on various downstream tasks. In particular, our approach achieves classification accuracies of 94.9% on the ModelNet40 dataset and 93.4% on the ScanObjectNN dataset, outperforming all other transformer models. Furthermore, our method also attains new state-of-the-art accuracies on all four few-shot learning benchmarks. 
Codes are available at https://github.com/CGuangyan-BIT/PointGPT.", "keywords": "Generative Pre-training Transformer; GPT; Auto-regressively Generative Pre-training; Self-supervised Learning; Point clouds", "primary_area": "", "supplementary_material": "/attachment/06ef3a70b24c5fae7effd92f8a0a8b337314f034.zip", "author": "Guangyan Chen;Meiling Wang;Yi Yang;Kai Yu;Li Yuan;Yufeng Yue", "authorids": "~Guangyan_Chen1;~Meiling_Wang2;~Yi_Yang34;~Kai_Yu8;~Li_Yuan2;~Yufeng_Yue1", "gender": "M;F;M;M;M;M", "homepage": ";http://www.bit.edu.cn;;;https://yfyue-bit.github.io/;https://yuanli2333.github.io/", "dblp": "309/5775;17/1320-2.html;33/4854-9;;194/9143;98/4583-7", "google_scholar": "Ic8DObsAAAAJ;;;;https://scholar.google.com.sg/citations?user=7M_xficAAAAJ;-5juAR0AAAAJ", "orcid": "0000-0002-2903-1957;0000-0002-3618-7423;0000-0003-3964-2433;0009-0002-1128-3491;;0000-0002-2120-5588", "linkedin": ";;;;;", "or_profile": "~Guangyan_Chen1;~Meiling_Wang2;~Yi_Yang34;~Kai_Yu8;~Yufeng_Yue1;~Yuan_LI2", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Peking University", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;pku.edu.cn", "position": "PhD student;Full Professor;Full Professor;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023pointgpt,\ntitle={Point{GPT}: Auto-regressively Generative Pre-training from Point Clouds},\nauthor={Guangyan Chen and Meiling Wang and Yi Yang and Kai Yu and Li Yuan and Yufeng Yue},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rqE0fEQDqs}\n}", "github": "", "project": "", "reviewers": "LJmd;EUQH;4dwU;5U9K", "pdf_size": 1804648, "rating": "5;5;6;7", "confidence": "4;3;5;5", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "58;67;45;49", "wc_strengths": "71;86;53;31", "wc_weaknesses": "108;144;78;2", "wc_questions": "3;2;18;133", "wc_limitations": "1;1;1;9", "wc_review": "241;300;195;224", "wc_reply_reviewers": "56;0;84;0", "wc_reply_authors": "45;0;201;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.75, 8.496322733983215 ], "wc_strengths_avg": [ 60.25, 20.535031044534605 ], "wc_weaknesses_avg": [ 83.0, 52.278102490430925 ], "wc_questions_avg": [ 39.0, 54.63972913549261 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 240.0, 38.34709897762802 ], "wc_reply_reviewers_avg": [ 35.0, 36.373066958946424 ], "wc_reply_authors_avg": [ 61.5, 82.60901887808619 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 110, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3460393170647566362&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Beijing Institute of Technology;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "BIT;Peking U", 
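An illustrative Python sketch of the patch partition-and-ordering step that the PointGPT abstract describes. This is a reading of the abstract, not the authors' code: the random selection of patch centers (the paper may use farthest point sampling), the patch size, and the greedy nearest-neighbor ordering are assumptions:

```python
# Illustrative sketch (not the paper's code) of splitting a point cloud into
# patches and arranging them in a sequence based on spatial proximity, as in
# the PointGPT abstract. Center selection and ordering heuristic are assumed.
import numpy as np

def order_patches(points, num_patches=64, patch_size=32):
    # Pick patch centers at random (the paper may use farthest point
    # sampling); gather each center's nearest points as its patch.
    idx = np.random.choice(len(points), num_patches, replace=False)
    centers = points[idx]
    dists = np.linalg.norm(points[:, None] - centers[None], axis=-1)
    patches = [points[np.argsort(dists[:, i])[:patch_size]]
               for i in range(num_patches)]

    # Greedy nearest-neighbor chain over centers yields an ordered sequence
    # suitable for next-patch auto-regressive prediction.
    order, remaining = [0], set(range(1, num_patches))
    while remaining:
        last = centers[order[-1]]
        nxt = min(remaining, key=lambda i: np.linalg.norm(centers[i] - last))
        order.append(nxt)
        remaining.remove(nxt)
    return [patches[i] for i in order]
```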
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "D\u00e4RF: Boosting Radiance Fields from Sparse Input Views with Monocular Depth Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70294", "id": "rsrfEIdawr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d7a6f4830a18b6974326310478bfa489-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rsrfEIdawr", "openreview": "https://openreview.net/forum?id=rsrfEIdawr", "poster": "/media/PosterPDFs/NeurIPS%202023/70294.png?t=1702039884.7085192", "slides": "https://nips.cc/virtual/2023/poster/70294", "video": "https://nips.cc/virtual/2023/poster/70294", "author_site": "Jiuhn Song, Seonghoon Park, Honggyu An, Seokju Cho, Min-Seop Kwak, Sungjin Cho, Seungryong Kim", "tldr": "", "abstract": "Neural radiance field (NeRF) shows powerful performance in novel view synthesis and 3D geometry reconstruction, but it suffers from critical performance degradation when the number of known viewpoints is drastically reduced. Existing works attempt to overcome this problem by employing external priors, but their success is limited to certain types of scenes or datasets. Employing monocular depth estimation (MDE) networks, pretrained on large-scale RGB-D datasets, with powerful generalization capability may be a key to solving this problem: however, using MDE in conjunction with NeRF comes with a new set of challenges due to various ambiguity problems exhibited by monocular depths. In this light, we propose a novel framework, dubbed D\u00e4RF, that achieves robust NeRF reconstruction with a handful of real-world images by combining the strengths of NeRF and monocular depth estimation through online complementary training. Our framework imposes the MDE network's powerful geometry prior to NeRF representation at both seen and unseen viewpoints to enhance its robustness and coherence. In addition, we overcome the ambiguity problems of monocular depths through patch-wise scale-shift fitting and geometry distillation, which adapts the MDE network to produce depths aligned accurately with NeRF geometry. 
Experiments show our framework achieves state-of-the-art results both quantitatively and qualitatively, demonstrating consistent and reliable performance in both indoor and outdoor real-world datasets.", "keywords": "Neural Radiance Fields;3D Reconstruction;Few-shot NeRF;Monocular Priors", "primary_area": "", "supplementary_material": "/attachment/8d5c735058af501a5c2c7d3cc274c6cf7818c095.pdf", "author": "Jiuhn Song;Seonghoon Park;Honggyu An;Seokju Cho;Min-Seop Kwak;Sungjin Cho;Seungryong Kim", "authorids": "~Jiuhn_Song1;~Seonghoon_Park1;~Honggyu_An1;~Seokju_Cho1;~Min-Seop_Kwak1;~Sungjin_Cho1;~Seungryong_Kim1", "gender": ";M;M;M;M;M;M", "homepage": "https://github.com/uhnjiny;;https://github.com/hg010303;https://seokju-cho.github.io;;https://github.com/boazcho1;https://cvlab.korea.ac.kr/members/faculty", "dblp": "338/9375;243/0069-2.html;336/7259;294/4755;338/9125;;141/9955", "google_scholar": ";Rvv2FLEAAAAJ;https://scholar.google.co.kr/citations?user=W69-kzMAAAAJ;mvPfpnMAAAAJ;;;cIK1hS8AAAAJ", "orcid": ";;;0000-0003-1199-9596;;;", "linkedin": ";%EC%84%B1%ED%9B%88-%EB%B0%95-5a4a07239/;;seokju-cho-43601b174/;matthewmatics96;;", "or_profile": "~Jiuhn_Song1;~Seonghoon_Park1;~Honggyu_An1;~Seokju_Cho1;~Min-Seop_Kwak1;~Sungjin_Cho1;~Seungryong_Kim1", "aff": "Korea University;Korea University;Korea University;Korea University;;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;;korea.ac.kr;korea.ac.kr", "position": "MS student;MS student;Undergrad student;PhD student;;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nsong2023drf,\ntitle={D\\\"a{RF}: Boosting Radiance Fields from Sparse Input Views with Monocular Depth Adaptation},\nauthor={Jiuhn Song and Seonghoon Park and Honggyu An and Seokju Cho and Min-Seop Kwak and Sungjin Cho and Seungryong Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rsrfEIdawr}\n}", "github": "", "project": "", "reviewers": "Ndsp;oMo1;Eioc;VhkU;KpuH", "pdf_size": 8187353, "rating": "5;5;5;6;6", "confidence": "4;3;3;4;5", "soundness": "3;3;2;2;3", "novelty": "2;3;2;2;3", "presentation": "2;3;3;2;3", "wc_summary": "99;53;38;58;55", "wc_strengths": "101;56;38;61;167", "wc_weaknesses": "235;87;106;272;104", "wc_questions": "103;9;4;4;53", "wc_limitations": "33;12;4;1;16", "wc_review": "571;217;190;396;395", "wc_reply_reviewers": "644;0;0;119;30", "wc_reply_authors": "782;0;0;63;0", "reply_reviewers": "3;0;0;2;1", "reply_authors": "4;1;1;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 60.6, 20.40196069009055 ], "wc_strengths_avg": [ 84.6, 46.05475002646307 ], "wc_weaknesses_avg": [ 160.8, 76.872361743347 ], "wc_questions_avg": [ 34.6, 38.846364051221066 ], "wc_limitations_avg": [ 13.2, 11.26765281680262 ], "wc_review_avg": [ 353.8, 138.7074619477986 ], "wc_reply_reviewers_avg": [ 158.6, 246.58759092865964 ], "wc_reply_authors_avg": [ 169.0, 307.4696733012868 ], "reply_reviewers_avg": [ 1.2, 1.16619037896906 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7637626158259732, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2006864568723644841&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": 
"korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;;korea.ac.kr;korea.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Break It Down: Evidence for Structural Compositionality in Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70293", "id": "rwbzMiuFQl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/85069585133c4c168c865e65d72e9775-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rwbzMiuFQl", "openreview": "https://openreview.net/forum?id=rwbzMiuFQl", "poster": "/media/PosterPDFs/NeurIPS%202023/70293.png?t=1702000185.897953", "slides": "https://nips.cc/virtual/2023/poster/70293", "video": "https://nips.cc/virtual/2023/poster/70293", "author_site": "Michael Lepori, Thomas Serre, Ellie Pavlick", "tldr": "", "abstract": "Though modern neural networks have achieved impressive performance in both vision and language tasks, we know little about the functions that they implement. One possibility is that neural networks implicitly break down complex tasks into subroutines, implement modular solutions to these subroutines, and compose them into an overall solution to a task --- a property we term structural compositionality. Another possibility is that they may simply learn to match new inputs to learned templates, eliding task decomposition entirely. Here, we leverage model pruning techniques to investigate this question in both vision and language across a variety of architectures, tasks, and pretraining regimens. Our results demonstrate that models oftentimes implement solutions to subroutines via modular subnetworks, which can be ablated while maintaining the functionality of other subnetworks. This suggests that neural networks may be able to learn compositionality, obviating the need for specialized symbolic mechanisms.", "keywords": "Deep Learning;Compositionality;Cognitive Science", "primary_area": "", "supplementary_material": "", "author": "Michael A. Lepori;Thomas Serre;Ellie Pavlick", "authorids": "~Michael_A._Lepori1;~Thomas_Serre1;~Ellie_Pavlick1", "gender": "M;M;F", "homepage": "https://lepori.xyz/;https://serre-lab.clps.brown.edu/;http://cs.brown.edu/people/epavlick/", "dblp": "262/0162;;141/4059", "google_scholar": "G1fepc8AAAAJ;kZlPW4wAAAAJ;sFyrSa8AAAAJ", "orcid": ";;", "linkedin": "michael-lepori-925426124/;;", "or_profile": "~Michael_A._Lepori1;~Thomas_Serre1;~Ellie_Pavlick1", "aff": "Brown University;Universit\u00e9 de Toulouse;Brown University", "aff_domain": "brown.edu;univ-toulouse.fr;brown.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlepori2023break,\ntitle={Break It Down: Evidence for Structural Compositionality in Neural Networks},\nauthor={Michael A. 
Lepori and Thomas Serre and Ellie Pavlick},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rwbzMiuFQl}\n}", "github": "", "project": "", "reviewers": "xQUi;DSTn;W9E9;oWEk", "pdf_size": 1820981, "rating": "6;7;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;4;4;4", "presentation": "3;4;4;3", "wc_summary": "136;130;143;29", "wc_strengths": "194;70;93;85", "wc_weaknesses": "204;4;71;6", "wc_questions": "82;135;87;157", "wc_limitations": "85;279;49;38", "wc_review": "701;618;443;315", "wc_reply_reviewers": "127;25;20;49", "wc_reply_authors": "322;0;78;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 109.5, 46.703854230673514 ], "wc_strengths_avg": [ 110.5, 48.91063279083598 ], "wc_weaknesses_avg": [ 71.25, 81.24461520617844 ], "wc_questions_avg": [ 115.25, 31.767711595266032 ], "wc_limitations_avg": [ 112.75, 97.54582256560246 ], "wc_review_avg": [ 519.25, 150.26372649445375 ], "wc_reply_reviewers_avg": [ 55.25, 42.85075845303091 ], "wc_reply_authors_avg": [ 100.0, 132.06816421833082 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7816230005853409920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "brown.edu;univ-toulouse.fr;brown.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Brown University;Universit\u00e9 de Toulouse", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "title": "Reconstructing the Mind's Eye: fMRI-to-Image with Contrastive Learning and Diffusion Priors", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70292", "id": "rwrblCYb2A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ddab70bf41ffe5d423840644d3357f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rwrblCYb2A", "openreview": "https://openreview.net/forum?id=rwrblCYb2A", "poster": "/media/PosterPDFs/NeurIPS%202023/70292.png?t=1702067100.4895072", "slides": "https://nips.cc/virtual/2023/poster/70292", "video": "https://nips.cc/virtual/2023/poster/70292", "author_site": "Paul Scotti, Atmadeep Banerjee, Jimmie Goode, Stepan Shabalin, Alex Nguyen, ethan cohen, Aidan Dempster, Nathalie Verlinde, Elad Yundler, David Weisberg, Kenneth Norman, Tanishq Abraham", "tldr": "", "abstract": "We present MindEye, a novel fMRI-to-image approach to retrieve and reconstruct viewed images from brain activity. Our model comprises two parallel submodules that are specialized for retrieval (using contrastive learning) and reconstruction (using a diffusion prior). MindEye can map fMRI brain activity to any high dimensional multimodal latent space, like CLIP image space, enabling image reconstruction using generative models that accept embeddings from this latent space. 
We comprehensively compare our approach with other existing methods, using both qualitative side-by-side comparisons and quantitative evaluations, and show that MindEye achieves state-of-the-art performance in both reconstruction and retrieval tasks. In particular, MindEye can retrieve the exact original image even among highly similar candidates indicating that its brain embeddings retain fine-grained image-specific information. This allows us to accurately retrieve images even from large-scale databases like LAION-5B. We demonstrate through ablations that MindEye's performance improvements over previous methods result from specialized submodules for retrieval and reconstruction, improved training techniques, and training models with orders of magnitude more parameters. Furthermore, we show that MindEye can better preserve low-level image features in the reconstructions by using img2img, with outputs from a separate autoencoder. All code is available on GitHub.", "keywords": "fMRI;computational neuroscience;mind reading;diffusion models", "primary_area": "", "supplementary_material": "/attachment/4379a383911aa3261ab0f57df605b72934bcddb9.pdf", "author": "Paul Steven Scotti;Atmadeep Banerjee;Jimmie Goode;Stepan Shabalin;Alex Nguyen;Cohen Ethan;Aidan James Dempster;Nathalie Verlinde;Elad Yundler;David Weisberg;Kenneth Norman;Tanishq Mathew Abraham", "authorids": "~Paul_Steven_Scotti1;~Atmadeep_Banerjee1;~Jimmie_Goode1;~Stepan_Shabalin2;~Alex_Nguyen1;~Cohen_Ethan1;~Aidan_James_Dempster1;~Nathalie_Verlinde1;~Elad_Yundler1;~David_Weisberg1;~Kenneth_Norman1;~Tanishq_Mathew_Abraham1", "gender": "M;M;M;M;M;M;;;M;M;M;M", "homepage": "https://paulscotti.github.io/;https://atom-101.github.io;;https://neverix.github.io;https://anhhuyalex.github.io/;;;https://newcollegewest.princeton.edu/people/nathalie-verlinde-24;;http://compmem.princeton.edu/david-weisberg-2/;https://compmem.princeton.edu;https://tanishq.ai", "dblp": "323/1744;;222/3909;;;42/1136;261/3839;;;;99/6772;", "google_scholar": "AE-k4ukAAAAJ;lMSaCAkAAAAJ;qxgjXMYAAAAJ;;;;6A8gbWoAAAAJ;;;;A4ycnDQAAAAJ;hIyhkfQAAAAJ", "orcid": "0000-0003-4912-8809;;;;;;0009-0004-7831-6045;;;;;", "linkedin": "paulscotti/;;https://linkedin.com/in/jimgoo;stepan-shabalin-155a08237/;;ethan-cohen-179284105;aidan-dempster-4b3444179/;;elad-yundler-4219a717/;;;", "or_profile": "~Paul_Steven_Scotti1;~Atmadeep_Banerjee1;~Jimmie_Goode1;~Stepan_Shabalin2;~Alex_Nguyen1;~Cohen_Ethan1;~Aidan_James_Dempster1;~Nathalie_Verlinde1;~Elad_Yundler1;~David_Weisberg1;~Kenneth_Norman1;~Tanishq_Mathew_Abraham1", "aff": "Princeton University;;MedARC;Georgia Institute of Technology;Princeton University;Ecole Normale Sup\u00e9rieure de Paris;University of Toronto;Princeton University;Hebrew University of Jerusalem;Princeton University;Princeton University;University of California, Davis", "aff_domain": "princeton.edu;;medarc.ai;gatech.edu;princeton.edu;ens.fr;utoronto.ca;princeton.edu;huji.ac.il;princeton.edu;princeton.edu;ucdavis.edu", "position": "Postdoc;;Researcher;Undergrad student;PhD student;PhD student;Undergrad student;Undergrad student;MS student;Undergrad student;Full Professor;PhD student", "bibtex": "@inproceedings{\nscotti2023reconstructing,\ntitle={Reconstructing the Mind's Eye: f{MRI}-to-Image with Contrastive Learning and Diffusion Priors},\nauthor={Paul Steven Scotti and Atmadeep Banerjee and Jimmie Goode and Stepan Shabalin and Alex Nguyen and Cohen Ethan and Aidan James Dempster and Nathalie Verlinde and Elad Yundler and David Weisberg and Kenneth Norman and Tanishq 
Mathew Abraham},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rwrblCYb2A}\n}", "github": "", "project": "", "reviewers": "pqrP;M8r4;DnYs;Pn57", "pdf_size": 5934783, "rating": "6;7;7;8", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;4;4", "wc_summary": "116;113;143;93", "wc_strengths": "36;49;56;84", "wc_weaknesses": "121;91;129;36", "wc_questions": "50;177;6;113", "wc_limitations": "5;11;29;185", "wc_review": "328;441;363;511", "wc_reply_reviewers": "38;177;13;49", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 116.25, 17.795715776557007 ], "wc_strengths_avg": [ 56.25, 17.55526986406076 ], "wc_weaknesses_avg": [ 94.25, 36.49229370702806 ], "wc_questions_avg": [ 86.5, 64.62391198310421 ], "wc_limitations_avg": [ 57.5, 74.14007013754438 ], "wc_review_avg": [ 410.75, 70.87444885147256 ], "wc_reply_reviewers_avg": [ 69.25, 63.562469272362286 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 118, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15492193241387588355&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "princeton.edu;;medarc.ai;gatech.edu;princeton.edu;ens.fr;utoronto.ca;princeton.edu;huji.ac.il;princeton.edu;princeton.edu;ucdavis.edu", "author_num": 12, "aff_unique_index": "0;1;2;0;3;4;0;5;0;0;6", "aff_unique_norm": "Princeton University;MedARC;Georgia Institute of Technology;Ecole Normale Sup\u00e9rieure de Paris;University of Toronto;Hebrew University of Jerusalem;University of California, Davis", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.princeton.edu;https://www.medarc.org;https://www.gatech.edu;https://www.ens.fr;https://www.utoronto.ca;https://www.huji.ac.il;https://www.ucdavis.edu", "aff_unique_abbr": "Princeton;;Georgia Tech;ENS Paris;U of T;HUJI;UC Davis", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Paris;Jerusalem;Davis", "aff_country_unique_index": "0;0;0;0;1;2;0;3;0;0;0", "aff_country_unique": "United States;France;Canada;Israel" }, { "title": "Matrix Compression via Randomized Low Rank and Low Precision Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70291", "id": "rxsCTtkqA9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3bf4b55960aaa23553cd2a6bdc6e1b57-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rxsCTtkqA9", "openreview": "https://openreview.net/forum?id=rxsCTtkqA9", "poster": "/media/PosterPDFs/NeurIPS%202023/70291.png?t=1702063126.2764084", "slides": "https://nips.cc/virtual/2023/poster/70291", "video": "https://nips.cc/virtual/2023/poster/70291", "author_site": "Rajarshi Saha, Varun Srivastava, Mert Pilanci", "tldr": "", "abstract": "Matrices are exceptionally useful in various fields of study as they provide a convenient framework to organize and manipulate data in a structured manner. However, modern matrices can involve billions of elements, making their storage and processing quite demanding in terms of computational resources and memory usage. 
Although prohibitively large, such matrices are often approximately low rank. We propose an algorithm that exploits this structure to obtain a low rank decomposition of any matrix $\\mathbf{A}$ as $\\mathbf{A} \\approx \\mathbf{L}\\mathbf{R}$, where $\\mathbf{L}$ and $\\mathbf{R}$ are the low rank factors. The total number of elements in $\\mathbf{L}$ and $\\mathbf{R}$ can be significantly less than that in $\\mathbf{A}$. Furthermore, the entries of $\\mathbf{L}$ and $\\mathbf{R}$ are quantized to low precision formats -- compressing $\\mathbf{A}$ by giving us a low rank and low precision factorization. Our algorithm first computes an approximate basis of the range space of $\\mathbf{A}$ by randomly sketching its columns, followed by a quantization of the vectors constituting this basis. It then computes approximate projections of the columns of $\\mathbf{A}$ onto this quantized basis. We derive upper bounds on the approximation error of our algorithm, and analyze the impact of target rank and quantization bit-budget. The tradeoff between compression ratio and approximation accuracy allows for flexibility in choosing these parameters based on specific application requirements. We empirically demonstrate the efficacy of our algorithm in image compression, nearest neighbor classification of image and text embeddings, and compressing the layers of LlaMa-$7$b. Our results illustrate that we can achieve compression ratios as aggressive as one bit per matrix coordinate, all while surpassing or maintaining the performance of traditional compression techniques.", "keywords": "Matrix compression;Randomized low rank factorization;Randomized SVD;Sketching;Quantized embeddings;Random matrices", "primary_area": "", "supplementary_material": "/attachment/b2d0b7df1ebe7a92e7c67623cb0d3d62c0cf6278.pdf", "author": "Rajarshi Saha;Varun Srivastava;Mert Pilanci", "authorids": "~Rajarshi_Saha1;varunsrivastava.v@gmail.com;~Mert_Pilanci3", "gender": "M;;M", "homepage": "https://sites.google.com/view/rajarshi-saha/;;https://stanford.edu/~pilanci/", "dblp": "232/9641;;45/8056", "google_scholar": "https://scholar.google.com/citations?hl=en;;aSAS-aAAAAAJ", "orcid": "0000-0002-7863-9811;;", "linkedin": "rajarshisaha95/;;mert-pilanci-ba615743/", "or_profile": "~Rajarshi_Saha1;varunsrivastava.v@gmail.com;~Mert_Pilanci3", "aff": "Stanford University;;Stanford University", "aff_domain": "stanford.edu;;stanford.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nsaha2023matrix,\ntitle={Matrix Compression via Randomized Low Rank and Low Precision Factorization},\nauthor={Rajarshi Saha and Varun Srivastava and Mert Pilanci},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rxsCTtkqA9}\n}", "github": "", "project": "", "reviewers": "ipPc;L5cK;DCJv;WupA;z4CA;2AWv;D7Xb", "pdf_size": 2760048, "rating": "5;5;5;6;6;7;7", "confidence": "2;4;3;2;4;4;4", "soundness": "2;3;2;3;2;3;3", "novelty": "2;3;2;3;2;3;3", "presentation": "3;3;3;2;3;3;4", "wc_summary": "67;143;55;76;85;105;155", "wc_strengths": "38;54;36;24;30;76;47", "wc_weaknesses": "83;125;61;14;500;139;13", "wc_questions": "54;162;125;24;38;153;53", "wc_limitations": "24;5;4;7;1;18;56", "wc_review": "266;489;281;145;654;491;324", "wc_reply_reviewers": "0;68;86;0;426;83;29", "wc_reply_authors": "0;159;668;0;635;72;0", "reply_reviewers": "0;1;2;0;2;1;1", "reply_authors": "1;2;3;1;3;2;1", "rating_avg": [ 5.857142857142857, 0.8329931278350429 ], "confidence_avg": [ 
3.2857142857142856, 0.880630571852711 ], "soundness_avg": [ 2.5714285714285716, 0.49487165930539345 ], "novelty_avg": [ 2.5714285714285716, 0.49487165930539345 ], "presentation_avg": [ 3.0, 0.5345224838248488 ], "wc_summary_avg": [ 98.0, 35.42799538702207 ], "wc_strengths_avg": [ 43.57142857142857, 16.158652196774536 ], "wc_weaknesses_avg": [ 133.57142857142858, 156.31091465670698 ], "wc_questions_avg": [ 87.0, 53.50567393143593 ], "wc_limitations_avg": [ 16.428571428571427, 17.879414682470873 ], "wc_review_avg": [ 378.57142857142856, 160.54448172467622 ], "wc_reply_reviewers_avg": [ 98.85714285714286, 137.74555068928316 ], "wc_reply_authors_avg": [ 219.14285714285714, 278.7084644628959 ], "reply_reviewers_avg": [ 1.0, 0.7559289460184544 ], "reply_authors_avg": [ 1.8571428571428572, 0.8329931278350429 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.44513190725972585, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7093736020871277455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "stanford.edu;;stanford.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "EgoEnv: Human-centric environment representations from egocentric video", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70290", "id": "rybsHQ4DXy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd2605c5d854837aaf095537e82f1883-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rybsHQ4DXy", "openreview": "https://openreview.net/forum?id=rybsHQ4DXy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70290", "video": "https://nips.cc/virtual/2023/poster/70290", "author_site": "Tushar Nagarajan, Santhosh Kumar Ramakrishnan, Ruta Desai, James Hillis, Kristen Grauman", "tldr": "", "abstract": "First-person video highlights a camera-wearer's activities in the context of their persistent environment. However, current video understanding approaches reason over visual features from short video clips that are detached from the underlying physical space and capture only what is immediately visible. To facilitate human-centric environment understanding, we present an approach that links egocentric video and the environment by learning representations that are predictive of the camera-wearer's (potentially unseen) local surroundings. We train such models using videos from agents in simulated 3D environments where the environment is fully observable, and test them on human-captured real-world videos from unseen environments. On two human-centric video tasks, we show that models equipped with our environment-aware features consistently outperform their counterparts with traditional clip features. 
Moreover, despite being trained exclusively on simulated videos, our approach successfully handles real-world videos from HouseTours and Ego4D, and achieves state-of-the-art results on the Ego4D NLQ challenge.", "keywords": "egocentric video;3D environment;sim2real;sim-to-real;episodic memory", "primary_area": "", "supplementary_material": "/attachment/081de6e53ea6e63d27a71aecaba0a91a17270059.pdf", "author": "Tushar Nagarajan;Santhosh Kumar Ramakrishnan;Ruta Desai;James Hillis;Kristen Grauman", "authorids": "~Tushar_Nagarajan1;~Santhosh_Kumar_Ramakrishnan1;~Ruta_Desai1;~James_Hillis1;~Kristen_Grauman1", "gender": ";M;M;F;F", "homepage": "https://tushar-n.github.io/;https://srama2512.github.io/;https://scholar.google.com/citations?user=PbY8v7oAAAAJ&hl=en;http://www.cs.utexas.edu/~grauman/;http://rutadesai.github.io/", "dblp": "207/8308;199/1913;;57/4553;128/0409", "google_scholar": "KAKqSwIAAAAJ;zr9B1YgAAAAJ;PbY8v7oAAAAJ;Jp6Mz1sAAAAJ;https://scholar.google.ca/citations?user=bwZFR4EAAAAJ", "orcid": ";;;;", "linkedin": ";;;;ruta-desai", "or_profile": "~Tushar_Nagarajan1;~Santhosh_Kumar_Ramakrishnan1;~James_Hillis1;~Kristen_Grauman1;~Ruta_Parimal_Desai1", "aff": "University of Texas, Austin;University of Texas, Austin;;University of Texas, Austin;Meta, FAIR", "aff_domain": "utexas.edu;utexas.edu;;utexas.edu;meta.com", "position": "PhD student;PhD student;;Professor;Researcher", "bibtex": "@inproceedings{\nnagarajan2023egoenv,\ntitle={EgoEnv: Human-centric environment representations from egocentric video},\nauthor={Tushar Nagarajan and Santhosh Kumar Ramakrishnan and Ruta Desai and James Hillis and Kristen Grauman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rybsHQ4DXy}\n}", "github": "", "project": "", "reviewers": "chrV;eEwQ;iTsH;ssfq;9ZdK", "pdf_size": 3137037, "rating": "7;7;7;8;8", "confidence": "4;3;4;3;3", "soundness": "3;3;3;4;4", "novelty": "3;3;3;3;4", "presentation": "3;3;4;3;3", "wc_summary": "76;119;306;71;77", "wc_strengths": "118;80;41;91;90", "wc_weaknesses": "67;129;45;67;133", "wc_questions": "69;24;21;35;83", "wc_limitations": "31;13;21;25;18", "wc_review": "361;365;434;289;401", "wc_reply_reviewers": "25;0;30;30;29", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 129.8, 89.78062151711805 ], "wc_strengths_avg": [ 84.0, 24.92388412747901 ], "wc_weaknesses_avg": [ 88.2, 35.87979933054253 ], "wc_questions_avg": [ 46.4, 25.008798451744937 ], "wc_limitations_avg": [ 21.6, 6.118823416311343 ], "wc_review_avg": [ 370.0, 48.4231349666665 ], "wc_reply_reviewers_avg": [ 22.8, 11.548160026601641 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666665, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13388925322461497776&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "utexas.edu;utexas.edu;;utexas.edu;meta.com", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Texas at Austin;Meta", "aff_unique_dep": ";Meta", "aff_unique_url": 
"https://www.utexas.edu;https://meta.org", "aff_unique_abbr": "UT Austin;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Private Federated Frequency Estimation: Adapting to the Hardness of the Instance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70289", "id": "rzDBoh1tBh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5bf40077b2bac53399676d33d564ef58-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rzDBoh1tBh", "openreview": "https://openreview.net/forum?id=rzDBoh1tBh", "poster": "/media/PosterPDFs/NeurIPS%202023/70289.png?t=1701539301.339692", "slides": "https://nips.cc/virtual/2023/poster/70289", "video": "https://nips.cc/virtual/2023/poster/70289", "author_site": "Jingfeng Wu, Wennan Zhu, Peter Kairouz, Vladimir Braverman", "tldr": "", "abstract": "In federated frequency estimation (FFE), multiple clients work together to estimate the frequency of their local data by communicating with a server, while maintaining the security constraint of $\\mathtt{secsum}$ where the server can only access the sum of client-held vectors. For FFE with a single communication round, it is known that count sketch is nearly information-theoretically optimal [Chen et al., 2022]. However, when multiple communication rounds are allowed, we propose a new sketch algorithm that is provably more accurate than a naive adaptation of count sketch. Furthermore, we show that both our sketch algorithm and count sketch can achieve better accuracy when the problem instance is simpler. Therefore, we propose a two-phase approach to enable the use of a smaller sketch size for simpler problems. Finally, we provide mechanisms to make our proposed algorithm differentially private. 
We verify the performance of our methods through experiments conducted on real datasets.", "keywords": "sketch;federated analytics;privacy", "primary_area": "", "supplementary_material": "/attachment/10d8f581778b5cc198388a734f5b6072399cf289.pdf", "author": "Jingfeng Wu;Wennan Zhu;Peter Kairouz;Vladimir Braverman", "authorids": "~Jingfeng_Wu1;~Wennan_Zhu1;~Peter_Kairouz1;~Vladimir_Braverman1", "gender": "M;F;M;Unspecified", "homepage": "https://uuujf.github.io;https://wennanzhu.github.io/;https://kairouzp.github.io/;http://www.cs.jhu.edu/~vova/", "dblp": ";203/8213;129/1254;14/4758", "google_scholar": "z-KILD8AAAAJ;wR1XK5sAAAAJ;m8NUgw0AAAAJ;https://scholar.google.com.tw/citations?user=DTthB48AAAAJ", "orcid": "0009-0009-3414-4487;;;", "linkedin": "jingfeng-wu-79205b184/;;kayrouzp;", "or_profile": "~Jingfeng_Wu1;~Wennan_Zhu1;~Peter_Kairouz1;~Vladimir_Braverman1", "aff": "Johns Hopkins University;Google;Google;Department of Computer Science, Whiting School of Engineering", "aff_domain": "jhu.edu;google.com;google.com;cs.jhu.edu", "position": "PhD student;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nwu2023private,\ntitle={Private Federated Frequency Estimation: Adapting to the Hardness of the Instance},\nauthor={Jingfeng Wu and Wennan Zhu and Peter Kairouz and Vladimir Braverman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rzDBoh1tBh}\n}", "github": "", "project": "", "reviewers": "2XAM;vZGP;q1hW;gbH4", "pdf_size": 527129, "rating": "5;6;7;7", "confidence": "3;3;5;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "148;85;222;221", "wc_strengths": "20;57;56;226", "wc_weaknesses": "73;21;250;178", "wc_questions": "205;27;28;37", "wc_limitations": "1;2;28;29", "wc_review": "447;192;584;691", "wc_reply_reviewers": "0;0;256;36", "wc_reply_authors": "0;0;203;0", "reply_reviewers": "0;0;3;1", "reply_authors": "1;1;4;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 169.0, 57.03069349043548 ], "wc_strengths_avg": [ 89.75, 80.06364655697367 ], "wc_weaknesses_avg": [ 130.5, 89.20902420719554 ], "wc_questions_avg": [ 74.25, 75.58893768270593 ], "wc_limitations_avg": [ 15.0, 13.509256086106296 ], "wc_review_avg": [ 478.5, 186.65543120948826 ], "wc_reply_reviewers_avg": [ 73.0, 106.67239567948214 ], "wc_reply_authors_avg": [ 50.75, 87.90157848412052 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CARklAYSChgJ:scholar.google.com/&scioq=Private+Federated+Frequency+Estimation:+Adapting+to+the+Hardness+of+the+Instance&hl=en&as_sdt=0,14", "gs_version_total": 5, "email": "jhu.edu;google.com;google.com;cs.jhu.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Johns Hopkins University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.jhu.edu;https://www.google.com", "aff_unique_abbr": "JHU;Google", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Mountain View;Baltimore", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GALOPA: Graph Transport 
Learning with Optimal Plan Alignment", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70288", "id": "rzlqOVExUA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1d35af80e775e342f4cd3792e4405837-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=rzlqOVExUA", "openreview": "https://openreview.net/forum?id=rzlqOVExUA", "poster": "/media/PosterPDFs/NeurIPS%202023/70288.png?t=1697815190.6681743", "slides": "https://nips.cc/virtual/2023/poster/70288", "video": "https://nips.cc/virtual/2023/poster/70288", "author_site": "Yejiang Wang, Yuhai Zhao, Daniel Zhengkui Wang, Ling Li", "tldr": "", "abstract": "Self-supervised learning on graphs aims to learn graph representations in an unsupervised manner. While graph contrastive learning (GCL - relying on graph augmentation for creating perturbation views of anchor graphs and maximizing/minimizing similarity for positive/negative pairs) is a popular self-supervised method, it faces challenges in finding label-invariant augmented graphs and determining the exact extent of similarity between sample pairs to be achieved. In this work, we propose an alternative self-supervised solution that (i) goes beyond the label invariance assumption without distinguishing between positive/negative samples, (ii) can calibrate the encoder for preserving not only the structural information inside the graph, but the matching information between different graphs, (iii) learns isometric embeddings that preserve the distance between graphs, a by-product of our objective. Motivated by optimal transport theory, this scheme relies on the observation that the optimal transport plans between node representations at the output space, which measure the matching probability between two distributions, should be consistent with the plans between the corresponding graphs at the input space. 
The experimental findings include: (i) The plan alignment strategy significantly outperforms the counterpart using the transport distance; (ii) The proposed model shows superior performance using only node attributes as calibration signals, without relying on edge information; (iii) Our model maintains robust results even under high perturbation rates; (iv) Extensive experiments on various benchmarks validate the effectiveness of the proposed method.", "keywords": "graph neural network; self-supervised learning; optimal transport;", "primary_area": "", "supplementary_material": "/attachment/6f50f7132253ec76d001a3324f0f0b1237da91a2.zip", "author": "Yejiang Wang;Yuhai Zhao;Daniel Zhengkui Wang;Ling Li", "authorids": "~Yejiang_Wang1;~Yuhai_Zhao1;~Daniel_Zhengkui_Wang1;~Ling_Li10", "gender": "M;;M;F", "homepage": "https://neu.edu.cn;https://neu.edu.cn;https://www.singaporetech.edu.sg/directory/faculty/daniel-zhengkui-wang;https://www.sxu.edu.cn/", "dblp": "https://dblp.uni-trier.de/pid/281/8211;73/5946.html;07/9078;", "google_scholar": "https://scholar.google.com.hk/citations?user=LVRwELgAAAAJ;https://scholar.google.com.hk/citations?user=UNRsYLAAAAAJ;https://scholar.google.com.sg/citations?user=OuAerIgAAAAJ;", "orcid": "0000-0001-7908-4275;0000-0002-1080-0859;;0000-0002-2063-4006", "linkedin": ";;;", "or_profile": "~Yejiang_Wang1;~Yuhai_Zhao1;~Daniel_Zhengkui_Wang1;~Ling_Li10", "aff": "Northeastern University;Northeastern University;Singapore Institute of Technology;Northeastern University", "aff_domain": "neu.edu.cn;neu.edu.cn;singaporetech.edu.sg;neu.edu.cn", "position": "PhD student;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nwang2023galopa,\ntitle={{GALOPA}: Graph Transport Learning with Optimal Plan Alignment},\nauthor={Yejiang Wang and Yuhai Zhao and Daniel Zhengkui Wang and Ling Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=rzlqOVExUA}\n}", "github": "", "project": "", "reviewers": "bARL;zpGL;YJ1t;q42Z", "pdf_size": 914896, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;4;3;4", "wc_summary": "152;54;85;99", "wc_strengths": "55;63;77;172", "wc_weaknesses": "118;59;102;41", "wc_questions": "124;42;328;64", "wc_limitations": "12;1;1;13", "wc_review": "461;219;593;389", "wc_reply_reviewers": "43;0;208;10", "wc_reply_authors": "977;31;1646;34", "reply_reviewers": "1;0;2;1", "reply_authors": "3;2;6;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 97.5, 35.42950747611375 ], "wc_strengths_avg": [ 91.75, 46.99667541433117 ], "wc_weaknesses_avg": [ 80.0, 31.184932259025352 ], "wc_questions_avg": [ 139.5, 112.89264812201014 ], "wc_limitations_avg": [ 6.75, 5.7608593109014565 ], "wc_review_avg": [ 415.5, 134.99166640944915 ], "wc_reply_reviewers_avg": [ 65.25, 83.93859362653153 ], "wc_reply_authors_avg": [ 672.0, 681.840523876368 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15023808331780098924&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "neu.edu.cn;neu.edu.cn;singaporetech.edu.sg;neu.edu.cn", 
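A hedged Python sketch of the plan-alignment idea in the GALOPA abstract: the transport plan between node embeddings in the output space should agree with the plan between raw node attributes in the input space. The entropic Sinkhorn solver, squared-Euclidean cost, and squared-Frobenius consistency penalty are assumptions, not the paper's confirmed choices:

```python
# Hedged sketch of plan alignment as described in the GALOPA abstract.
# Solver, cost, and loss form are illustrative assumptions.
import numpy as np

def sinkhorn_plan(X, Y, eps=0.1, iters=200):
    """Entropic-OT transport plan between two point sets (uniform marginals)."""
    C = np.linalg.norm(X[:, None] - Y[None], axis=-1) ** 2  # pairwise cost
    K = np.exp(-C / eps)                                    # Gibbs kernel
    a = np.full(len(X), 1.0 / len(X))
    b = np.full(len(Y), 1.0 / len(Y))
    v = np.ones(len(Y))
    for _ in range(iters):                                  # Sinkhorn updates
        u = a / (K @ v)
        v = b / (K.T @ u)
    return u[:, None] * K * v[None, :]                      # transport plan

def plan_alignment_loss(x1, x2, z1, z2):
    """Penalize disagreement between input-space and output-space plans."""
    plan_in = sinkhorn_plan(x1, x2)    # plan between raw node attributes
    plan_out = sinkhorn_plan(z1, z2)   # plan between encoder embeddings
    return np.sum((plan_in - plan_out) ** 2)
```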
"author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Northeastern University;Singapore Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.singaporetech.edu.sg", "aff_unique_abbr": "NEU;SIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Singapore" }, { "id": "rzu41O7us0", "title": "Deconvolving Complex Neuronal Networks into Interpretable Task-Specific Connectomes", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neuronal responses associated with complex tasks are superpositions of several elementary physiological and functional responses. Important challenges in this context relate to identification of elementary responses (also known as basic functional neuronal networks), combinations of responses for given tasks, and their use in task and efficacy prediction, and physiological characterization. Task-specific functional MRI (fMRI) images provide excellent datasets for studying the neuronal basis of cognitive processes. In this work, we focus on the problem of deconvolving task-specific aggregate neuronal networks into elementary networks, to use these networks for functional characterization, and to ``explain'' these networks by mapping them to underlying physiological regions of the brain. This task poses a number of challenges due to very high dimensionality, small number of samples, acquisition variability, and noise. We propose a deconvolution method based on supervised non-negative matrix factorization (SupNMF) that identifies elementary networks as factors of a suitably constructed matrix. We show the following important results: (i) SupNMF reveals cognitive \"building blocks\" of task connectomes that are physiologically interpretable; (ii) SupNMF factors can be used to predict tasks with high accuracy; and (iii) SupNMF outperforms other supervised factoring techniques both in terms of prediction accuracy and interpretability. 
More broadly, our framework provides important insights into the physiological underpinnings of brain function and individual performance.", "keywords": "functional connectomics;supervised dimension reduction;supervised nonnegative matrix factorization", "primary_area": "", "supplementary_material": "/attachment/f77924661a9f7f38af24ec1f188536c5ec7fdc46.pdf", "author": "Yifan Wang;Ananth Grama;Vikram Ravindra", "authorids": "~Yifan_Wang14;~Ananth_Grama1;~Vikram_Ravindra1", "gender": "F;M;", "homepage": "https://cacayaya.github.io/;https://www.cs.purdue.edu/homes/ayg/;https://www.vikramravindra.com", "dblp": ";;155/8679", "google_scholar": "hqL5jWYAAAAJ;https://scholar.google.com.tw/citations?user=bpsZlEQAAAAJ;DiUUnAUAAAAJ", "orcid": ";;0000-0002-9315-6594", "linkedin": "yifan-wang-66521524b/;;vikramravinda/", "or_profile": "~Yifan_Wang14;~Ananth_Grama1;~Vikram_Ravindra1", "aff": "Purdue University;Purdue University;University of Cincinnati", "aff_domain": "purdue.edu;purdue.edu;uc.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nwang2023deconvolving,\ntitle={Deconvolving Complex Neuronal Networks into Interpretable Task-Specific Connectomes},\nauthor={Yifan Wang and Ananth Grama and Vikram Ravindra},\nyear={2023},\nurl={https://openreview.net/forum?id=rzu41O7us0}\n}", "github": "", "project": "", "reviewers": "1xzo;zaHM;jVGg;vACv;Rosr", "site": "https://openreview.net/forum?id=rzu41O7us0", "pdf_size": 4191456, "rating": "4;4;4;5;7", "confidence": "4;4;4;2;4", "soundness": "2;3;2;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "140;65;67;48;187", "wc_strengths": "100;28;109;24;169", "wc_weaknesses": "142;119;335;33;79", "wc_questions": "95;176;131;31;85", "wc_limitations": "1;10;61;8;11", "wc_review": "478;398;703;144;531", "wc_reply_reviewers": "13;98;18;0;18", "wc_reply_authors": "0;123;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 101.4, 53.248849752835035 ], "wc_strengths_avg": [ 86.0, 54.44630382312467 ], "wc_weaknesses_avg": [ 141.6, 103.56370020427042 ], "wc_questions_avg": [ 103.6, 48.33880428806654 ], "wc_limitations_avg": [ 18.2, 21.68317319951118 ], "wc_review_avg": [ 450.8, 183.13426768357692 ], "wc_reply_reviewers_avg": [ 29.4, 34.92620792470891 ], "wc_reply_authors_avg": [ 24.6, 49.2 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.0857492925712544, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:X3zo1B1rq_kJ:scholar.google.com/&scioq=Deconvolving+Complex+Neuronal+Networks+into+Interpretable+Task-Specific+Connectomes&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Purdue University;University of Cincinnati", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.uc.edu", "aff_unique_abbr": "Purdue;UC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Focused Transformer: Contrastive Training for Context Scaling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70287", "id": "s1FjXzJ0jy", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/8511d06d5590f4bda24d42087802cc81-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=s1FjXzJ0jy", "openreview": "https://openreview.net/forum?id=s1FjXzJ0jy", "poster": "/media/PosterPDFs/NeurIPS%202023/70287.png?t=1701800918.2783954", "slides": "https://nips.cc/virtual/2023/poster/70287", "video": "https://nips.cc/virtual/2023/poster/70287", "author_site": "Szymon Tworkowski, Konrad Staniszewski, Miko\u0142aj Pacek, Yuhuai Wu, Henryk Michalewski, Piotr Mi\u0142o\u015b", "tldr": "", "abstract": "Large language models have an exceptional capability to incorporate new information in a contextual manner. However, the full potential of such an approach is often restrained due to a limitation in the effective context length. One solution to this issue is to endow an attention layer with access to an additional context, which comprises of (key, value) pairs. Yet, as the number of documents increases, the proportion of relevant keys to irrelevant ones decreases, leading the model to focus more on the irrelevant keys. We identify a significant challenge, dubbed the distraction issue, where keys linked to different semantic values might overlap, making them hard to distinguish. To tackle this problem, we introduce the Focused Transformer (FoT), a technique that employs a training process inspired by contrastive learning. This novel approach enhances the structure of the (key, value) space, enabling an extension of the context length. Our method allows for fine-tuning pre-existing, large-scale models to lengthen their effective context. This is demonstrated by our fine-tuning of $3 B$ and $7 B$ OpenLLaMA checkpoints. The resulting models, which we name LongLLaMA, exhibit advancements in tasks requiring a long context. 
We further illustrate that our LongLLaMA models adeptly manage a $256 k$ context length for passkey retrieval.", "keywords": "Transformers;Language Models;Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Szymon Tworkowski;Konrad Staniszewski;Miko\u0142aj Pacek;Yuhuai Wu;Henryk Michalewski;Piotr Mi\u0142o\u015b", "authorids": "~Szymon_Tworkowski1;~Konrad_Staniszewski1;~Miko\u0142aj_Pacek2;~Yuhuai_Wu1;~Henryk_Michalewski1;~Piotr_Mi\u0142o\u015b1", "gender": ";;;M;M;", "homepage": "https://syzymon.github.io;;;http://www.cs.toronto.edu/~ywu/;https://www.mimuw.edu.pl/~henrykm/;", "dblp": "304/8909;;;;https://dblp.uni-trier.de/pers/hd/m/Michalewski:Henryk;208/0989.html", "google_scholar": "1V8AeXYAAAAJ;;;https://scholar.google.ca/citations?user=bOQGfFIAAAAJ;YdHW1ycAAAAJ;Se68XecAAAAJ", "orcid": ";;;;;", "linkedin": "szymon-tworkowski/;;;;henryk-michalewski-8a230a27/;piotr-milos-4b02151/", "or_profile": "~Szymon_Tworkowski1;~Konrad_Staniszewski1;~Miko\u0142aj_Pacek2;~Yuhuai_Wu1;~Henryk_Michalewski1;~Piotr_Mi\u0142o\u015b1", "aff": "University of Warsaw;;;Stanford University;Google DeepMind;IDEAS NCBR", "aff_domain": "uw.edu.pl;;;stanford.edu;google.com;ideas-ncbr.pl", "position": "MS student;;;Postdoc;Researcher;Researcher", "bibtex": "@inproceedings{\ntworkowski2023focused,\ntitle={Focused Transformer: Contrastive Training for Context Scaling},\nauthor={Szymon Tworkowski and Konrad Staniszewski and Miko{\\l}aj Pacek and Yuhuai Wu and Henryk Michalewski and Piotr Mi{\\l}o{\\'s}},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=s1FjXzJ0jy}\n}", "github": "", "project": "", "reviewers": "Sux9;v42b;GWpw;fPf8", "pdf_size": 680606, "rating": "5;6;8;8", "confidence": "4;4;4;5", "soundness": "3;2;4;4", "novelty": "2;2;3;4", "presentation": "3;2;4;3", "wc_summary": "114;295;96;74", "wc_strengths": "80;87;53;59", "wc_weaknesses": "348;202;54;81", "wc_questions": "51;133;103;70", "wc_limitations": "10;34;7;12", "wc_review": "603;751;313;296", "wc_reply_reviewers": "29;25;60;64", "wc_reply_authors": "30;0;11;177", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 144.75, 87.8958901200733 ], "wc_strengths_avg": [ 69.75, 14.13108276106258 ], "wc_weaknesses_avg": [ 171.25, 116.27419103137204 ], "wc_questions_avg": [ 89.25, 31.371762781201824 ], "wc_limitations_avg": [ 15.75, 10.685855136581255 ], "wc_review_avg": [ 490.75, 193.55409450590292 ], "wc_reply_reviewers_avg": [ 44.5, 17.613914953808536 ], "wc_reply_authors_avg": [ 54.5, 71.53495648981692 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 125, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9230341592198565561&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uw.edu.pl;;;stanford.edu;google.com;ideas-ncbr.pl", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Warsaw;Stanford University;Google;Institute for Development, Economic Analysis, and Simulation (IDEAS)", "aff_unique_dep": ";;Google DeepMind;", "aff_unique_url": 
"https://www.uw.edu.pl;https://www.stanford.edu;https://deepmind.com;https://www.ideas-ncbr.gov.pl", "aff_unique_abbr": "UW;Stanford;DeepMind;IDEAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Poland;United States;United Kingdom" }, { "title": "On Certified Generalization in Structured Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70286", "id": "s1jQ91yFAb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61674667d642ae52f6bb281bea90ee29-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=s1jQ91yFAb", "openreview": "https://openreview.net/forum?id=s1jQ91yFAb", "poster": "/media/PosterPDFs/NeurIPS%202023/70286.png?t=1699194970.6006799", "slides": "https://nips.cc/virtual/2023/poster/70286", "video": "https://nips.cc/virtual/2023/poster/70286", "author_site": "Bastian Boll, Christoph Schn\u00f6rr", "tldr": "", "abstract": "In structured prediction, target objects have rich internal structure which does not factorize into independent components and violates common i.i.d. assumptions. This challenge becomes apparent through the exponentially large output space in applications such as image segmentation or scene graph generation.\nWe present a novel PAC-Bayesian risk bound for structured prediction wherein the rate of generalization scales not only with the number of structured examples but also with their size.\nThe underlying assumption, conforming to ongoing research on generative models, is that data are generated by the Knothe-Rosenblatt rearrangement of a factorizing reference measure. This allows to explicitly distill the structure between random output variables into a Wasserstein dependency matrix. 
\nOur work makes a preliminary step towards leveraging powerful generative models to establish generalization bounds for discriminative downstream tasks in the challenging setting of structured prediction.", "keywords": "Structured Prediction;PAC-Bayes;Concentration Inequalities;Statistical Learning Theory;Knothe-Rosenblatt Rearrangement", "primary_area": "", "supplementary_material": "/attachment/9c012fb31115f9617faec795487d3da90e9821ba.zip", "author": "Bastian Boll;Christoph Schnoerr", "authorids": "~Bastian_Boll1;~Christoph_Schnoerr1", "gender": "M;", "homepage": "https://ipa.math.uni-heidelberg.de;https://ipa.math.uni-heidelberg.de", "dblp": "274/2194;59/5226", "google_scholar": ";https://scholar.google.de/citations?user=C-5j7CQAAAAJ", "orcid": "0000-0002-3490-3350;", "linkedin": ";", "or_profile": "~Bastian_Boll1;~Christoph_Schnoerr1", "aff": "Heidelberg University, Ruprecht-Karls-Universit\u00e4t Heidelberg;Ruprecht-Karls-Universit\u00e4t Heidelberg", "aff_domain": "iwr.uni-heidelberg.de;uni-heidelberg.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nboll2023on,\ntitle={On Certified Generalization in Structured Prediction},\nauthor={Bastian Boll and Christoph Schnoerr},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=s1jQ91yFAb}\n}", "github": "", "project": "", "reviewers": "H5Qp;dT9a;uxkb;kVX1;zkw9", "pdf_size": 528114, "rating": "6;6;6;6;6", "confidence": "3;3;2;4;3", "soundness": "3;3;3;2;4", "novelty": "3;3;3;3;2", "presentation": "2;3;3;3;3", "wc_summary": "46;39;24;75;187", "wc_strengths": "58;66;77;41;91", "wc_weaknesses": "36;207;38;162;64", "wc_questions": "6;5;169;54;88", "wc_limitations": "25;1;11;7;4", "wc_review": "171;318;319;339;434", "wc_reply_reviewers": "27;52;20;50;67", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 74.2, 58.78571254990451 ], "wc_strengths_avg": [ 66.6, 16.930445948054647 ], "wc_weaknesses_avg": [ 101.4, 70.02742319977226 ], "wc_questions_avg": [ 64.4, 60.901888312268284 ], "wc_limitations_avg": [ 9.6, 8.380930735902785 ], "wc_review_avg": [ 316.2, 84.2505786330278 ], "wc_reply_reviewers_avg": [ 43.2, 17.2673101553195 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ycgT2gVwwi0J:scholar.google.com/&scioq=On+Certified+Generalization+in+Structured+Prediction&hl=en&as_sdt=0,44", "gs_version_total": 7, "email": "iwr.uni-heidelberg.de;uni-heidelberg.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Heidelberg University;Ruprecht-Karls-Universit\u00e4t Heidelberg", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-heidelberg.de;https://www.uni-heidelberg.de/", "aff_unique_abbr": "Uni Heidelberg;Uni Heidelberg", "aff_campus_unique_index": "0", "aff_campus_unique": "Heidelberg;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "NeuroEvoBench: Benchmarking Evolutionary Optimizers for Deep Learning Applications", "status": "Poster", "track": "Datasets & Benchmarks", "site": 
"https://nips.cc/virtual/2023/poster/73445", "id": "s6qtLyR6uJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/660ba7851661638c559df47743c69e40-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=s6qtLyR6uJ", "openreview": "https://openreview.net/forum?id=s6qtLyR6uJ", "poster": "/media/PosterPDFs/NeurIPS%202023/73445.png?t=1699491039.4181628", "slides": "https://nips.cc/virtual/2023/poster/73445", "video": "https://nips.cc/virtual/2023/poster/73445", "author_site": "Robert Lange, Yujin Tang, Yingtao Tian", "tldr": "", "abstract": "Recently, the Deep Learning community has become interested in evolutionary optimization (EO) as a means to address hard optimization problems, e.g. meta-learning through long inner loop unrolls or optimizing non-differentiable operators. One core reason for this trend has been the recent innovation in hardware acceleration and compatible software -- making distributed population evaluations much easier than before. Unlike for gradient descent-based methods though, there is a lack of hyperparameter understanding and best practices for EO \u2013 arguably due to severely less `graduate student descent' and benchmarking being performed for EO methods. Additionally, classical benchmarks from the evolutionary community provide few practical insights for Deep Learning applications. This poses challenges for newcomers to hardware-accelerated EO and hinders significant adoption. Hence, we establish a new benchmark of EO methods (NEB) tailored toward Deep Learning applications and exhaustively evaluate traditional and meta-learned EO. We investigate core scientific questions including resource allocation, fitness shaping, normalization, regularization & scalability of EO. The benchmark is open-sourced at https://github.com/neuroevobench/neuroevobench under Apache-2.0 license.", "keywords": "Evolution;Evolution Strategies;Evolutionary Optimization;Benchmark;Neuroevolution;Black-Box Optimization", "primary_area": "", "supplementary_material": "/attachment/67b1b2f29db08b2b1e09c7fcbc1c885ef4fa5724.pdf", "author": "Robert Tjarko Lange;Yujin Tang;Yingtao Tian", "authorids": "~Robert_Tjarko_Lange1;~Yujin_Tang1;~Yingtao_Tian1", "gender": ";M;", "homepage": "https://roberttlange.github.io/;;https://alantian.net/", "dblp": "245/9152;190/1177.html;180/5335", "google_scholar": "https://scholar.google.es/citations?user=cTrc3x4AAAAJ;https://scholar.google.co.jp/citations?user=3czUzRYAAAAJ;17Fe5K0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Robert_Tjarko_Lange1;~Yujin_Tang1;~Yingtao_Tian1", "aff": "TU Berlin;Google;Google", "aff_domain": "tu-berlin.de;google.com;google.com", "position": "PhD student;Researcher;Research Scientist", "bibtex": "@inproceedings{\nlange2023neuroevobench,\ntitle={NeuroEvoBench: Benchmarking Evolutionary Optimizers for Deep Learning Applications},\nauthor={Robert Tjarko Lange and Yujin Tang and Yingtao Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=s6qtLyR6uJ}\n}", "github": "", "project": "", "reviewers": "YBAt;geZa;1pB9;ezSc", "pdf_size": 11188298, "rating": "5;7;7;7", "confidence": "3;1;3;2", "wc_summary_and_contributions": "46;51;43;88", "wc_strengths": "47;45;23;36", "wc_improvement": "234;10;92;20", "wc_limitations": "19;1;1;55", "wc_correctness": "68;22;1;13", "wc_clarity": "9;1;2;17", "wc_relation_to_prior_work": "5;1;2;29", "wc_documentation": "17;8;1;1", 
"wc_additional_feedback": "1;1;1;1", "wc_review": "446;140;166;260", "wc_reply_reviewers": "0;14;212;5", "wc_reply_authors": "1097;219;629;412", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 57.0, 18.12456896039186 ], "wc_strengths_avg": [ 37.75, 9.470348462437906 ], "wc_improvement_avg": [ 89.0, 89.49301648732151 ], "wc_limitations_avg": [ 19.0, 22.045407685048602 ], "wc_correctness_avg": [ 26.0, 25.367301787931645 ], "wc_clarity_avg": [ 7.25, 6.417748826496718 ], "wc_relation_to_prior_work_avg": [ 9.25, 11.497282287566918 ], "wc_documentation_avg": [ 6.75, 6.5717197140474575 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 253.0, 120.03749414245534 ], "wc_reply_reviewers_avg": [ 57.75, 89.19746352895916 ], "wc_reply_authors_avg": [ 589.25, 327.0675580059875 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12423498665690720675&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tu-berlin.de;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.tu-berlin.de;https://www.google.com", "aff_unique_abbr": "TU Berlin;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Berlin;Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Don\u2019t Stop Pretraining? Make Prompt-based Fine-tuning Powerful Learner", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70285", "id": "s7xWeJQACI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1289f9195d2ef8cfdfe5f50930c4a7c4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=s7xWeJQACI", "openreview": "https://openreview.net/forum?id=s7xWeJQACI", "poster": "/media/PosterPDFs/NeurIPS%202023/70285.png?t=1699078086.175832", "slides": "https://nips.cc/virtual/2023/poster/70285", "video": "https://nips.cc/virtual/2023/poster/70285", "author_site": "Zhengxiang Shi, Aldo Lipani", "tldr": "", "abstract": "Language models (LMs) trained on vast quantities of unlabelled data have greatly advanced the field of natural language processing (NLP). In this study, we re-visit the widely accepted notion in NLP that continued pre-training LMs on task-related texts improves the performance of fine-tuning (FT) in downstream tasks. Through experiments on eight single-sentence tasks and eight sentence-pair tasks in both semi-supervised and fully-supervised settings, we find that conventional continued pre-training does not consistently provide benefits and can even be detrimental for sentence-pair tasks or when prompt-based FT is used. To tackle these issues, we propose Prompt-based Continued Pre-training (PCP), which combines the idea of instruction tuning with conventional continued pre-training. Our approach aims to improve the performance of prompt-based FT by presenting both task-related texts and prompt templates to LMs through unsupervised pre-training objectives before fine-tuning for the target task. 
Our empirical evaluations on 21 benchmarks demonstrate that the PCP consistently improves the performance of state-of-the-art prompt-based FT approaches (up to 20.1% absolute) in both semi-supervised and fully-supervised settings, even with only hundreds of unlabelled examples. Additionally, prompt-based FT with PCP outperforms state-of-the-art semi-supervised approaches with greater simplicity, eliminating the need for an iterative process and extra data augmentation. Our further analysis explores the performance lower bound of the PCP and reveals that the advantages of PCP persist across different sizes of models and datasets.", "keywords": "Continued Pre-training;Prompt-based Fine-tuning;Language Models", "primary_area": "", "supplementary_material": "/attachment/4b0e33cfb362b93b64469da327295bc8961c031b.pdf", "author": "Zhengxiang Shi;Aldo Lipani", "authorids": "~Zhengxiang_Shi1;~Aldo_Lipani1", "gender": "M;M", "homepage": "https://shizhengyan.github.io/;", "dblp": "219/7021;150/5264", "google_scholar": "TF8l2ZEAAAAJ;", "orcid": "0000-0003-3074-3035;", "linkedin": "zhengxiang-shi/;", "or_profile": "~Zhengxiang_Shi1;~Aldo_Lipani1", "aff": "University College London;University College London, University of London", "aff_domain": "ucl.ac.uk;ucl.ac.uk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshi2023dont,\ntitle={Don{\\textquoteright}t Stop Pretraining? Make Prompt-based Fine-tuning Powerful Learner},\nauthor={Zhengxiang Shi and Aldo Lipani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=s7xWeJQACI}\n}", "github": "", "project": "", "reviewers": "TYSB;CwVD;fyTk;bM1j;pe8v", "pdf_size": 731593, "rating": "5;5;6;7;7", "confidence": "4;4;4;5;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;2;4;3", "wc_summary": "61;46;115;46;107", "wc_strengths": "26;56;53;78;32", "wc_weaknesses": "120;69;109;77;9", "wc_questions": "25;187;143;32;14", "wc_limitations": "13;1;29;10;1", "wc_review": "245;359;449;243;163", "wc_reply_reviewers": "0;0;0;57;0", "wc_reply_authors": "0;0;0;42;0", "reply_reviewers": "0;0;0;1;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 75.0, 30.006665926090488 ], "wc_strengths_avg": [ 49.0, 18.568791021496256 ], "wc_weaknesses_avg": [ 76.8, 38.886501514021546 ], "wc_questions_avg": [ 80.2, 70.8559101275257 ], "wc_limitations_avg": [ 10.8, 10.283968105745952 ], "wc_review_avg": [ 291.8, 100.38804709725157 ], "wc_reply_reviewers_avg": [ 11.4, 22.8 ], "wc_reply_authors_avg": [ 8.4, 16.8 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4847088347662590538&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ucl.ac.uk;ucl.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Soft-Unification in Deep Probabilistic Logic", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70284", "id": "s86M8naPSv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf215fa7fe70a38c5e967e59c44a99d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=s86M8naPSv", "openreview": "https://openreview.net/forum?id=s86M8naPSv", "poster": "/media/PosterPDFs/NeurIPS%202023/70284.png?t=1701600788.8810675", "slides": "https://nips.cc/virtual/2023/poster/70284", "video": "https://nips.cc/virtual/2023/poster/70284", "author_site": "Jaron Maene, Luc De Raedt", "tldr": "", "abstract": "A fundamental challenge in neuro-symbolic AI is to devise primitives that fuse the logical and neural concepts. The Neural Theorem Prover has proposed the notion of soft-unification to turn the symbolic comparison between terms (i.e. unification) into a comparison in embedding space. It has been shown that soft-unification is a powerful mechanism that can be used to learn logic rules in an end-to-end differentiable manner. We study soft-unification from a conceptual point and outline several desirable properties of this operation. These include non-redundancy in the proof, well-defined proof scores, and non-sparse gradients. Unfortunately, these properties are not satisfied by previous systems such as the Neural Theorem Prover. Therefore, we introduce a more principled framework called DeepSoftLog based on probabilistic rather than fuzzy semantics. Our experiments demonstrate that DeepSoftLog can outperform the state-of-the-art on neuro-symbolic benchmarks, highlighting the benefits of these properties.", "keywords": "neuro-symbolic AI;probabilistic logic;embeddings", "primary_area": "", "supplementary_material": "/attachment/8822b7e7bf9a68d20ed340cd584691f884e5a6e8.pdf", "author": "Jaron Maene;Luc De Raedt", "authorids": "~Jaron_Maene1;~Luc_De_Raedt1", "gender": ";M", "homepage": ";https://people.cs.kuleuven.be/~luc.deraedt/", "dblp": ";r/LucDeRaedt", "google_scholar": ";https://scholar.google.com.tw/citations?user=dgobB6AAAAAJ", "orcid": ";0000-0002-6860-6303", "linkedin": ";", "or_profile": "~Jaron_Maene1;~Luc_De_Raedt1", "aff": ";KU Leuven, Belgium", "aff_domain": ";cs.kuleuven.be", "position": ";Full Professor", "bibtex": "@inproceedings{\nmaene2023softunification,\ntitle={Soft-Unification in Deep Probabilistic Logic},\nauthor={Jaron Maene and Luc De Raedt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=s86M8naPSv}\n}", "github": "", "project": "", "reviewers": "bQCz;nJ1y;4neU", "pdf_size": 514248, "rating": "6;6;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "43;93;75", "wc_strengths": "213;57;75", "wc_weaknesses": "3;182;219", "wc_questions": "12;84;125", "wc_limitations": "1;1;9", "wc_review": "272;417;503", "wc_reply_reviewers": "32;0;23", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 70.33333333333333, 20.677416559027762 ], "wc_strengths_avg": [ 115.0, 69.68500556073738 ], "wc_weaknesses_avg": [ 134.66666666666666, 94.31978701323611 ], "wc_questions_avg": [ 73.66666666666667, 46.70712531890135 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 397.3333333333333, 
95.32517447604745 ], "wc_reply_reviewers_avg": [ 18.333333333333332, 13.474255287605159 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=631425959374940545&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";cs.kuleuven.be", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "KU Leuven", "aff_unique_dep": "", "aff_unique_url": "https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven", "aff_country_unique_index": "0", "aff_country_unique": "Belgium" }, { "title": "AND: Adversarial Neural Degradation for Learning Blind Image Super-Resolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70283", "id": "s8QsYV1VZ2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42eb37cdbefd7abae0835f4b67548c39-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=s8QsYV1VZ2", "openreview": "https://openreview.net/forum?id=s8QsYV1VZ2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70283", "video": "https://nips.cc/virtual/2023/poster/70283", "author_site": "Fangzhou Luo, Xiaolin Wu, Yanhui Guo", "tldr": "", "abstract": "Learnt deep neural networks for image super-resolution fail easily if the assumed degradation model in training mismatches that of the real degradation source at the inference stage. Instead of attempting to exhaust all degradation variants in simulation, which is unwieldy and impractical, we propose a novel adversarial neural degradation (AND) model that can, when trained in conjunction with a deep restoration neural network under a minmax criterion, generate a wide range of highly nonlinear complex degradation effects without any explicit supervision. 
The AND model has a unique advantage over the current state of the art in that it can generalize much better to unseen degradation variants and hence deliver significantly improved restoration performance on real-world images.", "keywords": "Blind Image Super-Resolution", "primary_area": "", "supplementary_material": "/attachment/cb5f6cc99eca6cf5fcf969cbb372a5735416884b.pdf", "author": "Fangzhou Luo;Xiaolin Wu;Yanhui Guo", "authorids": "~Fangzhou_Luo1;~Xiaolin_Wu2;~Yanhui_Guo1", "gender": "M;;M", "homepage": ";http://www.ece.mcmaster.ca/~xwu;", "dblp": "214/9024.html;w/XiaolinWu;", "google_scholar": "5sP_muYAAAAJ;ZuQnEIgAAAAJ;XwxwxfQAAAAJ", "orcid": "0000-0002-5945-5064;;0000-0002-9908-3795", "linkedin": ";;", "or_profile": "~Fangzhou_Luo1;~Xiaolin_Wu2;~Yanhui_Guo1", "aff": "McMaster University;McMaster University;McMaster University", "aff_domain": "mcmaster.ca;mcmaster.ca;mcmaster.ca", "position": "PhD student;Full Professor;Ph.D.", "bibtex": "@inproceedings{\nluo2023and,\ntitle={{AND}: Adversarial Neural Degradation for Learning Blind Image Super-Resolution},\nauthor={Fangzhou Luo and Xiaolin Wu and Yanhui Guo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=s8QsYV1VZ2}\n}", "github": "", "project": "", "reviewers": "BEQG;Qwb9;9QoZ;JnSR;vm11", "pdf_size": 2564133, "rating": "3;4;6;7;7", "confidence": "5;5;5;4;5", "soundness": "3;3;2;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;2;3;3", "wc_summary": "81;71;81;51;41", "wc_strengths": "25;26;52;16;49", "wc_weaknesses": "125;229;281;55;43", "wc_questions": "19;39;2;3;22", "wc_limitations": "7;9;4;5;18", "wc_review": "257;374;420;130;173", "wc_reply_reviewers": "59;136;35;90;0", "wc_reply_authors": "87;566;149;215;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "2;4;2;2;1", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 65.0, 16.24807680927192 ], "wc_strengths_avg": [ 33.6, 14.263239463740346 ], "wc_weaknesses_avg": [ 146.6, 94.27958421630846 ], "wc_questions_avg": [ 17.0, 13.66747965061591 ], "wc_limitations_avg": [ 8.6, 5.0039984012787215 ], "wc_review_avg": [ 270.8, 111.79516984199273 ], "wc_reply_reviewers_avg": [ 64.0, 46.523112535598905 ], "wc_reply_authors_avg": [ 203.4, 194.67573038260315 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15408822457720473567&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "mcmaster.ca;mcmaster.ca;mcmaster.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "McMaster University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcmaster.ca", "aff_unique_abbr": "McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "RH-BrainFS: Regional Heterogeneous Multimodal Brain Networks Fusion Strategy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70282", "id": "s97ezbqoDZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9c353d02e565f0f7cba94c4f3584eaa-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=s97ezbqoDZ", "openreview": "https://openreview.net/forum?id=s97ezbqoDZ", "poster": "/media/PosterPDFs/NeurIPS%202023/70282.png?t=1696990104.3946962", "slides": "https://nips.cc/virtual/2023/poster/70282", "video": "https://nips.cc/virtual/2023/poster/70282", "author_site": "Hongting Ye, Yalu Zheng, Yueying Li, Ke Zhang, Youyong Kong, Yonggui Yuan", "tldr": "", "abstract": "Multimodal fusion has become an important research technique in neuroscience that completes downstream tasks by extracting complementary information from multiple modalities. Existing multimodal research on brain networks mainly focuses on two modalities, structural connectivity (SC) and functional connectivity (FC). Recently, extensive literature has shown that the relationship between SC and FC is complex and not a simple one-to-one mapping. The coupling of structure and function at the regional level is heterogeneous. However, all previous studies have neglected the modal regional heterogeneity between SC and FC and fused their representations via \"simple patterns\", which are inefficient ways of multimodal fusion and affect the overall performance of the model. In this paper, to alleviate the issue of regional heterogeneity of multimodal brain networks, we propose a novel Regional Heterogeneous multimodal Brain networks Fusion Strategy (RH-BrainFS). Briefly, we introduce a brain subgraph networks module to extract regional characteristics of brain networks, and further use a new transformer-based fusion bottleneck module to alleviate the issue of regional heterogeneity between SC and FC. To the best of our knowledge, this is the first paper to explicitly state the issue of structural-functional modal regional heterogeneity and to propose a\nsolution. 
Extensive experiments demonstrate that the proposed method outperforms several state-of-the-art methods in a variety of neuroscience tasks.", "keywords": "Multimodal;Neuroscience;Subgraph;Transformer", "primary_area": "", "supplementary_material": "/attachment/3b99e6e7d2efc1da1fc901d16e94d2b625044a2e.zip", "author": "Hongting Ye;Yalu Zheng;Yueying Li;Ke Zhang;Youyong Kong;Yonggui Yuan", "authorids": "~Hongting_Ye1;220212084@seu.edu.cn;230228504@seu.edu.cn;~Ke_Zhang12;~Youyong_Kong1;yygylh2000@sina.com", "gender": "M;;;M;M;", "homepage": ";;;;https://cse.seu.edu.cn/2023/1024/c23024a469537/page.htm;", "dblp": ";;;;154/7641;", "google_scholar": ";;;;;", "orcid": "0009-0002-6648-3255;;;;;", "linkedin": ";;;ke-zhang-45b0aa207/;;", "or_profile": "~Hongting_Ye1;220212084@seu.edu.cn;230228504@seu.edu.cn;~Ke_Zhang12;~Youyong_Kong1;yygylh2000@sina.com", "aff": "Southeast University;;;Southeast University;Southeast University;", "aff_domain": "seu.edu.cn;;;seu.edu.cn;seu.edu.cn;", "position": "MS student;;;MS student;Associate Professor;", "bibtex": "@inproceedings{\nye2023rhbrainfs,\ntitle={{RH}-Brain{FS}: Regional Heterogeneous Multimodal Brain Networks Fusion Strategy},\nauthor={Hongting Ye and Yalu Zheng and Yueying Li and Ke Zhang and Youyong Kong and Yonggui Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=s97ezbqoDZ}\n}", "github": "", "project": "", "reviewers": "UEYi;kgkZ;A4mz;7Tgh;8rTt", "pdf_size": 1659461, "rating": "3;5;5;6;7", "confidence": "4;1;4;4;5", "soundness": "3;2;2;3;3", "novelty": "2;2;2;3;2", "presentation": "3;2;3;4;3", "wc_summary": "110;34;76;79;115", "wc_strengths": "109;1;86;181;111", "wc_weaknesses": "213;1;359;621;46", "wc_questions": "126;1;4;311;28", "wc_limitations": "80;1;4;52;4", "wc_review": "638;38;529;1244;304", "wc_reply_reviewers": "35;0;130;1092;15", "wc_reply_authors": "68;0;249;724;10", "reply_reviewers": "1;0;2;2;1", "reply_authors": "2;0;2;3;2", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 3.6, 1.3564659966250538 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 82.8, 29.04754722863877 ], "wc_strengths_avg": [ 97.6, 57.84669394183215 ], "wc_weaknesses_avg": [ 248.0, 225.60496448438363 ], "wc_questions_avg": [ 94.0, 117.65882882299994 ], "wc_limitations_avg": [ 28.2, 32.12724700312805 ], "wc_review_avg": [ 550.6, 403.10772753694516 ], "wc_reply_reviewers_avg": [ 254.4, 421.24035894011865 ], "wc_reply_authors_avg": [ 210.2, 272.02235202277035 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.9797958971132712 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2667325346846322, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2267871498433627266&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "seu.edu.cn;;;seu.edu.cn;seu.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Doubly Robust Augmented Transfer for Meta-Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70281", "id": "sABYNWKcwK", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f31bf160569618084ba9bdc2a8de29d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sABYNWKcwK", "openreview": "https://openreview.net/forum?id=sABYNWKcwK", "poster": "/media/PosterPDFs/NeurIPS%202023/70281.png?t=1699365063.4993575", "slides": "https://nips.cc/virtual/2023/poster/70281", "video": "https://nips.cc/virtual/2023/poster/70281", "author_site": "Yuankun Jiang, Nuowen Kan, Chenglin Li, Wenrui Dai, Junni Zou, Hongkai Xiong", "tldr": "", "abstract": "Meta-reinforcement learning (Meta-RL), though enabling a fast adaptation to learn new skills by exploiting the common structure shared among different tasks, suffers performance degradation in the sparse-reward setting. Current hindsight-based sample transfer approaches can alleviate this issue by transferring relabeled trajectories from other tasks to a new task so as to provide informative experience for the target reward function, but are unfortunately constrained with the unrealistic assumption that tasks differ only in reward functions. In this paper, we propose a doubly robust augmented transfer (DRaT) approach, aiming at addressing the more general sparse reward meta-RL scenario with both dynamics mismatches and varying reward functions across tasks. Specifically, we design a doubly robust augmented estimator for efficient value-function evaluation, which tackles dynamics mismatches with the optimal importance weight of transition distributions achieved by minimizing the theoretically derived upper bound of mean squared error (MSE) between the estimated values of transferred samples and their true values in the target task. Due to its intractability, we then propose an interval-based approximation to this optimal importance weight, which is guaranteed to cover the optimum with a constrained and sample-independent upper bound on the MSE approximation error. Based on our theoretical findings, we finally develop a DRaT algorithm for transferring informative samples across tasks during the training of meta-RL. 
We implement DRaT on an off-policy meta-RL baseline, and empirically show that it significantly outperforms other hindsight-based approaches on various sparse-reward MuJoCo locomotion tasks with varying dynamics and reward functions.", "keywords": "Meta-reinforcement learning;doubly robust (DR);sample transfer", "primary_area": "", "supplementary_material": "/attachment/5746cdf8c7c8723a2f37e4520bcd6934934a009c.pdf", "author": "Yuankun Jiang;Nuowen Kan;Chenglin Li;Wenrui Dai;Junni Zou;Hongkai Xiong", "authorids": "~Yuankun_Jiang1;~Nuowen_Kan1;~Chenglin_Li2;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1", "gender": "M;M;M;;F;M", "homepage": "http://min.sjtu.edu.cn/;;https://min.sjtu.edu.cn/En/FacultyShow/4?Vid=17;;http://www.cs.sjtu.edu.cn/~zou-jn;http://min.sjtu.edu.cn", "dblp": ";226/2477;;16/5135.html;91/4613;21/3569", "google_scholar": ";OKrLi6UAAAAJ;ltW2JMcAAAAJ;Xg8MhyAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;bB16iN4AAAAJ", "orcid": ";0000-0002-6028-1284;;;;0000-0003-4552-0029", "linkedin": ";;;;;", "or_profile": "~Yuankun_Jiang1;~Nuowen_Kan1;~Chenglin_Li2;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Full Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiang2023doubly,\ntitle={Doubly Robust Augmented Transfer for Meta-Reinforcement Learning},\nauthor={Yuankun Jiang and Nuowen Kan and Chenglin Li and Wenrui Dai and Junni Zou and Hongkai Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sABYNWKcwK}\n}", "github": "", "project": "", "reviewers": "L57L;gRwA;xMYP;cabJ", "pdf_size": 626807, "rating": "6;7;7;7", "confidence": "3;3;3;3", "soundness": "3;4;3;3", "novelty": "3;4;4;3", "presentation": "3;4;4;3", "wc_summary": "77;82;124;116", "wc_strengths": "235;178;80;55", "wc_weaknesses": "115;101;77;120", "wc_questions": "59;130;93;58", "wc_limitations": "6;7;10;37", "wc_review": "492;498;384;386", "wc_reply_reviewers": "20;20;13;30", "wc_reply_authors": "60;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 99.75, 20.522853115490545 ], "wc_strengths_avg": [ 137.0, 72.9006172813372 ], "wc_weaknesses_avg": [ 103.25, 16.67895380412093 ], "wc_questions_avg": [ 85.0, 29.5550334122633 ], "wc_limitations_avg": [ 15.0, 12.786711852544421 ], "wc_review_avg": [ 440.0, 55.04543577809154 ], "wc_reply_reviewers_avg": [ 20.75, 6.057020719792859 ], "wc_reply_authors_avg": [ 15.0, 25.98076211353316 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14158426809485652675&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": 
"https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SutraNets: Sub-series Autoregressive Networks for Long-Sequence, Probabilistic Forecasting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70280", "id": "sC4RbbVKbu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6171c9e600432a42688ad61a525951bf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sC4RbbVKbu", "openreview": "https://openreview.net/forum?id=sC4RbbVKbu", "poster": "/media/PosterPDFs/NeurIPS%202023/70280.png?t=1701458818.8573895", "slides": "https://nips.cc/virtual/2023/poster/70280", "video": "https://nips.cc/virtual/2023/poster/70280", "author_site": "Shane Bergsma, Tim Zeyl, Lei Guo", "tldr": "", "abstract": "We propose SutraNets, a novel method for neural probabilistic forecasting of long-sequence time series. SutraNets use an autoregressive generative model to factorize the likelihood of long sequences into products of conditional probabilities. When generating long sequences, most autoregressive approaches suffer from harmful error accumulation, as well as challenges in modeling long-distance dependencies. SutraNets treat long, univariate prediction as multivariate prediction over lower-frequency sub-series. Autoregression proceeds across time and across sub-series in order to ensure coherent multivariate (and, hence, high-frequency univariate) outputs. Since sub-series can be generated using fewer steps, SutraNets effectively reduce error accumulation and signal path distances. We find SutraNets to significantly improve forecasting accuracy over competitive alternatives on six real-world datasets, including when we vary the number of sub-series and scale up the depth and width of the underlying sequence models.", "keywords": "time series;probabilistic forecasting;autoregressive generative models;neural networks", "primary_area": "", "supplementary_material": "/attachment/43158653589a40de1333c56d5710ddaf6791cd9f.pdf", "author": "Shane Bergsma;Tim Zeyl;Lei Guo", "authorids": "~Shane_Bergsma1;~Tim_Zeyl1;~Lei_Guo6", "gender": "M;;", "homepage": "https://sites.google.com/site/shaneabergsma/;;", "dblp": "57/2540;138/9656;", "google_scholar": "https://scholar.google.ca/citations?hl=en;KJgdqxcAAAAJ;", "orcid": ";;", "linkedin": ";;guolei", "or_profile": "~Shane_Bergsma1;~Tim_Zeyl1;~Lei_Guo6", "aff": "Huawei Canada;Huawei Technologies Ltd.;Huawei Canada Research Center", "aff_domain": "huawei.com;huawei.com;huawei.com", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbergsma2023sutranets,\ntitle={SutraNets: Sub-series Autoregressive Networks for Long-Sequence, Probabilistic Forecasting},\nauthor={Shane Bergsma and Tim Zeyl and Lei Guo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sC4RbbVKbu}\n}", "github": "", "project": "", "reviewers": "6VCF;iCD8;BQ6D;7ywN;MDp2", "pdf_size": 484371, "rating": "4;5;6;6;6", "confidence": "5;4;4;5;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;4;3;4", "wc_summary": "47;62;68;37;61", "wc_strengths": "14;40;58;76;25", "wc_weaknesses": "191;33;187;167;41", "wc_questions": "2;9;26;193;16", "wc_limitations": "10;28;34;4;1", "wc_review": "264;172;373;477;144", "wc_reply_reviewers": "362;117;48;42;0", "wc_reply_authors": 
"1420;595;0;0;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "3;2;1;1;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 55.0, 11.331372379372237 ], "wc_strengths_avg": [ 42.6, 22.30336297512104 ], "wc_weaknesses_avg": [ 123.8, 71.38179039503002 ], "wc_questions_avg": [ 49.2, 72.33643618536927 ], "wc_limitations_avg": [ 15.4, 13.200000000000001 ], "wc_review_avg": [ 286.0, 124.6547231355475 ], "wc_reply_reviewers_avg": [ 113.8, 129.65245851891896 ], "wc_reply_authors_avg": [ 403.0, 558.2794998923031 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4082482904638631, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17695900102420772921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "huawei.com;huawei.com;huawei.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei", "aff_unique_url": "https://www.huawei.com/ca-en/", "aff_unique_abbr": "Huawei Canada", "aff_campus_unique_index": "1", "aff_campus_unique": ";Canada", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;China" }, { "title": "Q-DM: An Efficient Low-bit Quantized Diffusion Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70279", "id": "sFGkL5BsPi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f1ee1cca0721de55bb35cf28ab95e1b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sFGkL5BsPi", "openreview": "https://openreview.net/forum?id=sFGkL5BsPi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70279", "video": "https://nips.cc/virtual/2023/poster/70279", "author_site": "Yanjing Li, Sheng Xu, Xianbin Cao, Xiao Sun, Baochang Zhang", "tldr": "", "abstract": "Denoising diffusion generative models are capable of generating high-quality data, but suffers from the computation-costly generation process, due to a iterative noise estimation using full-precision networks. As an intuitive solution, quantization can significantly reduce the computational and memory consumption by low-bit parameters and operations. However, low-bit noise estimation networks in diffusion models (DMs) remain unexplored yet and perform much worse than the full-precision counterparts as observed in our experimental studies. In this paper, we first identify that the bottlenecks of low-bit quantized DMs come from a large distribution oscillation on activations and accumulated quantization error caused by the multi-step denoising process. To address these issues, we first develop a Timestep-aware Quantization (TaQ) method and a Noise-estimating Mimicking (NeM) scheme for low-bit quantized DMs (Q-DM) to effectively eliminate such oscillation and accumulated error respectively, leading to well-performed low-bit DMs. In this way, we propose an efficient Q-DM to calculate low-bit DMs by considering both training and inference process in the same framework. We evaluate our methods on popular DDPM and DDIM models. Extensive experimental results show that our method achieves a much better performance than the prior arts. 
For example, the 4-bit Q-DM theoretically accelerates the 1000-step DDPM by 7.8x and achieves a FID score of 5.17, on the unconditional CIFAR-10 dataset.", "keywords": "network quantization;diffusion model;image synthesize", "primary_area": "", "supplementary_material": "", "author": "Yanjing Li;Sheng Xu;Xianbin Cao;Xiao Sun;Baochang Zhang", "authorids": "~Yanjing_Li2;~Sheng_Xu4;~Xianbin_Cao2;~Xiao_Sun2;~Baochang_Zhang1", "gender": ";M;M;M;M", "homepage": ";;http://www.ee.buaa.edu.cn/info/1205/22851.htm;https://jimmysuen.github.io/;https://dblp.uni-trier.de/pid/80/3887-1.html", "dblp": "62/201;10/1887-7.html;22/3485;151/8845;https://dblp.uni-trier.de/pid/80/3887-1.html", "google_scholar": "2rE-GM8AAAAJ;https://scholar.google.com.hk/citations?user=ZLR31ccAAAAJ;;wYIe0tYAAAAJ;", "orcid": "0000-0003-3745-8755;0000-0002-7742-275X;;;", "linkedin": ";;;;", "or_profile": "~Yanjing_Li2;~Sheng_Xu4;~Xianbin_Cao2;~Xiao_Sun2;~Baochang_Zhang1", "aff": "Beihang University;Beihang University;Beihang University;Shanghai Artificial Intelligence Laboratory;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;pjlab.org.cn;buaa.edu.cn", "position": "PhD student;PhD student;Full Professor;Principal Researcher;Professor", "bibtex": "@inproceedings{\nli2023qdm,\ntitle={Q-{DM}: An Efficient Low-bit Quantized Diffusion Model},\nauthor={Yanjing Li and Sheng Xu and Xianbin Cao and Xiao Sun and Baochang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sFGkL5BsPi}\n}", "github": "", "project": "", "reviewers": "bfDz;mdg9;acST;3MiP;6Qdf", "pdf_size": 754119, "rating": "4;4;5;7;7", "confidence": "5;4;4;4;5", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;4", "presentation": "2;2;2;4;3", "wc_summary": "76;49;57;62;124", "wc_strengths": "14;34;31;94;119", "wc_weaknesses": "112;377;109;27;71", "wc_questions": "218;38;62;191;2", "wc_limitations": "4;14;3;15;1", "wc_review": "424;512;262;389;317", "wc_reply_reviewers": "51;488;0;16;0", "wc_reply_authors": "38;911;0;13;0", "reply_reviewers": "1;2;0;1;0", "reply_authors": "2;3;1;2;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 73.6, 26.687824939473806 ], "wc_strengths_avg": [ 58.4, 40.637913332256616 ], "wc_weaknesses_avg": [ 139.2, 122.8338715501551 ], "wc_questions_avg": [ 102.2, 86.10783936436914 ], "wc_limitations_avg": [ 7.4, 5.885575587824865 ], "wc_review_avg": [ 380.8, 86.40694416538523 ], "wc_reply_reviewers_avg": [ 111.0, 189.4180561614969 ], "wc_reply_authors_avg": [ 192.4, 359.5678517331604 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.060192926542884564, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18389954595549727186&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;pjlab.org.cn;buaa.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Beihang University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.shailab.org/", "aff_unique_abbr": "BUAA;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "China" }, { "title": "VOCE: Variational Optimization with Conservative Estimation for Offline Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70278", "id": "sIU3WujeSl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a7c2a320f5f36bb98f8eb878c6f1180-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sIU3WujeSl", "openreview": "https://openreview.net/forum?id=sIU3WujeSl", "poster": "/media/PosterPDFs/NeurIPS%202023/70278.png?t=1699620794.8825176", "slides": "https://nips.cc/virtual/2023/poster/70278", "video": "https://nips.cc/virtual/2023/poster/70278", "author_site": "Jiayi Guan, Guang Chen, Jiaming Ji, Long Yang, ao zhou, Zhijun Li, changjun jiang", "tldr": "", "abstract": "Offline safe reinforcement learning (RL) algorithms promise to learn policies that satisfy safety constraints directly in offline datasets without interacting with the environment. This arrangement is particularly important in scenarios with high sampling costs and potential dangers, such as autonomous driving and robotics. However, the influence of safety constraints and out-of-distribution (OOD) actions have made it challenging for previous methods to achieve high reward returns while ensuring safety. In this work, we propose a Variational Optimization with Conservative Eestimation algorithm (VOCE) to solve the problem of optimizing safety policies in the offline dataset. Concretely, we reframe the problem of offline safe RL using probabilistic inference, which introduces variational distributions to make the optimization of policies more flexible. Subsequently, we utilize pessimistic estimation methods to estimate the Q-value of cost and reward, which mitigates the extrapolation errors induced by OOD actions. 
Finally, extensive experiments demonstrate that the VOCE algorithm achieves competitive performance across multiple experimental tasks, particularly outperforming state-of-the-art algorithms in terms of safety.", "keywords": "Offline safe reinforcement learning;Pessimistic conservative estimation;Variational optimization;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/3d9f717ab9d376873b6f246cb24212f3c4855373.zip", "author": "Jiayi Guan;Guang Chen;Jiaming Ji;Long Yang;Ao Zhou;Zhijun Li;changjun jiang", "authorids": "~Jiayi_Guan1;~Guang_Chen4;~Jiaming_Ji2;~Long_Yang4;~Ao_Zhou1;~Zhijun_Li2;~changjun_jiang2", "gender": "M;M;M;M;;M;M", "homepage": ";;https://jijiaming.com;https://person.zju.edu.cn/longyang;https://github.com/aozhouza;;https://cs.tongji.edu.cn/info/1033/2865.htm", "dblp": ";09/4891-1;313/9356.html;;;;", "google_scholar": "PbNNo9cAAAAJ;https://scholar.google.com.hk/citations?user=kBhIyv4AAAAJ;aW8WbYYAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";0000-0002-7416-592X;;;;;0000-0002-2543-8928", "linkedin": ";guang-chen-2879064a;;;;;", "or_profile": "~Jiayi_Guan1;~Guang_Chen4;~Jiaming_Ji2;~Long_Yang4;~Ao_Zhou1;~Zhijun_Li2;~changjun_jiang2", "aff": "Tongji University;Tongji University;Peking University;Peking University;Tongji University;University of Science and Technology of China, Tsinghua University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;pku.edu.cn;pku.edu.cn;tongji.edu.cn;ustc.edu.cn;tongji.edu.cn", "position": "PhD student;Full Professor;PhD student;Postdoc;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nguan2023voce,\ntitle={{VOCE}: Variational Optimization with Conservative Estimation for Offline Safe Reinforcement Learning},\nauthor={Jiayi Guan and Guang Chen and Jiaming Ji and Long Yang and Ao Zhou and Zhijun Li and changjun jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sIU3WujeSl}\n}", "github": "", "project": "", "reviewers": "GTEu;wQ6W;kEdv;cmps", "pdf_size": 3759672, "rating": "5;7;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "78;46;105;80", "wc_strengths": "67;18;101;48", "wc_weaknesses": "89;83;148;61", "wc_questions": "77;73;124;32", "wc_limitations": "2;9;6;5", "wc_review": "313;229;484;226", "wc_reply_reviewers": "0;13;33;136", "wc_reply_authors": "0;25;49;508", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.25, 20.94486810653149 ], "wc_strengths_avg": [ 58.5, 30.120590963658067 ], "wc_weaknesses_avg": [ 95.25, 32.18986641786511 ], "wc_questions_avg": [ 76.5, 32.592176975464525 ], "wc_limitations_avg": [ 5.5, 2.5 ], "wc_review_avg": [ 313.0, 104.72105805424236 ], "wc_reply_reviewers_avg": [ 45.5, 53.55604541039228 ], "wc_reply_authors_avg": [ 145.5, 210.00535707452798 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9702431719402942168&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": 
"tongji.edu.cn;tongji.edu.cn;pku.edu.cn;pku.edu.cn;tongji.edu.cn;ustc.edu.cn;tongji.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;1;0;2;0", "aff_unique_norm": "Tongji University;Peking University;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tongji.edu.cn;http://www.pku.edu.cn;http://www.ustc.edu.cn/", "aff_unique_abbr": "Tongji;Peking U;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Cross-links Matter for Link Prediction: Rethinking the Debiased GNN from a Data Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70277", "id": "sJDkwMVqb9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fba4a59c7a569fce120eea9aa9227052-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sJDkwMVqb9", "openreview": "https://openreview.net/forum?id=sJDkwMVqb9", "poster": "/media/PosterPDFs/NeurIPS%202023/70277.png?t=1699805178.4435444", "slides": "https://nips.cc/virtual/2023/poster/70277", "video": "https://nips.cc/virtual/2023/poster/70277", "author_site": "Zihan Luo, Hong Huang, Jianxun Lian, Xiran Song, Xing Xie, Hai Jin", "tldr": "", "abstract": "Recently, the bias-related issues in GNN-based link prediction have raised widely spread concerns. In this paper, we emphasize the bias on links across different node clusters, which we call cross-links, after considering its significance in both easing information cocoons and preserving graph connectivity. Instead of following the objective-oriented mechanism in prior works with compromised utility, we empirically find that existing GNN models face severe data bias between internal-links (links within the same cluster) and cross-links, and this inspires us to rethink the bias issue on cross-links from a data perspective. Specifically, we design a simple yet effective twin-structure framework, which can be easily applied to most of GNNs to mitigate the bias as well as boost their utility in an end-to-end manner. The basic idea is to generate debiased node embeddings as demonstrations, and fuse them into the embeddings of original GNNs. In particular, we learn debiased node embeddings with the help of augmented supervision signals, and a novel dynamic training strategy is designed to effectively fuse debiased node embeddings with the original node embeddings. Experiments on three datasets with six common GNNs show that our framework can not only alleviate the bias between internal-links and cross-links, but also boost the overall accuracy. 
Comparisons with other state-of-the-art methods also verify the superiority of our method.", "keywords": "Cross-links;Debias;Graph Neural Networks;Link Prediction", "primary_area": "", "supplementary_material": "/attachment/739f819b4b8a74d9dca1adbe66123b057c9c5442.pdf", "author": "Zihan Luo;Hong Huang;Jianxun Lian;Xiran Song;Xing Xie;Hai Jin", "authorids": "~Zihan_Luo2;~Hong_Huang2;~Jianxun_Lian1;~Xiran_Song1;~Xing_Xie3;~Hai_Jin1", "gender": "M;;M;M;M;M", "homepage": "https://luozhhh.github.io/;http://faculty.hust.edu.cn/honghuang/en/index.htm;https://www.microsoft.com/en-us/research/people/jialia/;https://xiransong.info;http://research.microsoft.com/en-us/people/xingx/;http://www.linkedin.com/in/jinhust", "dblp": "167/1837-1;74/3859-1;161/0030;326/4622;08/6809-1;98/4156", "google_scholar": "JWUJkawAAAAJ;-3d0B50AAAAJ;tSq7dIkAAAAJ;g2IkfUcAAAAJ;5EQfAFIAAAAJ;", "orcid": "0000-0002-7142-448X;0000-0002-5282-551X;0000-0003-3108-5601;0000-0002-6737-8513;0000-0002-8608-8482;0000-0002-3934-7605", "linkedin": ";;;;xingx/;jinhust", "or_profile": "~Zihan_Luo2;~Hong_Huang2;~Jianxun_Lian1;~Xiran_Song1;~Xing_Xie3;~Hai_Jin1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Microsoft Research;Huazhong University of Science and Technology;Microsoft Research Asia;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;microsoft.com;hust.edu.cn;microsoft.com;hust.edu.cn", "position": "PhD student;Associate Professor;Researcher;MS student;Senior Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nluo2023crosslinks,\ntitle={Cross-links Matter for Link Prediction: Rethinking the Debiased {GNN} from a Data Perspective},\nauthor={Zihan Luo and Hong Huang and Jianxun Lian and Xiran Song and Xing Xie and Hai Jin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sJDkwMVqb9}\n}", "github": "", "project": "", "reviewers": "a1aW;Lg87;Nju4;sQxV;6Hpo", "pdf_size": 2201726, "rating": "5;6;6;6;6", "confidence": "3;3;4;3;3", "soundness": "3;2;2;2;2", "novelty": "3;2;3;2;2", "presentation": "3;2;3;3;2", "wc_summary": "95;103;93;103;73", "wc_strengths": "114;33;67;32;63", "wc_weaknesses": "136;350;79;21;43", "wc_questions": "2;86;44;122;50", "wc_limitations": "40;31;21;1;32", "wc_review": "387;603;304;279;261", "wc_reply_reviewers": "80;20;53;48;39", "wc_reply_authors": "0;11;0;24;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;2;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 93.4, 10.983624174196786 ], "wc_strengths_avg": [ 61.8, 29.90250825599752 ], "wc_weaknesses_avg": [ 125.8, 118.65816448942735 ], "wc_questions_avg": [ 60.8, 40.58768286069063 ], "wc_limitations_avg": [ 25.0, 13.431306712304652 ], "wc_review_avg": [ 366.8, 125.73368681463214 ], "wc_reply_reviewers_avg": [ 48.0, 19.56527536223296 ], "wc_reply_authors_avg": [ 7.0, 9.50789145920377 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4452001848663629139&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": 
"hust.edu.cn;hust.edu.cn;microsoft.com;hust.edu.cn;microsoft.com;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.hust.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "HUST;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "ELDEN: Exploration via Local Dependencies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70276", "id": "sL4pJBXkxu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31ed129feae64a7e44a15b148c15558d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sL4pJBXkxu", "openreview": "https://openreview.net/forum?id=sL4pJBXkxu", "poster": "/media/PosterPDFs/NeurIPS%202023/70276.png?t=1701487172.2452188", "slides": "https://nips.cc/virtual/2023/poster/70276", "video": "https://nips.cc/virtual/2023/poster/70276", "author_site": "Zizhao Wang, Jiaheng Hu, Peter Stone, Roberto Mart\u00edn-Mart\u00edn", "tldr": "", "abstract": "Tasks with large state space and sparse rewards present a longstanding challenge to reinforcement learning. In these tasks, an agent needs to explore the state space efficiently until it finds a reward. To deal with this problem, the community has proposed to augment the reward function with intrinsic reward, a bonus signal that encourages the agent to visit interesting states. In this work, we propose a new way of defining interesting states for environments with factored state spaces and complex chained dependencies, where an agent's actions may change the value of one entity that, in order, may affect the value of another entity. Our insight is that, in these environments, interesting states for exploration are states where the agent is uncertain whether (as opposed to how) entities such as the agent or objects have some influence on each other. We present ELDEN, Exploration via Local DepENdencies, a novel intrinsic reward that encourages the discovery of new interactions between entities. ELDEN utilizes a novel scheme --- the partial derivative of the learned dynamics to model the local dependencies between entities accurately and computationally efficiently. The uncertainty of the predicted dependencies is then used as an intrinsic reward to encourage exploration toward new interactions. We evaluate the performance of ELDEN on four different domains with complex dependencies, ranging from 2D grid worlds to 3D robotic tasks. 
In all domains, ELDEN correctly identifies local dependencies and learns successful policies, significantly outperforming previous state-of-the-art exploration methods.", "keywords": "reinforcement learning; intrinsic motivation; exploration", "primary_area": "", "supplementary_material": "/attachment/21ec7994023e4947b3fe978d65ee5d8402cdd4cb.pdf", "author": "Zizhao Wang;Jiaheng Hu;Peter Stone;Roberto Mart\u00edn-Mart\u00edn", "authorids": "~Zizhao_Wang3;~Jiaheng_Hu1;~Peter_Stone1;~Roberto_Mart\u00edn-Mart\u00edn1", "gender": "M;M;M;M", "homepage": ";https://jiahenghu.github.io/;http://www.cs.utexas.edu/~pstone;https://robertomartinmartin.com/", "dblp": "245/5008;;s/PeterStone;153/7670", "google_scholar": "https://scholar.google.ca/citations?user=V4KQIWsAAAAJ;;qnwjcfAAAAAJ;XOJE8OEAAAAJ", "orcid": ";;0000-0002-6795-420X;0000-0002-9586-2759", "linkedin": ";;;", "or_profile": "~Zizhao_Wang3;~Jiaheng_Hu1;~Peter_Stone1;~Roberto_Mart\u00edn-Mart\u00edn1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023elden,\ntitle={{ELDEN}: Exploration via Local Dependencies},\nauthor={Zizhao Wang and Jiaheng Hu and Peter Stone and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sL4pJBXkxu}\n}", "github": "", "project": "", "reviewers": "xY6d;G2vP;FZmh;kab1", "pdf_size": 6487783, "rating": "5;5;5;7", "confidence": "4;4;3;4", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "614;31;34;77", "wc_strengths": "31;55;46;80", "wc_weaknesses": "31;246;133;157", "wc_questions": "25;26;35;36", "wc_limitations": "3;45;11;6", "wc_review": "704;403;259;356", "wc_reply_reviewers": "14;291;177;216", "wc_reply_authors": "0;78;414;27", "reply_reviewers": "1;2;2;2", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 189.0, 246.04775959150695 ], "wc_strengths_avg": [ 53.0, 17.790446874657196 ], "wc_weaknesses_avg": [ 141.75, 76.55512719602783 ], "wc_questions_avg": [ 30.5, 5.024937810560445 ], "wc_limitations_avg": [ 16.25, 16.843025262701474 ], "wc_review_avg": [ 430.5, 166.2234941276353 ], "wc_reply_reviewers_avg": [ 174.5, 101.31757004587112 ], "wc_reply_authors_avg": [ 129.75, 166.48479660317335 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8477777772376160668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "On skip connections and normalisation layers in deep 
optimisation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70275", "id": "sLhXMkI0kx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f4d6f8e0f4f543db12260696b2a3551-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sLhXMkI0kx", "openreview": "https://openreview.net/forum?id=sLhXMkI0kx", "poster": "/media/PosterPDFs/NeurIPS%202023/70275.png?t=1702307934.9233623", "slides": "https://nips.cc/virtual/2023/poster/70275", "video": "https://nips.cc/virtual/2023/poster/70275", "author_site": "Lachlan MacDonald, Jack Valmadre, Hemanth Saratchandran, Simon Lucey", "tldr": "", "abstract": "We introduce a general theoretical framework, designed for the study of gradient optimisation of deep neural networks, that encompasses ubiquitous architecture choices including batch normalisation, weight normalisation and skip connections. Our framework determines the curvature and regularity properties of multilayer loss landscapes in terms of their constituent layers, thereby elucidating the roles played by normalisation layers and skip connections in globalising these properties. We then demonstrate the utility of this framework in two respects. First, we give the only proof of which we are aware that a class of deep neural networks can be trained using gradient descent to global optima even when such optima only exist at infinity, as is the case for the cross-entropy cost. Second, we identify a novel causal mechanism by which skip connections accelerate training, which we verify predictively with ResNets on MNIST, CIFAR10, CIFAR100 and ImageNet.", "keywords": "optimisation;optimization;skip;connection;normalisation;normalization;deep;learning;polyak;lojasiewicz;lipschitz", "primary_area": "", "supplementary_material": "/attachment/d18d7df4ba234f817973162e60ad727ea2c50b79.zip", "author": "Lachlan Ewen MacDonald;Jack Valmadre;Hemanth Saratchandran;Simon Lucey", "authorids": "~Lachlan_Ewen_MacDonald1;~Jack_Valmadre1;~Hemanth_Saratchandran1;~Simon_Lucey2", "gender": ";M;;M", "homepage": "https://researchers.adelaide.edu.au/profile/lachlan.macdonald;https://jack.valmadre.net/;;https://www.adelaide.edu.au/directory/simon.lucey", "dblp": "306/7691;50/8535;;01/3542", "google_scholar": "r953DlQAAAAJ;_VSBqL0AAAAJ;;vmAe35UAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lachlan_Ewen_MacDonald1;~Jack_Valmadre1;~Hemanth_Saratchandran1;~Simon_Lucey2", "aff": "University of Adelaide;University of Adelaide;;University of Adelaide", "aff_domain": "adelaide.edu.au;adelaide.edu.au;;adelaide.edu.au", "position": "Postdoc;Lecturer;;Full Professor", "bibtex": "@inproceedings{\nmacdonald2023on,\ntitle={On skip connections and normalisation layers in deep optimisation},\nauthor={Lachlan Ewen MacDonald and Jack Valmadre and Hemanth Saratchandran and Simon Lucey},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sLhXMkI0kx}\n}", "github": "", "project": "", "reviewers": "9ZMW;aLTj;kvh8;LNGS", "pdf_size": 521235, "rating": "4;6;6;7", "confidence": "4;4;3;3", "soundness": "2;4;3;3", "novelty": "1;2;3;2", "presentation": "3;4;2;3", "wc_summary": "158;94;86;127", "wc_strengths": "87;26;133;89", "wc_weaknesses": "131;410;327;318", "wc_questions": "62;62;3;121", "wc_limitations": "38;1;1;19", "wc_review": "476;593;550;674", "wc_reply_reviewers": "260;171;15;183", "wc_reply_authors": "77;129;0;360", "reply_reviewers": "2;1;1;2", "reply_authors": "4;3;1;3", 
"rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.25, 28.586491565073178 ], "wc_strengths_avg": [ 83.75, 38.07476198218447 ], "wc_weaknesses_avg": [ 296.5, 102.06003135410062 ], "wc_questions_avg": [ 62.0, 41.71930009000631 ], "wc_limitations_avg": [ 14.75, 15.303185942802891 ], "wc_review_avg": [ 573.25, 71.6567338077867 ], "wc_reply_reviewers_avg": [ 157.25, 88.94485651233578 ], "wc_reply_authors_avg": [ 141.5, 134.23952473098228 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W7uXPk-tLpAJ:scholar.google.com/&scioq=On+skip+connections+and+normalisation+layers+in+deep+optimisation&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "adelaide.edu.au;adelaide.edu.au;;adelaide.edu.au", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Adelaide", "aff_unique_dep": "", "aff_unique_url": "https://www.adelaide.edu.au", "aff_unique_abbr": "Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Error Bounds for Learning with Vector-Valued Random Features", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70274", "id": "sLr1sohnmo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e34d908241aef40440e61d2a27715424-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sLr1sohnmo", "openreview": "https://openreview.net/forum?id=sLr1sohnmo", "poster": "/media/PosterPDFs/NeurIPS%202023/70274.png?t=1698980154.142277", "slides": "https://nips.cc/virtual/2023/poster/70274", "video": "https://nips.cc/virtual/2023/poster/70274", "author_site": "Samuel Lanthaler, Nicholas H. Nelsen", "tldr": "", "abstract": "This paper provides a comprehensive error analysis of learning with vector-valued random features (RF). The theory is developed for RF ridge regression in a fully general infinite-dimensional input-output setting, but nonetheless applies to and improves existing finite-dimensional analyses. In contrast to comparable work in the literature, the approach proposed here relies on a direct analysis of the underlying risk functional and completely avoids the explicit RF ridge regression solution formula in terms of random matrices. This removes the need for concentration results in random matrix theory or their generalizations to random operators. The main results established in this paper include strong consistency of vector-valued RF estimators under model misspecification and minimax optimal convergence rates in the well-specified setting. The parameter complexity (number of random features) and sample complexity (number of labeled data) required to achieve such rates are comparable with Monte Carlo intuition and free from logarithmic factors.", "keywords": "random features;random feature model;operator learning;vector-valued", "primary_area": "", "supplementary_material": "", "author": "Samuel Lanthaler;Nicholas H. 
Nelsen", "authorids": "~Samuel_Lanthaler1;~Nicholas_H._Nelsen1", "gender": "M;", "homepage": "https://slanthaler.github.io/;https://www.nicholashnelsen.com/", "dblp": ";265/6175", "google_scholar": "v-Jv3LoAAAAJ;cfVmdyYAAAAJ", "orcid": "0000-0003-1911-246X;0000-0002-8328-1199", "linkedin": ";nickhnelsen/", "or_profile": "~Samuel_Lanthaler1;~Nicholas_H_Nelsen1", "aff": "California Institute of Technology;California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu", "position": "Postdoc;PhD student", "bibtex": "@inproceedings{\nlanthaler2023error,\ntitle={Error Bounds for Learning with Vector-Valued Random Features},\nauthor={Samuel Lanthaler and Nicholas H. Nelsen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sLr1sohnmo}\n}", "github": "", "project": "", "reviewers": "HQhU;1Snc;1g4u;ykP5;efr3", "pdf_size": 719532, "rating": "6;6;6;8;9", "confidence": "4;4;3;4;3", "soundness": "3;3;3;4;4", "novelty": "3;3;3;4;4", "presentation": "3;2;3;4;4", "wc_summary": "97;79;309;72;97", "wc_strengths": "57;72;117;33;126", "wc_weaknesses": "181;190;153;11;86", "wc_questions": "52;118;265;125;71", "wc_limitations": "9;17;22;25;11", "wc_review": "396;476;866;266;391", "wc_reply_reviewers": "12;37;0;16;24", "wc_reply_authors": "22;14;0;30;23", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 7.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 130.8, 89.64463174111432 ], "wc_strengths_avg": [ 81.0, 35.44573317058063 ], "wc_weaknesses_avg": [ 124.2, 67.31834816749442 ], "wc_questions_avg": [ 126.2, 74.68172467210435 ], "wc_limitations_avg": [ 16.8, 6.144916598294887 ], "wc_review_avg": [ 479.0, 204.83163818121457 ], "wc_reply_reviewers_avg": [ 17.8, 12.33531515608742 ], "wc_reply_authors_avg": [ 17.8, 10.244998779892558 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3227486121839514, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9712817331021301949&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "caltech.edu;caltech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "California Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.caltech.edu", "aff_unique_abbr": "Caltech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Projection-Free Online Convex Optimization via Efficient Newton Iterations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70273", "id": "sOOg1xJADA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/03261886741f1f21f52f2a2d570616a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sOOg1xJADA", "openreview": "https://openreview.net/forum?id=sOOg1xJADA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70273", "video": "https://nips.cc/virtual/2023/poster/70273", "author_site": "Khashayar Gatmiry, Zak Mhammedi", "tldr": "", "abstract": "This paper presents new projection-free algorithms for Online Convex Optimization (OCO) over a convex domain $\\mathcal{K} \\subset \\mathbb{R}^d$. 
Classical OCO algorithms (such as Online Gradient Descent) typically need to perform Euclidean projections onto the convex set $\\mathcal{K}$ to ensure feasibility of their iterates. Alternative algorithms, such as those based on the Frank-Wolfe method, swap potentially-expensive Euclidean projections onto $\\mathcal{K}$ for linear optimization over $\\mathcal{K}$. However, such algorithms have a sub-optimal regret in OCO compared to projection-based algorithms. In this paper, we look at a third type of algorithm that outputs approximate Newton iterates using a self-concordant barrier for the set of interest. The use of a self-concordant barrier automatically ensures feasibility without the need for projections. However, the computation of the Newton iterates requires a matrix inverse, which can still be expensive. As our main contribution, we show how the stability of the Newton iterates can be leveraged to compute the inverse Hessian in only a vanishing fraction of the rounds, leading to a new efficient projection-free OCO algorithm with a state-of-the-art regret bound.", "keywords": "Online Learning;Online convex optimization;projection-free;Newton method", "primary_area": "", "supplementary_material": "", "author": "Khashayar Gatmiry;Zakaria Mhammedi", "authorids": "~Khashayar_Gatmiry1;~Zakaria_Mhammedi1", "gender": "M;M", "homepage": "http://ce.sharif.edu/~kgatmiry/;", "dblp": ";192/1360", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Khashayar_Gatmiry1;~Zakaria_Mhammedi1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\ngatmiry2023projectionfree,\ntitle={Projection-Free Online Convex Optimization via Efficient Newton Iterations},\nauthor={Khashayar Gatmiry and Zakaria Mhammedi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sOOg1xJADA}\n}", "github": "", "project": "", "reviewers": "kmvY;eL2Y;y1W5;WCK3", "pdf_size": 523195, "rating": "4;6;7;8", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "28;32;42;75", "wc_strengths": "12;224;45;26", "wc_weaknesses": "63;2;109;185", "wc_questions": "1;2;10;4", "wc_limitations": "1;2;15;24", "wc_review": "105;262;221;314", "wc_reply_reviewers": "22;15;66;8", "wc_reply_authors": "57;0;77;9", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;3;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 44.25, 18.471261462065875 ], "wc_strengths_avg": [ 76.75, 85.81775748643167 ], "wc_weaknesses_avg": [ 89.75, 66.81831709943015 ], "wc_questions_avg": [ 4.25, 3.491060010942235 ], "wc_limitations_avg": [ 10.5, 9.5524865872714 ], "wc_review_avg": [ 225.5, 76.98214078602906 ], "wc_reply_reviewers_avg": [ 27.75, 22.63155982251334 ], "wc_reply_authors_avg": [ 35.75, 32.19763190049852 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8451542547285166, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5375767983482395779&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0",
"aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Contrast Everything: A Hierarchical Contrastive Framework for Medical Time-Series", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70272", "id": "sOQBHlCmzp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ae7d9c77b5ff9e3b7833a68523b880f2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sOQBHlCmzp", "openreview": "https://openreview.net/forum?id=sOQBHlCmzp", "poster": "/media/PosterPDFs/NeurIPS%202023/70272.png?t=1699894110.8196738", "slides": "https://nips.cc/virtual/2023/poster/70272", "video": "https://nips.cc/virtual/2023/poster/70272", "author_site": "Yihe Wang, Yu Han, Haishuai Wang, Xiang Zhang", "tldr": "", "abstract": "Contrastive representation learning is crucial in medical time series analysis as it alleviates dependency on labor-intensive, domain-specific, and scarce expert annotations. However, existing contrastive learning methods primarily focus on one single data level, which fails to fully exploit the intricate nature of medical time series. To address this issue, we present COMET, an innovative hierarchical framework that leverages data consistencies at all inherent levels in medical time series. Our meticulously designed model systematically captures data consistency from four potential levels: observation, sample, trial, and patient levels. By developing contrastive loss at multiple levels, we can learn effective representations that preserve comprehensive data consistency, maximizing information utilization in a self-supervised manner. We conduct experiments in the challenging patient-independent setting. We compare COMET against six baselines using three diverse datasets, which include ECG signals for myocardial infarction and EEG signals for Alzheimer\u2019s and Parkinson\u2019s diseases. The results demonstrate that COMET consistently outperforms all baselines, particularly in setup with 10% and 1% labeled data fractions across all datasets. These results underscore the significant impact of our framework in advancing contrastive representation learning techniques for medical time series. 
The source code is available at https://github.com/DL4mHealth/COMET.", "keywords": "Deep Learning;Contrastive Learning;Self-supervised Learning;Time Series;Healthcare", "primary_area": "", "supplementary_material": "/attachment/150c52bcdad75fc2c48409375f9dbfd3995d333b.pdf", "author": "Yihe Wang;Yu Han;Haishuai Wang;Xiang Zhang", "authorids": "~Yihe_Wang2;hanyu21@mails.ucas.ac.cn;~Haishuai_Wang2;~Xiang_Zhang10", "gender": "M;;M;M", "homepage": "https://webpages.charlotte.edu/ywang145/;;https://www.linkedin.com/in/haishuai-wang-b5241775/;http://xiangzhang.info/", "dblp": ";;163/0767;https://dblp.uni-trier.de/pers/hd/z/Zhang_0012:Xiang", "google_scholar": "1JRRcVkAAAAJ;;;0hCzMi4AAAAJ", "orcid": "0009-0000-5220-5452;;0000-0003-1617-0920;", "linkedin": "yihe-wang-4a8a641b5/;;;", "or_profile": "~Yihe_Wang2;hanyu21@mails.ucas.ac.cn;~Haishuai_Wang2;~Xiang_Zhang10", "aff": "University of North Carolina at Charlotte;;Zhejiang University;University of North Carolina at Charlotte", "aff_domain": "uncc.edu;;zju.edu.cn;uncc.edu", "position": "PhD student;;Research Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023contrast,\ntitle={Contrast Everything: A Hierarchical Contrastive Framework for Medical Time-Series},\nauthor={Yihe Wang and Yu Han and Haishuai Wang and Xiang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sOQBHlCmzp}\n}", "github": "", "project": "", "reviewers": "Trt3;jbn1;p5Bk;RVrY", "pdf_size": 774215, "rating": "4;5;6;8", "confidence": "5;4;4;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "55;39;52;87", "wc_strengths": "23;33;62;116", "wc_weaknesses": "141;225;155;247", "wc_questions": "173;2;4;75", "wc_limitations": "7;1;10;25", "wc_review": "399;300;283;550", "wc_reply_reviewers": "139;0;67;18", "wc_reply_authors": "758;0;67;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 58.25, 17.65467360219384 ], "wc_strengths_avg": [ 58.5, 36.15591237958185 ], "wc_weaknesses_avg": [ 192.0, 44.955533585978046 ], "wc_questions_avg": [ 63.5, 69.72266489456639 ], "wc_limitations_avg": [ 10.75, 8.842369591913696 ], "wc_review_avg": [ 383.0, 106.10607899644582 ], "wc_reply_reviewers_avg": [ 56.0, 53.82843114934709 ], "wc_reply_authors_avg": [ 206.25, 319.72517495499164 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.16903085094570333, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8719123322206469677&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uncc.edu;;zju.edu.cn;uncc.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of North Carolina at Charlotte;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uncc.edu;https://www.zju.edu.cn", "aff_unique_abbr": "UNCC;ZJU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Charlotte;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "A Measure-Theoretic Axiomatisation of Causality", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70271", "id": "sPLTQSf6GI", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5aadf1e309cc03cab3ec35afb7c9d0c8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sPLTQSf6GI", "openreview": "https://openreview.net/forum?id=sPLTQSf6GI", "poster": "/media/PosterPDFs/NeurIPS%202023/70271.png?t=1699632699.5679991", "slides": "https://nips.cc/virtual/2023/poster/70271", "video": "https://nips.cc/virtual/2023/poster/70271", "author_site": "Junhyung Park, Simon Buchholz, Bernhard Sch\u00f6lkopf, Krikamol Muandet", "tldr": "", "abstract": "Causality is a central concept in a wide range of research areas, yet there is still no universally agreed axiomatisation of causality. We view causality both as an extension of probability theory and as a study of what happens when one intervenes on a system, and argue in favour of taking Kolmogorov's measure-theoretic axiomatisation of probability as the starting point towards an axiomatisation of causality. To that end, we propose the notion of a causal space, consisting of a probability space along with a collection of transition probability kernels, called causal kernels, that encode the causal information of the space. Our proposed framework is not only rigorously grounded in measure theory, but it also sheds light on long-standing limitations of existing frameworks including, for example, cycles, latent variables and stochastic processes.", "keywords": "Causality;probability theory;causal models", "primary_area": "", "supplementary_material": "/attachment/487f39f59dd098b2244d532c3e84b0aaf9a63216.pdf", "author": "Junhyung Park;Simon Buchholz;Bernhard Sch\u00f6lkopf;Krikamol Muandet", "authorids": "~Junhyung_Park1;~Simon_Buchholz1;~Bernhard_Sch\u00f6lkopf1;~Krikamol_Muandet1", "gender": "M;;;M", "homepage": "https://junhyung-park.github.io;https://www.is.mpg.de/person/sbuchholz;;http://krikamol.org", "dblp": "87/2417;207/9068;;34/1240", "google_scholar": "rMBiq2oAAAAJ;;;E2z5uYsAAAAJ", "orcid": ";;;0000-0002-4182-5282", "linkedin": "jun-park-5886ba134/;;;krikamol-muandet/", "or_profile": "~Junhyung_Park1;~Simon_Buchholz1;~Bernhard_Sch\u00f6lkopf1;~Krikamol_Muandet1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max-Planck Institute;;CISPA Helmholtz Center for Information Security", "aff_domain": "tuebingen.mpg.de;mpg.de;;cispa.saarland", "position": "PhD student;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\npark2023a,\ntitle={A Measure-Theoretic Axiomatisation of Causality},\nauthor={Junhyung Park and Simon Buchholz and Bernhard Sch{\\\"o}lkopf and Krikamol Muandet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sPLTQSf6GI}\n}", "github": "", "project": "", "reviewers": "xRcV;aMnb;TfjY;kUcy;pcib", "pdf_size": 499054, "rating": "4;7;7;8;8", "confidence": "3;4;3;3;4", "soundness": "2;4;4;4;4", "novelty": "2;3;3;4;4", "presentation": "2;3;3;4;3", "wc_summary": "25;41;87;51;64", "wc_strengths": "17;64;44;149;42", "wc_weaknesses": "66;8;508;305;29", "wc_questions": "74;228;447;42;602", "wc_limitations": "1;3;4;2;1", "wc_review": "183;344;1090;549;738", "wc_reply_reviewers": "0;71;264;13;9", "wc_reply_authors": "0;0;202;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.8, 1.469693845669907 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.8000000000000002 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": 
[ 53.6, 21.010473578670233 ], "wc_strengths_avg": [ 63.2, 45.41981946243292 ], "wc_weaknesses_avg": [ 183.2, 194.18486037793986 ], "wc_questions_avg": [ 278.6, 216.03110887092166 ], "wc_limitations_avg": [ 2.2, 1.16619037896906 ], "wc_review_avg": [ 580.8, 316.0148097795418 ], "wc_reply_reviewers_avg": [ 71.4, 99.4959295649827 ], "wc_reply_authors_avg": [ 40.4, 80.8 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.38888888888888884, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8588693301217436168&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "tuebingen.mpg.de;mpg.de;;cispa.saarland", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;CISPA Helmholtz Center for Information Security", "aff_unique_dep": "Intelligent Systems;;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpg.de;https://www.cispa.de/", "aff_unique_abbr": "MPI-IS;MPG;CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Cocktail: Mixing Multi-Modality Control for Text-Conditional Image Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70270", "id": "sQBGVw5qH9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/668563ef18fbfef0b66af491ea334d5f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sQBGVw5qH9", "openreview": "https://openreview.net/forum?id=sQBGVw5qH9", "poster": "/media/PosterPDFs/NeurIPS%202023/70270.png?t=1699351245.469806", "slides": "https://nips.cc/virtual/2023/poster/70270", "video": "https://nips.cc/virtual/2023/poster/70270", "author_site": "Minghui Hu, Jianbin Zheng, Daqing Liu, Chuanxia Zheng, Chaoyue Wang, Dacheng Tao, Tat-Jen Cham", "tldr": "", "abstract": "Text-conditional diffusion models are able to generate high-fidelity images with diverse contents.\nHowever, linguistic representations frequently exhibit ambiguous descriptions of the envisioned objective imagery, requiring the incorporation of additional control signals to bolster the efficacy of text-guided diffusion models. \nIn this work, we propose Cocktail, a pipeline to mix various modalities into one embedding, amalgamated with a generalized ControlNet (gControlNet), a controllable normalisation (ControlNorm), and a spatial guidance sampling method, to actualize multi-modal and spatially-refined control for text-conditional diffusion models. \nSpecifically, we introduce a hyper-network gControlNet, dedicated to the alignment and infusion of the control signals from disparate modalities into the pre-trained diffusion model. \ngControlNet is capable of accepting flexible modality signals, encompassing the simultaneous reception of any combination of modality signals, or the supplementary fusion of multiple modality signals. 
\nThe control signals are then fused and injected into the backbone model according to our proposed ControlNorm.\nFurthermore, our advanced spatial guidance sampling methodology proficiently incorporates the control signal into the designated region, thereby circumventing the manifestation of undesired objects within the generated image.\nWe demonstrate the results of our method in controlling various modalities, proving high-quality synthesis and fidelity to multiple external signals.", "keywords": "Multi-modality;Image Generation;Diffusion", "primary_area": "", "supplementary_material": "", "author": "Minghui Hu;Jianbin Zheng;Daqing Liu;Chuanxia Zheng;Chaoyue Wang;Dacheng Tao;Tat-Jen Cham", "authorids": "~Minghui_Hu1;~Jianbin_Zheng1;~Daqing_Liu1;~Chuanxia_Zheng1;~Chaoyue_Wang2;~Dacheng_Tao1;~Tat-Jen_Cham1", "gender": "M;;M;M;M;;M", "homepage": "https://mhh0318.github.io/;https://github.com/jabir-zheng;http://home.ustc.edu.cn/~liudq/;http://www.chuanxiaz.com/;;;https://personal.ntu.edu.sg/astjcham/", "dblp": "163/9000-1;;225/5519;195/8988;174/7172;;29/3808", "google_scholar": "https://scholar.google.es/citations?user=9jfGj64AAAAJ;5kjgOx0AAAAJ;TbBfOVEAAAAJ;mvpE6bIAAAAJ;https://scholar.google.com.au/citations?user=ioj1BycAAAAJ;;Lx3X7W0AAAAJ", "orcid": ";;0000-0002-8286-0105;;;;0000-0001-5264-2572", "linkedin": ";;;chuanxia-zheng-80a3b8110/;;;tatjencham/", "or_profile": "~Minghui_Hu1;~Jianbin_Zheng1;~Daqing_Liu1;~Chuanxia_Zheng1;~Chaoyue_Wang2;~Dacheng_Tao1;~Tat-Jen_Cham1", "aff": "Nanyang Technological University;South China University of Technology;JD.com Inc.;University of Oxford;JD.com;;Nanyang Technological University", "aff_domain": "ntu.edu.sg;scut.edu.cn;jd.com;ox.ac.uk;jd.com;;ntu.edu.sg", "position": "PhD student;MS student;Researcher;Postdoc;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nhu2023cocktail,\ntitle={Cocktail: Mixing Multi-Modality Control for Text-Conditional Image Generation},\nauthor={Minghui Hu and Jianbin Zheng and Daqing Liu and Chuanxia Zheng and Chaoyue Wang and Dacheng Tao and Tat-Jen Cham},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sQBGVw5qH9}\n}", "github": "", "project": "", "reviewers": "vJhF;u8fB;KthK;a1qK;H6cw", "pdf_size": 36335633, "rating": "3;5;6;6;8", "confidence": "3;4;3;4;3", "soundness": "2;3;2;4;4", "novelty": "2;2;3;3;3", "presentation": "1;2;3;3;4", "wc_summary": "72;130;75;288;45", "wc_strengths": "23;116;89;209;36", "wc_weaknesses": "88;193;147;209;36", "wc_questions": "35;114;96;55;1", "wc_limitations": "1;8;8;52;33", "wc_review": "219;561;415;813;151", "wc_reply_reviewers": "316;0;19;9;6", "wc_reply_authors": "534;0;0;0;0", "reply_reviewers": "3;0;1;1;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 122.0, 87.47342453568398 ], "wc_strengths_avg": [ 94.6, 66.53901111378197 ], "wc_weaknesses_avg": [ 134.6, 64.79691350674042 ], "wc_questions_avg": [ 60.2, 40.84801096748775 ], "wc_limitations_avg": [ 20.4, 19.189580506097574 ], "wc_review_avg": [ 431.8, 239.34109551015263 ], "wc_reply_reviewers_avg": [ 70.0, 123.15356267684666 ], "wc_reply_authors_avg": [ 106.8, 213.6 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": 
[ 7, 0 ], "corr_rating_confidence": -0.05025189076296065, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16532243384879303332&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ntu.edu.sg;scut.edu.cn;jd.com;ox.ac.uk;jd.com;;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;1;2;3;2;0", "aff_unique_norm": "Nanyang Technological University;South China University of Technology;JD.com;University of Oxford", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.scut.edu.cn;https://www.jd.com;https://www.ox.ac.uk", "aff_unique_abbr": "NTU;SCUT;JD.com;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;1;0", "aff_country_unique": "Singapore;China;United Kingdom" }, { "title": "Understanding and Addressing the Pitfalls of Bisimulation-based Representations in Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70269", "id": "sQyRQjun46", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a1667459d0cdeb2fe6b2f0dffc5cb9d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sQyRQjun46", "openreview": "https://openreview.net/forum?id=sQyRQjun46", "poster": "/media/PosterPDFs/NeurIPS%202023/70269.png?t=1698287912.1436503", "slides": "https://nips.cc/virtual/2023/poster/70269", "video": "https://nips.cc/virtual/2023/poster/70269", "author_site": "Hongyu Zang, Xin Li, Leiji Zhang, Yang Liu, Baigui Sun, Riashat Islam, Riashat Islam, Remi Tachet des Combes, Romain Laroche", "tldr": "", "abstract": "While bisimulation-based approaches hold promise for learning robust state representations for Reinforcement Learning (RL) tasks, their efficacy in offline RL tasks has not been up to par. In some instances, their performance has even significantly underperformed alternative methods. We aim to understand why bisimulation methods succeed in online settings, but falter in offline tasks. Our analysis reveals that missing transitions in the dataset are particularly harmful to the bisimulation principle, leading to ineffective estimation. We also shed light on the critical role of reward scaling in bounding the scale of bisimulation measurements and of the value error they induce. Based on these findings, we propose to apply the expectile operator for representation learning to our offline RL setting, which helps to prevent overfitting to incomplete data. Meanwhile, by introducing an appropriate reward scaling strategy, we avoid the risk of feature collapse in representation space. We implement these recommendations on two state-of-the-art bisimulation-based algorithms, MICo and SimSR, and demonstrate performance gains on two benchmark suites: D4RL and Visual D4RL. 
Codes are provided at \\url{https://github.com/zanghyu/Offline_Bisimulation}.", "keywords": "Bisimulation metrics;Reinforcement Learning;Representation Learning;Offline RL", "primary_area": "", "supplementary_material": "/attachment/b875c0cba87a3b6322902acc1cbceb4f0248fa26.zip", "author": "Hongyu Zang;Xin Li;Leiji Zhang;Yang Liu;Baigui Sun;Riashat Islam;Remi Tachet des Combes;Romain Laroche", "authorids": "~Hongyu_Zang1;~Xin_Li31;~Leiji_Zhang3;~Yang_Liu51;~Baigui_Sun1;~Riashat_Islam1;~Remi_Tachet_des_Combes1;~Romain_Laroche1", "gender": "M;F;M;M;M;M;M;M", "homepage": "https://zanghyu.github.io/;https://cs.bit.edu.cn/szdw/jsml/js/lixin/index.htm;https://github.com/Javazlj;;;https://riashat.github.io/;;https://www.researchgate.net/profile/Romain_Laroche", "dblp": "212/2592.html;09/1365-33.html;;27/3367-5;186/8016;198/0459;146/0392;65/9019", "google_scholar": "2kmSy50AAAAJ;https://scholar.google.com/citations?hl=zh-TW;;t1emSE0AAAAJ;ZNhTHywAAAAJ;https://scholar.google.ca/citations?user=2_4Rs44AAAAJ;1MZF70cAAAAJ;RiIOKJMAAAAJ", "orcid": ";0000-0003-4257-4347;;;0000-0001-7722-4748;;;", "linkedin": ";;;;;;;romain-laroche-6282397/?originalSubdomain=ca", "or_profile": "~Hongyu_Zang1;~Xin_Li31;~Leiji_Zhang3;~Yang_Liu51;~Baigui_Sun1;~Riashat_Islam1;~Remi_Tachet_des_Combes1;~Romain_Laroche1", "aff": ";Beijing Institute of Technology;Beijing Institute of Technology;Alibaba Group;Alibaba Group;Mila - Quebec AI Institute;Microsoft Research;Microsoft", "aff_domain": ";bit.edu.cn;bit.edu.cn;alibaba-inc.com;alibaba-inc.com;mcgill.ca;microsoft.com;microsoft.com", "position": ";Associate Professor;MS student;Researcher at Alibaba Group;Researcher;PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzang2023understanding,\ntitle={Understanding and Addressing the Pitfalls of Bisimulation-based Representations in Offline Reinforcement Learning},\nauthor={Hongyu Zang and Xin Li and Leiji Zhang and Yang Liu and Baigui Sun and Riashat Islam and Remi Tachet des Combes and Romain Laroche},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sQyRQjun46}\n}", "github": "", "project": "", "reviewers": "vGKe;BVFy;KC23;LXid;aN21;ndNc", "pdf_size": 1033683, "rating": "4;6;6;6;6;7", "confidence": "3;2;4;3;2;3", "soundness": "2;3;3;3;4;3", "novelty": "2;3;3;3;3;4", "presentation": "2;4;3;2;3;4", "wc_summary": "275;117;94;117;114;51", "wc_strengths": "79;81;76;52;322;67", "wc_weaknesses": "164;176;368;170;239;41", "wc_questions": "774;41;122;57;2;233", "wc_limitations": "17;24;17;33;2;5", "wc_review": "1309;439;677;429;679;397", "wc_reply_reviewers": "323;658;18;24;73;61", "wc_reply_authors": "1037;1456;31;31;31;31", "reply_reviewers": "1;5;1;1;1;1", "reply_authors": "4;6;2;2;2;2", "rating_avg": [ 5.833333333333333, 0.8975274678557505 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 128.0, 69.6802219667341 ], "wc_strengths_avg": [ 112.83333333333333, 94.04505421457432 ], "wc_weaknesses_avg": [ 193.0, 97.89790600416333 ], "wc_questions_avg": [ 204.83333333333334, 265.0291912643251 ], "wc_limitations_avg": [ 16.333333333333332, 10.577754435081621 ], "wc_review_avg": [ 655.0, 314.39677691308054 ], "wc_reply_reviewers_avg": [ 192.83333333333334, 232.4273482665545 ], "wc_reply_authors_avg": [ 436.1666666666667, 585.6194488649509 ], "reply_reviewers_avg": [ 
1.6666666666666667, 1.4907119849998596 ], "reply_authors_avg": [ 3.0, 1.5275252316519468 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.04503773491110452, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1439259291434518011&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";bit.edu.cn;bit.edu.cn;alibaba-inc.com;alibaba-inc.com;mcgill.ca;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;1;1;2;3;3", "aff_unique_norm": "Beijing Institute of Technology;Alibaba Group;Quebec AI Institute;Microsoft", "aff_unique_dep": ";;AI Institute;Microsoft Research", "aff_unique_url": "http://www.bit.edu.cn/;https://www.alibaba.com;https://mila.quebec;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "BIT;Alibaba;Mila;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2;2", "aff_country_unique": "China;Canada;United States" }, { "title": "Let the Flows Tell: Solving Graph Combinatorial Problems with GFlowNets", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70268", "id": "sTjW3JHs2V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/27571b74d6cd650b8eb6cf1837953ae8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sTjW3JHs2V", "openreview": "https://openreview.net/forum?id=sTjW3JHs2V", "poster": "/media/PosterPDFs/NeurIPS%202023/70268.png?t=1702912091.1517572", "slides": "https://nips.cc/virtual/2023/poster/70268", "video": "https://nips.cc/virtual/2023/poster/70268", "author_site": "Dinghuai Zhang, Hanjun Dai, Nikolay Malkin, Aaron Courville, Yoshua Bengio, Ling Pan", "tldr": "", "abstract": "Combinatorial optimization (CO) problems are often NP-hard and thus out of reach for exact algorithms, making them a tempting domain to apply machine learning methods. The highly structured constraints in these problems can hinder either optimization or sampling directly in the solution space.\nOn the other hand, GFlowNets have recently emerged as a powerful machinery to efficiently sample from composite unnormalized densities sequentially and have the potential to amortize such solution-searching processes in CO, as well as generate diverse solution candidates.\nIn this paper, we design Markov decision processes (MDPs) for different combinatorial problems and propose to train conditional GFlowNets to sample from the solution space. 
\nEfficient training techniques are also developed to benefit long-range credit assignment.\nThrough extensive experiments on a variety of different CO tasks with synthetic and realistic data, we demonstrate that GFlowNet policies can efficiently find high-quality solutions.\nOur implementation is open-sourced at https://github.com/zdhNarsil/GFlowNet-CombOpt.", "keywords": "graph; combinatorial optimization; sampling; gflownets", "primary_area": "", "supplementary_material": "", "author": "Dinghuai Zhang;Hanjun Dai;Nikolay Malkin;Aaron Courville;Yoshua Bengio;Ling Pan", "authorids": "~Dinghuai_Zhang1;~Hanjun_Dai1;~Nikolay_Malkin1;~Aaron_Courville3;~Yoshua_Bengio1;~Ling_Pan1", "gender": ";M;;;M;F", "homepage": ";https://hanjun-dai.github.io;;;http://yoshuabengio.org;https://ling-pan.github.io/", "dblp": ";144/7311;;56/1688;56/953;199/9303/", "google_scholar": ";obpl7GQAAAAJ;;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;kukA0LcAAAAJ;qZ_zlacAAAAJ", "orcid": ";;;;;", "linkedin": ";hanjun-dai;;;yoshuabengio/?originalSubdomain=ca;", "or_profile": "~Dinghuai_Zhang1;~Hanjun_Dai1;~Nikolay_Malkin1;~Aaron_Courville3;~Yoshua_Bengio1;~Ling_Pan1", "aff": ";Google Research;;Universit\u00e9 de Montr\u00e9al;University of Montreal;Montreal Institute for Learning Algorithms (MILA)", "aff_domain": ";google.com;; ;umontreal.ca;mila.umontreal.ca", "position": ";Researcher;;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nzhang2023let,\ntitle={Let the Flows Tell: Solving Graph Combinatorial Problems with {GF}lowNets},\nauthor={Dinghuai Zhang and Hanjun Dai and Nikolay Malkin and Aaron Courville and Yoshua Bengio and Ling Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sTjW3JHs2V}\n}", "github": "", "project": "", "reviewers": "VHLn;n6CJ;gYzR;PEdA", "pdf_size": 2297490, "rating": "5;6;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;4;4;3", "wc_summary": "75;148;208;187", "wc_strengths": "29;140;196;175", "wc_weaknesses": "314;337;269;169", "wc_questions": "8;380;58;123", "wc_limitations": "1;4;27;21", "wc_review": "427;1009;758;675", "wc_reply_reviewers": "38;43;353;61", "wc_reply_authors": "71;55;170;51", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 154.5, 50.69763308084511 ], "wc_strengths_avg": [ 135.0, 64.38555738673077 ], "wc_weaknesses_avg": [ 272.25, 64.43358984256581 ], "wc_questions_avg": [ 142.25, 143.19283327038403 ], "wc_limitations_avg": [ 13.25, 11.008519428151999 ], "wc_review_avg": [ 717.25, 207.85135914879172 ], "wc_reply_reviewers_avg": [ 123.75, 132.6336590010243 ], "wc_reply_authors_avg": [ 86.75, 48.64347335460329 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1503694986043745281&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";google.com;; ;umontreal.ca;mila.umontreal.ca", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Google;Universit\u00e9 de Montr\u00e9al;University of Montreal;Montreal Institute for Learning 
Algorithms", "aff_unique_dep": "Google Research;;;Artificial Intelligence", "aff_unique_url": "https://research.google;https://www.umontreal.ca;https://wwwumontreal.ca;https://mila.quebec", "aff_unique_abbr": "Google Research;UdeM;UM;MILA", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Montreal", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Canada" }, { "title": "D-Separation for Causal Self-Explanation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70267", "id": "sUFGPYS25Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/87e82678c0d6e5b729398426f82e9af6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sUFGPYS25Q", "openreview": "https://openreview.net/forum?id=sUFGPYS25Q", "poster": "/media/PosterPDFs/NeurIPS%202023/70267.png?t=1699581503.5904174", "slides": "https://nips.cc/virtual/2023/poster/70267", "video": "https://nips.cc/virtual/2023/poster/70267", "author_site": "Wei Liu, Jun Wang, Haozhao Wang, Ruixuan Li, Zhiying Deng, YuanKai Zhang, Yang Qiu", "tldr": "", "abstract": "Rationalization aims to strengthen the interpretability of NLP models by extracting a subset of human-intelligible pieces of their inputting texts. Conventional works generally employ the maximum mutual information (MMI) criterion to find the rationale that is most indicative of the target label. However, this criterion can be influenced by spurious features that correlate with the causal rationale or the target label. Instead of attempting to rectify the issues of the MMI criterion, we propose a novel criterion to uncover the causal rationale, termed the Minimum Conditional Dependence (MCD) criterion, which is grounded on our finding that the non-causal features and the target label are \\emph{d-separated} by the causal rationale. By minimizing the dependence between the non-selected parts of the input and the target label conditioned on the selected rationale candidate, all the causes of the label are compelled to be selected. In this study, we employ a simple and practical measure for dependence, specifically the KL-divergence, to validate our proposed MCD criterion. 
Empirically, we demonstrate that MCD improves the F1 score by up to 13.7% compared to previous state-of-the-art MMI-based methods.\nOur code is in an anonymous repository: https://anonymous.4open.science/r/MCD-CE88.", "keywords": "interpretability;causal inference;rationalization;self-explaining", "primary_area": "", "supplementary_material": "/attachment/da4a07f1fa9eca70091a82a261c967dc4f42805b.pdf", "author": "Wei Liu;Jun Wang;Haozhao Wang;Ruixuan Li;Zhiying Deng;YuanKai Zhang;Yang Qiu", "authorids": "~Wei_Liu30;~Jun_Wang1;~Haozhao_Wang1;~Ruixuan_Li1;~Zhiying_Deng2;~YuanKai_Zhang1;~Yang_Qiu2", "gender": ";;M;M;;;M", "homepage": ";;https://wanghaozhao.mysxl.cn/;http://idc.hust.edu.cn/rxli/index.html;;;", "dblp": ";w/JunWang18;224/4500.html;60/4429.html;;;57/7487", "google_scholar": ";Anp1nPUAAAAJ;https://scholar.google.com.hk/citations?user=yFrOuMEAAAAJ;https://scholar.google.com/scholar?q=ruixuan+li;;;zlapu8EAAAAJ", "orcid": ";0000-0002-9515-076X;0000-0002-7591-5315;0000-0002-7791-5511;;;0000-0002-3564-0521", "linkedin": ";hustwj/;;https://www.linkedin.cn/incareer/in/ruixuan-li-b367319;;;https://www.linkedin.cn/in/%E6%B4%8B-%E9%82%B1-4b0756186", "or_profile": "~Wei_Liu30;~Jun_Wang1;~Haozhao_Wang1;~Ruixuan_Li1;~Zhiying_Deng2;~YuanKai_Zhang1;~Yang_Qiu2", "aff": ";iWudao Tech.;Huazhong University of Science and Technology;Huazhong University of Science and Technology;;;Huazhong University of Science and Technology", "aff_domain": ";iwudao.tech;hust.edu.cn;hust.edu.cn;;;hust.edu.cn", "position": ";Consultant;Postdoc;Full Professor;;;PhD student", "bibtex": "@inproceedings{\nliu2023dseparation,\ntitle={D-Separation for Causal Self-Explanation},\nauthor={Wei Liu and Jun Wang and Haozhao Wang and Ruixuan Li and Zhiying Deng and YuanKai Zhang and Yang Qiu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sUFGPYS25Q}\n}", "github": "", "project": "", "reviewers": "bHMi;kLrd;Rbzy;xX9m", "pdf_size": 389576, "rating": "3;5;7;7", "confidence": "5;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "26;328;73;91", "wc_strengths": "17;31;107;43", "wc_weaknesses": "233;49;93;50", "wc_questions": "53;36;246;54", "wc_limitations": "4;16;69;5", "wc_review": "333;460;588;243", "wc_reply_reviewers": "244;0;14;22", "wc_reply_authors": "1598;19;19;19", "reply_reviewers": "2;0;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 129.5, 117.0352510998289 ], "wc_strengths_avg": [ 49.5, 34.449238017697866 ], "wc_weaknesses_avg": [ 106.25, 75.30396735896456 ], "wc_questions_avg": [ 97.25, 86.178231009925 ], "wc_limitations_avg": [ 23.5, 26.688012290165037 ], "wc_review_avg": [ 406.0, 130.32459476246223 ], "wc_reply_reviewers_avg": [ 70.0, 100.7670581092849 ], "wc_reply_authors_avg": [ 413.75, 683.7270562878143 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1540513629181875597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";iwudao.tech;hust.edu.cn;hust.edu.cn;;;hust.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1", "aff_unique_norm": 
"iWudao Tech;Huazhong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": ";http://www.hust.edu.cn", "aff_unique_abbr": ";HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Weakly-Supervised Audio-Visual Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70266", "id": "sUqG96QqZM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/377b2e39e97e917b9e625b35241e33df-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sUqG96QqZM", "openreview": "https://openreview.net/forum?id=sUqG96QqZM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70266", "video": "https://nips.cc/virtual/2023/poster/70266", "author_site": "Shentong Mo, Shentong Mo, Bhiksha Raj", "tldr": "", "abstract": "Audio-visual segmentation is a challenging task that aims to predict pixel-level masks for sound sources in a video.\n Previous work applied a comprehensive manually designed architecture with countless pixel-wise accurate masks as supervision. However, these pixel-level masks are expensive and not available in all cases.\n In this work, we aim to simplify the supervision as the instance-level annotation, $\\textit{i.e.}$, weakly-supervised audio-visual segmentation.\n We present a novel Weakly-Supervised Audio-Visual Segmentation framework, namely WS-AVS, that can learn multi-scale audio-visual alignment with multi-scale multiple-instance contrastive learning for audio-visual segmentation.\n Extensive experiments on AVSBench demonstrate the effectiveness of our WS-AVS in the weakly-supervised audio-visual segmentation of single-source and multi-source scenarios.", "keywords": "audio-visual learning;visual sound localization;audio-visual segmentation", "primary_area": "", "supplementary_material": "/attachment/350143330bbc395f3e62adb9252e5fc124a695d7.zip", "author": "Shentong Mo;Bhiksha Raj", "authorids": "~Shentong_Mo1;~Bhiksha_Raj1", "gender": ";M", "homepage": ";https://www.cs.cmu.edu/directory/bhikshar/", "dblp": ";60/3996", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Shentong_Mo1;~Bhiksha_Raj1", "aff": ";Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": ";mbzuai.ac.ae", "position": ";Full Professor", "bibtex": "@inproceedings{\nmo2023weaklysupervised,\ntitle={Weakly-Supervised Audio-Visual Segmentation},\nauthor={Shentong Mo and Bhiksha Raj},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sUqG96QqZM}\n}", "github": "", "project": "", "reviewers": "CQaV;DJ6f;XL4i;yiGS", "pdf_size": 787205, "rating": "4;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "1;2;3;2", "presentation": "2;3;3;3", "wc_summary": "60;64;50;44", "wc_strengths": "35;66;45;60", "wc_weaknesses": "247;168;199;652", "wc_questions": "2;35;5;28", "wc_limitations": "1;1;25;36", "wc_review": "345;334;324;820", "wc_reply_reviewers": "32;27;40;14", "wc_reply_authors": "489;0;66;70", "reply_reviewers": "1;1;2;1", "reply_authors": "4;1;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.5, 7.92148975887743 ], "wc_strengths_avg": [ 51.5, 12.216791722870616 ], "wc_weaknesses_avg": [ 316.5, 
195.73515269363344 ], "wc_questions_avg": [ 17.5, 14.256577429383253 ], "wc_limitations_avg": [ 15.75, 15.2540978100968 ], "wc_review_avg": [ 455.75, 210.43095661047593 ], "wc_reply_reviewers_avg": [ 28.25, 9.443913383762052 ], "wc_reply_authors_avg": [ 156.25, 194.11385190140348 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17736005973834372046&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";mbzuai.ac.ae", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_country_unique_index": "0", "aff_country_unique": "United Arab Emirates" }, { "title": "Ordering-based Conditions for Global Convergence of Policy Gradient Methods", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70265", "id": "sW8yGZ4uVJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61c00c07e6d27285e4b952e96cc65666-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sW8yGZ4uVJ", "openreview": "https://openreview.net/forum?id=sW8yGZ4uVJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70265", "video": "https://nips.cc/virtual/2023/poster/70265", "author_site": "Jincheng Mei, Bo Dai, Alekh Agarwal, Mohammad Ghavamzadeh, Csaba Szepesvari, Dale Schuurmans", "tldr": "", "abstract": "We prove that, for finite-arm bandits with linear function approximation, the global convergence of policy gradient (PG) methods depends on inter-related properties between the policy update and the representation. \textcolor{blue}{First}, we establish a few key observations that frame the study: \textbf{(i)} Global convergence can be achieved under linear function approximation without policy or reward realizability, both for the standard Softmax PG and natural policy gradient (NPG). \textbf{(ii)} Approximation error is not a key quantity for characterizing global convergence in either algorithm. \textbf{(iii)} The conditions on the representation that imply global convergence are different between these two algorithms. Overall, these observations call into question approximation error as an appropriate quantity for characterizing the global convergence of PG methods under linear function approximation. \textcolor{blue}{Second}, motivated by these observations, we establish new general results: \textbf{(i)} NPG with linear function approximation achieves global convergence \emph{if and only if} the projection of the reward onto the representable space preserves the optimal action's rank, a quantity that is not strongly related to approximation error. \textbf{(ii)} The global convergence of Softmax PG occurs if the representation satisfies a non-domination condition and can preserve the ranking of rewards, which goes well beyond policy or reward realizability. 
We provide experimental results to support these theoretical findings.", "keywords": "reinforcement learning;policy gradient;policy optimization;function approximation;global convergence", "primary_area": "", "supplementary_material": "/attachment/29990e1600b0b1cd014d6efc42617301e1ffb7d4.pdf", "author": "Jincheng Mei;Bo Dai;Alekh Agarwal;Mohammad Ghavamzadeh;Csaba Szepesvari;Dale Schuurmans", "authorids": "~Jincheng_Mei1;~Bo_Dai1;~Alekh_Agarwal2;~Mohammad_Ghavamzadeh2;~Csaba_Szepesvari1;~Dale_Schuurmans1", "gender": "M;;M;;M;", "homepage": "https://jinchengmei.github.io;https://bo-dai.github.io/;https://alekhagarwal.net;;https://sites.ualberta.ca/~szepesva/;", "dblp": "149/1408;64/2903;;;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;", "google_scholar": ";TIKl_foAAAAJ;9nnDvooAAAAJ;;https://scholar.google.ca/citations?user=zvC19mQAAAAJ;", "orcid": ";0009-0002-8070-574X;;;;", "linkedin": ";;;;csaba-szepesvari-09376b1?trk=hp-identity-name;", "or_profile": "~Jincheng_Mei1;~Bo_Dai1;~Alekh_Agarwal2;~Mohammad_Ghavamzadeh2;~Csaba_Szepesvari1;~Dale_Schuurmans1", "aff": "Google DeepMind;Google Brain;Google;;Google DeepMind;", "aff_domain": "google.com;google.com;google.com;;google.com;", "position": "Research Scientist;Research Scientist;Researcher;;Research Scientist;", "bibtex": "@inproceedings{\nmei2023orderingbased,\ntitle={Ordering-based Conditions for Global Convergence of Policy Gradient Methods},\nauthor={Jincheng Mei and Bo Dai and Alekh Agarwal and Mohammad Ghavamzadeh and Csaba Szepesvari and Dale Schuurmans},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sW8yGZ4uVJ}\n}", "github": "", "project": "", "reviewers": "m2vF;hbxK;BpdK;WYfH;P4aQ", "pdf_size": 5962385, "rating": "5;7;8;8;9", "confidence": "4;4;3;5;4", "soundness": "2;4;3;3;4", "novelty": "3;4;4;4;4", "presentation": "2;3;4;4;4", "wc_summary": "64;126;66;159;242", "wc_strengths": "256;51;151;182;217", "wc_weaknesses": "216;27;62;438;278", "wc_questions": "191;129;38;435;573", "wc_limitations": "1;1;12;12;7", "wc_review": "728;334;329;1226;1317", "wc_reply_reviewers": "121;0;57;334;93", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 131.4, 66.09568821035151 ], "wc_strengths_avg": [ 171.4, 69.65802179218126 ], "wc_weaknesses_avg": [ 204.2, 149.5786080962114 ], "wc_questions_avg": [ 273.2, 199.46368090456969 ], "wc_limitations_avg": [ 6.6, 4.923413450036469 ], "wc_review_avg": [ 786.8, 422.3919506808813 ], "wc_reply_reviewers_avg": [ 121.0, 113.92102527628515 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10282150237955045381&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "google.com;google.com;google.com;;google.com;", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": 
"0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "DISCOVER: Making Vision Networks Interpretable via Competition and Dissection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70264", "id": "sWNOvNXGLP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/55aeba84b402008d3ed10440d906b4e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sWNOvNXGLP", "openreview": "https://openreview.net/forum?id=sWNOvNXGLP", "poster": "/media/PosterPDFs/NeurIPS%202023/70264.png?t=1697041242.547278", "slides": "https://nips.cc/virtual/2023/poster/70264", "video": "https://nips.cc/virtual/2023/poster/70264", "author_site": "Konstantinos Panousis, Sotirios Chatzis", "tldr": "", "abstract": "Modern deep networks are highly complex and their inferential outcome very hard to interpret. This is a serious obstacle to their transparent deployment in safety-critical or bias-aware applications. This work contributes to *post-hoc* interpretability, and specifically Network Dissection. Our goal is to present a framework that makes it easier to *discover* the individual functionality of each neuron in a network trained on a vision task; discovery is performed in terms of textual description generation. To achieve this objective, we leverage: (i) recent advances in multimodal vision-text models and (ii) network layers founded upon the novel concept of stochastic local competition between linear units. In this setting, only a *small subset* of layer neurons are activated *for a given input*, leading to extremely high activation sparsity (as low as only $\\approx 4\\%$). Crucially, our proposed method infers (sparse) neuron activation patterns that enables the neurons to activate/specialize to inputs with specific characteristics, diversifying their individual functionality. This capacity of our method supercharges the potential of dissection processes: human understandable descriptions are generated only for the very few active neurons, thus facilitating the direct investigation of the network's decision process. As we experimentally show, our approach: (i) yields Vision Networks that retain or improve classification performance, and (ii) realizes a principled framework for text-based description and examination of the generated neuronal representations.", "keywords": "Interpretability;Explainability;Network Dissection;Competitive Networks;Sparsity;Multimodal Models", "primary_area": "", "supplementary_material": "/attachment/e8a36c309553247477db8f1f00c9fa0f562f1716.zip", "author": "Konstantinos P. Panousis;Sotirios Chatzis", "authorids": "~Konstantinos_P._Panousis1;~Sotirios_Chatzis1", "gender": ";M", "homepage": ";https://www.cut.ac.cy/eecei/staff/sotirios.chatzis/", "dblp": "220/4070;25/6133", "google_scholar": ";https://scholar.google.gr/citations?user=__Y_0hQAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Konstantinos_P._Panousis1;~Sotirios_Chatzis1", "aff": "Cyprus University of Technology;Cyprus University of Technology", "aff_domain": "cut.ac.cy;cut.ac.cy", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\npanousis2023discover,\ntitle={{DISCOVER}: Making Vision Networks Interpretable via Competition and Dissection},\nauthor={Konstantinos P. 
Panousis and Sotirios Chatzis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sWNOvNXGLP}\n}", "github": "", "project": "", "reviewers": "kXaB;crQf;BNcZ;PAgG;CPpE", "pdf_size": 7804464, "rating": "2;4;5;6;7", "confidence": "5;3;2;2;3", "soundness": "2;2;3;3;3", "novelty": "1;3;3;2;3", "presentation": "2;2;3;3;4", "wc_summary": "95;73;72;56;176", "wc_strengths": "105;52;24;21;83", "wc_weaknesses": "376;266;124;54;361", "wc_questions": "109;8;22;4;75", "wc_limitations": "20;7;1;1;4", "wc_review": "705;406;243;136;699", "wc_reply_reviewers": "140;48;303;33;149", "wc_reply_authors": "758;33;392;30;1439", "reply_reviewers": "3;1;2;1;2", "reply_authors": "5;2;2;2;4", "rating_avg": [ 4.8, 1.7204650534085253 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 94.4, 42.645515590739436 ], "wc_strengths_avg": [ 57.0, 32.83291031876401 ], "wc_weaknesses_avg": [ 236.2, 127.90058639427734 ], "wc_questions_avg": [ 43.6, 41.38888739746455 ], "wc_limitations_avg": [ 6.6, 7.059745037889115 ], "wc_review_avg": [ 437.8, 232.2338476622217 ], "wc_reply_reviewers_avg": [ 134.6, 96.35060975416813 ], "wc_reply_authors_avg": [ 530.4, 528.0752219144542 ], "reply_reviewers_avg": [ 1.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.0, 1.2649110640673518 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7428336299615156, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16069928642863590484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cut.ac.cy;cut.ac.cy", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Cyprus University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.cut.ac.cy", "aff_unique_abbr": "CUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Cyprus" }, { "id": "sXD4idbnBw", "title": "Why Differentially Private Local SGD -- An Analysis of Synchronized-Only Biased Iterate", "track": "main", "status": "Reject", "tldr": "", "abstract": "We argue for using Differentially-Private Local Stochastic Gradient Descent (DP-LSGD) in both centralized and distributed setups, and explain why DP-LSGD enjoys higher clipping efficiency and produces less clipping bias compared to classic Differentially-Private Stochastic Gradient Descent (DP-SGD). For both convex and non-convex optimization, we present a generic analysis of noisy synchronized-only iterates in LSGD, the building block of federated learning, and study its applications to differentially-private gradient methods with clipping-based sensitivity control. We point out that given the current {\em decompose-then-compose} framework, there is no essential gap between the privacy analysis of centralized and distributed learning, and DP-SGD is a special case of DP-LSGD. We thus build a unified framework to characterize the clipping bias via the second moment of local updates, which initiates a direction to systematically instruct DP optimization by variance reduction. We show DP-LSGD with multiple local iterations can produce more concentrated local updates and thus enable a more efficient exploitation of the clipping budget with a better utility-privacy tradeoff. 
In addition, we prove that DP-LSGD can converge faster to a small neighborhood of global/local optimum compared to regular DP-SGD. Thorough experiments on practical deep learning tasks are provided to support our developed theory. ", "keywords": "Local SGD;Differential Privacy;Clipping", "primary_area": "", "supplementary_material": "/attachment/407025fbbbc49e81df378ba659f6d1a747f2daed.pdf", "author": "Hanshen Xiao;Lam M. Nguyen;Marten van Dijk;Srini Devadas", "authorids": "~Hanshen_Xiao1;~Lam_M._Nguyen1;~Marten_van_Dijk1;~Srini_Devadas1", "gender": "M;M;M;M", "homepage": ";https://www.cwi.nl/people/marten-van-dijk;https://people.csail.mit.edu/devadas/;https://lamnguyen-mltd.github.io/", "dblp": "184/4766.html;32/1399.html;14/3973.html;181/1428", "google_scholar": "e3ZhEDEAAAAJ;byCWPiwAAAAJ;https://scholar.google.com.tw/citations?user=-yrzguMAAAAJ;DeFL5Q8AAAAJ", "orcid": ";0000-0001-9388-8050;0000-0001-8253-7714;", "linkedin": ";marten-van-dijk-51554012/?originalSubdomain=nl;;lam-m-nguyen-71b54750/", "or_profile": "~Hanshen_Xiao1;~Marten_van_Dijk1;~Srini_Devadas1;~Lam_M_Nguyen1", "aff": "Massachusetts Institute of Technology;Centrum voor Wiskunde en Informatica;Massachusetts Institute of Technology;IBM Research, Thomas J. Watson Research Center", "aff_domain": "mit.edu;cwi.nl;mit.edu;ibm.com", "position": "PhD student;Full Professor;Full Professor;Staff Research Scientist", "bibtex": "@misc{\nxiao2023why,\ntitle={Why Differentially Private Local {SGD} -- An Analysis of Synchronized-Only Biased Iterate},\nauthor={Hanshen Xiao and Lam M. Nguyen and Marten van Dijk and Srini Devadas},\nyear={2023},\nurl={https://openreview.net/forum?id=sXD4idbnBw}\n}", "github": "", "project": "", "reviewers": "SLyE;624r;WAtK;1PJn;7rJQ;y8pk", "site": "https://openreview.net/forum?id=sXD4idbnBw", "pdf_size": 964464, "rating": "3;4;5;5;7;7", "confidence": "2;4;5;1;5;3", "soundness": "2;2;3;3;3;3", "novelty": "2;2;3;3;3;3", "presentation": "1;3;4;4;3;3", "wc_summary": "106;54;74;59;89;75", "wc_strengths": "16;56;66;60;68;62", "wc_weaknesses": "116;269;240;53;256;18", "wc_questions": "5;7;171;3;44;1", "wc_limitations": "1;12;6;12;19;1", "wc_review": "244;398;557;187;476;157", "wc_reply_reviewers": "81;0;41;0;57;14", "wc_reply_authors": "428;324;378;0;43;38", "reply_reviewers": "1;0;1;0;1;1", "reply_authors": "2;2;2;1;2;2", "rating_avg": [ 5.166666666666667, 1.4624940645653537 ], "confidence_avg": [ 3.3333333333333335, 1.4907119849998598 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 76.16666666666667, 17.544388149934314 ], "wc_strengths_avg": [ 54.666666666666664, 17.72631440041111 ], "wc_weaknesses_avg": [ 158.66666666666666, 100.85908762008289 ], "wc_questions_avg": [ 38.5, 61.05666766755836 ], "wc_limitations_avg": [ 8.5, 6.5 ], "wc_review_avg": [ 336.5, 149.99416655323634 ], "wc_reply_reviewers_avg": [ 32.166666666666664, 30.218188047745166 ], "wc_reply_authors_avg": [ 201.83333333333334, 177.91235357769725 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.8333333333333333, 0.3726779962499649 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.35675303400633795, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6943841841626546814&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Massachusetts Institute of 
Technology;Centrum voor Wiskunde en Informatica;IBM", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://web.mit.edu;https://www.cwi.nl/;https://www.ibm.com/research", "aff_unique_abbr": "MIT;CWI;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Yorktown Heights", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Netherlands" }, { "title": "DiffPack: A Torsional Diffusion Model for Autoregressive Protein Side-Chain Packing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70263", "id": "sXMQPKbLXf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/96a54c09569ebbdd9ecb22f5012e6b66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sXMQPKbLXf", "openreview": "https://openreview.net/forum?id=sXMQPKbLXf", "poster": "/media/PosterPDFs/NeurIPS%202023/70263.png?t=1701849945.78476", "slides": "https://nips.cc/virtual/2023/poster/70263", "video": "https://nips.cc/virtual/2023/poster/70263", "author_site": "Yangtian Zhang, Zuobai Zhang, Bozitao Zhong, Sanchit Misra, Jian Tang", "tldr": "", "abstract": "Proteins play a critical role in carrying out biological functions, and their 3D structures are essential in determining their functions. \nAccurately predicting the conformation of protein side-chains given their backbones is important for applications in protein structure prediction, design and protein-protein interactions. Traditional methods are computationally intensive and have limited accuracy, while existing machine learning methods treat the problem as a regression task and overlook the restrictions imposed by the constant covalent bond lengths and angles. In this work, we present DiffPack, a torsional diffusion model that learns the joint distribution of side-chain torsional angles, the only degrees of freedom in side-chain packing, by diffusing and denoising on the torsional space. To avoid issues arising from simultaneous perturbation of all four torsional angles, we propose autoregressively generating the four torsional angles from $\chi_1$ to $\chi_4$ and training diffusion models for each torsional angle. We evaluate the method on several benchmarks for protein side-chain packing and show that our method achieves improvements of 11.9% and 13.5% in angle accuracy on CASP13 and CASP14, respectively, with a significantly smaller model size ($60\times$ fewer parameters). Additionally, we show the effectiveness of our method in enhancing side-chain predictions in the AlphaFold2 model. 
Code is available at https://github.com/DeepGraphLearning/DiffPack.", "keywords": "protein side-chain packing;diffusion models;autoregressive models;geometric deep learning", "primary_area": "", "supplementary_material": "", "author": "Yangtian Zhang;Zuobai Zhang;Bozitao Zhong;Sanchit Misra;Jian Tang", "authorids": "~Yangtian_Zhang1;~Zuobai_Zhang1;~Bozitao_Zhong1;sanchit.misra@intel.com;~Jian_Tang1", "gender": "M;M;M;;", "homepage": "https://zytzrh.github.io/;https://oxer11.github.io/;;;http://www.jian-tang.com", "dblp": "314/6014;256/9098.html;310/9462;;181/2667-5", "google_scholar": "q1lP-y0AAAAJ;UCDbNccAAAAJ;6k_LhSoAAAAJ;;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ", "orcid": "0000-0003-4969-6670;;0000-0001-9363-6099;;", "linkedin": ";;;;", "or_profile": "~Yangtian_Zhang1;~Zuobai_Zhang1;~Bozitao_Zhong1;sanchit.misra@intel.com;~Jian_Tang1", "aff": ";Mila - Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;;Mila, HEC Montreal", "aff_domain": ";umontreal.ca;umontreal.ca;;hec.ca", "position": ";PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023diffpack,\ntitle={DiffPack: A Torsional Diffusion Model for Autoregressive Protein Side-Chain Packing},\nauthor={Yangtian Zhang and Zuobai Zhang and Bozitao Zhong and Sanchit Misra and Jian Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sXMQPKbLXf}\n}", "github": "", "project": "", "reviewers": "Npgs;uGQU;KyoN;cTo4", "pdf_size": 7662699, "rating": "5;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;2;3", "presentation": "3;4;3;4", "wc_summary": "132;84;35;78", "wc_strengths": "59;102;31;85", "wc_weaknesses": "204;134;389;249", "wc_questions": "25;5;59;135", "wc_limitations": "9;14;59;23", "wc_review": "429;339;573;570", "wc_reply_reviewers": "206;10;34;47", "wc_reply_authors": "348;21;29;30", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 82.25, 34.382953625306826 ], "wc_strengths_avg": [ 69.25, 26.873546472321067 ], "wc_weaknesses_avg": [ 244.0, 93.20675941153624 ], "wc_questions_avg": [ 56.0, 49.52776998815917 ], "wc_limitations_avg": [ 26.25, 19.562400159489634 ], "wc_review_avg": [ 477.75, 99.00852236045138 ], "wc_reply_reviewers_avg": [ 74.25, 77.21520251867504 ], "wc_reply_authors_avg": [ 107.0, 139.18512851594454 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17521373277304401058&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";umontreal.ca;umontreal.ca;;hec.ca", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;HEC Montreal", "aff_unique_dep": "Mila;HEC Business School", "aff_unique_url": "https://www.umontreal.ca;https://www.hec.ca", "aff_unique_abbr": "UdeM;HEC", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Montr\u00e9al;;Montreal", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "DreamSparse: Escaping from Plato\u2019s Cave with 2D Diffusion Model Given Sparse Views", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70262", "id": "sZNBYvunEr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0a003511b09274348b8117f5f3b94c93-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sZNBYvunEr", "openreview": "https://openreview.net/forum?id=sZNBYvunEr", "poster": "/media/PosterPDFs/NeurIPS%202023/70262.png?t=1702204403.1359794", "slides": "https://nips.cc/virtual/2023/poster/70262", "video": "https://nips.cc/virtual/2023/poster/70262", "author_site": "Paul Yoo, Jiaxian Guo, Yutaka Matsuo, Shixiang (Shane) Gu", "tldr": "", "abstract": "Synthesizing novel view images from a few views is a challenging but practical problem. Existing methods often struggle with producing high-quality results or necessitate per-object optimization in such few-view settings due to the insufficient information provided. In this work, we explore leveraging the strong 2D priors in pre-trained diffusion models for synthesizing novel view images. 2D diffusion models, nevertheless, lack 3D awareness, leading to distorted image synthesis and compromising the identity. To address these problems, we propose $\\textit{DreamSparse}$, a framework that enables the frozen pre-trained diffusion model to generate geometry and identity-consistent novel view images. Specifically, DreamSparse incorporates a geometry module designed to capture features about spatial information from sparse views as a 3D prior. Subsequently, a spatial guidance model is introduced to convert rendered feature maps as spatial information for the generative process. This information is then used to guide the pre-trained diffusion model to\nencourage the synthesis of geometrically consistent images without further tuning. Leveraging the strong image priors in the pre-trained diffusion models, DreamSparse is capable of synthesizing high-quality novel views for both object and object-centric scene-level images and generalising to open-set images.\nExperimental results demonstrate that our framework can effectively synthesize novel view images from sparse views and outperforms baselines in both trained and open-set category images. 
More results can be found on our project page: https://sites.google.com/view/dreamsparse-webpage.", "keywords": "Novel View Synthesis;Diffusion Model", "primary_area": "", "supplementary_material": "", "author": "Paul Yoo;Jiaxian Guo;Yutaka Matsuo;Shixiang Shane Gu", "authorids": "~Paul_Yoo1;~Jiaxian_Guo2;~Yutaka_Matsuo1;~Shixiang_Shane_Gu1", "gender": "Not Specified;M;M;M", "homepage": "https://yoopaul.github.io;;http://ymatsuo.com;https://sites.google.com/view/gugurus/home", "dblp": "281/9904;206/6264;m/YMatsuo.html;121/0550", "google_scholar": ";wQgPocEAAAAJ;Dy8iau4AAAAJ;B8wslVsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Paul_Yoo1;~Jiaxian_Guo2;~Yutaka_Matsuo1;~Shixiang_Gu1", "aff": "The University of Tokyo;The University of Tokyo, The University of Tokyo;The University of Tokyo;OpenAI", "aff_domain": "weblab.t.u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;openai.com", "position": "Researcher;Postdoc;Associate Professor;Researcher", "bibtex": "@inproceedings{\nyoo2023dreamsparse,\ntitle={DreamSparse: Escaping from Plato{\\textquoteright}s Cave with 2D Diffusion Model Given Sparse Views},\nauthor={Paul Yoo and Jiaxian Guo and Yutaka Matsuo and Shixiang Shane Gu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sZNBYvunEr}\n}", "github": "", "project": "", "reviewers": "ZbmG;MN71;rgvp;fx5m;ZkGz", "pdf_size": 10376533, "rating": "5;5;6;6;7", "confidence": "4;5;4;4;4", "soundness": "3;3;3;4;3", "novelty": "2;1;3;2;3", "presentation": "3;3;2;4;3", "wc_summary": "65;70;71;139;150", "wc_strengths": "55;47;88;140;136", "wc_weaknesses": "364;174;110;301;368", "wc_questions": "124;30;76;40;83", "wc_limitations": "61;14;10;55;10", "wc_review": "669;335;355;675;747", "wc_reply_reviewers": "366;192;15;45;68", "wc_reply_authors": "1615;551;0;851;548", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;3;1;3;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 99.0, 37.36843587842552 ], "wc_strengths_avg": [ 93.2, 39.096802938347786 ], "wc_weaknesses_avg": [ 263.4, 103.92227865092258 ], "wc_questions_avg": [ 70.6, 33.52372294361114 ], "wc_limitations_avg": [ 30.0, 22.98695282111137 ], "wc_review_avg": [ 556.2, 174.72996308590007 ], "wc_reply_reviewers_avg": [ 137.2, 129.286348854007 ], "wc_reply_authors_avg": [ 713.0, 528.1071860900967 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6, 1.019803902718557 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7975492534014953007&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "weblab.t.u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;openai.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Tokyo;OpenAI", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://openai.com", "aff_unique_abbr": "UTokyo;OpenAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Japan;United States" }, { "title": "Quantizable Transformers: Removing Outliers by Helping Attention Heads Do Nothing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70261", "id": 
"sbusw6LD41", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/edbcb7583fd8921dad78adecfe06a99b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sbusw6LD41", "openreview": "https://openreview.net/forum?id=sbusw6LD41", "poster": "/media/PosterPDFs/NeurIPS%202023/70261.png?t=1701264835.704122", "slides": "https://nips.cc/virtual/2023/poster/70261", "video": "https://nips.cc/virtual/2023/poster/70261", "author_site": "Yelysei Bondarenko, Markus Nagel, Tijmen Blankevoort", "tldr": "", "abstract": "Transformer models have been widely adopted in various domains over the last years and especially large language models have advanced the field of AI significantly. Due to their size, the capability of these networks has increased tremendously, but this has come at the cost of a significant increase in necessary compute. Quantization is one of the most effective ways for reducing the computational time and memory consumption of neural networks. Many studies have shown, however, that modern transformer models tend to learn strong outliers in their activations, making them difficult to quantize. To retain acceptable performance, the existence of these outliers requires activations to be in higher-bitwidth or the use of different numeric formats, extra fine-tuning, or other workarounds. We show that strong outliers are related to very specific behavior of attention heads that try to learn a \"no-op\", or just a partial update of the residual. To achieve the exact zeros needed in the attention matrix for a no-update, the input to the softmax is pushed to be larger and larger during training, causing outliers in other parts of the network. Based on these observations, we propose two simple (independent) modifications to the attention mechanism - _clipped softmax_ and _gated attention_. We empirically show that models pre-trained using our methods learn significantly smaller outliers while maintaining and sometimes even improving the floating-point task performance. This enables us to quantize transformers to full INT8 quantization of the activations without any additional effort. 
We demonstrate the effectiveness of our methods on both language models (BERT, OPT) and vision transformers.", "keywords": "transformers;LLM;softmax;attention;outliers;quantization;post-training quantization", "primary_area": "", "supplementary_material": "/attachment/f82e9a8a3a4e2bd9f377e6dfa266b853e411c224.pdf", "author": "Yelysei Bondarenko;Markus Nagel;Tijmen Blankevoort", "authorids": "~Yelysei_Bondarenko2;~Markus_Nagel1;~Tijmen_Blankevoort1", "gender": ";M;M", "homepage": "https://github.com/yell;;", "dblp": "295/8514;38/1463;", "google_scholar": "4mHNa28AAAAJ;akNuBBEAAAAJ;OGEyrG8AAAAJ", "orcid": ";;", "linkedin": ";;tijmen-blankevoort-a5633a24/", "or_profile": "~Yelysei_Bondarenko2;~Markus_Nagel1;~Tijmen_Blankevoort1", "aff": "Qualcomm Inc, QualComm;Qualcomm AI Research;Qualcomm Inc, QualComm", "aff_domain": "qti.qualcomm.com;qualcomm.com;qti.qualcomm.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nbondarenko2023quantizable,\ntitle={Quantizable Transformers: Removing Outliers by Helping Attention Heads Do Nothing},\nauthor={Yelysei Bondarenko and Markus Nagel and Tijmen Blankevoort},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sbusw6LD41}\n}", "github": "", "project": "", "reviewers": "NwNc;Vxgc;sWJn;Akrm", "pdf_size": 15511008, "rating": "6;6;6;7", "confidence": "3;4;5;5", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "117;63;81;58", "wc_strengths": "56;38;49;90", "wc_weaknesses": "279;119;10;19", "wc_questions": "1;39;26;20", "wc_limitations": "5;15;1;30", "wc_review": "458;274;167;217", "wc_reply_reviewers": "17;12;45;12", "wc_reply_authors": "1;1;115;1", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.75, 23.14492384951828 ], "wc_strengths_avg": [ 58.25, 19.421315609401955 ], "wc_weaknesses_avg": [ 106.75, 108.2598147975508 ], "wc_questions_avg": [ 21.5, 13.683932183404009 ], "wc_limitations_avg": [ 12.75, 11.188722000300123 ], "wc_review_avg": [ 279.0, 110.0613465300148 ], "wc_reply_reviewers_avg": [ 21.5, 13.720422734012244 ], "wc_reply_authors_avg": [ 29.5, 49.363448015713004 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7680332718929107343&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "qti.qualcomm.com;qualcomm.com;qti.qualcomm.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Qualcomm Incorporated;Qualcomm", "aff_unique_dep": ";Qualcomm AI Research", "aff_unique_url": "https://www.qualcomm.com;https://www.qualcomm.com/research", "aff_unique_abbr": "Qualcomm;QAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unleashing the Full Potential of Product Quantization for Large-Scale Image Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70260", "id": "scG0cwftEe", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/c2469e35d469e3c0eca09dbe484eb474-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=scG0cwftEe", "openreview": "https://openreview.net/forum?id=scG0cwftEe", "poster": "/media/PosterPDFs/NeurIPS%202023/70260.png?t=1701756437.1561878", "slides": "https://nips.cc/virtual/2023/poster/70260", "video": "https://nips.cc/virtual/2023/poster/70260", "author_site": "Yu Liang, Shiliang Zhang, Li Ken Li, Xiaoyu Wang", "tldr": "", "abstract": "Due to its promising performance, deep hashing has become a prevalent method for approximate nearest neighbors search (ANNs). However, most of current deep hashing methods are validated on relatively small-scale datasets, leaving potential threats when are applied to large-scale real-world scenarios. Specifically, they can be constrained either by the computational cost due to the large number of training categories and samples, or unsatisfactory accuracy. To tackle those issues, we propose a novel deep hashing framework based on product quantization (PQ). It uses a softmax-based differentiable PQ branch to learn a set of predefined PQ codes of the classes. Our method is easy to implement, does not involve large-scale matrix operations, and learns highly discriminate compact codes. We validate our method on multiple large-scaled datasets, including ImageNet100, ImageNet1K, and Glint360K, where the category size scales from 100 to 360K and sample number scales from 10K to 17 million, respectively. Extensive experiments demonstrate the superiority of our method. Code is available at https://github.com/yuleung/FPPQ.", "keywords": "Deep Hash;Image Retrieval;Product Quantization", "primary_area": "", "supplementary_material": "/attachment/186ebaecb1f7d3bbb87145ad8bc4650127d0a91e.pdf", "author": "Yu Liang;Shiliang Zhang;Kenli Li;Xiaoyu Wang", "authorids": "~Yu_Liang2;~Shiliang_Zhang3;~Kenli_Li1;~Xiaoyu_Wang1", "gender": "M;M;M;M", "homepage": ";https://www.pkuvmc.com;http://csee.hnu.edu.cn/people/likenli;http://www.xiaoyumu.com", "dblp": "65/1700;52/6186;l/KenliLi.html;58/4775-2", "google_scholar": ";7phvKK4AAAAJ;https://scholar.google.com/citations?view_op=list_works;Bce64xEAAAAJ", "orcid": "0000-0003-2229-2430;0000-0001-9053-9314;0000-0002-2635-7716;0000-0002-6431-8822", "linkedin": ";;;", "or_profile": "~Yu_Liang2;~Shiliang_Zhang3;~Kenli_Li1;~Xiaoyu_Wang1", "aff": "Hunan University;Peking University;Hunan University;Intellifusion", "aff_domain": "hnu.edu.cn;pku.edu.cn;hnu.edu.cn;intellif.com", "position": "PhD student;Associate Professor;Full Professor;Chief Scientist", "bibtex": "@inproceedings{\nliang2023unleashing,\ntitle={Unleashing the Full Potential of Product Quantization for Large-Scale Image Retrieval},\nauthor={Yu Liang and Shiliang Zhang and Kenli Li and Xiaoyu Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=scG0cwftEe}\n}", "github": "", "project": "", "reviewers": "hk7b;WmwN;kMZn;AU32", "pdf_size": 4680461, "rating": "5;6;6;7", "confidence": "3;4;3;5", "soundness": "2;3;3;3", "novelty": "3;3;3;2", "presentation": "2;3;3;2", "wc_summary": "86;49;58;66", "wc_strengths": "56;85;23;55", "wc_weaknesses": "625;95;40;85", "wc_questions": "13;25;23;43", "wc_limitations": "35;17;47;1", "wc_review": "815;271;191;250", "wc_reply_reviewers": "205;117;0;23", "wc_reply_authors": "1067;347;99;12", "reply_reviewers": "1;2;0;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], 
"confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.75, 13.663363421939708 ], "wc_strengths_avg": [ 54.75, 21.935986415021322 ], "wc_weaknesses_avg": [ 211.25, 239.77528542366497 ], "wc_questions_avg": [ 26.0, 10.816653826391969 ], "wc_limitations_avg": [ 25.0, 17.4928556845359 ], "wc_review_avg": [ 381.75, 251.8505261062601 ], "wc_reply_reviewers_avg": [ 86.25, 81.3737519105516 ], "wc_reply_authors_avg": [ 381.25, 414.55902776323666 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13328577802799737416&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "hnu.edu.cn;pku.edu.cn;hnu.edu.cn;intellif.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Hunan University;Peking University;Intellifusion", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hunu.edu.cn/;http://www.pku.edu.cn;https://www.intellifusion.com/", "aff_unique_abbr": "HNU;Peking U;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "VAST: A Vision-Audio-Subtitle-Text Omni-Modality Foundation Model and Dataset", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70259", "id": "scYa9DYUAy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e6b2b48b5ed90d07c305932729927781-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=scYa9DYUAy", "openreview": "https://openreview.net/forum?id=scYa9DYUAy", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70259", "video": "https://nips.cc/virtual/2023/poster/70259", "author_site": "Sihan Chen, Handong Li, Qunbo Wang, Zijia Zhao, Mingzhen Sun, Xinxin Zhu, Jing Liu", "tldr": "", "abstract": "Vision and text have been fully explored in contemporary video-text foundational models, while other modalities such as audio and subtitles in videos have not received sufficient attention. In this paper, we resort to establish connections between multi-modality video tracks, including Vision, Audio, and Subtitle, and Text by exploring an automatically generated large-scale omni-modality video caption dataset called VAST-27M. Specifically, we first collect 27 million open-domain video clips and separately train a vision and an audio captioner to generate vision and audio captions. Then, we employ an off-the-shelf Large Language Model (LLM) to integrate the generated captions, together with subtitles and instructional prompts into omni-modality captions. Based on the proposed VAST-27M dataset, we train an omni-modality video-text foundational model named VAST, which can perceive and process vision, audio, and subtitle modalities from video, and better support various tasks including vision-text, audio-text, and multi-modal video-text tasks (retrieval, captioning and QA). Extensive experiments have been conducted to demonstrate the effectiveness of our proposed VAST-27M corpus and VAST foundation model. 
VAST achieves 22 new state-of-the-art results on various cross-modality benchmarks.", "keywords": "Cross-Modality Foundation Model;Cross-Modality Pretraining Dataset", "primary_area": "", "supplementary_material": "/attachment/6d46edc2d8804bb3e613e1cf4e98f53abab31210.pdf", "author": "Sihan Chen;Handong Li;Qunbo Wang;Zijia Zhao;Mingzhen Sun;Xinxin Zhu;Jing Liu", "authorids": "~Sihan_Chen3;~Handong_Li1;~Qunbo_Wang1;~Zijia_Zhao1;~Mingzhen_Sun1;~Xinxin_Zhu1;~Jing_Liu1", "gender": "M;M;;F;;F;", "homepage": ";https://github.com/lihanddd;;https://github.com/iva-mzsun;https://scholar.google.com/citations?user=MGL0W4gAAAAJ&hl=zh-CN&oi=ao;http://www.nlpr.ia.ac.cn/iva/liujing/;https://blog.csdn.net/JoeCucu?type=blog", "dblp": ";;228/4336;296/4017;;72/2590-1.html;296/3659", "google_scholar": "4pHKj8kAAAAJ;-LnWwgIAAAAJ;;;MGL0W4gAAAAJ;sOI-S7oAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Sihan_Chen3;~Handong_Li1;~Qunbo_Wang1;~Mingzhen_Sun1;~Xinxin_Zhu1;~Jing_Liu1;~Joe_Z1", "aff": ", Institute of automation, Chinese academy of science;Beijing Institute of Technology;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;, Institute of automation, Chinese academy of science;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "nlpr.ia.ac.cn;bit.edu.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn", "position": "PhD student;Undergrad student;Assistant Professor;PhD student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2023vast,\ntitle={{VAST}: A Vision-Audio-Subtitle-Text Omni-Modality Foundation Model and Dataset},\nauthor={Sihan Chen and Handong Li and Qunbo Wang and Zijia Zhao and Mingzhen Sun and Xinxin Zhu and Jing Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=scYa9DYUAy}\n}", "github": "", "project": "", "reviewers": "zFyv;v49r;1sdZ;hHHX", "pdf_size": 7880423, "rating": "3;5;5;6", "confidence": "4;4;5;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "1;2;3;3", "wc_summary": "79;110;52;58", "wc_strengths": "52;52;45;58", "wc_weaknesses": "156;306;360;100", "wc_questions": "194;2;5;23", "wc_limitations": "66;2;37;1", "wc_review": "547;472;499;240", "wc_reply_reviewers": "10;73;10;17", "wc_reply_authors": "729;299;23;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 74.75, 22.68672519338126 ], "wc_strengths_avg": [ 51.75, 4.602988159880492 ], "wc_weaknesses_avg": [ 230.5, 106.12610423453789 ], "wc_questions_avg": [ 56.0, 80.07808689023483 ], "wc_limitations_avg": [ 26.5, 27.02313823374332 ], "wc_review_avg": [ 439.5, 118.27193242692874 ], "wc_reply_reviewers_avg": [ 27.5, 26.424420523447623 ], "wc_reply_authors_avg": [ 268.5, 288.7607140869408 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 131, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3518887429033065474&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nlpr.ia.ac.cn;bit.edu.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn", 
"author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Beijing Institute of Technology", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "CAS;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Retrieval-Augmented Multiple Instance Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70258", "id": "scaKiAtbI3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4e5f5e4504759e3957e3eef2a44a535e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=scaKiAtbI3", "openreview": "https://openreview.net/forum?id=scaKiAtbI3", "poster": "/media/PosterPDFs/NeurIPS%202023/70258.png?t=1700021247.0724697", "slides": "https://nips.cc/virtual/2023/poster/70258", "video": "https://nips.cc/virtual/2023/poster/70258", "author_site": "Yufei CUI, Ziquan Liu, Yixin Chen, Yuchen Lu, Xinyue Yu, Xue (Steve) Liu, Tei-Wei Kuo, Miguel Rodrigues, Chun Jason Xue, Chun Jason XUE, Antoni Chan", "tldr": "", "abstract": "Multiple Instance Learning (MIL) is a crucial weakly supervised learning method applied across various domains, e.g., medical diagnosis based on whole slide images (WSIs). Recent advancements in MIL algorithms have yielded exceptional performance when the training and test data originate from the same domain, such as WSIs obtained from the same hospital. However, this paper reveals a performance deterioration of MIL models when tested on an out-of-domain test set, exemplified by WSIs sourced from a novel hospital. To address this challenge, this paper introduces the Retrieval-AugMented MIL (RAM-MIL) framework, which integrates Optimal Transport (OT) as the distance metric for nearest neighbor retrieval. The development of RAM-MIL is driven by two key insights. First, a theoretical discovery indicates that reducing the input's intrinsic dimension can minimize the approximation error in attention-based MIL. Second, previous studies highlight a link between input intrinsic dimension and the feature merging process with the retrieved data. Empirical evaluations conducted on WSI classification demonstrate that the proposed RAM-MIL framework achieves state-of-the-art performance in both in-domain scenarios, where the training and retrieval data are in the same domain, and more crucially, in out-of-domain scenarios, where the (unlabeled) retrieval data originates from a different domain. Furthermore, the use of the transportation matrix derived from OT renders the retrieval results interpretable at the instance level, in contrast to the vanilla $l_2$ distance, and allows for visualization for human experts. *Code can be found at \\url{https://github.com/ralphc1212/ram-mil*.", "keywords": "Multiple Instance Learning;Whole Slide Imaging;Nearest Neighbor Retrieval", "primary_area": "", "supplementary_material": "/attachment/63509bd15409125a001df02e74c29f7f2213443e.zip", "author": "Yufei CUI;Ziquan Liu;Yixin CHEN;Yuchen Lu;Xinyue Yu;Xue Liu;Tei-Wei Kuo;Miguel R. D. Rodrigues;Chun Jason Xue;Antoni B. 
Chan", "authorids": "~Yufei_CUI2;~Ziquan_Liu1;~Yixin_CHEN11;~Yuchen_Lu1;~Xinyue_Yu2;~Xue_Liu1;~Tei-Wei_Kuo1;~Miguel_R._D._Rodrigues1;~Chun_Jason_Xue1;~Antoni_B._Chan1", "gender": "M;M;;M;F;M;M;M;M;M", "homepage": ";https://sites.google.com/view/ziquanliu;;http://jackhaha363.github.io/;;http://www.cs.mcgill.ca/~xueliu/;http://www.csie.ntu.edu.tw/~ktw;https://www.ee.ucl.ac.uk/iiml/;https://www.cs.cityu.edu.hk/~jasonxue/;http://www.cs.cityu.edu.hk/~abchan/", "dblp": "188/0049;207/9035;;223/4762;;l/XueLiu;07/3181.html;21/6763;x/ChunJasonXue.html;55/5814", "google_scholar": ";https://scholar.google.com.hk/citations?user=x28OqBkAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;https://scholar.google.ca/citations?hl=en;;https://scholar.google.com.tw/citations?user=rfLIRakAAAAJ;https://scholar.google.com.tw/citations?user=gBuHlIQAAAAJ;;C6oyGQkAAAAJ;j4vFSn8AAAAJ", "orcid": ";;0000-0002-6373-7531;;;;;;;0000-0002-2886-2513", "linkedin": ";;;;selina-xinyue-yu-830b011b0/;;;;;", "or_profile": "~Yufei_CUI2;~Ziquan_Liu1;~Yixin_CHEN11;~Yuchen_Lu1;~Xinyue_Yu2;~Xue_Liu1;~Tei-Wei_Kuo1;~Miguel_R._D._Rodrigues1;~Chun_Jason_Xue1;~Antoni_B._Chan1", "aff": "McGill University;University College London, University of London;Harbin Institute of Technology;University of Montreal;Universit\u00e9 de Montr\u00e9al;McGill University;National Taiwan University;University College London;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cs.mcgill.ca;ucl.ac.uk;hit.edu.cn;umontreal.ca;umontreal.ca;mcgill.ca;ntu.edu.tw;ucl.ac.uk;cityu.edu.hk;cityu.edu.hk", "position": "Postdoc;Postdoc;MS student;PhD student;MS student;Full Professor;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncui2023retrievalaugmented,\ntitle={Retrieval-Augmented Multiple Instance Learning},\nauthor={Yufei CUI and Ziquan Liu and Yixin CHEN and Yuchen Lu and Xinyue Yu and Xue Liu and Tei-Wei Kuo and Miguel R. D. Rodrigues and Chun Jason Xue and Antoni B. 
Chan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=scaKiAtbI3}\n}", "github": "", "project": "", "reviewers": "WXnS;5E1m;7vyh;yGH7;q4PZ", "pdf_size": 3589250, "rating": "4;5;6;6;7", "confidence": "4;5;4;4;4", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "95;123;54;39;98", "wc_strengths": "178;54;90;48;31", "wc_weaknesses": "370;372;156;67;45", "wc_questions": "54;4;118;162;34", "wc_limitations": "6;22;23;32;13", "wc_review": "703;575;441;348;221", "wc_reply_reviewers": "0;734;135;28;51", "wc_reply_authors": "27;1837;212;0;0", "reply_reviewers": "0;2;2;1;1", "reply_authors": "2;5;2;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 81.8, 30.78571097116323 ], "wc_strengths_avg": [ 80.2, 52.54483799575368 ], "wc_weaknesses_avg": [ 202.0, 142.90836224658094 ], "wc_questions_avg": [ 74.4, 57.58333092136995 ], "wc_limitations_avg": [ 19.2, 8.930845424706442 ], "wc_review_avg": [ 457.6, 168.68384629240586 ], "wc_reply_reviewers_avg": [ 189.6, 275.907665714456 ], "wc_reply_authors_avg": [ 415.2, 715.3022857505769 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.29417420270727607, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3062511686192011959&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "cs.mcgill.ca;ucl.ac.uk;hit.edu.cn;umontreal.ca;umontreal.ca;mcgill.ca;ntu.edu.tw;ucl.ac.uk;cityu.edu.hk;cityu.edu.hk", "author_num": 10, "aff_unique_index": "0;1;2;3;4;0;5;1;6;6", "aff_unique_norm": "McGill University;University College London;Harbin Institute of Technology;University of Montreal;Universit\u00e9 de Montr\u00e9al;National Taiwan University;City University of Hong Kong", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.mcgill.ca;https://www.ucl.ac.uk;http://www.hit.edu.cn/;https://www.umontreal.ca;https://www.umontreal.ca;https://www.ntu.edu.tw;https://www.cityu.edu.hk", "aff_unique_abbr": "McGill;UCL;HIT;UM;UdeM;NTU;CityU", "aff_campus_unique_index": "1;2;3;3", "aff_campus_unique": ";Harbin;Taiwan;Hong Kong SAR", "aff_country_unique_index": "0;1;2;0;0;0;2;1;2;2", "aff_country_unique": "Canada;United Kingdom;China" }, { "title": "On Sample-Efficient Offline Reinforcement Learning: Data Diversity, Posterior Sampling and Beyond", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70257", "id": "sdlh4gVOj8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c056d6cf7b7108418f2b8c307dfaab02-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sdlh4gVOj8", "openreview": "https://openreview.net/forum?id=sdlh4gVOj8", "poster": "/media/PosterPDFs/NeurIPS%202023/70257.png?t=1702084780.1862009", "slides": "https://nips.cc/virtual/2023/poster/70257", "video": "https://nips.cc/virtual/2023/poster/70257", "author_site": "Thanh Nguyen-Tang, Raman Arora", "tldr": "", "abstract": "We seek to understand what facilitates sample-efficient learning from historical datasets for sequential decision-making, a problem that is popularly known as offline reinforcement learning (RL). 
Further, we are interested in algorithms that enjoy sample efficiency while leveraging (value) function approximation. In this paper, we address these fundamental questions by (i) proposing a notion of data diversity that subsumes the previous notions of coverage measures in offline RL and (ii) using this notion to \\emph{unify} three distinct classes of offline RL algorithms based on version spaces (VS), regularized optimization (RO), and posterior sampling (PS). We establish that VS-based, RO-based, and PS-based algorithms, under standard assumptions, achieve \\emph{comparable} sample efficiency, which recovers the state-of-the-art sub-optimality bounds for finite and linear model classes with the standard assumptions. This result is surprising, given that the prior work suggested an unfavorable sample complexity of the RO-based algorithm compared to the VS-based algorithm, whereas posterior sampling is rarely considered in offline RL due to its explorative nature. Notably, our proposed model-free PS-based algorithm for offline RL is \\emph{novel}, with sub-optimality bounds that are \\emph{frequentist} (i.e., worst-case) in nature.", "keywords": "reinforcement learning;offline reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Thanh Nguyen-Tang;Raman Arora", "authorids": "~Thanh_Nguyen-Tang1;~Raman_Arora1", "gender": "M;M", "homepage": "http://www.cs.jhu.edu/~raman/Home.html;https://thanhnguyentang.github.io/", "dblp": ";287/5102.html", "google_scholar": "Spe0xdkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-1917-2190", "linkedin": ";thanhnguyentang/", "or_profile": "~Raman_Arora1;~Thanh_Tang_Nguyen2", "aff": "Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu", "position": "Associate Professor;Postdoc", "bibtex": "@inproceedings{\nnguyen-tang2023on,\ntitle={On Sample-Efficient Offline Reinforcement Learning: Data Diversity, Posterior Sampling and Beyond},\nauthor={Thanh Nguyen-Tang and Raman Arora},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sdlh4gVOj8}\n}", "github": "", "project": "", "reviewers": "niYV;ufo5;YpYn;FhSe", "pdf_size": 602943, "rating": "4;5;6;7", "confidence": "4;3;3;2", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "55;87;88;55", "wc_strengths": "42;58;109;61", "wc_weaknesses": "202;124;256;38", "wc_questions": "1;1;59;92", "wc_limitations": "1;1;9;2", "wc_review": "301;271;521;248", "wc_reply_reviewers": "438;58;22;11", "wc_reply_authors": "1355;264;28;10", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 71.25, 16.253845698787718 ], "wc_strengths_avg": [ 67.5, 25.024987512484397 ], "wc_weaknesses_avg": [ 155.0, 82.24962005991274 ], "wc_questions_avg": [ 38.25, 39.03443992168967 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 335.25, 108.87693741100546 ], "wc_reply_reviewers_avg": [ 132.25, 177.3786557058092 ], "wc_reply_authors_avg": [ 414.25, 552.3116760489497 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9486832980505139, "gs_citation": 9, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=3109084823790795760&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "jhu.edu;jhu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Data Market Design through Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70256", "id": "sgCrNMOuXp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1577ea3eaf8dacb99f64e4496c3ecddf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sgCrNMOuXp", "openreview": "https://openreview.net/forum?id=sgCrNMOuXp", "poster": "/media/PosterPDFs/NeurIPS%202023/70256.png?t=1701897650.3206308", "slides": "https://nips.cc/virtual/2023/poster/70256", "video": "https://nips.cc/virtual/2023/poster/70256", "author_site": "Sai Srivatsa Ravindranath, Yanchen Jiang, David Parkes", "tldr": "", "abstract": "The _data market design_ problem is a problem in economic theory to find a set of signaling schemes (statistical experiments) to maximize expected revenue to the information seller, where each experiment reveals some of the information known to a seller and has a corresponding price. Each buyer has their own decision to make in a world environment, and their subjective expected value for the information associated with a particular experiment comes from the improvement in this decision and depends on their prior and value for different outcomes. In a setting with multiple buyers, a buyer's expected value for an experiment may also depend on the information sold to others. We introduce the application of deep learning for the design of revenue-optimal data markets, looking to expand the frontiers of what can be understood and achieved. Relative to earlier work on deep learning for auction design, we must learn signaling schemes rather than allocation rules and handle _obedience constraints_ — these arising from modeling the downstream actions of buyers — in addition to incentive constraints on bids. Our experiments demonstrate that this new deep learning framework can almost precisely replicate all known solutions from theory, expand to more complex settings, and be used to establish the optimality of new designs for data markets and make conjectures in regard to the structure of optimal designs.", "keywords": "Data Markets;Information Design;Differentiable Economics;Economics;Deep Learning;Mechanism Design;Algorithmic Game Theory", "primary_area": "", "supplementary_material": "", "author": "Sai Srivatsa Ravindranath;Yanchen Jiang;David C. 
Parkes", "authorids": "~Sai_Srivatsa_Ravindranath2;~Yanchen_Jiang1;~David_C._Parkes1", "gender": "M;M;M", "homepage": "http://saisrivatsa.com/;https://yanchenjiang.github.io/;https://parkes.seas.harvard.edu/", "dblp": "169/0389;359/6965;p/DavidCParkes.html", "google_scholar": "https://scholar.google.co.in/citations?user=dKJ9ca4AAAAJ;ULefWRkAAAAJ;JUn8PgwAAAAJ", "orcid": ";;0000-0002-2701-3464", "linkedin": ";;", "or_profile": "~Sai_Srivatsa_Ravindranath2;~Yanchen_Jiang1;~David_C._Parkes1", "aff": "Harvard University;Harvard University;Google", "aff_domain": "harvard.edu;g.harvard.edu;deepmind.com", "position": "PhD student;PhD student;Senior Research Scientist", "bibtex": "@inproceedings{\nravindranath2023data,\ntitle={Data Market Design through Deep Learning},\nauthor={Sai Srivatsa Ravindranath and Yanchen Jiang and David C. Parkes},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sgCrNMOuXp}\n}", "github": "", "project": "", "reviewers": "nspR;u4po;QSAL;wNRV;zvsA", "pdf_size": 1126168, "rating": "5;5;5;6;6", "confidence": "3;3;3;4;2", "soundness": "2;3;3;4;4", "novelty": "2;2;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "57;88;70;249;164", "wc_strengths": "38;57;35;64;86", "wc_weaknesses": "374;68;67;86;153", "wc_questions": "361;218;21;19;110", "wc_limitations": "147;46;1;2;71", "wc_review": "977;477;194;420;584", "wc_reply_reviewers": "132;214;0;50;64", "wc_reply_authors": "207;631;0;0;289", "reply_reviewers": "1;2;0;1;2", "reply_authors": "2;2;1;1;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 125.6, 71.99055493604699 ], "wc_strengths_avg": [ 56.0, 18.601075237738275 ], "wc_weaknesses_avg": [ 149.6, 116.52742166546035 ], "wc_questions_avg": [ 145.8, 129.96830382827963 ], "wc_limitations_avg": [ 53.4, 53.876154279978074 ], "wc_review_avg": [ 530.4, 257.1105598764858 ], "wc_reply_reviewers_avg": [ 92.0, 74.15659107591179 ], "wc_reply_authors_avg": [ 225.4, 232.5963026361339 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5615937269036551407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "harvard.edu;g.harvard.edu;deepmind.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harvard University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.harvard.edu;https://www.google.com", "aff_unique_abbr": "Harvard;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FD-Align: Feature Discrimination Alignment for Fine-tuning Pre-Trained Models in Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70255", "id": "shXnfALjuH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/87cf37e2085655bad7bad0a014e0edad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=shXnfALjuH", "openreview": "https://openreview.net/forum?id=shXnfALjuH", "poster": "/media/PosterPDFs/NeurIPS%202023/70255.png?t=1700205388.766189", "slides": 
"https://nips.cc/virtual/2023/poster/70255", "video": "https://nips.cc/virtual/2023/poster/70255", "author_site": "Kun Song, Huimin Ma, Bochao Zou, Huishuai Zhang, Weiran Huang", "tldr": "", "abstract": "Due to the limited availability of data, existing few-shot learning methods trained from scratch fail to achieve satisfactory performance. In contrast, large-scale pre-trained models such as CLIP demonstrate remarkable few-shot and zero-shot capabilities. To enhance the performance of pre-trained models for downstream tasks, fine-tuning the model on downstream data is frequently necessary. However, fine-tuning the pre-trained model leads to a decrease in its generalizability in the presence of distribution shift, while the limited number of samples in few-shot learning makes the model highly susceptible to overfitting. Consequently, existing methods for fine-tuning few-shot learning primarily focus on fine-tuning the model's classification head or introducing additional structure. In this paper, we introduce a fine-tuning approach termed Feature Discrimination Alignment (FD-Align). Our method aims to bolster the model's generalizability by preserving the consistency of spurious features across the fine-tuning process. Extensive experimental results validate the efficacy of our approach for both ID and OOD tasks. Once fine-tuned, the model can seamlessly integrate with existing methods, leading to performance improvements. Our code can be found in https://github.com/skingorz/FD-Align.", "keywords": "few-shot learning;CLIP;fine-tuning", "primary_area": "", "supplementary_material": "/attachment/c5024c62f5b5c68fb97d92f1e4301aea0d37c4da.zip", "author": "Kun Song;Huimin Ma;Bochao Zou;Huishuai Zhang;Weiran Huang", "authorids": "~Kun_Song3;~Huimin_Ma1;~Bochao_Zou1;~Huishuai_Zhang3;~Weiran_Huang1", "gender": "M;F;M;M;M", "homepage": "https://www.sanker.plus;http://server.3dimagelab.cn:5000;;https://www.weiranhuang.com;https://huishuai-git.github.io", "dblp": "96/855-4;69/7694-1;197/9774;170/0073-1;144/7537", "google_scholar": "mUDDC_4AAAAJ;32hwVLEAAAAJ;Cb29A3cAAAAJ;AjJ2rf8AAAAJ;w1srHyIAAAAJ", "orcid": "0009-0003-0004-3780;;;;", "linkedin": ";;;;", "or_profile": "~Kun_Song3;~Huimin_Ma1;~Bochao_Zou1;~Weiran_Huang1;~Huishuai_Zhang2", "aff": "University of Science and Technology Beijing;University of Science and Technology Beijing;University of Science and Technology Beijing;Shanghai Jiaotong University;Microsoft Research Asia", "aff_domain": "ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;sjtu.edu.cn;microsoft.com", "position": "PhD student;Full Professor;Lecturer;Associate Professor;Researcher", "bibtex": "@inproceedings{\nsong2023fdalign,\ntitle={{FD}-Align: Feature Discrimination Alignment for Fine-tuning Pre-Trained Models in Few-Shot Learning},\nauthor={Kun Song and Huimin Ma and Bochao Zou and Huishuai Zhang and Weiran Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=shXnfALjuH}\n}", "github": "", "project": "", "reviewers": "TEuf;ePYg;vxTm;EKx5;vJi7;LEKB", "pdf_size": 1148948, "rating": "3;3;4;4;5;6", "confidence": "5;4;5;3;4;3", "soundness": "1;1;2;2;3;3", "novelty": "1;1;2;2;2;3", "presentation": "3;2;2;2;3;2", "wc_summary": "67;51;92;78;91;68", "wc_strengths": "30;9;57;44;54;83", "wc_weaknesses": "186;176;226;304;166;30", "wc_questions": "6;2;17;2;60;118", "wc_limitations": "1;1;7;1;5;14", "wc_review": "290;239;399;429;376;313", "wc_reply_reviewers": "123;171;50;179;0;0", "wc_reply_authors": 
"540;623;802;552;0;0", "reply_reviewers": "1;1;1;1;0;0", "reply_authors": "3;3;3;4;1;1", "rating_avg": [ 4.166666666666667, 1.0671873729054748 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 1.8333333333333333, 0.6871842709362769 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 74.5, 14.38459824488215 ], "wc_strengths_avg": [ 46.166666666666664, 23.031982594258405 ], "wc_weaknesses_avg": [ 181.33333333333334, 81.88338591490272 ], "wc_questions_avg": [ 34.166666666666664, 42.530054732571834 ], "wc_limitations_avg": [ 4.833333333333333, 4.705198071164368 ], "wc_review_avg": [ 341.0, 65.98232086450632 ], "wc_reply_reviewers_avg": [ 87.16666666666667, 74.52385896849113 ], "wc_reply_authors_avg": [ 419.5, 308.69496810497793 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5738190417570045, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14867100550447459758&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;sjtu.edu.cn;microsoft.com", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "University of Science and Technology Beijing;Shanghai Jiao Tong University;Microsoft", "aff_unique_dep": ";;Research", "aff_unique_url": "http://www.ustb.edu.cn;https://www.sjtu.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "USTB;SJTU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Survival Instinct in Offline Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70254", "id": "shePL2nbwl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c3e969ea20542a6a11e6caeac736a0b9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=shePL2nbwl", "openreview": "https://openreview.net/forum?id=shePL2nbwl", "poster": "/media/PosterPDFs/NeurIPS%202023/70254.png?t=1702487723.102361", "slides": "https://nips.cc/virtual/2023/poster/70254", "video": "https://nips.cc/virtual/2023/poster/70254", "author_site": "Anqi Li, Dipendra Misra, Andrey Kolobov, Ching-An Cheng", "tldr": "", "abstract": "We present a novel observation about the behavior of offline reinforcement learning (RL) algorithms: on many benchmark datasets, offline RL can produce well-performing and safe policies even when trained with \"wrong\" reward labels, such as those that are zero everywhere or are negatives of the true rewards. This phenomenon cannot be easily explained by offline RL's return maximization objective. Moreover, it gives offline RL a degree of robustness that is uncharacteristic of its online RL counterparts, which are known to be sensitive to reward design. We demonstrate that this surprising robustness property is attributable to an interplay between the notion of *pessimism* in offline RL algorithms and certain implicit biases in common data collection practices. As we prove in this work, pessimism endows the agent with a *survival instinct*, i.e., an incentive to stay within the data support in the long term, while the limited and biased data coverage further constrains the set of survival policies. 
Formally, given a reward class -- which may not even contain the true reward -- we identify conditions on the training data distribution that enable offline RL to learn a near-optimal and safe policy from any reward within the class. We argue that the survival instinct should be taken into account when interpreting results from existing offline RL benchmarks and when creating future ones. Our empirical and theoretical results suggest a new paradigm for offline RL, whereby an agent is \"nudged\" to learn a desirable behavior with imperfect reward but purposely biased data coverage. Please visit our website [https://survival-instinct.github.io](https://survival-instinct.github.io) for accompanied code and videos.", "keywords": "Offline RL;safe RL", "primary_area": "", "supplementary_material": "", "author": "Anqi Li;Dipendra Misra;Andrey Kolobov;Ching-An Cheng", "authorids": "~Anqi_Li1;~Dipendra_Misra1;~Andrey_Kolobov1;~Ching-An_Cheng1", "gender": ";M;M;M", "homepage": "https://anqili.github.io;https://dipendramisra.com/;https://www.microsoft.com/en-us/research/people/akolobov/;http://www.chinganc.com", "dblp": ";218/6569;95/3462;123/6369", "google_scholar": "HG08FCMAAAAJ;rIoPIFsAAAAJ;xEWgxBsAAAAJ;bMZFLZ_V4goC", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Anqi_Li1;~Dipendra_Misra1;~Andrey_Kolobov1;~Ching-An_Cheng1", "aff": "Department of Computer Science, University of Washington;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "cs.washington.edu;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nli2023survival,\ntitle={Survival Instinct in Offline Reinforcement Learning},\nauthor={Anqi Li and Dipendra Misra and Andrey Kolobov and Ching-An Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=shePL2nbwl}\n}", "github": "", "project": "", "reviewers": "k1TU;Sf8f;kbuj;NS2u", "pdf_size": 11945879, "rating": "6;6;7;8", "confidence": "3;4;3;3", "soundness": "4;4;4;3", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "wc_summary": "90;79;163;110", "wc_strengths": "41;23;112;105", "wc_weaknesses": "72;207;157;269", "wc_questions": "172;97;85;193", "wc_limitations": "3;25;38;171", "wc_review": "378;431;555;848", "wc_reply_reviewers": "133;15;15;247", "wc_reply_authors": "16;0;0;358", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 110.5, 32.283896914715854 ], "wc_strengths_avg": [ 70.25, 38.854697270729055 ], "wc_weaknesses_avg": [ 176.25, 72.08805379534115 ], "wc_questions_avg": [ 136.75, 46.54231945230061 ], "wc_limitations_avg": [ 59.25, 65.72052571305254 ], "wc_review_avg": [ 553.0, 182.02884386821776 ], "wc_reply_reviewers_avg": [ 102.5, 96.33664930855754 ], "wc_reply_authors_avg": [ 93.5, 152.8487814802591 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11264477891549876119&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "cs.washington.edu;microsoft.com;microsoft.com;microsoft.com", "author_num": 4, 
"aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Washington;Microsoft", "aff_unique_dep": "Department of Computer Science;Microsoft Research", "aff_unique_url": "https://www.washington.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UW;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beyond MLE: Convex Learning for Text Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70253", "id": "sla7V80uWA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c3d419b754cb4de0a67a453cb28d959-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sla7V80uWA", "openreview": "https://openreview.net/forum?id=sla7V80uWA", "poster": "/media/PosterPDFs/NeurIPS%202023/70253.png?t=1702264530.9954898", "slides": "https://nips.cc/virtual/2023/poster/70253", "video": "https://nips.cc/virtual/2023/poster/70253", "author_site": "Chenze Shao, Zhengrui Ma, Min Zhang, Yang Feng", "tldr": "", "abstract": "Maximum likelihood estimation (MLE) is a statistical method used to estimate the parameters of a probability distribution that best explain the observed data. In the context of text generation, MLE is often used to train generative language models, which can then be used to generate new text. However, we argue that MLE is not always necessary and optimal, especially for closed-ended text generation tasks like machine translation. In these tasks, the goal of model is to generate the most appropriate response, which does not necessarily require it to estimate the entire data distribution with MLE. To this end, we propose a novel class of training objectives based on convex functions, which enables text generation models to focus on highly probable outputs without having to estimate the entire data distribution. We investigate the theoretical properties of the optimal predicted distribution when applying convex functions to the loss, demonstrating that convex functions can sharpen the optimal distribution, thereby enabling the model to better capture outputs with high probabilities. Experiments on various text generation tasks and models show the effectiveness of our approach. It enables autoregressive models to bridge the gap between greedy and beam search, and facilitates the learning of non-autoregressive models with a maximum improvement of 9+ BLEU points. Moreover, our approach also exhibits significant impact on large language models (LLMs), substantially enhancing their generative capability on various tasks. 
Source code is available at \\url{https://github.com/ictnlp/Convex-Learning}.", "keywords": "Maximum likelihood estimation;Convex function;Text generation", "primary_area": "", "supplementary_material": "/attachment/ee7603fb5555c4cc147ef59ea865905e2beb5c10.zip", "author": "Chenze Shao;Zhengrui Ma;Min Zhang;Yang Feng", "authorids": "~Chenze_Shao1;~Zhengrui_Ma1;~Min_Zhang9;~Yang_Feng4", "gender": "M;M;M;", "homepage": ";http://nlp.ict.ac.cn/~mazhengrui;https://zhangmin-nlp-ai.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "227/3123;276/3133;83/5342-5;07/6095-4.html", "google_scholar": "LH_rZf8AAAAJ;dUgq6tEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chenze_Shao1;~Zhengrui_Ma1;~Min_Zhang9;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Harbin Institute of Technology, Shenzhen;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;hit.edu.cn;ict.ac.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nshao2023beyond,\ntitle={Beyond {MLE}: Convex Learning for Text Generation},\nauthor={Chenze Shao and Zhengrui Ma and Min Zhang and Yang Feng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sla7V80uWA}\n}", "github": "", "project": "", "reviewers": "MNhM;CKPv;hk8n;pbsn;v7Us", "pdf_size": 442828, "rating": "3;5;7;7;7", "confidence": "4;5;4;3;5", "soundness": "2;2;3;4;4", "novelty": "2;2;3;4;4", "presentation": "3;3;3;4;4", "wc_summary": "81;39;120;209;65", "wc_strengths": "40;45;72;84;80", "wc_weaknesses": "119;112;128;163;44", "wc_questions": "59;161;142;93;127", "wc_limitations": "11;26;12;31;21", "wc_review": "310;383;474;580;337", "wc_reply_reviewers": "0;0;29;0;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 102.8, 59.24322746103557 ], "wc_strengths_avg": [ 64.2, 18.203296404772406 ], "wc_weaknesses_avg": [ 113.2, 38.78865813610984 ], "wc_questions_avg": [ 116.4, 36.329602254910526 ], "wc_limitations_avg": [ 20.2, 7.782030583337487 ], "wc_review_avg": [ 416.8, 98.81578821220828 ], "wc_reply_reviewers_avg": [ 10.4, 12.877888025604198 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13363062095621217, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11569675323528856061&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ict.ac.cn;ict.ac.cn;hit.edu.cn;ict.ac.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Harbin Institute of Technology", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://en.hit.edu.cn/", "aff_unique_abbr": "CAS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "AV-NeRF: Learning Neural Fields for 
Real-World Audio-Visual Scene Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70252", "id": "snY3FOnlQi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/760dff0f9c0e9ed4d7e22918c73351d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=snY3FOnlQi", "openreview": "https://openreview.net/forum?id=snY3FOnlQi", "poster": "/media/PosterPDFs/NeurIPS%202023/70252.png?t=1702227722.8087742", "slides": "https://nips.cc/virtual/2023/poster/70252", "video": "https://nips.cc/virtual/2023/poster/70252", "author_site": "Susan Liang, Chao Huang, Yapeng Tian, Anurag Kumar, Chenliang Xu", "tldr": "", "abstract": "Can machines recording an audio-visual scene produce realistic, matching audio-visual experiences at novel positions and novel view directions? We answer it by studying a new task---real-world audio-visual scene synthesis---and a first-of-its-kind NeRF-based approach for multimodal learning. Concretely, given a video recording of an audio-visual scene, the task is to synthesize new videos with spatial audios along arbitrary novel camera trajectories in that scene. We propose an acoustic-aware audio generation module that integrates prior knowledge of audio propagation into NeRF, in which we implicitly associate audio generation with the 3D geometry and material properties of a visual environment. Furthermore, we present a coordinate transformation module that expresses a view direction relative to the sound source, enabling the model to learn sound source-centric acoustic fields. To facilitate the study of this new task, we collect a high-quality Real-World Audio-Visual Scene (RWAVS) dataset. We demonstrate the advantages of our method on this real-world dataset and the simulation-based SoundSpaces dataset. 
Notably, we refer readers to view our demo videos for convincing comparisons.", "keywords": "Scene synthesis;audio-visual;NeRF", "primary_area": "", "supplementary_material": "/attachment/2c9fb3be53f1e64da7dce7400eb68c6e9b490960.zip", "author": "Susan Liang;Chao Huang;Yapeng Tian;Anurag Kumar;Chenliang Xu", "authorids": "~Susan_Liang1;~Chao_Huang3;~Yapeng_Tian1;~Anurag_Kumar1;~Chenliang_Xu1", "gender": "M;M;M;M;M", "homepage": "https://liangsusan-git.github.io/;https://wikichao.github.io/;http://www.yapengtian.com/;https://anuragkr90.github.io/;https://www.cs.rochester.edu/~cxu22/", "dblp": "299/1813;18/4087;176/4020;33/2741-3;117/4770", "google_scholar": "x3HBE2gAAAAJ;5yYP5RIAAAAJ;lxCqdpoAAAAJ;HH5cCX0AAAAJ;https://scholar.google.com.tw/citations?user=54HfyDIAAAAJ", "orcid": "0009-0009-3523-1339;;;;", "linkedin": ";;;anurag-kumar-90;", "or_profile": "~Susan_Liang1;~Chao_Huang3;~Yapeng_Tian1;~Anurag_Kumar1;~Chenliang_Xu1", "aff": "University of Rochester;Department of Computer Science, University of Rochester;University of Texas at Dallas;Meta;University of Rochester", "aff_domain": "rochester.edu;cs.rochester.edu;utdallas.edu;fb.com;rochester.edu", "position": "PhD student;PhD student;Assistant Professor;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nliang2023avnerf,\ntitle={{AV}-Ne{RF}: Learning Neural Fields for Real-World Audio-Visual Scene Synthesis},\nauthor={Susan Liang and Chao Huang and Yapeng Tian and Anurag Kumar and Chenliang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=snY3FOnlQi}\n}", "github": "", "project": "", "reviewers": "GFnA;b4XE;4psZ;Kx4V;iWmP", "pdf_size": 14913352, "rating": "5;5;6;6;8", "confidence": "4;4;4;5;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "138;183;92;135;82", "wc_strengths": "77;92;71;84;107", "wc_weaknesses": "86;98;89;220;100", "wc_questions": "2;15;80;51;163", "wc_limitations": "2;9;36;64;1", "wc_review": "305;397;368;554;453", "wc_reply_reviewers": "21;15;46;85;22", "wc_reply_authors": "19;26;21;198;21", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 126.0, 36.2381014955254 ], "wc_strengths_avg": [ 86.2, 12.544321424453376 ], "wc_weaknesses_avg": [ 118.6, 50.97293399442492 ], "wc_questions_avg": [ 62.2, 57.366889404952055 ], "wc_limitations_avg": [ 22.4, 24.368832553078942 ], "wc_review_avg": [ 415.4, 84.13940812722657 ], "wc_reply_reviewers_avg": [ 37.8, 25.871992578848655 ], "wc_reply_authors_avg": [ 57.0, 70.5379330573274 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8418216489350270334&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "rochester.edu;cs.rochester.edu;utdallas.edu;fb.com;rochester.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Rochester;University of Texas at Dallas;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.rochester.edu;https://www.utdallas.edu;https://meta.com", "aff_unique_abbr": "U of R;UT Dallas;Meta", "aff_campus_unique_index": 
"1", "aff_campus_unique": ";Dallas", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dynamically Masked Discriminator for GANs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70251", "id": "sodl2c3aTM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/47fc64d05a394955b1ae2487bfef1ab0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sodl2c3aTM", "openreview": "https://openreview.net/forum?id=sodl2c3aTM", "poster": "/media/PosterPDFs/NeurIPS%202023/70251.png?t=1699665838.5769322", "slides": "https://nips.cc/virtual/2023/poster/70251", "video": "https://nips.cc/virtual/2023/poster/70251", "author_site": "Wentian Zhang, Haozhe Liu, Bing Li, Jinheng Xie, Yawen Huang, Yuexiang Li, Yefeng Zheng, Bernard Ghanem", "tldr": "", "abstract": "Training Generative Adversarial Networks (GANs) remains a challenging problem. The discriminator trains the generator by learning the distribution of real/generated data. However, the distribution of generated data changes throughout the training process, which is difficult for the discriminator to learn. In this paper, we propose a novel method for GANs from the viewpoint of online continual learning. We observe that the discriminator model, trained on historically generated data, often slows down its adaptation to the changes in the new arrival generated data, which accordingly decreases the quality of generated results. By treating the generated data in training as a stream, we propose to detect whether the discriminator slows down the learning of new knowledge in generated data. Therefore, we can explicitly enforce the discriminator to learn new knowledge fast. Particularly, we propose a new discriminator, which automatically detects its retardation and then dynamically masks its features, such that the discriminator can adaptively learn the temporally-vary distribution of generated data. 
Experimental results show our method outperforms the state-of-the-art approaches.", "keywords": "Generative model;Generative Adversarial Network", "primary_area": "", "supplementary_material": "/attachment/34562009e7a6ecefb7950ba2419767a266b0c097.zip", "author": "Wentian Zhang;Haozhe Liu;Bing Li;Jinheng Xie;Yawen Huang;Yuexiang Li;Yefeng Zheng;Bernard Ghanem", "authorids": "~Wentian_Zhang1;~Haozhe_Liu1;~Bing_Li7;~Jinheng_Xie1;~Yawen_Huang4;~Yuexiang_Li1;~Yefeng_Zheng2;~Bernard_Ghanem1", "gender": "M;M;F;M;M;M;M;F", "homepage": ";https://haozheliu-st.github.io/;https://cemse.kaust.edu.sa/vcc/people/person/bing-li;https://sierkinhane.github.io/;https://yuexiangli.github.io;https://en.westlake.edu.cn/faculty/yefeng-zheng.html;https://ivul.kaust.edu.sa;https://ieeexplore.ieee.org/author/37085817742", "dblp": "229/0641;201/5238;13/2692-24;273/4278;165/6204;44/6510;37/2516;122/0805.html", "google_scholar": "OnflwHYAAAAJ;QX51P54AAAAJ;;smbRMokAAAAJ;WsKu4EMAAAAJ;vAIECxgAAAAJ;rVsGTeEAAAAJ;", "orcid": ";;;;;0000-0003-2195-2847;0000-0002-5534-587X;", "linkedin": ";;;;;yefeng-zheng-bb45641/?originalSubdomain=cn;bernardghanem/;", "or_profile": "~Wentian_Zhang1;~Haozhe_Liu1;~Bing_Li7;~Jinheng_Xie1;~Yuexiang_Li1;~Yefeng_Zheng2;~Bernard_Ghanem1;~YW_Huang1", "aff": "Shenzhen University;King Abdullah University of Science and Technology;KAUST;National University of Singapore;Tencent Jarvis Lab;Tencent Jarvis Lab;King Abdullah University of Science and Technology;Tencent", "aff_domain": "szu.edu.cn;kaust.edu.sa;kaust.edu.sa;nus.edu;tencent.com;tencent.com;kaust.edu.sa;tencent.com", "position": "MS student;PhD student;Postdoc;PhD student;Researcher;Director;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023dynamically,\ntitle={Dynamically Masked Discriminator for {GAN}s},\nauthor={Wentian Zhang and Haozhe Liu and Bing Li and Jinheng Xie and Yawen Huang and Yuexiang Li and Yefeng Zheng and Bernard Ghanem},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sodl2c3aTM}\n}", "github": "", "project": "", "reviewers": "J72A;ZBcx;zVaP;dcqD;zHGZ", "pdf_size": 3050282, "rating": "4;4;5;7;7", "confidence": "4;4;4;5;4", "soundness": "2;4;3;4;3", "novelty": "2;3;2;4;3", "presentation": "3;4;3;4;3", "wc_summary": "90;57;227;87;104", "wc_strengths": "74;60;34;26;111", "wc_weaknesses": "112;75;376;41;64", "wc_questions": "23;7;5;39;38", "wc_limitations": "20;26;1;4;10", "wc_review": "319;225;643;197;327", "wc_reply_reviewers": "0;115;0;18;31", "wc_reply_authors": "0;400;190;34;39", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 113.0, 59.022029785496194 ], "wc_strengths_avg": [ 61.0, 30.41052449399714 ], "wc_weaknesses_avg": [ 133.6, 123.35088163446582 ], "wc_questions_avg": [ 22.4, 14.554724318928201 ], "wc_limitations_avg": [ 12.2, 9.47417542586161 ], "wc_review_avg": [ 342.2, 158.7884126754846 ], "wc_reply_reviewers_avg": [ 32.8, 42.73359334294274 ], "wc_reply_authors_avg": [ 132.6, 148.90480180303118 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5897678246195885, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11010423673914144089&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "email": "szu.edu.cn;kaust.edu.sa;kaust.edu.sa;nus.edu;tencent.com;tencent.com;kaust.edu.sa;tencent.com", "author_num": 8, "aff_unique_index": "0;1;1;2;3;3;1;3", "aff_unique_norm": "Shenzhen University;King Abdullah University of Science and Technology;National University of Singapore;Tencent", "aff_unique_dep": ";;;Jarvis Lab", "aff_unique_url": "https://www.szu.edu.cn;https://www.kast.kau.edu.sa;https://www.nus.edu.sg;https://www.tencent.com", "aff_unique_abbr": "SZU;KAUST;NUS;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;0;0;1;0", "aff_country_unique": "China;Saudi Arabia;Singapore" }, { "title": "Unsupervised Semantic Correspondence Using Stable Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70250", "id": "sovxUzPzLN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a074a28c3a6f2056562d00649ae6416-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sovxUzPzLN", "openreview": "https://openreview.net/forum?id=sovxUzPzLN", "poster": "/media/PosterPDFs/NeurIPS%202023/70250.png?t=1702086731.089621", "slides": "https://nips.cc/virtual/2023/poster/70250", "video": "https://nips.cc/virtual/2023/poster/70250", "author_site": "Eric Hedlin, Gopal Sharma, Shweta Mahajan, Hossam Isack, Abhishek Kar, Andrea Tagliasacchi, Kwang Moo Yi", "tldr": "", "abstract": "Text-to-image diffusion models are now capable of generating images that are often indistinguishable from real images. To generate such images, these models must understand the semantics of the objects they are asked to generate. In this work we show that, without any training, one can leverage this semantic knowledge within diffusion models to find semantic correspondences \u2013 locations in multiple images that have the same semantic meaning. Specifically, given an image, we optimize the prompt embeddings of these models for maximum attention on the regions of interest. These optimized embeddings capture semantic information about the location, which can then be transferred to another image. 
By doing so we obtain results on par with the strongly supervised state of the art on the PF-Willow dataset and significantly outperform (20.9% relative for the SPair-71k dataset) any existing weakly- or unsupervised method on PF-Willow, CUB-200 and SPair-71k datasets.", "keywords": "Semantic Correspondence;Stable Diffusion;Optimization-based Inference", "primary_area": "", "supplementary_material": "/attachment/2ff20762e773d5ed6fbc6b6bd725695e20186455.pdf", "author": "Eric Hedlin;Gopal Sharma;Shweta Mahajan;Hossam Isack;Abhishek Kar;Andrea Tagliasacchi;Kwang Moo Yi", "authorids": "~Eric_Hedlin1;~Gopal_Sharma3;~Shweta_Mahajan1;~Hossam_Isack1;~Abhishek_Kar1;~Andrea_Tagliasacchi2;~Kwang_Moo_Yi1", "gender": "M;;M;M;M;M;M", "homepage": "https://ehedlin.github.io/;https://s-mahajan.github.io/;https://abhishekkar.info;http://taiya.github.io;https://hippogriff.github.io/;http://www.hossamisack.com/;https://www.cs.ubc.ca/~kmyi/", "dblp": "319/5678;249/2353;46/11300;46/5514;190/8365;93/8652;30/5082", "google_scholar": "x6t__GoAAAAJ;https://scholar.google.de/citations?user=DUKzkPMAAAAJ;TIpmrtoAAAAJ;1RmD-YsAAAAJ;Oe3bmrQAAAAJ;https://scholar.google.ca/citations?user=GNd6TgYAAAAJ;https://scholar.google.com.tw/citations?user=pr6rIJEAAAAJ", "orcid": ";0000-0002-5413-9142;;;;;0000-0001-9036-3822", "linkedin": ";shweta-mahajan-4808a017/;abhishekkar/;;;;kwang-moo-yi-194932b3", "or_profile": "~Eric_Hedlin1;~Shweta_Mahajan1;~Abhishek_Kar1;~Andrea_Tagliasacchi2;~Gopal_Sharma1;~Hossam_N._Isack1;~Kwang_Yi1", "aff": "University of British Columbia;University of British Columbia;Google;Google DeepMind;University of British Columbia;Google;University of British Columbia", "aff_domain": "ubc.ca;ubc.ca;google.com;google.com;ubc.ca;google.com;ubc.ca", "position": "PhD student;Postdoc;Researcher;Researcher;Postdoc;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhedlin2023unsupervised,\ntitle={Unsupervised Semantic Correspondence Using Stable Diffusion},\nauthor={Eric Hedlin and Gopal Sharma and Shweta Mahajan and Hossam Isack and Abhishek Kar and Andrea Tagliasacchi and Kwang Moo Yi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sovxUzPzLN}\n}", "github": "", "project": "", "reviewers": "Vgvc;pziK;5igN;DBqD;6281", "pdf_size": 10864548, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;3", "soundness": "2;3;3;2;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "71;60;218;154;49", "wc_strengths": "57;38;96;56;46", "wc_weaknesses": "115;102;68;192;41", "wc_questions": "4;7;2;339;68", "wc_limitations": "29;1;14;12;9", "wc_review": "276;208;398;753;213", "wc_reply_reviewers": "25;0;16;33;21", "wc_reply_authors": "44;0;65;12;12", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 110.4, 65.33176868874743 ], "wc_strengths_avg": [ 58.6, 19.955951493226276 ], "wc_weaknesses_avg": [ 103.6, 51.25075609198366 ], "wc_questions_avg": [ 84.0, 129.87224491784224 ], "wc_limitations_avg": [ 13.0, 9.143303560529969 ], "wc_review_avg": [ 369.6, 203.56875988225698 ], "wc_reply_reviewers_avg": [ 19.0, 11.009087155618309 ], "wc_reply_authors_avg": [ 26.6, 24.129649810969077 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 
0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8750000000000001, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15427087366768604108&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ubc.ca;ubc.ca;google.com;google.com;ubc.ca;google.com;ubc.ca", "author_num": 7, "aff_unique_index": "0;0;1;1;0;1;0", "aff_unique_norm": "University of British Columbia;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ubc.ca;https://www.google.com", "aff_unique_abbr": "UBC;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;2;0;1;0", "aff_country_unique": "Canada;United States;United Kingdom" }, { "title": "Belief Projection-Based Reinforcement Learning for Environments with Delayed Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70249", "id": "sq0m11cUMV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0252a434b18962c94910c07cd9a7fecc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sq0m11cUMV", "openreview": "https://openreview.net/forum?id=sq0m11cUMV", "poster": "/media/PosterPDFs/NeurIPS%202023/70249.png?t=1699401392.1632879", "slides": "https://nips.cc/virtual/2023/poster/70249", "video": "https://nips.cc/virtual/2023/poster/70249", "author_site": "Jangwon Kim, Hangyeol Kim, Jiwook Kang, Jongchan Baek, Soohee Han", "tldr": "", "abstract": "We present a novel actor-critic algorithm for an environment with delayed feedback, which addresses the state-space explosion problem of conventional approaches. Conventional approaches use an augmented state constructed from the last observed state and actions executed since visiting the last observed state. Using the augmented state space, the correct Markov decision process for delayed environments can be constructed; however, this causes the state space to explode as the number of delayed timesteps increases, leading to slow convergence. Our proposed algorithm, called Belief-Projection-Based Q-learning (BPQL), addresses the state-space explosion problem by evaluating the values of the critic for which the input state size is equal to the original state-space size rather than that of the augmented one. We compare BPQL to traditional approaches in continuous control tasks and demonstrate that it significantly outperforms other algorithms in terms of asymptotic performance and sample efficiency. 
We also show that BPQL solves long-delayed environments, which conventional approaches are unable to do.", "keywords": "time-delay system;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/91b8f4eb9eafd9e27915f7fc3687c34080ce3033.zip", "author": "Jangwon Kim;Hangyeol Kim;Jiwook Kang;Jongchan Baek;Soohee Han", "authorids": "~Jangwon_Kim2;~Hangyeol_Kim1;~Jiwook_Kang1;~Jongchan_Baek1;~Soohee_Han1", "gender": "M;M;M;M;M", "homepage": ";;;;https://www.cocel.postech.ac.kr/", "dblp": ";;;202/0675;38/4328", "google_scholar": "QsbjNcUAAAAJ;https://scholar.google.co.kr/citations?user=gGPghCsAAAAJ;jDy3WAwAAAAJ;WwfXmHYAAAAJ;bZNLufsAAAAJ", "orcid": "0000-0003-0228-3502;;0000-0002-9262-5184;0000-0002-3098-9775;", "linkedin": ";;jiwook-kang-6617831ab/;jongchan-baek-79512584;", "or_profile": "~Jangwon_Kim2;~Hangyeol_Kim1;~Jiwook_Kang1;~Jongchan_Baek1;~Soohee_Han1", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology;POSTECH;Pohang University of Science and Technology;Pohang University of Science and Technology", "aff_domain": "postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.ac.kr", "position": "MS student;MS student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2023belief,\ntitle={Belief Projection-Based Reinforcement Learning for Environments with Delayed Feedback},\nauthor={Jangwon Kim and Hangyeol Kim and Jiwook Kang and Jongchan Baek and Soohee Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sq0m11cUMV}\n}", "github": "", "project": "", "reviewers": "3qfv;ozYa;iPGm;HRKg;MGmU", "pdf_size": 3340262, "rating": "4;6;7;7;7", "confidence": "1;2;3;3;3", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "2;2;3;4;3", "wc_summary": "187;165;43;111;47", "wc_strengths": "182;132;60;42;95", "wc_weaknesses": "419;681;211;63;175", "wc_questions": "1;138;114;3;161", "wc_limitations": "1;16;8;5;19", "wc_review": "790;1132;436;224;497", "wc_reply_reviewers": "19;92;380;0;71", "wc_reply_authors": "783;82;683;0;1497", "reply_reviewers": "1;1;2;0;2", "reply_authors": "2;2;3;1;3", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 110.6, 59.01050753891209 ], "wc_strengths_avg": [ 102.2, 50.423803902522074 ], "wc_weaknesses_avg": [ 309.8, 218.42930206361967 ], "wc_questions_avg": [ 83.4, 68.10756198837248 ], "wc_limitations_avg": [ 9.8, 6.734983296193095 ], "wc_review_avg": [ 615.8, 315.20050761380446 ], "wc_reply_reviewers_avg": [ 112.4, 137.91098578430945 ], "wc_reply_authors_avg": [ 609.0, 542.7533509799824 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9861168645694258, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15743283534492369123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Pohang", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "What\u2019s Left? Concept Grounding with Logic-Enhanced Foundation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70248", "id": "sq4o3tjWaj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/79fea214543ba263952ac3f4e5452b14-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sq4o3tjWaj", "openreview": "https://openreview.net/forum?id=sq4o3tjWaj", "poster": "/media/PosterPDFs/NeurIPS%202023/70248.png?t=1701415590.6883197", "slides": "https://nips.cc/virtual/2023/poster/70248", "video": "https://nips.cc/virtual/2023/poster/70248", "author_site": "Joy Hsu, Jiayuan Mao, Josh Tenenbaum, Jiajun Wu", "tldr": "", "abstract": "Recent works such as VisProg and ViperGPT have smartly composed foundation models for visual reasoning\u2014using large language models (LLMs) to produce programs that can be executed by pre-trained vision-language models. However, they operate in limited domains, such as 2D images, not fully exploiting the generalization of language: abstract concepts like \u201c*left*\u201d can also be grounded in 3D, temporal, and action data, as in moving to your *left*. This limited generalization stems from these inference-only methods\u2019 inability to learn or adapt pre-trained models to a new domain. We propose the **L**ogic-**E**nhanced **F**ounda**T**ion Model (**LEFT**), a unified framework that *learns* to ground and reason with concepts across domains with a differentiable, domain-independent, first-order logic-based program executor. LEFT has an LLM interpreter that outputs a program represented in a general, logic-based reasoning language, which is shared across all domains and tasks. LEFT\u2019s executor then executes the program with trainable domain-specific grounding modules. We show that LEFT flexibly learns concepts in four domains: 2D images, 3D scenes, human motions, and robotic manipulation. It exhibits strong reasoning ability in a wide variety of tasks, including those that are complex and not seen during training, and can be easily applied to new domains.", "keywords": "concept learning;visual reasoning;large language models;neuro-symbolic learning", "primary_area": "", "supplementary_material": "", "author": "Joy Hsu;Jiayuan Mao;Joshua B. Tenenbaum;Jiajun Wu", "authorids": "~Joy_Hsu2;~Jiayuan_Mao1;~Joshua_B._Tenenbaum1;~Jiajun_Wu1", "gender": "F;F;;M", "homepage": "https://web.stanford.edu/~joycj/;http://jiayuanm.com;;https://jiajunwu.com", "dblp": "258/5012;200/8283;t/JoshuaBTenenbaum;117/4768", "google_scholar": "Zr7RJT4AAAAJ;-xaOIZIAAAAJ;;2efgcS0AAAAJ", "orcid": ";0000-0003-4798-3748;;0000-0002-4176-343X", "linkedin": ";;;jiajunwu/", "or_profile": "~Joy_Hsu2;~Jiayuan_Mao1;~Joshua_B._Tenenbaum1;~Jiajun_Wu1", "aff": "Stanford University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Stanford University", "aff_domain": "stanford.edu;mit.edu;mit.edu;stanford.edu", "position": "PhD student;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nhsu2023whats,\ntitle={What{\\textquoteright}s Left? Concept Grounding with Logic-Enhanced Foundation Models},\nauthor={Joy Hsu and Jiayuan Mao and Joshua B. 
Tenenbaum and Jiajun Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sq4o3tjWaj}\n}", "github": "", "project": "", "reviewers": "B8BR;DugZ;2ce2;xPiC", "pdf_size": 2486935, "rating": "5;6;6;7", "confidence": "4;3;5;5", "soundness": "2;2;3;3", "novelty": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "109;146;29;64", "wc_strengths": "136;113;25;75", "wc_weaknesses": "348;504;160;423", "wc_questions": "49;175;4;126", "wc_limitations": "27;29;12;39", "wc_review": "669;967;230;727", "wc_reply_reviewers": "17;33;32;26", "wc_reply_authors": "65;65;18;65", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.0, 44.32268042436062 ], "wc_strengths_avg": [ 87.25, 42.02603359823527 ], "wc_weaknesses_avg": [ 358.75, 127.32119815647354 ], "wc_questions_avg": [ 88.5, 66.31176366226433 ], "wc_limitations_avg": [ 26.75, 9.65336728815391 ], "wc_review_avg": [ 648.25, 266.06707331047187 ], "wc_reply_reviewers_avg": [ 27.0, 6.363961030678928 ], "wc_reply_authors_avg": [ 53.25, 20.351596988934308 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14715209544059585340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "stanford.edu;mit.edu;mit.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Sparsity-Preserving Differentially Private Training of Large Embedding Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70247", "id": "sqTcCXkG4P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/23ff02034404b65776080cbf7148addd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sqTcCXkG4P", "openreview": "https://openreview.net/forum?id=sqTcCXkG4P", "poster": "/media/PosterPDFs/NeurIPS%202023/70247.png?t=1702065769.3926694", "slides": "https://nips.cc/virtual/2023/poster/70247", "video": "https://nips.cc/virtual/2023/poster/70247", "author_site": "Badih Ghazi, Yangsibo Huang, Pritish Kamath, Ravi Kumar, Pasin Manurangsi, Amer Sinha, Chiyuan Zhang", "tldr": "", "abstract": "As the use of large embedding models in recommendation systems and language applications increases, concerns over user data privacy have also risen. DP-SGD, a training algorithm that combines differential privacy with stochastic gradient descent, has been the workhorse in protecting user privacy without compromising model accuracy by much. However, applying DP-SGD naively to embedding models can destroy gradient sparsity, leading to reduced training efficiency. To address this issue, we present two new algorithms, DP-FEST and DP-AdaFEST, that preserve gradient sparsity during the private training of large embedding models. 
Our algorithms achieve substantial reductions ($10^6 \\times$) in gradient size, while maintaining comparable levels of accuracy, on benchmark real-world datasets.", "keywords": "Differential Privacy;Recommendation Systems;Embedding Models;Efficient Machine Learning", "primary_area": "", "supplementary_material": "/attachment/a2aa596269e32f242662faec46aa6b93ebbe1fdd.zip", "author": "Badih Ghazi;Yangsibo Huang;Pritish Kamath;Ravi Kumar;Pasin Manurangsi;Amer Sinha;Chiyuan Zhang", "authorids": "~Badih_Ghazi1;~Yangsibo_Huang2;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Amer_Sinha1;~Chiyuan_Zhang1", "gender": ";F;M;M;M;M;M", "homepage": "https://sites.google.com/view/badihghazi/home;https://hazelsuko07.github.io/yangsibo/;https://pritishkamath.github.io/;https://sites.google.com/site/ravik53/;https://pasin30055.github.io/;;http://pluskid.org", "dblp": "125/2134;;https://dblp.org/pers/k/Kamath:Pritish.html;k/RaviKumar.html;133/2059;;21/8315", "google_scholar": "GBJLTN8AAAAJ;NMPUDa0AAAAJ;1JFARhUAAAAJ;J_XhIsgAAAAJ;35hM-PkAAAAJ;;l_G2vr0AAAAJ", "orcid": ";;;0000-0002-2203-2586;;;", "linkedin": "badih-ghazi-608379132/;;;ravi-kumar-a3a9631;;amersinha/;", "or_profile": "~Badih_Ghazi1;~Yangsibo_Huang2;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Amer_Sinha1;~Chiyuan_Zhang1", "aff": "Google;Princeton University;Google Research;Google;Google;Research, Google;Google", "aff_domain": "google.com;princeton.edu;google.com;google.com;google.com;research.google.com;google.com", "position": "Researcher;PhD student;Research Scientist;Research Scientist;Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\nghazi2023sparsitypreserving,\ntitle={Sparsity-Preserving Differentially Private Training of Large Embedding Models},\nauthor={Badih Ghazi and Yangsibo Huang and Pritish Kamath and Ravi Kumar and Pasin Manurangsi and Amer Sinha and Chiyuan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sqTcCXkG4P}\n}", "github": "", "project": "", "reviewers": "KMWx;ah8R;AuCL;upr1", "pdf_size": 1284558, "rating": "3;5;6;6", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "1;2;2;2", "presentation": "1;3;2;3", "wc_summary": "52;48;91;87", "wc_strengths": "43;38;18;61", "wc_weaknesses": "78;89;91;73", "wc_questions": "71;22;174;258", "wc_limitations": "1;1;6;16", "wc_review": "245;198;380;495", "wc_reply_reviewers": "869;14;38;166", "wc_reply_authors": "2131;163;47;279", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 69.5, 19.60229578391266 ], "wc_strengths_avg": [ 40.0, 15.313392831113555 ], "wc_weaknesses_avg": [ 82.75, 7.495832175282475 ], "wc_questions_avg": [ 131.25, 91.45866552711121 ], "wc_limitations_avg": [ 6.0, 6.123724356957945 ], "wc_review_avg": [ 329.5, 116.59009391882314 ], "wc_reply_reviewers_avg": [ 271.75, 349.630072362204 ], "wc_reply_authors_avg": [ 655.0, 856.1074698891489 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17881120756988495404&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, 
"email": "google.com;princeton.edu;google.com;google.com;google.com;research.google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Google;Princeton University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.princeton.edu", "aff_unique_abbr": "Google;Princeton", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "HASSOD: Hierarchical Adaptive Self-Supervised Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70246", "id": "sqkGJjIRfG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9ecf4d84999a61783c360c3782e801e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sqkGJjIRfG", "openreview": "https://openreview.net/forum?id=sqkGJjIRfG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70246", "video": "https://nips.cc/virtual/2023/poster/70246", "author_site": "Shengcao Cao, Dhiraj Joshi, Liangyan Gui, Yu-Xiong Wang", "tldr": "", "abstract": "The human visual perception system demonstrates exceptional capabilities in learning without explicit supervision and understanding the part-to-whole composition of objects. Drawing inspiration from these two abilities, we propose Hierarchical Adaptive Self-Supervised Object Detection (HASSOD), a novel approach that learns to detect objects and understand their compositions without human supervision. HASSOD employs a hierarchical adaptive clustering strategy to group regions into object masks based on self-supervised visual representations, adaptively determining the number of objects per image. Furthermore, HASSOD identifies the hierarchical levels of objects in terms of composition, by analyzing coverage relations between masks and constructing tree structures. This additional self-supervised learning task leads to improved detection performance and enhanced interpretability. Lastly, we abandon the inefficient multi-round self-training process utilized in prior methods and instead adapt the Mean Teacher framework from semi-supervised learning, which leads to a smoother and more efficient training process. Through extensive experiments on prevalent image datasets, we demonstrate the superiority of HASSOD over existing methods, thereby advancing the state of the art in self-supervised object detection. Notably, we improve Mask AR from 20.2 to 22.5 on LVIS, and from 17.0 to 26.0 on SA-1B. 
Project page: https://HASSOD-NeurIPS23.github.io.", "keywords": "self-supervised learning;object detection", "primary_area": "", "supplementary_material": "", "author": "Shengcao Cao;Dhiraj Joshi;Liangyan Gui;Yu-Xiong Wang", "authorids": "~Shengcao_Cao1;~Dhiraj_Joshi1;~Liangyan_Gui1;~Yu-Xiong_Wang1", "gender": "M;M;F;", "homepage": "https://shengcao-cao.github.io/;https://dhirajjoshi.weebly.com/;;https://yxw.cs.illinois.edu/", "dblp": "236/4681;https://dblp.uni-trier.de/pers/hd/j/Joshi:Dhiraj;155/5055;35/10700", "google_scholar": "yMYTz3AAAAAJ;TYmV4V8AAAAJ;3aE0r9QAAAAJ;T_Q-xDkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shengcao_Cao1;~Dhiraj_Joshi1;~Liangyan_Gui1;~Yu-Xiong_Wang1", "aff": "Adobe Systems;IBM Research;UIUC;Department of Computer Science, University of Illinois Urbana-Champaign", "aff_domain": "adobe.com;ibm.com;cs.illinois.edu;cs.illinois.edu", "position": "Intern;Research Scientist;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2023hassod,\ntitle={{HASSOD}: Hierarchical Adaptive Self-Supervised Object Detection},\nauthor={Shengcao Cao and Dhiraj Joshi and Liangyan Gui and Yu-Xiong Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sqkGJjIRfG}\n}", "github": "", "project": "", "reviewers": "p4z2;wiMX;cMyQ;s64Y", "pdf_size": 17268509, "rating": "4;5;5;5", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "97;73;88;110", "wc_strengths": "145;30;20;49", "wc_weaknesses": "409;28;217;149", "wc_questions": "61;148;22;66", "wc_limitations": "32;10;19;49", "wc_review": "744;289;366;423", "wc_reply_reviewers": "341;22;71;0", "wc_reply_authors": "989;281;452;180", "reply_reviewers": "3;1;1;0", "reply_authors": "4;4;4;3", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 13.47219358530748 ], "wc_strengths_avg": [ 61.0, 49.60342730094363 ], "wc_weaknesses_avg": [ 200.75, 137.97893861020964 ], "wc_questions_avg": [ 74.25, 45.86052223863134 ], "wc_limitations_avg": [ 27.5, 14.67140075112121 ], "wc_review_avg": [ 455.5, 173.22023553846128 ], "wc_reply_reviewers_avg": [ 108.5, 136.67205273939513 ], "wc_reply_authors_avg": [ 475.5, 312.00360574839516 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4419036223611239997&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "email": "adobe.com;ibm.com;cs.illinois.edu;cs.illinois.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Adobe;IBM;University of Illinois Urbana-Champaign", "aff_unique_dep": "Adobe Systems Incorporated;IBM Research;", "aff_unique_url": "https://www.adobe.com;https://www.ibm.com/research;https://illinois.edu", "aff_unique_abbr": "Adobe;IBM;UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stable and low-precision training for large-scale vision-language models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70245", "id": "sqqASmpA2R",
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/20bd42d82998bc61732c00452228e814-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sqqASmpA2R", "openreview": "https://openreview.net/forum?id=sqqASmpA2R", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70245", "video": "https://nips.cc/virtual/2023/poster/70245", "author_site": "Mitchell Wortsman, Tim Dettmers, Luke Zettlemoyer, Ari Morcos, Ali Farhadi, Ludwig Schmidt", "tldr": "", "abstract": "We introduce new methods for 1) accelerating and 2) stabilizing training for large language-vision models. 1) For acceleration, we introduce SwitchBack, a linear layer for int8 quantized training which provides a speed-up of 13-25% while matching the performance of bfloat16 training within 0.1 percentage points for the 1B parameter CLIP ViT-Huge---the largest int8 training to date. Our main focus is int8 as GPU support for float8 is rare, though we also analyze float8 training through simulation. While SwitchBack proves effective for float8, we show that standard techniques are also successful if the network is trained and initialized so that large feature magnitudes are discouraged, which we accomplish via layer-scale initialized with zeros. 2) For stability, we analyze loss spikes and find they consistently occur 1-8 iterations after the squared gradients become under-estimated by their AdamW second moment estimator. As a result, we recommend an AdamW-Adafactor hybrid which avoids loss spikes when training a CLIP ViT-Huge model and outperforms gradient clipping at the scales we test.", "keywords": "CLIP;int8;stability", "primary_area": "", "supplementary_material": "/attachment/11d53a3caa62bc2537d77b578db7f0c4578c99fd.pdf", "author": "Mitchell Wortsman;Tim Dettmers;Luke Zettlemoyer;Ari S. Morcos;Ali Farhadi;Ludwig Schmidt", "authorids": "~Mitchell_Wortsman1;~Tim_Dettmers2;~Luke_Zettlemoyer1;~Ari_S._Morcos1;~Ali_Farhadi3;~Ludwig_Schmidt1", "gender": "M;M;M;M;M;M", "homepage": "https://mitchellnw.github.io/;https://timdettmers.com/;https://www.cs.washington.edu/people/faculty/lsz/;https://homes.cs.washington.edu/~ali/;http://people.csail.mit.edu/ludwigs/;http://www.arimorcos.com", "dblp": "232/2273;172/1045;21/6793;37/5826;141/2720;217/3720", "google_scholar": "fzRnjFgAAAAJ;lHI3w5kAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;jeOFRDsAAAAJ;SWMKy70AAAAJ;v-A_7UsAAAAJ", "orcid": ";;;;;", "linkedin": ";;luke-zettlemoyer-a0109b226/;;ludwig-schmidt-87ba3612/;", "or_profile": "~Mitchell_Wortsman1;~Tim_Dettmers2;~Luke_Zettlemoyer1;~Ali_Farhadi3;~Ludwig_Schmidt1;~Ari_Morcos1", "aff": "Google;University of Washington;Meta;University of Washington;Allen Institute for Artificial Intelligence;Meta AI (FAIR)", "aff_domain": "google.com;cs.washington.edu;meta.com;cs.uw.edu;allenai.org;meta.com", "position": "Intern;PhD student;Researcher;Full Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nwortsman2023stable,\ntitle={Stable and low-precision training for large-scale vision-language models},\nauthor={Mitchell Wortsman and Tim Dettmers and Luke Zettlemoyer and Ari S. 
Morcos and Ali Farhadi and Ludwig Schmidt},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sqqASmpA2R}\n}", "github": "", "project": "", "reviewers": "AbFj;Nme8;rd6a;sdBF;35W5", "pdf_size": 6798275, "rating": "3;6;6;6;8", "confidence": "5;3;3;2;2", "soundness": "3;3;3;3;4", "novelty": "1;2;2;3;4", "presentation": "2;3;3;3;4", "wc_summary": "40;129;67;95;70", "wc_strengths": "35;46;110;177;151", "wc_weaknesses": "116;57;189;84;92", "wc_questions": "3;56;3;44;45", "wc_limitations": "1;7;3;3;4", "wc_review": "195;295;372;403;362", "wc_reply_reviewers": "154;5;0;12;25", "wc_reply_authors": "120;0;0;39;12", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 80.2, 29.982661656363994 ], "wc_strengths_avg": [ 103.8, 56.03356137173506 ], "wc_weaknesses_avg": [ 107.6, 44.84908025812793 ], "wc_questions_avg": [ 30.2, 22.604424345689495 ], "wc_limitations_avg": [ 3.6, 1.9595917942265424 ], "wc_review_avg": [ 325.4, 74.1096484946461 ], "wc_reply_reviewers_avg": [ 39.2, 58.01172295321007 ], "wc_reply_authors_avg": [ 34.2, 45.20353968440967 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.912870929175277, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10585161959025072829&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "google.com;cs.washington.edu;meta.com;cs.uw.edu;allenai.org;meta.com", "author_num": 6, "aff_unique_index": "0;1;2;1;3;2", "aff_unique_norm": "Google;University of Washington;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Google;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.google.com;https://www.washington.edu;https://meta.com;https://allenai.org", "aff_unique_abbr": "Google;UW;Meta;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "srJvUWZu6L", "title": "ViDA: Homeostatic Visual Domain Adapter for Continual Test Time Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Since real-world machine systems run in non-stationary and continually changing environments, the Continual Test-Time Adaptation (CTTA) task has been proposed to adapt a pre-trained model to continually changing target domains. Existing methods mainly focus on model-based adaptation, which leverages self-training to extract target-domain knowledge. However, pseudo labels can be noisy and the updated model parameters are uncertain under dynamic data distributions, leading to error accumulation and catastrophic forgetting in the continual adaptation process. To tackle these challenges and maintain model plasticity, we carefully design a Visual Domain Adapter (ViDA) for CTTA, explicitly handling both domain-specific and domain-agnostic knowledge. Specifically, we first comprehensively explore the different domain representations of the adapters with trainable high- and low-rank embedding spaces.
Then we inject ViDAs into the pre-trained model, which leverages high-rank and low-rank prototypes to adapt the current domain distribution and maintain the continual domain-shared knowledge, respectively. To adapt to the various distribution shifts of each sample in target domains, we further propose a Homeostatic Knowledge Allotment (HKA) strategy, which adaptively merges knowledge from each ViDA with different rank prototypes. Extensive experiments conducted on four widely-used benchmarks demonstrate that our proposed method achieves state-of-the-art performance in both classification and segmentation CTTA tasks. In addition, our method can be regarded as a novel transfer paradigm and showcases promising results in zero-shot adaptation of foundation models to continual downstream tasks and distributions.", "keywords": "Domain Adapter;Continual Test Time Adaptation;Efficient Fine-tuning", "primary_area": "", "supplementary_material": "/attachment/4dead16c295676ded07d9afa2c3406f08b795ed7.zip", "author": "Jiaming Liu;Senqiao Yang;Peidong Jia;Ming Lu;Yandong Guo;Wei Xue;Shanghang Zhang", "authorids": "~Jiaming_Liu2;~Senqiao_Yang1;~Peidong_Jia1;~Ming_Lu2;~Yandong_Guo2;~Wei_Xue5;~Shanghang_Zhang4", "gender": "M;;M;;M;M;", "homepage": "https://github.com/liujiaming1996;;https://iamstupidd.github.io/;;;http://www.wei-xue.com;", "dblp": ";;;;28/4272;;", "google_scholar": "cPki5sUAAAAJ;;;;fWDoWsQAAAAJ;77lSoywAAAAJ;", "orcid": "0000-0002-6770-4390;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jiaming_Liu2;~Senqiao_Yang1;~Peidong_Jia1;~Ming_Lu2;~Yandong_Guo2;~Wei_Xue5;~Shanghang_Zhang4", "aff": "Peking University;;Peking University;;AI^2 Robotics;Hong Kong Baptist University;", "aff_domain": "pku.edu.cn;;pku.edu.cn;;ai2robotics.com;hkbu.edu.hk;", "position": "PhD student;;PhD student;;Chief Scientist;Assistant Professor;", "bibtex": "@misc{\nliu2023vida,\ntitle={Vi{DA}: Homeostatic Visual Domain Adapter for Continual Test Time Adaptation},\nauthor={Jiaming Liu and Senqiao Yang and Peidong Jia and Ming Lu and Yandong Guo and Wei Xue and Shanghang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=srJvUWZu6L}\n}", "github": "", "project": "", "reviewers": "dPMr;vo5W;56Wj;DEpm", "site": "https://openreview.net/forum?id=srJvUWZu6L", "pdf_size": 0, "rating": "5;5;6;6", "confidence": "4;5;4;4", "soundness": "3;2;3;2", "novelty": "3;2;3;2", "presentation": "3;3;3;2", "wc_summary": "67;65;104;64", "wc_strengths": "32;24;63;50", "wc_weaknesses": "115;167;148;251", "wc_questions": "5;31;176;41", "wc_limitations": "7;7;73;15", "wc_review": "226;294;564;421", "wc_reply_reviewers": "0;26;100;149", "wc_reply_authors": "102;98;113;134", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 16.777961735562517 ], "wc_strengths_avg": [ 42.25, 15.237699957670777 ], "wc_weaknesses_avg": [ 170.25, 50.19648892103909 ], "wc_questions_avg": [ 63.25, 66.40924257962892 ], "wc_limitations_avg": [ 25.5, 27.617928959282953 ], "wc_review_avg": [ 376.25, 129.02785551965127 ], "wc_reply_reviewers_avg": [ 68.75, 59.09896361189424 ], "wc_reply_authors_avg": [ 111.75, 13.970952007647869 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 43, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=9284065084499873574&as_sdt=20005&sciodt=0,9&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Peking University;AI^2 Robotics;Hong Kong Baptist University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://ai2robotics.org/;https://www.hkbu.edu.hk", "aff_unique_abbr": "Peking U;AI^2 Robotics;HKBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "The Simplicity Bias in Multi-Task RNNs: Shared Attractors, Reuse of Dynamics, and Geometric Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70244", "id": "stDm3S0CV7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/50d6dbc809b0dc96f7f1090810537acc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=stDm3S0CV7", "openreview": "https://openreview.net/forum?id=stDm3S0CV7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70244", "video": "https://nips.cc/virtual/2023/poster/70244", "author_site": "Elia Turner, Omri Barak", "tldr": "", "abstract": "How does a single interconnected neural population perform multiple tasks, each with its own dynamical requirements? The relation between task requirements and neural dynamics in Recurrent Neural Networks (RNNs) has been investigated for single tasks. The forces shaping joint dynamics of multiple tasks, however, are largely unexplored. In this work, we first construct a systematic framework to study multiple tasks in RNNs, minimizing interference from input and output correlations with the hidden representation. This allows us to reveal how RNNs tend to share attractors and reuse dynamics, a tendency we define as the \"simplicity bias\".\nWe find that RNNs develop attractors sequentially during training, preferentially reusing existing dynamics and opting for simple solutions when possible. This sequenced emergence and preferential reuse encapsulate the simplicity bias. Through concrete examples, we demonstrate that new attractors primarily emerge due to task demands or architectural constraints, illustrating a balance between simplicity bias and external factors.\nWe examine the geometry of joint representations within a single attractor, by constructing a family of tasks from a set of functions. We show that the steepness of the associated functions controls their alignment within the attractor. This arrangement again highlights the simplicity bias, as points with similar input spacings undergo comparable transformations to reach the shared attractor.\nOur findings propose compelling applications. The geometry of shared attractors might allow us to infer the nature of unknown tasks. 
Furthermore, the simplicity bias implies that without specific incentives, modularity in RNNs may not spontaneously emerge, providing insights into the conditions required for network specialization.", "keywords": "Computational Neural Models;Recurrent Neural Networks;Multiple Tasks;Geometry;Dynamical Systems;Attractors;Neuroscience", "primary_area": "", "supplementary_material": "/attachment/2232bcb5a79ac2eeefaff667566e261ce98f87ac.pdf", "author": "Elia Turner;Omri Barak", "authorids": "~Elia_Turner1;~Omri_Barak1", "gender": "F;M", "homepage": ";https://barak.net.technion.ac.il", "dblp": ";96/2991", "google_scholar": "https://scholar.google.com/citations?hl=en;6BrZ2isAAAAJ", "orcid": ";0000-0002-7894-6344", "linkedin": ";", "or_profile": "~Elia_Turner1;~Omri_Barak1", "aff": "Technion, Technion;Technion", "aff_domain": "technion.ac.il;technion.ac.il", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nturner2023the,\ntitle={The Simplicity Bias in Multi-Task {RNN}s: Shared Attractors, Reuse of Dynamics, and Geometric Representation},\nauthor={Elia Turner and Omri Barak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=stDm3S0CV7}\n}", "github": "", "project": "", "reviewers": "ZQ4N;toNv;yVHj;6PJE", "pdf_size": 1517041, "rating": "4;6;6;7", "confidence": "4;3;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "22;59;186;98", "wc_strengths": "21;66;161;55", "wc_weaknesses": "170;113;215;124", "wc_questions": "2;32;215;88", "wc_limitations": "48;1;188;30", "wc_review": "263;271;965;395", "wc_reply_reviewers": "45;9;71;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 91.25, 60.94823623370901 ], "wc_strengths_avg": [ 75.75, 51.93926741878441 ], "wc_weaknesses_avg": [ 155.5, 40.46294601237038 ], "wc_questions_avg": [ 84.25, 81.55481285613989 ], "wc_limitations_avg": [ 66.75, 71.9839391809034 ], "wc_review_avg": [ 473.5, 288.5528547770755 ], "wc_reply_reviewers_avg": [ 33.5, 26.16772821625905 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1804720180879675982&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "technion.ac.il;technion.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Riemannian SAM: Sharpness-Aware Minimization on Riemannian Manifolds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70243", "id": "strvrjSi3C", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf701db0e3b4d0b8681ca6915ac3e87e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=strvrjSi3C", "openreview": "https://openreview.net/forum?id=strvrjSi3C", "poster":
"/media/PosterPDFs/NeurIPS%202023/70243.png?t=1701926247.9319701", "slides": "https://nips.cc/virtual/2023/poster/70243", "video": "https://nips.cc/virtual/2023/poster/70243", "author_site": "Jihun Yun, Eunho Yang", "tldr": "", "abstract": "Contemporary advances in the field of deep learning have embarked upon an exploration of the underlying geometric properties of data, thus encouraging the investigation of techniques that consider general manifolds, for example, hyperbolic or orthogonal neural networks. However, the optimization algorithms for training such geometric deep learning models still remain highly under-explored. In this paper, we introduce Riemannian SAM by generalizing conventional Euclidean SAM to Riemannian manifolds. We successfully formulate the sharpness-aware minimization on Riemannian manifolds, leading to one of a novel instantiation, Lorentz SAM. In addition, SAM variants proposed in previous studies such as Fisher SAM can be derived as special examples under our Riemannian SAM framework. We provide the convergence analysis of Riemannian SAM under a less aggressively decaying ascent learning rate than Euclidean SAM. Our analysis serves as a theoretically sound contribution encompassing a diverse range of manifolds, also providing the guarantees for SAM variants such as Fisher SAM, whose convergence analyses are absent. Lastly, we illustrate the superiority of Riemannian SAM in terms of generalization over previous Riemannian optimization algorithms through experiments on knowledge graph completion and machine translation tasks.", "keywords": "optimization;riemannian;manifolds;sharpness-aware", "primary_area": "", "supplementary_material": "/attachment/b979792f3b7f1445bb7ac66c274738b0064f615a.zip", "author": "Jihun Yun;Eunho Yang", "authorids": "~Jihun_Yun2;~Eunho_Yang1", "gender": "M;M", "homepage": "https://github.com/abcdxyzpqrst;https://sites.google.com/site/hleehome2/", "dblp": "241/9676;96/2621", "google_scholar": "ELv5qfEAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Jihun_Yun2;~Eunho_Yang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nyun2023riemannian,\ntitle={Riemannian {SAM}: Sharpness-Aware Minimization on Riemannian Manifolds},\nauthor={Jihun Yun and Eunho Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=strvrjSi3C}\n}", "github": "", "project": "", "reviewers": "zxyx;SuW3;nQre;wvpH", "pdf_size": 753285, "rating": "3;4;6;6", "confidence": "4;4;3;4", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "98;82;48;69", "wc_strengths": "20;21;76;19", "wc_weaknesses": "125;39;195;128", "wc_questions": "23;149;1;309", "wc_limitations": "1;1;1;49", "wc_review": "267;292;321;574", "wc_reply_reviewers": "0;56;139;67", "wc_reply_authors": "0;76;33;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 18.30812661087966 ], "wc_strengths_avg": [ 34.0, 24.259018941416407 ], "wc_weaknesses_avg": [ 121.75, 55.36865087754983 ], "wc_questions_avg": [ 120.5, 122.60811555521111 ], "wc_limitations_avg": [ 13.0, 
20.784609690826528 ], "wc_review_avg": [ 363.5, 123.02540388066198 ], "wc_reply_reviewers_avg": [ 65.5, 49.45957945636012 ], "wc_reply_authors_avg": [ 27.25, 31.20396609407208 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1013431695765347522&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "CEIL: Generalized Contextual Imitation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70242", "id": "suzMI2P1rT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee90fb9511b263f2ff971be9b374f9ee-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=suzMI2P1rT", "openreview": "https://openreview.net/forum?id=suzMI2P1rT", "poster": "/media/PosterPDFs/NeurIPS%202023/70242.png?t=1701913891.8212311", "slides": "https://nips.cc/virtual/2023/poster/70242", "video": "https://nips.cc/virtual/2023/poster/70242", "author_site": "Jinxin Liu, Li He, Yachen Kang, Zifeng Zhuang, Donglin Wang, Huazhe Xu", "tldr": "", "abstract": "In this paper, we present ContExtual Imitation Learning (CEIL), a general and broadly applicable algorithm for imitation learning (IL). Inspired by the formulation of hindsight information matching, we derive CEIL by explicitly learning a hindsight embedding function together with a contextual policy using the hindsight embeddings. To achieve the expert matching objective for IL, we advocate for optimizing a contextual variable such that it biases the contextual policy towards mimicking expert behaviors. Beyond the typical learning from demonstrations (LfD) setting, CEIL is a generalist that can be effectively applied to multiple settings including: 1) learning from observations (LfO), 2) offline IL, 3) cross-domain IL (mismatched experts), and 4) one-shot IL settings. Empirically, we evaluate CEIL on the popular MuJoCo tasks (online) and the D4RL dataset (offline). 
Compared to prior state-of-the-art baselines, we show that CEIL is more sample-efficient in most online IL tasks and achieves better or competitive performances in offline tasks.", "keywords": "imitation learning;reinforcement learning;offline imitation learning", "primary_area": "", "supplementary_material": "/attachment/ca365fc12e25947f9e52cde9c2993bebfb7b9794.zip", "author": "Jinxin Liu;Li He;Yachen Kang;Zifeng Zhuang;Donglin Wang;Huazhe Xu", "authorids": "~Jinxin_Liu1;~Li_He3;~Yachen_Kang1;~Zifeng_Zhuang1;~Donglin_Wang1;~Huazhe_Xu1", "gender": ";;M;M;M;M", "homepage": ";;;;https://milab.westlake.edu.cn/;http://hxu.rocks", "dblp": ";;247/6551.html;276/5034;;164/9006", "google_scholar": ";MKMKMrIAAAAJ;LCTdGEcAAAAJ;;https://scholar.google.ca/citations?user=-fo6wdwAAAAJ;t9HPFawAAAAJ", "orcid": ";;;;0000-0002-8188-3735;", "linkedin": ";https://www.linkedin.cn/incareer/in/%E7%AB%8B-%E4%BD%95-94a95823a;;;;", "or_profile": "~Jinxin_Liu1;~Li_He3;~Yachen_Kang1;~Zifeng_Zhuang1;~Donglin_Wang1;~Huazhe_Xu1", "aff": ";Westlake University;Zhejiang University;Zhejiang University;Westlake University;Tsinghua University", "aff_domain": ";westlake.edu.cn;zju.edu.cn;zju.edu.cn;westlake.edu.cn;tsinghua.edu.cn", "position": ";Research Assistant;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023ceil,\ntitle={{CEIL}: Generalized Contextual Imitation Learning},\nauthor={Jinxin Liu and Li He and Yachen Kang and Zifeng Zhuang and Donglin Wang and Huazhe Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=suzMI2P1rT}\n}", "github": "", "project": "", "reviewers": "STjS;2c9A;7vt9;PiwE;vVku", "pdf_size": 1691859, "rating": "6;6;6;6;7", "confidence": "4;4;3;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "4;4;3;3;3", "wc_summary": "54;108;73;115;281", "wc_strengths": "67;75;185;52;226", "wc_weaknesses": "221;162;54;200;141", "wc_questions": "120;114;67;132;29", "wc_limitations": "23;4;60;63;39", "wc_review": "485;463;439;562;716", "wc_reply_reviewers": "83;10;14;12;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 126.2, 80.57642335075441 ], "wc_strengths_avg": [ 121.0, 70.58895097676407 ], "wc_weaknesses_avg": [ 155.6, 58.008964824413134 ], "wc_questions_avg": [ 92.4, 38.62952238897085 ], "wc_limitations_avg": [ 37.8, 22.319498202244603 ], "wc_review_avg": [ 533.0, 100.36931802099683 ], "wc_reply_reviewers_avg": [ 27.2, 27.995713957675736 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6123724356957947, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10236999078663367915&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": ";westlake.edu.cn;zju.edu.cn;zju.edu.cn;westlake.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Westlake University;Zhejiang University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.westlake.edu.cn;https://www.zju.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "WU;ZJU;THU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Unified, Scalable Framework for Neural Population Decoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70241", "id": "sw2Y0sirtM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8ca113d122584f12a6727341aaf58887-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sw2Y0sirtM", "openreview": "https://openreview.net/forum?id=sw2Y0sirtM", "poster": "/media/PosterPDFs/NeurIPS%202023/70241.png?t=1702162469.6729155", "slides": "https://nips.cc/virtual/2023/poster/70241", "video": "https://nips.cc/virtual/2023/poster/70241", "author_site": "Mehdi Azabou, Vinam Arora, Venkataramana Ganesh, Ximeng Mao, Santosh Nachimuthu, Michael Mendelson, Blake Richards, Matthew Perich, Guillaume Lajoie, Eva Dyer", "tldr": "", "abstract": "Our ability to use deep learning approaches to decipher neural activity would likely benefit from greater scale, in terms of both the model size and the datasets. However, the integration of many neural recordings into one unified model is challenging, as each recording contains the activity of different neurons from different individual animals. In this paper, we introduce a training framework and architecture designed to model the population dynamics of neural activity across diverse, large-scale neural recordings. Our method first tokenizes individual spikes within the dataset to build an efficient representation of neural events that captures the fine temporal structure of neural activity. We then employ cross-attention and a PerceiverIO backbone to further construct a latent tokenization of neural population activities. Utilizing this architecture and training framework, we construct a large-scale multi-session model trained on large datasets from seven nonhuman primates, spanning over 158 different sessions of recording from over 27,373 neural units and over 100 hours of recordings. In a number of different tasks, we demonstrate that our pretrained model can be rapidly adapted to new, unseen sessions with unspecified neuron correspondence, enabling few-shot performance with minimal labels. 
This work presents a powerful new approach for building deep learning tools to analyze neural data and stakes out a clear path to training at scale for neural decoding models.", "keywords": "neural population;brain decoder;transformer;tokenization;sequence-to-sequence;electrophysiology;brain-computer interfaces", "primary_area": "", "supplementary_material": "", "author": "Mehdi Azabou;Vinam Arora;Venkataramana Ganesh;Ximeng Mao;Santosh B Nachimuthu;Michael Jacob Mendelson;Blake Aaron Richards;Matthew G Perich;Guillaume Lajoie;Eva L Dyer", "authorids": "~Mehdi_Azabou2;~Vinam_Arora1;~Venkataramana_Ganesh1;~Ximeng_Mao1;~Santosh_B_Nachimuthu2;~Michael_Jacob_Mendelson1;~Blake_Aaron_Richards1;~Matthew_G_Perich1;~Guillaume_Lajoie1;~Eva_L_Dyer1", "gender": "M;M;M;;M;M;M;M;M;F", "homepage": "https://www.mehai.dev;;https://venkys.website;;;;http://linclab.org;;https://dms.umontreal.ca/~lajoie/;http://dyerlab.gatech.edu", "dblp": "281/8371;;;133/3410;;218/1451;70/10850;205/2626;31/10384;64/8509.html", "google_scholar": "jXxyYCoAAAAJ;XHVqHR4AAAAJ;;Q_0TqBgAAAAJ;;;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ;https://scholar.google.ch/citations?user=tTUtLsQAAAAJ;;Sb_jcHcAAAAJ", "orcid": ";;;0000-0003-4488-8796;;;0000-0001-9662-2151;0000-0001-9800-2386;;", "linkedin": ";vinam-arora/;https://linkedin.com/in/g-venkataramana;;santoshnachimuthu/;mmend/;;;;", "or_profile": "~Mehdi_Azabou2;~Vinam_Arora1;~Venkataramana_Ganesh1;~Ximeng_Mao1;~Santosh_B_Nachimuthu2;~Michael_Jacob_Mendelson1;~Blake_Aaron_Richards1;~Matthew_G_Perich1;~Guillaume_Lajoie1;~Eva_Dyer1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;University of Montreal;Medtronic;Georgia Institute of Technology;Mila - Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al;Mila - Quebec Artificial Intelligence Institute;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;umontreal.ca;medtronic.com;gatech.edu;mila.quebec;umontreal.ca;mila.quebec;gatech.edu", "position": "PhD student;MS student;Researcher;PhD student;Intern;Undergrad student;Associate Professor;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nazabou2023a,\ntitle={A Unified, Scalable Framework for Neural Population Decoding},\nauthor={Mehdi Azabou and Vinam Arora and Venkataramana Ganesh and Ximeng Mao and Santosh B Nachimuthu and Michael Jacob Mendelson and Blake Aaron Richards and Matthew G Perich and Guillaume Lajoie and Eva L Dyer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sw2Y0sirtM}\n}", "github": "", "project": "", "reviewers": "c1Lq;Mfux;BY6C;Vmcn", "pdf_size": 3431135, "rating": "6;7;7;8", "confidence": "4;3;4;4", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "2;3;3;4", "wc_summary": "92;147;118;80", "wc_strengths": "72;89;163;111", "wc_weaknesses": "296;132;119;85", "wc_questions": "98;46;58;103", "wc_limitations": "92;1;31;16", "wc_review": "650;415;489;395", "wc_reply_reviewers": "334;122;21;27", "wc_reply_authors": "152;69;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.25, 25.762133063859444 ], "wc_strengths_avg": [ 108.75, 34.23722389446901 ], "wc_weaknesses_avg": [ 158.0, 
81.50153372789987 ], "wc_questions_avg": [ 76.25, 24.681724007856502 ], "wc_limitations_avg": [ 35.0, 34.57600323924094 ], "wc_review_avg": [ 487.25, 100.27555783938577 ], "wc_reply_reviewers_avg": [ 126.0, 126.5958135168774 ], "wc_reply_authors_avg": [ 55.25, 62.5594717049305 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17904234003443709433&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 12, "email": "gatech.edu;gatech.edu;gatech.edu;umontreal.ca;medtronic.com;gatech.edu;mila.quebec;umontreal.ca;mila.quebec;gatech.edu", "author_num": 10, "aff_unique_index": "0;0;0;1;2;0;3;4;3;0", "aff_unique_norm": "Georgia Institute of Technology;University of Montreal;Medtronic;Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";;;Artificial Intelligence;", "aff_unique_url": "https://www.gatech.edu;https://www.umontreal.ca;https://www.medtronic.com;https://mila.quebec;https://www.umontreal.ca", "aff_unique_abbr": "Georgia Tech;UM;Medtronic;Mila;UdeM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;1;1;1;0", "aff_country_unique": "United States;Canada" }, { "title": "The noise level in linear regression with dependent data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70240", "id": "swNtr6vGqg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ecffd829f90b0a4b6aa017b6df15904f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=swNtr6vGqg", "openreview": "https://openreview.net/forum?id=swNtr6vGqg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70240", "video": "https://nips.cc/virtual/2023/poster/70240", "author_site": "Ingvar Ziemann, Stephen Tu, George J. Pappas, Nikolai Matni", "tldr": "", "abstract": "We derive upper bounds for random design linear regression with dependent ($\\beta$-mixing) data absent any realizability assumptions. In contrast to the strictly realizable martingale noise regime, no sharp \\emph{instance-optimal} non-asymptotics are available in the literature. Up to constant factors, our analysis correctly recovers the variance term predicted by the Central Limit Theorem---the noise level of the problem---and thus exhibits graceful degradation as we introduce misspecification. Past a burn-in, our result is sharp in the moderate deviations regime, and in particular does not inflate the leading order term by mixing time factors.", "keywords": "Learning Theory;Learning with dependent data;Time-Series", "primary_area": "", "supplementary_material": "", "author": "Ingvar Ziemann;Stephen Tu;George J.
Pappas;Nikolai Matni", "authorids": "~Ingvar_Ziemann1;~Stephen_Tu1;~George_J._Pappas1;~Nikolai_Matni2", "gender": "M;;M;M", "homepage": "https://www.kth.se/profile/ziemann;https://stephentu.github.io/;http://www.georgejpappas.org/;https://nikolaimatni.github.io", "dblp": "247/4222;09/8165;p/GeorgeJPappas;52/8135", "google_scholar": "https://scholar.google.se/citations?user=_RBAS2IAAAAJ;JQcDmB8AAAAJ;https://scholar.google.com.tw/citations?user=Kia-4B0AAAAJ;ZDPCh_EAAAAJ", "orcid": ";;0000-0001-9081-0637;", "linkedin": ";;;", "or_profile": "~Ingvar_Ziemann1;~Stephen_Tu1;~George_Pappas1;~Nikolai_Matni1", "aff": "University of Pennsylvania;Google;School of Engineering and Applied Science, University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania", "aff_domain": "upenn.edu;google.com;seas.upenn.edu;seas.upenn.edu", "position": "Postdoc;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nziemann2023the,\ntitle={The noise level in linear regression with dependent data},\nauthor={Ingvar Ziemann and Stephen Tu and George J. Pappas and Nikolai Matni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=swNtr6vGqg}\n}", "github": "", "project": "", "reviewers": "8guK;2Nqt;vLcM;6Vi5;BbWm", "pdf_size": 418659, "rating": "3;4;6;7;7", "confidence": "4;4;3;4;3", "soundness": "2;3;3;4;3", "novelty": "1;2;3;3;3", "presentation": "2;3;4;4;3", "wc_summary": "61;33;77;116;178", "wc_strengths": "66;61;170;151;92", "wc_weaknesses": "198;65;5;197;117", "wc_questions": "282;59;362;16;16", "wc_limitations": "13;1;1;25;7", "wc_review": "620;219;615;505;410", "wc_reply_reviewers": "18;10;123;18;29", "wc_reply_authors": "0;0;272;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 93.0, 50.26728558416498 ], "wc_strengths_avg": [ 108.0, 44.54660480889649 ], "wc_weaknesses_avg": [ 116.4, 75.10952003574513 ], "wc_questions_avg": [ 147.0, 145.95615780089582 ], "wc_limitations_avg": [ 9.4, 8.979977728257458 ], "wc_review_avg": [ 473.8, 149.18364521622337 ], "wc_reply_reviewers_avg": [ 39.6, 42.13597038161101 ], "wc_reply_authors_avg": [ 54.4, 108.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5527707983925667, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3638993145690387546&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "upenn.edu;google.com;seas.upenn.edu;seas.upenn.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Pennsylvania;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.upenn.edu;https://www.google.com", "aff_unique_abbr": "UPenn;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Meta-in-context learning in large language models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70239", "id": "sx0xpaO0za", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cda04d7ea67ea1376bf8c6962d8541e0-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=sx0xpaO0za", "openreview": "https://openreview.net/forum?id=sx0xpaO0za", "poster": "/media/PosterPDFs/NeurIPS%202023/70239.png?t=1701438662.4135044", "slides": "https://nips.cc/virtual/2023/poster/70239", "video": "https://nips.cc/virtual/2023/poster/70239", "author_site": "Julian Coda-Forno, Marcel Binz, Zeynep Akata, Zeynep Akata, Matt Botvinick, Jane Wang, Eric Schulz", "tldr": "", "abstract": "Large language models have shown tremendous performance in a variety of tasks. In-context learning -- the ability to improve at a task after being provided with a number of demonstrations -- is seen as one of the main contributors to their success. In the present paper, we demonstrate that the in-context learning abilities of large language models can be recursively improved via in-context learning itself. We coin this phenomenon meta-in-context learning. Looking at two idealized domains, a one-dimensional regression task and a two-armed bandit task, we show that meta-in-context learning adaptively reshapes a large language model's priors over expected tasks. Furthermore, we find that meta-in-context learning modifies the in-context learning strategies of such models. Finally, we broaden the scope of our investigation to encompass two diverse benchmarks: one focusing on real-world regression problems and the other encompassing multiple NLP tasks. In both cases, we observe competitive performance comparable to that of traditional learning algorithms. Taken together, our work improves our understanding of in-context learning and paves the way toward adapting large language models to the environment they are applied purely through meta-in-context learning rather than traditional finetuning.", "keywords": "Large language models;in-context learning;meta-learning;GPT-3", "primary_area": "", "supplementary_material": "/attachment/7d809a8ba00d412e79992001a86f9d4c82db70f1.pdf", "author": "Julian Coda-Forno;Marcel Binz;Zeynep Akata;Matthew Botvinick;Jane X Wang;Eric Schulz", "authorids": "~Julian_Coda-Forno1;~Marcel_Binz1;~Zeynep_Akata1;~Matthew_Botvinick1;~Jane_X_Wang1;~Eric_Schulz1", "gender": "M;M;F;;M;F", "homepage": ";;https://eml-unitue.de/people/zeynep-akata;;https://cpilab.org;http://www.janexwang.com", "dblp": ";212/5102;117/4838;98/5712;124/0016;88/10757", "google_scholar": "beVJGycAAAAJ;https://scholar.google.de/citations?user=Lvm9Q8QAAAAJ;jQl9RtkAAAAJ;;;https://scholar.google.co.uk/citations?user=YizAq4gAAAAJ", "orcid": ";;0000-0002-1432-7747;;;", "linkedin": ";;zeynep-akata-36182045/?ppe=1;;;", "or_profile": "~Julian_Coda-Forno1;~Marcel_Binz1;~Zeynep_Akata1;~Matthew_Botvinick1;~Eric_Schulz1;~Jane_Wang1", "aff": "Max Planck Institute for Biological Cybernetics, Max-Planck Institute;Max Planck Institute for Biological Cybernetics, Max-Planck Institute;University of T\u00fcbingen;Google DeepMind;Max Planck Institute for Biological Cybernetics;Google DeepMind", "aff_domain": "tuebingen.mpg.de;tuebingen.mpg.de;uni-tuebingen.de;google.com;tuebingen.mpg.de;google.com", "position": "PhD student;Postdoc;Full Professor;Researcher;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\ncoda-forno2023metaincontext,\ntitle={Meta-in-context learning in large language models},\nauthor={Julian Coda-Forno and Marcel Binz and Zeynep Akata and Matthew Botvinick and Jane X Wang and Eric Schulz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sx0xpaO0za}\n}", "github": "", "project": 
"", "reviewers": "NP3F;6CRk;xRJY;ShPz;3waD", "pdf_size": 508656, "rating": "4;5;5;5;7", "confidence": "3;3;4;4;3", "soundness": "3;2;3;3;4", "novelty": "2;2;2;3;2", "presentation": "2;3;2;4;4", "wc_summary": "70;68;79;104;146", "wc_strengths": "12;36;76;159;108", "wc_weaknesses": "94;203;516;263;179", "wc_questions": "5;73;25;64;56", "wc_limitations": "9;1;1;8;26", "wc_review": "190;381;697;598;515", "wc_reply_reviewers": "0;129;52;239;121", "wc_reply_authors": "0;326;69;308;422", "reply_reviewers": "0;2;1;1;2", "reply_authors": "1;2;2;2;3", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 93.4, 29.254743205162473 ], "wc_strengths_avg": [ 78.2, 52.12446642412755 ], "wc_weaknesses_avg": [ 251.0, 143.18240115321436 ], "wc_questions_avg": [ 44.6, 25.554647326856227 ], "wc_limitations_avg": [ 9.0, 9.143303560529969 ], "wc_review_avg": [ 476.2, 176.67303133189287 ], "wc_reply_reviewers_avg": [ 108.2, 80.71034630083061 ], "wc_reply_authors_avg": [ 225.0, 161.77762515255316 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1882153993468364803&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "tuebingen.mpg.de;tuebingen.mpg.de;uni-tuebingen.de;google.com;tuebingen.mpg.de;google.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0;2", "aff_unique_norm": "Max Planck Institute for Biological Cybernetics;University of T\u00fcbingen;Google", "aff_unique_dep": "Biological Cybernetics;;Google DeepMind", "aff_unique_url": "https://www.biological-cybernetics.de;https://www.uni-tuebingen.de/;https://deepmind.com", "aff_unique_abbr": "MPIBC;Uni T\u00fcbingen;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Is RLHF More Difficult than Standard RL? A Theoretical Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70238", "id": "sxZLrBqg50", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efb9629755e598c4f261c44aeb6fde5e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sxZLrBqg50", "openreview": "https://openreview.net/forum?id=sxZLrBqg50", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70238", "video": "https://nips.cc/virtual/2023/poster/70238", "author_site": "Yuanhao Wang, Qinghua Liu, Chi Jin", "tldr": "", "abstract": "Reinforcement learning from Human Feedback (RLHF) learns from preference signals, while standard Reinforcement Learning (RL) directly learns from reward signals. Preferences arguably contain less information than rewards, which makes preference-based RL seemingly more difficult. This paper theoretically proves that, for a wide range of preference models, we can solve preference-based RL directly using existing algorithms and techniques for reward-based RL, with small or no extra costs. 
Specifically, (1) for preferences that are drawn from reward-based probabilistic models, we reduce the problem to robust reward-based RL that can tolerate small errors in rewards; (2) for general arbitrary preferences where the objective is to find the von Neumann winner, we reduce the problem to multiagent reward-based RL which finds Nash equilibria for factored Markov games under a restricted set of policies. The latter case can be further reduced to an adversarial MDP when preferences only depend on the final state. We instantiate all reward-based RL subroutines with concrete provable algorithms, and apply our theory to a large class of models including tabular MDPs and MDPs with generic function approximation. We further provide guarantees when K-wise comparisons are available.", "keywords": "reinforcement learning theory;reinforcement learning from human feedback;preference-based reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Yuanhao Wang;Qinghua Liu;Chi Jin", "authorids": "~Yuanhao_Wang1;~Qinghua_Liu1;~Chi_Jin1", "gender": ";M;M", "homepage": ";http://qinghual2020.github.io/;https://sites.google.com/view/cjin/home", "dblp": ";;126/1802-1", "google_scholar": "yj2b7pgAAAAJ;CotFJJsAAAAJ;GINhGvwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuanhao_Wang1;~Qinghua_Liu1;~Chi_Jin1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023is,\ntitle={Is {RLHF} More Difficult than Standard {RL}? A Theoretical Perspective},\nauthor={Yuanhao Wang and Qinghua Liu and Chi Jin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sxZLrBqg50}\n}", "github": "", "project": "", "reviewers": "ZkeP;Sju6;XC4t;nikD", "pdf_size": 489438, "rating": "4;5;6;7", "confidence": "3;1;4;3", "soundness": "2;2;4;4", "novelty": "3;1;3;4", "presentation": "1;1;4;3", "wc_summary": "69;83;67;170", "wc_strengths": "54;9;28;89", "wc_weaknesses": "287;66;55;45", "wc_questions": "8;53;254;55", "wc_limitations": "23;49;1;20", "wc_review": "441;260;405;379", "wc_reply_reviewers": "316;254;65;0", "wc_reply_authors": "342;402;0;0", "reply_reviewers": "2;2;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.25, 1.299038105676658 ], "wc_summary_avg": [ 97.25, 42.45217897823385 ], "wc_strengths_avg": [ 45.0, 30.008332176247315 ], "wc_weaknesses_avg": [ 113.25, 100.58920170674385 ], "wc_questions_avg": [ 92.5, 95.11703317492614 ], "wc_limitations_avg": [ 23.25, 17.09349291397168 ], "wc_review_avg": [ 371.25, 67.89836154135091 ], "wc_reply_reviewers_avg": [ 158.75, 130.18328425723482 ], "wc_reply_authors_avg": [ 186.0, 187.20576914187234 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30779350562554625, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13081051200981910559&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "princeton.edu;princeton.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url":
"https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Neural Collapse Perspective on Feature Evolution in Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70237", "id": "sxao2udWXi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2dd8a2a8685602586c1173f0b644d0e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=sxao2udWXi", "openreview": "https://openreview.net/forum?id=sxao2udWXi", "poster": "/media/PosterPDFs/NeurIPS%202023/70237.png?t=1699753599.0548358", "slides": "https://nips.cc/virtual/2023/poster/70237", "video": "https://nips.cc/virtual/2023/poster/70237", "author_site": "Vignesh Kothapalli, Tom Tirer, Joan Bruna", "tldr": "", "abstract": "Graph neural networks (GNNs) have become increasingly popular for classification tasks on graph-structured data. Yet, the interplay between graph topology and feature evolution in GNNs is not well understood. In this paper, we focus on node-wise classification, illustrated with community detection on stochastic block model graphs, and explore the feature evolution through the lens of the \"Neural Collapse\" (NC) phenomenon. When training instance-wise deep classifiers (e.g. for image classification) beyond the zero training error point, NC demonstrates a reduction in the deepest features' within-class variability and an increased alignment of their class means to certain symmetric structures. We start with an empirical study that shows that a decrease in within-class variability is also prevalent in the node-wise classification setting, however, not to the extent observed in the instance-wise case. Then, we theoretically study this distinction. Specifically, we show that even an \"optimistic\" mathematical model requires that the graphs obey a strict structural condition in order to possess a minimizer with exact collapse. Furthermore, by studying the gradient dynamics of this model, we provide reasoning for the partial collapse observed empirically. 
Finally, we present a study on the evolution of within- and between-class feature variability across layers of a well-trained GNN and contrast the behavior with spectral methods.", "keywords": "Neural collapse;Graph neural networks;Community detection", "primary_area": "", "supplementary_material": "", "author": "Vignesh Kothapalli;Tom Tirer;Joan Bruna", "authorids": "~Vignesh_Kothapalli1;~Tom_Tirer1;~Joan_Bruna1", "gender": "M;;M", "homepage": "https://kvignesh1420.github.io/;https://tirertom.wixsite.com/homepage;http://cims.nyu.edu/~bruna", "dblp": "205/3929.html;175/4884;44/8776", "google_scholar": "o-Q9KhwAAAAJ;_6bZV20AAAAJ;L4bNmsMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Vignesh_Kothapalli1;~Tom_Tirer1;~Joan_Bruna1", "aff": "NYU Courant;Bar-Ilan University;New York University", "aff_domain": "cims.nyu.edu;biu.ac.il;nyu.edu", "position": "MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkothapalli2023a,\ntitle={A Neural Collapse Perspective on Feature Evolution in Graph Neural Networks},\nauthor={Vignesh Kothapalli and Tom Tirer and Joan Bruna},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=sxao2udWXi}\n}", "github": "", "project": "", "reviewers": "GzMi;t9NA;pASb;4uoY", "pdf_size": 3339097, "rating": "3;5;7;7", "confidence": "5;4;4;3", "soundness": "2;3;4;2", "novelty": "1;2;3;4", "presentation": "2;3;3;4", "wc_summary": "183;54;113;277", "wc_strengths": "27;35;65;45", "wc_weaknesses": "80;125;45;654", "wc_questions": "80;223;248;308", "wc_limitations": "1;23;1;1", "wc_review": "371;460;472;1285", "wc_reply_reviewers": "402;263;0;0", "wc_reply_authors": "1314;403;0;0", "reply_reviewers": "2;2;0;0", "reply_authors": "4;2;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 156.75, 83.09745784318555 ], "wc_strengths_avg": [ 43.0, 14.212670403551895 ], "wc_weaknesses_avg": [ 226.0, 248.7277628251418 ], "wc_questions_avg": [ 214.75, 83.70595856926793 ], "wc_limitations_avg": [ 6.5, 9.526279441628825 ], "wc_review_avg": [ 647.0, 370.4099080748246 ], "wc_reply_reviewers_avg": [ 166.25, 173.3614360231248 ], "wc_reply_authors_avg": [ 429.25, 536.6522966502613 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18396874514227389248&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "cims.nyu.edu;biu.ac.il;nyu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "New York University;Bar-Ilan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.courant.nyu.edu;https://www.biu.ac.il", "aff_unique_abbr": "NYU;BIU", "aff_campus_unique_index": "0", "aff_campus_unique": "Courant Institute of Mathematical Sciences;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "Riemannian Projection-free Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70236", "id": "szFqlNRxeS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8305a0049227f7dd2bb91e11090f8cfa-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=szFqlNRxeS", "openreview": "https://openreview.net/forum?id=szFqlNRxeS", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70236", "video": "https://nips.cc/virtual/2023/poster/70236", "author_site": "Zihao Hu, Guanghui Wang, Jacob Abernethy", "tldr": "", "abstract": "The projection operation is a critical component in a wide range of optimization algorithms, such as online gradient descent (OGD), \nfor enforcing constraints and achieving optimal regret bounds. However, it suffers from computational complexity limitations in high-dimensional settings or \nwhen dealing with ill-conditioned constraint sets. Projection-free algorithms address this issue by replacing the projection oracle with more efficient optimization\n subroutines. But to date, these methods have been developed primarily in the Euclidean setting, and while there has been growing interest in optimization on \n Riemannian manifolds, there has been essentially no work in trying to utilize projection-free tools here. An apparent issue is that non-trivial affine functions \n are generally non-convex in such domains. In this paper, we present methods for obtaining sub-linear regret guarantees in online geodesically convex optimization \n on curved spaces for two scenarios: when we have access to (a) a separation oracle or (b) a linear optimization oracle. For geodesically convex losses, and \n when a separation oracle is available, our algorithms achieve $O(T^{\\frac{1}{2}})$, $O(T^{\\frac{3}{4}})$ and $O(T^{\\frac{1}{2}})$ adaptive regret guarantees in the full\n information setting, the bandit setting with one-point feedback and the bandit setting with two-point feedback, respectively. When a linear optimization oracle is \n available, we obtain regret rates of $O(T^{\\frac{3}{4}})$ for geodesically convex losses \nand $O(T^{\\frac{2}{3}}\\log T)$ for strongly geodesically convex losses.", "keywords": "Online learning;Riemannian optimization;projection-free optimization", "primary_area": "", "supplementary_material": "/attachment/96628b1a98703181891de8ab6e9bc58333ba3a07.pdf", "author": "Zihao Hu;Guanghui Wang;Jacob Abernethy", "authorids": "~Zihao_Hu1;~Guanghui_Wang3;~Jacob_Abernethy1", "gender": ";M;M", "homepage": ";http://www.lamda.nju.edu.cn/wanggh/;https://www.cc.gatech.edu/~jabernethy9/", "dblp": "174/8733;44/2323-6;91/2520", "google_scholar": ";oNgvRg4AAAAJ;FDu4ciwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zihao_Hu1;~Guanghui_Wang3;~Jacob_Abernethy1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;cc.gatech.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2023riemannian,\ntitle={Riemannian Projection-free Online Learning},\nauthor={Zihao Hu and Guanghui Wang and Jacob Abernethy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=szFqlNRxeS}\n}", "github": "", "project": "", "reviewers": "xiK4;FXr6;4T7s;puFc;2HSe", "pdf_size": 514697, "rating": "5;7;7;7;8", "confidence": "2;3;4;5;1", "soundness": "3;3;3;2;3", "novelty": "3;2;2;2;3", "presentation": "2;4;3;3;4", "wc_summary": "213;60;135;49;127", "wc_strengths": "67;89;70;92;62", "wc_weaknesses": "442;149;169;1329;17", "wc_questions": "173;224;74;2;19", "wc_limitations": "1;15;1;2;59", "wc_review": "896;537;449;1474;284", "wc_reply_reviewers": "74;19;56;110;4", "wc_reply_authors": 
"26;11;21;25;12", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 116.8, 59.182429825075616 ], "wc_strengths_avg": [ 76.0, 12.149074038789951 ], "wc_weaknesses_avg": [ 421.2, 474.4699779754246 ], "wc_questions_avg": [ 98.4, 86.64086795502455 ], "wc_limitations_avg": [ 15.6, 22.339203208709122 ], "wc_review_avg": [ 728.0, 423.3669802901497 ], "wc_reply_reviewers_avg": [ 52.6, 38.09251895057611 ], "wc_reply_authors_avg": [ 19.0, 6.356099432828281 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14767892222644912839&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "gatech.edu;gatech.edu;cc.gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A unified framework for information-theoretic generalization bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70235", "id": "t0fkjO4aZj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa67d13ba6c73637593bbcc92f6400ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t0fkjO4aZj", "openreview": "https://openreview.net/forum?id=t0fkjO4aZj", "poster": "/media/PosterPDFs/NeurIPS%202023/70235.png?t=1701550426.7227588", "slides": "https://nips.cc/virtual/2023/poster/70235", "video": "https://nips.cc/virtual/2023/poster/70235", "author_site": "Yifeng Chu, Maxim Raginsky", "tldr": "", "abstract": "This paper presents a general methodology for deriving information-theoretic generalization bounds for learning algorithms. The main technical tool is a probabilistic decorrelation lemma based on a change of measure and a relaxation of Young's inequality in $L_{\\psi_p}$ Orlicz spaces. Using the decorrelation lemma in combination with other techniques, such as symmetrization, couplings, and chaining in the space of probability measures, we obtain new upper bounds on the generalization error, both in expectation and in high probability, and recover as special cases many of the existing generalization bounds, including the ones based on mutual information, conditional mutual information, stochastic chaining, and PAC-Bayes inequalities. 
In addition, the Fernique--Talagrand upper bound on the expected supremum of a subgaussian process emerges as a special case.", "keywords": "generalization bounds;information theory;chaining;PAC-Bayes;couplings", "primary_area": "", "supplementary_material": "/attachment/76c587cdda5ba518622d36a87ca33ede4d3af177.zip", "author": "Yifeng Chu;Maxim Raginsky", "authorids": "~Yifeng_Chu1;~Maxim_Raginsky1", "gender": "M;M", "homepage": ";http://maxim.ece.illinois.edu", "dblp": "346/4417;91/6905", "google_scholar": ";MiVoUjEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yifeng_Chu1;~Maxim_Raginsky1", "aff": "University of Illinois, Urbana-Champaign;University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchu2023a,\ntitle={A unified framework for information-theoretic generalization bounds},\nauthor={Yifeng Chu and Maxim Raginsky},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t0fkjO4aZj}\n}", "github": "", "project": "", "reviewers": "HuV6;7JF4;5Qrr;U1pj", "pdf_size": 358778, "rating": "6;6;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "66;142;124;48", "wc_strengths": "134;191;27;74", "wc_weaknesses": "427;749;34;73", "wc_questions": "127;182;54;39", "wc_limitations": "2;68;10;2", "wc_review": "756;1332;249;236", "wc_reply_reviewers": "0;682;17;5", "wc_reply_authors": "0;539;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.0, 39.05124837953327 ], "wc_strengths_avg": [ 106.5, 61.79198977213794 ], "wc_weaknesses_avg": [ 320.75, 290.81469615547286 ], "wc_questions_avg": [ 100.5, 57.638962516686576 ], "wc_limitations_avg": [ 20.5, 27.617928959282953 ], "wc_review_avg": [ 643.25, 449.5483149784904 ], "wc_reply_reviewers_avg": [ 176.0, 292.2045516414828 ], "wc_reply_authors_avg": [ 134.75, 233.3938463199062 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5514157872146125647&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "illinois.edu;illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Object-Centric Learning for Real-World Videos by Predicting Temporal Feature Similarities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70234", "id": "t1jLRFvBqm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c1fdec0d7ea1affa15bd09dd0fd3af05-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t1jLRFvBqm", "openreview": "https://openreview.net/forum?id=t1jLRFvBqm", "poster": "/media/PosterPDFs/NeurIPS%202023/70234.png?t=1702330511.6250393", "slides": "https://nips.cc/virtual/2023/poster/70234", "video": 
"https://nips.cc/virtual/2023/poster/70234", "author_site": "Andrii Zadaianchuk, Maximilian Seitzer, Georg Martius", "tldr": "", "abstract": "Unsupervised video-based object-centric learning is a promising avenue to learn structured representations from large, unlabeled video collections, but previous approaches have only managed to scale to real-world datasets in restricted domains.\nRecently, it was shown that the reconstruction of pre-trained self-supervised features leads to object-centric representations on unconstrained real-world image datasets.\nBuilding on this approach, we propose a novel way to use such pre-trained features in the form of a temporal feature similarity loss.\nThis loss encodes semantic and temporal correlations between image patches and is a natural way to introduce a motion bias for object discovery.\nWe demonstrate that this loss leads to state-of-the-art performance on the challenging synthetic MOVi datasets.\nWhen used in combination with the feature reconstruction loss, our model is the first object-centric video model that scales to unconstrained video datasets such as YouTube-VIS.\n\nhttps://martius-lab.github.io/videosaur/", "keywords": "object-centric learning;video;representation learning;self-supervised learning;unsupervised learning", "primary_area": "", "supplementary_material": "", "author": "Andrii Zadaianchuk;Maximilian Seitzer;Georg Martius", "authorids": "~Andrii_Zadaianchuk1;~Maximilian_Seitzer1;~Georg_Martius1", "gender": "M;;M", "homepage": "https://zadaianchuk.github.io/;;https://uni-tuebingen.de/de/264672", "dblp": "274/9441;;47/2706", "google_scholar": ";;https://scholar.google.de/citations?user=b-JF-UIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Andrii_Zadaianchuk1;~Maximilian_Seitzer1;~Georg_Martius1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;;Max Planck Institute for Intelligent Systems", "aff_domain": "is.mpg.de;;tuebingen.mpg.de", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nzadaianchuk2023objectcentric,\ntitle={Object-Centric Learning for Real-World Videos by Predicting Temporal Feature Similarities},\nauthor={Andrii Zadaianchuk and Maximilian Seitzer and Georg Martius},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t1jLRFvBqm}\n}", "github": "", "project": "", "reviewers": "XQ9f;L1X2;avhJ;tzwP", "pdf_size": 19650077, "rating": "5;6;7;8", "confidence": "5;5;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;4;4", "wc_summary": "228;61;59;68", "wc_strengths": "37;39;78;90", "wc_weaknesses": "495;226;17;66", "wc_questions": "70;244;3;46", "wc_limitations": "6;1;11;14", "wc_review": "836;571;168;284", "wc_reply_reviewers": "171;218;0;26", "wc_reply_authors": "118;678;0;0", "reply_reviewers": "2;2;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 104.0, 71.66937979360502 ], "wc_strengths_avg": [ 61.0, 23.39871791359518 ], "wc_weaknesses_avg": [ 201.0, 186.5087129332032 ], "wc_questions_avg": [ 90.75, 91.67708274154452 ], "wc_limitations_avg": [ 8.0, 4.949747468305833 ], "wc_review_avg": [ 464.75, 259.7338782292368 ], "wc_reply_reviewers_avg": [ 103.75, 92.71562705391146 ], "wc_reply_authors_avg": [ 199.0, 280.7151581229628 ], "reply_reviewers_avg": [ 1.25, 
0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18107366862384959195&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "is.mpg.de;;tuebingen.mpg.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Tailoring Self-Attention for Graph via Rooted Subtrees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70233", "id": "t2hEZadBBk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e90ba1fc564a69809d7391bf76a5f087-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t2hEZadBBk", "openreview": "https://openreview.net/forum?id=t2hEZadBBk", "poster": "/media/PosterPDFs/NeurIPS%202023/70233.png?t=1699797941.0598679", "slides": "https://nips.cc/virtual/2023/poster/70233", "video": "https://nips.cc/virtual/2023/poster/70233", "author_site": "Siyuan Huang, Yunchong Song, Jiayue Zhou, Zhouhan Lin", "tldr": "", "abstract": "Attention mechanisms have made significant strides in graph learning, yet they still exhibit notable limitations: local attention faces challenges in capturing long-range information due to the inherent problems of the message-passing scheme, while global attention cannot reflect the hierarchical neighborhood structure and fails to capture fine-grained local information. In this paper, we propose a novel multi-hop graph attention mechanism, named Subtree Attention (STA), to address the aforementioned issues. STA seamlessly bridges the fully-attentional structure and the rooted subtree, with theoretical proof that STA approximates the global attention under extreme settings. By allowing direct computation of attention weights among multi-hop neighbors, STA mitigates the inherent problems in existing graph attention mechanisms. Further we devise an efficient form for STA by employing kernelized softmax, which yields a linear time complexity. Our resulting GNN architecture, the STAGNN, presents a simple yet performant STA-based graph neural network leveraging a hop-aware attention strategy. Comprehensive evaluations on ten node classification datasets demonstrate that STA-based models outperform existing graph transformers and mainstream GNNs. 
The code\nis available at https://github.com/LUMIA-Group/SubTree-Attention.", "keywords": "Graph Based Learning", "primary_area": "", "supplementary_material": "/attachment/f4a27ca854c8dc79ff081dd79cf9fa73fe3d8a04.zip", "author": "Siyuan Huang;Yunchong Song;Jiayue Zhou;Zhouhan Lin", "authorids": "~Siyuan_Huang8;~Yunchong_Song1;~Jiayue_Zhou1;~Zhouhan_Lin1", "gender": "M;M;F;M", "homepage": "https://github.com/SiyuanHuangSJTU;https://github.com/realCrush;https://github.com/chloejvzi;https://hantek.github.io", "dblp": "62/885-3;339/6816;;121/7919.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;C-TqDNsAAAAJ;;https://scholar.google.ca/citations?user=LNZ4efwAAAAJ", "orcid": ";;;0009-0009-7204-0689", "linkedin": "siyuan-huang-885863235/;;;https://ca.linkedin.com/in/zhouhan-lin-34b98975", "or_profile": "~Siyuan_Huang8;~Yunchong_Song1;~Jiayue_Zhou1;~Zhouhan_Lin1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023tailoring,\ntitle={Tailoring Self-Attention for Graph via Rooted Subtrees},\nauthor={Siyuan Huang and Yunchong Song and Jiayue Zhou and Zhouhan Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t2hEZadBBk}\n}", "github": "", "project": "", "reviewers": "sk7A;ds4S;K8yB;smZ8", "pdf_size": 717937, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "79;81;154;176", "wc_strengths": "58;53;23;66", "wc_weaknesses": "351;78;42;64", "wc_questions": "44;15;97;38", "wc_limitations": "7;1;1;24", "wc_review": "539;228;317;368", "wc_reply_reviewers": "24;0;28;0", "wc_reply_authors": "26;0;35;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 122.5, 43.21168823362494 ], "wc_strengths_avg": [ 50.0, 16.263455967290593 ], "wc_weaknesses_avg": [ 133.75, 126.08404934804402 ], "wc_questions_avg": [ 48.5, 30.02082610455615 ], "wc_limitations_avg": [ 8.25, 9.41740410091868 ], "wc_review_avg": [ 363.0, 113.29386567683177 ], "wc_reply_reviewers_avg": [ 13.0, 13.076696830622021 ], "wc_reply_authors_avg": [ 15.25, 15.578430601315397 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2515269529501508459&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Scalable Membership Inference Attacks via Quantile Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70232", "id": "t3WCiGjHqd", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/01328d0767830e73a612f9073e9ff15f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t3WCiGjHqd", "openreview": "https://openreview.net/forum?id=t3WCiGjHqd", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70232", "video": "https://nips.cc/virtual/2023/poster/70232", "author_site": "Martin Bertran, Shuai Tang, Aaron Roth, Michael Kearns, Jamie Morgenstern, Steven Wu", "tldr": "", "abstract": "Membership inference attacks are designed to determine, using black box access to trained models, whether a particular example was used in training or not. Membership inference can be formalized as a hypothesis testing problem. The most effective existing attacks estimate the distribution of some test statistic (usually the model's confidence on the true label) on points that were (and were not) used in training by training many \\emph{shadow models}---i.e. models of the same architecture as the model being attacked, trained on a random subsample of data. While effective, these attacks are extremely computationally expensive, especially when the model under attack is large. \\footnotetext[0]{\nMartin and Shuai are the lead authors, and other authors are ordered alphabetically. \\{maberlop,shuat\\}@amazon.com}\n\nWe introduce a new class of attacks based on performing quantile regression on the distribution of confidence scores induced by the model under attack on points that are not used in training. We show that our method is competitive with state-of-the-art shadow model attacks, while requiring substantially less compute because our attack requires training only a single model. Moreover, unlike shadow model attacks, our proposed attack does not require any knowledge of the architecture of the model under attack and is therefore truly ``black-box\". We show the efficacy of this approach in an extensive series of experiments on various datasets and model architectures. 
Our code is available at \\href{https://github.com/amazon-science/quantile-mia}{github.com/amazon-science/quantile-mia.}", "keywords": "machine learning;privacy;membership inference", "primary_area": "", "supplementary_material": "", "author": "Martin Andres Bertran;Shuai Tang;Aaron Roth;Michael Kearns;Jamie Heather Morgenstern;Steven Wu", "authorids": "~Martin_Andres_Bertran1;~Shuai_Tang1;~Aaron_Roth1;~Michael_Kearns2;~Jamie_Heather_Morgenstern1;~Steven_Wu1", "gender": "M;M;M;;;M", "homepage": ";http://shuaitang.github.io;http://www.cis.upenn.edu/~aaroth/;http://jamiemorgenstern.com;https://www.cis.upenn.edu/~mkearns/;https://zstevenwu.com/", "dblp": "154/1944;;80/3311;64/8610;78/6858;137/8350", "google_scholar": "1kki_voAAAAJ;fJVeBrAAAAAJ;https://scholar.google.com.tw/citations?user=kLUQrrYAAAAJ;https://scholar.google.com/citations?hl=en;8iQk0DIAAAAJ;MbF6rTEAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;zstevenwu/", "or_profile": "~Martin_Andres_Bertran1;~Shuai_Tang1;~Aaron_Roth1;~Jamie_Heather_Morgenstern1;~Michael_J._Kearns1;~Zhiwei_Steven_Wu1", "aff": ";Amazon Web Services;University of Pennsylvania;;University of Pennsylvania;Carnegie Mellon University", "aff_domain": ";amazon.com;upenn.edu;;upenn.edu;cmu.edu", "position": ";Applied Scientist;Full Professor;;Professor;Assistant Professor", "bibtex": "@inproceedings{\nbertran2023scalable,\ntitle={Scalable Membership Inference Attacks via Quantile Regression},\nauthor={Martin Andres Bertran and Shuai Tang and Aaron Roth and Michael Kearns and Jamie Heather Morgenstern and Steven Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t3WCiGjHqd}\n}", "github": "", "project": "", "reviewers": "bLLW;648h;NGxz;Fy4T;kgzg;5DZs", "pdf_size": 2305238, "rating": "3;5;5;5;7;7", "confidence": "4;3;3;3;4;4", "soundness": "2;2;2;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "3;3;3;2;3;3", "wc_summary": "58;111;121;83;103;70", "wc_strengths": "19;86;22;52;9;54", "wc_weaknesses": "250;308;276;73;10;171", "wc_questions": "83;45;65;38;46;45", "wc_limitations": "9;5;82;1;26;59", "wc_review": "419;555;566;247;194;399", "wc_reply_reviewers": "338;157;36;28;14;128", "wc_reply_authors": "0;133;0;0;4;82", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;2;1;1;2;2", "rating_avg": [ 5.333333333333333, 1.3743685418725535 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 91.0, 22.501851775650227 ], "wc_strengths_avg": [ 40.333333333333336, 26.398653164297773 ], "wc_weaknesses_avg": [ 181.33333333333334, 108.71931240073627 ], "wc_questions_avg": [ 53.666666666666664, 15.509853498842457 ], "wc_limitations_avg": [ 30.333333333333332, 30.18645759644914 ], "wc_review_avg": [ 396.6666666666667, 140.0722036030783 ], "wc_reply_reviewers_avg": [ 116.83333333333333, 112.28893781470886 ], "wc_reply_authors_avg": [ 36.5, 52.33784481615574 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.24253562503633294, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14520471396692441079&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": ";amazon.com;upenn.edu;;upenn.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Amazon;University of Pennsylvania;Carnegie Mellon University", 
"aff_unique_dep": "Amazon Web Services;;", "aff_unique_url": "https://aws.amazon.com;https://www.upenn.edu;https://www.cmu.edu", "aff_unique_abbr": "AWS;UPenn;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "QuadAttac$K$: A Quadratic Programming Approach to Learning Ordered Top-$K$ Adversarial Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70231", "id": "t3vPEjgNtj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9902a53031ebbbab73898028073d4790-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t3vPEjgNtj", "openreview": "https://openreview.net/forum?id=t3vPEjgNtj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70231", "video": "https://nips.cc/virtual/2023/poster/70231", "author_site": "Thomas Paniagua, Ryan Grainger, Tianfu Wu", "tldr": "", "abstract": "The adversarial vulnerability of Deep Neural Networks (DNNs) has been well-known and widely concerned, often under the context of learning top-$1$ attacks (e.g., fooling a DNN to classify a cat image as dog). This paper shows that the concern is much more serious by learning significantly more aggressive ordered top-$K$ clear-box targeted attacks proposed in~\\citep{zhang2020learning}. We propose a novel and rigorous quadratic programming (QP) method of learning ordered top-$K$ attacks with low computing cost, dubbed as \\textbf{QuadAttac$K$}. Our QuadAttac$K$ directly solves the QP to satisfy the attack constraint in the feature embedding space (i.e., the input space to the final linear classifier), which thus exploits the semantics of the feature embedding space (i.e., the principle of class coherence). With the optimized feature embedding vector perturbation, it then computes the adversarial perturbation in the data space via the vanilla one-step back-propagation. In experiments, the proposed QuadAttac$K$ is tested in the ImageNet-1k classification using ResNet-50, DenseNet-121, and Vision Transformers (ViT-B and DEiT-S). 
It successfully pushes the boundary of successful ordered top-$K$ attacks from $K=10$ up to $K=20$ at a cheap budget ($1\\times 60$) and further improves attack success rates for $K=5$ for all tested models, while retaining the performance for $K=1$.", "keywords": "Ordered Top-K Clear-Box Targeted Adversarial Attack;Deep Neural Networks;Quadratic Programming;Robustness", "primary_area": "", "supplementary_material": "", "author": "Thomas Paniagua;Ryan Grainger;Tianfu Wu", "authorids": "~Thomas_Paniagua1;~Ryan_Grainger1;~Tianfu_Wu1", "gender": ";M;M", "homepage": ";;https://research.ece.ncsu.edu/ivmcl/", "dblp": "250/7314;;08/4148-1", "google_scholar": ";;8XcYgk0AAAAJ", "orcid": ";;0000-0001-8911-5506", "linkedin": "thomaspaniagua;https://linkedin.com/in/ryan-grainger-84945297;", "or_profile": "~Thomas_Paniagua1;~Ryan_Grainger1;~Tianfu_Wu1", "aff": "Applied Research Associates;North Carolina State University;North Carolina State University", "aff_domain": "ara.com;ncsu.edu;ncsu.edu", "position": "Scientist;PhD student;Associate Professor", "bibtex": "@inproceedings{\npaniagua2023quadattack,\ntitle={QuadAttac\\$K\\$: A Quadratic Programming Approach to Learning Ordered Top-\\$K\\$ Adversarial Attacks},\nauthor={Thomas Paniagua and Ryan Grainger and Tianfu Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t3vPEjgNtj}\n}", "github": "", "project": "", "reviewers": "Xdne;xrmU;2efR;wPrk", "pdf_size": 25496996, "rating": "5;5;5;6", "confidence": "4;3;2;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "66;71;58;61", "wc_strengths": "191;64;76;135", "wc_weaknesses": "337;66;115;56", "wc_questions": "210;2;1;49", "wc_limitations": "22;1;1;1", "wc_review": "826;204;251;302", "wc_reply_reviewers": "13;0;18;22", "wc_reply_authors": "23;0;0;16", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.0, 4.949747468305833 ], "wc_strengths_avg": [ 116.5, 50.71735403192876 ], "wc_weaknesses_avg": [ 143.5, 113.92651140099042 ], "wc_questions_avg": [ 65.5, 85.65191182921721 ], "wc_limitations_avg": [ 6.25, 9.093266739736606 ], "wc_review_avg": [ 395.75, 250.81105936541155 ], "wc_reply_reviewers_avg": [ 13.25, 8.287792227125388 ], "wc_reply_authors_avg": [ 9.75, 10.059199769365355 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M48xQzJkHeYJ:scholar.google.com/&scioq=QuadAttac%24K%24:+A+Quadratic+Programming+Approach+to+Learning+Ordered+Top-%24K%24+Adversarial+Attacks&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ara.com;ncsu.edu;ncsu.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Applied Research Associates;North Carolina State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ara.com;https://www.ncsu.edu", "aff_unique_abbr": "ARA;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Trans-Dimensional Generative Modeling via Jump Diffusion Models", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70230", "id": "t6nA7x3GAC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/83a10a480fbec91c88f6a9293b4d2b05-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t6nA7x3GAC", "openreview": "https://openreview.net/forum?id=t6nA7x3GAC", "poster": "/media/PosterPDFs/NeurIPS%202023/70230.png?t=1701376310.7795026", "slides": "https://nips.cc/virtual/2023/poster/70230", "video": "https://nips.cc/virtual/2023/poster/70230", "author_site": "Andrew Campbell, William Harvey, Christian Weilbach, Valentin De Bortoli, Thomas Rainforth, Arnaud Doucet", "tldr": "", "abstract": "We propose a new class of generative model that naturally handles data of varying dimensionality by jointly modeling the state and dimension of each datapoint. The generative process is formulated as a jump diffusion process that makes jumps between different dimensional spaces. We first define a dimension destroying forward noising process, before deriving the dimension creating time-reversed generative process along with a novel evidence lower bound training objective for learning to approximate it.\nSimulating our learned approximation to the time-reversed generative process then provides an effective way of sampling data of varying dimensionality by jointly generating state values and dimensions. \nWe demonstrate our approach on molecular and video datasets of varying dimensionality, reporting better compatibility with test-time diffusion guidance imputation tasks and improved interpolation capabilities versus fixed dimensional models that generate state values and dimensions separately.", "keywords": "diffusion;score-based;score;markov chain;jump diffusion;poisson", "primary_area": "", "supplementary_material": "/attachment/8c83b58bdb7e2b201ddaa225f764091c783016c9.pdf", "author": "Andrew Campbell;William Harvey;Christian Dietrich Weilbach;Valentin De Bortoli;Tom Rainforth;Arnaud Doucet", "authorids": "~Andrew_Campbell4;~William_Harvey1;~Christian_Dietrich_Weilbach1;~Valentin_De_Bortoli1;~Tom_Rainforth1;~Arnaud_Doucet2", "gender": ";M;M;;M;", "homepage": ";https://www.cs.ubc.ca/~wsgh/;https://whilo.github.io/;https://vdeborto.github.io/;http://www.robots.ox.ac.uk/~twgr;https://www.stats.ox.ac.uk/~doucet/", "dblp": "93/3398;26/8210-2;;224/9338;166/1198;68/1628", "google_scholar": ";https://scholar.google.co.uk/citations?user=kDd7nBkAAAAJ;;;https://scholar.google.co.uk/citations?user=ieLRNKMAAAAJ;W4SZGV8AAAAJ", "orcid": "0000-0003-2086-0238;;;;;0000-0002-7662-419X", "linkedin": ";;;;;", "or_profile": "~Andrew_Campbell4;~William_Harvey1;~Christian_Dietrich_Weilbach1;~Valentin_De_Bortoli1;~Tom_Rainforth1;~Arnaud_Doucet2", "aff": "University of Oxford;University of British Columbia;;University of Oxford;;University of Oxford", "aff_domain": "ox.ac.uk;cs.ubc.ca;;ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\ncampbell2023transdimensional,\ntitle={Trans-Dimensional Generative Modeling via Jump Diffusion Models},\nauthor={Andrew Campbell and William Harvey and Christian Dietrich Weilbach and Valentin De Bortoli and Tom Rainforth and Arnaud Doucet},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t6nA7x3GAC}\n}", "github": "", "project": "", "reviewers": "tNRG;fiGg;EZPD;FTvW", "pdf_size": 4739036, "rating": "7;7;7;8", "confidence": "4;3;4;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", 
"presentation": "3;3;3;4", "wc_summary": "57;68;95;106", "wc_strengths": "44;44;72;173", "wc_weaknesses": "54;1;143;94", "wc_questions": "6;20;134;54", "wc_limitations": "1;16;8;16", "wc_review": "162;149;452;443", "wc_reply_reviewers": "10;20;66;122", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 19.78004044485248 ], "wc_strengths_avg": [ 83.25, 53.063052117268946 ], "wc_weaknesses_avg": [ 73.0, 52.16799785308997 ], "wc_questions_avg": [ 53.5, 49.646248599466205 ], "wc_limitations_avg": [ 10.25, 6.2599920127744575 ], "wc_review_avg": [ 301.5, 146.10698135270607 ], "wc_reply_reviewers_avg": [ 54.5, 44.32550056118938 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3859363621497535245&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;cs.ubc.ca;;ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.ubc.ca", "aff_unique_abbr": "Oxford;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Achieving Cross Modal Generalization with Multimodal Unified Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70229", "id": "t7ZowrDWVw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c89f09849eb5af489abb122394ff0f0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t7ZowrDWVw", "openreview": "https://openreview.net/forum?id=t7ZowrDWVw", "poster": "/media/PosterPDFs/NeurIPS%202023/70229.png?t=1699517213.5207376", "slides": "https://nips.cc/virtual/2023/poster/70229", "video": "https://nips.cc/virtual/2023/poster/70229", "author_site": "Yan Xia, Hai Huang, Jieming Zhu, Zhou Zhao", "tldr": "", "abstract": "This paper introduces a novel task called Cross Modal Generalization (CMG), which addresses the challenge of learning a unified discrete representation from paired multimodal data during pre-training. Then in downstream tasks, the model can achieve zero-shot generalization ability in other modalities when only one modal is labeled. Existing approaches in multimodal representation learning focus more on coarse-grained alignment or rely on the assumption that \n information from different modalities is completely aligned, which is impractical in real-world scenarios. To overcome this limitation, we propose \\textbf{Uni-Code}, which contains two key contributions: the Dual Cross-modal Information Disentangling (DCID) module and the Multi-Modal Exponential Moving Average (MM-EMA). These methods facilitate bidirectional supervision between modalities and align semantically equivalent information in a shared discrete latent space, enabling fine-grained unified representation of multimodal sequences. 
During pre-training, we investigate various modality combinations, including audio-visual, audio-text, and the tri-modal combination of audio-visual-text. Extensive experiments on various downstream tasks, i.e., cross-modal event classification, localization, cross-modal retrieval, query-based video segmentation, and cross-dataset event localization, demonstrate the effectiveness of our proposed methods. The code is available at https://github.com/haihuangcode/CMG.", "keywords": "multi-modal;discrete representation;mutual information estimation", "primary_area": "", "supplementary_material": "/attachment/1cdeabede9deed1aa8546bdf480424470ddb9734.zip", "author": "Yan Xia;Hai Huang;Jieming Zhu;Zhou Zhao", "authorids": "~Yan_Xia4;~Hai_Huang6;~Jieming_Zhu2;~Zhou_Zhao2", "gender": "M;;M;M", "homepage": "https://github.com/marmot-xy;https://haihuangcode.github.io/;https://jiemingzhu.github.io/;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "17/6518-6;51/944-13.html;10/2717;75/7785", "google_scholar": "6kEbV3IAAAAJ;FKvBzQwAAAAJ;oNKerP8AAAAJ;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": "0000-0003-4631-741X;0009-0003-8813-2306;0000-0002-5666-8320;0000-0001-6121-0384", "linkedin": ";;;", "or_profile": "~Yan_Xia4;~Hai_Huang6;~Jieming_Zhu2;~Zhou_Zhao2", "aff": "Zhejiang University;Northeastern University;Huawei Noah's Ark Lab;Zhejiang University", "aff_domain": "zju.edu.cn;neu.edu.cn;huawei.com;zju.edu.cn", "position": "PhD student;Undergrad student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nxia2023achieving,\ntitle={Achieving Cross Modal Generalization with Multimodal Unified Representation},\nauthor={Yan Xia and Hai Huang and Jieming Zhu and Zhou Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t7ZowrDWVw}\n}", "github": "", "project": "", "reviewers": "w8Lb;bPma;y1JE;WtGp;jARS;kKgn", "pdf_size": 4003466, "rating": "5;5;5;6;7;8", "confidence": "4;2;4;4;3;4", "soundness": "3;3;3;3;3;4", "novelty": "3;3;3;2;3;4", "presentation": "3;3;3;4;3;4", "wc_summary": "72;61;93;199;79;82", "wc_strengths": "19;25;42;125;38;60", "wc_weaknesses": "138;24;53;225;15;5", "wc_questions": "63;2;83;50;36;15", "wc_limitations": "13;2;9;7;22;2", "wc_review": "305;114;280;606;190;164", "wc_reply_reviewers": "38;23;25;0;0;44", "wc_reply_authors": "25;15;25;0;0;206", "reply_reviewers": "1;1;1;0;0;1", "reply_authors": "2;2;2;1;1;2", "rating_avg": [ 6.0, 1.1547005383792515 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 97.66666666666667, 46.34532218993508 ], "wc_strengths_avg": [ 51.5, 35.36830030785572 ], "wc_weaknesses_avg": [ 76.66666666666667, 79.62132600977928 ], "wc_questions_avg": [ 41.5, 27.536339626028727 ], "wc_limitations_avg": [ 9.166666666666666, 6.914156170897179 ], "wc_review_avg": [ 276.5, 161.213884844534 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 16.918103387266026 ], "wc_reply_authors_avg": [ 45.166666666666664, 72.65309506292367 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 38, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6590944597632967058&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;neu.edu.cn;huawei.com;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Zhejiang University;Northeastern University;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "https://www.zju.edu.cn;https://www.northeastern.edu;https://www.huawei.com", "aff_unique_abbr": "ZJU;NEU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Joint Bayesian Inference of Graphical Structure and Parameters with a Single Generative Flow Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70228", "id": "t7lnhhi7De", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/639a9a172c044fbb64175b5fad42e9a5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t7lnhhi7De", "openreview": "https://openreview.net/forum?id=t7lnhhi7De", "poster": "/media/PosterPDFs/NeurIPS%202023/70228.png?t=1702064641.1203935", "slides": "https://nips.cc/virtual/2023/poster/70228", "video": "https://nips.cc/virtual/2023/poster/70228", "author_site": "Tristan Deleu, Mizu Nishikawa-Toomey, Jithendaraa Subramanian, Nikolay Malkin, Laurent Charlin, Yoshua Bengio", "tldr": "", "abstract": "Generative Flow Networks (GFlowNets), a class of generative models over discrete and structured sample spaces, have been previously applied to the problem of inferring the marginal posterior distribution over the directed acyclic graph (DAG) of a Bayesian Network, given a dataset of observations. Based on recent advances extending this framework to non-discrete sample spaces, we propose in this paper to approximate the joint posterior over not only the structure of a Bayesian Network, but also the parameters of its conditional probability distributions. We use a single GFlowNet whose sampling policy follows a two-phase process: the DAG is first generated sequentially one edge at a time, and then the corresponding parameters are picked once the full structure is known. Since the parameters are included in the posterior distribution, this leaves more flexibility for the local probability models of the Bayesian Network, making our approach applicable even to non-linear models parametrized by neural networks. 
We show that our method, called JSP-GFN, offers an accurate approximation of the joint posterior, while comparing favorably against existing methods on both simulated and real data.", "keywords": "bayesian network;bayesian;structure learning;causal discovery;gflownet", "primary_area": "", "supplementary_material": "/attachment/fddff5d8acc930a628b41802c25cb03002364e96.pdf", "author": "Tristan Deleu;Mizu Nishikawa-Toomey;Jithendaraa Subramanian;Nikolay Malkin;Laurent Charlin;Yoshua Bengio", "authorids": "~Tristan_Deleu1;~Mizu_Nishikawa-Toomey1;~Jithendaraa_Subramanian1;~Nikolay_Malkin1;~Laurent_Charlin1;~Yoshua_Bengio1", "gender": ";F;M;;M;M", "homepage": "https://tristandeleu.github.io/;https://mila.quebec/en/person/mizu-nishikawa-toomey/;https://jithendaraa.github.io/;;http://www.cs.toronto.edu/~lcharlin/;http://yoshuabengio.org", "dblp": "192/1896;;281/6755;;48/5717;56/953", "google_scholar": "nLNwh-wAAAAJ;QyRLU-cAAAAJ;s0BzYvYAAAAJ;;Cul0g2YAAAAJ;kukA0LcAAAAJ", "orcid": ";;;;0000-0002-6545-9459;", "linkedin": ";https://linkedin.com/in/mizu-nishikawa-toomey-20261316b;jithendaraa-subramanian-85a22b176/;;;yoshuabengio/?originalSubdomain=ca", "or_profile": "~Tristan_Deleu1;~Mizu_Nishikawa-Toomey1;~Jithendaraa_Subramanian1;~Nikolay_Malkin1;~Laurent_Charlin1;~Yoshua_Bengio1", "aff": "University of Montreal;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;McGill University, McGill University;;Mila - Quebec Artificial Intelligence Institute;University of Montreal", "aff_domain": "umontreal.ca;mila.umontreal.ca;mail.mcgill.ca;;mila.quebec;umontreal.ca", "position": "PhD student;PhD student;MS student;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ndeleu2023joint,\ntitle={Joint Bayesian Inference of Graphical Structure and Parameters with a Single Generative Flow Network},\nauthor={Tristan Deleu and Mizu Nishikawa-Toomey and Jithendaraa Subramanian and Nikolay Malkin and Laurent Charlin and Yoshua Bengio},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t7lnhhi7De}\n}", "github": "", "project": "", "reviewers": "jHMM;5gCu;MmTz;9gh3;c3AM;rtyV", "pdf_size": 956148, "rating": "5;5;5;6;6;8", "confidence": "3;4;2;3;2;2", "soundness": "3;3;3;3;3;4", "novelty": "3;2;3;3;3;3", "presentation": "2;3;2;3;3;4", "wc_summary": "81;156;93;98;18;55", "wc_strengths": "54;129;67;57;25;96", "wc_weaknesses": "147;245;188;163;25;22", "wc_questions": "138;34;51;78;1;17", "wc_limitations": "16;1;1;1;11;1", "wc_review": "436;565;400;397;80;191", "wc_reply_reviewers": "443;87;0;0;0;80", "wc_reply_authors": "1315;85;0;418;0;17", "reply_reviewers": "3;1;0;0;0;1", "reply_authors": "4;2;1;2;1;2", "rating_avg": [ 5.833333333333333, 1.0671873729054748 ], "confidence_avg": [ 2.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 83.5, 42.16139624506443 ], "wc_strengths_avg": [ 71.33333333333333, 33.17964570167814 ], "wc_weaknesses_avg": [ 131.66666666666666, 82.29958822632238 ], "wc_questions_avg": [ 53.166666666666664, 45.119901989649264 ], "wc_limitations_avg": [ 5.166666666666667, 6.066758241067098 ], "wc_review_avg": [ 344.8333333333333, 161.42636780347323 ], "wc_reply_reviewers_avg": [ 101.66666666666667, 157.16304343649693 ], "wc_reply_authors_avg": [ 305.8333333333333, 
474.3871192274184 ], "reply_reviewers_avg": [ 0.8333333333333334, 1.0671873729054748 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.48890120703870477, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4373012997556732388&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "umontreal.ca;mila.umontreal.ca;mail.mcgill.ca;;mila.quebec;umontreal.ca", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Montreal;McGill University;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;Artificial Intelligence", "aff_unique_url": "https://www.umontreal.ca;https://www.mcgill.ca;https://mila.quebec", "aff_unique_abbr": "UM;McGill;Mila", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Rewiring Neurons in Non-Stationary Environments", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70227", "id": "t7ozN4AXd0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/599221d7ebf6b3403190f38a3f282a1c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t7ozN4AXd0", "openreview": "https://openreview.net/forum?id=t7ozN4AXd0", "poster": "/media/PosterPDFs/NeurIPS%202023/70227.png?t=1700894819.2785501", "slides": "https://nips.cc/virtual/2023/poster/70227", "video": "https://nips.cc/virtual/2023/poster/70227", "author_site": "Zhicheng Sun, Yadong Mu", "tldr": "", "abstract": "The human brain rewires itself for neuroplasticity in the presence of new tasks. We are inspired to harness this key process in continual reinforcement learning, prioritizing adaptation to non-stationary environments. In contrast to existing rewiring approaches that rely on pruning or dynamic routing, which may limit network capacity and plasticity, this work presents a novel rewiring scheme by permuting hidden neurons. Specifically, the neuron permutation is parameterized to be end-to-end learnable and can rearrange all available synapses to explore a large span of weight space, thereby promoting adaptivity. In addition, we introduce two main designs to steer the rewiring process in continual reinforcement learning: first, a multi-mode rewiring strategy is proposed that diversifies the policy and encourages exploration when encountering new environments; second, to ensure stability on previously learned tasks, the network is devised to cache each learned wiring while subtly updating its weights, allowing for retrospective recovery of any previous state appropriate for the task. Meanwhile, an alignment mechanism is curated to achieve a better plasticity-stability tradeoff by jointly optimizing cached wirings and weights. Our proposed method is comprehensively evaluated on 18 continual reinforcement learning scenarios ranging from locomotion to manipulation, demonstrating its advantages over state-of-the-art competitors in performance-efficiency tradeoffs. 
Code is available at https://github.com/feifeiobama/RewireNeuron.", "keywords": "continual learning;reinforcement learning;brain-inspired learning", "primary_area": "", "supplementary_material": "/attachment/f46287ba07e648c1a3d91ec1b2c393adc5bcadd0.pdf", "author": "Zhicheng Sun;Yadong MU", "authorids": "~Zhicheng_Sun1;~Yadong_MU1", "gender": "M;M", "homepage": "https://feifeiobama.github.io;http://www.muyadong.com/", "dblp": "331/1484-1;55/1817", "google_scholar": "Xa8dgkYAAAAJ;https://scholar.google.com.tw/citations?user=Fqqx4HsAAAAJ", "orcid": ";", "linkedin": "zhicheng-sun;", "or_profile": "~Zhicheng_Sun1;~Yadong_MU1", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nsun2023rewiring,\ntitle={Rewiring Neurons in Non-Stationary Environments},\nauthor={Zhicheng Sun and Yadong MU},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t7ozN4AXd0}\n}", "github": "", "project": "", "reviewers": "NgVj;cvgK;CtSV;AnPE;nWMB", "pdf_size": 488752, "rating": "5;6;6;7;8", "confidence": "3;4;3;3;4", "soundness": "2;3;1;3;4", "novelty": "2;3;3;3;4", "presentation": "3;2;2;3;3", "wc_summary": "119;95;61;109;156", "wc_strengths": "37;59;96;21;150", "wc_weaknesses": "211;59;39;25;351", "wc_questions": "59;138;56;249;40", "wc_limitations": "9;14;113;31;19", "wc_review": "435;365;365;435;716", "wc_reply_reviewers": "161;27;97;18;21", "wc_reply_authors": "325;0;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 108.0, 30.996774025694997 ], "wc_strengths_avg": [ 72.6, 46.15885613834035 ], "wc_weaknesses_avg": [ 137.0, 126.09837429562683 ], "wc_questions_avg": [ 108.4, 78.11427526387223 ], "wc_limitations_avg": [ 37.2, 38.597409239481344 ], "wc_review_avg": [ 463.2, 130.2188926385108 ], "wc_reply_reviewers_avg": [ 64.8, 56.26508686565764 ], "wc_reply_authors_avg": [ 65.0, 130.0 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4803844614152616, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3749760987564337406&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Cheap and Quick: Efficient Vision-Language Instruction Tuning for Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70226", "id": "t877958UGZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e84e4413268b713f0d4a1b23a9dae57-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t877958UGZ", "openreview": "https://openreview.net/forum?id=t877958UGZ", "poster": "/media/PosterPDFs/NeurIPS%202023/70226.png?t=1701960806.1197474", "slides": "https://nips.cc/virtual/2023/poster/70226", "video": 
"https://nips.cc/virtual/2023/poster/70226", "author_site": "Gen Luo, Yiyi Zhou, Tianhe Ren, Shengxin Chen, Xiaoshuai Sun, Rongrong Ji", "tldr": "", "abstract": "Recently, growing interest has been aroused in extending the multimodal capability of large language models (LLMs), e.g., vision-language (VL) learning, which is regarded as the next milestone of artificial general intelligence. However, existing solutions are prohibitively expensive, which not only need to optimize excessive parameters, but also require another large-scale pre-training before VL instruction tuning. In this paper, we propose a novel and affordable solution for the effective VL adaption of LLMs, called Mixture-of-Modality Adaptation (MMA). Instead of using large neural networks to connect the image encoder and LLM, MMA adopts lightweight modules, i.e., adapters, to bridge the gap between LLMs and VL tasks, which also enables the joint optimization of the image and language models. Meanwhile, MMA is also equipped with a routing algorithm to help LLMs achieve an automatic shift between single- and multi-modal instructions without compromising their ability of natural language understanding. To validate MMA, we apply it to a recent LLM called LLaMA and term this formed large vision-language instructed model as LaVIN. To validate MMA and LaVIN, we conduct extensive experiments under two setups, namely multimodal science question answering and multimodal dialogue. The experimental results not only demonstrate the competitive performance and the superior training efficiency of LaVIN than existing multimodal LLMs, but also confirm its great potential as a general-purpose chatbot. More importantly, the actual expenditure of LaVIN is extremely cheap, e.g., only 1.4 training hours with 3.8M trainable parameters, greatly confirming the effectiveness of MMA. 
Our code is anonymously released at: https://anonymous.4open.science/r/LaVIN--1067.", "keywords": "vision-language instruction tuning;multimodal LLM;efficient training", "primary_area": "", "supplementary_material": "/attachment/c0224ca92937f6fed074f15d1f29a599e0a91f9a.pdf", "author": "Gen Luo;Yiyi Zhou;Tianhe Ren;Shengxin Chen;Xiaoshuai Sun;Rongrong Ji", "authorids": "~Gen_Luo1;~Yiyi_Zhou1;~Tianhe_Ren1;~Shengxin_Chen1;~Xiaoshuai_Sun3;~Rongrong_Ji5", "gender": "M;;M;M;M;M", "homepage": ";https://rentainhe.github.io/;https://scholar.google.com/citations?user=xJQnggMAAAAJ&hl=zh-CN;https://sites.google.com/view/xssun;http://mac.xmu.edu.cn/rrji-en.html;https://github.com/luogen1996", "dblp": "174/0086;;;26/5787.html;86/5681;195/2078", "google_scholar": "w3_2ep0AAAAJ;cW4ILs0AAAAJ;xJQnggMAAAAJ;KPMK3B4AAAAJ;;EyZqU9gAAAAJ", "orcid": ";;;0000-0003-3912-9306;;", "linkedin": ";;;;;", "or_profile": "~Yiyi_Zhou1;~Tianhe_Ren1;~Shengxin_Chen1;~Xiaoshuai_Sun3;~Rongrong_Ji5;~Gen_Luogen1", "aff": "Xiamen University;The International Digital Economy Academy;Xiamen University;Xiamen University;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;idea.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "Associate Professor;Researcher;MS student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nluo2023cheap,\ntitle={Cheap and Quick: Efficient Vision-Language Instruction Tuning for Large Language Models},\nauthor={Gen Luo and Yiyi Zhou and Tianhe Ren and Shengxin Chen and Xiaoshuai Sun and Rongrong Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t877958UGZ}\n}", "github": "", "project": "", "reviewers": "mD9H;3xnb;4c4u;mWDr;q2tp", "pdf_size": 2905277, "rating": "4;5;6;6;7", "confidence": "5;3;3;4;3", "soundness": "3;2;3;4;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "74;70;79;134;66", "wc_strengths": "56;63;49;47;45", "wc_weaknesses": "199;9;158;76;69", "wc_questions": "1;104;4;24;1", "wc_limitations": "3;1;1;13;1", "wc_review": "333;247;291;294;182", "wc_reply_reviewers": "93;0;15;94;0", "wc_reply_authors": "134;0;0;77;0", "reply_reviewers": "1;0;1;3;0", "reply_authors": "2;1;1;4;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 84.6, 25.07269431074371 ], "wc_strengths_avg": [ 52.0, 6.6332495807108 ], "wc_weaknesses_avg": [ 102.2, 67.76252651724255 ], "wc_questions_avg": [ 26.8, 39.54440541973037 ], "wc_limitations_avg": [ 3.8, 4.66476151587624 ], "wc_review_avg": [ 269.4, 51.492135321813954 ], "wc_reply_reviewers_avg": [ 40.4, 43.70171621343949 ], "wc_reply_authors_avg": [ 42.2, 54.73719028229345 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6864064729836441, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4819532122205705554&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "xmu.edu.cn;idea.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Xiamen University;International Digital Economy Academy", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;", "aff_unique_abbr": "XMU;", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Unknown" }, { "title": "Uncertainty Estimation for Safety-critical Scene Segmentation via Fine-grained Reward Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70225", "id": "t9Swbo82dB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/71ec377d5df1fc61ee7770857820519b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=t9Swbo82dB", "openreview": "https://openreview.net/forum?id=t9Swbo82dB", "poster": "/media/PosterPDFs/NeurIPS%202023/70225.png?t=1701927016.2188444", "slides": "https://nips.cc/virtual/2023/poster/70225", "video": "https://nips.cc/virtual/2023/poster/70225", "author_site": "Hongzheng Yang, Cheng Chen, Yueyao CHEN, Scheppach, Hon Chi Yip, DOU QI", "tldr": "", "abstract": "Uncertainty estimation plays an important role for future reliable deployment of deep segmentation models in safety-critical scenarios such as medical applications. However, existing methods for uncertainty estimation have been limited by the lack of explicit guidance for calibrating the prediction risk and model confidence. In this work, we propose a novel fine-grained reward maximization (FGRM) framework, to address uncertainty estimation by directly utilizing an uncertainty metric related reward function with a reinforcement learning based model tuning algorithm. This would benefit the model uncertainty estimation with direct optimization guidance for model calibration. Specifically, our method designs a new uncertainty estimation reward function using the calibration metric, which is maximized to fine-tune an evidential learning pre-trained segmentation model for calibrating prediction risk. Importantly, we innovate an effective fine-grained parameter update scheme, which imposes fine-grained reward-weighting of each network parameter according to the parameter importance quantified by the fisher information matrix. To the best of our knowledge, this is the first work exploring reward optimization for model uncertainty estimation in safety-critical vision tasks. The effectiveness of our method is demonstrated on two large safety-critical surgical scene segmentation datasets under two different uncertainty estimation settings. With real-time one forward pass at inference, our method outperforms state-of-the-art methods by a clear margin on all the calibration metrics of uncertainty estimation, while maintaining a high task accuracy for the segmentation results. 
Code is available at https://github.com/med-air/FGRM.", "keywords": "uncertainty estimation;semantic segmentation;medical application", "primary_area": "", "supplementary_material": "/attachment/72a72c25c0abe864d54cfed6407b80528e5d1601.pdf", "author": "Hongzheng Yang;Cheng Chen;Yueyao Chen;Markus Scheppach;Hon Chi Yip;Qi Dou", "authorids": "~Hongzheng_Yang1;~Cheng_Chen1;yueyaochen0823@gmail.com;markus.scheppach@uk-augsburg.de;hcyip@surgery.cuhk.edu.hk;~Qi_Dou2", "gender": ";F;;;;F", "homepage": ";https://cchen-cc.github.io/;;;;https://www.cse.cuhk.edu.hk/~qdou", "dblp": ";10/217-13;;;;165/7846", "google_scholar": ";https://scholar.google.com.hk/citations?user=bRe3FlcAAAAJ;;;;https://scholar.google.com.hk/citations?user=iHh7IJQAAAAJ", "orcid": ";;;;;0000-0002-3416-9950", "linkedin": ";;;;;", "or_profile": "~Hongzheng_Yang1;~Cheng_Chen1;yueyaochen0823@gmail.com;markus.scheppach@uk-augsburg.de;hcyip@surgery.cuhk.edu.hk;~Qi_Dou2", "aff": ";Harvard Medical School;;;;The Chinese University of Hong Kong", "aff_domain": ";harvard.edu;;;;cuhk.edu.hk", "position": ";Postdoc;;;;Assistant Professor", "bibtex": "@inproceedings{\nyang2023uncertainty,\ntitle={Uncertainty Estimation for Safety-critical Scene Segmentation via Fine-grained Reward Maximization},\nauthor={Hongzheng Yang and Cheng Chen and Yueyao Chen and Markus Scheppach and Hon Chi Yip and Qi Dou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=t9Swbo82dB}\n}", "github": "", "project": "", "reviewers": "jxwz;DNkz;iwgk;mEtW;WcWT", "pdf_size": 3677628, "rating": "4;5;6;7;8", "confidence": "4;5;3;2;3", "soundness": "2;4;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;4;3;2;3", "wc_summary": "62;120;99;146;140", "wc_strengths": "35;144;63;80;127", "wc_weaknesses": "51;35;40;390;62", "wc_questions": "3;45;88;27;142", "wc_limitations": "93;84;46;52;1", "wc_review": "244;428;336;695;472", "wc_reply_reviewers": "194;0;17;223;180", "wc_reply_authors": "987;0;0;95;201", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;1;1;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 113.4, 30.539155194602223 ], "wc_strengths_avg": [ 89.8, 40.34550780446319 ], "wc_weaknesses_avg": [ 115.6, 137.5159627097887 ], "wc_questions_avg": [ 61.0, 49.12433205652775 ], "wc_limitations_avg": [ 55.2, 32.529371343448986 ], "wc_review_avg": [ 435.0, 151.85519418182574 ], "wc_reply_reviewers_avg": [ 122.8, 94.50375653909214 ], "wc_reply_authors_avg": [ 256.6, 372.6599522352784 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6933752452815363, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3919517287126481390&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";harvard.edu;;;;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Harvard University;Chinese University of Hong Kong", "aff_unique_dep": "Medical School;", "aff_unique_url": "https://hms.harvard.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "HMS;CUHK", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Boston;Hong Kong SAR", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "A 
Bounded Ability Estimation for Computerized Adaptive Testing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70224", "id": "tAwjG5bM7H", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0730b81dbc16cce7e85b519cb7fe5a8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tAwjG5bM7H", "openreview": "https://openreview.net/forum?id=tAwjG5bM7H", "poster": "/media/PosterPDFs/NeurIPS%202023/70224.png?t=1699842250.05479", "slides": "https://nips.cc/virtual/2023/poster/70224", "video": "https://nips.cc/virtual/2023/poster/70224", "author_site": "Yan Zhuang, Qi Liu, Guanhao Zhao, Zhenya Huang, Weizhe Huang, Zachary Pardos, Enhong Chen, Jinze Wu, Xin Li", "tldr": "", "abstract": "Computerized adaptive testing (CAT), as a tool that can efficiently measure a student's ability, has been widely used in various standardized tests (e.g., GMAT and GRE). The adaptivity of CAT refers to the selection of the most informative questions for each student, reducing test length. Existing CAT methods do not explicitly target ability estimation accuracy since the student's true ability is unavailable as ground truth; therefore, these methods cannot guarantee that the estimate converges to the true ability with such limited responses. In this paper, we analyze the statistical properties of estimation and find a theoretical approximation of the true ability: the ability estimated from full responses to the question bank. Based on this, a Bounded Ability Estimation framework for CAT (BECAT) is proposed in a data-summary manner, which selects a question subset that closely matches the gradient of the full responses. Thus, we develop an expected gradient difference approximation to design a simple greedy selection algorithm, and establish rigorous theoretical guarantees and error upper bounds for its ability estimate. 
Experiments on both real-world and synthetic datasets show that it can reach the same estimation accuracy using 15\\% fewer questions on average, significantly reducing test length.", "keywords": "adaptive learning;computerized adaptive testing;educational measurement;cognitive diagnosis", "primary_area": "", "supplementary_material": "", "author": "Yan Zhuang;Qi Liu;GuanHao Zhao;Zhenya Huang;Weizhe Huang;Zachary Pardos;Enhong Chen;Jinze Wu;Xin Li", "authorids": "~Yan_Zhuang4;~Qi_Liu3;~GuanHao_Zhao1;~Zhenya_Huang2;~Weizhe_Huang1;~Zachary_Pardos1;~Enhong_Chen1;~Jinze_Wu1;~Xin_Li56", "gender": "M;M;M;M;M;;M;;M", "homepage": "http://home.ustc.edu.cn/~zykb/;http://staff.ustc.edu.cn/~qiliuql/;https://base.ustc.edu.cn/;http://staff.ustc.edu.cn/~huangzhy/;https://github.com/weizhehuang0827;https://gse.berkeley.edu/zachary-pardos;http://staff.ustc.edu.cn/~cheneh;;https://www.scopus.com/authid/detail.uri?authorId=57196399539", "dblp": ";95/2446-3;;178/8690;;45/6140.html;07/258;;09/1365-64", "google_scholar": "7MX_P5cAAAAJ;5EoHAFwAAAAJ;;dVZuU90AAAAJ;;OQvFtBMAAAAJ;Q9h02J0AAAAJ;;", "orcid": "0000-0001-7351-377X;0000-0001-6956-5550;0009-0001-1824-2169;0000-0003-1661-0420;;0000-0002-6016-7051;0000-0002-4835-4102;0000-0001-9957-5733;", "linkedin": ";;;;;zacharypardos/;;;", "or_profile": "~Yan_Zhuang4;~Qi_Liu3;~GuanHao_Zhao1;~Zhenya_Huang2;~Weizhe_Huang1;~Zachary_Pardos1;~Enhong_Chen1;~Jinze_Wu1;~Xin_Li56", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of California, Berkeley;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;berkeley.edu;ustc.edu.cn;;ustc.edu.cn", "position": "PhD student;Full Professor;PhD student;Associate Professor;MS student;Associate Professor;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nzhuang2023a,\ntitle={A Bounded Ability Estimation for Computerized Adaptive Testing},\nauthor={Yan Zhuang and Qi Liu and GuanHao Zhao and Zhenya Huang and Weizhe Huang and Zachary Pardos and Enhong Chen and Jinze Wu and Xin Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tAwjG5bM7H}\n}", "github": "", "project": "", "reviewers": "aSHy;DatR;GvrK;KCbW;TJRB", "pdf_size": 1017796, "rating": "4;5;6;6;8", "confidence": "3;3;4;3;5", "soundness": "2;2;3;3;3", "novelty": "3;3;3;3;4", "presentation": "2;3;3;1;3", "wc_summary": "71;39;37;110;85", "wc_strengths": "70;122;12;101;166", "wc_weaknesses": "67;190;37;176;143", "wc_questions": "120;9;161;70;2", "wc_limitations": "8;5;1;68;30", "wc_review": "336;365;248;525;426", "wc_reply_reviewers": "152;174;34;88;121", "wc_reply_authors": "579;98;58;69;138", "reply_reviewers": "1;1;1;1;2", "reply_authors": "2;2;2;2;3", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 68.4, 27.796402644946703 ], "wc_strengths_avg": [ 94.2, 51.607751355779875 ], "wc_weaknesses_avg": [ 122.6, 60.38079164767551 ], "wc_questions_avg": [ 72.4, 61.80161810179407 ], "wc_limitations_avg": [ 22.4, 24.920674148184677 ], "wc_review_avg": [ 380.0, 92.44025097326382 ], "wc_reply_reviewers_avg": [ 113.8,
49.33315315282411 ], "wc_reply_authors_avg": [ 188.4, 197.25171735627552 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8668451156610704, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4764018658113961211&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;berkeley.edu;ustc.edu.cn;;ustc.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "University of Science and Technology of China;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "USTC;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Understanding Deep Gradient Leakage via Inversion Influence Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70223", "id": "tBib2fWr3r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c4dd7e3d9f528f0b4f2aca9fbcdca8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tBib2fWr3r", "openreview": "https://openreview.net/forum?id=tBib2fWr3r", "poster": "/media/PosterPDFs/NeurIPS%202023/70223.png?t=1697511413.2609017", "slides": "https://nips.cc/virtual/2023/poster/70223", "video": "https://nips.cc/virtual/2023/poster/70223", "author_site": "Haobo Zhang, Junyuan Hong, Yuyang Deng, Mehrdad Mahdavi, Jiayu Zhou", "tldr": "", "abstract": "Deep Gradient Leakage (DGL) is a highly effective attack that recovers private training images from gradient vectors.\nThis attack poses significant privacy challenges for distributed learning from clients with sensitive data, where clients are required to share gradients.\n Defending against such attacks requires, but currently lacks, an understanding of when and how privacy leakage happens, mostly because of the black-box nature of deep networks.\n In this paper, we propose a novel Inversion Influence Function (I$^2$F) that establishes a closed-form connection between the recovered images and the private gradients by implicitly solving the DGL problem.\n Compared to directly solving DGL, I$^2$F is scalable for analyzing deep networks, requiring only oracle access to gradients and Jacobian-vector products.\n We empirically demonstrate that I$^2$F effectively approximates DGL across different model architectures, datasets, modalities, attack implementations, and perturbation-based defenses.\n With this novel tool, we provide insights into effective gradient perturbation directions, the unfairness of privacy protection, and privacy-preferred model initialization.\n Our code is provided at https://github.com/illidanlab/inversion-influence-function.", "keywords": "Deep Learning;Privacy;Federated Learning;Influence Function", "primary_area": "", "supplementary_material": "/attachment/95b66649cf6b2dcbd31452556201aacc2b986416.pdf", "author": "Haobo Zhang;Junyuan Hong;Yuyang Deng;Mehrdad Mahdavi;Jiayu Zhou", "authorids": "~Haobo_Zhang1;~Junyuan_Hong1;~Yuyang_Deng3;~Mehrdad_Mahdavi2;~Jiayu_Zhou1", "gender": ";M;M;M;M", "homepage": ";https://jyhong.gitlab.io/;https://sites.psu.edu/yuyangdeng/;http://www.cse.psu.edu/~mzm616/;http://jiayuzhou.github.io/", "dblp": ";185/1316;261/9253;88/4321;73/1353", "google_scholar":
";7Cbv6doAAAAJ;bfV3XWUAAAAJ;HzxnwocAAAAJ;https://scholar.google.com.tw/citations?user=yQKlLTQAAAAJ", "orcid": ";0000-0002-5718-5187;;;0000-0003-4336-6777", "linkedin": ";;;;jiayuzhou/", "or_profile": "~Haobo_Zhang1;~Junyuan_Hong1;~Yuyang_Deng3;~Mehrdad_Mahdavi2;~Jiayu_Zhou1", "aff": ";Michigan State University;Pennsylvania State University;Toyota Technological Institute at Chicago;Michigan State University", "aff_domain": ";msu.edu;psu.edu;ttic.edu;msu.edu", "position": ";PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023understanding,\ntitle={Understanding Deep Gradient Leakage via Inversion Influence Functions},\nauthor={Haobo Zhang and Junyuan Hong and Yuyang Deng and Mehrdad Mahdavi and Jiayu Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tBib2fWr3r}\n}", "github": "", "project": "", "reviewers": "VRVE;BBkr;X287;FFT2;NgNT", "pdf_size": 1420846, "rating": "6;6;7;7;8", "confidence": "4;3;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;4", "presentation": "3;2;3;2;4", "wc_summary": "107;62;172;45;123", "wc_strengths": "50;55;107;49;176", "wc_weaknesses": "56;269;216;178;142", "wc_questions": "25;82;102;34;1", "wc_limitations": "6;12;13;115;37", "wc_review": "244;480;610;421;479", "wc_reply_reviewers": "288;67;65;19;41", "wc_reply_authors": "608;209;153;32;34", "reply_reviewers": "2;2;1;1;1", "reply_authors": "3;4;3;2;2", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 101.8, 45.19911503558449 ], "wc_strengths_avg": [ 87.4, 49.30963394713045 ], "wc_weaknesses_avg": [ 172.2, 71.74510436259746 ], "wc_questions_avg": [ 48.8, 37.42405643433111 ], "wc_limitations_avg": [ 36.6, 40.60837352074077 ], "wc_review_avg": [ 446.8, 118.79966329918616 ], "wc_reply_reviewers_avg": [ 96.0, 97.59098319004681 ], "wc_reply_authors_avg": [ 207.2, 211.78989588741007 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4972266754254378950&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 11, "email": ";msu.edu;psu.edu;ttic.edu;msu.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Michigan State University;Pennsylvania State University;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.msu.edu;https://www.psu.edu;https://www.tti-chicago.org", "aff_unique_abbr": "MSU;PSU;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Replicable Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70222", "id": "tBwRbgsol1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/313829757739365201b5adb3a1cbd9bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tBwRbgsol1", "openreview": "https://openreview.net/forum?id=tBwRbgsol1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70222", "video": "https://nips.cc/virtual/2023/poster/70222", "author_site": "Eric Eaton, Marcel Hussing, 
Michael Kearns, Jessica Sorrell", "tldr": "", "abstract": "The replicability crisis in the social, behavioral, and data sciences has led to the formulation of algorithm frameworks for replicability --- i.e., a requirement that an algorithm produce identical outputs (with high probability) when run on two different samples from the same underlying distribution. While still in its infancy, provably replicable algorithms have been developed for many fundamental tasks in machine learning and statistics, including statistical query learning, the heavy hitters problem, and distribution testing. In this work we initiate the study of replicable reinforcement learning, providing a provably replicable algorithm for parallel value iteration, and a provably replicable version of R-Max in the episodic setting. These are the first formal replicability results for control problems, which present different challenges for replication than batch learning settings.", "keywords": "Reinforcement Learning;Learning Theory;Replicability;Reproducibility", "primary_area": "", "supplementary_material": "/attachment/d6d99ac9e216f9897099f6182bd04a5478ab12ac.zip", "author": "ERIC EATON;Marcel Hussing;Michael Kearns;Jessica Sorrell", "authorids": "~ERIC_EATON1;~Marcel_Hussing1;~Michael_Kearns2;~Jessica_Sorrell1", "gender": ";M;F;", "homepage": ";https://marcelhussing.github.io/;https://jess-sorrell.github.io/;https://www.cis.upenn.edu/~mkearns/", "dblp": "22/2336;250/2621;222/3271;78/6858", "google_scholar": "QIZWnnQAAAAJ;LEDpvJEAAAAJ;hBsSfjAAAAAJ;8iQk0DIAAAAJ", "orcid": ";;;", "linkedin": ";marcel-hussing-628264158/;;", "or_profile": "~ERIC_EATON1;~Marcel_Hussing1;~Jessica_Sorrell1;~Michael_J._Kearns1", "aff": "University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu", "position": "Faculty;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\neaton2023replicable,\ntitle={Replicable Reinforcement Learning},\nauthor={ERIC EATON and Marcel Hussing and Michael Kearns and Jessica Sorrell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tBwRbgsol1}\n}", "github": "", "project": "", "reviewers": "fsAq;BN9y;u4VH;vrU3", "pdf_size": 543837, "rating": "4;4;6;7", "confidence": "4;3;3;2", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "54;27;32;81", "wc_strengths": "47;30;31;33", "wc_weaknesses": "90;102;6;23", "wc_questions": "160;5;171;26", "wc_limitations": "18;30;17;23", "wc_review": "369;194;257;186", "wc_reply_reviewers": "0;16;382;14", "wc_reply_authors": "0;0;1015;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 48.5, 21.33658829335187 ], "wc_strengths_avg": [ 35.25, 6.869315832017043 ], "wc_weaknesses_avg": [ 55.25, 41.408785299740444 ], "wc_questions_avg": [ 90.5, 75.46688015281936 ], "wc_limitations_avg": [ 22.0, 5.1478150704935 ], "wc_review_avg": [ 251.5, 73.20006830597906 ], "wc_reply_reviewers_avg": [ 103.0, 161.19863522995473 ], "wc_reply_authors_avg": [ 253.75, 439.5078924206026 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], 
"replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3762210296081187504&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Power of SVD in the Stochastic Block Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70221", "id": "tC0r8duG9z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/678594bcff6f99f3b7a8ff459989b1a3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tC0r8duG9z", "openreview": "https://openreview.net/forum?id=tC0r8duG9z", "poster": "/media/PosterPDFs/NeurIPS%202023/70221.png?t=1701308056.7050548", "slides": "https://nips.cc/virtual/2023/poster/70221", "video": "https://nips.cc/virtual/2023/poster/70221", "author_site": "Xinyu Mao, Jiapeng Zhang", "tldr": "", "abstract": "A popular heuristic method for improving clustering results is to apply dimensionality reduction before running clustering algorithms.\nIt has been observed that spectral-based dimensionality reduction tools, such as PCA or SVD, improve the performance of clustering algorithms in many applications. This phenomenon indicates that spectral method not only serves as a dimensionality reduction tool, but also contributes to the clustering procedure in some sense. It is an interesting question to understand the behavior of spectral steps in clustering problems.\n\nAs an initial step in this direction, this paper studies the power of vanilla-SVD algorithm in the stochastic block model (SBM). We show that, in the symmetric setting, vanilla-SVD algorithm recovers all clusters correctly. 
This result answers an open question posed by Van Vu (Combinatorics Probability and Computing, 2018) in the symmetric setting.", "keywords": "Clustering Algorithms;Stochastic Block Model;Spectral Algorithms", "primary_area": "", "supplementary_material": "/attachment/d6eb7947b3769dbdd5bac913381ac2763f7d3332.pdf", "author": "Xinyu Mao;Jiapeng Zhang", "authorids": "~Xinyu_Mao1;~Jiapeng_Zhang2", "gender": "M;M", "homepage": "https://sparkmxy.github.io/;https://sites.google.com/site/jiapeng0708/home", "dblp": "49/8919;38/9461", "google_scholar": ";9eQOP14AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xinyu_Mao1;~Jiapeng_Zhang2", "aff": "University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmao2023on,\ntitle={On the Power of {SVD} in the Stochastic Block Model},\nauthor={Xinyu Mao and Jiapeng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tC0r8duG9z}\n}", "github": "", "project": "", "reviewers": "Qju9;89Uu;b4S2;bCxf;qMce", "pdf_size": 403533, "rating": "5;5;7;7;7", "confidence": "2;4;3;2;3", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "2;3;2;2;4", "wc_summary": "37;102;38;42;92", "wc_strengths": "79;183;110;42;123", "wc_weaknesses": "222;23;158;71;243", "wc_questions": "108;211;40;52;95", "wc_limitations": "7;15;12;41;5", "wc_review": "453;534;358;248;558", "wc_reply_reviewers": "10;0;9;19;0", "wc_reply_authors": "18;0;18;15;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;2;2;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 62.2, 28.63843571147 ], "wc_strengths_avg": [ 107.4, 47.008935320851506 ], "wc_weaknesses_avg": [ 143.4, 84.91077670119383 ], "wc_questions_avg": [ 101.2, 60.509172858336115 ], "wc_limitations_avg": [ 16.0, 12.992305415129373 ], "wc_review_avg": [ 430.2, 114.92327875587262 ], "wc_reply_reviewers_avg": [ 7.6, 7.116178749862878 ], "wc_reply_authors_avg": [ 10.2, 8.399999999999999 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13622426300326433356&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "usc.edu;usc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Robust and Opponent-Aware League Training Method for StarCraft II", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70220", "id": "tDAu3FPJn9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/94796017d01c5a171bdac520c199d9ed-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tDAu3FPJn9", "openreview": "https://openreview.net/forum?id=tDAu3FPJn9", "poster": "/media/PosterPDFs/NeurIPS%202023/70220.png?t=1699430567.3653083", "slides": 
"https://nips.cc/virtual/2023/poster/70220", "video": "https://nips.cc/virtual/2023/poster/70220", "author_site": "Ruozi Huang, Xipeng Wu, Hongsheng Yu, Zhong Fan, Haobo Fu, Qiang Fu, Wei Yang", "tldr": "", "abstract": "It is extremely difficult to train a superhuman Artificial Intelligence (AI) for games of similar size to StarCraft II. AlphaStar is the first AI that beat human professionals in the full game of StarCraft II, using a league training framework that is inspired by a game-theoretic approach. In this paper, we improve AlphaStar's league training in two significant aspects. We train goal-conditioned exploiters, whose abilities of spotting weaknesses in the main agent and the entire league are greatly improved compared to the unconditioned exploiters in AlphaStar. In addition, we endow the agents in the league with the new ability of opponent modeling, which makes the agent more responsive to the opponent's real-time strategy. Based on these improvements, we train a better and superhuman AI with orders of magnitude less resources than AlphaStar (see Table 1 for a full comparison). Considering the iconic role of StarCraft II in game AI research, we believe our method and results on StarCraft II provide valuable design principles on how one would utilize the general league training framework for obtaining a least-exploitable strategy in various, large-scale, real-world games.", "keywords": "StarCraft II;league training;AlphaStar;opponent-modeling;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/580d21403f369f103a3689c61332f730ee128a14.zip", "author": "Ruozi Huang;Xipeng Wu;Hongsheng Yu;Zhong Fan;Haobo Fu;QIANG FU;Yang Wei", "authorids": "~Ruozi_Huang1;~Xipeng_Wu1;~Hongsheng_Yu1;~Zhong_Fan1;~Haobo_Fu2;~QIANG_FU8;~Yang_Wei2", "gender": ";M;;;M;M;M", "homepage": "https://github.com/rose1111111;;;;;;", "dblp": ";255/6144;35/10189.html;;85/8571;;03/1094-32.html", "google_scholar": ";;;;LFdJXNcAAAAJ;gANaxT0AAAAJ;", "orcid": ";;;;;;", "linkedin": ";https://www.linkedin.com/public-profile/settings?trk=d_flagship3_profile_self_view_public_profile;yhs-17ba75111/;;haobo-fu-382b0784/;;", "or_profile": "~Ruozi_Huang1;~Xipeng_Wu1;~Hongsheng_Yu1;~Zhong_Fan1;~Haobo_Fu2;~QIANG_FU8;~Yang_Wei2", "aff": "Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab", "aff_domain": "tencent.com;tencent.com;tencent.com;;tencent.com;tencent.com;tencent.com", "position": "Researcher;Researcher;Researcher;;Principal Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nhuang2023a,\ntitle={A Robust and Opponent-Aware League Training Method for StarCraft {II}},\nauthor={Ruozi Huang and Xipeng Wu and Hongsheng Yu and Zhong Fan and Haobo Fu and QIANG FU and Yang Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tDAu3FPJn9}\n}", "github": "", "project": "", "reviewers": "eEEJ;zEct;sqN1;Xpov", "pdf_size": 5435629, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;4;3", "wc_summary": "162;152;39;117", "wc_strengths": "347;22;24;112", "wc_weaknesses": "80;264;88;72", "wc_questions": "345;16;40;43", "wc_limitations": "102;2;9;47", "wc_review": "1036;456;200;391", "wc_reply_reviewers": "85;22;97;0", "wc_reply_authors": "11;8;23;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 
3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 117.5, 48.303726564313855 ], "wc_strengths_avg": [ 126.25, 132.52994944539896 ], "wc_weaknesses_avg": [ 126.0, 79.87490219086344 ], "wc_questions_avg": [ 111.0, 135.50461246762046 ], "wc_limitations_avg": [ 40.0, 39.67996975805299 ], "wc_review_avg": [ 520.75, 312.0059093991651 ], "wc_reply_reviewers_avg": [ 51.0, 40.96950085124299 ], "wc_reply_authors_avg": [ 10.5, 8.261355820929152 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1110391596020491891&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "email": "tencent.com;tencent.com;tencent.com;;tencent.com;tencent.com;tencent.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tencent", "aff_unique_dep": "Tencent AI Lab", "aff_unique_url": "https://ai.tencent.com", "aff_unique_abbr": "Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Doubly Constrained Fair Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70219", "id": "tECyQO1QOp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2ab87e2179b8ea209b52463802d62560-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tECyQO1QOp", "openreview": "https://openreview.net/forum?id=tECyQO1QOp", "poster": "/media/PosterPDFs/NeurIPS%202023/70219.png?t=1702184949.5627458", "slides": "https://nips.cc/virtual/2023/poster/70219", "video": "https://nips.cc/virtual/2023/poster/70219", "author_site": "John Dickerson, Seyed Esmaeili, Jamie Morgenstern, Claire Jie Zhang", "tldr": "", "abstract": "The remarkable attention which fair clustering has received in the last few years has resulted in a significant number of different notions of fairness. Despite the fact that these notions are well-justified, they are often motivated and studied in a disjoint manner where one fairness desideratum is considered exclusively in isolation from the others. This leaves the understanding of the relations between different fairness notions as an important open problem in fair clustering. In this paper, we take the first step in this direction. Specifically, we consider the two most prominent demographic representation fairness notions in clustering: (1) Group Fairness ($\\textbf{GF}$), where the different demographic groups are supposed to have close to population-level representation in each cluster and (2) Diversity in Center Selection ($\\textbf{DS}$), where the selected centers are supposed to have close to population-level representation of each group. We show that given a constant approximation algorithm for one constraint ($\\textbf{GF}$ or $\\textbf{DS}$ only) we can obtain a constant approximation solution that satisfies both constraints simultaneously. Interestingly, we prove that any given solution that satisfies the $\\textbf{GF}$ constraint can always be post-processed at a bounded degradation to the clustering cost to additionally satisfy the $\\textbf{DS}$ constraint while the same statement is not true given a solution that satisfies $\\textbf{DS}$ instead. 
Furthermore, we show that both $\\textbf{GF}$ and $\\textbf{DS}$ are incompatible (having an empty feasibility set in the worst case) with a collection of other distance-based fairness notions. Finally, we carry out experiments to validate our theoretical findings.", "keywords": "Fairness;Clustering;Approximation Algorithms", "primary_area": "", "supplementary_material": "/attachment/f0c8ee81cc6fbbe41b42f6f8cd1cd4ff8fa8fee0.zip", "author": "John P Dickerson;Seyed A. Esmaeili;Jamie Heather Morgenstern;Claire Jie Zhang", "authorids": "~John_P_Dickerson1;~Seyed_A._Esmaeili1;~Jamie_Heather_Morgenstern1;~Claire_Jie_Zhang1", "gender": "M;M;;F", "homepage": "https://jpdickerson.com/;https://sa-esmaeili.github.io/;http://jamiemorgenstern.com;https://sites.google.com/view/claire-zhang", "dblp": "75/8479;128/4703;64/8610;84/6889", "google_scholar": "https://scholar.google.com.tw/citations?user=QgDpfCQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;1GO0T1MAAAAJ", "orcid": "0000-0003-2231-680X;;;", "linkedin": "john-dickerson-83a74a7/;;;", "or_profile": "~John_P_Dickerson1;~Seyed_A._Esmaeili1;~Jamie_Heather_Morgenstern1;~Claire_Jie_Zhang1", "aff": "Optimized Markets, Inc;University of Maryland, College Park;;Department of Computer Science, University of Washington", "aff_domain": "optimizedmarkets.com;umd.edu;;cs.washington.edu", "position": "Consultant;PhD student;;PhD student", "bibtex": "@inproceedings{\ndickerson2023doubly,\ntitle={Doubly Constrained Fair Clustering},\nauthor={John P Dickerson and Seyed A. Esmaeili and Jamie Heather Morgenstern and Claire Jie Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tECyQO1QOp}\n}", "github": "", "project": "", "reviewers": "scu4;nxpJ;dBjW;Q947", "pdf_size": 8129512, "rating": "6;6;6;6", "confidence": "5;3;3;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "70;216;77;101", "wc_strengths": "44;25;43;19", "wc_weaknesses": "40;12;64;50", "wc_questions": "7;62;65;13", "wc_limitations": "12;1;41;20", "wc_review": "173;316;290;203", "wc_reply_reviewers": "0;21;0;7", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 116.0, 58.86849751777261 ], "wc_strengths_avg": [ 32.75, 10.96300597464035 ], "wc_weaknesses_avg": [ 41.5, 19.04599695474091 ], "wc_questions_avg": [ 36.75, 26.85493436968335 ], "wc_limitations_avg": [ 18.5, 14.637281168304447 ], "wc_review_avg": [ 245.5, 59.188258970846576 ], "wc_reply_reviewers_avg": [ 7.0, 8.573214099741124 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13241780598106201443&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "optimizedmarkets.com;umd.edu;;cs.washington.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Optimized Markets, Inc;University of Maryland;University of Washington", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": ";https://www.umd.edu;https://www.washington.edu", "aff_unique_abbr": ";UMD;UW", "aff_campus_unique_index":
"1;2", "aff_campus_unique": ";College Park;Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Safety Verification of Decision-Tree Policies in Continuous Time", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70218", "id": "tEKBU5XOTw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f89a23a19d1617e7fb16d4f7a049ce2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tEKBU5XOTw", "openreview": "https://openreview.net/forum?id=tEKBU5XOTw", "poster": "/media/PosterPDFs/NeurIPS%202023/70218.png?t=1699542887.6876905", "slides": "https://nips.cc/virtual/2023/poster/70218", "video": "https://nips.cc/virtual/2023/poster/70218", "author_site": "Christian Schilling, Anna Lukina, Emir Demirovi\u0107, Kim Larsen", "tldr": "", "abstract": "Decision trees have gained popularity as interpretable surrogate models for learning-based control policies. However, providing safety guarantees for systems controlled by decision trees is an open challenge. We show that the problem is undecidable even for systems with the simplest dynamics, and PSPACE-complete for finite-horizon properties. The latter can be verified for discrete-time systems via bounded model checking. However, for continuous-time systems, such an approach requires discretization, thereby weakening the guarantees for the original system. This paper presents the first algorithm to directly verify decision-tree controlled system in continuous time. The key aspect of our method is exploiting the decision-tree structure to propagate a set-based approximation through the decision nodes. We demonstrate the effectiveness of our approach by verifying safety of several decision trees distilled to imitate neural-network policies for nonlinear systems.", "keywords": "safety verification;decision tree;reinforcement learning;controller;continuous time", "primary_area": "", "supplementary_material": "", "author": "Christian Schilling;Anna Lukina;Emir Demirovi\u0107;Kim Guldstrand Larsen", "authorids": "~Christian_Schilling1;a.lukina@tudelft.nl;~Emir_Demirovi\u01071;kgl@cs.aau.dk", "gender": "M;;Not Specified;", "homepage": "https://www.christianschilling.net/;;http://www.emirdemirovic.com;", "dblp": "72/2103-1;;;", "google_scholar": "d_uKdigAAAAJ;;;", "orcid": "0000-0003-3658-1065;;;", "linkedin": ";;;", "or_profile": "~Christian_Schilling1;a.lukina@tudelft.nl;~Emir_Demirovi\u01071;kgl@cs.aau.dk", "aff": "Aalborg University;;Delft University of Technology;", "aff_domain": "cs.aau.dk;;tudelft.nl;", "position": "Assistant Professor;;Assistant Professor;", "bibtex": "@inproceedings{\nschilling2023safety,\ntitle={Safety Verification of Decision-Tree Policies in Continuous Time},\nauthor={Christian Schilling and Anna Lukina and Emir Demirovi{\\'c} and Kim Guldstrand Larsen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tEKBU5XOTw}\n}", "github": "", "project": "", "reviewers": "xs5Q;jTr9;77nS;yJkM", "pdf_size": 2071606, "rating": "6;6;7;7", "confidence": "3;3;5;3", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "51;116;115;181", "wc_strengths": "87;60;151;151", "wc_weaknesses": "14;88;241;284", "wc_questions": "14;7;75;74", "wc_limitations": "2;21;45;49", "wc_review": "168;292;627;739", "wc_reply_reviewers": "19;18;232;22", "wc_reply_authors": "0;0;366;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", 
"rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.75, 45.963980462966866 ], "wc_strengths_avg": [ 112.25, 39.90848907187542 ], "wc_weaknesses_avg": [ 156.75, 109.99403392911817 ], "wc_questions_avg": [ 42.5, 32.09750769140807 ], "wc_limitations_avg": [ 29.25, 19.031224343168255 ], "wc_review_avg": [ 456.5, 234.0774444494813 ], "wc_reply_reviewers_avg": [ 72.75, 91.95481227211548 ], "wc_reply_authors_avg": [ 91.5, 158.48264889255228 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8619372502843978178&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "cs.aau.dk;;tudelft.nl;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Aalborg University;Delft University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.aau.dk;https://www.tudelft.nl", "aff_unique_abbr": "AAU;TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Denmark;Netherlands" }, { "title": "PPi: Pretraining Brain Signal Model for Patient-independent Seizure Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70217", "id": "tEmFyqjaJh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbeb7e621d4a554069a6a775da0f7273-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tEmFyqjaJh", "openreview": "https://openreview.net/forum?id=tEmFyqjaJh", "poster": "/media/PosterPDFs/NeurIPS%202023/70217.png?t=1698460329.358638", "slides": "https://nips.cc/virtual/2023/poster/70217", "video": "https://nips.cc/virtual/2023/poster/70217", "author_site": "Zhizhang Yuan, Daoze Zhang, YANG YANG, Junru Chen, Yafeng Li", "tldr": "", "abstract": "Automated seizure detection is of great importance to epilepsy diagnosis and treatment. An emerging method used in seizure detection, stereoelectroencephalography (SEEG), can provide detailed and stereoscopic brainwave information. However, modeling SEEG in clinical scenarios will face challenges like huge domain shift between different patients and dramatic pattern evolution among different brain areas. In this study, we propose a Pretraining-based model for Patient-independent seizure detection (PPi) to address these challenges. Firstly, we design two novel self-supervised tasks which can extract rich information from abundant SEEG data while preserving the unique characteristics between brain signals recorded from different brain areas. Then two techniques channel background subtraction and brain region enhancement are proposed to effectively tackle the domain shift problem. Extensive experiments show that PPi outperforms the SOTA baselines on two public datasets and a real-world clinical dataset collected by ourselves, which demonstrates the effectiveness and practicability of PPi. 
Finally, visualization analysis illustrates the rationality of the two domain generalization techniques.", "keywords": "Brain signal;Seizure detection;Pretraining;Domain generalization", "primary_area": "", "supplementary_material": "", "author": "Zhizhang Yuan;Daoze Zhang;Yang Yang;Junru Chen;Yafeng Li", "authorids": "~Zhizhang_Yuan1;~Daoze_Zhang1;~Yang_Yang35;~Junru_Chen1;~Yafeng_Li3", "gender": "M;M;M;M;M", "homepage": ";https://daozezhang.github.io/;http://yangy.org;https://mrnobodycali.github.io/;https://nuozhu.net/", "dblp": ";;;212/6753;84/8135", "google_scholar": "UzkMfYAAAAAJ;5beC28QAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;0000-0002-5058-4417;;", "linkedin": ";;;;", "or_profile": "~Zhizhang_Yuan1;~Daoze_Zhang1;~Yang_Yang35;~Junru_Chen1;~Yafeng_Li3", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;", "position": "Undergrad student;Undergrad student;Associate Professor;PhD student;", "bibtex": "@inproceedings{\nyuan2023ppi,\ntitle={{PP}i: Pretraining Brain Signal Model for Patient-independent Seizure Detection},\nauthor={Zhizhang Yuan and Daoze Zhang and Yang Yang and Junru Chen and Yafeng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tEmFyqjaJh}\n}", "github": "", "project": "", "reviewers": "DeFx;zvti;LsDA;oXnd;iQPF;M3UK", "pdf_size": 8182743, "rating": "5;5;6;6;7;8", "confidence": "3;4;4;5;3;3", "soundness": "2;2;3;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "3;2;3;3;2;3", "wc_summary": "253;74;70;81;70;103", "wc_strengths": "203;45;45;65;106;134", "wc_weaknesses": "365;61;89;24;109;169", "wc_questions": "124;83;229;37;182;54", "wc_limitations": "5;46;10;11;2;11", "wc_review": "950;309;443;218;469;471", "wc_reply_reviewers": "10;20;242;45;13;8", "wc_reply_authors": "14;18;637;625;12;11", "reply_reviewers": "1;1;2;1;1;1", "reply_authors": "2;2;3;2;2;2", "rating_avg": [ 6.166666666666667, 1.0671873729054748 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 108.5, 65.60170221368752 ], "wc_strengths_avg": [ 99.66666666666667, 56.384592063986965 ], "wc_weaknesses_avg": [ 136.16666666666666, 111.53238194453763 ], "wc_questions_avg": [ 118.16666666666667, 68.72024608674086 ], "wc_limitations_avg": [ 14.166666666666666, 14.622091353686573 ], "wc_review_avg": [ 476.6666666666667, 231.04448826049833 ], "wc_reply_reviewers_avg": [ 56.333333333333336, 83.94574967732964 ], "wc_reply_authors_avg": [ 219.5, 291.0032932230607 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.349215147884789, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3809985066356734794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Federated 
Compositional Deep AUC Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70216", "id": "tF7W8ai8J3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e7b192fc8b3acb93749c5accfa60e0c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tF7W8ai8J3", "openreview": "https://openreview.net/forum?id=tF7W8ai8J3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70216", "video": "https://nips.cc/virtual/2023/poster/70216", "author_site": "Xinwen Zhang, Yihan Zhang, Tianbao Yang, Richard Souvenir, Hongchang Gao", "tldr": "", "abstract": "Federated learning has attracted increasing attention due to the promise of balancing privacy and large-scale learning; numerous approaches have been proposed. However, most existing approaches focus on problems with balanced data, and prediction performance is far from satisfactory for many real-world applications where the number of samples in different classes is highly imbalanced. To address this challenging problem, we developed a novel federated learning method for imbalanced data by directly optimizing the area under curve (AUC) score. In particular, we formulate the AUC maximization problem as a federated compositional minimax optimization problem, develop a local stochastic compositional gradient descent ascent with momentum algorithm, and provide bounds on the computational and communication complexities of our algorithm. To the best of our knowledge, this is the first work to achieve such favorable theoretical results. Finally, extensive experimental results confirm the efficacy of our method.", "keywords": "federated learning;compositional optimization;minimax optimization;AUC maximization", "primary_area": "", "supplementary_material": "/attachment/b2f62b88d1d7c5a1dc99031b7f7e48eebe07cdb0.pdf", "author": "Xinwen Zhang;Yihan Zhang;Tianbao Yang;Richard Souvenir;Hongchang Gao", "authorids": "~Xinwen_Zhang3;~Yihan_Zhang2;~Tianbao_Yang1;~Richard_Souvenir2;~Hongchang_Gao3", "gender": "F;M;M;M;", "homepage": ";;https://people.tamu.edu/~tianbao-yang/publications.html;https://cis.temple.edu/~souvenir/;", "dblp": "14/3612;;56/7047;95/5553;", "google_scholar": "6pZyGBQAAAAJ;-QpEMpMAAAAJ;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ;1sMNiJIAAAAJ;", "orcid": "0009-0002-1981-7523;;;0000-0002-6066-0946;", "linkedin": "xinwen-zhang-54a485249;yihan-zhang-793062256/;;;", "or_profile": "~Xinwen_Zhang3;~Yihan_Zhang2;~Tianbao_Yang1;~Richard_Souvenir2;~Hongchang_Gao3", "aff": "Temple University;Temple University;Texas A&M University - College Station;Temple University;", "aff_domain": "temple.edu;temple.edu;tamu.edu;temple.edu;", "position": "PhD student;PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nzhang2023federated,\ntitle={Federated Compositional Deep {AUC} Maximization},\nauthor={Xinwen Zhang and Yihan Zhang and Tianbao Yang and Richard Souvenir and Hongchang Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tF7W8ai8J3}\n}", "github": "", "project": "", "reviewers": "5nau;jnCq;BTwE", "pdf_size": 698218, "rating": "3;5;7", "confidence": "3;3;5", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "34;69;45", "wc_strengths": "15;30;92", "wc_weaknesses": "103;105;39", "wc_questions": "2;99;91", "wc_limitations": "2;16;1", "wc_review": "156;319;268", "wc_reply_reviewers": "72;63;9", "wc_reply_authors": "511;199;0", 
"reply_reviewers": "1;2;1", "reply_authors": "2;3;1", "rating_avg": [ 5.0, 1.632993161855452 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 49.333333333333336, 14.613540144521982 ], "wc_strengths_avg": [ 45.666666666666664, 33.32999983331666 ], "wc_weaknesses_avg": [ 82.33333333333333, 30.652170486860395 ], "wc_questions_avg": [ 64.0, 43.96210489349511 ], "wc_limitations_avg": [ 6.333333333333333, 6.847546194724712 ], "wc_review_avg": [ 247.66666666666666, 68.08001827914626 ], "wc_reply_reviewers_avg": [ 48.0, 27.820855486487112 ], "wc_reply_authors_avg": [ 236.66666666666666, 210.30823939055637 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9436993679487706445&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "temple.edu;temple.edu;tamu.edu;temple.edu;", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Temple University;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.temple.edu;https://www.tamu.edu", "aff_unique_abbr": "Temple;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Single-Call Stochastic Extragradient Methods for Structured Non-monotone Variational Inequalities: Improved Analysis under Weaker Conditions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70215", "id": "tFeaLw9AWn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ccf02786d28730e8311676ffa842e216-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tFeaLw9AWn", "openreview": "https://openreview.net/forum?id=tFeaLw9AWn", "poster": "/media/PosterPDFs/NeurIPS%202023/70215.png?t=1702068239.926667", "slides": "https://nips.cc/virtual/2023/poster/70215", "video": "https://nips.cc/virtual/2023/poster/70215", "author_site": "Sayantan Choudhury, Eduard Gorbunov, Nicolas Loizou", "tldr": "", "abstract": "Single-call stochastic extragradient methods, like stochastic past extragradient (SPEG) and stochastic optimistic gradient (SOG), have gained a lot of interest in recent years and are one of the most efficient algorithms for solving large-scale min-max optimization and variational inequalities problems (VIP) appearing in various machine learning tasks. However, despite their undoubted popularity, current convergence analyses of SPEG and SOG require strong assumptions like bounded variance or growth conditions. In addition, several important questions regarding the convergence properties of these methods are still open, including mini-batching, efficient step-size selection, and convergence guarantees under different sampling strategies. In this work, we address these questions and provide convergence guarantees for two large classes of structured non-monotone VIPs: (i) quasi-strongly monotone problems (a generalization of strongly monotone problems) and (ii) weak Minty variational inequalities (a generalization of monotone and Minty VIPs). 
We introduce the expected residual condition, explain its benefits, and show how it allows us to obtain a strictly weaker bound than previously used growth conditions, expected co-coercivity, or bounded variance assumptions. Finally, our convergence analysis holds under the arbitrary sampling paradigm, which includes importance sampling and various mini-batching strategies as special cases.", "keywords": "Optimization;Machine Learning;Extragradient Methods;Min-Max Optimization", "primary_area": "", "supplementary_material": "/attachment/edffd9bd30a59810857b00fc643472cc4f3fb068.zip", "author": "Sayantan Choudhury;Eduard Gorbunov;Nicolas Loizou", "authorids": "~Sayantan_Choudhury1;~Eduard_Gorbunov1;~Nicolas_Loizou1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/sayantan-homepage/home;https://eduardgorbunov.github.io;https://nicolasloizou.github.io/", "dblp": ";215/5512.html;173/4958", "google_scholar": ";https://scholar.google.ru/citations?user=85j2RqQAAAAJ;https://scholar.google.co.uk/citations?user=mvDmzAQAAAAJ", "orcid": ";;", "linkedin": "sayantan-choudhury-21168941;;", "or_profile": "~Sayantan_Choudhury1;~Eduard_Gorbunov1;~Nicolas_Loizou1", "aff": "Johns Hopkins University;Mohamed bin Zayed University of Artificial Intelligence;Johns Hopkins University", "aff_domain": "jhu.edu;mbzuai.ac.ae;jhu.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nchoudhury2023singlecall,\ntitle={Single-Call Stochastic Extragradient Methods for Structured Non-monotone Variational Inequalities: Improved Analysis under Weaker Conditions},\nauthor={Sayantan Choudhury and Eduard Gorbunov and Nicolas Loizou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tFeaLw9AWn}\n}", "github": "", "project": "", "reviewers": "rrNt;bysm;Bwgk;dTzE", "pdf_size": 1379379, "rating": "3;6;6;7", "confidence": "5;3;5;3", "soundness": "3;4;3;4", "novelty": "2;3;2;2", "presentation": "4;4;2;3", "wc_summary": "31;110;184;151", "wc_strengths": "9;70;305;38", "wc_weaknesses": "350;86;197;2", "wc_questions": "16;86;4;213", "wc_limitations": "1;7;1;51", "wc_review": "407;359;691;455", "wc_reply_reviewers": "345;147;35;325", "wc_reply_authors": "480;258;455;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 119.0, 57.17079674099356 ], "wc_strengths_avg": [ 105.5, 117.18468329948244 ], "wc_weaknesses_avg": [ 158.75, 130.29078056409057 ], "wc_questions_avg": [ 79.75, 83.06134780991698 ], "wc_limitations_avg": [ 15.0, 20.92844953645635 ], "wc_review_avg": [ 478.0, 127.57350822173073 ], "wc_reply_reviewers_avg": [ 213.0, 128.46011054019843 ], "wc_reply_authors_avg": [ 298.25, 192.4686662810339 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4993471591460995676&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "jhu.edu;mbzuai.ac.ae;jhu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Johns Hopkins University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.jhu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "JHU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "Maximum State Entropy Exploration using Predecessor and Successor Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70214", "id": "tFsxtqGmkn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c7900fac04a701cbed83256b76dbaa3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tFsxtqGmkn", "openreview": "https://openreview.net/forum?id=tFsxtqGmkn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70214", "video": "https://nips.cc/virtual/2023/poster/70214", "author_site": "Arnav Kumar Jain, Lucas Lehnert, Irina Rish, Glen Berseth", "tldr": "", "abstract": "Animals have a developed ability to explore that aids them in important tasks such as locating food, exploring for shelter, and finding misplaced items. These exploration skills necessarily track where they have been so that they can plan for finding items with relative efficiency. Contemporary exploration algorithms often learn a less efficient exploration strategy because they either condition only on the current state or simply rely on making random open-loop exploratory moves. In this work, we propose $\\eta\\psi$-Learning, a method to learn efficient exploratory policies by conditioning on past episodic experience to make the next exploratory move. Specifically, $\\eta\\psi$-Learning learns an exploration policy that maximizes the entropy of the state visitation distribution of a single trajectory. Furthermore, we demonstrate how variants of the predecessor representation and successor representations can be combined to predict the state visitation entropy. 
Our experiments demonstrate the efficacy of $\\eta\\psi$-Learning to strategically explore the environment and maximize the state coverage with limited samples.", "keywords": "Reinforcement Learning;Maximum state entropy exploration;Non-Markovian exploration;Successor Representation", "primary_area": "", "supplementary_material": "", "author": "Arnav Kumar Jain;Lucas Lehnert;Irina Rish;Glen Berseth", "authorids": "~Arnav_Kumar_Jain2;~Lucas_Lehnert1;~Irina_Rish1;~Glen_Berseth1", "gender": "M;;F;M", "homepage": "https://arnavkj1995.github.io/;https://lucaslehnert.github.io/;http://irina-rish.com;http://fracturedplane.com/", "dblp": "190/7826;156/2254;;147/5478", "google_scholar": "https://scholar.google.co.in/citations?user=tu7wKckAAAAJ;;Avse5gIAAAAJ;https://scholar.google.ca/citations?user=-WZcuuwAAAAJ", "orcid": ";0000-0001-5897-499X;;0000-0001-7351-8028", "linkedin": ";;irina-rish-8b2162;glen-berseth-0523278b?trk=hp-identity-name", "or_profile": "~Arnav_Kumar_Jain2;~Lucas_Lehnert1;~Irina_Rish1;~Glen_Berseth1", "aff": "Universit\u00e9 de Montr\u00e9al;Meta FAIR;University of Montreal;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al", "aff_domain": "umontreal.ca;fb.com;mila.quebec;mila.umontreal.ca", "position": "PhD student;Postdoc;Professor;Assistant Professor", "bibtex": "@inproceedings{\njain2023maximum,\ntitle={Maximum State Entropy Exploration using Predecessor and Successor Representations},\nauthor={Arnav Kumar Jain and Lucas Lehnert and Irina Rish and Glen Berseth},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tFsxtqGmkn}\n}", "github": "", "project": "", "reviewers": "L5xE;RqRN;RPnY;gBvc", "pdf_size": 6176434, "rating": "5;6;6;8", "confidence": "5;4;4;4", "soundness": "2;3;2;4", "novelty": "2;2;2;3", "presentation": "3;4;3;4", "wc_summary": "72;95;41;21", "wc_strengths": "42;68;34;76", "wc_weaknesses": "57;41;210;98", "wc_questions": "4;525;10;47", "wc_limitations": "1;26;24;14", "wc_review": "176;755;319;256", "wc_reply_reviewers": "0;43;18;26", "wc_reply_authors": "140;143;81;0", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 57.25, 28.375825979167548 ], "wc_strengths_avg": [ 55.0, 17.46424919657298 ], "wc_weaknesses_avg": [ 101.5, 66.00189391222042 ], "wc_questions_avg": [ 146.5, 219.14664040317842 ], "wc_limitations_avg": [ 16.25, 9.908960591303208 ], "wc_review_avg": [ 376.5, 224.32621335902766 ], "wc_reply_reviewers_avg": [ 21.75, 15.465687828221544 ], "wc_reply_authors_avg": [ 91.0, 58.0646191755358 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10400756799841207259&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "umontreal.ca;fb.com;mila.quebec;mila.umontreal.ca", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Meta;University of Montreal", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.umontreal.ca;https://meta.com;https://www.umontreal.ca", "aff_unique_abbr":
"UdeM;Meta;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Distribution-Free Model-Agnostic Regression Calibration via Nonparametric Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70213", "id": "tGPx7HdBr4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a81dc87f7b3b7ab8489d5bb48c4a8d92-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tGPx7HdBr4", "openreview": "https://openreview.net/forum?id=tGPx7HdBr4", "poster": "/media/PosterPDFs/NeurIPS%202023/70213.png?t=1701640302.2350612", "slides": "https://nips.cc/virtual/2023/poster/70213", "video": "https://nips.cc/virtual/2023/poster/70213", "author_site": "Shang Liu, Zhongze Cai, Xiaocheng Li", "tldr": "", "abstract": "In this paper, we consider the uncertainty quantification problem for regression models. Specifically, we consider an individual calibration objective for characterizing the quantiles of the prediction model. While such an objective is well-motivated from downstream tasks such as newsvendor cost, the existing methods have been largely heuristic and lack of statistical guarantee in terms of individual calibration. We show via simple examples that the existing methods focusing on population-level calibration guarantees such as average calibration or sharpness can lead to harmful and unexpected results. We propose simple nonparametric calibration methods that are agnostic of the underlying prediction model and enjoy both computational efficiency and statistical consistency. Our approach enables a better understanding of the possibility of individual calibration, and we establish matching upper and lower bounds for the calibration error of our proposed methods. Technically, our analysis combines the nonparametric analysis with a covering number argument for parametric analysis, which advances the existing theoretical analyses in the literature of nonparametric density estimation and quantile bandit problems. Importantly, the nonparametric perspective sheds new theoretical insights into regression calibration in terms of the curse of dimensionality and reconciles the existing results on the impossibility of individual calibration. To our knowledge, we make the first effort to reach both individual calibration and finite-sample guarantee with minimal assumptions in terms of conformal prediction. Numerical experiments show the advantage of such a simple approach under various metrics, and also under covariates shift. 
We hope our work provides a simple benchmark and a starting point of theoretical ground for future research on regression calibration.", "keywords": "Regression calibration;model recalibration;conditional quantile;nonparametric method", "primary_area": "", "supplementary_material": "/attachment/1efb3754481afa9befcc60cf38b8837680ab9736.zip", "author": "Shang Liu;Zhongze Cai;Xiaocheng Li", "authorids": "~Shang_Liu2;~Zhongze_Cai2;~Xiaocheng_Li1", "gender": "M;M;M", "homepage": "https://liushangnoname.github.io/;https://zhongzecai.github.io/;http://xiaocheng-li.github.io/", "dblp": ";;171/2155", "google_scholar": "https://scholar.google.co.uk/citations?user=gnnxAZoAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shang_Liu2;~Zhongze_Cai2;~Xiaocheng_Li1", "aff": "Imperial College London, Imperial College London;Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;ic.ac.uk;imperial.ac.uk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2023distributionfree,\ntitle={Distribution-Free Model-Agnostic Regression Calibration via Nonparametric Methods},\nauthor={Shang Liu and Zhongze Cai and Xiaocheng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tGPx7HdBr4}\n}", "github": "", "project": "", "reviewers": "bxwF;2q3v;oVNP;aPS1", "pdf_size": 676767, "rating": "4;6;6;8", "confidence": "5;3;3;2", "soundness": "3;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;4", "wc_summary": "87;54;40;134", "wc_strengths": "83;33;25;59", "wc_weaknesses": "294;61;51;157", "wc_questions": "17;64;51;75", "wc_limitations": "33;7;1;28", "wc_review": "514;219;168;453", "wc_reply_reviewers": "525;22;0;279", "wc_reply_authors": "1191;0;0;28", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.75, 36.17578610065025 ], "wc_strengths_avg": [ 50.0, 22.825424421026653 ], "wc_weaknesses_avg": [ 140.75, 97.67900234953262 ], "wc_questions_avg": [ 51.75, 21.787324296480282 ], "wc_limitations_avg": [ 17.25, 13.534677683639163 ], "wc_review_avg": [ 338.5, 147.69986459032384 ], "wc_reply_reviewers_avg": [ 206.5, 214.1150391728708 ], "wc_reply_authors_avg": [ 304.75, 511.8043449405251 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9733285267845752, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7493759407315087995&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "imperial.ac.uk;ic.ac.uk;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Latent Graph Inference with Limited Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70212", "id": "tGuMwFnRZX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/67101f97dc23fcc10346091181fff6cb-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=tGuMwFnRZX", "openreview": "https://openreview.net/forum?id=tGuMwFnRZX", "poster": "/media/PosterPDFs/NeurIPS%202023/70212.png?t=1702174938.1481967", "slides": "https://nips.cc/virtual/2023/poster/70212", "video": "https://nips.cc/virtual/2023/poster/70212", "author_site": "Jianglin Lu, Yi Xu, Huan Wang, Yue Bai, Yun Fu", "tldr": "", "abstract": "Latent graph inference (LGI) aims to jointly learn the underlying graph structure and node representations from data features. However, existing LGI methods commonly suffer from the issue of supervision starvation, where massive edge weights are learned without semantic supervision and do not contribute to the training loss. Consequently, these supervision-starved weights, which determine the predictions of testing samples, cannot be semantically optimal, resulting in poor generalization. In this paper, we observe that this issue is actually caused by the graph sparsification operation, which severely destroys the important connections established between pivotal nodes and labeled ones. To address this, we propose to restore the corrupted affinities and replenish the missed supervision for better LGI. The key challenge then lies in identifying the critical nodes and recovering the corrupted affinities. We begin by defining the pivotal nodes as k-hop starved nodes, which can be identified based on a given adjacency matrix. Considering the high computational burden, we further present a more efficient alternative inspired by CUR matrix decomposition. Subsequently, we eliminate the starved nodes by reconstructing the destroyed connections. Extensive experiments on representative benchmarks demonstrate that reducing the starved nodes consistently improves the performance of state-of-the-art LGI methods, especially under extremely limited supervision (6.12% improvement on Pubmed with a labeling rate of only 0.3%).", "keywords": "Latent Graph Inference;CUR Matrix Decomposition;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Jianglin Lu;Yi Xu;Huan Wang;Yue Bai;Yun Fu", "authorids": "~Jianglin_Lu2;~Yi_Xu9;~Huan_Wang3;~Yue_Bai1;~Yun_Fu1", "gender": ";M;M;M;M", "homepage": ";https://sites.google.com/view/homepage-of-yi-xu;https://huanwang.tech/;https://yueb17.github.io/;http://www1.ece.neu.edu/~yunfu/", "dblp": ";14/5580-5;70/6155-14;119/0848;00/5815-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=12bRAdsAAAAJ;0-On0y4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": ";0000-0001-5857-4179;0000-0001-6951-901X;;0000-0002-5098-2853", "linkedin": ";yi-xu-884755185/;huanwang-zju/;;furaymond/", "or_profile": "~Jianglin_Lu2;~Yi_Xu9;~Huan_Wang3;~Yue_Bai1;~Yun_Fu1", "aff": ";Honda Research Institute;Northeastern University;Northeastern University;Northeastern University", "aff_domain": ";honda-ri.de;neu.edu;neu.edu;northeastern.edu", "position": ";Intern;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlu2023latent,\ntitle={Latent Graph Inference with Limited Supervision},\nauthor={Jianglin Lu and Yi Xu and Huan Wang and Yue Bai and Yun Fu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tGuMwFnRZX}\n}", "github": "", "project": "", "reviewers": "Qcor;tzHX;6cu2;se4r;Qy2b", "pdf_size": 598117, "rating": "5;6;7;7;7", "confidence": "3;5;3;4;4", "soundness": "3;3;4;3;3", "novelty": "3;3;4;3;3", "presentation": 
"2;3;4;3;3", "wc_summary": "115;84;92;82;80", "wc_strengths": "55;81;51;71;38", "wc_weaknesses": "292;134;18;54;131", "wc_questions": "180;82;29;48;2", "wc_limitations": "1;63;1;31;1", "wc_review": "643;444;191;286;252", "wc_reply_reviewers": "860;27;19;46;44", "wc_reply_authors": "1195;46;32;223;263", "reply_reviewers": "3;1;1;1;1", "reply_authors": "6;2;2;3;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 90.6, 12.86234815265082 ], "wc_strengths_avg": [ 59.2, 15.157836257197133 ], "wc_weaknesses_avg": [ 125.8, 94.33217902709552 ], "wc_questions_avg": [ 68.2, 61.67138720671038 ], "wc_limitations_avg": [ 19.4, 24.703036250631218 ], "wc_review_avg": [ 363.2, 162.9691995439629 ], "wc_reply_reviewers_avg": [ 199.2, 330.55674248152917 ], "wc_reply_authors_avg": [ 351.8, 431.5661710560734 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 3.0, 1.5491933384829668 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.13363062095621225, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3148352326671253697&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";honda-ri.de;neu.edu;neu.edu;northeastern.edu", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Honda Research Institute;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.honda-ri.com;https://www.northeastern.edu", "aff_unique_abbr": "HRI;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Japan;United States" }, { "title": "NurViD: A Large Expert-Level Video Database for Nursing Procedure Activity Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73444", "id": "tIW4kbnJIM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a48b0eaba26ba862220a307a9edb0bb-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=tIW4kbnJIM", "openreview": "https://openreview.net/forum?id=tIW4kbnJIM", "poster": "/media/PosterPDFs/NeurIPS%202023/73444.png?t=1699097142.6915092", "slides": "https://nips.cc/virtual/2023/poster/73444", "video": "https://nips.cc/virtual/2023/poster/73444", "author_site": "Ming Hu, Lin Wang, Siyuan Yan, Don Ma, Qingli Ren, Peng Xia, Wei Feng, Peibo Duan, Lie Ju, Zongyuan Ge", "tldr": "", "abstract": "The application of deep learning to nursing procedure activity understanding has the potential to greatly enhance the quality and safety of nurse-patient interactions. By utilizing the technique, we can facilitate training and education, improve quality control, and enable operational compliance monitoring. However, the development of automatic recognition systems in this field is currently hindered by the scarcity of appropriately labeled datasets. The existing video datasets pose several limitations: 1) these datasets are small-scale in size to support comprehensive investigations of nursing activity; 2) they primarily focus on single procedures, lacking expert-level annotations for various nursing procedures and action steps; and 3) they lack temporally localized annotations, which prevents the effective localization of targeted actions within longer video sequences. 
To mitigate these limitations, we propose NurViD, a large video dataset with expert-level annotation for nursing procedure activity understanding. NurViD consists of over 1.5k videos totaling 144 hours, making it approximately four times longer than the existing largest nursing activity datasets. Notably, it encompasses 51 distinct nursing procedures and 177 action steps, providing a much more comprehensive coverage compared to existing datasets that primarily focus on limited procedures. To evaluate the efficacy of current deep learning methods on nursing activity understanding, we establish three benchmarks on NurViD: procedure recognition on untrimmed videos, procedure and action recognition on trimmed videos, and action detection. Our benchmark and code will be available at https://github.com/minghu0830/NurViD-benchmark.", "keywords": "Activity understanding; Action recognition; Nursing procedure; Standard level grading", "primary_area": "", "supplementary_material": "/attachment/91a128444a9fb48f4900a28001de44a8c32487aa.pdf", "author": "Ming Hu;Lin Wang;Siyuan Yan;Don Ma;Qingli Ren;Peng Xia;Wei Feng;Peibo Duan;Lie Ju;Zongyuan Ge", "authorids": "~Ming_Hu4;~Lin_Wang13;~Siyuan_Yan1;~Don_Ma1;~Qingli_Ren1;~Peng_Xia1;~Wei_Feng5;~Peibo_Duan1;~Lie_Ju1;~Zongyuan_Ge1", "gender": "M;M;M;M;F;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=9QWdvl8AAAAJ&hl=zh-CN;;https://siyuanyan1.github.io/;;;https://richard-peng-xia.github.io;https://fengweie.github.io/;https://scholar.google.com/citations?hl=zh-CN&user=wdIMVqsAAAAJ&view_op=list_works&sortby=pubdate;https://mmai.group/peoples/julie/;https://research.monash.edu/en/persons/zongyuan-ge", "dblp": ";;206/1562;;;;;;231/5596;147/2757", "google_scholar": "9QWdvl8AAAAJ;;LGcOLREAAAAJ;;;8OVOf1EAAAAJ;https://scholar.google.com.hk/citations?user=WN6gj_wAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Q6XB27gAAAAJ;https://scholar.google.com.au/citations?user=Q0gUrcIAAAAJ", "orcid": ";0000-0003-2374-0725;0000-0002-6372-8336;;0000-0002-2759-5062;;0000-0002-7398-6988;;;0000-0002-5880-8673", "linkedin": ";;siyuan-yan-1496671bb/?originalSubdomain=au;don-ma-6223b4218/en?trk=contact-info;;;;;;", "or_profile": "~Ming_Hu4;~Lin_Wang13;~Siyuan_Yan1;~Don_Ma1;~Qingli_Ren1;~Peng_Xia1;~Wei_Feng5;~Peibo_Duan1;~Lie_Ju1;~Zongyuan_Ge1", "aff": "Monash University;;Monash University;University of Wisconsin - Madison;Shanxi Medical University;Suzhou University;Monash University;Monash University;Monash University;Monash University", "aff_domain": "monash.edu;;monash.edu;wisc.edu;sxmu.edu;suda.edu.cn;monash.edu;monash.edu;monash.edu;monash.edu", "position": "PhD student;;PhD student;Undergrad student;Lecturer;Undergrad student;PhD student;Lecturer;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2023nurvid,\ntitle={NurViD: A Large Expert-Level Video Database for Nursing Procedure Activity Understanding},\nauthor={Ming Hu and Lin Wang and Siyuan Yan and Don Ma and Qingli Ren and Peng Xia and Wei Feng and Peibo Duan and Lie Ju and Zongyuan Ge},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=tIW4kbnJIM}\n}", "github": "", "project": "", "reviewers": "sFtw;Y27H;CHPV;ysqj", "pdf_size": 1620645, "rating": "6;6;7;7", "confidence": "4;3;4;5", "wc_summary_and_contributions": "49;117;92;128", "wc_strengths": "149;90;45;51", "wc_improvement": "17;108;46;59", "wc_limitations": "144;44;1;17", "wc_correctness": "13;45;1;10", "wc_clarity": 
"5;162;1;3", "wc_relation_to_prior_work": "1;69;1;1", "wc_documentation": "7;46;13;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "386;682;201;275", "wc_reply_reviewers": "0;0;0;35", "wc_reply_authors": "512;682;227;514", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 96.5, 30.36856927812043 ], "wc_strengths_avg": [ 83.75, 41.444993666304256 ], "wc_improvement_avg": [ 57.5, 32.882366094914765 ], "wc_limitations_avg": [ 51.5, 55.57202533649462 ], "wc_correctness_avg": [ 17.25, 16.618889854620253 ], "wc_clarity_avg": [ 42.75, 68.8635426041966 ], "wc_relation_to_prior_work_avg": [ 18.0, 29.444863728670914 ], "wc_documentation_avg": [ 17.75, 16.57369904396722 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 386.0, 183.1406563273158 ], "wc_reply_reviewers_avg": [ 8.75, 15.155444566227676 ], "wc_reply_authors_avg": [ 483.75, 163.50592496909707 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9545051815041172446&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "monash.edu;;monash.edu;wisc.edu;sxmu.edu;suda.edu.cn;monash.edu;monash.edu;monash.edu;monash.edu", "author_num": 10, "aff_unique_index": "0;0;1;2;3;0;0;0;0", "aff_unique_norm": "Monash University;University of Wisconsin-Madison;Shanxi Medical University;Suzhou University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.monash.edu;https://www.wisc.edu;http://www.sxmu.edu.cn;https://www.suda.edu.cn", "aff_unique_abbr": "Monash;UW-Madison;SXMU;Suda", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;1;2;2;0;0;0;0", "aff_country_unique": "Australia;United States;China" }, { "title": "Pre-Training Protein Encoder via Siamese Sequence-Structure Diffusion Trajectory Prediction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70211", "id": "tIzbNQko3c", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/873c86d9a979ab80d8e2919510d4446b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tIzbNQko3c", "openreview": "https://openreview.net/forum?id=tIzbNQko3c", "poster": "/media/PosterPDFs/NeurIPS%202023/70211.png?t=1698700291.559022", "slides": "https://nips.cc/virtual/2023/poster/70211", "video": "https://nips.cc/virtual/2023/poster/70211", "author_site": "Zuobai Zhang, Minghao Xu, Aurelie Lozano, Vijil Chenthamarakshan, Payel Das, Jian Tang", "tldr": "", "abstract": "Self-supervised pre-training methods on proteins have recently gained attention, with most approaches focusing on either protein sequences or structures, neglecting the exploration of their joint distribution, which is crucial for a comprehensive understanding of protein functions by integrating co-evolutionary information and structural characteristics. In this work, inspired by the success of denoising diffusion models in generative tasks, we propose the DiffPreT approach to pre-train a protein encoder by sequence-structure joint diffusion modeling. DiffPreT guides the encoder to recover the native protein sequences and structures from the perturbed ones along the joint diffusion trajectory, which acquires the joint distribution of sequences and structures. 
Considering the essential protein conformational variations, we enhance DiffPreT by a method called Siamese Diffusion Trajectory Prediction (SiamDiff) to capture the correlation between different conformers of a protein. SiamDiff attains this goal by maximizing the mutual information between representations of diffusion trajectories of structurally-correlated conformers. We study the effectiveness of DiffPreT and SiamDiff on both atom- and residue-level structure-based protein understanding tasks. Experimental results show that the performance of DiffPreT is consistently competitive on all tasks, and SiamDiff achieves new state-of-the-art performance, considering the mean ranks on all tasks. Code will be released upon acceptance.", "keywords": "Protein representation learning;diffusion models;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/0ca2e77c53ad804a7361b384e32c7fc5f222cfe4.pdf", "author": "Zuobai Zhang;Minghao Xu;Aurelie Lozano;Vijil Chenthamarakshan;Payel Das;Jian Tang", "authorids": "~Zuobai_Zhang1;~Minghao_Xu1;~Aurelie_Lozano1;~Vijil_Chenthamarakshan1;~Payel_Das1;~Jian_Tang1", "gender": "M;M;F;M;F;", "homepage": "https://oxer11.github.io/;https://chrisallenming.github.io/;https://research.ibm.com/people/aurelie-lozano;https://researcher.watson.ibm.com/researcher/view.php?person=us-ecvijil;;http://www.jian-tang.com", "dblp": "256/9098.html;;06/274;;56/7926;181/2667-5", "google_scholar": "UCDbNccAAAAJ;Oh5S2skAAAAJ;4wTGaDsAAAAJ;g9hboJ0AAAAJ;;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ", "orcid": ";;;;;", "linkedin": ";xuminghao118/;;;;", "or_profile": "~Zuobai_Zhang1;~Minghao_Xu1;~Aurelie_Lozano1;~Vijil_Chenthamarakshan1;~Payel_Das1;~Jian_Tang1", "aff": "Mila - Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;IBM Research;International Business Machines;IBM, International Business Machines;Mila, HEC Montreal", "aff_domain": "umontreal.ca;mila.umontreal.ca;us.ibm.com;ibm.com;us.ibm.com;hec.ca", "position": "PhD student;Researcher;Principal Researcher;Senior Technical Staff member;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023pretraining,\ntitle={Pre-Training Protein Encoder via Siamese Sequence-Structure Diffusion Trajectory Prediction},\nauthor={Zuobai Zhang and Minghao Xu and Aurelie Lozano and Vijil Chenthamarakshan and Payel Das and Jian Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tIzbNQko3c}\n}", "github": "", "project": "", "reviewers": "E714;Qtp8;s72W;MBi3", "pdf_size": 4059386, "rating": "6;7;7;7", "confidence": "3;3;3;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;4", "wc_summary": "78;82;67;84", "wc_strengths": "71;48;58;90", "wc_weaknesses": "112;80;13;222", "wc_questions": "197;575;67;55", "wc_limitations": "18;22;1;20", "wc_review": "476;807;206;471", "wc_reply_reviewers": "15;113;99;55", "wc_reply_authors": "21;108;37;40", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 77.75, 6.5717197140474575 ], "wc_strengths_avg": [ 66.75, 15.706288549495072 ], "wc_weaknesses_avg": [ 106.75, 75.52276146963908 ], "wc_questions_avg": [ 223.5, 210.43942121190128 ], "wc_limitations_avg": 
[ 15.25, 8.347903928532 ], "wc_review_avg": [ 490.0, 213.1325878414655 ], "wc_reply_reviewers_avg": [ 70.5, 38.532453853861945 ], "wc_reply_authors_avg": [ 51.5, 33.41032774457623 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5460911498606229668&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "umontreal.ca;mila.umontreal.ca;us.ibm.com;ibm.com;us.ibm.com;hec.ca", "author_num": 6, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;IBM;International Business Machines Corporation;International Business Machines;HEC Montreal", "aff_unique_dep": "Mila;Montreal Institute for Learning Algorithms;IBM Research;;;HEC Business School", "aff_unique_url": "https://www.umontreal.ca;https://www.umontreal.ca;https://www.ibm.com/research;https://www.ibm.com;https://www.ibm.com;https://www.hec.ca", "aff_unique_abbr": "UdeM;UM;IBM;IBM;IBM;HEC", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Montr\u00e9al;Montreal;", "aff_country_unique_index": "0;0;1;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Can You Rely on Your Model Evaluation? Improving Model Evaluation with Synthetic Test Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70210", "id": "tJ88RBqupo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05fb0f4e645cad23e0ab59d6b9901428-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tJ88RBqupo", "openreview": "https://openreview.net/forum?id=tJ88RBqupo", "poster": "/media/PosterPDFs/NeurIPS%202023/70210.png?t=1701711917.6700673", "slides": "https://nips.cc/virtual/2023/poster/70210", "video": "https://nips.cc/virtual/2023/poster/70210", "author_site": "Boris van Breugel, Nabeel Seedat, Fergus Imrie, Mihaela van der Schaar", "tldr": "", "abstract": "Evaluating the performance of machine learning models on diverse and underrepresented subgroups is essential for ensuring fairness and reliability in real-world applications. However, accurately assessing model performance becomes challenging due to two main issues: (1) a scarcity of test data, especially for small subgroups, and (2) possible distributional shifts in the model's deployment setting, which may not align with the available test data. In this work, we introduce 3S Testing, a deep generative modeling framework to facilitate model evaluation by generating synthetic test sets for small subgroups and simulating distributional shifts. Our experiments demonstrate that 3S-Testing outperforms traditional baselines---including real test data alone---in estimating model performance on minority subgroups and under plausible distributional shifts. In addition, 3S offers intervals around its performance estimates, exhibiting superior coverage of the ground truth compared to existing approaches. 
Overall, these results raise the question of whether we need a paradigm shift away from limited real test data towards synthetic test data.", "keywords": "model evaluation;tabular;synthetic data", "primary_area": "", "supplementary_material": "/attachment/79aefac4e03ac65751f17640f4caa16a849dcb1e.pdf", "author": "Boris van Breugel;Nabeel Seedat;Fergus Imrie;Mihaela van der Schaar", "authorids": "~Boris_van_Breugel2;~Nabeel_Seedat1;~Fergus_Imrie1;~Mihaela_van_der_Schaar2", "gender": ";;;F", "homepage": ";;;https://www.vanderschaar-lab.com", "dblp": "284/0835;227/8368;281/4466;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;4qCGgpsAAAAJ;DZ3S--MAAAAJ", "orcid": ";;0000-0002-6241-0123;", "linkedin": ";nabeel-seedat/;;", "or_profile": "~Boris_van_Breugel2;~Nabeel_Seedat1;~Fergus_Imrie1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nbreugel2023can,\ntitle={Can You Rely on Your Model Evaluation? Improving Model Evaluation with Synthetic Test Data},\nauthor={Boris van Breugel and Nabeel Seedat and Fergus Imrie and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tJ88RBqupo}\n}", "github": "", "project": "", "reviewers": "6cRw;vjS3;zuki;DA3a;fN4G", "pdf_size": 1316887, "rating": "5;6;6;6;7", "confidence": "3;4;4;3;3", "soundness": "2;3;4;3;4", "novelty": "3;3;3;3;3", "presentation": "2;3;4;3;4", "wc_summary": "49;50;54;30;63", "wc_strengths": "35;127;26;38;67", "wc_weaknesses": "112;225;30;134;8", "wc_questions": "127;49;77;4;2", "wc_limitations": "4;22;1;4;9", "wc_review": "327;473;188;210;149", "wc_reply_reviewers": "74;166;10;20;3", "wc_reply_authors": "117;57;10;35;14", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 49.2, 10.796295661012623 ], "wc_strengths_avg": [ 58.6, 36.85973412817841 ], "wc_weaknesses_avg": [ 101.8, 77.81105319940092 ], "wc_questions_avg": [ 51.8, 47.03785709404713 ], "wc_limitations_avg": [ 8.0, 7.4565407529228995 ], "wc_review_avg": [ 269.4, 117.83140498186381 ], "wc_reply_reviewers_avg": [ 54.6, 61.04948812234219 ], "wc_reply_authors_avg": [ 46.6, 38.99025519280427 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13475246115015546214&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;ucla.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Offline RL with Discrete Proxy Representations 
for Generalizability in POMDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70209", "id": "tJN664ZNVG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ce1c1ff5d94079dea348a2317a889281-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tJN664ZNVG", "openreview": "https://openreview.net/forum?id=tJN664ZNVG", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70209", "video": "https://nips.cc/virtual/2023/poster/70209", "author_site": "Pengjie Gu, Xinyu Cai, Dong Xing, Xinrun Wang, Mengchen Zhao, Mengchen Zhao, Bo An", "tldr": "", "abstract": "Offline Reinforcement Learning (RL) has demonstrated promising results in various applications by learning policies from previously collected datasets, reducing the need for online exploration and interactions. However, real-world scenarios usually involve partial observability, which brings crucial challenges of the deployment of offline RL methods: i) the policy trained on data with full observability is not robust against the masked observations during execution, and ii) the information of which parts of observations are masked is usually unknown during training. In order to address these challenges, we present Offline RL with DiscrEte pRoxy representations (ORDER), a probabilistic framework which leverages novel state representations to improve the robustness against diverse masked observabilities. Specifically, we propose a discrete representation of the states and use a proxy representation to recover the states from masked partial observable trajectories. The training of ORDER can be compactly described as the following three steps. i) Learning the discrete state representations on data with full observations, ii) Training the decision module based on the discrete representations, and iii) Training the proxy discrete representations on the data with various partial observations, aligning with the discrete representations. 
We conduct extensive experiments to evaluate ORDER, showcasing its effectiveness in offline RL for diverse partially observable scenarios and highlighting the significance of discrete proxy representations in generalization performance.\nORDER is a flexible framework to employ any offline RL algorithms and we hope that ORDER can pave the way for the deployment of RL policy against various partial observabilities in the real world.", "keywords": "Offline RL;POMDP", "primary_area": "", "supplementary_material": "/attachment/0900a58daf7fbcdabe64be533b5f26501a3da314.pdf", "author": "Pengjie Gu;Xinyu Cai;Dong Xing;Xinrun Wang;Mengchen Zhao;Bo An", "authorids": "~Pengjie_Gu1;~Xinyu_Cai1;~Dong_Xing1;~Xinrun_Wang1;~Mengchen_Zhao1;~Bo_An2", "gender": "M;M;M;M;M;M", "homepage": ";;;https://rainwangphy.github.io/;https://batmanzzmc.github.io/;https://personal.ntu.edu.sg/boan/", "dblp": "226/1222;;https://dblp.uni-trier.de/pid/116/8390;199/6413;178/8719;42/6178-1.html", "google_scholar": ";;S_IrSW8AAAAJ;ROANfPUAAAAJ;nLgORGMAAAAJ;PEEpuNwAAAAJ", "orcid": ";;;;;0000-0002-7064-7438", "linkedin": ";xinyu-cai-241223147/;;;;", "or_profile": "~Pengjie_Gu1;~Xinyu_Cai1;~Dong_Xing1;~Xinrun_Wang1;~Mengchen_Zhao1;~Bo_An2", "aff": "Nanyang Technological University;Nanyang Technological University;Zhejiang University;Nanyang Technological University;Huawei Noah's Ark Lab;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;zju.edu.cn;ntu.edu.sg;huawei.com;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;Postdoc;Research Scientist;Full Professor", "bibtex": "@inproceedings{\ngu2023offline,\ntitle={Offline {RL} with Discrete Proxy Representations for Generalizability in {POMDP}s},\nauthor={Pengjie Gu and Xinyu Cai and Dong Xing and Xinrun Wang and Mengchen Zhao and Bo An},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tJN664ZNVG}\n}", "github": "", "project": "", "reviewers": "AAho;D69B;iLBR;iZED", "pdf_size": 12852001, "rating": "4;6;6;7", "confidence": "4;4;3;3", "soundness": "2;3;3;2", "novelty": "1;3;3;3", "presentation": "2;3;3;3", "wc_summary": "154;152;49;159", "wc_strengths": "60;152;47;48", "wc_weaknesses": "399;357;23;16", "wc_questions": "769;165;183;117", "wc_limitations": "5;73;28;35", "wc_review": "1387;899;330;375", "wc_reply_reviewers": "641;81;34;83", "wc_reply_authors": "2468;1220;144;211", "reply_reviewers": "3;2;1;2", "reply_authors": "6;5;2;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.5, 45.97009897748753 ], "wc_strengths_avg": [ 76.75, 43.74571407578119 ], "wc_weaknesses_avg": [ 198.75, 179.88103707728615 ], "wc_questions_avg": [ 308.5, 266.9620759583653 ], "wc_limitations_avg": [ 35.25, 24.457871943405053 ], "wc_review_avg": [ 747.75, 431.5595990126972 ], "wc_reply_reviewers_avg": [ 209.75, 249.75325323206502 ], "wc_reply_authors_avg": [ 1010.75, 943.1620685226903 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.0, 1.5811388300841898 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Yqi9O4OZ1kkJ:scholar.google.com/&scioq=Offline+RL+with+Discrete+Proxy+Representations+for+Generalizability+in+POMDPs&hl=en&as_sdt=0,5", 
"gs_version_total": 6, "email": "ntu.edu.sg;ntu.edu.sg;zju.edu.cn;ntu.edu.sg;huawei.com;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Nanyang Technological University;Zhejiang University;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "https://www.ntu.edu.sg;https://www.zju.edu.cn;https://www.huawei.com", "aff_unique_abbr": "NTU;ZJU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Parallel-mentoring for Offline Model-based Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70208", "id": "tJwyg9Zg9G", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f189e7580acad0fc7fd45405817ddee3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tJwyg9Zg9G", "openreview": "https://openreview.net/forum?id=tJwyg9Zg9G", "poster": "/media/PosterPDFs/NeurIPS%202023/70208.png?t=1697337417.2819066", "slides": "https://nips.cc/virtual/2023/poster/70208", "video": "https://nips.cc/virtual/2023/poster/70208", "author_site": "Can (Sam) Chen, Christopher Beckham, Zixuan Liu, Xue (Steve) Liu, Chris Pal", "tldr": "", "abstract": "We study offline model-based optimization to maximize a black-box objective function with a static dataset of designs and scores. These designs encompass a variety of domains, including materials, robots, DNA sequences, and proteins. A common approach trains a proxy on the static dataset and performs gradient ascent to obtain new designs. However, this often results in poor designs due to the proxy inaccuracies for out-of-distribution designs. Recent studies indicate that (a) gradient ascent with a mean ensemble of proxies generally outperforms simple gradient ascent, and (b) a trained proxy provides weak ranking supervision signals for design selection. Motivated by (a) and (b), we propose $\\textit{parallel-mentoring}$ as an effective and novel method that facilitates mentoring among proxies, creating a more robust ensemble to mitigate the out-of-distribution issue. We focus on the three-proxy case in the main paper and our method consists of two modules. The first module, $\\textit{voting-based pairwise supervision}$, operates on three parallel proxies and captures their ranking supervision signals as pairwise comparison labels. These labels are combined through majority voting to generate consensus labels, which incorporates ranking supervision signals from all proxies and enables mutual mentoring. Yet, label noise arises due to possible incorrect consensus. To alleviate this, we introduce an $\\textit{adaptive soft-labeling}$ module with soft-labels initialized as consensus labels. Based on bi-level optimization, this module fine-tunes proxies in the inner level and learns more accurate labels in the outer level to adaptively mentor proxies, resulting in a more robust ensemble. Experiments validate the effectiveness of our method. 
Our code is available here.", "keywords": "offline model-based optimization;bi-level optimization", "primary_area": "", "supplementary_material": "/attachment/0adeb2f51d36d4754ce48e0218d8c3439de1b380.zip", "author": "Can Chen;Christopher Beckham;Zixuan Liu;Xue Liu;Christopher Pal", "authorids": "~Can_Chen3;~Christopher_Beckham1;~Zixuan_Liu1;~Xue_Liu1;~Christopher_Pal1", "gender": "Not Specified;M;M;M;", "homepage": ";;;http://www.cs.mcgill.ca/~xueliu/;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao", "dblp": "370/4546.html;192/1312;;l/XueLiu;45/1217", "google_scholar": ";;yjKOHbEAAAAJ;https://scholar.google.com.tw/citations?user=rfLIRakAAAAJ;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ", "orcid": ";;;;", "linkedin": "can-chen-018851202/;;;;", "or_profile": "~Can_Chen3;~Christopher_Beckham1;~Zixuan_Liu1;~Xue_Liu1;~Christopher_Pal1", "aff": "Mila - Quebec AI Institute;Polytechnique Montreal;University of Washington;McGill University;Polytechnique Montreal", "aff_domain": "mila.quebec;polymtl.ca;uw.edu;mcgill.ca;polymtl.ca", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023parallelmentoring,\ntitle={Parallel-mentoring for Offline Model-based Optimization},\nauthor={Can Chen and Christopher Beckham and Zixuan Liu and Xue Liu and Christopher Pal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tJwyg9Zg9G}\n}", "github": "", "project": "", "reviewers": "VCj9;CizP;NEb9;usfZ", "pdf_size": 1067251, "rating": "5;5;7;7", "confidence": "2;4;5;4", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "94;65;86;117", "wc_strengths": "34;27;65;157", "wc_weaknesses": "52;61;166;188", "wc_questions": "16;6;102;116", "wc_limitations": "7;1;12;13", "wc_review": "203;160;431;591", "wc_reply_reviewers": "28;21;36;15", "wc_reply_authors": "163;42;40;31", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.5, 18.607794065928395 ], "wc_strengths_avg": [ 70.75, 51.80914494565607 ], "wc_weaknesses_avg": [ 116.75, 60.833276255680985 ], "wc_questions_avg": [ 60.0, 49.37610758251404 ], "wc_limitations_avg": [ 8.25, 4.763139720814412 ], "wc_review_avg": [ 346.25, 174.85333139520105 ], "wc_reply_reviewers_avg": [ 25.0, 7.842193570679061 ], "wc_reply_authors_avg": [ 69.0, 54.4288526426931 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=517094128321019075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mila.quebec;polymtl.ca;uw.edu;mcgill.ca;polymtl.ca", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Quebec AI Institute;Polytechnique Montreal;University of Washington;McGill University", "aff_unique_dep": "AI Institute;;;", "aff_unique_url": "https://mila.quebec;https://www.polymtl.ca;https://www.washington.edu;https://www.mcgill.ca", "aff_unique_abbr": "Mila;PolyMTL;UW;McGill", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": 
"Emergent Communication in Interactive Sketch Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70207", "id": "tLEDsaKuDh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/746cf1bc2337700f7f0c35c7b02638cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tLEDsaKuDh", "openreview": "https://openreview.net/forum?id=tLEDsaKuDh", "poster": "/media/PosterPDFs/NeurIPS%202023/70207.png?t=1702032486.6006432", "slides": "https://nips.cc/virtual/2023/poster/70207", "video": "https://nips.cc/virtual/2023/poster/70207", "author_site": "Zixing Lei, Yiming Zhang, Yuxin Xiong, Siheng Chen", "tldr": "", "abstract": "Vision-based emergent communication (EC) aims to learn to communicate through sketches and demystify the evolution of human communication. Ironically, previous works neglect multi-round interaction, which is indispensable in human communication. To fill this gap, we first introduce a novel Interactive Sketch Question Answering (ISQA) task, where two collaborative players are interacting through sketches to answer a question about an image. To accomplish this task, we design a new and efficient interactive EC system, which can achieve an effective balance among three evaluation factors, including the question answering accuracy, drawing complexity and human interpretability. Our experimental results demonstrate that multi-round interactive mechanism facilitates tar- geted and efficient communication between intelligent agents. The code will be released.", "keywords": "Emergent communication;Interactive;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Zixing Lei;Yiming Zhang;Yuxin Xiong;Siheng Chen", "authorids": "~Zixing_Lei1;~Yiming_Zhang11;~Yuxin_Xiong1;~Siheng_Chen1", "gender": "M;M;F;M", "homepage": "https://chezacar.github.io;;;https://siheng-chen.github.io/", "dblp": "324/4844;;357/5881.html;136/4945", "google_scholar": "4r3VI9EAAAAJ;https://scholar.google.com/citations?hl=en;T_v5hCYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;yuxin-xiong-b325652b5/;", "or_profile": "~Zixing_Lei1;~Yiming_Zhang11;~Yuxin_Xiong1;~Siheng_Chen2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;Undergrad student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nlei2023emergent,\ntitle={Emergent Communication in Interactive Sketch Question Answering},\nauthor={Zixing Lei and Yiming Zhang and Yuxin Xiong and Siheng Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tLEDsaKuDh}\n}", "github": "", "project": "", "reviewers": "APpT;ATWv;jxDd;NLdK", "pdf_size": 5041505, "rating": "4;4;6;7", "confidence": "3;4;4;4", "soundness": "1;3;3;3", "novelty": "1;2;3;3", "presentation": "3;3;3;3", "wc_summary": "67;91;99;82", "wc_strengths": "25;107;70;113", "wc_weaknesses": "182;642;101;155", "wc_questions": "6;1;14;127", "wc_limitations": "10;1;34;4", "wc_review": "290;842;318;481", "wc_reply_reviewers": "216;0;36;4", "wc_reply_authors": "768;0;32;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], 
"presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.75, 11.882234638316145 ], "wc_strengths_avg": [ 78.75, 35.13100482479828 ], "wc_weaknesses_avg": [ 270.0, 216.74524216231367 ], "wc_questions_avg": [ 37.0, 52.16799785308997 ], "wc_limitations_avg": [ 12.25, 12.968712349342937 ], "wc_review_avg": [ 482.75, 219.86288340690885 ], "wc_reply_reviewers_avg": [ 64.0, 88.85943956609225 ], "wc_reply_authors_avg": [ 200.0, 328.19506394825623 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18430789387180851451&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Not All Neuro-Symbolic Concepts Are Created Equal: Analysis and Mitigation of Reasoning Shortcuts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70206", "id": "tLTtqySDFb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e560202b6e779a82478edb46c6f8f4dd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tLTtqySDFb", "openreview": "https://openreview.net/forum?id=tLTtqySDFb", "poster": "/media/PosterPDFs/NeurIPS%202023/70206.png?t=1702356558.2846081", "slides": "https://nips.cc/virtual/2023/poster/70206", "video": "https://nips.cc/virtual/2023/poster/70206", "author_site": "Emanuele Marconato, Stefano Teso, Antonio Vergari, Andrea Passerini", "tldr": "", "abstract": "Neuro-Symbolic (NeSy) predictive models hold the promise of improved compliance with given constraints, systematic generalization, and interpretability, as they allow to infer labels that are consistent with some prior knowledge by reasoning over high-level concepts extracted from sub-symbolic inputs. It was recently shown that NeSy predictors are affected by *reasoning shortcuts*: they can attain high accuracy but by leveraging concepts with \\textit{unintended semantics}, thus coming short of their promised advantages. Yet, a systematic characterization of reasoning shortcuts and of potential mitigation strategies is missing. This work fills this gap by characterizing them as unintended optima of the learning objective and identifying four key conditions behind their occurrence. Based on this, we derive several natural mitigation strategies, and analyze their efficacy both theoretically and empirically. 
Our analysis shows reasoning shortcuts are difficult to deal with, casting doubts on the trustworthiness and interpretability of existing NeSy solutions.", "keywords": "Neuro-Symbolic Integration;Trustworthy AI;Concept Learning;Learning Shortcuts;Mitigation Strategies", "primary_area": "", "supplementary_material": "/attachment/e6dd3baf0964c783920d56f14f11d57a623a8d4f.pdf", "author": "Emanuele Marconato;Stefano Teso;Antonio Vergari;Andrea Passerini", "authorids": "~Emanuele_Marconato1;~Stefano_Teso1;~Antonio_Vergari3;~Andrea_Passerini2", "gender": ";;M;M", "homepage": ";https://stefanoteso.github.io/;http://disi.unitn.it/~passerini/;http://nolovedeeplearning.com", "dblp": "321/3331;78/8359;00/6186;http://dblp.uni-trier.de/pers/hd/v/Vergari:Antonio", "google_scholar": "H0gXWAgAAAAJ;uaQCyXkAAAAJ;https://scholar.google.it/citations?user=IIXgkLoAAAAJ;YK0NLaUAAAAJ", "orcid": ";;0000-0002-2765-5395;0000-0003-0036-5678", "linkedin": "emanuele-marconato-108449195;;;", "or_profile": "~Emanuele_Marconato1;~Stefano_Teso1;~Andrea_Passerini2;~antonio_vergari2", "aff": "University of Pisa;University of Trento;University of Trento;University of Edinburgh", "aff_domain": "unipi.it;unitn.it;unitn.it;ed.ac.uk", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nmarconato2023not,\ntitle={Not All Neuro-Symbolic Concepts Are Created Equal: Analysis and Mitigation of Reasoning Shortcuts},\nauthor={Emanuele Marconato and Stefano Teso and Antonio Vergari and Andrea Passerini},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tLTtqySDFb}\n}", "github": "", "project": "", "reviewers": "szNE;p1H9;Qpe9;yCW5", "pdf_size": 1274491, "rating": "6;7;7;7", "confidence": "2;4;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "213;55;49;52", "wc_strengths": "149;45;73;34", "wc_weaknesses": "512;72;380;332", "wc_questions": "3;249;34;2", "wc_limitations": "7;17;23;2", "wc_review": "884;438;559;422", "wc_reply_reviewers": "13;65;286;12", "wc_reply_authors": "0;0;351;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.25, 69.74731177615378 ], "wc_strengths_avg": [ 75.25, 44.890839822841365 ], "wc_weaknesses_avg": [ 324.0, 159.72476326481126 ], "wc_questions_avg": [ 72.0, 102.99757278693512 ], "wc_limitations_avg": [ 12.25, 8.227241335952167 ], "wc_review_avg": [ 575.75, 185.6830296499925 ], "wc_reply_reviewers_avg": [ 94.0, 112.90482717758351 ], "wc_reply_authors_avg": [ 87.75, 151.987458364169 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9727719809819947459&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "unipi.it;unitn.it;unitn.it;ed.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Pisa;University of Trento;University of Edinburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unipi.it;https://www.unitn.it;https://www.ed.ac.uk", "aff_unique_abbr": "UNIP;UniTN;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": 
"", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Italy;United Kingdom" }, { "title": "Optimistic Active Exploration of Dynamical Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70205", "id": "tLrkjK128n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/77b5aaf2826c95c98e5eb4ab830073de-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tLrkjK128n", "openreview": "https://openreview.net/forum?id=tLrkjK128n", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70205", "video": "https://nips.cc/virtual/2023/poster/70205", "author_site": "Bhavya, Lenart Treven, Cansu Sancaktar, Sebastian Blaes, Stelian Coros, Andreas Krause", "tldr": "", "abstract": "Reinforcement learning algorithms commonly seek to optimize policies for solving one particular task. How should we explore an unknown dynamical system such that the estimated model allows us to solve multiple downstream tasks in a zero-shot manner? \nIn this paper, we address this challenge, by developing an algorithm -- OPAX -- for active exploration. OPAX uses well-calibrated probabilistic models to quantify the epistemic uncertainty about the unknown dynamics. It optimistically---w.r.t. to plausible dynamics---maximizes the information gain between the unknown dynamics and state observations. We show how the resulting optimization problem can be reduced to an optimal control problem that can be solved at each episode using standard approaches. We analyze our algorithm for general models, and, in the case of Gaussian process dynamics, we give a sample complexity bound and\nshow that the epistemic uncertainty converges to zero. In our experiments, we compare OPAX with other heuristic active exploration approaches on several environments. 
Our experiments show that OPAX is not only theoretically sound but also performs well for zero-shot planning on novel downstream tasks.", "keywords": "Active Exploration;Reinforcement Learning;Dynamical Systems", "primary_area": "", "supplementary_material": "", "author": "Bhavya Sukhija;Lenart Treven;Cansu Sancaktar;Sebastian Blaes;Stelian Coros;Andreas Krause", "authorids": "~Bhavya_Sukhija1;~Lenart_Treven1;~Cansu_Sancaktar1;~Sebastian_Blaes1;~Stelian_Coros1;~Andreas_Krause1", "gender": "M;M;F;M;M;M", "homepage": ";;https://csancaktar.github.io;https://sblaes.com;http://crl.ethz.ch/index.html;https://las.inf.ethz.ch/krausea", "dblp": "312/4742;267/9666;256/5345;163/8117;;87/1831-1.html", "google_scholar": ";CDnzTWkAAAAJ;9JqNY7UAAAAJ;https://scholar.google.de/citations?user=ftV9OHMAAAAJ;sX31JjwAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": "0000-0001-6238-9734;;;;;0000-0001-7260-9673", "linkedin": ";lenart-treven/;cansu-sancaktar-61715b140/;sebastian-blaes/;;krausea/", "or_profile": "~Bhavya_Sukhija1;~Lenart_Treven1;~Cansu_Sancaktar1;~Sebastian_Blaes1;~Stelian_Coros1;~Andreas_Krause1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems, Max Planck Institute for Intelligent Systems;ETHZ - ETH Zurich;ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;tuebingen.mpg.de;is.tue.mpg.de;ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsukhija2023optimistic,\ntitle={Optimistic Active Exploration of Dynamical Systems},\nauthor={Bhavya Sukhija and Lenart Treven and Cansu Sancaktar and Sebastian Blaes and Stelian Coros and Andreas Krause},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tLrkjK128n}\n}", "github": "", "project": "", "reviewers": "Y1JX;EYiX;zJUZ;V3Yd;meUw;kKQP", "pdf_size": 1579315, "rating": "5;5;5;6;6;6", "confidence": "3;2;3;4;3;4", "soundness": "3;3;3;3;3;3", "novelty": "2;2;3;2;3;2", "presentation": "3;4;3;3;3;2", "wc_summary": "110;62;41;32;65;33", "wc_strengths": "61;64;35;64;70;78", "wc_weaknesses": "78;64;252;174;126;395", "wc_questions": "580;41;53;5;176;95", "wc_limitations": "19;19;8;21;1;1", "wc_review": "848;250;389;296;438;602", "wc_reply_reviewers": "1565;0;0;445;131;88", "wc_reply_authors": "2547;0;0;1219;389;149", "reply_reviewers": "5;0;0;5;1;1", "reply_authors": "7;1;1;5;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 57.166666666666664, 26.940778673890545 ], "wc_strengths_avg": [ 62.0, 13.279056191361393 ], "wc_weaknesses_avg": [ 181.5, 114.16909973076487 ], "wc_questions_avg": [ 158.33333333333334, 196.00141722843628 ], "wc_limitations_avg": [ 11.5, 8.51958527942137 ], "wc_review_avg": [ 470.5, 202.717989663802 ], "wc_reply_reviewers_avg": [ 371.5, 554.3887775439423 ], "wc_reply_authors_avg": [ 717.3333333333334, 918.2673297514667 ], "reply_reviewers_avg": [ 2.0, 2.160246899469287 ], "reply_authors_avg": [ 3.1666666666666665, 2.1921577396609844 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7276068751089989, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2679558924836280937&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ethz.ch;ethz.ch;tuebingen.mpg.de;is.tue.mpg.de;ethz.ch;ethz.ch", "author_num": 6, "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";;Intelligent Systems", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch;https://www.mpi-is.mpg.de", "aff_unique_abbr": "ETHZ;ETH Zurich;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "Multimodal C4: An Open, Billion-scale Corpus of Images Interleaved with Text", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73443", "id": "tOd8rSjcWz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1c6bed78d3813886d3d72595dbecb80b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=tOd8rSjcWz", "openreview": "https://openreview.net/forum?id=tOd8rSjcWz", "poster": "/media/PosterPDFs/NeurIPS%202023/73443.png?t=1701689122.9882128", "slides": "https://nips.cc/virtual/2023/poster/73443", "video": "https://nips.cc/virtual/2023/poster/73443", "author_site": "Wanrong Zhu, Jack Hessel, Anas Awadalla, Samir Yitzhak Gadre, Jesse Dodge, Alex Fang, Youngjae Yu, Ludwig Schmidt, William Yang Wang, Yejin Choi", "tldr": "", "abstract": "In-context vision and language models like Flamingo support arbitrarily interleaved sequences of images and text as input.\nThis format not only enables few-shot learning via interleaving independent supervised (image, text) examples, but also, more complex prompts involving interaction between images, e.g., ``What do image A and image B have in common?''\nTo support this interface, pretraining occurs over web corpora that similarly contain interleaved images+text.\nTo date, however, large-scale data of this form have not been publicly available.\n\n\nWe release Multimodal C4, an augmentation of the popular text-only C4 corpus with images interleaved.\nWe use a linear assignment algorithm to place images into longer bodies of text using CLIP features, a process that we show outperforms alternatives.\nMultimodal C4 spans everyday topics like cooking, travel, technology, etc. A manual inspection of a random sample of documents shows that a vast majority (88\\%) of images are topically relevant, and that linear assignment frequently selects individual sentences specifically well-aligned with each image (80\\%). 
\nAfter filtering NSFW images, ads, etc., the resulting corpus consists of 101.2M documents with 571M images interleaved in 43B English tokens.", "keywords": "Multimodal dataset;vision-and-language;interleaved text and image sequence", "primary_area": "", "supplementary_material": "/attachment/ec4854da0295fac5aa3e12a19c545daf7f6cf574.pdf", "author": "Wanrong Zhu;Jack Hessel;Anas Awadalla;Samir Yitzhak Gadre;Jesse Dodge;Alex Fang;Youngjae Yu;Ludwig Schmidt;William Yang Wang;Yejin Choi", "authorids": "~Wanrong_Zhu1;~Jack_Hessel1;~Anas_Awadalla1;~Samir_Yitzhak_Gadre1;~Jesse_Dodge1;~Alex_Fang1;~Youngjae_Yu1;~Ludwig_Schmidt1;~William_Yang_Wang2;~Yejin_Choi1", "gender": ";M;M;M;M;;M;M;;F", "homepage": ";https://www.jmhessel.com;https://github.com/anas-awadalla;https://sagadre.github.io/;http://www.cs.cmu.edu/~jessed/;;https://yj-yu.github.io/home/;http://people.csail.mit.edu/ludwigs/;;https://yejinc.github.io/", "dblp": ";https://dblp.uni-trier.de/pid/132/5250.html;;246/7901;49/11425;260/0449;188/6210;141/2720;;89/579-1", "google_scholar": ";SxQQ1msAAAAJ;https://scholar.google.com/citations?hl=en;oAhlg9gAAAAJ;nHy_1doAAAAJ;;https://scholar.google.co.kr/citations?user=WDO24ZYAAAAJ;SWMKy70AAAAJ;;vhP-tlcAAAAJ", "orcid": ";0000-0002-4012-8979;;;;;;;;", "linkedin": ";;;;;alex-fang-8a11a8115/;;ludwig-schmidt-87ba3612/;;", "or_profile": "~Wanrong_Zhu1;~Jack_Hessel1;~Anas_Awadalla1;~Samir_Yitzhak_Gadre1;~Jesse_Dodge1;~Alex_Fang1;~Youngjae_Yu1;~Ludwig_Schmidt1;~William_Yang_Wang2;~Yejin_Choi1", "aff": ";Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington;Columbia University;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;Department of Computer Science, University of Washington", "aff_domain": ";allenai.org;cs.washington.edu;columbia.edu;allenai.org;cs.washington.edu;allenai.org;allenai.org;;cs.washington.edu", "position": ";Researcher;Undergrad student;PhD student;Researcher;PhD student;Postdoc;Researcher;;Full Professor", "bibtex": "@inproceedings{\nzhu2023multimodal,\ntitle={Multimodal C4: An Open, Billion-scale Corpus of Images Interleaved with Text},\nauthor={Wanrong Zhu and Jack Hessel and Anas Awadalla and Samir Yitzhak Gadre and Jesse Dodge and Alex Fang and Youngjae Yu and Ludwig Schmidt and William Yang Wang and Yejin Choi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=tOd8rSjcWz}\n}", "github": "", "project": "", "reviewers": "y4Co;VvxB;GHE4;eWRn;5HBh", "pdf_size": 3043115, "rating": "7;7;7;8;10", "confidence": "4;4;3;4;4", "wc_summary_and_contributions": "65;84;52;57;93", "wc_strengths": "71;50;82;67;10", "wc_improvement": "62;122;34;82;47", "wc_limitations": "1;62;129;40;22", "wc_correctness": "3;19;1;15;1", "wc_clarity": "9;167;1;7;1", "wc_relation_to_prior_work": "1;1;1;8;1", "wc_documentation": "1;19;1;8;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "214;525;302;285;177", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "324;630;93;474;172", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 70.2, 15.765785740013087 ], "wc_strengths_avg": [ 56.0, 25.19523764523764 ], "wc_improvement_avg": [ 69.4, 30.774015012669373 ], 
"wc_limitations_avg": [ 50.8, 43.96998976574819 ], "wc_correctness_avg": [ 7.8, 7.652450587883596 ], "wc_clarity_avg": [ 37.0, 65.07841424005352 ], "wc_relation_to_prior_work_avg": [ 2.4, 2.8 ], "wc_documentation_avg": [ 6.0, 7.042726744663604 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.6, 121.14223045660007 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 338.6, 195.7218434411448 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 176, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15474863726827064995&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";allenai.org;cs.washington.edu;columbia.edu;allenai.org;cs.washington.edu;allenai.org;allenai.org;;cs.washington.edu", "author_num": 10, "aff_unique_index": "0;1;2;0;1;0;0;1", "aff_unique_norm": "Allen Institute for Artificial Intelligence;University of Washington;Columbia University", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://allenai.org;https://www.washington.edu;https://www.columbia.edu", "aff_unique_abbr": "AI2;UW;Columbia", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Non-Stationary Bandits with Auto-Regressive Temporal Dependency", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70204", "id": "tP50lLiZIo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/186a213d720568b31f9b59c085a23e5a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tP50lLiZIo", "openreview": "https://openreview.net/forum?id=tP50lLiZIo", "poster": "/media/PosterPDFs/NeurIPS%202023/70204.png?t=1701742798.404951", "slides": "https://nips.cc/virtual/2023/poster/70204", "video": "https://nips.cc/virtual/2023/poster/70204", "author_site": "Qinyi Chen, Negin Golrezaei, Djallel Bouneffouf", "tldr": "", "abstract": "Traditional multi-armed bandit (MAB) frameworks, predominantly examined under stochastic or adversarial settings, often overlook the temporal dynamics inherent in many real-world applications such as recommendation systems and online advertising. This paper introduces a novel non-stationary MAB framework that captures the temporal structure of these real-world dynamics through an auto-regressive (AR) reward structure. We propose an algorithm that integrates two key mechanisms: (i) an alternation mechanism adept at leveraging temporal dependencies to dynamically balance exploration and exploitation, and (ii) a restarting mechanism designed to discard out-of-date information. Our algorithm achieves a regret upper bound that nearly matches the lower bound, with regret measured against a robust dynamic benchmark. 
Finally, via a real-world case study on tourism demand prediction, we demonstrate both the efficacy of our algorithm and the broader applicability of our techniques to more complex, rapidly evolving time series.", "keywords": "non-stationary bandits; autoregressive model; low-regret policy; online learning algorithms", "primary_area": "", "supplementary_material": "/attachment/810b6d7f71e9dce9108d6ca20d9b3ebc1c594b38.zip", "author": "Qinyi Chen;Negin Golrezaei;Djallel Bouneffouf", "authorids": "~Qinyi_Chen1;~Negin_Golrezaei1;~Djallel_Bouneffouf2", "gender": ";F;M", "homepage": ";https://www.mit.edu/~golrezae/;", "dblp": ";37/10099.html;45/11240-1", "google_scholar": "lY2VAB0AAAAJ;k9uWzAIAAAAJ;", "orcid": "0000-0002-2912-2728;;", "linkedin": "qinyi-chen-4735aa112/;;", "or_profile": "~Qinyi_Chen1;~Negin_Golrezaei1;~Djallel_Bouneffouf2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;mit.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nchen2023nonstationary,\ntitle={Non-Stationary Bandits with Auto-Regressive Temporal Dependency},\nauthor={Qinyi Chen and Negin Golrezaei and Djallel Bouneffouf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tP50lLiZIo}\n}", "github": "", "project": "", "reviewers": "jsRP;6t1b;s53A;UBzK", "pdf_size": 649073, "rating": "3;5;6;7", "confidence": "3;3;2;4", "soundness": "3;3;2;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "159;44;17;71", "wc_strengths": "25;71;18;28", "wc_weaknesses": "307;193;34;48", "wc_questions": "119;36;75;158", "wc_limitations": "1;5;2;18", "wc_review": "611;349;146;323", "wc_reply_reviewers": "575;190;7;4", "wc_reply_authors": "969;215;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 53.33092442476504 ], "wc_strengths_avg": [ 35.5, 20.81465829649865 ], "wc_weaknesses_avg": [ 145.5, 112.11266654575655 ], "wc_questions_avg": [ 97.0, 45.8530260724415 ], "wc_limitations_avg": [ 6.5, 6.800735254367722 ], "wc_review_avg": [ 357.25, 166.02465931300688 ], "wc_reply_reviewers_avg": [ 194.0, 232.51129004846194 ], "wc_reply_authors_avg": [ 296.0, 398.34721035799913 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.23904572186687872, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10174892690706258906&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "D$^2$CSG: Unsupervised Learning of Compact CSG Trees with Dual Complements and Dropouts", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70203", "id": "tQYGjnxPOm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4732d425125832887f6c5a9675d49ead-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=tQYGjnxPOm", "openreview": "https://openreview.net/forum?id=tQYGjnxPOm", "poster": "/media/PosterPDFs/NeurIPS%202023/70203.png?t=1701745738.7938082", "slides": "https://nips.cc/virtual/2023/poster/70203", "video": "https://nips.cc/virtual/2023/poster/70203", "author_site": "Fenggen Yu, Qimin Chen, Maham Tanveer, Ali Mahdavi Amiri, Hao Zhang", "tldr": "", "abstract": "We present D$^2$CSG, a neural model composed of two dual and complementary network branches, with dropouts, for unsupervised learning of compact constructive solid geometry (CSG) representations of 3D CAD shapes. Our network is trained to reconstruct a 3D shape by a fixed-order assembly of quadric primitives, with both branches producing a union of primitive intersections or inverses. A key difference between D$^2$CSG and all prior neural CSG models is its dedicated residual branch to assemble the potentially complex shape complement, which is subtracted from an overall shape modeled by the cover branch. With the shape complements, our network is provably general, while the weight dropout further improves compactness of the CSG tree by removing redundant primitives. We demonstrate both quantitatively and qualitatively that D$^2$CSG produces compact CSG reconstructions with superior quality and more natural primitives than all existing alternatives, especially over complex and high-genus CAD shapes.", "keywords": "3D reconstruction;constructive solid geometry;unsupervised learning;compact shape assembly", "primary_area": "", "supplementary_material": "/attachment/978b76bd37753c9d2a3ff520824331c3e6028945.pdf", "author": "Fenggen Yu;Qimin Chen;Maham Tanveer;Ali Mahdavi Amiri;Hao Zhang", "authorids": "~Fenggen_Yu1;~Qimin_Chen1;~Maham_Tanveer1;~Ali_Mahdavi_Amiri1;~Hao_Zhang25", "gender": "M;M;F;M;M", "homepage": "https://fenggenyu.github.io/;https://qiminchen.github.io/;http://mtanveer.com;https://www.sfu.ca/~amahdavi;https://www2.cs.sfu.ca/~haoz/", "dblp": "207/8046;271/7566;176/6740;33/10499.html;z/HaoZhang2", "google_scholar": "https://scholar.google.ca/citations?user=USxuT2QAAAAJ;N5MghGIAAAAJ;;https://scholar.google.ca/citations?user=M9eTADwAAAAJ;osTl-5IAAAAJ", "orcid": ";0009-0004-8447-0137;;;", "linkedin": ";;;;", "or_profile": "~Fenggen_Yu1;~Qimin_Chen1;~Maham_Tanveer1;~Ali_Mahdavi_Amiri1;~Hao_Richard_Zhang1", "aff": "Simon Fraser University;Simon Fraser University;Simon Fraser University;Simon Fraser University;Amazon", "aff_domain": "sfu.ca;sfu.ca;sfu.ca;sfu.ca;amazon.com", "position": "PhD student;PhD student;PhD student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nyu2023dcsg,\ntitle={D\\${\\textasciicircum}2\\${CSG}: Unsupervised Learning of Compact {CSG} Trees with Dual Complements and Dropouts},\nauthor={Fenggen Yu and Qimin Chen and Maham Tanveer and Ali Mahdavi Amiri and Hao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tQYGjnxPOm}\n}", "github": "", "project": "", "reviewers": "AcKp;DMnr;dmBa;T4uV;rfjZ", "pdf_size": 32171940, "rating": "6;6;6;7;8", "confidence": "4;5;2;5;3", "soundness": "3;4;4;4;4", "novelty": "3;3;3;3;4", "presentation": "3;3;2;3;4", "wc_summary": "150;108;82;145;112", "wc_strengths": "55;69;37;121;73", "wc_weaknesses": "55;136;33;705;142", "wc_questions": "74;44;58;349;96", "wc_limitations": "15;1;1;76;5", "wc_review": "349;358;211;1396;428", "wc_reply_reviewers": "4;18;0;27;30", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", 
"reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 119.4, 25.2 ], "wc_strengths_avg": [ 71.0, 28.0 ], "wc_weaknesses_avg": [ 214.2, 249.15489158352884 ], "wc_questions_avg": [ 124.2, 113.72317266063237 ], "wc_limitations_avg": [ 19.6, 28.66077458827657 ], "wc_review_avg": [ 548.4, 429.5982309088342 ], "wc_reply_reviewers_avg": [ 15.8, 12.006664815842907 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.08574929257125445, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6904002008512876892&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "sfu.ca;sfu.ca;sfu.ca;sfu.ca;amazon.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Simon Fraser University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.sfu.ca;https://www.amazon.com", "aff_unique_abbr": "SFU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Modeling Human Visual Motion Processing with Trainable Motion Energy Sensing and a Self-attention Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70202", "id": "tRKimbAk5D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c9477b9e2c7ec0ad3f4f15077aaf85a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tRKimbAk5D", "openreview": "https://openreview.net/forum?id=tRKimbAk5D", "poster": "/media/PosterPDFs/NeurIPS%202023/70202.png?t=1697169467.2769327", "slides": "https://nips.cc/virtual/2023/poster/70202", "video": "https://nips.cc/virtual/2023/poster/70202", "author_site": "Zitang Sun, Yen-Ju Chen, Yung-Hao Yang, Shin'ya Nishida", "tldr": "", "abstract": "Visual motion processing is essential for humans to perceive and interact with dynamic environments. Despite extensive research in cognitive neuroscience, image-computable models that can extract informative motion flow from natural scenes in a manner consistent with human visual processing have yet to be established. Meanwhile, recent advancements in computer vision (CV), propelled by deep learning, have led to significant progress in optical flow estimation, a task closely related to motion perception. Here we propose an image-computable model of human motion perception by bridging the gap between biological and CV models. Specifically, we introduce a novel two-stages approach that combines trainable motion energy sensing with a recurrent self-attention network for adaptive motion integration and segregation. This model architecture aims to capture the computations in V1-MT, the core structure for motion perception in the biological visual system, while providing the ability to derive informative motion flow for a wide range of stimuli, including complex natural scenes. In silico neurophysiology reveals that our model's unit responses are similar to mammalian neural recordings regarding motion pooling and speed tuning. The proposed model can also replicate human responses to a range of stimuli examined in past psychophysical studies. 
The experimental results on the Sintel benchmark demonstrate that our model predicts human responses better than the ground truth, whereas the state-of-the-art CV models show the opposite. Our study provides a computational architecture consistent with human visual motion processing, although the physiological correspondence may not be exact.", "keywords": "motion perception;optical flow estimation;attention mechanism;psychophysics;In silico neurophysiology;human vision", "primary_area": "", "supplementary_material": "/attachment/968b9f3ff9b3b1a32425c5ae10a150e2bc2d06d1.zip", "author": "Zitang Sun;Yen-Ju Chen;Yung-Hao Yang;Shin'ya Nishida", "authorids": "~Zitang_Sun1;~Yen-Ju_Chen2;~Yung-Hao_Yang1;~Shin'ya_Nishida1", "gender": "M;;;M", "homepage": "https://www.researchgate.net/profile/Zitang-Sun;;;https://researchmap.jp/shinyanishida/?lang=en", "dblp": "289/1020;;;83/1571", "google_scholar": ";;;https://scholar.google.co.jp/citations?user=bxhQU8EAAAAJ", "orcid": "0000-0003-2267-421X;;;0000-0002-5098-4752", "linkedin": ";;;", "or_profile": "~Zitang_Sun1;~Yen-Ju_Chen2;~Yung-Hao_Yang1;~Shin'ya_Nishida1", "aff": "Kyoto University;;;Kyoto University", "aff_domain": "kyoto-u.ac.jp;;;kyoto-u.ac.jp", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nsun2023modeling,\ntitle={Modeling Human Visual Motion Processing with Trainable Motion Energy Sensing and a Self-attention Network},\nauthor={Zitang Sun and Yen-Ju Chen and Yung-Hao Yang and Shin'ya Nishida},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tRKimbAk5D}\n}", "github": "", "project": "", "reviewers": "n3Ed;tFBw;QXFT;rMjX;gk6R", "pdf_size": 14174411, "rating": "6;6;7;7;8", "confidence": "3;4;2;4;3", "soundness": "3;3;4;3;4", "novelty": "3;2;3;3;4", "presentation": "2;3;4;3;3", "wc_summary": "62;67;93;63;76", "wc_strengths": "109;34;45;103;191", "wc_weaknesses": "209;867;27;81;132", "wc_questions": "32;1;1;104;173", "wc_limitations": "6;1;4;38;29", "wc_review": "418;970;170;389;601", "wc_reply_reviewers": "0;14;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.2, 11.513470371699404 ], "wc_strengths_avg": [ 96.4, 56.01285566724839 ], "wc_weaknesses_avg": [ 263.2, 307.8027940094112 ], "wc_questions_avg": [ 62.2, 66.97283031200041 ], "wc_limitations_avg": [ 15.6, 14.974645237867906 ], "wc_review_avg": [ 509.6, 267.8003734127345 ], "wc_reply_reviewers_avg": [ 2.8, 5.6 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.28571428571428564, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2722197598119580823&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "kyoto-u.ac.jp;;;kyoto-u.ac.jp", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Kyoto University", "aff_unique_dep": "", "aff_unique_url": "https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Human-Guided Complexity-Controlled 
Abstractions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70201", "id": "tSEeRl7ACo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d94b46ec30adee2bbb134f813fc9dde0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tSEeRl7ACo", "openreview": "https://openreview.net/forum?id=tSEeRl7ACo", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70201", "video": "https://nips.cc/virtual/2023/poster/70201", "author_site": "Andi Peng, Mycal Tucker, Eoin Kenny, Noga Zaslavsky, Pulkit Agrawal, Julie A Shah", "tldr": "", "abstract": "Neural networks often learn task-specific latent representations that fail to generalize to novel settings or tasks. Conversely, humans learn discrete representations (i.e., concepts or words) at a variety of abstraction levels (e.g., \"bird\" vs. \"sparrow'\") and use the appropriate abstraction based on tasks. Inspired by this, we train neural models to generate a spectrum of discrete representations, and control the complexity of the representations (roughly, how many bits are allocated for encoding inputs) by tuning the entropy of the distribution over representations. In finetuning experiments, using only a small number of labeled examples for a new task, we show that (1) tuning the representation to a task-appropriate complexity level supports the greatest finetuning performance, and (2) in a human-participant study, users were able to identify the appropriate complexity level for a downstream task via visualizations of discrete representations. Our results indicate a promising direction for rapid model finetuning by leveraging human insight.", "keywords": "human-in-the-loop;representation learning;interpretability", "primary_area": "", "supplementary_material": "/attachment/b06fbf18ef787d6afd8ad576e5e8018bbaec4efc.pdf", "author": "Andi Peng;Mycal Tucker;Eoin M. Kenny;Noga Zaslavsky;Pulkit Agrawal;Julie Shah", "authorids": "~Andi_Peng1;~Mycal_Tucker1;~Eoin_M._Kenny1;~Noga_Zaslavsky1;~Pulkit_Agrawal1;~Julie_Shah2", "gender": "F;M;F;M;F;M", "homepage": "https://andipeng.com/;http://mycaltucker.com;https://www.nogsky.com;https://people.eecs.berkeley.edu/~pulkitag/;https://interactive.mit.edu;https://eoinkenny.github.io/", "dblp": "242/9185;256/5146;160/8830;149/2672;;", "google_scholar": "S63gb38AAAAJ;V1kgcxIAAAAJ;VdYiwjwAAAAJ;UpZmJI0AAAAJ;;AzMTFY4AAAAJ", "orcid": ";;0000-0003-3941-3518;;;0000-0001-5800-2525", "linkedin": ";;;;;", "or_profile": "~Andi_Peng1;~Mycal_Tucker1;~Noga_Zaslavsky1;~Pulkit_Agrawal1;~Julie_Shah2;~Eoin_Kenny1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Professor;Postdoc", "bibtex": "@inproceedings{\npeng2023humanguided,\ntitle={Human-Guided Complexity-Controlled Abstractions},\nauthor={Andi Peng and Mycal Tucker and Eoin M. 
Kenny and Noga Zaslavsky and Pulkit Agrawal and Julie Shah},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tSEeRl7ACo}\n}", "github": "", "project": "", "reviewers": "99tN;6aXd;TMXo;Nw28", "pdf_size": 4706983, "rating": "5;5;6;6", "confidence": "3;4;5;3", "soundness": "3;2;3;4", "novelty": "2;2;3;2", "presentation": "3;1;3;3", "wc_summary": "366;171;36;60", "wc_strengths": "160;84;44;57", "wc_weaknesses": "1101;313;224;87", "wc_questions": "103;160;33;101", "wc_limitations": "39;37;11;78", "wc_review": "1769;765;348;383", "wc_reply_reviewers": "789;154;222;59", "wc_reply_authors": "205;0;497;0", "reply_reviewers": "2;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 158.25, 130.30804848511852 ], "wc_strengths_avg": [ 86.25, 44.95761893161158 ], "wc_weaknesses_avg": [ 431.25, 394.97112236212917 ], "wc_questions_avg": [ 99.25, 44.990971316476376 ], "wc_limitations_avg": [ 41.25, 23.920441049445557 ], "wc_review_avg": [ 816.25, 573.8734072075479 ], "wc_reply_reviewers_avg": [ 306.0, 284.8060743734234 ], "wc_reply_authors_avg": [ 175.5, 203.61299074469684 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8593470326966601560&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ForecastPFN: Synthetically-Trained Zero-Shot Forecasting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70200", "id": "tScBQRNgjk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0731f0e65559059eb9cd9d6f44ce2dd8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tScBQRNgjk", "openreview": "https://openreview.net/forum?id=tScBQRNgjk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70200", "video": "https://nips.cc/virtual/2023/poster/70200", "author_site": "Samuel Dooley, Gurnoor Singh Khurana, Chirag Mohapatra, Siddartha V Naidu, Colin White", "tldr": "", "abstract": "The vast majority of time-series forecasting approaches require a substantial training dataset. However, many real-life forecasting applications have very few initial observations, sometimes just 40 or fewer. Thus, the applicability of most forecasting methods is restricted in data-sparse commercial applications. While there is recent work in the setting of very limited initial data (so-called `zero-shot' forecasting), its performance is inconsistent depending on the data used for pretraining. In this work, we take a different approach and devise ForecastPFN, the first zero-shot forecasting model trained purely on a novel synthetic data distribution.
ForecastPFN is a prior-data fitted network, trained to approximate Bayesian inference, which can make predictions on a new time series dataset in a single forward pass. Through extensive experiments, we show that zero-shot predictions made by ForecastPFN are more accurate and faster compared to state-of-the-art forecasting methods, even when the other methods are allowed to train on hundreds of additional in-distribution data points.", "keywords": "Forecasting;Zero-shot;Synthetic Data", "primary_area": "", "supplementary_material": "", "author": "Samuel Dooley;Gurnoor Singh Khurana;Chirag Mohapatra;Siddartha Venkat Naidu;Colin White", "authorids": "~Samuel_Dooley1;~Gurnoor_Singh_Khurana1;~Chirag_Mohapatra1;~Siddartha_Venkat_Naidu1;~Colin_White1", "gender": ";M;M;M;M", "homepage": ";;;;https://crwhite.ml/", "dblp": ";;;;136/9162", "google_scholar": ";a4aHHUIAAAAJ;;;LS6HY-gAAAAJ", "orcid": ";;;;", "linkedin": ";gurnoor-singh-48b1b7191;chiragmohapatra13/;siddarthanaidu/;", "or_profile": "~Samuel_Dooley1;~Gurnoor_Singh_Khurana1;~Chirag_Mohapatra1;~Siddartha_Venkat_Naidu1;~Colin_White1", "aff": ";Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Indian Institute of Technology, Delhi;;Abacus.AI", "aff_domain": ";cse.iitb.ac.in;iitd.ac.in;;abacus.ai", "position": ";Undergrad student;Undergrad student;;Head of Research", "bibtex": "@inproceedings{\ndooley2023forecastpfn,\ntitle={Forecast{PFN}: Synthetically-Trained Zero-Shot Forecasting},\nauthor={Samuel Dooley and Gurnoor Singh Khurana and Chirag Mohapatra and Siddartha Venkat Naidu and Colin White},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tScBQRNgjk}\n}", "github": "", "project": "", "reviewers": "3SYZ;5AR9;MwwS;458V", "pdf_size": 2326384, "rating": "5;5;5;6", "confidence": "3;3;4;4", "soundness": "1;2;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "66;74;91;23", "wc_strengths": "91;64;98;26", "wc_weaknesses": "293;157;110;7", "wc_questions": "212;6;100;678", "wc_limitations": "107;1;2;66", "wc_review": "769;302;401;800", "wc_reply_reviewers": "501;9;39;13", "wc_reply_authors": "2231;409;137;0", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.5, 25.064915718988566 ], "wc_strengths_avg": [ 69.75, 28.269904492233433 ], "wc_weaknesses_avg": [ 141.75, 102.80412199907161 ], "wc_questions_avg": [ 249.0, 258.19566224086725 ], "wc_limitations_avg": [ 44.0, 44.90545623863541 ], "wc_review_avg": [ 568.0, 219.58483554198364 ], "wc_reply_reviewers_avg": [ 140.5, 208.4532321649151 ], "wc_reply_authors_avg": [ 694.25, 899.3718293898247 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6974559701953774937&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";cse.iitb.ac.in;iitd.ac.in;;abacus.ai", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Indian Institute of Technology Bombay;Indian Institute of Technology Delhi;Abacus.AI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitb.ac.in;https://www.iitdelhi.ac.in;https://www.abacus.ai", "aff_unique_abbr": 
"IIT Bombay;IIT Delhi;Abacus.AI", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Bombay;Delhi;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "India;United States" }, { "title": "Language Semantic Graph Guided Data-Efficient Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70199", "id": "tUyW68cRqr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4bbd69ce4cea6aa3a08bde08a40fd65a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tUyW68cRqr", "openreview": "https://openreview.net/forum?id=tUyW68cRqr", "poster": "/media/PosterPDFs/NeurIPS%202023/70199.png?t=1701773002.156948", "slides": "https://nips.cc/virtual/2023/poster/70199", "video": "https://nips.cc/virtual/2023/poster/70199", "author_site": "Wenxuan Ma, Shuang Li, lincan Cai, Jingxuan Kang", "tldr": "", "abstract": "Developing generalizable models that can effectively learn from limited data and with minimal reliance on human supervision is a significant objective within the machine learning community, particularly in the era of deep neural networks. Therefore, to achieve data-efficient learning, researchers typically explore approaches that can leverage more related or unlabeled data without necessitating additional manual labeling efforts, such as Semi-Supervised Learning (SSL), Transfer Learning (TL), and Data Augmentation (DA).\nSSL leverages unlabeled data in the training process, while TL enables the transfer of expertise from related data distributions. DA broadens the dataset by synthesizing new data from existing examples. However, the significance of additional knowledge contained within labels has been largely overlooked in research. In this paper, we propose a novel perspective on data efficiency that involves exploiting the semantic information contained in the labels of the available data. Specifically, we introduce a Language Semantic Graph (LSG) which is constructed from labels manifest as natural language descriptions. Upon this graph, an auxiliary graph neural network is trained to extract high-level semantic relations and then used to guide the training of the primary model, enabling more adequate utilization of label knowledge. Across image, video, and audio modalities, we utilize the LSG method in both TL and SSL scenarios and illustrate its versatility in significantly enhancing performance compared to other data-efficient learning approaches. 
Additionally, our in-depth analysis shows that the LSG method also expedites the training process.", "keywords": "Data-Efficient Learning;Language Semantic Graph", "primary_area": "", "supplementary_material": "/attachment/09e3c85bb03925bd9f0daebc727275d5ab0496d5.pdf", "author": "Wenxuan Ma;Shuang Li;Lincan Cai;Jingxuan Kang", "authorids": "~Wenxuan_Ma2;~Shuang_Li6;~Lincan_Cai1;~Jingxuan_Kang1", "gender": "M;M;;", "homepage": ";https://shuangli.xyz;https://github.com/cailincan0129;", "dblp": "289/0784-1;43/6294-8;;", "google_scholar": "u7aJOt8AAAAJ;VXCiAc4AAAAJ;wH-dNbAAAAAJ;", "orcid": "0000-0001-5402-6028;0000-0001-6807-9905;;", "linkedin": ";;;", "or_profile": "~Wenxuan_Ma2;~Shuang_Li6;~Lincan_Cai1;~Jingxuan_Kang1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;", "position": "MS student;Associate Professor;Undergrad student;", "bibtex": "@inproceedings{\nma2023language,\ntitle={Language Semantic Graph Guided Data-Efficient Learning},\nauthor={Wenxuan Ma and Shuang Li and Lincan Cai and Jingxuan Kang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tUyW68cRqr}\n}", "github": "", "project": "", "reviewers": "3r7Y;ZDzt;A9AK;pDMD;biPo", "pdf_size": 935090, "rating": "4;4;6;6;7", "confidence": "4;4;4;4;5", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "1;3;3;3;4", "wc_summary": "64;185;163;66;262", "wc_strengths": "65;33;61;40;192", "wc_weaknesses": "15;197;49;59;36", "wc_questions": "118;73;180;95;74", "wc_limitations": "27;5;18;42;48", "wc_review": "289;493;471;302;612", "wc_reply_reviewers": "97;322;34;86;19", "wc_reply_authors": "571;1086;10;834;9", "reply_reviewers": "3;2;1;2;1", "reply_authors": "4;4;2;3;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 148.0, 75.32595834106593 ], "wc_strengths_avg": [ 78.2, 58.176971389029866 ], "wc_weaknesses_avg": [ 71.2, 64.59845199383652 ], "wc_questions_avg": [ 108.0, 39.582824558133794 ], "wc_limitations_avg": [ 28.0, 15.658863304850707 ], "wc_review_avg": [ 433.4, 122.45913604137505 ], "wc_reply_reviewers_avg": [ 111.6, 109.30068618265852 ], "wc_reply_authors_avg": [ 502.0, 433.8557363917181 ], "reply_reviewers_avg": [ 1.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.0, 0.8944271909999159 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6666666666666666, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6225829875093645636&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Ignorance is Bliss: Robust Control via Information Gating", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70198", "id": "tW2KSph9o8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/797be96e4481c3fe5d675c1ba5352969-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=tW2KSph9o8", "openreview": "https://openreview.net/forum?id=tW2KSph9o8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70198", "video": "https://nips.cc/virtual/2023/poster/70198", "author_site": "Manan Tomar, Riashat Islam, Riashat Islam, Matthew Taylor, Sergey Levine, Philip Bachman", "tldr": "", "abstract": "Informational parsimony provides a useful inductive bias for learning representations that achieve better generalization by being robust to noise and spurious correlations. We propose *information gating* as a way to learn parsimonious representations that identify the minimal information required for a task. When gating information, we can learn to reveal as little information as possible so that a task remains solvable, or hide as little information as possible so that a task becomes unsolvable. We gate information using a differentiable parameterization of the signal-to-noise ratio, which can be applied to arbitrary values in a network, e.g., erasing pixels at the input layer or activations in some intermediate layer. When gating at the input layer, our models learn which visual cues matter for a given task. When gating intermediate layers, our models learn which activations are needed for subsequent stages of computation. We call our approach *InfoGating*. We apply InfoGating to various objectives such as multi-step forward and inverse dynamics models, Q-learning, and behavior cloning, highlighting how InfoGating can naturally help in discarding information not relevant for control. Results show that learning to identify and use minimal information can improve generalization in downstream tasks. Policies based on InfoGating are considerably more robust to irrelevant visual features, leading to improved pretraining and finetuning of RL models.", "keywords": "representation learning;mutual information", "primary_area": "", "supplementary_material": "/attachment/d8054180fa8ec81bf026ef30acdd2897d8980931.zip", "author": "Manan Tomar;Riashat Islam;Matthew E. Taylor;Sergey Levine;Philip Bachman", "authorids": "~Manan_Tomar1;~Riashat_Islam1;~Matthew_E._Taylor2;~Sergey_Levine1;~Philip_Bachman1", "gender": "M;M;M;M;M", "homepage": "https://manantomar.github.io/;https://riashat.github.io/;https://people.eecs.berkeley.edu/~svlevine/;;https://irll.ca", "dblp": "241/6227;198/0459;80/7594;;46/4287.html", "google_scholar": ";https://scholar.google.ca/citations?user=2_4Rs44AAAAJ;8R35rCwAAAAJ;;edQgLXcAAAAJ", "orcid": ";;;;0000-0001-8946-0211", "linkedin": ";;;;", "or_profile": "~Manan_Tomar1;~Riashat_Islam1;~Sergey_Levine1;~Philip_Bachman1;~Matthew_Taylor1", "aff": "Microsoft;Mila - Quebec AI Institute;Google;Microsoft;Washington State University, Pullman", "aff_domain": "microsoft.com;mcgill.ca;google.com;microsoft.com;wsu.edu", "position": "Intern;PhD student;Research Scientist;Researcher;Adjunct Professor", "bibtex": "@inproceedings{\ntomar2023ignorance,\ntitle={Ignorance is Bliss: Robust Control via Information Gating},\nauthor={Manan Tomar and Riashat Islam and Matthew E. 
Taylor and Sergey Levine and Philip Bachman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tW2KSph9o8}\n}", "github": "", "project": "", "reviewers": "WnVz;TDKb;8eSS;L82J;xdoN", "pdf_size": 2393672, "rating": "5;6;6;6;7", "confidence": "4;4;1;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;2;2;3", "presentation": "2;3;3;3;3", "wc_summary": "109;59;57;120;68", "wc_strengths": "154;40;72;95;56", "wc_weaknesses": "451;86;271;40;79", "wc_questions": "124;62;78;31;20", "wc_limitations": "1;183;17;7;11", "wc_review": "839;430;495;293;234", "wc_reply_reviewers": "73;22;148;31;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 82.6, 26.53752060762271 ], "wc_strengths_avg": [ 83.4, 39.70692634793078 ], "wc_weaknesses_avg": [ 185.4, 155.05689278455185 ], "wc_questions_avg": [ 63.0, 36.932370625238775 ], "wc_limitations_avg": [ 43.8, 69.79512876985041 ], "wc_review_avg": [ 458.2, 211.9975471556216 ], "wc_reply_reviewers_avg": [ 54.8, 52.27389405812427 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.28867513459481287, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17871383981584978937&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "microsoft.com;mcgill.ca;google.com;microsoft.com;wsu.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Microsoft;Quebec AI Institute;Google;Washington State University", "aff_unique_dep": "Microsoft Corporation;AI Institute;Google;", "aff_unique_url": "https://www.microsoft.com;https://mila.quebec;https://www.google.com;https://wsu.edu", "aff_unique_abbr": "Microsoft;Mila;Google;WSU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Pullman", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "id": "taQ64d2KBX", "title": "Learning Dynamical Systems from Noisy Data with Inverse-Explicit Integrators", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce the mean inverse integrator (MII), a novel approach to increase the accuracy when training neural networks to approximate vector fields of dynamical systems from noisy data. This method can be used to average multiple trajectories obtained by numerical integrators such as Runge--Kutta methods. We show that the class of mono-implicit Runge--Kutta methods (MIRK) has particular advantages when used in connection with MII. When training vector field approximations, explicit expressions for the loss functions are obtained when inserting the training data in the MIRK formulae, unlocking symmetric and high order integrators that would otherwise be implicit for initial value problems. The combined approach of applying MIRK within MII yields a significantly lower error compared to the plain use of the numerical integrator without averaging the trajectories. This is demonstrated with experiments using data from several (chaotic) Hamiltonian systems. 
Additionally, we perform a sensitivity analysis of the loss functions under normally distributed perturbations, supporting the favourable performance of MII.", "keywords": "Deep neural networks;Hamiltonian systems;ODE discretization;Runge-Kutta;Geometric numerical integration", "primary_area": "", "supplementary_material": "/attachment/7a6ebff3e31482df6e3cb9151ab927f4f5526569.zip", "author": "H\u00e5kon Noren;S\u00f8lve Eidnes;Elena Celledoni", "authorids": "~H\u00e5kon_Noren1;~S\u00f8lve_Eidnes1;~Elena_Celledoni1", "gender": "M;M;F", "homepage": "https://www.ntnu.no/ansatte/hakon.noren;https://www.sintef.no/alle-ansatte/ansatt/solve.eidnes/;https://www.ntnu.edu/employees/elena.celledoni", "dblp": ";207/9111;", "google_scholar": "https://scholar.google.no/citations?user=joKiXfcAAAAJ;64BMmdUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0005-8272-4970;0000-0002-1002-3543;0000-0002-2863-2603", "linkedin": ";;elena-celledoni-631b1995/", "or_profile": "~H\u00e5kon_Noren1;~S\u00f8lve_Eidnes1;~Elena_Celledoni1", "aff": "Norwegian University of Science and Technology;SINTEF Digital;Norwegian University of Science and Technology", "aff_domain": "ntnu.no;sintef.no;ntnu.no", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nnoren2023learning,\ntitle={Learning Dynamical Systems from Noisy Data with Inverse-Explicit Integrators},\nauthor={H{\\r{a}}kon Noren and S{\\o}lve Eidnes and Elena Celledoni},\nyear={2023},\nurl={https://openreview.net/forum?id=taQ64d2KBX}\n}", "github": "", "project": "", "reviewers": "1rk9;yxuC;J6Zi;HMrx;j2qw", "site": "https://openreview.net/forum?id=taQ64d2KBX", "pdf_size": 1745623, "rating": "4;5;5;6;6", "confidence": "4;2;2;4;2", "soundness": "3;2;3;3;3", "novelty": "2;3;2;3;2", "presentation": "2;2;3;2;3", "wc_summary": "67;27;81;79;61", "wc_strengths": "48;27;70;18;35", "wc_weaknesses": "194;38;66;55;27", "wc_questions": "119;137;35;395;1", "wc_limitations": "10;18;1;176;1", "wc_review": "438;247;253;723;125", "wc_reply_reviewers": "72;27;27;21;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 63.0, 19.473058311420935 ], "wc_strengths_avg": [ 39.6, 18.11739495622922 ], "wc_weaknesses_avg": [ 76.0, 60.51446108162907 ], "wc_questions_avg": [ 137.4, 138.41040423320786 ], "wc_limitations_avg": [ 41.2, 67.69756273308516 ], "wc_review_avg": [ 357.2, 208.4498980570631 ], "wc_reply_reviewers_avg": [ 29.4, 23.499787233079367 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4437387845580001305&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Norwegian University of Science and Technology;SINTEF", "aff_unique_dep": ";Digital", "aff_unique_url": "https://www.ntnu.no;https://www.sintef.no", "aff_unique_abbr": "NTNU;SINTEF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Norway" }, { "title": "Tracr: Compiled Transformers as a Laboratory for Interpretability", "status": "Spotlight", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/70197", "id": "tbbId8u7nP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/771155abaae744e08576f1f3b4b7ac0d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tbbId8u7nP", "openreview": "https://openreview.net/forum?id=tbbId8u7nP", "poster": "/media/PosterPDFs/NeurIPS%202023/70197.png?t=1701442757.980318", "slides": "https://nips.cc/virtual/2023/poster/70197", "video": "https://nips.cc/virtual/2023/poster/70197", "author_site": "David Lindner, Janos Kramar, Sebastian Farquhar, Matthew Rahtz, Tom McGrath, Vladimir Mikulik", "tldr": "", "abstract": "We show how to \"compile\" human-readable programs into standard decoder-only transformer models. Our compiler, Tracr, generates models with known structure. This structure can be used to design experiments. For example, we use it to study \"superposition\" in transformers that execute multi-step algorithms. Additionally, the known structure of Tracr-compiled models can serve as _ground-truth_ for evaluating interpretability methods. Commonly, because the \"programs\" learned by transformers are unknown it is unclear whether an interpretation succeeded. We demonstrate our approach by implementing and examining programs including computing token frequencies, sorting, and parenthesis checking. We provide an open-source implementation of Tracr at https://github.com/google-deepmind/tracr.", "keywords": "interpretability;transformers;language models;RASP;Tracr;mechanistic interpretability", "primary_area": "", "supplementary_material": "/attachment/b7946c993bff496badb72d6bd10883f69114cf40.zip", "author": "David Lindner;Janos Kramar;Sebastian Farquhar;Matthew Rahtz;Thomas McGrath;Vladimir Mikulik", "authorids": "~David_Lindner1;~Janos_Kramar1;~Sebastian_Farquhar1;~Matthew_Rahtz1;~Thomas_McGrath1;~Vladimir_Mikulik1", "gender": ";M;;M;;", "homepage": ";;https://sebastianfarquhar.com/;http://amid.fish/;;", "dblp": ";49/9013;215/5432;175/1944;;", "google_scholar": ";;bvShhTEAAAAJ;;;", "orcid": ";;;;;", "linkedin": ";;;;https://uk.linkedin.com/in/tom-mcgrath-7337bb151;", "or_profile": "~David_Lindner1;~Janos_Kramar1;~Sebastian_Farquhar1;~Matthew_Rahtz1;~Thomas_McGrath1;~Vladimir_Mikulik1", "aff": ";Google DeepMind;Google DeepMind;Google DeepMind;Google;", "aff_domain": ";deepmind.com;google.com;deepmind.com;google.com;", "position": ";Researcher;Researcher;Researcher;Research Scientist;", "bibtex": "@inproceedings{\nlindner2023tracr,\ntitle={Tracr: Compiled Transformers as a Laboratory for Interpretability},\nauthor={David Lindner and Janos Kramar and Sebastian Farquhar and Matthew Rahtz and Thomas McGrath and Vladimir Mikulik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tbbId8u7nP}\n}", "github": "", "project": "", "reviewers": "ACvV;PkEE;767m;XH5L", "pdf_size": 1724258, "rating": "6;7;7;8", "confidence": "5;4;3;4", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "4;4;4;3", "wc_summary": "67;152;88;73", "wc_strengths": "54;240;89;116", "wc_weaknesses": "136;415;47;301", "wc_questions": "74;11;58;73", "wc_limitations": "34;13;17;8", "wc_review": "365;831;299;571", "wc_reply_reviewers": "130;35;22;29", "wc_reply_authors": "143;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 
0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 33.7860918130523 ], "wc_strengths_avg": [ 124.75, 70.07629770471611 ], "wc_weaknesses_avg": [ 224.75, 142.72416578841862 ], "wc_questions_avg": [ 54.0, 25.622255950637914 ], "wc_limitations_avg": [ 18.0, 9.772410142846033 ], "wc_review_avg": [ 516.5, 207.4481863020258 ], "wc_reply_reviewers_avg": [ 54.0, 44.11915683691156 ], "wc_reply_authors_avg": [ 35.75, 61.92081637058736 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=956579595086255459&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";deepmind.com;google.com;deepmind.com;google.com;", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "CQM: Curriculum Reinforcement Learning with a Quantized World Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70196", "id": "tcotyjon2a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f93df618c6907bc0a03222040d70d004-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tcotyjon2a", "openreview": "https://openreview.net/forum?id=tcotyjon2a", "poster": "/media/PosterPDFs/NeurIPS%202023/70196.png?t=1701681411.8492868", "slides": "https://nips.cc/virtual/2023/poster/70196", "video": "https://nips.cc/virtual/2023/poster/70196", "author_site": "Seungjae Lee, Daesol Cho, Jonghae Park, H. Jin Kim", "tldr": "", "abstract": "Recent curriculum Reinforcement Learning (RL) has shown notable progress in solving complex tasks by proposing sequences of surrogate tasks. However, previous approaches often face challenges when generating curriculum goals in a high-dimensional space and thus usually rely on manually specified goal spaces. To alleviate this limitation and improve the scalability of the curriculum, we propose a novel curriculum method that automatically defines the semantic goal space which contains vital information for the curriculum process, and suggests curriculum goals over it. To define the semantic goal space, our method discretizes continuous observations via vector quantized-variational autoencoders (VQ-VAE) and restores the temporal relations between the discretized observations by a graph. Concurrently, our method suggests uncertainty- and temporal-distance-aware curriculum goals that converge to the final goals over the automatically composed goal space. We demonstrate that the proposed method enables efficient exploration in an uninformed environment with raw goal examples only. It also outperforms state-of-the-art curriculum RL methods in data efficiency and performance on various goal-reaching tasks, even with ego-centric visual inputs.", "keywords": "Reinforcement Learning;Curriculum Learning;Goal-conditioned RL", "primary_area": "", "supplementary_material": "", "author": "Seungjae Lee;Daesol Cho;Jonghae Park;H.
Jin Kim", "authorids": "~Seungjae_Lee2;~Daesol_Cho1;~Jonghae_Park2;~H._Jin_Kim1", "gender": ";;M;F", "homepage": "https://sjlee.cc;https://dscho1234.github.io;https://github.com/pjhae;http://larr.snu.ac.kr", "dblp": ";317/6937;;91/5753", "google_scholar": "hpR9h74AAAAJ;3ZRfI74AAAAJ;;TLQUwIMAAAAJ", "orcid": ";0000-0002-4105-4422;;", "linkedin": ";;;", "or_profile": "~Seungjae_Lee2;~Daesol_Cho1;~Jonghae_Park2;~H._Jin_Kim1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "MS student;PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nlee2023cqm,\ntitle={{CQM}: Curriculum Reinforcement Learning with a Quantized World Model},\nauthor={Seungjae Lee and Daesol Cho and Jonghae Park and H. Jin Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tcotyjon2a}\n}", "github": "", "project": "", "reviewers": "paJb;G8ip;vggh;eib8;uT8t", "pdf_size": 17077288, "rating": "5;5;6;7;8", "confidence": "4;3;5;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "71;37;92;192;132", "wc_strengths": "13;48;39;56;47", "wc_weaknesses": "271;99;161;287;51", "wc_questions": "161;27;46;82;144", "wc_limitations": "11;47;3;5;35", "wc_review": "527;258;341;622;409", "wc_reply_reviewers": "27;0;10;18;19", "wc_reply_authors": "44;79;21;33;21", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 104.8, 53.379396774411 ], "wc_strengths_avg": [ 40.6, 14.813507349712966 ], "wc_weaknesses_avg": [ 173.8, 92.84481676431916 ], "wc_questions_avg": [ 92.0, 52.73708372672876 ], "wc_limitations_avg": [ 20.2, 17.6 ], "wc_review_avg": [ 431.4, 129.77611490563277 ], "wc_reply_reviewers_avg": [ 14.8, 9.15204895091804 ], "wc_reply_authors_avg": [ 39.6, 21.481154531356083 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2711630722733202, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17427104623969460172&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "td6xbEOPLr", "title": "FATE: Fairness Attacks on Graph Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study fairness attacks on graph learning to answer the following question: How can we achieve poisoning attacks on a graph learning model to exacerbate the bias? We answer this question via a bi-level optimization problem and propose a meta learning-based attacking framework named FATE. The proposed framework is broadly applicable with respect to various fairness definitions and graph learning models, as well as arbitrary choices of manipulation operations. 
We further instantiate FATE to attack statistical parity and individual fairness on graph neural networks. We conduct extensive experimental evaluations on real-world datasets in the task of semi-supervised node classification. The experimental results demonstrate that FATE could amplify the bias of graph neural networks with or without fairness consideration while maintaining the utility on the downstream task. We hope this paper provides insights into the adversarial robustness of fair graph learning and can shed light on designing robust and fair graph learning in future studies.", "keywords": "graph learning;fairness;adversarial attacks", "primary_area": "", "supplementary_material": "/attachment/6a5f69f59a934798134d97fd8689bed3567443cf.pdf", "author": "Jian Kang;Yinglong Xia;Ross Maciejewski;Jiebo Luo;Hanghang Tong", "authorids": "~Jian_Kang1;~Yinglong_Xia1;~Ross_Maciejewski1;~Jiebo_Luo1;~Hanghang_Tong3", "gender": "M;M;M;;M", "homepage": "https://jiank2.github.io/;;http://rmaciejewski.faculty.asu.edu/;http://tonghanghang.org;https://www.cs.rochester.edu/u/jluo/", "dblp": "56/6072-8;61/3251;81/5349.html;58/1757;25/5545", "google_scholar": "U_jFlOQAAAAJ;;https://scholar.google.com.tw/citations?user=nChgOjEAAAAJ;RaINcuUAAAAJ;CcbnBvgAAAAJ", "orcid": "0000-0003-3902-7131;0000-0002-8155-5440;;0000-0003-4405-3887;0000-0002-4516-9729", "linkedin": "jiank2/;;;htong/;jieboluo/", "or_profile": "~Jian_Kang1;~Yinglong_Xia1;~Ross_Maciejewski1;~Hanghang_Tong3;~Jiebo_Luo3", "aff": "University of Illinois Urbana-Champaign;Meta;Arizona State University;University of Illinois, Urbana Champaign;University of Rochester", "aff_domain": "illinois.edu;meta.com;asu.edu;illinois.edu;rochester.edu", "position": "PhD student;Researcher;Full Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nkang2023fate,\ntitle={{FATE}: Fairness Attacks on Graph Learning},\nauthor={Jian Kang and Yinglong Xia and Ross Maciejewski and Jiebo Luo and Hanghang Tong},\nyear={2023},\nurl={https://openreview.net/forum?id=td6xbEOPLr}\n}", "github": "", "project": "", "reviewers": "NH2N;G7MB;Lw8H;BvDw;AtfZ", "site": "https://openreview.net/forum?id=td6xbEOPLr", "pdf_size": 17579534, "rating": "5;5;5;6;7", "confidence": "4;3;4;3;2", "soundness": "3;2;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;4;3;3;2", "wc_summary": "76;94;107;63;160", "wc_strengths": "41;68;289;63;43", "wc_weaknesses": "71;262;105;30;50", "wc_questions": "456;14;41;29;18", "wc_limitations": "17;15;3;1;14", "wc_review": "661;453;545;186;285", "wc_reply_reviewers": "286;0;0;0;22", "wc_reply_authors": "779;69;70;65;57", "reply_reviewers": "2;0;0;0;1", "reply_authors": "4;2;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 100.0, 33.555923471125034 ], "wc_strengths_avg": [ 100.8, 94.70036958745197 ], "wc_weaknesses_avg": [ 103.6, 83.00024096350563 ], "wc_questions_avg": [ 111.6, 172.4559074082416 ], "wc_limitations_avg": [ 10.0, 6.6332495807108 ], "wc_review_avg": [ 426.0, 171.8115246425571 ], "wc_reply_reviewers_avg": [ 61.6, 112.5230643023909 ], "wc_reply_authors_avg": [ 208.0, 285.5366876602725 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8685990362153793, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:IGGPetU5r5YJ:scholar.google.com/&scioq=FATE:+Fairness+Attacks+on+Graph+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;Meta;Arizona State University;University of Rochester", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://illinois.edu;https://meta.com;https://www.asu.edu;https://www.rochester.edu", "aff_unique_abbr": "UIUC;Meta;ASU;U of R", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FACE: Evaluating Natural Language Generation with Fourier Analysis of Cross-Entropy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70195", "id": "tdyLryDebq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/37094fdc81632915a5738293cf9b7ad4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tdyLryDebq", "openreview": "https://openreview.net/forum?id=tdyLryDebq", "poster": "/media/PosterPDFs/NeurIPS%202023/70195.png?t=1699863764.1761608", "slides": "https://nips.cc/virtual/2023/poster/70195", "video": "https://nips.cc/virtual/2023/poster/70195", "author_site": "Zuhao Yang, Yingfang Yuan, Yang Xu, SHUO ZHAN, Huajun Bai, Kefan Chen", "tldr": "", "abstract": "Measuring the distance between machine-produced and human language is a critical open problem. Inspired by empirical findings from psycholinguistics on the periodicity of entropy in language, we propose FACE, a set of metrics based on Fourier Analysis of the estimated Cross-Entropy of language, for measuring the similarity between model-generated and human-written languages. 
Based on an open-ended generation task and the experimental data from previous studies, we find that FACE effectively identifies the human-model gap, scales with model size, reflects the outcomes of different sampling methods for decoding, and correlates well with other evaluation metrics and with human judgment scores.", "keywords": "natural language generation; evaluation metrics; cross-entropy; language model", "primary_area": "", "supplementary_material": "/attachment/485e94656c550b09fddf409336fd8bb277a9e138.zip", "author": "Zuhao Yang;Yingfang Yuan;Yang Xu;SHUO ZHAN;Huajun Bai;Kefan Chen", "authorids": "~Zuhao_Yang1;~Yingfang_Yuan1;~Yang_Xu6;~SHUO_ZHAN1;~Huajun_Bai1;~Kefan_Chen4", "gender": "M;M;M;M;M;M", "homepage": "https://mwxely.github.io/;https://yuanjames.github.io/;https://clcs.sdsu.edu/;;https://github.com/baihuajun24;", "dblp": "338/9785;284/0759;61/3906-24.html;;230/3568;186/9998", "google_scholar": "SF83uh0AAAAJ;nT2T8M4AAAAJ;DnivueEAAAAJ;;bn5nR50AAAAJ;LENo52IAAAAJ", "orcid": ";;0000-0002-5402-9904;;0000-0001-6017-9776;", "linkedin": "zuhao-yang-a39285231/;;;shuo-zhan-85b34a276/;;kefan-chen-24387417b/", "or_profile": "~Zuhao_Yang1;~Yingfang_Yuan1;~Yang_Xu6;~SHUO_ZHAN1;~Huajun_Bai1;~Kefan_Chen4", "aff": "Nanyang Technological University;Heriot-Watt University;San Diego State University;Nanyang Technological University;;Heriot-Watt University", "aff_domain": "e.ntu.edu.sg;hw.ac.uk;sdsu.edu;ntu.edu.sg;;hw.ac.uk", "position": "MS student;PhD student;Assistant Professor;MS student;;PhD student", "bibtex": "@inproceedings{\nyang2023face,\ntitle={{FACE}: Evaluating Natural Language Generation with Fourier Analysis of Cross-Entropy},\nauthor={Zuhao Yang and Yingfang Yuan and Yang Xu and SHUO ZHAN and Huajun Bai and Kefan Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tdyLryDebq}\n}", "github": "", "project": "", "reviewers": "wEMM;AtQ2;mMGf;s437;v6cq", "pdf_size": 2006675, "rating": "3;4;5;6;6", "confidence": "4;3;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;1;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "34;174;62;213;30", "wc_strengths": "48;60;48;56;63", "wc_weaknesses": "214;327;188;52;33", "wc_questions": "2;364;9;23;17", "wc_limitations": "6;41;7;1;16", "wc_review": "304;966;314;345;159", "wc_reply_reviewers": "439;396;106;6;5", "wc_reply_authors": "709;796;0;0;0", "reply_reviewers": "3;1;1;1;1", "reply_authors": "3;2;1;1;1", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 102.6, 76.04104154994197 ], "wc_strengths_avg": [ 55.0, 6.131883886702357 ], "wc_weaknesses_avg": [ 162.8, 108.94292083471969 ], "wc_questions_avg": [ 83.0, 140.6797782198991 ], "wc_limitations_avg": [ 14.2, 14.246403054806501 ], "wc_review_avg": [ 417.6, 281.6115054467768 ], "wc_reply_reviewers_avg": [ 190.4, 189.51158275947145 ], "wc_reply_authors_avg": [ 301.0, 369.6733693410982 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13903996858374351392&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "e.ntu.edu.sg;hw.ac.uk;sdsu.edu;ntu.edu.sg;;hw.ac.uk", "author_num": 6, "aff_unique_index":
"0;1;2;0;1", "aff_unique_norm": "Nanyang Technological University;Heriot-Watt University;San Diego State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.hw.ac.uk;https://www.sdsu.edu", "aff_unique_abbr": "NTU;HWU;SDSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;1", "aff_country_unique": "Singapore;United Kingdom;United States" }, { "title": "Stable Diffusion is Unstable", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70194", "id": "tesBViWnbx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b733cdd80ed2ae7e3156d8c33108c5d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tesBViWnbx", "openreview": "https://openreview.net/forum?id=tesBViWnbx", "poster": "/media/PosterPDFs/NeurIPS%202023/70194.png?t=1699940859.8303864", "slides": "https://nips.cc/virtual/2023/poster/70194", "video": "https://nips.cc/virtual/2023/poster/70194", "author_site": "Chengbin Du, Yanxi Li, Zhongwei Qiu, Chang Xu", "tldr": "", "abstract": "Recently, text-to-image models have been thriving. Despite their powerful generative capacity, our research has uncovered a lack of robustness in this generation process. Specifically, the introduction of small perturbations to the text prompts can result in the blending of primary subjects with other categories or their complete disappearance in the generated images. In this paper, we propose **Auto-attack on Text-to-image Models (ATM)**, a gradient-based approach, to effectively and efficiently generate such perturbations. By learning a Gumbel Softmax distribution, we can make the discrete process of word replacement or extension continuous, thus ensuring the differentiability of the perturbation generation. Once the distribution is learned, ATM can sample multiple attack samples simultaneously. These attack samples can prevent the generative model from generating the desired subjects without tampering with the category keywords in the prompt. ATM has achieved a 91.1\\% success rate in short-text attacks and an 81.2\\% success rate in long-text attacks. Further empirical analysis revealed three attack patterns based on: 1) variability in generation speed, 2) similarity of coarse-grained characteristics, and 3) polysemy of words. 
The code is available at https://github.com/duchengbin8/Stable_Diffusion_is_Unstable", "keywords": "Adversarial Attack;Generative Model;Diffusion Model;Latent Diffusion Model;Conditional Latent Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/8885243f15fbdf2a426ed9bc28f04e36d74a158b.pdf", "author": "Chengbin Du;Yanxi Li;Zhongwei Qiu;Chang Xu", "authorids": "~Chengbin_Du1;~Yanxi_Li1;~Zhongwei_Qiu1;~Chang_Xu4", "gender": "M;M;M;", "homepage": "https://www.sydney.edu.au/engineering/about/our-people/research-students/chengbin-du-321.html;;https://ericzw.github.io/;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": "340/4112;24/5261-1;246/5883;97/2966-2", "google_scholar": "https://scholar.google.com.au/citations?hl=en;;uVV3rqcAAAAJ;N4F_3eoAAAAJ", "orcid": "0009-0009-3062-4723;;;0000-0002-4756-0609", "linkedin": ";yanxi-li-3245a511a/;;", "or_profile": "~Chengbin_Du1;~Yanxi_Li1;~Zhongwei_Qiu1;~Charles_Xu1", "aff": ";University of Sydney;University of Science and Technology Beijing;University of Sydney", "aff_domain": ";uni.sydney.edu.au;ustb.edu.cn;sydney.edu.au", "position": ";PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndu2023stable,\ntitle={Stable Diffusion is Unstable},\nauthor={Chengbin Du and Yanxi Li and Zhongwei Qiu and Chang Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tesBViWnbx}\n}", "github": "", "project": "", "reviewers": "L6BS;czYq;adct;kKNr", "pdf_size": 14428514, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "18;95;55;89", "wc_strengths": "35;147;90;87", "wc_weaknesses": "43;155;22;118", "wc_questions": "49;6;108;2", "wc_limitations": "21;7;1;1", "wc_review": "166;410;276;297", "wc_reply_reviewers": "21;15;46;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 30.75203245315665 ], "wc_strengths_avg": [ 89.75, 39.63190003015248 ], "wc_weaknesses_avg": [ 84.5, 54.13178363955875 ], "wc_questions_avg": [ 41.25, 42.716361034151774 ], "wc_limitations_avg": [ 7.5, 8.170067319184096 ], "wc_review_avg": [ 287.25, 86.58918812415324 ], "wc_reply_reviewers_avg": [ 20.5, 16.590660023037056 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3547842810068879296&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";uni.sydney.edu.au;ustb.edu.cn;sydney.edu.au", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Sydney;University of Science and Technology Beijing", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;http://www.ustb.edu.cn", "aff_unique_abbr": "USYD;USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;China" }, { "title": "SheetCopilot: Bringing Software Productivity to the Next Level through Large Language Models", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70193", "id": "tfyr2zRVoK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0ff30c4bf31db0119a6219e0d250e037-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tfyr2zRVoK", "openreview": "https://openreview.net/forum?id=tfyr2zRVoK", "poster": "/media/PosterPDFs/NeurIPS%202023/70193.png?t=1698641001.038527", "slides": "https://nips.cc/virtual/2023/poster/70193", "video": "https://nips.cc/virtual/2023/poster/70193", "author_site": "Hongxin Li, Jingran Su, Yuntao Chen, Qing Li, ZHAO-XIANG ZHANG", "tldr": "", "abstract": "Computer end users have spent billions of hours completing daily tasks like tabular data processing and project timeline scheduling. Most of these tasks are repetitive and error-prone, yet most end users lack the skill to automate these burdensome works. With the advent of large language models (LLMs), directing software with natural language user requests become a reachable goal. In this work, we propose a SheetCopilot agent that takes natural language task and control spreadsheet to fulfill the requirements. We propose a set of atomic actions as an abstraction of spreadsheet software functionalities. We further design a state machine-based task planning framework for LLMs to robustly interact with spreadsheets. We curate a representative dataset containing 221 spreadsheet control tasks and establish a fully automated evaluation pipeline for rigorously benchmarking the ability of LLMs in software control tasks. Our SheetCopilot correctly completes 44.3\\% of tasks for a single generation, outperforming the strong code generation baseline by a wide margin. Our project page: https://sheetcopilot.github.io/.", "keywords": "Large Language Model; Task Planning; Embodied AI; Robotics; Software Automation; Decision making", "primary_area": "", "supplementary_material": "/attachment/dba4c5e83fb1ee3bd3642b7cc62db2ac43acc14d.zip", "author": "Hongxin Li;Jingran Su;Yuntao Chen;Qing Li;Zhaoxiang Zhang", "authorids": "~Hongxin_Li1;~Jingran_Su1;~Yuntao_Chen1;~Qing_Li5;~Zhaoxiang_Zhang3", "gender": "M;M;M;M;M", "homepage": ";;;https://www4.comp.polyu.edu.hk/~csqli/;http://zhaoxiangzhang.net", "dblp": ";253/1898;203/8284;(2024-11-14-1812689);55/2285-1.html", "google_scholar": "BO1d4M8AAAAJ;;iLOoUqIAAAAJ;https://scholar.google.co.in/citations?user=D1LEg-YAAAAJ;qxWfV6cAAAAJ", "orcid": "0000-0002-1445-7357;;;0000-0003-3370-471X;", "linkedin": ";;;;", "or_profile": "~Hongxin_Li1;~Jingran_Su1;~Yuntao_Chen1;~Qing_Li5;~Zhaoxiang_Zhang3", "aff": "Institute of Automation\uff0cChinese Academy of Sciences;Hong Kong Polytechnic University;Centre for Artificial Intelligence and Robotics (CAIR), Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Hong Kong Polytechnic University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;polyu.edu.hk;cair-cas.org.hk;polyu.edu.hk;ia.ac.cn", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023sheetcopilot,\ntitle={SheetCopilot: Bringing Software Productivity to the Next Level through Large Language Models},\nauthor={Hongxin Li and Jingran Su and Yuntao Chen and Qing Li and Zhaoxiang Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tfyr2zRVoK}\n}", "github": "", "project": "", "reviewers": "TF2z;HU2o;XbAE;bHxV", "pdf_size": 12465671, "rating": "4;6;6;7", "confidence": "4;4;4;5", 
"soundness": "2;3;3;4", "novelty": "2;2;2;4", "presentation": "3;3;4;4", "wc_summary": "122;69;15;42", "wc_strengths": "70;103;109;139", "wc_weaknesses": "142;132;262;25", "wc_questions": "57;55;83;6", "wc_limitations": "49;80;26;1", "wc_review": "440;439;495;213", "wc_reply_reviewers": "0;11;26;11", "wc_reply_authors": "0;0;25;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 62.0, 39.55376088313221 ], "wc_strengths_avg": [ 105.25, 24.498724456591614 ], "wc_weaknesses_avg": [ 140.25, 83.92965804767704 ], "wc_questions_avg": [ 50.25, 27.83320858255476 ], "wc_limitations_avg": [ 39.0, 29.129023327259016 ], "wc_review_avg": [ 396.75, 108.48127718643434 ], "wc_reply_reviewers_avg": [ 12.0, 9.246621004453464 ], "wc_reply_authors_avg": [ 6.25, 10.825317547305483 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13636796905526329389&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ia.ac.cn;polyu.edu.hk;cair-cas.org.hk;polyu.edu.hk;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Hong Kong Polytechnic University", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "CAS;PolyU", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Hong Kong SAR;Hong Kong", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Bypassing spike sorting: Density-based decoding using spike localization from dense multielectrode probes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70192", "id": "tgQRMrsxht", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f499387f191d6be56e68966181095878-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tgQRMrsxht", "openreview": "https://openreview.net/forum?id=tgQRMrsxht", "poster": "/media/PosterPDFs/NeurIPS%202023/70192.png?t=1701909926.6452951", "slides": "https://nips.cc/virtual/2023/poster/70192", "video": "https://nips.cc/virtual/2023/poster/70192", "author_site": "Yizi Zhang, Tianxiao He, Julien Boussard, Charles Windolf, Olivier Winter, Eric Trautmann, Noam Roth, Hailey Barrell, Mark Churchland, Nicholas A Steinmetz, Erdem Varol, Cole Hurwitz, Liam Paninski", "tldr": "", "abstract": "Neural decoding and its applications to brain computer interfaces (BCI) are essential for understanding the association between neural activity and behavior. A prerequisite for many decoding approaches is spike sorting, the assignment of action potentials (spikes) to individual neurons. Current spike sorting algorithms, however, can be inaccurate and do not properly model uncertainty of spike assignments, therefore discarding information that could potentially improve decoding performance. Recent advances in high-density probes (e.g., Neuropixels) and computational methods now allow for extracting a rich set of spike features from unsorted data; these features can in turn be used to directly decode behavioral correlates. 
To this end, we propose a spike sorting-free decoding method that directly models the distribution of extracted spike features using a mixture of Gaussians (MoG) encoding the uncertainty of spike assignments, without aiming to solve the spike clustering problem explicitly. We allow the mixing proportion of the MoG to change over time in response to the behavior and develop variational inference methods to fit the resulting model and to perform decoding. We benchmark our method with an extensive suite of recordings from different animals and probe geometries, demonstrating that our proposed decoder can consistently outperform current methods based on thresholding (i.e. multi-unit activity) and spike sorting. Open source code is available at https://github.com/yzhang511/density_decoding.", "keywords": "neural decoding;brain-computer interfaces;spike sorting;variational inference;generative models", "primary_area": "", "supplementary_material": "/attachment/d14da2c0d9f54ead9caf83eaff073b9127d8dcff.pdf", "author": "Yizi Zhang;Tianxiao He;Julien Boussard;Charlie Windolf;Olivier Winter;Eric M. Trautmann;Noam Roth;Hailey Barrel;Mark M Churchland;Nick Steinmetz;Erdem Varol;Cole Lincoln Hurwitz;Liam Paninski", "authorids": "~Yizi_Zhang1;~Tianxiao_He1;~Julien_Boussard1;~Charlie_Windolf1;~Olivier_Winter1;emt2177@columbia.edu;noamroth@uw.edu;haileyb2@uw.edu;~Mark_M_Churchland1;nick.steinmetz@gmail.com;~Erdem_Varol2;~Cole_Lincoln_Hurwitz1;~Liam_Paninski1", "gender": ";M;;;M;;;;;;M;;", "homepage": ";;;;;;;;;;https://www.neuroinformaticslab.com/;https://colehurwitz.github.io/;", "dblp": ";369/8475;236/5937;;;;;;;;;;94/2691", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;;;;;;;7GlElV0AAAAJ;https://scholar.google.co.uk/citations?hl=en;", "orcid": ";;;;0000-0001-9278-2721;;;;;;;;", "linkedin": ";tianxiao-he-bba619227/;;;;;;;;;;;", "or_profile": "~Yizi_Zhang1;~Tianxiao_He1;~Julien_Boussard1;~Charlie_Windolf1;~Olivier_Winter1;emt2177@columbia.edu;noamroth@uw.edu;haileyb2@uw.edu;~Mark_M_Churchland1;nick.steinmetz@gmail.com;~Erdem_Varol2;~Cole_Lincoln_Hurwitz1;~Liam_Paninski1", "aff": ";Columbia University;Columbia University;;;;;;;;New York University;Columbia University;Columbia University", "aff_domain": ";columbia.edu;columbia.edu;;;;;;;;nyu.edu;columbia.edu;columbia.edu", "position": ";Undergrad student;PhD student;;;;;;;;Assistant Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2023bypassing,\ntitle={Bypassing spike sorting: Density-based decoding using spike localization from dense multielectrode probes},\nauthor={Yizi Zhang and Tianxiao He and Julien Boussard and Charlie Windolf and Olivier Winter and Eric M. 
Trautmann and Noam Roth and Hailey Barrel and Mark M Churchland and Nick Steinmetz and Erdem Varol and Cole Lincoln Hurwitz and Liam Paninski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tgQRMrsxht}\n}", "github": "", "project": "", "reviewers": "Fmkq;tDhN;Cpfb;C56U", "pdf_size": 8484095, "rating": "4;7;8;8", "confidence": "4;4;3;4", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "79;42;99;63", "wc_strengths": "79;60;67;56", "wc_weaknesses": "399;51;244;54", "wc_questions": "120;41;20;196", "wc_limitations": "1;12;4;46", "wc_review": "678;206;434;415", "wc_reply_reviewers": "0;46;283;0", "wc_reply_authors": "0;0;370;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 1.6393596310755 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 20.932928605429293 ], "wc_strengths_avg": [ 65.5, 8.73212459828649 ], "wc_weaknesses_avg": [ 187.0, 145.239457448725 ], "wc_questions_avg": [ 94.25, 69.57864255646268 ], "wc_limitations_avg": [ 15.75, 17.92170471802278 ], "wc_review_avg": [ 433.25, 167.24140486135602 ], "wc_reply_reviewers_avg": [ 82.25, 117.41459662239615 ], "wc_reply_authors_avg": [ 92.5, 160.21469970012114 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.44022545316281186, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6276425566706893689&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";columbia.edu;columbia.edu;;;;;;;;nyu.edu;columbia.edu;columbia.edu", "author_num": 13, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Columbia University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.nyu.edu", "aff_unique_abbr": "Columbia;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GlyphControl: Glyph Conditional Control for Visual Text Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70191", "id": "thPI8hrA4V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8951bbdcf234132bcce680825e7cb354-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=thPI8hrA4V", "openreview": "https://openreview.net/forum?id=thPI8hrA4V", "poster": "/media/PosterPDFs/NeurIPS%202023/70191.png?t=1701985965.886854", "slides": "https://nips.cc/virtual/2023/poster/70191", "video": "https://nips.cc/virtual/2023/poster/70191", "author_site": "Yukang Yang, Dongnan Gui, YUHUI YUAN, Weicong Liang, Haisong Ding, Han Hu, Kai Chen", "tldr": "", "abstract": "Recently, there has been an increasing interest in developing diffusion-based text-to-image generative models capable of generating coherent and well-formed visual text. In this paper, we propose a novel and efficient approach called GlyphControl to address this task. 
Unlike existing methods that rely on character-aware text encoders like ByT5 and require retraining of text-to-image models, our approach leverages additional glyph conditional information to enhance the performance of the off-the-shelf Stable-Diffusion model in generating accurate visual text. By incorporating glyph instructions, users can customize the content, location, and size of the generated text according to their specific requirements. To facilitate further research in visual text generation, we construct a training benchmark dataset called LAION-Glyph. We evaluate the effectiveness of our approach by measuring OCR-based metrics, CLIP score, and FID of the generated visual text. Our empirical evaluations demonstrate that GlyphControl outperforms the recent DeepFloyd IF approach in terms of OCR accuracy, CLIP score, and FID, highlighting the efficacy of our method.", "keywords": "Generative Models;Visual Text Generation;Diffusion Models", "primary_area": "", "supplementary_material": "", "author": "Yukang Yang;Dongnan Gui;Yuhui Yuan;Weicong Liang;Haisong Ding;Han Hu;Kai Chen", "authorids": "~Yukang_Yang1;~Dongnan_Gui1;~Yuhui_Yuan1;~Weicong_Liang1;~Haisong_Ding1;~Han_Hu1;~Kai_Chen2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://github.com/guidongnan;;;;https://ancientmooner.github.io/;", "dblp": ";;190/7361;330/4850;213/8490;;", "google_scholar": "ASZWVzEAAAAJ;https://scholar.google.cz/citations?user=shLKvwcAAAAJ;PzyvzksAAAAJ;QvHDIygAAAAJ;RO6BtyMAAAAJ;Jkss014AAAAJ;https://scholar.google.co.jp/citations?user=kPDp3cUAAAAJ", "orcid": ";;;;;;", "linkedin": ";;rainbowsecret/;weicong-liang-victor;;;", "or_profile": "~Yukang_Yang1;~Dongnan_Gui1;~Yuhui_Yuan1;~Weicong_Liang1;~Haisong_Ding1;~Han_Hu1;~Kai_Chen2", "aff": "Microsoft Research;University of Science and Technology of China;Microsoft Research;Peking University;Microsoft;Microsft Research Asia;Microsoft", "aff_domain": "research.microsoft.com;ustc.edu.cn;microsoft.com;pku.edu.cn;microsoft.com;microsoft.com;microsoft.com", "position": "Intern;Undergrad student;Senior Researcher;MS student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nyang2023glyphcontrol,\ntitle={GlyphControl: Glyph Conditional Controllable Visual Text Generation},\nauthor={Yukang Yang and Dongnan Gui and Yuhui Yuan and Weicong Liang and Haisong Ding and Han Hu and Kai Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=thPI8hrA4V}\n}", "github": "", "project": "", "reviewers": "wGfp;Cb1X;xxJ1;kTnz;Yxcw", "pdf_size": 16731929, "rating": "4;4;6;6;7", "confidence": "5;4;5;4;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "2;3;3;4;4", "wc_summary": "54;27;61;66;54", "wc_strengths": "55;22;52;62;112", "wc_weaknesses": "82;141;235;33;18", "wc_questions": "51;11;3;20;18", "wc_limitations": "18;1;34;23;4", "wc_review": "260;202;385;204;206", "wc_reply_reviewers": "0;67;71;35;24", "wc_reply_authors": "78;419;41;30;23", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 52.4, 13.484806264830059 ], "wc_strengths_avg": [ 60.6, 29.117692216245437 ], "wc_weaknesses_avg": [ 101.8, 79.28026236081713 ], "wc_questions_avg": [ 20.6, 16.329115101560156 ], "wc_limitations_avg": [ 16.0, 12.214745187681975 ], 
"wc_review_avg": [ 251.4, 70.24414566353555 ], "wc_reply_reviewers_avg": [ 39.4, 26.71778433927484 ], "wc_reply_authors_avg": [ 118.2, 151.59076489021356 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3906850816328380005&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "research.microsoft.com;ustc.edu.cn;microsoft.com;pku.edu.cn;microsoft.com;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;0;2;0;0;0", "aff_unique_norm": "Microsoft;University of Science and Technology of China;Peking University", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;http://www.ustc.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "MSR;USTC;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;1;0;1;0", "aff_country_unique": "United States;China" }, { "title": "No Train No Gain: Revisiting Efficient Training Algorithms For Transformer-based Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70190", "id": "thbXgJ8gNK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/51f3d6252706100325ddc435ba0ade0e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=thbXgJ8gNK", "openreview": "https://openreview.net/forum?id=thbXgJ8gNK", "poster": "/media/PosterPDFs/NeurIPS%202023/70190.png?t=1702745330.2821724", "slides": "https://nips.cc/virtual/2023/poster/70190", "video": "https://nips.cc/virtual/2023/poster/70190", "author_site": "Jean Kaddour, Oscar Key, Piotr Nawrot, Pasquale Minervini, Matt Kusner", "tldr": "", "abstract": "The computation necessary for training Transformer-based language models has skyrocketed in recent years.\nThis trend has motivated research on efficient training algorithms designed to improve training, validation, and downstream performance faster than standard training. In this work, we revisit three categories of such algorithms: dynamic architectures (layer stacking, layer dropping), batch selection (selective backprop., RHO-loss), and efficient optimizers (Lion, Sophia). When pre-training BERT and T5 with a fixed computation budget using such methods, we find that their training, validation, and downstream gains vanish compared to a baseline with a fully-decayed learning rate. We define an evaluation protocol that enables computation to be done on arbitrary machines by mapping all computation time to a reference machine which we call reference system time. 
We discuss the limitations of our proposed protocol and release our code to encourage rigorous research in efficient training procedures: https://github.com/JeanKaddour/NoTrainNoGain.", "keywords": "language models;transformers;efficient training", "primary_area": "", "supplementary_material": "/attachment/e5304f4bc5a576bae976cc9fcc54987c336e1bbc.pdf", "author": "Jean Kaddour;Oscar Key;Piotr Nawrot;Pasquale Minervini;Matt Kusner", "authorids": "~Jean_Kaddour1;~Oscar_Key1;~Piotr_Nawrot1;~Pasquale_Minervini4;~Matt_Kusner1", "gender": "M;M;M;M;M", "homepage": "https://jeankaddour.com/;https://oscarkey.github.io;https://piotrnawrot.github.io;http://mkusner.github.io;https://www.neuralnoise.com", "dblp": ";276/1203;304/8773.html;120/7700.html;58/10142", "google_scholar": "z90bmSMAAAAJ;;9wrNHUQAAAAJ;57KRSu8AAAAJ;https://scholar.google.it/citations?user=9sk6CSgAAAA", "orcid": ";;0009-0003-8552-9447;;0000-0002-8442-602X", "linkedin": ";;piotr-nawrot/;;pasquale-mauro-minervini-47a08324/", "or_profile": "~Jean_Kaddour1;~Oscar_Key1;~Piotr_Nawrot1;~Matt_Kusner1;~Pasquale_Minervini1", "aff": "University College London;University College London;NVIDIA;University College London;University of Edinburgh, University of Edinburgh", "aff_domain": "ucl.ac.uk;ucl.ac.uk;nvidia.com;ucl.ac.uk;ed.ac.uk", "position": "PhD student;PhD student;Intern;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkaddour2023no,\ntitle={No Train No Gain: Revisiting Efficient Training Algorithms For Transformer-based Language Models},\nauthor={Jean Kaddour and Oscar Key and Piotr Nawrot and Pasquale Minervini and Matt Kusner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=thbXgJ8gNK}\n}", "github": "", "project": "", "reviewers": "b1c1;BpVn;VTeA;DsDW;iTEM", "pdf_size": 1035935, "rating": "5;6;6;6;7", "confidence": "4;4;4;5;3", "soundness": "3;3;3;2;3", "novelty": "2;2;3;3;3", "presentation": "3;2;3;2;3", "wc_summary": "75;196;54;73;61", "wc_strengths": "56;191;61;120;73", "wc_weaknesses": "160;124;56;757;85", "wc_questions": "18;133;44;158;39", "wc_limitations": "11;180;8;7;33", "wc_review": "320;824;223;1115;291", "wc_reply_reviewers": "0;127;15;201;62", "wc_reply_authors": "0;0;0;353;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 91.8, 52.67029523365139 ], "wc_strengths_avg": [ 100.2, 50.72829585152649 ], "wc_weaknesses_avg": [ 236.4, 262.6637394083927 ], "wc_questions_avg": [ 78.4, 56.037844355399685 ], "wc_limitations_avg": [ 47.8, 66.78143454583766 ], "wc_review_avg": [ 554.6, 352.4500531990313 ], "wc_reply_reviewers_avg": [ 81.0, 74.55736046829985 ], "wc_reply_authors_avg": [ 70.6, 141.2 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999994, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6920811827901580047&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;ucl.ac.uk;nvidia.com;ucl.ac.uk;ed.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University College London;NVIDIA;University of Edinburgh", "aff_unique_dep": ";NVIDIA Corporation;", 
"aff_unique_url": "https://www.ucl.ac.uk;https://www.nvidia.com;https://www.ed.ac.uk", "aff_unique_abbr": "UCL;NVIDIA;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Feature learning via mean-field Langevin dynamics: classifying sparse parities and beyond", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70189", "id": "tj86aGVNb3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cc321baf0a8611b1d1bdbd18822667b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tj86aGVNb3", "openreview": "https://openreview.net/forum?id=tj86aGVNb3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70189", "video": "https://nips.cc/virtual/2023/poster/70189", "author_site": "Taiji Suzuki, Denny Wu, Kazusato Oko, Atsushi Nitanda", "tldr": "", "abstract": "Neural network in the mean-field regime is known to be capable of \\textit{feature learning}, unlike the kernel (NTK) counterpart. Recent works have shown that mean-field neural networks can be globally optimized by a noisy gradient descent update termed the \\textit{mean-field Langevin dynamics} (MFLD). However, all existing guarantees for MFLD only considered the \\textit{optimization} efficiency, and it is unclear if this algorithm leads to improved \\textit{generalization} performance and sample complexity due to the presence of feature learning. To fill this gap, in this work we study the statistical and computational complexity of MFLD in learning a class of binary classification problems. Unlike existing margin bounds for neural networks, we avoid the typical norm control by utilizing the perspective that MFLD optimizes the \\textit{distribution} of parameters rather than the parameter itself; this leads to an improved analysis of the sample complexity and convergence rate. 
We apply our general framework to the learning of $k$-sparse parity functions, where we prove that, unlike kernel methods, two-layer neural networks optimized by MFLD achieve a sample complexity where the degree $k$ is ``decoupled'' from the exponent in the dimension dependence.", "keywords": "mean-field regime;feature learning;Neural network optimization;sparse parity function;classification;sample complexity", "primary_area": "", "supplementary_material": "/attachment/0380d7ae4ce376fa6d1176ce66b450462bec3c7d.zip", "author": "Taiji Suzuki;Denny Wu;Kazusato Oko;Atsushi Nitanda", "authorids": "~Taiji_Suzuki1;~Denny_Wu2;~Kazusato_Oko1;~Atsushi_Nitanda1", "gender": "M;M;M;M", "homepage": "http://ibis.t.u-tokyo.ac.jp/suzuki/;https://dennywu1.github.io/;;https://sites.google.com/site/atsushinitanda", "dblp": "08/312;;;155/1884", "google_scholar": "x8osrBsAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.jp/citations?user=LyVvaf8AAAAJ", "orcid": ";;;", "linkedin": ";;kazusatooko/;", "or_profile": "~Taiji_Suzuki1;~Denny_Wu2;~Kazusato_Oko1;~Atsushi_Nitanda1", "aff": "The University of Tokyo;University of Toronto;The University of Tokyo;Kyushu Institute of Technology", "aff_domain": "tokyo.ac.jp;toronto.edu;u-tokyo.ac.jp;kyutech.ac.jp", "position": "Associate Professor;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nsuzuki2023feature,\ntitle={Feature learning via mean-field Langevin dynamics: classifying sparse parities and beyond},\nauthor={Taiji Suzuki and Denny Wu and Kazusato Oko and Atsushi Nitanda},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tj86aGVNb3}\n}", "github": "", "project": "", "reviewers": "zm9j;uexs;1qsa;afRb", "pdf_size": 520468, "rating": "6;6;6;6", "confidence": "3;4;2;3", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;2;2", "wc_summary": "76;52;63;48", "wc_strengths": "57;95;103;50", "wc_weaknesses": "110;24;185;87", "wc_questions": "42;232;42;28", "wc_limitations": "66;7;17;10", "wc_review": "351;410;410;223", "wc_reply_reviewers": "26;122;55;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 59.75, 10.871407452579449 ], "wc_strengths_avg": [ 76.25, 23.05834989759675 ], "wc_weaknesses_avg": [ 101.5, 57.578207683115664 ], "wc_questions_avg": [ 86.0, 84.4866853415377 ], "wc_limitations_avg": [ 25.0, 23.947860029656095 ], "wc_review_avg": [ 348.5, 76.3560737597213 ], "wc_reply_reviewers_avg": [ 53.75, 42.346044679521135 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6269762526147442075&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tokyo.ac.jp;toronto.edu;u-tokyo.ac.jp;kyutech.ac.jp", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Tokyo;University of Toronto;Kyushu Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.utoronto.ca;https://www.kyutech.ac.jp", "aff_unique_abbr": "UTokyo;U of T;Kyutech", "aff_campus_unique_index": "",
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Japan;Canada" }, { "title": "The Harvard USPTO Patent Dataset: A Large-Scale, Well-Structured, and Multi-Purpose Corpus of Patent Applications", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73442", "id": "tk27oD2cBw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b4b02a09f2e6ad29fdbeb1386d68f4c4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=tk27oD2cBw", "openreview": "https://openreview.net/forum?id=tk27oD2cBw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73442", "video": "https://nips.cc/virtual/2023/poster/73442", "author_site": "Mirac Suzgun, Luke Melas-Kyriazi, Suproteem Sarkar, Scott D Kominers, Stuart Shieber", "tldr": "", "abstract": "Innovation is a major driver of economic and social development, and information about many kinds of innovation is embedded in semi-structured data from patents and patent applications. Though the impact and novelty of innovations expressed in patent data are difficult to measure through traditional means, machine learning offers a promising set of techniques for evaluating novelty, summarizing contributions, and embedding semantics. In this paper, we introduce the Harvard USPTO Patent Dataset (HUPD), a large-scale, well-structured, and multi-purpose corpus of English-language patent applications filed to the United States Patent and Trademark Office (USPTO) between 2004 and 2018. With more than 4.5 million patent documents, HUPD is two to three times larger than comparable corpora. Unlike other NLP patent datasets, HUPD contains the inventor-submitted versions of patent applications, not the final versions of granted patents, allowing us to study patentability at the time of filing using NLP methods for the first time. It is also novel in its inclusion of rich structured data alongside the text of patent filings: By providing each application\u2019s metadata along with all of its text fields, HUPD enables researchers to perform new sets of NLP tasks that leverage variation in structured covariates. As a case study on the types of research HUPD makes possible, we introduce a new task to the NLP community -- patent acceptance prediction. We additionally show the structured metadata provided in HUPD allows us to conduct explicit studies of concept shifts for this task. We find that performance on patent acceptance prediction decays when models trained in one context are evaluated on different innovation categories and over time. Finally, we demonstrate how HUPD can be used for three additional tasks: Multi-class classification of patent subject areas, language modeling, and abstractive summarization. 
Put together, our publicly-available dataset aims to advance research extending language and classification models to diverse and dynamic real-world data distributions.", "keywords": "hupd;patents;innovation;dataset;harvard-uspto;uspto;intellectual-property;nlp;classification;summarization;language-modeling;patent-analysis", "primary_area": "", "supplementary_material": "", "author": "Mirac Suzgun;Luke Melas-Kyriazi;Suproteem K Sarkar;Scott Kominers;Stuart Shieber", "authorids": "~Mirac_Suzgun1;~Luke_Melas-Kyriazi1;~Suproteem_K_Sarkar1;~Scott_Kominers1;~Stuart_Shieber1", "gender": ";M;;M;M", "homepage": "https://web.stanford.edu/people/~msuzgun/;https://lukemelas.github.io/;https://suproteem.is/researching/;http://scottkom.com/;http://www.eecs.harvard.edu/~shieber/", "dblp": "230/3470;228/5680;;52/7071;", "google_scholar": "C3aEsC8AAAAJ;https://scholar.google.com/citations?hl=en;;YHSRCCsAAAAJ;", "orcid": ";;;;0000-0002-7733-8195", "linkedin": ";;;scott-kominers/;", "or_profile": "~Mirac_Suzgun1;~Luke_Melas-Kyriazi1;~Suproteem_K_Sarkar1;~Scott_Kominers1;~Stuart_Shieber1", "aff": "Stanford University;University of Oxford, University of Oxford;Harvard University;Meta Facebook;Harvard University", "aff_domain": "stanford.edu;robots.ox.ac.uk;harvard.edu;facebook.com;harvard.edu", "position": "PhD student;PhD student;PhD student;Advisor;Full Professor", "bibtex": "@inproceedings{\nsuzgun2023the,\ntitle={The Harvard {USPTO} Patent Dataset: A Large-Scale, Well-Structured, and Multi-Purpose Corpus of Patent Applications},\nauthor={Mirac Suzgun and Luke Melas-Kyriazi and Suproteem K Sarkar and Scott Kominers and Stuart Shieber},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=tk27oD2cBw}\n}", "github": "", "project": "", "reviewers": "sQe3;Gojv;4nA3;YoRT;DL8S", "pdf_size": 12809407, "rating": "7;7;7;7;8", "confidence": "4;3;5;4;5", "wc_summary_and_contributions": "74;58;83;146;63", "wc_strengths": "23;56;103;73;75", "wc_improvement": "37;113;387;119;99", "wc_limitations": "14;1;23;149;9", "wc_correctness": "1;1;38;11;89", "wc_clarity": "6;1;16;8;15", "wc_relation_to_prior_work": "12;1;9;120;13", "wc_documentation": "1;1;24;46;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "169;233;684;673;371", "wc_reply_reviewers": "0;0;120;0;0", "wc_reply_authors": "181;500;482;289;307", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 84.8, 31.808175049820132 ], "wc_strengths_avg": [ 66.0, 26.260236099471765 ], "wc_improvement_avg": [ 151.0, 121.54340788376801 ], "wc_limitations_avg": [ 39.2, 55.36208088574706 ], "wc_correctness_avg": [ 28.0, 33.370645783382734 ], "wc_clarity_avg": [ 9.2, 5.635601121442148 ], "wc_relation_to_prior_work_avg": [ 31.0, 44.69899327725402 ], "wc_documentation_avg": [ 15.8, 17.29045979724079 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 426.0, 216.28499716808838 ], "wc_reply_reviewers_avg": [ 24.0, 48.0 ], "wc_reply_authors_avg": [ 351.8, 121.68713982997546 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15416863033483477276&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": 
"stanford.edu;robots.ox.ac.uk;harvard.edu;facebook.com;harvard.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Stanford University;University of Oxford;Harvard University;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": "https://www.stanford.edu;https://www.ox.ac.uk;https://www.harvard.edu;https://meta.com", "aff_unique_abbr": "Stanford;Oxford;Harvard;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Exponential Lower Bounds for Fictitious Play in Potential Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70188", "id": "tkenkPYkxj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/66820ab16b817d8a6b00d60b3d24b83a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tkenkPYkxj", "openreview": "https://openreview.net/forum?id=tkenkPYkxj", "poster": "/media/PosterPDFs/NeurIPS%202023/70188.png?t=1702797790.6747236", "slides": "https://nips.cc/virtual/2023/poster/70188", "video": "https://nips.cc/virtual/2023/poster/70188", "author_site": "Ioannis Panageas, Nikolas Patris, Stratis Skoulakis, Volkan Cevher", "tldr": "", "abstract": "Fictitious Play (FP) is a simple and natural dynamic for repeated play with many applications in game theory and multi-agent reinforcement learning. It was introduced by Brown and its convergence properties for two-player zero-sum games was established later by Robinson. Potential games [Monderer and Shapley 1996] is another class of games which exhibit the FP property [Monderer and Shapley 1996], i.e., FP dynamics converges to a Nash equilibrium if all agents follows it. Nevertheless, except for two-player zero-sum games and for specific instances of payoff matrices [Abernethy et. al. 2021] or for adversarial tie-breaking rules [Daskalakis and Pan, 2014], the \\textit{convergence rate} of FP is unknown. In this work, we focus on the rate of convergence of FP when applied to potential games and more specifically identical payoff games. We prove that FP can take exponential time (in the number of strategies) to reach a Nash equilibrium, even if the game is restricted to \\textit{two agents}. To prove this, we recursively construct a two-player coordination game with a unique Nash equilibrium. 
Moreover, every approximate Nash equilibrium in the constructed game must be close to the pure Nash equilibrium in $\\ell_1$-distance.", "keywords": "fictitious play;convergence rate;potential games", "primary_area": "", "supplementary_material": "/attachment/40643afc6d6fb8199d84fbbbf005017aa9f548c6.zip", "author": "Ioannis Panageas;Nikolas Patris;Stratis Skoulakis;Volkan Cevher", "authorids": "~Ioannis_Panageas1;~Nikolas_Patris1;~Stratis_Skoulakis2;~Volkan_Cevher1", "gender": "M;M;M;M", "homepage": "https://panageas.github.io;https://npatris.github.io/;http://www.corelab.ntua.gr/~sskoul/;http://lions.epfl.ch", "dblp": "139/3829;297/4669;183/0979.html;70/5301", "google_scholar": "5NiFWuwAAAAJ;https://scholar.google.com/citations?hl=en;Juo2Tk8AAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ioannis_Panageas1;~Nikolas_Patris1;~Stratis_Skoulakis2;~Volkan_Cevher1", "aff": "Donald Bren School of Information and Computer Sciences, University of California, Irvine;University of California, Irvine;EPFL - EPF Lausanne;Amazon Development Center Germany", "aff_domain": "ics.uci.edu;uci.edu;epfl.ch;amazon.de", "position": "Assistant Professor;PhD student;Postdoc;Amazon Scholar", "bibtex": "@inproceedings{\npanageas2023exponential,\ntitle={Exponential Lower Bounds for Fictitious Play in Potential Games},\nauthor={Ioannis Panageas and Nikolas Patris and Stratis Skoulakis and Volkan Cevher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tkenkPYkxj}\n}", "github": "", "project": "", "reviewers": "a6So;nmF9;hanH;TVCP", "pdf_size": 1084951, "rating": "7;7;7;7", "confidence": "4;3;4;3", "soundness": "4;3;4;3", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "70;75;44;94", "wc_strengths": "103;106;69;53", "wc_weaknesses": "24;164;30;24", "wc_questions": "59;21;73;22", "wc_limitations": "15;14;1;1", "wc_review": "271;380;217;194", "wc_reply_reviewers": "70;174;23;17", "wc_reply_authors": "21;185;12;14", "reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 70.75, 17.851820635442202 ], "wc_strengths_avg": [ 82.75, 22.498611068241523 ], "wc_weaknesses_avg": [ 60.5, 59.805936160217406 ], "wc_questions_avg": [ 43.75, 22.796655456447994 ], "wc_limitations_avg": [ 7.75, 6.7592529172978875 ], "wc_review_avg": [ 265.5, 71.77220910631078 ], "wc_reply_reviewers_avg": [ 71.0, 62.90866395020641 ], "wc_reply_authors_avg": [ 58.0, 73.39959127951599 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2794490508301719023&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 6, "email": "ics.uci.edu;uci.edu;epfl.ch;amazon.de", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of California, Irvine;EPFL;Amazon", "aff_unique_dep": "Donald Bren School of Information and Computer Sciences;;Development Center", "aff_unique_url": "https://www.uci.edu;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "UCI;EPFL;Amazon", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Irvine;Lausanne;", "aff_country_unique_index": "0;0;1;2", 
"aff_country_unique": "United States;Switzerland;Germany" }, { "title": "SPACE: Single-round Participant Amalgamation for Contribution Evaluation in Federated Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70187", "id": "tmxjuIFSEc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/14a812fa4b6bf244d055e37a7cd2f557-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tmxjuIFSEc", "openreview": "https://openreview.net/forum?id=tmxjuIFSEc", "poster": "/media/PosterPDFs/NeurIPS%202023/70187.png?t=1701696325.5162988", "slides": "https://nips.cc/virtual/2023/poster/70187", "video": "https://nips.cc/virtual/2023/poster/70187", "author_site": "Yi-Chung Chen, Hsi-Wen Chen, Shun-Gui Wang, Ming-syan Chen", "tldr": "", "abstract": "The evaluation of participant contribution in federated learning (FL) has recently gained significant attention due to its applicability in various domains, such as incentive mechanisms, robustness enhancement, and client selection. Previous approaches have predominantly relied on the widely adopted Shapley value for participant evaluation. However, the computation of the Shapley value is expensive, despite using techniques like gradient-based model reconstruction and truncating unnecessary evaluations. Therefore, we present an efficient approach called Single-round Participants Amalgamation for Contribution Evaluation (SPACE). SPACE incorporates two novel components, namely Federated Knowledge Amalgamation and Prototype-based Model Evaluation to reduce the evaluation effort by eliminating the dependence on the size of the validation set and enabling participant evaluation within a single communication round. Experimental results demonstrate that SPACE outperforms state-of-the-art methods in terms of both running time and Pearson\u2019s Correlation Coefficient (PCC). Furthermore, extensive experiments conducted on applications, client reweighting, and client selection highlight the effectiveness of SPACE. 
The code is available at https://github.com/culiver/SPACE.", "keywords": "Federated Learning;Contribution Evaluation;Shapley Value;Knowledge Amalgamation", "primary_area": "", "supplementary_material": "", "author": "Yi-Chung Chen;Hsi-Wen Chen;Shun-Guei Wang;Ming-Syan Chen", "authorids": "~Yi-Chung_Chen1;~Hsi-Wen_Chen1;~Shun-Guei_Wang1;~Ming-Syan_Chen2", "gender": "Not Specified;M;M;M", "homepage": ";;https://arbor.ee.ntu.edu.tw/~mschen;https://sites.google.com/view/yichungchen/home", "dblp": "39/9713.html;;c/MingSyanChen;02/448", "google_scholar": "https://scholar.google.com.tw/citations?user=ZupA27cAAAAJ;;KTmCrFkAAAAJ;https://scholar.google.com.tw/citations?user=7UI6GS8AAAAJ", "orcid": ";;0000-0002-0711-8197;", "linkedin": "hsi-wen-chen-674395134;sgwang3790/;;", "or_profile": "~Hsi-Wen_Chen1;~Shun-Guei_Wang1;~Ming-Syan_Chen2;~YI_CHUNG_CHEN1", "aff": "National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw;ntu.edu;ntu.edu.tw", "position": "PhD student;MS student;Full Professor;MS student", "bibtex": "@inproceedings{\nchen2023space,\ntitle={{SPACE}: Single-round Participant Amalgamation for Contribution Evaluation in Federated Learning},\nauthor={Yi-Chung Chen and Hsi-Wen Chen and Shun-Guei Wang and Ming-Syan Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tmxjuIFSEc}\n}", "github": "", "project": "", "reviewers": "AsTD;AA9d;a6mr;HEwC;vCp6", "pdf_size": 890072, "rating": "4;5;5;6;6", "confidence": "4;3;2;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "39;56;189;163;123", "wc_strengths": "17;26;91;50;45", "wc_weaknesses": "103;42;101;129;52", "wc_questions": "2;212;115;105;32", "wc_limitations": "15;1;3;30;29", "wc_review": "176;337;499;477;281", "wc_reply_reviewers": "67;18;19;0;22", "wc_reply_authors": "631;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 114.0, 58.47392581313486 ], "wc_strengths_avg": [ 45.8, 25.623426780975255 ], "wc_weaknesses_avg": [ 85.4, 33.02483913662563 ], "wc_questions_avg": [ 93.2, 73.1994535498729 ], "wc_limitations_avg": [ 15.6, 12.32233744059949 ], "wc_review_avg": [ 354.0, 121.20726050860155 ], "wc_reply_reviewers_avg": [ 25.2, 22.283626275810672 ], "wc_reply_authors_avg": [ 126.2, 252.39999999999998 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2004459314343183, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12575397241929868248&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ntu.edu.tw;ntu.edu.tw;ntu.edu;ntu.edu.tw", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Add and Thin: Diffusion for Temporal Point Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70186", "id": "tn9Dldam9L", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b1d9c7e7bd265d81aae8d74a7a6bd7f1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tn9Dldam9L", "openreview": "https://openreview.net/forum?id=tn9Dldam9L", "poster": "/media/PosterPDFs/NeurIPS%202023/70186.png?t=1702590809.0856118", "slides": "https://nips.cc/virtual/2023/poster/70186", "video": "https://nips.cc/virtual/2023/poster/70186", "author_site": "David L\u00fcdke, Marin Bilo\u0161, Oleksandr Shchur, Marten Lienen, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Autoregressive neural networks within the temporal point process (TPP) framework have become the standard for modeling continuous-time event data. Even though these models can expressively capture event sequences in a one-step-ahead fashion, they are inherently limited for long-term forecasting applications due to the accumulation of errors caused by their sequential nature. To overcome these limitations, we derive ADD-THIN, a principled probabilistic denoising diffusion model for TPPs that operates on entire event sequences. Unlike existing diffusion approaches, ADD-THIN naturally handles data with discrete and continuous components. In experiments on synthetic and real-world datasets, our model matches the state-of-the-art TPP models in density estimation and strongly outperforms them in forecasting.", "keywords": "Point Processes;Diffusion;Temporal Data;Generative Model;Forecasting;Density Estimation;Denoising", "primary_area": "", "supplementary_material": "/attachment/2af1d4247d8afc41954fa34729398209b33ccf98.pdf", "author": "David L\u00fcdke;Marin Bilo\u0161;Oleksandr Shchur;Marten Lienen;Stephan G\u00fcnnemann", "authorids": "~David_L\u00fcdke1;~Marin_Bilo\u01611;~Oleksandr_Shchur1;~Marten_Lienen1;~Stephan_G\u00fcnnemann1", "gender": "M;;M;;M", "homepage": ";;http://shchur.github.io/;https://martenlienen.com;http://www.daml.in.tum.de", "dblp": "328/9755;;210/2544;192/3468;43/3011", "google_scholar": ";;np39q6IAAAAJ;;", "orcid": ";;;;", "linkedin": "david-l\u00fcdke/;;;;", "or_profile": "~David_L\u00fcdke1;~Marin_Bilo\u01611;~Oleksandr_Shchur1;~Marten_Lienen1;~Stephan_G\u00fcnnemann1", "aff": "Technische Universit\u00e4t M\u00fcnchen;;Amazon;Technical University Munich;Technical University Munich", "aff_domain": "tum.de;;amazon.com;tum.de;tum.de", "position": "PhD student;;Researcher;PhD student;Professor", "bibtex": "@inproceedings{\nl{\\\"u}dke2023add,\ntitle={Add and Thin: Diffusion for Temporal Point Processes},\nauthor={David L{\\\"u}dke and Marin Bilo{\\v{s}} and Oleksandr Shchur and Marten Lienen and Stephan G{\\\"u}nnemann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tn9Dldam9L}\n}", "github": "", "project": "", "reviewers": "1xQ2;RwFj;6sim;1d9q", "pdf_size": 8651828, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "1;1;3;3", "wc_summary": "76;78;61;27", "wc_strengths": "71;22;55;116", "wc_weaknesses": "571;169;7;97", "wc_questions": "2;52;1;132", "wc_limitations": "46;17;13;47", "wc_review": "766;338;137;419", "wc_reply_reviewers": "72;221;17;30", "wc_reply_authors": "0;726;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 60.5, 
20.426698215815495 ], "wc_strengths_avg": [ 66.0, 33.8452360015409 ], "wc_weaknesses_avg": [ 211.0, 215.6246739127969 ], "wc_questions_avg": [ 46.75, 53.36372831802516 ], "wc_limitations_avg": [ 30.75, 15.81731646013318 ], "wc_review_avg": [ 415.0, 227.17284168667697 ], "wc_reply_reviewers_avg": [ 85.0, 81.10795275433846 ], "wc_reply_authors_avg": [ 181.5, 314.36722157375124 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2546531821438830567&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "tum.de;;amazon.com;tum.de;tum.de", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Amazon;Technical University of Munich", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.tum.de;https://www.amazon.com;https://www.tum.de", "aff_unique_abbr": "TUM;Amazon;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;United States" }, { "title": "Dream the Impossible: Outlier Imagination with Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70185", "id": "tnRboxQIec", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf5311df07f3efce97471921e6d2f159-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tnRboxQIec", "openreview": "https://openreview.net/forum?id=tnRboxQIec", "poster": "/media/PosterPDFs/NeurIPS%202023/70185.png?t=1698418388.8724232", "slides": "https://nips.cc/virtual/2023/poster/70185", "video": "https://nips.cc/virtual/2023/poster/70185", "author_site": "Xuefeng Du, Yiyou Sun, Jerry Zhu, Yixuan Li", "tldr": "", "abstract": "Utilizing auxiliary outlier datasets to regularize the machine learning model has demonstrated promise for out-of-distribution (OOD) detection and safe prediction. Due to the labor intensity in data collection and cleaning, automating outlier data generation has been a long-desired alternative. Despite the appeal, generating photo-realistic outliers in the high dimensional pixel space has been an open challenge for the field. To tackle the problem, this paper proposes a new framework Dream-OOD, which enables imagining photo-realistic outliers by way of diffusion models, provided with only the in-distribution (ID) data and classes. Specifically, Dream-OOD learns a text-conditioned latent space based on ID data, and then samples outliers in the low-likelihood region via the latent, which can be decoded into images by the diffusion model. Different from prior works [16, 95], Dream-OOD enables visualizing and understanding the imagined outliers, directly in the pixel space. 
We conduct comprehensive quantitative and qualitative studies to understand the efficacy of Dream-OOD, and show that training with the samples generated by Dream-OOD can significantly benefit OOD detection performance.", "keywords": "Outlier imagination;machine learning", "primary_area": "", "supplementary_material": "/attachment/e4cf4f0db897f2d0c166b40da23dce9ad233b49a.pdf", "author": "Xuefeng Du;Yiyou Sun;Jerry Zhu;Yixuan Li", "authorids": "~Xuefeng_Du1;~Yiyou_Sun1;~Jerry_Zhu1;~Yixuan_Li1", "gender": "M;M;F;M", "homepage": "https://d12306.github.io/;https://sunyiyou.github.io/;http://pages.cs.wisc.edu/~sharonli/;http://pages.cs.wisc.edu/~jerryzhu/", "dblp": "34/3557;211/5630;144/6087-1;z/XiaojinZhu", "google_scholar": "GE_aEh4AAAAJ;IKqlQo4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=hqTu-QcAAAAJ", "orcid": ";;;", "linkedin": "xuefeng-du-094723192/;;liyixuan;", "or_profile": "~Xuefeng_Du1;~Yiyou_Sun1;~Yixuan_Li1;~Xiaojin_Zhu1", "aff": "University of Wisconsin, Madison;University of Wisconsin, Madison;Cornell University;University of Wisconsin, Madison", "aff_domain": "wisc.edu;wisc.edu;cornell.edu;wisc.edu", "position": "PhD student;PhD student;Graduate Student;Associate Professor", "bibtex": "@inproceedings{\ndu2023dream,\ntitle={Dream the Impossible: Outlier Imagination with Diffusion Models},\nauthor={Xuefeng Du and Yiyou Sun and Jerry Zhu and Yixuan Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tnRboxQIec}\n}", "github": "", "project": "", "reviewers": "i54j;Qkax;3rTL;thbj;94Wq", "pdf_size": 8605600, "rating": "5;5;6;7;7", "confidence": "3;3;3;2;3", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "62;59;75;77;68", "wc_strengths": "15;62;33;75;70", "wc_weaknesses": "5;120;46;45;56", "wc_questions": "188;60;120;18;56", "wc_limitations": "8;29;7;7;11", "wc_review": "278;330;281;222;261", "wc_reply_reviewers": "54;23;23;8;133", "wc_reply_authors": "60;50;23;17;166", "reply_reviewers": "1;1;1;1;2", "reply_authors": "3;3;2;2;3", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 68.2, 7.025667228100119 ], "wc_strengths_avg": [ 51.0, 23.143033509028154 ], "wc_weaknesses_avg": [ 54.4, 37.16234653516917 ], "wc_questions_avg": [ 88.4, 59.550314860628575 ], "wc_limitations_avg": [ 12.4, 8.428523002282192 ], "wc_review_avg": [ 274.4, 34.85168575549825 ], "wc_reply_reviewers_avg": [ 48.2, 44.968433372756046 ], "wc_reply_authors_avg": [ 63.2, 53.85684728982936 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5590169943749475, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5332740770144181867&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "wisc.edu;wisc.edu;cornell.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Wisconsin;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.cornell.edu", "aff_unique_abbr": "UW;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United 
States" }, { "title": "SafeDICE: Offline Safe Imitation Learning with Non-Preferred Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70184", "id": "toEGuA9Qfn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ed2fb79f2664c3d9ba878be7e575b2af-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=toEGuA9Qfn", "openreview": "https://openreview.net/forum?id=toEGuA9Qfn", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70184", "video": "https://nips.cc/virtual/2023/poster/70184", "author_site": "Youngsoo Jang, Geon-Hyeong Kim, Jongmin Lee, Sungryull Sohn, Byoungjip Kim, Honglak Lee, Moontae Lee", "tldr": "", "abstract": "We consider offline safe imitation learning (IL), where the agent aims to learn the safe policy that mimics preferred behavior while avoiding non-preferred behavior from non-preferred demonstrations and unlabeled demonstrations. This problem setting corresponds to various real-world scenarios, where satisfying safety constraints is more important than maximizing the expected return. However, it is very challenging to learn the policy to avoid constraint-violating (i.e. non-preferred) behavior, as opposed to standard imitation learning which learns the policy to mimic given demonstrations. In this paper, we present a hyperparameter-free offline safe IL algorithm, SafeDICE, that learns safe policy by leveraging the non-preferred demonstrations in the space of stationary distributions. Our algorithm directly estimates the stationary distribution corrections of the policy that imitate the demonstrations excluding the non-preferred behavior. In the experiments, we demonstrate that our algorithm learns a more safe policy that satisfies the cost constraint without degrading the reward performance, compared to baseline algorithms.", "keywords": "Imitation learning;Preference-based learning;Safe imitation learning", "primary_area": "", "supplementary_material": "", "author": "Youngsoo Jang;Geon-Hyeong Kim;Jongmin Lee;Sungryull Sohn;Byoungjip Kim;Honglak Lee;Moontae Lee", "authorids": "~Youngsoo_Jang2;~Geon-Hyeong_Kim2;~Jongmin_Lee1;~Sungryull_Sohn1;~Byoungjip_Kim1;~Honglak_Lee2;~Moontae_Lee1", "gender": ";M;M;M;;;", "homepage": "http://www.ysjang.me;https://sites.google.com/view/ghkim;https://www.jmlee.kr;;;;https://moontae.people.uic.edu", "dblp": "195/0471;231/7707;68/222-4.html;172/9884;;;132/1761", "google_scholar": "6EoBBggAAAAJ;https://scholar.google.co.kr/citations?user=IJL0uXoAAAAJ;https://scholar.google.co.kr/citations?user=rFcK8EEAAAAJ;https://scholar.google.com/citations?hl=en;;;BMvYy9cAAAAJ", "orcid": ";;;;;;0000-0001-5542-3463", "linkedin": ";;jmlee123/;;;;moontae-lee-975248123/", "or_profile": "~Youngsoo_Jang2;~Geon-Hyeong_Kim2;~Jongmin_Lee1;~Sungryull_Sohn1;~Byoungjip_Kim1;~Honglak_Lee2;~Moontae_Lee1", "aff": "LG AI Research;LG AI Research;University of California, Berkeley;LG AI Research;;;University of Illinois, Chicago", "aff_domain": "lgresearch.ai;lgresearch.ai;berkeley.edu;lgresearch.ai;;;uic.edu", "position": "Researcher;Researcher;Postdoc;Researcher;;;Assistant Professor", "bibtex": "@inproceedings{\njang2023safedice,\ntitle={Safe{DICE}: Offline Safe Imitation Learning with Non-Preferred Demonstrations},\nauthor={Youngsoo Jang and Geon-Hyeong Kim and Jongmin Lee and Sungryull Sohn and Byoungjip Kim and Honglak Lee and Moontae Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing 
Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=toEGuA9Qfn}\n}", "github": "", "project": "", "reviewers": "ERpc;jY8W;Ajjv;kwD7", "pdf_size": 6112555, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;3;3;4", "wc_summary": "100;44;92;203", "wc_strengths": "40;56;81;29", "wc_weaknesses": "434;72;155;38", "wc_questions": "260;44;72;143", "wc_limitations": "32;20;10;72", "wc_review": "866;236;410;485", "wc_reply_reviewers": "424;52;42;40", "wc_reply_authors": "1695;0;639;703", "reply_reviewers": "2;1;1;1", "reply_authors": "5;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.75, 57.941241788556795 ], "wc_strengths_avg": [ 51.5, 19.551214796017153 ], "wc_weaknesses_avg": [ 174.75, 155.61069211336346 ], "wc_questions_avg": [ 129.75, 83.40975662355095 ], "wc_limitations_avg": [ 33.5, 23.553131426627754 ], "wc_review_avg": [ 499.25, 230.20357838226582 ], "wc_reply_reviewers_avg": [ 139.5, 164.31904941302454 ], "wc_reply_authors_avg": [ 759.25, 606.1585498035972 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16302807704584748855&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "lgresearch.ai;lgresearch.ai;berkeley.edu;lgresearch.ai;;;uic.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "LG;University of California, Berkeley;University of Illinois at Chicago", "aff_unique_dep": "LG AI Research;;", "aff_unique_url": "https://www.lgaires.com;https://www.berkeley.edu;https://www.uic.edu", "aff_unique_abbr": "LG AI;UC Berkeley;UIC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Chicago", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Derandomized novelty detection with FDR control via conformal e-values", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70183", "id": "toYvRJ7Zmy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cec8ad7715d0d13899d5d7d31970f527-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=toYvRJ7Zmy", "openreview": "https://openreview.net/forum?id=toYvRJ7Zmy", "poster": "/media/PosterPDFs/NeurIPS%202023/70183.png?t=1701854171.769505", "slides": "https://nips.cc/virtual/2023/poster/70183", "video": "https://nips.cc/virtual/2023/poster/70183", "author_site": "Meshi Bashari, Amir Epstein, Yaniv Romano, Matteo Sesia", "tldr": "", "abstract": "Conformal inference provides a general distribution-free method to rigorously calibrate the output of any machine learning algorithm for novelty detection. While this approach has many strengths, it has the limitation of being randomized, in the sense that it may lead to different results when analyzing the same data twice, and this can hinder the interpretation of any findings. We propose to make conformal inferences more stable by leveraging suitable conformal e-values instead of p-values to quantify statistical significance. 
This solution allows the evidence gathered from multiple analyses of the same data to be aggregated effectively while provably controlling the false discovery rate. Further, we show that the proposed method can reduce randomness without much loss of power compared to standard conformal inference, partly thanks to an innovative way of weighting conformal e-values based on additional side information carefully extracted from the same data. Simulations with synthetic and real data confirm this solution can be effective at eliminating random noise in the inferences obtained with state-of-the-art alternative techniques, sometimes also leading to higher power.", "keywords": "Conformal inference;Derandomization;E-values;False discovery rate;Out-of-distribution testing;Testing for outliers;Uncertainty", "primary_area": "", "supplementary_material": "/attachment/828ac767e7c339602df42aba741f05143367c870.pdf", "author": "Meshi Bashari;Amir Epstein;Yaniv Romano;Matteo Sesia", "authorids": "~Meshi_Bashari1;~Amir_Epstein1;~Yaniv_Romano1;~Matteo_Sesia1", "gender": ";;M;", "homepage": ";;https://sites.google.com/view/yaniv-romano/;https://msesia.github.io/", "dblp": "340/4012;;142/0021;280/1260", "google_scholar": ";;L_m67ywAAAAJ;qFtP1MQAAAAJ", "orcid": ";;;0000-0001-9046-907X", "linkedin": "meshi-bashari;;;matteo-sesia", "or_profile": "~Meshi_Bashari1;~Amir_Epstein1;~Yaniv_Romano1;~Matteo_Sesia1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;;Technion, Technion;University of Southern California", "aff_domain": "campus.technion.ac.il;;technion.ac.il;usc.edu", "position": "MS student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nbashari2023derandomized,\ntitle={Derandomized novelty detection with {FDR} control via conformal e-values},\nauthor={Meshi Bashari and Amir Epstein and Yaniv Romano and Matteo Sesia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=toYvRJ7Zmy}\n}", "github": "", "project": "", "reviewers": "TGb1;ccpQ;Ed41;gnkM", "pdf_size": 435056, "rating": "5;6;7;7", "confidence": "2;4;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "40;63;100;133", "wc_strengths": "65;25;164;207", "wc_weaknesses": "99;126;49;110", "wc_questions": "44;49;1;159", "wc_limitations": "2;39;1;11", "wc_review": "250;302;315;620", "wc_reply_reviewers": "0;82;0;117", "wc_reply_authors": "0;258;0;0", "reply_reviewers": "0;2;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.0, 35.47534355013352 ], "wc_strengths_avg": [ 115.25, 73.25426608737541 ], "wc_weaknesses_avg": [ 96.0, 28.78367592924851 ], "wc_questions_avg": [ 63.25, 58.34541541543774 ], "wc_limitations_avg": [ 13.25, 15.368392889303683 ], "wc_review_avg": [ 371.75, 145.3760210626223 ], "wc_reply_reviewers_avg": [ 49.75, 51.265851207212 ], "wc_reply_authors_avg": [ 64.5, 111.71727708819259 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8234572788413299678&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
"campus.technion.ac.il;;technion.ac.il;usc.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.usc.edu", "aff_unique_abbr": "Technion;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "NetHack is Hard to Hack", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70182", "id": "tp2nEZ5zfP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/764ba7236fb63743014fafbd87dd4f0e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tp2nEZ5zfP", "openreview": "https://openreview.net/forum?id=tp2nEZ5zfP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70182", "video": "https://nips.cc/virtual/2023/poster/70182", "author_site": "Ulyana Piterbarg, Lerrel Pinto, Rob Fergus", "tldr": "", "abstract": "Neural policy learning methods have achieved remarkable results in various control problems, ranging from Atari games to simulated locomotion. However, these methods struggle in long-horizon tasks, especially in open-ended environments with multi-modal observations, such as the popular dungeon-crawler game, NetHack. Intriguingly, the NeurIPS 2021 NetHack Challenge revealed that symbolic agents outperformed neural approaches by over four times in median game score. In this paper, we delve into the reasons behind this performance gap and present an extensive study on neural policy learning for NetHack. To conduct this study, we analyze the winning symbolic agent, extending its codebase to track internal strategy selection in order to generate one of the largest available demonstration datasets. Utilizing this dataset, we examine (i) the advantages of an action hierarchy; (ii) enhancements in neural architecture; and (iii) the integration of reinforcement learning with imitation learning. Our investigations produce a state-of-the-art neural agent that surpasses previous fully neural policies by 127% in offline settings and 25% in online settings on median game score. 
However, we also demonstrate that mere scaling is insufficient to bridge the performance gap with the best symbolic models or even the top human players.", "keywords": "imitation learning;NetHack", "primary_area": "", "supplementary_material": "/attachment/61c358ba4220914b1e59a0006e00f5adeb852dae.zip", "author": "Ulyana Piterbarg;Lerrel Pinto;Rob Fergus", "authorids": "~Ulyana_Piterbarg1;~Lerrel_Pinto1;~Rob_Fergus1", "gender": "F;M;M", "homepage": "https://upiterbarg.github.io/;https://www.lerrelpinto.com/;http://cs.nyu.edu/fergus/", "dblp": "284/4477;168/8304;77/3763", "google_scholar": ";pmVPj94AAAAJ;https://scholar.google.com.tw/citations?user=GgQ9GEkAAAAJ", "orcid": "0000-0002-8363-9648;;", "linkedin": ";;", "or_profile": "~Ulyana_Piterbarg1;~Lerrel_Pinto1;~Rob_Fergus1", "aff": "New York University;New York University;Google", "aff_domain": "cims.nyu.edu;cs.nyu.edu;google.com", "position": "PhD student;Assistant Professor;Research scientist", "bibtex": "@inproceedings{\npiterbarg2023nethack,\ntitle={NetHack is Hard to Hack},\nauthor={Ulyana Piterbarg and Lerrel Pinto and Rob Fergus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tp2nEZ5zfP}\n}", "github": "", "project": "", "reviewers": "83YB;eWjQ;qyRc;PtAe;Ub8t", "pdf_size": 7208845, "rating": "6;6;7;7;7", "confidence": "4;2;4;5;2", "soundness": "3;3;3;2;3", "novelty": "2;2;3;3;4", "presentation": "4;3;3;3;3", "wc_summary": "144;89;626;129;384", "wc_strengths": "359;171;29;53;2", "wc_weaknesses": "273;64;162;323;2", "wc_questions": "1;17;111;435;2", "wc_limitations": "114;53;82;103;19", "wc_review": "891;394;1010;1043;409", "wc_reply_reviewers": "55;0;124;294;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 1.2 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 274.4, 204.03587919775288 ], "wc_strengths_avg": [ 122.8, 131.43576377835674 ], "wc_weaknesses_avg": [ 164.8, 121.16666208161385 ], "wc_questions_avg": [ 113.2, 165.99325287492863 ], "wc_limitations_avg": [ 74.2, 34.55662020510686 ], "wc_review_avg": [ 749.4, 288.56375378761624 ], "wc_reply_reviewers_avg": [ 94.6, 109.62773371733998 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.27216552697590857, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12897987653535288575&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cims.nyu.edu;cs.nyu.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.nyu.edu;https://www.google.com", "aff_unique_abbr": "NYU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "tpIUgkq0xa", "title": "Mastering Symbolic Operations: Augmenting Language Models with Compiled Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "The proficiency of language models (LMs) in handling deterministic symbolic reasoning and rule-based tasks remains limited due to their dependence on implicit learning from textual 
data. To enable full rule-comprehension ability, we explore how to incorporate compiled neural networks (CoNNs), whose weights are specially designed, into the architecture of LMs to achieve high accuracy and robust performance. CoNNs are transformer-based neural networks that execute rules through artificially generated attention weights. Our method, which we call \"Neural Comprehension\", incorporates CoNN modules into the LM, and the resulting framework effectively tackles rule-intensive challenges. Our experiments on symbolic reasoning tasks and real-world arithmetic reasoning tasks demonstrate the superior performance of our method compared to existing techniques. Furthermore, our LM achieves flawless execution on symbolic operations tasks, highlighting the potential of our method in enabling LMs to possess true symbolic comprehension capabilities.", "keywords": "Language Models;Compiled Neural Networks;Neural Comprehension;Symbolic Operations;Length Generalization", "primary_area": "", "supplementary_material": "/attachment/2a77acb972ee006c55ffb267ef4a07ffb9d0f740.zip", "author": "Yixuan Weng;Minjun Zhu;Fei Xia;Bin Li;Shizhu He;Kang Liu;Jun Zhao", "authorids": "~Yixuan_Weng1;~Minjun_Zhu2;~Fei_Xia4;~Bin_Li14;~Shizhu_He2;~Kang_Liu1;~Jun_Zhao4", "gender": "M;F;M;M;M;M;M", "homepage": "https://wengsyx.github.io/;;https://github.com/Alex0xf;https://libincn.top;https://heshizhu.github.io/;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "298/8205;271/6029;79/1081;89/6764-83;136/8650;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "O1XsDEMAAAAJ;cm2ub2kAAAAJ;;2ZIBEWgAAAAJ;zBPIt3QAAAAJ;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "orcid": "0000-0002-9720-8689;;0009-0002-4609-9950;0000-0002-6508-5071;;;", "linkedin": ";;;;;;", "or_profile": "~Yixuan_Weng1;~Minjun_Zhu2;~Fei_Xia4;~Bin_Li14;~Shizhu_He2;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of automation, Chinese Academy of Sciences;Hunan University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ucas.ac.cn;ia.ac.cn;hnu.edu.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn", "position": "MS student;MS student;MS student;PhD student;Associate Researcher;Professor;Full Professor", "bibtex": "@misc{\nweng2023mastering,\ntitle={Mastering Symbolic Operations: Augmenting Language Models with Compiled Neural Networks},\nauthor={Yixuan Weng and Minjun Zhu and Fei Xia and Bin Li and Shizhu He and Kang Liu and Jun Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=tpIUgkq0xa}\n}", "github": "", "project": "", "reviewers": "gBrw;aik1;QFXM;xyMG", "site": "https://openreview.net/forum?id=tpIUgkq0xa", "pdf_size": 0, "rating": "3;4;6;7", "confidence": "3;4;3;4", "soundness": "3;2;2;4", "novelty": "2;3;3;4", "presentation": "2;1;3;4", "wc_summary": "108;49;150;165", "wc_strengths": "44;51;38;65", "wc_weaknesses": "203;141;108;50", "wc_questions": "81;27;108;137", "wc_limitations": "46;1;36;31", "wc_review": "482;269;440;448", "wc_reply_reviewers": "322;115;74;170", "wc_reply_authors": "1041;375;0;331", "reply_reviewers": "1;1;1;2", "reply_authors": "4;2;1;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], 
"presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 118.0, 44.983330245769935 ], "wc_strengths_avg": [ 49.5, 10.062305898749054 ], "wc_weaknesses_avg": [ 125.5, 55.34663494739314 ], "wc_questions_avg": [ 88.25, 40.53008142108772 ], "wc_limitations_avg": [ 28.5, 16.77050983124842 ], "wc_review_avg": [ 409.75, 82.77794090215097 ], "wc_reply_reviewers_avg": [ 170.25, 94.0009973351347 ], "wc_reply_authors_avg": [ 436.75, 377.77796058002116 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.316227766016838, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11908960632930766199&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Hunan University", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn;http://www.hunu.edu.cn/", "aff_unique_abbr": "CAS;UCAS;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Disentangled Counterfactual Learning for Physical Audiovisual Commonsense Reasoning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70181", "id": "trHfuGQyyr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29571f8fda54fe93631c41aad4215abc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=trHfuGQyyr", "openreview": "https://openreview.net/forum?id=trHfuGQyyr", "poster": "/media/PosterPDFs/NeurIPS%202023/70181.png?t=1701780000.1125896", "slides": "https://nips.cc/virtual/2023/poster/70181", "video": "https://nips.cc/virtual/2023/poster/70181", "author_site": "Changsheng Lv, Shuai Zhang, Yapeng Tian, Mengshi Qi, Huadong Ma", "tldr": "", "abstract": "In this paper, we propose a Disentangled Counterfactual Learning (DCL) approach for physical audiovisual commonsense reasoning. The task aims to infer objects\u2019 physics commonsense based on both video and audio input, with the main challenge is how to imitate the reasoning ability of humans. Most of the current methods fail to take full advantage of different characteristics in multi-modal data, and lacking causal reasoning ability in models impedes the progress of implicit physical knowledge inferring. To address these issues, our proposed DCL method decouples videos into static (time-invariant) and dynamic (time-varying) factors in the latent space by the disentangled sequential encoder, which adopts a variational autoencoder (VAE) to maximize the mutual information with a contrastive loss function. Furthermore, we introduce a counterfactual learning module to augment the model\u2019s reasoning ability by modeling physical knowledge relationships among different objects under counterfactual intervention. Our proposed method is a plug-and-play module that can be incorporated into any baseline. In experiments, we show that our proposed method improves baseline methods and achieves state-of-the-art performance. 
Our source code is available at https://github.com/Andy20178/DCL.", "keywords": "Physical Audiovisual\uff1bCommonsense Reasoning", "primary_area": "", "supplementary_material": "/attachment/d6e932ea6caac93f7dd642a3a0c46106b5447ca1.pdf", "author": "Changsheng Lv;Shuai Zhang;Yapeng Tian;Mengshi Qi;Huadong Ma", "authorids": "~Changsheng_Lv1;~Shuai_Zhang17;~Yapeng_Tian1;~Mengshi_Qi1;~Huadong_Ma1", "gender": "M;M;M;M;M", "homepage": ";https://github.com/newbiefff;http://www.yapengtian.com/;https://jueduilingdu.github.io/;https://scs.bupt.edu.cn/", "dblp": "283/7940.html;;176/4020;191/2586;04/6217", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;lxCqdpoAAAAJ;_gH7-4wAAAAJ;", "orcid": ";;;0000-0002-6955-6635;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAADLwE-YBkckp_8MBgBFWqOa1UbOQUfxvypY;;;mengshi-qi-684abb97/;", "or_profile": "~Changsheng_Lv1;~Shuai_Zhang17;~Yapeng_Tian1;~Mengshi_Qi1;~Huadong_Ma1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;University of Texas at Dallas;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;utdallas.edu;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;MS student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlv2023disentangled,\ntitle={Disentangled Counterfactual Learning for Physical Audiovisual Commonsense Reasoning},\nauthor={Changsheng Lv and Shuai Zhang and Yapeng Tian and Mengshi Qi and Huadong Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=trHfuGQyyr}\n}", "github": "", "project": "", "reviewers": "ZZdS;PAiU;j4kk;QHKe;FCQF", "pdf_size": 1142941, "rating": "4;5;5;5;8", "confidence": "4;3;3;3;3", "soundness": "3;3;2;3;3", "novelty": "2;2;2;2;3", "presentation": "2;3;1;3;4", "wc_summary": "52;120;80;87;70", "wc_strengths": "60;169;38;76;70", "wc_weaknesses": "159;205;121;199;173", "wc_questions": "18;10;3;186;375", "wc_limitations": "20;7;9;1;59", "wc_review": "309;511;251;549;747", "wc_reply_reviewers": "0;0;19;48;98", "wc_reply_authors": "65;65;65;30;30", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 81.8, 22.435685859808252 ], "wc_strengths_avg": [ 82.6, 45.09368026675135 ], "wc_weaknesses_avg": [ 171.4, 30.289272028228083 ], "wc_questions_avg": [ 118.4, 145.30051617251743 ], "wc_limitations_avg": [ 19.2, 20.8269056751117 ], "wc_review_avg": [ 473.4, 178.0310085350302 ], "wc_reply_reviewers_avg": [ 33.0, 36.943199644860215 ], "wc_reply_authors_avg": [ 51.0, 17.146428199482248 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5160468465421401, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=680088458660234567&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bupt.edu.cn;bupt.edu.cn;utdallas.edu;bupt.edu.cn;bupt.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;University of Texas at Dallas", "aff_unique_dep": ";", "aff_unique_url": 
"http://www.bupt.edu.cn/;https://www.utdallas.edu", "aff_unique_abbr": "BUPT;UT Dallas", "aff_campus_unique_index": "0;0;1;0;0", "aff_campus_unique": "Beijing;Dallas", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Secure Out-of-Distribution Task Generalization with Energy-Based Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70180", "id": "tt7bQnTdRm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d39e3ae9a11b79691709a7a6e06a63d9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tt7bQnTdRm", "openreview": "https://openreview.net/forum?id=tt7bQnTdRm", "poster": "/media/PosterPDFs/NeurIPS%202023/70180.png?t=1702214367.3567379", "slides": "https://nips.cc/virtual/2023/poster/70180", "video": "https://nips.cc/virtual/2023/poster/70180", "author_site": "Shengzhuang Chen, Long-Kai Huang, Jonathan Richard Schwarz, Yilun Du, Ying Wei", "tldr": "", "abstract": "The success of meta-learning on out-of-distribution (OOD) tasks in the wild has proved to be hit-and-miss.\nTo safeguard the generalization capability of the meta-learned prior knowledge to OOD tasks, in particularly safety-critical applications, necessitates detection of an OOD task followed by adaptation of the task towards the prior. \nNonetheless, the reliability of estimated uncertainty on OOD tasks by existing Bayesian meta-learning methods is restricted by incomplete coverage of the feature distribution shift and insufficient expressiveness of the meta-learned prior. \nBesides, they struggle to adapt an OOD task, running parallel to the line of cross-domain task adaptation solutions which are vulnerable to overfitting.\nTo this end, we build a single coherent framework that supports both detection and adaptation of OOD tasks, while remaining compatible with off-the-shelf meta-learning backbones. \nThe proposed Energy-Based Meta-Learning (EBML) framework learns to characterize any arbitrary meta-training task distribution with the composition of two expressive neural-network-based energy functions. 
We deploy the sum of the two energy functions, being proportional to the joint distribution of a task, as a reliable score for detecting OOD tasks; during meta-testing, we adapt the OOD task to in-distribution tasks by energy minimization.\nExperiments on four regression and classification datasets demonstrate the effectiveness of our proposal.", "keywords": "meta-generalization;out-of-distribution tasks", "primary_area": "", "supplementary_material": "/attachment/587e199d2f37a4a1f8b44cb5a338f3cd2e660e78.zip", "author": "Shengzhuang Chen;Long-Kai Huang;Jonathan Richard Schwarz;Yilun Du;Ying Wei", "authorids": "~Shengzhuang_Chen1;~Long-Kai_Huang1;~Jonathan_Richard_Schwarz1;~Yilun_Du1;~Ying_Wei1", "gender": "M;;;F;M", "homepage": ";https://sites.google.com/site/longkaihugo/home;https://yilundu.github.io;https://wei-ying.net/;https://jonathan-schwarz.github.io", "dblp": ";133/2006;204/4379;14/4899-1;211/7673", "google_scholar": "kpKst1UAAAAJ;CaP64WUAAAAJ;;5UpFdKsAAAAJ;Efs3XxQAAAAJ", "orcid": ";0000-0001-5263-1443;;;", "linkedin": "jerry-chen-45bb15156;;;;schwarzjonathan/", "or_profile": "~Shengzhuang_Chen1;~Long-Kai_Huang1;~Yilun_Du1;~Ying_Wei1;~Jonathan_Schwarz1", "aff": "City University of Hong Kong;Tencent;Massachusetts Institute of Technology;City University of Hong Kong;Google DeepMind", "aff_domain": "cityu.edu.hk;tencent.com;mit.edu;cityu.edu.hk;google.com", "position": "PhD student;Researcher;PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nchen2023secure,\ntitle={Secure Out-of-Distribution Task Generalization with Energy-Based Models},\nauthor={Shengzhuang Chen and Long-Kai Huang and Jonathan Richard Schwarz and Yilun Du and Ying Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tt7bQnTdRm}\n}", "github": "", "project": "", "reviewers": "qEmy;Ghnn;aM9D;92KR;u8ca", "pdf_size": 1383309, "rating": "5;5;6;7;7", "confidence": "2;3;4;5;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "49;67;77;111;153", "wc_strengths": "38;58;28;93;157", "wc_weaknesses": "35;121;67;344;112", "wc_questions": "140;5;2;24;7", "wc_limitations": "4;6;2;9;10", "wc_review": "266;257;176;581;439", "wc_reply_reviewers": "39;0;0;25;122", "wc_reply_authors": "221;0;0;23;270", "reply_reviewers": "1;0;0;1;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 91.4, 36.82173271316819 ], "wc_strengths_avg": [ 74.8, 46.7221574844313 ], "wc_weaknesses_avg": [ 135.8, 108.65247351073053 ], "wc_questions_avg": [ 35.6, 52.758316879900555 ], "wc_limitations_avg": [ 6.2, 2.9933259094191533 ], "wc_review_avg": [ 343.8, 146.33577826355386 ], "wc_reply_reviewers_avg": [ 37.2, 44.968433372756046 ], "wc_reply_authors_avg": [ 102.8, 117.83955193397503 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6577935144802719, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6965179864393621151&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cityu.edu.hk;tencent.com;mit.edu;cityu.edu.hk;google.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "City 
University of Hong Kong;Tencent;Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Tencent Holdings Limited;;Google DeepMind", "aff_unique_url": "https://www.cityu.edu.hk;https://www.tencent.com;https://web.mit.edu;https://deepmind.com", "aff_unique_abbr": "CityU;Tencent;MIT;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "The Bayesian Stability Zoo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70179", "id": "tw4QaiiJex", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c2586b71fd150fb56952e253a9c551cc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tw4QaiiJex", "openreview": "https://openreview.net/forum?id=tw4QaiiJex", "poster": "/media/PosterPDFs/NeurIPS%202023/70179.png?t=1701970103.380378", "slides": "https://nips.cc/virtual/2023/poster/70179", "video": "https://nips.cc/virtual/2023/poster/70179", "author_site": "Shay Moran, Hilla Schefler, Jonathan Shafer", "tldr": "", "abstract": "We show that many definitions of stability found in the learning theory literature are equivalent to one another. \nWe distinguish between two families of definitions of stability: distribution-dependent and distribution-independent Bayesian stability. Within each family, we establish equivalences between various definitions, encompassing approximate differential privacy, pure differential privacy, replicability, global stability, perfect generalization, TV stability, mutual information stability, KL-divergence stability, and R\u00e9nyi-divergence stability. Along the way, we prove boosting results that enable the amplification of the stability of a learning rule. 
This work is a step towards a more systematic taxonomy of stability notions in learning theory, which can promote clarity and an improved understanding of an array of stability concepts that have emerged in recent years.", "keywords": "Algorithmic stability;Replicability;Differential Privacy;KL Stability;Mutual Information Stability;Global Stability;Perfect Generalization;PAC Learning;Littlestone Dimension;Clique Dimension;PAC Bayes", "primary_area": "", "supplementary_material": "", "author": "Shay Moran;Hilla Schefler;Jonathan Shafer", "authorids": "~Shay_Moran1;~Hilla_Schefler1;~Jonathan_Shafer2", "gender": "M;F;", "homepage": "http://www.cs.technion.ac.il/~shaymrn/;;", "dblp": "119/5111;344/4495;", "google_scholar": "kALYnggAAAAJ;;", "orcid": ";;", "linkedin": ";hilla-schefler/;", "or_profile": "~Shay_Moran1;~Hilla_Schefler1;~Jonathan_Shafer2", "aff": "Google;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;", "aff_domain": "google.com;campus.technion.ac.il;", "position": "Visiting Faculty;MS student;", "bibtex": "@inproceedings{\nmoran2023the,\ntitle={The Bayesian Stability Zoo},\nauthor={Shay Moran and Hilla Schefler and Jonathan Shafer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tw4QaiiJex}\n}", "github": "", "project": "", "reviewers": "xEWt;RbiN;uUZW;c5ne;V99Z", "pdf_size": 459657, "rating": "5;5;5;7;8", "confidence": "3;3;2;1;2", "soundness": "2;2;3;3;4", "novelty": "3;2;3;3;3", "presentation": "1;1;2;3;4", "wc_summary": "42;42;37;83;51", "wc_strengths": "45;30;57;53;34", "wc_weaknesses": "259;96;156;29;43", "wc_questions": "32;20;9;119;1", "wc_limitations": "1;88;12;29;1", "wc_review": "379;276;271;313;130", "wc_reply_reviewers": "186;89;0;14;19", "wc_reply_authors": "1481;231;0;0;0", "reply_reviewers": "2;2;0;1;1", "reply_authors": "6;3;1;1;1", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 2.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 1.16619037896906 ], "wc_summary_avg": [ 51.0, 16.625281952496323 ], "wc_strengths_avg": [ 43.8, 10.457533169921097 ], "wc_weaknesses_avg": [ 116.6, 84.07758321931 ], "wc_questions_avg": [ 36.2, 42.69613565652048 ], "wc_limitations_avg": [ 26.2, 32.553955212846255 ], "wc_review_avg": [ 273.8, 81.6024509435838 ], "wc_reply_reviewers_avg": [ 61.6, 69.42794826292939 ], "wc_reply_authors_avg": [ 342.4, 576.2869424167096 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.9595917942265424 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6338656910463873, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18040919583409894622&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "google.com;campus.technion.ac.il;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Technion - Israel Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Google;Technion", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "title": "DinoSR: Self-Distillation and Online Clustering for Self-supervised Speech Representation Learning", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70178", "id": "twmHKU3Ds4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6404bf461c3c3186bdf5f55756af908-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=twmHKU3Ds4", "openreview": "https://openreview.net/forum?id=twmHKU3Ds4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70178", "video": "https://nips.cc/virtual/2023/poster/70178", "author_site": "Alexander Liu, Heng-Jui Chang, Michael Auli, Wei-Ning Hsu, Jim Glass", "tldr": "", "abstract": "In this paper, we introduce self-distillation and online clustering for self-supervised speech representation learning (DinoSR) which combines masked language modeling, self-distillation, and online clustering. We show that these concepts complement each other and result in a strong representation learning model for speech. DinoSR first extracts contextualized embeddings from the input audio with a teacher network, then runs an online clustering system on the embeddings to yield a machine-discovered phone inventory, and finally uses the discretized tokens to guide a student network. We show that DinoSR surpasses previous state-of-the-art performance in several downstream tasks, and provide a detailed analysis of the model and the learned discrete units.", "keywords": "speech representation learning;self-supervised learning;self-distillation;discrete representation learning", "primary_area": "", "supplementary_material": "/attachment/38bbdde4e2324924620809be04d32c29da380183.zip", "author": "Alexander H. Liu;Heng-Jui Chang;Michael Auli;Wei-Ning Hsu;James R. Glass", "authorids": "~Alexander_H._Liu1;~Heng-Jui_Chang1;~Michael_Auli1;~Wei-Ning_Hsu2;~James_R._Glass1", "gender": "M;M;;;", "homepage": "https://alexander-h-liu.github.io/;https://people.csail.mit.edu/hengjui/;;;", "dblp": "227/2380;264/5138;;;", "google_scholar": "LIiCDa0AAAAJ;Kzn0Ks8AAAAJ;;;", "orcid": ";;;;", "linkedin": ";heng-jui-chang;;;", "or_profile": "~Alexander_H._Liu1;~Heng-Jui_Chang1;~Michael_Auli1;~Wei-Ning_Hsu2;~James_R._Glass1", "aff": "Meta Facebook;Massachusetts Institute of Technology;;;", "aff_domain": "meta.com;mit.edu;;;", "position": "Intern;MS student;;;", "bibtex": "@inproceedings{\nliu2023dinosr,\ntitle={Dino{SR}: Self-Distillation and Online Clustering for Self-supervised Speech Representation Learning},\nauthor={Alexander H. Liu and Heng-Jui Chang and Michael Auli and Wei-Ning Hsu and James R. 
Glass},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=twmHKU3Ds4}\n}", "github": "", "project": "", "reviewers": "Z4mQ;zY5D;RFky;UMJ9;Jtdv;Maey", "pdf_size": 726391, "rating": "5;6;7;7;7;7", "confidence": "4;5;4;4;3;4", "soundness": "3;3;3;4;3;3", "novelty": "3;2;3;3;3;3", "presentation": "4;3;4;3;3;3", "wc_summary": "127;50;80;108;167;84", "wc_strengths": "27;35;86;134;107;61", "wc_weaknesses": "132;174;58;68;67;86", "wc_questions": "19;83;25;103;13;275", "wc_limitations": "1;80;38;45;11;11", "wc_review": "306;422;287;458;365;517", "wc_reply_reviewers": "35;0;73;0;0;47", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;0;1;0;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.5, 0.7637626158259734 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 102.66666666666667, 37.406178931056594 ], "wc_strengths_avg": [ 75.0, 38.13572253587616 ], "wc_weaknesses_avg": [ 97.5, 41.90365934696714 ], "wc_questions_avg": [ 86.33333333333333, 90.88209700241053 ], "wc_limitations_avg": [ 31.0, 26.913441499245938 ], "wc_review_avg": [ 392.5, 81.67976085502046 ], "wc_reply_reviewers_avg": [ 25.833333333333332, 28.162721617217485 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3779644730092272, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12886138487838985333&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "meta.com;mit.edu;;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Massachusetts Institute of Technology", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://web.mit.edu", "aff_unique_abbr": "Meta;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Fed-FA: Theoretically Modeling Client Data Divergence for Federated Language Backdoor Defense", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70177", "id": "txPdKZrrZF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c39578c86423df5f9e8834ce1cd456e4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=txPdKZrrZF", "openreview": "https://openreview.net/forum?id=txPdKZrrZF", "poster": "/media/PosterPDFs/NeurIPS%202023/70177.png?t=1697091499.67462", "slides": "https://nips.cc/virtual/2023/poster/70177", "video": "https://nips.cc/virtual/2023/poster/70177", "author_site": "Zhiyuan Zhang, Deli Chen, Hao Zhou, Fandong Meng, Jie Zhou, Xu Sun", "tldr": "", "abstract": "Federated learning algorithms enable neural network models to be trained across multiple decentralized edge devices without sharing private data. However, they are susceptible to backdoor attacks launched by malicious clients. Existing robust federated aggregation algorithms heuristically detect and exclude suspicious clients based on their parameter distances, but they are ineffective on Natural Language Processing (NLP) tasks. 
The main reason is that, although text backdoor patterns are obvious at the underlying dataset level, they are usually hidden at the parameter level, since injecting backdoors into texts with a discrete feature space has less impact on the statistics of the model parameters. To resolve this issue, we propose to identify backdoor clients by explicitly modeling the data divergence among clients in federated NLP systems. Through theoretical analysis, we derive the f-divergence indicator to estimate the client data divergence with aggregation updates and Hessians. Furthermore, we devise a dataset synthesis method with a Hessian reassignment mechanism guided by diffusion theory to address the key challenge of inaccessible datasets in calculating clients' data Hessians.\nWe then present the novel Federated F-Divergence-Based Aggregation~(\textbf{Fed-FA}) algorithm, which leverages the f-divergence indicator to detect and discard suspicious clients. Extensive empirical results show that Fed-FA outperforms all the parameter distance-based methods in defending against backdoor attacks across various natural language backdoor attack scenarios.", "keywords": "federated learning;backdoor learning;robust federated aggregation;data divergence", "primary_area": "", "supplementary_material": "/attachment/410fe402e55f31492c7cffca291668bcf7907d50.zip", "author": "Zhiyuan Zhang;Deli Chen;Hao Zhou;Fandong Meng;Jie Zhou;Xu Sun", "authorids": "~Zhiyuan_Zhang1;~Deli_Chen1;~Hao_Zhou8;~Fandong_Meng3;~Jie_Zhou8;~Xu_Sun1", "gender": "M;M;M;M;M;M", "homepage": "https://pkuzzy.github.io/;;;http://fandongmeng.github.io/;;https://xusun.org/", "dblp": "https://dblp.uni-trier.de/pid/72/1760-1;50/2637;;117/4056.html;00/5012-16;37/1971-1", "google_scholar": "gSEzCUkAAAAJ;8YpGRDcAAAAJ;q3WaozcAAAAJ;sA8U4S0AAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-8158-2377;0000-0002-5899-5165;", "linkedin": ";;;;;", "or_profile": "~Zhiyuan_Zhang1;~Deli_Chen1;~Hao_Zhou8;~Fandong_Meng3;~Jie_Zhou8;~Xu_Sun1", "aff": "Peking University;WeChat AI;Tencent;WeChat AI, Tencent Inc.;WeChat AI, Tencent Inc.;Peking University", "aff_domain": "pku.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;pku.edu.cn", "position": "PhD student;Researcher;Researcher;Principal Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023fedfa,\ntitle={Fed-{FA}: Theoretically Modeling Client Data Divergence for Federated Language Backdoor Defense},\nauthor={Zhiyuan Zhang and Deli Chen and Hao Zhou and Fandong Meng and Jie Zhou and Xu Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=txPdKZrrZF}\n}", "github": "", "project": "", "reviewers": "z5C7;2F5q;hfUd;hSLg;CgUg", "pdf_size": 994351, "rating": "5;5;5;5;7", "confidence": "4;4;3;2;3", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "3;2;2;2;3", "wc_summary": "167;53;122;65;50", "wc_strengths": "75;33;45;53;82", "wc_weaknesses": "70;156;219;60;52", "wc_questions": "14;2;4;97;80", "wc_limitations": "52;2;28;49;153", "wc_review": "378;246;418;324;417", "wc_reply_reviewers": "102;39;59;62;22", "wc_reply_authors": "80;324;478;146;11", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;3;3;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 
0.4898979485566356 ], "wc_summary_avg": [ 91.4, 45.906862232132575 ], "wc_strengths_avg": [ 57.6, 18.347751905887545 ], "wc_weaknesses_avg": [ 111.4, 65.49992366407766 ], "wc_questions_avg": [ 39.4, 40.65267518872528 ], "wc_limitations_avg": [ 56.8, 51.32406842798026 ], "wc_review_avg": [ 356.6, 65.07103810452081 ], "wc_reply_reviewers_avg": [ 56.8, 26.843248685656512 ], "wc_reply_authors_avg": [ 207.8, 170.52436776015327 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.1336306209562122, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11998699653759717762&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "Peking University;WeChat;Tencent", "aff_unique_dep": ";WeChat AI;Tencent Holdings Limited", "aff_unique_url": "http://www.pku.edu.cn;https://www.wechat.com;https://www.tencent.com", "aff_unique_abbr": "Peking U;WeChat AI;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "InstanT: Semi-supervised Learning with Instance-dependent Thresholds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70176", "id": "txv7TnPvOi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/088d99765bc121c6df215da7d45bc4e9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=txv7TnPvOi", "openreview": "https://openreview.net/forum?id=txv7TnPvOi", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70176", "video": "https://nips.cc/virtual/2023/poster/70176", "author_site": "Muyang Li, Runze Wu, Haoyu Liu, Jun Yu, Xun Yang, Bo Han, Tongliang Liu", "tldr": "", "abstract": "Semi-supervised learning (SSL) has been a fundamental challenge in machine learning for decades. The primary family of SSL algorithms, known as pseudo-labeling, involves assigning pseudo-labels to confident unlabeled instances and incorporating them into the training set. Therefore, the selection criteria of confident instances are crucial to the success of SSL. Recently, there has been growing interest in the development of SSL methods that use dynamic or adaptive thresholds. Yet, these methods typically apply the same threshold to all samples, or use class-dependent thresholds for instances belonging to a certain class, while neglecting instance-level information. In this paper, we propose the study of instance-dependent thresholds, which has the highest degree of freedom compared with existing methods. Specifically, we devise a novel instance-dependent threshold function for all unlabeled instances by utilizing their instance-level ambiguity and the instance-dependent error rates of pseudo-labels, so instances that are more likely to have incorrect pseudo-labels will have higher thresholds. 
Furthermore, we demonstrate that our instance-dependent threshold function provides a bounded probabilistic guarantee for the correctness of the pseudo-labels it assigns.", "keywords": "semi-supervised learning;pseudo-labeling", "primary_area": "", "supplementary_material": "", "author": "Muyang Li;Runze Wu;Haoyu Liu;Jun Yu;Xun Yang;Bo Han;Tongliang Liu", "authorids": "~Muyang_Li3;~Runze_Wu1;~Haoyu_Liu1;~Jun_Yu3;~Xun_Yang1;~Bo_Han1;~Tongliang_Liu1", "gender": "M;M;;M;M;;M", "homepage": ";https://wu-runze.github.io/;;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;https://sites.google.com/site/xunyangprofile/;;https://tongliang-liu.github.io/", "dblp": "87/10837;;;50/5754-1.html;72/4772-1;;150/6667", "google_scholar": ";8Uxbo9AAAAAJ;;efZyqyQAAAAJ;ro8lzsUAAAAJ;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";0000-0002-6986-5825;;0000-0002-3197-8103;0000-0003-0201-1638;;", "linkedin": "%E6%B2%90%E9%98%B3-%E6%9D%8E-5b78a5163/;;;;;;", "or_profile": "~Muyang_Li3;~Runze_Wu1;~Haoyu_Liu1;~Jun_Yu3;~Xun_Yang1;~Bo_Han1;~Tongliang_Liu1", "aff": "University of Sydney;NetEase Corp;;University of Science and Technology of China;University of Science and Technology of China;;University of Sydney", "aff_domain": "usyd.edu.au;netease.com;;ustc.edu.cn;ustc.edu.cn;;sydney.edu.au", "position": "PhD student;Principal Researcher;;Associate Professor;Full Professor;;Lecturer", "bibtex": "@inproceedings{\nli2023instant,\ntitle={InstanT: Semi-supervised Learning with Instance-dependent Thresholds},\nauthor={Muyang Li and Runze Wu and Haoyu Liu and Jun Yu and Xun Yang and Bo Han and Tongliang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=txv7TnPvOi}\n}", "github": "", "project": "", "reviewers": "kWe6;1e9N;zzrr;4nX9;Cia8", "pdf_size": 964576, "rating": "5;5;6;7;7", "confidence": "3;4;3;4;4", "soundness": "2;3;3;4;3", "novelty": "2;2;3;4;3", "presentation": "3;3;4;4;3", "wc_summary": "21;38;50;34;81", "wc_strengths": "73;24;54;54;200", "wc_weaknesses": "137;88;166;130;148", "wc_questions": "77;243;5;4;167", "wc_limitations": "1;15;8;4;64", "wc_review": "309;408;283;226;660", "wc_reply_reviewers": "0;22;41;11;17", "wc_reply_authors": "39;44;49;0;51", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 44.8, 20.33125672456083 ], "wc_strengths_avg": [ 81.0, 61.533730587377846 ], "wc_weaknesses_avg": [ 133.8, 25.926048676958086 ], "wc_questions_avg": [ 99.2, 93.49310134977875 ], "wc_limitations_avg": [ 18.4, 23.27745690577044 ], "wc_review_avg": [ 377.2, 153.17362697279188 ], "wc_reply_reviewers_avg": [ 18.2, 13.555810562264435 ], "wc_reply_authors_avg": [ 36.6, 18.76805797092496 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.45643546458763845, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9728693682212205648&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "usyd.edu.au;netease.com;;ustc.edu.cn;ustc.edu.cn;;sydney.edu.au", "author_num": 7, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University of Sydney;NetEase Corporation;University of Science and Technology of China", 
"aff_unique_dep": ";;", "aff_unique_url": "https://www.sydney.edu.au;https://www.163.com;http://www.ustc.edu.cn", "aff_unique_abbr": "USYD;NetEase;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Australia;China" }, { "title": "GEX: A flexible method for approximating influence via Geometric Ensemble", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70175", "id": "tz4ECtAu8e", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1297ca5c906f4bada8f5f6f4e80f9dd2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tz4ECtAu8e", "openreview": "https://openreview.net/forum?id=tz4ECtAu8e", "poster": "/media/PosterPDFs/NeurIPS%202023/70175.png?t=1701754533.5248363", "slides": "https://nips.cc/virtual/2023/poster/70175", "video": "https://nips.cc/virtual/2023/poster/70175", "author_site": "SungYub Kim, Kyungsu Kim, Eunho Yang", "tldr": "", "abstract": "Through a deeper understanding of predictions of neural networks, Influence Function (IF) has been applied to various tasks such as detecting and relabeling mislabeled samples, dataset pruning, and separation of data sources in practice. However, we found standard approximations of IF suffer from performance degradation due to oversimplified influence distributions caused by their bilinear approximation, suppressing the expressive power of samples with a relatively strong influence. To address this issue, we propose a new interpretation of existing IF approximations as an average relationship between two linearized losses over parameters sampled from the Laplace approximation (LA). In doing so, we highlight two significant limitations of current IF approximations: the linearity of gradients and the singularity of Hessian. Accordingly, by improving each point, we introduce a new IF approximation method with the following features: i) the removal of linearization to alleviate the bilinear constraint and ii) the utilization of Geometric Ensemble (GE) tailored for non-linear losses. 
Empirically, our approach outperforms existing IF approximations for downstream tasks with lighter computation, thereby opening up new possibilities for low-complexity/nonlinearity-based IF design.", "keywords": "Influence Function;Geometric Ensemble;Loss Landscape", "primary_area": "", "supplementary_material": "/attachment/c2db05418205c8390c7ace2749956333035e8eb4.pdf", "author": "SungYub Kim;Kyungsu Kim;Eunho Yang", "authorids": "~SungYub_Kim1;~Kyungsu_Kim3;~Eunho_Yang1", "gender": "M;M;M", "homepage": "https://sungyubkim.github.io;https://sites.google.com/site/hleehome2/;https://aibl.snu.ac.kr/", "dblp": "236/4532;96/2621;19/2698-2.html", "google_scholar": "m2rhgrkAAAAJ;;FdXfsvgAAAAJ", "orcid": ";;0000-0001-6622-6545", "linkedin": ";;kyungsu-kim-501b11211", "or_profile": "~SungYub_Kim1;~Eunho_Yang1;~Kyung-Su_Kim1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Massachusetts General Hospital, Harvard Medical School", "aff_domain": "kaist.ac.kr;kaist.ac.kr;mgh.harvard.edu", "position": "PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nkim2023gex,\ntitle={{GEX}: A flexible method for approximating influence via Geometric Ensemble},\nauthor={SungYub Kim and Kyungsu Kim and Eunho Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tz4ECtAu8e}\n}", "github": "", "project": "", "reviewers": "TEqY;sNxP;8KEq;KU2o;FeGS", "pdf_size": 702189, "rating": "5;6;6;7;7", "confidence": "3;2;2;3;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "87;29;99;122;55", "wc_strengths": "62;35;56;76;91", "wc_weaknesses": "179;75;61;180;30", "wc_questions": "89;61;58;51;11", "wc_limitations": "8;3;27;2;1", "wc_review": "425;203;301;431;188", "wc_reply_reviewers": "15;162;19;145;10", "wc_reply_authors": "45;64;21;515;23", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 78.4, 32.824381182285826 ], "wc_strengths_avg": [ 64.0, 18.878559267062727 ], "wc_weaknesses_avg": [ 105.0, 62.54918065010924 ], "wc_questions_avg": [ 54.0, 25.09183134009951 ], "wc_limitations_avg": [ 8.2, 9.703607576566563 ], "wc_review_avg": [ 309.6, 104.19136240591156 ], "wc_reply_reviewers_avg": [ 70.2, 68.28586969498156 ], "wc_reply_authors_avg": [ 133.6, 191.34847791398812 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7789603423549053799&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;kaist.ac.kr;mgh.harvard.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Massachusetts General Hospital", "aff_unique_dep": ";Harvard Medical School", "aff_unique_url": "https://www.kaist.ac.kr;https://www.massgeneral.org", "aff_unique_abbr": "KAIST;MGH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Mr. 
HiSum: A Large-scale Dataset for Video Highlight Detection and Summarization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73441", "id": "tz7XkY6S9Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7f880e3a325b06e3601af1384a653038-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=tz7XkY6S9Z", "openreview": "https://openreview.net/forum?id=tz7XkY6S9Z", "poster": "/media/PosterPDFs/NeurIPS%202023/73441.png?t=1701407339.6263144", "slides": "https://nips.cc/virtual/2023/poster/73441", "video": "https://nips.cc/virtual/2023/poster/73441", "author_site": "Jinhwan Sul, Jihoon Han, Joonseok Lee", "tldr": "", "abstract": "Video highlight detection is the task of automatically selecting the most engaging moments from a long video. This problem is highly challenging since it aims to learn a general way of finding highlights from a variety of videos in the real world. The task has an innate subjectivity because the definition of a highlight differs across individuals. Therefore, to detect consistent and meaningful highlights, prior benchmark datasets have been labeled by multiple (5-20) raters. Due to the high cost of manual labeling, most existing public benchmarks are extremely small in scale, containing only a few tens or hundreds of videos. This insufficient benchmark scale causes multiple issues such as unstable evaluation and high sensitivity to train-test splits. We present Mr. HiSum, a large-scale dataset for video highlight detection and summarization, containing 31,892 videos and reliable labels aggregated over 50,000+ users per video. We empirically prove the reliability of the labels as frame importance by cross-dataset transfer and a user study.", "keywords": "video highlight detection;youtube 8M;large-scale dataset;video summarization", "primary_area": "", "supplementary_material": "/attachment/b589de602419fab0f6341d591ec2f21cd0cc5877.pdf", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nsul2023mr,\ntitle={Mr. 
HiSum: A Large-scale Dataset for Video Highlight Detection and Summarization},\nauthor={Jinhwan Sul and Jihoon Han and Joonseok Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=tz7XkY6S9Z}\n}", "github": "", "project": "", "reviewers": "2KA9;bGs5;g5Rw;gNZt;CELp", "pdf_size": 741073, "rating": "5;6;6;6;7", "confidence": "5;5;3;4;4", "wc_summary_and_contributions": "23;124;137;115;41", "wc_strengths": "25;172;109;67;41", "wc_improvement": "214;366;235;90;47", "wc_limitations": "22;18;79;14;5", "wc_correctness": "9;17;14;6;5", "wc_clarity": "8;20;105;32;2", "wc_relation_to_prior_work": "5;60;1;18;1", "wc_documentation": "8;44;14;5;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "315;822;695;348;144", "wc_reply_reviewers": "139;31;0;0;0", "wc_reply_authors": "1014;935;1151;531;212", "reply_reviewers": "1;1;0;0;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 88.0, 46.604720790924176 ], "wc_strengths_avg": [ 82.8, 52.87872918291437 ], "wc_improvement_avg": [ 190.4, 113.15935666130308 ], "wc_limitations_avg": [ 27.6, 26.310454196003533 ], "wc_correctness_avg": [ 10.2, 4.621688003316537 ], "wc_clarity_avg": [ 33.4, 37.253724646000165 ], "wc_relation_to_prior_work_avg": [ 17.0, 22.387496510329154 ], "wc_documentation_avg": [ 14.4, 15.396103403134184 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 464.8, 252.815664071671 ], "wc_reply_reviewers_avg": [ 34.0, 53.855361850051665 ], "wc_reply_authors_avg": [ 768.6, 346.5248043069933 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15694190596526925615&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "", "author_num": 1 }, { "title": "Knowledge Distillation Performs Partial Variance Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70174", "id": "tzxP9Rx0LV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee1f0da706829d7f198eac0edaacc338-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=tzxP9Rx0LV", "openreview": "https://openreview.net/forum?id=tzxP9Rx0LV", "poster": "/media/PosterPDFs/NeurIPS%202023/70174.png?t=1701715746.2992225", "slides": "https://nips.cc/virtual/2023/poster/70174", "video": "https://nips.cc/virtual/2023/poster/70174", "author_site": "Mher Safaryan, Alexandra Peste, Alexandra Peste, Dan Alistarh, Dan Alistarh", "tldr": "", "abstract": "Knowledge distillation is a popular approach for enhancing the performance of \"student\" models, with lower representational capacity, by taking advantage of more powerful \"teacher\" models. Despite its apparent simplicity, the underlying mechanics behind knowledge distillation (KD) are not yet fully understood. In this work, we shed new light on the inner workings of this method, by examining it from an optimization perspective. Specifically, we show that, in the context of linear and deep linear models, KD can be interpreted as a novel type of stochastic variance reduction mechanism. 
We provide a detailed convergence analysis of the resulting dynamics, which hold under standard assumptions for both strongly-convex and non-convex losses, showing that KD acts as a form of \\emph{partial variance reduction}, which can reduce the stochastic gradient noise, but may not eliminate it completely, depending on the properties of the ``teacher'' model. Our analysis puts further emphasis on the need for careful parametrization of KD, in particular w.r.t. the weighting of the distillation loss, and is validated empirically on both linear models and deep neural networks.", "keywords": "knowledge distillation;stochastic optimization;variance reduction", "primary_area": "", "supplementary_material": "/attachment/94e3b7e28cbb317efb865646d5f2e80a67517a54.zip", "author": "Mher Safaryan;Alexandra Peste;Dan Alistarh", "authorids": "~Mher_Safaryan1;~Alexandra_Peste1;~Dan_Alistarh7", "gender": "M;;M", "homepage": "https://mher-safaryan.github.io;;http://people.csail.mit.edu/alistarh/", "dblp": "259/1444;171/3503;36/3251.html", "google_scholar": "dJNwgT8AAAAJ;AF1bfdYAAAAJ;https://scholar.google.com.tw/citations?user=75q-6ZQAAAAJ", "orcid": "0000-0001-6290-1398;;", "linkedin": "mher-safaryan-94565a257/;alexandra-peste-08a1b2130/;", "or_profile": "~Mher_Safaryan1;~Alexandra_Peste1;~Dan_Alistarh1", "aff": "Institute of Science and Technology;Institute of Science and Technology Austria;Institute of Science and Technology", "aff_domain": "ist.ac.at;ist.ac.at;ist.ac.at", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nsafaryan2023knowledge,\ntitle={Knowledge Distillation Performs Partial Variance Reduction},\nauthor={Mher Safaryan and Alexandra Peste and Dan Alistarh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=tzxP9Rx0LV}\n}", "github": "", "project": "", "reviewers": "AWoB;6fjL;ASQK;1HYD", "pdf_size": 672571, "rating": "6;6;7;7", "confidence": "3;3;3;3", "soundness": "4;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "109;98;105;103", "wc_strengths": "121;94;54;69", "wc_weaknesses": "325;110;59;108", "wc_questions": "57;139;1;112", "wc_limitations": "12;1;10;10", "wc_review": "624;442;229;402", "wc_reply_reviewers": "16;161;14;0", "wc_reply_authors": "0;129;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 103.75, 3.960744879438715 ], "wc_strengths_avg": [ 84.5, 25.46075411294803 ], "wc_weaknesses_avg": [ 150.5, 102.79713031014047 ], "wc_questions_avg": [ 77.25, 53.020632776307 ], "wc_limitations_avg": [ 8.25, 4.264680527307995 ], "wc_review_avg": [ 424.25, 140.38585220740728 ], "wc_reply_reviewers_avg": [ 47.75, 65.67486200975226 ], "wc_reply_authors_avg": [ 32.25, 55.858638544096294 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15218267598426405661&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ist.ac.at;ist.ac.at;ist.ac.at", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Institute of Science and Technology;Institute of Science and Technology Austria", "aff_unique_dep": ";", 
"aff_unique_url": ";https://www.ist.ac.at", "aff_unique_abbr": ";IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Austria" }, { "title": "PlanE: Representation Learning over Planar Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70173", "id": "u2RJ0I3o3j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/33b47b3d2441a17b95344cd635f3dd01-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u2RJ0I3o3j", "openreview": "https://openreview.net/forum?id=u2RJ0I3o3j", "poster": "/media/PosterPDFs/NeurIPS%202023/70173.png?t=1701826213.4584725", "slides": "https://nips.cc/virtual/2023/poster/70173", "video": "https://nips.cc/virtual/2023/poster/70173", "author_site": "Radoslav Dimitrov, Zeyang Zhao, Ralph Abboud, Ismail Ceylan", "tldr": "", "abstract": "Graph neural networks are prominent models for representation learning over graphs, where the idea is to iteratively compute representations of nodes of an input graph through a series of transformations in such a way that the learned graph function is isomorphism-invariant on graphs, which makes the learned representations graph invariants. On the other hand, it is well-known that graph invariants learned by these class of models are incomplete: there are pairs of non-isomorphic graphs which cannot be distinguished by standard graph neural networks. This is unsurprising given the computational difficulty of graph isomorphism testing on general graphs, but the situation begs to differ for special graph classes, for which efficient graph isomorphism testing algorithms are known, such as planar graphs. The goal of this work is to design architectures for efficiently learning complete invariants of planar graphs. Inspired by the classical planar graph isomorphism algorithm of Hopcroft and Tarjan, we propose PlanE as a framework for planar representation learning. PlanE includes architectures which can learn complete invariants over planar graphs while remaining practically scalable. 
We empirically validate the strong performance of the resulting model architectures on well-known planar graph benchmarks, achieving multiple state-of-the-art results.", "keywords": "Graph Representation Learning; Planar Graphs; Graph Property Prediction", "primary_area": "", "supplementary_material": "/attachment/a78bacdfc21b7d5b4d7e5c76f6f74bc794cd1839.pdf", "author": "Radoslav Dimitrov;Zeyang Zhao;Ralph Abboud;Ismail Ilkan Ceylan", "authorids": "~Radoslav_Dimitrov1;zeyang.zhao@cs.ox.ac.uk;~Ralph_Abboud1;~Ismail_Ilkan_Ceylan2", "gender": ";;M;", "homepage": ";;https://www.ralphabboud.com;https://www.cs.ox.ac.uk/people/ismaililkan.ceylan/", "dblp": ";;226/4657;147/6111", "google_scholar": ";;KBiHfLQAAAAJ;avJ5kQcAAAAJ", "orcid": ";;;0000-0003-4118-4689", "linkedin": ";;;", "or_profile": "~Radoslav_Dimitrov1;zeyang.zhao@cs.ox.ac.uk;~Ralph_Abboud1;~Ismail_Ilkan_Ceylan2", "aff": ";;Schmidt Futures;University of Oxford", "aff_domain": ";;schmidtfutures.com;oxford.ac.uk", "position": ";;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndimitrov2023plane,\ntitle={PlanE: Representation Learning over Planar Graphs},\nauthor={Radoslav Dimitrov and Zeyang Zhao and Ralph Abboud and Ismail Ilkan Ceylan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u2RJ0I3o3j}\n}", "github": "", "project": "", "reviewers": "o9ac;Wih2;hVMw;fXaJ;fYDw", "pdf_size": 764057, "rating": "4;6;6;6;6", "confidence": "4;4;3;3;4", "soundness": "2;4;3;3;3", "novelty": "1;3;3;3;3", "presentation": "2;3;3;1;3", "wc_summary": "120;70;53;63;127", "wc_strengths": "50;20;84;53;132", "wc_weaknesses": "348;73;203;146;261", "wc_questions": "4;238;2;41;25", "wc_limitations": "9;19;1;6;37", "wc_review": "531;420;343;309;582", "wc_reply_reviewers": "1005;12;37;80;41", "wc_reply_authors": "1899;55;368;50;11", "reply_reviewers": "4;1;2;1;1", "reply_authors": "5;2;4;2;2", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 86.6, 30.689411854905266 ], "wc_strengths_avg": [ 67.8, 37.959978925178554 ], "wc_weaknesses_avg": [ 206.2, 94.32581831078912 ], "wc_questions_avg": [ 62.0, 89.16277250063504 ], "wc_limitations_avg": [ 14.4, 12.737346662472527 ], "wc_review_avg": [ 437.0, 105.23307464861035 ], "wc_reply_reviewers_avg": [ 235.0, 385.6148337395749 ], "wc_reply_authors_avg": [ 476.6, 722.7078524549183 ], "reply_reviewers_avg": [ 1.8, 1.1661903789690602 ], "reply_authors_avg": [ 3.0, 1.2649110640673518 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11075618523304744472&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": ";;schmidtfutures.com;oxford.ac.uk", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Schmidt Futures;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.schmidtfutures.com;https://www.ox.ac.uk", "aff_unique_abbr": "Schmidt Futures;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "UUKG: Unified Urban Knowledge Graph Dataset for Urban Spatiotemporal Prediction", "status": "Poster", "track": "Datasets & Benchmarks", "site": 
"https://nips.cc/virtual/2023/poster/73440", "id": "u2cXRGm95Y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c4a30a4dd840cfeff30ba4d2661ff097-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=u2cXRGm95Y", "openreview": "https://openreview.net/forum?id=u2cXRGm95Y", "poster": "/media/PosterPDFs/NeurIPS%202023/73440.png?t=1697782680.6462514", "slides": "https://nips.cc/virtual/2023/poster/73440", "video": "https://nips.cc/virtual/2023/poster/73440", "author_site": "Yansong Ning, Hao Liu, Hao Wang, Zhenyu Zeng, Hui Xiong", "tldr": "", "abstract": "Accurate Urban SpatioTemporal Prediction (USTP) is of great importance to the development and operation of the smart city. As an emerging building block, multi-sourced urban data are usually integrated as urban knowledge graphs (UrbanKGs) to provide critical knowledge for urban spatiotemporal prediction models. However, existing UrbanKGs are often tailored for specific downstream prediction tasks and are not publicly available, which limits the potential advancement. This paper presents UUKG, the unified urban knowledge graph dataset for knowledge-enhanced urban spatiotemporal predictions. Specifically, we first construct UrbanKGs consisting of millions of triplets for two metropolises by connecting heterogeneous urban entities such as administrative boroughs, POIs, and road segments. \nMoreover, we conduct qualitative and quantitative analysis on constructed UrbanKGs and uncover diverse high-order structural patterns, such as hierarchies and cycles, that can be leveraged to benefit downstream USTP tasks. To validate and facilitate the use of UrbanKGs, we implement and evaluate 15 KG embedding methods on the KG completion task and integrate the learned KG embeddings into 9 spatiotemporal models for five different USTP tasks. The extensive experimental results not only provide benchmarks of knowledge-enhanced USTP models under different task settings but also highlight the potential of state-of-the-art high-order structure-aware UrbanKG embedding methods. We hope the proposed UUKG fosters research on urban knowledge graphs and broad smart city applications. 
The dataset and source code are available at https://github.com/usail-hkust/UUKG/.", "keywords": "Urban knowledge graph;Urban computing;Knowledge graph representation", "primary_area": "", "supplementary_material": "/attachment/b01ba592984e0a7c5c8818e108cdb07b92d511ae.pdf", "author": "Yansong Ning;Hao Liu;Hao Henry Wang;Zhenyu Zeng;Hui Xiong", "authorids": "~Yansong_Ning3;~Hao_Liu17;~Hao_Henry_Wang1;zhenyu.zzy@alibaba-inc.com;~Hui_Xiong1", "gender": "M;;;;M", "homepage": "https://yasning.github.io/homepage/;https://raymondhliu.github.io/;;;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "349/7861.html;09/3214-26;;;262/1686-1.html", "google_scholar": "2_GJmgMAAAAJ;;;;cVDF1tkAAAAJ", "orcid": ";0000-0003-4271-1567;;;0000-0001-6016-6465", "linkedin": ";;;;", "or_profile": "~Yansong_Ning3;~Hao_Liu17;~Hao_Henry_Wang1;zhenyu.zzy@alibaba-inc.com;~Hui_Xiong1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);The Hong Kong University of Science and Technology (Guangzhou);;;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;hkust-gz.edu.cn;;;hkust.edu", "position": "MS student;Assistant Professor;;;Full Professor", "bibtex": "@inproceedings{\nning2023uukg,\ntitle={{UUKG}: Unified Urban Knowledge Graph Dataset for Urban Spatiotemporal Prediction},\nauthor={Yansong Ning and Hao Liu and Hao Henry Wang and Zhenyu Zeng and Hui Xiong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=u2cXRGm95Y}\n}", "github": "", "project": "", "reviewers": "E5fz;fgmx;6kYe;SDLE;QJBt", "pdf_size": 1028305, "rating": "6;6;7;7;7", "confidence": "4;4;4;4;3", "wc_summary_and_contributions": "66;25;74;42;149", "wc_strengths": "45;32;67;44;22", "wc_improvement": "100;2;169;4;49", "wc_limitations": "27;408;1;41;49", "wc_correctness": "29;1;1;1;1", "wc_clarity": "8;1;1;1;1", "wc_relation_to_prior_work": "7;6;1;1;1", "wc_documentation": "4;7;7;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "287;483;322;136;274", "wc_reply_reviewers": "21;14;23;0;0", "wc_reply_authors": "414;1340;560;222;133", "reply_reviewers": "1;1;1;0;0", "reply_authors": "2;3;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 71.2, 42.60234735316823 ], "wc_strengths_avg": [ 42.0, 15.086417732516887 ], "wc_improvement_avg": [ 64.8, 63.224678725953204 ], "wc_limitations_avg": [ 105.2, 152.27659045303056 ], "wc_correctness_avg": [ 6.6, 11.2 ], "wc_clarity_avg": [ 2.4, 2.8 ], "wc_relation_to_prior_work_avg": [ 3.2, 2.7129319932501073 ], "wc_documentation_avg": [ 4.0, 2.6832815729997477 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.4, 111.09743471385826 ], "wc_reply_reviewers_avg": [ 11.6, 9.93176721434811 ], "wc_reply_authors_avg": [ 533.8, 429.6176905109938 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10898395736885451032&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "hkust.edu;hkust-gz.edu.cn;;;hkust.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", 
"aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Robust Data Valuation with Weighted Banzhaf Values", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70172", "id": "u359tNBpxF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bdb0596d13cfccf2db6f0cc5280d2a3f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u359tNBpxF", "openreview": "https://openreview.net/forum?id=u359tNBpxF", "poster": "/media/PosterPDFs/NeurIPS%202023/70172.png?t=1701783479.444797", "slides": "https://nips.cc/virtual/2023/poster/70172", "video": "https://nips.cc/virtual/2023/poster/70172", "author_site": "Weida Li, Yaoliang Yu", "tldr": "", "abstract": "Data valuation, a principled way to rank the importance of each training datum, has become increasingly important. However, existing value-based approaches (e.g., Shapley) are known to suffer from the stochasticity inherent in utility functions that render consistent and reliable ranking difficult. Recently, Wang and Jia (2023) proposed the noise-structure-agnostic framework to advocate the Banzhaf value for its robustness against such stochasticity as it achieves the largest safe margin among many alternatives. Surprisingly, our empirical study shows that the Banzhaf value is not always the most robust when compared with a broader family: weighted Banzhaf values. To analyze this scenario, we introduce the concept of Kronecker noise to parameterize stochasticity, through which we prove that the uniquely robust semi-value, which can be analytically derived from the underlying Kronecker noise, lies in the family of weighted Banzhaf values while minimizing the worst-case entropy. In addition, we adopt the maximum sample reuse principle to design an estimator to efficiently approximate weighted Banzhaf values, and show that it enjoys the best time complexity in terms of achieving an $(\\epsilon, \\delta)$-approximation. Our theory is verified under both synthetic and authentic noises. For the latter, we fit a Kronecker noise to the inherent stochasticity, which is then plugged in to generate the predicted most robust semi-value. 
Our study suggests that weighted Banzhaf values are promising when facing undue noises in data valuation.", "keywords": "data valuation;robustness;weighted Banzhaf values", "primary_area": "", "supplementary_material": "", "author": "Weida Li;Yaoliang Yu", "authorids": "~Weida_Li1;~Yaoliang_Yu1", "gender": ";M", "homepage": ";https://cs.uwaterloo.ca/~y328yu/", "dblp": "121/8659;90/4989", "google_scholar": "claK_XkAAAAJ;https://scholar.google.ca/citations?user=zbXIQMsAAAAJ", "orcid": ";0000-0002-3823-0720", "linkedin": ";", "or_profile": "~Weida_Li1;~Yaoliang_Yu1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nli2023robust,\ntitle={Robust Data Valuation with Weighted Banzhaf Values},\nauthor={Weida Li and Yaoliang Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u359tNBpxF}\n}", "github": "", "project": "", "reviewers": "WZtW;CJa5;Zq9F;SBfP", "pdf_size": 37644450, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;4;3;3", "wc_summary": "59;105;140;157", "wc_strengths": "38;44;191;300", "wc_weaknesses": "47;82;127;376", "wc_questions": "467;93;109;199", "wc_limitations": "30;1;47;36", "wc_review": "641;325;614;1068", "wc_reply_reviewers": "91;57;27;136", "wc_reply_authors": "320;288;0;251", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 115.25, 37.4991666574072 ], "wc_strengths_avg": [ 143.25, 109.29175403478526 ], "wc_weaknesses_avg": [ 158.0, 129.01744068148307 ], "wc_questions_avg": [ 217.0, 149.8866238194723 ], "wc_limitations_avg": [ 28.5, 17.00735135169495 ], "wc_review_avg": [ 662.0, 265.1178983018687 ], "wc_reply_reviewers_avg": [ 77.75, 40.54241606022019 ], "wc_reply_authors_avg": [ 214.75, 126.36727226620032 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3120910700202967564&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uwaterloo.ca;uwaterloo.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Uncovering motifs of concurrent signaling across multiple neuronal populations", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70171", "id": "u39QQh5L8Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6cf7a37e761f55b642cf0939b4c64bb8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u39QQh5L8Q", "openreview": "https://openreview.net/forum?id=u39QQh5L8Q", "poster": "/media/PosterPDFs/NeurIPS%202023/70171.png?t=1702056911.7320373", "slides": "https://nips.cc/virtual/2023/poster/70171", "video": "https://nips.cc/virtual/2023/poster/70171", "author_site": "Evren Gokcen, Anna Jasper, Alison Xu, Adam Kohn, Christian Machens, 
Byron M Yu", "tldr": "", "abstract": "Modern recording techniques now allow us to record from distinct neuronal populations in different brain networks. However, especially as we consider multiple (more than two) populations, new conceptual and statistical frameworks are needed to characterize the multi-dimensional, concurrent flow of signals among these populations. Here, we develop a dimensionality reduction framework that determines (1) the subset of populations described by each latent dimension, (2) the direction of signal flow among those populations, and (3) how those signals evolve over time within and across experimental trials. We illustrate these features in simulation, and further validate the method by applying it to previously studied recordings from neuronal populations in macaque visual areas V1 and V2. Then we study interactions across select laminar compartments of areas V1, V2, and V3d, recorded simultaneously with multiple Neuropixels probes. Our approach uncovered signatures of selective communication across these three areas that related to their retinotopic alignment. This work advances the study of concurrent signaling across multiple neuronal populations.", "keywords": "neuroscience;multi-population neural recordings;dimensionality reduction;latent variable models;Gaussian processes", "primary_area": "", "supplementary_material": "/attachment/5d4bb63993086ea91cbdeb1b38878728042155e8.pdf", "author": "Evren Gokcen;Anna Ivic Jasper;Alison Xu;Adam Kohn;Christian K. Machens;Byron M. Yu", "authorids": "~Evren_Gokcen1;~Anna_Ivic_Jasper1;~Alison_Xu1;~Adam_Kohn1;~Christian_K._Machens1;~Byron_M._Yu1", "gender": "M;;;M;M;", "homepage": ";;;;http://www.machenslab.org;", "dblp": "369/7203;;;69/4963;61/1246;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;https://scholar.google.pt/citations?user=-4z7V1oAAAAJ;", "orcid": "0000-0002-2301-1429;;;;0000-0003-1717-1562;", "linkedin": ";;;;;", "or_profile": "~Evren_Gokcen1;~Anna_Ivic_Jasper1;~Alison_Xu1;~Adam_Kohn1;~Christian_K._Machens1;~Byron_M._Yu1", "aff": "Carnegie Mellon University;;;Einsteinmed;Champalimaud Centre for the Unknown;", "aff_domain": "andrew.cmu.edu;;;einsteinmed.edu;champalimaud.org;", "position": "PhD student;;;Full Professor;Full Professor;", "bibtex": "@inproceedings{\ngokcen2023uncovering,\ntitle={Uncovering motifs of concurrent signaling across multiple neuronal populations},\nauthor={Evren Gokcen and Anna Ivic Jasper and Alison Xu and Adam Kohn and Christian K. Machens and Byron M. 
Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u39QQh5L8Q}\n}", "github": "", "project": "", "reviewers": "EMi6;DL1P;DK8X;qoeh", "pdf_size": 1390716, "rating": "7;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "4;4;3;4", "wc_summary": "124;128;71;111", "wc_strengths": "98;119;71;117", "wc_weaknesses": "112;133;40;270", "wc_questions": "140;113;106;82", "wc_limitations": "131;23;1;14", "wc_review": "605;516;289;594", "wc_reply_reviewers": "32;27;21;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 108.5, 22.544400635190993 ], "wc_strengths_avg": [ 101.25, 19.292161620720474 ], "wc_weaknesses_avg": [ 138.75, 83.25675648258225 ], "wc_questions_avg": [ 110.25, 20.668514702319566 ], "wc_limitations_avg": [ 42.25, 51.83326634507997 ], "wc_review_avg": [ 501.0, 127.11608867488017 ], "wc_reply_reviewers_avg": [ 25.25, 4.602988159880492 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2628457121757457694&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "andrew.cmu.edu;;;einsteinmed.edu;champalimaud.org;", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;Einstein Medical Center;Champalimaud Centre for the Unknown", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.einstein.edu;https://www.champalimaud.org", "aff_unique_abbr": "CMU;Einstein;CCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Portugal" }, { "title": "Graph Denoising Diffusion for Inverse Protein Folding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70170", "id": "u4YXKKG5dX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/20888d00c5df685de2c09790040e0327-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u4YXKKG5dX", "openreview": "https://openreview.net/forum?id=u4YXKKG5dX", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70170", "video": "https://nips.cc/virtual/2023/poster/70170", "author_site": "Kai Yi, Bingxin Zhou, Yiqing Shen, Yiqing Shen, Pietro Li\u00f3, Yuguang Wang", "tldr": "", "abstract": "Inverse protein folding is challenging due to its inherent one-to-many mapping characteristic, where numerous possible amino acid sequences can fold into a single, identical protein backbone. This task involves not only identifying viable sequences but also representing the sheer diversity of potential solutions. However, existing discriminative models, such as transformer-based auto-regressive models, struggle to encapsulate the diverse range of plausible solutions. In contrast, diffusion probabilistic models, as an emerging genre of generative approaches, offer the potential to generate a diverse set of sequence candidates for determined protein backbones. 
We propose a novel graph denoising diffusion model for inverse protein folding, where a given protein backbone guides the diffusion process on the corresponding amino acid residue types. The model infers the joint distribution of amino acids conditioned on the nodes' physiochemical properties and local environment. Moreover, we utilize amino acid replacement matrices for the diffusion forward process, encoding the biologically-meaningful prior knowledge of amino acids from their spatial and sequential neighbors as well as themselves, which reduces the sampling space of the generative process. Our model achieves state-of-the-art performance over a set of popular baseline methods in sequence recovery and exhibits great potential in generating diverse protein sequences for a determined protein backbone structure.", "keywords": "inverse folding;graph neural networks;roto-translation equivariance;diffusion model", "primary_area": "", "supplementary_material": "/attachment/f35fac7f5af5f1d5243373631017357cc91dd74f.pdf", "author": "Kai Yi;Bingxin Zhou;Yiqing Shen;Pietro Lio;Yu Guang Wang", "authorids": "~Kai_Yi2;~Bingxin_Zhou1;~Yiqing_Shen1;~Pietro_Lio1;~Yu_Guang_Wang1", "gender": "M;F;;M;M", "homepage": ";;;https://www.cst.cam.ac.uk/people/pl219;https://yuguangwang.github.io/", "dblp": ";;;l/PietroLio.html;03/10023-1", "google_scholar": "A_YCRFwAAAAJ;OsvArmcAAAAJ;;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;cMSEByAAAAAJ", "orcid": ";;;0000-0002-0540-5053;", "linkedin": ";;;;", "or_profile": "~Kai_Yi2;~Bingxin_Zhou1;~Yiqing_Shen1;~Pietro_Lio1;~Yu_Guang_Wang1", "aff": "University of New South Wales;Shanghai Jiaotong University;;University of Cambridge;Shanghai Jiaotong University", "aff_domain": "unsw.edu.au;sjtu.edu.cn;;cam.ac.uk;sjtu.edu.cn", "position": "PhD student;Researcher;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyi2023graph,\ntitle={Graph Denoising Diffusion for Inverse Protein Folding},\nauthor={Kai Yi and Bingxin Zhou and Yiqing Shen and Pietro Lio and Yu Guang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u4YXKKG5dX}\n}", "github": "", "project": "", "reviewers": "ZARz;92pD;j843;YmFn", "pdf_size": 5204060, "rating": "5;6;6;7", "confidence": "4;5;3;3", "soundness": "3;2;2;3", "novelty": "3;3;3;3", "presentation": "3;4;2;3", "wc_summary": "79;137;78;86", "wc_strengths": "107;114;49;108", "wc_weaknesses": "85;156;142;128", "wc_questions": "64;142;344;22", "wc_limitations": "1;25;29;8", "wc_review": "336;574;642;352", "wc_reply_reviewers": "0;231;356;64", "wc_reply_authors": "0;300;384;0", "reply_reviewers": "0;2;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.0, 24.443813123160634 ], "wc_strengths_avg": [ 94.5, 26.405491853021786 ], "wc_weaknesses_avg": [ 127.75, 26.592997198510737 ], "wc_questions_avg": [ 143.0, 123.77802712921223 ], "wc_limitations_avg": [ 15.75, 11.60549438843516 ], "wc_review_avg": [ 476.0, 134.29072938963435 ], "wc_reply_reviewers_avg": [ 162.75, 139.8595277412304 ], "wc_reply_authors_avg": [ 171.0, 173.55978796944873 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 55, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=16114716832202163092&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "unsw.edu.au;sjtu.edu.cn;;cam.ac.uk;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of New South Wales;Shanghai Jiao Tong University;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unsw.edu.au;https://www.sjtu.edu.cn;https://www.cam.ac.uk", "aff_unique_abbr": "UNSW;SJTU;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Australia;China;United Kingdom" }, { "title": "MADG: Margin-based Adversarial Learning for Domain Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70169", "id": "u6BYyPuD29", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b87d9d19ecb5927f7e18c537908610ef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u6BYyPuD29", "openreview": "https://openreview.net/forum?id=u6BYyPuD29", "poster": "/media/PosterPDFs/NeurIPS%202023/70169.png?t=1702130936.3696568", "slides": "https://nips.cc/virtual/2023/poster/70169", "video": "https://nips.cc/virtual/2023/poster/70169", "author_site": "Aveen Dayal, Vimal K B, Linga Reddy Cenkeramaddi, C Mohan, Abhinav Kumar, Vineeth N Balasubramanian", "tldr": "", "abstract": "Domain Generalization (DG) techniques have emerged as a popular approach to address the challenges of domain shift in Deep Learning (DL), with the goal of generalizing well to the target domain unseen during the training. In recent years, numerous methods have been proposed to address the DG setting, among which one popular approach is the adversarial learning-based methodology. The main idea behind adversarial DG methods is to learn domain-invariant features by minimizing a discrepancy metric. However, most adversarial DG methods use 0-1 loss based $\\mathcal{H}\\Delta\\mathcal{H}$ divergence metric. In contrast, the margin loss-based discrepancy metric has the following advantages: more informative, tighter, practical, and efficiently optimizable. To mitigate this gap, this work proposes a novel adversarial learning DG algorithm, $\\textbf{MADG}$, motivated by a margin loss-based discrepancy metric. The proposed $\\textbf{MADG}$ model learns domain-invariant features across all source domains and uses adversarial training to generalize well to the unseen target domain. We also provide a theoretical analysis of the proposed $\\textbf{MADG}$ model based on the unseen target error bound. Specifically, we construct the link between the source and unseen domains in the real-valued hypothesis space and derive the generalization bound using margin loss and Rademacher complexity. We extensively experiment with the $\\textbf{MADG}$ model on popular real-world DG datasets, VLCS, PACS, OfficeHome, DomainNet, and TerraIncognita. We evaluate the proposed algorithm on DomainBed's benchmark and observe consistent performance across all the datasets.", "keywords": "Domain Generalization;Margin Loss;Adversarial Learning;Domain Adaptation", "primary_area": "", "supplementary_material": "/attachment/509c5fc0ea9d63c3f4febe425b5691bd59f93c4b.pdf", "author": "Aveen Dayal;Vimal K B;Linga Reddy Cenkeramaddi;C Krishna Mohan;Abhinav Kumar;Vineeth N. 
Balasubramanian", "authorids": "~Aveen_Dayal1;~Vimal_K_B1;~Linga_Reddy_Cenkeramaddi1;~C_Krishna_Mohan1;~Abhinav_Kumar4;~Vineeth_N._Balasubramanian2", "gender": "M;M;M;M;;M", "homepage": "https://sites.google.com/iith.ac.in/aveen-dayal/home?authuser=0;;https://www.uia.no/kk/profil/lingac;https://www.iith.ac.in/~ckm/;https://people.iith.ac.in/abhinavkumar/;https://people.iith.ac.in/vineethnb/", "dblp": "286/2724;;08/5277;30/4639;;88/4691", "google_scholar": "KqkN9IgAAAAJ;;n5BfOZYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=kVHpPOcAAAAJ;https://scholar.google.co.in/citations?user=7soDcboAAAAJ", "orcid": "0000-0001-6792-9170;;0000-0002-1023-2118;0000-0002-7316-0836;0000-0002-6468-7054;0000-0003-2656-0375", "linkedin": "aveen-dayal/;vimalkb07/;iitlingareddy/;;abhinav-kumar-91921916/;vineethnb?originalSubdomain=in", "or_profile": "~Aveen_Dayal1;~Vimal_K_B1;~Linga_Reddy_Cenkeramaddi1;~C_Krishna_Mohan1;~Abhinav_Kumar4;~Vineeth_Balasubramanian1", "aff": "Indian Institute of Technology, Hyderabad;Indian Institute of Technology, Hyderabad;University of Agder;Indian Institute of Technology Hyderabad;Indian Institute of Technology, Hyderabad;Indian Institute of Technology Hyderabad", "aff_domain": "iith.ac.in;iith.ac.in;uia.no;iith.ac.in;iith.ac.in;iith.ac.in", "position": "PhD student;MS student;Full Professor;Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ndayal2023madg,\ntitle={{MADG}: Margin-based Adversarial Learning for Domain Generalization},\nauthor={Aveen Dayal and Vimal K B and Linga Reddy Cenkeramaddi and C Krishna Mohan and Abhinav Kumar and Vineeth N. Balasubramanian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u6BYyPuD29}\n}", "github": "", "project": "", "reviewers": "zZgJ;wf8A;u5h1;MRUq;FJmm", "pdf_size": 723882, "rating": "5;5;5;6;6", "confidence": "3;2;4;3;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;2;3", "wc_summary": "73;54;60;144;143", "wc_strengths": "20;79;38;71;65", "wc_weaknesses": "205;262;182;206;143", "wc_questions": "2;3;32;2;81", "wc_limitations": "23;7;3;3;62", "wc_review": "323;405;315;426;494", "wc_reply_reviewers": "167;73;11;109;282", "wc_reply_authors": "706;474;85;738;448", "reply_reviewers": "2;1;1;1;2", "reply_authors": "3;2;2;3;4", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 94.8, 40.236302016959755 ], "wc_strengths_avg": [ 54.6, 22.1142488002645 ], "wc_weaknesses_avg": [ 199.6, 38.670919306372845 ], "wc_questions_avg": [ 24.0, 30.731091747609618 ], "wc_limitations_avg": [ 19.6, 22.44638055455712 ], "wc_review_avg": [ 392.6, 66.95849460673381 ], "wc_reply_reviewers_avg": [ 128.4, 91.99043428531034 ], "wc_reply_authors_avg": [ 490.2, 234.1814680968586 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5764374234433092123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "iith.ac.in;iith.ac.in;uia.no;iith.ac.in;iith.ac.in;iith.ac.in", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Indian Institute of Technology 
Hyderabad;University of Agder", "aff_unique_dep": ";", "aff_unique_url": "https://www.iith.ac.in;https://www.uia.no", "aff_unique_abbr": "IIT Hyderabad;UiA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hyderabad;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "India;Norway" }, { "title": "Real-World Image Variation by Aligning Diffusion Inversion Chain", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70168", "id": "u6Ibs4hTJH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61960fdfda4d4e95fa1c1f6e64bfe8bc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u6Ibs4hTJH", "openreview": "https://openreview.net/forum?id=u6Ibs4hTJH", "poster": "/media/PosterPDFs/NeurIPS%202023/70168.png?t=1695905799.3886058", "slides": "https://nips.cc/virtual/2023/poster/70168", "video": "https://nips.cc/virtual/2023/poster/70168", "author_site": "Yuechen Zhang, Jinbo Xing, Eric Lo, Jiaya Jia", "tldr": "", "abstract": "Recent diffusion model advancements have enabled high-fidelity images to be generated using text prompts. However, a domain gap exists between generated images and real-world images, which poses a challenge in generating high-quality variations of real-world images. Our investigation uncovers that this domain gap originates from a latents' distribution gap in different diffusion processes. To address this issue, we propose a novel inference pipeline called Real-world Image Variation by ALignment (RIVAL) that utilizes diffusion models to generate image variations from a single image exemplar. Our pipeline enhances the generation quality of image variations by aligning the image generation process to the source image's inversion chain. \nSpecifically, we demonstrate that step-wise latent distribution alignment is essential for generating high-quality variations. \nTo attain this, we design a cross-image self-attention injection for feature interaction and a step-wise distribution normalization to align the latent features. Incorporating these alignment processes into a diffusion model allows RIVAL to generate high-quality image variations without further parameter optimization. Our experimental results demonstrate that our proposed approach outperforms existing methods concerning semantic similarity and perceptual quality. This generalized inference pipeline can be easily applied to other diffusion-based generation tasks, such as image-conditioned text-to-image generation and stylization. 
Project page: https://rival-diff.github.io", "keywords": "image variation;diffusion model;image generation;text-driven image editing", "primary_area": "", "supplementary_material": "/attachment/a4ddd773166fadce6f6f734fd28d98d9fbfab736.pdf", "author": "Yuechen ZHANG;Jinbo Xing;Eric Lo;Jiaya Jia", "authorids": "~Yuechen_ZHANG1;~Jinbo_Xing1;~Eric_Lo1;~Jiaya_Jia1", "gender": "M;M;;M", "homepage": "https://julianjuaner.github.io/;https://doubiiu.github.io/;;https://jiaya.me", "dblp": "298/8473;283/5334;;31/5649", "google_scholar": "8OijNgkAAAAJ;ZB0gLU0AAAAJ;;https://scholar.google.com.tw/citations?user=XPAkzTEAAAAJ", "orcid": ";0000-0002-2181-1879;;", "linkedin": ";jinbo-xing-3158821a1/;;", "or_profile": "~Yuechen_ZHANG1;~Jinbo_Xing1;~Eric_Lo1;~Jiaya_Jia1", "aff": "The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "cuhk.edu.hk;cse.cuhk.edu.hk;;cse.ust.hk", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nzhang2023realworld,\ntitle={Real-World Image Variation by Aligning Diffusion Inversion Chain},\nauthor={Yuechen ZHANG and Jinbo Xing and Eric Lo and Jiaya Jia},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u6Ibs4hTJH}\n}", "github": "", "project": "", "reviewers": "SLMY;4HxC;5k69;aMYr;mFBx", "pdf_size": 15409779, "rating": "5;6;7;7;7", "confidence": "4;4;3;2;5", "soundness": "3;3;3;2;3", "novelty": "3;3;3;3;3", "presentation": "2;3;4;3;3", "wc_summary": "79;97;125;169;71", "wc_strengths": "44;136;93;152;19", "wc_weaknesses": "194;108;42;230;341", "wc_questions": "62;79;37;92;14", "wc_limitations": "9;1;13;15;1", "wc_review": "388;421;310;658;446", "wc_reply_reviewers": "44;22;20;26;28", "wc_reply_authors": "112;60;94;61;82", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 108.2, 35.611234182487976 ], "wc_strengths_avg": [ 88.8, 51.222651239466316 ], "wc_weaknesses_avg": [ 183.0, 102.76186062932103 ], "wc_questions_avg": [ 56.8, 28.22339455132922 ], "wc_limitations_avg": [ 7.8, 5.878775382679628 ], "wc_review_avg": [ 444.6, 116.11993799516085 ], "wc_reply_reviewers_avg": [ 28.0, 8.48528137423857 ], "wc_reply_authors_avg": [ 81.8, 19.84338680769994 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.29417420270727607, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4850546544882108330&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cuhk.edu.hk;cse.cuhk.edu.hk;;cse.ust.hk", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Chinese University of Hong Kong;Hong Kong University of Science and Technology", "aff_unique_dep": ";Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ust.hk", "aff_unique_abbr": "CUHK;HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Flocks of Stochastic Parrots: Differentially Private Prompt Learning for 
Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70167", "id": "u6Xv3FuF8N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f26119b4ffe38c24d97e4c49d334b99e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u6Xv3FuF8N", "openreview": "https://openreview.net/forum?id=u6Xv3FuF8N", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70167", "video": "https://nips.cc/virtual/2023/poster/70167", "author_site": "Haonan Duan, Adam Dziedzic, Nicolas Papernot, Franziska Boenisch", "tldr": "", "abstract": "Large language models (LLMs) are excellent in-context learners. However, the sensitivity of data contained in prompts raises privacy concerns. Our work first shows that these concerns are valid: we instantiate a simple but highly effective membership inference attack against the data used to prompt LLMs. To address this vulnerability, one could forego prompting and resort to fine-tuning LLMs with known algorithms for private gradient descent. However, this comes at the expense of the practicality and efficiency offered by prompting. Therefore, we propose to privately learn to prompt. We first show that soft prompts can be obtained privately through gradient descent on downstream data. However, this is not the case for discrete prompts. Thus, we orchestrate a noisy vote among an ensemble of LLMs presented with different prompts, i.e., a flock of stochastic parrots. The vote privately transfers the flock's knowledge into a single public prompt. We show that LLMs prompted with our private algorithms closely match the non-private baselines. For example, using GPT3 as the base model, we achieve a downstream accuracy of 92.7% on the sst2 dataset with $(\\varepsilon=0.147, \\delta=10^{-6})$-differential privacy vs. 95.2% for the non-private baseline. 
Through our experiments, we also show that our prompt-based approach is easily deployed with existing commercial APIs.", "keywords": "differential privacy;in-context learning;trustworthy ML", "primary_area": "", "supplementary_material": "/attachment/cd1eb242f85a6e17c3cec985087a68ab75161fa0.zip", "author": "Haonan Duan;Adam Dziedzic;Nicolas Papernot;Franziska Boenisch", "authorids": "~Haonan_Duan2;~Adam_Dziedzic1;~Nicolas_Papernot1;~Franziska_Boenisch2", "gender": "M;;M;", "homepage": "https://www.cs.toronto.edu/~haonand/;;https://www.papernot.fr;", "dblp": "273/7767;;162/1405;", "google_scholar": "5WVNRqoAAAAJ;;cGxq0cMAAAAJ;", "orcid": ";;;", "linkedin": ";;nicolaspapernot;", "or_profile": "~Haonan_Duan2;~Adam_Dziedzic1;~Nicolas_Papernot1;~Franziska_Boenisch2", "aff": "Department of Computer Science, University of Toronto;;Google;", "aff_domain": "cs.toronto.edu;;google.com;", "position": "PhD student;;Research Scientist;", "bibtex": "@inproceedings{\nduan2023flocks,\ntitle={Flocks of Stochastic Parrots: Differentially Private Prompt Learning for Large Language Models},\nauthor={Haonan Duan and Adam Dziedzic and Nicolas Papernot and Franziska Boenisch},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u6Xv3FuF8N}\n}", "github": "", "project": "", "reviewers": "RZeY;p7La;k8ve;Hn4c;96HV", "pdf_size": 1001405, "rating": "5;5;6;7;7", "confidence": "3;5;4;4;4", "soundness": "3;3;3;3;4", "novelty": "3;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "104;327;130;93;82", "wc_strengths": "71;80;135;105;83", "wc_weaknesses": "59;419;299;49;18", "wc_questions": "90;137;154;47;23", "wc_limitations": "1;86;46;24;18", "wc_review": "325;1049;764;318;224", "wc_reply_reviewers": "48;0;85;0;13", "wc_reply_authors": "0;0;86;0;15", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 147.2, 91.30038335078336 ], "wc_strengths_avg": [ 94.8, 22.99913041834408 ], "wc_weaknesses_avg": [ 168.8, 160.43740212307105 ], "wc_questions_avg": [ 90.2, 50.2847889525252 ], "wc_limitations_avg": [ 35.0, 29.284808348357004 ], "wc_review_avg": [ 536.0, 317.66082540974423 ], "wc_reply_reviewers_avg": [ 29.2, 32.96907642018502 ], "wc_reply_authors_avg": [ 20.2, 33.408980828513755 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7756144700976401785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cs.toronto.edu;;google.com;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Toronto;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.utoronto.ca;https://www.google.com", "aff_unique_abbr": "U of T;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Toronto;Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "title": "ReDS: Offline RL With Heteroskedastic Datasets via Support Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70166", "id": "u8srPlinoj", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/4b5d47949866d06ab5c03022b4a5a551-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=u8srPlinoj", "openreview": "https://openreview.net/forum?id=u8srPlinoj", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70166", "video": "https://nips.cc/virtual/2023/poster/70166", "author_site": "Anikait Singh, Aviral Kumar, Quan Vuong, Yevgen Chebotar, Sergey Levine", "tldr": "", "abstract": "Offline reinforcement learning (RL) learns policies entirely from static datasets. Practical applications of offline RL will inevitably require learning from datasets where the variability of demonstrated behaviors changes non-uniformly across the state space. For example, at a red light, nearly all human drivers behave similarly by stopping, but when merging onto a highway, some drivers merge quickly, efficiently, and safely, while many hesitate or merge dangerously. Both theoretically and empirically, we show that typical offline RL methods, which are based on distribution constraints fail to learn from data with such non-uniform variability, due to the requirement to stay close to the behavior policy **to the same extent** across the state space. Ideally, the learned policy should be free to choose **per state** how closely to follow the behavior policy to maximize long-term return, as long as the learned policy stays within the support of the behavior policy. To instantiate this principle, we reweight the data distribution in conservative Q-learning (CQL) to obtain an approximate support constraint formulation. The reweighted distribution is a mixture of the current policy and an additional policy trained to mine poor actions that are likely under the behavior policy. Our method, CQL (ReDS), is theoretically motivated, and improves performance across a wide range of offline RL problems in games, navigation, and pixel-based manipulation.", "keywords": "offline RL;support constraints;heteroskedastic data", "primary_area": "", "supplementary_material": "/attachment/903f9c8be6018d03569bef6a1ffb906daa4fb169.zip", "author": "Anikait Singh;Aviral Kumar;Quan Vuong;Yevgen Chebotar;Sergey Levine", "authorids": "~Anikait_Singh1;~Aviral_Kumar2;~Quan_Vuong2;~Yevgen_Chebotar1;~Sergey_Levine1", "gender": "M;M;M;M;M", "homepage": "https://asap7772.github.io/;https://aviralkumar2907.github.io/;https://quanvuong.github.io;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "302/3876;202/7961;;01/11424;80/7594", "google_scholar": "lPaISmIAAAAJ;;NSWI3OwAAAAJ;ADkiClQAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;", "linkedin": "asap7772/;;;;", "or_profile": "~Anikait_Singh1;~Aviral_Kumar2;~Quan_Vuong2;~Yevgen_Chebotar1;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;;Google;Google", "aff_domain": "berkeley.edu;berkeley.edu;;google.com;google.com", "position": "Undergrad student;PhD student;;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nsingh2023reds,\ntitle={Re{DS}: Offline {RL} With Heteroskedastic Datasets via Support Constraints},\nauthor={Anikait Singh and Aviral Kumar and Quan Vuong and Yevgen Chebotar and Sergey Levine},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=u8srPlinoj}\n}", "github": "", "project": "", "reviewers": "huak;XMxS;5pSd;uYnF", "pdf_size": 18139189, "rating": "5;5;6;6", "confidence": "4;4;4;5", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", 
"wc_summary": "78;70;112;46", "wc_strengths": "21;69;15;68", "wc_weaknesses": "310;119;59;155", "wc_questions": "125;116;107;42", "wc_limitations": "84;45;1;15", "wc_review": "618;419;294;326", "wc_reply_reviewers": "220;153;331;61", "wc_reply_authors": "468;300;692;519", "reply_reviewers": "2;2;3;2", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.5, 23.637893307145628 ], "wc_strengths_avg": [ 43.25, 25.341418665891617 ], "wc_weaknesses_avg": [ 160.75, 92.74258730486227 ], "wc_questions_avg": [ 97.5, 32.668792447839266 ], "wc_limitations_avg": [ 36.25, 31.82275129526044 ], "wc_review_avg": [ 414.25, 126.27821466903941 ], "wc_reply_reviewers_avg": [ 191.25, 98.4692210794825 ], "wc_reply_authors_avg": [ 494.75, 139.76654642653227 ], "reply_reviewers_avg": [ 2.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6177438240619795539&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "berkeley.edu;berkeley.edu;;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "(S)GD over Diagonal Linear Networks: Implicit bias, Large Stepsizes and Edge of Stability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70165", "id": "uAyElhYKxg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5da6ce80e97671b70c01a2e703b868b3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uAyElhYKxg", "openreview": "https://openreview.net/forum?id=uAyElhYKxg", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70165", "video": "https://nips.cc/virtual/2023/poster/70165", "author_site": "Mathieu Even, Scott Pesme, Suriya Gunasekar, Nicolas Flammarion", "tldr": "", "abstract": "In this paper, we investigate the impact of stochasticity and large stepsizes on the implicit regularisation of gradient descent (GD) and stochastic gradient descent (SGD) over $2$-layer diagonal linear networks. We prove the convergence of GD and SGD with macroscopic stepsizes in an overparametrised regression setting and characterise their solutions through an implicit regularisation problem. Our crisp characterisation leads to qualitative insights about the impact of stochasticity and stepsizes on the recovered solution. Specifically, we show that large stepsizes consistently benefit SGD for sparse regression problems, while they can hinder the recovery of sparse solutions for GD. These effects are magnified for stepsizes in a tight window just below the divergence threshold, in the ``edge of stability'' regime. 
Our findings are supported by experimental results.", "keywords": "SGD;GD;implicit bias;large stepsizes;edge of stability;diagonal linear networks", "primary_area": "", "supplementary_material": "/attachment/e65ffacd1b014688185868e9d0402c9b8231da7e.pdf", "author": "Mathieu Even;Scott Pesme;Suriya Gunasekar;Nicolas Flammarion", "authorids": "~Mathieu_Even1;~Scott_Pesme1;~Suriya_Gunasekar1;~Nicolas_Flammarion1", "gender": "M;M;;M", "homepage": "https://scholar.google.com/citations?user=Mn8_1hQAAAAJ&hl=fr;https://scottpesme.github.io/;http://sgunasekar.github.io;", "dblp": "277/9882;268/7836;;164/7417", "google_scholar": ";BwCLRb0AAAAJ;EkREu_QAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mathieu_Even1;~Scott_Pesme1;~Suriya_Gunasekar1;~Nicolas_Flammarion1", "aff": "INRIA Paris;Swiss Federal Institute of Technology Lausanne;Microsoft;Swiss Federal Institute of Technology Lausanne", "aff_domain": "inria.fr;epfl.ch;microsoft.com;epfl.ch", "position": "PhD student;PhD student;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\neven2023sgd,\ntitle={(S){GD} over Diagonal Linear Networks: Implicit bias, Large Stepsizes and Edge of Stability},\nauthor={Mathieu Even and Scott Pesme and Suriya Gunasekar and Nicolas Flammarion},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uAyElhYKxg}\n}", "github": "", "project": "", "reviewers": "Y9wy;ZVE8;1MhA;uxxP;JT8J", "pdf_size": 4650508, "rating": "5;6;6;6;7", "confidence": "4;2;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;4;3;4", "wc_summary": "50;88;72;22;47", "wc_strengths": "89;50;57;70;170", "wc_weaknesses": "42;83;75;90;166", "wc_questions": "145;1;42;108;21", "wc_limitations": "38;1;7;5;1", "wc_review": "364;223;253;295;405", "wc_reply_reviewers": "10;9;27;10;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 55.8, 22.59557478799776 ], "wc_strengths_avg": [ 87.2, 43.475970374449375 ], "wc_weaknesses_avg": [ 91.2, 40.85780219248216 ], "wc_questions_avg": [ 63.4, 54.400735289148436 ], "wc_limitations_avg": [ 10.4, 13.994284547628721 ], "wc_review_avg": [ 308.0, 67.77019994068189 ], "wc_reply_reviewers_avg": [ 11.2, 8.749857141690944 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2142364527770485854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "inria.fr;epfl.ch;microsoft.com;epfl.ch", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "INRIA;Swiss Federal Institute of Technology Lausanne;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.inria.fr;https://www.epfl.ch;https://www.microsoft.com", "aff_unique_abbr": "INRIA;EPFL;Microsoft", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Paris;Lausanne;", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "France;Switzerland;United States" }, { "title": "Efficient Robust Bayesian Optimization for Arbitrary Uncertain inputs", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/70164", "id": "uDV4lA0gZ6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/868f2f9a9950f7b0538b3ce7eb4c8eb8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uDV4lA0gZ6", "openreview": "https://openreview.net/forum?id=uDV4lA0gZ6", "poster": "/media/PosterPDFs/NeurIPS%202023/70164.png?t=1700355225.1839242", "slides": "https://nips.cc/virtual/2023/poster/70164", "video": "https://nips.cc/virtual/2023/poster/70164", "author_site": "Lin Yang, Junlong Lyu, Wenlong Lyu, Zhitang Chen", "tldr": "", "abstract": "Bayesian Optimization (BO) is a sample-efficient optimization algorithm widely employed across various applications. In some challenging BO tasks, input uncertainty arises due to the inevitable randomness in the optimization process, such as machining errors, execution noise, or contextual variability. This uncertainty deviates the input from the intended value before evaluation, resulting in significant performance fluctuations in the final result. In this paper, we introduce a novel robust Bayesian Optimization algorithm, AIRBO, which can effectively identify a robust optimum that performs consistently well under arbitrary input uncertainty. Our method directly models the uncertain inputs of arbitrary distributions by empowering the Gaussian Process with the Maximum Mean Discrepancy (MMD) and further accelerates the posterior inference via Nystrom approximation. Rigorous theoretical regret bound is established under MMD estimation error and extensive experiments on synthetic functions and real problems demonstrate that our approach can handle various input uncertainties and achieve a state-of-the-art performance.", "keywords": "bayesian optimization;robust optimization", "primary_area": "", "supplementary_material": "/attachment/0013ec7f8a6842b6608376afd2166cc824fe6388.pdf", "author": "Lin Yang;Junlong Lyu;Wenlong Lyu;Zhitang Chen", "authorids": "~Lin_Yang8;~Junlong_Lyu1;~Wenlong_Lyu1;~Zhitang_Chen1", "gender": "M;M;M;M", "homepage": "https://yanglin-jason.github.io/;;;", "dblp": "20/2970-9;243/2962.html;219/4148;06/10875", "google_scholar": "https://scholar.google.com.hk/citations?user=PlgosSoAAAAJ;S8ogqFcAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lin_Yang8;~Junlong_Lyu1;~Wenlong_Lyu1;~Zhitang_Chen1", "aff": "Noah's Ark Lab, Huawei Technologies Ltd.;Huawei Technologies Ltd.;;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;;huawei.com", "position": "Researcher;Researcher;;Researcher", "bibtex": "@inproceedings{\nyang2023efficient,\ntitle={Efficient Robust Bayesian Optimization for Arbitrary Uncertain inputs},\nauthor={Lin Yang and Junlong Lyu and Wenlong Lyu and Zhitang Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uDV4lA0gZ6}\n}", "github": "", "project": "", "reviewers": "UjwV;JQUf;Yh2J;jZyz", "pdf_size": 1968621, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;3;2;3", "novelty": "3;2;2;2", "presentation": "1;3;3;3", "wc_summary": "78;45;82;111", "wc_strengths": "65;32;52;102", "wc_weaknesses": "372;154;119;297", "wc_questions": "3;4;174;10", "wc_limitations": "1;6;7;31", "wc_review": "519;241;434;551", "wc_reply_reviewers": "531;23;291;52", "wc_reply_authors": "616;37;791;696", "reply_reviewers": "2;1;2;1", "reply_authors": "4;2;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 
2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 79.0, 23.39871791359518 ], "wc_strengths_avg": [ 62.75, 25.52817071393875 ], "wc_weaknesses_avg": [ 235.5, 103.23395759148246 ], "wc_questions_avg": [ 47.75, 72.93961543633199 ], "wc_limitations_avg": [ 11.25, 11.627015954233485 ], "wc_review_avg": [ 436.25, 120.56403900002687 ], "wc_reply_reviewers_avg": [ 224.25, 205.3793745730082 ], "wc_reply_authors_avg": [ 535.0, 294.1181735289406 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13076694289507328482&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "huawei.com;huawei.com;;huawei.com", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Noah's Ark Lab", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Static and Sequential Malicious Attacks in the Context of Selective Forgetting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70163", "id": "uEJfW3OtUm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ed4bacc8c7ca1ee0e1d4e0ef376b7ac7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uEJfW3OtUm", "openreview": "https://openreview.net/forum?id=uEJfW3OtUm", "poster": "/media/PosterPDFs/NeurIPS%202023/70163.png?t=1699583497.4656858", "slides": "https://nips.cc/virtual/2023/poster/70163", "video": "https://nips.cc/virtual/2023/poster/70163", "author_site": "Chenxu Zhao, Wei Qian, Rex Ying, Mengdi Huai", "tldr": "", "abstract": "With the growing demand for the right to be forgotten, there is an increasing need for machine learning models to forget sensitive data and its impact. To address this, the paradigm of selective forgetting (a.k.a. machine unlearning) has been extensively studied, which aims to remove the impact of requested data from a well-trained model without retraining from scratch. Despite its significant success, limited attention has been given to the security vulnerabilities of the unlearning system concerning malicious data update requests. Motivated by this, in this paper, we explore the possibility and feasibility of malicious data update requests during the unlearning process. Specifically, we first propose a new class of malicious selective forgetting attacks, which involves a static scenario where all the malicious data update requests are provided by the adversary at once. Additionally, considering the sequential setting where the data update requests arrive sequentially, we also design a novel framework for sequential forgetting attacks, which is formulated as a stochastic optimal control problem. We also propose novel optimization algorithms that can find effective malicious data update requests. We perform theoretical analyses for the proposed selective forgetting attacks, and extensive experimental results validate the effectiveness of our proposed selective forgetting attacks. 
The source code is available in the supplementary material.", "keywords": "Selective forgetting;static setting;sequential setting;security and robustness", "primary_area": "", "supplementary_material": "/attachment/20cb2ebd74c0a7f319cc702c581ff48daba99dc8.zip", "author": "CHENXU ZHAO;Wei Qian;Zhitao Ying;Mengdi Huai", "authorids": "~CHENXU_ZHAO2;~Wei_Qian5;~Zhitao_Ying1;~Mengdi_Huai1", "gender": "M;M;M;F", "homepage": ";;https://www.cs.yale.edu/homes/ying-rex;https://mdhuai.github.io/", "dblp": ";;209/4936;150/8482", "google_scholar": "6J8ln3QAAAAJ;n1gDJZQAAAAJ;6fqNXooAAAAJ;40ZYTzEAAAAJ", "orcid": ";0000-0002-3298-9218;;0000-0001-6368-5973", "linkedin": "chenxu-zhao-2b6590181/;;rex-ying-92770148/;", "or_profile": "~CHENXU_ZHAO2;~Wei_Qian5;~Zhitao_Ying1;~Mengdi_Huai1", "aff": "Iowa State University;Iowa State University;Yale University;Iowa State University", "aff_domain": "iastate.edu;cs.iastate.edu;yale.edu;iastate.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023static,\ntitle={Static and Sequential Malicious Attacks in the Context of Selective Forgetting},\nauthor={CHENXU ZHAO and Wei Qian and Zhitao Ying and Mengdi Huai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uEJfW3OtUm}\n}", "github": "", "project": "", "reviewers": "MTyE;MEi8;EC3c;a1jN", "pdf_size": 584309, "rating": "4;5;6;7", "confidence": "4;5;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;3", "wc_summary": "51;52;81;166", "wc_strengths": "19;44;48;102", "wc_weaknesses": "81;155;156;185", "wc_questions": "235;6;2;98", "wc_limitations": "1;21;2;47", "wc_review": "387;278;289;598", "wc_reply_reviewers": "0;31;0;31", "wc_reply_authors": "0;17;0;28", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 46.89616188986045 ], "wc_strengths_avg": [ 53.25, 30.26032881513352 ], "wc_weaknesses_avg": [ 144.25, 38.45370593323874 ], "wc_questions_avg": [ 85.25, 94.60278801388466 ], "wc_limitations_avg": [ 17.75, 18.673175948402566 ], "wc_review_avg": [ 388.0, 128.45427201926762 ], "wc_reply_reviewers_avg": [ 15.5, 15.5 ], "wc_reply_authors_avg": [ 11.25, 11.903255857117413 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1980742608120694304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "iastate.edu;cs.iastate.edu;yale.edu;iastate.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Iowa State University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iastate.edu;https://www.yale.edu", "aff_unique_abbr": "ISU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Focus Your Attention when Few-Shot Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70162", "id": "uFlE0qgtRO", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/bbb7506579431a85861a05fff048d3e1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uFlE0qgtRO", "openreview": "https://openreview.net/forum?id=uFlE0qgtRO", "poster": "/media/PosterPDFs/NeurIPS%202023/70162.png?t=1698249464.1800768", "slides": "https://nips.cc/virtual/2023/poster/70162", "video": "https://nips.cc/virtual/2023/poster/70162", "author_site": "Haoqing Wang, Shibo Jie, Zhihong Deng", "tldr": "", "abstract": "Since many pre-trained vision transformers emerge and provide strong representation for various downstream tasks, we aim to adapt them to few-shot image classification tasks in this work. The input images typically contain multiple entities. The model may not focus on the class-related entities for the current few-shot task, even with fine-tuning on support samples, and the noise information from the class-independent ones harms performance. To this end, we first propose a method that uses the attention and gradient information to automatically locate the positions of key entities, denoted as position prompts, in the support images. Then we employ the cross-entropy loss between their many-hot presentation and the attention logits to optimize the model to focus its attention on the key entities during fine-tuning. This ability then can generalize to the query samples. Our method is applicable to different vision transformers (e.g., columnar or pyramidal ones), and also to different pre-training ways (e.g., single-modal or vision-language pre-training). Extensive experiments show that our method can improve the performance of full or parameter-efficient fine-tuning methods on few-shot tasks. Code is available at https://github.com/Haoqing-Wang/FORT.", "keywords": "few-shot image classification;fine-tuning;vision transformers", "primary_area": "", "supplementary_material": "/attachment/e179c28eb08d4e50c61c95585981ef9b6cf83d74.zip", "author": "Haoqing Wang;Shibo Jie;Zhi-Hong Deng", "authorids": "~Haoqing_Wang1;~Shibo_Jie1;~Zhi-Hong_Deng1", "gender": "M;M;M", "homepage": ";;http://www.cis.pku.edu.cn/jzyg/szdw/dzh.htm", "dblp": "251/8849;318/9497;161/4814-1", "google_scholar": "A2kCYnUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=tRoAxlsAAAAJ", "orcid": ";;0000-0002-0263-8142", "linkedin": ";;", "or_profile": "~Haoqing_Wang1;~Shibo_Jie1;~Zhi-Hong_Deng1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023focus,\ntitle={Focus Your Attention when Few-Shot Classification},\nauthor={Haoqing Wang and Shibo Jie and Zhi-Hong Deng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uFlE0qgtRO}\n}", "github": "", "project": "", "reviewers": "SFDV;nhXH;h2aC;TpQg;8eth", "pdf_size": 0, "rating": "4;5;5;5;5", "confidence": "5;4;4;3;4", "soundness": "2;3;2;3;2", "novelty": "2;3;2;3;2", "presentation": "3;3;2;3;3", "wc_summary": "99;112;98;116;63", "wc_strengths": "38;46;54;88;32", "wc_weaknesses": "72;204;169;232;145", "wc_questions": "172;51;30;44;4", "wc_limitations": "35;7;4;34;1", "wc_review": "416;420;355;514;245", "wc_reply_reviewers": "209;48;282;0;20", "wc_reply_authors": "366;33;424;0;0", "reply_reviewers": "1;1;3;0;1", "reply_authors": "3;2;4;1;1", "rating_avg": [ 4.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 
0.6324555320336759 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 97.6, 18.682612237050794 ], "wc_strengths_avg": [ 51.6, 19.652989594461197 ], "wc_weaknesses_avg": [ 164.4, 54.905737405120064 ], "wc_questions_avg": [ 60.2, 58.16665711556751 ], "wc_limitations_avg": [ 16.2, 15.065191668213187 ], "wc_review_avg": [ 390.0, 88.54603322566177 ], "wc_reply_reviewers_avg": [ 111.8, 112.61687262573045 ], "wc_reply_authors_avg": [ 164.6, 189.39651527945279 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.790569415042095, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=713096675040537153&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "FairLISA: Fair User Modeling with Limited Sensitive Attributes Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70161", "id": "uFpjPJMkv6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/81a12aed87eb9c75dfdf91ed99d5519d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uFpjPJMkv6", "openreview": "https://openreview.net/forum?id=uFpjPJMkv6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70161", "video": "https://nips.cc/virtual/2023/poster/70161", "author_site": "zheng zhang, Qi Liu, Hao Jiang, Fei Wang, Yan Zhuang, Le Wu, Weibo Gao, Enhong Chen", "tldr": "", "abstract": "User modeling techniques profile users' latent characteristics (e.g., preference) from their observed behaviors, and play a crucial role in decision-making. Unfortunately, traditional user models may unconsciously capture biases related to sensitive attributes (e.g., gender) from behavior data, even when this sensitive information is not explicitly provided. This can lead to unfair issues and discrimination against certain groups based on these sensitive attributes. Recent studies have been proposed to improve fairness by explicitly decorrelating user modeling results and sensitive attributes. However, most existing approaches assume that fully sensitive attribute labels are available in the training set, which is unrealistic due to collection limitations like privacy concerns, and hence bear the limitation of performance. In this paper, we focus on a practical situation with limited sensitive data and propose a novel FairLISA framework, which can efficiently utilize data with known and unknown sensitive attributes to facilitate fair model training. We first propose a novel theoretical perspective to build the relationship between data with both known and unknown sensitive attributes with the fairness objective. Then, based on this, we provide a general adversarial framework to effectively leverage the whole user data for fair user modeling. We conduct experiments on representative user modeling tasks including recommender system and cognitive diagnosis. 
The results demonstrate that our FairLISA can effectively improve fairness while retaining high accuracy in scenarios with different ratios of missing sensitive attributes.", "keywords": "fairness;user modeling", "primary_area": "", "supplementary_material": "", "author": "Zheng Zhang;Qi Liu;Hao Jiang;Fei Wang;Yan Zhuang;Le Wu;Weibo Gao;Enhong Chen", "authorids": "~Zheng_Zhang20;~Qi_Liu3;~Hao_Jiang15;~Fei_Wang19;~Yan_Zhuang4;~Le_Wu1;~Weibo_Gao1;~Enhong_Chen1", "gender": ";M;;M;M;F;;M", "homepage": ";http://staff.ustc.edu.cn/~qiliuql/;;;http://home.ustc.edu.cn/~zykb/;http://le-wu.com/;;http://staff.ustc.edu.cn/~cheneh", "dblp": ";95/2446-3;;;;121/4234;;07/258", "google_scholar": ";5EoHAFwAAAAJ;;6-aoQBkAAAAJ;7MX_P5cAAAAJ;4EzlnxwAAAAJ;;Q9h02J0AAAAJ", "orcid": ";0000-0001-6956-5550;;0000-0001-6890-619X;0000-0001-7351-377X;0000-0003-4556-0581;;0000-0002-4835-4102", "linkedin": ";;;;;;;", "or_profile": "~Zheng_Zhang20;~Qi_Liu3;~Hao_Jiang15;~Fei_Wang19;~Yan_Zhuang4;~Le_Wu1;~Weibo_Gao1;~Enhong_Chen1", "aff": ";University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;Hefei University of Technology;;University of Science and Technology of China", "aff_domain": ";ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;hfut.edu;;ustc.edu.cn", "position": ";Full Professor;;PhD student;PhD student;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nzhang2023fairlisa,\ntitle={Fair{LISA}: Fair User Modeling with Limited Sensitive Attributes Information},\nauthor={Zheng Zhang and Qi Liu and Hao Jiang and Fei Wang and Yan Zhuang and Le Wu and Weibo Gao and Enhong Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uFpjPJMkv6}\n}", "github": "", "project": "", "reviewers": "KpgW;Gbnv;teqm;QZCP;xseN", "pdf_size": 592067, "rating": "4;5;6;7;8", "confidence": "4;4;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;4;3", "wc_summary": "41;62;13;64;74", "wc_strengths": "10;28;60;77;154", "wc_weaknesses": "154;3;209;64;114", "wc_questions": "65;3;2;22;89", "wc_limitations": "2;276;8;43;28", "wc_review": "272;372;292;270;459", "wc_reply_reviewers": "0;163;11;79;36", "wc_reply_authors": "0;1067;0;274;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;1;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 50.8, 21.738445206591937 ], "wc_strengths_avg": [ 65.8, 49.9615852430645 ], "wc_weaknesses_avg": [ 108.8, 71.1488580372166 ], "wc_questions_avg": [ 36.2, 34.90214893097558 ], "wc_limitations_avg": [ 71.4, 103.33169891180536 ], "wc_review_avg": [ 333.0, 73.16829914655663 ], "wc_reply_reviewers_avg": [ 57.8, 59.19932432046839 ], "wc_reply_authors_avg": [ 268.2, 413.2574984195689 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7408561312365138869&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;hfut.edu;;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Hefei University of Technology", 
"aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;http://www.hfut.edu.cn/", "aff_unique_abbr": "USTC;HUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ScenarioNet: Open-Source Platform for Large-Scale Traffic Scenario Simulation and Modeling", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73439", "id": "uHlKNCDAJb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c26a501df8fb919a0350e2df06b5d39-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=uHlKNCDAJb", "openreview": "https://openreview.net/forum?id=uHlKNCDAJb", "poster": "/media/PosterPDFs/NeurIPS%202023/73439.png?t=1701739532.7135832", "slides": "https://nips.cc/virtual/2023/poster/73439", "video": "https://nips.cc/virtual/2023/poster/73439", "author_site": "Quanyi Li, Zhenghao (Mark) Peng, Lan Feng, Zhizheng Liu, Chenda Duan, Wenjie Mo, Bolei Zhou", "tldr": "", "abstract": "Large-scale driving datasets such as Waymo Open Dataset and nuScenes substantially accelerate autonomous driving research, especially for perception tasks such as 3D detection and trajectory forecasting. Since the driving logs in these datasets contain HD maps and detailed object annotations which accurately reflect the real-world complexity of traffic behaviors, we can harvest a massive number of complex traffic scenarios and recreate their digital twins in simulation. Compared to the hand-crafted scenarios often used in existing simulators, data-driven scenarios collected from the real world can facilitate many research opportunities in machine learning and autonomous driving. In this work, we present ScenarioNet, an open-source platform for large-scale traffic scenario modeling and simulation. ScenarioNet defines a unified scenario description format and collects a large-scale repository of real-world traffic scenarios from the heterogeneous data in various driving datasets including Waymo, nuScenes, Lyft L5, and nuPlan datasets. These scenarios can be further replayed and interacted with in multiple views from Bird-Eye-View layout to realistic 3D rendering in MetaDrive simulator. This provides a benchmark for evaluating the safety of autonomous driving stacks in simulation before their real-world deployment. We further demonstrate the strengths of ScenarioNet on large-scale scenario generation, imitation learning, and reinforcement learning in both single-agent and multi-agent settings. 
Code, demo videos, and website are available at https://github.com/metadriverse/scenarionet", "keywords": "Autonomous Driving;Real-world Scenarios;Reinforcement Learning;Traffic Simulation;AD Stack Testing", "primary_area": "", "supplementary_material": "/attachment/29ec6aaedcf0ac8e0d9bdb73f474da62b2c7ca3b.pdf", "author": "Quanyi Li;Zhenghao Peng;Lan Feng;Zhizheng Liu;Chenda Duan;Wenjie Mo;Bolei Zhou", "authorids": "~Quanyi_Li1;~Zhenghao_Peng1;~Lan_Feng1;~Zhizheng_Liu1;~Chenda_Duan1;~Wenjie_Mo1;~Bolei_Zhou5", "gender": "M;M;M;M;M;M;M", "homepage": "https://quanyili.github.io;https://pengzhenghao.github.io;https://alan-lanfeng.github.io/;;https://chendaduan.com/;https://wenjie-mo.github.io/;https://boleizhou.github.io/", "dblp": "270/7691;220/3963;231/7529;287/4356;349/8266;344/2097;46/8066", "google_scholar": "Ty49X3UAAAAJ;JZ8ws6IAAAAJ;8-QJ-kkAAAAJ;Asc7j9oAAAAJ;DooYOyoAAAAJ;;9D4aG8AAAAAJ", "orcid": ";;;0009-0006-9426-3718;0009-0003-8652-3960;;", "linkedin": "https://www.linkedin.com/mwlite/in/quanyi-li-2b7985183;;;zhizheng-liu-4921401a6/;chenda-d/;wenjie-mo;", "or_profile": "~Quanyi_Li1;~Zhenghao_Peng1;~Lan_Feng1;~Zhizheng_Liu1;~Chenda_Duan1;~Wenjie_Mo1;~Bolei_Zhou5", "aff": "Shanghai Artificial Intelligence Laboratory;University of California, Los Angeles;ETHZ - ETH Zurich;ETHZ - ETH Zurich;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "pjlab.org.cn;cs.ucla.edu;ethz.ch;ethz.ch;ucla.edu;ucla.edu;ucla.edu", "position": "Researcher;PhD student;MS student;MS student;MS student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nli2023scenarionet,\ntitle={ScenarioNet: Open-Source Platform for Large-Scale Traffic Scenario Simulation and Modeling},\nauthor={Quanyi Li and Zhenghao Peng and Lan Feng and Zhizheng Liu and Chenda Duan and Wenjie Mo and Bolei Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=uHlKNCDAJb}\n}", "github": "", "project": "", "reviewers": "KCPD;Wnbm;Ei6A;DPk1;7uZv", "pdf_size": 10229146, "rating": "6;6;7;7;7", "confidence": "3;3;3;3;4", "wc_summary_and_contributions": "32;53;327;45;102", "wc_strengths": "41;37;174;52;146", "wc_improvement": "70;69;422;121;234", "wc_limitations": "65;7;73;61;27", "wc_correctness": "11;6;72;15;37", "wc_clarity": "41;7;33;25;33", "wc_relation_to_prior_work": "20;10;15;25;31", "wc_documentation": "31;10;31;14;50", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "312;200;1148;359;661", "wc_reply_reviewers": "0;0;193;0;0", "wc_reply_authors": "383;379;960;681;370", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 111.8, 110.1769485872612 ], "wc_strengths_avg": [ 90.0, 58.04481027619954 ], "wc_improvement_avg": [ 183.2, 133.66435575724742 ], "wc_limitations_avg": [ 46.6, 25.279240494919936 ], "wc_correctness_avg": [ 28.2, 24.326117651610584 ], "wc_clarity_avg": [ 27.8, 11.565465835840769 ], "wc_relation_to_prior_work_avg": [ 20.2, 7.35934779718964 ], "wc_documentation_avg": [ 27.2, 14.274452704044382 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 536.0, 341.9268927709548 ], "wc_reply_reviewers_avg": [ 38.6, 77.19999999999999 ], "wc_reply_authors_avg": [ 554.6, 234.3865183836306 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], 
"replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5665998064954083573&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 9, "email": "pjlab.org.cn;cs.ucla.edu;ethz.ch;ethz.ch;ucla.edu;ucla.edu;ucla.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;1;1;1", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;University of California, Los Angeles;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "http://www.shailab.org/;https://www.ucla.edu;https://www.ethz.ch", "aff_unique_abbr": "Shanghai AI Lab;UCLA;ETHZ", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;2;2;1;1;1", "aff_country_unique": "China;United States;Switzerland" }, { "title": "Building Socio-culturally Inclusive Stereotype Resources with Community Engagement", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73438", "id": "uIj1jDc8k6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0dc91de822b71c66a7f54fa121d8cbb9-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=uIj1jDc8k6", "openreview": "https://openreview.net/forum?id=uIj1jDc8k6", "poster": "/media/PosterPDFs/NeurIPS%202023/73438.png?t=1701411106.6979687", "slides": "https://nips.cc/virtual/2023/poster/73438", "video": "https://nips.cc/virtual/2023/poster/73438", "author_site": "Sunipa Dev, Jaya Goyal, Dinesh Tewari, Shachi Dave, Vinodkumar Prabhakaran", "tldr": "", "abstract": "With rapid development and deployment of generative language models in global settings, there is an urgent need to also scale our measurements of harm, not just in the number and types of harms covered, but also how well they account for local cultural contexts, including marginalized identities and the social biases experienced by them.\nCurrent evaluation paradigms are limited in their abilities to address this, as they are not representative of diverse, locally situated but global, socio-cultural perspectives. It is imperative that our evaluation resources are enhanced and calibrated by including people and experiences from different cultures and societies worldwide, in order to prevent gross underestimations or skews in measurements of harm. In this work, we demonstrate a socio-culturally aware expansion of evaluation resources in the Indian societal context, specifically for the harm of stereotyping. We devise a community engaged effort to build a resource which contains stereotypes for axes of disparity that are uniquely present in India. The resultant resource increases the number of stereotypes known for and in the Indian context by over 1000 stereotypes across many unique identities. 
We also demonstrate the utility and effectiveness of such expanded resources for evaluations of language models.\nCONTENT WARNING: This paper contains examples of stereotypes that may be offensive.", "keywords": "stereotype;dataset;evaluation;language models;cross cultural", "primary_area": "", "supplementary_material": "", "author": "Sunipa Dev;Jaya Goyal;Dinesh Tewari;Shachi Dave;Vinodkumar Prabhakaran", "authorids": "~Sunipa_Dev1;jaya.goyal@gmail.com;dineshtewari@google.com;~Shachi_Dave1;~Vinodkumar_Prabhakaran2", "gender": "F;;;;M", "homepage": "https://sunipa.github.io/;;;https://research.google/people/106762/;https://www.cs.stanford.edu/~vinod/", "dblp": "222/1630;;;66/3545;64/9281", "google_scholar": "EV1DgP0AAAAJ;;;;HwryFLcAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Sunipa_Dev1;jaya.goyal@gmail.com;dineshtewari@google.com;~Shachi_Dave1;~Vinodkumar_Prabhakaran2", "aff": "Google;;;Research, Google;Google", "aff_domain": "google.com;;;research.google.com;google.com", "position": "Research Scientist;;;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndev2023building,\ntitle={Building Socio-culturally Inclusive Stereotype Resources with Community Engagement},\nauthor={Sunipa Dev and Jaya Goyal and Dinesh Tewari and Shachi Dave and Vinodkumar Prabhakaran},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=uIj1jDc8k6}\n}", "github": "", "project": "", "reviewers": "PwCG;9hoA;eMmS;9GE9", "pdf_size": 212051, "rating": "6;6;6;7", "confidence": "4;3;3;4", "wc_summary_and_contributions": "77;33;26;137", "wc_strengths": "60;41;64;112", "wc_improvement": "133;131;63;188", "wc_limitations": "65;98;17;36", "wc_correctness": "9;7;9;4", "wc_clarity": "3;20;49;2", "wc_relation_to_prior_work": "12;39;35;4", "wc_documentation": "8;7;25;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "368;377;289;490", "wc_reply_reviewers": "18;21;26;15", "wc_reply_authors": "349;660;179;353", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 68.25, 44.24576250896802 ], "wc_strengths_avg": [ 69.25, 26.166533969939543 ], "wc_improvement_avg": [ 128.75, 44.31915500096995 ], "wc_limitations_avg": [ 54.0, 30.618621784789728 ], "wc_correctness_avg": [ 7.25, 2.0463381929681126 ], "wc_clarity_avg": [ 18.5, 19.00657780874821 ], "wc_relation_to_prior_work_avg": [ 22.5, 14.84082207965583 ], "wc_documentation_avg": [ 11.5, 7.826237921249264 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 381.0, 71.64146843832837 ], "wc_reply_reviewers_avg": [ 20.0, 4.06201920231798 ], "wc_reply_authors_avg": [ 385.25, 173.4796457801318 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1449642450034051188&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;;;research.google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Synthcity: a 
benchmark framework for diverse use cases of tabular synthetic data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73437", "id": "uIppiU2JKP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/09723c9f291f6056fd1885081859c186-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=uIppiU2JKP", "openreview": "https://openreview.net/forum?id=uIppiU2JKP", "poster": "/media/PosterPDFs/NeurIPS%202023/73437.png?t=1697707836.7595446", "slides": "https://nips.cc/virtual/2023/poster/73437", "video": "https://nips.cc/virtual/2023/poster/73437", "author_site": "Zhaozhi Qian, Rob Davis, Mihaela van der Schaar", "tldr": "", "abstract": "Accessible high-quality data is the bread and butter of machine learning research, and the demand for data has exploded as larger and more advanced ML models are built across different domains. Yet, real data often contain sensitive information, are subject to various biases, and are costly to acquire, which compromise their quality and accessibility. Synthetic data have thus emerged as a complement to, sometimes even a replacement for, real data for ML training. However, the landscape of synthetic data research has been fragmented due to the diverse range of data modalities, such as tabular, time series, and images, and the wide array of use cases, including privacy preservation, fairness considerations, and data augmentation. This fragmentation poses practical challenges when comparing and selecting synthetic data generators for different problem settings. To this end, we develop Synthcity, an open-source Python library that allows researchers and practitioners to perform one-click benchmarking of synthetic data generators across data modalities and use cases. Beyond benchmarking, Synthcity serves as a centralized toolkit for accessing cutting-edge data generators. In addition, Synthcity\u2019s flexible plug-in style API makes it easy to incorporate additional data generators into the framework.
Using examples of tabular data generation and data augmentation, we illustrate the general applicability of Synthcity, and the insight one can obtain.", "keywords": "Synthetic data;generative models;data augmentation;privacy", "primary_area": "", "supplementary_material": "/attachment/15129db407827bf6fe79063b81ecaa956a3cd312.pdf", "author": "Zhaozhi Qian;Rob Davis;Mihaela van der Schaar", "authorids": "~Zhaozhi_Qian1;~Rob_Davis1;~Mihaela_van_der_Schaar2", "gender": ";M;F", "homepage": ";;https://www.vanderschaar-lab.com", "dblp": "194/2443;;", "google_scholar": "PuTDB5gAAAAJ;;DZ3S--MAAAAJ", "orcid": "0000-0002-4561-0342;;", "linkedin": ";robert-davis-1b380112a/;", "or_profile": "~Zhaozhi_Qian1;~Rob_Davis1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;;University of California, Los Angeles", "aff_domain": "cam.ac.uk;;ucla.edu", "position": "Postdoc;;Full Professor", "bibtex": "@inproceedings{\nqian2023synthcity,\ntitle={Synthcity: a benchmark framework for diverse use cases of tabular synthetic data},\nauthor={Zhaozhi Qian and Rob Davis and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=uIppiU2JKP}\n}", "github": "", "project": "", "reviewers": "qioZ;T6Ey;rLVX", "pdf_size": 296879, "rating": "6;6;7", "confidence": "4;4;4", "wc_summary_and_contributions": "174;57;67", "wc_strengths": "66;69;103", "wc_improvement": "283;23;116", "wc_limitations": "4;216;13", "wc_correctness": "36;56;12", "wc_clarity": "5;22;12", "wc_relation_to_prior_work": "27;46;7", "wc_documentation": "10;43;13", "wc_additional_feedback": "1;1;1", "wc_review": "606;533;344", "wc_reply_reviewers": "12;14;14", "wc_reply_authors": "451;657;167", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 99.33333333333333, 52.95490744229681 ], "wc_strengths_avg": [ 79.33333333333333, 16.779617264870957 ], "wc_improvement_avg": [ 140.66666666666666, 107.56806630635734 ], "wc_limitations_avg": [ 77.66666666666667, 97.8854205464509 ], "wc_correctness_avg": [ 34.666666666666664, 17.98765008430939 ], "wc_clarity_avg": [ 13.0, 6.97614984548545 ], "wc_relation_to_prior_work_avg": [ 26.666666666666668, 15.923427883328248 ], "wc_documentation_avg": [ 22.0, 14.89966442575134 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 494.3333333333333, 110.40028180318302 ], "wc_reply_reviewers_avg": [ 13.333333333333334, 0.9428090415820634 ], "wc_reply_authors_avg": [ 425.0, 200.88470988770317 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17585202233106796664&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": "cam.ac.uk;;ucla.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Exploring Geometry of Blind Spots in Vision models", "status": 
"Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70160", "id": "uJ3qNIsDGF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/90043ebd68500f9efe84fedf860a64f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uJ3qNIsDGF", "openreview": "https://openreview.net/forum?id=uJ3qNIsDGF", "poster": "/media/PosterPDFs/NeurIPS%202023/70160.png?t=1702568721.7612402", "slides": "https://nips.cc/virtual/2023/poster/70160", "video": "https://nips.cc/virtual/2023/poster/70160", "author_site": "Sriram Balasubramanian, Gaurang Sriramanan, Vinu Sankar Sadasivan, Soheil Feizi", "tldr": "", "abstract": "Despite the remarkable success of deep neural networks in a myriad of settings, several works have demonstrated their overwhelming sensitivity to near-imperceptible perturbations, known as adversarial attacks. On the other hand, prior works have also observed that deep networks can be under-sensitive, wherein large-magnitude perturbations in input space do not induce appreciable changes to network activations. In this work, we study in detail the phenomenon of under-sensitivity in vision models such as CNNs and Transformers, and present techniques to study the geometry and extent of \u201cequi-confidence\u201d level sets of such networks. We propose a Level Set Traversal algorithm that iteratively explores regions of high confidence with respect to the input space using orthogonal components of the local gradients. Given a source image, we use this algorithm to identify inputs that lie in the same equi-confidence level set as the source image despite being perceptually similar to arbitrary images from other classes. We further observe that the source image is linearly connected by a high-confidence path to these inputs, uncovering a star-like structure for level sets of deep networks. 
Furthermore, we attempt to identify and estimate the extent of these connected higher-dimensional regions over which the model maintains a high degree of confidence.", "keywords": "Neural networks;Vision models;blind spots;undersensitivity;invariance;level set geometry;input connectivity", "primary_area": "", "supplementary_material": "", "author": "Sriram Balasubramanian;Gaurang Sriramanan;Vinu Sankar Sadasivan;Soheil Feizi", "authorids": "~Sriram_Balasubramanian2;~Gaurang_Sriramanan1;~Vinu_Sankar_Sadasivan1;~Soheil_Feizi2", "gender": "M;M;M;M", "homepage": "http://www.sriram.live;https://gaurangsriramanan.github.io/;https://vinusankars.github.io/;https://www.cs.umd.edu/~sfeizi/", "dblp": "33/666;262/3916;244/8052;57/2132", "google_scholar": "HsiolTEAAAAJ;t76Uk8oAAAAJ;y1IKIw0AAAAJ;lptAmrMAAAAJ", "orcid": ";;;", "linkedin": ";gaurang-sriramanan-16141a1a3/;vinusankars/;", "or_profile": "~Sriram_Balasubramanian2;~Gaurang_Sriramanan1;~Vinu_Sankar_Sadasivan1;~Soheil_Feizi2", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbalasubramanian2023exploring,\ntitle={Exploring Geometry of Blind Spots in Vision models},\nauthor={Sriram Balasubramanian and Gaurang Sriramanan and Vinu Sankar Sadasivan and Soheil Feizi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uJ3qNIsDGF}\n}", "github": "", "project": "", "reviewers": "LWdq;QPKJ;yXeg;o1gw;N6FN", "pdf_size": 24585450, "rating": "5;6;7;7;8", "confidence": "4;4;3;3;3", "soundness": "3;3;4;4;4", "novelty": "3;2;3;4;4", "presentation": "3;2;3;4;3", "wc_summary": "201;84;73;145;81", "wc_strengths": "123;133;112;61;80", "wc_weaknesses": "213;183;39;153;148", "wc_questions": "72;22;97;31;60", "wc_limitations": "36;2;6;3;5", "wc_review": "645;424;327;393;374", "wc_reply_reviewers": "63;214;17;83;42", "wc_reply_authors": "52;1076;19;55;42", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;4;2;2;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 116.8, 49.31693421128284 ], "wc_strengths_avg": [ 101.8, 27.080620376941145 ], "wc_weaknesses_avg": [ 147.2, 58.91145898719535 ], "wc_questions_avg": [ 56.4, 27.32471408816568 ], "wc_limitations_avg": [ 10.4, 12.877888025604198 ], "wc_review_avg": [ 432.6, 110.76208737650262 ], "wc_reply_reviewers_avg": [ 83.8, 68.69468683966758 ], "wc_reply_authors_avg": [ 248.8, 413.7928950574188 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8807048459279795, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11061320030954606972&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "umd.edu;umd.edu;umd.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www.umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0",
"aff_country_unique": "United States" }, { "title": "M5HisDoc: A Large-scale Multi-style Chinese Historical Document Analysis Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73436", "id": "uJT68uPtC0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f7b424d242cc6bb7708cff241367334d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=uJT68uPtC0", "openreview": "https://openreview.net/forum?id=uJT68uPtC0", "poster": "/media/PosterPDFs/NeurIPS%202023/73436.png?t=1701693836.1371217", "slides": "https://nips.cc/virtual/2023/poster/73436", "video": "https://nips.cc/virtual/2023/poster/73436", "author_site": "Yongxin Shi, Chongyu Liu, Dezhi Peng, Cheng Jian, Jiarong Huang, Lianwen Jin", "tldr": "", "abstract": "Recognizing and organizing text in correct reading order plays a crucial role in historical document analysis and preservation. While existing methods have shown promising performance, they often struggle with challenges such as diverse layouts, low image quality, style variations, and distortions. This is primarily due to the lack of consideration for these issues in the current benchmarks, which hinders the development and evaluation of historical document analysis and recognition (HDAR) methods in complex real-world scenarios. To address this gap, this paper introduces a complex multi-style Chinese historical document analysis benchmark, named M5HisDoc. The M5 indicates five properties of style, ie., Multiple layouts, Multiple document types, Multiple calligraphy styles, Multiple backgrounds, and Multiple challenges. The M5HisDoc dataset consists of two subsets, M5HisDoc-R (Regular) and M5HisDoc-H (Hard). The M5HisDoc-R subset comprises 4,000 historical document images. To ensure high-quality annotations, we meticulously perform manual annotation and triple-checking. To replicate real-world conditions for historical document analysis applications, we incorporate image rotation, distortion, and resolution reduction into M5HisDoc-R subset to form a new challenging subset named M5HisDoc-H, which contains the same number of images as M5HisDoc-R. The dataset exhibits diverse styles, significant scale variations, dense texts, and an extensive character set. We conduct benchmarking experiments on five tasks: text line detection, text line recognition, character detection, character recognition, and reading order prediction. We also conduct cross-validation with other benchmarks. Experimental results demonstrate that the M5HisDoc dataset can offer new challenges and great opportunities for future research in this field, thereby providing deep insights into the solution for HDAR. 
The dataset is available at https://github.com/HCIILAB/M5HisDoc.", "keywords": "Historical document analysis;text detection;text recognition;reading order prediction", "primary_area": "", "supplementary_material": "/attachment/b000eb30da9722c752b29e81c5f0cb11940822b1.pdf", "author": "Yongxin Shi;Chongyu Liu;Dezhi Peng;Cheng Jian;Jiarong Huang;Lianwen Jin", "authorids": "~Yongxin_Shi2;~Chongyu_Liu2;~Dezhi_Peng1;~Cheng_Jian1;~Jiarong_Huang1;~Lianwen_Jin1", "gender": ";;M;M;M;M", "homepage": ";https://www.scut.edu.cn/new/;;;https://www.scut.edu.cn/;http://www.dlvc-lab.net/lianwen/", "dblp": "359/4310;211/4070.html;217/2342;;;54/3221", "google_scholar": "e-3XAoAAAAAJ;dW7AgfgAAAAJ;6zNgcjAAAAAJ;;;WMUStEUAAAAJ", "orcid": "0009-0003-2650-1663;;0000-0002-3263-3449;0000-0002-5831-7129;;0000-0002-5456-0957", "linkedin": ";;;;;", "or_profile": "~Yongxin_Shi2;~Chongyu_Liu2;~Dezhi_Peng1;~Cheng_Jian1;~Jiarong_Huang1;~Lianwen_Jin1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "MS student;PhD student;PhD student;MS student;MS student;Professor", "bibtex": "@inproceedings{\nshi2023mhisdoc,\ntitle={M5HisDoc: A Large-scale Multi-style Chinese Historical Document Analysis Benchmark},\nauthor={Yongxin Shi and Chongyu Liu and Dezhi Peng and Cheng Jian and Jiarong Huang and Lianwen Jin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=uJT68uPtC0}\n}", "github": "", "project": "", "reviewers": "oBLA;RZNS;mes1;eX73;5Ty8", "pdf_size": 23201248, "rating": "6;6;7;7;8", "confidence": "3;3;4;3;5", "wc_summary_and_contributions": "145;48;41;87;57", "wc_strengths": "28;89;30;53;57", "wc_improvement": "191;21;230;135;2", "wc_limitations": "19;80;24;66;94", "wc_correctness": "27;1;55;6;8", "wc_clarity": "6;1;5;94;2", "wc_relation_to_prior_work": "13;1;38;23;1", "wc_documentation": "12;1;34;67;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "442;243;458;532;227", "wc_reply_reviewers": "20;76;21;0;0", "wc_reply_authors": "639;338;823;1418;231", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;2;2;3;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 75.6, 38.08201675331809 ], "wc_strengths_avg": [ 51.4, 22.150395030337496 ], "wc_improvement_avg": [ 115.8, 90.55694341131442 ], "wc_limitations_avg": [ 56.6, 30.037310132566798 ], "wc_correctness_avg": [ 19.4, 19.865548066942427 ], "wc_clarity_avg": [ 21.6, 36.246930904560735 ], "wc_relation_to_prior_work_avg": [ 15.2, 14.062716664997557 ], "wc_documentation_avg": [ 23.8, 24.424577785501228 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 380.4, 122.64517927745877 ], "wc_reply_reviewers_avg": [ 23.4, 27.853904573685895 ], "wc_reply_authors_avg": [ 689.8, 420.6801160026464 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8685990362153793, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1412584568364640385&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": 
"scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fair Allocation of Indivisible Chores: Beyond Additive Costs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70159", "id": "uJmsYZiu3E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/aa5d22c77b380e2261332bb641b3c2e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uJmsYZiu3E", "openreview": "https://openreview.net/forum?id=uJmsYZiu3E", "poster": "/media/PosterPDFs/NeurIPS%202023/70159.png?t=1701447404.9401772", "slides": "https://nips.cc/virtual/2023/poster/70159", "video": "https://nips.cc/virtual/2023/poster/70159", "author_site": "Bo Li, Fangxiao Wang, Yu Zhou", "tldr": "", "abstract": "We study the maximin share (MMS) fair allocation of $m$ indivisible tasks to $n$ agents who have costs for completing the assigned tasks.\nIt is known that exact MMS fairness cannot be guaranteed, and so far the best-known approximation for additive cost functions is $\\frac{13}{11}$ by Huang and Segal-Halevi [EC, 2023]; however, beyond additivity, very little is known. \nIn this work, we first prove that no algorithm can ensure better than $\\min\\{n,\\frac{\\log m}{\\log \\log m}\\}$-approximation if the cost functions are submodular. \nThis result also shows a sharp contrast with the allocation of goods where constant approximations exist as shown by Barman and Krishnamurthy [TEAC, 2020] and Ghodsi et al. [AIJ, 2022]. \nWe then prove that for subadditive costs, there always exists an allocation that is $\\min\\{n,\\lceil\\log m\\rceil\\}$-approximation, and thus the approximation ratio is asymptotically tight.\nBesides multiplicative approximation, we also consider the ordinal relaxation, 1-out-of-$d$ MMS, which was recently proposed by Hosseini et al. [JAIR and AAMAS, 2022]. \nOur impossibility result implies that for any $d\\ge 2$, a 1-out-of-$d$ MMS allocation may not exist.\nDue to these hardness results for general subadditive costs, we turn to studying two specific subadditive costs, namely, bin packing and job scheduling. 
\nFor both settings, we show that constant approximate allocations exist for both multiplicative and ordinal relaxations of MMS.", "keywords": "fair allocation of chores;beyond additive cost functions;bin packing;job scheduling", "primary_area": "", "supplementary_material": "/attachment/58bdd34ae333e1dadb29f8203529fd608252f072.pdf", "author": "Bo Li;Fangxiao Wang;Yu Zhou", "authorids": "~Bo_Li27;~Fangxiao_Wang1;~Yu_Zhou12", "gender": "M;M;M", "homepage": "https://www4.comp.polyu.edu.hk/~bo2li/;;", "dblp": "50/3402-37;;", "google_scholar": ";;", "orcid": ";0000-0003-4211-4551;0000-0001-6799-8379", "linkedin": ";;", "or_profile": "~Bo_Li27;~Fangxiao_Wang1;~Yu_Zhou12", "aff": "The Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "position": "Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nli2023fair,\ntitle={Fair Allocation of Indivisible Chores: Beyond Additive Costs},\nauthor={Bo Li and Fangxiao Wang and Yu Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uJmsYZiu3E}\n}", "github": "", "project": "", "reviewers": "6vH3;MebF;AniE;RP1j", "pdf_size": 540554, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "4;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "137;174;116;122", "wc_strengths": "27;94;43;69", "wc_weaknesses": "76;449;20;24", "wc_questions": "20;71;2;1", "wc_limitations": "1;22;2;1", "wc_review": "261;810;183;217", "wc_reply_reviewers": "15;18;13;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 137.25, 22.554101622543072 ], "wc_strengths_avg": [ 58.25, 25.508576988926684 ], "wc_weaknesses_avg": [ 142.25, 178.47461304062267 ], "wc_questions_avg": [ 23.5, 28.447319733148852 ], "wc_limitations_avg": [ 6.5, 8.958236433584458 ], "wc_review_avg": [ 367.75, 256.8261814924639 ], "wc_reply_reviewers_avg": [ 15.0, 1.8708286933869707 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11523498123064870655&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "The Graph Pencil Method: Mapping Subgraph Densities to Stochastic Block Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70158", "id": "uN71BdBEG8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fb9f53edbfd80b3a543f7963b63363ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uN71BdBEG8", "openreview": "https://openreview.net/forum?id=uN71BdBEG8", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70158.png?t=1703110882.3372104", "slides": "https://nips.cc/virtual/2023/poster/70158", "video": "https://nips.cc/virtual/2023/poster/70158", "author_site": "Lee Gunderson, Gecia Bravo-Hermsdorff, Peter Orbanz", "tldr": "", "abstract": "In this work, we describe a method that determines an exact map from a finite set of subgraph densities to the parameters of a stochastic block model (SBM) matching these densities. Given a number K of blocks, the subgraph densities of a finite number of stars and bistars uniquely determines a single element of the class of all degree-separated stochastic block models with K blocks. Our method makes it possible to translate estimates of these subgraph densities into model parameters, and hence to use subgraph densities directly for inference. The computational overhead is negligible; computing the translation map is polynomial in K, but independent of the graph size once the subgraph densities are given.", "keywords": "Stochastic block model;SBM;graphons;matrix pencil method;method of moments", "primary_area": "", "supplementary_material": "/attachment/81b68865a5937aff33eace881f8d7220b171b83f.pdf", "author": "Lee M. Gunderson;Gecia Bravo-Hermsdorff;Peter Orbanz", "authorids": "~Lee_M._Gunderson1;~Gecia_Bravo-Hermsdorff1;~Peter_Orbanz2", "gender": "M;F;", "homepage": "https://leemgunderson.github.io/;https://gecia.github.io/;", "dblp": "236/6235;236/6201;", "google_scholar": "dXZ2pDsAAAAJ;Jq9GtykAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lee_M._Gunderson1;~Gecia_Bravo-Hermsdorff1;~Peter_Orbanz2", "aff": "University College London, University of London;;", "aff_domain": "ucl.ac.uk;;", "position": "Postdoc;;", "bibtex": "@inproceedings{\ngunderson2023the,\ntitle={The Graph Pencil Method: Mapping Subgraph Densities to Stochastic Block Models},\nauthor={Lee M. 
Gunderson and Gecia Bravo-Hermsdorff and Peter Orbanz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uN71BdBEG8}\n}", "github": "", "project": "", "reviewers": "hN5K;fpMW;TUi3", "pdf_size": 629398, "rating": "5;6;6", "confidence": "3;3;4", "soundness": "3;1;3", "novelty": "2;1;3", "presentation": "2;2;2", "wc_summary": "49;31;92", "wc_strengths": "55;20;38", "wc_weaknesses": "27;180;203", "wc_questions": "33;161;181", "wc_limitations": "31;28;41", "wc_review": "195;420;555", "wc_reply_reviewers": "0;187;90", "wc_reply_authors": "66;463;231", "reply_reviewers": "0;4;1", "reply_authors": "2;5;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 57.333333333333336, 25.590796956892312 ], "wc_strengths_avg": [ 37.666666666666664, 14.29063407348401 ], "wc_weaknesses_avg": [ 136.66666666666666, 78.11245454827005 ], "wc_questions_avg": [ 125.0, 65.56421788343599 ], "wc_limitations_avg": [ 33.333333333333336, 5.557777333511022 ], "wc_review_avg": [ 390.0, 148.49242404917499 ], "wc_reply_reviewers_avg": [ 92.33333333333333, 76.36025726049441 ], "wc_reply_authors_avg": [ 253.33333333333334, 162.84211849382075 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2703755764689302515&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ucl.ac.uk;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Adaptive Linear Estimating Equations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70157", "id": "uNmKBZrRZC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a399456a191ca36c7c78dff367887f0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uNmKBZrRZC", "openreview": "https://openreview.net/forum?id=uNmKBZrRZC", "poster": "/media/PosterPDFs/NeurIPS%202023/70157.png?t=1701390173.9637082", "slides": "https://nips.cc/virtual/2023/poster/70157", "video": "https://nips.cc/virtual/2023/poster/70157", "author_site": "Mufang Ying, Koulik Khamaru, Cun-Hui Zhang", "tldr": "", "abstract": "Sequential data collection has emerged as a widely adopted technique for enhancing the efficiency of data gathering processes. Despite its advantages, such a data collection mechanism often introduces complexities to the statistical inference procedure. For instance, the ordinary least squares (OLS) estimator in an adaptive linear regression model can exhibit non-normal asymptotic behavior, posing challenges for accurate inference and interpretation. In this paper, we propose a general method for constructing a debiased estimator that remedies this issue. It makes use of the idea of adaptive linear estimating equations, and we establish theoretical guarantees of asymptotic normality, supplemented by discussions on achieving near-optimal asymptotic variance. 
A salient feature of our estimator is that, in the context of multi-armed bandits, it retains the non-asymptotic performance of the least squares estimator while obtaining the asymptotic normality property. Consequently, this work helps connect two fruitful paradigms of adaptive inference: a) non-asymptotic inference using concentration inequalities and b) asymptotic inference via asymptotic normality.", "keywords": "bandit algorithm;statistical inference;adaptively collected data;asymptotic normality", "primary_area": "", "supplementary_material": "/attachment/122d6a5f7c7bd0f0558577aa544608bd02d86836.zip", "author": "Mufang Ying;Koulik Khamaru;Cun-Hui Zhang", "authorids": "~Mufang_Ying1;kk1241@stat.rutgers.edu;~Cun-Hui_Zhang1", "gender": ";;M", "homepage": ";;https://statistics.rutgers.edu/people-pages/faculty/people/130-faculty/376-cun-hui-zhang", "dblp": ";;", "google_scholar": ";;_cxs104AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mufang_Ying1;kk1241@stat.rutgers.edu;~Cun-Hui_Zhang1", "aff": ";;Rutgers University", "aff_domain": ";;rutgers.edu", "position": ";;Full Professor", "bibtex": "@inproceedings{\nying2023adaptive,\ntitle={Adaptive Linear Estimating Equations},\nauthor={Mufang Ying and Koulik Khamaru and Cun-Hui Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uNmKBZrRZC}\n}", "github": "", "project": "", "reviewers": "Z7he;G4km;M8Ec;ADJL", "pdf_size": 676344, "rating": "5;6;6;7", "confidence": "1;3;2;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "45;102;62;35", "wc_strengths": "36;89;40;46", "wc_weaknesses": "44;46;65;11", "wc_questions": "15;12;3;72", "wc_limitations": "13;1;37;1", "wc_review": "153;250;207;165", "wc_reply_reviewers": "15;0;15;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.0, 25.563646062328434 ], "wc_strengths_avg": [ 52.75, 21.22940178149163 ], "wc_weaknesses_avg": [ 41.5, 19.42292459955503 ], "wc_questions_avg": [ 25.5, 27.207535720825582 ], "wc_limitations_avg": [ 13.0, 14.696938456699069 ], "wc_review_avg": [ 193.75, 38.16657569130351 ], "wc_reply_reviewers_avg": [ 10.75, 6.2599920127744575 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13875752782904847892&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": ";;rutgers.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Sample Complexity Bounds for Score-Matching: Causal Discovery and Generative Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70156", "id": "uNnPWR66b8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0a3dc35a2391cabcb59a6b123544e3db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uNnPWR66b8", "openreview": 
"https://openreview.net/forum?id=uNnPWR66b8", "poster": "/media/PosterPDFs/NeurIPS%202023/70156.png?t=1701479844.818891", "slides": "https://nips.cc/virtual/2023/poster/70156", "video": "https://nips.cc/virtual/2023/poster/70156", "author_site": "Zhenyu Zhu, Francesco Locatello, Volkan Cevher", "tldr": "", "abstract": "This paper provides statistical sample complexity bounds for score-matching and its applications in causal discovery. We demonstrate that accurate estimation of the score function is achievable by training a standard deep ReLU neural network using stochastic gradient descent. We establish bounds on the error rate of recovering causal relationships using the score-matching-based causal discovery method of Rolland et al. [2022], assuming a sufficiently good estimation of the score function. Finally, we analyze the upper bound of score-matching estimation within the score-based generative modeling, which has been applied for causal discovery but is also of independent interest within the domain of generative models.", "keywords": "Causal discovery;Score matching;Score-based generative modeling", "primary_area": "", "supplementary_material": "/attachment/f8acbae4a46c1851e5d4a3e61aa073056d883882.pdf", "author": "Zhenyu Zhu;Francesco Locatello;Volkan Cevher", "authorids": "~Zhenyu_Zhu1;~Francesco_Locatello1;~Volkan_Cevher1", "gender": "M;M;M", "homepage": "https://zhuzhenyu1997.github.io/;https://twitter.com/FrancescoLocat8;http://lions.epfl.ch", "dblp": ";195/6074;70/5301", "google_scholar": "rft3OB4AAAAJ;;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;", "linkedin": "zhenyu-zhu-045471139/;;", "or_profile": "~Zhenyu_Zhu1;~Francesco_Locatello1;~Volkan_Cevher1", "aff": "Swiss Federal Institute of Technology Lausanne;Amazon;Amazon Development Center Germany", "aff_domain": "epfl.ch;amazon.com;amazon.de", "position": "PhD student;Senior Applied Scientist;Amazon Scholar", "bibtex": "@inproceedings{\nzhu2023sample,\ntitle={Sample Complexity Bounds for Score-Matching: Causal Discovery and Generative Modeling},\nauthor={Zhenyu Zhu and Francesco Locatello and Volkan Cevher},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uNnPWR66b8}\n}", "github": "", "project": "", "reviewers": "LFXr;Ndh6;RLDi;wqRq;FzFv", "pdf_size": 305362, "rating": "4;4;6;6;7", "confidence": "3;2;2;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;2;2;4", "presentation": "2;2;2;3;4", "wc_summary": "64;51;127;75;156", "wc_strengths": "27;153;66;99;81", "wc_weaknesses": "179;175;168;139;114", "wc_questions": "145;47;134;1;309", "wc_limitations": "1;18;30;2;10", "wc_review": "416;444;525;316;670", "wc_reply_reviewers": "59;143;42;63;90", "wc_reply_authors": "162;70;20;20;95", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 94.6, 40.10286772788201 ], "wc_strengths_avg": [ 85.2, 41.37825515896 ], "wc_weaknesses_avg": [ 155.0, 24.827404213892358 ], "wc_questions_avg": [ 127.2, 105.61515042833581 ], "wc_limitations_avg": [ 12.2, 10.81480466767662 ], "wc_review_avg": [ 474.2, 118.52830885488918 ], "wc_reply_reviewers_avg": [ 79.4, 35.330440133120334 ], "wc_reply_authors_avg": [ 73.4, 52.973955865123 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], 
"replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12568268137765805575&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "epfl.ch;amazon.com;amazon.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.epfl.ch;https://www.amazon.com", "aff_unique_abbr": "EPFL;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Switzerland;United States;Germany" }, { "title": "Breadcrumbs to the Goal: Goal-Conditioned Exploration from Human-in-the-Loop Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70155", "id": "uOEeui0rL7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c7c7cf10082e454b9662a686ce6f1b6f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uOEeui0rL7", "openreview": "https://openreview.net/forum?id=uOEeui0rL7", "poster": "/media/PosterPDFs/NeurIPS%202023/70155.png?t=1702132091.1957831", "slides": "https://nips.cc/virtual/2023/poster/70155", "video": "https://nips.cc/virtual/2023/poster/70155", "author_site": "Marcel Torne Villasevil, Max Balsells I Pamies, Zihan Wang, Samedh Desai, Tao Chen, Pulkit Agrawal, Abhishek Gupta", "tldr": "", "abstract": "Exploration and reward specification are fundamental and intertwined challenges for reinforcement learning. Solving sequential decision making tasks with a non-trivial element of exploration requires either specifying carefully designed reward functions or relying on indiscriminate, novelty seeking exploration bonuses. Human supervisors can provide effective guidance in the loop to direct the exploration process, but prior methods to leverage this guidance require constant synchronous high-quality human feedback, which is expensive and impractical to obtain. In this work, we propose a technique - Human Guided Exploration (HUGE), that is able to leverage low-quality feedback from non-expert users, which is infrequent, asynchronous and noisy, to guide exploration for reinforcement learning, without requiring careful reward specification. The key idea is to separate the challenges of directed exploration and policy learning - human feedback is used to direct exploration, while self-supervised policy learning is used to independently learn unbiased behaviors from the collected data. We show that this procedure can leverage noisy, asynchronous human feedback to learn tasks with no hand-crafted reward design or exploration bonuses. We show that HUGE is able to learn a variety of challenging multi-stage robotic navigation and manipulation tasks in simulation using crowdsourced feedback from non-expert users. 
Moreover, this paradigm can be scaled to learning directly on real-world robots.", "keywords": "Learning from human preferences;self-supervised learning;exploration in reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/db8e1d223e573fc1f49485a430a2cab9605f4ac3.zip", "author": "Marcel Torne Villasevil;Max Balsells I Pamies;Zihan Wang;Samedh Desai;Tao Chen;Pulkit Agrawal;Abhishek Gupta", "authorids": "~Marcel_Torne_Villasevil1;~Max_Balsells_I_Pamies1;~Zihan_Wang14;~Samedh_Desai1;~Tao_Chen1;~Pulkit_Agrawal1;~Abhishek_Gupta1", "gender": "M;M;M;M;M;M;M", "homepage": "https://marceltorne.github.io;https://www.linkedin.com/in/max-balsells/;https://avinwangzh.github.io/;;https://taochenshh.github.io;https://people.eecs.berkeley.edu/~pulkitag/;https://homes.cs.washington.edu/~abhgupta/", "dblp": "352/5363;;;;;149/2672;18/6404-4", "google_scholar": "ITlelQ8AAAAJ;;;;gdUv1PIAAAAJ;UpZmJI0AAAAJ;1wLVDP4AAAAJ", "orcid": ";;;;;;", "linkedin": "marceltorne/;;;sdesai1287;;;", "or_profile": "~Marcel_Torne_Villasevil1;~Max_Balsells_I_Pamies1;~Zihan_Wang14;~Samedh_Desai1;~Tao_Chen1;~Pulkit_Agrawal1;~Abhishek_Gupta1", "aff": "Harvard University, Harvard University;Universidad Polit\u00e9cnica de Cataluna;University of Washington;University of Washington;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Washington", "aff_domain": "g.harvard.edu;upc.edu;uw.edu;uw.edu;mit.edu;mit.edu;uw.edu", "position": "MS student;Undergrad student;PhD student;Undergrad student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nvillasevil2023breadcrumbs,\ntitle={Breadcrumbs to the Goal: Supervised Goal Selection from Human-in-the-Loop Feedback},\nauthor={Marcel Torne Villasevil and Max Balsells I Pamies and Zihan Wang and Samedh Desai and Tao Chen and Pulkit Agrawal and Abhishek Gupta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uOEeui0rL7}\n}", "github": "", "project": "", "reviewers": "4hMa;MJKZ;erJy;EF3C", "pdf_size": 49423872, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "135;231;143;104", "wc_strengths": "60;54;78;71", "wc_weaknesses": "351;208;95;131", "wc_questions": "11;160;3;122", "wc_limitations": "5;3;55;6", "wc_review": "562;656;374;434", "wc_reply_reviewers": "34;58;10;49", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 153.25, 47.19308741754453 ], "wc_strengths_avg": [ 65.75, 9.33742469849155 ], "wc_weaknesses_avg": [ 196.25, 98.22773284566838 ], "wc_questions_avg": [ 74.0, 68.39225102305085 ], "wc_limitations_avg": [ 17.25, 21.821720830401986 ], "wc_review_avg": [ 506.5, 109.82144599302998 ], "wc_reply_reviewers_avg": [ 37.75, 18.171062159378575 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10020623200778484119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": 
"g.harvard.edu;upc.edu;uw.edu;uw.edu;mit.edu;mit.edu;uw.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;3;3;2", "aff_unique_norm": "Harvard University;Universitat Polit\u00e8cnica de Catalunya;University of Washington;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.harvard.edu;https://www.upc.edu;https://www.washington.edu;https://web.mit.edu", "aff_unique_abbr": "Harvard;UPC;UW;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;Spain" }, { "title": "Data Selection for Language Models via Importance Resampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70154", "id": "uPSQv0leAu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b9aa8f418bde2840d5f4ab7a02f663b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uPSQv0leAu", "openreview": "https://openreview.net/forum?id=uPSQv0leAu", "poster": "/media/PosterPDFs/NeurIPS%202023/70154.png?t=1701377065.5253515", "slides": "https://nips.cc/virtual/2023/poster/70154", "video": "https://nips.cc/virtual/2023/poster/70154", "author_site": "Sang Michael Xie, Shibani Santurkar, Tengyu Ma, Percy Liang", "tldr": "", "abstract": "Selecting a suitable pretraining dataset is crucial for both general-domain (e.g., GPT-3) and domain-specific (e.g., Codex) language models (LMs). We formalize this problem as selecting a subset of a large raw unlabeled dataset to match a desired target distribution given unlabeled target samples. Due to the scale and dimensionality of the raw text data, existing methods use simple heuristics or require human experts to manually curate data. Instead, we extend the classic importance resampling approach used in low-dimensions for LM data selection. We propose Data Selection with Importance Resampling (DSIR), an efficient and scalable framework that estimates importance weights in a reduced feature space for tractability and selects data with importance resampling according to these weights. We instantiate the DSIR framework with hashed n-gram features for efficiency, enabling the selection of 100M documents from the full Pile dataset in 4.5 hours. To measure whether hashed n-gram features preserve the aspects of the data that are relevant to the target, we define KL reduction, a data metric that measures the proximity between the selected pretraining data and the target on some feature space. Across 8 data selection methods (including expert selection), KL reduction on hashed n-gram features highly correlates with average downstream accuracy (r=0.82). When selecting data for continued pretraining on a specific domain, DSIR performs comparably to expert curation across 8 target distributions. 
When pretraining general-domain models (target is Wikipedia and books), DSIR improves over random selection and heuristic filtering baselines by 2--2.5% on the GLUE benchmark.", "keywords": "Language models;pretraining;data selection;fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Sang Michael Xie;Shibani Santurkar;Tengyu Ma;Percy Liang", "authorids": "~Sang_Michael_Xie1;~Shibani_Santurkar1;~Tengyu_Ma1;~Percy_Liang1", "gender": ";;M;", "homepage": "https://cs.stanford.edu/~eix/;https://shibanisanturkar.com/;http://ai.stanford.edu/~tengyuma/;https://cs.stanford.edu/~pliang/", "dblp": "220/3987;153/2146;54/9061;04/1701", "google_scholar": "EBNa5IEAAAAJ;QMkbFp8AAAAJ;i38QlUwAAAAJ;pouyVyUAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sang_Michael_Xie1;~Shibani_Santurkar1;~Tengyu_Ma1;~Percy_Liang1", "aff": "Stanford University;Stanford University;Facebook AI Research;Stanford University", "aff_domain": "stanford.edu;stanford.edu;fb.com;stanford.edu", "position": "PhD student;Postdoc;Visiting Scientist;Associate Professor", "bibtex": "@inproceedings{\nxie2023data,\ntitle={Data Selection for Language Models via Importance Resampling},\nauthor={Sang Michael Xie and Shibani Santurkar and Tengyu Ma and Percy Liang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uPSQv0leAu}\n}", "github": "", "project": "", "reviewers": "NB9k;LSWj;LQ7V;znTv;rytZ", "pdf_size": 982600, "rating": "4;4;6;8;8", "confidence": "4;4;4;5;3", "soundness": "2;3;2;3;4", "novelty": "2;2;3;4;4", "presentation": "3;2;3;4;4", "wc_summary": "50;129;58;187;124", "wc_strengths": "35;51;40;160;101", "wc_weaknesses": "92;164;217;70;246", "wc_questions": "13;90;41;146;98", "wc_limitations": "1;6;6;1;13", "wc_review": "191;440;362;564;582", "wc_reply_reviewers": "94;37;67;44;10", "wc_reply_authors": "78;196;55;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;4;2;1;1", "rating_avg": [ 6.0, 1.7888543819998317 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 109.6, 50.575092684047554 ], "wc_strengths_avg": [ 77.4, 47.48304960720194 ], "wc_weaknesses_avg": [ 157.8, 68.35320036399173 ], "wc_questions_avg": [ 77.6, 46.39224073053596 ], "wc_limitations_avg": [ 5.4, 4.409081537009721 ], "wc_review_avg": [ 427.8, 143.4439263266312 ], "wc_reply_reviewers_avg": [ 50.4, 28.38732111348304 ], "wc_reply_authors_avg": [ 65.8, 71.93997497914494 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 183, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10975119190373924882&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;fb.com;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Logic for Expressing Log-Precision Transformers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70153", 
"id": "uR8TtWCIsr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a48e5877c7bf86a513950ab23b360498-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uR8TtWCIsr", "openreview": "https://openreview.net/forum?id=uR8TtWCIsr", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70153", "video": "https://nips.cc/virtual/2023/poster/70153", "author_site": "William Merrill, Ashish Sabharwal", "tldr": "", "abstract": "One way to interpret the reasoning power of transformer-based language models is to describe the types of logical rules they can resolve over some input text. Recently, Chiang et al. (2023) showed that finite-precision transformer classifiers can be equivalently expressed in a generalization of first-order logic. However, finite-precision transformers are a weak transformer variant because, as we show, a single head can only attend to a constant number of tokens and, in particular, cannot represent uniform attention. Since attending broadly is a core capability for transformers, we ask whether a minimally more expressive model that can attend universally can also be characterized in logic. To this end, we analyze transformers whose forward pass is computed in $\\log n$ precision on contexts of length $n$. We prove any log-precision transformer classifier can be equivalently expressed as a first-order logic sentence that, in addition to standard universal and existential quantifiers, may also contain majority-vote quantifiers. This is the tightest known upper bound and first logical characterization of log-precision transformers.", "keywords": "transformers;logic;reasoning;circuit complexity;mechanistic interpretability", "primary_area": "", "supplementary_material": "/attachment/eb8778046567ec7385cbfca32ae7201e28adc86c.pdf", "author": "William Merrill;Ashish Sabharwal", "authorids": "~William_Merrill1;~Ashish_Sabharwal1", "gender": "M;M", "homepage": "http://lambdaviking.com;", "dblp": "19/3512;13/154", "google_scholar": "CyjChJQAAAAJ;7VspfeAAAAAJ", "orcid": ";", "linkedin": "william-merrill-15ab0743/;ashish-sabharwal-82a2b661", "or_profile": "~William_Merrill1;~Ashish_Sabharwal1", "aff": "New York University;Allen Institute for AI", "aff_domain": "nyu.edu;allenai.org", "position": "Graduate student;Principal Researcher", "bibtex": "@inproceedings{\nmerrill2023a,\ntitle={A Logic for Expressing Log-Precision Transformers},\nauthor={William Merrill and Ashish Sabharwal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uR8TtWCIsr}\n}", "github": "", "project": "", "reviewers": "ihd9;NeeV;kdtb;27Gc", "pdf_size": 739134, "rating": "7;7;7;7", "confidence": "4;4;3;3", "soundness": "3;4;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "92;220;46;81", "wc_strengths": "73;94;49;128", "wc_weaknesses": "77;285;9;116", "wc_questions": "58;1;45;46", "wc_limitations": "35;22;7;32", "wc_review": "335;622;156;403", "wc_reply_reviewers": "10;90;0;57", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 109.75, 65.88009942311866 ], "wc_strengths_avg": [ 86.0, 29.008619408720573 ], "wc_weaknesses_avg": [ 121.75, 101.7334138815758 ], "wc_questions_avg": [ 37.5, 21.68524844220144 ], "wc_limitations_avg": [ 24.0, 10.931605554537724 ], 
"wc_review_avg": [ 379.0, 166.80077937467797 ], "wc_reply_reviewers_avg": [ 39.25, 36.355020286062285 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6745859656021992812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "nyu.edu;allenai.org", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "New York University;Allen Institute for AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://allenai.org", "aff_unique_abbr": "NYU;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Sampling weights of deep neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70152", "id": "uRHpgo6TMR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c7201deff8d507a8fe2e86d34094e154-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uRHpgo6TMR", "openreview": "https://openreview.net/forum?id=uRHpgo6TMR", "poster": "/media/PosterPDFs/NeurIPS%202023/70152.png?t=1699867909.0990872", "slides": "https://nips.cc/virtual/2023/poster/70152", "video": "https://nips.cc/virtual/2023/poster/70152", "author_site": "Erik L Bolager, Iryna Burak, Chinmay Datar, Qing Sun, Felix Dietrich", "tldr": "", "abstract": "We introduce a probability distribution, combined with an efficient sampling algorithm, for weights and biases of fully-connected neural networks. In a supervised learning context, no iterative optimization or gradient computations of internal network parameters are needed to obtain a trained network. The sampling is based on the idea of random feature models. However, instead of a data-agnostic distribution, e.g., a normal distribution, we use both the input and the output training data to sample shallow and deep networks. We prove that sampled networks are universal approximators. For Barron functions, we show that the $L^2$-approximation error of sampled shallow networks decreases with the square root of the number of neurons. Our sampling scheme is invariant to rigid body transformations and scaling of the input data, which implies many popular pre-processing techniques are not required. In numerical experiments, we demonstrate that sampled networks achieve accuracy comparable to iteratively trained ones, but can be constructed orders of magnitude faster. 
Our test cases involve a classification benchmark from OpenML, sampling of neural operators to represent maps in function spaces, and transfer learning using well-known architectures.", "keywords": "random sampling;neural network parameters;iterative optimization", "primary_area": "", "supplementary_material": "", "author": "Erik Lien Bolager;Iryna Burak;Chinmay Datar;Qing Sun;Felix Dietrich", "authorids": "erik.bolager@tum.de;iryna.burak@tum.de;chinmay.datar@tum.de;qing.sun@tum.de;~Felix_Dietrich1", "gender": ";;;;M", "homepage": ";;;;https://www.fd-research.com/", "dblp": ";;;;145/1097", "google_scholar": ";;;;https://scholar.google.de/citations?user=-RbM93sAAAAJ", "orcid": ";;;;0000-0002-2906-1769", "linkedin": ";;;;", "or_profile": "erik.bolager@tum.de;iryna.burak@tum.de;chinmay.datar@tum.de;qing.sun@tum.de;~Felix_Dietrich1", "aff": ";;;;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": ";;;;tum.de", "position": ";;;;Postdoc", "bibtex": "@inproceedings{\nbolager2023sampling,\ntitle={Sampling weights of deep neural networks},\nauthor={Erik Lien Bolager and Iryna Burak and Chinmay Datar and Qing Sun and Felix Dietrich},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uRHpgo6TMR}\n}", "github": "", "project": "", "reviewers": "ReAz;b7pE;KCHD;5eGE;jKG3;LFwn", "pdf_size": 1185388, "rating": "4;6;6;6;6;6", "confidence": "3;3;2;3;4;1", "soundness": "2;3;3;3;3;2", "novelty": "2;3;3;3;3;2", "presentation": "2;3;2;4;3;1", "wc_summary": "26;83;65;131;109;112", "wc_strengths": "104;24;53;65;126;94", "wc_weaknesses": "212;89;412;93;67;103", "wc_questions": "6;128;89;91;151;43", "wc_limitations": "1;24;29;41;12;20", "wc_review": "349;348;648;421;465;372", "wc_reply_reviewers": "0;66;122;55;20;73", "wc_reply_authors": "0;91;36;37;0;4", "reply_reviewers": "0;1;1;1;1;1", "reply_authors": "1;2;2;2;1;2", "rating_avg": [ 5.666666666666667, 0.7453559924999298 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 87.66666666666667, 34.793038895094455 ], "wc_strengths_avg": [ 77.66666666666667, 34.022868126534476 ], "wc_weaknesses_avg": [ 162.66666666666666, 120.82586183797278 ], "wc_questions_avg": [ 84.66666666666667, 48.75334746341926 ], "wc_limitations_avg": [ 21.166666666666668, 12.615026313444174 ], "wc_review_avg": [ 433.8333333333333, 104.39574810413603 ], "wc_reply_reviewers_avg": [ 56.0, 39.11095328250983 ], "wc_reply_authors_avg": [ 28.0, 32.29551052391029 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.15811388300841903, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12114723296796880347&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": ";;;;tum.de", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": "", "aff_unique_url": "https://www.tum.de", "aff_unique_abbr": "TUM", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Self-Supervised Reinforcement Learning that Transfers using Random Features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70151", "id": 
"uRewSnLJAa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b048dd19ba6d85b9066aa93b4de9ad4a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uRewSnLJAa", "openreview": "https://openreview.net/forum?id=uRewSnLJAa", "poster": "/media/PosterPDFs/NeurIPS%202023/70151.png?t=1702186302.401397", "slides": "https://nips.cc/virtual/2023/poster/70151", "video": "https://nips.cc/virtual/2023/poster/70151", "author_site": "Boyuan Chen, Chuning Zhu, Pulkit Agrawal, Kaiqing Zhang, Abhishek Gupta", "tldr": "", "abstract": "Model-free reinforcement learning algorithms have exhibited great potential in solving single-task sequential decision-making problems with high-dimensional observations and long horizons, but are known to be hard to generalize across tasks. Model-based RL, on the other hand, learns task-agnostic models of the world that naturally enables transfer across different reward functions, but struggles to scale to complex environments due to the compounding error. To get the best of both worlds, we propose a self-supervised reinforcement learning method that enables the transfer of behaviors across tasks with different rewards, while circumventing the challenges of model-based RL. In particular, we show self-supervised pre-training of model-free reinforcement learning with a number of random features as rewards allows implicit modeling of long-horizon environment dynamics. Then, planning techniques like model-predictive control using these implicit models enable fast adaptation to problems with new reward functions. Our method is self-supervised in that it can be trained on offline datasets without reward labels, but can then be quickly deployed on new tasks. We validate that our proposed method enables transfer across tasks on a variety of manipulation and locomotion domains in simulation, opening the door to generalist decision-making agents.", "keywords": "deep reinforcement learning;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/5ff5406f3501b71797ac9ae0173ec21a4de4ee06.zip", "author": "Boyuan Chen;Chuning Zhu;Pulkit Agrawal;Kaiqing Zhang;Abhishek Gupta", "authorids": "~Boyuan_Chen2;~Chuning_Zhu1;~Pulkit_Agrawal1;~Kaiqing_Zhang3;~Abhishek_Gupta1", "gender": "M;M;M;M;M", "homepage": "https://boyuan.space/;https://homes.cs.washington.edu/~zchuning/;https://people.eecs.berkeley.edu/~pulkitag/;https://homes.cs.washington.edu/~abhgupta/;https://kzhang66.github.io/", "dblp": "193/7174-3.html;295/9468;149/2672;18/6404-4;", "google_scholar": "rEL4-fgAAAAJ;;UpZmJI0AAAAJ;1wLVDP4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0009-1960-9135;;;;", "linkedin": "boyuan99/;chuning-zhu-39b086167/;;;", "or_profile": "~Boyuan_Chen2;~Chuning_Zhu1;~Pulkit_Agrawal1;~Abhishek_Gupta1;~kaiqing_zhang1", "aff": "Massachusetts Institute of Technology;University of Washington;Massachusetts Institute of Technology;University of Washington;University of Maryland, College Park", "aff_domain": "mit.edu;cs.washington.edu;mit.edu;uw.edu;umd.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023selfsupervised,\ntitle={Self-Supervised Reinforcement Learning that Transfers using Random Features},\nauthor={Boyuan Chen and Chuning Zhu and Pulkit Agrawal and Kaiqing Zhang and Abhishek Gupta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uRewSnLJAa}\n}", 
"github": "", "project": "", "reviewers": "W1dV;8qBK;BJ94;1b4X;gqpD", "pdf_size": 2884289, "rating": "4;5;5;7;7", "confidence": "4;4;4;4;2", "soundness": "3;3;2;4;3", "novelty": "3;3;3;3;2", "presentation": "2;2;3;3;4", "wc_summary": "79;95;88;150;41", "wc_strengths": "32;44;67;83;39", "wc_weaknesses": "85;138;322;571;145", "wc_questions": "8;75;170;1;125", "wc_limitations": "4;5;35;22;1", "wc_review": "208;357;682;827;351", "wc_reply_reviewers": "0;31;98;231;20", "wc_reply_authors": "23;23;19;23;23", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 90.6, 35.069074695520555 ], "wc_strengths_avg": [ 53.0, 19.0473095212946 ], "wc_weaknesses_avg": [ 252.2, 178.3226289622268 ], "wc_questions_avg": [ 75.8, 65.5542523410953 ], "wc_limitations_avg": [ 13.4, 13.062924634246345 ], "wc_review_avg": [ 485.0, 231.01601676074324 ], "wc_reply_reviewers_avg": [ 76.0, 84.19738713285585 ], "wc_reply_authors_avg": [ 22.2, 1.5999999999999999 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5833333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15095277649046318405&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mit.edu;cs.washington.edu;mit.edu;uw.edu;umd.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;University of Washington;University of Maryland", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.washington.edu;https://www/umd.edu", "aff_unique_abbr": "MIT;UW;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Alternating Gradient Descent and Mixture-of-Experts for Integrated Multimodal Perception", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70150", "id": "uTlKUAm68H", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa384d5f9e85380833d523766af5941c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uTlKUAm68H", "openreview": "https://openreview.net/forum?id=uTlKUAm68H", "poster": "/media/PosterPDFs/NeurIPS%202023/70150.png?t=1702159722.372366", "slides": "https://nips.cc/virtual/2023/poster/70150", "video": "https://nips.cc/virtual/2023/poster/70150", "author_site": "Hassan Akbari, Dan Kondratyuk, Yin Cui, Rachel Hornung, Huisheng Wang, Hartwig Adam", "tldr": "", "abstract": "We present Integrated Multimodal Perception (IMP), a simple and scalable multimodal multi-task training and modeling approach. IMP integrates multimodal inputs including image, video, text, and audio into a single Transformer encoder with minimal modality-specific components. IMP makes use of a novel design that combines Alternating Gradient Descent (AGD) and Mixture-of-Experts (MoE) for efficient model & task scaling. 
We conduct extensive empirical studies and reveal the following key insights:\n 1) performing gradient descent updates by alternating on diverse modalities, loss functions, and tasks, with varying input resolutions, efficiently improves the model.\n 2) sparsification with MoE on a single modality-agnostic encoder substantially improves the performance, outperforming dense models that use modality-specific encoders or additional fusion layers and greatly mitigating the conflicts between modalities. \nIMP achieves competitive performance on a wide range of downstream tasks including video classification, image classification, image-text, and video-text retrieval. Most notably, we train a sparse IMP-MoE-L focusing on video tasks that achieves new state-of-the-art in zero-shot video classification: 77.0% on Kinetics-400, 76.8% on Kinetics-600, and 68.3% on Kinetics-700, improving the previous state-of-the-art by +5%, +6.7%, and +5.8%, respectively, while using only 15% of their total training computational cost.", "keywords": "Alternating Gradient Descent;Multimodal;Mixture of Experts;AGD;MoE;Deep Learning;Optimization", "primary_area": "", "supplementary_material": "/attachment/fddf7fa0eaadfc0c848aad1441f760591d20ea20.pdf", "author": "Hassan Akbari;Dan Kondratyuk;Yin Cui;Rachel Hornung;Huisheng Wang;Hartwig Adam", "authorids": "~Hassan_Akbari1;~Dan_Kondratyuk1;~Yin_Cui1;~Rachel_Hornung1;~Huisheng_Wang1;~Hartwig_Adam1", "gender": "M;M;M;;;He/him", "homepage": "https://hassanakbari.com;https://dankondratyuk.com;https://ycui.me/;;;https://research.google/people/author37870/", "dblp": "207/8265;;47/8023.html;;;75/948", "google_scholar": "https://scholar.google.com;hl1fJgIAAAAJ;iP5m52IAAAAJ;;4evU9_YAAAAJ;fWd88tEAAAAJ", "orcid": ";0000-0002-7670-7243;0000-0003-2882-2033;;;0000-0003-1258-4341", "linkedin": "hassan-akbari-48a1b270/;dankondratyuk;;;;hartwig-adam-1873392/", "or_profile": "~Hassan_Akbari1;~Dan_Kondratyuk1;~Yin_Cui1;~Rachel_Hornung1;~Huisheng_Wang1;~Hartwig_Adam1", "aff": "Google;Google;Google;;Google;Google Research", "aff_domain": "google.com;google.com;google.com;;google.com;google.com", "position": "Research Scientist;Researcher;Research Scientist;;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nakbari2023alternating,\ntitle={Alternating Gradient Descent and Mixture-of-Experts for Integrated Multimodal Perception},\nauthor={Hassan Akbari and Dan Kondratyuk and Yin Cui and Rachel Hornung and Huisheng Wang and Hartwig Adam},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uTlKUAm68H}\n}", "github": "", "project": "", "reviewers": "tAnb;SjW7;SrPj;tYu8", "pdf_size": 671517, "rating": "3;5;5;5", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "147;51;75;46", "wc_strengths": "91;30;46;34", "wc_weaknesses": "181;56;100;109", "wc_questions": "47;83;259;30", "wc_limitations": "1;1;47;8", "wc_review": "467;221;527;227", "wc_reply_reviewers": "127;13;139;17", "wc_reply_authors": "230;0;36;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.75, 40.34460930533347 ], "wc_strengths_avg": [ 50.25, 24.252577182641847 ], "wc_weaknesses_avg": [ 111.5, 44.85810963471376 ], 
"wc_questions_avg": [ 104.75, 91.08889888455124 ], "wc_limitations_avg": [ 14.25, 19.122957407263137 ], "wc_review_avg": [ 360.5, 138.15480447671735 ], "wc_reply_reviewers_avg": [ 74.0, 59.16924876994806 ], "wc_reply_authors_avg": [ 66.5, 95.53402535222726 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13944505113934074584&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;google.com;;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Complementary Benefits of Contrastive Learning and Self-Training Under Distribution Shift", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70149", "id": "uWGH6jDTVv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/26f96550613971371c5d07f37f0e06c0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uWGH6jDTVv", "openreview": "https://openreview.net/forum?id=uWGH6jDTVv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70149", "video": "https://nips.cc/virtual/2023/poster/70149", "author_site": "Saurabh Garg, Amrith Setlur, Zachary Lipton, Sivaraman Balakrishnan, Virginia Smith, Aditi Raghunathan", "tldr": "", "abstract": "Self-training and contrastive learning have emerged as leading techniques for incorporating unlabeled data, both under distribution shift (unsupervised domain adaptation) and when it is absent (semi-supervised learning). However, despite the popularity and compatibility of these techniques, their efficacy in combination remains surprisingly unexplored. In this paper, we first undertake a systematic empirical investigation of this combination, finding (i) that in domain adaptation settings, self-training and contrastive learning offer significant complementary gains; and (ii) that in semi-supervised learning settings, surprisingly, the benefits are not synergistic. Across eight distribution shift datasets (e.g., BREEDs, WILDS), we demonstrate that the combined method obtains 3--8\\% higher accuracy than either approach independently. 
Finally, we theoretically analyze these techniques in a simplified model of distribution shift demonstrating scenarios under which the features produced by contrastive learning can yield a good initialization for self-training to further amplify gains and achieve optimal performance, even when either method alone would fail.", "keywords": "contrastive learning; self training; distribution shift; semi supervised learning; unsupervised domain adaptation", "primary_area": "", "supplementary_material": "", "author": "Saurabh Garg;Amrith Setlur;Zachary Chase Lipton;Sivaraman Balakrishnan;Virginia Smith;Aditi Raghunathan", "authorids": "~Saurabh_Garg3;~Amrith_Setlur1;~Zachary_Chase_Lipton1;~Sivaraman_Balakrishnan1;~Virginia_Smith1;~Aditi_Raghunathan1", "gender": "M;M;Unspecified;M;F;F", "homepage": "http://saurabhgarg1996.github.io/;http://ars22.github.io;http://zacklipton.com;http://www.stat.cmu.edu/~siva/;;https://www.cs.cmu.edu/~aditirag/", "dblp": "80/208;https://dblp.uni-trier.de/pers/hd/s/Setlur:Amrith;;52/10671;120/0921;166/1409", "google_scholar": "SAnJ1hIAAAAJ;https://scholar.google.ru/citations?user=i7V1kJgAAAAJ;MN9Kfg8AAAAJ;o7yFQXUAAAAJ;;Ch9iRwQAAAAJ", "orcid": ";0000-0002-7061-3094;;;;", "linkedin": "saurabh-garg-b680b5b8/;;;;;", "or_profile": "~Saurabh_Garg3;~Amrith_Setlur1;~Zachary_Chase_Lipton1;~Sivaraman_Balakrishnan1;~Virginia_Smith1;~Aditi_Raghunathan1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ngarg2023complementary,\ntitle={Complementary Benefits of Contrastive Learning and Self-Training Under Distribution Shift},\nauthor={Saurabh Garg and Amrith Setlur and Zachary Chase Lipton and Sivaraman Balakrishnan and Virginia Smith and Aditi Raghunathan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uWGH6jDTVv}\n}", "github": "", "project": "", "reviewers": "C2y1;Srqn;fFuY;272G", "pdf_size": 1228263, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "33;72;246;136", "wc_strengths": "43;62;140;171", "wc_weaknesses": "165;116;132;263", "wc_questions": "19;16;108;2", "wc_limitations": "17;13;3;2", "wc_review": "277;279;629;574", "wc_reply_reviewers": "24;16;29;77", "wc_reply_authors": "0;0;0;141", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 121.75, 80.61133605145122 ], "wc_strengths_avg": [ 104.0, 53.08012810836085 ], "wc_weaknesses_avg": [ 169.0, 57.074512700504066 ], "wc_questions_avg": [ 36.25, 41.91882035553959 ], "wc_limitations_avg": [ 8.75, 6.417748826496718 ], "wc_review_avg": [ 439.75, 162.91619778278647 ], "wc_reply_reviewers_avg": [ 36.5, 23.837994882120434 ], "wc_reply_authors_avg": [ 35.25, 61.054790966802926 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 17, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1621676519636407134&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Neural Implicit through Volume Rendering with Attentive Depth Fusion Priors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70148", "id": "uWNqy09dFW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/68637ee6b30276f900bc67320466b69f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uWNqy09dFW", "openreview": "https://openreview.net/forum?id=uWNqy09dFW", "poster": "/media/PosterPDFs/NeurIPS%202023/70148.png?t=1702194514.8368156", "slides": "https://nips.cc/virtual/2023/poster/70148", "video": "https://nips.cc/virtual/2023/poster/70148", "author_site": "Pengchong Hu, Zhizhong Han", "tldr": "", "abstract": "Learning neural implicit representations has achieved remarkable performance in 3D reconstruction from multi-view images. Current methods use volume rendering to render implicit representations into either RGB or depth images that are supervised by the multi-view ground truth. However, rendering a view each time suffers from incomplete depth at holes and unawareness of occluded structures from the depth supervision, which severely affects the accuracy of geometry inference via volume rendering. To resolve this issue, we propose to learn neural implicit representations from multi-view RGBD images through volume rendering with an attentive depth fusion prior. Our prior allows neural networks to sense coarse 3D structures from the Truncated Signed Distance Function (TSDF) fused from all available depth images for rendering. The TSDF enables accessing the missing depth at holes on one depth image and the occluded parts that are invisible from the current view. By introducing a novel attention mechanism, we allow neural networks to directly use the depth fusion prior with the inferred occupancy as the learned implicit function. Our attention mechanism works with either a one-time fused TSDF that represents a whole scene or an incrementally fused TSDF that represents a partial scene in the context of Simultaneous Localization and Mapping (SLAM). 
Our evaluations on widely used benchmarks including synthetic and real-world scans show our superiority over the latest neural implicit methods.", "keywords": "3D Reconstruction;SDF;Neural Rendering;Implicit Representations;SLAM", "primary_area": "", "supplementary_material": "/attachment/4b372ef937fc52c8725686cad92a7a71112cf396.pdf", "author": "Pengchong Hu;Zhizhong Han", "authorids": "~Pengchong_Hu1;~Zhizhong_Han2", "gender": "M;M", "homepage": "https://github.com/pengchongH;https://h312h.github.io/", "dblp": "309/1991;166/5173", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Pengchong_Hu1;~Zhizhong_Han2", "aff": "Wayne State University;Wayne State University", "aff_domain": "wayne.edu;wayne.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhu2023learning,\ntitle={Learning Neural Implicit through Volume Rendering with Attentive Depth Fusion Priors},\nauthor={Pengchong Hu and Zhizhong Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uWNqy09dFW}\n}", "github": "", "project": "", "reviewers": "sZZA;QXhf;3uRZ;X87y", "pdf_size": 22385235, "rating": "5;5;6;7", "confidence": "5;4;5;4", "soundness": "2;3;4;3", "novelty": "1;2;4;3", "presentation": "2;3;4;3", "wc_summary": "467;59;133;86", "wc_strengths": "75;88;248;184", "wc_weaknesses": "672;183;95;337", "wc_questions": "310;63;115;24", "wc_limitations": "196;32;1;9", "wc_review": "1720;425;592;640", "wc_reply_reviewers": "836;23;40;575", "wc_reply_authors": "803;18;30;675", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 186.25, 164.23972570605443 ], "wc_strengths_avg": [ 148.75, 71.1033578672625 ], "wc_weaknesses_avg": [ 321.75, 219.98565294127707 ], "wc_questions_avg": [ 128.0, 109.92497441437045 ], "wc_limitations_avg": [ 59.5, 79.62568681022475 ], "wc_review_avg": [ 844.25, 511.87321428259946 ], "wc_reply_reviewers_avg": [ 368.5, 349.4570789095565 ], "wc_reply_authors_avg": [ 381.5, 360.3779266270341 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=671227094076009653&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "wayne.edu;wayne.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Wayne State University", "aff_unique_dep": "", "aff_unique_url": "https://wayne.edu", "aff_unique_abbr": "WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Amazon-M2: A Multilingual Multi-locale Shopping Session Dataset for Recommendation and Text Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73435", "id": "uXBO47JcJT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/193df57a2366d032fb18dcac0698d09a-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=uXBO47JcJT", "openreview": "https://openreview.net/forum?id=uXBO47JcJT", "poster": 
"/media/PosterPDFs/NeurIPS%202023/73435.png?t=1699921948.3531067", "slides": "https://nips.cc/virtual/2023/poster/73435", "video": "https://nips.cc/virtual/2023/poster/73435", "author_site": "Wei Jin, Haitao Mao, Zheng Li, Haoming Jiang, Chen Luo, Hongzhi Wen, Haoyu Han, Hanqing Lu, Zhengyang Wang, Ruirui Li, Zhen Li, Monica Cheng, Rahul Goutam, Haiyang Zhang, Karthik Subbian, Suhang Wang, Yizhou Sun, Jiliang Tang, Bing Yin, Xianfeng Tang", "tldr": "", "abstract": "Modeling customer shopping intentions is a crucial task for e-commerce, as it directly impacts user experience and engagement. Thus, accurately understanding customer preferences is essential for providing personalized recommendations. Session-based recommendation, which utilizes customer session data to predict their next interaction, has become increasingly popular. \nHowever, existing session datasets have limitations in terms of item attributes, user diversity, and dataset scale. As a result, they cannot comprehensively capture the spectrum of user behaviors and preferences.\nTo bridge this gap, we present the Amazon Multilingual Multi-locale Shopping Session Dataset, namely Amazon-M2. It is the first multilingual dataset consisting of millions of user sessions from six different locales, where the major languages of products are English, German, Japanese, French, Italian, and Spanish.\nRemarkably, the dataset can help us enhance personalization and understanding of user preferences, which can benefit various existing tasks as well as enable new tasks. To test the potential of the dataset, we introduce three tasks in this work:\n(1) next-product recommendation, (2) next-product recommendation with domain shifts, and (3) next-product title generation.\nWith the above tasks, we benchmark a range of algorithms on our proposed dataset, drawing new insights for further research and practice. \nIn addition, based on the proposed dataset and tasks, we hosted a competition in the KDD CUP 2023 https://www.aicrowd.com/challenges/amazon-kdd-cup-23-multilingual-recommendation-challenge and have attracted thousands of users and submissions. 
The winning solutions and the associated workshop can be accessed at our website~https://kddcup23.github.io/.", "keywords": "session based recommendation;multilingual language models;recommender system;pre-training & fine-tuning;transfer learning;text generation", "primary_area": "", "supplementary_material": "/attachment/03b7225a4f9cb4997b357c910f274d86dcb540b8.pdf", "author": "Wei Jin;Haitao Mao;Zheng Li;Haoming Jiang;Chen Luo;Hongzhi Wen;Haoyu Han;Hanqing Lu;Zhengyang Wang;Ruirui Li;Zhen Li;Monica Xiao Cheng;Rahul Goutam;Haiyang Zhang;Karthik Subbian;Suhang Wang;Yizhou Sun;Jiliang Tang;Bing Yin;Xianfeng Tang", "authorids": "~Wei_Jin4;~Haitao_Mao1;~Zheng_Li9;~Haoming_Jiang1;~Chen_Luo3;~Hongzhi_Wen1;~Haoyu_Han1;~Hanqing_Lu3;~Zhengyang_Wang1;~Ruirui_Li3;~Zhen_Li16;~Monica_Xiao_Cheng1;~Rahul_Goutam1;~Haiyang_Zhang4;~Karthik_Subbian1;~Suhang_Wang1;~Yizhou_Sun1;~Jiliang_Tang1;~Bing_Yin1;~Xianfeng_Tang1", "gender": ";M;M;M;M;M;M;M;M;F;;M;M;M;F;M;M;M;M;M", "homepage": "http://www.cs.emory.edu/~wjin30/;https://hmjianggatech.github.io;https://chen-luo.com/;https://www.cse.msu.edu/~wenhongz/;https://cse.msu.edu/~hanhaoy1/;;;https://ruiruili.mystrikingly.com/;;;;;http://mailtosuka.googlepages.com;https://faculty.ist.psu.edu/szw494/;http://web.cs.ucla.edu/~yzsun/;https://www.cse.msu.edu/~tangjili/;;https://xta.ng/;;https://hsqmlzno1.github.io/", "dblp": "66/2173-9;230/3684;46/4719-3.html;179/0477;257/5633-1;39/6752;;12/8221-2;;;;;32/5843;136/9440;37/3868;64/10812;;33/7694;;10/1143-18", "google_scholar": "eWow24EAAAAJ;XaFhuG8AAAAJ;4EoNAFcAAAAJ;;;pNYuJQIAAAAJ;A4fNBtEAAAAJ;gYCtd6cAAAAJ;;X3BPCpwAAAAJ;;;;cdT_WMMAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ;WtzKMWAAAAAJ;qSOxydEAAAAJ;u1PEv-QAAAAJ;3GmlKM4AAAAJ;https://scholar.google.com.hk/citations?user=P6fwn4AAAAAJ", "orcid": ";;0000-0001-5339-5817;0000-0003-0775-8538;0000-0002-2529-6042;;0000-0002-5146-2884;;;0000-0002-1140-687X;;;;0000-0003-3448-4878;;0000-0001-7125-3898;0000-0002-5890-0031;;;", "linkedin": ";;chen-luo-a7a45b84/;;;;;;zhenlimathstat/;monica-c-214a798/;rahul-goutam;haiyang-z-63b3b835;;;;;bingyin;xianfengtang/;;", "or_profile": "~Wei_Jin4;~Haoming_Jiang1;~Chen_Luo3;~Hongzhi_Wen1;~Haoyu_Han1;~Hanqing_Lu3;~Zhengyang_Wang1;~Ruirui_Li3;~Zhen_Li16;~Monica_Xiao_Cheng1;~Rahul_Goutam1;~Haiyang_Zhang4;~Karthik_Subbian1;~Suhang_Wang1;~Yizhou_Sun1;~Jiliang_Tang1;~Bing_Yin1;~Xianfeng_Tang1;~Mao_Haitao1;~zheng_li4", "aff": "Michigan State University;Amazon;Amazon;Michigan State University;Michigan State University;Amazon;Amazon;Amazon;Amazon;Amazon;;Amazon;Amazon;Pennsylvania State University;University of California, Los Angeles;Michigan State University;Amazon;Amazon;Michigan State University;Amazon", "aff_domain": "msu.edu;amazon.com;amazon.com;msu.edu;msu.edu;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;psu.edu;ucla.edu;msu.edu;amazon.com;amazon.com;msu.edu;amazon.com", "position": "PhD student;Principal Researcher;Researcher;PhD student;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;;Principal Researcher;Researcher;Assistant Professor;Associate Professor;Full Professor;Senior Science Manager;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\njin2023amazonm,\ntitle={Amazon-M2: A Multilingual Multi-locale Shopping Session Dataset for Recommendation and Text Generation},\nauthor={Wei Jin and Haitao Mao and Zheng Li and Haoming Jiang and Chen Luo and Hongzhi Wen and Haoyu Han and Hanqing Lu and Zhengyang Wang and Ruirui Li and Zhen Li and Monica Xiao Cheng and Rahul 
Goutam and Haiyang Zhang and Karthik Subbian and Suhang Wang and Yizhou Sun and Jiliang Tang and Bing Yin and Xianfeng Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=uXBO47JcJT}\n}", "github": "", "project": "", "reviewers": "vaN1;P6fP;7DAG;hA57", "pdf_size": 833629, "rating": "7;7;7;7", "confidence": "3;4;4;4", "wc_summary_and_contributions": "119;114;40;79", "wc_strengths": "62;212;49;100", "wc_improvement": "38;71;120;50", "wc_limitations": "1;18;5;4", "wc_correctness": "1;1;2;1", "wc_clarity": "1;1;8;1", "wc_relation_to_prior_work": "1;1;14;1", "wc_documentation": "1;1;13;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "225;420;252;238", "wc_reply_reviewers": "10;11;22;0", "wc_reply_authors": "134;549;976;605", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;3;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 88.0, 31.709619991415853 ], "wc_strengths_avg": [ 105.75, 64.1419324623136 ], "wc_improvement_avg": [ 69.75, 31.32391259086259 ], "wc_limitations_avg": [ 7.0, 6.519202405202649 ], "wc_correctness_avg": [ 1.25, 0.4330127018922193 ], "wc_clarity_avg": [ 2.75, 3.031088913245535 ], "wc_relation_to_prior_work_avg": [ 4.25, 5.629165124598851 ], "wc_documentation_avg": [ 4.0, 5.196152422706632 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 283.75, 79.24132444627614 ], "wc_reply_reviewers_avg": [ 10.75, 7.790218225441442 ], "wc_reply_authors_avg": [ 566.0, 298.55234046980775 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6907450846261910178&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "msu.edu;amazon.com;amazon.com;msu.edu;msu.edu;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;psu.edu;ucla.edu;msu.edu;amazon.com;amazon.com;msu.edu;amazon.com", "author_num": 20, "aff_unique_index": "0;1;1;0;0;1;1;1;1;1;1;1;2;3;0;1;1;0;1", "aff_unique_norm": "Michigan State University;Amazon;Pennsylvania State University;University of California, Los Angeles", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.msu.edu;https://www.amazon.com;https://www.psu.edu;https://www.ucla.edu", "aff_unique_abbr": "MSU;Amazon;PSU;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uY4rqdLls9", "title": "Dual control variate for faster black-box variational inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "Black-box variational inference is a widely-used framework for Bayesian posterior inference, but in some cases suffers from high variance in gradient estimates, harming accuracy and efficiency. This variance comes from two sources of randomness: Data subsampling and Monte Carlo sampling. Whereas existing control variates only address Monte Carlo noise and incremental gradient methods typically only address data subsampling, we propose a new \"dual\" control variate capable of jointly reducing variance from both sources of noise. 
We confirm that this leads to reduced variance and improved optimization in several real-world applications.", "keywords": "approximate inference;variational inference", "primary_area": "", "supplementary_material": "/attachment/990c270b719f97f7656351a37f09734a507da4c8.zip", "author": "Xi Wang;Tomas Geffner;Justin Domke", "authorids": "~Xi_Wang4;~Tomas_Geffner1;~Justin_Domke1", "gender": "M;M;Unspecified", "homepage": ";https://people.umass.edu/tgeffner/;https://people.cs.umass.edu/~domke/", "dblp": ";201/5406;39/5186", "google_scholar": "giztudUAAAAJ;KIIe2K8AAAAJ;", "orcid": ";;", "linkedin": "wang-xi-660a47153/;tomasgeffner/;", "or_profile": "~Xi_Wang4;~Tomas_Geffner1;~Justin_Domke1", "aff": "University of Massachusetts, Amherst;Department of Computer Science, University of Massachusetts, Amherst;University of Massachusetts at Amherst", "aff_domain": "umass.edu;cs.umass.edu;umass.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nwang2023dual,\ntitle={Dual control variate for faster black-box variational inference},\nauthor={Xi Wang and Tomas Geffner and Justin Domke},\nyear={2023},\nurl={https://openreview.net/forum?id=uY4rqdLls9}\n}", "github": "", "project": "", "reviewers": "UG9s;11Uw;xUai;NBCj;SSK5", "site": "https://openreview.net/forum?id=uY4rqdLls9", "pdf_size": 882592, "rating": "5;5;6;7;7", "confidence": "4;1;3;5;4", "soundness": "3;3;3;4;3", "novelty": "3;2;3;3;3", "presentation": "3;2;2;3;2", "wc_summary": "94;56;48;151;58", "wc_strengths": "72;72;97;108;121", "wc_weaknesses": "126;97;67;226;86", "wc_questions": "19;101;75;362;648", "wc_limitations": "10;4;18;29;1", "wc_review": "321;330;305;876;914", "wc_reply_reviewers": "19;4;19;63;125", "wc_reply_authors": "0;0;27;31;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 1.3564659966250536 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 81.4, 38.239246854508 ], "wc_strengths_avg": [ 94.0, 19.503845774615836 ], "wc_weaknesses_avg": [ 120.4, 56.15193674309017 ], "wc_questions_avg": [ 241.0, 235.25730594393875 ], "wc_limitations_avg": [ 12.4, 10.131140113531153 ], "wc_review_avg": [ 549.2, 282.7135652917985 ], "wc_reply_reviewers_avg": [ 46.0, 44.163333207537676 ], "wc_reply_authors_avg": [ 11.6, 14.263239463740346 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6593804733957871, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17857762019652510553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "uZedGmxGUg", "title": "EasyTPP: Towards Open Benchmarking the Temporal Point Processes", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Continuous-time event sequences play a vital role in real-world domains such as healthcare, finance, online shopping, social networks, and so on. 
To model such data, temporal point processes (TPPs) have emerged as the most natural and competitive models, making a significant impact in both academic and application communities. Despite the emergence of many powerful models in recent years, there hasn't been a central benchmark for these models and future research endeavors. This lack of standardization impedes researchers and practitioners from comparing methods and reproducing results, potentially slowing down progress in this field. In this paper, we present EasyTPP, the first central repository of research assets (e.g., data, models, evaluation programs, documentation) in the area of event sequence modeling. Our EasyTPP makes several unique contributions to this area: a unified interface for using existing datasets and adding new datasets; a wide range of evaluation programs that are easy to use and extend and that facilitate reproducible research; \nimplementations of popular neural TPPs, together with a rich library of modules that can be composed to quickly build complex models. Our benchmark is open-sourced: all the data and implementation can be found at this \\href{https://github.com/ant-research/EasyTemporalPointProcess}{\\textcolor{blue}{Github repository}}. We will actively maintain this benchmark and welcome contributions from other researchers and practitioners. Our benchmark will help promote reproducible research in this field, thus accelerating research progress as well as making more significant real-world impacts.", "keywords": "Event sequence;Temporal point process;open benchmarking", "primary_area": "", "supplementary_material": "/attachment/a6302c05ad848305ab2ecabc927f249bc367ec88.pdf", "author": "Siqiao Xue;Xiaoming Shi;Zhixuan Chu;Yan Wang;Hongyan Hao;Caigao JIANG;Chen Pan;James Y.
Zhang;Qingsong Wen;JUN ZHOU;Hongyuan Mei", "authorids": "~Siqiao_Xue1;~Xiaoming_Shi2;~Zhixuan_Chu1;~Yan_Wang34;~Hongyan_Hao1;~Caigao_JIANG2;~Chen_Pan3;~James_Y._Zhang1;~Qingsong_Wen2;~JUN_ZHOU6;~Hongyuan_Mei1", "gender": "M;M;M;;M;M;M;M;M;M;M", "homepage": "https://www.antgroup.com/en;;;https://ai.nju.edu.cn/main.htm;;;;https://scholar.google.com/citations?user=Ywakh_sAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en;http://www.cs.jhu.edu/~hmei/;https://sites.google.com/site/qingsongwen8/", "dblp": "302/7766;65/9789-1;258/1233;;264/1941;292/3817;;151/3086;99/3847-11;164/5576;27/561", "google_scholar": "pZqTpoEAAAAJ;0WMTWacAAAAJ;a4IuTngAAAAJ;https://scholar.google.com/citations?view_op=list_works;;;;Ywakh_sAAAAJ;mCVvloEAAAAJ;g_zaiVIAAAAJ;vjPJvwYAAAAJ", "orcid": ";0000-0003-0764-8961;;0009-0006-2938-357X;0000-0002-0867-7628;;;0000-0001-6519-676X;0000-0001-6033-6102;;0000-0003-4516-2524", "linkedin": ";;;;;caigao-jiang-309710194;https://www.linkedin.cn/incareer/in/chen-pan-7b7bb645;jamesymzhang/;;hongyuan-mei-57687858?trk=nav_responsive_tab_profile_pic;qingsong-wen-22814156/", "or_profile": "~Siqiao_Xue1;~Xiaoming_Shi2;~Zhixuan_Chu1;~Yan_Wang34;~Hongyan_Hao1;~Caigao_JIANG2;~Chen_Pan3;~James_Y._Zhang1;~JUN_ZHOU6;~Hongyuan_Mei1;~Qingsong_Wen1", "aff": "Alibaba;Ant Group;Ant Group;Alibaba Group;;Alibaba Group;;Ant Group;Ant Group;Toyota Technological Institute at Chicago;Alibaba Group", "aff_domain": "alibaba-inc.com;antgroup.com;antgroup.com;antgroup.com;;alibaba-inc.com;;alipay.com;antgroup.com;ttic.edu;alibaba-inc.com", "position": "researcher;Researcher;Researcher;Researcher;;Researcher;;managing director;Researcher;Research Assistant Professor;Researcher", "bibtex": "@misc{\nxue2023easytpp,\ntitle={Easy{TPP}: Towards Open Benchmarking the Temporal Point Processes},\nauthor={Siqiao Xue and Xiaoming Shi and Zhixuan Chu and Yan Wang and Hongyan Hao and Caigao JIANG and Chen Pan and James Y. 
Zhang and Qingsong Wen and JUN ZHOU and Hongyuan Mei},\nyear={2023},\nurl={https://openreview.net/forum?id=uZedGmxGUg}\n}", "github": "", "project": "", "reviewers": "mYWh;Q2cp;3bs8;JyMg", "site": "https://openreview.net/forum?id=uZedGmxGUg", "pdf_size": 812091, "rating": "5;6;7;7", "confidence": "4;4;4;2", "wc_summary_and_contributions": "205;49;77;87", "wc_strengths": "26;113;158;28", "wc_improvement": "108;228;18;41", "wc_limitations": "1;14;6;22", "wc_correctness": "1;21;10;12", "wc_clarity": "1;9;70;209", "wc_relation_to_prior_work": "1;29;46;20", "wc_documentation": "1;10;82;56", "wc_additional_feedback": "1;1;1;1", "wc_review": "345;474;468;476", "wc_reply_reviewers": "0;65;20;82", "wc_reply_authors": "483;260;138;676", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 104.5, 59.6720202439971 ], "wc_strengths_avg": [ 81.25, 56.53925627384923 ], "wc_improvement_avg": [ 98.75, 81.61916135320186 ], "wc_limitations_avg": [ 10.75, 7.980444849756184 ], "wc_correctness_avg": [ 11.0, 7.106335201775948 ], "wc_clarity_avg": [ 72.25, 83.34079133293612 ], "wc_relation_to_prior_work_avg": [ 24.0, 16.232683080747925 ], "wc_documentation_avg": [ 37.25, 33.20673877392961 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 440.75, 55.35961976025486 ], "wc_reply_reviewers_avg": [ 41.75, 33.07850510527947 ], "wc_reply_authors_avg": [ 389.25, 206.66806115120934 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3784551467323369907&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;2;2;1;1;3;2", "aff_unique_norm": "Alibaba Group Holding Limited;Ant Group;Alibaba Group;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.alibaba.com;https://www.antgroup.com;https://www.alibaba.com;https://www.tti-chicago.org", "aff_unique_abbr": "Alibaba;Ant Group;Alibaba;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "CL-NeRF: Continual Learning of Neural Radiance Fields for Evolving Scene Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70147", "id": "uZjpSBTPik", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c7154e394e24c69409256ccf8bf0804-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uZjpSBTPik", "openreview": "https://openreview.net/forum?id=uZjpSBTPik", "poster": "/media/PosterPDFs/NeurIPS%202023/70147.png?t=1701956896.724359", "slides": "https://nips.cc/virtual/2023/poster/70147", "video": "https://nips.cc/virtual/2023/poster/70147", "author_site": "Xiuzhe Wu, Peng Dai, Weipeng DENG, Handi Chen, Yang Wu, Yan-Pei Cao, Ying Shan, Xiaojuan Qi", "tldr": "", "abstract": "Existing methods for adapting Neural Radiance Fields (NeRFs) to scene changes require extensive data capture and model retraining, which is both time-consuming and labor-intensive. 
In this paper, we tackle the challenge of efficiently adapting NeRFs to real-world scene changes over time using a few new images while retaining the memory of unaltered areas, focusing on the continual learning aspect of NeRFs. To this end, we propose CL-NeRF, which consists of two key components: a lightweight expert adaptor for adapting to new changes and evolving scene representations and a conflict-aware knowledge distillation learning objective for memorizing unchanged parts. We also present a new benchmark for evaluating Continual Learning of NeRFs with comprehensive metrics. Our extensive experiments demonstrate that CL-NeRF can synthesize high-quality novel views of both changed and unchanged regions with high training efficiency, surpassing existing methods in terms of reducing forgetting and adapting to changes. Code and benchmark will be made available.", "keywords": "Neural Radiance Field; Continual Learning; Scene Representation", "primary_area": "", "supplementary_material": "/attachment/9b4b69ffb23c97dcf89ed6e859637a4b14efe2b0.zip", "author": "Xiuzhe Wu;Peng Dai;Weipeng DENG;Handi Chen;Yang Wu;Yan-Pei Cao;Ying Shan;XIAOJUAN QI", "authorids": "~Xiuzhe_Wu1;~Peng_Dai3;~Weipeng_DENG1;~Handi_Chen1;~Yang_Wu1;~Yan-Pei_Cao1;~Ying_Shan2;~XIAOJUAN_QI2", "gender": ";M;M;F;M;M;M;F", "homepage": ";https://daipengwa.github.io/;;;;https://yanpei.me/;;https://xjqi.github.io/", "dblp": ";08/3547-3;331/7435;;56/1428-1;141/6343;68/5910;176/1445-1.html", "google_scholar": ";2fGIJBsAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=vwOQ-UIAAAAJ;50194vkAAAAJ;4oXBp9UAAAAJ;bGn0uacAAAAJ", "orcid": ";;;0000-0002-4223-3502;;;0000-0001-7673-8325;", "linkedin": ";;;;;;YingShanProfile/;", "or_profile": "~Xiuzhe_Wu1;~Peng_Dai3;~Weipeng_DENG1;~Handi_Chen1;~Yang_Wu1;~Yan-Pei_Cao1;~Ying_Shan2;~XIAOJUAN_QI2", "aff": ";University of Hong Kong;University of Hong Kong;University of Hong Kong;Tencent AI Lab;Tencent ARC Lab, Tencent AI Lab;Tencent PCG ARC Lab;University of Hong Kong", "aff_domain": ";eee.hku.hk;hku.hk;hku.hk;tencent.com;tencent.com;arc.tencent.com;hku.hk", "position": ";PhD student;PhD student;PhD student;Principal Researcher;Principal Researcher;Director;Assistant Professor", "bibtex": "@inproceedings{\nwu2023clnerf,\ntitle={{CL}-Ne{RF}: Continual Learning of Neural Radiance Fields for Evolving Scene Representation},\nauthor={Xiuzhe Wu and Peng Dai and Weipeng DENG and Handi Chen and Yang Wu and Yan-Pei Cao and Ying Shan and XIAOJUAN QI},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uZjpSBTPik}\n}", "github": "", "project": "", "reviewers": "Fafa;tKPz;QJef;f2t1;fqk8", "pdf_size": 16249959, "rating": "5;5;5;6;7", "confidence": "4;4;4;5;4", "soundness": "3;3;3;3;4", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "88;114;67;87;120", "wc_strengths": "44;43;39;91;97", "wc_weaknesses": "398;159;276;86;113", "wc_questions": "35;62;9;86;102", "wc_limitations": "5;13;24;9;18", "wc_review": "570;391;415;359;450", "wc_reply_reviewers": "357;24;70;87;261", "wc_reply_authors": "1436;0;586;282;577", "reply_reviewers": "2;1;1;2;2", "reply_authors": "6;1;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.2, 19.405153954555473 ], "wc_strengths_avg": [ 62.8, 25.6 ], "wc_weaknesses_avg": 
[ 206.4, 115.77668159003349 ], "wc_questions_avg": [ 58.8, 33.65352878971238 ], "wc_limitations_avg": [ 13.8, 6.675327707311455 ], "wc_review_avg": [ 437.0, 72.85876748888907 ], "wc_reply_reviewers_avg": [ 159.8, 127.22798434306817 ], "wc_reply_authors_avg": [ 576.2, 481.23025673787384 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 2.6, 1.7435595774162693 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7988189143506951215&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": ";eee.hku.hk;hku.hk;hku.hk;tencent.com;tencent.com;arc.tencent.com;hku.hk", "author_num": 8, "aff_unique_index": "0;0;0;1;1;1;0", "aff_unique_norm": "University of Hong Kong;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.hku.hk;https://ai.tencent.com", "aff_unique_abbr": "HKU;Tencent AI Lab", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Small Total-Cost Constraints in Contextual Bandits with Knapsacks, with Application to Fairness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70146", "id": "uZvG0HLkOB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/921dcb622bd0119c8f4f34644ce87ee0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uZvG0HLkOB", "openreview": "https://openreview.net/forum?id=uZvG0HLkOB", "poster": "/media/PosterPDFs/NeurIPS%202023/70146.png?t=1697817058.6114078", "slides": "https://nips.cc/virtual/2023/poster/70146", "video": "https://nips.cc/virtual/2023/poster/70146", "author_site": "Evgenii Chzhen, Christophe Giraud, Zhen LI, Gilles Stoltz", "tldr": "", "abstract": "We consider contextual bandit problems with knapsacks [CBwK], a problem where at each round, a scalar reward is obtained and vector-valued costs are suffered. The learner aims to maximize the cumulative rewards while ensuring that the cumulative costs are lower than some predetermined cost constraints. We assume that contexts come from a continuous set, that costs can be signed, and that the expected reward and cost functions, while unknown, may be uniformly estimated---a typical assumption in the literature. In this setting, total cost constraints had so far to be at least of order $T^{3/4}$, where $T$ is the number of rounds, and were even typically assumed to depend linearly on $T$. We are however motivated to use CBwK to impose a fairness constraint of equalized average costs between groups: the budget associated with the corresponding cost constraints should be as close as possible to the natural deviations, of order $\\sqrt{T}$. To that end, we introduce a dual strategy based on projected-gradient-descent updates, that is able to deal with total-cost constraints of the order of $\\sqrt{T}$ up to poly-logarithmic terms. This strategy is more direct and simpler than existing strategies in the literature. 
It relies on a careful, adaptive tuning of the step size.", "keywords": "multi-armed bandits;bandits with knapsacks;primal-dual approaches", "primary_area": "", "supplementary_material": "/attachment/507fd0cf6823665022d09f99d0792c2006a9ba14.pdf", "author": "Evgenii E Chzhen;Christophe Giraud;Zhen LI;Gilles Stoltz", "authorids": "~Evgenii_E_Chzhen1;~Christophe_Giraud1;~Zhen_LI12;~Gilles_Stoltz1", "gender": "M;;M;M", "homepage": "https://echzhen.com;https://www.imo.universite-paris-saclay.fr/~giraud/;;https://www.imo.universite-paris-saclay.fr/fr/perso/gilles-stoltz/", "dblp": "198/1158;15/3175-2;;18/3915", "google_scholar": ";gF-ziCAAAAAJ;;", "orcid": ";0009-0004-1836-5742;;", "linkedin": ";;zhenlihec;", "or_profile": "~Evgenii_E_Chzhen1;~Christophe_Giraud1;~Zhen_LI12;~Gilles_Stoltz1", "aff": "CNRS/University Paris-Saclay;Universit\u00e9 Paris Saclay;;CNRS", "aff_domain": "universite-paris-saclay.fr;universite-paris-saclay.fr;;cnrs.fr", "position": "Researcher;Full Professor;;Researcher", "bibtex": "@inproceedings{\nchzhen2023small,\ntitle={Small Total-Cost Constraints in Contextual Bandits with Knapsacks, with Application to Fairness},\nauthor={Evgenii E Chzhen and Christophe Giraud and Zhen LI and Gilles Stoltz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uZvG0HLkOB}\n}", "github": "", "project": "", "reviewers": "3rBq;EATd;aT3N;Hs7n", "pdf_size": 1876671, "rating": "4;6;6;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "33;123;92;86", "wc_strengths": "21;48;87;32", "wc_weaknesses": "76;14;91;53", "wc_questions": "35;17;16;28", "wc_limitations": "1;1;9;5", "wc_review": "166;203;295;204", "wc_reply_reviewers": "71;4;0;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 83.5, 32.361242250568814 ], "wc_strengths_avg": [ 47.0, 25.0099980007996 ], "wc_weaknesses_avg": [ 58.5, 29.038767191463208 ], "wc_questions_avg": [ 24.0, 7.905694150420948 ], "wc_limitations_avg": [ 4.0, 3.3166247903554 ], "wc_review_avg": [ 217.0, 47.56574397610112 ], "wc_reply_reviewers_avg": [ 21.25, 28.94283158227612 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11336640728889859991&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "universite-paris-saclay.fr;universite-paris-saclay.fr;;cnrs.fr", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University Paris-Saclay;Universit\u00e9 Paris Saclay;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.universite-paris-saclay.fr;https://www.universite-paris-saclay.fr;https://www.cnrs.fr", "aff_unique_abbr": "Paris-Saclay;UPSaclay;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Domain Agnostic Fourier Neural Operators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70145", "id":
"ubap5FKbJs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/940a7634dab556b67af15bacd337f7db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ubap5FKbJs", "openreview": "https://openreview.net/forum?id=ubap5FKbJs", "poster": "/media/PosterPDFs/NeurIPS%202023/70145.png?t=1702247810.045606", "slides": "https://nips.cc/virtual/2023/poster/70145", "video": "https://nips.cc/virtual/2023/poster/70145", "author_site": "Ning Liu, Siavash Jafarzadeh, Yue Yu", "tldr": "", "abstract": "Fourier neural operators (FNOs) can learn highly nonlinear mappings between function spaces, and have recently become a popular tool for learning responses of complex physical systems. However, to achieve good accuracy and efficiency, FNOs rely on the Fast Fourier transform (FFT), which is restricted to modeling problems on rectangular domains. To lift such a restriction and permit FFT on irregular geometries as well as topology changes, we introduce domain agnostic Fourier neural operator (DAFNO), a novel neural operator architecture for learning surrogates with irregular geometries and evolving domains. The key idea is to incorporate a smoothed characteristic function in the integral layer architecture of FNOs, and leverage FFT to achieve rapid computations, in such a way that the geometric information is explicitly encoded in the architecture. In our empirical evaluation, DAFNO has achieved state-of-the-art accuracy as compared to baseline neural operator models on two benchmark datasets of material modeling and airfoil simulation. To further demonstrate the capability and generalizability of DAFNO in handling complex domains with topology changes, we consider a brittle material fracture evolution problem. With only one training crack simulation sample, DAFNO has achieved generalizability to unseen loading scenarios and substantially different crack patterns from the trained scenario. 
Our code and data accompanying this paper are available at https://github.com/ningliu-iga/DAFNO.", "keywords": "Operator-Regression Neural Networks;Neural Operators;Data-Driven Physics Modeling;Geometrical and Topological Shape Changes", "primary_area": "", "supplementary_material": "/attachment/1bdb239f4abbc3c1738b1dfce7d67b0851a0f498.pdf", "author": "Ning Liu;Siavash Jafarzadeh;Yue Yu", "authorids": "~Ning_Liu6;~Siavash_Jafarzadeh1;~Yue_Yu3", "gender": ";M;", "homepage": ";;", "dblp": ";;", "google_scholar": ";BDo5e7sAAAAJ;", "orcid": ";;", "linkedin": ";siavash-jafarzadeh/;", "or_profile": "~Ning_Liu6;~Siavash_Jafarzadeh1;~Yue_Yu3", "aff": ";Lehigh University;", "aff_domain": ";lehigh.edu;", "position": ";Assistant Professor;", "bibtex": "@inproceedings{\nliu2023domain,\ntitle={Domain Agnostic Fourier Neural Operators},\nauthor={Ning Liu and Siavash Jafarzadeh and Yue Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ubap5FKbJs}\n}", "github": "", "project": "", "reviewers": "BzBv;dnax;UPhM;FUid;fNa3", "pdf_size": 1948806, "rating": "5;6;6;6;7", "confidence": "5;3;3;4;4", "soundness": "3;3;2;4;3", "novelty": "3;3;2;3;3", "presentation": "3;3;2;4;3", "wc_summary": "108;101;86;119;73", "wc_strengths": "37;87;37;113;70", "wc_weaknesses": "274;364;141;268;77", "wc_questions": "1;253;70;70;59", "wc_limitations": "1;9;4;48;17", "wc_review": "421;814;338;618;296", "wc_reply_reviewers": "71;52;22;194;42", "wc_reply_authors": "0;32;32;210;32", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 97.4, 16.230834852218784 ], "wc_strengths_avg": [ 68.8, 29.355749011054037 ], "wc_weaknesses_avg": [ 224.8, 102.49956097466955 ], "wc_questions_avg": [ 90.6, 85.14599227209699 ], "wc_limitations_avg": [ 15.8, 16.98705389406886 ], "wc_review_avg": [ 497.4, 193.177224330406 ], "wc_reply_reviewers_avg": [ 76.2, 60.986555895541436 ], "wc_reply_authors_avg": [ 61.2, 75.42519472961274 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3815959646385554024&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": ";lehigh.edu;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Lehigh University", "aff_unique_dep": "", "aff_unique_url": "https://www.lehigh.edu", "aff_unique_abbr": "Lehigh", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "SnapFusion: Text-to-Image Diffusion Model on Mobile Devices within Two Seconds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70144", "id": "ubgdInLSF9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/41bcc9d3bddd9c90e1f44b29e26d97ff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ubgdInLSF9", "openreview": "https://openreview.net/forum?id=ubgdInLSF9", "poster": "/media/PosterPDFs/NeurIPS%202023/70144.png?t=1697824116.1354053", "slides": "https://nips.cc/virtual/2023/poster/70144", "video": "https://nips.cc/virtual/2023/poster/70144", "author_site": "Yanyu Li, Huan Wang, Qing Jin, Ju 
Hu, Pavlo Chemerys, Yun Fu, Yanzhi Wang, Sergey Tulyakov, Jian Ren", "tldr": "", "abstract": "Text-to-image diffusion models can create stunning images from natural language descriptions that rival the work of professional artists and photographers. However, these models are large, with complex network architectures and tens of denoising iterations, making them computationally expensive and slow to run. As a result, high-end GPUs and cloud-based inference are required to run diffusion models at scale. This is costly and has privacy implications, especially when user data is sent to a third party. To overcome these challenges, we present a generic approach that, for the first time, unlocks running text-to-image diffusion models on mobile devices in **less than 2 seconds**. We achieve this by introducing an efficient network architecture and improving step distillation. Specifically, we propose an efficient UNet by identifying the redundancy of the original model and reducing the computation of the image decoder via data distillation. \nFurther, we enhance the step distillation by exploring training strategies and introducing regularization from classifier-free guidance. Our extensive experiments on MS-COCO show that our model with $8$ denoising steps achieves better FID and CLIP scores than Stable Diffusion v$1.5$ with $50$ steps. Our work democratizes content creation by bringing powerful text-to-image diffusion models to the hands of users.", "keywords": "Text-to-Image;Diffusion model;mobile devices;distillation", "primary_area": "", "supplementary_material": "/attachment/e02449eea23c9f890159762cfa751c11e75668cd.zip", "author": "Yanyu Li;Huan Wang;Qing Jin;Ju Hu;Pavlo Chemerys;Yun Fu;Yanzhi Wang;Sergey Tulyakov;Jian Ren", "authorids": "~Yanyu_Li1;~Huan_Wang3;~Qing_Jin1;~Ju_Hu1;~Pavlo_Chemerys1;~Yun_Fu1;~Yanzhi_Wang3;~Sergey_Tulyakov1;~Jian_Ren2", "gender": ";M;;M;;M;M;M;M", "homepage": ";https://huanwang.tech/;;;;http://www1.ece.neu.edu/~yunfu/;https://web.northeastern.edu/yanzhiwang/;http://www.stulyakov.com/;https://alanspike.github.io/", "dblp": "194/5818;70/6155-14;37/11144;;;00/5815-1;;40/6115;59/2180-5", "google_scholar": "https://scholar.google.com/citations?hl=en;0-On0y4AAAAJ;X9iggBcAAAAJ;ozJiSMcAAAAJ;;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ;https://scholar.google.com/citations?hl=en;mgzXR0sAAAAJ;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ", "orcid": ";0000-0001-6951-901X;0000-0001-8795-9297;;;0000-0002-5098-2853;;;", "linkedin": ";huanwang-zju/;;erichuju;pashachemerys/;furaymond/;;sergeytulyakov/;", "or_profile": "~Yanyu_Li1;~Huan_Wang3;~Qing_Jin1;~Ju_Hu1;~Pavlo_Chemerys1;~Yun_Fu1;~Yanzhi_Wang3;~Sergey_Tulyakov1;~Jian_Ren2", "aff": "Northeastern University;Northeastern University;Northeastern University;Snap Inc.;Snap Inc.;Northeastern University;Northeastern University;;Snap Inc.", "aff_domain": "northeastern.edu;neu.edu;northeastern.edu;snapchat.com;snap.com;northeastern.edu;northeastern.edu;;snapchat.com", "position": "PhD student;PhD student;PhD Student;Researcher;Software Engineer;Full Professor;Associate Professor;;Research Scientist", "bibtex": "@inproceedings{\nli2023snapfusion,\ntitle={SnapFusion: Text-to-Image Diffusion Model on Mobile Devices within Two Seconds},\nauthor={Yanyu Li and Huan Wang and Qing Jin and Ju Hu and Pavlo Chemerys and Yun Fu and Yanzhi Wang and Sergey Tulyakov and Jian Ren},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ubgdInLSF9}\n}", 
"github": "", "project": "", "reviewers": "oXj2;3k2m;vQqa;5sgC", "pdf_size": 24725982, "rating": "5;7;7;8", "confidence": "4;5;3;4", "soundness": "3;3;2;3", "novelty": "2;3;3;4", "presentation": "4;4;2;4", "wc_summary": "58;84;83;48", "wc_strengths": "39;134;45;47", "wc_weaknesses": "136;530;168;76", "wc_questions": "5;6;47;4", "wc_limitations": "1;1;12;1", "wc_review": "239;755;355;176", "wc_reply_reviewers": "0;171;64;0", "wc_reply_authors": "55;35;35;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 68.25, 15.658464164789597 ], "wc_strengths_avg": [ 66.25, 39.22610737761268 ], "wc_weaknesses_avg": [ 227.5, 177.7434949583247 ], "wc_questions_avg": [ 15.5, 18.200274723201296 ], "wc_limitations_avg": [ 3.75, 4.763139720814412 ], "wc_review_avg": [ 381.25, 225.13371027014148 ], "wc_reply_reviewers_avg": [ 58.75, 69.87622986395301 ], "wc_reply_authors_avg": [ 31.25, 19.803724397193573 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 174, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9252855657122153047&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "northeastern.edu;neu.edu;northeastern.edu;snapchat.com;snap.com;northeastern.edu;northeastern.edu;;snapchat.com", "author_num": 9, "aff_unique_index": "0;0;0;1;1;0;0;1", "aff_unique_norm": "Northeastern University;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.snapinc.com", "aff_unique_abbr": "NEU;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Uncovering Meanings of Embeddings via Partial Orthogonality", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70143", "id": "ubp5s2tgXq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65a925049647eab0aa06a9faf1cd470b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ubp5s2tgXq", "openreview": "https://openreview.net/forum?id=ubp5s2tgXq", "poster": "/media/PosterPDFs/NeurIPS%202023/70143.png?t=1701408156.9450536", "slides": "https://nips.cc/virtual/2023/poster/70143", "video": "https://nips.cc/virtual/2023/poster/70143", "author_site": "Yibo Jiang, Bryon Aragam, Victor Veitch", "tldr": "", "abstract": "Machine learning tools often rely on embedding text as vectors of real numbers.\nIn this paper, we study how the semantic structure of language is encoded in the algebraic structure of such embeddings.\nSpecifically, we look at a notion of \"semantic independence\" capturing the idea that, e.g., \"eggplant\" and \"tomato\" are independent given \"vegetable\". \nAlthough such examples are intuitive, it is difficult to formalize such a notion of semantic independence. The key observation here is that any sensible formalization should obey a set of so-called independence axioms, and thus any algebraic encoding of this structure should also obey these axioms. This leads us naturally to use partial orthogonality as the relevant algebraic structure. 
We develop theory and methods that allow us to demonstrate that partial orthogonality does indeed capture semantic independence.\nComplementary to this, we also introduce the concept of independence preserving embeddings where embeddings preserve the conditional independence structures of a distribution, and we prove the existence of such embeddings and approximations to them.", "keywords": "embedding;representation;graphical models;partial orthogonality;Markov boundary", "primary_area": "", "supplementary_material": "", "author": "Yibo Jiang;Bryon Aragam;Victor Veitch", "authorids": "~Yibo_Jiang2;~Bryon_Aragam1;~Victor_Veitch1", "gender": "M;;", "homepage": ";http://bryonaragam.com/;http://victorveitch.com", "dblp": "54/2193;140/7564;167/5650", "google_scholar": "hvQo2gQAAAAJ;u-W3_9QAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yibo_Jiang2;~Bryon_Aragam1;~Victor_Veitch1", "aff": "University of Chicago;Booth School of Business;Google", "aff_domain": "uchicago.edu;chicagobooth.edu;google.com", "position": "PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\njiang2023uncovering,\ntitle={Uncovering Meanings of Embeddings via Partial Orthogonality},\nauthor={Yibo Jiang and Bryon Aragam and Victor Veitch},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ubp5s2tgXq}\n}", "github": "", "project": "", "reviewers": "3K9o;DCJZ;zRa2;1qJm", "pdf_size": 583224, "rating": "2;4;6;6", "confidence": "4;3;3;2", "soundness": "1;2;3;3", "novelty": "2;3;2;3", "presentation": "1;3;3;2", "wc_summary": "77;110;62;287", "wc_strengths": "17;104;67;111", "wc_weaknesses": "50;115;154;63", "wc_questions": "4;81;107;137", "wc_limitations": "1;1;1;8", "wc_review": "149;411;391;606", "wc_reply_reviewers": "0;44;15;119", "wc_reply_authors": "47;455;31;49", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 4.5, 1.6583123951777 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 134.0, 90.02499652874195 ], "wc_strengths_avg": [ 74.75, 37.298625980054545 ], "wc_weaknesses_avg": [ 95.5, 41.620307543313515 ], "wc_questions_avg": [ 82.25, 49.33241834737073 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 389.25, 162.1548257067917 ], "wc_reply_reviewers_avg": [ 44.5, 45.828484592008934 ], "wc_reply_authors_avg": [ 145.5, 178.82603278046517 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11147755943868130417&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uchicago.edu;chicagobooth.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Chicago;University of Chicago Booth School of Business;Google", "aff_unique_dep": ";Booth School of Business;Google", "aff_unique_url": "https://www.uchicago.edu;https://www.chicagobooth.edu;https://www.google.com", "aff_unique_abbr": "UChicago;Booth;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "HyenaDNA: Long-Range Genomic 
Sequence Modeling at Single Nucleotide Resolution", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70142", "id": "ubzNoJjOKj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86ab6927ee4ae9bde4247793c46797c7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ubzNoJjOKj", "openreview": "https://openreview.net/forum?id=ubzNoJjOKj", "poster": "/media/PosterPDFs/NeurIPS%202023/70142.png?t=1702092231.4432018", "slides": "https://nips.cc/virtual/2023/poster/70142", "video": "https://nips.cc/virtual/2023/poster/70142", "author_site": "Eric Nguyen, Michael Poli, Marjan Faizi, Armin Thomas, Michael Wornow, Callum Birch-Sykes, Stefano Massaroli, Aman Patel, Clayton Rabideau, Yoshua Bengio, Stefano Ermon, Christopher R\u00e9, Stephen Baccus", "tldr": "", "abstract": "Genomic (DNA) sequences encode an enormous amount of information for gene regulation and protein synthesis. Similar to natural language models, researchers have proposed foundation models in genomics to learn generalizable features from unlabeled genome data that can then be fine-tuned for downstream tasks such as identifying regulatory elements. Due to the quadratic scaling of attention, previous Transformer-based genomic models have used 512 to 4k tokens as context (<0.001% of the human genome), significantly limiting the modeling of long-range interactions in DNA. In addition, these methods rely on tokenizers or fixed k-mers to aggregate meaningful DNA units, losing single nucleotide resolution (i.e. DNA \"characters\") where subtle genetic variations can completely alter protein function via single nucleotide polymorphisms (SNPs). Recently, Hyena, a large language model based on implicit convolutions, was shown to match attention in quality while allowing longer context lengths and lower time complexity. Leveraging Hyena\u2019s new long-range capabilities, we present HyenaDNA, a genomic foundation model pretrained on the human reference genome with context lengths of up to 1 million tokens at the single-nucleotide level \u2013 an up to 500x increase over previous dense attention-based models. HyenaDNA scales sub-quadratically in sequence length (training up to 160x faster than Transformer), uses single nucleotide tokens, and has full global context at each layer. We explore what longer context enables - including the first use of in-context learning in genomics for simple adaptation to novel tasks without updating pretrained model weights. On fine-tuned benchmarks from the Nucleotide Transformer, HyenaDNA reaches state-of-the-art (SotA) on 12 of 18 datasets using a model with orders of magnitude fewer parameters and pretraining data. On the GenomicBenchmarks, HyenaDNA surpasses SotA on 7 of 8 datasets on average by +10 accuracy points. Code at https://github.com/HazyResearch/hyena-dna.", "keywords": "genomics;hyena;foundation models;large language models;transformers", "primary_area": "", "supplementary_material": "", "author": "Eric Nguyen;Michael Poli;Marjan Faizi;Armin W Thomas;Michael Wornow;Callum Birch-Sykes;Stefano Massaroli;Aman Patel;Clayton M. 
Rabideau;Yoshua Bengio;Stefano Ermon;Christopher Re;Stephen Baccus", "authorids": "~Eric_Nguyen1;~Michael_Poli1;~Marjan_Faizi1;~Armin_W_Thomas1;~Michael_Wornow1;~Callum_Birch-Sykes1;~Stefano_Massaroli1;~Aman_Patel2;~Clayton_M._Rabideau1;~Yoshua_Bengio1;~Stefano_Ermon1;~Christopher_Re1;~Stephen_Baccus2", "gender": "M;M;F;Non-Binary;;M;;M;M;M;M;;", "homepage": "http://erictnguyen.com;;;;https://michaelwornow.net;;;;https://syntensor.com;http://yoshuabengio.org;http://cs.stanford.edu/~ermon/;;", "dblp": ";;;228/8292;295/5424.html;;;;;56/953;47/8135;;", "google_scholar": "66TLwGUAAAAJ;RgIBwboAAAAJ;https://scholar.google.de/citations?user=zdvC7dsAAAAJ;awtZJwkAAAAJ;rXYzcbcAAAAJ;;IwCfl4UAAAAJ;;;kukA0LcAAAAJ;;;", "orcid": ";;;0000-0002-9947-5705;0000-0003-2215-6527;0000-0001-6329-9191;;;;;;;", "linkedin": "nguyenstanford/;;;;;callum-birch-sykes/;;amanspatel/;;yoshuabengio/?originalSubdomain=ca;;;", "or_profile": "~Eric_Nguyen1;~Michael_Poli1;~Marjan_Faizi1;~Armin_W_Thomas1;~Michael_Wornow1;~Callum_Birch-Sykes1;~Stefano_Massaroli1;~Aman_Patel2;~Clayton_M._Rabideau1;~Yoshua_Bengio1;~Stefano_Ermon1;~Christopher_Re1;~Stephen_Baccus2", "aff": "Stanford University;Stanford University;Harvard Medical School;Stanford University;Stanford University;Syntensor;MILA;Stanford University;University of Cambridge;University of Montreal;Stanford University;;", "aff_domain": "stanford.edu;stanford.edu;hms.harvard.edu;stanford.edu;stanford.edu;syntensor.com;mila.quebec;stanford.edu;cam.ac.uk;umontreal.ca;stanford.edu;;", "position": "PhD student;PhD student;Postdoc;Postdoc;PhD student;Researcher;Postdoc;PhD student;PhD student;Full Professor;Associate Professor;;", "bibtex": "@inproceedings{\nnguyen2023hyenadna,\ntitle={Hyena{DNA}: Long-Range Genomic Sequence Modeling at Single Nucleotide Resolution},\nauthor={Eric Nguyen and Michael Poli and Marjan Faizi and Armin W Thomas and Michael Wornow and Callum Birch-Sykes and Stefano Massaroli and Aman Patel and Clayton M. 
Rabideau and Yoshua Bengio and Stefano Ermon and Christopher Re and Stephen Baccus},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ubzNoJjOKj}\n}", "github": "", "project": "", "reviewers": "JfVK;DWDg;nDFb;YJ4C;LTPa", "pdf_size": 1992096, "rating": "7;7;7;7;8", "confidence": "3;5;5;4;4", "soundness": "4;3;3;3;4", "novelty": "3;3;3;2;4", "presentation": "3;3;3;3;3", "wc_summary": "89;101;33;95;20", "wc_strengths": "80;44;47;153;165", "wc_weaknesses": "97;80;327;182;100", "wc_questions": "189;234;26;249;83", "wc_limitations": "27;1;38;21;112", "wc_review": "482;460;471;700;480", "wc_reply_reviewers": "153;206;0;290;30", "wc_reply_authors": "399;585;17;334;16", "reply_reviewers": "2;3;0;1;1", "reply_authors": "3;4;2;3;2", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.6, 34.02116987994387 ], "wc_strengths_avg": [ 97.8, 51.681331252203634 ], "wc_weaknesses_avg": [ 157.2, 91.98130244783447 ], "wc_questions_avg": [ 156.2, 87.23623100524232 ], "wc_limitations_avg": [ 39.8, 38.049441520211566 ], "wc_review_avg": [ 518.6, 91.03318076393903 ], "wc_reply_reviewers_avg": [ 135.8, 108.29293605771339 ], "wc_reply_authors_avg": [ 270.2, 222.92904700823533 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 312, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8529903844568240086&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "stanford.edu;stanford.edu;hms.harvard.edu;stanford.edu;stanford.edu;syntensor.com;mila.quebec;stanford.edu;cam.ac.uk;umontreal.ca;stanford.edu;;", "author_num": 13, "aff_unique_index": "0;0;1;0;0;2;3;0;4;5;0", "aff_unique_norm": "Stanford University;Harvard University;Syntensor;Mila;University of Cambridge;University of Montreal", "aff_unique_dep": ";Medical School;;;;", "aff_unique_url": "https://www.stanford.edu;https://hms.harvard.edu;;https://mila.quebec;https://www.cam.ac.uk;https://www.umontreal.ca", "aff_unique_abbr": "Stanford;HMS;;MILA;Cambridge;UM", "aff_campus_unique_index": "0;0;1;0;0;0;3;0", "aff_campus_unique": "Stanford;Boston;;Cambridge", "aff_country_unique_index": "0;0;0;0;0;2;0;3;2;0", "aff_country_unique": "United States;;Canada;United Kingdom" }, { "title": "Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73434", "id": "uccHPGDlao", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/91f18a1287b398d378ef22505bf41832-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=uccHPGDlao", "openreview": "https://openreview.net/forum?id=uccHPGDlao", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73434", "video": "https://nips.cc/virtual/2023/poster/73434", "author_site": "Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zi Lin, Zhuohan Li, Dacheng Li, Eric Xing, Hao Zhang, Joseph Gonzalez, Ion Stoica", "tldr": "", "abstract": "Evaluating large language model (LLM) based chat assistants is challenging due to their broad capabilities and the inadequacy of existing benchmarks in measuring human 
preferences.\nTo address this, we explore using strong LLMs as judges to evaluate these models on more open-ended questions.\nWe examine the usage and limitations of LLM-as-a-judge, including position, verbosity, and self-enhancement biases, as well as limited reasoning ability, and propose solutions to mitigate some of them.\nWe then verify the agreement between LLM judges and human preferences by introducing two benchmarks: MT-bench, a multi-turn question set; and Chatbot Arena, a crowdsourced battle platform.\nOur results reveal that strong LLM judges like GPT-4 can match both controlled and crowdsourced human preferences well, achieving over 80\\% agreement, the same level of agreement between humans.\nHence, LLM-as-a-judge is a scalable and explainable way to approximate human preferences, which are otherwise very expensive to obtain.\nAdditionally, we show our benchmark and traditional benchmarks complement each other by evaluating several variants of LLaMA and Vicuna.\nThe MT-bench questions, 3K expert votes, and 30K conversations with human preferences are publicly available at https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge.", "keywords": "large language models;human preference;llm-as-a-judge;benchmark;evaluation", "primary_area": "", "supplementary_material": "", "author": "Lianmin Zheng;Wei-Lin Chiang;Ying Sheng;Siyuan Zhuang;Zhanghao Wu;Yonghao Zhuang;Zi Lin;Zhuohan Li;Dacheng Li;Eric Xing;Hao Zhang;Joseph E. Gonzalez;Ion Stoica", "authorids": "~Lianmin_Zheng2;~Wei-Lin_Chiang1;~Ying_Sheng1;~Siyuan_Zhuang1;~Zhanghao_Wu1;~Yonghao_Zhuang1;~Zi_Lin1;~Zhuohan_Li1;~Dacheng_Li1;~Eric_Xing1;~Hao_Zhang2;~Joseph_E._Gonzalez1;~Ion_Stoica1", "gender": "M;;F;M;M;M;F;M;;M;M;M;M", "homepage": "http://lmzheng.net/;https://infwinston.github.io/;https://sites.google.com/view/yingsheng;https://suquark.github.io/;https://zhanghaowu.me;https://zyhowell.github.io/;https://zi-lin.com/;http://zhuohan.li;;http://www.cs.cmu.edu/~epxing/;https://cseweb.ucsd.edu/~haozhang/;http://eecs.berkeley.edu/~jegonzal;http://people.eecs.berkeley.edu/~istoica/", "dblp": "211/7027;174/2148;262/6232.html;;;;81/2999;;;36/3855;55/2270-25;61/8262;s/IonStoica", "google_scholar": "_7Q8uIYAAAAJ;https://scholar.google.com/citations?hl=en;xMhGYpgAAAAJ;KSZmI5EAAAAJ;YfyMDFgAAAAJ;oh297TsAAAAJ;kgZYttUAAAAJ;;;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;H1d4BS8AAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;vN-is70AAAAJ", "orcid": ";;0000-0002-1883-2126;0009-0007-3787-0316;;;;;;;;0000-0003-2921-956X;", "linkedin": ";;;siyuanzhuang;;;zi-lin/;;;;;;ionstoica", "or_profile": "~Lianmin_Zheng2;~Wei-Lin_Chiang1;~Ying_Sheng1;~Siyuan_Zhuang1;~Zhanghao_Wu1;~Yonghao_Zhuang1;~Zi_Lin1;~Zhuohan_Li1;~Dacheng_Li1;~Eric_Xing1;~Hao_Zhang2;~Joseph_E._Gonzalez1;~Ion_Stoica1", "aff": "University of California, Berkeley;University of California, Berkeley;Stanford University;University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University;University of California, San Diego;University of California, Berkeley;;School of Computer Science, Carnegie Mellon University;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu;andrew.cmu.edu;ucsd.edu;berkeley.edu;;cs.cmu.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Graduate student;PhD student;;Full Professor;Postdoc;Associate 
Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2023judging,\ntitle={Judging {LLM}-as-a-Judge with {MT}-Bench and Chatbot Arena},\nauthor={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric Xing and Hao Zhang and Joseph E. Gonzalez and Ion Stoica},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=uccHPGDlao}\n}", "github": "", "project": "", "reviewers": "Zr4f;1xKM;QiZU;BsjF", "pdf_size": 1580141, "rating": "6;7;7;8", "confidence": "4;4;4;5", "wc_summary_and_contributions": "76;74;80;65", "wc_strengths": "78;109;154;155", "wc_improvement": "80;104;226;406", "wc_limitations": "16;56;133;7", "wc_correctness": "1;66;21;4", "wc_clarity": "1;23;56;25", "wc_relation_to_prior_work": "1;35;16;10", "wc_documentation": "1;4;34;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "255;472;721;674", "wc_reply_reviewers": "6;108;110;4", "wc_reply_authors": "269;215;511;321", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 73.75, 5.494315243958978 ], "wc_strengths_avg": [ 124.0, 32.41141774128371 ], "wc_improvement_avg": [ 204.0, 129.0968628588627 ], "wc_limitations_avg": [ 53.0, 49.73429400323282 ], "wc_correctness_avg": [ 23.0, 25.971137826441105 ], "wc_clarity_avg": [ 26.25, 19.587942719948924 ], "wc_relation_to_prior_work_avg": [ 15.5, 12.459935794377111 ], "wc_documentation_avg": [ 10.0, 13.910427743243556 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 530.5, 184.52980789021595 ], "wc_reply_reviewers_avg": [ 57.0, 52.009614495783374 ], "wc_reply_authors_avg": [ 329.0, 111.56164215356459 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3519, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2115026370478138399&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu;andrew.cmu.edu;ucsd.edu;berkeley.edu;;cs.cmu.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 13, "aff_unique_index": "0;0;1;0;0;2;3;0;2;0;0;0", "aff_unique_norm": "University of California, Berkeley;Stanford University;Carnegie Mellon University;University of California, San Diego", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.berkeley.edu;https://www.stanford.edu;https://www.cmu.edu;https://www.ucsd.edu", "aff_unique_abbr": "UC Berkeley;Stanford;CMU;UCSD", "aff_campus_unique_index": "0;0;1;0;0;3;0;4;0;0;0", "aff_campus_unique": "Berkeley;Stanford;;San Diego;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Turbulence in Focus: Benchmarking Scaling Behavior of 3D Volumetric Super-Resolution with BLASTNet 2.0 Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73433", "id": "ugRnHKMK95", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f458af2455b1e12608c2a16c308d663d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ugRnHKMK95", "openreview": "https://openreview.net/forum?id=ugRnHKMK95", "poster": 
"/media/PosterPDFs/NeurIPS%202023/73433.png?t=1701486691.0019372", "slides": "https://nips.cc/virtual/2023/poster/73433", "video": "https://nips.cc/virtual/2023/poster/73433", "author_site": "Wai Tong Chung, Bassem Akoush, Pushan Sharma, Alex Tamkin, Ki Sung Jung, Jacqueline Chen, Jack Guo, Davy Brouzet, Mohsen Talei, Bruno Savard, Alexei Poludnenko, Matthias Ihme", "tldr": "", "abstract": "Analysis of compressible turbulent flows is essential for applications related to propulsion, energy generation, and the environment. \nHere, we present BLASTNet 2.0, a 2.2 TB network-of-datasets containing 744 full-domain samples from 34 high-fidelity direct numerical simulations, which addresses the current limited availability of 3D high-fidelity reacting and non-reacting compressible turbulent flow simulation data. With this data, we benchmark a total of 49 variations of five deep learning approaches for 3D super-resolution - which can be applied for improving scientific imaging, simulations, turbulence models, as well as in computer vision applications. We perform neural scaling analysis on these models to examine the performance of different machine learning (ML) approaches, including two scientific ML techniques. We demonstrate that (i) predictive performance can scale with model size and cost, (ii) architecture matters significantly, especially for smaller models, and (iii) the benefits of physics-based losses can persist with increasing model size. The outcomes of this benchmark study are anticipated to offer insights that can aid the design of 3D super-resolution models, especially for turbulence models, while this data is expected to foster ML methods for a broad range of flow physics applications. This data is publicly available with download links and browsing tools consolidated at https://blastnet.github.io.", "keywords": "Super-resolution;3D;Neural Scaling Laws;Physics-informed Loss;Computational Fluid Dynamics;Partial Differential Equations;Turbulent Reacting Flows;Direct Numerical Simulation;Fluid Mechanics;Combustion;Computer Vision", "primary_area": "", "supplementary_material": "", "author": "Wai Tong Chung;Bassem Akoush;Pushan Sharma;Alex Tamkin;Ki Sung Jung;Jacqueline Chen;Jack Guo;Davy Brouzet;Mohsen Talei;Bruno Savard;Alexei Y Poludnenko;Matthias Ihme", "authorids": "~Wai_Tong_Chung1;~Bassem_Akoush1;~Pushan_Sharma1;~Alex_Tamkin1;~Ki_Sung_Jung1;~Jacqueline_Chen1;~Jack_Guo1;~Davy_Brouzet1;~Mohsen_Talei1;~Bruno_Savard1;~Alexei_Y_Poludnenko1;~Matthias_Ihme1", "gender": "M;;M;;M;F;;;;M;;M", "homepage": "https://waitong94.github.io/;;https://orcid.org/my-orcid?orcid=0000-0001-9670-7147;;;;;;https://people.eng.unimelb.edu.au/mohsent/;https://www.polymtl.ca/expertises/en/savard-bruno;https://me.engr.uconn.edu/blog/faculty/poludnenko-alexei/;https://me.stanford.edu/people/matthias-ihme", "dblp": "274/2974.html;357/5331;;;316/5034;65/6338;;;227/1733;https://dblp.uni-trier.de/pid/163/6309;132/0928;60/5219", "google_scholar": "LgFfklwAAAAJ;iv8JoRQAAAAJ;;;BrRefdgAAAAJ;-YNowMsAAAAJ;eLgsT6mBAI4C;hU7dmLkAAAAJ;https://scholar.google.com.au/citations?user=9GwdoTwAAAAJ;pRCe-ZEAAAAJ;;", "orcid": "0000-0002-6447-4008;0000-0001-5160-4451;;;;;0000-0003-4090-9289;;0000-0001-5923-2461;0000-0002-6989-8942;;0000-0002-4158-7050", "linkedin": "wai-tong-chung-a5111067/;;;;;;jackguo1/;;mohsen-talei-00279555/;;;", "or_profile": 
"~Wai_Tong_Chung1;~Bassem_Akoush1;~Pushan_Sharma1;~Alex_Tamkin1;~Ki_Sung_Jung1;~Jacqueline_Chen1;~Jack_Guo1;~Davy_Brouzet1;~Mohsen_Talei1;~Bruno_Savard1;~Alexei_Y_Poludnenko1;~Matthias_Ihme1", "aff": "Stanford University;Stanford University;;;Sandia National Laboratories;Sandia National Laboratories;Stanford University;Stanford University;University of Melbourne;\u00c9cole Polytechnique de Montr\u00e9al, Universit\u00e9 de Montr\u00e9al;University of Connecticut;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;;sandia.gov;sandia.gov;stanford.edu;stanford.edu;unimelb.edu;polymtl.ca;uconn.edu;stanford.edu", "position": "PhD student;PhD student;;;Postdoc;Principal Researcher;PhD student;Postdoc;Assistant Professor;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchung2023turbulence,\ntitle={Turbulence in Focus: Benchmarking Scaling Behavior of 3D Volumetric Super-Resolution with {BLASTN}et 2.0 Data},\nauthor={Wai Tong Chung and Bassem Akoush and Pushan Sharma and Alex Tamkin and Ki Sung Jung and Jacqueline Chen and Jack Guo and Davy Brouzet and Mohsen Talei and Bruno Savard and Alexei Y Poludnenko and Matthias Ihme},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ugRnHKMK95}\n}", "github": "", "project": "", "reviewers": "yabe;o9yf;5NmK;AcWj", "pdf_size": 12637667, "rating": "6;7;7;7", "confidence": "5;5;4;5", "wc_summary_and_contributions": "61;81;85;51", "wc_strengths": "64;111;85;83", "wc_improvement": "522;253;210;256", "wc_limitations": "23;14;4;27", "wc_correctness": "211;7;28;130", "wc_clarity": "5;7;1;5", "wc_relation_to_prior_work": "14;10;1;19", "wc_documentation": "6;18;3;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "907;502;418;577", "wc_reply_reviewers": "87;0;519;38", "wc_reply_authors": "1038;152;2360;898", "reply_reviewers": "1;0;4;1", "reply_authors": "3;1;6;3", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 69.5, 14.026760139105537 ], "wc_strengths_avg": [ 85.75, 16.723860200324566 ], "wc_improvement_avg": [ 310.25, 123.60092030401715 ], "wc_limitations_avg": [ 17.0, 8.860022573334675 ], "wc_correctness_avg": [ 94.0, 82.0213386869539 ], "wc_clarity_avg": [ 4.5, 2.179449471770337 ], "wc_relation_to_prior_work_avg": [ 11.0, 6.59545297913646 ], "wc_documentation_avg": [ 8.0, 5.873670062235365 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 601.0, 185.40631057221327 ], "wc_reply_reviewers_avg": [ 161.0, 208.97966408241737 ], "wc_reply_authors_avg": [ 1112.0, 795.3577811274622 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8850560713596838374&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;;;sandia.gov;sandia.gov;stanford.edu;stanford.edu;unimelb.edu;polymtl.ca;uconn.edu;stanford.edu", "author_num": 12, "aff_unique_index": "0;0;1;1;0;0;2;3;4;0", "aff_unique_norm": "Stanford University;Sandia National Laboratories;University of Melbourne;\u00c9cole Polytechnique de Montr\u00e9al;University of Connecticut", "aff_unique_dep": ";;;;", "aff_unique_url": 
"https://www.stanford.edu;https://www.sandia.gov;https://www.unimelb.edu.au;https://www.polymtl.ca;https://www.uconn.edu", "aff_unique_abbr": "Stanford;SNL;UniMelb;Polytechnique Montr\u00e9al;UConn", "aff_campus_unique_index": "0;0;0;0;2;0", "aff_campus_unique": "Stanford;;Montr\u00e9al", "aff_country_unique_index": "0;0;0;0;0;0;1;2;0;0", "aff_country_unique": "United States;Australia;Canada" }, { "title": "Mechanic: A Learning Rate Tuner", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70141", "id": "uhKtQMn21D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/955499a8e2860ed746717c1374224c43-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uhKtQMn21D", "openreview": "https://openreview.net/forum?id=uhKtQMn21D", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70141", "video": "https://nips.cc/virtual/2023/poster/70141", "author_site": "Ashok Cutkosky, Aaron Defazio, Harsh Mehta", "tldr": "", "abstract": "We introduce a technique for tuning the learning rate scale factor of any base optimization algorithm and schedule automatically, which we call Mechanic. Our method provides a practical realization of recent theoretical reductions for accomplishing a similar goal in online convex optimization. We rigorously evaluate Mechanic on a range of large scale deep learning tasks with varying batch sizes, schedules, and base optimization algorithms. These experiments demonstrate that depending on the problem, Mechanic either comes very close to, matches or even improves upon manual tuning of learning rates.", "keywords": "optimization;deep learning;online convex optimization", "primary_area": "", "supplementary_material": "/attachment/e9f6b100acbc25e3b954115b2f12431ed8d10e1f.pdf", "author": "Ashok Cutkosky;Aaron Defazio;Harsh Mehta", "authorids": "~Ashok_Cutkosky1;~Aaron_Defazio1;~Harsh_Mehta1", "gender": ";M;M", "homepage": "http://www.cs.stanford.edu/~ashokc;https://www.aarondefazio.com/;", "dblp": "191/6725;116/2969;122/1475", "google_scholar": "h4AbGp0AAAAJ;KEzJsdkAAAAJ;murJPNoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ashok_Cutkosky1;~Aaron_Defazio1;~Harsh_Mehta1", "aff": "Boston University;Meta;Google Research", "aff_domain": "bu.edu;meta.com;google.com", "position": "Assistant Professor;Research Scientist;Software Engineer", "bibtex": "@inproceedings{\ncutkosky2023mechanic,\ntitle={Mechanic: A Learning Rate Tuner},\nauthor={Ashok Cutkosky and Aaron Defazio and Harsh Mehta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uhKtQMn21D}\n}", "github": "", "project": "", "reviewers": "mCwi;2EmS;DR7W;7tau", "pdf_size": 2414470, "rating": "6;6;7;7", "confidence": "4;3;3;3", "soundness": "3;2;3;3", "novelty": "3;3;4;4", "presentation": "3;3;3;1", "wc_summary": "94;75;203;97", "wc_strengths": "145;110;41;86", "wc_weaknesses": "761;142;21;313", "wc_questions": "87;131;211;352", "wc_limitations": "41;5;1;6", "wc_review": "1128;463;477;854", "wc_reply_reviewers": "220;54;125;40", "wc_reply_authors": "446;560;312;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 117.25, 50.22138488731668 ], "wc_strengths_avg": [ 95.5, 37.818646194701365 ], "wc_weaknesses_avg": [ 309.25, 280.69233601935053 
], "wc_questions_avg": [ 195.25, 100.82751360615812 ], "wc_limitations_avg": [ 13.25, 16.13032857693854 ], "wc_review_avg": [ 730.5, 277.9734699571166 ], "wc_reply_reviewers_avg": [ 109.75, 71.34554996634338 ], "wc_reply_authors_avg": [ 329.5, 209.51073958153077 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18141151415566798295&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "bu.edu;meta.com;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Boston University;Meta;Google", "aff_unique_dep": ";Meta Platforms, Inc.;Google Research", "aff_unique_url": "https://www.bu.edu;https://meta.com;https://research.google", "aff_unique_abbr": "BU;Meta;Google Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Annotator: A Generic Active Learning Baseline for LiDAR Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70140", "id": "uiiVSVADDc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/976cc04f0cbaad7790ce0d665e44f90f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uiiVSVADDc", "openreview": "https://openreview.net/forum?id=uiiVSVADDc", "poster": "/media/PosterPDFs/NeurIPS%202023/70140.png?t=1699200534.1905556", "slides": "https://nips.cc/virtual/2023/poster/70140", "video": "https://nips.cc/virtual/2023/poster/70140", "author_site": "Binhui Xie, Shuang Li, Qingju Guo, Chi Liu, Xinjing Cheng", "tldr": "", "abstract": "Active learning, a label-efficient paradigm, empowers models to interactively query an oracle for labeling new data. In the realm of LiDAR semantic segmentation, the challenges stem from the sheer volume of point clouds, rendering annotation labor-intensive and cost-prohibitive. This paper presents Annotator, a general and efficient active learning baseline, in which a voxel-centric online selection strategy is tailored to efficiently probe and annotate the salient and exemplar voxel girds within each LiDAR scan, even under distribution shift. Concretely, we first execute an in-depth analysis of several common selection strategies such as Random, Entropy, Margin, and then develop voxel confusion degree (VCD) to exploit the local topology relations and structures of point clouds. Annotator excels in diverse settings, with a particular focus on active learning (AL), active source-free domain adaptation (ASFDA), and active domain adaptation (ADA). It consistently delivers exceptional performance across LiDAR semantic segmentation benchmarks, spanning both simulation-to-real and real-to-real scenarios. Surprisingly, Annotator exhibits remarkable efficiency, requiring significantly fewer annotations, e.g., just labeling five voxels per scan in the SynLiDAR \u2192 SemanticKITTI task. This results in impressive performance, achieving 87.8% fully-supervised performance under AL, 88.5% under ASFDA, and 94.4% under ADA. 
We envision that Annotator will offer a simple, general, and efficient solution for label-efficient 3D applications.", "keywords": "Active Learning;LiDAR Semantic Segmentation;Domain Adaptation", "primary_area": "", "supplementary_material": "/attachment/45bf1072aad68510aa82a1b546aafd8ca71de8ab.pdf", "author": "Binhui Xie;Shuang Li;qingju guo;Chi Harold Liu;Xinjing Cheng", "authorids": "~Binhui_Xie1;~Shuang_Li6;~qingju_guo1;~Chi_Harold_Liu1;~Xinjing_Cheng1", "gender": "M;M;M;M;M", "homepage": "https://binhuixie.github.io/;https://shuangli.xyz;;;https://github.com/WAAutomation", "dblp": ";43/6294-8;45/4723.html;217/1937;", "google_scholar": "cbVMMCwAAAAJ;VXCiAc4AAAAJ;3IgFTEkAAAAJ;8QbRVCsAAAAJ;", "orcid": ";0000-0001-6807-9905;;;", "linkedin": ";;;;", "or_profile": "~Binhui_Xie1;~Shuang_Li6;~Chi_Harold_Liu1;~Xinjing_Cheng1;~\u5e86\u4e3e_\u90ed1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Inceptio;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;inceptio.ai;bit.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Research Scientist;Undergrad student", "bibtex": "@inproceedings{\nxie2023annotator,\ntitle={Annotator: A Generic Active Learning Baseline for Li{DAR} Semantic Segmentation},\nauthor={Binhui Xie and Shuang Li and qingju guo and Chi Harold Liu and Xinjing Cheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uiiVSVADDc}\n}", "github": "", "project": "", "reviewers": "KrDz;a35W;dFZX;N28Y;zYAC", "pdf_size": 8120362, "rating": "6;6;6;6;7", "confidence": "3;4;4;4;4", "soundness": "3;3;2;2;3", "novelty": "1;3;2;2;3", "presentation": "2;3;3;2;3", "wc_summary": "47;76;76;125;127", "wc_strengths": "52;70;55;172;112", "wc_weaknesses": "135;79;138;195;114", "wc_questions": "56;46;50;41;95", "wc_limitations": "2;26;7;18;16", "wc_review": "292;297;326;551;464", "wc_reply_reviewers": "117;61;0;102;0", "wc_reply_authors": "34;133;0;25;0", "reply_reviewers": "1;2;0;1;0", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 90.2, 31.09598044763985 ], "wc_strengths_avg": [ 92.2, 45.2831094338717 ], "wc_weaknesses_avg": [ 132.2, 37.806877681183884 ], "wc_questions_avg": [ 57.6, 19.334942461771124 ], "wc_limitations_avg": [ 13.8, 8.44748483277715 ], "wc_review_avg": [ 386.0, 103.60115829468317 ], "wc_reply_reviewers_avg": [ 56.0, 49.262561849745495 ], "wc_reply_authors_avg": [ 38.4, 49.18780336628176 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10308772155823901742&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;inceptio.ai;bit.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Beijing Institute of Technology;Inceptio", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;", "aff_unique_abbr": "BIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "Enhancing Knowledge 
Transfer for Task Incremental Learning with Data-free Subnetwork", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70139", "id": "uj9PxVTVqq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d7b3cef7c31b94a4a533db83d01a8882-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uj9PxVTVqq", "openreview": "https://openreview.net/forum?id=uj9PxVTVqq", "poster": "/media/PosterPDFs/NeurIPS%202023/70139.png?t=1700636376.8246949", "slides": "https://nips.cc/virtual/2023/poster/70139", "video": "https://nips.cc/virtual/2023/poster/70139", "author_site": "Qiang Gao, Xiaojun Shan, Yuchen Zhang, Fan Zhou", "tldr": "", "abstract": "As there exist competitive subnetworks within a dense network in concert with the Lottery Ticket Hypothesis, we introduce a novel neuron-wise task incremental learning method, namely Data-free Subnetworks (DSN), which attempts to enhance the elastic knowledge transfer across the tasks that sequentially arrive. Specifically, DSN primarily seeks to transfer knowledge to the newly arriving task from the learned tasks by selecting the affiliated weights of a small set of neurons to be activated, including the reused neurons from prior tasks via neuron-wise masks. It also transfers possibly valuable knowledge to the earlier tasks via data-free replay. Notably, DSN inherently relieves catastrophic forgetting and sidesteps the unavailability of past data and possible privacy concerns. The comprehensive experiments conducted on four benchmark datasets demonstrate the effectiveness of the proposed DSN in the context of task-incremental learning by comparing it to several state-of-the-art baselines. In particular, DSN enables the knowledge transfer to the earlier tasks, which is often overlooked by prior efforts.", "keywords": "data-free subnetwork;task-incremental learning;knowledge transfer;mask", "primary_area": "", "supplementary_material": "/attachment/91f067cf5e8b959d6e7f79699a0e58079e582e86.pdf", "author": "Qiang Gao;Xiaojun Shan;Yuchen Zhang;Fan Zhou", "authorids": "~Qiang_Gao1;xiaojunshan@std.uestc.edu.cn;yuchenzhang@std.uestc.edu.cn;~Fan_Zhou11", "gender": "M;;;M", "homepage": "https://qianggao.xyz/;;;https://sise.uestc.edu.cn/info/1035/9375.htm", "dblp": "43/5917-3;;;63/3122-2", "google_scholar": "3KPOGeAAAAAJ;;;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": "0000-0002-9621-5414;;;0000-0002-8038-8150", "linkedin": ";;;", "or_profile": "~Qiang_Gao1;xiaojunshan@std.uestc.edu.cn;yuchenzhang@std.uestc.edu.cn;~Fan_Zhou11", "aff": "Southwestern University of Finance and Economics;;;University of Electronic Science and Technology of China", "aff_domain": "swufe.edu.cn;;;uestc.edu.cn", "position": "Associate Professor;;;Full Professor", "bibtex": "@inproceedings{\ngao2023enhancing,\ntitle={Enhancing Knowledge Transfer for Task Incremental Learning with Data-free Subnetwork},\nauthor={Qiang Gao and Xiaojun Shan and Yuchen Zhang and Fan Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uj9PxVTVqq}\n}", "github": "", "project": "", "reviewers": "Um41;ai52;iEXK;cKSd;bsXa", "pdf_size": 1048053, "rating": "4;4;5;6;6", "confidence": "4;4;3;4;4", "soundness": "2;3;2;4;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;4;3", "wc_summary": "53;32;59;174;67", "wc_strengths": "54;23;135;129;71", "wc_weaknesses": "252;132;63;236;346", "wc_questions": "91;6;1;134;154", "wc_limitations": "17;1;1;93;8", "wc_review": 
"467;194;259;766;646", "wc_reply_reviewers": "238;0;259;221;66", "wc_reply_authors": "940;0;372;809;302", "reply_reviewers": "2;0;1;2;1", "reply_authors": "3;1;2;3;3", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 77.0, 49.86782529848279 ], "wc_strengths_avg": [ 82.4, 43.36634639902237 ], "wc_weaknesses_avg": [ 205.8, 98.52999543286298 ], "wc_questions_avg": [ 77.2, 63.546518394008025 ], "wc_limitations_avg": [ 24.0, 34.99714274051526 ], "wc_review_avg": [ 466.4, 218.7387482820545 ], "wc_reply_reviewers_avg": [ 156.8, 103.91419537291331 ], "wc_reply_authors_avg": [ 484.6, 344.52378727745344 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6343866483442049062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "swufe.edu.cn;;;uestc.edu.cn", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Southwestern University of Finance and Economics;University of Electronic Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.swufe.edu.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "SWUFE;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "umHruATvCD", "title": "Learning Interpretable Characteristic Kernels via Decision Forests", "track": "main", "status": "Reject", "tldr": "", "abstract": "Decision forests are popular tools for classification and regression. These forests naturally generate proximity matrices that measure the frequency of observations appearing in the same leaf node. While other kernels are known to have strong theoretical properties such as being characteristic, there is no similar result available for decision forest-based kernels. In addition, existing approaches to independence and k-sample testing may require unfeasibly large sample sizes and are not interpretable. In this manuscript, we prove that the decision forest induced proximity is a characteristic kernel, enabling consistent independence and k-sample testing via decision forests. We leverage this to introduce kernel mean embedding random forest (KMERF), which is a valid and consistent method for independence and k-sample testing. Our extensive simulations demonstrate that KMERF outperforms other tests across a variety of independence and two-sample testing scenarios. Additionally, the test is interpretable, and its key features are readily discernible. 
This work therefore demonstrates the existence of a test that is both more powerful and more interpretable than existing methods, flying in the face of conventional wisdom of the trade-off between the two.", "keywords": "kernel learning;random forest;hypothesis testing", "primary_area": "", "supplementary_material": "/attachment/fb27689866c11c5b412667856dc13e3326c948bb.zip", "author": "Sambit Panda;Cencheng Shen;Joshua T Vogelstein", "authorids": "~Sambit_Panda1;~Cencheng_Shen2;~Joshua_T_Vogelstein1", "gender": "M;;M", "homepage": "https://sampan.me/;;https://neurodata.io/", "dblp": ";;04/700", "google_scholar": "-V3CmPoAAAAJ;;DWPfdT4AAAAJ", "orcid": "0000-0001-8455-4243;;0000-0003-2487-6237", "linkedin": "sampan501/;;jovo1/", "or_profile": "~Sambit_Panda1;~Cencheng_Shen2;~Joshua_T_Vogelstein1", "aff": "Johns Hopkins University;;Johns Hopkins University", "aff_domain": "jhu.edu;;jhu.edu", "position": "PhD student;;Associate Professor", "bibtex": "@misc{\npanda2023learning,\ntitle={Learning Interpretable Characteristic Kernels via Decision Forests},\nauthor={Sambit Panda and Cencheng Shen and Joshua T Vogelstein},\nyear={2023},\nurl={https://openreview.net/forum?id=umHruATvCD}\n}", "github": "", "project": "", "reviewers": "h7qn;11WJ;qW4w;CaNi;zSKe", "site": "https://openreview.net/forum?id=umHruATvCD", "pdf_size": 333036, "rating": "4;5;6;6;6", "confidence": "1;3;3;4;4", "soundness": "3;2;4;3;3", "novelty": "2;2;4;3;2", "presentation": "1;2;4;3;3", "wc_summary": "16;24;72;113;167", "wc_strengths": "2;21;63;83;88", "wc_weaknesses": "105;78;101;36;145", "wc_questions": "11;63;21;23;45", "wc_limitations": "2;30;50;11;15", "wc_review": "136;216;307;266;460", "wc_reply_reviewers": "0;55;35;11;21", "wc_reply_authors": "0;500;548;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 78.4, 56.46450212301531 ], "wc_strengths_avg": [ 51.4, 34.167821118707586 ], "wc_weaknesses_avg": [ 93.0, 35.73793502708292 ], "wc_questions_avg": [ 32.6, 18.821264569629747 ], "wc_limitations_avg": [ 21.6, 16.835676404587968 ], "wc_review_avg": [ 277.0, 107.8072353787073 ], "wc_reply_reviewers_avg": [ 24.4, 19.158288023724875 ], "wc_reply_authors_avg": [ 209.6, 257.1548949563278 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.912870929175277, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6887284855751361522&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Energy-Based Sliced Wasserstein Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70138", "id": "umvV3yvo4N", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a23caeb904c822575fa56fb114ca499-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=umvV3yvo4N", "openreview": "https://openreview.net/forum?id=umvV3yvo4N", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70138.png?t=1699042483.5151682", "slides": "https://nips.cc/virtual/2023/poster/70138", "video": "https://nips.cc/virtual/2023/poster/70138", "author_site": "Khai Nguyen, Nhat Ho", "tldr": "", "abstract": "The sliced Wasserstein (SW) distance has been widely recognized as a statistically effective and computationally efficient metric between two probability measures. A key component of the SW distance is the slicing distribution. There are two existing approaches for choosing this distribution. The first approach is using a fixed prior distribution. The second approach is optimizing for the best distribution which belongs to a parametric family of distributions and can maximize the expected distance. However, both approaches have their limitations. A fixed prior distribution is non-informative in terms of highlighting projecting directions that can discriminate two general probability measures. Doing optimization for the best distribution is often expensive and unstable. Moreover, designing the parametric family of the candidate distribution could be easily misspecified. To address the issues, we propose to design the slicing distribution as an energy-based distribution that is parameter-free and has the density proportional to an energy function of the projected one-dimensional Wasserstein distance. We then derive a novel sliced Wasserstein variant, energy-based sliced Waserstein (EBSW) distance, and investigate its topological, statistical, and computational properties via importance sampling, sampling importance resampling, and Markov Chain methods. Finally, we conduct experiments on point-cloud gradient flow, color transfer, and point-cloud reconstruction to show the favorable performance of the EBSW.", "keywords": "Sliced Wasserstein;Monte Carlo Methods;Point-Cloud;Optimal Transport", "primary_area": "", "supplementary_material": "/attachment/320f62c47858624e5bcc1eb4a46816778430865e.zip", "author": "Khai Nguyen;Nhat Ho", "authorids": "~Khai_Nguyen1;~Nhat_Ho1", "gender": "M;M", "homepage": "https://khainb.com;https://nhatptnk8912.github.io/", "dblp": "120/4308;203/4479", "google_scholar": "im5fNaQAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";", "linkedin": ";nhat-pham-minh-ho-267b8164/", "or_profile": "~Khai_Nguyen1;~Nhat_Ho1", "aff": "University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023energybased,\ntitle={Energy-Based Sliced Wasserstein Distance},\nauthor={Khai Nguyen and Nhat Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=umvV3yvo4N}\n}", "github": "", "project": "", "reviewers": "QgRs;krQV;vaua;dYij", "pdf_size": 6710551, "rating": "5;6;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "98;30;207;20", "wc_strengths": "52;23;142;55", "wc_weaknesses": "85;102;233;32", "wc_questions": "5;59;519;85", "wc_limitations": "8;14;19;35", "wc_review": "248;228;1120;227", "wc_reply_reviewers": "211;91;22;12", "wc_reply_authors": "265;73;25;29", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 74.57672223958357 ], 
"wc_strengths_avg": [ 68.0, 44.51404272810997 ], "wc_weaknesses_avg": [ 113.0, 73.93578294709538 ], "wc_questions_avg": [ 167.0, 205.2656814959578 ], "wc_limitations_avg": [ 19.0, 10.024968827881711 ], "wc_review_avg": [ 455.75, 383.5963861925709 ], "wc_reply_reviewers_avg": [ 84.0, 79.38198788138277 ], "wc_reply_authors_avg": [ 98.0, 98.23950325607312 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8177486242637249986&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Sample-efficient Multi-objective Molecular Optimization with GFlowNets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70137", "id": "uoG1fLIK2s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fbc9981dd6316378aee7fd5975250f21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uoG1fLIK2s", "openreview": "https://openreview.net/forum?id=uoG1fLIK2s", "poster": "/media/PosterPDFs/NeurIPS%202023/70137.png?t=1702970461.9601374", "slides": "https://nips.cc/virtual/2023/poster/70137", "video": "https://nips.cc/virtual/2023/poster/70137", "author_site": "Yiheng Zhu, Jialu Wu, Chaowen Hu, Jiahuan Yan, kim hsieh, Tingjun Hou, Jian Wu", "tldr": "", "abstract": "Many crucial scientific problems involve designing novel molecules with desired properties, which can be formulated as a black-box optimization problem over the *discrete* chemical space. In practice, multiple conflicting objectives and costly evaluations (e.g., wet-lab experiments) make the *diversity* of candidates paramount. Computational methods have achieved initial success but still struggle with considering diversity in both objective and search space. To fill this gap, we propose a multi-objective Bayesian optimization (MOBO) algorithm leveraging the hypernetwork-based GFlowNets (HN-GFN) as an acquisition function optimizer, with the purpose of sampling a diverse batch of candidate molecular graphs from an approximate Pareto front. Using a single preference-conditioned hypernetwork, HN-GFN learns to explore various trade-offs between objectives. We further propose a hindsight-like off-policy strategy to share high-performing molecules among different preferences in order to speed up learning for HN-GFN. We empirically illustrate that HN-GFN has adequate capacity to generalize over preferences. Moreover, experiments in various real-world MOBO settings demonstrate that our framework predominantly outperforms existing methods in terms of candidate quality and sample efficiency. 
The code is available at https://github.com/violet-sto/HN-GFN.", "keywords": "drug discovery;multi-objective molecular optimization;Bayesian optimization;generative flow networks", "primary_area": "", "supplementary_material": "", "author": "Yiheng Zhu;Jialu Wu;Chaowen Hu;Jiahuan Yan;Chang-Yu Hsieh;Tingjun Hou;Jian Wu", "authorids": "~Yiheng_Zhu3;~Jialu_Wu1;~Chaowen_Hu1;~Jiahuan_Yan1;~Chang-Yu_Hsieh1;~Tingjun_Hou1;~Jian_Wu6", "gender": "M;M;M;M;F;M;", "homepage": ";https://github.com/jhrsya;;;http://cadd.zju.edu.cn;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ;", "dblp": ";264/3787;334/7537;140/4305;75/6017;96/2744-1;337/6241", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;_wQJGDcAAAAJ;K-AjhSgAAAAJ;vHW2kqUAAAAJ;https://scholar.google.com/citations?hl=zh-TW;E5p4rjMAAAAJ", "orcid": "0000-0001-8020-9979;;0000-0002-2002-2579;0000-0002-6242-4218;;;", "linkedin": ";;;;;;", "or_profile": "~Yiheng_Zhu3;~Chaowen_Hu1;~Jiahuan_Yan1;~Chang-Yu_Hsieh1;~Tingjun_Hou1;~Jian_Wu6;~wu_jialu1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;PhD student;Full Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhu2023sampleefficient,\ntitle={Sample-efficient Multi-objective Molecular Optimization with {GF}lowNets},\nauthor={Yiheng Zhu and Jialu Wu and Chaowen Hu and Jiahuan Yan and Chang-Yu Hsieh and Tingjun Hou and Jian Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uoG1fLIK2s}\n}", "github": "", "project": "", "reviewers": "hutP;9YCN;Dc63;BkDd", "pdf_size": 846738, "rating": "5;5;6;6", "confidence": "5;3;3;3", "soundness": "4;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;2;4", "wc_summary": "74;52;93;80", "wc_strengths": "113;78;40;165", "wc_weaknesses": "302;22;101;144", "wc_questions": "3;198;115;10", "wc_limitations": "1;59;5;34", "wc_review": "493;409;354;433", "wc_reply_reviewers": "77;43;25;37", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.75, 14.821858857781638 ], "wc_strengths_avg": [ 99.0, 46.02716589146023 ], "wc_weaknesses_avg": [ 142.25, 102.08421768324426 ], "wc_questions_avg": [ 81.5, 80.57449968817679 ], "wc_limitations_avg": [ 24.75, 23.519938350259338 ], "wc_review_avg": [ 422.25, 49.886746736984165 ], "wc_reply_reviewers_avg": [ 45.5, 19.30673457630782 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14818751117008287094&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Minimax Forward and Backward Learning of Evolving Tasks with Performance Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70136", "id": "uoRiO855Sj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf4114c34a2b93019aa6e70f99680fae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uoRiO855Sj", "openreview": "https://openreview.net/forum?id=uoRiO855Sj", "poster": "/media/PosterPDFs/NeurIPS%202023/70136.png?t=1699954239.549194", "slides": "https://nips.cc/virtual/2023/poster/70136", "video": "https://nips.cc/virtual/2023/poster/70136", "author_site": "Veronica Alvarez, Santiago Mazuelas, Jose A. Lozano", "tldr": "", "abstract": "For a sequence of classification tasks that arrive over time, it is common that tasks are evolving in the sense that consecutive tasks often have a higher similarity. The incremental learning of a growing sequence of tasks holds promise to enable accurate classification even with few samples per task by leveraging information from all the tasks in the sequence (forward and backward learning). However, existing techniques developed for continual learning and concept drift adaptation are either designed for tasks with time-independent similarities or only aim to learn the last task in the sequence. This paper presents incremental minimax risk classifiers (IMRCs) that effectively exploit forward and backward learning and account for evolving tasks. In addition, we analytically characterize the performance improvement provided by forward and backward learning in terms of the tasks\u2019 expected quadratic change and the number of tasks. The experimental evaluation shows that IMRCs can result in a significant performance improvement, especially for reduced sample sizes.", "keywords": "Concept drift;Continual learning;Minimax classification;Performance guarantees", "primary_area": "", "supplementary_material": "", "author": "Veronica Alvarez;Santiago Mazuelas;Jose A. Lozano", "authorids": "~Veronica_Alvarez1;~Santiago_Mazuelas1;~Jose_A._Lozano1", "gender": "F;M;M", "homepage": ";https://smazuelas.wordpress.com/;", "dblp": "279/6433;84/6071;", "google_scholar": "ZqAgh24AAAAJ;LW_RYf0AAAAJ;lhzoWpwAAAAJ", "orcid": "0000-0001-9775-5888;0000-0002-6608-8581;", "linkedin": "veronica-alvarez-castro;santiago-mazuelas-85586151/?originalSubdomain=es;", "or_profile": "~Veronica_Alvarez1;~Santiago_Mazuelas1;~Jose_A._Lozano1", "aff": "Basque Center for Applied Mathematics;Basque Center for Applied Mathematics;Basque Center for Applied Mathematics", "aff_domain": "bcamath.org;bcamath.org;bcamath.org", "position": "Postdoc;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nalvarez2023minimax,\ntitle={Minimax Forward and Backward Learning of Evolving Tasks with Performance Guarantees},\nauthor={Veronica Alvarez and Santiago Mazuelas and Jose A. 
Lozano},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uoRiO855Sj}\n}", "github": "", "project": "", "reviewers": "C5UC;spbD;V6pS;qXgD;3qiG", "pdf_size": 432637, "rating": "5;5;5;6;7", "confidence": "5;1;3;3;4", "soundness": "2;3;3;4;4", "novelty": "3;3;2;4;3", "presentation": "3;3;3;3;3", "wc_summary": "49;99;66;134;137", "wc_strengths": "35;77;28;51;182", "wc_weaknesses": "16;95;78;62;25", "wc_questions": "215;3;90;149;225", "wc_limitations": "2;1;1;7;23", "wc_review": "317;275;263;403;592", "wc_reply_reviewers": "11;15;249;20;148", "wc_reply_authors": "0;0;74;0;153", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 1.32664991614216 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.0, 35.32138162643132 ], "wc_strengths_avg": [ 74.6, 56.28001421463929 ], "wc_weaknesses_avg": [ 55.2, 30.32754523531372 ], "wc_questions_avg": [ 136.4, 82.62590392848963 ], "wc_limitations_avg": [ 6.8, 8.4 ], "wc_review_avg": [ 370.0, 121.37215496150672 ], "wc_reply_reviewers_avg": [ 88.6, 95.29029331469182 ], "wc_reply_authors_avg": [ 45.4, 60.95769024495596 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2638224265055432, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16173561042612311078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "bcamath.org;bcamath.org;bcamath.org", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Basque Center for Applied Mathematics", "aff_unique_dep": "", "aff_unique_url": "https://www.bcamath.org/", "aff_unique_abbr": "BCAM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "title": "PriorBand: Practical Hyperparameter Optimization in the Age of Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70135", "id": "uoiwugtpCH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1704fe7aaff33a54802b83a016050ab8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uoiwugtpCH", "openreview": "https://openreview.net/forum?id=uoiwugtpCH", "poster": "/media/PosterPDFs/NeurIPS%202023/70135.png?t=1701875795.0711148", "slides": "https://nips.cc/virtual/2023/poster/70135", "video": "https://nips.cc/virtual/2023/poster/70135", "author_site": "Neeratyoy Mallik, Edward Bergman, Carl Hvarfner, Danny Stoll, Maciej Janowski, Marius Lindauer, Luigi Nardi, Frank Hutter", "tldr": "", "abstract": "Hyperparameters of Deep Learning (DL) pipelines are crucial for their downstream performance. \nWhile a large number of methods for Hyperparameter Optimization (HPO) have been developed, their incurred costs are often untenable for modern DL.\nConsequently, manual experimentation is still the most prevalent approach to optimize hyperparameters, relying on the researcher's intuition, domain knowledge, and cheap preliminary explorations.\nTo resolve this misalignment between HPO algorithms and DL researchers, we propose PriorBand, an HPO algorithm tailored to DL, able to utilize both expert beliefs and cheap proxy tasks. 
\nEmpirically, we demonstrate PriorBand's efficiency across a range of DL benchmarks and show its gains under informative expert input and robustness against poor expert beliefs.", "keywords": "Hyperparameter Optimization;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/0b2284bf5aff69c888c6eb6ba0970c3f1e49a04d.pdf", "author": "Neeratyoy Mallik;Eddie Bergman;Carl Hvarfner;Danny Stoll;Maciej Janowski;Marius Lindauer;Luigi Nardi;Frank Hutter", "authorids": "~Neeratyoy_Mallik1;~Eddie_Bergman1;~Carl_Hvarfner1;~Danny_Stoll1;~Maciej_Janowski1;~Marius_Lindauer1;~Luigi_Nardi1;~Frank_Hutter1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://ml.informatik.uni-freiburg.de/profile/mallik/;https://github.com/eddiebergman;https://portal.research.lu.se/portal/sv/persons/carl-hvarfner(cd140b82-9fed-4e88-868e-1cf569dcbeb7).html;https://ml.informatik.uni-freiburg.de/profile/stoll/;https://github.com/worstseed;https://www.ai.uni-hannover.de/de/institut/team/lindauer;;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": "178/9789;;319/3033;232/3297;219/8260;28/9142;60/7206;89/5383", "google_scholar": "https://scholar.google.de/citations?user=SGtKoyMAAAAJ;;https://scholar.google.se/citations?hl=en;;;https://scholar.google.de/citations?user=0Sxx7DUAAAAJ;https://scholar.google.it/citations?user=Kgs3zQoAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": ";;;;;;0000-0002-4601-2264;0000-0002-2037-3694", "linkedin": "neeratyoy/;eddie-bergman-356736153/;carl-hvarfner-a97421153/;Danny-Stoll-AI/;;;nardiluigi/;frank-hutter-9190b24b/", "or_profile": "~Neeratyoy_Mallik1;~Eddie_Bergman1;~Carl_Hvarfner1;~Danny_Stoll1;~Maciej_Janowski1;~Marius_Lindauer1;~Luigi_Nardi1;~Frank_Hutter1", "aff": "University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg;Lund University;University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg;Leibniz Universit\u00e4t Hannover;Stanford University;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;uni-freiburg.de;lu.se;uni-freiburg.de;uni-freiburg.de;uni-hannover.de;stanford.edu;uni-freiburg.de", "position": "PhD student;Researcher;PhD student;PhD student;PhD student;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nmallik2023priorband,\ntitle={PriorBand: Practical Hyperparameter Optimization in the Age of Deep Learning},\nauthor={Neeratyoy Mallik and Eddie Bergman and Carl Hvarfner and Danny Stoll and Maciej Janowski and Marius Lindauer and Luigi Nardi and Frank Hutter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uoiwugtpCH}\n}", "github": "", "project": "", "reviewers": "meuK;QGiN;ejik;zHth", "pdf_size": 1168495, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;4;3", "novelty": "3;2;3;2", "presentation": "2;4;4;3", "wc_summary": "67;88;76;100", "wc_strengths": "9;59;46;61", "wc_weaknesses": "221;122;120;112", "wc_questions": "19;74;2;116", "wc_limitations": "56;31;2;18", "wc_review": "372;374;246;407", "wc_reply_reviewers": "17;180;67;21", "wc_reply_authors": "39;509;0;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 82.75, 12.43734296383275 ], "wc_strengths_avg": [ 43.75, 20.873128658636684 ], "wc_weaknesses_avg": [ 143.75, 44.75698269544094 ], 
"wc_questions_avg": [ 52.75, 45.18503623988809 ], "wc_limitations_avg": [ 26.75, 19.76581645164196 ], "wc_review_avg": [ 349.75, 61.4913611818766 ], "wc_reply_reviewers_avg": [ 71.25, 65.78896184011418 ], "wc_reply_authors_avg": [ 141.75, 212.47985198601774 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11507531299476090211&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uni-freiburg.de;uni-freiburg.de;lu.se;uni-freiburg.de;uni-freiburg.de;uni-hannover.de;stanford.edu;uni-freiburg.de", "author_num": 8, "aff_unique_index": "0;1;2;0;1;3;4;1", "aff_unique_norm": "University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg;Lund University;Leibniz Universit\u00e4t Hannover;Stanford University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uni-freiburg.de;https://www.uni-freiburg.de;https://www.lunduniversity.lu.se;https://www.leibniz.uni-hannover.de/;https://www.stanford.edu", "aff_unique_abbr": "UoF;Albert-Ludwigs-Universit\u00e4t;LU;LUH;Stanford", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Freiburg;Stanford", "aff_country_unique_index": "0;0;1;0;0;0;2;0", "aff_country_unique": "Germany;Sweden;United States" }, { "title": "Optimal approximation using complex-valued neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70134", "id": "uotGmrcooz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05b69cc4c8ff6e24c5de1ecd27223d37-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uotGmrcooz", "openreview": "https://openreview.net/forum?id=uotGmrcooz", "poster": "/media/PosterPDFs/NeurIPS%202023/70134.png?t=1701420877.9401455", "slides": "https://nips.cc/virtual/2023/poster/70134", "video": "https://nips.cc/virtual/2023/poster/70134", "author_site": "Paul Geuchen, Felix Voigtlaender", "tldr": "", "abstract": "Complex-valued neural networks (CVNNs) have recently shown promising empirical success, for instance for increasing the stability of recurrent neural networks and for improving the performance in tasks with complex-valued inputs, such as MRI fingerprinting. While the overwhelming success of Deep Learning in the real-valued case is supported by a growing mathematical foundation, such a foundation is still largely lacking in the complex-valued case. We thus analyze the expressivity of CVNNs by studying their approximation properties. Our results yield the first quantitative approximation bounds for CVNNs that apply to a wide class of activation functions including the popular modReLU and complex cardioid activation functions. Precisely, our results apply to any activation function that is smooth but not polyharmonic on some non-empty open set; this is the natural generalization of the class of smooth and non-polynomial activation functions to the complex setting. Our main result shows that the approximation error scales as $m^{-k/(2n)}$ for $m \\to \\infty$ where $m$ is the number of neurons, $k$ the smoothness of the target function and $n$ is the (complex) input dimension. Under a natural continuity assumption, we show that this rate is optimal; we further discuss the optimality when dropping this assumption. 
Moreover, we prove that the problem of approximating $C^k$-functions using continuous approximation methods unavoidably suffers from the curse of dimensionality.", "keywords": "complex-valued neural networks;approximation rates", "primary_area": "", "supplementary_material": "", "author": "Paul Geuchen;Felix Voigtlaender", "authorids": "~Paul_Geuchen1;~Felix_Voigtlaender1", "gender": "M;M", "homepage": "https://www.ku.de/mgf/mathematik/lehrstuehle-professuren/lehrstuhl-fuer-reliable-machine-learning/team-des-lehrstuhls;http://voigtlaender.xyz/", "dblp": "344/1196;81/9894", "google_scholar": ";https://scholar.google.de/citations?user=gdF7t4wAAAAJ", "orcid": ";0000-0002-5061-2756", "linkedin": ";", "or_profile": "~Paul_Geuchen1;~Felix_Voigtlaender1", "aff": "Katholische Universit\u00e4t Eichst\u00e4tt-Ingolstadt;Katholische Universit\u00e4t Eichst\u00e4tt-Ingolstadt", "aff_domain": "ku.de;ku.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ngeuchen2023optimal,\ntitle={Optimal approximation using complex-valued neural networks},\nauthor={Paul Geuchen and Felix Voigtlaender},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uotGmrcooz}\n}", "github": "", "project": "", "reviewers": "GqHo;yApE;6LvE;ve6Y", "pdf_size": 2538396, "rating": "3;5;7;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "229;60;89;132", "wc_strengths": "35;52;171;71", "wc_weaknesses": "175;44;226;163", "wc_questions": "336;4;32;67", "wc_limitations": "54;1;2;54", "wc_review": "829;161;520;487", "wc_reply_reviewers": "126;0;216;124", "wc_reply_authors": "539;0;262;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 127.5, 63.955062348495915 ], "wc_strengths_avg": [ 82.25, 52.79855585146245 ], "wc_weaknesses_avg": [ 152.0, 66.68957939588464 ], "wc_questions_avg": [ 109.75, 132.5186307656399 ], "wc_limitations_avg": [ 27.75, 26.252380844411046 ], "wc_review_avg": [ 499.25, 236.49986786465652 ], "wc_reply_reviewers_avg": [ 116.5, 76.84237112426972 ], "wc_reply_authors_avg": [ 200.25, 222.91520248740326 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14942615877444721778&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ku.de;ku.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Katholische Universit\u00e4t Eichst\u00e4tt-Ingolstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.ku-eichstaett.de", "aff_unique_abbr": "KU Eichst\u00e4tt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70133", "id": "uqkUguNu40", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3173c427cb4ed2d5eaab029c17f221ae-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uqkUguNu40", "openreview": 
"https://openreview.net/forum?id=uqkUguNu40", "poster": "/media/PosterPDFs/NeurIPS%202023/70133.png?t=1698904596.9482522", "slides": "https://nips.cc/virtual/2023/poster/70133", "video": "https://nips.cc/virtual/2023/poster/70133", "author_site": "Xinyu Ma, Xu Chu, Yasha Wang, Yang Lin, Junfeng Zhao, Liantao Ma, Wenwu Zhu", "tldr": "", "abstract": "Graph data augmentation has shown superiority in enhancing generalizability and robustness of GNNs in graph-level classifications. However, existing methods primarily focus on the augmentation in the graph signal space and the graph structure space independently, neglecting the joint interaction between them. In this paper, we address this limitation by formulating the problem as an optimal transport problem that aims to find an optimal inter-graph node matching strategy considering the interactions between graph structures and signals. To solve this problem, we propose a novel graph mixup algorithm called FGWMixup, which seeks a \"midpoint\" of source graphs in the Fused Gromov-Wasserstein (FGW) metric space. To enhance the scalability of our method, we introduce a relaxed FGW solver that accelerates FGWMixup by improving the convergence rate from $\\mathcal{O}(t^{-1})$ to $\\mathcal{O}(t^{-2})$. Extensive experiments conducted on five datasets using both classic (MPNNs) and advanced (Graphormers) GNN backbones demonstrate that \\mname\\xspace effectively improves the generalizability and robustness of GNNs. Codes are available at https://github.com/ArthurLeoM/FGWMixup.", "keywords": "Graph Data Augmentation;Graph Mixup;Fused Gromov Wasserstein", "primary_area": "", "supplementary_material": "/attachment/bcca874dc5d120ca39eaedc20cf30e9834970438.zip", "author": "Xinyu Ma;Xu Chu;Yasha Wang;Yang Lin;Junfeng Zhao;Liantao Ma;Wenwu Zhu", "authorids": "~Xinyu_Ma3;~Xu_Chu1;~Yasha_Wang3;~Yang_Lin2;~Junfeng_Zhao1;~Liantao_Ma1;~Wenwu_Zhu1", "gender": "M;;M;M;F;Not Specified;M", "homepage": ";;;;https://cs.pku.edu.cn/info/1084/1224.htm;https://scholar.google.com/citations?view_op=list_works&hl=en&user=necbkJkAAAAJ;http://media.cs.tsinghua.edu.cn/en/zww", "dblp": "43/7894;;70/2725.html;59/5166;72/3918-1;193/6198;97/6308-1.html", "google_scholar": "ygvzwbUAAAAJ;;;https://scholar.google.com.hk/citations?user=oAffgtgAAAAJ;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ", "orcid": "0000-0003-4574-0830;;;;;0000-0001-5233-0624;0000-0003-2236-9290", "linkedin": ";;;;;;", "or_profile": "~Xinyu_Ma3;~Xu_Chu1;~Yasha_Wang3;~Yang_Lin2;~Junfeng_Zhao1;~Liantao_Ma1;~Wenwu_Zhu1", "aff": "Peking University;;Peking University;Peking University;Peking University;Peking University;Tsinghua University", "aff_domain": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;tsinghua.edu.cn", "position": "PhD student;;Full Professor;PhD student;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nma2023fused,\ntitle={Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications},\nauthor={Xinyu Ma and Xu Chu and Yasha Wang and Yang Lin and Junfeng Zhao and Liantao Ma and Wenwu Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uqkUguNu40}\n}", "github": "", "project": "", "reviewers": "ZCHq;Ake3;WT1n;JSkL;Cjiv", "pdf_size": 839218, "rating": "4;5;6;6;6", "confidence": "3;4;5;3;4", "soundness": "2;3;2;3;3", "novelty": "2;3;2;2;2", "presentation": "3;3;2;2;2", "wc_summary": "69;71;100;108;98", "wc_strengths": 
"38;25;152;181;72", "wc_weaknesses": "187;149;1014;173;79", "wc_questions": "61;32;64;32;199", "wc_limitations": "10;11;46;26;1", "wc_review": "365;288;1376;520;449", "wc_reply_reviewers": "155;68;850;34;95", "wc_reply_authors": "483;55;973;25;73", "reply_reviewers": "1;1;3;1;1", "reply_authors": "2;2;4;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 89.2, 16.04244370412438 ], "wc_strengths_avg": [ 93.6, 62.15014078825566 ], "wc_weaknesses_avg": [ 320.4, 348.78509142450457 ], "wc_questions_avg": [ 77.6, 62.220896811280376 ], "wc_limitations_avg": [ 18.8, 15.791136754521505 ], "wc_review_avg": [ 599.6, 395.96595813276673 ], "wc_reply_reviewers_avg": [ 240.4, 307.36597079052194 ], "wc_reply_authors_avg": [ 321.8, 366.39344972310846 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4677071733467428, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1633385159726523691&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Peking University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Peking U;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "All Points Matter: Entropy-Regularized Distribution Alignment for Weakly-supervised 3D Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70132", "id": "utQms7PPx5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f86c5c4d4dca70d30b1c12a33a2bc1a4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=utQms7PPx5", "openreview": "https://openreview.net/forum?id=utQms7PPx5", "poster": "/media/PosterPDFs/NeurIPS%202023/70132.png?t=1699273629.2435324", "slides": "https://nips.cc/virtual/2023/poster/70132", "video": "https://nips.cc/virtual/2023/poster/70132", "author_site": "Liyao Tang, Zhe Chen, Shanshan Zhao, Chaoyue Wang, Dacheng Tao", "tldr": "", "abstract": "Pseudo-labels are widely employed in weakly supervised 3D segmentation tasks where only sparse ground-truth labels are available for learning.\nExisting methods often rely on empirical label selection strategies, such as confidence thresholding, to generate beneficial pseudo-labels for model training.\nThis approach may, however, hinder the comprehensive exploitation of unlabeled data points.\nWe hypothesize that this selective usage arises from the noise in pseudo-labels generated on unlabeled data. 
The noise in pseudo-labels may result in significant discrepancies between pseudo-labels and model predictions, thus greatly confusing and impairing the model training.\nTo address this issue, we propose a novel learning strategy to regularize the generated pseudo-labels and effectively narrow the gaps between pseudo-labels and model predictions.\nMore specifically, our method introduces an Entropy Regularization loss and a Distribution Alignment loss for weakly supervised learning in 3D segmentation tasks, resulting in an ERDA learning strategy.\nInterestingly, when the KL distance is used to formulate the distribution alignment loss, the objective reduces to a deceptively simple cross-entropy-based loss which optimizes both the pseudo-label generation network and the 3D segmentation network simultaneously.\nDespite its simplicity, our method delivers promising performance improvements.\nWe validate the effectiveness through extensive experiments on various baselines and large-scale datasets.\nResults show that ERDA enables the effective usage of all unlabeled data points for learning and achieves state-of-the-art performance under different settings.\nRemarkably, our method can outperform fully-supervised baselines using only 1\\% of true annotations.\nCode and model will be made publicly available at https://github.com/LiyaoTang/ERDA.", "keywords": "point cloud segmentation;weak supervision", "primary_area": "", "supplementary_material": "/attachment/67cad8edb004492d7555afd40c809d62963636dc.pdf", "author": "Liyao Tang;Zhe Chen;Shanshan Zhao;Chaoyue Wang;Dacheng Tao", "authorids": "~Liyao_Tang1;~Zhe_Chen5;~Shanshan_Zhao2;~Chaoyue_Wang2;~Dacheng_Tao1", "gender": ";M;M;M;", "homepage": "https://github.com/LiyaoTang;;https://sshan-zhao.github.io/;;", "dblp": "315/9210;06/4240-13;;174/7172;", "google_scholar": "Q4ozmNYAAAAJ;https://scholar.google.cz/citations?user=Jgt6vEAAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.au/citations?user=ioj1BycAAAAJ;", "orcid": ";0000-0001-5004-8975;0000-0003-0682-8645;;", "linkedin": ";zhe-chen-ba941b135/;;;", "or_profile": "~Liyao_Tang1;~Zhe_Chen5;~Shanshan_Zhao2;~Chaoyue_Wang2;~Dacheng_Tao1", "aff": "University of Sydney, University of Sydney;University of Sydney;JD Explore Academy;JD.com;", "aff_domain": "uni.sydney.edu.au;sydney.edu.au;jd.com;jd.com;", "position": "PhD student;Postdoc;Researcher;Researcher;", "bibtex": "@inproceedings{\ntang2023all,\ntitle={All Points Matter: Entropy-Regularized Distribution Alignment for Weakly-supervised 3D Segmentation},\nauthor={Liyao Tang and Zhe Chen and Shanshan Zhao and Chaoyue Wang and Dacheng Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=utQms7PPx5}\n}", "github": "", "project": "", "reviewers": "ZBhv;aBL8;qxVg;iXj3;7SFL", "pdf_size": 3561232, "rating": "5;5;6;7;7", "confidence": "4;4;3;4;4", "soundness": "3;3;3;4;4", "novelty": "3;3;3;3;3", "presentation": "3;3;2;4;3", "wc_summary": "55;47;71;77;111", "wc_strengths": "36;13;26;45;190", "wc_weaknesses": "95;382;303;214;347", "wc_questions": "2;3;161;51;139", "wc_limitations": "2;10;1;10;16", "wc_review": "190;455;562;397;803", "wc_reply_reviewers": "19;15;143;17;27", "wc_reply_authors": "8;8;117;13;13", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [
3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.2, 22.184679398179277 ], "wc_strengths_avg": [ 62.0, 64.87834769782597 ], "wc_weaknesses_avg": [ 268.2, 103.25386191324758 ], "wc_questions_avg": [ 71.2, 67.09515630803762 ], "wc_limitations_avg": [ 7.8, 5.6000000000000005 ], "wc_review_avg": [ 481.4, 201.32421612910852 ], "wc_reply_reviewers_avg": [ 44.2, 49.56773143891094 ], "wc_reply_authors_avg": [ 31.8, 42.65864507928025 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6832693675069803411&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uni.sydney.edu.au;sydney.edu.au;jd.com;jd.com;", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Sydney;JD;JD.com", "aff_unique_dep": ";JD Explore Academy;", "aff_unique_url": "https://www.sydney.edu.au;;https://www.jd.com", "aff_unique_abbr": "USYD;;JD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2", "aff_country_unique": "Australia;;China" }, { "title": "Can semi-supervised learning use all the data effectively? A lower bound perspective", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70131", "id": "utreNaM1VY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/458fa8ee331566383d8e74bdb647f829-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=utreNaM1VY", "openreview": "https://openreview.net/forum?id=utreNaM1VY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70131", "video": "https://nips.cc/virtual/2023/poster/70131", "author_site": "Alexandru Tifrea, Gizem Y\u00fcce, Amartya Sanyal, Fanny Yang", "tldr": "", "abstract": "Prior theoretical and empirical works have established that semi-supervised learning algorithms can leverage the unlabeled data to improve over the labeled sample complexity of supervised learning (SL) algorithms. However, existing theoretical work focuses on regimes where the unlabeled data is sufficient to learn a good decision boundary using unsupervised learning (UL) alone. This raises the question: Can SSL algorithms simultaneously improve upon both UL and SL? To this end, we derive a tight lower bound for 2-Gaussian mixture models that explicitly depends on the labeled and the unlabeled dataset size as well as the signal-to-noise ratio of the mixture distribution. Surprisingly, our result implies that no SSL algorithm improves upon the minimax-optimal statistical error rates of SL or UL algorithms for these distributions. Nevertheless, in our real-world experiments, SSL algorithms can often outperform UL and SL algorithms.
In summary, our work suggests that while it is possible to prove the performance gains of SSL algorithms, this would require careful tracking of constants in the theoretical analysis.", "keywords": "semi-supervised learning;statistical lower bound", "primary_area": "", "supplementary_material": "/attachment/5a5f85f7d8d9dce4c739123f16f0aa009d37f0fe.pdf", "author": "Alexandru Tifrea;Gizem Y\u00fcce;Amartya Sanyal;Fanny Yang", "authorids": "~Alexandru_Tifrea1;~Gizem_Y\u00fcce1;~Amartya_Sanyal1;~Fanny_Yang1", "gender": "M;F;M;", "homepage": ";;https://amartya18x.github.io;http://www.fanny-yang.de", "dblp": "183/4666;308/0499;203/8807;126/4852", "google_scholar": "i7T1FUsAAAAJ;;;BfDKicQAAAAJ", "orcid": ";;0000-0002-4190-0449;", "linkedin": ";gizem-y%C3%BCce-026795158;;", "or_profile": "~Alexandru_Tifrea1;~Gizem_Y\u00fcce1;~Amartya_Sanyal1;~Fanny_Yang1", "aff": "Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;MS student;Postdoc;Professor", "bibtex": "@inproceedings{\ntifrea2023can,\ntitle={Can semi-supervised learning use all the data effectively? A lower bound perspective},\nauthor={Alexandru Tifrea and Gizem Y{\\\"u}ce and Amartya Sanyal and Fanny Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=utreNaM1VY}\n}", "github": "", "project": "", "reviewers": "KqNv;4XfH;9F3v;N4Bc", "pdf_size": 640315, "rating": "5;6;7;8", "confidence": "2;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "57;72;64;35", "wc_strengths": "48;46;57;69", "wc_weaknesses": "81;17;32;27", "wc_questions": "8;82;67;50", "wc_limitations": "20;19;1;7", "wc_review": "214;236;221;188", "wc_reply_reviewers": "0;10;0;26", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.0, 13.765899897936205 ], "wc_strengths_avg": [ 55.0, 9.082951062292475 ], "wc_weaknesses_avg": [ 39.25, 24.70197360536198 ], "wc_questions_avg": [ 51.75, 27.680092124124155 ], "wc_limitations_avg": [ 11.75, 8.042853971072706 ], "wc_review_avg": [ 214.75, 17.36915369268175 ], "wc_reply_reviewers_avg": [ 9.0, 10.63014581273465 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Swiss Federal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Training Your Image Restoration Network Better with Random Weight Network as Optimization Function", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70130", "id": "uv3ge0goPa", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/043f0503c4f652c737add3690aa5d12c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uv3ge0goPa", "openreview": "https://openreview.net/forum?id=uv3ge0goPa", "poster": "/media/PosterPDFs/NeurIPS%202023/70130.png?t=1701424303.0480063", "slides": "https://nips.cc/virtual/2023/poster/70130", "video": "https://nips.cc/virtual/2023/poster/70130", "author_site": "man zhou, Naishan Zheng, Yuan Xu, Chun-Le Guo, Chongyi Li", "tldr": "", "abstract": "The blooming progress made in deep learning-based image restoration has been largely attributed to the availability of high-quality, large-scale datasets and advanced network structures. However, optimization functions such as L_1 and L_2 are still de facto. In this study, we propose to investigate new optimization functions to improve image restoration performance. Our key insight is that ``random weight network can be acted as a constraint for training better image restoration networks''. However, not all random weight networks are suitable as constraints. We draw inspiration from Functional theory and show that alternative random weight networks should be represented in the form of a strict mathematical manifold. We explore the potential of our random weight network prototypes that satisfy this requirement: Taylor's unfolding network, invertible neural network, central difference convolution, and zero-order filtering. We investigate these prototypes from four aspects: 1) random weight strategies, 2) network architectures, 3) network depths, and 4) combinations of random weight networks. Furthermore, we devise the random weight in two variants: the weights are randomly initialized only once during the entire training procedure, and the weights are randomly initialized in each training epoch. Our approach can be directly integrated into existing networks without incurring additional training and testing computational costs. We perform extensive experiments across multiple image restoration tasks, including image denoising, low-light image enhancement, and guided image super-resolution to demonstrate the consistent performance gains achieved by our method. 
Upon acceptance of this paper, we will release the code.", "keywords": "Image restoration;low-light image enhancement;image de-noising", "primary_area": "", "supplementary_material": "/attachment/d4f39dcef006deabd393f0f2ec42430e9dc8cf26.pdf", "author": "Man Zhou;Naishan Zheng;Yuan Xu;Chun-Le Guo;Chongyi Li", "authorids": "~Man_Zhou4;~Naishan_Zheng1;~Yuan_Xu3;~Chun-Le_Guo1;~Chongyi_Li1", "gender": ";M;M;;", "homepage": ";;;;", "dblp": ";324/4929;89/3127;;", "google_scholar": ";aL_WRTkAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": ";0000-0002-7451-8780;;;", "linkedin": ";;;;", "or_profile": "~Man_Zhou4;~Naishan_Zheng1;~Yuan_Xu3;~Chun-Le_Guo1;~Chongyi_Li1", "aff": ";University of Science and Technology of China;Nanyang Technological University;;", "aff_domain": ";ustc.edu.cn;ntu.edu.sg;;", "position": ";PhD student;Postdoc;;", "bibtex": "@inproceedings{\nzhou2023training,\ntitle={Training Your Image Restoration Network Better with Random Weight Network as Optimization Function},\nauthor={Man Zhou and Naishan Zheng and Yuan Xu and Chun-Le Guo and Chongyi Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uv3ge0goPa}\n}", "github": "", "project": "", "reviewers": "UxgD;d53F;VxMP;Bde2", "pdf_size": 1301410, "rating": "4;7;8;8", "confidence": "5;4;5;5", "soundness": "2;3;4;4", "novelty": "2;2;3;4", "presentation": "3;3;4;3", "wc_summary": "84;66;61;80", "wc_strengths": "26;47;103;137", "wc_weaknesses": "298;221;96;65", "wc_questions": "5;8;17;3", "wc_limitations": "38;21;14;14", "wc_review": "451;363;291;299", "wc_reply_reviewers": "0;20;43;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.6393596310755 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 9.522998477370455 ], "wc_strengths_avg": [ 78.25, 44.075928804734225 ], "wc_weaknesses_avg": [ 170.0, 94.1886405040438 ], "wc_questions_avg": [ 8.25, 5.356071321407137 ], "wc_limitations_avg": [ 21.75, 9.807522622966516 ], "wc_review_avg": [ 351.0, 64.1248781675256 ], "wc_reply_reviewers_avg": [ 15.75, 17.725334975678173 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.08804509063256237, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1670532785623382401&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";ustc.edu.cn;ntu.edu.sg;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of Science and Technology of China;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "USTC;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Singapore" }, { "title": "Dynamic Context Pruning for Efficient and Interpretable Autoregressive Transformers", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70129", "id": "uvdJgFFzby", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cdaac2a02c4fdcae77ba083b110efcc3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uvdJgFFzby", "openreview": 
"https://openreview.net/forum?id=uvdJgFFzby", "poster": "/media/PosterPDFs/NeurIPS%202023/70129.png?t=1701802735.570968", "slides": "https://nips.cc/virtual/2023/poster/70129", "video": "https://nips.cc/virtual/2023/poster/70129", "author_site": "Sotiris Anagnostidis, Dario Pavllo, Luca Biggio, Lorenzo Noci, Aurelien Lucchi, Thomas Hofmann", "tldr": "", "abstract": "Autoregressive Transformers adopted in Large Language Models (LLMs) are hard to scale to long sequences. Despite several works trying to reduce their computational cost, most of LLMs still adopt attention layers between all pairs of tokens in the sequence, thus incurring a quadratic cost. In this study, we present a novel approach that dynamically prunes contextual information while preserving the model's expressiveness, resulting in reduced memory and computational requirements during inference. Our method employs a learnable mechanism that determines which uninformative tokens can be dropped from the context at any point across the generation process. By doing so, our approach not only addresses performance concerns but also enhances interpretability, providing valuable insight into the model's decision-making process. Our technique can be applied to existing pre-trained models through a straightforward fine-tuning process, and the pruning strength can be specified by a sparsity parameter. Notably, our empirical findings demonstrate that we can effectively prune up to 80\\% of the context without significant performance degradation on downstream tasks, offering a valuable tool for mitigating inference costs. Our reference implementation achieves up to $2\\times$ increase in inference throughput and even greater memory savings.", "keywords": "Transformers;Context-pruning;Efficient Transformer", "primary_area": "", "supplementary_material": "", "author": "Sotiris Anagnostidis;Dario Pavllo;Luca Biggio;Lorenzo Noci;Aurelien Lucchi;Thomas Hofmann", "authorids": "~Sotiris_Anagnostidis1;~Dario_Pavllo2;~Luca_Biggio1;~Lorenzo_Noci1;~Aurelien_Lucchi1;~Thomas_Hofmann1", "gender": "M;;M;M;M;M", "homepage": ";;;;http://people.inf.ethz.ch/alucchi/;http://www.da.inf.ethz.ch/", "dblp": "286/1763;218/5320;279/2333;268/6839;14/5780;h/ThHofmann", "google_scholar": "qjzTKWUAAAAJ;5A_sjVQAAAAJ;6HtmuegAAAAJ;;https://scholar.google.ch/citations?user=V1ONSgIAAAAJ;T3hAyLkAAAAJ", "orcid": ";;;;;", "linkedin": "sotiris-anagnostidis-b064a5129/;dario-pavllo/;;lorenzo-noci-97aa59130;;thomas-hofmann-1ab2402/", "or_profile": "~Sotiris_Anagnostidis1;~Dario_Pavllo2;~Luca_Biggio1;~Lorenzo_Noci1;~Aurelien_Lucchi1;~Thomas_Hofmann1", "aff": "ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich;University of Basel;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch;unibas.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nanagnostidis2023dynamic,\ntitle={Dynamic Context Pruning for Efficient and Interpretable Autoregressive Transformers},\nauthor={Sotiris Anagnostidis and Dario Pavllo and Luca Biggio and Lorenzo Noci and Aurelien Lucchi and Thomas Hofmann},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uvdJgFFzby}\n}", "github": "", "project": "", "reviewers": "jYJo;uWWw;LhSf;65eF", "pdf_size": 3277626, "rating": "7;7;7;7", "confidence": "3;4;4;4", "soundness": "3;4;4;4", "novelty": "3;3;3;3", "presentation": "4;4;4;3", 
"wc_summary": "118;98;106;145", "wc_strengths": "63;65;64;76", "wc_weaknesses": "34;181;154;65", "wc_questions": "23;119;69;54", "wc_limitations": "22;36;1;12", "wc_review": "260;499;394;352", "wc_reply_reviewers": "20;49;15;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 116.75, 17.795715776557007 ], "wc_strengths_avg": [ 67.0, 5.244044240850758 ], "wc_weaknesses_avg": [ 108.5, 60.76388730158728 ], "wc_questions_avg": [ 66.25, 34.67978517811205 ], "wc_limitations_avg": [ 17.75, 12.891373084353738 ], "wc_review_avg": [ 376.25, 85.85562008395257 ], "wc_reply_reviewers_avg": [ 25.5, 13.683932183404009 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10585236850580397166&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch;unibas.ch;ethz.ch", "author_num": 6, "aff_unique_index": "0;0;1;0;2;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology;University of Basel", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch;https://www.unibas.ch", "aff_unique_abbr": "ETHZ;ETH Zurich;UniBas", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "uz7JaQgAJb", "title": "Neural McKean-Vlasov Processes: Inferring Distributional Dependence", "track": "main", "status": "Reject", "tldr": "", "abstract": "McKean-Vlasov stochastic differential equations (MV-SDEs) provide a mathematical description of the behavior of an infinite number of interacting particles by imposing a dependence on the particle density.\nThese processes differ from standard Ito-SDEs to the extent that MV-SDEs include distributional information in their individual particle parameterization.\nAs such, we study the influence of explicitly including distributional information in the parameterization of the SDE.\nWe first propose a series of semi-parametric methods for representing MV-SDEs, and then propose corresponding estimators for inferring parameters from data based on the underlying properties of the MV-SDE.\nBy analyzing the properties of the different architectures and estimators, we consider their relationship to standard Ito-SDEs and consider their applicability in relevant machine learning problems.\nWe empirically compare the performance of the different architectures on a series of real and synthetic datasets for time series and probabilistic modeling.\nThe results suggest that including the distributional dependence in MV-SDEs is an effective modeling framework for temporal data under an exchangeability assumption while maintaining strong performance for standard Ito-SDE problems due to the richer class of probability flows associated with MV-SDEs. 
", "keywords": "McKean-Vlasov;stochastic process;ito diffusion", "primary_area": "", "supplementary_material": "/attachment/1c67369ace98c45636680c72ed8910d31dc746f7.zip", "author": "Haoming Yang;Ali Hasan;Yuting Ng;Vahid Tarokh", "authorids": "~Haoming_Yang1;~Ali_Hasan1;~Yuting_Ng1;~Vahid_Tarokh1", "gender": "M;;;", "homepage": "https://imkeithyang.github.io;https://alluly.github.io;http://yutingng.com/;", "dblp": ";200/8502.html;207/0736;", "google_scholar": "uz7goREAAAAJ;4De_LnYAAAAJ;;", "orcid": ";;;", "linkedin": "haoming-yang-2a8a9612b/;;;", "or_profile": "~Haoming_Yang1;~Ali_Hasan1;~Yuting_Ng1;~Vahid_Tarokh1", "aff": "Duke University;Duke University;Duke University;", "aff_domain": "duke.edu;duke.edu;duke.edu;", "position": "PhD student;Graduate student;PhD student;", "bibtex": "@misc{\nyang2023neural,\ntitle={Neural McKean-Vlasov Processes: Inferring Distributional Dependence},\nauthor={Haoming Yang and Ali Hasan and Yuting Ng and Vahid Tarokh},\nyear={2023},\nurl={https://openreview.net/forum?id=uz7JaQgAJb}\n}", "github": "", "project": "", "reviewers": "Ev1V;DRq5;KoGe;bDXo", "site": "https://openreview.net/forum?id=uz7JaQgAJb", "pdf_size": 9453863, "rating": "2;6;7;8", "confidence": "4;3;2;3", "soundness": "1;2;4;4", "novelty": "1;3;3;3", "presentation": "1;3;4;4", "wc_summary": "88;52;34;298", "wc_strengths": "5;20;101;136", "wc_weaknesses": "331;19;100;175", "wc_questions": "5;550;233;252", "wc_limitations": "5;1;1;15", "wc_review": "434;642;469;876", "wc_reply_reviewers": "911;554;107;182", "wc_reply_authors": "1005;1406;39;88", "reply_reviewers": "4;2;1;1", "reply_authors": "5;5;2;2", "rating_avg": [ 5.75, 2.277608394786075 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 1.299038105676658 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 118.0, 105.72606112023658 ], "wc_strengths_avg": [ 65.5, 54.68317840067456 ], "wc_weaknesses_avg": [ 156.25, 114.98994521261413 ], "wc_questions_avg": [ 260.0, 193.5962293021225 ], "wc_limitations_avg": [ 5.5, 5.722761571129799 ], "wc_review_avg": [ 605.25, 175.03338967179948 ], "wc_reply_reviewers_avg": [ 438.5, 321.0455575148175 ], "wc_reply_authors_avg": [ 634.5, 588.5926010408218 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7761505257063328, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:U5sENi0KLoYJ:scholar.google.com/&scioq=Neural+McKean-Vlasov+Processes:+Inferring+Distributional+Dependence&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Online robust non-stationary estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70128", "id": "uzOBDerK1j", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9e15d892c63903ecc278e0dd05536951-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=uzOBDerK1j", "openreview": "https://openreview.net/forum?id=uzOBDerK1j", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70128", "video": "https://nips.cc/virtual/2023/poster/70128", "author_site": "Abishek Sankararaman, 
Balakrishnan Narayanaswamy", "tldr": "", "abstract": "The real-time estimation of time-varying parameters from high-dimensional, heavy-tailed and corrupted data-streams is a common sub-routine in systems ranging from those for network monitoring and anomaly detection to those for traffic scheduling in data-centers. For estimation tasks that can be cast as minimizing a strongly convex loss function, we prove that an appropriately tuned version of the {\ttfamily clipped Stochastic Gradient Descent} (SGD) is simultaneously {\em (i)} adaptive to drift, {\em (ii)} robust to heavy-tailed inliers and arbitrary corruptions, {\em (iii)} requires no distributional knowledge and {\em (iv)} can be implemented in an online streaming fashion. All prior estimation algorithms have only been proven to possess a subset of these practical desiderata. An observation we make is that neither the $\mathcal{O}\left(\frac{1}{t}\right)$ learning rate for {\ttfamily clipped SGD} known to be optimal for strongly convex loss functions of a \emph{stationary} data-stream, nor the $\mathcal{O}(1)$ learning rate known to be optimal for being adaptive to drift in a \emph{noiseless} environment can be used. Instead, a learning rate of $T^{-\alpha}$ for $\alpha < 1$, where $T$ is the stream-length, is needed to balance adaptivity to potential drift and to combat noise. We develop a new inductive argument and combine it with a martingale concentration result to derive high-probability bounds under \emph{any learning rate} on data-streams exhibiting \emph{arbitrary distribution shift} -- a proof strategy that may be of independent interest. Further, using the classical doubling-trick, we relax the knowledge of the stream length $T$. Ours is the first online estimation algorithm that is provably robust to heavy-tails, corruptions and distribution shift simultaneously. 
We complement our theoretical results empirically on synthetic and real data.", "keywords": "Estimation;heavy-tails;distribution shifts;regret", "primary_area": "", "supplementary_material": "", "author": "Abishek Sankararaman;Murali Balakrishnan", "authorids": "~Abishek_Sankararaman1;~Murali_Balakrishnan1", "gender": "M;M", "homepage": "http://abishek90.github.io/;https://sites.google.com/site/muralibalki/", "dblp": "https://dblp.uni-trier.de/pers/hd/s/Sankararaman:Abishek;12/5012", "google_scholar": "3T9FHn0AAAAJ;mKzKZfUAAAAJ", "orcid": ";0009-0006-4377-8295", "linkedin": ";", "or_profile": "~Abishek_Sankararaman1;~Murali_Balakrishnan1", "aff": "Amazon;Amazon", "aff_domain": "amazon.com;amazon.com", "position": "Researcher;Principal Researcher", "bibtex": "@inproceedings{\nsankararaman2023online,\ntitle={Online robust non-stationary estimation},\nauthor={Abishek Sankararaman and Murali Balakrishnan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=uzOBDerK1j}\n}", "github": "", "project": "", "reviewers": "6VQW;stYK;MGHA;nCPY;ijft", "pdf_size": 2590198, "rating": "4;4;7;7;8", "confidence": "3;2;3;3;1", "soundness": "3;2;3;3;4", "novelty": "2;2;3;3;4", "presentation": "1;2;3;3;4", "wc_summary": "121;101;180;68;247", "wc_strengths": "15;20;108;34;32", "wc_weaknesses": "297;394;336;78;1", "wc_questions": "59;2;35;1;129", "wc_limitations": "1;1;11;1;1", "wc_review": "493;518;670;182;410", "wc_reply_reviewers": "0;198;120;14;0", "wc_reply_authors": "0;395;43;0;0", "reply_reviewers": "0;1;2;1;0", "reply_authors": "1;2;2;1;1", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 143.4, 63.33592977133911 ], "wc_strengths_avg": [ 41.8, 33.860891896109294 ], "wc_weaknesses_avg": [ 221.2, 153.47885847894491 ], "wc_questions_avg": [ 45.2, 47.19491498032388 ], "wc_limitations_avg": [ 3.0, 4.0 ], "wc_review_avg": [ 454.6, 160.11945540751753 ], "wc_reply_reviewers_avg": [ 66.4, 79.69341252575397 ], "wc_reply_authors_avg": [ 87.6, 154.5996119011946 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.29880715233359845, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4863671462330191803&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "amazon.com;amazon.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Temporal Continual Learning with Prior Compensation for Human Motion Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70127", "id": "v0GzRLvVp3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf7a83a5342befd11d3d65beba1be5b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v0GzRLvVp3", "openreview": "https://openreview.net/forum?id=v0GzRLvVp3", "poster": "/media/PosterPDFs/NeurIPS%202023/70127.png?t=1701494002.610415", "slides": "https://nips.cc/virtual/2023/poster/70127", "video": 
"https://nips.cc/virtual/2023/poster/70127", "author_site": "Jianwei Tang, Jiangxin Sun, Xiaotong Lin, lifang zhang, Wei-Shi Zheng, Jian-Fang Hu", "tldr": "", "abstract": "Human Motion Prediction (HMP) aims to predict future poses at different moments according to past motion sequences. Previous approaches have treated the prediction of various moments equally, resulting in two main limitations: the learning of short-term predictions is hindered by the focus on long-term predictions, and the incorporation of prior information from past predictions into subsequent predictions is limited. In this paper, we introduce a novel multi-stage training framework called Temporal Continual Learning (TCL) to address the above challenges. To better preserve prior information, we introduce the Prior Compensation Factor (PCF). We incorporate it into the model training to compensate for the lost prior information. Furthermore, we derive a more reasonable optimization objective through theoretical derivation. It is important to note that our TCL framework can be easily integrated with different HMP backbone models and adapted to various datasets and applications. Extensive experiments on four HMP benchmark datasets demonstrate the effectiveness and flexibility of TCL. The code is available at https://github.com/hyqlat/TCL.", "keywords": "Human Motion Prediction; Temporal Continual Learning; Prior Compensation Factor", "primary_area": "", "supplementary_material": "/attachment/1f04b02c189f1543997dd562c284cf953530dcdf.pdf", "author": "Jianwei Tang;Jiangxin Sun;Xiaotong Lin;lifang zhang;Wei-Shi Zheng;Jian-Fang Hu", "authorids": "~Jianwei_Tang1;~Jiangxin_Sun1;~Xiaotong_Lin3;~lifang_zhang1;~Wei-Shi_Zheng3;~Jian-Fang_Hu1", "gender": "M;M;;F;M;M", "homepage": "https://hyqlat.github.io/;https://sunjiangxin.github.io/website;;https://cs.dgut.edu.cn/info/1012/6098.htm;http://www.isee-ai.cn/~zhwshi;https://isee-ai.cn/~hujianfang/", "dblp": ";251/3751;;;30/8399;", "google_scholar": ";a3ckPz0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;AwqDDGoAAAAJ;https://scholar.google.com.sg/citations?user=4WsBaB4AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Jianwei_Tang1;~Jiangxin_Sun1;~Xiaotong_Lin3;~lifang_zhang1;~Wei-Shi_Zheng3;~Jian-Fang_Hu1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;DONGGUAN UNIVERSITY OF TECHNOLOGY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "mail2.sysu.edu.cn;sysu.edu.cn;mail2.sysu.edu.cn;dgut.edu.cn;sysu.edu.cn;sysu.edu.cn", "position": "MS student;MS student;MS student;Lecturer;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ntang2023temporal,\ntitle={Temporal Continual Learning with Prior Compensation for Human Motion Prediction},\nauthor={Jianwei Tang and Jiangxin Sun and Xiaotong Lin and lifang zhang and Wei-Shi Zheng and Jian-Fang Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v0GzRLvVp3}\n}", "github": "", "project": "", "reviewers": "aXof;vWPd;GqjD;hcCQ;skai", "pdf_size": 787080, "rating": "5;5;5;5;6", "confidence": "5;3;5;3;4", "soundness": "2;2;3;2;3", "novelty": "2;2;3;2;3", "presentation": "2;2;3;2;2", "wc_summary": "95;61;90;64;177", "wc_strengths": "80;39;82;57;121", "wc_weaknesses": "215;155;64;96;450", "wc_questions": "314;45;153;61;12", "wc_limitations": "3;4;9;32;21", "wc_review": "707;304;398;310;781", "wc_reply_reviewers": "998;33;22;0;57", "wc_reply_authors": "2450;22;23;0;21", "reply_reviewers": 
"8;1;1;0;1", "reply_authors": "7;2;2;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 97.4, 42.04093243494963 ], "wc_strengths_avg": [ 75.8, 27.592752671670873 ], "wc_weaknesses_avg": [ 196.0, 137.09996353026503 ], "wc_questions_avg": [ 117.0, 109.04127658827184 ], "wc_limitations_avg": [ 13.8, 11.124747188138706 ], "wc_review_avg": [ 500.0, 203.33715843396652 ], "wc_reply_reviewers_avg": [ 222.0, 388.4342930277913 ], "wc_reply_authors_avg": [ 503.2, 973.4374967094702 ], "reply_reviewers_avg": [ 2.2, 2.9257477676655586 ], "reply_authors_avg": [ 2.8, 2.1354156504062622 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17861187489601612369&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "mail2.sysu.edu.cn;sysu.edu.cn;mail2.sysu.edu.cn;dgut.edu.cn;sysu.edu.cn;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Sun Yat-sen University;Dongguan University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.dgut.edu.cn", "aff_unique_abbr": "SYSU;DGUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Egocentric Planning for Scalable Embodied Task Achievement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70126", "id": "v0lkbp66Uw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ab0b1be09c317cb068aecfa7fa86a7e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v0lkbp66Uw", "openreview": "https://openreview.net/forum?id=v0lkbp66Uw", "poster": "/media/PosterPDFs/NeurIPS%202023/70126.png?t=1699955854.6258225", "slides": "https://nips.cc/virtual/2023/poster/70126", "video": "https://nips.cc/virtual/2023/poster/70126", "author_site": "Xiatoian Liu, Hector Palacios, Christian Muise", "tldr": "", "abstract": "Embodied agents face significant challenges when tasked with performing actions in diverse environments, particularly in generalizing across object types and executing suitable actions to accomplish tasks. Furthermore, agents should exhibit robustness, minimizing the execution of illegal actions. In this work, we present Egocentric Planning, an innovative approach that combines symbolic planning and Object-oriented POMDPs to solve tasks in complex environments, harnessing existing models for visual perception and natural language processing. We evaluated our approach in ALFRED, a simulated environment designed for domestic tasks, and demonstrated its high scalability, achieving an impressive 36.07\\% unseen success rate in the ALFRED benchmark and winning the ALFRED challenge at CVPR Embodied AI workshop. Our method requires reliable perception and the specification or learning of a symbolic description of the preconditions and effects of the agent's actions, as well as what object types reveal information about others. It can naturally scale to solve new tasks beyond ALFRED, as long as they can be solved using the available skills. 
This work offers a solid baseline for studying end-to-end and hybrid methods that aim to generalize to new tasks, including recent approaches relying on LLMs, which often struggle to scale to long sequences of actions or produce robust plans for novel tasks.", "keywords": "Embodied AI;High-Level Actions;Symbolic Reasoning;Replanning;ALFRED Challenge;Flexible Task Achievement;User-Goal Understanding;Object Types and Actions;Perception Grounding", "primary_area": "", "supplementary_material": "/attachment/2b19dbfe976a7024ada2beb2e83b1a8a6e74a8ba.pdf", "author": "Xiaotian Liu;Hector Palacios;Christian Muise", "authorids": "~Xiaotian_Liu1;~Hector_Palacios1;~Christian_Muise1", "gender": "M;M;M", "homepage": ";http://hectorpalacios.net/;http://www.haz.ca/", "dblp": ";73/1219;70/1862", "google_scholar": "DZ0bs9MAAAAJ;https://scholar.google.ca/citations?user=f3ViHrcAAAAJ;HUzuGj8AAAAJ", "orcid": ";0000-0002-1184-0940;", "linkedin": ";https://linkedin.com/in/hectorpal;christianmuise/", "or_profile": "~Xiaotian_Liu1;~Hector_Palacios1;~Christian_Muise1", "aff": "University of Toronto;ServiceNow;Queens University", "aff_domain": "utoronto.ca;servicenow.com;queensu.ca", "position": "PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nliu2023egocentric,\ntitle={Egocentric Planning for Scalable Embodied Task Achievement},\nauthor={Xiaotian Liu and Hector Palacios and Christian Muise},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v0lkbp66Uw}\n}", "github": "", "project": "", "reviewers": "vi1s;oR6u;ZvXu;JTSQ;dRsd", "pdf_size": 940987, "rating": "4;5;6;6;7", "confidence": "3;4;4;4;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "2;2;2;3;4", "wc_summary": "189;111;128;100;253", "wc_strengths": "51;35;40;57;109", "wc_weaknesses": "247;545;112;150;41", "wc_questions": "331;177;46;57;112", "wc_limitations": "87;176;50;34;30", "wc_review": "905;1044;376;398;545", "wc_reply_reviewers": "0;177;242;0;0", "wc_reply_authors": "111;57;655;0;0", "reply_reviewers": "0;1;3;0;0", "reply_authors": "2;2;4;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 156.2, 57.3459676001722 ], "wc_strengths_avg": [ 58.4, 26.469605210505122 ], "wc_weaknesses_avg": [ 219.0, 176.04204043352826 ], "wc_questions_avg": [ 144.6, 104.16640533300553 ], "wc_limitations_avg": [ 75.4, 54.17600945067844 ], "wc_review_avg": [ 653.6, 271.95632002216826 ], "wc_reply_reviewers_avg": [ 83.8, 104.67167716244926 ], "wc_reply_authors_avg": [ 164.6, 248.6480243235405 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690604 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7844645405527362, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9481714056808812883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "utoronto.ca;servicenow.com;queensu.ca", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Toronto;ServiceNow;Queens University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://www.servicenow.com;https://www.queensu.ca", "aff_unique_abbr": "U of T;ServiceNow;Queen's U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", 
"aff_country_unique": "Canada;United States" }, { "title": "H2RBox-v2: Incorporating Symmetry for Boosting Horizontal Box Supervised Oriented Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70125", "id": "v1VVKaMYbk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b9603de9e49d0838e53b6c9cf9d06556-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v1VVKaMYbk", "openreview": "https://openreview.net/forum?id=v1VVKaMYbk", "poster": "/media/PosterPDFs/NeurIPS%202023/70125.png?t=1697468463.7803879", "slides": "https://nips.cc/virtual/2023/poster/70125", "video": "https://nips.cc/virtual/2023/poster/70125", "author_site": "Yi Yu, Xue Yang, Xue Yang, Qingyun Li, Yue Zhou, Feipeng Da, Junchi Yan", "tldr": "", "abstract": "With the rapidly increasing demand for oriented object detection, e.g. in autonomous driving and remote sensing, the recently proposed paradigm involving weakly-supervised detector H2RBox for learning rotated box (RBox) from the more readily-available horizontal box (HBox) has shown promise. This paper presents H2RBox-v2, to further bridge the gap between HBox-supervised and RBox-supervised oriented object detection. Specifically, we propose to leverage the reflection symmetry via flip and rotate consistencies, using a weakly-supervised network branch similar to H2RBox, together with a novel self-supervised branch that learns orientations from the symmetry inherent in visual objects. The detector is further stabilized and enhanced by practical techniques to cope with peripheral issues e.g. angular periodicity. To our best knowledge, H2RBox-v2 is the first symmetry-aware self-supervised paradigm for oriented object detection. In particular, our method shows less susceptibility to low-quality annotation and insufficient training data compared to H2RBox. Specifically, H2RBox-v2 achieves very close performance to a rotation annotation trained counterpart -- Rotated FCOS: 1) DOTA-v1.0/1.5/2.0: 72.31%/64.76%/50.33% vs. 72.44%/64.53%/51.77%; 2) HRSC: 89.66% vs. 88.99%; 3) FAIR1M: 42.27% vs. 
41.25%.", "keywords": "Oriented object detection;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/cd59e1523a6be6fea708e1fc9a48e0e972088714.pdf", "author": "Yi Yu;Xue Yang;Qingyun Li;Yue Zhou;Feipeng Da;Junchi Yan", "authorids": "~Yi_Yu4;~Xue_Yang2;~Qingyun_Li1;~Yue_Zhou4;~Feipeng_Da1;~Junchi_Yan2", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/yuyi1005;https://yangxue.site/;https://github.com/Li-Qingyun;https://zytx121.github.io/;https://automation.seu.edu.cn/2019/0528/c24504a275222/page.htm;http://thinklab.sjtu.edu.cn/", "dblp": "99/111-10;13/1779-5;65/10015;78/6191-5;04/6129;60/7949.html", "google_scholar": ";2xTlvV0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=v-aQ8GsAAAAJ;;ga230VoAAAAJ", "orcid": ";0000-0002-7084-9101;0000-0001-5101-4937;0000-0002-3080-6721;;0000-0001-9639-7679", "linkedin": ";;;;;", "or_profile": "~Yi_Yu4;~Xue_Yang2;~Qingyun_Li1;~Yue_Zhou4;~Feipeng_Da1;~Junchi_Yan1", "aff": "Southeast University;Shanghai Jiaotong University;Harbin Institute of Technology;Shanghai Jiaotong University;Southeast University;Shanghai Jiaotong University", "aff_domain": "seu.edu.cn;sjtu.edu.cn;hit.edu.cn;sjtu.edu.cn;seu.edu.cn;sjtu.edu.cn", "position": "Postdoc;PhD student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyu2023hrboxv,\ntitle={H2{RB}ox-v2: Incorporating Symmetry for Boosting Horizontal Box Supervised Oriented Object Detection},\nauthor={Yi Yu and Xue Yang and Qingyun Li and Yue Zhou and Feipeng Da and Junchi Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v1VVKaMYbk}\n}", "github": "", "project": "", "reviewers": "tfHb;Pshp;W3Ay;k1cY", "pdf_size": 4120859, "rating": "5;7;7;7", "confidence": "5;4;5;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "63;57;78;47", "wc_strengths": "51;40;166;83", "wc_weaknesses": "183;646;96;63", "wc_questions": "46;124;101;121", "wc_limitations": "34;2;3;1", "wc_review": "377;869;444;315", "wc_reply_reviewers": "323;47;29;61", "wc_reply_authors": "272;22;15;21", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.25, 11.233320969330485 ], "wc_strengths_avg": [ 85.0, 49.36091571273774 ], "wc_weaknesses_avg": [ 247.0, 234.49626862702954 ], "wc_questions_avg": [ 98.0, 31.29696470905765 ], "wc_limitations_avg": [ 10.0, 13.874436925511608 ], "wc_review_avg": [ 501.25, 217.1662669477007 ], "wc_reply_reviewers_avg": [ 115.0, 120.62338081814819 ], "wc_reply_authors_avg": [ 82.5, 109.4406231707404 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10500744701056486037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "seu.edu.cn;sjtu.edu.cn;hit.edu.cn;sjtu.edu.cn;seu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;0;1", "aff_unique_norm": "Southeast University;Shanghai Jiao Tong University;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.seu.edu.cn/;https://www.sjtu.edu.cn;http://www.hit.edu.cn/", "aff_unique_abbr": "SEU;SJTU;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MonoUNI: A Unified Vehicle and Infrastructure-side Monocular 3D Object Detection Network with Sufficient Depth Clues", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70124", "id": "v2oGdhbKxi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2703a0e3c2b33506295a77762338cf24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v2oGdhbKxi", "openreview": "https://openreview.net/forum?id=v2oGdhbKxi", "poster": "/media/PosterPDFs/NeurIPS%202023/70124.png?t=1697794302.6190684", "slides": "https://nips.cc/virtual/2023/poster/70124", "video": "https://nips.cc/virtual/2023/poster/70124", "author_site": "Jia Jinrang, Zhenjia Li, Yifeng Shi", "tldr": "", "abstract": "Monocular 3D detection of vehicle and infrastructure sides are two important topics in autonomous driving. Due to diverse sensor installations and focal lengths, researchers are faced with the challenge of constructing algorithms for the two topics based on different prior knowledge. In this paper, by taking into account the diversity of pitch angles and focal lengths, we propose a unified optimization target named normalized depth, which realizes the unification of 3D detection problems for the two sides. Furthermore, to enhance the accuracy of monocular 3D detection, 3D normalized cube depth of obstacle is developed to promote the learning of depth information. We posit that the richness of depth clues is a pivotal factor impacting the detection performance on both the vehicle and infrastructure sides. A richer set of depth clues facilitates the model to learn better spatial knowledge, and the 3D normalized cube depth offers sufficient depth clues. Extensive experiments demonstrate the effectiveness of our approach. 
Without introducing any extra information, our method, named MonoUNI, achieves state-of-the-art performance on five widely used monocular 3D detection benchmarks, including Rope3D and DAIR-V2X-I for the infrastructure side, KITTI and Waymo for the vehicle side, and nuScenes for the cross-dataset evaluation.", "keywords": "3D detection;deep learning;autonomous driving", "primary_area": "", "supplementary_material": "/attachment/787b8f775cc05fb523e0efe75648b60c44a8f2dd.pdf", "author": "Jinrang Jia;Zhenjia Li;Yifeng Shi", "authorids": "~Jinrang_Jia1;~Zhenjia_Li1;~Yifeng_Shi1", "gender": "M;M;M", "homepage": "https://github.com/jjrCN;https://github.com/zhenjiaa;https://github.com/MRshiyf", "dblp": "300/8919;;249/5498", "google_scholar": "fLcVZKkAAAAJ;;KlHuj2QAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jinrang_Jia1;~Zhenjia_Li1;~Yifeng_Shi1", "aff": "Baidu;Baidu;Baidu", "aff_domain": "baidu.com;baidu.com;baidu.com", "position": "Engineer;Researcher;Principal Engineer", "bibtex": "@inproceedings{\njia2023monouni,\ntitle={Mono{UNI}: A Unified Vehicle and Infrastructure-side Monocular 3D Object Detection Network with Sufficient Depth Clues},\nauthor={Jinrang Jia and Zhenjia Li and Yifeng Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v2oGdhbKxi}\n}", "github": "", "project": "", "reviewers": "yykW;LnrW;YJqg;rxxf", "pdf_size": 1952717, "rating": "4;5;5;7", "confidence": "3;4;3;5", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;3;2;3", "wc_summary": "89;126;72;91", "wc_strengths": "43;116;91;150", "wc_weaknesses": "230;348;172;196", "wc_questions": "2;56;88;26", "wc_limitations": "2;45;51;19", "wc_review": "366;691;474;482", "wc_reply_reviewers": "0;84;0;45", "wc_reply_authors": "0;412;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.5, 19.62778642639052 ], "wc_strengths_avg": [ 100.0, 39.00640972968417 ], "wc_weaknesses_avg": [ 236.5, 67.59252917297887 ], "wc_questions_avg": [ 43.0, 32.264531609803356 ], "wc_limitations_avg": [ 29.25, 19.803724397193573 ], "wc_review_avg": [ 503.25, 117.68044654911877 ], "wc_reply_reviewers_avg": [ 32.25, 35.074028853269766 ], "wc_reply_authors_avg": [ 103.0, 178.40123317959436 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9681030187089970821&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "baidu.com;baidu.com;baidu.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Baidu", "aff_unique_dep": "Baidu, Inc.", "aff_unique_url": "https://www.baidu.com", "aff_unique_abbr": "Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "v30HbVOxJR", "title": "You Shall not Pass: the Zero-Gradient Problem in Predict and Optimize for Convex Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Predict and optimize is an increasingly popular decision-making paradigm that employs machine learning to predict unknown parameters of optimization 
problems. Instead of minimizing the prediction error of the parameters, it trains predictive models using task performance as a loss function. In the convex optimization domain, predict and optimize has seen significant progress due to recently developed methods for differentiating optimization problem solutions over the problem parameters. This paper identifies a yet unnoticed drawback of this approach -- the zero-gradient problem -- and introduces a method to solve it. The suggested method is based on the mathematical properties of differential optimization and is verified using two real-world benchmarks.", "keywords": "Predict and optimize;differential optimization;convex optimization", "primary_area": "", "supplementary_material": "/attachment/5592c73fb78390dd152fc1e233467bf62ee71803.zip", "author": "Grigorii Veviurko;Wendelin Boehmer;Mathijs de Weerdt", "authorids": "~Grigorii_Veviurko1;~Wendelin_Boehmer1;~Mathijs_de_Weerdt1", "gender": "M;M;M", "homepage": ";https://reinforceAI.net;http://www.alg.ewi.tudelft.nl/weerdt/", "dblp": ";08/9988;91/3015", "google_scholar": "2jVnBAIAAAAJ;https://scholar.google.de/citations?user=wI5MV8IAAAAJ;https://scholar.google.com.tw/citations?user=9GJ8AvgAAAAJ", "orcid": ";0000-0002-4398-6792;0000-0002-0470-6241", "linkedin": ";wendelin-boehmer;mdeweerdt/", "or_profile": "~Grigorii_Veviurko1;~Wendelin_Boehmer1;~Mathijs_Weerdt1", "aff": "Delft University of Technology;Delft University of Technology;Delft University of Technology", "aff_domain": "tudelft.nl;tudelft.nl;tudelft.nl", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nveviurko2023you,\ntitle={You Shall not Pass: the Zero-Gradient Problem in Predict and Optimize for Convex Optimization},\nauthor={Grigorii Veviurko and Wendelin Boehmer and Mathijs de Weerdt},\nyear={2023},\nurl={https://openreview.net/forum?id=v30HbVOxJR}\n}", "github": "", "project": "", "reviewers": "1CVQ;ZB3i;Yhee;8mJH;46Fp", "site": "https://openreview.net/forum?id=v30HbVOxJR", "pdf_size": 532901, "rating": "3;4;6;6;7", "confidence": "4;4;3;1;4", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "1;2;4;3;3", "wc_summary": "50;223;86;44;84", "wc_strengths": "18;119;69;96;163", "wc_weaknesses": "589;485;39;36;157", "wc_questions": "14;3;355;44;2", "wc_limitations": "14;1;2;1;2", "wc_review": "685;831;551;221;408", "wc_reply_reviewers": "1287;259;63;0;0", "wc_reply_authors": "1491;566;0;0;0", "reply_reviewers": "4;1;1;0;0", "reply_authors": "5;2;1;1;1", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 97.4, 65.08947687606654 ], "wc_strengths_avg": [ 93.0, 48.551004109081 ], "wc_weaknesses_avg": [ 261.2, 231.72604514814472 ], "wc_questions_avg": [ 83.6, 136.5468417796618 ], "wc_limitations_avg": [ 4.0, 5.019960159204453 ], "wc_review_avg": [ 539.2, 212.13618267518626 ], "wc_reply_reviewers_avg": [ 321.8, 491.8623384647375 ], "wc_reply_authors_avg": [ 411.4, 582.6125985592828 ], "reply_reviewers_avg": [ 1.2, 1.4696938456699067 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3734080224074693, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:XfCCf51zDpwJ:scholar.google.com/&scioq=You+Shall+not+Pass:+the+Zero-Gradient+Problem+in+Predict+and+Optimize+for+Convex+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "A Performance-Driven Benchmark for Feature Selection in Tabular Deep Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73432", "id": "v4PMCdSaAT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/82f39c7409155b74d15d73b048f06771-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=v4PMCdSaAT", "openreview": "https://openreview.net/forum?id=v4PMCdSaAT", "poster": "/media/PosterPDFs/NeurIPS%202023/73432.png?t=1701671158.403168", "slides": "https://nips.cc/virtual/2023/poster/73432", "video": "https://nips.cc/virtual/2023/poster/73432", "author_site": "Valeriia Cherepanova, Roman Levin, Gowthami Somepalli, Jonas Geiping, C. Bayan Bruss, Andrew Wilson, Tom Goldstein, Micah Goldblum", "tldr": "", "abstract": "Academic tabular benchmarks often contain small sets of curated features. In contrast, data scientists typically collect as many features as possible into their datasets, and even engineer new features from existing ones. To prevent over-fitting in subsequent downstream modeling, practitioners commonly use automated feature selection methods that identify a reduced subset of informative features. Existing benchmarks for tabular feature selection consider classical downstream models, toy synthetic datasets, or do not evaluate feature selectors on the basis of downstream performance. We construct a challenging feature selection benchmark evaluated on downstream neural networks including transformers, using real datasets and multiple methods for generating extraneous features. We also propose Deep Lasso -- an input-gradient-based analogue of LASSO for neural networks that outperforms classical feature selection methods on challenging problems such as selecting from corrupted or second-order features.", "keywords": "tabular deep learning;tabular data;feature selection;deep lasso;lasso", "primary_area": "", "supplementary_material": "", "author": "Valeriia Cherepanova;Roman Levin;Gowthami Somepalli;Jonas Geiping;C. 
Bayan Bruss;Andrew Gordon Wilson;Tom Goldstein;Micah Goldblum", "authorids": "~Valeriia_Cherepanova1;~Roman_Levin1;~Gowthami_Somepalli1;~Jonas_Geiping1;~C._Bayan_Bruss1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1;~Micah_Goldblum1", "gender": "F;M;F;M;M;Not Specified;M;", "homepage": "https://www.vcherepanova.com/;;https://somepago.github.io/;https://jonasgeiping.github.io/;https://www.cbbruss.com;https://cims.nyu.edu/~andrewgw;https://www.cs.umd.edu/~tomg/;", "dblp": ";278/3201;286/5012;190/7229;;65/10453;25/8184;241/7231", "google_scholar": "PySUqqUAAAAJ;WWKiF4wAAAAJ;T2ezBDsAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;ClqvGRQAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;KmSuVtgAAAAJ;pGDKzuUAAAAJ", "orcid": ";;;;;;;", "linkedin": ";rilevin;;;bayan-bruss/;;;", "or_profile": "~Valeriia_Cherepanova1;~Roman_Levin1;~Gowthami_Somepalli1;~Jonas_Geiping1;~C._Bayan_Bruss1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1;~Micah_Goldblum1", "aff": "University of Maryland, College Park;Amazon;University of Maryland, College Park;University of Maryland, College Park;Capital One;New York University;University of Maryland, College Park;New York University", "aff_domain": "umd.edu;amazon.com;umd.edu;umd.edu;capitalone.com;nyu.edu;umd.edu;nyu.edu", "position": "PhD student;Research Scientist;PhD student;Postdoc;Director of Applied Research;Associate Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\ncherepanova2023a,\ntitle={A Performance-Driven Benchmark for Feature Selection in Tabular Deep Learning},\nauthor={Valeriia Cherepanova and Roman Levin and Gowthami Somepalli and Jonas Geiping and C. Bayan Bruss and Andrew Gordon Wilson and Tom Goldstein and Micah Goldblum},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=v4PMCdSaAT}\n}", "github": "", "project": "", "reviewers": "Lpp4;rx9X;rBeS", "pdf_size": 915689, "rating": "5;7;7", "confidence": "3;3;4", "wc_summary_and_contributions": "41;81;42", "wc_strengths": "44;84;152", "wc_improvement": "130;95;79", "wc_limitations": "6;90;6", "wc_correctness": "66;52;55", "wc_clarity": "7;11;10", "wc_relation_to_prior_work": "63;23;11", "wc_documentation": "18;42;19", "wc_additional_feedback": "1;1;1", "wc_review": "376;479;375", "wc_reply_reviewers": "10;20;0", "wc_reply_authors": "939;645;573", "reply_reviewers": "1;1;0", "reply_authors": "3;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 54.666666666666664, 18.624953392931992 ], "wc_strengths_avg": [ 93.33333333333333, 44.58200932613463 ], "wc_improvement_avg": [ 101.33333333333333, 21.296843793284385 ], "wc_limitations_avg": [ 34.0, 39.59797974644666 ], "wc_correctness_avg": [ 57.666666666666664, 6.018490028422596 ], "wc_clarity_avg": [ 9.333333333333334, 1.699673171197595 ], "wc_relation_to_prior_work_avg": [ 32.333333333333336, 22.23110933404409 ], "wc_documentation_avg": [ 26.333333333333332, 11.08552609887726 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 410.0, 48.792075859371536 ], "wc_reply_reviewers_avg": [ 10.0, 8.16496580927726 ], "wc_reply_authors_avg": [ 719.0, 158.3161394173064 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 22, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=5556888268514620516&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "umd.edu;amazon.com;umd.edu;umd.edu;capitalone.com;nyu.edu;umd.edu;nyu.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;2;3;0;3", "aff_unique_norm": "University of Maryland;Amazon;Capital One;New York University", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www/umd.edu;https://www.amazon.com;https://www.capitalone.com;https://www.nyu.edu", "aff_unique_abbr": "UMD;Amazon;Capital One;NYU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "UniControl: A Unified Diffusion Model for Controllable Visual Generation In the Wild", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70123", "id": "v54eUIayFh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/862f45ccecb2275851bc8acebb8b4d65-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v54eUIayFh", "openreview": "https://openreview.net/forum?id=v54eUIayFh", "poster": "/media/PosterPDFs/NeurIPS%202023/70123.png?t=1702318542.491242", "slides": "https://nips.cc/virtual/2023/poster/70123", "video": "https://nips.cc/virtual/2023/poster/70123", "author_site": "Can Qin, Shu Zhang, Ning Yu, Yihao Feng, Xinyi Yang, Yingbo Zhou, Huan Wang, Juan Carlos Niebles, Caiming Xiong, Silvio Savarese, Stefano Ermon, Yun Fu, Ran Xu", "tldr": "", "abstract": "Achieving machine autonomy and human control often represent divergent objectives in the design of interactive AI systems. Visual generative foundation models such as Stable Diffusion show promise in navigating these goals, especially when prompted with arbitrary languages. However, they often fall short in generating images with spatial, structural, or geometric controls. The integration of such controls, which can accommodate various visual conditions in a single unified model, remains an unaddressed challenge. In response, we introduce UniControl, a new generative foundation model that consolidates a wide array of controllable condition-to-image (C2I) tasks within a singular framework, while still allowing for arbitrary language prompts. UniControl enables pixel-level-precise image generation, where visual conditions primarily influence the generated structures and language prompts guide the style and context. To equip UniControl with the capacity to handle diverse visual conditions, we augment pretrained text-to-image diffusion models and introduce a task-aware HyperNet to modulate the diffusion models, enabling the adaptation to different C2I tasks simultaneously. Trained on nine unique C2I tasks, UniControl demonstrates impressive zero-shot generation abilities with unseen visual conditions. Experimental results show that UniControl often surpasses the performance of single-task-controlled methods of comparable model sizes. 
This control versatility positions UniControl as a significant advancement in the realm of controllable visual generation.", "keywords": "Image Generation;Multi-modal;HyperNet", "primary_area": "", "supplementary_material": "/attachment/14d7da290147db36de879be9ceb5bc7a878b7aa7.zip", "author": "Can Qin;Shu Zhang;Ning Yu;Yihao Feng;Xinyi Yang;Yingbo Zhou;Huan Wang;Juan Carlos Niebles;Caiming Xiong;Silvio Savarese;Stefano Ermon;Yun Fu;Ran Xu", "authorids": "~Can_Qin1;~Shu_Zhang1;~Ning_Yu2;~Yihao_Feng1;~Xinyi_Yang2;~Yingbo_Zhou1;~Huan_Wang1;~Juan_Carlos_Niebles1;~Caiming_Xiong1;~Silvio_Savarese1;~Stefano_Ermon1;~Yun_Fu1;~Ran_Xu1", "gender": "M;M;;M;F;;M;M;M;M;M;M;M", "homepage": "http://canqin.tech;;;;https://www.linkedin.com/in/xinyiyang4/;;http://www.cs.yale.edu/homes/wang-huan/;http://www.niebles.net/;http://cmxiong.com/;;http://cs.stanford.edu/~ermon/;http://www1.ece.neu.edu/~yunfu/;", "dblp": "214/2488;30/2700-7;;204/3696;80/9004-2;72/8614;70/6155-16.html;26/647;80/7282;50/3578;47/8135;00/5815-1;", "google_scholar": "QCik-YcAAAAJ;k9zsuBIAAAAJ;;uqnNle0AAAAJ;zxtgr18AAAAJ;H_6RQ7oAAAAJ;7NpTttkAAAAJ;hqNhUCYAAAAJ;vaSdahkAAAAJ;ImpbxLsAAAAJ;;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ;sgBB2sUAAAAJ", "orcid": ";;;;0009-0009-4612-9698;;;;;;;0000-0002-5098-2853;", "linkedin": ";shu-zhang-5b34b320;;;xinyiyang4/;yingbozhou/;huanwangyale/;;caiming-xiong-150a1417;;;furaymond/;", "or_profile": "~Can_Qin1;~Shu_Zhang1;~Ning_Yu2;~Yihao_Feng1;~Xinyi_Yang2;~Yingbo_Zhou1;~Huan_Wang1;~Juan_Carlos_Niebles1;~Caiming_Xiong1;~Silvio_Savarese1;~Stefano_Ermon1;~Yun_Fu1;~Ran_Xu1", "aff": "Northeastern University;Salesforce Research;;Salesforce AI Research;Salesforce Research;Salesforce Research;Salesforce.com;Stanford University;Salesforce Research;Stanford University;Stanford University;Northeastern University;SalesForce.com", "aff_domain": "neu.edu;salesforce.com;;salesforce.com;salesforce.com;salesforce.com;salesforce.com;stanford.edu;salesforce.com;stanford.edu;stanford.edu;northeastern.edu;salesforce.com", "position": "PhD student;Researcher;;Researcher;Researcher;Research Scientist;Researcher;Adjunct Professor;Research Scientist;Adjunct Professor;Associate Professor;Full Professor;senior manager", "bibtex": "@inproceedings{\nqin2023unicontrol,\ntitle={UniControl: A Unified Diffusion Model for Controllable Visual Generation In the Wild},\nauthor={Can Qin and Shu Zhang and Ning Yu and Yihao Feng and Xinyi Yang and Yingbo Zhou and Huan Wang and Juan Carlos Niebles and Caiming Xiong and Silvio Savarese and Stefano Ermon and Yun Fu and Ran Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v54eUIayFh}\n}", "github": "", "project": "", "reviewers": "HFTZ;y5r6;J9bc;Haxt;hjKo", "pdf_size": 37263078, "rating": "5;5;5;6;6", "confidence": "4;3;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;3;2;3;3", "wc_summary": "44;75;57;91;83", "wc_strengths": "49;44;57;37;50", "wc_weaknesses": "577;160;156;20;227", "wc_questions": "99;5;6;68;165", "wc_limitations": "13;12;1;10;9", "wc_review": "782;296;277;226;534", "wc_reply_reviewers": "34;71;35;13;37", "wc_reply_authors": "47;83;41;31;42", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 70.0, 
17.204650534085253 ], "wc_strengths_avg": [ 47.4, 6.6513156593263565 ], "wc_weaknesses_avg": [ 228.0, 187.01550737839898 ], "wc_questions_avg": [ 68.6, 60.30124376826733 ], "wc_limitations_avg": [ 9.0, 4.242640687119285 ], "wc_review_avg": [ 423.0, 208.5454386938252 ], "wc_reply_reviewers_avg": [ 38.0, 18.65475810617763 ], "wc_reply_authors_avg": [ 48.8, 17.870646322950943 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.40824829046386313, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13513427978200918159&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "neu.edu;salesforce.com;;salesforce.com;salesforce.com;salesforce.com;salesforce.com;stanford.edu;salesforce.com;stanford.edu;stanford.edu;northeastern.edu;salesforce.com", "author_num": 13, "aff_unique_index": "0;1;1;1;1;1;2;1;2;2;0;1", "aff_unique_norm": "Northeastern University;Salesforce;Stanford University", "aff_unique_dep": ";Salesforce Research;", "aff_unique_url": "https://www.northeastern.edu;https://research.salesforce.com;https://www.stanford.edu", "aff_unique_abbr": "NEU;Salesforce;Stanford", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Adversarial Robustness via Information Bottleneck Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70122", "id": "v5Aaxk4sSy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/233278d812e74a4f9848410881db86b1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v5Aaxk4sSy", "openreview": "https://openreview.net/forum?id=v5Aaxk4sSy", "poster": "/media/PosterPDFs/NeurIPS%202023/70122.png?t=1702035880.2177527", "slides": "https://nips.cc/virtual/2023/poster/70122", "video": "https://nips.cc/virtual/2023/poster/70122", "author_site": "Huafeng Kuang, Hong Liu, Yongjian Wu, Shin'ichi Satoh, Rongrong Ji", "tldr": "", "abstract": "Previous studies have shown that optimizing the information bottleneck can significantly improve the robustness of deep neural networks. Our study closely examines the information bottleneck principle and proposes an Information Bottleneck Distillation approach. This specially designed, robust distillation technique utilizes prior knowledge obtained from a robust pre-trained model to boost information bottlenecks. Specifically, we propose two distillation strategies that align with the two optimization processes of the information bottleneck. Firstly, we use a robust soft-label distillation method to increase the mutual information between latent features and output prediction. Secondly, we introduce an adaptive feature distillation method that automatically transfers relevant knowledge from the teacher model to the student model, thereby reducing the mutual information between the input and latent features. We conduct extensive experiments to evaluate our approach's robustness against state-of-the-art adversarial attackers such as PGD-attack and AutoAttack. Our experimental results demonstrate the effectiveness of our approach in significantly improving adversarial robustness. 
Our code is available at https://github.com/SkyKuang/IBD.", "keywords": "Information Bottleneck;Adversarial training;Adversarial robustness;Knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Huafeng Kuang;Hong Liu;YONGJIAN WU;Shin'ichi Satoh;Rongrong Ji", "authorids": "~Huafeng_Kuang1;~Hong_Liu9;~YONGJIAN_WU2;~Shin'ichi_Satoh1;~Rongrong_Ji5", "gender": ";Non-Binary;;M;M", "homepage": ";https://lynnhongliu.github.io/hliu/;https://open.youtu.qq.com/;http://www.satoh-lab.nii.ac.jp/;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "251/3442;29/5010-9;;50/290;86/5681", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;BC7N2dYAAAAJ;;https://scholar.google.co.jp/citations?user=7aEF5cQAAAAJ;", "orcid": ";0000-0001-5318-6388;;0000-0001-6995-6447;", "linkedin": ";;;shin-ichi-satoh-a8669573/;", "or_profile": "~Huafeng_Kuang1;~Hong_Liu9;~YONGJIAN_WU2;~Shin'ichi_Satoh1;~Rongrong_Ji5", "aff": "Xiamen University;National Institute of Informatics;;National Institute of Informatics;Xiamen University", "aff_domain": "xmu.edu.cn;nii.ac.jp;;nii.ac.jp;xmu.edu.cn", "position": "Researcher;Postdoc;;Professor;Full Professor", "bibtex": "@inproceedings{\nkuang2023improving,\ntitle={Improving Adversarial Robustness via Information Bottleneck Distillation},\nauthor={Huafeng Kuang and Hong Liu and YONGJIAN WU and Shin'ichi Satoh and Rongrong Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v5Aaxk4sSy}\n}", "github": "", "project": "", "reviewers": "wRTW;rCCu;fxg6;Dvz1;4SqU", "pdf_size": 1211622, "rating": "5;5;5;6;7", "confidence": "3;3;4;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "67;67;65;84;50", "wc_strengths": "53;47;25;46;120", "wc_weaknesses": "3;96;95;59;1", "wc_questions": "172;6;2;31;1", "wc_limitations": "1;11;7;7;1", "wc_review": "296;227;194;227;173", "wc_reply_reviewers": "17;29;0;30;0", "wc_reply_authors": "40;39;0;28;0", "reply_reviewers": "1;1;0;1;0", "reply_authors": "2;2;1;2;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.6, 10.781465577554844 ], "wc_strengths_avg": [ 58.2, 32.31965346348874 ], "wc_weaknesses_avg": [ 50.8, 42.020947157340466 ], "wc_questions_avg": [ 42.4, 65.72244669821718 ], "wc_limitations_avg": [ 5.4, 3.8781438859330635 ], "wc_review_avg": [ 223.4, 41.71618390984487 ], "wc_reply_reviewers_avg": [ 15.2, 13.22724461102916 ], "wc_reply_authors_avg": [ 21.4, 17.973313550928776 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14654711607866480585&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "xmu.edu.cn;nii.ac.jp;;nii.ac.jp;xmu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Xiamen University;National Institute of Informatics", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;https://www.nii.ac.jp/", "aff_unique_abbr": "XMU;NII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;Japan" }, { "title": "Winner Takes It All: Training Performant RL Populations for 
Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70121", "id": "v6VpqGcGAR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97b983c974551153d20ddfabb62a5203-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v6VpqGcGAR", "openreview": "https://openreview.net/forum?id=v6VpqGcGAR", "poster": "/media/PosterPDFs/NeurIPS%202023/70121.png?t=1702422423.2123148", "slides": "https://nips.cc/virtual/2023/poster/70121", "video": "https://nips.cc/virtual/2023/poster/70121", "author_site": "Nathan Grinsztajn, Daniel Furelos-Blanco, Shikha Surana, Cl\u00e9ment Bonnet, Tom Barrett", "tldr": "", "abstract": "Applying reinforcement learning (RL) to combinatorial optimization problems is attractive as it removes the need for expert knowledge or pre-solved instances. However, it is unrealistic to expect an agent to solve these (often NP-)hard problems in a single shot at inference due to their inherent complexity. Thus, leading approaches often implement additional search strategies, from stochastic sampling and beam-search to explicit fine-tuning. In this paper, we argue for the benefits of learning a population of complementary policies, which can be simultaneously rolled out at inference. To this end, we introduce Poppy, a simple training procedure for populations. Instead of relying on a predefined or hand-crafted notion of diversity, Poppy induces an unsupervised specialization targeted solely at maximizing the performance of the population. We show that Poppy produces a set of complementary policies, and obtains state-of-the-art RL results on three popular NP-hard problems: traveling salesman, capacitated vehicle routing, and job-shop scheduling.", "keywords": "Combinatorial Optimization;Reinforcement Learning;TSP;CVRP;JSSP", "primary_area": "", "supplementary_material": "/attachment/f837ecca1b81bc01e56103da42727b2371c385e0.zip", "author": "Nathan Grinsztajn;Daniel Furelos-Blanco;Shikha Surana;Cl\u00e9ment Bonnet;Thomas D Barrett", "authorids": "~Nathan_Grinsztajn1;~Daniel_Furelos-Blanco1;~Shikha_Surana1;~Cl\u00e9ment_Bonnet1;~Thomas_D_Barrett1", "gender": "M;;F;M;M", "homepage": "https://nathangrinsztajn.github.io/;https://www.danielfurelos.com;;;", "dblp": ";223/0191;;305/3524;248/8263", "google_scholar": "yVHIYEYAAAAJ;https://scholar.google.co.uk/citations?user=IfMKjBgAAAAJ;;H6euRhAAAAAJ;nJa1KGIAAAAJ", "orcid": "0000-0001-6817-5972;;;;0000-0001-6241-3028", "linkedin": "nathan-grinsztajn-960379139/?locale=en_US;;shikha-surana/;clement-bonnet16/;tom-barrett-62b180a2/", "or_profile": "~Nathan_Grinsztajn1;~Daniel_Furelos-Blanco1;~Shikha_Surana1;~Cl\u00e9ment_Bonnet1;~Thomas_D_Barrett1", "aff": "InstaDeep;Imperial College London, Imperial College London;InstaDeep;InstaDeep;InstaDeep", "aff_domain": "instadeep.com;imperial.ac.uk;instadeep.com;instadeep.com;instadeep.com", "position": "Researcher;PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ngrinsztajn2023winner,\ntitle={Winner Takes It All: Training Performant {RL} Populations for Combinatorial Optimization},\nauthor={Nathan Grinsztajn and Daniel Furelos-Blanco and Shikha Surana and Cl{\\'e}ment Bonnet and Thomas D Barrett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v6VpqGcGAR}\n}", "github": "", "project": "", "reviewers": "51fw;Ym2T;ybFK;wqBo", "pdf_size": 1248516, "rating": "5;5;6;8", "confidence": "4;4;4;4", "soundness": "2;3;4;4", 
"novelty": "2;3;3;3", "presentation": "3;4;4;3", "wc_summary": "39;53;55;166", "wc_strengths": "26;67;94;238", "wc_weaknesses": "258;49;193;47", "wc_questions": "7;58;128;0", "wc_limitations": "1;6;1;11", "wc_review": "331;233;471;462", "wc_reply_reviewers": "112;90;33;19", "wc_reply_authors": "743;242;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 78.25, 51.036139156484005 ], "wc_strengths_avg": [ 106.25, 79.8259826121796 ], "wc_weaknesses_avg": [ 136.75, 91.67980966385129 ], "wc_questions_avg": [ 48.25, 51.197534120307004 ], "wc_limitations_avg": [ 4.75, 4.14578098794425 ], "wc_review_avg": [ 374.25, 98.59354694907776 ], "wc_reply_reviewers_avg": [ 63.5, 38.616706229299254 ], "wc_reply_authors_avg": [ 246.25, 303.33840426164306 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3963766458363444022&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "instadeep.com;imperial.ac.uk;instadeep.com;instadeep.com;instadeep.com", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "InstaDeep;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.instadeep.com;https://www.imperial.ac.uk", "aff_unique_abbr": "InstaDeep;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Deep Equilibrium Based Neural Operators for Steady-State PDEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70120", "id": "v6YzxwJlQn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32cc61322f1e2f56f989d29ccc7cfbb7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v6YzxwJlQn", "openreview": "https://openreview.net/forum?id=v6YzxwJlQn", "poster": "/media/PosterPDFs/NeurIPS%202023/70120.png?t=1701981130.5352252", "slides": "https://nips.cc/virtual/2023/poster/70120", "video": "https://nips.cc/virtual/2023/poster/70120", "author_site": "Tanya Marwah, Ashwini Pokle, J. Zico Kolter, Zachary Lipton, Jianfeng Lu, Andrej Risteski", "tldr": "", "abstract": "Data-driven machine learning approaches are being increasingly used to solve partial differential equations (PDEs). They have shown particularly striking successes when training an operator, which takes as input a PDE in some family, and outputs its solution. However, the architectural design space, especially given structural knowledge of the PDE family of interest, is still poorly understood. We seek to remedy this gap by studying the benefits of weight-tied neural network architectures for steady-state PDEs. To achieve this, we first demonstrate that the solution of most steady-state PDEs can be expressed as a fixed point of a non-linear operator. Motivated by this observation, we propose FNO-DEQ, a deep equilibrium variant of the FNO architecture that directly solves for the solution of a steady-state PDE as the infinite-depth fixed point of an implicit operator layer using a black-box root solver and differentiates analytically through this fixed point resulting in $\\mathcal{O}(1)$ training memory. 
Our experiments indicate that FNO-DEQ-based architectures outperform FNO-based baselines with $4\\times$ the number of parameters in predicting the solution to steady-state PDEs such as Darcy Flow and steady-state incompressible Navier-Stokes. Finally, we show FNO-DEQ is more robust when trained with datasets with more noisy observations than the FNO-based baselines, demonstrating the benefits of using appropriate inductive biases in architectural design for different neural network based PDE solvers. Further, we show a universal approximation result that demonstrates that FNO-DEQ can approximate the solution to any steady-state PDE that can be written as a fixed point equation.", "keywords": "Deep Equilibrium Models;Partial Differential Equations;Neural Operators", "primary_area": "", "supplementary_material": "/attachment/bf34e9a9e484fd336e46dcb277b4f11707e07062.pdf", "author": "Tanya Marwah;Ashwini Pokle;J Zico Kolter;Zachary Chase Lipton;Jianfeng Lu;Andrej Risteski", "authorids": "~Tanya_Marwah1;~Ashwini_Pokle1;~J_Zico_Kolter1;~Zachary_Chase_Lipton1;~Jianfeng_Lu1;~Andrej_Risteski2", "gender": "F;F;Unspecified;M;M;M", "homepage": "https://tm157.github.io/;https://ashwinipokle.github.io/;http://zacklipton.com;https://services.math.duke.edu/~jianfeng/;;http://www.zicokolter.com", "dblp": "190/7486;228/5527;;82/6187-1.html;63/11143;67/2526", "google_scholar": "_Y_XvN4AAAAJ;o_1YtVoAAAAJ;MN9Kfg8AAAAJ;ej9SRrAAAAAJ;;UXh1I6UAAAAJ", "orcid": ";;;0000-0001-6255-5165;;", "linkedin": ";;;;;", "or_profile": "~Tanya_Marwah1;~Ashwini_Pokle1;~Zachary_Chase_Lipton1;~Jianfeng_Lu1;~Andrej_Risteski2;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Duke University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;duke.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmarwah2023deep,\ntitle={Deep Equilibrium Based Neural Operators for Steady-State {PDE}s},\nauthor={Tanya Marwah and Ashwini Pokle and J Zico Kolter and Zachary Chase Lipton and Jianfeng Lu and Andrej Risteski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v6YzxwJlQn}\n}", "github": "", "project": "", "reviewers": "Kt2x;SM2z;HGbF;wRdk", "pdf_size": 2736767, "rating": "5;6;7;9", "confidence": "1;5;3;5", "soundness": "2;4;3;4", "novelty": "3;4;3;4", "presentation": "3;4;3;3", "wc_summary": "105;65;111;62", "wc_strengths": "137;30;25;100", "wc_weaknesses": "169;189;112;66", "wc_questions": "5;1;20;44", "wc_limitations": "5;1;1;48", "wc_review": "421;286;269;320", "wc_reply_reviewers": "0;0;44;10", "wc_reply_authors": "37;37;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 1.6583123951777 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.75, 22.37604746151563 ], "wc_strengths_avg": [ 73.0, 47.37615433949868 ], "wc_weaknesses_avg": [ 134.0, 48.36837809974612 ], "wc_questions_avg": [ 17.5, 16.859715300087366 ], "wc_limitations_avg": [ 13.75, 19.84155991851447 ], "wc_review_avg": [ 324.0, 58.93640640554869 ], "wc_reply_reviewers_avg": [ 13.5, 18.07622748252522 ], "wc_reply_authors_avg": [ 18.5, 18.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], 
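The fixed-point formulation above lends itself to a compact sketch. Below is a minimal deep-equilibrium forward pass (not the paper's FNO-DEQ implementation), assuming a generic weight-tied layer `f(z, x)`; it uses the common one-step-gradient simplification rather than differentiating through a black-box root solver, which preserves the O(1) training-memory property.

```python
import torch

def deq_solve(f, x, z0, iters=50, tol=1e-4):
    """Sketch of a deep-equilibrium layer: find z* with f(z*, x) = z*.

    The fixed-point iterations run under no_grad, so memory cost is
    independent of the effective depth; a single differentiable
    application of f at the fixed point gives a cheap one-step
    approximation of the implicit gradient.
    """
    z = z0
    with torch.no_grad():
        for _ in range(iters):
            z_next = f(z, x)
            converged = (z_next - z).norm() < tol * (z.norm() + 1e-8)
            z = z_next
            if converged:
                break
    return f(z, x)  # re-attach the fixed point to the autograd graph
```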
"replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6625413488689132, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10670306070992490946&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cmu.edu;andrew.cmu.edu;cmu.edu;duke.edu;cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.duke.edu", "aff_unique_abbr": "CMU;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Algorithms for Relaxed Pareto Set Identification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70119", "id": "v6jIxRRDyD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e976e7930460b5c3167a104ba8cc39c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v6jIxRRDyD", "openreview": "https://openreview.net/forum?id=v6jIxRRDyD", "poster": "/media/PosterPDFs/NeurIPS%202023/70119.png?t=1699958617.924882", "slides": "https://nips.cc/virtual/2023/poster/70119", "video": "https://nips.cc/virtual/2023/poster/70119", "author_site": "Cyrille KONE, Emilie Kaufmann, Laura Richert", "tldr": "", "abstract": "In this paper we revisit the fixed-confidence identification of the Pareto optimal set in a multi-objective multi-armed bandit model. As the sample complexity to identify the exact Pareto set can be very large, a relaxation allowing to output some additional near-optimal arms has been studied. In this work we also tackle alternative relaxations that allow instead to identify a relevant \\emph{subset} of the Pareto set. Notably, we propose a single sampling strategy, called Adaptive Pareto Exploration, that can be used in conjunction with different stopping rules to take into account different relaxations of the Pareto Set Identification problem. We analyze the sample complexity of these different combinations, quantifying in particular the reduction in sample complexity that occurs when one seeks to identify at most $k$ Pareto optimal arms. 
We showcase the good practical performance of Adaptive Pareto Exploration on a real-world scenario, in which we adaptively explore several vaccination strategies against Covid-19 in order to find the optimal ones when multiple immunogenicity criteria are taken into account.", "keywords": "bandit;pure-exploration;pareto front;pareto set", "primary_area": "", "supplementary_material": "/attachment/68d82aae4975fb31ce5ade315e9c6c82309ae479.zip", "author": "Cyrille Kone;Emilie Kaufmann;Laura Richert", "authorids": "~Cyrille_Kone1;~Emilie_Kaufmann1;laura.richert@u-bordeaux.fr", "gender": "M;F;", "homepage": "http://cyrille-kone.github.io;https://emiliekaufmann.github.io/;", "dblp": ";67/11350;", "google_scholar": ";9GE1vx4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Cyrille_Kone1;~Emilie_Kaufmann1;laura.richert@u-bordeaux.fr", "aff": "Universit\u00e9 de Lille;CNRS;", "aff_domain": "univ-lille.fr;cnrs.fr;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nkone2023adaptive,\ntitle={Adaptive Algorithms for Relaxed Pareto Set Identification},\nauthor={Cyrille Kone and Emilie Kaufmann and Laura Richert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v6jIxRRDyD}\n}", "github": "", "project": "", "reviewers": "J7Kd;rUsc;yuHu;9Lr4", "pdf_size": 728760, "rating": "5;6;6;6", "confidence": "2;2;3;3", "soundness": "2;3;4;3", "novelty": "2;3;3;2", "presentation": "2;3;4;3", "wc_summary": "22;104;84;71", "wc_strengths": "22;10;137;50", "wc_weaknesses": "69;18;135;87", "wc_questions": "15;11;113;3", "wc_limitations": "2;1;9;5", "wc_review": "130;144;478;216", "wc_reply_reviewers": "9;22;20;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 30.235533731025818 ], "wc_strengths_avg": [ 54.75, 49.655689502815285 ], "wc_weaknesses_avg": [ 77.25, 41.85913878712748 ], "wc_questions_avg": [ 35.5, 44.95275297465106 ], "wc_limitations_avg": [ 4.25, 3.112474899497183 ], "wc_review_avg": [ 242.0, 140.10710188994705 ], "wc_reply_reviewers_avg": [ 12.75, 8.870597499605086 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18342494080579668694&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 12, "email": "univ-lille.fr;cnrs.fr;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 de Lille;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-lille.fr;https://www.cnrs.fr", "aff_unique_abbr": "UdeL;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "End-To-End Latent Variational Diffusion Models for Inverse Problems in High Energy Physics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70118", "id": "v7WWesSiOu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd830afc6208a346e4ec5caf1b08b4b4-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=v7WWesSiOu", "openreview": "https://openreview.net/forum?id=v7WWesSiOu", "poster": "/media/PosterPDFs/NeurIPS%202023/70118.png?t=1702655447.6072872", "slides": "https://nips.cc/virtual/2023/poster/70118", "video": "https://nips.cc/virtual/2023/poster/70118", "author_site": "Alexander Shmakov, Kevin Greif, Michael Fenton, Aishik Ghosh, Pierre Baldi, Daniel Whiteson", "tldr": "", "abstract": "High-energy collisions at the Large Hadron Collider (LHC) provide valuable insights into open questions in particle physics. However, detector effects must be corrected before measurements can be compared to certain theoretical predictions or measurements from other detectors. Methods to solve this inverse problem of mapping detector observations to theoretical quantities of the underlying collision are essential parts of many physics analyses at the LHC. We investigate and compare various generative deep learning methods to approximate this inverse mapping. We introduce a novel unified architecture, termed latent variational diffusion models, which combines the latent learning of cutting-edge generative art approaches with an end-to-end variational framework. We demonstrate the effectiveness of this approach for reconstructing global distributions of theoretical kinematic quantities, as well as for ensuring the adherence of the learned posterior distributions to known physics constraints. Our unified approach achieves a distribution-free distance to the truth of over 20 times smaller than non-latent state-of-the-art baseline and 3 times smaller than traditional latent diffusion models.", "keywords": "Diffusion;Variational;VAE;LDM;Physics;Unfolding", "primary_area": "", "supplementary_material": "/attachment/21d1e6741183c4ac868df732ca0fb2e71e4959dc.zip", "author": "Alexander Shmakov;Kevin Greif;Michael James Fenton;Aishik Ghosh;Pierre Baldi;Daniel Whiteson", "authorids": "~Alexander_Shmakov1;kgreif@uci.edu;~Michael_James_Fenton1;aishikg@uci.edu;~Pierre_Baldi1;~Daniel_Whiteson1", "gender": ";;;;;", "homepage": ";;https://mfenton.web.cern.ch/;;;", "dblp": ";;;;;09/4532", "google_scholar": ";;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Alexander_Shmakov1;kgreif@uci.edu;~Michael_James_Fenton1;aishikg@uci.edu;~Pierre_Baldi1;~Daniel_Whiteson1", "aff": ";;University of California, Irvine;;;University of California, Irvine", "aff_domain": ";;uci.edu;;;uci.edu", "position": ";;Postdoc;;;Professor", "bibtex": "@inproceedings{\nshmakov2023endtoend,\ntitle={End-To-End Latent Variational Diffusion Models for Inverse Problems in High Energy Physics},\nauthor={Alexander Shmakov and Kevin Greif and Michael James Fenton and Aishik Ghosh and Pierre Baldi and Daniel Whiteson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v7WWesSiOu}\n}", "github": "", "project": "", "reviewers": "sYuc;aaox;GAKk;Btsg", "pdf_size": 3797192, "rating": "4;4;7;9", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;2;4;4", "wc_summary": "59;70;27;66", "wc_strengths": "25;55;101;19", "wc_weaknesses": "202;35;43;1", "wc_questions": "27;475;145;1", "wc_limitations": "51;21;1;1", "wc_review": "364;656;317;88", "wc_reply_reviewers": "0;0;18;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], 
"novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 55.5, 16.91892431568863 ], "wc_strengths_avg": [ 50.0, 32.449961479175904 ], "wc_weaknesses_avg": [ 70.25, 77.68325109056649 ], "wc_questions_avg": [ 162.0, 188.68227261722285 ], "wc_limitations_avg": [ 18.5, 20.463381929681123 ], "wc_review_avg": [ 356.25, 202.11924079612015 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8164965809277261, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6708430926207122801&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";;uci.edu;;;uci.edu", "author_num": 6, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Pitfall of Optimism: Distributional Reinforcement Learning by Randomizing Risk Criterion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70117", "id": "v8u3EFAyW9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b1eb88348ee19a33c81cf5bc3fb8e9d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v8u3EFAyW9", "openreview": "https://openreview.net/forum?id=v8u3EFAyW9", "poster": "/media/PosterPDFs/NeurIPS%202023/70117.png?t=1698293748.1411788", "slides": "https://nips.cc/virtual/2023/poster/70117", "video": "https://nips.cc/virtual/2023/poster/70117", "author_site": "Taehyun Cho, Seungyub Han, Heesoo Lee, Kyungjae Lee, Jungwoo Lee", "tldr": "", "abstract": "Distributional reinforcement learning algorithms have attempted to utilize estimated uncertainty for exploration, such as optimism in the face of uncertainty. However, using the estimated variance for optimistic exploration may cause biased data collection and hinder convergence or performance. In this paper, we present a novel distributional reinforcement learning that selects actions by randomizing risk criterion without losing the risk-neutral objective. We provide a perturbed distributional Bellman optimality operator by distorting the risk measure. Also,we prove the convergence and optimality of the proposed method with the weaker contraction property. Our theoretical results support that the proposed method does not fall into biased exploration and is guaranteed to converge to an optimal return. 
Finally, we empirically show that our method outperforms other existing distribution-based algorithms in various environments including 55 Atari games.", "keywords": "distributional reinforcement learning;risk", "primary_area": "", "supplementary_material": "/attachment/58813d5c6e69308d6e9482298c9f7169b536afca.zip", "author": "Taehyun Cho;Seungyub Han;Heesoo Lee;Kyungjae Lee;Jungwoo Lee", "authorids": "~Taehyun_Cho1;~Seungyub_Han1;~Heesoo_Lee1;~Kyungjae_Lee1;~Jungwoo_Lee1", "gender": "M;M;M;M;M", "homepage": ";;https://orcid.org/0000-0001-5525-1892;https://sites.google.com/view/kyungjaelee;https://cml.snu.ac.kr", "dblp": "274/0287;347/8731;;13/7265-1;34/516-1", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;ot1-XNAAAAAJ;;https://scholar.google.co.kr/citations?user=OZZJagIAAAAJ;j98IWfoAAAAJ", "orcid": "0000-0003-1047-9847;0009-0001-8704-8968;;0000-0003-0147-2715;0000-0002-6804-980X", "linkedin": ";;;;", "or_profile": "~Taehyun_Cho1;~Seungyub_Han1;~Heesoo_Lee1;~Kyungjae_Lee1;~Jungwoo_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;ChungAng University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;cau.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncho2023pitfall,\ntitle={Pitfall of Optimism: Distributional Reinforcement Learning by Randomizing Risk Criterion},\nauthor={Taehyun Cho and Seungyub Han and Heesoo Lee and Kyungjae Lee and Jungwoo Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v8u3EFAyW9}\n}", "github": "", "project": "", "reviewers": "jYGR;qLSL;rSdX;u65W;B5Gh", "pdf_size": 3810598, "rating": "4;5;6;7;7", "confidence": "4;3;4;4;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "119;179;101;77;91", "wc_strengths": "34;152;51;154;196", "wc_weaknesses": "138;216;227;222;396", "wc_questions": "174;248;143;75;287", "wc_limitations": "3;88;4;14;191", "wc_review": "468;883;526;542;1161", "wc_reply_reviewers": "0;36;9;30;74", "wc_reply_authors": "0;364;23;29;22", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;3;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 113.4, 35.539274049985885 ], "wc_strengths_avg": [ 117.4, 63.3706556696394 ], "wc_weaknesses_avg": [ 239.8, 84.6271823943111 ], "wc_questions_avg": [ 185.4, 75.30896360991831 ], "wc_limitations_avg": [ 60.0, 72.72688636261007 ], "wc_review_avg": [ 716.0, 266.00526310582654 ], "wc_reply_reviewers_avg": [ 29.8, 25.740240869113872 ], "wc_reply_authors_avg": [ 87.6, 138.55049620986566 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8709756511881132930&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;cau.ac.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Seoul National University;Chungang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr":
"SNU;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Deep Neural Collapse Is Provably Optimal for the Deep Unconstrained Features Model", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70116", "id": "v9yC7sSXf3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a60c43ba078b723d3d517d28c50ded4c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=v9yC7sSXf3", "openreview": "https://openreview.net/forum?id=v9yC7sSXf3", "poster": "/media/PosterPDFs/NeurIPS%202023/70116.png?t=1701824570.4001768", "slides": "https://nips.cc/virtual/2023/poster/70116", "video": "https://nips.cc/virtual/2023/poster/70116", "author_site": "Peter S\u00faken\u00edk, Marco Mondelli, Christoph Lampert", "tldr": "", "abstract": "Neural collapse (NC) refers to the surprising structure of the last layer of deep neural networks in the terminal phase of gradient descent training. Recently, an increasing amount of experimental evidence has pointed to the propagation of NC to earlier layers of neural networks. However, while the NC in the last layer is well studied theoretically, much less is known about its multi-layered counterpart - deep neural collapse (DNC). In particular, existing work focuses either on linear layers or only on the last two layers at the price of an extra assumption. Our work fills this gap by generalizing the established analytical framework for NC - the unconstrained features model - to multiple non-linear layers. Our key technical contribution is to show that, in a deep unconstrained features model, the unique global optimum for binary classification exhibits all the properties typical of DNC. This explains the existing experimental evidence of DNC. 
We also empirically show that (i) by optimizing deep unconstrained features models via gradient descent, the resulting solution agrees well with our theory, and (ii) trained networks recover the unconstrained features suitable for the occurrence of DNC, thus supporting the validity of this modeling principle.", "keywords": "neural collapse;unconstrained features model;deep learning", "primary_area": "", "supplementary_material": "/attachment/200989b9e784bd90017c545d20db0a3d4714fb4f.zip", "author": "Peter S\u00faken\u00edk;Marco Mondelli;Christoph H Lampert", "authorids": "~Peter_S\u00faken\u00edk1;~Marco_Mondelli1;~Christoph_H_Lampert1", "gender": "M;M;M", "homepage": "https://research-explorer.app.ist.ac.at/person/d64d6a8d-eb8e-11eb-b029-96fd216dec3c;http://marcomondelli.com;http://cvml.ist.ac.at/", "dblp": "304/2274;120/7089;67/2136", "google_scholar": "qEhrUDAAAAAJ;BHdSb5AAAAAJ;https://scholar.google.at/citations?user=iCf3SwgAAAAJ", "orcid": ";;0000-0001-8622-7887", "linkedin": ";;", "or_profile": "~Peter_S\u00faken\u00edk1;~Marco_Mondelli1;~Christoph_H_Lampert1", "aff": "Institute of Science and Technology;Institute of Science and Technology;Institute of Science and Technology Austria", "aff_domain": "ist.ac.at;ist.ac.at;ist.ac.at", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\ns{\\'u}ken{\\'\\i}k2023deep,\ntitle={Deep Neural Collapse Is Provably Optimal for the Deep Unconstrained Features Model},\nauthor={Peter S{\\'u}ken{\\'\\i}k and Marco Mondelli and Christoph H Lampert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=v9yC7sSXf3}\n}", "github": "", "project": "", "reviewers": "9Ryi;nhUP;FUzi;Egs3", "pdf_size": 4353565, "rating": "6;6;7;7", "confidence": "4;5;4;4", "soundness": "3;3;4;3", "novelty": "3;2;3;3", "presentation": "3;3;4;4", "wc_summary": "101;120;44;83", "wc_strengths": "53;246;66;103", "wc_weaknesses": "266;262;126;154", "wc_questions": "2;101;2;342", "wc_limitations": "1;2;9;26", "wc_review": "423;731;247;708", "wc_reply_reviewers": "0;0;0;34", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.0, 28.062430400804562 ], "wc_strengths_avg": [ 117.0, 76.7039764288658 ], "wc_weaknesses_avg": [ 202.0, 62.80127387243033 ], "wc_questions_avg": [ 111.75, 138.94310886114505 ], "wc_limitations_avg": [ 9.5, 10.012492197250394 ], "wc_review_avg": [ 527.25, 202.23300299407117 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13637683701853604423&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "ist.ac.at;ist.ac.at;ist.ac.at", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Institute of Science and Technology;Institute of Science and Technology Austria", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ist.ac.at", "aff_unique_abbr": ";IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": 
";Austria" }, { "title": "MVDiffusion: Enabling Holistic Multi-view Image Generation with Correspondence-Aware Diffusion", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70115", "id": "vA0vj1mY77", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a0da690a47b2f52faa63f6fe054057b5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vA0vj1mY77", "openreview": "https://openreview.net/forum?id=vA0vj1mY77", "poster": "/media/PosterPDFs/NeurIPS%202023/70115.png?t=1699426074.3945503", "slides": "https://nips.cc/virtual/2023/poster/70115", "video": "https://nips.cc/virtual/2023/poster/70115", "author_site": "Shitao Tang, Fuyang Zhang, Jiacheng Chen, Peng Wang, Yasutaka Furukawa", "tldr": "", "abstract": "This paper introduces MVDiffusion, a simple yet effective method for generating consistent multi-view images from text prompts given pixel-to-pixel correspondences (e.g., perspective crops from a panorama or multi-view images given depth maps and poses). Unlike prior methods that rely on iterative image warping and inpainting, MVDiffusion simultaneously generates all images with a global awareness, effectively addressing the prevalent error accumulation issue. At its core, MVDiffusion processes perspective images in parallel with a pre-trained text-to-image diffusion model, while integrating novel correspondence-aware attention layers to facilitate cross-view interactions. For panorama generation, while only trained with 10k panoramas, MVDiffusion is able to generate high-resolution photorealistic images for arbitrary texts or extrapolate one perspective image to a 360-degree view. For multi-view depth-to-image generation, MVDiffusion demonstrates state-of-the-art performance for texturing a scene mesh. 
The project page is at https://mvdiffusion.github.io/.", "keywords": "multiview; image generation; generative model; diffusion models", "primary_area": "", "supplementary_material": "/attachment/355d4598983d392c0252a5aedbf7c5973366aec9.zip", "author": "Shitao Tang;Fuyang Zhang;Jiacheng Chen;Peng Wang;Yasutaka Furukawa", "authorids": "~Shitao_Tang1;~Fuyang_Zhang1;~Jiacheng_Chen1;~Peng_Wang2;~Yasutaka_Furukawa1", "gender": "M;M;;M;M", "homepage": "https://tangshitao.github.io/;https://zhangfuyang.github.io/;;https://pengwangucla.github.io/peng-wang.github.io/;https://www.cs.sfu.ca/~furukawa", "dblp": "203/8797;138/8101;;95/4442;37/1720", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;Svk4ntYAAAAJ;https://scholar.google.com.tw/citations?user=wCxzFrMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Shitao_Tang1;~Fuyang_Zhang1;~Jiacheng_Chen1;~Peng_Wang2;~Yasutaka_Furukawa1", "aff": "Simon Fraser University;Simon Fraser University;;Bytedance US AILab;Simon Fraser University", "aff_domain": "sfu.ca;sfu.ca;;bytedance.com;sfu.ca", "position": "PhD student;PhD student;;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\ntang2023mvdiffusion,\ntitle={{MVD}iffusion: Enabling Holistic Multi-view Image Generation with Correspondence-Aware Diffusion},\nauthor={Shitao Tang and Fuyang Zhang and Jiacheng Chen and Peng Wang and Yasutaka Furukawa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vA0vj1mY77}\n}", "github": "", "project": "", "reviewers": "2y4s;xhAT;VE1c;x9w4", "pdf_size": 12352076, "rating": "5;6;7;7", "confidence": "3;5;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "36;33;52;90", "wc_strengths": "20;50;53;87", "wc_weaknesses": "118;73;53;337", "wc_questions": "4;51;49;14", "wc_limitations": "4;41;14;1", "wc_review": "182;248;221;529", "wc_reply_reviewers": "0;0;0;44", "wc_reply_authors": "0;0;0;8", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 52.75, 22.68672519338126 ], "wc_strengths_avg": [ 52.5, 23.732888572611635 ], "wc_weaknesses_avg": [ 145.25, 113.18209884959724 ], "wc_questions_avg": [ 29.5, 20.81465829649865 ], "wc_limitations_avg": [ 15.0, 15.763882770434446 ], "wc_review_avg": [ 295.0, 137.12220826693246 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 2.0, 3.4641016151377544 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "sfu.ca;sfu.ca;;bytedance.com;sfu.ca", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Simon Fraser University;ByteDance", "aff_unique_dep": ";AI Lab", "aff_unique_url": "https://www.sfu.ca;https://www.bytedance.com", "aff_unique_abbr": "SFU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";United States", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Reflexion: language agents with verbal reinforcement learning", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70114", "id": "vAElhFcKW6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b44b878bb782e6954cd888628510e90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vAElhFcKW6", "openreview": "https://openreview.net/forum?id=vAElhFcKW6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70114", "video": "https://nips.cc/virtual/2023/poster/70114", "author_site": "Noah Shinn, Federico Cassano, Ashwin Gopinath, Karthik Narasimhan, Shunyu Yao", "tldr": "", "abstract": "Large language models (LLMs) have been increasingly used to interact with external environments (e.g., games, compilers, APIs) as goal-driven agents. However, it remains challenging for these language agents to quickly and efficiently learn from trial-and-error as traditional reinforcement learning methods require extensive training samples and expensive model fine-tuning. We propose \\emph{Reflexion}, a novel framework to reinforce language agents not by updating weights, but instead through linguistic feedback. Concretely, Reflexion agents verbally reflect on task feedback signals, then maintain their own reflective text in an episodic memory buffer to induce better decision-making in subsequent trials. Reflexion is flexible enough to incorporate various types (scalar values or free-form language) and sources (external or internally simulated) of feedback signals, and obtains significant improvements over a baseline agent across diverse tasks (sequential decision-making, coding, language reasoning). For example, Reflexion achieves a 91\\% pass@1 accuracy on the HumanEval coding benchmark, surpassing the previous state-of-the-art GPT-4 that achieves 80\\%. We also conduct ablation and analysis studies using different feedback signals, feedback incorporation methods, and agent types, and provide insights into how they affect performance. 
We release all code, demos, and datasets at \\url{https://github.com/noahshinn024/reflexion}.", "keywords": "language model;reasoning;decision making;programming", "primary_area": "", "supplementary_material": "/attachment/112623c92978ac8add7aed68a6af3f0f852d3ab1.gz", "author": "Noah Shinn;Federico Cassano;Ashwin Gopinath;Karthik R Narasimhan;Shunyu Yao", "authorids": "~Noah_Shinn1;~Federico_Cassano1;~Ashwin_Gopinath1;~Karthik_R_Narasimhan1;~Shunyu_Yao1", "gender": ";;Not Specified;M;M", "homepage": "https://noahshinn.com;https://federico.codes;https://ashwingopinath.com/;http://www.karthiknarasimhan.com;https://ysymyth.github.io", "dblp": "342/9223;326/8944;;147/0322;156/1038", "google_scholar": "zTfIpA4AAAAJ;n8tK15oAAAAJ;https://scholar.google.com/citations?hl=en;euc0GX4AAAAJ;qJBXk9cAAAAJ", "orcid": ";0000-0002-9318-7454;;;", "linkedin": ";;;;", "or_profile": "~Noah_Shinn1;~Federico_Cassano1;~Ashwin_Gopinath1;~Karthik_R_Narasimhan1;~Shunyu_Yao1", "aff": "Northeastern University;Northeastern University;Massachusetts Institute of Technology;Princeton University;Princeton University", "aff_domain": "northeastern.edu;neu.edu;mit.edu;princeton.edu;princeton.edu", "position": "Undergrad student;Undergrad student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nshinn2023reflexion,\ntitle={Reflexion: language agents with verbal reinforcement learning},\nauthor={Noah Shinn and Federico Cassano and Ashwin Gopinath and Karthik R Narasimhan and Shunyu Yao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vAElhFcKW6}\n}", "github": "", "project": "", "reviewers": "BKdQ;U48e;6XLB;V8mX;k1Ks", "pdf_size": 509025, "rating": "4;6;7;7;7", "confidence": "3;4;4;4;3", "soundness": "2;3;4;3;3", "novelty": "3;3;3;4;4", "presentation": "3;3;4;3;3", "wc_summary": "38;73;55;77;80", "wc_strengths": "25;87;64;85;109", "wc_weaknesses": "79;136;222;98;206", "wc_questions": "15;75;5;73;106", "wc_limitations": "3;10;3;7;96", "wc_review": "160;381;349;340;597", "wc_reply_reviewers": "0;48;152;89;225", "wc_reply_authors": "0;0;36;0;163", "reply_reviewers": "0;1;1;1;2", "reply_authors": "1;1;2;1;3", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 64.6, 15.882065356873456 ], "wc_strengths_avg": [ 74.0, 28.340783334269364 ], "wc_weaknesses_avg": [ 148.2, 56.99964912172706 ], "wc_questions_avg": [ 54.8, 38.53517873320428 ], "wc_limitations_avg": [ 23.8, 36.19613239007726 ], "wc_review_avg": [ 365.4, 139.27325658574944 ], "wc_reply_reviewers_avg": [ 102.8, 78.88574015625385 ], "wc_reply_authors_avg": [ 39.8, 63.15821403428061 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4900980294098034, "gs_citation": 1443, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3200512419935847312&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "northeastern.edu;neu.edu;mit.edu;princeton.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Northeastern University;Massachusetts Institute of Technology;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;https://web.mit.edu;https://www.princeton.edu", 
"aff_unique_abbr": "NEU;MIT;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "An Exploration-by-Optimization Approach to Best of Both Worlds in Linear Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70113", "id": "vBHKSTgcYQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e262fc23ec7275230ee77c55d0cc9555-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vBHKSTgcYQ", "openreview": "https://openreview.net/forum?id=vBHKSTgcYQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70113.png?t=1702312646.5558267", "slides": "https://nips.cc/virtual/2023/poster/70113", "video": "https://nips.cc/virtual/2023/poster/70113", "author_site": "Shinji Ito, Kei Takemura", "tldr": "", "abstract": "In this paper, we consider how to construct best-of-both-worlds linear bandit algorithms that achieve nearly optimal performance for both stochastic and adversarial environments. For this purpose, we show that a natural approach referred to as exploration by optimization [Lattimore and Szepesv\u00e1ri, 2020] works well. Specifically, an algorithm constructed using this approach achieves $O(d \\sqrt{ T \\log{T}})$-regret in adversarial environments and $O(\\frac{d^2 \\log T}{\\Delta_{\\min}} )$-regret in stochastic environments. Symbols $d$, $T$ and $\\Delta_{\\min}$ here represent the dimensionality of the action set, the time horizon, and the minimum sub-optimality gap, respectively. We also show that this algorithm has even better theoretical guarantees for important special cases including the multi-armed bandit problem and multitask bandits.", "keywords": "bandit;linear bandit;best of both worlds;exploration by optimization", "primary_area": "", "supplementary_material": "", "author": "Shinji Ito;Kei Takemura", "authorids": "~Shinji_Ito1;~Kei_Takemura1", "gender": "M;M", "homepage": "https://researchmap.jp/shinji_ito?lang=en;", "dblp": "49/852;248/9211", "google_scholar": "https://scholar.google.co.jp/citations?user=GX0V06wAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Shinji_Ito1;~Kei_Takemura1", "aff": "NEC;NEC Corporation", "aff_domain": "nec.com;nec.com", "position": "Principal Researcher;Researcher", "bibtex": "@inproceedings{\nito2023an,\ntitle={An Exploration-by-Optimization Approach to Best of Both Worlds in Linear Bandits},\nauthor={Shinji Ito and Kei Takemura},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vBHKSTgcYQ}\n}", "github": "", "project": "", "reviewers": "KqE4;X2KX;mre2;JH8B", "pdf_size": 381719, "rating": "5;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "30;181;23;63", "wc_strengths": "59;148;74;113", "wc_weaknesses": "40;241;84;47", "wc_questions": "5;110;4;4", "wc_limitations": "1;44;4;7", "wc_review": "135;724;189;234", "wc_reply_reviewers": "0;25;15;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 63.45618567169004 ], "wc_strengths_avg": [ 98.5, 34.7167106736799 ], "wc_weaknesses_avg": [ 103.0, 81.40945890988344 ], "wc_questions_avg": [ 30.75, 
45.75683009125523 ], "wc_limitations_avg": [ 14.0, 17.449928366615147 ], "wc_review_avg": [ 320.5, 235.5827879960673 ], "wc_reply_reviewers_avg": [ 14.25, 9.03811374126261 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17020672301947810854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nec.com;nec.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "NEC Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.nec.com", "aff_unique_abbr": "NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Neural Algorithmic Reasoning Without Intermediate Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70112", "id": "vBwSACOB3x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a2370db7c99791ad5d9f3ef48ad6d464-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vBwSACOB3x", "openreview": "https://openreview.net/forum?id=vBwSACOB3x", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70112", "video": "https://nips.cc/virtual/2023/poster/70112", "author_site": "Gleb Rodionov, Liudmila Prokhorenkova", "tldr": "", "abstract": "Neural algorithmic reasoning is an emerging area of machine learning focusing on building models that can imitate the execution of classic algorithms, such as sorting, shortest paths, etc. One of the main challenges is to learn algorithms that are able to generalize to out-of-distribution data, in particular with significantly larger input sizes. Recent work on this problem has demonstrated the advantages of learning algorithms step-by-step, giving models access to all intermediate steps of the original algorithm. In this work, we instead focus on learning neural algorithmic reasoning only from the input-output pairs without appealing to the intermediate supervision. We propose simple but effective architectural improvements and also build a self-supervised objective that can regularise intermediate computations of the model without access to the algorithm trajectory. We demonstrate that our approach is competitive to its trajectory-supervised counterpart on tasks from the CLRS Algorithmic Reasoning Benchmark and achieves new state-of-the-art results for several problems, including sorting, where we obtain significant improvements. 
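The no-intermediate-supervision setup can be contrasted with hint-based training in a few lines. A minimal sketch, assuming generic `encoder`/`processor`/`decoder` modules and omitting the paper's architectural improvements and self-supervised regulariser:

```python
import torch.nn.functional as F

def output_only_loss(encoder, processor, decoder, x, y, steps):
    """Train a neural reasoner from input-output pairs only (sketch).

    Unlike hint-supervised CLRS-style training, no loss is placed on the
    processor's intermediate states; only the final decoded output is
    compared against the ground-truth algorithm output y.
    """
    h = encoder(x)
    for _ in range(steps):   # unrolled algorithm execution
        h = processor(h)     # intermediate states stay unsupervised
    y_hat = decoder(h)
    return F.mse_loss(y_hat, y)
```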
Thus, learning without intermediate supervision is a promising direction for further research on neural reasoners.", "keywords": "neural algorithmic reasoning;graph neural networks;self-supervised regularization", "primary_area": "", "supplementary_material": "/attachment/9f42008873c86437e6f09296b452abda69032207.zip", "author": "Gleb Rodionov;Liudmila Prokhorenkova", "authorids": "~Gleb_Rodionov1;~Liudmila_Prokhorenkova1", "gender": ";F", "homepage": ";", "dblp": ";45/11468", "google_scholar": ";https://scholar.google.ru/citations?user=6JyZlSEAAAAJ", "orcid": ";", "linkedin": "rodionovgleb/;", "or_profile": "~Gleb_Rodionov1;~Liudmila_Prokhorenkova1", "aff": "Yandex;Yandex", "aff_domain": "yandex-team.ru;yandex-team.ru", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nrodionov2023neural,\ntitle={Neural Algorithmic Reasoning Without Intermediate Supervision},\nauthor={Gleb Rodionov and Liudmila Prokhorenkova},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vBwSACOB3x}\n}", "github": "", "project": "", "reviewers": "Fw9t;uiKS;y7mK;P3vy;Sqsb", "pdf_size": 424663, "rating": "4;5;6;7;7", "confidence": "4;3;2;3;4", "soundness": "2;2;3;3;2", "novelty": "1;3;3;3;3", "presentation": "2;2;2;3;4", "wc_summary": "56;119;70;62;148", "wc_strengths": "137;27;104;44;89", "wc_weaknesses": "281;272;198;22;275", "wc_questions": "104;37;46;36;4", "wc_limitations": "4;7;7;29;13", "wc_review": "582;462;425;193;529", "wc_reply_reviewers": "726;337;43;0;34", "wc_reply_authors": "827;816;0;0;0", "reply_reviewers": "2;2;1;0;1", "reply_authors": "3;2;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 91.0, 36.16628264005025 ], "wc_strengths_avg": [ 80.2, 40.02699089364575 ], "wc_weaknesses_avg": [ 209.6, 98.58722026713198 ], "wc_questions_avg": [ 45.4, 32.57974831087558 ], "wc_limitations_avg": [ 12.0, 8.988882021697693 ], "wc_review_avg": [ 438.2, 134.00507453078038 ], "wc_reply_reviewers_avg": [ 228.0, 277.0306842210805 ], "wc_reply_authors_avg": [ 328.6, 402.4661973383603 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1833396994056422, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13803462093051130195&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "yandex-team.ru;yandex-team.ru", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Yandex", "aff_unique_dep": "", "aff_unique_url": "https://yandex.com", "aff_unique_abbr": "Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Russian Federation" }, { "id": "vBx0yNQmik", "title": "Federated Virtual Learning on Heterogeneous Data with Local-global Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the popularity of Federated Learning (FL) for learning machine learning models in a distributed manner, it is susceptible to performance drops when training on heterogeneous data. In addition, FL inevitably faces the challenges of synchronization, efficiency and privacy.
Recently, dataset distillation has been explored in order to improve the efficiency and scalability of FL by creating a smaller, synthetic dataset that retains the performance of a model trained on the local private datasets. We discover that using distilled local datasets can amplify the heterogeneity issue in FL. To address this, we propose a new method, called Federated Virtual Learning on Heterogeneous Data with Local-Global Distillation (FedLGD), which trains FL using a smaller synthetic dataset (referred to as virtual data) created through a combination of local and global dataset distillation. Specifically, to handle synchronization and class imbalance, we propose iterative distribution matching to allow clients to have the same amount of balanced local virtual data; to harmonize the domain shifts, we use federated gradient matching to distill global virtual data that are shared with clients, without hindering data privacy, to rectify heterogeneous local training by enforcing local-global feature similarity. We experiment on both benchmark and real-world datasets that contain heterogeneous data from different sources, and further scale up to an FL scenario that contains a large number of clients with heterogeneous and class-imbalanced data. Our method outperforms state-of-the-art heterogeneous FL algorithms under various settings with a very limited amount of distilled virtual data.", "keywords": "Federated Learning;Dataset Distillation;Data Heterogeneity", "primary_area": "", "supplementary_material": "/attachment/264d608ddf6404c0c4b024e3d4a178291a5156c7.pdf", "author": "Chun-Yin Huang;Ruinan Jin;Can Zhao;Daguang Xu;Xiaoxiao Li", "authorids": "~Chun-Yin_Huang1;~Ruinan_Jin4;~Can_Zhao1;~Daguang_Xu2;~Xiaoxiao_Li1", "gender": "M;;F;M;Unspecified", "homepage": ";https://nanboy-ronan.github.io/Personal-Web/;https://research.nvidia.com/person/can-zhao;https://daguangxu.net/;https://xxlya.github.io/", "dblp": ";280/2578;35/2787-1;;71/8042", "google_scholar": "moi11dgAAAAJ;;CdzhxtYAAAAJ;r_VHYHAAAAAJ;sdENOQ4AAAAJ", "orcid": ";;0000-0001-7286-3452;;", "linkedin": ";;can-zhao-jhu/;;", "or_profile": "~Chun-Yin_Huang1;~Ruinan_Jin4;~Can_Zhao1;~Daguang_Xu2;~Xiaoxiao_Li1", "aff": "University of British Columbia;University of British Columbia;NVIDIA;NVIDIA;University of British Columbia", "aff_domain": "ubc.ca;ubc.ca;nvidia.com;nvidia.com;ece.ubc.ca", "position": "PhD student;Undergrad student;Applied deep learning research scientist;Research Manager;Assistant Professor", "bibtex": "@misc{\nhuang2023federated,\ntitle={Federated Virtual Learning on Heterogeneous Data with Local-global Distillation},\nauthor={Chun-Yin Huang and Ruinan Jin and Can Zhao and Daguang Xu and Xiaoxiao Li},\nyear={2023},\nurl={https://openreview.net/forum?id=vBx0yNQmik}\n}", "github": "", "project": "", "reviewers": "DZFG;5ytS;TJpJ;zf9v", "site": "https://openreview.net/forum?id=vBx0yNQmik", "pdf_size": 1414928, "rating": "4;5;6;6", "confidence": "4;3;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "49;12;131;87", "wc_strengths": "55;27;54;53", "wc_weaknesses": "72;54;200;272", "wc_questions": "41;58;43;41", "wc_limitations": "7;21;39;8", "wc_review": "224;172;467;461", "wc_reply_reviewers": "0;13;21;0", "wc_reply_authors": "122;73;55;73", "reply_reviewers": "0;1;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ],
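One building block of the abstract above, distillation by distribution matching, can be sketched compactly. The snippet below is a generic single-client update under hypothetical names (not FedLGD's full iterative procedure with class balancing): a small learnable tensor of synthetic examples is pulled toward the real data by matching mean feature embeddings under an embedding network.

```python
import torch

def distribution_matching_step(virtual, real_loader, embed, opt, device):
    """One update of local virtual data via distribution matching (sketch).

    virtual: leaf tensor of synthetic examples with requires_grad=True,
             optimised by `opt` (e.g. torch.optim.SGD([virtual], lr=0.1)).
    embed:   a fixed or randomly initialised embedding network.
    """
    x_real, _ = next(iter(real_loader))
    x_real = x_real.to(device)
    # match the mean embedding of the synthetic set to the real batch
    loss = (embed(x_real).mean(0) - embed(virtual).mean(0)).pow(2).sum()
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss.item()
```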
"wc_summary_avg": [ 69.75, 44.200537327050675 ], "wc_strengths_avg": [ 47.25, 11.712706775122479 ], "wc_weaknesses_avg": [ 149.5, 90.3922009910147 ], "wc_questions_avg": [ 45.75, 7.119515432949071 ], "wc_limitations_avg": [ 18.75, 12.93010054098575 ], "wc_review_avg": [ 331.0, 134.2814209040104 ], "wc_reply_reviewers_avg": [ 8.5, 8.958236433584458 ], "wc_reply_authors_avg": [ 80.75, 24.923633362734254 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4740619818693935435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of British Columbia;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.ubc.ca;https://www.nvidia.com", "aff_unique_abbr": "UBC;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "On Class Distributions Induced by Nearest Neighbor Graphs for Node Classification of Tabular Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70111", "id": "vEzcRdiTkP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5c1863f711c721648387ac2ef745facb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vEzcRdiTkP", "openreview": "https://openreview.net/forum?id=vEzcRdiTkP", "poster": "/media/PosterPDFs/NeurIPS%202023/70111.png?t=1701420652.7657602", "slides": "https://nips.cc/virtual/2023/poster/70111", "video": "https://nips.cc/virtual/2023/poster/70111", "tldr": "", "abstract": "Researchers have used nearest neighbor graphs to transform classical machine learning problems on tabular data into node classification tasks to solve with graph representation learning methods. Such artificial structures often reflect the homophily assumption, believed to be a key factor in the performances of deep graph networks. In light of recent results demystifying these beliefs, we introduce a theoretical framework to understand the benefits of Nearest Neighbor (NN) graphs when a graph structure is missing. We formally analyze the Cross-Class Neighborhood Similarity (CCNS), used to empirically evaluate the usefulness of structures, in the context of nearest neighbor graphs. Moreover, we study the class separability induced by deep graph networks on a k-NN graph. Motivated by the theory, our quantitative experiments demonstrate that, under full supervision, employing a k-NN graph offers no benefits compared to a structure-agnostic baseline. 
Qualitative analyses suggest that our framework is good at estimating the CCNS and hint at k-NN graphs never being useful for such classification tasks under full supervision, thus advocating for the study of alternative graph construction techniques in combination with deep graph networks.", "keywords": "Deep Graph Networks;Graph Neural Networks;Graph Representation Learning;Nearest Neighbors;Node Classification;Tabular Data", "primary_area": "", "supplementary_material": "/attachment/2441bc6459f8fa265c9c634efd5f693ce204666a.zip", "author": "Federico Errica", "authorids": "~Federico_Errica1", "gender": "M", "homepage": "http://pages.di.unipi.it/errica/", "dblp": "203/9424", "google_scholar": "https://scholar.google.co.uk/citations?user=VJ0n2gQAAAAJ", "orcid": "0000-0001-5181-2904", "linkedin": "https://it.linkedin.com/in/federicoerrica", "or_profile": "~Federico_Errica1", "aff": "NEC", "aff_domain": "neclab.eu", "position": "Researcher", "bibtex": "@inproceedings{\nerrica2023on,\ntitle={On Class Distributions Induced by Nearest Neighbor Graphs for Node Classification of Tabular Data},\nauthor={Federico Errica},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vEzcRdiTkP}\n}", "github": "", "project": "", "reviewers": "TGZz;MyQ5;CVih;Ysph;Cqgk;VEGV", "pdf_size": 1593866, "rating": "5;5;5;6;6;7", "confidence": "4;5;1;2;3;2", "soundness": "2;2;3;2;2;4", "novelty": "2;2;3;2;3;3", "presentation": "3;2;2;3;3;3", "wc_summary": "33;39;108;82;160;230", "wc_strengths": "26;36;11;55;112;91", "wc_weaknesses": "65;65;61;143;117;42", "wc_questions": "2;242;1;91;37;33", "wc_limitations": "2;7;6;9;1;6", "wc_review": "128;389;187;380;427;402", "wc_reply_reviewers": "11;152;24;31;0;0", "wc_reply_authors": "0;893;44;20;51;0", "reply_reviewers": "1;2;1;1;0;0", "reply_authors": "1;4;2;2;2;1", "rating_avg": [ 5.666666666666667, 0.7453559924999299 ], "confidence_avg": [ 2.8333333333333335, 1.3437096247164249 ], "soundness_avg": [ 2.5, 0.7637626158259734 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 108.66666666666667, 69.09815884345657 ], "wc_strengths_avg": [ 55.166666666666664, 35.78368653761407 ], "wc_weaknesses_avg": [ 82.16666666666667, 35.507823863981805 ], "wc_questions_avg": [ 67.66666666666667, 83.50182965393965 ], "wc_limitations_avg": [ 5.166666666666667, 2.793842435706702 ], "wc_review_avg": [ 318.8333333333333, 116.24602741121475 ], "wc_reply_reviewers_avg": [ 36.333333333333336, 52.9800801140286 ], "wc_reply_authors_avg": [ 168.0, 324.8184108082545 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3882901373576605, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6870218738974443257&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "neclab.eu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "NEC Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.nec.com", "aff_unique_abbr": "NEC", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Self-supervised video pretraining yields robust and more human-aligned visual representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70110", "id": "vF8ukt5l1R", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/cf57022dff0929796f85ac99d7cefa86-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vF8ukt5l1R", "openreview": "https://openreview.net/forum?id=vF8ukt5l1R", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70110", "video": "https://nips.cc/virtual/2023/poster/70110", "author_site": "Nikhil Parthasarathy, S. M. Ali Eslami, Joao Carreira, Olivier Henaff", "tldr": "", "abstract": "Humans learn powerful representations of objects and scenes by observing how they evolve over time. Yet, outside of specific tasks that require explicit temporal understanding, static image pretraining remains the dominant paradigm for learning visual foundation models. We question this mismatch, and ask whether video pretraining can yield visual representations that bear the hallmarks of human perception: generalisation across tasks, robustness to perturbations, and consistency with human judgements. To that end we propose a novel procedure for curating videos, and develop a contrastive framework which learns from the complex transformations therein. This simple paradigm for distilling knowledge from videos, called VITO, yields general representations that far outperform prior video pretraining methods on image understanding tasks, and image pretraining methods on video understanding tasks. Moreover, VITO representations are significantly more robust to natural and synthetic deformations than image-, video-, and adversarially-trained\nones. Finally, VITO\u2019s predictions are strongly aligned with human judgements, surpassing models that were specifically trained for that purpose. Together, these results suggest that video pretraining could be a simple way of learning unified, robust, and human-aligned representations of the visual world.", "keywords": "self-supervised learning;contrastive;video pretraining;representation learning;visual representation;human alignment;robustness;shape-bias;saliency", "primary_area": "", "supplementary_material": "", "author": "Nikhil Parthasarathy;S. M. Ali Eslami;Joao Carreira;Olivier J Henaff", "authorids": "~Nikhil_Parthasarathy1;~S._M._Ali_Eslami1;~Joao_Carreira1;~Olivier_J_Henaff1", "gender": "M;M;M;", "homepage": ";http://arkitus.com/research;;https://www.olivierhenaff.com/", "dblp": "209/4951;117/4847;61/5621-1;156/0035.html", "google_scholar": "X9mO4ckAAAAJ;skyUvycAAAAJ;https://scholar.google.pt/citations?user=IUZ-7_cAAAAJ;Sx75CVsAAAAJ", "orcid": ";;;0000-0001-8183-9489", "linkedin": "nikparth/;;jo%C3%A3o-carreira-56238a7/;", "or_profile": "~Nikhil_Parthasarathy1;~S._M._Ali_Eslami1;~Joao_Carreira1;~Olivier_J_Henaff1", "aff": "New York University;Google;Google DeepMind;Google DeepMind", "aff_domain": "nyu.edu;google.com;google.com;google.com", "position": "PhD student;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nparthasarathy2023selfsupervised,\ntitle={Self-supervised video pretraining yields robust and more human-aligned visual representations},\nauthor={Nikhil Parthasarathy and S. M. 
Ali Eslami and Joao Carreira and Olivier J Henaff},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vF8ukt5l1R}\n}", "github": "", "project": "", "reviewers": "Pto4;3k9B;jN1F;Xjw1;i1Wo;3hVk", "pdf_size": 4740349, "rating": "5;5;6;6;6;7", "confidence": "4;4;3;4;3;4", "soundness": "3;3;3;3;3;3", "novelty": "3;2;3;2;3;3", "presentation": "3;4;3;3;3;4", "wc_summary": "78;70;113;112;102;149", "wc_strengths": "51;44;99;63;57;149", "wc_weaknesses": "166;292;163;150;91;79", "wc_questions": "169;35;94;42;5;55", "wc_limitations": "13;2;22;4;1;25", "wc_review": "477;443;491;371;256;457", "wc_reply_reviewers": "0;35;170;33;21;54", "wc_reply_authors": "39;328;147;0;328;0", "reply_reviewers": "0;1;2;1;1;1", "reply_authors": "2;2;3;1;2;1", "rating_avg": [ 5.833333333333333, 0.6871842709362768 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 104.0, 25.839246635044656 ], "wc_strengths_avg": [ 77.16666666666667, 36.58058076205036 ], "wc_weaknesses_avg": [ 156.83333333333334, 69.31910911782471 ], "wc_questions_avg": [ 66.66666666666667, 52.86670870111822 ], "wc_limitations_avg": [ 11.166666666666666, 9.581521567869872 ], "wc_review_avg": [ 415.8333333333333, 81.01525919369895 ], "wc_reply_reviewers_avg": [ 52.166666666666664, 55.140477167161166 ], "wc_reply_authors_avg": [ 140.33333333333334, 141.50225753990247 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.8333333333333333, 0.6871842709362768 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1714985851425088, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2410311123489968594&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nyu.edu;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.nyu.edu;https://www.google.com", "aff_unique_abbr": "NYU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Generalized Weighted Path Consistency for Mastering Atari Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70109", "id": "vHRLS8HhK1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d87a0c38431d0ec8d8b8ece95198c04-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vHRLS8HhK1", "openreview": "https://openreview.net/forum?id=vHRLS8HhK1", "poster": "/media/PosterPDFs/NeurIPS%202023/70109.png?t=1698287466.0951543", "slides": "https://nips.cc/virtual/2023/poster/70109", "video": "https://nips.cc/virtual/2023/poster/70109", "author_site": "Dengwei Zhao, Shikui Tu, Lei Xu", "tldr": "", "abstract": "Reinforcement learning with the help of neural-guided search consumes huge computational resources to achieve remarkable performance. Path consistency (PC), i.e., $f$ values on one optimal path should be identical, was previously imposed on MCTS by PCZero to improve the learning efficiency of AlphaZero. Not only does PCZero still lack theoretical support, but it also considers merely board games. 
In this paper, PCZero is generalized into GW-PCZero for real applications with non-zero immediate reward. A weighting mechanism is introduced to reduce the variance caused by scouting's uncertainty on the $f$ value estimation. For the first time, it is theoretically proved that neural-guided MCTS is guaranteed to find the optimal solution under the constraint of PC. Experiments are conducted on the Atari $100$k benchmark with $26$ games and GW-PCZero achieves $198\\%$ mean human performance, higher than the state-of-the-art EfficientZero's $194\\%$, while consuming only $25\\%$ of the computational resources consumed by EfficientZero.", "keywords": "Monte Carlo Tree Search;Reinforcement learning;Path consistency.", "primary_area": "", "supplementary_material": "/attachment/4cc3b89d211d8c0941a2ed80925fe7639d6914a6.pdf", "author": "Dengwei Zhao;Shikui Tu;Lei Xu", "authorids": "~Dengwei_Zhao1;~Shikui_Tu1;~Lei_Xu7", "gender": "M;M;M", "homepage": "http://cmach.sjtu.edu.cn/;http://www.cs.sjtu.edu.cn/~tushikui;http://www.cse.cuhk.edu.hk/~lxu/", "dblp": "323/9550;04/115;19/360-1", "google_scholar": "Va0JP5EAAAAJ;Yewd61kAAAAJ;rN2ny9kAAAAJ", "orcid": "0000-0003-4764-2759;0000-0001-6270-0449;0000-0002-2752-1573", "linkedin": ";;", "or_profile": "~Dengwei_Zhao1;~Shikui_Tu1;~Lei_Xu7", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2023generalized,\ntitle={Generalized Weighted Path Consistency for Mastering Atari Games},\nauthor={Dengwei Zhao and Shikui Tu and Lei Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vHRLS8HhK1}\n}", "github": "", "project": "", "reviewers": "s6tP;YYov;15wJ;Dd1j;hxZc", "pdf_size": 400477, "rating": "4;4;5;7;7", "confidence": "5;4;3;4;3", "soundness": "2;2;3;3;3", "novelty": "1;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "178;132;106;119;119", "wc_strengths": "20;33;37;116;206", "wc_weaknesses": "247;216;96;44;140", "wc_questions": "74;137;84;130;428", "wc_limitations": "64;6;56;1;96", "wc_review": "583;524;379;410;989", "wc_reply_reviewers": "793;153;28;0;71", "wc_reply_authors": "1537;0;0;0;131", "reply_reviewers": "3;1;1;0;2", "reply_authors": "4;1;1;1;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 130.8, 24.991198450654583 ], "wc_strengths_avg": [ 82.4, 70.42897131152776 ], "wc_weaknesses_avg": [ 148.6, 74.84276852174831 ], "wc_questions_avg": [ 170.6, 131.04441994987806 ], "wc_limitations_avg": [ 44.6, 36.16407056734626 ], "wc_review_avg": [ 577.0, 218.9438284126776 ], "wc_reply_reviewers_avg": [ 209.0, 296.5393734396834 ], "wc_reply_authors_avg": [ 333.6, 603.8352755512053 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5122717640554154, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3202763464443991338&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", 
"aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Accelerated Zeroth-order Method for Non-Smooth Stochastic Convex Optimization Problem with Infinite Variance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70108", "id": "vHSQTEIFkp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca24eb48806df3af49e5ac59d8a46f67-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vHSQTEIFkp", "openreview": "https://openreview.net/forum?id=vHSQTEIFkp", "poster": "/media/PosterPDFs/NeurIPS%202023/70108.png?t=1701892463.51664", "slides": "https://nips.cc/virtual/2023/poster/70108", "video": "https://nips.cc/virtual/2023/poster/70108", "author_site": "Nikita Kornilov, Ohad Shamir, Aleksandr Lobanov, Darina Dvinskikh, Alexander Gasnikov, Innokentiy Shibaev, Eduard Gorbunov, Samuel Horv\u00e1th", "tldr": "", "abstract": "In this paper, we consider non-smooth stochastic convex optimization with two function evaluations per round under infinite noise variance. In the classical setting when noise has finite variance, an optimal algorithm, built upon the batched accelerated gradient method, was proposed in (Gasnikov et. al., 2022). This optimality is defined in terms of iteration and oracle complexity, as well as the maximal admissible level of adversarial noise. However, the assumption of finite variance is burdensome and it might not hold in many practical scenarios. To address this, we demonstrate how to adapt a refined clipped version of the accelerated gradient (Stochastic Similar Triangles) method from (Sadiev et al., 2023) for a two-point zero-order oracle. 
This adaptation entails extending the batching technique to accommodate infinite variance \u2014 a non-trivial task that stands as a distinct contribution of this paper.", "keywords": "stochastic optimization;gradient-free optimization;zero-order oracle;gradient clipping;infinite variance", "primary_area": "", "supplementary_material": "/attachment/36d6ee51175ea108aeb30efb42ded97e2d87bf6b.pdf", "author": "Nikita Kornilov;Ohad Shamir;Aleksandr Lobanov;Darina Dvinskikh;Alexander Gasnikov;Innokentiy Andreevich Shibaev;Eduard Gorbunov;Samuel Horv\u00e1th", "authorids": "kornilov.nm@phystech.edu;~Ohad_Shamir1;~Aleksandr_Lobanov1;~Darina_Dvinskikh1;~Alexander_Gasnikov1;~Innokentiy_Andreevich_Shibaev1;~Eduard_Gorbunov1;~Samuel_Horv\u00e1th1", "gender": ";;M;F;;;M;M", "homepage": ";http://www.wisdom.weizmann.ac.il/~shamiro/;https://alexander-lobanov.github.io/;;;;https://eduardgorbunov.github.io;https://sites.google.com/view/samuelhorvath", "dblp": ";12/5897;360/8623;217/3565;;282/1296;215/5512.html;234/8604", "google_scholar": ";all0DHsAAAAJ;https://scholar.google.com/citations?hl=ru;5ILnTRsAAAAJ;;;https://scholar.google.ru/citations?user=85j2RqQAAAAJ;k252J7kAAAAJ", "orcid": ";;0000-0003-1620-9581;0000-0003-1757-1021;;0000-0001-6609-3738;;0000-0003-0619-9260", "linkedin": ";;;;;;;samuel-horvath/", "or_profile": "kornilov.nm@phystech.edu;~Ohad_Shamir1;~Aleksandr_Lobanov1;~Darina_Dvinskikh1;~Alexander_Gasnikov1;~Innokentiy_Andreevich_Shibaev1;~Eduard_Gorbunov1;~Samuel_Horv\u00e1th1", "aff": ";Weizmann Institute;Moscow Institute of Physics and Technology;Moscow Institute of Physics and Technology;;Moscow Institute of Physics and Technology;Mohamed bin Zayed University of Artificial Intelligence;MBZUAI", "aff_domain": ";weizmann.ac.il;mipt.ru;phystech.edu;;phystech.edu;mbzuai.ac.ae;mbzuai.ac.ae", "position": ";Associate Professor;Researcher;Researcher;;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nkornilov2023accelerated,\ntitle={Accelerated Zeroth-order Method for Non-Smooth Stochastic Convex Optimization Problem with Infinite Variance},\nauthor={Nikita Kornilov and Ohad Shamir and Aleksandr Lobanov and Darina Dvinskikh and Alexander Gasnikov and Innokentiy Andreevich Shibaev and Eduard Gorbunov and Samuel Horv{\\'a}th},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vHSQTEIFkp}\n}", "github": "", "project": "", "reviewers": "M3Ug;zmkR;6zia;FKLk;nSeP", "pdf_size": 409178, "rating": "5;6;6;7;7", "confidence": "3;3;3;4;3", "soundness": "3;3;3;4;3", "novelty": "2;2;3;3;3", "presentation": "2;2;1;2;4", "wc_summary": "42;44;372;67;85", "wc_strengths": "42;23;95;62;36", "wc_weaknesses": "255;53;61;103;149", "wc_questions": "72;14;34;12;2", "wc_limitations": "22;1;1;1;1", "wc_review": "433;135;563;245;273", "wc_reply_reviewers": "10;27;39;11;11", "wc_reply_authors": "13;16;24;14;14", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.9797958971132712 ], "wc_summary_avg": [ 122.0, 125.99841268841445 ], "wc_strengths_avg": [ 51.6, 25.08066984751404 ], "wc_weaknesses_avg": [ 124.2, 73.80623279913425 ], "wc_questions_avg": [ 26.8, 24.870866490735704 ], "wc_limitations_avg": [ 5.2, 8.399999999999999 ], "wc_review_avg": [ 329.8, 150.59667991028223 ], "wc_reply_reviewers_avg": [ 19.6, 
11.586198686368192 ], "wc_reply_authors_avg": [ 16.2, 4.019950248448357 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=739846394592425863&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": ";weizmann.ac.il;mipt.ru;phystech.edu;;phystech.edu;mbzuai.ac.ae;mbzuai.ac.ae", "author_num": 8, "aff_unique_index": "0;1;1;1;2;2", "aff_unique_norm": "Weizmann Institute of Science;Moscow Institute of Physics and Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.weizmann.org.il;https://www.mipt.ru/en;https://mbzuai.ac.ae", "aff_unique_abbr": "Weizmann;MIPT;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;2;2", "aff_country_unique": "Israel;Russian Federation;United Arab Emirates" }, { "title": "Accelerated Quasi-Newton Proximal Extragradient: Faster Rate for Smooth Convex Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70107", "id": "vIGNYQ4Alv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/19c9708f31ec44b5b1cbd67f91d05d95-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vIGNYQ4Alv", "openreview": "https://openreview.net/forum?id=vIGNYQ4Alv", "poster": "/media/PosterPDFs/NeurIPS%202023/70107.png?t=1702314659.791004", "slides": "https://nips.cc/virtual/2023/poster/70107", "video": "https://nips.cc/virtual/2023/poster/70107", "author_site": "Ruichen Jiang, Aryan Mokhtari", "tldr": "", "abstract": "In this paper, we propose an accelerated quasi-Newton proximal extragradient method for solving unconstrained smooth convex optimization problems. With access only to the gradients of the objective, we prove that our method can achieve a convergence rate of $\\mathcal{O}\\bigl(\\min\\\\{\\frac{1}{k^2}, \\frac{\\sqrt{d\\log k}}{k^{2.5}}\\\\}\\bigr)$, where $d$ is the problem dimension and $k$ is the number of iterations. In particular, in the regime where $k = \\mathcal{O}(d)$, our method matches the _optimal rate_ of $\\mathcal{O}(\\frac{1}{k^2})$ by Nesterov's accelerated gradient (NAG). Moreover, in the regime where $k = \\Omega(d \\log d)$, it outperforms NAG and converges at a _faster rate_ of $\\mathcal{O}\\bigl(\\frac{\\sqrt{d\\log k}}{k^{2.5}}\\bigr)$. To the best of our knowledge, this result is the first to demonstrate a provable gain for a quasi-Newton-type method over NAG in the convex setting. 
To achieve such results, we build our method on a recent variant of the Monteiro-Svaiter acceleration framework and adopt an online learning perspective to update the Hessian approximation matrices, in which we relate the convergence rate of our method to the dynamic regret of a specific online convex optimization problem in the space of matrices.", "keywords": "convex optimization;quasi-Newton methods;Monteiro-Svaiter acceleration;Nesterov's accelerated gradient;online learning", "primary_area": "", "supplementary_material": "/attachment/132c301eef324dd54f79a55d677d5f5944752f7f.zip", "author": "Ruichen Jiang;Aryan Mokhtari", "authorids": "~Ruichen_Jiang1;~Aryan_Mokhtari3", "gender": ";M", "homepage": "https://ruichen-jiang.github.io/;https://sites.utexas.edu/mokhtari/", "dblp": "271/7916;140/7407", "google_scholar": "BGFt1UMAAAAJ;glcep6EAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ruichen_Jiang1;~Aryan_Mokhtari3", "aff": "University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\njiang2023accelerated,\ntitle={Accelerated Quasi-Newton Proximal Extragradient: Faster Rate for Smooth Convex Optimization},\nauthor={Ruichen Jiang and Aryan Mokhtari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vIGNYQ4Alv}\n}", "github": "", "project": "", "reviewers": "CWLw;H1ti;MQHh;N8Fk", "pdf_size": 1344259, "rating": "5;6;7;8", "confidence": "3;4;4;4", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "85;53;82;78", "wc_strengths": "85;16;39;66", "wc_weaknesses": "69;82;372;198", "wc_questions": "1;30;1;62", "wc_limitations": "1;1;2;46", "wc_review": "241;182;496;450", "wc_reply_reviewers": "5;0;15;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.5, 12.658988901172163 ], "wc_strengths_avg": [ 51.5, 26.21545345783666 ], "wc_weaknesses_avg": [ 180.25, 121.56556872733331 ], "wc_questions_avg": [ 23.5, 25.184320518926057 ], "wc_limitations_avg": [ 12.5, 19.345542122153102 ], "wc_review_avg": [ 342.25, 133.3986038157821 ], "wc_reply_reviewers_avg": [ 7.5, 5.5901699437494745 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10056985633218929623&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Transformer as a hippocampal memory consolidation model based on NMDAR-inspired nonlinearity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70106", "id": "vKpVJxplmB", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f1eb4c897e63870eee9a0a0f7a10332-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vKpVJxplmB", "openreview": "https://openreview.net/forum?id=vKpVJxplmB", "poster": "/media/PosterPDFs/NeurIPS%202023/70106.png?t=1702224262.4731877", "slides": "https://nips.cc/virtual/2023/poster/70106", "video": "https://nips.cc/virtual/2023/poster/70106", "author_site": "Dong Kyum Kim, Jea Kwon, Meeyoung Cha, C. Lee", "tldr": "", "abstract": "The hippocampus plays a critical role in learning, memory, and spatial representation, processes that depend on the NMDA receptor (NMDAR). Inspired by recent findings that compare deep learning models to the hippocampus, we propose a new nonlinear activation function that mimics NMDAR dynamics. NMDAR-like nonlinearity shifts short-term working memory into long-term reference memory in transformers, thus enhancing a process that is similar to memory consolidation in the mammalian brain. We design a navigation task assessing these two memory functions and show that manipulating the activation function (i.e., mimicking the Mg$^{2+}$-gating of NMDAR) disrupts long-term memory processes. Our experiments suggest that place cell-like functions and reference memory reside in the feed-forward network layer of transformers and that nonlinearity drives these processes. We discuss the role of NMDAR-like nonlinearity in establishing this striking resemblance between transformer architecture and hippocampal spatial representation.", "keywords": "Transformer;NMDA;long-term memory;reference memory;memory consolidation", "primary_area": "", "supplementary_material": "/attachment/0d5538950357ca17553b3c610b3d8f3f40e4c1a2.zip", "author": "Dong-Kyum Kim;Jea Kwon;Meeyoung Cha;C. Justin Lee", "authorids": "~Dong-Kyum_Kim1;~Jea_Kwon1;~Meeyoung_Cha2;~C._Justin_Lee1", "gender": "M;F;M;M", "homepage": "https://jeakwon.github.io;https://www.mpi-sp.org/cha;https://www.ibs.re.kr/glia/;https://kdkyum.github.io", "dblp": ";57/4924;;260/0772", "google_scholar": "6I0mg_EAAAAJ;iFlnVCoAAAAJ;v3aGpogAAAAJ;-pvD9xUAAAAJ", "orcid": "0000-0003-4318-4383;0000-0003-4085-9648;0000-0002-3555-0980;0000-0001-6630-2181", "linkedin": ";meeyoungcha/;;kdkyum", "or_profile": "~Jea_Kwon1;~Meeyoung_Cha2;~C._Justin_Lee1;~Dongkyum_Kim1", "aff": "Institute for Basic Science;Korea Advanced Institute of Science & Technology;Insitute for Basic Science;Institute for Basic Science", "aff_domain": "ibs.re.kr;kaist.ac.kr;ibs.re.kr;ibs.re.kr", "position": "Postdoc;Full Professor;Principal Researcher;Postdoc", "bibtex": "@inproceedings{\nkim2023transformer,\ntitle={Transformer as a hippocampal memory consolidation model based on {NMDAR}-inspired nonlinearity},\nauthor={Dong-Kyum Kim and Jea Kwon and Meeyoung Cha and C. 
Justin Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vKpVJxplmB}\n}", "github": "", "project": "", "reviewers": "r1mY;TzRp;xzxf;FYwU", "pdf_size": 9758272, "rating": "3;5;6;7", "confidence": "4;2;4;4", "soundness": "1;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "46;80;71;82", "wc_strengths": "45;58;54;21", "wc_weaknesses": "117;130;51;149", "wc_questions": "74;61;186;22", "wc_limitations": "4;18;1;28", "wc_review": "286;347;363;302", "wc_reply_reviewers": "56;36;36;29", "wc_reply_authors": "1021;68;70;61", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.75, 14.324367350776788 ], "wc_strengths_avg": [ 44.5, 14.361406616345072 ], "wc_weaknesses_avg": [ 111.75, 36.873940662749895 ], "wc_questions_avg": [ 85.75, 60.9605405159764 ], "wc_limitations_avg": [ 12.75, 10.894379284750462 ], "wc_review_avg": [ 324.5, 31.531730050855124 ], "wc_reply_reviewers_avg": [ 39.25, 10.084022015049353 ], "wc_reply_authors_avg": [ 305.0, 413.39629896746777 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13710169137515082225&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ibs.re.kr;kaist.ac.kr;ibs.re.kr;ibs.re.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Institute for Basic Science;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibs.re.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "IBS;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Exploiting Correlated Auxiliary Feedback in Parameterized Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70105", "id": "vM5VnNQ4n7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0e0157ce5ea15831072be4744cbd5334-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vM5VnNQ4n7", "openreview": "https://openreview.net/forum?id=vM5VnNQ4n7", "poster": "/media/PosterPDFs/NeurIPS%202023/70105.png?t=1701875277.7608385", "slides": "https://nips.cc/virtual/2023/poster/70105", "video": "https://nips.cc/virtual/2023/poster/70105", "author_site": "Arun Verma, Zhongxiang Dai, Zhongxiang Dai, YAO SHU, Bryan Kian Hsiang Low", "tldr": "", "abstract": "We study a novel variant of the parameterized bandits problem in which the learner can observe additional auxiliary feedback that is correlated with the observed reward. The auxiliary feedback is readily available in many real-life applications, e.g., an online platform that wants to recommend the best-rated services to its users can observe the user's rating of service (rewards) and collect additional information like service delivery time (auxiliary feedback). In this paper, we first develop a method that exploits auxiliary feedback to build a reward estimator with tight confidence bounds, leading to a smaller regret. 
We then characterize the regret reduction in terms of the correlation coefficient between reward and its auxiliary feedback. Experimental results in different settings also verify the performance gain achieved by our proposed method.", "keywords": "Parameterized Bandits;Auxiliary Feedback;Control Variate;Regret Minimization", "primary_area": "", "supplementary_material": "/attachment/845c4560c5a98f24804ed11ec84199f53f9928a3.zip", "author": "Arun Verma;Zhongxiang Dai;Yao Shu;Bryan Kian Hsiang Low", "authorids": "~Arun_Verma1;~Zhongxiang_Dai1;~Yao_Shu1;~Bryan_Kian_Hsiang_Low1", "gender": "M;M;M;M", "homepage": "https://arunv3rma.github.io/;https://daizhongxiang.github.io/;https://yao.notion.site;http://www.comp.nus.edu.sg/~lowkh", "dblp": "28/3688;172/4968;44/1338;97/4877", "google_scholar": "https://scholar.google.co.in/citations?user=tBcixlUAAAAJ;1v8xOIYAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": ";;;", "linkedin": ";;yao-shu-a5640514b;", "or_profile": "~Arun_Verma1;~Zhongxiang_Dai1;~Yao_Shu1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "Postdoc;Postdoc;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nverma2023exploiting,\ntitle={Exploiting Correlated Auxiliary Feedback in Parameterized Bandits},\nauthor={Arun Verma and Zhongxiang Dai and Yao Shu and Bryan Kian Hsiang Low},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vM5VnNQ4n7}\n}", "github": "", "project": "", "reviewers": "jBfm;HUuF;CBCn;vUBt", "pdf_size": 5980118, "rating": "4;5;6;6", "confidence": "4;2;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "62;85;94;49", "wc_strengths": "60;70;198;65", "wc_weaknesses": "443;265;244;144", "wc_questions": "139;2;39;3", "wc_limitations": "14;2;1;1", "wc_review": "718;424;576;262", "wc_reply_reviewers": "416;192;18;22", "wc_reply_authors": "1050;634;40;36", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 17.89553016817328 ], "wc_strengths_avg": [ 98.25, 57.69911177825877 ], "wc_weaknesses_avg": [ 274.0, 107.75203014328778 ], "wc_questions_avg": [ 45.75, 55.86311394829329 ], "wc_limitations_avg": [ 4.5, 5.5 ], "wc_review_avg": [ 495.0, 170.01470524634038 ], "wc_reply_reviewers_avg": [ 162.0, 162.5976629598347 ], "wc_reply_authors_avg": [ 440.0, 428.06307946376313 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pjdxf1rw3aYJ:scholar.google.com/&scioq=Exploiting+Correlated+Auxiliary+Feedback+in+Parameterized+Bandits&hl=en&as_sdt=0,33", "gs_version_total": 11, "email": "nus.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", 
"aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "vN9OBsZtYH", "title": "Fairness under Noise Perturbation: from the Perspective of Distribution Shift", "track": "main", "status": "Reject", "tldr": "", "abstract": "Much work on fairness assumes access to clean data during training. In practice, however, due to privacy or legal concern, the collected data can be inaccurate or intentionally perturbed by agents. Under such scenarios, fairness measures on noisy data become a biased estimation of ground-truth discrimination, leading to unfairness for a seemingly fair model during deployment. Current work on noise-tolerant fairness assumes a group-wise universal flip, which can become trivial during training, and requires extra tools for noise rate estimation. In light of existing limitations, in this work, we consider such problem from a novel perspective of distribution shift, where we consider a normalizing flow framework for noise-tolerant fairness without requiring noise rate estimation, which is applicable to both \\emph{sensitive attribute noise} and \\emph{label noise}. We formulate the noise perturbation as both group- and label-dependent, and we discuss theoretically the connections between fairness measures under noisy and clean data. We prove theoretically the transferability of fairness from noisy to clean data under both types of noise. Experimental results on three datasets show that our method outperforms state-of-the-art alternatives, with better or comparable improvements in group fairness and with relatively small decrease in accuracy under single exposure and the simultaneous presence of two types of noise.", "keywords": "noise-tolerant fairness;distribution shift;fair representation learning", "primary_area": "", "supplementary_material": "/attachment/fa5ce1f86459ae11d12709a999eed1506339576d.zip", "author": "Junyi Chai;Xiaoqian Wang", "authorids": "~Junyi_Chai1;~Xiaoqian_Wang1", "gender": "M;F", "homepage": ";https://engineering.purdue.edu/~joywang/", "dblp": "323/9078;151/3215-1", "google_scholar": "fucMzpYAAAAJ;I3tc214AAAAJ", "orcid": "0000-0002-4324-5361;", "linkedin": "junyi-chai-260869256/?trk=opento_sprofile_details;", "or_profile": "~Junyi_Chai1;~Xiaoqian_Wang1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nchai2023fairness,\ntitle={Fairness under Noise Perturbation: from the Perspective of Distribution Shift},\nauthor={Junyi Chai and Xiaoqian Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=vN9OBsZtYH}\n}", "github": "", "project": "", "reviewers": "5tmy;fq6s;vtBi;Dpoc;Vz2U", "site": "https://openreview.net/forum?id=vN9OBsZtYH", "pdf_size": 1070726, "rating": "4;5;5;6;6", "confidence": "4;4;1;4;3", "soundness": "3;2;3;4;3", "novelty": "3;2;3;3;3", "presentation": "2;2;2;4;2", "wc_summary": "104;110;69;55;123", "wc_strengths": "257;61;32;64;78", "wc_weaknesses": "806;189;21;79;51", "wc_questions": "166;9;199;23;32", "wc_limitations": "49;31;5;14;19", "wc_review": "1382;400;326;235;303", "wc_reply_reviewers": "59;61;39;0;29", "wc_reply_authors": "172;116;30;0;39", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], 
"wc_summary_avg": [ 92.2, 25.79457307264456 ], "wc_strengths_avg": [ 98.4, 80.69845103841833 ], "wc_weaknesses_avg": [ 229.2, 293.93087622772805 ], "wc_questions_avg": [ 85.8, 79.9784971101608 ], "wc_limitations_avg": [ 23.6, 15.226293048539427 ], "wc_review_avg": [ 529.2, 429.64422491172854 ], "wc_reply_reviewers_avg": [ 37.6, 22.339203208709126 ], "wc_reply_authors_avg": [ 71.4, 63.2 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.045834924851410594, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1ecVC39xeU8J:scholar.google.com/&scioq=Fairness+under+Noise+Perturbation:+from+the+Perspective+of+Distribution+Shift&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Suggesting Variable Order for Cylindrical Algebraic Decomposition via Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70104", "id": "vNsdFwjPtL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/efcb5b06ce8bb672ffa26b9dc5cdd0f9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vNsdFwjPtL", "openreview": "https://openreview.net/forum?id=vNsdFwjPtL", "poster": "/media/PosterPDFs/NeurIPS%202023/70104.png?t=1699886201.0710196", "slides": "https://nips.cc/virtual/2023/poster/70104", "video": "https://nips.cc/virtual/2023/poster/70104", "author_site": "Fuqi Jia, Yuhang Dong, Minghao Liu, Pei Huang, Feifei Ma, Jian Zhang", "tldr": "", "abstract": "Cylindrical Algebraic Decomposition (CAD) is one of the pillar algorithms of symbolic computation, and its worst-case complexity is double exponential to the number of variables. Researchers found that variable order dramatically affects efficiency and proposed various heuristics. \nThe existing learning-based methods are all supervised learning methods that cannot cope with diverse polynomial sets.\nThis paper proposes two Reinforcement Learning (RL) approaches combined with Graph Neural Networks (GNN) for Suggesting Variable Order (SVO). One is GRL-SVO(UP), a branching heuristic integrated with CAD. The other is GRL-SVO(NUP), a fast heuristic providing a total order directly. We generate a random dataset and collect a real-world dataset from SMT-LIB. The experiments show that our approaches outperform state-of-the-art learning-based heuristics and are competitive with the best expert-based heuristics. Interestingly, our models show a strong generalization ability, working well on various datasets even if they are only trained on a 3-var random dataset. 
The source code and data are available at https://github.com/dongyuhang22/GRL-SVO.", "keywords": "Reinforcement Learning;Graph Neural Network;Cylindrical Algebraic Decomposition.", "primary_area": "", "supplementary_material": "", "author": "Fuqi Jia;Yuhang Dong;Minghao Liu;Pei Huang;Feifei Ma;Jian Zhang", "authorids": "~Fuqi_Jia1;~Yuhang_Dong3;~Minghao_Liu1;~Pei_Huang1;~Feifei_Ma1;~Jian_Zhang27", "gender": "M;;M;M;F;M", "homepage": "https://fuqi-jia.github.io/;;https://minghao-liu.github.io/;;http://lcs.ios.ac.cn/~maff/;http://lcs.ios.ac.cn/~zj/", "dblp": "305/0579;;119/3234-1.html;59/1856-2;59/556;07/314-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;qR3mRHIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": "0000-0001-9947-2187;0000-0002-8711-2755;;;;0000-0001-8523-3505", "linkedin": "fuqi-jia-06a3a3304/;;;;;", "or_profile": "~Fuqi_Jia1;~Yuhang_Dong3;~Minghao_Liu1;~Pei_Huang1;~Feifei_Ma1;~Jian_Zhang27", "aff": "Chinese Academy of Sciences, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Stanford University;Chinese Academy of Sciences, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences", "aff_domain": "ios.ac.cn;ucas.ac.cn;ios.ac.cn;cs.stanford.edu;ios.ac.cn;ios.ac.cn", "position": "PhD student;MS student;PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\njia2023suggesting,\ntitle={Suggesting Variable Order for Cylindrical Algebraic Decomposition via Reinforcement Learning},\nauthor={Fuqi Jia and Yuhang Dong and Minghao Liu and Pei Huang and Feifei Ma and Jian Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vNsdFwjPtL}\n}", "github": "", "project": "", "reviewers": "gZi1;Ydm9;zrjH;HXCT", "pdf_size": 888939, "rating": "5;6;7;8", "confidence": "3;3;3;3", "soundness": "3;3;2;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "148;147;166;131", "wc_strengths": "37;36;107;122", "wc_weaknesses": "156;34;59;331", "wc_questions": "77;22;58;2", "wc_limitations": "12;1;11;2", "wc_review": "430;240;401;588", "wc_reply_reviewers": "26;24;40;53", "wc_reply_authors": "694;23;21;31", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 148.0, 12.389511693363866 ], "wc_strengths_avg": [ 75.5, 39.3605132080363 ], "wc_weaknesses_avg": [ 145.0, 116.6554756537386 ], "wc_questions_avg": [ 39.75, 29.414069762615306 ], "wc_limitations_avg": [ 6.5, 5.024937810560445 ], "wc_review_avg": [ 414.75, 123.46532914142334 ], "wc_reply_reviewers_avg": [ 35.75, 11.712706775122479 ], "wc_reply_authors_avg": [ 192.25, 289.7096606949792 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11446990977972163465&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 5, "email": "ios.ac.cn;ucas.ac.cn;ios.ac.cn;cs.stanford.edu;ios.ac.cn;ios.ac.cn", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Stanford University", "aff_unique_dep": ";;", 
"aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn;https://www.stanford.edu", "aff_unique_abbr": "CAS;UCAS;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Imitation Learning from Imperfection: Theoretical Justifications and Algorithms", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70103", "id": "vO04AzsB49", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3af25aa3de8b7b02ddbd1b6be5031be8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vO04AzsB49", "openreview": "https://openreview.net/forum?id=vO04AzsB49", "poster": "/media/PosterPDFs/NeurIPS%202023/70103.png?t=1697079884.7467904", "slides": "https://nips.cc/virtual/2023/poster/70103", "video": "https://nips.cc/virtual/2023/poster/70103", "author_site": "Ziniu Li, Tian Xu, Zeyu Qin, Yang Yu, Zhi-Quan Luo", "tldr": "", "abstract": "Imitation learning (IL) algorithms excel in acquiring high-quality policies from expert data for sequential decision-making tasks. But, their effectiveness is hampered when faced with limited expert data. To tackle this challenge, a novel framework called (offline) IL with supplementary data has been proposed, which enhances learning by incorporating an additional yet imperfect dataset obtained inexpensively from sub-optimal policies. Nonetheless, learning becomes challenging due to the potential inclusion of out-of-expert-distribution samples. In this work, we propose a mathematical formalization of this framework, uncovering its limitations. Our theoretical analysis reveals that a naive approach\u2014applying the behavioral cloning (BC) algorithm concept to the combined set of expert and supplementary data\u2014may fall short of vanilla BC, which solely relies on expert data. This deficiency arises due to the distribution shift between the two data sources. To address this issue, we propose a new importance-sampling-based technique for selecting data within the expert distribution. We prove that the proposed method eliminates the gap of the naive approach, highlighting its efficacy when handling imperfect data. Empirical studies demonstrate that our method outperforms previous state-of-the-art methods in tasks including robotic locomotion control, Atari video games, and image classification. 
Overall, our work underscores the potential of improving IL by leveraging diverse data sources through effective data selection.", "keywords": "imitation learning;distribution shift;policy optimization;data selection", "primary_area": "", "supplementary_material": "/attachment/e21515ff4231e431ef5098ddf7d8299ea75515f8.pdf", "author": "Ziniu Li;Tian Xu;Zeyu Qin;Yang Yu;Zhi-Quan Luo", "authorids": "~Ziniu_Li1;~Tian_Xu2;~Zeyu_Qin1;~Yang_Yu5;~Zhi-Quan_Luo1", "gender": "M;M;M;M;M", "homepage": "http://www.liziniu.org/;http://www.lamda.nju.edu.cn/xut/;https://alan-qin.github.io/;;http://www.lamda.nju.edu.cn/yuy", "dblp": "254/0986;07/2985-3;271/5778;;46/2181-1", "google_scholar": "80UnKQQAAAAJ;e5mnk1wAAAAJ;3LXI4-MAAAAJ;dW3gcXoAAAAJ;PG2lDSwAAAAJ", "orcid": ";;0000-0003-1733-7892;;", "linkedin": ";;zeyu-qin-546398179/;;", "or_profile": "~Ziniu_Li1;~Tian_Xu2;~Zeyu_Qin1;~Zhi-Quan_Luo1;~Yang_Yu2", "aff": "The Chinese University of Hong Kong, Shenzhen;Nanjing University;Hong Kong University of Science and Technology;The Chinese University of Hong Kong, Shenzhen;Nanjing University", "aff_domain": "cuhk.edu.cn;nju.edu.cn;ust.hk;cuhk.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nli2023imitation,\ntitle={Imitation Learning from Imperfection: Theoretical Justifications and Algorithms},\nauthor={Ziniu Li and Tian Xu and Zeyu Qin and Yang Yu and Zhi-Quan Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vO04AzsB49}\n}", "github": "", "project": "", "reviewers": "FJAh;Yb9D;Y1mx;LjZi", "pdf_size": 1911093, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "110;160;127;141", "wc_strengths": "213;124;75;67", "wc_weaknesses": "439;59;167;57", "wc_questions": "202;43;336;27", "wc_limitations": "123;2;61;18", "wc_review": "1087;388;766;310", "wc_reply_reviewers": "243;55;115;0", "wc_reply_authors": "163;43;46;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 134.5, 18.364367672206956 ], "wc_strengths_avg": [ 119.75, 58.09206055908157 ], "wc_weaknesses_avg": [ 180.5, 155.73936560805686 ], "wc_questions_avg": [ 152.0, 126.35465958958538 ], "wc_limitations_avg": [ 51.0, 46.834816109385976 ], "wc_review_avg": [ 637.75, 311.4758216940763 ], "wc_reply_reviewers_avg": [ 103.25, 90.35589355432218 ], "wc_reply_authors_avg": [ 63.0, 60.53511377704679 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4820297528087677376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cuhk.edu.cn;nju.edu.cn;ust.hk;cuhk.edu.cn;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Chinese University of Hong Kong;Nanjing University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.nju.edu.cn;https://www.ust.hk", "aff_unique_abbr": "CUHK;Nanjing U;HKUST", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Shenzhen;;Hong Kong SAR", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Data Pruning via Moving-one-Sample-out", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70102", "id": "vO6ZdPWaHc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3abe23bf7e295b44369c24465d68987a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vO6ZdPWaHc", "openreview": "https://openreview.net/forum?id=vO6ZdPWaHc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70102", "video": "https://nips.cc/virtual/2023/poster/70102", "author_site": "Haoru Tan, Sitong Wu, Fei Du, Yukang Chen, Zhibin Wang, Fan Wang, Xiaojuan Qi", "tldr": "", "abstract": "In this paper, we propose a novel data-pruning approach called moving-one-sample-out (MoSo), which aims to identify and remove the least informative samples from the training set. The core insight behind MoSo is to determine the importance of each sample by assessing its impact on the optimal empirical risk. This is achieved by measuring the extent to which the empirical risk changes when a particular sample is excluded from the training set. Instead of using the computationally expensive leaving-one-out-retraining procedure, we propose an efficient first-order approximator that only requires gradient information from different training stages. The key idea behind our approximation is that samples with gradients that are consistently aligned with the average gradient of the training set are more informative and should receive higher scores, which could be intuitively understood as follows: if the gradient from a specific sample is consistent with the average gradient vector, it implies that optimizing the network using the sample will yield a similar effect on all remaining samples. \nExperimental results demonstrate that MoSo effectively mitigates severe performance degradation at high pruning ratios and achieves satisfactory performance across various settings. 
Experimental results demonstrate that MoSo effectively mitigates severe performance degradation at high pruning ratios and outperforms state-of-the-art methods by a large margin across various settings.", "keywords": "Data Valuation;Deep Learning;Data Pruning;Coreset Selection.", "primary_area": "", "supplementary_material": "/attachment/7151250ead6a9bcd45c38e9bd907c1b52e4bd65c.pdf", "author": "Haoru Tan;Sitong Wu;Fei Du;Yukang Chen;Zhibin Wang;Fan Wang;XIAOJUAN QI", "authorids": "~Haoru_Tan1;~Sitong_Wu1;~Fei_Du1;~Yukang_Chen1;~Zhibin_Wang1;~Fan_Wang6;~XIAOJUAN_QI2", "gender": "M;F;M;M;M;F;F", "homepage": "https://www.dvlab.ai/;https://stonewst.github.io/;;https://yukangchen.com/;;;https://xjqi.github.io/", "dblp": ";226/3944;;225/4601;;;176/1445-1.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=0ao4z_MAAAAJ;0iHvDYwAAAAJ;6p0ygKUAAAAJ;YHzKee8AAAAJ;WCRGTHsAAAAJ;bGn0uacAAAAJ", "orcid": ";;;;0000-0001-7618-7973;0000-0001-7320-1119;", "linkedin": ";;;;;;", "or_profile": "~Haoru_Tan1;~Sitong_Wu1;~Fei_Du1;~Yukang_Chen1;~Zhibin_Wang1;~Fan_Wang6;~XIAOJUAN_QI2", "aff": "University of Hong Kong;The Chinese University of Hong Kong;Alibaba Group;The Chinese University of Hong Kong;Alibaba Group;Alibaba Group;University of Hong Kong", "aff_domain": "hku.hk;cse.cuhk.edu.hk;alibaba-inc.com;cuhk.edu.hk;alibaba-inc.com;alibaba-inc.com;hku.hk", "position": "PhD student;PhD student;Researcher;PhD student;Researcher;Senior Staff Algorithm Engineer;Assistant Professor", "bibtex": "@inproceedings{\ntan2023data,\ntitle={Data Pruning via Moving-one-Sample-out},\nauthor={Haoru Tan and Sitong Wu and Fei Du and Yukang Chen and Zhibin Wang and Fan Wang and XIAOJUAN QI},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vO6ZdPWaHc}\n}", "github": "", "project": "", "reviewers": "XiFz;xfpA;DWe5;QDiJ;eYrR", "pdf_size": 576398, "rating": "3;5;6;6;7", "confidence": "3;3;4;4;3", "soundness": "1;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "53;61;75;91;61", "wc_strengths": "18;31;86;54;48", "wc_weaknesses": "138;27;380;215;84", "wc_questions": "6;114;739;231;47", "wc_limitations": "10;8;18;8;10", "wc_review": "225;241;1298;599;250", "wc_reply_reviewers": "127;24;40;42;17", "wc_reply_authors": "529;44;57;103;48", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 68.2, 13.422369388450015 ], "wc_strengths_avg": [ 47.4, 23.09632005320328 ], "wc_weaknesses_avg": [ 168.8, 122.4637089100277 ], "wc_questions_avg": [ 227.4, 266.89668413077 ], "wc_limitations_avg": [ 10.8, 3.7094473981982814 ], "wc_review_avg": [ 522.6, 412.13036772361244 ], "wc_reply_reviewers_avg": [ 50.0, 39.64341054954782 ], "wc_reply_authors_avg": [ 156.2, 187.5882725545496 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3611575592573077, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15300485287928408604&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "hku.hk;cse.cuhk.edu.hk;alibaba-inc.com;cuhk.edu.hk;alibaba-inc.com;alibaba-inc.com;hku.hk", "author_num": 7, "aff_unique_index": "0;1;2;1;2;2;0", 
"aff_unique_norm": "University of Hong Kong;Chinese University of Hong Kong;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;https://www.cuhk.edu.hk;https://www.alibaba.com", "aff_unique_abbr": "HKU;CUHK;Alibaba", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "vOBfqhmOoM", "title": "Physics-informed generative model for drug-like molecule conformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion-based methods have been successfully applied to molecule conformer generation using implicit physical modeling. In contrast, conventional, rules-based approaches employ an explicit physical model such as a classical force field parameterization. In order to combine the advantages of both approaches, we present a diffusion-based, physics-informed denoising model (PIDM) for conformer generation that is constructed from molecule subgraph patterns borrowed from classical force fields. The result is a model that is resistant to overfitting and explainable. Using recent advances in denoising score matching, we naturally separate the task of training and generation while providing a smooth transition between deterministic and stochastic generative schemes that adapt to any number of denoising steps. We demonstrate conformer generation quality that outperforms the current state-of-the-art while employing a fraction of parameters.", "keywords": "deep learning;graphical models;Probabilistic Models and Methods;Bioinformatics and Systems Biology", "primary_area": "", "supplementary_material": "/attachment/718456735bff6dbf0eedd7c89173bf2ae3576539.gz", "author": "David C Williams", "authorids": "~David_C_Williams2", "gender": "M", "homepage": "https://nobiastx.com", "dblp": "28/977.html", "google_scholar": "xrNLWWcAAAAJ", "orcid": "0000-0002-4123-9339", "linkedin": "david-williams-73104a23/", "or_profile": "~David_C_Williams2", "aff": "Nobias Therapeutics", "aff_domain": "nobiastx.com", "position": "Principal Researcher", "bibtex": "@misc{\nwilliams2023physicsinformed,\ntitle={Physics-informed generative model for drug-like molecule conformers},\nauthor={David C Williams},\nyear={2023},\nurl={https://openreview.net/forum?id=vOBfqhmOoM}\n}", "github": "", "project": "", "reviewers": "J8rt;htQL;7Zu8;4243", "site": "https://openreview.net/forum?id=vOBfqhmOoM", "pdf_size": 1902436, "rating": "3;3;3;4", "confidence": "4;3;5;4", "soundness": "2;2;3;2", "novelty": "2;2;2;2", "presentation": "2;1;1;2", "wc_summary": "63;18;62;60", "wc_strengths": "66;47;29;42", "wc_weaknesses": "328;341;795;77", "wc_questions": "3;71;18;143", "wc_limitations": "1;1;2;7", "wc_review": "461;478;906;329", "wc_reply_reviewers": "438;69;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 50.75, 18.93904696651867 ], "wc_strengths_avg": [ 46.0, 13.285330255586423 ], "wc_weaknesses_avg": [ 385.25, 258.9154060692411 ], "wc_questions_avg": [ 58.75, 54.81046889053222 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 543.5, 217.09041895026138 ], "wc_reply_reviewers_avg": [ 126.75, 181.8947154262597 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 
0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5454610079604301485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "Nobias Therapeutics", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "title": "Going Beyond Linear Mode Connectivity: The Layerwise Linear Feature Connectivity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70101", "id": "vORUHrVEnH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf3ee5a5422b0e2a88b0c9c6ed3b6144-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vORUHrVEnH", "openreview": "https://openreview.net/forum?id=vORUHrVEnH", "poster": "/media/PosterPDFs/NeurIPS%202023/70101.png?t=1699947768.7272475", "slides": "https://nips.cc/virtual/2023/poster/70101", "video": "https://nips.cc/virtual/2023/poster/70101", "author_site": "Zhanpeng Zhou, Yongyi Yang, Xiaojiang Yang, Junchi Yan, Wei Hu", "tldr": "", "abstract": "Recent work has revealed many intriguing empirical phenomena in neural network training, despite the poorly understood and highly complex loss landscapes and training dynamics. One of these phenomena, Linear Mode Connectivity (LMC), has gained considerable attention due to the intriguing observation that different solutions can be connected by a linear path in the parameter space while maintaining near-constant training and test losses. In this work, we introduce a stronger notion of linear connectivity, Layerwise Linear Feature Connectivity (LLFC), which says that the feature maps of every layer in different trained networks are also linearly connected. We provide comprehensive empirical evidence for LLFC across a wide range of settings, demonstrating that whenever two trained networks satisfy LMC (via either spawning or permutation methods), they also satisfy LLFC in nearly all the layers. Furthermore, we delve deeper into the underlying factors contributing to LLFC, which reveal new insights into the permutation approaches. 
The study of LLFC transcends and advances our understanding of LMC by adopting a feature-learning perspective.", "keywords": "Linear Mode Connectivity;Permutation Invariance;Optimization Landscape;Science of Deep Learning", "primary_area": "", "supplementary_material": "/attachment/548e400aeae1f154634b9b0788d314bd4a29fd5c.zip", "author": "Zhanpeng Zhou;Yongyi Yang;Xiaojiang Yang;Junchi Yan;Wei Hu", "authorids": "~Zhanpeng_Zhou1;~Yongyi_Yang1;~Xiaojiang_Yang1;~Junchi_Yan2;~Wei_Hu1", "gender": "M;M;M;M;M", "homepage": "https://zzp1012.github.io/;https://thinklab.sjtu.edu.cn/;https://weihu.me;http://thinklab.sjtu.edu.cn/;https://fftyyy.github.io", "dblp": ";;;60/7949.html;05/3653", "google_scholar": "idxXY3UAAAAJ;;ZybgAqkAAAAJ;ga230VoAAAAJ;EmL0jD0AAAAJ", "orcid": ";;;0000-0001-9639-7679;", "linkedin": ";;;;yongyi-yang-528922218/?originalSubdomain=cn", "or_profile": "~Zhanpeng_Zhou1;~Xiaojiang_Yang1;~Wei_Hu1;~Junchi_Yan1;~Yang_Yongyi1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Google;Shanghai Jiaotong University;University of Michigan - Ann Arbor", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;google.com;sjtu.edu.cn;umich.edu", "position": "PhD student;PhD student;Visiting researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nzhou2023going,\ntitle={Going Beyond Linear Mode Connectivity: The Layerwise Linear Feature Connectivity},\nauthor={Zhanpeng Zhou and Yongyi Yang and Xiaojiang Yang and Junchi Yan and Wei Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vORUHrVEnH}\n}", "github": "", "project": "", "reviewers": "4GwW;ZZ9i;Vyah;6KFq;Dt29", "pdf_size": 2950258, "rating": "5;6;7;7;8", "confidence": "4;4;3;3;3", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "4;3;4;3;4", "wc_summary": "72;158;139;109;77", "wc_strengths": "65;132;52;57;149", "wc_weaknesses": "224;226;67;108;52", "wc_questions": "140;138;96;152;19", "wc_limitations": "99;16;10;11;32", "wc_review": "600;670;364;437;329", "wc_reply_reviewers": "317;19;28;21;24", "wc_reply_authors": "322;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 111.0, 33.68679266418814 ], "wc_strengths_avg": [ 91.0, 40.982923272992615 ], "wc_weaknesses_avg": [ 135.4, 75.42307339269595 ], "wc_questions_avg": [ 109.0, 48.82622246293481 ], "wc_limitations_avg": [ 33.6, 33.636884516851445 ], "wc_review_avg": [ 480.0, 133.1209975924159 ], "wc_reply_reviewers_avg": [ 81.8, 117.63910914317567 ], "wc_reply_authors_avg": [ 64.4, 128.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8807048459279795, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5249892062619524477&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;google.com;sjtu.edu.cn;umich.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Google;University of Michigan", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.google.com;https://www.umich.edu", "aff_unique_abbr": "SJTU;Google;UM", "aff_campus_unique_index": 
"1;2", "aff_campus_unique": ";Mountain View;Ann Arbor", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "PIXIU: A Comprehensive Benchmark, Instruction Dataset and Large Language Model for Finance", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73431", "id": "vTrRq6vCQH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6a386d703b50f1cf1f61ab02a15967bb-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=vTrRq6vCQH", "openreview": "https://openreview.net/forum?id=vTrRq6vCQH", "poster": "/media/PosterPDFs/NeurIPS%202023/73431.png?t=1699498575.2411695", "slides": "https://nips.cc/virtual/2023/poster/73431", "video": "https://nips.cc/virtual/2023/poster/73431", "author_site": "Qianqian Xie, Weiguang Han, Xiao Zhang, Yanzhao Lai, Min Peng, Alejandro Lopez-Lira, Jimin Huang", "tldr": "", "abstract": "Although large language models (LLMs) have shown great performance in natural language processing (NLP) in the financial domain, there are no publicly available financially tailored LLMs, instruction tuning datasets, and evaluation benchmarks, which is critical for continually pushing forward the open-source development of financial artificial intelligence (AI). This paper introduces PIXIU, a comprehensive framework including the first financial LLM based on fine-tuning LLaMA with instruction data, the first instruction data with 128K data samples to support the fine-tuning, and an evaluation benchmark with 8 tasks and 15 datasets. We first construct the large-scale multi-task instruction data considering a variety of financial tasks, financial document types, and financial data modalities. We then propose a financial LLM called FinMA by fine-tuning LLaMA with the constructed dataset to be able to follow instructions for various financial tasks. To support the evaluation of financial LLMs, we propose a standardized benchmark that covers a set of critical financial tasks, including six financial NLP tasks and two financial prediction tasks. With this benchmark, we conduct a detailed analysis of FinMA and several existing LLMs, uncovering their strengths and weaknesses in handling critical financial tasks. 
The model, datasets, benchmark, and experimental results are open-sourced to facilitate future research in financial AI.", "keywords": "Financial Natural Language Processing;Large Language Models;Fine-tuning;Benchmarking", "primary_area": "", "supplementary_material": "/attachment/2e8568ae3110bb9ed64351dacc446bb65f14b320.pdf", "author": "Qianqian Xie;Weiguang Han;Xiao Zhang;Yanzhao Lai;Min Peng;Alejandro Lopez-Lira;Jimin Huang", "authorids": "~Qianqian_Xie1;~Weiguang_Han1;zhangx767@mail2.sysu.edu.cn;laiyanzhao@swjtu.edu.cn;~Min_Peng2;alejandro.lopez-lira@warrington.ufl.edu;~Jimin_Huang1", "gender": "F;M;;;F;;M", "homepage": ";https://orcid.org/0000-0003-1821-4667;;;;;", "dblp": ";258/5894;;;;;163/4119", "google_scholar": "UYW7X_0AAAAJ;;;;;;SnQ_CycAAAAJ", "orcid": "0000-0002-9588-7454;;;;0000-0002-8766-1105;;0000-0002-3501-3907", "linkedin": ";;;;;;", "or_profile": "~Qianqian_Xie1;~Weiguang_Han1;zhangx767@mail2.sysu.edu.cn;laiyanzhao@swjtu.edu.cn;~Min_Peng2;alejandro.lopez-lira@warrington.ufl.edu;~Jimin_Huang1", "aff": "Yale University;Wuhan University;;;Wuhan University;;Chancefocus AMC.", "aff_domain": "yale.edu;whu.edu.cn;;;whu.edu.cn;;chancefocus.com", "position": "Postdoc;PhD student;;;Full Professor;;Researcher", "bibtex": "@inproceedings{\nxie2023pixiu,\ntitle={{PIXIU}: A Comprehensive Benchmark, Instruction Dataset and Large Language Model for Finance},\nauthor={Qianqian Xie and Weiguang Han and Xiao Zhang and Yanzhao Lai and Min Peng and Alejandro Lopez-Lira and Jimin Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=vTrRq6vCQH}\n}", "github": "", "project": "", "reviewers": "7TUR;v2GB;TmzR;kysA", "pdf_size": 350120, "rating": "5;5;7;8", "confidence": "4;4;3;4", "wc_summary_and_contributions": "84;133;167;105", "wc_strengths": "69;133;89;97", "wc_improvement": "269;73;139;161", "wc_limitations": "8;75;14;20", "wc_correctness": "9;22;8;114", "wc_clarity": "10;3;12;518", "wc_relation_to_prior_work": "16;7;13;35", "wc_documentation": "17;16;34;34", "wc_additional_feedback": "1;1;1;1", "wc_review": "483;463;477;1085", "wc_reply_reviewers": "41;93;10;24", "wc_reply_authors": "891;1171;450;629", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 122.25, 31.13980571551467 ], "wc_strengths_avg": [ 97.0, 23.15167380558045 ], "wc_improvement_avg": [ 160.5, 70.51772826743641 ], "wc_limitations_avg": [ 29.25, 26.75233634656981 ], "wc_correctness_avg": [ 38.25, 44.08160047003738 ], "wc_clarity_avg": [ 135.75, 220.7174381420734 ], "wc_relation_to_prior_work_avg": [ 17.75, 10.473180032826706 ], "wc_documentation_avg": [ 25.25, 8.757139944068497 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 627.0, 264.5259911615492 ], "wc_reply_reviewers_avg": [ 42.0, 31.424512724941337 ], "wc_reply_authors_avg": [ 785.25, 272.39344246879364 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11975796857093830110&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "yale.edu;whu.edu.cn;;;whu.edu.cn;;chancefocus.com", "author_num": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Yale 
University;Wuhan University;Chancefocus AMC", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yale.edu;http://www.whu.edu.cn/;", "aff_unique_abbr": "Yale;WHU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China;" }, { "title": "Faster Margin Maximization Rates for Generic Optimization Methods", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70100", "id": "vTug54Uunq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c4cfdc27b46659e70a142ac249485a49-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vTug54Uunq", "openreview": "https://openreview.net/forum?id=vTug54Uunq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70100", "video": "https://nips.cc/virtual/2023/poster/70100", "author_site": "Guanghui Wang, Zihao Hu, Vidya Muthukumar, Jacob Abernethy", "tldr": "", "abstract": "First-order optimization methods tend to inherently favor certain solutions over others when minimizing a given training objective with multiple local optima. This phenomenon, known as \\emph{implicit bias}, plays a critical role in understanding the generalization capabilities of optimization algorithms. Recent research has revealed that gradient-descent-based methods exhibit an implicit bias for the $\\ell_2$-maximal margin classifier in the context of separable binary classification. In contrast, generic optimization methods, such as mirror descent and steepest descent, have been shown to converge to maximal margin classifiers defined by alternative geometries. However, while gradient-descent-based algorithms demonstrate fast implicit bias rates, the implicit bias rates of generic optimization methods have been relatively slow. To address this limitation, in this paper, we present a series of state-of-the-art implicit bias rates for mirror descent and steepest descent algorithms. Our primary technique involves transforming a generic optimization algorithm into an online learning dynamic that solves a regularized bilinear game, providing a unified framework for analyzing the implicit bias of various optimization methods. 
The accelerated rates are derived leveraging the regret bounds of online learning algorithms within this game framework.", "keywords": "Implicit bias;margin maximization;zero-sum game;online learning", "primary_area": "", "supplementary_material": "/attachment/3921fd3fbae3c75f8d796d88a83fc3da1c9d8578.pdf", "author": "Guanghui Wang;Zihao Hu;Vidya Muthukumar;Jacob Abernethy", "authorids": "~Guanghui_Wang3;~Zihao_Hu1;~Vidya_Muthukumar3;~Jacob_Abernethy1", "gender": "M;;F;M", "homepage": "http://www.lamda.nju.edu.cn/wanggh/;;https://vmuthukumar.ece.gatech.edu;https://www.cc.gatech.edu/~jabernethy9/", "dblp": "44/2323-6;174/8733;149/0019;91/2520", "google_scholar": "oNgvRg4AAAAJ;;K2OEs2YAAAAJ;FDu4ciwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Guanghui_Wang3;~Zihao_Hu1;~Vidya_Muthukumar3;~Jacob_Abernethy1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023faster,\ntitle={Faster Margin Maximization Rates for Generic Optimization Methods},\nauthor={Guanghui Wang and Zihao Hu and Vidya Muthukumar and Jacob Abernethy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vTug54Uunq}\n}", "github": "", "project": "", "reviewers": "VcXn;HfJS;Gj8T;bnuX;bPGG", "pdf_size": 425699, "rating": "6;6;7;7;8", "confidence": "2;2;4;2;2", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "88;40;87;108;146", "wc_strengths": "62;14;101;79;233", "wc_weaknesses": "88;20;233;119;148", "wc_questions": "221;58;46;67;30", "wc_limitations": "36;1;37;7;23", "wc_review": "495;133;504;380;580", "wc_reply_reviewers": "93;0;78;4;4", "wc_reply_authors": "0;0;487;0;0", "reply_reviewers": "1;0;3;1;1", "reply_authors": "1;1;4;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.8, 34.35345688573422 ], "wc_strengths_avg": [ 97.8, 73.4122605563948 ], "wc_weaknesses_avg": [ 121.6, 70.0930809709489 ], "wc_questions_avg": [ 84.4, 69.41642456940576 ], "wc_limitations_avg": [ 20.8, 14.702380759591284 ], "wc_review_avg": [ 418.4, 156.3567715194964 ], "wc_reply_reviewers_avg": [ 35.8, 40.88226999568395 ], "wc_reply_authors_avg": [ 97.4, 194.8 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13363062095621217, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9480693799488773798&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Unified Framework for Uniform Signal Recovery in Nonlinear Generative Compressed Sensing", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70099", "id": "vUXNNLatFv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1a04df6a405210aab4986994b873db9b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vUXNNLatFv", "openreview": "https://openreview.net/forum?id=vUXNNLatFv", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70099", "video": "https://nips.cc/virtual/2023/poster/70099", "author_site": "Junren Chen, Jonathan Scarlett, Michael Ng, Zhaoqiang Liu", "tldr": "", "abstract": "In generative compressed sensing (GCS), we want to recover a signal $\\mathbf{x^*}\\in\\mathbb{R}^n$ from $m$ measurements ($m\\ll n$) using a generative prior $\\mathbf{x^*}\\in G(\\mathbb{B}_2^k(r))$, where $G$ is typically an $L$-Lipschitz continuous generative model and $\\mathbb{B}_2^k(r)$ represents the radius-$r$ $\\ell_2$-ball in $\\mathbb{R}^k$. Under nonlinear measurements, most prior results are non-uniform, i.e., they hold with high probability for a fixed $\\mathbf{x^*}$ rather than for all $\\mathbf{x^*}$ simultaneously. In this paper, we build a unified framework to derive uniform recovery guarantees for nonlinear GCS where the observation model is nonlinear and possibly discontinuous or unknown. Our framework accommodates GCS with 1-bit/uniformly quantized observations and single index model as canonical examples. Specifically, using a single realization of the sensing ensemble and generalized Lasso, all $\\mathbf{x^*}\\in G(\\mathbb{B}_2^k(r))$ can be recovered up to an $\\ell_2$-error at most $\\epsilon$ using roughly $\\tilde{O}({k}/{\\epsilon^2})$ samples, with omitted logarithmic factors typically being dominated by $\\log L$. Notably, this almost coincides with existing non-uniform guarantees up to logarithmic factors, hence the uniformity costs very little. As part of our technical contributions, we introduce Lipschitz approximation to handle discontinuous observation models. We also develop a concentration inequality that produces tighter bound for product process whose index sets have low metric entropy. 
Experimental results are presented to corroborate our theory.", "keywords": "Compressed sensing;generative models;nonlinearity;uniform recovery", "primary_area": "", "supplementary_material": "/attachment/6b89ef1bc7e6f40759d198dc7ad76eb22c39bc10.zip", "author": "Junren Chen;Jonathan Scarlett;Michael Ng;Zhaoqiang Liu", "authorids": "~Junren_Chen1;~Jonathan_Scarlett1;~Michael_Ng3;~Zhaoqiang_Liu1", "gender": "M;M;;M", "homepage": "https://junrenchen58.github.io/;https://www.comp.nus.edu.sg/~scarlett/;https://hkumath.hku.hk/~mng/;", "dblp": ";78/9667;;198/1405", "google_scholar": "pw3tOroAAAAJ;https://scholar.google.co.uk/citations?user=a4D08aQAAAAJ;;EmGrPbIAAAAJ", "orcid": "0000-0003-3606-9598;;;", "linkedin": ";;;", "or_profile": "~Junren_Chen1;~Jonathan_Scarlett1;~Michael_Ng3;~Zhaoqiang_Liu1", "aff": "The University of Hong Kong;National University of Singapore;The University of Hong Kong;", "aff_domain": "hku.hk;nus.edu.sg;hku.hk;", "position": "PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nchen2023a,\ntitle={A Unified Framework for Uniform Signal Recovery in Nonlinear Generative Compressed Sensing},\nauthor={Junren Chen and Jonathan Scarlett and Michael Ng and Zhaoqiang Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vUXNNLatFv}\n}", "github": "", "project": "", "reviewers": "85GG;6eZp;K1vm", "pdf_size": 1306322, "rating": "3;6;6", "confidence": "4;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;2;2", "wc_summary": "56;70;35", "wc_strengths": "36;35;129", "wc_weaknesses": "313;112;29", "wc_questions": "2;28;26", "wc_limitations": "10;1;5", "wc_review": "417;246;224", "wc_reply_reviewers": "652;25;0", "wc_reply_authors": "1272;416;0", "reply_reviewers": "2;1;0", "reply_authors": "3;3;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 53.666666666666664, 14.383632673594278 ], "wc_strengths_avg": [ 66.66666666666667, 44.07821331325589 ], "wc_weaknesses_avg": [ 151.33333333333334, 119.23180038153507 ], "wc_questions_avg": [ 18.666666666666668, 11.8133634311129 ], "wc_limitations_avg": [ 5.333333333333333, 3.6817870057290873 ], "wc_review_avg": [ 295.6666666666667, 86.26445128530962 ], "wc_reply_reviewers_avg": [ 225.66666666666666, 301.63591003430315 ], "wc_reply_authors_avg": [ 562.6666666666666, 529.5465565515043 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6181457126984143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hku.hk;nus.edu.sg;hku.hk;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Hong Kong;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.nus.edu.sg", "aff_unique_abbr": "HKU;NUS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "SustainGym: Reinforcement Learning Environments for Sustainable Energy Systems", "status": "Poster", 
"track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73430", "id": "vZ9tA3o3hr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ba74855789913e5ed36f87288af79e5b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=vZ9tA3o3hr", "openreview": "https://openreview.net/forum?id=vZ9tA3o3hr", "poster": "/media/PosterPDFs/NeurIPS%202023/73430.png?t=1702428774.4597354", "slides": "https://nips.cc/virtual/2023/poster/73430", "video": "https://nips.cc/virtual/2023/poster/73430", "author_site": "Christopher Yeh, Victor Li, Rajeev Datta, Julio Arroyo, Nicolas Christianson, Chi Zhang, Yize Chen, Mohammad Mehdi Hosseini, Azarang Golmohammadi, Yuanyuan Shi, Yisong Yue, Adam Wierman", "tldr": "", "abstract": "The lack of standardized benchmarks for reinforcement learning (RL) in sustainability applications has made it difficult to both track progress on specific domains and identify bottlenecks for researchers to focus their efforts. In this paper, we present SustainGym, a suite of five environments designed to test the performance of RL algorithms on realistic sustainable energy system tasks, ranging from electric vehicle charging to carbon-aware data center job scheduling. The environments test RL algorithms under realistic distribution shifts as well as in multi-agent settings. We show that standard off-the-shelf RL algorithms leave significant room for improving performance and highlight the challenges ahead for introducing RL to real-world sustainability tasks.", "keywords": "reinforcement learning;sustainability;energy systems;multi-agent;distribution shift", "primary_area": "", "supplementary_material": "/attachment/13183da2ccfcfbfc591942b236b791b889335dc5.pdf", "author": "Christopher Yeh;Victor Li;Rajeev Datta;Julio Arroyo;Nicolas Christianson;Chi Zhang;Yize Chen;Mohammad Mehdi Hosseini;Azarang Golmohammadi;Yuanyuan Shi;Yisong Yue;Adam Wierman", "authorids": "~Christopher_Yeh1;vhli@caltech.edu;~Rajeev_Datta1;~Julio_Arroyo1;~Nicolas_Christianson1;chz056@ucsd.edu;~Yize_Chen1;~Mohammad_Mehdi_Hosseini1;agolmohammadi@beyond.ai;~Yuanyuan_Shi1;~Yisong_Yue1;~Adam_Wierman1", "gender": ";;M;;;;M;M;;;M;M", "homepage": "https://chrisyeh96.github.io;;https://rajeev-datta.github.io;;https://nicochristianson.com/;;https://sites.google.com/view/yizechen;;;;http://www.yisongyue.com;https://adamwierman.com/", "dblp": "210/2480;;;;322/8648;;198/0893;;;;28/1244;56/4447", "google_scholar": "6Omq2pcAAAAJ;;https://scholar.google.com/citations?hl=en;;XS2UFA8AAAAJ;;G1NiRmwAAAAJ;HT2bj5IAAAAJ;;;tEk4qo8AAAAJ;4OvOdSgAAAAJ", "orcid": "0000-0002-7624-6168;;;;0000-0001-8330-8964;;0000-0003-4481-3858;;;;0000-0001-9127-1989;0000-0002-5923-0199", "linkedin": ";;rajeev-datta-1795ba1b1/;julio-arroyo/;;;;;;;yisongyue/;adam-wierman-a529474/", "or_profile": "~Christopher_Yeh1;vhli@caltech.edu;~Rajeev_Datta1;~Julio_Arroyo1;~Nicolas_Christianson1;chz056@ucsd.edu;~Yize_Chen1;~Mohammad_Mehdi_Hosseini1;agolmohammadi@beyond.ai;~Yuanyuan_Shi1;~Yisong_Yue1;~Adam_Wierman1", "aff": "California Institute of Technology;;California Institute of Technology;California Institute of Technology;California Institute of Technology;;Hong Kong University of Science and Technology;;;;California Institute of Technology;California Institute of Technology", "aff_domain": "caltech.edu;;caltech.edu;caltech.edu;caltech.edu;;ust.hk;;;;caltech.edu;caltech.edu", "position": "PhD student;;Undergrad student;Undergrad student;PhD student;;Assistant Professor;;;;Full Professor;Professor", 
"bibtex": "@inproceedings{\nyeh2023sustaingym,\ntitle={SustainGym: Reinforcement Learning Environments for Sustainable Energy Systems},\nauthor={Christopher Yeh and Victor Li and Rajeev Datta and Julio Arroyo and Nicolas Christianson and Chi Zhang and Yize Chen and Mohammad Mehdi Hosseini and Azarang Golmohammadi and Yuanyuan Shi and Yisong Yue and Adam Wierman},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=vZ9tA3o3hr}\n}", "github": "", "project": "", "reviewers": "E4iv;pDRB;4BfE;P6Jo", "pdf_size": 1207915, "rating": "5;6;6;7", "confidence": "4;3;4;4", "wc_summary_and_contributions": "89;50;83;22", "wc_strengths": "69;56;41;28", "wc_improvement": "131;146;84;103", "wc_limitations": "63;7;152;30", "wc_correctness": "8;36;47;14", "wc_clarity": "58;5;12;14", "wc_relation_to_prior_work": "16;6;53;13", "wc_documentation": "23;33;30;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "458;340;503;240", "wc_reply_reviewers": "34;0;0;7", "wc_reply_authors": "1071;256;817;395", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 61.0, 26.972207918522354 ], "wc_strengths_avg": [ 48.5, 15.435349040433131 ], "wc_improvement_avg": [ 116.0, 24.072806234421446 ], "wc_limitations_avg": [ 63.0, 55.10444628158421 ], "wc_correctness_avg": [ 26.25, 15.880412463157246 ], "wc_clarity_avg": [ 22.25, 20.90902915010642 ], "wc_relation_to_prior_work_avg": [ 22.0, 18.261982367749674 ], "wc_documentation_avg": [ 25.25, 6.94172168845741 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 385.25, 102.83816169107652 ], "wc_reply_reviewers_avg": [ 10.25, 14.00669482783144 ], "wc_reply_authors_avg": [ 634.75, 325.7532616874312 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12715174282370010367&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "caltech.edu;;caltech.edu;caltech.edu;caltech.edu;;ust.hk;;;;caltech.edu;caltech.edu", "author_num": 12, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "California Institute of Technology;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;https://www.ust.hk", "aff_unique_abbr": "Caltech;HKUST", "aff_campus_unique_index": "0;0;0;0;1;0;0", "aff_campus_unique": "Pasadena;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "ForkMerge: Mitigating Negative Transfer in Auxiliary-Task Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70098", "id": "vZHk1QlBQW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/60f9118a849e8e9a0c67e2a36ad80ebf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vZHk1QlBQW", "openreview": "https://openreview.net/forum?id=vZHk1QlBQW", "poster": "/media/PosterPDFs/NeurIPS%202023/70098.png?t=1697277625.1977432", "slides": "https://nips.cc/virtual/2023/poster/70098", "video": "https://nips.cc/virtual/2023/poster/70098", "author_site": "Junguang Jiang, Baixu Chen, Junwei Pan, Ximei Wang, Dapeng Liu, Jie Jiang, Mingsheng Long", "tldr": "", "abstract": 
"Auxiliary-Task Learning (ATL) aims to improve the performance of the target task by leveraging the knowledge obtained from related tasks. Occasionally, learning multiple tasks simultaneously results in lower accuracy than learning only the target task, which is known as negative transfer. This problem is often attributed to the gradient conflicts among tasks, and is frequently tackled by coordinating the task gradients in previous works. However, these optimization-based methods largely overlook the auxiliary-target generalization capability. To better understand the root cause of negative transfer, we experimentally investigate it from both optimization and generalization perspectives. Based on our findings, we introduce ForkMerge, a novel approach that periodically forks the model into multiple branches, automatically searches the varying task weights by minimizing target validation errors, and dynamically merges all branches to filter out detrimental task-parameter updates. On a series of auxiliary-task learning benchmarks, ForkMerge outperforms existing methods and effectively mitigates negative transfer.", "keywords": "Auxiliary-Task Learning;Negative Transfer", "primary_area": "", "supplementary_material": "/attachment/0d817aa31007b6d04670fb2da7cd71c55b58582d.pdf", "author": "Junguang Jiang;Baixu Chen;Junwei Pan;Ximei Wang;Dapeng Liu;jie jiang;Mingsheng Long", "authorids": "~Junguang_Jiang2;~Baixu_Chen2;~Junwei_Pan1;~Ximei_Wang1;~Dapeng_Liu2;~jie_jiang3;~Mingsheng_Long5", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/tsingcbx99;https://junwei-pan.github.io/;https://wxm17.github.io/;https://openreview.net/profile?id=~Dapeng_Liu2;https://baike.baidu.com/item/%E8%92%8B%E6%9D%B0/58674740;https://junguangjiang.github.io/;http://ise.thss.tsinghua.edu.cn/~mlong", "dblp": "279/4076;210/6440;89/8876;;32/7018-15.html;276/3175;74/9023", "google_scholar": ";sUaBkFkAAAAJ;WmOCCVgAAAAJ;;;dXS9TPUAAAAJ;_MjXpXkAAAAJ", "orcid": ";0009-0003-2697-7012;;0009-0003-2973-9167;0000-0001-9658-5127;;0000-0002-5412-9120", "linkedin": ";;;;;;", "or_profile": "~Baixu_Chen2;~Junwei_Pan1;~Ximei_Wang1;~Dapeng_Liu2;~jie_jiang3;~junguang_jiang1;~Mingsheng_Long2", "aff": "Tsinghua University;Tencent;Tencent;Tencent;Tencent AI Lab;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Researcher;Researcher;Researcher;VP;MS student;Associate Professor", "bibtex": "@inproceedings{\njiang2023forkmerge,\ntitle={ForkMerge: Mitigating Negative Transfer in Auxiliary-Task Learning},\nauthor={Junguang Jiang and Baixu Chen and Junwei Pan and Ximei Wang and Dapeng Liu and jie jiang and Mingsheng Long},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vZHk1QlBQW}\n}", "github": "", "project": "", "reviewers": "QCm1;pM6c;G3Ms;yZ6C;ECtn;uS5H", "pdf_size": 2190296, "rating": "4;6;6;6;7;7", "confidence": "2;4;3;4;5;3", "soundness": "2;3;3;2;3;3", "novelty": "1;3;2;2;3;3", "presentation": "3;3;3;3;3;2", "wc_summary": "84;213;59;67;104;90", "wc_strengths": "32;112;51;18;42;62", "wc_weaknesses": "103;281;55;245;30;165", "wc_questions": "76;258;217;2;1;103", "wc_limitations": "10;33;8;2;2;49", "wc_review": "305;897;390;334;179;469", "wc_reply_reviewers": "113;351;0;97;0;29", "wc_reply_authors": "878;88;0;556;0;53", "reply_reviewers": "1;1;0;1;0;1", "reply_authors": "3;2;1;2;1;2", "rating_avg": [ 6.0, 1.0 ], 
"confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 102.83333333333333, 51.41794974087897 ], "wc_strengths_avg": [ 52.833333333333336, 29.868694127612763 ], "wc_weaknesses_avg": [ 146.5, 93.0694185362016 ], "wc_questions_avg": [ 109.5, 98.40519972711469 ], "wc_limitations_avg": [ 17.333333333333332, 17.603661235347857 ], "wc_review_avg": [ 429.0, 226.9530054144837 ], "wc_reply_reviewers_avg": [ 98.33333333333333, 121.24584758067203 ], "wc_reply_authors_avg": [ 262.5, 335.9403468871619 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.8333333333333333, 0.6871842709362768 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6963106238227913, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=908849417900801580&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "tsinghua.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;1;0;0", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.tencent.com", "aff_unique_abbr": "THU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Extremal Domain Translation with Neural Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70097", "id": "vZRiMjo826", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7eed2822411dc37b3768ae04561caafa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vZRiMjo826", "openreview": "https://openreview.net/forum?id=vZRiMjo826", "poster": "/media/PosterPDFs/NeurIPS%202023/70097.png?t=1701683583.763056", "slides": "https://nips.cc/virtual/2023/poster/70097", "video": "https://nips.cc/virtual/2023/poster/70097", "author_site": "Milena Gazdieva, Alexander Korotin, Daniil Selikhanovych, Evgeny Burnaev", "tldr": "", "abstract": "In many unpaired image domain translation problems, e.g., style transfer or super-resolution, it is important to keep the translated image similar to its respective input image. We propose the extremal transport (ET) which is a mathematical formalization of the theoretically best possible unpaired translation between a pair of domains w.r.t. the given similarity function. Inspired by the recent advances in neural optimal transport (OT), we propose a scalable algorithm to approximate ET maps as a limit of partial OT maps. We test our algorithm on toy examples and on the unpaired image-to-image translation task. 
The code is publicly available at https://github.com/milenagazdieva/ExtremalNeuralOptimalTransport", "keywords": "optimal transport;partial optimal transport;neural networks;domain translation", "primary_area": "", "supplementary_material": "/attachment/6b8f6576115171156ed70c4585919f9522e4b88c.pdf", "author": "Milena Gazdieva;Alexander Korotin;Daniil Selikhanovych;Evgeny Burnaev", "authorids": "~Milena_Gazdieva1;~Alexander_Korotin2;~Daniil_Selikhanovych1;~Evgeny_Burnaev1", "gender": "F;M;M;M", "homepage": ";https://github.com/Daniil-Selikhanovych;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "309/6585;243/8927;144/7845;209/9906", "google_scholar": "h52_Zx8AAAAJ;N9bInaYAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": "0000-0003-0047-1577;0000-0001-8789-5924;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": ";;;", "or_profile": "~Milena_Gazdieva1;~Daniil_Selikhanovych1;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;skoltech.ru;skoltech.ru;skoltech.ru", "position": "PhD student;PhD student;Full Professor;Head of Research Group", "bibtex": "@inproceedings{\ngazdieva2023extremal,\ntitle={Extremal Domain Translation with Neural Optimal Transport},\nauthor={Milena Gazdieva and Alexander Korotin and Daniil Selikhanovych and Evgeny Burnaev},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vZRiMjo826}\n}", "github": "", "project": "", "reviewers": "Mz3K;iXGC;3fqr;Cqh5;URfJ", "pdf_size": 28722637, "rating": "4;6;7;7;9", "confidence": "4;3;3;3;3", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;4;4;3;4", "wc_summary": "50;169;75;98;56", "wc_strengths": "81;43;79;75;150", "wc_weaknesses": "567;90;454;38;51", "wc_questions": "51;37;146;16;100", "wc_limitations": "1;11;6;15;6", "wc_review": "750;350;760;242;363", "wc_reply_reviewers": "260;0;56;0;14", "wc_reply_authors": "1225;0;0;0;0", "reply_reviewers": "2;0;1;0;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.6, 1.624807680927192 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 89.6, 43.093386963663 ], "wc_strengths_avg": [ 85.6, 35.04054794092124 ], "wc_weaknesses_avg": [ 240.0, 224.38805672316875 ], "wc_questions_avg": [ 70.0, 46.99361658778775 ], "wc_limitations_avg": [ 7.8, 4.791659420284375 ], "wc_review_avg": [ 493.0, 218.03119042925945 ], "wc_reply_reviewers_avg": [ 66.0, 99.14837366290988 ], "wc_reply_authors_avg": [ 245.0, 490.0 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8000946913656628, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2188469789918492279&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "skoltech.ru;skoltech.ru;skoltech.ru;skoltech.ru", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.skoltech.ru", "aff_unique_abbr": "Skoltech", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Data-Driven Network Neuroscience: On Data Collection and Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73429", "id": "vZf7jrX1el", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/44e3a3115ca26e5127851acd0cedd0d9-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=vZf7jrX1el", "openreview": "https://openreview.net/forum?id=vZf7jrX1el", "poster": "/media/PosterPDFs/NeurIPS%202023/73429.png?t=1701398739.807942", "slides": "https://nips.cc/virtual/2023/poster/73429", "video": "https://nips.cc/virtual/2023/poster/73429", "author_site": "Jiaxing Xu, Yunhan Yang, David Huang, Sophi Shilpa Gururajapathy, Yiping Ke, Miao Qiao, Alan Wang, Haribalan Kumar, Josh McGeown, Eryn Kwon", "tldr": "", "abstract": "This paper presents a comprehensive and quality collection of functional human brain network data for potential research in the intersection of neuroscience, machine learning, and graph analytics. \nAnatomical and functional MRI images have been used to understand the functional connectivity of the human brain and are particularly important in identifying underlying neurodegenerative conditions such as Alzheimer's, Parkinson's, and Autism. Recently, the study of the brain in the form of brain networks using machine learning and graph analytics has become increasingly popular, especially to predict the early onset of these conditions. A brain network, represented as a graph, retains rich structural and positional information that traditional examination methods are unable to capture. However, the lack of publicly accessible brain network data prevents researchers from data-driven explorations. One of the main difficulties lies in the complicated domain-specific preprocessing steps and the exhaustive computation required to convert the data from MRI images into brain networks. We bridge this gap by collecting a large amount of MRI images from public databases and a private source, working with domain experts to make sensible design choices, and preprocessing the MRI images to produce a collection of brain network datasets. The datasets originate from 6 different sources, cover 4 brain conditions, and consist of a total of 2,702 subjects. \nWe test our graph datasets on 12 machine learning models to provide baselines and validate the data quality on a recent graph analysis model. 
To lower the barrier to entry and promote the research in this interdisciplinary field, we release our brain network data and complete preprocessing details including codes at https://doi.org/10.17608/k6.auckland.21397377 and https://github.com/brainnetuoa/data_driven_network_neuroscience.", "keywords": "Network Neuroscience;Graph Analysis;Brain Network Datasets;Brain Network Analysis;Graph Classification", "primary_area": "", "supplementary_material": "/attachment/f1777e6b3b3ed7882c48fdfec975e5d631411c83.pdf", "author": "Jiaxing Xu;Yunhan Yang;David Tse Jung Huang;Sophi Shilpa Gururajapathy;Yiping Ke;Miao Qiao;Alan Wang;Haribalan Kumar;Josh McGeown;Eryn Kwon", "authorids": "~Jiaxing_Xu2;~Yunhan_Yang2;~David_Tse_Jung_Huang1;~Sophi_Shilpa_Gururajapathy1;~Yiping_Ke1;~Miao_Qiao1;alan.wang@auckland.ac.nz;~Haribalan_Kumar1;j.mcgeown@matai.org.nz;e.kwon@auckland.ac.nz", "gender": ";M;;F;F;F;;;;", "homepage": ";https://profiles.auckland.ac.nz/yunhan-yang;;;https://keyiping.wixsite.com/index;https://miaoqiao.github.io/;;;;", "dblp": ";;;;07/3111;93/9883;;;;", "google_scholar": ";;https://scholar.google.co.nz/citations?user=6RvkTyYAAAAJ;;https://scholar.google.com.tw/citations?user=30Fp0YYAAAAJ;https://scholar.google.com.tw/citations?user=Guff-1wAAAAJ;;;;", "orcid": ";;;;0000-0001-9473-3202;;;;;", "linkedin": ";;;sophi-shilpa-g-b5747516;;;;;;", "or_profile": "~Jiaxing_Xu2;~Yunhan_Yang2;~David_Tse_Jung_Huang1;~Sophi_Shilpa_Gururajapathy1;~Yiping_Ke1;~Miao_Qiao1;alan.wang@auckland.ac.nz;~Haribalan_Kumar1;j.mcgeown@matai.org.nz;e.kwon@auckland.ac.nz", "aff": ";University of Auckland;University of Auckland;;Nanyang Technological University;University of Auckland;;;;", "aff_domain": ";auckland.ac.nz;auckland.ac.nz;;ntu.edu.sg;auckland.ac.nz;;;;", "position": ";PhD student;Postdoc;;Associate Professor;Lecturer;;;;", "bibtex": "@inproceedings{\nxu2023datadriven,\ntitle={Data-Driven Network Neuroscience: On Data Collection and Benchmark},\nauthor={Jiaxing Xu and Yunhan Yang and David Tse Jung Huang and Sophi Shilpa Gururajapathy and Yiping Ke and Miao Qiao and Alan Wang and Haribalan Kumar and Josh McGeown and Eryn Kwon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=vZf7jrX1el}\n}", "github": "", "project": "", "reviewers": "NQuQ;iqnx;ebHo;v8Nt;RUkN", "pdf_size": 1616595, "rating": "4;6;6;7;7", "confidence": "4;1;4;3;5", "wc_summary_and_contributions": "68;29;68;89;98", "wc_strengths": "40;25;88;75;11", "wc_improvement": "191;389;67;155;47", "wc_limitations": "90;4;63;101;51", "wc_correctness": "165;4;49;9;6", "wc_clarity": "8;14;40;65;3", "wc_relation_to_prior_work": "1;26;47;18;5", "wc_documentation": "13;17;17;43;2", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "577;509;440;556;224", "wc_reply_reviewers": "16;0;10;0;22", "wc_reply_authors": "861;322;567;553;211", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 1.3564659966250538 ], "wc_summary_and_contributions_avg": [ 70.4, 23.80420130985285 ], "wc_strengths_avg": [ 47.8, 29.294368059406914 ], "wc_improvement_avg": [ 169.8, 121.95474570511801 ], "wc_limitations_avg": [ 61.8, 34.03174988154444 ], "wc_correctness_avg": [ 46.6, 61.48365636492352 ], "wc_clarity_avg": [ 26.0, 23.29806858947754 ], "wc_relation_to_prior_work_avg": [ 19.4, 16.451139778143034 ], "wc_documentation_avg": [ 18.4, 13.46996659238619 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], 
"wc_review_avg": [ 461.2, 127.5890277414167 ], "wc_reply_reviewers_avg": [ 9.6, 8.708616422830897 ], "wc_reply_authors_avg": [ 502.8, 224.84874916263155 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2030829682888621739&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": ";auckland.ac.nz;auckland.ac.nz;;ntu.edu.sg;auckland.ac.nz;;;;", "author_num": 10, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Auckland;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.auckland.ac.nz;https://www.ntu.edu.sg", "aff_unique_abbr": "UoA;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "New Zealand;Singapore" }, { "id": "vcNjibzV3P", "title": "Complete Neural Networks for Complete Euclidean Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural networks for point clouds, which respect their natural invariance to permutation and rigid motion, have enjoyed recent success in modeling geometric phenomena, from molecular dynamics to recommender systems. Yet, to date, no architecture with polynomial complexity is known to be complete, that is, able to distinguish between any pair of non-isomorphic point clouds. We fill this theoretical gap by showing that point clouds can be completely determined, up to permutation and rigid motion, by applying the 3-WL graph isomorphism test to the point cloud's centralized Gram matrix. Moreover, we formulate a Euclidean variant of the 2-WL test and show that it is also sufficient to achieve completeness. We then show how our complete Euclidean WL tests can be simulated by a Euclidean graph neural network of moderate size and demonstrate their separation capability on highly-symmetrical point clouds.\n", "keywords": "Graph Neural Networks;Weisfeiler Leman Test;graph isomorphism;universal approximation;equivariant neural network;Euclidean graphs", "primary_area": "", "supplementary_material": "/attachment/931d268d65b4daf2e0f3f5fbe6a184033b5aee02.zip", "author": "Snir Hordan;Nadav Dym;Tal Amir;Steven J. Gortler", "authorids": "~Snir_Hordan1;~Nadav_Dym1;~Tal_Amir1;~Steven_J._Gortler1", "gender": "M;;;", "homepage": "https://snirhordan.github.io/;;https://tal-amir.github.io/;", "dblp": "339/0274;;;", "google_scholar": "T2YJQPoAAAAJ;;https://scholar.google.co.il/citations?user=Lx2W9vMAAAAJ;", "orcid": ";;0009-0003-1868-1860;", "linkedin": "senirhordan/;;;", "or_profile": "~Snir_Hordan1;~Nadav_Dym1;~Tal_Amir1;~Steven_J._Gortler1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;;Technion - Israel Institute of Technology, Technion;", "aff_domain": "campus.technion.ac.il;;technion.ac.il;", "position": "PhD student;;Postdoc;", "bibtex": "@misc{\nhordan2023complete,\ntitle={Complete Neural Networks for Complete Euclidean Graphs},\nauthor={Snir Hordan and Nadav Dym and Tal Amir and Steven J. 
Gortler},\nyear={2023},\nurl={https://openreview.net/forum?id=vcNjibzV3P}\n}", "github": "", "project": "", "reviewers": "7AxF;eAUn;5HoU;Z66V", "site": "https://openreview.net/forum?id=vcNjibzV3P", "pdf_size": 898973, "rating": "4;5;5;7", "confidence": "4;4;2;5", "soundness": "3;4;3;3", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "59;176;44;86", "wc_strengths": "67;40;26;128", "wc_weaknesses": "250;147;19;90", "wc_questions": "152;67;2;350", "wc_limitations": "20;15;1;61", "wc_review": "548;445;92;715", "wc_reply_reviewers": "945;38;0;180", "wc_reply_authors": "2360;0;0;152", "reply_reviewers": "3;1;0;1", "reply_authors": "6;1;1;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 91.25, 51.192650839744566 ], "wc_strengths_avg": [ 65.25, 39.111219617905036 ], "wc_weaknesses_avg": [ 126.5, 84.5 ], "wc_questions_avg": [ 142.75, 130.94536074256317 ], "wc_limitations_avg": [ 24.25, 22.331312097590683 ], "wc_review_avg": [ 450.0, 228.0449517090874 ], "wc_reply_reviewers_avg": [ 290.75, 383.64265599643636 ], "wc_reply_authors_avg": [ 628.0, 1001.8942059918303 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4736842105263159, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9430528791363623251&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Structured Voronoi Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70096", "id": "vf77fTbgG3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/64ae05e3f1a88ebac7f9263b69f4e702-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vf77fTbgG3", "openreview": "https://openreview.net/forum?id=vf77fTbgG3", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70096", "video": "https://nips.cc/virtual/2023/poster/70096", "author_site": "Afra Amini, Li Du, Ryan Cotterell", "tldr": "", "abstract": "Gradient-based sampling algorithms have demonstrated their effectiveness in text generation, especially in the context of controlled text generation. However, there exists a lack of theoretically grounded and principled approaches for this task. In this paper, we take an important step toward building a principled approach for sampling from language models with gradient-based methods. We use discrete distributions given by language models to define densities and develop an algorithm based on Hamiltonian Monte Carlo to sample from them. We name our gradient-based technique Structured Voronoi Sampling (SVS). In an experimental setup where the reference distribution is known, we show that the empirical distribution of SVS samples is closer to the reference distribution compared to alternative sampling schemes. 
Furthermore, in a controlled generation task, SVS is able to generate fluent and diverse samples while following the control targets significantly better than other methods.", "keywords": "Natural Language Processing;Text Generation;Controlled Generation;MCMC;HMC;Langevin Dynamics", "primary_area": "", "supplementary_material": "", "author": "Afra Amini;Li Du;Ryan Cotterell", "authorids": "~Afra_Amini1;~Li_Du2;~Ryan_Cotterell1", "gender": "F;M;Not Specified", "homepage": ";;https://rycolab.io/", "dblp": "270/4959;;146/4361.html", "google_scholar": ";efDU43kAAAAJ;DexOqtoAAAAJ", "orcid": ";;", "linkedin": "afraamini;;", "or_profile": "~Afra_Amini1;~Li_Du2;~Ryan_D_Cotterell1", "aff": "Research, Google;Johns Hopkins University;Swiss Federal Institute of Technology", "aff_domain": "research.google.com;cs.jhu.edu;ethz.ch", "position": "Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\namini2023structured,\ntitle={Structured Voronoi Sampling},\nauthor={Afra Amini and Li Du and Ryan Cotterell},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vf77fTbgG3}\n}", "github": "", "project": "", "reviewers": "iTvg;qwXS;v2AJ;p6Uf;tTqL;BCGv", "pdf_size": 2622740, "rating": "5;6;6;7;7;8", "confidence": "2;2;3;4;3;4", "soundness": "3;3;4;4;3;4", "novelty": "3;2;3;3;3;4", "presentation": "2;3;3;3;3;3", "wc_summary": "125;71;41;101;130;97", "wc_strengths": "89;39;58;75;42;199", "wc_weaknesses": "170;53;41;207;60;541", "wc_questions": "36;18;84;196;167;404", "wc_limitations": "46;7;1;29;5;2", "wc_review": "466;188;225;608;404;1243", "wc_reply_reviewers": "33;29;16;19;172;15", "wc_reply_authors": "31;0;0;0;35;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;1;1;1;2;1", "rating_avg": [ 6.5, 0.9574271077563381 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 94.16666666666667, 30.69428104531671 ], "wc_strengths_avg": [ 83.66666666666667, 54.459975109146804 ], "wc_weaknesses_avg": [ 178.66666666666666, 173.68042172014924 ], "wc_questions_avg": [ 150.83333333333334, 130.18757834584508 ], "wc_limitations_avg": [ 15.0, 16.76305461424021 ], "wc_review_avg": [ 522.3333333333334, 352.15085151426547 ], "wc_reply_reviewers_avg": [ 47.333333333333336, 56.144654440313566 ], "wc_reply_authors_avg": [ 11.0, 15.599145275730121 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8085795174014058768&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "research.google.com;cs.jhu.edu;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Johns Hopkins University;Swiss Federal Institute of Technology", "aff_unique_dep": "Google Research;;", "aff_unique_url": "https://research.google;https://www.jhu.edu;https://www.ethz.ch", "aff_unique_abbr": "Google;JHU;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Switzerland" }, { "id": "vfDPA1Ft0c", "title": "Best Arm Identification for Stochastic Rising Bandits", "track": "main", "status": "Reject", "tldr": "", "abstract": "Stochastic Rising Bandits 
(SRBs) model sequential decision-making problems in which the expected rewards of the available options increase every time they are selected. This setting captures a wide range of scenarios in which the available options are learning entities whose performance improves (in expectation) over time. While previous works addressed the regret minimization problem, this paper focuses on the fixed-budget Best Arm Identification (BAI) problem for SRBs. In this scenario, given a fixed budget of rounds, we are asked to provide a recommendation about the best option at the end of the identification process. We propose two algorithms to tackle the above-mentioned setting, namely R-UCBE, which resorts to a UCB-like approach, and R-SR, which employs a successive reject procedure. Then, we prove that, with a sufficiently large budget, they provide guarantees on the probability of properly identifying the optimal option at the end of the learning process. Furthermore, we derive a lower bound on the error probability, matched by our R-SR (up to logarithmic factors), and illustrate how the need for a sufficiently large budget is unavoidable in the SRB setting.\nFinally, we numerically validate the proposed algorithms in both synthetic and real-world environments and compare them with the currently available BAI strategies.", "keywords": "Best Arm Identification;Rising;Rested;Stochastic;Bandits", "primary_area": "", "supplementary_material": "/attachment/f08f36fe897b55a00f11044fec4fae750e97e101.zip", "author": "Marco Mussi;Alessandro Montenegro;Francesco Trov\u00f2;Marcello Restelli;Alberto Maria Metelli", "authorids": "~Marco_Mussi1;~Alessandro_Montenegro1;~Francesco_Trov\u00f21;~Marcello_Restelli1;~Alberto_Maria_Metelli2", "gender": "M;M;M;M;M", "homepage": "https://marcomussi.github.io/;;https://trovo.faculty.polimi.it/;http://home.deib.polimi.it/restelli/;https://albertometelli.github.io/", "dblp": "321/0756;;69/11487;64/1011;209/4941", "google_scholar": "3gca-JUAAAAJ;CugD-ogAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ;R31IsPwAAAAJ", "orcid": "0000-0001-8356-6744;;0000-0001-5796-7667;0000-0002-6322-1076;0000-0002-3424-5212", "linkedin": "marcomussi95/;alessandro-montenegro-3266291b7/;;;", "or_profile": "~Marco_Mussi1;~Alessandro_Montenegro1;~Francesco_Trov\u00f21;~Marcello_Restelli1;~Alberto_Maria_Metelli2", "aff": "Politecnico di Milano;Politecnico di Milano;Politecnico di Milano;Politecnico di Milano;Politecnico di Milano", "aff_domain": "polimi.it;polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;MS student;Assistant Professor;Associate Professor;Postdoc", "bibtex": "@misc{\nmussi2023best,\ntitle={Best Arm Identification for Stochastic Rising Bandits},\nauthor={Marco Mussi and Alessandro Montenegro and Francesco Trov{\\`o} and Marcello Restelli and Alberto Maria Metelli},\nyear={2023},\nurl={https://openreview.net/forum?id=vfDPA1Ft0c}\n}", "github": "", "project": "", "reviewers": "Sk8e;2LCh;uPrW;fA5g", "site": "https://openreview.net/forum?id=vfDPA1Ft0c", "pdf_size": 910405, "rating": "5;6;6;7", "confidence": "3;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "124;80;52;143", "wc_strengths": "73;18;149;94", "wc_weaknesses": "122;55;34;125", "wc_questions": "82;2;110;41", "wc_limitations": "1;1;2;12", "wc_review": "402;156;347;415", "wc_reply_reviewers": "8;0;0;0", "wc_reply_authors": "29;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 
0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.75, 35.80764583158184 ], "wc_strengths_avg": [ 83.5, 46.90682253148256 ], "wc_weaknesses_avg": [ 84.0, 40.20572098594925 ], "wc_questions_avg": [ 58.75, 40.93516214698557 ], "wc_limitations_avg": [ 4.0, 4.636809247747852 ], "wc_review_avg": [ 330.0, 103.65085624344837 ], "wc_reply_reviewers_avg": [ 2.0, 3.4641016151377544 ], "wc_reply_authors_avg": [ 7.25, 12.55736835487436 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17019165035024854401&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Politecnico di Milano", "aff_unique_dep": "", "aff_unique_url": "https://www.polimi.it", "aff_unique_abbr": "Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "JourneyDB: A Benchmark for Generative Image Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73428", "id": "vfzXDRTcF4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9bc59aff4685e39e1a8175d5303248a1-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=vfzXDRTcF4", "openreview": "https://openreview.net/forum?id=vfzXDRTcF4", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73428", "video": "https://nips.cc/virtual/2023/poster/73428", "author_site": "Keqiang Sun, Junting Pan, Yuying Ge, Hao Li, Haodong Duan, Xiaoshi Wu, Renrui Zhang, Aojun Zhou, Zipeng Qin, Yi Wang, Jifeng Dai, Yu Qiao, Limin Wang, Hongsheng Li", "tldr": "", "abstract": "While recent advancements in vision-language models have had a transformative impact on multi-modal comprehension, the extent to which these models possess the ability to comprehend generated images remains uncertain. Synthetic images, in comparison to real data, encompass a higher level of diversity in terms of both content and style, thereby presenting significant challenges for the models to fully grasp. In light of this challenge, we introduce a comprehensive dataset, referred to as JourneyDB, that caters to the domain of generative images within the context of multi-modal visual understanding. Our meticulously curated dataset comprises 4 million distinct and high-quality generated images, each paired with the corresponding text prompts that were employed in their creation. Furthermore, we additionally introduce an external subset with results of another 22 text-to-image generative models, which makes JourneyDB a comprehensive benchmark for evaluating the comprehension of generated images. On our dataset, we have devised four benchmarks to assess the performance of generated image comprehension in relation to both content and style interpretation. These benchmarks encompass prompt inversion, style retrieval, image captioning, and visual question answering. Lastly, we evaluate the performance of state-of-the-art multi-modal models when applied to the JourneyDB dataset, providing a comprehensive analysis of their strengths and limitations in comprehending generated content. 
We anticipate that the proposed dataset and benchmarks will facilitate further research in the field of generative content understanding. The dataset is publicly available at https://journeydb.github.io.", "keywords": "Midjourney;Dataset;Generative Image Understanding", "primary_area": "", "supplementary_material": "", "author": "", "authorids": "", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nsun2023journeydb,\ntitle={Journey{DB}: A Benchmark for Generative Image Understanding},\nauthor={Keqiang Sun and Junting Pan and Yuying Ge and Hao Li and Haodong Duan and Xiaoshi Wu and Renrui Zhang and Aojun Zhou and Zipeng Qin and Yi Wang and Jifeng Dai and Yu Qiao and Limin Wang and Hongsheng Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=vfzXDRTcF4}\n}", "github": "", "project": "", "reviewers": "r9bw;J527;Q5nG;Wqw6", "pdf_size": 3934068, "rating": "4;5;6;8", "confidence": "4;4;3;4", "wc_summary_and_contributions": "43;45;55;135", "wc_strengths": "21;13;14;35", "wc_improvement": "124;90;263;73", "wc_limitations": "48;41;53;16", "wc_correctness": "2;9;61;15", "wc_clarity": "1;19;9;26", "wc_relation_to_prior_work": "14;12;32;20", "wc_documentation": "56;11;64;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "310;241;552;325", "wc_reply_reviewers": "0;151;322;9", "wc_reply_authors": "1464;1647;3149;364", "reply_reviewers": "0;2;3;1", "reply_authors": "5;6;8;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 69.5, 38.08871223866725 ], "wc_strengths_avg": [ 20.75, 8.78564169540279 ], "wc_improvement_avg": [ 137.5, 74.74790966976936 ], "wc_limitations_avg": [ 39.5, 14.221462653327892 ], "wc_correctness_avg": [ 21.75, 23.12331074911203 ], "wc_clarity_avg": [ 13.75, 9.522998477370455 ], "wc_relation_to_prior_work_avg": [ 19.5, 7.794228634059948 ], "wc_documentation_avg": [ 33.75, 26.517682779609533 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 357.0, 116.95511959721986 ], "wc_reply_reviewers_avg": [ 120.5, 130.84819448505968 ], "wc_reply_authors_avg": [ 1656.0, 991.8742359795419 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 5.25, 2.165063509461097 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 0, 0 ], "corr_rating_confidence": -0.09759000729485331, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=736236792145113555&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "", "author_num": 1 }, { "title": "SpokenWOZ: A Large-Scale Speech-Text Benchmark for Spoken Task-Oriented Dialogue Agents", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73427", "id": "viktK3nO5b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7b16688a2b053a1b01474ab5c78ce662-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=viktK3nO5b", "openreview": "https://openreview.net/forum?id=viktK3nO5b", "poster": "/media/PosterPDFs/NeurIPS%202023/73427.png?t=1697459354.1696136", "slides": "https://nips.cc/virtual/2023/poster/73427", "video": "https://nips.cc/virtual/2023/poster/73427", "author_site": "Shuzheng Si, Wentao Ma, Haoyu Gao, Yuchuan Wu, Ting-En Lin, Yinpei Dai, Hangyu Li, Rui Yan, Fei Huang, 
Yongbin Li", "tldr": "", "abstract": "Task-oriented dialogue (TOD) models have made significant progress in recent years. However, previous studies primarily focus on datasets written by annotators, which has resulted in a gap between academic research and real-world spoken con- versation scenarios. While several small-scale spoken TOD datasets are proposed to address robustness issues such as ASR errors, they ignore the unique challenges in spoken conversation. To tackle the limitations, we introduce SpokenWOZ, a large-scale speech-text dataset for spoken TOD, containing 8 domains, 203k turns, 5.7k dialogues and 249 hours of audios from human-to-human spoken conversations. SpokenWOZ further incorporates common spoken characteristics such as word-by-word processing and reasoning in spoken language. Based on these characteristics, we present cross-turn slot and reasoning slot detection as new challenges. We conduct experiments on various baselines, including text-modal models, newly proposed dual-modal models, and LLMs, e.g., ChatGPT. The results show that the current models still have substantial room for improvement in spoken conversation, where the most advanced dialogue state tracker only achieves 25.65% in joint goal accuracy and the SOTA end-to-end model only correctly completes the user request in 52.1% of dialogues. Our dataset, code, and leaderboard are available at https://spokenwoz.github.io/SpokenWOZ-github.io/.", "keywords": "Task-oriented Dialogue;Spoken Conversation;Dialogue State Tracking;Response Generation;Natural Language Processing", "primary_area": "", "supplementary_material": "/attachment/aba0a313481c83a8a73323cfe9124f37a27a3936.pdf", "author": "Shuzheng Si;Wentao Ma;Haoyu Gao;Yuchuan Wu;Ting-En Lin;Yinpei Dai;Hangyu Li;Rui Yan;Fei Huang;Yongbin Li", "authorids": "~Shuzheng_Si1;~Wentao_Ma1;~Haoyu_Gao2;~Yuchuan_Wu1;~Ting-En_Lin1;~Yinpei_Dai1;~Hangyu_Li4;~Rui_Yan2;~Fei_Huang1;~Yongbin_Li2", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": ";;;;https://yinpeidai.github.io/;https://gsai.ruc.edu.cn/english/ruiyan;https://yongbin-li.github.io/;https://sites.google.com/view/fei-huang;;https://tnlin.github.io/", "dblp": "324/3680;39/8088;;87/8595;209/9564;19/2405-1;;h/FeiHuang.html;;242/8515", "google_scholar": "https://scholar.google.com.hk/citations?user=zO2XyZUAAAAJ;yJ6YqE4AAAAJ;;y-cwb-MAAAAJ;EzAk5DUAAAAJ;eLw6g-UAAAAJ;xF5VrokAAAAJ;9r98PpoAAAAJ;RaI5-4kAAAAJ;XNdFVMAAAAAJ", "orcid": ";;0009-0009-3187-2737;;;0000-0002-3356-6823;;;;", "linkedin": ";;;;;;;fei-huang-cas-cmu;;", "or_profile": "~Shuzheng_Si1;~Wentao_Ma1;~Haoyu_Gao2;~Yuchuan_Wu1;~Yinpei_Dai1;~Rui_Yan2;~Yongbin_Li2;~Fei_Huang2;~Li_Hangyu1;~Tony_Lin1", "aff": "Peking University;Alibaba Group;University of Science and Technology of China;Alibaba Group;University of Michigan - Ann Arbor;Renmin University of China;Alibaba Group;Alibaba Group US;;Alibaba Group", "aff_domain": "pku.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;umich.edu;ruc.edu.cn;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com", "position": "MS student;Researcher;MS student;Researcher;PhD student;Associate Professor;Researcher;Senior Research Director;;Researcher", "bibtex": "@inproceedings{\nsi2023spokenwoz,\ntitle={Spoken{WOZ}: A Large-Scale Speech-Text Benchmark for Spoken Task-Oriented Dialogue Agents},\nauthor={Shuzheng Si and Wentao Ma and Haoyu Gao and Yuchuan Wu and Ting-En Lin and Yinpei Dai and Hangyu Li and Rui Yan and Fei Huang and Yongbin Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and 
Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=viktK3nO5b}\n}", "github": "", "project": "", "reviewers": "F6kt;bThz;cbUA;QaYG;fz7a", "pdf_size": 1226730, "rating": "5;6;7;9;9", "confidence": "4;3;4;4;4", "wc_summary_and_contributions": "57;93;52;66;128", "wc_strengths": "44;26;25;90;207", "wc_improvement": "190;106;70;102;73", "wc_limitations": "1;1;1;33;12", "wc_correctness": "6;1;1;85;14", "wc_clarity": "7;6;1;59;6", "wc_relation_to_prior_work": "1;1;1;89;71", "wc_documentation": "1;1;1;47;40", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "308;236;153;572;552", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "787;760;368;75;92", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 1.6 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 79.2, 28.20921835145384 ], "wc_strengths_avg": [ 78.4, 68.488247166941 ], "wc_improvement_avg": [ 108.2, 43.43454846087386 ], "wc_limitations_avg": [ 9.6, 12.451505933018705 ], "wc_correctness_avg": [ 21.4, 32.15338240372232 ], "wc_clarity_avg": [ 15.8, 21.70161284328886 ], "wc_relation_to_prior_work_avg": [ 32.6, 39.11828217087248 ], "wc_documentation_avg": [ 18.0, 20.938003725283842 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 364.2, 168.90754867678353 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 416.4, 309.6892636175817 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.37500000000000006, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=556654392282644561&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;umich.edu;ruc.edu.cn;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com", "author_num": 10, "aff_unique_index": "0;1;2;1;3;4;1;1;1", "aff_unique_norm": "Peking University;Alibaba Group;University of Science and Technology of China;University of Michigan;Renmin University of China", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.alibaba.com;http://www.ustc.edu.cn;https://www.umich.edu;http://www.ruc.edu.cn", "aff_unique_abbr": "Peking U;Alibaba;USTC;UM;RUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;1;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "A Novel Approach for Effective Multi-View Clustering with Information-Theoretic Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70095", "id": "vlDbqzwczj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c64bc3f7796d31caa7c3e6b969bf7da-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vlDbqzwczj", "openreview": "https://openreview.net/forum?id=vlDbqzwczj", "poster": "/media/PosterPDFs/NeurIPS%202023/70095.png?t=1698048664.3767984", "slides": "https://nips.cc/virtual/2023/poster/70095", "video": "https://nips.cc/virtual/2023/poster/70095", "author_site": "Chenhang Cui, Yazhou Ren, Jingyu Pu, Jiawei Li, Xiaorong Pu, Tianyi Wu, Yutao Shi, Lifang He", "tldr": "", "abstract": "Multi-view clustering (MVC) is a popular technique for improving clustering performance using various data sources. 
However, existing methods primarily focus on acquiring consistent information while often neglecting the issue of redundancy across multiple views.\nThis study presents a new approach called Sufficient Multi-View Clustering (SUMVC) that examines the multi-view clustering framework from an information-theoretic standpoint. Our proposed method consists of two parts. Firstly, we develop a simple and reliable multi-view clustering method SCMVC (simple consistent multi-view clustering) that employs variational analysis to generate consistent information. Secondly, we propose a sufficient representation lower bound to enhance consistent information and minimize unnecessary information among views. The proposed SUMVC method offers a promising solution to the problem of multi-view clustering and provides a new perspective for analyzing multi-view data.\nTo verify the effectiveness of our model, we conduct a theoretical analysis based on the Bayes Error Rate, and experiments on multiple multi-view datasets demonstrate the superior performance of SUMVC.", "keywords": "multi-view learning;clustering", "primary_area": "", "supplementary_material": "/attachment/30d7c35972c8d2038f3c605985cff53c802f132c.zip", "author": "Chenhang Cui;Yazhou Ren;Jingyu Pu;Jiawei Li;Xiaorong Pu;Tianyi Wu;Yutao Shi;Lifang He", "authorids": "~Chenhang_Cui1;~Yazhou_Ren1;~Jingyu_Pu1;~Jiawei_Li14;~Xiaorong_Pu1;~Tianyi_Wu6;~Yutao_Shi1;~Lifang_He1", "gender": "M;M;M;;F;M;M;F", "homepage": "https://gzcch.github.io;https://yazhou-ren.github.io/;;https://github.com/LzayJW/LazyJW.github.io;https://yjsjy.uestc.edu.cn/gmis/jcsjgl/dsfc/dsgrjj/10368?yxsh=08;https://github.com/TianyiWu233;https://github.com/syt59421;https://engineering.lehigh.edu/faculty/lifang-he", "dblp": "347/3407;157/2928;;;74/6232.html;;;129/8146", "google_scholar": "V5X1gdAAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.sg/citations?hl=zh-CN;;;obgTcyoAAAAJ", "orcid": ";;;;0000-0001-7387-7194;;;0000-0001-7810-9071", "linkedin": ";;;;;;;", "or_profile": "~Chenhang_Cui1;~Yazhou_Ren1;~Jingyu_Pu1;~Jiawei_Li14;~Xiaorong_Pu1;~Tianyi_Wu6;~Yutao_Shi1;~Lifang_He1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Lehigh University", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;lehigh.edu", "position": "Undergrad student;Associate Professor;MS student;Undergrad student;Full Professor;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ncui2023a,\ntitle={A Novel Approach for Effective Multi-View Clustering with Information-Theoretic Perspective},\nauthor={Chenhang Cui and Yazhou Ren and Jingyu Pu and Jiawei Li and Xiaorong Pu and Tianyi Wu and Yutao Shi and Lifang He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vlDbqzwczj}\n}", "github": "", "project": "", "reviewers": "XaYq;ugfB;79CP;89XX;NC94", "pdf_size": 851866, "rating": "6;6;6;7;7", "confidence": "4;4;5;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "123;83;45;61;99", "wc_strengths": "71;63;51;48;78", 
"wc_weaknesses": "193;37;116;64;166", "wc_questions": "25;49;3;2;480", "wc_limitations": "9;9;1;9;60", "wc_review": "421;241;216;184;883", "wc_reply_reviewers": "54;17;37;0;179", "wc_reply_authors": "0;0;0;0;264", "reply_reviewers": "1;1;1;0;2", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.2, 27.498363587675538 ], "wc_strengths_avg": [ 62.2, 11.443775600735973 ], "wc_weaknesses_avg": [ 115.2, 58.942005395133954 ], "wc_questions_avg": [ 111.8, 184.9014872844456 ], "wc_limitations_avg": [ 17.6, 21.42521878534733 ], "wc_review_avg": [ 389.0, 260.35283751094397 ], "wc_reply_reviewers_avg": [ 57.4, 63.46841734280129 ], "wc_reply_authors_avg": [ 52.8, 105.6 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9522977779144160500&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;lehigh.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;Lehigh University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.lehigh.edu", "aff_unique_abbr": "UESTC;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "On the Overlooked Pitfalls of Weight Decay and How to Mitigate Them: A Gradient-Norm Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70094", "id": "vnGcubtzR1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/040d3b6af368bf71f952c18da5713b48-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vnGcubtzR1", "openreview": "https://openreview.net/forum?id=vnGcubtzR1", "poster": "/media/PosterPDFs/NeurIPS%202023/70094.png?t=1697953314.7249548", "slides": "https://nips.cc/virtual/2023/poster/70094", "video": "https://nips.cc/virtual/2023/poster/70094", "author_site": "Zeke Xie, Zhiqiang Xu, Jingzhao Zhang, Issei Sato, Masashi Sugiyama", "tldr": "", "abstract": "Weight decay is a simple yet powerful regularization technique that has been very widely used in training of deep neural networks (DNNs). While weight decay has attracted much attention, previous studies fail to discover some overlooked pitfalls on large gradient norms resulted by weight decay. In this paper, we discover that, weight decay can unfortunately lead to large gradient norms at the final phase (or the terminated solution) of training, which often indicates bad convergence and poor generalization. To mitigate the gradient-norm-centered pitfalls, we present the first practical scheduler for weight decay, called the Scheduled Weight Decay (SWD) method that can dynamically adjust the weight decay strength according to the gradient norm and significantly penalize large gradient norms during training. 
Our experiments also confirm that SWD indeed mitigates large gradient norms and often significantly outperforms the conventional constant weight decay strategy for Adaptive Moment Estimation (Adam).", "keywords": "Weight Decay;Regularization;Optimization;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/9ed756e223b9b813ce27f98068dca985db16bfa7.pdf", "author": "Zeke Xie;zhiqiang xu;Jingzhao Zhang;Issei Sato;Masashi Sugiyama", "authorids": "~Zeke_Xie1;~zhiqiang_xu1;~Jingzhao_Zhang2;~Issei_Sato2;~Masashi_Sugiyama1", "gender": "M;M;M;;M", "homepage": "https://sites.google.com/view/zeke-xie;https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;https://sites.google.com/view/jingzhao/home;https://www.ml.is.s.u-tokyo.ac.jp/issei-sato-en;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "210/1039;72/51-3.html;220/5559;;35/1228", "google_scholar": "https://scholar.google.co.jp/citations?user=ysXmZCMAAAAJ;;8NudxYsAAAAJ;;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": ";0000-0002-5693-8933;;;0000-0001-6658-6743", "linkedin": ";;;;", "or_profile": "~Zeke_Xie1;~zhiqiang_xu1;~Jingzhao_Zhang2;~Issei_Sato2;~Masashi_Sugiyama1", "aff": "Baidu;Mohamed bin Zayed University of Artificial Intelligence;Tsinghua University;The University of Tokyo;The University of Tokyo", "aff_domain": "baidu.com;mbzuai.ac.ae;mail.tsinghua.edu.cn;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "Researcher;Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxie2023on,\ntitle={On the Overlooked Pitfalls of Weight Decay and How to Mitigate Them: A Gradient-Norm Perspective},\nauthor={Zeke Xie and zhiqiang xu and Jingzhao Zhang and Issei Sato and Masashi Sugiyama},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vnGcubtzR1}\n}", "github": "", "project": "", "reviewers": "xE1B;3dfT;e7t4;tfGK", "pdf_size": 1081326, "rating": "4;6;6;7", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;2;3;4", "wc_summary": "75;99;575;42", "wc_strengths": "36;60;38;56", "wc_weaknesses": "342;59;398;55", "wc_questions": "10;18;9;2", "wc_limitations": "2;2;2;15", "wc_review": "465;238;1022;170", "wc_reply_reviewers": "261;11;177;0", "wc_reply_authors": "517;0;636;0", "reply_reviewers": "1;1;4;0", "reply_authors": "3;1;4;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 197.75, 218.74342847272007 ], "wc_strengths_avg": [ 47.5, 10.618380290797651 ], "wc_weaknesses_avg": [ 213.5, 157.7537638219767 ], "wc_questions_avg": [ 9.75, 5.673402858955108 ], "wc_limitations_avg": [ 5.25, 5.629165124598851 ], "wc_review_avg": [ 473.75, 334.8494997756455 ], "wc_reply_reviewers_avg": [ 112.25, 110.87239286675471 ], "wc_reply_authors_avg": [ 288.25, 291.3042867861714 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13849448887806829669&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 7, "email": "baidu.com;mbzuai.ac.ae;mail.tsinghua.edu.cn;u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Baidu;Mohamed bin 
Zayed University of Artificial Intelligence;Tsinghua University;University of Tokyo", "aff_unique_dep": "Baidu, Inc.;;;", "aff_unique_url": "https://www.baidu.com;https://mbzuai.ac.ae;https://www.tsinghua.edu.cn;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Baidu;MBZUAI;THU;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;2", "aff_country_unique": "China;United Arab Emirates;Japan" }, { "title": "Higher-Order Uncoupled Dynamics Do Not Lead to Nash Equilibrium - Except When They Do", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70093", "id": "vnTUuecp2v", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/605e02ae04cba1ebf6a08206299e76b9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vnTUuecp2v", "openreview": "https://openreview.net/forum?id=vnTUuecp2v", "poster": "/media/PosterPDFs/NeurIPS%202023/70093.png?t=1701977046.1368144", "slides": "https://nips.cc/virtual/2023/poster/70093", "video": "https://nips.cc/virtual/2023/poster/70093", "author_site": "Sarah Toonsi, Jeff Shamma", "tldr": "", "abstract": "The framework of multi-agent learning explores the dynamics of how an agent's strategies evolve in response to the evolving strategies of other agents. Of particular interest is whether or not agent strategies converge to well-known solution concepts such as Nash Equilibrium (NE). In \"higher order\" learning, agent dynamics include auxiliary states that can capture phenomena such as path dependencies. We introduce higher-order gradient play dynamics that resemble projected gradient ascent with auxiliary states. The dynamics are \"payoff based\" and \"uncoupled\" in that each agent's dynamics depend on its own evolving payoff and have no explicit dependence on the utilities of other agents. We first show that for any specific game with an isolated completely mixed-strategy NE, there exist higher-order gradient play dynamics that lead (locally) to that NE, both for the specific game and nearby games with perturbed utility functions. Conversely, we show that for any higher-order gradient play dynamics, there exists a game with a unique isolated completely mixed-strategy NE for which the dynamics do not lead to NE. 
Finally, we show that convergence to the mixed-strategy equilibrium in coordination games comes at the expense of the dynamics being inherently internally unstable.", "keywords": "Learning in games;Nash equilibrium;Uncoupled Dynamics", "primary_area": "", "supplementary_material": "", "author": "Sarah Asad Toonsi;Jeff S Shamma", "authorids": "~Sarah_Asad_Toonsi1;~Jeff_S_Shamma1", "gender": "F;", "homepage": ";https://ise.illinois.edu/directory/profile/jshamma", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;ixE1z7UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sarah_Asad_Toonsi1;~Jeff_S_Shamma1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;illinois.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ntoonsi2023higherorder,\ntitle={Higher-Order Uncoupled Dynamics Do Not Lead to Nash Equilibrium - Except When They Do},\nauthor={Sarah Asad Toonsi and Jeff S Shamma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vnTUuecp2v}\n}", "github": "", "project": "", "reviewers": "tNmK;mLZz;VfQG;ZZgJ;f6g4", "pdf_size": 710750, "rating": "5;6;6;7;8", "confidence": "2;2;4;2;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;4;4", "presentation": "2;3;3;2;4", "wc_summary": "202;92;123;226;87", "wc_strengths": "55;33;35;103;317", "wc_weaknesses": "156;140;27;98;9", "wc_questions": "254;45;30;49;16", "wc_limitations": "2;10;33;15;5", "wc_review": "669;320;248;491;434", "wc_reply_reviewers": "157;55;106;30;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 146.0, 57.3794388261161 ], "wc_strengths_avg": [ 108.6, 107.20746242682922 ], "wc_weaknesses_avg": [ 86.0, 58.9406481131655 ], "wc_questions_avg": [ 78.8, 88.37510961803667 ], "wc_limitations_avg": [ 13.0, 10.936178491593852 ], "wc_review_avg": [ 432.4, 145.6387311122972 ], "wc_reply_reviewers_avg": [ 69.6, 55.837621725857915 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.19611613513818402, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9588390913930271732&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uiuc.edu;illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Conditional score-based diffusion models for Bayesian inference in infinite dimensions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70092", "id": "voG6nEW9BV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4c79c359b3c5f077c0b955f93cb0f53e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=voG6nEW9BV", "openreview": "https://openreview.net/forum?id=voG6nEW9BV", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70092.png?t=1699575759.82404", "slides": "https://nips.cc/virtual/2023/poster/70092", "video": "https://nips.cc/virtual/2023/poster/70092", "author_site": "Lorenzo Baldassari, Ali Siahkoohi, Josselin Garnier, Knut Solna, Maarten V. de Hoop", "tldr": "", "abstract": "Since their initial introduction, score-based diffusion models (SDMs) have been successfully applied to solve a variety of linear inverse problems in finite-dimensional vector spaces due to their ability to efficiently approximate the posterior distribution. However, using SDMs for inverse problems in infinite-dimensional function spaces has only been addressed recently, primarily through methods that learn the unconditional score. While this approach is advantageous for some inverse problems, it is mostly heuristic and involves numerous computationally costly forward operator evaluations during posterior sampling. To address these limitations, we propose a theoretically grounded method for sampling from the posterior of infinite-dimensional Bayesian linear inverse problems based on amortized conditional SDMs. In particular, we prove that one of the most successful approaches for estimating the conditional score in finite dimensions\u2014the conditional denoising estimator\u2014can also be applied in infinite dimensions. A significant part of our analysis is dedicated to demonstrating that extending infinite-dimensional SDMs to the conditional setting requires careful consideration, as the conditional score typically blows up for small times, contrarily to the unconditional score. We conclude by presenting stylized and large-scale numerical examples that validate our approach, offer additional insights, and demonstrate that our method enables large-scale, discretization-invariant Bayesian inference.", "keywords": "score-based generative models;diffusion models;inverse problems;bayesian inference;infinite dimensions", "primary_area": "", "supplementary_material": "", "author": "Lorenzo Baldassari;Ali Siahkoohi;Josselin Garnier;Knut Solna;Maarten V. de Hoop", "authorids": "~Lorenzo_Baldassari1;~Ali_Siahkoohi1;~Josselin_Garnier1;~Knut_Solna1;~Maarten_V._de_Hoop2", "gender": ";M;M;M;", "homepage": ";https://alisiahkoohi.github.io;https://www.josselin-garnier.org;https://www.math.uci.edu/~ksolna/;http://maartendehoop.rice.edu/", "dblp": "267/1949;00/10956;63/3241;;60/4525", "google_scholar": "3MHSGawAAAAJ;sxRMqYIAAAAJ;nJDy0O8AAAAJ;https://scholar.google.com/citations?view_op=new_articles;", "orcid": "0000-0002-1912-2089;0000-0001-8779-2247;;0000-0002-8508-8288;", "linkedin": ";alisiahkoohi/;;;", "or_profile": "~Lorenzo_Baldassari1;~Ali_Siahkoohi1;~Josselin_Garnier1;~Knut_Solna1;~Maarten_v._de_Hoop1", "aff": "Rice University;Rice University;Ecole polytechnique;University of California, Irvine;Rice University", "aff_domain": "rice.edu;rice.edu;polytechnique.edu;uci.edu;rice.edu", "position": "Postdoc;Postdoc;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbaldassari2023conditional,\ntitle={Conditional score-based diffusion models for Bayesian inference in infinite dimensions},\nauthor={Lorenzo Baldassari and Ali Siahkoohi and Josselin Garnier and Knut Solna and Maarten V. 
de Hoop},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=voG6nEW9BV}\n}", "github": "", "project": "", "reviewers": "WxYS;pjkQ;XSPZ;Q2NB;hg8t", "pdf_size": 32697651, "rating": "6;6;7;8;8", "confidence": "3;4;2;3;3", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "60;29;359;32;55", "wc_strengths": "55;76;15;57;81", "wc_weaknesses": "157;57;908;25;30", "wc_questions": "32;118;189;38;113", "wc_limitations": "2;1;16;61;44", "wc_review": "306;281;1487;213;323", "wc_reply_reviewers": "11;18;22;12;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.0, 126.59067896176244 ], "wc_strengths_avg": [ 56.8, 23.25854681617061 ], "wc_weaknesses_avg": [ 235.4, 339.65311716514543 ], "wc_questions_avg": [ 98.0, 58.0723686446489 ], "wc_limitations_avg": [ 24.8, 23.84449622030208 ], "wc_review_avg": [ 522.0, 483.95123721300683 ], "wc_reply_reviewers_avg": [ 17.2, 4.955804677345547 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.35355339059327373, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7351420581316863264&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "rice.edu;rice.edu;polytechnique.edu;uci.edu;rice.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Rice University;Ecole Polytechnique;University of California, Irvine", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.polytechnique.edu;https://www.uci.edu", "aff_unique_abbr": "Rice;X;UCI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;France" }, { "title": "Combinatorial Optimization with Policy Adaptation using Latent Space Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70091", "id": "vpMBqdt9Hl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/18d3a2f3068d6c669dcae19ceca1bc24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vpMBqdt9Hl", "openreview": "https://openreview.net/forum?id=vpMBqdt9Hl", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70091", "video": "https://nips.cc/virtual/2023/poster/70091", "author_site": "Felix Chalumeau, Shikha Surana, Cl\u00e9ment Bonnet, Nathan Grinsztajn, Arnu Pretorius, Alexandre Laterre, Tom Barrett", "tldr": "", "abstract": "Combinatorial Optimization underpins many real-world applications, and yet designing performant algorithms to solve these complex, typically NP-hard, problems remains a significant research challenge. Reinforcement Learning (RL) provides a versatile framework for designing heuristics across a broad spectrum of problem domains. However, despite notable progress, RL has not yet supplanted industrial solvers as the go-to solution. 
Current approaches emphasize pre-training heuristics that construct solutions, but often rely on search procedures with limited variance, such as stochastically sampling numerous solutions from a single policy, or employing computationally expensive fine-tuning of the policy on individual problem instances. Building on the intuition that performant search at inference time should be anticipated during pre-training, we propose COMPASS, a novel RL approach that parameterizes a distribution of diverse and specialized policies conditioned on a continuous latent space. We evaluate COMPASS across three canonical problems - Travelling Salesman, Capacitated Vehicle Routing, and Job-Shop Scheduling - and demonstrate that our search strategy (i) outperforms state-of-the-art approaches in 9 out of 11 standard benchmarking tasks and (ii) generalizes better, surpassing all other approaches on a set of 18 procedurally transformed instance distributions.", "keywords": "Reinforcement Learning;Combinatorial Optimization;TSP;CVRP;JSSP", "primary_area": "", "supplementary_material": "/attachment/4749543510517cadea029233203035305cfbc1fe.zip", "author": "Felix Chalumeau;Shikha Surana;Cl\u00e9ment Bonnet;Nathan Grinsztajn;Arnu Pretorius;Alexandre Laterre;Thomas D Barrett", "authorids": "~Felix_Chalumeau1;~Shikha_Surana1;~Cl\u00e9ment_Bonnet1;~Nathan_Grinsztajn1;~Arnu_Pretorius1;~Alexandre_Laterre1;~Thomas_D_Barrett1", "gender": "M;F;M;M;M;M;M", "homepage": ";;;https://nathangrinsztajn.github.io/;;;", "dblp": "286/1636;;305/3524;;188/4368;223/4200;248/8263", "google_scholar": "YAC6ZzIAAAAJ;;H6euRhAAAAAJ;yVHIYEYAAAAJ;zZ6ydrAAAAAJ;HrMSaicAAAAJ;nJa1KGIAAAAJ", "orcid": ";;;0000-0001-6817-5972;;;0000-0001-6241-3028", "linkedin": "f%C3%A9lix-chalumeau-083457172/;shikha-surana/;clement-bonnet16/;nathan-grinsztajn-960379139/?locale=en_US;arnupretorius/;reinforce/;tom-barrett-62b180a2/", "or_profile": "~Felix_Chalumeau1;~Shikha_Surana1;~Cl\u00e9ment_Bonnet1;~Nathan_Grinsztajn1;~Arnu_Pretorius1;~Alexandre_Laterre1;~Thomas_D_Barrett1", "aff": "InstaDeep;InstaDeep;InstaDeep;InstaDeep;InstaDeep;InstaDeep;InstaDeep", "aff_domain": "instadeep.com;instadeep.com;instadeep.com;instadeep.com;instadeep.com;instadeep.com;instadeep.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;head of research;Researcher", "bibtex": "@inproceedings{\nchalumeau2023combinatorial,\ntitle={Combinatorial Optimization with Policy Adaptation using Latent Space Search},\nauthor={Felix Chalumeau and Shikha Surana and Cl{\\'e}ment Bonnet and Nathan Grinsztajn and Arnu Pretorius and Alexandre Laterre and Thomas D Barrett},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vpMBqdt9Hl}\n}", "github": "", "project": "", "reviewers": "GNoz;ubWG;Qr8r;YBD4", "pdf_size": 1655214, "rating": "5;6;6;6", "confidence": "4;5;4;4", "soundness": "2;3;4;3", "novelty": "3;3;3;2", "presentation": "2;3;4;3", "wc_summary": "45;131;92;124", "wc_strengths": "34;86;48;83", "wc_weaknesses": "103;121;82;657", "wc_questions": "49;89;104;0", "wc_limitations": "1;10;1;11", "wc_review": "232;437;327;875", "wc_reply_reviewers": "142;124;13;474", "wc_reply_authors": "381;406;0;951", "reply_reviewers": "2;2;1;2", "reply_authors": "2;2;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], 
"wc_summary_avg": [ 98.0, 33.94849039353591 ], "wc_strengths_avg": [ 62.75, 22.331312097590683 ], "wc_weaknesses_avg": [ 240.75, 240.7180664179571 ], "wc_questions_avg": [ 60.5, 40.30198506277327 ], "wc_limitations_avg": [ 5.75, 4.763139720814412 ], "wc_review_avg": [ 467.75, 246.06236506219312 ], "wc_reply_reviewers_avg": [ 188.25, 172.21552630352468 ], "wc_reply_authors_avg": [ 434.5, 338.8351369028897 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16321855650961393806&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "instadeep.com;instadeep.com;instadeep.com;instadeep.com;instadeep.com;instadeep.com;instadeep.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "InstaDeep", "aff_unique_dep": "", "aff_unique_url": "https://www.instadeep.com", "aff_unique_abbr": "InstaDeep", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "TransHP: Image Classification with Hierarchical Prompting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70090", "id": "vpQuCsZXz2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/59b7c1e1716c4feadefd6c70b1dd4630-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vpQuCsZXz2", "openreview": "https://openreview.net/forum?id=vpQuCsZXz2", "poster": "/media/PosterPDFs/NeurIPS%202023/70090.png?t=1697986992.8782175", "slides": "https://nips.cc/virtual/2023/poster/70090", "video": "https://nips.cc/virtual/2023/poster/70090", "author_site": "Wenhao Wang, Yifan Sun, Wei Li, Yi Yang", "tldr": "", "abstract": "This paper explores a hierarchical prompting mechanism for the hierarchical image classification (HIC) task. Different from prior HIC methods, our hierarchical prompting is the first to explicitly inject ancestor-class information as a tokenized hint that benefits the descendant-class discrimination. We think it well imitates human visual recognition, i.e., humans may use the ancestor class as a prompt to draw focus on the subtle differences among descendant classes. We model this prompting mechanism into a Transformer with Hierarchical Prompting (TransHP). TransHP consists of three steps: 1) learning a set of prompt tokens to represent the coarse (ancestor) classes, 2) on-the-fly predicting the coarse class of the input image at an intermediate block, and 3) injecting the prompt token of the predicted coarse class into the intermediate feature. Though the parameters of TransHP maintain the same for all input images, the injected coarse-class prompt conditions (modifies) the subsequent feature extraction and encourages a dynamic focus on relatively subtle differences among the descendant classes. Extensive experiments show that TransHP improves image classification on accuracy (e.g., improving ViT-B/16 by +2.83% ImageNet classification accuracy), training data efficiency (e.g., +12.69% improvement under 10% ImageNet training data), and model explainability. Moreover, TransHP also performs favorably against prior HIC methods, showing that TransHP well exploits the hierarchical information. 
The code is available at: https://github.com/WangWenhao0716/TransHP.", "keywords": "hierarchical image classification;hierarchical prompting;vision transformer", "primary_area": "", "supplementary_material": "/attachment/1861c91b01cc030eb8f0ffb99bf2aaf0bf810517.zip", "author": "Wenhao Wang;Yifan Sun;Wei Li;Yi Yang", "authorids": "~Wenhao_Wang2;~Yifan_Sun2;~Wei_Li55;~Yi_Yang22", "gender": "M;M;M;M", "homepage": "http://wangwenhao0716.github.io/;https://yifansun-reid.github.io;https://github.com/lw-2018;https://person.zju.edu.cn/yiyang", "dblp": ";99/10261-3.html;;33/4854-1.html", "google_scholar": "k3mq3XMAAAAJ;uUZEL7UAAAAJ;hDubMJwAAAAJ;RMSuNFwAAAAJ", "orcid": "0000-0001-8727-1572;0000-0003-3532-6521;;", "linkedin": ";;;", "or_profile": "~Wenhao_Wang2;~Yifan_Sun2;~Wei_Li55;~Yi_Yang22", "aff": "University of Technology Sydney;Baidu;National University of Singapore;Zhejiang University", "aff_domain": "uts.edu.au;baidu.com;nus.edu;zju.edu.cn", "position": "PhD student;Senior Expert;Intern;Full Professor", "bibtex": "@inproceedings{\nwang2023transhp,\ntitle={Trans{HP}: Image Classification with Hierarchical Prompting},\nauthor={Wenhao Wang and Yifan Sun and Wei Li and Yi Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vpQuCsZXz2}\n}", "github": "", "project": "", "reviewers": "9axF;e5cs;rPvW;Y5zG;7VFy", "pdf_size": 1170307, "rating": "4;5;5;5;6", "confidence": "3;4;5;4;4", "soundness": "3;2;3;2;3", "novelty": "2;2;3;2;2", "presentation": "3;3;3;3;3", "wc_summary": "63;74;85;54;289", "wc_strengths": "46;31;46;45;50", "wc_weaknesses": "135;328;453;110;92", "wc_questions": "11;12;23;39;114", "wc_limitations": "7;9;18;31;12", "wc_review": "262;454;625;279;557", "wc_reply_reviewers": "0;141;142;12;55", "wc_reply_authors": "0;453;213;0;18", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;2;1;2", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.0, 88.61376868184763 ], "wc_strengths_avg": [ 43.6, 6.529931086925803 ], "wc_weaknesses_avg": [ 223.6, 142.54627318874387 ], "wc_questions_avg": [ 39.8, 38.446846424641905 ], "wc_limitations_avg": [ 15.4, 8.63944442658207 ], "wc_review_avg": [ 435.4, 145.33354740045397 ], "wc_reply_reviewers_avg": [ 70.0, 61.17842757050887 ], "wc_reply_authors_avg": [ 136.8, 177.38703447546553 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13768413969255465211&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "uts.edu.au;baidu.com;nus.edu;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Technology Sydney;Baidu;National University of Singapore;Zhejiang University", "aff_unique_dep": ";Baidu, Inc.;;", "aff_unique_url": "https://www.uts.edu.au;https://www.baidu.com;https://www.nus.edu.sg;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;Baidu;NUS;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Australia;China;Singapore" }, { "title": "Online PCA in Converging Self-consistent Field Equations", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/70089", "id": "vq11gurmUY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/969c14957c0df5ce2db642b3a5fa985c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vq11gurmUY", "openreview": "https://openreview.net/forum?id=vq11gurmUY", "poster": "/media/PosterPDFs/NeurIPS%202023/70089.png?t=1701569346.914151", "slides": "https://nips.cc/virtual/2023/poster/70089", "video": "https://nips.cc/virtual/2023/poster/70089", "author_site": "Xihan Li, Xiang Chen, Rasul Tutunov, Haitham Bou Ammar, Lei Wang, Jun Wang", "tldr": "", "abstract": "Self-consistent Field (SCF) equation is a type of nonlinear eigenvalue problem in which the matrix to be eigen-decomposed is a function of its own eigenvectors. It is of great significance in computational science for its connection to the Schr\u00f6dinger equation. Traditional fixed-point iteration methods for solving such equations suffer from non-convergence issues. In this work, we present a novel perspective on such SCF equations as a principal component analysis (PCA) for non-stationary time series, in which a distribution and its own top principal components are mutually updated over time, and the equilibrium state of the model corresponds to the solution of the SCF equations. By the new perspective, online PCA techniques are able to engage in so as to enhance the convergence of the model towards the equilibrium state, acting as a new set of tools for converging the SCF equations. With several numerical adaptations, we then develop a new algorithm for converging the SCF equation, and demonstrated its high convergence capacity with experiments on both synthesized and real electronic structure scenarios.", "keywords": "Self-consistent Field Equation;Computational Science;Online PCA", "primary_area": "", "supplementary_material": "/attachment/48a09f16be2046bba35d54437a304db1be58641e.pdf", "author": "Xihan Li;Xiang Chen;Rasul Tutunov;Haitham Bou Ammar;Lei Wang;Jun Wang", "authorids": "~Xihan_Li1;~Xiang_Chen8;~Rasul_Tutunov3;~Haitham_Bou_Ammar1;~Lei_Wang3;~Jun_Wang2", "gender": "M;;;M;M;M", "homepage": "https://snowkylin.github.io/;;;;http://wangleiphy.github.io/;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": "81/4133-1.html;;;;;w/JunWang12", "google_scholar": "2Y-QNGEAAAAJ;2cj3OTIAAAAJ;Zcov4c4AAAAJ;https://scholar.google.co.uk/citations?user=AE5suDoAAAAJ;t4m9TCIAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": "0000-0002-7000-7983;;;;;", "linkedin": ";;;;;", "or_profile": "~Xihan_Li1;~Xiang_Chen8;~Rasul_Tutunov3;~Haitham_Bou_Ammar1;~Lei_Wang3;~Jun_Wang2", "aff": "University College London;Huawei Technologies Ltd.;;Huawei R&D UK;Chinese Academy of Sciences;University College London", "aff_domain": "ucl.ac.uk;huawei.com;;huawei.com;iphy.ac.cn;ucl.ac.uk", "position": "PhD student;Researcher;;Principal Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nli2023online,\ntitle={Online {PCA} in Converging Self-consistent Field Equations},\nauthor={Xihan Li and Xiang Chen and Rasul Tutunov and Haitham Bou Ammar and Lei Wang and Jun Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vq11gurmUY}\n}", "github": "", "project": "", "reviewers": "h8kr;uFr2;EC9k;myiv", "pdf_size": 537227, "rating": "5;6;6;7", "confidence": "3;4;3;3", "soundness": "2;3;2;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "237;14;64;40", "wc_strengths": "71;12;79;23", 
"wc_weaknesses": "292;26;18;77", "wc_questions": "76;20;7;107", "wc_limitations": "36;1;12;5", "wc_review": "712;73;180;252", "wc_reply_reviewers": "97;58;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.75, 87.39958523929046 ], "wc_strengths_avg": [ 46.25, 29.14939965076468 ], "wc_weaknesses_avg": [ 103.25, 111.29998876909198 ], "wc_questions_avg": [ 52.5, 40.77070026379238 ], "wc_limitations_avg": [ 13.5, 13.573871960498227 ], "wc_review_avg": [ 304.25, 243.87740260220912 ], "wc_reply_reviewers_avg": [ 38.75, 41.130128859511245 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YHoPBRDLokEJ:scholar.google.com/&scioq=Online+PCA+in+Converging+Self-consistent+Field+Equations&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "ucl.ac.uk;huawei.com;;huawei.com;iphy.ac.cn;ucl.ac.uk", "author_num": 6, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University College London;Huawei;Chinese Academy of Sciences", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.huawei.com;https://www.cas.cn", "aff_unique_abbr": "UCL;Huawei;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Revisiting the Minimalist Approach to Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70088", "id": "vqGWslLeEw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/26cce1e512793f2072fd27c391e04652-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vqGWslLeEw", "openreview": "https://openreview.net/forum?id=vqGWslLeEw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70088", "video": "https://nips.cc/virtual/2023/poster/70088", "author_site": "Denis Tarasov, Vladislav Kurenkov, Alexander Nikulin, Sergey Kolesnikov", "tldr": "", "abstract": "Recent years have witnessed significant advancements in offline reinforcement learning (RL), resulting in the development of numerous algorithms with varying degrees of complexity. While these algorithms have led to noteworthy improvements, many incorporate seemingly minor design choices that impact their effectiveness beyond core algorithmic advances. However, the effect of these design choices on established baselines remains understudied. In this work, we aim to bridge this gap by conducting a retrospective analysis of recent works in offline RL and propose ReBRAC, a minimalistic algorithm that integrates such design elements built on top of the TD3+BC method. We evaluate ReBRAC on 51 datasets with both proprioceptive and visual state spaces using D4RL and V-D4RL benchmarks, demonstrating its state-of-the-art performance among ensemble-free methods in both offline and offline-to-online settings. 
To further illustrate the efficacy of these design choices, we perform a large-scale ablation study and hyperparameter sensitivity analysis on the scale of thousands of experiments.", "keywords": "Offline Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/d9174b3807c5761e1c12211793334eee4eb17676.zip", "author": "Denis Tarasov;Vladislav Kurenkov;Alexander Nikulin;Sergey Kolesnikov", "authorids": "~Denis_Tarasov1;~Vladislav_Kurenkov1;~Alexander_Nikulin1;~Sergey_Kolesnikov1", "gender": ";M;M;M", "homepage": "https://dt6a.github.io/;https://vkurenkov.me;https://howuhh.github.io/;https://scitator.com", "dblp": "255/7697;251/9126;314/6349;191/1945", "google_scholar": "LQcCkD8AAAAJ;w09vtVsAAAAJ;yACvnqUAAAAJ;iukbpVEAAAAJ", "orcid": "0000-0001-9744-5265;0000-0003-4078-1086;;", "linkedin": "tarasovdeal/;;;scitator/", "or_profile": "~Denis_Tarasov1;~Vladislav_Kurenkov1;~Alexander_Nikulin1;~Sergey_Kolesnikov1", "aff": "Jacobs University Bremen;Tinkoff;Higher School of Economics, Higher School of Economics;Tinkoff", "aff_domain": "jacobs-university.de;tinkoff.ai;edu.hse.ru;tinkoff.ru", "position": "Undergrad student;Researcher;MS student;Principal Researcher", "bibtex": "@inproceedings{\ntarasov2023revisiting,\ntitle={Revisiting the Minimalist Approach to Offline Reinforcement Learning},\nauthor={Denis Tarasov and Vladislav Kurenkov and Alexander Nikulin and Sergey Kolesnikov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vqGWslLeEw}\n}", "github": "", "project": "", "reviewers": "DJeX;8F2Q;HdeU;7C4n", "pdf_size": 1029195, "rating": "5;6;7;7", "confidence": "4;2;5;4", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "27;77;71;58", "wc_strengths": "28;47;77;161", "wc_weaknesses": "59;245;33;36", "wc_questions": "59;10;293;65", "wc_limitations": "4;2;29;7", "wc_review": "177;381;503;327", "wc_reply_reviewers": "592;73;47;14", "wc_reply_authors": "1132;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 58.25, 19.30511590226798 ], "wc_strengths_avg": [ 78.25, 50.86931786450453 ], "wc_weaknesses_avg": [ 93.25, 88.18836374488417 ], "wc_questions_avg": [ 106.75, 109.62749427036997 ], "wc_limitations_avg": [ 10.5, 10.828203913853857 ], "wc_review_avg": [ 347.0, 117.03845521878696 ], "wc_reply_reviewers_avg": [ 181.5, 237.92278159100275 ], "wc_reply_authors_avg": [ 283.0, 490.17037854199225 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3458572319330373, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11180715780166906434&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "jacobs-university.de;tinkoff.ai;edu.hse.ru;tinkoff.ru", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Jacobs University;Tinkoff Bank;Higher School of Economics", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jacobs-university.de;https://www.tinkoff.ru;https://www.hse.ru", "aff_unique_abbr": "JUB;Tinkoff;HSE", "aff_campus_unique_index": "0", "aff_campus_unique": "Bremen;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": 
"Germany;Russian Federation" }, { "title": "The Geometry of Neural Nets' Parameter Spaces Under Reparametrization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70087", "id": "vtLNwa6uX0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/395371f778ebd4854b88521100af30ad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vtLNwa6uX0", "openreview": "https://openreview.net/forum?id=vtLNwa6uX0", "poster": "/media/PosterPDFs/NeurIPS%202023/70087.png?t=1701706102.8707774", "slides": "https://nips.cc/virtual/2023/poster/70087", "video": "https://nips.cc/virtual/2023/poster/70087", "author_site": "Agustinus Kristiadi, Felix Dangel, Philipp Hennig", "tldr": "", "abstract": "Model reparametrization, which follows the change-of-variable rule of calculus, is a popular way to improve the training of neural nets. But it can also be problematic since it can induce inconsistencies in, e.g., Hessian-based flatness measures, optimization trajectories, and modes of probability densities. This complicates downstream analyses: e.g. one cannot definitively relate flatness with generalization since arbitrary reparametrization changes their relationship. In this work, we study the invariance of neural nets under reparametrization from the perspective of Riemannian geometry. From this point of view, invariance is an inherent property of any neural net if one explicitly represents the metric and uses the correct associated transformation rules. This is important since although the metric is always present, it is often implicitly assumed as identity, and thus dropped from the notation, then lost under reparametrization. We discuss implications for measuring the flatness of minima, optimization, and for probability-density maximization. 
Finally, we explore some interesting directions where invariance is useful.", "keywords": "neural network;invariance;equivariance;reparametrization;riemannian geometry;parameter space", "primary_area": "", "supplementary_material": "/attachment/47246039bf5b108df6975a8525fbc542aa839dfb.pdf", "author": "Agustinus Kristiadi;Felix Dangel;Philipp Hennig", "authorids": "~Agustinus_Kristiadi1;~Felix_Dangel1;~Philipp_Hennig1", "gender": ";M;M", "homepage": "https://agustinus.kristia.de;https://f-dangel.com;http://mml.inf.uni-tuebingen.de", "dblp": "215/3954;236/4218;08/9077", "google_scholar": "_1qe2mYAAAAJ;9hlJ9W0AAAAJ;https://scholar.google.de/citations?user=UeG5w08AAAAJ", "orcid": "0000-0003-1615-1121;0000-0002-1414-8554;0000-0001-7293-6092", "linkedin": "agustinus-kristiadi/;;", "or_profile": "~Agustinus_Kristiadi1;~Felix_Dangel1;~Philipp_Hennig1", "aff": "University of Tuebingen;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;tue.mpg.de;uni-tuebingen.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkristiadi2023the,\ntitle={The Geometry of Neural Nets' Parameter Spaces Under Reparametrization},\nauthor={Agustinus Kristiadi and Felix Dangel and Philipp Hennig},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vtLNwa6uX0}\n}", "github": "", "project": "", "reviewers": "Poha;ecDa;togv;LwSr", "pdf_size": 727983, "rating": "6;6;7;8", "confidence": "1;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "59;83;412;122", "wc_strengths": "70;85;120;41", "wc_weaknesses": "114;180;523;19", "wc_questions": "1;72;418;135", "wc_limitations": "6;21;22;9", "wc_review": "250;441;1495;326", "wc_reply_reviewers": "26;38;78;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 169.0, 142.08624141696478 ], "wc_strengths_avg": [ 79.0, 28.469281690973517 ], "wc_weaknesses_avg": [ 209.0, 190.1065490718297 ], "wc_questions_avg": [ 156.5, 158.24427319811608 ], "wc_limitations_avg": [ 14.5, 7.088723439378913 ], "wc_review_avg": [ 628.0, 505.1598756829366 ], "wc_reply_reviewers_avg": [ 42.25, 21.1704392963396 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.48420012470625223, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8679804273134544843&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "uni-tuebingen.de;tue.mpg.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Tuebingen;Max Planck Institute for Intelligent Systems;University of T\u00fcbingen", "aff_unique_dep": ";Intelligent Systems;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mpi-is.mpg.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;MPI-IS;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Train Once, Get a Family: State-Adaptive Balances for 
Offline-to-Online Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70086", "id": "vtoY8qJjTR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9318763d049edf9a1f2779b2a59911d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vtoY8qJjTR", "openreview": "https://openreview.net/forum?id=vtoY8qJjTR", "poster": "/media/PosterPDFs/NeurIPS%202023/70086.png?t=1701792948.470539", "slides": "https://nips.cc/virtual/2023/poster/70086", "video": "https://nips.cc/virtual/2023/poster/70086", "author_site": "Shenzhi Wang, Qisen Yang, Jiawei Gao, Matthieu Lin, HAO CHEN, Liwei Wu, Ning Jia, Shiji Song, Gao Huang", "tldr": "", "abstract": "Offline-to-online reinforcement learning (RL) is a training paradigm that combines pre-training on a pre-collected dataset with fine-tuning in an online environment. However, the incorporation of online fine-tuning can intensify the well-known distributional shift problem. Existing solutions tackle this problem by imposing a policy constraint on the policy improvement objective in both offline and online learning. They typically advocate a single balance between policy improvement and constraints across diverse data collections. This one-size-fits-all manner may not optimally leverage each collected sample due to the significant variation in data quality across different states. To this end, we introduce Family Offline-to-Online RL (FamO2O), a simple yet effective framework that empowers existing algorithms to determine state-adaptive improvement-constraint balances. FamO2O utilizes a universal model to train a family of policies with different improvement/constraint intensities, and a balance model to select a suitable policy for each state. Theoretically, we prove that state-adaptive balances are necessary for achieving a higher policy performance upper bound. Empirically, extensive experiments show that FamO2O offers a statistically significant improvement over various existing methods, achieving state-of-the-art performance on the D4RL benchmark. 
Codes are available at https://github.com/LeapLabTHU/FamO2O.", "keywords": "reinforcement learning;offline-to-online reinforcement learning;offline reinforcement learning;policy improvement;policy constraint", "primary_area": "", "supplementary_material": "", "author": "Shenzhi Wang;Qisen Yang;Jiawei Gao;Matthieu Gaetan Lin;HAO CHEN;Liwei Wu;Ning Jia;Shiji Song;Gao Huang", "authorids": "~Shenzhi_Wang1;~Qisen_Yang1;~Jiawei_Gao1;~Matthieu_Gaetan_Lin1;~HAO_CHEN41;~Liwei_Wu5;~Ning_Jia1;~Shiji_Song1;~Gao_Huang1", "gender": ";F;M;M;M;M;M;M;M", "homepage": ";https://qisen-yang.netlify.app/;https://gao-jiawei.com/;https://linyuhongg.github.io;;;https://www.dur.ac.uk/research/directory/staff/?mode=staff&id=16471;;http://www.gaohuang.net", "dblp": ";;124/9335-4;;;;;72/5351;", "google_scholar": ";VwblgV0AAAAJ;NJxUNrcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=TYN-kpkAAAAJ;dg1JyaUAAAAJ;https://scholar.google.com/citations?view_op=list_works;;-P9LwcgAAAAJ", "orcid": ";0000-0002-2587-2660;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Shenzhi_Wang1;~Qisen_Yang1;~Jiawei_Gao1;~Matthieu_Gaetan_Lin1;~HAO_CHEN41;~Liwei_Wu5;~Ning_Jia1;~Shiji_Song1;~Gao_Huang1", "aff": ";Tsinghua University;Tsinghua University;Tsinghua University;Sensetime;SenseTime;Beijing Academy of Artificial Intelligence;Tsinghua University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;sensetime.com;sensetime.com;baai.ac.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": ";PhD student;Undergrad student;PhD student;Researcher;Researcher;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023train,\ntitle={Train Once, Get a Family: State-Adaptive Balances for Offline-to-Online Reinforcement Learning},\nauthor={Shenzhi Wang and Qisen Yang and Jiawei Gao and Matthieu Gaetan Lin and HAO CHEN and Liwei Wu and Ning Jia and Shiji Song and Gao Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vtoY8qJjTR}\n}", "github": "", "project": "", "reviewers": "pp8g;BkWu;urL3;APCr", "pdf_size": 1382016, "rating": "6;7;7;7", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "118;29;148;61", "wc_strengths": "54;24;25;102", "wc_weaknesses": "237;1;122;231", "wc_questions": "2;12;298;384", "wc_limitations": "1;14;6;36", "wc_review": "412;80;599;814", "wc_reply_reviewers": "55;30;23;649", "wc_reply_authors": "81;35;45;1000", "reply_reviewers": "1;1;1;4", "reply_authors": "2;2;2;5", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 46.652974181717504 ], "wc_strengths_avg": [ 51.25, 31.68102744546016 ], "wc_weaknesses_avg": [ 147.75, 96.29998701972913 ], "wc_questions_avg": [ 174.0, 169.7822134382751 ], "wc_limitations_avg": [ 14.25, 13.386093530227555 ], "wc_review_avg": [ 476.25, 269.39039979182627 ], "wc_reply_reviewers_avg": [ 189.25, 265.7031943729695 ], "wc_reply_authors_avg": [ 290.25, 410.1313051938367 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5799418293204783764&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;sensetime.com;sensetime.com;baai.ac.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;1;2;0;0", "aff_unique_norm": "Tsinghua University;SenseTime;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.sensetime.com;https://www.baaic.cn", "aff_unique_abbr": "THU;SenseTime;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "HT-Step: Aligning Instructional Articles with How-To Videos", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73426", "id": "vv3cocNsEK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9d58d85bfc041b4f901c62ba37a3f322-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=vv3cocNsEK", "openreview": "https://openreview.net/forum?id=vv3cocNsEK", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73426", "video": "https://nips.cc/virtual/2023/poster/73426", "author_site": "Triantafyllos Afouras, Effrosyni Mavroudi, Tushar Nagarajan, Huiyu Wang, Lorenzo Torresani", "tldr": "", "abstract": "We introduce HT-Step, a large-scale dataset containing temporal annotations of instructional article steps in cooking videos. It includes 122k segment-level annotations over 20k narrated videos (approximately 2.3k hours) of the HowTo100M dataset.\nEach annotation provides a temporal interval, and a categorical step label from a taxonomy of 4,958 unique steps automatically mined from wikiHow articles which include rich descriptions of each step.\nOur dataset significantly surpasses existing labeled step datasets in terms of scale, number of tasks, and richness of natural language step descriptions. 
Based on these annotations, we introduce a strongly supervised benchmark for aligning instructional articles with how-to videos and present a comprehensive evaluation of baseline methods for this task.\nBy publicly releasing these annotations and defining rigorous evaluation protocols and metrics,\nwe hope to significantly accelerate research in the field of procedural activity understanding.", "keywords": "step annotations;temporal article grounding;instructional video;instructional articles;how-to", "primary_area": "", "supplementary_material": "/attachment/6217039c6b767379a04225d20629bcc0648524f6.zip", "author": "Triantafyllos Afouras;Effrosyni Mavroudi;Tushar Nagarajan;Huiyu Wang;Lorenzo Torresani", "authorids": "~Triantafyllos_Afouras1;~Effrosyni_Mavroudi1;~Tushar_Nagarajan1;~Huiyu_Wang1;~Lorenzo_Torresani1", "gender": "M;F;;;M", "homepage": "http://www.robots.ox.ac.uk/~afourast/;;https://tushar-n.github.io/;http://csrhddlam.github.io/;https://ltorresa.github.io", "dblp": "175/5771;166/6300;207/8308;;75/2854", "google_scholar": "https://scholar.google.co.uk/citations?user=TkBHFfgAAAAJ;vYRzGGEAAAAJ;KAKqSwIAAAAJ;SnmuYloAAAAJ;ss8KR5gAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Triantafyllos_Afouras1;~Effrosyni_Mavroudi1;~Tushar_Nagarajan1;~Huiyu_Wang1;~Lorenzo_Torresani1", "aff": "Meta;Meta Facebook;University of Texas, Austin;Meta Platforms;Meta", "aff_domain": "meta.com;fb.com;utexas.edu;meta.com;meta.com", "position": "Researcher;Researcher;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nafouras2023htstep,\ntitle={{HT}-Step: Aligning Instructional Articles with How-To Videos},\nauthor={Triantafyllos Afouras and Effrosyni Mavroudi and Tushar Nagarajan and Huiyu Wang and Lorenzo Torresani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=vv3cocNsEK}\n}", "github": "", "project": "", "reviewers": "6wqD;ssNp;rjWS;XtLe", "pdf_size": 7787893, "rating": "5;6;7;7", "confidence": "3;4;4;4", "wc_summary_and_contributions": "131;47;45;198", "wc_strengths": "47;30;128;173", "wc_improvement": "128;43;91;1", "wc_limitations": "37;3;1;17", "wc_correctness": "31;8;1;6", "wc_clarity": "9;9;1;3", "wc_relation_to_prior_work": "40;6;1;3", "wc_documentation": "20;4;1;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "444;151;270;411", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "772;330;745;10", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 105.25, 63.81369367149969 ], "wc_strengths_avg": [ 94.5, 58.52563540876767 ], "wc_improvement_avg": [ 65.75, 48.01757490752735 ], "wc_limitations_avg": [ 14.5, 14.378803844548406 ], "wc_correctness_avg": [ 11.5, 11.543396380615196 ], "wc_clarity_avg": [ 5.5, 3.570714214271425 ], "wc_relation_to_prior_work_avg": [ 12.5, 15.976545308670458 ], "wc_documentation_avg": [ 8.5, 7.22841614740048 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 319.0, 116.95511959721986 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 464.25, 315.3952876946642 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14049675030503997030&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 4, "email": "meta.com;fb.com;utexas.edu;meta.com;meta.com", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Meta;University of Texas at Austin", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.utexas.edu", "aff_unique_abbr": "Meta;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70085", "id": "vvoWPYqZJA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9a6a435e75419a836fe47ab6793623e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vvoWPYqZJA", "openreview": "https://openreview.net/forum?id=vvoWPYqZJA", "poster": "/media/PosterPDFs/NeurIPS%202023/70085.png?t=1697200847.9809546", "slides": "https://nips.cc/virtual/2023/poster/70085", "video": "https://nips.cc/virtual/2023/poster/70085", "author_site": "Wenliang Dai, Junnan Li, Junnan Li, DONGXU LI, Anthony Tiong, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale N Fung, Steven Hoi", "tldr": "", "abstract": "Large-scale pre-training and instruction tuning have been successful at creating general-purpose language models with broad competence. However, building general-purpose vision-language models is challenging due to the rich input distributions and task diversity resulting from the additional visual input. Although vision-language pretraining has been widely studied, vision-language instruction tuning remains under-explored. In this paper, we conduct a systematic and comprehensive study on vision-language instruction tuning based on the pretrained BLIP-2 models. We gather 26 publicly available datasets, covering a wide variety of tasks and capabilities, and transform them into instruction tuning format. Additionally, we introduce an instruction-aware Query Transformer, which extracts informative features tailored to the given instruction. Trained on 13 held-in datasets, InstructBLIP attains state-of-the-art zero-shot performance across all 13 held-out datasets, substantially outperforming BLIP-2 and larger Flamingo models. Our models also lead to state-of-the-art performance when finetuned on individual downstream tasks (e.g., 90.7% accuracy on ScienceQA questions with image contexts). Furthermore, we qualitatively demonstrate the advantages of InstructBLIP over concurrent multimodal models. 
All InstructBLIP models are open-source.", "keywords": "Vision-Language Models;Instruction Tuning;Zero-shot", "primary_area": "", "supplementary_material": "/attachment/5da14fa349801aa865181c9802c662a39a55493b.pdf", "author": "Wenliang Dai;Junnan Li;Dongxu Li;Anthony Tiong;Junqi Zhao;Weisheng Wang;Boyang Li;Pascale Fung;Steven Hoi", "authorids": "~Wenliang_Dai1;~Junnan_Li2;~Dongxu_Li3;~Anthony_Tiong1;~Junqi_Zhao1;wang1900@ntu.edu.sg;~Boyang_Li1;~Pascale_Fung1;~Steven_Hoi2", "gender": "M;;;;;;Unspecified;F;M", "homepage": "https://wenliangdai.github.io/;;;;;;http://www.boyangli.org;http://pascale.home.ece.ust.hk/;http://stevenhoi.com", "dblp": "263/9790;;;;;;70/1211-1;29/4187;", "google_scholar": "-_xy3jAAAAAJ;;;;;;QwL4z2UAAAAJ;;JoLjflYAAAAJ", "orcid": ";;;;;;0000-0002-6230-2376;;", "linkedin": "wenliang-dai-145116123/;;;;junqizhao/;;;;", "or_profile": "~Wenliang_Dai1;~Junnan_Li2;~Dongxu_Li3;~Anthony_Tiong1;~Junqi_Zhao1;wang1900@ntu.edu.sg;~Boyang_Li1;~Pascale_Fung1;~Steven_Hoi2", "aff": "Hong Kong University of Science and Technology;;;;Nanyang Technological University;;Nanyang Technological University;HKUST;Singapore Management University", "aff_domain": "ust.hk;;;;ntu.edu.sg;;ntu.edu.sg;ece.ust.hk;smu.edu.sg", "position": "PhD student;;;;Researcher;;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ndai2023instructblip,\ntitle={Instruct{BLIP}: Towards General-purpose Vision-Language Models with Instruction Tuning},\nauthor={Wenliang Dai and Junnan Li and Dongxu Li and Anthony Tiong and Junqi Zhao and Weisheng Wang and Boyang Li and Pascale Fung and Steven Hoi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vvoWPYqZJA}\n}", "github": "", "project": "", "reviewers": "prhX;r68F;whUx;WE2s;b9mY", "pdf_size": 3676214, "rating": "5;6;6;8;8", "confidence": "3;4;4;5;5", "soundness": "3;3;3;4;4", "novelty": "3;3;2;4;4", "presentation": "3;3;3;4;4", "wc_summary": "147;73;52;70;64", "wc_strengths": "49;59;48;103;54", "wc_weaknesses": "37;81;88;143;101", "wc_questions": "29;193;1;118;87", "wc_limitations": "37;11;1;10;38", "wc_review": "299;417;190;444;344", "wc_reply_reviewers": "0;1258;0;154;13", "wc_reply_authors": "36;1478;0;140;0", "reply_reviewers": "0;5;0;2;1", "reply_authors": "2;4;1;2;1", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 81.2, 33.67729205265768 ], "wc_strengths_avg": [ 62.6, 20.57765778702717 ], "wc_weaknesses_avg": [ 90.0, 34.12916641232247 ], "wc_questions_avg": [ 85.6, 67.74540574828673 ], "wc_limitations_avg": [ 19.4, 15.186836405255704 ], "wc_review_avg": [ 338.8, 90.48182137866146 ], "wc_reply_reviewers_avg": [ 285.0, 489.96408031609826 ], "wc_reply_authors_avg": [ 330.8, 575.8883225070639 ], "reply_reviewers_avg": [ 1.6, 1.8547236990991407 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9799578870122229, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "ust.hk;;;;ntu.edu.sg;;ntu.edu.sg;ece.ust.hk;smu.edu.sg", "author_num": 9, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Nanyang Technological University;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.ust.hk;https://www.ntu.edu.sg;https://www.smu.edu.sg", "aff_unique_abbr": "HKUST;NTU;SMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Optimal Regret Is Achievable with Bounded Approximate Inference Error: An Enhanced Bayesian Upper Confidence Bound Framework", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70084", "id": "vwr4bHHsRT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/33bb58be3f0e903c75afa73d75b5c67e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vwr4bHHsRT", "openreview": "https://openreview.net/forum?id=vwr4bHHsRT", "poster": "/media/PosterPDFs/NeurIPS%202023/70084.png?t=1702334909.9149787", "slides": "https://nips.cc/virtual/2023/poster/70084", "video": "https://nips.cc/virtual/2023/poster/70084", "author_site": "Ziyi Huang, Henry Lam, Amirhossein Meisami, Haofeng Zhang", "tldr": "", "abstract": "Bayesian bandit algorithms with approximate Bayesian inference have been widely used in real-world applications. However, there is a large discrepancy between the superior practical performance of these approaches and their theoretical justification. Previous research only indicates a negative theoretical result: Thompson sampling could have a worst-case linear regret $\\Omega(T)$ with a constant threshold on the inference error measured by one $\\alpha$-divergence. To bridge this gap, we propose an Enhanced Bayesian Upper Confidence Bound (EBUCB) framework that can efficiently accommodate bandit problems in the presence of approximate inference. Our theoretical analysis demonstrates that for Bernoulli multi-armed bandits, EBUCB can achieve the optimal regret order $O(\\log T)$ if the inference error measured by two different $\\alpha$-divergences is less than a constant, regardless of how large this constant is. To our best knowledge, our study provides the first theoretical regret bound that is better than $o(T)$ in the setting of constant approximate inference error. 
Furthermore, in concordance with the negative results in previous studies, we show that only one bounded $\\alpha$-divergence is insufficient to guarantee a sub-linear regret.", "keywords": "Bayesian bandits;approximate Bayesian inference;Bayesian Upper Confidence Bound;optimal regret order;bounded inference error", "primary_area": "", "supplementary_material": "/attachment/80428ab55ba9e7abeb409b3309282a47690ef71c.pdf", "author": "Ziyi Huang;Henry Lam;Amirhossein Meisami;Haofeng Zhang", "authorids": "~Ziyi_Huang1;~Henry_Lam1;~Amirhossein_Meisami1;~Haofeng_Zhang1", "gender": "F;;M;", "homepage": "https://structurefunctionlab.ee.columbia.edu/people/ziyi-huang;http://www.columbia.edu/~khl2114/;;", "dblp": ";35/9508;;", "google_scholar": "KWfiGJUAAAAJ;Bnj50x0AAAAJ;;", "orcid": "0000-0001-6985-0298;;;", "linkedin": "ziyi-huang-083683135/;;amirmeisami/;", "or_profile": "~Ziyi_Huang1;~Henry_Lam1;~Amirhossein_Meisami1;~Haofeng_Zhang1", "aff": "Columbia University;Columbia University;;", "aff_domain": "columbia.edu;columbia.edu;;", "position": "Researcher;Associate Professor;;", "bibtex": "@inproceedings{\nhuang2023optimal,\ntitle={Optimal Regret Is Achievable with Bounded Approximate Inference Error: An Enhanced Bayesian Upper Confidence Bound Framework},\nauthor={Ziyi Huang and Henry Lam and Amirhossein Meisami and Haofeng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vwr4bHHsRT}\n}", "github": "", "project": "", "reviewers": "XLXW;Fvi2;tsC9", "pdf_size": 3516717, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "90;53;91", "wc_strengths": "21;66;50", "wc_weaknesses": "100;99;24", "wc_questions": "120;4;17", "wc_limitations": "41;1;4", "wc_review": "372;223;186", "wc_reply_reviewers": "43;26;14", "wc_reply_authors": "69;28;13", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 17.682382946499793 ], "wc_strengths_avg": [ 45.666666666666664, 18.624953392931992 ], "wc_weaknesses_avg": [ 74.33333333333333, 35.593382655893905 ], "wc_questions_avg": [ 47.0, 51.89091121445707 ], "wc_limitations_avg": [ 15.333333333333334, 18.190351532856337 ], "wc_review_avg": [ 260.3333333333333, 80.39209469151112 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 11.897712198383164 ], "wc_reply_authors_avg": [ 36.666666666666664, 23.66901396810231 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7321458212343960499&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "columbia.edu;columbia.edu;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning from Rich Semantics and Coarse Locations for Long-tailed Object Detection", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70083", "id": "vybQs1Gbuk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f5fcd88d3deb97bb62559208cfa0ab62-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vybQs1Gbuk", "openreview": "https://openreview.net/forum?id=vybQs1Gbuk", "poster": "/media/PosterPDFs/NeurIPS%202023/70083.png?t=1701875895.5433261", "slides": "https://nips.cc/virtual/2023/poster/70083", "video": "https://nips.cc/virtual/2023/poster/70083", "author_site": "Lingchen Meng, Xiyang Dai, Jianwei Yang, Dongdong Chen, Yinpeng Chen, Yinpeng Chen, Mengchen Liu, Yi-Ling Chen, Zuxuan Wu, Lu Yuan, Yu-Gang Jiang", "tldr": "", "abstract": "Long-tailed object detection (LTOD) aims to handle the extreme data imbalance in real-world datasets, where many tail classes have scarce instances. One popular strategy is to explore extra data with image-level labels, yet it produces limited results due to (1) semantic ambiguity---an image-level label only captures a salient part of the image, ignoring the remaining rich semantics within the image; and (2) location sensitivity---the label highly depends on the locations and crops of the original image, which may change after data transformations like random cropping.\nTo remedy this, we propose RichSem, a simple but effective method, which is robust to learn rich semantics from coarse locations without the need of accurate bounding boxes. RichSem leverages rich semantics from images, which are then served as additional ``soft supervision'' for training detectors. Specifically, we add a semantic branch \nto our detector to learn these soft semantics and enhance feature representations for long-tailed object detection. The semantic branch is only used for training and is removed during inference. RichSem achieves consistent improvements on both overall and rare-category of LVIS under different backbones and detectors. \nOur method achieves state-of-the-art performance without requiring complex training and testing procedures. 
Moreover, we show the effectiveness of our method on other long-tailed datasets with additional experiments.", "keywords": "Long-tail object detection;visual semantics;soft supervision", "primary_area": "", "supplementary_material": "/attachment/a637f6227f0d3f2a39c4b29d77bb5c7fbcd9254b.pdf", "author": "Lingchen Meng;Xiyang Dai;Jianwei Yang;Dongdong Chen;Yinpeng Chen;Mengchen Liu;Yi-Ling Chen;Zuxuan Wu;Lu Yuan;Yu-Gang Jiang", "authorids": "~Lingchen_Meng1;~Xiyang_Dai4;~Jianwei_Yang1;~Dongdong_Chen1;~Yinpeng_Chen1;~Mengchen_Liu3;~Yi-Ling_Chen3;~Zuxuan_Wu1;~Lu_Yuan1;~Yu-Gang_Jiang1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "https://menglcool.github.io;http://www.dongdongchen.bid/;https://scholar.google.com/citations?user=V_VpLksAAAAJ&hl=en;https://scholar.google.com/citations?user=cOPQtYgAAAAJ&hl=en;https://zxwu.azurewebsites.net/;https://www.microsoft.com/en-us/research/people/luyuan/;https://fvl.fudan.edu.cn/people/yugangjiang/;https://jwyang.github.io/;https://sites.google.com/site/xiyangdai/;", "dblp": "300/8453;92/1489-1;45/6977;;150/8447;;24/5818;;176/5470;72/3762-4", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=sYKpKqEAAAAJ;;cOPQtYgAAAAJ;7t12hVkAAAAJ;k9TsUVsAAAAJ;f3_FP8AAAAAJ;Cl9byD8AAAAJ;QC8RwcoAAAAJ;jI5oDhYAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Lingchen_Meng1;~Dongdong_Chen1;~Yinpeng_Chen1;~Mengchen_Liu3;~Zuxuan_Wu1;~Lu_Yuan1;~Yu-Gang_Jiang1;~Jianwei_Yang2;~Xiyang_Dai2;~Yi-Ling_Chen2", "aff": "Fudan University;Microsoft Research;Microsoft;Microsoft;Fudan University;Microsoft;Fudan University;Microsoft;Microsoft;Microsoft", "aff_domain": "fudan.edu.cn;microsoft.com;microsoft.com;microsoft.com;fudan.edu;microsoft.com;fudan.edu.cn;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Associate Professor;Principal Research Manager;Full Professor;Researcher;Researcher;Senior Research Engineer", "bibtex": "@inproceedings{\nmeng2023learning,\ntitle={Learning from Rich Semantics and Coarse Locations for Long-tailed Object Detection},\nauthor={Lingchen Meng and Xiyang Dai and Jianwei Yang and Dongdong Chen and Yinpeng Chen and Mengchen Liu and Yi-Ling Chen and Zuxuan Wu and Lu Yuan and Yu-Gang Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vybQs1Gbuk}\n}", "github": "", "project": "", "reviewers": "vo8U;YRik;ndNj;gFff", "pdf_size": 3340409, "rating": "5;5;6;7", "confidence": "4;4;3;5", "soundness": "2;3;3;4", "novelty": "3;3;2;3", "presentation": "3;3;3;4", "wc_summary": "71;99;53;118", "wc_strengths": "51;30;58;67", "wc_weaknesses": "363;110;83;82", "wc_questions": "54;5;5;119", "wc_limitations": "6;5;5;12", "wc_review": "545;249;204;398", "wc_reply_reviewers": "127;60;103;43", "wc_reply_authors": "333;0;640;0", "reply_reviewers": "2;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.25, 25.02373872945448 ], "wc_strengths_avg": [ 51.5, 13.647344063956181 ], "wc_weaknesses_avg": [ 159.5, 118.02648007968381 ], "wc_questions_avg": [ 45.75, 46.78341052125208 ], "wc_limitations_avg": [ 7.0, 2.9154759474226504 ], "wc_review_avg": [ 349.0, 134.01678999289604 ], "wc_reply_reviewers_avg": [ 83.25, 33.40939239196068 ], 
"wc_reply_authors_avg": [ 243.25, 266.3675796714007 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2637956564945080033&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 8, "email": "fudan.edu.cn;microsoft.com;microsoft.com;microsoft.com;fudan.edu;microsoft.com;fudan.edu.cn;microsoft.com;microsoft.com;microsoft.com", "author_num": 10, "aff_unique_index": "0;1;1;1;0;1;0;1;1;1", "aff_unique_norm": "Fudan University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.fudan.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Fudan;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1;0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Adaptive whitening with fast gain modulation and slow synaptic plasticity", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70082", "id": "vz7SdRqWGM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/297fe652867e4897e9f1fe1cd715de19-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vz7SdRqWGM", "openreview": "https://openreview.net/forum?id=vz7SdRqWGM", "poster": "/media/PosterPDFs/NeurIPS%202023/70082.png?t=1702229772.2849195", "slides": "https://nips.cc/virtual/2023/poster/70082", "video": "https://nips.cc/virtual/2023/poster/70082", "author_site": "Lyndon Duong, Eero Simoncelli, Dmitri Chklovskii, David Lipshutz", "tldr": "", "abstract": "Neurons in early sensory areas rapidly adapt to changing sensory statistics, both by normalizing the variance of their individual responses and by reducing correlations between their responses. Together, these transformations may be viewed as an adaptive form of statistical whitening. Existing mechanistic models of adaptive whitening exclusively use either synaptic plasticity or gain modulation as the biological substrate for adaptation; however, on their own, each of these models has significant limitations. In this work, we unify these approaches in a normative multi-timescale mechanistic model that adaptively whitens its responses with complementary computational roles for synaptic plasticity and gain modulation. Gains are modified on a fast timescale to adapt to the current statistical context, whereas synapses are modified on a slow timescale to match structural properties of the input statistics that are invariant across contexts. Our model is derived from a novel multi-timescale whitening objective that factorizes the inverse whitening matrix into basis vectors, which correspond to synaptic weights, and a diagonal matrix, which corresponds to neuronal gains. 
We test our model on synthetic and natural datasets and find that the synapses learn optimal configurations over long timescales that enable adaptive whitening on short timescales using gain modulation.", "keywords": "neuroscience;adaptation;whitening;efficient coding;recurrent neural network;gain modulation;synaptic plasticity;local learning rules", "primary_area": "", "supplementary_material": "/attachment/533afee31b7db088aeb176009341ca8a6559a93c.zip", "author": "Lyndon Duong;Eero P Simoncelli;Dmitri Chklovskii;David Lipshutz", "authorids": "~Lyndon_Duong1;~Eero_P_Simoncelli1;~Dmitri_Chklovskii1;~David_Lipshutz1", "gender": "M;;M;M", "homepage": "https://www.lyndonduong.com;;https://lipshutzlab.com;https://www.cns.nyu.edu/~eero/", "dblp": "334/2354.html;06/2796;173/4650;30/5604", "google_scholar": "3AQW-SsAAAAJ;7Bgb5TUAAAAJ;XeWdtXcAAAAJ;MplR7_cAAAAJ", "orcid": "0000-0003-0575-1033;;0000-0001-9347-8326;0000-0002-1206-527X", "linkedin": "lyndon-duong-a9107223a/;;;eero-simoncelli-445782123", "or_profile": "~Lyndon_Duong1;~Dmitri_Chklovskii1;~David_Lipshutz1;~Eero_Peter_Simoncelli1", "aff": "New York University;Simons Foundation;Flatiron Institute;New York University", "aff_domain": "nyu.edu;simonsfoundation.org;flatironinstitute.org;nyu.edu", "position": "PhD student;Group Leader;Associate Research Scientist;Full Professor", "bibtex": "@inproceedings{\nduong2023adaptive,\ntitle={Adaptive whitening with fast gain modulation and slow synaptic plasticity},\nauthor={Lyndon Duong and Eero P Simoncelli and Dmitri Chklovskii and David Lipshutz},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vz7SdRqWGM}\n}", "github": "", "project": "", "reviewers": "yS92;NSc1;3ayL;yTCH", "pdf_size": 2425678, "rating": "6;7;7;7", "confidence": "4;4;5;3", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "2;4;4;3", "wc_summary": "93;54;191;37", "wc_strengths": "51;73;63;70", "wc_weaknesses": "346;136;76;88", "wc_questions": "107;64;244;120", "wc_limitations": "11;1;7;42", "wc_review": "608;328;581;357", "wc_reply_reviewers": "121;20;14;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 93.75, 59.70500397789117 ], "wc_strengths_avg": [ 64.25, 8.46684711093805 ], "wc_weaknesses_avg": [ 161.5, 108.8611500949719 ], "wc_questions_avg": [ 133.75, 66.94167237229736 ], "wc_limitations_avg": [ 15.25, 15.848895860595462 ], "wc_review_avg": [ 468.5, 126.7763779258581 ], "wc_reply_reviewers_avg": [ 42.75, 45.22927702274269 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6311575132927323242&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "nyu.edu;simonsfoundation.org;flatironinstitute.org;nyu.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "New York University;Simons Foundation;Flatiron Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nyu.edu;https://www.simonsfoundation.org;https://flatironinstitute.org", "aff_unique_abbr": "NYU;Simons Foundation;Flatiron", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GAUCHE: A Library for Gaussian Processes in Chemistry", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70081", "id": "vzrA6uqOis", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2b1b2e974fa5ea622dd87f22815f423-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=vzrA6uqOis", "openreview": "https://openreview.net/forum?id=vzrA6uqOis", "poster": "/media/PosterPDFs/NeurIPS%202023/70081.png?t=1702172648.2330449", "slides": "https://nips.cc/virtual/2023/poster/70081", "video": "https://nips.cc/virtual/2023/poster/70081", "author_site": "Ryan-Rhys Griffiths, Leo Klarner, Henry Moss, Aditya Ravuri, Sang Truong, Yuanqi Du, Samuel Stanton, Gary Tom, Bojana Rankovic, Arian Jamasb, Arian Jamasb, Aryan Deshwal, Julius Schwartz, Austin Tripp, Gregory Kell, Simon Frieder, Anthony Bourached, Alex Chan, Jacob Moss, Chengzhi Guo, Johannes Peter D\u00fcrholt, Saudamini Chaurasia, Ji Won Park, Felix Strieth-Kalthoff, Alpha Lee, Bingqing Cheng, Alan Aspuru-Guzik, Philippe Schwaller, Jian Tang", "tldr": "", "abstract": "We introduce GAUCHE, an open-source library for GAUssian processes in CHEmistry. Gaussian processes have long been a cornerstone of probabilistic machine learning, affording particular advantages for uncertainty quantification and Bayesian optimisation. Extending Gaussian processes to molecular representations, however, necessitates kernels defined over structured inputs such as graphs, strings and bit vectors. By providing such kernels in a modular, robust and easy-to-use framework, we seek to enable expert chemists and materials scientists to make use of state-of-the-art black-box optimization techniques. Motivated by scenarios frequently encountered in practice, we showcase applications for GAUCHE in molecular discovery, chemical reaction optimisation and protein design. The codebase is made available at https://github.com/leojklarner/gauche.", "keywords": "Gaussian processes;Bayesian optimization;Chemistry;Molecular Machine Learning;Applications;Software", "primary_area": "", "supplementary_material": "", "author": "Ryan-Rhys Griffiths;Leo Klarner;Henry Moss;Aditya Ravuri;Sang T. Truong;Yuanqi Du;Samuel Don Stanton;Gary Tom;Bojana Rankovi\u0107;Arian Rokkum Jamasb;Aryan Deshwal;Julius Schwartz;Austin Tripp;Gregory Kell;Simon Frieder;Anthony Bourached;Alex James Chan;Jacob Moss;Chengzhi Guo;Johannes P. 
D\u00fcrholt;Saudamini Chaurasia;Ji Won Park;Felix Strieth-Kalthoff;Alpha Lee;Bingqing Cheng;Alan Aspuru-Guzik;Philippe Schwaller;Jian Tang", "authorids": "~Ryan-Rhys_Griffiths1;~Leo_Klarner2;~Henry_Moss1;~Aditya_Ravuri1;~Sang_T._Truong1;~Yuanqi_Du1;~Samuel_Don_Stanton1;~Gary_Tom1;~Bojana_Rankovi\u01071;~Arian_Rokkum_Jamasb1;~Aryan_Deshwal1;~Julius_Schwartz1;~Austin_Tripp1;~Gregory_Kell3;~Simon_Frieder1;~Anthony_Bourached1;~Alex_James_Chan1;~Jacob_Moss1;~Chengzhi_Guo1;~Johannes_P._D\u00fcrholt1;~Saudamini_Chaurasia1;~Ji_Won_Park1;~Felix_Strieth-Kalthoff1;~Alpha_Lee1;~Bingqing_Cheng1;~Alan_Aspuru-Guzik2;~Philippe_Schwaller1;~Jian_Tang1", "gender": "M;;;M;M;M;M;;F;;M;;M;M;;F;M;;;;F;F;M;M;F;M;M;", "homepage": ";;;http://falmity.com;https://cs.stanford.edu/~sttruong;https://yuanqidu.github.io/;https://samuelstanton.github.io/;https://gkwt.xyz;;https://www.jamasb.io;https://aryandeshwal.github.io/;;https://www.austintripp.ca/;https://kclpure.kcl.ac.uk/portal/en/persons/gregory-kell(57a640dc-3835-4d91-80fd-61eb09f41963).html;;https://www.maths.tcd.ie/~bouracha/index.html;https://alexjchan.com;;;;;;https://fsk-lab.github.io;https://www.phy.cam.ac.uk/directory/alpha-lee;https://sites.google.com/site/tonicbq/;http://matter.toronto.edu;https://schwallergroup.github.io;http://www.jian-tang.com", "dblp": "241/7007;;;;301/9134;266/2837;264/1895;;;296/2021;246/3012.html;;267/5455;302/4277.html;;;268/6948;;;;;83/10554;;;;;209/9632;181/2667-5", "google_scholar": "https://scholar.google.co.uk/citations?user=RBKs-lEAAAAJ;;;ugVeU9AAAAAJ;oXPm0dAAAAAJ;fAc_zZMAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.uk/citations?user=hYm9a-UAAAAJ;wNEYBrAAAAAJ;;WAvRaxMAAAAJ;dE82P4YAAAAJ;;https://scholar.google.com/citations?hl=en;yfy_BGIAAAAJ;;yKlEug8AAAAJ;;;URG3MMYAAAAJ;https://scholar.google.ca/citations?user=LGZDAgIAAAAJ;;https://scholar.google.co.uk/citations?user=s5ZqEskAAAAJ;Ag_6KEgAAAAJ;Tz0I4ywAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ", "orcid": "0000-0003-3117-4559;;;;;;;;;0000-0002-6727-7579;;;0000-0002-0138-7740;0000-0002-7481-7238;;;;;;;;0000-0002-0692-1092;0000-0003-1357-5500;;;0000-0002-8277-4434;0000-0003-3046-6576;", "linkedin": "ryan-rhys-griffiths-689b73128/;;;;sangttruong/;;samuel-stanton-06004997/;;bojana-rankovi%C4%87/;jamasb/;aryan-deshwal-a27835120/;julius-schwartz-7785a9a8/;;gregory-kell-a0b649a7/;;;alex-chan-040081131/;cobbles/;czguo/;;saudaminichaurasia;;felix-strieth-kalthoff-11115b150/?locale=en_US;;;;;", "or_profile": "~Ryan-Rhys_Griffiths1;~Leo_Klarner2;~Henry_Moss1;~Aditya_Ravuri1;~Sang_T._Truong1;~Yuanqi_Du1;~Samuel_Don_Stanton1;~Gary_Tom1;~Bojana_Rankovi\u01071;~Arian_Rokkum_Jamasb1;~Aryan_Deshwal1;~Julius_Schwartz1;~Austin_Tripp1;~Gregory_Kell3;~Simon_Frieder1;~Anthony_Bourached1;~Alex_James_Chan1;~Jacob_Moss1;~Chengzhi_Guo1;~Johannes_P._D\u00fcrholt1;~Saudamini_Chaurasia1;~Ji_Won_Park1;~Felix_Strieth-Kalthoff1;~Alpha_Lee1;~Bingqing_Cheng1;~Alan_Aspuru-Guzik2;~Philippe_Schwaller1;~Jian_Tang1", "aff": "Meta Facebook;;;NII, Tokyo Institute of Technology;Stanford University;Cornell University;Genentech;SandboxAQ;EPFL - EPF Lausanne;University of Cambridge;;;University of Cambridge;King's College London, University of London;;;University of Cambridge;University of Cambridge;;;;Genentech;University of Toronto;University of Cambridge;University of California, Berkeley;University of Toronto;Swiss Federal Institute of Technology Lausanne;Mila, HEC Montreal", "aff_domain": 
"facebook.com;;;nii.ac.jp;stanford.edu;cornell.edu;gene.com;sandboxaq.com;epfl.ch;cam.ac.uk;;;cam.ac.uk;kcl.ac.uk;;;cam.ac.uk;cam.ac.uk;;;;gene.com;utoronto.ca;cam.ac.uk;berkeley.edu;utoronto.ca;epfl.ch;hec.ca", "position": "Postdoc;;;Intern;PhD student;PhD student;Researcher;Intern;PhD student;PhD student;;;PhD student;PhD student;;;PhD student;PhD student;;;;Researcher;Postdoc;Assistant Professor;Assistant Professor;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngriffiths2023gauche,\ntitle={{GAUCHE}: A Library for Gaussian Processes in Chemistry},\nauthor={Ryan-Rhys Griffiths and Leo Klarner and Henry Moss and Aditya Ravuri and Sang T. Truong and Yuanqi Du and Samuel Don Stanton and Gary Tom and Bojana Rankovi{\\'c} and Arian Rokkum Jamasb and Aryan Deshwal and Julius Schwartz and Austin Tripp and Gregory Kell and Simon Frieder and Anthony Bourached and Alex James Chan and Jacob Moss and Chengzhi Guo and Johannes P. D{\\\"u}rholt and Saudamini Chaurasia and Ji Won Park and Felix Strieth-Kalthoff and Alpha Lee and Bingqing Cheng and Alan Aspuru-Guzik and Philippe Schwaller and Jian Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=vzrA6uqOis}\n}", "github": "", "project": "", "reviewers": "jdZg;rHxF;JWPZ;oVt5;rK4e", "pdf_size": 766397, "rating": "3;5;6;7;8", "confidence": "4;4;3;3;4", "soundness": "2;3;2;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;2;4;4", "wc_summary": "40;57;66;53;143", "wc_strengths": "27;41;41;85;129", "wc_weaknesses": "23;135;252;191;152", "wc_questions": "11;45;306;135;143", "wc_limitations": "10;8;35;1;74", "wc_review": "111;286;700;465;641", "wc_reply_reviewers": "0;57;53;244;176", "wc_reply_authors": "0;20;587;784;778", "reply_reviewers": "0;1;1;2;2", "reply_authors": "1;2;2;4;3", "rating_avg": [ 5.8, 1.7204650534085253 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 71.8, 36.56993300513415 ], "wc_strengths_avg": [ 64.6, 37.659527347007426 ], "wc_weaknesses_avg": [ 150.6, 75.420421637644 ], "wc_questions_avg": [ 128.0, 102.50463404158859 ], "wc_limitations_avg": [ 25.6, 26.79253627411933 ], "wc_review_avg": [ 440.6, 219.35414288314686 ], "wc_reply_reviewers_avg": [ 106.0, 89.87769467448528 ], "wc_reply_authors_avg": [ 433.8, 353.2695288303252 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 28, 0 ], "corr_rating_confidence": -0.3322052985133746, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11788712126295787736&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "facebook.com;;;nii.ac.jp;stanford.edu;cornell.edu;gene.com;sandboxaq.com;epfl.ch;cam.ac.uk;;;cam.ac.uk;kcl.ac.uk;;;cam.ac.uk;cam.ac.uk;;;;gene.com;utoronto.ca;cam.ac.uk;berkeley.edu;utoronto.ca;epfl.ch;hec.ca", "author_num": 28, "aff_unique_index": "0;1;2;3;4;5;6;7;7;8;7;7;4;9;7;10;9;11;12", "aff_unique_norm": "Meta;Tokyo Institute of Technology;Stanford University;Cornell University;Genentech;SandboxAQ;EPFL;University of Cambridge;King's College London;University of Toronto;University of California, Berkeley;Swiss Federal Institute of Technology Lausanne;HEC Montreal", "aff_unique_dep": "Meta Platforms, Inc.;;;;;;;;;;;;HEC Business School", "aff_unique_url": 
"https://meta.com;https://www.titech.ac.jp;https://www.stanford.edu;https://www.cornell.edu;https://www.genentech.com;https://sandbox-aq.com;https://www.epfl.ch;https://www.cam.ac.uk;https://www.kcl.ac.uk;https://www.utoronto.ca;https://www.berkeley.edu;https://www.epfl.ch;https://www.hec.ca", "aff_unique_abbr": "Meta;Titech;Stanford;Cornell;Genentech;;EPFL;Cambridge;KCL;U of T;UC Berkeley;EPFL;HEC", "aff_campus_unique_index": "1;2;3;4;4;4;4;4;5;3;6", "aff_campus_unique": ";Tokyo;Stanford;Lausanne;Cambridge;Berkeley;Montreal", "aff_country_unique_index": "0;1;0;0;0;0;2;3;3;3;3;3;0;4;3;0;4;2;4", "aff_country_unique": "United States;Japan;Switzerland;United Kingdom;Canada" }, { "title": "Visual Instruction Tuning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70080", "id": "w0H2xGHlkw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6dcf277ea32ce3288914faf369fe6de0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w0H2xGHlkw", "openreview": "https://openreview.net/forum?id=w0H2xGHlkw", "poster": "/media/PosterPDFs/NeurIPS%202023/70080.png?t=1702186050.664807", "slides": "https://nips.cc/virtual/2023/poster/70080", "video": "https://nips.cc/virtual/2023/poster/70080", "author_site": "Haotian Liu, Chunyuan Li, Qingyang Wu, Yong Jae Lee", "tldr": "", "abstract": "Instruction tuning large language models (LLMs) using machine-generated instruction-following data has been shown to improve zero-shot capabilities on new tasks, but the idea is less explored in the multimodal field. We present the first attempt to use language-only GPT-4 to generate multimodal language-image instruction-following data. By instruction tuning on such generated data, we introduce LLaVA: Large Language and Vision Assistant, an end-to-end trained large multimodal model that connects a vision encoder and an LLM for general-purpose visual and language understanding. To facilitate future research on visual instruction following, we construct two evaluation benchmarks with diverse and challenging application-oriented tasks. Our experiments show that LLaVA demonstrates impressive multimodal chat abilities, sometimes exhibiting the behaviors of multimodal GPT-4 on unseen images/instructions, and yields a 85.1% relative score compared with GPT-4 on a synthetic multimodal instruction-following dataset. When fine-tuned on Science QA, the synergy of LLaVA and GPT-4 achieves a new state-of-the-art accuracy of 92.53%. 
We make GPT-4 generated visual instruction tuning data, our model, and code publicly available.", "keywords": "visual instruction tuning;instruction tuning;multimodal;LLM;GPT", "primary_area": "", "supplementary_material": "", "author": "Haotian Liu;Chunyuan Li;Qingyang Wu;Yong Jae Lee", "authorids": "~Haotian_Liu1;~Chunyuan_Li1;~Qingyang_Wu1;~Yong_Jae_Lee2", "gender": ";;M;M", "homepage": "https://hliu.cc;http://chunyuan.li/;https://qywu.github.io/about.html;https://pages.cs.wisc.edu/~yongjaelee/", "dblp": "66/10511;64/9590;;15/5471", "google_scholar": "Xo6wfnQAAAAJ;Zd7WmXUAAAAJ;https://scholar.google.ca/citations?user=HDiw-TsAAAAJ;4GTpCxcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Haotian_Liu1;~Chunyuan_Li1;~Qingyang_Wu1;~Yong_Jae_Lee1", "aff": "Department of Computer Science, University of Wisconsin - Madison;Microsoft Research;Amazon;University of Wisconsin - Madison", "aff_domain": "cs.wisc.edu;microsoft.com;amazon.com;cs.wisc.edu", "position": "PhD student;Principal Researcher;Intern;Associate Professor", "bibtex": "@inproceedings{\nliu2023visual,\ntitle={Visual Instruction Tuning},\nauthor={Haotian Liu and Chunyuan Li and Qingyang Wu and Yong Jae Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w0H2xGHlkw}\n}", "github": "", "project": "", "reviewers": "RuBm;mhaS;A2dU;MLCz", "pdf_size": 5894130, "rating": "5;6;8;8", "confidence": "4;5;4;5", "soundness": "4;2;3;4", "novelty": "3;4;4;4", "presentation": "3;4;3;4", "wc_summary": "93;96;69;79", "wc_strengths": "85;55;70;67", "wc_weaknesses": "179;766;76;51", "wc_questions": "2;143;58;21", "wc_limitations": "12;9;9;1", "wc_review": "371;1069;282;219", "wc_reply_reviewers": "0;510;28;0", "wc_reply_authors": "35;1310;35;35", "reply_reviewers": "0;2;1;0", "reply_authors": "2;4;2;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 84.25, 10.894379284750462 ], "wc_strengths_avg": [ 69.25, 10.685855136581255 ], "wc_weaknesses_avg": [ 268.0, 291.49528298070277 ], "wc_questions_avg": [ 56.0, 54.115616969595756 ], "wc_limitations_avg": [ 7.75, 4.085033659592048 ], "wc_review_avg": [ 485.25, 341.3270975179088 ], "wc_reply_reviewers_avg": [ 134.5, 217.09617684335208 ], "wc_reply_authors_avg": [ 353.75, 552.0911949125797 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 6760, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9083483030705185424&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 18, "email": "cs.wisc.edu;microsoft.com;amazon.com;cs.wisc.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Wisconsin-Madison;Microsoft;Amazon", "aff_unique_dep": "Department of Computer Science;Microsoft Research;Amazon.com, Inc.", "aff_unique_url": "https://www.wisc.edu;https://www.microsoft.com/en-us/research;https://www.amazon.com", "aff_unique_abbr": "UW-Madison;MSR;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal Learners for Realizable Regression: PAC Learning and Online Learning", "status": "Oral", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70079", "id": "w116w62fxH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c22e5e918198702765ecff4b20d0a90-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w116w62fxH", "openreview": "https://openreview.net/forum?id=w116w62fxH", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70079", "video": "https://nips.cc/virtual/2023/poster/70079", "author_site": "Idan Attias, Steve Hanneke, Alkis Kalavasis, Amin Karbasi, Grigoris Velegkas", "tldr": "", "abstract": "In this work, we aim to characterize the statistical complexity of realizable regression both in the PAC learning setting and the online learning setting. Previous work had established the sufficiency of finiteness of the fat shattering dimension for PAC learnability and the necessity of finiteness of the scaled Natarajan dimension, but little progress had been made towards a more complete characterization since the work of Simon 1997 (SICOMP '97). To this end, we first introduce a minimax instance optimal learner for realizable regression and propose a novel dimension that both qualitatively and quantitatively characterizes which classes of real-valued predictors are learnable. We then identify a combinatorial dimension related to the graph dimension that characterizes ERM learnability in the realizable setting. Finally, we establish a necessary condition for learnability based on a combinatorial dimension related to the DS dimension, and conjecture that it may also be sufficient in this context. Additionally, in the context of online learning we provide a dimension that characterizes the minimax instance optimal cumulative loss up to a constant factor and design an optimal online learner for realizable regression, thus resolving an open question raised by Daskalakis and Golowich in STOC '22.", "keywords": "Learning Theory;Regression;PAC Learning;Online Learning", "primary_area": "", "supplementary_material": "/attachment/c516fec60973e253824b02ab98c6c00002ee3549.pdf", "author": "Idan Attias;Steve Hanneke;Alkis Kalavasis;Amin Karbasi;Grigoris Velegkas", "authorids": "~Idan_Attias1;~Steve_Hanneke1;~Alkis_Kalavasis1;~Amin_Karbasi3;~Grigoris_Velegkas1", "gender": "M;M;M;M;M", "homepage": "https://www.idanattias.com;http://www.stevehanneke.com;https://alkisk.github.io/;;http://seas.yale.edu/faculty-research/faculty-directory/amin-karbasi", "dblp": "228/6803;40/154;269/9425;254/1885;49/7411", "google_scholar": "-L6uUy0AAAAJ;fEhNO7YAAAAJ;NgVIFJwAAAAJ;Ty1kgP0AAAAJ;https://scholar.google.com.tw/citations?user=VusVB38AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Idan_Attias1;~Steve_Hanneke1;~Alkis_Kalavasis1;~Grigoris_Velegkas1;~amin_karbasi1", "aff": "Tel Aviv University;Purdue University;National Technical University of Athens;Yale University;Google", "aff_domain": "tau.ac.il;purdue.edu;ntua.gr;yale.edu;google.com", "position": "PhD student;Assistant Professor;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nattias2023optimal,\ntitle={Optimal Learners for Realizable Regression: {PAC} Learning and Online Learning},\nauthor={Idan Attias and Steve Hanneke and Alkis Kalavasis and Amin Karbasi and Grigoris Velegkas},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w116w62fxH}\n}", "github": "", "project": "", "reviewers": "DqR8;EXPx;JHXn;seAb;dt9f", "pdf_size": 739625, "rating": "6;7;8;8;9", "confidence": "2;3;3;3;4", "soundness": "3;4;4;4;4", 
"novelty": "3;4;4;3;4", "presentation": "3;4;4;4;4", "wc_summary": "58;81;144;39;136", "wc_strengths": "22;61;45;134;15", "wc_weaknesses": "57;24;25;128;15", "wc_questions": "12;6;44;43;1", "wc_limitations": "1;2;49;1;13", "wc_review": "150;174;307;345;180", "wc_reply_reviewers": "0;0;28;65;52", "wc_reply_authors": "0;0;0;41;22", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 7.6, 1.0198039027185568 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 91.6, 41.773675921565726 ], "wc_strengths_avg": [ 55.4, 42.57980742088907 ], "wc_weaknesses_avg": [ 49.8, 41.614420577487316 ], "wc_questions_avg": [ 21.2, 18.540765895722863 ], "wc_limitations_avg": [ 13.2, 18.465102220134064 ], "wc_review_avg": [ 231.2, 78.97189373441667 ], "wc_reply_reviewers_avg": [ 29.0, 26.487733009829288 ], "wc_reply_authors_avg": [ 12.6, 16.56019323558756 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9302605094190635, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11746014918951750690&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "tau.ac.il;purdue.edu;ntua.gr;yale.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Tel Aviv University;Purdue University;National Technical University of Athens;Yale University;Google", "aff_unique_dep": ";;;;Google", "aff_unique_url": "https://www.tau.ac.il;https://www.purdue.edu;https://www.ntua.gr;https://www.yale.edu;https://www.google.com", "aff_unique_abbr": "TAU;Purdue;NTUA;Yale;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;1;1", "aff_country_unique": "Israel;United States;Greece" }, { "title": "Balanced Training for Sparse GANs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70078", "id": "w2F8Fm6Sg3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c28efa5a86dca4b603a36c08f49f240-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w2F8Fm6Sg3", "openreview": "https://openreview.net/forum?id=w2F8Fm6Sg3", "poster": "/media/PosterPDFs/NeurIPS%202023/70078.png?t=1701153158.7342236", "slides": "https://nips.cc/virtual/2023/poster/70078", "video": "https://nips.cc/virtual/2023/poster/70078", "author_site": "Yite Wang, Jing Wu, NAIRA HOVAKIMYAN, Ruoyu Sun", "tldr": "", "abstract": "Over the past few years, there has been growing interest in developing larger and deeper neural networks, including deep generative models like generative adversarial networks (GANs). However, GANs typically come with high computational complexity, leading researchers to explore methods for reducing the training and inference costs. One such approach gaining popularity in supervised learning is dynamic sparse training (DST), which maintains good performance while enjoying excellent training efficiency. Despite its potential benefits, applying DST to GANs presents challenges due to the adversarial nature of the training process. In this paper, we propose a novel metric called the balance ratio (BR) to study the balance between the sparse generator and discriminator. 
We also introduce a new method called balanced dynamic sparse training (ADAPT), which seeks to control the BR during GAN training to achieve a good trade-off between performance and computational cost. Our proposed method shows promising results on multiple datasets, demonstrating its effectiveness.", "keywords": "Dynamic sparse training; pruning; neural network pruning; empirical deep learning", "primary_area": "", "supplementary_material": "/attachment/3132748fcf7ed95980d7bc06ec1764548734a6bb.pdf", "author": "Yite Wang;Jing Wu;Naira Hovakimyan;Ruoyu Sun", "authorids": "~Yite_Wang1;~Jing_Wu5;~Naira_Hovakimyan1;~Ruoyu_Sun1", "gender": "M;M;F;", "homepage": "https://yitewang.github.io/;https://jingwu6.github.io/;http://naira.mechse.illinois.edu/;https://ruoyus.github.io/", "dblp": "317/0407;;;30/9879-1", "google_scholar": "wEGgxUIAAAAJ;yxmjOkcAAAAJ;8mA9QpUAAAAJ;PsfzbCMAAAAJ", "orcid": ";;;", "linkedin": "yite-wang-261057140/;;nhovakim/;", "or_profile": "~Yite_Wang1;~Jing_Wu5;~Naira_Hovakimyan1;~Ruoyu_Sun1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;The Chinese University of Hong Kong", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;cuhk.edu.cn", "position": "PhD student;PhD student;professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023balanced,\ntitle={Balanced Training for Sparse {GAN}s},\nauthor={Yite Wang and Jing Wu and Naira Hovakimyan and Ruoyu Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w2F8Fm6Sg3}\n}", "github": "", "project": "", "reviewers": "PnSi;sFon;cAMN;6W4E", "pdf_size": 930345, "rating": "4;5;5;8", "confidence": "5;3;4;4", "soundness": "3;2;3;4", "novelty": "2;4;3;4", "presentation": "3;3;3;3", "wc_summary": "61;35;78;235", "wc_strengths": "35;47;50;36", "wc_weaknesses": "141;77;205;31", "wc_questions": "1;27;43;0", "wc_limitations": "1;19;17;0", "wc_review": "239;205;393;302", "wc_reply_reviewers": "0;243;40;9", "wc_reply_authors": "47;47;115;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.25, 78.15809299106523 ], "wc_strengths_avg": [ 42.0, 6.59545297913646 ], "wc_weaknesses_avg": [ 113.5, 65.7019786612245 ], "wc_questions_avg": [ 17.75, 18.15729880791744 ], "wc_limitations_avg": [ 9.25, 8.78564169540279 ], "wc_review_avg": [ 284.75, 71.53451963912248 ], "wc_reply_reviewers_avg": [ 73.0, 99.26479738557875 ], "wc_reply_authors_avg": [ 52.25, 40.9961888472575 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.23570226039551584, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16809988218863012916&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "illinois.edu;illinois.edu;illinois.edu;cuhk.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "UIUC;CUHK", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Urbana-Champaign;Hong Kong SAR",
"aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Minimax Optimal Rate for Parameter Estimation in Multivariate Deviated Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70077", "id": "w3ghbKBJg4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5fed79713d97df88f9912c8d886fccb3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w3ghbKBJg4", "openreview": "https://openreview.net/forum?id=w3ghbKBJg4", "poster": "/media/PosterPDFs/NeurIPS%202023/70077.png?t=1701118905.2312367", "slides": "https://nips.cc/virtual/2023/poster/70077", "video": "https://nips.cc/virtual/2023/poster/70077", "author_site": "Dat Do, Huy Nguyen, Khai Nguyen, Nhat Ho", "tldr": "", "abstract": "We study the maximum likelihood estimation (MLE) in the multivariate deviated model where the data are generated from the density function $(1-\\lambda^{\\ast})h_{0}(x)+\\lambda^{\\ast}f(x|\\mu^{\\ast}, \\Sigma^{\\ast})$ in which $h_{0}$ is a known function, $\\lambda^{\\ast} \\in [0,1]$ and $(\\mu^{\\ast}, \\Sigma^{\\ast})$ are unknown parameters to estimate. The main challenges in deriving the convergence rate of the MLE mainly come from two issues: (1) The interaction between the function $h_{0}$ and the density function $f$; (2) The deviated proportion $\\lambda^{\\ast}$ can go to the extreme points of $[0,1]$ as the sample size tends to infinity. To address these challenges, we develop the \\emph{distinguishability condition} to capture the linear independent relation between the function $h_{0}$ and the density function $f$. We then provide comprehensive convergence rates of the MLE via the vanishing rate of $\\lambda^{\\ast}$ to zero as well as the distinguishability of two functions $h_{0}$ and $f$.", "keywords": "Mixture Model;Minimax Rate;Maximum Likelihood Estimation", "primary_area": "", "supplementary_material": "", "author": "Dat Do;Huy Nguyen;Khai Nguyen;Nhat Ho", "authorids": "~Dat_Do1;~Huy_Nguyen5;~Khai_Nguyen1;~Nhat_Ho1", "gender": "M;M;M;M", "homepage": "https://lsa.umich.edu/stats/people/phd-students/dodat.html;https://huynm99.github.io/;https://khainb.com;https://nhatptnk8912.github.io/", "dblp": "221/4662;48/6075;120/4308;203/4479", "google_scholar": ";_YYwzhQAAAAJ;im5fNaQAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";;;", "linkedin": ";huy-nguyen-081199/;;nhat-pham-minh-ho-267b8164/", "or_profile": "~Dat_Do1;~Huy_Nguyen5;~Khai_Nguyen1;~Nhat_Ho1", "aff": "University of Michigan;University of Texas at Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "umich.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndo2023minimax,\ntitle={Minimax Optimal Rate for Parameter Estimation in Multivariate Deviated Models},\nauthor={Dat Do and Huy Nguyen and Khai Nguyen and Nhat Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w3ghbKBJg4}\n}", "github": "", "project": "", "reviewers": "1EFM;HL4W;bb5i;9DGR;UU3o", "pdf_size": 1029930, "rating": "3;6;6;6;7", "confidence": "3;2;2;4;3", "soundness": "2;4;2;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "39;95;90;384;111", "wc_strengths": "13;76;25;3;67", "wc_weaknesses": "67;121;81;3;63", "wc_questions": "30;130;2;3;32", "wc_limitations": "1;1;2;1;5", "wc_review": "150;423;200;394;278", 
"wc_reply_reviewers": "109;11;34;11;12", "wc_reply_authors": "773;20;23;26;23", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 143.8, 122.49963265251043 ], "wc_strengths_avg": [ 36.8, 29.3148426569204 ], "wc_weaknesses_avg": [ 67.0, 38.01052485825472 ], "wc_questions_avg": [ 39.4, 47.06421145626473 ], "wc_limitations_avg": [ 2.0, 1.5491933384829668 ], "wc_review_avg": [ 289.0, 106.15460423363652 ], "wc_reply_reviewers_avg": [ 35.4, 37.83437590340298 ], "wc_reply_authors_avg": [ 173.0, 300.00599994000123 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.07881104062391008, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2607730714213762049&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "umich.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Michigan;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.utexas.edu", "aff_unique_abbr": "UM;UT Austin", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "How Far Can Camels Go? Exploring the State of Instruction Tuning on Open Resources", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73425", "id": "w4zZNC4ZaV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec6413875e4ab08d7bc4d8e225263398-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=w4zZNC4ZaV", "openreview": "https://openreview.net/forum?id=w4zZNC4ZaV", "poster": "/media/PosterPDFs/NeurIPS%202023/73425.png?t=1702202042.9834287", "slides": "https://nips.cc/virtual/2023/poster/73425", "video": "https://nips.cc/virtual/2023/poster/73425", "author_site": "Yizhong Wang, Hamish Ivison, Pradeep Dasigi, Jack Hessel, Tushar Khot, Khyathi Chandu, David Wadden, Kelsey MacMillan, Noah Smith, Iz Beltagy, Hannaneh Hajishirzi", "tldr": "", "abstract": "In this work we explore recent advances in instruction-tuning language models on a range of open instruction-following datasets. Despite recent claims that open models can be on par with state-of-the-art proprietary models, these claims are often accompanied by limited evaluation, making it difficult to compare models across the board and determine the utility of various resources. We provide a large set of instruction-tuned models from 6.7B to 65B parameters in size, trained on 12 instruction datasets ranging from manually curated (e.g., OpenAssistant) to synthetic and distilled (e.g., Alpaca) and systematically evaluate them on their factual knowledge, reasoning, multilinguality, coding, safety, and open-ended instruction following abilities through a collection of automatic, model-based, and human-based metrics. 
We further introduce T\u00fclu, our best performing instruction-tuned model suite finetuned on a combination of high-quality open resources.\n\nOur experiments show that different instruction-tuning datasets can uncover or enhance specific skills, while no single dataset (or combination) provides the best performance across all evaluations. Interestingly, we find that model and human preference-based evaluations fail to reflect differences in model capabilities exposed by benchmark-based evaluations, suggesting the need for the type of systemic evaluation performed in this work. Our evaluations show that the best model in any given evaluation reaches on average 87% of ChatGPT performance, and 73% of GPT-4 performance, suggesting that further investment in building better base models and instruction-tuning data is required to close the gap. We release our instruction-tuned models, including a fully finetuned 65B T\u00fclu, along with our code, data, and evaluation framework to facilitate future research.", "keywords": "Instruction tuning;large language models;open-source;systematic evaluation", "primary_area": "", "supplementary_material": "", "author": "Yizhong Wang;Hamish Ivison;Pradeep Dasigi;Jack Hessel;Tushar Khot;Khyathi Chandu;David Wadden;Kelsey MacMillan;Noah A. Smith;Iz Beltagy;Hannaneh Hajishirzi", "authorids": "~Yizhong_Wang2;~Hamish_Ivison1;~Pradeep_Dasigi1;~Jack_Hessel1;~Tushar_Khot1;~Khyathi_Chandu1;~David_Wadden1;~Kelsey_MacMillan1;~Noah_A._Smith2;~Iz_Beltagy1;~Hannaneh_Hajishirzi1", "gender": "M;;M;M;M;;M;F;;M;F", "homepage": "https://yizhong-wang.com;https://hamishivi.github.io;https://pdasigi.github.io/;https://www.jmhessel.com;https://allenai.org/team/tushark/;;https://dwadden.github.io/;;;http://beltagy.net/;https://homes.cs.washington.edu/~hannaneh/", "dblp": "79/3601;288/1956;27/7184;https://dblp.uni-trier.de/pid/132/5250.html;83/8117;;239/4346;;;;52/1296", "google_scholar": "y5zpqdAAAAAJ;;https://scholar.google.com/citations?authorid=Bpd76vcAAAAJ;SxQQ1msAAAAJ;_8mkIjgAAAAJ;;BeTUvHIAAAAJ;;;jkV6H1gAAAAJ;LOV6_WIAAAAJ", "orcid": ";0000-0002-0069-7659;0000-0001-7127-1316;0000-0002-4012-8979;;;;;;;", "linkedin": ";;;;;;david-wadden-0076a995/;;;beltagy/;", "or_profile": "~Yizhong_Wang2;~Hamish_Ivison1;~Pradeep_Dasigi1;~Jack_Hessel1;~Tushar_Khot1;~Khyathi_Chandu1;~David_Wadden1;~Kelsey_MacMillan1;~Noah_A._Smith2;~Iz_Beltagy1;~Hannaneh_Hajishirzi1", "aff": "Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;Allen Institute for Artificial Intelligence;University of Washington", "aff_domain": "cs.washington.edu;allenai.org;allenai.org;allenai.org;allenai.org;;allenai.org;allenai.org;;allenai.org;uw.edu", "position": "PhD student;Researcher;Research Scientist;Researcher;Lead Research Scientist;;Researcher;Researcher;;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nwang2023how,\ntitle={How Far Can Camels Go? Exploring the State of Instruction Tuning on Open Resources},\nauthor={Yizhong Wang and Hamish Ivison and Pradeep Dasigi and Jack Hessel and Tushar Khot and Khyathi Chandu and David Wadden and Kelsey MacMillan and Noah A. 
Smith and Iz Beltagy and Hannaneh Hajishirzi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=w4zZNC4ZaV}\n}", "github": "", "project": "", "reviewers": "W1Ez;rWkW;LRM9;9TsL;hHUW", "pdf_size": 1500891, "rating": "6;7;8;8;8", "confidence": "5;1;4;4;4", "wc_summary_and_contributions": "29;183;85;182;105", "wc_strengths": "24;127;185;31;56", "wc_improvement": "114;88;396;12;92", "wc_limitations": "52;23;38;7;35", "wc_correctness": "1;19;12;1;22", "wc_clarity": "1;5;14;5;9", "wc_relation_to_prior_work": "33;19;69;8;25", "wc_documentation": "17;28;10;9;21", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "272;493;810;256;366", "wc_reply_reviewers": "34;0;99;0;75", "wc_reply_authors": "549;511;429;206;159", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 7.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 1.3564659966250538 ], "wc_summary_and_contributions_avg": [ 116.8, 59.14862635767631 ], "wc_strengths_avg": [ 84.6, 62.03418412456152 ], "wc_improvement_avg": [ 140.4, 132.3655544316572 ], "wc_limitations_avg": [ 31.0, 15.139352694220449 ], "wc_correctness_avg": [ 11.0, 8.78635305459552 ], "wc_clarity_avg": [ 6.8, 4.4 ], "wc_relation_to_prior_work_avg": [ 30.8, 20.76920797719547 ], "wc_documentation_avg": [ 17.0, 7.0710678118654755 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 439.4, 203.59921414386648 ], "wc_reply_reviewers_avg": [ 41.6, 39.822606644969895 ], "wc_reply_authors_avg": [ 370.8, 159.25878311728997 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.036860489038724305, "gs_citation": 315, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4520548076856064571&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 7, "email": "cs.washington.edu;allenai.org;allenai.org;allenai.org;allenai.org;;allenai.org;allenai.org;;allenai.org;uw.edu", "author_num": 11, "aff_unique_index": "0;1;1;1;1;1;1;1;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hyper-HMM: aligning human brains and semantic features in a common latent event space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70076", "id": "w6krZiUa7t", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/558a100caa93422df215fadb9e9b1dd7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w6krZiUa7t", "openreview": "https://openreview.net/forum?id=w6krZiUa7t", "poster": "/media/PosterPDFs/NeurIPS%202023/70076.png?t=1704585004.3646455", "slides": "https://nips.cc/virtual/2023/poster/70076", "video": "https://nips.cc/virtual/2023/poster/70076", "author_site": "Caroline Lee, Jane Han, Ma Feilong, Guo Jiahui, James Haxby, Christopher Baldassano", "tldr": "", "abstract": "Naturalistic stimuli evoke complex neural responses with spatial and temporal properties that differ across individuals. 
Current alignment methods focus on either spatial hyperalignment (assuming exact temporal correspondence) or temporal alignment (assuming exact spatial correspondence). Here, we propose a hybrid model, the Hyper-HMM, that simultaneously aligns both temporal and spatial features across brains. The model learns to linearly project voxels to a reduced-dimension latent space, in which timecourses are segmented into corresponding temporal events. This approach allows tracking of each individual's mental trajectory through an event sequence, and also allows for alignment with other feature spaces such as stimulus content. Using an fMRI dataset in which students watch videos of class lectures, we demonstrate that the Hyper-HMM can be used to map all participants and the semantic content of the videos into a common low-dimensional space, and that these mappings generalize to held-out data. Our model provides a new window into individual cognitive dynamics evoked by complex naturalistic stimuli.", "keywords": "Brain Imaging;Other Cognitive Science;Other Neuroscience", "primary_area": "", "supplementary_material": "/attachment/275b2f69937ff28f19c96151b89ea86a9adf32c0.zip", "author": "Caroline Lee;Jane Han;Ma Feilong;Guo Jiahui;James Haxby;Christopher Baldassano", "authorids": "~Caroline_Lee1;~Jane_Han1;~Ma_Feilong1;~Guo_Jiahui1;~James_Haxby1;~Christopher_Baldassano1", "gender": "F;;;;M;F", "homepage": ";https://han-jane.github.io/;;;http://www.chrisbaldassano.com/;https://scholar.google.com/citations?hl=en&user=JBd1p1QAAAAJ", "dblp": ";;211/5897;30/2474.html;125/4554;164/1216", "google_scholar": "8bMS1qQAAAAJ;;2_X6hk8AAAAJ;https://scholar.google.com/citations?hl=en;6YJQed0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-6838-3971;;;0000-0002-1528-9025", "linkedin": ";;;;;", "or_profile": "~Caroline_Lee1;~Jane_Han1;~Ma_Feilong1;~James_Haxby1;~Christopher_Baldassano1;~Guo_Jiahui2", "aff": "Dartmouth College;Dartmouth College;Dartmouth College;Dartmouth College;Columbia University;Dartmouth College", "aff_domain": "dartmouth.edu;dartmouth.edu;dartmouth.edu;dartmouth.edu;columbia.edu;dartmouth.edu", "position": "PhD student;PhD student;Postdoc;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nlee2023hyperhmm,\ntitle={Hyper-{HMM}: aligning human brains and semantic features in a common latent event space},\nauthor={Caroline Lee and Jane Han and Ma Feilong and Guo Jiahui and James Haxby and Christopher Baldassano},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w6krZiUa7t}\n}", "github": "", "project": "", "reviewers": "XnCP;jeCv;wrdu;jAF3;1rZM", "pdf_size": 3145469, "rating": "4;4;6;6;8", "confidence": "4;4;3;4;4", "soundness": "2;2;3;3;4", "novelty": "3;2;3;2;4", "presentation": "2;2;3;3;4", "wc_summary": "177;102;35;122;132", "wc_strengths": "61;6;73;28;59", "wc_weaknesses": "160;6;142;44;344", "wc_questions": "98;228;89;564;13", "wc_limitations": "1;6;16;16;1", "wc_review": "497;348;355;774;549", "wc_reply_reviewers": "219;54;0;287;129", "wc_reply_authors": "470;0;0;60;21", "reply_reviewers": "1;1;0;2;2", "reply_authors": "2;1;1;2;2", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 113.6, 46.34911002381815 ], "wc_strengths_avg": [ 45.4, 24.67873578609731 ], 
"wc_weaknesses_avg": [ 139.2, 117.63400868796404 ], "wc_questions_avg": [ 198.4, 195.4283500416457 ], "wc_limitations_avg": [ 8.0, 6.782329983125268 ], "wc_review_avg": [ 504.6, 155.89047437223354 ], "wc_reply_reviewers_avg": [ 137.8, 104.82633256963635 ], "wc_reply_authors_avg": [ 110.2, 181.22957815985782 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13363062095621217, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7506622616116300894&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "dartmouth.edu;dartmouth.edu;dartmouth.edu;dartmouth.edu;columbia.edu;dartmouth.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Dartmouth College;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.dartmouth.edu;https://www.columbia.edu", "aff_unique_abbr": "Dartmouth;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Compositional Sculpting of Iterative Generative Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70075", "id": "w79RtqIyoM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/29dd9e016b7b2f15ceb0ea93dbf1fa53-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w79RtqIyoM", "openreview": "https://openreview.net/forum?id=w79RtqIyoM", "poster": "/media/PosterPDFs/NeurIPS%202023/70075.png?t=1701794689.2649195", "slides": "https://nips.cc/virtual/2023/poster/70075", "video": "https://nips.cc/virtual/2023/poster/70075", "author_site": "Timur Garipov, Sebastiaan De Peuter, Ge Yang, Vikas Garg, Samuel Kaski, Tommi Jaakkola", "tldr": "", "abstract": "High training costs of generative models and the need to fine-tune them for specific tasks have created a strong interest in model reuse and composition.\nA key challenge in composing iterative generative processes, such as GFlowNets and diffusion models, is that to realize the desired target distribution, all steps of the generative process need to be coordinated, and satisfy delicate balance conditions.\nIn this work, we propose Compositional Sculpting: a general approach for defining compositions of iterative generative processes. We then introduce a method for sampling from these compositions built on classifier guidance.\nWe showcase ways to accomplish compositional sculpting in both GFlowNets and diffusion models. We highlight two binary operations $\\\\unicode{x2014}$ the $\\\\textit{harmonic mean}\\\\unicode{x00A0}(p_1 \\\\otimes p_2$) and the $\\\\textit{contrast}\\\\unicode{x00A0}(p_1 \\\\,\\\\unicode{x25D1}\\\\,\\\\, p_2$) between pairs, and the generalization of these operations to multiple component distributions.\nWe offer empirical results on image and molecular generation tasks. Project codebase: https://github.com/timgaripov/compositional-sculpting.", "keywords": "generative model composition;GFlowNets;diffusion models;classifier guidance;probabilistic methods", "primary_area": "", "supplementary_material": "", "author": "Timur Garipov;Sebastiaan De Peuter;Ge Yang;Vikas Garg;Samuel Kaski;Tommi S. 
Jaakkola", "authorids": "~Timur_Garipov1;~Sebastiaan_De_Peuter1;~Ge_Yang1;~Vikas_Garg2;~Samuel_Kaski1;~Tommi_S._Jaakkola1", "gender": "M;M;M;;M;", "homepage": "https://timgaripov.github.io/;;http://www.episodeyang.com;;https://people.aalto.fi/samuel.kaski;", "dblp": "190/7045;298/1740;48/4561-3;;64/5826;", "google_scholar": "gWQzBQMAAAAJ;xxp2We8AAAAJ;vaQcF6kAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-0684-0110;0000-0001-7520-7055;;0000-0003-1925-9154;", "linkedin": "timur-garipov-5a133a24b/;sebastiaan-de-peuter/;;;samuel-kaski-27790/;", "or_profile": "~Timur_Garipov1;~Sebastiaan_De_Peuter1;~Ge_Yang1;~Vikas_Garg2;~Samuel_Kaski1;~Tommi_S._Jaakkola1", "aff": "Massachusetts Institute of Technology;Aalto University;Massachusetts Institute of Technology;;Aalto University;", "aff_domain": "mit.edu;aalto.fi;mit.edu;;aalto.fi;", "position": "PhD student;PhD student;Postdoc;;Full Professor;", "bibtex": "@inproceedings{\ngaripov2023compositional,\ntitle={Compositional Sculpting of Iterative Generative Processes},\nauthor={Timur Garipov and Sebastiaan De Peuter and Ge Yang and Vikas Garg and Samuel Kaski and Tommi S. Jaakkola},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w79RtqIyoM}\n}", "github": "", "project": "", "reviewers": "m7vZ;Ean9;eEwT;UsMn;VKR5;ZgyF", "pdf_size": 17599080, "rating": "6;6;6;6;7;7", "confidence": "1;5;3;4;2;4", "soundness": "3;4;2;3;3;3", "novelty": "3;3;2;3;3;4", "presentation": "2;4;3;3;3;3", "wc_summary": "58;95;191;64;104;129", "wc_strengths": "19;101;139;67;51;46", "wc_weaknesses": "101;381;181;43;34;32", "wc_questions": "12;4;157;399;85;82", "wc_limitations": "17;1;36;11;24;120", "wc_review": "207;582;704;584;298;409", "wc_reply_reviewers": "9;66;193;122;43;14", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.1666666666666665, 1.3437096247164249 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 106.83333333333333, 44.61470858609548 ], "wc_strengths_avg": [ 70.5, 39.301187429050195 ], "wc_weaknesses_avg": [ 128.66666666666666, 124.37934805353429 ], "wc_questions_avg": [ 123.16666666666667, 133.5033291303587 ], "wc_limitations_avg": [ 34.833333333333336, 39.5871050834598 ], "wc_review_avg": [ 464.0, 174.42954642682147 ], "wc_reply_reviewers_avg": [ 74.5, 64.95318827175974 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.08770580193070289, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10611498052627126063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mit.edu;aalto.fi;mit.edu;;aalto.fi;", "author_num": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Aalto University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.aalto.fi", "aff_unique_abbr": "MIT;Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Finland" }, { "title": "InfoCD: A Contrastive Chamfer Distance Loss for Point Cloud Completion", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/70074", "id": "w7LxAZfDfv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2ea1943896474b7cd9796b93e526f6f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w7LxAZfDfv", "openreview": "https://openreview.net/forum?id=w7LxAZfDfv", "poster": "/media/PosterPDFs/NeurIPS%202023/70074.png?t=1702532105.009966", "slides": "https://nips.cc/virtual/2023/poster/70074", "video": "https://nips.cc/virtual/2023/poster/70074", "author_site": "Fangzhou Lin, Yun Yue, Ziming Zhang, Songlin Hou, Kazunori Yamada, Vijaya Kolachalama, Venkatesh Saligrama", "tldr": "", "abstract": "A point cloud is a discrete set of data points sampled from a 3D geometric surface. Chamfer distance (CD) is a popular metric and training loss to measure the distances between point clouds, but also well known to be sensitive to outliers. To address this issue, in this paper we propose InfoCD, a novel contrastive Chamfer distance loss to learn to spread the matched points for better distribution alignments between point clouds as well as accounting for a surface similarity estimator. We show that minimizing InfoCD is equivalent to maximizing a lower bound of the mutual information between the underlying geometric surfaces represented by the point clouds, leading to a regularized CD metric which is robust and computationally efficient for deep learning. We conduct comprehensive experiments for point cloud completion using InfoCD and observe significant improvements consistently over all the popular baseline networks trained with CD-based losses, leading to new state-of-the-art results on several benchmark datasets. Demo code is available at https://github.com/Zhang-VISLab/NeurIPS2023-InfoCD.", "keywords": "Contrastive learning; Point cloud completion", "primary_area": "", "supplementary_material": "/attachment/55e4931e80849f465e34c827a0eac4a99d2822be.zip", "author": "Fangzhou Lin;Yun Yue;Ziming Zhang;Songlin Hou;Kazunori Yamada;Vijaya B Kolachalama;Venkatesh Saligrama", "authorids": "~Fangzhou_Lin1;~Yun_Yue1;~Ziming_Zhang4;~Songlin_Hou1;~Kazunori_Yamada1;~Vijaya_B_Kolachalama1;~Venkatesh_Saligrama1", "gender": "M;;M;M;M;;M", "homepage": ";https://yunyuny.github.io/;;https://yamada-kd.com/;https://vkola-lab.github.io;https://venkatesh-saligrama.github.io/;https://zimingzhang.wordpress.com/", "dblp": "30/8685;;267/6618.html;;;67/4721;", "google_scholar": "https://scholar.google.co.jp/citations?user=ninTViIAAAAJ;Xuz8JrkAAAAJ;sWVl1TgAAAAJ;;https://scholar.google.com/citations?hl=en;S4z3uzMAAAAJ;2yqx3oIAAAAJ", "orcid": "0000-0002-1749-3599;;;;;0000-0002-0675-2268;", "linkedin": ";;songlin-hou/;;;venkatesh-saligrama-91175a16/;", "or_profile": "~Fangzhou_Lin1;~Yun_Yue1;~Songlin_Hou1;~Kazunori_Yamada1;~Vijaya_B_Kolachalama1;~Venkatesh_Saligrama1;~Ziming_Zhang1", "aff": "Worcester Polytechnic Institute;Worcester Polytechnic Institute;Georgia Institute of Technology;Tohoku University;Boston University, Boston University;Boston University;Worcester Polytechnic Institute", "aff_domain": "wpi.edu;wpi.edu;gatech.edu;tohoku.ac.jp;bu.edu;bu.edu;wpi.edu", "position": "Researcher;PhD student;MS student;Full Professor;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlin2023infocd,\ntitle={Info{CD}: A Contrastive Chamfer Distance Loss for Point Cloud Completion},\nauthor={Fangzhou Lin and Yun Yue and Ziming Zhang and Songlin Hou and Kazunori Yamada and Vijaya B Kolachalama and Venkatesh Saligrama},\nbooktitle={Thirty-seventh Conference on 
Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w7LxAZfDfv}\n}", "github": "", "project": "", "reviewers": "SCvJ;mFMW;Lt2U;hcgP;eAft", "pdf_size": 1066827, "rating": "5;5;5;6;9", "confidence": "4;5;4;4;5", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "2;3;3;3;4", "wc_summary": "34;50;96;65;65", "wc_strengths": "22;76;87;19;165", "wc_weaknesses": "26;269;184;84;109", "wc_questions": "17;2;16;1;3", "wc_limitations": "1;6;23;7;10", "wc_review": "100;403;406;176;352", "wc_reply_reviewers": "21;143;482;404;14", "wc_reply_authors": "62;558;41;899;6", "reply_reviewers": "1;2;1;5;1", "reply_authors": "3;3;2;7;2", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 62.0, 20.503658210182884 ], "wc_strengths_avg": [ 73.8, 53.25936537361293 ], "wc_weaknesses_avg": [ 134.4, 84.27716179369119 ], "wc_questions_avg": [ 7.8, 7.138627319029898 ], "wc_limitations_avg": [ 9.4, 7.391887445030531 ], "wc_review_avg": [ 287.4, 125.80238471507606 ], "wc_reply_reviewers_avg": [ 212.8, 195.0419442068808 ], "wc_reply_authors_avg": [ 313.2, 356.2737150001386 ], "reply_reviewers_avg": [ 2.0, 1.5491933384829668 ], "reply_authors_avg": [ 3.4, 1.8547236990991407 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.52704627669473, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3861841488897344146&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "wpi.edu;wpi.edu;gatech.edu;tohoku.ac.jp;bu.edu;bu.edu;wpi.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;3;3;0", "aff_unique_norm": "Worcester Polytechnic Institute;Georgia Institute of Technology;Tohoku University;Boston University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.wpi.edu;https://www.gatech.edu;https://www.tohoku.ac.jp;https://www.bu.edu", "aff_unique_abbr": "WPI;Georgia Tech;Tohoku U;BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Interpretable Reward Redistribution in Reinforcement Learning: A Causal Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70073", "id": "w7TyuWhGZP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/402e12102d6ec3ea3df40ce1b23d423a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=w7TyuWhGZP", "openreview": "https://openreview.net/forum?id=w7TyuWhGZP", "poster": "/media/PosterPDFs/NeurIPS%202023/70073.png?t=1699960482.8781693", "slides": "https://nips.cc/virtual/2023/poster/70073", "video": "https://nips.cc/virtual/2023/poster/70073", "author_site": "Yudi Zhang, Yali Du, Biwei Huang, Ziyan Wang, Jun Wang, Meng Fang, Mykola Pechenizkiy", "tldr": "", "abstract": "A major challenge in reinforcement learning is to determine which state-action pairs are responsible for future rewards that are delayed. Reward redistribution serves as a solution to re-assign credits for each time step from observed sequences. 
While the majority of current approaches construct the reward redistribution in an uninterpretable manner, we propose to explicitly model the contributions of state and action from a causal perspective, resulting in an interpretable reward redistribution and preserving policy invariance. In this paper, we start by studying the role of causal generative models in reward redistribution by characterizing the generation of Markovian rewards and trajectory-wise long-term return and further propose a framework, called Generative Return Decomposition (GRD), for policy optimization in delayed reward scenarios. Specifically, GRD first identifies the unobservable Markovian rewards and causal relations in the generative process. Then, GRD makes use of the identified causal generative model to form a compact representation to train policy over the most favorable subspace of the state space of the agent. Theoretically, we show that the unobservable Markovian reward function is identifiable, as well as the underlying causal structure and causal models. Experimental results show that our method outperforms state-of-the-art methods and the provided visualization further demonstrates the interpretability of our method.\nThe project page is located at [https://reedzyd.github.io/GenerativeReturnDecomposition/](https://reedzyd.github.io/GenerativeReturnDecomposition/).", "keywords": "Reinforcement learning;sparse reward;return decomposition;causal modeling", "primary_area": "", "supplementary_material": "/attachment/79dec78f14982b3d4dbdd80fc7fcc6a8af90e287.pdf", "author": "Yudi Zhang;Yali Du;Biwei Huang;Ziyan Wang;Jun Wang;Meng Fang;Mykola Pechenizkiy", "authorids": "~Yudi_Zhang3;~Yali_Du1;~Biwei_Huang1;~Ziyan_Wang3;~Jun_Wang2;~Meng_Fang1;~Mykola_Pechenizkiy1", "gender": "F;;F;M;M;M;M", "homepage": "https://github.com/ReedZyd;;;https://ziyan-wang98.github.io/;http://www0.cs.ucl.ac.uk/staff/jun.wang/;;http://www.win.tue.nl/~mpechen/", "dblp": "344/3890;;165/3288;;w/JunWang12;67/463;37/4649", "google_scholar": "https://scholar.google.com/citations?hl=en;;;1Yu8JFIAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;IcNYP1oAAAAJ;https://scholar.google.com.tw/citations?user=F0uFT_kAAAAJ", "orcid": ";;;;;;0000-0003-4955-0743", "linkedin": ";;;;;;mpechen/", "or_profile": "~Yudi_Zhang3;~Yali_Du1;~Biwei_Huang1;~Ziyan_Wang3;~Jun_Wang2;~Meng_Fang1;~Mykola_Pechenizkiy1", "aff": "Eindhoven University of Technology;;University of California, San Diego;King's College London;University College London;Eindhoven University of Technology;Eindhoven University of Technology", "aff_domain": "tue.nl;;ucsd.edu;kcl.ac.uk;ucl.ac.uk;tue.nl;tue.nl", "position": "PhD student;;Assistant Professor;PhD student;Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023interpretable,\ntitle={Interpretable Reward Redistribution in Reinforcement Learning: A Causal Approach},\nauthor={Yudi Zhang and Yali Du and Biwei Huang and Ziyan Wang and Jun Wang and Meng Fang and Mykola Pechenizkiy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=w7TyuWhGZP}\n}", "github": "", "project": "", "reviewers": "ynzt;24n7;EmbG;Xv6U;6iFi", "pdf_size": 5777967, "rating": "4;5;5;5;7", "confidence": "2;3;4;1;5", "soundness": "2;2;3;2;3", "novelty": "2;2;3;2;4", "presentation": "2;2;3;2;2", "wc_summary": "85;29;153;139;214", "wc_strengths": "5;21;84;75;68", "wc_weaknesses": "152;12;258;27;86", "wc_questions": "10;160;2;27;33", "wc_limitations": 
"1;1;2;27;6", "wc_review": "253;223;499;295;407", "wc_reply_reviewers": "0;39;25;0;13", "wc_reply_authors": "20;21;79;0;13", "reply_reviewers": "0;1;1;0;1", "reply_authors": "2;2;3;1;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 124.0, 62.78853398511547 ], "wc_strengths_avg": [ 50.6, 31.525227992831393 ], "wc_weaknesses_avg": [ 107.0, 90.21308109138053 ], "wc_questions_avg": [ 46.4, 57.89162288276258 ], "wc_limitations_avg": [ 7.4, 9.97196068985433 ], "wc_review_avg": [ 335.4, 102.90500473737902 ], "wc_reply_reviewers_avg": [ 15.4, 15.02797391533536 ], "wc_reply_authors_avg": [ 26.6, 27.251421981247145 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7216878364870322, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7557970246701830968&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "tue.nl;;ucsd.edu;kcl.ac.uk;ucl.ac.uk;tue.nl;tue.nl", "author_num": 7, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Eindhoven University of Technology;University of California, San Diego;King's College London;University College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tue.nl;https://www.ucsd.edu;https://www.kcl.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "TU/e;UCSD;KCL;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;2;2;0;0", "aff_country_unique": "Netherlands;United States;United Kingdom" }, { "id": "w91JqNQLwy", "title": "A Fast and Provable Algorithm for Sparse Phase Retrieval", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study the sparse phase retrieval problem, which aims to recover a sparse signal from a limited number of phaseless measurements. Existing algorithms for sparse phase retrieval primarily rely on first-order methods with linear convergence rate. In this paper, we propose an efficient second-order algorithm based on Newton projection, which maintains the same per-iteration computational complexity as popular first-order methods. The proposed algorithm is theoretically guaranteed to converge to the ground truth (up to a global sign) at a quadratic convergence rate after at most $\\mathcal{O}\\big(\\log (\\Vert\\boldsymbol{x}^{\\natural} \\Vert /x_{\\min}^{\\natural})\\big)$ iterations, provided a sample complexity of $\\mathcal{O}(s^2\\log n)$, where $\\boldsymbol{x}^{\\natural} \\in \\mathbb{R}^n$ represents an $s$-sparse ground truth signal. 
Numerical experiments demonstrate that our algorithm not only outperforms state-of-the-art methods in terms of achieving a significantly faster convergence rate, but also excels in attaining a higher success rate for exact signal recovery from noise-free measurements and providing enhanced signal reconstruction in noisy scenarios.", "keywords": "Sparse Phase Retrieval;Nonconvex Optimization;Quadratic Convergence", "primary_area": "", "supplementary_material": "/attachment/17384eb6b456e6b311787e213228e4d711b67ca1.zip", "author": "Jian-Feng CAI;Yu Long;Ruixue WEN;Jiaxi Ying", "authorids": "~Jian-Feng_CAI1;~Yu_Long2;~Ruixue_WEN1;~Jiaxi_Ying1", "gender": "M;;F;M", "homepage": "https://www.math.ust.hk/~jfcai/;;http://wrx.homepage;https://jxying.github.io/", "dblp": ";;;179/2448", "google_scholar": "Mo4v5iwAAAAJ;;;_IzItlcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jian-Feng_CAI1;~Yu_Long2;~Ruixue_WEN1;~Jiaxi_Ying1", "aff": "Hong Kong University of Science and Technology;;;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;;;ust.hk", "position": "Full Professor;;;Postdoc", "bibtex": "@misc{\ncai2023a,\ntitle={A Fast and Provable Algorithm for Sparse Phase Retrieval},\nauthor={Jian-Feng CAI and Yu Long and Ruixue WEN and Jiaxi Ying},\nyear={2023},\nurl={https://openreview.net/forum?id=w91JqNQLwy}\n}", "github": "", "project": "", "reviewers": "kapg;Sczg;CNCT;K7tc", "site": "https://openreview.net/forum?id=w91JqNQLwy", "pdf_size": 590387, "rating": "3;5;7;8", "confidence": "5;4;3;4", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "67;104;52;63", "wc_strengths": "76;16;62;19", "wc_weaknesses": "146;229;45;17", "wc_questions": "31;45;47;45", "wc_limitations": "1;1;88;8", "wc_review": "321;395;294;152", "wc_reply_reviewers": "45;8;32;73", "wc_reply_authors": "347;44;95;62", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 19.551214796017153 ], "wc_strengths_avg": [ 43.25, 26.242856170775315 ], "wc_weaknesses_avg": [ 109.25, 84.1557336133433 ], "wc_questions_avg": [ 42.0, 6.4031242374328485 ], "wc_limitations_avg": [ 24.5, 36.77295201639379 ], "wc_review_avg": [ 290.5, 88.0979568435046 ], "wc_reply_reviewers_avg": [ 39.5, 23.4574082114798 ], "wc_reply_authors_avg": [ 137.0, 122.61525190611485 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7364596943186587, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12132307416779700585&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Ambient Diffusion: Learning Clean Distributions from Corrupted Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70072", "id": "wBJBLy9kBY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/012af729c5d14d279581fc8a5db975a1-Abstract-Conference.html", 
"pdf": "https://openreview.net/pdf?id=wBJBLy9kBY", "openreview": "https://openreview.net/forum?id=wBJBLy9kBY", "poster": "/media/PosterPDFs/NeurIPS%202023/70072.png?t=1702322741.4810457", "slides": "https://nips.cc/virtual/2023/poster/70072", "video": "https://nips.cc/virtual/2023/poster/70072", "author_site": "Giannis Daras, Kulin Shah, Yuval Dagan, Aravind Gollakota, Alex Dimakis, Adam Klivans", "tldr": "", "abstract": "We present the first diffusion-based framework that can learn an unknown distribution using only highly-corrupted samples. This problem arises in scientific applications where access to uncorrupted samples is impossible or expensive to acquire. Another benefit of our approach is the ability to train generative models that are less likely to memorize any individual training sample, since they never observe clean training data. \n\nOur main idea is to introduce additional measurement distortion during the diffusion process and require the model to predict the original corrupted image from the further corrupted image. We prove that our method leads to models that learn the conditional expectation of the full uncorrupted image given this additional measurement corruption. This holds for any corruption process that satisfies some technical conditions (and in particular includes inpainting and compressed sensing). We train models on standard benchmarks (CelebA, CIFAR-10 and AFHQ) and show that we can learn the distribution even when all the training samples have 90\\% of their pixels missing. We also show that we can finetune foundation models on small corrupted datasets (e.g. MRI scans with block corruptions) and learn the clean distribution without memorizing the training set.", "keywords": "corrupted data;generative models;ambient gan;inverse problems;learning from measurements", "primary_area": "", "supplementary_material": "/attachment/bf2e33fd453d2ccd313c9edeb54583b0487241ba.zip", "author": "Giannis Daras;Kulin Shah;Yuval Dagan;Aravind Gollakota;Alex Dimakis;Adam Klivans", "authorids": "~Giannis_Daras1;~Kulin_Shah1;~Yuval_Dagan1;~Aravind_Gollakota1;~Alex_Dimakis1;~Adam_Klivans1", "gender": "M;M;M;M;M;M", "homepage": "https://giannisdaras.github.io/;https://kulinshah98.github.io/;https://yuvaldagan.wordpress.com/;https://aravind-pg.github.io;https://people.eecs.berkeley.edu/~alexdimakis/;http://www.cs.utexas.edu/~klivans", "dblp": "254/2703;215/3581;190/7292;264/1576;19/5000.html;k/AdamRKlivans", "google_scholar": "LaScvbQAAAAJ;https://scholar.google.co.in/citations?user=67OmLg4AAAAJ;;;JSFmVQEAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;alex-dimakis-b1b20320/;", "or_profile": "~Giannis_Daras1;~Kulin_Shah1;~Yuval_Dagan1;~Aravind_Gollakota1;~Alex_Dimakis1;~Adam_Klivans1", "aff": "University of Texas, Austin;University of Texas, Austin;Massachusetts Institute of Technology;University of Texas, Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;cs.utexas.edu;mit.edu;utexas.edu;utexas.edu;cs.utexas.edu", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\ndaras2023ambient,\ntitle={Ambient Diffusion: Learning Clean Distributions from Corrupted Data},\nauthor={Giannis Daras and Kulin Shah and Yuval Dagan and Aravind Gollakota and Alex Dimakis and Adam Klivans},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wBJBLy9kBY}\n}", "github": "", "project": "", "reviewers": "q1F8;qLWX;GQa2;Tq9h", 
"pdf_size": 9549818, "rating": "6;6;6;7", "confidence": "4;4;4;2", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "73;125;49;55", "wc_strengths": "60;115;72;105", "wc_weaknesses": "164;733;299;45", "wc_questions": "10;59;4;30", "wc_limitations": "29;46;10;29", "wc_review": "336;1078;434;264", "wc_reply_reviewers": "11;516;22;11", "wc_reply_authors": "49;732;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 29.912372022292047 ], "wc_strengths_avg": [ 88.0, 22.68259244442751 ], "wc_weaknesses_avg": [ 310.25, 260.09169056315505 ], "wc_questions_avg": [ 25.75, 21.47527648250425 ], "wc_limitations_avg": [ 28.5, 12.737739202856996 ], "wc_review_avg": [ 528.0, 323.2243802685682 ], "wc_reply_reviewers_avg": [ 140.0, 217.13014530460757 ], "wc_reply_authors_avg": [ 195.25, 310.5377392524136 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10056157867991554912&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "utexas.edu;cs.utexas.edu;mit.edu;utexas.edu;utexas.edu;cs.utexas.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Texas at Austin;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://web.mit.edu", "aff_unique_abbr": "UT Austin;MIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Self-supervised Molecular Representation Learning using Persistent Homology", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70071", "id": "wEiUGpcr0M", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b555e8552240d6dfe0767146c9ebf36-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wEiUGpcr0M", "openreview": "https://openreview.net/forum?id=wEiUGpcr0M", "poster": "/media/PosterPDFs/NeurIPS%202023/70071.png?t=1699881114.8547847", "slides": "https://nips.cc/virtual/2023/poster/70071", "video": "https://nips.cc/virtual/2023/poster/70071", "author_site": "Yuankai Luo, Lei Shi, Veronika Thost", "tldr": "", "abstract": "Self-supervised learning (SSL) has great potential for molecular representation learning given the complexity of molecular graphs, the large amounts of unlabelled data available, the considerable cost of obtaining labels experimentally, and the hence often only small training datasets. The importance of the topic is reflected in the variety of paradigms and architectures that have been investigated recently, most focus on designing views for contrastive learning.\nIn this paper, we study SSL based on persistent homology (PH), a mathematical tool for modeling topological features of data that persist across multiple scales. 
It has several unique features which particularly suit SSL, naturally offering: different views of the data, stability in terms of distance preservation, and the opportunity to flexibly incorporate domain knowledge.\nWe (1) investigate an autoencoder, which shows the general representational power of PH, and (2) propose a contrastive loss that complements existing approaches. \nWe rigorously evaluate our approach for molecular property prediction and demonstrate its particular features in improving the embedding space:\nafter SSL, the representations are better and offer considerably more predictive power than the baselines over different probing tasks; our loss \nincreases baseline performance, sometimes largely; and we often obtain substantial improvements over very small datasets, a common scenario in practice.", "keywords": "Graph Neural Networks;Molecular Representation Learning;Persistent Homology;Contrastive Learning;Self-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Yuankai Luo;Lei Shi;Veronika Thost", "authorids": "~Yuankai_Luo2;~Lei_Shi13;~Veronika_Thost1", "gender": "M;M;F", "homepage": "https://luoyk1999.github.io/;https://leishidata.com/;https://mitibmwatsonailab.mit.edu/people/veronika-thost/", "dblp": "299/6707;29/563-2;132/3874", "google_scholar": "33f_QqAAAAAJ;NmaU6U0AAAAJ;TyScgJ0AAAAJ", "orcid": "0000-0003-3844-7214;;0000-0003-4984-1532", "linkedin": ";;", "or_profile": "~Yuankai_Luo2;~Lei_Shi13;~Veronika_Thost1", "aff": "Beihang University;Beihang University;IBM Research", "aff_domain": "buaa.edu.cn;buaa.edu.cn;ibm.com", "position": "PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nluo2023improving,\ntitle={Improving Self-supervised Molecular Representation Learning using Persistent Homology},\nauthor={Yuankai Luo and Lei Shi and Veronika Thost},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wEiUGpcr0M}\n}", "github": "", "project": "", "reviewers": "U3kd;9rP5;XtrW;recC;bcsi", "pdf_size": 2051715, "rating": "3;5;6;7;7", "confidence": "4;4;4;4;2", "soundness": "3;2;3;3;3", "novelty": "1;2;3;3;3", "presentation": "2;2;4;3;3", "wc_summary": "80;74;49;54;78", "wc_strengths": "27;32;119;40;12", "wc_weaknesses": "271;225;192;7;30", "wc_questions": "77;223;46;802;54", "wc_limitations": "6;10;39;14;8", "wc_review": "461;564;445;917;182", "wc_reply_reviewers": "525;178;21;9;0", "wc_reply_authors": "1058;868;13;9;0", "reply_reviewers": "3;1;1;1;0", "reply_authors": "4;3;2;2;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 67.0, 12.89961239727768 ], "wc_strengths_avg": [ 46.0, 37.62446012901713 ], "wc_weaknesses_avg": [ 145.0, 106.54013328319051 ], "wc_questions_avg": [ 240.4, 288.0740182661394 ], "wc_limitations_avg": [ 15.4, 12.09297316626478 ], "wc_review_avg": [ 513.8, 237.78258977477728 ], "wc_reply_reviewers_avg": [ 146.6, 200.186513032222 ], "wc_reply_authors_avg": [ 389.6, 472.0375408799601 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4677071733467426, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10208430291509893909&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": 
"buaa.edu.cn;buaa.edu.cn;ibm.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Beihang University;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.ibm.com/research", "aff_unique_abbr": "BUAA;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Sharp Calibrated Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70070", "id": "wFH5hZAwYz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7319b7561ffe5e2f6419acd4a2f52d6b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wFH5hZAwYz", "openreview": "https://openreview.net/forum?id=wFH5hZAwYz", "poster": "/media/PosterPDFs/NeurIPS%202023/70070.png?t=1702506436.971423", "slides": "https://nips.cc/virtual/2023/poster/70070", "video": "https://nips.cc/virtual/2023/poster/70070", "author_site": "Alexandre Capone, Sandra Hirche, Geoff Pleiss, Geoff Pleiss", "tldr": "", "abstract": "While Gaussian processes are a mainstay for various engineering and scientific applications, the uncertainty estimates don't satisfy frequentist guarantees and can be miscalibrated in practice. State-of-the-art approaches for designing calibrated models rely on inflating the Gaussian process posterior variance, which yields confidence intervals that are potentially too coarse. To remedy this, we present a calibration approach that generates predictive quantiles using a computation inspired by the vanilla Gaussian process posterior variance but using a different set of hyperparameters chosen to satisfy an empirical calibration constraint. This results in a calibration approach that is considerably more flexible than existing approaches, which we optimize to yield tight predictive quantiles. Our approach is shown to yield a calibrated model under reasonable assumptions. 
Furthermore, it outperforms existing approaches in sharpness when employed for calibrated regression.", "keywords": "Gaussian Processes;Frequentist Statistics;Kernel Methods;Model Selection and Structure Learning;Regression", "primary_area": "", "supplementary_material": "/attachment/fd4f85769c087ce59c032ebb0f1db7dbdcfde8b2.zip", "author": "Alexandre Capone;Sandra Hirche;Geoff Pleiss", "authorids": "~Alexandre_Capone1;~Sandra_Hirche1;~Geoff_Pleiss1", "gender": "M;F;M", "homepage": "https://acapone1.github.io/;http://www.itr.ei.tum.de;http://geoffpleiss.com", "dblp": "238/8124.html;89/6985;199/1693.html", "google_scholar": "VD_C8GcAAAAJ;;XO8T-Y4AAAAJ", "orcid": "0000-0002-4358-0012;;0000-0002-7009-0967", "linkedin": "alexandre-c-92b678134/;;", "or_profile": "~Alexandre_Capone1;~Sandra_Hirche1;~Geoff_Pleiss1", "aff": "Technical University Munich;Technical University Munich;Columbia University", "aff_domain": "tum.de;tum.de;columbia.edu", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\ncapone2023sharp,\ntitle={Sharp Calibrated Gaussian Processes},\nauthor={Alexandre Capone and Sandra Hirche and Geoff Pleiss},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wFH5hZAwYz}\n}", "github": "", "project": "", "reviewers": "k7Z4;XbYQ;XLKC;jTuD", "pdf_size": 4290215, "rating": "5;6;6;7", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "3;1;3;3", "wc_summary": "109;113;81;120", "wc_strengths": "555;81;16;37", "wc_weaknesses": "414;65;189;34", "wc_questions": "183;252;23;89", "wc_limitations": "2;26;20;7", "wc_review": "1263;537;329;287", "wc_reply_reviewers": "728;175;53;114", "wc_reply_authors": "887;656;127;12", "reply_reviewers": "5;2;1;1", "reply_authors": "6;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 105.75, 14.821858857781638 ], "wc_strengths_avg": [ 172.25, 222.22215798610182 ], "wc_weaknesses_avg": [ 175.5, 149.41301817445492 ], "wc_questions_avg": [ 136.75, 87.5224971078865 ], "wc_limitations_avg": [ 13.75, 9.65336728815391 ], "wc_review_avg": [ 604.0, 392.0726973406845 ], "wc_reply_reviewers_avg": [ 267.5, 269.3459671129308 ], "wc_reply_authors_avg": [ 420.5, 362.6627220986464 ], "reply_reviewers_avg": [ 2.25, 1.6393596310755 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7843957179510055844&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tum.de;tum.de;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technical University of Munich;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.columbia.edu", "aff_unique_abbr": "TUM;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;United States" }, { "title": "Restart Sampling for Improving Generative Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70069", "id": "wFuemocyHZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2543511e5f4d4764857f9ad833a977d-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=wFuemocyHZ", "openreview": "https://openreview.net/forum?id=wFuemocyHZ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70069", "video": "https://nips.cc/virtual/2023/poster/70069", "author_site": "Yilun Xu, Mingyang Deng, Xiang Cheng, Yonglong Tian, Ziming Liu, Tommi Jaakkola", "tldr": "", "abstract": "Generative processes that involve solving differential equations, such as diffusion models, frequently necessitate balancing speed and quality. ODE-based samplers are fast but plateau in performance while SDE-based samplers deliver higher sample quality at the cost of increased sampling time. We attribute this difference to sampling errors: ODE-samplers involve smaller discretization errors while stochasticity in SDE contracts accumulated errors. Based on these findings, we propose a novel sampling algorithm called \\textit{Restart} in order to better balance discretization errors and contraction. The sampling method alternates between adding substantial noise in additional forward steps and strictly following a backward ODE. Empirically, Restart sampler surpasses previous SDE and ODE samplers in both speed and accuracy. Restart not only outperforms the previous best SDE results, but also accelerates the sampling speed by 10-fold / 2-fold on CIFAR-10 / ImageNet $64{\\times} 64$. In addition, it attains significantly better sample quality than ODE samplers within comparable sampling times. Moreover, Restart better balances text-image alignment/visual quality versus diversity than previous samplers in the large-scale text-to-image Stable Diffusion model pre-trained on LAION $512{\\times} 512$. Code is available at https://github.com/Newbeeer/diffusion_restart_sampling", "keywords": "Generative models;diffusion models;PFGM;sampling", "primary_area": "", "supplementary_material": "/attachment/b1e2485595ff83189e2f3df5cecd9fc337ae681f.zip", "author": "Yilun Xu;Mingyang Deng;Xiang Cheng;Yonglong Tian;Ziming Liu;Tommi S. Jaakkola", "authorids": "~Yilun_Xu1;~Mingyang_Deng1;~Xiang_Cheng1;~Yonglong_Tian1;~Ziming_Liu2;~Tommi_S._Jaakkola1", "gender": "M;M;M;;M;", "homepage": "http://yilun-xu.com;https://lambertae.github.io/;https://sites.google.com/berkeley.edu/xiangcheng/home;http://people.csail.mit.edu/yonglong/;https://kindxiaoming.github.io/;", "dblp": ";;29/1059-6;151/6328;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;-WJinlEAAAAJ;https://scholar.google.com.hk/citations?user=OsP7JHAAAAAJ;0b32RKAAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yilun_Xu1;~Mingyang_Deng1;~Xiang_Cheng1;~Yonglong_Tian1;~Ziming_Liu2;~Tommi_S._Jaakkola1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Google;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;;mit.edu;google.com;mit.edu;", "position": "PhD student;;Postdoc;Researcher;PhD student;", "bibtex": "@inproceedings{\nxu2023restart,\ntitle={Restart Sampling for Improving Generative Processes},\nauthor={Yilun Xu and Mingyang Deng and Xiang Cheng and Yonglong Tian and Ziming Liu and Tommi S. 
Jaakkola},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wFuemocyHZ}\n}", "github": "", "project": "", "reviewers": "kog8;YGP7;csQs;HtQK;kn2f", "pdf_size": 39329895, "rating": "5;5;6;6;7", "confidence": "4;3;4;3;4", "soundness": "3;3;4;3;3", "novelty": "3;2;3;2;3", "presentation": "3;3;3;4;4", "wc_summary": "40;211;44;45;145", "wc_strengths": "18;68;43;67;80", "wc_weaknesses": "288;125;35;2;226", "wc_questions": "24;91;29;14;71", "wc_limitations": "4;1;8;1;9", "wc_review": "374;496;159;129;531", "wc_reply_reviewers": "43;37;21;0;25", "wc_reply_authors": "169;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 97.0, 69.37146387384368 ], "wc_strengths_avg": [ 55.2, 22.139557357815445 ], "wc_weaknesses_avg": [ 135.2, 109.06768540681514 ], "wc_questions_avg": [ 45.8, 29.822139426942528 ], "wc_limitations_avg": [ 4.6, 3.3823069050575527 ], "wc_review_avg": [ 337.8, 166.87168723303543 ], "wc_reply_reviewers_avg": [ 25.2, 14.891608375189028 ], "wc_reply_authors_avg": [ 33.8, 67.6 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14785643920079013085&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;;mit.edu;google.com;mit.edu;", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "MIT;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Corrupted User Detection and Regret Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70068", "id": "wHhPIv5G8Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/697200c9d1710c2799720b660abd11bb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wHhPIv5G8Q", "openreview": "https://openreview.net/forum?id=wHhPIv5G8Q", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70068", "video": "https://nips.cc/virtual/2023/poster/70068", "author_site": "Zhiyong Wang, Jize Xie, Tong Yu, Shuai Li, John C.S. Lui", "tldr": "", "abstract": "In real-world online web systems, multiple users usually arrive sequentially into the system. For applications like click fraud and fake reviews, some users can maliciously perform corrupted (disrupted) behaviors to trick the system. Therefore, it is crucial to design efficient online learning algorithms to robustly learn from potentially corrupted user behaviors and accurately identify the corrupted users in an online manner. Existing works propose bandit algorithms robust to adversarial corruption. However, these algorithms are designed for a single user, and cannot leverage the implicit social relations among multiple users for more efficient learning. Moreover, none of them consider how to detect corrupted users online in the multiple-user scenario. 
In this paper, we present an important online learning problem named LOCUD to learn and utilize unknown user relations from disrupted behaviors to speed up learning, and identify the corrupted users in an online setting. To robustly learn and utilize the unknown relations among potentially corrupted users, we propose a novel bandit algorithm RCLUB-WCU. To detect the corrupted users, we devise a novel online detection algorithm OCCUD based on RCLUB-WCU's inferred user relations. We prove a regret upper bound for RCLUB-WCU, which asymptotically matches the lower bound with respect to $T$ up to logarithmic factors, and matches the state-of-the-art results in degenerate cases. We also give a theoretical guarantee for the detection accuracy of OCCUD. With extensive experiments, our methods achieve superior performance over previous bandit algorithms and high corrupted user detection accuracy.", "keywords": "online learning;online corrupted user detection;clustering of bandits", "primary_area": "", "supplementary_material": "/attachment/7fd09f005651858e4af5c7e2023a87799a59c282.zip", "author": "Zhiyong Wang;Jize Xie;Tong Yu;Shuai Li;John C.S. Lui", "authorids": "~Zhiyong_Wang9;~Jize_Xie1;~Tong_Yu3;~Shuai_Li3;~John_C.S._Lui2", "gender": "M;M;;F;M", "homepage": "https://zhiyongwangwzy.github.io/;;https://www.linkedin.com/in/tong-yu-42790744;http://shuaili8.github.io;http://www.cse.cuhk.edu.hk/~cslui/Index.html", "dblp": ";339/2280;32/1593-1;57/2281-10;l/JohnCSLui", "google_scholar": "https://scholar.google.com/citations?hl=en;cX6B3HsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ;https://scholar.google.com.tw/citations?user=7LVjQ7MAAAAJ", "orcid": ";0000-0001-9702-5025;0000-0002-5991-2050;;0000-0001-7466-0384", "linkedin": "zhiyong-wang-a44aaa1a3/;;tong-yu-42790744;;", "or_profile": "~Zhiyong_Wang9;~Jize_Xie1;~Tong_Yu3;~Shuai_Li3;~John_C.S._Lui2", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Shanghai Jiaotong University;Adobe Research;John Hopcroft Center, Shanghai Jiao Tong University;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;sjtu.edu.cn;adobe.com;sjtu.edu.cn;cse.cuhk.edu.hk", "position": "PhD student;Undergrad student;Senior Research Scientist;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023online,\ntitle={Online Corrupted User Detection and Regret Minimization},\nauthor={Zhiyong Wang and Jize Xie and Tong Yu and Shuai Li and John C.S. 
Lui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wHhPIv5G8Q}\n}", "github": "", "project": "", "reviewers": "Lisv;HbaT;GoSQ;wNgt", "pdf_size": 0, "rating": "5;6;7;7", "confidence": "2;3;2;2", "soundness": "2;3;4;3", "novelty": "2;4;4;3", "presentation": "3;3;4;3", "wc_summary": "77;192;169;106", "wc_strengths": "29;97;51;161", "wc_weaknesses": "196;207;109;273", "wc_questions": "2;42;1;171", "wc_limitations": "1;17;1;17", "wc_review": "305;555;331;728", "wc_reply_reviewers": "20;17;0;28", "wc_reply_authors": "33;56;0;31", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 136.0, 46.38426457323647 ], "wc_strengths_avg": [ 84.5, 50.52474641203061 ], "wc_weaknesses_avg": [ 196.25, 58.34970008491903 ], "wc_questions_avg": [ 54.0, 69.5449494931156 ], "wc_limitations_avg": [ 9.0, 8.0 ], "wc_review_avg": [ 479.75, 173.17242130316248 ], "wc_reply_reviewers_avg": [ 16.25, 10.207227831296802 ], "wc_reply_authors_avg": [ 30.0, 19.912307751739878 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9347227988100815793&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": "cse.cuhk.edu.hk;sjtu.edu.cn;adobe.com;sjtu.edu.cn;cse.cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai Jiao Tong University;Adobe", "aff_unique_dep": "Department of Computer Science and Engineering;;Adobe Research", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.sjtu.edu.cn;https://research.adobe.com", "aff_unique_abbr": "CUHK;SJTU;Adobe", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Hong Kong SAR;;Shanghai", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "A Single-Loop Accelerated Extra-Gradient Difference Algorithm with Improved Complexity Bounds for Constrained Minimax Optimization", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70067", "id": "wIlmx4bHrO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c242f2b7f60d8c685b6481939330e241-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wIlmx4bHrO", "openreview": "https://openreview.net/forum?id=wIlmx4bHrO", "poster": "/media/PosterPDFs/NeurIPS%202023/70067.png?t=1699606897.539563", "slides": "https://nips.cc/virtual/2023/poster/70067", "video": "https://nips.cc/virtual/2023/poster/70067", "author_site": "Yuanyuan Liu, Fanhua Shang, Weixin An, Junhao Liu, Hongying Liu, Zhouchen Lin", "tldr": "", "abstract": "In this paper, we propose a novel extra-gradient difference acceleration algorithm for solving constrained nonconvex-nonconcave (NC-NC) minimax problems. In particular, we design a new extra-gradient difference step to obtain an important quasi-cocoercivity property, which plays a key role to significantly improve the convergence rate in the constrained NC-NC setting without additional structural assumption. 
Then momentum acceleration is also introduced into our dual accelerating update step. Moreover, we prove that, to find an $\\epsilon$-stationary point of the function $f$, our algorithm attains the complexity $\\mathcal{O}(\\epsilon^{-2})$ in the constrained NC-NC setting, while the best-known complexity bound is $\\widetilde{\\mathcal{O}}(\\epsilon^{-4})$, where $\\widetilde{\\mathcal{O}}(\\cdot)$ hides logarithmic factors compared to $\\mathcal{O}(\\cdot)$. As special cases of the constrained NC-NC setting, our algorithm can also obtain the same complexity $\\mathcal{O}(\\epsilon^{-2})$ for both the nonconvex-concave (NC-C) and convex-nonconcave (C-NC) cases, while the best-known complexity bounds are $\\widetilde{\\mathcal{O}}(\\epsilon^{-2.5})$ for the NC-C case and $\\widetilde{\\mathcal{O}}(\\epsilon^{-4})$ for the C-NC case. For a fair comparison with existing algorithms, we also analyze the complexity bound to find an $\\epsilon$-stationary point of the primal function $\\phi$ for the constrained NC-C problem, which shows that our algorithm can improve the complexity bound from $\\widetilde{\\mathcal{O}}(\\epsilon^{-3})$ to $\\mathcal{O}(\\epsilon^{-2})$. To the best of our knowledge, this is the first time that the proposed algorithm improves the best-known complexity bounds from $\\mathcal{O}(\\epsilon^{-4})$ and $\\widetilde{\\mathcal{O}}(\\epsilon^{-3})$ to $\\mathcal{O}(\\epsilon^{-2})$ in both the NC-NC and NC-C settings.", "keywords": "Constrained Minimax Optimization; nonconvex-nonconcave", "primary_area": "", "supplementary_material": "/attachment/0d93bd18befd0637b82a35fc9be68f3b9be0e37c.pdf", "author": "Yuanyuan Liu;Fanhua Shang;Weixin An;Junhao Liu;Hongying Liu;Zhouchen Lin", "authorids": "~Yuanyuan_Liu1;~Fanhua_Shang2;~Weixin_An1;~Junhao_Liu3;~Hongying_Liu2;~Zhouchen_Lin1", "gender": "Not Specified;M;;;F;M", "homepage": "https://dblp.uni-trier.de/pid/97/2119-1.html;https://sites.google.com/site/fanhua217/home;https://sites.google.com/view/lkong;;;https://zhouchenlin.github.io", "dblp": "97/2119-1;66/9057;;312/3496.html;43/8776;l/ZhouchenLin", "google_scholar": "https://scholar.google.com/scholar?q=Yuanyuan+Liu;rk_HZTkAAAAJ;;;S0pp67AAAAAJ;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ", "orcid": "0000-0001-8646-8533;0000-0002-1040-352X;;;0000-0001-5961-5569;0000-0003-1493-7569", "linkedin": ";;;;;", "or_profile": "~Yuanyuan_Liu1;~Fanhua_Shang2;~Weixin_An1;~Junhao_Liu3;~Hongying_Liu2;~Zhouchen_Lin1", "aff": "The Chinese University of Hong Kong;Tianjin University;Xidian University, Tsinghua University;Xi'an University of Electronic Science and Technology;Xidian University;Peking University", "aff_domain": "cuhk.edu.hk;tju.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;pku.edu.cn", "position": "Postdoc;Full Professor;PhD student;MS student;Associate Professor;Professor", "bibtex": "@inproceedings{\nliu2023a,\ntitle={A Single-Loop Accelerated Extra-Gradient Difference Algorithm with Improved Complexity Bounds for Constrained Minimax Optimization},\nauthor={Yuanyuan Liu and Fanhua Shang and Weixin An and Junhao Liu and Hongying Liu and Zhouchen Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wIlmx4bHrO}\n}", "github": "", "project": "", "reviewers": "RSU3;VxsC;Lyoc;gZnc", "pdf_size": 2625293, "rating": "6;8;9;9", "confidence": "2;4;5;3", "soundness": "3;3;4;3", "novelty": "3;3;4;4", "presentation": "3;4;4;4", "wc_summary": "95;68;44;113", "wc_strengths": "86;117;45;96", 
"wc_weaknesses": "104;38;3;96", "wc_questions": "3;83;30;150", "wc_limitations": "4;16;3;21", "wc_review": "292;322;125;476", "wc_reply_reviewers": "0;40;29;31", "wc_reply_authors": "17;23;23;28", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 8.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.0, 26.239283526803852 ], "wc_strengths_avg": [ 86.0, 26.182054923172092 ], "wc_weaknesses_avg": [ 60.25, 41.72753886823425 ], "wc_questions_avg": [ 66.5, 56.1449018166387 ], "wc_limitations_avg": [ 11.0, 7.713624310270756 ], "wc_review_avg": [ 303.75, 124.5920844195168 ], "wc_reply_reviewers_avg": [ 25.0, 15.016657417681207 ], "wc_reply_authors_avg": [ 22.75, 3.897114317029974 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7302967433402214, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12095052208444656348&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cuhk.edu.hk;tju.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Chinese University of Hong Kong;Tianjin University;Xidian University;Xi'an University of Electronic Science and Technology;Peking University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.tju.edu.cn;http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "CUHK;TJU;Xidian;Xidian University;Peking U", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;Xi'an", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning a 1-layer conditional generative model in total variation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70066", "id": "wImYhdu4VF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/86b8ad667206fb9a52ae575fbf1cd6be-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wImYhdu4VF", "openreview": "https://openreview.net/forum?id=wImYhdu4VF", "poster": "/media/PosterPDFs/NeurIPS%202023/70066.png?t=1702321855.5320883", "slides": "https://nips.cc/virtual/2023/poster/70066", "video": "https://nips.cc/virtual/2023/poster/70066", "author_site": "Ajil Jalal, Justin Kang, Ananya Uppal, Kannan Ramchandran, Eric Price", "tldr": "", "abstract": "A conditional generative model is a method for sampling from a conditional distribution $p(y \\mid x)$. For example, one may want to sample an image of a cat given the label ``cat''. A feed-forward conditional generative model is a function $g(x, z)$ that takes the input $x$ and a random seed $z$, and outputs a sample $y$ from $p(y \\mid x)$. Ideally the distribution of outputs $(x, g(x, z))$ would be close in total variation to the ideal distribution $(x, y)$.\n\nGeneralization bounds for other learning models require assumptions on the distribution of $x$, even in simple settings like linear regression with Gaussian noise. We show these assumptions are unnecessary in our model, for both linear regression and single-layer ReLU networks. Given samples $(x, y)$, we show how to learn a 1-layer ReLU conditional generative model in total variation. 
As our result has no assumption on the distribution of inputs $x$, if we are given access to the internal activations of a deep generative model, we can compose our 1-layer guarantee to progressively learn the deep model using a near-linear number of samples.", "keywords": "Generative models;distribution learning;maximum likelihood estimation", "primary_area": "", "supplementary_material": "/attachment/c4df75b9049ee2e8cfa82a555a8695869cf2412a.pdf", "author": "Ajil Jalal;Justin Kang;Ananya Uppal;Kannan Ramchandran;Eric Price", "authorids": "~Ajil_Jalal1;justin_kang@berkeley.edu;~Ananya_Uppal1;~Kannan_Ramchandran1;~Eric_Price1", "gender": "M;;F;M;", "homepage": ";;https://ananyauppal.github.io/;https://www.eecs.berkeley.edu/~kannanr/;", "dblp": "173/5088;;220/5296;53/5765;", "google_scholar": "ePC7IC0AAAAJ;;kCdRr1gAAAAJ;https://scholar.google.com.tw/citations?user=DcV-5RAAAAAJ;", "orcid": ";;;0000-0002-4567-328X;", "linkedin": ";;;;", "or_profile": "~Ajil_Jalal1;justin_kang@berkeley.edu;~Ananya_Uppal1;~Kannan_Ramchandran1;~Eric_Price1", "aff": "University of California, Berkeley;;University of Texas at Austin;University of California, Berkeley;", "aff_domain": "berkeley.edu;;utexas.edu;berkeley.edu;", "position": "Postdoc;;Postdoc;Full Professor;", "bibtex": "@inproceedings{\njalal2023learning,\ntitle={Learning a 1-layer conditional generative model in total variation},\nauthor={Ajil Jalal and Justin Kang and Ananya Uppal and Kannan Ramchandran and Eric Price},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wImYhdu4VF}\n}", "github": "", "project": "", "reviewers": "zitB;77Ng;9DZK;NYne;9ZPm", "pdf_size": 1605352, "rating": "3;5;6;7;7", "confidence": "3;4;2;3;3", "soundness": "3;3;3;3;3", "novelty": "1;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "97;94;72;115;154", "wc_strengths": "15;100;46;107;99", "wc_weaknesses": "82;65;26;88;48", "wc_questions": "1;152;329;86;2", "wc_limitations": "1;70;1;5;1", "wc_review": "196;481;474;401;304", "wc_reply_reviewers": "81;116;0;7;38", "wc_reply_authors": "25;133;0;0;319", "reply_reviewers": "2;1;0;1;2", "reply_authors": "2;2;1;1;2", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.4, 27.441574298862665 ], "wc_strengths_avg": [ 73.4, 36.477938538245276 ], "wc_weaknesses_avg": [ 61.8, 22.70154179786034 ], "wc_questions_avg": [ 114.0, 121.46275149196975 ], "wc_limitations_avg": [ 15.6, 27.244081926172516 ], "wc_review_avg": [ 371.2, 108.37232118949932 ], "wc_reply_reviewers_avg": [ 48.4, 44.265562235218475 ], "wc_reply_authors_avg": [ 95.4, 122.1222338478952 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.21128856368212912, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gZCOzZtnvKoJ:scholar.google.com/&scioq=Learning+a+1-layer+conditional+generative+model+in+total+variation&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "berkeley.edu;;utexas.edu;berkeley.edu;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.utexas.edu", "aff_unique_abbr": "UC Berkeley;UT 
Austin", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Berkeley;Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "An Efficient and Robust Framework for Approximate Nearest Neighbor Search with Attribute Constraint", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70065", "id": "wLFXTAWa5V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32e41d6b0a51a63a9a90697da19d235d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wLFXTAWa5V", "openreview": "https://openreview.net/forum?id=wLFXTAWa5V", "poster": "/media/PosterPDFs/NeurIPS%202023/70065.png?t=1701400480.7901115", "slides": "https://nips.cc/virtual/2023/poster/70065", "video": "https://nips.cc/virtual/2023/poster/70065", "author_site": "Mengzhao Wang, Lingwei Lv, Xiaoliang Xu, Yuxiang Wang, Qiang Yue, Jiongkang Ni", "tldr": "", "abstract": "This paper introduces an efficient and robust framework for hybrid query (HQ) processing, which combines approximate nearest neighbor search (ANNS) with attribute constraint. HQ aims to find objects that are similar to a feature vector and match some structured attributes. Existing methods handle ANNS and attribute filtering separately, leading to inefficiency and inaccuracy. Our framework, called native hybrid query (NHQ), builds a composite index based on proximity graph (PG) and applies joint pruning for HQ. We can easily adapt existing PGs to this framework for efficient HQ processing. We also propose two new navigable PGs (NPGs) with optimized edge selection and routing, which improve the overall ANNS performance. We implement five HQ methods based on the proposed NPGs and existing PGs in NHQ, and show that they outperform the state-of-the-art methods on 10 real-world datasets (up to 315$\\times$ faster with the same accuracy).", "keywords": "approximate nearest neighbor search;attribute filtering;high-dimensional vector;proximity graph", "primary_area": "", "supplementary_material": "/attachment/75cf76118c338a65ab9780c471fa83657d6f066e.zip", "author": "Mengzhao Wang;Lingwei Lv;Xiaoliang Xu;Yuxiang Wang;Qiang Yue;Jiongkang Ni", "authorids": "~Mengzhao_Wang1;~Lingwei_Lv1;~Xiaoliang_Xu1;~Yuxiang_Wang1;~Qiang_Yue1;~Jiongkang_Ni1", "gender": ";M;M;M;M;M", "homepage": "https://mzwang.top/;https://github.com/AshenOn3;https://faculty.hdu.edu.cn/jsjxy/xxl/main.htm;https://wyxlss.github.io/;https://github.com/Hanano-Yuuki;https://lsyhprum.github.io/", "dblp": "284/3282;317/0704;48/2710;https://dblp.uni-trier.de/pid/62/1637-1;317/1375;", "google_scholar": "nS6D8QUAAAAJ;;EzDVfKoAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": "0000-0003-3806-1012;;0000-0001-8040-6809;0000-0003-3240-2912;;", "linkedin": ";;;;;", "or_profile": "~Mengzhao_Wang1;~Lingwei_Lv1;~Xiaoliang_Xu1;~Yuxiang_Wang1;~Jiongkang_Ni1;~Yue_Qiang1", "aff": "Zhejiang University;Hangzhou Dianzi University;Hangzhou Dianzi University;Hangzhou Dianzi University;Hangzhou Dianzi University;Hangzhou Dianzi University", "aff_domain": "zju.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn", "position": "PhD student;MS student;Full Professor;Associate Professor;MS student;PhD student", "bibtex": "@inproceedings{\nwang2023an,\ntitle={An Efficient and Robust Framework for Approximate Nearest Neighbor Search with Attribute Constraint},\nauthor={Mengzhao Wang and Lingwei Lv and Xiaoliang Xu and Yuxiang Wang and Qiang Yue and Jiongkang Ni},\nbooktitle={Thirty-seventh Conference on Neural Information 
Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wLFXTAWa5V}\n}", "github": "", "project": "", "reviewers": "XZ58;MN6o;ffbB;7xuq", "pdf_size": 1377533, "rating": "4;6;7;8", "confidence": "3;3;5;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "164;67;144;87", "wc_strengths": "15;64;65;93", "wc_weaknesses": "149;66;324;53", "wc_questions": "93;47;133;79", "wc_limitations": "1;5;2;21", "wc_review": "422;249;668;333", "wc_reply_reviewers": "0;0;105;68", "wc_reply_authors": "0;0;44;659", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 115.5, 39.777506206397604 ], "wc_strengths_avg": [ 59.25, 28.07467720206236 ], "wc_weaknesses_avg": [ 148.0, 108.08098815240356 ], "wc_questions_avg": [ 88.0, 30.870698080866262 ], "wc_limitations_avg": [ 7.25, 8.073877630977572 ], "wc_review_avg": [ 418.0, 156.76574881012752 ], "wc_reply_reviewers_avg": [ 43.25, 45.18503623988809 ], "wc_reply_authors_avg": [ 175.75, 279.5821659190729 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2513430704929468170&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Zhejiang University;Hangzhou Dianzi University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.hdu.edu.cn/", "aff_unique_abbr": "ZJU;HGHDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Calibrating Neural Simulation-Based Inference with Differentiable Coverage Probability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70064", "id": "wLiMhVJ7fx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/03a9a9c1e15850439653bb971a4ad4b3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wLiMhVJ7fx", "openreview": "https://openreview.net/forum?id=wLiMhVJ7fx", "poster": "/media/PosterPDFs/NeurIPS%202023/70064.png?t=1699614726.017287", "slides": "https://nips.cc/virtual/2023/poster/70064", "video": "https://nips.cc/virtual/2023/poster/70064", "author_site": "Maciej Falkiewicz, Naoya Takeishi, Imahn Shekhzadeh, Antoine Wehenkel, Arnaud Delaunoy, Gilles Louppe, Alexandros Kalousis", "tldr": "", "abstract": "Bayesian inference allows expressing the uncertainty of posterior belief under a probabilistic model given prior information and the likelihood of the evidence. Predominantly, the likelihood function is only implicitly established by a simulator posing the need for simulation-based inference (SBI). However, the existing algorithms can yield overconfident posteriors (Hermans *et al.*, 2022) defeating the whole purpose of credibility if the uncertainty quantification is inaccurate. We propose to include a calibration term directly into the training objective of the neural model in selected amortized SBI techniques. 
By introducing a relaxation of the classical formulation of calibration error we enable end-to-end backpropagation. The proposed method is not tied to any particular neural model and brings moderate computational overhead relative to the benefits it provides. It is directly applicable to existing computational pipelines, allowing reliable black-box posterior inference. We empirically show on six benchmark problems that the proposed method achieves competitive or better results in terms of coverage and expected posterior density than the previously existing approaches.", "keywords": "simulation-based inference;inverse problem;bayesian inference;uncertainty quantification;generative modeling", "primary_area": "", "supplementary_material": "/attachment/d595883a39d5dfece9b1cf9cdbbb9d744bee5af0.zip", "author": "Maciej Falkiewicz;Naoya Takeishi;Imahn Shekhzadeh;Antoine Wehenkel;Arnaud Delaunoy;Gilles Louppe;Alexandros Kalousis", "authorids": "~Maciej_Falkiewicz1;~Naoya_Takeishi1;imahn.shekhzadeh@hesge.ch;~Antoine_Wehenkel1;~Arnaud_Delaunoy1;~Gilles_Louppe1;~Alexandros_Kalousis1", "gender": ";M;;M;M;M;M", "homepage": ";https://ntake.jp/;;https://awehenkel.github.io/;;http://glouppe.github.io;http://dmml.ch/alexandros-kalousis/", "dblp": "225/0679;143/0393;;199/9454;277/5958;05/9382;68/6004", "google_scholar": "https://scholar.google.ch/citations?user=08jtE7MAAAAJ;https://scholar.google.co.jp/citations?user=rqF9bAsAAAAJ;;https://scholar.google.be/citations?user=LFz-kl0ZkNkC;f5bhfbEAAAAJ;F_77d4QAAAAJ;uVkn9UEAAAAJ", "orcid": ";0000-0003-0111-2269;;0000-0001-5022-3999;;0000-0002-2082-3106;", "linkedin": ";;;antoine-wehenkel-abb34498/;arnaud-delaunoy-a2439818a/;;", "or_profile": "~Maciej_Falkiewicz1;~Naoya_Takeishi1;imahn.shekhzadeh@hesge.ch;~Antoine_Wehenkel1;~Arnaud_Delaunoy1;~Gilles_Louppe1;~Alexandros_Kalousis1", "aff": "University of Geneva;HES-SO;;Apple;Universit\u00e9 de Li\u00e8ge;University of Li\u00e8ge;University of Applied Sciences Western Switzerland", "aff_domain": "unige.ch;hesge.ch;;apple.com;ulg.ac.be;uliege.be;hesge.ch", "position": "PhD student;Postdoc;;Postdoc;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nfalkiewicz2023calibrating,\ntitle={Calibrating Neural Simulation-Based Inference with Differentiable Coverage Probability},\nauthor={Maciej Falkiewicz and Naoya Takeishi and Imahn Shekhzadeh and Antoine Wehenkel and Arnaud Delaunoy and Gilles Louppe and Alexandros Kalousis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wLiMhVJ7fx}\n}", "github": "", "project": "", "reviewers": "JSnb;KAKy;Lpwd;T8qr", "pdf_size": 880848, "rating": "6;7;7;7", "confidence": "4;3;4;3", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;4;3", "wc_summary": "62;40;122;202", "wc_strengths": "58;82;78;51", "wc_weaknesses": "288;144;111;63", "wc_questions": "46;85;238;13", "wc_limitations": "30;13;45;19", "wc_review": "484;364;594;348", "wc_reply_reviewers": "88;9;255;20", "wc_reply_authors": "0;0;261;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 106.5, 62.77539326838184 ], "wc_strengths_avg": [ 67.25, 13.06474263045392 ], "wc_weaknesses_avg": [ 151.5, 83.90619762568198 ], "wc_questions_avg": [ 95.5, 86.12926331973355 ], "wc_limitations_avg": [ 
26.75, 12.173228823939851 ], "wc_review_avg": [ 447.5, 99.5828800547564 ], "wc_reply_reviewers_avg": [ 93.0, 98.30310269772771 ], "wc_reply_authors_avg": [ 65.25, 113.01631519386925 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12501800284562169384&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "unige.ch;hesge.ch;;apple.com;ulg.ac.be;uliege.be;hesge.ch", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Geneva;Haute Ecole Sp\u00e9cialis\u00e9e de Suisse Occidentale;Apple;Universit\u00e9 de Li\u00e8ge;University of Li\u00e8ge;University of Applied Sciences Western Switzerland", "aff_unique_dep": ";;Apple Inc.;;;", "aff_unique_url": "https://www.unige.ch;https://www.hes-so.ch;https://www.apple.com;https://www.ulg.ac.be;https://www.ulg.ac.be;https://www.hes-so.ch/en", "aff_unique_abbr": "UNIGE;HES-SO;Apple;ULi\u00e8ge;ULi\u00e8ge;HES-SO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;0", "aff_country_unique": "Switzerland;United States;Belgium" }, { "title": "A Closer Look at the Robustness of Contrastive Language-Image Pre-Training (CLIP)", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70063", "id": "wMNpMe0vp3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2c6be9f09e08ca166cdc0aa26306c61f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wMNpMe0vp3", "openreview": "https://openreview.net/forum?id=wMNpMe0vp3", "poster": "/media/PosterPDFs/NeurIPS%202023/70063.png?t=1702384517.1056657", "slides": "https://nips.cc/virtual/2023/poster/70063", "video": "https://nips.cc/virtual/2023/poster/70063", "author_site": "Weijie Tu, Weijian Deng, Tom Gedeon", "tldr": "", "abstract": "Contrastive Language-Image Pre-training (CLIP) models have demonstrated remarkable generalization capabilities across multiple challenging distribution shifts. However, there is still much to be explored in terms of their robustness to the variations of specific visual factors. In real-world applications, reliable and safe systems must consider other safety measures beyond classification accuracy, such as predictive uncertainty. Yet, the effectiveness of CLIP models on such safety-related objectives is less explored. Driven by the above, this work comprehensively investigates the safety measures of CLIP models, specifically focusing on three key properties: resilience to visual factor variations, calibrated uncertainty estimations, and the ability to detect anomalous inputs. To this end, we study $83$ CLIP models and $127$ ImageNet classifiers. They are diverse in architecture, (pre)training distribution, and training strategies. We consider $10$ visual factors (\\emph{e.g.}, shape and pattern), $5$ types of out-of-distribution data, and $8$ natural and challenging test conditions with different shift types, such as texture, style, and perturbation shifts. Our study has unveiled several previously unknown insights into CLIP models. For instance, they are not consistently more calibrated than other ImageNet models, which contradicts existing findings. Additionally, our analysis underscores the significance of training source design by showcasing its profound influence on the three key properties. 
We believe our comprehensive study can shed light on and help guide the development of more robust and reliable CLIP models.", "keywords": "CLIP", "primary_area": "", "supplementary_material": "/attachment/0d5f2d813d99d4303d2deeb8bd5ea27b89d9ac33.pdf", "author": "Weijie Tu;Weijian Deng;Tom Gedeon", "authorids": "~Weijie_Tu1;~Weijian_Deng1;~Tom_Gedeon1", "gender": "M;M;M", "homepage": ";http://weijiandeng.xyz;https://cs.anu.edu.au/people/Tom.Gedeon/", "dblp": "344/1001;198/1517;g/TamasDGedeon.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=lReHnAEAAAAJ;https://scholar.google.com.tw/citations?user=lPTjWIkAAAAJ", "orcid": ";;0000-0001-8356-4909", "linkedin": "weijie-tu;;tom-gedeon", "or_profile": "~Weijie_Tu1;~Weijian_Deng1;~Tom_Gedeon1", "aff": "Australian National University;Australian National University;Curtin University of Technology", "aff_domain": "anu.edu.au;anu.edu.au;curtin.edu.au", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntu2023a,\ntitle={A Closer Look at the Robustness of Contrastive Language-Image Pre-Training ({CLIP})},\nauthor={Weijie Tu and Weijian Deng and Tom Gedeon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wMNpMe0vp3}\n}", "github": "", "project": "", "reviewers": "jPAm;rieU;A6cY;8Bsp;9D3z", "pdf_size": 1189773, "rating": "4;5;6;6;7", "confidence": "4;3;4;4;3", "soundness": "3;3;4;4;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "197;73;49;35;32", "wc_strengths": "38;48;152;50;79", "wc_weaknesses": "585;61;241;83;15", "wc_questions": "811;60;142;3;34", "wc_limitations": "4;3;10;7;30", "wc_review": "1635;245;594;178;190", "wc_reply_reviewers": "2046;10;41;0;0", "wc_reply_authors": "2321;17;25;0;0", "reply_reviewers": "4;1;1;0;0", "reply_authors": "5;2;2;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 77.2, 61.62596855222642 ], "wc_strengths_avg": [ 73.4, 41.60576883077634 ], "wc_weaknesses_avg": [ 197.0, 208.37274293918577 ], "wc_questions_avg": [ 210.0, 304.0230254437976 ], "wc_limitations_avg": [ 10.8, 9.907572861200668 ], "wc_review_avg": [ 568.4, 554.7030196420424 ], "wc_reply_reviewers_avg": [ 419.4, 813.4390204557437 ], "wc_reply_authors_avg": [ 472.6, 924.251177981397 ], "reply_reviewers_avg": [ 1.2, 1.4696938456699067 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3202563076101743, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12219914904974264762&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "anu.edu.au;anu.edu.au;curtin.edu.au", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Australian National University;Curtin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.curtin.edu.au", "aff_unique_abbr": "ANU;Curtin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Leveraging Early-Stage Robustness in Diffusion Models for Efficient and High-Quality Image Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70062", "id": "wNpsGwixjG", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/04261fce1705c4f02f062866717d592a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wNpsGwixjG", "openreview": "https://openreview.net/forum?id=wNpsGwixjG", "poster": "/media/PosterPDFs/NeurIPS%202023/70062.png?t=1701510786.03799", "slides": "https://nips.cc/virtual/2023/poster/70062", "video": "https://nips.cc/virtual/2023/poster/70062", "author_site": "Yulhwa Kim, Dongwon Jo, Hyesung Jeon, Taesu Kim, Daehyun Ahn, Hyungjun Kim, jae-joon kim", "tldr": "", "abstract": "While diffusion models have demonstrated exceptional image generation capabilities, the iterative noise estimation process required for these models is compute-intensive and their practical implementation is limited by slow sampling speeds. In this paper, we propose a novel approach to speed up the noise estimation network by leveraging the robustness of early-stage diffusion models. Our findings indicate that inaccurate computation during the early-stage of the reverse diffusion process has minimal impact on the quality of generated images, as this stage primarily outlines the image while later stages handle the finer details that require more sensitive information. To improve computational efficiency, we combine our findings with post-training quantization (PTQ) to introduce a method that utilizes low-bit activation for the early reverse diffusion process while maintaining high-bit activation for the later stages. Experimental results show that the proposed method can accelerate the early-stage computation without sacrificing the quality of the generated images.", "keywords": "diffusion models;post-training quantization", "primary_area": "", "supplementary_material": "/attachment/0014554334035f04fc88fcdd5d209b0b912f0313.zip", "author": "Yulhwa Kim;Dongwon Jo;Hyesung Jeon;Taesu Kim;Daehyun Ahn;Hyungjun Kim;jae-joon kim", "authorids": "~Yulhwa_Kim1;~Dongwon_Jo1;~Hyesung_Jeon1;~Taesu_Kim1;~Daehyun_Ahn1;~Hyungjun_Kim2;~jae-joon_kim1", "gender": ";M;F;M;;;M", "homepage": "https://eic.skku.edu/;https://vlsi.snu.ac.kr;;;;;http://vlsi.snu.ac.kr", "dblp": "223/9434;;;44/6997;223/9559;;", "google_scholar": "VRkM404AAAAJ;https://scholar.google.co.kr/citations?user=GA_k0xAAAAAJ;;zzII2gsAAAAJ;https://scholar.google.co.kr/citations?user=a4e-yE4AAAAJ;pX2macYAAAAJ;Ee994T0AAAAJ", "orcid": "0000-0003-3735-821X;;;;;0000-0001-8403-1557;", "linkedin": ";dongwon-jo-36378b244/;hyesung-jeon-460407270;;;;", "or_profile": "~Yulhwa_Kim1;~Dongwon_Jo1;~Hyesung_Jeon1;~Taesu_Kim1;~Daehyun_Ahn1;~Hyungjun_Kim2;~jae-joon_kim1", "aff": "Seoul National University;Seoul National University;Seoul National University;SqueezeBits Inc.;SqueezeBits Inc.;SqueezeBits Inc.;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;squeezebits.com;squeezebits.com;squeezebits.com;snu.ac.kr", "position": "Researcher;PhD student;Undergrad student;Researcher;Researcher;CEO;Full Professor", "bibtex": "@inproceedings{\nkim2023leveraging,\ntitle={Leveraging Early-Stage Robustness in Diffusion Models for Efficient and High-Quality Image Synthesis},\nauthor={Yulhwa Kim and Dongwon Jo and Hyesung Jeon and Taesu Kim and Daehyun Ahn and Hyungjun Kim and jae-joon kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wNpsGwixjG}\n}", "github": "", "project": "", "reviewers": "eumA;4DBQ;ZFyZ;rGNp;ctiM", "pdf_size": 35910013, "rating": "4;5;5;5;6", "confidence": "4;4;5;3;4", "soundness": "3;3;3;4;4", "novelty": 
"2;3;2;4;3", "presentation": "3;3;2;4;4", "wc_summary": "53;75;82;50;69", "wc_strengths": "63;55;71;21;70", "wc_weaknesses": "173;69;185;97;116", "wc_questions": "25;30;27;20;106", "wc_limitations": "11;16;6;2;23", "wc_review": "325;245;371;190;384", "wc_reply_reviewers": "137;20;107;0;92", "wc_reply_authors": "0;0;0;0;781", "reply_reviewers": "1;1;1;0;2", "reply_authors": "1;1;1;1;3", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 65.8, 12.416118556135006 ], "wc_strengths_avg": [ 56.0, 18.41738309315414 ], "wc_weaknesses_avg": [ 128.0, 44.40720662234904 ], "wc_questions_avg": [ 41.6, 32.36417772785213 ], "wc_limitations_avg": [ 11.6, 7.391887445030532 ], "wc_review_avg": [ 303.0, 74.56808968989348 ], "wc_reply_reviewers_avg": [ 71.2, 52.41144913089124 ], "wc_reply_authors_avg": [ 156.2, 312.4 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18069803303842397362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;squeezebits.com;squeezebits.com;squeezebits.com;snu.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;1;1;1;0", "aff_unique_norm": "Seoul National University;SqueezeBits Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;", "aff_unique_abbr": "SNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Progressive Ensemble Distillation: Building Ensembles for Efficient Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70061", "id": "wNxyDofh74", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/87425754bcc35f2bc62ef4a421a772d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wNxyDofh74", "openreview": "https://openreview.net/forum?id=wNxyDofh74", "poster": "/media/PosterPDFs/NeurIPS%202023/70061.png?t=1697418235.4110103", "slides": "https://nips.cc/virtual/2023/poster/70061", "video": "https://nips.cc/virtual/2023/poster/70061", "author_site": "Don Dennis, Abhishek Shetty, Anish Prasad Sevekari, Kazuhito Koishida, Virginia Smith", "tldr": "", "abstract": "Knowledge distillation is commonly used to compress an ensemble of models into a single model. In this work we study the problem of progressive ensemble distillation: Given a large, pretrained teacher model , we seek to decompose the model into an ensemble of smaller, low-inference cost student models . The resulting ensemble allows for flexibly tuning accuracy vs. inference cost, which can be useful for a multitude of applications in efficient inference. Our method, B-DISTIL, uses a boosting procedure that allows function composition based aggregation rules to construct expressive ensembles with similar performance as using much smaller student models. We demonstrate the effectiveness of B-DISTIL by decomposing pretrained models across a variety of image, speech, and sensor datasets. 
Our method comes with strong theoretical guarantees in terms of convergence as well as generalization.", "keywords": "Edge computing;compression;efficient inference;distillation and inference;run-time tradeoff;inference-time tradeoff;on-device;user-side;client-side", "primary_area": "", "supplementary_material": "", "author": "Don Dennis;Abhishek Shetty;Anish Sevekari;Kazuhito Koishida;Virginia Smith", "authorids": "~Don_Dennis2;~Abhishek_Shetty1;~Anish_Sevekari1;~Kazuhito_Koishida1;~Virginia_Smith1", "gender": ";M;;F;M", "homepage": "https://dkdennis.xyz;https://ashettyv.github.io/;https://asgweb-production.azurewebsites.net/applied-sciences/people/kazuhito-koishida;;", "dblp": "227/4804;223/4770;;120/0921;231/7677", "google_scholar": "https://scholar.google.co.in/citations?user=GaPs1q0AAAAJ;https://scholar.google.co.in/citations?user=M-y2aLUAAAAJ;;;2f7XqqcAAAAJ", "orcid": ";;;;", "linkedin": ";;;;anish-sevekari/", "or_profile": "~Don_Dennis2;~Abhishek_Shetty1;~Kazuhito_Koishida1;~Virginia_Smith1;~Anish_Prasad_Sevekari1", "aff": "Machine Learning Department, School of Computer Science;University of California, Berkeley;Microsoft Corporation;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "mld.cs.cmu.edu;berkeley.edu;microsoft.com;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\ndennis2023progressive,\ntitle={Progressive Ensemble Distillation: Building Ensembles for Efficient Inference},\nauthor={Don Dennis and Abhishek Shetty and Anish Sevekari and Kazuhito Koishida and Virginia Smith},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wNxyDofh74}\n}", "github": "", "project": "", "reviewers": "85M5;2TpN;HW4G;nVQk;KanN", "pdf_size": 598881, "rating": "5;5;6;6;6", "confidence": "3;4;3;2;2", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;1;3;3;3", "wc_summary": "76;63;101;86;71", "wc_strengths": "40;67;33;13;107", "wc_weaknesses": "84;287;239;51;80", "wc_questions": "41;204;211;129;139", "wc_limitations": "1;27;2;9;5", "wc_review": "242;648;586;288;402", "wc_reply_reviewers": "27;55;104;12;14", "wc_reply_authors": "34;0;233;0;0", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 79.4, 13.124023773218335 ], "wc_strengths_avg": [ 52.0, 32.48384213728419 ], "wc_weaknesses_avg": [ 148.2, 95.63555824064603 ], "wc_questions_avg": [ 144.8, 61.5545286717395 ], "wc_limitations_avg": [ 8.8, 9.516301802696255 ], "wc_review_avg": [ 433.2, 160.06298760175633 ], "wc_reply_reviewers_avg": [ 42.4, 34.41278832062291 ], "wc_reply_authors_avg": [ 53.4, 90.76034376312155 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7637626158259732, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11590454803309976932&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mld.cs.cmu.edu;berkeley.edu;microsoft.com;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley;Microsoft", "aff_unique_dep": "Machine Learning 
Department;;Microsoft Corporation", "aff_unique_url": "https://www.cs.cmu.edu/ml;https://www.berkeley.edu;https://www.microsoft.com", "aff_unique_abbr": "CMU;UC Berkeley;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Small batch deep reinforcement learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70060", "id": "wPqEvmwFEh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/528388f1ad3a481249a97cbb698d2fe6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wPqEvmwFEh", "openreview": "https://openreview.net/forum?id=wPqEvmwFEh", "poster": "/media/PosterPDFs/NeurIPS%202023/70060.png?t=1701267568.9836714", "slides": "https://nips.cc/virtual/2023/poster/70060", "video": "https://nips.cc/virtual/2023/poster/70060", "author_site": "Johan Obando Ceron, Marc Bellemare, Pablo Samuel Castro", "tldr": "", "abstract": "In value-based deep reinforcement learning with replay memories, the batch size parameter specifies how many transitions to sample for each gradient update. Although critical to the learning process, this value is typically not adjusted when proposing new algorithms. In this work we present a broad empirical study that suggests reducing the batch size can result in a number of significant performance gains; this is surprising, as the general tendency when training neural networks is towards larger batch sizes for improved performance. We complement our experimental findings with a set of empirical analyses towards better understanding this phenomenon.", "keywords": "Reinforcement Learning;Deep Reinforcement Learning;Value based;Batch Size", "primary_area": "", "supplementary_material": "/attachment/f3aa8744c2dbf29bd24048bbc97d5ef9ff238b4e.pdf", "author": "Johan Samir Obando Ceron;Marc G Bellemare;Pablo Samuel Castro", "authorids": "~Johan_Samir_Obando_Ceron1;~Marc_G_Bellemare1;~Pablo_Samuel_Castro1", "gender": "M;M;M", "homepage": "https://johanobandoc.github.io;http://www.marcgbellemare.info;https://psc-g.github.io/", "dblp": ";38/4525;05/5455", "google_scholar": "KViAb3EAAAAJ;https://scholar.google.co.uk/citations?user=uyYPun0AAAAJ;https://scholar.google.ca/citations?user=jn5r6TsAAAAJ", "orcid": ";;", "linkedin": "johan-obando/;;pablo-samuel-castro-2113641b/", "or_profile": "~Johan_Samir_Obando_Ceron1;~Marc_G_Bellemare1;~Pablo_Samuel_Castro1", "aff": "Mila - Quebec AI Institute, Universit\u00e9 de Montr\u00e9al;Google;Google", "aff_domain": "mila.umontreal.ca;google.com;google.com", "position": "MS student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nceron2023small,\ntitle={Small batch deep reinforcement learning},\nauthor={Johan Samir Obando Ceron and Marc G Bellemare and Pablo Samuel Castro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wPqEvmwFEh}\n}", "github": "", "project": "", "reviewers": "KbLx;vSto;scQb;oeF6", "pdf_size": 3451365, "rating": "4;7;7;7", "confidence": "4;4;4;5", "soundness": "2;4;4;3", "novelty": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "93;116;62;37", "wc_strengths": "148;79;95;64", "wc_weaknesses": "571;102;163;69", "wc_questions": "70;40;43;1", "wc_limitations": "1;9;2;2", "wc_review": "883;346;365;173", "wc_reply_reviewers": "79;98;104;18", "wc_reply_authors": "71;31;137;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": 
[ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.0, 30.008332176247315 ], "wc_strengths_avg": [ 96.5, 31.68990375498165 ], "wc_weaknesses_avg": [ 226.25, 201.87790245591518 ], "wc_questions_avg": [ 38.5, 24.60182920028509 ], "wc_limitations_avg": [ 3.5, 3.2015621187164243 ], "wc_review_avg": [ 441.75, 265.51212307538805 ], "wc_reply_reviewers_avg": [ 74.75, 34.03949911499874 ], "wc_reply_authors_avg": [ 59.75, 51.21218116815569 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6999098063906326751&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mila.umontreal.ca;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Google", "aff_unique_dep": "Mila - Quebec AI Institute;Google", "aff_unique_url": "https://www.mila.quebec/;https://www.google.com", "aff_unique_abbr": "Mila;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Montr\u00e9al;Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Critical Initialization of Wide and Deep Neural Networks using Partial Jacobians: General Theory and Applications", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70059", "id": "wRJqZRxDEX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e02f2910ea7911a37c4691f4201c878-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wRJqZRxDEX", "openreview": "https://openreview.net/forum?id=wRJqZRxDEX", "poster": "/media/PosterPDFs/NeurIPS%202023/70059.png?t=1701900326.387539", "slides": "https://nips.cc/virtual/2023/poster/70059", "video": "https://nips.cc/virtual/2023/poster/70059", "author_site": "Darshil Doshi, Tianyu He, Andrey Gromov", "tldr": "", "abstract": "Deep neural networks are notorious for defying theoretical treatment. However, when the number of parameters in each layer tends to infinity, the network function is a Gaussian process (GP) and a quantitatively predictive description is possible. Gaussian approximation allows one to formulate criteria for selecting hyperparameters, such as variances of weights and biases, as well as the learning rate. These criteria rely on the notion of criticality defined for deep neural networks. In this work we describe a new practical way to diagnose criticality. We introduce *partial Jacobians* of a network, defined as derivatives of preactivations in layer $l$ with respect to preactivations in layer $l_0\\leq l$. We derive recurrence relations for the norms of partial Jacobians and utilize these relations to analyze the criticality of deep fully connected neural networks with LayerNorm and/or residual connections. We derive and implement a simple and cheap numerical test that allows one to select optimal initialization for a broad class of deep neural networks containing fully connected, convolutional, and normalization layers. Using these tools, we show quantitatively that proper stacking of the LayerNorm (applied to preactivations) and residual connections leads to an architecture that is critical for any initialization. 
Finally, we apply our methods to analyze ResNet and MLP-Mixer architectures; demonstrating the everywhere-critical regime.", "keywords": "Criticality;Gaussian Process;Jacobian;LayerNorm;Residual connections;ResNet", "primary_area": "", "supplementary_material": "/attachment/29b5f17303335edb5b32e392ef062a6ad7b9b18f.zip", "author": "Darshil Doshi;Tianyu He;Andrey Gromov", "authorids": "~Darshil_Doshi1;~Tianyu_He2;~Andrey_Gromov1", "gender": "M;M;M", "homepage": ";;", "dblp": ";;", "google_scholar": "4dp-dEMAAAAJ;STDwwY8AAAAJ;D056qfMAAAAJ", "orcid": "0000-0003-3578-9016;0000-0002-2592-9698;", "linkedin": "darshil-doshi-2b010b7b/;;andrey-gromov-2329a241", "or_profile": "~Darshil_Doshi1;~Tianyu_He2;~Andrey_Gromov1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndoshi2023critical,\ntitle={Critical Initialization of Wide and Deep Neural Networks using Partial Jacobians: General Theory and Applications},\nauthor={Darshil Doshi and Tianyu He and Andrey Gromov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wRJqZRxDEX}\n}", "github": "", "project": "", "reviewers": "NnYC;EkqX;8MUF;RvQ9", "pdf_size": 1901146, "rating": "6;6;7;7", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;2;4;3", "wc_summary": "89;199;191;120", "wc_strengths": "52;95;160;146", "wc_weaknesses": "108;221;154;122", "wc_questions": "205;92;71;4", "wc_limitations": "26;32;257;4", "wc_review": "480;639;833;396", "wc_reply_reviewers": "0;66;34;0", "wc_reply_authors": "0;214;41;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 149.75, 46.64426545675256 ], "wc_strengths_avg": [ 113.25, 42.84492385335747 ], "wc_weaknesses_avg": [ 151.25, 43.58540466715893 ], "wc_questions_avg": [ 93.0, 72.37057413065064 ], "wc_limitations_avg": [ 79.75, 102.86489926111823 ], "wc_review_avg": [ 587.0, 166.69583078169651 ], "wc_reply_reviewers_avg": [ 25.0, 27.440845468024488 ], "wc_reply_authors_avg": [ 63.75, 88.3469722175016 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14177938411512049956&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umd.edu;umd.edu;umd.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www.umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Diffusion-Based Image Synthesis with Context Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70058", "id": "wRhLd65bDt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7664a7e946a84ac5e97649a967717cf2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wRhLd65bDt", "openreview": 
"https://openreview.net/forum?id=wRhLd65bDt", "poster": "/media/PosterPDFs/NeurIPS%202023/70058.png?t=1698818455.6698134", "slides": "https://nips.cc/virtual/2023/poster/70058", "video": "https://nips.cc/virtual/2023/poster/70058", "author_site": "Ling Yang, Jingwei Liu, Shenda Hong, Zhilong Zhang, Zhilin Huang, Zheming Cai, Wentao Zhang, Bin CUI", "tldr": "", "abstract": "Diffusion models are a new class of generative models, and have dramatically promoted image generation with unprecedented quality and diversity. Existing diffusion models mainly try to reconstruct input image from a corrupted one with a pixel-wise or feature-wise constraint along spatial axes. However, such point-based reconstruction may fail to make each predicted pixel/feature fully preserve its neighborhood context, impairing diffusion-based image synthesis. As a powerful source of automatic supervisory signal, context has been well studied for learning representations. Inspired by this, we for the first time propose ConPreDiff to improve diffusion-based image synthesis with context prediction. We explicitly reinforce each point to predict its neighborhood context (i.e., multi-stride pixels/features) with a context decoder at the end of diffusion denoising blocks in training stage, and remove the decoder for inference. In this way, each point can better reconstruct itself by preserving its semantic connections with neighborhood context. This new paradigm of ConPreDiff can generalize to arbitrary discrete and continuous diffusion backbones without introducing extra parameters in sampling procedure. Extensive experiments are conducted on unconditional image generation, text-to-image generation and image inpainting tasks. Our ConPreDiff consistently outperforms previous methods and achieves new SOTA text-to-image generation results on MS-COCO, with a zero-shot FID score of 6.21.", "keywords": "Diffusion Model;Image Generation", "primary_area": "", "supplementary_material": "/attachment/3aa6e48accd4e69fb438ff3e8d21f9b49924d02d.zip", "author": "Ling Yang;Jingwei Liu;Shenda Hong;Zhilong Zhang;Zhilin Huang;Zheming Cai;Wentao Zhang;Bin CUI", "authorids": "~Ling_Yang1;~Jingwei_Liu4;~Shenda_Hong1;~Zhilong_Zhang1;~Zhilin_Huang1;~Zheming_Cai1;~Wentao_Zhang1;~Bin_CUI2", "gender": "M;M;;;M;M;;M", "homepage": "https://yangling0818.github.io/;;;;https://zerinhwang03.github.io/;;;https://cuibinpku.github.io/index.html", "dblp": "01/24-6.html;;;06/8799;266/8046.html;;;55/5031.html", "google_scholar": "https://scholar.google.com.hk/citations?user=sIKujqAAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;;IJAU8KoAAAAJ", "orcid": "0000-0003-1905-8053;;;0009-0009-9307-8440;0000-0003-3417-743X;0009-0000-1983-0897;;0000-0003-1681-4677", "linkedin": ";%E7%BB%8F%E7%BA%AC-%E5%88%98-181399263/;;;;;;", "or_profile": "~Ling_Yang1;~Jingwei_Liu4;~Shenda_Hong1;~Zhilong_Zhang1;~Zhilin_Huang1;~Zheming_Cai1;~Wentao_Zhang1;~Bin_CUI2", "aff": "Peking University;Peking University;;Korea Advanced Institute of Science & Technology;Tsinghua University;Beijing University of Posts and Telecommunications;;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;;kaist.ac.kr;mails.tsinghua.edu.cn;bupt.edu.cn;;pku.edu.cn", "position": "PhD student;PhD student;;Intern;PhD student;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nyang2023improving,\ntitle={Improving Diffusion-Based Image Synthesis with Context Prediction},\nauthor={Ling Yang and Jingwei Liu and Shenda Hong and Zhilong Zhang and Zhilin 
Huang and Zheming Cai and Wentao Zhang and Bin CUI},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wRhLd65bDt}\n}", "github": "", "project": "", "reviewers": "DpwQ;2mXr;ydZz;qzoS;m5Qc", "pdf_size": 11341970, "rating": "5;6;6;6;6", "confidence": "3;5;3;5;4", "soundness": "3;3;3;4;3", "novelty": "3;2;3;4;3", "presentation": "3;2;3;4;3", "wc_summary": "67;70;75;71;59", "wc_strengths": "34;62;63;36;17", "wc_weaknesses": "107;125;99;14;37", "wc_questions": "58;41;49;23;165", "wc_limitations": "13;1;26;4;158", "wc_review": "279;299;312;148;436", "wc_reply_reviewers": "94;9;20;16;17", "wc_reply_authors": "82;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 68.4, 5.3516352641038605 ], "wc_strengths_avg": [ 42.4, 17.69293644367718 ], "wc_weaknesses_avg": [ 76.4, 43.02371439101929 ], "wc_questions_avg": [ 67.2, 50.24101909794427 ], "wc_limitations_avg": [ 40.4, 59.43938088506642 ], "wc_review_avg": [ 294.8, 91.70692449319189 ], "wc_reply_reviewers_avg": [ 31.2, 31.606328480226864 ], "wc_reply_authors_avg": [ 16.4, 32.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1057760244271185901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;pku.edu.cn;;kaist.ac.kr;mails.tsinghua.edu.cn;bupt.edu.cn;;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Peking University;Korea Advanced Institute of Science and Technology;Tsinghua University;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.kaist.ac.kr;https://www.tsinghua.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "Peking U;KAIST;THU;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;South Korea" }, { "id": "wS3PPBUDX8", "title": "Constructing Semantics-Aware Adversarial Examples with Probabilistic Perspective", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this study, we introduce a novel, probabilistic viewpoint on adversarial examples, achieved through box-constrained Langevin Monte Carlo (LMC). Proceeding from this perspective, we develop an innovative approach for generating semantics-aware adversarial examples in a principled manner. This methodology transcends the restriction imposed by geometric distance, instead opting for semantic constraints. Our approach empowers individuals to incorporate their personal comprehension of semantics into the model. Through human evaluation, we validate that our semantics-aware adversarial examples maintain their inherent meaning. 
Experimental findings on the MNIST and SVHN datasets demonstrate that our semantics-aware adversarial examples can effectively circumvent robust adversarial training methods tailored for traditional adversarial attacks.", "keywords": "Adversarial examples;Energy-based model", "primary_area": "", "supplementary_material": "/attachment/77dcb314d0431bcd41785e643daf78e4f1cb0be5.zip", "author": "Andi Zhang;Damon Wischik", "authorids": "~Andi_Zhang2;~Damon_Wischik1", "gender": "M;", "homepage": "http://andi.ac;https://www.cl.cam.ac.uk/~djw1005/", "dblp": "200/8255-1;18/4263.html", "google_scholar": "qGAOAoYAAAAJ;", "orcid": ";", "linkedin": "zhangandi/;", "or_profile": "~Andi_Zhang2;~Damon_Wischik1", "aff": "University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk", "position": "PhD student;Lecturer", "bibtex": "@misc{\nzhang2023constructing,\ntitle={Constructing Semantics-Aware Adversarial Examples with Probabilistic Perspective},\nauthor={Andi Zhang and Damon Wischik},\nyear={2023},\nurl={https://openreview.net/forum?id=wS3PPBUDX8}\n}", "github": "", "project": "", "reviewers": "C5cG;tkih;oacq;AGWj", "site": "https://openreview.net/forum?id=wS3PPBUDX8", "pdf_size": 966949, "rating": "5;5;5;6", "confidence": "4;5;4;4", "soundness": "3;3;1;3", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "108;87;167;134", "wc_strengths": "59;54;37;38", "wc_weaknesses": "216;145;247;68", "wc_questions": "49;45;141;102", "wc_limitations": "1;1;5;1", "wc_review": "433;332;597;343", "wc_reply_reviewers": "17;187;635;38", "wc_reply_authors": "96;411;1037;190", "reply_reviewers": "1;2;3;1", "reply_authors": "2;3;4;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.0, 29.891470355270247 ], "wc_strengths_avg": [ 47.0, 9.669539802906858 ], "wc_weaknesses_avg": [ 169.0, 69.04708538381617 ], "wc_questions_avg": [ 84.25, 39.74528273896161 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 426.25, 106.083398795476 ], "wc_reply_reviewers_avg": [ 219.25, 248.81958825623033 ], "wc_reply_authors_avg": [ 433.5, 366.7141257164769 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7616503380374975058&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Transitivity Recovering Decompositions: Interpretable and Robust Fine-Grained Relationships", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70057", "id": "wUNPmdE273", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/beba7cfdac084a0f53f378d42cbe2824-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wUNPmdE273", "openreview": "https://openreview.net/forum?id=wUNPmdE273", "poster": "/media/PosterPDFs/NeurIPS%202023/70057.png?t=1701641962.1307669", "slides": 
"https://nips.cc/virtual/2023/poster/70057", "video": "https://nips.cc/virtual/2023/poster/70057", "author_site": "ABHRA CHAUDHURI, Massimiliano Mancini, Zeynep Akata, Zeynep Akata, Anjan Dutta", "tldr": "", "abstract": "Recent advances in fine-grained representation learning leverage local-to-global (emergent) relationships for achieving state-of-the-art results. The relational representations relied upon by such methods, however, are abstract. We aim to deconstruct this abstraction by expressing them as interpretable graphs over image views. We begin by theoretically showing that abstract relational representations are nothing but a way of recovering transitive relationships among local views. Based on this, we design Transitivity Recovering Decompositions (TRD), a graph-space search algorithm that identifies interpretable equivalents of abstract emergent relationships at both instance and class levels, and with no post-hoc computations. We additionally show that TRD is provably robust to noisy views, with empirical evidence also supporting this finding. The latter allows TRD to perform at par or even better than the state-of-the-art, while being fully interpretable. Implementation is available at https://github.com/abhrac/trd.", "keywords": "Interpretability;Robustness;Fine-Grained Representation Learning;Graph Theory;Information Theory", "primary_area": "", "supplementary_material": "/attachment/9a10b5923bfadb3170d4964cfcf624a28c6e322b.pdf", "author": "Abhra Chaudhuri;Massimiliano Mancini;Zeynep Akata;Anjan Dutta", "authorids": "~Abhra_Chaudhuri1;~Massimiliano_Mancini1;~Zeynep_Akata1;~Anjan_Dutta1", "gender": "M;M;F;M", "homepage": "https://sites.google.com/view/abhrachaudhuri/;https://mancinimassimiliano.github.io/;https://eml-unitue.de/people/zeynep-akata;https://sites.google.com/site/2adutta/", "dblp": "330/4583;192/2058;117/4838;91/8278-1", "google_scholar": "6KWxpxkAAAAJ;https://scholar.google.it/citations?user=bqTPA8kAAAAJ;jQl9RtkAAAAJ;https://scholar.google.co.uk/citations?user=1aKTzmIAAAAJ", "orcid": ";0000-0001-8595-9955;0000-0002-1432-7747;0000-0002-1667-2245", "linkedin": "abhra-chaudhuri-126a09150;;zeynep-akata-36182045/?ppe=1;anjan-dutta-a97b4071/", "or_profile": "~Abhra_Chaudhuri1;~Massimiliano_Mancini1;~Zeynep_Akata1;~Anjan_Dutta1", "aff": "University of Exeter;University of Tuebingen;University of T\u00fcbingen;University of Surrey", "aff_domain": "exeter.ac.uk;uni-tuebingen.de;uni-tuebingen.de;surrey.ac.uk", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchaudhuri2023transitivity,\ntitle={Transitivity Recovering Decompositions: Interpretable and Robust Fine-Grained Relationships},\nauthor={Abhra Chaudhuri and Massimiliano Mancini and Zeynep Akata and Anjan Dutta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wUNPmdE273}\n}", "github": "", "project": "", "reviewers": "7SVw;VJoJ;mGG3", "pdf_size": 1502200, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "27;98;59", "wc_strengths": "20;54;59", "wc_weaknesses": "142;208;102", "wc_questions": "29;3;119", "wc_limitations": "5;11;23", "wc_review": "223;374;362", "wc_reply_reviewers": "0;10;93", "wc_reply_authors": "0;0;48", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 
3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 61.333333333333336, 29.0325487838889 ], "wc_strengths_avg": [ 44.333333333333336, 17.326921891156037 ], "wc_weaknesses_avg": [ 150.66666666666666, 43.70608907489003 ], "wc_questions_avg": [ 50.333333333333336, 49.701330185642135 ], "wc_limitations_avg": [ 13.0, 7.483314773547883 ], "wc_review_avg": [ 319.6666666666667, 68.52898818910303 ], "wc_reply_reviewers_avg": [ 34.333333333333336, 41.6839963961657 ], "wc_reply_authors_avg": [ 16.0, 22.627416997969522 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1224801447103265719&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "exeter.ac.uk;uni-tuebingen.de;uni-tuebingen.de;surrey.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Exeter;University of Tuebingen;University of T\u00fcbingen;University of Surrey", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.exeter.ac.uk;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.surrey.ac.uk", "aff_unique_abbr": "Exeter;Uni T\u00fcbingen;Uni T\u00fcbingen;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "What can a Single Attention Layer Learn? A Study Through the Random Features Lens", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70056", "id": "wX8GuzDSJR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/274db6bf1b01d8b4f07feaeb8c46f474-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wX8GuzDSJR", "openreview": "https://openreview.net/forum?id=wX8GuzDSJR", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70056", "video": "https://nips.cc/virtual/2023/poster/70056", "author_site": "Hengyu Fu, Tianyu Guo, Yu Bai, Song Mei", "tldr": "", "abstract": "Attention layers---which map a sequence of inputs to a sequence of outputs---are core building blocks of the Transformer architecture which has achieved significant breakthroughs in modern artificial intelligence. This paper presents a rigorous theoretical study on the learning and generalization of a single multi-head attention layer, with a sequence of key vectors and a separate query vector as input. We consider the random feature setting where the attention layer has a large number of heads, with randomly sampled frozen query and key matrices, and trainable value matrices. We show that such a random-feature attention layer can express a broad class of target functions that are permutation invariant to the key vectors. 
We further provide quantitative excess risk bounds for learning these target functions from finite samples, using random feature attention with finitely many heads.\n\nOur results feature several implications unique to the attention structure compared with existing random features theory for neural networks, such as (1) Advantages in the sample complexity over standard two-layer random-feature networks; (2) Concrete and natural classes of functions that can be learned efficiently by a random-feature attention layer; and (3) The effect of the sampling distribution of the query-key weight matrix (the product of the query and key matrix), where Gaussian random weights with a non-zero mean result in better sample complexities over the zero-mean counterpart for learning certain natural target functions. Experiments on simulated data corroborate our theoretical findings and further illustrate the interplay between the sample size and the complexity of the target function.", "keywords": "transformers;attention;deep learning theory;random features", "primary_area": "", "supplementary_material": "/attachment/f217b5d8f3bdc0f86594c13c6486d0d9a06e83a5.pdf", "author": "Hengyu Fu;Tianyu Guo;Yu Bai;Song Mei", "authorids": "fhy2021@stu.pku.edu.cn;tianyu_guo@berkeley.edu;~Yu_Bai1;~Song_Mei1", "gender": ";;;M", "homepage": ";;https://yubai.org;https://www.stat.berkeley.edu/~songmei/", "dblp": ";;03/6325-17.html;https://dblp.org/pers/hd/m/Mei:Song", "google_scholar": ";;owqhKD8AAAAJ;https://scholar.google.com.hk/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "fhy2021@stu.pku.edu.cn;tianyu_guo@berkeley.edu;~Yu_Bai1;~Song_Mei1", "aff": ";;Salesforce Research;University of California, Berkeley", "aff_domain": ";;salesforce.com;berkeley.edu", "position": ";;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nfu2023what,\ntitle={What can a Single Attention Layer Learn? 
A Study Through the Random Features Lens},\nauthor={Hengyu Fu and Tianyu Guo and Yu Bai and Song Mei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wX8GuzDSJR}\n}", "github": "", "project": "", "reviewers": "giTQ;6d3X;5oTn;3Deb", "pdf_size": 1419716, "rating": "5;5;5;7", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "2;2;2;3", "presentation": "4;3;3;4", "wc_summary": "26;124;92;327", "wc_strengths": "33;57;60;90", "wc_weaknesses": "127;276;173;65", "wc_questions": "160;199;67;173", "wc_limitations": "2;1;9;177", "wc_review": "348;657;401;832", "wc_reply_reviewers": "246;276;485;40", "wc_reply_authors": "411;296;623;11", "reply_reviewers": "2;2;3;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 142.25, 112.36630945261129 ], "wc_strengths_avg": [ 60.0, 20.23610634484806 ], "wc_weaknesses_avg": [ 160.25, 77.03692296554945 ], "wc_questions_avg": [ 149.75, 49.79646071760522 ], "wc_limitations_avg": [ 47.25, 74.97457902516025 ], "wc_review_avg": [ 559.5, 195.97002321783808 ], "wc_reply_reviewers_avg": [ 261.75, 157.69016297791057 ], "wc_reply_authors_avg": [ 335.25, 220.91669810134317 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2033671031239688056&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;salesforce.com;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Salesforce;University of California, Berkeley", "aff_unique_dep": "Salesforce Research;", "aff_unique_url": "https://research.salesforce.com;https://www.berkeley.edu", "aff_unique_abbr": "Salesforce;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Language-driven Scene Synthesis using Multi-conditional Diffusion Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70055", "id": "wYKU1C77sa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/623e5a86fcedca573d33390dd1173e6b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wYKU1C77sa", "openreview": "https://openreview.net/forum?id=wYKU1C77sa", "poster": "/media/PosterPDFs/NeurIPS%202023/70055.png?t=1697256321.3899202", "slides": "https://nips.cc/virtual/2023/poster/70055", "video": "https://nips.cc/virtual/2023/poster/70055", "author_site": "An Dinh Vuong, Minh Nhat VU, Toan Nguyen, Baoru Huang, Dzung Nguyen, Thieu Vo, Anh Nguyen", "tldr": "", "abstract": "Scene synthesis is a challenging problem with several industrial applications. Recently, substantial efforts have been directed to synthesize the scene using human motions, room layouts, or spatial graphs as the input. However, few studies have addressed this problem from multiple modalities, especially combining text prompts. In this paper, we propose a language-driven scene synthesis task, which is a new task that integrates text prompts, human motion, and existing objects for scene synthesis. 
Unlike other single-condition synthesis tasks, our problem involves multiple conditions and requires a strategy for processing and encoding them into a unified space. To address the challenge, we present a multi-conditional diffusion model, which differs from the implicit unification approach of other diffusion literature by explicitly predicting the guiding points for the original data distribution. We demonstrate that our approach is theoretically supportive. The intensive experiment results illustrate that our method outperforms state-of-the-art benchmarks and enables natural scene editing applications. The source code and dataset can be accessed at https://lang-scene-synth.github.io/.", "keywords": "scene synthesis;language-driven;diffusion models;multi-conditional generation;3D point cloud", "primary_area": "", "supplementary_material": "/attachment/a32d51f925dab07c0ead108fe7a330171543c622.zip", "author": "An Dinh Vuong;Minh Nhat VU;Toan Tien Nguyen;Baoru Huang;Dzung Nguyen;Thieu Vo;Anh Nguyen", "authorids": "~An_Dinh_Vuong1;~Minh_Nhat_VU2;~Toan_Tien_Nguyen1;~Baoru_Huang1;~Dzung_Nguyen2;~Thieu_Vo1;~Anh_Nguyen2", "gender": "M;M;M;F;M;M;M", "homepage": "https://andvg3.github.io/;https://www.acin.tuwien.ac.at/en/;https://toannguyen1904.github.io/;https://baoru.netlify.app/;;https://sites.google.com/tdtu.edu.vn/vongocthieu;https://www.csc.liv.ac.uk/~anguyen/", "dblp": ";204/6820;;238/1618;;;52/5285-3.html", "google_scholar": "CUpnG-YAAAAJ;;PhqGEY8AAAAJ;unbPvWAAAAAJ;;CM2qJSoAAAAJ;https://scholar.google.co.uk/citations?user=gEbaF0sAAAAJ", "orcid": "0009-0003-8533-9897;0000-0003-0692-8830;0009-0008-0534-647X;;;;0000-0002-1449-211X", "linkedin": "an-vuong-360105205/;;toannguyen1904;;dung-nguyen-69b44430/;;", "or_profile": "~An_Dinh_Vuong1;~Minh_Nhat_VU2;~Toan_Tien_Nguyen1;~Baoru_Huang1;~Dzung_Nguyen2;~Thieu_Vo1;~Anh_Nguyen2", "aff": "FPT Software - AIC Lab;Technische Universit\u00e4t Wien (TU Wien);Ho Chi Minh city University of Science, Vietnam National University;Imperial College London;FPT Software - AIC Lab;Ton Duc Thang University;University of Liverpool", "aff_domain": "fpt.com;tuwien.ac.at;hcmus.edu.vn;ic.ac.uk;fsoft.com.vn;tdtu.edu.vn;liverpool.ac.uk", "position": "Researcher;Researcher;Undergrad student;PhD student;Lab Manager;Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nvuong2023languagedriven,\ntitle={Language-driven Scene Synthesis using Multi-conditional Diffusion Model},\nauthor={An Dinh Vuong and Minh Nhat VU and Toan Tien Nguyen and Baoru Huang and Dzung Nguyen and Thieu Vo and Anh Nguyen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wYKU1C77sa}\n}", "github": "", "project": "", "reviewers": "RVaZ;aT28;YsUe;uwsL;c9eB", "pdf_size": 26319227, "rating": "4;5;6;6;7", "confidence": "3;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;1;3", "wc_summary": "52;104;71;109;98", "wc_strengths": "17;45;183;137;113", "wc_weaknesses": "152;162;521;478;102", "wc_questions": "46;2;14;6;153", "wc_limitations": "1;4;42;18;11", "wc_review": "268;317;831;748;477", "wc_reply_reviewers": "0;0;70;28;9", "wc_reply_authors": "31;29;50;43;5", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;2;3;3;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 86.8, 21.79357703544785 ], 
"wc_strengths_avg": [ 99.0, 60.557410776881795 ], "wc_weaknesses_avg": [ 283.0, 178.4555967180632 ], "wc_questions_avg": [ 44.2, 56.555813140648944 ], "wc_limitations_avg": [ 15.2, 14.634206503941373 ], "wc_review_avg": [ 528.2, 225.8011514585344 ], "wc_reply_reviewers_avg": [ 21.4, 26.363611285254528 ], "wc_reply_authors_avg": [ 31.6, 15.383107618423528 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4803844614152616, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15011107155121864277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "fpt.com;tuwien.ac.at;hcmus.edu.vn;ic.ac.uk;fsoft.com.vn;tdtu.edu.vn;liverpool.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;3;0;4;5", "aff_unique_norm": "FPT Software;Technische Universit\u00e4t Wien;Ho Chi Minh City University of Science;Imperial College London;Ton Duc Thang University;University of Liverpool", "aff_unique_dep": "AIC Lab;;;;;", "aff_unique_url": "https://www.fpt-software.com;https://www.tuwien.ac.at;;https://www.imperial.ac.uk;https://www.tdtu.edu.vn;https://www.liverpool.ac.uk", "aff_unique_abbr": ";TU Wien;;ICL;TDTU;Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;0;2", "aff_country_unique": "Vietnam;Austria;United Kingdom" }, { "title": "Optimal Block-wise Asymmetric Graph Construction for Graph-based Semi-supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70054", "id": "wYkfog48Bq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e142fd2b70f10db2543c64bca1417de8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wYkfog48Bq", "openreview": "https://openreview.net/forum?id=wYkfog48Bq", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70054", "video": "https://nips.cc/virtual/2023/poster/70054", "author_site": "Zixing Song, Yifei Zhang, Irwin King", "tldr": "", "abstract": "Graph-based semi-supervised learning (GSSL) serves as a powerful tool to model the underlying manifold structures of samples in high-dimensional spaces. It involves two phases: constructing an affinity graph from available data and inferring labels for unlabeled nodes on this graph. While numerous algorithms have been developed for label inference, the crucial graph construction phase has received comparatively less attention, despite its significant influence on the subsequent phase. In this paper, we present an optimal asymmetric graph structure for the label inference phase with theoretical motivations. Unlike existing graph construction methods, we differentiate the distinct roles that labeled nodes and unlabeled nodes could play. Accordingly, we design an efficient block-wise graph learning algorithm with a global convergence guarantee. Other benefits induced by our method, such as enhanced robustness to noisy node features, are explored as well. 
Finally, we perform extensive experiments on synthetic and real-world datasets to demonstrate its superiority to the state-of-the-art graph construction methods in GSSL.", "keywords": "Graph-based Semi-supervised Learning;Affinity Graph Construction", "primary_area": "", "supplementary_material": "/attachment/2b3835c9cf2e1d6bd1ef84b6304338d8c013428a.pdf", "author": "Zixing Song;Yifei Zhang;Irwin King", "authorids": "~Zixing_Song2;~Yifei_Zhang6;~Irwin_King1", "gender": ";M;M", "homepage": ";https://yifeiacc.github.io/;https://www.cse.cuhk.edu.hk/irwin.king/", "dblp": ";55/5266-1.html;k/IrwinKing", "google_scholar": ";DmwXESQAAAAJ;MXvC7tkAAAAJ", "orcid": ";0000-0003-4185-8663;0000-0001-8106-6447", "linkedin": ";;irwinking/", "or_profile": "~Zixing_Song2;~Yifei_Zhang6;~Irwin_King1", "aff": ";Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": ";cse.cuhk.edu.hk;cuhk.edu.hk", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nsong2023optimal,\ntitle={Optimal Block-wise Asymmetric Graph Construction for Graph-based Semi-supervised Learning},\nauthor={Zixing Song and Yifei Zhang and Irwin King},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wYkfog48Bq}\n}", "github": "", "project": "", "reviewers": "CLLx;n8ra;HpMF;1FFQ;7nqE", "pdf_size": 615219, "rating": "4;6;6;7;7", "confidence": "3;1;3;5;4", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "39;22;130;111;63", "wc_strengths": "50;6;337;269;64", "wc_weaknesses": "76;17;199;97;5", "wc_questions": "37;1;148;157;124", "wc_limitations": "10;1;117;8;4", "wc_review": "212;47;931;642;260", "wc_reply_reviewers": "59;0;0;52;118", "wc_reply_authors": "208;0;0;441;73", "reply_reviewers": "1;0;0;2;1", "reply_authors": "2;1;1;3;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 1.32664991614216 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 73.0, 41.352146256270665 ], "wc_strengths_avg": [ 145.2, 132.02030146913012 ], "wc_weaknesses_avg": [ 78.8, 69.35820066870248 ], "wc_questions_avg": [ 93.4, 62.7394612664151 ], "wc_limitations_avg": [ 28.0, 44.609416046390926 ], "wc_review_avg": [ 418.4, 321.9891923652097 ], "wc_reply_reviewers_avg": [ 45.8, 43.86524820401681 ], "wc_reply_authors_avg": [ 144.4, 166.62364778145988 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.41286141192238524, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13837971045957075205&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";cse.cuhk.edu.hk;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Collaboratively Learning Linear Models with Structured Missing Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70053", "id": "waDF0oACu2", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/17f158c25b08758cf650130f7f173e51-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=waDF0oACu2", "openreview": "https://openreview.net/forum?id=waDF0oACu2", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70053", "video": "https://nips.cc/virtual/2023/poster/70053", "author_site": "Chen Cheng, Gary Cheng, John Duchi", "tldr": "", "abstract": "We study the problem of collaboratively learning least squares estimates for $m$ agents. Each agent observes a different subset of the features---e.g., containing data collected from sensors of varying resolution. Our goal is to determine how to coordinate the agents in order to produce the best estimator for each agent. We propose a distributed, semi-supervised algorithm Collab, consisting of three steps: local training, aggregation, and distribution. Our procedure does not require communicating the labeled data, making it communication efficient and useful in settings where the labeled data is inaccessible. Despite this handicap, our procedure is nearly asymptotically, local-minimax optimal---even among estimators allowed to communicate the labeled data such as imputation methods. We test our method on US Census data. We also discuss generalizations of our method to non-Gaussian feature settings, non-linear settings, and Federated Learning.", "keywords": "Collaborative Learning;Missing Data;Sensors;Linear Regression", "primary_area": "", "supplementary_material": "/attachment/f726ef92cdb7ae1a7d5264422ce24f9c053d2353.pdf", "author": "Chen Cheng;Gary Cheng;John Duchi", "authorids": "~Chen_Cheng2;~Gary_Cheng2;~John_Duchi1", "gender": "M;M;M", "homepage": "https://web.stanford.edu/~chen96/;http://garycheng.me;http://web.stanford.edu/~jduchi/", "dblp": "117/4972.html;;41/439", "google_scholar": "https://scholar.google.com/citations?hl=en;qArWV_wAAAAJ;i5srt20AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chen_Cheng2;~Gary_Cheng2;~John_Duchi1", "aff": "Stanford University;Stanford University;Apple", "aff_domain": "stanford.edu;stanford.edu;apple.com", "position": "PhD student;PhD student;Consultant", "bibtex": "@inproceedings{\ncheng2023collaboratively,\ntitle={Collaboratively Learning Linear Models with Structured Missing Data},\nauthor={Chen Cheng and Gary Cheng and John Duchi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=waDF0oACu2}\n}", "github": "", "project": "", "reviewers": "vMLQ;93Dr;Mws6;N12y;jiy8", "pdf_size": 961176, "rating": "4;5;5;6;7", "confidence": "5;3;2;3;4", "soundness": "2;3;3;4;4", "novelty": "2;3;2;2;4", "presentation": "2;2;3;4;3", "wc_summary": "67;95;637;79;73", "wc_strengths": "12;154;2;66;54", "wc_weaknesses": "186;181;2;82;38", "wc_questions": "14;116;2;27;57", "wc_limitations": "52;17;2;11;1", "wc_review": "331;563;645;265;223", "wc_reply_reviewers": "15;69;95;183;0", "wc_reply_authors": "0;0;0;120;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 190.2, 223.5946332092969 ], "wc_strengths_avg": [ 57.6, 53.939225059320236 ], "wc_weaknesses_avg": [ 97.8, 74.43762489494141 ], "wc_questions_avg": [ 43.2, 40.749969325141834 ], "wc_limitations_avg": [ 16.6, 18.661189672687005 ], "wc_review_avg": [ 405.4, 
167.78748463458174 ], "wc_reply_reviewers_avg": [ 72.4, 65.25519136436579 ], "wc_reply_authors_avg": [ 24.0, 48.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.15384615384615383, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10324168939566267811&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "stanford.edu;stanford.edu;apple.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://www.stanford.edu;https://www.apple.com", "aff_unique_abbr": "Stanford;Apple", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Provable benefits of score matching", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70052", "id": "waXoG35kbb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c11f8d40c119867e30e3421f696f931d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=waXoG35kbb", "openreview": "https://openreview.net/forum?id=waXoG35kbb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70052", "video": "https://nips.cc/virtual/2023/poster/70052", "author_site": "Chirag Pabbaraju, Dhruv Rohatgi, Anish Prasad Sevekari, Holden Lee, Ankur Moitra, Andrej Risteski", "tldr": "", "abstract": "Score matching is an alternative to maximum likelihood (ML) for estimating a probability distribution parametrized up to a constant of proportionality. By fitting the ''score'' of the distribution, it sidesteps the need to compute this constant of proportionality (which is often intractable).\nWhile score matching and variants thereof are popular in practice, precise theoretical understanding of the benefits and tradeoffs with maximum likelihood---both computational and statistical---are not well understood. In this work, we give the first example of a natural exponential family of distributions such that the score matching loss is computationally efficient to optimize, and has a comparable statistical efficiency to ML, while the ML loss is intractable to optimize using a gradient-based method. The family consists of exponentials of polynomials of fixed degree, and our result can be viewed as a continuous analogue of recent developments in the discrete setting. Precisely, we show: (1) Designing a zeroth-order or first-order oracle for optimizing the maximum likelihood loss is NP-hard. (2) Maximum likelihood has a statistical efficiency polynomial in the ambient dimension and the radius of the parameters of the family. 
(3) \nMinimizing the score matching loss is both computationally and statistically efficient, with complexity polynomial in the ambient dimension.", "keywords": "theory;score matching;exponential families;sample complexity;computational hardness", "primary_area": "", "supplementary_material": "/attachment/1f645469939fb9a771cc31ffc7f5cb2b830324ba.pdf", "author": "Chirag Pabbaraju;Dhruv Rohatgi;Anish Sevekari;Holden Lee;Ankur Moitra;Andrej Risteski", "authorids": "~Chirag_Pabbaraju1;~Dhruv_Rohatgi1;~Anish_Sevekari1;~Holden_Lee1;~Ankur_Moitra1;~Andrej_Risteski2", "gender": "M;M;M;M;M;M", "homepage": "https://web.stanford.edu/~cpabbara/;http://www.mit.edu/~drohatgi/;http://holdenlee.github.io;http://people.csail.mit.edu/moitra/;;", "dblp": "231/7619;223/4465;150/3407;04/952;63/11143;231/7677", "google_scholar": "IAGcpHkAAAAJ;NUd_d6UAAAAJ;hR9rFHgAAAAJ;https://scholar.google.com.tw/citations?user=umFQktIAAAAJ;;2f7XqqcAAAAJ", "orcid": "0000-0002-3424-691X;;;;;", "linkedin": "chirag-pabbaraju-277a4ba5/;;;;;anish-sevekari/", "or_profile": "~Chirag_Pabbaraju1;~Dhruv_Rohatgi1;~Holden_Lee1;~Ankur_Moitra1;~Andrej_Risteski2;~Anish_Prasad_Sevekari1", "aff": "Stanford University;Massachusetts Institute of Technology;Johns Hopkins University;Massachusetts Institute of Technology;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.stanford.edu;mit.edu;jh.edu;mit.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;;Assistant Professor;PhD student", "bibtex": "@inproceedings{\npabbaraju2023provable,\ntitle={Provable benefits of score matching},\nauthor={Chirag Pabbaraju and Dhruv Rohatgi and Anish Sevekari and Holden Lee and Ankur Moitra and Andrej Risteski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=waXoG35kbb}\n}", "github": "", "project": "", "reviewers": "bZrt;BCHk;xxrR;fYcf", "pdf_size": 423060, "rating": "6;7;7;7", "confidence": "3;4;3;3", "soundness": "4;4;3;4", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "368;178;27;42", "wc_strengths": "53;25;18;29", "wc_weaknesses": "37;241;117;30", "wc_questions": "30;77;12;41", "wc_limitations": "43;1;1;6", "wc_review": "531;522;175;148", "wc_reply_reviewers": "35;10;8;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 153.75, 136.97148425858575 ], "wc_strengths_avg": [ 31.25, 13.160072188251856 ], "wc_weaknesses_avg": [ 106.25, 84.97462856641387 ], "wc_questions_avg": [ 40.0, 23.73815494093844 ], "wc_limitations_avg": [ 12.75, 17.583728273605686 ], "wc_review_avg": [ 344.0, 182.77718676027376 ], "wc_reply_reviewers_avg": [ 16.25, 10.917302780449024 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3234652916596948555&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cs.stanford.edu;mit.edu;jh.edu;mit.edu;cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;3;3", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology;Johns Hopkins 
University;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu;https://www.jhu.edu;https://www.cmu.edu", "aff_unique_abbr": "Stanford;MIT;JHU;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "High-dimensional Asymptotics of Denoising Autoencoders", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70051", "id": "wbbTqsiKzl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2722a0ccf6acfe3d144fdbb0dedd80b5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wbbTqsiKzl", "openreview": "https://openreview.net/forum?id=wbbTqsiKzl", "poster": "/media/PosterPDFs/NeurIPS%202023/70051.png?t=1701424888.439827", "slides": "https://nips.cc/virtual/2023/poster/70051", "video": "https://nips.cc/virtual/2023/poster/70051", "author_site": "Hugo Cui, Lenka Zdeborov\u00e1", "tldr": "", "abstract": "We address the problem of denoising data from a Gaussian mixture using a two-layer non-linear autoencoder with tied weights and a skip connection. We consider the high-dimensional limit where the number of training samples and the input dimension jointly tend to infinity while the number of hidden units remains bounded. We provide closed-form expressions for the denoising mean-squared test error. Building on this result, we quantitatively characterize the advantage of the considered architecture over the autoencoder without the skip connection that relates closely to principal component analysis. We further show that our results capture accurately the learning curves on a range of real datasets.", "keywords": "statistical physics;replica method;autoencoder;exact asymptotics", "primary_area": "", "supplementary_material": "/attachment/1fa164d922c2ba949162e3f4f62f8b03468a7da5.zip", "author": "Hugo Cui;Lenka Zdeborova", "authorids": "~Hugo_Cui1;~Lenka_Zdeborova1", "gender": ";F", "homepage": ";http://artax.karlin.mff.cuni.cz/~zdebl9am/", "dblp": ";27/6064.html", "google_scholar": ";https://scholar.google.fr/citations?user=gkCjy_UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Hugo_Cui1;~Lenka_Zdeborova1", "aff": ";Swiss Federal Institute of Technology Lausanne", "aff_domain": ";epfl.ch", "position": ";Associate Professor", "bibtex": "@inproceedings{\ncui2023highdimensional,\ntitle={High-dimensional Asymptotics of Denoising Autoencoders},\nauthor={Hugo Cui and Lenka Zdeborova},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wbbTqsiKzl}\n}", "github": "", "project": "", "reviewers": "pba7;ZGsS;DrHV;zMEV", "pdf_size": 805700, "rating": "7;7;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "3;3;3;3", "wc_summary": "238;71;47;103", "wc_strengths": "170;19;65;49", "wc_weaknesses": "80;14;126;25", "wc_questions": "93;61;130;100", "wc_limitations": "12;27;16;45", "wc_review": "593;192;384;322", "wc_reply_reviewers": "42;28;18;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 114.75, 73.87954723737822 ], "wc_strengths_avg": [ 75.75, 56.86552118815056 ], "wc_weaknesses_avg": [ 61.25, 44.97429821575874 ], 
"wc_questions_avg": [ 96.0, 24.525496936861444 ], "wc_limitations_avg": [ 25.0, 12.786711852544421 ], "wc_review_avg": [ 372.75, 144.81259441084535 ], "wc_reply_reviewers_avg": [ 26.5, 9.836157786453 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5340420222131540631&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "email": ";epfl.ch", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "title": "Improved Best-of-Both-Worlds Guarantees for Multi-Armed Bandits: FTRL with General Regularizers and Multiple Optimal Arms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70050", "id": "wbg4JEM5Jp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/62bf42cc047db5b290e7d5737c1f6a8d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wbg4JEM5Jp", "openreview": "https://openreview.net/forum?id=wbg4JEM5Jp", "poster": "/media/PosterPDFs/NeurIPS%202023/70050.png?t=1699891140.011918", "slides": "https://nips.cc/virtual/2023/poster/70050", "video": "https://nips.cc/virtual/2023/poster/70050", "author_site": "Tiancheng Jin, Junyan Liu, Haipeng Luo", "tldr": "", "abstract": "We study the problem of designing adaptive multi-armed bandit algorithms that perform optimally in both the stochastic setting and the adversarial setting simultaneously (often known as a best-of-both-world guarantee). \nA line of recent works shows that when configured and analyzed properly, the Follow-the-Regularized-Leader (FTRL) algorithm, originally designed for the adversarial setting, can in fact optimally adapt to the stochastic setting as well. \nSuch results, however, critically rely on an assumption that there exists one unique optimal arm. \nRecently, Ito [2021] took the first step to remove such an undesirable uniqueness assumption for one particular FTRL algorithm with\nthe 1/2-Tsallis entropy regularizer. \nIn this work, we significantly improve and generalize this result, showing that uniqueness is unnecessary for FTRL with a broad family of regularizers and a new learning rate schedule. \nFor some regularizers, our regret bounds also improve upon prior results even when uniqueness holds. 
We further provide an application of our results to the decoupled exploration and exploitation problem, demonstrating that our techniques are broadly applicable.", "keywords": "multi-armed bandit;best of both worlds;Follow-the-Regularized-Leader;Tsallis entropy;Shannon entropy;Log-barrier", "primary_area": "", "supplementary_material": "/attachment/ebd1579f9563b831b82ec6f80c1da3131d6c7b88.pdf", "author": "Tiancheng Jin;Junyan Liu;Haipeng Luo", "authorids": "~Tiancheng_Jin2;~Junyan_Liu1;~Haipeng_Luo1", "gender": "M;;M", "homepage": ";;https://haipeng-luo.net/", "dblp": "233/1230;;62/2576", "google_scholar": ";;ct2hw4UAAAAJ", "orcid": ";;", "linkedin": "tiancheng-jin-gray;;", "or_profile": "~Tiancheng_Jin2;~Junyan_Liu1;~Haipeng_Luo1", "aff": "University of Southern California;;University of Southern California", "aff_domain": "usc.edu;;usc.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\njin2023improved,\ntitle={Improved Best-of-Both-Worlds Guarantees for Multi-Armed Bandits: {FTRL} with General Regularizers and Multiple Optimal Arms},\nauthor={Tiancheng Jin and Junyan Liu and Haipeng Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wbg4JEM5Jp}\n}", "github": "", "project": "", "reviewers": "36bt;VdeD;toae;aiz3", "pdf_size": 635697, "rating": "6;6;6;7", "confidence": "4;3;3;5", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "71;68;152;138", "wc_strengths": "54;34;129;133", "wc_weaknesses": "211;35;159;147", "wc_questions": "316;5;46;80", "wc_limitations": "1;5;49;1", "wc_review": "653;147;535;499", "wc_reply_reviewers": "19;20;84;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.25, 38.08789177678386 ], "wc_strengths_avg": [ 87.5, 44.09365033652805 ], "wc_weaknesses_avg": [ 138.0, 64.1482657598785 ], "wc_questions_avg": [ 111.75, 120.87674507530387 ], "wc_limitations_avg": [ 14.0, 20.273134932713294 ], "wc_review_avg": [ 458.5, 188.64980784511815 ], "wc_reply_reviewers_avg": [ 34.75, 28.472574523565655 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18134507852902927574&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "usc.edu;;usc.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Consistent Aggregation of Objectives with Diverse Time Preferences Requires Non-Markovian Rewards", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70049", "id": "wcdF6jR0Sp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/08342dc6ab69f23167b4123086ad4d38-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wcdF6jR0Sp", "openreview": 
"https://openreview.net/forum?id=wcdF6jR0Sp", "poster": "/media/PosterPDFs/NeurIPS%202023/70049.png?t=1702317595.1331718", "slides": "https://nips.cc/virtual/2023/poster/70049", "video": "https://nips.cc/virtual/2023/poster/70049", "tldr": "", "abstract": "As the capabilities of artificial agents improve, they are being increasingly deployed to service multiple diverse objectives and stakeholders. However, the composition of these objectives is often performed ad hoc, with no clear justification. This paper takes a normative approach to multi-objective agency: from a set of intuitively appealing axioms, it is shown that Markovian aggregation of Markovian reward functions is not possible when the time preference (discount factor) for each objective may vary. It follows that optimal multi-objective agents must admit rewards that are non-Markovian with respect to the individual objectives. To this end, a practical non-Markovian aggregation scheme is proposed, which overcomes the impossibility with only one additional parameter for each objective. This work offers new insights into sequential, multi-objective agency and intertemporal choice, and has practical implications for the design of AI systems deployed to serve multiple generations of principals with varying time preference.", "keywords": "Normative Agency Design;Reward Design;Sequential Decision Making;Reinforcement Learning;Intertemporal Fairness;Multi-Objective Decision Making", "primary_area": "", "supplementary_material": "/attachment/e50ed06b3739a2239018adf7d5890616c0d40c0d.pdf", "author": "Silviu Pitis", "authorids": "~Silviu_Pitis1", "gender": "M", "homepage": "https://silviupitis.com", "dblp": "https://dblp.org/pers/hd/p/Pitis:Silviu", "google_scholar": "oYlo1ycAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Silviu_Pitis1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\npitis2023consistent,\ntitle={Consistent Aggregation of Objectives with Diverse Time Preferences Requires Non-Markovian Rewards},\nauthor={Silviu Pitis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wcdF6jR0Sp}\n}", "github": "", "project": "", "reviewers": "qGsM;j2th;WDhN", "pdf_size": 427757, "rating": "6;6;6", "confidence": "4;4;3", "soundness": "4;4;4", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "78;64;138", "wc_strengths": "26;83;97", "wc_weaknesses": "181;112;349", "wc_questions": "1;287;441", "wc_limitations": "7;7;1", "wc_review": "293;553;1026", "wc_reply_reviewers": "23;73;168", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 93.33333333333333, 32.097074979228594 ], "wc_strengths_avg": [ 68.66666666666667, 30.706495874470743 ], "wc_weaknesses_avg": [ 214.0, 99.52889027814989 ], "wc_questions_avg": [ 243.0, 182.30377578828876 ], "wc_limitations_avg": [ 5.0, 2.8284271247461903 ], "wc_review_avg": [ 624.0, 303.428190296595 ], "wc_reply_reviewers_avg": [ 88.0, 60.13872850889572 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=9825126752551858040&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "", "author_num": 1 }, { "title": "T2I-CompBench: A Comprehensive Benchmark for Open-world Compositional Text-to-image Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73424", "id": "weHBzTLXpH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f8ad010cdd9143dbb0e9308c093aff24-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=weHBzTLXpH", "openreview": "https://openreview.net/forum?id=weHBzTLXpH", "poster": "/media/PosterPDFs/NeurIPS%202023/73424.png?t=1701756003.9448438", "slides": "https://nips.cc/virtual/2023/poster/73424", "video": "https://nips.cc/virtual/2023/poster/73424", "author_site": "Kaiyi Huang, Kaiyue Sun, Enze Xie, Zhenguo Li, Xihui Liu", "tldr": "", "abstract": "Despite the stunning ability to generate high-quality images by recent text-to-image models, current approaches often struggle to effectively compose objects with different attributes and relationships into a complex and coherent scene. We propose T2I-CompBench, a comprehensive benchmark for open-world compositional text-to-image generation, consisting of 6,000 compositional text prompts from 3 categories (attribute binding, object relationships, and complex compositions) and 6 sub-categories (color binding, shape binding, texture binding, spatial relationships, non-spatial relationships, and complex compositions). We further propose several evaluation metrics specifically designed to evaluate compositional text-to-image generation and explore the potential and limitations of multimodal LLMs for evaluation. We introduce a new approach, Generative mOdel finetuning with Reward-driven Sample selection (GORS), to boost the compositional text-to-image generation abilities of pretrained text-to-image models. Extensive experiments and evaluations are conducted to benchmark previous methods on T2I-CompBench, and to validate the effectiveness of our proposed evaluation metrics and GORS approach. 
Project page is available at https://karine-h.github.io/T2I-CompBench/.", "keywords": "benchmark;compositional text-to-image generation", "primary_area": "", "supplementary_material": "/attachment/d977112bca994fc794512223b26aa787485a4f3c.zip", "author": "Kaiyi Huang;Kaiyue Sun;Enze Xie;Zhenguo Li;Xihui Liu", "authorids": "~Kaiyi_Huang1;~Kaiyue_Sun1;~Enze_Xie1;~Zhenguo_Li1;~Xihui_Liu1", "gender": "F;;M;M;F", "homepage": ";;https://xieenze.github.io/;http://www.ee.columbia.edu/~zgli/;https://xh-liu.github.io/", "dblp": "264/1235;;218/5441;23/6479;184/3911", "google_scholar": "dB86D_cAAAAJ;;42MVVPgAAAAJ;XboZC1AAAAAJ;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ", "orcid": ";;;;0000-0003-1831-9952", "linkedin": ";;;;", "or_profile": "~Kaiyi_Huang1;~Kaiyue_Sun1;~Enze_Xie1;~Zhenguo_Li1;~Xihui_Liu1", "aff": "University of Hong Kong;;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;University of Hong Kong", "aff_domain": "hku.hk;;huawei.com;huawei.com;hku.hk", "position": "PhD student;;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023ticompbench,\ntitle={T2I-CompBench: A Comprehensive Benchmark for Open-world Compositional Text-to-image Generation},\nauthor={Kaiyi Huang and Kaiyue Sun and Enze Xie and Zhenguo Li and Xihui Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=weHBzTLXpH}\n}", "github": "", "project": "", "reviewers": "1VSr;Qip7;ZzBg", "pdf_size": 9882737, "rating": "6;6;7", "confidence": "4;3;5", "wc_summary_and_contributions": "57;124;123", "wc_strengths": "50;176;70", "wc_improvement": "101;38;181", "wc_limitations": "9;157;16", "wc_correctness": "14;84;34", "wc_clarity": "8;1;8", "wc_relation_to_prior_work": "29;1;20", "wc_documentation": "57;13;24", "wc_additional_feedback": "1;1;1", "wc_review": "326;595;477", "wc_reply_reviewers": "0;194;21", "wc_reply_authors": "825;1348;470", "reply_reviewers": "0;2;1", "reply_authors": "2;4;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 101.33333333333333, 31.351058816073326 ], "wc_strengths_avg": [ 98.66666666666667, 55.28913897764089 ], "wc_improvement_avg": [ 106.66666666666667, 58.516854172299986 ], "wc_limitations_avg": [ 60.666666666666664, 68.17787193967133 ], "wc_correctness_avg": [ 44.0, 29.43920288775949 ], "wc_clarity_avg": [ 5.666666666666667, 3.299831645537222 ], "wc_relation_to_prior_work_avg": [ 16.666666666666668, 11.67142760000773 ], "wc_documentation_avg": [ 31.333333333333332, 18.696404883173546 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 466.0, 110.09389931629575 ], "wc_reply_reviewers_avg": [ 71.66666666666667, 86.92653347639155 ], "wc_reply_authors_avg": [ 881.0, 360.62260975522133 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 234, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9876799503035031162&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "hku.hk;;huawei.com;huawei.com;hku.hk", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Hong Kong;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "https://www.hku.hk;https://www.huawei.com", "aff_unique_abbr": "HKU;Huawei", 
"aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Outlier-Robust Wasserstein DRO", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70048", "id": "wg3d2FKAm8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c67b138497305835e76fdedd48dd4e59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wg3d2FKAm8", "openreview": "https://openreview.net/forum?id=wg3d2FKAm8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70048", "video": "https://nips.cc/virtual/2023/poster/70048", "author_site": "Sloan Nietert, Ziv Goldfeld, Soroosh Shafiee", "tldr": "", "abstract": "Distributionally robust optimization (DRO) is an effective approach for data-driven decision-making in the presence of uncertainty. Geometric uncertainty due to~sampling or localized perturbations of data points is captured by Wasserstein DRO (WDRO), which seeks to learn a model that performs uniformly well over a Wasserstein ball centered around the observed data distribution. However, WDRO fails to account for non-geometric perturbations such as adversarial outliers, which can greatly distort the Wasserstein distance measurement and impede the learned model. We address this gap by proposing a novel outlier-robust WDRO framework for decision-making under both geometric (Wasserstein) perturbations and non-geometric (total variation (TV)) contamination that allows an $\\varepsilon$-fraction of data to be arbitrarily corrupted. We design an uncertainty set using a certain robust Wasserstein ball that accounts for both perturbation types and derive minimax optimal excess risk bounds for this procedure that explicitly capture the Wasserstein and TV risks. We prove a strong duality result that enables tractable convex reformulations and efficient computation of our outlier-robust WDRO problem. When the loss function depends only on low-dimensional features of the data, we eliminate certain dimension dependencies from the risk bounds that are unavoidable in the general setting. 
Finally, we present experiments validating our theory on standard regression and classification tasks.", "keywords": "distributionally robust optimization;robust statistics;optimal transport;Wasserstein distance", "primary_area": "", "supplementary_material": "", "author": "Sloan Nietert;Ziv Goldfeld;Soroosh Shafiee", "authorids": "~Sloan_Nietert1;~Ziv_Goldfeld1;~Soroosh_Shafiee1", "gender": "M;M;M", "homepage": "https://www.cs.cornell.edu/~nietert/;http://people.ece.cornell.edu/zivg/;https://sorooshafiee.github.io/", "dblp": "283/4446;119/3922;176/9018", "google_scholar": "DeqKNOgAAAAJ;YKRiYRAAAAAJ;NEfjljMAAAAJ", "orcid": ";;0000-0001-9095-2686", "linkedin": ";;https://linkedin.com/in/sorooshafiee", "or_profile": "~Sloan_Nietert1;~Ziv_Goldfeld1;~Soroosh_Shafieezadeh-Abadeh1", "aff": "Cornell University;Cornell University;Carnegie Mellon University", "aff_domain": "cornell.edu;cornell.edu;cmu.edu", "position": "PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nnietert2023outlierrobust,\ntitle={Outlier-Robust Wasserstein {DRO}},\nauthor={Sloan Nietert and Ziv Goldfeld and Soroosh Shafiee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wg3d2FKAm8}\n}", "github": "", "project": "", "reviewers": "L4Xq;ZiUC;zQim;vYVj", "pdf_size": 615157, "rating": "6;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "87;228;132;63", "wc_strengths": "102;83;66;86", "wc_weaknesses": "496;238;241;110", "wc_questions": "69;187;117;29", "wc_limitations": "1;1;36;50", "wc_review": "755;737;592;338", "wc_reply_reviewers": "261;125;37;166", "wc_reply_authors": "460;0;0;290", "reply_reviewers": "2;1;1;2", "reply_authors": "3;1;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 127.5, 63.08922253443927 ], "wc_strengths_avg": [ 84.25, 12.774486291041217 ], "wc_weaknesses_avg": [ 271.25, 140.1202608476019 ], "wc_questions_avg": [ 100.5, 58.862127042776834 ], "wc_limitations_avg": [ 22.0, 21.575449010391416 ], "wc_review_avg": [ 605.5, 166.86896056486958 ], "wc_reply_reviewers_avg": [ 147.25, 80.53066186242356 ], "wc_reply_authors_avg": [ 187.5, 196.89781613821927 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1399345417619670689&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 7, "email": "cornell.edu;cornell.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Cornell University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.cmu.edu", "aff_unique_abbr": "Cornell;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CrossCodeEval: A Diverse and Multilingual Benchmark for Cross-File Code Completion", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73423", "id": "wgDcbBMSfh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/920f2dced7d32ab2ba2f1970bc306af6-Abstract-Datasets_and_Benchmarks.html", "pdf": 
"https://openreview.net/pdf?id=wgDcbBMSfh", "openreview": "https://openreview.net/forum?id=wgDcbBMSfh", "poster": "/media/PosterPDFs/NeurIPS%202023/73423.png?t=1702350243.44707", "slides": "https://nips.cc/virtual/2023/poster/73423", "video": "https://nips.cc/virtual/2023/poster/73423", "author_site": "Yangruibo Ding, Zijian Wang, Wasi Ahmad, Hantian Ding, Ming Tan, Nihal Jain, Murali Krishna Ramanathan, Ramesh Nallapati, Parminder Bhatia, Dan Roth, Bing Xiang", "tldr": "", "abstract": "Code completion models have made significant progress in recent years, yet current popular evaluation datasets, such as HumanEval and MBPP, predominantly focus on code completion tasks within a single file. This over-simplified setting falls short of representing the real-world software development scenario where repositories span multiple files with numerous cross-file dependencies, and accessing and understanding cross-file context is often required to complete the code correctly. \n\nTo fill in this gap, we propose CrossCodeEval, a diverse and multilingual code completion benchmark that necessitates an in-depth cross-file contextual understanding to complete the code accurately. CrossCodeEval is built on a diverse set of real-world, open-sourced, permissively-licensed repositories in four popular programming languages: Python, Java, TypeScript, and C#. To create examples that strictly require cross-file context for accurate completion, we propose a straightforward yet efficient static-analysis-based approach to pinpoint the use of cross-file context within the current file. \n\nExtensive experiments on state-of-the-art code language models like CodeGen and StarCoder demonstrate that CrossCodeEval is extremely challenging when the relevant cross-file context is absent, and we see clear improvements when adding these context into the prompt. However, despite such improvements, the pinnacle of performance remains notably unattained even with the highest-performing model, indicating that CrossCodeEval is also capable of assessing model's capability in leveraging extensive context to make better code completion. 
Finally, we benchmarked various methods in retrieving cross-file context, and show that CrossCodeEval can also be used to measure the capability of code retrievers.", "keywords": "Code Benchmark;Code Evaluation;Code Completion;Cross-file Context;Project-level Context;Code Retrieval", "primary_area": "", "supplementary_material": "", "author": "Yangruibo Ding;Zijian Wang;Wasi Uddin Ahmad;Hantian Ding;Ming Tan;Nihal Jain;Murali Krishna Ramanathan;Ramesh Nallapati;Parminder Bhatia;Dan Roth;Bing Xiang", "authorids": "~Yangruibo_Ding1;~Zijian_Wang1;~Wasi_Uddin_Ahmad1;~Hantian_Ding1;~Ming_Tan2;~Nihal_Jain1;~Murali_Krishna_Ramanathan1;~Ramesh_Nallapati1;~Parminder_Bhatia1;~Dan_Roth3;~Bing_Xiang2", "gender": ";;M;M;;M;M;M;M;M;", "homepage": ";;http://wasiahmad.github.io/;;https://www.linkedin.com/in/ming-tan-18b3436a/;https://nihaljn.github.io;;;;https://www.cis.upenn.edu/~danroth/;", "dblp": ";;183/0576;242/8095;;302/3995;75/541.html;59/4797;168/8615;r/DanRoth;", "google_scholar": ";;YCHJZOMAAAAJ;nEuMO58AAAAJ;;;;;;E-bpPWgAAAAJ;A6yjdJAAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;ahmadwasi/;;;;;;;dan-roth-8667361/;", "or_profile": "~Yangruibo_Ding1;~Zijian_Wang1;~Wasi_Uddin_Ahmad1;~Hantian_Ding1;~Ming_Tan2;~Nihal_Jain1;~Murali_Krishna_Ramanathan1;~Ramesh_Nallapati1;~Parminder_Bhatia1;~Dan_Roth3;~Bing_Xiang2", "aff": ";;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon Web Services;Amazon;Amazon;Goldman Sachs", "aff_domain": ";;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;gs.com", "position": ";;Applied Scientist;Researcher;Applied Scientist;Applied Scientist;Principal Researcher;Senior Principal Scientist;Principal Researcher;VP and Distinguished Scientist;Managing Director", "bibtex": "@inproceedings{\nding2023crosscodeeval,\ntitle={CrossCodeEval: A Diverse and Multilingual Benchmark for Cross-File Code Completion},\nauthor={Yangruibo Ding and Zijian Wang and Wasi Uddin Ahmad and Hantian Ding and Ming Tan and Nihal Jain and Murali Krishna Ramanathan and Ramesh Nallapati and Parminder Bhatia and Dan Roth and Bing Xiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=wgDcbBMSfh}\n}", "github": "", "project": "", "reviewers": "BzTx;C4R7;t83P;K8CN;1pD2", "pdf_size": 4179387, "rating": "6;6;6;6;7", "confidence": "4;5;5;4;3", "wc_summary_and_contributions": "241;143;55;100;141", "wc_strengths": "174;15;96;57;95", "wc_improvement": "41;63;186;88;177", "wc_limitations": "219;7;23;1;55", "wc_correctness": "77;3;6;37;180", "wc_clarity": "10;22;10;1;58", "wc_relation_to_prior_work": "11;1;22;1;90", "wc_documentation": "8;1;5;1;100", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "782;256;404;287;897", "wc_reply_reviewers": "0;0;24;0;0", "wc_reply_authors": "178;196;286;129;236", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 136.0, 61.57272123270174 ], "wc_strengths_avg": [ 87.4, 52.530372166966416 ], "wc_improvement_avg": [ 111.0, 59.5214247141313 ], "wc_limitations_avg": [ 61.0, 81.19113252073775 ], "wc_correctness_avg": [ 60.6, 65.39296598258868 ], "wc_clarity_avg": [ 20.2, 20.04395170618808 ], "wc_relation_to_prior_work_avg": [ 25.0, 33.41257248402164 ], "wc_documentation_avg": [ 23.0, 38.59015418471401 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 525.2, 263.84798653770315 ], 
"wc_reply_reviewers_avg": [ 4.8, 9.6 ], "wc_reply_authors_avg": [ 205.0, 53.118734924694884 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.8017837257372732, "gs_citation": 122, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7911660837549170070&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": ";;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;gs.com", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;0;0;1", "aff_unique_norm": "Amazon;Goldman Sachs", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.goldmansachs.com", "aff_unique_abbr": "Amazon;GS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stein $\\Pi$-Importance Sampling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70047", "id": "wiidCRA3at", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e389b15166cf98966ba058965a8c17e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wiidCRA3at", "openreview": "https://openreview.net/forum?id=wiidCRA3at", "poster": "/media/PosterPDFs/NeurIPS%202023/70047.png?t=1697135250.5473504", "slides": "https://nips.cc/virtual/2023/poster/70047", "video": "https://nips.cc/virtual/2023/poster/70047", "author_site": "Congye Wang, Ye Chen, Heishiro Kanagawa, Chris Oates", "tldr": "", "abstract": "Stein discrepancies have emerged as a powerful tool for retrospective improvement of Markov chain Monte Carlo output. However, the question of how to design Markov chains that are well-suited to such post-processing has yet to be addressed. This paper studies Stein importance sampling, in which weights are assigned to the states visited by a $\\Pi$-invariant Markov chain to obtain a consistent approximation of $P$, the intended target. Surprisingly, the optimal choice of $\\Pi$ is not identical to the target $P$; we therefore propose an explicit construction for $\\Pi$ based on a novel variational argument. Explicit conditions for convergence of Stein $\\Pi$-Importance Sampling are established. For $\\approx 70$% of tasks in the PosteriorDB benchmark, a significant improvement over the analogous post-processing of $P$-invariant Markov chains is reported.", "keywords": "Bayesian;discrepancy;kernel;sampling;Stein's method", "primary_area": "", "supplementary_material": "/attachment/a5ee66ed641d80b1eac938a95a33d72c7f4ea669.zip", "author": "Congye Wang;Wilson Ye Chen;Heishiro Kanagawa;Chris J. 
Oates", "authorids": "~Congye_Wang1;~Wilson_Ye_Chen2;~Heishiro_Kanagawa1;~Chris_J._Oates1", "gender": "M;M;M;", "homepage": "https://congyewang.github.io/;;;https://oates.work", "dblp": "369/8180.html;https://dblp.uni-trier.de/pers/hd/c/Chen:Wilson_Ye;182/8957;118/6076", "google_scholar": "nyt9y-UAAAAJ;jX8lC6EAAAAJ;aS_WmUwAAAAJ;W_Ul5jMAAAAJ", "orcid": "0009-0001-0167-1362;;;", "linkedin": "congyewang;;;", "or_profile": "~Congye_Wang1;~Wilson_Ye_Chen2;~Heishiro_Kanagawa1;~Chris_J._Oates1", "aff": "University of Newcastle-upon-Tyne;University of Sydney, Australia;University of Newcastle-upon-Tyne;Newcastle University", "aff_domain": "ncl.ac.uk;sydney.edu.au;ncl.ac.uk;ncl.ac.uk", "position": "PhD student;Assistant Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2023stein,\ntitle={Stein \\${\\textbackslash}Pi\\$-Importance Sampling},\nauthor={Congye Wang and Wilson Ye Chen and Heishiro Kanagawa and Chris J. Oates},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wiidCRA3at}\n}", "github": "", "project": "", "reviewers": "m4Rk;yEHQ;fuoL;pfRv;X2c9", "pdf_size": 3205357, "rating": "6;6;6;7;7", "confidence": "4;2;3;3;2", "soundness": "3;2;3;3;4", "novelty": "3;2;3;3;4", "presentation": "2;2;4;4;4", "wc_summary": "133;24;73;141;52", "wc_strengths": "24;51;128;147;165", "wc_weaknesses": "46;51;297;270;27", "wc_questions": "30;31;196;92;47", "wc_limitations": "1;1;7;20;21", "wc_review": "234;158;701;670;312", "wc_reply_reviewers": "8;14;156;12;21", "wc_reply_authors": "0;379;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.9797958971132712 ], "wc_summary_avg": [ 84.6, 45.59210457963089 ], "wc_strengths_avg": [ 103.0, 55.40758070878027 ], "wc_weaknesses_avg": [ 138.2, 119.21308652996113 ], "wc_questions_avg": [ 79.2, 62.58881689247689 ], "wc_limitations_avg": [ 10.0, 8.854377448471462 ], "wc_review_avg": [ 415.0, 226.38021114929634 ], "wc_reply_reviewers_avg": [ 42.2, 57.055762198046224 ], "wc_reply_authors_avg": [ 75.8, 151.6 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.32732683535398854, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3301702668663202650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ncl.ac.uk;sydney.edu.au;ncl.ac.uk;ncl.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Newcastle;University of Sydney;Newcastle University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ncl.ac.uk;https://www.sydney.edu.au;https://www.ncl.ac.uk", "aff_unique_abbr": "Newcastle;USYD;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Australia" }, { "title": "Zero-shot Visual Relation Detection via Composite Visual Cues from Large Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70046", "id": "wiv21EJ0Vd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ca825deb6ce588c96f880728d3b8aea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wiv21EJ0Vd", "openreview": 
"https://openreview.net/forum?id=wiv21EJ0Vd", "poster": "/media/PosterPDFs/NeurIPS%202023/70046.png?t=1701664166.0876825", "slides": "https://nips.cc/virtual/2023/poster/70046", "video": "https://nips.cc/virtual/2023/poster/70046", "author_site": "Lin Li, Jun Xiao, Guikun Chen, Jian Shao, Yueting Zhuang, Long Chen", "tldr": "", "abstract": "Pretrained vision-language models, such as CLIP, have demonstrated strong generalization capabilities, making them promising tools in the realm of zero-shot visual recognition. Visual relation detection (VRD) is a typical task that identifies relationship (or interaction) types between object pairs within an image. However, naively utilizing CLIP with prevalent class-based prompts for zero-shot VRD has several weaknesses, e.g., it struggles to distinguish between different fine-grained relation types and it neglects essential spatial information of two objects. To this end, we propose a novel method for zero-shot VRD: RECODE, which solves RElation detection via COmposite DEscription prompts. Specifically, RECODE first decomposes each predicate category into subject, object, and spatial components. Then, it leverages large language models (LLMs) to generate description-based prompts (or visual cues) for each component. Different visual cues enhance the discriminability of similar relation categories from different perspectives, which significantly boosts performance in VRD. To dynamically fuse different cues, we further introduce a chain-of-thought method that prompts LLMs to generate reasonable weights for different visual cues. Extensive experiments on four VRD benchmarks have demonstrated the effectiveness and interpretability of RECODE.", "keywords": "Visual relation detection;Zero-short learning;Scene graph generation", "primary_area": "", "supplementary_material": "/attachment/a11f9b508abf1022f62b54e3ab1c4aeecd14c1e2.pdf", "author": "Lin Li;Jun Xiao;Guikun Chen;Jian Shao;Yueting Zhuang;Long Chen", "authorids": "~Lin_Li18;~Jun_Xiao1;~Guikun_Chen1;~Jian_Shao1;~Yueting_Zhuang1;~Long_Chen8", "gender": "F;M;M;M;M;M", "homepage": ";;https://guikunchen.github.io/;https://person.zju.edu.cn/jshao;https://person.zju.edu.cn/yzhuang;https://zjuchenlong.github.io/", "dblp": "73/2252-65;71/2308-1;342/9515;;;64/5725-16", "google_scholar": "4-z7znIAAAAJ;fqOwFhQAAAAJ;I1TOdpkAAAAJ;VUN-9cQAAAAJ;1RD7UJAAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ", "orcid": "0000-0002-5678-4487;;;;;0000-0001-6148-9709", "linkedin": ";;;;;", "or_profile": "~Lin_Li18;~Jun_Xiao1;~Guikun_Chen1;~Jian_Shao1;~Yueting_Zhuang1;~Long_Chen8", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Columbia University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;columbia.edu", "position": "PhD student;Full Professor;PhD student;Associate Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nli2023zeroshot,\ntitle={Zero-shot Visual Relation Detection via Composite Visual Cues from Large Language Models},\nauthor={Lin Li and Jun Xiao and Guikun Chen and Jian Shao and Yueting Zhuang and Long Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wiv21EJ0Vd}\n}", "github": "", "project": "", "reviewers": "EWRp;4NUf;q7Rt;CZ8p;iRhq", "pdf_size": 1904832, "rating": "5;5;5;5;6", "confidence": "4;4;5;3;4", "soundness": "3;2;2;3;4", "novelty": "2;3;3;3;4", "presentation": "3;2;3;3;4", "wc_summary": "65;66;96;89;119", 
"wc_strengths": "31;69;107;245;47", "wc_weaknesses": "426;203;148;63;38", "wc_questions": "45;39;70;252;8", "wc_limitations": "14;7;46;22;11", "wc_review": "581;384;467;671;223", "wc_reply_reviewers": "251;68;23;29;15", "wc_reply_authors": "1368;34;61;30;43", "reply_reviewers": "2;1;1;1;1", "reply_authors": "6;2;3;2;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 87.0, 20.169283576765935 ], "wc_strengths_avg": [ 99.8, 76.94777449673252 ], "wc_weaknesses_avg": [ 175.6, 138.43785609435014 ], "wc_questions_avg": [ 82.8, 86.87324098938636 ], "wc_limitations_avg": [ 20.0, 13.899640283115243 ], "wc_review_avg": [ 465.2, 155.57686203288714 ], "wc_reply_reviewers_avg": [ 77.2, 88.7927925002925 ], "wc_reply_authors_avg": [ 307.2, 530.50745517853 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.0, 1.5491933384829668 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10527188868356686312&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;columbia.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Zhejiang University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.columbia.edu", "aff_unique_abbr": "ZJU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "AllSim: Simulating and Benchmarking Resource Allocation Policies in Multi-User Systems", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73422", "id": "wiw5mnja8W", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0296e17ec30fc36007edaaa2f96b5f17-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=wiw5mnja8W", "openreview": "https://openreview.net/forum?id=wiw5mnja8W", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73422", "video": "https://nips.cc/virtual/2023/poster/73422", "author_site": "Jeroen Berrevoets, Daniel Jarrett, Alex Chan, Mihaela van der Schaar", "tldr": "", "abstract": "Numerous real-world systems, ranging from healthcare to energy grids, involve users competing for finite and potentially scarce resources. Designing policies for resource allocation in such real-world systems is challenging for many reasons, including the changing nature of user types and their (possibly urgent) need for resources. Researchers have developed numerous machine learning solutions for determining resource allocation policies in these challenging settings. However, a key limitation has been the absence of good methods and test-beds for benchmarking these policies; almost all resource allocation policies are benchmarked in environments which are either completely synthetic or do not allow _any_ deviation from historical data. In this paper we introduce AllSim, which is a benchmarking environment for realistically simulating the impact and utility of policies for resource allocation in systems in which users compete for such scarce resources. 
Building such a benchmarking environment is challenging because it needs to successfully take into account _the entire collective_ of potential users and the impact a resource allocation policy has on all the other users in the system. AllSim's benchmarking environment is modular (each component being parameterized individually), learnable (informed by historical data), and customizable (adaptable to changing conditions). These, when interacting with an allocation policy, produce a dataset of simulated outcomes for evaluation and comparison of such policies. We believe AllSim is an essential step towards a more systematic evaluation of policies for scarce resource allocation compared to current approaches for benchmarking such methods.", "keywords": "resource allocation;simulation", "primary_area": "", "supplementary_material": "/attachment/ca8dae55c4658149dd7fe4e5ea00fbaed9f44677.pdf", "author": "Jeroen Berrevoets;Daniel Jarrett;Alex James Chan;Mihaela van der Schaar", "authorids": "~Jeroen_Berrevoets1;~Daniel_Jarrett1;~Alex_James_Chan1;~Mihaela_van_der_Schaar2", "gender": ";;M;F", "homepage": "https://jeroenbe.github.io;https://danieljarrett.github.io;https://alexjchan.com;https://www.vanderschaar-lab.com", "dblp": "236/4591;230/8183;268/6948;", "google_scholar": "https://scholar.google.be/citations?user=Bq1dFNQAAAAJ;Pczk-PQAAAAJ;yfy_BGIAAAAJ;DZ3S--MAAAAJ", "orcid": ";0000-0002-2204-6515;;", "linkedin": ";danjarrett/;alex-chan-040081131/;", "or_profile": "~Jeroen_Berrevoets1;~Daniel_Jarrett1;~Alex_James_Chan1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;Ph.D.;PhD student;Full Professor", "bibtex": "@inproceedings{\nberrevoets2023allsim,\ntitle={AllSim: Simulating and Benchmarking Resource Allocation Policies in Multi-User Systems},\nauthor={Jeroen Berrevoets and Daniel Jarrett and Alex James Chan and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=wiw5mnja8W}\n}", "github": "", "project": "", "reviewers": "uqdE;tL4u;XpaH;zixr;pbmH", "pdf_size": 526492, "rating": "3;6;6;7;8", "confidence": "5;3;4;3;4", "wc_summary_and_contributions": "49;67;144;130;62", "wc_strengths": "26;50;77;42;63", "wc_improvement": "250;1;84;302;69", "wc_limitations": "16;58;41;24;64", "wc_correctness": "19;1;4;36;1", "wc_clarity": "5;21;33;19;1", "wc_relation_to_prior_work": "1;1;10;47;1", "wc_documentation": "9;1;9;24;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "376;201;403;625;263", "wc_reply_reviewers": "0;0;95;0;0", "wc_reply_authors": "796;285;343;621;219", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 90.4, 38.753580479744066 ], "wc_strengths_avg": [ 51.6, 17.465394355696642 ], "wc_improvement_avg": [ 141.2, 114.74737469763743 ], "wc_limitations_avg": [ 40.6, 18.607525359380812 ], "wc_correctness_avg": [ 12.2, 13.644046320648432 ], "wc_clarity_avg": [ 15.8, 11.565465835840769 ], "wc_relation_to_prior_work_avg": [ 12.0, 17.84376641855637 ], "wc_documentation_avg": [ 8.8, 8.4 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 373.6, 145.66756674016355 ], "wc_reply_reviewers_avg": [ 19.0, 38.0 ], "wc_reply_authors_avg": [ 
452.8, 219.5234839373683 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6388765649999399, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9663227409941712488&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "wjqT8OBm0y", "title": "A Refutation of Shapley Values for Explainability", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent work demonstrated the existence of Boolean functions for which Shapley values provide misleading information about the\nrelative importance of features in rule-based explanations. Such misleading information was broadly categorized into a number of\npossible issues. Each of those issues relates to features being relevant or irrelevant for a prediction, and all are significant\nregarding the inadequacy of Shapley values for rule-based explainability. This earlier work devised a brute-force approach to identify Boolean functions, defined on small numbers of features, and also associated instances, which displayed such inadequacy-revealing issues, and so served as evidence of the inadequacy of Shapley values for rule-based explainability. However, an outstanding question is how frequently such inadequacy-revealing issues can occur for Boolean functions with arbitrarily large numbers of features. It is plain that a brute-force approach would be unlikely to provide insights on how to tackle this question.
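For readers unfamiliar with the setting, the following self-contained snippet computes exact Shapley values for feature attribution on a small Boolean function, with the characteristic function v(S) taken as the expectation of f when the features in S are fixed to the instance and the remaining features are uniform. This is the standard definition used in this line of work, not code from the submission.

from itertools import combinations
from math import factorial

def shapley(f, x):
    n = len(x)
    def v(S):
        # Expectation of f with features in S fixed to x, the rest uniform.
        free = [i for i in range(n) if i not in S]
        total = 0
        for bits in range(2 ** len(free)):
            z = list(x)
            for j, i in enumerate(free):
                z[i] = (bits >> j) & 1
            total += f(z)
        return total / (2 ** len(free))
    phi = []
    for i in range(n):
        rest = [j for j in range(n) if j != i]
        s = 0.0
        for k in range(n):
            for S in combinations(rest, k):
                w = factorial(k) * factorial(n - k - 1) / factorial(n)
                s += w * (v(set(S) | {i}) - v(set(S)))  # weighted marginal contribution
        phi.append(s)
    return phi

f = lambda z: int(z[0] and (z[1] or z[2]))   # a small Boolean function
print(shapley(f, (1, 1, 0)))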
This paper answers the above question by proving that, for any number of features, there exist Boolean functions that exhibit one or more inadequacy-revealing issues, thereby contributing decisive arguments against the use of Shapley values as the theoretical underpinning of feature-attribution methods in explainability.", "keywords": "Shapley values;Explainability", "primary_area": "", "supplementary_material": "/attachment/007dea4f3984d7d65ac14a2f4d3d8e2e47abd39d.pdf", "author": "Xuanxiang Huang;Joao Marques-Silva", "authorids": "~Xuanxiang_Huang1;~Joao_Marques-Silva1", "gender": "M;M", "homepage": "http://jpmarquessilva.github.io;", "dblp": "s/JoaoPMarquesSilva;https://dblp.uni-trier.de/pid/245/3594.html", "google_scholar": "1b9hppwAAAAJ;Zgajt7cAAAAJ", "orcid": "0000-0002-6632-3086;", "linkedin": "jpmarquessilva/;", "or_profile": "~Joao_Marques-Silva1;~Xuanxiang_Huang2", "aff": "CNRS;Universit\u00e9 de Toulouse", "aff_domain": "cnrs.fr;univ-toulouse.fr", "position": "Senior Researcher (Directeur de Recherche);PhD student", "bibtex": "@misc{\nhuang2023a,\ntitle={A Refutation of Shapley Values for Explainability},\nauthor={Xuanxiang Huang and Joao Marques-Silva},\nyear={2023},\nurl={https://openreview.net/forum?id=wjqT8OBm0y}\n}", "github": "", "project": "", "reviewers": "6JiF;QRCo;4AZv;HAap", "site": "https://openreview.net/forum?id=wjqT8OBm0y", "pdf_size": 424058, "rating": "3;4;4;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "1;2;2;2", "presentation": "2;4;3;4", "wc_summary": "23;367;178;92", "wc_strengths": "11;71;29;200", "wc_weaknesses": "24;179;44;120", "wc_questions": "1;13;613;67", "wc_limitations": "1;74;23;46", "wc_review": "60;704;887;525", "wc_reply_reviewers": "350;134;290;0", "wc_reply_authors": "469;309;231;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 165.0, 128.9050037818548 ], "wc_strengths_avg": [ 77.75, 73.8626258672138 ], "wc_weaknesses_avg": [ 91.75, 61.80766538221614 ], "wc_questions_avg": [ 173.5, 254.9602910258772 ], "wc_limitations_avg": [ 36.0, 27.101660465735304 ], "wc_review_avg": [ 544.0, 307.3540303949177 ], "wc_reply_reviewers_avg": [ 193.5, 136.73605961852198 ], "wc_reply_authors_avg": [ 252.25, 169.0286588126404 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17073457275599542518&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Centre National de la Recherche Scientifique;Universit\u00e9 de Toulouse", "aff_unique_dep": ";", "aff_unique_url": "https://www.cnrs.fr;https://www.univ-toulouse.fr", "aff_unique_abbr": "CNRS;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "VillanDiffusion: A Unified Backdoor Attack Framework for Diffusion Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70045", "id": "wkIBfnGPTA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6b055b95d689b1f704d8f92191cdb788-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=wkIBfnGPTA", "openreview": "https://openreview.net/forum?id=wkIBfnGPTA", "poster": "/media/PosterPDFs/NeurIPS%202023/70045.png?t=1701512357.205919", "slides": "https://nips.cc/virtual/2023/poster/70045", "video": "https://nips.cc/virtual/2023/poster/70045", "author_site": "Sheng-Yen Chou, Pin-Yu Chen, Tsung-Yi Ho", "tldr": "", "abstract": "Diffusion Models (DMs) are state-of-the-art generative models that learn a reversible corruption process from iterative noise addition and denoising. They are the backbone of many generative AI applications, such as text-to-image conditional generation. However, recent studies have shown that basic unconditional DMs (e.g., DDPM and DDIM) are vulnerable to backdoor injection, a type of output manipulation attack triggered by a maliciously embedded pattern at model input. This paper presents a unified backdoor attack framework (VillanDiffusion) to expand the current scope of backdoor analysis for DMs. Our framework covers mainstream unconditional and conditional DMs (denoising-based and score-based) and various training-free samplers for holistic evaluations. Experiments show that our unified framework facilitates the backdoor analysis of different DM configurations and provides new insights into caption-based backdoor attacks on DMs.", "keywords": "backdoor;diffusion model;trustworthy", "primary_area": "", "supplementary_material": "/attachment/8d44cea9fcc5f1d9e2a493638dc0fab9fa4a5d83.pdf", "author": "Sheng-Yen Chou;Pin-Yu Chen;Tsung-Yi Ho", "authorids": "~Sheng-Yen_Chou2;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "gender": ";M;M", "homepage": "https://frankccccc.github.io/blog/;http://www.pinyuchen.com;https://www.cse.cuhk.edu.hk/people/faculty/tsung-yi-ho/", "dblp": "336/3125;39/8969;63/4181.html", "google_scholar": "y4L5ImMAAAAJ;jxwlCUUAAAAJ;TRDUYkAAAAAJ", "orcid": ";0000-0003-1039-8369;0000-0001-7348-5625", "linkedin": "sheng-yen-chou-ba89b2184;pin-yu-chen-940062a2;", "or_profile": "~Sheng-Yen_Chou2;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "aff": "The Chinese University of Hong Kong;International Business Machines;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;ibm.com;cse.cuhk.edu.hk", "position": "Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nchou2023villandiffusion,\ntitle={VillanDiffusion: A Unified Backdoor Attack Framework for Diffusion Models},\nauthor={Sheng-Yen Chou and Pin-Yu Chen and Tsung-Yi Ho},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wkIBfnGPTA}\n}", "github": "", "project": "", "reviewers": "zhWX;FRNp;FG5D;mTjd", "pdf_size": 8067178, "rating": "5;5;5;6", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;2;2", "presentation": "3;2;2;3", "wc_summary": "38;26;39;71", "wc_strengths": "26;29;70;52", "wc_weaknesses": "282;104;32;65", "wc_questions": "5;16;167;4", "wc_limitations": "43;1;1;1", "wc_review": "394;176;309;193", "wc_reply_reviewers": "19;10;0;0", "wc_reply_authors": "23;11;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 43.5, 16.680827317612277 ], "wc_strengths_avg": [ 44.25, 17.949582167838894 ], "wc_weaknesses_avg": [ 120.75, 96.52298948955114 ], "wc_questions_avg": [ 48.0, 68.86581154680457 ], 
"wc_limitations_avg": [ 11.5, 18.186533479473212 ], "wc_review_avg": [ 268.0, 88.94661320140301 ], "wc_reply_reviewers_avg": [ 7.25, 7.917543811056558 ], "wc_reply_authors_avg": [ 8.5, 9.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16289864856633093116&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cuhk.edu.hk;ibm.com;cse.cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ibm.com", "aff_unique_abbr": "CUHK;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Maximization of Average Precision for Deep Learning with Adversarial Ranking Robustness", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70044", "id": "wm5Ane9VRO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31f04c174a6af322e9417b7a9a91097a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wm5Ane9VRO", "openreview": "https://openreview.net/forum?id=wm5Ane9VRO", "poster": "/media/PosterPDFs/NeurIPS%202023/70044.png?t=1702008366.0989876", "slides": "https://nips.cc/virtual/2023/poster/70044", "video": "https://nips.cc/virtual/2023/poster/70044", "author_site": "Gang Li, Gang Li, Wei Tong, Tianbao Yang", "tldr": "", "abstract": "This paper seeks to address a gap in optimizing Average Precision (AP) while ensuring adversarial robustness, an area that has not been extensively explored to the best of our knowledge. AP maximization for deep learning has widespread applications, particularly when there is a significant imbalance between positive and negative examples. Although numerous studies have been conducted on adversarial training, they primarily focus on robustness concerning accuracy, ensuring that the average accuracy on adversarially perturbed examples is well maintained. However, this type of adversarial robustness is insufficient for many applications, as minor perturbations on a single example can significantly impact AP while not greatly influencing the accuracy of the prediction system. To tackle this issue, we introduce a novel formulation that combines an AP surrogate loss with a regularization term representing adversarial ranking robustness, which maintains the consistency between ranking of clean data and that of perturbed data. We then devise an efficient stochastic optimization algorithm to optimize the resulting objective. Our empirical studies, which compare our method to current leading adversarial training baselines and other robust AP maximization strategies, demonstrate the effectiveness of the proposed approach. 
Notably, our methods outperform a state-of-the-art method (TRADES) by more than 4\\% in terms of robust AP against PGD attacks while achieving 7\\% higher AP on clean data simultaneously on CIFAR10 and CIFAR100. The code is available at: ", "keywords": "Adversarial Average Precision Maximization;Robust Average Precision;Adversarial Ranking Robustness;Adversarial Training", "primary_area": "", "supplementary_material": "/attachment/ecbe23298e563627376ef734de741ece17be6b70.zip", "author": "Gang Li;Wei Tong;Tianbao Yang", "authorids": "~Gang_Li17;wei.tong@gm.com;~Tianbao_Yang1", "gender": "M;;M", "homepage": "https://github.com/GangLii;;https://people.tamu.edu/~tianbao-yang/publications.html", "dblp": ";;56/7047", "google_scholar": ";;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gang_Li17;wei.tong@gm.com;~Tianbao_Yang1", "aff": "University of Iowa;;Texas A&M University - College Station", "aff_domain": "uiowa.edu;;tamu.edu", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nli2023maximization,\ntitle={Maximization of Average Precision for Deep Learning with Adversarial Ranking Robustness},\nauthor={Gang Li and Wei Tong and Tianbao Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wm5Ane9VRO}\n}", "github": "", "project": "", "reviewers": "Szkp;N7Ak;rUtg;axcz;YhER", "pdf_size": 1337308, "rating": "5;6;6;7;8", "confidence": "3;4;4;2;5", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "85;68;60;103;58", "wc_strengths": "28;49;26;77;90", "wc_weaknesses": "166;143;88;53;110", "wc_questions": "4;10;73;90;71", "wc_limitations": "13;10;5;32;3", "wc_review": "296;280;252;355;332", "wc_reply_reviewers": "23;14;0;28;123", "wc_reply_authors": "0;0;0;0;1683", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;4", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 74.8, 17.010584939971935 ], "wc_strengths_avg": [ 54.0, 25.729360660537214 ], "wc_weaknesses_avg": [ 112.0, 39.844698518121575 ], "wc_questions_avg": [ 49.6, 35.454759906111335 ], "wc_limitations_avg": [ 12.6, 10.326664514740468 ], "wc_review_avg": [ 303.0, 36.67151483099655 ], "wc_reply_reviewers_avg": [ 37.6, 43.747457068954304 ], "wc_reply_authors_avg": [ 336.6, 673.2 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 1.2 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.34615384615384615, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5765263590102241399&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uiowa.edu;;tamu.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Iowa;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uiowa.edu;https://www.tamu.edu", "aff_unique_abbr": "UIowa;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "BayesDAG: Gradient-Based Posterior Inference for Causal Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70043", "id": "woptnU6fh1", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/05cf28e3d3c9a179d789c55270fe6f72-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=woptnU6fh1", "openreview": "https://openreview.net/forum?id=woptnU6fh1", "poster": "/media/PosterPDFs/NeurIPS%202023/70043.png?t=1701865849.8188512", "slides": "https://nips.cc/virtual/2023/poster/70043", "video": "https://nips.cc/virtual/2023/poster/70043", "author_site": "Yashas Annadani, Nick Pawlowski, Joel Jennings, Joel Jennings, Stefan Bauer, Cheng Zhang, Wenbo Gong", "tldr": "", "abstract": "Bayesian causal discovery aims to infer the posterior distribution over causal models from observed data, quantifying epistemic uncertainty and benefiting downstream tasks. However, computational challenges arise due to joint inference over combinatorial space of Directed Acyclic Graphs (DAGs) and nonlinear functions. Despite recent progress towards efficient posterior inference over DAGs, existing methods are either limited to variational inference on node permutation matrices for linear causal models, leading to compromised inference accuracy, or continuous relaxation of adjacency matrices constrained by a DAG regularizer, which cannot ensure resulting graphs are DAGs. In this work, we introduce a scalable Bayesian causal discovery framework based on a combination of stochastic gradient Markov Chain Monte Carlo (SG-MCMC) and Variational Inference (VI) that overcomes these limitations. Our approach directly samples DAGs from the posterior without requiring any DAG regularization, simultaneously draws function parameter samples and is applicable to both linear and nonlinear causal models. To enable our approach, we derive a novel equivalence to the permutation-based DAG learning, which opens up possibilities of using any relaxed gradient estimator defined over permutations. To our knowledge, this is the first framework applying gradient-based MCMC sampling for causal discovery. 
Empirical evaluation on synthetic and real-world datasets demonstrate our approach's effectiveness compared to state-of-the-art baselines.", "keywords": "Causal Discovery;Structure Learning;Bayesian Inference;Variational Inference;MCMC;Generative Model", "primary_area": "", "supplementary_material": "", "author": "Yashas Annadani;Nick Pawlowski;Joel Jennings;Stefan Bauer;Cheng Zhang;Wenbo Gong", "authorids": "~Yashas_Annadani1;~Nick_Pawlowski2;~Joel_Jennings1;~Stefan_Bauer1;~Cheng_Zhang1;~Wenbo_Gong1", "gender": ";M;;F;M;", "homepage": "https://yashasannadani.com;http://nickpawlowski.de;https://cifar.ca/bios/stefan-bauer/;http://cheng-zhang.org;;", "dblp": "190/7411;198/1040;;82/6384-5;222/3265-1;217/1994", "google_scholar": "ExgzcVMAAAAJ;https://scholar.google.de/citations?user=a5u9fVYAAAAJ;O-oICE8AAAAJ;r40iAwIAAAAJ;J99ffO0AAAAJ;", "orcid": ";0000-0002-2748-7977;;;;", "linkedin": ";nickpawlowski;;;wenbo-gong-786867119/;", "or_profile": "~Yashas_Annadani1;~Nick_Pawlowski2;~Stefan_Bauer1;~Cheng_Zhang1;~Wenbo_Gong1;~Joel_Nicholas_Jennings1", "aff": "KTH Royal Institute of Technology;Broad Institute;KTH Royal Institute of Technology;Microsoft;Microsoft;Microsoft", "aff_domain": "kth.se;broadinstitute.org;kth.se;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;Visiting Researcher;Assistant Professor;Principal Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nannadani2023bayesdag,\ntitle={Bayes{DAG}: Gradient-Based Posterior Inference for Causal Discovery},\nauthor={Yashas Annadani and Nick Pawlowski and Joel Jennings and Stefan Bauer and Cheng Zhang and Wenbo Gong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=woptnU6fh1}\n}", "github": "", "project": "", "reviewers": "R7H9;hANm;zwvL;3g7c", "pdf_size": 700861, "rating": "5;5;6;7", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "194;71;103;54", "wc_strengths": "75;76;66;61", "wc_weaknesses": "234;97;222;120", "wc_questions": "17;183;51;39", "wc_limitations": "1;29;21;23", "wc_review": "521;456;463;297", "wc_reply_reviewers": "0;32;256;66", "wc_reply_authors": "80;58;179;148", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.5, 54.03933752369657 ], "wc_strengths_avg": [ 69.5, 6.264982043070834 ], "wc_weaknesses_avg": [ 168.25, 60.449875930393766 ], "wc_questions_avg": [ 72.5, 64.9519052838329 ], "wc_limitations_avg": [ 18.5, 10.523782589924593 ], "wc_review_avg": [ 434.25, 83.16061267210577 ], "wc_reply_reviewers_avg": [ 88.5, 99.48241050557631 ], "wc_reply_authors_avg": [ 116.25, 49.124204828169994 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8336893238735997446&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kth.se;broadinstitute.org;kth.se;microsoft.com;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;0;2;2;2", "aff_unique_norm": "KTH Royal Institute of Technology;Broad Institute;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": 
"https://www.kth.se;https://www.broadinstitute.org;https://www.microsoft.com", "aff_unique_abbr": "KTH;Broad;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "Sweden;United States" }, { "title": "ContinuAR: Continuous Autoregression For Infinite-Fidelity Fusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70042", "id": "wpfsnu5syT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/93cf20db85fabb0fd4bb89346510629c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wpfsnu5syT", "openreview": "https://openreview.net/forum?id=wpfsnu5syT", "poster": "/media/PosterPDFs/NeurIPS%202023/70042.png?t=1701423001.7056692", "slides": "https://nips.cc/virtual/2023/poster/70042", "video": "https://nips.cc/virtual/2023/poster/70042", "author_site": "WEI XING, Yuxin Wang, Zheng Xing", "tldr": "", "abstract": "Multi-fidelity fusion has become an important surrogate technique, which provides insights into expensive computer simulations and effectively improves decision-making, e.g., optimization, with less computational cost. Multi-fidelity fusion is much more computationally efficient compared to traditional single-fidelity surrogates. Despite the fast advancement of multi-fidelity fusion techniques, they lack a systematic framework to make use of the fidelity indicator, deal with high-dimensional and arbitrary data structure, and scale well to infinite-fidelity problems. In this work, we first generalize the popular autoregression (AR) to derive a novel linear fidelity differential equation (FiDE), paving the way to tractable infinite-fidelity fusion. We generalize FiDE to a high-dimensional system, which also provides a unifying framework to seemly bridge the gap between many multi- and single-fidelity GP-based models. We then propose ContinuAR, a rank-1 approximation solution to FiDEs, which is tractable to train, compatible with arbitrary multi-fidelity data structure, linearly scalable to the output dimension, and most importantly, delivers consistent SOTA performance with a significant margin over the baseline methods. Compared to the SOTA infinite-fidelity fusion, IFC, ContinuAR achieves up to 4x improvement in accuracy and 62,500x speedup in training time.", "keywords": "Gaussian process;autoregression;multi fidelity;nonparametric Bayesian", "primary_area": "", "supplementary_material": "/attachment/08d93054aeae64ff5a64fb55db472b5ba2b0a6af.zip", "author": "WEI W. XING;Yuxin Wang;Zheng Xing", "authorids": "~WEI_W._XING1;~Yuxin_Wang4;~Zheng_Xing2", "gender": "M;F;M", "homepage": ";;https://github.com/zen-xingle", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": ";yuxin-wang-742861249/;", "or_profile": "~WEI_W._XING1;~Yuxin_Wang4;~Zheng_Xing2", "aff": "University of Sheffield;National University of Singapore;", "aff_domain": "shef.ac.uk;nus.edu;", "position": "Lecturer;MS student;", "bibtex": "@inproceedings{\nxing2023continuar,\ntitle={Continu{AR}: Continuous Autoregression For Infinite-Fidelity Fusion},\nauthor={WEI W. 
XING and Yuxin Wang and Zheng Xing},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wpfsnu5syT}\n}", "github": "", "project": "", "reviewers": "Pa1c;5GR5;KCHb;JqSS", "pdf_size": 8046991, "rating": "3;6;6;7", "confidence": "3;3;3;4", "soundness": "2;3;2;4", "novelty": "2;3;3;4", "presentation": "1;3;2;4", "wc_summary": "124;49;35;133", "wc_strengths": "82;70;79;98", "wc_weaknesses": "210;73;102;132", "wc_questions": "120;23;595;110", "wc_limitations": "12;24;45;11", "wc_review": "548;239;856;484", "wc_reply_reviewers": "61;0;0;0", "wc_reply_authors": "172;0;55;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 85.25, 43.64845358085439 ], "wc_strengths_avg": [ 82.25, 10.108783309577865 ], "wc_weaknesses_avg": [ 129.25, 51.07531204016281 ], "wc_questions_avg": [ 212.0, 224.32008380882885 ], "wc_limitations_avg": [ 23.0, 13.693063937629153 ], "wc_review_avg": [ 531.75, 219.8776648502526 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 56.75, 70.22597453933979 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3551723343778249916&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "shef.ac.uk;nus.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Sheffield;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.sheffield.ac.uk;https://www.nus.edu.sg", "aff_unique_abbr": "Sheffield;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Singapore" }, { "title": "Granger Components Analysis: Unsupervised learning of latent temporal dependencies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70041", "id": "wqIm0Qsgy0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f66340d6f28dae6aab0176892c9065e7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wqIm0Qsgy0", "openreview": "https://openreview.net/forum?id=wqIm0Qsgy0", "poster": "/media/PosterPDFs/NeurIPS%202023/70041.png?t=1701479044.6855302", "slides": "https://nips.cc/virtual/2023/poster/70041", "video": "https://nips.cc/virtual/2023/poster/70041", "tldr": "", "abstract": "A new technique for unsupervised learning of time series data based on the notion of Granger causality is presented. The technique learns pairs of projections of a multivariate data set such that the resulting components -- \"driving\" and \"driven\" -- maximize the strength of the Granger causality between the latent time series (how strongly the past of the driving signal predicts the present of the driven signal). A coordinate descent algorithm that learns pairs of coefficient vectors in an alternating fashion is developed and shown to blindly identify the underlying sources (up to scale) on simulated vector autoregressive (VAR) data. 
The technique is tested on scalp electroencephalography (EEG) data from a motor imagery experiment where the resulting components lateralize with the side of the cued hand, and also on functional magnetic resonance imaging (fMRI) data, where the recovered components express previously reported resting-state networks.", "keywords": "components analysis;unsupervised learning;Granger Causality", "primary_area": "", "supplementary_material": "/attachment/6fdc80a90f1753d5da4ea1534acce180b76fb1cc.pdf", "author": "Jacek Dmochowski", "authorids": "~Jacek_Dmochowski2", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "c3FulGIAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Jacek_Dmochowski2", "aff": "City College of New York", "aff_domain": "bme.ccny.cuny.edu", "position": "Associate Professor", "bibtex": "@inproceedings{\ndmochowski2023granger,\ntitle={Granger Components Analysis: Unsupervised learning of latent temporal dependencies},\nauthor={Jacek Dmochowski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wqIm0Qsgy0}\n}", "github": "", "project": "", "reviewers": "XpwK;BFyu;nBt7;57Zq", "pdf_size": 5184432, "rating": "5;5;6;7", "confidence": "5;4;3;4", "soundness": "3;2;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "31;47;88;193", "wc_strengths": "31;25;46;56", "wc_weaknesses": "54;171;174;221", "wc_questions": "3;50;68;430", "wc_limitations": "40;23;11;31", "wc_review": "159;316;387;931", "wc_reply_reviewers": "35;30;26;272", "wc_reply_authors": "0;0;22;177", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.75, 63.13230155791883 ], "wc_strengths_avg": [ 39.5, 12.216791722870616 ], "wc_weaknesses_avg": [ 155.0, 61.59139550294343 ], "wc_questions_avg": [ 137.75, 170.39127765234932 ], "wc_limitations_avg": [ 26.25, 10.662434056068061 ], "wc_review_avg": [ 448.25, 290.6693783321525 ], "wc_reply_reviewers_avg": [ 90.75, 104.69330207802217 ], "wc_reply_authors_avg": [ 49.75, 74.01477892961648 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fa_EA1iUpCUJ:scholar.google.com/&scioq=Granger+Components+Analysis:+Unsupervised+learning+of+latent+temporal+dependencies&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "bme.ccny.cuny.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "City College of New York", "aff_unique_dep": "", "aff_unique_url": "https://www.ccny.cuny.edu", "aff_unique_abbr": "CCNY", "aff_campus_unique_index": "0", "aff_campus_unique": "New York", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Subject-driven Text-to-Image Generation via Apprenticeship Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70040", "id": "wv3bHyQbX7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6091bf1542b118287db4088bc16be8d9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wv3bHyQbX7", "openreview": "https://openreview.net/forum?id=wv3bHyQbX7", "poster": 
"/media/PosterPDFs/NeurIPS%202023/70040.png?t=1701914259.9753087", "slides": "https://nips.cc/virtual/2023/poster/70040", "video": "https://nips.cc/virtual/2023/poster/70040", "author_site": "Wenhu Chen, Hexiang Hu, Yandong Li, Nataniel Ruiz, Xuhui Jia, Ming-Wei Chang, William Cohen", "tldr": "", "abstract": "Recent text-to-image generation models like DreamBooth have made remarkable progress in generating highly customized images of a target subject, by fine-tuning an ``expert model'' for a given subject from a few examples.\nHowever, this process is expensive, since a new expert model must be learned for each subject. \nIn this paper, we present SuTI, a Subject-driven Text-to-Image generator that replaces subject-specific fine tuning with {in-context} learning.\nGiven a few demonstrations of a new subject, SuTI can instantly generate novel renditions of the subject in different scenes, without any subject-specific optimization.\nSuTI is powered by {apprenticeship learning}, where a single apprentice model is learned from data generated by a massive number of subject-specific expert models. \nSpecifically, we mine millions of image clusters from the Internet, each centered around a specific visual subject. We adopt these clusters to train a massive number of expert models, each specializing in a different subject. The apprentice model SuTI then learns to imitate the behavior of these fine-tuned experts. \nSuTI can generate high-quality and customized subject-specific images 20x faster than optimization-based SoTA methods. On the challenging DreamBench and DreamBench-v2, our human evaluation shows that SuTI significantly outperforms existing models like InstructPix2Pix, Textual Inversion, Imagic, Prompt2Prompt, Re-Imagen and DreamBooth.", "keywords": "Diffusion Model;Image Generation;Image Editing;In-Context Learning", "primary_area": "", "supplementary_material": "/attachment/eb68c9a4f4c3778ebf00d111b6b42a3b869a5041.pdf", "author": "Wenhu Chen;Hexiang Hu;YANDONG LI;Nataniel Ruiz;Xuhui Jia;Ming-Wei Chang;William W. Cohen", "authorids": "~Wenhu_Chen3;~Hexiang_Hu1;~YANDONG_LI1;~Nataniel_Ruiz1;~Xuhui_Jia1;~Ming-Wei_Chang3;~William_W._Cohen2", "gender": ";;M;M;M;;M", "homepage": ";;https://cold-winter.github.io/;https://natanielruiz.github.io/;https://scholar.google.com/citations?view_op=search_authors&mauthors=xuhui+jia&hl=en&oi=ao;;https://wwcohen.github.io/", "dblp": ";;;205/3222;116/8360;;c/WWCohen.html", "google_scholar": ";;kRLb6PkAAAAJ;https://scholar.google.fr/citations?user=CiOmcSIAAAAJ;https://scholar.google.com/citations?view_op=search_authors;;8ys-38kAAAAJ", "orcid": ";;0000-0003-2448-1294;;;;", "linkedin": ";;;nataniel-ruiz/;;;", "or_profile": "~Wenhu_Chen3;~Hexiang_Hu1;~YANDONG_LI1;~Nataniel_Ruiz1;~Xuhui_Jia1;~Ming-Wei_Chang3;~William_W._Cohen2", "aff": ";;Google;Boston University;Google;;Google DeepMind", "aff_domain": ";;google.com;bu.edu;google.com;;google.com", "position": ";;Software Engineer;PhD student;Researcher;;Principle Scientist", "bibtex": "@inproceedings{\nchen2023subjectdriven,\ntitle={Subject-driven Text-to-Image Generation via Apprenticeship Learning},\nauthor={Wenhu Chen and Hexiang Hu and YANDONG LI and Nataniel Ruiz and Xuhui Jia and Ming-Wei Chang and William W. 
Cohen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wv3bHyQbX7}\n}", "github": "", "project": "", "reviewers": "JnTg;HXFy;F8hk;6um5;9TJ3", "pdf_size": 18177065, "rating": "5;6;6;6;7", "confidence": "5;5;4;5;4", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "4;3;3;4;3", "wc_summary": "75;83;82;111;96", "wc_strengths": "27;45;32;69;200", "wc_weaknesses": "118;184;53;264;97", "wc_questions": "31;12;26;146;86", "wc_limitations": "20;7;1;1;15", "wc_review": "271;331;194;591;494", "wc_reply_reviewers": "97;38;51;22;0", "wc_reply_authors": "60;0;36;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;1;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 89.4, 12.753038853543888 ], "wc_strengths_avg": [ 74.6, 64.3633436048812 ], "wc_weaknesses_avg": [ 143.2, 73.7059020703227 ], "wc_questions_avg": [ 60.2, 49.74494949238566 ], "wc_limitations_avg": [ 8.8, 7.6 ], "wc_review_avg": [ 376.2, 145.82372920756072 ], "wc_reply_reviewers_avg": [ 41.6, 32.512151574449824 ], "wc_reply_authors_avg": [ 19.2, 24.709512338368796 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8387844834853806667&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";;google.com;bu.edu;google.com;;google.com", "author_num": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Google;Boston University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.bu.edu", "aff_unique_abbr": "Google;BU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "wv79UiY5U7", "title": "Data Curation for Image Captioning with Text-to-Image Generative Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "\n Recent advances in image captioning are driven by increasingly larger-scale vision--language pretraining, relying on massive computational resources and increasingly large datasets. Instead of solely focusing on scaling pretraining, we ask whether it is possible to improve performance by improving the quality of the samples in existing datasets. We pursue this question through two approaches to data curation: one that assumes that some examples should be avoided due to mismatches between the image and caption, and one that assumes that the mismatch can be addressed by replacing the image, for which we use the state-of-the-art Stable Diffusion model. These approaches are evaluated using the BLIP model on the COCO and Flickr30K datasets. Models trained with our data curation approaches consistently outperform their baselines, indicating that better image captioning models can be trained by curating existing resources. 
Finally, we conduct a human study to understand the errors made by the Stable Diffusion model and highlight directions for future work in text-to-image generation.", "keywords": "Vision-language learning;Image captioning;Data curation;Text-to-image generation;Stable Diffusion", "primary_area": "", "supplementary_material": "/attachment/533e45fa8e3782f3a974d771e3d63476637771ce.zip", "author": "Wenyan Li;Jonas F. Lotz;Chen Qiu;Desmond Elliott", "authorids": "~Wenyan_Li1;~Jonas_F._Lotz1;chen@wust.edu.cn;~Desmond_Elliott1", "gender": "F;M;;", "homepage": "https://wenyanli.org/;;;", "dblp": "21/6731-1;;;46/7536", "google_scholar": "JvcZHCsAAAAJ;rQi0nEcAAAAJ;;", "orcid": "0000-0001-7143-4453;0000-0001-6405-0590;;", "linkedin": ";jonas-f-lotz-ab7805113/;;", "or_profile": "~Wenyan_Li1;~Jonas_F._Lotz1;chen@wust.edu.cn;~Desmond_Elliott1", "aff": "University of Copenhagen;University of Copenhagen;;University of Copenhagen", "aff_domain": "di.ku;diku.dk;;ku.dk", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@misc{\nli2023data,\ntitle={Data Curation for Image Captioning with Text-to-Image Generative Models},\nauthor={Wenyan Li and Jonas F. Lotz and Chen Qiu and Desmond Elliott},\nyear={2023},\nurl={https://openreview.net/forum?id=wv79UiY5U7}\n}", "github": "", "project": "", "reviewers": "9L7M;ymVw;ndRN;tP6L;Hsed", "site": "https://openreview.net/forum?id=wv79UiY5U7", "pdf_size": 1735803, "rating": "3;3;3;4;4", "confidence": "4;5;5;4;4", "soundness": "2;1;2;2;3", "novelty": "2;1;2;2;2", "presentation": "2;2;4;3;3", "wc_summary": "132;65;111;56;58", "wc_strengths": "19;13;120;58;63", "wc_weaknesses": "252;99;497;98;102", "wc_questions": "1;51;77;33;53", "wc_limitations": "72;9;21;1;1", "wc_review": "476;237;826;246;277", "wc_reply_reviewers": "128;40;638;15;18", "wc_reply_authors": "204;0;0;29;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 3.4, 0.4898979485566356 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.0, 0.6324555320336759 ], "novelty_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 84.4, 31.15509589136262 ], "wc_strengths_avg": [ 54.6, 38.359353487774015 ], "wc_weaknesses_avg": [ 209.6, 155.3455503064056 ], "wc_questions_avg": [ 43.0, 25.234896472940004 ], "wc_limitations_avg": [ 20.8, 26.62630278502819 ], "wc_review_avg": [ 412.4, 224.45364777610544 ], "wc_reply_reviewers_avg": [ 167.8, 238.65992541689943 ], "wc_reply_authors_avg": [ 46.6, 79.49742134182719 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6666666666666665, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10411921439979449297&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "Adapting Fairness Interventions to Missing Values", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70039", "id": "wwkQUiaKbo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ba0ad9d1e0c737800b2340b9cd68c208-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wwkQUiaKbo", "openreview": 
"https://openreview.net/forum?id=wwkQUiaKbo", "poster": "/media/PosterPDFs/NeurIPS%202023/70039.png?t=1701482713.457251", "slides": "https://nips.cc/virtual/2023/poster/70039", "video": "https://nips.cc/virtual/2023/poster/70039", "author_site": "Raymond Feng, Flavio Calmon, Hao Wang", "tldr": "", "abstract": "Missing values in real-world data pose a significant and unique challenge to algorithmic fairness. Different demographic groups may be unequally affected by missing data, and the standard procedure for handling missing values where first data is imputed, then the imputed data is used for classification\u2014a procedure referred to as \"impute-then-classify\"\u2014can exacerbate discrimination. In this paper, we analyze how missing values affect algorithmic fairness. We first prove that training a classifier from imputed data can significantly worsen the achievable values of group fairness and average accuracy. This is because imputing data results in the loss of the missing pattern of the data, which often conveys information about the predictive label. We present scalable and adaptive algorithms for fair classification with missing values. These algorithms can be combined with any preexisting fairness-intervention algorithm to handle all possible missing patterns while preserving information encoded within the missing patterns. Numerical experiments with state-of-the-art fairness interventions demonstrate that our adaptive algorithms consistently achieve higher fairness and accuracy than impute-then-classify across different datasets.", "keywords": "algorithmic fairness;discrimination;missing values;machine learning", "primary_area": "", "supplementary_material": "", "author": "Raymond Feng;Flavio Calmon;Hao Wang", "authorids": "~Raymond_Feng1;~Flavio_Calmon1;~Hao_Wang22", "gender": "M;;M", "homepage": ";http://people.seas.harvard.edu/~flavio/;https://haowang94.github.io", "dblp": "276/5253;89/4611;", "google_scholar": ";P8N_YH4AAAAJ;A3WtYhAAAAAJ", "orcid": ";;", "linkedin": "raymond-feng-4a3473195/;;", "or_profile": "~Raymond_Feng1;~Flavio_Calmon1;~Hao_Wang22", "aff": "Harvard University;Harvard University;MIT-IBM Watson AI Lab", "aff_domain": "harvard.edu;harvard.edu;ibm.com", "position": "Undergrad student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nfeng2023adapting,\ntitle={Adapting Fairness Interventions to Missing Values},\nauthor={Raymond Feng and Flavio Calmon and Hao Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wwkQUiaKbo}\n}", "github": "", "project": "", "reviewers": "SEin;RyBS;oma2;ZFNz;GNEv", "pdf_size": 1392222, "rating": "5;6;6;7;7", "confidence": "4;4;4;2;3", "soundness": "3;4;3;3;2", "novelty": "2;4;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "89;39;134;113;97", "wc_strengths": "42;125;206;198;92", "wc_weaknesses": "174;119;308;127;196", "wc_questions": "35;246;71;20;76", "wc_limitations": "6;43;54;1;7", "wc_review": "346;572;773;459;468", "wc_reply_reviewers": "0;218;0;35;18", "wc_reply_authors": "0;639;0;42;28", "reply_reviewers": "0;2;0;1;1", "reply_authors": "1;3;1;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 94.4, 31.683434157300564 ], "wc_strengths_avg": [ 132.6, 62.576672970045315 ], "wc_weaknesses_avg": [ 184.8, 67.95704525654422 ], 
"wc_questions_avg": [ 89.6, 81.0150603283118 ], "wc_limitations_avg": [ 22.2, 21.84856974723975 ], "wc_review_avg": [ 523.6, 143.77009424772595 ], "wc_reply_reviewers_avg": [ 54.2, 82.9274381613227 ], "wc_reply_authors_avg": [ 141.8, 249.13161180388167 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8017837257372731, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6293771081454439088&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "harvard.edu;harvard.edu;ibm.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology", "aff_unique_dep": ";IBM Watson AI Lab", "aff_unique_url": "https://www.harvard.edu;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "Harvard;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Federated Learning with Bilateral Curation for Partially Class-Disjoint Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70038", "id": "wwmKVO8bsR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65b721a1df04c1098567f70d483d6468-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wwmKVO8bsR", "openreview": "https://openreview.net/forum?id=wwmKVO8bsR", "poster": "/media/PosterPDFs/NeurIPS%202023/70038.png?t=1699434401.38239", "slides": "https://nips.cc/virtual/2023/poster/70038", "video": "https://nips.cc/virtual/2023/poster/70038", "author_site": "Ziqing Fan, ruipeng zhang, Jiangchao Yao, Bo Han, Ya Zhang, Yanfeng Wang", "tldr": "", "abstract": "Partially class-disjoint data (PCDD), a common yet under-explored data formation where each client contributes a part of classes (instead of all classes) of samples, severely challenges the performance of federated algorithms. Without full classes, the local objective will contradict the global objective, yielding the angle collapse problem for locally missing classes and the space waste problem for locally existing classes. As far as we know, none of the existing methods can intrinsically mitigate PCDD challenges to achieve holistic improvement in the bilateral views (both global view and local view) of federated learning. To address this dilemma, we are inspired by the strong generalization of simplex Equiangular Tight Frame (ETF) on the imbalanced data, and propose a novel approach called FedGELA where the classifier is globally fixed as a simplex ETF while locally adapted to the personal distributions. Globally, FedGELA provides fair and equal discrimination for all classes and avoids inaccurate updates of the classifier, while locally it utilizes the space of locally missing classes for locally existing classes. 
We conduct extensive experiments on a range of datasets to demonstrate that our FedGELA achieves promising performance (averaged improvement of 3.9% to FedAvg and 1.5% to best baselines) and provide both local and global convergence guarantees.", "keywords": "federated learning;data heterogeneity;partially class-disjoint data", "primary_area": "", "supplementary_material": "/attachment/3486da65711600b5bbfe1d54b7bcc70c328f30cb.pdf", "author": "Ziqing Fan;Ruipeng Zhang;Jiangchao Yao;Bo Han;Ya Zhang;Yanfeng Wang", "authorids": "~Ziqing_Fan1;~Ruipeng_Zhang1;~Jiangchao_Yao1;~Bo_Han1;~Ya_Zhang1;~Yanfeng_Wang1", "gender": ";M;M;;F;M", "homepage": ";https://frankzhangrp.github.io/;https://sunarker.github.io/;;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/", "dblp": ";;166/5900;;85/3714-2;55/5407-1.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;w8oDh9QAAAAJ;;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-4372-4987;;;0000-0002-5390-9053;0000-0002-3196-2347", "linkedin": ";;;;;", "or_profile": "~Ziqing_Fan1;~Ruipeng_Zhang1;~Jiangchao_Yao1;~Bo_Han1;~Ya_Zhang1;~Yanfeng_Wang1", "aff": ";Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;pjlab.org.cn;;sjtu.edu.cn;sjtu.edu.cn", "position": ";PhD student;Researcher;;Professor;Full Professor", "bibtex": "@inproceedings{\nfan2023federated,\ntitle={Federated Learning with Bilateral Curation for Partially Class-Disjoint Data},\nauthor={Ziqing Fan and Ruipeng Zhang and Jiangchao Yao and Bo Han and Ya Zhang and Yanfeng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wwmKVO8bsR}\n}", "github": "", "project": "", "reviewers": "UNoN;J5UX;SUwJ;nvCV", "pdf_size": 876536, "rating": "5;6;6;6", "confidence": "3;2;3;2", "soundness": "3;3;3;3", "novelty": "3;3;2;2", "presentation": "3;3;3;3", "wc_summary": "93;364;132;69", "wc_strengths": "70;117;24;39", "wc_weaknesses": "215;74;53;27", "wc_questions": "2;61;286;58", "wc_limitations": "2;9;21;12", "wc_review": "382;625;516;205", "wc_reply_reviewers": "0;45;46;13", "wc_reply_authors": "68;33;228;24", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;5;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 164.5, 117.35522996441189 ], "wc_strengths_avg": [ 62.5, 35.57035282366482 ], "wc_weaknesses_avg": [ 92.25, 72.79895260235548 ], "wc_questions_avg": [ 101.75, 108.94121121045056 ], "wc_limitations_avg": [ 11.0, 6.819090848492928 ], "wc_review_avg": [ 432.0, 156.7912625116591 ], "wc_reply_reviewers_avg": [ 26.0, 20.03746490951388 ], "wc_reply_authors_avg": [ 88.25, 82.34189395441423 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5735405398717853446&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";sjtu.edu.cn;pjlab.org.cn;;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.sjtu.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "SJTU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Natural Actor-Critic for Robust Reinforcement Learning with Function Approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70037", "id": "wxkBdtDbmH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/007f4927e60699392425f267d43f0940-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wxkBdtDbmH", "openreview": "https://openreview.net/forum?id=wxkBdtDbmH", "poster": "/media/PosterPDFs/NeurIPS%202023/70037.png?t=1702082412.0368364", "slides": "https://nips.cc/virtual/2023/poster/70037", "video": "https://nips.cc/virtual/2023/poster/70037", "author_site": "Ruida Zhou, Tao Liu, Min Cheng, Dileep Kalathil, P. R. Kumar, Chao Tian", "tldr": "", "abstract": "We study robust reinforcement learning (RL) with the goal of determining a well-performing policy that is robust against model mismatch between the training simulator and the testing environment. Previous policy-based robust RL algorithms mainly focus on the tabular setting under uncertainty sets that facilitate robust policy evaluation, but are no longer tractable when the number of states scales up. To this end, we propose two novel uncertainty set formulations, one based on double sampling and the other on an integral probability metric. Both make large-scale robust RL tractable even when one only has access to a simulator. We propose a robust natural actor-critic (RNAC) approach that incorporates the new uncertainty sets and employs function approximation. We provide finite-time convergence guarantees for the proposed RNAC algorithm to the optimal robust policy within the function approximation error. 
Finally, we demonstrate the robust performance of the policy learned by our proposed RNAC approach in multiple MuJoCo environments and a real-world TurtleBot navigation task.", "keywords": "robust reinforcement learning;policy-based approach;function approximation;actor-critic", "primary_area": "", "supplementary_material": "/attachment/b359a211571674b3827c804c89ef4a44a85b2959.zip", "author": "Ruida Zhou;Tao Liu;Min Cheng;Dileep Kalathil;Panganamala Kumar;Chao Tian", "authorids": "~Ruida_Zhou1;~Tao_Liu8;~Min_Cheng2;~Dileep_Kalathil1;~Panganamala_Kumar1;~Chao_Tian2", "gender": "M;M;F;M;M;", "homepage": "https://sites.google.com/view/ruida-zhou;;;http://people.tamu.edu/~dileep.kalathil/;https://cesg.tamu.edu/faculty/p-r-kumar/;", "dblp": "215/2026;43/656-35.html;;44/8356;https://dblp.org/pers/k/Kumar:P=_R=.html;", "google_scholar": "kXbo1twAAAAJ;XQjEQ4MAAAAJ;VB0mNHgAAAAJ;S24XFwwAAAAJ;qGUpTVwAAAAJ;", "orcid": ";0000-0001-7879-5315;;;0000-0003-0389-5367;", "linkedin": ";tao-liu-a19661174/;min-cheng-602775211;;;", "or_profile": "~Ruida_Zhou1;~Tao_Liu8;~Min_Cheng2;~Dileep_Kalathil1;~Panganamala_Kumar1;~Chao_Tian2", "aff": "Texas A&M University;Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University;Texas A&M;", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu;", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nzhou2023natural,\ntitle={Natural Actor-Critic for Robust Reinforcement Learning with Function Approximation},\nauthor={Ruida Zhou and Tao Liu and Min Cheng and Dileep Kalathil and Panganamala Kumar and Chao Tian},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wxkBdtDbmH}\n}", "github": "", "project": "", "reviewers": "4Et9;haxo;Fq66;r1LZ", "pdf_size": 3086877, "rating": "5;6;6;7", "confidence": "4;2;1;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "48;72;64;195", "wc_strengths": "25;27;28;35", "wc_weaknesses": "123;145;32;58", "wc_questions": "21;41;58;64", "wc_limitations": "4;1;6;16", "wc_review": "221;286;188;368", "wc_reply_reviewers": "11;72;11;9", "wc_reply_authors": "23;42;61;20", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 58.52082962501471 ], "wc_strengths_avg": [ 28.75, 3.766629793329841 ], "wc_weaknesses_avg": [ 89.5, 46.10043383743802 ], "wc_questions_avg": [ 46.0, 16.718253497300488 ], "wc_limitations_avg": [ 6.75, 5.629165124598851 ], "wc_review_avg": [ 265.75, 68.76181716621515 ], "wc_reply_reviewers_avg": [ 25.75, 26.714930282521795 ], "wc_reply_authors_avg": [ 36.5, 16.469669092000604 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.31622776601683794, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14980046085931687414&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", 
"aff_campus_unique_index": "1;1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Searching for Optimal Per-Coordinate Step-sizes with Multidimensional Backtracking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70036", "id": "wzPcffMZ3b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/07e436cdeb48e2a67618274f5d5eff85-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wzPcffMZ3b", "openreview": "https://openreview.net/forum?id=wzPcffMZ3b", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70036", "video": "https://nips.cc/virtual/2023/poster/70036", "author_site": "Frederik Kunstner, Victor Sanches Portella, Mark Schmidt, Nicholas Harvey", "tldr": "", "abstract": "The backtracking line-search is an effective technique to automatically tune the step-size in smooth optimization. It guarantees similar performance to using the theoretically optimal step-size. Many approaches have been developed to instead tune per-coordinate step-sizes, also known as diagonal preconditioners, but none of the existing methods are provably competitive with the optimal per-coordinate step-sizes. We propose multidimensional backtracking, an extension of the backtracking line-search to find good diagonal preconditioners for smooth convex problems. Our key insight is that the gradient with respect to the step-sizes, also known as hyper-gradients, yields separating hyperplanes that let us search for good preconditioners using cutting-plane methods. As black-box cutting-plane approaches like the ellipsoid method are computationally prohibitive, we develop an efficient algorithm tailored to our setting. Multidimensional backtracking is provably competitive with the best diagonal preconditioner and requires no manual tuning.", "keywords": "line-search;gradient descent;hypergradient;adaptive methods;smooth;convex;optimization;preconditioning", "primary_area": "", "supplementary_material": "/attachment/b2305e2930108dac16f0b96b243003977df00513.zip", "author": "Frederik Kunstner;Victor S. Portella;Mark Schmidt;Nick Harvey", "authorids": "~Frederik_Kunstner1;~Victor_S._Portella1;~Mark_Schmidt1;~Nick_Harvey1", "gender": ";;M;", "homepage": "https://fkunstner.github.io/;;https://www.cs.ubc.ca/~nickhar/;https://www.ime.usp.br/~victorsp/", "dblp": "230/3921;35/2638;93/4141;266/8056", "google_scholar": "EhpYjPAAAAAJ;https://scholar.google.com/citations?hl=en;;qNG3KiIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Frederik_Kunstner1;~Mark_Schmidt1;~Nick_Harvey1;~Victor_Sanches_Portella1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;Department of Computer Science, University of British Columbia", "aff_domain": "cs.ubc.ca;ubc.ca;ubc.ca;cs.ubc.ca", "position": "PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nkunstner2023searching,\ntitle={Searching for Optimal Per-Coordinate Step-sizes with Multidimensional Backtracking},\nauthor={Frederik Kunstner and Victor S. 
Portella and Mark Schmidt and Nick Harvey},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wzPcffMZ3b}\n}", "github": "", "project": "", "reviewers": "eKZX;5i17;hZtF;JVBS", "pdf_size": 2933812, "rating": "5;6;8;8", "confidence": "1;1;4;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "2;2;4;4", "wc_summary": "129;51;36;106", "wc_strengths": "87;27;50;71", "wc_weaknesses": "36;45;20;1", "wc_questions": "59;1;1;79", "wc_limitations": "13;2;1;1", "wc_review": "324;126;108;258", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 2.5, 1.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 80.5, 38.252450901870326 ], "wc_strengths_avg": [ 58.75, 22.54301443906737 ], "wc_weaknesses_avg": [ 25.5, 16.740669042783207 ], "wc_questions_avg": [ 35.0, 34.72751070837067 ], "wc_limitations_avg": [ 4.25, 5.0682837331783235 ], "wc_review_avg": [ 204.0, 90.29950165975447 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9622504486493764, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11010992762001246099&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 7, "email": "cs.ubc.ca;ubc.ca;ubc.ca;cs.ubc.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Vancouver", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "FAST: a Fused and Accurate Shrinkage Tree for Heterogeneous Treatment Effects Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70035", "id": "wzg0BsV8rQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/01830c92c6558179fa6d7fb1edff692c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=wzg0BsV8rQ", "openreview": "https://openreview.net/forum?id=wzg0BsV8rQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70035.png?t=1701501212.166046", "slides": "https://nips.cc/virtual/2023/poster/70035", "video": "https://nips.cc/virtual/2023/poster/70035", "author_site": "Jia Gu, Caizhi Tang, Han Yan, Qing Cui, Longfei Li, Jun Zhou", "tldr": "", "abstract": "This paper proposes a novel strategy for estimating the heterogeneous treatment effect called the Fused and Accurate Shrinkage Tree ($\\mathrm{FAST}$). Our approach utilizes both trial and observational data to improve the accuracy and robustness of the estimator. Inspired by the concept of shrinkage estimation in statistics, we develop an optimal weighting scheme and a corresponding estimator that balances the unbiased estimator based on the trial data with the potentially biased estimator based on the observational data. Specifically, combined with tree-based techniques, we introduce a new split criterion that utilizes both trial data and observational data to more accurately estimate the treatment effect. 
Furthermore, we confirm the consistency of our proposed tree-based estimator and demonstrate the effectiveness of our criterion in reducing prediction error through theoretical analysis. The advantageous finite sample performance of the $\\mathrm{FAST}$ and its ensemble version over existing methods is demonstrated via simulations and real data analysis.", "keywords": "Data fusion;heterogeneous treatment effects estimation;shrinkage estimation;tree-based method", "primary_area": "", "supplementary_material": "/attachment/16ac27ea3997f9706aa69d07a7f8cadb70304a2d.pdf", "author": "Jia Gu;Caizhi Tang;Han Yan;Qing Cui;Longfei Li;JUN ZHOU", "authorids": "~Jia_Gu3;~Caizhi_Tang1;~Han_Yan3;~Qing_Cui1;~Longfei_Li3;~JUN_ZHOU6", "gender": "M;M;M;M;M;M", "homepage": "https://jia-gu.github.io;;https://www.songxichen.com/index.php/people/HanYan;;;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en", "dblp": ";;;18/4013;;99/3847-11", "google_scholar": "hlkePPcAAAAJ;ADyoI2oAAAAJ;;_SfHjS4AAAAJ;x5rfjSEAAAAJ;mCVvloEAAAAJ", "orcid": ";;;0000-0002-4909-4568;;0000-0001-6033-6102", "linkedin": ";;;;;", "or_profile": "~Jia_Gu3;~Caizhi_Tang1;~Han_Yan3;~Qing_Cui1;~Longfei_Li3;~JUN_ZHOU6", "aff": "Peking University;;Peking University;Ant Group;Alibaba Group;Ant Group", "aff_domain": "pku.edu.cn;;pku.edu.cn;antgroup.com;alibaba-inc.com;antgroup.com", "position": "PhD student;;PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ngu2023fast,\ntitle={{FAST}: a Fused and Accurate Shrinkage Tree for Heterogeneous Treatment Effects Estimation},\nauthor={Jia Gu and Caizhi Tang and Han Yan and Qing Cui and Longfei Li and JUN ZHOU},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=wzg0BsV8rQ}\n}", "github": "", "project": "", "reviewers": "jsqm;7DZZ;YpW7;7Nsn;tnj1", "pdf_size": 0, "rating": "3;6;6;7;7", "confidence": "3;2;3;4;3", "soundness": "2;3;3;4;3", "novelty": "2;2;3;3;4", "presentation": "2;3;3;4;3", "wc_summary": "108;79;62;82;87", "wc_strengths": "69;17;73;70;63", "wc_weaknesses": "210;62;294;57;38", "wc_questions": "148;1;2;57;136", "wc_limitations": "4;1;10;3;16", "wc_review": "539;160;441;269;340", "wc_reply_reviewers": "105;22;0;0;6", "wc_reply_authors": "322;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 83.6, 14.813507349712962 ], "wc_strengths_avg": [ 58.4, 20.95328136593407 ], "wc_weaknesses_avg": [ 132.2, 101.67477563289728 ], "wc_questions_avg": [ 68.8, 63.224678725953204 ], "wc_limitations_avg": [ 6.8, 5.491812087098392 ], "wc_review_avg": [ 349.8, 131.72911599187174 ], "wc_reply_reviewers_avg": [ 26.6, 40.017995951821476 ], "wc_reply_authors_avg": [ 64.4, 128.8 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.21516574145596756, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4574849132673405445&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;;pku.edu.cn;antgroup.com;alibaba-inc.com;antgroup.com", "author_num": 6, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Peking University;Ant Group;Alibaba Group", "aff_unique_dep": ";;", 
"aff_unique_url": "http://www.pku.edu.cn;https://www.antgroup.com;https://www.alibaba.com", "aff_unique_abbr": "Peking U;Ant Group;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "x1FgW3vSM6", "title": "Flag Aggregator: Scalable Distributed Training under Failures and Augmented Losses using Convex Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern ML applications increasingly rely on complex deep learning models and large datasets. There has been an exponential growth in the amount of computation needed to train the largest models. Therefore, to scale computation and data, these models are inevitably trained in a distributed manner in clusters of nodes, and their updates are aggregated before being applied to the model. However, a distributed setup is prone to Byzantine failures of individual nodes, components, and software. With data augmentation added to these settings, there is a critical need for robust and efficient aggregation systems. We define the quality of workers as reconstruction ratios $\\in (0,1]$, and formulate aggregation as a Maximum Likelihood Estimation procedure using Beta densities. We show that the Regularized form of log-likelihood wrt subspace can be approximately solved using iterative least squares solver, and provide convergence guarantees using recent Convex Optimization landscape results. Our empirical findings demonstrate that our approach significantly enhances the robustness of state-of-the-art Byzantine resilient aggregators. We evaluate our method in a distributed setup with a parameter server, and show simultaneous improvements in communication efficiency and accuracy across various tasks.", "keywords": "Robust;Aggregation;Distributed;Training;Failure;Augmented;Byzantine;Resilience", "primary_area": "", "supplementary_material": "/attachment/fc8cddfee42324c3a678cc3566ba57390e2522f9.zip", "author": "Hamidreza Almasi;Harsh Mishra;Balajee Vamanan;Sathya N. Ravi", "authorids": "~Hamidreza_Almasi1;~Harsh_Mishra1;~Balajee_Vamanan1;~Sathya_N._Ravi1", "gender": "M;M;M;M", "homepage": "https://hamidralmasi.github.io/;;https://www.cs.uic.edu/~balajee/;http://sathyaravi.com", "dblp": "241/0508;;;159/2123", "google_scholar": "OkUGKRAAAAAJ;;https://scholar.google.com.tw/citations?user=GKvAsQMAAAAJ;FW-0thoAAAAJ", "orcid": "0000-0002-4479-6464;;;0000-0003-3881-6323", "linkedin": "hamidralmasi/;harsh-mishra-515624144;;sathya-narayanan-ravi-74a5a128/", "or_profile": "~Hamidreza_Almasi1;~Harsh_Mishra1;~Balajee_Vamanan1;~Sathya_N._Ravi1", "aff": "University of Illinois Chicago;University of Illinois at Chicago;University of Illinois at Chicago;University of Illinois, Chicago", "aff_domain": "uic.edu;uic.edu;uic.edu;uic.edu", "position": "PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nalmasi2023flag,\ntitle={Flag Aggregator: Scalable Distributed Training under Failures and Augmented Losses using Convex Optimization},\nauthor={Hamidreza Almasi and Harsh Mishra and Balajee Vamanan and Sathya N. 
Ravi},\nyear={2023},\nurl={https://openreview.net/forum?id=x1FgW3vSM6}\n}", "github": "", "project": "", "reviewers": "1ghS;AykZ;Pt2v;rLy5;Zke2", "site": "https://openreview.net/forum?id=x1FgW3vSM6", "pdf_size": 1333253, "rating": "3;5;6;6;6", "confidence": "4;1;3;3;3", "soundness": "1;2;3;2;3", "novelty": "2;3;3;2;3", "presentation": "1;3;3;2;2", "wc_summary": "53;61;75;121;84", "wc_strengths": "21;26;40;126;32", "wc_weaknesses": "441;86;102;225;75", "wc_questions": "10;18;111;276;88", "wc_limitations": "1;1;1;51;12", "wc_review": "526;192;329;799;291", "wc_reply_reviewers": "15;29;13;77;21", "wc_reply_authors": "38;33;33;36;34", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 78.8, 23.684594148939937 ], "wc_strengths_avg": [ 49.0, 39.01794458963722 ], "wc_weaknesses_avg": [ 185.8, 138.50833909913152 ], "wc_questions_avg": [ 100.6, 95.9824984046571 ], "wc_limitations_avg": [ 13.2, 19.374209661299734 ], "wc_review_avg": [ 427.4, 215.17862347361552 ], "wc_reply_reviewers_avg": [ 31.0, 23.664319132398465 ], "wc_reply_authors_avg": [ 34.8, 1.9390719429665317 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.31506301890630223, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17474371774670655893&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Taming Local Effects in Graph-based Spatiotemporal Forecasting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70034", "id": "x2PH6q32LR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ad58c61c71efd5436134a3ecc87da6ea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x2PH6q32LR", "openreview": "https://openreview.net/forum?id=x2PH6q32LR", "poster": "/media/PosterPDFs/NeurIPS%202023/70034.png?t=1701421904.8800166", "slides": "https://nips.cc/virtual/2023/poster/70034", "video": "https://nips.cc/virtual/2023/poster/70034", "author_site": "Andrea Cini, Ivan Marisca, Daniele Zambon, Cesare Alippi", "tldr": "", "abstract": "Spatiotemporal graph neural networks have been shown to be effective in time series forecasting applications, achieving better performance than standard univariate predictors in several settings. These architectures take advantage of a graph structure and relational inductive biases to learn a single (global) inductive model to predict any number of the input time series, each associated with a graph node. Despite the gain achieved in computational and data efficiency w.r.t. fitting a set of local models, relying on a single global model can be a limitation whenever some of the time series are generated by a different spatiotemporal stochastic process. 
The main objective of this paper is to understand the interplay between globality and locality in graph-based spatiotemporal forecasting, while contextually proposing a methodological framework to rationalize the practice of including trainable node embeddings in such architectures. We ascribe to trainable node embeddings the role of amortizing the learning of specialized components. Moreover, embeddings allow for 1) effectively combining the advantages of shared message-passing layers with node-specific parameters and 2) efficiently transferring the learned model to new node sets. Supported by strong empirical evidence, we provide insights and guidelines for specializing graph-based models to the dynamics of each time series and show how this aspect plays a crucial role in obtaining accurate predictions.", "keywords": "time series forecasting;spatiotemporal forecasting;graph-based spatiotemporal forecasting;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Andrea Cini;Ivan Marisca;Daniele Zambon;Cesare Alippi", "authorids": "~Andrea_Cini1;~Ivan_Marisca1;~Daniele_Zambon1;~Cesare_Alippi1", "gender": "M;M;;M", "homepage": "https://andreacini.github.io/;https://marshka.github.io/;https://dzambon.github.io/;https://alippi.faculty.polimi.it/", "dblp": "249/8223;298/8039;185/1319;84/6337", "google_scholar": "bQI2UIUAAAAJ;loKgz80AAAAJ;https://scholar.google.ch/citations?user=JaQGQEwAAAAJ;SCZObbIAAAAJ", "orcid": ";0000-0002-9713-1626;0000-0003-3722-9784;", "linkedin": ";ivanmarisca;;", "or_profile": "~Andrea_Cini1;~Ivan_Marisca1;~Daniele_Zambon1;~Cesare_Alippi1", "aff": "Imperial College London;Universit\u00e0 della Svizzera Italiana;Universita della Svizzera Italiana;Politecnico di Milano", "aff_domain": "ic.ac.uk;usi.ch;usi.ch;polimi.it", "position": "Visiting PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ncini2023taming,\ntitle={Taming Local Effects in Graph-based Spatiotemporal Forecasting},\nauthor={Andrea Cini and Ivan Marisca and Daniele Zambon and Cesare Alippi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x2PH6q32LR}\n}", "github": "", "project": "", "reviewers": "K1NT;jVAx;Lyxe;Zmtu", "pdf_size": 544294, "rating": "4;5;6;7", "confidence": "5;4;3;3", "soundness": "3;3;3;4", "novelty": "1;3;3;4", "presentation": "2;1;3;3", "wc_summary": "180;92;127;87", "wc_strengths": "35;52;115;47", "wc_weaknesses": "238;84;87;243", "wc_questions": "47;18;21;232", "wc_limitations": "10;2;1;9", "wc_review": "510;248;351;618", "wc_reply_reviewers": "93;12;17;29", "wc_reply_authors": "305;53;8;12", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 121.5, 37.12478956169314 ], "wc_strengths_avg": [ 62.25, 31.075512867851433 ], "wc_weaknesses_avg": [ 163.0, 77.52741450609584 ], "wc_questions_avg": [ 79.5, 88.76513955376852 ], "wc_limitations_avg": [ 5.5, 4.031128874149275 ], "wc_review_avg": [ 431.75, 142.38745555701178 ], "wc_reply_reviewers_avg": [ 37.75, 32.491345001399985 ], "wc_reply_authors_avg": [ 94.5, 122.8016693697606 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9438798074485388, 
"gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13206728541185938606&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ic.ac.uk;usi.ch;usi.ch;polimi.it", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Imperial College London;Universit\u00e0 della Svizzera italiana;Universita della Svizzera Italiana;Politecnico di Milano", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.usi.ch;https://www.usi.ch;https://www.polimi.it", "aff_unique_abbr": "ICL;USI;USI;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United Kingdom;Switzerland;Italy" }, { "title": "Online Constrained Meta-Learning: Provable Guarantees for Generalization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70033", "id": "x2xQEszznV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/320e941f53db45bddc8757d1c8c4f6aa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x2xQEszznV", "openreview": "https://openreview.net/forum?id=x2xQEszznV", "poster": "/media/PosterPDFs/NeurIPS%202023/70033.png?t=1697266801.8527439", "slides": "https://nips.cc/virtual/2023/poster/70033", "video": "https://nips.cc/virtual/2023/poster/70033", "author_site": "Siyuan Xu, Minghui Zhu", "tldr": "", "abstract": "Meta-learning has attracted attention due to its strong ability to learn experiences from known tasks, which can speed up and enhance the learning process for new tasks. However, most existing meta-learning approaches only can learn from tasks without any constraint. This paper proposes an online constrained meta-learning framework, which continuously learns meta-knowledge from sequential learning tasks, and the learning tasks are subject to hard constraints. Beyond existing meta-learning analyses, we provide the upper bounds of optimality gaps and constraint violations produced by the proposed framework, which considers the dynamic regret of online learning, as well as the generalization ability of the task-specific models. 
Moreover, we provide a practical algorithm for the framework, and validate its superior effectiveness through experiments conducted on meta-imitation learning and few-shot image classification.", "keywords": "meta-learning; generalization", "primary_area": "", "supplementary_material": "/attachment/2a007094a051af2766872b5f413efecdd20c1501.zip", "author": "Siyuan Xu;Minghui Zhu", "authorids": "~Siyuan_Xu4;~Minghui_Zhu1", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": "ZV1580IAAAAJ;", "orcid": ";", "linkedin": "siyuan-xu-45b2b1169/;", "or_profile": "~Siyuan_Xu4;~Minghui_Zhu1", "aff": "Pennsylvania State University;", "aff_domain": "psu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nxu2023online,\ntitle={Online Constrained Meta-Learning: Provable Guarantees for Generalization},\nauthor={Siyuan Xu and Minghui Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x2xQEszznV}\n}", "github": "", "project": "", "reviewers": "MvUS;FYCS;CTub;jLc8", "pdf_size": 6113233, "rating": "6;6;7;8", "confidence": "3;3;4;4", "soundness": "3;3;4;3", "novelty": "3;2;3;3", "presentation": "2;2;2;3", "wc_summary": "79;77;88;108", "wc_strengths": "117;33;74;25", "wc_weaknesses": "200;65;214;28", "wc_questions": "79;98;73;69", "wc_limitations": "26;13;32;9", "wc_review": "501;286;481;239", "wc_reply_reviewers": "37;11;21;34", "wc_reply_authors": "0;0;0;59", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 12.267844146385297 ], "wc_strengths_avg": [ 62.25, 36.66998091082132 ], "wc_weaknesses_avg": [ 126.75, 81.4597293145515 ], "wc_questions_avg": [ 79.75, 11.121488209767612 ], "wc_limitations_avg": [ 20.0, 9.354143466934854 ], "wc_review_avg": [ 376.75, 115.66843778663218 ], "wc_reply_reviewers_avg": [ 25.75, 10.425329730996522 ], "wc_reply_authors_avg": [ 14.75, 25.54774941164094 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17109204405802006133&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "psu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Stochastic Distributed Optimization under Average Second-order Similarity: Algorithms and Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70032", "id": "x5JCDCvR4b", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05e552739c2629f3324c1063a382b4bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x5JCDCvR4b", "openreview": "https://openreview.net/forum?id=x5JCDCvR4b", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70032", "video": "https://nips.cc/virtual/2023/poster/70032", "author_site": "Dachao Lin, Yuze Han, Haishan Ye, Zhihua Zhang", "tldr": "", "abstract": "We study finite-sum distributed optimization problems involving a master node and 
$n-1$ local nodes under the popular $\\delta$-similarity and $\\mu$-strong convexity conditions. We propose two new algorithms, SVRS and AccSVRS, motivated by previous works. The non-accelerated SVRS method combines the techniques of gradient sliding and variance reduction and achieves a better communication complexity of $\\tilde{\\mathcal{O}}(n {+} \\sqrt{n}\\delta/\\mu)$ compared to existing non-accelerated algorithms. Applying the framework proposed in Katyusha X, we also develop a directly accelerated version named AccSVRS with the $\\tilde{\\mathcal{O}}(n {+} n^{3/4}\\sqrt{\\delta/\\mu})$ communication complexity. In contrast to existing results, our complexity bounds are entirely smoothness-free and exhibit superiority in ill-conditioned cases. Furthermore, we establish a nearly matched lower bound to verify the tightness of our AccSVRS method.", "keywords": "distributed optimization;convex optimization;second-order similarity;client sampling", "primary_area": "", "supplementary_material": "/attachment/6889d3759b52456ea09b03e8185a11fc63e78b18.pdf", "author": "Dachao Lin;Yuze Han;Haishan Ye;Zhihua Zhang", "authorids": "~Dachao_Lin1;~Yuze_Han1;~Haishan_Ye2;~Zhihua_Zhang1", "gender": "M;;M;M", "homepage": ";;;http://www.math.pku.edu.cn/teachers/zhzhang/", "dblp": "76/8488;;162/0002.html;52/5331", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Dachao_Lin1;~Yuze_Han1;~Haishan_Ye2;~Zhihua_Zhang1", "aff": "Peking University;;Xi'an Jiaotong University;Peking University", "aff_domain": "pku.edu.cn;;xjtu.edu.cn;pku.edu.cn", "position": "PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlin2023stochastic,\ntitle={Stochastic Distributed Optimization under Average Second-order Similarity: Algorithms and Analysis},\nauthor={Dachao Lin and Yuze Han and Haishan Ye and Zhihua Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x5JCDCvR4b}\n}", "github": "", "project": "", "reviewers": "Y65R;ZtXX;FHTz;pJR8;LQ8y", "pdf_size": 760010, "rating": "4;5;5;7;8", "confidence": "3;4;1;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;4", "presentation": "2;2;3;3;3", "wc_summary": "304;95;70;63;45", "wc_strengths": "30;32;59;46;42", "wc_weaknesses": "251;330;48;51;187", "wc_questions": "64;1;22;145;1", "wc_limitations": "1;15;6;1;1", "wc_review": "650;473;205;306;276", "wc_reply_reviewers": "19;360;0;16;32", "wc_reply_authors": "31;678;31;0;0", "reply_reviewers": "1;3;0;1;1", "reply_authors": "2;5;2;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 115.4, 95.65479601149124 ], "wc_strengths_avg": [ 41.8, 10.476640682967034 ], "wc_weaknesses_avg": [ 173.4, 110.84872574820155 ], "wc_questions_avg": [ 46.6, 54.312429516640115 ], "wc_limitations_avg": [ 4.8, 5.455272678794342 ], "wc_review_avg": [ 382.0, 160.27850760473157 ], "wc_reply_reviewers_avg": [ 85.4, 137.67730386668677 ], "wc_reply_authors_avg": [ 148.0, 265.362393718477 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.24845199749997662, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9757545411586831188&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, 
"email": "pku.edu.cn;;xjtu.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Xi'an Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.xjtu.edu.cn", "aff_unique_abbr": "Peking U;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Improving *day-ahead* Solar Irradiance Time Series Forecasting by Leveraging Spatio-Temporal Context", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70031", "id": "x5ZruOa4ax", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/070a57c5ef1e58cc90201b11d369b3c2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x5ZruOa4ax", "openreview": "https://openreview.net/forum?id=x5ZruOa4ax", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70031", "video": "https://nips.cc/virtual/2023/poster/70031", "author_site": "Oussama Boussif, Ghait Boukachab, Dan Assouline, Stefano Massaroli, Tianle Yuan, Loubna Benabbou, Yoshua Bengio", "tldr": "", "abstract": "Solar power harbors immense potential in mitigating climate change by substantially reducing CO$_{2}$ emissions. Nonetheless, the inherent variability of solar irradiance poses a significant challenge for seamlessly integrating solar power into the electrical grid. While the majority of prior research has centered on employing purely time series-based methodologies for solar forecasting, only a limited number of studies have taken into account factors such as cloud cover or the surrounding physical context.\nIn this paper, we put forth a deep learning architecture designed to harness spatio-temporal context using satellite data, to attain highly accurate day-ahead time-series forecasting for any given station, with a particular emphasis on forecasting Global Horizontal Irradiance (GHI). We also suggest a methodology to extract a distribution for each time step prediction, which can serve as a very valuable measure of uncertainty attached to the forecast. When evaluating models, we propose a testing scheme in which we separate particularly difficult examples from easy ones, in order to capture the model performances in crucial situations, which in the case of this study are the days suffering from varying cloudy conditions. Furthermore, we present a new multi-modal dataset gathering satellite imagery over a large zone and time series for solar irradiance and other related physical variables from multiple geographically diverse solar stations. 
Our approach exhibits robust performance in solar irradiance forecasting, including zero-shot generalization tests at unobserved solar stations, and holds great promise in promoting the effective integration of solar power into the grid.", "keywords": "Time series forecasting;multi-modal learning;solar irradiance;context-enriched learning", "primary_area": "", "supplementary_material": "/attachment/32445f73741250cab6030f30c8e98949e6ff847b.pdf", "author": "Oussama Boussif;Ghait Boukachab;Dan Assouline;Stefano Massaroli;Tianle Yuan;Loubna Benabbou;Yoshua Bengio", "authorids": "~Oussama_Boussif1;~Ghait_Boukachab1;~Dan_Assouline1;~Stefano_Massaroli1;~Tianle_Yuan1;~Loubna_Benabbou1;~Yoshua_Bengio1", "gender": "M;M;M;;M;F;M", "homepage": "https://jaggbow.github.io/;;;;;https://www.uqar.ca/universite/a-propos-de-l-uqar/departements/unites-departementales-des-sciences-de-la-gestion/benabbou-lobna;http://yoshuabengio.org", "dblp": "321/0990;;229/5075;;233/5791;;56/953", "google_scholar": "RwtLLioAAAAJ;https://scholar.google.ca/citations?view_op=search_authors;https://scholar.google.ca/citations?user=y0BUUIgAAAAJ;IwCfl4UAAAAJ;https://scholar.google.com/citations?hl=en;S8bzEmUAAAAJ;kukA0LcAAAAJ", "orcid": ";;;;0000-0002-2187-3017;;", "linkedin": "oussama-boussif/;ghait-boukachab/;dan-assouline-4201735b/;;;lbenabbou/;yoshuabengio/?originalSubdomain=ca", "or_profile": "~Oussama_Boussif1;~Ghait_Boukachab1;~Dan_Assouline1;~Stefano_Massaroli1;~Tianle_Yuan1;~Loubna_Benabbou1;~Yoshua_Bengio1", "aff": "Universit\u00e9 de Montr\u00e9al;Universit\u00e9 du Qu\u00e9bec \u00e0 Rimouski;Mila (Quebec Artificial Intelligence Institute) + Universit\u00e9 de Montr\u00e9al;MILA;UMBC GESTAR-II;;University of Montreal", "aff_domain": "umontreal.ca;uqar.uquebec.ca;mila.umontreal.ca;mila.quebec;umbc.edu;;umontreal.ca", "position": "PhD student;MS student;Postdoc;Postdoc;Researcher;;Full Professor", "bibtex": "@inproceedings{\nboussif2023improving,\ntitle={Improving *day-ahead* Solar Irradiance Time Series Forecasting by Leveraging Spatio-Temporal Context},\nauthor={Oussama Boussif and Ghait Boukachab and Dan Assouline and Stefano Massaroli and Tianle Yuan and Loubna Benabbou and Yoshua Bengio},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x5ZruOa4ax}\n}", "github": "", "project": "", "reviewers": "WJ1J;zf5t;eJfQ;1GwN", "pdf_size": 12551104, "rating": "6;6;7;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "101;73;26;126", "wc_strengths": "62;53;93;59", "wc_weaknesses": "269;30;70;129", "wc_questions": "18;157;249;108", "wc_limitations": "1;2;9;69", "wc_review": "451;315;447;491", "wc_reply_reviewers": "612;310;174;210", "wc_reply_authors": "809;732;37;60", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 37.12478956169314 ], "wc_strengths_avg": [ 66.75, 15.497983739828868 ], "wc_weaknesses_avg": [ 124.5, 90.55523176492896 ], "wc_questions_avg": [ 133.0, 83.48952030045447 ], "wc_limitations_avg": [ 20.25, 28.314086600135983 ], "wc_review_avg": [ 426.0, 66.35510530471637 ], "wc_reply_reviewers_avg": [ 326.5, 172.1997386757599 ], "wc_reply_authors_avg": [ 409.5, 362.11634870577166 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], 
"replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1617081239881254138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umontreal.ca;uqar.uquebec.ca;mila.umontreal.ca;mila.quebec;umbc.edu;;umontreal.ca", "author_num": 7, "aff_unique_index": "0;1;2+0;3;4;5", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Universit\u00e9 du Qu\u00e9bec \u00e0 Rimouski;Quebec Artificial Intelligence Institute;Mila;University of Maryland, Baltimore County;University of Montreal", "aff_unique_dep": ";;Artificial Intelligence;;GESTAR-II;", "aff_unique_url": "https://www.umontreal.ca;https://www.uqar.ca;https://mila.quebec;https://mila.quebec;https://www.umbc.edu;https://wwwumontreal.ca", "aff_unique_abbr": "UdeM;UQAR;Mila;MILA;UMBC;UM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Rimouski;Montreal", "aff_country_unique_index": "0;0;0+0;0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "x5aH1we8Bb", "title": "Adv3D: Generating 3D Adversarial Examples in Driving Scenarios with NeRF", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep neural networks (DNNs) have been proven extremely susceptible to adversarial examples, which raises special safety-critical concerns for DNN-based autonomous driving stacks (i.e., 3D object detection). Although there are extensive works on image-level attacks, most are restricted to 2D pixel spaces, and such attacks are not always physically realistic in our 3D world. Here we present Adv3D, the first exploration of modeling adversarial examples as Neural Radiance Fields (NeRFs). Advances in NeRF provide photorealistic appearances and 3D accurate generation, yielding a more realistic and realizable adversarial example. We train our adversarial NeRF by minimizing the surrounding objects' confidence predicted by 3D detectors on the training set. Then we evaluate Adv3D on the unseen validation set and show that it can cause a large performance reduction when rendering NeRF in any sampled pose. To generate physically realizable adversarial examples, we propose primitive-aware sampling and semantic-guided regularization that enable 3D patch attacks with camouflage adversarial texture. Experimental results demonstrate that the trained adversarial NeRF generalizes well to different poses, scenes, and 3D detectors. 
Finally, we provide a defense method against our attacks that involves adversarial training through data augmentation.\n", "keywords": "Adversarial Examples;Autonomous Driving;3D Object Detection;NeRF", "primary_area": "", "supplementary_material": "/attachment/d1c994c40e78ba50e5c8b7d830e1afdd25b0634d.pdf", "author": "Leheng Li;Qing LIAN;Ying-Cong Chen", "authorids": "~Leheng_Li1;~Qing_LIAN3;~Ying-Cong_Chen1", "gender": "M;M;M", "homepage": "https://len-li.github.io/;https://www.lianqing11.github.io;https://www.yingcong.me/", "dblp": "285/6899;234/4406;137/6578", "google_scholar": ";;https://scholar.google.com.hk/citations?user=n7j4bJUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Leheng_Li1;~Qing_LIAN3;~Ying-Cong_Chen1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "connect.hkust-gz.edu.cn;ust.hk;hkust-gz.edu.cn", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nli2023advd,\ntitle={Adv3D: Generating 3D Adversarial Examples in Driving Scenarios with Ne{RF}},\nauthor={Leheng Li and Qing LIAN and Ying-Cong Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=x5aH1we8Bb}\n}", "github": "", "project": "", "reviewers": "RUyN;Xm3G;RYj2;R8Y1;s7Sz", "site": "https://openreview.net/forum?id=x5aH1we8Bb", "pdf_size": 2169813, "rating": "4;4;5;5;7", "confidence": "5;4;4;4;5", "soundness": "2;3;3;4;2", "novelty": "2;2;3;3;3", "presentation": "3;3;2;3;4", "wc_summary": "264;128;99;63;60", "wc_strengths": "34;94;53;75;82", "wc_weaknesses": "53;380;53;233;149", "wc_questions": "5;79;34;62;177", "wc_limitations": "17;12;8;35;1", "wc_review": "373;693;247;468;469", "wc_reply_reviewers": "191;0;0;85;22", "wc_reply_authors": "414;0;0;298;17", "reply_reviewers": "2;0;0;1;1", "reply_authors": "3;1;1;2;2", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 122.8, 74.90100132842016 ], "wc_strengths_avg": [ 67.6, 21.453204888780604 ], "wc_weaknesses_avg": [ 173.6, 123.1447928253566 ], "wc_questions_avg": [ 71.4, 58.48965720535555 ], "wc_limitations_avg": [ 14.6, 11.46472851837321 ], "wc_review_avg": [ 450.0, 146.19986320103038 ], "wc_reply_reviewers_avg": [ 59.6, 72.70378257009742 ], "wc_reply_authors_avg": [ 145.8, 175.613666894123 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.372677996249965, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16632669849778633231&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "FedNAR: Federated Optimization with Normalized Annealing Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70030", "id": "x5fs7TXKDc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ec52572b9e16b91edff5dc70e2642240-Abstract-Conference.html",
"pdf": "https://openreview.net/pdf?id=x5fs7TXKDc", "openreview": "https://openreview.net/forum?id=x5fs7TXKDc", "poster": "/media/PosterPDFs/NeurIPS%202023/70030.png?t=1698227223.6345634", "slides": "https://nips.cc/virtual/2023/poster/70030", "video": "https://nips.cc/virtual/2023/poster/70030", "author_site": "Junbo Li, Ang Li, Chong Tian, Qirong Ho, Eric Xing, Hongyi Wang", "tldr": "", "abstract": "Weight decay is a standard technique to improve generalization performance in modern deep neural network optimization, and is also widely adopted in federated learning (FL) to prevent overfitting in local clients. In this paper, we first explore the choices of weight decay and identify that weight decay value appreciably influences the convergence of existing FL algorithms. While preventing overfitting is crucial, weight decay can introduce a different optimization goal towards the global objective, which is further amplified in FL due to multiple local updates and heterogeneous data distribution.\nTo address this challenge, we develop {\\it Federated optimization with Normalized Annealing Regularization} (FedNAR), a simple yet effective and versatile algorithmic plug-in that can be seamlessly integrated into any existing FL algorithms. Essentially, we regulate the magnitude of each update by performing co-clipping of the gradient and weight decay.\nWe provide a comprehensive theoretical analysis of FedNAR's convergence rate and conduct extensive experiments on both vision and language datasets with different backbone federated optimization algorithms. Our experimental results consistently demonstrate that incorporating FedNAR into existing FL algorithms leads to accelerated convergence and heightened model accuracy. Moreover, FedNAR exhibits resilience in the face of various hyperparameter configurations. Specifically, FedNAR has the ability to self-adjust the weight decay when the initial specification is not optimal, while the accuracy of traditional FL algorithms would markedly decline. 
Our codes are released at \\href{https://anonymous.4open.science/r/fednar-BE8F}{https://anonymous.4open.science/r/fednar-BE8F}.", "keywords": "Federated learning;weight decay;adaptive hyperparameters", "primary_area": "", "supplementary_material": "/attachment/5db8aef7c9c95939adaaea31fc7facfde6fbd650.pdf", "author": "Junbo Li;Ang Li;Chong Tian;Qirong Ho;Eric Xing;Hongyi Wang", "authorids": "~Junbo_Li3;~Ang_Li6;~Chong_Tian2;~Qirong_Ho1;~Eric_Xing1;~Hongyi_Wang1", "gender": "M;M;M;;M;M", "homepage": "https://ljb121002.github.io/;https://www.ang-li.com;https://github.com/RefrainTC;;http://www.cs.cmu.edu/~epxing/;https://hwang595.github.io/", "dblp": "74/8487-3;33/2805-5;;13/7590;36/3855;15/832-1.html", "google_scholar": "XVSPtCoAAAAJ;JVKSaWIAAAAJ;;tR3AZbwAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;zYdZORsAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;hongyi-wang-b89651102/", "or_profile": "~Junbo_Li3;~Ang_Li6;~Chong_Tian2;~Qirong_Ho1;~Eric_Xing1;~Hongyi_Wang1", "aff": "University of California, Santa Cruz;Duke University;Mohamed bin Zayed University of Artificial Intelligence;Petuum, Inc.;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "ucsc.edu;duke.edu;mbzuai.ac.ae;petuum.com;cs.cmu.edu;andrew.cmu.edu", "position": "MS student;PhD student;MS student;CTO;Full Professor;Researcher", "bibtex": "@inproceedings{\nli2023fednar,\ntitle={Fed{NAR}: Federated Optimization with Normalized Annealing Regularization},\nauthor={Junbo Li and Ang Li and Chong Tian and Qirong Ho and Eric Xing and Hongyi Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x5fs7TXKDc}\n}", "github": "", "project": "", "reviewers": "t5c6;8tVP;D8ei;4Utv", "pdf_size": 1637613, "rating": "6;6;7;7", "confidence": "3;3;4;3", "soundness": "4;3;4;3", "novelty": "3;3;4;3", "presentation": "4;3;4;3", "wc_summary": "119;76;104;82", "wc_strengths": "73;86;152;54", "wc_weaknesses": "55;157;63;94", "wc_questions": "87;33;30;38", "wc_limitations": "1;11;29;5", "wc_review": "335;363;378;273", "wc_reply_reviewers": "0;17;16;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.25, 17.224619008848933 ], "wc_strengths_avg": [ 91.25, 36.873940662749895 ], "wc_weaknesses_avg": [ 92.25, 40.12091100660602 ], "wc_questions_avg": [ 47.0, 23.27015255644019 ], "wc_limitations_avg": [ 11.5, 10.712142642814275 ], "wc_review_avg": [ 337.25, 40.17695234833025 ], "wc_reply_reviewers_avg": [ 14.0, 8.514693182963201 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16100139070532095632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucsc.edu;duke.edu;mbzuai.ac.ae;petuum.com;cs.cmu.edu;andrew.cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;4", "aff_unique_norm": "University of California, Santa Cruz;Duke University;Mohamed bin Zayed University of Artificial Intelligence;Petuum, Inc.;Carnegie Mellon University", "aff_unique_dep": ";;;;School of Computer Science", "aff_unique_url": 
"https://www.ucsc.edu;https://www.duke.edu;https://mbzuai.ac.ae;https://www.petuum.com;https://www.cmu.edu", "aff_unique_abbr": "UCSC;Duke;MBZUAI;Petuum;CMU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Santa Cruz;;Pittsburgh", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "Neural Ideal Large Eddy Simulation: Modeling Turbulence with Neural Stochastic Differential Equations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70029", "id": "x6cOcxRnxG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dabaded617b3be96c3ed161498a7d71c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x6cOcxRnxG", "openreview": "https://openreview.net/forum?id=x6cOcxRnxG", "poster": "/media/PosterPDFs/NeurIPS%202023/70029.png?t=1701578684.3231542", "slides": "https://nips.cc/virtual/2023/poster/70029", "video": "https://nips.cc/virtual/2023/poster/70029", "author_site": "Anudhyan Boral, Zhong Yi Wan, Leonardo Zepeda-N\u00fa\u00f1ez, James Lottes, Qing Wang, Yi-Fan Chen, John Anderson, Fei Sha", "tldr": "", "abstract": "We introduce a data-driven learning framework that assimilates two powerful ideas: ideal large eddy simulation (LES) from turbulence closure modeling and neural stochastic differential equations (SDE) for stochastic modeling. The ideal LES models the LES flow by treating each full-order trajectory as a random realization of the underlying dynamics, as such, the effect of small-scales is marginalized to obtain the deterministic evolution of the LES state. However, ideal LES is analytically intractable. In our work, we use a latent neural SDE to model the evolution of the stochastic process and an encoder-decoder pair for transforming between the latent space and the desired ideal flow field. This stands in sharp contrast to other types of neural parameterization of closure models where each trajectory is treated as a deterministic realization of the dynamics. We show the effectiveness of our approach (niLES \u2013 neural ideal LES) on two challenging chaotic dynamical systems: Kolmogorov flow at a Reynolds number of 20,000 and flow past a cylinder at Reynolds number 500. Compared to competing methods, our method can handle non-uniform geometries using unstructured meshes seamlessly. In particular, niLES leads to trajectories with more accurate statistics and enhances stability, particularly for long-horizon rollouts. 
(Source codes and datasets will be made publicly available.)", "keywords": "partial differential equations;physics;turbulence;stochastic differential equations;physical simulation;neural differential equations", "primary_area": "", "supplementary_material": "/attachment/35f98faf9e58d942609fbf42a91e3233580b5e2d.pdf", "author": "Anudhyan Boral;Zhong Yi Wan;Leonardo Zepeda-Nunez;James Lottes;Qing Wang;Yi-Fan Chen;John Roberts Anderson;Fei Sha", "authorids": "~Anudhyan_Boral1;~Zhong_Yi_Wan1;~Leonardo_Zepeda-Nunez1;jlottes@google.com;~Qing_Wang16;yifanchen@google.com;janders@google.com;~Fei_Sha3", "gender": ";M;M;;M;;;", "homepage": ";;https://www.math.wisc.edu/~lzepeda/;;;;;", "dblp": ";338/6288;;;97/6505-32;;;", "google_scholar": ";T1FxBHsAAAAJ;qbMVyzQAAAAJ;;https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;;0000-0002-9414-5184;;;", "linkedin": ";zhong1wan/;;;john-qingwang/;;;", "or_profile": "~Anudhyan_Boral1;~Zhong_Yi_Wan1;~Leonardo_Zepeda-Nunez1;jlottes@google.com;~Qing_Wang16;yifanchen@google.com;janders@google.com;~Fei_Sha3", "aff": ";Google;University of Wisconsin, Madison;;Google;;;", "aff_domain": ";google.com;wisc.edu;;google.com;;;", "position": ";Researcher;Assistant Professor;;Researcher;;;", "bibtex": "@inproceedings{\nboral2023neural,\ntitle={Neural Ideal Large Eddy Simulation: Modeling Turbulence with Neural Stochastic Differential Equations},\nauthor={Anudhyan Boral and Zhong Yi Wan and Leonardo Zepeda-Nunez and James Lottes and Qing Wang and Yi-Fan Chen and John Roberts Anderson and Fei Sha},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x6cOcxRnxG}\n}", "github": "", "project": "", "reviewers": "a9bB;GBgQ;SnNJ;hr9G", "pdf_size": 11683542, "rating": "6;6;7;8", "confidence": "3;3;4;5", "soundness": "3;3;3;3", "novelty": "3;3;2;4", "presentation": "3;3;3;4", "wc_summary": "96;33;58;51", "wc_strengths": "44;30;36;37", "wc_weaknesses": "194;16;455;148", "wc_questions": "25;35;13;95", "wc_limitations": "1;4;4;5", "wc_review": "360;118;566;336", "wc_reply_reviewers": "190;20;63;11", "wc_reply_authors": "529;0;28;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 22.96192500641007 ], "wc_strengths_avg": [ 36.75, 4.968651728587948 ], "wc_weaknesses_avg": [ 203.25, 159.35710683869735 ], "wc_questions_avg": [ 42.0, 31.575306807693888 ], "wc_limitations_avg": [ 3.5, 1.5 ], "wc_review_avg": [ 345.0, 158.64740779476983 ], "wc_reply_reviewers_avg": [ 71.0, 71.45977889694313 ], "wc_reply_authors_avg": [ 139.25, 225.31242198334294 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5358699660713105736&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";google.com;wisc.edu;;google.com;;;", "author_num": 8, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;University of Wisconsin", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.wisc.edu", "aff_unique_abbr": "Google;UW", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Mountain View;Madison", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "title": "Lending Interaction Wings to Recommender Systems with Conversational Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70028", "id": "x7q7w07r6Y", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58cd3b02902d79aea4b3b603fb0d0941-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x7q7w07r6Y", "openreview": "https://openreview.net/forum?id=x7q7w07r6Y", "poster": "/media/PosterPDFs/NeurIPS%202023/70028.png?t=1698486417.2931535", "slides": "https://nips.cc/virtual/2023/poster/70028", "video": "https://nips.cc/virtual/2023/poster/70028", "author_site": "Jiarui Jin, Xianyu Chen, Fanghua Ye, Mengyue Yang, Yue Feng, Weinan Zhang, Yong Yu, Jun Wang", "tldr": "", "abstract": "An intelligent conversational agent (a.k.a., chat-bot) could embrace conversational technologies to obtain user preferences online, to overcome inherent limitations of recommender systems trained over the offline historical user behaviors. In this paper, we propose CORE, a new offline-training and online-checking framework to plug a COnversational agent into REcommender systems. Unlike most prior conversational recommendation approaches that systemically combine conversational and recommender parts through a reinforcement learning framework, CORE bridges the conversational agent and recommender system through a unified uncertainty minimization framework, which can be easily applied to any existing recommendation approach. Concretely, CORE treats a recommender system as an offline estimator to produce an estimated relevance score for each item, while CORE regards a conversational agent as an online checker that checks these estimated scores in each online session. We define uncertainty as the sum of unchecked relevance scores. In this regard, the conversational agent acts to minimize uncertainty via querying either attributes or items. Towards uncertainty minimization, we derive the certainty gain of querying each attribute and item, and develop a novel online decision tree algorithm to decide what to query at each turn. Our theoretical analysis reveals the bound of the expected number of turns of CORE in a cold-start setting. 
Experimental results demonstrate that CORE can be seamlessly employed on a variety of recommendation approaches, and can consistently bring significant improvements in both hot-start and cold-start settings.", "keywords": "Conversational Agent;Recommender System;Conversational Recommendation", "primary_area": "", "supplementary_material": "/attachment/def998b205eac673347476f26b56ed11d1fa9bf9.pdf", "author": "Jiarui Jin;Xianyu Chen;Fanghua Ye;Mengyue Yang;Yue Feng;Weinan Zhang;Yong Yu;Jun Wang", "authorids": "~Jiarui_Jin1;~Xianyu_Chen2;~Fanghua_Ye1;~Mengyue_Yang1;~Yue_Feng1;~Weinan_Zhang1;~Yong_Yu1;~Jun_Wang2", "gender": "M;;M;F;F;M;;M", "homepage": "https://jinjiarui.github.io/;;https://www.fanghuaye.xyz/;https://ymy4323460.github.io/;;http://wnzhang.net;https://apex.sjtu.edu.cn/members/yyu;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": "241/9563;179/0991;203/0957;262/3824.html;13/6965-2.html;28/10261-1;43/5685.html;w/JunWang12", "google_scholar": "unCPHQEAAAAJ;https://scholar.google.com.hk/citations?user=TdAE7MEAAAAJ;UXN7iUsAAAAJ;kJJkqdcAAAAJ;ZNOC0lYAAAAJ;Qzss0GEAAAAJ;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": "0000-0001-6458-1586;0000-0002-9368-9526;;;;0000-0002-0127-2425;0000-0003-4457-2820;", "linkedin": "jiarui-jerry-jin-ba4a84176/;;fanghua-ye-81084587/;;;;;", "or_profile": "~Jiarui_Jin1;~Xianyu_Chen2;~Fanghua_Ye1;~Mengyue_Yang1;~Yue_Feng1;~Weinan_Zhang1;~Yong_Yu1;~Jun_Wang2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;University College London;University College London;University College London;Shanghai Jiaotong University;Shanghai Jiaotong University;University College London", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk", "position": "PhD student;MS student;PhD student;PhD student;PhD student;Associate Professor;Full Professor;Professor", "bibtex": "@inproceedings{\njin2023lending,\ntitle={Lending Interaction Wings to Recommender Systems with Conversational Agents},\nauthor={Jiarui Jin and Xianyu Chen and Fanghua Ye and Mengyue Yang and Yue Feng and Weinan Zhang and Yong Yu and Jun Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x7q7w07r6Y}\n}", "github": "", "project": "", "reviewers": "HpBp;7XeM;SPx5;48VR;URPy;8sN6", "pdf_size": 690467, "rating": "4;4;6;6;6;7", "confidence": "4;3;3;4;4;3", "soundness": "2;2;3;3;2;3", "novelty": "2;1;2;3;3;3", "presentation": "1;2;2;4;3;3", "wc_summary": "118;195;45;55;101;122", "wc_strengths": "48;21;54;49;138;130", "wc_weaknesses": "189;388;382;65;270;267", "wc_questions": "95;29;64;40;1;124", "wc_limitations": "130;10;40;1;26;1", "wc_review": "580;643;585;210;536;644", "wc_reply_reviewers": "17;107;179;10;35;16", "wc_reply_authors": "0;738;703;0;0;0", "reply_reviewers": "1;1;2;1;1;1", "reply_authors": "1;2;2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 106.0, 49.44357052910587 ], "wc_strengths_avg": [ 73.33333333333333, 44.23296910174079 ], "wc_weaknesses_avg": [ 260.1666666666667, 111.41725878675868 ], "wc_questions_avg": [ 58.833333333333336, 41.172064423452085 ], "wc_limitations_avg": [ 34.666666666666664, 44.846652296712435 ], "wc_review_avg": [ 533.0, 149.26039885604845 ], "wc_reply_reviewers_avg": [ 60.666666666666664, 62.286078344647414 ], 
"wc_reply_authors_avg": [ 240.16666666666666, 339.7972025913234 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.14907119849998596, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4257454034831555006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk", "author_num": 8, "aff_unique_index": "0;0;1;1;1;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "SJTU;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Recasting Continual Learning as Sequence Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70027", "id": "x816mCbWpR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dee254cdacbab59f17dc6a8fbdffa59f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x816mCbWpR", "openreview": "https://openreview.net/forum?id=x816mCbWpR", "poster": "/media/PosterPDFs/NeurIPS%202023/70027.png?t=1700112452.0411932", "slides": "https://nips.cc/virtual/2023/poster/70027", "video": "https://nips.cc/virtual/2023/poster/70027", "author_site": "Soochan Lee, Jaehyeon Son, Gunhee Kim", "tldr": "", "abstract": "In this work, we aim to establish a strong connection between two significant bodies of machine learning research: continual learning and sequence modeling.\nThat is, we propose to formulate continual learning as a sequence modeling problem, allowing advanced sequence models to be utilized for continual learning.\nUnder this formulation, the continual learning process becomes the forward pass of a sequence model.\nBy adopting the meta-continual learning (MCL) framework, we can train the sequence model at the meta-level, on multiple continual learning episodes.\nAs a specific example of our new formulation, we demonstrate the application of Transformers and their efficient variants as MCL methods.\nOur experiments on seven benchmarks, covering both classification and regression, show that sequence models can be an attractive solution for general MCL.", "keywords": "meta-continual learning;sequence modeling;Transformers;efficient Transformers", "primary_area": "", "supplementary_material": "/attachment/8d1fe475265181209887384953538b25c2e91235.zip", "author": "Soochan Lee;Jaehyeon Son;Gunhee Kim", "authorids": "~Soochan_Lee1;~Jaehyeon_Son1;~Gunhee_Kim1", "gender": "M;M;M", "homepage": "https://soochanlee.com;https://jaehyeon-son.github.io/;http://vision.snu.ac.kr/gunhee/", "dblp": "230/1398;359/3097.html;45/115", "google_scholar": "8O3MKJkAAAAJ;q7SrBsgAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "orcid": "0000-0002-1425-9262;0009-0004-2726-1144;0000-0002-9543-7453", "linkedin": ";jaehyeon-son-a626202b3/;", "or_profile": "~Soochan_Lee1;~Jaehyeon_Son1;~Gunhee_Kim1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nlee2023recasting,\ntitle={Recasting Continual Learning as Sequence 
Modeling},\nauthor={Soochan Lee and Jaehyeon Son and Gunhee Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x816mCbWpR}\n}", "github": "", "project": "", "reviewers": "FrpD;JJ3m;3kpM;N8DE", "pdf_size": 1662019, "rating": "4;6;6;6", "confidence": "4;5;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;2", "presentation": "4;3;3;4", "wc_summary": "60;120;32;122", "wc_strengths": "93;101;34;61", "wc_weaknesses": "345;282;77;54", "wc_questions": "88;7;71;44", "wc_limitations": "66;16;1;66", "wc_review": "652;526;215;347", "wc_reply_reviewers": "382;22;0;45", "wc_reply_authors": "567;16;0;106", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 83.5, 38.79110722833263 ], "wc_strengths_avg": [ 72.25, 26.677471769266294 ], "wc_weaknesses_avg": [ 189.5, 126.24678213720934 ], "wc_questions_avg": [ 52.5, 30.598202561588483 ], "wc_limitations_avg": [ 37.25, 29.23503890881625 ], "wc_review_avg": [ 435.0, 166.96855991473365 ], "wc_reply_reviewers_avg": [ 112.25, 156.55091024966927 ], "wc_reply_authors_avg": [ 172.25, 231.46314501449254 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8250709984016325165&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Thrust: Adaptively Propels Large Language Models with External Knowledge", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70026", "id": "x9FOu3W6iy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/dd058e9ec9dc012a273594d717c46ef3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=x9FOu3W6iy", "openreview": "https://openreview.net/forum?id=x9FOu3W6iy", "poster": "/media/PosterPDFs/NeurIPS%202023/70026.png?t=1702071875.8039157", "slides": "https://nips.cc/virtual/2023/poster/70026", "video": "https://nips.cc/virtual/2023/poster/70026", "author_site": "Xinran Zhao, Hongming Zhang, Xiaoman Pan, Wenlin Yao, Dong Yu, Jianshu Chen", "tldr": "", "abstract": "Although large-scale pre-trained language models (PTLMs) are shown to encode rich knowledge in their model parameters, the inherent knowledge in PTLMs can be opaque or static, making external knowledge necessary. However, the existing information retrieval techniques could be costly and may even introduce noisy and sometimes misleading knowledge. To address these challenges, we propose the instance-level adaptive propulsion of external knowledge (IAPEK), where we only conduct the retrieval when necessary. To achieve this goal, we propose to model whether a PTLM contains enough knowledge to solve an instance with a novel metric, Thrust, which leverages the representation distribution of a small amount of seen instances. 
Extensive experiments demonstrate that Thrust is a good measurement of models' instance-level knowledgeability. Moreover, we can achieve higher cost-efficiency with the Thrust score as the retrieval indicator than the naive usage of external knowledge on 88% of the evaluated tasks with 26% average performance improvement. Such findings shed light on the real-world practice of knowledge-enhanced LMs with a limited budget for knowledge seeking due to computation latency or costs.", "keywords": "knowledge-intensive natural language processing;pre-trained language models;instance-level adaptive knowledge usage", "primary_area": "", "supplementary_material": "/attachment/2f5c8e664649e9672afac27fec5df577186b4535.zip", "author": "Xinran Zhao;Hongming Zhang;Xiaoman Pan;Wenlin Yao;Dong Yu;Jianshu Chen", "authorids": "~Xinran_Zhao1;~Hongming_Zhang2;~Xiaoman_Pan2;~Wenlin_Yao1;~Dong_Yu2;~Jianshu_Chen1", "gender": "M;M;M;M;M;M", "homepage": "https://colinzhaoust.github.io/;http://www.cse.ust.hk/~hzhangal/;https://wenlinyao.github.io/;https://sites.google.com/view/dongyu888/;https://chenjianshu.github.io/;https://panx27.github.io/homepage/", "dblp": "155/0562;;203/8711;71/4598-1;11/3124;148/9210", "google_scholar": "iX71amEAAAAJ;i5ETuuQAAAAJ;qwo2A24AAAAJ;tMY31_gAAAAJ;jQeFWdoAAAAJ;tRPF03IAAAAJ", "orcid": ";;;0000-0003-0520-6844;;", "linkedin": ";;;dongyu/;;", "or_profile": "~Xinran_Zhao1;~Hongming_Zhang2;~Wenlin_Yao1;~Dong_Yu2;~Jianshu_Chen1;~Xiaoman_Pan1", "aff": "Stanford University;Tencent AI Lab Seattle;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab", "aff_domain": "stanford.edu;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com", "position": "MS student;Researcher;Researcher;Distinguished Scientist;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nzhao2023thrust,\ntitle={Thrust: Adaptively Propels Large Language Models with External Knowledge},\nauthor={Xinran Zhao and Hongming Zhang and Xiaoman Pan and Wenlin Yao and Dong Yu and Jianshu Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=x9FOu3W6iy}\n}", "github": "", "project": "", "reviewers": "2iTX;RnNH;LAud;eb2S", "pdf_size": 841730, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "142;129;72;103", "wc_strengths": "57;64;32;153", "wc_weaknesses": "82;286;42;162", "wc_questions": "10;53;7;69", "wc_limitations": "16;33;161;17", "wc_review": "307;565;314;504", "wc_reply_reviewers": "0;12;87;27", "wc_reply_authors": "0;9;25;60", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.5, 26.781523481684157 ], "wc_strengths_avg": [ 76.5, 45.741119356657634 ], "wc_weaknesses_avg": [ 143.0, 93.18261640456336 ], "wc_questions_avg": [ 34.75, 26.873546472321067 ], "wc_limitations_avg": [ 56.75, 60.565563648000506 ], "wc_review_avg": [ 422.5, 114.0843985828036 ], "wc_reply_reviewers_avg": [ 31.5, 33.44024521441193 ], "wc_reply_authors_avg": [ 23.5, 22.89650628371062 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2312251813622925804&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stanford.edu;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Stanford University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.stanford.edu;https://ai.tencent.com", "aff_unique_abbr": "Stanford;Tencent AI Lab", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Seattle;", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Do Not Marginalize Mechanisms, Rather Consolidate!", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70025", "id": "xBhvMu4J03", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bf73c283ed3108ee9f84da2e29bcc336-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xBhvMu4J03", "openreview": "https://openreview.net/forum?id=xBhvMu4J03", "poster": "/media/PosterPDFs/NeurIPS%202023/70025.png?t=1699661713.444616", "slides": "https://nips.cc/virtual/2023/poster/70025", "video": "https://nips.cc/virtual/2023/poster/70025", "author_site": "Moritz Willig, Matej Ze\u010devi\u0107, Devendra Dhami, Kristian Kersting", "tldr": "", "abstract": "Structural causal models (SCMs) are a powerful tool for understanding the complex causal relationships that underlie many real-world systems. As these systems grow in size, the number of variables and complexity of interactions between them does, too. Thus, becoming convoluted and difficult to analyze. This is particularly true in the context of machine learning and artificial intelligence, where an ever increasing amount of data demands for new methods to simplify and compress large scale SCM. While methods for marginalizing and abstracting SCM already exist today, they may destroy the causality of the marginalized model. To alleviate this, we introduce the concept of consolidating causal mechanisms to transform large-scale SCM while preserving consistent interventional behaviour. 
We show that consolidation is a powerful method for simplifying SCMs, discuss the resulting reduction in computational complexity, and give a perspective on the generalization abilities of consolidated SCMs.", "keywords": "Structural Causal Models;Marginalization;Consolidation;Compression", "primary_area": "", "supplementary_material": "/attachment/173670e37fec40e229b2d6dcddd3ac3f7c83c809.pdf", "author": "Moritz Willig;Matej Ze\u010devi\u0107;Devendra Singh Dhami;Kristian Kersting", "authorids": "~Moritz_Willig1;~Matej_Ze\u010devi\u01071;~Devendra_Singh_Dhami1;~Kristian_Kersting1", "gender": "M;M;M;M", "homepage": ";https://sites.google.com/view/devendradhami;http://www.ml.informatik.tu-darmstadt.de/;https://www.matej-zecevic.de", "dblp": "247/6267;201/2130;40/3793;286/1847", "google_scholar": "https://scholar.google.de/citations?user=1EDRWTsAAAAJ;aVlaHfkAAAAJ;QY-earAAAAAJ;gzJZcPUAAAAJ", "orcid": ";;0000-0002-2873-9152;", "linkedin": ";;;", "or_profile": "~Moritz_Willig1;~Devendra_Singh_Dhami1;~Kristian_Kersting1;~Matej_Zecevic1", "aff": "Technical University of Darmstadt;CS Department, TU Darmstadt, TU Darmstadt;TU Darmstadt;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;Postdoctoral researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nwillig2023do,\ntitle={Do Not Marginalize Mechanisms, Rather Consolidate!},\nauthor={Moritz Willig and Matej Ze{\\v{c}}evi{\\'c} and Devendra Singh Dhami and Kristian Kersting},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xBhvMu4J03}\n}", "github": "", "project": "", "reviewers": "tWPL;CKKq;ZzBw", "pdf_size": 725174, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;3;2", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "39;97;158", "wc_strengths": "27;20;76", "wc_weaknesses": "38;18;506", "wc_questions": "41;105;10", "wc_limitations": "6;3;11", "wc_review": "151;243;761", "wc_reply_reviewers": "19;11;179", "wc_reply_authors": "68;43;411", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 98.0, 48.586692279539534 ], "wc_strengths_avg": [ 41.0, 24.91318258807306 ], "wc_weaknesses_avg": [ 187.33333333333334, 225.4792427007171 ], "wc_questions_avg": [ 52.0, 39.5558676641869 ], "wc_limitations_avg": [ 6.666666666666667, 3.2998316455372216 ], "wc_review_avg": [ 385.0, 268.5119488340634 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 77.37929668558351 ], "wc_reply_authors_avg": [ 174.0, 167.89480833744284 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1216066226228342588&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "cs.tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Technical University of Darmstadt;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";Computer Science Department", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD;TU Darmstadt",
"aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "SODA: Robust Training of Test-Time Data Adaptors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70024", "id": "xBqjoG0NxM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/893ca2e5ff5bb258da30e0a82f4c8de9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xBqjoG0NxM", "openreview": "https://openreview.net/forum?id=xBqjoG0NxM", "poster": "/media/PosterPDFs/NeurIPS%202023/70024.png?t=1699863432.9811077", "slides": "https://nips.cc/virtual/2023/poster/70024", "video": "https://nips.cc/virtual/2023/poster/70024", "author_site": "Zige Wang, Yonggang Zhang, Zhen Fang, Long Lan, Wenjing Yang, Bo Han", "tldr": "", "abstract": "Adapting models deployed to test distributions can mitigate the performance degradation caused by distribution shifts. However, privacy concerns may render model parameters inaccessible. One promising approach involves utilizing zeroth-order optimization (ZOO) to train a data adaptor to adapt the test data to fit the deployed models. Nevertheless, the data adaptor trained with ZOO typically brings restricted improvements due to the potential corruption of data features caused by the data adaptor. To address this issue, we revisit ZOO in the context of test-time data adaptation. We find that the issue directly stems from the unreliable estimation of the gradients used to optimize the data adaptor, which is inherently due to the unreliable nature of the pseudo-labels assigned to the test data. Based on this observation, we propose pseudo-label-robust data adaptation (SODA) to improve the performance of data adaptation. Specifically, SODA leverages high-confidence predicted labels as reliable labels to optimize the data adaptor with ZOO for label prediction. For data with low-confidence predictions, SODA encourages the adaptor to preserve data information to mitigate data corruption. 
Empirical results indicate that SODA can significantly enhance the performance of deployed models in the presence of distribution shifts without requiring access to model parameters.", "keywords": "test-time data adaptation;zeroth-order optimization;out-of-distribution generalization", "primary_area": "", "supplementary_material": "/attachment/d25cc4da70c188ffa3622844e5969a59f5098c2f.zip", "author": "Zige Wang;Yonggang Zhang;Zhen Fang;Long Lan;Wenjing Yang;Bo Han", "authorids": "~Zige_Wang1;~Yonggang_Zhang1;~Zhen_Fang2;~Long_Lan2;~Wenjing_Yang1;~Bo_Han1", "gender": "F;M;M;M;F;M", "homepage": "https://github.com/ZigeW;https://yonggangzhangben.github.io/index.html;https://fang-zhen.github.io/index.html;https://lan-long.github.io/;https://www.researchgate.net/scientific-contributions/Wen-Jing-Yang-2056467943;https://bhanml.github.io/", "dblp": "239/2542;27/6859-3;;124/2136.html;48/3396-2;241/0472-3", "google_scholar": ";XSbEr98AAAAJ;OzD6WJcAAAAJ;https://scholar.google.com.au/citations?user=huVW6Y8AAAAJ;;nTNjqHwAAAAJ", "orcid": ";0000-0002-4080-7592;0000-0003-0602-6255;;;", "linkedin": ";;;;;", "or_profile": "~Zige_Wang1;~Yonggang_Zhang1;~Zhen_Fang2;~Long_Lan2;~Wenjing_Yang1;~bo_han2", "aff": "Peking University;Hong Kong Baptist University;University of Technology Sydney;National University of Defense Technology;National University of Defense Technology;RIKEN", "aff_domain": "pku.edu.cn;hkbu.edu.hk;uts.edu.au;nudt.edu.cn;nudt.edu.cn;riken.jp", "position": "PhD student;Postdoc;Postdoc;Associate Professor;Associate Professor;Adjunct Scientist", "bibtex": "@inproceedings{\nwang2023soda,\ntitle={{SODA}: Robust Training of Test-Time Data Adaptors},\nauthor={Zige Wang and Yonggang Zhang and Zhen Fang and Long Lan and Wenjing Yang and Bo Han},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xBqjoG0NxM}\n}", "github": "", "project": "", "reviewers": "xmoq;SVqY;1NX6;H74J", "pdf_size": 1024880, "rating": "5;5;5;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "65;92;92;81", "wc_strengths": "23;46;80;107", "wc_weaknesses": "152;181;91;120", "wc_questions": "3;3;4;107", "wc_limitations": "8;1;1;50", "wc_review": "251;323;268;465", "wc_reply_reviewers": "39;31;62;66", "wc_reply_authors": "25;56;0;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 11.05667219374799 ], "wc_strengths_avg": [ 64.0, 32.05464084964921 ], "wc_weaknesses_avg": [ 136.0, 33.771289581536564 ], "wc_questions_avg": [ 29.25, 44.890839822841365 ], "wc_limitations_avg": [ 15.0, 20.40833163195855 ], "wc_review_avg": [ 326.75, 84.13790762789387 ], "wc_reply_reviewers_avg": [ 49.5, 14.84082207965583 ], "wc_reply_authors_avg": [ 24.5, 20.303940504246953 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16757974606908528634&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;hkbu.edu.hk;uts.edu.au;nudt.edu.cn;nudt.edu.cn;riken.jp", "author_num": 6, "aff_unique_index": "0;1;2;3;3;4", "aff_unique_norm": 
"Peking University;Hong Kong Baptist University;University of Technology Sydney;National University of Defense Technology;RIKEN", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.hkbu.edu.hk;https://www.uts.edu.au;http://www.nudt.edu.cn/;https://www.riken.jp", "aff_unique_abbr": "Peking U;HKBU;UTS;NUDT;RIKEN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;2", "aff_country_unique": "China;Australia;Japan" }, { "title": "Probabilistic inverse optimal control for non-linear partially observable systems disentangles perceptual uncertainty and behavioral costs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70023", "id": "xDHzQQ4lnC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/16347f6e665376fd9a9a290dbfe0db5b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xDHzQQ4lnC", "openreview": "https://openreview.net/forum?id=xDHzQQ4lnC", "poster": "/media/PosterPDFs/NeurIPS%202023/70023.png?t=1702223760.0320396", "slides": "https://nips.cc/virtual/2023/poster/70023", "video": "https://nips.cc/virtual/2023/poster/70023", "author_site": "Dominik Straub, Matthias Schultheis, Heinz Koeppl, Constantin Rothkopf", "tldr": "", "abstract": "Inverse optimal control can be used to characterize behavior in sequential decision-making tasks. Most existing work, however, is limited to fully observable or linear systems, or requires the action signals to be known. Here, we introduce a probabilistic approach to inverse optimal control for partially observable stochastic non-linear systems with unobserved action signals, which unifies previous approaches to inverse optimal control with maximum causal entropy formulations. Using an explicit model of the noise characteristics of the sensory and motor systems of the agent in conjunction with local linearization techniques, we derive an approximate likelihood function for the model parameters, which can be computed within a single forward pass. We present quantitative evaluations on stochastic and partially observable versions of two classic control tasks and two human behavioral tasks. Importantly, we show that our method can disentangle perceptual factors and behavioral costs despite the fact that epistemic and pragmatic actions are intertwined in sequential decision-making under uncertainty, such as in active sensing and active learning. The proposed method has broad applicability, ranging from imitation learning to sensorimotor neuroscience.", "keywords": "inverse optimal control;probabilistic modeling;motor control;cognitive science", "primary_area": "", "supplementary_material": "", "author": "Dominik Straub;Matthias Schultheis;Heinz Koeppl;Constantin A. 
Rothkopf", "authorids": "~Dominik_Straub1;~Matthias_Schultheis1;~Heinz_Koeppl1;~Constantin_A._Rothkopf1", "gender": "M;M;M;M", "homepage": "https://dominikstrb.github.io;;;https://www.pip.tu-darmstadt.de", "dblp": "304/8169;;41/6084;71/5555", "google_scholar": "nU54Q7cAAAAJ;JqD6llUAAAAJ;https://scholar.google.de/citations?user=WaPW80kAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-5263-2622;;;", "linkedin": ";;;", "or_profile": "~Dominik_Straub1;~Matthias_Schultheis1;~Heinz_Koeppl1;~Constantin_Rothkopf1", "aff": "TU Darmstadt;TU Darmstadt;TU Darmstadt;Technische Universit\u00e4t Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nstraub2023probabilistic,\ntitle={Probabilistic inverse optimal control for non-linear partially observable systems disentangles perceptual uncertainty and behavioral costs},\nauthor={Dominik Straub and Matthias Schultheis and Heinz Koeppl and Constantin A. Rothkopf},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xDHzQQ4lnC}\n}", "github": "", "project": "", "reviewers": "EbpC;V7nV;CoMo;TQks;McGM;1CfE;xp9a;ff8k", "pdf_size": 2679433, "rating": "4;5;5;5;6;6;7;7", "confidence": "2;4;3;3;1;3;4;2", "soundness": "3;3;3;2;3;3;3;4", "novelty": "2;2;2;2;3;3;3;3", "presentation": "3;2;3;3;3;4;3;2", "wc_summary": "63;109;137;257;92;93;63;97", "wc_strengths": "14;69;38;59;48;105;85;32", "wc_weaknesses": "42;215;184;68;16;16;121;202", "wc_questions": "91;254;93;1;96;25;208;380", "wc_limitations": "80;43;7;1;7;78;8;34", "wc_review": "290;690;459;386;259;317;485;745", "wc_reply_reviewers": "97;9;68;0;0;0;0;93", "wc_reply_authors": "0;0;0;0;0;0;0;429", "reply_reviewers": "1;1;1;0;0;0;0;2", "reply_authors": "1;1;1;1;1;1;1;2", "rating_avg": [ 5.625, 0.9921567416492215 ], "confidence_avg": [ 2.75, 0.9682458365518543 ], "soundness_avg": [ 3.0, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.875, 0.5994789404140899 ], "wc_summary_avg": [ 113.875, 58.54365358431262 ], "wc_strengths_avg": [ 56.25, 27.720705257983607 ], "wc_weaknesses_avg": [ 108.0, 78.35336623272799 ], "wc_questions_avg": [ 143.5, 119.48535475111584 ], "wc_limitations_avg": [ 32.25, 30.27271874146754 ], "wc_review_avg": [ 453.875, 169.43910226095983 ], "wc_reply_reviewers_avg": [ 33.375, 41.611108793205695 ], "wc_reply_authors_avg": [ 53.625, 141.87841405583868 ], "reply_reviewers_avg": [ 0.625, 0.6959705453537527 ], "reply_authors_avg": [ 1.125, 0.33071891388307384 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.03253000243161777, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15441864459729216567&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "LVM-Med: Learning Large-Scale Self-Supervised Vision Models for Medical Imaging via Second-order Graph Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70022", "id": 
"xE7oH5iVGK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58cc11cda2a2679e8af5c6317aed0af8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xE7oH5iVGK", "openreview": "https://openreview.net/forum?id=xE7oH5iVGK", "poster": "/media/PosterPDFs/NeurIPS%202023/70022.png?t=1701869823.7271814", "slides": "https://nips.cc/virtual/2023/poster/70022", "video": "https://nips.cc/virtual/2023/poster/70022", "author_site": "Duy M. H. Nguyen, Hoang Nguyen, Nghiem Diep, Tan Ngoc Pham, Tri Cao, Binh Nguyen, Paul Swoboda, Paul Swoboda, Nhat Ho, Shadi Albarqouni, Pengtao Xie, Pengtao Xie, Daniel Sonntag, Mathias Niepert", "tldr": "", "abstract": "Obtaining large pre-trained models that can be fine-tuned to new tasks with limited annotated samples has remained an open challenge for medical imaging data. While pre-trained networks on ImageNet and vision-language foundation models trained on web-scale data are the prevailing approaches, their effectiveness on medical tasks is limited due to the significant domain shift between natural and medical images. To bridge this gap, we introduce LVM-Med, the first family of deep networks trained on large-scale medical datasets. We have collected approximately 1.3 million medical images from 55 publicly available datasets, covering a large number of organs and modalities such as CT, MRI, X-ray, and Ultrasound. We benchmark several state-of-the-art self-supervised algorithms on this dataset and propose a novel self-supervised contrastive learning algorithm using a graph-matching formulation. The proposed approach makes three contributions: (i) it integrates prior pair-wise image similarity metrics based on local and global information; (ii) it captures the structural constraints of feature embeddings through a loss function constructed through a combinatorial graph-matching objective, and (iii) it can be trained efficiently end-to-end using modern gradient-estimation techniques for black-box solvers. We thoroughly evaluate the proposed LVM-Med on 15 downstream medical tasks ranging from segmentation and classification to object detection, and both for the in and out-of-distribution settings. LVM-Med empirically outperforms a number of state-of-the-art supervised, self-supervised, and foundation models. For challenging tasks such as Brain Tumor Classification or Diabetic Retinopathy Grading, LVM-Med improves previous vision-language models trained on 1 billion masks by 6-7% while using only a ResNet-50.", "keywords": "medical imaging; self-supervised learning; graph matching; large-vision model", "primary_area": "", "supplementary_material": "", "author": "Duy Minh Ho Nguyen;Hoang Nguyen;Nghiem Tuong Diep;Tan Ngoc Pham;Tri Cao;Binh T. 
Nguyen;Paul Swoboda;Nhat Ho;Shadi Albarqouni;Pengtao Xie;Daniel Sonntag;Mathias Niepert", "authorids": "~Duy_Minh_Ho_Nguyen1;~Hoang_Nguyen3;~Nghiem_Tuong_Diep1;~Tan_Ngoc_Pham1;~Tri_Cao1;~Binh_T._Nguyen1;~Paul_Swoboda1;~Nhat_Ho1;~Shadi_Albarqouni1;~Pengtao_Xie3;~Daniel_Sonntag2;~Mathias_Niepert1", "gender": "M;M;M;M;M;M;M;M;M;M;M;M", "homepage": ";https://hoangnguyen210.github.io/;;https://ngctnnnn.github.io/resume/;https://sites.google.com/site/ntbinhpolytechnique/;http://paulswoboda.net;https://nhatptnk8912.github.io/;https://albarqouni.github.io/;https://pengtaoxie.github.io/;https://www.dfki.de/~sonntag/;http://www.matlog.net;https://caothientri2001vn.github.io/", "dblp": "199/8349;;;334/5571;06/2545;17/3730;203/4479;165/7751;133/1998;83/5858.html;n/MathiasNiepert;", "google_scholar": "_NIyeykAAAAJ;3fI7aSYAAAAJ;3YPTp-gAAAAJ;-agvSxkAAAAJ;dXEb3PMAAAAJ;https://scholar.google.de/citations?hl=en;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ;https://scholar.google.de/citations?user=CPuApzoAAAAJ;cnncomYAAAAJ;v7i6Uz4AAAAJ;https://scholar.google.de/citations?user=p5vLzq0AAAAJ;QytvkD4AAAAJ", "orcid": ";0009-0007-5740-8161;0000-0001-7406-1250;0000-0002-6824-172X;0000-0001-5249-9702;;;0000-0003-2157-2211;;;;0000-0001-7865-8476", "linkedin": ";hoang-nguyen-377b54231/;diep-tuong-nghiem-63aa31144/;ngctnnnn/;;;nhat-pham-minh-ho-267b8164/;shadialbarqouni/;;;;", "or_profile": "~Duy_Minh_Ho_Nguyen1;~Hoang_Nguyen3;~Nghiem_Tuong_Diep1;~Tan_Ngoc_Pham1;~Binh_T._Nguyen1;~Paul_Swoboda1;~Nhat_Ho1;~Shadi_Albarqouni1;~Pengtao_Xie3;~Daniel_Sonntag2;~Mathias_Niepert1;~Tri_Cao2", "aff": "German Research Center for AI;German Research Center for AI;Ho Chi Minh city University of Science, Vietnam National University;Ho Chi Minh city University of Science, Vietnam National University;Ho Chi Minh city University of Science, Vietnam National University;Universit\u00e4t Mannheim;University of Texas, Austin;Helmholtz Center Munich;Carnegie Mellon University;Carl von Ossietzky Universit\u00e4t Oldenburg;NEC;Ho Chi Minh city University of Science, Vietnam National University", "aff_domain": "dfki.de;dfki.de;hcmus.edu.vn;hcmus.edu.vn;hcmus.edu.vn;uni-mannheim.de;utexas.edu;helmholtz-muenchen.de; ;uol.de;neclab.eu;hcmus.edu.vn", "position": "Researcher;Intern;Undergrad student;MS student;Associate Professor;Assistant Professor;Assistant Professor;Helmholtz AI Young Investigator Group Leader;Graduate Student;Full Professor;Research Scientist;Undergrad student", "bibtex": "@inproceedings{\nnguyen2023lvmmed,\ntitle={{LVM}-Med: Learning Large-Scale Self-Supervised Vision Models for Medical Imaging via Second-order Graph Matching},\nauthor={Duy Minh Ho Nguyen and Hoang Nguyen and Nghiem Tuong Diep and Tan Ngoc Pham and Tri Cao and Binh T. 
Nguyen and Paul Swoboda and Nhat Ho and Shadi Albarqouni and Pengtao Xie and Daniel Sonntag and Mathias Niepert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xE7oH5iVGK}\n}", "github": "", "project": "", "reviewers": "jRh7;WEtP;yfed;xKHb;XBHj", "pdf_size": 3387531, "rating": "6;6;7;8;8", "confidence": "4;3;4;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;4;4", "presentation": "2;3;3;3;4", "wc_summary": "70;56;78;133;53", "wc_strengths": "38;21;213;27;33", "wc_weaknesses": "220;161;109;161;3", "wc_questions": "106;2;40;43;1", "wc_limitations": "1;2;8;3;6", "wc_review": "435;242;448;367;96", "wc_reply_reviewers": "102;31;94;12;11", "wc_reply_authors": "441;24;26;16;19", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 78.0, 28.97585201508318 ], "wc_strengths_avg": [ 66.4, 73.52169747768342 ], "wc_weaknesses_avg": [ 130.8, 72.92297306062062 ], "wc_questions_avg": [ 38.4, 38.25493432225443 ], "wc_limitations_avg": [ 4.0, 2.6076809620810595 ], "wc_review_avg": [ 317.6, 132.70207232745088 ], "wc_reply_reviewers_avg": [ 50.0, 39.914909495074646 ], "wc_reply_authors_avg": [ 105.2, 167.93736927795433 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8631925948719657729&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "dfki.de;dfki.de;hcmus.edu.vn;hcmus.edu.vn;hcmus.edu.vn;uni-mannheim.de;utexas.edu;helmholtz-muenchen.de; ;uol.de;neclab.eu;hcmus.edu.vn", "author_num": 12, "aff_unique_index": "0;0;1;1;1;2;3;4;5;6;7;1", "aff_unique_norm": "German Research Center for Artificial Intelligence;Ho Chi Minh City University of Science;University of Mannheim;University of Texas at Austin;Helmholtz Center Munich;Carnegie Mellon University;Carl von Ossietzky University of Oldenburg;NEC Corporation", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.dfki.de/;;https://www.uni-mannheim.de;https://www.utexas.edu;https://www.helmholtz-munich.de;https://www.cmu.edu;https://www.uni-oldenburg.de/;https://www.nec.com", "aff_unique_abbr": "DFKI;;UM;UT Austin;HMGU;CMU;UvO;NEC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;1;1;1;0;2;0;2;0;3;1", "aff_country_unique": "Germany;Vietnam;United States;Japan" }, { "title": "Dissecting Chain-of-Thought: Compositionality through In-Context Filtering and Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70021", "id": "xEhKwsqxMa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/45e15bae91a6f213d45e203b8a29be48-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xEhKwsqxMa", "openreview": "https://openreview.net/forum?id=xEhKwsqxMa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70021", "video": "https://nips.cc/virtual/2023/poster/70021", "author_site": "Yingcong Li, Kartik Sreenivasan, Angeliki Giannou, Dimitris Papailiopoulos, Samet Oymak", "tldr": "", "abstract": "Chain-of-thought (CoT) is a method that enables language models 
to handle complex reasoning tasks by decomposing them into simpler steps. Despite its success, the underlying mechanics of CoT are not yet fully understood. In an attempt to shed light on this, our study investigates the impact of CoT on the ability of transformers to in-context learn a simple to study, yet general family of compositional functions: multi-layer perceptrons (MLPs). In this setting, we find that the success of CoT can be attributed to breaking down in-context learning of a compositional function into two distinct phases: focusing on and filtering data related to each step of the composition and in-context learning the single-step composition function. Through both experimental and theoretical evidence, we demonstrate how CoT significantly reduces the sample complexity of in-context learning (ICL) and facilitates the learning of complex functions that non-CoT methods struggle with. Furthermore, we illustrate how transformers can transition from vanilla in-context learning to mastering a compositional function with CoT by simply incorporating additional layers that perform the necessary data-filtering for CoT via the attention mechanism. In addition to these test-time benefits, we show CoT helps accelerate pretraining by learning shortcuts to represent complex functions and filtering plays an important role in this process. These findings collectively provide insights into the mechanics of CoT, inviting further investigation of its role in complex reasoning tasks.", "keywords": "chain-of-thought;in-context learning;attention;compositional learning;approximation;length generalization", "primary_area": "", "supplementary_material": "/attachment/3c3a094f0df6944ae79e7a34e268fbcc42d80951.zip", "author": "Yingcong Li;Kartik Sreenivasan;Angeliki Giannou;Dimitris Papailiopoulos;Samet Oymak", "authorids": "~Yingcong_Li1;~Kartik_Sreenivasan1;~Angeliki_Giannou1;~Dimitris_Papailiopoulos1;~Samet_Oymak2", "gender": ";M;F;M;M", "homepage": "https://yingcong-li.github.io/;https://ksreenivasan.github.io/;https://sites.google.com/view/angeliki-giannou/home;http://papail.io;https://sota.engin.umich.edu/", "dblp": "244/4435;177/2408;283/5898.html;;89/8771", "google_scholar": "9uWgjIUAAAAJ;BP0WzIQAAAAJ;;hYi6i9sAAAAJ;AY6InkoAAAAJ", "orcid": ";;;;", "linkedin": ";kartiksreenivasan/;;;", "or_profile": "~Yingcong_Li1;~Kartik_Sreenivasan1;~Angeliki_Giannou1;~Dimitris_Papailiopoulos1;~Samet_Oymak1", "aff": "University of California, Riverside;University of Wisconsin, Madison;University of Wisconsin - Madison;University of Wisconsin - Madison;University of California, Riverside", "aff_domain": "ucr.edu;wisc.edu;wisc.edu;wisc.edu;ucr.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2023dissecting,\ntitle={Dissecting Chain-of-Thought: Compositionality through In-Context Filtering and Learning},\nauthor={Yingcong Li and Kartik Sreenivasan and Angeliki Giannou and Dimitris Papailiopoulos and Samet Oymak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xEhKwsqxMa}\n}", "github": "", "project": "", "reviewers": "8qtM;mo8c;wZge;zLZS", "pdf_size": 1142168, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;1;1;2", "wc_summary": "71;126;146;116", "wc_strengths": "37;73;4;59", "wc_weaknesses": "78;259;129;275", "wc_questions": "67;36;173;134", "wc_limitations": "5;36;151;28", "wc_review": 
"258;530;603;612", "wc_reply_reviewers": "10;51;60;104", "wc_reply_authors": "0;463;26;23", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 114.75, 27.471576219794887 ], "wc_strengths_avg": [ 43.25, 26.042033330752037 ], "wc_weaknesses_avg": [ 185.25, 83.90582518514432 ], "wc_questions_avg": [ 102.5, 53.956000593075835 ], "wc_limitations_avg": [ 55.0, 56.581799193733666 ], "wc_review_avg": [ 500.75, 143.71390851271147 ], "wc_reply_reviewers_avg": [ 56.25, 33.39442318711314 ], "wc_reply_authors_avg": [ 128.0, 193.67369465159692 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8999382911174549626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ucr.edu;wisc.edu;wisc.edu;wisc.edu;ucr.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University of California, Riverside;University of Wisconsin;University of Wisconsin-Madison", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucr.edu;https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UCR;UW;UW-Madison", "aff_campus_unique_index": "0;1;1;1;0", "aff_campus_unique": "Riverside;Madison", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Kullback-Leibler Maillard Sampling for Multi-armed Bandits with Bounded Rewards", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70020", "id": "xF89MjFbWp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bdebb4549d5a79501bc151411abdb6d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xF89MjFbWp", "openreview": "https://openreview.net/forum?id=xF89MjFbWp", "poster": "/media/PosterPDFs/NeurIPS%202023/70020.png?t=1702268986.3644686", "slides": "https://nips.cc/virtual/2023/poster/70020", "video": "https://nips.cc/virtual/2023/poster/70020", "author_site": "Hao Qin, Kwang-Sung Jun, Chicheng Zhang", "tldr": "", "abstract": "We study $K$-armed bandit problems where the reward distributions of the arms are all supported on the $[0,1]$ interval. Maillard sampling\\cite{maillard13apprentissage}, an attractive alternative to Thompson sampling, has recently been shown to achieve competitive regret guarantees in the sub-Gaussian reward setting\\cite{bian2022maillard} while maintaining closed-form action probabilities, which is useful for offline policy evaluation. In this work, we analyze the Kullback-Leibler Maillard Sampling (KL-MS) algorithm, a natural extension of Maillard sampling {and a special case of Minimum Empirical Divergence (MED)~\\cite{honda2011asymptotically}} for achieving a KL-style finite-time gap-dependent regret bound. 
We show that KL-MS enjoys the asymptotic optimality when the rewards are Bernoulli and has an {adaptive} worst-case regret bound of the form $O(\\sqrt{\\mu^*(1-\\mu^*) K T \\ln K} + K \\ln T)$, where $\\mu^*$ is the expected reward of the optimal arm, and $T$ is the time horizon length; {this is the first time such adaptivity is reported in the literature for an algorithm with asymptotic optimality guarantees.}", "keywords": "multi-armed bandits;bounded rewards", "primary_area": "", "supplementary_material": "/attachment/203362bec07fae72c5d0dcc98c810a1bfe5f2b8b.pdf", "author": "Hao Qin;Kwang-Sung Jun;Chicheng Zhang", "authorids": "~Hao_Qin2;~Kwang-Sung_Jun1;~Chicheng_Zhang1", "gender": "M;M;M", "homepage": "https://github.com/MjolnirT;http://kwangsungjun.github.io;http://zcc1307.github.io", "dblp": ";88/8411;149/2402", "google_scholar": "nlLu4ZQAAAAJ;VgvC7o8AAAAJ;29B3BAgAAAAJ", "orcid": ";;", "linkedin": "hao-qin-183a89197/;;", "or_profile": "~Hao_Qin2;~Kwang-Sung_Jun1;~Chicheng_Zhang1", "aff": "University of Arizona;University of Arizona;University of Arizona", "aff_domain": "arizona.edu;cs.arizona.edu;arizona.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nqin2023kullbackleibler,\ntitle={Kullback-Leibler Maillard Sampling for Multi-armed Bandits with Bounded Rewards},\nauthor={Hao Qin and Kwang-Sung Jun and Chicheng Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xF89MjFbWp}\n}", "github": "", "project": "", "reviewers": "QYoJ;Mj4M;nCy6;GStc", "pdf_size": 496646, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "118;201;199;117", "wc_strengths": "17;72;277;101", "wc_weaknesses": "184;716;215;85", "wc_questions": "5;15;13;159", "wc_limitations": "1;33;1;1", "wc_review": "325;1037;705;463", "wc_reply_reviewers": "37;124;213;23", "wc_reply_authors": "116;247;211;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 158.75, 41.25757506204164 ], "wc_strengths_avg": [ 116.75, 97.31488837788389 ], "wc_weaknesses_avg": [ 300.0, 244.92958171686817 ], "wc_questions_avg": [ 48.0, 64.19501538281614 ], "wc_limitations_avg": [ 9.0, 13.856406460551018 ], "wc_review_avg": [ 632.5, 270.26052245934846 ], "wc_reply_reviewers_avg": [ 99.25, 76.22458592868838 ], "wc_reply_authors_avg": [ 143.5, 95.67784487539421 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9270747452292777555&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "arizona.edu;cs.arizona.edu;arizona.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Boosting Spectral Clustering on Incomplete Data via Kernel Correction and Affinity Learning", "status": "Poster", 
"track": "main", "site": "https://nips.cc/virtual/2023/poster/70019", "id": "xFtuNq23D5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e5aa7171449b83f8b4eec1623eac9906-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xFtuNq23D5", "openreview": "https://openreview.net/forum?id=xFtuNq23D5", "poster": "/media/PosterPDFs/NeurIPS%202023/70019.png?t=1701782138.9353304", "slides": "https://nips.cc/virtual/2023/poster/70019", "video": "https://nips.cc/virtual/2023/poster/70019", "author_site": "Fangchen Yu, Runze Zhao, Zhan Shi, Yiwen Lu, Jicong Fan, Yicheng Zeng, Jianfeng Mao, Wenye Li", "tldr": "", "abstract": "Spectral clustering has gained popularity for clustering non-convex data due to its simplicity and effectiveness. It is essential to construct a similarity graph using a high-quality affinity measure that models the local neighborhood relations among the data samples. However, incomplete data can lead to inaccurate affinity measures, resulting in degraded clustering performance. To address these issues, we propose an imputation-free framework with two novel approaches to improve spectral clustering on incomplete data. Firstly, we introduce a new kernel correction method that enhances the quality of the kernel matrix estimated on incomplete data with a theoretical guarantee, benefiting classical spectral clustering on pre-defined kernels. Secondly, we develop a series of affinity learning methods that equip the self-expressive framework with $\\ell_p$-norm to construct an intrinsic affinity matrix with an adaptive extension. Our methods outperform existing data imputation and distance calibration techniques on benchmark datasets, offering a promising solution to spectral clustering on incomplete data in various real-world applications.", "keywords": "Spectral Clustering;Incomplete Data;Kernel Correction;Self-expressive Affinity Learning", "primary_area": "", "supplementary_material": "", "author": "Fangchen Yu;Runze Zhao;Zhan Shi;Yiwen Lu;Jicong Fan;Yicheng Zeng;Jianfeng Mao;Wenye Li", "authorids": "~Fangchen_Yu1;~Runze_Zhao2;~Zhan_Shi5;~Yiwen_Lu2;~Jicong_Fan2;~Yicheng_Zeng1;~Jianfeng_Mao1;~Wenye_Li1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://sciyu.github.io/;https://randulfzhao.github.io/;;;https://jicongfan.github.io/;http://sribd.cn/en/teacher/558;https://sds.cuhk.edu.cn/en/teacher/268;", "dblp": "305/0356;;;;139/1570;232/0043;;39/5505", "google_scholar": "fQtgwlgAAAAJ;GBkB8_QAAAAJ;uwJDpLMAAAAJ;;vdJsnhIAAAAJ;;https://scholar.google.com.hk/citations?user=cDzZKz8AAAAJ;", "orcid": "0000-0002-1256-2719;;;;0000-0001-9665-0355;;;", "linkedin": "fangchen-yu-614a98212/;;;https://www.linkedin.cn/incareer/in/ACoAAENI2mIB13MI0XRskPeyRAxnVG0TpIvvRkY;;;;", "or_profile": "~Fangchen_Yu1;~Runze_Zhao2;~Zhan_Shi5;~Yiwen_Lu2;~Jicong_Fan2;~Yicheng_Zeng1;~Jianfeng_Mao1;~Wenye_Li1", "aff": "vivo AI Lab;The Chinese University of Hongkong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hongkong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "vivo.com;link.cuhk.edu.cn;cuhk.edu.cn;link.cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "Research internship;Undergrad student;Undergrad student;Undergrad student;Research Assistant Professor;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nyu2023boosting,\ntitle={Boosting 
Spectral Clustering on Incomplete Data via Kernel Correction and Affinity Learning},\nauthor={Fangchen Yu and Runze Zhao and Zhan Shi and Yiwen Lu and Jicong Fan and Yicheng Zeng and Jianfeng Mao and Wenye Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xFtuNq23D5}\n}", "github": "", "project": "", "reviewers": "hJi1;7HKb;GE1q;7MjD", "pdf_size": 461487, "rating": "5;5;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "73;102;86;258", "wc_strengths": "50;44;45;105", "wc_weaknesses": "213;346;89;245", "wc_questions": "213;77;84;627", "wc_limitations": "213;62;10;3", "wc_review": "762;631;314;1238", "wc_reply_reviewers": "15;145;66;91", "wc_reply_authors": "30;498;48;517", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 129.75, 74.75418048510733 ], "wc_strengths_avg": [ 61.0, 25.5049014897137 ], "wc_weaknesses_avg": [ 223.25, 91.74523148371254 ], "wc_questions_avg": [ 250.25, 224.15549848263817 ], "wc_limitations_avg": [ 72.0, 84.53697415923993 ], "wc_review_avg": [ 736.25, 332.3359557736719 ], "wc_reply_reviewers_avg": [ 79.25, 46.81012176869443 ], "wc_reply_authors_avg": [ 273.25, 234.43269289926266 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7008342970708187525&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "vivo.com;link.cuhk.edu.cn;cuhk.edu.cn;link.cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;1;2;3;2;2", "aff_unique_norm": "vivo;Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong;Shenzhen Research Institute of Big Data", "aff_unique_dep": "vivo AI Lab;;;", "aff_unique_url": "https://vivo.com;https://www.cuhk.edu.cn;https://www.cuhk.edu.cn;http://www.sribd.cn", "aff_unique_abbr": "vivo;CUHK;CUHK;", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "State2Explanation: Concept-Based Explanations to Benefit Agent Learning and User Understanding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70018", "id": "xGz0wAIJrS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d4387c37b3b06e55f86eccdb8cd1f829-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xGz0wAIJrS", "openreview": "https://openreview.net/forum?id=xGz0wAIJrS", "poster": "/media/PosterPDFs/NeurIPS%202023/70018.png?t=1701461577.166756", "slides": "https://nips.cc/virtual/2023/poster/70018", "video": "https://nips.cc/virtual/2023/poster/70018", "author_site": "Devleena Das, Sonia Chernova, Been Kim", "tldr": "", "abstract": "As more non-AI experts use complex AI systems for daily tasks, there has been an increasing effort to develop methods that produce explanations of AI decision making that are understandable by non-AI experts. Towards this effort, leveraging higher-level concepts and producing concept-based explanations have become a popular method. 
Most concept-based explanations have been developed for classification techniques, and we posit that the few existing methods for sequential decision making are limited in scope. In this work, we first contribute desiderata for defining ``concepts'' in sequential decision making settings. Additionally, inspired by the Protege Effect, which states that explaining knowledge often reinforces one's self-learning, we explore how concept-based explanations of an RL agent's decision making can in turn improve the agent's learning rate, as well as improve end-user understanding of the agent's decision making. To this end, we contribute a unified framework, State2Explanation (S2E), that involves learning a joint embedding model between state-action pairs and concept-based explanations, and leveraging such a learned model to both (1) inform reward shaping during an agent's training, and (2) provide explanations to end-users at deployment for improved task performance. Our experimental validations, in Connect 4 and Lunar Lander, demonstrate the success of S2E in providing a dual benefit, successfully informing reward shaping and improving agent learning rate, as well as significantly improving end-user task performance at deployment time.", "keywords": "Concept-Based Explanations;Reinforcement Learning;Human-AI Interaction", "primary_area": "", "supplementary_material": "", "author": "Devleena Das;Sonia Chernova;Been Kim", "authorids": "~Devleena_Das1;~Sonia_Chernova2;~Been_Kim1", "gender": ";F;", "homepage": "https://ddevleena.com/;https://www.cc.gatech.edu/~chernova/;https://beenkim.github.io/", "dblp": ";27/1140;https://dblp.uni-trier.de/pers/k/Kim:Been.html", "google_scholar": "xzt3VP0AAAAJ;EYo_WkEAAAAJ;", "orcid": ";0000-0001-6320-0825;", "linkedin": ";;", "or_profile": "~Devleena_Das1;~Sonia_Chernova2;~Been_Kim1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Google DeepMind", "aff_domain": "gatech.edu;gatech.edu;google.com", "position": "PhD student;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\ndas2023stateexplanation,\ntitle={State2Explanation: Concept-Based Explanations to Benefit Agent Learning and User Understanding},\nauthor={Devleena Das and Sonia Chernova and Been Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xGz0wAIJrS}\n}", "github": "", "project": "", "reviewers": "GSRV;smxt;g9Rv;ppHL", "pdf_size": 19733379, "rating": "4;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;4;4", "presentation": "2;3;4;3", "wc_summary": "66;106;79;60", "wc_strengths": "98;49;75;68", "wc_weaknesses": "510;60;134;132", "wc_questions": "164;34;40;61", "wc_limitations": "3;4;4;31", "wc_review": "841;253;332;352", "wc_reply_reviewers": "476;31;154;21", "wc_reply_authors": "1004;0;964;0", "reply_reviewers": "1;1;2;1", "reply_authors": "3;1;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.75, 17.69710428290459 ], "wc_strengths_avg": [ 72.5, 17.528548142958105 ], "wc_weaknesses_avg": [ 209.0, 176.32073048850495 ], "wc_questions_avg": [ 74.75, 52.49464258379135 ], "wc_limitations_avg": [ 10.5, 11.84271928232701 ], "wc_review_avg": [ 444.5, 231.8927553848977 ], "wc_reply_reviewers_avg": [ 170.5, 183.99252702215924 ], "wc_reply_authors_avg": [ 492.0, 492.2032100667366
], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3025219504478123288&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "gatech.edu;gatech.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.gatech.edu;https://deepmind.com", "aff_unique_abbr": "Georgia Tech;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Towards Better Dynamic Graph Learning: New Architecture and Unified Library", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70017", "id": "xHNzWHbklj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d611019afba70d547bd595e8a4158f55-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xHNzWHbklj", "openreview": "https://openreview.net/forum?id=xHNzWHbklj", "poster": "/media/PosterPDFs/NeurIPS%202023/70017.png?t=1699792193.890505", "slides": "https://nips.cc/virtual/2023/poster/70017", "video": "https://nips.cc/virtual/2023/poster/70017", "author_site": "Le Yu, Leilei Sun, Bowen Du, Weifeng Lv", "tldr": "", "abstract": "We propose DyGFormer, a new Transformer-based architecture for dynamic graph learning. DyGFormer is conceptually simple and only needs to learn from nodes' historical first-hop interactions by: (1) a neighbor co-occurrence encoding scheme that explores the correlations of the source node and destination node based on their historical sequences; (2) a patching technique that divides each sequence into multiple patches and feeds them to Transformer, allowing the model to effectively and efficiently benefit from longer histories. We also introduce DyGLib, a unified library with standard training pipelines, extensible coding interfaces, and comprehensive evaluating protocols to promote reproducible, scalable, and credible dynamic graph learning research. By performing exhaustive experiments on thirteen datasets for dynamic link prediction and dynamic node classification tasks, we find that DyGFormer achieves state-of-the-art performance on most of the datasets, demonstrating its effectiveness in capturing nodes' correlations and long-term temporal dependencies. Moreover, some results of baselines are inconsistent with previous reports, which may be caused by their diverse but less rigorous implementations, showing the importance of DyGLib. 
All the used resources are publicly available at https://github.com/yule-BUAA/DyGLib.", "keywords": "dynamic graph learning;Transformer-based architecture;dynamic graph library", "primary_area": "", "supplementary_material": "/attachment/e7ca4c69e744ef2fee4848738d77971908bfa11b.pdf", "author": "Le Yu;Leilei Sun;Bowen Du;Weifeng Lv", "authorids": "~Le_Yu2;~Leilei_Sun2;~Bowen_Du2;~Weifeng_Lv1", "gender": "M;M;M;M", "homepage": "https://yule-buaa.github.io/;https://scse.buaa.edu.cn/info/1079/9207.htm;http://scse.buaa.edu.cn/info/1387/8141.htm;http://www.buaa.edu.cn", "dblp": "23/7122-4;152/1810.html;;15/2736.html", "google_scholar": "-h_ehVsAAAAJ;QVHvhM4AAAAJ;oEt7RiIAAAAJ;", "orcid": "0000-0002-4908-3199;0000-0002-0157-1716;0000-0003-0975-2367;0000-0003-0227-0891", "linkedin": ";;;", "or_profile": "~Le_Yu2;~Leilei_Sun2;~Bowen_Du2;~Weifeng_Lv1", "aff": "Beihang University;Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyu2023towards,\ntitle={Towards Better Dynamic Graph Learning: New Architecture and Unified Library},\nauthor={Le Yu and Leilei Sun and Bowen Du and Weifeng Lv},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xHNzWHbklj}\n}", "github": "", "project": "", "reviewers": "eUNf;ErNY;QwYr;Ea5H;zX7A", "pdf_size": 852866, "rating": "4;5;7;7;8", "confidence": "4;4;3;3;5", "soundness": "3;3;3;4;3", "novelty": "2;3;4;4;3", "presentation": "1;3;3;4;4", "wc_summary": "181;85;54;45;88", "wc_strengths": "82;97;31;112;176", "wc_weaknesses": "141;717;74;83;118", "wc_questions": "68;160;13;37;6", "wc_limitations": "1;2;14;7;1", "wc_review": "473;1061;186;284;389", "wc_reply_reviewers": "49;0;24;11;27", "wc_reply_authors": "66;0;14;14;12", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.2, 1.469693845669907 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 90.6, 48.226963412597314 ], "wc_strengths_avg": [ 99.6, 46.932291655106724 ], "wc_weaknesses_avg": [ 226.6, 246.3823045593981 ], "wc_questions_avg": [ 56.8, 55.97642360851575 ], "wc_limitations_avg": [ 5.0, 5.019960159204453 ], "wc_review_avg": [ 478.6, 306.8332446134219 ], "wc_reply_reviewers_avg": [ 22.2, 16.50939126679115 ], "wc_reply_authors_avg": [ 21.2, 22.99913041834408 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.036369648372665424, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3772613827931800498&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Robust Bayesian Satisficing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70016", "id": "xINPCvgULc", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/daa098aa8e1fc718943ff1ab7b5b30c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xINPCvgULc", "openreview": "https://openreview.net/forum?id=xINPCvgULc", "poster": "/media/PosterPDFs/NeurIPS%202023/70016.png?t=1701286352.14207", "slides": "https://nips.cc/virtual/2023/poster/70016", "video": "https://nips.cc/virtual/2023/poster/70016", "author_site": "Artun Saday, Y. Cahit Y\u0131ld\u0131r\u0131m, Cem Tekin", "tldr": "", "abstract": "Distributional shifts pose a significant challenge to achieving robustness in contemporary machine learning. To overcome this challenge, robust satisficing (RS) seeks a robust solution to an unspecified distributional shift while achieving a utility above a desired threshold. This paper focuses on the problem of RS in contextual Bayesian optimization when there is a discrepancy between the true and reference distributions of the context. We propose a novel robust Bayesian satisficing algorithm called RoBOS for noisy black-box optimization. Our algorithm guarantees sublinear lenient regret under certain assumptions on the amount of distribution shift. In addition, we define a weaker notion of regret called robust satisficing regret, in which our algorithm achieves a sublinear upper bound independent of the amount of distribution shift. To demonstrate the effectiveness of our method, we apply it to various learning problems and compare it to other approaches, such as distributionally robust optimization.", "keywords": "robust satisficing;regret minimization;Gaussian processes", "primary_area": "", "supplementary_material": "", "author": "Artun Saday;Y. Cahit Y\u0131ld\u0131r\u0131m;Cem Tekin", "authorids": "~Artun_Saday1;~Y._Cahit_Y\u0131ld\u0131r\u0131m1;~Cem_Tekin2", "gender": "M;M;M", "homepage": ";https://github.com/cahity;http://kilyos.ee.bilkent.edu.tr/~cemtekin/", "dblp": "355/0000;;98/7659.html", "google_scholar": "SfahOaUAAAAJ;A_jDtPoAAAAJ;https://scholar.google.com/citations?hl=tr", "orcid": "0009-0005-6023-1454;;", "linkedin": "artun-saday-354b8b20a?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BI0F9Zm1PTwSOGLv3lwWAWw%3D%3D;;", "or_profile": "~Artun_Saday1;~Y._Cahit_Y\u0131ld\u0131r\u0131m1;~Cem_Tekin2", "aff": "Bilkent University;Bilkent University;Bilkent University", "aff_domain": "bilkent.edu.tr;bilkent.edu.tr;bilkent.edu.tr", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nsaday2023robust,\ntitle={Robust Bayesian Satisficing},\nauthor={Artun Saday and Y. 
Cahit Y{\\i}ld{\\i}r{\\i}m and Cem Tekin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xINPCvgULc}\n}", "github": "", "project": "", "reviewers": "bA8n;KF6g;Gzxd;gjzH", "pdf_size": 608696, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "55;72;140;241", "wc_strengths": "20;109;78;123", "wc_weaknesses": "91;348;205;154", "wc_questions": "317;9;235;21", "wc_limitations": "1;1;80;9", "wc_review": "484;539;738;548", "wc_reply_reviewers": "132;14;18;22", "wc_reply_authors": "446;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 127.0, 73.09924760214705 ], "wc_strengths_avg": [ 82.5, 39.588508433635134 ], "wc_weaknesses_avg": [ 199.5, 94.76945710512433 ], "wc_questions_avg": [ 145.5, 133.74883177059903 ], "wc_limitations_avg": [ 22.75, 33.214266513051285 ], "wc_review_avg": [ 577.25, 95.98795497352779 ], "wc_reply_reviewers_avg": [ 46.5, 49.44441323344833 ], "wc_reply_authors_avg": [ 111.5, 193.1236650439298 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8509694282767563001&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bilkent.edu.tr;bilkent.edu.tr;bilkent.edu.tr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Bilkent University", "aff_unique_dep": "", "aff_unique_url": "https://www.bilkent.edu.tr", "aff_unique_abbr": "Bilkent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "T\u00fcrkiye" }, { "title": "Mathematical Capabilities of ChatGPT", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73421", "id": "xJ7YWXQOrg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/58168e8a92994655d6da3939e7cc0918-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=xJ7YWXQOrg", "openreview": "https://openreview.net/forum?id=xJ7YWXQOrg", "poster": "/media/PosterPDFs/NeurIPS%202023/73421.png?t=1702248513.815312", "slides": "https://nips.cc/virtual/2023/poster/73421", "video": "https://nips.cc/virtual/2023/poster/73421", "author_site": "Simon Frieder, Luca Pinchetti, Chevalier, Ryan-Rhys Griffiths, Tommaso Salvatori, Thomas Lukasiewicz, Philipp Petersen, Julius Berner", "tldr": "", "abstract": "We investigate the mathematical capabilities of two versions of ChatGPT (released 9-January-2023 and 30-January-2023) and of GPT-4 by testing them on publicly available datasets, as well as hand-crafted ones, using a novel evaluation scheme. In contrast to formal mathematics, where large databases of formal proofs are available (e.g., mathlib, the Lean Mathematical Library), current datasets of natural-language mathematics used to benchmark language models either cover only elementary mathematics or are very small. We address this by publicly releasing two new datasets: GHOSTS and miniGHOSTS. 
These are the first natural-language datasets curated by working researchers in mathematics that (1) aim to cover graduate-level mathematics, (2) provide a holistic overview of the mathematical capabilities of language models, and (3) distinguish multiple dimensions of mathematical reasoning. These datasets test, by using 1636 human expert evaluations, whether ChatGPT and GPT-4 can be helpful assistants to professional mathematicians by emulating use cases that arise in the daily professional activities of mathematicians. We benchmark the models on a range of fine-grained performance metrics. For advanced mathematics, this is the most detailed evaluation effort to date. We find that ChatGPT and GPT-4 can be used most successfully as mathematical assistants for querying facts, acting as mathematical search engines and knowledge base interfaces. GPT-4 can additionally be used for undergraduate-level mathematics but fails on graduate-level difficulty. Contrary to many positive reports in the media about GPT-4 and ChatGPT's exam-solving abilities (a potential case of selection bias), their overall mathematical performance is well below the level of a graduate student. Hence, if you aim to use ChatGPT to pass a graduate-level math exam, you would be better off copying from your average peer!", "keywords": "datasets;LLMs;ChatGPT;mathematical capabilities;evaluation;benchmarking", "primary_area": "", "supplementary_material": "", "author": "Simon Frieder;Luca Pinchetti;Alexis Chevalier;Ryan-Rhys Griffiths;Tommaso Salvatori;Thomas Lukasiewicz;Philipp Christian Petersen;Julius Berner", "authorids": "~Simon_Frieder1;~Luca_Pinchetti1;~Alexis_Chevalier1;~Ryan-Rhys_Griffiths1;~Tommaso_Salvatori1;~Thomas_Lukasiewicz2;~Philipp_Christian_Petersen1;~Julius_Berner1", "gender": ";M;Not Specified;M;M;;;M", "homepage": ";;https://www.ias.edu/scholars/alexis-chevalier;;https://www.cs.ox.ac.uk/people/tommaso.salvatori/;https://www.cs.ox.ac.uk/people/thomas.lukasiewicz/;http://www.pc-petersen.eu;https://jberner.info/", "dblp": ";;;241/7007;270/2016;l/ThomasLukasiewicz;180/7135;227/2217", "google_scholar": ";;;https://scholar.google.co.uk/citations?user=RBKs-lEAAAAJ;https://scholar.google.com/citations?hl=en;arjucpEAAAAJ;;73-D2jgAAAAJ", "orcid": ";;;0000-0003-3117-4559;;;;0000-0002-5648-648X", "linkedin": ";luca-pinchetti-414230222/;alexchvl;ryan-rhys-griffiths-689b73128/;;;;julius-berner/", "or_profile": "~Simon_Frieder1;~Luca_Pinchetti1;~Alexis_Chevalier1;~Ryan-Rhys_Griffiths1;~Tommaso_Salvatori1;~Thomas_Lukasiewicz2;~Philipp_Christian_Petersen1;~Julius_Berner1", "aff": ";Department of Computer Science, University of Oxford;Institute for Advanced Study, Princeton;Meta Facebook;VERSES;Department of Computer Science, University of Oxford;Universit\u00e4t Vienna;University of Vienna", "aff_domain": ";cs.ox.ac.uk;ias.edu;facebook.com;verses.ai;cs.ox.ac.uk;univie.ac.at;univie.ac.at", "position": ";PhD student;Postdoc;Postdoc;Researcher;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nfrieder2023mathematical,\ntitle={Mathematical Capabilities of Chat{GPT}},\nauthor={Simon Frieder and Luca Pinchetti and Alexis Chevalier and Ryan-Rhys Griffiths and Tommaso Salvatori and Thomas Lukasiewicz and Philipp Christian Petersen and Julius Berner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=xJ7YWXQOrg}\n}", "github": "", "project": "", "reviewers": "3gvx;7RM4;2boP;Sgwr;9g86",
"pdf_size": 921237, "rating": "5;5;7;7;8", "confidence": "4;4;4;4;3", "wc_summary_and_contributions": "47;38;95;91;144", "wc_strengths": "54;76;149;76;189", "wc_improvement": "64;198;134;103;149", "wc_limitations": "43;43;138;1;208", "wc_correctness": "45;7;149;1;101", "wc_clarity": "1;14;106;1;136", "wc_relation_to_prior_work": "62;1;153;1;126", "wc_documentation": "56;1;164;1;167", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "373;379;1089;276;1221", "wc_reply_reviewers": "149;465;35;0;0", "wc_reply_authors": "1896;1391;1201;528;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "5;3;2;1;0", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 83.0, 38.07886552931954 ], "wc_strengths_avg": [ 108.8, 51.38637951831205 ], "wc_improvement_avg": [ 129.6, 44.911468468532625 ], "wc_limitations_avg": [ 86.6, 75.510529067144 ], "wc_correctness_avg": [ 60.6, 56.77182399747255 ], "wc_clarity_avg": [ 51.6, 57.649284470841444 ], "wc_relation_to_prior_work_avg": [ 68.6, 62.61181997035384 ], "wc_documentation_avg": [ 77.8, 74.37580251667877 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 667.6, 401.81070169919565 ], "wc_reply_reviewers_avg": [ 129.8, 176.30133295015102 ], "wc_reply_authors_avg": [ 1003.2, 666.1577590931446 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 1.7204650534085253 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6666666666666666, "gs_citation": 660, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10481878331342547334&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 11, "email": ";cs.ox.ac.uk;ias.edu;facebook.com;verses.ai;cs.ox.ac.uk;univie.ac.at;univie.ac.at", "author_num": 8, "aff_unique_index": "0;1;2;0;4;4", "aff_unique_norm": "University of Oxford;Institute for Advanced Study;Meta;;University of Vienna", "aff_unique_dep": "Department of Computer Science;;Meta Platforms, Inc.;;", "aff_unique_url": "https://www.ox.ac.uk;https://wwwIAS.edu;https://meta.com;;https://univie.ac.at", "aff_unique_abbr": "Oxford;IAS;Meta;;UV", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Oxford;Princeton;", "aff_country_unique_index": "0;1;1;0;3;3", "aff_country_unique": "United Kingdom;United States;;Austria" }, { "title": "Knowledge-Augmented Reasoning Distillation for Small Language Models in Knowledge-Intensive Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70015", "id": "xJLEQQrFia", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/97faedc90260eae5c400f92d5831c3d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xJLEQQrFia", "openreview": "https://openreview.net/forum?id=xJLEQQrFia", "poster": "/media/PosterPDFs/NeurIPS%202023/70015.png?t=1702002600.6314964", "slides": "https://nips.cc/virtual/2023/poster/70015", "video": "https://nips.cc/virtual/2023/poster/70015", "author_site": "Minki Kang, Seanie Lee, Jinheon Baek, Kenji Kawaguchi, Sung Ju Hwang", "tldr": "", "abstract": "Large Language Models (LLMs) have shown promising performance in knowledge-intensive reasoning tasks that require a compound understanding of knowledge. \nHowever, deployment of the LLMs in real-world applications can be challenging due to their high computational requirements and concerns on data privacy.\nPrevious studies have focused on building task-specific small Language Models (LMs) by fine-tuning them with labeled data or distilling LLMs. 
However, these approaches are ill-suited for knowledge-intensive reasoning tasks due to the limited capacity of small LMs in memorizing the knowledge required.\nMotivated by our theoretical analysis on memorization, we propose Knowledge-Augmented Reasoning Distillation (KARD), a novel method that fine-tunes small LMs to generate rationales obtained from LLMs with augmented knowledge retrieved from an external knowledge base. Moreover, we further propose a neural reranker to obtain documents relevant to rationale generation. We empirically show that KARD significantly improves the performance of small T5 and GPT models on the challenging knowledge-intensive reasoning datasets, namely MedQA-USMLE, StrategyQA, and OpenbookQA.\nNotably, our method makes the 250M T5 models achieve superior performance against the fine-tuned 3B models, having 12 times larger parameters, on both MedQA-USMLE and StrategyQA benchmarks.", "keywords": "language model;distillation;reasoning;knowledge augmentation", "primary_area": "", "supplementary_material": "/attachment/ee5a88f644eae1ff9c1a42538db4a332b2bf9f04.zip", "author": "Minki Kang;Seanie Lee;Jinheon Baek;Kenji Kawaguchi;Sung Ju Hwang", "authorids": "~Minki_Kang1;~Seanie_Lee1;~Jinheon_Baek1;~Kenji_Kawaguchi1;~Sung_Ju_Hwang1", "gender": "M;M;M;;", "homepage": "https://nardien.github.io;https://seanie12.github.io/;https://jinheonbaek.github.io;https://ml.comp.nus.edu.sg/#members;", "dblp": "232/2406;219/6771;262/6003;;", "google_scholar": "90G751oAAAAJ;zrZu6GkAAAAJ;U1FHaSUAAAAJ;aLl3rYoAAAAJ;", "orcid": ";;0000-0002-9367-560X;;", "linkedin": ";;jinheon-baek-8100a8144/;;", "or_profile": "~Minki_Kang1;~Seanie_Lee1;~Jinheon_Baek1;~Kenji_Kawaguchi1;~Sung_Ju_Hwang1", "aff": "AITRICS;Apple;Microsoft Research;National University of Singapore;", "aff_domain": "aitrics.com;apple.com;microsoft.com;nus.edu;", "position": "Researcher;Intern;Intern;Presidential Young Professor;", "bibtex": "@inproceedings{\nkang2023knowledgeaugmented,\ntitle={Knowledge-Augmented Reasoning Distillation for Small Language Models in Knowledge-Intensive Tasks},\nauthor={Minki Kang and Seanie Lee and Jinheon Baek and Kenji Kawaguchi and Sung Ju Hwang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xJLEQQrFia}\n}", "github": "", "project": "", "reviewers": "RfCw;Z8sP;7NNq;AUtH;jTmB", "pdf_size": 913625, "rating": "4;6;6;7;7", "confidence": "4;5;3;5;4", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;2", "presentation": "3;4;3;4;4", "wc_summary": "35;160;162;67;288", "wc_strengths": "38;34;52;23;99", "wc_weaknesses": "73;172;57;77;172", "wc_questions": "38;56;44;41;24", "wc_limitations": "7;12;1;7;12", "wc_review": "191;434;316;215;595", "wc_reply_reviewers": "139;85;0;0;0", "wc_reply_authors": "938;72;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "4;2;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 142.4, 88.44568955014144 ], "wc_strengths_avg": [ 49.2, 26.573671180324332 ], "wc_weaknesses_avg": [ 110.2, 50.901473456079835 ], "wc_questions_avg": [ 40.6, 10.307278981380101 ], "wc_limitations_avg": [ 7.8, 4.069397989875161 ], "wc_review_avg": [ 350.2, 149.49434771923654 ], "wc_reply_reviewers_avg": [ 44.8, 57.46442377680298 ], "wc_reply_authors_avg": [ 202.0, 369.0550094498109 ], 
"reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.24397501823713333, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17560684648528822927&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "aitrics.com;apple.com;microsoft.com;nus.edu;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "AITRICS;Apple;Microsoft;National University of Singapore", "aff_unique_dep": ";Apple Inc.;Microsoft Research;", "aff_unique_url": "https://www.aitrics.com;https://www.apple.com;https://www.microsoft.com/en-us/research;https://www.nus.edu.sg", "aff_unique_abbr": "AITRICS;Apple;MSR;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "South Korea;United States;Singapore" }, { "title": "Validated Image Caption Rating Dataset", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73420", "id": "xKYtTmtyI2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c0b91f9a3587bf35287f41dba5d20233-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=xKYtTmtyI2", "openreview": "https://openreview.net/forum?id=xKYtTmtyI2", "poster": "/media/PosterPDFs/NeurIPS%202023/73420.png?t=1699591024.1012955", "slides": "https://nips.cc/virtual/2023/poster/73420", "video": "https://nips.cc/virtual/2023/poster/73420", "author_site": "Lothar D Narins, Andrew Scott, Aakash Gautam, Anagha Kulkarni, Mar Castanon, Benjamin Kao, Shasta Ihorn, Yue-Ting Siu, James M. Mason, Alexander Blum, Ilmi Yoon", "tldr": "", "abstract": "We present a new high-quality validated image caption rating (VICR) dataset. How well a caption fits an image can be difficult to assess due to the subjective nature of caption quality. How do we evaluate whether a caption is good? We generated a new dataset to help answer this question by using our new image caption rating system, which consists of a novel robust rating scale and gamified approach to gathering human ratings. We show that our approach is consistent and teachable. 113 participants were involved in generating the dataset, which is composed of 68,217 ratings among 15,646 image-caption pairs. Our new dataset has greater inter-rater agreement than the state of the art, and custom machine learning rating predictors that were trained on our dataset outperform previous metrics. 
We improve over Flickr8k-Expert in Kendall's $W$ by 12\\% and in Fleiss' $\\kappa$ by 19\\%, and thus provide a new benchmark dataset for image caption rating.", "keywords": "dataset;human-in-the-loop;image captioning;visually-impaired;multimodal learning", "primary_area": "", "supplementary_material": "/attachment/4c8593aa8a3f5b5648a64b0827ede48f5b2ffa9a.zip", "author": "Lothar Narins;Andrew T Scott;Aakash Gautam;Anagha Kulkarni;Mar Castanon;Benjamin Kao;Shasta Ihorn;Yue-Ting Siu;James M Mason;Alexander Mario Blum;Ilmi Yoon", "authorids": "~Lothar_Narins1;~Andrew_T_Scott1;~Aakash_Gautam1;~Anagha_Kulkarni2;~Mar_Castanon1;~Benjamin_Kao1;~Shasta_Ihorn1;~Yue-Ting_Siu1;~James_M_Mason1;~Alexander_Mario_Blum1;~Ilmi_Yoon1", "gender": "M;M;;Not Specified;;M;;;;;F", "homepage": ";;https://aakash.xyz/;https://faculty.sfsu.edu/~ak;https://github.com/jcastan6;;http://www.sfsu.edu;http://www.tplus.education/;;;https://cs.sfsu.edu/people/faculty/ilmi-yoon", "dblp": ";;;;;;;;;;", "google_scholar": ";;b3DcFdsAAAAJ;;;;;hUx1178AAAAJ;;6o5DYFUAAAAJ;8J0vqLUAAAAJ", "orcid": "0009-0004-4030-7186;0009-0009-1362-0413;;;;0009-0008-7907-9554;;;; 0000-0002-5887-7417;0000-0002-2418-5287", "linkedin": "lothar-narins/;andrew-taylor-scott/;;;;benjamin-kao/;;yuetingsiu/;;;ilmi-yoon-40a06bb/?lipi=urn%3Ali%3Apage%3Ad_flagship3_feed%3BRRChpd6BRwSawAU3UOzYuQ%3D%3D", "or_profile": "~Lothar_Narins1;~Andrew_T_Scott1;~Aakash_Gautam1;~Anagha_Kulkarni2;~Mar_Castanon1;~Benjamin_Kao1;~Shasta_Ihorn1;~Yue-Ting_Siu1;~James_M_Mason1;~Alexander_Mario_Blum1;~Ilmi_Yoon1", "aff": "San Francisco State University;;San Francisco State University;San Francisco State University;;San Francisco State University;San Francisco State University;San Francisco State University;;Stanford University;San Francisco State University", "aff_domain": "sfsu.edu;;sfsu.edu;sfsu.edu;;sfsu.edu;sfsu.edu;sfsu.edu;;stanford.edu;sfsu.edu", "position": "MS student;;Assistant Professor;Associate Professor;;MS student;Assistant Professor;Associate Professor;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nnarins2023validated,\ntitle={Validated Image Caption Rating Dataset},\nauthor={Lothar Narins and Andrew T Scott and Aakash Gautam and Anagha Kulkarni and Mar Castanon and Benjamin Kao and Shasta Ihorn and Yue-Ting Siu and James M Mason and Alexander Mario Blum and Ilmi Yoon},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=xKYtTmtyI2}\n}", "github": "", "project": "", "reviewers": "Rnsb;Qb6S;ktRk;CvUE", "pdf_size": 4250025, "rating": "6;7;7;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "110;77;68;95", "wc_strengths": "127;61;70;51", "wc_improvement": "325;182;166;35", "wc_limitations": "168;24;9;88", "wc_correctness": "9;26;12;6", "wc_clarity": "10;1;4;80", "wc_relation_to_prior_work": "25;11;1;22", "wc_documentation": "1;10;4;23", "wc_additional_feedback": "1;1;1;1", "wc_review": "776;393;335;401", "wc_reply_reviewers": "0;154;20;58", "wc_reply_authors": "712;539;390;785", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 87.5, 16.224980739587952 ], "wc_strengths_avg": [ 77.25, 29.498940658945703 ], "wc_improvement_avg": [ 177.0, 102.73022924144577 ], "wc_limitations_avg": [ 72.25, 62.739042868057844 ], "wc_correctness_avg": [ 13.25, 7.660776723022281 ], "wc_clarity_avg": [ 23.75, 32.63721035873011 ], 
"wc_relation_to_prior_work_avg": [ 14.75, 9.496709956611289 ], "wc_documentation_avg": [ 9.5, 8.440971508067067 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 476.25, 174.92480527358035 ], "wc_reply_reviewers_avg": [ 58.0, 59.21148537234985 ], "wc_reply_authors_avg": [ 606.5, 153.64000130174432 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15256336923176888192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sfsu.edu;;sfsu.edu;sfsu.edu;;sfsu.edu;sfsu.edu;sfsu.edu;;stanford.edu;sfsu.edu", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "San Francisco State University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sfsu.edu;https://www.stanford.edu", "aff_unique_abbr": "SFSU;Stanford", "aff_campus_unique_index": "0;0;0;0;0;0;1;0", "aff_campus_unique": "San Francisco;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hierarchical Multi-Agent Skill Discovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70014", "id": "xMgO04HDOS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c276c3303c0723c83a43b95a44a1fcbf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xMgO04HDOS", "openreview": "https://openreview.net/forum?id=xMgO04HDOS", "poster": "/media/PosterPDFs/NeurIPS%202023/70014.png?t=1700228094.3811097", "slides": "https://nips.cc/virtual/2023/poster/70014", "video": "https://nips.cc/virtual/2023/poster/70014", "author_site": "Mingyu Yang, Yaodong Yang, Zhenbo Lu, Wengang Zhou, Houqiang Li", "tldr": "", "abstract": "Skill discovery has shown significant progress in unsupervised reinforcement learning. This approach enables the discovery of a wide range of skills without any extrinsic reward, which can be effectively combined to tackle complex tasks. However, such unsupervised skill learning has not been well applied to multi-agent reinforcement learning (MARL) due to two primary challenges. One is how to learn skills not only for the individual agents but also for the entire team, and the other is how to coordinate the skills of different agents to accomplish multi-agent tasks. To address these challenges, we present Hierarchical Multi-Agent Skill Discovery (HMASD), a two-level hierarchical algorithm for discovering both team and individual skills in MARL. The high-level policy employs a transformer structure to realize sequential skill assignment, while the low-level policy learns to discover valuable team and individual skills. 
We evaluate HMASD on sparse reward multi-agent benchmarks, and the results show that HMASD achieves significant performance improvements compared to strong MARL baselines.", "keywords": "Multi-Agent Reinforcement Learning;Hierarchical Skill Discovery;Probabilistic Graphical Model", "primary_area": "", "supplementary_material": "/attachment/5d805927131cc9514e3c719763a7d8d493043963.zip", "author": "Mingyu Yang;Yaodong Yang;Zhenbo Lu;Wengang Zhou;Houqiang Li", "authorids": "~Mingyu_Yang1;~Yaodong_Yang1;~Zhenbo_Lu1;~Wengang_Zhou1;~Houqiang_Li1", "gender": "M;M;M;M;M", "homepage": ";https://www.yangyaodong.com;;http://staff.ustc.edu.cn/~zhwg/index.html;https://staff.ustc.edu.cn/~lihq/", "dblp": ";170/1496-1;42/501;22/4544-1;59/7017.html", "google_scholar": ";https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;8s1JF8YAAAAJ;7sFMIKoAAAAJ", "orcid": "0000-0002-6077-6711;0000-0001-8132-5613;;0000-0003-1690-9836;0000-0003-2188-3028", "linkedin": ";yaodong-yang;;;", "or_profile": "~Mingyu_Yang1;~Yaodong_Yang1;~Zhenbo_Lu1;~Wengang_Zhou1;~Houqiang_Li1", "aff": "University of Science and Technology of China;Peking University;Institute of Artificial Intelligence, Hefei Comprehensive National Science Center;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;pku.edu.cn;iai.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Assistant Professor;Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nyang2023hierarchical,\ntitle={Hierarchical Multi-Agent Skill Discovery},\nauthor={Mingyu Yang and Yaodong Yang and Zhenbo Lu and Wengang Zhou and Houqiang Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xMgO04HDOS}\n}", "github": "", "project": "", "reviewers": "HRuC;Jm7Q;Ty5M;7A65;skAV", "pdf_size": 1773360, "rating": "5;5;6;6;6", "confidence": "5;2;4;3;3", "soundness": "3;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "82;105;72;80;93", "wc_strengths": "34;12;54;73;133", "wc_weaknesses": "138;52;93;55;44", "wc_questions": "40;239;243;89;56", "wc_limitations": "14;12;6;38;30", "wc_review": "308;420;468;335;356", "wc_reply_reviewers": "0;80;35;43;0", "wc_reply_authors": "0;27;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 86.4, 11.46472851837321 ], "wc_strengths_avg": [ 61.2, 41.24754538151331 ], "wc_weaknesses_avg": [ 76.4, 35.13744441475504 ], "wc_questions_avg": [ 133.4, 89.27396036919164 ], "wc_limitations_avg": [ 20.0, 12.0 ], "wc_review_avg": [ 377.4, 58.4725576659684 ], "wc_reply_reviewers_avg": [ 31.6, 29.937267744401794 ], "wc_reply_authors_avg": [ 5.4, 10.8 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.0800640769025436, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5526762885853756175&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;pku.edu.cn;iai.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Science and 
Technology of China;Peking University;Hefei Comprehensive National Science Center", "aff_unique_dep": ";;Institute of Artificial Intelligence", "aff_unique_url": "http://www.ustc.edu.cn;http://www.pku.edu.cn;http://www.hfcn.edu.cn", "aff_unique_abbr": "USTC;Peking U;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hefei", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Optimal Algorithms for the Inhomogeneous Spiked Wigner Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70013", "id": "xNUmTRYtV1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f0a6b46b0183a62a2db973014e3429f4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xNUmTRYtV1", "openreview": "https://openreview.net/forum?id=xNUmTRYtV1", "poster": "/media/PosterPDFs/NeurIPS%202023/70013.png?t=1701989492.323679", "slides": "https://nips.cc/virtual/2023/poster/70013", "video": "https://nips.cc/virtual/2023/poster/70013", "author_site": "Aleksandr Pak, Justin Ko, Justin Ko, Florent Krzakala", "tldr": "", "abstract": "We study a spiked Wigner problem with an inhomogeneous noise profile. Our aim in this problem is to recover the signal passed through an inhomogeneous low-rank matrix channel. While the information-theoretic performances are well-known, we focus on the algorithmic problem. First, we derive an approximate message-passing algorithm (AMP) for the inhomogeneous problem and show that its rigorous state evolution coincides with the information-theoretic optimal Bayes fixed-point equations. Second, we deduce a simple and efficient spectral method that outperforms PCA and is shown to match the information-theoretic transition.", "keywords": "Spectral Method;Community detection;Wigner Spike model;Random Matrix;BBP transition;Approximate Message Passing;Spin glasses;Statistical Physics", "primary_area": "", "supplementary_material": "/attachment/cb2f398f21e42b4b50850ef698d98cd74b80f2a5.pdf", "author": "Alexander Pak;Justin Ko;Florent Krzakala", "authorids": "aleksandr.pak@ens-lyon.fr;justin.ko@ens-lyon.fr;~Florent_Krzakala1", "gender": ";;", "homepage": ";;http://Krzakala.org", "dblp": ";;25/1282", "google_scholar": ";;https://scholar.google.fr/citations?user=3jDeUlMAAAAJ", "orcid": ";;0000-0003-2313-2578", "linkedin": ";;", "or_profile": "aleksandr.pak@ens-lyon.fr;justin.ko@ens-lyon.fr;~Florent_Krzakala1", "aff": ";;Swiss Federal Institute of Technology Lausanne", "aff_domain": ";;epfl.ch", "position": ";;Full Professor", "bibtex": "@inproceedings{\npak2023optimal,\ntitle={Optimal Algorithms for the Inhomogeneous Spiked Wigner Model},\nauthor={Alexander Pak and Justin Ko and Florent Krzakala},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xNUmTRYtV1}\n}", "github": "", "project": "", "reviewers": "qRVY;ifZx;R9KJ;kH5e", "pdf_size": 412332, "rating": "5;6;6;7", "confidence": "4;4;4;2", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "93;179;152;114", "wc_strengths": "75;86;219;42", "wc_weaknesses": "378;253;282;114", "wc_questions": "415;307;271;51", "wc_limitations": "1;4;24;9", "wc_review": "962;829;948;330", "wc_reply_reviewers": "281;79;0;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], 
"novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 134.5, 33.27536626394967 ], "wc_strengths_avg": [ 105.5, 67.5 ], "wc_weaknesses_avg": [ 256.75, 94.51289594547403 ], "wc_questions_avg": [ 261.0, 132.31779925618474 ], "wc_limitations_avg": [ 9.5, 8.845903006477066 ], "wc_review_avg": [ 767.25, 257.68136816619085 ], "wc_reply_reviewers_avg": [ 94.25, 111.75727045700427 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8444762749569951569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";;epfl.ch", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "title": "RL-based Stateful Neural Adaptive Sampling and Denoising for Real-Time Path Tracing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70012", "id": "xNyR7DXUzJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d1422213c9f2bdd5178b77d166fba86a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xNyR7DXUzJ", "openreview": "https://openreview.net/forum?id=xNyR7DXUzJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70012", "video": "https://nips.cc/virtual/2023/poster/70012", "author_site": "Antoine Scardigli, Lukas Cavigelli, Lorenz K. M\u00fcller", "tldr": "", "abstract": "Monte-Carlo path tracing is a powerful technique for realistic image synthesis but suffers from high levels of noise at low sample counts, limiting its use in real-time applications. To address this, we propose a framework with end-to-end training of a sampling importance network, a latent space encoder network, and a denoiser network. Our approach uses reinforcement learning to optimize the sampling importance network, thus avoiding explicit numerically approximated gradients. Our method does not aggregate the sampled values per pixel by averaging but keeps all sampled values which are then fed into the latent space encoder. The encoder replaces handcrafted spatiotemporal heuristics by learned representations in a latent space. Finally, a neural denoiser is trained to refine the output image. 
Our approach increases visual quality on several challenging datasets and reduces rendering times for equal quality by a factor of 1.6x compared to the previous state-of-the-art, making it a promising solution for real-time applications.", "keywords": "computer graphics;rendering;ray tracing;GPU acceleration;RL;spatiotemporal latent space", "primary_area": "", "supplementary_material": "/attachment/15b93abd5874a18f73513ea42c9dd1772b4eefd1.zip", "author": "Antoine Scardigli;Lukas Cavigelli;Lorenz K Muller", "authorids": "~Antoine_Scardigli1;~Lukas_Cavigelli1;~Lorenz_K_Muller1", "gender": "M;M;M", "homepage": ";;", "dblp": "321/1677;137/9406;139/1372", "google_scholar": "https://scholar.google.com/citations?hl=fr;https://scholar.google.ch/citations?user=15o2H4cAAAAJ;https://scholar.google.ch/citations?user=DxppwfcAAAAJ", "orcid": ";0000-0003-1767-7715;", "linkedin": "antoine-scardigli/;lcavigelli/;", "or_profile": "~Antoine_Scardigli1;~Lukas_Cavigelli1;~Lorenz_K_Muller1", "aff": "Huawei Technologies Ltd.;Huawei Technologies;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;huawei.com", "position": "Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nscardigli2023rlbased,\ntitle={{RL}-based Stateful Neural Adaptive Sampling and Denoising for Real-Time Path Tracing},\nauthor={Antoine Scardigli and Lukas Cavigelli and Lorenz K Muller},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xNyR7DXUzJ}\n}", "github": "", "project": "", "reviewers": "YEyS;1MpK;WTeK;CLYz", "pdf_size": 20605491, "rating": "5;5;5;6", "confidence": "3;2;1;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "80;44;174;66", "wc_strengths": "24;23;63;83", "wc_weaknesses": "127;132;169;105", "wc_questions": "6;3;47;117", "wc_limitations": "39;10;17;15", "wc_review": "276;212;470;386", "wc_reply_reviewers": "34;0;0;104", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.0, 49.60846701924985 ], "wc_strengths_avg": [ 48.25, 25.74271741677634 ], "wc_weaknesses_avg": [ 133.25, 23.004075725836064 ], "wc_questions_avg": [ 43.25, 45.99116763031789 ], "wc_limitations_avg": [ 20.25, 11.121488209767612 ], "wc_review_avg": [ 336.0, 99.2874614440313 ], "wc_reply_reviewers_avg": [ 34.5, 42.45880356298326 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tq7t3u0uEqgJ:scholar.google.com/&scioq=RL-based+Stateful+Neural+Adaptive+Sampling+and+Denoising+for+Real-Time+Path+Tracing&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "huawei.com;huawei.com;huawei.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Proximity-Informed Calibration for Deep Neural Networks", "status": "Spotlight", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/70011", "id": "xOJUmwwlJc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d826f5aadb26db488b8686097ceea2d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xOJUmwwlJc", "openreview": "https://openreview.net/forum?id=xOJUmwwlJc", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70011", "video": "https://nips.cc/virtual/2023/poster/70011", "author_site": "Miao Xiong, Ailin Deng, Pang Wei Koh, Jiaying Wu, Shen Li, Jianqing Xu, Bryan Hooi", "tldr": "", "abstract": "Confidence calibration is central to providing accurate and interpretable uncertainty estimates, especially under safety-critical scenarios. However, we find that existing calibration algorithms often overlook the issue of proximity bias, a phenomenon where models tend to be more overconfident in low proximity data (i.e., data lying in the sparse region of the data distribution) compared to high proximity samples, and thus suffer from inconsistent miscalibration across different proximity samples. We examine the problem over $504$ pretrained ImageNet models and observe that: 1) Proximity bias exists across a wide variety of model architectures and sizes; 2) Transformer-based models are relatively more susceptible to proximity bias than CNN-based models; 3) Proximity bias persists even after performing popular calibration algorithms like temperature scaling; 4) Models tend to overfit more heavily on low proximity samples than on high proximity samples. Motivated by the empirical findings, we propose ProCal, a plug-and-play algorithm with a theoretical guarantee to adjust sample confidence based on proximity. To further quantify the effectiveness of calibration algorithms in mitigating proximity bias, we introduce proximity-informed expected calibration error (PIECE) with theoretical analysis. We show that ProCal is effective in addressing proximity bias and improving calibration on balanced, long-tail, and distribution-shift settings under four metrics over various model architectures. 
We believe our findings on proximity bias will guide the development of fairer and better-calibrated models, contributing to the broader pursuit of trustworthy AI.", "keywords": "Calibration;Uncertainty Estimation;Trustworthiness;Fairness;Multicalibration", "primary_area": "", "supplementary_material": "", "author": "Miao Xiong;Ailin Deng;Pang Wei Koh;Jiaying Wu;Shen Li;Jianqing Xu;Bryan Hooi", "authorids": "~Miao_Xiong2;~Ailin_Deng1;~Pang_Wei_Koh1;~Jiaying_Wu2;~Shen_Li2;~Jianqing_Xu1;~Bryan_Hooi1", "gender": "F;;M;;M;;", "homepage": "https://miaoxiong2320.github.io/;https://d-ailin.github.io;http://cs.stanford.edu/~pangwei;https://jiayingwu19.github.io/;https://github.com/MathsShen;;http://bhooi.github.io", "dblp": ";70/3580;10/10453;15/4299.html;;;169/9975", "google_scholar": "yQ4U_5IAAAAJ;;Nn990CkAAAAJ;mrfO62wAAAAJ;;;", "orcid": ";;;;;;0000-0002-5645-1754", "linkedin": "miao-xiong-9b1892187/;;;;;;", "or_profile": "~Miao_Xiong2;~Ailin_Deng1;~Pang_Wei_Koh1;~Jiaying_Wu2;~Shen_Li2;~Jianqing_Xu1;~Bryan_Hooi1", "aff": "National University of Singapore;National University of Singapore;Google;National University of Singapore;National University of Singapore;;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;google.com;u.nus.edu;u.nus.edu;;nus.edu.sg", "position": "PhD student;PhD student;Researcher;PhD student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nxiong2023proximityinformed,\ntitle={Proximity-Informed Calibration for Deep Neural Networks},\nauthor={Miao Xiong and Ailin Deng and Pang Wei Koh and Jiaying Wu and Shen Li and Jianqing Xu and Bryan Hooi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xOJUmwwlJc}\n}", "github": "", "project": "", "reviewers": "4HP9;Wiui;4gc8;1D75", "pdf_size": 4444023, "rating": "6;6;7;7", "confidence": "3;3;4;5", "soundness": "3;3;3;4", "novelty": "3;3;4;3", "presentation": "4;3;3;3", "wc_summary": "115;90;65;93", "wc_strengths": "136;79;127;203", "wc_weaknesses": "237;69;152;3", "wc_questions": "137;43;10;176", "wc_limitations": "5;12;24;51", "wc_review": "630;293;378;526", "wc_reply_reviewers": "120;17;28;203", "wc_reply_authors": "375;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 17.725334975678173 ], "wc_strengths_avg": [ 136.25, 44.21184795956849 ], "wc_weaknesses_avg": [ 115.25, 87.91011034005133 ], "wc_questions_avg": [ 91.5, 67.46295279633111 ], "wc_limitations_avg": [ 23.0, 17.53567791675018 ], "wc_review_avg": [ 456.75, 130.21784631915858 ], "wc_reply_reviewers_avg": [ 92.0, 75.54137938904744 ], "wc_reply_authors_avg": [ 93.75, 162.37976320958225 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2986519151636511100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "u.nus.edu;nus.edu.sg;google.com;u.nus.edu;u.nus.edu;;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "National University of Singapore;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.nus.edu.sg;https://www.google.com",
"aff_unique_abbr": "NUS;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Singapore;United States" }, { "title": "CrossGNN: Confronting Noisy Multivariate Time Series Via Cross Interaction Refinement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70010", "id": "xOzlW2vUYc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9278abf072b58caf21d48dd670b4c721-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xOzlW2vUYc", "openreview": "https://openreview.net/forum?id=xOzlW2vUYc", "poster": "/media/PosterPDFs/NeurIPS%202023/70010.png?t=1699609377.5908034", "slides": "https://nips.cc/virtual/2023/poster/70010", "video": "https://nips.cc/virtual/2023/poster/70010", "author_site": "Qihe Huang, Lei Shen, Ruixin Zhang, Shouhong Ding, Binwu Wang, Zhengyang Zhou, Yang Wang", "tldr": "", "abstract": "Recently, multivariate time series (MTS) forecasting techniques have seen rapid development and widespread applications across various fields. Transformer-based and GNN-based methods have shown promising potential due to their strong ability to model interaction of time and variables. However, by conducting a comprehensive analysis of the real-world data, we observe that the temporal fluctuations and heterogeneity between variables are not well handled by existing methods. To address the above issues, we propose CrossGNN, a linear complexity GNN model to refine the cross-scale and cross-variable interaction for MTS. To deal with the unexpected noise in time dimension, an adaptive multi-scale identifier (AMSI) is leveraged to construct multi-scale time series with reduced noise. A Cross-Scale GNN is proposed to extract the scales with clearer trend and weaker noise. Cross-Variable GNN is proposed to utilize the homogeneity and heterogeneity between different variables. By simultaneously focusing on edges with higher saliency scores and constraining those edges with lower scores, the time and space complexity (i.e., $O(L)$) of CrossGNN can be linear with the input sequence length $L$. 
Extensive experimental results on 8 real-world MTS datasets demonstrate the effectiveness of CrossGNN compared with state-of-the-art methods.", "keywords": "Time Series Forecasting;", "primary_area": "", "supplementary_material": "/attachment/3f356fb24464064c64e28b932aa45f1ce448faba.pdf", "author": "Qihe Huang;Lei Shen;Ruixin Zhang;Shouhong Ding;Binwu Wang;Zhengyang Zhou;Yang Wang", "authorids": "~Qihe_Huang2;~Lei_Shen5;~Ruixin_Zhang1;~Shouhong_Ding3;~Binwu_Wang1;~Zhengyang_Zhou1;~Yang_Wang32", "gender": "M;M;M;M;M;M;M", "homepage": ";;;;https://continualgoing.github.io/;http://home.ustc.edu.cn/~zzy0929/Home/;http://staff.ustc.edu.cn/~angyan/", "dblp": ";;;119/6735;262/4302;246/8238;", "google_scholar": ";Rp4s7vIAAAAJ;DsGKajoAAAAJ;OGf40fkAAAAJ;;dPElQLUAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-8960-6583;;0000-0002-2463-4839;0000-0002-3175-3553;0000-0002-4638-0382;0000-0003-4728-7347;0000-0002-6079-7053", "linkedin": ";;;;;;", "or_profile": "~Qihe_Huang2;~Lei_Shen5;~Ruixin_Zhang1;~Shouhong_Ding3;~Binwu_Wang1;~Zhengyang_Zhou1;~Yang_Wang32", "aff": "University of Science and Technology of China;Tencent Youtu Lab;Tencent Youtu Lab;Tencent Youtu Lab;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;tencent.com;tencent.com;tencent.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Researcher;Researcher;researcher;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhuang2023crossgnn,\ntitle={Cross{GNN}: Confronting Noisy Multivariate Time Series Via Cross Interaction Refinement},\nauthor={Qihe Huang and Lei Shen and Ruixin Zhang and Shouhong Ding and Binwu Wang and Zhengyang Zhou and Yang Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xOzlW2vUYc}\n}", "github": "", "project": "", "reviewers": "MKp9;F3ix;LSa5;zRqD", "pdf_size": 4574684, "rating": "5;5;6;6", "confidence": "3;5;4;4", "soundness": "2;2;3;2", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "47;70;127;328", "wc_strengths": "35;34;51;60", "wc_weaknesses": "115;113;142;151", "wc_questions": "28;19;146;84", "wc_limitations": "10;1;5;7", "wc_review": "235;237;471;630", "wc_reply_reviewers": "175;91;205;14", "wc_reply_authors": "947;1583;191;73", "reply_reviewers": "2;2;1;1", "reply_authors": "4;5;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 143.0, 110.70907821854539 ], "wc_strengths_avg": [ 45.0, 10.977249200050075 ], "wc_weaknesses_avg": [ 130.25, 16.57369904396722 ], "wc_questions_avg": [ 69.25, 50.82998622860329 ], "wc_limitations_avg": [ 5.75, 3.2691742076555053 ], "wc_review_avg": [ 393.25, 166.9975673475515 ], "wc_reply_reviewers_avg": [ 121.25, 74.70065260759105 ], "wc_reply_authors_avg": [ 698.5, 610.9212306017855 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16787403634150538631&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;tencent.com;tencent.com;tencent.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 
7, "aff_unique_index": "0;1;1;1;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Tencent", "aff_unique_dep": ";Youtu Lab", "aff_unique_url": "http://www.ustc.edu.cn;https://www.tencent.com", "aff_unique_abbr": "USTC;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning DAGs from Data with Few Root Causes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70009", "id": "xPLaXSuSvQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/367ab3106d990825d5b47ce91db75a73-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xPLaXSuSvQ", "openreview": "https://openreview.net/forum?id=xPLaXSuSvQ", "poster": "/media/PosterPDFs/NeurIPS%202023/70009.png?t=1702100092.05645", "slides": "https://nips.cc/virtual/2023/poster/70009", "video": "https://nips.cc/virtual/2023/poster/70009", "author_site": "Panagiotis Misiakos, Chris Wendler, Markus P\u00fcschel", "tldr": "", "abstract": "We present a novel perspective and algorithm for learning directed acyclic graphs (DAGs) from data generated by a linear structural equation model (SEM). First, we show that a linear SEM can be viewed as a linear transform that, in prior work, computes the data from a dense input vector of random valued root causes (as we will call them) associated with the nodes. Instead, we consider the case of (approximately) few root causes and also introduce noise in the measurement of the data. Intuitively, this means that the DAG data is produced by few data generating events whose effect percolates through the DAG. We prove identifiability in this new setting and show that the true DAG is the global minimizer of the $L^0$-norm of the vector of root causes. 
For data satisfying the few root causes assumption, we show superior performance compared to prior DAG learning methods.", "keywords": "directed acyclic graph;few root causes;structural equation models;linear SEMs;additive noise", "primary_area": "", "supplementary_material": "", "author": "Panagiotis Misiakos;Chris Wendler;Markus P\u00fcschel", "authorids": "~Panagiotis_Misiakos1;~Chris_Wendler1;~Markus_P\u00fcschel1", "gender": "M;M;M", "homepage": "https://acl.inf.ethz.ch/people/panosm/;https://wendlerc.github.io/;https://acl.inf.ethz.ch/", "dblp": "270/4194;248/7764;37/6355", "google_scholar": "PlqKbB4AAAAJ;https://scholar.google.com/citations?hl=en;az9ZryAAAAAJ", "orcid": ";;0000-0001-8834-8551", "linkedin": ";;", "or_profile": "~Panagiotis_Misiakos1;~Chris_Wendler1;~Markus_P\u00fcschel1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "inf.ethz.ch;ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmisiakos2023learning,\ntitle={Learning {DAG}s from Data with Few Root Causes},\nauthor={Panagiotis Misiakos and Chris Wendler and Markus P{\\\"u}schel},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xPLaXSuSvQ}\n}", "github": "", "project": "", "reviewers": "STdi;NZKD;Aevs;LucQ", "pdf_size": 866581, "rating": "5;5;6;6", "confidence": "3;2;4;3", "soundness": "3;3;3;2", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "92;60;54;71", "wc_strengths": "43;41;39;20", "wc_weaknesses": "151;85;83;39", "wc_questions": "203;84;21;356", "wc_limitations": "19;6;1;39", "wc_review": "508;276;198;525", "wc_reply_reviewers": "15;61;38;64", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.25, 14.48059045757458 ], "wc_strengths_avg": [ 35.75, 9.202581159652981 ], "wc_weaknesses_avg": [ 89.5, 39.98437194704951 ], "wc_questions_avg": [ 166.0, 127.68907549199344 ], "wc_limitations_avg": [ 16.25, 14.686303142724515 ], "wc_review_avg": [ 376.75, 142.57169249188283 ], "wc_reply_reviewers_avg": [ 44.5, 19.78004044485248 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3903044112421939569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "inf.ethz.ch;ethz.ch;inf.ethz.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Stability of Random Forests and Coverage of Random-Forest Prediction Intervals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70008", "id": "xPqINp0Eu1", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/6452474601429509f3035dc81c233226-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xPqINp0Eu1", "openreview": "https://openreview.net/forum?id=xPqINp0Eu1", "poster": "/media/PosterPDFs/NeurIPS%202023/70008.png?t=1699585041.8880944", "slides": "https://nips.cc/virtual/2023/poster/70008", "video": "https://nips.cc/virtual/2023/poster/70008", "author_site": "Yan Wang, Huaiqing Wu, Dan Nettleton", "tldr": "", "abstract": "We establish stability of random forests under the mild condition that the squared response ($Y^2$) does not have a heavy tail. In particular, our analysis holds for the practical version of random forests that is implemented in popular packages like \\texttt{randomForest} in \\texttt{R}. Empirical results show that stability may persist even beyond our assumption and hold for heavy-tailed $Y^2$. Using the stability property, we prove a non-asymptotic lower bound for the coverage probability of prediction intervals constructed from the out-of-bag error of random forests. With another mild condition that is typically satisfied when $Y$ is continuous, we also establish a complementary upper bound, which can be similarly established for the jackknife prediction interval constructed from an arbitrary stable algorithm. We also discuss the asymptotic coverage probability under assumptions weaker than those considered in previous literature. Our work implies that random forests, with its stability property, is an effective machine learning method that can provide not only satisfactory point prediction but also justified interval prediction at almost no extra computational cost.", "keywords": "Stability;Prediction Intervals;Random Forests", "primary_area": "", "supplementary_material": "/attachment/95acabcc1866abf9a81a109ebda96e3ad527464e.zip", "author": "Yan Wang;Huaiqing Wu;Dan Nettleton", "authorids": "~Yan_Wang14;~Huaiqing_Wu1;~Dan_Nettleton1", "gender": "M;Not Specified;M", "homepage": ";https://www.stat.iastate.edu/people/huaiqing-wu;https://dnett.github.io", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;;5TdAL2cAAAAJ", "orcid": "0000-0002-8937-3000;;0000-0002-6045-1036", "linkedin": ";;dan-nettleton-77761512b/", "or_profile": "~Yan_Wang14;~Huaiqing_Wu1;~Daniel_Nettleton1", "aff": "Iowa State University;Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023stability,\ntitle={Stability of Random Forests and Coverage of Random-Forest Prediction Intervals},\nauthor={Yan Wang and Huaiqing Wu and Dan Nettleton},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xPqINp0Eu1}\n}", "github": "", "project": "", "reviewers": "BF9X;sZkt;mxxw;1UHc", "pdf_size": 359132, "rating": "2;6;7;8", "confidence": "5;4;4;3", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "138;112;315;78", "wc_strengths": "37;21;46;45", "wc_weaknesses": "308;39;138;227", "wc_questions": "239;36;296;5", "wc_limitations": "6;2;15;6", "wc_review": "728;210;810;361", "wc_reply_reviewers": "0;0;16;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 2.277608394786075 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 
0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 160.75, 91.56247866893949 ], "wc_strengths_avg": [ 37.25, 10.0093706095838 ], "wc_weaknesses_avg": [ 178.0, 100.27711603351983 ], "wc_questions_avg": [ 144.0, 125.61249937804756 ], "wc_limitations_avg": [ 7.25, 4.763139720814412 ], "wc_review_avg": [ 527.25, 249.26629836381812 ], "wc_reply_reviewers_avg": [ 7.25, 7.327175444876422 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9313806308475995, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2036427521831279583&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "iastate.edu;iastate.edu;iastate.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Tunnel Effect: Building Data Representations in Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70007", "id": "xQOHOpe1Fv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f249db9ab5975586f36df46f8958c008-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xQOHOpe1Fv", "openreview": "https://openreview.net/forum?id=xQOHOpe1Fv", "poster": "/media/PosterPDFs/NeurIPS%202023/70007.png?t=1702050510.1085675", "slides": "https://nips.cc/virtual/2023/poster/70007", "video": "https://nips.cc/virtual/2023/poster/70007", "author_site": "Wojciech Masarczyk, Mateusz Ostaszewski, Ehsan Imani, Razvan Pascanu, Piotr Mi\u0142o\u015b, Tomasz Trzcinski", "tldr": "", "abstract": "Deep neural networks are widely known for their remarkable effectiveness across various tasks, with the consensus that deeper networks implicitly learn more complex data representations. This paper shows that sufficiently deep networks trained for supervised image classification split into two distinct parts that contribute to the resulting data representations differently. The initial layers create linearly-separable representations, while the subsequent layers, which we refer to as \\textit{the tunnel}, compress these representations and have a minimal impact on the overall performance. We explore the tunnel's behavior through comprehensive empirical studies, highlighting that it emerges early in the training process. Its depth depends on the relation between the network's capacity and task complexity. 
Furthermore, we show that the tunnel degrades out-of-distribution generalization and discuss its implications for continual learning.", "keywords": "representation learning;continual learning;training dynamics", "primary_area": "", "supplementary_material": "/attachment/9442171a3a81e0dd57a37654805d9a99ae6302e0.pdf", "author": "Wojciech Masarczyk;Mateusz Ostaszewski;Ehsan Imani;Razvan Pascanu;Piotr Mi\u0142o\u015b;Tomasz Trzcinski", "authorids": "~Wojciech_Masarczyk1;~Mateusz_Ostaszewski1;~Ehsan_Imani1;~Razvan_Pascanu1;~Piotr_Mi\u0142o\u015b1;~Tomasz_Trzcinski2", "gender": "M;;M;M;;M", "homepage": ";;;https://razp.info;;https://cvlab.ii.pw.edu.pl/ttrzcins/", "dblp": "248/8352;;210/3130;65/8368.html;208/0989.html;05/11408", "google_scholar": "WBbTd80AAAAJ;;;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;Se68XecAAAAJ;https://scholar.google.pl/citations?user=bJMRBFoAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;piotr-milos-4b02151/;", "or_profile": "~Wojciech_Masarczyk1;~Mateusz_Ostaszewski1;~Ehsan_Imani1;~Razvan_Pascanu1;~Piotr_Mi\u0142o\u015b1;~Tomasz_Trzcinski2", "aff": "Warsaw University of Technology;;University of Alberta;Google DeepMind;IDEAS NCBR;", "aff_domain": "pw.edu.pl;;ualberta.ca;google.com;ideas-ncbr.pl;", "position": "PhD student;;PhD student;Research Scientist;Researcher;", "bibtex": "@inproceedings{\nmasarczyk2023the,\ntitle={The Tunnel Effect: Building Data Representations in Deep Neural Networks},\nauthor={Wojciech Masarczyk and Mateusz Ostaszewski and Ehsan Imani and Razvan Pascanu and Piotr Mi{\\l}o{\\'s} and Tomasz Trzcinski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xQOHOpe1Fv}\n}", "github": "", "project": "", "reviewers": "dmw5;Kpac;xLcw;EeeK", "pdf_size": 2597196, "rating": "3;6;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "1;2;3;4", "presentation": "2;3;3;4", "wc_summary": "66;94;40;81", "wc_strengths": "46;59;47;80", "wc_weaknesses": "395;127;136;159", "wc_questions": "3;209;53;56", "wc_limitations": "1;72;9;57", "wc_review": "511;561;285;433", "wc_reply_reviewers": "503;63;36;41", "wc_reply_authors": "751;24;13;11", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 20.07952937695503 ], "wc_strengths_avg": [ 58.0, 13.693063937629153 ], "wc_weaknesses_avg": [ 204.25, 110.74604959094478 ], "wc_questions_avg": [ 80.25, 77.25728121025227 ], "wc_limitations_avg": [ 34.75, 30.35107082130711 ], "wc_review_avg": [ 447.5, 104.32041986111827 ], "wc_reply_reviewers_avg": [ 160.75, 197.8590091454013 ], "wc_reply_authors_avg": [ 199.75, 318.3028235815699 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3379266175479589705&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "pw.edu.pl;;ualberta.ca;google.com;ideas-ncbr.pl;", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Warsaw University of Technology;University of Alberta;Google;Institute for Development, Economic Analysis, and Simulation (IDEAS)", "aff_unique_dep": ";;Google DeepMind;", "aff_unique_url": 
"https://www.pw.edu.pl;https://www.ualberta.ca;https://deepmind.com;https://www.ideas-ncbr.gov.pl", "aff_unique_abbr": "WUT;UAlberta;DeepMind;IDEAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Poland;Canada;United Kingdom" }, { "title": "Robust Model Reasoning and Fitting via Dual Sparsity Pursuit", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70006", "id": "xRfTcZdQxq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e1de63ec74f40d3234c4e053f3528e18-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xRfTcZdQxq", "openreview": "https://openreview.net/forum?id=xRfTcZdQxq", "poster": "/media/PosterPDFs/NeurIPS%202023/70006.png?t=1699540758.7814963", "slides": "https://nips.cc/virtual/2023/poster/70006", "video": "https://nips.cc/virtual/2023/poster/70006", "author_site": "Xingyu Jiang, Jiayi Ma", "tldr": "", "abstract": "In this paper, we contribute to solving a threefold problem: outlier rejection, true model reasoning and parameter estimation with a unified optimization modeling. To this end, we first pose this task as a sparse subspace recovering problem, to search a maximum of independent bases under an over-embedded data space. Then we convert the objective into a continuous optimization paradigm that estimates sparse solutions for both bases and errors. Wherein a fast and robust solver is proposed to accurately estimate the sparse subspace parameters and error entries, which is implemented by a proximal approximation method under the alternating optimization framework with the ``optimal'' sub-gradient descent. Extensive experiments regarding known and unknown model fitting on synthetic and challenging real datasets have demonstrated the superiority of our method against the state-of-the-art. We also apply our method to multi-class multi-model fitting and loop closure detection, and achieve promising results both in accuracy and efficiency. 
Code is released at: https://github.com/StaRainJ/DSP.", "keywords": "Model reasoning; Model fitting; Outliers; Sparse subspace learning; Feature matching", "primary_area": "", "supplementary_material": "/attachment/8c7a7e9e2df706422d543a072327bbfaa45c7cc6.zip", "author": "Xingyu Jiang;Jiayi Ma", "authorids": "~Xingyu_Jiang1;~Jiayi_Ma2", "gender": "M;M", "homepage": ";https://sites.google.com/site/jiayima2013/home", "dblp": ";96/9989", "google_scholar": "https://scholar.google.com.hk/citations?user=h2W90MQAAAAJ;73trMQkAAAAJ", "orcid": "0000-0001-9790-8856;0000-0003-3264-3265", "linkedin": ";", "or_profile": "~Xingyu_Jiang1;~Jiayi_Ma2", "aff": "Huazhong University of Science and Technology;Wuhan University", "aff_domain": "hust.edu.cn;whu.edu.cn", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\njiang2023robust,\ntitle={Robust Model Reasoning and Fitting via Dual Sparsity Pursuit},\nauthor={Xingyu Jiang and Jiayi Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xRfTcZdQxq}\n}", "github": "", "project": "", "reviewers": "WgUj;cPqq;1N35;bMPj;qrzv", "pdf_size": 0, "rating": "6;6;7;8;8", "confidence": "2;4;5;4;5", "soundness": "2;3;4;4;3", "novelty": "3;2;4;4;4", "presentation": "2;3;4;4;3", "wc_summary": "94;133;100;258;105", "wc_strengths": "23;57;56;49;111", "wc_weaknesses": "48;178;214;30;222", "wc_questions": "62;45;4;4;60", "wc_limitations": "56;7;20;1;12", "wc_review": "283;420;394;342;510", "wc_reply_reviewers": "22;16;67;14;61", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 138.0, 61.471944820381275 ], "wc_strengths_avg": [ 59.2, 28.680306832389366 ], "wc_weaknesses_avg": [ 138.4, 82.69848849888369 ], "wc_questions_avg": [ 35.0, 25.984610830258745 ], "wc_limitations_avg": [ 19.2, 19.425756098540926 ], "wc_review_avg": [ 389.8, 76.24801636764067 ], "wc_reply_reviewers_avg": [ 36.0, 23.09112383579457 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6123724356957946, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2231756110487726602&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 5, "email": "hust.edu.cn;whu.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Huazhong University of Science and Technology;Wuhan University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;http://www.whu.edu.cn/", "aff_unique_abbr": "HUST;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Act As You Wish: Fine-Grained Control of Motion Diffusion Model with Hierarchical Semantic Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70005", "id": "xSEhb2j3TK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/31fc85f7461ce71eadf27fb7281973bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xSEhb2j3TK", "openreview": "https://openreview.net/forum?id=xSEhb2j3TK", "poster": "/media/PosterPDFs/NeurIPS%202023/70005.png?t=1701487278.6179018", "slides": 
"https://nips.cc/virtual/2023/poster/70005", "video": "https://nips.cc/virtual/2023/poster/70005", "author_site": "Peng Jin, Yang Wu, Yanbo Fan, Zhongqian Sun, Wei Yang, Li Yuan", "tldr": "", "abstract": "Most text-driven human motion generation methods employ sequential modeling approaches, e.g., transformer, to extract sentence-level text representations automatically and implicitly for human motion synthesis. However, these compact text representations may overemphasize the action names at the expense of other important properties and lack fine-grained details to guide the synthesis of subtly distinct motion. In this paper, we propose hierarchical semantic graphs for fine-grained control over motion generation. Specifically, we disentangle motion descriptions into hierarchical semantic graphs including three levels of motions, actions, and specifics. Such global-to-local structures facilitate a comprehensive understanding of motion description and fine-grained control of motion generation. Correspondingly, to leverage the coarse-to-fine topology of hierarchical semantic graphs, we decompose the text-to-motion diffusion process into three semantic levels, which correspond to capturing the overall motion, local actions, and action specifics. Extensive experiments on two benchmark human motion datasets, including HumanML3D and KIT, with superior performances, justify the efficacy of our method. More encouragingly, by modifying the edge weights of hierarchical semantic graphs, our method can continuously refine the generated motion, which may have a far-reaching impact on the community. Code and pre-trained weights are available at https://github.com/jpthu17/GraphMotion.", "keywords": "Text-driven Motion Synthesis;Diffusion Models;Graph networks", "primary_area": "", "supplementary_material": "/attachment/025abcb86f17ef6fecf55a41855c0baf21e65eb3.pdf", "author": "Peng Jin;Yang Wu;Yanbo Fan;Zhongqian Sun;Yang Wei;Li Yuan", "authorids": "~Peng_Jin4;~Yang_Wu1;~Yanbo_Fan1;~Zhongqian_Sun1;~Yang_Wei2;~Li_Yuan2", "gender": "M;M;M;M;M;M", "homepage": ";;https://sites.google.com/site/yanbofan0124/;;;https://yuanli2333.github.io/", "dblp": "83/6151-1.html;56/1428-1;181/4574;70/8500;03/1094-32.html;98/4583-7", "google_scholar": "HHXLexAAAAAJ;https://scholar.google.com.hk/citations?user=vwOQ-UIAAAAJ;OlOqHyUAAAAJ;;;-5juAR0AAAAJ", "orcid": "0000-0001-9287-6410;;0000-0002-8530-485X;;;0000-0002-2120-5588", "linkedin": ";;;;;", "or_profile": "~Peng_Jin4;~Yang_Wu1;~Yanbo_Fan1;~Zhongqian_Sun1;~Yang_Wei2;~Yuan_LI2", "aff": "Peking University;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Peking University", "aff_domain": "pku.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;pku.edu.cn", "position": "PhD student;Principal Researcher;Associate Professor;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\njin2023act,\ntitle={Act As You Wish: Fine-Grained Control of Motion Diffusion Model with Hierarchical Semantic Graphs},\nauthor={Peng Jin and Yang Wu and Yanbo Fan and Zhongqian Sun and Yang Wei and Li Yuan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xSEhb2j3TK}\n}", "github": "", "project": "", "reviewers": "xtyr;E76p;Xx1r;DRsM", "pdf_size": 3843425, "rating": "5;6;6;6", "confidence": "3;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "54;63;51;69", "wc_strengths": "40;79;41;104", "wc_weaknesses": "122;76;182;68", "wc_questions": 
"55;87;4;54", "wc_limitations": "9;28;43;5", "wc_review": "280;333;321;300", "wc_reply_reviewers": "14;24;98;17", "wc_reply_authors": "104;121;947;101", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;4;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.25, 7.1545440106270926 ], "wc_strengths_avg": [ 66.0, 26.99073915253156 ], "wc_weaknesses_avg": [ 112.0, 45.36518488885502 ], "wc_questions_avg": [ 50.0, 29.690065678607045 ], "wc_limitations_avg": [ 21.25, 15.270478054075452 ], "wc_review_avg": [ 308.5, 20.254629100529094 ], "wc_reply_reviewers_avg": [ 38.25, 34.686993239541536 ], "wc_reply_authors_avg": [ 318.25, 363.0890903070485 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1514901826856417805&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "http://www.pku.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "Peking U;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GSLB: The Graph Structure Learning Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73419", "id": "xT3i5GS3zU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/60bc87f3cf5257579435d92ec12c761b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=xT3i5GS3zU", "openreview": "https://openreview.net/forum?id=xT3i5GS3zU", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73419", "video": "https://nips.cc/virtual/2023/poster/73419", "author_site": "Zhixun Li, Liang Wang, Xin Sun, Yifan Luo, Yanqiao Zhu, Dingshuo Chen, Yingtao Luo, Xiangxin Zhou, Qiang Liu, Shu Wu, Liang Wang, Jeffrey Yu", "tldr": "", "abstract": "Graph Structure Learning (GSL) has recently garnered considerable attention due to its ability to optimize both the parameters of Graph Neural Networks (GNNs) and the computation graph structure simultaneously. Despite the proliferation of GSL methods developed in recent years, there is no standard experimental setting or fair comparison for performance evaluation, which creates a great obstacle to understanding the progress in this field. To fill this gap, we systematically analyze the performance of GSL in different scenarios and develop a comprehensive Graph Structure Learning Benchmark (GSLB) curated from 20 diverse graph datasets and 16 distinct GSL algorithms. Specifically, GSLB systematically investigates the characteristics of GSL in terms of three dimensions: effectiveness, robustness, and complexity. We comprehensively evaluate state-of-the-art GSL algorithms in node- and graph-level tasks, and analyze their performance in robust learning and model complexity. Further, to facilitate reproducible research, we have developed an easy-to-use library for training, evaluating, and visualizing different GSL methods. 
Empirical results of our extensive experiments demonstrate the ability of GSL and reveal its potential benefits on various downstream tasks, offering insights and opportunities for future research. The code of GSLB is available at: https://github.com/GSL-Benchmark/GSLB.", "keywords": "Graph Neural Networks;Graph Structure Learning;Data Mining", "primary_area": "", "supplementary_material": "/attachment/608d741471259cb2c2ad907aa2a642b1079cc6a7.pdf", "author": "Zhixun Li;Liang Wang;Xin Sun;Yifan Luo;Yanqiao Zhu;Dingshuo Chen;Yingtao Luo;Xiangxin Zhou;Qiang Liu;Shu Wu;Liang Wang;Jeffrey Xu Yu", "authorids": "~Zhixun_Li1;~Liang_Wang16;~Xin_Sun9;~Yifan_Luo2;~Yanqiao_Zhu1;~Dingshuo_Chen1;~Yingtao_Luo1;~Xiangxin_Zhou1;~Qiang_Liu8;~Shu_Wu1;~Liang_Wang3;~Jeffrey_Xu_Yu1", "gender": "M;M;M;M;M;M;;Not Specified;M;M;M;M", "homepage": ";https://azureleon1.github.io/;https://sunxin000.github.io/;https://luobots.github.io/;https://sxkdz.github.io;;https://yingtaoluo.github.io/;;https://john-qiangliu.tech/;http://www.shuwu.name;;http://www.se.cuhk.edu.hk/people/yu.html", "dblp": ";56/4499-56;;;67/8383-1;289/7535;278/2956;247/9275;61/3234-6;06/3577;56/4499-1;y/JXuYu", "google_scholar": ";PdzDZdgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;NBbJT3AAAAAJ;jvrhEfIAAAAJ;g_MmNEoAAAAJ;eQgIWcQAAAAJ;https://scholar.google.co.jp/citations?user=D-lKLcMAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=iHevumsAAAAJ", "orcid": "0000-0001-6750-9002;0000-0002-4714-7582;;;0000-0003-2205-5304;;0000-0003-1794-3657;;0000-0002-9233-3827;0000-0003-2164-3577;;", "linkedin": ";;;yifan-luo-4674862b7/;;;;;;;;", "or_profile": "~Zhixun_Li1;~Liang_Wang16;~Xin_Sun9;~Yifan_Luo2;~Yanqiao_Zhu1;~Dingshuo_Chen1;~Yingtao_Luo1;~Xiangxin_Zhou1;~Qiang_Liu8;~Shu_Wu1;~Liang_Wang3;~Jeffrey_Xu_Yu1", "aff": "The Chinese University of Hong Kong;Institute of Automation, Chinese Academy of Sciences;University of Science and Technology of China;Beijing University of Posts and Telecommunications;University of California, Los Angeles;Institute of automation, Chinese Academy of Sciences;Carnegie Mellon University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation\uff0c CAS\uff0cChina;The Chinese University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;ia.ac.cn;ustc.edu.cn;bupt.edu.cn;ucla.edu;ia.ac.cn;andrew.cmu.edu;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;ia.ac.cn;cuhk.edu.hk", "position": "PhD student;PhD student;MS student;Undergrad student;PhD student;PhD student;PhD student;PhD student;Associate Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023gslb,\ntitle={{GSLB}: The Graph Structure Learning Benchmark},\nauthor={Zhixun Li and Liang Wang and Xin Sun and Yifan Luo and Yanqiao Zhu and Dingshuo Chen and Yingtao Luo and Xiangxin Zhou and Qiang Liu and Shu Wu and Liang Wang and Jeffrey Xu Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=xT3i5GS3zU}\n}", "github": "", "project": "", "reviewers": "h3AR;KDj1;QMnH;ee9D;wra6", "pdf_size": 530936, "rating": "6;6;6;7;7", "confidence": "3;4;3;3;4", "wc_summary_and_contributions": "148;58;29;84;56", "wc_strengths": "27;49;39;94;66", "wc_improvement": "149;83;70;72;154", "wc_limitations": "1;1;2;1;38", "wc_correctness": "24;71;1;15;14", "wc_clarity": "108;5;1;5;10", 
"wc_relation_to_prior_work": "20;14;1;15;65", "wc_documentation": "16;21;1;33;15", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "494;303;145;320;419", "wc_reply_reviewers": "13;106;0;0;0", "wc_reply_authors": "760;1362;237;204;876", "reply_reviewers": "1;1;0;0;0", "reply_authors": "1;3;2;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 75.0, 40.43760625952036 ], "wc_strengths_avg": [ 55.0, 23.31523107327054 ], "wc_improvement_avg": [ 105.6, 37.77088826066975 ], "wc_limitations_avg": [ 8.6, 14.705101155721437 ], "wc_correctness_avg": [ 25.0, 24.14125100321025 ], "wc_clarity_avg": [ 25.8, 41.19902911477405 ], "wc_relation_to_prior_work_avg": [ 23.0, 21.91802910847597 ], "wc_documentation_avg": [ 17.2, 10.322790320451153 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 336.2, 118.04981999139177 ], "wc_reply_reviewers_avg": [ 23.8, 41.40724574274411 ], "wc_reply_authors_avg": [ 687.8, 431.84738044823195 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8357664871366066241&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "se.cuhk.edu.hk;ia.ac.cn;ustc.edu.cn;bupt.edu.cn;ucla.edu;ia.ac.cn;andrew.cmu.edu;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;ia.ac.cn;cuhk.edu.hk", "author_num": 12, "aff_unique_index": "0;1;2;3;4;1;5;1;1;1;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Chinese Academy of Sciences;University of Science and Technology of China;Beijing University of Posts and Telecommunications;University of California, Los Angeles;Carnegie Mellon University", "aff_unique_dep": ";Institute of Automation;;;;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.ia.cas.cn;http://www.ustc.edu.cn;http://www.bupt.edu.cn/;https://www.ucla.edu;https://www.cmu.edu", "aff_unique_abbr": "CUHK;CAS;USTC;BUPT;UCLA;CMU", "aff_campus_unique_index": "0;2;3;0", "aff_campus_unique": "Hong Kong SAR;;Beijing;Los Angeles", "aff_country_unique_index": "0;0;0;0;1;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Compact Neural Volumetric Video Representations with Dynamic Codebooks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70004", "id": "xTgM7XLN9P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ef63b00ad8475605b2eaf520747f61d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xTgM7XLN9P", "openreview": "https://openreview.net/forum?id=xTgM7XLN9P", "poster": "/media/PosterPDFs/NeurIPS%202023/70004.png?t=1702193339.444763", "slides": "https://nips.cc/virtual/2023/poster/70004", "video": "https://nips.cc/virtual/2023/poster/70004", "author_site": "Haoyu Guo, Sida Peng, Yunzhi Yan, Linzhan Mou, Yujun Shen, Hujun Bao, Xiaowei Zhou", "tldr": "", "abstract": "This paper addresses the challenge of representing high-fidelity volumetric videos with low storage cost. Some recent feature grid-based methods have shown superior performance of fast learning implicit neural representations from input 2D images. However, such explicit representations easily lead to large model sizes when modeling dynamic scenes. To solve this problem, our key idea is reducing the spatial and temporal redundancy of feature grids, which intrinsically exist due to the self-similarity of scenes. 
To this end, we propose a novel neural representation, named dynamic codebook, which first merges similar features for the model compression and then compensates for the potential decline in rendering quality by a set of dynamic codes. Experiments on the NHR and DyNeRF datasets demonstrate that the proposed approach achieves state-of-the-art rendering quality, while being able to achieve more storage efficiency. The source code is available at https://github.com/zju3dv/compact_vv.", "keywords": "Computer Vision;3D Vision;Volumetric Video", "primary_area": "", "supplementary_material": "/attachment/f2d8f2da8a19b3c80eefec8cb9ecc722cd5eed8c.pdf", "author": "Haoyu Guo;Sida Peng;Yunzhi Yan;Linzhan Mou;Yujun Shen;Hujun Bao;Xiaowei Zhou", "authorids": "~Haoyu_Guo1;~Sida_Peng1;~Yunzhi_Yan1;~Linzhan_Mou1;~Yujun_Shen1;~Hujun_Bao1;~Xiaowei_Zhou3", "gender": "M;M;M;;;M;M", "homepage": "https://github.com/ghy0324;http://pengsida.net/;https://github.com/yunzhiy;;;http://www.cad.zju.edu.cn/home/bao/;http://xzhou.me/", "dblp": "277/3724;232/3246;152/4375;;;b/HujunBao;", "google_scholar": "56yI64J5Ot8C;;_kvj3i8AAAAJ;;;AZCcDmsAAAAJ;E1vVpg4AAAAJ", "orcid": ";;;;;0000-0002-2662-0334;", "linkedin": ";;;;;;", "or_profile": "~Haoyu_Guo1;~Sida_Peng1;~Yunzhi_Yan1;~Linzhan_Mou1;~Yujun_Shen1;~Hujun_Bao1;~Xiaowei_Zhou3", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;;;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;;zju.edu.cn;zju.edu.cn", "position": "PhD student;PhD student;Undergrad student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023compact,\ntitle={Compact Neural Volumetric Video Representations with Dynamic Codebooks},\nauthor={Haoyu Guo and Sida Peng and Yunzhi Yan and Linzhan Mou and Yujun Shen and Hujun Bao and Xiaowei Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xTgM7XLN9P}\n}", "github": "", "project": "", "reviewers": "uPwZ;ZNi1;Jvh8;u1Fc;bWLa", "pdf_size": 4425824, "rating": "3;4;6;7;7", "confidence": "5;4;4;4;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "34;32;141;159;75", "wc_strengths": "37;41;42;15;77", "wc_weaknesses": "85;151;23;13;81", "wc_questions": "29;28;228;70;166", "wc_limitations": "36;1;22;5;2", "wc_review": "221;253;456;262;401", "wc_reply_reviewers": "531;90;11;0;38", "wc_reply_authors": "562;104;21;0;13", "reply_reviewers": "3;1;1;0;1", "reply_authors": "4;2;2;1;2", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.2, 53.048656910425166 ], "wc_strengths_avg": [ 42.4, 19.89572818471342 ], "wc_weaknesses_avg": [ 70.6, 49.725647305992915 ], "wc_questions_avg": [ 104.2, 79.72051178962663 ], "wc_limitations_avg": [ 13.2, 13.702554506368513 ], "wc_review_avg": [ 318.6, 92.41341893902639 ], "wc_reply_reviewers_avg": [ 134.0, 200.92087995029286 ], "wc_reply_authors_avg": [ 140.0, 214.1354711391833 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7385489458759966, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8211977012082862844&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": 
"zju.edu.cn;zju.edu.cn;zju.edu.cn;;;zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking Incentives in Recommender Systems: Are Monotone Rewards Always Beneficial?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70003", "id": "xUyBP16Q5J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ebfabf372037aaa4a8d92c9b457ece3e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xUyBP16Q5J", "openreview": "https://openreview.net/forum?id=xUyBP16Q5J", "poster": "/media/PosterPDFs/NeurIPS%202023/70003.png?t=1701667105.1238296", "slides": "https://nips.cc/virtual/2023/poster/70003", "video": "https://nips.cc/virtual/2023/poster/70003", "author_site": "Fan Yao, Chuanhao Li, Karthik Abinav Sankararaman, Yiming Liao, Yan Zhu, Qifan Wang, Hongning Wang, Haifeng Xu", "tldr": "", "abstract": "The past decade has witnessed the flourishing of a new profession as media content creators, who rely on revenue streams from online content recommendation platforms. The reward mechanism employed by these platforms creates a competitive environment among creators which affects their production choices and, consequently, content distribution and system welfare. It is thus crucial to design the platform's reward mechanism in order to steer the creators' competition towards a desirable welfare outcome in the long run. This work makes two major contributions in this regard: first, we uncover a fundamental limit about a class of widely adopted mechanisms, coined \\emph{Merit-based Monotone Mechanisms}, by showing that they inevitably lead to a constant fraction loss of the optimal welfare. To circumvent this limitation, we introduce \\emph{Backward Rewarding Mechanisms} (BRMs) and show that the competition game resultant from BRMs possesses a potential game structure. BRMs thus naturally induce strategic creators' collective behaviors towards optimizing the potential function, which can be designed to match any given welfare metric. 
In addition, the class of BRM can be parameterized so that it allows the platform to directly optimize welfare within the feasible mechanism space even when the welfare metric is not explicitly defined.", "keywords": "Recommender system;Mechanism design;Potential function;Optimization", "primary_area": "", "supplementary_material": "", "author": "Fan Yao;Chuanhao Li;Karthik Abinav Sankararaman;Yiming Liao;Yan Zhu;Qifan Wang;Hongning Wang;Haifeng Xu", "authorids": "~Fan_Yao2;~Chuanhao_Li1;~Karthik_Abinav_Sankararaman1;~Yiming_Liao1;~Yan_Zhu1;~Qifan_Wang2;~Hongning_Wang1;~Haifeng_Xu1", "gender": "M;;M;M;M;M;M;M", "homepage": "https://github.com/MarcusYF/MarcusYF.github.io;https://cyrilli.github.io/;http://karthikabinavs.xyz;;;https://wqfcr.github.io/;http://www.cs.virginia.edu/~hw5x/;http://www.haifeng-xu.com/", "dblp": ";195/9947;154/4666;203/0243.html;;33/8610;05/6545;04/1895", "google_scholar": "Vb4MZPMAAAAJ;w2ShljkAAAAJ;uJ-Dhj4AAAAJ;https://scholar.google.com/citations?hl=en;uH5WA4oAAAAJ;LrSyLosAAAAJ;qkdvKNoAAAAJ;nLgg388AAAAJ", "orcid": "0009-0006-4764-4198;;;;;0000-0002-7570-5756;0000-0002-6524-9195;", "linkedin": ";;;yiming-liao-0382359a;;;;", "or_profile": "~Fan_Yao2;~Chuanhao_Li1;~Karthik_Abinav_Sankararaman1;~Yiming_Liao1;~Yan_Zhu1;~Qifan_Wang2;~Hongning_Wang1;~Haifeng_Xu1", "aff": "Meta Facebook;University of Virginia;Meta Facebook;Meta;Meta Facebook;Meta AI;University of Virginia;University of Chicago", "aff_domain": "meta.com;virginia.edu;fb.com;meta.com;fb.com;fb.com;virginia.edu;cs.uchicago.edu", "position": "Intern;PhD student;Research Scientist;Research Scientist;Research Scientist;Principal Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyao2023rethinking,\ntitle={Rethinking Incentives in Recommender Systems: Are Monotone Rewards Always Beneficial?},\nauthor={Fan Yao and Chuanhao Li and Karthik Abinav Sankararaman and Yiming Liao and Yan Zhu and Qifan Wang and Hongning Wang and Haifeng Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xUyBP16Q5J}\n}", "github": "", "project": "", "reviewers": "oV5f;TzPr;hvxp;88Nz", "pdf_size": 957849, "rating": "5;6;6;8", "confidence": "3;2;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "95;79;279;204", "wc_strengths": "253;51;129;42", "wc_weaknesses": "42;13;276;65", "wc_questions": "163;12;6;596", "wc_limitations": "65;13;1;2", "wc_review": "618;168;691;909", "wc_reply_reviewers": "4;10;39;69", "wc_reply_authors": "45;50;68;70", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 164.25, 81.86994259189389 ], "wc_strengths_avg": [ 118.75, 84.57060659590896 ], "wc_weaknesses_avg": [ 99.0, 103.83881740466809 ], "wc_questions_avg": [ 194.25, 240.32933133514936 ], "wc_limitations_avg": [ 20.25, 26.261902063635834 ], "wc_review_avg": [ 596.5, 269.5649272438831 ], "wc_reply_reviewers_avg": [ 30.5, 25.86986664055306 ], "wc_reply_authors_avg": [ 58.25, 10.917302780449024 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 16, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=9066233065581594488&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "meta.com;virginia.edu;fb.com;meta.com;fb.com;fb.com;virginia.edu;cs.uchicago.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;1;2", "aff_unique_norm": "Meta;University of Virginia;University of Chicago", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.virginia.edu;https://www.uchicago.edu", "aff_unique_abbr": "Meta;UVA;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fair Graph Distillation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70002", "id": "xW0ayZxPWs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ff6540c54a847ef9114a332c101f5edc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xW0ayZxPWs", "openreview": "https://openreview.net/forum?id=xW0ayZxPWs", "poster": "/media/PosterPDFs/NeurIPS%202023/70002.png?t=1698396141.927653", "slides": "https://nips.cc/virtual/2023/poster/70002", "video": "https://nips.cc/virtual/2023/poster/70002", "author_site": "Qizhang Feng, Zhimeng (Stephen) Jiang, Ruiquan Li, Yicheng Wang, Na Zou, Jiang Bian, Xia Hu", "tldr": "", "abstract": "As graph neural networks (GNNs) struggle with large-scale graphs due to high computational demands, data distillation for graph data promises to alleviate this issue by distilling a large real graph into a smaller distilled graph while maintaining comparable prediction performance for GNNs trained on both graphs. However, we observe that GNNs trained on distilled graphs may exhibit more severe group fairness problems than those trained on real graphs. Motivated by this observation, we propose \\textit{fair graph distillation} (\\Algnameabbr), an approach for generating small distilled \\textit{fair and informative} graphs based on the graph distillation method. The challenge lies in the deficiency of sensitive attributes for nodes in the distilled graph, making most debiasing methods (e.g., regularization and adversarial debiasing) intractable for distilled graphs. We develop a simple yet effective bias metric, called coherence, for distilled graphs. Based on the proposed coherence metric, we introduce a framework for fair graph distillation using a bi-level optimization algorithm. 
Extensive experiments demonstrate that the proposed algorithm can achieve better prediction performance-fairness trade-offs across various datasets and GNN architectures.", "keywords": "Graph Distillation;Algorithmic Fairness", "primary_area": "", "supplementary_material": "/attachment/e25a96d6e98aea39c3cd611daa4b70bf86fa743c.zip", "author": "Qizhang Feng;Zhimeng Jiang;Ruiquan Li;Yicheng Wang;Na Zou;Jiang Bian;Xia Hu", "authorids": "~Qizhang_Feng1;~Zhimeng_Jiang1;~Ruiquan_Li1;~Yicheng_Wang1;~Na_Zou2;~Jiang_Bian2;~Xia_Hu4", "gender": "M;M;M;M;F;M;M", "homepage": ";http://www.zhimengjiang.com/;https://www.google.com/;;https://nzou1.github.io/;https://jiangbian.me/;https://cs.rice.edu/~xh37/index.html", "dblp": "323/5667.html;217/3235;;;152/0090-1.html;09/851-1;256/9406.html", "google_scholar": ";5Es3Yk4AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;ysr--voAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": "0000-0002-2574-0270;0000-0001-6933-3952;;;0000-0003-1984-795X;0000-0002-2238-5429;", "linkedin": "qizhang-feng-355478197/;;;;na-zou-a1721535/;;", "or_profile": "~Qizhang_Feng1;~Zhimeng_Jiang1;~Ruiquan_Li1;~Yicheng_Wang1;~Na_Zou2;~Jiang_Bian2;~Xia_Hu2", "aff": "Texas A&M;Texas A&M University;University of Science and Technology of China;Texas A&M;Texas A&M University - College Station;University of Florida;Rice University", "aff_domain": "tamu.edu;tamu.edu;ustc.edu;tamu.edu;tamu.edu;ufl.edu;rice.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nfeng2023fair,\ntitle={Fair Graph Distillation},\nauthor={Qizhang Feng and Zhimeng Jiang and Ruiquan Li and Yicheng Wang and Na Zou and Jiang Bian and Xia Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xW0ayZxPWs}\n}", "github": "", "project": "", "reviewers": "5Zjc;QWwu;jsrW;LkqE", "pdf_size": 1250602, "rating": "5;5;6;7", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "58;125;90;70", "wc_strengths": "104;40;46;42", "wc_weaknesses": "109;316;78;22", "wc_questions": "3;3;39;76", "wc_limitations": "14;3;23;1", "wc_review": "288;487;276;211", "wc_reply_reviewers": "11;7;21;0", "wc_reply_authors": "33;0;37;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.75, 25.380849079571785 ], "wc_strengths_avg": [ 58.0, 26.645825188948457 ], "wc_weaknesses_avg": [ 131.25, 111.12914784159915 ], "wc_questions_avg": [ 30.25, 30.22726418318403 ], "wc_limitations_avg": [ 10.25, 8.870597499605086 ], "wc_review_avg": [ 315.5, 103.25817158946792 ], "wc_reply_reviewers_avg": [ 9.75, 7.595228765481656 ], "wc_reply_authors_avg": [ 17.5, 17.55704986607944 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14589392893946454478&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "tamu.edu;tamu.edu;ustc.edu;tamu.edu;tamu.edu;ufl.edu;rice.edu", "author_num": 7, "aff_unique_index": 
"0;0;1;0;0;2;3", "aff_unique_norm": "Texas A&M University;University of Science and Technology of China;University of Florida;Rice University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tamu.edu;http://www.ustc.edu.cn;https://www.ufl.edu;https://www.rice.edu", "aff_unique_abbr": "TAMU;USTC;UF;Rice", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Robust Data Pruning under Label Noise via Maximizing Re-labeling Accuracy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70001", "id": "xWCp0uLcpG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ebb6bee50913ba7e1efeb91a1d47a002-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xWCp0uLcpG", "openreview": "https://openreview.net/forum?id=xWCp0uLcpG", "poster": "/media/PosterPDFs/NeurIPS%202023/70001.png?t=1701673923.7892957", "slides": "https://nips.cc/virtual/2023/poster/70001", "video": "https://nips.cc/virtual/2023/poster/70001", "author_site": "Dongmin Park, Seola Choi, Doyoung Kim, Hwanjun Song, Jae-Gil Lee", "tldr": "", "abstract": "Data pruning, which aims to downsize a large training set into a small informative subset, is crucial for reducing the enormous computational costs of modern deep learning. Though large-scale data collections invariably contain annotation noise and numerous robust learning methods have been developed, data pruning for the noise-robust learning scenario has received little attention. With state-of-the-art Re-labeling methods that self-correct erroneous labels while training, it is challenging to identify which subset induces the most accurate re-labeling of erroneous labels in the entire training set. In this paper, we formalize the problem of data pruning with re-labeling. We first show that the likelihood of a training example being correctly re-labeled is proportional to the prediction confidence of its neighborhood in the subset. Therefore, we propose a novel data pruning algorithm, Prune4Rel, that finds a subset maximizing the total neighborhood confidence of all training examples, thereby maximizing the re-labeling accuracy and generalization performance. 
Extensive experiments on four real and one synthetic noisy datasets show that Prune4Rel outperforms the baselines with Re-labeling models by up to 9.1% as well as those with a standard model by up to 21.6%.", "keywords": "Data Pruning;Data Subset Selection;Noisy Labels;Relabeling;Self-training", "primary_area": "", "supplementary_material": "/attachment/58ebb66465ddd60ee13eb413eed67661415c6c3f.pdf", "author": "Dongmin Park;Seola Choi;Doyoung Kim;Hwanjun Song;Jae-Gil Lee", "authorids": "~Dongmin_Park1;~Seola_Choi1;~Doyoung_Kim2;~Hwanjun_Song2;~Jae-Gil_Lee1", "gender": "M;F;M;M;M", "homepage": "https://dongmean.github.io/;https://github.com/seolachoi;;https://songhwanjun.github.io/;https://dm.kaist.ac.kr/jaegil/", "dblp": "82/2651;;;204/3381;28/3904", "google_scholar": "https://scholar.google.co.kr/citations?user=4xXYQl0AAAAJ;https://scholar.google.co.kr/citations?user=U6P2mAgAAAAJ;vEAbNDYAAAAJ;Ijzuc-8AAAAJ;https://scholar.google.com.tw/citations?user=h9mbv9MAAAAJ", "orcid": ";;;0000-0002-1105-0818;0000-0002-8711-7732", "linkedin": "dongmin-park-82995613a/;seolachoi/;;;", "or_profile": "~Dongmin_Park1;~Seola_Choi1;~Doyoung_Kim2;~Hwanjun_Song2;~Jae-Gil_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Amazon Web Services;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;amazon.com;kaist.ac.kr", "position": "PhD student;Undergrad student;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\npark2023robust,\ntitle={Robust Data Pruning under Label Noise via Maximizing Re-labeling Accuracy},\nauthor={Dongmin Park and Seola Choi and Doyoung Kim and Hwanjun Song and Jae-Gil Lee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xWCp0uLcpG}\n}", "github": "", "project": "", "reviewers": "ALmL;jBWT;rFfD;N3nf", "pdf_size": 631120, "rating": "5;5;6;7", "confidence": "4;5;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "152;47;55;119", "wc_strengths": "105;42;30;56", "wc_weaknesses": "97;65;95;80", "wc_questions": "1;218;3;156", "wc_limitations": "7;4;8;36", "wc_review": "362;376;191;447", "wc_reply_reviewers": "54;37;30;27", "wc_reply_authors": "39;155;23;31", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.25, 43.92251700437943 ], "wc_strengths_avg": [ 58.25, 28.516442625264464 ], "wc_weaknesses_avg": [ 84.25, 12.910751333675357 ], "wc_questions_avg": [ 94.5, 95.06445182085677 ], "wc_limitations_avg": [ 13.75, 12.93010054098575 ], "wc_review_avg": [ 344.0, 94.02925076804557 ], "wc_reply_reviewers_avg": [ 37.0, 10.464224768228174 ], "wc_reply_authors_avg": [ 62.0, 53.99073994677235 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4169335122605573745&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;amazon.com;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;1;0", 
"aff_unique_norm": "Korea Advanced Institute of Science and Technology;Amazon", "aff_unique_dep": ";Amazon Web Services", "aff_unique_url": "https://www.kaist.ac.kr;https://aws.amazon.com", "aff_unique_abbr": "KAIST;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Learning Curves for Noisy Heterogeneous Feature-Subsampled Ridge Ensembles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/70000", "id": "xXfDB8kJUs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9c940ba3be5bc9020ec74279d6e37c8a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xXfDB8kJUs", "openreview": "https://openreview.net/forum?id=xXfDB8kJUs", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/70000", "video": "https://nips.cc/virtual/2023/poster/70000", "author_site": "Ben Ruben, Cengiz Pehlevan", "tldr": "", "abstract": "Feature bagging is a well-established ensembling method which aims to reduce\nprediction variance by combining predictions of many estimators trained on subsets\nor projections of features. Here, we develop a theory of feature-bagging in noisy\nleast-squares ridge ensembles and simplify the resulting learning curves in the special\ncase of equicorrelated data. Using analytical learning curves, we demonstrate\nthat subsampling shifts the double-descent peak of a linear predictor. This leads\nus to introduce heterogeneous feature ensembling, with estimators built on varying\nnumbers of feature dimensions, as a computationally efficient method to mitigate\ndouble-descent. Then, we compare the performance of a feature-subsampling\nensemble to a single linear predictor, describing a trade-off between noise amplification\ndue to subsampling and noise reduction due to ensembling. 
Our qualitative\ninsights carry over to linear classifiers applied to image classification tasks with\nrealistic datasets constructed using a state-of-the-art deep learning feature map.", "keywords": "ridge regression;ensembling methods", "primary_area": "", "supplementary_material": "", "author": "Benjamin Samuel Ruben;Cengiz Pehlevan", "authorids": "~Benjamin_Samuel_Ruben1;~Cengiz_Pehlevan2", "gender": "M;", "homepage": "https://benruben.me;https://pehlevan.seas.harvard.edu/", "dblp": ";145/3480", "google_scholar": "7Kju8PcAAAAJ;veDLTPEAAAAJ", "orcid": ";0000-0001-9767-6063", "linkedin": ";", "or_profile": "~Benjamin_Samuel_Ruben1;~Cengiz_Pehlevan2", "aff": "Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;seas.harvard.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nruben2023learning,\ntitle={Learning Curves for Noisy Heterogeneous Feature-Subsampled Ridge Ensembles},\nauthor={Benjamin Samuel Ruben and Cengiz Pehlevan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xXfDB8kJUs}\n}", "github": "", "project": "", "reviewers": "QQ7H;gGcm;JR3L;tCNd;uQUq", "pdf_size": 3805163, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;2", "soundness": "4;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;2;3", "wc_summary": "121;116;73;182;112", "wc_strengths": "133;14;25;130;112", "wc_weaknesses": "577;133;79;594;87", "wc_questions": "415;65;53;233;11", "wc_limitations": "8;81;14;136;1", "wc_review": "1254;409;244;1275;323", "wc_reply_reviewers": "317;19;0;291;6", "wc_reply_authors": "0;14;0;284;0", "reply_reviewers": "1;1;0;2;1", "reply_authors": "1;2;1;2;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 120.8, 35.0165675073957 ], "wc_strengths_avg": [ 82.8, 52.296845029121975 ], "wc_weaknesses_avg": [ 294.0, 238.7819088624597 ], "wc_questions_avg": [ 155.4, 150.28186850049477 ], "wc_limitations_avg": [ 48.0, 52.53189507337423 ], "wc_review_avg": [ 701.0, 463.0943748308761 ], "wc_reply_reviewers_avg": [ 126.6, 145.2096415531696 ], "wc_reply_authors_avg": [ 59.6, 112.33093963819586 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.875, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9851044590936806988&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "email": "harvard.edu;seas.harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "For SALE: State-Action Representation Learning for Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69999", "id": "xZvGrzRq17", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c20ac0df6c213db6d3a930fe9c7296c8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xZvGrzRq17", "openreview": "https://openreview.net/forum?id=xZvGrzRq17", "poster": "", "slides": 
"https://nips.cc/virtual/2023/poster/69999", "video": "https://nips.cc/virtual/2023/poster/69999", "author_site": "Scott Fujimoto, Wei-Di Chang, Edward Smith, Shixiang (Shane) Gu, Doina Precup, David Meger", "tldr": "", "abstract": "In reinforcement learning (RL), representation learning is a proven tool for complex image-based tasks, \nbut is often overlooked for environments with low-level states, such as physical control problems. This paper introduces SALE, a novel approach for learning embeddings that model the nuanced interaction between state and action, enabling effective representation learning from low-level states. We extensively study the design space of these embeddings and highlight important design considerations. We integrate SALE and an adaptation of checkpoints for RL into TD3 to form the TD7 algorithm, which significantly outperforms existing continuous control algorithms. On OpenAI gym benchmark tasks, TD7 has an average performance gain of 276.7% and 50.7% over TD3 at 300k and 5M time steps, respectively, and works in both the online and offline settings.", "keywords": "Deep reinforcement learning;representation learning", "primary_area": "", "supplementary_material": "", "author": "Scott Fujimoto;Wei-Di Chang;Edward J. Smith;Shixiang Shane Gu;Doina Precup;David Meger", "authorids": "~Scott_Fujimoto1;~Wei-Di_Chang1;~Edward_J._Smith1;~Shixiang_Shane_Gu1;~Doina_Precup1;~David_Meger2", "gender": ";;M;F;M;M", "homepage": ";;https://edwardsmith1884.github.io/;http://cs.mcgill.ca/~dprecup/;http://www.cim.mcgill.ca/~dmeger/;https://sites.google.com/view/gugurus/home", "dblp": "215/5503;205/3163;45/6211;p/DoinaPrecup;51/3415.html;121/0550", "google_scholar": "1Nk3WZoAAAAJ;https://scholar.google.com/citations?hl=en;FUUlY5wAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;https://scholar.google.com.tw/citations?user=gFwEytkAAAAJ;B8wslVsAAAAJ", "orcid": ";;;;;", "linkedin": ";;edward-james-smith-721754b2/;;;", "or_profile": "~Scott_Fujimoto1;~Wei-Di_Chang1;~Edward_J._Smith1;~Doina_Precup1;~David_Meger2;~Shixiang_Gu1", "aff": "Mila;Samsung;McGill University;McGill University;McGill University;OpenAI", "aff_domain": "mila.quebec;samsung.com;mcgill.ca;mcgill.ca;mcgill.ca;openai.com", "position": "PhD student;Intern;PhD student;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nfujimoto2023for,\ntitle={For {SALE}: State-Action Representation Learning for Deep Reinforcement Learning},\nauthor={Scott Fujimoto and Wei-Di Chang and Edward J. 
Smith and Shixiang Shane Gu and Doina Precup and David Meger},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xZvGrzRq17}\n}", "github": "", "project": "", "reviewers": "Casd;FeJr;xwBa;YfJA;sfVv", "pdf_size": 28007250, "rating": "5;6;7;7;7", "confidence": "4;4;4;3;4", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "195;63;84;39;83", "wc_strengths": "84;35;117;63;100", "wc_weaknesses": "460;72;89;283;51", "wc_questions": "60;15;3;128;62", "wc_limitations": "1;9;6;2;59", "wc_review": "800;194;299;515;355", "wc_reply_reviewers": "440;23;42;192;0", "wc_reply_authors": "514;0;0;16;0", "reply_reviewers": "3;1;1;1;0", "reply_authors": "3;1;1;2;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 92.8, 53.66712215127619 ], "wc_strengths_avg": [ 79.8, 28.631451238105274 ], "wc_weaknesses_avg": [ 191.0, 158.10755832660246 ], "wc_questions_avg": [ 53.6, 44.03907355973784 ], "wc_limitations_avg": [ 15.4, 21.987269043698902 ], "wc_review_avg": [ 432.6, 210.98492837167302 ], "wc_reply_reviewers_avg": [ 139.4, 164.67859605911147 ], "wc_reply_authors_avg": [ 106.0, 204.09409594596312 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1971209223341705104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mila.quebec;samsung.com;mcgill.ca;mcgill.ca;mcgill.ca;openai.com", "author_num": 6, "aff_unique_index": "0;1;2;2;2;3", "aff_unique_norm": "Mila;Samsung;McGill University;OpenAI", "aff_unique_dep": "Quebec Artificial Intelligence Institute;Samsung;;", "aff_unique_url": "https://mila.quebec;https://www.samsung.com;https://www.mcgill.ca;https://openai.com", "aff_unique_abbr": "Mila;Samsung;McGill;OpenAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2", "aff_country_unique": "Canada;South Korea;United States" }, { "title": "Practical Equivariances via Relational Conditional Neural Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69998", "id": "xax5eWeObb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5d1a382162cb5ed326f1d3dbbfac4c82-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xax5eWeObb", "openreview": "https://openreview.net/forum?id=xax5eWeObb", "poster": "/media/PosterPDFs/NeurIPS%202023/69998.png?t=1701903028.6444633", "slides": "https://nips.cc/virtual/2023/poster/69998", "video": "https://nips.cc/virtual/2023/poster/69998", "author_site": "Daolang Huang, Manuel Haussmann, Ulpu Remes, ST John, Gr\u00e9goire Clart\u00e9, Kevin Sebastian Luck, Kevin Luck, Samuel Kaski, Luigi Acerbi", "tldr": "", "abstract": "Conditional Neural Processes (CNPs) are a class of metalearning models popular for combining the runtime efficiency of amortized inference with reliable uncertainty quantification. 
Many relevant machine learning tasks, such as in spatio-temporal modeling, Bayesian Optimization and continuous control, inherently contain equivariances \u2013 for example to translation \u2013 which the model can exploit for maximal performance. However, prior attempts to include equivariances in CNPs do not scale effectively beyond two input dimensions. In this work, we propose Relational Conditional Neural Processes (RCNPs), an effective approach to incorporate equivariances into any neural process model. Our proposed method extends the applicability and impact of equivariant neural processes to higher dimensions. We empirically demonstrate the competitive performance of RCNPs on a large array of tasks naturally containing equivariances.", "keywords": "neural processes;equivariance;Gaussian processes", "primary_area": "", "supplementary_material": "/attachment/58ae75ad0c43311947ac531fcdfd94e01bbb474d.zip", "author": "Daolang Huang;Manuel Haussmann;Ulpu Remes;S. T. John;Gr\u00e9goire Clart\u00e9;Kevin Sebastian Luck;Samuel Kaski;Luigi Acerbi", "authorids": "~Daolang_Huang1;~Manuel_Haussmann1;~Ulpu_Remes1;~S._T._John1;~Gr\u00e9goire_Clart\u00e91;~Kevin_Sebastian_Luck1;~Samuel_Kaski1;~Luigi_Acerbi1", "gender": "M;;;;M;;M;M", "homepage": "https://www.huangdaolang.com;https://manuelhaussmann.github.io/;;;https://sites.google.com/view/gregoireclarte/homepage;;https://people.aalto.fi/samuel.kaski;http://luigiacerbi.com/", "dblp": "277/8410;198/2433;16/8717;218/6590;;153/7680;64/5826;72/1450", "google_scholar": "2togGHoAAAAJ;https://scholar.google.com/citations?hl=de;;Jf9j8GAAAAAJ;;;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=QYBZoGwAAAAJ", "orcid": ";;0000-0003-1435-0207;0000-0002-4540-395X;;;0000-0003-1925-9154;0000-0001-7471-7336", "linkedin": "daolanghuang/?originalSubdomain=fi;;;;;;samuel-kaski-27790/;luigi-acerbi-719b492/", "or_profile": "~Daolang_Huang1;~Manuel_Haussmann1;~Ulpu_Remes1;~S._T._John1;~Gr\u00e9goire_Clart\u00e91;~Kevin_Sebastian_Luck1;~Samuel_Kaski1;~Luigi_Acerbi1", "aff": "Aalto University;Aalto University;University of Helsinki;Aalto University;University of Helsinki;Aalto University;Aalto University;University of Helsinki", "aff_domain": "aalto.fi;aalto.fi;helsinki.fi;aalto.fi;helsinki.fi;aalto.fi;aalto.fi;helsinki.fi", "position": "PhD student;Postdoc;Postdoc;Postdoc;Postdoc;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023practical,\ntitle={Practical Equivariances via Relational Conditional Neural Processes},\nauthor={Daolang Huang and Manuel Haussmann and Ulpu Remes and S. T. 
John and Gr{\\'e}goire Clart{\\'e} and Kevin Sebastian Luck and Samuel Kaski and Luigi Acerbi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xax5eWeObb}\n}", "github": "", "project": "", "reviewers": "bQ2F;C6E4;jSJf;N64F", "pdf_size": 1587527, "rating": "2;6;7;8", "confidence": "4;3;5;4", "soundness": "1;3;4;4", "novelty": "1;3;3;4", "presentation": "2;3;4;3", "wc_summary": "38;22;84;101", "wc_strengths": "36;50;36;117", "wc_weaknesses": "340;37;58;57", "wc_questions": "1;2;72;102", "wc_limitations": "67;4;22;20", "wc_review": "482;115;272;397", "wc_reply_reviewers": "362;0;0;19", "wc_reply_authors": "766;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 2.277608394786075 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 1.224744871391589 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.25, 32.32162588732194 ], "wc_strengths_avg": [ 59.75, 33.54381463101655 ], "wc_weaknesses_avg": [ 123.0, 125.5647243456537 ], "wc_questions_avg": [ 44.25, 44.04755952376931 ], "wc_limitations_avg": [ 28.25, 23.434749838647733 ], "wc_review_avg": [ 316.5, 138.2506781176859 ], "wc_reply_reviewers_avg": [ 95.25, 154.2033965254981 ], "wc_reply_authors_avg": [ 191.5, 331.68772964944003 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.15523010514126656, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10805417503113701850&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "email": "aalto.fi;aalto.fi;helsinki.fi;aalto.fi;helsinki.fi;aalto.fi;aalto.fi;helsinki.fi", "author_num": 8, "aff_unique_index": "0;0;1;0;1;0;0;1", "aff_unique_norm": "Aalto University;University of Helsinki", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.helsinki.fi", "aff_unique_abbr": "Aalto;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Finland" }, { "id": "xbUz5DsW5T", "title": "Visual Abductive Reasoning Meets Driving Hazard Prediction: Problem Formulation and Dataset", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "This paper addresses the problem of predicting hazards that drivers may encounter while driving a car. We formulate it as a task of anticipating impending accidents using a single input image captured by car dashcams. Unlike existing approaches to driving hazard prediction that rely on computational simulations or anomaly detection from videos, this study focuses on high-level inference from static images. The problem needs predicting and reasoning about future events based on uncertain observations, which falls under visual abductive reasoning. To enable research in this understudied area, a new dataset named the DHPR (Driving Hazard Prediction and Reasoning) dataset is created. The dataset consists of 15K dashcam images of street scenes, and each image is associated with a tuple containing car speed, a hypothesized hazard description, and visual entities present in the scene. These are annotated by human annotators, who identify risky scenes and provide descriptions of potential accidents that could occur a few seconds later. 
We present several baseline methods and evaluate their performance on our dataset, identifying remaining issues and discussing future directions. This study contributes to the field by introducing a novel problem formulation and dataset, enabling researchers to explore the potential of multi-modal AI for driving hazard prediction.", "keywords": "Traffic Accident Anticipation;Driving Hazard;Visual Abductive Reasoning", "primary_area": "", "supplementary_material": "/attachment/15037928c243c30bf3efa2d24fe4c943e06aa898.pdf", "author": "Korawat Charoenpitaks;Van-Quang Nguyen;Masanori Suganuma;Masahiro Takahashi;Ryoma Niihara;Takayuki Okatani", "authorids": "~Korawat_Charoenpitaks1;~Van-Quang_Nguyen1;~Masanori_Suganuma1;masahiro.takahashi.j7x@jp.denso.com;~Ryoma_Niihara1;~Takayuki_Okatani1", "gender": "M;M;M;;M;M", "homepage": ";http://www.vision.is.tohoku.ac.jp/us/home/;https://sites.google.com/site/suganumamasanori/eng;;;", "dblp": "272/5814;;179/9075;;;18/4811", "google_scholar": ";qR180McAAAAJ;https://scholar.google.co.jp/citations?user=NpWGfwgAAAAJ;;;https://scholar.google.co.jp/citations?hl=en", "orcid": "0009-0002-1780-8164;;;;;", "linkedin": "korawat-charoenpitaks-7273776a/;;;;%E7%AB%9C%E9%A6%AC-%E6%96%B0%E5%8E%9F-49634162;", "or_profile": "~Korawat_Charoenpitaks1;~Van-Quang_Nguyen1;~Masanori_Suganuma1;masahiro.takahashi.j7x@jp.denso.com;~Ryoma_Niihara1;~Takayuki_Okatani1", "aff": "Tohoku University;RIKEN;Tohoku University;;DENSO CORPORATION;Tohoku University", "aff_domain": "tohoku.ac.jp;riken.jp;tohoku.ac.jp;;denso.com;tohoku.ac.jp", "position": "PhD student;Postdoc;Assistant Professor;;Manager;Full Professor", "bibtex": "@misc{\ncharoenpitaks2023visual,\ntitle={Visual Abductive Reasoning Meets Driving Hazard Prediction: Problem Formulation and Dataset},\nauthor={Korawat Charoenpitaks and Van-Quang Nguyen and Masanori Suganuma and Masahiro Takahashi and Ryoma Niihara and Takayuki Okatani},\nyear={2023},\nurl={https://openreview.net/forum?id=xbUz5DsW5T}\n}", "github": "", "project": "", "reviewers": "24dj;YbBX;s9mr", "site": "https://openreview.net/forum?id=xbUz5DsW5T", "pdf_size": 4026575, "rating": "5;6;7", "confidence": "5;4;3", "wc_summary_and_contributions": "61;47;131", "wc_strengths": "44;10;110", "wc_improvement": "30;41;46", "wc_limitations": "199;68;8", "wc_correctness": "5;32;13", "wc_clarity": "5;22;8", "wc_relation_to_prior_work": "8;8;17", "wc_documentation": "6;6;17", "wc_additional_feedback": "1;1;1", "wc_review": "359;235;351", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "711;431;646", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 79.66666666666667, 36.745370078721784 ], "wc_strengths_avg": [ 54.666666666666664, 41.51572660517404 ], "wc_improvement_avg": [ 39.0, 6.683312551921141 ], "wc_limitations_avg": [ 91.66666666666667, 79.75100138695578 ], "wc_correctness_avg": [ 16.666666666666668, 11.32352516764202 ], "wc_clarity_avg": [ 11.666666666666666, 7.408703590297622 ], "wc_relation_to_prior_work_avg": [ 11.0, 4.242640687119285 ], "wc_documentation_avg": [ 9.666666666666666, 5.185449728701348 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 315.0, 56.66274496233541 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 596.0, 119.65227397198377 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, 
"gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15602467243785992478&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Tohoku University;RIKEN;DENSO Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tohoku.ac.jp;https://www.riken.jp;https://www.denso.com", "aff_unique_abbr": "Tohoku U;RIKEN;DENSO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "On Evaluating Adversarial Robustness of Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69997", "id": "xbbknN9QFs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a97b58c4f7551053b0512f92244b0810-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xbbknN9QFs", "openreview": "https://openreview.net/forum?id=xbbknN9QFs", "poster": "/media/PosterPDFs/NeurIPS%202023/69997.png?t=1698643542.2976496", "slides": "https://nips.cc/virtual/2023/poster/69997", "video": "https://nips.cc/virtual/2023/poster/69997", "author_site": "Yunqing Zhao, Tianyu Pang, Chao Du, Xiao Yang, Chongxuan LI, Ngai-Man (Man) Cheung, Min Lin", "tldr": "", "abstract": "Large vision-language models (VLMs) such as GPT-4 have achieved unprecedented performance in response generation, especially with visual inputs, enabling more creative and adaptable interaction than large language models such as ChatGPT. Nonetheless, multimodal generation exacerbates safety concerns, since adversaries may successfully evade the entire system by subtly manipulating the most vulnerable modality (e.g., vision). To this end, we propose evaluating the robustness of open-source large VLMs in the most realistic and high-risk setting, where adversaries have only black-box system access and seek to deceive the model into returning the targeted responses. In particular, we first craft targeted adversarial examples against pretrained models such as CLIP and BLIP, and then transfer these adversarial examples to other VLMs such as MiniGPT-4, LLaVA, UniDiffuser, BLIP-2, and Img2Prompt. In addition, we observe that black-box queries on these VLMs can further improve the effectiveness of targeted evasion, resulting in a surprisingly high success rate for generating targeted responses. Our findings provide a quantitative understanding regarding the adversarial vulnerability of large VLMs and call for a more thorough examination of their potential security flaws before deployment in practice. 
Our project page: https://yunqing-me.github.io/AttackVLM/.", "keywords": "Large Vision-Language Models;Adversarial Robustness", "primary_area": "", "supplementary_material": "/attachment/7f175523c24bb75704c9270f013ec235de781961.zip", "author": "Yunqing Zhao;Tianyu Pang;Chao Du;Xiao Yang;Chongxuan Li;Ngai-man Cheung;Min Lin", "authorids": "~Yunqing_Zhao2;~Tianyu_Pang1;~Chao_Du1;~Xiao_Yang4;~Chongxuan_Li1;~Ngai-man_Cheung1;~Min_Lin1", "gender": ";M;M;M;M;M;M", "homepage": ";https://p2333.github.io/;https://duchao0726.github.io/;https://ml.cs.tsinghua.edu.cn/~xiaoyang/;http://ml.cs.tsinghua.edu.cn/~chongxuan;https://sites.google.com/site/mancheung0407/;https://linmin.me", "dblp": ";202/2550;75/7523;57/33851;161/9965;82/3605;", "google_scholar": ";wYDbtFsAAAAJ;QOp7xW0AAAAJ;bwkwp0MAAAAJ;UKMcQn4AAAAJ;https://scholar.google.com.sg/citations?hl=en;BGONmkIAAAAJ", "orcid": ";0000-0003-0639-6176;0000-0003-1244-6336;0000-0001-9502-9962;0000-0002-0912-9076;0000-0003-0135-3791;", "linkedin": ";%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;duchao/;;;;min-lin-08a3a422/", "or_profile": "~Yunqing_Zhao2;~Tianyu_Pang1;~Chao_Du1;~Xiao_Yang4;~Chongxuan_Li1;~Ngai-man_Cheung1;~Min_Lin1", "aff": ";Sea AI Lab;Sea AI Lab;Tsinghua University;Renmin University of China;Singapore University of Technology and Design;Sea AI Lab", "aff_domain": ";sea.com;sea.com;mail.tsinghua.edu.cn;ruc.edu.cn;sutd.edu.sg;sea.com", "position": ";Research Scientist;Research Scientist;PhD student;Assistant Professor;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nzhao2023on,\ntitle={On Evaluating Adversarial Robustness of Large Vision-Language Models},\nauthor={Yunqing Zhao and Tianyu Pang and Chao Du and Xiao Yang and Chongxuan Li and Ngai-man Cheung and Min Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xbbknN9QFs}\n}", "github": "", "project": "", "reviewers": "UbWE;zQtG;Xvuc;csPu", "pdf_size": 21668534, "rating": "6;7;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "83;64;69;160", "wc_strengths": "81;94;61;50", "wc_weaknesses": "175;62;56;168", "wc_questions": "150;10;49;16", "wc_limitations": "11;1;69;14", "wc_review": "500;231;304;408", "wc_reply_reviewers": "23;193;83;49", "wc_reply_authors": "24;558;387;24", "reply_reviewers": "1;3;2;1", "reply_authors": "2;4;3;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 94.0, 38.73628789649313 ], "wc_strengths_avg": [ 71.5, 17.09532099727876 ], "wc_weaknesses_avg": [ 115.25, 56.34436529059494 ], "wc_questions_avg": [ 56.25, 56.126531159514926 ], "wc_limitations_avg": [ 23.75, 26.564779313971346 ], "wc_review_avg": [ 360.75, 102.07687054372308 ], "wc_reply_reviewers_avg": [ 87.0, 64.79197481170026 ], "wc_reply_authors_avg": [ 248.25, 232.25672756671656 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 233, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5937308252715370580&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": ";sea.com;sea.com;mail.tsinghua.edu.cn;ruc.edu.cn;sutd.edu.sg;sea.com", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Sea AI Lab;Tsinghua 
University;Renmin University of China;Singapore University of Technology and Design", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;https://www.sutd.edu.sg", "aff_unique_abbr": ";THU;RUC;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;2", "aff_country_unique": ";China;Singapore" }, { "title": "Adversarial Resilience in Sequential Prediction via Abstention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69996", "id": "xcGhx9FdxM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1967f962c7c2083618236d80eeb9d1ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xcGhx9FdxM", "openreview": "https://openreview.net/forum?id=xcGhx9FdxM", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69996", "video": "https://nips.cc/virtual/2023/poster/69996", "author_site": "Surbhi Goel, Steve Hanneke, Shay Moran, Abhishek Shetty", "tldr": "", "abstract": "We study the problem of sequential prediction in the stochastic setting with an adversary that is allowed to inject clean-label adversarial (or out-of-distribution) examples. Algorithms designed to handle purely stochastic data tend to fail in the presence of such adversarial examples, often leading to erroneous predictions. This is undesirable in many high-stakes applications such as medical recommendations, where abstaining from predictions on adversarial examples is preferable to misclassification. On the other hand, assuming fully adversarial data leads to very pessimistic bounds that are often vacuous in practice. \n \n To move away from these pessimistic guarantees, we propose a new model of sequential prediction that sits between the purely stochastic and fully adversarial settings by allowing the learner to abstain from making a prediction at no cost on adversarial examples, thereby asking the learner to make predictions with certainty. Assuming access to the marginal distribution on the non-adversarial examples, we design a learner whose error scales with the VC dimension (mirroring the stochastic setting) of the hypothesis class, as opposed to the Littlestone dimension which characterizes the fully adversarial setting. Furthermore, we design learners for VC dimension~1 classes and the class of axis-aligned rectangles, which work even in the absence of access to the marginal distribution. 
Our key technical contribution is a novel measure for quantifying uncertainty for learning VC classes, which may be of independent interest.", "keywords": "Sequential prediction;adversarial examples;abstention;out-of-distribution;VC Classes", "primary_area": "", "supplementary_material": "/attachment/d02d0e13d67d6ecca21e299aaca3c46daf7d4958.pdf", "author": "Surbhi Goel;Steve Hanneke;Shay Moran;Abhishek Shetty", "authorids": "~Surbhi_Goel1;~Steve_Hanneke1;~Shay_Moran1;~Abhishek_Shetty1", "gender": "F;M;M;M", "homepage": "https://www.surbhigoel.com;http://www.stevehanneke.com;http://www.cs.technion.ac.il/~shaymrn/;https://ashettyv.github.io/", "dblp": "190/7815;40/154;119/5111;223/4770", "google_scholar": "https://scholar.google.co.in/citations?user=Zqz4CQoAAAAJ;fEhNO7YAAAAJ;kALYnggAAAAJ;https://scholar.google.co.in/citations?user=M-y2aLUAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Surbhi_Goel1;~Steve_Hanneke1;~Shay_Moran1;~Abhishek_Shetty1", "aff": "University of Pennsylvania;Purdue University;Google;University of California, Berkeley", "aff_domain": "upenn.edu;purdue.edu;google.com;berkeley.edu", "position": "Assistant Professor;Assistant Professor;Visiting Faculty;PhD student", "bibtex": "@inproceedings{\ngoel2023adversarial,\ntitle={Adversarial Resilience in Sequential Prediction via Abstention},\nauthor={Surbhi Goel and Steve Hanneke and Shay Moran and Abhishek Shetty},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xcGhx9FdxM}\n}", "github": "", "project": "", "reviewers": "KALR;7wBe;mVCw;cSg3", "pdf_size": 347682, "rating": "5;5;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "54;56;241;139", "wc_strengths": "51;91;230;65", "wc_weaknesses": "133;161;459;61", "wc_questions": "2;32;123;69", "wc_limitations": "6;19;54;33", "wc_review": "246;359;1107;367", "wc_reply_reviewers": "46;54;2746;20", "wc_reply_authors": "136;0;1998;0", "reply_reviewers": "1;1;9;1", "reply_authors": "2;1;8;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 122.5, 76.53267276137689 ], "wc_strengths_avg": [ 109.25, 71.17715574536538 ], "wc_weaknesses_avg": [ 203.5, 151.95640822288476 ], "wc_questions_avg": [ 56.5, 45.13590588434002 ], "wc_limitations_avg": [ 28.0, 17.790446874657196 ], "wc_review_avg": [ 519.75, 342.40865570251 ], "wc_reply_reviewers_avg": [ 716.5, 1171.7997909199335 ], "wc_reply_authors_avg": [ 533.5, 847.3504292794098 ], "reply_reviewers_avg": [ 3.0, 3.4641016151377544 ], "reply_authors_avg": [ 3.0, 2.9154759474226504 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9669018081574416220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "upenn.edu;purdue.edu;google.com;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Pennsylvania;Purdue University;Google;University of California, Berkeley", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.upenn.edu;https://www.purdue.edu;https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "UPenn;Purdue;Google;UC Berkeley", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Berkeley", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Data-Agnostic Pruning At Initialization: What Makes a Good Sparse Mask?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69995", "id": "xdOoCWCYaY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fd5013ea0c3f96931dec77174eaf9d80-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xdOoCWCYaY", "openreview": "https://openreview.net/forum?id=xdOoCWCYaY", "poster": "/media/PosterPDFs/NeurIPS%202023/69995.png?t=1701394747.3098447", "slides": "https://nips.cc/virtual/2023/poster/69995", "video": "https://nips.cc/virtual/2023/poster/69995", "author_site": "Hoang Pham, The Anh Ta, Shiwei Liu, Shiwei Liu, Lichuan Xiang, Dung Le, Hongkai Wen, Long Tran-Thanh", "tldr": "", "abstract": "Pruning at initialization (PaI) aims to remove weights of neural networks before training in pursuit of training efficiency besides the inference. While off-the-shelf PaI methods manage to find trainable subnetworks that outperform random pruning, their performance in terms of both accuracy and computational reduction is far from satisfactory compared to post-training pruning and the understanding of PaI is missing. For instance, recent studies show that existing PaI methods only able to find good layerwise sparsities not weights, as the discovered subnetworks are surprisingly resilient against layerwise random mask shuffling and weight re-initialization.\nIn this paper, we study PaI from a brand-new perspective -- the topology of subnetworks. In particular, we propose a principled framework for analyzing the performance of Pruning and Initialization (PaI) methods with two quantities, namely, the number of effective paths and effective nodes. These quantities allow for a more comprehensive understanding of PaI methods, giving us an accurate assessment of different subnetworks at initialization. We systematically analyze the behavior of various PaI methods through our framework and observe a guiding principle for constructing effective subnetworks: *at a specific sparsity, the top-performing subnetwork always presents a good balance between the number of effective nodes and the number of effective paths.*\nInspired by this observation, we present a novel data-agnostic pruning method by solving a multi-objective optimization problem. By conducting extensive experiments across different architectures and datasets, our results demonstrate that our approach outperforms state-of-the-art PaI methods while it is able to discover subnetworks that have much lower inference FLOPs (up to 3.4$\\times$). Code will be fully released.", "keywords": "Pruning Neural Network;Sparsity;Neural Architecture Search", "primary_area": "", "supplementary_material": "/attachment/ecdc2df7fd8943c98bd2aac425fceaa60d7dc684.pdf", "author": "Hoang Pham;The-Anh Ta;Shiwei Liu;Lichuan Xiang;Dung D. 
Le;Hongkai Wen;Long Tran-Thanh", "authorids": "~Hoang_Pham2;~The-Anh_Ta1;~Shiwei_Liu2;~Lichuan_Xiang1;~Dung_D._Le2;~Hongkai_Wen1;~Long_Tran-Thanh1", "gender": "M;;M;M;M;;", "homepage": "https://pvh1602.github.io/;;https://shiweiliuiiiiiii.github.io/;;https://andrew-dungle.github.io/;;https://warwick.ac.uk/fac/sci/dcs/people/long_tran-thanh/", "dblp": ";;234/8697-3.html;294/8850;186/1477;;46/8333", "google_scholar": "aIsjeywAAAAJ;;73IbXtsAAAAJ;;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?user=YBQai3gAAAAJ", "orcid": "0009-0004-2501-8271;;;;;;", "linkedin": ";;;lichuan-xiang-17ab43101/;;;", "or_profile": "~Hoang_Pham2;~The-Anh_Ta1;~Shiwei_Liu2;~Lichuan_Xiang1;~Dung_D._Le2;~Hongkai_Wen1;~Long_Tran-Thanh1", "aff": "FPT Software AI Center;;University of Texas at Austin;The university of Warwick;VinUniversity;;", "aff_domain": "fsoft.com.vn;;utexas.edu;warwick.ac.uk;vinuni.edu.vn;;", "position": "Researcher;;Postdoc;PhD student;Assistant Professor;;", "bibtex": "@inproceedings{\npham2023towards,\ntitle={Towards Data-Agnostic Pruning At Initialization: What Makes a Good Sparse Mask?},\nauthor={Hoang Pham and The-Anh Ta and Shiwei Liu and Lichuan Xiang and Dung D. Le and Hongkai Wen and Long Tran-Thanh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xdOoCWCYaY}\n}", "github": "", "project": "", "reviewers": "a28p;ygEG;HuF1;qyia;8roE", "pdf_size": 1864973, "rating": "5;6;6;6;6", "confidence": "4;4;4;3;5", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;4;4", "wc_summary": "67;209;74;107;106", "wc_strengths": "71;37;119;95;107", "wc_weaknesses": "519;179;240;125;409", "wc_questions": "6;296;40;48;94", "wc_limitations": "12;87;34;7;20", "wc_review": "675;808;507;382;736", "wc_reply_reviewers": "371;425;429;129;46", "wc_reply_authors": "1270;22;1083;21;26", "reply_reviewers": "3;1;3;1;1", "reply_authors": "4;2;4;2;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 112.6, 50.8668851021959 ], "wc_strengths_avg": [ 85.8, 29.109448637856403 ], "wc_weaknesses_avg": [ 294.4, 147.3439513519303 ], "wc_questions_avg": [ 96.8, 103.48023965956011 ], "wc_limitations_avg": [ 32.0, 28.97585201508318 ], "wc_review_avg": [ 621.6, 155.70818860933423 ], "wc_reply_reviewers_avg": [ 280.0, 160.6636237609497 ], "wc_reply_authors_avg": [ 484.4, 568.1853922796678 ], "reply_reviewers_avg": [ 1.8, 0.9797958971132713 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2087539445770844493&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "fsoft.com.vn;;utexas.edu;warwick.ac.uk;vinuni.edu.vn;;", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "FPT Software;University of Texas at Austin;University of Warwick;VinUniversity", "aff_unique_dep": "AI Center;;;", "aff_unique_url": "https://www.fpt-software.com;https://www.utexas.edu;https://warwick.ac.uk;https://vinuni.edu.vn", "aff_unique_abbr": ";UT Austin;Warwick;VinUni", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Vietnam;United States;United Kingdom" }, { 
"title": "Efficient Symbolic Policy Learning with Differentiable Symbolic Expression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69994", "id": "xdQpmUPNHC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7207ffb9888068c0ee13ae3be023cada-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xdQpmUPNHC", "openreview": "https://openreview.net/forum?id=xdQpmUPNHC", "poster": "/media/PosterPDFs/NeurIPS%202023/69994.png?t=1700194962.8507557", "slides": "https://nips.cc/virtual/2023/poster/69994", "video": "https://nips.cc/virtual/2023/poster/69994", "author_site": "Jiaming Guo, Rui Zhang, Shaohui Peng, Qi Yi, Xing Hu, Ruizhi Chen, Zidong Du, xishan zhang, Ling Li, Qi Guo, Yunji Chen", "tldr": "", "abstract": "Deep reinforcement learning (DRL) has led to a wide range of advances in sequential decision-making tasks. However, the complexity of neural network policies makes it difficult to understand and deploy with limited computational resources. Currently, employing compact symbolic expressions as symbolic policies is a promising strategy to obtain simple and interpretable policies. Previous symbolic policy methods usually involve complex training processes and pre-trained neural network policies, which are inefficient and limit the application of symbolic policies. In this paper, we propose an efficient gradient-based learning method named Efficient Symbolic Policy Learning (ESPL) that learns the symbolic policy from scratch in an end-to-end way. We introduce a symbolic network as the search space and employ a path selector to find the compact symbolic policy. By doing so we represent the policy with a differentiable symbolic expression and train it in an off-policy manner which further improves the efficiency. In addition, in contrast with previous symbolic policies which only work in single-task RL because of complexity, we expand ESPL on meta-RL to generate symbolic policies for unseen tasks. Experimentally, we show that our approach generates symbolic policies with higher performance and greatly improves data efficiency for single-task RL. 
In meta-RL, we demonstrate that compared with neural network policies the proposed symbolic policy achieves higher performance and efficiency and shows the potential to be interpretable.", "keywords": "reinforcement learning;context variables;symbolic policy", "primary_area": "", "supplementary_material": "/attachment/278172ffce108156ff1bea49270d5935d78a19b8.zip", "author": "Jiaming Guo;Rui Zhang;Shaohui Peng;Qi Yi;Xing Hu;Ruizhi Chen;Zidong Du;Xishan Zhang;Ling Li;Qi Guo;Yunji Chen", "authorids": "~Jiaming_Guo2;~Rui_Zhang1;~Shaohui_Peng2;~Qi_Yi1;~Xing_Hu3;~Ruizhi_Chen3;~Zidong_Du1;~Xishan_Zhang1;~Ling_Li6;~Qi_Guo4;~Yunji_Chen1", "gender": "M;F;M;F;M;;;F;M;M;M", "homepage": ";;;;;https://zidongdu.github.io/;;;http://novel.ict.ac.cn/qguo;;", "dblp": "63/8512;60/2536-40;295/8813;49/10052-1;120/4143;44/11216;133/6391;92/5001-1;67/398-1;48/474;246/8768", "google_scholar": ";dse6jAsAAAAJ;veu6_ykAAAAJ;Hc3iRxUAAAAJ;x_wFaYgAAAAJ;https://scholar.google.com.sg/citations?user=8N9ym9YAAAAJ;;;;;", "orcid": ";;;;0000-0001-7219-4658;0000-0002-7603-4210;;0000-0001-8877-9052;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Jiaming_Guo2;~Rui_Zhang1;~Qi_Yi1;~Xing_Hu3;~Ruizhi_Chen3;~Zidong_Du1;~Xishan_Zhang1;~Ling_Li6;~Qi_Guo4;~Yunji_Chen1;~shaohui_peng1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, CAS;University of Science and Technology of China;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Software Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;, Cambricon Techonologies;Institute of Software, CAS;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ustc.edu.cn;ict.ac.cn;iscas.ac.cn;ict.ac.cn;cambricon.com;iscas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Assistant Professor;PhD student;Associate Professor;Assistant Professor;Full Professor;Researcher;Full Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nguo2023efficient,\ntitle={Efficient Symbolic Policy Learning with Differentiable Symbolic Expression},\nauthor={Jiaming Guo and Rui Zhang and Shaohui Peng and Qi Yi and Xing Hu and Ruizhi Chen and Zidong Du and Xishan Zhang and Ling Li and Qi Guo and Yunji Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xdQpmUPNHC}\n}", "github": "", "project": "", "reviewers": "9VjU;uiuA;PhEA;iXeo", "pdf_size": 4161912, "rating": "5;5;6;7", "confidence": "5;4;3;4", "soundness": "3;3;2;3", "novelty": "3;2;2;4", "presentation": "3;2;3;4", "wc_summary": "38;69;28;118", "wc_strengths": "36;19;60;88", "wc_weaknesses": "142;108;227;113", "wc_questions": "13;236;33;36", "wc_limitations": "2;1;1;24", "wc_review": "231;433;349;379", "wc_reply_reviewers": "778;143;311;14", "wc_reply_authors": "1910;418;894;0", "reply_reviewers": "4;1;3;1", "reply_authors": "6;2;4;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 63.25, 35.03837182290296 ], "wc_strengths_avg": [ 50.75, 25.974747351995553 ], "wc_weaknesses_avg": [ 147.5, 47.699580710945455 ], "wc_questions_avg": [ 79.5, 90.78683825313006 ], "wc_limitations_avg": [ 7.0, 
9.82344135219425 ], "wc_review_avg": [ 348.0, 73.95268757793728 ], "wc_reply_reviewers_avg": [ 311.5, 289.1889520711329 ], "wc_reply_authors_avg": [ 805.5, 711.8179191338189 ], "reply_reviewers_avg": [ 2.25, 1.299038105676658 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18012156901867991648&as_sdt=1005&sciodt=0,4&hl=en", "gs_version_total": 5, "email": "ict.ac.cn;ict.ac.cn;ustc.edu.cn;ict.ac.cn;iscas.ac.cn;ict.ac.cn;cambricon.com;iscas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 11, "aff_unique_index": "0;0;1;0;0;0;2;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Science and Technology of China;Cambricon Technologies", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ustc.edu.cn;https://www.cambricon.com", "aff_unique_abbr": "CAS;USTC;Cambricon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "xdtBFMAPD2", "title": "Explanation Shift: How Did Distribution Shift Impact the Model?", "track": "main", "status": "Reject", "tldr": "", "abstract": "The performance of machine learning models on new data is critical for their success in real-world applications. However, the model's performance may deteriorate if the new data is sampled from a different distribution than the training data. Current methods to detect shifts in the input or output data distributions have limitations in identifying model behavior changes. In this paper, we define \\emph{explanation shift} as the statistical comparison between how predictions from training data are explained and how predictions on new data are explained. We propose explanation shift as a key indicator to investigate the interaction between distribution shifts and learned models. We introduce an Explanation Shift Detector that operates on the explanation distributions, providing more sensitive and explainable changes in interactions between distribution shifts and learned models. We compare explanation shifts with other methods based on distribution shifts, showing that monitoring for explanation shifts results in more sensitive indicators for varying model behavior. We provide theoretical and experimental evidence and demonstrate the effectiveness of our approach on synthetic and real data. 
Additionally, we release an open-source Python package, skshift, which implements our method and provides usage tutorials for further reproducibility.", "keywords": "Model Monitoring;Distribution Shift;Explainable AI", "primary_area": "", "supplementary_material": "/attachment/34f9bf8dff5c9f5a4a903e8a32bffbd85003aade.zip", "author": "Carlos Mougan;Klaus Broelemann;David Masip;Gjergji Kasneci;Thanassis Tiropanis;Steffen Staab", "authorids": "~Carlos_Mougan1;~Klaus_Broelemann1;~David_Masip4;~Gjergji_Kasneci2;~Thanassis_Tiropanis1;~Steffen_Staab2", "gender": "M;;M;M;;M", "homepage": "https://cmougan.eu;;http://github.com/david26694/;https://www.gov.sot.tum.de/rds/prof-dr-gjergji-kasneci/;https://www.southampton.ac.uk/people/5x5rrv/professor-thanassis-tiropanis;https://www.ki.uni-stuttgart.de/de/institut/team/Staab-00004/", "dblp": "293/7915;00/7271.html;;69/3216;64/6175;s/SteffenStaab", "google_scholar": "dQ5WrokAAAAJ;;;Zbc8GK4AAAAJ;T0MK3pUAAAAJ;https://scholar.google.com/citations?hl=de", "orcid": ";;;0000-0002-3123-7268;0000-0002-6195-2852;0000-0002-0780-4154", "linkedin": "carlosmougan/;;;;tiropanis/;", "or_profile": "~Carlos_Mougan1;~Klaus_Broelemann1;~David_Masip4;~Gjergji_Kasneci2;~Thanassis_Tiropanis1;~Steffen_Staab2", "aff": "University of Southampton;SCHUFA;;University of Tuebingen;University of Southampton;University of Southampton", "aff_domain": "soton.ac.uk;schufa.de;;uni-tuebingen.de;ecs.soton.ac.uk;soton.ac.uk", "position": "PhD student;Principal Researcher;;Professor;Full Professor;Full Professor", "bibtex": "@misc{\nmougan2023explanation,\ntitle={Explanation Shift: How Did Distribution Shift Impact the Model?},\nauthor={Carlos Mougan and Klaus Broelemann and David Masip and Gjergji Kasneci and Thanassis Tiropanis and Steffen Staab},\nyear={2023},\nurl={https://openreview.net/forum?id=xdtBFMAPD2}\n}", "github": "", "project": "", "reviewers": "jhhk;dpht;YKxz;y637", "site": "https://openreview.net/forum?id=xdtBFMAPD2", "pdf_size": 958660, "rating": "3;5;5;7", "confidence": "4;3;3;3", "soundness": "2;2;2;4", "novelty": "2;2;3;3", "presentation": "2;2;2;4", "wc_summary": "38;74;85;78", "wc_strengths": "58;53;32;46", "wc_weaknesses": "252;211;254;25", "wc_questions": "337;60;3;219", "wc_limitations": "16;2;1;1", "wc_review": "701;400;375;369", "wc_reply_reviewers": "260;78;0;27", "wc_reply_authors": "432;16;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "4;2;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 68.75, 18.18481509391833 ], "wc_strengths_avg": [ 47.25, 9.781998773256925 ], "wc_weaknesses_avg": [ 185.5, 94.24038412485382 ], "wc_questions_avg": [ 154.75, 131.6707541559628 ], "wc_limitations_avg": [ 5.0, 6.363961030678928 ], "wc_review_avg": [ 461.25, 138.9071182481301 ], "wc_reply_reviewers_avg": [ 91.25, 101.37399814548107 ], "wc_reply_authors_avg": [ 112.0, 184.8675201326615 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9342819475098152131&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Southampton;SCHUFA Holding AG;University of Tuebingen", "aff_unique_dep": ";;", 
"aff_unique_url": "https://www.southampton.ac.uk;https://www.schufa.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Southampton;SCHUFA;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "WordScape: a Pipeline to extract multilingual, visually rich Documents with Layout Annotations from Web Crawl Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73418", "id": "xewwYquInO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/52c1ce1a0eaf61e8b6e3a899c1b9c61f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=xewwYquInO", "openreview": "https://openreview.net/forum?id=xewwYquInO", "poster": "/media/PosterPDFs/NeurIPS%202023/73418.png?t=1702354371.0091507", "slides": "https://nips.cc/virtual/2023/poster/73418", "video": "https://nips.cc/virtual/2023/poster/73418", "author_site": "Maurice Weber, Carlo Siebenschuh, Rory Butler, Anton Alexandrov, Valdemar Thanner, Georgios Tsolakis, Haris Jabbar, Ian Foster, Bo Li, Rick Stevens, Ce Zhang", "tldr": "", "abstract": "We introduce WordScape, a novel pipeline for the creation of cross-disciplinary, multilingual corpora comprising millions of pages with annotations for document layout detection. Relating visual and textual items on document pages has gained further significance with the advent of multimodal models. Various approaches proved effective for visual question answering or layout segmentation. However, the interplay of text, tables, and visuals remains challenging for a variety of document understanding tasks. In particular, many models fail to generalize well to diverse domains and new languages due to insufficient availability of training data. WordScape addresses these limitations. Our automatic annotation pipeline parses the Open XML structure of Word documents obtained from the web, jointly providing layout-annotated document images and their textual representations. In turn, WordScape offers unique properties as it (1) leverages the ubiquity of the Word file format on the internet, (2) is readily accessible through the Common Crawl web corpus, (3) is adaptive to domain-specific documents, and (4) offers culturally and linguistically diverse document pages with natural semantic structure and high-quality text. Together with the pipeline, we will additionally release 9.5M urls to word documents which can be processed using WordScape to create a dataset of over 40M pages. 
Finally, we investigate the quality of text and layout annotations extracted by WordScape, assess the impact on document understanding benchmarks, and demonstrate that manual labeling costs can be substantially reduced.", "keywords": "Web Data;Common Crawl;Document Processing;Layout Detection", "primary_area": "", "supplementary_material": "", "author": "Maurice Weber;Carlo Siebenschuh;Rory Marshall Butler;Anton Alexandrov;Valdemar Ragnar Thanner;Georgios Tsolakis;Haris Jabbar;Ian Foster;Bo Li;Rick Stevens;Ce Zhang", "authorids": "~Maurice_Weber1;~Carlo_Siebenschuh1;~Rory_Marshall_Butler1;~Anton_Alexandrov1;~Valdemar_Ragnar_Thanner1;~Georgios_Tsolakis1;~Haris_Jabbar1;~Ian_Foster2;~Bo_Li19;~Rick_Stevens1;~Ce_Zhang1", "gender": ";M;Not Specified;M;M;M;M;M;F;M;", "homepage": ";;;;https://programmwerk.ch;https://www.linkedin.com/in/george-tsolakis-80ab27254;;;http://boli.cs.illinois.edu/;https://cs.uchicago.edu/directory/rick-stevens/;", "dblp": ";;;;;;;f/IanTFoster;50/3402-26;s/RickLStevens;97/919", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;;;8H_RIcEAAAAJ;VGoSakQAAAAJ;K8vJkTcAAAAJ;https://scholar.google.com.tw/citations?user=2oSSsLYAAAAJ;", "orcid": ";;0000-0003-1667-1709;;;;;0000-0003-2129-5269;;;", "linkedin": ";;rory-butler-68444a113/;anton-alexandrov-4624a2182/?originalSubdomain=bg;valdemarthanner/;;harisjabbar/;ianfoster/;;;", "or_profile": "~Maurice_Weber1;~Carlo_Siebenschuh1;~Rory_Marshall_Butler1;~Anton_Alexandrov1;~Valdemar_Ragnar_Thanner1;~Georgios_Tsolakis1;~Haris_Jabbar1;~Ian_Foster2;~Bo_Li19;~Rick_Stevens1;~Ce_Zhang1", "aff": ";The University of Chicago;University of Chicago;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Argonne National Laboratory;University of Illinois, Urbana Champaign;University of Chicago;University of Chicago", "aff_domain": ";uchicago.edu;uchicago.edu;ethz.ch;ethz.ch;inf.ethz.ch;lmu.de;anl.gov;illinois.edu;uchicago.edu;uchicago.edu", "position": ";PhD student;PhD student;MS student;MS student;MS student;PhD student;Principal Researcher;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nweber2023wordscape,\ntitle={WordScape: a Pipeline to extract multilingual, visually rich Documents with Layout Annotations from Web Crawl Data},\nauthor={Maurice Weber and Carlo Siebenschuh and Rory Marshall Butler and Anton Alexandrov and Valdemar Ragnar Thanner and Georgios Tsolakis and Haris Jabbar and Ian Foster and Bo Li and Rick Stevens and Ce Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=xewwYquInO}\n}", "github": "", "project": "", "reviewers": "knqo;t2Lc;fwpL;qD1P;nyYp", "pdf_size": 5600405, "rating": "6;6;6;6;8", "confidence": "2;4;5;3;4", "wc_summary_and_contributions": "82;105;99;60;69", "wc_strengths": "86;113;82;191;54", "wc_improvement": "173;342;75;307;38", "wc_limitations": "40;24;262;27;14", "wc_correctness": "47;8;18;78;3", "wc_clarity": "19;14;9;5;5", "wc_relation_to_prior_work": "8;15;12;10;5", "wc_documentation": "30;47;56;53;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "486;669;614;732;194", "wc_reply_reviewers": "74;84;67;0;0", "wc_reply_authors": "352;780;754;704;429", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "wc_summary_and_contributions_avg": [ 83.0, 
17.123083834403193 ], "wc_strengths_avg": [ 105.2, 46.79914529134053 ], "wc_improvement_avg": [ 187.0, 121.13298477293458 ], "wc_limitations_avg": [ 73.4, 94.66488261229715 ], "wc_correctness_avg": [ 30.8, 28.095551249263647 ], "wc_clarity_avg": [ 10.4, 5.4258639865002145 ], "wc_relation_to_prior_work_avg": [ 10.0, 3.40587727318528 ], "wc_documentation_avg": [ 38.2, 18.882796403075474 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 539.0, 190.58226570171738 ], "wc_reply_reviewers_avg": [ 45.0, 37.13758204299251 ], "wc_reply_authors_avg": [ 603.8, 177.5414317842458 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.196116135138184, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=592450655735891337&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": ";uchicago.edu;uchicago.edu;ethz.ch;ethz.ch;inf.ethz.ch;lmu.de;anl.gov;illinois.edu;uchicago.edu;uchicago.edu", "author_num": 11, "aff_unique_index": "0;0;1;1;1;2;3;4;0;0", "aff_unique_norm": "University of Chicago;ETH Zurich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Argonne National Laboratory;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uchicago.edu;https://www.ethz.ch;https://www.lmu.de;https://www.anl.gov;https://illinois.edu", "aff_unique_abbr": "UChicago;ETHZ;LMU;ANL;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Zurich;Urbana-Champaign", "aff_country_unique_index": "0;0;1;1;1;2;0;0;0;0", "aff_country_unique": "United States;Switzerland;Germany" }, { "title": "Physics-Informed Bayesian Optimization of Variational Quantum Circuits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69993", "id": "xfBeVGJwyL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3adb85a348a18cdd74ce99fbbab20301-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xfBeVGJwyL", "openreview": "https://openreview.net/forum?id=xfBeVGJwyL", "poster": "/media/PosterPDFs/NeurIPS%202023/69993.png?t=1701679065.4654486", "slides": "https://nips.cc/virtual/2023/poster/69993", "video": "https://nips.cc/virtual/2023/poster/69993", "author_site": "Kim Nicoli, Christopher J. Anders, Lena Funcke, Tobias Hartung, Karl Jansen, Stefan K\u00fchn, Klaus-Robert M\u00fcller, Paolo Stornati, Pan Kessel, Shinichi Nakajima", "tldr": "", "abstract": "In this paper, we propose a novel and powerful method to harness Bayesian optimization for variational quantum eigensolvers (VQEs) - a hybrid quantum-classical protocol used to approximate the ground state of a quantum Hamiltonian. Specifically, we derive a *VQE-kernel* which incorporates important prior information about quantum circuits: the kernel feature map of the VQE-kernel exactly matches the known functional form of the VQE's objective function and thereby significantly reduces the posterior uncertainty.\nMoreover, we propose a novel acquisition function for Bayesian optimization called \\emph{Expected Maximum Improvement over Confident Regions} (EMICoRe) which can actively exploit the inductive bias of the VQE-kernel by treating regions with low predictive uncertainty as indirectly \"observed\". As a result, observations at as few as three points in the search domain are sufficient to determine the complete objective function along an entire one-dimensional subspace of the optimization landscape. 
\nOur numerical experiments demonstrate that our approach improves over state-of-the-art baselines.", "keywords": "Bayesian optimization;Expected improvement;Quantum computing;Variational Quantum Eigensolvers", "primary_area": "", "supplementary_material": "/attachment/c360cd97738235a201638bcf153dc7ce3ce52de5.pdf", "author": "Kim Andrea Nicoli;Christopher J. Anders;Lena Funcke;Tobias Hartung;Karl Jansen;Stefan Kuhn;Klaus Robert Muller;Paolo Stornati;Pan Kessel;Shinichi Nakajima", "authorids": "~Kim_Andrea_Nicoli1;~Christopher_J._Anders1;lfuncke@uni-bonn.de;tobias.hartung@nulondon.ac.uk;karl.jansen@desy.de;stefan.kuehn@desy.de;~Klaus_Robert_Muller1;paolo.stornati@desy.de;~Pan_Kessel1;~Shinichi_Nakajima2", "gender": "M;;;;;;M;;M;M", "homepage": ";https://cjanders.de;;;;;https://www.ml.tu-berlin.de/menue/members/klaus-robert_mueller/;;https://www.gene.com/scientists/our-scientists/pan-kessel;https://web.ml.tu-berlin.de/author/dr.-shinichi-nakajima/", "dblp": "238/0997;243/2919;;;;;m/KRMuller.html;;238/1381;97/6115.html", "google_scholar": "0GzYud8AAAAJ;https://scholar.google.de/citations?user=9SIAzH4AAAAJ;;;;;https://scholar.google.de/citations?hl=de;;uODjwl8AAAAJ;hXSvID4AAAAJ", "orcid": "0000-0001-5933-1822;0000-0003-3295-8486;;;;;0000-0002-3861-7685;;;0000-0003-3970-4569", "linkedin": ";;;;;;;;;", "or_profile": "~Kim_Andrea_Nicoli1;~Christopher_J._Anders1;lfuncke@uni-bonn.de;tobias.hartung@nulondon.ac.uk;karl.jansen@desy.de;stefan.kuehn@desy.de;~Klaus_Robert_Muller1;paolo.stornati@desy.de;~Pan_Kessel1;~Shinichi_Nakajima2", "aff": "Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Berlin;;;;;TU Berlin;;TU Berlin;BIFOLD, TU Berlin", "aff_domain": "tu-berlin.de;tu-berlin.de;;;;;tu-berlin.de;;tu-berlin.de;tu-berlin.de", "position": "PhD student;PhD student;;;;;Full Professor;;Postdoc;Postdoc", "bibtex": "@inproceedings{\nnicoli2023physicsinformed,\ntitle={Physics-Informed Bayesian Optimization of Variational Quantum Circuits},\nauthor={Kim Andrea Nicoli and Christopher J. 
Anders and Lena Funcke and Tobias Hartung and Karl Jansen and Stefan Kuhn and Klaus Robert Muller and Paolo Stornati and Pan Kessel and Shinichi Nakajima},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xfBeVGJwyL}\n}", "github": "", "project": "", "reviewers": "1i6G;nt6b;93Tb;WANE", "pdf_size": 5099671, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "2;1;3;4", "novelty": "3;2;2;3", "presentation": "2;3;4;4", "wc_summary": "33;132;159;110", "wc_strengths": "26;67;77;45", "wc_weaknesses": "213;228;80;38", "wc_questions": "40;89;128;93", "wc_limitations": "16;14;17;4", "wc_review": "328;530;461;290", "wc_reply_reviewers": "36;12;0;24", "wc_reply_authors": "193;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 108.5, 46.917480750782005 ], "wc_strengths_avg": [ 53.75, 19.76581645164196 ], "wc_weaknesses_avg": [ 139.75, 82.27507216648308 ], "wc_questions_avg": [ 87.5, 31.34086788842964 ], "wc_limitations_avg": [ 12.75, 5.165994579942956 ], "wc_review_avg": [ 402.25, 97.32002620221596 ], "wc_reply_reviewers_avg": [ 18.0, 13.416407864998739 ], "wc_reply_authors_avg": [ 48.25, 83.57145146519834 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8505455617520802799&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "tu-berlin.de;tu-berlin.de;;;;;tu-berlin.de;;tu-berlin.de;tu-berlin.de", "author_num": 10, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Efficient Bayesian Learning Curve Extrapolation using Prior-Data Fitted Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69992", "id": "xgTV6rmH6n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3f1a5e8bfcc3005724d246abe454c1e5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xgTV6rmH6n", "openreview": "https://openreview.net/forum?id=xgTV6rmH6n", "poster": "/media/PosterPDFs/NeurIPS%202023/69992.png?t=1702037476.880074", "slides": "https://nips.cc/virtual/2023/poster/69992", "video": "https://nips.cc/virtual/2023/poster/69992", "author_site": "Steven Adriaensen, Herilalaina Rakotoarison, Samuel M\u00fcller, Frank Hutter", "tldr": "", "abstract": "Learning curve extrapolation aims to predict model performance in later epochs of training, based on the performance in earlier epochs.\nIn this work, we argue that, while the inherent uncertainty in the extrapolation of learning curves warrants a Bayesian approach, existing methods are (i) overly restrictive, and/or (ii) computationally expensive. We describe the first application of prior-data fitted neural networks (PFNs) in this context. 
A PFN is a transformer, pre-trained on data generated from a prior, to perform approximate Bayesian inference in a single forward pass. We propose LC-PFN, a PFN trained to extrapolate 10 million artificial right-censored learning curves generated from a parametric prior proposed in prior art using MCMC. We demonstrate that LC-PFN can approximate the posterior predictive distribution more accurately than MCMC, while being over 10 000 times faster. We also show that the same LC-PFN achieves competitive performance extrapolating a total of 20 000 real learning curves from four learning curve benchmarks (LCBench, NAS-Bench-201, Taskset, and PD1) that stem from training a wide range of model architectures (MLPs, CNNs, RNNs, and Transformers) on 53 different datasets with varying input modalities (tabular, image, text, and protein data). Finally, we investigate its potential in the context of model selection and find that a simple LC-PFN based predictive early stopping criterion obtains 2 - 6x speed-ups on 45 of these datasets, at virtually no overhead.", "keywords": "learning curve extrapolation;prior-data fitted networks;transformers;Bayesian inference;uncertainty estimation;model selection", "primary_area": "", "supplementary_material": "/attachment/b33e023385f310c82935315f14dabcf3f6223792.pdf", "author": "Steven Adriaensen;Herilalaina Rakotoarison;Samuel M\u00fcller;Frank Hutter", "authorids": "~Steven_Adriaensen1;~Herilalaina_Rakotoarison1;~Samuel_M\u00fcller1;~Frank_Hutter1", "gender": "M;M;;M", "homepage": ";https://scholar.google.fr/citations?user=pyws4AQAAAAJ&hl=en;https://uncoolis.cool;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": "148/1033;242/7961;284/9655;89/5383", "google_scholar": ";https://scholar.google.fr/citations?user=pyws4AQAAAAJ;pevYEjAAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": ";;;0000-0002-2037-3694", "linkedin": ";;;frank-hutter-9190b24b/", "or_profile": "~Steven_Adriaensen1;~Herilalaina_Rakotoarison1;~Samuel_M\u00fcller1;~Frank_Hutter1", "aff": "Universit\u00e4t Freiburg;University of Freiburg;University of Freiburg, Universit\u00e4t Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de;uni-freiburg.de", "position": "Postdoc;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nadriaensen2023efficient,\ntitle={Efficient Bayesian Learning Curve Extrapolation using Prior-Data Fitted Networks},\nauthor={Steven Adriaensen and Herilalaina Rakotoarison and Samuel M{\\\"u}ller and Frank Hutter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xgTV6rmH6n}\n}", "github": "", "project": "", "reviewers": "7zKX;tPbz;3hos;KctK", "pdf_size": 17713485, "rating": "5;6;7;7", "confidence": "3;2;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "98;24;89;53", "wc_strengths": "54;48;196;58", "wc_weaknesses": "78;106;183;98", "wc_questions": "47;45;36;120", "wc_limitations": "1;1;26;12", "wc_review": "278;224;530;341", "wc_reply_reviewers": "0;10;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.0, 29.52117883825102 ], "wc_strengths_avg": [ 89.0, 61.87891401761993 ], 
"wc_weaknesses_avg": [ 116.25, 39.86461463503692 ], "wc_questions_avg": [ 62.0, 33.741665637605976 ], "wc_limitations_avg": [ 10.0, 10.27131929208707 ], "wc_review_avg": [ 343.25, 115.49756490939538 ], "wc_reply_reviewers_avg": [ 5.75, 5.84700778176325 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17890887480753301222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uni-freiburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de;uni-freiburg.de", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-freiburg.de;https://www.uni-freiburg.de", "aff_unique_abbr": "Uni Freiburg;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Learning a Neuron by a Shallow ReLU Network: Dynamics and Implicit Bias for Correlated Inputs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69991", "id": "xgY4QcOiEZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4af24e6ce753c181e703f3f0be3b5e20-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xgY4QcOiEZ", "openreview": "https://openreview.net/forum?id=xgY4QcOiEZ", "poster": "/media/PosterPDFs/NeurIPS%202023/69991.png?t=1699530943.4710963", "slides": "https://nips.cc/virtual/2023/poster/69991", "video": "https://nips.cc/virtual/2023/poster/69991", "author_site": "Dmitry Chistikov, Matthias Englert, Ranko Lazic", "tldr": "", "abstract": "We prove that, for the fundamental regression task of learning a single neuron, training a one-hidden layer ReLU network of any width by gradient flow from a small initialisation converges to zero loss and is implicitly biased to minimise the rank of network parameters. By assuming that the training points are correlated with the teacher neuron, we complement previous work that considered orthogonal datasets. Our results are based on a detailed non-asymptotic analysis of the dynamics of each hidden neuron throughout the training. We also show and characterise a surprising distinction in this setting between interpolator networks of minimal rank and those of minimal Euclidean norm. 
Finally we perform a range of numerical experiments, which corroborate our theoretical findings.", "keywords": "implicit bias;implicit regularization;training dynamics;ReLU networks;gradient flow;theoretical analysis", "primary_area": "", "supplementary_material": "/attachment/743420717de3c8dd9cc23e45c19a0101134596c4.pdf", "author": "Dmitry Chistikov;Matthias Englert;Ranko Lazic", "authorids": "d.chistikov@warwick.ac.uk;~Matthias_Englert1;~Ranko_Lazic1", "gender": ";;M", "homepage": ";;https://warwick.ac.uk/fac/sci/dcs/people/ranko_lazic", "dblp": ";;l/RankoLazic", "google_scholar": ";;https://scholar.google.co.uk/citations?user=yGOk7boAAAAJ", "orcid": ";;0000-0003-3663-5182", "linkedin": ";;", "or_profile": "d.chistikov@warwick.ac.uk;~Matthias_Englert1;~Ranko_Lazic1", "aff": ";;University of Warwick", "aff_domain": ";;warwick.ac.uk", "position": ";;Full Professor", "bibtex": "@inproceedings{\nchistikov2023learning,\ntitle={Learning a Neuron by a Shallow Re{LU} Network: Dynamics and Implicit Bias for Correlated Inputs},\nauthor={Dmitry Chistikov and Matthias Englert and Ranko Lazic},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xgY4QcOiEZ}\n}", "github": "", "project": "", "reviewers": "ytEj;PDpC;6KPF;JZbY", "pdf_size": 403186, "rating": "6;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "67;108;76;116", "wc_strengths": "108;83;62;74", "wc_weaknesses": "312;155;76;93", "wc_questions": "78;41;46;4", "wc_limitations": "27;14;42;5", "wc_review": "592;401;302;292", "wc_reply_reviewers": "52;10;0;4", "wc_reply_authors": "47;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.75, 20.69269194667528 ], "wc_strengths_avg": [ 81.75, 16.887495373796554 ], "wc_weaknesses_avg": [ 159.0, 93.09940923550482 ], "wc_questions_avg": [ 42.25, 26.252380844411046 ], "wc_limitations_avg": [ 22.0, 13.946325680981353 ], "wc_review_avg": [ 396.75, 120.51011368345812 ], "wc_reply_reviewers_avg": [ 16.5, 20.80264406271472 ], "wc_reply_authors_avg": [ 11.75, 20.351596988934308 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15218386072560011867&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": ";;warwick.ac.uk", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Warwick", "aff_unique_dep": "", "aff_unique_url": "https://www.warwick.ac.uk", "aff_unique_abbr": "Warwick", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Asymptotics of Bayesian Uncertainty Estimation in Random Features Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69990", "id": "xgzkuTGBTx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7e16384b94a1c7e4462a70bb8fb93ca9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xgzkuTGBTx", "openreview": "https://openreview.net/forum?id=xgzkuTGBTx", "poster": 
"/media/PosterPDFs/NeurIPS%202023/69990.png?t=1701880183.7060225", "slides": "https://nips.cc/virtual/2023/poster/69990", "video": "https://nips.cc/virtual/2023/poster/69990", "author_site": "Youngsoo Baek, Samuel Berchuck, Sayan Mukherjee", "tldr": "", "abstract": "In this paper we compare and contrast the behavior of the posterior predictive distribution to the risk of the \nthe maximum a posteriori estimator for the random features regression model in the overparameterized regime. We will focus on the variance of the posterior predictive distribution (Bayesian model average) and compare its asymptotics to that of the risk of the MAP estimator. In the regime where the model dimensions grow faster than any constant multiple of the number of samples, asymptotic agreement between these two quantities is governed by the phase transition in the signal-to-noise ratio. They also asymptotically agree with each other when the number of samples grow faster than any constant multiple of model dimensions. Numerical simulations illustrate finer distributional properties of the two quantities for finite dimensions. We conjecture they have Gaussian fluctuations and exhibit similar properties as found by previous authors in a Gaussian sequence model, this is of independent theoretical interest.", "keywords": "asymptotics;random features model;Bayesian inference", "primary_area": "", "supplementary_material": "/attachment/95f7a6cd3f69b922020811d805c781c54635ed39.zip", "author": "Youngsoo Baek;Samuel Berchuck;Sayan Mukherjee", "authorids": "~Youngsoo_Baek1;~Samuel_Berchuck1;~Sayan_Mukherjee1", "gender": "M;;M", "homepage": ";;", "dblp": ";;52/5375-4", "google_scholar": ";;", "orcid": "0000-0002-0143-2839;;", "linkedin": "youngsoo-baek-7930a594/;;", "or_profile": "~Youngsoo_Baek1;~Samuel_Berchuck1;~Sayan_Mukherjee1", "aff": "Duke University;;Max Planck Institute for Mathematics in the Sciences, Max-Planck Institute", "aff_domain": "duke.edu;;mis.mpg.de", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nbaek2023asymptotics,\ntitle={Asymptotics of Bayesian Uncertainty Estimation in Random Features Regression},\nauthor={Youngsoo Baek and Samuel Berchuck and Sayan Mukherjee},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xgzkuTGBTx}\n}", "github": "", "project": "", "reviewers": "Stco;gLVj;dJw1;KMbc", "pdf_size": 658110, "rating": "5;5;6;6", "confidence": "2;4;4;3", "soundness": "3;3;3;3", "novelty": "2;1;3;3", "presentation": "1;3;3;3", "wc_summary": "208;70;194;237", "wc_strengths": "37;89;152;67", "wc_weaknesses": "27;78;182;21", "wc_questions": "537;66;423;97", "wc_limitations": "15;9;18;1", "wc_review": "824;312;969;423", "wc_reply_reviewers": "50;43;68;0", "wc_reply_authors": "0;0;162;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 177.25, 63.83327893818396 ], "wc_strengths_avg": [ 86.25, 42.21004027479718 ], "wc_weaknesses_avg": [ 77.0, 64.54068484297328 ], "wc_questions_avg": [ 280.75, 203.58091143326774 ], "wc_limitations_avg": [ 10.75, 6.49519052838329 ], "wc_review_avg": [ 632.0, 272.26549542679845 ], "wc_reply_reviewers_avg": [ 40.25, 24.963723680572976 ], "wc_reply_authors_avg": [ 40.5, 70.14805770653953 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], 
"reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=870369052397424827&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "duke.edu;;mis.mpg.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Duke University;Max Planck Institute for Mathematics in the Sciences", "aff_unique_dep": ";Mathematics in the Sciences", "aff_unique_url": "https://www.duke.edu;https://www.mis.mpg.de", "aff_unique_abbr": "Duke;MPI MIS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "title": "SituatedGen: Incorporating Geographical and Temporal Contexts into Generative Commonsense Reasoning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73417", "id": "xhbIud48JN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d4f2bc9885ecbe30f65031819ef8699f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=xhbIud48JN", "openreview": "https://openreview.net/forum?id=xhbIud48JN", "poster": "/media/PosterPDFs/NeurIPS%202023/73417.png?t=1701750969.733854", "slides": "https://nips.cc/virtual/2023/poster/73417", "video": "https://nips.cc/virtual/2023/poster/73417", "author_site": "Yunxiang Zhang, Xiaojun Wan", "tldr": "", "abstract": "Recently, commonsense reasoning in text generation has attracted much attention. Generative commonsense reasoning is the task that requires machines, given a group of keywords, to compose a single coherent sentence with commonsense plausibility. While existing datasets targeting generative commonsense reasoning focus on everyday scenarios, it is unclear how well machines reason under specific geographical and temporal contexts. We formalize this challenging task as SituatedGen, where machines with commonsense should generate a pair of contrastive sentences given a group of keywords including geographical or temporal entities. We introduce a corresponding English dataset consisting of 8,268 contrastive sentence pairs, which are built upon several existing commonsense reasoning benchmarks with minimal manual labor. Experiments show that state-of-the-art generative language models struggle to generate sentences with commonsense plausibility and still lag far behind human performance. 
Our dataset is publicly available at https://github.com/yunx-z/situated_gen.", "keywords": "generative commonsense reasoning;generative language model", "primary_area": "", "supplementary_material": "/attachment/e0cc413f7714fae1c4a9b8949d495599a4b719f9.pdf", "author": "Yunxiang Zhang;Xiaojun Wan", "authorids": "~Yunxiang_Zhang2;~Xiaojun_Wan1", "gender": "M;M", "homepage": "https://yunx-z.github.io/;https://wanxiaojun.github.io", "dblp": "160/6176-2.html;07/1521", "google_scholar": "pbvWlJwAAAAJ;lTTeBdkAAAAJ", "orcid": ";", "linkedin": "%E4%BA%91%E7%BF%94-%E5%BC%A0-a97859196/;", "or_profile": "~Yunxiang_Zhang2;~Xiaojun_Wan1", "aff": "University of Michigan - Ann Arbor;Peking University", "aff_domain": "umich.edu;pku.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023situatedgen,\ntitle={SituatedGen: Incorporating Geographical and Temporal Contexts into Generative Commonsense Reasoning},\nauthor={Yunxiang Zhang and Xiaojun Wan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=xhbIud48JN}\n}", "github": "", "project": "", "reviewers": "63Tn;jj95;rfw9", "pdf_size": 541763, "rating": "6;7;7", "confidence": "3;3;3", "wc_summary_and_contributions": "76;273;46", "wc_strengths": "43;87;67", "wc_improvement": "154;92;59", "wc_limitations": "8;55;12", "wc_correctness": "17;102;29", "wc_clarity": "8;23;12", "wc_relation_to_prior_work": "6;19;40", "wc_documentation": "25;10;6", "wc_additional_feedback": "1;1;1", "wc_review": "338;662;272", "wc_reply_reviewers": "14;62;28", "wc_reply_authors": "199;131;84", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 131.66666666666666, 100.68542871515996 ], "wc_strengths_avg": [ 65.66666666666667, 17.987650084309387 ], "wc_improvement_avg": [ 101.66666666666667, 39.38132665222045 ], "wc_limitations_avg": [ 25.0, 21.275964529643932 ], "wc_correctness_avg": [ 49.333333333333336, 37.56180092712394 ], "wc_clarity_avg": [ 14.333333333333334, 6.342099196813483 ], "wc_relation_to_prior_work_avg": [ 21.666666666666668, 14.007934259633798 ], "wc_documentation_avg": [ 13.666666666666666, 8.178562764256865 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 424.0, 170.43473824311755 ], "wc_reply_reviewers_avg": [ 34.666666666666664, 20.154955277107963 ], "wc_reply_authors_avg": [ 138.0, 47.20875624994442 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10533755358845509544&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umich.edu;pku.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Michigan;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UM;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "Text Alignment Is An Efficient Unified Model for Massive NLP Tasks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69989", "id": "xkkBFePoFn", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/f5708199bdc013c5b56406db305b991e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xkkBFePoFn", "openreview": "https://openreview.net/forum?id=xkkBFePoFn", "poster": "/media/PosterPDFs/NeurIPS%202023/69989.png?t=1702507360.2831905", "slides": "https://nips.cc/virtual/2023/poster/69989", "video": "https://nips.cc/virtual/2023/poster/69989", "author_site": "Yuheng Zha, Yichi Yang, Ruichen Li, Zhiting Hu", "tldr": "", "abstract": "Large language models (LLMs), typically designed as a function of next-word prediction, have excelled across extensive NLP tasks. Despite the generality, next-word prediction is often not an efficient formulation for many of the tasks, demanding an extreme scale of model parameters (10s or 100s of billions) and sometimes yielding suboptimal performance.\nIn practice, it is often desirable to build more efficient models---despite being less versatile, they still apply to a substantial subset of problems, delivering on par or even superior performance with much smaller model sizes.\nIn this paper, we propose text alignment as an efficient unified model for a wide range of crucial tasks involving text entailment, similarity, question answering (and answerability), factual consistency, and so forth. Given a pair of texts, the model measures the degree of alignment between their information. We instantiate an alignment model through lightweight finetuning of RoBERTa (355M parameters) using 5.9M examples from 28 datasets. Despite its compact size, extensive experiments show the model's efficiency and strong performance: (1) On over 20 datasets of aforementioned diverse tasks, the model matches or surpasses FLAN-T5 models that have around 2x or 10x more parameters; the single unified model also outperforms task-specific models finetuned on individual datasets; (2) When applied to evaluate factual consistency of language generation on 23 datasets, our model improves over various baselines, including the much larger GPT-3.5 (ChatGPT) and sometimes even GPT-4; (3) The lightweight model can also serve as an add-on component for LLMs such as GPT-3.5 in question answering tasks, improving the average exact match (EM) score by 17.94 and F1 score by 15.05 through identifying unanswerable questions.", "keywords": "Text Alignment;Efficient Unified Model;NLU Tasks;Factual Consistency Evaluation;QA with Unanswerable Question", "primary_area": "", "supplementary_material": "/attachment/ae21a6b731d65e3f7339005417cdfd0b768d551d.zip", "author": "Yuheng Zha;Yichi Yang;Ruichen Li;Zhiting Hu", "authorids": "~Yuheng_Zha1;~Yichi_Yang1;~Ruichen_Li4;~Zhiting_Hu3", "gender": ";;M;M", "homepage": "https://yuh-zha.github.io/;;;http://zhiting.ucsd.edu", "dblp": "295/8582;283/4448;;134/4031", "google_scholar": "yqjXrXEAAAAJ;;2Dv077YAAAAJ;N7_xhHoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yuheng_Zha1;~Yichi_Yang1;~Ruichen_Li4;~Zhiting_Hu3", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;Amazon", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;amazon.com", "position": "MS student;MS student;MS student;Researcher", "bibtex": "@inproceedings{\nzha2023text,\ntitle={Text Alignment Is An Efficient Unified Model for Massive {NLP} Tasks},\nauthor={Yuheng Zha and Yichi Yang and Ruichen Li and Zhiting Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xkkBFePoFn}\n}", 
"github": "", "project": "", "reviewers": "zhhb;ayzf;Bk1X;6RaC", "pdf_size": 661341, "rating": "4;5;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;2;4;3", "presentation": "4;3;4;3", "wc_summary": "63;41;115;151", "wc_strengths": "40;25;45;121", "wc_weaknesses": "91;82;295;96", "wc_questions": "163;31;61;2", "wc_limitations": "21;7;1;2", "wc_review": "378;186;517;372", "wc_reply_reviewers": "356;0;12;12", "wc_reply_authors": "566;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.5, 43.159587579123134 ], "wc_strengths_avg": [ 57.75, 37.25167781456293 ], "wc_weaknesses_avg": [ 141.0, 89.0533547936292 ], "wc_questions_avg": [ 64.25, 60.7098632843132 ], "wc_limitations_avg": [ 7.75, 7.980444849756184 ], "wc_review_avg": [ 363.25, 117.6337005283775 ], "wc_reply_reviewers_avg": [ 95.0, 150.7680337472105 ], "wc_reply_authors_avg": [ 141.5, 245.08518927099612 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16894993755449625316&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucsd.edu;ucsd.edu;ucsd.edu;amazon.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, San Diego;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucsd.edu;https://www.amazon.com", "aff_unique_abbr": "UCSD;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Michelangelo: Conditional 3D Shape Generation based on Shape-Image-Text Aligned Latent Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69988", "id": "xmxgMij3LY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ea1a7f7bc0fc14142106a84c94c826d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xmxgMij3LY", "openreview": "https://openreview.net/forum?id=xmxgMij3LY", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69988", "video": "https://nips.cc/virtual/2023/poster/69988", "author_site": "Zibo Zhao, Wen Liu, Xin Chen, Xianfang Zeng, Rui Wang, Pei Cheng, BIN FU, Tao Chen, Gang Yu, Shenghua Gao", "tldr": "", "abstract": "We present a novel alignment-before-generation approach to tackle the challenging task of generating general 3D shapes based on 2D images or texts. Directly learning a conditional generative model from images or texts to 3D shapes is prone to producing inconsistent results with the conditions because 3D shapes have an additional dimension whose distribution significantly differs from that of 2D images and texts. To bridge the domain gap among the three modalities and facilitate multi-modal-conditioned 3D shape generation, we explore representing 3D shapes in a shape-image-text-aligned space. Our framework comprises two models: a Shape-Image-Text-Aligned Variational Auto-Encoder (SITA-VAE) and a conditional Aligned Shape Latent Diffusion Model (ASLDM). 
The former model encodes the 3D shapes into the shape latent space aligned to the image and text and reconstructs the fine-grained 3D neural fields corresponding to given shape embeddings via the transformer-based decoder. The latter model learns a probabilistic mapping function from the image or text space to the latent shape space. Our extensive experiments demonstrate that our proposed approach can generate higher-quality and more diverse 3D shapes that better semantically conform to the visual or textual conditional inputs, validating the effectiveness of the shape-image-text-aligned space for cross-modality 3D shape generation.", "keywords": "Conditional 3D Shape Generation;Neural 3D Representation;3D Reconstruction", "primary_area": "", "supplementary_material": "/attachment/543ef066f714225e8a6241a6a4fc03dbf8564cdc.zip", "author": "Zibo Zhao;Wen Liu;Xin Chen;Xianfang Zeng;Rui Wang;Pei Cheng;BIN FU;Tao Chen;Gang YU;Shenghua Gao", "authorids": "~Zibo_Zhao1;~Wen_Liu2;~Xin_Chen16;~Xianfang_Zeng2;~Rui_Wang41;~Pei_Cheng1;~BIN_FU2;~Tao_Chen6;~Gang_YU2;~Shenghua_Gao1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "https://maikouuu.github.io/;https://github.com/StevenLiuWen;https://chenxin.tech/;;https://wrong.wang/;https://cp0000.github.io/;https://www.facebook.com/bin.fu.73/;https://eetchen.github.io/;https://skicyyu.org/;", "dblp": "237/0093;61/372-3;24/1518-40;241/9439;;124/7752;;69/510-3;;63/7642", "google_scholar": "x3EgqesAAAAJ;A6K6bkoAAAAJ;7qeAJZ4AAAAJ;;nVR6p7cAAAAJ;;;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;https://scholar.google.com.sg/citations?user=BJdigYsAAAAJ;fe-1v0MAAAAJ", "orcid": ";;0000-0002-9347-1367;0000-0003-1251-2129;;;;;0000-0001-5570-2710;", "linkedin": ";;xin-chen-cs/;;;;;;;", "or_profile": "~Zibo_Zhao1;~Wen_Liu2;~Xin_Chen16;~Xianfang_Zeng2;~Rui_Wang41;~Pei_Cheng1;~BIN_FU2;~Tao_Chen6;~Gang_YU2;~Shenghua_Gao1", "aff": "ShanghaiTech University;Tencent PCG;Tencent;Tencent PCG;Tencent PCG;Tencent GY Lab;Tencent;Fudan University;Tencent;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;fudan.edu.cn;tencent.com;shanghaitech.edu.cn", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Full Professor;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nzhao2023michelangelo,\ntitle={Michelangelo: Conditional 3D Shape Generation based on Shape-Image-Text Aligned Latent Representation},\nauthor={Zibo Zhao and Wen Liu and Xin Chen and Xianfang Zeng and Rui Wang and Pei Cheng and BIN FU and Tao Chen and Gang YU and Shenghua Gao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xmxgMij3LY}\n}", "github": "", "project": "", "reviewers": "73Gb;rVoM;XFVg;fo3T", "pdf_size": 6844372, "rating": "5;5;6;6", "confidence": "3;4;5;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "53;101;62;74", "wc_strengths": "47;79;26;50", "wc_weaknesses": "65;69;173;38", "wc_questions": "223;120;4;36", "wc_limitations": "1;5;68;25", "wc_review": "389;374;333;223", "wc_reply_reviewers": "22;28;10;11", "wc_reply_authors": "29;40;29;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.5,
18.062391868188442 ], "wc_strengths_avg": [ 50.5, 18.874586088176873 ], "wc_weaknesses_avg": [ 86.25, 51.484827862196454 ], "wc_questions_avg": [ 95.75, 84.80676564991734 ], "wc_limitations_avg": [ 24.75, 26.57418860473448 ], "wc_review_avg": [ 329.75, 64.95142415682662 ], "wc_reply_reviewers_avg": [ 17.75, 7.562241731127087 ], "wc_reply_authors_avg": [ 30.5, 5.852349955359813 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17145666492025351222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "shanghaitech.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;fudan.edu.cn;tencent.com;shanghaitech.edu.cn", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;2;1;0", "aff_unique_norm": "ShanghaiTech University;Tencent;Fudan University", "aff_unique_dep": ";PCG (Platform and Content Group);", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.tencent.com;https://www.fudan.edu.cn", "aff_unique_abbr": "ShanghaiTech;Tencent PCG;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fitting trees to $\\ell_1$-hyperbolic distances", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69987", "id": "xo2lbfQE8I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/16bce4070c4e23434451b180348e3814-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xo2lbfQE8I", "openreview": "https://openreview.net/forum?id=xo2lbfQE8I", "poster": "/media/PosterPDFs/NeurIPS%202023/69987.png?t=1701547277.302056", "slides": "https://nips.cc/virtual/2023/poster/69987", "video": "https://nips.cc/virtual/2023/poster/69987", "author_site": "Joon-Hyeok Yim, Anna Gilbert", "tldr": "", "abstract": "Building trees to represent or to fit distances is a critical component of phylogenetic analysis, metric embeddings, approximation algorithms, geometric graph neural nets, and the analysis of hierarchical data. Much of the previous algorithmic work, however, has focused on generic metric spaces (i.e., those with no \\emph{a priori} constraints). Leveraging several ideas from the mathematical analysis of hyperbolic geometry and geometric group theory, we study the tree fitting problem as finding the relation between the hyperbolicity (ultrametricity) vector and the error of tree (ultrametric) embedding. That is, we define a vector of hyperbolicity (ultrametric) values over all triples of points and compare the $\\ell_p$ norms of this vector with the $\\ell_q$ norm of the distortion of the best tree fit to the distances. This formulation allows us to define the average hyperbolicity (ultrametricity) in terms of a normalized $\\ell_1$ norm of the hyperbolicity vector. Furthermore, we can interpret the classical tree fitting result of Gromov as a $p = q = \\infty$ result. We present an algorithm \\textsc{HCCRootedTreeFit} such that the $\\ell_1$ error of the output embedding is analytically bounded in terms of the $\\ell_1$-norm of the hyperbolicity vector (i.e., $p = q = 1$) and that this result is tight. Furthermore, this algorithm has significantly different theoretical and empirical performance as compared to Gromov's result and related algorithms. 
Finally, we show, using \textsc{HCCRootedTreeFit} and related tree fitting algorithms, that supposedly standard data sets for hierarchical data analysis and geometric graph neural networks have radically different tree fits than those of synthetic, truly tree-like data sets, suggesting that a much more refined analysis of these standard data sets is called for.", "keywords": "tree metric fitting;ultrametric fitting;$\\ell_1$-hyperbolicity", "primary_area": "", "supplementary_material": "/attachment/2dc71f560b2c2f6e62fae5a424df89f9a08d7c46.zip", "author": "Joon-Hyeok Yim;Anna Gilbert", "authorids": "~Joon-Hyeok_Yim1;~Anna_Gilbert2", "gender": "M;F", "homepage": "https://joonhyeokyim.github.io/;https://annacgilbert.github.io/", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "joon-hyeok-yim-391307127/;", "or_profile": "~Joon-Hyeok_Yim1;~Anna_Gilbert2", "aff": "Yale University;Yale University", "aff_domain": "yale.edu;yale.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nyim2023fitting,\ntitle={Fitting trees to $\\ell_1$-hyperbolic distances},\nauthor={Joon-Hyeok Yim and Anna Gilbert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xo2lbfQE8I}\n}", "github": "", "project": "", "reviewers": "x4xj;Fg5u;xUok;uKfW", "pdf_size": 445252, "rating": "5;5;6;6", "confidence": "3;1;1;2", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;1;3;3", "wc_summary": "42;254;100;57", "wc_strengths": "79;57;35;66", "wc_weaknesses": "65;326;59;93", "wc_questions": "68;25;5;204", "wc_limitations": "23;31;1;15", "wc_review": "277;693;200;435", "wc_reply_reviewers": "240;63;31;11", "wc_reply_authors": "156;6;44;0", "reply_reviewers": "2;1;2;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 1.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 113.25, 84.00409216222744 ], "wc_strengths_avg": [ 59.25, 16.037066439969625 ], "wc_weaknesses_avg": [ 135.75, 110.58791751362352 ], "wc_questions_avg": [ 75.5, 77.60315715227055 ], "wc_limitations_avg": [ 17.5, 11.07925990308017 ], "wc_review_avg": [ 401.25, 188.54492170302547 ], "wc_reply_reviewers_avg": [ 86.25, 90.6845494006559 ], "wc_reply_authors_avg": [ 51.5, 62.64782518172518 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:58XztGRCnyoJ:scholar.google.com/&scioq=Fitting+trees+to+%24%5Cell_1%24-hyperbolic+distances&hl=en&as_sdt=0,31", "gs_version_total": 6, "email": "yale.edu;yale.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Multi-modal Global Instance Tracking Benchmark (MGIT): Better Locating Target in Complex Spatio-temporal and Causal Relationship", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73416", "id": "xo6zDI8gvB", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/4ea14e6090343523ddcd5d3ca449695f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=xo6zDI8gvB", "openreview": "https://openreview.net/forum?id=xo6zDI8gvB", "poster": "/media/PosterPDFs/NeurIPS%202023/73416.png?t=1699339599.5066648", "slides": "https://nips.cc/virtual/2023/poster/73416", "video": "https://nips.cc/virtual/2023/poster/73416", "author_site": "Shiyu Hu, Dailing Zhang, wu meiqi, Xiaokun Feng, Xuchen Li, Xin Zhao, Kaiqi Huang", "tldr": "", "abstract": "Tracking an arbitrary moving target in a video sequence is the foundation for high-level tasks like video understanding. Although existing visual-based trackers have demonstrated good tracking capabilities in short video sequences, they always perform poorly in complex environments, as represented by the recently proposed global instance tracking task, which consists of longer videos with more complicated narrative content. \nRecently, several works have introduced natural language into object tracking, desiring to address the limitations of relying only on a single visual modality. However, these selected videos are still short sequences with uncomplicated spatio-temporal and causal relationships, and the provided semantic descriptions are too simple to characterize video content.\nTo address these issues, we (1) first propose a new multi-modal global instance tracking benchmark named MGIT. It consists of 150 long video sequences with a total of 2.03 million frames, aiming to fully represent the complex spatio-temporal and causal relationships coupled in longer narrative content. \n(2) Each video sequence is annotated with three semantic grains (i.e., action, activity, and story) to model the progressive process of human cognition. We expect this multi-granular annotation strategy can provide a favorable environment for multi-modal object tracking research and long video understanding. \n(3) Besides, we execute comparative experiments on existing multi-modal object tracking benchmarks, which not only explore the impact of different annotation methods, but also validate that our annotation method is a feasible solution for coupling human understanding into semantic labels. \n(4) Additionally, we conduct detailed experimental analyses on MGIT, and hope the explored performance bottlenecks of existing algorithms can support further research in multi-modal object tracking. 
\nThe proposed benchmark, experimental results, and toolkit will be released gradually on http://videocube.aitestunion.com/.", "keywords": "Visual Object Tracking;Multi-modal Tracking;Long Video Understanding;Benchmark and Evaluation", "primary_area": "", "supplementary_material": "/attachment/db6590ce271a676b8235cc1c4124c8e16a9f0dd9.zip", "author": "Shiyu Hu;Dailing Zhang;Meiqi Wu;Xiaokun Feng;Xuchen Li;Xin Zhao;Kaiqi Huang", "authorids": "~Shiyu_Hu1;~Dailing_Zhang2;~Meiqi_Wu2;~Xiaokun_Feng1;~Xuchen_Li1;~Xin_Zhao4;~Kaiqi_Huang1", "gender": "F;M;M;M;M;M;F", "homepage": "https://huuuuusy.github.io/;https://github.com/zdl-hub;https://github.com/XiaokunFeng;;https://www.xinzhaoai.com/;https://people.ucas.ac.cn/~huangkaiqi?language=en;", "dblp": ";156/8892;314/9776;232/2889;68/2766-12;89/7026;335/6876", "google_scholar": "49W-Rx4AAAAJ;ApH4wOcAAAAJ;https://scholar.google.com.hk/citations?user=NqXtIPIAAAAJ;9zHkraUAAAAJ;Emz6Cbv7LqEC;caQ-OmYAAAAJ;fGc7NVAAAAAJ", "orcid": "0000-0002-5872-7566;;;0009-0009-2565-8857;0000-0002-7660-9897;;0009-0007-3155-4013", "linkedin": "hushiyu1995/;;;;;;https://www.linkedin.cn/incareer/in/ACoAADtQ7fQBfSKm1Qf8f8r0YhJ9nk4Qu9V7EMw", "or_profile": "~Shiyu_Hu1;~Dailing_Zhang2;~Xiaokun_Feng1;~Xuchen_Li1;~Xin_Zhao4;~Kaiqi_Huang1;~wu_meiqi2", "aff": "Chinese academy of science;Southeast University;Institute of automation, Chinese academy of science;Beijing University of Posts and Telecommunications;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;University of Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;seu.edu.cn;ia.ac.cn;bupt.edu.cn;ia.ac.cn;nlpr.ia.ac.cn;ucas.ac.cn", "position": "PhD student;Undergrad student;PhD student;Undergrad student;Associate Professor;Professor;PhD student", "bibtex": "@inproceedings{\nhu2023a,\ntitle={A Multi-modal Global Instance Tracking Benchmark ({MGIT}): Better Locating Target in Complex Spatio-temporal and Causal Relationship},\nauthor={Shiyu Hu and Dailing Zhang and Meiqi Wu and Xiaokun Feng and Xuchen Li and Xin Zhao and Kaiqi Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=xo6zDI8gvB}\n}", "github": "", "project": "", "reviewers": "dttb;ZBXv;uBXK;u9AZ", "pdf_size": 6364378, "rating": "6;7;7;8", "confidence": "5;4;4;5", "wc_summary_and_contributions": "37;40;61;103", "wc_strengths": "62;125;158;109", "wc_improvement": "338;63;27;92", "wc_limitations": "13;84;72;1", "wc_correctness": "9;16;12;1", "wc_clarity": "4;13;5;1", "wc_relation_to_prior_work": "14;17;12;1", "wc_documentation": "2;25;4;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "480;384;352;310", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "727;687;706;680", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 60.25, 26.356925086208367 ], "wc_strengths_avg": [ 113.5, 34.586847211042524 ], "wc_improvement_avg": [ 130.0, 122.27632640867161 ], "wc_limitations_avg": [ 42.5, 36.003472054789384 ], "wc_correctness_avg": [ 9.5, 5.5 ], "wc_clarity_avg": [ 5.75, 4.437059837324712 ], "wc_relation_to_prior_work_avg": [ 11.0, 6.041522986797286 ], "wc_documentation_avg": [ 8.0, 9.874208829065749 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 381.5, 62.63186090162099 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 700.0, 18.261982367749674 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15175371499155876580&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ia.ac.cn;seu.edu.cn;ia.ac.cn;bupt.edu.cn;ia.ac.cn;nlpr.ia.ac.cn;ucas.ac.cn", "author_num": 7, "aff_unique_index": "0;1;0;2;0;0;3", "aff_unique_norm": "Chinese Academy of Sciences;Southeast University;Beijing University of Posts and Telecommunications;University of Chinese Academy of Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.cas.cn;https://www.seu.edu.cn/;http://www.bupt.edu.cn/;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;SEU;BUPT;UCAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "StableRep: Synthetic Images from Text-to-Image Models Make Strong Visual Representation Learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69986", "id": "xpjsOQtKqx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/971f1e59cd956cc094da4e2f78c6ea7c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xpjsOQtKqx", "openreview": "https://openreview.net/forum?id=xpjsOQtKqx", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69986", "video": "https://nips.cc/virtual/2023/poster/69986", "author_site": "Yonglong Tian, Lijie Fan, Phillip Isola, Huiwen Chang, Dilip Krishnan", "tldr": "", "abstract": "We investigate the potential of learning visual representations using synthetic images generated by text-to-image models. This is a natural question in the light of the excellent performance of such models in generating high-quality images. We consider specifically the Stable Diffusion, one of the leading open source text-to-image models. We show that (1) when the generative model is properly configured, training self-supervised methods on synthetic images can match or beat the real image counterpart;\n(2) by treating the multiple images generated from the same text prompt as positives for each other, we develop a multi-positive contrastive learning method, which we call StableRep. \nWith solely synthetic images, the representations learned by StableRep surpass the performance of representations learned by SimCLR and CLIP using the same set of text prompts and corresponding real images, on large scale datasets. 
\nWhen we further add language supervision, StableRep trained with 20M synthetic images (10M captions) achieves better accuracy than CLIP trained with 50M real images (50M captions).", "keywords": "representation learning;synthetic images;text-to-image models", "primary_area": "", "supplementary_material": "/attachment/3cdbd4ef98027daa439c999aeb8d9eed1b981c1a.pdf", "author": "Yonglong Tian;Lijie Fan;Phillip Isola;Huiwen Chang;Dilip Krishnan", "authorids": "~Yonglong_Tian1;~Lijie_Fan1;~Phillip_Isola1;~Huiwen_Chang2;~Dilip_Krishnan1", "gender": ";;M;F;M", "homepage": "http://people.csail.mit.edu/yonglong/;;http://web.mit.edu/phillipi/;;http://dilipkay.wordpress.com", "dblp": "151/6328;156/9941;36/9988;131/4389;08/2316", "google_scholar": "https://scholar.google.com.hk/citations?user=OsP7JHAAAAAJ;qthDk3oAAAAJ;ROILf3EAAAAJ;eZQNcvcAAAAJ;_MEuWIMAAAAJ", "orcid": ";;0000-0002-1411-6704;;", "linkedin": ";;phillip-isola-a9955b20/;;", "or_profile": "~Yonglong_Tian1;~Lijie_Fan1;~Phillip_Isola1;~Huiwen_Chang2;~Dilip_Krishnan1", "aff": "Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Research, Google;Google", "aff_domain": "google.com;mit.edu;mit.edu;research.google.com;google.com", "position": "Researcher;PhD student;Associate Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\ntian2023stablerep,\ntitle={StableRep: Synthetic Images from Text-to-Image Models Make Strong Visual Representation Learners},\nauthor={Yonglong Tian and Lijie Fan and Phillip Isola and Huiwen Chang and Dilip Krishnan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xpjsOQtKqx}\n}", "github": "", "project": "", "reviewers": "HH2w;DyTC;qM14;AnD7;FSjQ", "pdf_size": 5564418, "rating": "5;7;7;7;8", "confidence": "4;5;5;4;4", "soundness": "4;3;4;3;3", "novelty": "3;3;2;3;4", "presentation": "4;3;4;4;3", "wc_summary": "54;73;127;93;86", "wc_strengths": "35;56;150;79;64", "wc_weaknesses": "62;277;387;173;77", "wc_questions": "128;29;50;142;2", "wc_limitations": "1;26;18;1;6", "wc_review": "280;461;732;488;235", "wc_reply_reviewers": "0;110;54;45;67", "wc_reply_authors": "131;954;153;176;103", "reply_reviewers": "0;3;1;1;1", "reply_authors": "2;5;3;3;2", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 86.6, 24.171057072457547 ], "wc_strengths_avg": [ 76.8, 39.260157921231034 ], "wc_weaknesses_avg": [ 195.2, 123.03235346850843 ], "wc_questions_avg": [ 70.2, 55.23187485501465 ], "wc_limitations_avg": [ 10.4, 9.971960689854328 ], "wc_review_avg": [ 439.2, 176.42607517031036 ], "wc_reply_reviewers_avg": [ 55.2, 35.481826334054446 ], "wc_reply_authors_avg": [ 303.4, 326.1941753005409 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1666666666666666, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15561767808036135008&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;mit.edu;mit.edu;research.google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://web.mit.edu", 
"aff_unique_abbr": "Google;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beyond Unimodal: Generalising Neural Processes for Multimodal Uncertainty Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69985", "id": "xq1QvViDdW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/839e23e5b1c52cfd1268f4023a3af0d6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xq1QvViDdW", "openreview": "https://openreview.net/forum?id=xq1QvViDdW", "poster": "/media/PosterPDFs/NeurIPS%202023/69985.png?t=1701919022.3687482", "slides": "https://nips.cc/virtual/2023/poster/69985", "video": "https://nips.cc/virtual/2023/poster/69985", "author_site": "Myong Chol Jung, He Zhao, Joanna Dipnall, Lan Du", "tldr": "", "abstract": "Uncertainty estimation is an important research area to make deep neural networks (DNNs) more trustworthy. While extensive research on uncertainty estimation has been conducted with unimodal data, uncertainty estimation for multimodal data remains a challenge. Neural processes (NPs) have been demonstrated to be an effective uncertainty estimation method for unimodal data by providing the reliability of Gaussian processes with efficient and powerful DNNs. While NPs hold significant potential for multimodal uncertainty estimation, the adaptation of NPs for multimodal data has not been carefully studied. To bridge this gap, we propose Multimodal Neural Processes (MNPs) by generalising NPs for multimodal uncertainty estimation. Based on the framework of NPs, MNPs consist of several novel and principled mechanisms tailored to the characteristics of multimodal data. 
In extensive empirical evaluations, our method achieves state-of-the-art multimodal uncertainty estimation performance, showing appealing robustness against noisy samples and reliability in out-of-distribution detection, with faster computation than the current state-of-the-art multimodal uncertainty estimation method.", "keywords": "Uncertainty estimation;multimodality;neural processes", "primary_area": "", "supplementary_material": "/attachment/75aef2db7ce6972666f33ca5f5453336f6b67071.pdf", "author": "Myong Chol Jung;He Zhao;Joanna Dipnall;Lan Du", "authorids": "~Myong_Chol_Jung1;~He_Zhao1;~Joanna_Dipnall1;~Lan_Du1", "gender": "M;;F;M", "homepage": ";;https://research.monash.edu/en/persons/joanna-dipnall;https://research.monash.edu/en/persons/lan-du", "dblp": "316/6405;;276/5588;98/1504-2", "google_scholar": ";;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.au/citations?user=HtiTsgwAAAAJ", "orcid": "0000-0002-8715-8120;;0000-0001-7543-0687;0000-0002-9925-0223", "linkedin": ";;;", "or_profile": "~Myong_Chol_Jung1;~He_Zhao1;~Joanna_Dipnall1;~Lan_Du1", "aff": "Monash University;;Monash University;Monash University", "aff_domain": "monash.edu;;monash.edu;monash.edu", "position": "PhD student;;Researcher;Senior Lecturer", "bibtex": "@inproceedings{\njung2023beyond,\ntitle={Beyond Unimodal: Generalising Neural Processes for Multimodal Uncertainty Estimation},\nauthor={Myong Chol Jung and He Zhao and Joanna Dipnall and Lan Du},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xq1QvViDdW}\n}", "github": "", "project": "", "reviewers": "2g2g;hoCV;RNhe;fi6w;U6Gp", "pdf_size": 912472, "rating": "5;6;6;6;7", "confidence": "3;4;3;3;4", "soundness": "3;3;3;2;4", "novelty": "3;3;2;3;3", "presentation": "2;3;2;2;3", "wc_summary": "50;66;57;176;106", "wc_strengths": "9;84;64;69;55", "wc_weaknesses": "101;97;120;318;145", "wc_questions": "15;41;75;7;2", "wc_limitations": "1;10;6;20;1", "wc_review": "176;298;322;590;309", "wc_reply_reviewers": "0;48;28;164;0", "wc_reply_authors": "0;0;0;574;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;3;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.0, 46.71616422610059 ], "wc_strengths_avg": [ 56.2, 25.40393670280258 ], "wc_weaknesses_avg": [ 156.2, 82.66413974632532 ], "wc_questions_avg": [ 28.0, 27.07027890510181 ], "wc_limitations_avg": [ 7.6, 7.059745037889116 ], "wc_review_avg": [ 339.0, 135.97058505426827 ], "wc_reply_reviewers_avg": [ 48.0, 60.76841284746542 ], "wc_reply_authors_avg": [ 114.8, 229.6 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18198015715734995785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "monash.edu;;monash.edu;monash.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Calibrating \u201cCheap 
Signals\u201d in Peer Review without a Prior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69984", "id": "xr3KAzboHY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/41badd36e935f8a80175e95d8bc6192e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xr3KAzboHY", "openreview": "https://openreview.net/forum?id=xr3KAzboHY", "poster": "/media/PosterPDFs/NeurIPS%202023/69984.png?t=1698656938.722562", "slides": "https://nips.cc/virtual/2023/poster/69984", "video": "https://nips.cc/virtual/2023/poster/69984", "author_site": "Yuxuan Lu, Yuqing Kong", "tldr": "", "abstract": "Peer review lies at the core of the academic process, but even well-intentioned reviewers can still provide noisy ratings. While ranking papers by average ratings may reduce noise, varying noise levels and systematic biases stemming from ``cheap'' signals (e.g. author identity, proof length) can lead to unfairness. \nDetecting and correcting bias is challenging, as ratings are subjective and unverifiable. Unlike previous works relying on prior knowledge or historical data, we propose a one-shot noise calibration process without any prior information. We ask reviewers to predict others' scores and use these predictions for calibration. Assuming reviewers adjust their predictions according to the noise, we demonstrate that the calibrated score results in a more robust ranking compared to average ratings, even with varying noise levels and biases.\nIn detail, we show that the error probability of the calibrated score approaches zero as the number of reviewers increases and is significantly lower compared to average ratings when the number of reviewers is small.", "keywords": "Peer prediction;Peer review;Calibration", "primary_area": "", "supplementary_material": "", "author": "Yuxuan Lu;Yuqing Kong", "authorids": "~Yuxuan_Lu1;~Yuqing_Kong1", "gender": ";F", "homepage": ";https://cfcs.pku.edu.cn/yuqkong/", "dblp": ";https://dblp.uni-trier.de/pers/k/Kong:Yuqing.html", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Yuxuan_Lu1;~Yuqing_Kong1", "aff": ";Peking University", "aff_domain": ";pku.edu.cn", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nlu2023calibrating,\ntitle={Calibrating {\\textquotedblleft}Cheap Signals{\\textquotedblright} in Peer Review without a Prior},\nauthor={Yuxuan Lu and Yuqing Kong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xr3KAzboHY}\n}", "github": "", "project": "", "reviewers": "UucL;VrqZ;6t8A;SV22;tv7v", "pdf_size": 1140565, "rating": "4;5;5;5;7", "confidence": "3;2;2;3;2", "soundness": "2;3;3;3;4", "novelty": "2;2;2;2;3", "presentation": "2;2;1;3;4", "wc_summary": "37;265;106;80;138", "wc_strengths": "24;53;73;24;70", "wc_weaknesses": "240;164;152;42;125", "wc_questions": "98;77;125;478;16", "wc_limitations": "12;121;15;17;9", "wc_review": "411;680;471;641;358", "wc_reply_reviewers": "0;13;223;0;50", "wc_reply_authors": "0;0;408;0;350", "reply_reviewers": "0;1;1;0;2", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 2.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 125.2, 77.32890791935446 ], "wc_strengths_avg": [ 48.8, 21.36726468221892 ], "wc_weaknesses_avg": [ 144.6, 63.94247414668907 ], "wc_questions_avg": [ 158.8, 
163.5914423189673 ], "wc_limitations_avg": [ 34.8, 43.18518264405049 ], "wc_review_avg": [ 512.2, 126.85645430958569 ], "wc_reply_reviewers_avg": [ 57.2, 84.89852766685651 ], "wc_reply_authors_avg": [ 151.6, 186.5750251239431 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5833333333333334, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4405670120591171713&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": ";pku.edu.cn", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "FaceComposer: A Unified Model for Versatile Facial Content Creation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69983", "id": "xrK3QA9mLo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b4caf39e645680f826ae0a9e7ae9402-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xrK3QA9mLo", "openreview": "https://openreview.net/forum?id=xrK3QA9mLo", "poster": "/media/PosterPDFs/NeurIPS%202023/69983.png?t=1697456296.3103757", "slides": "https://nips.cc/virtual/2023/poster/69983", "video": "https://nips.cc/virtual/2023/poster/69983", "author_site": "Jiayu Wang, Kang Zhao, Yifeng Ma, Shiwei Zhang, Yingya Zhang, Yujun Shen, Deli Zhao, Jingren Zhou", "tldr": "", "abstract": "This work presents FaceComposer, a unified generative model that accomplishes a variety of facial content creation tasks, including text-conditioned face synthesis, text-guided face editing, face animation, etc. Based on the latent diffusion framework, FaceComposer follows the paradigm of compositional generation and employs diverse face-specific conditions, e.g., Identity Feature and Projected Normalized Coordinate Code, to unleash the model's creativity as much as possible. To support text control and animation, we clean up some existing face image datasets and collect around 500 hours of talking-face videos, forming a high-quality large-scale multi-modal face database. A temporal self-attention module is incorporated into the U-Net structure, which allows learning the denoising process on the mixture of images and videos. Extensive experiments suggest that our approach not only achieves performance comparable to or even better than the state of the art on each single task, but also facilitates some combined tasks with a single forward pass, demonstrating its potential to serve as a foundation generative model in the face domain. We further develop an interface such that users can enjoy our one-step service to create, edit, and animate their own characters. 
Code, dataset, model, and interface will be made publicly available.", "keywords": "diffusion model; talking face generation; face generation", "primary_area": "", "supplementary_material": "/attachment/47074a43f2686648015617c705c22024714f6ae8.pdf", "author": "Jiayu Wang;Kang Zhao;Yifeng Ma;Shiwei Zhang;Yingya Zhang;Yujun Shen;Deli Zhao;Jingren Zhou", "authorids": "~Jiayu_Wang2;~Kang_Zhao7;~Yifeng_Ma1;~Shiwei_Zhang2;~Yingya_Zhang3;~Yujun_Shen1;~Deli_Zhao1;~Jingren_Zhou1", "gender": "M;;Not Specified;M;M;;M;M", "homepage": ";;;https://www.researchgate.net/profile/Shiwei_Zhang7/research;;;https://zhaodeli.github.io;", "dblp": ";;291/9177;;142/2510;;77/1992;84/2644", "google_scholar": "pOgc1A0AAAAJ;;0SxgRqoAAAAJ;ZO3OQ-8AAAAJ;16RDSEUAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0000-0002-6929-5295;;;0000-0002-8838-578X;", "linkedin": ";;;;;;;", "or_profile": "~Jiayu_Wang2;~Kang_Zhao7;~Yifeng_Ma1;~Shiwei_Zhang2;~Yingya_Zhang3;~Yujun_Shen1;~Deli_Zhao1;~Jingren_Zhou1", "aff": "Alibaba Group;;NetEase, Inc.;Alibaba Group;Alibaba Group;;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;;netease.com;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;;Intern;Researcher;Researcher;;Director;Researcher", "bibtex": "@inproceedings{\nwang2023facecomposer,\ntitle={FaceComposer: A Unified Model for Versatile Facial Content Creation},\nauthor={Jiayu Wang and Kang Zhao and Yifeng Ma and Shiwei Zhang and Yingya Zhang and Yujun Shen and Deli Zhao and Jingren Zhou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xrK3QA9mLo}\n}", "github": "", "project": "", "reviewers": "BHq7;tZ4V;BQjN;UEp9", "pdf_size": 9083443, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "1;3;3;4", "presentation": "3;3;3;4", "wc_summary": "150;41;113;62", "wc_strengths": "145;110;95;36", "wc_weaknesses": "315;93;139;15", "wc_questions": "99;28;63;39", "wc_limitations": "29;6;53;18", "wc_review": "738;278;463;170", "wc_reply_reviewers": "382;32;0;18", "wc_reply_authors": "1070;16;0;15", "reply_reviewers": "2;1;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 42.7346463656832 ], "wc_strengths_avg": [ 96.5, 39.3605132080363 ], "wc_weaknesses_avg": [ 140.5, 110.06702503474871 ], "wc_questions_avg": [ 57.25, 27.224758952100935 ], "wc_limitations_avg": [ 26.5, 17.327723451163457 ], "wc_review_avg": [ 412.25, 215.28861442259318 ], "wc_reply_reviewers_avg": [ 108.0, 158.60012610335465 ], "wc_reply_authors_avg": [ 275.25, 458.8928932768517 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14086578410032547205&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "alibaba-inc.com;;netease.com;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Alibaba Group;NetEase, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.163.com", "aff_unique_abbr": "Alibaba;NetEase", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "QuIP: 2-Bit Quantization of Large Language Models With Guarantees", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69982", "id": "xrk9g5vcXR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0df38cd13520747e1e64e5b123a78ef8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xrk9g5vcXR", "openreview": "https://openreview.net/forum?id=xrk9g5vcXR", "poster": "/media/PosterPDFs/NeurIPS%202023/69982.png?t=1701927718.8283396", "slides": "https://nips.cc/virtual/2023/poster/69982", "video": "https://nips.cc/virtual/2023/poster/69982", "author_site": "Jerry Chee, Yaohui Cai, Volodymyr Kuleshov, Christopher De Sa", "tldr": "", "abstract": "This work studies post-training parameter quantization in large language models (LLMs). We introduce quantization with incoherence processing (QuIP), a new method based on the insight that quantization benefits from incoherent weight and Hessian matrices, i.e., from the weights being even in magnitude and the directions in which it is important to round them accurately being unaligned with the coordinate axes. QuIP consists of two steps: (1) an adaptive rounding procedure minimizing a quadratic proxy objective; (2) efficient pre- and post-processing that ensures weight and Hessian incoherence via multiplication by random orthogonal matrices. We complement QuIP with the first theoretical analysis for an LLM-scale quantization algorithm, and show that our theory also applies to an existing method, OPTQ. Empirically, we find that our incoherence preprocessing improves several existing quantization algorithms and yields the first LLM quantization methods that produce viable results using only two bits per weight. 
Our code can be found at https://github.com/Cornell-RelaxML/QuIP.", "keywords": "Quantization;Large Language Models;Adaptive Rounding;Theoretical Guarantees", "primary_area": "", "supplementary_material": "", "author": "Jerry Chee;Yaohui Cai;Volodymyr Kuleshov;Christopher De Sa", "authorids": "~Jerry_Chee1;~Yaohui_Cai1;~Volodymyr_Kuleshov1;~Christopher_De_Sa2", "gender": ";;;", "homepage": "http://jerry-chee.github.io/;;https://www.cs.cornell.edu/~kuleshov/;", "dblp": "207/8369;;81/8612;", "google_scholar": "qyQpUAkAAAAJ;;RY_t8XAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jerry_Chee1;~Yaohui_Cai1;~Volodymyr_Kuleshov1;~Christopher_De_Sa2", "aff": "Cornell University;;Cornell University;", "aff_domain": "cornell.edu;;cornell.edu;", "position": "PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nchee2023quip,\ntitle={Qu{IP}: 2-Bit Quantization of Large Language Models With Guarantees},\nauthor={Jerry Chee and Yaohui Cai and Volodymyr Kuleshov and Christopher De Sa},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xrk9g5vcXR}\n}", "github": "", "project": "", "reviewers": "Rfzg;Jvy8;Av5n;t6VC", "pdf_size": 616403, "rating": "5;7;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "120;59;53;48", "wc_strengths": "76;146;138;117", "wc_weaknesses": "145;87;111;80", "wc_questions": "175;45;123;62", "wc_limitations": "1;2;11;81", "wc_review": "517;339;436;388", "wc_reply_reviewers": "201;209;142;0", "wc_reply_authors": "0;721;128;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.0, 29.129023327259016 ], "wc_strengths_avg": [ 119.25, 27.123559869604136 ], "wc_weaknesses_avg": [ 105.75, 25.410381736605217 ], "wc_questions_avg": [ 101.25, 51.51880724550987 ], "wc_limitations_avg": [ 23.75, 33.281939546847326 ], "wc_review_avg": [ 420.0, 65.66962768281849 ], "wc_reply_reviewers_avg": [ 138.0, 83.77051987423738 ], "wc_reply_authors_avg": [ 212.25, 298.33904789685175 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4044326007824184623&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cornell.edu;;cornell.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Robustness in Graph Neural Networks: A Hamiltonian Approach", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69981", "id": "xtADRDRsM2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0a443a000e1cb2281480b3bac395b3b8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xtADRDRsM2", "openreview": "https://openreview.net/forum?id=xtADRDRsM2", "poster": "/media/PosterPDFs/NeurIPS%202023/69981.png?t=1702012795.4008982", "slides": 
"https://nips.cc/virtual/2023/poster/69981", "video": "https://nips.cc/virtual/2023/poster/69981", "author_site": "Kai Zhao, Qiyu Kang, Yang Song, Rui She, Sijie Wang, Wee Peng Tay", "tldr": "", "abstract": "Graph neural networks (GNNs) are vulnerable to adversarial perturbations, including those that affect both node features and graph topology. This paper investigates GNNs derived from diverse neural flows, concentrating on their connection to various stability notions such as BIBO stability, Lyapunov stability, structural stability, and conservative stability. We argue that Lyapunov stability, despite its common use, does not necessarily ensure adversarial robustness. Inspired by physics principles, we advocate for the use of conservative Hamiltonian neural flows to construct GNNs that are robust to adversarial attacks. The adversarial robustness of different neural flow GNNs is empirically compared on several benchmark datasets under a variety of adversarial attacks. Extensive numerical experiments demonstrate that GNNs leveraging conservative Hamiltonian flows with Lyapunov stability substantially improve robustness against adversarial perturbations. The implementation code of experiments is available at \\url{https://github.com/zknus/NeurIPS-2023-HANG-Robustness}.", "keywords": "adversarial robustness;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/ac853cb7c389364364f49d080ef61de51593a175.zip", "author": "Kai Zhao;Qiyu Kang;Yang Song;Rui She;Sijie Wang;Wee Peng Tay", "authorids": "~Kai_Zhao7;~Qiyu_Kang2;~Yang_Song7;~Rui_She1;~Sijie_Wang1;~Wee_Peng_Tay1", "gender": "M;M;M;;M;M", "homepage": "https://c3-yang-song.github.io;;https://github.com/sijieaaa;https://personal.ntu.edu.sg/wptay/;;https://kangqiyu.github.io./publications.html", "dblp": "24/4470-12;59;21/4330.html;45/3753;;204/3718", "google_scholar": "https://scholar.google.com/citations?hl=en;GRfZ_TwAAAAJ;IUfQMOYAAAAJ;BkCI7rEAAAAJ;;https://scholar.google.com.sg/citations?user=cS45eEcAAAAJ", "orcid": ";0000-0002-5211-1664;0000-0002-0925-2365;0000-0002-1543-195X;;", "linkedin": ";;;;zhao-kai-29010b169/;", "or_profile": "~Yang_Song7;~Rui_She1;~Sijie_Wang1;~Wee_Peng_Tay1;~Zhao_Kai2;~QIYU_KANG1", "aff": "C3 AI;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "c3.ai;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "Researcher;Postdoc;PhD student;Full Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nzhao2023adversarial,\ntitle={Adversarial Robustness in Graph Neural Networks: A Hamiltonian Approach},\nauthor={Kai Zhao and Qiyu Kang and Yang Song and Rui She and Sijie Wang and Wee Peng Tay},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xtADRDRsM2}\n}", "github": "", "project": "", "reviewers": "Yk3z;bDPL;Xukb;ehTk", "pdf_size": 944914, "rating": "5;6;7;7", "confidence": "1;2;5;4", "soundness": "3;2;2;3", "novelty": "2;2;3;2", "presentation": "3;2;1;3", "wc_summary": "45;150;45;59", "wc_strengths": "21;44;39;50", "wc_weaknesses": "112;388;224;143", "wc_questions": "3;16;3;28", "wc_limitations": "1;25;3;9", "wc_review": "182;623;314;289", "wc_reply_reviewers": "0;38;0;63", "wc_reply_authors": "0;150;28;52", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.5811388300841898 ], 
"soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 74.75, 43.8199440894212 ], "wc_strengths_avg": [ 38.5, 10.828203913853857 ], "wc_weaknesses_avg": [ 216.75, 106.99386664664475 ], "wc_questions_avg": [ 12.5, 10.404326023342406 ], "wc_limitations_avg": [ 9.5, 9.420721840708387 ], "wc_review_avg": [ 352.0, 164.12952202452794 ], "wc_reply_reviewers_avg": [ 25.25, 26.75233634656981 ], "wc_reply_authors_avg": [ 57.5, 56.48672410398748 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9534625892455922, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8655447918576360680&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "c3.ai;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "C3 AI;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.c3.ai;https://www.ntu.edu.sg", "aff_unique_abbr": "C3 AI;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;Singapore" }, { "title": "Faster Discrete Convex Function Minimization with Predictions: The M-Convex Case", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69980", "id": "xtQ9IGRzIW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d84c0dd9b1bfeee361f3268dcaebf849-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xtQ9IGRzIW", "openreview": "https://openreview.net/forum?id=xtQ9IGRzIW", "poster": "/media/PosterPDFs/NeurIPS%202023/69980.png?t=1699237130.5147183", "slides": "https://nips.cc/virtual/2023/poster/69980", "video": "https://nips.cc/virtual/2023/poster/69980", "author_site": "Taihei Oki, Shinsaku Sakaue", "tldr": "", "abstract": "Recent years have seen a growing interest in accelerating optimization algorithms with machine-learned predictions. Sakaue and Oki (NeurIPS 2022) have developed a general framework that warm-starts the *L-convex function minimization* method with predictions, revealing the idea's usefulness for various discrete optimization problems. In this paper, we present a framework for using predictions to accelerate *M-convex function minimization*, thus complementing previous research and extending the range of discrete optimization algorithms that can benefit from predictions. Our framework is particularly effective for an important subclass called *laminar convex minimization*, which appears in many operations research applications. 
Using predictions, our methods can improve upon the best worst-case time complexity bounds and even have the potential to go beyond a lower-bound result.", "keywords": "algorithms with predictions;beyond the worst-case analysis of algorithms;time complexity;combinatorial optimization;discrete convex analysis;submodular functions", "primary_area": "", "supplementary_material": "/attachment/a7b881469737851b2bdb88a28943dacf1d59029b.pdf", "author": "Taihei Oki;Shinsaku Sakaue", "authorids": "~Taihei_Oki1;~Shinsaku_Sakaue1", "gender": "M;M", "homepage": "https://www.opt.mist.i.u-tokyo.ac.jp/~oki/;https://ssakaue.github.io/", "dblp": "185/4435;183/6350", "google_scholar": "o-QxZEYAAAAJ;https://scholar.google.co.jp/citations?user=9oTbrmEAAAAJ", "orcid": "0000-0002-6862-9484;", "linkedin": ";", "or_profile": "~Taihei_Oki1;~Shinsaku_Sakaue1", "aff": "The University of Tokyo;NTT", "aff_domain": "u-tokyo.ac.jp;ntt.co.jp", "position": "Project Research Associate;Researcher", "bibtex": "@inproceedings{\noki2023faster,\ntitle={Faster Discrete Convex Function Minimization with Predictions: The M-Convex Case},\nauthor={Taihei Oki and Shinsaku Sakaue},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xtQ9IGRzIW}\n}", "github": "", "project": "", "reviewers": "xsf6;NNSF;xgTQ;SFP3;KCcb;pZm4", "pdf_size": 750284, "rating": "5;5;5;6;7;7", "confidence": "1;3;1;3;4;4", "soundness": "2;3;2;4;4;3", "novelty": "2;2;2;2;3;2", "presentation": "3;3;2;3;4;3", "wc_summary": "75;113;81;156;29;135", "wc_strengths": "23;27;25;80;134;66", "wc_weaknesses": "152;78;121;160;8;163", "wc_questions": "24;37;25;256;118;117", "wc_limitations": "8;5;1;50;43;7", "wc_review": "282;260;253;702;332;488", "wc_reply_reviewers": "39;23;73;43;40;61", "wc_reply_authors": "119;98;144;73;32;131", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;2;3", "rating_avg": [ 5.833333333333333, 0.8975274678557507 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 98.16666666666667, 41.90631084799626 ], "wc_strengths_avg": [ 59.166666666666664, 39.980898216801265 ], "wc_weaknesses_avg": [ 113.66666666666667, 55.58976724382126 ], "wc_questions_avg": [ 96.16666666666667, 81.88491246187475 ], "wc_limitations_avg": [ 19.0, 19.672315572906 ], "wc_review_avg": [ 386.1666666666667, 162.0477467360229 ], "wc_reply_reviewers_avg": [ 46.5, 16.20442326444645 ], "wc_reply_authors_avg": [ 99.5, 37.88909253773879 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8436958338752907, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3519631688331779922&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "u-tokyo.ac.jp;ntt.co.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Tokyo;NTT Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "UTokyo;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "TIES-Merging: Resolving Interference When Merging Models", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/69979", "id": "xtaX3WyCj1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1644c9af28ab7916874f6fd6228a9bcf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xtaX3WyCj1", "openreview": "https://openreview.net/forum?id=xtaX3WyCj1", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69979", "video": "https://nips.cc/virtual/2023/poster/69979", "author_site": "Prateek Yadav, Derek Tam, Leshem Choshen, Colin Raffel, Mohit Bansal", "tldr": "", "abstract": "Transfer learning \u2013 i.e., further fine-tuning a pre-trained model on a downstream task \u2013 can confer significant advantages, including improved downstream performance, faster convergence, and better sample efficiency. These advantages have led to a proliferation of task-specific fine-tuned models, which typically can only perform a single task and do not benefit from one another. Recently, model merging techniques have emerged as a solution to combine multiple task-specific models into a single multitask model without performing additional training. However, existing merging methods often ignore the interference between parameters of different models, resulting in large performance drops when merging multiple models. In this paper, we demonstrate that prior merging techniques inadvertently lose valuable information due to two major sources of interference: (a) interference due to redundant parameter values and (b) disagreement on the sign of a given parameter\u2019s values across models. To address this, we propose our method, TrIm, Elect Sign & Merge (TIES-Merging), which introduces three novel steps when merging models: (1) resetting parameters that only changed a small amount during fine-tuning, (2) resolving sign conflicts, and (3) merging only the parameters that are in alignment with the final agreed-upon sign. We find that TIES-Merging outperforms existing methods in diverse settings covering a range of modalities, domains, number of tasks, model sizes, architectures, and fine-tuning settings. 
We further analyze the impact of different types of interference on model parameters, highlight the importance of signs, and show that estimating the signs using the validation data could further improve performance.", "keywords": "Model Merging;Fusing;Collaborative Training;Robust Fine-tuning;Federated Learning", "primary_area": "", "supplementary_material": "/attachment/6624b5756ae1e6be064c15cc3ec819d883f848aa.zip", "author": "Prateek Yadav;Derek Tam;Leshem Choshen;Colin Raffel;Mohit Bansal", "authorids": "~Prateek_Yadav1;~Derek_Tam1;~Leshem_Choshen1;~Colin_Raffel1;~Mohit_Bansal2", "gender": "M;;Not Specified;;M", "homepage": "http://prateek-yadav.github.io;https://dptam.github.io/;https://ktilana.wixsite.com/leshem-choshen;http://colinraffel.com;https://www.cs.unc.edu/~mbansal/", "dblp": "220/5741;92/10347;218/5237;149/0082;32/5243.html", "google_scholar": "1lXhc0kAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;I66ZBYwAAAAJ;DN8QtscAAAAJ", "orcid": ";;0000-0002-0085-6496;;", "linkedin": "prateek-yadav-40bb34a8;;leshemchoshen/;;", "or_profile": "~Prateek_Yadav1;~Derek_Tam1;~Leshem_Choshen1;~Colin_Raffel1;~Mohit_Bansal2", "aff": "Department of Computer Science, University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;International Business Machines;University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;cs.unc.edu;ibm.com;unc.edu;unc.edu", "position": "Graduate Student;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyadav2023tiesmerging,\ntitle={{TIES}-Merging: Resolving Interference When Merging Models},\nauthor={Prateek Yadav and Derek Tam and Leshem Choshen and Colin Raffel and Mohit Bansal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xtaX3WyCj1}\n}", "github": "", "project": "", "reviewers": "b9mw;mRV1;ioFQ;rBCc;Rfna", "pdf_size": 1004411, "rating": "5;6;6;6;7", "confidence": "3;3;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;2", "presentation": "3;4;3;3;3", "wc_summary": "114;24;66;43;114", "wc_strengths": "104;41;87;14;38", "wc_weaknesses": "164;26;359;102;93", "wc_questions": "20;81;125;3;59", "wc_limitations": "25;1;81;8;4", "wc_review": "427;173;718;170;308", "wc_reply_reviewers": "20;49;68;0;192", "wc_reply_authors": "0;54;52;0;459", "reply_reviewers": "1;1;1;0;2", "reply_authors": "1;2;2;1;3", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 72.2, 36.630042315017874 ], "wc_strengths_avg": [ 56.8, 33.39101675600789 ], "wc_weaknesses_avg": [ 148.8, 113.84094166862816 ], "wc_questions_avg": [ 57.6, 43.559614323361494 ], "wc_limitations_avg": [ 23.8, 29.78187368182197 ], "wc_review_avg": [ 359.2, 203.18602314135683 ], "wc_reply_reviewers_avg": [ 65.8, 67.29160423113719 ], "wc_reply_authors_avg": [ 113.0, 174.61729582146208 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 290, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9119716425203950461&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
"cs.unc.edu;cs.unc.edu;ibm.com;unc.edu;unc.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of North Carolina;International Business Machines Corporation", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.ibm.com", "aff_unique_abbr": "UNC;IBM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chapel Hill;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Personalized Dictionary Learning for Heterogeneous Datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69978", "id": "xw6Szwu4xz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9f6f790f28a31fba89644f09faf4e0cb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xw6Szwu4xz", "openreview": "https://openreview.net/forum?id=xw6Szwu4xz", "poster": "/media/PosterPDFs/NeurIPS%202023/69978.png?t=1702155139.7419698", "slides": "https://nips.cc/virtual/2023/poster/69978", "video": "https://nips.cc/virtual/2023/poster/69978", "author_site": "Geyu Liang, Naichen Shi, Raed AL Kontar, Salar Fattahi", "tldr": "", "abstract": "We introduce a relevant yet challenging problem named Personalized Dictionary Learning (PerDL), where the goal is to learn sparse linear representations from heterogeneous datasets that share some commonality. In PerDL, we model each dataset's shared and unique features as global and local dictionaries. Challenges for PerDL not only are inherited from classical dictionary learning(DL), but also arise due to the unknown nature of the shared and unique features. In this paper, we rigorously formulate this problem and provide conditions under which the global and local dictionaries can be provably disentangled. Under these conditions, we provide a meta-algorithm called Personalized Matching and Averaging (PerMA) that can recover both global and local dictionaries from heterogeneous datasets. PerMA is highly efficient; it converges to the ground truth at a linear rate under suitable conditions. Moreover, it automatically borrows strength from strong learners to improve the prediction of weak learners. 
As a general framework for extracting global and local dictionaries, we show the application of PerDL in different learning tasks, such as training with imbalanced datasets and video surveillance.", "keywords": "Dictionary Learning;Data Heterogeneity;Personalization", "primary_area": "", "supplementary_material": "/attachment/d9c72a0178bb47ef719099bd9119b0e19dbef156.pdf", "author": "Geyu Liang;Naichen Shi;Raed Al Kontar;Salar Fattahi", "authorids": "~Geyu_Liang1;~Naichen_Shi1;~Raed_Al_Kontar1;~Salar_Fattahi2", "gender": "M;;M;M", "homepage": ";;https://alkontar.engin.umich.edu/;http://fattahi.engin.umich.edu/", "dblp": ";;216/2976;175/9308", "google_scholar": ";;x0ZxAl4AAAAJ;nca_I7gAAAAJ", "orcid": ";;0000-0002-4546-324X;", "linkedin": "geyuliang/;;raed-kontar/;", "or_profile": "~Geyu_Liang1;~Naichen_Shi1;~Raed_Al_Kontar1;~Salar_Fattahi2", "aff": "University of Michigan - Ann Arbor;;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "umich.edu;;umich.edu;umich.edu", "position": "PhD student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nliang2023personalized,\ntitle={Personalized Dictionary Learning for Heterogeneous Datasets},\nauthor={Geyu Liang and Naichen Shi and Raed Al Kontar and Salar Fattahi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xw6Szwu4xz}\n}", "github": "", "project": "", "reviewers": "ED5i;zsPp;Kgxg;JdsU", "pdf_size": 5907144, "rating": "4;6;6;7", "confidence": "5;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "69;142;73;86", "wc_strengths": "52;94;78;133", "wc_weaknesses": "93;166;18;34", "wc_questions": "36;115;50;20", "wc_limitations": "1;72;30;1", "wc_review": "251;589;249;274", "wc_reply_reviewers": "0;86;24;18", "wc_reply_authors": "0;49;21;21", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.5, 29.261749776799064 ], "wc_strengths_avg": [ 89.25, 29.371542349696245 ], "wc_weaknesses_avg": [ 77.75, 58.10496966697427 ], "wc_questions_avg": [ 55.25, 36.09276243237694 ], "wc_limitations_avg": [ 26.0, 29.077482697097423 ], "wc_review_avg": [ 340.75, 143.66345220688524 ], "wc_reply_reviewers_avg": [ 32.0, 32.4037034920393 ], "wc_reply_authors_avg": [ 22.75, 17.41228014936585 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=843903907718020455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umich.edu;;umich.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Polychromatic Neural Representation for CT Metal Artifact Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69977", "id": "xx3QgKyghS", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/dbf02b21d77409a2db30e56866a8ab3a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xx3QgKyghS", "openreview": "https://openreview.net/forum?id=xx3QgKyghS", "poster": "/media/PosterPDFs/NeurIPS%202023/69977.png?t=1698419568.5887043", "slides": "https://nips.cc/virtual/2023/poster/69977", "video": "https://nips.cc/virtual/2023/poster/69977", "author_site": "Qing Wu, Lixuan Chen, Ce Wang, Hongjiang Wei, S. Kevin Zhou, Jingyi Yu, Yuyao Zhang", "tldr": "", "abstract": "Emerging neural reconstruction techniques based on tomography (e.g., NeRF, NeAT, and NeRP) have started showing unique capabilities in medical imaging. In this work, we present a novel Polychromatic neural representation (Polyner) to tackle the challenging problem of CT imaging when metallic implants exist within the human body. CT metal artifacts arise from the drastic variation of metal's attenuation coefficients at various energy levels of the X-ray spectrum, leading to a nonlinear metal effect in CT measurements. Recovering CT images from metal-affected measurements hence poses a complicated nonlinear inverse problem where empirical models adopted in previous metal artifact reduction (MAR) approaches lead to signal loss and strongly aliased reconstructions. Polyner instead models the MAR problem from a nonlinear inverse problem perspective. Specifically, we first derive a polychromatic forward model to accurately simulate the nonlinear CT acquisition process. Then, we incorporate our forward model into the implicit neural representation to accomplish reconstruction. Lastly, we adopt a regularizer to preserve the physical properties of the CT images across different energy levels while effectively constraining the solution space. Our Polyner is an unsupervised method and does not require any external training data. Experimenting with multiple datasets shows that our Polyner achieves comparable or better performance than supervised methods on in-domain datasets while demonstrating significant performance improvements on out-of-domain datasets. To the best of our knowledge, our Polyner is the first unsupervised MAR method that outperforms its supervised counterparts. 
The code for this work is available at: https://github.com/iwuqing/Polyner.", "keywords": "Medical Image;Computed Tomography;Metal Artifacts;Implicit Neural Representation;Unsupervised Learning", "primary_area": "", "supplementary_material": "/attachment/a600e393af4fe881061fe3f6e3dc2c9bb0952bca.pdf", "author": "Qing Wu;Lixuan Chen;Ce Wang;Hongjiang Wei;S Kevin Zhou;Jingyi Yu;Yuyao Zhang", "authorids": "~Qing_Wu3;~Lixuan_Chen1;~Ce_Wang1;hongjiang.wei@sjtu.edu.cn;~S_Kevin_Zhou1;~Jingyi_Yu5;zhangyy8@shanghaitech.edu.cn", "gender": "Not Specified;F;M;;M;M;", "homepage": "https://iwuqing.github.io/;https://maopaom.github.io/;https://scholar.google.com/citations?user=IBZRx3AAAAAJ&hl=en;;;;", "dblp": "62/66-1;122/9439;59/2300.html;;57/98;;", "google_scholar": "https://scholar.google.com.hk/citations?user=A1E80HUAAAAJ;5HxA0EEAAAAJ;IBZRx3AAAAAJ;;8eNm2GMAAAAJ;R9L_AfQAAAAJ;", "orcid": ";;0000-0002-1017-7972;;0000-0002-6881-4444;;", "linkedin": ";;;;s-kevin-zhou-231a094b/;;", "or_profile": "~Qing_Wu3;~Lixuan_Chen1;~Ce_Wang1;hongjiang.wei@sjtu.edu.cn;~S_Kevin_Zhou1;~Jingyi_Yu5;zhangyy8@shanghaitech.edu.cn", "aff": "ShanghaiTech University;ShanghaiTech University;Institute of Computing Technology, Chinese Academy of Sciences;;University of Science and Technology of China;ShanghaiTech University;", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;ict.ac.cn;;ustc.edu.cn;shanghaitech.edu.cn;", "position": "PhD student;MS student;Postdoc;;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nwu2023unsupervised,\ntitle={Unsupervised Polychromatic Neural Representation for {CT} Metal Artifact Reduction},\nauthor={Qing Wu and Lixuan Chen and Ce Wang and Hongjiang Wei and S Kevin Zhou and Jingyi Yu and Yuyao Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xx3QgKyghS}\n}", "github": "", "project": "", "reviewers": "oo65;KPnC;ibgY;QLvh", "pdf_size": 7731032, "rating": "4;5;7;8", "confidence": "5;4;4;4", "soundness": "2;3;4;4", "novelty": "2;2;4;3", "presentation": "4;2;4;4", "wc_summary": "66;30;235;128", "wc_strengths": "87;48;142;137", "wc_weaknesses": "233;83;27;188", "wc_questions": "165;9;54;89", "wc_limitations": "33;11;23;11", "wc_review": "584;181;481;553", "wc_reply_reviewers": "248;110;51;119", "wc_reply_authors": "572;130;37;58", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 114.75, 77.77330840333333 ], "wc_strengths_avg": [ 103.5, 38.59080201291494 ], "wc_weaknesses_avg": [ 132.75, 81.79356637291224 ], "wc_questions_avg": [ 79.25, 57.05425049897685 ], "wc_limitations_avg": [ 19.5, 9.205976319760984 ], "wc_review_avg": [ 449.75, 159.59851972997745 ], "wc_reply_reviewers_avg": [ 132.0, 71.88532534530258 ], "wc_reply_authors_avg": [ 199.25, 217.95340671804146 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7302967433402215, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15746311764597550636&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;ict.ac.cn;;ustc.edu.cn;shanghaitech.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;1;2;0", 
"aff_unique_norm": "ShanghaiTech University;Chinese Academy of Sciences;University of Science and Technology of China", "aff_unique_dep": ";Institute of Computing Technology;", "aff_unique_url": "https://www.shanghaitech.edu.cn;http://www.ict.ac.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "ShanghaiTech;CAS;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "BasisFormer: Attention-based Time Series Forecasting with Learnable and Interpretable Basis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69976", "id": "xx3qRKvG0T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e150e6d0a1e5214740c39c6e4503ba7a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xx3qRKvG0T", "openreview": "https://openreview.net/forum?id=xx3qRKvG0T", "poster": "/media/PosterPDFs/NeurIPS%202023/69976.png?t=1699589957.5822153", "slides": "https://nips.cc/virtual/2023/poster/69976", "video": "https://nips.cc/virtual/2023/poster/69976", "author_site": "Zelin Ni, Hang Yu, Shizhan Liu, Jianguo Li, Weiyao Lin", "tldr": "", "abstract": "Bases have become an integral part of modern deep learning-based models for time series forecasting due to their ability to act as feature extractors or future references. To be effective, a basis must be tailored to the specific set of time series data and exhibit distinct correlation with each time series within the set. However, current state-of-the-art methods are limited in their ability to satisfy both of these requirements simultaneously. To address this challenge, we propose BasisFormer, an end-to-end time series forecasting architecture that leverages learnable and interpretable bases. This architecture comprises three components: First, we acquire bases through adaptive self-supervised learning, which treats the historical and future sections of the time series as two distinct views and employs contrastive learning. Next, we design a Coef module that calculates the similarity coefficients between the time series and bases in the historical view via bidirectional cross-attention. Finally, we present a Forecast module that selects and consolidates the bases in the future view based on the similarity coefficients, resulting in accurate future predictions. Through extensive experiments on six datasets, we demonstrate that BasisFormer outperforms previous state-of-the-art methods by 11.04% and 15.78% respectively for univariate and multivariate forecasting tasks. 
Code is\navailable at: https://github.com/nzl5116190/Basisformer.", "keywords": "time series forecasting;basis learning;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/12afe94aa68c976b32c50b4d45b50c9fbd1803ed.pdf", "author": "Zelin Ni;Hang Yu;Shizhan Liu;Jianguo Li;Weiyao Lin", "authorids": "~Zelin_Ni1;~Hang_Yu1;~Shizhan_Liu1;~Jianguo_Li2;~Weiyao_Lin1", "gender": "M;M;M;M;M", "homepage": ";;https://weiyaolin.github.io/;https://sites.google.com/site/leeplus/;https://github.com/Zhazhan", "dblp": "327/8340;74/2568-2;42/6095;70/6237;264/9470", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;S9g81n8AAAAJ;n44GlFcAAAAJ;LtTzNK4AAAAJ", "orcid": ";;;;", "linkedin": ";hang-yu-7ba38844/;;;", "or_profile": "~Zelin_Ni1;~Hang_Yu1;~Weiyao_Lin1;~jianguo_Li1;~Liu_Shizhan1", "aff": "Shanghai Jiaotong University;Ant Group;Shanghai Jiaotong University;Ant Group;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;antgroup.com;sjtu.edu.cn;antgroup.com;sjtu.edu.cn", "position": "Undergrad student;Senior Algorithm Expert;Full Professor;Director;MS student", "bibtex": "@inproceedings{\nni2023basisformer,\ntitle={BasisFormer: Attention-based Time Series Forecasting with Learnable and Interpretable Basis},\nauthor={Zelin Ni and Hang Yu and Shizhan Liu and Jianguo Li and Weiyao Lin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xx3qRKvG0T}\n}", "github": "", "project": "", "reviewers": "CQBS;z6WM;u9Fd;twjH;yNQz", "pdf_size": 780967, "rating": "4;4;5;7;8", "confidence": "5;4;5;3;5", "soundness": "3;2;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "138;64;99;58;147", "wc_strengths": "65;30;33;99;80", "wc_weaknesses": "164;68;48;52;84", "wc_questions": "2;71;18;15;12", "wc_limitations": "1;1;41;1;1", "wc_review": "370;234;239;225;324", "wc_reply_reviewers": "0;0;12;0;12", "wc_reply_authors": "0;0;16;0;28", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.2, 36.62458190887644 ], "wc_strengths_avg": [ 61.4, 26.702808841018957 ], "wc_weaknesses_avg": [ 83.2, 42.362247343595925 ], "wc_questions_avg": [ 23.6, 24.303086223770016 ], "wc_limitations_avg": [ 9.0, 16.0 ], "wc_review_avg": [ 278.4, 58.04343201431149 ], "wc_reply_reviewers_avg": [ 4.8, 5.878775382679628 ], "wc_reply_authors_avg": [ 8.8, 11.42628548566856 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.18463723646899916, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8992331632359989975&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;antgroup.com;sjtu.edu.cn;antgroup.com;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "SJTU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Hybrid-grained Feature Interaction Selection for Deep Sparse Network", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69975", "id": "xxfHMqNcum", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9ab8da29b1eb3bec912a06e0879065cd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xxfHMqNcum", "openreview": "https://openreview.net/forum?id=xxfHMqNcum", "poster": "/media/PosterPDFs/NeurIPS%202023/69975.png?t=1701531434.2827272", "slides": "https://nips.cc/virtual/2023/poster/69975", "video": "https://nips.cc/virtual/2023/poster/69975", "author_site": "Fuyuan Lyu, Xing Tang, Dugang Liu, Chen Ma, Weihong Luo, Liang Chen, xiuqiang He, Xue (Steve) Liu", "tldr": "", "abstract": "Deep sparse networks are widely investigated as a neural network architecture for prediction tasks with high-dimensional sparse features, with which feature interaction selection is a critical component. While previous methods primarily focus on how to search feature interaction in a coarse-grained space, less attention has been given to a finer granularity. In this work, we introduce a hybrid-grained feature interaction selection approach that targets both feature field and feature value for deep sparse networks. To explore such expansive space, we propose a decomposed space which is calculated on the fly. We then develop a selection algorithm called OptFeature, which efficiently selects the feature interaction from both the feature field and the feature value simultaneously. Results from experiments on three large real-world benchmark datasets demonstrate that OptFeature performs well in terms of accuracy and efficiency. Additional studies support the feasibility of our method. All source code are publicly available\\footnote{https://anonymous.4open.science/r/OptFeature-Anonymous}.", "keywords": "Feature Interaction Search;Deep Sparse Network", "primary_area": "", "supplementary_material": "/attachment/fed4fe2f64ed22cee35121992d34803cf2ab947b.pdf", "author": "Fuyuan Lyu;Xing Tang;Dugang Liu;Chen Ma;Weihong Luo;Liang Chen;xiuqiang He;Xue Liu", "authorids": "~Fuyuan_Lyu1;~Xing_Tang2;~Dugang_Liu1;~Chen_Ma3;~Weihong_Luo1;~Liang_Chen16;~xiuqiang_He3;~Xue_Liu1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://fuyuanlyu.github.io;https://xingt-tang.github.io;https://dgliu.github.io/;https://allenjack.github.io;https://github.com/lobby66;https://leoncuhk.github.io/;https://he-xiuqiang.github.io/;http://www.cs.mcgill.ca/~xueliu/", "dblp": "265/6053;09/2824-7;234/6893;126/4567-1;;;11/5357-1;l/XueLiu", "google_scholar": "https://scholar.google.ca/citations?user=dOjmAVQAAAAJ;rtRexdQAAAAJ;JQiDCZUAAAAJ;https://scholar.google.ca/citations?user=sSy7nvsAAAAJ;;;3lprwmsAAAAJ;https://scholar.google.com.tw/citations?user=rfLIRakAAAAJ", "orcid": "0000-0001-9345-1828;0000-0003-4360-0754;0000-0003-3612-709X;0000-0001-7933-9813;;0000-0002-3149-0239;0000-0002-4115-8205;", "linkedin": "fuyuan-lyu-560756167/;;;ma-chen-93455693/?locale=en_US;;;;", "or_profile": "~Fuyuan_Lyu1;~Xing_Tang2;~Dugang_Liu1;~Chen_Ma3;~Weihong_Luo1;~Liang_Chen16;~xiuqiang_He3;~Xue_Liu1", "aff": "Huawei Technologies Ltd.;FiT,Tencent;Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ);City University of Hong Kong;FiT;Tencent;Tencent ;McGill University", "aff_domain": "huawei.com;tencent.com;ac.an;cityu.edu.hk;tencent.com;tencent.com;tencent.com;mcgill.ca", "position": "Intern;Researcher;Researcher;Assistant Professor;Researcher;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nlyu2023towards,\ntitle={Towards Hybrid-grained Feature 
Interaction Selection for Deep Sparse Network},\nauthor={Fuyuan Lyu and Xing Tang and Dugang Liu and Chen Ma and Weihong Luo and Liang Chen and xiuqiang He and Xue Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xxfHMqNcum}\n}", "github": "", "project": "", "reviewers": "Bzgv;eecX;57Bi;21ZP", "pdf_size": 3642314, "rating": "4;5;6;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;4;3", "wc_summary": "126;75;75;37", "wc_strengths": "74;67;52;45", "wc_weaknesses": "179;218;98;64", "wc_questions": "116;4;2;2", "wc_limitations": "3;2;10;6", "wc_review": "498;366;237;154", "wc_reply_reviewers": "18;134;27;28", "wc_reply_authors": "254;123;16;28", "reply_reviewers": "1;2;1;1", "reply_authors": "5;4;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.25, 31.63364506344471 ], "wc_strengths_avg": [ 59.5, 11.543396380615196 ], "wc_weaknesses_avg": [ 139.75, 61.532003867906006 ], "wc_questions_avg": [ 31.0, 49.08156476723211 ], "wc_limitations_avg": [ 5.25, 3.112474899497183 ], "wc_review_avg": [ 313.75, 130.46910553843773 ], "wc_reply_reviewers_avg": [ 51.75, 47.64648465521879 ], "wc_reply_authors_avg": [ 105.25, 95.3608279116745 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1091119959657132173&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "huawei.com;tencent.com;ac.an;cityu.edu.hk;tencent.com;tencent.com;tencent.com;mcgill.ca", "author_num": 8, "aff_unique_index": "0;1;2;3;4;1;1;5", "aff_unique_norm": "Huawei;Tencent;Guangdong Laboratory of Artificial Intelligence and Digital Economy;City University of Hong Kong;Florida Institute of Technology;McGill University", "aff_unique_dep": "Huawei Technologies;FiT;;;;", "aff_unique_url": "https://www.huawei.com;https://www.tencent.com;;https://www.cityu.edu.hk;https://www.fit.edu;https://www.mcgill.ca", "aff_unique_abbr": "Huawei;Tencent;GD-LAB;CityU;FIT;McGill", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0;0;2", "aff_country_unique": "China;United States;Canada" }, { "title": "Color Equivariant Convolutional Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69974", "id": "xz8j3r3oUA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5f173562e7662b14fb5c5695f225ea46-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xz8j3r3oUA", "openreview": "https://openreview.net/forum?id=xz8j3r3oUA", "poster": "/media/PosterPDFs/NeurIPS%202023/69974.png?t=1700173822.3043492", "slides": "https://nips.cc/virtual/2023/poster/69974", "video": "https://nips.cc/virtual/2023/poster/69974", "author_site": "Attila Lengyel, Ombretta Strafforello, Robert-Jan Bruintjes, Alexander Gielisse, Jan van Gemert", "tldr": "", "abstract": "Color is a crucial visual cue readily exploited by Convolutional Neural Networks (CNNs) for object recognition. 
However, CNNs struggle if there is data imbalance between color variations introduced by accidental recording conditions. Color invariance addresses this issue but does so at the cost of removing all color information, which sacrifices discriminative power. In this paper, we propose Color Equivariant Convolutions (CEConvs), a novel deep learning building block that enables shape feature sharing across the color spectrum while retaining important color information. We extend the notion of equivariance from geometric to photometric transformations by incorporating parameter sharing over hue-shifts in a neural network. We demonstrate the benefits of CEConvs in terms of downstream performance to various tasks and improved robustness to color changes, including train-test distribution shifts. Our approach can be seamlessly integrated into existing architectures, such as ResNets, and offers a promising solution for addressing color-based domain shifts in CNNs.", "keywords": "color equivariance;equivariance;color robustness;equivariant convolutions", "primary_area": "", "supplementary_material": "", "author": "Attila Lengyel;Ombretta Strafforello;Robert-Jan Bruintjes;Alexander Gielisse;Jan van Gemert", "authorids": "~Attila_Lengyel1;~Ombretta_Strafforello1;~Robert-Jan_Bruintjes1;~Alexander_Gielisse1;~Jan_van_Gemert1", "gender": "M;F;M;M;M", "homepage": "https://attila94.github.io;;https://rjbruintjes.nl;https://sander-gielisse.nl/;https://jvgemert.github.io/", "dblp": "65/1580-1;286/6016;166/3241;359/5898;25/3153", "google_scholar": "i9jWpSwAAAAJ;o2Xe22kAAAAJ;RXVnqgcAAAAJ;https://scholar.google.com/citations?hl=nl;JUdMRGcAAAAJ", "orcid": ";;0000-0002-9798-0214;;0000-0002-3913-2786", "linkedin": "attilalengyel/;;;alexander-gielisse-a61258203/;jan-van-gemert-1628b94/", "or_profile": "~Attila_Lengyel1;~Ombretta_Strafforello1;~Robert-Jan_Bruintjes1;~Alexander_Gielisse1;~Jan_C_van_Gemert1", "aff": "Delft University of Technology;Delft University of Technology;Delft University of Technology;Delft University of Technology;Delft University of Technology", "aff_domain": "tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl", "position": "PhD student;PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nlengyel2023color,\ntitle={Color Equivariant Convolutional Networks},\nauthor={Attila Lengyel and Ombretta Strafforello and Robert-Jan Bruintjes and Alexander Gielisse and Jan van Gemert},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xz8j3r3oUA}\n}", "github": "", "project": "", "reviewers": "w9HG;gSRU;9e3n;81VY", "pdf_size": 5478387, "rating": "5;5;6;6", "confidence": "2;3;5;4", "soundness": "3;2;3;2", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "94;140;67;50", "wc_strengths": "68;166;70;71", "wc_weaknesses": "190;142;30;223", "wc_questions": "63;85;196;262", "wc_limitations": "4;49;13;12", "wc_review": "419;582;376;618", "wc_reply_reviewers": "43;204;257;0", "wc_reply_authors": "0;79;246;0", "reply_reviewers": "1;1;3;0", "reply_authors": "1;2;3;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.75, 34.00275724114149 ], "wc_strengths_avg": [ 93.75, 41.72753886823425 ], "wc_weaknesses_avg": [ 146.25, 73.03552217927931 ], "wc_questions_avg": [ 151.5, 81.30959352991503 ], "wc_limitations_avg": [ 19.5, 
17.38533865071371 ], "wc_review_avg": [ 498.75, 103.17309484550708 ], "wc_reply_reviewers_avg": [ 126.0, 107.24970862431282 ], "wc_reply_authors_avg": [ 81.25, 100.43748055382513 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=284547531448199977&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "LIBERO: Benchmarking Knowledge Transfer for Lifelong Robot Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73415", "id": "xzEtNSuDJk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c3c666820ea055a77726d66fc7d447f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=xzEtNSuDJk", "openreview": "https://openreview.net/forum?id=xzEtNSuDJk", "poster": "/media/PosterPDFs/NeurIPS%202023/73415.png?t=1701148104.5613608", "slides": "https://nips.cc/virtual/2023/poster/73415", "video": "https://nips.cc/virtual/2023/poster/73415", "author_site": "Bo Liu, Yifeng Zhu, Chongkai Gao, Yihao Feng, Qiang Liu, Yuke Zhu, Peter Stone", "tldr": "", "abstract": "Lifelong learning offers a promising paradigm of building a generalist agent that learns and adapts over its lifespan. \nUnlike traditional lifelong learning problems in image and text domains, which primarily involve the transfer of declarative knowledge of entities and concepts, lifelong learning in decision-making (LLDM) also necessitates the transfer of procedural knowledge, such as actions and behaviors. To advance research in LLDM, we introduce LIBERO, a novel benchmark of lifelong learning for robot manipulation. Specifically, LIBERO highlights five key research topics in LLDM: 1) how to efficiently transfer declarative knowledge, procedural knowledge, or the mixture of both; 2) how to design effective policy architectures and 3) effective algorithms for LLDM; 4) the robustness of a lifelong learner with respect to task ordering; and 5) the effect of model pretraining for LLDM. We develop an extendible procedural generation pipeline that can in principle generate infinitely many tasks. For benchmarking purposes, we create four task suites (130 tasks in total) that we use to investigate the above-mentioned research topics. To support sample-efficient learning, we provide high-quality human-teleoperated demonstration data for all tasks. 
Our extensive experiments present several insightful or even unexpected discoveries: sequential finetuning outperforms existing lifelong learning methods in forward transfer, no single visual encoder architecture excels at all types of knowledge transfer, and naive supervised pretraining can hinder agents' performance in the subsequent LLDM.", "keywords": "lifelong learning;continual learning;multitask learning;robot learning", "primary_area": "", "supplementary_material": "/attachment/3b47a5a2a39a41a4ffe12fb26fb0e989691ce577.zip", "author": "Bo Liu;Yifeng Zhu;Chongkai Gao;Yihao Feng;qiang liu;Yuke Zhu;Peter Stone", "authorids": "~Bo_Liu13;~Yifeng_Zhu2;~Chongkai_Gao1;~Yihao_Feng1;~qiang_liu4;~Yuke_Zhu1;~Peter_Stone1", "gender": "M;M;M;M;M;M;M", "homepage": "https://cranial-xix.github.io/;https://cs.utexas.edu/~yifengz;https://chongkaigao.com/;;https://cs.utexas.edu/~yukez/;http://www.cs.utexas.edu/~pstone;https://www.cs.utexas.edu/~lqiang/", "dblp": ";;295/8658;204/3696;133/1772;s/PeterStone;61/3234-1", "google_scholar": "https://scholar.google.com/citations?hl=en;;l_mOqY8AAAAJ;uqnNle0AAAAJ;mWGyYMsAAAAJ;qnwjcfAAAAAJ;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;;;;0000-0002-6795-420X;", "linkedin": ";;;;;;", "or_profile": "~Bo_Liu13;~Yifeng_Zhu2;~Chongkai_Gao1;~Yihao_Feng1;~Yuke_Zhu1;~Peter_Stone1;~Qiang_Liu1", "aff": "University of Texas, Austin;The University of Texas at Austin;Tsinghua University;Salesforce AI Research;Computer Science Department, University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu;tsinghua.edu.cn;salesforce.com;cs.utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;MS student;Researcher;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023libero,\ntitle={{LIBERO}: Benchmarking Knowledge Transfer for Lifelong Robot Learning},\nauthor={Bo Liu and Yifeng Zhu and Chongkai Gao and Yihao Feng and qiang liu and Yuke Zhu and Peter Stone},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=xzEtNSuDJk}\n}", "github": "", "project": "", "reviewers": "5K6Q;2kT5;kBSE;XKZ6;3N9d", "pdf_size": 3753069, "rating": "6;6;7;7;7", "confidence": "4;4;4;2;4", "wc_summary_and_contributions": "55;48;106;164;88", "wc_strengths": "90;63;79;52;99", "wc_improvement": "157;137;109;73;102", "wc_limitations": "104;1;1;1;98", "wc_correctness": "97;1;46;21;140", "wc_clarity": "52;1;42;1;73", "wc_relation_to_prior_work": "73;1;21;1;71", "wc_documentation": "26;1;11;20;62", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "655;254;416;334;734", "wc_reply_reviewers": "133;33;112;0;55", "wc_reply_authors": "1087;554;546;391;708", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 92.2, 41.71522503834781 ], "wc_strengths_avg": [ 76.6, 17.188368159892317 ], "wc_improvement_avg": [ 115.6, 29.035151110335214 ], "wc_limitations_avg": [ 41.0, 49.02652343374961 ], "wc_correctness_avg": [ 61.0, 50.91561646489218 ], "wc_clarity_avg": [ 33.8, 28.589508565206224 ], "wc_relation_to_prior_work_avg": [ 33.4, 32.357997465850694 ], "wc_documentation_avg": [ 24.0, 20.79422996891205 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 478.6, 185.26694254507467 ], "wc_reply_reviewers_avg": [ 66.6, 49.33396395993332 ], 
"wc_reply_authors_avg": [ 657.2, 237.1441755557155 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1396820234644351601&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "cs.utexas.edu;utexas.edu;tsinghua.edu.cn;salesforce.com;cs.utexas.edu;utexas.edu;utexas.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0;0", "aff_unique_norm": "University of Texas at Austin;Tsinghua University;Salesforce", "aff_unique_dep": ";;Salesforce AI Research", "aff_unique_url": "https://www.utexas.edu;https://www.tsinghua.edu.cn;https://www.salesforce.com", "aff_unique_abbr": "UT Austin;THU;Salesforce AI", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Molecule Joint Auto-Encoding: Trajectory Pretraining with 2D and 3D Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69973", "id": "xzmaFfw6oh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/acddda9cd6f310689f7657f947705a99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=xzmaFfw6oh", "openreview": "https://openreview.net/forum?id=xzmaFfw6oh", "poster": "/media/PosterPDFs/NeurIPS%202023/69973.png?t=1702183709.4653127", "slides": "https://nips.cc/virtual/2023/poster/69973", "video": "https://nips.cc/virtual/2023/poster/69973", "author_site": "weitao Du, Jiujiu Chen, Xuecang Zhang, Zhi-Ming Ma, Shengchao Liu", "tldr": "", "abstract": "Recently, artificial intelligence for drug discovery has raised increasing interest in both machine learning and chemistry domains. The fundamental building block for drug discovery is molecule geometry and thus, the molecule's geometrical representation is the main bottleneck to better utilize machine learning techniques for drug discovery. In this work, we propose a pretraining method for molecule joint auto-encoding (MoleculeJAE). MoleculeJAE can learn both the 2D bond (topology) and 3D conformation (geometry) information, and a diffusion process model is applied to mimic the augmented trajectories of such two modalities, based on which, MoleculeJAE will learn the inherent chemical structure in a self-supervised manner. Thus, the pretrained geometrical representation in MoleculeJAE is expected to benefit downstream geometry-related tasks. 
Empirically, MoleculeJAE proves its effectiveness by reaching state-of-the-art performance on 15 out of 20 tasks compared with 12 competitive baselines.", "keywords": "Molecule Joint Auto-encoding;Molecule Joint Self-supervised Learning;Markov processes;contrastive learning;molecule representation learning", "primary_area": "", "supplementary_material": "", "author": "weitao Du;Jiujiu Chen;Xuecang Zhang;Zhi-Ming Ma;Shengchao Liu", "authorids": "~weitao_Du1;~Jiujiu_Chen1;~Xuecang_Zhang1;~Zhi-Ming_Ma1;~Shengchao_Liu1", "gender": "M;F;;;M", "homepage": ";;;http://homepage.amss.ac.cn/research/homePage/8eb59241e2e74d828fb84eec0efadba5/myHomePage.html;https://chao1224.github.io/", "dblp": "17/10015;;01/8625.html;;", "google_scholar": ";uiW_ZjoAAAAJ;;;F1ws3XUAAAAJ", "orcid": ";;;;0000-0003-2030-2367", "linkedin": ";;;;", "or_profile": "~weitao_Du1;~Jiujiu_Chen1;~Xuecang_Zhang1;~Zhi-Ming_Ma1;~Shengchao_Liu1", "aff": "Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;University of Electronic Science and Technology of China;Huawei Technologies Co., Ltd.;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;MILA-UdeM", "aff_domain": "amss.ac.cn;uestc.edu.cn;huawei.com;amss.ac.cn;mila.quebec", "position": "Postdoc;MS student;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\ndu2023molecule,\ntitle={Molecule Joint Auto-Encoding: Trajectory Pretraining with 2D and 3D Diffusion},\nauthor={weitao Du and Jiujiu Chen and Xuecang Zhang and Zhi-Ming Ma and Shengchao Liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=xzmaFfw6oh}\n}", "github": "", "project": "", "reviewers": "oitq;fgxq;s6Nt;LUAH", "pdf_size": 2145226, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;2;3;2", "wc_summary": "167;66;35;56", "wc_strengths": "196;29;43;75", "wc_weaknesses": "146;170;78;178", "wc_questions": "172;79;20;74", "wc_limitations": "1;7;1;7", "wc_review": "682;351;177;390", "wc_reply_reviewers": "57;61;18;0", "wc_reply_authors": "49;106;16;157", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.0, 50.896954722262116 ], "wc_strengths_avg": [ 85.75, 65.80036094125929 ], "wc_weaknesses_avg": [ 143.0, 39.331920878594275 ], "wc_questions_avg": [ 86.25, 54.64601998316071 ], "wc_limitations_avg": [ 4.0, 3.0 ], "wc_review_avg": [ 400.0, 181.48966912747403 ], "wc_reply_reviewers_avg": [ 34.0, 25.836021365527625 ], "wc_reply_authors_avg": [ 82.0, 53.95831724581485 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13867198005764791361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "amss.ac.cn;uestc.edu.cn;huawei.com;amss.ac.cn;mila.quebec", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Chinese Academy of Sciences;University of Electronic Science and Technology of China;Huawei;Mila", "aff_unique_dep": "Academy of Mathematics and Systems Science;;Huawei 
Technologies;Montreal Institute for Learning Algorithms", "aff_unique_url": "http://www.cas.cn;https://www.uestc.edu.cn;https://www.huawei.com;https://mila.quebec", "aff_unique_abbr": "CAS;UESTC;Huawei;MILA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Canada" }, { "title": "WITRAN: Water-wave Information Transmission and Recurrent Acceleration Network for Long-range Time Series Forecasting", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69972", "id": "y08bkEtNBK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2938ad0434a6506b125d8adaff084a4a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=y08bkEtNBK", "openreview": "https://openreview.net/forum?id=y08bkEtNBK", "poster": "/media/PosterPDFs/NeurIPS%202023/69972.png?t=1701870561.3353913", "slides": "https://nips.cc/virtual/2023/poster/69972", "video": "https://nips.cc/virtual/2023/poster/69972", "author_site": "Yuxin Jia, Youfang Lin, Xinyan Hao, Yan Lin, Shengnan Guo, Huaiyu Wan", "tldr": "", "abstract": "Capturing semantic information is crucial for accurate long-range time series forecasting, which involves modeling global and local correlations, as well as discovering long- and short-term repetitive patterns. Previous works have partially addressed these issues separately, but have not been able to address all of them simultaneously. Meanwhile, their time and memory complexities are still not sufficiently low for long-range forecasting. To address the challenge of capturing different types of semantic information, we propose a novel Water-wave Information Transmission (WIT) framework. This framework captures both long- and short-term repetitive patterns through bi-granular information transmission. It also models global and local correlations by recursively fusing and selecting information using Horizontal Vertical Gated Selective Unit (HVGSU). In addition, to improve the computing efficiency, we propose a generic Recurrent Acceleration Network (RAN) which reduces the time complexity to $\\mathcal{O}(\\sqrt{L})$ while maintaining the memory complexity at $\\mathcal{O}(L)$. Our proposed method, called Water-wave Information Transmission and Recurrent Acceleration Network (WITRAN), outperforms the state-of-the-art methods by 5.80% and 14.28% on long-range and ultra-long-range time series forecasting tasks respectively, as demonstrated by experiments on four benchmark datasets. 
The code is available at: https://github.com/Water2sea/WITRAN.", "keywords": "long-range time series forecasting;information transmission;long- and short-term repetitive patterns;global and local correlations", "primary_area": "", "supplementary_material": "", "author": "Yuxin Jia;Youfang Lin;Xinyan Hao;Yan Lin;Shengnan Guo;Huaiyu Wan", "authorids": "~Yuxin_Jia1;~Youfang_Lin1;~Xinyan_Hao1;~Yan_Lin1;~Shengnan_Guo1;~Huaiyu_Wan1", "gender": ";M;;M;F;M", "homepage": ";https://faculty.bjtu.edu.cn/7443/;;https://www.yanlincs.com;http://faculty.bjtu.edu.cn/9685/;https://faculty.bjtu.edu.cn/8793/", "dblp": ";12/4988;;27/586-6;163/0779-1;07/9988", "google_scholar": ";e8xT-e0AAAAJ;;nHMmG2UAAAAJ;3JsSBYsAAAAJ;T5wVWIUAAAAJ", "orcid": "0009-0006-8031-334X;0000-0002-5143-3645;0009-0002-1362-8633;0000-0002-2320-9777;0000-0002-3008-4511;0000-0002-0501-9363", "linkedin": ";youfang-lin-a1625091/;;;;", "or_profile": "~Yuxin_Jia1;~Youfang_Lin1;~Xinyan_Hao1;~Yan_Lin1;~Shengnan_Guo1;~Huaiyu_Wan1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Aalborg University;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;cs.aau.dk;bjtu.edu.cn;bjtu.edu.cn", "position": "PhD student;Full Professor;PhD student;PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\njia2023witran,\ntitle={{WITRAN}: Water-wave Information Transmission and Recurrent Acceleration Network for Long-range Time Series Forecasting},\nauthor={Yuxin Jia and Youfang Lin and Xinyan Hao and Yan Lin and Shengnan Guo and Huaiyu Wan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=y08bkEtNBK}\n}", "github": "", "project": "", "reviewers": "2QPt;r3j3;Ku6W;1moF", "pdf_size": 29342210, "rating": "5;5;7;8", "confidence": "4;5;4;5", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "62;57;66;62", "wc_strengths": "38;60;67;139", "wc_weaknesses": "51;129;126;70", "wc_questions": "14;37;32;49", "wc_limitations": "2;12;1;2", "wc_review": "167;295;292;322", "wc_reply_reviewers": "0;347;14;15", "wc_reply_authors": "0;1299;8;7", "reply_reviewers": "0;2;1;1", "reply_authors": "1;5;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.75, 3.191786333700926 ], "wc_strengths_avg": [ 76.0, 37.914377220257755 ], "wc_weaknesses_avg": [ 94.0, 34.18332927027442 ], "wc_questions_avg": [ 33.0, 12.589678312014172 ], "wc_limitations_avg": [ 4.25, 4.493050188902857 ], "wc_review_avg": [ 269.0, 60.037488288568504 ], "wc_reply_reviewers_avg": [ 94.0, 146.18994493466369 ], "wc_reply_authors_avg": [ 328.5, 560.3269135067492 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3923993731927998021&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;cs.aau.dk;bjtu.edu.cn;bjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Beijing Jiao Tong University;Aalborg University", "aff_unique_dep": ";", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.aau.dk", "aff_unique_abbr": "BJTU;AAU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Denmark" }, { "title": "Learning Causal Models under Independent Changes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69971", "id": "y0OlQSZsyp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eee6efe709623f36483e3fbb0bb513dd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=y0OlQSZsyp", "openreview": "https://openreview.net/forum?id=y0OlQSZsyp", "poster": "/media/PosterPDFs/NeurIPS%202023/69971.png?t=1702583564.5619762", "slides": "https://nips.cc/virtual/2023/poster/69971", "video": "https://nips.cc/virtual/2023/poster/69971", "author_site": "Sarah Mameche, David Kaltenpoth, Jilles Vreeken", "tldr": "", "abstract": "In many scientific applications, we observe a system in different conditions in which its components may change, rather than in isolation. In our work, we are interested in explaining the generating process of such a multi-context system using a finite mixture of causal mechanisms. Recent work shows that this causal model is identifiable from data, but is limited to settings where the sparse mechanism shift hypothesis holds and only a subset of the causal conditionals change. As this assumption is not easily verifiable in practice, we study the more general principle that mechanism shifts are independent, which we formalize using the algorithmic notion of independence. We introduce an approach for causal discovery beyond partially directed graphs using Gaussian Process models, and give conditions under which we provably identify the correct causal model. In our experiments, we show that our method performs well in a range of synthetic settings, on realistic gene expression simulations, as well as on real-world cell signaling data.", "keywords": "independent mechanisms;causal discovery;information theory;gaussian processes", "primary_area": "", "supplementary_material": "", "author": "Sarah Mameche;David Kaltenpoth;Jilles Vreeken", "authorids": "~Sarah_Mameche1;~David_Kaltenpoth1;~Jilles_Vreeken2", "gender": "F;M;M", "homepage": "https://cispa.de/en/people/c01sama;;https://vreeken.eu", "dblp": "326/4243;234/7955;94/6462", "google_scholar": "uRajBQEAAAAJ;DjgvQhcAAAAJ;p5HEQfIAAAAJ", "orcid": ";;0000-0002-2310-2806", "linkedin": ";;jilles-vreeken-b3b05b58/", "or_profile": "~Sarah_Mameche1;~David_Kaltenpoth1;~Jilles_Vreeken2", "aff": "CISPA, saarland university, saarland informatics campus;CISPA;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.saarland;cispa.saarland;cispa.de", "position": "PhD student;PhD student;Tenured Faculty", "bibtex": "@inproceedings{\nmameche2023learning,\ntitle={Learning Causal Models under Independent Changes},\nauthor={Sarah Mameche and David Kaltenpoth and Jilles Vreeken},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=y0OlQSZsyp}\n}", "github": "", "project": "", "reviewers": "t48a;NaRE;oMbi;LpAE", "pdf_size": 420314, "rating": "4;7;7;7", "confidence": "3;3;4;3", "soundness": "2;3;3;2", "novelty": "2;2;3;3", "presentation": "2;2;2;2", "wc_summary": "132;40;154;197", "wc_strengths": "99;44;60;118", "wc_weaknesses": "315;159;117;216", "wc_questions": "87;17;155;76", "wc_limitations": "8;5;58;25", "wc_review": "641;265;544;632", "wc_reply_reviewers": "27;33;16;17", "wc_reply_authors": "14;19;16;11", "reply_reviewers": "1;1;1;1", 
"reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 130.75, 57.37322982018704 ], "wc_strengths_avg": [ 80.25, 29.583568074186047 ], "wc_weaknesses_avg": [ 201.75, 74.22726924789838 ], "wc_questions_avg": [ 83.75, 48.99681112072499 ], "wc_limitations_avg": [ 24.0, 21.059439688652688 ], "wc_review_avg": [ 520.5, 152.3031516417175 ], "wc_reply_reviewers_avg": [ 23.25, 7.084313657652377 ], "wc_reply_authors_avg": [ 15.0, 2.9154759474226504 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3747807090863715940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cispa.saarland;cispa.saarland;cispa.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Saarland University;CISPA Helmholtz Center for Information Security", "aff_unique_dep": "CISPA;", "aff_unique_url": "https://www.uni-saarland.de;https://www.cispa.de/", "aff_unique_abbr": "Saarland U;CISPA", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "CSOT: Curriculum and Structure-Aware Optimal Transport for Learning with Noisy Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69970", "id": "y50AnAbKp1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1b0da24d136f46bfaee78e8da907127e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=y50AnAbKp1", "openreview": "https://openreview.net/forum?id=y50AnAbKp1", "poster": "/media/PosterPDFs/NeurIPS%202023/69970.png?t=1701938400.8816547", "slides": "https://nips.cc/virtual/2023/poster/69970", "video": "https://nips.cc/virtual/2023/poster/69970", "author_site": "Wanxing Chang, Ye Shi, Jingya Wang", "tldr": "", "abstract": "Learning with noisy labels (LNL) poses a significant challenge in training a well-generalized model while avoiding overfitting to corrupted labels.\nRecent advances have achieved impressive performance by identifying clean labels and correcting corrupted labels for training.\nHowever, the current approaches rely heavily on the model\u2019s predictions and evaluate each sample independently without considering either the global or local structure of the sample distribution.\nThese limitations typically result in a suboptimal solution for the identification and correction processes, which eventually leads to models overfitting to incorrect labels.\nIn this paper, we propose a novel optimal transport (OT) formulation, called Curriculum and Structure-aware Optimal Transport (CSOT). CSOT concurrently considers the inter- and intra-distribution structure of the samples to construct a robust denoising and relabeling allocator.\nDuring the training process, the allocator incrementally assigns reliable labels to a fraction of the samples with the highest confidence. \nThese labels have both global discriminability and local coherence.\nNotably, CSOT is a new OT formulation with a nonconvex objective function and curriculum constraints, so it is not directly compatible with classical OT solvers. 
\nHere, we develop a lightspeed computational method that involves a scaling iteration within a generalized conditional gradient framework to solve CSOT efficiently.\nExtensive experiments demonstrate the superiority of our method over the current state-of-the-arts in LNL.", "keywords": "Learning with Noisy Labels;Optimal Transport;Curriculum Learning", "primary_area": "", "supplementary_material": "/attachment/414eee3d309362945e95f912ea249d84ec632500.pdf", "author": "Wanxing Chang;Ye Shi;Jingya Wang", "authorids": "~Wanxing_Chang1;~Ye_Shi1;~Jingya_Wang3", "gender": "Not Specified;M;F", "homepage": "https://changwxx.github.io/;http://faculty.sist.shanghaitech.edu.cn/faculty/shiye;https://faculty.sist.shanghaitech.edu.cn/faculty/wangjingya/", "dblp": "332/1430;34/11191-1;", "google_scholar": "07BLeI8AAAAJ;gMqbZPUAAAAJ;https://scholar.google.com.au/citations?user=vmvJV_IAAAAJ", "orcid": "0009-0004-0253-1830;;", "linkedin": ";;", "or_profile": "~Wanxing_Chang1;~Ye_Shi1;~Jingya_Wang3", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchang2023csot,\ntitle={{CSOT}: Curriculum and Structure-Aware Optimal Transport for Learning with Noisy Labels},\nauthor={Wanxing Chang and Ye Shi and Jingya Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=y50AnAbKp1}\n}", "github": "", "project": "", "reviewers": "Zh5c;qnim;7s92;ZdDn;gpRK", "pdf_size": 310908, "rating": "5;5;6;6;7", "confidence": "3;3;5;4;5", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "2;3;4;2;3", "wc_summary": "78;78;77;63;80", "wc_strengths": "48;71;74;33;153", "wc_weaknesses": "480;165;103;154;19", "wc_questions": "134;4;80;1;87", "wc_limitations": "5;1;1;22;45", "wc_review": "745;319;335;273;384", "wc_reply_reviewers": "11;40;10;36;0", "wc_reply_authors": "80;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 75.2, 6.1773780845922 ], "wc_strengths_avg": [ 75.8, 41.45069360095196 ], "wc_weaknesses_avg": [ 184.2, 156.62873299621626 ], "wc_questions_avg": [ 61.2, 51.409726706139956 ], "wc_limitations_avg": [ 14.8, 16.975276139138355 ], "wc_review_avg": [ 411.2, 170.62754760002852 ], "wc_reply_reviewers_avg": [ 19.4, 15.717506163510802 ], "wc_reply_authors_avg": [ 16.0, 32.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8964214570007952, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12340713053590132502&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "On the Importance of Exploration for 
Generalization in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69969", "id": "y5duN2j9s6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a4310c4fd24bd336aa2f64f93cb5d39-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=y5duN2j9s6", "openreview": "https://openreview.net/forum?id=y5duN2j9s6", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69969", "video": "https://nips.cc/virtual/2023/poster/69969", "author_site": "Yiding Jiang, J. Zico Kolter, Roberta Raileanu", "tldr": "", "abstract": "Existing approaches for improving generalization in deep reinforcement learning (RL) have mostly focused on representation learning, neglecting RL-specific aspects such as exploration. We hypothesize that the agent's exploration strategy plays a key role in its ability to generalize to new environments.\nThrough a series of experiments in a tabular contextual MDP, \nwe show that exploration is helpful not only for efficiently finding the optimal policy for the training environments but also for acquiring knowledge that helps decision making in unseen environments. Based on these observations, we propose EDE: Exploration via Distributional Ensemble, a method that encourages the exploration of states with high epistemic uncertainty through an ensemble of Q-value distributions. \nThe proposed algorithm is the first value-based approach to achieve strong performance on both Procgen and Crafter, two benchmarks for generalization in RL with high-dimensional observations. The open-sourced implementation can be found at https://github.com/facebookresearch/ede.", "keywords": "reinforcement learning;generalization;procgen;crafter", "primary_area": "", "supplementary_material": "", "author": "Yiding Jiang;J Zico Kolter;Roberta Raileanu", "authorids": "~Yiding_Jiang2;~J_Zico_Kolter1;~Roberta_Raileanu2", "gender": "M;M;F", "homepage": "https://yidingjiang.github.io/;http://www.zicokolter.com;https://rraileanu.github.io/", "dblp": ";67/2526;215/5579", "google_scholar": "x9qzWg8AAAAJ;UXh1I6UAAAAJ;9hVXpJ0AAAAJ", "orcid": ";;", "linkedin": ";;roberta-raileanu-44b25660/", "or_profile": "~Yiding_Jiang2;~Zico_Kolter1;~Roberta_Raileanu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Meta Facebook", "aff_domain": "andrew.cmu.edu;cmu.edu;fb.com", "position": "PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\njiang2023on,\ntitle={On the Importance of Exploration for Generalization in Reinforcement Learning},\nauthor={Yiding Jiang and J Zico Kolter and Roberta Raileanu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=y5duN2j9s6}\n}", "github": "", "project": "", "reviewers": "dHNU;saZD;PttU;zePN;tLzS", "pdf_size": 4926817, "rating": "4;5;5;7;7", "confidence": "5;4;3;3;3", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "4;3;2;4;3", "wc_summary": "107;40;27;155;61", "wc_strengths": "75;111;46;251;20", "wc_weaknesses": "782;80;37;126;137", "wc_questions": "80;37;211;27;71", "wc_limitations": "21;6;55;9;14", "wc_review": "1065;274;376;568;303", "wc_reply_reviewers": "760;24;281;422;0", "wc_reply_authors": "1929;0;573;620;0", "reply_reviewers": "2;1;1;2;0", "reply_authors": "5;1;2;3;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], 
"wc_summary_avg": [ 78.0, 47.12536472007405 ], "wc_strengths_avg": [ 100.6, 81.06935302566563 ], "wc_weaknesses_avg": [ 232.4, 277.08814482038025 ], "wc_questions_avg": [ 85.2, 65.97696567742412 ], "wc_limitations_avg": [ 21.0, 17.7426040929735 ], "wc_review_avg": [ 517.2, 292.4485595792874 ], "wc_reply_reviewers_avg": [ 297.4, 280.3523497315477 ], "wc_reply_authors_avg": [ 624.4, 704.8961909387792 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.4966629547095764 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7916666666666667, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15477866327119129876&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "andrew.cmu.edu;cmu.edu;fb.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Private Everlasting Prediction", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69968", "id": "y8UAQQHVTX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/abe31a12e83111fdf2cfd54deed5a2ce-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=y8UAQQHVTX", "openreview": "https://openreview.net/forum?id=y8UAQQHVTX", "poster": "/media/PosterPDFs/NeurIPS%202023/69968.png?t=1697134419.4914591", "slides": "https://nips.cc/virtual/2023/poster/69968", "video": "https://nips.cc/virtual/2023/poster/69968", "author_site": "Moni Naor, Kobbi Nissim, Uri Stemmer, Chao Yan", "tldr": "", "abstract": "A private learner is trained on a sample of labeled points and generates a hypothesis that can be used for predicting the labels of newly sampled points while protecting the privacy of the training set [Kasiviswannathan et al., FOCS 2008]. Past research uncovered that private learners may need to exhibit significantly higher sample complexity than non-private learners as is the case of learning of one-dimensional threshold functions [Bun et al., FOCS 2015, Alon et al., STOC 2019].\n\nWe explore prediction as an alternative to learning. A predictor answers a stream of classification queries instead of outputting a hypothesis. \nEarlier work has considered a private prediction model with a single classification query [Dwork and Feldman, COLT 2018]. We observe that when answering a stream of queries, a predictor must modify the hypothesis it uses over time, and in a manner that cannot rely solely on the training set.\n\nWe introduce {\\em private everlasting prediction} taking into account the privacy of both the training set {\\em and} the (adaptively chosen) queries made to the predictor. \nWe then present a generic construction of private everlasting predictors in the PAC model.\nThe sample complexity of the initial training sample in our construction is quadratic (up to polylog factors) in the VC dimension of the concept class. 
Our construction allows prediction for all concept classes with finite VC dimension, and in particular threshold functions \nover infinite domains, for which (traditional) private learning is known to be impossible.", "keywords": "Differential privacy;private learning;private prediction", "primary_area": "", "supplementary_material": "/attachment/104331818fd334848f016fd632748676a248d740.pdf", "author": "Moni Naor;Kobbi Nissim;Uri Stemmer;Chao Yan", "authorids": "moni.naor@gmail.com;~Kobbi_Nissim2;~Uri_Stemmer1;~Chao_Yan2", "gender": ";M;;", "homepage": ";http://people.cs.georgetown.edu/~kobbi/;https://www.uri.co.il/;", "dblp": ";65/801;125/8532;", "google_scholar": ";https://scholar.google.com.tw/citations?user=U-RE8IgAAAAJ;;1uJRrZcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "moni.naor@gmail.com;~Kobbi_Nissim2;~Uri_Stemmer1;~Chao_Yan2", "aff": ";Georgetown University;Tel Aviv University;Georgetown University", "aff_domain": ";georgetown.edu;tau.ac.il;cs.georgetown.edu", "position": ";Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nnaor2023private,\ntitle={Private Everlasting Prediction},\nauthor={Moni Naor and Kobbi Nissim and Uri Stemmer and Chao Yan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=y8UAQQHVTX}\n}", "github": "", "project": "", "reviewers": "NTTD;nCbQ;xv4Q;9JKU", "pdf_size": 523264, "rating": "7;7;8;9", "confidence": "3;3;3;4", "soundness": "3;3;4;4", "novelty": "3;2;4;4", "presentation": "4;2;4;3", "wc_summary": "64;58;238;168", "wc_strengths": "108;25;62;145", "wc_weaknesses": "43;143;62;55", "wc_questions": "43;9;168;349", "wc_limitations": "11;20;1;3", "wc_review": "269;255;531;720", "wc_reply_reviewers": "28;29;0;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 132.0, 75.21967827636595 ], "wc_strengths_avg": [ 85.0, 45.436769251345325 ], "wc_weaknesses_avg": [ 75.75, 39.41684284668167 ], "wc_questions_avg": [ 142.25, 133.2429641669683 ], "wc_limitations_avg": [ 8.75, 7.495832175282475 ], "wc_review_avg": [ 443.75, 193.70773732610684 ], "wc_reply_reviewers_avg": [ 15.25, 13.329947486768281 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9776760720606165420&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";georgetown.edu;tau.ac.il;cs.georgetown.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgetown University;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://www.georgetown.edu;https://www.tau.ac.il", "aff_unique_abbr": "GU;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "SNEkhorn: Dimension Reduction with Symmetric Entropic Affinities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69967", "id": "y9U0IJ2uFr", "proceeding":
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/8b54ecd9823fff6d37e61ece8f87e534-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=y9U0IJ2uFr", "openreview": "https://openreview.net/forum?id=y9U0IJ2uFr", "poster": "/media/PosterPDFs/NeurIPS%202023/69967.png?t=1701682920.680774", "slides": "https://nips.cc/virtual/2023/poster/69967", "video": "https://nips.cc/virtual/2023/poster/69967", "author_site": "Hugues Van Assel, Titouan Vayer, R\u00e9mi Flamary, Nicolas Courty", "tldr": "", "abstract": "Many approaches in machine learning rely on a weighted graph to encode the\nsimilarities between samples in a dataset. Entropic affinities (EAs), which are notably used in the popular Dimensionality Reduction (DR) algorithm t-SNE, are particular instances of such graphs. To ensure robustness to heterogeneous sampling densities, EAs assign a kernel bandwidth parameter to every sample in such a way that the entropy of each row in the affinity matrix is kept constant at a specific value, whose exponential is known as perplexity. EAs are inherently asymmetric and row-wise stochastic, but they are used in DR approaches after undergoing heuristic symmetrization methods that violate both the row-wise constant entropy and stochasticity properties. In this work, we uncover a novel characterization of EA as an optimal transport problem, allowing a natural symmetrization that can be computed efficiently using dual ascent. \nThe corresponding novel affinity matrix derives advantages from symmetric doubly stochastic normalization in terms of clustering performance, while also effectively controlling the entropy of each row thus making it particularly robust to varying noise levels. Following, we present a new DR algorithm, SNEkhorn, that leverages this new affinity matrix. 
We show its clear superiority to state-of-the-art approaches with several indicators on both synthetic and real-world datasets.", "keywords": "Dimension Reduction;Optimal Transport;Affinities", "primary_area": "", "supplementary_material": "/attachment/567f618c4f92a62eb2d65990eae231d745b214aa.zip", "author": "Hugues Van Assel;Titouan Vayer;R\u00e9mi Flamary;Nicolas Courty", "authorids": "~Hugues_Van_Assel1;~Titouan_Vayer1;~R\u00e9mi_Flamary1;~Nicolas_Courty1", "gender": "M;M;;M", "homepage": "https://huguesva.github.io;https://tvayer.github.io/;https://remi.flamary.com/;http://people.irisa.fr/Nicolas.Courty/", "dblp": ";220/5519;00/8318;74/4219", "google_scholar": "9Lf9wq8AAAAJ;https://scholar.google.fr/citations?user=PJEv3JgAAAAJ;https://scholar.google.fr/citations?user=zDnwxFQAAAAJ;https://scholar.google.fr/citations?user=ibEREjcAAAAJ", "orcid": ";;0000-0002-4212-6627;0000-0003-1353-0126", "linkedin": ";;;", "or_profile": "~Hugues_Van_Assel1;~Titouan_Vayer1;~R\u00e9mi_Flamary1;~Nicolas_Courty1", "aff": "Ecole Normale Sup\u00e9rieure de Lyon;INRIA;\u00c9cole Polytechnique;IRISA", "aff_domain": "ens-lyon.fr;inria.fr;polytechnique.edu;irisa.fr", "position": "PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nassel2023snekhorn,\ntitle={{SNE}khorn: Dimension Reduction with Symmetric Entropic Affinities},\nauthor={Hugues Van Assel and Titouan Vayer and R{\\'e}mi Flamary and Nicolas Courty},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=y9U0IJ2uFr}\n}", "github": "", "project": "", "reviewers": "8Btp;WhX7;s51e;W2ip", "pdf_size": 1922221, "rating": "6;6;7;8", "confidence": "3;4;3;5", "soundness": "4;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "77;148;93;35", "wc_strengths": "130;42;138;70", "wc_weaknesses": "58;1;118;197", "wc_questions": "65;64;94;46", "wc_limitations": "1;7;102;6", "wc_review": "331;262;545;354", "wc_reply_reviewers": "0;20;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.25, 40.48070528041724 ], "wc_strengths_avg": [ 95.0, 40.33608806019741 ], "wc_weaknesses_avg": [ 93.5, 72.67908915224515 ], "wc_questions_avg": [ 67.25, 17.195566289017645 ], "wc_limitations_avg": [ 29.0, 42.20781918081056 ], "wc_review_avg": [ 373.0, 104.91663357161246 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15311887255945696100&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 17, "email": "ens-lyon.fr;inria.fr;polytechnique.edu;irisa.fr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Lyon;INRIA;Ecole Polytechnique;Institut de Recherche en Informatique et Automatique", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ens-lyon.fr;https://www.inria.fr;https://www.polytechnique.edu;https://www.irisa.fr", "aff_unique_abbr": "ENS de Lyon;INRIA;X;IRISA", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Operation-Level Early Stopping for Robustifying Differentiable NAS", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69966", "id": "yAOwkf4FyL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e0bc6dbcbcc957b2aeadb20c39ba7f05-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yAOwkf4FyL", "openreview": "https://openreview.net/forum?id=yAOwkf4FyL", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69966", "video": "https://nips.cc/virtual/2023/poster/69966", "author_site": "Shen Jiang, Zipeng Ji, Guanghui Zhu, Chunfeng Yuan, Yihua Huang", "tldr": "", "abstract": "Differentiable NAS (DARTS) is a simple and efficient neural architecture search method that has been extensively adopted in various machine learning tasks.\n% \nNevertheless, DARTS still encounters several robustness issues, mainly the domination of skip connections.\n% \nThe resulting architectures are full of parametric-free operations, leading to performance collapse.\n% \nExisting methods suggest that the skip connection has additional advantages in optimization compared to other parametric operations and propose to alleviate the domination of skip connections by eliminating these additional advantages.\n% \nIn this paper, we analyze this issue from a simple and straightforward perspective and propose that the domination of skip connections results from parametric operations overfitting the training data while architecture parameters are trained on the validation data, leading to undesired behaviors.\n% \nBased on this observation, we propose the operation-level early stopping (OLES) method to overcome this issue and robustify DARTS without introducing any computation overhead.\n% \nExtensive experimental results can verify our hypothesis and the effectiveness of OLES.", "keywords": "Differentiable neural architecture search; Image classification; Failure of DARTS", "primary_area": "", "supplementary_material": "", "author": "Shen Jiang;Zipeng Ji;Guanghui Zhu;Chunfeng Yuan;Yihua Huang", "authorids": "~Shen_Jiang2;522022330025@smail.nju.edu.cn;~Guanghui_Zhu1;~Chunfeng_Yuan4;~Yihua_Huang1", "gender": "M;;M;F;M", "homepage": ";;https://cs.nju.edu.cn/zgh/;http://pasa-bigdata.nju.edu.cn/;http://cs.nju.edu.cn/yhuang", "dblp": ";;161/2889.html;;", "google_scholar": ";;tsH6LowAAAAJ;;", "orcid": "0000-0002-9018-9649;;0000-0002-5069-5950;;", "linkedin": ";;;;", "or_profile": "~Shen_Jiang2;522022330025@smail.nju.edu.cn;~Guanghui_Zhu1;~Chunfeng_Yuan4;~Yihua_Huang1", "aff": "Nanjing University;;Nanjing University;;Nanjing University", "aff_domain": "nju.edu.cn;;nju.edu.cn;;nju.edu.cn", "position": "MS student;;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\njiang2023operationlevel,\ntitle={Operation-Level Early Stopping for Robustifying Differentiable {NAS}},\nauthor={Shen Jiang and Zipeng Ji and Guanghui Zhu and Chunfeng Yuan and Yihua Huang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yAOwkf4FyL}\n}", "github": "", "project": "", "reviewers": "fTac;5e2k;nZNk;nDad;XYdd", "pdf_size": 1153238, "rating": "3;4;5;6;7", "confidence": "4;5;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;1;2;3;3", "presentation": "3;4;2;3;4", "wc_summary": "75;55;60;48;127", "wc_strengths": "74;52;35;62;208", "wc_weaknesses": "355;108;134;130;96", "wc_questions": "3;11;40;2;41", 
"wc_limitations": "2;1;1;7;1", "wc_review": "509;227;270;249;473", "wc_reply_reviewers": "0;0;0;24;10", "wc_reply_authors": "0;41;0;66;40", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;2;1;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 73.0, 28.41830396065184 ], "wc_strengths_avg": [ 86.2, 62.226682379828034 ], "wc_weaknesses_avg": [ 164.6, 96.22390555366165 ], "wc_questions_avg": [ 19.4, 17.511139311878026 ], "wc_limitations_avg": [ 2.4, 2.33238075793812 ], "wc_review_avg": [ 345.6, 120.0359946016194 ], "wc_reply_reviewers_avg": [ 6.8, 9.431860898041277 ], "wc_reply_authors_avg": [ 29.4, 25.74956310308973 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.22360679774997896, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=257614311197448226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;;nju.edu.cn;;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Error Discovery By Clustering Influence Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69965", "id": "yBVLXvJ1sb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8278a2e5f9db8489cd908d20c43f1f87-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yBVLXvJ1sb", "openreview": "https://openreview.net/forum?id=yBVLXvJ1sb", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69965", "video": "https://nips.cc/virtual/2023/poster/69965", "author_site": "Fulton Wang, Julius Adebayo, Julius Adebayo, Sarah Tan, Diego Garcia-Olano, Narine Kokhlikyan", "tldr": "", "abstract": "We present a method for identifying groups of test examples---slices---on which a model under-performs, a task now known as slice discovery. We formalize coherence---a requirement that erroneous predictions, within a slice, should be wrong for the same reason---as a key property that any slice discovery method should satisfy. We then use influence functions to derive a new slice discovery method, InfEmbed, which satisfies coherence by returning slices whose examples are influenced similarly by the training data. InfEmbed is simple, and consists of applying K-Means clustering to a novel representation we deem influence embeddings. 
We show InfEmbed outperforms current state-of-the-art methods on 2 benchmarks, and is effective for model debugging across several case studies.", "keywords": "Debugging;interpretability;influence functions", "primary_area": "", "supplementary_material": "/attachment/6e4d882faaef5396cadd9a45952590e90b7ad88b.pdf", "author": "Fulton Wang;Julius Adebayo;Sarah Tan;Diego Garcia-Olano;Narine Kokhlikyan", "authorids": "~Fulton_Wang1;~Julius_Adebayo1;~Sarah_Tan1;~Diego_Garcia-Olano1;~Narine_Kokhlikyan1", "gender": ";M;;M;", "homepage": "https://scholar.google.com/citations?user=jKi3eEIAAAAJ&hl=en;https://juliusadebayo.com/;;http://diegoolano.com/;", "dblp": ";146/1271;;166/5010;136/9284", "google_scholar": ";y1bnRg4AAAAJ;;https://scholar.google.com/citations?hl=en;oZjHXwUAAAAJ", "orcid": ";;;;0000-0002-5827-5141", "linkedin": ";;;https://www.linkedin.com/pub/diego-garcia-olano/a0/57b/18a;", "or_profile": "~Fulton_Wang1;~Julius_Adebayo1;~Sarah_Tan1;~Diego_Garcia-Olano1;~Narine_Kokhlikyan1", "aff": "Meta;Prescient Design / Genentech;;Meta AI;Meta", "aff_domain": "meta.com;gene.com;;meta.com;meta.com", "position": "Researcher;Postdoc;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nwang2023error,\ntitle={Error Discovery By Clustering Influence Embeddings},\nauthor={Fulton Wang and Julius Adebayo and Sarah Tan and Diego Garcia-Olano and Narine Kokhlikyan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yBVLXvJ1sb}\n}", "github": "", "project": "", "reviewers": "MYaZ;2vT2;yzdi;6oL7;uzoE", "pdf_size": 745766, "rating": "6;6;7;7;7", "confidence": "3;3;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;4;3;3;3", "presentation": "3;4;3;3;3", "wc_summary": "45;62;161;123;51", "wc_strengths": "33;65;43;109;90", "wc_weaknesses": "66;159;167;23;61", "wc_questions": "38;306;122;32;27", "wc_limitations": "1;1;90;38;23", "wc_review": "183;593;583;325;252", "wc_reply_reviewers": "37;41;20;12;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 88.4, 45.71039269137818 ], "wc_strengths_avg": [ 68.0, 28.36899716239543 ], "wc_weaknesses_avg": [ 95.2, 57.377347446531545 ], "wc_questions_avg": [ 105.0, 106.38796924464721 ], "wc_limitations_avg": [ 30.6, 32.84265519107735 ], "wc_review_avg": [ 387.2, 170.02164568077797 ], "wc_reply_reviewers_avg": [ 24.2, 12.544321424453376 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8314502453737911685&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "meta.com;gene.com;;meta.com;meta.com", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Meta;Genentech", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.gene.com", "aff_unique_abbr": "Meta;Genentech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Balance, Imbalance, and Rebalance: Understanding Robust Overfitting from a Minimax Game Perspective", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69964", "id": "yBoVwpGa5E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/32f9049217da6e718a426b07242dff73-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yBoVwpGa5E", "openreview": "https://openreview.net/forum?id=yBoVwpGa5E", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69964", "video": "https://nips.cc/virtual/2023/poster/69964", "author_site": "Yifei Wang, Liangchen Li, Jiansheng Yang, Zhouchen Lin, Yisen Wang", "tldr": "", "abstract": "Adversarial Training (AT) has become arguably the state-of-the-art algorithm for extracting robust features. However, researchers recently notice that AT suffers from severe robust overfitting problems, particularly after learning rate (LR) decay. In this paper, we explain this phenomenon by viewing adversarial training as a dynamic minimax game between the model trainer and the attacker. Specifically, we analyze how LR decay breaks the balance between the minimax game by empowering the trainer with a stronger memorization ability, and show such imbalance induces robust overfitting as a result of memorizing non-robust features. We validate this understanding with extensive experiments, and provide a holistic view of robust overfitting from the dynamics of both the two game players. This understanding further inspires us to alleviate robust overfitting by rebalancing the two players by either regularizing the trainer's capacity or improving the attack strength. Experiments show that the proposed ReBalanced Adversarial Training (ReBAT) can attain good robustness and does not suffer from robust overfitting even after very long training. Code is available at https://github.com/PKU-ML/ReBAT.", "keywords": "Adversarial Training", "primary_area": "", "supplementary_material": "", "author": "Yifei Wang;Liangchen Li;Jiansheng Yang;Zhouchen Lin;Yisen Wang", "authorids": "~Yifei_Wang1;~Liangchen_Li1;~Jiansheng_Yang1;~Zhouchen_Lin1;~Yisen_Wang1", "gender": "M;;M;M;M", "homepage": "https://yifeiwang77.com;;https://dics.pku.edu.cn/ryzc/qtjy/y/110330.htm;https://zhouchenlin.github.io;https://yisenwang.github.io/", "dblp": "00/555-1;;;l/ZhouchenLin;172/1346-1", "google_scholar": "-CLy6YsAAAAJ;;;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ;uMWPDboAAAAJ", "orcid": ";;;0000-0003-1493-7569;", "linkedin": ";;;;", "or_profile": "~Yifei_Wang1;~Liangchen_Li1;~Jiansheng_Yang1;~Zhouchen_Lin1;~Yisen_Wang1", "aff": "Peking University;;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023balance,\ntitle={Balance, Imbalance, and Rebalance: Understanding Robust Overfitting from a Minimax Game Perspective},\nauthor={Yifei Wang and Liangchen Li and Jiansheng Yang and Zhouchen Lin and Yisen Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yBoVwpGa5E}\n}", "github": "", "project": "", "reviewers": "kwEx;EtSp;Znmp;55GS", "pdf_size": 7428803, "rating": "4;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;3;2", "wc_summary": "69;67;64;76", "wc_strengths": "26;51;74;97", "wc_weaknesses": "203;48;56;162", "wc_questions": "6;33;5;2", "wc_limitations": "1;11;10;1", "wc_review": "305;210;209;338", "wc_reply_reviewers": "567;52;0;45", 
"wc_reply_authors": "2740;0;0;0", "reply_reviewers": "6;1;0;1", "reply_authors": "7;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 4.415880433163924 ], "wc_strengths_avg": [ 62.0, 26.391286440793294 ], "wc_weaknesses_avg": [ 117.25, 66.90057922021303 ], "wc_questions_avg": [ 11.5, 12.5 ], "wc_limitations_avg": [ 5.75, 4.763139720814412 ], "wc_review_avg": [ 265.5, 57.20358380381425 ], "wc_reply_reviewers_avg": [ 166.0, 232.37577326390976 ], "wc_reply_authors_avg": [ 685.0, 1186.454803184681 ], "reply_reviewers_avg": [ 2.0, 2.345207879911715 ], "reply_authors_avg": [ 2.5, 2.598076211353316 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15564378337763276524&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Siamese Masked Autoencoders", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69963", "id": "yC3q7vInux", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ffb9f1b57628932518505b532301603-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yC3q7vInux", "openreview": "https://openreview.net/forum?id=yC3q7vInux", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69963", "video": "https://nips.cc/virtual/2023/poster/69963", "author_site": "Agrim Gupta, Jiajun Wu, Jia Deng, Fei-Fei Li", "tldr": "", "abstract": "Establishing correspondence between images or scenes is a significant challenge in computer vision, especially given occlusions, viewpoint changes, and varying object appearances. In this paper, we present Siamese Masked Autoencoders (SiamMAE), a simple extension of Masked Autoencoders (MAE) for learning visual correspondence from videos. SiamMAE operates on pairs of randomly sampled video frames and asymmetrically masks them. These frames are processed independently by an encoder network, and a decoder composed of a sequence of cross-attention layers is tasked with predicting the missing patches in the future frame. By masking a large fraction (95%) of patches in the future frame while leaving the past frame unchanged, SiamMAE encourages the network to focus on object motion and learn object-centric representations. Despite its conceptual simplicity, features learned via SiamMAE outperform state-of-the-art self-supervised methods on video object segmentation, pose keypoint propagation, and semantic part propagation tasks. 
SiamMAE achieves competitive results without relying on data augmentation, handcrafted tracking-based pretext tasks, or other techniques to prevent representational collapse.", "keywords": "Representation Learning;Visual Correspondence;Self-supervised learning;Videos", "primary_area": "", "supplementary_material": "/attachment/63b716dbcb6a61f8942e59ae58bcb9168ddee94b.pdf", "author": "Agrim Gupta;Jiajun Wu;Jia Deng;Li Fei-Fei", "authorids": "~Agrim_Gupta1;~Jiajun_Wu1;~Jia_Deng1;~Li_Fei-Fei1", "gender": ";M;M;F", "homepage": ";https://jiajunwu.com;;https://profiles.stanford.edu/fei-fei-li", "dblp": "200/8282;117/4768;07/6526-1.html;79/2528", "google_scholar": "AxzVaI8AAAAJ;2efgcS0AAAAJ;U3Eub-EAAAAJ;rDfyQnIAAAAJ", "orcid": ";0000-0002-4176-343X;;", "linkedin": ";jiajunwu/;;fei-fei-li-4541247/", "or_profile": "~Agrim_Gupta1;~Jiajun_Wu1;~Jia_Deng1;~Li_Fei-Fei1", "aff": "Stanford University;Stanford University;Princeton University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;princeton.edu;stanford.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngupta2023siamese,\ntitle={Siamese Masked Autoencoders},\nauthor={Agrim Gupta and Jiajun Wu and Jia Deng and Li Fei-Fei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yC3q7vInux}\n}", "github": "", "project": "", "reviewers": "uZFR;5RkZ;h4WK;9jyT;e5XC", "pdf_size": 6942323, "rating": "6;7;7;7;7", "confidence": "4;4;5;3;4", "soundness": "3;4;3;3;4", "novelty": "3;3;3;3;3", "presentation": "3;4;4;3;4", "wc_summary": "48;114;68;56;165", "wc_strengths": "73;60;38;21;87", "wc_weaknesses": "74;107;69;90;151", "wc_questions": "64;88;7;7;93", "wc_limitations": "78;1;2;1;59", "wc_review": "337;370;184;175;555", "wc_reply_reviewers": "40;18;15;54;230", "wc_reply_authors": "0;0;0;21;118", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 90.2, 43.82875768259922 ], "wc_strengths_avg": [ 55.8, 23.726778120933318 ], "wc_weaknesses_avg": [ 98.2, 29.566196914720027 ], "wc_questions_avg": [ 51.8, 37.87030498952973 ], "wc_limitations_avg": [ 28.2, 33.45085948073681 ], "wc_review_avg": [ 324.2, 139.6042979281082 ], "wc_reply_reviewers_avg": [ 71.4, 80.59181099838867 ], "wc_reply_authors_avg": [ 27.8, 45.82750265942931 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2706685955994135285&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 10, "email": "stanford.edu;stanford.edu;princeton.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu", "aff_unique_abbr": "Stanford;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Credal Marginal MAP", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69962", "id": "yCBqKTvYe9", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/953390c834451505703c9da45de634d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yCBqKTvYe9", "openreview": "https://openreview.net/forum?id=yCBqKTvYe9", "poster": "/media/PosterPDFs/NeurIPS%202023/69962.png?t=1701424713.1935112", "slides": "https://nips.cc/virtual/2023/poster/69962", "video": "https://nips.cc/virtual/2023/poster/69962", "author_site": "Radu Marinescu, Debarun Bhattacharjya, Junkyu Lee, Fabio Cozman, Alexander Gray", "tldr": "", "abstract": "Credal networks extend Bayesian networks to allow for imprecision in probability values. Marginal MAP is a widely applicable mixed inference task that identifies the most likely assignment for a subset of variables (called MAP variables). However, the task is extremely difficult to solve in credal networks particularly because the evaluation of each complete MAP assignment involves exact likelihood computations (combinatorial sums) over the vertices of a complex joint credal set representing the space of all possible marginal distributions of the MAP variables. In this paper, we explore Credal Marginal MAP inference and develop new exact methods based on variable elimination and depth-first search as well as several approximation schemes based on the mini-bucket partitioning and stochastic local search. An extensive empirical evaluation demonstrates the effectiveness of our new methods on random as well as real-world benchmark problems.", "keywords": "graphical models;credal networks;probabilistic inference", "primary_area": "", "supplementary_material": "/attachment/a6dbce1e87fad2733b585eccaa5e1a94be3e1e69.zip", "author": "Radu Marinescu;Debarun Bhattacharjya;Junkyu Lee;Fabio Cozman;Alexander G. Gray", "authorids": "~Radu_Marinescu2;~Debarun_Bhattacharjya1;~Junkyu_Lee1;~Fabio_Cozman1;~Alexander_G._Gray1", "gender": ";M;;M;M", "homepage": ";https://researcher.watson.ibm.com/researcher/view.php?person=us-debarunb;https://www.linkedin.com/in/junkyul/;http://sites.poli.usp.br/p/fabio.cozman/;", "dblp": "m/RaduMarinescu2;98/5604;65/6241-1;g/FabioGagliardiCozman;85/110.html", "google_scholar": ";pwfVt-MAAAAJ;kigtlXEAAAAJ;https://scholar.google.com.br/citations?user=tzbfgcMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-6636-2886;;0000-0003-0337-7359", "linkedin": ";;junkyul/;;alexander-gray-b554b64/", "or_profile": "~Radu_Marinescu2;~Debarun_Bhattacharjya1;~Junkyu_Lee1;~Fabio_Cozman1;~Alexander_G._Gray1", "aff": "International Business Machines;International Business Machines;International Business Machines;Universidade de Sao Paulo;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com;usp.br;ibm.com", "position": "Researcher;Researcher;Researcher;Full Professor;VP, Foundations of AI", "bibtex": "@inproceedings{\nmarinescu2023credal,\ntitle={Credal Marginal {MAP}},\nauthor={Radu Marinescu and Debarun Bhattacharjya and Junkyu Lee and Fabio Cozman and Alexander G. 
Gray},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yCBqKTvYe9}\n}", "github": "", "project": "", "reviewers": "3qEE;VJAx;8qY7;9XGw", "pdf_size": 719100, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "4;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "34;66;64;75", "wc_strengths": "52;59;141;59", "wc_weaknesses": "65;58;78;56", "wc_questions": "24;79;13;21", "wc_limitations": "18;10;1;6", "wc_review": "193;272;297;217", "wc_reply_reviewers": "0;15;5;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.75, 15.433324334050653 ], "wc_strengths_avg": [ 77.75, 36.62905267680288 ], "wc_weaknesses_avg": [ 64.25, 8.613216588476108 ], "wc_questions_avg": [ 34.25, 26.14741861063918 ], "wc_limitations_avg": [ 8.75, 6.219927652312364 ], "wc_review_avg": [ 244.75, 41.595522595587134 ], "wc_reply_reviewers_avg": [ 8.25, 6.057020719792859 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8390465110867579415&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ibm.com;ibm.com;ibm.com;usp.br;ibm.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "International Business Machines Corporation;Universidade de Sao Paulo", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://www.usp.br", "aff_unique_abbr": "IBM;USP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Brazil" }, { "title": "Closing the gap between the upper bound and lower bound of Adam's iteration complexity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69961", "id": "yDvb3mlogA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7ac19fdcdf4f311f3e3ef2e7ef4784d7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yDvb3mlogA", "openreview": "https://openreview.net/forum?id=yDvb3mlogA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69961", "video": "https://nips.cc/virtual/2023/poster/69961", "author_site": "Bohan Wang, Jingwen Fu, Huishuai Zhang, Nanning Zheng, Wei Chen", "tldr": "", "abstract": "Recently, Arjevani et al. [1] establish a lower bound of iteration complexity for the first-order optimization under an $L$-smooth condition and a bounded noise variance assumption. However, a thorough review of existing literature on Adam's convergence reveals a noticeable gap: none of them meet the above lower bound. In this paper, we close the gap by deriving a new convergence guarantee of Adam, with only an $L$-smooth condition and a bounded noise variance assumption. Our results remain valid across a broad spectrum of hyperparameters. Especially with properly chosen hyperparameters, we derive an upper bound of the iteration complexity of Adam and show that it meets the lower bound for first-order optimizers. 
To the best of our knowledge, this is the first to establish such a tight upper bound for Adam's convergence. Our proof utilizes novel techniques to handle the entanglement between momentum and adaptive learning rate and to convert the first-order term in the Descent Lemma to the gradient norm, which may be of independent interest.", "keywords": "Adam;Convergence;Upper Bound;Lower Bound", "primary_area": "", "supplementary_material": "/attachment/aab700dbacb9007051b8dfdcd6cab2755e27c6f1.pdf", "author": "Bohan Wang;Jingwen Fu;Huishuai Zhang;Nanning Zheng;Wei Chen", "authorids": "~Bohan_Wang1;~Jingwen_Fu1;~Huishuai_Zhang3;~Nanning_Zheng1;~Wei_Chen1", "gender": "M;M;M;F;M", "homepage": "https://bhwangfy.github.io/;https://www.jw-fu.cn/;;https://weichen-cas.github.io/;https://huishuai-git.github.io", "dblp": "202/1184;247/5290;07/256-1;;144/7537", "google_scholar": "LfkHCEUAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;w1srHyIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Bohan_Wang1;~Jingwen_Fu1;~Nanning_Zheng1;~Wei_Chen1;~Huishuai_Zhang2", "aff": "Microsoft Research Asia, University of Science and Technology of China;Microsoft;Xi'an Jiaotong University; Chinese Academy of Sciences;Microsoft Research Asia", "aff_domain": "ustc.edu.cn;microsoft.com;xjtu.edu.cn;ict.ac.cn;microsoft.com", "position": "PhD student;Intern;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nwang2023closing,\ntitle={Closing the gap between the upper bound and lower bound of Adam's iteration complexity},\nauthor={Bohan Wang and Jingwen Fu and Huishuai Zhang and Nanning Zheng and Wei Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yDvb3mlogA}\n}", "github": "", "project": "", "reviewers": "b4JJ;oCro;tjB4;ncBJ", "pdf_size": 559865, "rating": "3;5;6;7", "confidence": "4;3;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "16;79;75;102", "wc_strengths": "150;62;80;67", "wc_weaknesses": "41;115;311;1", "wc_questions": "364;9;73;22", "wc_limitations": "7;1;12;1", "wc_review": "578;266;551;193", "wc_reply_reviewers": "1032;0;0;0", "wc_reply_authors": "3886;0;406;0", "reply_reviewers": "4;0;0;0", "reply_authors": "8;1;2;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 68.0, 31.741140496207755 ], "wc_strengths_avg": [ 89.75, 35.40038841594821 ], "wc_weaknesses_avg": [ 117.0, 119.23925528113634 ], "wc_questions_avg": [ 117.0, 144.59771782431423 ], "wc_limitations_avg": [ 5.25, 4.602988159880492 ], "wc_review_avg": [ 397.0, 169.74539758120102 ], "wc_reply_reviewers_avg": [ 258.0, 446.86910835277035 ], "wc_reply_authors_avg": [ 1073.0, 1632.5222816243581 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 3.0, 2.9154759474226504 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.50709255283711, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=967892666752930119&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;microsoft.com;xjtu.edu.cn;ict.ac.cn;microsoft.com", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Microsoft;Xi'an Jiao Tong University;Chinese Academy of Sciences", "aff_unique_dep": "Research;;", 
"aff_unique_url": "https://www.microsoft.com/en-us/research/group/microsoft-research-asia;https://www.xjtu.edu.cn;https://www.cas.cn", "aff_unique_abbr": "MSRA;XJTU;CAS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Advancing Bayesian Optimization via Learning Correlated Latent Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69960", "id": "yE62KM4qsO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/98e967164ae2f6811b975d686dece3eb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yE62KM4qsO", "openreview": "https://openreview.net/forum?id=yE62KM4qsO", "poster": "/media/PosterPDFs/NeurIPS%202023/69960.png?t=1699613716.4385114", "slides": "https://nips.cc/virtual/2023/poster/69960", "video": "https://nips.cc/virtual/2023/poster/69960", "author_site": "Seunghun Lee, Jaewon Chu, Sihyeon Kim, Juyeon Ko, Hyunwoo Kim", "tldr": "", "abstract": "Bayesian optimization is a powerful method for optimizing black-box functions with limited function evaluations. Recent works have shown that optimization in a latent space through deep generative models such as variational autoencoders leads to effective and efficient Bayesian optimization for structured or discrete data. However, as the optimization does not take place in the input space, it leads to an inherent gap that results in potentially suboptimal solutions. To alleviate the discrepancy, we propose Correlated latent space Bayesian Optimization (CoBO), which focuses on learning correlated latent spaces characterized by a strong correlation between the distances in the latent space and the distances within the objective function. Specifically, our method introduces Lipschitz regularization, loss weighting, and trust region recoordination to minimize the inherent gap around the promising areas. We demonstrate the effectiveness of our approach on several optimization tasks in discrete data, such as molecule design and arithmetic expression fitting, and achieve high performance within a small budget.", "keywords": "Bayesian optimization;smoothness regularization;variational autoencoder", "primary_area": "", "supplementary_material": "/attachment/e2ee1861b3822b8a0056f98378f9c225976e6445.pdf", "author": "Seunghun Lee;Jaewon Chu;Sihyeon Kim;Juyeon Ko;Hyunwoo J. Kim", "authorids": "~Seunghun_Lee2;~Jaewon_Chu1;~Sihyeon_Kim1;~Juyeon_Ko1;~Hyunwoo_J._Kim3", "gender": "M;M;F;;M", "homepage": "https://github.com/llsshh319;https://github.com/allonsy07;;https://github.com/dewyeon;https://hyunwoojkim.com/publications", "dblp": "77/7676-1;355/0102;304/2362;317/5260;150/4259", "google_scholar": "LPuuGcAAAAAJ;X3RX138AAAAJ;;;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": "0000-0001-9377-2832;;;;0000-0002-2181-9264", "linkedin": ";jaewon-chu-64b003262;sihyeon-kim-a91aaa212/;;", "or_profile": "~Seunghun_Lee2;~Jaewon_Chu1;~Sihyeon_Kim1;~Juyeon_Ko1;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nlee2023advancing,\ntitle={Advancing Bayesian Optimization via Learning Correlated Latent Space},\nauthor={Seunghun Lee and Jaewon Chu and Sihyeon Kim and Juyeon Ko and Hyunwoo J. 
Kim},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yE62KM4qsO}\n}", "github": "", "project": "", "reviewers": "aw6W;ynAb;4zSv;Bvzj", "pdf_size": 1692272, "rating": "3;4;4;7", "confidence": "4;5;5;4", "soundness": "2;3;2;4", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "111;125;100;77", "wc_strengths": "54;56;30;152", "wc_weaknesses": "651;338;217;486", "wc_questions": "7;7;102;215", "wc_limitations": "19;20;16;36", "wc_review": "842;546;465;966", "wc_reply_reviewers": "159;0;122;274", "wc_reply_authors": "499;0;290;303", "reply_reviewers": "1;0;2;2", "reply_authors": "2;1;3;3", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.25, 17.55526986406076 ], "wc_strengths_avg": [ 73.0, 46.74398357008097 ], "wc_weaknesses_avg": [ 423.0, 162.49153824122658 ], "wc_questions_avg": [ 82.75, 85.63987097141144 ], "wc_limitations_avg": [ 22.75, 7.790218225441442 ], "wc_review_avg": [ 704.75, 206.01623115667368 ], "wc_reply_reviewers_avg": [ 138.75, 97.76854044118691 ], "wc_reply_authors_avg": [ 273.0, 178.04072567814364 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.33333333333333337, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7177744240246428889&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Convergence of Adam Under Relaxed Assumptions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69959", "id": "yEewbkBNzi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a3cc50126338b175e56bb3cad134db0b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yEewbkBNzi", "openreview": "https://openreview.net/forum?id=yEewbkBNzi", "poster": "/media/PosterPDFs/NeurIPS%202023/69959.png?t=1702268315.5095153", "slides": "https://nips.cc/virtual/2023/poster/69959", "video": "https://nips.cc/virtual/2023/poster/69959", "author_site": "Haochuan Li, Alexander Rakhlin, Ali Jadbabaie", "tldr": "", "abstract": "In this paper, we provide a rigorous proof of convergence of the Adaptive Moment Estimate (Adam) algorithm for a wide class of optimization objectives. Despite the popularity and efficiency of the Adam algorithm in training deep neural networks, its theoretical properties are not yet fully understood, and existing convergence proofs require unrealistically strong assumptions, such as globally bounded gradients, to show the convergence to stationary points. In this paper, we show that Adam provably converges to $\\epsilon$-stationary points with $\\mathcal{O}(\\epsilon^{-4})$ gradient complexity under far more realistic conditions. 
The key to our analysis is a new proof of boundedness of gradients along the optimization trajectory of Adam, under a generalized smoothness assumption according to which the local smoothness (i.e., Hessian norm when it exists) is bounded by a sub-quadratic function of the gradient norm. Moreover, we propose a variance-reduced version of Adam with an accelerated gradient complexity of $\\mathcal{O}(\\epsilon^{-3})$.", "keywords": "Non-convex optimization;Adam;Convergence;Variance reduction", "primary_area": "", "supplementary_material": "", "author": "Haochuan Li;Alexander Rakhlin;Ali Jadbabaie", "authorids": "~Haochuan_Li2;~Alexander_Rakhlin1;~Ali_Jadbabaie1", "gender": "M;M;M", "homepage": ";http://www.mit.edu/~rakhlin/;http://www.mit.edu/~jadbabai/www", "dblp": "https://dblp.org/pers/l/Li:Haochuan.html;59/407;83/3158", "google_scholar": "1yB0eLMAAAAJ;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ;ZBc_WwYAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Haochuan_Li2;~Alexander_Rakhlin1;~Ali_Jadbabaie1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023convergence,\ntitle={Convergence of Adam Under Relaxed Assumptions},\nauthor={Haochuan Li and Alexander Rakhlin and Ali Jadbabaie},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yEewbkBNzi}\n}", "github": "", "project": "", "reviewers": "rhLM;Rvyw;LKzM;hwio;d5Nv", "pdf_size": 534247, "rating": "6;6;7;7;8", "confidence": "3;4;5;3;2", "soundness": "4;3;3;4;4", "novelty": "3;3;3;3;4", "presentation": "4;3;3;3;3", "wc_summary": "131;83;47;20;67", "wc_strengths": "91;29;78;173;58", "wc_weaknesses": "107;248;70;167;57", "wc_questions": "260;83;33;4;29", "wc_limitations": "7;1;1;1;7", "wc_review": "596;444;229;365;218", "wc_reply_reviewers": "190;17;0;98;18", "wc_reply_authors": "156;0;0;134;0", "reply_reviewers": "2;1;0;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 69.6, 37.22149916378974 ], "wc_strengths_avg": [ 85.8, 48.35452409030617 ], "wc_weaknesses_avg": [ 129.8, 70.35737345865037 ], "wc_questions_avg": [ 81.8, 92.71332158864764 ], "wc_limitations_avg": [ 3.4, 2.9393876913398134 ], "wc_review_avg": [ 370.4, 141.11073665742092 ], "wc_reply_reviewers_avg": [ 64.6, 71.34591789303717 ], "wc_reply_authors_avg": [ 58.0, 71.37506567422548 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.41931393468876726, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1871021466084575966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PopSign ASL v1.0: An Isolated American Sign 
Language Dataset Collected via Smartphones", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73414", "id": "yEf8NSqTPu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/00dada608b8db212ea7d9d92b24c68de-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=yEf8NSqTPu", "openreview": "https://openreview.net/forum?id=yEf8NSqTPu", "poster": "/media/PosterPDFs/NeurIPS%202023/73414.png?t=1702252482.7126098", "slides": "https://nips.cc/virtual/2023/poster/73414", "video": "https://nips.cc/virtual/2023/poster/73414", "author_site": "Thad Starner, Sean Forbes, Matthew So, David Martin, Rohit Sridhar, Gururaj Deshpande, Sam Sepah, Sahir Shahryar, Khushi Bhardwaj, Tyler Kwok, Daksh Sehgal, Saad Hassan, Bill Neubauer, Sofia Vempala, Alec Tan, Jocelyn Heath, Unnathi Kumar, Priyanka Mosur, Tavenner Hall, Rajandeep Singh, Christopher Cui, Glenn Cameron, Sohier Dane, Garrett Tanzer", "tldr": "", "abstract": "PopSign is a smartphone-based bubble-shooter game that helps hearing parents\nof deaf infants learn sign language. To help parents practice their ability to sign,\nPopSign is integrating sign language recognition as part of its gameplay. For\ntraining the recognizer, we introduce the PopSign ASL v1.0 dataset that collects\nexamples of 250 isolated American Sign Language (ASL) signs using Pixel 4A\nsmartphone selfie cameras in a variety of environments. It is the largest publicly\navailable, isolated sign dataset by number of examples and is the first dataset to\nfocus on one-handed, smartphone signs. We collected over 210,000 examples\nat 1944x2592 resolution made by 47 consenting Deaf adult signers for whom\nAmerican Sign Language is their primary language. We manually reviewed 217,866\nof these examples, of which 175,022 (approximately 700 per sign) were the sign\nintended for the educational game. 39,304 examples were recognizable as a sign\nbut were not the desired variant or were a different sign. We provide a training set\nof 31 signers, a validation set of eight signers, and a test set of eight signers. A\nbaseline LSTM model for the 250-sign vocabulary achieves 82.1% accuracy (81.9%\nclass-weighted F1 score) on the validation set and 84.2% (83.9% class-weighted\nF1 score) on the test set. Gameplay suggests that accuracy will be sufficient for\ncreating educational games involving sign language recognition.", "keywords": "American Sign Language;gesture;dataset", "primary_area": "", "supplementary_material": "/attachment/d1cbd34fafbf93c8469b33ef56c46ff20032c55f.pdf", "author": "Thad Starner;Sean Forbes;Matthew So;David Martin;Rohit Sridhar;Gururaj Deshpande;Sam Sepah;Sahir Shahryar;Khushi Bhardwaj;Tyler Kwok;Daksh Sehgal;Saad Hassan;Bill Neubauer;Sofia Anandi Vempala;Alec Tan;Jocelyn Heath;Unnathi Utpal Kumar;Priyanka Vijayaraghavan Mosur;Tavenner M. 
Hall;Rajandeep Singh;Christopher Zhang Cui;Glenn Cameron;Sohier Dane;Garrett Tanzer", "authorids": "~Thad_Starner2;~Sean_Forbes1;~Matthew_So1;~David_Martin5;~Rohit_Sridhar1;~Gururaj_Deshpande1;~Sam_Sepah1;~Sahir_Shahryar1;~Khushi_Bhardwaj1;~Tyler_Kwok1;~Daksh_Sehgal1;~Saad_Hassan1;~Bill_Neubauer1;~Sofia_Anandi_Vempala1;~Alec_Tan1;~Jocelyn_Heath1;~Unnathi_Utpal_Kumar1;~Priyanka_Vijayaraghavan_Mosur1;~Tavenner_M._Hall1;~Rajandeep_Singh1;~Christopher_Zhang_Cui1;~Glenn_Cameron1;~Sohier_Dane1;~Garrett_Tanzer1", "gender": "M;M;;M;M;;M;M;F;M;M;M;;M;F;F;F;;M;M;M;;M;M", "homepage": "http://www.cc.gatech.edu/~thad;http://www.dpan.tv;;;;;https://www.SamSepah.com;https://sahirshahryar.com/;;;;https://saadhassan96.github.io/;;https://github.com/Alec-Tan;;;;;https://www.rajandeepsingh.com/;https://christopherzc.github.io/;https://glenncameronjr.com;;;", "dblp": "s/ThadStarner.html;;;;324/3358;;;;https://dblp.org/rec/journals/corr/abs-2305-10782;;;;;;;;;;;336/2458;;;238/9928.html;", "google_scholar": "qr8Vo9IAAAAJ;;LRkpHaYAAAAJ;;;;;;;HZwJJdgAAAAJ;;oy6dZIIAAAAJ;;;;;;https://scholar.google.com/citations?view_op=list_works;;uGTn8fUAAAAJ;;;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-0109-8885;;0000-0002-4479-5752;;;;;0000-0003-2129-3060;;;;0000-0002-5855-4503;;0009-0005-4043-0376;;;;0009-0001-0055-0331;0000-0002-0116-634X;;;;", "linkedin": ";seanforbes;matthew-so;;rohit-sridhar-b57432160/;gururaj-m-deshpande/;samsepah/;;khushibhardwaj/;tylerkwok/;daksh-sehgal/;saadhassan1/;;;jocelyn-heath;unnathi-kumar/;priyanka-mosur-ab661819a;;rajandeepsingh13/;christopher-cui-8b4053135/;glennjr/;sohierdane;;wcn4/", "or_profile": "~Thad_Starner2;~Sean_Forbes1;~Matthew_So1;~David_Martin5;~Rohit_Sridhar1;~Gururaj_Deshpande1;~Sam_Sepah1;~Sahir_Shahryar1;~Khushi_Bhardwaj1;~Tyler_Kwok1;~Daksh_Sehgal1;~Saad_Hassan1;~Sofia_Anandi_Vempala1;~Alec_Tan1;~Jocelyn_Heath1;~Unnathi_Utpal_Kumar1;~Priyanka_Vijayaraghavan_Mosur1;~Tavenner_M._Hall1;~Rajandeep_Singh1;~Christopher_Zhang_Cui1;~Glenn_Cameron1;~Sohier_Dane1;~Garrett_Tanzer1;~William_C_Neubauer1", "aff": "Research, Google;DPAN;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Google;;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Rochester Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;;Georgia Institute of Technology;Georgia Institute of Technology;;Google;Google;Georgia Institute of Technology", "aff_domain": "research.google.com;dpan.tv;gatech.edu;gatech.edu;gatech.edu;gatech.edu;google.com;;gatech.edu;gatech.edu;gatech.edu;rit.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;;gatech.edu;gatech.edu;;google.com;google.com;gatech.edu", "position": "Researcher;Researcher;MS student;Undergrad student;PhD student;Undergrad student;Researcher;;Undergrad student;Undergrad student;Undergrad student;PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;;MS student;MS student;;Researcher;Researcher;Undergrad student", "bibtex": "@inproceedings{\nstarner2023popsign,\ntitle={PopSign {ASL} v1.0: An Isolated American Sign Language Dataset Collected via Smartphones},\nauthor={Thad Starner and Sean Forbes and Matthew So and David Martin and Rohit Sridhar and Gururaj Deshpande and Sam Sepah and Sahir Shahryar and Khushi Bhardwaj and Tyler Kwok and Daksh 
Sehgal and Saad Hassan and Bill Neubauer and Sofia Anandi Vempala and Alec Tan and Jocelyn Heath and Unnathi Utpal Kumar and Priyanka Vijayaraghavan Mosur and Tavenner M. Hall and Rajandeep Singh and Christopher Zhang Cui and Glenn Cameron and Sohier Dane and Garrett Tanzer},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=yEf8NSqTPu}\n}", "github": "", "project": "", "reviewers": "HKTi;BJjx;cDoQ", "pdf_size": 2338795, "rating": "6;7;7", "confidence": "4;5;4", "wc_summary_and_contributions": "55;48;86", "wc_strengths": "53;75;67", "wc_improvement": "137;578;71", "wc_limitations": "6;110;12", "wc_correctness": "5;8;42", "wc_clarity": "5;84;9", "wc_relation_to_prior_work": "8;335;28", "wc_documentation": "6;66;11", "wc_additional_feedback": "1;1;1", "wc_review": "276;1305;327", "wc_reply_reviewers": "26;21;0", "wc_reply_authors": "497;1065;212", "reply_reviewers": "1;1;0", "reply_authors": "1;3;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 63.0, 16.51262143533445 ], "wc_strengths_avg": [ 65.0, 9.092121131323903 ], "wc_improvement_avg": [ 262.0, 225.06443521800597 ], "wc_limitations_avg": [ 42.666666666666664, 47.67482447674967 ], "wc_correctness_avg": [ 18.333333333333332, 16.77961726487096 ], "wc_clarity_avg": [ 32.666666666666664, 36.33486235314814 ], "wc_relation_to_prior_work_avg": [ 123.66666666666667, 149.65812893242904 ], "wc_documentation_avg": [ 27.666666666666668, 27.182510717166817 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 636.0, 473.51240744039643 ], "wc_reply_reviewers_avg": [ 15.666666666666666, 11.2644968324772 ], "wc_reply_authors_avg": [ 591.3333333333334, 354.56671523925587 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 24, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18329938962527266815&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "research.google.com;dpan.tv;gatech.edu;gatech.edu;gatech.edu;gatech.edu;google.com;;gatech.edu;gatech.edu;gatech.edu;rit.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;;gatech.edu;gatech.edu;;google.com;google.com;gatech.edu", "author_num": 24, "aff_unique_index": "0;1;2;2;2;2;0;2;2;2;3;2;2;2;2;2;2;2;0;0;2", "aff_unique_norm": "Google;DPAN;Georgia Institute of Technology;Rochester Institute of Technology", "aff_unique_dep": "Google Research;;;", "aff_unique_url": "https://research.google;;https://www.gatech.edu;https://www.rit.edu", "aff_unique_abbr": "Google;;Georgia Tech;RIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Encoding Time-Series Explanations through Self-Supervised Model Behavior Consistency", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69958", "id": "yEfmhgwslQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/65ea878cb90b440e8b4cd34fe0959914-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yEfmhgwslQ", "openreview": "https://openreview.net/forum?id=yEfmhgwslQ", "poster": 
"/media/PosterPDFs/NeurIPS%202023/69958.png?t=1702313478.88111", "slides": "https://nips.cc/virtual/2023/poster/69958", "video": "https://nips.cc/virtual/2023/poster/69958", "author_site": "Owen Queen, Tom Hartvigsen, Teddy Koker, Huan He, Theodoros Tsiligkaridis, Marinka Zitnik", "tldr": "", "abstract": "Interpreting time series models is uniquely challenging because it requires identifying both the location of time series signals that drive model predictions and their matching to an interpretable temporal pattern. While explainers from other modalities can be applied to time series, their inductive biases do not transfer well to the inherently challenging interpretation of time series. We present TimeX, a time series consistency model for training explainers. TimeX trains an interpretable surrogate to mimic the behavior of a pretrained time series model. It addresses the issue of model faithfulness by introducing model behavior consistency, a novel formulation that preserves relations in the latent space induced by the pretrained model with relations in the latent space induced by TimeX. TimeX provides discrete attribution maps and, unlike existing interpretability methods, it learns a latent space of explanations that can be used in various ways, such as to provide landmarks to visually aggregate similar explanations and easily recognize temporal patterns. We evaluate TimeX on eight synthetic and real-world datasets and compare its performance against state-of-the-art interpretability methods. We also conduct case studies using physiological time series. Quantitative evaluations demonstrate that TimeX achieves the highest or second-highest performance in every metric compared to baselines across all datasets. Through case studies, we show that the novel components of TimeX show potential for training faithful, interpretable models that capture the behavior of pretrained time series models.", "keywords": "Explainability;Interpretability;Time Series;Explanations;Temporal patterns;Model Understanding;Latent space;Self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Owen Queen;Thomas Hartvigsen;Teddy Koker;Huan He;Theodoros Tsiligkaridis;Marinka Zitnik", "authorids": "~Owen_Queen1;~Thomas_Hartvigsen1;~Teddy_Koker1;~Huan_He2;~Theodoros_Tsiligkaridis1;~Marinka_Zitnik1", "gender": "M;M;M;M;M;", "homepage": "https://owencqueen.github.io/;https://www.tomhartvigsen.com;https://teddykoker.com;https://hehuannb.github.io/;https://sites.google.com/view/theo-t;https://zitniklab.hms.harvard.edu", "dblp": ";211/5752;283/5878;;64/10412;53/11277.html", "google_scholar": "https://scholar.google.com/citations?hl=en;rIjeeRsAAAAJ;br990A8AAAAJ;https://scholar.google.com/citations?hl=en;hVUVOTIAAAAJ;YtUDgPIAAAAJ", "orcid": ";;;;;", "linkedin": ";;teddykoker/;huanheemory/;;", "or_profile": "~Owen_Queen1;~Thomas_Hartvigsen1;~Teddy_Koker1;~Huan_He2;~Theodoros_Tsiligkaridis1;~Marinka_Zitnik1", "aff": "Harvard Medical School, Harvard University;Massachusetts Institute of Technology;MIT Lincoln Laboratory, Massachusetts Institute of Technology;Harvard University;MIT Lincoln Laboratory, Massachusetts Institute of Technology;Harvard University", "aff_domain": "hms.harvard.edu;mit.edu;ll.mit.edu;hms.harvard.edu;ll.mit.edu;harvard.edu", "position": "Researcher;Postdoc;Researcher;Postdoc;Senior AI Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nqueen2023encoding,\ntitle={Encoding Time-Series Explanations through Self-Supervised Model Behavior 
Consistency},\nauthor={Owen Queen and Thomas Hartvigsen and Teddy Koker and Huan He and Theodoros Tsiligkaridis and Marinka Zitnik},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yEfmhgwslQ}\n}", "github": "", "project": "", "reviewers": "wJLT;Mf92;9pJd;7JHq;Uw18", "pdf_size": 3649795, "rating": "5;6;7;7;7", "confidence": "2;2;2;4;4", "soundness": "2;3;4;4;4", "novelty": "3;3;3;4;3", "presentation": "2;3;3;4;4", "wc_summary": "172;40;71;50;155", "wc_strengths": "115;42;51;32;241", "wc_weaknesses": "451;45;11;18;230", "wc_questions": "131;2;47;52;353", "wc_limitations": "8;37;18;13;72", "wc_review": "877;166;198;165;1051", "wc_reply_reviewers": "130;0;16;17;179", "wc_reply_authors": "494;106;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 97.6, 54.993090475076954 ], "wc_strengths_avg": [ 96.2, 78.00358966098933 ], "wc_weaknesses_avg": [ 151.0, 170.15639864548146 ], "wc_questions_avg": [ 117.0, 125.0935649823763 ], "wc_limitations_avg": [ 29.6, 23.363218956299665 ], "wc_review_avg": [ 491.4, 389.9603056722569 ], "wc_reply_reviewers_avg": [ 68.4, 72.24015503859331 ], "wc_reply_authors_avg": [ 120.0, 191.4533885832267 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15243881318992896149&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "hms.harvard.edu;mit.edu;ll.mit.edu;hms.harvard.edu;ll.mit.edu;harvard.edu", "author_num": 6, "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology", "aff_unique_dep": "Harvard Medical School;", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu", "aff_unique_abbr": "Harvard;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Augmented Memory Replay-based Continual Learning Approaches for Network Intrusion Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69957", "id": "yGLokEhdh9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3755a02b1035fbadd5f93a022170e46f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yGLokEhdh9", "openreview": "https://openreview.net/forum?id=yGLokEhdh9", "poster": "/media/PosterPDFs/NeurIPS%202023/69957.png?t=1699255220.414565", "slides": "https://nips.cc/virtual/2023/poster/69957", "video": "https://nips.cc/virtual/2023/poster/69957", "author_site": "suresh kumar amalapuram, Sumohana Channappayya, Bheemarjuna Reddy Tamma", "tldr": "", "abstract": "Intrusion detection is a form of anomalous activity detection in communication network traffic. Continual learning (CL) approaches to the intrusion detection task accumulate old knowledge while adapting to the latest threat knowledge. Previous works have shown the effectiveness of memory replay-based CL approaches for this task. 
In this work, we present two novel contributions to improve the performance of CL-based network intrusion detection in the context of class imbalance and scalability. First, we extend class balancing reservoir sampling (CBRS), a memory-based CL method, to address the problems of severe class imbalance for large datasets. Second, we propose a novel approach titled perturbation assistance for parameter approximation (PAPA) based on the Gaussian mixture model to reduce the number of \\textit{virtual stochastic gradient descent (SGD) parameter} computations needed to discover maximally interfering samples for CL. We demonstrate that the proposed approaches perform remarkably better than the baselines on standard intrusion detection benchmarks created over shorter periods (KDDCUP'99, NSL-KDD, CICIDS-2017/2018, UNSW-NB15, and CTU-13) and a longer period with distribution shift (AnoShift). We also validated proposed approaches on standard continual learning benchmarks (SVHN, CIFAR-10/100, and CLEAR-10/100) and anomaly detection benchmarks (SMAP, SMD, and MSL). Further, the proposed PAPA approach significantly lowers the number of virtual SGD update operations, thus resulting in training time savings in the range of 12 to 40\\% compared to the maximally interfered samples retrieval algorithm.", "keywords": "Continual learning;Class imbalance;scalability;Network intrusion detection;and Cybersecurity", "primary_area": "", "supplementary_material": "/attachment/5e76557c7ac1377c08a7106a4a4ce2c265070261.pdf", "author": "Suresh kumar Amalapuram;Sumohana S. Channappayya;Bheemarjuna Tamma", "authorids": "~Suresh_kumar_Amalapuram1;~Sumohana_S._Channappayya1;~Bheemarjuna_Tamma1", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/suresh-kumar-amalapuram-87a42969/;https://people.iith.ac.in/tbr/;https://www.iith.ac.in/~sumohana", "dblp": "305/7572;;49/904", "google_scholar": "https://scholar.google.co.in/citations?user=DSHKk8YAAAAJ;https://scholar.google.com.tw/citations?user=FYHCD2kAAAAJ;_VCOXFwAAAAJ", "orcid": ";0000-0002-4056-7963;", "linkedin": "suresh-kumar-amalapuram-87a42969/;;", "or_profile": "~Suresh_kumar_Amalapuram1;~Bheemarjuna_Tamma1;~Sumohana_S_Channappayya1", "aff": "Indian Institute of Technology Hyderabad;Indian Institute of Technology, Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology;Indian Institute of Technology, Hyderabad", "aff_domain": "iith.ac.in;iith.ac.in;iith.ac.in", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\namalapuram2023augmented,\ntitle={Augmented Memory Replay-based Continual Learning Approaches for Network Intrusion Detection},\nauthor={Suresh kumar Amalapuram and Sumohana S. 
Channappayya and Bheemarjuna Tamma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yGLokEhdh9}\n}", "github": "", "project": "", "reviewers": "KabC;ZBnA;wTtL", "pdf_size": 1504002, "rating": "4;6;6", "confidence": "2;5;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "233;117;90", "wc_strengths": "24;115;52", "wc_weaknesses": "47;2806;156", "wc_questions": "221;232;101", "wc_limitations": "11;104;4", "wc_review": "536;3374;403", "wc_reply_reviewers": "271;126;26", "wc_reply_authors": "1477;34;44", "reply_reviewers": "2;1;1", "reply_authors": "4;2;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 146.66666666666666, 62.03404083422441 ], "wc_strengths_avg": [ 63.666666666666664, 38.055515004033545 ], "wc_weaknesses_avg": [ 1003.0, 1275.6898787192233 ], "wc_questions_avg": [ 184.66666666666666, 59.33146064460424 ], "wc_limitations_avg": [ 39.666666666666664, 45.58021013066478 ], "wc_review_avg": [ 1437.6666666666667, 1370.2706139867246 ], "wc_reply_reviewers_avg": [ 141.0, 100.58164179743073 ], "wc_reply_authors_avg": [ 518.3333333333334, 677.8919940193686 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9449111825230678, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7818767501557200957&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "iith.ac.in;iith.ac.in;iith.ac.in", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Technology Hyderabad;Indian Institute of Technology, Hyderabad", "aff_unique_dep": ";", "aff_unique_url": "https://www.iith.ac.in;https://www.iith.ac.in", "aff_unique_abbr": "IIT Hyderabad;IIT Hyderabad", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hyderabad", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "title": "What Can We Learn from Unlearnable Datasets?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69956", "id": "yGs9vTRjaE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ee5bb72130c332c3d4bf8d231e617506-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yGs9vTRjaE", "openreview": "https://openreview.net/forum?id=yGs9vTRjaE", "poster": "/media/PosterPDFs/NeurIPS%202023/69956.png?t=1701374991.9315956", "slides": "https://nips.cc/virtual/2023/poster/69956", "video": "https://nips.cc/virtual/2023/poster/69956", "author_site": "Pedro Sandoval-Segura, Vasu Singla, Jonas Geiping, Micah Goldblum, Tom Goldstein", "tldr": "", "abstract": "In an era of widespread web scraping, unlearnable dataset methods have the potential to protect data privacy by preventing deep neural networks from generalizing. But in addition to a number of practical limitations that make their use unlikely, we make a number of findings that call into question their ability to safeguard data. First, it is widely believed that neural networks trained on unlearnable datasets only learn shortcuts, simpler rules that are not useful for generalization. 
In contrast, we find that networks actually can learn useful features that can be reweighed for high test performance, suggesting that image protection is not assured. Unlearnable datasets are also believed to induce learning shortcuts through linear separability of added perturbations. We provide a counterexample, demonstrating that linear separability of perturbations is not a necessary condition. To emphasize why linearly separable perturbations should not be relied upon, we propose an orthogonal projection attack which allows learning from unlearnable datasets published in ICML 2021 and ICLR 2023. Our proposed attack is significantly less complex than recently proposed techniques.", "keywords": "data poisoning;poisons;unlearnable dataset;data protection;imperceptible perturbations;adversarial machine learning", "primary_area": "", "supplementary_material": "/attachment/ea2830f516476efb52ebe51994c87dcc72244a0d.pdf", "author": "Pedro Sandoval-Segura;Vasu Singla;Jonas Geiping;Micah Goldblum;Tom Goldstein", "authorids": "~Pedro_Sandoval-Segura1;~Vasu_Singla1;~Jonas_Geiping1;~Micah_Goldblum1;~Tom_Goldstein1", "gender": "M;M;;M;M", "homepage": "https://www.cs.umd.edu/people/vsingla;https://jonasgeiping.github.io/;;https://www.cs.umd.edu/~tomg/;http://cs.umd.edu/~psando", "dblp": "270/9234;190/7229;241/7231;25/8184;242/4604", "google_scholar": "geHpT2IAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;pGDKzuUAAAAJ;KmSuVtgAAAAJ;x-0RKroAAAAJ", "orcid": ";;;;0000-0003-1932-8092", "linkedin": ";;;;", "or_profile": "~Vasu_Singla1;~Jonas_Geiping1;~Micah_Goldblum1;~Tom_Goldstein1;~Pedro_Sandoval_Segura2", "aff": "Cruise LLC;University of Maryland, College Park;New York University;University of Maryland, College Park;Apple", "aff_domain": "getcruise.com;umd.edu;nyu.edu;umd.edu;apple.com", "position": "Intern;Postdoc;Postdoc;Full Professor;Intern", "bibtex": "@inproceedings{\nsandoval-segura2023what,\ntitle={What Can We Learn from Unlearnable Datasets?},\nauthor={Pedro Sandoval-Segura and Vasu Singla and Jonas Geiping and Micah Goldblum and Tom Goldstein},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yGs9vTRjaE}\n}", "github": "", "project": "", "reviewers": "vqjk;kEiu;zKV8;TH5w", "pdf_size": 14942698, "rating": "4;4;7;7", "confidence": "4;4;5;5", "soundness": "3;3;4;4", "novelty": "2;3;4;3", "presentation": "2;4;4;4", "wc_summary": "37;105;89;82", "wc_strengths": "51;130;189;63", "wc_weaknesses": "98;213;165;168", "wc_questions": "230;96;4;1", "wc_limitations": "52;85;11;1", "wc_review": "468;629;458;315", "wc_reply_reviewers": "171;139;15;18", "wc_reply_authors": "654;672;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 78.25, 25.232667318379164 ], "wc_strengths_avg": [ 108.25, 55.494932201057786 ], "wc_weaknesses_avg": [ 161.0, 41.04266073246227 ], "wc_questions_avg": [ 82.75, 93.19703589707132 ], "wc_limitations_avg": [ 37.25, 33.54381463101655 ], "wc_review_avg": [ 467.5, 111.16316836074797 ], "wc_reply_reviewers_avg": [ 85.75, 70.17611773245937 ], "wc_reply_authors_avg": [ 331.5, 331.56108034568837 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=3576525936543069992&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "getcruise.com;umd.edu;nyu.edu;umd.edu;apple.com", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Cruise LLC;University of Maryland;New York University;Apple", "aff_unique_dep": ";;;Apple Inc.", "aff_unique_url": "https://www.cruisellc.com;https://www/umd.edu;https://www.nyu.edu;https://www.apple.com", "aff_unique_abbr": "Cruise;UMD;NYU;Apple", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69955", "id": "yHdTscY6Ci", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/77c33e6a367922d003ff102ffb92b658-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yHdTscY6Ci", "openreview": "https://openreview.net/forum?id=yHdTscY6Ci", "poster": "/media/PosterPDFs/NeurIPS%202023/69955.png?t=1699358839.486632", "slides": "https://nips.cc/virtual/2023/poster/69955", "video": "https://nips.cc/virtual/2023/poster/69955", "author_site": "Yongliang Shen, Kaitao Song, Xu Tan, Dongsheng Li, Weiming Lu, Yueting Zhuang", "tldr": "", "abstract": "Solving complicated AI tasks with different domains and modalities is a key step toward artificial general intelligence. While there are numerous AI models available for various domains and modalities, they cannot handle complicated AI tasks autonomously. Considering large language models (LLMs) have exhibited exceptional abilities in language understanding, generation, interaction, and reasoning, we advocate that LLMs could act as a controller to manage existing AI models to solve complicated AI tasks, with language serving as a generic interface to empower this. Based on this philosophy, we present HuggingGPT, an LLM-powered agent that leverages LLMs (e.g., ChatGPT) to connect various AI models in machine learning communities (e.g., Hugging Face) to solve AI tasks. Specifically, we use ChatGPT to conduct task planning when receiving a user request, select models according to their function descriptions available in Hugging Face, execute each subtask with the selected AI model, and summarize the response according to the execution results. 
By leveraging the strong language capability of ChatGPT and abundant AI models in Hugging Face, HuggingGPT can tackle a wide range of sophisticated AI tasks spanning different modalities and domains and achieve impressive results in language, vision, speech, and other challenging tasks, which paves a new way towards the realization of artificial general intelligence.", "keywords": "LLM;ChatGPT;Hugging Face;Autonomous LLM", "primary_area": "", "supplementary_material": "/attachment/706c73401488f9a53ececa19c74edaaacbaf0b07.zip", "author": "Yongliang Shen;Kaitao Song;Xu Tan;Dongsheng Li;Weiming Lu;Yueting Zhuang", "authorids": "~Yongliang_Shen1;~Kaitao_Song1;~Xu_Tan1;~Dongsheng_Li2;~Weiming_Lu1;~Yueting_Zhuang1", "gender": "M;M;M;M;;M", "homepage": ";;https://tan-xu.github.io/;http://recmind.cn;;https://person.zju.edu.cn/yzhuang", "dblp": "221/5612-1.html;222/2082;96/10484-3;254/0830-2.html;;", "google_scholar": "UT3NzFAAAAAJ;https://scholar.google.com.hk/citations?user=LLk9dR8AAAAJ;tob-U1oAAAAJ;VNg5rA8AAAAJ;;1RD7UJAAAAAJ", "orcid": ";;0000-0001-5631-0639;0000-0003-3103-8442;;", "linkedin": ";;;;;", "or_profile": "~Yongliang_Shen1;~Kaitao_Song1;~Xu_Tan1;~Dongsheng_Li2;~Weiming_Lu1;~Yueting_Zhuang1", "aff": ";Microsoft;Microsoft;Microsoft Research Asia;;Zhejiang University", "aff_domain": ";microsoft.com;microsoft.com;microsoft.com;;zju.edu.cn", "position": ";Researcher;Principal Researcher;Principal Researcher;;Full Professor", "bibtex": "@inproceedings{\nshen2023hugginggpt,\ntitle={Hugging{GPT}: Solving {AI} Tasks with Chat{GPT} and its Friends in Hugging Face},\nauthor={Yongliang Shen and Kaitao Song and Xu Tan and Dongsheng Li and Weiming Lu and Yueting Zhuang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yHdTscY6Ci}\n}", "github": "", "project": "", "reviewers": "hS4Z;FvPX;FNMr;WQcy;sGd8", "pdf_size": 3178871, "rating": "3;6;6;7;9", "confidence": "4;5;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;2;4;4;4", "presentation": "3;3;4;4;4", "wc_summary": "86;84;71;49;50", "wc_strengths": "58;18;66;26;66", "wc_weaknesses": "283;46;253;49;14", "wc_questions": "5;30;9;126;2", "wc_limitations": "78;1;7;4;13", "wc_review": "510;179;406;254;145", "wc_reply_reviewers": "404;0;58;0;0", "wc_reply_authors": "1732;0;715;0;0", "reply_reviewers": "3;0;1;0;0", "reply_authors": "5;1;2;1;1", "rating_avg": [ 6.2, 1.9390719429665317 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.9797958971132712 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 68.0, 15.962455951387932 ], "wc_strengths_avg": [ 46.8, 20.614557962760202 ], "wc_weaknesses_avg": [ 129.0, 114.54780661365804 ], "wc_questions_avg": [ 34.4, 46.83844574705697 ], "wc_limitations_avg": [ 20.6, 28.973090963858173 ], "wc_review_avg": [ 298.8, 138.64400455843736 ], "wc_reply_reviewers_avg": [ 92.4, 157.41105424969365 ], "wc_reply_authors_avg": [ 489.4, 680.2186707228786 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.05157106231293971, "gs_citation": 1167, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14990757005844289549&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";microsoft.com;microsoft.com;microsoft.com;;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": 
"Microsoft;Zhejiang University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.zju.edu.cn", "aff_unique_abbr": "Microsoft;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;China" }, { "title": "Towards a Unified Analysis of Kernel-based Methods Under Covariate Shift", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69954", "id": "yIcCkMUCtL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e9b0ae84d6879b30c78cb8537466a4e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yIcCkMUCtL", "openreview": "https://openreview.net/forum?id=yIcCkMUCtL", "poster": "/media/PosterPDFs/NeurIPS%202023/69954.png?t=1698139381.5226266", "slides": "https://nips.cc/virtual/2023/poster/69954", "video": "https://nips.cc/virtual/2023/poster/69954", "author_site": "Xingdong Feng, Xin HE, Caixing Wang, Chao Wang, Jingnan Zhang", "tldr": "", "abstract": "Covariate shift occurs prevalently in practice, where the input distributions of the source and target data are substantially different. Despite its practical importance in various learning problems, most of the existing methods only focus on some specific learning tasks and are not well validated theoretically and numerically. To tackle this problem, we propose a unified analysis of general nonparametric methods in a reproducing kernel Hilbert space (RKHS) under covariate shift. Our theoretical results are established for a general loss belonging to a rich loss function family, which includes many commonly used methods as special cases, such as mean regression, quantile regression, likelihood-based classification, and margin-based classification. Two types of covariate shift problems are the focus of this paper and the sharp convergence rates are established for a general loss function to provide a unified theoretical analysis, which concurs with the optimal results in literature where the squared loss is used. 
Extensive numerical studies on synthetic and real examples confirm our theoretical findings and further illustrate the effectiveness of our proposed method.", "keywords": "kernel methods;covariate shift;reproducing kernel Hilbert space (RKHS)", "primary_area": "", "supplementary_material": "/attachment/04baf85f05547104bb9688110ecc5decf5dfb3c5.pdf", "author": "Xingdong Feng;Xin HE;Caixing Wang;Chao Wang;Jingnan Zhang", "authorids": "~Xingdong_Feng1;~Xin_HE7;~Caixing_Wang1;~Chao_Wang39;~Jingnan_Zhang1", "gender": "M;M;M;M;", "homepage": "https://bb9.sufe.edu.cn/bbcswebdav/users/2011000070/index.htm;;http://wangcaixing96.com/;https://github.com/wangchao-afk;https://sites.google.com/view/guoqinghe", "dblp": ";;;;", "google_scholar": "nQyBQOsAAAAJ;;SLEH6XYAAAAJ;;aduqO4EAAAAJ", "orcid": ";0000-0002-6315-589X;0009-0009-3068-6094;;", "linkedin": ";;;;", "or_profile": "~Xingdong_Feng1;~Jingnan_Zhang1;~Wang_Caixing1;~Wang_Chao2;~Xin_HE6", "aff": "Shanghai University of Finance and Economics;University of Science and Technology of China;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics", "aff_domain": "sufe.edu.cn;ustc.edu.cn;shufe.edu.cn;sufe.edu;shufe.edu", "position": "Full Professor;Assistant Professor;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfeng2023towards,\ntitle={Towards a Unified Analysis of Kernel-based Methods Under Covariate Shift},\nauthor={Xingdong Feng and Xin HE and Caixing Wang and Chao Wang and Jingnan Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yIcCkMUCtL}\n}", "github": "", "project": "", "reviewers": "bSPk;u3FW;qhGh;ZoXA", "pdf_size": 1863419, "rating": "5;6;7;7", "confidence": "4;1;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "94;56;48;264", "wc_strengths": "51;36;85;70", "wc_weaknesses": "133;29;320;38", "wc_questions": "10;47;63;100", "wc_limitations": "1;11;4;2", "wc_review": "289;179;520;474", "wc_reply_reviewers": "18;29;38;0", "wc_reply_authors": "21;19;21;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.5, 87.47999771376311 ], "wc_strengths_avg": [ 60.5, 18.580904176062045 ], "wc_weaknesses_avg": [ 130.0, 117.01922918905251 ], "wc_questions_avg": [ 55.0, 32.31872522238462 ], "wc_limitations_avg": [ 4.5, 3.905124837953327 ], "wc_review_avg": [ 365.5, 138.09145520270252 ], "wc_reply_reviewers_avg": [ 21.25, 14.16642156650719 ], "wc_reply_authors_avg": [ 15.25, 8.842369591913696 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9961865622081485756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sufe.edu.cn;ustc.edu.cn;shufe.edu.cn;sufe.edu;shufe.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Shanghai University of Finance and Economics;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "http://www.sufe.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "SUFE;USTC", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mechanism Design for Collaborative Normal Mean Estimation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69953", "id": "yKCLfOOIL7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/9af2b1d6acf561af9c4cf70d52c7a49d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yKCLfOOIL7", "openreview": "https://openreview.net/forum?id=yKCLfOOIL7", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69953", "video": "https://nips.cc/virtual/2023/poster/69953", "author_site": "Yiding Chen, Jerry Zhu, Kirthevasan Kandasamy", "tldr": "", "abstract": "We study collaborative normal mean estimation, where $m$ strategic agents collect i.i.d samples from a normal distribution $\\mathcal{N}(\\mu, \\sigma^2)$ at a cost. They all wish to estimate the mean $\\mu$. By sharing data with each other, agents can obtain better estimates while keeping the cost of data collection small. To facilitate this collaboration, we wish to design mechanisms that encourage agents to collect a sufficient amount of data and share it truthfully, so that they are all better off than working alone. In naive mechanisms, such as simply pooling and sharing all the data, an individual agent might find it beneficial to under-collect and/or fabricate data, which can lead to poor social outcomes. We design a novel mechanism that overcomes these challenges via two key techniques: first, when sharing the others' data with an agent, the mechanism corrupts this dataset proportional to how much the data reported by the agent differs from the others; second, we design minimax optimal estimators for the corrupted dataset. Our mechanism, which is Nash incentive compatible and individually rational, achieves a social penalty (sum of all agents' estimation errors and data collection costs) that is at most a factor 2 of the global minimum. When applied to high dimensional (non-Gaussian) distributions with bounded variance, this mechanism retains these three properties, but with slightly weaker results. 
Finally, in two special cases where we restrict the strategy space of the agents, we design mechanisms that essentially achieve the global minimum.", "keywords": "Mechanism design;statistical minimax estimation;federated learning", "primary_area": "", "supplementary_material": "/attachment/815b1e65164aa244d8bd23699e40b991f828d0b6.zip", "author": "Yiding Chen;Jerry Zhu;Kirthevasan Kandasamy", "authorids": "~Yiding_Chen1;~Jerry_Zhu1;~Kirthevasan_Kandasamy1", "gender": "M;M;M", "homepage": "https://chenyd.github.io;https://people.eecs.berkeley.edu/~kandasamy/research.html;http://pages.cs.wisc.edu/~jerryzhu/", "dblp": ";128/3628;z/XiaojinZhu", "google_scholar": "AtMBDPUAAAAJ;kohOJPcAAAAJ;https://scholar.google.com.tw/citations?user=hqTu-QcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yiding_Chen1;~Kirthevasan_Kandasamy1;~Xiaojin_Zhu1", "aff": "University of Wisconsin, Madison;Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;cs.wisc.edu;wisc.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2023mechanism,\ntitle={Mechanism Design for Collaborative Normal Mean Estimation},\nauthor={Yiding Chen and Jerry Zhu and Kirthevasan Kandasamy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yKCLfOOIL7}\n}", "github": "", "project": "", "reviewers": "8RFX;T2ys;ipny;jdRd;Sj1t;LHqC", "pdf_size": 705330, "rating": "6;6;6;7;7;8", "confidence": "3;4;4;4;2;4", "soundness": "3;3;4;4;3;4", "novelty": "3;3;3;3;3;4", "presentation": "3;3;4;4;3;4", "wc_summary": "131;175;81;126;53;496", "wc_strengths": "52;31;11;153;47;71", "wc_weaknesses": "138;28;199;41;185;78", "wc_questions": "88;152;16;16;4;1", "wc_limitations": "59;0;1;3;1;1", "wc_review": "468;386;308;339;290;647", "wc_reply_reviewers": "82;11;432;10;33;0", "wc_reply_authors": "117;0;1729;0;0;0", "reply_reviewers": "1;1;3;1;1;0", "reply_authors": "2;1;4;1;1;1", "rating_avg": [ 6.666666666666667, 0.7453559924999299 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 177.0, 147.80279654548715 ], "wc_strengths_avg": [ 60.833333333333336, 45.167896662218936 ], "wc_weaknesses_avg": [ 111.5, 66.8649135695745 ], "wc_questions_avg": [ 46.166666666666664, 55.66092784789784 ], "wc_limitations_avg": [ 10.833333333333334, 21.559349608825297 ], "wc_review_avg": [ 406.3333333333333, 122.38827104297572 ], "wc_reply_reviewers_avg": [ 94.66666666666667, 153.23583856990578 ], "wc_reply_authors_avg": [ 307.6666666666667, 637.073691882571 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.8975274678557507 ], "reply_authors_avg": [ 1.6666666666666667, 1.1055415967851332 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3461836837560253801&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "wisc.edu;cs.wisc.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UW-Madison", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": 
"0;0;0", "aff_country_unique": "United States" }, { "title": "Generalized Information-theoretic Multi-view Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69952", "id": "yN6NHZOXkg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7aa34d2d24f9bab3056993b7bfa0f1b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yN6NHZOXkg", "openreview": "https://openreview.net/forum?id=yN6NHZOXkg", "poster": "/media/PosterPDFs/NeurIPS%202023/69952.png?t=1699584362.080452", "slides": "https://nips.cc/virtual/2023/poster/69952", "video": "https://nips.cc/virtual/2023/poster/69952", "author_site": "Weitian Huang, Sirui Yang, Hongmin Cai", "tldr": "", "abstract": "In an era of more diverse data modalities, multi-view clustering has become a fundamental tool for comprehensive data analysis and exploration. However, existing multi-view unsupervised learning methods often rely on strict assumptions on semantic consistency among samples. In this paper, we reformulate the multi-view clustering problem from an information-theoretic perspective and propose a general theoretical model. In particular, we define three desiderata under multi-view unsupervised learning in terms of mutual information, namely, comprehensiveness, concentration, and cross-diversity. The multi-view variational lower bound is then obtained by approximating the samples' high-dimensional mutual information. The Kullback\u2013Leibler divergence is utilized to deduce sample assignments. Ultimately the information-based multi-view clustering model leverages deep neural networks and Stochastic Gradient Variational Bayes to achieve representation learning and clustering simultaneously. Extensive experiments on both synthetic and real datasets with wide types demonstrate that the proposed method exhibits a more stable and superior clustering performance than state-of-the-art algorithms.", "keywords": "information bottleneck;multi-view clustering;variational autoencoders", "primary_area": "", "supplementary_material": "", "author": "Weitian Huang;Sirui Yang;Hongmin Cai", "authorids": "~Weitian_Huang1;~Sirui_Yang1;~Hongmin_Cai1", "gender": "M;M;M", "homepage": ";https://github.com/PureRRR;http://www2.scut.edu.cn/bioinformatics/", "dblp": ";;50/3384", "google_scholar": "0iBuNX8AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-2747-7234", "linkedin": ";;", "or_profile": "~Weitian_Huang1;~Sirui_Yang1;~Hongmin_Cai1", "aff": ";South China University of Technology;South China University of Technology", "aff_domain": ";scut.edu.cn;scut.edu.cn", "position": ";MS student;Full Professor", "bibtex": "@inproceedings{\nhuang2023generalized,\ntitle={Generalized Information-theoretic Multi-view Clustering},\nauthor={Weitian Huang and Sirui Yang and Hongmin Cai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yN6NHZOXkg}\n}", "github": "", "project": "", "reviewers": "i2fg;Ayjx;bG4n;YKQa", "pdf_size": 657165, "rating": "4;6;6;6", "confidence": "4;4;4;5", "soundness": "2;3;3;2", "novelty": "2;3;3;2", "presentation": "1;2;3;1", "wc_summary": "50;85;44;42", "wc_strengths": "30;47;49;14", "wc_weaknesses": "137;158;60;228", "wc_questions": "104;35;8;2", "wc_limitations": "9;1;31;9", "wc_review": "330;326;192;295", "wc_reply_reviewers": "84;48;0;52", "wc_reply_authors": "233;28;0;24", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 
0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 55.25, 17.426631917843448 ], "wc_strengths_avg": [ 35.0, 14.19506956657839 ], "wc_weaknesses_avg": [ 145.75, 59.88478521294036 ], "wc_questions_avg": [ 37.25, 40.49305496007926 ], "wc_limitations_avg": [ 12.5, 11.169153951844338 ], "wc_review_avg": [ 285.75, 55.7959451931769 ], "wc_reply_reviewers_avg": [ 46.0, 30.0 ], "wc_reply_authors_avg": [ 71.25, 93.99833775126027 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1070456919014547372&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": ";scut.edu.cn;scut.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Three-Way Trade-Off in Multi-Objective Learning: Optimization, Generalization and Conflict-Avoidance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69951", "id": "yPkbdJxQ0o", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ddcf34623ca2d63823b6d40e4d980580-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yPkbdJxQ0o", "openreview": "https://openreview.net/forum?id=yPkbdJxQ0o", "poster": "/media/PosterPDFs/NeurIPS%202023/69951.png?t=1704163575.3053682", "slides": "https://nips.cc/virtual/2023/poster/69951", "video": "https://nips.cc/virtual/2023/poster/69951", "author_site": "Lisha Chen, Heshan Fernando, Yiming Ying, Tianyi Chen", "tldr": "", "abstract": "Multi-objective learning (MOL) often arises in emerging machine learning problems when multiple learning criteria or tasks need to be addressed. Recent works have developed various _dynamic weighting_ algorithms for MOL, including MGDA and its variants, whose central idea is to find an update direction that _avoids conflicts_ among objectives. Albeit its appealing intuition, empirical studies show that dynamic weighting methods may not always outperform static alternatives. To bridge this gap between theory and practice, we focus on a new variant of stochastic MGDA - the Multi-objective gradient with Double sampling (MoDo) algorithm and study its generalization performance and the interplay with optimization through the lens of algorithm stability. We find that the rationale behind MGDA -- updating along conflict-avoidant direction - may \\emph{impede} dynamic weighting algorithms from achieving the optimal ${\\cal O}(1/\\sqrt{n})$ population risk, where $n$ is the number of training samples. We further highlight the variability of dynamic weights and their impact on the three-way trade-off among optimization, generalization, and conflict avoidance that is unique in MOL. 
Code is available at https://github.com/heshandevaka/Trade-Off-MOL.", "keywords": "Generalization;algorithm stability;multi-objective optimization;gradient conflict", "primary_area": "", "supplementary_material": "/attachment/5894467fc15ba24cde58da88d4864eb15ca0ec40.pdf", "author": "Lisha Chen;Heshan Devaka Fernando;Yiming Ying;Tianyi Chen", "authorids": "~Lisha_Chen1;~Heshan_Devaka_Fernando1;~Yiming_Ying1;~Tianyi_Chen5", "gender": "F;M;M;M", "homepage": "https://lisha-chen.github.io/;https://heshandevaka.github.io/;https://www.sydney.edu.au/science/about/our-people/academic-staff/yiming-ying.html;https://chentianyi1991.github.io/", "dblp": "123/6690;;41/2012;", "google_scholar": "fh73S6gAAAAJ;QOXlyxIAAAAJ;xnA_lMMAAAAJ;kFwvv38AAAAJ", "orcid": "0000-0003-3858-5537;;;", "linkedin": ";;;", "or_profile": "~Lisha_Chen1;~Heshan_Devaka_Fernando1;~Yiming_Ying1;~Tianyi_Chen5", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;State University of New York at Albany;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;rpi.edu;albany.edu;rpi.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023threeway,\ntitle={Three-Way Trade-Off in Multi-Objective Learning: Optimization, Generalization and Conflict-Avoidance},\nauthor={Lisha Chen and Heshan Devaka Fernando and Yiming Ying and Tianyi Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yPkbdJxQ0o}\n}", "github": "", "project": "", "reviewers": "LuAm;UWFs;L3j5;CY8e", "pdf_size": 2039776, "rating": "4;5;6;7", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "65;39;87;95", "wc_strengths": "43;17;138;82", "wc_weaknesses": "186;64;195;143", "wc_questions": "31;27;62;3", "wc_limitations": "6;33;1;3", "wc_review": "331;180;483;326", "wc_reply_reviewers": "0;182;62;94", "wc_reply_authors": "114;482;505;622", "reply_reviewers": "0;2;1;2", "reply_authors": "3;4;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 21.742814905158898 ], "wc_strengths_avg": [ 70.0, 45.56862956025779 ], "wc_weaknesses_avg": [ 147.0, 51.79285665031424 ], "wc_questions_avg": [ 30.75, 20.980645843252777 ], "wc_limitations_avg": [ 10.75, 12.968712349342937 ], "wc_review_avg": [ 330.0, 107.15176153475034 ], "wc_reply_reviewers_avg": [ 84.5, 65.65630205852291 ], "wc_reply_authors_avg": [ 430.75, 190.42501805172554 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7436794543852796542&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "rpi.edu;rpi.edu;albany.edu;rpi.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;State University of New York", "aff_unique_dep": ";", "aff_unique_url": "https://www.rpi.edu;https://www.albany.edu", "aff_unique_abbr": "RPI;SUNY Albany", "aff_campus_unique_index": "1", "aff_campus_unique": ";Albany", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "yQSb1n56lE", "title": "RFold: RNA Secondary 
Structure Prediction with Decoupled Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "The secondary structure of ribonucleic acid (RNA) is more stable and accessible in the cell than its tertiary structure, making it essential for functional prediction. Although deep learning has shown promising results in this field, current methods suffer from poor generalization and high complexity. In this work, we present RFold, a simple yet effective RNA secondary structure prediction in an end-to-end manner. RFold introduces a decoupled optimization process that decomposes the vanilla constraint satisfaction problem into row-wise and column-wise optimization, simplifying the solving process while guaranteeing the validity of the output. Moreover, RFold adopts attention maps as informative representations instead of designing hand-crafted features. Extensive experiments demonstrate that RFold achieves competitive performance and about eight times faster inference efficiency than the state-of-the-art method.", "keywords": "Bioinformatics;Molecular structure prediction", "primary_area": "", "supplementary_material": "/attachment/3ef6335835a68da8e7a904756e231dcef1a1bada.zip", "author": "Cheng Tan;Zhangyang Gao;Lingzhi Gu;Ge Wang;Lirong Wu;Jun Xia;Jiangbin Zheng;Stan Z. Li", "authorids": "~Cheng_Tan1;~Zhangyang_Gao1;~Lingzhi_Gu2;~Ge_Wang3;~Lirong_Wu1;~Jun_Xia1;~Jiangbin_Zheng3;~Stan_Z._Li2", "gender": "M;M;F;;;M;M;M", "homepage": "https://chengtan9907.github.io/;;http://www.amss.ac.cn/;;;http://junxia97.github.io/;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "70/1533-12.html;275/3266;;34/5591;15/10330;;;l/StanZLi", "google_scholar": "6kTV6aMAAAAJ;4SclT-QAAAAJ;;https://scholar.google.com.hk/citations?user=t9GUEMoAAAAJ;Tk7TrCoAAAAJ;aPKKpSYAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0003-1026-6083;;0000-0001-8553-6493;;;0000-0003-3305-0103;", "linkedin": ";;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Cheng_Tan1;~Zhangyang_Gao1;~Lingzhi_Gu2;~Ge_Wang3;~Lirong_Wu1;~Jun_Xia1;~Jiangbin_Zheng3;~Stan_Z._Li1", "aff": "Zhejiang University & Westlake University;Westlake University, China;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;WESTLAKE UNIVERSITY;Westlake University;Westlake University, China;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;amss.ac.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;PhD student;MS student;PhD student;PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@misc{\ntan2023rfold,\ntitle={{RF}old: {RNA} Secondary Structure Prediction with Decoupled Optimization},\nauthor={Cheng Tan and Zhangyang Gao and Lingzhi Gu and Ge Wang and Lirong Wu and Jun Xia and Jiangbin Zheng and Stan Z. 
Li},\nyear={2023},\nurl={https://openreview.net/forum?id=yQSb1n56lE}\n}", "github": "", "project": "", "reviewers": "G61p;FGw2;RVaK;ZyLM", "site": "https://openreview.net/forum?id=yQSb1n56lE", "pdf_size": 790644, "rating": "4;5;5;7", "confidence": "3;2;3;4", "soundness": "2;2;2;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "50;47;70;35", "wc_strengths": "38;61;81;71", "wc_weaknesses": "127;20;310;98", "wc_questions": "1;29;47;22", "wc_limitations": "7;12;64;5", "wc_review": "223;169;572;231", "wc_reply_reviewers": "126;12;231;0", "wc_reply_authors": "508;0;42;0", "reply_reviewers": "3;1;1;0", "reply_authors": "5;1;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 50.5, 12.579745625409124 ], "wc_strengths_avg": [ 62.75, 15.943258763502524 ], "wc_weaknesses_avg": [ 138.75, 106.33290882882872 ], "wc_questions_avg": [ 24.75, 16.467771555374455 ], "wc_limitations_avg": [ 22.0, 24.38237068047322 ], "wc_review_avg": [ 298.75, 159.5530867767841 ], "wc_reply_reviewers_avg": [ 92.25, 93.99567809213357 ], "wc_reply_authors_avg": [ 137.5, 214.59438482868092 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11044875271931610050&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1;1;1;1;1", "aff_unique_norm": "Zhejiang University;Westlake University;Chinese Academy of Sciences", "aff_unique_dep": ";;Academy of Mathematics and Systems Science", "aff_unique_url": "http://www.zju.edu.cn;https://www.westlake.edu.cn;http://www.cas.cn", "aff_unique_abbr": "ZJU;WU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fast Conditional Mixing of MCMC Algorithms for Non-log-concave Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69950", "id": "yT0f93CeTw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2b00b3331bd0f5fbfdd966ac06338f6d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yT0f93CeTw", "openreview": "https://openreview.net/forum?id=yT0f93CeTw", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69950", "video": "https://nips.cc/virtual/2023/poster/69950", "author_site": "Xiang Cheng, Bohan Wang, Jingzhao Zhang, Yusong Zhu", "tldr": "", "abstract": "MCMC algorithms offer empirically efficient tools for sampling from a target distribution $\\pi(x) \\propto \\exp(-V(x))$. However, on the theory side, MCMC algorithms suffer from a slow mixing rate when $\\pi(x)$ is non-log-concave. Our work examines this gap and shows that when a Poincar\\'e-style inequality holds on a subset $\\mathcal{X}$ of the state space, the conditional distribution of MCMC iterates over $\\mathcal{X}$ mixes fast to the true conditional distribution. This fast mixing guarantee can hold in cases when global mixing is provably slow. We formalize the statement and quantify the conditional mixing rate.
We further show that conditional mixing can have interesting implications for sampling from mixtures of Gaussians, parameter estimation for Gaussian mixture models, and Gibbs-sampling with well-connected local minima.", "keywords": "Sampling;MCMC;Conditional Mixing;Non-log-concave Distributions", "primary_area": "", "supplementary_material": "", "author": "Xiang Cheng;Bohan Wang;Jingzhao Zhang;Yusong Zhu", "authorids": "~Xiang_Cheng1;~Bohan_Wang1;~Jingzhao_Zhang2;~Yusong_Zhu1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/berkeley.edu/xiangcheng/home;https://bhwangfy.github.io/;https://sites.google.com/view/jingzhao/home;https://github.com/zys996", "dblp": "29/1059-6;202/1184;220/5559;", "google_scholar": "-WJinlEAAAAJ;LfkHCEUAAAAJ;8NudxYsAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiang_Cheng1;~Bohan_Wang1;~Jingzhao_Zhang2;~Yusong_Zhu1", "aff": "Massachusetts Institute of Technology;Microsoft Research Asia, University of Science and Technology of China;Tsinghua University;Tsinghua University", "aff_domain": "mit.edu;ustc.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "Postdoc;PhD student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\ncheng2023fast,\ntitle={Fast Conditional Mixing of {MCMC} Algorithms for Non-log-concave Distributions},\nauthor={Xiang Cheng and Bohan Wang and Jingzhao Zhang and Yusong Zhu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yT0f93CeTw}\n}", "github": "", "project": "", "reviewers": "7WcG;Lt2V;UgtB;Edvq", "pdf_size": 5761228, "rating": "6;7;7;7", "confidence": "1;4;4;4", "soundness": "3;2;3;4", "novelty": "3;3;3;3", "presentation": "3;2;4;2", "wc_summary": "76;38;521;67", "wc_strengths": "48;98;162;129", "wc_weaknesses": "11;504;295;174", "wc_questions": "23;93;58;226", "wc_limitations": "12;1;1;7", "wc_review": "170;734;1037;603", "wc_reply_reviewers": "7;383;6;158", "wc_reply_authors": "0;780;0;268", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 175.5, 199.96812245955604 ], "wc_strengths_avg": [ 109.25, 41.984372092482225 ], "wc_weaknesses_avg": [ 246.0, 179.84298707483703 ], "wc_questions_avg": [ 100.0, 76.84074440035052 ], "wc_limitations_avg": [ 5.25, 4.602988159880492 ], "wc_review_avg": [ 636.0, 311.708998907635 ], "wc_reply_reviewers_avg": [ 138.5, 154.1176498652896 ], "wc_reply_authors_avg": [ 262.0, 318.4525082331744 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13849386934241965928&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;ustc.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft;Tsinghua University", "aff_unique_dep": ";Research;", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-asia;https://www.tsinghua.edu.cn", "aff_unique_abbr": "MIT;MSRA;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;1", 
"aff_country_unique": "United States;China" }, { "title": "PGDiff: Guiding Diffusion Models for Versatile Face Restoration via Partial Guidance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69949", "id": "yThjbzhIUP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/661c37f3b098bdee53fd7d9c4ef6964a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yThjbzhIUP", "openreview": "https://openreview.net/forum?id=yThjbzhIUP", "poster": "/media/PosterPDFs/NeurIPS%202023/69949.png?t=1701511703.9047136", "slides": "https://nips.cc/virtual/2023/poster/69949", "video": "https://nips.cc/virtual/2023/poster/69949", "author_site": "Peiqing Yang, Shangchen Zhou, Qingyi Tao, Chen Change Loy", "tldr": "", "abstract": "Exploiting pre-trained diffusion models for restoration has recently become a favored alternative to the traditional task-specific training approach. Previous works have achieved noteworthy success by limiting the solution space using explicit degradation models. However, these methods often fall short when faced with complex degradations as they generally cannot be precisely modeled. In this paper, we introduce $\\textit{partial guidance}$, a fresh perspective that is more adaptable to real-world degradations compared to existing works. Rather than specifically defining the degradation process, our approach models the desired properties, such as image structure and color statistics of high-quality images, and applies this guidance during the reverse diffusion process. These properties are readily available and make no assumptions about the degradation process. When combined with a diffusion prior, this partial guidance can deliver appealing results across a range of restoration tasks. Additionally, our method can be extended to handle composite tasks by consolidating multiple high-quality image properties, achieved by integrating the guidance from respective tasks. 
Experimental results demonstrate that our method not only outperforms existing diffusion-prior-based approaches but also competes favorably with task-specific models.", "keywords": "Face Restoration;Diffusion", "primary_area": "", "supplementary_material": "/attachment/f8627296f94b277a5a555616c8166f2cc60a5a54.pdf", "author": "Peiqing Yang;Shangchen Zhou;Qingyi Tao;Chen Change Loy", "authorids": "~Peiqing_Yang1;~Shangchen_Zhou1;~Qingyi_Tao1;~Chen_Change_Loy2", "gender": "F;M;;M", "homepage": "https://pq-yang.github.io/;https://shangchenzhou.com;;https://www.mmlab-ntu.com/person/ccloy/index.html", "dblp": "219/5327;191/5298;;01/5855", "google_scholar": "s2yHgo8AAAAJ;https://scholar.google.com.hk/citations?user=suaDwBQAAAAJ;;https://scholar.google.co.uk/citations?user=559LF80AAAAJ", "orcid": ";0000-0001-8201-8877;;0000-0001-5345-1591", "linkedin": "Peiqing-Yang-197856185/;;;", "or_profile": "~Peiqing_Yang1;~Shangchen_Zhou1;~Qingyi_Tao1;~Chen_Change_Loy2", "aff": "National University of Singapore;Nanyang Technological University;;Nanyang Technological University", "aff_domain": "u.nus.edu;ntu.edu.sg;;ntu.edu.sg", "position": "MS student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nyang2023pgdiff,\ntitle={{PGD}iff: Guiding Diffusion Models for Versatile Face Restoration via Partial Guidance},\nauthor={Peiqing Yang and Shangchen Zhou and Qingyi Tao and Chen Change Loy},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yThjbzhIUP}\n}", "github": "", "project": "", "reviewers": "hvsJ;FgM3;ggEw;GVHx", "pdf_size": 17503515, "rating": "3;5;6;6", "confidence": "5;4;4;5", "soundness": "3;3;3;2", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "42;67;67;57", "wc_strengths": "39;44;81;52", "wc_weaknesses": "204;69;78;63", "wc_questions": "3;47;85;60", "wc_limitations": "59;15;8;65", "wc_review": "347;242;319;297", "wc_reply_reviewers": "27;19;17;15", "wc_reply_authors": "118;66;66;18", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.25, 10.231690964840562 ], "wc_strengths_avg": [ 54.0, 16.263455967290593 ], "wc_weaknesses_avg": [ 103.5, 58.268773798665094 ], "wc_questions_avg": [ 48.75, 29.73529048117741 ], "wc_limitations_avg": [ 36.75, 25.459526704163217 ], "wc_review_avg": [ 301.25, 38.525154120392564 ], "wc_reply_reviewers_avg": [ 19.5, 4.55521678957215 ], "wc_reply_authors_avg": [ 67.0, 35.369478367654786 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2221352725678047888&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u.nus.edu;ntu.edu.sg;;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "National University of Singapore;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "NUS;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Diverse Shape Completion via Style Modulated Generative Adversarial 
Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69948", "id": "yVMlYSL1Bp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fa68ea2ff794ce792a688dec82c04f49-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yVMlYSL1Bp", "openreview": "https://openreview.net/forum?id=yVMlYSL1Bp", "poster": "/media/PosterPDFs/NeurIPS%202023/69948.png?t=1702007008.5640402", "slides": "https://nips.cc/virtual/2023/poster/69948", "video": "https://nips.cc/virtual/2023/poster/69948", "author_site": "Wesley Khademi, Fuxin Li", "tldr": "", "abstract": "Shape completion aims to recover the full 3D geometry of an object from a partial observation. This problem is inherently multi-modal since there can be many ways to plausibly complete the missing regions of a shape. Such diversity would be indicative of the underlying uncertainty of the shape and could be preferable for downstream tasks such as planning. In this paper, we propose a novel conditional generative adversarial network that can produce many diverse plausible completions of a partially observed point cloud. To enable our network to produce multiple completions for the same partial input, we introduce stochasticity into our network via style modulation. By extracting style codes from complete shapes during training, and learning a distribution over them, our style codes can explicitly carry shape category information leading to better completions. We further introduce diversity penalties and discriminators at multiple scales to prevent conditional mode collapse and to train without the need for multiple ground truth completions for each partial input. Evaluations across several synthetic and real datasets demonstrate that our method achieves significant improvements in respecting the partial observations while obtaining greater diversity in completions.", "keywords": "multimodal shape completion;point cloud completion;3d shape generation;generative modeling;generative adversarial networks", "primary_area": "", "supplementary_material": "/attachment/712ac15ea29abf9c6cb72b76bf5797ee38ced5be.zip", "author": "Wesley Khademi;Li Fuxin", "authorids": "~Wesley_Khademi1;~Li_Fuxin1", "gender": "M;M", "homepage": "https://wkhademi.github.io;http://web.engr.oregonstate.edu/~lif/", "dblp": "259/3007;03/2783", "google_scholar": "fyO3OFcAAAAJ;snDpfA0AAAAJ", "orcid": "0000-0002-1805-685X;", "linkedin": ";", "or_profile": "~Wesley_Khademi1;~Fuxin_Li1", "aff": "Meta;Apple", "aff_domain": "meta.com;apple.com", "position": "Intern;Researcher", "bibtex": "@inproceedings{\nkhademi2023diverse,\ntitle={Diverse Shape Completion via Style Modulated Generative Adversarial Networks},\nauthor={Wesley Khademi and Li Fuxin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yVMlYSL1Bp}\n}", "github": "", "project": "", "reviewers": "Ahkf;ohCS;R39m;iNun;A3pQ", "pdf_size": 14485235, "rating": "5;5;6;6;7", "confidence": "4;5;4;4;3", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "78;59;87;93;50", "wc_strengths": "12;18;49;44;36", "wc_weaknesses": "127;129;227;124;101", "wc_questions": "24;1;97;8;73", "wc_limitations": "25;25;49;5;12", "wc_review": "266;232;509;274;272", "wc_reply_reviewers": "23;42;147;0;14", "wc_reply_authors": "0;170;24;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 
0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 73.4, 16.402438843050138 ], "wc_strengths_avg": [ 31.8, 14.455448799674121 ], "wc_weaknesses_avg": [ 141.6, 43.87071916438116 ], "wc_questions_avg": [ 40.6, 37.78147694307357 ], "wc_limitations_avg": [ 23.2, 15.025311976794358 ], "wc_review_avg": [ 310.6, 100.35855718372999 ], "wc_reply_reviewers_avg": [ 45.2, 52.69307354861738 ], "wc_reply_authors_avg": [ 38.8, 66.25526394181823 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8451542547285165, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18137240897951000706&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;apple.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Apple", "aff_unique_dep": "Meta Platforms, Inc.;Apple Inc.", "aff_unique_url": "https://meta.com;https://www.apple.com", "aff_unique_abbr": "Meta;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "yVWBv0N1xC", "title": "LayerNAS: Neural Architecture Search in Polynomial Complexity", "track": "main", "status": "Reject", "tldr": "", "abstract": "\nNeural Architecture Search (NAS) has become a popular method for discovering effective model architectures, especially for target hardware. As such, NAS methods that find optimal architectures under constraints are essential. In our paper, we propose LayerNAS to address the challenge of multi-objective NAS by transforming it into a combinatorial optimization problem, which effectively constrains the search complexity to be polynomial. \n\nLayerNAS rigorously derives its method from the fundamental assumption that modifications to previous layers have no impact on the subsequent layers. When dealing with search spaces containing $L$ layers that meet this requirement, the method performs layerwise-search for each layer, selecting from a set of search options $\\mathbb{S}$. LayerNAS groups model candidates based on one objective, such as model size or latency, and searches for the optimal model based on another objective, thereby splitting the cost and reward elements of the search. This approach limits the search complexity to $ O(H \\cdot |\\mathbb{S}| \\cdot L) $, where $H$ is a constant set in LayerNAS.\n\nOur experiments show that LayerNAS is able to consistently discover superior models across a variety of search spaces in comparison to strong baselines, including search spaces derived from NATS-Bench, MobileNetV2 and MobileNetV3. 
\n", "keywords": "AutoML;Neural Architecture Search;Model Optimization", "primary_area": "", "supplementary_material": "/attachment/7a9e088e2fba6c0e34130864199363bfd457cebc.zip", "author": "Yicheng Fan;Dana Alon;JINGYUE SHEN;Daiyi Peng;Keshav Kumar;Yun Long;Xin Wang;Fotis Iliopoulos;Da-Cheng Juan;Erik Vee", "authorids": "~Yicheng_Fan1;~Dana_Alon1;~JINGYUE_SHEN1;~Daiyi_Peng1;~Keshav_Kumar1;~Yun_Long1;~Xin_Wang30;~Fotis_Iliopoulos1;~Da-Cheng_Juan1;~Erik_Vee1", "gender": ";;;M;M;M;M;M;;", "homepage": ";;https://www.linkedin.com/in/jingyue-brian-shen/;http://www.daiyip.org;https://keshv.in;https://scholar.google.com/citations?user=DNHsO0gAAAAJ&hl=en&oi=sra;;http://www.filiop.org/;;", "dblp": "289/0992;136/8637;229/7228;;;;;147/4790;47/1564;", "google_scholar": "LTZjlnwAAAAJ;0WEF4fkAAAAJ;wWAFw8UAAAAJ;_8Egwg8AAAAJ;;DNHsO0gAAAAJ;7BjA8ccAAAAJ;v3e5F-AAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;;;;", "linkedin": "https://linkedin.com/in/yicheng-fan-b4b79124;dana-alon;;;keshav-kr;;;;;", "or_profile": "~Yicheng_Fan1;~Dana_Alon1;~JINGYUE_SHEN1;~Daiyi_Peng1;~Keshav_Kumar1;~Yun_Long1;~Xin_Wang30;~Fotis_Iliopoulos1;~Da-Cheng_Juan1;~Erik_Vee1", "aff": "Google;Research, Google;Google;;;;Google;Google;Google Research;", "aff_domain": "google.com;research.google.com;google.com;;;;google.com;google.com;google.com;", "position": "Software Engineer;Researcher;Software Engineer;;;;Software Engineer;Researcher;Senior Software Engineer;", "bibtex": "@misc{\nfan2023layernas,\ntitle={Layer{NAS}: Neural Architecture Search in Polynomial Complexity},\nauthor={Yicheng Fan and Dana Alon and JINGYUE SHEN and Daiyi Peng and Keshav Kumar and Yun Long and Xin Wang and Fotis Iliopoulos and Da-Cheng Juan and Erik Vee},\nyear={2023},\nurl={https://openreview.net/forum?id=yVWBv0N1xC}\n}", "github": "", "project": "", "reviewers": "2kK8;rnJz;sGok;twJy", "site": "https://openreview.net/forum?id=yVWBv0N1xC", "pdf_size": 474726, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "191;85;59;16", "wc_strengths": "91;19;58;77", "wc_weaknesses": "66;52;104;16", "wc_questions": "133;75;4;1", "wc_limitations": "8;26;1;1", "wc_review": "489;257;226;111", "wc_reply_reviewers": "472;33;0;0", "wc_reply_authors": "1391;0;0;0", "reply_reviewers": "3;1;0;0", "reply_authors": "4;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.75, 64.50339138370943 ], "wc_strengths_avg": [ 61.25, 27.058963394779187 ], "wc_weaknesses_avg": [ 59.5, 31.5079355083763 ], "wc_questions_avg": [ 53.25, 54.74657523535148 ], "wc_limitations_avg": [ 9.0, 10.222524150130436 ], "wc_review_avg": [ 270.75, 137.24499080112176 ], "wc_reply_reviewers_avg": [ 126.25, 200.07295544375808 ], "wc_reply_authors_avg": [ 347.75, 602.320668332077 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14910580986110710568&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", 
"aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FETV: A Benchmark for Fine-Grained Evaluation of Open-Domain Text-to-Video Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73413", "id": "yWpY5I3XyX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c481049f7410f38e788f67c171c64ad5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=yWpY5I3XyX", "openreview": "https://openreview.net/forum?id=yWpY5I3XyX", "poster": "/media/PosterPDFs/NeurIPS%202023/73413.png?t=1699275451.000315", "slides": "https://nips.cc/virtual/2023/poster/73413", "video": "https://nips.cc/virtual/2023/poster/73413", "author_site": "Yuanxin Liu, Lei Li, Shuhuai Ren, Rundong Gao, Shicheng Li, Sishuo Chen, Xu Sun, Lu Hou", "tldr": "", "abstract": "Recently, open-domain text-to-video (T2V) generation models have made remarkable progress. However, the promising results are mainly shown by the qualitative cases of generated videos, while the quantitative evaluation of T2V models still faces two critical problems. Firstly, existing studies lack fine-grained evaluation of T2V models on different categories of text prompts. Although some benchmarks have categorized the prompts, their categorization either only focuses on a single aspect or fails to consider the temporal information in video generation. Secondly, it is unclear whether the automatic evaluation metrics are consistent with human standards. To address these problems, we propose **FETV**, a benchmark for **F**ine-grained **E**valuation of **T**ext-to-**V**ideo generation. FETV is multi-aspect, categorizing the prompts based on three orthogonal aspects: the major content, the attributes to control and the prompt complexity. FETV is also temporal-aware, which introduces several temporal categories tailored for video generation. \nBased on FETV, we conduct comprehensive manual evaluations of four representative T2V models, revealing their pros and cons on different categories of prompts from different aspects. We also extend FETV as a testbed to evaluate the reliability of automatic T2V metrics. The multi-aspect categorization of FETV enables fine-grained analysis of the metrics' reliability in different scenarios. We find that existing automatic metrics (e.g., CLIPScore and FVD) correlate poorly with human evaluation. To address this problem, we explore several solutions to improve CLIPScore and FVD, and develop two automatic metrics that exhibit significant higher correlation with humans than existing metrics. 
Benchmark page: https://github.com/llyx97/FETV.", "keywords": "text-to-video generation;fine-grained evaluation;multi-aspect benchmark", "primary_area": "", "supplementary_material": "", "author": "Yuanxin Liu;Lei Li;Shuhuai Ren;Rundong Gao;Shicheng Li;Sishuo Chen;Xu Sun;Lu Hou", "authorids": "~Yuanxin_Liu1;~Lei_Li14;~Shuhuai_Ren1;~Rundong_Gao1;~Shicheng_Li1;~Sishuo_Chen1;~Xu_Sun1;~Lu_Hou2", "gender": "M;M;M;;M;M;M;F", "homepage": "https://llyx97.github.io/;https://renshuhuai-andy.github.io/;;https://lscpku.github.io/;https://pkucss.github.io/;https://xusun.org/;https://lilei-nlp.github.io;https://houlu369.github.io/", "dblp": "55/5877;50/9511.html;;;279/6225;37/1971-1;13/7007-39;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=3X8yS-cAAAAJ;99RqPq4AAAAJ;hsTCc1MAAAAJ;Jn6gAIAAAAAJ;https://scholar.google.com/citations?hl=en;MeV4GGsAAAAJ;https://scholar.google.com.hk/citations?user=rnjoL5cAAAAJ", "orcid": ";;;;;;0009-0008-6984-5104;", "linkedin": ";shuhuai-ren-69580817a/;;;;;;", "or_profile": "~Yuanxin_Liu1;~Shuhuai_Ren1;~Rundong_Gao1;~Shicheng_Li1;~Sishuo_Chen1;~Xu_Sun1;~Tobias_Lee1;~LU_HOU1", "aff": "Peking University;Peking University;Peking University;Peking University;Peking University;Peking University;Peking University;Huawei Technologies Ltd.", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;huawei.com", "position": "PhD student;PhD student;PhD student;PhD student;MS student;Associate Professor;MS student;researcher", "bibtex": "@inproceedings{\nliu2023fetv,\ntitle={{FETV}: A Benchmark for Fine-Grained Evaluation of Open-Domain Text-to-Video Generation},\nauthor={Yuanxin Liu and Lei Li and Shuhuai Ren and Rundong Gao and Shicheng Li and Sishuo Chen and Xu Sun and Lu Hou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=yWpY5I3XyX}\n}", "github": "", "project": "", "reviewers": "2CxJ;5H2c;gcDZ;HZhg", "pdf_size": 11393355, "rating": "5;6;6;6", "confidence": "4;5;4;3", "wc_summary_and_contributions": "30;50;88;86", "wc_strengths": "86;31;56;38", "wc_improvement": "267;61;76;36", "wc_limitations": "6;41;56;32", "wc_correctness": "10;1;8;16", "wc_clarity": "2;1;4;7", "wc_relation_to_prior_work": "134;1;8;23", "wc_documentation": "12;1;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "548;188;302;240", "wc_reply_reviewers": "0;229;0;0", "wc_reply_authors": "1174;1676;376;585", "reply_reviewers": "0;2;0;0", "reply_authors": "3;3;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 63.5, 24.550967394381836 ], "wc_strengths_avg": [ 52.75, 21.25294097295713 ], "wc_improvement_avg": [ 110.0, 91.76328241731548 ], "wc_limitations_avg": [ 33.75, 18.171062159378575 ], "wc_correctness_avg": [ 8.75, 5.356071321407137 ], "wc_clarity_avg": [ 3.5, 2.29128784747792 ], "wc_relation_to_prior_work_avg": [ 41.5, 53.99305510896749 ], "wc_documentation_avg": [ 4.75, 4.493050188902857 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 319.5, 137.95923310891519 ], "wc_reply_reviewers_avg": [ 57.25, 99.15990873331822 ], "wc_reply_authors_avg": [ 952.75, 509.88301354330287 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 68, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16797871499191104183&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;huawei.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;1", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "OpenGSL: A Comprehensive Benchmark for Graph Structure Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73412", "id": "yXLyhKvK4D", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/39f8ef62e061042cca8c8f46d7e0e31b-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=yXLyhKvK4D", "openreview": "https://openreview.net/forum?id=yXLyhKvK4D", "poster": "/media/PosterPDFs/NeurIPS%202023/73412.png?t=1699358422.8605897", "slides": "https://nips.cc/virtual/2023/poster/73412", "video": "https://nips.cc/virtual/2023/poster/73412", "author_site": "Zhou Zhiyao, Sheng Zhou, Bochao Mao, Xuanyi Zhou, Jiawei Chen, Qiaoyu Tan, Daochen Zha, Yan Feng, Chun Chen, Can Wang", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as the *de facto* standard for representation learning on graphs, owing to their ability to effectively integrate graph topology and node attributes. However, the inherent suboptimal nature of node connections, resulting from the complex and contingent formation process of graphs, presents significant challenges in modeling them effectively. To tackle this issue, Graph Structure Learning (GSL), a family of data-centric learning approaches, has garnered substantial attention in recent years. The core concept behind GSL is to jointly optimize the graph structure and the corresponding GNN models. Despite the proposal of numerous GSL methods, the progress in this field remains unclear due to inconsistent experimental protocols, including variations in datasets, data processing techniques, and splitting strategies. In this paper, we introduce OpenGSL, the first comprehensive benchmark for GSL, aimed at addressing this gap. OpenGSL enables a fair comparison among state-of-the-art GSL methods by evaluating them across various popular datasets using uniform data processing and splitting strategies. Through extensive experiments, we observe that existing GSL methods do not consistently outperform vanilla GNN counterparts. We also find that there is no significant correlation between the homophily of the learned structure and task performance, challenging the common belief. Moreover, we observe that the learned graph structure demonstrates a strong generalization ability across different GNN models, despite the high computational and space consumption. We hope that our open-sourced library will facilitate rapid and equitable evaluation and inspire further innovative research in this field. 
The code of the benchmark can be found in https://github.com/OpenGSL/OpenGSL.", "keywords": "Graph Representation Learning;Graph Structure Learning;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Zhiyao Zhou;Sheng Zhou;Bochao Mao;Xuanyi Zhou;Jiawei Chen;Qiaoyu Tan;Daochen Zha;Yan Feng;Chun Chen;Can Wang", "authorids": "~Zhiyao_Zhou1;~Sheng_Zhou1;~Bochao_Mao2;~Xuanyi_Zhou1;~Jiawei_Chen6;~Qiaoyu_Tan2;~Daochen_Zha1;~Yan_Feng3;~Chun_Chen1;~Can_Wang5", "gender": "M;M;M;M;M;;F;M;M;M", "homepage": "https://zhoushengisnoob.github.io/;;;https://jiawei-chen.github.io/;https://qiaoyu-tan.github.io/;http://dczha.com/;https://person.zju.edu.cn/en/0085162;https://person.zju.edu.cn/en/0082004;https://person.zju.edu.cn/en/wangcan;", "dblp": "34/4858-4.html;;;03/1390-7;197/5465.html;167/0903;;07/4182-0001.html;71/4716-1;349/7778", "google_scholar": "https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;;V9bOnV4AAAAJ;jK0NgMcAAAAJ;;;https://scholar.google.fr/citations?user=C63q3HoAAAAJ;https://scholar.google.cz/citations?user=qu5sArsAAAAJ", "orcid": "0000-0003-3645-1041;0000-0001-6823-459X;;0000-0002-4752-2629;0000-0001-8999-968X;0000-0002-6677-7504;;0000-0002-6198-7481;0000-0002-5890-4307;", "linkedin": ";;;;;daochen-zha;;;;", "or_profile": "~Sheng_Zhou1;~Bochao_Mao2;~Xuanyi_Zhou1;~Jiawei_Chen6;~Qiaoyu_Tan2;~Daochen_Zha1;~Yan_Feng3;~Chun_Chen1;~Can_Wang5;~Zhou_Zhiyao1", "aff": "Zhejiang University;East China Normal University;Zhejiang University;Zhejiang University;Texas A&M;Rice University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;ecnu.edu.cn;zju.edu.cn;zju.edu.cn;tamu.edu;rice.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "Associate Professor;Undergrad student;Undergrad student;Researcher;PhD student;PhD student;Associate Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhou2023opengsl,\ntitle={Open{GSL}: A Comprehensive Benchmark for Graph Structure Learning},\nauthor={Zhiyao Zhou and Sheng Zhou and Bochao Mao and Xuanyi Zhou and Jiawei Chen and Qiaoyu Tan and Daochen Zha and Yan Feng and Chun Chen and Can Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=yXLyhKvK4D}\n}", "github": "", "project": "", "reviewers": "Vfn3;idmy;cdzV;VFMG;zkm3", "pdf_size": 1119897, "rating": "4;6;6;7;7", "confidence": "5;3;4;4;3", "wc_summary_and_contributions": "37;72;68;207;48", "wc_strengths": "16;67;62;53;100", "wc_improvement": "603;297;161;70;137", "wc_limitations": "6;1;58;6;2", "wc_correctness": "16;15;25;19;12", "wc_clarity": "64;11;10;15;12", "wc_relation_to_prior_work": "1;15;16;10;15", "wc_documentation": "1;15;9;9;2", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "745;494;410;390;329", "wc_reply_reviewers": "312;0;0;5;0", "wc_reply_authors": "2686;520;621;252;393", "reply_reviewers": "1;0;0;1;0", "reply_authors": "6;1;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 86.4, 61.652574966500794 ], "wc_strengths_avg": [ 59.6, 26.971095639591653 ], "wc_improvement_avg": [ 253.6, 189.6540007487319 ], "wc_limitations_avg": [ 14.6, 21.795412361320444 ], "wc_correctness_avg": [ 17.4, 4.409081537009721 ], "wc_clarity_avg": [ 22.4, 20.86719914123599 ], 
"wc_relation_to_prior_work_avg": [ 11.4, 5.607138307550475 ], "wc_documentation_avg": [ 7.2, 5.15363949069005 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 473.6, 145.61401031494188 ], "wc_reply_reviewers_avg": [ 63.4, 124.3150835578692 ], "wc_reply_authors_avg": [ 894.4, 904.3045062366989 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 2.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.7319250547113999, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7081577246810646485&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;ecnu.edu.cn;zju.edu.cn;zju.edu.cn;tamu.edu;rice.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;1;0;0;2;3;0;0;0;0", "aff_unique_norm": "Zhejiang University;East China Normal University;Texas A&M University;Rice University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;http://www.ecnu.edu.cn;https://www.tamu.edu;https://www.rice.edu", "aff_unique_abbr": "ZJU;ECNU;TAMU;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Star-Shaped Denoising Diffusion Probabilistic Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69947", "id": "yYUdgbmhh9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1fcefa894924bb1688041b7a26fb8aea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yYUdgbmhh9", "openreview": "https://openreview.net/forum?id=yYUdgbmhh9", "poster": "/media/PosterPDFs/NeurIPS%202023/69947.png?t=1701807533.4031358", "slides": "https://nips.cc/virtual/2023/poster/69947", "video": "https://nips.cc/virtual/2023/poster/69947", "author_site": "Andrey Okhotin, Dmitry Molchanov, Arkhipkin Vladimir, Grigory Bartosh, Viktor Ohanesian, Aibek Alanov, Dmitry Vetrov", "tldr": "", "abstract": "Denoising Diffusion Probabilistic Models (DDPMs) provide the foundation for the recent breakthroughs in generative modeling.\nTheir Markovian structure makes it difficult to define DDPMs with distributions other than Gaussian or discrete.\nIn this paper, we introduce Star-Shaped DDPM (SS-DDPM).\nIts *star-shaped diffusion process* allows us to bypass the need to define the transition probabilities or compute posteriors.\nWe establish duality between star-shaped and specific Markovian diffusions for the exponential family of distributions and derive efficient algorithms for training and sampling from SS-DDPMs.\nIn the case of Gaussian distributions, SS-DDPM is equivalent to DDPM.\nHowever, SS-DDPMs provide a simple recipe for designing diffusion models with distributions such as Beta, von Mises\u2013Fisher, Dirichlet, Wishart and others, which can be especially useful when data lies on a constrained manifold.\nWe evaluate the model in different settings and find it competitive even on image data, where Beta SS-DDPM achieves results comparable to a Gaussian DDPM.\nOur implementation is available at https://github.com/andrey-okhotin/star-shaped", "keywords": "Generative models;Diffusion;Exponential Family", "primary_area": "", "supplementary_material": "", "author": "Andrey Okhotin;Dmitry Molchanov;Arkhipkin Sergeevich Vladimir;Grigory Bartosh;Viktor Ohanesian;Aibek Alanov;Dmitry P. 
Vetrov", "authorids": "~Andrey_Okhotin1;~Dmitry_Molchanov1;~Arkhipkin_Sergeevich_Vladimir1;~Grigory_Bartosh1;~Viktor_Ohanesian1;~Aibek_Alanov1;~Dmitry_P._Vetrov1", "gender": "M;M;M;M;M;M;M", "homepage": ";;;;;;https://constructor.university/faculty-member/dmitry-vetrov", "dblp": ";194/2564;;;;228/9365;89/3348", "google_scholar": "EfSzqqIAAAAJ;https://scholar.google.ru/citations?user=tJ6JXRYAAAAJ;D-Ko0oAAAAAJ;;CevxSMoAAAAJ;MXJTRGoAAAAJ;https://scholar.google.ru/citations?user=7HU0UoUAAAAJ", "orcid": "0009-0009-4252-202X;;;;;;", "linkedin": "%D0%B0%D0%BD%D0%B4%D1%80%D0%B5%D0%B9-%D0%BE%D1%85%D0%BE%D1%82%D0%B8%D0%BD-300342247;;;grigory-bartosh-76004a163/;;;", "or_profile": "~Andrey_Okhotin1;~Dmitry_Molchanov1;~Arkhipkin_Sergeevich_Vladimir1;~Grigory_Bartosh1;~Viktor_Ohanesian1;~Aibek_Alanov1;~Dmitry_P._Vetrov1", "aff": "Moscow State University, Lomonosov Moscow State University;;Sber;University of Amsterdam;Imperial College London;Artificial Intelligence Research Institute;National Research University Higher School of Economics", "aff_domain": "cs.msu.ru;;sberbank.ru;uva.nl;imperial.ac.uk;airi.net;hse.ru", "position": "MS student;;Researcher;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nokhotin2023starshaped,\ntitle={Star-Shaped Denoising Diffusion Probabilistic Models},\nauthor={Andrey Okhotin and Dmitry Molchanov and Arkhipkin Sergeevich Vladimir and Grigory Bartosh and Viktor Ohanesian and Aibek Alanov and Dmitry P. Vetrov},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yYUdgbmhh9}\n}", "github": "", "project": "", "reviewers": "EaR3;P24X;EZ37;mgR2;7FuV;UBua", "pdf_size": 16614049, "rating": "5;6;6;7;8;8", "confidence": "5;4;4;4;4;4", "soundness": "1;3;3;3;4;4", "novelty": "2;3;3;3;4;3", "presentation": "2;4;4;2;4;3", "wc_summary": "117;61;83;77;56;54", "wc_strengths": "69;148;59;20;54;40", "wc_weaknesses": "153;233;58;70;38;25", "wc_questions": "22;147;76;8;20;14", "wc_limitations": "54;20;3;2;7;1", "wc_review": "415;609;279;177;175;134", "wc_reply_reviewers": "18;92;10;0;1;14", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.666666666666667, 1.1055415967851332 ], "confidence_avg": [ 4.166666666666667, 0.372677996249965 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.1666666666666665, 0.8975274678557507 ], "wc_summary_avg": [ 74.66666666666667, 21.71532996653644 ], "wc_strengths_avg": [ 65.0, 40.2326567189723 ], "wc_weaknesses_avg": [ 96.16666666666667, 73.64649044968507 ], "wc_questions_avg": [ 47.833333333333336, 49.66694630793759 ], "wc_limitations_avg": [ 14.5, 18.786076404259266 ], "wc_review_avg": [ 298.1666666666667, 167.05130615738653 ], "wc_reply_reviewers_avg": [ 22.5, 31.747703329007386 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9059266536718169694&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "cs.msu.ru;;sberbank.ru;uva.nl;imperial.ac.uk;airi.net;hse.ru", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Lomonosov Moscow State University;Sberbank;University of Amsterdam;Imperial College London;Artificial Intelligence Research 
Institute;National Research University Higher School of Economics", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.msu.ru;https://www.sberbank.ru;https://www.uva.nl;https://www.imperial.ac.uk;;https://hse.ru", "aff_unique_abbr": "MSU;Sber;UvA;ICL;;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;3;0", "aff_country_unique": "Russian Federation;Netherlands;United Kingdom;United States" }, { "title": "Diplomat: A Dialogue Dataset for Situated PragMATic Reasoning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73411", "id": "yZQDF9f6bR", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/924303c6a45685510877ee018cdc8f80-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=yZQDF9f6bR", "openreview": "https://openreview.net/forum?id=yZQDF9f6bR", "poster": "/media/PosterPDFs/NeurIPS%202023/73411.png?t=1697169073.924766", "slides": "https://nips.cc/virtual/2023/poster/73411", "video": "https://nips.cc/virtual/2023/poster/73411", "author_site": "Hengli Li, Song-Chun Zhu, Zilong Zheng", "tldr": "", "abstract": "The ability to discern and comprehend pragmatic meanings is a cornerstone of social and emotional intelligence, referred to as pragmatic reasoning. Despite the strides made in the development of Large Language Models (LLMs), such as ChatGPT, these models grapple with capturing the nuanced and ambiguous facets of language, falling short of the aspiration to build human-like conversational agents. In this work, we introduce a novel benchmark, the **DiPlomat**, which delves into the fundamental components of conversational pragmatic reasoning, encompassing situational context reasoning, open-world knowledge acquisition, and unified figurative language understanding. We start by collecting a new human-annotated dialogue dataset, composed of 4,177 multi-turn dialogues and a vocabulary of 48,900 words. Along with the dataset, two tasks are proposed to evaluate machines' pragmatic reasoning capabilities, namely, Pragmatic Reasoning and Identification (PIR) and Conversational Question Answering (CQA). Furthermore, we probe into a zero-shot natural language inference task, where the significance of context in pragmatic reasoning is underscored.
Experimental findings illustrate the existing limitations of current prevailing LLMs in the realm of pragmatic reasoning, shedding light on the pressing need for further research to facilitate the emergence of emotional intelligence within human-like conversational agents.", "keywords": "Pragmatic Reasoning;Implicature;Conversation;Dataset;Inference", "primary_area": "", "supplementary_material": "/attachment/e9a7a6cec2b65f878aaff0167f7e724f732e7cc3.zip", "author": "Hengli Li;Song-Chun Zhu;Zilong Zheng", "authorids": "~Hengli_Li1;~Song-Chun_Zhu1;~Zilong_Zheng1", "gender": "M;M;M", "homepage": "https://github.com/Henry839;https://zhusongchun.net/;http://zilongzheng.github.io", "dblp": "349/4894;10/10313;218/5234", "google_scholar": ";https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;9sDx70IAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hengli_Li1;~Song-Chun_Zhu1;~Zilong_Zheng1", "aff": "Peking University;Peking University;Beijing Institute for General Artificial Intelligence", "aff_domain": "stu.pku.edu.cn;pku.edu.cn;bigai.ai", "position": "Undergrad student;Full Professor;Researcher", "bibtex": "@inproceedings{\nli2023diplomat,\ntitle={Diplomat: A Dialogue Dataset for Situated Prag{MAT}ic Reasoning},\nauthor={Hengli Li and Song-Chun Zhu and Zilong Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=yZQDF9f6bR}\n}", "github": "", "project": "", "reviewers": "r1EM;hCbW;xhK2;tasC;CPTz", "pdf_size": 2431080, "rating": "5;6;6;7;7", "confidence": "4;4;2;4;4", "wc_summary_and_contributions": "114;175;56;171;54", "wc_strengths": "9;123;99;152;35", "wc_improvement": "109;240;59;73;540", "wc_limitations": "6;167;51;3;14", "wc_correctness": "128;45;15;1;36", "wc_clarity": "59;15;6;8;14", "wc_relation_to_prior_work": "207;37;14;1;20", "wc_documentation": "5;30;11;1;44", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "638;833;312;411;758", "wc_reply_reviewers": "284;0;0;0;472", "wc_reply_authors": "4106;1488;767;170;1711", "reply_reviewers": "6;0;0;0;2", "reply_authors": "8;3;1;1;4", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 114.0, 52.79015059648911 ], "wc_strengths_avg": [ 83.6, 53.65668644260471 ], "wc_improvement_avg": [ 204.2, 179.66791588928726 ], "wc_limitations_avg": [ 48.2, 61.83008976218618 ], "wc_correctness_avg": [ 45.0, 44.285437787155274 ], "wc_clarity_avg": [ 20.4, 19.602040710089344 ], "wc_relation_to_prior_work_avg": [ 55.8, 76.48110877857354 ], "wc_documentation_avg": [ 18.2, 16.28987415543779 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 590.4, 199.44984331906605 ], "wc_reply_reviewers_avg": [ 151.2, 194.49051390749113 ], "wc_reply_authors_avg": [ 1648.4, 1344.0221129133256 ], "reply_reviewers_avg": [ 1.6, 2.3323807579381204 ], "reply_authors_avg": [ 3.4, 2.576819745345025 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13363062095621217, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10638882804692478329&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "stu.pku.edu.cn;pku.edu.cn;bigai.ai", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Peking University;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.bigaiai.org/", "aff_unique_abbr": "Peking U;BIGAI", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Complexity of Derivative-Free Policy Optimization for Structured $\\mathcal{H}_\\infty$ Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69946", "id": "yaJ4vZPnHX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1052b823a161aa2c808dd51c0f58dc37-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yaJ4vZPnHX", "openreview": "https://openreview.net/forum?id=yaJ4vZPnHX", "poster": "/media/PosterPDFs/NeurIPS%202023/69946.png?t=1701670651.4617343", "slides": "https://nips.cc/virtual/2023/poster/69946", "video": "https://nips.cc/virtual/2023/poster/69946", "author_site": "Xingang Guo, Darioush Keivan, Geir Dullerud, Peter Seiler, Bin Hu", "tldr": "", "abstract": "The applications of direct policy search in reinforcement learning and continuous control have received increasing attention.\nIn this work, we present novel theoretical results on the complexity of derivative-free policy optimization on an important class of robust control tasks, namely the structured $H_\\infty$ synthesis with static output feedback. \nOptimal $H_\\infty$ synthesis under structural constraints leads to a constrained nonconvex nonsmooth problem and is typically\naddressed using subgradient-based policy search techniques that are built upon the concept of Goldstein subdifferential or other notions of enlarged subdifferential. In this paper, we study the complexity of finding $(\\delta,\\epsilon)$-stationary points for such nonsmooth robust control design tasks using policy optimization methods which can only access the zeroth-order oracle (i.e. the $H_\\infty$ norm of the closed-loop system). First, we study the exact oracle setting and identify the coerciveness of the cost function to prove high-probability feasibility/complexity bounds for derivative-free policy optimization on this problem. Next, we derive a sample complexity result for the multi-input multi-output (MIMO) $H_\\infty$-norm estimation. We combine this with our analysis to obtain the first sample complexity of model-free, trajectory-based, zeroth-order policy optimization on finding $(\\delta,\\epsilon)$-stationary points for structured $H_\\infty$ control. 
\nNumerical results are also provided to demonstrate our theory.", "keywords": "Structured $\\mathcal{H}_\\infty$ Control;Nonsmooth Optimization;Complexity Analysis", "primary_area": "", "supplementary_material": "/attachment/115ac52198771740b073ae68c0625d994a11735c.zip", "author": "Xingang Guo;Darioush Keivan;Geir Dullerud;Peter Seiler;Bin Hu", "authorids": "~Xingang_Guo1;~Darioush_Keivan1;~Geir_Dullerud1;~Peter_Seiler1;~Bin_Hu2", "gender": "M;M;M;M;M", "homepage": "https://sites.google.com/view/guoxingang;https://mechanical.illinois.edu/directory/faculty/dullerud/;https://seiler.engin.umich.edu/;;", "dblp": ";;48/185;;", "google_scholar": "8HmMeD8AAAAJ;https://scholar.google.com.tw/citations?user=afhaKpYAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;0000-0003-3423-1109;;", "linkedin": ";;;;darioush-keivan/", "or_profile": "~Xingang_Guo1;~Geir_Dullerud1;~Peter_Seiler1;~Bin_Hu2;~Darioush_Keivan_Esfahani1", "aff": "University of Illinois, Urbana-Champaign;University of Illinois;University of Michigan;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;illinois.edu;umich.edu;illinois.edu;uiuc.edu", "position": "PhD student;Full Professor;Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nguo2023complexity,\ntitle={Complexity of Derivative-Free Policy Optimization for Structured \\${\\textbackslash}mathcal\\{H\\}\\_{\\textbackslash}infty\\$ Control},\nauthor={Xingang Guo and Darioush Keivan and Geir Dullerud and Peter Seiler and Bin Hu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yaJ4vZPnHX}\n}", "github": "", "project": "", "reviewers": "v5nm;UPcn;vRAL", "pdf_size": 1015152, "rating": "5;6;6", "confidence": "2;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;2", "wc_summary": "59;40;87", "wc_strengths": "49;45;72", "wc_weaknesses": "74;135;76", "wc_questions": "1;17;88", "wc_limitations": "1;15;11", "wc_review": "184;252;334", "wc_reply_reviewers": "10;13;28", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 62.0, 19.30457631409368 ], "wc_strengths_avg": [ 55.333333333333336, 11.897712198383164 ], "wc_weaknesses_avg": [ 95.0, 28.296053906272277 ], "wc_questions_avg": [ 35.333333333333336, 37.80946383586463 ], "wc_limitations_avg": [ 9.0, 5.887840577551898 ], "wc_review_avg": [ 256.6666666666667, 61.326086528400694 ], "wc_reply_reviewers_avg": [ 17.0, 7.874007874011811 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "uiuc.edu;illinois.edu;umich.edu;illinois.edu;uiuc.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "University of Illinois;University of Michigan;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.umich.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UM;UIUC", 
"aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding the detrimental class-level effects of data augmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69945", "id": "yageaKlk7S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/38c05a5410a6ab7eeeb26c9dbebbc41b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yageaKlk7S", "openreview": "https://openreview.net/forum?id=yageaKlk7S", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69945", "video": "https://nips.cc/virtual/2023/poster/69945", "author_site": "Polina Kirichenko, Mark Ibrahim, Randall Balestriero, Diane Bouchacourt, Shanmukha Ramakrishna Vedantam, Hamed Firooz, Andrew Wilson", "tldr": "", "abstract": "Data augmentation (DA) encodes invariance and provides implicit regularization critical to a model's performance in image classification tasks. However, while DA improves average accuracy, recent studies have shown that its impact can be highly class dependent: achieving optimal average accuracy comes at the cost of significantly hurting individual class accuracy by as much as 20% on ImageNet. There has been little progress in resolving class-level accuracy drops due to a limited understanding of these effects. In this work, we present a framework for understanding how DA interacts with class-level learning dynamics. Using higher-quality multi-label annotations on ImageNet, we systematically categorize the affected classes and find that the majority are inherently ambiguous, co-occur, or involve fine-grained distinctions, while DA controls the model's bias towards one of the closely related classes. While many of the previously reported performance drops are explained by multi-label annotations, we identify other sources of accuracy degradations by analyzing class confusions. 
We show that simple class-conditional augmentation strategies informed by our framework improve performance on the negatively affected classes.", "keywords": "data augmentation;class-dependent bias", "primary_area": "", "supplementary_material": "/attachment/5f61f521113bf838b3c0237e2cb873db8477d883.pdf", "author": "Polina Kirichenko;Mark Ibrahim;Randall Balestriero;Diane Bouchacourt;Shanmukha Ramakrishna Vedantam;Hamed Firooz;Andrew Gordon Wilson", "authorids": "~Polina_Kirichenko1;~Mark_Ibrahim1;~Randall_Balestriero1;~Diane_Bouchacourt3;~Shanmukha_Ramakrishna_Vedantam1;~Hamed_Firooz1;~Andrew_Gordon_Wilson1", "gender": "F;;M;M;M;Not Specified;F", "homepage": "https://polkirichenko.github.io/;https://markibrahim.me/;https://randallbalestriero.github.io/;http://vrama91.github.io;;https://cims.nyu.edu/~andrewgw;https://dianebouchacourt.github.io/", "dblp": "239/8699;180/5660;175/5364;154/6748.html;;65/10453;176/1498", "google_scholar": "05uQHIgAAAAJ;AqYyoCMAAAAJ;S1x_xqcAAAAJ;v1CRzeAAAAAJ;4pKOL5gAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;", "orcid": ";;;;;;", "linkedin": "polkirichenko/;;randallbalestriero/;;;;", "or_profile": "~Polina_Kirichenko1;~Mark_Ibrahim1;~Randall_Balestriero1;~Shanmukha_Ramakrishna_Vedantam1;~Hamed_Firooz1;~Andrew_Gordon_Wilson1;~Diane_Nicole_Bouchacourt1", "aff": "New York University;Facebook AI Research (FAIR) Meta;Meta Facebook;Meta Facebook;Meta Facebook;New York University;Meta AI Research", "aff_domain": "nyu.edu;ai.facebook.com;facebook.com;fb.com;facebook.com;nyu.edu;meta.com", "position": "PhD student;Researcher;Postdoc;Research Scientist;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nkirichenko2023understanding,\ntitle={Understanding the detrimental class-level effects of data augmentation},\nauthor={Polina Kirichenko and Mark Ibrahim and Randall Balestriero and Diane Bouchacourt and Shanmukha Ramakrishna Vedantam and Hamed Firooz and Andrew Gordon Wilson},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yageaKlk7S}\n}", "github": "", "project": "", "reviewers": "2Ged;wB8d;EjrR;nQVM", "pdf_size": 27151352, "rating": "4;6;7;7", "confidence": "5;4;3;4", "soundness": "3;3;4;2", "novelty": "3;3;3;2", "presentation": "3;3;4;3", "wc_summary": "51;128;98;98", "wc_strengths": "37;140;179;64", "wc_weaknesses": "71;20;63;224", "wc_questions": "15;117;44;265", "wc_limitations": "1;7;5;37", "wc_review": "175;412;389;688", "wc_reply_reviewers": "0;88;21;237", "wc_reply_authors": "0;319;0;27", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.75, 27.55335732719336 ], "wc_strengths_avg": [ 105.0, 57.02192560761167 ], "wc_weaknesses_avg": [ 94.5, 77.24150438721401 ], "wc_questions_avg": [ 110.25, 96.76614852312765 ], "wc_limitations_avg": [ 12.5, 14.309088021254185 ], "wc_review_avg": [ 416.0, 182.2155317199936 ], "wc_reply_reviewers_avg": [ 86.5, 92.76987657639737 ], "wc_reply_authors_avg": [ 86.5, 134.68574534819933 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 13, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7068166545599181318&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 7, "email": "nyu.edu;ai.facebook.com;facebook.com;fb.com;facebook.com;nyu.edu;meta.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "New York University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.nyu.edu;https://www.meta.com", "aff_unique_abbr": "NYU;Meta AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PAC-Bayesian Spectrally-Normalized Bounds for Adversarially Robust Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69944", "id": "ydKWoqWZ3t", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/720991812855c99df50bc8b36966cd81-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ydKWoqWZ3t", "openreview": "https://openreview.net/forum?id=ydKWoqWZ3t", "poster": "/media/PosterPDFs/NeurIPS%202023/69944.png?t=1701920042.6882195", "slides": "https://nips.cc/virtual/2023/poster/69944", "video": "https://nips.cc/virtual/2023/poster/69944", "author_site": "Jiancong Xiao, Ruoyu Sun, Zhi-Quan Luo", "tldr": "", "abstract": "Deep neural networks (DNNs) are vulnerable to adversarial attacks. It is found empirically that adversarially robust generalization is crucial in establishing defense algorithms against adversarial attacks. Therefore, it is interesting to study the theoretical guarantee of robust generalization. This paper focuses on norm-based complexity, based on a PAC-Bayes approach (Neyshabur et al., 2017). The main challenge lies in extending the key ingredient, which is a weight perturbation bound in standard settings, to the robust settings. Existing attempts heavily rely on additional strong assumptions, leading to loose bounds. In this paper, we address this issue and provide a spectrally-normalized robust generalization bound for DNNs. Compared to existing bounds, our bound offers two significant advantages: Firstly, it does not depend on additional assumptions. Secondly, it is considerably tighter, aligning with the bounds of standard generalization. Therefore, our result provides a different perspective on understanding robust generalization: The mismatch terms between standard and robust generalization bounds shown in previous studies do not contribute to the poor robust generalization. Instead, these disparities solely due to mathematical issues. 
Finally, we extend the main result to adversarial robustness against general non-$\\ell_p$ attacks and other neural network architectures.", "keywords": "Pac-Bayes;Adversarial Robustness;Generalization", "primary_area": "", "supplementary_material": "/attachment/f11d4fe9a12f1140d64afa212f41eb5d7000d4ee.pdf", "author": "Jiancong Xiao;Ruoyu Sun;Zhi-Quan Luo", "authorids": "~Jiancong_Xiao1;~Ruoyu_Sun1;~Zhi-Quan_Luo1", "gender": "M;;M", "homepage": "https://jiancongxiao.github.io;https://ruoyus.github.io/;", "dblp": "330/4306;30/9879-1;", "google_scholar": "_vGY3joAAAAJ;PsfzbCMAAAAJ;dW3gcXoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiancong_Xiao1;~Ruoyu_Sun1;~Zhi-Quan_Luo1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxiao2023pacbayesian,\ntitle={{PAC}-Bayesian Spectrally-Normalized Bounds for Adversarially Robust Generalization},\nauthor={Jiancong Xiao and Ruoyu Sun and Zhi-Quan Luo},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ydKWoqWZ3t}\n}", "github": "", "project": "", "reviewers": "Ccn3;nNum;AB19;FsEL", "pdf_size": 709650, "rating": "4;5;6;7", "confidence": "4;4;3;5", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "11;19;39;51", "wc_strengths": "9;41;91;136", "wc_weaknesses": "207;74;348;203", "wc_questions": "2;5;26;19", "wc_limitations": "2;6;9;4", "wc_review": "231;145;513;413", "wc_reply_reviewers": "71;35;14;16", "wc_reply_authors": "74;15;15;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 30.0, 15.84297951775486 ], "wc_strengths_avg": [ 69.25, 48.3651475755011 ], "wc_weaknesses_avg": [ 208.0, 96.93038739218987 ], "wc_questions_avg": [ 13.0, 9.874208829065749 ], "wc_limitations_avg": [ 5.25, 2.5860201081971503 ], "wc_review_avg": [ 325.5, 145.19211411092547 ], "wc_reply_reviewers_avg": [ 34.0, 22.880122377295102 ], "wc_reply_authors_avg": [ 29.75, 25.54774941164094 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17686768824861072379&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "A generative model of the hippocampal formation trained with theta driven local learning rules", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69943", "id": "yft4JlxsRf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/05ab457c7b769f01c2973e2a5ab66ad9-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=yft4JlxsRf", "openreview": "https://openreview.net/forum?id=yft4JlxsRf", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69943", "video": "https://nips.cc/virtual/2023/poster/69943", "author_site": "Tom M George, Kimberly Stachenfeld, Caswell Barry, Claudia Clopath, Tomoki Fukai", "tldr": "", "abstract": "Advances in generative models have recently revolutionised machine learning. Meanwhile, in neuroscience, generative models have long been thought fundamental to animal intelligence. Understanding the biological mechanisms that support these processes promises to shed light on the relationship between biological and artificial intelligence. In animals, the hippocampal formation is thought to learn and use a generative model to support its role in spatial and non-spatial memory. Here we introduce a biologically plausible model of the hippocampal formation tantamount to a Helmholtz machine that we apply to a temporal stream of inputs. A novel component of our model is that fast theta-band oscillations (5-10 Hz) gate the direction of information flow throughout the network, training it akin to a high-frequency wake-sleep algorithm. Our model accurately infers the latent state of high-dimensional sensory environments and generates realistic sensory predictions. Furthermore, it can learn to path integrate by developing a ring attractor connectivity structure matching previous theoretical proposals and flexibly transfer this structure between environments. Whereas many models trade-off biological plausibility with generality, our model captures a variety of hippocampal cognitive functions under one biologically plausible local learning rule.", "keywords": "hippocampus;path integration;local learning;generative models;oscillations;inference;Helmholtz machine;wake-sleep", "primary_area": "", "supplementary_material": "/attachment/ca4f963443a48a534cd818f6ab20396178c6c9cb.pdf", "author": "Tom George;Kim Stachenfeld;Caswell Barry;Claudia Clopath;Tomoki Fukai", "authorids": "~Tom_George1;~Kim_Stachenfeld1;~Caswell_Barry1;~Claudia_Clopath1;~Tomoki_Fukai1", "gender": ";F;;;M", "homepage": "https://github.com/TomGeorge1234;https://neurokim.com/;;;https://groups.oist.jp/ncbc", "dblp": ";155/1888;220/3769;37/6388;", "google_scholar": ";jNtH2WUAAAAJ;;;https://scholar.google.co.jp/citations?user=iO7jHc4AAAAJ", "orcid": "0000-0002-4527-8810;;;;my-orcid?orcid=0000-0001-6977-5638", "linkedin": ";;;;", "or_profile": "~Tom_George1;~Kim_Stachenfeld1;~Caswell_Barry1;~Claudia_Clopath1;~Tomoki_Fukai1", "aff": "University College London, University of London;Google DeepMind;University College London;Imperial College London;Okinawa Institute of Science and Technology (OIST)", "aff_domain": "ucl.ac.uk;deepmind.com;ucl.ac.uk;ic.ac.uk;oist.jp", "position": "PhD student;Research Scientist;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngeorge2023a,\ntitle={A generative model of the hippocampal formation trained with theta driven local learning rules},\nauthor={Tom George and Kim Stachenfeld and Caswell Barry and Claudia Clopath and Tomoki Fukai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yft4JlxsRf}\n}", "github": "", "project": "", "reviewers": "iw5X;PiE7;Lhyx;UjUg", "pdf_size": 4783388, "rating": "4;6;7;7", "confidence": "1;4;4;3", "soundness": "3;3;3;3", "novelty": "1;3;3;2", "presentation": "4;3;4;4", "wc_summary": "45;64;174;334", "wc_strengths": 
"23;120;159;59", "wc_weaknesses": "165;503;313;795", "wc_questions": "37;60;262;914", "wc_limitations": "1;22;39;48", "wc_review": "271;769;947;2150", "wc_reply_reviewers": "595;73;21;211", "wc_reply_authors": "1062;158;9;2096", "reply_reviewers": "2;1;1;2", "reply_authors": "3;3;2;6", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 154.25, 114.87030730349771 ], "wc_strengths_avg": [ 90.25, 52.70377121231459 ], "wc_weaknesses_avg": [ 444.0, 235.41665191740367 ], "wc_questions_avg": [ 318.25, 354.92138213976347 ], "wc_limitations_avg": [ 27.5, 17.92344832893492 ], "wc_review_avg": [ 1034.25, 690.1845314841531 ], "wc_reply_reviewers_avg": [ 225.0, 224.6196785680186 ], "wc_reply_authors_avg": [ 831.25, 833.9962155189914 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8333333333333331, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4322003014768188061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ucl.ac.uk;deepmind.com;ucl.ac.uk;ic.ac.uk;oist.jp", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University College London;Google;Imperial College London;Okinawa Institute of Science and Technology", "aff_unique_dep": ";Google DeepMind;;", "aff_unique_url": "https://www.ucl.ac.uk;https://deepmind.com;https://www.imperial.ac.uk;https://www.oist.jp", "aff_unique_abbr": "UCL;DeepMind;ICL;OIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United Kingdom;Japan" }, { "title": "Symmetry-Informed Geometric Representation for Molecules, Proteins, and Crystalline Materials", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73410", "id": "ygXSNrIU1p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d07379f3acf3af51dfc8598862cadfa0-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=ygXSNrIU1p", "openreview": "https://openreview.net/forum?id=ygXSNrIU1p", "poster": "/media/PosterPDFs/NeurIPS%202023/73410.png?t=1702183756.5651653", "slides": "https://nips.cc/virtual/2023/poster/73410", "video": "https://nips.cc/virtual/2023/poster/73410", "author_site": "Shengchao Liu, weitao Du, Yanjing Li, Zhuoxinran Li, Zhiling Zheng, Chenru Duan, Zhi-Ming Ma, Omar Yaghi, Animashree Anandkumar, Christian Borgs, Jennifer Chayes, Hongyu Guo, Jian Tang", "tldr": "", "abstract": "Artificial intelligence for scientific discovery has recently generated significant interest within the machine learning and scientific communities, particularly in the domains of chemistry, biology, and material discovery. For these scientific problems, molecules serve as the fundamental building blocks, and machine learning has emerged as a highly effective and powerful tool for modeling their geometric structures. Nevertheless, due to the rapidly evolving process of the field and the knowledge gap between science ({\\eg}, physics, chemistry, \\& biology) and machine learning communities, a benchmarking study on geometrical representation for such data has not been conducted. 
To address such an issue, in this paper, we first provide a unified view of the current symmetry-informed geometric methods, classifying them into three main categories: invariance, equivariance with spherical frame basis, and equivariance with vector frame basis. Then we propose a platform, coined Geom3D, which enables benchmarking the effectiveness of geometric strategies. Geom3D contains 16 advanced symmetry-informed geometric representation models and 14 geometric pretraining methods over 52 diverse tasks, including small molecules, proteins, and crystalline materials. We hope that Geom3D can, on the one hand, eliminate barriers for machine learning researchers interested in exploring scientific problems; and, on the other hand, provide valuable guidance for researchers in computational chemistry, structural biology, and materials science, aiding in the informed selection of representation techniques for specific applications. The source code is available on \\href{https://github.com/chao1224/Geom3D}{the GitHub repository}.", "keywords": "physics;geometric representation;equivariant;group theory;pretraining;molecule;chemistry;protein;biology;crystal;crystalline material", "primary_area": "", "supplementary_material": "/attachment/8bfd9c66fecde6759b55f64fc7719e965ff66fe0.pdf", "author": "Shengchao Liu;weitao Du;Yanjing Li;Zhuoxinran Li;Zhiling Zheng;Chenru Duan;Zhi-Ming Ma;Omar M. Yaghi;Anima Anandkumar;Christian Borgs;Jennifer T Chayes;Hongyu Guo;Jian Tang", "authorids": "~Shengchao_Liu1;~weitao_Du1;~Yanjing_Li3;~Zhuoxinran_Li2;~Zhiling_Zheng1;~Chenru_Duan1;~Zhi-Ming_Ma1;~Omar_M._Yaghi2;~Anima_Anandkumar1;~Christian_Borgs2;~Jennifer_T_Chayes1;~Hongyu_Guo1;~Jian_Tang1", "gender": "M;M;M;F;M;M;;M;M;F;M;;F", "homepage": "https://chao1224.github.io/;;https://www.linkedin.com/in/yanjing-li-6002a4220/;https://zoezxrli.github.io/;https://orcid.org/0000-0001-6090-2258;https://www.deepprinciple.com;http://homepage.amss.ac.cn/research/homePage/8eb59241e2e74d828fb84eec0efadba5/myHomePage.html;https://chemistry.berkeley.edu/faculty/chem/yaghi;http://christianborgs.com;http://jenniferchayes.com/;https://hongyuharryguo.github.io/;http://www.jian-tang.com;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": ";17/10015;;;;;;;b/ChristianBorgs;;;181/2667-5;", "google_scholar": "F1ws3XUAAAAJ;;;;Mz2bKMMAAAAJ;canPgVoAAAAJ;;;;YAHWbtkAAAAJ;https://scholar.google.ca/citations?user=bZUqlakAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ;bEcLezcAAAAJ", "orcid": "0000-0003-2030-2367;;;;0000-0001-6090-2258;0000-0003-2592-4237;;;;;;;", "linkedin": ";;;;;chenru-duan-8882a010b/;;;;;harry-h-y-guo-a582087/;;anima-anandkumar-35171b1/", "or_profile": "~Shengchao_Liu1;~weitao_Du1;~Yanjing_Li3;~Zhuoxinran_Li2;~Zhiling_Zheng1;~Chenru_Duan1;~Zhi-Ming_Ma1;~Omar_M._Yaghi2;~Christian_Borgs2;~Jennifer_T_Chayes1;~Hongyu_Guo1;~Jian_Tang1;~anima_anandkumar1", "aff": "MILA-UdeM;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Carnegie Mellon University;University of Toronto;University of California, Berkeley;Microsoft;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;University of California, Berkeley;Electrical Engineering & Computer Science Department, University of California, Berkeley;University of California, Berkeley;National Research Council Canada;Mila, HEC Montreal;California Institute of Technology", "aff_domain": 
"mila.quebec;amss.ac.cn;andrew.cmu.edu;utoronto.ca;berkeley.edu;microsoft.com;amss.ac.cn;berkeley.edu;eecs.berkeley.edu;berkeley.edu;nrc-cnrc.gc.ca;hec.ca;caltech.edu", "position": "PhD student;Postdoc;MS student;Undergrad student;PhD student;Researcher;Full Professor;Full Professor;Full Professor;Full Professor;Senior Research Officer;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023symmetryinformed,\ntitle={Symmetry-Informed Geometric Representation for Molecules, Proteins, and Crystalline Materials},\nauthor={Shengchao Liu and weitao Du and Yanjing Li and Zhuoxinran Li and Zhiling Zheng and Chenru Duan and Zhi-Ming Ma and Omar M. Yaghi and Anima Anandkumar and Christian Borgs and Jennifer T Chayes and Hongyu Guo and Jian Tang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=ygXSNrIU1p}\n}", "github": "", "project": "", "reviewers": "1hwb;pq7Y;8p61;XV1e;m8T2", "pdf_size": 5680328, "rating": "5;6;6;7;8", "confidence": "2;3;4;5;5", "wc_summary_and_contributions": "76;66;64;35;191", "wc_strengths": "49;60;56;56;114", "wc_improvement": "172;213;113;46;72", "wc_limitations": "28;8;2;45;21", "wc_correctness": "4;8;1;77;1", "wc_clarity": "26;68;1;6;1", "wc_relation_to_prior_work": "3;56;1;1;1", "wc_documentation": "78;68;1;3;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "437;548;240;270;403", "wc_reply_reviewers": "143;0;0;116;11", "wc_reply_authors": "1042;1314;413;1414;341", "reply_reviewers": "1;0;0;1;1", "reply_authors": "3;2;1;4;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 1.16619037896906 ], "wc_summary_and_contributions_avg": [ 86.4, 54.05404702702657 ], "wc_strengths_avg": [ 67.0, 23.765521244020714 ], "wc_improvement_avg": [ 123.2, 61.855961717525666 ], "wc_limitations_avg": [ 20.8, 15.197368193210298 ], "wc_correctness_avg": [ 18.2, 29.512031444819243 ], "wc_clarity_avg": [ 20.4, 25.523322667709238 ], "wc_relation_to_prior_work_avg": [ 12.4, 21.81375712709757 ], "wc_documentation_avg": [ 30.2, 35.09643856575764 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 379.6, 112.87267162604064 ], "wc_reply_reviewers_avg": [ 54.0, 62.363450834603434 ], "wc_reply_authors_avg": [ 904.8, 448.39509363952675 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.9081082718950221, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6514439393595401362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mila.quebec;amss.ac.cn;andrew.cmu.edu;utoronto.ca;berkeley.edu;microsoft.com;amss.ac.cn;berkeley.edu;eecs.berkeley.edu;berkeley.edu;nrc-cnrc.gc.ca;hec.ca;caltech.edu", "author_num": 13, "aff_unique_index": "0;1;2;3;4;5;1;4;4;4;6;7;8", "aff_unique_norm": "Mila;Chinese Academy of Sciences;Carnegie Mellon University;University of Toronto;University of California, Berkeley;Microsoft;National Research Council Canada;HEC Montreal;California Institute of Technology", "aff_unique_dep": "Montreal Institute for Learning Algorithms;Academy of Mathematics and Systems Science;;;;Microsoft Corporation;;HEC Business School;", "aff_unique_url": "https://mila.quebec;http://www.cas.cn;https://www.cmu.edu;https://www.utoronto.ca;https://www.berkeley.edu;https://www.microsoft.com;https://www.nrc-cnrc.gc.ca;https://www.hec.ca;https://www.caltech.edu", 
"aff_unique_abbr": "MILA;CAS;CMU;U of T;UC Berkeley;Microsoft;NRC-CNRC;HEC;Caltech", "aff_campus_unique_index": "1;1;1;1;2;3", "aff_campus_unique": ";Berkeley;Montreal;Pasadena", "aff_country_unique_index": "0;1;2;0;2;2;1;2;2;2;0;0;2", "aff_country_unique": "Canada;China;United States" }, { "title": "Uncertainty Quantification over Graph with Conformalized Graph Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69942", "id": "ygjQCOyNfh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/54a1495b06c4ee2f07184afb9a37abda-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ygjQCOyNfh", "openreview": "https://openreview.net/forum?id=ygjQCOyNfh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69942", "video": "https://nips.cc/virtual/2023/poster/69942", "author_site": "Kexin Huang, Ying Jin, Emmanuel Candes, Jure Leskovec", "tldr": "", "abstract": "Graph Neural Networks (GNNs) are powerful machine learning prediction models on graph-structured data. However, GNNs lack rigorous uncertainty estimates, limiting their reliable deployment in settings where the cost of errors is significant. We propose conformalized GNN (CF-GNN), extending conformal prediction (CP) to graph-based models for guaranteed uncertainty estimates. Given an entity in the graph, CF-GNN produces a prediction set/interval that provably contains the true label with pre-defined coverage probability (e.g. 90%). We establish a permutation invariance condition that enables the validity of CP on graph data and provide an exact characterization of the test-time coverage. Moreover, besides valid coverage, it is crucial to reduce the prediction set size/interval length for practical use. We observe a key connection between non-conformity scores and network structures, which motivates us to develop a topology-aware output correction model that learns to update the prediction and produces more efficient prediction sets/intervals. Extensive experiments show that CF-GNN achieves any pre-defined target marginal coverage while significantly reducing the prediction set/interval size by up to 74% over the baselines. 
It also empirically achieves satisfactory conditional coverage over various raw and network features.", "keywords": "Graph Neural Networks;Conformal Prediction;Uncertainty Quantification", "primary_area": "", "supplementary_material": "/attachment/f3b6a5afae552948fbda9ca4506a1f5ab2d45124.zip", "author": "Kexin Huang;Ying Jin;Emmanuel Candes;Jure Leskovec", "authorids": "~Kexin_Huang1;~Ying_Jin4;~Emmanuel_Candes1;~Jure_Leskovec1", "gender": "M;F;;", "homepage": "https://www.kexinhuang.com/;https://ying531.github.io/;http://statweb.stanford.edu/~candes/;http://cs.stanford.edu/~jure/", "dblp": ";https://dblp.org/rec/conf/icml/JinWL20;;l/JureLeskovec", "google_scholar": "ogEXTOgAAAAJ;lT5KFUkAAAAJ;nRQi4O8AAAAJ;Q_kKkIUAAAAJ", "orcid": ";;;0000-0002-5411-923X", "linkedin": ";;;leskovec/", "or_profile": "~Kexin_Huang1;~Ying_Jin4;~Emmanuel_Candes1;~Jure_Leskovec1", "aff": "Stanford University;Stanford University;Stanford University;Kumo.AI", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;kumo.ai", "position": "PhD student;PhD student;Full Professor;Chief Scientist", "bibtex": "@inproceedings{\nhuang2023uncertainty,\ntitle={Uncertainty Quantification over Graph with Conformalized Graph Neural Networks},\nauthor={Kexin Huang and Ying Jin and Emmanuel Candes and Jure Leskovec},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ygjQCOyNfh}\n}", "github": "", "project": "", "reviewers": "Wep3;rkWb;y5h3;XvAj;M4K7", "pdf_size": 1292140, "rating": "6;7;7;7;8", "confidence": "3;3;2;3;5", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;3;4;4", "wc_summary": "231;53;93;69;57", "wc_strengths": "35;63;56;86;81", "wc_weaknesses": "132;30;2;143;168", "wc_questions": "77;56;85;6;119", "wc_limitations": "11;21;8;5;63", "wc_review": "486;223;244;309;488", "wc_reply_reviewers": "0;35;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 100.6, 66.67413291524682 ], "wc_strengths_avg": [ 64.2, 18.323755073674175 ], "wc_weaknesses_avg": [ 95.0, 66.14529461722881 ], "wc_questions_avg": [ 68.6, 37.30201066966766 ], "wc_limitations_avg": [ 21.6, 21.38784701647176 ], "wc_review_avg": [ 350.0, 115.40017331009517 ], "wc_reply_reviewers_avg": [ 7.0, 14.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2131833819508442175&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "stanford.edu;stanford.edu;stanford.edu;kumo.ai", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;Kumo.AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.kumo.ai", "aff_unique_abbr": "Stanford;Kumo.AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "FiGURe: Simple and Efficient Unsupervised Node Representations with Filter Augmentations", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/69941", "id": "yh0OkiUk5h", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6f479ea488e0908ac8b1b37b27fd134c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yh0OkiUk5h", "openreview": "https://openreview.net/forum?id=yh0OkiUk5h", "poster": "/media/PosterPDFs/NeurIPS%202023/69941.png?t=1699536470.3187861", "slides": "https://nips.cc/virtual/2023/poster/69941", "video": "https://nips.cc/virtual/2023/poster/69941", "author_site": "Chanakya Ekbote, Ajinkya Deshpande, Arun Iyer, SUNDARARAJAN SELLAMANICKAM, Ramakrishna Bairi", "tldr": "", "abstract": "Unsupervised node representations learnt using contrastive learning-based methods have shown good performance on downstream tasks. However, these methods rely on augmentations that mimic low-pass filters, limiting their performance on tasks requiring different eigen-spectrum parts. This paper presents a simple filter-based augmentation method to capture different parts of the eigen-spectrum. We show significant improvements using these augmentations. Further, we show that sharing the same weights across these different filter augmentations is possible, reducing the computational load. In addition, previous works have shown that good performance on downstream tasks requires high dimensional representations. Working with high dimensions increases the computations, especially when multiple augmentations are involved. We mitigate this problem and recover good performance through lower dimensional embeddings using simple random Fourier feature projections. Our method, FiGURe, achieves an average gain of up to 4.4\\%, compared to the state-of-the-art unsupervised models, across all datasets in consideration, both homophilic and heterophilic. 
Our code can be found at: https://github.com/Microsoft/figure.", "keywords": "Graph Neural Networks;Unsupervised Representation Learning;Graph Filters", "primary_area": "", "supplementary_material": "/attachment/e499e1121fffc68fa80a648d77e891bf637c16a7.pdf", "author": "Chanakya Ekbote;Ajinkya Deshpande;Arun Iyer;SUNDARARAJAN SELLAMANICKAM;Ramakrishna B Bairi", "authorids": "~Chanakya_Ekbote1;ajinkya.deshpande56@gmail.com;~Arun_Iyer1;~SUNDARARAJAN_SELLAMANICKAM2;~Ramakrishna_B_Bairi1", "gender": ";;M;;", "homepage": ";;;;https://www.microsoft.com/en-us/research/people/rbairi/", "dblp": ";;262/6555;;", "google_scholar": ";;https://scholar.google.co.in/citations?user=Ngm0j_EAAAAJ;https://scholar.google.co.in/citations?user=JOk66doAAAAJ;", "orcid": ";;0000-0001-7377-7599;;", "linkedin": ";;iyerarunshankar/;;", "or_profile": "~Chanakya_Ekbote1;ajinkya.deshpande56@gmail.com;~Arun_Iyer1;~SUNDARARAJAN_SELLAMANICKAM2;~Ramakrishna_B_Bairi1", "aff": ";;Microsoft;Microsoft;Microsoft", "aff_domain": ";;microsoft.com;microsoft.com;microsoft.com", "position": ";;Principal Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nekbote2023figure,\ntitle={Fi{GUR}e: Simple and Efficient Unsupervised Node Representations with Filter Augmentations},\nauthor={Chanakya Ekbote and Ajinkya Deshpande and Arun Iyer and SUNDARARAJAN SELLAMANICKAM and Ramakrishna B Bairi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yh0OkiUk5h}\n}", "github": "", "project": "", "reviewers": "wrwF;xu4A;Ck44;qpib", "pdf_size": 1834980, "rating": "3;5;6;6", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "2;3;2;3", "presentation": "1;3;2;4", "wc_summary": "99;131;58;99", "wc_strengths": "47;93;16;169", "wc_weaknesses": "326;190;383;245", "wc_questions": "84;200;249;184", "wc_limitations": "1;1;1;1", "wc_review": "557;615;707;698", "wc_reply_reviewers": "0;0;439;0", "wc_reply_authors": "161;0;1937;0", "reply_reviewers": "0;0;3;0", "reply_authors": "2;1;5;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 96.75, 25.907286619790966 ], "wc_strengths_avg": [ 81.25, 57.59503016754137 ], "wc_weaknesses_avg": [ 286.0, 74.00337830126406 ], "wc_questions_avg": [ 179.25, 59.980726070963826 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 644.25, 61.83597254026171 ], "wc_reply_reviewers_avg": [ 109.75, 190.09257613068428 ], "wc_reply_authors_avg": [ 524.5, 818.1517279820413 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3545613360728507480&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";;microsoft.com;microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Visual Programming for Step-by-Step Text-to-Image Generation and Evaluation", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/69940", "id": "yhBFG9Y85R", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/13250eb13871b3c2c0a0667b54bad165-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yhBFG9Y85R", "openreview": "https://openreview.net/forum?id=yhBFG9Y85R", "poster": "/media/PosterPDFs/NeurIPS%202023/69940.png?t=1699541920.6005516", "slides": "https://nips.cc/virtual/2023/poster/69940", "video": "https://nips.cc/virtual/2023/poster/69940", "author_site": "Jaemin Cho, Abhay Zala, Mohit Bansal", "tldr": "", "abstract": "As large language models have demonstrated impressive performance in many domains, recent works have adopted language models (LMs) as controllers of visual modules for vision-and-language tasks. While existing work focuses on equipping LMs with visual understanding, we propose two novel interpretable/explainable visual programming frameworks for text-to-image (T2I) generation and evaluation. First, we introduce VPGen, an interpretable step-by-step T2I generation framework that decomposes T2I generation into three steps: object/count generation, layout generation, and image generation. We employ an LM to handle the first two steps (object/count generation and layout generation), by finetuning it on text-layout pairs. Our step-by-step T2I generation framework provides stronger spatial control than end-to-end models, the dominant approach for this task. Furthermore, we leverage the world knowledge of pretrained LMs, overcoming the limitation of previous layout-guided T2I works that can only handle predefined object classes. We demonstrate that our VPGen has improved control in counts/spatial relations/scales of objects than state-of-the-art T2I generation models. Second, we introduce VPEval, an interpretable and explainable evaluation framework for T2I generation based on visual programming. Unlike previous T2I evaluations with a single scoring model that is accurate in some skills but unreliable in others, VPEval produces evaluation programs that invoke a set of visual modules that are experts in different skills, and also provides visual+textual explanations of the evaluation results. Our analysis shows that VPEval provides a more human-correlated evaluation for skill-specific and open-ended prompts than widely used single model-based evaluation. 
We hope that our work encourages future progress on interpretable/explainable generation and evaluation for T2I models.", "keywords": "text-to-image generation; visual programming; text-to-image evaluation; step-by-step generation; interpretability; explainability", "primary_area": "", "supplementary_material": "", "author": "Jaemin Cho;Abhay Zala;Mohit Bansal", "authorids": "~Jaemin_Cho1;~Abhay_Zala1;~Mohit_Bansal2", "gender": "M;;M", "homepage": "https://j-min.io;;https://www.cs.unc.edu/~mbansal/", "dblp": "130/8348-1;278/2061.html;32/5243.html", "google_scholar": "IbQZoHQAAAAJ;8mfWxD8AAAAJ;DN8QtscAAAAJ", "orcid": "0000-0002-1558-6169;;", "linkedin": ";;", "or_profile": "~Jaemin_Cho1;~Abhay_Zala1;~Mohit_Bansal2", "aff": "University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "unc.edu;cs.unc.edu;unc.edu", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\ncho2023visual,\ntitle={Visual Programming for Step-by-Step Text-to-Image Generation and Evaluation},\nauthor={Jaemin Cho and Abhay Zala and Mohit Bansal},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yhBFG9Y85R}\n}", "github": "", "project": "", "reviewers": "1fZw;bJid;aEEd;PtVL;GZ2e", "pdf_size": 20734860, "rating": "5;6;6;7;8", "confidence": "4;3;4;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "75;66;297;86;144", "wc_strengths": "84;49;195;77;201", "wc_weaknesses": "130;130;492;50;33", "wc_questions": "1;33;213;18;29", "wc_limitations": "1;6;7;27;1", "wc_review": "291;284;1204;258;408", "wc_reply_reviewers": "0;75;158;29;0", "wc_reply_authors": "0;33;30;51;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 133.6, 86.11294908432761 ], "wc_strengths_avg": [ 121.2, 63.81974616057322 ], "wc_weaknesses_avg": [ 167.0, 167.33678615295563 ], "wc_questions_avg": [ 58.8, 77.89326029895012 ], "wc_limitations_avg": [ 8.4, 9.624967532412773 ], "wc_review_avg": [ 489.0, 361.2135102678193 ], "wc_reply_reviewers_avg": [ 52.4, 59.49991596632721 ], "wc_reply_authors_avg": [ 22.8, 19.95394697797907 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.08006407690254366, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1955057305586846439&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "unc.edu;cs.unc.edu;unc.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of North Carolina;University of North Carolina at Chapel Hill", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC;UNC Chapel Hill", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Finite-Time Analysis of Whittle Index based Q-Learning for Restless Multi-Armed Bandits with Neural Network Function Approximation", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/69939", "id": "yhNHpLWJDl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5c7c66dfc9f93f0c738947f3b1c13832-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yhNHpLWJDl", "openreview": "https://openreview.net/forum?id=yhNHpLWJDl", "poster": "/media/PosterPDFs/NeurIPS%202023/69939.png?t=1701441547.4813445", "slides": "https://nips.cc/virtual/2023/poster/69939", "video": "https://nips.cc/virtual/2023/poster/69939", "author_site": "GUOJUN XIONG, Jian Li", "tldr": "", "abstract": "Whittle index policy is a heuristic to the intractable restless multi-armed bandits (RMAB) problem. Although it is provably asymptotically optimal, finding Whittle indices remains difficult. In this paper, we present Neural-Q-Whittle, a Whittle index based Q-learning algorithm for RMAB with neural network function approximation, which is an example of nonlinear two-timescale stochastic approximation with Q-function values updated on a faster timescale and Whittle indices on a slower timescale. Despite the empirical success of deep Q-learning, the non-asymptotic convergence rate of Neural-Q-Whittle, which couples neural networks with two-timescale Q-learning largely remains unclear. This paper provides a finite-time analysis of Neural-Q-Whittle, where data are generated from a Markov chain, and Q-function is approximated by a ReLU neural network. Our analysis leverages a Lyapunov drift approach to capture the evolution of two coupled parameters, and the nonlinearity in value function approximation further requires us to characterize the approximation error. Combing these provide Neural-Q-Whittle with $\\mathcal{O}(1/k^{2/3})$ convergence rate, where $k$ is the number of iterations.", "keywords": "Restless bandits;Whittle index policy;Q-learning;Two-timescale stochastic approximation;Neural network function approximation", "primary_area": "", "supplementary_material": "/attachment/3645409d9e1e5a075df5ad4007a7c4f50f282a8f.zip", "author": "GUOJUN XIONG;Jian Li", "authorids": "~GUOJUN_XIONG1;~Jian_Li14", "gender": ";M", "homepage": "https://xionggj001.github.io/;https://sites.google.com/stonybrook.edu/jianli", "dblp": "214/2134.html;33/5448-8", "google_scholar": "FIBwLnoAAAAJ;h039Yq4AAAAJ", "orcid": ";", "linkedin": "guojun-%E5%9B%BD%E9%92%A7-xiong-48696aa6/;", "or_profile": "~GUOJUN_XIONG1;~Jian_Li14", "aff": "State University of New York at Stony Brook;State University of New York, Binghamton", "aff_domain": "stonybrook.edu;binghamton.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxiong2023finitetime,\ntitle={Finite-Time Analysis of Whittle Index based Q-Learning for Restless Multi-Armed Bandits with Neural Network Function Approximation},\nauthor={GUOJUN XIONG and Jian Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yhNHpLWJDl}\n}", "github": "", "project": "", "reviewers": "ZyEQ;rZoR;bjcG;PM6q;KAmk", "pdf_size": 804737, "rating": "6;6;6;7;7", "confidence": "4;3;3;4;3", "soundness": "3;3;3;4;4", "novelty": "3;2;3;3;3", "presentation": "3;3;3;2;2", "wc_summary": "147;39;79;38;157", "wc_strengths": "107;24;93;60;54", "wc_weaknesses": "201;48;47;36;132", "wc_questions": "317;21;6;22;104", "wc_limitations": "87;8;1;16;21", "wc_review": "859;140;226;172;468", "wc_reply_reviewers": "33;17;9;18;31", "wc_reply_authors": "14;14;15;15;15", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": 
[ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 92.0, 51.27182462132589 ], "wc_strengths_avg": [ 67.6, 29.4659125092029 ], "wc_weaknesses_avg": [ 92.8, 64.14795398140146 ], "wc_questions_avg": [ 94.0, 116.69275898700828 ], "wc_limitations_avg": [ 26.6, 30.961912085657755 ], "wc_review_avg": [ 373.0, 268.89403117213294 ], "wc_reply_reviewers_avg": [ 21.6, 9.068627239003707 ], "wc_reply_authors_avg": [ 14.6, 0.48989794855663565 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.16666666666666669, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5936872381624211097&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "stonybrook.edu;binghamton.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "State University of New York at Stony Brook;State University of New York at Binghamton", "aff_unique_dep": ";", "aff_unique_url": "https://www.stonybrook.edu;https://www.binghamton.edu", "aff_unique_abbr": "SUNY Stony Brook;SUNY Binghamton", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stony Brook;Binghamton", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "E2PNet: Event to Point Cloud Registration with Spatio-Temporal Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69938", "id": "yiehppUCO2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a2d1bf9bc0a9794cf82c1341a7a75e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yiehppUCO2", "openreview": "https://openreview.net/forum?id=yiehppUCO2", "poster": "/media/PosterPDFs/NeurIPS%202023/69938.png?t=1701158006.8746014", "slides": "https://nips.cc/virtual/2023/poster/69938", "video": "https://nips.cc/virtual/2023/poster/69938", "author_site": "Xiuhong Lin, Changjie Qiu, zhipeng cai, Siqi Shen, Yu Zang, Weiquan Liu, Xuesheng Bian, Matthias M\u00fcller, Cheng Wang", "tldr": "", "abstract": "Event cameras have emerged as promising vision sensors in recent years due to their unparalleled temporal resolution and dynamic range. While registration of 2D RGB images to 3D point clouds is a long-standing problem in computer vision, no prior work studies 2D-3D registration for event cameras. To this end, we propose E2PNet, the first learning-based method for event-to-point cloud registration.\nThe core of E2PNet is a novel feature representation network called Event-Points-to-Tensor (EP2T), which encodes event data into a 2D grid-shaped feature tensor. This grid-shaped feature enables mature RGB-based frameworks to be easily used for event-to-point cloud registration, without changing hyper-parameters or the training procedure. EP2T treats the event input as spatio-temporal point clouds. Unlike standard 3D learning architectures that treat all dimensions of point clouds equally, the novel sampling and information aggregation modules in EP2T are designed to handle the inhomogeneity of the spatial and temporal dimensions. Experiments on the MVSEC and VECtor datasets demonstrate the superiority of E2PNet over hand-crafted and other learning-based methods. 
Compared to RGB-based registration, E2PNet is more robust to extreme illumination or fast motion due to the use of event data. Beyond 2D-3D registration, we also show the potential of EP2T for other vision tasks such as flow estimation, event-to-image reconstruction and object recognition. The source code can be found at: https://github.com/Xmu-qcj/E2PNet.", "keywords": "event camera;2D-3D registration;representation learning", "primary_area": "", "supplementary_material": "/attachment/6720be1a4627edb330b2231c7b2fe4fbd8cfbace.pdf", "author": "Xiuhong Lin;Changjie Qiu;zhipeng cai;Siqi Shen;Yu Zang;Weiquan Liu;Xuesheng Bian;Matthias M\u00fcller;Cheng Wang", "authorids": "~Xiuhong_Lin1;~Changjie_Qiu1;~zhipeng_cai3;~Siqi_Shen5;~Yu_Zang2;~Weiquan_Liu1;~Xuesheng_Bian1;~Matthias_M\u00fcller1;~Cheng_Wang2", "gender": ";;M;;M;M;F;;M", "homepage": ";;https://zhipengcai.github.io;;https://asc.xmu.edu.cn/t/zangyu;https://cec.jmu.edu.cn/info/1009/6440.htm;;https://matthias.pw;https://chwang.xmu.edu.cn/index_en.htm", "dblp": ";;;;;03/1188;253/5024;169/4686-1;54/2062-3", "google_scholar": ";;;;C6yF-0gAAAAJ;vNDNtP8AAAAJ;luhqZbUAAAAJ;AeMLOMEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-7193-1414;0000-0002-5934-1139;;;0000-0001-6075-796X", "linkedin": ";;;;;;;;", "or_profile": "~Xiuhong_Lin1;~Changjie_Qiu1;~zhipeng_cai3;~Siqi_Shen5;~Yu_Zang2;~Weiquan_Liu1;~Xuesheng_Bian1;~Matthias_M\u00fcller1;~Cheng_Wang2", "aff": ";;Intel;;Xiamen University;Xiamen University;;Intel;Xiamen University", "aff_domain": ";;intel.com;;xmu.edu.cn;xmu.edu.cn;;intel.com;xmu.edu.cn", "position": ";;Researcher;;Associate Professor;Postdoc;;Researcher;Full Professor", "bibtex": "@inproceedings{\nlin2023epnet,\ntitle={E2{PN}et: Event to Point Cloud Registration with Spatio-Temporal Representation Learning},\nauthor={Xiuhong Lin and Changjie Qiu and zhipeng cai and Siqi Shen and Yu Zang and Weiquan Liu and Xuesheng Bian and Matthias M{\\\"u}ller and Cheng Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yiehppUCO2}\n}", "github": "", "project": "", "reviewers": "N3N6;mNvD;W8Di;mSRE", "pdf_size": 3937847, "rating": "5;5;6;7", "confidence": "4;5;4;3", "soundness": "2;2;4;2", "novelty": "2;2;4;3", "presentation": "2;3;3;1", "wc_summary": "47;120;96;74", "wc_strengths": "14;144;145;56", "wc_weaknesses": "102;69;147;154", "wc_questions": "2;4;65;76", "wc_limitations": "1;37;105;29", "wc_review": "166;374;558;389", "wc_reply_reviewers": "36;0;14;12", "wc_reply_authors": "335;79;78;14", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 84.25, 26.966414296305693 ], "wc_strengths_avg": [ 89.75, 56.7290710306453 ], "wc_weaknesses_avg": [ 118.0, 34.61935874622752 ], "wc_questions_avg": [ 36.75, 33.980693047670464 ], "wc_limitations_avg": [ 43.0, 38.2099463490856 ], "wc_review_avg": [ 371.75, 139.03664085412882 ], "wc_reply_reviewers_avg": [ 15.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 126.5, 123.22438881974624 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5349700384653545923&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";;intel.com;;xmu.edu.cn;xmu.edu.cn;;intel.com;xmu.edu.cn", "author_num": 9, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Intel;Xiamen University", "aff_unique_dep": "Intel Corporation;", "aff_unique_url": "https://www.intel.com;https://www.xmu.edu.cn", "aff_unique_abbr": "Intel;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "OBJECT 3DIT: Language-guided 3D-aware Image Editing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69937", "id": "yjWVd8Fhqt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b0153a91f827b14e8bfea4e211362f3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yjWVd8Fhqt", "openreview": "https://openreview.net/forum?id=yjWVd8Fhqt", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69937", "video": "https://nips.cc/virtual/2023/poster/69937", "author_site": "Oscar Michel, Anand Bhattad, Eli VanderBilt, Ranjay Krishna, Aniruddha Kembhavi, Tanmay Gupta", "tldr": "", "abstract": "Existing image editing tools, while powerful, typically disregard the underlying 3D geometry from which the image is projected. As a result, edits made using these tools may become detached from the geometry and lighting conditions that are at the foundation of the image formation process; such edits break the portrayal of a coherent 3D world. 3D-aware generative models are a promising solution, but currently only succeed on small datasets or at the level of a single object. In this work, we formulate the new task of language-guided 3D-aware editing, where objects in an image should be edited according to a language instruction while remaining consistent with the underlying 3D scene. To promote progress towards this goal, we release OBJect: a benchmark dataset of 400K editing examples created from procedurally generated 3D scenes. Each example consists of an input image, editing instruction in language, and the edited image. We also introduce 3DIT: single and multi-task models for four editing tasks. Our models show impressive abilities to understand the 3D composition of entire scenes, factoring in surrounding objects, surfaces, lighting conditions, shadows, and physically-plausible object configurations. 
Surprisingly, despite being trained on only synthetic scenes from OBJect, the editing capabilities of 3DIT generalize to real-world images.", "keywords": "computer vision;image editing;generative modeling;diffusion models;3D", "primary_area": "", "supplementary_material": "/attachment/c732e24ff404cda4f4413baef62c5fe86f96edc9.zip", "author": "Oscar Michel;Anand Bhattad;Eli VanderBilt;Ranjay Krishna;Aniruddha Kembhavi;Tanmay Gupta", "authorids": "~Oscar_Michel1;~Anand_Bhattad1;~Eli_VanderBilt1;~Ranjay_Krishna1;~Aniruddha_Kembhavi1;~Tanmay_Gupta1", "gender": "M;;M;M;M;M", "homepage": ";https://anandbhattad.github.io/;https://www.elivanderbilt.com/;http://ranjaykrishna.com;https://anikem.github.io/;http://tanmaygupta.info/", "dblp": "308/2324;215/4305;263/1958;167/3785;81/7583;62/1086", "google_scholar": "D0WvX4YAAAAJ;XUsauXIAAAAJ;;IcqahyAAAAAJ;JnUevM0AAAAJ;https://scholar.google.co.in/citations?user=zblQKM8AAAAJ", "orcid": ";;;0000-0001-8784-2531;;", "linkedin": ";;eli-vanderbilt-a9710716;ranjay-krishna-1a344444/;;", "or_profile": "~Oscar_Michel1;~Anand_Bhattad1;~Eli_VanderBilt1;~Ranjay_Krishna1;~Aniruddha_Kembhavi1;~Tanmay_Gupta1", "aff": "Allen Institute for Artificial Intelligence;University of Illinois Urbana Champaign;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence", "aff_domain": "allenai.org;illinois.edu;allenai.org;cs.washington.edu;allenai.org;allenai.org", "position": "Researcher;PhD student;Researcher;Assistant Professor;Research Manager;Research Scientist", "bibtex": "@inproceedings{\nmichel2023object,\ntitle={{OBJECT} 3{DIT}: Language-guided 3D-aware Image Editing},\nauthor={Oscar Michel and Anand Bhattad and Eli VanderBilt and Ranjay Krishna and Aniruddha Kembhavi and Tanmay Gupta},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yjWVd8Fhqt}\n}", "github": "", "project": "", "reviewers": "eZ2Z;yCLA;Rpi6;1T99;SoaB", "pdf_size": 4156632, "rating": "5;5;5;5;6", "confidence": "3;3;5;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;4;4;3", "wc_summary": "75;152;58;237;88", "wc_strengths": "56;113;78;200;65", "wc_weaknesses": "101;317;177;115;274", "wc_questions": "5;6;72;1;57", "wc_limitations": "6;22;7;2;10", "wc_review": "243;610;392;555;494", "wc_reply_reviewers": "31;39;142;0;21", "wc_reply_authors": "27;35;88;0;29", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 122.0, 65.70540312637918 ], "wc_strengths_avg": [ 102.4, 52.507523270480014 ], "wc_weaknesses_avg": [ 196.8, 85.63737501815432 ], "wc_questions_avg": [ 28.2, 30.062601351180504 ], "wc_limitations_avg": [ 9.4, 6.8 ], "wc_review_avg": [ 458.8, 129.91289389433214 ], "wc_reply_reviewers_avg": [ 46.6, 49.45543448398771 ], "wc_reply_authors_avg": [ 35.8, 28.74299914761854 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6023857969790326197&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": 
"allenai.org;illinois.edu;allenai.org;cs.washington.edu;allenai.org;allenai.org", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Allen Institute for Artificial Intelligence;University of Illinois Urbana-Champaign;University of Washington", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://illinois.edu;https://www.washington.edu", "aff_unique_abbr": "AI2;UIUC;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mind the spikes: Benign overfitting of kernels and neural networks in fixed dimension", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69936", "id": "yjYwbZBJyl", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/421f83663c02cdaec8c3c38337709989-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yjYwbZBJyl", "openreview": "https://openreview.net/forum?id=yjYwbZBJyl", "poster": "/media/PosterPDFs/NeurIPS%202023/69936.png?t=1701469269.0671105", "slides": "https://nips.cc/virtual/2023/poster/69936", "video": "https://nips.cc/virtual/2023/poster/69936", "author_site": "Moritz Haas, David Holzm\u00fcller, Ulrike Luxburg, Ingo Steinwart", "tldr": "", "abstract": "The success of over-parameterized neural networks trained to near-zero training error has caused great interest in the phenomenon of benign overfitting, where estimators are statistically consistent even though they interpolate noisy training data. While benign overfitting in fixed dimension has been established for some learning methods, current literature suggests that for regression with typical kernel methods and wide neural networks, benign overfitting requires a high-dimensional setting, where the dimension grows with the sample size. In this paper, we show that the smoothness of the estimators, and not the dimension, is the key: benign overfitting is possible if and only if the estimator's derivatives are large enough. We generalize existing inconsistency results to non-interpolating models and more kernels to show that benign overfitting with moderate derivatives is impossible in fixed dimension. Conversely, we show that benign overfitting is possible for regression with a sequence of spiky-smooth kernels with large derivatives. Using neural tangent kernels, we translate our results to wide neural networks. We prove that while infinite-width networks do not overfit benignly with the ReLU activation, this can be fixed by adding small high-frequency fluctuations to the activation function. 
Our experiments verify that such neural networks, while overfitting, can indeed generalize well even on low-dimensional data sets.", "keywords": "benign overfitting;kernels;neural tangent kernel;consistency;learning theory", "primary_area": "", "supplementary_material": "/attachment/8c42b79bfa142048a762f54c2d25b6a293278aab.zip", "author": "Moritz Haas;David Holzm\u00fcller;Ulrike von Luxburg;Ingo Steinwart", "authorids": "~Moritz_Haas1;~David_Holzm\u00fcller1;~Ulrike_von_Luxburg1;~Ingo_Steinwart1", "gender": ";M;F;M", "homepage": "https://www.tml.cs.uni-tuebingen.de/team/haas/index.php;https://www.isa.uni-stuttgart.de/en/institute/team/Holzmueller/;;https://www.isa.uni-stuttgart.de/en/institute/team/Steinwart-00002/", "dblp": "332/4834;207/7947;06/1082;89/3492", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.de/citations?user=pIT7A7QAAAAJ;mMifMdoAAAAJ;https://scholar.google.de/citations?user=zFuwHeAAAAAJ", "orcid": ";0000-0002-9443-0049;;0000-0002-4436-7109", "linkedin": ";david-holzm%C3%BCller-164a9b256/;;", "or_profile": "~Moritz_Haas1;~David_Holzm\u00fcller1;~Ulrike_von_Luxburg1;~Ingo_Steinwart1", "aff": "Amazon;University of Stuttgart;University of Tuebingen;University of Stuttgart", "aff_domain": "amazon.com;uni-stuttgart.de;uni-tuebingen.de;uni-stuttgart.de", "position": "Intern;PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nhaas2023mind,\ntitle={Mind the spikes: Benign overfitting of kernels and neural networks in fixed dimension},\nauthor={Moritz Haas and David Holzm{\\\"u}ller and Ulrike von Luxburg and Ingo Steinwart},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yjYwbZBJyl}\n}", "github": "", "project": "", "reviewers": "GSDW;jpYR;P771;m3mo;Si3s", "pdf_size": 1910154, "rating": "6;6;6;7;8", "confidence": "2;3;4;3;4", "soundness": "3;3;3;4;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "42;77;83;75;55", "wc_strengths": "96;52;99;113;85", "wc_weaknesses": "196;109;39;110;1", "wc_questions": "227;271;19;1;34", "wc_limitations": "1;3;22;1;7", "wc_review": "562;512;262;300;182", "wc_reply_reviewers": "31;15;9;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.4, 15.409088227406578 ], "wc_strengths_avg": [ 89.0, 20.54263858417414 ], "wc_weaknesses_avg": [ 91.0, 67.10290604735387 ], "wc_questions_avg": [ 110.4, 114.4964628274603 ], "wc_limitations_avg": [ 6.8, 7.909487973314076 ], "wc_review_avg": [ 363.6, 147.46606389268007 ], "wc_reply_reviewers_avg": [ 11.0, 11.50651989091402 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.46770717334674267, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7460219604054024752&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 10, "email": "amazon.com;uni-stuttgart.de;uni-tuebingen.de;uni-stuttgart.de", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Amazon;University of Stuttgart;University of Tuebingen", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": 
"https://www.amazon.com;https://www.uni-stuttgart.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Amazon;USTuttgart;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Germany" }, { "title": "DiffTraj: Generating GPS Trajectory with Diffusion Probabilistic Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69935", "id": "ykMdzevPkJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/cd9b4a28fb9eebe0430c3312a4898a41-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ykMdzevPkJ", "openreview": "https://openreview.net/forum?id=ykMdzevPkJ", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69935", "video": "https://nips.cc/virtual/2023/poster/69935", "author_site": "Yuanshao Zhu, Yongchao Ye, Shiyao Zhang, Xiangyu Zhao, James Yu", "tldr": "", "abstract": "Pervasive integration of GPS-enabled devices and data acquisition technologies has led to an exponential increase in GPS trajectory data, fostering advancements in spatial-temporal data mining research. Nonetheless, GPS trajectories contain personal geolocation information, rendering serious privacy concerns when working with raw data. A promising approach to address this issue is trajectory generation, which involves replacing original data with generated, privacy-free alternatives. Despite the potential of trajectory generation, the complex nature of human behavior and its inherent stochastic characteristics pose challenges in generating high-quality trajectories. \nIn this work, we propose a spatial-temporal diffusion probabilistic model for trajectory generation (DiffTraj). This model effectively combines the generative abilities of diffusion models with the spatial-temporal features derived from real trajectories. The core idea is to reconstruct and synthesize geographic trajectories from white noise through a reverse trajectory denoising process. Furthermore, we propose a Trajectory UNet (Traj-UNet) deep neural network to embed conditional information and accurately estimate noise levels during the reverse process. Experiments on two real-world datasets show that DiffTraj can be intuitively applied to generate high-fidelity trajectories while retaining the original distributions. 
Moreover, the generated results can support downstream trajectory analysis tasks and significantly outperform other methods in terms of geo-distribution evaluations.", "keywords": "Trajectory Generation;Diffusion Model;Urban Computing;Spatial-temporal Data Mining", "primary_area": "", "supplementary_material": "/attachment/590471a8f1f05be4b1e7be989f62a40df075ae9b.zip", "author": "Yuanshao Zhu;Yongchao Ye;Shiyao Zhang;Xiangyu Zhao;James Yu", "authorids": "~Yuanshao_Zhu1;~Yongchao_Ye1;zhangsy@sustech.edu.cn;~Xiangyu_Zhao1;~James_Yu1", "gender": ";M;;M;", "homepage": ";;;https://zhaoxyai.github.io/;", "dblp": ";259/1930;;08/890-1.html;", "google_scholar": ";u6IHWCkAAAAJ;;;", "orcid": ";0000-0001-9782-218X;;0000-0003-2926-4416;", "linkedin": ";;;;", "or_profile": "~Yuanshao_Zhu1;~Yongchao_Ye1;zhangsy@sustech.edu.cn;~Xiangyu_Zhao1;~James_Yu1", "aff": ";Southern University of Science and Technology;;City University of Hong Kong;", "aff_domain": ";sustech.edu.cn;;cityu.edu.hk;", "position": ";MS student;;Assistant Professor;", "bibtex": "@inproceedings{\nzhu2023difftraj,\ntitle={DiffTraj: Generating {GPS} Trajectory with Diffusion Probabilistic Model},\nauthor={Yuanshao Zhu and Yongchao Ye and Shiyao Zhang and Xiangyu Zhao and James Yu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ykMdzevPkJ}\n}", "github": "", "project": "", "reviewers": "nB5g;8UQL;uaXp;gS1x", "pdf_size": 15517640, "rating": "4;7;7;8", "confidence": "4;4;5;4", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "40;185;178;141", "wc_strengths": "36;105;254;244", "wc_weaknesses": "336;138;66;50", "wc_questions": "7;225;57;72", "wc_limitations": "1;1;15;18", "wc_review": "420;654;570;525", "wc_reply_reviewers": "0;55;35;79", "wc_reply_authors": "0;31;24;33", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 136.0, 57.89214109013416 ], "wc_strengths_avg": [ 159.75, 92.59150878995331 ], "wc_weaknesses_avg": [ 147.5, 113.76620763653854 ], "wc_questions_avg": [ 90.25, 81.43517360453038 ], "wc_limitations_avg": [ 8.75, 7.8222439235810075 ], "wc_review_avg": [ 542.25, 84.41082572750962 ], "wc_reply_reviewers_avg": [ 42.25, 28.94283158227612 ], "wc_reply_authors_avg": [ 22.0, 13.133925536563698 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8434782976257871662&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "email": ";sustech.edu.cn;;cityu.edu.hk;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Southern University of Science and Technology;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "SUSTech;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Deep Momentum Multi-Marginal Schr\u00f6dinger Bridge", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69934", "id": "ykvvv0gc4R", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2c39fe6ce838440faf03a0f780e7a63-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ykvvv0gc4R", "openreview": "https://openreview.net/forum?id=ykvvv0gc4R", "poster": "/media/PosterPDFs/NeurIPS%202023/69934.png?t=1697212221.3875446", "slides": "https://nips.cc/virtual/2023/poster/69934", "video": "https://nips.cc/virtual/2023/poster/69934", "author_site": "Tianrong Chen, Guan-Horng Liu, Molei Tao, Evangelos Theodorou", "tldr": "", "abstract": "It is a crucial challenge to reconstruct population dynamics using unlabeled samples from distributions at coarse time intervals. Recent approaches such as flow-based models or Schr\u00f6dinger Bridge (SB) models have demonstrated appealing performance, yet the inferred sample trajectories either fail to account for the underlying stochasticity or are unnecessarily rigid. In this article, we extend SB into phase space and propose $\\underline{D}$eep $\\underline{M}$omentum Multi-Marginal $\\underline{S}$chr\u00f6dinger $\\underline{B}$ridge (DMSB), a novel computational framework that learns the smooth measure-valued spline for stochastic systems that satisfy position marginal constraints across time. By tailoring the celebrated Bregman Iteration and extending the Iteration Proportional Fitting to phase space, we manage to handle high-dimensional multi-marginal trajectory inference tasks efficiently. Our algorithm outperforms baselines significantly, as evidenced by experiments for synthetic datasets and a real-world single-cell RNA sequence dataset. Additionally, the proposed approach can reasonably reconstruct the evolution of velocity distribution, from position snapshots only, when there is a ground truth velocity that is nevertheless inaccessible.", "keywords": "Schr\u00f6dinger Bridge;Trajectory Inference;Optimal Transport", "primary_area": "", "supplementary_material": "", "author": "Tianrong Chen;Guan-Horng Liu;Molei Tao;Evangelos Theodorou", "authorids": "~Tianrong_Chen1;~Guan-Horng_Liu1;~Molei_Tao1;~Evangelos_Theodorou1", "gender": "M;;;M", "homepage": "https://tianrongchen.github.io/;https://ghliu.github.io;http://people.math.gatech.edu/~mtao8/;", "dblp": "227/7295;143/6907;56/9263;155/9964", "google_scholar": "r9D3Fg50gMoC;2Dt0VJ4AAAAJ;;", "orcid": ";;;", "linkedin": "tianrong-chen-757b3216a/;;;", "or_profile": "~Tianrong_Chen1;~Guan-Horng_Liu1;~Molei_Tao1;~Evangelos_Theodorou1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023deep,\ntitle={Deep Momentum Multi-Marginal Schr\\\"odinger Bridge},\nauthor={Tianrong Chen and Guan-Horng Liu and Molei Tao and Evangelos Theodorou},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ykvvv0gc4R}\n}", "github": "", "project": "", "reviewers": "EhNn;3Whf;A2vV;5VcR;17Zf", "pdf_size": 3169654, "rating": "6;6;6;6;6", "confidence": "4;3;4;2;4", "soundness": "3;4;4;3;2", "novelty": "3;3;3;3;3", "presentation": "3;3;1;3;2", "wc_summary": "83;270;196;145;36", "wc_strengths": "158;205;35;201;70", "wc_weaknesses": "211;106;127;57;445", "wc_questions": "99;15;356;106;105", "wc_limitations": "44;72;70;13;19", "wc_review": "595;668;784;522;675", "wc_reply_reviewers": "0;0;942;0;24", 
"wc_reply_authors": "0;0;1082;0;0", "reply_reviewers": "0;0;3;0;1", "reply_authors": "1;1;4;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 146.0, 82.39660187167915 ], "wc_strengths_avg": [ 133.8, 69.28607363677062 ], "wc_weaknesses_avg": [ 189.2, 137.24197608603572 ], "wc_questions_avg": [ 136.2, 115.12671279941941 ], "wc_limitations_avg": [ 43.6, 24.67873578609731 ], "wc_review_avg": [ 648.8, 87.52919512939668 ], "wc_reply_reviewers_avg": [ 193.2, 374.5153668409348 ], "wc_reply_authors_avg": [ 216.4, 432.8 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4601907773708058783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding How Consistency Works in Federated Learning via Stage-wise Relaxed Initialization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69933", "id": "ylPX5D7It7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fef126561bbf9d4467dbb8d27334b8fe-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ylPX5D7It7", "openreview": "https://openreview.net/forum?id=ylPX5D7It7", "poster": "/media/PosterPDFs/NeurIPS%202023/69933.png?t=1696836538.4379766", "slides": "https://nips.cc/virtual/2023/poster/69933", "video": "https://nips.cc/virtual/2023/poster/69933", "author_site": "Yan Sun, Li Shen, Dacheng Tao", "tldr": "", "abstract": "Federated learning (FL) is a distributed paradigm that coordinates massive local clients to collaboratively train a global model via stage-wise local training processes on the heterogeneous dataset.\n Previous works have implicitly studied that FL suffers from the \"client-drift\" problem, which is caused by the inconsistent optimum across local clients. However, till now it still lacks solid theoretical analysis to explain the impact of this local inconsistency. \n To alleviate the negative impact of the \"client drift\" and explore its substance in FL, in this paper, we first design an efficient FL algorithm FedInit, which allows employing the personalized relaxed initialization state at the beginning of each local training stage. Specifically, FedInit initializes the local state by moving away from the current global state towards the reverse direction of the latest local state. This relaxed initialization helps to revise the local divergence and enhance the local consistency level.\n Moreover, to further understand how inconsistency disrupts performance in FL, we introduce the excess risk analysis and study the divergence term to investigate the test error of the proposed FedInit method. Our studies show that on the non-convex objectives, optimization error is not sensitive to this local inconsistency, while it mainly affects the generalization error bound in FedInit. 
\n Extensive experiments are conducted to validate this conclusion. Our proposed FedInit could achieve state-of-the-art (SOTA) results compared to several advanced benchmarks without any additional costs. Meanwhile, stage-wise relaxed initialization could also be incorporated into the current advanced algorithms to achieve higher performance in the FL paradigm.", "keywords": "federated learning;local consistency;personalized initialization;excess risk", "primary_area": "", "supplementary_material": "/attachment/145e1938ac5765ac3d49968a5520f887b1504349.zip", "author": "Yan Sun;Li Shen;Dacheng Tao", "authorids": "~Yan_Sun3;~Li_Shen1;~Dacheng_Tao1", "gender": "M;M;", "homepage": ";https://sites.google.com/site/mathshenli/home;", "dblp": ";91/3680-8;", "google_scholar": "_-hoDQkAAAAJ;yVhgENIAAAAJ;", "orcid": "0000-0003-2271-252X;;", "linkedin": ";;", "or_profile": "~Yan_Sun3;~Li_Shen1;~Dacheng_Tao1", "aff": "University of Sydney;JD Explore Academy;", "aff_domain": "uni.sydney.edu.au;jd.com;", "position": "MS student;Researcher;", "bibtex": "@inproceedings{\nsun2023understanding,\ntitle={Understanding How Consistency Works in Federated Learning via Stage-wise Relaxed Initialization},\nauthor={Yan Sun and Li Shen and Dacheng Tao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ylPX5D7It7}\n}", "github": "", "project": "", "reviewers": "g4w5;sALG;9rh8;qyTU", "pdf_size": 1164445, "rating": "5;6;6;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "55;54;62;52", "wc_strengths": "35;52;36;145", "wc_weaknesses": "89;94;207;170", "wc_questions": "118;22;2;4", "wc_limitations": "32;11;1;4", "wc_review": "329;233;308;375", "wc_reply_reviewers": "254;21;21;0", "wc_reply_authors": "1339;35;49;0", "reply_reviewers": "3;1;1;0", "reply_authors": "6;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 55.75, 3.766629793329841 ], "wc_strengths_avg": [ 67.0, 45.53570028010989 ], "wc_weaknesses_avg": [ 140.0, 50.264301447448766 ], "wc_questions_avg": [ 36.5, 47.69433928675394 ], "wc_limitations_avg": [ 12.0, 12.103718436910205 ], "wc_review_avg": [ 311.25, 51.265851207212 ], "wc_reply_reviewers_avg": [ 74.0, 104.27607587553341 ], "wc_reply_authors_avg": [ 355.75, 567.9601108352592 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13108773715661598736&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "uni.sydney.edu.au;jd.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Sydney;JD", "aff_unique_dep": ";JD Explore Academy", "aff_unique_url": "https://www.sydney.edu.au;", "aff_unique_abbr": "USYD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Australia;" }, { "id": "yloVae273c", "title": "Offline Primal-Dual Reinforcement Learning for Linear MDPs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline Reinforcement Learning (RL) aims to learn a near-optimal\n policy from a fixed dataset of transitions collected by another policy.\n 
This problem has attracted a lot of attention recently, but most existing\n methods with strong theoretical guarantees are restricted to finite-horizon\n or tabular settings. In contrast, few algorithms for\n infinite-horizon settings with function approximation and minimal assumptions\n on the dataset are both sample- and computationally efficient.\n Another gap in the current literature is the lack of theoretical analysis for\n the average-reward setting, which is more challenging than the discounted setting.\n In this paper, we address both of these issues by proposing a primal-dual\n optimization method based on the linear programming formulation of RL.\nOur key contribution is a new reparametrization that allows us to derive low-variance gradient estimators that can be used in a stochastic optimization scheme using only samples from the behavior policy.\n Our method finds an $\\varepsilon$-optimal policy with\n $O(\\varepsilon^{-4})$ samples, improving on the previous $O(\\varepsilon^{-5})$,\n while being computationally efficient for\n infinite-horizon discounted and average-reward MDPs with realizable linear\n function approximation and partial coverage. Moreover, to the best of our\n knowledge, this is the first theoretical result for average-reward offline RL.", "keywords": "Reinforcement Learning;Offline Reinforcement Learning;Linear MDPs", "primary_area": "", "supplementary_material": "/attachment/26c847e7ca7ae79bb33e4e61577d5e46eb73d2b6.pdf", "author": "Germano Gabbianelli;Gergely Neu;Nneka Okolo;Matteo Papini", "authorids": "~Germano_Gabbianelli1;~Gergely_Neu1;~Nneka_Okolo1;~Matteo_Papini1", "gender": "M;M;F;M", "homepage": "https://germano.dev;http://cs.bme.hu/~gergo;;https://t3p.github.io/", "dblp": ";83/7606;331/5997;209/4897", "google_scholar": ";https://scholar.google.ch/citations?user=uz27G84AAAAJ;s8DIX2sAAAAJ;https://scholar.google.it/citations?user=A2WxZlsAAAAJ", "orcid": ";;0009-0004-0137-970X;0000-0002-3807-3171", "linkedin": ";;nneka-okolo-876410134/;matteo-papini/", "or_profile": "~Germano_Gabbianelli1;~Gergely_Neu1;~Nneka_Okolo1;~Matteo_Papini1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra;Universitat Pompeu Fabra;Universitat Pompeu Fabra", "aff_domain": "upf.edu;upf.edu;upf.edu;upf.edu", "position": "PhD student;Assistant Professor;PhD student;Postdoc", "bibtex": "@misc{\ngabbianelli2023offline,\ntitle={Offline Primal-Dual Reinforcement Learning for Linear {MDP}s},\nauthor={Germano Gabbianelli and Gergely Neu and Nneka Okolo and Matteo Papini},\nyear={2023},\nurl={https://openreview.net/forum?id=yloVae273c}\n}", "github": "", "project": "", "reviewers": "C22H;zEPA;rDuz;6BYu;vy6G;7fb2", "site": "https://openreview.net/forum?id=yloVae273c", "pdf_size": 387904, "rating": "4;5;5;5;6;6", "confidence": "2;4;4;3;3;3", "soundness": "3;3;3;3;3;2", "novelty": "2;3;2;3;3;3", "presentation": "3;3;3;4;3;3", "wc_summary": "52;44;47;93;48;146", "wc_strengths": "31;76;19;116;55;103", "wc_weaknesses": "155;538;116;162;31;272", "wc_questions": "27;35;1;2;78;458", "wc_limitations": "2;1;1;1;1;1", "wc_review": "267;694;184;374;213;980", "wc_reply_reviewers": "78;553;74;11;23;20", "wc_reply_authors": "448;332;29;20;19;52", "reply_reviewers": "1;3;1;1;1;1", "reply_authors": "2;4;2;2;2;3", "rating_avg": [ 5.166666666666667, 0.6871842709362768 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.1666666666666665, 
0.3726779962499649 ], "wc_summary_avg": [ 71.66666666666667, 37.19617662550219 ], "wc_strengths_avg": [ 66.66666666666667, 35.405586502069916 ], "wc_weaknesses_avg": [ 212.33333333333334, 162.07062932218437 ], "wc_questions_avg": [ 100.16666666666667, 162.07345728266412 ], "wc_limitations_avg": [ 1.1666666666666667, 0.3726779962499649 ], "wc_review_avg": [ 452.0, 290.574717872472 ], "wc_reply_reviewers_avg": [ 126.5, 192.5294349790009 ], "wc_reply_authors_avg": [ 150.0, 173.3176274935703 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.74535599249993 ], "reply_authors_avg": [ 2.5, 0.7637626158259734 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2941176470588236, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10789280168597606094&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Spain" }, { "title": "Model-Based Control with Sparse Neural Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69932", "id": "ymBG2xs9Zf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/142cdba4b8d1e03f9ee131ac86bb0afc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ymBG2xs9Zf", "openreview": "https://openreview.net/forum?id=ymBG2xs9Zf", "poster": "/media/PosterPDFs/NeurIPS%202023/69932.png?t=1702346258.8406904", "slides": "https://nips.cc/virtual/2023/poster/69932", "video": "https://nips.cc/virtual/2023/poster/69932", "author_site": "Ziang Liu, Genggeng Zhou, Jeff He, Tobia Marcucci, Fei-Fei Li, Jiajun Wu, Yunzhu Li", "tldr": "", "abstract": "Learning predictive models from observations using deep neural networks (DNNs) is a promising new approach to many real-world planning and control problems. However, common DNNs are too unstructured for effective planning, and current control methods typically rely on extensive sampling or local gradient descent. In this paper, we propose a new framework for integrated model learning and predictive control that is amenable to efficient optimization algorithms. Specifically, we start with a ReLU neural model of the system dynamics and, with minimal losses in prediction accuracy, we gradually sparsify it by removing redundant neurons. This discrete sparsification process is approximated as a continuous problem, enabling an end-to-end optimization of both the model architecture and the weight parameters. The sparsified model is subsequently used by a mixed-integer predictive controller, which represents the neuron activations as binary variables and employs efficient branch-and-bound algorithms. Our framework is applicable to a wide variety of DNNs, from simple multilayer perceptrons to complex graph neural dynamics. It can efficiently handle tasks involving complicated contact dynamics, such as object pushing, compositional object sorting, and manipulation of deformable objects. 
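As a rough illustration of the sparsification idea above, the sketch below prunes hidden ReLU units by the norm of their outgoing weights. This magnitude-based rule is a crude stand-in for the paper's continuous, end-to-end sparsification; all shapes and the keep ratio are illustrative assumptions.

```python
import numpy as np

def prune_hidden_units(W1, b1, W2, keep_ratio=0.5):
    """Drop the hidden ReLU units with the smallest outgoing-weight norm.

    A crude magnitude-based stand-in for the paper's continuous
    sparsification; it only illustrates how removing redundant neurons
    shrinks the model handed to a mixed-integer predictive controller.
    """
    importance = np.linalg.norm(W2, axis=0)      # one score per hidden unit
    k = max(1, int(keep_ratio * W1.shape[0]))
    keep = np.sort(np.argsort(importance)[-k:])  # indices of units to keep
    return W1[keep], b1[keep], W2[:, keep]

rng = np.random.default_rng(0)
W1, b1, W2 = rng.normal(size=(16, 4)), rng.normal(size=16), rng.normal(size=(2, 16))
W1s, b1s, W2s = prune_hidden_units(W1, b1, W2)
print(W1s.shape, W2s.shape)  # (8, 4) (2, 8)
```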
Numerical and hardware experiments show that, despite the aggressive sparsification, our framework can deliver better closed-loop performance than existing state-of-the-art methods.", "keywords": "model learning;model-based control;neural network sparsification;mixed-integer programming;trajectory optimization", "primary_area": "", "supplementary_material": "/attachment/7b23fded0927bccf8d265e5d25fa1da8fd942ee2.zip", "author": "Ziang Liu;Genggeng Zhou;Jeff He;Tobia Marcucci;Li Fei-Fei;Jiajun Wu;Yunzhu Li", "authorids": "~Ziang_Liu2;~Genggeng_Zhou1;~Jeff_He1;~Tobia_Marcucci1;~Li_Fei-Fei1;~Jiajun_Wu1;~Yunzhu_Li1", "gender": ";M;M;M;F;M;M", "homepage": ";https://profiles.stanford.edu/genggeng-zhou;;https://tobiamarcucci.github.io;https://profiles.stanford.edu/fei-fei-li;https://jiajunwu.com;https://yunzhuli.github.io/", "dblp": ";;;;79/2528;117/4768;182/1831", "google_scholar": ";;;jfOVNcUAAAAJ;rDfyQnIAAAAJ;2efgcS0AAAAJ;WlA92lcAAAAJ", "orcid": ";;;;;0000-0002-4176-343X;", "linkedin": ";;jeff-he-99a96b163/;;fei-fei-li-4541247/;jiajunwu/;", "or_profile": "~Ziang_Liu2;~Genggeng_Zhou1;~Jeff_He1;~Tobia_Marcucci1;~Li_Fei-Fei1;~Jiajun_Wu1;~Yunzhu_Li1", "aff": ";Stanford University;Stanford University;Massachusetts Institute of Technology;Stanford University;Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu;mit.edu;stanford.edu;stanford.edu;stanford.edu", "position": ";MS student;MS student;PhD student;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nliu2023modelbased,\ntitle={Model-Based Control with Sparse Neural Dynamics},\nauthor={Ziang Liu and Genggeng Zhou and Jeff He and Tobia Marcucci and Li Fei-Fei and Jiajun Wu and Yunzhu Li},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ymBG2xs9Zf}\n}", "github": "", "project": "", "reviewers": "LeuW;fLUK;h4y2;H7kE", "pdf_size": 4824873, "rating": "5;5;5;6", "confidence": "3;3;3;4", "soundness": "2;2;3;3", "novelty": "2;4;2;3", "presentation": "3;4;2;3", "wc_summary": "74;53;89;44", "wc_strengths": "79;128;58;112", "wc_weaknesses": "602;161;55;183", "wc_questions": "45;51;35;30", "wc_limitations": "48;7;1;33", "wc_review": "848;400;238;402", "wc_reply_reviewers": "245;43;75;69", "wc_reply_authors": "1152;37;435;36", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.0, 17.621010186706094 ], "wc_strengths_avg": [ 94.25, 27.38955092731533 ], "wc_weaknesses_avg": [ 250.25, 208.76946017078265 ], "wc_questions_avg": [ 40.25, 8.227241335952167 ], "wc_limitations_avg": [ 22.25, 19.122957407263137 ], "wc_review_avg": [ 472.0, 227.05505940189926 ], "wc_reply_reviewers_avg": [ 108.0, 80.00624975587844 ], "wc_reply_authors_avg": [ 415.0, 455.5474728280248 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4317269223631815208&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": ";stanford.edu;stanford.edu;mit.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Stanford University;Massachusetts Institute of 
Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;MIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Covariance-adaptive best arm identification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69931", "id": "ymHM1qRUeb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e82ef7865f29b40640f486bbbe7959a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ymHM1qRUeb", "openreview": "https://openreview.net/forum?id=ymHM1qRUeb", "poster": "/media/PosterPDFs/NeurIPS%202023/69931.png?t=1699246821.8856204", "slides": "https://nips.cc/virtual/2023/poster/69931", "video": "https://nips.cc/virtual/2023/poster/69931", "author_site": "El Mehdi Saad, Gilles Blanchard, Nicolas Verzelen", "tldr": "", "abstract": "We consider the problem of best arm identification in the multi-armed bandit model, under fixed confidence. Given a confidence input $\\delta$, the goal is to identify the arm with the highest mean reward with a probability of at least $1 - \\delta$, while minimizing the number of arm pulls. While the literature provides solutions to this problem under the assumption of independent arms distributions, we propose a more flexible scenario where arms can be dependent and rewards can be sampled simultaneously. This framework allows the learner to estimate the covariance among the arms distributions, enabling a more efficient identification of the best arm. The relaxed setting we propose is relevant in various applications, such as clinical trials, where similarities between patients or drugs suggest underlying correlations in the outcomes. We introduce new algorithms that adapt to the unknown covariance of the arms and demonstrate through theoretical guarantees that substantial improvement can be achieved over the standard setting. 
Additionally, we provide new lower bounds for the relaxed setting and present numerical simulations that support our theoretical findings.", "keywords": "Multi-armed bandits;Best-arm identification;Adaptive identification", "primary_area": "", "supplementary_material": "/attachment/c9eba86fd04497a61373e4950c267567ca85f778.pdf", "author": "El Mehdi Saad;Gilles Blanchard;Nicolas Verzelen", "authorids": "~El_Mehdi_Saad1;~Gilles_Blanchard1;~Nicolas_Verzelen1", "gender": "M;;", "homepage": ";;https://verzelen.montpellier.inrae.fr/", "dblp": "279/4097;;40/1671.html", "google_scholar": "https://scholar.google.com/citations?hl=fr;;", "orcid": ";;", "linkedin": "el-mehdi-saad-b29949a9/;;", "or_profile": "~El_Mehdi_Saad1;~Gilles_Blanchard1;~Nicolas_Verzelen1", "aff": "INRA, Montpellier, France;;INRAE", "aff_domain": "inra.fr;;inrae.fr", "position": "Postdoc;;Associate Professor", "bibtex": "@inproceedings{\nsaad2023covarianceadaptive,\ntitle={Covariance-adaptive best arm identification},\nauthor={El Mehdi Saad and Gilles Blanchard and Nicolas Verzelen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ymHM1qRUeb}\n}", "github": "", "project": "", "reviewers": "ZvwR;THM3;pLAq;19g5;XxU8", "pdf_size": 416525, "rating": "5;5;6;6;7", "confidence": "4;5;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;2;3;3", "wc_summary": "113;60;119;65;99", "wc_strengths": "98;37;129;21;102", "wc_weaknesses": "129;37;1121;77;114", "wc_questions": "87;410;186;5;18", "wc_limitations": "82;1;1;5;40", "wc_review": "509;545;1556;173;373", "wc_reply_reviewers": "0;0;98;18;35", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 91.2, 24.36719105682885 ], "wc_strengths_avg": [ 77.4, 41.24366618039672 ], "wc_weaknesses_avg": [ 295.6, 413.9273366183973 ], "wc_questions_avg": [ 141.2, 148.94750753201615 ], "wc_limitations_avg": [ 25.8, 31.694794525284436 ], "wc_review_avg": [ 631.2, 480.4649414889707 ], "wc_reply_reviewers_avg": [ 30.2, 36.31198149371637 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-BAWEY_tS_cJ:scholar.google.com/&scioq=Covariance-adaptive+best+arm+identification&hl=en&as_sdt=0,5", "gs_version_total": 13, "email": "inra.fr;;inrae.fr", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "INRAE", "aff_unique_dep": "", "aff_unique_url": "https://www.inrae.fr", "aff_unique_abbr": "INRAE", "aff_campus_unique_index": "0", "aff_campus_unique": "Montpellier;", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "TOA: Task-oriented Active VQA", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69930", "id": "yoAmURKDJi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a95cc4f370bcc418e7b57d6512e28f52-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yoAmURKDJi", "openreview": "https://openreview.net/forum?id=yoAmURKDJi", "poster": 
"/media/PosterPDFs/NeurIPS%202023/69930.png?t=1702083483.0442653", "slides": "https://nips.cc/virtual/2023/poster/69930", "video": "https://nips.cc/virtual/2023/poster/69930", "author_site": "xiaoying xing, Mingfu Liang, Ying Wu", "tldr": "", "abstract": "Knowledge-based visual question answering (VQA) requires external knowledge to answer the question about an image. Early methods explicitly retrieve knowledge from external knowledge bases, which often introduce noisy information. Recently large language models like GPT-3 have shown encouraging performance as implicit knowledge source and revealed planning abilities. However, current large language models can not effectively understand image inputs, thus it remains an open problem to extract the image information and input to large language models. Prior works have used image captioning and object descriptions to represent the image. However, they may either drop the essential visual information to answer the question correctly or involve irrelevant objects to the task-of-interest. To address this problem, we propose to let large language models make an initial hypothesis according to their knowledge, then actively collect the visual evidence required to verify the hypothesis. In this way, the model can attend to the essential visual information in a task-oriented manner. We leverage several vision modules from the perspectives of spatial attention (i.e., Where to look) and attribute attention (i.e., What to look), which is similar to human cognition. The experiments show that our proposed method outperforms the baselines on open-ended knowledge-based VQA datasets and presents clear reasoning procedure with better interpretability.", "keywords": "knowledge-based visual question answering;task-oriented;active image understanding;large language model;visual reasoning;multi-round dialogue", "primary_area": "", "supplementary_material": "", "author": "Xiaoying Xing;Mingfu Liang;Ying Wu", "authorids": "~Xiaoying_Xing1;~Mingfu_Liang1;~Ying_Wu7", "gender": ";M;M", "homepage": ";https://mingfuliang.com/;http://www.ece.northwestern.edu/~yingwu", "dblp": "187/6612;241/9790;64/5840-1", "google_scholar": ";_uUUvt4AAAAJ;zAlz89wAAAAJ", "orcid": ";0000-0001-6779-2418;0000-0002-3523-7054", "linkedin": ";;ying-wu-a758497/", "or_profile": "~Xiaoying_Xing1;~Mingfu_Liang1;~Ying_Wu1", "aff": "Northwestern University;Northwestern University;Northwestern University", "aff_domain": "northwestern.edu;northwestern.edu;northwestern.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nxing2023toa,\ntitle={{TOA}: Task-oriented Active {VQA}},\nauthor={Xiaoying Xing and Mingfu Liang and Ying Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yoAmURKDJi}\n}", "github": "", "project": "", "reviewers": "8uLs;BgCz;TKN8;ckp5", "pdf_size": 1979804, "rating": "5;5;7;7", "confidence": "5;5;4;4", "soundness": "3;2;3;4", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "89;55;53;156", "wc_strengths": "71;29;67;49", "wc_weaknesses": "44;276;57;44", "wc_questions": "25;2;40;8", "wc_limitations": "38;24;30;11", "wc_review": "267;386;247;268", "wc_reply_reviewers": "11;44;79;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], 
"wc_summary_avg": [ 88.25, 41.64957982981341 ], "wc_strengths_avg": [ 54.0, 16.64331697709324 ], "wc_weaknesses_avg": [ 105.25, 98.72531336997619 ], "wc_questions_avg": [ 18.75, 14.889173919328098 ], "wc_limitations_avg": [ 25.75, 9.858372076565177 ], "wc_review_avg": [ 292.0, 54.913568450793655 ], "wc_reply_reviewers_avg": [ 37.25, 27.261465477849864 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3322683455894470199&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "northwestern.edu;northwestern.edu;northwestern.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CaMP: Causal Multi-policy Planning for Interactive Navigation in Multi-room Scenes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69929", "id": "yoZTVn0T50", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/333581887bf483296118a97773cab0c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yoZTVn0T50", "openreview": "https://openreview.net/forum?id=yoZTVn0T50", "poster": "/media/PosterPDFs/NeurIPS%202023/69929.png?t=1698405806.6515553", "slides": "https://nips.cc/virtual/2023/poster/69929", "video": "https://nips.cc/virtual/2023/poster/69929", "author_site": "Xiaohan Wang, Yuehu Liu, Xinhang Song, Beibei Wang, Shuqiang Jiang", "tldr": "", "abstract": "Visual navigation has been widely studied under the assumption that there may be several clear routes to reach the goal. However, in more practical scenarios such as a house with several messy rooms, there may not. Interactive Navigation (InterNav) considers agents navigating to their goals more effectively with object interactions, posing new challenges of learning interaction dynamics and extra action space. Previous works learn single vision-to-action policy with the guidance of designed representations. However, the causality between actions and outcomes is prone to be confounded when the attributes of obstacles are diverse and hard to measure. Learning policy for long-term action planning in complex scenes also leads to extensive inefficient exploration. In this paper, we introduce a causal diagram of InterNav clarifying the confounding bias caused by obstacles. To address the problem, we propose a multi-policy model that enables the exploration of counterfactual interactions as well as reduces unnecessary exploration. 
We develop a large-scale dataset containing 600k task episodes in 12k multi-room scenes based on the ProcTHOR simulator and showcase the effectiveness of our method with the evaluations on our dataset.", "keywords": "Embodied AI;Interactive Navigation;Causal Reinforcement Learning;Hierarchical Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Xiaohan Wang;Yuehu Liu;Xinhang Song;Beibei Wang;Shuqiang Jiang", "authorids": "~Xiaohan_Wang3;~Yuehu_Liu1;~Xinhang_Song1;~Beibei_Wang3;~Shuqiang_Jiang1", "gender": "M;M;M;M;M", "homepage": ";https://gr.xjtu.edu.cn/en/web/liuyh;;https://github.com/twb1235;https://people.ucas.edu.cn/~sqjiang?language=en", "dblp": ";https://dblp.uni-trier.de/pid/50/6184.html;125/2281;;90/3651", "google_scholar": ";;LQDB7QQAAAAJ;;4Rvn-ykAAAAJ", "orcid": "0000-0003-2396-0824;;;;0000-0002-1596-4326", "linkedin": ";;;;", "or_profile": "~Xiaohan_Wang3;~Yuehu_Liu1;~Xinhang_Song1;~Beibei_Wang3;~Shuqiang_Jiang1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Institute of Computing Technology, Chinese Academy of Sciences;Xi'an Jiaotong University;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;ict.ac.cn;xjtu.edu.cn;ict.ac.cn", "position": "PhD student;Full Professor;Associate Professor;MS student;Professor", "bibtex": "@inproceedings{\nwang2023camp,\ntitle={Ca{MP}: Causal Multi-policy Planning for Interactive Navigation in Multi-room Scenes},\nauthor={Xiaohan Wang and Yuehu Liu and Xinhang Song and Beibei Wang and Shuqiang Jiang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yoZTVn0T50}\n}", "github": "", "project": "", "reviewers": "7DX2;KGF8;6UQq;HVkt", "pdf_size": 2618661, "rating": "5;6;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;4;3", "presentation": "2;2;3;3", "wc_summary": "84;452;158;85", "wc_strengths": "197;52;90;143", "wc_weaknesses": "242;84;368;855", "wc_questions": "287;656;6;93", "wc_limitations": "71;3;5;2", "wc_review": "881;1247;627;1178", "wc_reply_reviewers": "513;15;447;167", "wc_reply_authors": "534;66;384;65", "reply_reviewers": "2;1;2;1", "reply_authors": "4;3;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 194.75, 151.5245442164404 ], "wc_strengths_avg": [ 120.5, 54.72887720390397 ], "wc_weaknesses_avg": [ 387.25, 288.1921017307726 ], "wc_questions_avg": [ 260.5, 249.97449869936733 ], "wc_limitations_avg": [ 20.25, 29.32042803234632 ], "wc_review_avg": [ 983.25, 247.41703154795144 ], "wc_reply_reviewers_avg": [ 285.5, 203.13234602101164 ], "wc_reply_authors_avg": [ 262.25, 203.77239140766838 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7615544642378208426&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xjtu.edu.cn;xjtu.edu.cn;ict.ac.cn;xjtu.edu.cn;ict.ac.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Xi'an Jiao Tong University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Computing Technology", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.ict.ac.cn", 
"aff_unique_abbr": "XJTU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Emergent Correspondence from Image Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69928", "id": "ypOiXjdfnU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0503f5dce343a1d06d16ba103dd52db1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ypOiXjdfnU", "openreview": "https://openreview.net/forum?id=ypOiXjdfnU", "poster": "/media/PosterPDFs/NeurIPS%202023/69928.png?t=1701809384.7225323", "slides": "https://nips.cc/virtual/2023/poster/69928", "video": "https://nips.cc/virtual/2023/poster/69928", "author_site": "Luming Tang, Menglin Jia, Qianqian Wang, Cheng Perng Phoo, Bharath Hariharan", "tldr": "", "abstract": "Finding correspondences between images is a fundamental problem in computer vision. In this paper, we show that correspondence emerges in image diffusion models without any explicit supervision. We propose a simple strategy to extract this implicit knowledge out of diffusion networks as image features, namely DIffusion FeaTures (DIFT), and use them to establish correspondences between real images. Without any additional fine-tuning or supervision on the task-specific data or annotations, DIFT is able to outperform both weakly-supervised methods and competitive off-the-shelf features in identifying semantic, geometric, and temporal correspondences. Particularly for semantic correspondence, DIFT from Stable Diffusion is able to outperform DINO and OpenCLIP by 19 and 14 accuracy points respectively on the challenging SPair-71k benchmark. It even outperforms the state-of-the-art supervised methods on 9 out of 18 categories while remaining on par for the overall performance. 
Project page: https://diffusionfeatures.github.io.", "keywords": "Correspondence;Diffusion Model", "primary_area": "", "supplementary_material": "", "author": "Luming Tang;Menglin Jia;Qianqian Wang;Cheng Perng Phoo;Bharath Hariharan", "authorids": "~Luming_Tang1;~Menglin_Jia1;~Qianqian_Wang2;~Cheng_Perng_Phoo1;~Bharath_Hariharan3", "gender": "M;;F;M;M", "homepage": "http://lumingtang.info/;https://kmnp.github.io/;https://www.cs.cornell.edu/~qqw/;https://cpphoo.github.io/;http://home.bharathh.info", "dblp": "203/8352;228/8465;118/6735-2;226/0521;05/8412", "google_scholar": "116n5vIAAAAJ;https://scholar.google.co.uk/citations?user=QOqB6coAAAAJ;VdmfIeUAAAAJ;kt9D2usAAAAJ;TpglobcAAAAJ", "orcid": ";;;;", "linkedin": "lt453/;;;;", "or_profile": "~Luming_Tang1;~Menglin_Jia1;~Qianqian_Wang2;~Cheng_Perng_Phoo1;~Bharath_Hariharan2", "aff": "Cornell University;Cornell University;Cornell University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntang2023emergent,\ntitle={Emergent Correspondence from Image Diffusion},\nauthor={Luming Tang and Menglin Jia and Qianqian Wang and Cheng Perng Phoo and Bharath Hariharan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ypOiXjdfnU}\n}", "github": "", "project": "", "reviewers": "4Vxk;zLC3;rXbs;4nEG;u3f5", "pdf_size": 32500850, "rating": "4;6;7;7;7", "confidence": "5;5;4;4;5", "soundness": "3;2;3;3;2", "novelty": "3;3;3;3;4", "presentation": "3;3;3;4;3", "wc_summary": "48;162;131;66;84", "wc_strengths": "23;47;49;115;53", "wc_weaknesses": "149;456;87;78;122", "wc_questions": "25;243;299;80;31", "wc_limitations": "12;1;1;12;14", "wc_review": "257;909;567;351;304", "wc_reply_reviewers": "0;503;10;39;41", "wc_reply_authors": "0;34;33;31;33", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 98.2, 42.20142177699704 ], "wc_strengths_avg": [ 57.4, 30.65681001017555 ], "wc_weaknesses_avg": [ 178.4, 141.09656267960608 ], "wc_questions_avg": [ 135.6, 113.57746255309634 ], "wc_limitations_avg": [ 8.0, 5.761944116355173 ], "wc_review_avg": [ 477.6, 240.38602288818709 ], "wc_reply_reviewers_avg": [ 118.6, 192.8632676276123 ], "wc_reply_authors_avg": [ 26.2, 13.13620949893842 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5601120336112038, "gs_citation": 364, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12270541678834144233&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Unifying Perspective on Multi-Calibration: Game Dynamics for Multi-Objective Learning", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/69927", "id": "ysqlhW0v26", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e55edcdb01ac45c839a602f96e09fbcb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ysqlhW0v26", "openreview": "https://openreview.net/forum?id=ysqlhW0v26", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69927", "video": "https://nips.cc/virtual/2023/poster/69927", "author_site": "Nika Haghtalab, Michael Jordan, Eric Zhao", "tldr": "", "abstract": "We provide a unifying framework for the design and analysis of multi-calibrated predictors. By placing the multi-calibration problem in the general setting of multi-objective learning---where learning guarantees must hold simultaneously over a set of distributions and loss functions---we exploit connections to game dynamics to achieve state-of-the-art guarantees for a diverse set of multi-calibration learning problems. In addition to shedding light on existing multi-calibration guarantees and greatly simplifying their analysis, our approach also yields improved guarantees, such as error tolerances that scale with the square-root of group size versus the constant tolerances guaranteed by prior works, and improving the complexity of $k$-class multi-calibration by an exponential factor of $k$ versus Gopalan et al.. Beyond multi-calibration, we use these game dynamics to address emerging considerations in the study of group fairness and multi-distribution learning.", "keywords": "multicalibration;multi-objective learning;learning theory;calibration;fairness;games", "primary_area": "", "supplementary_material": "/attachment/133400983e6f6d1d0d8ec8290e993ef2f8fe9b81.zip", "author": "Nika Haghtalab;Michael Jordan;Eric Zhao", "authorids": "~Nika_Haghtalab2;~Michael_Jordan1;~Eric_Zhao1", "gender": "F;M;M", "homepage": "https://people.eecs.berkeley.edu/~nika/;http://www.cs.berkeley.edu/~jordan/;https://eric-zhao.com", "dblp": ";j/MichaelIJordan;294/8327.html", "google_scholar": ";https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;6OfjaHQAAAAJ", "orcid": ";0000-0001-8935-817X;", "linkedin": ";;", "or_profile": "~Nika_Haghtalab2;~Michael_Jordan1;~Eric_Zhao1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nhaghtalab2023a,\ntitle={A Unifying Perspective on Multi-Calibration: Game Dynamics for Multi-Objective Learning},\nauthor={Nika Haghtalab and Michael Jordan and Eric Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ysqlhW0v26}\n}", "github": "", "project": "", "reviewers": "rNCE;vjmg;pd5T;ymik;Mwev", "pdf_size": 1880032, "rating": "4;5;6;6;7", "confidence": "3;2;4;3;1", "soundness": "2;3;3;4;4", "novelty": "3;3;3;3;3", "presentation": "3;3;2;4;3", "wc_summary": "224;52;113;130;34", "wc_strengths": "209;19;139;105;49", "wc_weaknesses": "234;89;145;164;57", "wc_questions": "210;98;4;275;2", "wc_limitations": "116;12;1;20;1", "wc_review": "993;270;402;694;143", "wc_reply_reviewers": "0;50;46;33;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 
3.0, 0.6324555320336759 ], "wc_summary_avg": [ 110.6, 67.14640720098134 ], "wc_strengths_avg": [ 104.2, 67.08323188398127 ], "wc_weaknesses_avg": [ 137.8, 61.48625862743642 ], "wc_questions_avg": [ 117.8, 109.51237373009499 ], "wc_limitations_avg": [ 30.0, 43.59357750861932 ], "wc_review_avg": [ 500.4, 306.8019556652141 ], "wc_reply_reviewers_avg": [ 25.8, 21.802752119858628 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.34615384615384615, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14728211378567943884&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Expressivity-Preserving GNN Simulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69926", "id": "ytTfonl9Wd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ebf95a6f3c575322da15d4fd0fc2b3c8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ytTfonl9Wd", "openreview": "https://openreview.net/forum?id=ytTfonl9Wd", "poster": "/media/PosterPDFs/NeurIPS%202023/69926.png?t=1702162082.6532714", "slides": "https://nips.cc/virtual/2023/poster/69926", "video": "https://nips.cc/virtual/2023/poster/69926", "author_site": "Fabian Jogl, Maximilian Thiessen, Thomas G\u00e4rtner", "tldr": "", "abstract": "We systematically investigate graph transformations that enable standard message passing to simulate state-of-the-art graph neural networks (GNNs) without loss of expressivity. Using these, many state-of-the-art GNNs can be implemented with message passing operations from standard libraries, eliminating many sources of implementation issues and allowing for better code optimization. We distinguish between weak and strong simulation: weak simulation achieves the same expressivity only after several message passing steps while strong simulation achieves this after every message passing step. Our contribution leads to a direct way to translate common operations of non-standard GNNs to graph transformations that allow for strong or weak simulation. 
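As a toy illustration of such a graph transformation, the sketch below adds one auxiliary node per triangle so that standard message passing can aggregate triangle-level structure. It is a simplified stand-in for the paper's systematically derived, expressivity-preserving transformations, not their actual construction; the networkx usage is only for demonstration.

```python
import networkx as nx
from itertools import combinations

def add_triangle_nodes(g):
    """Add one auxiliary node per triangle, connected to its three corners,
    so a standard MPNN can aggregate triangle-level information.

    A simplified stand-in for the paper's transformations, which are
    derived per target GNN to preserve expressivity.
    """
    h = g.copy()
    for c in combinations(g.nodes, 3):
        if all(g.has_edge(u, v) for u, v in combinations(c, 2)):
            aux = ("triangle", c)
            h.add_node(aux, kind="triangle")
            h.add_edges_from((aux, v) for v in c)
    return h

g = nx.complete_graph(4)                         # K4 contains 4 triangles
print(add_triangle_nodes(g).number_of_nodes())   # 4 original + 4 auxiliary = 8
```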
Our empirical evaluation shows competitive predictive performance of message passing on transformed graphs for various molecular benchmark datasets, in several cases surpassing the original GNNs.", "keywords": "Graph Neural Networks;GNNs;Graphs;Message Passing;Expressiveness;Graph Transformations;Message Passing Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Fabian Jogl;Maximilian Thiessen;Thomas G\u00e4rtner", "authorids": "~Fabian_Jogl1;~Maximilian_Thiessen1;~Thomas_G\u00e4rtner2", "gender": "M;;M", "homepage": "https://fjo.gl/;https://maxthiessen.github.io;https://thomasgaertner.org/", "dblp": "292/7003;https://dblp.uni-trier.de/pid/274/6633;https://dblp.uni-trier.de/pers/hd/g/G=auml=rtner_0001:Thomas", "google_scholar": ";https://scholar.google.de/citations?user=XO5rGcwAAAAJ;sOI8QyoAAAAJ", "orcid": ";0000-0001-9333-2685;0000-0001-5985-9213", "linkedin": ";maximilian-thiessen/;", "or_profile": "~Fabian_Jogl1;~Maximilian_Thiessen1;~Thomas_G\u00e4rtner2", "aff": "TU Wien;TU Wien;TU Wien", "aff_domain": "tuwien.ac.at;tuwien.ac.at;tuwien.ac.at", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\njogl2023expressivitypreserving,\ntitle={Expressivity-Preserving {GNN} Simulation},\nauthor={Fabian Jogl and Maximilian Thiessen and Thomas G{\\\"a}rtner},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ytTfonl9Wd}\n}", "github": "", "project": "", "reviewers": "sDHK;5K9U;YhPG;r6Zx;GmCj;sQdR", "pdf_size": 586789, "rating": "4;6;6;7;7;8", "confidence": "4;4;3;3;2;3", "soundness": "3;3;4;4;3;4", "novelty": "2;2;3;3;3;4", "presentation": "3;2;3;4;3;3", "wc_summary": "162;48;58;108;29;124", "wc_strengths": "52;45;64;125;17;130", "wc_weaknesses": "101;15;125;66;20;78", "wc_questions": "49;224;15;22;14;152", "wc_limitations": "27;1;1;10;5;4", "wc_review": "391;333;263;331;85;488", "wc_reply_reviewers": "50;109;0;18;0;40", "wc_reply_authors": "0;18;0;0;0;0", "reply_reviewers": "1;1;0;1;0;1", "reply_authors": "1;2;1;1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 88.16666666666667, 46.8202828792062 ], "wc_strengths_avg": [ 72.16666666666667, 41.61496792688366 ], "wc_weaknesses_avg": [ 67.5, 39.90300740545755 ], "wc_questions_avg": [ 79.33333333333333, 80.43976352249896 ], "wc_limitations_avg": [ 8.0, 9.018499505645789 ], "wc_review_avg": [ 315.1666666666667, 123.75300220825171 ], "wc_reply_reviewers_avg": [ 36.166666666666664, 37.542938380236336 ], "wc_reply_authors_avg": [ 3.0, 6.708203932499369 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6482037235521646, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13242092161765957646&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tuwien.ac.at;tuwien.ac.at;tuwien.ac.at", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Wien", "aff_unique_dep": "", "aff_unique_url": "https://www.tuwien.ac.at", "aff_unique_abbr": "TU Wien", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "Austria" }, { "title": "Epidemic Learning: Boosting Decentralized Learning with Randomized Communication", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69925", "id": "ytrhsvGP0r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7172e147d916eef4cb1eb30016ce725f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ytrhsvGP0r", "openreview": "https://openreview.net/forum?id=ytrhsvGP0r", "poster": "/media/PosterPDFs/NeurIPS%202023/69925.png?t=1702225711.6798525", "slides": "https://nips.cc/virtual/2023/poster/69925", "video": "https://nips.cc/virtual/2023/poster/69925", "author_site": "Martijn De Vos, Sadegh Farhadkhani, Rachid Guerraoui, Anne-marie Kermarrec, Rafael Pires, Rishi Sharma", "tldr": "", "abstract": "We present Epidemic Learning (EL), a simple yet powerful decentralized learning (DL) algorithm that leverages changing communication topologies to achieve faster model convergence compared to conventional DL approaches. At each round of EL, each node sends its model updates to a random sample of $s$ other nodes (in a system of $n$ nodes). We provide an extensive theoretical analysis of EL, demonstrating that its changing topology culminates in superior convergence properties compared to the state-of-the-art (static and dynamic) topologies. Considering smooth non-convex loss functions, the number of transient iterations for EL, i.e., the rounds required to achieve asymptotic linear speedup, is in $O(n^3/s^2)$ which outperforms the best-known bound $O(n^3)$ by a factor of $s^2$, indicating the benefit of randomized communication for DL. We empirically evaluate EL in a 96-node network and compare its performance with state-of-the-art DL approaches. Our results illustrate that EL converges up to $ 1.7\\times$ quicker than baseline DL algorithms and attains $2.2 $\\% higher accuracy for the same communication volume.", "keywords": "Epidemic;Decentralized Learning;Randomized Communication;Peer sampling", "primary_area": "", "supplementary_material": "/attachment/71dbd71d7afd1b2e3a355d61e8b717636ae7daa3.zip", "author": "Martijn De Vos;Sadegh Farhadkhani;Rachid Guerraoui;Anne-marie Kermarrec;Rafael Pires;Rishi Sharma", "authorids": "~Martijn_De_Vos1;~Sadegh_Farhadkhani1;~Rachid_Guerraoui1;~Anne-marie_Kermarrec2;~Rafael_Pires1;~Rishi_Sharma2", "gender": ";M;M;F;M;M", "homepage": "https://devos50.github.io;https://sadeghfarhadkhani.github.io/;https://lpdwww.epfl.ch/rachid/;https://people.epfl.ch/anne-marie.kermarrec;https://pires.tech/;https://rishisharma.netlify.app/", "dblp": "137/4243;281/6141;g/RachidGuerraoui;86/676.html;189/6914;158/4544-1", "google_scholar": ";X4axFjgAAAAJ;;https://scholar.google.it/citations?user=aIAy-qcAAAAJ;https://scholar.google.ch/citations?user=EegvylkAAAAJ;jUfDXOsAAAAJ", "orcid": ";;;0000-0001-8187-724X;0000-0002-7826-1599;0000-0002-1928-1549", "linkedin": ";;;;rafaelppires/?locale=en_US;rishi-s8/", "or_profile": "~Martijn_De_Vos1;~Sadegh_Farhadkhani1;~Rachid_Guerraoui1;~Anne-marie_Kermarrec2;~Rafael_Pires1;~Rishi_Sharma2", "aff": "Delft University of Technology;EPFL;;School of Computer and Communication Sciences, EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "tudelft.nl;epfl.ch;;ic.epfl.ch;epfl.ch;epfl.ch", "position": "Postdoc;PhD student;;Full Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nvos2023epidemic,\ntitle={Epidemic Learning: Boosting Decentralized Learning with Randomized Communication},\nauthor={Martijn De Vos and 
Sadegh Farhadkhani and Rachid Guerraoui and Anne-marie Kermarrec and Rafael Pires and Rishi Sharma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ytrhsvGP0r}\n}", "github": "", "project": "", "reviewers": "fwhW;VxtD;iaf3;f2yu;Grsp", "pdf_size": 853169, "rating": "5;5;5;6;7", "confidence": "3;3;3;1;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "55;96;93;82;94", "wc_strengths": "28;39;42;37;56", "wc_weaknesses": "250;425;66;25;139", "wc_questions": "3;9;61;300;58", "wc_limitations": "5;12;2;1;4", "wc_review": "341;581;264;445;351", "wc_reply_reviewers": "45;103;34;54;35", "wc_reply_authors": "0;252;298;0;37", "reply_reviewers": "1;1;2;1;1", "reply_authors": "1;2;3;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.0, 15.297058540778355 ], "wc_strengths_avg": [ 40.4, 9.09065454189081 ], "wc_weaknesses_avg": [ 181.0, 143.95971658766211 ], "wc_questions_avg": [ 86.2, 109.56532298131559 ], "wc_limitations_avg": [ 4.8, 3.867815921162743 ], "wc_review_avg": [ 396.4, 108.71908756055673 ], "wc_reply_reviewers_avg": [ 54.2, 25.466841186138492 ], "wc_reply_authors_avg": [ 117.4, 130.20230412707758 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.24999999999999997, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1274432252746081331&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "tudelft.nl;epfl.ch;;ic.epfl.ch;epfl.ch;epfl.ch", "author_num": 6, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Delft University of Technology;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.tudelft.nl;https://www.epfl.ch", "aff_unique_abbr": "TU Delft;EPFL", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Netherlands;Switzerland" }, { "title": "Canonical normalizing flows for manifold learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69924", "id": "yubwSWol6K", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/572a6f16ec44f794fb3e0f8a310acbc6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yubwSWol6K", "openreview": "https://openreview.net/forum?id=yubwSWol6K", "poster": "/media/PosterPDFs/NeurIPS%202023/69924.png?t=1701525821.4499316", "slides": "https://nips.cc/virtual/2023/poster/69924", "video": "https://nips.cc/virtual/2023/poster/69924", "author_site": "Kyriakos Flouris, Ender Konukoglu", "tldr": "", "abstract": "Manifold learning flows are a class of generative modelling techniques that assume a low-dimensional manifold description of the data. The embedding of such a manifold into the high-dimensional space of the data is achieved via learnable invertible transformations. Therefore, once the manifold is properly aligned via a reconstruction loss, the probability density is tractable on the manifold and maximum likelihood can be used to optimize the network parameters. Naturally, the lower-dimensional representation of the data requires an injective-mapping. 
Recent approaches were able to enforce that the density aligns with the modelled manifold, while efficiently calculating the density volume-change term when embedding to the higher-dimensional space. However, unless the injective-mapping is analytically predefined, the learned manifold is not necessarily an \\emph{efficient representation} of the data. Namely, the latent dimensions of such models frequently learn an entangled intrinsic basis, with degenerate information being stored in each dimension. Alternatively, if a locally orthogonal and/or sparse basis is to be learned, here coined canonical intrinsic basis, it can serve in learning a more compact latent space representation. Toward this end, we propose a canonical manifold learning flow method, where a novel optimization objective enforces the transformation matrix to have few prominent and non-degenerate basis functions. We demonstrate that by minimizing the off-diagonal manifold metric elements $\\ell_1$-norm, we can achieve such a basis, which is simultaneously sparse and/or orthogonal. Canonical manifold flow yields a more efficient use of the latent space, automatically generating fewer prominent and distinct dimensions to represent data, and consequently a better approximation of target distributions than other manifold flow methods in most experiments we conducted, resulting in lower FID scores.", "keywords": "manifold learning flows;normalizing flows;optimization;orthogonalization;sparsity;sparse learning;generative modeling;Riemannian manifold;geometry;metric tensor;orthogonal basis", "primary_area": "", "supplementary_material": "/attachment/c47a3027888301650c6db425381eea0df44e2ed2.zip", "author": "Kyriakos Flouris;Ender Konukoglu", "authorids": "~Kyriakos_Flouris1;~Ender_Konukoglu1", "gender": "Non-Binary;", "homepage": "https://k-flouris.github.io/;http://www.vision.ee.ethz.ch/~kender", "dblp": ";45/7041", "google_scholar": "JUKtIdkAAAAJ;https://scholar.google.ch/citations?user=OeEMrhQAAAAJ", "orcid": "0000-0001-7952-1922;", "linkedin": "kyriakosflouris123/;", "or_profile": "~Kyriakos_Flouris1;~Ender_Konukoglu1", "aff": "Swiss Federal Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ethz.ch", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nflouris2023canonical,\ntitle={Canonical normalizing flows for manifold learning},\nauthor={Kyriakos Flouris and Ender Konukoglu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yubwSWol6K}\n}", "github": "", "project": "", "reviewers": "8w9Q;tk9x;iKjw;7TiS", "pdf_size": 2576812, "rating": "5;5;6;7", "confidence": "3;4;3;4", "soundness": "3;2;2;4", "novelty": "3;2;3;3", "presentation": "3;1;2;4", "wc_summary": "87;48;186;107", "wc_strengths": "32;12;27;92", "wc_weaknesses": "89;157;558;81", "wc_questions": "58;85;348;140", "wc_limitations": "32;1;26;30", "wc_review": "298;303;1145;450", "wc_reply_reviewers": "21;86;218;38", "wc_reply_authors": "13;18;372;11", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 107.0, 50.30407538162291 ], "wc_strengths_avg": [ 40.75, 30.49077729412617 ], "wc_weaknesses_avg": [ 221.25, 196.65245358245596 ], "wc_questions_avg": [ 157.75, 113.74615378112792 ], "wc_limitations_avg": [ 22.25, 
12.457427503300993 ], "wc_review_avg": [ 549.0, 349.4760363744559 ], "wc_reply_reviewers_avg": [ 90.75, 77.23786312424755 ], "wc_reply_authors_avg": [ 103.5, 155.0395110931404 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18300548749782161528&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ethz.ch;ethz.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Would I have gotten that reward? Long-term credit assignment by counterfactual contribution analysis", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69923", "id": "yvqqkOn9Pi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d8bd445c2abe1343cce0e14b361b2fb3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yvqqkOn9Pi", "openreview": "https://openreview.net/forum?id=yvqqkOn9Pi", "poster": "/media/PosterPDFs/NeurIPS%202023/69923.png?t=1702056992.4346392", "slides": "https://nips.cc/virtual/2023/poster/69923", "video": "https://nips.cc/virtual/2023/poster/69923", "author_site": "Alexander Meulemans, Simon Schug, Seijin Kobayashi, nathaniel daw, Gregory Wayne", "tldr": "", "abstract": "To make reinforcement learning more sample efficient, we need better credit assignment methods that measure an action\u2019s influence on future rewards. Building upon Hindsight Credit Assignment (HCA), we introduce Counterfactual Contribution Analysis (COCOA), a new family of model-based credit assignment algorithms. Our algorithms achieve precise credit assignment by measuring the contribution of actions upon obtaining subsequent rewards, by quantifying a counterfactual query: \u2018Would the agent still have reached this reward if it had taken another action?\u2019. We show that measuring contributions w.r.t. rewarding _states_, as is done in HCA, results in spurious estimates of contributions, causing HCA to degrade towards the high-variance REINFORCE estimator in many relevant environments. Instead, we measure contributions w.r.t. rewards or learned representations of the rewarding objects, resulting in gradient estimates with lower variance. We run experiments on a suite of problems specifically designed to evaluate long-term credit assignment capabilities. By using dynamic programming, we measure ground-truth policy gradients and show that the improved performance of our new model-based credit assignment methods is due to lower bias and variance compared to HCA and common baselines. 
Our results demonstrate how modeling action contributions towards rewarding outcomes can be leveraged for credit assignment, opening a new path towards sample-efficient reinforcement learning.", "keywords": "Reinforcement learning;Long-term credit assignment;contribution analysis;hindsight credit assignment;policy gradient methods", "primary_area": "", "supplementary_material": "/attachment/5585ba9f90866284630f68021fa48ada9dfd20e1.zip", "author": "Alexander Meulemans;Simon Schug;Seijin Kobayashi;Nathaniel Daw;Greg Wayne", "authorids": "~Alexander_Meulemans1;~Simon_Schug1;~Seijin_Kobayashi1;~Nathaniel_Daw1;~Greg_Wayne1", "gender": "M;;;M;M", "homepage": "http://alexandermeulemans.com/;https://smn.one/;;https://www.princeton.edu/~ndaw/;https://columbia.academia.edu/GregWayne", "dblp": "267/9546;262/5139;;38/929;150/8612", "google_scholar": "https://scholar.google.ch/citations?user=nnMccw4AAAAJ;T-ZAeg0AAAAJ;;BxlScrEAAAAJ;", "orcid": ";0000-0002-5305-2547;;0000-0001-5029-1430;", "linkedin": "alexander-meulemans-72589b146/;;;;", "or_profile": "~Alexander_Meulemans1;~Simon_Schug1;~Seijin_Kobayashi1;~Nathaniel_Daw1;~Greg_Wayne1", "aff": "Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;;Google DeepMind;Google DeepMind", "aff_domain": "ethz.ch;ethz.ch;;deepmind.edu;deepmind.com", "position": "PhD student;PhD student;;Researcher;Researcher", "bibtex": "@inproceedings{\nmeulemans2023would,\ntitle={Would I have gotten that reward? Long-term credit assignment by counterfactual contribution analysis},\nauthor={Alexander Meulemans and Simon Schug and Seijin Kobayashi and Nathaniel Daw and Greg Wayne},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yvqqkOn9Pi}\n}", "github": "", "project": "", "reviewers": "nRru;5aMg;Embf;r14e;UxzB;GAJb;V1Ut", "pdf_size": 2040742, "rating": "4;6;6;6;7;7;8", "confidence": "4;4;4;1;4;3;4", "soundness": "4;2;3;3;3;3;3", "novelty": "2;3;3;3;3;3;4", "presentation": "3;2;3;4;2;3;3", "wc_summary": "70;166;128;22;113;70;73", "wc_strengths": "18;110;51;23;126;82;38", "wc_weaknesses": "29;333;151;15;693;18;53", "wc_questions": "85;44;88;11;29;73;48", "wc_limitations": "1;14;11;4;37;1;28", "wc_review": "203;667;429;75;998;244;240", "wc_reply_reviewers": "0;98;31;0;23;0;37", "wc_reply_authors": "0;0;0;0;0;0;0", "reply_reviewers": "0;1;1;0;1;0;1", "reply_authors": "1;1;1;1;1;1;1", "rating_avg": [ 6.285714285714286, 1.1605769149479945 ], "confidence_avg": [ 3.4285714285714284, 1.049781318335648 ], "soundness_avg": [ 3.0, 0.5345224838248488 ], "novelty_avg": [ 3.0, 0.5345224838248488 ], "presentation_avg": [ 2.857142857142857, 0.6388765649999398 ], "wc_summary_avg": [ 91.71428571428571, 43.82013329090469 ], "wc_strengths_avg": [ 64.0, 39.50768460511376 ], "wc_weaknesses_avg": [ 184.57142857142858, 232.89228481538544 ], "wc_questions_avg": [ 54.0, 26.960288256195312 ], "wc_limitations_avg": [ 13.714285714285714, 12.936264483053451 ], "wc_review_avg": [ 408.0, 298.4857019786949 ], "wc_reply_reviewers_avg": [ 27.0, 32.4257393351111 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5714285714285714, 0.4948716593053935 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.016750630254320217, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2027285396492216678&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "ethz.ch;ethz.ch;;deepmind.edu;deepmind.com", 
"author_num": 5, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ethz.ch;https://deepmind.com", "aff_unique_abbr": "ETH Zurich;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Switzerland;United Kingdom" }, { "title": "Computing Optimal Equilibria and Mechanisms via Learning in Zero-Sum Extensive-Form Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69922", "id": "yw1v4RqvPk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/07be1a0850e58ca29e2b6ce31fc0c791-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yw1v4RqvPk", "openreview": "https://openreview.net/forum?id=yw1v4RqvPk", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69922", "video": "https://nips.cc/virtual/2023/poster/69922", "author_site": "Brian Zhang, Gabriele Farina, Ioannis Anagnostides, Federico Cacciamani, Stephen McAleer, Andreas Haupt, Andrea Celli, Nicola Gatti, Vincent Conitzer, Tuomas Sandholm", "tldr": "", "abstract": "We introduce a new approach for computing optimal equilibria via learning in games. It applies to extensive-form settings with any number of players, including mechanism design, information design, and solution concepts such as correlated, communication, and certification equilibria. We observe that optimal equilibria are minimax equilibrium strategies of a player in an extensive-form zero-sum game. This reformulation allows to apply techniques for learning in zero-sum games, yielding the first learning dynamics that converge to optimal equilibria, not only in empirical averages, but also in iterates. 
We demonstrate the practical scalability and flexibility of our approach by attaining state-of-the-art performance in benchmark tabular games, and by computing an optimal mechanism for a sequential auction design problem using deep reinforcement learning.", "keywords": "extensive-form games;deep reinforcement learning;mechanism design;correlated equilibria", "primary_area": "", "supplementary_material": "", "author": "Brian Hu Zhang;Gabriele Farina;Ioannis Anagnostides;Federico Cacciamani;Stephen Marcus McAleer;Andreas Alexander Haupt;Andrea Celli;Nicola Gatti;Vincent Conitzer;Tuomas Sandholm", "authorids": "~Brian_Hu_Zhang1;~Gabriele_Farina1;~Ioannis_Anagnostides1;~Federico_Cacciamani1;~Stephen_Marcus_McAleer1;~Andreas_Alexander_Haupt1;~Andrea_Celli1;~Nicola_Gatti1;~Vincent_Conitzer2;~Tuomas_Sandholm1", "gender": ";M;M;M;M;M;M;M;M;M", "homepage": ";http://www.cs.cmu.edu/~gfarina/about/;;;https://www.andrew.cmu.edu/user/smcaleer/;https://www.andyhaupt.com/;https://andcelli.github.io/;https://www4.ceda.polimi.it/manifesti/manifesti/controller/ricerche/RicercaPerDocentiPublic.do?k_doc=75785&lang=EN&EVN_PRODOTTI=evento&__pj0=0&__pj1=d918ee8916afbd0005f5c0bc3c0ff350;https://www.cs.cmu.edu/~conitzer/;http://www.cs.cmu.edu/~sandholm", "dblp": "213/8211;;273/7648;285/5552;;158/5197.html;190/7301.html;g/NicolaGatti;c/VincentConitzer;s/TuomasSandholm", "google_scholar": ";sktDNcEAAAAJ;QVwDo_sAAAAJ;ntSIxxMAAAAJ;iEFL4-YAAAAJ;O6NknDYAAAAJ;9wQscqEAAAAJ;https://scholar.google.com.tw/citations?user=j-HrYREAAAAJ;juRk4lQAAAAJ;0DpK1EMAAAAJ", "orcid": ";;;;;0000-0002-2952-4188;;0000-0001-7349-3932;0000-0003-1899-7884;", "linkedin": ";;;;stephen-mcaleer/;indraos/;;nicola-gatti-1284b21;vincent-conitzer-2563082/;", "or_profile": "~Brian_Hu_Zhang1;~Gabriele_Farina1;~Ioannis_Anagnostides1;~Federico_Cacciamani1;~Stephen_Marcus_McAleer1;~Andreas_Alexander_Haupt1;~Andrea_Celli1;~Nicola_Gatti1;~Vincent_Conitzer2;~Tuomas_Sandholm1", "aff": "Carnegie Mellon University;FAIR, Meta AI;Carnegie Mellon University;Politecnico di Milano;Carnegie Mellon University;Massachusetts Institute of Technology;Bocconi University;Polytechnic Institute of Milan;University of Oxford;Carnegie Mellon University", "aff_domain": "cmu.edu;meta.com;cmu.edu;polimi.it;cmu.edu;mit.edu;unibocconi.it;polimi.it;oxford.ac.uk;cmu.edu", "position": "PhD student;Researcher;PhD student;PhD student;Postdoc;PhD student;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023computing,\ntitle={Computing Optimal Equilibria and Mechanisms via Learning in Zero-Sum Extensive-Form Games},\nauthor={Brian Hu Zhang and Gabriele Farina and Ioannis Anagnostides and Federico Cacciamani and Stephen Marcus McAleer and Andreas Alexander Haupt and Andrea Celli and Nicola Gatti and Vincent Conitzer and Tuomas Sandholm},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yw1v4RqvPk}\n}", "github": "", "project": "", "reviewers": "c3NQ;JN1n;dPXL;dGJ5", "pdf_size": 661826, "rating": "5;6;6;7", "confidence": "2;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "84;122;68;47", "wc_strengths": "47;132;41;111", "wc_weaknesses": "139;64;89;94", "wc_questions": "18;66;25;173", "wc_limitations": "9;22;1;15", "wc_review": "297;406;224;440", "wc_reply_reviewers": "0;6;5;68", "wc_reply_authors": "0;0;0;13", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], 
"confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.25, 27.444261695297982 ], "wc_strengths_avg": [ 82.75, 39.511865306512675 ], "wc_weaknesses_avg": [ 96.5, 27.04163456597992 ], "wc_questions_avg": [ 70.5, 61.953611678416294 ], "wc_limitations_avg": [ 11.75, 7.725768570181222 ], "wc_review_avg": [ 341.75, 86.09406193228428 ], "wc_reply_reviewers_avg": [ 19.75, 27.949731662397046 ], "wc_reply_authors_avg": [ 3.25, 5.629165124598851 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2856527964106710786&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "cmu.edu;meta.com;cmu.edu;polimi.it;cmu.edu;mit.edu;unibocconi.it;polimi.it;oxford.ac.uk;cmu.edu", "author_num": 10, "aff_unique_index": "0;1;0;2;0;3;4;5;6;0", "aff_unique_norm": "Carnegie Mellon University;Meta;Politecnico di Milano;Massachusetts Institute of Technology;Bocconi University;Polytechnic Institute of Milan;University of Oxford", "aff_unique_dep": ";Meta AI;;;;;", "aff_unique_url": "https://www.cmu.edu;https://meta.ai;https://www.polimi.it;https://web.mit.edu;https://www.bocconi.edu;https://www.polimi.it/;https://www.ox.ac.uk", "aff_unique_abbr": "CMU;Meta AI;Polimi;MIT;Bocconi;Politecnico di Milano;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;1;1;2;0", "aff_country_unique": "United States;Italy;United Kingdom" }, { "title": "Revisiting Adversarial Robustness Distillation from the Perspective of Robust Fairness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69921", "id": "ywrPcBEXdC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6111371a868af8dcfba0f96ad9e25ae3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ywrPcBEXdC", "openreview": "https://openreview.net/forum?id=ywrPcBEXdC", "poster": "/media/PosterPDFs/NeurIPS%202023/69921.png?t=1701418261.6305044", "slides": "https://nips.cc/virtual/2023/poster/69921", "video": "https://nips.cc/virtual/2023/poster/69921", "author_site": "Xinli Yue, Mou Ningping, Qian Wang, Lingchen Zhao", "tldr": "", "abstract": "Adversarial Robustness Distillation (ARD) aims to transfer the robustness of large teacher models to small student models, facilitating the attainment of robust performance on resource-limited devices. However, existing research on ARD primarily focuses on the overall robustness of student models, overlooking the crucial aspect of $\\textit{robust fairness}$. Specifically, these models may demonstrate strong robustness on some classes of data while exhibiting high vulnerability on other classes. Unfortunately, the \"buckets effect\" implies that the robustness of the deployed model depends on the classes with the lowest level of robustness. In this paper, we first investigate the inheritance of robust fairness during ARD and reveal that student models only partially inherit robust fairness from teacher models. We further validate this issue through fine-grained experiments with various model capacities and find that it may arise due to the gap in capacity between teacher and student models, as well as the existing methods treating each class equally during distillation. 
Based on these observations, we propose $\\textbf{Fair}$ $\\textbf{A}$dversarial $\\textbf{R}$obustness $\\textbf{D}$istillation (Fair-ARD), a novel framework for enhancing the robust fairness of student models by increasing the weights of difficult classes, and design a geometric perspective-based method to quantify the difficulty of different classes for determining the weights. Extensive experiments show that Fair-ARD surpasses both state-of-the-art ARD methods and existing robust fairness algorithms in terms of robust fairness (e.g., the worst-class robustness under AutoAttack is improved by at most 12.3\\% and 5.3\\% using ResNet18 on CIFAR10, respectively), while also slightly improving overall robustness. Our code is available at: [https://github.com/NISP-official/Fair-ARD](https://github.com/NISP-official/Fair-ARD).", "keywords": "Deep Learning;Knowledge Distillation;Adversarial Training;Fairness", "primary_area": "", "supplementary_material": "/attachment/98d9e2ad00af84e4ee0208146454a66e44ec8917.pdf", "author": "Xinli Yue;Ningping Mou;Qian Wang;Lingchen Zhao", "authorids": "~Xinli_Yue1;~Ningping_Mou1;~Qian_Wang13;~Lingchen_Zhao1", "gender": "M;M;;", "homepage": "https://xinliyue.github.io/;;;", "dblp": "369/7185;325/1551;;", "google_scholar": "mQ8pPpIAAAAJ;0M-v0ncAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xinli_Yue1;~Ningping_Mou1;~Qian_Wang13;~Lingchen_Zhao1", "aff": "Wuhan University;Wuhan University;;", "aff_domain": "whu.edu.cn;whu.edu.cn;;", "position": "MS student;PhD student;;", "bibtex": "@inproceedings{\nyue2023revisiting,\ntitle={Revisiting Adversarial Robustness Distillation from the Perspective of Robust Fairness},\nauthor={Xinli Yue and Ningping Mou and Qian Wang and Lingchen Zhao},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ywrPcBEXdC}\n}", "github": "", "project": "", "reviewers": "5xrn;3Z9v;Mqsm;DXyp;qqxE", "pdf_size": 455216, "rating": "5;6;6;7;7", "confidence": "3;2;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "2;3;3;3;4", "wc_summary": "71;45;139;124;154", "wc_strengths": "35;36;42;73;102", "wc_weaknesses": "270;159;185;111;86", "wc_questions": "2;124;126;52;5", "wc_limitations": "2;1;1;5;25", "wc_review": "380;365;493;365;372", "wc_reply_reviewers": "10;0;23;23;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 106.6, 41.62018740947715 ], "wc_strengths_avg": [ 57.6, 26.188547115103578 ], "wc_weaknesses_avg": [ 162.2, 64.15418926305593 ], "wc_questions_avg": [ 61.8, 54.568855586314065 ], "wc_limitations_avg": [ 6.8, 9.21737489744233 ], "wc_review_avg": [ 395.0, 49.31125632145261 ], "wc_reply_reviewers_avg": [ 13.8, 8.657944328765344 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10602432547502333240&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;whu.edu.cn;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", 
"aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "What Distributions are Robust to Indiscriminate Poisoning Attacks for Linear Learners?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69920", "id": "yyLFUPNEiT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e2986deda273d8fb903342841fcc4dc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yyLFUPNEiT", "openreview": "https://openreview.net/forum?id=yyLFUPNEiT", "poster": "/media/PosterPDFs/NeurIPS%202023/69920.png?t=1701753830.1954079", "slides": "https://nips.cc/virtual/2023/poster/69920", "video": "https://nips.cc/virtual/2023/poster/69920", "author_site": "Fnu Suya, Xiao Zhang, Yuan Tian, David Evans", "tldr": "", "abstract": "We study indiscriminate poisoning for linear learners where an adversary injects a few crafted examples into the training data with the goal of forcing the induced model to incur higher test error. Inspired by the observation that linear learners on some datasets are able to resist the best known attacks even without any defenses, we further investigate whether datasets can be inherently robust to indiscriminate poisoning attacks for linear learners. For theoretical Gaussian distributions, we rigorously characterize the behavior of an optimal poisoning attack, defined as the poisoning strategy that attains the maximum risk of the induced model at a given poisoning budget. Our results prove that linear learners can indeed be robust to indiscriminate poisoning if the class-wise data distributions are well-separated with low variance and the size of the constraint set containing all permissible poisoning points is also small. 
These findings largely explain the drastic variation in empirical attack performance of the state-of-the-art poisoning attacks on linear learners across benchmark datasets, making an important initial step towards understanding the underlying reasons some learning tasks are vulnerable to data poisoning attacks.", "keywords": "poisoning attacks; adversarial machine learning; machine learning security", "primary_area": "", "supplementary_material": "/attachment/74acffe84a6239baa7e037b63f21388626194a1c.zip", "author": "Fnu Suya;Xiao Zhang;Yuan Tian;David Evans", "authorids": "~Fnu_Suya1;~Xiao_Zhang2;~Yuan_Tian2;~David_Evans1", "gender": "M;M;F;Not Specified", "homepage": "https://fsuya.org;https://xiao-zhang.net;https://www.ytian.info/;https://www.cs.virginia.edu/evans/", "dblp": "211/7696;;;https://dblp.uni-trier.de/pid/e/DavidEvans", "google_scholar": "OmLIG8EAAAAJ;L-lz7CUAAAAJ;;DsR4PucAAAAJ", "orcid": ";0009-0008-1837-7670;;", "linkedin": ";;;", "or_profile": "~Fnu_Suya1;~Xiao_Zhang2;~Yuan_Tian2;~David_Evans1", "aff": "University of Virginia;CISPA Helmholtz Center for Information Security;University of Virginia;University of Virginia", "aff_domain": "virginia.edu;cispa.de;virginia.edu;virginia.edu", "position": "PhD student;Tenure-Track Faculty;Assistant Professor;Professor", "bibtex": "@inproceedings{\nsuya2023what,\ntitle={What Distributions are Robust to Indiscriminate Poisoning Attacks for Linear Learners?},\nauthor={Fnu Suya and Xiao Zhang and Yuan Tian and David Evans},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yyLFUPNEiT}\n}", "github": "", "project": "", "reviewers": "oRg1;Ya9d;5jje;J4a4", "pdf_size": 677066, "rating": "5;6;6;7", "confidence": "4;3;3;5", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "wc_summary": "55;83;109;63", "wc_strengths": "75;122;107;88", "wc_weaknesses": "736;53;158;106", "wc_questions": "430;155;44;4", "wc_limitations": "34;1;31;4", "wc_review": "1330;414;449;265", "wc_reply_reviewers": "655;0;0;0", "wc_reply_authors": "2236;0;0;0", "reply_reviewers": "3;0;0;0", "reply_authors": "5;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.5, 20.850659461993043 ], "wc_strengths_avg": [ 98.0, 17.930421077041107 ], "wc_weaknesses_avg": [ 263.25, 275.45541835295234 ], "wc_questions_avg": [ 158.25, 166.36161666682614 ], "wc_limitations_avg": [ 17.5, 15.074813431681335 ], "wc_review_avg": [ 614.5, 418.8320068953661 ], "wc_reply_reviewers_avg": [ 163.75, 283.62331973940366 ], "wc_reply_authors_avg": [ 559.0, 968.2164014310024 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5237734532686152836&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "virginia.edu;cispa.de;virginia.edu;virginia.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Virginia;CISPA Helmholtz Center for Information Security", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.cispa.de/", "aff_unique_abbr": "UVA;CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "SparseProp: Efficient Event-Based Simulation and Training of Sparse Recurrent Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69919", "id": "yzZbwQPkmP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b443d358a391166d1fbf551fb53de02-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=yzZbwQPkmP", "openreview": "https://openreview.net/forum?id=yzZbwQPkmP", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69919", "video": "https://nips.cc/virtual/2023/poster/69919", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) are biologically-inspired models that are capable of processing information in streams of action potentials. However, simulating and training SNNs is computationally expensive due to the need to solve large systems of coupled differential equations. In this paper, we propose a novel event-based algorithm called SparseProp for simulating and training sparse SNNs. Our algorithm reduces the computational cost of both forward pass and backward pass operations from O(N) to O(log(N)) per network spike, enabling numerically exact simulations of large spiking networks and their efficient training using backpropagation through time. By exploiting the sparsity of the network, SparseProp avoids iterating through all neurons at every spike and uses efficient state updates. We demonstrate the effectiveness of SparseProp for several classical integrate-and-fire neuron models, including simulating a sparse SNN with one million LIF neurons, which is sped up by more than four orders of magnitude compared to previous implementations. Our work provides an efficient and exact solution for training large-scale spiking neural networks and opens up new possibilities for building more sophisticated brain-inspired models.", "keywords": "spiking networks;event-based simulation;sparse networks;backpropagation;algorithm;neuroscience", "primary_area": "", "supplementary_material": "/attachment/b1c1f859e168e9e00f21879fd4cd1710dbbff939.pdf", "author": "Rainer Engelken", "authorids": "~Rainer_Engelken1", "gender": "M", "homepage": "https://ctn.zuckermaninstitute.columbia.edu/people/rainer-engelken", "dblp": "312/6447", "google_scholar": "HvZqeGQAAAAJ", "orcid": "0000-0001-7118-2129", "linkedin": "", "or_profile": "~Rainer_Engelken1", "aff": "Center for Theoretical Neuroscience, Columbia University", "aff_domain": "ctn.zuckermaninstitute.columbia.edu", "position": "Postdoc", "bibtex": "@inproceedings{\nengelken2023sparseprop,\ntitle={SparseProp: Efficient Event-Based Simulation and Training of Sparse Recurrent Spiking Neural Networks},\nauthor={Rainer Engelken},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=yzZbwQPkmP}\n}", "github": "", "project": "", "reviewers": "gRuk;XBiB;h5Gz;MMBd", "pdf_size": 601469, "rating": "4;4;7;9", "confidence": "3;3;4;5", "soundness": "1;2;3;4", "novelty": "3;2;4;4", "presentation": "3;2;3;4", "wc_summary": "74;88;73;87", "wc_strengths": "127;97;147;112", "wc_weaknesses": "242;255;302;87", "wc_questions": "68;72;31;43", "wc_limitations": "4;30;19;30", "wc_review": "515;542;572;359", "wc_reply_reviewers": "872;0;0;0", "wc_reply_authors": "584;164;0;0", "reply_reviewers": "2;0;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 
0.82915619758885 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.5, 7.0178344238090995 ], "wc_strengths_avg": [ 120.75, 18.498310733685926 ], "wc_weaknesses_avg": [ 221.5, 80.7975865976206 ], "wc_questions_avg": [ 53.5, 17.09532099727876 ], "wc_limitations_avg": [ 20.75, 10.662434056068061 ], "wc_review_avg": [ 497.0, 82.18576519081634 ], "wc_reply_reviewers_avg": [ 218.0, 377.58707605001524 ], "wc_reply_authors_avg": [ 187.0, 238.78651553217992 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.994936676326182, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17619847473926633535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ctn.zuckermaninstitute.columbia.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "Center for Theoretical Neuroscience", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Pre-RMSNorm and Pre-CRMSNorm Transformers: Equivalent and Efficient Pre-LN Transformers", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69918", "id": "z06npyCwDq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8f1bacee31caf990a4f08d84f0ccb322-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=z06npyCwDq", "openreview": "https://openreview.net/forum?id=z06npyCwDq", "poster": "/media/PosterPDFs/NeurIPS%202023/69918.png?t=1702366331.7515495", "slides": "https://nips.cc/virtual/2023/poster/69918", "video": "https://nips.cc/virtual/2023/poster/69918", "author_site": "Zixuan Jiang, Jiaqi Gu, Hanqing Zhu, David Pan", "tldr": "", "abstract": "Transformers have achieved great success in machine learning applications.\nNormalization techniques, such as Layer Normalization (LayerNorm, LN) and Root Mean Square Normalization (RMSNorm), play a critical role in accelerating and stabilizing the training of Transformers.\nWhile LayerNorm recenters and rescales input vectors, RMSNorm only rescales the vectors by their RMS value.\nDespite being more computationally efficient, RMSNorm may compromise the representation ability of Transformers.\nThere is currently no consensus regarding the preferred normalization technique, as some models employ LayerNorm while others utilize RMSNorm, especially in recent large language models.\nIt is challenging to convert Transformers with one normalization to the other type.\nWhile there is an ongoing disagreement between the two normalization types,\nwe propose a solution to unify two mainstream Transformer architectures, Pre-LN and Pre-RMSNorm Transformers.\nBy removing the inherent redundant mean information in the main branch of Pre-LN Transformers, we can reduce LayerNorm to RMSNorm, achieving higher efficiency.\nWe further propose the Compressed RMSNorm (CRMSNorm) and Pre-CRMSNorm Transformer based on a lossless compression of the zero-mean vectors.\nWe formally establish the equivalence of Pre-LN, Pre-RMSNorm, and Pre-CRMSNorm Transformer variants in both training and inference.\nIt implies that Pre-LN Transformers can be substituted with Pre-(C)RMSNorm counterparts at almost no cost, offering the same arithmetic functionality along with free efficiency 
improvement.\nExperiments demonstrate that we can reduce the training and inference time of Pre-LN Transformers by 1% - 10%.", "keywords": "Transformer;Normalization;Layer Normalization;RMSNorm;Efficient Machine Learning", "primary_area": "", "supplementary_material": "/attachment/804e530f5a015ec5b467a0e0e83d195f006af499.zip", "author": "Zixuan Jiang;Jiaqi Gu;Hanqing Zhu;David Z. Pan", "authorids": "~Zixuan_Jiang1;~Jiaqi_Gu3;~Hanqing_Zhu1;~David_Z._Pan1", "gender": "M;M;M;M", "homepage": ";https://scopex-asu.github.io;https://zhuhanqing.github.io/;http://users.ece.utexas.edu/~dpan/", "dblp": "258/6469;;164/8690;p/DavidZhigangPan.html", "google_scholar": "8g6Q5PYAAAAJ;FeIV12MAAAAJ;myMcrNEAAAAJ;3aLlroEAAAAJ", "orcid": ";;;0000-0002-5705-2501", "linkedin": "utzixuanjiang/;;;davidzpan/", "or_profile": "~Zixuan_Jiang1;~Jiaqi_Gu3;~Hanqing_Zhu1;~David_Z._Pan1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\njiang2023prermsnorm,\ntitle={Pre-{RMSN}orm and Pre-{CRMSN}orm Transformers: Equivalent and Efficient Pre-{LN} Transformers},\nauthor={Zixuan Jiang and Jiaqi Gu and Hanqing Zhu and David Z. Pan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=z06npyCwDq}\n}", "github": "", "project": "", "reviewers": "j4qQ;NSno;d6Jr;AcCQ", "pdf_size": 789595, "rating": "5;7;7;7", "confidence": "5;3;4;3", "soundness": "3;1;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "70;147;113;95", "wc_strengths": "37;56;106;60", "wc_weaknesses": "225;320;159;73", "wc_questions": "20;458;1;58", "wc_limitations": "20;1;8;84", "wc_review": "372;982;387;370", "wc_reply_reviewers": "47;532;0;0", "wc_reply_authors": "45;1378;0;0", "reply_reviewers": "1;4;0;0", "reply_authors": "2;6;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.25, 28.047950014216724 ], "wc_strengths_avg": [ 64.75, 25.35128201886445 ], "wc_weaknesses_avg": [ 194.25, 90.41950840388373 ], "wc_questions_avg": [ 134.25, 188.0403879489723 ], "wc_limitations_avg": [ 28.25, 32.89661836724255 ], "wc_review_avg": [ 527.75, 262.3436439100441 ], "wc_reply_reviewers_avg": [ 144.75, 224.4007297225212 ], "wc_reply_authors_avg": [ 355.75, 590.4821652683509 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15686642715599911830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Thin and deep Gaussian processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69917", 
"id": "z2BHMLA8pM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2aa212d6f40c1cb19b777e83db00ec6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=z2BHMLA8pM", "openreview": "https://openreview.net/forum?id=z2BHMLA8pM", "poster": "/media/PosterPDFs/NeurIPS%202023/69917.png?t=1701101398.1904795", "slides": "https://nips.cc/virtual/2023/poster/69917", "video": "https://nips.cc/virtual/2023/poster/69917", "author_site": "Daniel Augusto de Souza, Alexander Nikitin, ST John, Magnus Ross, Mauricio A \u00c1lvarez, Marc Deisenroth, Jo\u00e3o Paulo Gomes, Diego Mesquita, C\u00e9sar Lincoln Mattos", "tldr": "", "abstract": "Gaussian processes (GPs) can provide a principled approach to uncertainty quantification with easy-to-interpret kernel hyperparameters, such as the lengthscale, which controls the correlation distance of function values.However, selecting an appropriate kernel can be challenging.\n\nDeep GPs avoid manual kernel engineering by successively parameterizing kernels with GP layers, allowing them to learn low-dimensional embeddings of the inputs that explain the output data.\nFollowing the architecture of deep neural networks, the most common deep GPs warp the input space layer-by-layer but lose all the interpretability of shallow GPs. An alternative construction is to successively parameterize the lengthscale of a kernel, improving the interpretability but ultimately giving away the notion of learning lower-dimensional embeddings. Unfortunately, both methods are susceptible to particular pathologies which may hinder fitting and limit their interpretability.\n\nThis work proposes a novel synthesis of both previous approaches: {Thin and Deep GP} (TDGP). Each TDGP layer defines locally linear transformations of the original input data maintaining the concept of latent embeddings while also retaining the interpretation of lengthscales of a kernel. Moreover, unlike the prior solutions, TDGP induces non-pathological manifolds that admit learning lower-dimensional representations.\n\nWe show with theoretical and experimental results that i) TDGP is, unlike previous models, tailored to specifically discover lower-dimensional manifolds in the input data, ii) TDGP behaves well when increasing the number of layers, and iii) TDGP performs well in standard benchmark datasets.", "keywords": "Gaussian Processes;Deep Gaussian Processes;non-stationary kernels", "primary_area": "", "supplementary_material": "/attachment/61eed27bcdf681415eb7a16510a8cc45b4701458.zip", "author": "Daniel Augusto de Souza;Alexander V Nikitin;S. T. 
John;Magnus Ross;Mauricio A \u00c1lvarez;Marc Peter Deisenroth;Jo\u00e3o Paulo Pordeus Gomes;Diego Mesquita;C\u00e9sar Lincoln Mattos", "authorids": "~Daniel_Augusto_de_Souza1;~Alexander_V_Nikitin1;~S._T._John1;~Magnus_Ross1;~Mauricio_A_\u00c1lvarez1;~Marc_Peter_Deisenroth1;~Jo\u00e3o_Paulo_Pordeus_Gomes1;~Diego_Mesquita1;~C\u00e9sar_Lincoln_Mattos1", "gender": ";Not Specified;;M;;;M;M;M", "homepage": ";https://anikitin.me;;https://magnusross.github.io;;;;https://weakly-informative.github.io;", "dblp": ";320/6937;218/6590;294/9158;;;163/4376.html;163/4293;150/2808", "google_scholar": ";;Jf9j8GAAAAAJ;dPfmkBcAAAAJ;;;https://scholar.google.com.br/citations?user=q3mkKj8AAAAJ;;DCKOV4oAAAAJ", "orcid": ";;0000-0002-4540-395X;;;;0000-0003-1686-595X;;0000-0002-2404-3625", "linkedin": ";;;;;;;;", "or_profile": "~Daniel_Augusto_de_Souza1;~Alexander_V_Nikitin1;~S._T._John1;~Magnus_Ross1;~Mauricio_A_\u00c1lvarez1;~Marc_Peter_Deisenroth1;~Jo\u00e3o_Paulo_Pordeus_Gomes1;~Diego_Mesquita1;~C\u00e9sar_Lincoln_Mattos1", "aff": ";Aalto University;Aalto University;University of Manchester;;;Universidade Federal do Cear\u00e1;Getulio Vargas Foundation;Federal University of Cear\u00e1", "aff_domain": ";aalto.fi;aalto.fi;manchester.ac.uk;;;ufc.br;fgv.br;ufc.br", "position": ";PhD student;Postdoc;PhD student;;;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nsouza2023thin,\ntitle={Thin and deep Gaussian processes},\nauthor={Daniel Augusto de Souza and Alexander V Nikitin and S. T. John and Magnus Ross and Mauricio A {\\'A}lvarez and Marc Peter Deisenroth and Jo{\\~a}o Paulo Pordeus Gomes and Diego Mesquita and C{\\'e}sar Lincoln Mattos},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=z2BHMLA8pM}\n}", "github": "", "project": "", "reviewers": "HPwG;CrCc;DvNb;6DJk", "pdf_size": 1143042, "rating": "4;4;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;4", "wc_summary": "58;158;151;120", "wc_strengths": "11;58;144;57", "wc_weaknesses": "154;143;120;502", "wc_questions": "8;61;98;215", "wc_limitations": "19;47;1;10", "wc_review": "250;467;514;904", "wc_reply_reviewers": "0;269;64;221", "wc_reply_authors": "0;426;0;41", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 121.75, 39.486548342441885 ], "wc_strengths_avg": [ 67.5, 48.075461516245475 ], "wc_weaknesses_avg": [ 229.75, 157.661623421808 ], "wc_questions_avg": [ 95.5, 76.04768241044562 ], "wc_limitations_avg": [ 19.25, 17.239127008059313 ], "wc_review_avg": [ 533.75, 235.8202440419397 ], "wc_reply_reviewers_avg": [ 138.5, 110.19187810360617 ], "wc_reply_authors_avg": [ 116.75, 179.32843472243883 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8372380953503560428&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": ";aalto.fi;aalto.fi;manchester.ac.uk;;;ufc.br;fgv.br;ufc.br", "author_num": 9, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "Aalto University;University of Manchester;Universidade Federal do Cear\u00e1;Getulio Vargas 
Foundation;Federal University of Cear\u00e1", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.aalto.fi;https://www.manchester.ac.uk;https://www.ufc.br;https://fgv.br;https://www.uece.br", "aff_unique_abbr": "Aalto;UoM;UFC;FGV;UFC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;2", "aff_country_unique": "Finland;United Kingdom;Brazil" }, { "title": "Online List Labeling with Predictions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69916", "id": "z37ki6nqAY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/bd8284e53b6d177cbede82def77d4951-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=z37ki6nqAY", "openreview": "https://openreview.net/forum?id=z37ki6nqAY", "poster": "/media/PosterPDFs/NeurIPS%202023/69916.png?t=1701826794.1339562", "slides": "https://nips.cc/virtual/2023/poster/69916", "video": "https://nips.cc/virtual/2023/poster/69916", "author_site": "Samuel McCauley, Ben Moseley, Aidin Niaparast, Shikha Singh", "tldr": "", "abstract": "A growing line of work shows how learned predictions can be used to break through worst-case barriers to improve the running time of an algorithm. However, incorporating predictions into data structures with strong theoretical guarantees remains underdeveloped. This paper takes a step in this direction by showing that predictions can be leveraged in the fundamental online list labeling problem. In the problem, $n$ items arrive over time and must be stored in sorted order in an array of size $\\Theta(n)$. The array slot of an element is its label and the goal is to maintain sorted order while minimizing the total number of elements moved (i.e., relabeled). We design a new list labeling data structure and bound its performance in two models. In the worst-case learning-augmented model, we give guarantees in terms of the error in the predictions. Our data structure provides strong guarantees: it is optimal for any prediction error and guarantees the best-known worst-case bound even when the predictions are entirely erroneous. We also consider a stochastic error model and bound the performance in terms of the expectation and variance of the error. Finally, the theoretical results are demonstrated empirically. 
In particular, we show that our data structure has strong performance on real temporal data sets where predictions are constructed from elements that arrived in the past, as is typically done in a practical use case.", "keywords": "Algorithms with Predictions;Data Structures;Learned Indices;Online List Labeling;Resource Allocation;Beyond Worst Case Analysis", "primary_area": "", "supplementary_material": "/attachment/f11120c7d1e28c3d0d30a0c4aca0d250b043f8e1.zip", "author": "Samuel McCauley;Benjamin Moseley;Aidin Niaparast;Shikha Singh", "authorids": "~Samuel_McCauley1;~Benjamin_Moseley1;~Aidin_Niaparast1;~Shikha_Singh2", "gender": ";M;;F", "homepage": "http://dept.cs.williams.edu/~sam/;http://www.andrew.cmu.edu/user/moseleyb/;;https://www.cs.williams.edu/~shikha/", "dblp": "09/11461;28/5638;;124/3768-2", "google_scholar": ";qq-SXN8AAAAJ;;0aLpDg4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Samuel_McCauley1;~Benjamin_Moseley1;~Aidin_Niaparast1;~Shikha_Singh2", "aff": "Williams College;RelationalAI;;Williams College", "aff_domain": "cs.williams.edu;relational.ai;;williams.edu", "position": "Assistant Professor;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nmccauley2023online,\ntitle={Online List Labeling with Predictions},\nauthor={Samuel McCauley and Benjamin Moseley and Aidin Niaparast and Shikha Singh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=z37ki6nqAY}\n}", "github": "", "project": "", "reviewers": "x3tY;p1qT;v8Wd;QPB8", "pdf_size": 625907, "rating": "6;7;8;8", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "148;93;404;57", "wc_strengths": "20;8;117;124", "wc_weaknesses": "184;10;77;38", "wc_questions": "61;0;299;19", "wc_limitations": "8;10;88;15", "wc_review": "421;121;985;253", "wc_reply_reviewers": "25;5;31;6", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 175.5, 135.84642063742425 ], "wc_strengths_avg": [ 67.25, 53.476046039324935 ], "wc_weaknesses_avg": [ 77.25, 66.06578161196612 ], "wc_questions_avg": [ 94.75, 119.9716112253228 ], "wc_limitations_avg": [ 30.25, 33.439310698637314 ], "wc_review_avg": [ 445.0, 329.3994535514593 ], "wc_reply_reviewers_avg": [ 16.75, 11.453711188955307 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18121383157516212865&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.williams.edu;relational.ai;;williams.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Williams College;RelationalAI", "aff_unique_dep": ";", "aff_unique_url": "https://www.williams.edu;https://www.relationalai.com", "aff_unique_abbr": "Williams;RelationalAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Joint Learning of Label and Environment Causal Independence for Graph Out-of-Distribution Generalization", "status": "Poster", "track": 
"main", "site": "https://nips.cc/virtual/2023/poster/69915", "id": "z3HACY5CMa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0c6c92a0c5237761168eafd4549f1584-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=z3HACY5CMa", "openreview": "https://openreview.net/forum?id=z3HACY5CMa", "poster": "/media/PosterPDFs/NeurIPS%202023/69915.png?t=1702057618.489849", "slides": "https://nips.cc/virtual/2023/poster/69915", "video": "https://nips.cc/virtual/2023/poster/69915", "author_site": "Shurui Gui, Meng Liu, Xiner Li, Youzhi Luo, Shuiwang Ji", "tldr": "", "abstract": "We tackle the problem of graph out-of-distribution (OOD) generalization. Existing graph OOD algorithms either rely on restricted assumptions or fail to exploit environment information in training data. In this work, we propose to simultaneously incorporate label and environment causal independence (LECI) to fully make use of label and environment information, thereby addressing the challenges faced by prior methods on identifying causal and invariant subgraphs. We further develop an adversarial training strategy to jointly optimize these two properties for casual subgraph discovery with theoretical guarantees. Extensive experiments and analysis show that LECI significantly outperforms prior methods on both synthetic and real-world datasets, establishing LECI as a practical and effective solution for graph OOD generalization.", "keywords": "deep learning;graph neural network;out-of-distribution generalization;distribution shift", "primary_area": "", "supplementary_material": "", "author": "Shurui Gui;Meng Liu;Xiner Li;Youzhi Luo;Shuiwang Ji", "authorids": "~Shurui_Gui1;~Meng_Liu3;~Xiner_Li1;~Youzhi_Luo1;~Shuiwang_Ji1", "gender": "M;M;F;M;M", "homepage": "https://cm-bf.github.io;https://mengliu1998.github.io;;https://lyzustc.github.io/;http://people.tamu.edu/~sji", "dblp": "272/0674.html;41/7841-15;267/6459;280/0590;84/6405", "google_scholar": "U4AjtOkAAAAJ;https://scholar.google.com/citations?hl=en;bBQx_5MAAAAJ;3lqQFIoAAAAJ;BZGj6sAAAAAJ", "orcid": ";;;0000-0002-3763-0239;0000-0002-4205-4563", "linkedin": ";meng-liu-4a1813197/;;youzhi-luo-139981172/;shuiwang-ji-9a040715/", "or_profile": "~Shurui_Gui1;~Meng_Liu3;~Xiner_Li1;~Youzhi_Luo1;~Shuiwang_Ji1", "aff": "Texas A&M University;Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University;Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\ngui2023joint,\ntitle={Joint Learning of Label and Environment Causal Independence for Graph Out-of-Distribution Generalization},\nauthor={Shurui Gui and Meng Liu and Xiner Li and Youzhi Luo and Shuiwang Ji},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=z3HACY5CMa}\n}", "github": "", "project": "", "reviewers": "psug;Csim;YxXK;6JF6;tVF4", "pdf_size": 5436920, "rating": "3;5;5;6;6", "confidence": "4;4;4;3;4", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "59;86;59;120;42", "wc_strengths": "44;111;70;93;69", "wc_weaknesses": "236;170;80;107;79", "wc_questions": "2;354;129;39;87", "wc_limitations": "1;73;11;13;58", "wc_review": "342;794;349;372;335", "wc_reply_reviewers": "42;204;0;85;81", "wc_reply_authors": "535;607;83;126;336", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 
5.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 73.2, 27.315929418564544 ], "wc_strengths_avg": [ 77.4, 22.861321046693693 ], "wc_weaknesses_avg": [ 134.4, 60.612209991057085 ], "wc_questions_avg": [ 122.2, 123.60161811238557 ], "wc_limitations_avg": [ 31.2, 28.694250295137525 ], "wc_review_avg": [ 438.4, 178.2342279137203 ], "wc_reply_reviewers_avg": [ 82.4, 68.14279125483488 ], "wc_reply_authors_avg": [ 337.4, 210.30511168300214 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4564354645876385, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15108985983459465407&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Regularized Conditional GAN for Posterior Sampling in Image Recovery Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69914", "id": "z4vKRmq7UO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d8b29f07599fecdba93d87ed27a65524-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=z4vKRmq7UO", "openreview": "https://openreview.net/forum?id=z4vKRmq7UO", "poster": "/media/PosterPDFs/NeurIPS%202023/69914.png?t=1701891251.6736143", "slides": "https://nips.cc/virtual/2023/poster/69914", "video": "https://nips.cc/virtual/2023/poster/69914", "author_site": "Matthew Bendel, Rizwan Ahmad, Philip Schniter", "tldr": "", "abstract": "In image recovery problems, one seeks to infer an image from distorted, incomplete, and/or noise-corrupted measurements.\nSuch problems arise in magnetic resonance imaging (MRI), computed tomography, deblurring, super-resolution, inpainting, phase retrieval, image-to-image translation, and other applications. Given a training set of signal/measurement pairs, we seek to do more than just produce one good image estimate. Rather, we aim to rapidly and accurately sample from the posterior distribution. To do this,\nwe propose a regularized conditional Wasserstein GAN that generates dozens of high-quality posterior samples per second. Our regularization comprises an $\\ell_1$ penalty and an adaptively weighted standard-deviation reward. Using quantitative evaluation metrics like conditional Fr\u00e9chet inception distance, we demonstrate that our method produces state-of-the-art posterior samples in both multicoil MRI and large-scale inpainting applications. 
The code for our model can be found here: https://github.com/matt-bendel/rcGAN.", "keywords": "Generative adversarial network;inverse problems;posterior sampling;cGAN;GAN", "primary_area": "", "supplementary_material": "/attachment/7c94436a99020e818c2d58c911f36a94ee6bf446.zip", "author": "Matthew C Bendel;Rizwan Ahmad;Philip Schniter", "authorids": "~Matthew_C_Bendel1;~Rizwan_Ahmad1;~Philip_Schniter2", "gender": "M;M;M", "homepage": ";https://u.osu.edu/ahmad/;https://phil-schniter.web.app", "dblp": "331/8228;24/3366;s/PhilipSchniter.html", "google_scholar": "U575k_8AAAAJ;9mbdAO4AAAAJ;4Pp9KfYAAAAJ", "orcid": ";0000-0002-5917-3788;0000-0003-0939-7545", "linkedin": "matthew-bendel-399232131/;;phil-schniter-1050645/", "or_profile": "~Matthew_C_Bendel1;~Rizwan_Ahmad1;~Philip_Schniter2", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus", "aff_domain": "osu.edu;osu.edu;osu.edu", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbendel2023a,\ntitle={A Regularized Conditional {GAN} for Posterior Sampling in Image Recovery Problems},\nauthor={Matthew C Bendel and Rizwan Ahmad and Philip Schniter},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=z4vKRmq7UO}\n}", "github": "", "project": "", "reviewers": "pyUh;qA2y;N79n;ur5t", "pdf_size": 5246338, "rating": "5;6;6;7", "confidence": "3;3;4;3", "soundness": "2;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "107;64;210;81", "wc_strengths": "74;52;155;53", "wc_weaknesses": "201;186;300;340", "wc_questions": "494;2;343;3", "wc_limitations": "9;11;41;7", "wc_review": "885;315;1049;484", "wc_reply_reviewers": "122;48;362;111", "wc_reply_authors": "371;0;625;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 115.5, 56.667892143611624 ], "wc_strengths_avg": [ 83.5, 42.20485754033533 ], "wc_weaknesses_avg": [ 256.75, 65.02835919812217 ], "wc_questions_avg": [ 210.5, 214.7422874051592 ], "wc_limitations_avg": [ 17.0, 13.92838827718412 ], "wc_review_avg": [ 683.25, 295.7130154389556 ], "wc_reply_reviewers_avg": [ 160.75, 119.57293799183827 ], "wc_reply_authors_avg": [ 249.0, 264.69888552844344 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11982870849068080666&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "osu.edu;osu.edu;osu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Columbus", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "z9NLqoFvZ0", "title": "State-wise Constrained Policy Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement Learning (RL) algorithms have shown tremendous success in simulation environments, but their application to real-world problems faces significant challenges, with safety being a major concern. 
In particular, enforcing state-wise constraints is essential for many challenging tasks such as autonomous driving and robot manipulation. However, existing safe RL algorithms under the framework of Constrained Markov Decision Process (CMDP) do not consider state-wise constraints. To address this gap, we propose State-wise Constrained Policy Optimization (SCPO), the first general-purpose policy search algorithm for state-wise constrained reinforcement learning. SCPO provides guarantees for state-wise constraint satisfaction in expectation. In particular, we introduce the framework of Maximum Markov Decision Process, and prove that the worst-case safety violation is bounded under SCPO. We demonstrate the effectiveness of our approach on training neural network policies for extensive robot locomotion tasks, where the agent must satisfy a variety of state-wise safety constraints. Our results show that SCPO significantly outperforms existing methods and can handle state-wise constraints in high-dimensional robotics tasks.", "keywords": "Safe Reinforcement Learning;State-wise Safety Guarantee;Trust Region Optimization", "primary_area": "", "supplementary_material": "/attachment/afad5baff70b4b3d3435d563144bc758dbc7ab0d.zip", "author": "Weiye Zhao;Rui Chen;Yifan Sun;Tianhao Wei;Changliu Liu", "authorids": "~Weiye_Zhao1;~Rui_Chen11;yifansu2@andrew.cmu.edu;~Tianhao_Wei1;~Changliu_Liu1", "gender": "M;M;;M;F", "homepage": "https://github.com/CaesarAndylaw;https://ruichen.pub/;;;http://www.cs.cmu.edu/~cliu6/index.html", "dblp": "228/6863;;;222/5386;166/3563", "google_scholar": "P-79KOcAAAAJ;XiUE0wMAAAAJ;;V22j1C0AAAAJ;", "orcid": "0000-0002-8426-5238;0000-0002-8671-8771;;;", "linkedin": ";;;;", "or_profile": "~Weiye_Zhao1;~Rui_Chen11;yifansu2@andrew.cmu.edu;~Tianhao_Wei1;~Changliu_Liu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;andrew.cmu.edu;;andrew.cmu.edu;cmu.edu", "position": "PhD student;PhD student;;PhD student;Assistant Professor", "bibtex": "@misc{\nzhao2023statewise,\ntitle={State-wise Constrained Policy Optimization},\nauthor={Weiye Zhao and Rui Chen and Yifan Sun and Tianhao Wei and Changliu Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=z9NLqoFvZ0}\n}", "github": "", "project": "", "reviewers": "2Qxj;teqG;aqq3;DdAh", "site": "https://openreview.net/forum?id=z9NLqoFvZ0", "pdf_size": 11419802, "rating": "4;4;7;7", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "62;46;48;64", "wc_strengths": "21;11;116;101", "wc_weaknesses": "74;31;41;69", "wc_questions": "59;141;1;168", "wc_limitations": "80;1;1;1", "wc_review": "296;230;207;403", "wc_reply_reviewers": "0;0;0;140", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.0, 8.06225774829855 ], "wc_strengths_avg": [ 62.25, 46.687123492457744 ], "wc_weaknesses_avg": [ 53.75, 18.18481509391833 ], "wc_questions_avg": [ 92.25, 66.23207304622134 ], "wc_limitations_avg": [ 20.75, 34.208003449485325 ], "wc_review_avg": [ 284.0, 76.0756202735147 ], "wc_reply_reviewers_avg": [ 35.0, 60.6217782649107 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], 
"replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1975768402285703586&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "CycleNet: Rethinking Cycle Consistency in Text-Guided Diffusion for Image Manipulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69913", "id": "z9d9DsjAPH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/21293a43d0321c5602dd893be2c2332b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=z9d9DsjAPH", "openreview": "https://openreview.net/forum?id=z9d9DsjAPH", "poster": "/media/PosterPDFs/NeurIPS%202023/69913.png?t=1699862176.0584397", "slides": "https://nips.cc/virtual/2023/poster/69913", "video": "https://nips.cc/virtual/2023/poster/69913", "author_site": "Sihan Xu, Ziqiao Ma, Yidong Huang, Honglak Lee, Joyce Chai", "tldr": "", "abstract": "Diffusion models (DMs) have enabled breakthroughs in image synthesis tasks but lack an intuitive interface for consistent image-to-image (I2I) translation. Various methods have been explored to address this issue, including mask-based methods, attention-based methods, and image-conditioning. However, it remains a critical challenge to enable unpaired I2I translation with pre-trained DMs while maintaining satisfying consistency. This paper introduces Cyclenet, a novel but simple method that incorporates cycle consistency into DMs to regularize image manipulation. We validate Cyclenet on unpaired I2I tasks of different granularities. Besides the scene and object level translation, we additionally contribute a multi-domain I2I translation dataset to study the physical state changes of objects. Our empirical studies show that Cyclenet is superior in translation consistency and quality, and can generate high-quality images for out-of-domain distributions with a simple change of the textual prompt. Cyclenet is a practical framework, which is robust even with very limited training data (around 2k) and requires minimal computational resources (1 GPU) to train. 
Project homepage: https://cyclenetweb.github.io/", "keywords": "Image to image translation;latent diffusion models;conditional diffusion models", "primary_area": "", "supplementary_material": "/attachment/c06a4628bbb3916b7eb5fa1f6d77d03c2f348e1b.zip", "author": "Sihan Xu;Ziqiao Ma;Yidong Huang;Honglak Lee;Joyce Chai", "authorids": "~Sihan_Xu2;~Ziqiao_Ma1;owenhji@umich.edu;~Honglak_Lee2;~Joyce_Chai2", "gender": "M;Not Specified;;;", "homepage": "https://sihanxu.github.io/;http://mars-tin.github.io/;;;", "dblp": ";287/7595-1.html;;;", "google_scholar": ";WbybssYAAAAJ;;;", "orcid": ";0000-0002-0760-4638;;;", "linkedin": ";;;;", "or_profile": "~Sihan_Xu2;~Ziqiao_Ma1;owenhji@umich.edu;~Honglak_Lee2;~Joyce_Chai2", "aff": "University of Michigan - Ann Arbor;Amazon Science;;;", "aff_domain": "umich.edu;amazon.com;;;", "position": "Undergrad student;Research Intern;;;", "bibtex": "@inproceedings{\nxu2023cyclenet,\ntitle={CycleNet: Rethinking Cycle Consistency in Text-Guided Diffusion for Image Manipulation},\nauthor={Sihan Xu and Ziqiao Ma and Yidong Huang and Honglak Lee and Joyce Chai},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=z9d9DsjAPH}\n}", "github": "", "project": "", "reviewers": "yCsT;Tt5a;rhuF;sYAZ;aEiE", "pdf_size": 11289449, "rating": "2;5;5;6;6", "confidence": "5;4;4;4;4", "soundness": "2;3;3;4;3", "novelty": "2;2;3;2;3", "presentation": "2;2;3;4;3", "wc_summary": "39;61;105;40;76", "wc_strengths": "77;30;62;13;57", "wc_weaknesses": "149;108;135;93;86", "wc_questions": "95;6;20;4;2", "wc_limitations": "20;6;2;1;26", "wc_review": "380;211;324;151;247", "wc_reply_reviewers": "54;76;25;0;28", "wc_reply_authors": "732;47;38;0;49", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 4.8, 1.469693845669907 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 64.2, 24.636558201177372 ], "wc_strengths_avg": [ 47.8, 23.09458811063752 ], "wc_weaknesses_avg": [ 114.2, 24.194214184387143 ], "wc_questions_avg": [ 25.4, 35.37004382242125 ], "wc_limitations_avg": [ 11.0, 10.119288512538814 ], "wc_review_avg": [ 262.6, 81.13347028199891 ], "wc_reply_reviewers_avg": [ 36.6, 26.089078174592522 ], "wc_reply_authors_avg": [ 173.2, 279.9595685094546 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9525793444156804, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17168671085117068915&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "umich.edu;amazon.com;;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of Michigan;Amazon", "aff_unique_dep": ";Amazon Science", "aff_unique_url": "https://www.umich.edu;https://www.amazon.science", "aff_unique_abbr": "UM;Amazon Science", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Foundation Models for Scientific Machine Learning: Characterizing Scaling and Transfer Behavior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69912", "id": "zANxvzflMl", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/e15790966a4a9d85d688635c88ee6d8a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zANxvzflMl", "openreview": "https://openreview.net/forum?id=zANxvzflMl", "poster": "/media/PosterPDFs/NeurIPS%202023/69912.png?t=1701903689.682543", "slides": "https://nips.cc/virtual/2023/poster/69912", "video": "https://nips.cc/virtual/2023/poster/69912", "author_site": "Shashank Subramanian, Peter Harrington, Kurt Keutzer, Wahid Bhimji, Dmitriy Morozov, Michael Mahoney, Amir Gholami", "tldr": "", "abstract": "Pre-trained machine learning (ML) models have shown great performance for a\nwide range of applications, in particular in natural language processing (NLP)\nand computer vision (CV). Here, we study how pre-training could be used for\nscientific machine learning (SciML) applications, specifically in the context of\ntransfer learning. We study the transfer behavior of these models as (i) the pretrained\nmodel size is scaled, (ii) the downstream training dataset size is scaled,\n(iii) the physics parameters are systematically pushed out of distribution, and (iv)\nhow a single model pre-trained on a mixture of different physics problems can\nbe adapted to various downstream applications. We find that\u2014when fine-tuned\nappropriately\u2014transfer learning can help reach desired accuracy levels with orders\nof magnitude fewer downstream examples (across different tasks that can even be\nout-of-distribution) than training from scratch, with consistent behaviour across a\nwide range of downstream examples. We also find that fine-tuning these models\nyields more performance gains as model size increases, compared to training from\nscratch on new downstream tasks. These results hold for a broad range of PDE\nlearning tasks. All in all, our results demonstrate the potential of the \u201cpre-train and\nfine-tune\u201d paradigm for SciML problems, demonstrating a path towards building\nSciML foundation models. Our code is available as open-source.", "keywords": "scientific machine learning;scaling;transfer learning;neural operators;foundation models", "primary_area": "", "supplementary_material": "", "author": "Shashank Subramanian;Peter Harrington;Kurt Keutzer;Wahid Bhimji;Dmitriy Morozov;Michael W. 
Mahoney;Amir Gholami", "authorids": "~Shashank_Subramanian1;~Peter_Harrington1;~Kurt_Keutzer1;~Wahid_Bhimji1;~Dmitriy_Morozov1;~Michael_W._Mahoney1;~Amir_Gholami2", "gender": ";;M;M;M;;", "homepage": ";;https://people.eecs.berkeley.edu/~keutzer/;https://www.nersc.gov/about/nersc-staff/data-analytics-services/wahid-bhimji/;https://mrzv.org;;", "dblp": ";227/7462;k/KurtKeutzer.html;63/9223;80/5570;;", "google_scholar": ";Wy1O7t0AAAAJ;ID9QePIAAAAJ;COnXcM0AAAAJ;;;", "orcid": ";;0000-0003-3868-8501;;;;", "linkedin": ";;kurtkeutzer/;wahid-bhimji-56063818;;;", "or_profile": "~Shashank_Subramanian1;~Peter_Harrington1;~Kurt_Keutzer1;~Wahid_Bhimji1;~Dmitriy_Morozov1;~Michael_W._Mahoney1;~Amir_Gholami2", "aff": ";Lawrence Berkeley National Lab;University of California, Berkeley;Lawrence Berkeley National Lab;Lawrence Berkeley National Lab;;", "aff_domain": ";lbl.gov;berkeley.edu;lbl.gov;lbl.gov;;", "position": ";Researcher;Full Professor;Associate Professor;Researcher;;", "bibtex": "@inproceedings{\nsubramanian2023towards,\ntitle={Towards Foundation Models for Scientific Machine Learning: Characterizing Scaling and Transfer Behavior},\nauthor={Shashank Subramanian and Peter Harrington and Kurt Keutzer and Wahid Bhimji and Dmitriy Morozov and Michael W. Mahoney and Amir Gholami},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zANxvzflMl}\n}", "github": "", "project": "", "reviewers": "o2Fy;9JLh;Hq79;QnfD", "pdf_size": 11902698, "rating": "6;6;6;7", "confidence": "3;2;2;3", "soundness": "3;4;3;3", "novelty": "3;3;2;3", "presentation": "4;4;3;4", "wc_summary": "41;51;131;87", "wc_strengths": "55;43;290;103", "wc_weaknesses": "275;55;91;449", "wc_questions": "17;63;88;203", "wc_limitations": "59;6;6;28", "wc_review": "447;218;606;870", "wc_reply_reviewers": "0;44;34;49", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 35.30934720438768 ], "wc_strengths_avg": [ 122.75, 99.13721551465927 ], "wc_weaknesses_avg": [ 217.5, 157.56506592515996 ], "wc_questions_avg": [ 92.75, 68.55791347466753 ], "wc_limitations_avg": [ 24.75, 21.718367802392518 ], "wc_review_avg": [ 535.25, 237.43354333370843 ], "wc_reply_reviewers_avg": [ 31.75, 19.109879643786353 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10785044586455516350&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": ";lbl.gov;berkeley.edu;lbl.gov;lbl.gov;;", "author_num": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Lawrence Berkeley National Laboratory;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.lbl.gov;https://www.berkeley.edu", "aff_unique_abbr": "LBNL;UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Decision Stacks: Flexible Reinforcement Learning via Modular Generative Models", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2023/poster/69911", "id": "zAQK5r1enm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/fe1c4991d57f37dfef62d01b3901ca54-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zAQK5r1enm", "openreview": "https://openreview.net/forum?id=zAQK5r1enm", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69911", "video": "https://nips.cc/virtual/2023/poster/69911", "author_site": "Siyan Zhao, Aditya Grover", "tldr": "", "abstract": "Reinforcement learning presents an attractive paradigm to reason about several distinct aspects of sequential decision making, such as specifying complex goals, planning future observations and actions, and critiquing their utilities. However, the combined integration of these capabilities poses competing algorithmic challenges in retaining maximal expressivity while allowing for flexibility in modeling choices for efficient learning and inference. We present Decision Stacks, a generative framework that decomposes goal-conditioned policy agents into 3 generative modules. These modules simulate the temporal evolution of observations, rewards, and actions via independent generative models that can be learned in parallel via teacher forcing. Our framework guarantees both expressivity and flexibility in designing individual modules to account for key factors such as architectural bias, optimization objective and dynamics, transferrability across domains, and inference speed. Our empirical results demonstrate the effectiveness of Decision Stacks for offline policy optimization for several MDP and POMDP environments, outperforming existing methods and enabling flexible generative decision making.", "keywords": "reinforcement learning;generative models;offline RL;sequential decision making", "primary_area": "", "supplementary_material": "", "author": "Siyan Zhao;Aditya Grover", "authorids": "~Siyan_Zhao1;~Aditya_Grover1", "gender": "F;M", "homepage": "https://siyan-zhao.github.io/;https://aditya-grover.github.io", "dblp": "161/3857;162/5052", "google_scholar": ";oOhnPUgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Siyan_Zhao1;~Aditya_Grover1", "aff": "University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023decision,\ntitle={Decision Stacks: Flexible Reinforcement Learning via Modular Generative Models},\nauthor={Siyan Zhao and Aditya Grover},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zAQK5r1enm}\n}", "github": "", "project": "", "reviewers": "ywNv;xgrf;Xfrm;vCY4;E8pR", "pdf_size": 916322, "rating": "3;4;5;6;7", "confidence": "4;4;3;3;4", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;4;3;3", "wc_summary": "55;90;58;60;61", "wc_strengths": "22;88;27;108;83", "wc_weaknesses": "129;118;156;238;223", "wc_questions": "48;133;3;195;125", "wc_limitations": "59;123;1;41;1", "wc_review": "313;552;245;642;493", "wc_reply_reviewers": "325;109;276;0;19", "wc_reply_authors": "1419;405;826;0;16", "reply_reviewers": "1;1;2;0;1", "reply_authors": "5;3;4;1;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 64.8, 12.765578717786358 ], "wc_strengths_avg": [ 
65.6, 34.62138067726358 ], "wc_weaknesses_avg": [ 172.8, 48.938328537047525 ], "wc_questions_avg": [ 100.8, 67.60000000000001 ], "wc_limitations_avg": [ 45.0, 45.09545431637206 ], "wc_review_avg": [ 449.0, 148.26058140989466 ], "wc_reply_reviewers_avg": [ 145.8, 132.4800362318791 ], "wc_reply_authors_avg": [ 533.2, 536.3966442848053 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2886751345948129, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=879684009679343873&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cs.ucla.edu;ucla.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "One-Line-of-Code Data Mollification Improves Optimization of Likelihood-based Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69910", "id": "zAXg8dW8ZO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1516a7f7507d5550db5c7f29e995ec8c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zAXg8dW8ZO", "openreview": "https://openreview.net/forum?id=zAXg8dW8ZO", "poster": "/media/PosterPDFs/NeurIPS%202023/69910.png?t=1698134635.3411348", "slides": "https://nips.cc/virtual/2023/poster/69910", "video": "https://nips.cc/virtual/2023/poster/69910", "author_site": "Ba-Hien Tran, Giulio Franzese, Pietro Michiardi, Maurizio Filippone", "tldr": "", "abstract": "Generative Models (GMs) have attracted considerable attention due to their tremendous success in various domains, such as computer vision where they are capable of generating impressive realistic-looking images. Likelihood-based GMs are attractive due to the possibility of generating new data by a single model evaluation. However, they typically achieve lower sample quality compared to state-of-the-art score-based Diffusion Models (DMs). This paper provides a significant step in the direction of addressing this limitation. The idea is to borrow one of the strengths of score-based DMs, which is the ability to perform accurate density estimation in low-density regions and to address manifold overfitting by means of data mollification. We propose a view of data mollification within likelihood-based GMs as a continuation method, whereby the optimization objective smoothly transitions from simple-to-optimize to the original target. Crucially, data mollification can be implemented by adding one line of code in the optimization loop, and we demonstrate that this provides a boost in generation quality of likelihood-based GMs, without computational overheads. 
We report results on real-world image data sets and UCI benchmarks with popular likelihood-based GMs, including variants of variational autoencoders and normalizing flows, showing large improvements in FID score and density estimation.", "keywords": "Generative Models;Normalizing Flows;Variational Autoencoders", "primary_area": "", "supplementary_material": "", "author": "Ba-Hien Tran;Giulio Franzese;Pietro Michiardi;Maurizio Filippone", "authorids": "~Ba-Hien_Tran2;~Giulio_Franzese1;~Pietro_Michiardi1;~Maurizio_Filippone1", "gender": "M;M;M;M", "homepage": ";http://www.eurecom.fr/~michiard/;;https://tranbahien.github.io/", "dblp": "217/1859.html;54/3028;35/5597;279/6617.html", "google_scholar": "kEtx_WwAAAAJ;https://scholar.google.com.tw/citations?user=mlx1eCgAAAAJ;https://scholar.google.com.tw/citations?user=ILUeAloAAAAJ;FW26AagAAAAJ", "orcid": "0000-0003-4244-2053;;;", "linkedin": ";;;", "or_profile": "~Giulio_Franzese1;~Pietro_Michiardi1;~Maurizio_Filippone1;~Ba-Hien_TRAN1", "aff": "Eurecom;EURECOM;Eurecom;Sorbonne Universit\u00e9", "aff_domain": "eurecom.fr;eurecom.fr;eurecom.fr;sorbonne-universite.fr", "position": "Postdoc;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\ntran2023onelineofcode,\ntitle={One-Line-of-Code Data Mollification Improves Optimization of Likelihood-based Generative Models},\nauthor={Ba-Hien Tran and Giulio Franzese and Pietro Michiardi and Maurizio Filippone},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zAXg8dW8ZO}\n}", "github": "", "project": "", "reviewers": "S7wY;qdL5;sZKb;yxxe", "pdf_size": 22256417, "rating": "3;4;6;6", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "1;2;2;2", "presentation": "3;2;3;4", "wc_summary": "90;167;105;88", "wc_strengths": "27;45;153;128", "wc_weaknesses": "170;250;298;170", "wc_questions": "50;103;233;203", "wc_limitations": "19;177;357;72", "wc_review": "356;742;1146;661", "wc_reply_reviewers": "0;124;202;12", "wc_reply_authors": "0;932;461;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.5, 32.14420632089086 ], "wc_strengths_avg": [ 88.25, 53.3730971557769 ], "wc_weaknesses_avg": [ 222.0, 54.699177324709375 ], "wc_questions_avg": [ 147.25, 73.95395526947831 ], "wc_limitations_avg": [ 156.25, 129.0995255607084 ], "wc_review_avg": [ 726.25, 281.8602268855966 ], "wc_reply_reviewers_avg": [ 84.5, 83.31116371771553 ], "wc_reply_authors_avg": [ 348.25, 386.01578659427906 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3093574945443756826&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "eurecom.fr;eurecom.fr;eurecom.fr;sorbonne-universite.fr", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "EURECOM;Sorbonne Universit\u00e9", "aff_unique_dep": ";", "aff_unique_url": "https://www.eurecom.fr;https://www.sorbonne-universite.fr", "aff_unique_abbr": ";Sorbonne U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": 
"Quasi-Monte Carlo Graph Random Features", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69909", "id": "zCFfv49MjE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2f9b3ee2bcea04b327c09d7e3145bd1e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zCFfv49MjE", "openreview": "https://openreview.net/forum?id=zCFfv49MjE", "poster": "/media/PosterPDFs/NeurIPS%202023/69909.png?t=1701775687.572275", "slides": "https://nips.cc/virtual/2023/poster/69909", "video": "https://nips.cc/virtual/2023/poster/69909", "author_site": "Isaac Reid, Krzysztof M Choromanski, Adrian Weller", "tldr": "", "abstract": "We present a novel mechanism to improve the accuracy of the recently-introduced class of graph random features (GRFs). Our method induces negative correlations between the lengths of the algorithm's random walks by imposing antithetic termination: a procedure to sample more diverse random walks which may be of independent interest. It has a trivial drop-in implementation. We derive strong theoretical guarantees on the properties of these quasi-Monte Carlo GRFs (q-GRFs), proving that they yield lower-variance estimators of the $2$-regularised Laplacian kernel under mild conditions. Remarkably, our results hold for any graph topology. We demonstrate empirical accuracy improvements on a variety of tasks including a new practical application: time-efficient approximation of the graph diffusion process. To our knowledge, q-GRFs constitute the first rigorously studied quasi-Monte Carlo scheme for kernels defined on combinatorial objects, inviting new research on correlations between graph random walks.", "keywords": "Graph;discrete mathematics;quasi-Monte Carlo;kernel;scalability;Laplacian;clustering;random walks", "primary_area": "", "supplementary_material": "/attachment/d2ec353f598a499ecb220baa84887bfd3c5b3515.pdf", "author": "Isaac Reid;Krzysztof Marcin Choromanski;Adrian Weller", "authorids": "~Isaac_Reid3;~Krzysztof_Marcin_Choromanski1;~Adrian_Weller1", "gender": "M;;M", "homepage": "https://isaac-reid.github.io;;http://mlg.eng.cam.ac.uk/adrian/", "dblp": "287/4898;78/11411;73/8324", "google_scholar": "3JPyAi0AAAAJ;;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ", "orcid": "0000-0002-1664-1975;;", "linkedin": ";;", "or_profile": "~Isaac_Reid3;~Krzysztof_Marcin_Choromanski1;~Adrian_Weller1", "aff": "University of Cambridge;Google Brain Robotics & Columbia University;University of Cambridge", "aff_domain": "cam.ac.uk;columbia.edu;cam.ac.uk", "position": "PhD student;research scientist & adjunct assistant professor;Principal Researcher", "bibtex": "@inproceedings{\nreid2023quasimonte,\ntitle={Quasi-Monte Carlo Graph Random Features},\nauthor={Isaac Reid and Adrian Weller and Krzysztof Marcin Choromanski},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zCFfv49MjE}\n}", "github": "", "project": "", "reviewers": "SAEb;qCjY;mBGP;wGeX", "pdf_size": 574871, "rating": "5;7;7;9", "confidence": "3;3;4;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "70;206;83;136", "wc_strengths": "75;28;129;50", "wc_weaknesses": "144;170;37;18", "wc_questions": "44;29;82;124", "wc_limitations": "1;4;25;16", "wc_review": "334;437;356;344", "wc_reply_reviewers": "337;46;55;29", "wc_reply_authors": "953;86;13;19", "reply_reviewers": "2;2;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 7.0, 1.4142135623730951 ], 
"confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.75, 53.53678641831241 ], "wc_strengths_avg": [ 70.5, 37.646380968162134 ], "wc_weaknesses_avg": [ 92.25, 65.7433456708738 ], "wc_questions_avg": [ 69.75, 36.799286677869176 ], "wc_limitations_avg": [ 11.5, 9.604686356149273 ], "wc_review_avg": [ 367.75, 40.73312534044006 ], "wc_reply_reviewers_avg": [ 116.75, 127.50367641758413 ], "wc_reply_authors_avg": [ 267.75, 396.6657125338665 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11825977910579606123&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;columbia.edu;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Cambridge;Google", "aff_unique_dep": ";Google Brain Robotics", "aff_unique_url": "https://www.cam.ac.uk;https://ai.google", "aff_unique_abbr": "Cambridge;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Cambridge;Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "A Novel Framework for Policy Mirror Descent with General Parameterization and Linear Convergence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69908", "id": "zD6lXmTPPh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/61a9278dfef5f871b5e472389f8d6fa1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zD6lXmTPPh", "openreview": "https://openreview.net/forum?id=zD6lXmTPPh", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69908", "video": "https://nips.cc/virtual/2023/poster/69908", "author_site": "Carlo Alfano, Rui Yuan, Rui Yuan, Patrick Rebeschini", "tldr": "", "abstract": "Modern policy optimization methods in reinforcement learning, such as TRPO and PPO, owe their success to the use of parameterized policies. However, while theoretical guarantees have been established for this class of algorithms, especially in the tabular setting, the use of general parameterization schemes remains mostly unjustified. In this work, we introduce a novel framework for policy optimization based on mirror descent that naturally accommodates general parameterizations. The policy class induced by our scheme recovers known classes, e.g., softmax, and generates new ones depending on the choice of mirror map. Using our framework, we obtain the first result that guarantees linear convergence for a policy-gradient-based method involving general parameterization. 
To demonstrate the ability of our framework to accommodate general parameterization schemes, we provide its sample complexity when using shallow neural networks, show that it represents an improvement upon the previous best results, and empirically validate the effectiveness of our theoretical claims on classic control tasks.", "keywords": "Theory for Reinforcement Learning;Policy Optimization;Policy Gradient;Mirror Descent.", "primary_area": "", "supplementary_material": "", "author": "Carlo Alfano;Rui Yuan;Patrick Rebeschini", "authorids": "~Carlo_Alfano1;~Rui_Yuan1;~Patrick_Rebeschini1", "gender": "M;M;M", "homepage": ";https://rui-yuan91.github.io/;http://www.stats.ox.ac.uk/~rebeschi/", "dblp": "302/4418;;164/7439", "google_scholar": ";4QZgrj0AAAAJ;", "orcid": ";0000-0002-1768-9639;0000-0001-7772-4160", "linkedin": "carlo-alfano-7a4378171/;rui-yuan-phd-55135537/;patrick-rebeschini/", "or_profile": "~Carlo_Alfano1;~Rui_Yuan1;~Patrick_Rebeschini1", "aff": "University of Oxford;T\u00e9l\u00e9com Paris;University of Oxford", "aff_domain": "ox.ac.uk;telecom-paristech.fr;oxford.ac.uk", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nalfano2023a,\ntitle={A Novel Framework for Policy Mirror Descent with General Parameterization and Linear Convergence},\nauthor={Carlo Alfano and Rui Yuan and Patrick Rebeschini},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zD6lXmTPPh}\n}", "github": "", "project": "", "reviewers": "rns2;WBF5;zS6s;oNAx;qVps", "pdf_size": 929968, "rating": "5;6;7;7;7", "confidence": "2;4;4;4;2", "soundness": "2;2;2;3;3", "novelty": "3;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "289;56;86;80;54", "wc_strengths": "58;1;116;157;85", "wc_weaknesses": "152;174;127;148;42", "wc_questions": "16;1;379;31;46", "wc_limitations": "3;1;22;12;1", "wc_review": "518;233;730;428;228", "wc_reply_reviewers": "1117;25;448;68;17", "wc_reply_authors": "1454;0;1220;12;0", "reply_reviewers": "5;1;2;2;1", "reply_authors": "6;1;3;2;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 113.0, 88.90894218243741 ], "wc_strengths_avg": [ 83.4, 52.758316879900555 ], "wc_weaknesses_avg": [ 128.6, 45.79781654183963 ], "wc_questions_avg": [ 94.6, 142.9889506220673 ], "wc_limitations_avg": [ 7.8, 8.182909018191513 ], "wc_review_avg": [ 427.4, 188.32270176481643 ], "wc_reply_reviewers_avg": [ 335.0, 422.5650245820163 ], "wc_reply_authors_avg": [ 537.2, 657.2276317989073 ], "reply_reviewers_avg": [ 2.2, 1.469693845669907 ], "reply_authors_avg": [ 2.6, 1.8547236990991407 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15328832185966391284&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 10, "email": "ox.ac.uk;telecom-paristech.fr;oxford.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oxford;T\u00e9l\u00e9com Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.telecom-paris.fr", "aff_unique_abbr": "Oxford;T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;France" }, { "title": 
"Leveraging Pre-trained Large Language Models to Construct and Utilize World Models for Model-based Task Planning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69907", "id": "zDbsSscmuj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f9f54762cbb4fe4dbffdd4f792c31221-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zDbsSscmuj", "openreview": "https://openreview.net/forum?id=zDbsSscmuj", "poster": "/media/PosterPDFs/NeurIPS%202023/69907.png?t=1702477301.7524335", "slides": "https://nips.cc/virtual/2023/poster/69907", "video": "https://nips.cc/virtual/2023/poster/69907", "author_site": "Lin Guan, Karthik Valmeekam, Sarath Sreedharan, Subbarao Kambhampati", "tldr": "", "abstract": "There is a growing interest in applying pre-trained large language models (LLMs) to planning problems. However, methods that use LLMs directly as planners are currently impractical due to several factors, including limited correctness of plans, strong reliance on feedback from interactions with simulators or even the actual environment, and the inefficiency in utilizing human feedback. In this work, we introduce a novel alternative paradigm that constructs an explicit world (domain) model in planning domain definition language (PDDL) and then uses it to plan with sound domain-independent planners. To address the fact that LLMs may not generate a fully functional PDDL model initially, we employ LLMs as an interface between PDDL and sources of corrective feedback, such as PDDL validators and humans. For users who lack a background in PDDL, we show that LLMs can translate PDDL into natural language and effectively encode corrective feedback back to the underlying domain model. Our framework not only enjoys the correctness guarantee offered by the external planners but also reduces human involvement by allowing users to correct domain models at the beginning, rather than inspecting and correcting (through interactive prompting) every generated plan as in previous work. On two IPC domains and a Household domain that is more complicated than commonly used benchmarks such as ALFWorld, we demonstrate that GPT-4 can be leveraged to produce high-quality PDDL models for over 40 actions, and the corrected PDDL models are then used to successfully solve 48 challenging planning tasks. 
Resources, including the source code, are released at: https://guansuns.github.io/pages/llm-dm.", "keywords": "LLMs;Planning;Domain Model;LLMs for Planning;LLMs for Heuristic Guidance", "primary_area": "", "supplementary_material": "/attachment/c71fcd8bd06153a28da9d9a4cf5882db99225961.pdf", "author": "Lin Guan;Karthik Valmeekam;Sarath Sreedharan;Subbarao Kambhampati", "authorids": "~Lin_Guan1;~Karthik_Valmeekam1;~Sarath_Sreedharan1;~Subbarao_Kambhampati1", "gender": "M;M;;M", "homepage": "https://guansuns.github.io/;;;http://rakaposhi.eas.asu.edu", "dblp": ";279/2957;162/5110;k/SKambhampati", "google_scholar": "c1L_gZoAAAAJ;CrYLDt4AAAAJ;;yl3L07sAAAAJ", "orcid": ";;;", "linkedin": "lin-guan/;;;", "or_profile": "~Lin_Guan1;~Karthik_Valmeekam1;~Sarath_Sreedharan1;~Subbarao_Kambhampati1", "aff": "Arizona State University;Arizona State University;Colorado State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;colostate.edu;asu.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nguan2023leveraging,\ntitle={Leveraging Pre-trained Large Language Models to Construct and Utilize World Models for Model-based Task Planning},\nauthor={Lin Guan and Karthik Valmeekam and Sarath Sreedharan and Subbarao Kambhampati},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zDbsSscmuj}\n}", "github": "", "project": "", "reviewers": "iFiN;bNZu;33uW;QSdH", "pdf_size": 649236, "rating": "5;6;7;7", "confidence": "4;4;3;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "69;205;115;129", "wc_strengths": "53;177;259;85", "wc_weaknesses": "112;263;248;93", "wc_questions": "152;54;479;72", "wc_limitations": "1;1;7;77", "wc_review": "387;700;1108;456", "wc_reply_reviewers": "83;109;305;21", "wc_reply_authors": "0;20;47;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 129.5, 48.915743886810105 ], "wc_strengths_avg": [ 143.5, 80.73877630977572 ], "wc_weaknesses_avg": [ 179.0, 76.97726937219844 ], "wc_questions_avg": [ 189.25, 171.3058303152581 ], "wc_limitations_avg": [ 21.5, 32.13642792844283 ], "wc_review_avg": [ 662.75, 282.1430266726435 ], "wc_reply_reviewers_avg": [ 129.5, 106.24852940158749 ], "wc_reply_authors_avg": [ 16.75, 19.279198634798075 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 201, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16966396405025622906&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "asu.edu;asu.edu;colostate.edu;asu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Arizona State University;Colorado State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.colostate.edu", "aff_unique_abbr": "ASU;CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "(Amplified) Banded Matrix Factorization: A unified approach to private training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69906", "id": 
"zEm6hF97Pz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ecc28b4ce9b39f5f23c3efb03e25b7bf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zEm6hF97Pz", "openreview": "https://openreview.net/forum?id=zEm6hF97Pz", "poster": "/media/PosterPDFs/NeurIPS%202023/69906.png?t=1703031494.8942127", "slides": "https://nips.cc/virtual/2023/poster/69906", "video": "https://nips.cc/virtual/2023/poster/69906", "author_site": "Christopher A. Choquette-Choo, Arun Ganesh, Ryan McKenna, H. Brendan McMahan, John Rush, Abhradeep Guha Thakurta, Zheng Xu", "tldr": "", "abstract": "Matrix factorization (MF) mechanisms for differential privacy (DP) have substantially improved the state-of-the-art in privacy-utility-computation tradeoffs for ML applications in a variety of scenarios, but in both the centralized and federated settings there remain instances where either MF cannot be easily applied, or other algorithms provide better tradeoffs (typically, as $\\epsilon$ becomes small).\nIn this work, we show how MF can subsume prior state-of-the-art algorithms in both federated and centralized training settings, across all privacy budgets. The key technique throughout is the construction of MF mechanisms with banded matrices (lower-triangular matrices with at most $\\hat{b}$ nonzero bands including the main diagonal). For cross-device federated learning (FL), this enables multiple-participations with a relaxed device participation schema compatible with practical FL infrastructure (as demonstrated by a production deployment). In the centralized setting, we prove that banded matrices enjoy the same privacy amplification results as the ubiquitous DP-SGD algorithm, but can provide strictly better performance in most scenarios---this lets us always at least match DP-SGD, and often outperform it", "keywords": "Machine Learning;Differential Privacy;Optimization;Private Machine Learning;Federated Learning;Privacy Amplification;Matrix Factorization", "primary_area": "", "supplementary_material": "/attachment/12a520cbf57ba5897bc83f2ad154fc0a7de79fdd.pdf", "author": "Christopher A. Choquette-Choo;Arun Ganesh;Ryan McKenna;Hugh Brendan McMahan;J Keith Rush;Abhradeep Guha Thakurta;Zheng Xu", "authorids": "~Christopher_A._Choquette-Choo1;~Arun_Ganesh1;~Ryan_McKenna2;~Hugh_Brendan_McMahan1;~J_Keith_Rush1;~Abhradeep_Guha_Thakurta1;~Zheng_Xu2", "gender": "M;M;;M;;M;", "homepage": "https://www.christopherchoquette.com;https://people.eecs.berkeley.edu/~arunganesh/;;;https://www.jkrush.com;https://athakurta.squarespace.com/;https://sites.google.com/site/xuzhustc/", "dblp": "250/9674;201/4732;;;249/8135;31/8315;83/2535-2", "google_scholar": "oDE4I64AAAAJ;fmwchbsAAAAJ;;;OrUyRAcAAAAJ;1rV69hMAAAAJ;TfWlMTYAAAAJ", "orcid": ";;;;;;0009-0003-6747-3953", "linkedin": "christopher-choquette-choo/;;;;;;zheng-xu-0a125236/", "or_profile": "~Christopher_A._Choquette-Choo1;~Arun_Ganesh1;~Ryan_McKenna2;~Hugh_Brendan_McMahan1;~J_Keith_Rush1;~Abhradeep_Guha_Thakurta1;~Zheng_Xu2", "aff": "Google Research, Brain Team;Google;;Google;Google;Google;Google", "aff_domain": "google.com;google.com;;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;;Research Scientist;Researcher;Senior Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nchoquette-choo2023amplified,\ntitle={(Amplified) Banded Matrix Factorization: A unified approach to private training},\nauthor={Christopher A. 
Choquette-Choo and Arun Ganesh and Ryan McKenna and Hugh Brendan McMahan and J Keith Rush and Abhradeep Guha Thakurta and Zheng Xu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zEm6hF97Pz}\n}", "github": "", "project": "", "reviewers": "4QAc;T4Mk;UhEM;2XT3;z4EU", "pdf_size": 1505216, "rating": "5;5;6;6;8", "confidence": "2;2;3;3;3", "soundness": "2;3;3;3;4", "novelty": "2;3;2;2;4", "presentation": "3;3;3;1;4", "wc_summary": "69;54;67;160;122", "wc_strengths": "28;37;38;86;84", "wc_weaknesses": "35;235;70;364;79", "wc_questions": "2;44;103;4;173", "wc_limitations": "1;1;109;1;9", "wc_review": "135;371;387;615;467", "wc_reply_reviewers": "0;14;4;21;15", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 94.4, 40.232325311868316 ], "wc_strengths_avg": [ 54.6, 25.07269431074371 ], "wc_weaknesses_avg": [ 156.6, 124.4742543661138 ], "wc_questions_avg": [ 65.2, 65.17484177195983 ], "wc_limitations_avg": [ 24.2, 42.51305681787655 ], "wc_review_avg": [ 395.0, 156.09228039848736 ], "wc_reply_reviewers_avg": [ 10.8, 7.678541528180986 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.74535599249993, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10444146443282905433&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com;;google.com;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Automated Classification of Model Errors on ImageNet", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69905", "id": "zEoP4vzFKy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/7480ed13740773505262791131c12b89-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zEoP4vzFKy", "openreview": "https://openreview.net/forum?id=zEoP4vzFKy", "poster": "/media/PosterPDFs/NeurIPS%202023/69905.png?t=1701432352.9860287", "slides": "https://nips.cc/virtual/2023/poster/69905", "video": "https://nips.cc/virtual/2023/poster/69905", "author_site": "Momchil Peychev, Mark M\u00fcller, Marc Fischer, Martin Vechev", "tldr": "", "abstract": "While the ImageNet dataset has been driving computer vision research over the past decade, significant label noise and ambiguity have made top-1 accuracy an insufficient measure of further progress. To address this, new label-sets and evaluation protocols have been proposed for ImageNet, showing that state-of-the-art models already achieve over 95% accuracy and shifting the focus to investigating why the remaining errors persist.\n\nRecent work in this direction employed a panel of experts to manually categorize all remaining classification errors for two selected models.
However, this process is time-consuming, prone to inconsistencies, and requires trained experts, making it unsuitable for regular model evaluation, thus limiting its utility. To overcome these limitations, we propose the first automated error classification framework, a valuable tool to study how modeling choices affect error distributions. We use our framework to comprehensively evaluate the error distribution of over 900 models. Perhaps surprisingly, we find that across model architectures, scales, and pre-training corpora, top-1 accuracy is a strong predictor for the *portion* of all error types. In particular, we observe that the portion of severe errors drops significantly with top-1 accuracy, indicating that, while it underreports a model's true performance, it remains a valuable performance metric.\n\nWe release all our code at https://github.com/eth-sri/automated-error-analysis.", "keywords": "ImageNet;evaluation;error classification;error analysis", "primary_area": "", "supplementary_material": "", "author": "Momchil Peychev;Mark Niklas Mueller;Marc Fischer;Martin Vechev", "authorids": "~Momchil_Peychev1;~Mark_Niklas_Mueller2;~Marc_Fischer1;~Martin_Vechev1", "gender": "M;M;M;M", "homepage": "https://www.sri.inf.ethz.ch/people/momchil;https://www.sri.inf.ethz.ch/people/mark;;https://www.sri.inf.ethz.ch/people/martin", "dblp": "210/2351;287/4254;37/9373-2;93/2189.html", "google_scholar": "RuhLJ8oAAAAJ;RBpmcCAAAAAJ;;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": "0000-0003-0927-6356;0000-0002-2496-6542;;", "linkedin": ";mark-m%C3%BCller-8bb4b1140/;;", "or_profile": "~Momchil_Peychev1;~Mark_Niklas_Mueller2;~Marc_Fischer1;~Martin_Vechev1", "aff": "ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\npeychev2023automated,\ntitle={Automated Classification of Model Errors on ImageNet},\nauthor={Momchil Peychev and Mark Niklas Mueller and Marc Fischer and Martin Vechev},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zEoP4vzFKy}\n}", "github": "", "project": "", "reviewers": "GgcH;HYyY;rfV8;hvnS;kbqQ", "pdf_size": 26729501, "rating": "4;5;5;6;7", "confidence": "3;4;3;4;4", "soundness": "2;3;3;3;4", "novelty": "2;2;2;1;4", "presentation": "2;3;3;2;4", "wc_summary": "112;77;60;69;36", "wc_strengths": "125;31;28;15;121", "wc_weaknesses": "842;185;72;222;20", "wc_questions": "167;2;2;1;38", "wc_limitations": "6;68;7;1;7", "wc_review": "1252;363;169;308;222", "wc_reply_reviewers": "211;12;23;490;69", "wc_reply_authors": "0;0;0;1045;81", "reply_reviewers": "1;1;1;3;1", "reply_authors": "1;1;1;3;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.9797958971132712 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 70.8, 24.766105870725823 ], "wc_strengths_avg": [ 64.0, 48.48917404947211 ], "wc_weaknesses_avg": [ 268.2, 296.1083585446382 ], "wc_questions_avg": [ 42.0, 64.06559138882587 ], "wc_limitations_avg": [ 17.8, 25.19841264841895 ], "wc_review_avg": [ 462.8, 400.2656118129561 ], "wc_reply_reviewers_avg": [ 161.0, 179.13681921927719 ], "wc_reply_authors_avg": [ 225.2, 411.0987229364742 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [
1.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7205766921228922, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1692016157787391415&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "A High-Resolution Dataset for Instance Detection with Multi-View Object Capture", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73409", "id": "zFvvdJblZm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/832ea0ff01bd512aab28bf416db9489c-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=zFvvdJblZm", "openreview": "https://openreview.net/forum?id=zFvvdJblZm", "poster": "/media/PosterPDFs/NeurIPS%202023/73409.png?t=1702588236.1620846", "slides": "https://nips.cc/virtual/2023/poster/73409", "video": "https://nips.cc/virtual/2023/poster/73409", "author_site": "QIANQIAN SHEN, Yunhan Zhao, Nahyun Kwon, Jeeeun Kim, Yanan Li, Shu Kong", "tldr": "", "abstract": "Instance detection (InsDet) is a long-standing problem in robotics and computer vision, aiming to detect object instances (predefined by some visual examples) in a cluttered scene. Despite its practical significance, its advancement is overshadowed by Object Detection, which aims to detect objects belonging to some predefined classes. One major reason is that current InsDet datasets are too small in scale by today's standards. For example, the popular InsDet dataset GMU (published in 2016) has only 23 instances, far fewer than COCO (80 classes), a well-known object detection dataset published in 2014. We are motivated to introduce a new InsDet dataset and protocol. First, we define a realistic setup for InsDet: training data consists of multi-view instance captures, along with diverse scene images allowing the synthesis of training images by pasting instance images on them with free box annotations. Second, we release a real-world database, which contains multi-view capture of 100 object instances, and high-resolution (6k$\\times$8k) testing images. Third, we extensively study baseline methods for InsDet on our dataset, analyze their performance and suggest future work.
Somewhat surprisingly, using the off-the-shelf class-agnostic segmentation model (Segment Anything Model, SAM) and the self-supervised feature representation DINOv2 performs the best, achieving $>$10 AP better than end-to-end trained InsDet models that repurpose object detectors (e.g., FasterRCNN and RetinaNet).", "keywords": "Instance Detection;Segment Anything;Object Detection;Cut-Paste-Learn;Dataset;Multi-View Object Capture", "primary_area": "", "supplementary_material": "/attachment/1d16ac1f3396df0178adc0744c5514fb45b2b3a5.pdf", "author": "QIANQIAN SHEN;Yunhan Zhao;Nahyun Kwon;Jeeeun Kim;Yanan Li;Shu Kong", "authorids": "~QIANQIAN_SHEN1;~Yunhan_Zhao1;~Nahyun_Kwon1;~Jeeeun_Kim1;~Yanan_Li4;~Shu_Kong1", "gender": "F;M;F;F;F;M", "homepage": ";https://yunhan-zhao.github.io/;http://nahyunkwon.github.io;https://jeeeunkim.com;https://yananlix1.github.io/;https://aimerykong.github.io/", "dblp": "254/4696;211/6894;;144/5556.html;61/7498-2.html;26/11141", "google_scholar": "BlDZFSIAAAAJ;8Jf-FhIAAAAJ;;https://scholar.google.com/citations?hl=en;9cTdt_kAAAAJ;sm9FdLoAAAAJ", "orcid": "0009-0001-4777-0542;;;;0000-0001-8482-7221;0000-0002-1362-5937", "linkedin": ";;;;;aimerykong/", "or_profile": "~QIANQIAN_SHEN1;~Yunhan_Zhao1;~Nahyun_Kwon1;~Jeeeun_Kim1;~Yanan_Li4;~Shu_Kong1", "aff": "Zhejiang Lab;Google;Texas A&M University - College Station;Texas A&M University - College Station;Zhejiang Lab;Texas A&M University - College Station", "aff_domain": "zhejianglab.com;google.com;tamu.edu;tamu.edu;zhejianglab.com;tamu.edu", "position": "Researcher;Intern;PhD student;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshen2023a,\ntitle={A High-Resolution Dataset for Instance Detection with Multi-View Object Capture},\nauthor={QIANQIAN SHEN and Yunhan Zhao and Nahyun Kwon and Jeeeun Kim and Yanan Li and Shu Kong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=zFvvdJblZm}\n}", "github": "", "project": "", "reviewers": "f4oD;Yusx;3YDD", "pdf_size": 9743779, "rating": "6;7;7", "confidence": "3;3;3", "wc_summary_and_contributions": "118;119;68", "wc_strengths": "66;73;64", "wc_improvement": "116;661;184", "wc_limitations": "27;8;62", "wc_correctness": "1;5;8", "wc_clarity": "10;4;12", "wc_relation_to_prior_work": "1;17;25", "wc_documentation": "1;7;28", "wc_additional_feedback": "1;1;1", "wc_review": "341;895;452", "wc_reply_reviewers": "27;17;11", "wc_reply_authors": "707;1474;809", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 101.66666666666667, 23.809428571238094 ], "wc_strengths_avg": [ 67.66666666666667, 3.858612300930075 ], "wc_improvement_avg": [ 320.3333333333333, 242.48207264776414 ], "wc_limitations_avg": [ 32.333333333333336, 22.365648262955002 ], "wc_correctness_avg": [ 4.666666666666667, 2.8674417556808756 ], "wc_clarity_avg": [ 8.666666666666666, 3.39934634239519 ], "wc_relation_to_prior_work_avg": [ 14.333333333333334, 9.977753031397176 ], "wc_documentation_avg": [ 12.0, 11.575836902790225 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 562.6666666666666, 239.32451237226456 ], "wc_reply_reviewers_avg": [ 18.333333333333332, 6.599663291074443 ], "wc_reply_authors_avg": [ 996.6666666666666, 340.08462999017695 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 
0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11256785705367716160&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "zhejianglab.com;google.com;tamu.edu;tamu.edu;zhejianglab.com;tamu.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "Zhejiang Lab;Google;Texas A&M University", "aff_unique_dep": ";Google;", "aff_unique_url": "http://www.zhejianglab.com;https://www.google.com;https://www.tamu.edu", "aff_unique_abbr": ";Google;TAMU", "aff_campus_unique_index": "1;2;2;2", "aff_campus_unique": ";Mountain View;College Station", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Multi-Swap k-Means++", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69904", "id": "zGRWp7yRqd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/52d63f9e4b81f866bf69fb3c834aad47-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zGRWp7yRqd", "openreview": "https://openreview.net/forum?id=zGRWp7yRqd", "poster": "/media/PosterPDFs/NeurIPS%202023/69904.png?t=1700380437.0306375", "slides": "https://nips.cc/virtual/2023/poster/69904", "video": "https://nips.cc/virtual/2023/poster/69904", "author_site": "Lorenzo Beretta, Vincent Cohen-Addad, Silvio Lattanzi, Nikos Parotsidis", "tldr": "", "abstract": "The $k$-means++ algorithm of Arthur and Vassilvitskii (SODA 2007) is often the practitioners' choice algorithm for optimizing the popular $k$-means clustering objective and is known to give an $O(\\log k)$-approximation in expectation. To obtain higher quality solutions, Lattanzi and Sohler (ICML 2019) proposed augmenting $k$-means++ with $O(k \\log \\log k)$ local-search steps obtained through the $k$-means++ sampling distribution to yield a $c$-approximation to the $k$-means clustering problem, where $c$ is a large absolute constant. Here we generalize and extend their local-search algorithm by considering larger and more sophisticated local-search neighborhoods, hence allowing us to swap multiple centers at the same time. Our algorithm achieves a $9 + \\varepsilon$ approximation ratio, which is the best possible for local search.
Importantly, we show that our algorithm is practical, namely easy to implement and fast enough to run on a variety of classic datasets, and outputs solutions of lower cost.", "keywords": "Clustering;k-means;approximation algorithms", "primary_area": "", "supplementary_material": "/attachment/f50059548c011e7ee43b83d8467e414920540451.pdf", "author": "Lorenzo Beretta;Vincent Cohen-Addad;Silvio Lattanzi;Nikos Parotsidis", "authorids": "~Lorenzo_Beretta1;~Vincent_Cohen-Addad1;~Silvio_Lattanzi1;~Nikos_Parotsidis1", "gender": "M;;M;M", "homepage": ";;https://sites.google.com/site/silviolattanzi/;https://sites.google.com/view/nikosparotsidis", "dblp": "34/8239-1;136/5814;46/6611;129/9110", "google_scholar": ";;vxUZ4AUAAAAJ;https://scholar.google.gr/citations?user=Txeb6wsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lorenzo_Beretta1;~Vincent_Cohen-Addad1;~Silvio_Lattanzi1;~Nikos_Parotsidis1", "aff": "University of Copenhagen;Google;Google;Google", "aff_domain": "diku.dk;google.com;google.com;google.com", "position": "PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nberetta2023multiswap,\ntitle={Multi-Swap k-Means++},\nauthor={Lorenzo Beretta and Vincent Cohen-Addad and Silvio Lattanzi and Nikos Parotsidis},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zGRWp7yRqd}\n}", "github": "", "project": "", "reviewers": "2sgw;FVuS;swvB;coFq", "pdf_size": 2111881, "rating": "3;7;7;8", "confidence": "4;4;4;3", "soundness": "2;4;3;3", "novelty": "2;4;4;4", "presentation": "2;3;3;4", "wc_summary": "250;115;130;165", "wc_strengths": "194;102;86;67", "wc_weaknesses": "795;15;83;32", "wc_questions": "416;94;13;52", "wc_limitations": "18;1;11;1", "wc_review": "1673;327;323;317", "wc_reply_reviewers": "71;11;11;30", "wc_reply_authors": "129;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 165.0, 52.321123841140874 ], "wc_strengths_avg": [ 112.25, 48.79741284125625 ], "wc_weaknesses_avg": [ 231.25, 326.44170612836837 ], "wc_questions_avg": [ 143.75, 159.77229891317205 ], "wc_limitations_avg": [ 7.75, 7.189401922274203 ], "wc_review_avg": [ 660.0, 584.8666514685207 ], "wc_reply_reviewers_avg": [ 30.75, 24.498724456591614 ], "wc_reply_authors_avg": [ 32.25, 55.858638544096294 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5261522196019801, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14785070408198286036&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "diku.dk;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Copenhagen;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ku.dk;https://www.google.com", "aff_unique_abbr": "UCPH;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Denmark;United States" }, { "title": "Optimal Treatment Regimes for Proximal Causal Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69903", "id": "zGdH4tKtOW",
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/94ccfdb2ca14f33a86a0b9b7d0c1bfb1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zGdH4tKtOW", "openreview": "https://openreview.net/forum?id=zGdH4tKtOW", "poster": "/media/PosterPDFs/NeurIPS%202023/69903.png?t=1699504084.6553435", "slides": "https://nips.cc/virtual/2023/poster/69903", "video": "https://nips.cc/virtual/2023/poster/69903", "author_site": "Tao Shen, Yifan Cui", "tldr": "", "abstract": "A common concern when a policymaker draws causal inferences from and makes decisions based on observational data is that the measured covariates are insufficiently rich to account for all sources of confounding, i.e., the standard no confoundedness assumption fails to hold. The recently proposed proximal causal inference framework shows that proxy variables that abound in real-life scenarios can be leveraged to identify causal effects and therefore facilitate decision-making. Building upon this line of work, we propose a novel optimal individualized treatment regime based on so-called outcome and treatment confounding bridges. We then show that the value function of this new optimal treatment regime is superior to that of existing ones in the literature. Theoretical guarantees, including identification, superiority, excess value bound, and consistency of the estimated regime, are established. Furthermore, we demonstrate the proposed optimal regime via numerical experiments and a real data application.", "keywords": "Optimal treatment regimes;Policy-making;Proximal causal inference;Unmeasured confounding;Value function", "primary_area": "", "supplementary_material": "/attachment/9e0b280685f6b66dd32b8f808081d8c244cc2812.pdf", "author": "Tao Shen;Yifan Cui", "authorids": "~Tao_Shen6;~Yifan_Cui1", "gender": "M;M", "homepage": "https://scholar.google.com/citations?user=IaiPuOsAAAAJ&hl=zh-CN;https://sites.google.com/view/yifancui", "dblp": ";227/3562-1", "google_scholar": "IaiPuOsAAAAJ;", "orcid": "0009-0001-5464-4464;", "linkedin": ";", "or_profile": "~Tao_Shen6;~Yifan_Cui1", "aff": "National University of Singaore;Zhejiang University", "aff_domain": "u.nus.edu;zju.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshen2023optimal,\ntitle={Optimal Treatment Regimes for Proximal Causal Learning},\nauthor={Tao Shen and Yifan Cui},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zGdH4tKtOW}\n}", "github": "", "project": "", "reviewers": "of5M;NTDa;VJQR;193J", "pdf_size": 689955, "rating": "5;6;6;7", "confidence": "5;2;4;4", "soundness": "4;3;2;4", "novelty": "3;3;2;3", "presentation": "4;2;2;4", "wc_summary": "80;34;97;78", "wc_strengths": "137;31;66;113", "wc_weaknesses": "321;139;1464;34", "wc_questions": "14;32;136;34", "wc_limitations": "9;6;85;1", "wc_review": "561;242;1848;260", "wc_reply_reviewers": "38;80;950;11", "wc_reply_authors": "18;29;926;11", "reply_reviewers": "1;1;3;1", "reply_authors": "2;2;4;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 72.25, 23.284920012746447 ], "wc_strengths_avg": [ 86.75, 41.08755894428385 ], "wc_weaknesses_avg": [ 489.5, 571.9206675754951 ], "wc_questions_avg": [ 54.0, 47.97916214358062 ], "wc_limitations_avg": [ 25.25, 34.61484508126535 ], "wc_review_avg": [ 727.75, 
659.0729758532055 ], "wc_reply_reviewers_avg": [ 269.75, 393.5113562529041 ], "wc_reply_authors_avg": [ 246.0, 392.650608047409 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2495689419983685081&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "u.nus.edu;zju.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.zju.edu.cn", "aff_unique_abbr": "NUS;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;China" }, { "title": "Real3D-AD: A Dataset of Point Cloud Anomaly Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73408", "id": "zGthDp4yYe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/611b896d447df43c898062358df4c114-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=zGthDp4yYe", "openreview": "https://openreview.net/forum?id=zGthDp4yYe", "poster": "/media/PosterPDFs/NeurIPS%202023/73408.png?t=1699258774.516612", "slides": "https://nips.cc/virtual/2023/poster/73408", "video": "https://nips.cc/virtual/2023/poster/73408", "author_site": "Jiaqi Liu, Guoyang Xie, Ruitao Chen, Xinpeng Li, Jinbao Wang, Yong Liu, Chengjie Wang, Feng Zheng", "tldr": "", "abstract": "High-precision point cloud anomaly detection is the gold standard for identifying defects in advanced machining and precision manufacturing. Despite some methodological advances in this area, the scarcity of datasets and the lack of a systematic benchmark hinder its development. We introduce Real3D-AD, a challenging high-precision point cloud anomaly detection dataset, addressing the limitations in the field. With 1,254 high-resolution 3D items (from forty thousand to millions of points for each item), Real3D-AD is the largest dataset for high-precision 3D industrial anomaly detection to date. Real3D-AD surpasses existing 3D anomaly detection datasets in terms of point cloud resolution (0.0010mm-0.0015mm), $360^{\\circ}$ coverage, and perfect prototypes. Additionally, we present a comprehensive benchmark for Real3D-AD, revealing the absence of baseline methods for high-precision point cloud anomaly detection. To address this, we propose Reg3D-AD, a registration-based 3D anomaly detection method incorporating a novel feature memory bank that preserves local and global representations. Extensive experiments on the Real3D-AD dataset highlight the effectiveness of Reg3D-AD.
For reproducibility and accessibility, we provide the Real3D-AD dataset, benchmark source code, and Reg3D-AD on our website: https://github.com/M-3LAB/Real3D-AD.", "keywords": "Anomaly Detection;Industrial Image;Defect Detection;Computer Vision", "primary_area": "", "supplementary_material": "", "author": "Jiaqi Liu;Guoyang Xie;ruitao chen;Xinpeng Li;Jinbao Wang;Yong Liu;Chengjie Wang;Feng Zheng", "authorids": "~Jiaqi_Liu1;~Guoyang_Xie1;~ruitao_chen1;~Xinpeng_Li1;~Jinbao_Wang1;~Yong_Liu12;~Chengjie_Wang1;~Feng_Zheng1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://jiaqiliu.cn;https://guoyang-xie.github.io/;https://orcid.org/0009-0008-4666-4474;;;;;http://faculty.sustech.edu.cn/fengzheng/", "dblp": "51/2773-4;189/9352;242/4484;;;;;39/800", "google_scholar": "plvyb6oAAAAJ;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=aqvFa1EAAAAJ;fqte5H4AAAAJ;PcmyXHMAAAAJ", "orcid": "0000-0002-2153-8411;;0009-0008-4666-4474;;0000-0001-5916-8965;;0000-0003-4216-8090;0000-0002-1701-9141", "linkedin": "https://www.linkedin.cn/injobs/in/jiaqi-liu-b32570236;https://www.linkedin.cn/injobs/in/xieguoyang;;;;;;", "or_profile": "~Jiaqi_Liu1;~Guoyang_Xie1;~ruitao_chen1;~Xinpeng_Li1;~Jinbao_Wang1;~Yong_Liu12;~Chengjie_Wang1;~Feng_Zheng1", "aff": "Southern University of Science and Technology;University of Surrey;Southern University of Science and Technology;;Southern University of Science and Technology;Tencent Youtu Lab;Tencent YouTu Lab;Southern University of Science and Technology", "aff_domain": "sustech.edu.cn;surrey.ac.uk;sustech.edu.cn;;sustech.edu.cn;tencent.com;tencent.com;sustech.edu.cn", "position": "MS student;PhD student;MS student;;Assistant Professor;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nliu2023realdad,\ntitle={Real3D-{AD}: A Dataset of Point Cloud Anomaly Detection},\nauthor={Jiaqi Liu and Guoyang Xie and ruitao chen and Xinpeng Li and Jinbao Wang and Yong Liu and Chengjie Wang and Feng Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=zGthDp4yYe}\n}", "github": "", "project": "", "reviewers": "6MP5;TiJd;vFUK;KAeA", "pdf_size": 15540080, "rating": "6;7;7;9", "confidence": "4;4;2;4", "wc_summary_and_contributions": "186;76;25;280", "wc_strengths": "105;62;114;142", "wc_improvement": "341;124;163;116", "wc_limitations": "28;32;1;44", "wc_correctness": "55;7;7;45", "wc_clarity": "225;5;7;23", "wc_relation_to_prior_work": "8;11;7;47", "wc_documentation": "42;13;7;52", "wc_additional_feedback": "1;1;1;1", "wc_review": "991;331;332;750", "wc_reply_reviewers": "302;7;30;23", "wc_reply_authors": "2673;736;1061;396", "reply_reviewers": "1;1;1;1", "reply_authors": "7;3;4;3", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 141.75, 98.77341494552064 ], "wc_strengths_avg": [ 105.75, 28.70866593905053 ], "wc_improvement_avg": [ 186.0, 91.23869793020941 ], "wc_limitations_avg": [ 26.25, 15.722197683530124 ], "wc_correctness_avg": [ 28.5, 21.788758569500924 ], "wc_clarity_avg": [ 65.0, 92.63908462414771 ], "wc_relation_to_prior_work_avg": [ 18.25, 16.663958113245485 ], "wc_documentation_avg": [ 28.5, 18.953891421024867 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 601.0, 282.6490757105001 ], "wc_reply_reviewers_avg": [ 90.5, 122.393831543914 ], "wc_reply_authors_avg": [ 1216.5, 873.1656486600924 ], "reply_reviewers_avg": 
[ 1.0, 0.0 ], "reply_authors_avg": [ 4.25, 1.6393596310755 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5754039673686837943&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "sustech.edu.cn;surrey.ac.uk;sustech.edu.cn;;sustech.edu.cn;tencent.com;tencent.com;sustech.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;2;2;0", "aff_unique_norm": "Southern University of Science and Technology;University of Surrey;Tencent", "aff_unique_dep": ";;Youtu Lab", "aff_unique_url": "https://www.sustech.edu.cn;https://www.surrey.ac.uk;https://www.tencent.com", "aff_unique_abbr": "SUSTech;Surrey;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "New Complexity-Theoretic Frontiers of Tractability for Neural Network Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69902", "id": "zIEaOZ0saA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b07091c16719ad3990e3d1ccee6641f1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zIEaOZ0saA", "openreview": "https://openreview.net/forum?id=zIEaOZ0saA", "poster": "/media/PosterPDFs/NeurIPS%202023/69902.png?t=1702394118.1602721", "slides": "https://nips.cc/virtual/2023/poster/69902", "video": "https://nips.cc/virtual/2023/poster/69902", "author_site": "Cornelius Brand, Robert Ganian, Mathis Rocton", "tldr": "", "abstract": "In spite of the fundamental role of neural networks in contemporary machine learning research, our understanding of the computational complexity of optimally training neural networks remains limited even when dealing with the simplest kinds of activation functions. Indeed, while there have been a number of very recent results that establish ever-tighter lower bounds for the problem under linear and ReLU activation functions, little progress has been made towards the identification of novel polynomial-time tractable network architectures.
In this article we obtain novel algorithmic upper bounds for training linear- and ReLU-activated neural networks to optimality which push the boundaries of tractability for these problems beyond the previous state of the art.", "keywords": "neural network training;computational complexity;ReLU networks;Linear networks", "primary_area": "", "supplementary_material": "", "author": "Cornelius Brand;Robert Ganian;Mathis Rocton", "authorids": "~Cornelius_Brand1;~Robert_Ganian1;~Mathis_Rocton1", "gender": "M;M;M", "homepage": ";https://www.ac.tuwien.ac.at/people/rganian/;", "dblp": "182/2067;80/7529;312/6785", "google_scholar": "29uTGgYAAAAJ;nNem5xIAAAAJ;NZe336QAAAAJ", "orcid": ";0000-0002-7762-8045;", "linkedin": ";;", "or_profile": "~Cornelius_Brand1;~Robert_Ganian1;~Mathis_Rocton1", "aff": "Technische Universit\u00e4t Wien;TU Wien Vienna University of Technology;Technische Universit\u00e4t Wien", "aff_domain": "tuwien.ac.at;tuwien.ac.at;tuwien.ac.at", "position": "Postdoc;Associate Professor;PhD student", "bibtex": "@inproceedings{\nbrand2023new,\ntitle={New Complexity-Theoretic Frontiers of Tractability for Neural Network Training},\nauthor={Cornelius Brand and Robert Ganian and Mathis Rocton},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zIEaOZ0saA}\n}", "github": "", "project": "", "reviewers": "Yud1;boAZ;t5Nf;SU6D;3t5Y", "pdf_size": 294599, "rating": "5;6;6;6;7", "confidence": "4;3;2;3;5", "soundness": "2;4;4;4;4", "novelty": "2;3;2;2;3", "presentation": "3;3;4;3;4", "wc_summary": "147;127;215;142;153", "wc_strengths": "33;43;60;80;86", "wc_weaknesses": "180;398;56;95;793", "wc_questions": "3;4;139;86;20", "wc_limitations": "8;5;2;66;57", "wc_review": "371;577;472;469;1109", "wc_reply_reviewers": "37;10;176;120;23", "wc_reply_authors": "454;0;98;0;0", "reply_reviewers": "1;1;2;1;1", "reply_authors": "3;1;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.6, 0.8000000000000002 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 156.8, 30.347322781425053 ], "wc_strengths_avg": [ 60.4, 20.460694025374604 ], "wc_weaknesses_avg": [ 304.4, 271.4837748374661 ], "wc_questions_avg": [ 50.4, 53.74234829257092 ], "wc_limitations_avg": [ 27.6, 27.889783075527856 ], "wc_review_avg": [ 599.6, 262.905001854282 ], "wc_reply_reviewers_avg": [ 73.2, 64.16042393874903 ], "wc_reply_authors_avg": [ 110.4, 175.94271795104225 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3100868364730211, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=365361334469944438&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "tuwien.ac.at;tuwien.ac.at;tuwien.ac.at", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technische Universit\u00e4t Wien;Vienna University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tuwien.ac.at;https://www.tuwien.ac.at", "aff_unique_abbr": "TU Wien;TU Wien", "aff_campus_unique_index": "1", "aff_campus_unique": ";Vienna", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Austria" }, { "id": "zJMutieTgh", "title": "Inference Attacks Against Face Recognition Model without Classification Layers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Face recognition (FR) has been 
applied to nearly every aspect of daily life, but it is always accompanied by the underlying risk of leaking private information. At present, almost all attack models against FR rely heavily on the presence of a classification layer. However, in practice, the FR model can obtain a complex feature embedding of the input via the model backbone, and then compare it with the target for inference, which does not explicitly involve the outputs of the classification layer adopting logit or other losses. In this work, we advocate a novel inference attack composed of two stages for practical FR models without a classification layer. The first stage is the membership inference attack. Specifically, we analyze the distances between the intermediate features and batch normalization (BN) parameters. The results indicate that this distance is a critical metric for membership inference. We thus design a simple but effective attack model that can determine whether a face image is from the training data set or not. The second stage is the model inversion attack, where sensitive private data is reconstructed using a pre-trained generative adversarial network (GAN) guided by the attack model in the first stage. To the best of our knowledge, the proposed attack model is the very first in the literature developed for FR models without a classification layer. We illustrate the application of the proposed attack model in the establishment of privacy-preserving FR techniques.", "keywords": "inference attack;face recognition;classification;model inversion", "primary_area": "", "supplementary_material": "/attachment/3e5c57013182d770eac942d29d4fdadfe5c1363d.pdf", "author": "Yuanqing Huang;Yinggui Wang;Le Yang;Rui Lv;Lei Wang;Tao Wei", "authorids": "~Yuanqing_Huang1;~Yinggui_Wang1;~Le_Yang6;~Rui_Lv2;~Lei_Wang30;~Tao_Wei5", "gender": ";M;M;M;M;M", "homepage": ";;https://www.canterbury.ac.nz/engineering/contact-us/people/le-yang.html;;;", "dblp": ";136/1775;;;;", "google_scholar": ";;l-xC8hMAAAAJ;https://scholar.google.com/citations?hl=en;;Ao3wEckAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;%E7%A3%8A-%E7%8E%8B-b4994abb/;", "or_profile": "~Yuanqing_Huang1;~Yinggui_Wang1;~Le_Yang6;~Rui_Lv2;~Lei_Wang30;~Tao_Wei5", "aff": ";Ant Group;University of Canterbury;;Ant Group;Ant Group", "aff_domain": ";antgroup.com;canterbury.ac.nz;;antgroup.com;antgroup.com", "position": ";Principal Researcher;Lecturer;;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nhuang2023inference,\ntitle={Inference Attacks Against Face Recognition Model without Classification Layers},\nauthor={Yuanqing Huang and Yinggui Wang and Le Yang and Rui Lv and Lei Wang and Tao Wei},\nyear={2023},\nurl={https://openreview.net/forum?id=zJMutieTgh}\n}", "github": "", "project": "", "reviewers": "LWa5;X6ex;wFe5;Ke6N", "site": "https://openreview.net/forum?id=zJMutieTgh", "pdf_size": 761896, "rating": "3;4;5;8", "confidence": "4;3;4;5", "soundness": "3;2;3;4", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "52;62;75;82", "wc_strengths": "21;24;49;33", "wc_weaknesses": "131;43;58;295", "wc_questions": "6;25;6;142", "wc_limitations": "7;1;6;116", "wc_review": "217;155;194;668", "wc_reply_reviewers": "0;0;18;653", "wc_reply_authors": "546;502;139;584", "reply_reviewers": "0;0;1;4", "reply_authors": "3;3;4;5", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ],
"wc_summary_avg": [ 67.75, 11.583932838203095 ], "wc_strengths_avg": [ 31.75, 10.894379284750462 ], "wc_weaknesses_avg": [ 131.75, 99.95842885920126 ], "wc_questions_avg": [ 44.75, 56.68057427373156 ], "wc_limitations_avg": [ 32.5, 48.26230413065667 ], "wc_review_avg": [ 308.5, 208.7372750612597 ], "wc_reply_reviewers_avg": [ 167.75, 280.25557532366776 ], "wc_reply_authors_avg": [ 442.75, 177.7545709679501 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5210887189336153304&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Ant Group;University of Canterbury", "aff_unique_dep": ";", "aff_unique_url": "https://www.antgroup.com;https://www.canterbury.ac.nz", "aff_unique_abbr": "Ant Group;UC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;New Zealand" }, { "id": "zKjSmbYFZe", "title": "Efficient Post-Processing for Equal Opportunity in Fair Multi-Class Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Fairness in machine learning is of growing concern as more instances of biased model behavior are documented while their adoption continues to rise. The majority of studies have focused on binary classification settings, despite the fact that many real-world problems are inherently multi-class. This paper considers fairness in multi-class classification under the notion of parity of true positive rates\u2014an extension of binary class equalized odds\u2014which ensures equal opportunity to qualified individuals regardless of their demographics. We focus on algorithm design and provide a post-processing method that derives fair classifiers from pre-trained score functions. The method is developed by analyzing the representation of the optimal fair classifier, and is efficient in both sample and time complexity, as it is implemented by linear programs on finite samples. 
We demonstrate its effectiveness at reducing disparity on benchmark datasets, particularly under large numbers of classes, where existing methods fall short.", "keywords": "fairness;equal opportunity;multi-class;classification", "primary_area": "", "supplementary_material": "/attachment/2ed1ff907243ceec8e64de360b4e11afd97b7411.zip", "author": "Ruicheng Xian;Han Zhao", "authorids": "~Ruicheng_Xian1;~Han_Zhao1", "gender": "M;M", "homepage": "https://rxian.github.io;https://hanzhaoml.github.io/", "dblp": "243/3086.html;03/3520-2", "google_scholar": "Nmk26z4AAAAJ;x942ipYAAAAJ", "orcid": ";0000-0002-8579-1600", "linkedin": ";", "or_profile": "~Ruicheng_Xian1;~Han_Zhao1", "aff": "University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nxian2023efficient,\ntitle={Efficient Post-Processing for Equal Opportunity in Fair Multi-Class Classification},\nauthor={Ruicheng Xian and Han Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=zKjSmbYFZe}\n}", "github": "", "project": "", "reviewers": "Hx2f;TMc5;EVhV;Wt5y", "site": "https://openreview.net/forum?id=zKjSmbYFZe", "pdf_size": 500765, "rating": "3;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "1;2;3;3", "presentation": "2;3;3;3", "wc_summary": "54;40;59;287", "wc_strengths": "12;41;24;71", "wc_weaknesses": "28;127;70;47", "wc_questions": "66;79;258;17", "wc_limitations": "10;1;1;26", "wc_review": "170;288;412;448", "wc_reply_reviewers": "51;73;0;0", "wc_reply_authors": "146;242;1266;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;4;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 110.0, 102.428023509194 ], "wc_strengths_avg": [ 37.0, 22.169799277395363 ], "wc_weaknesses_avg": [ 68.0, 37.1685350800916 ], "wc_questions_avg": [ 105.0, 91.30991183874838 ], "wc_limitations_avg": [ 9.5, 10.21028892833107 ], "wc_review_avg": [ 329.5, 109.55706275726818 ], "wc_reply_reviewers_avg": [ 31.0, 31.960913628993776 ], "wc_reply_authors_avg": [ 413.5, 499.6766454418297 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oDSVM0IQtsUJ:scholar.google.com/&scioq=Efficient+Post-Processing+for+Equal+Opportunity+in+Fair+Multi-Class+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Implicit variance regularization in non-contrastive SSL", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69901", "id": "zMNUNd9zs1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c837ab3eebe77bffac634939f22ac458-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zMNUNd9zs1", "openreview": "https://openreview.net/forum?id=zMNUNd9zs1", "poster": 
"/media/PosterPDFs/NeurIPS%202023/69901.png?t=1699546007.0950654", "slides": "https://nips.cc/virtual/2023/poster/69901", "video": "https://nips.cc/virtual/2023/poster/69901", "author_site": "Manu Srinath Halvagal, Axel Laborieux, Friedemann Zenke", "tldr": "", "abstract": "Non-contrastive SSL methods like BYOL and SimSiam rely on asymmetric predictor networks to avoid representational collapse without negative samples. Yet, how predictor networks facilitate stable learning is not fully understood. While previous theoretical analyses assumed Euclidean losses, most practical implementations rely on cosine similarity. To gain further theoretical insight into non-contrastive SSL, we analytically study learning dynamics in conjunction with Euclidean and cosine similarity in the eigenspace of closed-form linear predictor networks. We show that both avoid collapse through implicit variance regularization albeit through different dynamical mechanisms. Moreover, we find that the eigenvalues act as effective learning rate multipliers and propose a family of isotropic loss functions (IsoLoss) that equalize convergence rates across eigenmodes. Empirically, IsoLoss speeds up the initial learning dynamics and increases robustness, thereby allowing us to dispense with the EMA target network typically used with non-contrastive methods. Our analysis sheds light on the variance regularization mechanisms of non-contrastive SSL and lays the theoretical grounds for crafting novel loss functions that shape the learning dynamics of the predictor's spectrum.", "keywords": "Self-supervised learning;Non-contrastive learning;Learning dynamics", "primary_area": "", "supplementary_material": "/attachment/1495f64229b6685dbb649429da81a645fa0c2af5.zip", "author": "Manu Srinath Halvagal;Axel Laborieux;Friedemann Zenke", "authorids": "~Manu_Srinath_Halvagal1;~Axel_Laborieux1;~Friedemann_Zenke1", "gender": "M;M;M", "homepage": "https://mshalvagal.github.io;https://laborieux-axel.github.io/;https://fzenke.net", "dblp": "241/3481;260/0533;155/2110", "google_scholar": "https://scholar.google.co.uk/citations?user=z6yQv2UAAAAJ;wodyq68AAAAJ;_IxvO8QAAAAJ", "orcid": "0000-0001-8374-8257;0000-0003-3630-2863;0000-0003-1883-644X", "linkedin": "manu-srinath-halvagal-ab4394b7/;https://linkedin.com/in/axel-lbx;", "or_profile": "~Manu_Srinath_Halvagal1;~Axel_Laborieux1;~Friedemann_Zenke1", "aff": "Friedrich Miescher Institute for Biomedical Research;Friedrich Miescher Institute for Biomedical Research;Friedrich Miescher Institute", "aff_domain": "fmi.ch;fmi.ch;fmi.ch", "position": "PhD student;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nhalvagal2023implicit,\ntitle={Implicit variance regularization in non-contrastive {SSL}},\nauthor={Manu Srinath Halvagal and Axel Laborieux and Friedemann Zenke},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zMNUNd9zs1}\n}", "github": "", "project": "", "reviewers": "ZNsZ;qXm9;qE19", "pdf_size": 4140472, "rating": "5;6;7", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "2;3;4", "presentation": "2;2;2", "wc_summary": "48;87;101", "wc_strengths": "61;83;61", "wc_weaknesses": "146;308;41", "wc_questions": "70;128;129", "wc_limitations": "2;39;3", "wc_review": "327;645;335", "wc_reply_reviewers": "202;450;0", "wc_reply_authors": "141;500;0", "reply_reviewers": "2;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 
2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 78.66666666666667, 22.425184255405547 ], "wc_strengths_avg": [ 68.33333333333333, 10.370899457402697 ], "wc_weaknesses_avg": [ 165.0, 109.82713690158731 ], "wc_questions_avg": [ 109.0, 27.58018612458347 ], "wc_limitations_avg": [ 14.666666666666666, 17.21110752456745 ], "wc_review_avg": [ 435.6666666666667, 148.05704606295808 ], "wc_reply_reviewers_avg": [ 217.33333333333334, 184.03139828723673 ], "wc_reply_authors_avg": [ 213.66666666666666, 210.49201621175303 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7082072938775877937&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "fmi.ch;fmi.ch;fmi.ch", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Friedrich Miescher Institute for Biomedical Research;Friedrich Miescher Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.fmi.ch;https://www.fmi.ch", "aff_unique_abbr": "FMI;FMI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "FAMO: Fast Adaptive Multitask Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69900", "id": "zMeemcUeXL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b2fe1ee8d936ac08dd26f2ff58986c8f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zMeemcUeXL", "openreview": "https://openreview.net/forum?id=zMeemcUeXL", "poster": "/media/PosterPDFs/NeurIPS%202023/69900.png?t=1697296671.4168613", "slides": "https://nips.cc/virtual/2023/poster/69900", "video": "https://nips.cc/virtual/2023/poster/69900", "author_site": "Bo Liu, Yihao Feng, Peter Stone, Qiang Liu", "tldr": "", "abstract": "One of the grand enduring goals of AI is to create generalist agents that can learn multiple different tasks from diverse data via multitask learning (MTL). However, in practice, applying gradient descent (GD) on the average loss across all tasks may yield poor multitask performance due to severe under-optimization of certain tasks. Previous approaches that manipulate task gradients for a more balanced loss decrease require storing and computing all task gradients ($\\mathcal{O}(k)$ space and time where $k$ is the number of tasks), limiting their use in large-scale scenarios. In this work, we introduce Fast Adaptive Multitask Optimization (FAMO), a dynamic weighting method that decreases task losses in a balanced way using $\\mathcal{O}(1)$ space and time. We conduct an extensive set of experiments covering multi-task supervised and reinforcement learning problems. Our results indicate that FAMO achieves comparable or superior performance to state-of-the-art gradient manipulation techniques while offering significant improvements in space and computational efficiency. 
Code is available at \\url{https://github.com/Cranial-XIX/FAMO}.", "keywords": "multitask learning;multitask optimization;conflicting gradients;knowledge transfer", "primary_area": "", "supplementary_material": "/attachment/20956f7a2cad082fcdb5409f26a4d2b539f2ad80.zip", "author": "Bo Liu;Yihao Feng;Peter Stone;qiang liu", "authorids": "~Bo_Liu13;~Yihao_Feng1;~Peter_Stone1;~qiang_liu4", "gender": "M;M;M;M", "homepage": "https://cranial-xix.github.io/;;http://www.cs.utexas.edu/~pstone;https://www.cs.utexas.edu/~lqiang/", "dblp": ";204/3696;s/PeterStone;61/3234-1", "google_scholar": "https://scholar.google.com/citations?hl=en;uqnNle0AAAAJ;qnwjcfAAAAAJ;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;0000-0002-6795-420X;", "linkedin": ";;;", "or_profile": "~Bo_Liu13;~Yihao_Feng1;~Peter_Stone1;~Qiang_Liu1", "aff": "University of Texas, Austin;Salesforce AI Research;University of Texas, Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;salesforce.com;utexas.edu;utexas.edu", "position": "PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023famo,\ntitle={{FAMO}: Fast Adaptive Multitask Optimization},\nauthor={Bo Liu and Yihao Feng and Peter Stone and qiang liu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zMeemcUeXL}\n}", "github": "", "project": "", "reviewers": "XmqU;DG6u;gjX4;otYb", "pdf_size": 3121445, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;4;3", "wc_summary": "124;37;268;59", "wc_strengths": "34;28;103;66", "wc_weaknesses": "452;192;217;174", "wc_questions": "310;129;32;48", "wc_limitations": "54;3;1;2", "wc_review": "974;389;621;349", "wc_reply_reviewers": "1092;129;0;118", "wc_reply_authors": "483;260;0;0", "reply_reviewers": "3;2;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.0, 90.15819430312477 ], "wc_strengths_avg": [ 57.75, 29.852763691155968 ], "wc_weaknesses_avg": [ 258.75, 112.6129988056441 ], "wc_questions_avg": [ 129.75, 110.37294731953115 ], "wc_limitations_avg": [ 15.0, 22.52776065213762 ], "wc_review_avg": [ 583.25, 248.35294944896467 ], "wc_reply_reviewers_avg": [ 334.75, 440.1132666712059 ], "wc_reply_authors_avg": [ 185.75, 201.78995886812604 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5581468458718101583&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "cs.utexas.edu;salesforce.com;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Texas at Austin;Salesforce", "aff_unique_dep": ";Salesforce AI Research", "aff_unique_url": "https://www.utexas.edu;https://www.salesforce.com", "aff_unique_abbr": "UT Austin;Salesforce AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "P-Flow: A Fast and Data-Efficient Zero-Shot TTS through Speech Prompting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69899", "id": 
"zNA7u7wtIN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eb0965da1d2cb3fbbbb8dbbad5fa0bfc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zNA7u7wtIN", "openreview": "https://openreview.net/forum?id=zNA7u7wtIN", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69899", "video": "https://nips.cc/virtual/2023/poster/69899", "author_site": "Sungwon Kim, Kevin Shih, rohan badlani, Joao Felipe Santos, Evelina Bakhturina, Mikyas Desta, Rafael Valle, Sungroh Yoon, Bryan Catanzaro", "tldr": "", "abstract": "While recent large-scale neural codec language models have shown significant improvement in zero-shot TTS by training on thousands of hours of data, they suffer from drawbacks such as a lack of robustness, slow sampling speed similar to previous autoregressive TTS methods, and reliance on pre-trained neural codec representations. Our work proposes P-Flow, a fast and data-efficient zero-shot TTS model that uses speech prompts for speaker adaptation. P-Flow comprises a speech-prompted text encoder for speaker adaptation and a flow matching generative decoder for high-quality and fast speech synthesis. Our speech-prompted text encoder uses speech prompts and text input to generate speaker-conditional text representation. The flow matching generative decoder uses the speaker-conditional output to synthesize high-quality personalized speech significantly faster than in real-time. Unlike the neural codec language models, we specifically train P-Flow on LibriTTS dataset using a continuous mel-representation. Through our training method using continuous speech prompts, P-Flow matches the speaker similarity performance of the large-scale zero-shot TTS models with two orders of magnitude less training data and has more than 20$\\times$ faster sampling speed. Our results show that P-Flow has better pronunciation and is preferred in human likeness and speaker similarity to its recent state-of-the-art counterparts, thus defining P-Flow as an attractive and desirable alternative. We provide audio samples on our demo page: [https://research.nvidia.com/labs/adlr/projects/pflow](https://research.nvidia.com/labs/adlr/projects/pflow)", "keywords": "text-to-speech;zero-shot TTS;flow matching generative model", "primary_area": "", "supplementary_material": "/attachment/f434d584d938cade84800b7e6a400910c73783a2.pdf", "author": "Sungwon Kim;Kevin J. Shih;Rohan Badlani;Joao Felipe Santos;Evelina Bakhturina;Mikyas T. 
Desta;Rafael Valle;Sungroh Yoon;Bryan Catanzaro", "authorids": "~Sungwon_Kim2;~Kevin_J._Shih1;~Rohan_Badlani1;~Joao_Felipe_Santos2;~Evelina_Bakhturina1;~Mikyas_T._Desta1;~Rafael_Valle1;~Sungroh_Yoon1;~Bryan_Catanzaro1", "gender": "M;M;Non-Binary;;M;Not Specified;;M;M", "homepage": ";https://scholar.google.co.in/citations?user=sk-qH8wAAAAJ&hl=en;http://www.seaandsailor.com;;;http://rafaelvalle.github.io;http://ailab.snu.ac.kr;https://ctnzr.io;http://webhost.engr.illinois.edu/~kjshih2/", "dblp": ";;48/5148;211/6749;;;99/1474;14/4826;135/4912", "google_scholar": "6qGppvkAAAAJ;https://scholar.google.co.in/citations?user=sk-qH8wAAAAJ;u2tgePAAAAAJ;;dnMm8-EAAAAJ;SktxU8IAAAAJ;Bphl_fIAAAAJ;UZ6kI2AAAAAJ;4x3DhzAAAAAJ", "orcid": ";;0000-0003-3934-3943;;;;0000-0002-2367-197X;0000-0003-0034-7728;", "linkedin": "sungwon-kim-dsail/;;jo%C3%A3o-felipe-santos-27003a9/;evelina-bakhturina-25269775/;;vallerafael/;;bryancatanzaro/;", "or_profile": "~Sungwon_Kim2;~Rohan_Badlani1;~Joao_Felipe_Santos2;~Evelina_Bakhturina1;~Mikyas_T._Desta1;~Rafael_Valle1;~Sungroh_Yoon1;~Bryan_Catanzaro1;~Kevin_Jonathan_Shih1", "aff": "Seoul National University;NVIDIA;University of Montreal;NVIDIA;NVIDIA;NVIDIA;Seoul National University;NVIDIA;NVIDIA", "aff_domain": "snu.ac.kr;nvidia.com;umontreal.ca;nvidia.com;nvidia.com;nvidia.com;snu.ac.kr;nvidia.com;nvidia.com", "position": "PhD student;Researcher;PhD student;Researcher;Researcher;Senior Research Scientist;Full Professor;Vice President;Research Scientist", "bibtex": "@inproceedings{\nkim2023pflow,\ntitle={P-Flow: A Fast and Data-Efficient Zero-Shot {TTS} through Speech Prompting},\nauthor={Sungwon Kim and Kevin J. Shih and Rohan Badlani and Joao Felipe Santos and Evelina Bakhturina and Mikyas T. Desta and Rafael Valle and Sungroh Yoon and Bryan Catanzaro},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zNA7u7wtIN}\n}", "github": "", "project": "", "reviewers": "KPCd;zUdt;Bg7K;9LwJ", "pdf_size": 775325, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;3;2;4", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "92;19;127;99", "wc_strengths": "75;69;135;281", "wc_weaknesses": "30;130;96;344", "wc_questions": "316;1;103;97", "wc_limitations": "14;1;7;60", "wc_review": "527;220;468;881", "wc_reply_reviewers": "0;21;0;0", "wc_reply_authors": "0;34;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.25, 39.88342387508876 ], "wc_strengths_avg": [ 140.0, 85.39906322671227 ], "wc_weaknesses_avg": [ 150.0, 117.63502879669814 ], "wc_questions_avg": [ 129.25, 115.16591292565695 ], "wc_limitations_avg": [ 20.5, 23.264780248263683 ], "wc_review_avg": [ 524.0, 236.11967304737655 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 8.5, 14.722431864335457 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4602867496532519171&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;nvidia.com;umontreal.ca;nvidia.com;nvidia.com;nvidia.com;snu.ac.kr;nvidia.com;nvidia.com", "author_num": 9, "aff_unique_index": 
"0;1;2;1;1;1;0;1;1", "aff_unique_norm": "Seoul National University;NVIDIA;University of Montreal", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.snu.ac.kr;https://www.nvidia.com;https://wwwumontreal.ca", "aff_unique_abbr": "SNU;NVIDIA;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;1;1;0;1;1", "aff_country_unique": "South Korea;United States;Canada" }, { "title": "Stabilized Neural Differential Equations for Learning Dynamics with Explicit Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69898", "id": "zO2dAQfvHf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/2a4179ef39846557e99f6bfac580ea2e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zO2dAQfvHf", "openreview": "https://openreview.net/forum?id=zO2dAQfvHf", "poster": "/media/PosterPDFs/NeurIPS%202023/69898.png?t=1701731648.2724798", "slides": "https://nips.cc/virtual/2023/poster/69898", "video": "https://nips.cc/virtual/2023/poster/69898", "author_site": "Alistair White, Niki Kilbertus, Maximilian Gelbrecht, Niklas Boers", "tldr": "", "abstract": "Many successful methods to learn dynamical systems from data have recently been introduced. However, ensuring that the inferred dynamics preserve known constraints, such as conservation laws or restrictions on the allowed system states, remains challenging. We propose stabilized neural differential equations (SNDEs), a method to enforce arbitrary manifold constraints for neural differential equations. Our approach is based on a stabilization term that, when added to the original dynamics, renders the constraint manifold provably asymptotically stable. Due to its simplicity, our method is compatible with all common neural differential equation (NDE) models and broadly applicable. 
In extensive empirical evaluations, we demonstrate that SNDEs outperform existing methods while broadening the types of constraints that can be incorporated into NDE training.", "keywords": "neural differential equations;neural ordinary differential equations;constraints;conservation laws;stabilization;dynamical systems;dynamics;scientific machine learning;physics-informed machine learning", "primary_area": "", "supplementary_material": "/attachment/368677ba191ee85089a8a7be378e5ae7c8d5f848.zip", "author": "Alistair White;Niki Kilbertus;Maximilian Gelbrecht;Niklas Boers", "authorids": "~Alistair_White1;~Niki_Kilbertus1;maximilian.gelbrecht@tum.de;~Niklas_Boers1", "gender": ";;;M", "homepage": "https://white-alistair.github.io/;;;https://www.professoren.tum.de/boers-niklas", "dblp": ";202/1966;;179/2571", "google_scholar": ";uQZjTq4AAAAJ;;kVry5EkAAAAJ", "orcid": ";;;0000-0002-1239-9034", "linkedin": ";;;", "or_profile": "~Alistair_White1;~Niki_Kilbertus1;maximilian.gelbrecht@tum.de;~Niklas_Boers1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Helmholtz AI;;Potsdam Institute for Climate Impact Research", "aff_domain": "tum.de;helmholtz-muenchen.de;;pik-potsdam.de", "position": "PhD student;Group Leader;;Principal Researcher", "bibtex": "@inproceedings{\nwhite2023stabilized,\ntitle={Stabilized Neural Differential Equations for Learning Dynamics with Explicit Constraints},\nauthor={Alistair White and Niki Kilbertus and Maximilian Gelbrecht and Niklas Boers},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zO2dAQfvHf}\n}", "github": "", "project": "", "reviewers": "RTPu;gVTB;kVz8;cZV4;pbb8", "pdf_size": 6842283, "rating": "3;6;6;6;7", "confidence": "3;4;3;3;2", "soundness": "2;3;2;3;3", "novelty": "2;3;3;3;3", "presentation": "3;4;4;3;4", "wc_summary": "93;41;60;122;86", "wc_strengths": "103;43;153;22;86", "wc_weaknesses": "503;21;183;62;138", "wc_questions": "6;104;248;30;177", "wc_limitations": "14;27;25;1;1", "wc_review": "719;236;669;237;488", "wc_reply_reviewers": "516;68;61;65;293", "wc_reply_authors": "2589;0;0;332;518", "reply_reviewers": "1;1;1;1;2", "reply_authors": "6;1;1;2;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.4, 27.889783075527856 ], "wc_strengths_avg": [ 81.4, 46.08079860419088 ], "wc_weaknesses_avg": [ 181.4, 170.46829617263148 ], "wc_questions_avg": [ 113.0, 90.26627277117406 ], "wc_limitations_avg": [ 13.6, 11.200000000000001 ], "wc_review_avg": [ 469.8, 205.41217101233312 ], "wc_reply_reviewers_avg": [ 200.6, 180.81659215901618 ], "wc_reply_authors_avg": [ 687.8, 971.1976935722201 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.23312620206007845, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14630845737768259343&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "tum.de;helmholtz-muenchen.de;;pik-potsdam.de", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Helmholtz Association of German Research Centres;Potsdam Institute for Climate Impact Research", "aff_unique_dep": ";Helmholtz AI;", "aff_unique_url": 
"https://www.tum.de;https://www.helmholtz-ai.de;https://www.pik-potsdam.de", "aff_unique_abbr": "TUM;Helmholtz AI;PIK", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Residual Alignment: Uncovering the Mechanisms of Residual Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69897", "id": "zOCIKYVaF5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b3f48945f6fb402b4b5cdcf490e72847-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zOCIKYVaF5", "openreview": "https://openreview.net/forum?id=zOCIKYVaF5", "poster": "/media/PosterPDFs/NeurIPS%202023/69897.png?t=1701791395.1674502", "slides": "https://nips.cc/virtual/2023/poster/69897", "video": "https://nips.cc/virtual/2023/poster/69897", "author_site": "Jianing Li, Vardan Papyan", "tldr": "", "abstract": "The ResNet architecture has been widely adopted in deep learning due to its significant boost to performance through the use of simple skip connections, yet the underlying mechanisms leading to its success remain largely unknown. In this paper, we conduct a thorough empirical study of the ResNet architecture in classification tasks by linearizing its constituent residual blocks using Residual Jacobians and measuring their singular value decompositions. Our measurements ([code](https://colab.research.google.com/drive/1yKjEg2yF616tnZFAfuN0aQ-E9v3JmyjN?usp=sharing)) reveal a process called Residual Alignment (RA) characterized by four properties:\n- **(RA1):** intermediate representations of a given input are *equispaced* on a *line*, embedded in high dimensional space, as observed by Gai and Zhang [2021];\n- **(RA2):** top left and right singular vectors of Residual Jacobians align with each other and across different depths;\n- **(RA3):** Residual Jacobians are at most rank $C$ for fully-connected ResNets, where $C$ is the number of classes; and\n- **(RA4):** top singular values of Residual Jacobians scale inversely with depth.\n\nRA consistently occurs in models that generalize well, in both fully-connected and convolutional architectures, across various depths and widths, for varying numbers of classes, on all tested benchmark datasets, but ceases to occur once the skip connections are removed. It also provably occurs in a novel mathematical model we propose. 
This phenomenon reveals a strong alignment between residual branches of a ResNet (RA2+4), imparting a highly rigid geometric structure to the intermediate representations as they progress *linearly* through the network (RA1) up to the final layer, where they undergo Neural Collapse.", "keywords": "Deep Learning;Residual Networks;Neural Networks;Generalization;Spectral Analysis", "primary_area": "", "supplementary_material": "/attachment/891c218975109f98f561fb03ee93ff43b9f5015f.pdf", "author": "Jianing Li;Vardan Papyan", "authorids": "~Jianing_Li3;~Vardan_Papyan1", "gender": ";M", "homepage": ";https://sites.google.com/view/vardan-papyan", "dblp": ";173/9783", "google_scholar": ";https://scholar.google.co.il/citations?user=VrE-Gd4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jianing_Li3;~Vardan_Papyan1", "aff": ";University of Toronto", "aff_domain": ";toronto.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nli2023residual,\ntitle={Residual Alignment: Uncovering the Mechanisms of Residual Networks},\nauthor={Jianing Li and Vardan Papyan},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zOCIKYVaF5}\n}", "github": "", "project": "", "reviewers": "7wWU;Tohn;EjRc;jJfi", "pdf_size": 15482355, "rating": "6;7;7;8", "confidence": "2;3;4;4", "soundness": "3;4;3;4", "novelty": "3;4;2;4", "presentation": "3;1;3;4", "wc_summary": "38;333;233;78", "wc_strengths": "27;286;165;257", "wc_weaknesses": "29;223;130;318", "wc_questions": "39;164;36;149", "wc_limitations": "1;1;10;19", "wc_review": "134;1007;574;821", "wc_reply_reviewers": "0;0;37;145", "wc_reply_authors": "26;0;0;66", "reply_reviewers": "0;0;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 170.5, 118.76973520219703 ], "wc_strengths_avg": [ 183.75, 100.92416707607747 ], "wc_weaknesses_avg": [ 175.0, 107.34756634409557 ], "wc_questions_avg": [ 97.0, 59.74529270160119 ], "wc_limitations_avg": [ 7.75, 7.46240577829965 ], "wc_review_avg": [ 634.0, 326.9931191936613 ], "wc_reply_reviewers_avg": [ 45.5, 59.3990740668573 ], "wc_reply_authors_avg": [ 23.0, 27.0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16637715780963306814&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";toronto.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Conformal PID Control for Time Series Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69896", "id": "zPYeYv6YYs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/47f2fad8c1111d07f83c91be7870f8db-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zPYeYv6YYs", "openreview": "https://openreview.net/forum?id=zPYeYv6YYs", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69896", "video": "https://nips.cc/virtual/2023/poster/69896", "author_site": "Anastasios 
Angelopoulos, Emmanuel Candes, Ryan Tibshirani", "tldr": "", "abstract": "We study the problem of uncertainty quantification for time series prediction, with the goal of providing easy-to-use algorithms with formal guarantees. The algorithms we present build upon ideas from conformal prediction and control theory, are able to prospectively model conformal scores in an online setting, and adapt to the presence of systematic errors due to seasonality, trends, and general distribution shifts. Our theory both simplifies and strengthens existing analyses in online conformal prediction. Experiments on 4-week-ahead forecasting of statewide COVID-19 death counts in the U.S. show an improvement in coverage over the ensemble forecaster used in\nofficial CDC communications. We also run experiments on predicting electricity demand, market returns, and temperature using autoregressive, Theta, Prophet, and Transformer models. We provide an extendable codebase for testing our methods and for the integration of new algorithms, data sets, and forecasting rules at [this link](http://github.com/aangelopoulos/conformal-time-series).", "keywords": "conformal prediction;time series;uncertainty quantification;distribution shift", "primary_area": "", "supplementary_material": "", "author": "Anastasios Nikolas Angelopoulos;Emmanuel Candes;Ryan Tibshirani", "authorids": "~Anastasios_Nikolas_Angelopoulos1;~Emmanuel_Candes1;~Ryan_Tibshirani1", "gender": "M;;", "homepage": "http://angelopoulos.ai;http://statweb.stanford.edu/~candes/;https://www.stat.berkeley.edu/~ryantibs/", "dblp": ";;", "google_scholar": "nfX25MMAAAAJ;nRQi4O8AAAAJ;", "orcid": ";;", "linkedin": "anastasiosa/;;", "or_profile": "~Anastasios_Nikolas_Angelopoulos1;~Emmanuel_Candes1;~Ryan_Tibshirani1", "aff": "University of California, Berkeley;Stanford University;University of California, Berkeley", "aff_domain": "berkeley.edu;stanford.edu;berkeley.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nangelopoulos2023conformal,\ntitle={Conformal {PID} Control for Time Series Prediction},\nauthor={Anastasios Nikolas Angelopoulos and Emmanuel Candes and Ryan Tibshirani},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zPYeYv6YYs}\n}", "github": "", "project": "", "reviewers": "Cw13;H9oS;aXqf;bvK1", "pdf_size": 9060379, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;4;2;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "112;82;78;62", "wc_strengths": "76;91;72;179", "wc_weaknesses": "404;665;96;110", "wc_questions": "25;189;71;36", "wc_limitations": "14;1;67;12", "wc_review": "631;1028;384;399", "wc_reply_reviewers": "25;13;22;0", "wc_reply_authors": "0;47;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.5, 18.07622748252522 ], "wc_strengths_avg": [ 104.5, 43.591857037754195 ], "wc_weaknesses_avg": [ 318.75, 234.7076639140699 ], "wc_questions_avg": [ 80.25, 65.04373528634406 ], "wc_limitations_avg": [ 23.5, 25.59785147234041 ], "wc_review_avg": [ 610.5, 260.1734998034965 ], "wc_reply_reviewers_avg": [ 15.0, 9.72111104761179 ], "wc_reply_authors_avg": [ 11.75, 20.351596988934308 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 
1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3786409441219363222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "berkeley.edu;stanford.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.stanford.edu", "aff_unique_abbr": "UC Berkeley;Stanford", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Berkeley;Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-scale Diffusion Denoised Smoothing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69895", "id": "zQ4yraDiRe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d51e2a4628b15518f58bd1056b2d9124-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zQ4yraDiRe", "openreview": "https://openreview.net/forum?id=zQ4yraDiRe", "poster": "/media/PosterPDFs/NeurIPS%202023/69895.png?t=1702224733.7237835", "slides": "https://nips.cc/virtual/2023/poster/69895", "video": "https://nips.cc/virtual/2023/poster/69895", "author_site": "Jongheon Jeong, Jinwoo Shin", "tldr": "", "abstract": "Along with recent diffusion models, randomized smoothing has become one of a few tangible approaches that offer adversarial robustness to models at scale, e.g., those of large pre-trained models. Specifically, one can perform randomized smoothing on any classifier via a simple \"denoise-and-classify\" pipeline, so-called denoised smoothing, given that an accurate denoiser is available - such as a diffusion model. In this paper, we present scalable methods to address the current trade-off between certified robustness and accuracy in denoised smoothing. Our key idea is to \"selectively\" apply smoothing among multiple noise scales, coined multi-scale smoothing, which can be efficiently implemented with a single diffusion model. This approach also suggests a new objective to compare the collective robustness of multi-scale smoothed classifiers, and questions which representation of the diffusion model would maximize the objective. To address this, we propose to further fine-tune the diffusion model (a) to perform consistent denoising whenever the original image is recoverable, but (b) to generate rather diverse outputs otherwise. Our experiments show that the proposed multi-scale smoothing scheme, combined with diffusion fine-tuning, not only allows strong certified robustness at high noise scales but also maintains accuracy close to non-smoothed classifiers. 
Code is available at https://github.com/jh-jeong/smoothing-multiscale.", "keywords": "adversarial robustness;certified robustness;randomized smoothing;denoised smoothing;diffusion models", "primary_area": "", "supplementary_material": "", "author": "Jongheon Jeong;Jinwoo Shin", "authorids": "~Jongheon_Jeong1;~Jinwoo_Shin1", "gender": "M;M", "homepage": "https://jh-jeong.github.io;https://sites.google.com/site/mijirim/", "dblp": "241/5923;31/7062", "google_scholar": "mZB2qfcAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": "0000-0002-4058-5774;", "linkedin": "jongheonj/;", "or_profile": "~Jongheon_Jeong1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\njeong2023multiscale,\ntitle={Multi-scale Diffusion Denoised Smoothing},\nauthor={Jongheon Jeong and Jinwoo Shin},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zQ4yraDiRe}\n}", "github": "", "project": "", "reviewers": "rNFT;QpH2;Vnny;K2e2;b42a", "pdf_size": 1413645, "rating": "5;5;5;6;6", "confidence": "5;4;4;5;1", "soundness": "3;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;1;3;3", "wc_summary": "81;61;75;102;64", "wc_strengths": "120;70;33;50;47", "wc_weaknesses": "385;277;317;86;138", "wc_questions": "2;6;283;112;7", "wc_limitations": "1;9;49;7;33", "wc_review": "589;423;757;357;289", "wc_reply_reviewers": "0;197;42;15;0", "wc_reply_authors": "0;276;63;36;0", "reply_reviewers": "0;2;1;1;0", "reply_authors": "1;3;2;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 1.469693845669907 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 76.6, 14.62326912834473 ], "wc_strengths_avg": [ 64.0, 30.390788077968626 ], "wc_weaknesses_avg": [ 240.6, 111.75079418062316 ], "wc_questions_avg": [ 82.0, 108.72166297477241 ], "wc_limitations_avg": [ 19.8, 18.225257199831226 ], "wc_review_avg": [ 483.0, 169.36587613802257 ], "wc_reply_reviewers_avg": [ 50.8, 74.69243602935975 ], "wc_reply_authors_avg": [ 75.0, 103.26277160719637 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.44444444444444436, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10314966767066425200&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Optimized Covariance Design for AB Test on Social Network under Interference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69894", "id": "zQOYGDc9pu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/760b5def8dcb1156aac454e9c0f5f406-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zQOYGDc9pu", "openreview": "https://openreview.net/forum?id=zQOYGDc9pu", "poster": 
"/media/PosterPDFs/NeurIPS%202023/69894.png?t=1701709236.458846", "slides": "https://nips.cc/virtual/2023/poster/69894", "video": "https://nips.cc/virtual/2023/poster/69894", "author_site": "Qianyi Chen, Bo Li, Lu Deng, Yong Wang", "tldr": "", "abstract": "Online A/B tests have become increasingly popular and important for social platforms. However, accurately estimating the global average treatment effect (GATE) has proven to be challenging due to network interference, which violates the Stable Unit Treatment Value Assumption (SUTVA) and poses great challenge to experimental design. Existing network experimental design research was mostly based on the unbiased Horvitz-Thompson (HT) estimator with substantial data trimming to ensure unbiasedness at the price of high resultant estimation variance. In this paper, we strive to balance the bias and variance in designing randomized network experiments. Under a potential outcome model with 1-hop interference, we derive the bias and variance of the standard HT estimator and reveal their relation to the network topological structure and the covariance of the treatment assignment vector. We then propose to formulate the experimental design problem as to optimize the covariance matrix of the treatment assignment vector to achieve the bias and variance balance by minimizing the mean squared error (MSE) of the estimator. An efficient projected gradient descent algorithm is presented to the implement of the desired randomization scheme. Finally, we carry out extensive simulation studies to demonstrate the advantages of our proposed method over other existing methods in many settings, with different levels of model misspecification.", "keywords": "AB test;interference;causal inference;optimization;social network", "primary_area": "", "supplementary_material": "/attachment/17d3ae1eda9b65593bf291a786ddc10f34b0600d.zip", "author": "Qianyi Chen;Bo Li;LU DENG;Yong Wang", "authorids": "~Qianyi_Chen2;~Bo_Li29;~LU_DENG1;~Yong_Wang11", "gender": "M;M;M;M", "homepage": "https://cqyiiii.github.io;http://www.sem.tsinghua.edu.cn/en/libo;;https://null.com", "dblp": "23/9743;50/3402-64;;", "google_scholar": ";GaJXFWMAAAAJ;;", "orcid": "0009-0001-3723-309X;0000-0001-5599-8857;0009-0005-0431-3594;0009-0009-4464-352X", "linkedin": ";;\u8def-\u9093-9a338ab6/;", "or_profile": "~Qianyi_Chen2;~Bo_Li29;~LU_DENG1;~Yong_Wang11", "aff": "Tsinghua University;Tsinghua University;Tencent;Tencent", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;tencent.com", "position": "PhD student;Associate Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nchen2023optimized,\ntitle={Optimized Covariance Design for {AB} Test on Social Network under Interference},\nauthor={Qianyi Chen and Bo Li and LU DENG and Yong Wang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zQOYGDc9pu}\n}", "github": "", "project": "", "reviewers": "2fwK;EFcj;1xr3;Q5Zi", "pdf_size": 360698, "rating": "5;6;6;7", "confidence": "3;3;5;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "113;136;136;65", "wc_strengths": "100;44;193;61", "wc_weaknesses": "174;45;1389;147", "wc_questions": "8;195;114;3", "wc_limitations": "89;59;1;1", "wc_review": "484;479;1833;277", "wc_reply_reviewers": "22;37;104;13", "wc_reply_authors": "35;22;43;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 
0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 112.5, 28.98706608127149 ], "wc_strengths_avg": [ 99.5, 57.673650829473246 ], "wc_weaknesses_avg": [ 438.75, 550.7324100686285 ], "wc_questions_avg": [ 80.0, 79.83420319637442 ], "wc_limitations_avg": [ 37.5, 38.00986713999406 ], "wc_review_avg": [ 768.25, 620.3794705662011 ], "wc_reply_reviewers_avg": [ 44.0, 35.68613176011096 ], "wc_reply_authors_avg": [ 31.0, 8.514693182963201 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11412941626351858347&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;tencent.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.tencent.com", "aff_unique_abbr": "THU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Sounding Bodies: Modeling 3D Spatial Sound of Humans Using Body Pose and Audio", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69893", "id": "zQTi3pziFp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c234d9c7e738a793947e0282c36eb95-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zQTi3pziFp", "openreview": "https://openreview.net/forum?id=zQTi3pziFp", "poster": "/media/PosterPDFs/NeurIPS%202023/69893.png?t=1701888018.357418", "slides": "https://nips.cc/virtual/2023/poster/69893", "video": "https://nips.cc/virtual/2023/poster/69893", "author_site": "Xudong XU, Dejan Markovic, Jacob Sandakly, Todd Keebler, Steven Krenn, Alexander Richard", "tldr": "", "abstract": "While 3D human body modeling has received much attention in computer vision, modeling the acoustic equivalent, i.e. modeling 3D spatial audio produced by body motion and speech, has fallen short in the community. To close this gap, we present a model that can generate accurate 3D spatial audio for full human bodies. The system consumes, as input, audio signals from headset microphones and body pose, and produces, as output, a 3D sound field surrounding the transmitter's body, from which spatial audio can be rendered at any arbitrary position in the 3D space. We collect a first-of-its-kind multimodal dataset of human bodies, recorded with multiple cameras and a spherical array of 345 microphones. In an empirical evaluation, we demonstrate that our model can produce accurate body-induced sound fields when trained with a suitable loss. 
Dataset and code are available online.", "keywords": "sound field;spatial audio;virtual humans;human body;body modeling", "primary_area": "", "supplementary_material": "/attachment/5982d20b50e57f325ed04c373c0575ec7d9543ac.zip", "author": "Xudong XU;Dejan Markovic;Jacob Sandakly;Todd Keebler;Steven Krenn;Alexander Richard", "authorids": "~Xudong_XU1;~Dejan_Markovic1;jasandakly@meta.com;toddkeebler@meta.com;~Steven_Krenn1;~Alexander_Richard1", "gender": "M;M;;;M;M", "homepage": "https://sheldontsui.github.io;;;;;https://alexanderrichard.github.io", "dblp": "210/2741;17/1368;;;;73/9876", "google_scholar": "https://scholar.google.com.hk/citations?user=D8VMkA8AAAAJ;cyAYD3UAAAAJ;;;;https://scholar.google.de/citations?user=73DTbNAAAAAJ", "orcid": ";;;;;", "linkedin": ";dejanmarkovic07/;;;stevenkrenn/;", "or_profile": "~Xudong_XU1;~Dejan_Markovic1;jasandakly@meta.com;toddkeebler@meta.com;~Steven_Krenn1;~Alexander_Richard1", "aff": "The Chinese University of Hong Kong;Meta ;;;Meta Facebook;Meta", "aff_domain": "ie.cuhk.edu;meta.com;;;meta.com;meta.com", "position": "PhD student;Research Scientist;;;Researcher;Researcher", "bibtex": "@inproceedings{\nxu2023sounding,\ntitle={Sounding Bodies: Modeling 3D Spatial Sound of Humans Using Body Pose and Audio},\nauthor={Xudong XU and Dejan Markovic and Jacob Sandakly and Todd Keebler and Steven Krenn and Alexander Richard},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zQTi3pziFp}\n}", "github": "", "project": "", "reviewers": "1NH5;R8ko;k1Fj;of5R", "pdf_size": 17776654, "rating": "5;7;7;8", "confidence": "3;4;4;4", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "3;4;3;4", "wc_summary": "167;116;94;139", "wc_strengths": "29;197;102;140", "wc_weaknesses": "148;124;129;155", "wc_questions": "18;1;82;190", "wc_limitations": "26;11;1;41", "wc_review": "388;449;408;665", "wc_reply_reviewers": "14;39;59;41", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 129.0, 27.101660465735304 ], "wc_strengths_avg": [ 117.0, 61.02868178160168 ], "wc_weaknesses_avg": [ 139.0, 12.864680330268607 ], "wc_questions_avg": [ 72.75, 74.12615935012417 ], "wc_limitations_avg": [ 19.75, 15.155444566227676 ], "wc_review_avg": [ 477.5, 110.46379497373789 ], "wc_reply_reviewers_avg": [ 38.25, 16.021469970012117 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5755367985216295883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "ie.cuhk.edu;meta.com;;;meta.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cuhk.edu.hk;https://meta.com", "aff_unique_abbr": "CUHK;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Revisiting Out-of-distribution Robustness in NLP: Benchmarks, Analysis, and LLMs Evaluations", 
"status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73407", "id": "zQU33Uh3qM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b6b5f50a2001ad1cbccca96e693c4ab4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=zQU33Uh3qM", "openreview": "https://openreview.net/forum?id=zQU33Uh3qM", "poster": "/media/PosterPDFs/NeurIPS%202023/73407.png?t=1700156416.4123623", "slides": "https://nips.cc/virtual/2023/poster/73407", "video": "https://nips.cc/virtual/2023/poster/73407", "author_site": "Lifan Yuan, Yangyi Chen, Ganqu Cui, Hongcheng Gao, FangYuan Zou, Xingyi Cheng, Heng Ji, Zhiyuan Liu, Maosong Sun", "tldr": "", "abstract": "This paper reexamines the research on out-of-distribution (OOD) robustness in the field of NLP. We find that the distribution shift settings in previous studies commonly lack adequate challenges, hindering the accurate evaluation of OOD robustness. To address these issues, we propose a benchmark construction protocol that ensures clear differentiation and challenging distribution shifts. Then we introduce\nBOSS, a Benchmark suite for Out-of-distribution robustneSS evaluation covering 5 tasks and 20 datasets. Based on BOSS, we conduct a series of experiments on pre\u0002trained language models for analysis and evaluation of OOD robustness. First, for vanilla fine-tuning, we examine the relationship between in-distribution (ID) and OOD performance. We identify three typical types that unveil the inner learning\nmechanism, which could potentially facilitate the forecasting of OOD robustness, correlating with the advancements on ID datasets. Then, we evaluate 5 classic meth\u0002ods on BOSS and find that, despite exhibiting some effectiveness in specific cases, they do not offer significant improvement compared to vanilla fine-tuning. Further, we evaluate 5 LLMs with various adaptation paradigms and find that when sufficient ID data is available, fine-tuning domain-specific models outperform LLMs on ID examples significantly. However, in the case of OOD instances, prioritizing LLMs with in-context learning yields better results. We identify that both fine-tuned small models and LLMs face challenges in effectively addressing downstream tasks. 
The code is public at https://github.com/lifan-yuan/OOD_NLP.", "keywords": "NLP;OOD Robustness;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/734a35db2b8a779e68b278a04c134801a3b19059.zip", "author": "Lifan Yuan;Yangyi Chen;Ganqu Cui;Hongcheng Gao;FangYuan Zou;Xingyi Cheng;Heng Ji;Zhiyuan Liu;Maosong Sun", "authorids": "~Lifan_Yuan1;~Yangyi_Chen1;~Ganqu_Cui1;~Hongcheng_Gao1;~FangYuan_Zou1;~Xingyi_Cheng3;~Heng_Ji3;~Zhiyuan_Liu1;~Maosong_Sun1", "gender": ";M;M;M;M;M;F;M;M", "homepage": ";https://yangyi-chen.github.io/;https://cgq15.github.io/;https://gao-hongcheng.github.io/;https://weibo.com/u/2163622992;;http://blender.cs.illinois.edu/hengji.html;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": ";05/10083;232/3064;318/1404;;206/6376;;53/3245-1;95/3291-1", "google_scholar": ";https://scholar.google.com/citations?hl=en;3IVSzZgAAAAJ;https://scholar.google.com/citations?hl=en;;shO7XmIAAAAJ;z7GCqT4AAAAJ;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "orcid": ";;;;;;;0000-0002-7709-2543;", "linkedin": ";yangyi-chen-4006a11b2/;;;;;;;", "or_profile": "~Lifan_Yuan1;~Yangyi_Chen1;~Ganqu_Cui1;~Hongcheng_Gao1;~FangYuan_Zou1;~Xingyi_Cheng3;~Heng_Ji3;~Zhiyuan_Liu1;~Maosong_Sun1", "aff": ";Department of Computer Science, University of Illinois at Urbana-Champaign;Tsinghua University;Chongqing University;Tencent AI Lab;BioMap;University of Illinois, Urbana-Champaign;Tsinghua University;Tsinghua University", "aff_domain": ";cs.illinois.edu;tsinghua.edu.cn;cqu.edu.cn;tencent.com;biomap.com;uiuc.edu;tsinghua.edu.cn;tsinghua.edu.cn", "position": ";PhD student;PhD student;Undergrad student;Researcher;Principal Researcher;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyuan2023revisiting,\ntitle={Revisiting Out-of-distribution Robustness in {NLP}: Benchmarks, Analysis, and {LLM}s Evaluations},\nauthor={Lifan Yuan and Yangyi Chen and Ganqu Cui and Hongcheng Gao and FangYuan Zou and Xingyi Cheng and Heng Ji and Zhiyuan Liu and Maosong Sun},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=zQU33Uh3qM}\n}", "github": "", "project": "", "reviewers": "wTiN;XEXa;5TMS;6XNi", "pdf_size": 2238033, "rating": "6;6;6;7", "confidence": "3;5;3;4", "wc_summary_and_contributions": "152;118;72;65", "wc_strengths": "89;104;91;103", "wc_improvement": "210;427;42;88", "wc_limitations": "34;14;120;116", "wc_correctness": "432;28;15;17", "wc_clarity": "42;12;14;14", "wc_relation_to_prior_work": "46;16;36;11", "wc_documentation": "1;24;7;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "1007;744;398;417", "wc_reply_reviewers": "0;138;81;0", "wc_reply_authors": "2528;1114;1315;621", "reply_reviewers": "0;1;1;0", "reply_authors": "5;2;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 101.75, 35.442735503908274 ], "wc_strengths_avg": [ 96.75, 6.796138609534093 ], "wc_improvement_avg": [ 191.75, 149.0509560519489 ], "wc_limitations_avg": [ 71.0, 47.54997371187496 ], "wc_correctness_avg": [ 123.0, 178.4698854148789 ], "wc_clarity_avg": [ 20.5, 12.439855304624729 ], "wc_relation_to_prior_work_avg": [ 27.25, 14.306903927824496 ], "wc_documentation_avg": [ 8.5, 9.233092656309694 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 641.5, 251.88737562648907 ], "wc_reply_reviewers_avg": [ 
54.75, 58.34113043128321 ], "wc_reply_authors_avg": [ 1394.5, 701.44939232991 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4611384523995177533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";cs.illinois.edu;tsinghua.edu.cn;cqu.edu.cn;tencent.com;biomap.com;uiuc.edu;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Tsinghua University;Chongqing University;Tencent;BioMap;University of Illinois", "aff_unique_dep": "Department of Computer Science;;;Tencent AI Lab;;", "aff_unique_url": "https://illinois.edu;https://www.tsinghua.edu.cn;https://www.cqu.edu.cn;https://ai.tencent.com;;https://illinois.edu", "aff_unique_abbr": "UIUC;THU;CQU;Tencent AI Lab;;UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1;1;0;1;1", "aff_country_unique": "United States;China;" }, { "id": "zR6V9fPRBn", "title": "Exploiting Negative Samples: A Catalyst for Cohort Discovery in Healthcare Analytics", "track": "main", "status": "Reject", "tldr": "", "abstract": "Healthcare analytics, particularly binary diagnosis or prognosis problems, present unique challenges due to the inherent asymmetry between positive and negative samples. While positive samples, representing patients who develop a disease, are defined through rigorous medical criteria, negative samples are defined in an open-ended manner, resulting in a vast potential set. Despite this fundamental asymmetry, previous research has underexplored the role of negative samples, possibly due to the enormous challenge of investigating an infinitely large negative sample space. To bridge this gap, we propose an approach to facilitate cohort discovery within negative samples, which could yield valuable insights into the studied disease, as well as its comorbidity and complications. We measure each sample\u2019s contribution using data Shapley values and construct the Negative Sample Shapley Field to model the distribution of all negative samples. Then we transform this field via manifold learning, preserving the data structure information while imposing an isotropy constraint in data Shapley values. Within this transformed space, we identify cohorts of medical interest through density-based clustering. We empirically evaluate the effectiveness of our approach on our hospital\u2019s electronic medical records. The medical insights revealed in the discovered cohorts are validated by clinicians, which affirms the medical value of our proposal in unveiling meaningful insights consistent with existing domain knowledge, thereby bolstering medical research and well-informed clinical decision-making.", "keywords": "Negative Samples;Cohort Discovery;Healthcare Analytics", "primary_area": "", "supplementary_material": "/attachment/b337c2b73c36cdd2c2800cf93af152af90a70c38.zip", "author": "Kaiping Zheng;Horng-Ruey Chua;Melanie Herschel;H. 
Jagadish;Beng Chin Ooi;James Wei Luen Yip", "authorids": "~Kaiping_Zheng1;~Horng-Ruey_Chua1;~Melanie_Herschel1;~H._Jagadish1;~Beng_Chin_Ooi1;~James_Wei_Luen_Yip2", "gender": "F;M;;;M;M", "homepage": "https://www.comp.nus.edu.sg/~kaiping/;;;;http://www.comp.nus.edu.sg/~ooibc/;", "dblp": "169/3246;https://dblp.uni-trier.de/pid/262/0046;;;o/BengChinOoi;07/10315", "google_scholar": "https://scholar.google.com.sg/citations?user=V1PdtzQAAAAJ;https://scholar.google.com.sg/citations?user=BtM-NyIAAAAJ;;;https://scholar.google.com.tw/citations?user=9560QjYAAAAJ;", "orcid": "0000-0001-8138-1543;0000-0003-1379-0585;;;0000-0003-4446-1100;0000-0001-5470-4554", "linkedin": ";;;;beng-chin-ooi-34b0634/;", "or_profile": "~Kaiping_Zheng1;~Horng-Ruey_Chua1;~Melanie_Herschel1;~H._Jagadish1;~Beng_Chin_Ooi1;~James_Wei_Luen_Yip2", "aff": "National University of Singapore;National University Hospital;;;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nuhs.edu.sg;;;comp.nus.edu.sg;nus.edu.sg", "position": "Postdoc;Researcher;;;Full Professor;Associate Professor", "bibtex": "@misc{\nzheng2023exploiting,\ntitle={Exploiting Negative Samples: A Catalyst for Cohort Discovery in Healthcare Analytics},\nauthor={Kaiping Zheng and Horng-Ruey Chua and Melanie Herschel and H. Jagadish and Beng Chin Ooi and James Wei Luen Yip},\nyear={2023},\nurl={https://openreview.net/forum?id=zR6V9fPRBn}\n}", "github": "", "project": "", "reviewers": "Sxir;vvb4;vE7H;g4r8", "site": "https://openreview.net/forum?id=zR6V9fPRBn", "pdf_size": 2132703, "rating": "3;3;5;7", "confidence": "5;3;5;3", "soundness": "2;2;2;3", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "36;125;109;204", "wc_strengths": "63;99;106;172", "wc_weaknesses": "172;259;144;196", "wc_questions": "106;44;2;72", "wc_limitations": "1;11;1;50", "wc_review": "378;538;362;694", "wc_reply_reviewers": "497;106;92;34", "wc_reply_authors": "1633;635;117;21", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 4.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 118.5, 59.68458762528229 ], "wc_strengths_avg": [ 110.0, 39.338276525541886 ], "wc_weaknesses_avg": [ 192.75, 42.446289590493066 ], "wc_questions_avg": [ 56.0, 38.13135192987524 ], "wc_limitations_avg": [ 15.75, 20.191272867256288 ], "wc_review_avg": [ 493.0, 134.91849391391827 ], "wc_reply_reviewers_avg": [ 182.25, 183.71496264594236 ], "wc_reply_authors_avg": [ 601.5, 639.6942629100248 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4000752260447724841&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "National University of Singapore;National University Hospital", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;", "aff_unique_abbr": "NUS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Singapore;Unknown" }, { "title": "Stanford-ORB: A Real-World 3D Object Inverse Rendering Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73406", "id": "zRYSJbcRcV", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/92a821f6c25b29241df6985ceb673a85-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=zRYSJbcRcV", "openreview": "https://openreview.net/forum?id=zRYSJbcRcV", "poster": "/media/PosterPDFs/NeurIPS%202023/73406.png?t=1702059551.23194", "slides": "https://nips.cc/virtual/2023/poster/73406", "video": "https://nips.cc/virtual/2023/poster/73406", "author_site": "Zhengfei Kuang, Yunzhi Zhang, Hong-Xing Yu, Samir Agarwala, Elliott / Shangzhe Wu, Jiajun Wu", "tldr": "", "abstract": "We introduce Stanford-ORB, a new real-world 3D Object inverse Rendering Benchmark. Recent advances in inverse rendering have enabled a wide range of real-world applications in 3D content generation, moving rapidly from research and commercial use cases to consumer devices. While the results continue to improve, there is no real-world benchmark that can quantitatively assess and compare the performance of various inverse rendering methods. Existing real-world datasets typically only consist of the shape and multi-view images of objects, which are not sufficient for evaluating the quality of material recovery and object relighting. Methods capable of recovering material and lighting often resort to synthetic data for quantitative evaluation, which on the other hand does not guarantee generalization to complex real-world environments. We introduce a new dataset of real-world objects captured under a variety of natural scenes with ground-truth 3D scans, multi-view images, and environment lighting. Using this dataset, we establish the first comprehensive real-world evaluation benchmark for object inverse rendering tasks from in-the-wild scenes, and compare the performance of various existing methods. 
All data, code, and models can be accessed at https://stanfordorb.github.io/", "keywords": "Inverse Rendering;3D Objects;BRDF", "primary_area": "", "supplementary_material": "", "author": "Zhengfei Kuang;Yunzhi Zhang;Hong-Xing Yu;Samir Agarwala;Shangzhe Wu;Jiajun Wu", "authorids": "~Zhengfei_Kuang1;~Yunzhi_Zhang1;~Hong-Xing_Yu1;~Samir_Agarwala1;~Shangzhe_Wu2;~Jiajun_Wu1", "gender": "M;F;M;M;M;M", "homepage": "https://zhengfeikuang.com;https://cs.stanford.edu/~yzzhang/;https://kovenyu.com;https://samiragarwala.github.io/;https://elliottwu.com/;https://jiajunwu.com", "dblp": ";58/10932;205/2676.html;326/1599;164/9884;117/4768", "google_scholar": ";https://scholar.google.com/citations?hl=en;kNKncZcAAAAJ;In59sxcAAAAJ;36NmvrMAAAAJ;2efgcS0AAAAJ", "orcid": ";;;;0000-0003-1011-5963;0000-0002-4176-343X", "linkedin": ";;;samiragarwala;;jiajunwu/", "or_profile": "~Zhengfei_Kuang1;~Yunzhi_Zhang1;~Hong-Xing_Yu1;~Samir_Agarwala1;~Shangzhe_Wu2;~Jiajun_Wu1", "aff": "Epic Games;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "epicgames.com;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Researcher;PhD student;PhD student;MS student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nkuang2023stanfordorb,\ntitle={Stanford-{ORB}: A Real-World 3D Object Inverse Rendering Benchmark},\nauthor={Zhengfei Kuang and Yunzhi Zhang and Hong-Xing Yu and Samir Agarwala and Shangzhe Wu and Jiajun Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=zRYSJbcRcV}\n}", "github": "", "project": "", "reviewers": "8Dpe;DSB3;gijK;uqj7", "pdf_size": 15770697, "rating": "6;7;7;7", "confidence": "5;4;3;4", "wc_summary_and_contributions": "64;53;53;76", "wc_strengths": "136;53;62;51", "wc_improvement": "582;121;203;279", "wc_limitations": "265;23;1;22", "wc_correctness": "223;32;1;14", "wc_clarity": "49;6;1;12", "wc_relation_to_prior_work": "58;17;1;5", "wc_documentation": "130;15;1;23", "wc_additional_feedback": "1;1;1;1", "wc_review": "1508;321;324;483", "wc_reply_reviewers": "82;0;0;0", "wc_reply_authors": "928;883;572;884", "reply_reviewers": "1;0;0;0", "reply_authors": "4;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 61.5, 9.5 ], "wc_strengths_avg": [ 75.5, 35.174564673923115 ], "wc_improvement_avg": [ 296.25, 174.18291391522877 ], "wc_limitations_avg": [ 77.75, 108.4651441708349 ], "wc_correctness_avg": [ 67.5, 90.45026257562772 ], "wc_clarity_avg": [ 17.0, 18.881207588499205 ], "wc_relation_to_prior_work_avg": [ 20.25, 22.576259654778955 ], "wc_documentation_avg": [ 42.25, 51.27072751580574 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 659.0, 494.5315965638596 ], "wc_reply_reviewers_avg": [ 20.5, 35.50704155516198 ], "wc_reply_authors_avg": [ 816.75, 142.46995297254787 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16013328723432995814&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "epicgames.com;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Epic 
Games;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.epicgames.com;https://www.stanford.edu", "aff_unique_abbr": "Epic Games;Stanford", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beta Diffusion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69892", "id": "zTSlm4nmlH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5fe1b43c882d746c187456eb4c8cdf52-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zTSlm4nmlH", "openreview": "https://openreview.net/forum?id=zTSlm4nmlH", "poster": "/media/PosterPDFs/NeurIPS%202023/69892.png?t=1699929177.2958148", "slides": "https://nips.cc/virtual/2023/poster/69892", "video": "https://nips.cc/virtual/2023/poster/69892", "author_site": "Mingyuan Zhou, Tianqi Chen, Zhendong Wang, Huangjie Zheng", "tldr": "", "abstract": "We introduce beta diffusion, a novel generative modeling method that integrates demasking and denoising to generate data within bounded ranges. Using scaled and shifted beta distributions, beta diffusion utilizes multiplicative transitions over time to create both forward and reverse diffusion processes, maintaining beta distributions in both the forward marginals and the reverse conditionals, given the data at any point in time. Unlike traditional diffusion-based generative models relying on additive Gaussian noise and reweighted evidence lower bounds (ELBOs), beta diffusion is multiplicative and optimized with KL-divergence upper bounds (KLUBs) derived from the convexity of the KL divergence. We demonstrate that the proposed KLUBs are more effective for optimizing beta diffusion compared to negative ELBOs, which can also be derived as the KLUBs of the same KL divergence with its two arguments swapped. The loss function of beta diffusion, expressed in terms of Bregman divergence, further supports the efficacy of KLUBs for optimization. 
Experimental results on both synthetic data and natural images demonstrate the unique capabilities of beta diffusion in generative modeling of range-bounded data and validate the effectiveness of KLUBs in optimizing diffusion models, thereby making them valuable additions to the family of diffusion-based generative models and the optimization techniques used to train them.", "keywords": "Diffusion models;KL-divergence upper bounds;multiplicative transitions;scaled and shifted beta distributions", "primary_area": "", "supplementary_material": "", "author": "Mingyuan Zhou;Tianqi Chen;Zhendong Wang;Huangjie Zheng", "authorids": "~Mingyuan_Zhou1;~Tianqi_Chen2;~Zhendong_Wang1;~Huangjie_Zheng1", "gender": "M;;M;M", "homepage": "http://mingyuanzhou.github.io;https://tqch.github.io;https://zhendong-wang.github.io/;", "dblp": ";94/8023;;192/2170", "google_scholar": "LXwCIisAAAAJ;jucvWbcAAAAJ;lRiIjhcAAAAJ;Vl5wCXsAAAAJ", "orcid": ";0000-0003-3604-3048;;0000-0003-0508-5034", "linkedin": ";tianqi-chen-4875671a3;;", "or_profile": "~Mingyuan_Zhou1;~Tianqi_Chen2;~Zhendong_Wang1;~Huangjie_Zheng1", "aff": "Google;University of Texas at Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "google.com;utexas.edu;utexas.edu;utexas.edu", "position": "Researcher;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nzhou2023beta,\ntitle={Beta Diffusion},\nauthor={Mingyuan Zhou and Tianqi Chen and Zhendong Wang and Huangjie Zheng},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zTSlm4nmlH}\n}", "github": "", "project": "", "reviewers": "buWN;FX5S;n1uL;Npra", "pdf_size": 1536371, "rating": "6;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "101;155;87;322", "wc_strengths": "47;151;41;76", "wc_weaknesses": "105;182;700;152", "wc_questions": "244;416;128;163", "wc_limitations": "1;38;57;45", "wc_review": "498;942;1013;758", "wc_reply_reviewers": "65;81;301;24", "wc_reply_authors": "116;82;220;16", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 166.25, 93.43814799106413 ], "wc_strengths_avg": [ 78.75, 43.76285525419931 ], "wc_weaknesses_avg": [ 284.75, 241.3103551445731 ], "wc_questions_avg": [ 237.75, 111.18087740254616 ], "wc_limitations_avg": [ 35.25, 20.90902915010642 ], "wc_review_avg": [ 802.75, 199.04192397582977 ], "wc_reply_reviewers_avg": [ 117.75, 107.82248142201144 ], "wc_reply_authors_avg": [ 108.5, 73.73432036711263 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16293729545247515849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Google;University of Texas at Austin", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.utexas.edu", "aff_unique_abbr": "Google;UT Austin", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Mountain View;Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United 
States" }, { "title": "$S^3$: Increasing GPU Utilization during Generative Inference for Higher Throughput", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69891", "id": "zUYfbdNl1m", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3a13be0c5dae69e0f08065f113fb10b8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zUYfbdNl1m", "openreview": "https://openreview.net/forum?id=zUYfbdNl1m", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69891", "video": "https://nips.cc/virtual/2023/poster/69891", "author_site": "Yunho Jin, Chun-Feng Wu, David Brooks, Gu-Yeon Wei", "tldr": "", "abstract": "Generating texts with a large language model (LLM) consumes massive amounts of memory. Apart from the already-large model parameters, the key/value (KV) cache that holds information about previous tokens in a sequence can grow to be even larger than the model itself. This problem is exacerbated in one of the current LLM serving frameworks which reserves the maximum sequence length of memory for the KV cache to guarantee generating a complete sequence as they do not know the output sequence length. This restricts us to use a smaller batch size leading to lower GPU utilization and above all, lower throughput. We argue that designing a system with a priori knowledge of the output sequence can mitigate this problem. To this end, we propose $S^3$, which predicts the output sequence length, schedules generation queries based on the prediction to increase device resource utilization and throughput, and handle mispredictions. Our proposed method achieves 6.49\u00d7 throughput over those systems that assume the worst case for the output sequence length.", "keywords": "Throughput;GPU utilization;Sequence length prediction", "primary_area": "", "supplementary_material": "/attachment/2fc44c6d6e319d47ec2a7b8dd9d3115e8589af6d.gz", "author": "Yunho Jin;Chun-Feng Wu;David Brooks;Gu-Yeon Wei", "authorids": "~Yunho_Jin1;~Chun-Feng_Wu1;~David_Brooks1;~Gu-Yeon_Wei1", "gender": "M;M;;M", "homepage": "https://www.linkedin.com/in/yunhojin94/;https://cfwu417.github.io/;;", "dblp": "290/8348;90/597.html;30/135;21/5583", "google_scholar": "Wgg-m2oAAAAJ;https://scholar.google.com.tw/citations?user=xj7c79YAAAAJ;vXHA_XYAAAAJ;IR0yJB8AAAAJ", "orcid": "0000-0002-0292-3322;0000-0002-6367-0517;;", "linkedin": ";;;", "or_profile": "~Yunho_Jin1;~Chun-Feng_Wu1;~David_Brooks1;~Gu-Yeon_Wei1", "aff": "Harvard University;Computer Science, National Yang Ming Chiao Tung University;Meta Facebook;Samsung", "aff_domain": "harvard.edu;cs.nycu.edu;facebook.com;samsung.com", "position": "PhD student;Assistant Professor;Visiting Research Scientist;Researcher", "bibtex": "@inproceedings{\njin2023s,\ntitle={\\$S{\\textasciicircum}3\\$: Increasing {GPU} Utilization during Generative Inference for Higher Throughput},\nauthor={Yunho Jin and Chun-Feng Wu and David Brooks and Gu-Yeon Wei},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zUYfbdNl1m}\n}", "github": "", "project": "", "reviewers": "RWv5;945f;s4gr;CH64;1WaJ", "pdf_size": 1125124, "rating": "5;5;6;6;7", "confidence": "4;3;3;2;4", "soundness": "4;2;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "60;144;165;98;85", "wc_strengths": "33;117;58;107;43", "wc_weaknesses": "156;227;27;105;34", "wc_questions": "86;222;18;2;163", "wc_limitations": "13;10;1;1;49", "wc_review": "348;720;269;313;374", 
"wc_reply_reviewers": "85;42;14;65;61", "wc_reply_authors": "294;0;0;140;86", "reply_reviewers": "1;1;1;2;1", "reply_authors": "4;1;1;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 110.4, 38.598445564556094 ], "wc_strengths_avg": [ 71.6, 34.079906103157036 ], "wc_weaknesses_avg": [ 109.8, 75.49145647025232 ], "wc_questions_avg": [ 98.2, 84.09613546412226 ], "wc_limitations_avg": [ 14.8, 17.758378304338493 ], "wc_review_avg": [ 404.8, 161.48981392025937 ], "wc_reply_reviewers_avg": [ 53.4, 23.971649922356203 ], "wc_reply_authors_avg": [ 104.0, 108.95136529663132 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0714285714285715, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15598073494657015915&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "harvard.edu;cs.nycu.edu;facebook.com;samsung.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Harvard University;National Yang Ming Chiao Tung University;Meta;Samsung", "aff_unique_dep": ";Computer Science;Meta Platforms, Inc.;Samsung", "aff_unique_url": "https://www.harvard.edu;https://www.nctu.edu.tw;https://meta.com;https://www.samsung.com", "aff_unique_abbr": "Harvard;NYCU;Meta;Samsung", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "United States;China;South Korea" }, { "title": "Unpaired Multi-Domain Causal Representation Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69890", "id": "zW1uVN6Mbv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6c8985579293e0209bdaa4f21bb1d237-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zW1uVN6Mbv", "openreview": "https://openreview.net/forum?id=zW1uVN6Mbv", "poster": "/media/PosterPDFs/NeurIPS%202023/69890.png?t=1699436115.7543657", "slides": "https://nips.cc/virtual/2023/poster/69890", "video": "https://nips.cc/virtual/2023/poster/69890", "author_site": "Nils Sturma, Chandler Squires, Mathias Drton, Caroline Uhler", "tldr": "", "abstract": "The goal of causal representation learning is to find a representation of data that consists of causally related latent variables. We consider a setup where one has access to data from multiple domains that potentially share a causal representation. Crucially, observations in different domains are assumed to be unpaired, that is, we only observe the marginal distribution in each domain but not their joint distribution. In this paper, we give sufficient conditions for identifiability of the joint distribution and the shared causal graph in a linear setup. Identifiability holds if we can uniquely recover the joint distribution and the shared causal representation from the marginal distributions in each domain. 
We transform our results into a practical method to recover the shared latent causal graph.", "keywords": "linear structural equation models;causality;representation learning;independent component analysis;structure identifiability;multiple views;graphical model", "primary_area": "", "supplementary_material": "/attachment/7ae0996e20fba22daea144052bc2b38b1d5d7184.zip", "author": "Nils Sturma;Chandler Squires;Mathias Drton;Caroline Uhler", "authorids": "~Nils_Sturma1;~Chandler_Squires1;~Mathias_Drton2;~Caroline_Uhler1", "gender": "M;M;M;F", "homepage": "https://nilssturma.github.io/;https://chandlersquires.com;https://www.math.cit.tum.de/en/math/people/professors/drton-mathias/;https://www.carolineuhler.com/", "dblp": ";231/7704;78/3067;66/10813", "google_scholar": ";https://scholar.google.com.tr/citations?user=Nh3BtpUAAAAJ;CjRMyA4AAAAJ;https://scholar.google.com.tw/citations?user=dIJFcaoAAAAJ", "orcid": ";;0000-0001-5614-3025;", "linkedin": ";chandler-squires-749885a0/;;", "or_profile": "~Nils_Sturma1;~Chandler_Squires1;~Mathias_Drton2;~Caroline_Uhler1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Massachusetts Institute of Technology;Technische Universit\u00e4t M\u00fcnchen;Electrical Engineering & Computer Science, Massachusetts Institute of Technology", "aff_domain": "tum.de;mit.edu;tum.de;eecs.mit.edu", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsturma2023unpaired,\ntitle={Unpaired Multi-Domain Causal Representation Learning},\nauthor={Nils Sturma and Chandler Squires and Mathias Drton and Caroline Uhler},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zW1uVN6Mbv}\n}", "github": "", "project": "", "reviewers": "ny5v;aPgx;AWor;B2Nu", "pdf_size": 819242, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "3;3;4;4", "novelty": "2;3;4;3", "presentation": "3;3;3;4", "wc_summary": "71;51;147;76", "wc_strengths": "12;74;72;90", "wc_weaknesses": "293;63;91;239", "wc_questions": "3;25;88;36", "wc_limitations": "3;5;22;37", "wc_review": "382;218;420;478", "wc_reply_reviewers": "82;40;38;279", "wc_reply_authors": "227;0;0;427", "reply_reviewers": "2;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 36.29996556472196 ], "wc_strengths_avg": [ 62.0, 29.698484809834994 ], "wc_weaknesses_avg": [ 171.5, 96.91620091604912 ], "wc_questions_avg": [ 38.0, 31.216982557575932 ], "wc_limitations_avg": [ 16.75, 13.827056809024834 ], "wc_review_avg": [ 374.5, 96.60615922393355 ], "wc_reply_reviewers_avg": [ 109.75, 99.28336970510217 ], "wc_reply_authors_avg": [ 163.5, 178.1354821477181 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3065546761896951106&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "tum.de;mit.edu;tum.de;eecs.mit.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://web.mit.edu", "aff_unique_abbr": "TUM;MIT", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Germany;United States" }, { "title": "Universality and Limitations of Prompt Tuning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69889", "id": "zWxKYyW9ik", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/eef6aecfe050b556c6a48d9c16b15558-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zWxKYyW9ik", "openreview": "https://openreview.net/forum?id=zWxKYyW9ik", "poster": "/media/PosterPDFs/NeurIPS%202023/69889.png?t=1702569811.2787986", "slides": "https://nips.cc/virtual/2023/poster/69889", "video": "https://nips.cc/virtual/2023/poster/69889", "author_site": "Yihan Wang, Jatin Chauhan, Wei Wang, Cho-Jui Hsieh", "tldr": "", "abstract": "Despite the demonstrated empirical efficacy of prompt tuning to adapt a pretrained language model for a new task, the theoretical underpinnings of the difference between \"tuning parameters before the input\" against \"the tuning of model weights\" are limited. We thus take one of the first steps to understand the role of soft-prompt tuning for transformer-based architectures. By considering a general purpose architecture, we analyze prompt tuning from the lens of both: universal approximation and limitations with finite-depth fixed-weight pretrained transformers for continuous-valued functions. Our universality result guarantees the existence of a strong transformer with a prompt to approximate any sequence-to-sequence function in the set of Lipschitz functions. The limitations of prompt tuning for limited-depth transformers are first proved by constructing a set of datasets, that cannot be memorized by a prompt of any length for a given single encoder layer. We also provide a lower bound on the required number of tunable prompt parameters and compare the result with the number of parameters required for a low-rank update (based on LoRA) for a single-layer setting. We finally extend our analysis to multi-layer settings by providing sufficient conditions under which the transformer can at best learn datasets from invertible functions only. 
Our theoretical claims are also corroborated by empirical results.", "keywords": "prompt-tuning; language model; expressive power", "primary_area": "", "supplementary_material": "", "author": "Yihan Wang;Jatin Chauhan;Wei Wang;Cho-Jui Hsieh", "authorids": "~Yihan_Wang2;~Jatin_Chauhan3;~Wei_Wang13;~Cho-Jui_Hsieh1", "gender": "F;M;F;M", "homepage": "https://yihanwang617.github.io;https://chauhanjatin10.github.io/;http://www.cs.ucla.edu/~weiwang;http://web.cs.ucla.edu/~chohsieh/index.html", "dblp": ";242/7749;w/WeiWang.html;14/2770", "google_scholar": ";kTiFFPcAAAAJ;UedS9LQAAAAJ;Wy89g4IAAAAJ", "orcid": ";;0000-0002-8180-2886;", "linkedin": ";;wei-wang-8800845/;", "or_profile": "~Yihan_Wang2;~Jatin_Chauhan3;~Wei_Wang13;~Cho-Jui_Hsieh1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Amazon", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;amazon.com", "position": "PhD student;MS student;Full Professor;visiting scholar", "bibtex": "@inproceedings{\nwang2023universality,\ntitle={Universality and Limitations of Prompt Tuning},\nauthor={Yihan Wang and Jatin Chauhan and Wei Wang and Cho-Jui Hsieh},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zWxKYyW9ik}\n}", "github": "", "project": "", "reviewers": "adez;E8G5;AVty;VZsv;1J6k", "pdf_size": 418653, "rating": "5;5;6;6;7", "confidence": "2;3;3;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;4", "presentation": "2;3;3;3;3", "wc_summary": "104;230;87;61;78", "wc_strengths": "48;125;43;47;101", "wc_weaknesses": "103;308;213;38;9", "wc_questions": "2;127;8;4;1", "wc_limitations": "2;13;14;12;9", "wc_review": "259;803;365;162;198", "wc_reply_reviewers": "0;15;102;0;0", "wc_reply_authors": "0;347;50;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 112.0, 60.61352984276695 ], "wc_strengths_avg": [ 72.8, 33.730698184294965 ], "wc_weaknesses_avg": [ 134.2, 111.64121102890276 ], "wc_questions_avg": [ 28.4, 49.35828197982583 ], "wc_limitations_avg": [ 10.0, 4.33589667773576 ], "wc_review_avg": [ 357.4, 233.19056584690557 ], "wc_reply_reviewers_avg": [ 23.4, 39.72706885739243 ], "wc_reply_authors_avg": [ 79.4, 135.1940827107459 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17007298143455838128&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ucla.edu;ucla.edu;ucla.edu;amazon.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, Los Angeles;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Statistical Limits of Adaptive Linear Models: Low-Dimensional Estimation and Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69888", "id": "zXckveawHa", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2023/hash/3368e8f592b0d46ed85def795fd5168f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zXckveawHa", "openreview": "https://openreview.net/forum?id=zXckveawHa", "poster": "/media/PosterPDFs/NeurIPS%202023/69888.png?t=1701918301.5217688", "slides": "https://nips.cc/virtual/2023/poster/69888", "video": "https://nips.cc/virtual/2023/poster/69888", "author_site": "Licong Lin, Mufang Ying, Suvrojit Ghosh, Koulik Khamaru, Cun-Hui Zhang", "tldr": "", "abstract": "Estimation and inference in statistics pose significant challenges when data are collected adaptively. Even in linear models, the Ordinary Least Squares (OLS) estimator may fail to exhibit asymptotic normality for single coordinate estimation and have inflated error. This issue is highlighted by a recent minimax lower bound, which shows that the error of estimating a single coordinate can be enlarged by a multiple of $\\sqrt{d}$ when data are allowed to be arbitrarily adaptive, compared with the case when they are i.i.d. Our work explores this striking difference in estimation performance between utilizing i.i.d. and adaptive data. We investigate how the degree of adaptivity in data collection impacts the performance of estimating a low-dimensional parameter component in high-dimensional linear models. We identify conditions on the data collection mechanism under which the estimation error for a low-dimensional parameter component matches its counterpart in the i.i.d. setting, up to a factor that depends on the degree of adaptivity. We show that OLS or OLS on centered data can achieve this matching error. In addition, we propose a novel estimator for single coordinate inference via solving a Two-stage Adaptive Linear Estimating equation (TALE). 
Under a weaker form of adaptivity in data collection, we establish an asymptotic normality property of the proposed estimator.", "keywords": "Adaptive linear regression;bandit algorithms;high dimensional statistics;statistical inference", "primary_area": "", "supplementary_material": "/attachment/e56916ed686bba5221413b8e7f20c014772f863d.pdf", "author": "Licong Lin;Mufang Ying;Suvrojit Ghosh;Koulik Khamaru;Cun-Hui Zhang", "authorids": "~Licong_Lin2;~Mufang_Ying1;~Suvrojit_Ghosh1;~Koulik_Khamaru1;~Cun-Hui_Zhang1", "gender": "M;;;M;M", "homepage": "https://statistics.berkeley.edu/people/licong-lin;;;https://www.stat.berkeley.edu/~koulik/;https://statistics.rutgers.edu/people-pages/faculty/people/130-faculty/376-cun-hui-zhang", "dblp": ";;;218/5813;", "google_scholar": ";;;https://scholar.google.co.in/citations?user=IdSxI0YAAAAJ;_cxs104AAAAJ", "orcid": ";;;;", "linkedin": ";;suvrojit-ghosh-6a3161276?trk=contact-info;;", "or_profile": "~Licong_Lin2;~Mufang_Ying1;~Suvrojit_Ghosh1;~Koulik_Khamaru1;~Cun-Hui_Zhang1", "aff": "University of California, Berkeley;;Rutgers University;University of California, Berkeley;Rutgers University", "aff_domain": "berkeley.edu;;rutgers.edu;berkeley.edu;rutgers.edu", "position": "PhD student;;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlin2023statistical,\ntitle={Statistical Limits of Adaptive Linear Models: Low-Dimensional Estimation and Inference},\nauthor={Licong Lin and Mufang Ying and Suvrojit Ghosh and Koulik Khamaru and Cun-Hui Zhang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zXckveawHa}\n}", "github": "", "project": "", "reviewers": "KiDv;1ej5;4hmk;tdeW;Pp5b;nCuW", "pdf_size": 649921, "rating": "5;5;5;6;7;7", "confidence": "3;3;3;2;3;1", "soundness": "3;3;3;3;3;3", "novelty": "2;3;2;3;3;3", "presentation": "3;3;3;2;3;3", "wc_summary": "103;54;96;128;240;27", "wc_strengths": "15;33;69;100;72;1", "wc_weaknesses": "145;105;101;7;148;1", "wc_questions": "47;40;138;131;211;1", "wc_limitations": "13;5;27;1;45;1", "wc_review": "323;237;431;367;716;31", "wc_reply_reviewers": "108;53;24;11;33;0", "wc_reply_authors": "110;38;37;29;0;0", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "2;2;2;2;1;0", "rating_avg": [ 5.833333333333333, 0.8975274678557507 ], "confidence_avg": [ 2.5, 0.7637626158259734 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 108.0, 67.66338251472013 ], "wc_strengths_avg": [ 48.333333333333336, 34.74510357190236 ], "wc_weaknesses_avg": [ 84.5, 59.67062370491307 ], "wc_questions_avg": [ 94.66666666666667, 71.60695186611113 ], "wc_limitations_avg": [ 15.333333333333334, 16.01735170231195 ], "wc_review_avg": [ 350.8333333333333, 206.58365268228644 ], "wc_reply_reviewers_avg": [ 38.166666666666664, 35.39970181920872 ], "wc_reply_authors_avg": [ 35.666666666666664, 36.790699307780976 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6078306738548308, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8334544696346024746&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;;rutgers.edu;berkeley.edu;rutgers.edu", "author_num": 5, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of California, 
Berkeley;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.rutgers.edu", "aff_unique_abbr": "UC Berkeley;Rutgers", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimistic Natural Policy Gradient: a Simple Efficient Policy Optimization Framework for Online RL", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69887", "id": "zaQ7wV9NOg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0b13c22ca208bc08f3fd13793292f25f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zaQ7wV9NOg", "openreview": "https://openreview.net/forum?id=zaQ7wV9NOg", "poster": "/media/PosterPDFs/NeurIPS%202023/69887.png?t=1702052686.082504", "slides": "https://nips.cc/virtual/2023/poster/69887", "video": "https://nips.cc/virtual/2023/poster/69887", "author_site": "Qinghua Liu, Gellert Weisz, Andr\u00e1s Gy\u00f6rgy, Chi Jin, Csaba Szepesvari", "tldr": "", "abstract": "While policy optimization algorithms have played an important role in the recent empirical success of Reinforcement Learning (RL), the existing theoretical understanding of policy optimization remains rather limited---existing results are either restricted to tabular MDPs or suffer from highly suboptimal sample complexity, especially in online RL where exploration is necessary. This paper proposes a simple, efficient policy optimization framework---Optimistic NPG---for online RL. Optimistic NPG can be viewed as simply combining the classic natural policy gradient (NPG) algorithm [Kakade, 2001] with optimistic policy evaluation subroutines to encourage exploration. For $d$-dimensional linear MDPs, Optimistic NPG is computationally efficient, and learns an $\\epsilon$-optimal policy within $\\tilde{\\mathcal{O}}(d^2/\\epsilon^3)$ samples, making it the first computationally efficient algorithm whose sample complexity has the optimal dimension dependence $\\tilde{\\Theta}(d^2)$. It also improves over state-of-the-art results of policy optimization algorithms [Zanette et al., 2021] by a factor of $d$. 
For general function approximation that subsumes linear MDPs, Optimistic NPG is also, to the best of our knowledge, the first policy optimization algorithm that achieves polynomial sample complexity for learning near-optimal policies.", "keywords": "Theory of reinforcement learning;policy optimization", "primary_area": "", "supplementary_material": "", "author": "Qinghua Liu;Gell\u00e9rt Weisz;Andr\u00e1s Gy\u00f6rgy;Chi Jin;Csaba Szepesvari", "authorids": "~Qinghua_Liu1;~Gell\u00e9rt_Weisz2;~Andr\u00e1s_Gy\u00f6rgy2;~Chi_Jin1;~Csaba_Szepesvari1", "gender": "M;M;M;;", "homepage": "http://qinghual2020.github.io/;https://sites.google.com/view/cjin/home;https://sites.ualberta.ca/~szepesva/;http://www.cs.bme.hu/~gya;", "dblp": ";126/1802-1;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;72/251-1;215/3618.html", "google_scholar": "CotFJJsAAAAJ;GINhGvwAAAAJ;https://scholar.google.ca/citations?user=zvC19mQAAAAJ;https://scholar.google.com/citations?hl=en;8u-RYZcAAAAJ", "orcid": ";;;0000-0003-0586-4337;", "linkedin": ";;csaba-szepesvari-09376b1?trk=hp-identity-name;;", "or_profile": "~Qinghua_Liu1;~Chi_Jin1;~Csaba_Szepesvari1;~Andras_Gyorgy1;~Gellert_Weisz1", "aff": "Princeton University;Princeton University;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "princeton.edu;princeton.edu;google.com;deepmind.com;deepmind.com", "position": "PhD student;Assistant Professor;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nliu2023optimistic,\ntitle={Optimistic Natural Policy Gradient: a Simple Efficient Policy Optimization Framework for Online {RL}},\nauthor={Qinghua Liu and Gell{\\'e}rt Weisz and Andr{\\'a}s Gy{\\"o}rgy and Chi Jin and Csaba Szepesvari},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zaQ7wV9NOg}\n}", "github": "", "project": "", "reviewers": "LFfR;1VAU;EF4H;BU1M;czwS", "pdf_size": 375654, "rating": "7;7;7;7;8", "confidence": "3;4;3;3;4", "soundness": "4;4;3;3;4", "novelty": "3;3;3;3;3", "presentation": "4;4;4;3;4", "wc_summary": "66;42;30;62;144", "wc_strengths": "66;44;57;47;59", "wc_weaknesses": "126;26;14;34;331", "wc_questions": "47;130;10;35;1", "wc_limitations": "14;1;24;1;13", "wc_review": "319;243;135;179;548", "wc_reply_reviewers": "22;12;0;0;19", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_avg": [ 68.8, 39.831645710414726 ], "wc_strengths_avg": [ 54.6, 8.064738061462382 ], "wc_weaknesses_avg": [ 106.2, 119.2248296287313 ], "wc_questions_avg": [ 44.6, 45.80218335407167 ], "wc_limitations_avg": [ 10.6, 8.731551981177228 ], "wc_review_avg": [ 284.8, 145.48182017008173 ], "wc_reply_reviewers_avg": [ 10.6, 9.243376006633074 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1963019752519718697&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "princeton.edu;princeton.edu;google.com;deepmind.com;deepmind.com", "author_num": 5, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Princeton University;Google", 
"aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.princeton.edu;https://deepmind.com", "aff_unique_abbr": "Princeton;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "ASL Citizen: A Community-Sourced Dataset for Advancing Isolated Sign Language Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73405", "id": "zbEYTg2F1U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/f29cf8f8b4996a4a453ef366cf496354-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=zbEYTg2F1U", "openreview": "https://openreview.net/forum?id=zbEYTg2F1U", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/73405", "video": "https://nips.cc/virtual/2023/poster/73405", "author_site": "Aashaka Desai, Lauren Berger, Fyodor Minakov, Nessa Milano, Chinmay Singh, Kriston Pumphrey, Richard Ladner, Hal Daum\u00e9 III, Alex X Lu, Naomi Caselli, Danielle Bragg", "tldr": "", "abstract": "Sign languages are used as a primary language by approximately 70 million D/deaf people world-wide. However, most communication technologies operate in spoken and written languages, creating inequities in access. To help tackle this problem, we release ASL Citizen, the first crowdsourced Isolated Sign Language Recognition (ISLR) dataset, collected with consent and containing 83,399 videos for 2,731 distinct signs filmed by 52 signers in a variety of environments. We propose that this dataset be used for sign language dictionary retrieval for American Sign Language (ASL), where a user demonstrates a sign to their webcam to retrieve matching signs from a dictionary. 
We show that training supervised machine learning classifiers with our dataset advances the state-of-the-art on metrics relevant for dictionary retrieval, achieving 63\\% accuracy and a recall-at-10 of 91\\%, evaluated entirely on videos of users who are not present in the training or validation sets.", "keywords": "Sign Language;Dataset;Crowdsourcing;Video Classification;Isolated Sign recognition;Dictionary Retrieval", "primary_area": "", "supplementary_material": "/attachment/8ab41202183d09fc9fd19255ad9e975cc8802009.zip", "author": "Aashaka Desai;Lauren Berger;Fyodor O Minakov;Vanessa Milan;Chinmay Singh;Kriston L Pumphrey;Richard Ladner;Hal Daum\u00e9 III;Alex Xijie Lu;Naomi Caselli;Danielle Bragg", "authorids": "~Aashaka_Desai1;~Lauren_Berger1;~Fyodor_O_Minakov1;~Vanessa_Milan1;~Chinmay_Singh2;~Kriston_L_Pumphrey1;~Richard_Ladner1;~Hal_Daum\u00e9_III1;~Alex_Xijie_Lu1;~Naomi_Caselli1;~Danielle_Bragg1", "gender": ";F;M;F;M;M;M;M;M;;", "homepage": ";;;;https://www.microsoft.com/en-us/research/people/chsingh/;;https://www.cs.washington.edu/people/faculty/ladner;http://hal3.name;http://alexluresearch.com/;;https://danibragg.com/", "dblp": ";;;;;;l/RELadner.html;77/2856.html;;;28/8356", "google_scholar": ";;;;;;hgOSI1EAAAAJ;PbEw81gAAAAJ;https://scholar.google.ca/citations?user=gz7gLggAAAAJ;yGdJT4oAAAAJ;6fd-LUEAAAAJ", "orcid": ";;0000-0002-0971-7011;;;;0000-0001-9413-6774;;0000-0001-9568-3155;;", "linkedin": ";http://www.linkedin.com/in/laurenrberger/;;vmilan;;kriston-lee-pumphrey/;richard-ladner-5641064/;;;;", "or_profile": "~Aashaka_Desai1;~Lauren_Berger1;~Fyodor_O_Minakov1;~Vanessa_Milan1;~Chinmay_Singh2;~Kriston_L_Pumphrey1;~Richard_Ladner1;~Hal_Daum\u00e9_III1;~Alex_Xijie_Lu1;~Naomi_Caselli1;~Danielle_Bragg1", "aff": ";Dell Technologies;;;Microsoft;;;Microsoft;Microsoft Research;Boston University, Boston University;Microsoft Research", "aff_domain": ";dell.com;;;microsoft.com;;;microsoft.com;microsoft.com;bu.edu;microsoft.com", "position": ";Sr. 
Consultant;;;Principal Research Software Development Engineer;;;Senior Principal Researcher;Senior Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ndesai2023asl,\ntitle={{ASL} Citizen: A Community-Sourced Dataset for Advancing Isolated Sign Language Recognition},\nauthor={Aashaka Desai and Lauren Berger and Fyodor O Minakov and Vanessa Milan and Chinmay Singh and Kriston L Pumphrey and Richard Ladner and Hal Daum{\\'e} III and Alex Xijie Lu and Naomi Caselli and Danielle Bragg},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=zbEYTg2F1U}\n}", "github": "", "project": "", "reviewers": "RCuQ;j3yN;BfhD;UkwP", "pdf_size": 5035634, "rating": "6;6;8;9", "confidence": "3;4;5;4", "wc_summary_and_contributions": "90;114;165;23", "wc_strengths": "65;81;170;27", "wc_improvement": "147;62;179;28", "wc_limitations": "23;34;63;18", "wc_correctness": "12;37;28;6", "wc_clarity": "6;6;4;5", "wc_relation_to_prior_work": "10;16;2;25", "wc_documentation": "9;38;2;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "363;389;614;143", "wc_reply_reviewers": "0;77;91;0", "wc_reply_authors": "610;586;766;440", "reply_reviewers": "0;2;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 98.0, 51.07347648241698 ], "wc_strengths_avg": [ 85.75, 52.44699705416889 ], "wc_improvement_avg": [ 104.0, 61.26581428496646 ], "wc_limitations_avg": [ 34.5, 17.44276354251241 ], "wc_correctness_avg": [ 20.75, 12.356678356257397 ], "wc_clarity_avg": [ 5.25, 0.82915619758885 ], "wc_relation_to_prior_work_avg": [ 13.25, 8.407585860400118 ], "wc_documentation_avg": [ 14.75, 13.77270852083932 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 377.25, 166.78185602756673 ], "wc_reply_reviewers_avg": [ 42.0, 42.290660907581 ], "wc_reply_authors_avg": [ 600.5, 115.59736156158583 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5443310539518174, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5223497235446611449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";dell.com;;;microsoft.com;;;microsoft.com;microsoft.com;bu.edu;microsoft.com", "author_num": 11, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "Dell Technologies;Microsoft;Boston University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.dell.com;https://www.microsoft.com;https://www.bu.edu", "aff_unique_abbr": "Dell;Microsoft;BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Counting Distinct Elements Under Person-Level Differential Privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69886", "id": "zdli6OxpWd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/6e32c247076c2c0fb381e022c02d2c78-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zdli6OxpWd", "openreview": "https://openreview.net/forum?id=zdli6OxpWd", "poster": "/media/PosterPDFs/NeurIPS%202023/69886.png?t=1699560374.7844827", "slides": "https://nips.cc/virtual/2023/poster/69886", "video": "https://nips.cc/virtual/2023/poster/69886", "author_site": "Thomas Steinke, 
Alexander Knop", "tldr": "", "abstract": "We study the problem of counting the number of distinct elements in a dataset subject to the constraint of differential privacy. \nWe consider the challenging setting of person-level DP (a.k.a. user-level DP) where each person may contribute an unbounded number of items and hence the sensitivity is unbounded.\n\nOur approach is to compute a bounded-sensitivity version of this query, which reduces to solving a max-flow problem. \nThe sensitivity bound is optimized to balance the noise we must add to privatize the answer against the error of the approximation of the bounded-sensitivity query to the true number of unique elements.", "keywords": "differential privacy;user-level privacy;person-level privacy;sensitivity", "primary_area": "", "supplementary_material": "/attachment/fd56a044f445da50194ca6063b213d5990eb5a22.pdf", "author": "Thomas Steinke;Alexander Knop", "authorids": "~Thomas_Steinke2;~Alexander_Knop1", "gender": "M;", "homepage": "http://www.thomas-steinke.net/;", "dblp": "https://dblp.uni-trier.de/pid/73/4025-2.html;", "google_scholar": "kwnwhrgAAAAJ;", "orcid": ";", "linkedin": "thomas-steinke-2841248/;", "or_profile": "~Thomas_Steinke2;~Alexander_Knop1", "aff": "Google;", "aff_domain": "google.com;", "position": "Research Scientist;", "bibtex": "@inproceedings{\nsteinke2023counting,\ntitle={Counting Distinct Elements Under Person-Level Differential Privacy},\nauthor={Thomas Steinke and Alexander Knop},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zdli6OxpWd}\n}", "github": "", "project": "", "reviewers": "REV6;fhMe;FDcy;BQdm", "pdf_size": 400009, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "109;32;84;87", "wc_strengths": "62;67;69;24", "wc_weaknesses": "303;34;22;270", "wc_questions": "127;1;217;53", "wc_limitations": "32;14;9;1", "wc_review": "633;148;401;435", "wc_reply_reviewers": "139;104;13;52", "wc_reply_authors": "196;347;0;41", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 28.257742301889582 ], "wc_strengths_avg": [ 55.5, 18.364367672206956 ], "wc_weaknesses_avg": [ 157.25, 129.84485935145835 ], "wc_questions_avg": [ 99.5, 81.28191680810683 ], "wc_limitations_avg": [ 14.0, 11.379806676741042 ], "wc_review_avg": [ 404.25, 172.44328777891008 ], "wc_reply_reviewers_avg": [ 77.0, 48.202697019980114 ], "wc_reply_authors_avg": [ 146.0, 137.15137622349985 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9373245460874872360&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "google.com;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Interpreting Unsupervised Anomaly Detection in Security via Rule 
Extraction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69885", "id": "zfCNwRQ569", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c43b987f23fd5ea840df2b2be426315c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zfCNwRQ569", "openreview": "https://openreview.net/forum?id=zfCNwRQ569", "poster": "/media/PosterPDFs/NeurIPS%202023/69885.png?t=1702017690.4564183", "slides": "https://nips.cc/virtual/2023/poster/69885", "video": "https://nips.cc/virtual/2023/poster/69885", "author_site": "Ruoyu Li, Qing Li, Yu Zhang, Dan Zhao, Yong Jiang, Yong Yang", "tldr": "", "abstract": "Many security applications require unsupervised anomaly detection, as malicious data are extremely rare and often only unlabeled normal data are available for training (i.e., zero-positive). However, security operators are concerned about the high stakes of trusting black-box models due to their lack of interpretability. In this paper, we propose a post-hoc method to globally explain a black-box unsupervised anomaly detection model via rule extraction.\nFirst, we propose the concept of distribution decomposition rules that decompose the complex distribution of normal data into multiple compositional distributions. To find such rules, we design an unsupervised Interior Clustering Tree that incorporates the model prediction into the splitting criteria. Then, we propose the Compositional Boundary Exploration (CBE) algorithm to obtain the boundary inference rules that estimate the decision boundary of the original model on each compositional distribution. By merging these two types of rules into a rule set, we can present the inferential process of the unsupervised black-box model in a human-understandable way, and build a surrogate rule-based model for online deployment at the same time. \nWe conduct comprehensive experiments on the explanation of four distinct unsupervised anomaly detection models on various real-world datasets. 
The evaluation shows that our method outperforms existing methods in terms of diverse metrics including fidelity, correctness, and robustness.", "keywords": "unsupervised anomaly detection;global explanation;rule extraction", "primary_area": "", "supplementary_material": "", "author": "Ruoyu Li;Qing Li;Yu Zhang;Dan Zhao;Yong Jiang;Yong Yang", "authorids": "~Ruoyu_Li4;~Qing_Li15;eli.yuzhang@gmail.com;~Dan_Zhao2;~Yong_Jiang3;~Yong_Yang8", "gender": "M;M;;F;M;", "homepage": "https://ruoyu-li.github.io;https://smartinternet.group/qing-li/;;;;", "dblp": "08/10026-3;181/2689-6;;10/3489-3;74/1552-1.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;54AuaywAAAAJ;;;;", "orcid": "0000-0003-0754-2817;0000-0002-6071-473X;;0000-0001-9016-5594;;", "linkedin": ";;;;;", "or_profile": "~Ruoyu_Li4;~Qing_Li15;eli.yuzhang@gmail.com;~Dan_Zhao2;~Yong_Jiang3;~Yong_Yang8", "aff": "Tsinghua University;Pengcheng Laboratory;;Peng Cheng Laboratory;Tsinghua University;", "aff_domain": "mails.tsinghua.edu.cn;pcl.ac.cn;;pcl.ac.cn;tsinghua.edu.cn;", "position": "PhD student;Associate Professor;;Researcher;Full Professor;", "bibtex": "@inproceedings{\nli2023interpreting,\ntitle={Interpreting Unsupervised Anomaly Detection in Security via Rule Extraction},\nauthor={Ruoyu Li and Qing Li and Yu Zhang and Dan Zhao and Yong Jiang and Yong Yang},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zfCNwRQ569}\n}", "github": "", "project": "", "reviewers": "jw73;AbmJ;mhUf;eS8h", "pdf_size": 893252, "rating": "5;5;5;6", "confidence": "2;5;4;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "100;185;47;328", "wc_strengths": "45;162;15;32", "wc_weaknesses": "45;2408;147;9", "wc_questions": "261;211;24;226", "wc_limitations": "1;173;1;14", "wc_review": "452;3139;234;609", "wc_reply_reviewers": "52;804;359;210", "wc_reply_authors": "0;3331;2130;444", "reply_reviewers": "1;7;2;2", "reply_authors": "1;10;5;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 165.0, 106.20499046655011 ], "wc_strengths_avg": [ 63.5, 57.85542325486868 ], "wc_weaknesses_avg": [ 652.25, 1014.9456574122577 ], "wc_questions_avg": [ 180.5, 92.15883028771579 ], "wc_limitations_avg": [ 47.25, 72.79551840601178 ], "wc_review_avg": [ 1108.5, 1179.848825061923 ], "wc_reply_reviewers_avg": [ 356.25, 280.376866913089 ], "wc_reply_authors_avg": [ 1476.25, 1333.4486069961602 ], "reply_reviewers_avg": [ 3.0, 2.345207879911715 ], "reply_authors_avg": [ 4.75, 3.344772040064913 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15259948625002176563&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;pcl.ac.cn;;pcl.ac.cn;tsinghua.edu.cn;", "author_num": 6, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Tsinghua University;Pengcheng Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Temporal Causal Mediation through a Point Process: Direct and Indirect Effects of Healthcare Interventions", "status": 
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69884", "id": "zfHCKDzzC8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/b7d9b1d4a9464d5d1ece82198e351349-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zfHCKDzzC8", "openreview": "https://openreview.net/forum?id=zfHCKDzzC8", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69884", "video": "https://nips.cc/virtual/2023/poster/69884", "author_site": "\u00c7a\u011flar H\u0131zl\u0131, ST John, Anne Juuti, Tuure Saarinen, Kirsi Pietil\u00e4inen, Pekka Marttinen", "tldr": "", "abstract": "Deciding on an appropriate intervention requires a causal model of a treatment, the outcome, and potential mediators. Causal mediation analysis lets us distinguish between direct and indirect effects of the intervention, but has mostly been studied in a static setting. In healthcare, data come in the form of complex, irregularly sampled time-series, with dynamic interdependencies between a treatment, outcomes, and mediators across time. Existing approaches to dynamic causal mediation analysis are limited to regular measurement intervals, simple parametric models, and disregard long-range mediator--outcome interactions. To address these limitations, we propose a non-parametric mediator--outcome model where the mediator is assumed to be a temporal point process that interacts with the outcome process. With this model, we estimate the direct and indirect effects of an external intervention on the outcome, showing how each of these affects the whole future trajectory. We demonstrate on semi-synthetic data that our method can accurately estimate direct and indirect effects. On real-world healthcare data, our model infers clinically meaningful direct and indirect effect trajectories for blood glucose after a surgery.", "keywords": "Machine learning for healthcare;Causal mediation;Gaussian process;Point Process", "primary_area": "", "supplementary_material": "", "author": "\u00c7a\u011flar H\u0131zl\u0131;S. T. John;Anne Tuulikki Juuti;Tuure Tapani Saarinen;Kirsi Hannele Pietil\u00e4inen;Pekka Marttinen", "authorids": "~\u00c7a\u011flar_H\u0131zl\u01311;~S._T._John1;~Anne_Tuulikki_Juuti1;~Tuure_Tapani_Saarinen1;~Kirsi_Hannele_Pietil\u00e4inen1;~Pekka_Marttinen1", "gender": ";;F;M;F;M", "homepage": ";;;;;https://users.ics.aalto.fi/~pemartti/", "dblp": "257/6097;218/6590;;;;32/894", "google_scholar": ";Jf9j8GAAAAAJ;;;;id47-5cAAAAJ", "orcid": "0000-0002-7115-060X;0000-0002-4540-395X;0000-0002-9836-662X;0000-0002-7508-4849;0000-0002-8522-1288;0000-0001-7078-7927", "linkedin": ";;;;;", "or_profile": "~\u00c7a\u011flar_H\u0131zl\u01311;~S._T._John1;~Anne_Tuulikki_Juuti1;~Tuure_Tapani_Saarinen1;~Kirsi_Hannele_Pietil\u00e4inen1;~Pekka_Marttinen1", "aff": "Aalto University;Aalto University;University of Helsinki;University of Helsinki;University of Helsinki;Aalto University", "aff_domain": "aalto.fi;aalto.fi;helsinki.fi;helsinki.fi;helsinki.fi;aalto.fi", "position": "PhD student;Postdoc;Assistant Professor;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nh{\\i}zl{\\i}2023temporal,\ntitle={Temporal Causal Mediation through a Point Process: Direct and Indirect Effects of Healthcare Interventions},\nauthor={{\\c{C}}a{\\u{g}}lar H{\\i}zl{\\i} and S. T. 
John and Anne Tuulikki Juuti and Tuure Tapani Saarinen and Kirsi Hannele Pietil{\\\"a}inen and Pekka Marttinen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zfHCKDzzC8}\n}", "github": "", "project": "", "reviewers": "NnCo;jr2N;5ke6;bzqS", "pdf_size": 1345785, "rating": "5;5;7;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;2", "presentation": "4;2;3;4", "wc_summary": "79;42;104;341", "wc_strengths": "44;36;60;169", "wc_weaknesses": "232;146;147;233", "wc_questions": "2;191;35;203", "wc_limitations": "1;20;3;132", "wc_review": "358;435;349;1078", "wc_reply_reviewers": "233;145;8;75", "wc_reply_authors": "360;59;0;14", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 141.5, 117.27425122336105 ], "wc_strengths_avg": [ 77.25, 53.67203648083422 ], "wc_weaknesses_avg": [ 189.5, 43.00290687848904 ], "wc_questions_avg": [ 107.75, 90.10930862014202 ], "wc_limitations_avg": [ 39.0, 54.198708471696996 ], "wc_review_avg": [ 555.0, 303.79845292561976 ], "wc_reply_reviewers_avg": [ 115.25, 83.47567010812192 ], "wc_reply_authors_avg": [ 108.25, 146.97342446850723 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7291535630113614778&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 10, "email": "aalto.fi;aalto.fi;helsinki.fi;helsinki.fi;helsinki.fi;aalto.fi", "author_num": 6, "aff_unique_index": "0;0;1;1;1;0", "aff_unique_norm": "Aalto University;University of Helsinki", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.helsinki.fi", "aff_unique_abbr": "Aalto;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Finland" }, { "title": "Comparing Apples to Oranges: Learning Similarity Functions for Data Produced by Different Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69883", "id": "zjpjsJeVJZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/59056767478c7df64e6250eadfeb0a04-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zjpjsJeVJZ", "openreview": "https://openreview.net/forum?id=zjpjsJeVJZ", "poster": "/media/PosterPDFs/NeurIPS%202023/69883.png?t=1698339661.9252074", "slides": "https://nips.cc/virtual/2023/poster/69883", "video": "https://nips.cc/virtual/2023/poster/69883", "author_site": "Leonidas Tsepenekas, Ivan Brugere, Freddy Lecue, Daniele Magazzeni", "tldr": "", "abstract": "Similarity functions measure how comparable pairs of elements are, and play a key role in a wide variety of applications, e.g., notions of Individual Fairness abiding by the seminal paradigm of Dwork et al., as well as Clustering problems. However, access to an accurate similarity function should not always be considered guaranteed, and this point was even raised by Dwork et al. 
For instance, it is reasonable to assume that when the elements to be compared are produced by different distributions, or in other words belong to different ``demographic'' groups, knowledge of their true similarity might be very difficult to obtain. In this work, we present an efficient sampling framework that learns these across-groups similarity functions, using only a limited amount of experts' feedback. We show analytical results with rigorous theoretical bounds, and empirically validate our algorithms via a large suite of experiments.", "keywords": "individual fairness; similarity learning; active learning", "primary_area": "", "supplementary_material": "/attachment/a76b6312e861daf7c52706e83d546e2227253a4c.zip", "author": "Leonidas Tsepenekas;Ivan Brugere;Freddy Lecue;Daniele Magazzeni", "authorids": "~Leonidas_Tsepenekas1;~Ivan_Brugere1;~Freddy_Lecue1;~Daniele_Magazzeni1", "gender": "M;M;;M", "homepage": "https://www.ltsepene.com/;;http://www-sop.inria.fr/members/Freddy.Lecue/;https://nms.kcl.ac.uk/daniele.magazzeni/", "dblp": "183/6430;50/10346;02/3657.html;14/4672", "google_scholar": "Teb9yeUAAAAJ;JGlGUcsAAAAJ;https://scholar.google.ca/citations?user=GLByS4gAAAAJ;", "orcid": ";0000-0002-2953-3746;;", "linkedin": ";ivanbrugere/;freddylecue/;", "or_profile": "~Leonidas_Tsepenekas1;~Ivan_Brugere1;~Freddy_Lecue1;~Daniele_Magazzeni1", "aff": "J.P. Morgan Chase;J.P. Morgan;INRIA;", "aff_domain": "jpmorgan.com;jpmchase.com;inria.fr;", "position": "Researcher;Researcher;Full Professor;", "bibtex": "@inproceedings{\ntsepenekas2023comparing,\ntitle={Comparing Apples to Oranges: Learning Similarity Functions for Data Produced by Different Distributions},\nauthor={Leonidas Tsepenekas and Ivan Brugere and Freddy Lecue and Daniele Magazzeni},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zjpjsJeVJZ}\n}", "github": "", "project": "", "reviewers": "qRef;L6oz;voHg;3J4J", "pdf_size": 1442698, "rating": "2;4;7;8", "confidence": "4;3;3;4", "soundness": "1;2;3;4", "novelty": "1;2;3;4", "presentation": "3;3;3;4", "wc_summary": "41;114;65;147", "wc_strengths": "94;99;79;207", "wc_weaknesses": "346;57;99;29", "wc_questions": "110;103;2;4", "wc_limitations": "4;1;118;1", "wc_review": "595;374;363;388", "wc_reply_reviewers": "493;16;11;0", "wc_reply_authors": "1676;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "4;1;1;1", "rating_avg": [ 5.25, 2.384848003542364 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.75, 41.34836756148905 ], "wc_strengths_avg": [ 119.75, 50.90861911307358 ], "wc_weaknesses_avg": [ 132.75, 125.61523593895765 ], "wc_questions_avg": [ 54.75, 51.81397012389612 ], "wc_limitations_avg": [ 31.0, 50.24440267333268 ], "wc_review_avg": [ 430.0, 95.67392539244953 ], "wc_reply_reviewers_avg": [ 130.0, 209.65805493708083 ], "wc_reply_authors_avg": [ 419.0, 725.7292883713595 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.10482848367219183, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15023160643046927937&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "jpmorgan.com;jpmchase.com;inria.fr;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "JPMorgan Chase 
& Co.;J.P. Morgan;INRIA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jpmorganchase.com;https://www.jpmorganchase.com;https://www.inria.fr", "aff_unique_abbr": "JPM;JPM;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;France" }, { "title": "Curriculum Learning With Infant Egocentric Videos", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69882", "id": "zkfyOkBVpz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/a9ad92a81748a31ef6f2ef68d775da46-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zkfyOkBVpz", "openreview": "https://openreview.net/forum?id=zkfyOkBVpz", "poster": "/media/PosterPDFs/NeurIPS%202023/69882.png?t=1701719319.0316916", "slides": "https://nips.cc/virtual/2023/poster/69882", "video": "https://nips.cc/virtual/2023/poster/69882", "author_site": "Saber Sheybani, Himanshu Hansaria, Justin Wood, Linda Smith, Zoran Tiganj", "tldr": "", "abstract": "Infants possess a remarkable ability to rapidly learn and process visual inputs. As an infant's mobility increases, so does the variety and dynamics of their visual inputs. Is this change in the properties of the visual inputs beneficial or even critical for the proper development of the visual system? To address this question, we used video recordings from infants wearing head-mounted cameras to train a variety of self-supervised learning models. Critically, we separated the infant data by age group and evaluated the importance of training with a curriculum aligned with developmental order. We found that initiating learning with the data from the youngest age group provided the strongest learning signal and led to the best learning outcomes in terms of downstream task performance. We then showed that the benefits of the data from the youngest age group are due to the slowness and simplicity of the visual experience. The results provide strong empirical evidence for the importance of the properties of the early infant experience and developmental progression in training. More broadly, our approach and findings take a noteworthy step towards reverse engineering the learning mechanisms in newborn brains using image-computable models from artificial intelligence.", "keywords": "Curriculum learning;Self-supervised learning;Slow changes;Infant development", "primary_area": "", "supplementary_material": "/attachment/13e7e3151aab4cf83852aad6fd980f8d95979665.pdf", "author": "Saber Sheybani;Himanshu Hansaria;Justin Newell Wood;Linda B. 
Smith;Zoran Tiganj", "authorids": "~Saber_Sheybani1;~Himanshu_Hansaria1;~Justin_Newell_Wood1;~Linda_B._Smith1;~Zoran_Tiganj1", "gender": "M;M;M;F;", "homepage": ";https://himanshuhansaria.me;http://www.buildingamind.com/;https://cogdev.lab.indiana.edu;https://homes.luddy.indiana.edu/ztiganj/", "dblp": ";;;82/6312;61/2669", "google_scholar": "8YtZDX8AAAAJ;;;J5XIZiUAAAAJ;BrWVCo8AAAAJ", "orcid": "0000-0002-4731-5814;;;;0000-0001-5581-9636", "linkedin": ";himanshuhansaria/;;;", "or_profile": "~Saber_Sheybani1;~Himanshu_Hansaria1;~Justin_Newell_Wood1;~Linda_B._Smith1;~Zoran_Tiganj1", "aff": "Indiana University;Indiana University;Indiana University at Bloomington;Indiana University;Indiana University, Bloomington", "aff_domain": "iu.edu;iu.edu;indiana.edu;iu.edu;iu.edu", "position": "PhD student;MS student;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsheybani2023curriculum,\ntitle={Curriculum Learning With Infant Egocentric Videos},\nauthor={Saber Sheybani and Himanshu Hansaria and Justin Newell Wood and Linda B. Smith and Zoran Tiganj},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zkfyOkBVpz}\n}", "github": "", "project": "", "reviewers": "KVqq;L6Nk;Bo4Q;wvZP", "pdf_size": 1866307, "rating": "6;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;2;3;4", "wc_summary": "58;45;77;81", "wc_strengths": "31;26;62;260", "wc_weaknesses": "218;63;145;287", "wc_questions": "543;1;31;8", "wc_limitations": "12;9;42;1", "wc_review": "862;144;357;637", "wc_reply_reviewers": "36;36;105;143", "wc_reply_authors": "12;0;555;200", "reply_reviewers": "1;1;2;2", "reply_authors": "2;1;2;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 65.25, 14.566657131957214 ], "wc_strengths_avg": [ 94.75, 96.39858660789587 ], "wc_weaknesses_avg": [ 178.25, 83.35878777909382 ], "wc_questions_avg": [ 145.75, 229.62074710269542 ], "wc_limitations_avg": [ 16.0, 15.540270267920054 ], "wc_review_avg": [ 500.0, 272.48761439742543 ], "wc_reply_reviewers_avg": [ 80.0, 46.00543446159377 ], "wc_reply_authors_avg": [ 191.75, 224.21906141093356 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15192638907488752834&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "iu.edu;iu.edu;indiana.edu;iu.edu;iu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Indiana University", "aff_unique_dep": "", "aff_unique_url": "https://www.indiana.edu", "aff_unique_abbr": "IU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Bloomington", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Scalable Fair Influence Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69881", "id": "zmWNe1V6jg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d242dafdb2c5407ae420bc54c9325fdf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zmWNe1V6jg", "openreview": "https://openreview.net/forum?id=zmWNe1V6jg", "poster": 
"/media/PosterPDFs/NeurIPS%202023/69881.png?t=1699465133.2249534", "slides": "https://nips.cc/virtual/2023/poster/69881", "video": "https://nips.cc/virtual/2023/poster/69881", "author_site": "Xiaobin Rui, Zhixiao Wang, Jiayu Zhao, Lichao Sun, Wei Chen", "tldr": "", "abstract": "Given a graph $G$, a community structure $\\mathcal{C}$, and a budget $k$, the fair influence maximization problem aims to select a seed set $S$ ($|S|\\leq k$) that maximizes the influence spread while narrowing the influence gap between different communities. While various fairness notions exist, the welfare fairness notion, which balances fairness level and influence spread, has shown promising effectiveness. However, the lack of efficient algorithms for optimizing the welfare fairness objective function restricts its application to small-scale networks with only a few hundred nodes. In this paper, we adopt the objective function of welfare fairness to maximize the exponentially weighted summation over the influenced fraction of all communities. We first introduce an unbiased estimator for the fractional power of the arithmetic mean. Then, by adapting the reverse influence sampling (RIS) approach, we convert the optimization problem to a weighted maximum coverage problem. We also analyze the number of reverse reachable sets needed to approximate the fair influence at a high probability. Further, we present an efficient algorithm that guarantees $1-1/e - \\varepsilon$ approximation.", "keywords": "influence maximization;approximation algorithm;social fairness", "primary_area": "", "supplementary_material": "", "author": "Xiaobin Rui;Zhixiao Wang;Jiayu Zhao;Lichao Sun;Wei Chen", "authorids": "~Xiaobin_Rui2;~Zhixiao_Wang2;~Jiayu_Zhao2;~Lichao_Sun1;~Wei_Chen10", "gender": "M;M;M;M;M", "homepage": ";;;https://lichao-sun.github.io/;https://www.microsoft.com/en-us/research/people/weic/", "dblp": "222/9790;42/7560;;121/0780-1.html;c/WeiChen13", "google_scholar": "YiEfd4QAAAAJ;;;WhGUE7AAAAAJ;hlEPkxAAAAAJ", "orcid": "0000-0003-0951-1512;0000-0002-4256-1477;0000-0003-0832-5822;;", "linkedin": ";;;lichao-sun-b273a290/;", "or_profile": "~Xiaobin_Rui2;~Zhixiao_Wang2;~Jiayu_Zhao2;~Lichao_Sun1;~Wei_Chen10", "aff": "China University of Mining Technology - Xuzhou;China University of Mining Technology - Xuzhou;China University of Mining Technology - Xuzhou;Lehigh University;Microsoft Research", "aff_domain": "cumt.edu.cn;cumt.edu.cn;cumt.edu.cn;lehigh.edu;microsoft.com", "position": "Lecturer;Full Professor;MS student;Assistant Professor;Pricipal Researcher", "bibtex": "@inproceedings{\nrui2023scalable,\ntitle={Scalable Fair Influence Maximization},\nauthor={Xiaobin Rui and Zhixiao Wang and Jiayu Zhao and Lichao Sun and Wei Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zmWNe1V6jg}\n}", "github": "", "project": "", "reviewers": "tPHQ;uZjd;RYE8;ScbK", "pdf_size": 332614, "rating": "3;6;6;6", "confidence": "4;5;3;4", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "102;69;369;191", "wc_strengths": "21;36;93;55", "wc_weaknesses": "35;151;94;299", "wc_questions": "143;37;72;53", "wc_limitations": "14;13;47;8", "wc_review": "315;306;675;606", "wc_reply_reviewers": "40;244;0;211", "wc_reply_authors": "171;775;48;431", "reply_reviewers": "1;2;0;2", "reply_authors": "3;3;2;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], 
"novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 182.75, 116.42245273142119 ], "wc_strengths_avg": [ 51.25, 26.947866334832522 ], "wc_weaknesses_avg": [ 144.75, 98.04686379482008 ], "wc_questions_avg": [ 76.25, 40.48070528041724 ], "wc_limitations_avg": [ 20.5, 15.46770829825802 ], "wc_review_avg": [ 475.5, 166.82400906344387 ], "wc_reply_reviewers_avg": [ 123.75, 105.35742736039069 ], "wc_reply_authors_avg": [ 356.25, 278.51155721082745 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8401590156038209372&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cumt.edu.cn;cumt.edu.cn;cumt.edu.cn;lehigh.edu;microsoft.com", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "China University of Mining Technology;Lehigh University;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "http://www.cumt.edu.cn/;https://www.lehigh.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CUMT;Lehigh;MSR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Xuzhou;", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "An Alternating Optimization Method for Bilevel Problems under the Polyak-\u0141ojasiewicz Condition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69880", "id": "zn5ihqknGj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/c981fd12b1d5703f19bd8289da9fc996-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zn5ihqknGj", "openreview": "https://openreview.net/forum?id=zn5ihqknGj", "poster": "/media/PosterPDFs/NeurIPS%202023/69880.png?t=1700618524.3857992", "slides": "https://nips.cc/virtual/2023/poster/69880", "video": "https://nips.cc/virtual/2023/poster/69880", "author_site": "Quan Xiao, Songtao Lu, Songtao Lu, Tianyi Chen", "tldr": "", "abstract": "Bilevel optimization has recently regained interest owing to its applications in emerging machine learning fields such as hyperparameter optimization, meta-learning, and reinforcement learning. Recent results have shown that simple alternating (implicit) gradient-based algorithms can match the convergence rate of single-level gradient descent (GD) when addressing bilevel problems with a strongly convex lower-level objective. However, it remains unclear whether this result can be generalized to bilevel problems beyond this basic setting. In this paper, we first introduce a stationary metric for the considered bilevel problems, which generalizes the existing metric, for a nonconvex lower-level objective that satisfies the Polyak-\u0141ojasiewicz (PL) condition. 
We then propose a Generalized ALternating mEthod for bilevel opTimization (GALET) tailored to BLO with a convex PL lower-level problem, and establish that GALET achieves an $\epsilon$-stationary point for the considered problem within $\tilde{\cal O}(\epsilon^{-1})$ iterations, which matches the iteration complexity of GD for single-level smooth nonconvex problems.", "keywords": "Bilevel optimization;nonconvex constrained optimization;convergence analysis", "primary_area": "", "supplementary_material": "/attachment/e02e968fa796be003beff36704469680688256c3.pdf", "author": "Quan Xiao;Songtao Lu;Tianyi Chen", "authorids": "~Quan_Xiao1;~Songtao_Lu1;~Tianyi_Chen5", "gender": "F;M;M", "homepage": "https://jenniferquanxiao.github.io;https://songtaogithub.github.io/;https://chentianyi1991.github.io/", "dblp": ";05/2887;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;LRsjX7kAAAAJ;kFwvv38AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Quan_Xiao1;~Songtao_Lu1;~Tianyi_Chen5", "aff": "Rensselaer Polytechnic Institute;IBM Thomas J. Watson Research Center;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;ibm.com;rpi.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nxiao2023an,\ntitle={An Alternating Optimization Method for Bilevel Problems under the Polyak-{\L}ojasiewicz Condition},\nauthor={Quan Xiao and Songtao Lu and Tianyi Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zn5ihqknGj}\n}", "github": "", "project": "", "reviewers": "k2BT;5R75;swWK;wxYg", "pdf_size": 2176325, "rating": "3;5;5;7", "confidence": "5;5;4;3", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "40;87;97;94", "wc_strengths": "40;78;32;60", "wc_weaknesses": "145;178;376;189", "wc_questions": "205;149;4;99", "wc_limitations": "5;11;1;153", "wc_review": "435;503;510;595", "wc_reply_reviewers": "0;19;90;0", "wc_reply_authors": "0;20;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 23.092206477510977 ], "wc_strengths_avg": [ 52.5, 17.909494688572316 ], "wc_weaknesses_avg": [ 222.0, 90.3742219883524 ], "wc_questions_avg": [ 114.25, 73.87616327341317 ], "wc_limitations_avg": [ 42.5, 63.896400524599194 ], "wc_review_avg": [ 510.75, 56.78192934376217 ], "wc_reply_reviewers_avg": [ 27.25, 37.049797570297194 ], "wc_reply_authors_avg": [ 5.0, 8.660254037844387 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2465834299206742540&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "rpi.edu;ibm.com;rpi.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;IBM", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com/research", "aff_unique_abbr": "RPI;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Yorktown Heights", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Optimizing over trained GNNs via symmetry breaking", "status":
"Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69879", "id": "znW5jNIOED", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8c8cd1b78cdae751265c88efc136e5bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=znW5jNIOED", "openreview": "https://openreview.net/forum?id=znW5jNIOED", "poster": "/media/PosterPDFs/NeurIPS%202023/69879.png?t=1698845055.1167214", "slides": "https://nips.cc/virtual/2023/poster/69879", "video": "https://nips.cc/virtual/2023/poster/69879", "author_site": "Shiqiang Zhang, Juan Campos, Christian Feldmann, David Walz, Frederik Sandfort, Miriam Mathea, Calvin Tsay, Ruth Misener", "tldr": "", "abstract": "Optimization over trained machine learning models has applications including: verification, minimizing neural acquisition functions, and integrating a trained surrogate into a larger decision-making problem. This paper formulates and solves optimization problems constrained by trained graph neural networks (GNNs). To circumvent the symmetry issue caused by graph isomorphism, we propose two types of symmetry-breaking constraints: one indexing a node 0 and one indexing the remaining nodes by lexicographically ordering their neighbor sets. To guarantee that adding these constraints will not remove all symmetric solutions, we construct a graph indexing algorithm and prove that the resulting graph indexing satisfies the proposed symmetry-breaking constraints. For the classical GNN architectures considered in this paper, optimizing over a GNN with a fixed graph is equivalent to optimizing over a dense neural network. Thus, we study the case where the input graph is not fixed, implying that each edge is a decision variable, and develop two mixed-integer optimization formulations. 
To test our symmetry-breaking strategies and optimization formulations, we consider an application in molecular design.", "keywords": "Mixed-integer optimization;Graph neural network;Symmetry-breaking;Molecular design", "primary_area": "", "supplementary_material": "/attachment/9bd124acb862d2d2edbe91f0305ee9a2ab1d3006.zip", "author": "Shiqiang Zhang;Juan S Campos;Christian Wolfgang Feldmann;David Walz;Frederik Sandfort;Miriam Mathea;Calvin Tsay;Ruth Misener", "authorids": "~Shiqiang_Zhang1;~Juan_S_Campos1;~Christian_Wolfgang_Feldmann1;~David_Walz1;~Frederik_Sandfort1;~Miriam_Mathea1;~Calvin_Tsay1;~Ruth_Misener1", "gender": "M;;M;M;;F;;F", "homepage": ";;;;;;https://www.imperial.ac.uk/people/c.tsay;https://wp.doc.ic.ac.uk/rmisener/", "dblp": ";219/7664;;305/7982;;239/3998;204/0777;04/8800", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=LCg1qsUAAAAJ;;SAzirToAAAAJ;DPRT01QAAAAJ;;i59BQe0AAAAJ;AQxtWHoAAAAJ", "orcid": ";;0000-0002-3803-851X;0000-0001-8126-5315;;0000-0002-3214-1487;;0000-0001-5612-5417", "linkedin": ";juan-campos-3b6100133/;https://www.linkedin.com/in-christian-feldmann-308769231;walzds;;https://linkedin.com/in/miriam-mathea-407100101;;ruth-misener/", "or_profile": "~Shiqiang_Zhang1;~Juan_S_Campos1;~Christian_Wolfgang_Feldmann1;~David_Walz1;~Frederik_Sandfort1;~Miriam_Mathea1;~Calvin_Tsay1;~Ruth_Misener1", "aff": "Imperial College London, Imperial College London;;BASF;BASF;BASF SE;BASF SE;Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;;basf.com;basf.com;basf.com;basf.com;imperial.ac.uk;imperial.ac.uk", "position": "PhD student;;Researcher;Researcher;Researcher;Principal Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023optimizing,\ntitle={Optimizing over trained {GNN}s via symmetry breaking},\nauthor={Shiqiang Zhang and Juan S Campos and Christian Wolfgang Feldmann and David Walz and Frederik Sandfort and Miriam Mathea and Calvin Tsay and Ruth Misener},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=znW5jNIOED}\n}", "github": "", "project": "", "reviewers": "eLVB;5u3v;ni19;eveB", "pdf_size": 576637, "rating": "6;7;7;7", "confidence": "2;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;4", "wc_summary": "58;57;130;36", "wc_strengths": "40;55;58;50", "wc_weaknesses": "131;127;238;80", "wc_questions": "56;30;132;14", "wc_limitations": "1;15;30;1", "wc_review": "286;284;588;181", "wc_reply_reviewers": "16;18;22;89", "wc_reply_authors": "10;15;14;248", "reply_reviewers": "1;1;1;3", "reply_authors": "2;2;2;4", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 70.25, 35.59757716474535 ], "wc_strengths_avg": [ 50.75, 6.832825184358224 ], "wc_weaknesses_avg": [ 144.0, 57.857583772570386 ], "wc_questions_avg": [ 58.0, 45.27692569068709 ], "wc_limitations_avg": [ 11.75, 11.986972094736853 ], "wc_review_avg": [ 334.75, 152.25533652388017 ], "wc_reply_reviewers_avg": [ 36.25, 30.531745773866255 ], "wc_reply_authors_avg": [ 71.75, 101.77518115925906 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=513461345287094260&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "email": "imperial.ac.uk;;basf.com;basf.com;basf.com;basf.com;imperial.ac.uk;imperial.ac.uk", "author_num": 8, "aff_unique_index": "0;1;1;1;1;0;0", "aff_unique_norm": "Imperial College London;BASF SE", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.basf.com", "aff_unique_abbr": "ICL;BASF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Time-Reversed Dissipation Induces Duality Between Minimizing Gradient Norm and Function Value", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69878", "id": "znY173SCxu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/4947292b9f5e7d4ab792fa35537f8b96-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=znY173SCxu", "openreview": "https://openreview.net/forum?id=znY173SCxu", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69878", "video": "https://nips.cc/virtual/2023/poster/69878", "author_site": "Jaeyeon Kim, Asuman Ozdaglar, Chanwoo Park, Ernest Ryu", "tldr": "", "abstract": "In convex optimization, first-order optimization methods efficiently minimizing function values have been a central subject study since Nesterov's seminal work of 1983. Recently, however, Kim and Fessler's OGM-G and Lee et al.'s FISTA-G have been presented as alternatives that efficiently minimize the gradient magnitude instead. In this paper, we present H-duality, which represents a surprising one-to-one correspondence between methods efficiently minimizing function values and methods efficiently minimizing gradient magnitude. In continuous-time formulations, H-duality corresponds to reversing the time dependence of the dissipation/friction term. To the best of our knowledge, H-duality is different from Lagrange/Fenchel duality and is distinct from any previously known duality or symmetry relations. Using H-duality, we obtain a clearer understanding of the symmetry between Nesterov's method and OGM-G, derive a new class of methods efficiently reducing gradient magnitudes of smooth convex functions, and find a new composite minimization method that is simpler and faster than FISTA-G.", "keywords": "Convex Optimization;Acceleration;First-Order methods", "primary_area": "", "supplementary_material": "", "author": "Jaeyeon Kim;Asuman E. Ozdaglar;Chanwoo Park;Ernest K. 
Ryu", "authorids": "~Jaeyeon_Kim2;~Asuman_E._Ozdaglar1;~Chanwoo_Park2;~Ernest_K._Ryu1", "gender": "M;F;M;M", "homepage": "https://jaeyeonkim01.github.io/;https://asu.mit.edu/;https://chanwoo-park-official.github.io/;http://www.math.snu.ac.kr/~ernestryu/", "dblp": ";35/2875;;165/5192", "google_scholar": "1bXthLsAAAAJ;https://scholar.google.com.tw/citations?user=nWnBSOsAAAAJ;https://scholar.google.com/citations?hl=ko;CNOqUZoAAAAJ", "orcid": ";;;0000-0001-6820-9095", "linkedin": ";;chanwoo-park-ab5096237/;", "or_profile": "~Jaeyeon_Kim2;~Asuman_E._Ozdaglar1;~Chanwoo_Park2;~Ernest_K._Ryu1", "aff": "Seoul National University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Seoul National University", "aff_domain": "snu.ac.kr;mit.edu;mit.edu;snu.ac.kr", "position": "Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkim2023timereversed,\ntitle={Time-Reversed Dissipation Induces Duality Between Minimizing Gradient Norm and Function Value},\nauthor={Jaeyeon Kim and Asuman E. Ozdaglar and Chanwoo Park and Ernest K. Ryu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=znY173SCxu}\n}", "github": "", "project": "", "reviewers": "2PtR;JyDy;Fq7n;8tCz;x8JQ", "pdf_size": 633263, "rating": "4;5;7;8;9", "confidence": "4;2;2;3;5", "soundness": "4;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;4;4", "wc_summary": "156;102;93;113;24", "wc_strengths": "35;18;93;110;137", "wc_weaknesses": "241;129;62;61;56", "wc_questions": "4;8;118;134;61", "wc_limitations": "3;2;16;6;3", "wc_review": "439;259;382;424;281", "wc_reply_reviewers": "34;9;0;66;29", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.8547236990991407 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 97.6, 42.673645262620816 ], "wc_strengths_avg": [ 78.6, 45.11585087305791 ], "wc_weaknesses_avg": [ 109.8, 70.91234025189128 ], "wc_questions_avg": [ 65.0, 53.9555372505918 ], "wc_limitations_avg": [ 6.0, 5.176871642217914 ], "wc_review_avg": [ 357.0, 73.7807562986447 ], "wc_reply_reviewers_avg": [ 27.6, 22.913751329714653 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.31438385661850643, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14690852077331644712&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;mit.edu;mit.edu;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Seoul National University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://web.mit.edu", "aff_unique_abbr": "SNU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Active Learning for Semantic Segmentation with Multi-class Label Query", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69877", "id": "znudaK78u8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/559a0998fab1d19b80e7e43a5852401c-Abstract-Conference.html", "pdf": 
"https://openreview.net/pdf?id=znudaK78u8", "openreview": "https://openreview.net/forum?id=znudaK78u8", "poster": "/media/PosterPDFs/NeurIPS%202023/69877.png?t=1701842914.8468122", "slides": "https://nips.cc/virtual/2023/poster/69877", "video": "https://nips.cc/virtual/2023/poster/69877", "author_site": "Sehyun Hwang, Sohyun Lee, Hoyoung Kim, Minhyeon Oh, Jungseul Ok, Suha Kwak", "tldr": "", "abstract": "This paper proposes a new active learning method for semantic segmentation. The core of our method lies in a new annotation query design. It samples informative local image regions ($\\textit{e.g.}$, superpixels), and for each of such regions, asks an oracle for a multi-hot vector indicating all classes existing in the region. This multi-class labeling strategy is substantially more efficient than existing ones like segmentation, polygon, and even dominant class labeling in terms of annotation time per click. However, it introduces the class ambiguity issue in training as it assigns partial labels ($\\textit{i.e.}$, a set of candidate classes) to individual pixels. We thus propose a new algorithm for learning semantic segmentation while disambiguating the partial labels in two stages. In the first stage, it trains a segmentation model directly with the partial labels through two new loss functions motivated by partial label learning and multiple instance learning. In the second stage, it disambiguates the partial labels by generating pixel-wise pseudo labels, which are used for supervised learning of the model. Equipped with a new acquisition function dedicated to the multi-class labeling, our method outperforms previous work on Cityscapes and PASCAL VOC 2012 while spending less annotation cost. Our code and results are available at [https://github.com/sehyun03/MulActSeg](https://github.com/sehyun03/MulActSeg).", "keywords": "semantic segmentation; active learning; partial label learning", "primary_area": "", "supplementary_material": "/attachment/fafd266db389d46d589f9b96de5d416bd5f11ff0.zip", "author": "Sehyun Hwang;Sohyun Lee;Hoyoung Kim;Minhyeon Oh;Jungseul Ok;Suha Kwak", "authorids": "~Sehyun_Hwang1;~Sohyun_Lee1;~Hoyoung_Kim1;~Minhyeon_Oh1;~Jungseul_Ok2;~Suha_Kwak3", "gender": "M;;M;;M;M", "homepage": "http://sehyun03.github.io/;https://sohyun-l.github.io/;https://cskhy16.github.io;https://github.com/mh-oh;https://sites.google.com/view/jungseulok;https://suhakwak.github.io/", "dblp": "322/8982;317/6799;05/5746;344/1908;117/3448;65/6173", "google_scholar": "GW4KY8IAAAAJ;https://scholar.google.com/citations?hl=ko;tuVPLyIAAAAJ;;KWG3UUMAAAAJ;-gscDIEAAAAJ", "orcid": "0000-0002-8541-9403;;;;0000-0003-4742-2473;", "linkedin": "sehyun-hwang-864690219;sohyun-lee-858616233/;hoyoung-kim-6142a6162/;;;", "or_profile": "~Sehyun_Hwang1;~Sohyun_Lee1;~Hoyoung_Kim1;~Minhyeon_Oh1;~Jungseul_Ok2;~Suha_Kwak3", "aff": "POSTECH;POSTECH;POSTECH;Pohang University of Science and Technology;POSTECH;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.edu;postech.ac.kr;postech.ac.kr", "position": "PhD student;PhD student;PhD student;MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhwang2023active,\ntitle={Active Learning for Semantic Segmentation with Multi-class Label Query},\nauthor={Sehyun Hwang and Sohyun Lee and Hoyoung Kim and Minhyeon Oh and Jungseul Ok and Suha Kwak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=znudaK78u8}\n}", "github": "", "project": "", 
"reviewers": "RB6d;9EZK;DZFu;FaJw", "pdf_size": 15064212, "rating": "5;5;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "214;63;79;91", "wc_strengths": "69;96;84;96", "wc_weaknesses": "253;50;78;51", "wc_questions": "11;99;4;117", "wc_limitations": "28;4;12;1", "wc_review": "575;312;257;356", "wc_reply_reviewers": "0;31;49;32", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.75, 59.863908158422134 ], "wc_strengths_avg": [ 86.25, 11.098986440211556 ], "wc_weaknesses_avg": [ 108.0, 84.46596947883805 ], "wc_questions_avg": [ 57.75, 50.71180828958873 ], "wc_limitations_avg": [ 11.25, 10.473180032826706 ], "wc_review_avg": [ 375.0, 120.67932714429593 ], "wc_reply_reviewers_avg": [ 28.0, 17.67766952966369 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17088700818929267826&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.edu;postech.ac.kr;postech.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Towards Personalized Federated Learning via Heterogeneous Model Reassembly", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69876", "id": "zpVCITHknd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/5e2217482fa75556f1970be809acd3f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zpVCITHknd", "openreview": "https://openreview.net/forum?id=zpVCITHknd", "poster": "/media/PosterPDFs/NeurIPS%202023/69876.png?t=1701565282.0387278", "slides": "https://nips.cc/virtual/2023/poster/69876", "video": "https://nips.cc/virtual/2023/poster/69876", "author_site": "Jiaqi Wang, Xingyi Yang, Suhan Cui, Liwei Che, Lingjuan Lyu, Dongkuan (DK) Xu, Fenglong Ma", "tldr": "", "abstract": "This paper focuses on addressing the practical yet challenging problem of model heterogeneity in federated learning, where clients possess models with different network structures. To track this problem, we propose a novel framework called pFedHR, which leverages heterogeneous model reassembly to achieve personalized federated learning. In particular, we approach the problem of heterogeneous model personalization as a model-matching optimization task on the server side. Moreover, pFedHR automatically and dynamically generates informative and diverse personalized candidates with minimal human intervention. Furthermore, our proposed heterogeneous model reassembly technique mitigates the adverse impact introduced by using public data with different distributions from the client data to a certain extent. 
Experimental results demonstrate that pFedHR outperforms baselines on three datasets under both IID and Non-IID settings. Additionally, pFedHR effectively reduces the adverse impact of using different public data and dynamically generates diverse personalized models in an automated manner.", "keywords": "Federated Learning", "primary_area": "", "supplementary_material": "/attachment/bddcc595d1edd563350e9251813475cf4bfaf41e.zip", "author": "Jiaqi Wang;Xingyi Yang;Suhan Cui;Liwei Che;Lingjuan Lyu;Dongkuan Xu;Fenglong Ma", "authorids": "~Jiaqi_Wang4;~Xingyi_Yang1;~Suhan_Cui1;~Liwei_Che1;~Lingjuan_Lyu1;~Dongkuan_Xu2;~Fenglong_Ma1", "gender": ";M;M;M;F;M;M", "homepage": ";https://adamdad.github.io/;;;https://sites.google.com/view/lingjuan-lyu;https://dongkuanx27.github.io/;https://fenglong-ma.github.io/", "dblp": ";;294/0930;280/3397;178/9876;142/8139;85/10856", "google_scholar": ";1n2OPtwAAAAJ;BYh25MsAAAAJ;wrQ-S_IAAAAJ;;https://scholar.google.com/citations?hl=en;DLJIxNMAAAAJ", "orcid": ";;;;;0000-0002-1456-9658;0000-0002-4999-0303", "linkedin": ";;%E8%8B%8F%E6%99%97-%E5%B4%94-aa067818b/;liweiche/;;dongkuan-dk-xu-%F0%9F%87%BA%F0%9F%87%A6-05038087/;fenglong-ma-69805832/", "or_profile": "~Jiaqi_Wang4;~Xingyi_Yang1;~Suhan_Cui1;~Liwei_Che1;~Lingjuan_Lyu1;~Dongkuan_Xu2;~Fenglong_Ma1", "aff": ";National University of Singapore;Pennsylvania State University;Pennsylvania State University;Sony;North Carolina State University;Pennsylvania State University", "aff_domain": ";nus.edu;psu.edu;psu.edu;sony.com;ncsu.edu;psu.edu", "position": ";PhD student;PhD student;MS student;scientist;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023towards,\ntitle={Towards Personalized Federated Learning via Heterogeneous Model Reassembly},\nauthor={Jiaqi Wang and Xingyi Yang and Suhan Cui and Liwei Che and Lingjuan Lyu and Dongkuan Xu and Fenglong Ma},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zpVCITHknd}\n}", "github": "", "project": "", "reviewers": "xdZD;nvYR;nMvK;nXSK", "pdf_size": 5316542, "rating": "5;6;7;8", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "109;45;100;87", "wc_strengths": "43;27;92;111", "wc_weaknesses": "452;156;68;101", "wc_questions": "79;69;5;1", "wc_limitations": "45;1;9;6", "wc_review": "728;298;274;306", "wc_reply_reviewers": "254;123;0;0", "wc_reply_authors": "603;316;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.25, 24.519125188309634 ], "wc_strengths_avg": [ 68.25, 34.390223901568305 ], "wc_weaknesses_avg": [ 194.25, 152.0959812092351 ], "wc_questions_avg": [ 38.5, 35.703641270884404 ], "wc_limitations_avg": [ 15.25, 17.41228014936585 ], "wc_review_avg": [ 401.5, 188.87231136405356 ], "wc_reply_reviewers_avg": [ 94.25, 105.01517747449651 ], "wc_reply_authors_avg": [ 229.75, 251.15968526019458 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12085723479860942333&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": 
";nus.edu;psu.edu;psu.edu;sony.com;ncsu.edu;psu.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;3;1", "aff_unique_norm": "National University of Singapore;Pennsylvania State University;Sony Corporation;North Carolina State University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.psu.edu;https://www.sony.com;https://www.ncsu.edu", "aff_unique_abbr": "NUS;PSU;Sony;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;1;1", "aff_country_unique": "Singapore;United States;Japan" }, { "title": "The Crucial Role of Normalization in Sharpness-Aware Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69875", "id": "zq4vFneRiA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/d616a353c711f11c722e3f28d2d9e956-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zq4vFneRiA", "openreview": "https://openreview.net/forum?id=zq4vFneRiA", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69875", "video": "https://nips.cc/virtual/2023/poster/69875", "author_site": "Yan Dai, Kwangjun Ahn, Suvrit Sra", "tldr": "", "abstract": "Sharpness-Aware Minimization (SAM) is a recently proposed gradient-based optimizer (Foret et al., ICLR 2021) that greatly improves the prediction performance of deep neural networks. Consequently, there has been a surge of interest in explaining its empirical success. We focus, in particular, on understanding ***the role played by normalization***, a key component of the SAM updates. We theoretically and empirically study the effect of normalization in SAM for both convex and non-convex functions, revealing two key roles played by normalization: i) it helps in stabilizing the algorithm; and ii) it enables the algorithm to drift along a continuum (manifold) of minima -- a property identified by recent theoretical works that is the key to better performance. We further argue that these two properties of normalization make SAM robust against the choice of hyper-parameters, supporting the practicality of SAM. 
Our conclusions are backed by various experiments.", "keywords": "Sharpness-Aware Minimization;Normalization;Deep Learning Theory", "primary_area": "", "supplementary_material": "/attachment/9758a4831919bd9668d9badd1df7091b033494a6.pdf", "author": "Yan Dai;Kwangjun Ahn;Suvrit Sra", "authorids": "~Yan_Dai1;~Kwangjun_Ahn2;~Suvrit_Sra1", "gender": "M;;", "homepage": "https://yandaichn.github.io/;http://kjahn.mit.edu/;https://optml.mit.edu", "dblp": "132/2047-2;;90/930", "google_scholar": "gkG4z3IAAAAJ;z94iNtgAAAAJ;eyCw9goAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yan_Dai1;~Kwangjun_Ahn2;~Suvrit_Sra1", "aff": "Tsinghua University;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "tsinghua.edu.cn;mit.edu;mit.edu", "position": "Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndai2023the,\ntitle={The Crucial Role of Normalization in Sharpness-Aware Minimization},\nauthor={Yan Dai and Kwangjun Ahn and Suvrit Sra},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zq4vFneRiA}\n}", "github": "", "project": "", "reviewers": "a57m;FoGd;chBL;pAM7;ymqN", "pdf_size": 1452789, "rating": "5;5;6;7;7", "confidence": "3;2;4;4;3", "soundness": "3;3;3;4;4", "novelty": "2;2;3;1;3", "presentation": "3;3;4;3;3", "wc_summary": "99;74;58;222;61", "wc_strengths": "59;72;71;53;40", "wc_weaknesses": "328;74;4;260;19", "wc_questions": "193;32;288;137;90", "wc_limitations": "13;12;10;1;35", "wc_review": "692;264;431;673;245", "wc_reply_reviewers": "592;30;88;0;25", "wc_reply_authors": "254;0;0;0;0", "reply_reviewers": "2;1;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 102.8, 61.32992744166587 ], "wc_strengths_avg": [ 59.0, 11.916375287812984 ], "wc_weaknesses_avg": [ 137.0, 132.05453418947795 ], "wc_questions_avg": [ 148.0, 87.82482564742158 ], "wc_limitations_avg": [ 14.2, 11.232096865679177 ], "wc_review_avg": [ 461.0, 192.18220521161683 ], "wc_reply_reviewers_avg": [ 147.0, 224.360424317659 ], "wc_reply_authors_avg": [ 50.8, 101.6 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5976143046671968, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2855436262700991580&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://web.mit.edu", "aff_unique_abbr": "THU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Shared Adversarial Unlearning: Backdoor Mitigation by Unlearning Shared Adversarial Examples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69874", "id": "zqOcW3R9rd", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/520425a5a4c2fb7f7fc345078b188201-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zqOcW3R9rd", "openreview": 
"https://openreview.net/forum?id=zqOcW3R9rd", "poster": "/media/PosterPDFs/NeurIPS%202023/69874.png?t=1701446782.5253034", "slides": "https://nips.cc/virtual/2023/poster/69874", "video": "https://nips.cc/virtual/2023/poster/69874", "author_site": "Shaokui Wei, Mingda Zhang, Hongyuan Zha, Baoyuan Wu", "tldr": "", "abstract": "Backdoor attacks are serious security threats to machine learning models where an adversary can inject poisoned samples into the training set, causing a backdoored model which predicts poisoned samples with particular triggers to particular target classes, while behaving normally on benign samples. In this paper, we explore the task of purifying a backdoored model using a small clean dataset. By establishing the connection between backdoor risk and adversarial risk, we derive a novel upper bound for backdoor risk, which mainly captures the risk on the shared adversarial examples (SAEs) between the backdoored model and the purified model. This upper bound further suggests a novel bi-level optimization problem for mitigating backdoor using adversarial training techniques. To solve it, we propose Shared Adversarial Unlearning (SAU). Specifically, SAU first generates SAEs, and then, unlearns the generated SAEs such that they are either correctly classified by the purified model and/or differently classified by the two models, such that the backdoor effect in the backdoored model will be mitigated in the purified model. Experiments on various benchmark datasets and network architectures show that our proposed method achieves state-of-the-art performance for backdoor defense. The code is available at https://github.com/SCLBD/BackdoorBench (PyTorch) and https://github.com/shawkui/MindTrojan (MindSpore).", "keywords": "Backdoor Attack;Trustworthy AI;Backdoor Learning", "primary_area": "", "supplementary_material": "", "author": "Shaokui Wei;Mingda Zhang;Hongyuan Zha;Baoyuan Wu", "authorids": "~Shaokui_Wei1;~Mingda_Zhang2;~Hongyuan_Zha1;~Baoyuan_Wu1", "gender": "M;M;;M", "homepage": "https://shawkui.github.io/;https://github.com/mdzhangst;;https://sites.google.com/site/baoyuanwu2015/", "dblp": "323/4243;;z/HongyuanZha;73/7781", "google_scholar": "WHkEfnsAAAAJ;pmwwTcgAAAAJ;n1DQMIsAAAAJ;JNTG1KoAAAAJ", "orcid": ";;;0000-0003-2183-5990", "linkedin": ";;;", "or_profile": "~Shaokui_Wei1;~Mingda_Zhang2;~Hongyuan_Zha1;~Baoyuan_Wu1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwei2023shared,\ntitle={Shared Adversarial Unlearning: Backdoor Mitigation by Unlearning Shared Adversarial Examples},\nauthor={Shaokui Wei and Mingda Zhang and Hongyuan Zha and Baoyuan Wu},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zqOcW3R9rd}\n}", "github": "", "project": "", "reviewers": "Do9c;mKLw;qs3i;1V93", "pdf_size": 1322024, "rating": "3;4;5;6", "confidence": "4;4;3;5", "soundness": "2;3;3;3", "novelty": "2;3;2;2", "presentation": "3;3;2;3", "wc_summary": "23;71;141;90", "wc_strengths": "32;38;24;26", "wc_weaknesses": "174;174;39;378", "wc_questions": "26;59;80;6", "wc_limitations": "13;6;19;6", "wc_review": "268;348;303;506", "wc_reply_reviewers": "60;0;0;89", "wc_reply_authors": "126;82;82;153", 
"reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;3", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.25, 42.26331151246906 ], "wc_strengths_avg": [ 30.0, 5.477225575051661 ], "wc_weaknesses_avg": [ 191.25, 121.08958460577854 ], "wc_questions_avg": [ 42.75, 28.647643882176418 ], "wc_limitations_avg": [ 11.0, 5.431390245600108 ], "wc_review_avg": [ 356.25, 90.99004066380013 ], "wc_reply_reviewers_avg": [ 37.25, 38.63531415686949 ], "wc_reply_authors_avg": [ 110.75, 30.293357357678268 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6759033113499482738&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "The expressive power of pooling in Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69873", "id": "zqyVjCjhYD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e26f31de8b13ec569bf507e6ae2cd952-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zqyVjCjhYD", "openreview": "https://openreview.net/forum?id=zqyVjCjhYD", "poster": "/media/PosterPDFs/NeurIPS%202023/69873.png?t=1697622033.9806027", "slides": "https://nips.cc/virtual/2023/poster/69873", "video": "https://nips.cc/virtual/2023/poster/69873", "author_site": "Filippo Maria Bianchi, Veronica Lachi", "tldr": "", "abstract": "In Graph Neural Networks (GNNs), hierarchical pooling operators generate local summaries of the data by coarsening the graph structure and the vertex features. Considerable attention has been devoted to analyzing the expressive power of message-passing (MP) layers in GNNs, while a study on how graph pooling affects the expressiveness of a GNN is still lacking. Additionally, despite the recent advances in the design of pooling operators, there is not a principled criterion to compare them. In this work, we derive sufficient conditions for a pooling operator to fully preserve the expressive power of the MP layers before it. These conditions serve as a universal and theoretically-grounded criterion for choosing among existing pooling operators or designing new ones. Based on our theoretical findings, we analyze several existing pooling operators and identify those that fail to satisfy the expressiveness conditions. 
Finally, we introduce an experimental setup to verify empirically the expressive power of a GNN equipped with pooling layers, in terms of its capability to perform a graph isomorphism test.", "keywords": "Graph Neural Networks;Graph pooling;Expressive power", "primary_area": "", "supplementary_material": "/attachment/25dd2b5291a982a4c4313792496f70c941842988.pdf", "author": "Filippo Maria Bianchi;Veronica Lachi", "authorids": "~Filippo_Maria_Bianchi1;~Veronica_Lachi1", "gender": "M;F", "homepage": "https://sites.google.com/view/filippombianchi/home;", "dblp": "139/5968;329/9511", "google_scholar": "https://scholar.google.ca/citations?user=yb7cT1MAAAAJ;uly8D-sAAAAJ", "orcid": ";0000-0002-6947-7304", "linkedin": ";", "or_profile": "~Filippo_Maria_Bianchi1;~Veronica_Lachi1", "aff": "NORCE the Norwegian Research Center;University of Siena", "aff_domain": "norce.no;unisi.it", "position": "Researcher;PhD student", "bibtex": "@inproceedings{\nbianchi2023the,\ntitle={The expressive power of pooling in Graph Neural Networks},\nauthor={Filippo Maria Bianchi and Veronica Lachi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zqyVjCjhYD}\n}", "github": "", "project": "", "reviewers": "utJZ;QuJp;KM5y;DheR;UT1a", "pdf_size": 517271, "rating": "4;6;6;6;7", "confidence": "4;3;5;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "2;3;3;4;4", "wc_summary": "116;90;111;181;48", "wc_strengths": "38;87;58;181;49", "wc_weaknesses": "154;87;45;170;18", "wc_questions": "75;599;72;118;129", "wc_limitations": "1;46;1;19;59", "wc_review": "384;909;287;669;303", "wc_reply_reviewers": "141;233;22;62;0", "wc_reply_authors": "190;379;0;0;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 109.2, 43.17128675404521 ], "wc_strengths_avg": [ 82.6, 51.817371604511166 ], "wc_weaknesses_avg": [ 94.8, 59.32756526270062 ], "wc_questions_avg": [ 198.6, 201.47714510584072 ], "wc_limitations_avg": [ 25.2, 23.6 ], "wc_review_avg": [ 510.4, 242.04759862473333 ], "wc_reply_reviewers_avg": [ 91.6, 85.49292368377631 ], "wc_reply_authors_avg": [ 113.8, 151.65012363991005 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8892368546803445138&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "norce.no;unisi.it", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "NORCE;University of Siena", "aff_unique_dep": ";", "aff_unique_url": "https://www.norce.no;https://www.unisi.it", "aff_unique_abbr": "NORCE;UniSi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Norway;Italy" }, { "title": "Live Graph Lab: Towards Open, Dynamic and Real Transaction Graphs with NFT", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2023/poster/73404", "id": "zr1e15kczE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3be31c1a2fdcb7b748c53c3f4cb0e9d2-Abstract-Datasets_and_Benchmarks.html", "pdf": 
"https://openreview.net/pdf?id=zr1e15kczE", "openreview": "https://openreview.net/forum?id=zr1e15kczE", "poster": "/media/PosterPDFs/NeurIPS%202023/73404.png?t=1699863944.3628793", "slides": "https://nips.cc/virtual/2023/poster/73404", "video": "https://nips.cc/virtual/2023/poster/73404", "author_site": "Zhen Zhang, Bingqiao Luo, Shengliang Lu, Bingsheng He", "tldr": "", "abstract": "Numerous studies have been conducted to investigate the properties of large-scale temporal graphs. Despite the ubiquity of these graphs in real-world scenarios, it's usually impractical for us to obtain the whole real-time graphs due to privacy concerns and technical limitations. In this paper, we introduce the concept of {\\it Live Graph Lab} for temporal graphs, which enables open, dynamic and real transaction graphs from blockchains. Among them, Non-fungible tokens (NFTs) have become one of the most prominent parts of blockchain over the past several years. With more than \\$40 billion market capitalization, this decentralized ecosystem produces massive, anonymous and real transaction activities, which naturally forms a complicated transaction network. However, there is limited understanding about the characteristics of this emerging NFT ecosystem from a temporal graph analysis perspective. To mitigate this gap, we instantiate a live graph with NFT transaction network and investigate its dynamics to provide new observations and insights. Specifically, through downloading and parsing the NFT transaction activities, we obtain a temporal graph with more than 4.5 million nodes and 124 million edges. Then, a series of measurements are presented to understand the properties of the NFT ecosystem. Through comparisons with social, citation, and web networks, our analyses give intriguing findings and point out potential directions for future exploration. Finally, we also study machine learning models in this live graph to enrich the current datasets and provide new opportunities for the graph community. 
The source codes and dataset are available at https://livegraphlab.github.io.", "keywords": "Graph Datasets;Non-Fungible Token;Blockchain;GNN", "primary_area": "", "supplementary_material": "/attachment/bdb999280db8d6e4c487e787b4cc1564e38c36f2.pdf", "author": "Zhen Zhang;Bingqiao Luo;Shengliang Lu;Bingsheng He", "authorids": "~Zhen_Zhang14;~Bingqiao_Luo1;~Shengliang_Lu1;~Bingsheng_He1", "gender": "M;;M;M", "homepage": "https://cszhangzhen.github.io/;;https://www.linkedin.com/in/shengliang-lu/;http://www.comp.nus.edu.sg/~hebs/", "dblp": "19/5112-23;344/3342;;h/BingshengHe.html", "google_scholar": "8hclVjIAAAAJ;;;https://scholar.google.com.tw/citations?user=RogYLKYAAAAJ", "orcid": "0000-0001-5769-8786;;;0000-0001-8618-4581", "linkedin": ";bingqiao-luo-3993031a3/;shengliang-lu/;bingsheng-he-7734b131", "or_profile": "~Zhen_Zhang14;~Bingqiao_Luo1;~Shengliang_Lu1;~Bingsheng_He1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;u.nus.edu;nus.edu.sg;nus.edu.sg", "position": "Postdoc;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2023live,\ntitle={Live Graph Lab: Towards Open, Dynamic and Real Transaction Graphs with {NFT}},\nauthor={Zhen Zhang and Bingqiao Luo and Shengliang Lu and Bingsheng He},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2023},\nurl={https://openreview.net/forum?id=zr1e15kczE}\n}", "github": "", "project": "", "reviewers": "RoMo;dqJb;e4Wv;MnCg;qE1c", "pdf_size": 1217088, "rating": "5;6;7;8;8", "confidence": "4;3;3;4;5", "wc_summary_and_contributions": "89;49;72;155;71", "wc_strengths": "23;38;83;112;77", "wc_improvement": "156;7;48;288;148", "wc_limitations": "24;45;19;16;8", "wc_correctness": "61;1;13;9;24", "wc_clarity": "8;18;93;6;5", "wc_relation_to_prior_work": "39;13;57;8;11", "wc_documentation": "43;20;13;26;17", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "444;192;399;621;362", "wc_reply_reviewers": "17;74;0;126;48", "wc_reply_authors": "980;1166;782;810;798", "reply_reviewers": "1;1;0;1;1", "reply_authors": "4;4;2;2;2", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 87.2, 36.201657420620954 ], "wc_strengths_avg": [ 66.6, 32.11603960640228 ], "wc_improvement_avg": [ 129.4, 97.79079711302082 ], "wc_limitations_avg": [ 22.4, 12.435433245367848 ], "wc_correctness_avg": [ 21.6, 21.04851538707659 ], "wc_clarity_avg": [ 26.0, 33.81715540964378 ], "wc_relation_to_prior_work_avg": [ 25.6, 19.22082204277434 ], "wc_documentation_avg": [ 23.8, 10.49571341072154 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 403.6, 138.14572016533845 ], "wc_reply_reviewers_avg": [ 53.0, 44.49719092257398 ], "wc_reply_authors_avg": [ 907.2, 147.86804928719388 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.412514323662695, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13570777536776493098&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "nus.edu.sg;u.nus.edu;nus.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Learning Invariant Representations of Graph Neural Networks via Cluster Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69872", "id": "zrCmeqV3Sz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/8ed2293e714b7692b63117e330e551e8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zrCmeqV3Sz", "openreview": "https://openreview.net/forum?id=zrCmeqV3Sz", "poster": "/media/PosterPDFs/NeurIPS%202023/69872.png?t=1701157871.6955464", "slides": "https://nips.cc/virtual/2023/poster/69872", "video": "https://nips.cc/virtual/2023/poster/69872", "author_site": "Donglin Xia, Xiao Wang, Nian Liu, Chuan Shi", "tldr": "", "abstract": "Graph neural networks (GNNs) have become increasingly popular in modeling graph-structured data due to their ability to learn node representations by aggregating local structure information. However, it is widely acknowledged that the test graph structure may differ from the training graph structure, resulting in a structure shift. In this paper, we experimentally find that the performance of GNNs drops significantly when the structure shift happens, suggesting that the learned models may be biased towards specific structure patterns. To address this challenge, we propose the Cluster Information Transfer (\\textbf{CIT}) mechanism, which can learn invariant representations for GNNs, thereby improving their generalization ability to various and unknown test graphs with structure shift. The CIT mechanism achieves this by combining different cluster information with the nodes while preserving their cluster-independent information. By generating nodes across different clusters, the mechanism significantly enhances the diversity of the nodes and helps GNNs learn the invariant representations. We provide a theoretical analysis of the CIT mechanism, showing that the impact of changing clusters during structure shift can be mitigated after transfer. Additionally, the proposed mechanism is a plug-in that can be easily used to improve existing GNNs. 
We comprehensively evaluate our proposed method on three typical structure shift scenarios, demonstrating its effectiveness in enhancing GNNs' performance.", "keywords": "Graph neural networks;network representation learning;deep learning", "primary_area": "", "supplementary_material": "/attachment/191c64e9a6a249fb908aa157cb2cbd44af1e850d.pdf", "author": "Donglin Xia;Xiao Wang;Nian Liu;Chuan Shi", "authorids": "~Donglin_Xia1;~Xiao_Wang2;~Nian_Liu3;~Chuan_Shi1", "gender": "M;M;M;M", "homepage": "https://wangxiaocs.github.io/;https://liun-online.github.io/;http://www.shichuan.org/;", "dblp": "49/67-17;;64/3041-1;", "google_scholar": "MnzarAQAAAAJ;Tx8vRjUAAAAJ;tUq_v90AAAAJ;", "orcid": "0000-0002-4444-7811;0009-0000-8378-1129;0000-0002-3734-0266;0000-0003-3689-2170", "linkedin": ";;;", "or_profile": "~Xiao_Wang2;~Nian_Liu3;~Chuan_Shi1;~Xia_Donglin1", "aff": "Beihang University;Beijing University of Post and Telecommunication;Beijing University of Post and Telecommunication;Beijing University of Posts and Telecommunications", "aff_domain": "buaa.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "Associate Professor;MS student;Full Professor;MS student", "bibtex": "@inproceedings{\nxia2023learning,\ntitle={Learning Invariant Representations of Graph Neural Networks via Cluster Generalization},\nauthor={Donglin Xia and Xiao Wang and Nian Liu and Chuan Shi},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zrCmeqV3Sz}\n}", "github": "", "project": "", "reviewers": "uNQ9;5H2e;Nbx4;nmEc;QyZE", "pdf_size": 2153292, "rating": "4;6;7;7;7", "confidence": "4;4;4;5;5", "soundness": "2;2;3;3;4", "novelty": "2;2;4;4;4", "presentation": "3;3;3;3;3", "wc_summary": "67;75;69;64;86", "wc_strengths": "65;32;50;34;54", "wc_weaknesses": "131;449;19;61;45", "wc_questions": "30;73;91;10;171", "wc_limitations": "12;41;1;1;11", "wc_review": "305;670;230;170;367", "wc_reply_reviewers": "0;237;9;0;0", "wc_reply_authors": "0;460;24;0;0", "reply_reviewers": "0;2;1;0;0", "reply_authors": "1;3;2;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.9797958971132712 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.2, 7.782030583337488 ], "wc_strengths_avg": [ 47.0, 12.457929201917949 ], "wc_weaknesses_avg": [ 141.0, 158.40707054926557 ], "wc_questions_avg": [ 75.0, 56.08208270027068 ], "wc_limitations_avg": [ 13.2, 14.675149062275313 ], "wc_review_avg": [ 348.4, 174.06964123591453 ], "wc_reply_reviewers_avg": [ 49.2, 93.96467421323824 ], "wc_reply_authors_avg": [ 96.8, 181.83772985824476 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5601120336112038, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3116501499867787145&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "buaa.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Beihang University;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.bupt.edu.cn/", "aff_unique_abbr": "BUAA;BUPT", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Discover and Align 
Taxonomic Context Priors for Open-world Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69871", "id": "zrLxHYvIFL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/3c646b713f5de2cf1ab1939d49a4036d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zrLxHYvIFL", "openreview": "https://openreview.net/forum?id=zrLxHYvIFL", "poster": "/media/PosterPDFs/NeurIPS%202023/69871.png?t=1699600817.5577486", "slides": "https://nips.cc/virtual/2023/poster/69871", "video": "https://nips.cc/virtual/2023/poster/69871", "author_site": "Yu Wang, Zhun Zhong, Pengchong Qiao, Xuxin Cheng, Xiawu Zheng, Chang Liu, Nicu Sebe, Rongrong Ji, Jie Chen", "tldr": "", "abstract": "Open-world Semi-Supervised Learning (OSSL) is a realistic and challenging task, aiming to classify unlabeled samples from both seen and novel classes using partially labeled samples from the seen classes. \nPrevious works typically explore the relationship of samples as priors on the pre-defined single-granularity labels to help novel class recognition. In fact, classes follow a taxonomy and samples can be classified at multiple levels of granularity, which contains more underlying relationships for supervision. We thus argue that learning with single-granularity labels results in sub-optimal representation learning and inaccurate pseudo labels, especially with unknown classes. In this paper, we take the initiative to explore and propose a unified framework, called Taxonomic context prIors Discovering and Aligning (TIDA), which exploits the relationship of samples under various granularities. It allows us to discover multi-granularity semantic concepts as taxonomic context priors (i.e., sub-class, target-class, and super-class), and then collaboratively leverage them to enhance representation learning and improve the quality of pseudo labels.\nSpecifically, TIDA comprises two components: i) A taxonomic context discovery module that constructs a set of hierarchical prototypes in the latent space to discover the underlying taxonomic context priors; ii) A taxonomic context-based prediction alignment module that enforces consistency across hierarchical predictions to build reliable relationships between classes across various granularities and provide additional supervision. We demonstrate that these two components are mutually beneficial for an effective OSSL framework, which is theoretically explained from the perspective of the EM algorithm. Extensive experiments on seven commonly used datasets show that TIDA can significantly improve the performance and achieve a new state of the art.
The source codes are publicly available at https://github.com/rain305f/TIDA.", "keywords": "open-world semi-supervised learning; novel class discovery;", "primary_area": "", "supplementary_material": "/attachment/725a011d5b3b73fa3d5070d3806e52267a41e47b.pdf", "author": "Yu Wang;Zhun Zhong;Pengchong Qiao;Xuxin Cheng;Xiawu Zheng;Chang Liu;Nicu Sebe;Rongrong Ji;Jie Chen", "authorids": "~Yu_Wang43;~Zhun_Zhong1;~Pengchong_Qiao1;~Xuxin_Cheng3;~Xiawu_Zheng1;~Chang_Liu9;~Nicu_Sebe1;~Rongrong_Ji5;~Jie_Chen15", "gender": "F;M;;;M;M;M;M;M", "homepage": "https://github.com/rain305f;http://zhunzhong.site;;;https://sites.google.com/view/zhengxiawu/%E9%A6%96%E9%A1%B5;https://www.au.tsinghua.edu.cn/en/info/1096/3484.htm;http://disi.unitn.it/~sebe/;http://mac.xmu.edu.cn/rrji-en.html;https://aimia-pku.github.io/", "dblp": ";32/6525;;;222/7865;52/5716-42;20/3519;86/5681;92/6289-1", "google_scholar": "lzsu-5MAAAAJ;nZizkQ0AAAAJ;;;jBgXocYAAAAJ;vsh1WP4AAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;;https://scholar.google.fi/citations?user=ZAZFfwwAAAAJ", "orcid": ";;0000-0002-9292-2744;;0000-0002-6855-5403;0000-0001-6747-0646;0000-0002-6597-7248;;", "linkedin": ";;;;;;;;", "or_profile": "~Yu_Wang43;~Zhun_Zhong1;~Pengchong_Qiao1;~Xuxin_Cheng3;~Xiawu_Zheng1;~Chang_Liu9;~Nicu_Sebe1;~Rongrong_Ji5;~Jie_Chen15", "aff": "Peking University;University of Trento;Peking University;;PengCheng Lab;Tsinghua University;University of Trento;Xiamen University;Peking University", "aff_domain": "pku.edu.cn;unitn.it;stu.pku.edu.cn;;pcl.ac.cn;tsinghua.edu.cn;unitn.it;xmu.edu.cn;pku.edu.cn", "position": "MS student;Assistant Professor;PhD student;;Postdoc;Postdoc;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023discover,\ntitle={Discover and Align Taxonomic Context Priors for Open-world Semi-Supervised Learning},\nauthor={Yu Wang and Zhun Zhong and Pengchong Qiao and Xuxin Cheng and Xiawu Zheng and Chang Liu and Nicu Sebe and Rongrong Ji and Jie Chen},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zrLxHYvIFL}\n}", "github": "", "project": "", "reviewers": "uA1P;k9he;qAVC;Qvan", "pdf_size": 0, "rating": "5;5;5;6", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "novelty": "3;2;3;2", "presentation": "3;2;2;3", "wc_summary": "114;56;79;99", "wc_strengths": "78;32;65;50", "wc_weaknesses": "193;194;23;100", "wc_questions": "53;79;23;99", "wc_limitations": "6;24;5;62", "wc_review": "444;385;195;410", "wc_reply_reviewers": "54;59;28;145", "wc_reply_authors": "57;659;39;71", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.0, 21.783020910791965 ], "wc_strengths_avg": [ 56.25, 17.151894939043906 ], "wc_weaknesses_avg": [ 127.5, 71.39502783807848 ], "wc_questions_avg": [ 63.5, 28.508770580296865 ], "wc_limitations_avg": [ 24.25, 23.069189409253198 ], "wc_review_avg": [ 358.5, 96.69151979361996 ], "wc_reply_reviewers_avg": [ 71.5, 44.03691633164157 ], "wc_reply_authors_avg": [ 206.5, 261.49713191543805 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16321963502963297220&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "pku.edu.cn;unitn.it;stu.pku.edu.cn;;pcl.ac.cn;tsinghua.edu.cn;unitn.it;xmu.edu.cn;pku.edu.cn", "author_num": 9, "aff_unique_index": "0;1;0;2;3;1;4;0", "aff_unique_norm": "Peking University;University of Trento;Pengcheng Lab;Tsinghua University;Xiamen University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.unitn.it;;https://www.tsinghua.edu.cn;https://www.xmu.edu.cn", "aff_unique_abbr": "Peking U;UniTN;;THU;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1;0;0", "aff_country_unique": "China;Italy" }, { "title": "Algorithm Selection for Deep Active Learning with Imbalanced Datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69870", "id": "zrUEHZ6s9C", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/1e77af93008ee6cd248a31723ce357d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zrUEHZ6s9C", "openreview": "https://openreview.net/forum?id=zrUEHZ6s9C", "poster": "/media/PosterPDFs/NeurIPS%202023/69870.png?t=1702151484.523015", "slides": "https://nips.cc/virtual/2023/poster/69870", "video": "https://nips.cc/virtual/2023/poster/69870", "author_site": "Jifan Zhang, Shuai Shao, Saurabh Verma, Robert Nowak", "tldr": "", "abstract": "Label efficiency has become an increasingly important objective in deep learning applications. Active learning aims to reduce the number of labeled examples needed to train deep networks, but the empirical performance of active learning algorithms can vary dramatically across datasets and applications. It is difficult to know in advance which active learning strategy will perform well or best in a given application. To address this, we propose the first adaptive algorithm selection strategy for deep active learning. For any unlabeled dataset, our (meta) algorithm TAILOR (Thompson ActIve Learning algORithm selection) iteratively and adaptively chooses among a set of candidate active learning algorithms. TAILOR uses novel reward functions aimed at gathering class-balanced examples. Extensive experiments in multi-class and multi-label applications demonstrate TAILOR's effectiveness in achieving accuracy comparable or better than that of the best of the candidate algorithms. 
Our implementation of TAILOR is open-sourced at https://github.com/jifanz/TAILOR.", "keywords": "Deep Learning;Active Learning", "primary_area": "", "supplementary_material": "/attachment/8be90ab065b72971060cc826358945b4331ea927.zip", "author": "Jifan Zhang;Shuai Shao;saurabh verma;Robert D Nowak", "authorids": "~Jifan_Zhang1;~Shuai_Shao5;~saurabh_verma1;~Robert_D_Nowak1", "gender": "M;M;M;M", "homepage": "https://jifanz.github.io/;https://research.facebook.com/people/shao-shuai/;http://www-users.cs.umn.edu/~verma076/;http://nowak.ece.wisc.edu", "dblp": "277/6616;;118/5298;n/RobertDNowak", "google_scholar": "ZUOsJWcAAAAJ;Rw4NiLAAAAAJ;mUsHoggAAAAJ;fn13u8IAAAAJ", "orcid": ";;;", "linkedin": ";shuaishao1/;saurabh-verma-10076544/;", "or_profile": "~Jifan_Zhang1;~Shuai_Shao5;~saurabh_verma1;~Robert_D_Nowak1", "aff": "University of Wisconsin, Madison;Meta Research;Meta;University of Wisconsin - Madison", "aff_domain": "wisc.edu;facebook.com;meta.com;", "position": "PhD student;Researcher;Research Scientist ;Full Professor", "bibtex": "@inproceedings{\nzhang2023algorithm,\ntitle={Algorithm Selection for Deep Active Learning with Imbalanced Datasets},\nauthor={Jifan Zhang and Shuai Shao and saurabh verma and Robert D Nowak},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zrUEHZ6s9C}\n}", "github": "", "project": "", "reviewers": "Ra5K;4Aq9;Fgsj;LjXi;NzMT", "pdf_size": 1557770, "rating": "3;5;6;6;7", "confidence": "3;4;3;2;3", "soundness": "2;2;3;3;4", "novelty": "2;2;2;3;4", "presentation": "1;3;3;3;4", "wc_summary": "36;118;44;53;99", "wc_strengths": "18;118;72;75;116", "wc_weaknesses": "473;213;69;88;136", "wc_questions": "5;180;34;86;16", "wc_limitations": "1;34;1;19;2", "wc_review": "533;663;220;321;369", "wc_reply_reviewers": "12;121;17;59;30", "wc_reply_authors": "12;506;31;27;20", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 70.0, 32.4530430006186 ], "wc_strengths_avg": [ 79.8, 36.53163013061421 ], "wc_weaknesses_avg": [ 195.8, 147.24863327039745 ], "wc_questions_avg": [ 64.2, 64.22585149299307 ], "wc_limitations_avg": [ 11.4, 13.215142829345433 ], "wc_review_avg": [ 421.2, 157.61522769072792 ], "wc_reply_reviewers_avg": [ 47.8, 40.0769260298242 ], "wc_reply_authors_avg": [ 119.2, 193.50803600884382 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.23312620206007845, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10861079200573399247&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "wisc.edu;facebook.com;meta.com;", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Wisconsin;Meta;University of Wisconsin-Madison", "aff_unique_dep": ";Meta Research;", "aff_unique_url": "https://www.wisc.edu;https://research.facebook.com;https://www.wisc.edu", "aff_unique_abbr": "UW;Meta Research;UW-Madison", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generator Identification for Linear SDEs with Additive and Multiplicative Noise", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2023/poster/69869", "id": "zsOOqjaj2z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/ca642f8e1174012d67c05c1c9f969644-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zsOOqjaj2z", "openreview": "https://openreview.net/forum?id=zsOOqjaj2z", "poster": "/media/PosterPDFs/NeurIPS%202023/69869.png?t=1701435292.1941187", "slides": "https://nips.cc/virtual/2023/poster/69869", "video": "https://nips.cc/virtual/2023/poster/69869", "author_site": "Yuanyuan Wang, Xi Geng, Wei Huang, Biwei Huang, Mingming Gong", "tldr": "", "abstract": "In this paper, we present conditions for identifying the generator of a linear stochastic differential equation (SDE) from the distribution of its solution process with a given fixed initial state. These identifiability conditions are crucial in causal inference using linear SDEs as they enable the identification of the post-intervention distributions from its observational distribution. Specifically, we derive a sufficient and necessary condition for identifying the generator of linear SDEs with additive noise, as well as a sufficient condition for identifying the generator of linear SDEs with multiplicative noise. We show that the conditions derived for both types of SDEs are generic. Moreover, we offer geometric interpretations of the derived identifiability conditions to enhance their understanding. To validate our theoretical results, we perform a series of simulations, which support and substantiate the established findings.", "keywords": "Linear SDE;Identification;Causal inference", "primary_area": "", "supplementary_material": "/attachment/edb147cee51ca4416182b3f214b92aa43bb65cbc.zip", "author": "Yuanyuan Wang;Xi Geng;Wei Huang;Biwei Huang;Mingming Gong", "authorids": "~Yuanyuan_Wang5;~Xi_Geng1;~Wei_Huang8;~Biwei_Huang1;~Mingming_Gong1", "gender": "F;;F;F;M", "homepage": ";;https://sites.google.com/view/w-huang/home;;https://mingming-gong.github.io/", "dblp": "95/494;;;165/3288;98/8479", "google_scholar": "https://scholar.google.com.au/citations?user=gT6kOFQAAAAJ;;https://scholar.google.com.au/citations?user=qOd4sB0AAAAJ;;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ", "orcid": ";;0000-0002-5691-7411;;0000-0001-7147-5589", "linkedin": ";;;;", "or_profile": "~Yuanyuan_Wang5;~Xi_Geng1;~Wei_Huang8;~Biwei_Huang1;~Mingming_Gong1", "aff": "University of Melbourne;;University of Melbourne;University of California, San Diego;University of Melbourne", "aff_domain": "unimelb.edu.au;;unimelb.edu.au;ucsd.edu;unimelb.edu.au", "position": "PhD student;;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023generator,\ntitle={Generator Identification for Linear {SDE}s with Additive and Multiplicative Noise},\nauthor={Yuanyuan Wang and Xi Geng and Wei Huang and Biwei Huang and Mingming Gong},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zsOOqjaj2z}\n}", "github": "", "project": "", "reviewers": "8vBF;5yfh;b8xq;GdBe;Huoq;eRyW", "pdf_size": 523765, "rating": "4;4;4;5;7;7", "confidence": "2;1;3;2;1;3", "soundness": "3;3;2;2;3;3", "novelty": "2;2;3;2;3;3", "presentation": "2;2;3;3;2;3", "wc_summary": "24;39;100;25;38;173", "wc_strengths": "20;26;129;47;7;43", "wc_weaknesses": "422;437;534;70;64;127", "wc_questions": "23;10;131;16;41;112", "wc_limitations": "22;18;110;1;41;8", "wc_review": "511;530;1004;159;191;463", "wc_reply_reviewers": "373;241;929;42;184;0", 
"wc_reply_authors": "899;416;2092;141;1271;1338", "reply_reviewers": "1;2;4;1;3;0", "reply_authors": "4;4;7;3;6;4", "rating_avg": [ 5.166666666666667, 1.3437096247164249 ], "confidence_avg": [ 2.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.5, 54.10098581973037 ], "wc_strengths_avg": [ 45.333333333333336, 39.77715704047012 ], "wc_weaknesses_avg": [ 275.6666666666667, 192.95134677483395 ], "wc_questions_avg": [ 55.5, 47.940066750057824 ], "wc_limitations_avg": [ 33.333333333333336, 36.485918135935975 ], "wc_review_avg": [ 476.3333333333333, 278.3305389081279 ], "wc_reply_reviewers_avg": [ 294.8333333333333, 309.38402063167746 ], "wc_reply_authors_avg": [ 1026.1666666666667, 640.9860676246316 ], "reply_reviewers_avg": [ 1.8333333333333333, 1.343709624716425 ], "reply_authors_avg": [ 4.666666666666667, 1.3743685418725535 ], "replies_avg": [ 48, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=441485474402352377&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "unimelb.edu.au;;unimelb.edu.au;ucsd.edu;unimelb.edu.au", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Melbourne;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.ucsd.edu", "aff_unique_abbr": "UniMelb;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;United States" }, { "title": "An Optimization-based Approach To Node Role Discovery in Networks: Approximating Equitable Partitions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69868", "id": "ztDxO15N7f", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e1c73e9595126794186536cfbbed012f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=ztDxO15N7f", "openreview": "https://openreview.net/forum?id=ztDxO15N7f", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69868", "video": "https://nips.cc/virtual/2023/poster/69868", "author_site": "Michael Scholkemper, Michael T Schaub", "tldr": "", "abstract": "Similar to community detection, partitioning the nodes of a complex network according to their structural roles aims to identify fundamental building blocks of a network, which can be used, e.g., to find simplified descriptions of the network connectivity, to derive reduced order models for dynamical processes unfolding on processes, or as ingredients for various network analysis and graph mining tasks. In this work, we offer a fresh look on the problem of role extraction and its differences to community detection and present a definition of node roles and two associated optimization problems (cost functions) grounded in ideas related to graph-isomorphism tests, the Weisfeiler-Leman algorithm and equitable partitions. 
We present theoretical guarantees and validate our approach via a novel \u201crole-infused partition benchmark\u201d, a network model from which we can sample networks in which nodes are endowed with different roles in a stochastic way.", "keywords": "Role Extraction;Graph Learning;Node Embeddings;Weisfeiler Lehman;Equitable Partition", "primary_area": "", "supplementary_material": "/attachment/d2e3afc0dba46842c80259ccb2facc75d88b4ab3.zip", "author": "Michael Scholkemper;Michael T Schaub", "authorids": "~Michael_Scholkemper1;~Michael_T_Schaub1", "gender": ";", "homepage": ";https://michaelschaub.github.io/", "dblp": "293/8588;72/10263", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-3669-8119;0000-0003-2426-6404", "linkedin": ";", "or_profile": "~Michael_Scholkemper1;~Michael_T_Schaub1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;RWTH Aachen University", "aff_domain": "rwth-aachen.de;rwth-aachen.de", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nscholkemper2023an,\ntitle={An Optimization-based Approach To Node Role Discovery in Networks: Approximating Equitable Partitions},\nauthor={Michael Scholkemper and Michael T Schaub},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=ztDxO15N7f}\n}", "github": "", "project": "", "reviewers": "rRxj;nMtV;WiNY;ZAin", "pdf_size": 520712, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "3;2;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;4", "wc_summary": "76;94;63;58", "wc_strengths": "127;62;43;45", "wc_weaknesses": "431;49;106;49", "wc_questions": "162;303;19;73", "wc_limitations": "14;5;4;1", "wc_review": "810;513;235;226", "wc_reply_reviewers": "0;19;31;20", "wc_reply_authors": "0;10;28;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.75, 13.91716565971678 ], "wc_strengths_avg": [ 69.25, 34.14948755105997 ], "wc_weaknesses_avg": [ 158.75, 158.89678253507842 ], "wc_questions_avg": [ 139.25, 107.44853419195628 ], "wc_limitations_avg": [ 6.0, 4.847679857416329 ], "wc_review_avg": [ 446.0, 239.7425702706968 ], "wc_reply_reviewers_avg": [ 17.5, 11.146748404803978 ], "wc_reply_authors_avg": [ 9.5, 11.434596626029272 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17083215000446646271&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "rwth-aachen.de;rwth-aachen.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "RWTH Aachen University", "aff_unique_dep": "", "aff_unique_url": "https://www.rwth-aachen.de", "aff_unique_abbr": "RWTH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "ztqf6bzuqQ", "title": "Hybrid Distillation: Connecting Masked Autoencoders with Contrastive Learners", "track": "main", "status": "Reject", "tldr": "", "abstract": "Representation learning has been evolving from traditional supervised training to Contrastive Learning (CL) and Masked Image Modeling (MIM). 
Previous works have demonstrated their pros and cons in specific scenarios, i.e., CL and supervised pre-training excel at capturing longer-range global patterns and enabling better feature discrimination, while MIM can introduce more local and diverse attention across all transformer layers. In this paper, we explore how to obtain a model that combines their strengths. We start by examining previous feature distillation and mask feature reconstruction methods and identify their limitations. We find that their increasing diversity mainly derives from the asymmetric designs, but these designs may in turn compromise the discrimination ability. In order to better obtain both discrimination and diversity, we propose a simple but effective Hybrid Distillation strategy, which utilizes both the supervised/CL teacher and the MIM teacher to jointly guide the student model. Hybrid Distill imitates the token relations of the MIM teacher to alleviate attention collapse, as well as distills the feature maps of the supervised/CL teacher to enable discrimination. Furthermore, a progressive redundant token masking strategy is also utilized to reduce the distilling costs and avoid falling into local optima. Experimental results show that Hybrid Distill can achieve superior performance on different benchmarks.", "keywords": "contrastive learning;masked image modeling;feature distillation", "primary_area": "", "supplementary_material": "/attachment/44b0e878db5f2f917275ebfd5818b9dd671fc592.pdf", "author": "Bowen Shi;XIAOPENG ZHANG;Yaoming Wang;Jin Li;Wenrui Dai;Junni Zou;Hongkai Xiong;Qi Tian", "authorids": "~Bowen_Shi2;~XIAOPENG_ZHANG7;~Yaoming_Wang1;~Jin_Li10;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1;~Qi_Tian3", "gender": "M;M;;;;F;M;M", "homepage": ";https://sites.google.com/site/zxphistory/;;;;http://www.cs.sjtu.edu.cn/~zou-jn;http://min.sjtu.edu.cn;https://www.qitian1987.com/index.html", "dblp": ";;;;16/5135.html;91/4613;21/3569;78/1467-1.html", "google_scholar": "lJHbpY0AAAAJ;Ud6aBAcAAAAJ;;;Xg8MhyAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;bB16iN4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0003-4552-0029;0000-0002-7252-5047", "linkedin": ";;;;;;;", "or_profile": "~Bowen_Shi2;~XIAOPENG_ZHANG7;~Yaoming_Wang1;~Jin_Li10;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1;~Qi_Tian3", "aff": "Shanghai Jiaotong University;Huawei Technologies Ltd.;;;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.", "aff_domain": "sjtu.edu.cn;huawei.com;;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com", "position": "PhD student;Principal Researcher;;;Associate Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@misc{\nshi2023hybrid,\ntitle={Hybrid Distillation: Connecting Masked Autoencoders with Contrastive Learners},\nauthor={Bowen Shi and XIAOPENG ZHANG and Yaoming Wang and Jin Li and Wenrui Dai and Junni Zou and Hongkai Xiong and Qi Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=ztqf6bzuqQ}\n}", "github": "", "project": "", "reviewers": "yZYM;CGRR;dWYQ;5Rvs;gLfn", "site": "https://openreview.net/forum?id=ztqf6bzuqQ", "pdf_size": 1245648, "rating": "4;4;5;5;6", "confidence": "4;3;5;5;5", "soundness": "3;2;1;3;4", "novelty": "3;2;2;2;3", "presentation": "2;2;1;3;3", "wc_summary": "88;16;62;39;100", "wc_strengths": "26;29;11;17;35", "wc_weaknesses": "478;223;331;8;168", "wc_questions": "2;27;2;127;1", "wc_limitations": "11;17;1;39;1", "wc_review": "605;312;407;230;305", "wc_reply_reviewers": "0;0;0;12;0", 
"wc_reply_authors": "99;105;102;0;0", "reply_reviewers": "0;0;0;1;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 61.0, 30.854497241083024 ], "wc_strengths_avg": [ 23.6, 8.56971411425142 ], "wc_weaknesses_avg": [ 241.6, 157.6129436309087 ], "wc_questions_avg": [ 31.8, 48.602057569613244 ], "wc_limitations_avg": [ 13.8, 14.005713120009277 ], "wc_review_avg": [ 371.8, 129.43477121701108 ], "wc_reply_reviewers_avg": [ 2.4, 4.8 ], "wc_reply_authors_avg": [ 61.2, 50.005599686435126 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8017837257372732, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16173498847243949441&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Enhancing Adversarial Contrastive Learning via Adversarial Invariant Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69867", "id": "zuXyQsXVLF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/364071531ff2398e0fb8bae31f615b69-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zuXyQsXVLF", "openreview": "https://openreview.net/forum?id=zuXyQsXVLF", "poster": "/media/PosterPDFs/NeurIPS%202023/69867.png?t=1701873679.8942442", "slides": "https://nips.cc/virtual/2023/poster/69867", "video": "https://nips.cc/virtual/2023/poster/69867", "author_site": "Xilie Xu, Jingfeng ZHANG, Feng Liu, Masashi Sugiyama, Mohan Kankanhalli", "tldr": "", "abstract": "Adversarial contrastive learning (ACL) is a technique that enhances standard contrastive learning (SCL) by incorporating adversarial data to learn a robust representation that can withstand adversarial attacks and common corruptions without requiring costly annotations. To improve transferability, the existing work introduced the standard invariant regularization (SIR) to impose style-independence property to SCL, which can exempt the impact of nuisance style factors in the standard representation. However, it is unclear how the style-independence property benefits ACL-learned robust representations. In this paper, we leverage the technique of causal reasoning to interpret the ACL and propose adversarial invariant regularization (AIR) to enforce independence from style factors. We regulate the ACL using both SIR and AIR to output the robust representation. Theoretically, we show that AIR implicitly encourages the representational distance between different views of natural data and their adversarial variants to be independent of style factors. Empirically, our experimental results show that invariant regularization significantly improves the performance of state-of-the-art ACL methods in terms of both standard generalization and robustness on downstream tasks. 
To the best of our knowledge, we are the first to apply causal reasoning to interpret ACL and develop AIR for enhancing ACL-learned robust representations. Our source code is at https://github.com/GodXuxilie/Enhancing_ACL_via_AIR.", "keywords": "robust pre-training;adversarial contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Xilie Xu;Jingfeng Zhang;Feng Liu;Masashi Sugiyama;Mohan Kankanhalli", "authorids": "~Xilie_Xu1;~Jingfeng_Zhang1;~Feng_Liu2;~Masashi_Sugiyama1;~Mohan_Kankanhalli1", "gender": "M;M;M;M;M", "homepage": "https://godxuxilie.github.io/;https://zjfheart.github.io;https://fengliu90.github.io/index.html;http://www.ms.k.u-tokyo.ac.jp/sugi/;https://www.comp.nus.edu.sg/~mohan", "dblp": "259/2327;227/2664.html;77/1318-3;35/1228;09/3613.html", "google_scholar": "https://scholar.google.com/citations?hl=en;NS0P1FkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;6Lx_eowAAAAJ", "orcid": ";0000-0003-3491-8074;0000-0002-5005-9129;0000-0001-6658-6743;0000-0002-4846-2015", "linkedin": ";;alexfengliu;;mohan-kankanhalli-583417221", "or_profile": "~Xilie_Xu1;~Jingfeng_Zhang1;~Feng_Liu2;~Masashi_Sugiyama1;~Mohan_Kankanhalli1", "aff": "National University of Singapore;University of Auckland;University of Melbourne;The University of Tokyo;National University of Singapore", "aff_domain": "nus.edu.sg;auckland.ac.nz;unimelb.edu.au;u-tokyo.ac.jp;nus.edu.sg", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023enhancing,\ntitle={Enhancing Adversarial Contrastive Learning via Adversarial Invariant Regularization},\nauthor={Xilie Xu and Jingfeng Zhang and Feng Liu and Masashi Sugiyama and Mohan Kankanhalli},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zuXyQsXVLF}\n}", "github": "", "project": "", "reviewers": "fVht;paeS;ZaSM;Ai1a", "pdf_size": 415269, "rating": "4;6;6;8", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "44;50;49;65", "wc_strengths": "34;23;36;68", "wc_weaknesses": "230;59;76;91", "wc_questions": "74;3;17;3", "wc_limitations": "25;3;6;1", "wc_review": "407;138;184;228", "wc_reply_reviewers": "517;33;0;0", "wc_reply_authors": "872;26;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "4;2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 52.0, 7.842193570679061 ], "wc_strengths_avg": [ 40.25, 16.768646337734005 ], "wc_weaknesses_avg": [ 114.0, 67.9227502387823 ], "wc_questions_avg": [ 24.25, 29.286302258905955 ], "wc_limitations_avg": [ 8.75, 9.54921462739214 ], "wc_review_avg": [ 239.25, 101.94453148649023 ], "wc_reply_reviewers_avg": [ 137.5, 219.51822247822616 ], "wc_reply_authors_avg": [ 224.5, 373.98495959062313 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6819324117058345786&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "nus.edu.sg;auckland.ac.nz;unimelb.edu.au;u-tokyo.ac.jp;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "National 
University of Singapore;University of Auckland;University of Melbourne;University of Tokyo", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.auckland.ac.nz;https://www.unimelb.edu.au;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "NUS;UoA;UniMelb;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "Singapore;New Zealand;Australia;Japan" }, { "title": "Don\u2019t blame Dataset Shift! Shortcut Learning due to Gradients and Cross Entropy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69866", "id": "zyZkaqNnpa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/e35460304fdf6df523f068a59aaf8829-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zyZkaqNnpa", "openreview": "https://openreview.net/forum?id=zyZkaqNnpa", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69866", "video": "https://nips.cc/virtual/2023/poster/69866", "author_site": "Aahlad Manas Puli, Lily Zhang, Yoav Wald, Rajesh Ranganath", "tldr": "", "abstract": "Common explanations for shortcut learning assume that the shortcut improves prediction only under the training distribution. Thus, models trained in the typical way by minimizing log-loss using gradient descent, which we call default-ERM, should utilize the shortcut. However, even when the stable feature determines the label in the training distribution and the shortcut does not provide any additional information, like in perception tasks, default-ERM exhibits shortcut learning. Why are such solutions preferred when the loss can be driven to zero when using the stable feature alone? By studying a linear perception task, we show that default-ERM\u2019s preference for maximizing the margin, even without overparameterization, leads to models that depend more on the shortcut than the stable feature. This insight suggests that default-ERM\u2019s implicit inductive bias towards max-margin may be unsuitable for perception tasks. Instead, we consider inductive biases toward uniform margins. We show that uniform margins guarantee sole dependence on the perfect stable feature in the linear perception task and suggest alternative loss functions, termed margin control (MARG-CTRL), that encourage uniform-margin solutions. MARG-CTRL techniques mitigate shortcut learning on a variety of vision and language tasks, showing that changing inductive biases can remove the need for complicated shortcut-mitigating methods in perception tasks.", "keywords": "shortcut learning;spurious correlations;perfect stable feature;perception tasks;implicit bias in optimization;improving inductive biases", "primary_area": "", "supplementary_material": "", "author": "Aahlad Manas Puli;Lily H Zhang;Yoav Wald;Rajesh Ranganath", "authorids": "~Aahlad_Manas_Puli1;~Lily_H_Zhang1;~Yoav_Wald1;~Rajesh_Ranganath2", "gender": "M;F;;M", "homepage": "http://aahladmanas.github.io;https://lhz1029.github.io/;;", "dblp": "228/9272;267/6682;97/7057;165/8048", "google_scholar": "xWmCmBQAAAAJ;fmCi9ZQAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aahlad_Manas_Puli1;~Lily_H_Zhang1;~Rajesh_Ranganath2;~Yoav_Itzhak_Wald1", "aff": "New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npuli2023dont,\ntitle={Don{\\textquoteright}t blame Dataset Shift! 
Shortcut Learning due to Gradients and Cross Entropy},\nauthor={Aahlad Manas Puli and Lily H Zhang and Yoav Wald and Rajesh Ranganath},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zyZkaqNnpa}\n}", "github": "", "project": "", "reviewers": "PgqC;9QKU;Xk5J;8MoS", "pdf_size": 4350561, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "4;3;3;3", "novelty": "4;3;3;3", "presentation": "4;3;3;2", "wc_summary": "132;49;86;67", "wc_strengths": "38;39;62;8", "wc_weaknesses": "61;31;107;52", "wc_questions": "52;157;139;158", "wc_limitations": "5;31;3;6", "wc_review": "288;307;397;291", "wc_reply_reviewers": "0;114;23;11", "wc_reply_authors": "0;242;5;7", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.5, 30.907118921051183 ], "wc_strengths_avg": [ 36.75, 19.17517926904466 ], "wc_weaknesses_avg": [ 62.75, 27.770262872360426 ], "wc_questions_avg": [ 126.5, 43.67207345661527 ], "wc_limitations_avg": [ 11.25, 11.453711188955307 ], "wc_review_avg": [ 320.75, 44.611517571138506 ], "wc_reply_reviewers_avg": [ 37.0, 45.19402615390667 ], "wc_reply_authors_avg": [ 63.5, 103.08855416582387 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13074804231173774997&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "What is Flagged in Uncertainty Quantification? Latent Density Models for Uncertainty Categorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2023/poster/69865", "id": "zyhxRc9bew", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f0c4f3d83c58df58380af3b0729354c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=zyhxRc9bew", "openreview": "https://openreview.net/forum?id=zyhxRc9bew", "poster": "", "slides": "https://nips.cc/virtual/2023/poster/69865", "video": "https://nips.cc/virtual/2023/poster/69865", "author_site": "Hao Sun, Boris van Breugel, Jonathan Crabb\u00e9, Nabeel Seedat, Mihaela van der Schaar", "tldr": "", "abstract": "Uncertainty quantification (UQ) is essential for creating trustworthy machine learning models. Recent years have seen a steep rise in UQ methods that can flag suspicious examples; however, it is often unclear what exactly these methods identify. In this work, we propose a framework for categorizing uncertain examples flagged by UQ methods. We introduce the confusion density matrix---a kernel-based approximation of the misclassification density---and use this to categorize suspicious examples identified by a given uncertainty method into three classes: out-of-distribution (OOD) examples, boundary (Bnd) examples, and examples in regions of high in-distribution misclassification (IDM). 
Through extensive experiments, we show that our framework provides a new and distinct perspective for assessing differences between uncertainty quantification methods, thereby forming a valuable assessment benchmark.", "keywords": "Uncertainty Explanation;Uncertainty Quantification;Interpretability", "primary_area": "", "supplementary_material": "/attachment/6c5b743d813c77b74837f29e9f15e18c889fd90f.pdf", "author": "Hao Sun;Boris van Breugel;Jonathan Crabb\u00e9;Nabeel Seedat;Mihaela van der Schaar", "authorids": "~Hao_Sun1;~Boris_van_Breugel2;~Jonathan_Crabb\u00e91;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2", "gender": "M;;M;;F", "homepage": "https://holarissun.github.io;;https://jonathancrabbe.github.io/;;https://www.vanderschaar-lab.com", "dblp": "SunLLZL19;284/0835;278/8353.html;227/8368;", "google_scholar": "7ZNoHJkAAAAJ;https://scholar.google.com/citations?hl=en;Y_Nmd2sAAAAJ;https://scholar.google.com/citations?hl=en;DZ3S--MAAAAJ", "orcid": ";;0000-0002-0341-7712;;", "linkedin": ";;jonathan-crabb%C3%A9-4ab5701a5/;nabeel-seedat/;", "or_profile": "~Hao_Sun1;~Boris_van_Breugel2;~Jonathan_Crabb\u00e91;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsun2023what,\ntitle={What is Flagged in Uncertainty Quantification? Latent Density Models for Uncertainty Categorization},\nauthor={Hao Sun and Boris van Breugel and Jonathan Crabb{\\'e} and Nabeel Seedat and Mihaela van der Schaar},\nbooktitle={Thirty-seventh Conference on Neural Information Processing Systems},\nyear={2023},\nurl={https://openreview.net/forum?id=zyhxRc9bew}\n}", "github": "", "project": "", "reviewers": "AvJq;7E4k;hHZH;sVgx", "pdf_size": 1905201, "rating": "6;6;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;2", "novelty": "3;4;2;3", "presentation": "3;3;3;3", "wc_summary": "48;112;96;91", "wc_strengths": "40;64;88;99", "wc_weaknesses": "86;2;233;109", "wc_questions": "37;298;232;106", "wc_limitations": "14;37;1;48", "wc_review": "225;513;650;453", "wc_reply_reviewers": "73;23;44;88", "wc_reply_authors": "278;96;38;87", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.75, 23.678840765544244 ], "wc_strengths_avg": [ 72.75, 22.75274708689041 ], "wc_weaknesses_avg": [ 107.5, 82.68161827153602 ], "wc_questions_avg": [ 168.25, 102.47042256183 ], "wc_limitations_avg": [ 25.0, 18.506755523321747 ], "wc_review_avg": [ 460.25, 153.4460410046476 ], "wc_reply_reviewers_avg": [ 57.0, 25.20912533191106 ], "wc_reply_authors_avg": [ 124.75, 91.19039148945463 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8517447872853836303&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United Kingdom;United States" } ]